From d05eaba8dd5f4582596739e0832117bf39cc1875 Mon Sep 17 00:00:00 2001 From: Release Configuration Management Date: Wed, 10 Oct 2018 14:53:17 +0000 Subject: [PATCH 001/195] New branch setup --- .gitignore | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e69de29 From 58305efc05fe148d3f2941965096473b623f287c Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Sun, 14 Oct 2018 14:48:54 -0300 Subject: [PATCH 002/195] Initial 3.0.0 rebase --- .gitignore | 1 + 0001-Initial-redhat-build.patch | 455 +++ 0002-Enable-disable-devices-for-RHEL-7.patch | 1094 ++++++ 0003-Add-RHEL-machine-types.patch | 3017 +++++++++++++++++ 0004-Use-kvm-by-default.patch | 32 + ...mber-of-devices-that-can-be-assigned.patch | 65 + ...Add-support-statement-to-help-output.patch | 55 + ...lly-limit-the-maximum-number-of-CPUs.patch | 89 + 0008-Add-support-for-simpletrace.patch | 104 + ...documentation-instead-of-qemu-system.patch | 1040 ++++++ 0010-usb-xhci-Fix-PCI-capability-order.patch | 82 + ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 66 + ...x-headers-asm-s390-kvm.h-header-sync.patch | 72 + ...Enable-KVM-huge-page-backing-support.patch | 114 + 0014-s390x-kvm-add-etoken-facility.patch | 190 ++ ...efault-enable-bpb-and-ppa15-for-z196.patch | 51 + ...-arch_query_cpu_model_expansion-leak.patch | 87 + ...sable-TOPOEXT-by-default-on-cpu-host.patch | 54 + ...ify-off-disable-host-as-well-as-peer.patch | 77 + ...on-postcopy-Clear-have_listen_thread.patch | 51 + ...ion-cleanup-in-error-paths-in-loadvm.patch | 52 + ...hange-start-callback-to-run-callback.patch | 372 ++ 0022-jobs-canonize-Error-object.patch | 283 ++ 0023-jobs-add-exit-shim.patch | 108 + 0024-block-commit-utilize-job_exit-shim.patch | 115 + 0025-block-mirror-utilize-job_exit-shim.patch | 152 + 0026-jobs-utilize-job_exit-shim.patch | 307 ++ ...e-function-variables-consistently-na.patch | 165 + 
...argument-to-job_completed-privatize-.patch | 153 + 0029-jobs-remove-job_defer_to_main_loop.patch | 119 + ...-commit-add-block-job-creation-flags.patch | 110 + ...-mirror-add-block-job-creation-flags.patch | 100 + ...-stream-add-block-job-creation-flags.patch | 100 + ...refactor-commit-to-use-job-callbacks.patch | 180 + ...don-t-install-backing-chain-on-abort.patch | 45 + ...or-conservative-mirror_exit-refactor.patch | 136 + ...refactor-stream-to-use-job-callbacks.patch | 94 + ...s-blockjob-replace-Blockjob-with-Job.patch | 233 ++ ...s-test-blockjob-remove-exit-callback.patch | 88 + ...st-blockjob-txn-move-.exit-to-.clean.patch | 53 + 0040-jobs-remove-.exit-callback.patch | 156 + ...ock-commit-expose-new-job-properties.patch | 90 + ...ock-mirror-expose-new-job-properties.patch | 144 + ...ock-stream-expose-new-job-properties.patch | 108 + ...lock-backup-qapi-documentation-fixup.patch | 73 + ...-document-transactional-shortcomings.patch | 53 + 85-kvm.preset | 5 + 95-kvm-memlock.conf | 10 + 99-qemu-guest-agent.rules | 2 + bridge.conf | 1 + ksm.service | 13 + ksm.sysconfig | 4 + ksmctl.c | 77 + ksmtuned | 139 + ksmtuned.conf | 21 + ksmtuned.service | 12 + kvm-s390x.conf | 19 + kvm-setup | 40 + kvm-setup.service | 14 + kvm-x86.conf | 12 + kvm.conf | 3 + kvm.modules | 18 + qemu-ga.sysconfig | 19 + qemu-guest-agent.service | 20 + qemu-kvm.spec | 1651 +++++++++ qemu-pr-helper.service | 15 + qemu-pr-helper.socket | 9 + sources | 1 + vhost.conf | 3 + 69 files changed, 12493 insertions(+) create mode 100644 0001-Initial-redhat-build.patch create mode 100644 0002-Enable-disable-devices-for-RHEL-7.patch create mode 100644 0003-Add-RHEL-machine-types.patch create mode 100644 0004-Use-kvm-by-default.patch create mode 100644 0005-vfio-cap-number-of-devices-that-can-be-assigned.patch create mode 100644 0006-Add-support-statement-to-help-output.patch create mode 100644 0007-globally-limit-the-maximum-number-of-CPUs.patch create mode 100644 0008-Add-support-for-simpletrace.patch 
create mode 100644 0009-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch create mode 100644 0010-usb-xhci-Fix-PCI-capability-order.patch create mode 100644 0011-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch create mode 100644 0012-linux-headers-asm-s390-kvm.h-header-sync.patch create mode 100644 0013-s390x-Enable-KVM-huge-page-backing-support.patch create mode 100644 0014-s390x-kvm-add-etoken-facility.patch create mode 100644 0015-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch create mode 100644 0016-i386-Fix-arch_query_cpu_model_expansion-leak.patch create mode 100644 0017-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch create mode 100644 0018-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch create mode 100644 0019-migration-postcopy-Clear-have_listen_thread.patch create mode 100644 0020-migration-cleanup-in-error-paths-in-loadvm.patch create mode 100644 0021-jobs-change-start-callback-to-run-callback.patch create mode 100644 0022-jobs-canonize-Error-object.patch create mode 100644 0023-jobs-add-exit-shim.patch create mode 100644 0024-block-commit-utilize-job_exit-shim.patch create mode 100644 0025-block-mirror-utilize-job_exit-shim.patch create mode 100644 0026-jobs-utilize-job_exit-shim.patch create mode 100644 0027-block-backup-make-function-variables-consistently-na.patch create mode 100644 0028-jobs-remove-ret-argument-to-job_completed-privatize-.patch create mode 100644 0029-jobs-remove-job_defer_to_main_loop.patch create mode 100644 0030-block-commit-add-block-job-creation-flags.patch create mode 100644 0031-block-mirror-add-block-job-creation-flags.patch create mode 100644 0032-block-stream-add-block-job-creation-flags.patch create mode 100644 0033-block-commit-refactor-commit-to-use-job-callbacks.patch create mode 100644 0034-block-mirror-don-t-install-backing-chain-on-abort.patch create mode 100644 0035-block-mirror-conservative-mirror_exit-refactor.patch create mode 100644 
0036-block-stream-refactor-stream-to-use-job-callbacks.patch create mode 100644 0037-tests-blockjob-replace-Blockjob-with-Job.patch create mode 100644 0038-tests-test-blockjob-remove-exit-callback.patch create mode 100644 0039-tests-test-blockjob-txn-move-.exit-to-.clean.patch create mode 100644 0040-jobs-remove-.exit-callback.patch create mode 100644 0041-qapi-block-commit-expose-new-job-properties.patch create mode 100644 0042-qapi-block-mirror-expose-new-job-properties.patch create mode 100644 0043-qapi-block-stream-expose-new-job-properties.patch create mode 100644 0044-block-backup-qapi-documentation-fixup.patch create mode 100644 0045-blockdev-document-transactional-shortcomings.patch create mode 100644 85-kvm.preset create mode 100644 95-kvm-memlock.conf create mode 100644 99-qemu-guest-agent.rules create mode 100644 bridge.conf create mode 100644 ksm.service create mode 100644 ksm.sysconfig create mode 100644 ksmctl.c create mode 100644 ksmtuned create mode 100644 ksmtuned.conf create mode 100644 ksmtuned.service create mode 100644 kvm-s390x.conf create mode 100644 kvm-setup create mode 100644 kvm-setup.service create mode 100644 kvm-x86.conf create mode 100644 kvm.conf create mode 100644 kvm.modules create mode 100644 qemu-ga.sysconfig create mode 100644 qemu-guest-agent.service create mode 100644 qemu-kvm.spec create mode 100644 qemu-pr-helper.service create mode 100644 qemu-pr-helper.socket create mode 100644 sources create mode 100644 vhost.conf diff --git a/.gitignore b/.gitignore index e69de29..caaa10d 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1 @@ +/qemu-3.0.0.tar.xz diff --git a/0001-Initial-redhat-build.patch b/0001-Initial-redhat-build.patch new file mode 100644 index 0000000..a901328 --- /dev/null +++ b/0001-Initial-redhat-build.patch @@ -0,0 +1,455 @@ +From f03d3b79bc1908b0b6e257ee7aaa6567ecb91e38 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Mon, 11 Sep 2017 07:11:00 +0200 +Subject: Initial redhat build + +This patch 
introduces redhat build structure in redhat subdirectory. +In addition, several issues are fixed in QEMU tree: + +- Change of app name for sasl_server_init in VNC code from qemu to qemu-kvm + - As we use qemu-kvm as name in all places, this is updated to be consistent +- Man page renamed from qemu to qemu-kvm + - man page is installed using make install so we have to fix it in qemu tree +- Use "/share/qemu-kvm" as SHARE_SUFFIX + - We reconfigured our share to qemu-kvm to be consistent with used name +- Added .gitpublish configuration file + - Support for git publish has to be stored in repository root + +Rebase changes (3.0.0): +- python detection changed +- added --disable-debug-mutex + +Merged patches (3.0.0): +- 9997a46 Fix annocheck issues +- 35230f9 redhat: remove extra % in rhel_rhev_conflicts macro( +- c747d3f redhat: syncronizing specfile +- e6abfc4 rpm: Add nvme VFIO driver to rw whitelist +- 7043465 rpm: Whitelist copy-on-read block driver +- f9a897c rpm: add throttle driver to rw whitelist +- b9ea80f redhat: replacing %pkname by %name +- eeeea85 redhat: Remove unused ApplyPatch macro +- b42c578 redhat:removing disable code for libcacard +- cee6bd5 redhat: improve packaging layout with modularization of the block layer +- 0cb4c60 redhat: Introducing qemu-kvm-core package +- 1ff4106 Add qemu-keymap to qemu-kvm-common +- 47838a5 redhat: Make gitpublish profile the default one +- a82f87b redhat: s390x: add hpage=1 to kvm.conf +- 3d52169 Enabling vhost_user +- 57aa228 spec: Enable Native Ceph support on all architectures +- 5f9ea03 Thu Jun 21 2018 Danilo C. L. 
de Paula - 2.12.0-13.el8 +- ed4d62a spec: Fix ambiguous 'python' interpreter name +- 74b3e6c qemu-ga: blacklisting guest-exec and guest-exec-status RPCs +- 2fd2cf7 redhat: rewrap "build_configure.sh" cmdline for the "rh-env-prep" target +- f48dc7f redhat: remove the VTD, LIVE_BLOCK_OPS, and RHV options in local builds too +- ccdf46b redhat: fix the "rh-env-prep" target's dependency on the SRPM_NAME macro +- f258fbf redhat: remove dead code related to s390 (not s390x) +- d186100 redhat: sync compiler flags from the spec file to "rh-env-prep" +- 727aa86 redhat: sync guest agent enablement and tcmalloc usage from spec to local +- b5d47e2 redhat: fix up Python 3 dependency for building QEMU +- 70c64dd redhat: fix up Python dependency for SRPM generation +- 96aca9f redhat: disable glusterfs dependency/support temporarily +- e9aff9d block/vxhs: modularize VXHS via g_module +- ecf40bf Defining a shebang for python scripts +- 55e3177 redhat: changing the prefix and blurb scheme to support rhel8-like handling +- 571e4ac Removing "rh-srpm-rhel" make target +- 9db09ef redhat: enforce python3 usage +- 56cda0b spec: Re-add dependency to seavgabios and ipxe for ppc64 architectures +- c780848 Drop build_configure.sh and Makefile.local files +- cca9118 Fix subject line in .gitpublish +- 9745e27 redhat: Update build configuration +- 193830c redhat: Disable vhost crypto +- 9dc30cb redhat: Make rh-local actually work in a RHEL-8 environment +- 99011c9 redhat: enable opengl, add build and runtime deps +- 7290e3f redhat: Improve python check +--- + .gitpublish | 61 +- + Makefile | 3 +- + block/Makefile.objs | 2 +- + block/vxhs.c | 119 ++- + configure | 33 +- + os-posix.c | 2 +- + redhat/.gitignore | 5 + + redhat/85-kvm.preset | 5 + + redhat/95-kvm-memlock.conf | 10 + + redhat/99-qemu-guest-agent.rules | 2 + + redhat/Makefile | 82 ++ + redhat/Makefile.common | 47 ++ + redhat/bridge.conf | 1 + + redhat/ksm.service | 13 + + redhat/ksm.sysconfig | 4 + + redhat/ksmctl.c | 77 ++ + 
redhat/ksmtuned | 139 ++++ + redhat/ksmtuned.conf | 21 + + redhat/ksmtuned.service | 12 + + redhat/kvm-s390x.conf | 19 + + redhat/kvm-setup | 40 + + redhat/kvm-setup.service | 14 + + redhat/kvm-x86.conf | 12 + + redhat/kvm.conf | 3 + + redhat/kvm.modules | 18 + + redhat/qemu-ga.sysconfig | 19 + + redhat/qemu-guest-agent.service | 20 + + redhat/qemu-kvm.spec.template | 1531 ++++++++++++++++++++++++++++++++++++ + redhat/qemu-pr-helper.service | 15 + + redhat/qemu-pr-helper.socket | 9 + + redhat/rpmbuild/BUILD/.gitignore | 2 + + redhat/rpmbuild/RPMS/.gitignore | 2 + + redhat/rpmbuild/SOURCES/.gitignore | 2 + + redhat/rpmbuild/SPECS/.gitignore | 2 + + redhat/rpmbuild/SRPMS/.gitignore | 2 + + redhat/scripts/frh.py | 24 + + redhat/scripts/git-backport-diff | 327 ++++++++ + redhat/scripts/git-compile-check | 215 +++++ + redhat/scripts/process-patches.sh | 92 +++ + redhat/scripts/tarball_checksum.sh | 3 + + redhat/vhost.conf | 3 + + ui/vnc.c | 2 +- + 42 files changed, 2921 insertions(+), 93 deletions(-) + create mode 100644 redhat/.gitignore + create mode 100644 redhat/85-kvm.preset + create mode 100644 redhat/95-kvm-memlock.conf + create mode 100644 redhat/99-qemu-guest-agent.rules + create mode 100644 redhat/Makefile + create mode 100644 redhat/Makefile.common + create mode 100644 redhat/bridge.conf + create mode 100644 redhat/ksm.service + create mode 100644 redhat/ksm.sysconfig + create mode 100644 redhat/ksmctl.c + create mode 100644 redhat/ksmtuned + create mode 100644 redhat/ksmtuned.conf + create mode 100644 redhat/ksmtuned.service + create mode 100644 redhat/kvm-s390x.conf + create mode 100644 redhat/kvm-setup + create mode 100644 redhat/kvm-setup.service + create mode 100644 redhat/kvm-x86.conf + create mode 100644 redhat/kvm.conf + create mode 100644 redhat/kvm.modules + create mode 100644 redhat/qemu-ga.sysconfig + create mode 100644 redhat/qemu-guest-agent.service + create mode 100644 redhat/qemu-kvm.spec.template + create mode 100644 
redhat/qemu-pr-helper.service + create mode 100644 redhat/qemu-pr-helper.socket + create mode 100644 redhat/rpmbuild/BUILD/.gitignore + create mode 100644 redhat/rpmbuild/RPMS/.gitignore + create mode 100644 redhat/rpmbuild/SOURCES/.gitignore + create mode 100644 redhat/rpmbuild/SPECS/.gitignore + create mode 100644 redhat/rpmbuild/SRPMS/.gitignore + create mode 100755 redhat/scripts/frh.py + create mode 100755 redhat/scripts/git-backport-diff + create mode 100755 redhat/scripts/git-compile-check + create mode 100755 redhat/scripts/process-patches.sh + create mode 100755 redhat/scripts/tarball_checksum.sh + create mode 100644 redhat/vhost.conf + +diff --git a/Makefile b/Makefile +index 2da686b..eb4c57a 100644 +--- a/Makefile ++++ b/Makefile +@@ -501,6 +501,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM + CAP_CFLAGS += -DCAPSTONE_HAS_ARM64 + CAP_CFLAGS += -DCAPSTONE_HAS_POWERPC + CAP_CFLAGS += -DCAPSTONE_HAS_X86 ++CAP_CFLAGS += -Wp,-D_GLIBCXX_ASSERTIONS + + subdir-capstone: .git-submodule-status + $(call quiet-command,$(MAKE) -C $(SRC_PATH)/capstone CAPSTONE_SHARED=no BUILDDIR="$(BUILD_DIR)/capstone" CC="$(CC)" AR="$(AR)" LD="$(LD)" RANLIB="$(RANLIB)" CFLAGS="$(CAP_CFLAGS)" $(SUBDIR_MAKEFLAGS) $(BUILD_DIR)/capstone/$(LIBCAPSTONE)) +@@ -819,7 +820,7 @@ install-doc: $(DOCS) + $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)" + ifdef CONFIG_POSIX + $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" +- $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1" ++ $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1/qemu-kvm.1" + $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7" + $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" + $(INSTALL_DATA) docs/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" +diff --git a/block/Makefile.objs b/block/Makefile.objs +index c8337bf..cd1e309 100644 +--- a/block/Makefile.objs ++++ b/block/Makefile.objs +@@ -21,7 +21,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o + block-obj-$(CONFIG_CURL) += curl.o + block-obj-$(CONFIG_RBD) += rbd.o + 
block-obj-$(CONFIG_GLUSTERFS) += gluster.o +-block-obj-$(CONFIG_VXHS) += vxhs.o ++#block-obj-$(CONFIG_VXHS) += vxhs.o + block-obj-$(CONFIG_LIBSSH2) += ssh.o + block-obj-y += accounting.o dirty-bitmap.o + block-obj-y += write-threshold.o +diff --git a/block/vxhs.c b/block/vxhs.c +index 0cb0a00..9164b3e 100644 +--- a/block/vxhs.c ++++ b/block/vxhs.c +@@ -9,7 +9,8 @@ + */ + + #include "qemu/osdep.h" +-#include ++#include "block/vxhs_shim.h" ++#include + #include + #include "block/block_int.h" + #include "block/qdict.h" +@@ -59,6 +60,97 @@ typedef struct BDRVVXHSState { + char *tlscredsid; /* tlscredsid */ + } BDRVVXHSState; + ++#define LIBVXHS_FULL_PATHNAME "/usr/lib64/qemu/libvxhs.so.1" ++static bool libvxhs_loaded; ++static GModule *libvxhs_handle; ++ ++static LibVXHSFuncs libvxhs; ++ ++typedef struct LibVXHSSymbols { ++ const char *name; ++ gpointer *addr; ++} LibVXHSSymbols; ++ ++static LibVXHSSymbols libvxhs_symbols[] = { ++ {"iio_init", (gpointer *) &libvxhs.iio_init}, ++ {"iio_fini", (gpointer *) &libvxhs.iio_fini}, ++ {"iio_min_version", (gpointer *) &libvxhs.iio_min_version}, ++ {"iio_max_version", (gpointer *) &libvxhs.iio_max_version}, ++ {"iio_open", (gpointer *) &libvxhs.iio_open}, ++ {"iio_close", (gpointer *) &libvxhs.iio_close}, ++ {"iio_writev", (gpointer *) &libvxhs.iio_writev}, ++ {"iio_readv", (gpointer *) &libvxhs.iio_readv}, ++ {"iio_ioctl", (gpointer *) &libvxhs.iio_ioctl}, ++ {NULL} ++}; ++ ++static void bdrv_vxhs_set_funcs(GModule *handle, Error **errp) ++{ ++ int i = 0; ++ while (libvxhs_symbols[i].name) { ++ const char *name = libvxhs_symbols[i].name; ++ if (!g_module_symbol(handle, name, libvxhs_symbols[i].addr)) { ++ error_setg(errp, "%s could not be loaded from libvxhs: %s", ++ name, g_module_error()); ++ return; ++ } ++ ++i; ++ } ++} ++ ++static void bdrv_vxhs_load_libs(Error **errp) ++{ ++ Error *local_err = NULL; ++ int32_t ver; ++ ++ if (libvxhs_loaded) { ++ return; ++ } ++ ++ if (!g_module_supported()) { ++ error_setg(errp, "modules 
are not supported on this platform: %s", ++ g_module_error()); ++ return; ++ } ++ ++ libvxhs_handle = g_module_open(LIBVXHS_FULL_PATHNAME, ++ G_MODULE_BIND_LAZY | G_MODULE_BIND_LOCAL); ++ if (!libvxhs_handle) { ++ error_setg(errp, "The VXHS library from Veritas might not be installed " ++ "correctly (%s)", g_module_error()); ++ return; ++ } ++ ++ g_module_make_resident(libvxhs_handle); ++ ++ bdrv_vxhs_set_funcs(libvxhs_handle, &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ ++ /* Now check to see if the libvxhs we are using here is supported ++ * by the loaded version */ ++ ++ ver = (*libvxhs.iio_min_version)(); ++ if (ver > QNIO_VERSION) { ++ error_setg(errp, "Trying to use libvxhs version %"PRId32" API, but " ++ "only %"PRId32" or newer is supported by %s", ++ QNIO_VERSION, ver, LIBVXHS_FULL_PATHNAME); ++ return; ++ } ++ ++ ver = (*libvxhs.iio_max_version)(); ++ if (ver < QNIO_VERSION) { ++ error_setg(errp, "Trying to use libvxhs version %"PRId32" API, but " ++ "only %"PRId32" or earlier is supported by %s", ++ QNIO_VERSION, ver, LIBVXHS_FULL_PATHNAME); ++ return; ++ } ++ ++ libvxhs_loaded = true; ++} ++ + static void vxhs_complete_aio_bh(void *opaque) + { + VXHSAIOCB *acb = opaque; +@@ -226,7 +318,7 @@ static void vxhs_refresh_limits(BlockDriverState *bs, Error **errp) + static int vxhs_init_and_ref(void) + { + if (vxhs_ref++ == 0) { +- if (iio_init(QNIO_VERSION, vxhs_iio_callback)) { ++ if ((*libvxhs.iio_init)(QNIO_VERSION, vxhs_iio_callback)) { + return -ENODEV; + } + } +@@ -236,7 +328,7 @@ static int vxhs_init_and_ref(void) + static void vxhs_unref(void) + { + if (--vxhs_ref == 0) { +- iio_fini(); ++ (*libvxhs.iio_fini)(); + } + } + +@@ -306,8 +398,17 @@ static int vxhs_open(BlockDriverState *bs, QDict *options, + char *client_key = NULL; + char *client_cert = NULL; + ++ bdrv_vxhs_load_libs(&local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ /* on error, cannot cleanup because the iio_fini() 
function ++ * is not loaded */ ++ return -EINVAL; ++ } ++ + ret = vxhs_init_and_ref(); + if (ret < 0) { ++ error_setg(&local_err, "libvxhs iio_init() failed"); + ret = -EINVAL; + goto out; + } +@@ -392,8 +493,8 @@ static int vxhs_open(BlockDriverState *bs, QDict *options, + /* + * Open qnio channel to storage agent if not opened before + */ +- dev_handlep = iio_open(of_vsa_addr, s->vdisk_guid, 0, +- cacert, client_key, client_cert); ++ dev_handlep = (*libvxhs.iio_open)(of_vsa_addr, s->vdisk_guid, 0, ++ cacert, client_key, client_cert); + if (dev_handlep == NULL) { + trace_vxhs_open_iio_open(of_vsa_addr); + ret = -ENODEV; +@@ -453,11 +554,11 @@ static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, uint64_t offset, + + switch (iodir) { + case VDISK_AIO_WRITE: +- ret = iio_writev(dev_handle, acb, qiov->iov, qiov->niov, ++ ret = (*libvxhs.iio_writev)(dev_handle, acb, qiov->iov, qiov->niov, + offset, size, iio_flags); + break; + case VDISK_AIO_READ: +- ret = iio_readv(dev_handle, acb, qiov->iov, qiov->niov, ++ ret = (*libvxhs.iio_readv)(dev_handle, acb, qiov->iov, qiov->niov, + offset, size, iio_flags); + break; + default: +@@ -506,7 +607,7 @@ static void vxhs_close(BlockDriverState *bs) + * Close vDisk device + */ + if (s->vdisk_hostinfo.dev_handle) { +- iio_close(s->vdisk_hostinfo.dev_handle); ++ (*libvxhs.iio_close)(s->vdisk_hostinfo.dev_handle); + s->vdisk_hostinfo.dev_handle = NULL; + } + +@@ -528,7 +629,7 @@ static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s) + int ret = 0; + void *dev_handle = s->vdisk_hostinfo.dev_handle; + +- ret = iio_ioctl(dev_handle, IOR_VDISK_STAT, &vdisk_size, 0); ++ ret = (*libvxhs.iio_ioctl)(dev_handle, IOR_VDISK_STAT, &vdisk_size, 0); + if (ret < 0) { + trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno); + return -EIO; +diff --git a/configure b/configure +index 2a7796e..0314d53 100755 +--- a/configure ++++ b/configure +@@ -3460,7 +3460,7 @@ fi + + glib_req_ver=2.40 + glib_modules=gthread-2.0 +-if test "$modules" = yes; then ++if 
test "$modules" = yes -o "$vxhs" = yes; then + glib_modules="$glib_modules gmodule-export-2.0" + fi + +@@ -5435,33 +5435,6 @@ if compile_prog "" "" ; then + fi + + ########################################## +-# Veritas HyperScale block driver VxHS +-# Check if libvxhs is installed +- +-if test "$vxhs" != "no" ; then +- cat > $TMPC < +-#include +- +-void *vxhs_callback; +- +-int main(void) { +- iio_init(QNIO_VERSION, vxhs_callback); +- return 0; +-} +-EOF +- vxhs_libs="-lvxhs -lssl" +- if compile_prog "" "$vxhs_libs" ; then +- vxhs=yes +- else +- if test "$vxhs" = "yes" ; then +- feature_not_found "vxhs block device" "Install libvxhs See github" +- fi +- vxhs=no +- fi +-fi +- +-########################################## + # check for _Static_assert() + + have_static_assert=no +@@ -6759,8 +6732,8 @@ if test "$pthread_setname_np" = "yes" ; then + fi + + if test "$vxhs" = "yes" ; then +- echo "CONFIG_VXHS=y" >> $config_host_mak +- echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak ++ echo "CONFIG_VXHS=m" >> $config_host_mak ++ echo "VXHS_LIBS= -lssl" >> $config_host_mak + fi + + if test "$tcg_interpreter" = "yes"; then +diff --git a/os-posix.c b/os-posix.c +index 9ce6f74..c4cfd0d 100644 +--- a/os-posix.c ++++ b/os-posix.c +@@ -82,7 +82,7 @@ void os_setup_signal_handling(void) + /* Find a likely location for support files using the location of the binary. + For installed binaries this will be "$bindir/../share/qemu". When + running from the build tree this will be "$bindir/../pc-bios". 
*/ +-#define SHARE_SUFFIX "/share/qemu" ++#define SHARE_SUFFIX "/share/qemu-kvm" + #define BUILD_SUFFIX "/pc-bios" + char *os_find_datadir(void) + { +diff --git a/ui/vnc.c b/ui/vnc.c +index 3596932..050c421 100644 +--- a/ui/vnc.c ++++ b/ui/vnc.c +@@ -4054,7 +4054,7 @@ void vnc_display_open(const char *id, Error **errp) + trace_vnc_auth_init(vd, 1, vd->ws_auth, vd->ws_subauth); + + #ifdef CONFIG_VNC_SASL +- if ((saslErr = sasl_server_init(NULL, "qemu")) != SASL_OK) { ++ if ((saslErr = sasl_server_init(NULL, "qemu-kvm")) != SASL_OK) { + error_setg(errp, "Failed to initialize SASL auth: %s", + sasl_errstring(saslErr, NULL, NULL)); + goto fail; +-- +1.8.3.1 + diff --git a/0002-Enable-disable-devices-for-RHEL-7.patch b/0002-Enable-disable-devices-for-RHEL-7.patch new file mode 100644 index 0000000..de0276f --- /dev/null +++ b/0002-Enable-disable-devices-for-RHEL-7.patch @@ -0,0 +1,1094 @@ +From 7472ed73f89c81f1ca4c86129eed0f5874d82c41 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Mon, 11 Jan 2016 11:53:33 +0100 +Subject: Enable/disable devices for RHEL 7 + +This commit adds all changes related to changes in supported devices +up to qemu-kvm-2.12.0-32.el8. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase notes (3.0.0): +- Added CONFIG_SCSI +- Enabled SMMUV3 +- Using CONFIG_VIRTIO_CRYPTO to remove crypto +- Added CONFIG_VIRTIO_MMIO for aarch64 +- Disabled arvm7v.c compile +- Introduced virtio.mak +- Disabled make check +- Removed test changes (moved to next patch) + +Merged patches (3.0.0): +- a2721f6 Re-enable disabled Hyper-V enlightenments +- c670fa1 Disable aarch64 devices reappeared after 2.12 rebase +- 3ebdb95 Disable split-irq device +- d68f80c Disable AT24Cx i2c eeprom +- 3f953e9 Disable CAN bus devices +- 284c393 Disable new superio devices +- 747643c Disable new pvrdma device +- 0d4f38c s390x: Re-enable CONFIG_TERMINAL3270 +- 0f725e9 AArch64: Enable CONFIG_FW_CFG_DMA for aarch64 +--- + default-configs/aarch64-softmmu.mak | 37 +++++++++++++++++++++++++++++-------- + default-configs/pci.mak | 36 ++++++++++++++++++------------------ + default-configs/ppc64-softmmu.mak | 25 +++++++++++++++++++------ + default-configs/s390x-softmmu.mak | 5 +++-- + default-configs/sound.mak | 8 ++++---- + default-configs/usb.mak | 14 +++++++------- + default-configs/virtio.mak | 5 ++--- + default-configs/x86_64-softmmu.mak | 28 ++++++++++++++-------------- + hw/acpi/ich9.c | 4 ++-- + hw/arm/Makefile.objs | 2 +- + hw/block/fdc.c | 1 + + hw/char/serial-pci.c | 4 ++++ + hw/core/Makefile.objs | 9 +++++---- + hw/display/cirrus_vga.c | 2 ++ + hw/i386/pc.c | 2 ++ + hw/ide/piix.c | 5 ++++- + hw/ide/via.c | 2 ++ + hw/input/pckbd.c | 2 ++ + hw/isa/Makefile.objs | 2 +- + hw/misc/Makefile.objs | 2 +- + hw/misc/ivshmem.c | 11 +++++++++++ + hw/net/e1000.c | 2 ++ + hw/nvram/Makefile.objs | 2 +- + hw/pci-host/piix.c | 4 ++++ + hw/ppc/Makefile.objs | 2 +- + hw/ppc/spapr.c | 3 ++- + hw/ppc/spapr_cpu_core.c | 2 ++ + hw/rdma/Makefile.objs | 3 ++- + hw/s390x/virtio-ccw.c | 8 ++++++++ + hw/usb/ccid-card-emulated.c | 2 ++ + hw/vfio/Makefile.objs | 1 - + hw/vfio/pci-quirks.c | 5 +++++ + hw/virtio/virtio-pci.c | 8 ++++---- + qemu-options.hx | 5 ----- + 
redhat/qemu-kvm.spec.template | 2 +- + stubs/Makefile.objs | 1 + + stubs/ide-isa.c | 13 +++++++++++++ + target/arm/cpu.c | 4 +++- + target/i386/cpu.c | 35 +++++++++++++++++++++++++++-------- + target/ppc/cpu-models.c | 17 ++++++++++++++++- + target/s390x/cpu_models.c | 3 +++ + target/s390x/kvm.c | 8 ++++++++ + vl.c | 2 +- + 43 files changed, 240 insertions(+), 98 deletions(-) + create mode 100644 stubs/ide-isa.c + +diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak +index 6f790f0..3f27540 100644 +--- a/default-configs/aarch64-softmmu.mak ++++ b/default-configs/aarch64-softmmu.mak +@@ -1,11 +1,32 @@ + # Default configuration for aarch64-softmmu + +-# We support all the 32 bit boards so need all their config +-include arm-softmmu.mak +- +-CONFIG_AUX=y +-CONFIG_DDC=y +-CONFIG_DPCD=y +-CONFIG_XLNX_ZYNQMP=y +-CONFIG_XLNX_ZYNQMP_ARM=y + CONFIG_ARM_SMMUV3=y ++# CONFIG_AUX=y ++# CONFIG_DDC=y ++# CONFIG_DPCD=y ++# CONFIG_XLNX_ZYNQMP=y ++# CONFIG_XLNX_ZYNQMP_ARM=y ++CONFIG_PCI=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_MMIO=y ++include virtio.mak ++CONFIG_ARM_GIC=y ++CONFIG_ARM_GIC_KVM=$(CONFIG_KVM) ++CONFIG_PL011=y ++CONFIG_PL031=y ++CONFIG_PFLASH_CFI01=y ++CONFIG_PCI_GENERIC=y ++CONFIG_ACPI=y ++CONFIG_PLATFORM_BUS=y ++CONFIG_SMBIOS=y ++CONFIG_PL061=y ++CONFIG_GPIO_KEY=y ++CONFIG_ARM_V7M=y ++CONFIG_PCIE_PORT=y ++CONFIG_XIO3130=y ++CONFIG_IOH3420=y ++CONFIG_USB_XHCI=y ++CONFIG_USB=y ++CONFIG_I2C=y ++CONFIG_FW_CFG_DMA=y +diff --git a/default-configs/pci.mak b/default-configs/pci.mak +index de53d20..5cbe3e4 100644 +--- a/default-configs/pci.mak ++++ b/default-configs/pci.mak +@@ -4,22 +4,22 @@ CONFIG_ISA_BUS=y + CONFIG_VIRTIO_PCI=y + include virtio.mak + CONFIG_USB_UHCI=y +-CONFIG_USB_OHCI=y ++#CONFIG_USB_OHCI=y + CONFIG_USB_EHCI=y + CONFIG_USB_XHCI=y + CONFIG_USB_XHCI_NEC=y +-CONFIG_NE2000_PCI=y +-CONFIG_EEPRO100_PCI=y +-CONFIG_PCNET_PCI=y +-CONFIG_PCNET_COMMON=y ++#CONFIG_NE2000_PCI=y ++#CONFIG_EEPRO100_PCI=y 
++#CONFIG_PCNET_PCI=y ++#CONFIG_PCNET_COMMON=y + CONFIG_AC97=y + CONFIG_HDA=y +-CONFIG_ES1370=y ++#CONFIG_ES1370=y + CONFIG_SCSI=y +-CONFIG_LSI_SCSI_PCI=y +-CONFIG_VMW_PVSCSI_SCSI_PCI=y +-CONFIG_MEGASAS_SCSI_PCI=y +-CONFIG_MPTSAS_SCSI_PCI=y ++#CONFIG_LSI_SCSI_PCI=y ++#CONFIG_VMW_PVSCSI_SCSI_PCI=y ++#CONFIG_MEGASAS_SCSI_PCI=y ++#CONFIG_MPTSAS_SCSI_PCI=y + CONFIG_RTL8139_PCI=y + CONFIG_E1000_PCI=y + CONFIG_E1000E_PCI=y +@@ -27,22 +27,22 @@ CONFIG_IDE_CORE=y + CONFIG_IDE_QDEV=y + CONFIG_IDE_PCI=y + CONFIG_AHCI=y +-CONFIG_ESP=y +-CONFIG_ESP_PCI=y ++#CONFIG_ESP=y ++#CONFIG_ESP_PCI=y + CONFIG_SERIAL=y + CONFIG_SERIAL_ISA=y + CONFIG_SERIAL_PCI=y + CONFIG_CAN_BUS=y + CONFIG_CAN_SJA1000=y +-CONFIG_CAN_PCI=y +-CONFIG_IPACK=y ++#CONFIG_CAN_PCI=y ++#CONFIG_IPACK=y + CONFIG_WDT_IB6300ESB=y + CONFIG_PCI_TESTDEV=y +-CONFIG_NVME_PCI=y +-CONFIG_SD=y +-CONFIG_SDHCI=y ++#CONFIG_NVME_PCI=y ++#CONFIG_SD=y ++#CONFIG_SDHCI=y + CONFIG_EDU=y + CONFIG_VGA=y + CONFIG_VGA_PCI=y + CONFIG_IVSHMEM_DEVICE=$(CONFIG_IVSHMEM) +-CONFIG_ROCKER=y ++#CONFIG_ROCKER=y +diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak +index b94af6c..30ca76d 100644 +--- a/default-configs/ppc64-softmmu.mak ++++ b/default-configs/ppc64-softmmu.mak +@@ -1,14 +1,27 @@ + # Default configuration for ppc64-softmmu + +-# Include all 32-bit boards +-include ppc-softmmu.mak ++include usb.mak ++include virtio.mak ++ ++## PCI configuration - cut down from the defaults in pci.mak ++CONFIG_PCI=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_NEC=y ++CONFIG_WDT_IB6300ESB=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_USB_OHCI=y ++CONFIG_VGA=y ++CONFIG_VGA_PCI=y ++CONFIG_SERIAL=y ++CONFIG_I2C=y + + # For PowerNV +-CONFIG_POWERNV=y ++#CONFIG_POWERNV=y + CONFIG_IPMI=y +-CONFIG_IPMI_LOCAL=y +-CONFIG_IPMI_EXTERN=y +-CONFIG_ISA_IPMI_BT=y ++#CONFIG_IPMI_LOCAL=y ++#CONFIG_IPMI_EXTERN=y ++#CONFIG_ISA_IPMI_BT=y + + # For pSeries + CONFIG_PSERIES=y +diff --git a/default-configs/s390x-softmmu.mak 
b/default-configs/s390x-softmmu.mak +index d6b67d5..8b2db3e 100644 +--- a/default-configs/s390x-softmmu.mak ++++ b/default-configs/s390x-softmmu.mak +@@ -1,9 +1,10 @@ + CONFIG_PCI=y +-CONFIG_VIRTIO_PCI=$(CONFIG_PCI) ++#CONFIG_VIRTIO_PCI=$(CONFIG_PCI) + include virtio.mak + CONFIG_SCLPCONSOLE=y + CONFIG_TERMINAL3270=y + CONFIG_S390_FLIC=y + CONFIG_S390_FLIC_KVM=$(CONFIG_KVM) +-CONFIG_VFIO_CCW=$(CONFIG_LINUX) ++# Disabled for Red Hat Enterprise Linux: ++# CONFIG_VFIO_CCW=$(CONFIG_LINUX) + CONFIG_WDT_DIAG288=y +diff --git a/default-configs/sound.mak b/default-configs/sound.mak +index 4f22c34..1bead9b 100644 +--- a/default-configs/sound.mak ++++ b/default-configs/sound.mak +@@ -1,4 +1,4 @@ +-CONFIG_SB16=y +-CONFIG_ADLIB=y +-CONFIG_GUS=y +-CONFIG_CS4231A=y ++#CONFIG_SB16=y ++#CONFIG_ADLIB=y ++#CONFIG_GUS=y ++#CONFIG_CS4231A=y +diff --git a/default-configs/usb.mak b/default-configs/usb.mak +index e42cfea..cef6c0b 100644 +--- a/default-configs/usb.mak ++++ b/default-configs/usb.mak +@@ -1,11 +1,11 @@ + CONFIG_USB=y +-CONFIG_USB_TABLET_WACOM=y ++#CONFIG_USB_TABLET_WACOM=y + CONFIG_USB_STORAGE_BOT=y +-CONFIG_USB_STORAGE_UAS=y +-CONFIG_USB_STORAGE_MTP=y ++#CONFIG_USB_STORAGE_UAS=y ++#CONFIG_USB_STORAGE_MTP=y + CONFIG_SCSI=y + CONFIG_USB_SMARTCARD=y +-CONFIG_USB_AUDIO=y +-CONFIG_USB_SERIAL=y +-CONFIG_USB_NETWORK=y +-CONFIG_USB_BLUETOOTH=y ++#CONFIG_USB_AUDIO=y ++#CONFIG_USB_SERIAL=y ++#CONFIG_USB_NETWORK=y ++#CONFIG_USB_BLUETOOTH=y +diff --git a/default-configs/virtio.mak b/default-configs/virtio.mak +index 1304849..6330e6b 100644 +--- a/default-configs/virtio.mak ++++ b/default-configs/virtio.mak +@@ -1,10 +1,9 @@ +-CONFIG_VHOST_USER_SCSI=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) +-CONFIG_VHOST_USER_BLK=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) ++#CONFIG_VHOST_USER_SCSI=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) ++#CONFIG_VHOST_USER_BLK=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) + CONFIG_VIRTIO=y + CONFIG_VIRTIO_9P=y + CONFIG_VIRTIO_BALLOON=y + 
CONFIG_VIRTIO_BLK=y +-CONFIG_VIRTIO_CRYPTO=y + CONFIG_VIRTIO_GPU=y + CONFIG_VIRTIO_INPUT=y + CONFIG_VIRTIO_NET=y +diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak +index 0390b43..613fc50 100644 +--- a/default-configs/x86_64-softmmu.mak ++++ b/default-configs/x86_64-softmmu.mak +@@ -4,20 +4,20 @@ include pci.mak + include sound.mak + include usb.mak + CONFIG_QXL=$(CONFIG_SPICE) +-CONFIG_VGA_ISA=y ++#CONFIG_VGA_ISA=y + CONFIG_VGA_CIRRUS=y +-CONFIG_VMWARE_VGA=y +-CONFIG_VMXNET3_PCI=y ++#CONFIG_VMWARE_VGA=y ++#CONFIG_VMXNET3_PCI=y + CONFIG_VIRTIO_VGA=y + CONFIG_VMMOUSE=y + CONFIG_IPMI=y +-CONFIG_IPMI_LOCAL=y +-CONFIG_IPMI_EXTERN=y +-CONFIG_ISA_IPMI_KCS=y +-CONFIG_ISA_IPMI_BT=y ++#CONFIG_IPMI_LOCAL=y ++#CONFIG_IPMI_EXTERN=y ++#CONFIG_ISA_IPMI_KCS=y ++#CONFIG_ISA_IPMI_BT=y + CONFIG_SERIAL=y + CONFIG_SERIAL_ISA=y +-CONFIG_PARALLEL=y ++#CONFIG_PARALLEL=y + CONFIG_I8254=y + CONFIG_PCSPK=y + CONFIG_PCKBD=y +@@ -29,11 +29,11 @@ CONFIG_ACPI_MEMORY_HOTPLUG=y + CONFIG_ACPI_CPU_HOTPLUG=y + CONFIG_APM=y + CONFIG_I8257=y +-CONFIG_IDE_ISA=y ++#CONFIG_IDE_ISA=y + CONFIG_IDE_PIIX=y +-CONFIG_NE2000_ISA=y +-CONFIG_HPET=y +-CONFIG_APPLESMC=y ++#CONFIG_NE2000_ISA=y ++#CONFIG_HPET=y ++#CONFIG_APPLESMC=y + CONFIG_I8259=y + CONFIG_PFLASH_CFI01=y + CONFIG_TPM_TIS=$(CONFIG_TPM) +@@ -58,11 +58,11 @@ CONFIG_XIO3130=y + CONFIG_IOH3420=y + CONFIG_I82801B11=y + CONFIG_SMBIOS=y +-CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM) ++#CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM) + CONFIG_PXB=y + CONFIG_ACPI_VMGENID=y + CONFIG_FW_CFG_DMA=y + CONFIG_I2C=y + CONFIG_SEV=$(CONFIG_KVM) + CONFIG_VTD=y +-CONFIG_AMD_IOMMU=y ++#CONFIG_AMD_IOMMU=y +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index c5d8646..a4e87b8 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -446,8 +446,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; + pm->acpi_memory_hotplug.is_enabled = true; + pm->cpu_hotplug_legacy = true; +- 
pm->disable_s3 = 0; +- pm->disable_s4 = 0; ++ pm->disable_s3 = 1; ++ pm->disable_s4 = 1; + pm->s4_val = 2; + + object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, +diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs +index d51fcec..52ec91b 100644 +--- a/hw/arm/Makefile.objs ++++ b/hw/arm/Makefile.objs +@@ -16,7 +16,7 @@ obj-$(CONFIG_STRONGARM) += collie.o + obj-$(CONFIG_VERSATILE) += vexpress.o versatilepb.o + obj-$(CONFIG_ZYNQ) += xilinx_zynq.o + +-obj-$(CONFIG_ARM_V7M) += armv7m.o ++#obj-$(CONFIG_ARM_V7M) += armv7m.o + obj-$(CONFIG_EXYNOS4) += exynos4210.o + obj-$(CONFIG_PXA2XX) += pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o + obj-$(CONFIG_DIGIC) += digic.o +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index 2e9c1e1..f284df7 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -599,6 +599,7 @@ static void floppy_drive_class_init(ObjectClass *klass, void *data) + k->bus_type = TYPE_FLOPPY_BUS; + k->props = floppy_drive_properties; + k->desc = "virtual floppy drive"; ++ k->user_creatable = false; /* RH state preserve */ + } + + static const TypeInfo floppy_drive_info = { +diff --git a/hw/char/serial-pci.c b/hw/char/serial-pci.c +index cb0d04c..d426982 100644 +--- a/hw/char/serial-pci.c ++++ b/hw/char/serial-pci.c +@@ -228,6 +228,8 @@ static void multi_2x_serial_pci_class_initfn(ObjectClass *klass, void *data) + dc->vmsd = &vmstate_pci_multi_serial; + dc->props = multi_2x_serial_pci_properties; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static void multi_4x_serial_pci_class_initfn(ObjectClass *klass, void *data) +@@ -243,6 +245,8 @@ static void multi_4x_serial_pci_class_initfn(ObjectClass *klass, void *data) + dc->vmsd = &vmstate_pci_multi_serial; + dc->props = multi_4x_serial_pci_properties; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo serial_pci_info 
= { +diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs +index eb88ca9..e967fb2 100644 +--- a/hw/core/Makefile.objs ++++ b/hw/core/Makefile.objs +@@ -16,10 +16,11 @@ common-obj-$(CONFIG_SOFTMMU) += machine.o + common-obj-$(CONFIG_SOFTMMU) += loader.o + common-obj-$(CONFIG_FITLOADER) += loader-fit.o + common-obj-$(CONFIG_SOFTMMU) += qdev-properties-system.o +-common-obj-$(CONFIG_SOFTMMU) += register.o +-common-obj-$(CONFIG_SOFTMMU) += or-irq.o +-common-obj-$(CONFIG_SOFTMMU) += split-irq.o ++# Disabled in Red Hat Enterprise Linux ++# common-obj-$(CONFIG_SOFTMMU) += register.o ++# obj-$(CONFIG_SOFTMMU) += generic-loader.o ++# common-obj-$(CONFIG_SOFTMMU) += or-irq.o ++#common-obj-$(CONFIG_SOFTMMU) += split-irq.o + common-obj-$(CONFIG_PLATFORM_BUS) += platform-bus.o + +-obj-$(CONFIG_SOFTMMU) += generic-loader.o + obj-$(CONFIG_SOFTMMU) += null-machine.o +diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c +index 7583b18..9fd5665 100644 +--- a/hw/display/cirrus_vga.c ++++ b/hw/display/cirrus_vga.c +@@ -3075,6 +3075,8 @@ static void isa_cirrus_vga_class_init(ObjectClass *klass, void *data) + dc->realize = isa_cirrus_vga_realizefn; + dc->props = isa_cirrus_vga_properties; + set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories); ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo isa_cirrus_vga_info = { +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 83a4444..11c287e 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1528,7 +1528,9 @@ static void pc_superio_init(ISABus *isa_bus, bool create_fdctrl, bool no_vmport) + ISADevice *i8042, *port92, *vmmouse; + + serial_hds_isa_init(isa_bus, 0, MAX_ISA_SERIAL_PORTS); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + parallel_hds_isa_init(isa_bus, MAX_PARALLEL_PORTS); ++#endif + + for (i = 0; i < MAX_FD; i++) { + fd[i] = drive_get(IF_FLOPPY, 0, i); +diff --git a/hw/ide/piix.c b/hw/ide/piix.c +index a3afe1f..6de12ca 100644 +--- a/hw/ide/piix.c ++++ 
b/hw/ide/piix.c +@@ -253,7 +253,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) + k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; + k->class_id = PCI_CLASS_STORAGE_IDE; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +- dc->hotpluggable = false; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo piix3_ide_info = { +@@ -280,6 +281,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) + k->class_id = PCI_CLASS_STORAGE_IDE; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->hotpluggable = false; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo piix4_ide_info = { +diff --git a/hw/ide/via.c b/hw/ide/via.c +index 238f038..e4a5e6d 100644 +--- a/hw/ide/via.c ++++ b/hw/ide/via.c +@@ -216,6 +216,8 @@ static void via_ide_class_init(ObjectClass *klass, void *data) + k->revision = 0x06; + k->class_id = PCI_CLASS_STORAGE_IDE; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo via_ide_info = { +diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c +index 07c8801..c27a0f8 100644 +--- a/hw/input/pckbd.c ++++ b/hw/input/pckbd.c +@@ -574,6 +574,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) + + dc->realize = i8042_realizefn; + dc->vmsd = &vmstate_kbd_isa; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo i8042_info = { +diff --git a/hw/isa/Makefile.objs b/hw/isa/Makefile.objs +index 83e06f6..7de4f44 100644 +--- a/hw/isa/Makefile.objs ++++ b/hw/isa/Makefile.objs +@@ -1,5 +1,5 @@ + common-obj-$(CONFIG_ISA_BUS) += isa-bus.o +-common-obj-$(CONFIG_ISA_BUS) += isa-superio.o smc37c669-superio.o ++#common-obj-$(CONFIG_ISA_BUS) += isa-superio.o smc37c669-superio.o + common-obj-$(CONFIG_APM) += apm.o + common-obj-$(CONFIG_I82378) += i82378.o + 
common-obj-$(CONFIG_PC87312) += pc87312.o +diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs +index 9350900..9c2c404 100644 +--- a/hw/misc/Makefile.objs ++++ b/hw/misc/Makefile.objs +@@ -9,7 +9,7 @@ common-obj-$(CONFIG_PCI_TESTDEV) += pci-testdev.o + common-obj-$(CONFIG_EDU) += edu.o + common-obj-$(CONFIG_PCA9552) += pca9552.o + +-common-obj-y += unimp.o ++#common-obj-y += unimp.o + common-obj-$(CONFIG_FW_CFG_DMA) += vmcoreinfo.o + + # ARM devices +diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c +index 6febbab..0786fb9 100644 +--- a/hw/misc/ivshmem.c ++++ b/hw/misc/ivshmem.c +@@ -893,6 +893,13 @@ static void ivshmem_common_realize(PCIDevice *dev, Error **errp) + return; + } + ++ /* Migration disabled for Red Hat Enterprise Linux: */ ++ if (s->master == ON_OFF_AUTO_ON) { ++ error_setg(errp, "master=on is not supported"); ++ return; ++ } ++ s->master = ON_OFF_AUTO_OFF; ++ + pci_conf = dev->config; + pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY; + +@@ -1183,6 +1190,8 @@ static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data) + k->realize = ivshmem_doorbell_realize; + dc->props = ivshmem_doorbell_properties; + dc->vmsd = &ivshmem_doorbell_vmsd; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo ivshmem_doorbell_info = { +@@ -1352,6 +1361,8 @@ static void ivshmem_class_init(ObjectClass *klass, void *data) + dc->desc = "Inter-VM shared memory (legacy)"; + dc->props = ivshmem_properties; + dc->vmsd = &ivshmem_vmsd; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo ivshmem_info = { +diff --git a/hw/net/e1000.c b/hw/net/e1000.c +index 13a9494..742cd0a 100644 +--- a/hw/net/e1000.c ++++ b/hw/net/e1000.c +@@ -1768,6 +1768,7 @@ static const E1000Info e1000_devices[] = { + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, + }, ++#if 0 /* Disabled for Red Hat Enterprise Linux 7 */ + { + .name = "e1000-82544gc", 
+ .device_id = E1000_DEV_ID_82544GC_COPPER, +@@ -1780,6 +1781,7 @@ static const E1000Info e1000_devices[] = { + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, + }, ++#endif + }; + + static void e1000_register_types(void) +diff --git a/hw/nvram/Makefile.objs b/hw/nvram/Makefile.objs +index a912d25..cbc8bba 100644 +--- a/hw/nvram/Makefile.objs ++++ b/hw/nvram/Makefile.objs +@@ -1,6 +1,6 @@ + common-obj-$(CONFIG_DS1225Y) += ds1225y.o + common-obj-y += eeprom93xx.o +-common-obj-$(CONFIG_I2C) += eeprom_at24c.o ++#common-obj-$(CONFIG_I2C) += eeprom_at24c.o + common-obj-y += fw_cfg.o + common-obj-y += chrp_nvram.o + common-obj-$(CONFIG_MAC_NVRAM) += mac_nvram.o +diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c +index 0e60834..3ce4b14 100644 +--- a/hw/pci-host/piix.c ++++ b/hw/pci-host/piix.c +@@ -787,6 +787,7 @@ static const TypeInfo i440fx_info = { + }, + }; + ++#if 0 /* Disabled in Red Hat Enterprise Linux */ + /* IGD Passthrough Host Bridge. */ + typedef struct { + uint8_t offset; +@@ -870,6 +871,7 @@ static const TypeInfo igd_passthrough_i440fx_info = { + .instance_size = sizeof(PCII440FXState), + .class_init = igd_passthrough_i440fx_class_init, + }; ++#endif + + static const char *i440fx_pcihost_root_bus_path(PCIHostState *host_bridge, + PCIBus *rootbus) +@@ -915,7 +917,9 @@ static const TypeInfo i440fx_pcihost_info = { + static void i440fx_register_types(void) + { + type_register_static(&i440fx_info); ++#if 0 /* Disabled in Red Hat Enterprise Linux */ + type_register_static(&igd_passthrough_i440fx_info); ++#endif + type_register_static(&piix3_pci_type_info); + type_register_static(&piix3_info); + type_register_static(&piix3_xen_info); +diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs +index bcab632..70e8780 100644 +--- a/hw/ppc/Makefile.objs ++++ b/hw/ppc/Makefile.objs +@@ -3,7 +3,7 @@ obj-y += ppc.o ppc_booke.o fdt.o + # IBM pSeries (sPAPR) + obj-$(CONFIG_PSERIES) += spapr.o spapr_caps.o spapr_vio.o spapr_events.o + obj-$(CONFIG_PSERIES) 
+= spapr_hcall.o spapr_iommu.o spapr_rtas.o +-obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o ++obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o + obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o + # IBM PowerNV + obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o pnv_psi.o pnv_occ.o pnv_bmc.o +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 421b2dd..2f8c304 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -1303,6 +1303,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, + /* /vdevice */ + spapr_dt_vdevice(spapr->vio_bus, fdt); + ++#if 0 /* Disabled in Red Hat Enterprise Linux */ + if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)) { + ret = spapr_rng_populate_dt(fdt); + if (ret < 0) { +@@ -1310,7 +1311,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, + exit(1); + } + } +- ++#endif + QLIST_FOREACH(phb, &spapr->phbs, list) { + ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt); + if (ret < 0) { +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index 993759d..fb29eec 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -378,10 +378,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { + .instance_size = sizeof(sPAPRCPUCore), + .class_size = sizeof(sPAPRCPUCoreClass), + }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power5+_v2.1"), ++#endif + DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), + DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), +diff --git a/hw/rdma/Makefile.objs b/hw/rdma/Makefile.objs +index 3504c39..02ca2a9 100644 +--- a/hw/rdma/Makefile.objs ++++ b/hw/rdma/Makefile.objs +@@ -1,5 +1,6 @@ + ifeq ($(CONFIG_RDMA),y) + obj-$(CONFIG_PCI) += rdma_utils.o rdma_backend.o rdma_rm.o + obj-$(CONFIG_PCI) += vmw/pvrdma_dev_ring.o 
vmw/pvrdma_cmd.o \ +- vmw/pvrdma_qp_ops.o vmw/pvrdma_main.o ++ vmw/pvrdma_qp_ops.o ++#obj-$(CONFIG_PCI) += vmw/pvrdma_main.o + endif +diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c +index 7ddb378..b131781 100644 +--- a/hw/s390x/virtio-ccw.c ++++ b/hw/s390x/virtio-ccw.c +@@ -925,6 +925,8 @@ static void virtio_ccw_rng_realize(VirtioCcwDevice *ccw_dev, Error **errp) + NULL); + } + ++#if 0 /* Disabled in Red Hat Enterprise Linux */ ++ + static void virtio_ccw_crypto_realize(VirtioCcwDevice *ccw_dev, Error **errp) + { + VirtIOCryptoCcw *dev = VIRTIO_CRYPTO_CCW(ccw_dev); +@@ -942,6 +944,7 @@ static void virtio_ccw_crypto_realize(VirtioCcwDevice *ccw_dev, Error **errp) + OBJECT(dev->vdev.conf.cryptodev), "cryptodev", + NULL); + } ++#endif + + static void virtio_ccw_gpu_realize(VirtioCcwDevice *ccw_dev, Error **errp) + { +@@ -1532,6 +1535,8 @@ static const TypeInfo virtio_ccw_rng = { + .class_init = virtio_ccw_rng_class_init, + }; + ++#if 0 /* Disabled in Red Hat Enterprise Linux */ ++ + static Property virtio_ccw_crypto_properties[] = { + DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, + VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), +@@ -1568,6 +1573,7 @@ static const TypeInfo virtio_ccw_crypto = { + .instance_init = virtio_ccw_crypto_instance_init, + .class_init = virtio_ccw_crypto_class_init, + }; ++#endif + + static Property virtio_ccw_gpu_properties[] = { + DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, +@@ -1888,7 +1894,9 @@ static void virtio_ccw_register(void) + #ifdef CONFIG_VHOST_VSOCK + type_register_static(&vhost_vsock_ccw_info); + #endif ++#if 0 /* Disabled in Red Hat Enterprise Linux */ + type_register_static(&virtio_ccw_crypto); ++#endif + type_register_static(&virtio_ccw_gpu); + type_register_static(&virtio_ccw_input); + type_register_static(&virtio_ccw_input_hid); +diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c +index 5c8b3c9..d1cbe54 100644 +--- a/hw/usb/ccid-card-emulated.c ++++ b/hw/usb/ccid-card-emulated.c +@@ 
-585,6 +585,8 @@ static void emulated_class_initfn(ObjectClass *klass, void *data) + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + dc->desc = "emulated smartcard"; + dc->props = emulated_card_properties; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo emulated_card_info = { +diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs +index a2e7a0a..d38205b 100644 +--- a/hw/vfio/Makefile.objs ++++ b/hw/vfio/Makefile.objs +@@ -2,7 +2,6 @@ ifeq ($(CONFIG_LINUX), y) + obj-$(CONFIG_SOFTMMU) += common.o + obj-$(CONFIG_PCI) += pci.o pci-quirks.o display.o + obj-$(CONFIG_VFIO_CCW) += ccw.o +-obj-$(CONFIG_SOFTMMU) += platform.o + obj-$(CONFIG_VFIO_XGMAC) += calxeda-xgmac.o + obj-$(CONFIG_VFIO_AMD_XGBE) += amd-xgbe.o + obj-$(CONFIG_SOFTMMU) += spapr.o +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index 481fd08..1c588f5 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -1387,6 +1387,8 @@ static void vfio_pci_igd_lpc_bridge_class_init(ObjectClass *klass, void *data) + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + dc->desc = "VFIO dummy ISA/LPC bridge for IGD assignment"; + dc->hotpluggable = false; ++ /* Disabled in Red Hat Enterprise Linux */ ++ dc->user_creatable = false; + k->realize = vfio_pci_igd_lpc_bridge_realize; + k->class_id = PCI_CLASS_BRIDGE_ISA; + } +@@ -1580,6 +1582,9 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) + 0, PCI_DEVFN(0x2, 0))) { + return; + } ++ ++ /* Disabled in Red Hat Enterprise Linux */ ++ return; + + /* + * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index 3a01fe9..3567faf 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -2003,7 +2003,7 @@ static const TypeInfo virtio_blk_pci_info = { + .class_init = virtio_blk_pci_class_init, + }; + +-#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) ++#if 
defined(CONFIG_VHOST_USER_BLK) + /* vhost-user-blk */ + + static Property vhost_user_blk_pci_properties[] = { +@@ -2183,7 +2183,7 @@ static const TypeInfo vhost_scsi_pci_info = { + }; + #endif + +-#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) ++#if defined(CONFIG_VHOST_USER_SCSI) + /* vhost-user-scsi-pci */ + static Property vhost_user_scsi_pci_properties[] = { + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, +@@ -2707,7 +2707,7 @@ static void virtio_pci_register_types(void) + type_register_static(&virtio_9p_pci_info); + #endif + type_register_static(&virtio_blk_pci_info); +-#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) ++#if defined(CONFIG_VHOST_USER_BLK) + type_register_static(&vhost_user_blk_pci_info); + #endif + type_register_static(&virtio_scsi_pci_info); +@@ -2717,7 +2717,7 @@ static void virtio_pci_register_types(void) + #ifdef CONFIG_VHOST_SCSI + type_register_static(&vhost_scsi_pci_info); + #endif +-#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) ++#if defined(CONFIG_VHOST_USER_SCSI) + type_register_static(&vhost_user_scsi_pci_info); + #endif + #ifdef CONFIG_VHOST_VSOCK +diff --git a/qemu-options.hx b/qemu-options.hx +index b1bf0f4..37f2aa8 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -1811,11 +1811,6 @@ ETEXI + + DEF("no-hpet", 0, QEMU_OPTION_no_hpet, + "-no-hpet disable HPET\n", QEMU_ARCH_I386) +-STEXI +-@item -no-hpet +-@findex -no-hpet +-Disable HPET support.
+-ETEXI + + DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, + "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" +diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs +index 53d3f32..b1360c4 100644 +--- a/stubs/Makefile.objs ++++ b/stubs/Makefile.objs +@@ -43,3 +43,4 @@ stub-obj-y += xen-common.o + stub-obj-y += xen-hvm.o + stub-obj-y += pci-host-piix.o + stub-obj-y += ram-block.o ++stub-obj-y += ide-isa.o +diff --git a/stubs/ide-isa.c b/stubs/ide-isa.c +new file mode 100644 +index 0000000..9fd50ef +--- /dev/null ++++ b/stubs/ide-isa.c +@@ -0,0 +1,13 @@ ++#include "qemu/osdep.h" ++#include "hw/ide.h" ++#include <stdlib.h> ++ ++ISADevice *isa_ide_init(ISABus *bus, int iobase, int iobase2, int isairq, ++ DriveInfo *hd0, DriveInfo *hd1) ++{ ++ /* ++ * In theory the real isa_ide_init() function can return NULL, but no ++ * caller actually checks for that. Make sure we go out with a clear bang. ++ */ ++ abort(); ++} +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 64a8005..bc8d09d 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2029,7 +2029,9 @@ static void arm_cpu_register_types(void) + type_register_static(&idau_interface_type_info); + + while (info->name) { +- cpu_register(info); ++ /* RHEL specific: Filter out unsupported cpu models */ ++ if (!strcmp(info->name, "cortex-a15")) ++ cpu_register(info); + info++; + } + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 723e022..338ee37 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1366,14 +1366,14 @@ static X86CPUDefinition builtin_x86_defs[] = { + .family = 6, + .model = 6, + .stepping = 3, +- .features[FEAT_1_EDX] = +- PPRO_FEATURES | +- CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA | +- CPUID_PSE36, +- .features[FEAT_1_ECX] = +- CPUID_EXT_SSE3 | CPUID_EXT_CX16, +- .features[FEAT_8000_0001_EDX] = +- CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, ++ .features[FEAT_1_EDX] = CPUID_SSE2 |
CPUID_SSE | CPUID_FXSR | ++ CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | ++ CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | ++ CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | ++ CPUID_PSE | CPUID_DE | CPUID_FP87, ++ .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, ++ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | ++ CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM, + .xlevel = 0x8000000A, +@@ -1603,6 +1603,25 @@ static X86CPUDefinition builtin_x86_defs[] = { + .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", + }, + { ++ .name = "cpu64-rhel6", ++ .level = 4, ++ .vendor = CPUID_VENDOR_AMD, ++ .family = 6, ++ .model = 13, ++ .stepping = 3, ++ .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | ++ CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | ++ CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | ++ CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | ++ CPUID_PSE | CPUID_DE | CPUID_FP87, ++ .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, ++ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, ++ .features[FEAT_8000_0001_ECX] = CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | ++ CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, ++ .xlevel = 0x8000000A, ++ .model_id = "QEMU Virtual CPU version (cpu64-rhel6)", ++ }, ++ { + .name = "Conroe", + .level = 10, + .vendor = CPUID_VENDOR_INTEL, +diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c +index 6c9bfde..77cb298 100644 +--- a/target/ppc/cpu-models.c ++++ b/target/ppc/cpu-models.c +@@ -65,6 +65,7 @@ + #define POWERPC_DEF(_name, _pvr, _type, _desc) \ + POWERPC_DEF_SVR(_name, _desc, _pvr, POWERPC_SVR_NONE, _type) + ++#if 0 /* Embedded and 32-bit CPUs disabled for Red Hat Enterprise Linux */ + /* Embedded PowerPC */ + /* PowerPC 401 family */ + POWERPC_DEF("401", CPU_POWERPC_401, 401, +@@ -739,10 +740,13 @@ + "PowerPC 7447A v1.2 (G4)") 
+ POWERPC_DEF("7457a_v1.2", CPU_POWERPC_74x7A_v12, 7455, + "PowerPC 7457A v1.2 (G4)") ++#endif + /* 64 bits PowerPC */ + #if defined (TARGET_PPC64) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + POWERPC_DEF("power5+_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, + "POWER5+ v2.1") ++#endif + POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, + "POWER7 v2.3") + POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, +@@ -753,14 +757,17 @@ + "POWER8 v2.0") + POWERPC_DEF("power8nvl_v1.0", CPU_POWERPC_POWER8NVL_v10, POWER8, + "POWER8NVL v1.0") ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + POWERPC_DEF("970_v2.2", CPU_POWERPC_970_v22, 970, + "PowerPC 970 v2.2") ++#endif + + POWERPC_DEF("power9_v1.0", CPU_POWERPC_POWER9_DD1, POWER9, + "POWER9 v1.0") + POWERPC_DEF("power9_v2.0", CPU_POWERPC_POWER9_DD20, POWER9, + "POWER9 v2.0") + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970, + "PowerPC 970FX v1.0 (G5)") + POWERPC_DEF("970fx_v2.0", CPU_POWERPC_970FX_v20, 970, +@@ -775,12 +782,14 @@ + "PowerPC 970MP v1.0") + POWERPC_DEF("970mp_v1.1", CPU_POWERPC_970MP_v11, 970, + "PowerPC 970MP v1.1") ++#endif + #endif /* defined (TARGET_PPC64) */ + + /***************************************************************************/ + /* PowerPC CPU aliases */ + + PowerPCCPUAlias ppc_cpu_aliases[] = { ++#if 0 /* Embedded and 32-bit CPUs disabled for Red Hat Enterprise Linux */ + { "403", "403gc" }, + { "405", "405d4" }, + { "405cr", "405crc" }, +@@ -939,20 +948,25 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "7447a", "7447a_v1.2" }, + { "7457a", "7457a_v1.2" }, + { "apollo7pm", "7457a_v1.0" }, ++#endif + #if defined(TARGET_PPC64) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "power5+", "power5+_v2.1" }, + { "power5gs", "power5+_v2.1" }, ++#endif + { "power7", "power7_v2.3" }, + { "power7+", "power7+_v2.1" }, + { "power8e", "power8e_v2.1" }, + { "power8", "power8_v2.0" }, + { "power8nvl", "power8nvl_v1.0" 
}, + { "power9", "power9_v2.0" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "970", "970_v2.2" }, + { "970fx", "970fx_v3.1" }, + { "970mp", "970mp_v1.1" }, + #endif +- ++#endif ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* Generic PowerPCs */ + #if defined(TARGET_PPC64) + { "ppc64", "970fx_v3.1" }, +@@ -960,5 +974,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "ppc32", "604" }, + { "ppc", "604" }, + { "default", "604" }, ++#endif + { NULL, NULL } + }; +diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c +index 604898a..9c469ff 100644 +--- a/target/s390x/cpu_models.c ++++ b/target/s390x/cpu_models.c +@@ -373,6 +373,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, + (max_model->def->gen == model->def->gen && + max_model->def->ec_ga < model->def->ec_ga)) { + list_add_feat("type", unavailable); ++ } else if (model->def->gen < 11 && kvm_enabled()) { ++ /* Older CPU models are not supported on Red Hat Enterprise Linux */ ++ list_add_feat("type", unavailable); + } + + /* detect missing features if any to properly report them */ +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index d923cf4..bbcbeed 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -2277,6 +2277,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) + error_setg(errp, "KVM doesn't support CPU models"); + return; + } ++ ++ /* Older CPU models are not supported on Red Hat Enterprise Linux */ ++ if (model->def->gen < 11) { ++ error_setg(errp, "KVM: Unsupported CPU type specified: %s", ++ MACHINE(qdev_get_machine())->cpu_type); ++ return; ++ } ++ + prop.cpuid = s390_cpuid_from_cpu_model(model); + prop.ibc = s390_ibc_from_cpu_model(model); + /* configure cpu features indicated via STFL(e) */ +diff --git a/vl.c b/vl.c +index 16b913f..4f96203 100644 +--- a/vl.c ++++ b/vl.c +@@ -164,7 +164,7 @@ unsigned int max_cpus; + int smp_cores = 1; + int smp_threads = 1; + int acpi_enabled = 1; +-int no_hpet = 0; ++int 
no_hpet = 1; /* Always disabled for Red Hat Enterprise Linux */ + int fd_bootchk = 1; + static int no_reboot; + int no_shutdown = 0; +-- +1.8.3.1 + diff --git a/0003-Add-RHEL-machine-types.patch b/0003-Add-RHEL-machine-types.patch new file mode 100644 index 0000000..9e8b0db --- /dev/null +++ b/0003-Add-RHEL-machine-types.patch @@ -0,0 +1,3017 @@ +From e145f88a1d3be0e12262c0b3dab80133778ab21a Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Sun, 14 Dec 2014 18:32:18 +0100 +Subject: Add RHEL machine types + +This commit adds all changes related to machine types applied since +qemu-kvm-2.12.0-31.el8. + +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. de Paula + +-- +Rebase notes (3.0.0): +- spapr_cpu_init merged into spapr_realize_vcpu (upstream) +- Commented out virt_machine_device_plug_cb in hw/arm/virt.c +- ifdef virt_get_iommu and virt_set_iommu in hw/arm/virt.c +- test changes refactored and moved all to this patch + +Merged patches (3.0.0): +- 50dd601 s390x: add RHEL 7.6 machine type for ccw +- 8198c8d e1000: Fix tso_props compat for 82540em +- e924798 Use 4 MB vram for cirrus. 
+- 738561e Fix x-hv-max-vps compat value for 7.4 machine type +- 9cb37fd AArch64: Add virt-rhel7.6 machine type +- 2343d56 migration: introduce decompress-error-check (partially) +- 188fa88 pc: rhel7.6.0 machine-types +- 88b4505 Remove rhel6* machine types +- 8a50b1c Remove rhel6_ctrl_guest_workaround +- 71562f4 Remove SeaBIOS shadowing +- 40a8867 Remove ich9_uhci123_irqpin_override +- 7574808 redhat: define pseries-rhel7.6.0 machine types +- 6c2f105 RHEL-8.0: Add pseries-rhel7.6.0-sxxm machine type +- 411b30b pc: pc-*-rhel75.5.0 compat code + +Signed-off-by: Miroslav Rezanina + +Conflicts: + tests/qom-test.c +--- + hw/acpi/ich9.c | 16 ++ + hw/acpi/piix4.c | 6 +- + hw/arm/virt.c | 126 ++++++++- + hw/char/serial.c | 16 ++ + hw/display/cirrus_vga.c | 2 +- + hw/display/vga-isa.c | 2 +- + hw/i386/acpi-build.c | 3 + + hw/i386/pc.c | 7 +- + hw/i386/pc_piix.c | 194 ++++++++++++- + hw/i386/pc_q35.c | 93 ++++++- + hw/net/e1000.c | 18 +- + hw/net/e1000e.c | 21 ++ + hw/net/rtl8139.c | 4 +- + hw/ppc/spapr.c | 252 +++++++++++++++++ + hw/ppc/spapr_cpu_core.c | 13 + + hw/s390x/s390-virtio-ccw.c | 46 +++- + hw/smbios/smbios.c | 1 + + hw/timer/i8254_common.c | 2 +- + hw/timer/mc146818rtc.c | 6 + + hw/usb/hcd-uhci.c | 4 +- + hw/usb/hcd-xhci.c | 20 ++ + hw/usb/hcd-xhci.h | 2 + + include/hw/acpi/ich9.h | 3 + + include/hw/arm/virt.h | 22 ++ + include/hw/compat.h | 229 ++++++++++++++++ + include/hw/i386/pc.h | 564 ++++++++++++++++++++++++++++++++++++++ + include/hw/ppc/spapr.h | 1 + + include/hw/usb.h | 4 + + migration/migration.c | 2 + + migration/migration.h | 5 + + qdev-monitor.c | 1 - + redhat/qemu-kvm.spec.template | 2 +- + scripts/vmstate-static-checker.py | 1 - + target/i386/cpu.c | 9 +- + target/i386/machine.c | 21 ++ + target/ppc/compat.c | 13 +- + target/ppc/cpu.h | 1 + + tests/Makefile.include | 124 ++++----- + tests/boot-serial-test.c | 6 +- + tests/cpu-plug-test.c | 3 +- + tests/e1000-test.c | 2 + + tests/endianness-test.c | 2 + + tests/prom-env-test.c | 2 + +
tests/qemu-iotests/051 | 12 +- + tests/qemu-iotests/group | 4 +- + tests/qom-test.c | 2 +- + tests/test-x86-cpuid-compat.c | 2 + + tests/usb-hcd-xhci-test.c | 4 + + 48 files changed, 1800 insertions(+), 95 deletions(-) + +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index a4e87b8..23a7baa 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -441,6 +441,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) + s->pm.enable_tco = value; + } + ++static bool ich9_pm_get_force_rev1_fadt(Object *obj, Error **errp) ++{ ++ ICH9LPCState *s = ICH9_LPC_DEVICE(obj); ++ return s->pm.force_rev1_fadt; ++} ++ ++static void ich9_pm_set_force_rev1_fadt(Object *obj, bool value, Error **errp) ++{ ++ ICH9LPCState *s = ICH9_LPC_DEVICE(obj); ++ s->pm.force_rev1_fadt = value; ++} ++ + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) + { + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; +@@ -465,6 +477,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) + ich9_pm_get_cpu_hotplug_legacy, + ich9_pm_set_cpu_hotplug_legacy, + NULL); ++ object_property_add_bool(obj, "__com.redhat_force-rev1-fadt", ++ ich9_pm_get_force_rev1_fadt, ++ ich9_pm_set_force_rev1_fadt, ++ NULL); + object_property_add(obj, ACPI_PM_PROP_S3_DISABLED, "uint8", + ich9_pm_get_disable_s3, + ich9_pm_set_disable_s3, +diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c +index 6404af5..0f1f9e2 100644 +--- a/hw/acpi/piix4.c ++++ b/hw/acpi/piix4.c +@@ -310,7 +310,7 @@ static const VMStateDescription vmstate_cpuhp_state = { + static const VMStateDescription vmstate_acpi = { + .name = "piix4_pm", + .version_id = 3, +- .minimum_version_id = 3, ++ .minimum_version_id = 2, + .minimum_version_id_old = 1, + .load_state_old = acpi_load_old, + .post_load = vmstate_acpi_post_load, +@@ -670,8 +670,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + + static Property piix4_pm_properties[] = { + DEFINE_PROP_UINT32("smb_io_base", 
PIIX4PMState, smb_io_base, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), + DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), + DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, + use_acpi_pci_hotplug, true), +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 281ddcd..b02e4a0 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -60,6 +60,7 @@ + #include "standard-headers/linux/input.h" + #include "hw/arm/smmuv3.h" + ++#if 0 /* disabled Red Hat Enterprise Linux */ + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ + void *data) \ +@@ -87,7 +88,36 @@ + DEFINE_VIRT_MACHINE_LATEST(major, minor, true) + #define DEFINE_VIRT_MACHINE(major, minor) \ + DEFINE_VIRT_MACHINE_LATEST(major, minor, false) +- ++#endif /* disabled for RHEL */ ++ ++#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ ++ static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ ++ void *data) \ ++ { \ ++ MachineClass *mc = MACHINE_CLASS(oc); \ ++ rhel##m##n##s##_virt_options(mc); \ ++ mc->desc = "RHEL " # m "." # n "." # s " ARM Virtual Machine"; \ ++ if (latest) { \ ++ mc->alias = "virt"; \ ++ mc->is_default = 1; \ ++ } \ ++ } \ ++ static const TypeInfo rhel##m##n##s##_machvirt_info = { \ ++ .name = MACHINE_TYPE_NAME("virt-rhel" # m "." # n "." 
# s), \ ++ .parent = TYPE_RHEL_MACHINE, \ ++ .instance_init = rhel##m##n##s##_virt_instance_init, \ ++ .class_init = rhel##m##n##s##_virt_class_init, \ ++ }; \ ++ static void rhel##m##n##s##_machvirt_init(void) \ ++ { \ ++ type_register_static(&rhel##m##n##s##_machvirt_info); \ ++ } \ ++ type_init(rhel##m##n##s##_machvirt_init); ++ ++#define DEFINE_RHEL_MACHINE_AS_LATEST(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, true) ++#define DEFINE_RHEL_MACHINE(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) + + /* Number of external interrupt lines to configure the GIC with */ + #define NUM_IRQS 256 +@@ -1539,6 +1569,7 @@ static void machvirt_init(MachineState *machine) + qemu_add_machine_init_done_notifier(&vms->machine_done); + } + ++#if 0 /* disabled for RHEL */ + static bool virt_get_secure(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -1567,6 +1598,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) + vms->virt = value; + } + ++#endif /* disabled for RHEL */ + static bool virt_get_highmem(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -1621,6 +1653,7 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp) + } + } + ++#if 0 + static char *virt_get_iommu(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -1648,6 +1681,7 @@ static void virt_set_iommu(Object *obj, const char *value, Error **errp) + error_append_hint(errp, "Valid values are none, smmuv3.\n"); + } + } ++#endif + + static CpuInstanceProperties + virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) +@@ -1687,6 +1721,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + return ms->possible_cpus; + } + ++#if 0 /* disabled for RHEL */ + static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +@@ -1835,6 +1870,9 @@ 
static void virt_machine_3_0_options(MachineClass *mc) + } + DEFINE_VIRT_MACHINE_AS_LATEST(3, 0) + ++#define VIRT_COMPAT_2_12 \ ++ HW_COMPAT_2_12 ++ + static void virt_2_12_instance_init(Object *obj) + { + virt_3_0_instance_init(obj); +@@ -1960,3 +1998,89 @@ static void virt_machine_2_6_options(MachineClass *mc) + vmc->no_pmu = true; + } + DEFINE_VIRT_MACHINE(2, 6) ++#endif /* disabled for RHEL */ ++ ++static void rhel_machine_class_init(ObjectClass *oc, void *data) ++{ ++ MachineClass *mc = MACHINE_CLASS(oc); ++ ++ mc->family = "virt-rhel-Z"; ++ mc->init = machvirt_init; ++ /* Start max_cpus at the maximum QEMU supports. We'll further restrict ++ * it later in machvirt_init, where we have more information about the ++ * configuration of the particular instance. ++ */ ++ mc->max_cpus = 255; ++ mc->block_default_type = IF_VIRTIO; ++ mc->no_cdrom = 1; ++ mc->pci_allow_0_address = true; ++ /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */ ++ mc->minimum_page_bits = 12; ++ mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids; ++ mc->cpu_index_to_instance_props = virt_cpu_index_to_props; ++ mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a57"); ++ mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; ++} ++ ++static const TypeInfo rhel_machine_info = { ++ .name = TYPE_RHEL_MACHINE, ++ .parent = TYPE_MACHINE, ++ .abstract = true, ++ .instance_size = sizeof(VirtMachineState), ++ .class_size = sizeof(VirtMachineClass), ++ .class_init = rhel_machine_class_init, ++}; ++ ++static void rhel_machine_init(void) ++{ ++ type_register_static(&rhel_machine_info); ++} ++type_init(rhel_machine_init); ++ ++static void rhel760_virt_instance_init(Object *obj) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); ++ ++ /* EL3 is disabled by default and non-configurable for RHEL */ ++ vms->secure = false; ++ /* EL2 is disabled by default and non-configurable for RHEL */ ++ vms->virt = false; ++ /* High 
memory is enabled by default for RHEL */ ++ vms->highmem = true; ++ object_property_add_bool(obj, "highmem", virt_get_highmem, ++ virt_set_highmem, NULL); ++ object_property_set_description(obj, "highmem", ++ "Set on/off to enable/disable using " ++ "physical address space above 32 bits", ++ NULL); ++ /* Default GIC type is still v2, but became configurable for RHEL */ ++ vms->gic_version = 2; ++ object_property_add_str(obj, "gic-version", virt_get_gic_version, ++ virt_set_gic_version, NULL); ++ object_property_set_description(obj, "gic-version", ++ "Set GIC version. " ++ "Valid values are 2, 3 and host", NULL); ++ ++ if (vmc->no_its) { ++ vms->its = false; ++ } else { ++ /* Default allows ITS instantiation */ ++ vms->its = true; ++ object_property_add_bool(obj, "its", virt_get_its, ++ virt_set_its, NULL); ++ object_property_set_description(obj, "its", ++ "Set on/off to enable/disable " ++ "ITS instantiation", ++ NULL); ++ } ++ ++ vms->memmap=a15memmap; ++ vms->irqmap=a15irqmap; ++} ++ ++static void rhel760_virt_options(MachineClass *mc) ++{ ++ SET_MACHINE_COMPAT(mc, ARM_RHEL_COMPAT); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(7, 6, 0) +diff --git a/hw/char/serial.c b/hw/char/serial.c +index 251f40f..8e3520c 100644 +--- a/hw/char/serial.c ++++ b/hw/char/serial.c +@@ -30,6 +30,7 @@ + #include "qemu/timer.h" + #include "qemu/error-report.h" + #include "trace.h" ++#include "migration/migration.h" + + //#define DEBUG_SERIAL + +@@ -699,6 +700,9 @@ static int serial_post_load(void *opaque, int version_id) + static bool serial_thr_ipending_needed(void *opaque) + { + SerialState *s = opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } + + if (s->ier & UART_IER_THRI) { + bool expected_value = ((s->iir & UART_IIR_ID) == UART_IIR_THRI); +@@ -780,6 +784,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { + static bool serial_fifo_timeout_timer_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + 
return timer_pending(s->fifo_timeout_timer); + } + +@@ -797,6 +805,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { + static bool serial_timeout_ipending_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->timeout_ipending != 0; + } + +@@ -814,6 +826,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { + static bool serial_poll_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->poll_msl >= 0; + } + +diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c +index 9fd5665..6910014 100644 +--- a/hw/display/cirrus_vga.c ++++ b/hw/display/cirrus_vga.c +@@ -3061,7 +3061,7 @@ static void isa_cirrus_vga_realizefn(DeviceState *dev, Error **errp) + + static Property isa_cirrus_vga_properties[] = { + DEFINE_PROP_UINT32("vgamem_mb", struct ISACirrusVGAState, +- cirrus_vga.vga.vram_size_mb, 4), ++ cirrus_vga.vga.vram_size_mb, 16), + DEFINE_PROP_BOOL("blitter", struct ISACirrusVGAState, + cirrus_vga.enable_blitter, true), + DEFINE_PROP_END_OF_LIST(), +diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c +index fa44242..7835c83 100644 +--- a/hw/display/vga-isa.c ++++ b/hw/display/vga-isa.c +@@ -80,7 +80,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) + } + + static Property vga_isa_properties[] = { +- DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 8), ++ DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 16), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index e1ee8ae..be9bdb5 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -184,6 +184,9 @@ static void acpi_get_pm_info(AcpiPmInfo *pm) + pm->fadt.reset_reg = r; + pm->fadt.reset_val = 0xf; + pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; ++ if (object_property_get_bool(lpc, ++ 
"__com.redhat_force-rev1-fadt", NULL)) ++ pm->fadt.rev = 1; + pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; + } + assert(obj); +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 11c287e..253d48d 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1419,7 +1419,8 @@ void pc_memory_init(PCMachineState *pcms, + option_rom_mr = g_malloc(sizeof(*option_rom_mr)); + memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, + &error_fatal); +- if (pcmc->pci_enabled) { ++ /* RH difference: See bz 1489800, explicitly make ROM ro */ ++ if (pcmc->pc_rom_ro) { + memory_region_set_readonly(option_rom_mr, true); + } + memory_region_add_subregion_overlap(rom_memory, +@@ -2387,6 +2388,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->save_tsc_khz = true; + pcmc->linuxboot_dma_enabled = true; + assert(!mc->get_hotplug_handler); ++ pcmc->pc_rom_ro = true; + mc->get_hotplug_handler = pc_get_hotpug_handler; + mc->cpu_index_to_instance_props = pc_cpu_index_to_props; + mc->get_default_cpu_node_id = pc_get_default_cpu_node_id; +@@ -2396,7 +2398,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->default_boot_order = "cad"; + mc->hot_add_cpu = pc_hot_add_cpu; + mc->block_default_type = IF_IDE; +- mc->max_cpus = 255; ++ /* 240: max CPU count for RHEL */ ++ mc->max_cpus = 240; + mc->reset = pc_machine_reset; + hc->pre_plug = pc_machine_device_pre_plug_cb; + hc->plug = pc_machine_device_plug_cb; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index dc09466..f0484ec 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -50,6 +50,7 @@ + #include "cpu.h" + #include "qapi/error.h" + #include "qemu/error-report.h" ++#include "migration/migration.h" + #ifdef CONFIG_XEN + #include + #include "hw/xen/xen_pt.h" +@@ -170,8 +171,8 @@ static void pc_init1(MachineState *machine, + if (pcmc->smbios_defaults) { + MachineClass *mc = MACHINE_GET_CLASS(machine); + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", 
"Standard PC (i440FX + PIIX, 1996)", +- mc->name, pcmc->smbios_legacy_mode, ++ smbios_set_defaults("Red Hat", "KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + SMBIOS_ENTRY_POINT_21); + } +@@ -309,6 +310,7 @@ static void pc_init1(MachineState *machine, + * HW_COMPAT_*, PC_COMPAT_*, or * pc_*_machine_options(). + */ + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_compat_2_3(MachineState *machine) + { + PCMachineState *pcms = PC_MACHINE(machine); +@@ -433,6 +435,7 @@ static void pc_i440fx_3_0_machine_options(MachineClass *m) + pc_i440fx_machine_options(m); + m->alias = "pc"; + m->is_default = 1; ++ SET_MACHINE_COMPAT(m, PC_COMPAT_2_12); + } + + DEFINE_I440FX_MACHINE(v3_0, "pc-i440fx-3.0", NULL, +@@ -1148,3 +1151,190 @@ static void xenfv_machine_options(MachineClass *m) + DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init, + xenfv_machine_options); + #endif ++machine_init(pc_machine_init); ++ ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel7 machine type */ ++static void pc_machine_rhel7_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ m->family = "pc_piix_Y"; ++ m->default_machine_opts = "firmware=bios-256k.bin"; ++ pcmc->default_nic_model = "e1000"; ++ m->default_display = "std"; ++ SET_MACHINE_COMPAT(m, PC_RHEL_COMPAT); ++ m->alias = "pc"; ++ m->is_default = 1; ++} ++ ++static void pc_init_rhel760(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel760_options(MachineClass *m) ++{ ++ pc_machine_rhel7_options(m); ++ m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; ++} ++ ++DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, ++ pc_machine_rhel760_options); ++ ++static void pc_init_rhel750(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} 
++ ++static void pc_machine_rhel750_options(MachineClass *m) ++{ ++ pc_machine_rhel760_options(m); ++ m->alias = NULL; ++ m->is_default = 0; ++ m->desc = "RHEL 7.5.0 PC (i440FX + PIIX, 1996)"; ++ m->auto_enable_numa_with_memhp = false; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_5_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(rhel750, "pc-i440fx-rhel7.5.0", pc_init_rhel750, ++ pc_machine_rhel750_options); ++ ++static void pc_init_rhel740(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel740_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel750_options(m); ++ m->desc = "RHEL 7.4.0 PC (i440FX + PIIX, 1996)"; ++ m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; ++ pcmc->pc_rom_ro = false; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_4_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(rhel740, "pc-i440fx-rhel7.4.0", pc_init_rhel740, ++ pc_machine_rhel740_options); ++ ++static void pc_init_rhel730(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel730_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel740_options(m); ++ m->desc = "RHEL 7.3.0 PC (i440FX + PIIX, 1996)"; ++ pcmc->linuxboot_dma_enabled = false; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_3_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(rhel730, "pc-i440fx-rhel7.3.0", pc_init_rhel730, ++ pc_machine_rhel730_options); ++ ++ ++static void pc_init_rhel720(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel720_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel730_options(m); ++ m->desc = "RHEL 7.2.0 PC (i440FX + PIIX, 1996)"; ++ /* From pc_i440fx_2_5_machine_options */ ++ pcmc->save_tsc_khz = false; ++ m->legacy_fw_cfg_order = 1; ++ /* Note: broken_reserved_end was already 
in 7.2 */ ++ /* From pc_i440fx_2_6_machine_options */ ++ pcmc->legacy_cpu_hotplug = true; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_2_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(rhel720, "pc-i440fx-rhel7.2.0", pc_init_rhel720, ++ pc_machine_rhel720_options); ++ ++static void pc_compat_rhel710(MachineState *machine) ++{ ++ PCMachineState *pcms = PC_MACHINE(machine); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); ++ ++ /* From pc_compat_2_2 */ ++ pcmc->rsdp_in_ram = false; ++ machine->suppress_vmdesc = true; ++ ++ /* From pc_compat_2_1 */ ++ pcmc->smbios_uuid_encoded = false; ++ x86_cpu_change_kvm_default("svm", NULL); ++ pcmc->enforce_aligned_dimm = false; ++ ++ /* Disable all the extra subsections that were added in 2.2 */ ++ migrate_pre_2_2 = true; ++ ++ /* From pc_i440fx_2_4_machine_options */ ++ pcmc->broken_reserved_end = true; ++} ++ ++static void pc_init_rhel710(MachineState *machine) ++{ ++ pc_compat_rhel710(machine); ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel710_options(MachineClass *m) ++{ ++ pc_machine_rhel720_options(m); ++ m->family = "pc_piix_Y"; ++ m->desc = "RHEL 7.1.0 PC (i440FX + PIIX, 1996)"; ++ m->default_display = "cirrus"; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_1_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(rhel710, "pc-i440fx-rhel7.1.0", pc_init_rhel710, ++ pc_machine_rhel710_options); ++ ++static void pc_compat_rhel700(MachineState *machine) ++{ ++ PCMachineState *pcms = PC_MACHINE(machine); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); ++ ++ pc_compat_rhel710(machine); ++ ++ /* Upstream enables it for everyone, we're a little more selective */ ++ x86_cpu_change_kvm_default("x2apic", NULL); ++ x86_cpu_change_kvm_default("svm", NULL); ++ pcmc->legacy_acpi_table_size = 6418; /* see pc_compat_2_0() */ ++ pcmc->smbios_legacy_mode = true; ++ pcmc->has_reserved_memory = false; ++ migrate_cve_2014_5263_xhci_fields = true; ++} ++ ++static void pc_init_rhel700(MachineState *machine) ++{ ++ 
pc_compat_rhel700(machine); ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel700_options(MachineClass *m) ++{ ++ pc_machine_rhel710_options(m); ++ m->family = "pc_piix_Y"; ++ m->desc = "RHEL 7.0.0 PC (i440FX + PIIX, 1996)"; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_0_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, ++ pc_machine_rhel700_options); +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 532241e..c1024c5 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -145,8 +145,8 @@ static void pc_q35_init(MachineState *machine) + + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", +- mc->name, pcmc->smbios_legacy_mode, ++ smbios_set_defaults("Red Hat", "KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + SMBIOS_ENTRY_POINT_21); + } +@@ -294,6 +294,7 @@ static void pc_q35_init(MachineState *machine) + DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) + + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_q35_machine_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +@@ -315,6 +316,7 @@ static void pc_q35_3_0_machine_options(MachineClass *m) + { + pc_q35_machine_options(m); + m->alias = "q35"; ++ SET_MACHINE_COMPAT(m, PC_COMPAT_2_12); + } + + DEFINE_Q35_MACHINE(v3_0, "pc-q35-3.0", NULL, +@@ -416,3 +418,90 @@ static void pc_q35_2_4_machine_options(MachineClass *m) + + DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, + pc_q35_2_4_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel7 q35 machine type */ ++static void pc_q35_machine_rhel7_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pcmc->default_nic_model = "e1000e"; ++ m->family = "pc_q35_Z"; ++ 
m->default_machine_opts = "firmware=bios-256k.bin"; ++ m->default_display = "std"; ++ m->no_floppy = 1; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_SYS_BUS_DEVICE); ++ m->alias = "q35"; ++ m->max_cpus = 384; ++ SET_MACHINE_COMPAT(m, PC_RHEL_COMPAT); ++} ++ ++static void pc_q35_init_rhel760(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel760_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel7_options(m); ++ m->desc = "RHEL-7.6.0 PC (Q35 + ICH9, 2009)"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, ++ pc_q35_machine_rhel760_options); ++ ++static void pc_q35_init_rhel750(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel750_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel760_options(m); ++ m->alias = NULL; ++ m->desc = "RHEL-7.5.0 PC (Q35 + ICH9, 2009)"; ++ m->auto_enable_numa_with_memhp = false; ++ pcmc->default_nic_model = "e1000"; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_5_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel750, "pc-q35-rhel7.5.0", pc_q35_init_rhel750, ++ pc_q35_machine_rhel750_options); ++ ++static void pc_q35_init_rhel740(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel740_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel750_options(m); ++ m->desc = "RHEL-7.4.0 PC (Q35 + ICH9, 2009)"; ++ m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; ++ pcmc->pc_rom_ro = false; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_4_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel740, "pc-q35-rhel7.4.0", pc_q35_init_rhel740, ++ pc_q35_machine_rhel740_options); ++ ++static void pc_q35_init_rhel730(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel730_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel740_options(m); ++ m->desc = "RHEL-7.3.0 
PC (Q35 + ICH9, 2009)"; ++ m->max_cpus = 255; ++ pcmc->linuxboot_dma_enabled = false; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_3_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, ++ pc_q35_machine_rhel730_options); +diff --git a/hw/net/e1000.c b/hw/net/e1000.c +index 742cd0a..7d568da 100644 +--- a/hw/net/e1000.c ++++ b/hw/net/e1000.c +@@ -1663,6 +1663,16 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) + + pci_conf = pci_dev->config; + ++ if (!(d->compat_flags & E1000_FLAG_AUTONEG)) { ++ /* ++ * We have no capabilities, so capability list bit should normally be 0. ++ * Keep it on for compat machine types to avoid breaking migration. ++ * HACK: abuse E1000_FLAG_AUTONEG, which is off exactly for ++ * the machine types that need this. ++ */ ++ pci_set_word(pci_conf + PCI_STATUS, PCI_STATUS_CAP_LIST); ++ } ++ + /* TODO: RST# value should be 0, PCI spec 6.2.4 */ + pci_conf[PCI_CACHE_LINE_SIZE] = 0x10; + +@@ -1763,7 +1773,7 @@ static const TypeInfo e1000_base_info = { + + static const E1000Info e1000_devices[] = { + { +- .name = "e1000", ++ .name = "e1000-82540em", + .device_id = E1000_DEV_ID_82540EM, + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, +@@ -1784,6 +1794,11 @@ static const E1000Info e1000_devices[] = { + #endif + }; + ++static const TypeInfo e1000_default_info = { ++ .name = "e1000", ++ .parent = "e1000-82540em", ++}; ++ + static void e1000_register_types(void) + { + int i; +@@ -1801,6 +1816,7 @@ static void e1000_register_types(void) + + type_register(&type_info); + } ++ type_register_static(&e1000_default_info); + } + + type_init(e1000_register_types) +diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c +index 510ddb3..f1de9e5 100644 +--- a/hw/net/e1000e.c ++++ b/hw/net/e1000e.c +@@ -75,6 +75,11 @@ typedef struct E1000EState { + + E1000ECore core; + ++ /* 7.3 had the intr_state field that was in the original e1000e code ++ * but that was removed prior to 2.7's release ++ */ ++ bool 
redhat_7_3_intr_state_enable; ++ uint32_t redhat_7_3_intr_state; + } E1000EState; + + #define E1000E_MMIO_IDX 0 +@@ -90,6 +95,10 @@ typedef struct E1000EState { + #define E1000E_MSIX_TABLE (0x0000) + #define E1000E_MSIX_PBA (0x2000) + ++/* Values as in RHEL 7.3 build and original upstream */ ++#define RH_E1000E_USE_MSI BIT(0) ++#define RH_E1000E_USE_MSIX BIT(1) ++ + static uint64_t + e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) + { +@@ -301,6 +310,8 @@ e1000e_init_msix(E1000EState *s) + } else { + if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { + msix_uninit(d, &s->msix, &s->msix); ++ } else { ++ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSIX; + } + } + } +@@ -472,6 +483,8 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) + ret = msi_init(PCI_DEVICE(s), 0xD0, 1, true, false, NULL); + if (ret) { + trace_e1000e_msi_init_fail(ret); ++ } else { ++ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSI; + } + + if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, +@@ -595,6 +608,11 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { + VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ + e1000e_vmstate_intr_timer, E1000IntrDelayTimer) + ++static bool rhel_7_3_check(void *opaque, int version_id) ++{ ++ return ((E1000EState *)opaque)->redhat_7_3_intr_state_enable; ++} ++ + static const VMStateDescription e1000e_vmstate = { + .name = "e1000e", + .version_id = 1, +@@ -606,6 +624,7 @@ static const VMStateDescription e1000e_vmstate = { + VMSTATE_MSIX(parent_obj, E1000EState), + + VMSTATE_UINT32(ioaddr, E1000EState), ++ VMSTATE_UINT32_TEST(redhat_7_3_intr_state, E1000EState, rhel_7_3_check), + VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), + VMSTATE_UINT8(core.rx_desc_len, E1000EState), + VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, +@@ -654,6 +673,8 @@ static PropertyInfo e1000e_prop_disable_vnet, + + static Property e1000e_properties[] = { + DEFINE_NIC_PROPERTIES(E1000EState, conf), ++ 
DEFINE_PROP_BOOL("__redhat_e1000e_7_3_intr_state", E1000EState, ++ redhat_7_3_intr_state_enable, false), + DEFINE_PROP_SIGNED("disable_vnet_hdr", E1000EState, disable_vnet, false, + e1000e_prop_disable_vnet, bool), + DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, +diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c +index 46daa16..05453e7 100644 +--- a/hw/net/rtl8139.c ++++ b/hw/net/rtl8139.c +@@ -3174,7 +3174,7 @@ static int rtl8139_pre_save(void *opaque) + + static const VMStateDescription vmstate_rtl8139 = { + .name = "rtl8139", +- .version_id = 5, ++ .version_id = 4, + .minimum_version_id = 3, + .post_load = rtl8139_post_load, + .pre_save = rtl8139_pre_save, +@@ -3255,7 +3255,9 @@ static const VMStateDescription vmstate_rtl8139 = { + VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), + VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), + VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), ++#if 0 /* Disabled for Red Hat Enterprise Linux bz 1420195 */ + VMSTATE_UINT32_V(tally_counters.RxOkMul, RTL8139State, 5), ++#endif + VMSTATE_UINT16(tally_counters.TxAbt, RTL8139State), + VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 2f8c304..b8bdb69 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4009,6 +4009,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) + smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; + smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */ + spapr_caps_add_properties(smc, &error_abort); ++ smc->has_power9_support = true; + } + + static const TypeInfo spapr_machine_info = { +@@ -4059,6 +4060,7 @@ static const TypeInfo spapr_machine_info = { + } \ + type_init(spapr_machine_register_##suffix) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* + * pseries-3.0 + */ +@@ -4248,6 +4250,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); + .property = "pre-2.8-migration", \ + .value = "on", \ + }, ++#endif + + static void 
phb_placement_2_7(sPAPRMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, +@@ -4298,6 +4301,7 @@ static void phb_placement_2_7(sPAPRMachineState *spapr, uint32_t index, + */ + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void spapr_machine_2_7_instance_options(MachineState *machine) + { + sPAPRMachineState *spapr = SPAPR_MACHINE(machine); +@@ -4457,6 +4461,254 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) + SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_1); + } + DEFINE_SPAPR_MACHINE(2_1, "2.1", false); ++#endif ++ ++/* ++ * pseries-rhel7.6.0 ++ */ ++ ++static void spapr_machine_rhel760_instance_options(MachineState *machine) ++{ ++} ++ ++static void spapr_machine_rhel760_class_options(MachineClass *mc) ++{ ++ /* Defaults for the latest behaviour inherited from the base class */ ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", true); ++ ++/* ++ * pseries-rhel7.6.0-sxxm ++ * ++ * pseries-rhel7.6.0 with speculative execution exploit mitigations enabled by default ++ */ ++static void spapr_machine_rhel760sxxm_instance_options(MachineState *machine) ++{ ++ spapr_machine_rhel760_instance_options(machine); ++} ++ ++static void spapr_machine_rhel760sxxm_class_options(MachineClass *mc) ++{ ++ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel760_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.5.0 ++ * like SPAPR_COMPAT_2_11 and SPAPR_COMPAT_2_10 ++ * SPAPR_CAP_HTM already enabled in 7.4 ++ * ++ */ ++#define SPAPR_COMPAT_RHEL7_5 \ ++ HW_COMPAT_RHEL7_5 \ ++ ++static void spapr_machine_rhel750_instance_options(MachineState *machine) ++{ ++ spapr_machine_rhel760_instance_options(machine); ++} ++ ++static void 
spapr_machine_rhel750_class_options(MachineClass *mc) ++{ ++ spapr_machine_rhel760_class_options(mc); ++ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_5); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel750, "rhel7.5.0", false); ++ ++/* ++ * pseries-rhel7.5.0-sxxm ++ * ++ * pseries-rhel7.5.0 with speculative execution exploit mitigations enabled by default ++ */ ++static void spapr_machine_rhel750sxxm_instance_options(MachineState *machine) ++{ ++ spapr_machine_rhel750_instance_options(machine); ++} ++ ++static void spapr_machine_rhel750sxxm_class_options(MachineClass *mc) ++{ ++ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel750_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel750sxxm, "rhel7.5.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.4.0 ++ * like SPAPR_COMPAT_2_9 ++ */ ++ ++#define SPAPR_COMPAT_RHEL7_4 \ ++ HW_COMPAT_RHEL7_4 \ ++ { \ ++ .driver = TYPE_POWERPC_CPU, \ ++ .property = "pre-2.10-migration", \ ++ .value = "on", \ ++ }, \ ++ ++static void spapr_machine_rhel740_instance_options(MachineState *machine) ++{ ++ spapr_machine_rhel750_instance_options(machine); ++} ++ ++static void spapr_machine_rhel740_class_options(MachineClass *mc) ++{ ++ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel750_class_options(mc); ++ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_4); ++ mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram; ++ smc->has_power9_support = false; ++ smc->pre_2_10_has_unused_icps = true; ++ smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED; ++ smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_ON; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel740, "rhel7.4.0", false); ++ ++/* ++ * pseries-rhel7.4.0-sxxm ++ * ++ * pseries-rhel7.4.0 with speculative execution exploit mitigations enabled by default ++ */ ++static void 
spapr_machine_rhel740sxxm_instance_options(MachineState *machine) ++{ ++ spapr_machine_rhel740_instance_options(machine); ++} ++ ++static void spapr_machine_rhel740sxxm_class_options(MachineClass *mc) ++{ ++ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel740_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel740sxxm, "rhel7.4.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.3.0 ++ * like SPAPR_COMPAT_2_6/_2_7/_2_8 but "ddw" has been backported to RHEL7_3 ++ */ ++#define SPAPR_COMPAT_RHEL7_3 \ ++ HW_COMPAT_RHEL7_3 \ ++ { \ ++ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ ++ .property = "mem_win_size", \ ++ .value = stringify(SPAPR_PCI_2_7_MMIO_WIN_SIZE),\ ++ }, \ ++ { \ ++ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ ++ .property = "mem64_win_size", \ ++ .value = "0", \ ++ }, \ ++ { \ ++ .driver = TYPE_POWERPC_CPU, \ ++ .property = "pre-2.8-migration", \ ++ .value = "on", \ ++ }, \ ++ { \ ++ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ ++ .property = "pre-2.8-migration", \ ++ .value = "on", \ ++ }, \ ++ { \ ++ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ ++ .property = "pcie-extended-configuration-space",\ ++ .value = "off", \ ++ }, ++ ++static void spapr_machine_rhel730_instance_options(MachineState *machine) ++{ ++ sPAPRMachineState *spapr = SPAPR_MACHINE(machine); ++ ++ spapr_machine_rhel740_instance_options(machine); ++ spapr->use_hotplug_event_source = false; ++} ++ ++static void spapr_machine_rhel730_class_options(MachineClass *mc) ++{ ++ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel740_class_options(mc); ++ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power7_v2.3"); ++ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_3); ++ smc->phb_placement = phb_placement_2_7; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel730, "rhel7.3.0", false); ++ ++/* ++ * 
pseries-rhel7.3.0-sxxm ++ * ++ * pseries-rhel7.3.0 with speculative execution exploit mitigations enabled by default ++ */ ++static void spapr_machine_rhel730sxxm_instance_options(MachineState *machine) ++{ ++ spapr_machine_rhel730_instance_options(machine); ++} ++ ++static void spapr_machine_rhel730sxxm_class_options(MachineClass *mc) ++{ ++ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel730_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel730sxxm, "rhel7.3.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.2.0 ++ */ ++/* Should be like SPAPR_COMPAT_2_5 + 2_4 + 2_3, but "dynamic-reconfiguration" ++ * has been backported to RHEL7_2 so we don't need it here. ++ */ ++ ++#define SPAPR_COMPAT_RHEL7_2 \ ++ HW_COMPAT_RHEL7_2 \ ++ { \ ++ .driver = "spapr-vlan", \ ++ .property = "use-rx-buffer-pools", \ ++ .value = "off", \ ++ },{ \ ++ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE,\ ++ .property = "ddw",\ ++ .value = stringify(off),\ ++ }, ++ ++ ++static void spapr_machine_rhel720_instance_options(MachineState *machine) ++{ ++ spapr_machine_rhel730_instance_options(machine); ++} ++ ++static void spapr_machine_rhel720_class_options(MachineClass *mc) ++{ ++ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel730_class_options(mc); ++ smc->use_ohci_by_default = true; ++ mc->has_hotpluggable_cpus = NULL; ++ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_2); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel720, "rhel7.2.0", false); + + static void spapr_machine_register_types(void) + { +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index fb29eec..a081b01 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -21,6 +21,7 @@ + #include "sysemu/numa.h" + #include "sysemu/hw_accel.h" + #include "qemu/error-report.h" ++#include 
"cpu-models.h" + + static void spapr_cpu_reset(void *opaque) + { +@@ -212,6 +213,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr, + { + CPUPPCState *env = &cpu->env; + Error *local_err = NULL; ++ sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + + object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); + if (local_err) { +@@ -224,6 +226,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr, + cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); + kvmppc_set_papr(cpu); + ++ if (!smc->has_power9_support && ++ (((spapr->max_compat_pvr && ++ ppc_compat_cmp(spapr->max_compat_pvr, ++ CPU_POWERPC_LOGICAL_3_00) >= 0)) || ++ (!spapr->max_compat_pvr && ++ ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, 0)))) { ++ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, ++ "POWER9 CPU is not supported by this machine class"); ++ return; ++ } ++ + qemu_register_reset(spapr_cpu_reset, cpu); + spapr_cpu_reset(cpu); + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 7983185..0f135c9 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -649,7 +649,7 @@ bool css_migration_enabled(void) + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ + ccw_machine_##suffix##_class_options(mc); \ +- mc->desc = "VirtIO-ccw based S390 machine v" verstr; \ ++ mc->desc = "VirtIO-ccw based S390 machine " verstr; \ + if (latest) { \ + mc->alias = "s390-ccw-virtio"; \ + mc->is_default = 1; \ +@@ -676,6 +676,8 @@ bool css_migration_enabled(void) + #define CCW_COMPAT_2_12 \ + HW_COMPAT_2_12 + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ ++ + #define CCW_COMPAT_2_11 \ + HW_COMPAT_2_11 \ + {\ +@@ -898,6 +900,48 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + } + DEFINE_CCW_MACHINE(2_4, "2.4", false); + ++#else ++ ++/* ++ * like CCW_COMPAT_2_11, but includes HW_COMPAT_RHEL7_5 (derived from ++ * HW_COMPAT_2_11 and HW_COMPAT_2_10) instead of HW_COMPAT_2_11 ++ */ 
++#define CCW_COMPAT_RHEL7_5 \ ++ HW_COMPAT_RHEL7_5 \ ++ {\ ++ .driver = TYPE_SCLP_EVENT_FACILITY,\ ++ .property = "allow_all_mask_sizes",\ ++ .value = "off",\ ++ }, ++ ++static void ccw_machine_rhel760_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel760_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", true); ++ ++static void ccw_machine_rhel750_instance_options(MachineState *machine) ++{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V2_11 }; ++ ccw_machine_rhel760_instance_options(machine); ++ ++ /* before 2.12 we emulated the very first z900, and RHEL 7.5 is ++ based on 2.10 */ ++ s390_set_qemu_cpu_model(0x2064, 7, 1, qemu_cpu_feat); ++} ++ ++static void ccw_machine_rhel750_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel760_class_options(mc); ++ SET_MACHINE_COMPAT(mc, CCW_COMPAT_RHEL7_5); ++} ++DEFINE_CCW_MACHINE(rhel750, "rhel7.5.0", false); ++ ++#endif ++ + static void ccw_machine_register_types(void) + { + type_register_static(&ccw_machine_info); +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index a27e54b..144e6e9 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -775,6 +775,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type1.product, product); + SMBIOS_SET_DEFAULT(type1.version, version); ++ SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); + SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type2.product, product); + SMBIOS_SET_DEFAULT(type2.version, version); +diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c +index 6190b6f..ad2ad2d 100644 +--- a/hw/timer/i8254_common.c ++++ b/hw/timer/i8254_common.c +@@ -268,7 +268,7 @@ static const VMStateDescription vmstate_pit_common = { + .pre_save = pit_dispatch_pre_save, + .post_load = pit_dispatch_post_load, + .fields = (VMStateField[]) { +- 
VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3), ++ VMSTATE_UINT32(channels[0].irq_disabled, PITCommonState), /* qemu-kvm's v2 had 'flags' here */ + VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2, + vmstate_pit_channel, PITChannelState), + VMSTATE_INT64(channels[0].next_transition_time, +diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c +index 6f1f723..68c353f 100644 +--- a/hw/timer/mc146818rtc.c ++++ b/hw/timer/mc146818rtc.c +@@ -34,6 +34,7 @@ + #include "qapi/qapi-commands-misc.h" + #include "qapi/qapi-events-misc.h" + #include "qapi/visitor.h" ++#include "migration/migration.h" + + #ifdef TARGET_I386 + #include "hw/i386/apic.h" +@@ -839,6 +840,11 @@ static int rtc_post_load(void *opaque, int version_id) + static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) + { + RTCState *s = (RTCState *)opaque; ++ ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->irq_reinject_on_ack_count != 0; + } + +diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c +index 836b11f..9d7b9df 100644 +--- a/hw/usb/hcd-uhci.c ++++ b/hw/usb/hcd-uhci.c +@@ -1214,12 +1214,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) + UHCIState *s = UHCI(dev); + uint8_t *pci_conf = s->dev.config; + int i; ++ int irq_pin; + + pci_conf[PCI_CLASS_PROG] = 0x00; + /* TODO: reset value should be 0. 
*/ + pci_conf[USB_SBRN] = USB_RELEASE_1; // release number + +- pci_config_set_interrupt_pin(pci_conf, u->info.irq_pin + 1); ++ irq_pin = u->info.irq_pin; ++ pci_config_set_interrupt_pin(pci_conf, irq_pin + 1); + + if (s->masterbus) { + USBPort *ports[NB_PORTS]; +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index 8f1a01a..ca19474 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c +@@ -3560,9 +3560,27 @@ static const VMStateDescription vmstate_xhci_slot = { + } + }; + ++static int xhci_event_pre_save(void *opaque) ++{ ++ XHCIEvent *s = opaque; ++ ++ s->cve_2014_5263_a = ((uint8_t *)&s->type)[0]; ++ s->cve_2014_5263_b = ((uint8_t *)&s->type)[1]; ++ ++ return 0; ++} ++ ++bool migrate_cve_2014_5263_xhci_fields; ++ ++static bool xhci_event_cve_2014_5263(void *opaque, int version_id) ++{ ++ return migrate_cve_2014_5263_xhci_fields; ++} ++ + static const VMStateDescription vmstate_xhci_event = { + .name = "xhci-event", + .version_id = 1, ++ .pre_save = xhci_event_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT32(type, XHCIEvent), + VMSTATE_UINT32(ccode, XHCIEvent), +@@ -3571,6 +3589,8 @@ static const VMStateDescription vmstate_xhci_event = { + VMSTATE_UINT32(flags, XHCIEvent), + VMSTATE_UINT8(slotid, XHCIEvent), + VMSTATE_UINT8(epid, XHCIEvent), ++ VMSTATE_UINT8_TEST(cve_2014_5263_a, XHCIEvent, xhci_event_cve_2014_5263), ++ VMSTATE_UINT8_TEST(cve_2014_5263_b, XHCIEvent, xhci_event_cve_2014_5263), + VMSTATE_END_OF_LIST() + } + }; +diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h +index fc36a4c..89d4cf7 100644 +--- a/hw/usb/hcd-xhci.h ++++ b/hw/usb/hcd-xhci.h +@@ -153,6 +153,8 @@ typedef struct XHCIEvent { + uint32_t flags; + uint8_t slotid; + uint8_t epid; ++ uint8_t cve_2014_5263_a; ++ uint8_t cve_2014_5263_b; + } XHCIEvent; + + typedef struct XHCIInterrupter { +diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h +index 59aeb06..7b5cc25 100644 +--- a/include/hw/acpi/ich9.h ++++ b/include/hw/acpi/ich9.h +@@ -61,6 +61,9 @@ typedef struct 
ICH9LPCPMRegs { + uint8_t smm_enabled; + bool enable_tco; + TCOIORegs tco_regs; ++ ++ /* RH addition, see bz 1489800 */ ++ bool force_rev1_fadt; + } ICH9LPCPMRegs; + + #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 9a870cc..2293315 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -128,6 +128,7 @@ typedef struct { + + #define VIRT_ECAM_ID(high) (high ? VIRT_PCIE_ECAM_HIGH : VIRT_PCIE_ECAM) + ++#if 0 /* disabled for Red Hat Enterprise Linux */ + #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") + #define VIRT_MACHINE(obj) \ + OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) +@@ -136,6 +137,27 @@ typedef struct { + #define VIRT_MACHINE_CLASS(klass) \ + OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) + ++#else ++#define TYPE_RHEL_MACHINE MACHINE_TYPE_NAME("virt-rhel") ++#define VIRT_MACHINE(obj) \ ++ OBJECT_CHECK(VirtMachineState, (obj), TYPE_RHEL_MACHINE) ++#define VIRT_MACHINE_GET_CLASS(obj) \ ++ OBJECT_GET_CLASS(VirtMachineClass, obj, TYPE_RHEL_MACHINE) ++#define VIRT_MACHINE_CLASS(klass) \ ++ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_RHEL_MACHINE) ++#endif ++ ++/* This macro is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. 
++ */ ++#define ARM_RHEL_COMPAT \ ++ {\ ++ .driver = "virtio-net-pci",\ ++ .property = "romfile",\ ++ .value = "",\ ++ }, ++ + void virt_acpi_setup(VirtMachineState *vms); + + /* Return the number of used redistributor regions */ +diff --git a/include/hw/compat.h b/include/hw/compat.h +index c08f404..22262c7 100644 +--- a/include/hw/compat.h ++++ b/include/hw/compat.h +@@ -282,4 +282,233 @@ + .value = "on",\ + }, + ++/* Mostly like HW_COMPAT_2_1 but: ++ * we don't need virtio-scsi-pci since 7.0 already had that on ++ * ++ * RH: Note, qemu-extended-regs should have been enabled in the 7.1 ++ * machine type, but was accidentally turned off in 7.2 onwards. ++ * ++ */ ++#define HW_COMPAT_RHEL7_1 \ ++ { /* COMPAT_RHEL7.1 */ \ ++ .driver = "intel-hda-generic",\ ++ .property = "old_msi_addr",\ ++ .value = "on",\ ++ },{\ ++ .driver = "VGA",\ ++ .property = "qemu-extended-regs",\ ++ .value = "off",\ ++ },{\ ++ .driver = "secondary-vga",\ ++ .property = "qemu-extended-regs",\ ++ .value = "off",\ ++ },{\ ++ .driver = "usb-mouse",\ ++ .property = "usb_version",\ ++ .value = stringify(1),\ ++ },{\ ++ .driver = "usb-kbd",\ ++ .property = "usb_version",\ ++ .value = stringify(1),\ ++ },{\ ++ .driver = "virtio-pci",\ ++ .property = "virtio-pci-bus-master-bug-migration",\ ++ .value = "on",\ ++ },{\ ++ .driver = "virtio-blk-pci",\ ++ .property = "any_layout",\ ++ .value = "off",\ ++ },{\ ++ .driver = "virtio-balloon-pci",\ ++ .property = "any_layout",\ ++ .value = "off",\ ++ },{\ ++ .driver = "virtio-serial-pci",\ ++ .property = "any_layout",\ ++ .value = "off",\ ++ },{\ ++ .driver = "virtio-9p-pci",\ ++ .property = "any_layout",\ ++ .value = "off",\ ++ },{\ ++ .driver = "virtio-rng-pci",\ ++ .property = "any_layout",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_1 - introduced with 2.10.0 */ \ ++ .driver = "migration",\ ++ .property = "send-configuration",\ ++ .value = "off",\ ++ }, ++ ++/* Mostly like HW_COMPAT_2_4 + 2_3 but: ++ * we don't need "any_layout" as it has been 
backported to 7.2 ++ */ ++ ++#define HW_COMPAT_RHEL7_2 \ ++ {\ ++ .driver = "virtio-blk-device",\ ++ .property = "scsi",\ ++ .value = "true",\ ++ },{\ ++ .driver = "e1000-82540em",\ ++ .property = "extra_mac_registers",\ ++ .value = "off",\ ++ },{\ ++ .driver = "virtio-pci",\ ++ .property = "x-disable-pcie",\ ++ .value = "on",\ ++ },{\ ++ .driver = "virtio-pci",\ ++ .property = "migrate-extra",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_2 */ \ ++ .driver = "fw_cfg_mem",\ ++ .property = "dma_enabled",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_2 */ \ ++ .driver = "fw_cfg_io",\ ++ .property = "dma_enabled",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_2 */ \ ++ .driver = "isa-fdc",\ ++ .property = "fallback",\ ++ .value = "144",\ ++ },{ /* HW_COMPAT_RHEL7_2 */ \ ++ .driver = "virtio-pci",\ ++ .property = "disable-modern",\ ++ .value = "on",\ ++ },{ /* HW_COMPAT_RHEL7_2 */ \ ++ .driver = "virtio-pci",\ ++ .property = "disable-legacy",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_2 */ \ ++ .driver = TYPE_PCI_DEVICE,\ ++ .property = "x-pcie-lnksta-dllla",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_2 */ \ ++ .driver = "virtio-pci",\ ++ .property = "page-per-vq",\ ++ .value = "on",\ ++ },{ /* HW_COMPAT_RHEL7_2 - introduced with 2.10.0 */ \ ++ .driver = "migration",\ ++ .property = "send-section-footer",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_2 - introduced with 2.10.0 */ \ ++ .driver = "migration",\ ++ .property = "store-global-state",\ ++ .value = "off",\ ++ }, ++ ++/* Mostly like HW_COMPAT_2_6 + HW_COMPAT_2_7 + HW_COMPAT_2_8 except ++ * disable-modern, disable-legacy, page-per-vq have already been ++ * backported to RHEL7.3 ++ */ ++#define HW_COMPAT_RHEL7_3 \ ++ { /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "virtio-mmio",\ ++ .property = "format_transport_address",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "virtio-serial-device",\ ++ .property = "emergency-write",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ 
.driver = "ioapic",\ ++ .property = "version",\ ++ .value = "0x11",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "intel-iommu",\ ++ .property = "x-buggy-eim",\ ++ .value = "true",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "virtio-pci",\ ++ .property = "x-ignore-backend-features",\ ++ .value = "on",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "fw_cfg_mem",\ ++ .property = "x-file-slots",\ ++ .value = stringify(0x10),\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "fw_cfg_io",\ ++ .property = "x-file-slots",\ ++ .value = stringify(0x10),\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "pflash_cfi01",\ ++ .property = "old-multiple-chip-handling",\ ++ .value = "on",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = TYPE_PCI_DEVICE,\ ++ .property = "x-pcie-extcap-init",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "virtio-pci",\ ++ .property = "x-pcie-deverr-init",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "virtio-pci",\ ++ .property = "x-pcie-lnkctl-init",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "virtio-pci",\ ++ .property = "x-pcie-pm-init",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "virtio-net-device",\ ++ .property = "x-mtu-bypass-backend",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "e1000e",\ ++ .property = "__redhat_e1000e_7_3_intr_state",\ ++ .value = "on",\ ++ }, ++ ++/* Mostly like HW_COMPAT_2_9 except ++ * x-mtu-bypass-backend, x-migrate-msix has already been ++ * backported to RHEL7.4. shpc was already on in 7.4. 
++ */ ++#define HW_COMPAT_RHEL7_4 \ ++ { /* HW_COMPAT_RHEL7_4 */ \ ++ .driver = "intel-iommu",\ ++ .property = "pt",\ ++ .value = "off",\ ++ }, ++ ++/* The same as HW_COMPAT_2_11 + HW_COMPAT_2_10 */ ++#define HW_COMPAT_RHEL7_5 \ ++ { /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ \ ++ .driver = "hpet",\ ++ .property = "hpet-offset-saved",\ ++ .value = "false",\ ++ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ \ ++ .driver = "virtio-blk-pci",\ ++ .property = "vectors",\ ++ .value = "2",\ ++ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ \ ++ .driver = "vhost-user-blk-pci",\ ++ .property = "vectors",\ ++ .value = "2",\ ++ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 but \ ++ bz 1608778 modified for our naming */ \ ++ .driver = "e1000-82540em",\ ++ .property = "migrate_tso_props",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_10 */ \ ++ .driver = "virtio-mouse-device",\ ++ .property = "wheel-axis",\ ++ .value = "false",\ ++ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_10 */ \ ++ .driver = "virtio-tablet-device",\ ++ .property = "wheel-axis",\ ++ .value = "false",\ ++ },{ /* HW_COMPAT_RHEL7_5 */ \ ++ .driver = "cirrus-vga",\ ++ .property = "vgamem_mb",\ ++ .value = "16",\ ++ },{ /* HW_COMPAT_RHEL7_5 */ \ ++ .driver = "migration",\ ++ .property = "decompress-error-check",\ ++ .value = "off",\ ++ }, ++ ++ + #endif /* HW_COMPAT_H */ +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 6894f37..ef82513 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -134,6 +134,9 @@ struct PCMachineClass { + + /* use DMA capable linuxboot option rom */ + bool linuxboot_dma_enabled; ++ ++ /* RH only, see bz 1489800 */ ++ bool pc_rom_ro; + }; + + #define TYPE_PC_MACHINE "generic-pc-machine" +@@ -960,4 +963,565 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); + type_init(pc_machine_init_##suffix) + + extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); ++ ++/* See include/hw/compat.h for shared 
compatibility lists */ ++ ++/* This macro is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. ++ */ ++#define PC_RHEL_COMPAT \ ++ { /* PC_RHEL_COMPAT */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "host-phys-bits",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL_COMPAT bz 1508330 */ \ ++ .driver = "vfio-pci",\ ++ .property = "x-no-geforce-quirks",\ ++ .value = "on",\ ++ }, ++ ++/* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: ++ * - x-hv-max-vps was backported to 7.5 ++ * - x-pci-hole64-fix was backported to 7.5 ++ */ ++#define PC_RHEL7_5_COMPAT \ ++ HW_COMPAT_RHEL7_5 \ ++ { /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_11 */ \ ++ .driver = "Skylake-Server" "-" TYPE_X86_CPU,\ ++ .property = "clflushopt",\ ++ .value = "off",\ ++ }, ++ ++ ++#define PC_RHEL7_4_COMPAT \ ++ HW_COMPAT_RHEL7_4 \ ++ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_9 */ \ ++ .driver = "mch",\ ++ .property = "extended-tseg-mbytes",\ ++ .value = stringify(0),\ ++ },\ ++ { /* PC_RHEL7_4_COMPAT bz 1489800 */ \ ++ .driver = "ICH9-LPC",\ ++ .property = "__com.redhat_force-rev1-fadt",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ \ ++ .driver = "i440FX-pcihost",\ ++ .property = "x-pci-hole64-fix",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ \ ++ .driver = "q35-pcihost",\ ++ .property = "x-pci-hole64-fix",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "x-hv-max-vps",\ ++ .value = "0x40",\ ++ }, ++ ++#define PC_RHEL7_3_COMPAT \ ++ HW_COMPAT_RHEL7_3 \ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ \ ++ .driver = "kvmclock",\ ++ .property = "x-mach-use-reliable-get-clock",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "l3-cache",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ ++ .driver = 
TYPE_X86_CPU,\ ++ .property = "full-cpuid-auto-level",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ ++ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ ++ .property = "family",\ ++ .value = "15",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ ++ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ ++ .property = "model",\ ++ .value = "6",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ ++ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ ++ .property = "stepping",\ ++ .value = "1",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ ++ .driver = "isa-pcspk",\ ++ .property = "migrate",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_6 */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "cpuid-0xb",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ \ ++ .driver = "ICH9-LPC",\ ++ .property = "x-smi-broadcast",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "vmware-cpuid-freq",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ \ ++ .driver = "Haswell-" TYPE_X86_CPU,\ ++ .property = "stepping",\ ++ .value = "1",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_3 added in 2.9 */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "kvm-no-smi-migration",\ ++ .value = "on",\ ++ }, ++ ++#define PC_RHEL7_2_COMPAT \ ++ HW_COMPAT_RHEL7_2 \ ++ {\ ++ .driver = "phenom" "-" TYPE_X86_CPU,\ ++ .property = "rdtscp",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "qemu64" "-" TYPE_X86_CPU,\ ++ .property = "sse4a",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "qemu64" "-" TYPE_X86_CPU,\ ++ .property = "abm",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "Haswell-" TYPE_X86_CPU,\ ++ .property = "abm",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ \ ++ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU,\ ++ 
.property = "abm",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "Haswell-noTSX-" TYPE_X86_CPU,\ ++ .property = "abm",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ \ ++ .driver = "Haswell-noTSX-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "abm",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "Broadwell-" TYPE_X86_CPU,\ ++ .property = "abm",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ \ ++ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "abm",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "Broadwell-noTSX-" TYPE_X86_CPU,\ ++ .property = "abm",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ \ ++ .driver = "Broadwell-noTSX-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "abm",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "host" "-" TYPE_X86_CPU,\ ++ .property = "host-cache-info",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "check",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "qemu32" "-" TYPE_X86_CPU,\ ++ .property = "popcnt",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "arat",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "usb-redir",\ ++ .property = "streams",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "fill-mtrr-mask",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "apic-common",\ ++ .property = "legacy-instance-id",\ ++ .value = "on",\ ++ }, ++ ++ ++ ++#define PC_RHEL7_1_COMPAT \ ++ HW_COMPAT_RHEL7_1 \ ++ {\ ++ .driver = "kvm64" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "kvm32" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ 
++ {\ ++ .driver = "Conroe" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Penryn" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Nehalem" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "Nehalem-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Westmere" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "Westmere-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "SandyBridge" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "SandyBridge-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Haswell" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Broadwell" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Opteron_G1" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Opteron_G2" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Opteron_G4" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Opteron_G5" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ 
++ .driver = "Haswell" "-" TYPE_X86_CPU,\ ++ .property = "f16c",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "f16c",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Haswell" "-" TYPE_X86_CPU,\ ++ .property = "rdrand",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "rdrand",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Broadwell" "-" TYPE_X86_CPU,\ ++ .property = "f16c",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "f16c",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Broadwell" "-" TYPE_X86_CPU,\ ++ .property = "rdrand",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "rdrand",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "coreduo" "-" TYPE_X86_CPU,\ ++ .property = "vmx",\ ++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "core2duo" "-" TYPE_X86_CPU,\ ++ .property = "vmx",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "qemu64" "-" TYPE_X86_CPU,\ ++ .property = "min-level",\ ++ .value = stringify(4),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "kvm64" "-" TYPE_X86_CPU,\ ++ .property = "min-level",\ ++ .value = stringify(5),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "pentium3" "-" TYPE_X86_CPU,\ ++ .property = "min-level",\ ++ .value = stringify(2),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "n270" "-" TYPE_X86_CPU,\ ++ .property = "min-level",\ ++ .value = stringify(5),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Conroe" "-" TYPE_X86_CPU,\ ++ .property = "min-level",\ ++ .value = stringify(4),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Penryn" "-" TYPE_X86_CPU,\ ++ .property = "min-level",\ ++ .value = 
stringify(4),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Nehalem" "-" TYPE_X86_CPU,\ ++ .property = "min-level",\ ++ .value = stringify(4),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "n270" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Penryn" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Conroe" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Nehalem" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Westmere" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "SandyBridge" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "IvyBridge" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Haswell" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Haswell-noTSX" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Broadwell" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Broadwell-noTSX" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ }, ++ ++/* ++ * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine ++ * types as the PC_COMPAT_* do for upstream types. ++ * PC_RHEL_7_*_COMPAT apply both to i440fx and q35 types. 
++ */ ++ ++/* ++ * RHEL-7 is based on QEMU 1.5.3, so this needs the PC_COMPAT_* ++ * between our base and 1.5, less stuff backported to RHEL-7.0 ++ * (usb-device.msos-desc), less stuff for devices we changed ++ * (qemu64-x86_64-cpu) or don't support (hpet, pci-serial-2x, ++ * pci-serial-4x) in 7.0. ++ */ ++#define PC_RHEL7_0_COMPAT \ ++ {\ ++ .driver = "virtio-scsi-pci",\ ++ .property = "any_layout",\ ++ .value = "off",\ ++ },{\ ++ .driver = "PIIX4_PM",\ ++ .property = "memory-hotplug-support",\ ++ .value = "off",\ ++ },{\ ++ .driver = "apic",\ ++ .property = "version",\ ++ .value = stringify(0x11),\ ++ },{\ ++ .driver = "nec-usb-xhci",\ ++ .property = "superspeed-ports-first",\ ++ .value = "off",\ ++ },{\ ++ .driver = "nec-usb-xhci",\ ++ .property = "force-pcie-endcap",\ ++ .value = "on",\ ++ },{\ ++ .driver = "pci-serial",\ ++ .property = "prog_if",\ ++ .value = stringify(0),\ ++ },{\ ++ .driver = "virtio-net-pci",\ ++ .property = "guest_announce",\ ++ .value = "off",\ ++ },{\ ++ .driver = "ICH9-LPC",\ ++ .property = "memory-hotplug-support",\ ++ .value = "off",\ ++ },{\ ++ .driver = "xio3130-downstream",\ ++ .property = COMPAT_PROP_PCP,\ ++ .value = "off",\ ++ },{\ ++ .driver = "ioh3420",\ ++ .property = COMPAT_PROP_PCP,\ ++ .value = "off",\ ++ },{\ ++ .driver = "PIIX4_PM",\ ++ .property = "acpi-pci-hotplug-with-bridge-support",\ ++ .value = "off",\ ++ },{\ ++ .driver = "e1000",\ ++ .property = "mitigation",\ ++ .value = "off",\ ++ },{ \ ++ .driver = "virtio-net-pci", \ ++ .property = "ctrl_guest_offloads", \ ++ .value = "off", \ ++ },\ ++ {\ ++ .driver = "Conroe" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "Penryn" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "Nehalem" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL7_0_COMPAT (copied from the entry above) */ \ ++ .driver = "Nehalem-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ 
++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "Westmere" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL7_0_COMPAT (copied from the entry above) */ \ ++ .driver = "Westmere-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "Opteron_G1" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "Opteron_G2" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "Opteron_G4" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "Opteron_G5" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ }, + #endif +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index 7e5de1a..330c370 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -101,6 +101,7 @@ struct sPAPRMachineClass { + bool dr_lmb_enabled; /* enable dynamic-reconfig/hotplug of LMBs */ + bool use_ohci_by_default; /* use USB-OHCI instead of XHCI */ + bool pre_2_10_has_unused_icps; ++ bool has_power9_support; + void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, +diff --git a/include/hw/usb.h b/include/hw/usb.h +index a5080ad..b943ec9 100644 +--- a/include/hw/usb.h ++++ b/include/hw/usb.h +@@ -606,4 +606,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, + uint8_t interface_class, uint8_t interface_subclass, + uint8_t interface_protocol); + ++ ++/* hcd-xhci.c -- rhel7.0.0 machine type compatibility */ ++extern bool migrate_cve_2014_5263_xhci_fields; ++ + #endif +diff --git a/migration/migration.c b/migration/migration.c +index b7d9854..381039c 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -106,6 +106,8 @@ enum mig_rp_message_type { + MIG_RP_MSG_MAX + }; + ++bool 
migrate_pre_2_2; ++ + /* When we add fault tolerance, we could have several + migrations at once. For now we don't need to add + dynamic creation of migration */ +diff --git a/migration/migration.h b/migration/migration.h +index 64a7b33..405d984 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -288,6 +288,11 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); + + void dirty_bitmap_mig_before_vm_start(void); + void init_dirty_bitmap_incoming_migration(void); ++/* ++ * Disables a load of subsections that were added in 2.2/rh7.2 for backwards ++ * migration compatibility. ++ */ ++extern bool migrate_pre_2_2; + + #define qemu_ram_foreach_block \ + #warning "Use qemu_ram_foreach_block_migratable in migration code" +diff --git a/qdev-monitor.c b/qdev-monitor.c +index 61e0300..f439b83 100644 +--- a/qdev-monitor.c ++++ b/qdev-monitor.c +@@ -47,7 +47,6 @@ typedef struct QDevAlias + + /* Please keep this table sorted by typename. */ + static const QDevAlias qdev_alias_table[] = { +- { "e1000", "e1000-82540em" }, + { "ich9-ahci", "ahci" }, + { "lsi53c895a", "lsi" }, + { "virtio-9p-ccw", "virtio-9p", QEMU_ARCH_S390X }, +diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py +index d346728..4bca2bf 100755 +--- a/scripts/vmstate-static-checker.py ++++ b/scripts/vmstate-static-checker.py +@@ -105,7 +105,6 @@ def get_changed_sec_name(sec): + # Section names can change -- see commit 292b1634 for an example. + changes = { + "ICH9 LPC": "ICH9-LPC", +- "e1000-82540em": "e1000", + } + + for item in changes: +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 338ee37..051018a 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1360,11 +1360,17 @@ static CPUCaches epyc_cache_info = { + + static X86CPUDefinition builtin_x86_defs[] = { + { ++ /* qemu64 is the default CPU model for all *-rhel7.* machine-types. ++ * The default on RHEL-6 was cpu64-rhel6. 
++ * libvirt assumes that qemu64 is the default for _all_ machine-types, ++ * so we should try to keep qemu64 and cpu64-rhel6 as similar as ++ * possible. ++ */ + .name = "qemu64", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 6, +- .model = 6, ++ .model = 13, + .stepping = 3, + .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | + CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | +@@ -2684,6 +2690,7 @@ static PropValue kvm_default_props[] = { + { "acpi", "off" }, + { "monitor", "off" }, + { "svm", "off" }, ++ { "kvm-pv-unhalt", "on" }, + { NULL, NULL }, + }; + +diff --git a/target/i386/machine.c b/target/i386/machine.c +index 084c2c7..0c57c26 100644 +--- a/target/i386/machine.c ++++ b/target/i386/machine.c +@@ -955,6 +955,26 @@ static const VMStateDescription vmstate_svm_npt = { + } + }; + ++static bool vmstate_xsave_needed(void *opaque) ++{ ++ /* The xsave state is already on the main "cpu" section */ ++ return false; ++} ++ ++static const VMStateDescription vmstate_xsave ={ ++ .name = "cpu/xsave", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .minimum_version_id_old = 1, ++ .needed = vmstate_xsave_needed, ++ .fields = (VMStateField []) { ++ VMSTATE_UINT64_V(env.xcr0, X86CPU, 1), ++ VMSTATE_UINT64_V(env.xstate_bv, X86CPU, 1), ++ VMSTATE_YMMH_REGS_VARS(env.xmm_regs, X86CPU, CPU_NB_REGS, 1), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ + VMStateDescription vmstate_x86_cpu = { + .name = "cpu", + .version_id = 12, +@@ -1080,6 +1100,7 @@ VMStateDescription vmstate_x86_cpu = { + &vmstate_msr_intel_pt, + &vmstate_msr_virt_ssbd, + &vmstate_svm_npt, ++ &vmstate_xsave, + NULL + } + }; +diff --git a/target/ppc/compat.c b/target/ppc/compat.c +index 7de4bf3..3e2e353 100644 +--- a/target/ppc/compat.c ++++ b/target/ppc/compat.c +@@ -105,8 +105,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) + return NULL; + } + ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2) ++{ ++ const CompatInfo *compat1 = compat_by_pvr(pvr1); ++ const 
CompatInfo *compat2 = compat_by_pvr(pvr2); ++ ++ g_assert(compat1); ++ g_assert(compat2); ++ ++ return compat1 - compat2; ++} ++ + static bool pcc_compat(PowerPCCPUClass *pcc, uint32_t compat_pvr, +- uint32_t min_compat_pvr, uint32_t max_compat_pvr) ++ uint32_t min_compat_pvr, uint32_t max_compat_pvr) + { + const CompatInfo *compat = compat_by_pvr(compat_pvr); + const CompatInfo *min = compat_by_pvr(min_compat_pvr); +diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h +index 4edcf62..532f0d5 100644 +--- a/target/ppc/cpu.h ++++ b/target/ppc/cpu.h +@@ -1365,6 +1365,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) + + /* Compatibility modes */ + #if defined(TARGET_PPC64) ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2); + bool ppc_check_compat(PowerPCCPU *cpu, uint32_t compat_pvr, + uint32_t min_compat_pvr, uint32_t max_compat_pvr); + bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, +diff --git a/tests/Makefile.include b/tests/Makefile.include +index a492827..6016df2 100644 +--- a/tests/Makefile.include ++++ b/tests/Makefile.include +@@ -184,8 +184,8 @@ gcov-files-generic-y = qdev-monitor.c qmp.c + check-qtest-generic-y += tests/cdrom-test$(EXESUF) + + gcov-files-ipack-y += hw/ipack/ipack.c +-check-qtest-ipack-y += tests/ipoctal232-test$(EXESUF) +-gcov-files-ipack-y += hw/char/ipoctal232.c ++#check-qtest-ipack-y += tests/ipoctal232-test$(EXESUF) ++#gcov-files-ipack-y += hw/char/ipoctal232.c + + check-qtest-virtioserial-y += tests/virtio-console-test$(EXESUF) + gcov-files-virtioserial-y += hw/char/virtio-console.c +@@ -217,23 +217,23 @@ check-qtest-pci-y += tests/e1000e-test$(EXESUF) + gcov-files-pci-y += hw/net/e1000e.c hw/net/e1000e_core.c + check-qtest-pci-y += tests/rtl8139-test$(EXESUF) + gcov-files-pci-y += hw/net/rtl8139.c +-check-qtest-pci-y += tests/pcnet-test$(EXESUF) +-gcov-files-pci-y += hw/net/pcnet.c +-gcov-files-pci-y += hw/net/pcnet-pci.c +-check-qtest-pci-y += tests/eepro100-test$(EXESUF) +-gcov-files-pci-y += 
hw/net/eepro100.c +-check-qtest-pci-y += tests/ne2000-test$(EXESUF) +-gcov-files-pci-y += hw/net/ne2000.c +-check-qtest-pci-y += tests/nvme-test$(EXESUF) +-gcov-files-pci-y += hw/block/nvme.c ++#check-qtest-pci-y += tests/pcnet-test$(EXESUF) ++#gcov-files-pci-y += hw/net/pcnet.c ++#gcov-files-pci-y += hw/net/pcnet-pci.c ++#check-qtest-pci-y += tests/eepro100-test$(EXESUF) ++#gcov-files-pci-y += hw/net/eepro100.c ++#check-qtest-pci-y += tests/ne2000-test$(EXESUF) ++#gcov-files-pci-y += hw/net/ne2000.c ++#check-qtest-pci-y += tests/nvme-test$(EXESUF) ++#gcov-files-pci-y += hw/block/nvme.c + check-qtest-pci-y += tests/ac97-test$(EXESUF) + gcov-files-pci-y += hw/audio/ac97.c +-check-qtest-pci-y += tests/es1370-test$(EXESUF) +-gcov-files-pci-y += hw/audio/es1370.c ++#check-qtest-pci-y += tests/es1370-test$(EXESUF) ++#gcov-files-pci-y += hw/audio/es1370.c + check-qtest-pci-y += $(check-qtest-virtio-y) + gcov-files-pci-y += $(gcov-files-virtio-y) hw/virtio/virtio-pci.c +-check-qtest-pci-y += tests/tpci200-test$(EXESUF) +-gcov-files-pci-y += hw/ipack/tpci200.c ++#check-qtest-pci-y += tests/tpci200-test$(EXESUF) ++#gcov-files-pci-y += hw/ipack/tpci200.c + check-qtest-pci-y += $(check-qtest-ipack-y) + gcov-files-pci-y += $(gcov-files-ipack-y) + check-qtest-pci-y += tests/display-vga-test$(EXESUF) +@@ -245,25 +245,25 @@ gcov-files-pci-y += hw/display/virtio-gpu-pci.c + gcov-files-pci-$(CONFIG_VIRTIO_VGA) += hw/display/virtio-vga.c + check-qtest-pci-y += tests/intel-hda-test$(EXESUF) + gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c +-check-qtest-pci-$(CONFIG_IVSHMEM) += tests/ivshmem-test$(EXESUF) +-gcov-files-pci-y += hw/misc/ivshmem.c +-check-qtest-pci-y += tests/megasas-test$(EXESUF) +-gcov-files-pci-y += hw/scsi/megasas.c ++#check-qtest-pci-$(CONFIG_IVSHMEM) += tests/ivshmem-test$(EXESUF) ++#gcov-files-pci-y += hw/misc/ivshmem.c ++#check-qtest-pci-y += tests/megasas-test$(EXESUF) ++#gcov-files-pci-y += hw/scsi/megasas.c + + check-qtest-i386-y = 
tests/endianness-test$(EXESUF) +-check-qtest-i386-y += tests/fdc-test$(EXESUF) +-gcov-files-i386-y = hw/block/fdc.c ++#check-qtest-i386-y += tests/fdc-test$(EXESUF) ++#gcov-files-i386-y = hw/block/fdc.c + check-qtest-i386-y += tests/ide-test$(EXESUF) + check-qtest-i386-y += tests/ahci-test$(EXESUF) + check-qtest-i386-y += tests/hd-geo-test$(EXESUF) + gcov-files-i386-y += hw/block/hd-geometry.c + check-qtest-i386-y += tests/boot-order-test$(EXESUF) +-check-qtest-i386-y += tests/bios-tables-test$(EXESUF) ++#check-qtest-i386-y += tests/bios-tables-test$(EXESUF) + check-qtest-i386-y += tests/boot-serial-test$(EXESUF) + check-qtest-i386-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) + check-qtest-i386-y += tests/rtc-test$(EXESUF) +-check-qtest-i386-y += tests/ipmi-kcs-test$(EXESUF) +-check-qtest-i386-y += tests/ipmi-bt-test$(EXESUF) ++#check-qtest-i386-y += tests/ipmi-kcs-test$(EXESUF) ++#check-qtest-i386-y += tests/ipmi-bt-test$(EXESUF) + check-qtest-i386-y += tests/i440fx-test$(EXESUF) + check-qtest-i386-y += tests/fw_cfg-test$(EXESUF) + check-qtest-i386-y += tests/drive_del-test$(EXESUF) +@@ -272,8 +272,8 @@ check-qtest-i386-y += tests/tco-test$(EXESUF) + gcov-files-i386-y += hw/watchdog/watchdog.c hw/watchdog/wdt_ib700.c + check-qtest-i386-y += $(check-qtest-pci-y) + gcov-files-i386-y += $(gcov-files-pci-y) +-check-qtest-i386-y += tests/vmxnet3-test$(EXESUF) +-gcov-files-i386-y += hw/net/vmxnet3.c ++#check-qtest-i386-y += tests/vmxnet3-test$(EXESUF) ++#gcov-files-i386-y += hw/net/vmxnet3.c + gcov-files-i386-y += hw/net/net_rx_pkt.c + gcov-files-i386-y += hw/net/net_tx_pkt.c + check-qtest-i386-y += tests/pvpanic-test$(EXESUF) +@@ -282,8 +282,8 @@ check-qtest-i386-y += tests/i82801b11-test$(EXESUF) + gcov-files-i386-y += hw/pci-bridge/i82801b11.c + check-qtest-i386-y += tests/ioh3420-test$(EXESUF) + gcov-files-i386-y += hw/pci-bridge/ioh3420.c +-check-qtest-i386-y += tests/usb-hcd-ohci-test$(EXESUF) +-gcov-files-i386-y += hw/usb/hcd-ohci.c ++#check-qtest-i386-y += 
tests/usb-hcd-ohci-test$(EXESUF) ++#gcov-files-i386-y += hw/usb/hcd-ohci.c + check-qtest-i386-y += tests/usb-hcd-uhci-test$(EXESUF) + gcov-files-i386-y += hw/usb/hcd-uhci.c + check-qtest-i386-y += tests/usb-hcd-ehci-test$(EXESUF) +@@ -311,7 +311,7 @@ check-qtest-i386-y += tests/migration-test$(EXESUF) + check-qtest-i386-y += tests/test-x86-cpuid-compat$(EXESUF) + check-qtest-i386-y += tests/numa-test$(EXESUF) + check-qtest-x86_64-y += $(check-qtest-i386-y) +-check-qtest-x86_64-y += tests/sdhci-test$(EXESUF) ++#check-qtest-x86_64-y += tests/sdhci-test$(EXESUF) + gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c + gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y)) + +@@ -332,34 +332,34 @@ check-qtest-mips64el-y = tests/endianness-test$(EXESUF) + check-qtest-moxie-y = tests/boot-serial-test$(EXESUF) + + check-qtest-ppc-y = tests/endianness-test$(EXESUF) +-check-qtest-ppc-y += tests/boot-order-test$(EXESUF) ++#check-qtest-ppc-y += tests/boot-order-test$(EXESUF) + check-qtest-ppc-y += tests/prom-env-test$(EXESUF) + check-qtest-ppc-y += tests/drive_del-test$(EXESUF) + check-qtest-ppc-y += tests/boot-serial-test$(EXESUF) +-check-qtest-ppc-y += tests/m48t59-test$(EXESUF) +-gcov-files-ppc-y += hw/timer/m48t59.c ++#check-qtest-ppc-y += tests/m48t59-test$(EXESUF) ++#gcov-files-ppc-y += hw/timer/m48t59.c + + check-qtest-ppc64-y = $(check-qtest-ppc-y) + gcov-files-ppc64-y = $(subst ppc-softmmu/,ppc64-softmmu/,$(gcov-files-ppc-y)) + check-qtest-ppc64-y += tests/spapr-phb-test$(EXESUF) + gcov-files-ppc64-y += ppc64-softmmu/hw/ppc/spapr_pci.c +-check-qtest-ppc64-y += tests/pnv-xscom-test$(EXESUF) ++#check-qtest-ppc64-y += tests/pnv-xscom-test$(EXESUF) + check-qtest-ppc64-y += tests/migration-test$(EXESUF) + check-qtest-ppc64-y += tests/rtas-test$(EXESUF) + check-qtest-ppc64-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) +-check-qtest-ppc64-y += tests/usb-hcd-ohci-test$(EXESUF) +-gcov-files-ppc64-y += hw/usb/hcd-ohci.c +-check-qtest-ppc64-y += 
tests/usb-hcd-uhci-test$(EXESUF) +-gcov-files-ppc64-y += hw/usb/hcd-uhci.c ++#check-qtest-ppc64-y += tests/usb-hcd-ohci-test$(EXESUF) ++#gcov-files-ppc64-y += hw/usb/hcd-ohci.c ++#check-qtest-ppc64-y += tests/usb-hcd-uhci-test$(EXESUF) ++#gcov-files-ppc64-y += hw/usb/hcd-uhci.c + check-qtest-ppc64-y += tests/usb-hcd-xhci-test$(EXESUF) + gcov-files-ppc64-y += hw/usb/hcd-xhci.c + check-qtest-ppc64-y += $(check-qtest-virtio-y) +-check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) +-check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) +-check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF) ++#check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) ++#check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) ++#check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF) + check-qtest-ppc64-y += tests/display-vga-test$(EXESUF) + check-qtest-ppc64-y += tests/numa-test$(EXESUF) +-check-qtest-ppc64-$(CONFIG_IVSHMEM) += tests/ivshmem-test$(EXESUF) ++#check-qtest-ppc64-$(CONFIG_IVSHMEM) += tests/ivshmem-test$(EXESUF) + check-qtest-ppc64-y += tests/cpu-plug-test$(EXESUF) + + check-qtest-sh4-y = tests/endianness-test$(EXESUF) +@@ -388,7 +388,7 @@ check-qtest-arm-y += tests/boot-serial-test$(EXESUF) + check-qtest-arm-y += tests/sdhci-test$(EXESUF) + + check-qtest-aarch64-y = tests/numa-test$(EXESUF) +-check-qtest-aarch64-y += tests/sdhci-test$(EXESUF) ++#check-qtest-aarch64-y += tests/sdhci-test$(EXESUF) + check-qtest-aarch64-y += tests/boot-serial-test$(EXESUF) + + check-qtest-microblazeel-y = $(check-qtest-microblaze-y) +@@ -777,15 +777,15 @@ tests/endianness-test$(EXESUF): tests/endianness-test.o + tests/spapr-phb-test$(EXESUF): tests/spapr-phb-test.o $(libqos-obj-y) + tests/prom-env-test$(EXESUF): tests/prom-env-test.o $(libqos-obj-y) + tests/rtas-test$(EXESUF): tests/rtas-test.o $(libqos-spapr-obj-y) +-tests/fdc-test$(EXESUF): tests/fdc-test.o ++#tests/fdc-test$(EXESUF): 
tests/fdc-test.o + tests/ide-test$(EXESUF): tests/ide-test.o $(libqos-pc-obj-y) + tests/ahci-test$(EXESUF): tests/ahci-test.o $(libqos-pc-obj-y) +-tests/ipmi-kcs-test$(EXESUF): tests/ipmi-kcs-test.o +-tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o ++#tests/ipmi-kcs-test$(EXESUF): tests/ipmi-kcs-test.o ++#tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o + tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o + tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y) + tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y) +-tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ ++#tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ + tests/boot-sector.o tests/acpi-utils.o $(libqos-obj-y) + tests/pxe-test$(EXESUF): tests/pxe-test.o tests/boot-sector.o $(libqos-obj-y) + tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y) +@@ -798,11 +798,11 @@ tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y) + tests/e1000-test$(EXESUF): tests/e1000-test.o + tests/e1000e-test$(EXESUF): tests/e1000e-test.o $(libqos-pc-obj-y) + tests/rtl8139-test$(EXESUF): tests/rtl8139-test.o $(libqos-pc-obj-y) +-tests/pcnet-test$(EXESUF): tests/pcnet-test.o +-tests/pnv-xscom-test$(EXESUF): tests/pnv-xscom-test.o +-tests/eepro100-test$(EXESUF): tests/eepro100-test.o +-tests/vmxnet3-test$(EXESUF): tests/vmxnet3-test.o +-tests/ne2000-test$(EXESUF): tests/ne2000-test.o ++#tests/pcnet-test$(EXESUF): tests/pcnet-test.o ++#tests/pnv-xscom-test$(EXESUF): tests/pnv-xscom-test.o ++#tests/eepro100-test$(EXESUF): tests/eepro100-test.o ++#tests/vmxnet3-test$(EXESUF): tests/vmxnet3-test.o ++#tests/ne2000-test$(EXESUF): tests/ne2000-test.o + tests/wdt_ib700-test$(EXESUF): tests/wdt_ib700-test.o + tests/tco-test$(EXESUF): tests/tco-test.o $(libqos-pc-obj-y) + tests/virtio-balloon-test$(EXESUF): tests/virtio-balloon-test.o $(libqos-virtio-obj-y) +@@ -813,22 +813,22 @@ tests/virtio-scsi-test$(EXESUF): tests/virtio-scsi-test.o 
$(libqos-virtio-obj-y) + tests/virtio-9p-test$(EXESUF): tests/virtio-9p-test.o $(libqos-virtio-obj-y) + tests/virtio-serial-test$(EXESUF): tests/virtio-serial-test.o $(libqos-virtio-obj-y) + tests/virtio-console-test$(EXESUF): tests/virtio-console-test.o $(libqos-virtio-obj-y) +-tests/tpci200-test$(EXESUF): tests/tpci200-test.o ++#tests/tpci200-test$(EXESUF): tests/tpci200-test.o + tests/display-vga-test$(EXESUF): tests/display-vga-test.o +-tests/ipoctal232-test$(EXESUF): tests/ipoctal232-test.o ++#tests/ipoctal232-test$(EXESUF): tests/ipoctal232-test.o + tests/qom-test$(EXESUF): tests/qom-test.o + tests/test-hmp$(EXESUF): tests/test-hmp.o + tests/machine-none-test$(EXESUF): tests/machine-none-test.o + tests/drive_del-test$(EXESUF): tests/drive_del-test.o $(libqos-virtio-obj-y) + tests/qdev-monitor-test$(EXESUF): tests/qdev-monitor-test.o $(libqos-pc-obj-y) +-tests/nvme-test$(EXESUF): tests/nvme-test.o ++#tests/nvme-test$(EXESUF): tests/nvme-test.o + tests/pvpanic-test$(EXESUF): tests/pvpanic-test.o + tests/i82801b11-test$(EXESUF): tests/i82801b11-test.o + tests/ac97-test$(EXESUF): tests/ac97-test.o +-tests/es1370-test$(EXESUF): tests/es1370-test.o ++#tests/es1370-test$(EXESUF): tests/es1370-test.o + tests/intel-hda-test$(EXESUF): tests/intel-hda-test.o + tests/ioh3420-test$(EXESUF): tests/ioh3420-test.o +-tests/usb-hcd-ohci-test$(EXESUF): tests/usb-hcd-ohci-test.o $(libqos-usb-obj-y) ++#tests/usb-hcd-ohci-test$(EXESUF): tests/usb-hcd-ohci-test.o $(libqos-usb-obj-y) + tests/usb-hcd-uhci-test$(EXESUF): tests/usb-hcd-uhci-test.o $(libqos-usb-obj-y) + tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-usb-obj-y) + tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y) +@@ -841,19 +841,19 @@ tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_hel + tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y) + tests/test-keyval$(EXESUF): tests/test-keyval.o $(test-util-obj-y) 
$(test-qapi-obj-y) + tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y) +-tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y) +-tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y) +-tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y) ++#tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y) ++#tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y) ++#tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y) + tests/test-x86-cpuid-compat$(EXESUF): tests/test-x86-cpuid-compat.o $(qtest-obj-y) +-tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y) $(libqos-spapr-obj-y) +-tests/megasas-test$(EXESUF): tests/megasas-test.o $(libqos-spapr-obj-y) $(libqos-pc-obj-y) ++#tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y) $(libqos-spapr-obj-y) ++#tests/megasas-test$(EXESUF): tests/megasas-test.o $(libqos-spapr-obj-y) $(libqos-pc-obj-y) + tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o $(test-util-obj-y) libvhost-user.a + tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y) + tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o + tests/test-qapi-util$(EXESUF): tests/test-qapi-util.o $(test-util-obj-y) + tests/numa-test$(EXESUF): tests/numa-test.o + tests/vmgenid-test$(EXESUF): tests/vmgenid-test.o tests/boot-sector.o tests/acpi-utils.o +-tests/sdhci-test$(EXESUF): tests/sdhci-test.o $(libqos-pc-obj-y) ++#tests/sdhci-test$(EXESUF): tests/sdhci-test.o $(libqos-pc-obj-y) + tests/cdrom-test$(EXESUF): tests/cdrom-test.o tests/boot-sector.o $(libqos-obj-y) + + tests/migration/stress$(EXESUF): tests/migration/stress.o +diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c +index 952a2e7..5217a39 100644 +--- a/tests/boot-serial-test.c ++++ b/tests/boot-serial-test.c 
+@@ -80,17 +80,21 @@ static testdef_t tests[] = { + { "ppc", "g3beige", "", "PowerPC,750" }, + { "ppc", "mac99", "", "PowerPC,G4" }, + { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "ppce500", "", "U-Boot" }, + { "ppc64", "prep", "-boot e", "Booting from device e" }, + { "ppc64", "40p", "-m 192", "Memory size: 192 MB" }, + { "ppc64", "mac99", "", "PowerPC,970FX" }, ++#endif + { "ppc64", "pseries", "", "Open Firmware" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "powernv", "-cpu POWER8", "OPAL" }, + { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, ++#endif + { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "i386", "pc", "-device sga", "SGABIOS" }, + { "i386", "q35", "-device sga", "SGABIOS" }, +- { "x86_64", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, ++ { "x86_64", "pc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "x86_64", "q35", "-device sga", "SGABIOS" }, + { "sparc", "LX", "", "TMS390S10" }, + { "sparc", "SS-4", "", "MB86904" }, +diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c +index 5f39ba0..48b8d09 100644 +--- a/tests/cpu-plug-test.c ++++ b/tests/cpu-plug-test.c +@@ -192,7 +192,8 @@ static void add_pseries_test_case(const char *mname) + PlugTestData *data; + + if (!g_str_has_prefix(mname, "pseries-") || +- (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7)) { ++ (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7) || ++ strcmp(mname,"pseries-rhel7.2.0") == 0) { + return; + } + data = g_new(PlugTestData, 1); +diff --git a/tests/e1000-test.c b/tests/e1000-test.c +index 0c5fcdc..b830432 100644 +--- a/tests/e1000-test.c ++++ b/tests/e1000-test.c +@@ -29,8 +29,10 @@ static void test_device(gconstpointer data) + static const char *models[] = { + "e1000", + "e1000-82540em", ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + "e1000-82544gc", + "e1000-82545em", ++#endif + }; + + int main(int argc, char **argv) +diff 
--git a/tests/endianness-test.c b/tests/endianness-test.c +index 546e096..440353d 100644 +--- a/tests/endianness-test.c ++++ b/tests/endianness-test.c +@@ -37,10 +37,12 @@ static const TestCase test_cases[] = { + { "ppc", "g3beige", 0xfe000000, .bswap = true, .superio = "i82378" }, + { "ppc", "prep", 0x80000000, .bswap = true }, + { "ppc", "bamboo", 0xe8000000, .bswap = true, .superio = "i82378" }, ++#if 0 /* Disabled for RHEL, since ISA is not enabled */ + { "ppc64", "mac99", 0xf2000000, .bswap = true, .superio = "i82378" }, + { "ppc64", "pseries", (1ULL << 45), .bswap = true, .superio = "i82378" }, + { "ppc64", "pseries-2.7", 0x10080000000ULL, + .bswap = true, .superio = "i82378" }, ++#endif /* Disabled for RHEL, since ISA is not enabled */ + { "sh4", "r2d", 0xfe240000, .superio = "i82378" }, + { "sh4eb", "r2d", 0xfe240000, .bswap = true, .superio = "i82378" }, + { "sparc64", "sun4u", 0x1fe02000000LL, .bswap = true }, +diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c +index 8c867e6..cc9b6ec 100644 +--- a/tests/prom-env-test.c ++++ b/tests/prom-env-test.c +@@ -82,7 +82,9 @@ int main(int argc, char *argv[]) + if (!strcmp(arch, "ppc")) { + add_tests(ppc_machines); + } else if (!strcmp(arch, "ppc64")) { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + add_tests(ppc_machines); ++#endif + if (g_test_slow()) { + qtest_add_data_func("prom-env/pseries", "pseries", test_machine); + } +diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 +index ee9c820..c5cc0ee 100755 +--- a/tests/qemu-iotests/051 ++++ b/tests/qemu-iotests/051 +@@ -183,11 +183,11 @@ run_qemu -drive if=virtio + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive if=none,id=disk -device ide-cd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive if=none,id=disk -device ide-drive,drive=disk + run_qemu -drive if=none,id=disk -device 
ide-hd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; +@@ -212,11 +212,11 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-cd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-drive,drive=disk + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-hd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index b973dc8..f1059f6 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -77,7 +77,7 @@ + 068 rw auto quick + 069 rw auto quick + 070 rw auto quick +-071 rw auto quick ++# 071 rw auto quick -- requires whitelisted blkverify + 072 rw auto quick + 073 rw auto quick + 074 rw auto quick +@@ -105,7 +105,7 @@ + 096 rw auto quick + 097 rw auto backing + 098 rw auto backing quick +-099 rw auto quick ++# 099 rw auto quick -- requires whitelisted blkverify + # 100 was removed, do 
not reuse + 101 rw auto quick + 102 rw auto quick +diff --git a/tests/qom-test.c b/tests/qom-test.c +index e6f712c..ebd15fd 100644 +--- a/tests/qom-test.c ++++ b/tests/qom-test.c +@@ -16,7 +16,7 @@ + #include "libqtest.h" + + static const char *blacklist_x86[] = { +- "xenfv", "xenpv", NULL ++ "xenfv", "xenpv", "isapc", NULL + }; + + static const struct { +diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c +index 84ce9c7..c1ee197 100644 +--- a/tests/test-x86-cpuid-compat.c ++++ b/tests/test-x86-cpuid-compat.c +@@ -306,6 +306,7 @@ int main(int argc, char **argv) + "-cpu 486,xlevel2=0xC0000002,+xstore", + "xlevel2", 0xC0000002); + ++#if 0 /* Disabled in Red Hat Enterprise Linux */ + /* Check compatibility of old machine-types that didn't + * auto-increase level/xlevel/xlevel2: */ + +@@ -356,6 +357,7 @@ int main(int argc, char **argv) + add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", + "-machine pc-i440fx-2.4 -cpu SandyBridge,+npt", + "xlevel", 0x80000008); ++#endif + + /* Test feature parsing */ + add_feature_test("x86/cpuid/features/plus", +diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c +index 5b1b681..85fa150 100644 +--- a/tests/usb-hcd-xhci-test.c ++++ b/tests/usb-hcd-xhci-test.c +@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) + usb_test_hotplug("xhci", 1, NULL); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void test_usb_uas_hotplug(void) + { + qtest_qmp_device_add("usb-uas", "uas", NULL); +@@ -34,6 +35,7 @@ static void test_usb_uas_hotplug(void) + qtest_qmp_device_del("scsihd"); + qtest_qmp_device_del("uas"); + } ++#endif + + static void test_usb_ccid_hotplug(void) + { +@@ -52,7 +54,9 @@ int main(int argc, char **argv) + + qtest_add_func("/xhci/pci/init", test_xhci_init); + qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); ++#endif + 
qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); + + qtest_start("-device nec-usb-xhci,id=xhci" +-- +1.8.3.1 + diff --git a/0004-Use-kvm-by-default.patch b/0004-Use-kvm-by-default.patch new file mode 100644 index 0000000..f19a64e --- /dev/null +++ b/0004-Use-kvm-by-default.patch @@ -0,0 +1,32 @@ +From 5a441b820faa4e6e9e6fc80cccc813a3c333b6c2 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 18 Dec 2014 06:27:49 +0100 +Subject: Use kvm by default + +Bugzilla: 906185 + +RHEL uses kvm accelerator by default, if available. + +Signed-off-by: Miroslav Rezanina +--- + accel/accel.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/accel/accel.c b/accel/accel.c +index 966b2d8..e8ca7bb 100644 +--- a/accel/accel.c ++++ b/accel/accel.c +@@ -79,8 +79,8 @@ void configure_accelerator(MachineState *ms) + + accel = qemu_opt_get(qemu_get_machine_opts(), "accel"); + if (accel == NULL) { +- /* Use the default "accelerator", tcg */ +- accel = "tcg"; ++ /* RHEL uses kvm as the default accelerator, fallback to tcg */ ++ accel = "kvm:tcg"; + } + + accel_list = g_strsplit(accel, ":", 0); +-- +1.8.3.1 + diff --git a/0005-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0005-vfio-cap-number-of-devices-that-can-be-assigned.patch new file mode 100644 index 0000000..0d40ae5 --- /dev/null +++ b/0005-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -0,0 +1,65 @@ +From 0c57186334ab4ef7f04de604a8f13b39ad6578c8 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Tue, 3 Dec 2013 20:05:13 +0100 +Subject: vfio: cap number of devices that can be assigned + +RH-Author: Bandan Das +Message-id: <1386101113-31560-3-git-send-email-bsd@redhat.com> +Patchwork-id: 55984 +O-Subject: [PATCH RHEL7 qemu-kvm v2 2/2] vfio: cap number of devices that can be assigned +Bugzilla: 678368 +RH-Acked-by: Alex Williamson +RH-Acked-by: Marcelo Tosatti +RH-Acked-by: Michael S. 
Tsirkin + +Go through all groups to get count of total number of devices +active to enforce limit + +Reasoning from Alex for the limit(32) - Assuming 3 slots per +device, with 125 slots (number of memory slots for RHEL 7), +we can support almost 40 devices and still have few slots left +for other uses. Stepping down a bit, the number 32 arbitrarily +matches the number of slots on a PCI bus and is also a nice power +of two. + +Signed-off-by: Bandan Das +--- + hw/vfio/pci.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 6cbb8fa..59b3c0f 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -36,6 +36,7 @@ + #include "qapi/error.h" + + #define MSIX_CAP_LENGTH 12 ++#define MAX_DEV_ASSIGN_CMDLINE 32 + + static void vfio_disable_interrupts(VFIOPCIDevice *vdev); + static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); +@@ -2809,7 +2810,19 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + ssize_t len; + struct stat st; + int groupid; +- int i, ret; ++ int ret, i = 0; ++ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { ++ i++; ++ } ++ } ++ ++ if (i >= MAX_DEV_ASSIGN_CMDLINE) { ++ error_setg(errp, "Maximum supported vfio devices (%d) " ++ "already attached", MAX_DEV_ASSIGN_CMDLINE); ++ return; ++ } + + if (!vdev->vbasedev.sysfsdev) { + if (!(~vdev->host.domain || ~vdev->host.bus || +-- +1.8.3.1 + diff --git a/0006-Add-support-statement-to-help-output.patch b/0006-Add-support-statement-to-help-output.patch new file mode 100644 index 0000000..b6ecf11 --- /dev/null +++ b/0006-Add-support-statement-to-help-output.patch @@ -0,0 +1,55 @@ +From c2858d09461c6f69553e8b9d69804f243c2d08bb Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Wed, 4 Dec 2013 18:53:17 +0100 +Subject: Add support statement to -help output + +RH-Author: Eduardo Habkost +Message-id: <1386183197-27761-1-git-send-email-ehabkost@redhat.com> 
+Patchwork-id: 55994 +O-Subject: [qemu-kvm RHEL7 PATCH] Add support statement to -help output +Bugzilla: 972773 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: knoel@redhat.com +RH-Acked-by: Paolo Bonzini + +Add support statement to -help output, reporting direct qemu-kvm usage +as unsupported by Red Hat, and advising users to use libvirt instead. + +Signed-off-by: Eduardo Habkost +--- + vl.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/vl.c b/vl.c +index 4f96203..43c4b78 100644 +--- a/vl.c ++++ b/vl.c +@@ -1876,9 +1876,17 @@ static void version(void) + QEMU_COPYRIGHT "\n"); + } + ++static void print_rh_warning(void) ++{ ++ printf("\nWARNING: Direct use of qemu-kvm from the command line is not supported by Red Hat.\n" ++ "WARNING: Use libvirt as the stable management interface.\n" ++ "WARNING: Some command line options listed here may not be available in future releases.\n\n"); ++} ++ + static void help(int exitcode) + { + version(); ++ print_rh_warning(); + printf("usage: %s [options] [disk_image]\n\n" + "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", + error_get_progname()); +@@ -1895,6 +1903,7 @@ static void help(int exitcode) + "\n" + QEMU_HELP_BOTTOM "\n"); + ++ print_rh_warning(); + exit(exitcode); + } + +-- +1.8.3.1 + diff --git a/0007-globally-limit-the-maximum-number-of-CPUs.patch b/0007-globally-limit-the-maximum-number-of-CPUs.patch new file mode 100644 index 0000000..b8af753 --- /dev/null +++ b/0007-globally-limit-the-maximum-number-of-CPUs.patch @@ -0,0 +1,89 @@ +From 36dda20ae7312b1db0b4060bb2420ab18e5f5483 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Tue, 21 Jan 2014 10:46:52 +0100 +Subject: globally limit the maximum number of CPUs + +We now globally limit the number of VCPUs. +Especially, there is no way one can specify more than +max_cpus VCPUs for a VM. + +This allows us the restore the ppc max_cpus limitation to the upstream +default and minimize the ppc hack in kvm-all.c. 
+ +Signed-off-by: David Hildenbrand +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo Cesar Lemes de Paula +--- + accel/kvm/kvm-all.c | 12 ++++++++++++ + vl.c | 18 ++++++++++++++++++ + 2 files changed, 30 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index eb7db92..c2e7095 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -1586,6 +1586,18 @@ static int kvm_init(MachineState *ms) + soft_vcpus_limit = kvm_recommended_vcpus(s); + hard_vcpus_limit = kvm_max_vcpus(s); + ++#ifdef HOST_PPC64 ++ /* ++ * On POWER, the kernel advertises a soft limit based on the ++ * number of CPU threads on the host. We want to allow exceeding ++ * this for testing purposes, so we don't want to set hard limit ++ * to soft limit as on x86. ++ */ ++#else ++ /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */ ++ hard_vcpus_limit = soft_vcpus_limit; ++#endif ++ + while (nc->name) { + if (nc->num > soft_vcpus_limit) { + warn_report("Number of %s cpus requested (%d) exceeds " +diff --git a/vl.c b/vl.c +index 43c4b78..b50dbe4 100644 +--- a/vl.c ++++ b/vl.c +@@ -133,6 +133,8 @@ int main(int argc, char **argv) + + #define MAX_VIRTIO_CONSOLES 1 + ++#define RHEL_MAX_CPUS 384 ++ + static const char *data_dir[16]; + static int data_dir_idx; + const char *bios_name = NULL; +@@ -1430,6 +1432,20 @@ MachineClass *find_default_machine(void) + return mc; + } + ++/* Maximum number of CPUs limited for Red Hat Enterprise Linux */ ++static void limit_max_cpus_in_machines(void) ++{ ++ GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); ++ ++ for (el = machines; el; el = el->next) { ++ MachineClass *mc = el->data; ++ ++ if (mc->max_cpus > RHEL_MAX_CPUS) { ++ mc->max_cpus = RHEL_MAX_CPUS; ++ } ++ } ++} ++ + MachineInfoList *qmp_query_machines(Error **errp) + { + GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); +@@ -3993,6 +4009,8 @@ int main(int argc, char **argv, char **envp) + "mutually exclusive"); + exit(EXIT_FAILURE); + } ++ 
/* Maximum number of CPUs limited for Red Hat Enterprise Linux */ ++ limit_max_cpus_in_machines(); + + machine_class = select_machine(); + +-- +1.8.3.1 + diff --git a/0008-Add-support-for-simpletrace.patch b/0008-Add-support-for-simpletrace.patch new file mode 100644 index 0000000..2c660b8 --- /dev/null +++ b/0008-Add-support-for-simpletrace.patch @@ -0,0 +1,104 @@ +From 84763026a2e71d7b9f7fc9249ba25771724c272d Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 8 Oct 2015 09:50:17 +0200 +Subject: Add support for simpletrace + +As simpletrace is upstream, we just need to properly handle it during rpmbuild. + +Signed-off-by: Miroslav Rezanina +--- + .gitignore | 2 ++ + Makefile | 4 +++ + README.systemtap | 43 +++++++++++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 29 ++++++++++++++++++++-- + scripts/systemtap/conf.d/qemu_kvm.conf | 4 +++ + scripts/systemtap/script.d/qemu_kvm.stp | 1 + + 6 files changed, 81 insertions(+), 2 deletions(-) + create mode 100644 README.systemtap + create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf + create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp + +diff --git a/Makefile b/Makefile +index eb4c57a..6b6d3f6 100644 +--- a/Makefile ++++ b/Makefile +@@ -880,6 +880,10 @@ endif + $(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \ + done + $(INSTALL_DATA) $(BUILD_DIR)/trace-events-all "$(DESTDIR)$(qemu_datadir)/trace-events-all" ++ $(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/systemtap/script.d" ++ $(INSTALL_DATA) $(SRC_PATH)/scripts/systemtap/script.d/qemu_kvm.stp "$(DESTDIR)$(qemu_datadir)/systemtap/script.d/" ++ $(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/systemtap/conf.d" ++ $(INSTALL_DATA) $(SRC_PATH)/scripts/systemtap/conf.d/qemu_kvm.conf "$(DESTDIR)$(qemu_datadir)/systemtap/conf.d/" + for d in $(TARGET_DIRS); do \ + $(MAKE) $(SUBDIR_MAKEFLAGS) TARGET_DIR=$$d/ -C $$d $@ || exit 1 ; \ + done +diff --git a/README.systemtap b/README.systemtap +new file mode 100644 +index 
0000000..ad913fc +--- /dev/null ++++ b/README.systemtap +@@ -0,0 +1,43 @@ ++QEMU tracing using systemtap-initscript ++--------------------------------------- ++ ++You can capture QEMU trace data all the time using systemtap-initscript. This ++uses SystemTap's flight recorder mode to trace all running guests to a ++fixed-size buffer on the host. Old trace entries are overwritten by new ++entries when the buffer size wraps. ++ ++1. Install the systemtap-initscript package: ++ # yum install systemtap-initscript ++ ++2. Install the systemtap scripts and the conf file: ++ # cp /usr/share/qemu-kvm/systemtap/script.d/qemu_kvm.stp /etc/systemtap/script.d/ ++ # cp /usr/share/qemu-kvm/systemtap/conf.d/qemu_kvm.conf /etc/systemtap/conf.d/ ++ ++The set of trace events to enable is given in qemu_kvm.stp. This SystemTap ++script can be customized to add or remove trace events provided in ++/usr/share/systemtap/tapset/qemu-kvm-simpletrace.stp. ++ ++SystemTap customizations can be made to qemu_kvm.conf to control the flight ++recorder buffer size and whether to store traces in memory only or disk too. ++See stap(1) for option documentation. ++ ++3. Start the systemtap service. ++ # service systemtap start qemu_kvm ++ ++4. Make the service start at boot time. ++ # chkconfig systemtap on ++ ++5. Confirm that the service works. ++ # service systemtap status qemu_kvm ++ qemu_kvm is running... ++ ++When you want to inspect the trace buffer, perform the following steps: ++ ++1. Dump the trace buffer. ++ # staprun -A qemu_kvm >/tmp/trace.log ++ ++2. Start the systemtap service because the preceding step stops the service. ++ # service systemtap start qemu_kvm ++ ++3. Translate the trace record to readable format. 
++ # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log +diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf +new file mode 100644 +index 0000000..372d816 +--- /dev/null ++++ b/scripts/systemtap/conf.d/qemu_kvm.conf +@@ -0,0 +1,4 @@ ++# Force load uprobes (see BZ#1118352) ++stap -e 'probe process("/usr/libexec/qemu-kvm").function("main") { printf("") }' -c true ++ ++qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes +diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp +new file mode 100644 +index 0000000..c04abf9 +--- /dev/null ++++ b/scripts/systemtap/script.d/qemu_kvm.stp +@@ -0,0 +1 @@ ++probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} +-- +1.8.3.1 + diff --git a/0009-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0009-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch new file mode 100644 index 0000000..93e2343 --- /dev/null +++ b/0009-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -0,0 +1,1040 @@ +From 7f5450ae0077f13427a54bd2868c1986284839d2 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 14 Nov 2014 08:51:50 +0100 +Subject: Use qemu-kvm in documentation instead of qemu-system- + +Patchwork-id: 62380 +O-Subject: [RHEV-7.1 qemu-kvm-rhev PATCHv4] Use qemu-kvm in documentation instead of qemu-system-i386 +Bugzilla: 1140620 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi + +From: Miroslav Rezanina + +We change the name and location of qemu-kvm binaries. Update documentation +to reflect this change. Only architectures available in RHEL are updated. 
+ +Signed-off-by: Miroslav Rezanina +--- + docs/can.txt | 8 +-- + docs/pr-manager.rst | 4 +- + docs/qemu-block-drivers.texi | 70 ++++++++++----------- + docs/replay.txt | 4 +- + docs/specs/tpm.txt | 8 +-- + qemu-doc.texi | 70 ++++++++++----------- + qemu-options.hx | 144 ++++++++++++++++++++++--------------------- + 7 files changed, 156 insertions(+), 152 deletions(-) + +diff --git a/docs/can.txt b/docs/can.txt +index 7ba23b2..4ae5690 100644 +--- a/docs/can.txt ++++ b/docs/can.txt +@@ -50,9 +50,9 @@ CAN boards can be selected + The ''kvaser_pci'' board/device model is compatible with and has been tested with + ''kvaser_pci'' driver included in mainline Linux kernel. + The tested setup was Linux 4.9 kernel on the host and guest side. +-Example for qemu-system-x86_64: ++Example for qemu-kvm (intel architecture): + +- qemu-system-x86_64 -accel kvm -kernel /boot/vmlinuz-4.9.0-4-amd64 \ ++ qemu-kvm -accel kvm -kernel /boot/vmlinuz-4.9.0-4-amd64 \ + -initrd ramdisk.cpio \ + -virtfs local,path=shareddir,security_model=none,mount_tag=shareddir \ + -object can-bus,id=canbus0 \ +@@ -60,9 +60,9 @@ Example for qemu-system-x86_64: + -device kvaser_pci,canbus=canbus0 \ + -nographic -append "console=ttyS0" + +-Example for qemu-system-arm: ++Example for qemu-kvm (arm architecture): + +- qemu-system-arm -cpu arm1176 -m 256 -M versatilepb \ ++ qemu-kvm -cpu arm1176 -m 256 -M versatilepb \ + -kernel kernel-qemu-arm1176-versatilepb \ + -hda rpi-wheezy-overlay \ + -append "console=ttyAMA0 root=/dev/sda2 ro init=/sbin/init-overlay" \ +diff --git a/docs/pr-manager.rst b/docs/pr-manager.rst +index 9b1de19..45cb8be 100644 +--- a/docs/pr-manager.rst ++++ b/docs/pr-manager.rst +@@ -36,7 +36,7 @@ accepts the path to the helper program's Unix socket. 
For example, + the following command line defines a ``pr-manager-helper`` object and + attaches it to a SCSI passthrough device:: + +- $ qemu-system-x86_64 ++ $ qemu-kvm + -device virtio-scsi \ + -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock + -drive if=none,id=hd,driver=raw,file.filename=/dev/sdb,file.pr-manager=helper0 +@@ -44,7 +44,7 @@ attaches it to a SCSI passthrough device:: + + Alternatively, using ``-blockdev``:: + +- $ qemu-system-x86_64 ++ $ qemu-kvm + -device virtio-scsi \ + -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock + -blockdev node-name=hd,driver=raw,file.driver=host_device,file.filename=/dev/sdb,file.pr-manager=helper0 +diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi +index 38e9f34..2e71ec9 100644 +--- a/docs/qemu-block-drivers.texi ++++ b/docs/qemu-block-drivers.texi +@@ -405,7 +405,7 @@ QEMU can automatically create a virtual FAT disk image from a + directory tree. In order to use it, just type: + + @example +-qemu-system-i386 linux.img -hdb fat:/my_directory ++qemu-kvm linux.img -hdb fat:/my_directory + @end example + + Then you access access to all the files in the @file{/my_directory} +@@ -415,14 +415,14 @@ them via SAMBA or NFS. The default access is @emph{read-only}. + Floppies can be emulated with the @code{:floppy:} option: + + @example +-qemu-system-i386 linux.img -fda fat:floppy:/my_directory ++qemu-kvm linux.img -fda fat:floppy:/my_directory + @end example + + A read/write support is available for testing (beta stage) with the + @code{:rw:} option: + + @example +-qemu-system-i386 linux.img -fda fat:floppy:rw:/my_directory ++qemu-kvm linux.img -fda fat:floppy:rw:/my_directory + @end example + + What you should @emph{never} do: +@@ -440,14 +440,14 @@ QEMU can access directly to block device exported using the Network Block Device + protocol. 
+ + @example +-qemu-system-i386 linux.img -hdb nbd://my_nbd_server.mydomain.org:1024/ ++qemu-kvm linux.img -hdb nbd://my_nbd_server.mydomain.org:1024/ + @end example + + If the NBD server is located on the same host, you can use an unix socket instead + of an inet socket: + + @example +-qemu-system-i386 linux.img -hdb nbd+unix://?socket=/tmp/my_socket ++qemu-kvm linux.img -hdb nbd+unix://?socket=/tmp/my_socket + @end example + + In this case, the block device must be exported using qemu-nbd: +@@ -464,23 +464,23 @@ qemu-nbd --socket=/tmp/my_socket --share=2 my_disk.qcow2 + @noindent + and then you can use it with two guests: + @example +-qemu-system-i386 linux1.img -hdb nbd+unix://?socket=/tmp/my_socket +-qemu-system-i386 linux2.img -hdb nbd+unix://?socket=/tmp/my_socket ++qemu-kvm linux1.img -hdb nbd+unix://?socket=/tmp/my_socket ++qemu-kvm linux2.img -hdb nbd+unix://?socket=/tmp/my_socket + @end example + + If the nbd-server uses named exports (supported since NBD 2.9.18, or with QEMU's + own embedded NBD server), you must specify an export name in the URI: + @example +-qemu-system-i386 -cdrom nbd://localhost/debian-500-ppc-netinst +-qemu-system-i386 -cdrom nbd://localhost/openSUSE-11.1-ppc-netinst ++qemu-kvm -cdrom nbd://localhost/debian-500-ppc-netinst ++qemu-kvm -cdrom nbd://localhost/openSUSE-11.1-ppc-netinst + @end example + + The URI syntax for NBD is supported since QEMU 1.3. An alternative syntax is + also available. 
Here are some example of the older syntax: + @example +-qemu-system-i386 linux.img -hdb nbd:my_nbd_server.mydomain.org:1024 +-qemu-system-i386 linux2.img -hdb nbd:unix:/tmp/my_socket +-qemu-system-i386 -cdrom nbd:localhost:10809:exportname=debian-500-ppc-netinst ++qemu-kvm linux.img -hdb nbd:my_nbd_server.mydomain.org:1024 ++qemu-kvm linux2.img -hdb nbd:unix:/tmp/my_socket ++qemu-kvm -cdrom nbd:localhost:10809:exportname=debian-500-ppc-netinst + @end example + + @node disk_images_sheepdog +@@ -505,7 +505,7 @@ qemu-img convert @var{filename} sheepdog:///@var{image} + + You can boot from the Sheepdog disk image with the command: + @example +-qemu-system-i386 sheepdog:///@var{image} ++qemu-kvm sheepdog:///@var{image} + @end example + + You can also create a snapshot of the Sheepdog image like qcow2. +@@ -517,7 +517,7 @@ where @var{tag} is a tag name of the newly created snapshot. + To boot from the Sheepdog snapshot, specify the tag name of the + snapshot. + @example +-qemu-system-i386 sheepdog:///@var{image}#@var{tag} ++qemu-kvm sheepdog:///@var{image}#@var{tag} + @end example + + You can create a cloned image from the existing snapshot. +@@ -530,14 +530,14 @@ is its tag name. + You can use an unix socket instead of an inet socket: + + @example +-qemu-system-i386 sheepdog+unix:///@var{image}?socket=@var{path} ++qemu-kvm sheepdog+unix:///@var{image}?socket=@var{path} + @end example + + If the Sheepdog daemon doesn't run on the local host, you need to + specify one of the Sheepdog servers to connect to. 
+ @example + qemu-img create sheepdog://@var{hostname}:@var{port}/@var{image} @var{size} +-qemu-system-i386 sheepdog://@var{hostname}:@var{port}/@var{image} ++qemu-kvm sheepdog://@var{hostname}:@var{port}/@var{image} + @end example + + @node disk_images_iscsi +@@ -627,7 +627,7 @@ cat >iscsi.conf < /sys/bus/pci/devices/0000:06:0d.0/driver/unbind + # echo 1102 0002 > /sys/bus/pci/drivers/vfio-pci/new_id + +-# qemu-system-x86_64 -drive file=nvme://@var{host}:@var{bus}:@var{slot}.@var{func}/@var{namespace} ++# qemu-kvm -drive file=nvme://@var{host}:@var{bus}:@var{slot}.@var{func}/@var{namespace} + @end example + + Alternative syntax using properties: + + @example +-qemu-system-x86_64 -drive file.driver=nvme,file.device=@var{host}:@var{bus}:@var{slot}.@var{func},file.namespace=@var{namespace} ++qemu-kvm -drive file.driver=nvme,file.device=@var{host}:@var{bus}:@var{slot}.@var{func},file.namespace=@var{namespace} + @end example + + @var{host}:@var{bus}:@var{slot}.@var{func} is the NVMe controller's PCI device +diff --git a/docs/replay.txt b/docs/replay.txt +index 2e21e9c..f1923e8 100644 +--- a/docs/replay.txt ++++ b/docs/replay.txt +@@ -25,7 +25,7 @@ Deterministic replay has the following features: + + Usage of the record/replay: + * First, record the execution with the following command line: +- qemu-system-i386 \ ++ qemu-kvm \ + -icount shift=7,rr=record,rrfile=replay.bin \ + -drive file=disk.qcow2,if=none,id=img-direct \ + -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay \ +@@ -33,7 +33,7 @@ Usage of the record/replay: + -netdev user,id=net1 -device rtl8139,netdev=net1 \ + -object filter-replay,id=replay,netdev=net1 + * After recording, you can replay it by using another command line: +- qemu-system-i386 \ ++ qemu-kvm \ + -icount shift=7,rr=replay,rrfile=replay.bin \ + -drive file=disk.qcow2,if=none,id=img-direct \ + -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay \ +diff --git a/docs/specs/tpm.txt b/docs/specs/tpm.txt +index 
0e9bbeb..9320fbd 100644 +--- a/docs/specs/tpm.txt ++++ b/docs/specs/tpm.txt +@@ -98,7 +98,7 @@ QEMU files related to the TPM passthrough device: + Command line to start QEMU with the TPM passthrough device using the host's + hardware TPM /dev/tpm0: + +-qemu-system-x86_64 -display sdl -accel kvm \ ++qemu-kvm -display vnc -accel kvm \ + -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ + -tpmdev passthrough,id=tpm0,path=/dev/tpm0 \ + -device tpm-tis,tpmdev=tpm0 test.img +@@ -164,7 +164,7 @@ swtpm socket --tpmstate dir=/tmp/mytpm1 \ + Command line to start QEMU with the TPM emulator device communicating with + the swtpm: + +-qemu-system-x86_64 -display sdl -accel kvm \ ++qemu-kvm -display vnc -accel kvm \ + -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ + -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ + -tpmdev emulator,id=tpm0,chardev=chrtpm \ +@@ -222,7 +222,7 @@ swtpm socket --tpmstate dir=/tmp/mytpm1 \ + + In a 2nd terminal start the VM: + +-qemu-system-x86_64 -display sdl -accel kvm \ ++qemu-kvm -display sdl -accel kvm \ + -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ + -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ + -tpmdev emulator,id=tpm0,chardev=chrtpm \ +@@ -255,7 +255,7 @@ swtpm socket --tpmstate dir=/tmp/mytpm1 \ + In the 2nd terminal restore the state of the VM using the additional + '-incoming' option. + +-qemu-system-x86_64 -display sdl -accel kvm \ ++qemu-kvm -display sdl -accel kvm \ + -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ + -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ + -tpmdev emulator,id=tpm0,chardev=chrtpm \ +diff --git a/qemu-doc.texi b/qemu-doc.texi +index abfd2db..5827bed 100644 +--- a/qemu-doc.texi ++++ b/qemu-doc.texi +@@ -204,12 +204,12 @@ Note that, by default, GUS shares IRQ(7) with parallel ports and so + QEMU must be told to not have parallel ports to have working GUS. 
+ + @example +-qemu-system-i386 dos.img -soundhw gus -parallel none ++qemu-kvm dos.img -soundhw gus -parallel none + @end example + + Alternatively: + @example +-qemu-system-i386 dos.img -device gus,irq=5 ++qemu-kvm dos.img -device gus,irq=5 + @end example + + Or some other unclaimed IRQ. +@@ -225,7 +225,7 @@ CS4231A is the chip used in Windows Sound System and GUSMAX products + Download and uncompress the linux image (@file{linux.img}) and type: + + @example +-qemu-system-i386 linux.img ++qemu-kvm linux.img + @end example + + Linux should boot and give you a prompt. +@@ -235,7 +235,7 @@ Linux should boot and give you a prompt. + + @example + @c man begin SYNOPSIS +-@command{qemu-system-i386} [@var{options}] [@var{disk_image}] ++@command{qemu-kvm} [@var{options}] [@var{disk_image}] + @c man end + @end example + +@@ -275,21 +275,21 @@ is specified in seconds. The default is 0 which means no timeout. Libiscsi + + Example (without authentication): + @example +-qemu-system-i386 -iscsi initiator-name=iqn.2001-04.com.example:my-initiator \ ++qemu-kvm -iscsi initiator-name=iqn.2001-04.com.example:my-initiator \ + -cdrom iscsi://192.0.2.1/iqn.2001-04.com.example/2 \ + -drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1 + @end example + + Example (CHAP username/password via URL): + @example +-qemu-system-i386 -drive file=iscsi://user%password@@192.0.2.1/iqn.2001-04.com.example/1 ++qemu-kvm -drive file=iscsi://user%password@@192.0.2.1/iqn.2001-04.com.example/1 + @end example + + Example (CHAP username/password via environment variables): + @example + LIBISCSI_CHAP_USERNAME="user" \ + LIBISCSI_CHAP_PASSWORD="password" \ +-qemu-system-i386 -drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1 ++qemu-kvm -drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1 + @end example + + @item NBD +@@ -304,12 +304,12 @@ Syntax for specifying a NBD device using Unix Domain Sockets + + Example for TCP + @example +-qemu-system-i386 --drive file=nbd:192.0.2.1:30000 ++qemu-kvm --drive 
file=nbd:192.0.2.1:30000 + @end example + + Example for Unix Domain Sockets + @example +-qemu-system-i386 --drive file=nbd:unix:/tmp/nbd-socket ++qemu-kvm --drive file=nbd:unix:/tmp/nbd-socket + @end example + + @item SSH +@@ -317,8 +317,8 @@ QEMU supports SSH (Secure Shell) access to remote disks. + + Examples: + @example +-qemu-system-i386 -drive file=ssh://user@@host/path/to/disk.img +-qemu-system-i386 -drive file.driver=ssh,file.user=user,file.host=host,file.port=22,file.path=/path/to/disk.img ++qemu-kvm -drive file=ssh://user@@host/path/to/disk.img ++qemu-kvm -drive file.driver=ssh,file.user=user,file.host=host,file.port=22,file.path=/path/to/disk.img + @end example + + Currently authentication must be done using ssh-agent. Other +@@ -336,7 +336,7 @@ sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag] + + Example + @example +-qemu-system-i386 --drive file=sheepdog://192.0.2.1:30000/MyVirtualMachine ++qemu-kvm --drive file=sheepdog://192.0.2.1:30000/MyVirtualMachine + @end example + + See also @url{https://sheepdog.github.io/sheepdog/}. 
+@@ -362,17 +362,17 @@ JSON: + Example + @example + URI: +-qemu-system-x86_64 --drive file=gluster://192.0.2.1/testvol/a.img, ++qemu-kvm --drive file=gluster://192.0.2.1/testvol/a.img, + @ file.debug=9,file.logfile=/var/log/qemu-gluster.log + + JSON: +-qemu-system-x86_64 'json:@{"driver":"qcow2", ++qemu-kvm 'json:@{"driver":"qcow2", + @ "file":@{"driver":"gluster", + @ "volume":"testvol","path":"a.img", + @ "debug":9,"logfile":"/var/log/qemu-gluster.log", + @ "server":[@{"type":"tcp","host":"1.2.3.4","port":24007@}, + @ @{"type":"unix","socket":"/var/run/glusterd.socket"@}]@}@}' +-qemu-system-x86_64 -drive driver=qcow2,file.driver=gluster,file.volume=testvol,file.path=/path/a.img, ++qemu-kvm -drive driver=qcow2,file.driver=gluster,file.volume=testvol,file.path=/path/a.img, + @ file.debug=9,file.logfile=/var/log/qemu-gluster.log, + @ file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007, + @ file.server.1.type=unix,file.server.1.socket=/var/run/glusterd.socket +@@ -437,9 +437,9 @@ of . 
+ + Example: boot from a remote Fedora 20 live ISO image + @example +-qemu-system-x86_64 --drive media=cdrom,file=http://dl.fedoraproject.org/pub/fedora/linux/releases/20/Live/x86_64/Fedora-Live-Desktop-x86_64-20-1.iso,readonly ++qemu-kvm --drive media=cdrom,file=http://dl.fedoraproject.org/pub/fedora/linux/releases/20/Live/x86_64/Fedora-Live-Desktop-x86_64-20-1.iso,readonly + +-qemu-system-x86_64 --drive media=cdrom,file.driver=http,file.url=http://dl.fedoraproject.org/pub/fedora/linux/releases/20/Live/x86_64/Fedora-Live-Desktop-x86_64-20-1.iso,readonly ++qemu-kvm --drive media=cdrom,file.driver=http,file.url=http://dl.fedoraproject.org/pub/fedora/linux/releases/20/Live/x86_64/Fedora-Live-Desktop-x86_64-20-1.iso,readonly + @end example + + Example: boot from a remote Fedora 20 cloud image using a local overlay for +@@ -447,7 +447,7 @@ writes, copy-on-read, and a readahead of 64k + @example + qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"http",, "file.url":"https://dl.fedoraproject.org/pub/fedora/linux/releases/20/Images/x86_64/Fedora-x86_64-20-20131211.1-sda.qcow2",, "file.readahead":"64k"@}' /tmp/Fedora-x86_64-20-20131211.1-sda.qcow2 + +-qemu-system-x86_64 -drive file=/tmp/Fedora-x86_64-20-20131211.1-sda.qcow2,copy-on-read=on ++qemu-kvm -drive file=/tmp/Fedora-x86_64-20-20131211.1-sda.qcow2,copy-on-read=on + @end example + + Example: boot from an image stored on a VMware vSphere server with a self-signed +@@ -456,7 +456,7 @@ of 10 seconds. + @example + qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"https",, "file.url":"https://user:password@@vsphere.example.com/folder/test/test-flat.vmdk?dcPath=Datacenter&dsName=datastore1",, "file.sslverify":"off",, "file.readahead":"64k",, "file.timeout":10@}' /tmp/test.qcow2 + +-qemu-system-x86_64 -drive file=/tmp/test.qcow2 ++qemu-kvm -drive file=/tmp/test.qcow2 + @end example + + @end table +@@ -818,7 +818,7 @@ On Linux hosts, a shared memory device is available. 
The basic syntax + is: + + @example +-qemu-system-x86_64 -device ivshmem-plain,memdev=@var{hostmem} ++qemu-kvm -device ivshmem-plain,memdev=@var{hostmem} + @end example + + where @var{hostmem} names a host memory backend. For a POSIX shared +@@ -839,7 +839,7 @@ memory server is: + ivshmem-server -p @var{pidfile} -S @var{path} -m @var{shm-name} -l @var{shm-size} -n @var{vectors} + + # Then start your qemu instances with matching arguments +-qemu-system-x86_64 -device ivshmem-doorbell,vectors=@var{vectors},chardev=@var{id} ++qemu-kvm -device ivshmem-doorbell,vectors=@var{vectors},chardev=@var{id} + -chardev socket,path=@var{path},id=@var{id} + @end example + +@@ -864,7 +864,7 @@ Instead of specifying the using POSIX shm, you may specify + a memory backend that has hugepage support: + + @example +-qemu-system-x86_64 -object memory-backend-file,size=1G,mem-path=/dev/hugepages/my-shmem-file,share,id=mb1 ++qemu-kvm -object memory-backend-file,size=1G,mem-path=/dev/hugepages/my-shmem-file,share,id=mb1 + -device ivshmem-plain,memdev=mb1 + @end example + +@@ -880,7 +880,7 @@ kernel testing. + + The syntax is: + @example +-qemu-system-i386 -kernel arch/i386/boot/bzImage -hda root-2.4.20.img -append "root=/dev/hda" ++qemu-kvm -kernel arch/i386/boot/bzImage -hda root-2.4.20.img -append "root=/dev/hda" + @end example + + Use @option{-kernel} to provide the Linux kernel image and +@@ -895,7 +895,7 @@ If you do not need graphical output, you can disable it and redirect + the virtual serial port and the QEMU monitor to the console with the + @option{-nographic} option. The typical command line is: + @example +-qemu-system-i386 -kernel arch/i386/boot/bzImage -hda root-2.4.20.img \ ++qemu-kvm -kernel arch/i386/boot/bzImage -hda root-2.4.20.img \ + -append "root=/dev/hda console=ttyS0" -nographic + @end example + +@@ -961,7 +961,7 @@ Network adapter that supports CDC ethernet and RNDIS protocols. @var{id} + specifies a netdev defined with @code{-netdev @dots{},id=@var{id}}. 
+ For instance, user-mode networking can be used with + @example +-qemu-system-i386 [...] -netdev user,id=net0 -device usb-net,netdev=net0 ++qemu-kvm [...] -netdev user,id=net0 -device usb-net,netdev=net0 + @end example + @item usb-ccid + Smartcard reader device +@@ -980,7 +980,7 @@ no type is given, the HCI logic corresponds to @code{-bt hci,vlan=0}. + This USB device implements the USB Transport Layer of HCI. Example + usage: + @example +-@command{qemu-system-i386} [...@var{OPTIONS}...] @option{-usbdevice} bt:hci,vlan=3 @option{-bt} device:keyboard,vlan=3 ++@command{qemu-kvm} [...@var{OPTIONS}...] @option{-usbdevice} bt:hci,vlan=3 @option{-bt} device:keyboard,vlan=3 + @end example + @end table + +@@ -1057,7 +1057,7 @@ For this setup it is recommended to restrict it to listen on a UNIX domain + socket only. For example + + @example +-qemu-system-i386 [...OPTIONS...] -vnc unix:/home/joebloggs/.qemu-myvm-vnc ++qemu-kvm [...OPTIONS...] -vnc unix:/home/joebloggs/.qemu-myvm-vnc + @end example + + This ensures that only users on local box with read/write access to that +@@ -1080,7 +1080,7 @@ is running the password is set with the monitor. Until the monitor is used to + set the password all clients will be rejected. + + @example +-qemu-system-i386 [...OPTIONS...] -vnc :1,password -monitor stdio ++qemu-kvm [...OPTIONS...] -vnc :1,password -monitor stdio + (qemu) change vnc password + Password: ******** + (qemu) +@@ -1097,7 +1097,7 @@ support provides a secure session, but no authentication. This allows any + client to connect, and provides an encrypted session. + + @example +-qemu-system-i386 [...OPTIONS...] -vnc :1,tls,x509=/etc/pki/qemu -monitor stdio ++qemu-kvm [...OPTIONS...] -vnc :1,tls,x509=/etc/pki/qemu -monitor stdio + @end example + + In the above example @code{/etc/pki/qemu} should contain at least three files, +@@ -1115,7 +1115,7 @@ then validate against the CA certificate. 
This is a good choice if deploying + in an environment with a private internal certificate authority. + + @example +-qemu-system-i386 [...OPTIONS...] -vnc :1,tls,x509verify=/etc/pki/qemu -monitor stdio ++qemu-kvm [...OPTIONS...] -vnc :1,tls,x509verify=/etc/pki/qemu -monitor stdio + @end example + + +@@ -1126,7 +1126,7 @@ Finally, the previous method can be combined with VNC password authentication + to provide two layers of authentication for clients. + + @example +-qemu-system-i386 [...OPTIONS...] -vnc :1,password,tls,x509verify=/etc/pki/qemu -monitor stdio ++qemu-kvm [...OPTIONS...] -vnc :1,password,tls,x509verify=/etc/pki/qemu -monitor stdio + (qemu) change vnc password + Password: ******** + (qemu) +@@ -1149,7 +1149,7 @@ used for authentication, but assuming use of one supporting SSF, + then QEMU can be launched with: + + @example +-qemu-system-i386 [...OPTIONS...] -vnc :1,sasl -monitor stdio ++qemu-kvm [...OPTIONS...] -vnc :1,sasl -monitor stdio + @end example + + @node vnc_sec_certificate_sasl +@@ -1163,7 +1163,7 @@ credentials. This can be enabled, by combining the 'sasl' option + with the aforementioned TLS + x509 options: + + @example +-qemu-system-i386 [...OPTIONS...] -vnc :1,tls,x509,sasl -monitor stdio ++qemu-kvm [...OPTIONS...] -vnc :1,tls,x509,sasl -monitor stdio + @end example + + @node vnc_setup_sasl +@@ -1556,7 +1556,7 @@ QEMU has a primitive support to work with gdb, so that you can do + In order to use gdb, launch QEMU with the '-s' option. It will wait for a + gdb connection: + @example +-qemu-system-i386 -s -kernel arch/i386/boot/bzImage -hda root-2.4.20.img \ ++qemu-kvm -s -kernel arch/i386/boot/bzImage -hda root-2.4.20.img \ + -append "root=/dev/hda" + Connected to host network interface: tun0 + Waiting gdb connection on port 1234 +@@ -1802,7 +1802,7 @@ Set the initial VGA graphic mode. The default is 800x600x32. 
+ Set OpenBIOS variables in NVRAM, for example: + + @example +-qemu-system-ppc -prom-env 'auto-boot?=false' \ ++qemu-kvm -prom-env 'auto-boot?=false' \ + -prom-env 'boot-device=hd:2,\yaboot' \ + -prom-env 'boot-args=conf=hd:2,\yaboot.conf' + @end example +diff --git a/qemu-options.hx b/qemu-options.hx +index 37f2aa8..41cb1f3 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -263,7 +263,7 @@ This option defines a free-form string that can be used to describe @var{fd}. + + You can open an image using pre-opened file descriptors from an fd set: + @example +-qemu-system-i386 ++qemu-kvm + -add-fd fd=3,set=2,opaque="rdwr:/path/to/file" + -add-fd fd=4,set=2,opaque="rdonly:/path/to/file" + -drive file=/dev/fdset/2,index=0,media=disk +@@ -292,7 +292,7 @@ STEXI + Set default value of @var{driver}'s property @var{prop} to @var{value}, e.g.: + + @example +-qemu-system-i386 -global ide-hd.physical_block_size=4096 disk-image.img ++qemu-kvm -global ide-hd.physical_block_size=4096 disk-image.img + @end example + + In particular, you can use this to set driver properties for devices which are +@@ -346,11 +346,11 @@ bootindex options. The default is non-strict boot. + + @example + # try to boot from network first, then from hard disk +-qemu-system-i386 -boot order=nc ++qemu-kvm -boot order=nc + # boot from CD-ROM first, switch back to default order after reboot +-qemu-system-i386 -boot once=d ++qemu-kvm -boot once=d + # boot with a splash picture for 5 seconds. 
+-qemu-system-i386 -boot menu=on,splash=/root/boot.bmp,splash-time=5000 ++qemu-kvm -boot menu=on,splash=/root/boot.bmp,splash-time=5000 + @end example + + Note: The legacy format '-boot @var{drives}' is still supported but its +@@ -379,7 +379,7 @@ For example, the following command-line sets the guest startup RAM size to + memory the guest can reach to 4GB: + + @example +-qemu-system-x86_64 -m 1G,slots=3,maxmem=4G ++qemu-kvm -m 1G,slots=3,maxmem=4G + @end example + + If @var{slots} and @var{maxmem} are not specified, memory hotplug won't +@@ -448,12 +448,12 @@ Enable audio and selected sound hardware. Use 'help' to print all + available sound hardware. + + @example +-qemu-system-i386 -soundhw sb16,adlib disk.img +-qemu-system-i386 -soundhw es1370 disk.img +-qemu-system-i386 -soundhw ac97 disk.img +-qemu-system-i386 -soundhw hda disk.img +-qemu-system-i386 -soundhw all disk.img +-qemu-system-i386 -soundhw help ++qemu-kvm -soundhw sb16,adlib disk.img ++qemu-kvm -soundhw es1370 disk.img ++qemu-kvm -soundhw ac97 disk.img ++qemu-kvm -soundhw hda disk.img ++qemu-kvm -soundhw all disk.img ++qemu-kvm -soundhw help + @end example + + Note that Linux's i810_audio OSS kernel (for AC97) module might +@@ -946,21 +946,21 @@ is off. 
+ + Instead of @option{-cdrom} you can use: + @example +-qemu-system-i386 -drive file=file,index=2,media=cdrom ++qemu-kvm -drive file=file,index=2,media=cdrom + @end example + + Instead of @option{-hda}, @option{-hdb}, @option{-hdc}, @option{-hdd}, you can + use: + @example +-qemu-system-i386 -drive file=file,index=0,media=disk +-qemu-system-i386 -drive file=file,index=1,media=disk +-qemu-system-i386 -drive file=file,index=2,media=disk +-qemu-system-i386 -drive file=file,index=3,media=disk ++qemu-kvm -drive file=file,index=0,media=disk ++qemu-kvm -drive file=file,index=1,media=disk ++qemu-kvm -drive file=file,index=2,media=disk ++qemu-kvm -drive file=file,index=3,media=disk + @end example + + You can open an image using pre-opened file descriptors from an fd set: + @example +-qemu-system-i386 ++qemu-kvm + -add-fd fd=3,set=2,opaque="rdwr:/path/to/file" + -add-fd fd=4,set=2,opaque="rdonly:/path/to/file" + -drive file=/dev/fdset/2,index=0,media=disk +@@ -968,28 +968,28 @@ qemu-system-i386 + + You can connect a CDROM to the slave of ide0: + @example +-qemu-system-i386 -drive file=file,if=ide,index=1,media=cdrom ++qemu-kvm -drive file=file,if=ide,index=1,media=cdrom + @end example + + If you don't specify the "file=" argument, you define an empty drive: + @example +-qemu-system-i386 -drive if=ide,index=1,media=cdrom ++qemu-kvm -drive if=ide,index=1,media=cdrom + @end example + + Instead of @option{-fda}, @option{-fdb}, you can use: + @example +-qemu-system-i386 -drive file=file,index=0,if=floppy +-qemu-system-i386 -drive file=file,index=1,if=floppy ++qemu-kvm -drive file=file,index=0,if=floppy ++qemu-kvm -drive file=file,index=1,if=floppy + @end example + + By default, @var{interface} is "ide" and @var{index} is automatically + incremented: + @example +-qemu-system-i386 -drive file=a -drive file=b" ++qemu-kvm -drive file=a -drive file=b" + @end example + is interpreted like: + @example +-qemu-system-i386 -hda a -hdb b ++qemu-kvm -hda a -hdb b + @end example + ETEXI + 
+@@ -2055,8 +2055,8 @@ The following two example do exactly the same, to show how @option{-nic} can + be used to shorten the command line length (note that the e1000 is the default + on i386, so the @option{model=e1000} parameter could even be omitted here, too): + @example +-qemu-system-i386 -netdev user,id=n1,ipv6=off -device e1000,netdev=n1,mac=52:54:98:76:54:32 +-qemu-system-i386 -nic user,ipv6=off,model=e1000,mac=52:54:98:76:54:32 ++qemu-kvm -netdev user,id=n1,ipv6=off -device e1000,netdev=n1,mac=52:54:98:76:54:32 ++qemu-kvm -nic user,ipv6=off,model=e1000,mac=52:54:98:76:54:32 + @end example + + @item -nic none +@@ -2127,7 +2127,7 @@ can not be resolved. + + Example: + @example +-qemu-system-i386 -nic user,dnssearch=mgmt.example.org,dnssearch=example.org ++qemu-kvm -nic user,dnssearch=mgmt.example.org,dnssearch=example.org + @end example + + @item domainname=@var{domain} +@@ -2146,7 +2146,7 @@ a guest from a local directory. + + Example (using pxelinux): + @example +-qemu-system-i386 -hda linux.img -boot n -device e1000,netdev=n1 \ ++qemu-kvm -hda linux.img -boot n -device e1000,netdev=n1 \ + -netdev user,id=n1,tftp=/path/to/tftp/files,bootfile=/pxelinux.0 + @end example + +@@ -2180,7 +2180,7 @@ screen 0, use the following: + + @example + # on the host +-qemu-system-i386 -nic user,hostfwd=tcp:127.0.0.1:6001-:6000 ++qemu-kvm -nic user,hostfwd=tcp:127.0.0.1:6001-:6000 + # this host xterm should open in the guest X11 server + xterm -display :1 + @end example +@@ -2190,7 +2190,7 @@ the guest, use the following: + + @example + # on the host +-qemu-system-i386 -nic user,hostfwd=tcp::5555-:23 ++qemu-kvm -nic user,hostfwd=tcp::5555-:23 + telnet localhost 5555 + @end example + +@@ -2209,7 +2209,7 @@ lifetime, like in the following example: + @example + # open 10.10.1.1:4321 on bootup, connect 10.0.2.100:1234 to it whenever + # the guest accesses it +-qemu-system-i386 -nic user,guestfwd=tcp:10.0.2.100:1234-tcp:10.10.1.1:4321 ++qemu-kvm -nic 
user,guestfwd=tcp:10.0.2.100:1234-tcp:10.10.1.1:4321 + @end example + + Or you can execute a command on every TCP connection established by the guest, +@@ -2218,7 +2218,7 @@ so that QEMU behaves similar to an inetd process for that virtual server: + @example + # call "netcat 10.10.1.1 4321" on every TCP connection to 10.0.2.100:1234 + # and connect the TCP stream to its stdin/stdout +-qemu-system-i386 -nic 'user,id=n1,guestfwd=tcp:10.0.2.100:1234-cmd:netcat 10.10.1.1 4321' ++qemu-kvm -nic 'user,id=n1,guestfwd=tcp:10.0.2.100:1234-cmd:netcat 10.10.1.1 4321' + @end example + + @end table +@@ -2250,21 +2250,22 @@ Examples: + + @example + #launch a QEMU instance with the default network script +-qemu-system-i386 linux.img -nic tap ++qemu-kvm linux.img -nic tap + @end example + + @example + #launch a QEMU instance with two NICs, each one connected + #to a TAP device +-qemu-system-i386 linux.img \ ++qemu-kvm linux.img \ + -netdev tap,id=nd0,ifname=tap0 -device e1000,netdev=nd0 \ + -netdev tap,id=nd1,ifname=tap1 -device rtl8139,netdev=nd1 ++ -net nic,vlan=1 -net tap,vlan=1,ifname=tap1 + @end example + + @example + #launch a QEMU instance with the default network helper to + #connect a TAP device to bridge br0 +-qemu-system-i386 linux.img -device virtio-net-pci,netdev=n1 \ ++qemu-kvm linux.img -device virtio-net-pci,netdev=n1 \ + -netdev tap,id=n1,"helper=/path/to/qemu-bridge-helper" + @end example + +@@ -2281,13 +2282,13 @@ Examples: + @example + #launch a QEMU instance with the default network helper to + #connect a TAP device to bridge br0 +-qemu-system-i386 linux.img -netdev bridge,id=n1 -device virtio-net,netdev=n1 ++qemu-kvm linux.img -netdev bridge,id=n1 -device virtio-net,netdev=n1 + @end example + + @example + #launch a QEMU instance with the default network helper to + #connect a TAP device to bridge qemubr0 +-qemu-system-i386 linux.img -netdev bridge,br=qemubr0,id=n1 -device virtio-net,netdev=n1 ++qemu-kvm linux.img -netdev bridge,br=qemubr0,id=n1 -device 
virtio-net,netdev=n1 + @end example + + @item -netdev socket,id=@var{id}[,fd=@var{h}][,listen=[@var{host}]:@var{port}][,connect=@var{host}:@var{port}] +@@ -2302,13 +2303,13 @@ specifies an already opened TCP socket. + Example: + @example + # launch a first QEMU instance +-qemu-system-i386 linux.img \ +- -device e1000,netdev=n1,mac=52:54:00:12:34:56 \ +- -netdev socket,id=n1,listen=:1234 ++qemu-kvm linux.img \ ++ -device e1000,netdev=n1,mac=52:54:00:12:34:56 \ ++ -netdev socket,id=n1,listen=:1234 + # connect the network of this instance to the network of the first instance +-qemu-system-i386 linux.img \ +- -device e1000,netdev=n2,mac=52:54:00:12:34:57 \ +- -netdev socket,id=n2,connect=127.0.0.1:1234 ++qemu-kvm linux.img \ ++ -device e1000,netdev=n2,mac=52:54:00:12:34:57 \ ++ -netdev socket,id=n2,connect=127.0.0.1:1234 + @end example + + @item -netdev socket,id=@var{id}[,fd=@var{h}][,mcast=@var{maddr}:@var{port}[,localaddr=@var{addr}]] +@@ -2331,23 +2332,23 @@ Use @option{fd=h} to specify an already opened UDP multicast socket. 
+ Example: + @example + # launch one QEMU instance +-qemu-system-i386 linux.img \ +- -device e1000,netdev=n1,mac=52:54:00:12:34:56 \ +- -netdev socket,id=n1,mcast=230.0.0.1:1234 ++qemu-kvm linux.img \ ++ -device e1000,netdev=n1,mac=52:54:00:12:34:56 \ ++ -netdev socket,id=n1,mcast=230.0.0.1:1234 + # launch another QEMU instance on same "bus" +-qemu-system-i386 linux.img \ +- -device e1000,netdev=n2,mac=52:54:00:12:34:57 \ +- -netdev socket,id=n2,mcast=230.0.0.1:1234 ++qemu-kvm linux.img \ ++ -device e1000,netdev=n2,mac=52:54:00:12:34:57 \ ++ -netdev socket,id=n2,mcast=230.0.0.1:1234 + # launch yet another QEMU instance on same "bus" +-qemu-system-i386 linux.img \ +- -device e1000,netdev=n3,macaddr=52:54:00:12:34:58 \ +- -netdev socket,id=n3,mcast=230.0.0.1:1234 ++qemu-kvm linux.img \ ++ -device e1000,netdev=n3,macaddr=52:54:00:12:34:58 \ ++ -netdev socket,id=n3,mcast=230.0.0.1:1234 + @end example + + Example (User Mode Linux compat.): + @example + # launch QEMU instance (note mcast address selected is UML's default) +-qemu-system-i386 linux.img \ ++qemu-kvm linux.img \ + -device e1000,netdev=n1,mac=52:54:00:12:34:56 \ + -netdev socket,id=n1,mcast=239.192.168.1:1102 + # launch UML +@@ -2356,9 +2357,12 @@ qemu-system-i386 linux.img \ + + Example (send packets from host's 1.2.3.4): + @example +-qemu-system-i386 linux.img \ +- -device e1000,netdev=n1,mac=52:54:00:12:34:56 \ +- -netdev socket,id=n1,mcast=239.192.168.1:1102,localaddr=1.2.3.4 ++qemu-kvm linux.img \ ++ -device e1000,netdev=n1,mac=52:54:00:12:34:56 \ ++ -netdev socket,id=n1,mcast=239.192.168.1:1102,localaddr=1.2.3.4 ++qemu-kvm linux.img \ ++ -net nic,macaddr=52:54:00:12:34:56 \ ++ -net socket,mcast=239.192.168.1:1102,localaddr=1.2.3.4 + @end example + + @item -netdev 
l2tpv3,id=@var{id},src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}] +@@ -2416,7 +2420,7 @@ brctl addif br-lan vmtunnel0 + # on 4.3.2.1 + # launch QEMU instance - if your network has reorder or is very lossy add ,pincounter + +-qemu-system-i386 linux.img -device e1000,netdev=n1 \ ++qemu-kvm linux.img -device e1000,netdev=n1 \ + -netdev l2tpv3,id=n1,src=4.2.3.1,dst=1.2.3.4,udp,srcport=16384,dstport=16384,rxsession=0xffffffff,txsession=0xffffffff,counter + + @end example +@@ -2433,7 +2437,7 @@ Example: + # launch vde switch + vde_switch -F -sock /tmp/myswitch + # launch QEMU instance +-qemu-system-i386 linux.img -nic vde,sock=/tmp/myswitch ++qemu-kvm linux.img -nic vde,sock=/tmp/myswitch + @end example + + @item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n] +@@ -2447,11 +2451,11 @@ be created for multiqueue vhost-user. + + Example: + @example +-qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ +- -numa node,memdev=mem \ +- -chardev socket,id=chr0,path=/path/to/socket \ +- -netdev type=vhost-user,id=net0,chardev=chr0 \ +- -device virtio-net-pci,netdev=net0 ++qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ ++ -numa node,memdev=mem \ ++ -chardev socket,id=chr0,path=/path/to/socket \ ++ -netdev type=vhost-user,id=net0,chardev=chr0 \ ++ -device virtio-net-pci,netdev=net0 + @end example + + @item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}] +@@ -2877,7 +2881,7 @@ and communicate. Requires the Linux @code{vhci} driver installed. Can + be used as following: + + @example +-qemu-system-i386 [...OPTIONS...] -bt hci,vlan=5 -bt vhci,vlan=5 ++qemu-kvm [...OPTIONS...] 
-bt hci,vlan=5 -bt vhci,vlan=5 + @end example + + @item -bt device:@var{dev}[,vlan=@var{n}] +@@ -3322,14 +3326,14 @@ ETEXI + + DEF("realtime", HAS_ARG, QEMU_OPTION_realtime, + "-realtime [mlock=on|off]\n" +- " run qemu with realtime features\n" ++ " run qemu-kvm with realtime features\n" + " mlock=on|off controls mlock support (default: on)\n", + QEMU_ARCH_ALL) + STEXI + @item -realtime mlock=on|off + @findex -realtime +-Run qemu with realtime features. +-mlocking qemu and guest memory can be enabled via @option{mlock=on} ++Run qemu-kvm with realtime features. ++mlocking qemu-kvm and guest memory can be enabled via @option{mlock=on} + (enabled by default). + ETEXI + +@@ -3367,7 +3371,7 @@ connections will likely be TCP-based, but also UDP, pseudo TTY, or even + stdio are reasonable use case. The latter is allowing to start QEMU from + within gdb and establish the connection via a pipe: + @example +-(gdb) target remote | exec qemu-system-i386 -gdb stdio ... ++(gdb) target remote | exec qemu-kvm -gdb stdio ... + @end example + ETEXI + +@@ -4299,7 +4303,7 @@ which specify the queue number of cryptodev backend, the default of + + @example + +- # qemu-system-x86_64 \ ++ # qemu-kvm \ + [...] \ + -object cryptodev-backend-builtin,id=cryptodev0 \ + -device virtio-crypto-pci,id=crypto0,cryptodev=cryptodev0 \ +@@ -4319,7 +4323,7 @@ of cryptodev backend for multiqueue vhost-user, the default of @var{queues} is 1 + + @example + +- # qemu-system-x86_64 \ ++ # qemu-kvm \ + [...] \ + -chardev socket,id=chardev0,path=/path/to/socket \ + -object cryptodev-vhost-user,id=cryptodev0,chardev=chardev0 \ +-- +1.8.3.1 + diff --git a/0010-usb-xhci-Fix-PCI-capability-order.patch b/0010-usb-xhci-Fix-PCI-capability-order.patch new file mode 100644 index 0000000..a3e6795 --- /dev/null +++ b/0010-usb-xhci-Fix-PCI-capability-order.patch @@ -0,0 +1,82 @@ +From 268966c530da2d8e07e2c9034a82acd01335e2c2 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Fri, 5 May 2017 19:06:14 +0200 +Subject: usb-xhci: Fix PCI capability order + +RH-Author: Dr. David Alan Gilbert +Message-id: <20170505190614.15987-2-dgilbert@redhat.com> +Patchwork-id: 75038 +O-Subject: [RHEL-7.4 qemu-kvm-rhev PATCH 1/1] usb-xhci: Fix PCI capability order +Bugzilla: 1447874 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Juan Quintela + +From: "Dr. David Alan Gilbert" + +Upstream commit 1108b2f8a9 in 2.7.0 changed the order +of the PCI capability chain in the XHCI pci device in the case +where the device has the PCIe endpoint capability (i.e. only +older machine types, pc-i440fx-2.0 upstream, pc-i440fx-rhel7.0.0 +apparently for us). + +Changing the order breaks migration compatibility; fixing this +upstream would mean breaking the same case going from 2.7.0->current +that currently works 2.7.0->2.9.0 - so upstream it's a choice +of two breakages. + +Since we never released 2.7.0/2.8.0 we can fix this downstream. + +This reverts the order so that we create the capabilities in the +order: + PCIe + MSI + MSI-X + +The symptom is: +qemu-kvm: get_pci_config_device: Bad config data: i=0x71 read: a0 device: 0 cmask: ff wmask: 0 w1cmask:0 +qemu-kvm: Failed to load PCIDevice:config +qemu-kvm: Failed to load xhci:parent_obj +qemu-kvm: error while loading state for instance 0x0 of device '0000:00:0d.0/xhci' +qemu-kvm: load of migration failed: Invalid argument + +Signed-off-by: Dr. 
David Alan Gilbert +Signed-off-by: Miroslav Rezanina +--- + hw/usb/hcd-xhci.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index ca19474..45fcce3 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c +@@ -3373,6 +3373,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) + xhci->max_pstreams_mask = 0; + } + ++ if (pci_bus_is_express(pci_get_bus(dev)) || ++ xhci_get_flag(xhci, XHCI_FLAG_FORCE_PCIE_ENDCAP)) { ++ ret = pcie_endpoint_cap_init(dev, 0xa0); ++ assert(ret > 0); ++ } ++ + if (xhci->msi != ON_OFF_AUTO_OFF) { + ret = msi_init(dev, 0x70, xhci->numintrs, true, false, &err); + /* Any error other than -ENOTSUP(board's MSI support is broken) +@@ -3421,12 +3427,6 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) + PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64, + &xhci->mem); + +- if (pci_bus_is_express(pci_get_bus(dev)) || +- xhci_get_flag(xhci, XHCI_FLAG_FORCE_PCIE_ENDCAP)) { +- ret = pcie_endpoint_cap_init(dev, 0xa0); +- assert(ret > 0); +- } +- + if (xhci->msix != ON_OFF_AUTO_OFF) { + /* TODO check for errors, and should fail when msix=on */ + msix_init(dev, xhci->numintrs, +-- +1.8.3.1 + diff --git a/0011-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0011-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch new file mode 100644 index 0000000..886de8e --- /dev/null +++ b/0011-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -0,0 +1,66 @@ +From 126cb3f3717b266f27dc7c657da833779f9f3b54 Mon Sep 17 00:00:00 2001 +From: Fam Zheng +Date: Wed, 14 Jun 2017 15:37:01 +0200 +Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] + +RH-Author: Fam Zheng +Message-id: <20170614153701.14757-1-famz@redhat.com> +Patchwork-id: 75613 +O-Subject: [RHV-7.4 qemu-kvm-rhev PATCH v3] virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] +Bugzilla: 1378816 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: 
Stefan Hajnoczi +RH-Acked-by: Max Reitz + +We need a fix for RHEL 7.4 and 7.3.z, but unfortunately upstream isn't +ready. If it were, the changes will be too invasive. To have an idea: + +https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg05400.html + +is an incomplete attempt to fix part of the issue, and the remaining +work unfortunately involve even more complex changes. + +As a band-aid, this partially reverts the effect of ef8875b +(virtio-scsi: Remove op blocker for dataplane, since v2.7). We cannot +simply revert that commit as a whole because we already shipped it in +qemu-kvm-rhev 7.3, since when, block jobs has been possible. We should +only block what has been broken. Also, faithfully reverting the above +commit means adding back the removed op blocker, but that is not enough, +because it still crashes when inserting media into an initially empty +scsi-cd. + +All in all, scsi-cd on virtio-scsi-dataplane has basically been unusable +unless the scsi-cd never enters an empty state, so, disable it +altogether. Otherwise it would be much more difficult to avoid +crashing. + +Signed-off-by: Fam Zheng +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. de Paula +--- + hw/scsi/virtio-scsi.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 5a3057d..52a3c1d 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -790,6 +790,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + VirtIOSCSI *s = VIRTIO_SCSI(vdev); + SCSIDevice *sd = SCSI_DEVICE(dev); + ++ /* XXX: Remove this check once block backend is capable of handling ++ * AioContext change upon eject/insert. ++ * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if ++ * data plane is not used, both cases are safe for scsi-cd. 
*/ ++ if (s->ctx && s->ctx != qemu_get_aio_context() && ++ object_dynamic_cast(OBJECT(dev), "scsi-cd")) { ++ error_setg(errp, "scsi-cd is not supported by data plane"); ++ return; ++ } + if (s->ctx && !s->dataplane_fenced) { + if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; +-- +1.8.3.1 + diff --git a/0012-linux-headers-asm-s390-kvm.h-header-sync.patch b/0012-linux-headers-asm-s390-kvm.h-header-sync.patch new file mode 100644 index 0000000..c0b4e09 --- /dev/null +++ b/0012-linux-headers-asm-s390-kvm.h-header-sync.patch @@ -0,0 +1,72 @@ +From 811173cac3e80b6235de885b7b2ec4f9be3b4e31 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Thu, 9 Aug 2018 10:15:08 +0000 +Subject: linux-headers: asm-s390/kvm.h header sync + +RH-Author: Thomas Huth +Message-id: <1533813309-9643-2-git-send-email-thuth@redhat.com> +Patchwork-id: 81688 +O-Subject: [RHEL-8.0 qemu-kvm PATCH 1/2] linux-headers: asm-s390/kvm.h header sync +Bugzilla: 1612938 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jens Freimann + +This is a header sync with the linux uapi header. The corresponding +kernel commit id is a3da7b4a3be51f37f434f14e11e60491f098b6ea (in +the kvm/next branch) + +Signed-off-by: Thomas Huth + +Merged patches (3.0.0): +- 57332f1 linux-headers: Update to include KVM_CAP_S390_HPAGE_1M +--- + linux-headers/asm-s390/kvm.h | 5 ++++- + linux-headers/linux/kvm.h | 1 + + 2 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h +index 11def14..1ab9901 100644 +--- a/linux-headers/asm-s390/kvm.h ++++ b/linux-headers/asm-s390/kvm.h +@@ -4,7 +4,7 @@ + /* + * KVM s390 specific structures and definitions + * +- * Copyright IBM Corp. 2008 ++ * Copyright IBM Corp. 
2008, 2018 + * + * Author(s): Carsten Otte + * Christian Borntraeger +@@ -225,6 +225,7 @@ struct kvm_guest_debug_arch { + #define KVM_SYNC_FPRS (1UL << 8) + #define KVM_SYNC_GSCB (1UL << 9) + #define KVM_SYNC_BPBC (1UL << 10) ++#define KVM_SYNC_ETOKEN (1UL << 11) + /* length and alignment of the sdnx as a power of two */ + #define SDNXC 8 + #define SDNXL (1UL << SDNXC) +@@ -258,6 +259,8 @@ struct kvm_sync_regs { + struct { + __u64 reserved1[2]; + __u64 gscb[4]; ++ __u64 etoken; ++ __u64 etoken_extension; + }; + }; + }; +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 98f389a..2aae948 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -949,6 +949,7 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_GET_MSR_FEATURES 153 + #define KVM_CAP_HYPERV_EVENTFD 154 + #define KVM_CAP_HYPERV_TLBFLUSH 155 ++#define KVM_CAP_S390_HPAGE_1M 156 + + #ifdef KVM_CAP_IRQ_ROUTING + +-- +1.8.3.1 + diff --git a/0013-s390x-Enable-KVM-huge-page-backing-support.patch b/0013-s390x-Enable-KVM-huge-page-backing-support.patch new file mode 100644 index 0000000..926fb13 --- /dev/null +++ b/0013-s390x-Enable-KVM-huge-page-backing-support.patch @@ -0,0 +1,114 @@ +From fa8eda01f21298e6bc50abb78775390b4bf3f954 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Mon, 6 Aug 2018 14:18:41 +0100 +Subject: s390x: Enable KVM huge page backing support + +RH-Author: David Hildenbrand +Message-id: <20180806141842.23963-3-david@redhat.com> +Patchwork-id: 81645 +O-Subject: [RHEL-8.0 qemu-kvm PATCH v2 2/3] s390x: Enable KVM huge page backing support +Bugzilla: 1610906 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Paolo Bonzini + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1610906 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=17624600 +Upstream: N/A + +Kernel part is in kvm/next, scheduled for 4.19. Patch has been reviewed +upstream but cannot get picked up yet due to the outstanding linux +header sync. 
Conflict to upstream patch: We have no units.h, therefore +we have to unfold "4*KiB" and "1*MiB". + +QEMU has had huge page support for a longer time already, but KVM +memory management under s390x needed some changes to work with huge +backings. + +Now that we have support, let's enable it if requested and +available. Otherwise we now properly tell the user if there is no +support and back out instead of failing to run the VM later on. + +Signed-off-by: Janosch Frank +Reviewed-by: David Hildenbrand +Reviewed-by: Thomas Huth +Signed-off-by: David Hildenbrand +Signed-off-by: Danilo C. L. de Paula +--- + target/s390x/kvm.c | 34 ++++++++++++++++++++++++++++++++-- + 1 file changed, 32 insertions(+), 2 deletions(-) + +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index bbcbeed..c36ff36f 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -34,6 +34,7 @@ + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "qemu/timer.h" ++#include "qemu/mmap-alloc.h" + #include "sysemu/sysemu.h" + #include "sysemu/hw_accel.h" + #include "hw/hw.h" +@@ -139,6 +140,7 @@ static int cap_mem_op; + static int cap_s390_irq; + static int cap_ri; + static int cap_gs; ++static int cap_hpage_1m; + + static int active_cmma; + +@@ -220,9 +222,9 @@ static void kvm_s390_enable_cmma(void) + .attr = KVM_S390_VM_MEM_ENABLE_CMMA, + }; + +- if (mem_path) { ++ if (cap_hpage_1m) { + warn_report("CMM will not be enabled because it is not " +- "compatible with hugetlbfs."); ++ "compatible with huge memory backings."); + return; + } + rc = kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, &attr); +@@ -281,10 +283,38 @@ void kvm_s390_crypto_reset(void) + } + } + ++static int kvm_s390_configure_mempath_backing(KVMState *s) ++{ ++ size_t path_psize = qemu_mempath_getpagesize(mem_path); ++ ++ if (path_psize == 4 * 1024) { ++ return 0; ++ } ++ ++ if (path_psize != 1024 * 1024) { ++ error_report("Memory backing with 2G pages was specified, " ++ "but KVM does not support this memory 
backing"); ++ return -EINVAL; ++ } ++ ++ if (kvm_vm_enable_cap(s, KVM_CAP_S390_HPAGE_1M, 0)) { ++ error_report("Memory backing with 1M pages was specified, " ++ "but KVM does not support this memory backing"); ++ return -EINVAL; ++ } ++ ++ cap_hpage_1m = 1; ++ return 0; ++} ++ + int kvm_arch_init(MachineState *ms, KVMState *s) + { + MachineClass *mc = MACHINE_GET_CLASS(ms); + ++ if (mem_path && kvm_s390_configure_mempath_backing(s)) { ++ return -EINVAL; ++ } ++ + mc->default_cpu_type = S390_CPU_TYPE_NAME("host"); + cap_sync_regs = kvm_check_extension(s, KVM_CAP_SYNC_REGS); + cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF); +-- +1.8.3.1 + diff --git a/0014-s390x-kvm-add-etoken-facility.patch b/0014-s390x-kvm-add-etoken-facility.patch new file mode 100644 index 0000000..e035dbc --- /dev/null +++ b/0014-s390x-kvm-add-etoken-facility.patch @@ -0,0 +1,190 @@ +From 4b36866031e559bc895e64ecb20417323cb03e3d Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Thu, 9 Aug 2018 10:15:09 +0000 +Subject: s390x/kvm: add etoken facility + +RH-Author: Thomas Huth +Message-id: <1533813309-9643-3-git-send-email-thuth@redhat.com> +Patchwork-id: 81687 +O-Subject: [RHEL-8.0 qemu-kvm PATCH 2/2] s390x/kvm: add etoken facility +Bugzilla: 1612938 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jens Freimann + +Provide the etoken facility. We need to handle cpu model, migration and +clear reset. 
+ +Signed-off-by: Christian Borntraeger +Acked-by: Janosch Frank +Signed-off-by: Thomas Huth +--- + target/s390x/cpu.h | 3 +++ + target/s390x/cpu_features.c | 3 ++- + target/s390x/cpu_features_def.h | 3 ++- + target/s390x/gen-features.c | 3 ++- + target/s390x/kvm.c | 11 +++++++++++ + target/s390x/machine.c | 20 +++++++++++++++++++- + 6 files changed, 39 insertions(+), 4 deletions(-) + +diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h +index 2c3dd2d..21b2f21 100644 +--- a/target/s390x/cpu.h ++++ b/target/s390x/cpu.h +@@ -2,6 +2,7 @@ + * S/390 virtual CPU header + * + * Copyright (c) 2009 Ulrich Hecht ++ * Copyright IBM Corp. 2012, 2018 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public +@@ -68,6 +69,8 @@ struct CPUS390XState { + uint32_t aregs[16]; /* access registers */ + uint8_t riccb[64]; /* runtime instrumentation control */ + uint64_t gscb[4]; /* guarded storage control */ ++ uint64_t etoken; /* etoken */ ++ uint64_t etoken_extension; /* etoken extension */ + + /* Fields up to this point are not cleared by initial CPU reset */ + struct {} start_initial_reset_fields; +diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c +index 3b9e274..e05e6aa 100644 +--- a/target/s390x/cpu_features.c ++++ b/target/s390x/cpu_features.c +@@ -1,7 +1,7 @@ + /* + * CPU features/facilities for s390x + * +- * Copyright 2016 IBM Corp. ++ * Copyright IBM Corp. 
2016, 2018 + * + * Author(s): David Hildenbrand + * +@@ -106,6 +106,7 @@ static const S390FeatDef s390_features[] = { + FEAT_INIT("irbm", S390_FEAT_TYPE_STFL, 145, "Insert-reference-bits-multiple facility"), + FEAT_INIT("msa8-base", S390_FEAT_TYPE_STFL, 146, "Message-security-assist-extension-8 facility (excluding subfunctions)"), + FEAT_INIT("cmmnt", S390_FEAT_TYPE_STFL, 147, "CMM: ESSA-enhancement (no translate) facility"), ++ FEAT_INIT("etoken", S390_FEAT_TYPE_STFL, 156, "Etoken facility"), + + /* SCLP SCCB Byte 80 - 98 (bit numbers relative to byte-80) */ + FEAT_INIT("gsls", S390_FEAT_TYPE_SCLP_CONF_CHAR, 40, "SIE: Guest-storage-limit-suppression facility"), +diff --git a/target/s390x/cpu_features_def.h b/target/s390x/cpu_features_def.h +index 7c5915c..ac2c947 100644 +--- a/target/s390x/cpu_features_def.h ++++ b/target/s390x/cpu_features_def.h +@@ -1,7 +1,7 @@ + /* + * CPU features/facilities for s390 + * +- * Copyright 2016 IBM Corp. ++ * Copyright IBM Corp. 2016, 2018 + * + * Author(s): Michael Mueller + * David Hildenbrand +@@ -93,6 +93,7 @@ typedef enum { + S390_FEAT_INSERT_REFERENCE_BITS_MULT, + S390_FEAT_MSA_EXT_8, + S390_FEAT_CMM_NT, ++ S390_FEAT_ETOKEN, + + /* Sclp Conf Char */ + S390_FEAT_SIE_GSLS, +diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c +index 6626b6f..5af042c 100644 +--- a/target/s390x/gen-features.c ++++ b/target/s390x/gen-features.c +@@ -1,7 +1,7 @@ + /* + * S390 feature list generator + * +- * Copyright 2016 IBM Corp. ++ * Copyright IBM Corp. 
2016, 2018 + * + * Author(s): Michael Mueller + * David Hildenbrand +@@ -471,6 +471,7 @@ static uint16_t full_GEN14_GA1[] = { + S390_FEAT_GROUP_MSA_EXT_7, + S390_FEAT_GROUP_MSA_EXT_8, + S390_FEAT_CMM_NT, ++ S390_FEAT_ETOKEN, + S390_FEAT_HPMA2, + S390_FEAT_SIE_KSS, + S390_FEAT_GROUP_MULTIPLE_EPOCH_PTFF, +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index c36ff36f..71d90f2 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -523,6 +523,12 @@ int kvm_arch_put_registers(CPUState *cs, int level) + cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_BPBC; + } + ++ if (can_sync_regs(cs, KVM_SYNC_ETOKEN)) { ++ cs->kvm_run->s.regs.etoken = env->etoken; ++ cs->kvm_run->s.regs.etoken_extension = env->etoken_extension; ++ cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_ETOKEN; ++ } ++ + /* Finally the prefix */ + if (can_sync_regs(cs, KVM_SYNC_PREFIX)) { + cs->kvm_run->s.regs.prefix = env->psa; +@@ -637,6 +643,11 @@ int kvm_arch_get_registers(CPUState *cs) + env->bpbc = cs->kvm_run->s.regs.bpbc; + } + ++ if (can_sync_regs(cs, KVM_SYNC_ETOKEN)) { ++ env->etoken = cs->kvm_run->s.regs.etoken; ++ env->etoken_extension = cs->kvm_run->s.regs.etoken_extension; ++ } ++ + /* pfault parameters */ + if (can_sync_regs(cs, KVM_SYNC_PFAULT)) { + env->pfault_token = cs->kvm_run->s.regs.pft; +diff --git a/target/s390x/machine.c b/target/s390x/machine.c +index bd3230d..cb792aa 100644 +--- a/target/s390x/machine.c ++++ b/target/s390x/machine.c +@@ -1,7 +1,7 @@ + /* + * S390x machine definitions and functions + * +- * Copyright IBM Corp. 2014 ++ * Copyright IBM Corp. 
2014, 2018 + * + * Authors: + * Thomas Huth +@@ -216,6 +216,23 @@ const VMStateDescription vmstate_bpbc = { + } + }; + ++static bool etoken_needed(void *opaque) ++{ ++ return s390_has_feat(S390_FEAT_ETOKEN); ++} ++ ++const VMStateDescription vmstate_etoken = { ++ .name = "cpu/etoken", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .needed = etoken_needed, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT64(env.etoken, S390CPU), ++ VMSTATE_UINT64(env.etoken_extension, S390CPU), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ + const VMStateDescription vmstate_s390_cpu = { + .name = "cpu", + .post_load = cpu_post_load, +@@ -251,6 +268,7 @@ const VMStateDescription vmstate_s390_cpu = { + &vmstate_exval, + &vmstate_gscb, + &vmstate_bpbc, ++ &vmstate_etoken, + NULL + }, + }; +-- +1.8.3.1 + diff --git a/0015-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch b/0015-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch new file mode 100644 index 0000000..988124d --- /dev/null +++ b/0015-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch @@ -0,0 +1,51 @@ +From 79d0599b21b64f8a8107855e844b347d2cc138d9 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 7 Aug 2018 09:05:54 +0000 +Subject: s390x/cpumodel: default enable bpb and ppa15 for z196 and later + +RH-Author: Cornelia Huck +Message-id: <20180807100554.29643-3-cohuck@redhat.com> +Patchwork-id: 81660 +O-Subject: [qemu-kvm RHEL8/virt212 PATCH 2/2] s390x/cpumodel: default enable bpb and ppa15 for z196 and later +Bugzilla: 1595718 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Thomas Huth +RH-Acked-by: Jens Freimann + +Upstream: downstream version of 8727315111 ("s390x/cpumodel: default + enable bpb and ppa15 for z196 and later"); downstream does + not have the upstream machine types, instead we need to + turn off the bits for the RHEL 7.5 machine + +Most systems and host kernels provide the necessary building blocks for +bpb and ppa15. 
We can reverse the logic and default enable those +features, while still allowing to disable it via cpu model. + +So let us add bpb and ppa15 to z196 and later default CPU model for the +qemu rhel7.6.0 machine. (like -cpu z13). Older machine types (i.e. +s390-ccw-virtio-rhel7.5.0) will retain the old value and not provide those +bits in the default model. + +Signed-off-by: Cornelia Huck +--- + hw/s390x/s390-virtio-ccw.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 0f135c9..cdf4558 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -931,6 +931,10 @@ static void ccw_machine_rhel750_instance_options(MachineState *machine) + /* before 2.12 we emulated the very first z900, and RHEL 7.5 is + based on 2.10 */ + s390_set_qemu_cpu_model(0x2064, 7, 1, qemu_cpu_feat); ++ ++ /* bpb and ppa15 were only in the full model in RHEL 7.5 */ ++ s390_cpudef_featoff_greater(11, 1, S390_FEAT_PPA15); ++ s390_cpudef_featoff_greater(11, 1, S390_FEAT_BPB); + } + + static void ccw_machine_rhel750_class_options(MachineClass *mc) +-- +1.8.3.1 + diff --git a/0016-i386-Fix-arch_query_cpu_model_expansion-leak.patch b/0016-i386-Fix-arch_query_cpu_model_expansion-leak.patch new file mode 100644 index 0000000..5bed305 --- /dev/null +++ b/0016-i386-Fix-arch_query_cpu_model_expansion-leak.patch @@ -0,0 +1,87 @@ +From 786fb991b644eddb9f52fd04d377cc7a62685d59 Mon Sep 17 00:00:00 2001 +From: Markus Armbruster +Date: Fri, 31 Aug 2018 13:59:22 +0100 +Subject: i386: Fix arch_query_cpu_model_expansion() leak + +RH-Author: Markus Armbruster +Message-id: <20180831135922.6073-3-armbru@redhat.com> +Patchwork-id: 81980 +O-Subject: [qemu-kvm RHEL8/virt212 PATCH 2/2] i386: Fix arch_query_cpu_model_expansion() leak +Bugzilla: 1615717 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Miroslav Rezanina + +From: Eduardo Habkost + +Reported by Coverity: + +Error: RESOURCE_LEAK (CWE-772): [#def439] 
+qemu-2.12.0/target/i386/cpu.c:3179: alloc_fn: Storage is returned from allocation function "qdict_new". +qemu-2.12.0/qobject/qdict.c:34:5: alloc_fn: Storage is returned from allocation function "g_malloc0". +qemu-2.12.0/qobject/qdict.c:34:5: var_assign: Assigning: "qdict" = "g_malloc0(4120UL)". +qemu-2.12.0/qobject/qdict.c:37:5: return_alloc: Returning allocated memory "qdict". +qemu-2.12.0/target/i386/cpu.c:3179: var_assign: Assigning: "props" = storage returned from "qdict_new()". +qemu-2.12.0/target/i386/cpu.c:3217: leaked_storage: Variable "props" going out of scope leaks the storage it points to. + +This was introduced by commit b8097deb359b ("i386: Improve +query-cpu-model-expansion full mode"). + +The leak is only theoretical: if ret->model->props is set to +props, the qapi_free_CpuModelExpansionInfo() call will free props +too in case of errors. The only way for this to not happen is if +we enter the default branch of the switch statement, which would +never happen because all CpuModelExpansionType values are being +handled. + +It's still worth to change this to make the allocation logic +easier to follow and make the Coverity error go away. To make +everything simpler, initialize ret->model and ret->model->props +earlier in the function. + +While at it, remove redundant check for !prop because prop is +always initialized at the beginning of the function. + +Fixes: b8097deb359bbbd92592b9670adfe9e245b2d0bd +Signed-off-by: Eduardo Habkost +Message-Id: <20180816183509.8231-1-ehabkost@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit e38bf612477fca62b205ebd909b1372a7e45a8c0) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 051018a..71e2808 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3784,6 +3784,9 @@ arch_query_cpu_model_expansion(CpuModelExpansionType type, + } + + props = qdict_new(); ++ ret->model = g_new0(CpuModelInfo, 1); ++ ret->model->props = QOBJECT(props); ++ ret->model->has_props = true; + + switch (type) { + case CPU_MODEL_EXPANSION_TYPE_STATIC: +@@ -3804,15 +3807,9 @@ arch_query_cpu_model_expansion(CpuModelExpansionType type, + goto out; + } + +- if (!props) { +- props = qdict_new(); +- } + x86_cpu_to_dict(xc, props); + +- ret->model = g_new0(CpuModelInfo, 1); + ret->model->name = g_strdup(base_name); +- ret->model->props = QOBJECT(props); +- ret->model->has_props = true; + + out: + object_unref(OBJECT(xc)); +-- +1.8.3.1 + diff --git a/0017-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch b/0017-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch new file mode 100644 index 0000000..a731164 --- /dev/null +++ b/0017-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch @@ -0,0 +1,54 @@ +From 25abf99ebc7004999e79fa5e5b1370e4dfdaeed2 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Tue, 21 Aug 2018 19:15:41 +0100 +Subject: i386: Disable TOPOEXT by default on "-cpu host" + +RH-Author: Eduardo Habkost +Message-id: <20180821191541.31916-2-ehabkost@redhat.com> +Patchwork-id: 81904 +O-Subject: [qemu-kvm RHEL8/virt212 PATCH v2 1/1] i386: Disable TOPOEXT by default on "-cpu host" +Bugzilla: 1619804 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Igor Mammedov + +Enabling TOPOEXT is always allowed, but it can't be enabled +blindly by "-cpu host" because it may make guests crash if the +rest of the cache topology information isn't provided or isn't +consistent. 
+ +This addresses the bug reported at: +https://bugzilla.redhat.com/show_bug.cgi?id=1613277 + +Signed-off-by: Eduardo Habkost +Message-Id: <20180809221852.15285-1-ehabkost@redhat.com> +Tested-by: Richard W.M. Jones +Reviewed-by: Babu Moger +Signed-off-by: Eduardo Habkost +(cherry picked from commit 7210a02c58572b2686a3a8d610c6628f87864aed) +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 71e2808..198d578 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -849,6 +849,12 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + }, + .cpuid_eax = 0x80000001, .cpuid_reg = R_ECX, + .tcg_features = TCG_EXT3_FEATURES, ++ /* ++ * TOPOEXT is always allowed but can't be enabled blindly by ++ * "-cpu host", as it requires consistent cache topology info ++ * to be provided so it doesn't confuse guests. ++ */ ++ .no_autoenable_flags = CPUID_EXT3_TOPOEXT, + }, + [FEAT_C000_0001_EDX] = { + .feat_names = { +-- +1.8.3.1 + diff --git a/0018-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch b/0018-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch new file mode 100644 index 0000000..fc5784f --- /dev/null +++ b/0018-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch @@ -0,0 +1,77 @@ +From 49d4861ffc56cb233dacc1abcb2a5ec608e599ab Mon Sep 17 00:00:00 2001 +From: Jeffrey Cody +Date: Wed, 26 Sep 2018 04:08:14 +0100 +Subject: curl: Make sslverify=off disable host as well as peer verification. + +RH-Author: Jeffrey Cody +Message-id: <543d2f667af465dd809329fcba5175bc974d58d4.1537933576.git.jcody@redhat.com> +Patchwork-id: 82293 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 1/1] curl: Make sslverify=off disable host as well as peer verification. +Bugzilla: 1575925 +RH-Acked-by: Richard Jones +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: "Richard W.M. 
Jones" + +The sslverify setting is supposed to turn off all TLS certificate +checks in libcurl. However because of the way we use it, it only +turns off peer certificate authenticity checks +(CURLOPT_SSL_VERIFYPEER). This patch makes it also turn off the check +that the server name in the certificate is the same as the server +you're connecting to (CURLOPT_SSL_VERIFYHOST). + +We can use Google's server at 8.8.8.8 which happens to have a bad TLS +certificate to demonstrate this: + +$ ./qemu-img create -q -f qcow2 -b 'json: { "file.sslverify": "off", "file.driver": "https", "file.url": "https://8.8.8.8/foo" }' /var/tmp/file.qcow2 +qemu-img: /var/tmp/file.qcow2: CURL: Error opening file: SSL: no alternative certificate subject name matches target host name '8.8.8.8' +Could not open backing image to determine size. + +With this patch applied, qemu-img connects to the server regardless of +the bad certificate: + +$ ./qemu-img create -q -f qcow2 -b 'json: { "file.sslverify": "off", "file.driver": "https", "file.url": "https://8.8.8.8/foo" }' /var/tmp/file.qcow2 +qemu-img: /var/tmp/file.qcow2: CURL: Error opening file: The requested URL returned error: 404 Not Found + +(The 404 error is expected because 8.8.8.8 is not actually serving a +file called "/foo".) + +Of course the default (without sslverify=off) remains to always check +the certificate: + +$ ./qemu-img create -q -f qcow2 -b 'json: { "file.driver": "https", "file.url": "https://8.8.8.8/foo" }' /var/tmp/file.qcow2 +qemu-img: /var/tmp/file.qcow2: CURL: Error opening file: SSL: no alternative certificate subject name matches target host name '8.8.8.8' +Could not open backing image to determine size. + +Further information about the two settings is available here: + +https://curl.haxx.se/libcurl/c/CURLOPT_SSL_VERIFYPEER.html +https://curl.haxx.se/libcurl/c/CURLOPT_SSL_VERIFYHOST.html + +Signed-off-by: Richard W.M. 
Jones +Message-id: 20180914095622.19698-1-rjones@redhat.com +Signed-off-by: Jeff Cody +(cherry picked from commit 637fa44ab80c6b317adf1d117494325a95daad60) +Signed-off-by: Jeff Cody +Signed-off-by: Danilo C. L. de Paula +--- + block/curl.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/block/curl.c b/block/curl.c +index 229bb84..fabb2b4 100644 +--- a/block/curl.c ++++ b/block/curl.c +@@ -483,6 +483,8 @@ static int curl_init_state(BDRVCURLState *s, CURLState *state) + curl_easy_setopt(state->curl, CURLOPT_URL, s->url); + curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER, + (long) s->sslverify); ++ curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYHOST, ++ s->sslverify ? 2L : 0L); + if (s->cookie) { + curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie); + } +-- +1.8.3.1 + diff --git a/0019-migration-postcopy-Clear-have_listen_thread.patch b/0019-migration-postcopy-Clear-have_listen_thread.patch new file mode 100644 index 0000000..f220ad4 --- /dev/null +++ b/0019-migration-postcopy-Clear-have_listen_thread.patch @@ -0,0 +1,51 @@ +From 324493e716a2e5fa60b6b013d5df831b03f2a678 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 1 Oct 2018 10:54:48 +0100 +Subject: migration/postcopy: Clear have_listen_thread + +RH-Author: Dr. David Alan Gilbert +Message-id: <20181001105449.41090-2-dgilbert@redhat.com> +Patchwork-id: 82326 +O-Subject: [RHEL-8.0 qemu-kvm PATCH 1/2] migration/postcopy: Clear have_listen_thread +Bugzilla: 1608765 +RH-Acked-by: Pankaj Gupta +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Laurent Vivier + +From: "Dr. David Alan Gilbert" + +Clear have_listen_thread when we exit the thread. +The fallout from this was that various things thought there was +an ongoing postcopy after the postcopy had finished. + +The case that failed was postcopy->savevm->loadvm. + +This corresponds to RH bug https://bugzilla.redhat.com/show_bug.cgi?id=1608765 + +Signed-off-by: Dr. 
David Alan Gilbert +Message-Id: <20180914170430.54271-2-dgilbert@redhat.com> +Reviewed-by: Peter Xu +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 9cf4bb8730c669c40550e635a9e2b8ee4f1664ca) + Manual merge due to context + +Signed-off-by: Danilo C. L. de Paula +--- + migration/savevm.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/migration/savevm.c b/migration/savevm.c +index 7f92567..762c4b2 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1676,6 +1676,7 @@ static void *postcopy_ram_listen_thread(void *opaque) + migration_incoming_state_destroy(); + qemu_loadvm_state_cleanup(); + ++ mis->have_listen_thread = false; + return NULL; + } + +-- +1.8.3.1 + diff --git a/0020-migration-cleanup-in-error-paths-in-loadvm.patch b/0020-migration-cleanup-in-error-paths-in-loadvm.patch new file mode 100644 index 0000000..a0fea63 --- /dev/null +++ b/0020-migration-cleanup-in-error-paths-in-loadvm.patch @@ -0,0 +1,52 @@ +From 005c4cb023ffdcb8888c7453d263cab95d5b1b1c Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 1 Oct 2018 10:54:49 +0100 +Subject: migration: cleanup in error paths in loadvm + +RH-Author: Dr. David Alan Gilbert +Message-id: <20181001105449.41090-3-dgilbert@redhat.com> +Patchwork-id: 82325 +O-Subject: [RHEL-8.0 qemu-kvm PATCH 2/2] migration: cleanup in error paths in loadvm +Bugzilla: 1608765 +RH-Acked-by: Pankaj Gupta +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Laurent Vivier + +From: "Dr. David Alan Gilbert" + +There's a couple of error paths in qemu_loadvm_state +which happen early on but after we've initialised the +load state; that needs to be cleaned up otherwise +we can hit asserts if the state gets reinitialised later. + +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20180914170430.54271-3-dgilbert@redhat.com> +Reviewed-by: Peter Xu +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 096c83b7219c5a2145435afc8be750281e9cb447) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/savevm.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/migration/savevm.c b/migration/savevm.c +index 762c4b2..27e054d 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -2328,11 +2328,13 @@ int qemu_loadvm_state(QEMUFile *f) + if (migrate_get_current()->send_configuration) { + if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) { + error_report("Configuration section missing"); ++ qemu_loadvm_state_cleanup(); + return -EINVAL; + } + ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0); + + if (ret) { ++ qemu_loadvm_state_cleanup(); + return ret; + } + } +-- +1.8.3.1 + diff --git a/0021-jobs-change-start-callback-to-run-callback.patch b/0021-jobs-change-start-callback-to-run-callback.patch new file mode 100644 index 0000000..93e8b27 --- /dev/null +++ b/0021-jobs-change-start-callback-to-run-callback.patch @@ -0,0 +1,372 @@ +From 287cb50c08d64773470732be8a6a566bcdde4b75 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:07 +0100 +Subject: jobs: change start callback to run callback + +RH-Author: John Snow +Message-id: <20180925223431.24791-2-jsnow@redhat.com> +Patchwork-id: 82261 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 01/25] jobs: change start callback to run callback +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Presently we codify the entry point for a job as the "start" callback, +but a more apt name would be "run" to clarify the idea that when this +function returns we consider the job to have "finished," except for +any cleanup which occurs in separate callbacks later. + +As part of this clarification, change the signature to include an error +object and a return code. The error ptr is not yet used, and the return +code while captured, will be overwritten by actions in the job_completed +function. 
+ +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20180830015734.19765-2-jsnow@redhat.com +Reviewed-by: Jeff Cody +Signed-off-by: Max Reitz +(cherry picked from commit f67432a2019caf05b57a146bf45c1024a5cb608e) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +--- + block/backup.c | 7 ++++--- + block/commit.c | 7 ++++--- + block/create.c | 8 +++++--- + block/mirror.c | 10 ++++++---- + block/stream.c | 7 ++++--- + include/qemu/job.h | 2 +- + job.c | 6 +++--- + tests/test-bdrv-drain.c | 7 ++++--- + tests/test-blockjob-txn.c | 16 ++++++++-------- + tests/test-blockjob.c | 7 ++++--- + 10 files changed, 43 insertions(+), 34 deletions(-) + +diff --git a/block/backup.c b/block/backup.c +index 8630d32..5d47781 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -480,9 +480,9 @@ static void backup_incremental_init_copy_bitmap(BackupBlockJob *job) + bdrv_dirty_iter_free(dbi); + } + +-static void coroutine_fn backup_run(void *opaque) ++static int coroutine_fn backup_run(Job *opaque_job, Error **errp) + { +- BackupBlockJob *job = opaque; ++ BackupBlockJob *job = container_of(opaque_job, BackupBlockJob, common.job); + BackupCompleteData *data; + BlockDriverState *bs = blk_bs(job->common.blk); + int64_t offset, nb_clusters; +@@ -587,6 +587,7 @@ static void coroutine_fn backup_run(void *opaque) + data = g_malloc(sizeof(*data)); + data->ret = ret; + job_defer_to_main_loop(&job->common.job, backup_complete, data); ++ return ret; + } + + static const BlockJobDriver backup_job_driver = { +@@ -596,7 +597,7 @@ static const BlockJobDriver backup_job_driver = { + .free = block_job_free, + .user_resume = block_job_user_resume, + .drain = block_job_drain, +- .start = backup_run, ++ .run = backup_run, + .commit = backup_commit, + .abort = backup_abort, + .clean = backup_clean, +diff --git a/block/commit.c b/block/commit.c +index e1814d9..905a1c5 100644 +--- a/block/commit.c ++++ b/block/commit.c +@@ -134,9 +134,9 @@ static void commit_complete(Job *job, void 
*opaque) + bdrv_unref(top); + } + +-static void coroutine_fn commit_run(void *opaque) ++static int coroutine_fn commit_run(Job *job, Error **errp) + { +- CommitBlockJob *s = opaque; ++ CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); + CommitCompleteData *data; + int64_t offset; + uint64_t delay_ns = 0; +@@ -213,6 +213,7 @@ out: + data = g_malloc(sizeof(*data)); + data->ret = ret; + job_defer_to_main_loop(&s->common.job, commit_complete, data); ++ return ret; + } + + static const BlockJobDriver commit_job_driver = { +@@ -222,7 +223,7 @@ static const BlockJobDriver commit_job_driver = { + .free = block_job_free, + .user_resume = block_job_user_resume, + .drain = block_job_drain, +- .start = commit_run, ++ .run = commit_run, + }, + }; + +diff --git a/block/create.c b/block/create.c +index 915cd41..04733c3 100644 +--- a/block/create.c ++++ b/block/create.c +@@ -45,9 +45,9 @@ static void blockdev_create_complete(Job *job, void *opaque) + job_completed(job, s->ret, s->err); + } + +-static void coroutine_fn blockdev_create_run(void *opaque) ++static int coroutine_fn blockdev_create_run(Job *job, Error **errp) + { +- BlockdevCreateJob *s = opaque; ++ BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common); + + job_progress_set_remaining(&s->common, 1); + s->ret = s->drv->bdrv_co_create(s->opts, &s->err); +@@ -55,12 +55,14 @@ static void coroutine_fn blockdev_create_run(void *opaque) + + qapi_free_BlockdevCreateOptions(s->opts); + job_defer_to_main_loop(&s->common, blockdev_create_complete, NULL); ++ ++ return s->ret; + } + + static const JobDriver blockdev_create_job_driver = { + .instance_size = sizeof(BlockdevCreateJob), + .job_type = JOB_TYPE_CREATE, +- .start = blockdev_create_run, ++ .run = blockdev_create_run, + }; + + void qmp_blockdev_create(const char *job_id, BlockdevCreateOptions *options, +diff --git a/block/mirror.c b/block/mirror.c +index b48c3f8..b3363e9 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -812,9 +812,9 @@ 
static int mirror_flush(MirrorBlockJob *s) + return ret; + } + +-static void coroutine_fn mirror_run(void *opaque) ++static int coroutine_fn mirror_run(Job *job, Error **errp) + { +- MirrorBlockJob *s = opaque; ++ MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); + MirrorExitData *data; + BlockDriverState *bs = s->mirror_top_bs->backing->bs; + BlockDriverState *target_bs = blk_bs(s->target); +@@ -1041,7 +1041,9 @@ immediate_exit: + if (need_drain) { + bdrv_drained_begin(bs); + } ++ + job_defer_to_main_loop(&s->common.job, mirror_exit, data); ++ return ret; + } + + static void mirror_complete(Job *job, Error **errp) +@@ -1138,7 +1140,7 @@ static const BlockJobDriver mirror_job_driver = { + .free = block_job_free, + .user_resume = block_job_user_resume, + .drain = block_job_drain, +- .start = mirror_run, ++ .run = mirror_run, + .pause = mirror_pause, + .complete = mirror_complete, + }, +@@ -1154,7 +1156,7 @@ static const BlockJobDriver commit_active_job_driver = { + .free = block_job_free, + .user_resume = block_job_user_resume, + .drain = block_job_drain, +- .start = mirror_run, ++ .run = mirror_run, + .pause = mirror_pause, + .complete = mirror_complete, + }, +diff --git a/block/stream.c b/block/stream.c +index 9264b68..b4b987d 100644 +--- a/block/stream.c ++++ b/block/stream.c +@@ -97,9 +97,9 @@ out: + g_free(data); + } + +-static void coroutine_fn stream_run(void *opaque) ++static int coroutine_fn stream_run(Job *job, Error **errp) + { +- StreamBlockJob *s = opaque; ++ StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); + StreamCompleteData *data; + BlockBackend *blk = s->common.blk; + BlockDriverState *bs = blk_bs(blk); +@@ -206,6 +206,7 @@ out: + data = g_malloc(sizeof(*data)); + data->ret = ret; + job_defer_to_main_loop(&s->common.job, stream_complete, data); ++ return ret; + } + + static const BlockJobDriver stream_job_driver = { +@@ -213,7 +214,7 @@ static const BlockJobDriver stream_job_driver = { + .instance_size = 
sizeof(StreamBlockJob), + .job_type = JOB_TYPE_STREAM, + .free = block_job_free, +- .start = stream_run, ++ .run = stream_run, + .user_resume = block_job_user_resume, + .drain = block_job_drain, + }, +diff --git a/include/qemu/job.h b/include/qemu/job.h +index 18c9223..9cf463d 100644 +--- a/include/qemu/job.h ++++ b/include/qemu/job.h +@@ -169,7 +169,7 @@ struct JobDriver { + JobType job_type; + + /** Mandatory: Entrypoint for the Coroutine. */ +- CoroutineEntry *start; ++ int coroutine_fn (*run)(Job *job, Error **errp); + + /** + * If the callback is not NULL, it will be invoked when the job transitions +diff --git a/job.c b/job.c +index fa671b4..898260b 100644 +--- a/job.c ++++ b/job.c +@@ -544,16 +544,16 @@ static void coroutine_fn job_co_entry(void *opaque) + { + Job *job = opaque; + +- assert(job && job->driver && job->driver->start); ++ assert(job && job->driver && job->driver->run); + job_pause_point(job); +- job->driver->start(job); ++ job->ret = job->driver->run(job, NULL); + } + + + void job_start(Job *job) + { + assert(job && !job_started(job) && job->paused && +- job->driver && job->driver->start); ++ job->driver && job->driver->run); + job->co = qemu_coroutine_create(job_co_entry, job); + job->pause_count--; + job->busy = true; +diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c +index 17bb850..a753386 100644 +--- a/tests/test-bdrv-drain.c ++++ b/tests/test-bdrv-drain.c +@@ -757,9 +757,9 @@ static void test_job_completed(Job *job, void *opaque) + job_completed(job, 0, NULL); + } + +-static void coroutine_fn test_job_start(void *opaque) ++static int coroutine_fn test_job_run(Job *job, Error **errp) + { +- TestBlockJob *s = opaque; ++ TestBlockJob *s = container_of(job, TestBlockJob, common.job); + + job_transition_to_ready(&s->common.job); + while (!s->should_complete) { +@@ -771,6 +771,7 @@ static void coroutine_fn test_job_start(void *opaque) + } + + job_defer_to_main_loop(&s->common.job, test_job_completed, NULL); ++ return 0; + } + + 
static void test_job_complete(Job *job, Error **errp) +@@ -785,7 +786,7 @@ BlockJobDriver test_job_driver = { + .free = block_job_free, + .user_resume = block_job_user_resume, + .drain = block_job_drain, +- .start = test_job_start, ++ .run = test_job_run, + .complete = test_job_complete, + }, + }; +diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c +index 58d9b87..3194924 100644 +--- a/tests/test-blockjob-txn.c ++++ b/tests/test-blockjob-txn.c +@@ -38,25 +38,25 @@ static void test_block_job_complete(Job *job, void *opaque) + bdrv_unref(bs); + } + +-static void coroutine_fn test_block_job_run(void *opaque) ++static int coroutine_fn test_block_job_run(Job *job, Error **errp) + { +- TestBlockJob *s = opaque; +- BlockJob *job = &s->common; ++ TestBlockJob *s = container_of(job, TestBlockJob, common.job); + + while (s->iterations--) { + if (s->use_timer) { +- job_sleep_ns(&job->job, 0); ++ job_sleep_ns(job, 0); + } else { +- job_yield(&job->job); ++ job_yield(job); + } + +- if (job_is_cancelled(&job->job)) { ++ if (job_is_cancelled(job)) { + break; + } + } + +- job_defer_to_main_loop(&job->job, test_block_job_complete, ++ job_defer_to_main_loop(job, test_block_job_complete, + (void *)(intptr_t)s->rc); ++ return s->rc; + } + + typedef struct { +@@ -80,7 +80,7 @@ static const BlockJobDriver test_block_job_driver = { + .free = block_job_free, + .user_resume = block_job_user_resume, + .drain = block_job_drain, +- .start = test_block_job_run, ++ .run = test_block_job_run, + }, + }; + +diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c +index cb42f06..b0462bf 100644 +--- a/tests/test-blockjob.c ++++ b/tests/test-blockjob.c +@@ -176,9 +176,9 @@ static void cancel_job_complete(Job *job, Error **errp) + s->should_complete = true; + } + +-static void coroutine_fn cancel_job_start(void *opaque) ++static int coroutine_fn cancel_job_run(Job *job, Error **errp) + { +- CancelJob *s = opaque; ++ CancelJob *s = container_of(job, CancelJob, common.job); + + while 
(!s->should_complete) { + if (job_is_cancelled(&s->common.job)) { +@@ -194,6 +194,7 @@ static void coroutine_fn cancel_job_start(void *opaque) + + defer: + job_defer_to_main_loop(&s->common.job, cancel_job_completed, s); ++ return 0; + } + + static const BlockJobDriver test_cancel_driver = { +@@ -202,7 +203,7 @@ static const BlockJobDriver test_cancel_driver = { + .free = block_job_free, + .user_resume = block_job_user_resume, + .drain = block_job_drain, +- .start = cancel_job_start, ++ .run = cancel_job_run, + .complete = cancel_job_complete, + }, + }; +-- +1.8.3.1 + diff --git a/0022-jobs-canonize-Error-object.patch b/0022-jobs-canonize-Error-object.patch new file mode 100644 index 0000000..ba09278 --- /dev/null +++ b/0022-jobs-canonize-Error-object.patch @@ -0,0 +1,283 @@ +From 9dff1ec5bdde5e8bd8745d2e0697cc6e28c87214 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Wed, 29 Aug 2018 21:57:27 -0400 +Subject: jobs: canonize Error object + +RH-Author: John Snow +Message-id: <20180925223431.24791-3-jsnow@redhat.com> +Patchwork-id: 82262 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 02/25] jobs: canonize Error object +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Jobs presently use both an Error object in the case of the create job, +and char strings in the case of generic errors elsewhere. + +Unify the two paths as just j->err, and remove the extra argument from +job_completed. The integer error code for job_completed is kept for now, +to be removed shortly in a separate patch. 
+ +Signed-off-by: John Snow +Message-id: 20180830015734.19765-3-jsnow@redhat.com +[mreitz: Dropped a superfluous g_strdup()] +Reviewed-by: Eric Blake +Signed-off-by: Max Reitz +(cherry picked from commit 3d1f8b07a4c241f81949eff507d9f3a8fd73b87b) +Signed-off-by: John Snow +--- + block/backup.c | 2 +- + block/commit.c | 2 +- + block/create.c | 5 ++--- + block/mirror.c | 2 +- + block/stream.c | 2 +- + include/qemu/job.h | 14 ++++++++------ + job-qmp.c | 5 +++-- + job.c | 18 ++++++------------ + tests/test-bdrv-drain.c | 2 +- + tests/test-blockjob-txn.c | 2 +- + tests/test-blockjob.c | 2 +- + 11 files changed, 26 insertions(+), 30 deletions(-) + +diff --git a/block/backup.c b/block/backup.c +index 5d47781..1e965d5 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -388,7 +388,7 @@ static void backup_complete(Job *job, void *opaque) + { + BackupCompleteData *data = opaque; + +- job_completed(job, data->ret, NULL); ++ job_completed(job, data->ret); + g_free(data); + } + +diff --git a/block/commit.c b/block/commit.c +index 905a1c5..af7579d 100644 +--- a/block/commit.c ++++ b/block/commit.c +@@ -117,7 +117,7 @@ static void commit_complete(Job *job, void *opaque) + * bdrv_set_backing_hd() to fail. 
*/ + block_job_remove_all_bdrv(bjob); + +- job_completed(job, ret, NULL); ++ job_completed(job, ret); + g_free(data); + + /* If bdrv_drop_intermediate() didn't already do that, remove the commit +diff --git a/block/create.c b/block/create.c +index 04733c3..26a385c 100644 +--- a/block/create.c ++++ b/block/create.c +@@ -35,14 +35,13 @@ typedef struct BlockdevCreateJob { + BlockDriver *drv; + BlockdevCreateOptions *opts; + int ret; +- Error *err; + } BlockdevCreateJob; + + static void blockdev_create_complete(Job *job, void *opaque) + { + BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common); + +- job_completed(job, s->ret, s->err); ++ job_completed(job, s->ret); + } + + static int coroutine_fn blockdev_create_run(Job *job, Error **errp) +@@ -50,7 +49,7 @@ static int coroutine_fn blockdev_create_run(Job *job, Error **errp) + BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common); + + job_progress_set_remaining(&s->common, 1); +- s->ret = s->drv->bdrv_co_create(s->opts, &s->err); ++ s->ret = s->drv->bdrv_co_create(s->opts, errp); + job_progress_update(&s->common, 1); + + qapi_free_BlockdevCreateOptions(s->opts); +diff --git a/block/mirror.c b/block/mirror.c +index b3363e9..6637f2b 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -710,7 +710,7 @@ static void mirror_exit(Job *job, void *opaque) + blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort); + + bs_opaque->job = NULL; +- job_completed(job, data->ret, NULL); ++ job_completed(job, data->ret); + + g_free(data); + bdrv_drained_end(src); +diff --git a/block/stream.c b/block/stream.c +index b4b987d..26a7753 100644 +--- a/block/stream.c ++++ b/block/stream.c +@@ -93,7 +93,7 @@ out: + } + + g_free(s->backing_file_str); +- job_completed(job, data->ret, NULL); ++ job_completed(job, data->ret); + g_free(data); + } + +diff --git a/include/qemu/job.h b/include/qemu/job.h +index 9cf463d..e0e9987 100644 +--- a/include/qemu/job.h ++++ b/include/qemu/job.h +@@ -124,12 +124,16 @@ typedef struct 
Job { + /** Estimated progress_current value at the completion of the job */ + int64_t progress_total; + +- /** Error string for a failed job (NULL if, and only if, job->ret == 0) */ +- char *error; +- + /** ret code passed to job_completed. */ + int ret; + ++ /** ++ * Error object for a failed job. ++ * If job->ret is nonzero and an error object was not set, it will be set ++ * to strerror(-job->ret) during job_completed. ++ */ ++ Error *err; ++ + /** The completion function that will be called when the job completes. */ + BlockCompletionFunc *cb; + +@@ -484,15 +488,13 @@ void job_transition_to_ready(Job *job); + /** + * @job: The job being completed. + * @ret: The status code. +- * @error: The error message for a failing job (only with @ret < 0). If @ret is +- * negative, but NULL is given for @error, strerror() is used. + * + * Marks @job as completed. If @ret is non-zero, the job transaction it is part + * of is aborted. If @ret is zero, the job moves into the WAITING state. If it + * is the last job to complete in its transaction, all jobs in the transaction + * move from WAITING to PENDING. + */ +-void job_completed(Job *job, int ret, Error *error); ++void job_completed(Job *job, int ret); + + /** Asynchronously complete the specified @job. */ + void job_complete(Job *job, Error **errp); +diff --git a/job-qmp.c b/job-qmp.c +index 410775d..a969b2b 100644 +--- a/job-qmp.c ++++ b/job-qmp.c +@@ -146,8 +146,9 @@ static JobInfo *job_query_single(Job *job, Error **errp) + .status = job->status, + .current_progress = job->progress_current, + .total_progress = job->progress_total, +- .has_error = !!job->error, +- .error = g_strdup(job->error), ++ .has_error = !!job->err, ++ .error = job->err ? 
\ ++ g_strdup(error_get_pretty(job->err)) : NULL, + }; + + return info; +diff --git a/job.c b/job.c +index 898260b..276024a 100644 +--- a/job.c ++++ b/job.c +@@ -369,7 +369,7 @@ void job_unref(Job *job) + + QLIST_REMOVE(job, job_list); + +- g_free(job->error); ++ error_free(job->err); + g_free(job->id); + g_free(job); + } +@@ -546,7 +546,7 @@ static void coroutine_fn job_co_entry(void *opaque) + + assert(job && job->driver && job->driver->run); + job_pause_point(job); +- job->ret = job->driver->run(job, NULL); ++ job->ret = job->driver->run(job, &job->err); + } + + +@@ -666,8 +666,8 @@ static void job_update_rc(Job *job) + job->ret = -ECANCELED; + } + if (job->ret) { +- if (!job->error) { +- job->error = g_strdup(strerror(-job->ret)); ++ if (!job->err) { ++ error_setg(&job->err, "%s", strerror(-job->ret)); + } + job_state_transition(job, JOB_STATUS_ABORTING); + } +@@ -865,17 +865,11 @@ static void job_completed_txn_success(Job *job) + } + } + +-void job_completed(Job *job, int ret, Error *error) ++void job_completed(Job *job, int ret) + { + assert(job && job->txn && !job_is_completed(job)); + + job->ret = ret; +- if (error) { +- assert(job->ret < 0); +- job->error = g_strdup(error_get_pretty(error)); +- error_free(error); +- } +- + job_update_rc(job); + trace_job_completed(job, ret, job->ret); + if (job->ret) { +@@ -893,7 +887,7 @@ void job_cancel(Job *job, bool force) + } + job_cancel_async(job, force); + if (!job_started(job)) { +- job_completed(job, -ECANCELED, NULL); ++ job_completed(job, -ECANCELED); + } else if (job->deferred_to_main_loop) { + job_completed_txn_abort(job); + } else { +diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c +index a753386..00604df 100644 +--- a/tests/test-bdrv-drain.c ++++ b/tests/test-bdrv-drain.c +@@ -754,7 +754,7 @@ typedef struct TestBlockJob { + + static void test_job_completed(Job *job, void *opaque) + { +- job_completed(job, 0, NULL); ++ job_completed(job, 0); + } + + static int coroutine_fn test_job_run(Job 
*job, Error **errp) +diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c +index 3194924..82cedee 100644 +--- a/tests/test-blockjob-txn.c ++++ b/tests/test-blockjob-txn.c +@@ -34,7 +34,7 @@ static void test_block_job_complete(Job *job, void *opaque) + rc = -ECANCELED; + } + +- job_completed(job, rc, NULL); ++ job_completed(job, rc); + bdrv_unref(bs); + } + +diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c +index b0462bf..408a226 100644 +--- a/tests/test-blockjob.c ++++ b/tests/test-blockjob.c +@@ -167,7 +167,7 @@ static void cancel_job_completed(Job *job, void *opaque) + { + CancelJob *s = opaque; + s->completed = true; +- job_completed(job, 0, NULL); ++ job_completed(job, 0); + } + + static void cancel_job_complete(Job *job, Error **errp) +-- +1.8.3.1 + diff --git a/0023-jobs-add-exit-shim.patch b/0023-jobs-add-exit-shim.patch new file mode 100644 index 0000000..e8493e5 --- /dev/null +++ b/0023-jobs-add-exit-shim.patch @@ -0,0 +1,108 @@ +From 29ae3509885eaa6d24ee82aa4cae47ddeda086db Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:09 +0100 +Subject: jobs: add exit shim + +RH-Author: John Snow +Message-id: <20180925223431.24791-4-jsnow@redhat.com> +Patchwork-id: 82273 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 03/25] jobs: add exit shim +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +All jobs do the same thing when they leave their running loop: +- Store the return code in a structure +- wait to receive this structure in the main thread +- signal job completion via job_completed + +Few jobs do anything beyond exactly this. Consolidate this exit +logic for a net reduction in SLOC. + +More seriously, when we utilize job_defer_to_main_loop_bh to call +a function that calls job_completed, job_finalize_single will run +in a context where it has recursively taken the aio_context lock, +which can cause hangs if it puts down a reference that causes a flush. 
+ +You can observe this in practice by looking at mirror_exit's careful +placement of job_completed and bdrv_unref calls. + +If we centralize job exiting, we can signal job completion from outside +of the aio_context, which should allow for job cleanup code to run with +only one lock, which makes cleanup callbacks less tricky to write. + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20180830015734.19765-4-jsnow@redhat.com +Reviewed-by: Jeff Cody +Signed-off-by: Max Reitz +(cherry picked from commit 00359a71d45a414ee47d8e423104dc0afd24ec65) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +--- + include/qemu/job.h | 11 +++++++++++ + job.c | 18 ++++++++++++++++++ + 2 files changed, 29 insertions(+) + +diff --git a/include/qemu/job.h b/include/qemu/job.h +index e0e9987..1144d67 100644 +--- a/include/qemu/job.h ++++ b/include/qemu/job.h +@@ -209,6 +209,17 @@ struct JobDriver { + void (*drain)(Job *job); + + /** ++ * If the callback is not NULL, exit will be invoked from the main thread ++ * when the job's coroutine has finished, but before transactional ++ * convergence; before @prepare or @abort. ++ * ++ * FIXME TODO: This callback is only temporary to transition remaining jobs ++ * to prepare/commit/abort/clean callbacks and will be removed before 3.1. ++ * is released. ++ */ ++ void (*exit)(Job *job); ++ ++ /** + * If the callback is not NULL, prepare will be invoked when all the jobs + * belonging to the same transaction complete; or upon this job's completion + * if it is not in a transaction. 
+diff --git a/job.c b/job.c +index 276024a..abe91af 100644 +--- a/job.c ++++ b/job.c +@@ -535,6 +535,18 @@ void job_drain(Job *job) + } + } + ++static void job_exit(void *opaque) ++{ ++ Job *job = (Job *)opaque; ++ AioContext *aio_context = job->aio_context; ++ ++ if (job->driver->exit) { ++ aio_context_acquire(aio_context); ++ job->driver->exit(job); ++ aio_context_release(aio_context); ++ } ++ job_completed(job, job->ret); ++} + + /** + * All jobs must allow a pause point before entering their job proper. This +@@ -547,6 +559,12 @@ static void coroutine_fn job_co_entry(void *opaque) + assert(job && job->driver && job->driver->run); + job_pause_point(job); + job->ret = job->driver->run(job, &job->err); ++ if (!job->deferred_to_main_loop) { ++ job->deferred_to_main_loop = true; ++ aio_bh_schedule_oneshot(qemu_get_aio_context(), ++ job_exit, ++ job); ++ } + } + + +-- +1.8.3.1 + diff --git a/0024-block-commit-utilize-job_exit-shim.patch b/0024-block-commit-utilize-job_exit-shim.patch new file mode 100644 index 0000000..2d4e3b9 --- /dev/null +++ b/0024-block-commit-utilize-job_exit-shim.patch @@ -0,0 +1,115 @@ +From 2207ab7e71d5d3c3806d60b3f483988a62566292 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:10 +0100 +Subject: block/commit: utilize job_exit shim + +RH-Author: John Snow +Message-id: <20180925223431.24791-5-jsnow@redhat.com> +Patchwork-id: 82265 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 04/25] block/commit: utilize job_exit shim +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Change the manual deferment to commit_complete into the implicit +callback to job_exit, renaming commit_complete to commit_exit. + +This conversion does change the timing of when job_completed is +called to after the bdrv_replace_node and bdrv_unref calls, which +could have implications for bjob->blk which will now be put down +after this cleanup. 
+ +Kevin highlights that we did not take any permissions for that backend +at job creation time, so it is safe to reorder these operations. + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20180830015734.19765-5-jsnow@redhat.com +Reviewed-by: Jeff Cody +Signed-off-by: Max Reitz +(cherry picked from commit f369b48dc4095861223f9bc4329935599e03b1c5) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +--- + block/commit.c | 22 +++++----------------- + 1 file changed, 5 insertions(+), 17 deletions(-) + +diff --git a/block/commit.c b/block/commit.c +index af7579d..25b3cb8 100644 +--- a/block/commit.c ++++ b/block/commit.c +@@ -68,19 +68,13 @@ static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base, + return 0; + } + +-typedef struct { +- int ret; +-} CommitCompleteData; +- +-static void commit_complete(Job *job, void *opaque) ++static void commit_exit(Job *job) + { + CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); + BlockJob *bjob = &s->common; +- CommitCompleteData *data = opaque; + BlockDriverState *top = blk_bs(s->top); + BlockDriverState *base = blk_bs(s->base); + BlockDriverState *commit_top_bs = s->commit_top_bs; +- int ret = data->ret; + bool remove_commit_top_bs = false; + + /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */ +@@ -91,10 +85,10 @@ static void commit_complete(Job *job, void *opaque) + * the normal backing chain can be restored. */ + blk_unref(s->base); + +- if (!job_is_cancelled(job) && ret == 0) { ++ if (!job_is_cancelled(job) && job->ret == 0) { + /* success */ +- ret = bdrv_drop_intermediate(s->commit_top_bs, base, +- s->backing_file_str); ++ job->ret = bdrv_drop_intermediate(s->commit_top_bs, base, ++ s->backing_file_str); + } else { + /* XXX Can (or should) we somehow keep 'consistent read' blocked even + * after the failed/cancelled commit job is gone? 
If we already wrote +@@ -117,9 +111,6 @@ static void commit_complete(Job *job, void *opaque) + * bdrv_set_backing_hd() to fail. */ + block_job_remove_all_bdrv(bjob); + +- job_completed(job, ret); +- g_free(data); +- + /* If bdrv_drop_intermediate() didn't already do that, remove the commit + * filter driver from the backing chain. Do this as the final step so that + * the 'consistent read' permission can be granted. */ +@@ -137,7 +128,6 @@ static void commit_complete(Job *job, void *opaque) + static int coroutine_fn commit_run(Job *job, Error **errp) + { + CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); +- CommitCompleteData *data; + int64_t offset; + uint64_t delay_ns = 0; + int ret = 0; +@@ -210,9 +200,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp) + out: + qemu_vfree(buf); + +- data = g_malloc(sizeof(*data)); +- data->ret = ret; +- job_defer_to_main_loop(&s->common.job, commit_complete, data); + return ret; + } + +@@ -224,6 +211,7 @@ static const BlockJobDriver commit_job_driver = { + .user_resume = block_job_user_resume, + .drain = block_job_drain, + .run = commit_run, ++ .exit = commit_exit, + }, + }; + +-- +1.8.3.1 + diff --git a/0025-block-mirror-utilize-job_exit-shim.patch b/0025-block-mirror-utilize-job_exit-shim.patch new file mode 100644 index 0000000..833eead --- /dev/null +++ b/0025-block-mirror-utilize-job_exit-shim.patch @@ -0,0 +1,152 @@ +From f96869810df10ac28030a31d8cb1e39825133e94 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Wed, 29 Aug 2018 21:57:30 -0400 +Subject: block/mirror: utilize job_exit shim + +RH-Author: John Snow +Message-id: <20180925223431.24791-6-jsnow@redhat.com> +Patchwork-id: 82269 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 05/25] block/mirror: utilize job_exit +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Change the manual deferment to mirror_exit into the implicit +callback to job_exit and the mirror_exit callback. 
+ +This does change the order of some bdrv_unref calls and job_completed, +but thanks to the new context in which we call .exit, this is safe to +defer the possible flushing of any nodes to the job_finalize_single +cleanup stage. + +Signed-off-by: John Snow +Message-id: 20180830015734.19765-6-jsnow@redhat.com +Reviewed-by: Max Reitz +Reviewed-by: Jeff Cody +Signed-off-by: Max Reitz +(cherry picked from commit 7b508f6b7a38a8d9729772fa6e525da883fb120b) +Signed-off-by: John Snow +--- + block/mirror.c | 29 +++++++++++------------------ + 1 file changed, 11 insertions(+), 18 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 6637f2b..4a9558d 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -607,26 +607,22 @@ static void mirror_wait_for_all_io(MirrorBlockJob *s) + } + } + +-typedef struct { +- int ret; +-} MirrorExitData; +- +-static void mirror_exit(Job *job, void *opaque) ++static void mirror_exit(Job *job) + { + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); + BlockJob *bjob = &s->common; +- MirrorExitData *data = opaque; + MirrorBDSOpaque *bs_opaque = s->mirror_top_bs->opaque; + AioContext *replace_aio_context = NULL; + BlockDriverState *src = s->mirror_top_bs->backing->bs; + BlockDriverState *target_bs = blk_bs(s->target); + BlockDriverState *mirror_top_bs = s->mirror_top_bs; + Error *local_err = NULL; ++ int ret = job->ret; + + bdrv_release_dirty_bitmap(src, s->dirty_bitmap); + +- /* Make sure that the source BDS doesn't go away before we called +- * job_completed(). 
*/ ++ /* Make sure that the source BDS doesn't go away during bdrv_replace_node, ++ * before we can call bdrv_drained_end */ + bdrv_ref(src); + bdrv_ref(mirror_top_bs); + bdrv_ref(target_bs); +@@ -652,7 +648,7 @@ static void mirror_exit(Job *job, void *opaque) + bdrv_set_backing_hd(target_bs, backing, &local_err); + if (local_err) { + error_report_err(local_err); +- data->ret = -EPERM; ++ ret = -EPERM; + } + } + } +@@ -662,7 +658,7 @@ static void mirror_exit(Job *job, void *opaque) + aio_context_acquire(replace_aio_context); + } + +- if (s->should_complete && data->ret == 0) { ++ if (s->should_complete && ret == 0) { + BlockDriverState *to_replace = src; + if (s->to_replace) { + to_replace = s->to_replace; +@@ -679,7 +675,7 @@ static void mirror_exit(Job *job, void *opaque) + bdrv_drained_end(target_bs); + if (local_err) { + error_report_err(local_err); +- data->ret = -EPERM; ++ ret = -EPERM; + } + } + if (s->to_replace) { +@@ -710,12 +706,12 @@ static void mirror_exit(Job *job, void *opaque) + blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort); + + bs_opaque->job = NULL; +- job_completed(job, data->ret); + +- g_free(data); + bdrv_drained_end(src); + bdrv_unref(mirror_top_bs); + bdrv_unref(src); ++ ++ job->ret = ret; + } + + static void mirror_throttle(MirrorBlockJob *s) +@@ -815,7 +811,6 @@ static int mirror_flush(MirrorBlockJob *s) + static int coroutine_fn mirror_run(Job *job, Error **errp) + { + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); +- MirrorExitData *data; + BlockDriverState *bs = s->mirror_top_bs->backing->bs; + BlockDriverState *target_bs = blk_bs(s->target); + bool need_drain = true; +@@ -1035,14 +1030,10 @@ immediate_exit: + g_free(s->in_flight_bitmap); + bdrv_dirty_iter_free(s->dbi); + +- data = g_malloc(sizeof(*data)); +- data->ret = ret; +- + if (need_drain) { + bdrv_drained_begin(bs); + } + +- job_defer_to_main_loop(&s->common.job, mirror_exit, data); + return ret; + } + +@@ -1141,6 +1132,7 @@ static const BlockJobDriver 
mirror_job_driver = { + .user_resume = block_job_user_resume, + .drain = block_job_drain, + .run = mirror_run, ++ .exit = mirror_exit, + .pause = mirror_pause, + .complete = mirror_complete, + }, +@@ -1157,6 +1149,7 @@ static const BlockJobDriver commit_active_job_driver = { + .user_resume = block_job_user_resume, + .drain = block_job_drain, + .run = mirror_run, ++ .exit = mirror_exit, + .pause = mirror_pause, + .complete = mirror_complete, + }, +-- +1.8.3.1 + diff --git a/0026-jobs-utilize-job_exit-shim.patch b/0026-jobs-utilize-job_exit-shim.patch new file mode 100644 index 0000000..d5ca8e5 --- /dev/null +++ b/0026-jobs-utilize-job_exit-shim.patch @@ -0,0 +1,307 @@ +From 5947e8781d9dffb069fcc570402f775f80068e63 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:12 +0100 +Subject: jobs: utilize job_exit shim + +RH-Author: John Snow +Message-id: <20180925223431.24791-7-jsnow@redhat.com> +Patchwork-id: 82267 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 06/25] jobs: utilize job_exit shim +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Utilize the job_exit shim by not calling job_defer_to_main_loop, and +where applicable, converting the deferred callback into the job_exit +callback. + +This converts backup, stream, create, and the unit tests all at once. +Most of these jobs do not see any changes to the order in which they +clean up their resources, except the test-blockjob-txn test, which +now puts down its bs before job_completed is called. + +This is safe for the same reason the reordering in the mirror job is +safe, because job_completed no longer runs under two locks, making +the unref safe even if it causes a flush. + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20180830015734.19765-7-jsnow@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit eb23654dbe43b549ea2a9ebff9d8edf544d34a73) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. 
de Paula +--- + block/backup.c | 16 ---------------- + block/create.c | 14 +++----------- + block/stream.c | 22 +++++++--------------- + tests/test-bdrv-drain.c | 6 ------ + tests/test-blockjob-txn.c | 11 ++--------- + tests/test-blockjob.c | 10 ++++------ + 6 files changed, 16 insertions(+), 63 deletions(-) + +diff --git a/block/backup.c b/block/backup.c +index 1e965d5..a67b7fa 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -380,18 +380,6 @@ static BlockErrorAction backup_error_action(BackupBlockJob *job, + } + } + +-typedef struct { +- int ret; +-} BackupCompleteData; +- +-static void backup_complete(Job *job, void *opaque) +-{ +- BackupCompleteData *data = opaque; +- +- job_completed(job, data->ret); +- g_free(data); +-} +- + static bool coroutine_fn yield_and_check(BackupBlockJob *job) + { + uint64_t delay_ns; +@@ -483,7 +471,6 @@ static void backup_incremental_init_copy_bitmap(BackupBlockJob *job) + static int coroutine_fn backup_run(Job *opaque_job, Error **errp) + { + BackupBlockJob *job = container_of(opaque_job, BackupBlockJob, common.job); +- BackupCompleteData *data; + BlockDriverState *bs = blk_bs(job->common.blk); + int64_t offset, nb_clusters; + int ret = 0; +@@ -584,9 +571,6 @@ static int coroutine_fn backup_run(Job *opaque_job, Error **errp) + qemu_co_rwlock_unlock(&job->flush_rwlock); + hbitmap_free(job->copy_bitmap); + +- data = g_malloc(sizeof(*data)); +- data->ret = ret; +- job_defer_to_main_loop(&job->common.job, backup_complete, data); + return ret; + } + +diff --git a/block/create.c b/block/create.c +index 26a385c..9534121 100644 +--- a/block/create.c ++++ b/block/create.c +@@ -34,28 +34,20 @@ typedef struct BlockdevCreateJob { + Job common; + BlockDriver *drv; + BlockdevCreateOptions *opts; +- int ret; + } BlockdevCreateJob; + +-static void blockdev_create_complete(Job *job, void *opaque) +-{ +- BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common); +- +- job_completed(job, s->ret); +-} +- + static int coroutine_fn 
blockdev_create_run(Job *job, Error **errp) + { + BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common); ++ int ret; + + job_progress_set_remaining(&s->common, 1); +- s->ret = s->drv->bdrv_co_create(s->opts, errp); ++ ret = s->drv->bdrv_co_create(s->opts, errp); + job_progress_update(&s->common, 1); + + qapi_free_BlockdevCreateOptions(s->opts); +- job_defer_to_main_loop(&s->common, blockdev_create_complete, NULL); + +- return s->ret; ++ return ret; + } + + static const JobDriver blockdev_create_job_driver = { +diff --git a/block/stream.c b/block/stream.c +index 26a7753..67e1e72 100644 +--- a/block/stream.c ++++ b/block/stream.c +@@ -54,20 +54,16 @@ static int coroutine_fn stream_populate(BlockBackend *blk, + return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ); + } + +-typedef struct { +- int ret; +-} StreamCompleteData; +- +-static void stream_complete(Job *job, void *opaque) ++static void stream_exit(Job *job) + { + StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); + BlockJob *bjob = &s->common; +- StreamCompleteData *data = opaque; + BlockDriverState *bs = blk_bs(bjob->blk); + BlockDriverState *base = s->base; + Error *local_err = NULL; ++ int ret = job->ret; + +- if (!job_is_cancelled(job) && bs->backing && data->ret == 0) { ++ if (!job_is_cancelled(job) && bs->backing && ret == 0) { + const char *base_id = NULL, *base_fmt = NULL; + if (base) { + base_id = s->backing_file_str; +@@ -75,11 +71,11 @@ static void stream_complete(Job *job, void *opaque) + base_fmt = base->drv->format_name; + } + } +- data->ret = bdrv_change_backing_file(bs, base_id, base_fmt); ++ ret = bdrv_change_backing_file(bs, base_id, base_fmt); + bdrv_set_backing_hd(bs, base, &local_err); + if (local_err) { + error_report_err(local_err); +- data->ret = -EPERM; ++ ret = -EPERM; + goto out; + } + } +@@ -93,14 +89,12 @@ out: + } + + g_free(s->backing_file_str); +- job_completed(job, data->ret); +- g_free(data); ++ job->ret = ret; + } + + static 
int coroutine_fn stream_run(Job *job, Error **errp) + { + StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); +- StreamCompleteData *data; + BlockBackend *blk = s->common.blk; + BlockDriverState *bs = blk_bs(blk); + BlockDriverState *base = s->base; +@@ -203,9 +197,6 @@ static int coroutine_fn stream_run(Job *job, Error **errp) + + out: + /* Modify backing chain and close BDSes in main loop */ +- data = g_malloc(sizeof(*data)); +- data->ret = ret; +- job_defer_to_main_loop(&s->common.job, stream_complete, data); + return ret; + } + +@@ -215,6 +206,7 @@ static const BlockJobDriver stream_job_driver = { + .job_type = JOB_TYPE_STREAM, + .free = block_job_free, + .run = stream_run, ++ .exit = stream_exit, + .user_resume = block_job_user_resume, + .drain = block_job_drain, + }, +diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c +index 00604df..9bcb3c7 100644 +--- a/tests/test-bdrv-drain.c ++++ b/tests/test-bdrv-drain.c +@@ -752,11 +752,6 @@ typedef struct TestBlockJob { + bool should_complete; + } TestBlockJob; + +-static void test_job_completed(Job *job, void *opaque) +-{ +- job_completed(job, 0); +-} +- + static int coroutine_fn test_job_run(Job *job, Error **errp) + { + TestBlockJob *s = container_of(job, TestBlockJob, common.job); +@@ -770,7 +765,6 @@ static int coroutine_fn test_job_run(Job *job, Error **errp) + job_pause_point(&s->common.job); + } + +- job_defer_to_main_loop(&s->common.job, test_job_completed, NULL); + return 0; + } + +diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c +index 82cedee..ef29f35 100644 +--- a/tests/test-blockjob-txn.c ++++ b/tests/test-blockjob-txn.c +@@ -24,17 +24,11 @@ typedef struct { + int *result; + } TestBlockJob; + +-static void test_block_job_complete(Job *job, void *opaque) ++static void test_block_job_exit(Job *job) + { + BlockJob *bjob = container_of(job, BlockJob, job); + BlockDriverState *bs = blk_bs(bjob->blk); +- int rc = (intptr_t)opaque; + +- if (job_is_cancelled(job)) { +- 
rc = -ECANCELED; +- } +- +- job_completed(job, rc); + bdrv_unref(bs); + } + +@@ -54,8 +48,6 @@ static int coroutine_fn test_block_job_run(Job *job, Error **errp) + } + } + +- job_defer_to_main_loop(job, test_block_job_complete, +- (void *)(intptr_t)s->rc); + return s->rc; + } + +@@ -81,6 +73,7 @@ static const BlockJobDriver test_block_job_driver = { + .user_resume = block_job_user_resume, + .drain = block_job_drain, + .run = test_block_job_run, ++ .exit = test_block_job_exit, + }, + }; + +diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c +index 408a226..ad4a65b 100644 +--- a/tests/test-blockjob.c ++++ b/tests/test-blockjob.c +@@ -163,11 +163,10 @@ typedef struct CancelJob { + bool completed; + } CancelJob; + +-static void cancel_job_completed(Job *job, void *opaque) ++static void cancel_job_exit(Job *job) + { +- CancelJob *s = opaque; ++ CancelJob *s = container_of(job, CancelJob, common.job); + s->completed = true; +- job_completed(job, 0); + } + + static void cancel_job_complete(Job *job, Error **errp) +@@ -182,7 +181,7 @@ static int coroutine_fn cancel_job_run(Job *job, Error **errp) + + while (!s->should_complete) { + if (job_is_cancelled(&s->common.job)) { +- goto defer; ++ return 0; + } + + if (!job_is_ready(&s->common.job) && s->should_converge) { +@@ -192,8 +191,6 @@ static int coroutine_fn cancel_job_run(Job *job, Error **errp) + job_sleep_ns(&s->common.job, 100000); + } + +- defer: +- job_defer_to_main_loop(&s->common.job, cancel_job_completed, s); + return 0; + } + +@@ -204,6 +201,7 @@ static const BlockJobDriver test_cancel_driver = { + .user_resume = block_job_user_resume, + .drain = block_job_drain, + .run = cancel_job_run, ++ .exit = cancel_job_exit, + .complete = cancel_job_complete, + }, + }; +-- +1.8.3.1 + diff --git a/0027-block-backup-make-function-variables-consistently-na.patch b/0027-block-backup-make-function-variables-consistently-na.patch new file mode 100644 index 0000000..2923dac --- /dev/null +++ 
b/0027-block-backup-make-function-variables-consistently-na.patch @@ -0,0 +1,165 @@ +From 3e86b802541a7230eda88a6bd7f17b411deab9fa Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:13 +0100 +Subject: block/backup: make function variables consistently named + +RH-Author: John Snow +Message-id: <20180925223431.24791-8-jsnow@redhat.com> +Patchwork-id: 82272 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 07/25] block/backup: make function variables consistently named +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Rename opaque_job to job to be consistent with other job implementations. +Rename 'job', the BackupBlockJob object, to 's' to also be consistent. + +Suggested-by: Eric Blake +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20180830015734.19765-8-jsnow@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit 6870277535493fea31761d8d11ec23add2de0fb0) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. 
de Paula +--- + block/backup.c | 62 +++++++++++++++++++++++++++++----------------------------- + 1 file changed, 31 insertions(+), 31 deletions(-) + +diff --git a/block/backup.c b/block/backup.c +index a67b7fa..4d084f6 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -468,59 +468,59 @@ static void backup_incremental_init_copy_bitmap(BackupBlockJob *job) + bdrv_dirty_iter_free(dbi); + } + +-static int coroutine_fn backup_run(Job *opaque_job, Error **errp) ++static int coroutine_fn backup_run(Job *job, Error **errp) + { +- BackupBlockJob *job = container_of(opaque_job, BackupBlockJob, common.job); +- BlockDriverState *bs = blk_bs(job->common.blk); ++ BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); ++ BlockDriverState *bs = blk_bs(s->common.blk); + int64_t offset, nb_clusters; + int ret = 0; + +- QLIST_INIT(&job->inflight_reqs); +- qemu_co_rwlock_init(&job->flush_rwlock); ++ QLIST_INIT(&s->inflight_reqs); ++ qemu_co_rwlock_init(&s->flush_rwlock); + +- nb_clusters = DIV_ROUND_UP(job->len, job->cluster_size); +- job_progress_set_remaining(&job->common.job, job->len); ++ nb_clusters = DIV_ROUND_UP(s->len, s->cluster_size); ++ job_progress_set_remaining(job, s->len); + +- job->copy_bitmap = hbitmap_alloc(nb_clusters, 0); +- if (job->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { +- backup_incremental_init_copy_bitmap(job); ++ s->copy_bitmap = hbitmap_alloc(nb_clusters, 0); ++ if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { ++ backup_incremental_init_copy_bitmap(s); + } else { +- hbitmap_set(job->copy_bitmap, 0, nb_clusters); ++ hbitmap_set(s->copy_bitmap, 0, nb_clusters); + } + + +- job->before_write.notify = backup_before_write_notify; +- bdrv_add_before_write_notifier(bs, &job->before_write); ++ s->before_write.notify = backup_before_write_notify; ++ bdrv_add_before_write_notifier(bs, &s->before_write); + +- if (job->sync_mode == MIRROR_SYNC_MODE_NONE) { ++ if (s->sync_mode == MIRROR_SYNC_MODE_NONE) { + /* All bits are set in copy_bitmap to 
allow any cluster to be copied. + * This does not actually require them to be copied. */ +- while (!job_is_cancelled(&job->common.job)) { ++ while (!job_is_cancelled(job)) { + /* Yield until the job is cancelled. We just let our before_write + * notify callback service CoW requests. */ +- job_yield(&job->common.job); ++ job_yield(job); + } +- } else if (job->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { +- ret = backup_run_incremental(job); ++ } else if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { ++ ret = backup_run_incremental(s); + } else { + /* Both FULL and TOP SYNC_MODE's require copying.. */ +- for (offset = 0; offset < job->len; +- offset += job->cluster_size) { ++ for (offset = 0; offset < s->len; ++ offset += s->cluster_size) { + bool error_is_read; + int alloced = 0; + +- if (yield_and_check(job)) { ++ if (yield_and_check(s)) { + break; + } + +- if (job->sync_mode == MIRROR_SYNC_MODE_TOP) { ++ if (s->sync_mode == MIRROR_SYNC_MODE_TOP) { + int i; + int64_t n; + + /* Check to see if these blocks are already in the + * backing file. */ + +- for (i = 0; i < job->cluster_size;) { ++ for (i = 0; i < s->cluster_size;) { + /* bdrv_is_allocated() only returns true/false based + * on the first set of sectors it comes across that + * are are all in the same state. +@@ -529,7 +529,7 @@ static int coroutine_fn backup_run(Job *opaque_job, Error **errp) + * needed but at some point that is always the case. 
*/ + alloced = + bdrv_is_allocated(bs, offset + i, +- job->cluster_size - i, &n); ++ s->cluster_size - i, &n); + i += n; + + if (alloced || n == 0) { +@@ -547,29 +547,29 @@ static int coroutine_fn backup_run(Job *opaque_job, Error **errp) + if (alloced < 0) { + ret = alloced; + } else { +- ret = backup_do_cow(job, offset, job->cluster_size, ++ ret = backup_do_cow(s, offset, s->cluster_size, + &error_is_read, false); + } + if (ret < 0) { + /* Depending on error action, fail now or retry cluster */ + BlockErrorAction action = +- backup_error_action(job, error_is_read, -ret); ++ backup_error_action(s, error_is_read, -ret); + if (action == BLOCK_ERROR_ACTION_REPORT) { + break; + } else { +- offset -= job->cluster_size; ++ offset -= s->cluster_size; + continue; + } + } + } + } + +- notifier_with_return_remove(&job->before_write); ++ notifier_with_return_remove(&s->before_write); + + /* wait until pending backup_do_cow() calls have completed */ +- qemu_co_rwlock_wrlock(&job->flush_rwlock); +- qemu_co_rwlock_unlock(&job->flush_rwlock); +- hbitmap_free(job->copy_bitmap); ++ qemu_co_rwlock_wrlock(&s->flush_rwlock); ++ qemu_co_rwlock_unlock(&s->flush_rwlock); ++ hbitmap_free(s->copy_bitmap); + + return ret; + } +-- +1.8.3.1 + diff --git a/0028-jobs-remove-ret-argument-to-job_completed-privatize-.patch b/0028-jobs-remove-ret-argument-to-job_completed-privatize-.patch new file mode 100644 index 0000000..070c907 --- /dev/null +++ b/0028-jobs-remove-ret-argument-to-job_completed-privatize-.patch @@ -0,0 +1,153 @@ +From 3141614c15fbcf6aee7af19069380aa6d186656b Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:14 +0100 +Subject: jobs: remove ret argument to job_completed; privatize it + +RH-Author: John Snow +Message-id: <20180925223431.24791-9-jsnow@redhat.com> +Patchwork-id: 82271 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 08/25] jobs: remove ret argument to job_completed; privatize it +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz 
+RH-Acked-by: Kevin Wolf + +Jobs are now expected to return their retcode on the stack, from the +.run callback, so we can remove that argument. + +job_cancel does not need to set -ECANCELED because job_completed will +update the return code itself if the job was canceled. + +While we're here, make job_completed static to job.c and remove it from +job.h; move the documentation of return code to the .run() callback and +to the job->ret property, accordingly. + +Signed-off-by: John Snow +Message-id: 20180830015734.19765-9-jsnow@redhat.com +Reviewed-by: Max Reitz +Signed-off-by: Max Reitz +(cherry picked from commit 404ff28d6ae59fc1c24d631710d4063fc68aed03) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +--- + include/qemu/job.h | 28 +++++++++++++++------------- + job.c | 11 ++++++----- + trace-events | 2 +- + 3 files changed, 22 insertions(+), 19 deletions(-) + +diff --git a/include/qemu/job.h b/include/qemu/job.h +index 1144d67..23395c1 100644 +--- a/include/qemu/job.h ++++ b/include/qemu/job.h +@@ -124,7 +124,11 @@ typedef struct Job { + /** Estimated progress_current value at the completion of the job */ + int64_t progress_total; + +- /** ret code passed to job_completed. */ ++ /** ++ * Return code from @run and/or @prepare callback(s). ++ * Not final until the job has reached the CONCLUDED status. ++ * 0 on success, -errno on failure. ++ */ + int ret; + + /** +@@ -172,7 +176,16 @@ struct JobDriver { + /** Enum describing the operation */ + JobType job_type; + +- /** Mandatory: Entrypoint for the Coroutine. */ ++ /** ++ * Mandatory: Entrypoint for the Coroutine. ++ * ++ * This callback will be invoked when moving from CREATED to RUNNING. ++ * ++ * If this callback returns nonzero, the job transaction it is part of is ++ * aborted. If it returns zero, the job moves into the WAITING state. If it ++ * is the last job to complete in its transaction, all jobs in the ++ * transaction move from WAITING to PENDING. 
++ */ + int coroutine_fn (*run)(Job *job, Error **errp); + + /** +@@ -496,17 +509,6 @@ void job_early_fail(Job *job); + /** Moves the @job from RUNNING to READY */ + void job_transition_to_ready(Job *job); + +-/** +- * @job: The job being completed. +- * @ret: The status code. +- * +- * Marks @job as completed. If @ret is non-zero, the job transaction it is part +- * of is aborted. If @ret is zero, the job moves into the WAITING state. If it +- * is the last job to complete in its transaction, all jobs in the transaction +- * move from WAITING to PENDING. +- */ +-void job_completed(Job *job, int ret); +- + /** Asynchronously complete the specified @job. */ + void job_complete(Job *job, Error **errp); + +diff --git a/job.c b/job.c +index abe91af..61e091a 100644 +--- a/job.c ++++ b/job.c +@@ -535,6 +535,8 @@ void job_drain(Job *job) + } + } + ++static void job_completed(Job *job); ++ + static void job_exit(void *opaque) + { + Job *job = (Job *)opaque; +@@ -545,7 +547,7 @@ static void job_exit(void *opaque) + job->driver->exit(job); + aio_context_release(aio_context); + } +- job_completed(job, job->ret); ++ job_completed(job); + } + + /** +@@ -883,13 +885,12 @@ static void job_completed_txn_success(Job *job) + } + } + +-void job_completed(Job *job, int ret) ++static void job_completed(Job *job) + { + assert(job && job->txn && !job_is_completed(job)); + +- job->ret = ret; + job_update_rc(job); +- trace_job_completed(job, ret, job->ret); ++ trace_job_completed(job, job->ret); + if (job->ret) { + job_completed_txn_abort(job); + } else { +@@ -905,7 +906,7 @@ void job_cancel(Job *job, bool force) + } + job_cancel_async(job, force); + if (!job_started(job)) { +- job_completed(job, -ECANCELED); ++ job_completed(job); + } else if (job->deferred_to_main_loop) { + job_completed_txn_abort(job); + } else { +diff --git a/trace-events b/trace-events +index c445f54..4fd2cb4 100644 +--- a/trace-events ++++ b/trace-events +@@ -107,7 +107,7 @@ gdbstub_err_checksum_incorrect(uint8_t 
expected, uint8_t got) "got command packe + # job.c + job_state_transition(void *job, int ret, const char *legal, const char *s0, const char *s1) "job %p (ret: %d) attempting %s transition (%s-->%s)" + job_apply_verb(void *job, const char *state, const char *verb, const char *legal) "job %p in state %s; applying verb %s (%s)" +-job_completed(void *job, int ret, int jret) "job %p ret %d corrected ret %d" ++job_completed(void *job, int ret) "job %p ret %d" + + # job-qmp.c + qmp_job_cancel(void *job) "job %p" +-- +1.8.3.1 + diff --git a/0029-jobs-remove-job_defer_to_main_loop.patch b/0029-jobs-remove-job_defer_to_main_loop.patch new file mode 100644 index 0000000..3c302ce --- /dev/null +++ b/0029-jobs-remove-job_defer_to_main_loop.patch @@ -0,0 +1,119 @@ +From 73694b41a7e96fb364bdfd6fbad89c69dc2d1f73 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:15 +0100 +Subject: jobs: remove job_defer_to_main_loop + +RH-Author: John Snow +Message-id: <20180925223431.24791-10-jsnow@redhat.com> +Patchwork-id: 82275 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 09/25] jobs: remove job_defer_to_main_loop +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Now that the job infrastructure is handling the job_completed call for +all implemented jobs, we can remove the interface that allowed jobs to +schedule their own completion. + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20180830015734.19765-10-jsnow@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit e21a1c9831fc80ae3f3c1affdfa43350035d8588) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. 
de Paula +--- + include/qemu/job.h | 17 ----------------- + job.c | 40 ++-------------------------------------- + 2 files changed, 2 insertions(+), 55 deletions(-) + +diff --git a/include/qemu/job.h b/include/qemu/job.h +index 23395c1..e0cff70 100644 +--- a/include/qemu/job.h ++++ b/include/qemu/job.h +@@ -568,23 +568,6 @@ void job_finalize(Job *job, Error **errp); + */ + void job_dismiss(Job **job, Error **errp); + +-typedef void JobDeferToMainLoopFn(Job *job, void *opaque); +- +-/** +- * @job: The job +- * @fn: The function to run in the main loop +- * @opaque: The opaque value that is passed to @fn +- * +- * This function must be called by the main job coroutine just before it +- * returns. @fn is executed in the main loop with the job AioContext acquired. +- * +- * Block jobs must call bdrv_unref(), bdrv_close(), and anything that uses +- * bdrv_drain_all() in the main loop. +- * +- * The @job AioContext is held while @fn executes. +- */ +-void job_defer_to_main_loop(Job *job, JobDeferToMainLoopFn *fn, void *opaque); +- + /** + * Synchronously finishes the given @job. If @finish is given, it is called to + * trigger completion or cancellation of the job. 
+diff --git a/job.c b/job.c +index 61e091a..e8d7aee 100644 +--- a/job.c ++++ b/job.c +@@ -561,12 +561,8 @@ static void coroutine_fn job_co_entry(void *opaque) + assert(job && job->driver && job->driver->run); + job_pause_point(job); + job->ret = job->driver->run(job, &job->err); +- if (!job->deferred_to_main_loop) { +- job->deferred_to_main_loop = true; +- aio_bh_schedule_oneshot(qemu_get_aio_context(), +- job_exit, +- job); +- } ++ job->deferred_to_main_loop = true; ++ aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job); + } + + +@@ -969,38 +965,6 @@ void job_complete(Job *job, Error **errp) + job->driver->complete(job, errp); + } + +- +-typedef struct { +- Job *job; +- JobDeferToMainLoopFn *fn; +- void *opaque; +-} JobDeferToMainLoopData; +- +-static void job_defer_to_main_loop_bh(void *opaque) +-{ +- JobDeferToMainLoopData *data = opaque; +- Job *job = data->job; +- AioContext *aio_context = job->aio_context; +- +- aio_context_acquire(aio_context); +- data->fn(data->job, data->opaque); +- aio_context_release(aio_context); +- +- g_free(data); +-} +- +-void job_defer_to_main_loop(Job *job, JobDeferToMainLoopFn *fn, void *opaque) +-{ +- JobDeferToMainLoopData *data = g_malloc(sizeof(*data)); +- data->job = job; +- data->fn = fn; +- data->opaque = opaque; +- job->deferred_to_main_loop = true; +- +- aio_bh_schedule_oneshot(qemu_get_aio_context(), +- job_defer_to_main_loop_bh, data); +-} +- + int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp) + { + Error *local_err = NULL; +-- +1.8.3.1 + diff --git a/0030-block-commit-add-block-job-creation-flags.patch b/0030-block-commit-add-block-job-creation-flags.patch new file mode 100644 index 0000000..315a78f --- /dev/null +++ b/0030-block-commit-add-block-job-creation-flags.patch @@ -0,0 +1,110 @@ +From 8141d5f8ab70551c59fae63373a9562c99c8e00d Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:16 +0100 +Subject: block/commit: add block job creation flags + 
+RH-Author: John Snow +Message-id: <20180925223431.24791-11-jsnow@redhat.com> +Patchwork-id: 82264 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 10/25] block/commit: add block job creation flags +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Add support for taking and passing forward job creation flags. + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Reviewed-by: Jeff Cody +Message-id: 20180906130225.5118-2-jsnow@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit 5360782d0827854383097d560715d8d8027ee590) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +--- + block/commit.c | 5 +++-- + blockdev.c | 7 ++++--- + include/block/block_int.h | 5 ++++- + 3 files changed, 11 insertions(+), 6 deletions(-) + +diff --git a/block/commit.c b/block/commit.c +index 25b3cb8..c737664 100644 +--- a/block/commit.c ++++ b/block/commit.c +@@ -254,7 +254,8 @@ static BlockDriver bdrv_commit_top = { + }; + + void commit_start(const char *job_id, BlockDriverState *bs, +- BlockDriverState *base, BlockDriverState *top, int64_t speed, ++ BlockDriverState *base, BlockDriverState *top, ++ int creation_flags, int64_t speed, + BlockdevOnError on_error, const char *backing_file_str, + const char *filter_node_name, Error **errp) + { +@@ -272,7 +273,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, + } + + s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL, +- speed, JOB_DEFAULT, NULL, NULL, errp); ++ speed, creation_flags, NULL, NULL, errp); + if (!s) { + return; + } +diff --git a/blockdev.c b/blockdev.c +index dcf8c8d..88ad8d9 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3324,6 +3324,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, + * BlockdevOnError change for blkmirror makes it in + */ + BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT; ++ int job_flags = JOB_DEFAULT; + + if (!has_speed) { + speed = 0; +@@ -3405,15 +3406,15 @@ void 
qmp_block_commit(bool has_job_id, const char *job_id, const char *device, + goto out; + } + commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, +- JOB_DEFAULT, speed, on_error, ++ job_flags, speed, on_error, + filter_node_name, NULL, NULL, false, &local_err); + } else { + BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs); + if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { + goto out; + } +- commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed, +- on_error, has_backing_file ? backing_file : NULL, ++ commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, job_flags, ++ speed, on_error, has_backing_file ? backing_file : NULL, + filter_node_name, &local_err); + } + if (local_err != NULL) { +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 903b9c1..ffab0b4 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -980,6 +980,8 @@ void stream_start(const char *job_id, BlockDriverState *bs, + * @bs: Active block device. + * @top: Top block device to be committed. + * @base: Block device that will be written into, and become the new top. ++ * @creation_flags: Flags that control the behavior of the Job lifetime. ++ * See @BlockJobCreateFlags + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. 
+ * @backing_file_str: String to use as the backing file in @top's overlay +@@ -990,7 +992,8 @@ void stream_start(const char *job_id, BlockDriverState *bs, + * + */ + void commit_start(const char *job_id, BlockDriverState *bs, +- BlockDriverState *base, BlockDriverState *top, int64_t speed, ++ BlockDriverState *base, BlockDriverState *top, ++ int creation_flags, int64_t speed, + BlockdevOnError on_error, const char *backing_file_str, + const char *filter_node_name, Error **errp); + /** +-- +1.8.3.1 + diff --git a/0031-block-mirror-add-block-job-creation-flags.patch b/0031-block-mirror-add-block-job-creation-flags.patch new file mode 100644 index 0000000..088c370 --- /dev/null +++ b/0031-block-mirror-add-block-job-creation-flags.patch @@ -0,0 +1,100 @@ +From 8ac0fb4e4202e6321d57f1be01f4ca6e51a98687 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:17 +0100 +Subject: block/mirror: add block job creation flags + +RH-Author: John Snow +Message-id: <20180925223431.24791-12-jsnow@redhat.com> +Patchwork-id: 82268 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 11/25] block/mirror: add block job creation flags +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Add support for taking and passing forward job creation flags. + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Reviewed-by: Jeff Cody +Message-id: 20180906130225.5118-3-jsnow@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit a1999b33488daba68a1bcd7c6fdf314ddeacc6a2) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. 
de Paula +--- + block/mirror.c | 5 +++-- + blockdev.c | 3 ++- + include/block/block_int.h | 5 ++++- + 3 files changed, 9 insertions(+), 4 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 4a9558d..cd13835 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -1639,7 +1639,8 @@ fail: + + void mirror_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *target, const char *replaces, +- int64_t speed, uint32_t granularity, int64_t buf_size, ++ int creation_flags, int64_t speed, ++ uint32_t granularity, int64_t buf_size, + MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, +@@ -1655,7 +1656,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, + } + is_none_mode = mode == MIRROR_SYNC_MODE_NONE; + base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL; +- mirror_start_job(job_id, bs, JOB_DEFAULT, target, replaces, ++ mirror_start_job(job_id, bs, creation_flags, target, replaces, + speed, granularity, buf_size, backing_mode, + on_source_error, on_target_error, unmap, NULL, NULL, + &mirror_job_driver, is_none_mode, base, false, +diff --git a/blockdev.c b/blockdev.c +index 88ad8d9..d31750b 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3700,6 +3700,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, + bool has_copy_mode, MirrorCopyMode copy_mode, + Error **errp) + { ++ int job_flags = JOB_DEFAULT; + + if (!has_speed) { + speed = 0; +@@ -3752,7 +3753,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, + * and will allow to check whether the node still exist at mirror completion + */ + mirror_start(job_id, bs, target, +- has_replaces ? replaces : NULL, ++ has_replaces ? 
replaces : NULL, job_flags, + speed, granularity, buf_size, sync, backing_mode, + on_source_error, on_target_error, unmap, filter_node_name, + copy_mode, errp); +diff --git a/include/block/block_int.h b/include/block/block_int.h +index ffab0b4..b40f0bf 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -1029,6 +1029,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, + * @target: Block device to write to. + * @replaces: Block graph node name to replace once the mirror is done. Can + * only be used when full mirroring is selected. ++ * @creation_flags: Flags that control the behavior of the Job lifetime. ++ * See @BlockJobCreateFlags + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @granularity: The chosen granularity for the dirty bitmap. + * @buf_size: The amount of data that can be in flight at one time. +@@ -1050,7 +1052,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, + */ + void mirror_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *target, const char *replaces, +- int64_t speed, uint32_t granularity, int64_t buf_size, ++ int creation_flags, int64_t speed, ++ uint32_t granularity, int64_t buf_size, + MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, +-- +1.8.3.1 + diff --git a/0032-block-stream-add-block-job-creation-flags.patch b/0032-block-stream-add-block-job-creation-flags.patch new file mode 100644 index 0000000..1dda670 --- /dev/null +++ b/0032-block-stream-add-block-job-creation-flags.patch @@ -0,0 +1,100 @@ +From 64569465b360642820193586116aa51ed0b356bd Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:18 +0100 +Subject: block/stream: add block job creation flags + +RH-Author: John Snow +Message-id: <20180925223431.24791-13-jsnow@redhat.com> +Patchwork-id: 82263 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 12/25] block/stream: add block job 
creation flags +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Add support for taking and passing forward job creation flags. + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Reviewed-by: Jeff Cody +Message-id: 20180906130225.5118-4-jsnow@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit cf6320df581e6cbde6a95075266859a8f9ba9d55) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +--- + block/stream.c | 5 +++-- + blockdev.c | 3 ++- + include/block/block_int.h | 5 ++++- + 3 files changed, 9 insertions(+), 4 deletions(-) + +diff --git a/block/stream.c b/block/stream.c +index 67e1e72..700eb23 100644 +--- a/block/stream.c ++++ b/block/stream.c +@@ -214,7 +214,8 @@ static const BlockJobDriver stream_job_driver = { + + void stream_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *base, const char *backing_file_str, +- int64_t speed, BlockdevOnError on_error, Error **errp) ++ int creation_flags, int64_t speed, ++ BlockdevOnError on_error, Error **errp) + { + StreamBlockJob *s; + BlockDriverState *iter; +@@ -236,7 +237,7 @@ void stream_start(const char *job_id, BlockDriverState *bs, + BLK_PERM_GRAPH_MOD, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE, +- speed, JOB_DEFAULT, NULL, NULL, errp); ++ speed, creation_flags, NULL, NULL, errp); + if (!s) { + goto fail; + } +diff --git a/blockdev.c b/blockdev.c +index d31750b..c2e6402 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3233,6 +3233,7 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, + AioContext *aio_context; + Error *local_err = NULL; + const char *base_name = NULL; ++ int job_flags = JOB_DEFAULT; + + if (!has_on_error) { + on_error = BLOCKDEV_ON_ERROR_REPORT; +@@ -3295,7 +3296,7 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, + base_name = has_backing_file ? backing_file : base_name; + + stream_start(has_job_id ? 
job_id : NULL, bs, base_bs, base_name, +- has_speed ? speed : 0, on_error, &local_err); ++ job_flags, has_speed ? speed : 0, on_error, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; +diff --git a/include/block/block_int.h b/include/block/block_int.h +index b40f0bf..4000d2a 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -958,6 +958,8 @@ int is_windows_drive(const char *filename); + * flatten the whole backing file chain onto @bs. + * @backing_file_str: The file name that will be written to @bs as the + * the new backing file if the job completes. Ignored if @base is %NULL. ++ * @creation_flags: Flags that control the behavior of the Job lifetime. ++ * See @BlockJobCreateFlags + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @errp: Error object. +@@ -971,7 +973,8 @@ int is_windows_drive(const char *filename); + */ + void stream_start(const char *job_id, BlockDriverState *bs, + BlockDriverState *base, const char *backing_file_str, +- int64_t speed, BlockdevOnError on_error, Error **errp); ++ int creation_flags, int64_t speed, ++ BlockdevOnError on_error, Error **errp); + + /** + * commit_start: +-- +1.8.3.1 + diff --git a/0033-block-commit-refactor-commit-to-use-job-callbacks.patch b/0033-block-commit-refactor-commit-to-use-job-callbacks.patch new file mode 100644 index 0000000..2a5f69b --- /dev/null +++ b/0033-block-commit-refactor-commit-to-use-job-callbacks.patch @@ -0,0 +1,180 @@ +From b0ac95edde586e808a1118c4b04c1608de8b5b6c Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:19 +0100 +Subject: block/commit: refactor commit to use job callbacks + +RH-Author: John Snow +Message-id: <20180925223431.24791-14-jsnow@redhat.com> +Patchwork-id: 82279 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 13/25] block/commit: refactor commit to use job callbacks +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz 
+RH-Acked-by: Kevin Wolf + +Use the component callbacks; prepare, abort, and clean. + +NB: prepare is only called when the job has not yet failed; +and abort can be called after prepare. + +complete -> prepare -> abort -> clean +complete -> abort -> clean + +During refactor, a potential problem with bdrv_drop_intermediate +was identified, the patched behavior is no worse than the pre-patch +behavior, so leave a FIXME for now to be fixed in a future patch. + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20180906130225.5118-5-jsnow@redhat.com +Reviewed-by: Jeff Cody +Signed-off-by: Max Reitz +(cherry picked from commit 22dffcbec62ba918db690ed44beba4bd4e970bb9) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +--- + block/commit.c | 92 ++++++++++++++++++++++++++++++++-------------------------- + 1 file changed, 51 insertions(+), 41 deletions(-) + +diff --git a/block/commit.c b/block/commit.c +index c737664..b387765 100644 +--- a/block/commit.c ++++ b/block/commit.c +@@ -36,6 +36,7 @@ typedef struct CommitBlockJob { + BlockDriverState *commit_top_bs; + BlockBackend *top; + BlockBackend *base; ++ BlockDriverState *base_bs; + BlockdevOnError on_error; + int base_flags; + char *backing_file_str; +@@ -68,61 +69,67 @@ static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base, + return 0; + } + +-static void commit_exit(Job *job) ++static int commit_prepare(Job *job) + { + CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); +- BlockJob *bjob = &s->common; +- BlockDriverState *top = blk_bs(s->top); +- BlockDriverState *base = blk_bs(s->base); +- BlockDriverState *commit_top_bs = s->commit_top_bs; +- bool remove_commit_top_bs = false; +- +- /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */ +- bdrv_ref(top); +- bdrv_ref(commit_top_bs); + + /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before + * the normal backing chain can be restored. 
*/ + blk_unref(s->base); ++ s->base = NULL; ++ ++ /* FIXME: bdrv_drop_intermediate treats total failures and partial failures ++ * identically. Further work is needed to disambiguate these cases. */ ++ return bdrv_drop_intermediate(s->commit_top_bs, s->base_bs, ++ s->backing_file_str); ++} + +- if (!job_is_cancelled(job) && job->ret == 0) { +- /* success */ +- job->ret = bdrv_drop_intermediate(s->commit_top_bs, base, +- s->backing_file_str); +- } else { +- /* XXX Can (or should) we somehow keep 'consistent read' blocked even +- * after the failed/cancelled commit job is gone? If we already wrote +- * something to base, the intermediate images aren't valid any more. */ +- remove_commit_top_bs = true; ++static void commit_abort(Job *job) ++{ ++ CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); ++ BlockDriverState *top_bs = blk_bs(s->top); ++ ++ /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */ ++ bdrv_ref(top_bs); ++ bdrv_ref(s->commit_top_bs); ++ ++ if (s->base) { ++ blk_unref(s->base); + } + ++ /* free the blockers on the intermediate nodes so that bdrv_replace_nodes ++ * can succeed */ ++ block_job_remove_all_bdrv(&s->common); ++ ++ /* If bdrv_drop_intermediate() failed (or was not invoked), remove the ++ * commit filter driver from the backing chain now. Do this as the final ++ * step so that the 'consistent read' permission can be granted. ++ * ++ * XXX Can (or should) we somehow keep 'consistent read' blocked even ++ * after the failed/cancelled commit job is gone? If we already wrote ++ * something to base, the intermediate images aren't valid any more. 
*/ ++ bdrv_child_try_set_perm(s->commit_top_bs->backing, 0, BLK_PERM_ALL, ++ &error_abort); ++ bdrv_replace_node(s->commit_top_bs, backing_bs(s->commit_top_bs), ++ &error_abort); ++ ++ bdrv_unref(s->commit_top_bs); ++ bdrv_unref(top_bs); ++} ++ ++static void commit_clean(Job *job) ++{ ++ CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); ++ + /* restore base open flags here if appropriate (e.g., change the base back + * to r/o). These reopens do not need to be atomic, since we won't abort + * even on failure here */ +- if (s->base_flags != bdrv_get_flags(base)) { +- bdrv_reopen(base, s->base_flags, NULL); ++ if (s->base_flags != bdrv_get_flags(s->base_bs)) { ++ bdrv_reopen(s->base_bs, s->base_flags, NULL); + } ++ + g_free(s->backing_file_str); + blk_unref(s->top); +- +- /* If there is more than one reference to the job (e.g. if called from +- * job_finish_sync()), job_completed() won't free it and therefore the +- * blockers on the intermediate nodes remain. This would cause +- * bdrv_set_backing_hd() to fail. */ +- block_job_remove_all_bdrv(bjob); +- +- /* If bdrv_drop_intermediate() didn't already do that, remove the commit +- * filter driver from the backing chain. Do this as the final step so that +- * the 'consistent read' permission can be granted. 
*/ +- if (remove_commit_top_bs) { +- bdrv_child_try_set_perm(commit_top_bs->backing, 0, BLK_PERM_ALL, +- &error_abort); +- bdrv_replace_node(commit_top_bs, backing_bs(commit_top_bs), +- &error_abort); +- } +- +- bdrv_unref(commit_top_bs); +- bdrv_unref(top); + } + + static int coroutine_fn commit_run(Job *job, Error **errp) +@@ -211,7 +218,9 @@ static const BlockJobDriver commit_job_driver = { + .user_resume = block_job_user_resume, + .drain = block_job_drain, + .run = commit_run, +- .exit = commit_exit, ++ .prepare = commit_prepare, ++ .abort = commit_abort, ++ .clean = commit_clean + }, + }; + +@@ -350,6 +359,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, + if (ret < 0) { + goto fail; + } ++ s->base_bs = base; + + /* Required permissions are already taken with block_job_add_bdrv() */ + s->top = blk_new(0, BLK_PERM_ALL); +-- +1.8.3.1 + diff --git a/0034-block-mirror-don-t-install-backing-chain-on-abort.patch b/0034-block-mirror-don-t-install-backing-chain-on-abort.patch new file mode 100644 index 0000000..241ae5f --- /dev/null +++ b/0034-block-mirror-don-t-install-backing-chain-on-abort.patch @@ -0,0 +1,45 @@ +From 7f155f96e9db0be97501f90e482a29d51779f887 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:20 +0100 +Subject: block/mirror: don't install backing chain on abort + +RH-Author: John Snow +Message-id: <20180925223431.24791-15-jsnow@redhat.com> +Patchwork-id: 82277 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 14/25] block/mirror: don't install backing chain on abort +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +In cases where we abort the block/mirror job, there's no point in +installing the new backing chain before we finish aborting. 
+ +Signed-off-by: John Snow +Message-id: 20180906130225.5118-6-jsnow@redhat.com +Reviewed-by: Jeff Cody +Reviewed-by: Max Reitz +Signed-off-by: Max Reitz +(cherry picked from commit c2924ceaa7f1866148e2847c969fc1902a2524fa) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +--- + block/mirror.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/mirror.c b/block/mirror.c +index cd13835..19b57b8 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -642,7 +642,7 @@ static void mirror_exit(Job *job) + * required before it could become a backing file of target_bs. */ + bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, + &error_abort); +- if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { ++ if (ret == 0 && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { + BlockDriverState *backing = s->is_none_mode ? src : s->base; + if (backing_bs(target_bs) != backing) { + bdrv_set_backing_hd(target_bs, backing, &local_err); +-- +1.8.3.1 + diff --git a/0035-block-mirror-conservative-mirror_exit-refactor.patch b/0035-block-mirror-conservative-mirror_exit-refactor.patch new file mode 100644 index 0000000..1c34fec --- /dev/null +++ b/0035-block-mirror-conservative-mirror_exit-refactor.patch @@ -0,0 +1,136 @@ +From 8b394ff523e607060c80c6b647dbb89a2f73571d Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Thu, 6 Sep 2018 09:02:15 -0400 +Subject: block/mirror: conservative mirror_exit refactor + +RH-Author: John Snow +Message-id: <20180925223431.24791-16-jsnow@redhat.com> +Patchwork-id: 82270 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 15/25] block/mirror: conservative mirr +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +For purposes of minimum code movement, refactor the mirror_exit +callback to use the post-finalization callbacks in a trivial way. 
+ +Signed-off-by: John Snow +Message-id: 20180906130225.5118-7-jsnow@redhat.com +Reviewed-by: Jeff Cody +Reviewed-by: Max Reitz +[mreitz: Added comment for the mirror_exit() function] +Signed-off-by: Max Reitz +(cherry picked from commit 737efc1eda23b904fbe0e66b37715fb0e5c3e58b) +Signed-off-by: John Snow +--- + block/mirror.c | 44 +++++++++++++++++++++++++++++++++----------- + 1 file changed, 33 insertions(+), 11 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 19b57b8..7efba77 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -79,6 +79,7 @@ typedef struct MirrorBlockJob { + int max_iov; + bool initial_zeroing_ongoing; + int in_active_write_counter; ++ bool prepared; + } MirrorBlockJob; + + typedef struct MirrorBDSOpaque { +@@ -607,7 +608,12 @@ static void mirror_wait_for_all_io(MirrorBlockJob *s) + } + } + +-static void mirror_exit(Job *job) ++/** ++ * mirror_exit_common: handle both abort() and prepare() cases. ++ * for .prepare, returns 0 on success and -errno on failure. ++ * for .abort cases, denoted by abort = true, MUST return 0. ++ */ ++static int mirror_exit_common(Job *job) + { + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); + BlockJob *bjob = &s->common; +@@ -617,7 +623,13 @@ static void mirror_exit(Job *job) + BlockDriverState *target_bs = blk_bs(s->target); + BlockDriverState *mirror_top_bs = s->mirror_top_bs; + Error *local_err = NULL; +- int ret = job->ret; ++ bool abort = job->ret < 0; ++ int ret = 0; ++ ++ if (s->prepared) { ++ return 0; ++ } ++ s->prepared = true; + + bdrv_release_dirty_bitmap(src, s->dirty_bitmap); + +@@ -642,7 +654,7 @@ static void mirror_exit(Job *job) + * required before it could become a backing file of target_bs. */ + bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, + &error_abort); +- if (ret == 0 && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { ++ if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { + BlockDriverState *backing = s->is_none_mode ? 
src : s->base; + if (backing_bs(target_bs) != backing) { + bdrv_set_backing_hd(target_bs, backing, &local_err); +@@ -658,11 +670,8 @@ static void mirror_exit(Job *job) + aio_context_acquire(replace_aio_context); + } + +- if (s->should_complete && ret == 0) { +- BlockDriverState *to_replace = src; +- if (s->to_replace) { +- to_replace = s->to_replace; +- } ++ if (s->should_complete && !abort) { ++ BlockDriverState *to_replace = s->to_replace ?: src; + + if (bdrv_get_flags(target_bs) != bdrv_get_flags(to_replace)) { + bdrv_reopen(target_bs, bdrv_get_flags(to_replace), NULL); +@@ -711,7 +720,18 @@ static void mirror_exit(Job *job) + bdrv_unref(mirror_top_bs); + bdrv_unref(src); + +- job->ret = ret; ++ return ret; ++} ++ ++static int mirror_prepare(Job *job) ++{ ++ return mirror_exit_common(job); ++} ++ ++static void mirror_abort(Job *job) ++{ ++ int ret = mirror_exit_common(job); ++ assert(ret == 0); + } + + static void mirror_throttle(MirrorBlockJob *s) +@@ -1132,7 +1152,8 @@ static const BlockJobDriver mirror_job_driver = { + .user_resume = block_job_user_resume, + .drain = block_job_drain, + .run = mirror_run, +- .exit = mirror_exit, ++ .prepare = mirror_prepare, ++ .abort = mirror_abort, + .pause = mirror_pause, + .complete = mirror_complete, + }, +@@ -1149,7 +1170,8 @@ static const BlockJobDriver commit_active_job_driver = { + .user_resume = block_job_user_resume, + .drain = block_job_drain, + .run = mirror_run, +- .exit = mirror_exit, ++ .prepare = mirror_prepare, ++ .abort = mirror_abort, + .pause = mirror_pause, + .complete = mirror_complete, + }, +-- +1.8.3.1 + diff --git a/0036-block-stream-refactor-stream-to-use-job-callbacks.patch b/0036-block-stream-refactor-stream-to-use-job-callbacks.patch new file mode 100644 index 0000000..4ff194d --- /dev/null +++ b/0036-block-stream-refactor-stream-to-use-job-callbacks.patch @@ -0,0 +1,94 @@ +From 533c77ee076c0050b4c4deb26fda54c085a994ce Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:22 
+0100 +Subject: block/stream: refactor stream to use job callbacks + +RH-Author: John Snow +Message-id: <20180925223431.24791-17-jsnow@redhat.com> +Patchwork-id: 82280 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 16/25] block/stream: refactor stream to use job callbacks +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20180906130225.5118-8-jsnow@redhat.com +Reviewed-by: Jeff Cody +Signed-off-by: Max Reitz +(cherry picked from commit 1b57488acf1beba157bcd8c926e596342bcb5c60) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +--- + block/stream.c | 23 +++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +diff --git a/block/stream.c b/block/stream.c +index 700eb23..81a7ec8 100644 +--- a/block/stream.c ++++ b/block/stream.c +@@ -54,16 +54,16 @@ static int coroutine_fn stream_populate(BlockBackend *blk, + return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ); + } + +-static void stream_exit(Job *job) ++static int stream_prepare(Job *job) + { + StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); + BlockJob *bjob = &s->common; + BlockDriverState *bs = blk_bs(bjob->blk); + BlockDriverState *base = s->base; + Error *local_err = NULL; +- int ret = job->ret; ++ int ret = 0; + +- if (!job_is_cancelled(job) && bs->backing && ret == 0) { ++ if (bs->backing) { + const char *base_id = NULL, *base_fmt = NULL; + if (base) { + base_id = s->backing_file_str; +@@ -75,12 +75,19 @@ static void stream_exit(Job *job) + bdrv_set_backing_hd(bs, base, &local_err); + if (local_err) { + error_report_err(local_err); +- ret = -EPERM; +- goto out; ++ return -EPERM; + } + } + +-out: ++ return ret; ++} ++ ++static void stream_clean(Job *job) ++{ ++ StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); ++ BlockJob *bjob = &s->common; ++ BlockDriverState *bs = blk_bs(bjob->blk); ++ + /* Reopen the image back in 
read-only mode if necessary */ + if (s->bs_flags != bdrv_get_flags(bs)) { + /* Give up write permissions before making it read-only */ +@@ -89,7 +96,6 @@ out: + } + + g_free(s->backing_file_str); +- job->ret = ret; + } + + static int coroutine_fn stream_run(Job *job, Error **errp) +@@ -206,7 +212,8 @@ static const BlockJobDriver stream_job_driver = { + .job_type = JOB_TYPE_STREAM, + .free = block_job_free, + .run = stream_run, +- .exit = stream_exit, ++ .prepare = stream_prepare, ++ .clean = stream_clean, + .user_resume = block_job_user_resume, + .drain = block_job_drain, + }, +-- +1.8.3.1 + diff --git a/0037-tests-blockjob-replace-Blockjob-with-Job.patch b/0037-tests-blockjob-replace-Blockjob-with-Job.patch new file mode 100644 index 0000000..f408d83 --- /dev/null +++ b/0037-tests-blockjob-replace-Blockjob-with-Job.patch @@ -0,0 +1,233 @@ +From ac945e63cca25c453d472834c64aa3a4192729f9 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:23 +0100 +Subject: tests/blockjob: replace Blockjob with Job + +RH-Author: John Snow +Message-id: <20180925223431.24791-18-jsnow@redhat.com> +Patchwork-id: 82281 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 17/25] tests/blockjob: replace Blockjob with Job +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +These tests don't actually test blockjobs anymore, they test +generic Job lifetimes. Change the types accordingly. + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20180906130225.5118-9-jsnow@redhat.com +Reviewed-by: Jeff Cody +Signed-off-by: Max Reitz +(cherry picked from commit 0cc4643b01a0138543e886db8e3bf8a3f74ff8f9) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/test-blockjob.c | 98 ++++++++++++++++++++++++++------------------------- + 1 file changed, 50 insertions(+), 48 deletions(-) + +diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c +index ad4a65b..8e8b680 100644 +--- a/tests/test-blockjob.c ++++ b/tests/test-blockjob.c +@@ -206,18 +206,20 @@ static const BlockJobDriver test_cancel_driver = { + }, + }; + +-static CancelJob *create_common(BlockJob **pjob) ++static CancelJob *create_common(Job **pjob) + { + BlockBackend *blk; +- BlockJob *job; ++ Job *job; ++ BlockJob *bjob; + CancelJob *s; + + blk = create_blk(NULL); +- job = mk_job(blk, "Steve", &test_cancel_driver, true, +- JOB_MANUAL_FINALIZE | JOB_MANUAL_DISMISS); +- job_ref(&job->job); +- assert(job->job.status == JOB_STATUS_CREATED); +- s = container_of(job, CancelJob, common); ++ bjob = mk_job(blk, "Steve", &test_cancel_driver, true, ++ JOB_MANUAL_FINALIZE | JOB_MANUAL_DISMISS); ++ job = &bjob->job; ++ job_ref(job); ++ assert(job->status == JOB_STATUS_CREATED); ++ s = container_of(bjob, CancelJob, common); + s->blk = blk; + + *pjob = job; +@@ -242,7 +244,7 @@ static void cancel_common(CancelJob *s) + + static void test_cancel_created(void) + { +- BlockJob *job; ++ Job *job; + CancelJob *s; + + s = create_common(&job); +@@ -251,119 +253,119 @@ static void test_cancel_created(void) + + static void test_cancel_running(void) + { +- BlockJob *job; ++ Job *job; + CancelJob *s; + + s = create_common(&job); + +- job_start(&job->job); +- assert(job->job.status == JOB_STATUS_RUNNING); ++ job_start(job); ++ assert(job->status == JOB_STATUS_RUNNING); + + cancel_common(s); + } + + static void test_cancel_paused(void) + { +- BlockJob *job; ++ Job *job; + CancelJob *s; + + s = create_common(&job); + +- job_start(&job->job); +- assert(job->job.status == JOB_STATUS_RUNNING); ++ job_start(job); ++ assert(job->status == JOB_STATUS_RUNNING); + +- job_user_pause(&job->job, &error_abort); +- job_enter(&job->job); +- assert(job->job.status == 
JOB_STATUS_PAUSED); ++ job_user_pause(job, &error_abort); ++ job_enter(job); ++ assert(job->status == JOB_STATUS_PAUSED); + + cancel_common(s); + } + + static void test_cancel_ready(void) + { +- BlockJob *job; ++ Job *job; + CancelJob *s; + + s = create_common(&job); + +- job_start(&job->job); +- assert(job->job.status == JOB_STATUS_RUNNING); ++ job_start(job); ++ assert(job->status == JOB_STATUS_RUNNING); + + s->should_converge = true; +- job_enter(&job->job); +- assert(job->job.status == JOB_STATUS_READY); ++ job_enter(job); ++ assert(job->status == JOB_STATUS_READY); + + cancel_common(s); + } + + static void test_cancel_standby(void) + { +- BlockJob *job; ++ Job *job; + CancelJob *s; + + s = create_common(&job); + +- job_start(&job->job); +- assert(job->job.status == JOB_STATUS_RUNNING); ++ job_start(job); ++ assert(job->status == JOB_STATUS_RUNNING); + + s->should_converge = true; +- job_enter(&job->job); +- assert(job->job.status == JOB_STATUS_READY); ++ job_enter(job); ++ assert(job->status == JOB_STATUS_READY); + +- job_user_pause(&job->job, &error_abort); +- job_enter(&job->job); +- assert(job->job.status == JOB_STATUS_STANDBY); ++ job_user_pause(job, &error_abort); ++ job_enter(job); ++ assert(job->status == JOB_STATUS_STANDBY); + + cancel_common(s); + } + + static void test_cancel_pending(void) + { +- BlockJob *job; ++ Job *job; + CancelJob *s; + + s = create_common(&job); + +- job_start(&job->job); +- assert(job->job.status == JOB_STATUS_RUNNING); ++ job_start(job); ++ assert(job->status == JOB_STATUS_RUNNING); + + s->should_converge = true; +- job_enter(&job->job); +- assert(job->job.status == JOB_STATUS_READY); ++ job_enter(job); ++ assert(job->status == JOB_STATUS_READY); + +- job_complete(&job->job, &error_abort); +- job_enter(&job->job); ++ job_complete(job, &error_abort); ++ job_enter(job); + while (!s->completed) { + aio_poll(qemu_get_aio_context(), true); + } +- assert(job->job.status == JOB_STATUS_PENDING); ++ assert(job->status == 
JOB_STATUS_PENDING); + + cancel_common(s); + } + + static void test_cancel_concluded(void) + { +- BlockJob *job; ++ Job *job; + CancelJob *s; + + s = create_common(&job); + +- job_start(&job->job); +- assert(job->job.status == JOB_STATUS_RUNNING); ++ job_start(job); ++ assert(job->status == JOB_STATUS_RUNNING); + + s->should_converge = true; +- job_enter(&job->job); +- assert(job->job.status == JOB_STATUS_READY); ++ job_enter(job); ++ assert(job->status == JOB_STATUS_READY); + +- job_complete(&job->job, &error_abort); +- job_enter(&job->job); ++ job_complete(job, &error_abort); ++ job_enter(job); + while (!s->completed) { + aio_poll(qemu_get_aio_context(), true); + } +- assert(job->job.status == JOB_STATUS_PENDING); ++ assert(job->status == JOB_STATUS_PENDING); + +- job_finalize(&job->job, &error_abort); +- assert(job->job.status == JOB_STATUS_CONCLUDED); ++ job_finalize(job, &error_abort); ++ assert(job->status == JOB_STATUS_CONCLUDED); + + cancel_common(s); + } +-- +1.8.3.1 + diff --git a/0038-tests-test-blockjob-remove-exit-callback.patch b/0038-tests-test-blockjob-remove-exit-callback.patch new file mode 100644 index 0000000..9bd1a7c --- /dev/null +++ b/0038-tests-test-blockjob-remove-exit-callback.patch @@ -0,0 +1,88 @@ +From 62fd56870fb6296f795c9fc7f5965d83a72dabac Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:24 +0100 +Subject: tests/test-blockjob: remove exit callback + +RH-Author: John Snow +Message-id: <20180925223431.24791-19-jsnow@redhat.com> +Patchwork-id: 82276 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 18/25] tests/test-blockjob: remove exit callback +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +We remove the exit callback and the completed boolean along with it. +We can simulate it just fine by waiting for the job to defer to the +main loop, and then giving it one final kick to get the main loop +portion to run. 
+ +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20180906130225.5118-10-jsnow@redhat.com +Reviewed-by: Jeff Cody +Signed-off-by: Max Reitz +(cherry picked from commit 977d26fdbeb35d8d2d0f203f9556d44a353e0dfd) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +--- + tests/test-blockjob.c | 16 ++++++---------- + 1 file changed, 6 insertions(+), 10 deletions(-) + +diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c +index 8e8b680..de4c1c2 100644 +--- a/tests/test-blockjob.c ++++ b/tests/test-blockjob.c +@@ -160,15 +160,8 @@ typedef struct CancelJob { + BlockBackend *blk; + bool should_converge; + bool should_complete; +- bool completed; + } CancelJob; + +-static void cancel_job_exit(Job *job) +-{ +- CancelJob *s = container_of(job, CancelJob, common.job); +- s->completed = true; +-} +- + static void cancel_job_complete(Job *job, Error **errp) + { + CancelJob *s = container_of(job, CancelJob, common.job); +@@ -201,7 +194,6 @@ static const BlockJobDriver test_cancel_driver = { + .user_resume = block_job_user_resume, + .drain = block_job_drain, + .run = cancel_job_run, +- .exit = cancel_job_exit, + .complete = cancel_job_complete, + }, + }; +@@ -335,9 +327,11 @@ static void test_cancel_pending(void) + + job_complete(job, &error_abort); + job_enter(job); +- while (!s->completed) { ++ while (!job->deferred_to_main_loop) { + aio_poll(qemu_get_aio_context(), true); + } ++ assert(job->status == JOB_STATUS_READY); ++ aio_poll(qemu_get_aio_context(), true); + assert(job->status == JOB_STATUS_PENDING); + + cancel_common(s); +@@ -359,9 +353,11 @@ static void test_cancel_concluded(void) + + job_complete(job, &error_abort); + job_enter(job); +- while (!s->completed) { ++ while (!job->deferred_to_main_loop) { + aio_poll(qemu_get_aio_context(), true); + } ++ assert(job->status == JOB_STATUS_READY); ++ aio_poll(qemu_get_aio_context(), true); + assert(job->status == JOB_STATUS_PENDING); + + job_finalize(job, &error_abort); +-- +1.8.3.1 + diff --git 
a/0039-tests-test-blockjob-txn-move-.exit-to-.clean.patch b/0039-tests-test-blockjob-txn-move-.exit-to-.clean.patch new file mode 100644 index 0000000..ef6db4f --- /dev/null +++ b/0039-tests-test-blockjob-txn-move-.exit-to-.clean.patch @@ -0,0 +1,53 @@ +From 6247c4b10e3fb6c677947a503ddad961cb71faff Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:25 +0100 +Subject: tests/test-blockjob-txn: move .exit to .clean + +RH-Author: John Snow +Message-id: <20180925223431.24791-20-jsnow@redhat.com> +Patchwork-id: 82282 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 19/25] tests/test-blockjob-txn: move .exit to .clean +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +The exit callback in this test actually only performs cleanup. + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20180906130225.5118-11-jsnow@redhat.com +Reviewed-by: Jeff Cody +Signed-off-by: Max Reitz +(cherry picked from commit e4dad4275d51b594c8abbe726a4927f6f388e427) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/test-blockjob-txn.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c +index ef29f35..86606f9 100644 +--- a/tests/test-blockjob-txn.c ++++ b/tests/test-blockjob-txn.c +@@ -24,7 +24,7 @@ typedef struct { + int *result; + } TestBlockJob; + +-static void test_block_job_exit(Job *job) ++static void test_block_job_clean(Job *job) + { + BlockJob *bjob = container_of(job, BlockJob, job); + BlockDriverState *bs = blk_bs(bjob->blk); +@@ -73,7 +73,7 @@ static const BlockJobDriver test_block_job_driver = { + .user_resume = block_job_user_resume, + .drain = block_job_drain, + .run = test_block_job_run, +- .exit = test_block_job_exit, ++ .clean = test_block_job_clean, + }, + }; + +-- +1.8.3.1 + diff --git a/0040-jobs-remove-.exit-callback.patch b/0040-jobs-remove-.exit-callback.patch new file mode 100644 index 0000000..00704a6 --- /dev/null +++ b/0040-jobs-remove-.exit-callback.patch @@ -0,0 +1,156 @@ +From c2c10f4fac6757d292f8b3d9ac7723a718e596aa Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:26 +0100 +Subject: jobs: remove .exit callback + +RH-Author: John Snow +Message-id: <20180925223431.24791-21-jsnow@redhat.com> +Patchwork-id: 82283 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 20/25] jobs: remove .exit callback +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Now that all of the jobs use the component finalization callbacks, +there's no use for the heavy-hammer .exit callback anymore. + +job_exit becomes a glorified type shim so that we can call +job_completed from aio_bh_schedule_oneshot. + +Move these three functions down into job.c to eliminate a +forward reference. 
+ +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20180906130225.5118-12-jsnow@redhat.com +Reviewed-by: Jeff Cody +Signed-off-by: Max Reitz +(cherry picked from commit ccbfb3319aa265e71c16dac976ff857d0a5bcb4b) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +--- + include/qemu/job.h | 11 -------- + job.c | 77 ++++++++++++++++++++++++------------------------------ + 2 files changed, 34 insertions(+), 54 deletions(-) + +diff --git a/include/qemu/job.h b/include/qemu/job.h +index e0cff70..5cb0681 100644 +--- a/include/qemu/job.h ++++ b/include/qemu/job.h +@@ -222,17 +222,6 @@ struct JobDriver { + void (*drain)(Job *job); + + /** +- * If the callback is not NULL, exit will be invoked from the main thread +- * when the job's coroutine has finished, but before transactional +- * convergence; before @prepare or @abort. +- * +- * FIXME TODO: This callback is only temporary to transition remaining jobs +- * to prepare/commit/abort/clean callbacks and will be removed before 3.1. +- * is released. +- */ +- void (*exit)(Job *job); +- +- /** + * If the callback is not NULL, prepare will be invoked when all the jobs + * belonging to the same transaction complete; or upon this job's completion + * if it is not in a transaction. +diff --git a/job.c b/job.c +index e8d7aee..87c9aa4 100644 +--- a/job.c ++++ b/job.c +@@ -535,49 +535,6 @@ void job_drain(Job *job) + } + } + +-static void job_completed(Job *job); +- +-static void job_exit(void *opaque) +-{ +- Job *job = (Job *)opaque; +- AioContext *aio_context = job->aio_context; +- +- if (job->driver->exit) { +- aio_context_acquire(aio_context); +- job->driver->exit(job); +- aio_context_release(aio_context); +- } +- job_completed(job); +-} +- +-/** +- * All jobs must allow a pause point before entering their job proper. This +- * ensures that jobs can be paused prior to being started, then resumed later. 
+- */ +-static void coroutine_fn job_co_entry(void *opaque) +-{ +- Job *job = opaque; +- +- assert(job && job->driver && job->driver->run); +- job_pause_point(job); +- job->ret = job->driver->run(job, &job->err); +- job->deferred_to_main_loop = true; +- aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job); +-} +- +- +-void job_start(Job *job) +-{ +- assert(job && !job_started(job) && job->paused && +- job->driver && job->driver->run); +- job->co = qemu_coroutine_create(job_co_entry, job); +- job->pause_count--; +- job->busy = true; +- job->paused = false; +- job_state_transition(job, JOB_STATUS_RUNNING); +- aio_co_enter(job->aio_context, job->co); +-} +- + /* Assumes the block_job_mutex is held */ + static bool job_timer_not_pending(Job *job) + { +@@ -894,6 +851,40 @@ static void job_completed(Job *job) + } + } + ++/** Useful only as a type shim for aio_bh_schedule_oneshot. */ ++static void job_exit(void *opaque) ++{ ++ Job *job = (Job *)opaque; ++ job_completed(job); ++} ++ ++/** ++ * All jobs must allow a pause point before entering their job proper. This ++ * ensures that jobs can be paused prior to being started, then resumed later. 
++ */ ++static void coroutine_fn job_co_entry(void *opaque) ++{ ++ Job *job = opaque; ++ ++ assert(job && job->driver && job->driver->run); ++ job_pause_point(job); ++ job->ret = job->driver->run(job, &job->err); ++ job->deferred_to_main_loop = true; ++ aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job); ++} ++ ++void job_start(Job *job) ++{ ++ assert(job && !job_started(job) && job->paused && ++ job->driver && job->driver->run); ++ job->co = qemu_coroutine_create(job_co_entry, job); ++ job->pause_count--; ++ job->busy = true; ++ job->paused = false; ++ job_state_transition(job, JOB_STATUS_RUNNING); ++ aio_co_enter(job->aio_context, job->co); ++} ++ + void job_cancel(Job *job, bool force) + { + if (job->status == JOB_STATUS_CONCLUDED) { +-- +1.8.3.1 + diff --git a/0041-qapi-block-commit-expose-new-job-properties.patch b/0041-qapi-block-commit-expose-new-job-properties.patch new file mode 100644 index 0000000..a5ec394 --- /dev/null +++ b/0041-qapi-block-commit-expose-new-job-properties.patch @@ -0,0 +1,90 @@ +From ce81bd3fa7316bcdee5e121e6ea71c7b2e1e81e1 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:27 +0100 +Subject: qapi/block-commit: expose new job properties + +RH-Author: John Snow +Message-id: <20180925223431.24791-22-jsnow@redhat.com> +Patchwork-id: 82285 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 21/25] qapi/block-commit: expose new job properties +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20180906130225.5118-13-jsnow@redhat.com +Reviewed-by: Jeff Cody +Signed-off-by: Max Reitz +(cherry picked from commit 96fbf5345f60a87fab8e7ea79a2406f381027db9) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. 
de Paula +--- + blockdev.c | 8 ++++++++ + qapi/block-core.json | 16 +++++++++++++++- + 2 files changed, 23 insertions(+), 1 deletion(-) + +diff --git a/blockdev.c b/blockdev.c +index c2e6402..8efc47e 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3314,6 +3314,8 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, + bool has_backing_file, const char *backing_file, + bool has_speed, int64_t speed, + bool has_filter_node_name, const char *filter_node_name, ++ bool has_auto_finalize, bool auto_finalize, ++ bool has_auto_dismiss, bool auto_dismiss, + Error **errp) + { + BlockDriverState *bs; +@@ -3333,6 +3335,12 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, + if (!has_filter_node_name) { + filter_node_name = NULL; + } ++ if (has_auto_finalize && !auto_finalize) { ++ job_flags |= JOB_MANUAL_FINALIZE; ++ } ++ if (has_auto_dismiss && !auto_dismiss) { ++ job_flags |= JOB_MANUAL_DISMISS; ++ } + + /* Important Note: + * libvirt relies on the DeviceNotFound error class in order to probe for +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 5b9084a..ca7d1b3 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -1498,6 +1498,19 @@ + # above @top. If this option is not given, a node name is + # autogenerated. (Since: 2.9) + # ++# @auto-finalize: When false, this job will wait in a PENDING state after it has ++# finished its work, waiting for @block-job-finalize before ++# making any block graph changes. ++# When true, this job will automatically ++# perform its abort or commit actions. ++# Defaults to true. (Since 3.1) ++# ++# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it ++# has completely ceased all work, and awaits @block-job-dismiss. ++# When true, this job will automatically disappear from the query ++# list without user intervention. ++# Defaults to true. 
(Since 3.1) ++# + # Returns: Nothing on success + # If commit or stream is already active on this device, DeviceInUse + # If @device does not exist, DeviceNotFound +@@ -1518,7 +1531,8 @@ + { 'command': 'block-commit', + 'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', '*top': 'str', + '*backing-file': 'str', '*speed': 'int', +- '*filter-node-name': 'str' } } ++ '*filter-node-name': 'str', ++ '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } } + + ## + # @drive-backup: +-- +1.8.3.1 + diff --git a/0042-qapi-block-mirror-expose-new-job-properties.patch b/0042-qapi-block-mirror-expose-new-job-properties.patch new file mode 100644 index 0000000..52f77cd --- /dev/null +++ b/0042-qapi-block-mirror-expose-new-job-properties.patch @@ -0,0 +1,144 @@ +From 318445193efc33c06e63e021a988814d49658a0f Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Thu, 6 Sep 2018 09:02:22 -0400 +Subject: qapi/block-mirror: expose new job properties + +RH-Author: John Snow +Message-id: <20180925223431.24791-23-jsnow@redhat.com> +Patchwork-id: 82274 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 22/25] qapi/block-mirror: expose new j +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20180906130225.5118-14-jsnow@redhat.com +Reviewed-by: Jeff Cody +Signed-off-by: Max Reitz +(cherry picked from commit a6b58adec28ff43c0f29ff7c95cdd5d11e87cf61) +Signed-off-by: John Snow +--- + blockdev.c | 14 ++++++++++++++ + qapi/block-core.json | 30 ++++++++++++++++++++++++++++-- + 2 files changed, 42 insertions(+), 2 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 8efc47e..bbb3279 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3707,6 +3707,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, + bool has_filter_node_name, + const char *filter_node_name, + bool has_copy_mode, MirrorCopyMode copy_mode, ++ bool has_auto_finalize, bool auto_finalize, ++ bool 
has_auto_dismiss, bool auto_dismiss, + Error **errp) + { + int job_flags = JOB_DEFAULT; +@@ -3735,6 +3737,12 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, + if (!has_copy_mode) { + copy_mode = MIRROR_COPY_MODE_BACKGROUND; + } ++ if (has_auto_finalize && !auto_finalize) { ++ job_flags |= JOB_MANUAL_FINALIZE; ++ } ++ if (has_auto_dismiss && !auto_dismiss) { ++ job_flags |= JOB_MANUAL_DISMISS; ++ } + + if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity", +@@ -3912,6 +3920,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + arg->has_unmap, arg->unmap, + false, NULL, + arg->has_copy_mode, arg->copy_mode, ++ arg->has_auto_finalize, arg->auto_finalize, ++ arg->has_auto_dismiss, arg->auto_dismiss, + &local_err); + bdrv_unref(target_bs); + error_propagate(errp, local_err); +@@ -3933,6 +3943,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, + bool has_filter_node_name, + const char *filter_node_name, + bool has_copy_mode, MirrorCopyMode copy_mode, ++ bool has_auto_finalize, bool auto_finalize, ++ bool has_auto_dismiss, bool auto_dismiss, + Error **errp) + { + BlockDriverState *bs; +@@ -3966,6 +3978,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, + true, true, + has_filter_node_name, filter_node_name, + has_copy_mode, copy_mode, ++ has_auto_finalize, auto_finalize, ++ has_auto_dismiss, auto_dismiss, + &local_err); + error_propagate(errp, local_err); + +diff --git a/qapi/block-core.json b/qapi/block-core.json +index ca7d1b3..9193d49 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -1732,6 +1732,18 @@ + # @copy-mode: when to copy data to the destination; defaults to 'background' + # (Since: 3.0) + # ++# @auto-finalize: When false, this job will wait in a PENDING state after it has ++# finished its work, waiting for @block-job-finalize before ++# making any block graph changes. 
++# When true, this job will automatically ++# perform its abort or commit actions. ++# Defaults to true. (Since 3.1) ++# ++# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it ++# has completely ceased all work, and awaits @block-job-dismiss. ++# When true, this job will automatically disappear from the query ++# list without user intervention. ++# Defaults to true. (Since 3.1) + # Since: 1.3 + ## + { 'struct': 'DriveMirror', +@@ -1741,7 +1753,8 @@ + '*speed': 'int', '*granularity': 'uint32', + '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', + '*on-target-error': 'BlockdevOnError', +- '*unmap': 'bool', '*copy-mode': 'MirrorCopyMode' } } ++ '*unmap': 'bool', '*copy-mode': 'MirrorCopyMode', ++ '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } } + + ## + # @BlockDirtyBitmap: +@@ -2007,6 +2020,18 @@ + # @copy-mode: when to copy data to the destination; defaults to 'background' + # (Since: 3.0) + # ++# @auto-finalize: When false, this job will wait in a PENDING state after it has ++# finished its work, waiting for @block-job-finalize before ++# making any block graph changes. ++# When true, this job will automatically ++# perform its abort or commit actions. ++# Defaults to true. (Since 3.1) ++# ++# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it ++# has completely ceased all work, and awaits @block-job-dismiss. ++# When true, this job will automatically disappear from the query ++# list without user intervention. ++# Defaults to true. (Since 3.1) + # Returns: nothing on success. 
+ # + # Since: 2.6 +@@ -2028,7 +2053,8 @@ + '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', + '*on-target-error': 'BlockdevOnError', + '*filter-node-name': 'str', +- '*copy-mode': 'MirrorCopyMode' } } ++ '*copy-mode': 'MirrorCopyMode', ++ '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } } + + ## + # @block_set_io_throttle: +-- +1.8.3.1 + diff --git a/0043-qapi-block-stream-expose-new-job-properties.patch b/0043-qapi-block-stream-expose-new-job-properties.patch new file mode 100644 index 0000000..4e5a8fa --- /dev/null +++ b/0043-qapi-block-stream-expose-new-job-properties.patch @@ -0,0 +1,108 @@ +From 67fa4ccaffcd7e2698d30597f51093903aef4a5d Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:29 +0100 +Subject: qapi/block-stream: expose new job properties + +RH-Author: John Snow +Message-id: <20180925223431.24791-24-jsnow@redhat.com> +Patchwork-id: 82278 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 23/25] qapi/block-stream: expose new job properties +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20180906130225.5118-15-jsnow@redhat.com +Reviewed-by: Jeff Cody +Signed-off-by: Max Reitz +(cherry picked from commit 241ca1ab78542f02e666636e0323bcfe3cb1d5e8) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. 
de Paula +--- + blockdev.c | 9 +++++++++ + hmp.c | 5 +++-- + qapi/block-core.json | 16 +++++++++++++++- + 3 files changed, 27 insertions(+), 3 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index bbb3279..806531d 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3226,6 +3226,8 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, + bool has_backing_file, const char *backing_file, + bool has_speed, int64_t speed, + bool has_on_error, BlockdevOnError on_error, ++ bool has_auto_finalize, bool auto_finalize, ++ bool has_auto_dismiss, bool auto_dismiss, + Error **errp) + { + BlockDriverState *bs, *iter; +@@ -3295,6 +3297,13 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, + /* backing_file string overrides base bs filename */ + base_name = has_backing_file ? backing_file : base_name; + ++ if (has_auto_finalize && !auto_finalize) { ++ job_flags |= JOB_MANUAL_FINALIZE; ++ } ++ if (has_auto_dismiss && !auto_dismiss) { ++ job_flags |= JOB_MANUAL_DISMISS; ++ } ++ + stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name, + job_flags, has_speed ? 
speed : 0, on_error, &local_err); + if (local_err) { +diff --git a/hmp.c b/hmp.c +index 2aafb50..e3c3ecd 100644 +--- a/hmp.c ++++ b/hmp.c +@@ -1865,8 +1865,9 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict) + int64_t speed = qdict_get_try_int(qdict, "speed", 0); + + qmp_block_stream(true, device, device, base != NULL, base, false, NULL, +- false, NULL, qdict_haskey(qdict, "speed"), speed, +- true, BLOCKDEV_ON_ERROR_REPORT, &error); ++ false, NULL, qdict_haskey(qdict, "speed"), speed, true, ++ BLOCKDEV_ON_ERROR_REPORT, false, false, false, false, ++ &error); + + hmp_handle_error(mon, &error); + } +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 9193d49..d1a9c3e 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -2320,6 +2320,19 @@ + # 'stop' and 'enospc' can only be used if the block device + # supports io-status (see BlockInfo). Since 1.3. + # ++# @auto-finalize: When false, this job will wait in a PENDING state after it has ++# finished its work, waiting for @block-job-finalize before ++# making any block graph changes. ++# When true, this job will automatically ++# perform its abort or commit actions. ++# Defaults to true. (Since 3.1) ++# ++# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it ++# has completely ceased all work, and awaits @block-job-dismiss. ++# When true, this job will automatically disappear from the query ++# list without user intervention. ++# Defaults to true. (Since 3.1) ++# + # Returns: Nothing on success. If @device does not exist, DeviceNotFound. 
+ # + # Since: 1.1 +@@ -2335,7 +2348,8 @@ + { 'command': 'block-stream', + 'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', + '*base-node': 'str', '*backing-file': 'str', '*speed': 'int', +- '*on-error': 'BlockdevOnError' } } ++ '*on-error': 'BlockdevOnError', ++ '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } } + + ## + # @block-job-set-speed: +-- +1.8.3.1 + diff --git a/0044-block-backup-qapi-documentation-fixup.patch b/0044-block-backup-qapi-documentation-fixup.patch new file mode 100644 index 0000000..c8b3273 --- /dev/null +++ b/0044-block-backup-qapi-documentation-fixup.patch @@ -0,0 +1,73 @@ +From c104ce571b585040ca4d0c77419d2ca06c2087b8 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:30 +0100 +Subject: block/backup: qapi documentation fixup + +RH-Author: John Snow +Message-id: <20180925223431.24791-25-jsnow@redhat.com> +Patchwork-id: 82284 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 24/25] block/backup: qapi documentation fixup +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Fix documentation to match the other jobs amended for 3.1. + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20180906130225.5118-16-jsnow@redhat.com +Reviewed-by: Jeff Cody +Signed-off-by: Max Reitz +(cherry picked from commit dfaff2c37dfa52ab045cf87503e60ea56317230a) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +--- + qapi/block-core.json | 18 ++++++++++-------- + 1 file changed, 10 insertions(+), 8 deletions(-) + +diff --git a/qapi/block-core.json b/qapi/block-core.json +index d1a9c3e..2953991 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -1272,13 +1272,14 @@ + # a different block device than @device). + # + # @auto-finalize: When false, this job will wait in a PENDING state after it has +-# finished its work, waiting for @block-job-finalize. +-# When true, this job will automatically perform its abort or +-# commit actions. 
++# finished its work, waiting for @block-job-finalize before ++# making any block graph changes. ++# When true, this job will automatically ++# perform its abort or commit actions. + # Defaults to true. (Since 2.12) + # + # @auto-dismiss: When false, this job will wait in a CONCLUDED state after it +-# has completed ceased all work, and wait for @block-job-dismiss. ++# has completely ceased all work, and awaits @block-job-dismiss. + # When true, this job will automatically disappear from the query + # list without user intervention. + # Defaults to true. (Since 2.12) +@@ -1327,13 +1328,14 @@ + # a different block device than @device). + # + # @auto-finalize: When false, this job will wait in a PENDING state after it has +-# finished its work, waiting for @block-job-finalize. +-# When true, this job will automatically perform its abort or +-# commit actions. ++# finished its work, waiting for @block-job-finalize before ++# making any block graph changes. ++# When true, this job will automatically ++# perform its abort or commit actions. + # Defaults to true. (Since 2.12) + # + # @auto-dismiss: When false, this job will wait in a CONCLUDED state after it +-# has completed ceased all work, and wait for @block-job-dismiss. ++# has completely ceased all work, and awaits @block-job-dismiss. + # When true, this job will automatically disappear from the query + # list without user intervention. + # Defaults to true. 
(Since 2.12) +-- +1.8.3.1 + diff --git a/0045-blockdev-document-transactional-shortcomings.patch b/0045-blockdev-document-transactional-shortcomings.patch new file mode 100644 index 0000000..7562949 --- /dev/null +++ b/0045-blockdev-document-transactional-shortcomings.patch @@ -0,0 +1,53 @@ +From 53dc1dce0b91a7ebb1c32d10a7482461c01326d6 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 25 Sep 2018 22:34:31 +0100 +Subject: blockdev: document transactional shortcomings + +RH-Author: John Snow +Message-id: <20180925223431.24791-26-jsnow@redhat.com> +Patchwork-id: 82286 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 25/25] blockdev: document transactional shortcomings +Bugzilla: 1632939 +RH-Acked-by: Jeffrey Cody +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +Presently only the backup job really guarantees what one would consider +transactional semantics. To guard against someone helpfully adding them +in the future, document that there are shortcomings in the model that +would need to be audited at that time. + +Signed-off-by: John Snow +Message-id: 20180906130225.5118-17-jsnow@redhat.com +Reviewed-by: Jeff Cody +Reviewed-by: Max Reitz +Signed-off-by: Max Reitz +(cherry picked from commit 66da04ddd3dcb8c61ee664b6faced132da002006) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +--- + blockdev.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/blockdev.c b/blockdev.c +index 806531d..d97202a 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -2292,7 +2292,13 @@ static const BlkActionOps actions[] = { + .instance_size = sizeof(BlockDirtyBitmapState), + .prepare = block_dirty_bitmap_disable_prepare, + .abort = block_dirty_bitmap_disable_abort, +- } ++ }, ++ /* Where are transactions for MIRROR, COMMIT and STREAM? ++ * Although these blockjobs use transaction callbacks like the backup job, ++ * these jobs do not necessarily adhere to transaction semantics. 
++ * These jobs may not fully undo all of their actions on abort, nor do they ++ * necessarily work in transactions with more than one job in them. ++ */ + }; + + /** +-- +1.8.3.1 + diff --git a/85-kvm.preset b/85-kvm.preset new file mode 100644 index 0000000..8024052 --- /dev/null +++ b/85-kvm.preset @@ -0,0 +1,5 @@ +# Enable kvm-setup by default. This can have odd side effects on +# PowerNV systems that aren't intended as KVM hosts, but at present we +# only support RHEL on PowerNV for the purpose of being a RHEV host. + +enable kvm-setup.service diff --git a/95-kvm-memlock.conf b/95-kvm-memlock.conf new file mode 100644 index 0000000..fc59dbe --- /dev/null +++ b/95-kvm-memlock.conf @@ -0,0 +1,10 @@ +# The KVM HV implementation on Power can require a significant amount +# of unswappable memory (about half of which also needs to be host +# physically contiguous) to hold the guest's Hash Page Table (HPT) - +# roughly 1/64th of the guest's RAM size, minimum 16MiB. +# +# These limits allow unprivileged users to start smallish VMs, such as +# those used by libguestfs. 
+# +* hard memlock 65536 +* soft memlock 65536 diff --git a/99-qemu-guest-agent.rules b/99-qemu-guest-agent.rules new file mode 100644 index 0000000..8a290ab --- /dev/null +++ b/99-qemu-guest-agent.rules @@ -0,0 +1,2 @@ +SUBSYSTEM=="virtio-ports", ATTR{name}=="org.qemu.guest_agent.0", \ + TAG+="systemd" ENV{SYSTEMD_WANTS}="qemu-guest-agent.service" diff --git a/bridge.conf b/bridge.conf new file mode 100644 index 0000000..a573665 --- /dev/null +++ b/bridge.conf @@ -0,0 +1 @@ +allow virbr0 diff --git a/ksm.service b/ksm.service new file mode 100644 index 0000000..35c6f1d --- /dev/null +++ b/ksm.service @@ -0,0 +1,13 @@ +[Unit] +Description=Kernel Samepage Merging +ConditionPathExists=/sys/kernel/mm/ksm + +[Service] +Type=oneshot +RemainAfterExit=yes +EnvironmentFile=-/etc/sysconfig/ksm +ExecStart=/usr/libexec/ksmctl start +ExecStop=/usr/libexec/ksmctl stop + +[Install] +WantedBy=multi-user.target diff --git a/ksm.sysconfig b/ksm.sysconfig new file mode 100644 index 0000000..d99656d --- /dev/null +++ b/ksm.sysconfig @@ -0,0 +1,4 @@ +# The maximum number of unswappable kernel pages +# which may be allocated by ksm (0 for unlimited) +# If unset, defaults to half of total memory +# KSM_MAX_KERNEL_PAGES= diff --git a/ksmctl.c b/ksmctl.c new file mode 100644 index 0000000..af39591 --- /dev/null +++ b/ksmctl.c @@ -0,0 +1,77 @@ +/* Start/stop KSM, for systemd. + * Copyright (C) 2009, 2011 Red Hat, Inc. + * Written by Paolo Bonzini . + * Based on the original sysvinit script by Dan Kenigsberg + * This file is distributed under the GNU General Public License, version 2 + * or later. 
*/ + +#include <unistd.h> +#include <stdio.h> +#include <limits.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#define KSM_MAX_KERNEL_PAGES_FILE "/sys/kernel/mm/ksm/max_kernel_pages" +#define KSM_RUN_FILE "/sys/kernel/mm/ksm/run" + +char *program_name; + +int usage(void) +{ + fprintf(stderr, "Usage: %s {start|stop}\n", program_name); + return 1; +} + +int write_value(uint64_t value, char *filename) +{ + FILE *fp; + if (!(fp = fopen(filename, "w")) || + fprintf(fp, "%llu\n", (unsigned long long) value) == EOF || + fflush(fp) == EOF || + fclose(fp) == EOF) + return 1; + + return 0; +} + +uint64_t ksm_max_kernel_pages() +{ + char *var = getenv("KSM_MAX_KERNEL_PAGES"); + char *endptr; + uint64_t value; + if (var && *var) { + value = strtoll(var, &endptr, 0); + if (value < LLONG_MAX && !*endptr) + return value; + } + /* Unless KSM_MAX_KERNEL_PAGES is set, let KSM munch up to half of + * total memory. */ + return sysconf(_SC_PHYS_PAGES) / 2; +} + +int start(void) +{ + if (access(KSM_MAX_KERNEL_PAGES_FILE, R_OK) >= 0) + write_value(ksm_max_kernel_pages(), KSM_MAX_KERNEL_PAGES_FILE); + return write_value(1, KSM_RUN_FILE); +} + +int stop(void) +{ + return write_value(0, KSM_RUN_FILE); +} + +int main(int argc, char **argv) +{ + program_name = argv[0]; + if (argc < 2) { + return usage(); + } else if (!strcmp(argv[1], "start")) { + return start(); + } else if (!strcmp(argv[1], "stop")) { + return stop(); + } else { + return usage(); + } +} diff --git a/ksmtuned b/ksmtuned new file mode 100644 index 0000000..7bc5743 --- /dev/null +++ b/ksmtuned @@ -0,0 +1,139 @@ +#!/bin/bash +# +# Copyright 2009 Red Hat, Inc. and/or its affiliates. +# Released under the GPL +# +# Author: Dan Kenigsberg +# +# ksmtuned - a simple script that controls whether (and with what vigor) ksm +# should search for duplicated pages. +# +# starts ksm when memory committed to qemu processes exceeds a threshold, and +# makes ksm work harder and harder until memory load falls below that +# threshold. 
+# +# send SIGUSR1 to this process right after a new qemu process is started, or +# following its death, to retune ksm accordingly +# +# needs testing and ironing. contact danken@redhat.com if something breaks. + +if [ -f /etc/ksmtuned.conf ]; then + . /etc/ksmtuned.conf +fi + +debug() { + if [ -n "$DEBUG" ]; then + s="`/bin/date`: $*" + [ -n "$LOGFILE" ] && echo "$s" >> "$LOGFILE" || echo "$s" + fi +} + + +KSM_MONITOR_INTERVAL=${KSM_MONITOR_INTERVAL:-60} +KSM_NPAGES_BOOST=${KSM_NPAGES_BOOST:-300} +KSM_NPAGES_DECAY=${KSM_NPAGES_DECAY:--50} + +KSM_NPAGES_MIN=${KSM_NPAGES_MIN:-64} +KSM_NPAGES_MAX=${KSM_NPAGES_MAX:-1250} +# millisecond sleep between ksm scans for 16Gb server. Smaller servers sleep +# more, bigger sleep less. +KSM_SLEEP_MSEC=${KSM_SLEEP_MSEC:-10} + +KSM_THRES_COEF=${KSM_THRES_COEF:-20} +KSM_THRES_CONST=${KSM_THRES_CONST:-2048} + +total=`awk '/^MemTotal:/ {print $2}' /proc/meminfo` +debug total $total + +npages=0 +sleep=$[KSM_SLEEP_MSEC * 16 * 1024 * 1024 / total] +[ $sleep -le 10 ] && sleep=10 +debug sleep $sleep +thres=$[total * KSM_THRES_COEF / 100] +if [ $KSM_THRES_CONST -gt $thres ]; then + thres=$KSM_THRES_CONST +fi +debug thres $thres + +KSMCTL () { + case x$1 in + xstop) + echo 0 > /sys/kernel/mm/ksm/run + ;; + xstart) + echo $2 > /sys/kernel/mm/ksm/pages_to_scan + echo $3 > /sys/kernel/mm/ksm/sleep_millisecs + echo 1 > /sys/kernel/mm/ksm/run + ;; + esac +} + +committed_memory () { + # calculate how much memory is committed to running qemu processes + local pidlist + pidlist=$(pgrep -d ' ' -- '^qemu(-(kvm|system-.+)|:.{1,11})$') + if [ -n "$pidlist" ]; then + ps -p "$pidlist" -o rsz= + fi | awk '{ sum += $1 }; END { print 0+sum }' +} + +free_memory () { + awk '/^(MemFree|Buffers|Cached):/ {free += $2}; END {print free}' \ + /proc/meminfo +} + +increase_npages() { + local delta + delta=${1:-0} + npages=$[npages + delta] + if [ $npages -lt $KSM_NPAGES_MIN ]; then + npages=$KSM_NPAGES_MIN + elif [ $npages -gt $KSM_NPAGES_MAX ]; then + 
npages=$KSM_NPAGES_MAX + fi + echo $npages +} + + +adjust () { + local free committed + free=`free_memory` + committed=`committed_memory` + debug committed $committed free $free + if [ $[committed + thres] -lt $total -a $free -gt $thres ]; then + KSMCTL stop + debug "$[committed + thres] < $total and free > $thres, stop ksm" + return 1 + fi + debug "$[committed + thres] > $total, start ksm" + if [ $free -lt $thres ]; then + npages=`increase_npages $KSM_NPAGES_BOOST` + debug "$free < $thres, boost" + else + npages=`increase_npages $KSM_NPAGES_DECAY` + debug "$free > $thres, decay" + fi + KSMCTL start $npages $sleep + debug "KSMCTL start $npages $sleep" + return 0 +} + +function nothing () { + : +} + +loop () { + trap nothing SIGUSR1 + while true + do + sleep $KSM_MONITOR_INTERVAL & + wait $! + adjust + done +} + +PIDFILE=${PIDFILE-/var/run/ksmtune.pid} +if touch "$PIDFILE"; then + loop & + echo $! > "$PIDFILE" +fi diff --git a/ksmtuned.conf b/ksmtuned.conf new file mode 100644 index 0000000..fc4518c --- /dev/null +++ b/ksmtuned.conf @@ -0,0 +1,21 @@ +# Configuration file for ksmtuned. + +# How long ksmtuned should sleep between tuning adjustments +# KSM_MONITOR_INTERVAL=60 + +# Millisecond sleep between ksm scans for 16Gb server. +# Smaller servers sleep more, bigger sleep less. 
+# KSM_SLEEP_MSEC=10 + +# KSM_NPAGES_BOOST=300 +# KSM_NPAGES_DECAY=-50 +# KSM_NPAGES_MIN=64 +# KSM_NPAGES_MAX=1250 + +# KSM_THRES_COEF=20 +# KSM_THRES_CONST=2048 + +# uncomment the following if you want ksmtuned debug info + +# LOGFILE=/var/log/ksmtuned +# DEBUG=1 diff --git a/ksmtuned.service b/ksmtuned.service new file mode 100644 index 0000000..39febcc --- /dev/null +++ b/ksmtuned.service @@ -0,0 +1,12 @@ +[Unit] +Description=Kernel Samepage Merging (KSM) Tuning Daemon +After=ksm.service +Requires=ksm.service + +[Service] +ExecStart=/usr/sbin/ksmtuned +ExecReload=/bin/kill -USR1 $MAINPID +Type=forking + +[Install] +WantedBy=multi-user.target diff --git a/kvm-s390x.conf b/kvm-s390x.conf new file mode 100644 index 0000000..d82b818 --- /dev/null +++ b/kvm-s390x.conf @@ -0,0 +1,19 @@ +# User changes in this file are preserved across upgrades. +# +# Setting "modprobe kvm nested=1" only enables Nested Virtualization until +# the next reboot or module reload. Uncomment the option below to enable +# the feature permanently. +# +#options kvm nested=1 +# +# +# Setting "modprobe kvm hpage=1" only enables Huge Page Backing (1MB) +# support until the next reboot or module reload. Uncomment the option +# below to enable the feature permanently. +# +# Note: - Incompatible with "nested=1". Loading the module will fail. +# - Dirty page logging will be performed on a 1MB (not 4KB) basis, +# which can result in a lot of data having to be transferred during +# migration, and therefore taking very long to converge. +# +#options kvm hpage=1 diff --git a/kvm-setup b/kvm-setup new file mode 100644 index 0000000..abbd587 --- /dev/null +++ b/kvm-setup @@ -0,0 +1,40 @@ +#! /bin/bash + +kvm_setup_powerpc () { + if grep '^platform[[:space:]]*:[[:space:]]*PowerNV' /proc/cpuinfo > /dev/null; then + # PowerNV platform, which is KVM HV capable + + if [ -z "$SUBCORES" ]; then + SUBCORES=1 + fi + + # Step 1. Load the KVM HV module + if ! 
modprobe -b kvm_hv; then + return + fi + + # On POWER8 a host core can only run threads of a single + # guest, meaning that SMT must be disabled on the host in + # order to run KVM guests. (Also applies to POWER7, but we + # don't support that). + # + # POWER9 doesn't have this limitation (though it will for hash + # guests on radix host when that's implemented). So, only set + # up subcores and disable SMT for POWER8. + if grep '^cpu[[:space:]]*:[[:space:]]*POWER8' /proc/cpuinfo > /dev/null; then + # Step 2. Configure subcore mode + /usr/sbin/ppc64_cpu --subcores-per-core=$SUBCORES + + # Step 3. Disable SMT (multithreading) + /usr/sbin/ppc64_cpu --smt=off + fi + fi +} + +case $(uname -m) in + ppc64|ppc64le) + kvm_setup_powerpc + ;; +esac + +exit 0 diff --git a/kvm-setup.service b/kvm-setup.service new file mode 100644 index 0000000..9c4bf97 --- /dev/null +++ b/kvm-setup.service @@ -0,0 +1,14 @@ +[Unit] +Description=Perform system configuration to prepare system to run KVM guests +# Offlining CPUs can cause irqbalance to throw warnings if it's running +Before=irqbalance.service +# libvirtd reads CPU topology at startup, so change it before +Before=libvirtd.service + +[Service] +Type=oneshot +EnvironmentFile=-/etc/sysconfig/kvm +ExecStart=/usr/lib/systemd/kvm-setup + +[Install] +WantedBy=multi-user.target diff --git a/kvm-x86.conf b/kvm-x86.conf new file mode 100644 index 0000000..3f7842a --- /dev/null +++ b/kvm-x86.conf @@ -0,0 +1,12 @@ +# Setting modprobe kvm_intel/kvm_amd nested = 1 +# only enables Nested Virtualization until the next reboot or +# module reload. Uncomment the option applicable +# to your system below to enable the feature permanently. +# +# User changes in this file are preserved across upgrades. 
+# +# For Intel +#options kvm_intel nested=1 +# +# For AMD +#options kvm_amd nested=1 diff --git a/kvm.conf b/kvm.conf new file mode 100644 index 0000000..24e60e9 --- /dev/null +++ b/kvm.conf @@ -0,0 +1,3 @@ +# +# User changes in this file are preserved across upgrades. +# diff --git a/kvm.modules b/kvm.modules new file mode 100644 index 0000000..b9d9646 --- /dev/null +++ b/kvm.modules @@ -0,0 +1,18 @@ +#!/bin/sh + +case $(uname -m) in + ppc64) + grep OPAL /proc/cpuinfo >/dev/null 2>&1 && opal=1 + + modprobe -b kvm >/dev/null 2>&1 + modprobe -b kvm-pr >/dev/null 2>&1 && kvm=1 + if [ "$opal" ]; then + modprobe -b kvm-hv >/dev/null 2>&1 + fi + ;; + s390x) + modprobe -b kvm >/dev/null 2>&1 && kvm=1 + ;; +esac + +exit 0 diff --git a/qemu-ga.sysconfig b/qemu-ga.sysconfig new file mode 100644 index 0000000..67bad0c --- /dev/null +++ b/qemu-ga.sysconfig @@ -0,0 +1,19 @@ +# This is a systemd environment file, not a shell script. +# It provides settings for "/lib/systemd/system/qemu-guest-agent.service". + +# Comma-separated blacklist of RPCs to disable, or empty list to enable all. +# +# You can get the list of RPC commands using "qemu-ga --blacklist='?'". +# There should be no spaces between commas and commands in the blacklist. +BLACKLIST_RPC=guest-file-open,guest-file-close,guest-file-read,guest-file-write,guest-file-seek,guest-file-flush,guest-exec,guest-exec-status + +# Fsfreeze hook script specification. +# +# FSFREEZE_HOOK_PATHNAME=/dev/null : disables the feature. +# +# FSFREEZE_HOOK_PATHNAME=/path/to/executable : enables the feature with the +# specified binary or shell script. +# +# FSFREEZE_HOOK_PATHNAME= : enables the feature with the +# default value (invoke "qemu-ga --help" to interrogate). 
+FSFREEZE_HOOK_PATHNAME=/etc/qemu-ga/fsfreeze-hook diff --git a/qemu-guest-agent.service b/qemu-guest-agent.service new file mode 100644 index 0000000..b33e951 --- /dev/null +++ b/qemu-guest-agent.service @@ -0,0 +1,20 @@ +[Unit] +Description=QEMU Guest Agent +BindsTo=dev-virtio\x2dports-org.qemu.guest_agent.0.device +After=dev-virtio\x2dports-org.qemu.guest_agent.0.device +IgnoreOnIsolate=True + +[Service] +UMask=0077 +EnvironmentFile=/etc/sysconfig/qemu-ga +ExecStart=/usr/bin/qemu-ga \ + --method=virtio-serial \ + --path=/dev/virtio-ports/org.qemu.guest_agent.0 \ + --blacklist=${BLACKLIST_RPC} \ + -F${FSFREEZE_HOOK_PATHNAME} +StandardError=syslog +Restart=always +RestartSec=0 + +[Install] +WantedBy=dev-virtio\x2dports-org.qemu.guest_agent.0.device diff --git a/qemu-kvm.spec b/qemu-kvm.spec new file mode 100644 index 0000000..60b6e11 --- /dev/null +++ b/qemu-kvm.spec @@ -0,0 +1,1651 @@ +%global SLOF_gittagdate 20170724 +%global SLOF_gittagcommit 89f519f + +%global have_usbredir 1 +%global have_spice 1 +%global have_opengl 1 +%global have_fdt 0 +%global have_gluster 0 +%global have_kvm_setup 0 +%global have_seccomp 1 +%global have_memlock_limits 0 +%global have_vxhs 0 +%global have_vhost_user 1 +%global have_tcmalloc 0 + +%ifnarch %{ix86} x86_64 + %global have_usbredir 0 +%endif + +%ifnarch s390x + %global have_librdma 1 +%else + %global have_librdma 0 +%endif + +%ifarch %{ix86} + %global kvm_target i386 +%endif +%ifarch x86_64 + %global kvm_target x86_64 + %global have_vxhs 1 +%else + %global have_spice 0 + %global have_opengl 0 + %global have_gluster 0 +%endif +%ifarch %{power64} + %global kvm_target ppc64 + %global have_fdt 1 + %global have_kvm_setup 1 + %global have_memlock_limits 1 +%endif +%ifarch s390x + %global kvm_target s390x +%endif +%ifarch ppc + %global kvm_target ppc + %global have_fdt 1 +%endif +%ifarch aarch64 + %global kvm_target aarch64 + %global have_fdt 1 +%endif + +#Versions of various parts: + +%global requires_all_modules \ +Requires: 
%{name}-block-curl = %{epoch}:%{version}-%{release} \ +Requires: %{name}-block-iscsi = %{epoch}:%{version}-%{release} \ +Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ +Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} + +# Macro to properly setup RHEL/RHEV conflict handling +%define rhel_rhev_conflicts() \ +Conflicts: %1-ma \ +Conflicts: %1-rhev \ +Provides: %1-rhel = %{epoch}:%{version}-%{release} + +Summary: QEMU is a machine emulator and virtualizer +Name: qemu-kvm +Version: 3.0.0 +Release: 1%{?dist} +# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped +Epoch: 15 +License: GPLv2 and GPLv2+ and CC-BY +Group: Development/Tools +URL: http://www.qemu.org/ +ExclusiveArch: x86_64 %{power64} aarch64 s390x + + +# OOM killer breaks builds with parallel make on s390x +%ifarch s390x + %define _smp_mflags %{nil} +%endif + +Source0: http://wiki.qemu.org/download/qemu-3.0.0.tar.xz + +# KSM control scripts +Source4: ksm.service +Source5: ksm.sysconfig +Source6: ksmctl.c +Source7: ksmtuned.service +Source8: ksmtuned +Source9: ksmtuned.conf +Source10: qemu-guest-agent.service +Source11: 99-qemu-guest-agent.rules +Source12: bridge.conf +Source13: qemu-ga.sysconfig +Source21: kvm-setup +Source22: kvm-setup.service +Source23: 85-kvm.preset +Source26: vhost.conf +Source27: kvm.conf +Source28: 95-kvm-memlock.conf +Source30: kvm-s390x.conf +Source31: kvm-x86.conf +Source32: qemu-pr-helper.service +Source33: qemu-pr-helper.socket + + + +Patch0001: 0001-Initial-redhat-build.patch +Patch0002: 0002-Enable-disable-devices-for-RHEL-7.patch +Patch0003: 0003-Add-RHEL-machine-types.patch +Patch0004: 0004-Use-kvm-by-default.patch +Patch0005: 0005-vfio-cap-number-of-devices-that-can-be-assigned.patch +Patch0006: 0006-Add-support-statement-to-help-output.patch +Patch0007: 0007-globally-limit-the-maximum-number-of-CPUs.patch +Patch0008: 0008-Add-support-for-simpletrace.patch +Patch0009: 
0009-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0010: 0010-usb-xhci-Fix-PCI-capability-order.patch +Patch0011: 0011-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +Patch0012: 0012-linux-headers-asm-s390-kvm.h-header-sync.patch +Patch0013: 0013-s390x-Enable-KVM-huge-page-backing-support.patch +Patch0014: 0014-s390x-kvm-add-etoken-facility.patch +Patch0015: 0015-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch +Patch0016: 0016-i386-Fix-arch_query_cpu_model_expansion-leak.patch +Patch0017: 0017-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch +Patch0018: 0018-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch +Patch0019: 0019-migration-postcopy-Clear-have_listen_thread.patch +Patch0020: 0020-migration-cleanup-in-error-paths-in-loadvm.patch +Patch0021: 0021-jobs-change-start-callback-to-run-callback.patch +Patch0022: 0022-jobs-canonize-Error-object.patch +Patch0023: 0023-jobs-add-exit-shim.patch +Patch0024: 0024-block-commit-utilize-job_exit-shim.patch +Patch0025: 0025-block-mirror-utilize-job_exit-shim.patch +Patch0026: 0026-jobs-utilize-job_exit-shim.patch +Patch0027: 0027-block-backup-make-function-variables-consistently-na.patch +Patch0028: 0028-jobs-remove-ret-argument-to-job_completed-privatize-.patch +Patch0029: 0029-jobs-remove-job_defer_to_main_loop.patch +Patch0030: 0030-block-commit-add-block-job-creation-flags.patch +Patch0031: 0031-block-mirror-add-block-job-creation-flags.patch +Patch0032: 0032-block-stream-add-block-job-creation-flags.patch +Patch0033: 0033-block-commit-refactor-commit-to-use-job-callbacks.patch +Patch0034: 0034-block-mirror-don-t-install-backing-chain-on-abort.patch +Patch0035: 0035-block-mirror-conservative-mirror_exit-refactor.patch +Patch0036: 0036-block-stream-refactor-stream-to-use-job-callbacks.patch +Patch0037: 0037-tests-blockjob-replace-Blockjob-with-Job.patch +Patch0038: 0038-tests-test-blockjob-remove-exit-callback.patch +Patch0039: 
0039-tests-test-blockjob-txn-move-.exit-to-.clean.patch +Patch0040: 0040-jobs-remove-.exit-callback.patch +Patch0041: 0041-qapi-block-commit-expose-new-job-properties.patch +Patch0042: 0042-qapi-block-mirror-expose-new-job-properties.patch +Patch0043: 0043-qapi-block-stream-expose-new-job-properties.patch +Patch0044: 0044-block-backup-qapi-documentation-fixup.patch +Patch0045: 0045-blockdev-document-transactional-shortcomings.patch + +BuildRequires: zlib-devel +BuildRequires: glib2-devel +BuildRequires: which +BuildRequires: gnutls-devel +BuildRequires: cyrus-sasl-devel +BuildRequires: libtool +BuildRequires: libaio-devel +BuildRequires: rsync +BuildRequires: python3-devel +BuildRequires: pciutils-devel +BuildRequires: libiscsi-devel +BuildRequires: ncurses-devel +BuildRequires: libattr-devel +BuildRequires: libusbx-devel >= 1.0.19 +%if %{have_usbredir} +BuildRequires: usbredir-devel >= 0.7.1 +%endif +BuildRequires: texinfo +%if %{have_spice} +BuildRequires: spice-protocol >= 0.12.12 +BuildRequires: spice-server-devel >= 0.12.8 +BuildRequires: libcacard-devel +# For smartcard NSS support +BuildRequires: nss-devel +%endif +%if %{have_seccomp} +BuildRequires: libseccomp-devel >= 2.3.0 +%endif +# For network block driver +BuildRequires: libcurl-devel +BuildRequires: libssh2-devel +BuildRequires: librados-devel +BuildRequires: librbd-devel +%if %{have_gluster} +# For gluster block driver +BuildRequires: glusterfs-api-devel >= 3.6.0 +BuildRequires: glusterfs-devel +%endif +# We need both because the 'stap' binary is probed for by configure +BuildRequires: systemtap +BuildRequires: systemtap-sdt-devel +# For VNC PNG support +BuildRequires: libpng-devel +# For uuid generation +BuildRequires: libuuid-devel +# For BlueZ device support +BuildRequires: bluez-libs-devel +# For Braille device support +BuildRequires: brlapi-devel +# For test suite +BuildRequires: check-devel +# For virtfs +BuildRequires: libcap-devel +# Hard requirement for version >= 1.3 +BuildRequires: 
pixman-devel +# Documentation requirement +BuildRequires: perl-podlators +BuildRequires: texinfo +# For rdma +%if 0%{?have_librdma} +BuildRequires: rdma-core-devel +%endif +%if %{have_fdt} +BuildRequires: libfdt-devel >= 1.4.3 +%endif +# iasl and cpp for acpi generation (not a hard requirement as we can use +# pre-compiled files, but it's better to use this) +%ifarch %{ix86} x86_64 +BuildRequires: iasl +BuildRequires: cpp +%endif +# For compressed guest memory dumps +BuildRequires: lzo-devel snappy-devel +# For NUMA memory binding +%ifnarch s390x +BuildRequires: numactl-devel +%endif +BuildRequires: libgcrypt-devel +# qemu-pr-helper multipath support (requires libudev too) +BuildRequires: device-mapper-multipath-devel +BuildRequires: systemd-devel +# used by qemu-bridge-helper and qemu-pr-helper +BuildRequires: libcap-ng-devel + +BuildRequires: diffutils + +# qemu-keymap +BuildRequires: pkgconfig(xkbcommon) + +# For s390-pgste flag +%ifarch s390x +BuildRequires: binutils >= 2.27-16 +%endif + +%if %{have_opengl} +BuildRequires: pkgconfig(epoxy) +BuildRequires: pkgconfig(libdrm) +BuildRequires: pkgconfig(gbm) +Requires: mesa-libGL +Requires: mesa-libEGL +Requires: mesa-dri-drivers +%endif + +Requires: qemu-kvm-core = %{epoch}:%{version}-%{release} +Conflicts: qemu-kvm-ma +Conflicts: qemu-kvm-rhev + +%{requires_all_modules} + +%define qemudocdir %{_docdir}/%{name} + +%description +qemu-kvm is an open source virtualizer that provides hardware +emulation for the KVM hypervisor. qemu-kvm acts as a virtual +machine monitor together with the KVM kernel modules, and emulates the +hardware for a full system such as a PC and its associated peripherals. 
+ + +%package -n qemu-kvm-core +Summary: qemu-kvm core components +Requires: qemu-img = %{epoch}:%{version}-%{release} +%ifarch %{ix86} x86_64 +Requires: seabios-bin >= 1.10.2-1 +Requires: sgabios-bin +%endif +%ifnarch aarch64 s390x +Requires: seavgabios-bin >= 1.10.2-1 +Requires: ipxe-roms-qemu >= 20170123-1 +%endif +%ifarch %{power64} +Requires: SLOF >= %{SLOF_gittagdate}-1.git%{SLOF_gittagcommit} +%endif +Requires: %{name}-common = %{epoch}:%{version}-%{release} +%if %{have_seccomp} +Requires: libseccomp >= 2.3.0 +%endif +# For compressed guest memory dumps +Requires: lzo snappy +%if %{have_gluster} +Requires: glusterfs-api >= 3.6.0 +%endif +%if %{have_kvm_setup} +Requires(post): systemd-units + %ifarch %{power64} +Requires: powerpc-utils + %endif +%endif +Requires: libusbx >= 1.0.19 +%if %{have_usbredir} +Requires: usbredir >= 0.7.1 +%endif + +%rhel_rhev_conflicts qemu-kvm + +%description -n qemu-kvm-core +qemu-kvm is an open source virtualizer that provides hardware +emulation for the KVM hypervisor. qemu-kvm acts as a virtual +machine monitor together with the KVM kernel modules, and emulates the +hardware for a full system such as a PC and its associated peripherals. + + +%package -n qemu-img +Summary: QEMU command line tool for manipulating disk images +Group: Development/Tools + +%rhel_rhev_conflicts qemu-img + +%description -n qemu-img +This package provides a command line tool for manipulating disk images. + +%package -n qemu-kvm-common +Summary: QEMU common files needed by all QEMU targets +Group: Development/Tools +Requires(post): /usr/bin/getent +Requires(post): /usr/sbin/groupadd +Requires(post): /usr/sbin/useradd +Requires(post): systemd-units +Requires(preun): systemd-units +Requires(postun): systemd-units + +%rhel_rhev_conflicts qemu-kvm-common + +%description -n qemu-kvm-common +qemu-kvm is an open source virtualizer that provides hardware emulation for +the KVM hypervisor. 
+ +This package provides documentation and auxiliary programs used with qemu-kvm. + + +%package -n qemu-guest-agent +Summary: QEMU guest agent +Requires(post): systemd-units +Requires(preun): systemd-units +Requires(postun): systemd-units + +# OOM killer breaks builds with parallel make on s390x +%ifarch s390x + %define _smp_mflags %{nil} +%endif + +%description -n qemu-guest-agent +qemu-kvm is an open source virtualizer that provides hardware emulation for +the KVM hypervisor. + +This package provides an agent to run inside guests, which communicates +with the host over a virtio-serial channel named "org.qemu.guest_agent.0" + +This package does not need to be installed on the host OS. + + +%package block-curl +Summary: QEMU CURL block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-curl +This package provides the additional CURL block driver for QEMU. + +Install this package if you want to access remote disks over +http, https, ftp and other transports provided by the CURL library. + + +%package block-iscsi +Summary: QEMU iSCSI block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-iscsi +This package provides the additional iSCSI block driver for QEMU. + +Install this package if you want to access iSCSI volumes. + + +%package block-rbd +Summary: QEMU Ceph/RBD block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-rbd +This package provides the additional Ceph/RBD block driver for QEMU. + +Install this package if you want to access remote Ceph volumes +using the rbd protocol. + + +%package block-ssh +Summary: QEMU SSH block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-ssh +This package provides the additional SSH block driver for QEMU. + +Install this package if you want to access remote disks using +the Secure Shell (SSH) protocol. 
+ + +%prep +%setup -q -n qemu-%{version} + +%patch0001 -p1 +%patch0002 -p1 +%patch0003 -p1 +%patch0004 -p1 +%patch0005 -p1 +%patch0006 -p1 +%patch0007 -p1 +%patch0008 -p1 +%patch0009 -p1 +%patch0010 -p1 +%patch0011 -p1 +%patch0012 -p1 +%patch0013 -p1 +%patch0014 -p1 +%patch0015 -p1 +%patch0016 -p1 +%patch0017 -p1 +%patch0018 -p1 +%patch0019 -p1 +%patch0020 -p1 +%patch0021 -p1 +%patch0022 -p1 +%patch0023 -p1 +%patch0024 -p1 +%patch0025 -p1 +%patch0026 -p1 +%patch0027 -p1 +%patch0028 -p1 +%patch0029 -p1 +%patch0030 -p1 +%patch0031 -p1 +%patch0032 -p1 +%patch0033 -p1 +%patch0034 -p1 +%patch0035 -p1 +%patch0036 -p1 +%patch0037 -p1 +%patch0038 -p1 +%patch0039 -p1 +%patch0040 -p1 +%patch0041 -p1 +%patch0042 -p1 +%patch0043 -p1 +%patch0044 -p1 +%patch0045 -p1 + +%build +%global buildarch %{kvm_target}-softmmu + +# --build-id option is used for giving info to the debug packages. +buildldflags="VL_LDFLAGS=-Wl,--build-id" + +%global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle + +%if 0%{have_vxhs} + %global block_drivers_list %{block_drivers_list},vxhs +%endif +%if 0%{have_gluster} + %global block_drivers_list %{block_drivers_list},gluster +%endif + +./configure \ + --prefix="%{_prefix}" \ + --libdir="%{_libdir}" \ + --sysconfdir="%{_sysconfdir}" \ + --interp-prefix=%{_prefix}/qemu-%M \ + --localstatedir="%{_localstatedir}" \ + --docdir="%{qemudocdir}" \ + --libexecdir="%{_libexecdir}" \ + --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ + --extra-cflags="%{optflags}" \ + --with-pkgversion="%{name}-%{version}-%{release}" \ + --with-confsuffix=/"%{name}" \ + --firmwarepath=%{_prefix}/share/qemu-firmware \ +%if 0%{have_fdt} + --enable-fdt \ +%else + --disable-fdt \ + %endif +%if 0%{have_gluster} + --enable-glusterfs \ +%else + --disable-glusterfs \ +%endif + --enable-guest-agent \ +%ifnarch s390x + --enable-numa \ +%else + --disable-numa \ +%endif + --enable-rbd \ +%if 0%{have_librdma} + --enable-rdma \ 
+%else + --disable-rdma \ +%endif +%if 0%{have_seccomp} + --enable-seccomp \ +%else + --disable-seccomp \ +%endif +%if 0%{have_spice} + --enable-spice \ + --enable-smartcard \ +%else + --disable-spice \ + --disable-smartcard \ +%endif +%if 0%{have_opengl} + --enable-opengl \ +%else + --disable-opengl \ +%endif +%if 0%{have_usbredir} + --enable-usb-redir \ +%else + --disable-usb-redir \ +%endif +%if 0%{have_tcmalloc} + --enable-tcmalloc \ +%else + --disable-tcmalloc \ +%endif +%if 0%{have_vxhs} + --enable-vxhs \ +%else + --disable-vxhs \ +%endif +%if 0%{have_vhost_user} + --enable-vhost-user \ +%else + --disable-vhost-user \ +%endif + --python=%{__python3} \ + --target-list="%{buildarch}" \ + --block-drv-rw-whitelist=%{block_drivers_list} \ + --audio-drv-list= \ + --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \ + --with-coroutine=ucontext \ + --tls-priority=NORMAL \ + --disable-bluez \ + --disable-brlapi \ + --disable-cap-ng \ + --enable-coroutine-pool \ + --enable-curl \ + --disable-curses \ + --disable-debug-tcg \ + --enable-docs \ + --disable-gtk \ + --enable-kvm \ + --enable-libiscsi \ + --disable-libnfs \ + --enable-libssh2 \ + --enable-libusb \ + --disable-bzip2 \ + --enable-linux-aio \ + --disable-live-block-migration \ + --enable-lzo \ + --enable-pie \ + --disable-qom-cast-debug \ + --disable-sdl \ + --enable-snappy \ + --disable-sparse \ + --disable-strip \ + --disable-tpm \ + --enable-trace-backend=dtrace \ + --disable-vde \ + --disable-vhost-scsi \ + --disable-virtfs \ + --disable-vnc-jpeg \ + --disable-vte \ + --enable-vnc-png \ + --enable-vnc-sasl \ + --enable-werror \ + --disable-xen \ + --disable-xfsctl \ + --enable-gnutls \ + --enable-gcrypt \ + --disable-nettle \ + --enable-attr \ + --disable-bsd-user \ + --disable-cocoa \ + --enable-debug-info \ + --disable-guest-agent-msi \ + --disable-hax \ + --disable-jemalloc \ + --disable-linux-user \ + --enable-modules \ + --disable-netmap \ + --disable-replication \ + --enable-system \ + --enable-tools \ 
+ --disable-user \ + --enable-vhost-net \ + --enable-vhost-vsock \ + --enable-vnc \ + --enable-mpath \ + --disable-virglrenderer \ + --disable-xen-pci-passthrough \ + --enable-tcg \ + --with-git=git \ + --disable-sanitizers \ + --disable-hvf \ + --disable-whpx \ + --enable-malloc-trim \ + --disable-membarrier \ + --disable-vhost-crypto \ + --disable-libxml2 \ + --enable-capstone \ + --disable-git-update \ + --disable-crypto-afalg \ + --disable-debug-mutex + + +echo "config-host.mak contents:" +echo "===" +cat config-host.mak +echo "===" + +make V=1 %{?_smp_mflags} $buildldflags + +# Setup back compat qemu-kvm binary +%{__python3} scripts/tracetool.py --backend dtrace --format stap --group=all \ + --binary %{_libexecdir}/qemu-kvm --target-name %{kvm_target} \ + --target-type system --probe-prefix \ + qemu.kvm trace-events-all > qemu-kvm.stp + +%{__python3} scripts/tracetool.py --backend dtrace --format simpletrace-stap \ + --group=all --binary %{_libexecdir}/qemu-kvm --target-name %{kvm_target} \ + --target-type system --probe-prefix \ + qemu.kvm trace-events-all > qemu-kvm-simpletrace.stp + +cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm + +gcc %{SOURCE6} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o ksmctl + +%install +%define _udevdir %(pkg-config --variable=udevdir udev)/rules.d + +install -D -p -m 0644 %{SOURCE4} $RPM_BUILD_ROOT%{_unitdir}/ksm.service +install -D -p -m 0644 %{SOURCE5} $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig/ksm +install -D -p -m 0755 ksmctl $RPM_BUILD_ROOT%{_libexecdir}/ksmctl + +install -D -p -m 0644 %{SOURCE7} $RPM_BUILD_ROOT%{_unitdir}/ksmtuned.service +install -D -p -m 0755 %{SOURCE8} $RPM_BUILD_ROOT%{_sbindir}/ksmtuned +install -D -p -m 0644 %{SOURCE9} $RPM_BUILD_ROOT%{_sysconfdir}/ksmtuned.conf +install -D -p -m 0644 %{SOURCE26} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/vhost.conf +%ifarch s390x + install -D -p -m 0644 %{SOURCE30} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%else +%ifarch %{ix86} x86_64 + install -D -p -m 0644 
%{SOURCE31} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%else + install -D -p -m 0644 %{SOURCE27} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%endif +%endif + +mkdir -p $RPM_BUILD_ROOT%{_bindir}/ +mkdir -p $RPM_BUILD_ROOT%{_udevdir} + +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name} +install -m 0644 scripts/dump-guest-memory.py \ + $RPM_BUILD_ROOT%{_datadir}/%{name} + +make DESTDIR=$RPM_BUILD_ROOT \ + sharedir="%{_datadir}/%{name}" \ + datadir="%{_datadir}/%{name}" \ + install + +mkdir -p $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset + +# Install qemu-guest-agent service and udev rules +install -m 0644 %{_sourcedir}/qemu-guest-agent.service %{buildroot}%{_unitdir} +install -m 0644 %{_sourcedir}/qemu-ga.sysconfig %{buildroot}%{_sysconfdir}/sysconfig/qemu-ga +install -m 0644 %{_sourcedir}/99-qemu-guest-agent.rules %{buildroot}%{_udevdir} + +# - the fsfreeze hook script: +install -D --preserve-timestamps \ + scripts/qemu-guest-agent/fsfreeze-hook \ + $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook + +# - the directory for user scripts: +mkdir $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook.d + +# - and the fsfreeze script samples: +mkdir --parents $RPM_BUILD_ROOT%{_datadir}/%{name}/qemu-ga/fsfreeze-hook.d/ +install --preserve-timestamps --mode=0644 \ + scripts/qemu-guest-agent/fsfreeze-hook.d/*.sample \ + $RPM_BUILD_ROOT%{_datadir}/%{name}/qemu-ga/fsfreeze-hook.d/ + +# - Install dedicated log directory: +mkdir -p -v $RPM_BUILD_ROOT%{_localstatedir}/log/qemu-ga/ + +mkdir -p $RPM_BUILD_ROOT%{_bindir} +install -c -m 0755 qemu-ga ${RPM_BUILD_ROOT}%{_bindir}/qemu-ga + +mkdir -p $RPM_BUILD_ROOT%{_mandir}/man8 +install -m 0644 qemu-ga.8 ${RPM_BUILD_ROOT}%{_mandir}/man8/ + + +install -m 0755 qemu-kvm $RPM_BUILD_ROOT%{_libexecdir}/ +install -m 0644 qemu-kvm.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -m 0644 qemu-kvm-simpletrace.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ + +rm $RPM_BUILD_ROOT%{_bindir}/qemu-system-%{kvm_target} +rm 
$RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}.stp +rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-simpletrace.stp + +# Install simpletrace +install -m 0755 scripts/simpletrace.py $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py +# Avoid ambiguous 'python' interpreter name +sed -i -e '1 s/python/python3/' $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool scripts/tracetool/*.py +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/backend +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/backend scripts/tracetool/backend/*.py +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format scripts/tracetool/format/*.py + +mkdir -p $RPM_BUILD_ROOT%{qemudocdir} +install -p -m 0644 -t ${RPM_BUILD_ROOT}%{qemudocdir} Changelog README README.systemtap COPYING COPYING.LIB LICENSE docs/interop/qmp-spec.txt +chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man1/* +chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man8/* + +install -D -p -m 0644 qemu.sasl $RPM_BUILD_ROOT%{_sysconfdir}/sasl2/%{name}.conf + +# Provided by package openbios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-ppc +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-sparc32 +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-sparc64 +# Provided by package SLOF +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/slof.bin + +# Remove unpackaged files. 
+# Firmware blobs and device trees that "make install" drops into the data
+# directory but that no subpackage lists in its files section.
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/palcode-clipper
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/petalogix*.dtb
+rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/bamboo.dtb
+rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/ppc_rom.bin
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-zipl.rom
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot.e500
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu_vga.ndrv
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/skiboot.lid
+
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-ccw.img
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/hppa-firmware.img
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/canyonlands.dtb
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot-sam460-20100605.bin
+
+%ifarch s390x
+    # Use the s390-ccw.img that we've just built, not the pre-built one
+    # (the pre-built copy was removed unconditionally just above).
+    install -m 0644 pc-bios/s390-ccw/s390-ccw.img $RPM_BUILD_ROOT%{_datadir}/%{name}/
+%else
+    rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-netboot.img
+%endif
+
+%ifnarch %{power64}
+    rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/spapr-rtas.bin
+%endif
+
+%ifnarch x86_64
+    rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/kvmvapic.bin
+    rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/linuxboot.bin
+    rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/multiboot.bin
+%endif
+
+# Remove sparc files
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/QEMU,tcx.bin
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/QEMU,cgthree.bin
+
+# Remove ivshmem example programs
+rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-client
+rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-server
+
+# Remove efi roms (re-created below as symlinks on the arches that use them)
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/efi*.rom
+
+# Provided by package ipxe
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/pxe*rom
+# Provided by package vgabios
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/vgabios*bin
+# Provided by package seabios
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/bios*.bin
+# Provided by package sgabios
+rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/sgabios.bin
+
+# The EFI network boot ROMs are symlinks to the images shipped by the ipxe
+# package, as QEMU doesn't know how to look for other paths, yet.
+#
+# pxe_link NIC ROMID
+#   NIC    device model name used in the link name (efi-NIC.rom)
+#   ROMID  basename of the ROM under ../ipxe.efi/ (presumably the PCI
+#          vendor+device ID in hex -- confirm against the ipxe package)
+# Arguments are quoted so an unusual buildroot path cannot be word-split.
+pxe_link() {
+    ln -s "../ipxe.efi/$2.rom" "%{buildroot}%{_datadir}/%{name}/efi-$1.rom"
+}
+
+%ifnarch aarch64 s390x
+pxe_link e1000 8086100e
+pxe_link ne2k_pci 10ec8029
+pxe_link pcnet 10222000
+pxe_link rtl8139 10ec8139
+pxe_link virtio 1af41000
+pxe_link e1000e 808610d3
+%endif
+
+# rom_link TARGET NAME
+#   Create the symlink NAME inside the package data directory pointing at
+#   TARGET (a path relative to that directory).
+rom_link() {
+    ln -s "$1" "%{buildroot}%{_datadir}/%{name}/$2"
+}
+
+%ifnarch aarch64 s390x
+    rom_link ../seavgabios/vgabios-isavga.bin vgabios.bin
+    rom_link ../seavgabios/vgabios-cirrus.bin vgabios-cirrus.bin
+    rom_link ../seavgabios/vgabios-qxl.bin vgabios-qxl.bin
+    rom_link ../seavgabios/vgabios-stdvga.bin vgabios-stdvga.bin
+    rom_link ../seavgabios/vgabios-vmware.bin vgabios-vmware.bin
+    rom_link ../seavgabios/vgabios-virtio.bin vgabios-virtio.bin
+%endif
+%ifarch x86_64
+    rom_link ../seabios/bios.bin bios.bin
+    rom_link ../seabios/bios-256k.bin bios-256k.bin
+    rom_link ../sgabios/sgabios.bin sgabios.bin
+%endif
+
+%if 0%{have_kvm_setup}
+    install -D -p -m 0755 %{SOURCE21} $RPM_BUILD_ROOT%{_prefix}/lib/systemd/kvm-setup
+    install -D -p -m 0644 %{SOURCE22} $RPM_BUILD_ROOT%{_unitdir}/kvm-setup.service
+    install -D -p -m 0644 %{SOURCE23} $RPM_BUILD_ROOT%{_presetdir}/85-kvm.preset
+%endif
+
+%if 0%{have_memlock_limits}
+    install -D -p -m 0644 %{SOURCE28} $RPM_BUILD_ROOT%{_sysconfdir}/security/limits.d/95-kvm-memlock.conf
+%endif
+
+# Install rules to use the bridge helper with libvirt's virbr0
+install -D -m 0644 %{SOURCE12} $RPM_BUILD_ROOT%{_sysconfdir}/%{name}/bridge.conf
+
+# Install qemu-pr-helper service
+install -m 0644 %{_sourcedir}/qemu-pr-helper.service %{buildroot}%{_unitdir}
+install -m 0644 %{_sourcedir}/qemu-pr-helper.socket %{buildroot}%{_unitdir}
+
+# Dead code kept for reference: the conditional is hard-wired to 0, so the
+# libcacard install below never runs.
+%if 0
+make %{?_smp_mflags} $buildldflags DESTDIR=$RPM_BUILD_ROOT install-libcacard
+
+find $RPM_BUILD_ROOT -name "libcacard.so*" -exec chmod +x \{\} \;
+%endif
+
+# Drop libtool archives and static libraries. Deleting from within find
+# (instead of piping names through xargs) keeps paths containing whitespace
+# intact.
+find "$RPM_BUILD_ROOT" \( -name '*.la' -o -name '*.a' \) -exec rm -f {} +
+
+# We need to make the block device modules executable else
+# RPM won't pick up their dependencies.
+chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/block-*.so
+
+%check
+export DIFF=diff; make check V=1
+
+%post -n qemu-kvm-core
+# load kvm modules now, so we can make sure no reboot is needed.
+# If there's already a kvm module installed, we don't mess with it
+%udev_rules_update
+# Scriptlets run under /bin/sh, so use the portable redirection form
+# rather than the bash-only "&>" shorthand.
+sh %{_sysconfdir}/sysconfig/modules/kvm.modules >/dev/null 2>&1 || :
+udevadm trigger --subsystem-match=misc --sysname-match=kvm --action=add || :
+%if 0%{have_kvm_setup}
+    systemctl daemon-reload || : # Make sure it sees the new presets and unitfile
+    %systemd_post kvm-setup.service
+    # Starting the unit is best effort: a failure here must not turn into a
+    # failed scriptlet, so the exit status is deliberately discarded.
+    if systemctl is-enabled kvm-setup.service >/dev/null 2>&1; then
+        systemctl start kvm-setup.service || :
+    fi
+%endif
+
+%post -n qemu-kvm-common
+%systemd_post ksm.service
+%systemd_post ksmtuned.service
+
+# Create the kvm and qemu groups and the qemu user with fixed IDs, but only
+# when they do not exist yet.
+getent group kvm >/dev/null || groupadd -g 36 -r kvm
+getent group qemu >/dev/null || groupadd -g 107 -r qemu
+getent passwd qemu >/dev/null || \
+useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \
+  -c "qemu user" qemu
+
+%preun -n qemu-kvm-common
+%systemd_preun ksm.service
+%systemd_preun ksmtuned.service
+
+%postun -n qemu-kvm-common
+%systemd_postun_with_restart ksm.service
+%systemd_postun_with_restart ksmtuned.service
+
+%global qemu_kvm_files \
+%{_libexecdir}/qemu-kvm \
+%{_datadir}/systemtap/tapset/qemu-kvm.stp \
+%{_datadir}/%{name}/trace-events-all \
+%{_datadir}/systemtap/tapset/qemu-kvm-simpletrace.stp \
+%{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp \
+%{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf
+
+%files
+# Deliberately empty
+
+
+%files -n qemu-kvm-common
+%defattr(-,root,root)
+# Documentation
+%dir %{qemudocdir}
+%doc %{qemudocdir}/Changelog
+%doc %{qemudocdir}/README
+%doc %{qemudocdir}/qemu-doc.html
+%doc %{qemudocdir}/COPYING
+%doc %{qemudocdir}/COPYING.LIB
+%doc %{qemudocdir}/LICENSE
+%doc %{qemudocdir}/README.systemtap
+%doc %{qemudocdir}/qmp-spec.txt
+%doc %{qemudocdir}/qemu-doc.txt
+%doc %{qemudocdir}/qemu-ga-ref.html
+%doc %{qemudocdir}/qemu-ga-ref.txt
+%doc %{qemudocdir}/qemu-qmp-ref.html
+%doc %{qemudocdir}/qemu-qmp-ref.txt
+%{_mandir}/man7/qemu-qmp-ref.7*
+%{_bindir}/qemu-keymap
+%{_bindir}/qemu-pr-helper
+%{_unitdir}/qemu-pr-helper.service
+%{_unitdir}/qemu-pr-helper.socket
+%{_mandir}/man7/qemu-ga-ref.7*
+
+%dir %{_datadir}/%{name}/
+%{_datadir}/%{name}/keymaps/
+%{_mandir}/man1/%{name}.1*
+%{_mandir}/man7/qemu-block-drivers.7*
+# The bridge helper is installed setuid (mode 4755).
+%attr(4755, -, -) %{_libexecdir}/qemu-bridge-helper
+%config(noreplace) %{_sysconfdir}/sasl2/%{name}.conf
+%{_unitdir}/ksm.service
+%{_libexecdir}/ksmctl
+%config(noreplace) %{_sysconfdir}/sysconfig/ksm
+%{_unitdir}/ksmtuned.service
+%{_sbindir}/ksmtuned
+%config(noreplace) %{_sysconfdir}/ksmtuned.conf
+%dir %{_sysconfdir}/%{name}
+%config(noreplace) %{_sysconfdir}/%{name}/bridge.conf
+%config(noreplace) %{_sysconfdir}/modprobe.d/vhost.conf
+%config(noreplace) %{_sysconfdir}/modprobe.d/kvm.conf
+%{_datadir}/%{name}/simpletrace.py*
+%{_datadir}/%{name}/tracetool/*.py*
+%{_datadir}/%{name}/tracetool/backend/*.py*
+%{_datadir}/%{name}/tracetool/format/*.py*
+
+%files -n qemu-kvm-core
+%defattr(-,root,root)
+# Per-architecture firmware; the arch conditions match the ones used when
+# the install section creates or removes these files.
+%ifarch x86_64
+    %{_datadir}/%{name}/bios.bin
+    %{_datadir}/%{name}/bios-256k.bin
+    %{_datadir}/%{name}/linuxboot.bin
+    %{_datadir}/%{name}/multiboot.bin
+    %{_datadir}/%{name}/kvmvapic.bin
+    %{_datadir}/%{name}/sgabios.bin
+%endif
+%ifarch s390x
+    %{_datadir}/%{name}/s390-ccw.img
+    %{_datadir}/%{name}/s390-netboot.img
+%endif
+%ifnarch aarch64 s390x
+    %{_datadir}/%{name}/vgabios.bin
+    %{_datadir}/%{name}/vgabios-cirrus.bin
+    %{_datadir}/%{name}/vgabios-qxl.bin
+    %{_datadir}/%{name}/vgabios-stdvga.bin
+    %{_datadir}/%{name}/vgabios-vmware.bin
+    %{_datadir}/%{name}/vgabios-virtio.bin
+    %{_datadir}/%{name}/efi-e1000.rom
+    %{_datadir}/%{name}/efi-e1000e.rom
+    %{_datadir}/%{name}/efi-virtio.rom
+    %{_datadir}/%{name}/efi-pcnet.rom
+    %{_datadir}/%{name}/efi-rtl8139.rom
+    %{_datadir}/%{name}/efi-ne2k_pci.rom
+%endif
+%{_datadir}/%{name}/qemu-icon.bmp +%{_datadir}/%{name}/qemu_logo_no_text.svg +%{_datadir}/%{name}/linuxboot_dma.bin +%{_datadir}/%{name}/dump-guest-memory.py* +%ifarch %{power64} + %{_datadir}/%{name}/spapr-rtas.bin +%endif +%{?qemu_kvm_files:} +%if 0%{have_kvm_setup} + %{_prefix}/lib/systemd/kvm-setup + %{_unitdir}/kvm-setup.service + %{_presetdir}/85-kvm.preset +%endif +%if 0%{have_memlock_limits} + %{_sysconfdir}/security/limits.d/95-kvm-memlock.conf +%endif + +%files -n qemu-img +%defattr(-,root,root) +%{_bindir}/qemu-img +%{_bindir}/qemu-io +%{_bindir}/qemu-nbd +%{_mandir}/man1/qemu-img.1* +%{_mandir}/man8/qemu-nbd.8* + +%files -n qemu-guest-agent +%defattr(-,root,root,-) +%doc COPYING README +%{_bindir}/qemu-ga +%{_mandir}/man8/qemu-ga.8* +%{_unitdir}/qemu-guest-agent.service +%{_udevdir}/99-qemu-guest-agent.rules +%config(noreplace) %{_sysconfdir}/sysconfig/qemu-ga +%{_sysconfdir}/qemu-ga +%{_datadir}/%{name}/qemu-ga +%dir %{_localstatedir}/log/qemu-ga + +%files block-curl +%{_libdir}/qemu-kvm/block-curl.so + +%files block-iscsi +%{_libdir}/qemu-kvm/block-iscsi.so + +%files block-rbd +%{_libdir}/qemu-kvm/block-rbd.so + +%files block-ssh +%{_libdir}/qemu-kvm/block-ssh.so + + +%changelog +* Fri Oct 12 2018 Danilo Cesar Lemes de Paula - 3.0.0-1.el8 +- Rebase on qemu-kvm 3.0.0 + +* Fri Oct 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-33.el8 +- kvm-migration-postcopy-Clear-have_listen_thread.patch [bz#1608765] +- kvm-migration-cleanup-in-error-paths-in-loadvm.patch [bz#1608765] +- kvm-jobs-change-start-callback-to-run-callback.patch [bz#1632939] +- kvm-jobs-canonize-Error-object.patch [bz#1632939] +- kvm-jobs-add-exit-shim.patch [bz#1632939] +- kvm-block-commit-utilize-job_exit-shim.patch [bz#1632939] +- kvm-block-mirror-utilize-job_exit-shim.patch [bz#1632939] +- kvm-jobs-utilize-job_exit-shim.patch [bz#1632939] +- kvm-block-backup-make-function-variables-consistently-na.patch [bz#1632939] +- kvm-jobs-remove-ret-argument-to-job_completed-privatize-.patch 
[bz#1632939] +- kvm-jobs-remove-job_defer_to_main_loop.patch [bz#1632939] +- kvm-block-commit-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-mirror-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-stream-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-commit-refactor-commit-to-use-job-callbacks.patch [bz#1632939] +- kvm-block-mirror-don-t-install-backing-chain-on-abort.patch [bz#1632939] +- kvm-block-mirror-conservative-mirror_exit-refactor.patch [bz#1632939] +- kvm-block-stream-refactor-stream-to-use-job-callbacks.patch [bz#1632939] +- kvm-tests-blockjob-replace-Blockjob-with-Job.patch [bz#1632939] +- kvm-tests-test-blockjob-remove-exit-callback.patch [bz#1632939] +- kvm-tests-test-blockjob-txn-move-.exit-to-.clean.patch [bz#1632939] +- kvm-jobs-remove-.exit-callback.patch [bz#1632939] +- kvm-qapi-block-commit-expose-new-job-properties.patch [bz#1632939] +- kvm-qapi-block-mirror-expose-new-job-properties.patch [bz#1632939] +- kvm-qapi-block-stream-expose-new-job-properties.patch [bz#1632939] +- kvm-block-backup-qapi-documentation-fixup.patch [bz#1632939] +- kvm-blockdev-document-transactional-shortcomings.patch [bz#1632939] +- Resolves: bz#1608765 + (After postcopy migration, do savevm and loadvm, guest hang and call trace) +- Resolves: bz#1632939 + (qemu blockjobs other than backup do not support job-finalize or job-dismiss) + +* Fri Sep 28 2018 Danilo Cesar Lemes de Paula - 2.12.0-32.el8 +- kvm-Re-enable-disabled-Hyper-V-enlightenments.patch [bz#1625185] +- kvm-Fix-annocheck-issues.patch [bz#1624164] +- kvm-exec-check-that-alignment-is-a-power-of-two.patch [bz#1630746] +- kvm-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch [bz#1575925] +- Resolves: bz#1575925 + ("SSL: no alternative certificate subject name matches target host name" error even though sslverify = off) +- Resolves: bz#1624164 + (Review annocheck distro flag failures in qemu-kvm) +- Resolves: bz#1625185 + (Re-enable disabled Hyper-V enlightenments) 
+- Resolves: bz#1630746 + (qemu_ram_mmap: Assertion `is_power_of_2(align)' failed) + +* Tue Sep 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-31.el8 +- kvm-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch [bz#1619804] +- kvm-redhat-enable-opengl-add-build-and-runtime-deps.patch [bz#1618412] +- Resolves: bz#1618412 + (Enable opengl (for intel vgpu display)) +- Resolves: bz#1619804 + (kernel panic in init_amd_cacheinfo) + +* Wed Sep 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-30.el8 +- kvm-redhat-Disable-vhost-crypto.patch [bz#1625668] +- Resolves: bz#1625668 + (Decide if we should disable 'vhost-crypto' or not) + +* Wed Sep 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-29.el8 +- kvm-target-i386-sev-fix-memory-leaks.patch [bz#1615717] +- kvm-i386-Fix-arch_query_cpu_model_expansion-leak.patch [bz#1615717] +- kvm-redhat-Update-build-configuration.patch [bz#1573156] +- Resolves: bz#1573156 + (Update build configure for QEMU 2.12.0) +- Resolves: bz#1615717 + (Memory leaks) + +* Wed Aug 29 2018 Danilo Cesar Lemes de Paula - 2.12.0-27.el8 +- kvm-Fix-libusb-1.0.22-deprecated-libusb_set_debug-with-l.patch [bz#1622656] +- Resolves: bz#1622656 + (qemu-kvm fails to build due to libusb_set_debug being deprecated) + +* Fri Aug 17 2018 Danilo Cesar Lemes de Paula - 2.12.0-26.el8 +- kvm-redhat-remove-extra-in-rhel_rhev_conflicts-macro.patch [bz#1618752] +- Resolves: bz#1618752 + (qemu-kvm can't be installed in RHEL-8 as it Conflicts with itself.) 
+ +* Thu Aug 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-25.el8 +- kvm-Migration-TLS-Fix-crash-due-to-double-cleanup.patch [bz#1594384] +- Resolves: bz#1594384 + (2.12 migration fixes) + +* Tue Aug 14 2018 Danilo Cesar Lemes de Paula - 2.12.0-24.el8 +- kvm-Add-qemu-keymap-to-qemu-kvm-common.patch [bz#1593117] +- Resolves: bz#1593117 + (add qemu-keymap utility) + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-23.el8 +- Fixing an issue with some old command in the spec file + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-22.el8 +- Fix an issue with the build_configure script. +- Resolves: bz#1425820 + (Improve QEMU packaging layout with modularization of the block layer) + + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-20.el8 +- kvm-migration-stop-compressing-page-in-migration-thread.patch [bz#1594384] +- kvm-migration-stop-compression-to-allocate-and-free-memo.patch [bz#1594384] +- kvm-migration-stop-decompression-to-allocate-and-free-me.patch [bz#1594384] +- kvm-migration-detect-compression-and-decompression-error.patch [bz#1594384] +- kvm-migration-introduce-control_save_page.patch [bz#1594384] +- kvm-migration-move-some-code-to-ram_save_host_page.patch [bz#1594384] +- kvm-migration-move-calling-control_save_page-to-the-comm.patch [bz#1594384] +- kvm-migration-move-calling-save_zero_page-to-the-common-.patch [bz#1594384] +- kvm-migration-introduce-save_normal_page.patch [bz#1594384] +- kvm-migration-remove-ram_save_compressed_page.patch [bz#1594384] +- kvm-migration-block-dirty-bitmap-fix-memory-leak-in-dirt.patch [bz#1594384] +- kvm-migration-fix-saving-normal-page-even-if-it-s-been-c.patch [bz#1594384] +- kvm-migration-update-index-field-when-delete-or-qsort-RD.patch [bz#1594384] +- kvm-migration-introduce-decompress-error-check.patch [bz#1594384] +- kvm-migration-Don-t-activate-block-devices-if-using-S.patch [bz#1594384] +- kvm-migration-not-wait-RDMA_CM_EVENT_DISCONNECTED-event-.patch [bz#1594384] +- 
kvm-migration-block-dirty-bitmap-fix-dirty_bitmap_load.patch [bz#1594384] +- kvm-s390x-add-RHEL-7.6-machine-type-for-ccw.patch [bz#1595718] +- kvm-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch [bz#1595718] +- kvm-linux-headers-asm-s390-kvm.h-header-sync.patch [bz#1612938] +- kvm-s390x-kvm-add-etoken-facility.patch [bz#1612938] +- Resolves: bz#1594384 + (2.12 migration fixes) +- Resolves: bz#1595718 + (Add ppa15/bpb to the default cpu model for z196 and higher in the 7.6 s390-ccw-virtio machine) +- Resolves: bz#1612938 + (Add etoken support to qemu-kvm for s390x KVM guests) + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-18.el8 + Mass import from RHEL 7.6 qemu-kvm-rhev, including fixes to the following BZs: + +- kvm-AArch64-Add-virt-rhel7.6-machine-type.patch [bz#1558723] +- kvm-cpus-Fix-event-order-on-resume-of-stopped-guest.patch [bz#1566153] +- kvm-qemu-img-Check-post-truncation-size.patch [bz#1523065] +- kvm-vga-catch-depth-0.patch [bz#1575541] +- kvm-Fix-x-hv-max-vps-compat-value-for-7.4-machine-type.patch [bz#1583959] +- kvm-ccid-card-passthru-fix-regression-in-realize.patch [bz#1584984] +- kvm-Use-4-MB-vram-for-cirrus.patch [bz#1542080] +- kvm-spapr_pci-Remove-unhelpful-pagesize-warning.patch [bz#1505664] +- kvm-rpm-Add-nvme-VFIO-driver-to-rw-whitelist.patch [bz#1416180] +- kvm-qobject-Use-qobject_to-instead-of-type-cast.patch [bz#1557995] +- kvm-qobject-Ensure-base-is-at-offset-0.patch [bz#1557995] +- kvm-qobject-use-a-QObjectBase_-struct.patch [bz#1557995] +- kvm-qobject-Replace-qobject_incref-QINCREF-qobject_decre.patch [bz#1557995] +- kvm-qobject-Modify-qobject_ref-to-return-obj.patch [bz#1557995] +- kvm-rbd-Drop-deprecated-drive-parameter-filename.patch [bz#1557995] +- kvm-iscsi-Drop-deprecated-drive-parameter-filename.patch [bz#1557995] +- kvm-block-Add-block-specific-QDict-header.patch [bz#1557995] +- kvm-qobject-Move-block-specific-qdict-code-to-block-qdic.patch [bz#1557995] +- 
kvm-block-Fix-blockdev-for-certain-non-string-scalars.patch [bz#1557995] +- kvm-block-Fix-drive-for-certain-non-string-scalars.patch [bz#1557995] +- kvm-block-Clean-up-a-misuse-of-qobject_to-in-.bdrv_co_cr.patch [bz#1557995] +- kvm-block-Factor-out-qobject_input_visitor_new_flat_conf.patch [bz#1557995] +- kvm-block-Make-remaining-uses-of-qobject-input-visitor-m.patch [bz#1557995] +- kvm-block-qdict-Simplify-qdict_flatten_qdict.patch [bz#1557995] +- kvm-block-qdict-Tweak-qdict_flatten_qdict-qdict_flatten_.patch [bz#1557995] +- kvm-block-qdict-Clean-up-qdict_crumple-a-bit.patch [bz#1557995] +- kvm-block-qdict-Simplify-qdict_is_list-some.patch [bz#1557995] +- kvm-check-block-qdict-Rename-qdict_flatten-s-variables-f.patch [bz#1557995] +- kvm-check-block-qdict-Cover-flattening-of-empty-lists-an.patch [bz#1557995] +- kvm-block-Fix-blockdev-blockdev-add-for-empty-objects-an.patch [bz#1557995] +- kvm-rbd-New-parameter-auth-client-required.patch [bz#1557995] +- kvm-rbd-New-parameter-key-secret.patch [bz#1557995] +- kvm-block-mirror-honor-ratelimit-again.patch [bz#1572856] +- kvm-block-mirror-Make-cancel-always-cancel-pre-READY.patch [bz#1572856] +- kvm-iotests-Add-test-for-cancelling-a-mirror-job.patch [bz#1572856] +- kvm-iotests-Split-214-off-of-122.patch [bz#1518738] +- kvm-block-Add-COR-filter-driver.patch [bz#1518738] +- kvm-block-BLK_PERM_WRITE-includes-._UNCHANGED.patch [bz#1518738] +- kvm-block-Add-BDRV_REQ_WRITE_UNCHANGED-flag.patch [bz#1518738] +- kvm-block-Set-BDRV_REQ_WRITE_UNCHANGED-for-COR-writes.patch [bz#1518738] +- kvm-block-quorum-Support-BDRV_REQ_WRITE_UNCHANGED.patch [bz#1518738] +- kvm-block-Support-BDRV_REQ_WRITE_UNCHANGED-in-filters.patch [bz#1518738] +- kvm-iotests-Clean-up-wrap-image-in-197.patch [bz#1518738] +- kvm-iotests-Copy-197-for-COR-filter-driver.patch [bz#1518738] +- kvm-iotests-Add-test-for-COR-across-nodes.patch [bz#1518738] +- kvm-qemu-io-Use-purely-string-blockdev-options.patch [bz#1576598] +- 
kvm-qemu-img-Use-only-string-options-in-img_open_opts.patch [bz#1576598] +- kvm-iotests-Add-test-for-U-force-share-conflicts.patch [bz#1576598] +- kvm-qemu-io-Drop-command-functions-return-values.patch [bz#1519617] +- kvm-qemu-io-Let-command-functions-return-error-code.patch [bz#1519617] +- kvm-qemu-io-Exit-with-error-when-a-command-failed.patch [bz#1519617] +- kvm-iotests.py-Add-qemu_io_silent.patch [bz#1519617] +- kvm-iotests-Let-216-make-use-of-qemu-io-s-exit-code.patch [bz#1519617] +- kvm-qcow2-Repair-OFLAG_COPIED-when-fixing-leaks.patch [bz#1527085] +- kvm-iotests-Repairing-error-during-snapshot-deletion.patch [bz#1527085] +- kvm-block-Make-bdrv_is_writable-public.patch [bz#1588039] +- kvm-qcow2-Do-not-mark-inactive-images-corrupt.patch [bz#1588039] +- kvm-iotests-Add-case-for-a-corrupted-inactive-image.patch [bz#1588039] +- kvm-main-loop-drop-spin_counter.patch [bz#1168213] +- kvm-target-ppc-Factor-out-the-parsing-in-kvmppc_get_cpu_.patch [bz#1560847] +- kvm-target-ppc-Don-t-require-private-l1d-cache-on-POWER8.patch [bz#1560847] +- kvm-ppc-spapr_caps-Don-t-disable-cap_cfpc-on-POWER8-by-d.patch [bz#1560847] +- kvm-qxl-fix-local-renderer-crash.patch [bz#1567733] +- kvm-qemu-img-Amendment-support-implies-create_opts.patch [bz#1537956] +- kvm-block-Add-Error-parameter-to-bdrv_amend_options.patch [bz#1537956] +- kvm-qemu-option-Pull-out-Supported-options-print.patch [bz#1537956] +- kvm-qemu-img-Add-print_amend_option_help.patch [bz#1537956] +- kvm-qemu-img-Recognize-no-creation-support-in-o-help.patch [bz#1537956] +- kvm-iotests-Test-help-option-for-unsupporting-formats.patch [bz#1537956] +- kvm-iotests-Rework-113.patch [bz#1537956] +- kvm-qemu-img-Resolve-relative-backing-paths-in-rebase.patch [bz#1569835] +- kvm-iotests-Add-test-for-rebasing-with-relative-paths.patch [bz#1569835] +- kvm-qemu-img-Special-post-backing-convert-handling.patch [bz#1527898] +- kvm-iotests-Test-post-backing-convert-target-behavior.patch [bz#1527898] +- 
kvm-migration-calculate-expected_downtime-with-ram_bytes.patch [bz#1564576] +- kvm-sheepdog-Fix-sd_co_create_opts-memory-leaks.patch [bz#1513543] +- kvm-qemu-iotests-reduce-chance-of-races-in-185.patch [bz#1513543] +- kvm-blockjob-do-not-cancel-timer-in-resume.patch [bz#1513543] +- kvm-nfs-Fix-error-path-in-nfs_options_qdict_to_qapi.patch [bz#1513543] +- kvm-nfs-Remove-processed-options-from-QDict.patch [bz#1513543] +- kvm-blockjob-drop-block_job_pause-resume_all.patch [bz#1513543] +- kvm-blockjob-expose-error-string-via-query.patch [bz#1513543] +- kvm-blockjob-Fix-assertion-in-block_job_finalize.patch [bz#1513543] +- kvm-blockjob-Wrappers-for-progress-counter-access.patch [bz#1513543] +- kvm-blockjob-Move-RateLimit-to-BlockJob.patch [bz#1513543] +- kvm-blockjob-Implement-block_job_set_speed-centrally.patch [bz#1513543] +- kvm-blockjob-Introduce-block_job_ratelimit_get_delay.patch [bz#1513543] +- kvm-blockjob-Add-block_job_driver.patch [bz#1513543] +- kvm-blockjob-Update-block-job-pause-resume-documentation.patch [bz#1513543] +- kvm-blockjob-Improve-BlockJobInfo.offset-len-documentati.patch [bz#1513543] +- kvm-job-Create-Job-JobDriver-and-job_create.patch [bz#1513543] +- kvm-job-Rename-BlockJobType-into-JobType.patch [bz#1513543] +- kvm-job-Add-JobDriver.job_type.patch [bz#1513543] +- kvm-job-Add-job_delete.patch [bz#1513543] +- kvm-job-Maintain-a-list-of-all-jobs.patch [bz#1513543] +- kvm-job-Move-state-transitions-to-Job.patch [bz#1513543] +- kvm-job-Add-reference-counting.patch [bz#1513543] +- kvm-job-Move-cancelled-to-Job.patch [bz#1513543] +- kvm-job-Add-Job.aio_context.patch [bz#1513543] +- kvm-job-Move-defer_to_main_loop-to-Job.patch [bz#1513543] +- kvm-job-Move-coroutine-and-related-code-to-Job.patch [bz#1513543] +- kvm-job-Add-job_sleep_ns.patch [bz#1513543] +- kvm-job-Move-pause-resume-functions-to-Job.patch [bz#1513543] +- kvm-job-Replace-BlockJob.completed-with-job_is_completed.patch [bz#1513543] +- kvm-job-Move-BlockJobCreateFlags-to-Job.patch 
[bz#1513543] +- kvm-blockjob-Split-block_job_event_pending.patch [bz#1513543] +- kvm-job-Add-job_event_.patch [bz#1513543] +- kvm-job-Move-single-job-finalisation-to-Job.patch [bz#1513543] +- kvm-job-Convert-block_job_cancel_async-to-Job.patch [bz#1513543] +- kvm-job-Add-job_drain.patch [bz#1513543] +- kvm-job-Move-.complete-callback-to-Job.patch [bz#1513543] +- kvm-job-Move-job_finish_sync-to-Job.patch [bz#1513543] +- kvm-job-Switch-transactions-to-JobTxn.patch [bz#1513543] +- kvm-job-Move-transactions-to-Job.patch [bz#1513543] +- kvm-job-Move-completion-and-cancellation-to-Job.patch [bz#1513543] +- kvm-block-Cancel-job-in-bdrv_close_all-callers.patch [bz#1513543] +- kvm-job-Add-job_yield.patch [bz#1513543] +- kvm-job-Add-job_dismiss.patch [bz#1513543] +- kvm-job-Add-job_is_ready.patch [bz#1513543] +- kvm-job-Add-job_transition_to_ready.patch [bz#1513543] +- kvm-job-Move-progress-fields-to-Job.patch [bz#1513543] +- kvm-job-Introduce-qapi-job.json.patch [bz#1513543] +- kvm-job-Add-JOB_STATUS_CHANGE-QMP-event.patch [bz#1513543] +- kvm-job-Add-lifecycle-QMP-commands.patch [bz#1513543] +- kvm-job-Add-query-jobs-QMP-command.patch [bz#1513543] +- kvm-blockjob-Remove-BlockJob.driver.patch [bz#1513543] +- kvm-iotests-Move-qmp_to_opts-to-VM.patch [bz#1513543] +- kvm-qemu-iotests-Test-job-with-block-jobs.patch [bz#1513543] +- kvm-vdi-Fix-vdi_co_do_create-return-value.patch [bz#1513543] +- kvm-vhdx-Fix-vhdx_co_create-return-value.patch [bz#1513543] +- kvm-job-Add-error-message-for-failing-jobs.patch [bz#1513543] +- kvm-block-create-Make-x-blockdev-create-a-job.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.get_qmp_events_filtered.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.qmp_log.patch [bz#1513543] +- kvm-qemu-iotests-Add-iotests.img_info_log.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.run_job.patch [bz#1513543] +- kvm-qemu-iotests-iotests.py-helper-for-non-file-protocol.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-206-for-blockdev-create-job.patch [bz#1513543] +- 
kvm-qemu-iotests-Rewrite-207-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-210-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-211-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-212-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-213-for-blockdev-create-job.patch [bz#1513543] +- kvm-block-create-Mark-blockdev-create-stable.patch [bz#1513543] +- kvm-jobs-fix-stale-wording.patch [bz#1513543] +- kvm-jobs-fix-verb-references-in-docs.patch [bz#1513543] +- kvm-iotests-Fix-219-s-timing.patch [bz#1513543] +- kvm-iotests-improve-pause_job.patch [bz#1513543] +- kvm-rpm-Whitelist-copy-on-read-block-driver.patch [bz#1518738] +- kvm-rpm-add-throttle-driver-to-rw-whitelist.patch [bz#1591076] +- kvm-usb-host-skip-open-on-pending-postload-bh.patch [bz#1572851] +- kvm-i386-Define-the-Virt-SSBD-MSR-and-handling-of-it-CVE.patch [bz#1574216] +- kvm-i386-define-the-AMD-virt-ssbd-CPUID-feature-bit-CVE-.patch [bz#1574216] +- kvm-block-file-posix-Pass-FD-to-locking-helpers.patch [bz#1519144] +- kvm-block-file-posix-File-locking-during-creation.patch [bz#1519144] +- kvm-iotests-Add-creation-test-to-153.patch [bz#1519144] +- kvm-vhost-user-add-Net-prefix-to-internal-state-structur.patch [bz#1526645] +- kvm-virtio-support-setting-memory-region-based-host-noti.patch [bz#1526645] +- kvm-vhost-user-support-receiving-file-descriptors-in-sla.patch [bz#1526645] +- kvm-osdep-add-wait.h-compat-macros.patch [bz#1526645] +- kvm-vhost-user-bridge-support-host-notifier.patch [bz#1526645] +- kvm-vhost-allow-backends-to-filter-memory-sections.patch [bz#1526645] +- kvm-vhost-user-allow-slave-to-send-fds-via-slave-channel.patch [bz#1526645] +- kvm-vhost-user-introduce-shared-vhost-user-state.patch [bz#1526645] +- kvm-vhost-user-support-registering-external-host-notifie.patch [bz#1526645] +- kvm-libvhost-user-support-host-notifier.patch [bz#1526645] +- kvm-block-Introduce-API-for-copy-offloading.patch [bz#1482537] 
+- kvm-raw-Check-byte-range-uniformly.patch [bz#1482537] +- kvm-raw-Implement-copy-offloading.patch [bz#1482537] +- kvm-qcow2-Implement-copy-offloading.patch [bz#1482537] +- kvm-file-posix-Implement-bdrv_co_copy_range.patch [bz#1482537] +- kvm-iscsi-Query-and-save-device-designator-when-opening.patch [bz#1482537] +- kvm-iscsi-Create-and-use-iscsi_co_wait_for_task.patch [bz#1482537] +- kvm-iscsi-Implement-copy-offloading.patch [bz#1482537] +- kvm-block-backend-Add-blk_co_copy_range.patch [bz#1482537] +- kvm-qemu-img-Convert-with-copy-offloading.patch [bz#1482537] +- kvm-qcow2-Fix-src_offset-in-copy-offloading.patch [bz#1482537] +- kvm-iscsi-Don-t-blindly-use-designator-length-in-respons.patch [bz#1482537] +- kvm-file-posix-Fix-EINTR-handling.patch [bz#1482537] +- kvm-usb-storage-Add-rerror-werror-properties.patch [bz#1595180] +- kvm-numa-clarify-error-message-when-node-index-is-out-of.patch [bz#1578381] +- kvm-qemu-iotests-Update-026.out.nocache-reference-output.patch [bz#1528541] +- kvm-qcow2-Free-allocated-clusters-on-write-error.patch [bz#1528541] +- kvm-qemu-iotests-Test-qcow2-not-leaking-clusters-on-writ.patch [bz#1528541] +- kvm-qemu-options-Add-missing-newline-to-accel-help-text.patch [bz#1586313] +- kvm-xhci-fix-guest-triggerable-assert.patch [bz#1594135] +- kvm-virtio-gpu-tweak-scanout-disable.patch [bz#1589634] +- kvm-virtio-gpu-update-old-resource-too.patch [bz#1589634] +- kvm-virtio-gpu-disable-scanout-when-backing-resource-is-.patch [bz#1589634] +- kvm-block-Don-t-silently-truncate-node-names.patch [bz#1549654] +- kvm-pr-helper-fix-socket-path-default-in-help.patch [bz#1533158] +- kvm-pr-helper-fix-assertion-failure-on-failed-multipath-.patch [bz#1533158] +- kvm-pr-manager-helper-avoid-SIGSEGV-when-writing-to-the-.patch [bz#1533158] +- kvm-pr-manager-put-stubs-in-.c-file.patch [bz#1533158] +- kvm-pr-manager-add-query-pr-managers-QMP-command.patch [bz#1533158] +- kvm-pr-manager-helper-report-event-on-connection-disconn.patch [bz#1533158] +- 
kvm-pr-helper-avoid-error-on-PR-IN-command-with-zero-req.patch [bz#1533158] +- kvm-pr-helper-Rework-socket-path-handling.patch [bz#1533158] +- kvm-pr-manager-helper-fix-memory-leak-on-event.patch [bz#1533158] +- kvm-object-fix-OBJ_PROP_LINK_UNREF_ON_RELEASE-ambivalenc.patch [bz#1556678] +- kvm-usb-hcd-xhci-test-add-a-test-for-ccid-hotplug.patch [bz#1556678] +- kvm-Revert-usb-release-the-created-buses.patch [bz#1556678] +- kvm-file-posix-Fix-creation-locking.patch [bz#1599335] +- kvm-file-posix-Unlock-FD-after-creation.patch [bz#1599335] +- kvm-ahci-trim-signatures-on-raise-lower.patch [bz#1584914] +- kvm-ahci-fix-PxCI-register-race.patch [bz#1584914] +- kvm-ahci-don-t-schedule-unnecessary-BH.patch [bz#1584914] +- kvm-qcow2-Fix-qcow2_truncate-error-return-value.patch [bz#1595173] +- kvm-block-Convert-.bdrv_truncate-callback-to-coroutine_f.patch [bz#1595173] +- kvm-qcow2-Remove-coroutine-trampoline-for-preallocate_co.patch [bz#1595173] +- kvm-block-Move-bdrv_truncate-implementation-to-io.c.patch [bz#1595173] +- kvm-block-Use-tracked-request-for-truncate.patch [bz#1595173] +- kvm-file-posix-Make-.bdrv_co_truncate-asynchronous.patch [bz#1595173] +- kvm-block-Fix-copy-on-read-crash-with-partial-final-clus.patch [bz#1590640] +- kvm-block-fix-QEMU-crash-with-scsi-hd-and-drive_del.patch [bz#1599515] +- kvm-virtio-rng-process-pending-requests-on-DRIVER_OK.patch [bz#1576743] +- kvm-file-posix-specify-expected-filetypes.patch [bz#1525829] +- kvm-iotests-add-test-226-for-file-driver-types.patch [bz#1525829] +- kvm-block-dirty-bitmap-add-lock-to-bdrv_enable-disable_d.patch [bz#1207657] +- kvm-qapi-add-x-block-dirty-bitmap-enable-disable.patch [bz#1207657] +- kvm-qmp-transaction-support-for-x-block-dirty-bitmap-ena.patch [bz#1207657] +- kvm-qapi-add-x-block-dirty-bitmap-merge.patch [bz#1207657] +- kvm-qapi-add-disabled-parameter-to-block-dirty-bitmap-ad.patch [bz#1207657] +- kvm-block-dirty-bitmap-add-bdrv_enable_dirty_bitmap_lock.patch [bz#1207657] +- 
kvm-dirty-bitmap-fix-double-lock-on-bitmap-enabling.patch [bz#1207657] +- kvm-block-qcow2-bitmap-fix-free_bitmap_clusters.patch [bz#1207657] +- kvm-qcow2-add-overlap-check-for-bitmap-directory.patch [bz#1207657] +- kvm-blockdev-enable-non-root-nodes-for-backup-source.patch [bz#1207657] +- kvm-iotests-add-222-to-test-basic-fleecing.patch [bz#1207657] +- kvm-qcow2-Remove-dead-check-on-ret.patch [bz#1207657] +- kvm-block-Move-request-tracking-to-children-in-copy-offl.patch [bz#1207657] +- kvm-block-Fix-parameter-checking-in-bdrv_co_copy_range_i.patch [bz#1207657] +- kvm-block-Honour-BDRV_REQ_NO_SERIALISING-in-copy-range.patch [bz#1207657] +- kvm-backup-Use-copy-offloading.patch [bz#1207657] +- kvm-block-backup-disable-copy-offloading-for-backup.patch [bz#1207657] +- kvm-iotests-222-Don-t-run-with-luks.patch [bz#1207657] +- kvm-block-io-fix-copy_range.patch [bz#1207657] +- kvm-block-split-flags-in-copy_range.patch [bz#1207657] +- kvm-block-add-BDRV_REQ_SERIALISING-flag.patch [bz#1207657] +- kvm-block-backup-fix-fleecing-scheme-use-serialized-writ.patch [bz#1207657] +- kvm-nbd-server-Reject-0-length-block-status-request.patch [bz#1207657] +- kvm-nbd-server-fix-trace.patch [bz#1207657] +- kvm-nbd-server-refactor-NBDExportMetaContexts.patch [bz#1207657] +- kvm-nbd-server-add-nbd_meta_empty_or_pattern-helper.patch [bz#1207657] +- kvm-nbd-server-implement-dirty-bitmap-export.patch [bz#1207657] +- kvm-qapi-new-qmp-command-nbd-server-add-bitmap.patch [bz#1207657] +- kvm-docs-interop-add-nbd.txt.patch [bz#1207657] +- kvm-nbd-server-introduce-NBD_CMD_CACHE.patch [bz#1207657] +- kvm-nbd-server-Silence-gcc-false-positive.patch [bz#1207657] +- kvm-nbd-server-Fix-dirty-bitmap-logic-regression.patch [bz#1207657] +- kvm-nbd-server-fix-nbd_co_send_block_status.patch [bz#1207657] +- kvm-nbd-client-Add-x-dirty-bitmap-to-query-bitmap-from-s.patch [bz#1207657] +- kvm-iotests-New-test-223-for-exporting-dirty-bitmap-over.patch [bz#1207657] +- 
kvm-hw-char-serial-Only-retry-if-qemu_chr_fe_write-retur.patch [bz#1592817] +- kvm-hw-char-serial-retry-write-if-EAGAIN.patch [bz#1592817] +- kvm-throttle-groups-fix-hang-when-group-member-leaves.patch [bz#1535914] +- kvm-Disable-aarch64-devices-reappeared-after-2.12-rebase.patch [bz#1586357] +- kvm-Disable-split-irq-device.patch [bz#1586357] +- kvm-Disable-AT24Cx-i2c-eeprom.patch [bz#1586357] +- kvm-Disable-CAN-bus-devices.patch [bz#1586357] +- kvm-Disable-new-superio-devices.patch [bz#1586357] +- kvm-Disable-new-pvrdma-device.patch [bz#1586357] +- kvm-qdev-add-HotplugHandler-post_plug-callback.patch [bz#1607891] +- kvm-virtio-scsi-fix-hotplug-reset-vs-event-race.patch [bz#1607891] +- kvm-e1000-Fix-tso_props-compat-for-82540em.patch [bz#1608778] +- kvm-slirp-correct-size-computation-while-concatenating-m.patch [bz#1586255] +- kvm-s390x-sclp-fix-maxram-calculation.patch [bz#1595740] +- kvm-redhat-Make-gitpublish-profile-the-default-one.patch [bz#1425820] +- Resolves: bz#1168213 + (main-loop: WARNING: I/O thread spun for 1000 iterations while doing stream block device.) +- Resolves: bz#1207657 + (RFE: QEMU Incremental live backup - push and pull modes) +- Resolves: bz#1416180 + (QEMU VFIO based block driver for NVMe devices) +- Resolves: bz#1425820 + (Improve QEMU packaging layout with modularization of the block layer) +- Resolves: bz#1482537 + ([RFE] qemu-img copy-offloading (convert command)) +- Resolves: bz#1505664 + ("qemu-kvm: System page size 0x1000000 is not enabled in page_size_mask (0x11000). 
Performance may be slow" show up while using hugepage as guest's memory) +- Resolves: bz#1513543 + ([RFE] Add block job to create format on a storage device) +- Resolves: bz#1518738 + (Add 'copy-on-read' filter driver for use with blockdev-add) +- Resolves: bz#1519144 + (qemu-img: image locking doesn't cover image creation) +- Resolves: bz#1519617 + (The exit code should be non-zero when qemu-io reports an error) +- Resolves: bz#1523065 + ("qemu-img resize" should fail to decrease the size of logical partition/lvm/iSCSI image with raw format) +- Resolves: bz#1525829 + (can not boot up a scsi-block passthrough disk via -blockdev with error "cannot get SG_IO version number: Operation not supported. Is this a SCSI device?") +- Resolves: bz#1526645 + ([Intel 7.6 FEAT] vHost Data Plane Acceleration (vDPA) - vhost user client - qemu-kvm-rhev) +- Resolves: bz#1527085 + (The copied flag should be updated during '-r leaks') +- Resolves: bz#1527898 + ([RFE] qemu-img should leave cluster unallocated if it's read as zero throughout the backing chain) +- Resolves: bz#1528541 + (qemu-img check reports tons of leaked clusters after re-start nfs service to resume writing data in guest) +- Resolves: bz#1533158 + (QEMU support for libvirtd restarting qemu-pr-helper) +- Resolves: bz#1535914 + (Disable io throttling for one member disk of a group during io will induce the other one hang with io) +- Resolves: bz#1537956 + (RFE: qemu-img amend should list the true supported options) +- Resolves: bz#1542080 + (Qemu core dump at cirrus_invalidate_region) +- Resolves: bz#1549654 + (Reject node-names which would be truncated by the block layer commands) +- Resolves: bz#1556678 + (Hot plug usb-ccid for the 2nd time with the same ID as the 1st time failed) +- Resolves: bz#1557995 + (QAPI schema for RBD storage misses the 'password-secret' option) +- Resolves: bz#1558723 + (Create RHEL-7.6 QEMU machine type for AArch64) +- Resolves: bz#1560847 + ([Power8][FW b0320a_1812.861][rhel7.5rc2 
3.10.0-861.el7.ppc64le][qemu-kvm-{ma,rhev}-2.10.0-21.el7_5.1.ppc64le] KVM guest does not default to ori type flush even with pseries-rhel7.5.0-sxxm) +- Resolves: bz#1564576 + (Pegas 1.1 - Require to backport qemu-kvm patch that fixes expected_downtime calculation during migration) +- Resolves: bz#1566153 + (IOERROR pause code lost after resuming a VM while I/O error is still present) +- Resolves: bz#1567733 + (qemu abort when migrate during guest reboot) +- Resolves: bz#1569835 + (qemu-img get wrong backing file path after rebasing image with relative path) +- Resolves: bz#1572851 + (Core dumped after migration when with usb-host) +- Resolves: bz#1572856 + ('block-job-cancel' can not cancel a "drive-mirror" job) +- Resolves: bz#1574216 + (CVE-2018-3639 qemu-kvm-rhev: hw: cpu: speculative store bypass [rhel-7.6]) +- Resolves: bz#1575541 + (qemu core dump while installing win10 guest) +- Resolves: bz#1576598 + (Segfault in qemu-io and qemu-img with -U --image-opts force-share=off) +- Resolves: bz#1576743 + (virtio-rng hangs when running on recent (2.x) QEMU versions) +- Resolves: bz#1578381 + (Error message need update when specify numa distance with node index >=128) +- Resolves: bz#1583959 + (Incorrect vcpu count limit for 7.4 machine types for windows guests) +- Resolves: bz#1584914 + (SATA emulator lags and hangs) +- Resolves: bz#1584984 + (Vm starts failed with 'passthrough' smartcard) +- Resolves: bz#1586255 + (CVE-2018-11806 qemu-kvm-rhev: QEMU: slirp: heap buffer overflow while reassembling fragmented datagrams [rhel-7.6]) +- Resolves: bz#1586313 + (-smp option is not easily found in the output of qemu help) +- Resolves: bz#1586357 + (Disable new devices in 2.12) +- Resolves: bz#1588039 + (Possible assertion failure in qemu when a corrupted image is used during an incoming migration) +- Resolves: bz#1589634 + (Migration failed when rebooting guest with multiple virtio videos) +- Resolves: bz#1590640 + (qemu-kvm: block/io.c:1098: bdrv_co_do_copy_on_readv: 
Assertion `skip_bytes < pnum' failed.) +- Resolves: bz#1591076 + (The driver of 'throttle' is not whitelisted) +- Resolves: bz#1592817 + (Retrying on serial_xmit if the pipe is broken may compromise the Guest) +- Resolves: bz#1594135 + (system_reset many times linux guests cause qemu process Aborted) +- Resolves: bz#1595173 + (blockdev-create is blocking) +- Resolves: bz#1595180 + (Can't set rerror/werror with usb-storage) +- Resolves: bz#1595740 + (RHEL-Alt-7.6 - qemu has error during migration of larger guests) +- Resolves: bz#1599335 + (Image creation locking is too tight and is not properly released) +- Resolves: bz#1599515 + (qemu core-dump with aio_read via hmp (util/qemu-thread-posix.c:64: qemu_mutex_lock_impl: Assertion `mutex->initialized' failed)) +- Resolves: bz#1607891 + (Hotplug events are sometimes lost with virtio-scsi + iothread) +- Resolves: bz#1608778 + (qemu/migration: migrate failed from RHEL.7.6 to RHEL.7.5 with e1000-82540em) + +* Mon Aug 06 2018 Danilo Cesar Lemes de Paula - 2.12.0-17.el8 +- kvm-linux-headers-Update-to-include-KVM_CAP_S390_HPAGE_1.patch [bz#1610906] +- kvm-s390x-Enable-KVM-huge-page-backing-support.patch [bz#1610906] +- kvm-redhat-s390x-add-hpage-1-to-kvm.conf.patch [bz#1610906] +- Resolves: bz#1610906 + ([IBM 8.0 FEAT] KVM: Huge Pages - libhugetlbfs Enablement - qemu-kvm part) + +* Tue Jul 31 2018 Danilo Cesar Lemes de Paula - 2.12.0-16.el8 +- kvm-spapr-Correct-inverted-test-in-spapr_pc_dimm_node.patch [bz#1601671] +- kvm-osdep-powerpc64-align-memory-to-allow-2MB-radix-THP-.patch [bz#1601317] +- kvm-RHEL-8.0-Add-pseries-rhel7.6.0-sxxm-machine-type.patch [bz#1595501] +- kvm-i386-Helpers-to-encode-cache-information-consistentl.patch [bz#1597739] +- kvm-i386-Add-cache-information-in-X86CPUDefinition.patch [bz#1597739] +- kvm-i386-Initialize-cache-information-for-EPYC-family-pr.patch [bz#1597739] +- kvm-i386-Add-new-property-to-control-cache-info.patch [bz#1597739] +- kvm-i386-Clean-up-cache-CPUID-code.patch [bz#1597739] +- 
kvm-i386-Populate-AMD-Processor-Cache-Information-for-cp.patch [bz#1597739] +- kvm-i386-Add-support-for-CPUID_8000_001E-for-AMD.patch [bz#1597739] +- kvm-i386-Fix-up-the-Node-id-for-CPUID_8000_001E.patch [bz#1597739] +- kvm-i386-Enable-TOPOEXT-feature-on-AMD-EPYC-CPU.patch [bz#1597739] +- kvm-i386-Remove-generic-SMT-thread-check.patch [bz#1597739] +- kvm-i386-Allow-TOPOEXT-to-be-enabled-on-older-kernels.patch [bz#1597739] +- Resolves: bz#1595501 + (Create pseries-rhel7.6.0-sxxm machine type) +- Resolves: bz#1597739 + (AMD EPYC/Zen SMT support for KVM / QEMU guest (qemu-kvm)) +- Resolves: bz#1601317 + (RHEL8.0 - qemu patch to align memory to allow 2MB THP) +- Resolves: bz#1601671 + (After rebooting guest,all the hot plug memory will be assigned to the 1st numa node.) + +* Tue Jul 24 2018 Danilo Cesar Lemes de Paula - 2.12.0-15.el8 +- kvm-spapr-Add-ibm-max-associativity-domains-property.patch [bz#1599593] +- kvm-Revert-spapr-Don-t-allow-memory-hotplug-to-memory-le.patch [bz#1599593] +- kvm-simpletrace-Convert-name-from-mapping-record-to-str.patch [bz#1594969] +- kvm-tests-fix-TLS-handshake-failure-with-TLS-1.3.patch [bz#1602403] +- Resolves: bz#1594969 + (simpletrace.py fails when running with Python 3) +- Resolves: bz#1599593 + (User can't hotplug memory to less memory numa node on rhel8) +- Resolves: bz#1602403 + (test-crypto-tlssession unit test fails with assertions) + +* Mon Jul 09 2018 Danilo Cesar Lemes de Paula - 2.12.0-14.el8 +- kvm-vfio-pci-Default-display-option-to-off.patch [bz#1590511] +- kvm-python-futurize-f-libfuturize.fixes.fix_print_with_i.patch [bz#1571533] +- kvm-python-futurize-f-lib2to3.fixes.fix_except.patch [bz#1571533] +- kvm-Revert-Defining-a-shebang-for-python-scripts.patch [bz#1571533] +- kvm-spec-Fix-ambiguous-python-interpreter-name.patch [bz#1571533] +- kvm-qemu-ga-blacklisting-guest-exec-and-guest-exec-statu.patch [bz#1518132] +- kvm-redhat-rewrap-build_configure.sh-cmdline-for-the-rh-.patch [] +- 
kvm-redhat-remove-the-VTD-LIVE_BLOCK_OPS-and-RHV-options.patch [] +- kvm-redhat-fix-the-rh-env-prep-target-s-dependency-on-th.patch [] +- kvm-redhat-remove-dead-code-related-to-s390-not-s390x.patch [] +- kvm-redhat-sync-compiler-flags-from-the-spec-file-to-rh-.patch [] +- kvm-redhat-sync-guest-agent-enablement-and-tcmalloc-usag.patch [] +- kvm-redhat-fix-up-Python-3-dependency-for-building-QEMU.patch [] +- kvm-redhat-fix-up-Python-dependency-for-SRPM-generation.patch [] +- kvm-redhat-disable-glusterfs-dependency-support-temporar.patch [] +- Resolves: bz#1518132 + (Ensure file access RPCs are disabled by default) +- Resolves: bz#1571533 + (Convert qemu-kvm python scripts to python3) +- Resolves: bz#1590511 + (Fails to start guest with Intel vGPU device) + +* Thu Jun 21 2018 Danilo C. L. de Paula - 2.12.0-13.el8 +- Resolves: bz#1508137 + ([IBM 8.0 FEAT] KVM: Interactive Bootloader (qemu)) +- Resolves: bz#1513558 + (Remove RHEL6 machine types) +- Resolves: bz#1568600 + (pc-i440fx-rhel7.6.0 and pc-q35-rhel7.6.0 machine types (x86)) +- Resolves: bz#1570029 + ([IBM 8.0 FEAT] KVM: 3270 Connectivity - qemu part) +- Resolves: bz#1578855 + (Enable Native Ceph support on non x86_64 CPUs) +- Resolves: bz#1585651 + (RHEL 7.6 new pseries machine type (ppc64le)) +- Resolves: bz#1592337 + ([IBM 8.0 FEAT] KVM: CPU Model z14 ZR1 (qemu-kvm)) + +* Tue May 15 2018 Danilo C. L. de Paula - 2.12.0-11.el8.1 +- Resolves: bz#1576468 + (Enable vhost_user in qemu-kvm 2.12) + +* Wed May 09 2018 Danilo de Paula - 2.12.0-11.el8 +- Resolves: bz#1574406 + ([RHEL 8][qemu-kvm] Failed to find romfile "efi-virtio.rom") +- Resolves: bz#1569675 + (Backwards compatibility of pc-*-rhel7.5.0 and older machine-types) +- Resolves: bz#1576045 + (Fix build issue by using python3) +- Resolves: bz#1571145 + (qemu-kvm segfaults on RHEL 8 when run guestfsd under TCG) + +* Fri Apr 20 2018 Danilo de Paula - 2.12.0-10.el +- Fixing some issues with packaging. 
+- Rebasing to 2.12.0-rc4 + +* Fri Apr 13 2018 Danilo de Paula - 2.11.0-7.el8 +- Bumping epoch for RHEL8 and dropping self-obsoleting + +* Thu Apr 12 2018 Danilo de Paula - 2.11.0-6.el8 +- Rebuilding + +* Mon Mar 05 2018 Danilo de Paula - 2.11.0-5.el8 +- Prepare building on RHEL-8.0 diff --git a/qemu-pr-helper.service b/qemu-pr-helper.service new file mode 100644 index 0000000..a1d27b0 --- /dev/null +++ b/qemu-pr-helper.service @@ -0,0 +1,15 @@ +[Unit] +Description=Persistent Reservation Daemon for QEMU + +[Service] +WorkingDirectory=/tmp +Type=simple +ExecStart=/usr/bin/qemu-pr-helper +PrivateTmp=yes +ProtectSystem=strict +ReadWritePaths=/var/run +RestrictAddressFamilies=AF_UNIX +Restart=always +RestartSec=0 + +[Install] diff --git a/qemu-pr-helper.socket b/qemu-pr-helper.socket new file mode 100644 index 0000000..9d7c3e5 --- /dev/null +++ b/qemu-pr-helper.socket @@ -0,0 +1,9 @@ +[Unit] +Description=Persistent Reservation Daemon for QEMU + +[Socket] +ListenStream=/run/qemu-pr-helper.sock +SocketMode=0600 + +[Install] +WantedBy=multi-user.target diff --git a/sources b/sources new file mode 100644 index 0000000..c7b3cb2 --- /dev/null +++ b/sources @@ -0,0 +1 @@ +SHA512 (qemu-3.0.0.tar.xz) = a764302f50b9aca4134bbbc1f361b98e71240cdc7b25600dfe733bf4cf17bd86000bd28357697b08f3b656899dceb9e459350b8d55557817444ed5d7fa380a5a diff --git a/vhost.conf b/vhost.conf new file mode 100644 index 0000000..68d6d7f --- /dev/null +++ b/vhost.conf @@ -0,0 +1,3 @@ +# Increase default vhost memory map limit to match +# KVM's memory slot limit +options vhost max_mem_regions=509 From ec15be19f0643b47e89dd36e7f99c64b0e052fa5 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Thu, 8 Nov 2018 15:02:33 -0200 Subject: [PATCH 003/195] Mass import patches from 2.12.0 --- 0003-Add-RHEL-machine-types.patch | 3017 ----------------- ...d.patch => 0004-Initial-redhat-build.patch | 166 +- ...05-Enable-disable-devices-for-RHEL-7.patch | 22 +- ...Machine-type-related-general-changes.patch | 764 +++++ 0007-Add-aarch64-machine-types.patch | 246 ++ 0008-Add-ppc64-machine-types.patch | 397 +++ 0009-Add-s390x-machine-types.patch | 86 + 0010-Add-x86_64-machine-types.patch | 1069 ++++++ 0011-Enable-make-check.patch | 498 +++ ...ult.patch => 0012-Use-kvm-by-default.patch | 2 +- ...mber-of-devices-that-can-be-assigned.patch | 2 +- ...Add-support-statement-to-help-output.patch | 2 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 2 +- ... => 0016-Add-support-for-simpletrace.patch | 2 +- ...documentation-instead-of-qemu-system.patch | 2 +- ...18-usb-xhci-Fix-PCI-capability-order.patch | 2 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 2 +- ...x-headers-asm-s390-kvm.h-header-sync.patch | 2 +- ...Enable-KVM-huge-page-backing-support.patch | 2 +- ...> 0022-s390x-kvm-add-etoken-facility.patch | 2 +- ...efault-enable-bpb-and-ppa15-for-z196.patch | 2 +- ...-arch_query_cpu_model_expansion-leak.patch | 2 +- ...sable-TOPOEXT-by-default-on-cpu-host.patch | 2 +- ...ify-off-disable-host-as-well-as-peer.patch | 2 +- ...on-postcopy-Clear-have_listen_thread.patch | 2 +- ...ion-cleanup-in-error-paths-in-loadvm.patch | 2 +- ...hange-start-callback-to-run-callback.patch | 2 +- ...h => 0030-jobs-canonize-Error-object.patch | 2 +- ...him.patch => 0031-jobs-add-exit-shim.patch | 2 +- ...2-block-commit-utilize-job_exit-shim.patch | 2 +- ...3-block-mirror-utilize-job_exit-shim.patch | 2 +- ...h => 0034-jobs-utilize-job_exit-shim.patch | 2 +- ...e-function-variables-consistently-na.patch | 2 +- ...argument-to-job_completed-privatize-.patch | 2 +- ...7-jobs-remove-job_defer_to_main_loop.patch | 2 +- ...-commit-add-block-job-creation-flags.patch | 2 +- 
...-mirror-add-block-job-creation-flags.patch | 2 +- ...-stream-add-block-job-creation-flags.patch | 2 +- ...refactor-commit-to-use-job-callbacks.patch | 2 +- ...don-t-install-backing-chain-on-abort.patch | 2 +- ...or-conservative-mirror_exit-refactor.patch | 2 +- ...refactor-stream-to-use-job-callbacks.patch | 2 +- ...s-blockjob-replace-Blockjob-with-Job.patch | 2 +- ...s-test-blockjob-remove-exit-callback.patch | 2 +- ...st-blockjob-txn-move-.exit-to-.clean.patch | 2 +- ...h => 0048-jobs-remove-.exit-callback.patch | 2 +- ...ock-commit-expose-new-job-properties.patch | 2 +- ...ock-mirror-expose-new-job-properties.patch | 2 +- ...ock-stream-expose-new-job-properties.patch | 2 +- ...lock-backup-qapi-documentation-fixup.patch | 2 +- ...-document-transactional-shortcomings.patch | 2 +- ...YS-signal-instead-of-killing-the-thr.patch | 67 + ...r-SCMP_ACT_KILL_PROCESS-if-available.patch | 110 + ...et-the-seccomp-filter-to-all-threads.patch | 77 + ...ide-effects-of-memory_region_init_fo.patch | 185 + ...or-Fail-gracefully-for-source-target.patch | 87 + ...ommit-Add-top-node-base-node-options.patch | 141 + ...-Test-commit-with-top-node-base-node.patch | 127 + ...o-not-clear-user_paused-until-after-.patch | 59 + ...catch-abort-on-forced-blockjob-cance.patch | 173 + ...uild-build-SRAT-memory-affinity-stru.patch | 117 + ...count-ctx-notifier-as-progress-when-.patch | 48 + ...y_accept-only-during-blocking-aio_po.patch | 124 + ...oncurrent-access-to-poll_disable_cnt.patch | 122 + ...posix-compute-timeout-before-polling.patch | 186 + ...p-system-call-if-ctx-notifier-pollin.patch | 64 + 0069-linux-headers-update.patch | 202 ++ ...et-up-CPU-model-for-AP-device-suppor.patch | 148 + ...-AP-instruction-interpretation-for-g.patch | 89 + ...se-Adjunct-Processor-AP-object-model.patch | 281 ++ ...90x-vfio-ap-Introduce-VFIO-AP-device.patch | 305 ++ ...d-specifications-for-AP-virtualizati.patch | 889 +++++ ...-sasl_server_init-only-when-required.patch | 89 + 
0076-nbd-server-fix-NBD_CMD_CACHE.patch | 52 + 0077-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch | 96 + ...acquire-AioContext-before-qemu_laio_.patch | 134 + ...emu_aio_coroutine_enter-in-co_schedu.patch | 78 + ...ed-aio_poll-hanging-in-job_txn_apply.patch | 105 + ...-Fix-missing-locking-due-to-mismerge.patch | 55 + ...ob-Wake-up-BDS-when-job-becomes-idle.patch | 161 + ...ease-num_waiters-even-in-home-thread.patch | 64 + ...Drain-with-block-jobs-in-an-I-O-thre.patch | 208 ++ ...quire-AioContext-around-job_cancel_s.patch | 86 + ...se-AIO_WAIT_WHILE-in-job_finish_sync.patch | 77 + ...Test-AIO_WAIT_WHILE-in-completion-ca.patch | 59 + ...ssing-locking-in-bdrv_co_drain_bh_cb.patch | 96 + ...k-backend-Add-.drained_poll-callback.patch | 66 + ...kend-Fix-potential-double-blk_delete.patch | 67 + ...crease-in_flight-only-after-callback.patch | 74 + ...Lie-better-in-child_job_drained_poll.patch | 104 + ...aio_poll-in-bdrv_drain_poll-variants.patch | 64 + ...Test-nested-poll-in-bdrv_drain_poll_.patch | 63 + ...deadlocks-in-job_completed_txn_abort.patch | 85 + ...AIO_WAIT_WHILE-in-job-.commit-.abort.patch | 241 ++ ...est-bdrv-drain-Fix-outdated-comments.patch | 69 + 0098-block-Use-a-single-global-AioWait.patch | 367 ++ ...Test-draining-job-source-child-and-p.patch | 198 ++ ...bd-pull-out-qemu_rbd_convert_options.patch | 95 + ...bd-Attempt-to-parse-legacy-filenames.patch | 120 + ...precation-documentation-for-filename.patch | 59 + ...test-for-rbd-legacy-keyvalue-filenam.patch | 141 + 0104-luks-Allow-share-rw-on.patch | 52 + 81-kvm-rhel.rules | 1 + qemu-kvm.spec | 403 ++- udev-kvm-check.c | 172 + 105 files changed, 10496 insertions(+), 3255 deletions(-) delete mode 100644 0003-Add-RHEL-machine-types.patch rename 0001-Initial-redhat-build.patch => 0004-Initial-redhat-build.patch (62%) rename 0002-Enable-disable-devices-for-RHEL-7.patch => 0005-Enable-disable-devices-for-RHEL-7.patch (98%) create mode 100644 0006-Machine-type-related-general-changes.patch create mode 100644 
0007-Add-aarch64-machine-types.patch create mode 100644 0008-Add-ppc64-machine-types.patch create mode 100644 0009-Add-s390x-machine-types.patch create mode 100644 0010-Add-x86_64-machine-types.patch create mode 100644 0011-Enable-make-check.patch rename 0004-Use-kvm-by-default.patch => 0012-Use-kvm-by-default.patch (92%) rename 0005-vfio-cap-number-of-devices-that-can-be-assigned.patch => 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch (96%) rename 0006-Add-support-statement-to-help-output.patch => 0014-Add-support-statement-to-help-output.patch (96%) rename 0007-globally-limit-the-maximum-number-of-CPUs.patch => 0015-globally-limit-the-maximum-number-of-CPUs.patch (97%) rename 0008-Add-support-for-simpletrace.patch => 0016-Add-support-for-simpletrace.patch (98%) rename 0009-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch => 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch (99%) rename 0010-usb-xhci-Fix-PCI-capability-order.patch => 0018-usb-xhci-Fix-PCI-capability-order.patch (97%) rename 0011-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch => 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch (97%) rename 0012-linux-headers-asm-s390-kvm.h-header-sync.patch => 0020-linux-headers-asm-s390-kvm.h-header-sync.patch (97%) rename 0013-s390x-Enable-KVM-huge-page-backing-support.patch => 0021-s390x-Enable-KVM-huge-page-backing-support.patch (98%) rename 0014-s390x-kvm-add-etoken-facility.patch => 0022-s390x-kvm-add-etoken-facility.patch (99%) rename 0015-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch => 0023-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch (96%) rename 0016-i386-Fix-arch_query_cpu_model_expansion-leak.patch => 0024-i386-Fix-arch_query_cpu_model_expansion-leak.patch (98%) rename 0017-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch => 0025-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch (96%) rename 0018-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch => 
0026-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch (98%) rename 0019-migration-postcopy-Clear-have_listen_thread.patch => 0027-migration-postcopy-Clear-have_listen_thread.patch (96%) rename 0020-migration-cleanup-in-error-paths-in-loadvm.patch => 0028-migration-cleanup-in-error-paths-in-loadvm.patch (96%) rename 0021-jobs-change-start-callback-to-run-callback.patch => 0029-jobs-change-start-callback-to-run-callback.patch (99%) rename 0022-jobs-canonize-Error-object.patch => 0030-jobs-canonize-Error-object.patch (99%) rename 0023-jobs-add-exit-shim.patch => 0031-jobs-add-exit-shim.patch (98%) rename 0024-block-commit-utilize-job_exit-shim.patch => 0032-block-commit-utilize-job_exit-shim.patch (98%) rename 0025-block-mirror-utilize-job_exit-shim.patch => 0033-block-mirror-utilize-job_exit-shim.patch (98%) rename 0026-jobs-utilize-job_exit-shim.patch => 0034-jobs-utilize-job_exit-shim.patch (99%) rename 0027-block-backup-make-function-variables-consistently-na.patch => 0035-block-backup-make-function-variables-consistently-na.patch (99%) rename 0028-jobs-remove-ret-argument-to-job_completed-privatize-.patch => 0036-jobs-remove-ret-argument-to-job_completed-privatize-.patch (98%) rename 0029-jobs-remove-job_defer_to_main_loop.patch => 0037-jobs-remove-job_defer_to_main_loop.patch (98%) rename 0030-block-commit-add-block-job-creation-flags.patch => 0038-block-commit-add-block-job-creation-flags.patch (98%) rename 0031-block-mirror-add-block-job-creation-flags.patch => 0039-block-mirror-add-block-job-creation-flags.patch (98%) rename 0032-block-stream-add-block-job-creation-flags.patch => 0040-block-stream-add-block-job-creation-flags.patch (98%) rename 0033-block-commit-refactor-commit-to-use-job-callbacks.patch => 0041-block-commit-refactor-commit-to-use-job-callbacks.patch (99%) rename 0034-block-mirror-don-t-install-backing-chain-on-abort.patch => 0042-block-mirror-don-t-install-backing-chain-on-abort.patch (96%) rename 
0035-block-mirror-conservative-mirror_exit-refactor.patch => 0043-block-mirror-conservative-mirror_exit-refactor.patch (98%) rename 0036-block-stream-refactor-stream-to-use-job-callbacks.patch => 0044-block-stream-refactor-stream-to-use-job-callbacks.patch (97%) rename 0037-tests-blockjob-replace-Blockjob-with-Job.patch => 0045-tests-blockjob-replace-Blockjob-with-Job.patch (98%) rename 0038-tests-test-blockjob-remove-exit-callback.patch => 0046-tests-test-blockjob-remove-exit-callback.patch (97%) rename 0039-tests-test-blockjob-txn-move-.exit-to-.clean.patch => 0047-tests-test-blockjob-txn-move-.exit-to-.clean.patch (96%) rename 0040-jobs-remove-.exit-callback.patch => 0048-jobs-remove-.exit-callback.patch (98%) rename 0041-qapi-block-commit-expose-new-job-properties.patch => 0049-qapi-block-commit-expose-new-job-properties.patch (98%) rename 0042-qapi-block-mirror-expose-new-job-properties.patch => 0050-qapi-block-mirror-expose-new-job-properties.patch (98%) rename 0043-qapi-block-stream-expose-new-job-properties.patch => 0051-qapi-block-stream-expose-new-job-properties.patch (98%) rename 0044-block-backup-qapi-documentation-fixup.patch => 0052-block-backup-qapi-documentation-fixup.patch (98%) rename 0045-blockdev-document-transactional-shortcomings.patch => 0053-blockdev-document-transactional-shortcomings.patch (96%) create mode 100644 0054-seccomp-use-SIGSYS-signal-instead-of-killing-the-thr.patch create mode 100644 0055-seccomp-prefer-SCMP_ACT_KILL_PROCESS-if-available.patch create mode 100644 0056-seccomp-set-the-seccomp-filter-to-all-threads.patch create mode 100644 0057-memory-cleanup-side-effects-of-memory_region_init_fo.patch create mode 100644 0058-mirror-Fail-gracefully-for-source-target.patch create mode 100644 0059-commit-Add-top-node-base-node-options.patch create mode 100644 0060-qemu-iotests-Test-commit-with-top-node-base-node.patch create mode 100644 0061-block-for-jobs-do-not-clear-user_paused-until-after-.patch create mode 100644 
0062-block-iotest-to-catch-abort-on-forced-blockjob-cance.patch create mode 100644 0063-Revert-hw-acpi-build-build-SRAT-memory-affinity-stru.patch create mode 100644 0064-aio-posix-Don-t-count-ctx-notifier-as-progress-when-.patch create mode 100644 0065-aio-Do-aio_notify_accept-only-during-blocking-aio_po.patch create mode 100644 0066-aio-posix-fix-concurrent-access-to-poll_disable_cnt.patch create mode 100644 0067-aio-posix-compute-timeout-before-polling.patch create mode 100644 0068-aio-posix-do-skip-system-call-if-ctx-notifier-pollin.patch create mode 100644 0069-linux-headers-update.patch create mode 100644 0070-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch create mode 100644 0071-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch create mode 100644 0072-s390x-ap-base-Adjunct-Processor-AP-object-model.patch create mode 100644 0073-s390x-vfio-ap-Introduce-VFIO-AP-device.patch create mode 100644 0074-s390-doc-detailed-specifications-for-AP-virtualizati.patch create mode 100644 0075-vnc-call-sasl_server_init-only-when-required.patch create mode 100644 0076-nbd-server-fix-NBD_CMD_CACHE.patch create mode 100644 0077-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch create mode 100644 0078-block-linux-aio-acquire-AioContext-before-qemu_laio_.patch create mode 100644 0079-util-async-use-qemu_aio_coroutine_enter-in-co_schedu.patch create mode 100644 0080-job-Fix-nested-aio_poll-hanging-in-job_txn_apply.patch create mode 100644 0081-job-Fix-missing-locking-due-to-mismerge.patch create mode 100644 0082-blockjob-Wake-up-BDS-when-job-becomes-idle.patch create mode 100644 0083-aio-wait-Increase-num_waiters-even-in-home-thread.patch create mode 100644 0084-test-bdrv-drain-Drain-with-block-jobs-in-an-I-O-thre.patch create mode 100644 0085-test-blockjob-Acquire-AioContext-around-job_cancel_s.patch create mode 100644 0086-job-Use-AIO_WAIT_WHILE-in-job_finish_sync.patch create mode 100644 0087-test-bdrv-drain-Test-AIO_WAIT_WHILE-in-completion-ca.patch create mode 
100644 0088-block-Add-missing-locking-in-bdrv_co_drain_bh_cb.patch create mode 100644 0089-block-backend-Add-.drained_poll-callback.patch create mode 100644 0090-block-backend-Fix-potential-double-blk_delete.patch create mode 100644 0091-block-backend-Decrease-in_flight-only-after-callback.patch create mode 100644 0092-blockjob-Lie-better-in-child_job_drained_poll.patch create mode 100644 0093-block-Remove-aio_poll-in-bdrv_drain_poll-variants.patch create mode 100644 0094-test-bdrv-drain-Test-nested-poll-in-bdrv_drain_poll_.patch create mode 100644 0095-job-Avoid-deadlocks-in-job_completed_txn_abort.patch create mode 100644 0096-test-bdrv-drain-AIO_WAIT_WHILE-in-job-.commit-.abort.patch create mode 100644 0097-test-bdrv-drain-Fix-outdated-comments.patch create mode 100644 0098-block-Use-a-single-global-AioWait.patch create mode 100644 0099-test-bdrv-drain-Test-draining-job-source-child-and-p.patch create mode 100644 0100-block-rbd-pull-out-qemu_rbd_convert_options.patch create mode 100644 0101-block-rbd-Attempt-to-parse-legacy-filenames.patch create mode 100644 0102-block-rbd-add-deprecation-documentation-for-filename.patch create mode 100644 0103-block-rbd-add-iotest-for-rbd-legacy-keyvalue-filenam.patch create mode 100644 0104-luks-Allow-share-rw-on.patch create mode 100644 81-kvm-rhel.rules create mode 100644 udev-kvm-check.c diff --git a/0003-Add-RHEL-machine-types.patch b/0003-Add-RHEL-machine-types.patch deleted file mode 100644 index 9e8b0db..0000000 --- a/0003-Add-RHEL-machine-types.patch +++ /dev/null @@ -1,3017 +0,0 @@ -From e145f88a1d3be0e12262c0b3dab80133778ab21a Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Sun, 14 Dec 2014 18:32:18 +0100 -Subject: Add RHEL machine types - -This commit adds all changes related to machine types applied since -qemu-kvm-2.12.0-31.el8. - -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. 
de Paula - --- -Rebase notes (3.0.0): -- spapr_cpu_init merged into spapr_realize_vcpu (upstream) -- Commented out virt_machine_device_plug_cb in hw/arm/virt.c -- ifdef virt_get_iommu and virt_set_iommu in hw/arm/virt.c -- test changes refactored and moved all to this patch - -Merged patches (3.0.0): -- 50dd601 s390x: add RHEL 7.6 machine type for ccw -- 8198c8d e1000: Fix tso_props compat for 82540em -- e924798 Use 4 MB vram for cirrus. -- 738561e Fix x-hv-max-vps compat value for 7.4 machine type -- 9cb37fd AArch64: Add virt-rhel7.6 machine type -- 2343d56 migration: introduce decompress-error-check (partialy) -- 188fa88 pc: rhel7.6.0 machine-types -- 88b4505 Remove rhel6* machine types -- 8a50b1c Remove rhel6_ctrl_guest_workaround -- 71562f4 Remove SeaBIOS shadowing -- 40a8867 Remove ich9_uhci123_irqpin_override -- 7574808 redhat: define pseries-rhel7.6.0 machine types -- 6c2f105 RHEL-8.0: Add pseries-rhel7.6.0-sxxm machine type -- 411b30b pc: pc-*-rhel75.5.0 compat code - -Signed-off-by: Miroslav Rezanina - -Conflicts: - tests/qom-test.c ---- - hw/acpi/ich9.c | 16 ++ - hw/acpi/piix4.c | 6 +- - hw/arm/virt.c | 126 ++++++++- - hw/char/serial.c | 16 ++ - hw/display/cirrus_vga.c | 2 +- - hw/display/vga-isa.c | 2 +- - hw/i386/acpi-build.c | 3 + - hw/i386/pc.c | 7 +- - hw/i386/pc_piix.c | 194 ++++++++++++- - hw/i386/pc_q35.c | 93 ++++++- - hw/net/e1000.c | 18 +- - hw/net/e1000e.c | 21 ++ - hw/net/rtl8139.c | 4 +- - hw/ppc/spapr.c | 252 +++++++++++++++++ - hw/ppc/spapr_cpu_core.c | 13 + - hw/s390x/s390-virtio-ccw.c | 46 +++- - hw/smbios/smbios.c | 1 + - hw/timer/i8254_common.c | 2 +- - hw/timer/mc146818rtc.c | 6 + - hw/usb/hcd-uhci.c | 4 +- - hw/usb/hcd-xhci.c | 20 ++ - hw/usb/hcd-xhci.h | 2 + - include/hw/acpi/ich9.h | 3 + - include/hw/arm/virt.h | 22 ++ - include/hw/compat.h | 229 ++++++++++++++++ - include/hw/i386/pc.h | 564 ++++++++++++++++++++++++++++++++++++++ - include/hw/ppc/spapr.h | 1 + - include/hw/usb.h | 4 + - migration/migration.c | 2 + - 
migration/migration.h | 5 + - qdev-monitor.c | 1 - - redhat/qemu-kvm.spec.template | 2 +- - scripts/vmstate-static-checker.py | 1 - - target/i386/cpu.c | 9 +- - target/i386/machine.c | 21 ++ - target/ppc/compat.c | 13 +- - target/ppc/cpu.h | 1 + - tests/Makefile.include | 124 ++++----- - tests/boot-serial-test.c | 6 +- - tests/cpu-plug-test.c | 3 +- - tests/e1000-test.c | 2 + - tests/endianness-test.c | 2 + - tests/prom-env-test.c | 2 + - tests/qemu-iotests/051 | 12 +- - tests/qemu-iotests/group | 4 +- - tests/qom-test.c | 2 +- - tests/test-x86-cpuid-compat.c | 2 + - tests/usb-hcd-xhci-test.c | 4 + - 48 files changed, 1800 insertions(+), 95 deletions(-) - -diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index a4e87b8..23a7baa 100644 ---- a/hw/acpi/ich9.c -+++ b/hw/acpi/ich9.c -@@ -441,6 +441,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) - s->pm.enable_tco = value; - } - -+static bool ich9_pm_get_force_rev1_fadt(Object *obj, Error **errp) -+{ -+ ICH9LPCState *s = ICH9_LPC_DEVICE(obj); -+ return s->pm.force_rev1_fadt; -+} -+ -+static void ich9_pm_set_force_rev1_fadt(Object *obj, bool value, Error **errp) -+{ -+ ICH9LPCState *s = ICH9_LPC_DEVICE(obj); -+ s->pm.force_rev1_fadt = value; -+} -+ - void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) - { - static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; -@@ -465,6 +477,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) - ich9_pm_get_cpu_hotplug_legacy, - ich9_pm_set_cpu_hotplug_legacy, - NULL); -+ object_property_add_bool(obj, "__com.redhat_force-rev1-fadt", -+ ich9_pm_get_force_rev1_fadt, -+ ich9_pm_set_force_rev1_fadt, -+ NULL); - object_property_add(obj, ACPI_PM_PROP_S3_DISABLED, "uint8", - ich9_pm_get_disable_s3, - ich9_pm_set_disable_s3, -diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 6404af5..0f1f9e2 100644 ---- a/hw/acpi/piix4.c -+++ b/hw/acpi/piix4.c -@@ -310,7 +310,7 @@ static const VMStateDescription vmstate_cpuhp_state 
= { - static const VMStateDescription vmstate_acpi = { - .name = "piix4_pm", - .version_id = 3, -- .minimum_version_id = 3, -+ .minimum_version_id = 2, - .minimum_version_id_old = 1, - .load_state_old = acpi_load_old, - .post_load = vmstate_acpi_post_load, -@@ -670,8 +670,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) - - static Property piix4_pm_properties[] = { - DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), -- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), -- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), -+ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), -+ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), - DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), - DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, - use_acpi_pci_hotplug, true), -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 281ddcd..b02e4a0 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -60,6 +60,7 @@ - #include "standard-headers/linux/input.h" - #include "hw/arm/smmuv3.h" - -+#if 0 /* disabled Red Hat Enterprise Linux */ - #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ - static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ - void *data) \ -@@ -87,7 +88,36 @@ - DEFINE_VIRT_MACHINE_LATEST(major, minor, true) - #define DEFINE_VIRT_MACHINE(major, minor) \ - DEFINE_VIRT_MACHINE_LATEST(major, minor, false) -- -+#endif /* disabled for RHEL */ -+ -+#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ -+ static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ -+ void *data) \ -+ { \ -+ MachineClass *mc = MACHINE_CLASS(oc); \ -+ rhel##m##n##s##_virt_options(mc); \ -+ mc->desc = "RHEL " # m "." # n "." 
# s " ARM Virtual Machine"; \ -+ if (latest) { \ -+ mc->alias = "virt"; \ -+ mc->is_default = 1; \ -+ } \ -+ } \ -+ static const TypeInfo rhel##m##n##s##_machvirt_info = { \ -+ .name = MACHINE_TYPE_NAME("virt-rhel" # m "." # n "." # s), \ -+ .parent = TYPE_RHEL_MACHINE, \ -+ .instance_init = rhel##m##n##s##_virt_instance_init, \ -+ .class_init = rhel##m##n##s##_virt_class_init, \ -+ }; \ -+ static void rhel##m##n##s##_machvirt_init(void) \ -+ { \ -+ type_register_static(&rhel##m##n##s##_machvirt_info); \ -+ } \ -+ type_init(rhel##m##n##s##_machvirt_init); -+ -+#define DEFINE_RHEL_MACHINE_AS_LATEST(major, minor, subminor) \ -+ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, true) -+#define DEFINE_RHEL_MACHINE(major, minor, subminor) \ -+ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) - - /* Number of external interrupt lines to configure the GIC with */ - #define NUM_IRQS 256 -@@ -1539,6 +1569,7 @@ static void machvirt_init(MachineState *machine) - qemu_add_machine_init_done_notifier(&vms->machine_done); - } - -+#if 0 /* disabled for RHEL */ - static bool virt_get_secure(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1567,6 +1598,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) - vms->virt = value; - } - -+#endif /* disabled for RHEL */ - static bool virt_get_highmem(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1621,6 +1653,7 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp) - } - } - -+#if 0 - static char *virt_get_iommu(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1648,6 +1681,7 @@ static void virt_set_iommu(Object *obj, const char *value, Error **errp) - error_append_hint(errp, "Valid values are none, smmuv3.\n"); - } - } -+#endif - - static CpuInstanceProperties - virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) -@@ -1687,6 +1721,7 @@ static const CPUArchIdList 
*virt_possible_cpu_arch_ids(MachineState *ms) - return ms->possible_cpus; - } - -+#if 0 /* disabled for RHEL */ - static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) - { -@@ -1835,6 +1870,9 @@ static void virt_machine_3_0_options(MachineClass *mc) - } - DEFINE_VIRT_MACHINE_AS_LATEST(3, 0) - -+#define VIRT_COMPAT_2_12 \ -+ HW_COMPAT_2_12 -+ - static void virt_2_12_instance_init(Object *obj) - { - virt_3_0_instance_init(obj); -@@ -1960,3 +1998,89 @@ static void virt_machine_2_6_options(MachineClass *mc) - vmc->no_pmu = true; - } - DEFINE_VIRT_MACHINE(2, 6) -+#endif /* disabled for RHEL */ -+ -+static void rhel_machine_class_init(ObjectClass *oc, void *data) -+{ -+ MachineClass *mc = MACHINE_CLASS(oc); -+ -+ mc->family = "virt-rhel-Z"; -+ mc->init = machvirt_init; -+ /* Start max_cpus at the maximum QEMU supports. We'll further restrict -+ * it later in machvirt_init, where we have more information about the -+ * configuration of the particular instance. 
-+ */ -+ mc->max_cpus = 255; -+ mc->block_default_type = IF_VIRTIO; -+ mc->no_cdrom = 1; -+ mc->pci_allow_0_address = true; -+ /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */ -+ mc->minimum_page_bits = 12; -+ mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids; -+ mc->cpu_index_to_instance_props = virt_cpu_index_to_props; -+ mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a57"); -+ mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; -+} -+ -+static const TypeInfo rhel_machine_info = { -+ .name = TYPE_RHEL_MACHINE, -+ .parent = TYPE_MACHINE, -+ .abstract = true, -+ .instance_size = sizeof(VirtMachineState), -+ .class_size = sizeof(VirtMachineClass), -+ .class_init = rhel_machine_class_init, -+}; -+ -+static void rhel_machine_init(void) -+{ -+ type_register_static(&rhel_machine_info); -+} -+type_init(rhel_machine_init); -+ -+static void rhel760_virt_instance_init(Object *obj) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); -+ -+ /* EL3 is disabled by default and non-configurable for RHEL */ -+ vms->secure = false; -+ /* EL2 is disabled by default and non-configurable for RHEL */ -+ vms->virt = false; -+ /* High memory is enabled by default for RHEL */ -+ vms->highmem = true; -+ object_property_add_bool(obj, "highmem", virt_get_highmem, -+ virt_set_highmem, NULL); -+ object_property_set_description(obj, "highmem", -+ "Set on/off to enable/disable using " -+ "physical address space above 32 bits", -+ NULL); -+ /* Default GIC type is still v2, but became configurable for RHEL */ -+ vms->gic_version = 2; -+ object_property_add_str(obj, "gic-version", virt_get_gic_version, -+ virt_set_gic_version, NULL); -+ object_property_set_description(obj, "gic-version", -+ "Set GIC version. 
" -+ "Valid values are 2, 3 and host", NULL); -+ -+ if (vmc->no_its) { -+ vms->its = false; -+ } else { -+ /* Default allows ITS instantiation */ -+ vms->its = true; -+ object_property_add_bool(obj, "its", virt_get_its, -+ virt_set_its, NULL); -+ object_property_set_description(obj, "its", -+ "Set on/off to enable/disable " -+ "ITS instantiation", -+ NULL); -+ } -+ -+ vms->memmap=a15memmap; -+ vms->irqmap=a15irqmap; -+} -+ -+static void rhel760_virt_options(MachineClass *mc) -+{ -+ SET_MACHINE_COMPAT(mc, ARM_RHEL_COMPAT); -+} -+DEFINE_RHEL_MACHINE_AS_LATEST(7, 6, 0) -diff --git a/hw/char/serial.c b/hw/char/serial.c -index 251f40f..8e3520c 100644 ---- a/hw/char/serial.c -+++ b/hw/char/serial.c -@@ -30,6 +30,7 @@ - #include "qemu/timer.h" - #include "qemu/error-report.h" - #include "trace.h" -+#include "migration/migration.h" - - //#define DEBUG_SERIAL - -@@ -699,6 +700,9 @@ static int serial_post_load(void *opaque, int version_id) - static bool serial_thr_ipending_needed(void *opaque) - { - SerialState *s = opaque; -+ if (migrate_pre_2_2) { -+ return false; -+ } - - if (s->ier & UART_IER_THRI) { - bool expected_value = ((s->iir & UART_IIR_ID) == UART_IIR_THRI); -@@ -780,6 +784,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { - static bool serial_fifo_timeout_timer_needed(void *opaque) - { - SerialState *s = (SerialState *)opaque; -+ if (migrate_pre_2_2) { -+ return false; -+ } -+ - return timer_pending(s->fifo_timeout_timer); - } - -@@ -797,6 +805,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { - static bool serial_timeout_ipending_needed(void *opaque) - { - SerialState *s = (SerialState *)opaque; -+ if (migrate_pre_2_2) { -+ return false; -+ } -+ - return s->timeout_ipending != 0; - } - -@@ -814,6 +826,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { - static bool serial_poll_needed(void *opaque) - { - SerialState *s = (SerialState *)opaque; -+ if (migrate_pre_2_2) { -+ return false; -+ } -+ - 
return s->poll_msl >= 0; - } - -diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index 9fd5665..6910014 100644 ---- a/hw/display/cirrus_vga.c -+++ b/hw/display/cirrus_vga.c -@@ -3061,7 +3061,7 @@ static void isa_cirrus_vga_realizefn(DeviceState *dev, Error **errp) - - static Property isa_cirrus_vga_properties[] = { - DEFINE_PROP_UINT32("vgamem_mb", struct ISACirrusVGAState, -- cirrus_vga.vga.vram_size_mb, 4), -+ cirrus_vga.vga.vram_size_mb, 16), - DEFINE_PROP_BOOL("blitter", struct ISACirrusVGAState, - cirrus_vga.enable_blitter, true), - DEFINE_PROP_END_OF_LIST(), -diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index fa44242..7835c83 100644 ---- a/hw/display/vga-isa.c -+++ b/hw/display/vga-isa.c -@@ -80,7 +80,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) - } - - static Property vga_isa_properties[] = { -- DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 8), -+ DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 16), - DEFINE_PROP_END_OF_LIST(), - }; - -diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index e1ee8ae..be9bdb5 100644 ---- a/hw/i386/acpi-build.c -+++ b/hw/i386/acpi-build.c -@@ -184,6 +184,9 @@ static void acpi_get_pm_info(AcpiPmInfo *pm) - pm->fadt.reset_reg = r; - pm->fadt.reset_val = 0xf; - pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; -+ if (object_property_get_bool(lpc, -+ "__com.redhat_force-rev1-fadt", NULL)) -+ pm->fadt.rev = 1; - pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; - } - assert(obj); -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 11c287e..253d48d 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -1419,7 +1419,8 @@ void pc_memory_init(PCMachineState *pcms, - option_rom_mr = g_malloc(sizeof(*option_rom_mr)); - memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, - &error_fatal); -- if (pcmc->pci_enabled) { -+ /* RH difference: See bz 1489800, explicitly make ROM ro */ -+ if (pcmc->pc_rom_ro) { - 
memory_region_set_readonly(option_rom_mr, true); - } - memory_region_add_subregion_overlap(rom_memory, -@@ -2387,6 +2388,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - pcmc->save_tsc_khz = true; - pcmc->linuxboot_dma_enabled = true; - assert(!mc->get_hotplug_handler); -+ pcmc->pc_rom_ro = true; - mc->get_hotplug_handler = pc_get_hotpug_handler; - mc->cpu_index_to_instance_props = pc_cpu_index_to_props; - mc->get_default_cpu_node_id = pc_get_default_cpu_node_id; -@@ -2396,7 +2398,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - mc->default_boot_order = "cad"; - mc->hot_add_cpu = pc_hot_add_cpu; - mc->block_default_type = IF_IDE; -- mc->max_cpus = 255; -+ /* 240: max CPU count for RHEL */ -+ mc->max_cpus = 240; - mc->reset = pc_machine_reset; - hc->pre_plug = pc_machine_device_pre_plug_cb; - hc->plug = pc_machine_device_plug_cb; -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index dc09466..f0484ec 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -50,6 +50,7 @@ - #include "cpu.h" - #include "qapi/error.h" - #include "qemu/error-report.h" -+#include "migration/migration.h" - #ifdef CONFIG_XEN - #include - #include "hw/xen/xen_pt.h" -@@ -170,8 +171,8 @@ static void pc_init1(MachineState *machine, - if (pcmc->smbios_defaults) { - MachineClass *mc = MACHINE_GET_CLASS(machine); - /* These values are guest ABI, do not change */ -- smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", -- mc->name, pcmc->smbios_legacy_mode, -+ smbios_set_defaults("Red Hat", "KVM", -+ mc->desc, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, - SMBIOS_ENTRY_POINT_21); - } -@@ -309,6 +310,7 @@ static void pc_init1(MachineState *machine, - * HW_COMPAT_*, PC_COMPAT_*, or * pc_*_machine_options(). 
- */ - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void pc_compat_2_3(MachineState *machine) - { - PCMachineState *pcms = PC_MACHINE(machine); -@@ -433,6 +435,7 @@ static void pc_i440fx_3_0_machine_options(MachineClass *m) - pc_i440fx_machine_options(m); - m->alias = "pc"; - m->is_default = 1; -+ SET_MACHINE_COMPAT(m, PC_COMPAT_2_12); - } - - DEFINE_I440FX_MACHINE(v3_0, "pc-i440fx-3.0", NULL, -@@ -1148,3 +1151,190 @@ static void xenfv_machine_options(MachineClass *m) - DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init, - xenfv_machine_options); - #endif -+machine_init(pc_machine_init); -+ -+#endif /* Disabled for Red Hat Enterprise Linux */ -+ -+/* Red Hat Enterprise Linux machine types */ -+ -+/* Options for the latest rhel7 machine type */ -+static void pc_machine_rhel7_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ m->family = "pc_piix_Y"; -+ m->default_machine_opts = "firmware=bios-256k.bin"; -+ pcmc->default_nic_model = "e1000"; -+ m->default_display = "std"; -+ SET_MACHINE_COMPAT(m, PC_RHEL_COMPAT); -+ m->alias = "pc"; -+ m->is_default = 1; -+} -+ -+static void pc_init_rhel760(MachineState *machine) -+{ -+ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -+ TYPE_I440FX_PCI_DEVICE); -+} -+ -+static void pc_machine_rhel760_options(MachineClass *m) -+{ -+ pc_machine_rhel7_options(m); -+ m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; -+} -+ -+DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, -+ pc_machine_rhel760_options); -+ -+static void pc_init_rhel750(MachineState *machine) -+{ -+ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -+ TYPE_I440FX_PCI_DEVICE); -+} -+ -+static void pc_machine_rhel750_options(MachineClass *m) -+{ -+ pc_machine_rhel760_options(m); -+ m->alias = NULL; -+ m->is_default = 0; -+ m->desc = "RHEL 7.5.0 PC (i440FX + PIIX, 1996)"; -+ m->auto_enable_numa_with_memhp = false; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_5_COMPAT); -+} -+ -+DEFINE_PC_MACHINE(rhel750, 
"pc-i440fx-rhel7.5.0", pc_init_rhel750, -+ pc_machine_rhel750_options); -+ -+static void pc_init_rhel740(MachineState *machine) -+{ -+ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -+ TYPE_I440FX_PCI_DEVICE); -+} -+ -+static void pc_machine_rhel740_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_machine_rhel750_options(m); -+ m->desc = "RHEL 7.4.0 PC (i440FX + PIIX, 1996)"; -+ m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; -+ pcmc->pc_rom_ro = false; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_4_COMPAT); -+} -+ -+DEFINE_PC_MACHINE(rhel740, "pc-i440fx-rhel7.4.0", pc_init_rhel740, -+ pc_machine_rhel740_options); -+ -+static void pc_init_rhel730(MachineState *machine) -+{ -+ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -+ TYPE_I440FX_PCI_DEVICE); -+} -+ -+static void pc_machine_rhel730_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_machine_rhel740_options(m); -+ m->desc = "RHEL 7.3.0 PC (i440FX + PIIX, 1996)"; -+ pcmc->linuxboot_dma_enabled = false; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_3_COMPAT); -+} -+ -+DEFINE_PC_MACHINE(rhel730, "pc-i440fx-rhel7.3.0", pc_init_rhel730, -+ pc_machine_rhel730_options); -+ -+ -+static void pc_init_rhel720(MachineState *machine) -+{ -+ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -+ TYPE_I440FX_PCI_DEVICE); -+} -+ -+static void pc_machine_rhel720_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_machine_rhel730_options(m); -+ m->desc = "RHEL 7.2.0 PC (i440FX + PIIX, 1996)"; -+ /* From pc_i440fx_2_5_machine_options */ -+ pcmc->save_tsc_khz = false; -+ m->legacy_fw_cfg_order = 1; -+ /* Note: broken_reserved_end was already in 7.2 */ -+ /* From pc_i440fx_2_6_machine_options */ -+ pcmc->legacy_cpu_hotplug = true; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_2_COMPAT); -+} -+ -+DEFINE_PC_MACHINE(rhel720, "pc-i440fx-rhel7.2.0", pc_init_rhel720, -+ pc_machine_rhel720_options); -+ -+static void pc_compat_rhel710(MachineState *machine) -+{ -+ 
PCMachineState *pcms = PC_MACHINE(machine); -+ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); -+ -+ /* From pc_compat_2_2 */ -+ pcmc->rsdp_in_ram = false; -+ machine->suppress_vmdesc = true; -+ -+ /* From pc_compat_2_1 */ -+ pcmc->smbios_uuid_encoded = false; -+ x86_cpu_change_kvm_default("svm", NULL); -+ pcmc->enforce_aligned_dimm = false; -+ -+ /* Disable all the extra subsections that were added in 2.2 */ -+ migrate_pre_2_2 = true; -+ -+ /* From pc_i440fx_2_4_machine_options */ -+ pcmc->broken_reserved_end = true; -+} -+ -+static void pc_init_rhel710(MachineState *machine) -+{ -+ pc_compat_rhel710(machine); -+ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -+ TYPE_I440FX_PCI_DEVICE); -+} -+ -+static void pc_machine_rhel710_options(MachineClass *m) -+{ -+ pc_machine_rhel720_options(m); -+ m->family = "pc_piix_Y"; -+ m->desc = "RHEL 7.1.0 PC (i440FX + PIIX, 1996)"; -+ m->default_display = "cirrus"; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_1_COMPAT); -+} -+ -+DEFINE_PC_MACHINE(rhel710, "pc-i440fx-rhel7.1.0", pc_init_rhel710, -+ pc_machine_rhel710_options); -+ -+static void pc_compat_rhel700(MachineState *machine) -+{ -+ PCMachineState *pcms = PC_MACHINE(machine); -+ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); -+ -+ pc_compat_rhel710(machine); -+ -+ /* Upstream enables it for everyone, we're a little more selective */ -+ x86_cpu_change_kvm_default("x2apic", NULL); -+ x86_cpu_change_kvm_default("svm", NULL); -+ pcmc->legacy_acpi_table_size = 6418; /* see pc_compat_2_0() */ -+ pcmc->smbios_legacy_mode = true; -+ pcmc->has_reserved_memory = false; -+ migrate_cve_2014_5263_xhci_fields = true; -+} -+ -+static void pc_init_rhel700(MachineState *machine) -+{ -+ pc_compat_rhel700(machine); -+ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -+ TYPE_I440FX_PCI_DEVICE); -+} -+ -+static void pc_machine_rhel700_options(MachineClass *m) -+{ -+ pc_machine_rhel710_options(m); -+ m->family = "pc_piix_Y"; -+ m->desc = "RHEL 7.0.0 PC (i440FX + PIIX, 1996)"; -+ 
SET_MACHINE_COMPAT(m, PC_RHEL7_0_COMPAT); -+} -+ -+DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, -+ pc_machine_rhel700_options); -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 532241e..c1024c5 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -145,8 +145,8 @@ static void pc_q35_init(MachineState *machine) - - if (pcmc->smbios_defaults) { - /* These values are guest ABI, do not change */ -- smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", -- mc->name, pcmc->smbios_legacy_mode, -+ smbios_set_defaults("Red Hat", "KVM", -+ mc->desc, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, - SMBIOS_ENTRY_POINT_21); - } -@@ -294,6 +294,7 @@ static void pc_q35_init(MachineState *machine) - DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) - - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void pc_q35_machine_options(MachineClass *m) - { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -315,6 +316,7 @@ static void pc_q35_3_0_machine_options(MachineClass *m) - { - pc_q35_machine_options(m); - m->alias = "q35"; -+ SET_MACHINE_COMPAT(m, PC_COMPAT_2_12); - } - - DEFINE_Q35_MACHINE(v3_0, "pc-q35-3.0", NULL, -@@ -416,3 +418,90 @@ static void pc_q35_2_4_machine_options(MachineClass *m) - - DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, - pc_q35_2_4_machine_options); -+#endif /* Disabled for Red Hat Enterprise Linux */ -+ -+/* Red Hat Enterprise Linux machine types */ -+ -+/* Options for the latest rhel7 q35 machine type */ -+static void pc_q35_machine_rhel7_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pcmc->default_nic_model = "e1000e"; -+ m->family = "pc_q35_Z"; -+ m->default_machine_opts = "firmware=bios-256k.bin"; -+ m->default_display = "std"; -+ m->no_floppy = 1; -+ machine_class_allow_dynamic_sysbus_dev(m, TYPE_SYS_BUS_DEVICE); -+ m->alias = "q35"; -+ m->max_cpus = 384; -+ SET_MACHINE_COMPAT(m, PC_RHEL_COMPAT); -+} -+ -+static void pc_q35_init_rhel760(MachineState 
*machine) -+{ -+ pc_q35_init(machine); -+} -+ -+static void pc_q35_machine_rhel760_options(MachineClass *m) -+{ -+ pc_q35_machine_rhel7_options(m); -+ m->desc = "RHEL-7.6.0 PC (Q35 + ICH9, 2009)"; -+} -+ -+DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, -+ pc_q35_machine_rhel760_options); -+ -+static void pc_q35_init_rhel750(MachineState *machine) -+{ -+ pc_q35_init(machine); -+} -+ -+static void pc_q35_machine_rhel750_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel760_options(m); -+ m->alias = NULL; -+ m->desc = "RHEL-7.5.0 PC (Q35 + ICH9, 2009)"; -+ m->auto_enable_numa_with_memhp = false; -+ pcmc->default_nic_model = "e1000"; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_5_COMPAT); -+} -+ -+DEFINE_PC_MACHINE(q35_rhel750, "pc-q35-rhel7.5.0", pc_q35_init_rhel750, -+ pc_q35_machine_rhel750_options); -+ -+static void pc_q35_init_rhel740(MachineState *machine) -+{ -+ pc_q35_init(machine); -+} -+ -+static void pc_q35_machine_rhel740_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel750_options(m); -+ m->desc = "RHEL-7.4.0 PC (Q35 + ICH9, 2009)"; -+ m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; -+ pcmc->pc_rom_ro = false; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_4_COMPAT); -+} -+ -+DEFINE_PC_MACHINE(q35_rhel740, "pc-q35-rhel7.4.0", pc_q35_init_rhel740, -+ pc_q35_machine_rhel740_options); -+ -+static void pc_q35_init_rhel730(MachineState *machine) -+{ -+ pc_q35_init(machine); -+} -+ -+static void pc_q35_machine_rhel730_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel740_options(m); -+ m->desc = "RHEL-7.3.0 PC (Q35 + ICH9, 2009)"; -+ m->max_cpus = 255; -+ pcmc->linuxboot_dma_enabled = false; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_3_COMPAT); -+} -+ -+DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, -+ pc_q35_machine_rhel730_options); -diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index 
742cd0a..7d568da 100644 ---- a/hw/net/e1000.c -+++ b/hw/net/e1000.c -@@ -1663,6 +1663,16 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) - - pci_conf = pci_dev->config; - -+ if (!(d->compat_flags & E1000_FLAG_AUTONEG)) { -+ /* -+ * We have no capabilities, so capability list bit should normally be 0. -+ * Keep it on for compat machine types to avoid breaking migration. -+ * HACK: abuse E1000_FLAG_AUTONEG, which is off exactly for -+ * the machine types that need this. -+ */ -+ pci_set_word(pci_conf + PCI_STATUS, PCI_STATUS_CAP_LIST); -+ } -+ - /* TODO: RST# value should be 0, PCI spec 6.2.4 */ - pci_conf[PCI_CACHE_LINE_SIZE] = 0x10; - -@@ -1763,7 +1773,7 @@ static const TypeInfo e1000_base_info = { - - static const E1000Info e1000_devices[] = { - { -- .name = "e1000", -+ .name = "e1000-82540em", - .device_id = E1000_DEV_ID_82540EM, - .revision = 0x03, - .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, -@@ -1784,6 +1794,11 @@ static const E1000Info e1000_devices[] = { - #endif - }; - -+static const TypeInfo e1000_default_info = { -+ .name = "e1000", -+ .parent = "e1000-82540em", -+}; -+ - static void e1000_register_types(void) - { - int i; -@@ -1801,6 +1816,7 @@ static void e1000_register_types(void) - - type_register(&type_info); - } -+ type_register_static(&e1000_default_info); - } - - type_init(e1000_register_types) -diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index 510ddb3..f1de9e5 100644 ---- a/hw/net/e1000e.c -+++ b/hw/net/e1000e.c -@@ -75,6 +75,11 @@ typedef struct E1000EState { - - E1000ECore core; - -+ /* 7.3 had the intr_state field that was in the original e1000e code -+ * but that was removed prior to 2.7's release -+ */ -+ bool redhat_7_3_intr_state_enable; -+ uint32_t redhat_7_3_intr_state; - } E1000EState; - - #define E1000E_MMIO_IDX 0 -@@ -90,6 +95,10 @@ typedef struct E1000EState { - #define E1000E_MSIX_TABLE (0x0000) - #define E1000E_MSIX_PBA (0x2000) - -+/* Values as in RHEL 7.3 build and original upstream */ -+#define 
RH_E1000E_USE_MSI BIT(0) -+#define RH_E1000E_USE_MSIX BIT(1) -+ - static uint64_t - e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) - { -@@ -301,6 +310,8 @@ e1000e_init_msix(E1000EState *s) - } else { - if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { - msix_uninit(d, &s->msix, &s->msix); -+ } else { -+ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSIX; - } - } - } -@@ -472,6 +483,8 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) - ret = msi_init(PCI_DEVICE(s), 0xD0, 1, true, false, NULL); - if (ret) { - trace_e1000e_msi_init_fail(ret); -+ } else { -+ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSI; - } - - if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, -@@ -595,6 +608,11 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { - VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ - e1000e_vmstate_intr_timer, E1000IntrDelayTimer) - -+static bool rhel_7_3_check(void *opaque, int version_id) -+{ -+ return ((E1000EState *)opaque)->redhat_7_3_intr_state_enable; -+} -+ - static const VMStateDescription e1000e_vmstate = { - .name = "e1000e", - .version_id = 1, -@@ -606,6 +624,7 @@ static const VMStateDescription e1000e_vmstate = { - VMSTATE_MSIX(parent_obj, E1000EState), - - VMSTATE_UINT32(ioaddr, E1000EState), -+ VMSTATE_UINT32_TEST(redhat_7_3_intr_state, E1000EState, rhel_7_3_check), - VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), - VMSTATE_UINT8(core.rx_desc_len, E1000EState), - VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, -@@ -654,6 +673,8 @@ static PropertyInfo e1000e_prop_disable_vnet, - - static Property e1000e_properties[] = { - DEFINE_NIC_PROPERTIES(E1000EState, conf), -+ DEFINE_PROP_BOOL("__redhat_e1000e_7_3_intr_state", E1000EState, -+ redhat_7_3_intr_state_enable, false), - DEFINE_PROP_SIGNED("disable_vnet_hdr", E1000EState, disable_vnet, false, - e1000e_prop_disable_vnet, bool), - DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, -diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 
46daa16..05453e7 100644 ---- a/hw/net/rtl8139.c -+++ b/hw/net/rtl8139.c -@@ -3174,7 +3174,7 @@ static int rtl8139_pre_save(void *opaque) - - static const VMStateDescription vmstate_rtl8139 = { - .name = "rtl8139", -- .version_id = 5, -+ .version_id = 4, - .minimum_version_id = 3, - .post_load = rtl8139_post_load, - .pre_save = rtl8139_pre_save, -@@ -3255,7 +3255,9 @@ static const VMStateDescription vmstate_rtl8139 = { - VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), - VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), - VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), -+#if 0 /* Disabled for Red Hat Enterprise Linux bz 1420195 */ - VMSTATE_UINT32_V(tally_counters.RxOkMul, RTL8139State, 5), -+#endif - VMSTATE_UINT16(tally_counters.TxAbt, RTL8139State), - VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 2f8c304..b8bdb69 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -4009,6 +4009,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) - smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; - smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */ - spapr_caps_add_properties(smc, &error_abort); -+ smc->has_power9_support = true; - } - - static const TypeInfo spapr_machine_info = { -@@ -4059,6 +4060,7 @@ static const TypeInfo spapr_machine_info = { - } \ - type_init(spapr_machine_register_##suffix) - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - /* - * pseries-3.0 - */ -@@ -4248,6 +4250,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); - .property = "pre-2.8-migration", \ - .value = "on", \ - }, -+#endif - - static void phb_placement_2_7(sPAPRMachineState *spapr, uint32_t index, - uint64_t *buid, hwaddr *pio, -@@ -4298,6 +4301,7 @@ static void phb_placement_2_7(sPAPRMachineState *spapr, uint32_t index, - */ - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void spapr_machine_2_7_instance_options(MachineState *machine) - { - 
sPAPRMachineState *spapr = SPAPR_MACHINE(machine); -@@ -4457,6 +4461,254 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) - SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_1); - } - DEFINE_SPAPR_MACHINE(2_1, "2.1", false); -+#endif -+ -+/* -+ * pseries-rhel7.6.0 -+ */ -+ -+static void spapr_machine_rhel760_instance_options(MachineState *machine) -+{ -+} -+ -+static void spapr_machine_rhel760_class_options(MachineClass *mc) -+{ -+ /* Defaults for the latest behaviour inherited from the base class */ -+} -+ -+DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", true); -+ -+/* -+ * pseries-rhel7.6.0-sxxm -+ * -+ * pseries-rhel7.6.0 with speculative execution exploit mitigations enabled by default -+ */ -+static void spapr_machine_rhel760sxxm_instance_options(MachineState *machine) -+{ -+ spapr_machine_rhel760_instance_options(machine); -+} -+ -+static void spapr_machine_rhel760sxxm_class_options(MachineClass *mc) -+{ -+ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel760_class_options(mc); -+ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; -+ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; -+ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; -+} -+ -+DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false); -+ -+/* -+ * pseries-rhel7.5.0 -+ * like SPAPR_COMPAT_2_11 and SPAPR_COMPAT_2_10 -+ * SPAPR_CAP_HTM already enabled in 7.4 -+ * -+ */ -+#define SPAPR_COMPAT_RHEL7_5 \ -+ HW_COMPAT_RHEL7_5 \ -+ -+static void spapr_machine_rhel750_instance_options(MachineState *machine) -+{ -+ spapr_machine_rhel760_instance_options(machine); -+} -+ -+static void spapr_machine_rhel750_class_options(MachineClass *mc) -+{ -+ spapr_machine_rhel760_class_options(mc); -+ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_5); -+} -+ -+DEFINE_SPAPR_MACHINE(rhel750, "rhel7.5.0", false); -+ -+/* -+ * pseries-rhel7.5.0-sxxm -+ * -+ * pseries-rhel7.5.0 with speculative execution exploit mitigations enabled by default -+ */ -+static void 
spapr_machine_rhel750sxxm_instance_options(MachineState *machine) -+{ -+ spapr_machine_rhel750_instance_options(machine); -+} -+ -+static void spapr_machine_rhel750sxxm_class_options(MachineClass *mc) -+{ -+ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel750_class_options(mc); -+ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; -+ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; -+ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; -+} -+ -+DEFINE_SPAPR_MACHINE(rhel750sxxm, "rhel7.5.0-sxxm", false); -+ -+/* -+ * pseries-rhel7.4.0 -+ * like SPAPR_COMPAT_2_9 -+ */ -+ -+#define SPAPR_COMPAT_RHEL7_4 \ -+ HW_COMPAT_RHEL7_4 \ -+ { \ -+ .driver = TYPE_POWERPC_CPU, \ -+ .property = "pre-2.10-migration", \ -+ .value = "on", \ -+ }, \ -+ -+static void spapr_machine_rhel740_instance_options(MachineState *machine) -+{ -+ spapr_machine_rhel750_instance_options(machine); -+} -+ -+static void spapr_machine_rhel740_class_options(MachineClass *mc) -+{ -+ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel750_class_options(mc); -+ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_4); -+ mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram; -+ smc->has_power9_support = false; -+ smc->pre_2_10_has_unused_icps = true; -+ smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED; -+ smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_ON; -+} -+ -+DEFINE_SPAPR_MACHINE(rhel740, "rhel7.4.0", false); -+ -+/* -+ * pseries-rhel7.4.0-sxxm -+ * -+ * pseries-rhel7.4.0 with speculative execution exploit mitigations enabled by default -+ */ -+static void spapr_machine_rhel740sxxm_instance_options(MachineState *machine) -+{ -+ spapr_machine_rhel740_instance_options(machine); -+} -+ -+static void spapr_machine_rhel740sxxm_class_options(MachineClass *mc) -+{ -+ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel740_class_options(mc); -+ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; -+ 
smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; -+ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; -+} -+ -+DEFINE_SPAPR_MACHINE(rhel740sxxm, "rhel7.4.0-sxxm", false); -+ -+/* -+ * pseries-rhel7.3.0 -+ * like SPAPR_COMPAT_2_6/_2_7/_2_8 but "ddw" has been backported to RHEL7_3 -+ */ -+#define SPAPR_COMPAT_RHEL7_3 \ -+ HW_COMPAT_RHEL7_3 \ -+ { \ -+ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ -+ .property = "mem_win_size", \ -+ .value = stringify(SPAPR_PCI_2_7_MMIO_WIN_SIZE),\ -+ }, \ -+ { \ -+ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ -+ .property = "mem64_win_size", \ -+ .value = "0", \ -+ }, \ -+ { \ -+ .driver = TYPE_POWERPC_CPU, \ -+ .property = "pre-2.8-migration", \ -+ .value = "on", \ -+ }, \ -+ { \ -+ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ -+ .property = "pre-2.8-migration", \ -+ .value = "on", \ -+ }, \ -+ { \ -+ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ -+ .property = "pcie-extended-configuration-space",\ -+ .value = "off", \ -+ }, -+ -+static void spapr_machine_rhel730_instance_options(MachineState *machine) -+{ -+ sPAPRMachineState *spapr = SPAPR_MACHINE(machine); -+ -+ spapr_machine_rhel740_instance_options(machine); -+ spapr->use_hotplug_event_source = false; -+} -+ -+static void spapr_machine_rhel730_class_options(MachineClass *mc) -+{ -+ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel740_class_options(mc); -+ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power7_v2.3"); -+ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_3); -+ smc->phb_placement = phb_placement_2_7; -+} -+ -+DEFINE_SPAPR_MACHINE(rhel730, "rhel7.3.0", false); -+ -+/* -+ * pseries-rhel7.3.0-sxxm -+ * -+ * pseries-rhel7.3.0 with speculative execution exploit mitigations enabled by default -+ */ -+static void spapr_machine_rhel730sxxm_instance_options(MachineState *machine) -+{ -+ spapr_machine_rhel730_instance_options(machine); -+} -+ -+static void spapr_machine_rhel730sxxm_class_options(MachineClass *mc) -+{ -+ sPAPRMachineClass *smc = 
SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel730_class_options(mc); -+ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; -+ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; -+ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; -+} -+ -+DEFINE_SPAPR_MACHINE(rhel730sxxm, "rhel7.3.0-sxxm", false); -+ -+/* -+ * pseries-rhel7.2.0 -+ */ -+/* Should be like SPAPR_COMPAT_2_5 + 2_4 + 2_3, but "dynamic-reconfiguration" -+ * has been backported to RHEL7_2 so we don't need it here. -+ */ -+ -+#define SPAPR_COMPAT_RHEL7_2 \ -+ HW_COMPAT_RHEL7_2 \ -+ { \ -+ .driver = "spapr-vlan", \ -+ .property = "use-rx-buffer-pools", \ -+ .value = "off", \ -+ },{ \ -+ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE,\ -+ .property = "ddw",\ -+ .value = stringify(off),\ -+ }, -+ -+ -+static void spapr_machine_rhel720_instance_options(MachineState *machine) -+{ -+ spapr_machine_rhel730_instance_options(machine); -+} -+ -+static void spapr_machine_rhel720_class_options(MachineClass *mc) -+{ -+ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel730_class_options(mc); -+ smc->use_ohci_by_default = true; -+ mc->has_hotpluggable_cpus = NULL; -+ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_2); -+} -+ -+DEFINE_SPAPR_MACHINE(rhel720, "rhel7.2.0", false); - - static void spapr_machine_register_types(void) - { -diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index fb29eec..a081b01 100644 ---- a/hw/ppc/spapr_cpu_core.c -+++ b/hw/ppc/spapr_cpu_core.c -@@ -21,6 +21,7 @@ - #include "sysemu/numa.h" - #include "sysemu/hw_accel.h" - #include "qemu/error-report.h" -+#include "cpu-models.h" - - static void spapr_cpu_reset(void *opaque) - { -@@ -212,6 +213,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr, - { - CPUPPCState *env = &cpu->env; - Error *local_err = NULL; -+ sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); - - object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); - if (local_err) { -@@ 
-224,6 +226,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr, - cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); - kvmppc_set_papr(cpu); - -+ if (!smc->has_power9_support && -+ (((spapr->max_compat_pvr && -+ ppc_compat_cmp(spapr->max_compat_pvr, -+ CPU_POWERPC_LOGICAL_3_00) >= 0)) || -+ (!spapr->max_compat_pvr && -+ ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, 0)))) { -+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, -+ "POWER9 CPU is not supported by this machine class"); -+ return; -+ } -+ - qemu_register_reset(spapr_cpu_reset, cpu); - spapr_cpu_reset(cpu); - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 7983185..0f135c9 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -649,7 +649,7 @@ bool css_migration_enabled(void) - { \ - MachineClass *mc = MACHINE_CLASS(oc); \ - ccw_machine_##suffix##_class_options(mc); \ -- mc->desc = "VirtIO-ccw based S390 machine v" verstr; \ -+ mc->desc = "VirtIO-ccw based S390 machine " verstr; \ - if (latest) { \ - mc->alias = "s390-ccw-virtio"; \ - mc->is_default = 1; \ -@@ -676,6 +676,8 @@ bool css_migration_enabled(void) - #define CCW_COMPAT_2_12 \ - HW_COMPAT_2_12 - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ -+ - #define CCW_COMPAT_2_11 \ - HW_COMPAT_2_11 \ - {\ -@@ -898,6 +900,48 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) - } - DEFINE_CCW_MACHINE(2_4, "2.4", false); - -+#else -+ -+/* -+ * like CCW_COMPAT_2_11, but includes HW_COMPAT_RHEL7_5 (derived from -+ * HW_COMPAT_2_11 and HW_COMPAT_2_10) instead of HW_COMPAT_2_11 -+ */ -+#define CCW_COMPAT_RHEL7_5 \ -+ HW_COMPAT_RHEL7_5 \ -+ {\ -+ .driver = TYPE_SCLP_EVENT_FACILITY,\ -+ .property = "allow_all_mask_sizes",\ -+ .value = "off",\ -+ }, -+ -+static void ccw_machine_rhel760_instance_options(MachineState *machine) -+{ -+} -+ -+static void ccw_machine_rhel760_class_options(MachineClass *mc) -+{ -+} -+DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", true); -+ 
-+static void ccw_machine_rhel750_instance_options(MachineState *machine) -+{ -+ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V2_11 }; -+ ccw_machine_rhel760_instance_options(machine); -+ -+ /* before 2.12 we emulated the very first z900, and RHEL 7.5 is -+ based on 2.10 */ -+ s390_set_qemu_cpu_model(0x2064, 7, 1, qemu_cpu_feat); -+} -+ -+static void ccw_machine_rhel750_class_options(MachineClass *mc) -+{ -+ ccw_machine_rhel760_class_options(mc); -+ SET_MACHINE_COMPAT(mc, CCW_COMPAT_RHEL7_5); -+} -+DEFINE_CCW_MACHINE(rhel750, "rhel7.5.0", false); -+ -+#endif -+ - static void ccw_machine_register_types(void) - { - type_register_static(&ccw_machine_info); -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index a27e54b..144e6e9 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -775,6 +775,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, - SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type1.product, product); - SMBIOS_SET_DEFAULT(type1.version, version); -+ SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); - SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type2.product, product); - SMBIOS_SET_DEFAULT(type2.version, version); -diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index 6190b6f..ad2ad2d 100644 ---- a/hw/timer/i8254_common.c -+++ b/hw/timer/i8254_common.c -@@ -268,7 +268,7 @@ static const VMStateDescription vmstate_pit_common = { - .pre_save = pit_dispatch_pre_save, - .post_load = pit_dispatch_post_load, - .fields = (VMStateField[]) { -- VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3), -+ VMSTATE_UINT32(channels[0].irq_disabled, PITCommonState), /* qemu-kvm's v2 had 'flags' here */ - VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2, - vmstate_pit_channel, PITChannelState), - VMSTATE_INT64(channels[0].next_transition_time, -diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c -index 
6f1f723..68c353f 100644 ---- a/hw/timer/mc146818rtc.c -+++ b/hw/timer/mc146818rtc.c -@@ -34,6 +34,7 @@ - #include "qapi/qapi-commands-misc.h" - #include "qapi/qapi-events-misc.h" - #include "qapi/visitor.h" -+#include "migration/migration.h" - - #ifdef TARGET_I386 - #include "hw/i386/apic.h" -@@ -839,6 +840,11 @@ static int rtc_post_load(void *opaque, int version_id) - static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) - { - RTCState *s = (RTCState *)opaque; -+ -+ if (migrate_pre_2_2) { -+ return false; -+ } -+ - return s->irq_reinject_on_ack_count != 0; - } - -diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 836b11f..9d7b9df 100644 ---- a/hw/usb/hcd-uhci.c -+++ b/hw/usb/hcd-uhci.c -@@ -1214,12 +1214,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) - UHCIState *s = UHCI(dev); - uint8_t *pci_conf = s->dev.config; - int i; -+ int irq_pin; - - pci_conf[PCI_CLASS_PROG] = 0x00; - /* TODO: reset value should be 0. */ - pci_conf[USB_SBRN] = USB_RELEASE_1; // release number - -- pci_config_set_interrupt_pin(pci_conf, u->info.irq_pin + 1); -+ irq_pin = u->info.irq_pin; -+ pci_config_set_interrupt_pin(pci_conf, irq_pin + 1); - - if (s->masterbus) { - USBPort *ports[NB_PORTS]; -diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 8f1a01a..ca19474 100644 ---- a/hw/usb/hcd-xhci.c -+++ b/hw/usb/hcd-xhci.c -@@ -3560,9 +3560,27 @@ static const VMStateDescription vmstate_xhci_slot = { - } - }; - -+static int xhci_event_pre_save(void *opaque) -+{ -+ XHCIEvent *s = opaque; -+ -+ s->cve_2014_5263_a = ((uint8_t *)&s->type)[0]; -+ s->cve_2014_5263_b = ((uint8_t *)&s->type)[1]; -+ -+ return 0; -+} -+ -+bool migrate_cve_2014_5263_xhci_fields; -+ -+static bool xhci_event_cve_2014_5263(void *opaque, int version_id) -+{ -+ return migrate_cve_2014_5263_xhci_fields; -+} -+ - static const VMStateDescription vmstate_xhci_event = { - .name = "xhci-event", - .version_id = 1, -+ .pre_save = xhci_event_pre_save, - .fields = (VMStateField[]) { - 
VMSTATE_UINT32(type, XHCIEvent), - VMSTATE_UINT32(ccode, XHCIEvent), -@@ -3571,6 +3589,8 @@ static const VMStateDescription vmstate_xhci_event = { - VMSTATE_UINT32(flags, XHCIEvent), - VMSTATE_UINT8(slotid, XHCIEvent), - VMSTATE_UINT8(epid, XHCIEvent), -+ VMSTATE_UINT8_TEST(cve_2014_5263_a, XHCIEvent, xhci_event_cve_2014_5263), -+ VMSTATE_UINT8_TEST(cve_2014_5263_b, XHCIEvent, xhci_event_cve_2014_5263), - VMSTATE_END_OF_LIST() - } - }; -diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h -index fc36a4c..89d4cf7 100644 ---- a/hw/usb/hcd-xhci.h -+++ b/hw/usb/hcd-xhci.h -@@ -153,6 +153,8 @@ typedef struct XHCIEvent { - uint32_t flags; - uint8_t slotid; - uint8_t epid; -+ uint8_t cve_2014_5263_a; -+ uint8_t cve_2014_5263_b; - } XHCIEvent; - - typedef struct XHCIInterrupter { -diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h -index 59aeb06..7b5cc25 100644 ---- a/include/hw/acpi/ich9.h -+++ b/include/hw/acpi/ich9.h -@@ -61,6 +61,9 @@ typedef struct ICH9LPCPMRegs { - uint8_t smm_enabled; - bool enable_tco; - TCOIORegs tco_regs; -+ -+ /* RH addition, see bz 1489800 */ -+ bool force_rev1_fadt; - } ICH9LPCPMRegs; - - #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 9a870cc..2293315 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -128,6 +128,7 @@ typedef struct { - - #define VIRT_ECAM_ID(high) (high ? 
VIRT_PCIE_ECAM_HIGH : VIRT_PCIE_ECAM) - -+#if 0 /* disabled for Red Hat Enterprise Linux */ - #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") - #define VIRT_MACHINE(obj) \ - OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) -@@ -136,6 +137,27 @@ typedef struct { - #define VIRT_MACHINE_CLASS(klass) \ - OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) - -+#else -+#define TYPE_RHEL_MACHINE MACHINE_TYPE_NAME("virt-rhel") -+#define VIRT_MACHINE(obj) \ -+ OBJECT_CHECK(VirtMachineState, (obj), TYPE_RHEL_MACHINE) -+#define VIRT_MACHINE_GET_CLASS(obj) \ -+ OBJECT_GET_CLASS(VirtMachineClass, obj, TYPE_RHEL_MACHINE) -+#define VIRT_MACHINE_CLASS(klass) \ -+ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_RHEL_MACHINE) -+#endif -+ -+/* This macro is for changes to properties that are RHEL specific, -+ * different to the current upstream and to be applied to the latest -+ * machine type. -+ */ -+#define ARM_RHEL_COMPAT \ -+ {\ -+ .driver = "virtio-net-pci",\ -+ .property = "romfile",\ -+ .value = "",\ -+ }, -+ - void virt_acpi_setup(VirtMachineState *vms); - - /* Return the number of used redistributor regions */ -diff --git a/include/hw/compat.h b/include/hw/compat.h -index c08f404..22262c7 100644 ---- a/include/hw/compat.h -+++ b/include/hw/compat.h -@@ -282,4 +282,233 @@ - .value = "on",\ - }, - -+/* Mostly like HW_COMPAT_2_1 but: -+ * we don't need virtio-scsi-pci since 7.0 already had that on -+ * -+ * RH: Note, qemu-extended-regs should have been enabled in the 7.1 -+ * machine type, but was accidentally turned off in 7.2 onwards. 
-+ * -+ */ -+#define HW_COMPAT_RHEL7_1 \ -+ { /* COMPAT_RHEL7.1 */ \ -+ .driver = "intel-hda-generic",\ -+ .property = "old_msi_addr",\ -+ .value = "on",\ -+ },{\ -+ .driver = "VGA",\ -+ .property = "qemu-extended-regs",\ -+ .value = "off",\ -+ },{\ -+ .driver = "secondary-vga",\ -+ .property = "qemu-extended-regs",\ -+ .value = "off",\ -+ },{\ -+ .driver = "usb-mouse",\ -+ .property = "usb_version",\ -+ .value = stringify(1),\ -+ },{\ -+ .driver = "usb-kbd",\ -+ .property = "usb_version",\ -+ .value = stringify(1),\ -+ },{\ -+ .driver = "virtio-pci",\ -+ .property = "virtio-pci-bus-master-bug-migration",\ -+ .value = "on",\ -+ },{\ -+ .driver = "virtio-blk-pci",\ -+ .property = "any_layout",\ -+ .value = "off",\ -+ },{\ -+ .driver = "virtio-balloon-pci",\ -+ .property = "any_layout",\ -+ .value = "off",\ -+ },{\ -+ .driver = "virtio-serial-pci",\ -+ .property = "any_layout",\ -+ .value = "off",\ -+ },{\ -+ .driver = "virtio-9p-pci",\ -+ .property = "any_layout",\ -+ .value = "off",\ -+ },{\ -+ .driver = "virtio-rng-pci",\ -+ .property = "any_layout",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_1 - introduced with 2.10.0 */ \ -+ .driver = "migration",\ -+ .property = "send-configuration",\ -+ .value = "off",\ -+ }, -+ -+/* Mostly like HW_COMPAT_2_4 + 2_3 but: -+ * we don't need "any_layout" as it has been backported to 7.2 -+ */ -+ -+#define HW_COMPAT_RHEL7_2 \ -+ {\ -+ .driver = "virtio-blk-device",\ -+ .property = "scsi",\ -+ .value = "true",\ -+ },{\ -+ .driver = "e1000-82540em",\ -+ .property = "extra_mac_registers",\ -+ .value = "off",\ -+ },{\ -+ .driver = "virtio-pci",\ -+ .property = "x-disable-pcie",\ -+ .value = "on",\ -+ },{\ -+ .driver = "virtio-pci",\ -+ .property = "migrate-extra",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_2 */ \ -+ .driver = "fw_cfg_mem",\ -+ .property = "dma_enabled",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_2 */ \ -+ .driver = "fw_cfg_io",\ -+ .property = "dma_enabled",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_2 
*/ \ -+ .driver = "isa-fdc",\ -+ .property = "fallback",\ -+ .value = "144",\ -+ },{ /* HW_COMPAT_RHEL7_2 */ \ -+ .driver = "virtio-pci",\ -+ .property = "disable-modern",\ -+ .value = "on",\ -+ },{ /* HW_COMPAT_RHEL7_2 */ \ -+ .driver = "virtio-pci",\ -+ .property = "disable-legacy",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_2 */ \ -+ .driver = TYPE_PCI_DEVICE,\ -+ .property = "x-pcie-lnksta-dllla",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_2 */ \ -+ .driver = "virtio-pci",\ -+ .property = "page-per-vq",\ -+ .value = "on",\ -+ },{ /* HW_COMPAT_RHEL7_2 - introduced with 2.10.0 */ \ -+ .driver = "migration",\ -+ .property = "send-section-footer",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_2 - introduced with 2.10.0 */ \ -+ .driver = "migration",\ -+ .property = "store-global-state",\ -+ .value = "off",\ -+ }, -+ -+/* Mostly like HW_COMPAT_2_6 + HW_COMPAT_2_7 + HW_COMPAT_2_8 except -+ * disable-modern, disable-legacy, page-per-vq have already been -+ * backported to RHEL7.3 -+ */ -+#define HW_COMPAT_RHEL7_3 \ -+ { /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "virtio-mmio",\ -+ .property = "format_transport_address",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "virtio-serial-device",\ -+ .property = "emergency-write",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "ioapic",\ -+ .property = "version",\ -+ .value = "0x11",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "intel-iommu",\ -+ .property = "x-buggy-eim",\ -+ .value = "true",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "virtio-pci",\ -+ .property = "x-ignore-backend-features",\ -+ .value = "on",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "fw_cfg_mem",\ -+ .property = "x-file-slots",\ -+ .value = stringify(0x10),\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "fw_cfg_io",\ -+ .property = "x-file-slots",\ -+ .value = stringify(0x10),\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "pflash_cfi01",\ -+ .property = "old-multiple-chip-handling",\ -+ .value = 
"on",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = TYPE_PCI_DEVICE,\ -+ .property = "x-pcie-extcap-init",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "virtio-pci",\ -+ .property = "x-pcie-deverr-init",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "virtio-pci",\ -+ .property = "x-pcie-lnkctl-init",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "virtio-pci",\ -+ .property = "x-pcie-pm-init",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "virtio-net-device",\ -+ .property = "x-mtu-bypass-backend",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "e1000e",\ -+ .property = "__redhat_e1000e_7_3_intr_state",\ -+ .value = "on",\ -+ }, -+ -+/* Mostly like HW_COMPAT_2_9 except -+ * x-mtu-bypass-backend, x-migrate-msix has already been -+ * backported to RHEL7.4. shpc was already on in 7.4. -+ */ -+#define HW_COMPAT_RHEL7_4 \ -+ { /* HW_COMPAT_RHEL7_4 */ \ -+ .driver = "intel-iommu",\ -+ .property = "pt",\ -+ .value = "off",\ -+ }, -+ -+/* The same as HW_COMPAT_2_11 + HW_COMPAT_2_10 */ -+#define HW_COMPAT_RHEL7_5 \ -+ { /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ \ -+ .driver = "hpet",\ -+ .property = "hpet-offset-saved",\ -+ .value = "false",\ -+ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ \ -+ .driver = "virtio-blk-pci",\ -+ .property = "vectors",\ -+ .value = "2",\ -+ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ \ -+ .driver = "vhost-user-blk-pci",\ -+ .property = "vectors",\ -+ .value = "2",\ -+ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 but \ -+ bz 1608778 modified for our naming */ \ -+ .driver = "e1000-82540em",\ -+ .property = "migrate_tso_props",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_10 */ \ -+ .driver = "virtio-mouse-device",\ -+ .property = "wheel-axis",\ -+ .value = "false",\ -+ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_10 */ \ -+ .driver = "virtio-tablet-device",\ -+ .property = "wheel-axis",\ -+ .value = "false",\ -+ },{ 
/* HW_COMPAT_RHEL7_5 */ \ -+ .driver = "cirrus-vga",\ -+ .property = "vgamem_mb",\ -+ .value = "16",\ -+ },{ /* HW_COMPAT_RHEL7_5 */ \ -+ .driver = "migration",\ -+ .property = "decompress-error-check",\ -+ .value = "off",\ -+ }, -+ -+ - #endif /* HW_COMPAT_H */ -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 6894f37..ef82513 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -134,6 +134,9 @@ struct PCMachineClass { - - /* use DMA capable linuxboot option rom */ - bool linuxboot_dma_enabled; -+ -+ /* RH only, see bz 1489800 */ -+ bool pc_rom_ro; - }; - - #define TYPE_PC_MACHINE "generic-pc-machine" -@@ -960,4 +963,565 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); - type_init(pc_machine_init_##suffix) - - extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); -+ -+/* See include/hw/compat.h for shared compatibility lists */ -+ -+/* This macro is for changes to properties that are RHEL specific, -+ * different to the current upstream and to be applied to the latest -+ * machine type. 
-+ */ -+#define PC_RHEL_COMPAT \ -+ { /* PC_RHEL_COMPAT */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "host-phys-bits",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL_COMPAT bz 1508330 */ \ -+ .driver = "vfio-pci",\ -+ .property = "x-no-geforce-quirks",\ -+ .value = "on",\ -+ }, -+ -+/* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: -+ * - x-hv-max-vps was backported to 7.5 -+ * - x-pci-hole64-fix was backported to 7.5 -+ */ -+#define PC_RHEL7_5_COMPAT \ -+ HW_COMPAT_RHEL7_5 \ -+ { /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_11 */ \ -+ .driver = "Skylake-Server" "-" TYPE_X86_CPU,\ -+ .property = "clflushopt",\ -+ .value = "off",\ -+ }, -+ -+ -+#define PC_RHEL7_4_COMPAT \ -+ HW_COMPAT_RHEL7_4 \ -+ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_9 */ \ -+ .driver = "mch",\ -+ .property = "extended-tseg-mbytes",\ -+ .value = stringify(0),\ -+ },\ -+ { /* PC_RHEL7_4_COMPAT bz 1489800 */ \ -+ .driver = "ICH9-LPC",\ -+ .property = "__com.redhat_force-rev1-fadt",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ \ -+ .driver = "i440FX-pcihost",\ -+ .property = "x-pci-hole64-fix",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ \ -+ .driver = "q35-pcihost",\ -+ .property = "x-pci-hole64-fix",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "x-hv-max-vps",\ -+ .value = "0x40",\ -+ }, -+ -+#define PC_RHEL7_3_COMPAT \ -+ HW_COMPAT_RHEL7_3 \ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ \ -+ .driver = "kvmclock",\ -+ .property = "x-mach-use-reliable-get-clock",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "l3-cache",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "full-cpuid-auto-level",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ -+ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ -+ 
.property = "family",\ -+ .value = "15",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ -+ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ -+ .property = "model",\ -+ .value = "6",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ -+ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ -+ .property = "stepping",\ -+ .value = "1",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ -+ .driver = "isa-pcspk",\ -+ .property = "migrate",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_6 */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "cpuid-0xb",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ \ -+ .driver = "ICH9-LPC",\ -+ .property = "x-smi-broadcast",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "vmware-cpuid-freq",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ \ -+ .driver = "Haswell-" TYPE_X86_CPU,\ -+ .property = "stepping",\ -+ .value = "1",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_3 added in 2.9 */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "kvm-no-smi-migration",\ -+ .value = "on",\ -+ }, -+ -+#define PC_RHEL7_2_COMPAT \ -+ HW_COMPAT_RHEL7_2 \ -+ {\ -+ .driver = "phenom" "-" TYPE_X86_CPU,\ -+ .property = "rdtscp",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "qemu64" "-" TYPE_X86_CPU,\ -+ .property = "sse4a",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "qemu64" "-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "Haswell-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ \ -+ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "Haswell-noTSX-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "off",\ -+ },\ -+ { /* 
PC_RHEL7_2_COMPAT (copied from the entry above) */ \ -+ .driver = "Haswell-noTSX-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "Broadwell-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ \ -+ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "Broadwell-noTSX-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ \ -+ .driver = "Broadwell-noTSX-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "host" "-" TYPE_X86_CPU,\ -+ .property = "host-cache-info",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "check",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "qemu32" "-" TYPE_X86_CPU,\ -+ .property = "popcnt",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "arat",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "usb-redir",\ -+ .property = "streams",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "fill-mtrr-mask",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "apic-common",\ -+ .property = "legacy-instance-id",\ -+ .value = "on",\ -+ }, -+ -+ -+ -+#define PC_RHEL7_1_COMPAT \ -+ HW_COMPAT_RHEL7_1 \ -+ {\ -+ .driver = "kvm64" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "kvm32" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Conroe" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Penryn" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = 
"off",\ -+ },\ -+ {\ -+ .driver = "Nehalem" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = "Nehalem-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Westmere" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = "Westmere-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "SandyBridge" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = "SandyBridge-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Haswell" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Broadwell" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Opteron_G1" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Opteron_G2" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Opteron_G4" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Opteron_G5" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Haswell" "-" TYPE_X86_CPU,\ -+ .property = "f16c",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = 
"Haswell-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "f16c",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Haswell" "-" TYPE_X86_CPU,\ -+ .property = "rdrand",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "rdrand",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Broadwell" "-" TYPE_X86_CPU,\ -+ .property = "f16c",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "f16c",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Broadwell" "-" TYPE_X86_CPU,\ -+ .property = "rdrand",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "rdrand",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "coreduo" "-" TYPE_X86_CPU,\ -+ .property = "vmx",\ -+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "core2duo" "-" TYPE_X86_CPU,\ -+ .property = "vmx",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "qemu64" "-" TYPE_X86_CPU,\ -+ .property = "min-level",\ -+ .value = stringify(4),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "kvm64" "-" TYPE_X86_CPU,\ -+ .property = "min-level",\ -+ .value = stringify(5),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "pentium3" "-" TYPE_X86_CPU,\ -+ .property = "min-level",\ -+ .value = stringify(2),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "n270" "-" TYPE_X86_CPU,\ -+ .property = "min-level",\ -+ .value = stringify(5),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Conroe" "-" TYPE_X86_CPU,\ -+ .property = "min-level",\ -+ .value = stringify(4),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Penryn" "-" TYPE_X86_CPU,\ -+ .property = "min-level",\ -+ .value = stringify(4),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Nehalem" "-" TYPE_X86_CPU,\ -+ .property = "min-level",\ -+ .value = stringify(4),\ -+ },{ /* 
PC_RHEL7_1_COMPAT */ \ -+ .driver = "n270" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Penryn" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Conroe" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Nehalem" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Westmere" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "SandyBridge" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "IvyBridge" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Haswell" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Haswell-noTSX" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Broadwell" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Broadwell-noTSX" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ }, -+ -+/* -+ * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine -+ * types as the PC_COMPAT_* do for upstream types. -+ * PC_RHEL_7_*_COMPAT apply both to i440fx and q35 types. 
-+ */ -+ -+/* -+ * RHEL-7 is based on QEMU 1.5.3, so this needs the PC_COMPAT_* -+ * between our base and 1.5, less stuff backported to RHEL-7.0 -+ * (usb-device.msos-desc), less stuff for devices we changed -+ * (qemu64-x86_64-cpu) or don't support (hpet, pci-serial-2x, -+ * pci-serial-4x) in 7.0. -+ */ -+#define PC_RHEL7_0_COMPAT \ -+ {\ -+ .driver = "virtio-scsi-pci",\ -+ .property = "any_layout",\ -+ .value = "off",\ -+ },{\ -+ .driver = "PIIX4_PM",\ -+ .property = "memory-hotplug-support",\ -+ .value = "off",\ -+ },{\ -+ .driver = "apic",\ -+ .property = "version",\ -+ .value = stringify(0x11),\ -+ },{\ -+ .driver = "nec-usb-xhci",\ -+ .property = "superspeed-ports-first",\ -+ .value = "off",\ -+ },{\ -+ .driver = "nec-usb-xhci",\ -+ .property = "force-pcie-endcap",\ -+ .value = "on",\ -+ },{\ -+ .driver = "pci-serial",\ -+ .property = "prog_if",\ -+ .value = stringify(0),\ -+ },{\ -+ .driver = "virtio-net-pci",\ -+ .property = "guest_announce",\ -+ .value = "off",\ -+ },{\ -+ .driver = "ICH9-LPC",\ -+ .property = "memory-hotplug-support",\ -+ .value = "off",\ -+ },{\ -+ .driver = "xio3130-downstream",\ -+ .property = COMPAT_PROP_PCP,\ -+ .value = "off",\ -+ },{\ -+ .driver = "ioh3420",\ -+ .property = COMPAT_PROP_PCP,\ -+ .value = "off",\ -+ },{\ -+ .driver = "PIIX4_PM",\ -+ .property = "acpi-pci-hotplug-with-bridge-support",\ -+ .value = "off",\ -+ },{\ -+ .driver = "e1000",\ -+ .property = "mitigation",\ -+ .value = "off",\ -+ },{ \ -+ .driver = "virtio-net-pci", \ -+ .property = "ctrl_guest_offloads", \ -+ .value = "off", \ -+ },\ -+ {\ -+ .driver = "Conroe" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "Penryn" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "Nehalem" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL7_0_COMPAT (copied from the entry above) */ \ -+ .driver = "Nehalem-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ 
-+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "Westmere" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL7_0_COMPAT (copied from the entry above) */ \ -+ .driver = "Westmere-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "Opteron_G1" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "Opteron_G2" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "Opteron_G4" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "Opteron_G5" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ }, - #endif -diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 7e5de1a..330c370 100644 ---- a/include/hw/ppc/spapr.h -+++ b/include/hw/ppc/spapr.h -@@ -101,6 +101,7 @@ struct sPAPRMachineClass { - bool dr_lmb_enabled; /* enable dynamic-reconfig/hotplug of LMBs */ - bool use_ohci_by_default; /* use USB-OHCI instead of XHCI */ - bool pre_2_10_has_unused_icps; -+ bool has_power9_support; - void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index, - uint64_t *buid, hwaddr *pio, - hwaddr *mmio32, hwaddr *mmio64, -diff --git a/include/hw/usb.h b/include/hw/usb.h -index a5080ad..b943ec9 100644 ---- a/include/hw/usb.h -+++ b/include/hw/usb.h -@@ -606,4 +606,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, - uint8_t interface_class, uint8_t interface_subclass, - uint8_t interface_protocol); - -+ -+/* hcd-xhci.c -- rhel7.0.0 machine type compatibility */ -+extern bool migrate_cve_2014_5263_xhci_fields; -+ - #endif -diff --git a/migration/migration.c b/migration/migration.c -index b7d9854..381039c 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -106,6 +106,8 @@ enum mig_rp_message_type { - MIG_RP_MSG_MAX - }; - -+bool 
migrate_pre_2_2; -+ - /* When we add fault tolerance, we could have several - migrations at once. For now we don't need to add - dynamic creation of migration */ -diff --git a/migration/migration.h b/migration/migration.h -index 64a7b33..405d984 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -288,6 +288,11 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); - - void dirty_bitmap_mig_before_vm_start(void); - void init_dirty_bitmap_incoming_migration(void); -+/* -+ * Disables a load of subsections that were added in 2.2/rh7.2 for backwards -+ * migration compatibility. -+ */ -+extern bool migrate_pre_2_2; - - #define qemu_ram_foreach_block \ - #warning "Use qemu_ram_foreach_block_migratable in migration code" -diff --git a/qdev-monitor.c b/qdev-monitor.c -index 61e0300..f439b83 100644 ---- a/qdev-monitor.c -+++ b/qdev-monitor.c -@@ -47,7 +47,6 @@ typedef struct QDevAlias - - /* Please keep this table sorted by typename. */ - static const QDevAlias qdev_alias_table[] = { -- { "e1000", "e1000-82540em" }, - { "ich9-ahci", "ahci" }, - { "lsi53c895a", "lsi" }, - { "virtio-9p-ccw", "virtio-9p", QEMU_ARCH_S390X }, -diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py -index d346728..4bca2bf 100755 ---- a/scripts/vmstate-static-checker.py -+++ b/scripts/vmstate-static-checker.py -@@ -105,7 +105,6 @@ def get_changed_sec_name(sec): - # Section names can change -- see commit 292b1634 for an example. - changes = { - "ICH9 LPC": "ICH9-LPC", -- "e1000-82540em": "e1000", - } - - for item in changes: -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 338ee37..051018a 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1360,11 +1360,17 @@ static CPUCaches epyc_cache_info = { - - static X86CPUDefinition builtin_x86_defs[] = { - { -+ /* qemu64 is the default CPU model for all *-rhel7.* machine-types. -+ * The default on RHEL-6 was cpu64-rhel6. 
-+ * libvirt assumes that qemu64 is the default for _all_ machine-types, -+ * so we should try to keep qemu64 and cpu64-rhel6 as similar as -+ * possible. -+ */ - .name = "qemu64", - .level = 0xd, - .vendor = CPUID_VENDOR_AMD, - .family = 6, -- .model = 6, -+ .model = 13, - .stepping = 3, - .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | - CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -@@ -2684,6 +2690,7 @@ static PropValue kvm_default_props[] = { - { "acpi", "off" }, - { "monitor", "off" }, - { "svm", "off" }, -+ { "kvm-pv-unhalt", "on" }, - { NULL, NULL }, - }; - -diff --git a/target/i386/machine.c b/target/i386/machine.c -index 084c2c7..0c57c26 100644 ---- a/target/i386/machine.c -+++ b/target/i386/machine.c -@@ -955,6 +955,26 @@ static const VMStateDescription vmstate_svm_npt = { - } - }; - -+static bool vmstate_xsave_needed(void *opaque) -+{ -+ /* The xsave state is already on the main "cpu" section */ -+ return false; -+} -+ -+static const VMStateDescription vmstate_xsave ={ -+ .name = "cpu/xsave", -+ .version_id = 1, -+ .minimum_version_id = 1, -+ .minimum_version_id_old = 1, -+ .needed = vmstate_xsave_needed, -+ .fields = (VMStateField []) { -+ VMSTATE_UINT64_V(env.xcr0, X86CPU, 1), -+ VMSTATE_UINT64_V(env.xstate_bv, X86CPU, 1), -+ VMSTATE_YMMH_REGS_VARS(env.xmm_regs, X86CPU, CPU_NB_REGS, 1), -+ VMSTATE_END_OF_LIST() -+ } -+}; -+ - VMStateDescription vmstate_x86_cpu = { - .name = "cpu", - .version_id = 12, -@@ -1080,6 +1100,7 @@ VMStateDescription vmstate_x86_cpu = { - &vmstate_msr_intel_pt, - &vmstate_msr_virt_ssbd, - &vmstate_svm_npt, -+ &vmstate_xsave, - NULL - } - }; -diff --git a/target/ppc/compat.c b/target/ppc/compat.c -index 7de4bf3..3e2e353 100644 ---- a/target/ppc/compat.c -+++ b/target/ppc/compat.c -@@ -105,8 +105,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) - return NULL; - } - -+long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2) -+{ -+ const CompatInfo *compat1 = compat_by_pvr(pvr1); -+ const 
CompatInfo *compat2 = compat_by_pvr(pvr2); -+ -+ g_assert(compat1); -+ g_assert(compat2); -+ -+ return compat1 - compat2; -+} -+ - static bool pcc_compat(PowerPCCPUClass *pcc, uint32_t compat_pvr, -- uint32_t min_compat_pvr, uint32_t max_compat_pvr) -+ uint32_t min_compat_pvr, uint32_t max_compat_pvr) - { - const CompatInfo *compat = compat_by_pvr(compat_pvr); - const CompatInfo *min = compat_by_pvr(min_compat_pvr); -diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index 4edcf62..532f0d5 100644 ---- a/target/ppc/cpu.h -+++ b/target/ppc/cpu.h -@@ -1365,6 +1365,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) - - /* Compatibility modes */ - #if defined(TARGET_PPC64) -+long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2); - bool ppc_check_compat(PowerPCCPU *cpu, uint32_t compat_pvr, - uint32_t min_compat_pvr, uint32_t max_compat_pvr); - bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, -diff --git a/tests/Makefile.include b/tests/Makefile.include -index a492827..6016df2 100644 ---- a/tests/Makefile.include -+++ b/tests/Makefile.include -@@ -184,8 +184,8 @@ gcov-files-generic-y = qdev-monitor.c qmp.c - check-qtest-generic-y += tests/cdrom-test$(EXESUF) - - gcov-files-ipack-y += hw/ipack/ipack.c --check-qtest-ipack-y += tests/ipoctal232-test$(EXESUF) --gcov-files-ipack-y += hw/char/ipoctal232.c -+#check-qtest-ipack-y += tests/ipoctal232-test$(EXESUF) -+#gcov-files-ipack-y += hw/char/ipoctal232.c - - check-qtest-virtioserial-y += tests/virtio-console-test$(EXESUF) - gcov-files-virtioserial-y += hw/char/virtio-console.c -@@ -217,23 +217,23 @@ check-qtest-pci-y += tests/e1000e-test$(EXESUF) - gcov-files-pci-y += hw/net/e1000e.c hw/net/e1000e_core.c - check-qtest-pci-y += tests/rtl8139-test$(EXESUF) - gcov-files-pci-y += hw/net/rtl8139.c --check-qtest-pci-y += tests/pcnet-test$(EXESUF) --gcov-files-pci-y += hw/net/pcnet.c --gcov-files-pci-y += hw/net/pcnet-pci.c --check-qtest-pci-y += tests/eepro100-test$(EXESUF) --gcov-files-pci-y += 
hw/net/eepro100.c --check-qtest-pci-y += tests/ne2000-test$(EXESUF) --gcov-files-pci-y += hw/net/ne2000.c --check-qtest-pci-y += tests/nvme-test$(EXESUF) --gcov-files-pci-y += hw/block/nvme.c -+#check-qtest-pci-y += tests/pcnet-test$(EXESUF) -+#gcov-files-pci-y += hw/net/pcnet.c -+#gcov-files-pci-y += hw/net/pcnet-pci.c -+#check-qtest-pci-y += tests/eepro100-test$(EXESUF) -+#gcov-files-pci-y += hw/net/eepro100.c -+#check-qtest-pci-y += tests/ne2000-test$(EXESUF) -+#gcov-files-pci-y += hw/net/ne2000.c -+#check-qtest-pci-y += tests/nvme-test$(EXESUF) -+#gcov-files-pci-y += hw/block/nvme.c - check-qtest-pci-y += tests/ac97-test$(EXESUF) - gcov-files-pci-y += hw/audio/ac97.c --check-qtest-pci-y += tests/es1370-test$(EXESUF) --gcov-files-pci-y += hw/audio/es1370.c -+#check-qtest-pci-y += tests/es1370-test$(EXESUF) -+#gcov-files-pci-y += hw/audio/es1370.c - check-qtest-pci-y += $(check-qtest-virtio-y) - gcov-files-pci-y += $(gcov-files-virtio-y) hw/virtio/virtio-pci.c --check-qtest-pci-y += tests/tpci200-test$(EXESUF) --gcov-files-pci-y += hw/ipack/tpci200.c -+#check-qtest-pci-y += tests/tpci200-test$(EXESUF) -+#gcov-files-pci-y += hw/ipack/tpci200.c - check-qtest-pci-y += $(check-qtest-ipack-y) - gcov-files-pci-y += $(gcov-files-ipack-y) - check-qtest-pci-y += tests/display-vga-test$(EXESUF) -@@ -245,25 +245,25 @@ gcov-files-pci-y += hw/display/virtio-gpu-pci.c - gcov-files-pci-$(CONFIG_VIRTIO_VGA) += hw/display/virtio-vga.c - check-qtest-pci-y += tests/intel-hda-test$(EXESUF) - gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c --check-qtest-pci-$(CONFIG_IVSHMEM) += tests/ivshmem-test$(EXESUF) --gcov-files-pci-y += hw/misc/ivshmem.c --check-qtest-pci-y += tests/megasas-test$(EXESUF) --gcov-files-pci-y += hw/scsi/megasas.c -+#check-qtest-pci-$(CONFIG_IVSHMEM) += tests/ivshmem-test$(EXESUF) -+#gcov-files-pci-y += hw/misc/ivshmem.c -+#check-qtest-pci-y += tests/megasas-test$(EXESUF) -+#gcov-files-pci-y += hw/scsi/megasas.c - - check-qtest-i386-y = 
tests/endianness-test$(EXESUF) --check-qtest-i386-y += tests/fdc-test$(EXESUF) --gcov-files-i386-y = hw/block/fdc.c -+#check-qtest-i386-y += tests/fdc-test$(EXESUF) -+#gcov-files-i386-y = hw/block/fdc.c - check-qtest-i386-y += tests/ide-test$(EXESUF) - check-qtest-i386-y += tests/ahci-test$(EXESUF) - check-qtest-i386-y += tests/hd-geo-test$(EXESUF) - gcov-files-i386-y += hw/block/hd-geometry.c - check-qtest-i386-y += tests/boot-order-test$(EXESUF) --check-qtest-i386-y += tests/bios-tables-test$(EXESUF) -+#check-qtest-i386-y += tests/bios-tables-test$(EXESUF) - check-qtest-i386-y += tests/boot-serial-test$(EXESUF) - check-qtest-i386-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) - check-qtest-i386-y += tests/rtc-test$(EXESUF) --check-qtest-i386-y += tests/ipmi-kcs-test$(EXESUF) --check-qtest-i386-y += tests/ipmi-bt-test$(EXESUF) -+#check-qtest-i386-y += tests/ipmi-kcs-test$(EXESUF) -+#check-qtest-i386-y += tests/ipmi-bt-test$(EXESUF) - check-qtest-i386-y += tests/i440fx-test$(EXESUF) - check-qtest-i386-y += tests/fw_cfg-test$(EXESUF) - check-qtest-i386-y += tests/drive_del-test$(EXESUF) -@@ -272,8 +272,8 @@ check-qtest-i386-y += tests/tco-test$(EXESUF) - gcov-files-i386-y += hw/watchdog/watchdog.c hw/watchdog/wdt_ib700.c - check-qtest-i386-y += $(check-qtest-pci-y) - gcov-files-i386-y += $(gcov-files-pci-y) --check-qtest-i386-y += tests/vmxnet3-test$(EXESUF) --gcov-files-i386-y += hw/net/vmxnet3.c -+#check-qtest-i386-y += tests/vmxnet3-test$(EXESUF) -+#gcov-files-i386-y += hw/net/vmxnet3.c - gcov-files-i386-y += hw/net/net_rx_pkt.c - gcov-files-i386-y += hw/net/net_tx_pkt.c - check-qtest-i386-y += tests/pvpanic-test$(EXESUF) -@@ -282,8 +282,8 @@ check-qtest-i386-y += tests/i82801b11-test$(EXESUF) - gcov-files-i386-y += hw/pci-bridge/i82801b11.c - check-qtest-i386-y += tests/ioh3420-test$(EXESUF) - gcov-files-i386-y += hw/pci-bridge/ioh3420.c --check-qtest-i386-y += tests/usb-hcd-ohci-test$(EXESUF) --gcov-files-i386-y += hw/usb/hcd-ohci.c -+#check-qtest-i386-y += 
tests/usb-hcd-ohci-test$(EXESUF) -+#gcov-files-i386-y += hw/usb/hcd-ohci.c - check-qtest-i386-y += tests/usb-hcd-uhci-test$(EXESUF) - gcov-files-i386-y += hw/usb/hcd-uhci.c - check-qtest-i386-y += tests/usb-hcd-ehci-test$(EXESUF) -@@ -311,7 +311,7 @@ check-qtest-i386-y += tests/migration-test$(EXESUF) - check-qtest-i386-y += tests/test-x86-cpuid-compat$(EXESUF) - check-qtest-i386-y += tests/numa-test$(EXESUF) - check-qtest-x86_64-y += $(check-qtest-i386-y) --check-qtest-x86_64-y += tests/sdhci-test$(EXESUF) -+#check-qtest-x86_64-y += tests/sdhci-test$(EXESUF) - gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c - gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y)) - -@@ -332,34 +332,34 @@ check-qtest-mips64el-y = tests/endianness-test$(EXESUF) - check-qtest-moxie-y = tests/boot-serial-test$(EXESUF) - - check-qtest-ppc-y = tests/endianness-test$(EXESUF) --check-qtest-ppc-y += tests/boot-order-test$(EXESUF) -+#check-qtest-ppc-y += tests/boot-order-test$(EXESUF) - check-qtest-ppc-y += tests/prom-env-test$(EXESUF) - check-qtest-ppc-y += tests/drive_del-test$(EXESUF) - check-qtest-ppc-y += tests/boot-serial-test$(EXESUF) --check-qtest-ppc-y += tests/m48t59-test$(EXESUF) --gcov-files-ppc-y += hw/timer/m48t59.c -+#check-qtest-ppc-y += tests/m48t59-test$(EXESUF) -+#gcov-files-ppc-y += hw/timer/m48t59.c - - check-qtest-ppc64-y = $(check-qtest-ppc-y) - gcov-files-ppc64-y = $(subst ppc-softmmu/,ppc64-softmmu/,$(gcov-files-ppc-y)) - check-qtest-ppc64-y += tests/spapr-phb-test$(EXESUF) - gcov-files-ppc64-y += ppc64-softmmu/hw/ppc/spapr_pci.c --check-qtest-ppc64-y += tests/pnv-xscom-test$(EXESUF) -+#check-qtest-ppc64-y += tests/pnv-xscom-test$(EXESUF) - check-qtest-ppc64-y += tests/migration-test$(EXESUF) - check-qtest-ppc64-y += tests/rtas-test$(EXESUF) - check-qtest-ppc64-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) --check-qtest-ppc64-y += tests/usb-hcd-ohci-test$(EXESUF) --gcov-files-ppc64-y += hw/usb/hcd-ohci.c --check-qtest-ppc64-y += 
tests/usb-hcd-uhci-test$(EXESUF) --gcov-files-ppc64-y += hw/usb/hcd-uhci.c -+#check-qtest-ppc64-y += tests/usb-hcd-ohci-test$(EXESUF) -+#gcov-files-ppc64-y += hw/usb/hcd-ohci.c -+#check-qtest-ppc64-y += tests/usb-hcd-uhci-test$(EXESUF) -+#gcov-files-ppc64-y += hw/usb/hcd-uhci.c - check-qtest-ppc64-y += tests/usb-hcd-xhci-test$(EXESUF) - gcov-files-ppc64-y += hw/usb/hcd-xhci.c - check-qtest-ppc64-y += $(check-qtest-virtio-y) --check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) --check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) --check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF) -+#check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) -+#check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) -+#check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF) - check-qtest-ppc64-y += tests/display-vga-test$(EXESUF) - check-qtest-ppc64-y += tests/numa-test$(EXESUF) --check-qtest-ppc64-$(CONFIG_IVSHMEM) += tests/ivshmem-test$(EXESUF) -+#check-qtest-ppc64-$(CONFIG_IVSHMEM) += tests/ivshmem-test$(EXESUF) - check-qtest-ppc64-y += tests/cpu-plug-test$(EXESUF) - - check-qtest-sh4-y = tests/endianness-test$(EXESUF) -@@ -388,7 +388,7 @@ check-qtest-arm-y += tests/boot-serial-test$(EXESUF) - check-qtest-arm-y += tests/sdhci-test$(EXESUF) - - check-qtest-aarch64-y = tests/numa-test$(EXESUF) --check-qtest-aarch64-y += tests/sdhci-test$(EXESUF) -+#check-qtest-aarch64-y += tests/sdhci-test$(EXESUF) - check-qtest-aarch64-y += tests/boot-serial-test$(EXESUF) - - check-qtest-microblazeel-y = $(check-qtest-microblaze-y) -@@ -777,15 +777,15 @@ tests/endianness-test$(EXESUF): tests/endianness-test.o - tests/spapr-phb-test$(EXESUF): tests/spapr-phb-test.o $(libqos-obj-y) - tests/prom-env-test$(EXESUF): tests/prom-env-test.o $(libqos-obj-y) - tests/rtas-test$(EXESUF): tests/rtas-test.o $(libqos-spapr-obj-y) --tests/fdc-test$(EXESUF): tests/fdc-test.o -+#tests/fdc-test$(EXESUF): 
tests/fdc-test.o - tests/ide-test$(EXESUF): tests/ide-test.o $(libqos-pc-obj-y) - tests/ahci-test$(EXESUF): tests/ahci-test.o $(libqos-pc-obj-y) --tests/ipmi-kcs-test$(EXESUF): tests/ipmi-kcs-test.o --tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o -+#tests/ipmi-kcs-test$(EXESUF): tests/ipmi-kcs-test.o -+#tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o - tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o - tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y) - tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y) --tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ -+#tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ - tests/boot-sector.o tests/acpi-utils.o $(libqos-obj-y) - tests/pxe-test$(EXESUF): tests/pxe-test.o tests/boot-sector.o $(libqos-obj-y) - tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y) -@@ -798,11 +798,11 @@ tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y) - tests/e1000-test$(EXESUF): tests/e1000-test.o - tests/e1000e-test$(EXESUF): tests/e1000e-test.o $(libqos-pc-obj-y) - tests/rtl8139-test$(EXESUF): tests/rtl8139-test.o $(libqos-pc-obj-y) --tests/pcnet-test$(EXESUF): tests/pcnet-test.o --tests/pnv-xscom-test$(EXESUF): tests/pnv-xscom-test.o --tests/eepro100-test$(EXESUF): tests/eepro100-test.o --tests/vmxnet3-test$(EXESUF): tests/vmxnet3-test.o --tests/ne2000-test$(EXESUF): tests/ne2000-test.o -+#tests/pcnet-test$(EXESUF): tests/pcnet-test.o -+#tests/pnv-xscom-test$(EXESUF): tests/pnv-xscom-test.o -+#tests/eepro100-test$(EXESUF): tests/eepro100-test.o -+#tests/vmxnet3-test$(EXESUF): tests/vmxnet3-test.o -+#tests/ne2000-test$(EXESUF): tests/ne2000-test.o - tests/wdt_ib700-test$(EXESUF): tests/wdt_ib700-test.o - tests/tco-test$(EXESUF): tests/tco-test.o $(libqos-pc-obj-y) - tests/virtio-balloon-test$(EXESUF): tests/virtio-balloon-test.o $(libqos-virtio-obj-y) -@@ -813,22 +813,22 @@ tests/virtio-scsi-test$(EXESUF): tests/virtio-scsi-test.o 
$(libqos-virtio-obj-y) - tests/virtio-9p-test$(EXESUF): tests/virtio-9p-test.o $(libqos-virtio-obj-y) - tests/virtio-serial-test$(EXESUF): tests/virtio-serial-test.o $(libqos-virtio-obj-y) - tests/virtio-console-test$(EXESUF): tests/virtio-console-test.o $(libqos-virtio-obj-y) --tests/tpci200-test$(EXESUF): tests/tpci200-test.o -+#tests/tpci200-test$(EXESUF): tests/tpci200-test.o - tests/display-vga-test$(EXESUF): tests/display-vga-test.o --tests/ipoctal232-test$(EXESUF): tests/ipoctal232-test.o -+#tests/ipoctal232-test$(EXESUF): tests/ipoctal232-test.o - tests/qom-test$(EXESUF): tests/qom-test.o - tests/test-hmp$(EXESUF): tests/test-hmp.o - tests/machine-none-test$(EXESUF): tests/machine-none-test.o - tests/drive_del-test$(EXESUF): tests/drive_del-test.o $(libqos-virtio-obj-y) - tests/qdev-monitor-test$(EXESUF): tests/qdev-monitor-test.o $(libqos-pc-obj-y) --tests/nvme-test$(EXESUF): tests/nvme-test.o -+#tests/nvme-test$(EXESUF): tests/nvme-test.o - tests/pvpanic-test$(EXESUF): tests/pvpanic-test.o - tests/i82801b11-test$(EXESUF): tests/i82801b11-test.o - tests/ac97-test$(EXESUF): tests/ac97-test.o --tests/es1370-test$(EXESUF): tests/es1370-test.o -+#tests/es1370-test$(EXESUF): tests/es1370-test.o - tests/intel-hda-test$(EXESUF): tests/intel-hda-test.o - tests/ioh3420-test$(EXESUF): tests/ioh3420-test.o --tests/usb-hcd-ohci-test$(EXESUF): tests/usb-hcd-ohci-test.o $(libqos-usb-obj-y) -+#tests/usb-hcd-ohci-test$(EXESUF): tests/usb-hcd-ohci-test.o $(libqos-usb-obj-y) - tests/usb-hcd-uhci-test$(EXESUF): tests/usb-hcd-uhci-test.o $(libqos-usb-obj-y) - tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-usb-obj-y) - tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y) -@@ -841,19 +841,19 @@ tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_hel - tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y) - tests/test-keyval$(EXESUF): tests/test-keyval.o $(test-util-obj-y) 
$(test-qapi-obj-y) - tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y) --tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y) --tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y) --tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y) -+#tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y) -+#tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y) -+#tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y) - tests/test-x86-cpuid-compat$(EXESUF): tests/test-x86-cpuid-compat.o $(qtest-obj-y) --tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y) $(libqos-spapr-obj-y) --tests/megasas-test$(EXESUF): tests/megasas-test.o $(libqos-spapr-obj-y) $(libqos-pc-obj-y) -+#tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y) $(libqos-spapr-obj-y) -+#tests/megasas-test$(EXESUF): tests/megasas-test.o $(libqos-spapr-obj-y) $(libqos-pc-obj-y) - tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o $(test-util-obj-y) libvhost-user.a - tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y) - tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o - tests/test-qapi-util$(EXESUF): tests/test-qapi-util.o $(test-util-obj-y) - tests/numa-test$(EXESUF): tests/numa-test.o - tests/vmgenid-test$(EXESUF): tests/vmgenid-test.o tests/boot-sector.o tests/acpi-utils.o --tests/sdhci-test$(EXESUF): tests/sdhci-test.o $(libqos-pc-obj-y) -+#tests/sdhci-test$(EXESUF): tests/sdhci-test.o $(libqos-pc-obj-y) - tests/cdrom-test$(EXESUF): tests/cdrom-test.o tests/boot-sector.o $(libqos-obj-y) - - tests/migration/stress$(EXESUF): tests/migration/stress.o -diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c -index 952a2e7..5217a39 100644 ---- a/tests/boot-serial-test.c -+++ b/tests/boot-serial-test.c 
-@@ -80,17 +80,21 @@ static testdef_t tests[] = { - { "ppc", "g3beige", "", "PowerPC,750" }, - { "ppc", "mac99", "", "PowerPC,G4" }, - { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { "ppc64", "ppce500", "", "U-Boot" }, - { "ppc64", "prep", "-boot e", "Booting from device e" }, - { "ppc64", "40p", "-m 192", "Memory size: 192 MB" }, - { "ppc64", "mac99", "", "PowerPC,970FX" }, -+#endif - { "ppc64", "pseries", "", "Open Firmware" }, -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { "ppc64", "powernv", "-cpu POWER8", "OPAL" }, - { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, -+#endif - { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, - { "i386", "pc", "-device sga", "SGABIOS" }, - { "i386", "q35", "-device sga", "SGABIOS" }, -- { "x86_64", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, -+ { "x86_64", "pc", "-cpu qemu32 -device sga", "SGABIOS" }, - { "x86_64", "q35", "-device sga", "SGABIOS" }, - { "sparc", "LX", "", "TMS390S10" }, - { "sparc", "SS-4", "", "MB86904" }, -diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c -index 5f39ba0..48b8d09 100644 ---- a/tests/cpu-plug-test.c -+++ b/tests/cpu-plug-test.c -@@ -192,7 +192,8 @@ static void add_pseries_test_case(const char *mname) - PlugTestData *data; - - if (!g_str_has_prefix(mname, "pseries-") || -- (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7)) { -+ (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7) || -+ strcmp(mname,"pseries-rhel7.2.0") == 0) { - return; - } - data = g_new(PlugTestData, 1); -diff --git a/tests/e1000-test.c b/tests/e1000-test.c -index 0c5fcdc..b830432 100644 ---- a/tests/e1000-test.c -+++ b/tests/e1000-test.c -@@ -29,8 +29,10 @@ static void test_device(gconstpointer data) - static const char *models[] = { - "e1000", - "e1000-82540em", -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - "e1000-82544gc", - "e1000-82545em", -+#endif - }; - - int main(int argc, char **argv) -diff 
--git a/tests/endianness-test.c b/tests/endianness-test.c -index 546e096..440353d 100644 ---- a/tests/endianness-test.c -+++ b/tests/endianness-test.c -@@ -37,10 +37,12 @@ static const TestCase test_cases[] = { - { "ppc", "g3beige", 0xfe000000, .bswap = true, .superio = "i82378" }, - { "ppc", "prep", 0x80000000, .bswap = true }, - { "ppc", "bamboo", 0xe8000000, .bswap = true, .superio = "i82378" }, -+#if 0 /* Disabled for RHEL, since ISA is not enabled */ - { "ppc64", "mac99", 0xf2000000, .bswap = true, .superio = "i82378" }, - { "ppc64", "pseries", (1ULL << 45), .bswap = true, .superio = "i82378" }, - { "ppc64", "pseries-2.7", 0x10080000000ULL, - .bswap = true, .superio = "i82378" }, -+#endif /* Disabled for RHEL, since ISA is not enabled */ - { "sh4", "r2d", 0xfe240000, .superio = "i82378" }, - { "sh4eb", "r2d", 0xfe240000, .bswap = true, .superio = "i82378" }, - { "sparc64", "sun4u", 0x1fe02000000LL, .bswap = true }, -diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c -index 8c867e6..cc9b6ec 100644 ---- a/tests/prom-env-test.c -+++ b/tests/prom-env-test.c -@@ -82,7 +82,9 @@ int main(int argc, char *argv[]) - if (!strcmp(arch, "ppc")) { - add_tests(ppc_machines); - } else if (!strcmp(arch, "ppc64")) { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - add_tests(ppc_machines); -+#endif - if (g_test_slow()) { - qtest_add_data_func("prom-env/pseries", "pseries", test_machine); - } -diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 -index ee9c820..c5cc0ee 100755 ---- a/tests/qemu-iotests/051 -+++ b/tests/qemu-iotests/051 -@@ -183,11 +183,11 @@ run_qemu -drive if=virtio - case "$QEMU_DEFAULT_MACHINE" in - pc) - run_qemu -drive if=none,id=disk -device ide-cd,drive=disk -- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk -+# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk - run_qemu -drive if=none,id=disk -device ide-drive,drive=disk - run_qemu -drive if=none,id=disk -device 
ide-hd,drive=disk -- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk -- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk -+# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk -+# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk - ;; - *) - ;; -@@ -212,11 +212,11 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on - case "$QEMU_DEFAULT_MACHINE" in - pc) - run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-cd,drive=disk -- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk -+# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk - run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-drive,drive=disk - run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-hd,drive=disk -- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk -- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk -+# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk -+# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk - ;; - *) - ;; -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index b973dc8..f1059f6 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -77,7 +77,7 @@ - 068 rw auto quick - 069 rw auto quick - 070 rw auto quick --071 rw auto quick -+# 071 rw auto quick -- requires whitelisted blkverify - 072 rw auto quick - 073 rw auto quick - 074 rw auto quick -@@ -105,7 +105,7 @@ - 096 rw auto quick - 097 rw auto backing - 098 rw auto backing quick --099 rw auto quick -+# 099 rw auto quick -- requires whitelisted blkverify - # 100 was removed, do 
not reuse - 101 rw auto quick - 102 rw auto quick -diff --git a/tests/qom-test.c b/tests/qom-test.c -index e6f712c..ebd15fd 100644 ---- a/tests/qom-test.c -+++ b/tests/qom-test.c -@@ -16,7 +16,7 @@ - #include "libqtest.h" - - static const char *blacklist_x86[] = { -- "xenfv", "xenpv", NULL -+ "xenfv", "xenpv", "isapc", NULL - }; - - static const struct { -diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c -index 84ce9c7..c1ee197 100644 ---- a/tests/test-x86-cpuid-compat.c -+++ b/tests/test-x86-cpuid-compat.c -@@ -306,6 +306,7 @@ int main(int argc, char **argv) - "-cpu 486,xlevel2=0xC0000002,+xstore", - "xlevel2", 0xC0000002); - -+#if 0 /* Disabled in Red Hat Enterprise Linux */ - /* Check compatibility of old machine-types that didn't - * auto-increase level/xlevel/xlevel2: */ - -@@ -356,6 +357,7 @@ int main(int argc, char **argv) - add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", - "-machine pc-i440fx-2.4 -cpu SandyBridge,+npt", - "xlevel", 0x80000008); -+#endif - - /* Test feature parsing */ - add_feature_test("x86/cpuid/features/plus", -diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c -index 5b1b681..85fa150 100644 ---- a/tests/usb-hcd-xhci-test.c -+++ b/tests/usb-hcd-xhci-test.c -@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) - usb_test_hotplug("xhci", 1, NULL); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void test_usb_uas_hotplug(void) - { - qtest_qmp_device_add("usb-uas", "uas", NULL); -@@ -34,6 +35,7 @@ static void test_usb_uas_hotplug(void) - qtest_qmp_device_del("scsihd"); - qtest_qmp_device_del("uas"); - } -+#endif - - static void test_usb_ccid_hotplug(void) - { -@@ -52,7 +54,9 @@ int main(int argc, char **argv) - - qtest_add_func("/xhci/pci/init", test_xhci_init); - qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); -+#endif - 
qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); - - qtest_start("-device nec-usb-xhci,id=xhci" --- -1.8.3.1 - diff --git a/0001-Initial-redhat-build.patch b/0004-Initial-redhat-build.patch similarity index 62% rename from 0001-Initial-redhat-build.patch rename to 0004-Initial-redhat-build.patch index a901328..06670c5 100644 --- a/0001-Initial-redhat-build.patch +++ b/0004-Initial-redhat-build.patch @@ -1,10 +1,10 @@ -From f03d3b79bc1908b0b6e257ee7aaa6567ecb91e38 Mon Sep 17 00:00:00 2001 +From a1f1313c0c96b2a159647aabc6a4b0f3a3f4424a Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Mon, 11 Sep 2017 07:11:00 +0200 +Date: Thu, 8 Nov 2018 11:17:08 +0100 Subject: Initial redhat build -This patch introduces redhat build structure in redhat subdirectory. -In addition, several issues are fixed in QEMU tree: +This patch introduces redhat build structure in redhat subdirectory. In addition, +several issues are fixed in QEMU tree: - Change of app name for sasl_server_init in VNC code from qemu to qemu-kvm - As we use qemu-kvm as name in all places, this is updated to be consistent @@ -12,134 +12,24 @@ In addition, several issues are fixed in QEMU tree: - man page is installed using make install so we have to fix it in qemu tree - Use "/share/qemu-kvm" as SHARE_SUFFIX - We reconfigured our share to qemu-kvm to be consistent with used name -- Added .gitpublish configuration file - - Support for git publish has to be stored in repository root -Rebase changes (3.0.0): -- python detection changed -- added --disable-debug-mutex +This commit is synchronized with qemu-kvm-2.12.0-42.el8 build. 
-Merged patches (3.0.0): -- 9997a46 Fix annocheck issues -- 35230f9 redhat: remove extra % in rhel_rhev_conflicts macro( -- c747d3f redhat: syncronizing specfile -- e6abfc4 rpm: Add nvme VFIO driver to rw whitelist -- 7043465 rpm: Whitelist copy-on-read block driver -- f9a897c rpm: add throttle driver to rw whitelist -- b9ea80f redhat: replacing %pkname by %name -- eeeea85 redhat: Remove unused ApplyPatch macro -- b42c578 redhat:removing disable code for libcacard -- cee6bd5 redhat: improve packaging layout with modularization of the block layer -- 0cb4c60 redhat: Introducing qemu-kvm-core package -- 1ff4106 Add qemu-keymap to qemu-kvm-common -- 47838a5 redhat: Make gitpublish profile the default one -- a82f87b redhat: s390x: add hpage=1 to kvm.conf -- 3d52169 Enabling vhost_user -- 57aa228 spec: Enable Native Ceph support on all architectures -- 5f9ea03 Thu Jun 21 2018 Danilo C. L. de Paula - 2.12.0-13.el8 -- ed4d62a spec: Fix ambiguous 'python' interpreter name -- 74b3e6c qemu-ga: blacklisting guest-exec and guest-exec-status RPCs -- 2fd2cf7 redhat: rewrap "build_configure.sh" cmdline for the "rh-env-prep" target -- f48dc7f redhat: remove the VTD, LIVE_BLOCK_OPS, and RHV options in local builds too -- ccdf46b redhat: fix the "rh-env-prep" target's dependency on the SRPM_NAME macro -- f258fbf redhat: remove dead code related to s390 (not s390x) -- d186100 redhat: sync compiler flags from the spec file to "rh-env-prep" -- 727aa86 redhat: sync guest agent enablement and tcmalloc usage from spec to local -- b5d47e2 redhat: fix up Python 3 dependency for building QEMU -- 70c64dd redhat: fix up Python dependency for SRPM generation -- 96aca9f redhat: disable glusterfs dependency/support temporarily -- e9aff9d block/vxhs: modularize VXHS via g_module -- ecf40bf Defining a shebang for python scripts -- 55e3177 redhat: changing the prefix and blurb scheme to support rhel8-like handling -- 571e4ac Removing "rh-srpm-rhel" make target -- 9db09ef redhat: enforce python3 usage 
-- 56cda0b spec: Re-add dependency to seavgabios and ipxe for ppc64 architectures -- c780848 Drop build_configure.sh and Makefile.local files -- cca9118 Fix subject line in .gitpublish -- 9745e27 redhat: Update build configuration -- 193830c redhat: Disable vhost crypto -- 9dc30cb redhat: Make rh-local actually work in a RHEL-8 environment -- 99011c9 redhat: enable opengl, add build and runtime deps -- 7290e3f redhat: Improve python check +Signed-off-by: Miroslav Rezanina --- - .gitpublish | 61 +- - Makefile | 3 +- - block/Makefile.objs | 2 +- - block/vxhs.c | 119 ++- - configure | 33 +- - os-posix.c | 2 +- - redhat/.gitignore | 5 + - redhat/85-kvm.preset | 5 + - redhat/95-kvm-memlock.conf | 10 + - redhat/99-qemu-guest-agent.rules | 2 + - redhat/Makefile | 82 ++ - redhat/Makefile.common | 47 ++ - redhat/bridge.conf | 1 + - redhat/ksm.service | 13 + - redhat/ksm.sysconfig | 4 + - redhat/ksmctl.c | 77 ++ - redhat/ksmtuned | 139 ++++ - redhat/ksmtuned.conf | 21 + - redhat/ksmtuned.service | 12 + - redhat/kvm-s390x.conf | 19 + - redhat/kvm-setup | 40 + - redhat/kvm-setup.service | 14 + - redhat/kvm-x86.conf | 12 + - redhat/kvm.conf | 3 + - redhat/kvm.modules | 18 + - redhat/qemu-ga.sysconfig | 19 + - redhat/qemu-guest-agent.service | 20 + - redhat/qemu-kvm.spec.template | 1531 ++++++++++++++++++++++++++++++++++++ - redhat/qemu-pr-helper.service | 15 + - redhat/qemu-pr-helper.socket | 9 + - redhat/rpmbuild/BUILD/.gitignore | 2 + - redhat/rpmbuild/RPMS/.gitignore | 2 + - redhat/rpmbuild/SOURCES/.gitignore | 2 + - redhat/rpmbuild/SPECS/.gitignore | 2 + - redhat/rpmbuild/SRPMS/.gitignore | 2 + - redhat/scripts/frh.py | 24 + - redhat/scripts/git-backport-diff | 327 ++++++++ - redhat/scripts/git-compile-check | 215 +++++ - redhat/scripts/process-patches.sh | 92 +++ - redhat/scripts/tarball_checksum.sh | 3 + - redhat/vhost.conf | 3 + - ui/vnc.c | 2 +- - 42 files changed, 2921 insertions(+), 93 deletions(-) - create mode 100644 redhat/.gitignore - create mode 100644 
redhat/85-kvm.preset - create mode 100644 redhat/95-kvm-memlock.conf - create mode 100644 redhat/99-qemu-guest-agent.rules + Makefile | 3 +- + block/Makefile.objs | 2 +- + block/vxhs.c | 119 ++- + configure | 40 +- + os-posix.c | 2 +- + redhat/Makefile | 82 ++ + redhat/Makefile.common | 49 ++ + redhat/qemu-kvm.spec.template | 1721 +++++++++++++++++++++++++++++++++++++++++ + ui/vnc.c | 2 +- + 9 files changed, 1972 insertions(+), 48 deletions(-) create mode 100644 redhat/Makefile create mode 100644 redhat/Makefile.common - create mode 100644 redhat/bridge.conf - create mode 100644 redhat/ksm.service - create mode 100644 redhat/ksm.sysconfig - create mode 100644 redhat/ksmctl.c - create mode 100644 redhat/ksmtuned - create mode 100644 redhat/ksmtuned.conf - create mode 100644 redhat/ksmtuned.service - create mode 100644 redhat/kvm-s390x.conf - create mode 100644 redhat/kvm-setup - create mode 100644 redhat/kvm-setup.service - create mode 100644 redhat/kvm-x86.conf - create mode 100644 redhat/kvm.conf - create mode 100644 redhat/kvm.modules - create mode 100644 redhat/qemu-ga.sysconfig - create mode 100644 redhat/qemu-guest-agent.service create mode 100644 redhat/qemu-kvm.spec.template - create mode 100644 redhat/qemu-pr-helper.service - create mode 100644 redhat/qemu-pr-helper.socket - create mode 100644 redhat/rpmbuild/BUILD/.gitignore - create mode 100644 redhat/rpmbuild/RPMS/.gitignore - create mode 100644 redhat/rpmbuild/SOURCES/.gitignore - create mode 100644 redhat/rpmbuild/SPECS/.gitignore - create mode 100644 redhat/rpmbuild/SRPMS/.gitignore - create mode 100755 redhat/scripts/frh.py - create mode 100755 redhat/scripts/git-backport-diff - create mode 100755 redhat/scripts/git-compile-check - create mode 100755 redhat/scripts/process-patches.sh - create mode 100755 redhat/scripts/tarball_checksum.sh - create mode 100644 redhat/vhost.conf diff --git a/Makefile b/Makefile index 2da686b..eb4c57a 100644 @@ -367,10 +257,26 @@ index 0cb0a00..9164b3e 100644 
trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno); return -EIO; diff --git a/configure b/configure -index 2a7796e..0314d53 100755 +index 2a7796e..0a27137 100755 --- a/configure +++ b/configure -@@ -3460,7 +3460,7 @@ fi +@@ -2216,13 +2216,10 @@ fi + ########################################## + # libseccomp check + ++libseccomp_minver="2.2.0" + if test "$seccomp" != "no" ; then + case "$cpu" in +- i386|x86_64) +- libseccomp_minver="2.1.0" +- ;; +- mips) +- libseccomp_minver="2.2.0" ++ i386|x86_64|mips) + ;; + arm|aarch64) + libseccomp_minver="2.2.3" +@@ -3460,7 +3457,7 @@ fi glib_req_ver=2.40 glib_modules=gthread-2.0 @@ -379,7 +285,7 @@ index 2a7796e..0314d53 100755 glib_modules="$glib_modules gmodule-export-2.0" fi -@@ -5435,33 +5435,6 @@ if compile_prog "" "" ; then +@@ -5435,33 +5432,6 @@ if compile_prog "" "" ; then fi ########################################## @@ -413,7 +319,7 @@ index 2a7796e..0314d53 100755 # check for _Static_assert() have_static_assert=no -@@ -6759,8 +6732,8 @@ if test "$pthread_setname_np" = "yes" ; then +@@ -6759,8 +6729,8 @@ if test "$pthread_setname_np" = "yes" ; then fi if test "$vxhs" = "yes" ; then diff --git a/0002-Enable-disable-devices-for-RHEL-7.patch b/0005-Enable-disable-devices-for-RHEL-7.patch similarity index 98% rename from 0002-Enable-disable-devices-for-RHEL-7.patch rename to 0005-Enable-disable-devices-for-RHEL-7.patch index de0276f..ec26beb 100644 --- a/0002-Enable-disable-devices-for-RHEL-7.patch +++ b/0005-Enable-disable-devices-for-RHEL-7.patch @@ -1,10 +1,10 @@ -From 7472ed73f89c81f1ca4c86129eed0f5874d82c41 Mon Sep 17 00:00:00 2001 +From b4a5b95153ca86eba72ff4a368a24ac31b77bbe5 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 11 Jan 2016 11:53:33 +0100 Subject: Enable/disable devices for RHEL 7 This commit adds all changes related to changes in supported devices -up to qemu-kvm-2.12.0-32.el8. +up to qemu-kvm-2.12.0-42.el8. 
Signed-off-by: Miroslav Rezanina @@ -28,15 +28,16 @@ Merged patches (3.0.0): - 747643c Disable new pvrdma device - 0d4f38c s390x: Re-enable CONFIG_TERMINAL3270 - 0f725e9 AArch64: Enable CONFIG_FW_CFG_DMA for aarch64 +- 67c5a8c Disable ivshmem --- - default-configs/aarch64-softmmu.mak | 37 +++++++++++++++++++++++++++++-------- - default-configs/pci.mak | 36 ++++++++++++++++++------------------ - default-configs/ppc64-softmmu.mak | 25 +++++++++++++++++++------ + default-configs/aarch64-softmmu.mak | 37 ++++++++++++++++++++++++++++-------- + default-configs/pci.mak | 38 ++++++++++++++++++------------------- + default-configs/ppc64-softmmu.mak | 25 ++++++++++++++++++------ default-configs/s390x-softmmu.mak | 5 +++-- default-configs/sound.mak | 8 ++++---- default-configs/usb.mak | 14 +++++++------- default-configs/virtio.mak | 5 ++--- - default-configs/x86_64-softmmu.mak | 28 ++++++++++++++-------------- + default-configs/x86_64-softmmu.mak | 28 +++++++++++++-------------- hw/acpi/ich9.c | 4 ++-- hw/arm/Makefile.objs | 2 +- hw/block/fdc.c | 1 + @@ -67,12 +68,12 @@ Merged patches (3.0.0): stubs/Makefile.objs | 1 + stubs/ide-isa.c | 13 +++++++++++++ target/arm/cpu.c | 4 +++- - target/i386/cpu.c | 35 +++++++++++++++++++++++++++-------- + target/i386/cpu.c | 35 ++++++++++++++++++++++++++-------- target/ppc/cpu-models.c | 17 ++++++++++++++++- target/s390x/cpu_models.c | 3 +++ target/s390x/kvm.c | 8 ++++++++ vl.c | 2 +- - 43 files changed, 240 insertions(+), 98 deletions(-) + 43 files changed, 241 insertions(+), 99 deletions(-) create mode 100644 stubs/ide-isa.c diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak @@ -121,7 +122,7 @@ index 6f790f0..3f27540 100644 +CONFIG_I2C=y +CONFIG_FW_CFG_DMA=y diff --git a/default-configs/pci.mak b/default-configs/pci.mak -index de53d20..5cbe3e4 100644 +index de53d20..70e40ad 100644 --- a/default-configs/pci.mak +++ b/default-configs/pci.mak @@ -4,22 +4,22 @@ CONFIG_ISA_BUS=y @@ -185,8 +186,9 @@ index 
de53d20..5cbe3e4 100644 CONFIG_EDU=y CONFIG_VGA=y CONFIG_VGA_PCI=y - CONFIG_IVSHMEM_DEVICE=$(CONFIG_IVSHMEM) +-CONFIG_IVSHMEM_DEVICE=$(CONFIG_IVSHMEM) -CONFIG_ROCKER=y ++#CONFIG_IVSHMEM_DEVICE=$(CONFIG_IVSHMEM) +#CONFIG_ROCKER=y diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak index b94af6c..30ca76d 100644 diff --git a/0006-Machine-type-related-general-changes.patch b/0006-Machine-type-related-general-changes.patch new file mode 100644 index 0000000..7693f8a --- /dev/null +++ b/0006-Machine-type-related-general-changes.patch @@ -0,0 +1,764 @@ +From e34179d713443601a16936e2e80b8fbd044429be Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 8 Nov 2018 11:59:55 +0100 +Subject: Machine type related general changes + +This patch is first part of original "Add RHEL machine types" patch we +split to allow easier review. It contains changes not related to any +architecture. + +Signed-off-by: Miroslav Rezanina +--- + hw/acpi/ich9.c | 16 +++ + hw/acpi/piix4.c | 6 +- + hw/char/serial.c | 16 +++ + hw/display/cirrus_vga.c | 2 +- + hw/display/vga-isa.c | 2 +- + hw/net/e1000.c | 18 ++- + hw/net/e1000e.c | 21 ++++ + hw/net/rtl8139.c | 4 +- + hw/smbios/smbios.c | 1 + + hw/timer/i8254_common.c | 2 +- + hw/timer/mc146818rtc.c | 6 + + hw/usb/hcd-uhci.c | 4 +- + hw/usb/hcd-xhci.c | 20 ++++ + hw/usb/hcd-xhci.h | 2 + + include/hw/acpi/ich9.h | 3 + + include/hw/compat.h | 229 ++++++++++++++++++++++++++++++++++++++ + include/hw/usb.h | 4 + + migration/migration.c | 2 + + migration/migration.h | 5 + + qdev-monitor.c | 1 - + scripts/vmstate-static-checker.py | 1 - + 21 files changed, 354 insertions(+), 11 deletions(-) + +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index a4e87b8..23a7baa 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -441,6 +441,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) + s->pm.enable_tco = value; + } + ++static bool ich9_pm_get_force_rev1_fadt(Object *obj, Error **errp) ++{ ++ 
ICH9LPCState *s = ICH9_LPC_DEVICE(obj); ++ return s->pm.force_rev1_fadt; ++} ++ ++static void ich9_pm_set_force_rev1_fadt(Object *obj, bool value, Error **errp) ++{ ++ ICH9LPCState *s = ICH9_LPC_DEVICE(obj); ++ s->pm.force_rev1_fadt = value; ++} ++ + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) + { + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; +@@ -465,6 +477,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) + ich9_pm_get_cpu_hotplug_legacy, + ich9_pm_set_cpu_hotplug_legacy, + NULL); ++ object_property_add_bool(obj, "__com.redhat_force-rev1-fadt", ++ ich9_pm_get_force_rev1_fadt, ++ ich9_pm_set_force_rev1_fadt, ++ NULL); + object_property_add(obj, ACPI_PM_PROP_S3_DISABLED, "uint8", + ich9_pm_get_disable_s3, + ich9_pm_set_disable_s3, +diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c +index 6404af5..0f1f9e2 100644 +--- a/hw/acpi/piix4.c ++++ b/hw/acpi/piix4.c +@@ -310,7 +310,7 @@ static const VMStateDescription vmstate_cpuhp_state = { + static const VMStateDescription vmstate_acpi = { + .name = "piix4_pm", + .version_id = 3, +- .minimum_version_id = 3, ++ .minimum_version_id = 2, + .minimum_version_id_old = 1, + .load_state_old = acpi_load_old, + .post_load = vmstate_acpi_post_load, +@@ -670,8 +670,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + + static Property piix4_pm_properties[] = { + DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), + DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), + DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, + use_acpi_pci_hotplug, true), +diff --git a/hw/char/serial.c b/hw/char/serial.c 
+index 251f40f..8e3520c 100644 +--- a/hw/char/serial.c ++++ b/hw/char/serial.c +@@ -30,6 +30,7 @@ + #include "qemu/timer.h" + #include "qemu/error-report.h" + #include "trace.h" ++#include "migration/migration.h" + + //#define DEBUG_SERIAL + +@@ -699,6 +700,9 @@ static int serial_post_load(void *opaque, int version_id) + static bool serial_thr_ipending_needed(void *opaque) + { + SerialState *s = opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } + + if (s->ier & UART_IER_THRI) { + bool expected_value = ((s->iir & UART_IIR_ID) == UART_IIR_THRI); +@@ -780,6 +784,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { + static bool serial_fifo_timeout_timer_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return timer_pending(s->fifo_timeout_timer); + } + +@@ -797,6 +805,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { + static bool serial_timeout_ipending_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->timeout_ipending != 0; + } + +@@ -814,6 +826,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { + static bool serial_poll_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->poll_msl >= 0; + } + +diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c +index 9fd5665..6910014 100644 +--- a/hw/display/cirrus_vga.c ++++ b/hw/display/cirrus_vga.c +@@ -3061,7 +3061,7 @@ static void isa_cirrus_vga_realizefn(DeviceState *dev, Error **errp) + + static Property isa_cirrus_vga_properties[] = { + DEFINE_PROP_UINT32("vgamem_mb", struct ISACirrusVGAState, +- cirrus_vga.vga.vram_size_mb, 4), ++ cirrus_vga.vga.vram_size_mb, 16), + DEFINE_PROP_BOOL("blitter", struct ISACirrusVGAState, + cirrus_vga.enable_blitter, true), + DEFINE_PROP_END_OF_LIST(), +diff --git a/hw/display/vga-isa.c 
b/hw/display/vga-isa.c +index fa44242..7835c83 100644 +--- a/hw/display/vga-isa.c ++++ b/hw/display/vga-isa.c +@@ -80,7 +80,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) + } + + static Property vga_isa_properties[] = { +- DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 8), ++ DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 16), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/hw/net/e1000.c b/hw/net/e1000.c +index 742cd0a..7d568da 100644 +--- a/hw/net/e1000.c ++++ b/hw/net/e1000.c +@@ -1663,6 +1663,16 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) + + pci_conf = pci_dev->config; + ++ if (!(d->compat_flags & E1000_FLAG_AUTONEG)) { ++ /* ++ * We have no capabilities, so capability list bit should normally be 0. ++ * Keep it on for compat machine types to avoid breaking migration. ++ * HACK: abuse E1000_FLAG_AUTONEG, which is off exactly for ++ * the machine types that need this. ++ */ ++ pci_set_word(pci_conf + PCI_STATUS, PCI_STATUS_CAP_LIST); ++ } ++ + /* TODO: RST# value should be 0, PCI spec 6.2.4 */ + pci_conf[PCI_CACHE_LINE_SIZE] = 0x10; + +@@ -1763,7 +1773,7 @@ static const TypeInfo e1000_base_info = { + + static const E1000Info e1000_devices[] = { + { +- .name = "e1000", ++ .name = "e1000-82540em", + .device_id = E1000_DEV_ID_82540EM, + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, +@@ -1784,6 +1794,11 @@ static const E1000Info e1000_devices[] = { + #endif + }; + ++static const TypeInfo e1000_default_info = { ++ .name = "e1000", ++ .parent = "e1000-82540em", ++}; ++ + static void e1000_register_types(void) + { + int i; +@@ -1801,6 +1816,7 @@ static void e1000_register_types(void) + + type_register(&type_info); + } ++ type_register_static(&e1000_default_info); + } + + type_init(e1000_register_types) +diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c +index 510ddb3..f1de9e5 100644 +--- a/hw/net/e1000e.c ++++ b/hw/net/e1000e.c +@@ -75,6 +75,11 @@ typedef struct E1000EState { 
+ + E1000ECore core; + ++ /* 7.3 had the intr_state field that was in the original e1000e code ++ * but that was removed prior to 2.7's release ++ */ ++ bool redhat_7_3_intr_state_enable; ++ uint32_t redhat_7_3_intr_state; + } E1000EState; + + #define E1000E_MMIO_IDX 0 +@@ -90,6 +95,10 @@ typedef struct E1000EState { + #define E1000E_MSIX_TABLE (0x0000) + #define E1000E_MSIX_PBA (0x2000) + ++/* Values as in RHEL 7.3 build and original upstream */ ++#define RH_E1000E_USE_MSI BIT(0) ++#define RH_E1000E_USE_MSIX BIT(1) ++ + static uint64_t + e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) + { +@@ -301,6 +310,8 @@ e1000e_init_msix(E1000EState *s) + } else { + if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { + msix_uninit(d, &s->msix, &s->msix); ++ } else { ++ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSIX; + } + } + } +@@ -472,6 +483,8 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) + ret = msi_init(PCI_DEVICE(s), 0xD0, 1, true, false, NULL); + if (ret) { + trace_e1000e_msi_init_fail(ret); ++ } else { ++ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSI; + } + + if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, +@@ -595,6 +608,11 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { + VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ + e1000e_vmstate_intr_timer, E1000IntrDelayTimer) + ++static bool rhel_7_3_check(void *opaque, int version_id) ++{ ++ return ((E1000EState *)opaque)->redhat_7_3_intr_state_enable; ++} ++ + static const VMStateDescription e1000e_vmstate = { + .name = "e1000e", + .version_id = 1, +@@ -606,6 +624,7 @@ static const VMStateDescription e1000e_vmstate = { + VMSTATE_MSIX(parent_obj, E1000EState), + + VMSTATE_UINT32(ioaddr, E1000EState), ++ VMSTATE_UINT32_TEST(redhat_7_3_intr_state, E1000EState, rhel_7_3_check), + VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), + VMSTATE_UINT8(core.rx_desc_len, E1000EState), + VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, +@@ -654,6 +673,8 @@ static PropertyInfo 
e1000e_prop_disable_vnet, + + static Property e1000e_properties[] = { + DEFINE_NIC_PROPERTIES(E1000EState, conf), ++ DEFINE_PROP_BOOL("__redhat_e1000e_7_3_intr_state", E1000EState, ++ redhat_7_3_intr_state_enable, false), + DEFINE_PROP_SIGNED("disable_vnet_hdr", E1000EState, disable_vnet, false, + e1000e_prop_disable_vnet, bool), + DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, +diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c +index 46daa16..05453e7 100644 +--- a/hw/net/rtl8139.c ++++ b/hw/net/rtl8139.c +@@ -3174,7 +3174,7 @@ static int rtl8139_pre_save(void *opaque) + + static const VMStateDescription vmstate_rtl8139 = { + .name = "rtl8139", +- .version_id = 5, ++ .version_id = 4, + .minimum_version_id = 3, + .post_load = rtl8139_post_load, + .pre_save = rtl8139_pre_save, +@@ -3255,7 +3255,9 @@ static const VMStateDescription vmstate_rtl8139 = { + VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), + VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), + VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), ++#if 0 /* Disabled for Red Hat Enterprise Linux bz 1420195 */ + VMSTATE_UINT32_V(tally_counters.RxOkMul, RTL8139State, 5), ++#endif + VMSTATE_UINT16(tally_counters.TxAbt, RTL8139State), + VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index a27e54b..144e6e9 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -775,6 +775,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type1.product, product); + SMBIOS_SET_DEFAULT(type1.version, version); ++ SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); + SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type2.product, product); + SMBIOS_SET_DEFAULT(type2.version, version); +diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c +index 6190b6f..ad2ad2d 100644 +--- a/hw/timer/i8254_common.c 
++++ b/hw/timer/i8254_common.c +@@ -268,7 +268,7 @@ static const VMStateDescription vmstate_pit_common = { + .pre_save = pit_dispatch_pre_save, + .post_load = pit_dispatch_post_load, + .fields = (VMStateField[]) { +- VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3), ++ VMSTATE_UINT32(channels[0].irq_disabled, PITCommonState), /* qemu-kvm's v2 had 'flags' here */ + VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2, + vmstate_pit_channel, PITChannelState), + VMSTATE_INT64(channels[0].next_transition_time, +diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c +index 6f1f723..68c353f 100644 +--- a/hw/timer/mc146818rtc.c ++++ b/hw/timer/mc146818rtc.c +@@ -34,6 +34,7 @@ + #include "qapi/qapi-commands-misc.h" + #include "qapi/qapi-events-misc.h" + #include "qapi/visitor.h" ++#include "migration/migration.h" + + #ifdef TARGET_I386 + #include "hw/i386/apic.h" +@@ -839,6 +840,11 @@ static int rtc_post_load(void *opaque, int version_id) + static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) + { + RTCState *s = (RTCState *)opaque; ++ ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->irq_reinject_on_ack_count != 0; + } + +diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c +index 836b11f..9d7b9df 100644 +--- a/hw/usb/hcd-uhci.c ++++ b/hw/usb/hcd-uhci.c +@@ -1214,12 +1214,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) + UHCIState *s = UHCI(dev); + uint8_t *pci_conf = s->dev.config; + int i; ++ int irq_pin; + + pci_conf[PCI_CLASS_PROG] = 0x00; + /* TODO: reset value should be 0. 
*/ + pci_conf[USB_SBRN] = USB_RELEASE_1; // release number + +- pci_config_set_interrupt_pin(pci_conf, u->info.irq_pin + 1); ++ irq_pin = u->info.irq_pin; ++ pci_config_set_interrupt_pin(pci_conf, irq_pin + 1); + + if (s->masterbus) { + USBPort *ports[NB_PORTS]; +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index 8f1a01a..ca19474 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c +@@ -3560,9 +3560,27 @@ static const VMStateDescription vmstate_xhci_slot = { + } + }; + ++static int xhci_event_pre_save(void *opaque) ++{ ++ XHCIEvent *s = opaque; ++ ++ s->cve_2014_5263_a = ((uint8_t *)&s->type)[0]; ++ s->cve_2014_5263_b = ((uint8_t *)&s->type)[1]; ++ ++ return 0; ++} ++ ++bool migrate_cve_2014_5263_xhci_fields; ++ ++static bool xhci_event_cve_2014_5263(void *opaque, int version_id) ++{ ++ return migrate_cve_2014_5263_xhci_fields; ++} ++ + static const VMStateDescription vmstate_xhci_event = { + .name = "xhci-event", + .version_id = 1, ++ .pre_save = xhci_event_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT32(type, XHCIEvent), + VMSTATE_UINT32(ccode, XHCIEvent), +@@ -3571,6 +3589,8 @@ static const VMStateDescription vmstate_xhci_event = { + VMSTATE_UINT32(flags, XHCIEvent), + VMSTATE_UINT8(slotid, XHCIEvent), + VMSTATE_UINT8(epid, XHCIEvent), ++ VMSTATE_UINT8_TEST(cve_2014_5263_a, XHCIEvent, xhci_event_cve_2014_5263), ++ VMSTATE_UINT8_TEST(cve_2014_5263_b, XHCIEvent, xhci_event_cve_2014_5263), + VMSTATE_END_OF_LIST() + } + }; +diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h +index fc36a4c..89d4cf7 100644 +--- a/hw/usb/hcd-xhci.h ++++ b/hw/usb/hcd-xhci.h +@@ -153,6 +153,8 @@ typedef struct XHCIEvent { + uint32_t flags; + uint8_t slotid; + uint8_t epid; ++ uint8_t cve_2014_5263_a; ++ uint8_t cve_2014_5263_b; + } XHCIEvent; + + typedef struct XHCIInterrupter { +diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h +index 59aeb06..7b5cc25 100644 +--- a/include/hw/acpi/ich9.h ++++ b/include/hw/acpi/ich9.h +@@ -61,6 +61,9 @@ typedef struct 
ICH9LPCPMRegs { + uint8_t smm_enabled; + bool enable_tco; + TCOIORegs tco_regs; ++ ++ /* RH addition, see bz 1489800 */ ++ bool force_rev1_fadt; + } ICH9LPCPMRegs; + + #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" +diff --git a/include/hw/compat.h b/include/hw/compat.h +index c08f404..22262c7 100644 +--- a/include/hw/compat.h ++++ b/include/hw/compat.h +@@ -282,4 +282,233 @@ + .value = "on",\ + }, + ++/* Mostly like HW_COMPAT_2_1 but: ++ * we don't need virtio-scsi-pci since 7.0 already had that on ++ * ++ * RH: Note, qemu-extended-regs should have been enabled in the 7.1 ++ * machine type, but was accidentally turned off in 7.2 onwards. ++ * ++ */ ++#define HW_COMPAT_RHEL7_1 \ ++ { /* COMPAT_RHEL7.1 */ \ ++ .driver = "intel-hda-generic",\ ++ .property = "old_msi_addr",\ ++ .value = "on",\ ++ },{\ ++ .driver = "VGA",\ ++ .property = "qemu-extended-regs",\ ++ .value = "off",\ ++ },{\ ++ .driver = "secondary-vga",\ ++ .property = "qemu-extended-regs",\ ++ .value = "off",\ ++ },{\ ++ .driver = "usb-mouse",\ ++ .property = "usb_version",\ ++ .value = stringify(1),\ ++ },{\ ++ .driver = "usb-kbd",\ ++ .property = "usb_version",\ ++ .value = stringify(1),\ ++ },{\ ++ .driver = "virtio-pci",\ ++ .property = "virtio-pci-bus-master-bug-migration",\ ++ .value = "on",\ ++ },{\ ++ .driver = "virtio-blk-pci",\ ++ .property = "any_layout",\ ++ .value = "off",\ ++ },{\ ++ .driver = "virtio-balloon-pci",\ ++ .property = "any_layout",\ ++ .value = "off",\ ++ },{\ ++ .driver = "virtio-serial-pci",\ ++ .property = "any_layout",\ ++ .value = "off",\ ++ },{\ ++ .driver = "virtio-9p-pci",\ ++ .property = "any_layout",\ ++ .value = "off",\ ++ },{\ ++ .driver = "virtio-rng-pci",\ ++ .property = "any_layout",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_1 - introduced with 2.10.0 */ \ ++ .driver = "migration",\ ++ .property = "send-configuration",\ ++ .value = "off",\ ++ }, ++ ++/* Mostly like HW_COMPAT_2_4 + 2_3 but: ++ * we don't need "any_layout" as it has been backported to 7.2 ++ */ 
++ ++#define HW_COMPAT_RHEL7_2 \ ++ {\ ++ .driver = "virtio-blk-device",\ ++ .property = "scsi",\ ++ .value = "true",\ ++ },{\ ++ .driver = "e1000-82540em",\ ++ .property = "extra_mac_registers",\ ++ .value = "off",\ ++ },{\ ++ .driver = "virtio-pci",\ ++ .property = "x-disable-pcie",\ ++ .value = "on",\ ++ },{\ ++ .driver = "virtio-pci",\ ++ .property = "migrate-extra",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_2 */ \ ++ .driver = "fw_cfg_mem",\ ++ .property = "dma_enabled",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_2 */ \ ++ .driver = "fw_cfg_io",\ ++ .property = "dma_enabled",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_2 */ \ ++ .driver = "isa-fdc",\ ++ .property = "fallback",\ ++ .value = "144",\ ++ },{ /* HW_COMPAT_RHEL7_2 */ \ ++ .driver = "virtio-pci",\ ++ .property = "disable-modern",\ ++ .value = "on",\ ++ },{ /* HW_COMPAT_RHEL7_2 */ \ ++ .driver = "virtio-pci",\ ++ .property = "disable-legacy",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_2 */ \ ++ .driver = TYPE_PCI_DEVICE,\ ++ .property = "x-pcie-lnksta-dllla",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_2 */ \ ++ .driver = "virtio-pci",\ ++ .property = "page-per-vq",\ ++ .value = "on",\ ++ },{ /* HW_COMPAT_RHEL7_2 - introduced with 2.10.0 */ \ ++ .driver = "migration",\ ++ .property = "send-section-footer",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_2 - introduced with 2.10.0 */ \ ++ .driver = "migration",\ ++ .property = "store-global-state",\ ++ .value = "off",\ ++ }, ++ ++/* Mostly like HW_COMPAT_2_6 + HW_COMPAT_2_7 + HW_COMPAT_2_8 except ++ * disable-modern, disable-legacy, page-per-vq have already been ++ * backported to RHEL7.3 ++ */ ++#define HW_COMPAT_RHEL7_3 \ ++ { /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "virtio-mmio",\ ++ .property = "format_transport_address",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "virtio-serial-device",\ ++ .property = "emergency-write",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "ioapic",\ ++ 
.property = "version",\ ++ .value = "0x11",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "intel-iommu",\ ++ .property = "x-buggy-eim",\ ++ .value = "true",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "virtio-pci",\ ++ .property = "x-ignore-backend-features",\ ++ .value = "on",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "fw_cfg_mem",\ ++ .property = "x-file-slots",\ ++ .value = stringify(0x10),\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "fw_cfg_io",\ ++ .property = "x-file-slots",\ ++ .value = stringify(0x10),\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "pflash_cfi01",\ ++ .property = "old-multiple-chip-handling",\ ++ .value = "on",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = TYPE_PCI_DEVICE,\ ++ .property = "x-pcie-extcap-init",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "virtio-pci",\ ++ .property = "x-pcie-deverr-init",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "virtio-pci",\ ++ .property = "x-pcie-lnkctl-init",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "virtio-pci",\ ++ .property = "x-pcie-pm-init",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "virtio-net-device",\ ++ .property = "x-mtu-bypass-backend",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_3 */ \ ++ .driver = "e1000e",\ ++ .property = "__redhat_e1000e_7_3_intr_state",\ ++ .value = "on",\ ++ }, ++ ++/* Mostly like HW_COMPAT_2_9 except ++ * x-mtu-bypass-backend, x-migrate-msix has already been ++ * backported to RHEL7.4. shpc was already on in 7.4. 
++ */ ++#define HW_COMPAT_RHEL7_4 \ ++ { /* HW_COMPAT_RHEL7_4 */ \ ++ .driver = "intel-iommu",\ ++ .property = "pt",\ ++ .value = "off",\ ++ }, ++ ++/* The same as HW_COMPAT_2_11 + HW_COMPAT_2_10 */ ++#define HW_COMPAT_RHEL7_5 \ ++ { /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ \ ++ .driver = "hpet",\ ++ .property = "hpet-offset-saved",\ ++ .value = "false",\ ++ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ \ ++ .driver = "virtio-blk-pci",\ ++ .property = "vectors",\ ++ .value = "2",\ ++ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ \ ++ .driver = "vhost-user-blk-pci",\ ++ .property = "vectors",\ ++ .value = "2",\ ++ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 but \ ++ bz 1608778 modified for our naming */ \ ++ .driver = "e1000-82540em",\ ++ .property = "migrate_tso_props",\ ++ .value = "off",\ ++ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_10 */ \ ++ .driver = "virtio-mouse-device",\ ++ .property = "wheel-axis",\ ++ .value = "false",\ ++ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_10 */ \ ++ .driver = "virtio-tablet-device",\ ++ .property = "wheel-axis",\ ++ .value = "false",\ ++ },{ /* HW_COMPAT_RHEL7_5 */ \ ++ .driver = "cirrus-vga",\ ++ .property = "vgamem_mb",\ ++ .value = "16",\ ++ },{ /* HW_COMPAT_RHEL7_5 */ \ ++ .driver = "migration",\ ++ .property = "decompress-error-check",\ ++ .value = "off",\ ++ }, ++ ++ + #endif /* HW_COMPAT_H */ +diff --git a/include/hw/usb.h b/include/hw/usb.h +index a5080ad..b943ec9 100644 +--- a/include/hw/usb.h ++++ b/include/hw/usb.h +@@ -606,4 +606,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, + uint8_t interface_class, uint8_t interface_subclass, + uint8_t interface_protocol); + ++ ++/* hcd-xhci.c -- rhel7.0.0 machine type compatibility */ ++extern bool migrate_cve_2014_5263_xhci_fields; ++ + #endif +diff --git a/migration/migration.c b/migration/migration.c +index b7d9854..381039c 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -106,6 +106,8 @@ enum mig_rp_message_type { + MIG_RP_MSG_MAX 
+ }; + ++bool migrate_pre_2_2; ++ + /* When we add fault tolerance, we could have several + migrations at once. For now we don't need to add + dynamic creation of migration */ +diff --git a/migration/migration.h b/migration/migration.h +index 64a7b33..405d984 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -288,6 +288,11 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); + + void dirty_bitmap_mig_before_vm_start(void); + void init_dirty_bitmap_incoming_migration(void); ++/* ++ * Disables a load of subsections that were added in 2.2/rh7.2 for backwards ++ * migration compatibility. ++ */ ++extern bool migrate_pre_2_2; + + #define qemu_ram_foreach_block \ + #warning "Use qemu_ram_foreach_block_migratable in migration code" +diff --git a/qdev-monitor.c b/qdev-monitor.c +index 61e0300..f439b83 100644 +--- a/qdev-monitor.c ++++ b/qdev-monitor.c +@@ -47,7 +47,6 @@ typedef struct QDevAlias + + /* Please keep this table sorted by typename. */ + static const QDevAlias qdev_alias_table[] = { +- { "e1000", "e1000-82540em" }, + { "ich9-ahci", "ahci" }, + { "lsi53c895a", "lsi" }, + { "virtio-9p-ccw", "virtio-9p", QEMU_ARCH_S390X }, +diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py +index d346728..4bca2bf 100755 +--- a/scripts/vmstate-static-checker.py ++++ b/scripts/vmstate-static-checker.py +@@ -105,7 +105,6 @@ def get_changed_sec_name(sec): + # Section names can change -- see commit 292b1634 for an example. 
+ changes = { + "ICH9 LPC": "ICH9-LPC", +- "e1000-82540em": "e1000", + } + + for item in changes: +-- +1.8.3.1 + diff --git a/0007-Add-aarch64-machine-types.patch b/0007-Add-aarch64-machine-types.patch new file mode 100644 index 0000000..c546843 --- /dev/null +++ b/0007-Add-aarch64-machine-types.patch @@ -0,0 +1,246 @@ +From 2c0d79871ccb5383b1a91e5fc9139b6f8e8ed8e0 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 8 Nov 2018 12:00:54 +0100 +Subject: Add aarch64 machine types + +Adding changes to add RHEL machine types for aarch64 architecture. + +Signed-off-by: Miroslav Rezanina +--- + hw/arm/virt.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 22 +++++++++ + 2 files changed, 147 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 281ddcd..b02e4a0 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -60,6 +60,7 @@ + #include "standard-headers/linux/input.h" + #include "hw/arm/smmuv3.h" + ++#if 0 /* disabled Red Hat Enterprise Linux */ + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ + void *data) \ +@@ -87,7 +88,36 @@ + DEFINE_VIRT_MACHINE_LATEST(major, minor, true) + #define DEFINE_VIRT_MACHINE(major, minor) \ + DEFINE_VIRT_MACHINE_LATEST(major, minor, false) +- ++#endif /* disabled for RHEL */ ++ ++#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ ++ static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ ++ void *data) \ ++ { \ ++ MachineClass *mc = MACHINE_CLASS(oc); \ ++ rhel##m##n##s##_virt_options(mc); \ ++ mc->desc = "RHEL " # m "." # n "." # s " ARM Virtual Machine"; \ ++ if (latest) { \ ++ mc->alias = "virt"; \ ++ mc->is_default = 1; \ ++ } \ ++ } \ ++ static const TypeInfo rhel##m##n##s##_machvirt_info = { \ ++ .name = MACHINE_TYPE_NAME("virt-rhel" # m "." # n "." 
# s), \ ++ .parent = TYPE_RHEL_MACHINE, \ ++ .instance_init = rhel##m##n##s##_virt_instance_init, \ ++ .class_init = rhel##m##n##s##_virt_class_init, \ ++ }; \ ++ static void rhel##m##n##s##_machvirt_init(void) \ ++ { \ ++ type_register_static(&rhel##m##n##s##_machvirt_info); \ ++ } \ ++ type_init(rhel##m##n##s##_machvirt_init); ++ ++#define DEFINE_RHEL_MACHINE_AS_LATEST(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, true) ++#define DEFINE_RHEL_MACHINE(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) + + /* Number of external interrupt lines to configure the GIC with */ + #define NUM_IRQS 256 +@@ -1539,6 +1569,7 @@ static void machvirt_init(MachineState *machine) + qemu_add_machine_init_done_notifier(&vms->machine_done); + } + ++#if 0 /* disabled for RHEL */ + static bool virt_get_secure(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -1567,6 +1598,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) + vms->virt = value; + } + ++#endif /* disabled for RHEL */ + static bool virt_get_highmem(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -1621,6 +1653,7 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp) + } + } + ++#if 0 + static char *virt_get_iommu(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -1648,6 +1681,7 @@ static void virt_set_iommu(Object *obj, const char *value, Error **errp) + error_append_hint(errp, "Valid values are none, smmuv3.\n"); + } + } ++#endif + + static CpuInstanceProperties + virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) +@@ -1687,6 +1721,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + return ms->possible_cpus; + } + ++#if 0 /* disabled for RHEL */ + static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +@@ -1835,6 +1870,9 @@ 
static void virt_machine_3_0_options(MachineClass *mc) + } + DEFINE_VIRT_MACHINE_AS_LATEST(3, 0) + ++#define VIRT_COMPAT_2_12 \ ++ HW_COMPAT_2_12 ++ + static void virt_2_12_instance_init(Object *obj) + { + virt_3_0_instance_init(obj); +@@ -1960,3 +1998,89 @@ static void virt_machine_2_6_options(MachineClass *mc) + vmc->no_pmu = true; + } + DEFINE_VIRT_MACHINE(2, 6) ++#endif /* disabled for RHEL */ ++ ++static void rhel_machine_class_init(ObjectClass *oc, void *data) ++{ ++ MachineClass *mc = MACHINE_CLASS(oc); ++ ++ mc->family = "virt-rhel-Z"; ++ mc->init = machvirt_init; ++ /* Start max_cpus at the maximum QEMU supports. We'll further restrict ++ * it later in machvirt_init, where we have more information about the ++ * configuration of the particular instance. ++ */ ++ mc->max_cpus = 255; ++ mc->block_default_type = IF_VIRTIO; ++ mc->no_cdrom = 1; ++ mc->pci_allow_0_address = true; ++ /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */ ++ mc->minimum_page_bits = 12; ++ mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids; ++ mc->cpu_index_to_instance_props = virt_cpu_index_to_props; ++ mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a57"); ++ mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; ++} ++ ++static const TypeInfo rhel_machine_info = { ++ .name = TYPE_RHEL_MACHINE, ++ .parent = TYPE_MACHINE, ++ .abstract = true, ++ .instance_size = sizeof(VirtMachineState), ++ .class_size = sizeof(VirtMachineClass), ++ .class_init = rhel_machine_class_init, ++}; ++ ++static void rhel_machine_init(void) ++{ ++ type_register_static(&rhel_machine_info); ++} ++type_init(rhel_machine_init); ++ ++static void rhel760_virt_instance_init(Object *obj) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); ++ ++ /* EL3 is disabled by default and non-configurable for RHEL */ ++ vms->secure = false; ++ /* EL2 is disabled by default and non-configurable for RHEL */ ++ vms->virt = false; ++ /* High 
memory is enabled by default for RHEL */ ++ vms->highmem = true; ++ object_property_add_bool(obj, "highmem", virt_get_highmem, ++ virt_set_highmem, NULL); ++ object_property_set_description(obj, "highmem", ++ "Set on/off to enable/disable using " ++ "physical address space above 32 bits", ++ NULL); ++ /* Default GIC type is still v2, but became configurable for RHEL */ ++ vms->gic_version = 2; ++ object_property_add_str(obj, "gic-version", virt_get_gic_version, ++ virt_set_gic_version, NULL); ++ object_property_set_description(obj, "gic-version", ++ "Set GIC version. " ++ "Valid values are 2, 3 and host", NULL); ++ ++ if (vmc->no_its) { ++ vms->its = false; ++ } else { ++ /* Default allows ITS instantiation */ ++ vms->its = true; ++ object_property_add_bool(obj, "its", virt_get_its, ++ virt_set_its, NULL); ++ object_property_set_description(obj, "its", ++ "Set on/off to enable/disable " ++ "ITS instantiation", ++ NULL); ++ } ++ ++ vms->memmap=a15memmap; ++ vms->irqmap=a15irqmap; ++} ++ ++static void rhel760_virt_options(MachineClass *mc) ++{ ++ SET_MACHINE_COMPAT(mc, ARM_RHEL_COMPAT); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(7, 6, 0) +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 9a870cc..2293315 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -128,6 +128,7 @@ typedef struct { + + #define VIRT_ECAM_ID(high) (high ? 
VIRT_PCIE_ECAM_HIGH : VIRT_PCIE_ECAM) + ++#if 0 /* disabled for Red Hat Enterprise Linux */ + #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") + #define VIRT_MACHINE(obj) \ + OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) +@@ -136,6 +137,27 @@ typedef struct { + #define VIRT_MACHINE_CLASS(klass) \ + OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) + ++#else ++#define TYPE_RHEL_MACHINE MACHINE_TYPE_NAME("virt-rhel") ++#define VIRT_MACHINE(obj) \ ++ OBJECT_CHECK(VirtMachineState, (obj), TYPE_RHEL_MACHINE) ++#define VIRT_MACHINE_GET_CLASS(obj) \ ++ OBJECT_GET_CLASS(VirtMachineClass, obj, TYPE_RHEL_MACHINE) ++#define VIRT_MACHINE_CLASS(klass) \ ++ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_RHEL_MACHINE) ++#endif ++ ++/* This macro is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. ++ */ ++#define ARM_RHEL_COMPAT \ ++ {\ ++ .driver = "virtio-net-pci",\ ++ .property = "romfile",\ ++ .value = "",\ ++ }, ++ + void virt_acpi_setup(VirtMachineState *vms); + + /* Return the number of used redistributor regions */ +-- +1.8.3.1 + diff --git a/0008-Add-ppc64-machine-types.patch b/0008-Add-ppc64-machine-types.patch new file mode 100644 index 0000000..dbe223e --- /dev/null +++ b/0008-Add-ppc64-machine-types.patch @@ -0,0 +1,397 @@ +From b6c41d9cfe7ae58455737c967f2e47d6bc99d21e Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 8 Nov 2018 12:01:38 +0100 +Subject: Add ppc64 machine types + +Adding changes to add RHEL machine types for ppc64 architecture. 
+ +Signed-off-by: Miroslav Rezanina +--- + hw/ppc/spapr.c | 252 ++++++++++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr_cpu_core.c | 13 +++ + include/hw/ppc/spapr.h | 1 + + target/ppc/compat.c | 13 ++- + target/ppc/cpu.h | 1 + + 5 files changed, 279 insertions(+), 1 deletion(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 2f8c304..b8bdb69 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4009,6 +4009,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) + smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; + smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */ + spapr_caps_add_properties(smc, &error_abort); ++ smc->has_power9_support = true; + } + + static const TypeInfo spapr_machine_info = { +@@ -4059,6 +4060,7 @@ static const TypeInfo spapr_machine_info = { + } \ + type_init(spapr_machine_register_##suffix) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* + * pseries-3.0 + */ +@@ -4248,6 +4250,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); + .property = "pre-2.8-migration", \ + .value = "on", \ + }, ++#endif + + static void phb_placement_2_7(sPAPRMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, +@@ -4298,6 +4301,7 @@ static void phb_placement_2_7(sPAPRMachineState *spapr, uint32_t index, + */ + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void spapr_machine_2_7_instance_options(MachineState *machine) + { + sPAPRMachineState *spapr = SPAPR_MACHINE(machine); +@@ -4457,6 +4461,254 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) + SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_1); + } + DEFINE_SPAPR_MACHINE(2_1, "2.1", false); ++#endif ++ ++/* ++ * pseries-rhel7.6.0 ++ */ ++ ++static void spapr_machine_rhel760_instance_options(MachineState *machine) ++{ ++} ++ ++static void spapr_machine_rhel760_class_options(MachineClass *mc) ++{ ++ /* Defaults for the latest behaviour inherited from the base class */ ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760, 
"rhel7.6.0", true); ++ ++/* ++ * pseries-rhel7.6.0-sxxm ++ * ++ * pseries-rhel7.6.0 with speculative execution exploit mitigations enabled by default ++ */ ++static void spapr_machine_rhel760sxxm_instance_options(MachineState *machine) ++{ ++ spapr_machine_rhel760_instance_options(machine); ++} ++ ++static void spapr_machine_rhel760sxxm_class_options(MachineClass *mc) ++{ ++ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel760_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.5.0 ++ * like SPAPR_COMPAT_2_11 and SPAPR_COMPAT_2_10 ++ * SPAPR_CAP_HTM already enabled in 7.4 ++ * ++ */ ++#define SPAPR_COMPAT_RHEL7_5 \ ++ HW_COMPAT_RHEL7_5 \ ++ ++static void spapr_machine_rhel750_instance_options(MachineState *machine) ++{ ++ spapr_machine_rhel760_instance_options(machine); ++} ++ ++static void spapr_machine_rhel750_class_options(MachineClass *mc) ++{ ++ spapr_machine_rhel760_class_options(mc); ++ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_5); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel750, "rhel7.5.0", false); ++ ++/* ++ * pseries-rhel7.5.0-sxxm ++ * ++ * pseries-rhel7.5.0 with speculative execution exploit mitigations enabled by default ++ */ ++static void spapr_machine_rhel750sxxm_instance_options(MachineState *machine) ++{ ++ spapr_machine_rhel750_instance_options(machine); ++} ++ ++static void spapr_machine_rhel750sxxm_class_options(MachineClass *mc) ++{ ++ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel750_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel750sxxm, 
"rhel7.5.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.4.0 ++ * like SPAPR_COMPAT_2_9 ++ */ ++ ++#define SPAPR_COMPAT_RHEL7_4 \ ++ HW_COMPAT_RHEL7_4 \ ++ { \ ++ .driver = TYPE_POWERPC_CPU, \ ++ .property = "pre-2.10-migration", \ ++ .value = "on", \ ++ }, \ ++ ++static void spapr_machine_rhel740_instance_options(MachineState *machine) ++{ ++ spapr_machine_rhel750_instance_options(machine); ++} ++ ++static void spapr_machine_rhel740_class_options(MachineClass *mc) ++{ ++ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel750_class_options(mc); ++ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_4); ++ mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram; ++ smc->has_power9_support = false; ++ smc->pre_2_10_has_unused_icps = true; ++ smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED; ++ smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_ON; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel740, "rhel7.4.0", false); ++ ++/* ++ * pseries-rhel7.4.0-sxxm ++ * ++ * pseries-rhel7.4.0 with speculative execution exploit mitigations enabled by default ++ */ ++static void spapr_machine_rhel740sxxm_instance_options(MachineState *machine) ++{ ++ spapr_machine_rhel740_instance_options(machine); ++} ++ ++static void spapr_machine_rhel740sxxm_class_options(MachineClass *mc) ++{ ++ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel740_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel740sxxm, "rhel7.4.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.3.0 ++ * like SPAPR_COMPAT_2_6/_2_7/_2_8 but "ddw" has been backported to RHEL7_3 ++ */ ++#define SPAPR_COMPAT_RHEL7_3 \ ++ HW_COMPAT_RHEL7_3 \ ++ { \ ++ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ ++ .property = "mem_win_size", \ ++ .value = stringify(SPAPR_PCI_2_7_MMIO_WIN_SIZE),\ ++ }, \ ++ { \ ++ .driver = 
TYPE_SPAPR_PCI_HOST_BRIDGE, \ ++ .property = "mem64_win_size", \ ++ .value = "0", \ ++ }, \ ++ { \ ++ .driver = TYPE_POWERPC_CPU, \ ++ .property = "pre-2.8-migration", \ ++ .value = "on", \ ++ }, \ ++ { \ ++ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ ++ .property = "pre-2.8-migration", \ ++ .value = "on", \ ++ }, \ ++ { \ ++ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ ++ .property = "pcie-extended-configuration-space",\ ++ .value = "off", \ ++ }, ++ ++static void spapr_machine_rhel730_instance_options(MachineState *machine) ++{ ++ sPAPRMachineState *spapr = SPAPR_MACHINE(machine); ++ ++ spapr_machine_rhel740_instance_options(machine); ++ spapr->use_hotplug_event_source = false; ++} ++ ++static void spapr_machine_rhel730_class_options(MachineClass *mc) ++{ ++ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel740_class_options(mc); ++ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power7_v2.3"); ++ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_3); ++ smc->phb_placement = phb_placement_2_7; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel730, "rhel7.3.0", false); ++ ++/* ++ * pseries-rhel7.3.0-sxxm ++ * ++ * pseries-rhel7.3.0 with speculative execution exploit mitigations enabled by default ++ */ ++static void spapr_machine_rhel730sxxm_instance_options(MachineState *machine) ++{ ++ spapr_machine_rhel730_instance_options(machine); ++} ++ ++static void spapr_machine_rhel730sxxm_class_options(MachineClass *mc) ++{ ++ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel730_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel730sxxm, "rhel7.3.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.2.0 ++ */ ++/* Should be like SPAPR_COMPAT_2_5 + 2_4 + 2_3, but "dynamic-reconfiguration" ++ * has been backported to RHEL7_2 so we don't need it here. 
++ */ ++ ++#define SPAPR_COMPAT_RHEL7_2 \ ++ HW_COMPAT_RHEL7_2 \ ++ { \ ++ .driver = "spapr-vlan", \ ++ .property = "use-rx-buffer-pools", \ ++ .value = "off", \ ++ },{ \ ++ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE,\ ++ .property = "ddw",\ ++ .value = stringify(off),\ ++ }, ++ ++ ++static void spapr_machine_rhel720_instance_options(MachineState *machine) ++{ ++ spapr_machine_rhel730_instance_options(machine); ++} ++ ++static void spapr_machine_rhel720_class_options(MachineClass *mc) ++{ ++ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel730_class_options(mc); ++ smc->use_ohci_by_default = true; ++ mc->has_hotpluggable_cpus = NULL; ++ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_2); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel720, "rhel7.2.0", false); + + static void spapr_machine_register_types(void) + { +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index fb29eec..a081b01 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -21,6 +21,7 @@ + #include "sysemu/numa.h" + #include "sysemu/hw_accel.h" + #include "qemu/error-report.h" ++#include "cpu-models.h" + + static void spapr_cpu_reset(void *opaque) + { +@@ -212,6 +213,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr, + { + CPUPPCState *env = &cpu->env; + Error *local_err = NULL; ++ sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + + object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); + if (local_err) { +@@ -224,6 +226,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr, + cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); + kvmppc_set_papr(cpu); + ++ if (!smc->has_power9_support && ++ (((spapr->max_compat_pvr && ++ ppc_compat_cmp(spapr->max_compat_pvr, ++ CPU_POWERPC_LOGICAL_3_00) >= 0)) || ++ (!spapr->max_compat_pvr && ++ ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, 0)))) { ++ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, ++ "POWER9 CPU is not supported by this machine class"); 
++ return; ++ } ++ + qemu_register_reset(spapr_cpu_reset, cpu); + spapr_cpu_reset(cpu); + +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index 7e5de1a..330c370 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -101,6 +101,7 @@ struct sPAPRMachineClass { + bool dr_lmb_enabled; /* enable dynamic-reconfig/hotplug of LMBs */ + bool use_ohci_by_default; /* use USB-OHCI instead of XHCI */ + bool pre_2_10_has_unused_icps; ++ bool has_power9_support; + void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, +diff --git a/target/ppc/compat.c b/target/ppc/compat.c +index 7de4bf3..3e2e353 100644 +--- a/target/ppc/compat.c ++++ b/target/ppc/compat.c +@@ -105,8 +105,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) + return NULL; + } + ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2) ++{ ++ const CompatInfo *compat1 = compat_by_pvr(pvr1); ++ const CompatInfo *compat2 = compat_by_pvr(pvr2); ++ ++ g_assert(compat1); ++ g_assert(compat2); ++ ++ return compat1 - compat2; ++} ++ + static bool pcc_compat(PowerPCCPUClass *pcc, uint32_t compat_pvr, +- uint32_t min_compat_pvr, uint32_t max_compat_pvr) ++ uint32_t min_compat_pvr, uint32_t max_compat_pvr) + { + const CompatInfo *compat = compat_by_pvr(compat_pvr); + const CompatInfo *min = compat_by_pvr(min_compat_pvr); +diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h +index 4edcf62..532f0d5 100644 +--- a/target/ppc/cpu.h ++++ b/target/ppc/cpu.h +@@ -1365,6 +1365,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) + + /* Compatibility modes */ + #if defined(TARGET_PPC64) ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2); + bool ppc_check_compat(PowerPCCPU *cpu, uint32_t compat_pvr, + uint32_t min_compat_pvr, uint32_t max_compat_pvr); + bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, +-- +1.8.3.1 + diff --git a/0009-Add-s390x-machine-types.patch 
b/0009-Add-s390x-machine-types.patch new file mode 100644 index 0000000..92b5cbd --- /dev/null +++ b/0009-Add-s390x-machine-types.patch @@ -0,0 +1,86 @@ +From 05b950dccdf9e8f58f3358730aa4705642d0196f Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 8 Nov 2018 12:02:37 +0100 +Subject: Add s390x machine types + +Adding changes to add RHEL machine types for s390x architecture. + +Signed-off-by: Miroslav Rezanina +--- + hw/s390x/s390-virtio-ccw.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 45 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 7983185..0f135c9 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -649,7 +649,7 @@ bool css_migration_enabled(void) + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ + ccw_machine_##suffix##_class_options(mc); \ +- mc->desc = "VirtIO-ccw based S390 machine v" verstr; \ ++ mc->desc = "VirtIO-ccw based S390 machine " verstr; \ + if (latest) { \ + mc->alias = "s390-ccw-virtio"; \ + mc->is_default = 1; \ +@@ -676,6 +676,8 @@ bool css_migration_enabled(void) + #define CCW_COMPAT_2_12 \ + HW_COMPAT_2_12 + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ ++ + #define CCW_COMPAT_2_11 \ + HW_COMPAT_2_11 \ + {\ +@@ -898,6 +900,48 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + } + DEFINE_CCW_MACHINE(2_4, "2.4", false); + ++#else ++ ++/* ++ * like CCW_COMPAT_2_11, but includes HW_COMPAT_RHEL7_5 (derived from ++ * HW_COMPAT_2_11 and HW_COMPAT_2_10) instead of HW_COMPAT_2_11 ++ */ ++#define CCW_COMPAT_RHEL7_5 \ ++ HW_COMPAT_RHEL7_5 \ ++ {\ ++ .driver = TYPE_SCLP_EVENT_FACILITY,\ ++ .property = "allow_all_mask_sizes",\ ++ .value = "off",\ ++ }, ++ ++static void ccw_machine_rhel760_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel760_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", true); ++ ++static void 
ccw_machine_rhel750_instance_options(MachineState *machine) ++{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V2_11 }; ++ ccw_machine_rhel760_instance_options(machine); ++ ++ /* before 2.12 we emulated the very first z900, and RHEL 7.5 is ++ based on 2.10 */ ++ s390_set_qemu_cpu_model(0x2064, 7, 1, qemu_cpu_feat); ++} ++ ++static void ccw_machine_rhel750_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel760_class_options(mc); ++ SET_MACHINE_COMPAT(mc, CCW_COMPAT_RHEL7_5); ++} ++DEFINE_CCW_MACHINE(rhel750, "rhel7.5.0", false); ++ ++#endif ++ + static void ccw_machine_register_types(void) + { + type_register_static(&ccw_machine_info); +-- +1.8.3.1 + diff --git a/0010-Add-x86_64-machine-types.patch b/0010-Add-x86_64-machine-types.patch new file mode 100644 index 0000000..30ec2d5 --- /dev/null +++ b/0010-Add-x86_64-machine-types.patch @@ -0,0 +1,1069 @@ +From b95483e9a18050c7dac0e6c17b049f0733a409cd Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 8 Nov 2018 12:03:11 +0100 +Subject: Add x86_64 machine types + +Adding changes to add RHEL machine types for x86_64 architecture. 
+ +Signed-off-by: Miroslav Rezanina +--- + hw/i386/acpi-build.c | 3 + + hw/i386/pc.c | 7 +- + hw/i386/pc_piix.c | 194 ++++++++++++++++- + hw/i386/pc_q35.c | 93 ++++++++- + include/hw/i386/pc.h | 564 ++++++++++++++++++++++++++++++++++++++++++++++++++ + target/i386/cpu.c | 9 +- + target/i386/machine.c | 21 ++ + 7 files changed, 884 insertions(+), 7 deletions(-) + +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index e1ee8ae..be9bdb5 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -184,6 +184,9 @@ static void acpi_get_pm_info(AcpiPmInfo *pm) + pm->fadt.reset_reg = r; + pm->fadt.reset_val = 0xf; + pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; ++ if (object_property_get_bool(lpc, ++ "__com.redhat_force-rev1-fadt", NULL)) ++ pm->fadt.rev = 1; + pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; + } + assert(obj); +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 11c287e..253d48d 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1419,7 +1419,8 @@ void pc_memory_init(PCMachineState *pcms, + option_rom_mr = g_malloc(sizeof(*option_rom_mr)); + memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, + &error_fatal); +- if (pcmc->pci_enabled) { ++ /* RH difference: See bz 1489800, explicitly make ROM ro */ ++ if (pcmc->pc_rom_ro) { + memory_region_set_readonly(option_rom_mr, true); + } + memory_region_add_subregion_overlap(rom_memory, +@@ -2387,6 +2388,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->save_tsc_khz = true; + pcmc->linuxboot_dma_enabled = true; + assert(!mc->get_hotplug_handler); ++ pcmc->pc_rom_ro = true; + mc->get_hotplug_handler = pc_get_hotpug_handler; + mc->cpu_index_to_instance_props = pc_cpu_index_to_props; + mc->get_default_cpu_node_id = pc_get_default_cpu_node_id; +@@ -2396,7 +2398,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->default_boot_order = "cad"; + mc->hot_add_cpu = pc_hot_add_cpu; + mc->block_default_type = IF_IDE; +- mc->max_cpus = 255; ++ /* 240: 
max CPU count for RHEL */ ++ mc->max_cpus = 240; + mc->reset = pc_machine_reset; + hc->pre_plug = pc_machine_device_pre_plug_cb; + hc->plug = pc_machine_device_plug_cb; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index dc09466..f0484ec 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -50,6 +50,7 @@ + #include "cpu.h" + #include "qapi/error.h" + #include "qemu/error-report.h" ++#include "migration/migration.h" + #ifdef CONFIG_XEN + #include + #include "hw/xen/xen_pt.h" +@@ -170,8 +171,8 @@ static void pc_init1(MachineState *machine, + if (pcmc->smbios_defaults) { + MachineClass *mc = MACHINE_GET_CLASS(machine); + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", +- mc->name, pcmc->smbios_legacy_mode, ++ smbios_set_defaults("Red Hat", "KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + SMBIOS_ENTRY_POINT_21); + } +@@ -309,6 +310,7 @@ static void pc_init1(MachineState *machine, + * HW_COMPAT_*, PC_COMPAT_*, or * pc_*_machine_options(). 
+ */ + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_compat_2_3(MachineState *machine) + { + PCMachineState *pcms = PC_MACHINE(machine); +@@ -433,6 +435,7 @@ static void pc_i440fx_3_0_machine_options(MachineClass *m) + pc_i440fx_machine_options(m); + m->alias = "pc"; + m->is_default = 1; ++ SET_MACHINE_COMPAT(m, PC_COMPAT_2_12); + } + + DEFINE_I440FX_MACHINE(v3_0, "pc-i440fx-3.0", NULL, +@@ -1148,3 +1151,190 @@ static void xenfv_machine_options(MachineClass *m) + DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init, + xenfv_machine_options); + #endif ++machine_init(pc_machine_init); ++ ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel7 machine type */ ++static void pc_machine_rhel7_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ m->family = "pc_piix_Y"; ++ m->default_machine_opts = "firmware=bios-256k.bin"; ++ pcmc->default_nic_model = "e1000"; ++ m->default_display = "std"; ++ SET_MACHINE_COMPAT(m, PC_RHEL_COMPAT); ++ m->alias = "pc"; ++ m->is_default = 1; ++} ++ ++static void pc_init_rhel760(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel760_options(MachineClass *m) ++{ ++ pc_machine_rhel7_options(m); ++ m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; ++} ++ ++DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, ++ pc_machine_rhel760_options); ++ ++static void pc_init_rhel750(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel750_options(MachineClass *m) ++{ ++ pc_machine_rhel760_options(m); ++ m->alias = NULL; ++ m->is_default = 0; ++ m->desc = "RHEL 7.5.0 PC (i440FX + PIIX, 1996)"; ++ m->auto_enable_numa_with_memhp = false; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_5_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(rhel750, 
"pc-i440fx-rhel7.5.0", pc_init_rhel750, ++ pc_machine_rhel750_options); ++ ++static void pc_init_rhel740(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel740_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel750_options(m); ++ m->desc = "RHEL 7.4.0 PC (i440FX + PIIX, 1996)"; ++ m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; ++ pcmc->pc_rom_ro = false; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_4_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(rhel740, "pc-i440fx-rhel7.4.0", pc_init_rhel740, ++ pc_machine_rhel740_options); ++ ++static void pc_init_rhel730(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel730_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel740_options(m); ++ m->desc = "RHEL 7.3.0 PC (i440FX + PIIX, 1996)"; ++ pcmc->linuxboot_dma_enabled = false; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_3_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(rhel730, "pc-i440fx-rhel7.3.0", pc_init_rhel730, ++ pc_machine_rhel730_options); ++ ++ ++static void pc_init_rhel720(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel720_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel730_options(m); ++ m->desc = "RHEL 7.2.0 PC (i440FX + PIIX, 1996)"; ++ /* From pc_i440fx_2_5_machine_options */ ++ pcmc->save_tsc_khz = false; ++ m->legacy_fw_cfg_order = 1; ++ /* Note: broken_reserved_end was already in 7.2 */ ++ /* From pc_i440fx_2_6_machine_options */ ++ pcmc->legacy_cpu_hotplug = true; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_2_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(rhel720, "pc-i440fx-rhel7.2.0", pc_init_rhel720, ++ pc_machine_rhel720_options); ++ ++static void pc_compat_rhel710(MachineState *machine) ++{ ++ 
PCMachineState *pcms = PC_MACHINE(machine); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); ++ ++ /* From pc_compat_2_2 */ ++ pcmc->rsdp_in_ram = false; ++ machine->suppress_vmdesc = true; ++ ++ /* From pc_compat_2_1 */ ++ pcmc->smbios_uuid_encoded = false; ++ x86_cpu_change_kvm_default("svm", NULL); ++ pcmc->enforce_aligned_dimm = false; ++ ++ /* Disable all the extra subsections that were added in 2.2 */ ++ migrate_pre_2_2 = true; ++ ++ /* From pc_i440fx_2_4_machine_options */ ++ pcmc->broken_reserved_end = true; ++} ++ ++static void pc_init_rhel710(MachineState *machine) ++{ ++ pc_compat_rhel710(machine); ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel710_options(MachineClass *m) ++{ ++ pc_machine_rhel720_options(m); ++ m->family = "pc_piix_Y"; ++ m->desc = "RHEL 7.1.0 PC (i440FX + PIIX, 1996)"; ++ m->default_display = "cirrus"; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_1_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(rhel710, "pc-i440fx-rhel7.1.0", pc_init_rhel710, ++ pc_machine_rhel710_options); ++ ++static void pc_compat_rhel700(MachineState *machine) ++{ ++ PCMachineState *pcms = PC_MACHINE(machine); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); ++ ++ pc_compat_rhel710(machine); ++ ++ /* Upstream enables it for everyone, we're a little more selective */ ++ x86_cpu_change_kvm_default("x2apic", NULL); ++ x86_cpu_change_kvm_default("svm", NULL); ++ pcmc->legacy_acpi_table_size = 6418; /* see pc_compat_2_0() */ ++ pcmc->smbios_legacy_mode = true; ++ pcmc->has_reserved_memory = false; ++ migrate_cve_2014_5263_xhci_fields = true; ++} ++ ++static void pc_init_rhel700(MachineState *machine) ++{ ++ pc_compat_rhel700(machine); ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel700_options(MachineClass *m) ++{ ++ pc_machine_rhel710_options(m); ++ m->family = "pc_piix_Y"; ++ m->desc = "RHEL 7.0.0 PC (i440FX + PIIX, 1996)"; ++ 
SET_MACHINE_COMPAT(m, PC_RHEL7_0_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, ++ pc_machine_rhel700_options); +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 532241e..c1024c5 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -145,8 +145,8 @@ static void pc_q35_init(MachineState *machine) + + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", +- mc->name, pcmc->smbios_legacy_mode, ++ smbios_set_defaults("Red Hat", "KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + SMBIOS_ENTRY_POINT_21); + } +@@ -294,6 +294,7 @@ static void pc_q35_init(MachineState *machine) + DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) + + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_q35_machine_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +@@ -315,6 +316,7 @@ static void pc_q35_3_0_machine_options(MachineClass *m) + { + pc_q35_machine_options(m); + m->alias = "q35"; ++ SET_MACHINE_COMPAT(m, PC_COMPAT_2_12); + } + + DEFINE_Q35_MACHINE(v3_0, "pc-q35-3.0", NULL, +@@ -416,3 +418,90 @@ static void pc_q35_2_4_machine_options(MachineClass *m) + + DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, + pc_q35_2_4_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel7 q35 machine type */ ++static void pc_q35_machine_rhel7_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pcmc->default_nic_model = "e1000e"; ++ m->family = "pc_q35_Z"; ++ m->default_machine_opts = "firmware=bios-256k.bin"; ++ m->default_display = "std"; ++ m->no_floppy = 1; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_SYS_BUS_DEVICE); ++ m->alias = "q35"; ++ m->max_cpus = 384; ++ SET_MACHINE_COMPAT(m, PC_RHEL_COMPAT); ++} ++ ++static void pc_q35_init_rhel760(MachineState 
*machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel760_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel7_options(m); ++ m->desc = "RHEL-7.6.0 PC (Q35 + ICH9, 2009)"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, ++ pc_q35_machine_rhel760_options); ++ ++static void pc_q35_init_rhel750(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel750_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel760_options(m); ++ m->alias = NULL; ++ m->desc = "RHEL-7.5.0 PC (Q35 + ICH9, 2009)"; ++ m->auto_enable_numa_with_memhp = false; ++ pcmc->default_nic_model = "e1000"; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_5_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel750, "pc-q35-rhel7.5.0", pc_q35_init_rhel750, ++ pc_q35_machine_rhel750_options); ++ ++static void pc_q35_init_rhel740(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel740_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel750_options(m); ++ m->desc = "RHEL-7.4.0 PC (Q35 + ICH9, 2009)"; ++ m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; ++ pcmc->pc_rom_ro = false; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_4_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel740, "pc-q35-rhel7.4.0", pc_q35_init_rhel740, ++ pc_q35_machine_rhel740_options); ++ ++static void pc_q35_init_rhel730(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel730_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel740_options(m); ++ m->desc = "RHEL-7.3.0 PC (Q35 + ICH9, 2009)"; ++ m->max_cpus = 255; ++ pcmc->linuxboot_dma_enabled = false; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_3_COMPAT); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, ++ pc_q35_machine_rhel730_options); +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h 
+index 6894f37..ef82513 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -134,6 +134,9 @@ struct PCMachineClass { + + /* use DMA capable linuxboot option rom */ + bool linuxboot_dma_enabled; ++ ++ /* RH only, see bz 1489800 */ ++ bool pc_rom_ro; + }; + + #define TYPE_PC_MACHINE "generic-pc-machine" +@@ -960,4 +963,565 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); + type_init(pc_machine_init_##suffix) + + extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); ++ ++/* See include/hw/compat.h for shared compatibility lists */ ++ ++/* This macro is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. ++ */ ++#define PC_RHEL_COMPAT \ ++ { /* PC_RHEL_COMPAT */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "host-phys-bits",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL_COMPAT bz 1508330 */ \ ++ .driver = "vfio-pci",\ ++ .property = "x-no-geforce-quirks",\ ++ .value = "on",\ ++ }, ++ ++/* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: ++ * - x-hv-max-vps was backported to 7.5 ++ * - x-pci-hole64-fix was backported to 7.5 ++ */ ++#define PC_RHEL7_5_COMPAT \ ++ HW_COMPAT_RHEL7_5 \ ++ { /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_11 */ \ ++ .driver = "Skylake-Server" "-" TYPE_X86_CPU,\ ++ .property = "clflushopt",\ ++ .value = "off",\ ++ }, ++ ++ ++#define PC_RHEL7_4_COMPAT \ ++ HW_COMPAT_RHEL7_4 \ ++ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_9 */ \ ++ .driver = "mch",\ ++ .property = "extended-tseg-mbytes",\ ++ .value = stringify(0),\ ++ },\ ++ { /* PC_RHEL7_4_COMPAT bz 1489800 */ \ ++ .driver = "ICH9-LPC",\ ++ .property = "__com.redhat_force-rev1-fadt",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ \ ++ .driver = "i440FX-pcihost",\ ++ .property = "x-pci-hole64-fix",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ \ ++ .driver = "q35-pcihost",\ ++ .property = "x-pci-hole64-fix",\ 
++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "x-hv-max-vps",\ ++ .value = "0x40",\ ++ }, ++ ++#define PC_RHEL7_3_COMPAT \ ++ HW_COMPAT_RHEL7_3 \ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ \ ++ .driver = "kvmclock",\ ++ .property = "x-mach-use-reliable-get-clock",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "l3-cache",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "full-cpuid-auto-level",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ ++ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ ++ .property = "family",\ ++ .value = "15",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ ++ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ ++ .property = "model",\ ++ .value = "6",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ ++ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ ++ .property = "stepping",\ ++ .value = "1",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ ++ .driver = "isa-pcspk",\ ++ .property = "migrate",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_6 */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "cpuid-0xb",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ \ ++ .driver = "ICH9-LPC",\ ++ .property = "x-smi-broadcast",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "vmware-cpuid-freq",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ \ ++ .driver = "Haswell-" TYPE_X86_CPU,\ ++ .property = "stepping",\ ++ .value = "1",\ ++ },\ ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_3 added in 2.9 */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "kvm-no-smi-migration",\ ++ .value = "on",\ ++ }, ++ ++#define PC_RHEL7_2_COMPAT \ ++ HW_COMPAT_RHEL7_2 \ ++ {\ ++ 
.driver = "phenom" "-" TYPE_X86_CPU,\ ++ .property = "rdtscp",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "qemu64" "-" TYPE_X86_CPU,\ ++ .property = "sse4a",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "qemu64" "-" TYPE_X86_CPU,\ ++ .property = "abm",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "Haswell-" TYPE_X86_CPU,\ ++ .property = "abm",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ \ ++ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "abm",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "Haswell-noTSX-" TYPE_X86_CPU,\ ++ .property = "abm",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ \ ++ .driver = "Haswell-noTSX-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "abm",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "Broadwell-" TYPE_X86_CPU,\ ++ .property = "abm",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ \ ++ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "abm",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "Broadwell-noTSX-" TYPE_X86_CPU,\ ++ .property = "abm",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ \ ++ .driver = "Broadwell-noTSX-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "abm",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "host" "-" TYPE_X86_CPU,\ ++ .property = "host-cache-info",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "check",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "qemu32" "-" TYPE_X86_CPU,\ ++ .property = "popcnt",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "arat",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = 
"usb-redir",\ ++ .property = "streams",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "fill-mtrr-mask",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_2_COMPAT */ \ ++ .driver = "apic-common",\ ++ .property = "legacy-instance-id",\ ++ .value = "on",\ ++ }, ++ ++ ++ ++#define PC_RHEL7_1_COMPAT \ ++ HW_COMPAT_RHEL7_1 \ ++ {\ ++ .driver = "kvm64" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "kvm32" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Conroe" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Penryn" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Nehalem" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "Nehalem-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Westmere" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "Westmere-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "SandyBridge" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "SandyBridge-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Haswell" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Broadwell" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "Broadwell-IBRS" "-" 
TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Opteron_G1" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Opteron_G2" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Opteron_G4" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Opteron_G5" "-" TYPE_X86_CPU,\ ++ .property = "vme",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Haswell" "-" TYPE_X86_CPU,\ ++ .property = "f16c",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "f16c",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Haswell" "-" TYPE_X86_CPU,\ ++ .property = "rdrand",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "rdrand",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Broadwell" "-" TYPE_X86_CPU,\ ++ .property = "f16c",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "f16c",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "Broadwell" "-" TYPE_X86_CPU,\ ++ .property = "rdrand",\ ++ .value = "off",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ ++ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "rdrand",\ ++ .value = "off",\ ++ },\ ++ {\ ++ .driver = "coreduo" "-" TYPE_X86_CPU,\ ++ .property = "vmx",\ ++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "core2duo" "-" TYPE_X86_CPU,\ ++ .property = "vmx",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "qemu64" "-" TYPE_X86_CPU,\ ++ .property = "min-level",\ ++ .value = stringify(4),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "kvm64" "-" 
TYPE_X86_CPU,\ ++ .property = "min-level",\ ++ .value = stringify(5),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "pentium3" "-" TYPE_X86_CPU,\ ++ .property = "min-level",\ ++ .value = stringify(2),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "n270" "-" TYPE_X86_CPU,\ ++ .property = "min-level",\ ++ .value = stringify(5),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Conroe" "-" TYPE_X86_CPU,\ ++ .property = "min-level",\ ++ .value = stringify(4),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Penryn" "-" TYPE_X86_CPU,\ ++ .property = "min-level",\ ++ .value = stringify(4),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Nehalem" "-" TYPE_X86_CPU,\ ++ .property = "min-level",\ ++ .value = stringify(4),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "n270" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Penryn" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Conroe" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Nehalem" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Westmere" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "SandyBridge" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "IvyBridge" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Haswell" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Haswell-noTSX" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ 
++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Broadwell" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_1_COMPAT */ \ ++ .driver = "Broadwell-noTSX" "-" TYPE_X86_CPU,\ ++ .property = "min-xlevel",\ ++ .value = stringify(0x8000000a),\ ++ }, ++ ++/* ++ * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine ++ * types as the PC_COMPAT_* do for upstream types. ++ * PC_RHEL_7_*_COMPAT apply both to i440fx and q35 types. ++ */ ++ ++/* ++ * RHEL-7 is based on QEMU 1.5.3, so this needs the PC_COMPAT_* ++ * between our base and 1.5, less stuff backported to RHEL-7.0 ++ * (usb-device.msos-desc), less stuff for devices we changed ++ * (qemu64-x86_64-cpu) or don't support (hpet, pci-serial-2x, ++ * pci-serial-4x) in 7.0. ++ */ ++#define PC_RHEL7_0_COMPAT \ ++ {\ ++ .driver = "virtio-scsi-pci",\ ++ .property = "any_layout",\ ++ .value = "off",\ ++ },{\ ++ .driver = "PIIX4_PM",\ ++ .property = "memory-hotplug-support",\ ++ .value = "off",\ ++ },{\ ++ .driver = "apic",\ ++ .property = "version",\ ++ .value = stringify(0x11),\ ++ },{\ ++ .driver = "nec-usb-xhci",\ ++ .property = "superspeed-ports-first",\ ++ .value = "off",\ ++ },{\ ++ .driver = "nec-usb-xhci",\ ++ .property = "force-pcie-endcap",\ ++ .value = "on",\ ++ },{\ ++ .driver = "pci-serial",\ ++ .property = "prog_if",\ ++ .value = stringify(0),\ ++ },{\ ++ .driver = "virtio-net-pci",\ ++ .property = "guest_announce",\ ++ .value = "off",\ ++ },{\ ++ .driver = "ICH9-LPC",\ ++ .property = "memory-hotplug-support",\ ++ .value = "off",\ ++ },{\ ++ .driver = "xio3130-downstream",\ ++ .property = COMPAT_PROP_PCP,\ ++ .value = "off",\ ++ },{\ ++ .driver = "ioh3420",\ ++ .property = COMPAT_PROP_PCP,\ ++ .value = "off",\ ++ },{\ ++ .driver = "PIIX4_PM",\ ++ .property = "acpi-pci-hotplug-with-bridge-support",\ ++ .value = "off",\ ++ },{\ ++ .driver = "e1000",\ ++ .property = "mitigation",\ ++ .value = "off",\ ++ },{ \ ++ .driver = "virtio-net-pci", \ ++ .property = 
"ctrl_guest_offloads", \ ++ .value = "off", \ ++ },\ ++ {\ ++ .driver = "Conroe" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "Penryn" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "Nehalem" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL7_0_COMPAT (copied from the entry above) */ \ ++ .driver = "Nehalem-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "Westmere" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ { /* PC_RHEL7_0_COMPAT (copied from the entry above) */ \ ++ .driver = "Westmere-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "Opteron_G1" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "Opteron_G2" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "Opteron_G4" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ },\ ++ {\ ++ .driver = "Opteron_G5" "-" TYPE_X86_CPU,\ ++ .property = "x2apic",\ ++ .value = "on",\ ++ }, + #endif +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 338ee37..051018a 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1360,11 +1360,17 @@ static CPUCaches epyc_cache_info = { + + static X86CPUDefinition builtin_x86_defs[] = { + { ++ /* qemu64 is the default CPU model for all *-rhel7.* machine-types. ++ * The default on RHEL-6 was cpu64-rhel6. ++ * libvirt assumes that qemu64 is the default for _all_ machine-types, ++ * so we should try to keep qemu64 and cpu64-rhel6 as similar as ++ * possible. 
++ */ + .name = "qemu64", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 6, +- .model = 6, ++ .model = 13, + .stepping = 3, + .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | + CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | +@@ -2684,6 +2690,7 @@ static PropValue kvm_default_props[] = { + { "acpi", "off" }, + { "monitor", "off" }, + { "svm", "off" }, ++ { "kvm-pv-unhalt", "on" }, + { NULL, NULL }, + }; + +diff --git a/target/i386/machine.c b/target/i386/machine.c +index 084c2c7..0c57c26 100644 +--- a/target/i386/machine.c ++++ b/target/i386/machine.c +@@ -955,6 +955,26 @@ static const VMStateDescription vmstate_svm_npt = { + } + }; + ++static bool vmstate_xsave_needed(void *opaque) ++{ ++ /* The xsave state is already on the main "cpu" section */ ++ return false; ++} ++ ++static const VMStateDescription vmstate_xsave ={ ++ .name = "cpu/xsave", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .minimum_version_id_old = 1, ++ .needed = vmstate_xsave_needed, ++ .fields = (VMStateField []) { ++ VMSTATE_UINT64_V(env.xcr0, X86CPU, 1), ++ VMSTATE_UINT64_V(env.xstate_bv, X86CPU, 1), ++ VMSTATE_YMMH_REGS_VARS(env.xmm_regs, X86CPU, CPU_NB_REGS, 1), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ + VMStateDescription vmstate_x86_cpu = { + .name = "cpu", + .version_id = 12, +@@ -1080,6 +1100,7 @@ VMStateDescription vmstate_x86_cpu = { + &vmstate_msr_intel_pt, + &vmstate_msr_virt_ssbd, + &vmstate_svm_npt, ++ &vmstate_xsave, + NULL + } + }; +-- +1.8.3.1 + diff --git a/0011-Enable-make-check.patch b/0011-Enable-make-check.patch new file mode 100644 index 0000000..1be86ed --- /dev/null +++ b/0011-Enable-make-check.patch @@ -0,0 +1,498 @@ +From f4e3d697cb6a18301b1279c0b07896eb5b228aa9 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 8 Nov 2018 12:03:48 +0100 +Subject: Enable make check + +Fixing tests after device disabling and machine types changes and enabling +make check run during build. 
+ +Signed-off-by: Miroslav Rezanina +--- + redhat/qemu-kvm.spec.template | 2 +- + tests/Makefile.include | 123 +++++++++++++++++++++--------------------- + tests/boot-serial-test.c | 6 ++- + tests/cpu-plug-test.c | 3 +- + tests/e1000-test.c | 2 + + tests/endianness-test.c | 2 + + tests/prom-env-test.c | 2 + + tests/qemu-iotests/051 | 12 ++--- + tests/qemu-iotests/group | 4 +- + tests/qom-test.c | 2 +- + tests/test-x86-cpuid-compat.c | 2 + + tests/usb-hcd-xhci-test.c | 4 ++ + 12 files changed, 91 insertions(+), 73 deletions(-) + +diff --git a/tests/Makefile.include b/tests/Makefile.include +index a492827..4b78396 100644 +--- a/tests/Makefile.include ++++ b/tests/Makefile.include +@@ -184,8 +184,8 @@ gcov-files-generic-y = qdev-monitor.c qmp.c + check-qtest-generic-y += tests/cdrom-test$(EXESUF) + + gcov-files-ipack-y += hw/ipack/ipack.c +-check-qtest-ipack-y += tests/ipoctal232-test$(EXESUF) +-gcov-files-ipack-y += hw/char/ipoctal232.c ++#check-qtest-ipack-y += tests/ipoctal232-test$(EXESUF) ++#gcov-files-ipack-y += hw/char/ipoctal232.c + + check-qtest-virtioserial-y += tests/virtio-console-test$(EXESUF) + gcov-files-virtioserial-y += hw/char/virtio-console.c +@@ -217,23 +217,23 @@ check-qtest-pci-y += tests/e1000e-test$(EXESUF) + gcov-files-pci-y += hw/net/e1000e.c hw/net/e1000e_core.c + check-qtest-pci-y += tests/rtl8139-test$(EXESUF) + gcov-files-pci-y += hw/net/rtl8139.c +-check-qtest-pci-y += tests/pcnet-test$(EXESUF) +-gcov-files-pci-y += hw/net/pcnet.c +-gcov-files-pci-y += hw/net/pcnet-pci.c +-check-qtest-pci-y += tests/eepro100-test$(EXESUF) +-gcov-files-pci-y += hw/net/eepro100.c +-check-qtest-pci-y += tests/ne2000-test$(EXESUF) +-gcov-files-pci-y += hw/net/ne2000.c +-check-qtest-pci-y += tests/nvme-test$(EXESUF) +-gcov-files-pci-y += hw/block/nvme.c ++#check-qtest-pci-y += tests/pcnet-test$(EXESUF) ++#gcov-files-pci-y += hw/net/pcnet.c ++#gcov-files-pci-y += hw/net/pcnet-pci.c ++#check-qtest-pci-y += tests/eepro100-test$(EXESUF) ++#gcov-files-pci-y += 
hw/net/eepro100.c ++#check-qtest-pci-y += tests/ne2000-test$(EXESUF) ++#gcov-files-pci-y += hw/net/ne2000.c ++#check-qtest-pci-y += tests/nvme-test$(EXESUF) ++#gcov-files-pci-y += hw/block/nvme.c + check-qtest-pci-y += tests/ac97-test$(EXESUF) + gcov-files-pci-y += hw/audio/ac97.c +-check-qtest-pci-y += tests/es1370-test$(EXESUF) +-gcov-files-pci-y += hw/audio/es1370.c ++#check-qtest-pci-y += tests/es1370-test$(EXESUF) ++#gcov-files-pci-y += hw/audio/es1370.c + check-qtest-pci-y += $(check-qtest-virtio-y) + gcov-files-pci-y += $(gcov-files-virtio-y) hw/virtio/virtio-pci.c +-check-qtest-pci-y += tests/tpci200-test$(EXESUF) +-gcov-files-pci-y += hw/ipack/tpci200.c ++#check-qtest-pci-y += tests/tpci200-test$(EXESUF) ++#gcov-files-pci-y += hw/ipack/tpci200.c + check-qtest-pci-y += $(check-qtest-ipack-y) + gcov-files-pci-y += $(gcov-files-ipack-y) + check-qtest-pci-y += tests/display-vga-test$(EXESUF) +@@ -245,25 +245,25 @@ gcov-files-pci-y += hw/display/virtio-gpu-pci.c + gcov-files-pci-$(CONFIG_VIRTIO_VGA) += hw/display/virtio-vga.c + check-qtest-pci-y += tests/intel-hda-test$(EXESUF) + gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c +-check-qtest-pci-$(CONFIG_IVSHMEM) += tests/ivshmem-test$(EXESUF) +-gcov-files-pci-y += hw/misc/ivshmem.c +-check-qtest-pci-y += tests/megasas-test$(EXESUF) +-gcov-files-pci-y += hw/scsi/megasas.c ++check-qtest-pci-$(CONFIG_IVSHMEM_DEVICE) += tests/ivshmem-test$(EXESUF) ++gcov-files-pci-$(CONFIG_IVSHMEM_DEVICE) += hw/misc/ivshmem.c ++#check-qtest-pci-y += tests/megasas-test$(EXESUF) ++#gcov-files-pci-y += hw/scsi/megasas.c + + check-qtest-i386-y = tests/endianness-test$(EXESUF) +-check-qtest-i386-y += tests/fdc-test$(EXESUF) +-gcov-files-i386-y = hw/block/fdc.c ++#check-qtest-i386-y += tests/fdc-test$(EXESUF) ++#gcov-files-i386-y = hw/block/fdc.c + check-qtest-i386-y += tests/ide-test$(EXESUF) + check-qtest-i386-y += tests/ahci-test$(EXESUF) + check-qtest-i386-y += tests/hd-geo-test$(EXESUF) + gcov-files-i386-y += 
hw/block/hd-geometry.c + check-qtest-i386-y += tests/boot-order-test$(EXESUF) +-check-qtest-i386-y += tests/bios-tables-test$(EXESUF) ++#check-qtest-i386-y += tests/bios-tables-test$(EXESUF) + check-qtest-i386-y += tests/boot-serial-test$(EXESUF) + check-qtest-i386-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) + check-qtest-i386-y += tests/rtc-test$(EXESUF) +-check-qtest-i386-y += tests/ipmi-kcs-test$(EXESUF) +-check-qtest-i386-y += tests/ipmi-bt-test$(EXESUF) ++#check-qtest-i386-y += tests/ipmi-kcs-test$(EXESUF) ++#check-qtest-i386-y += tests/ipmi-bt-test$(EXESUF) + check-qtest-i386-y += tests/i440fx-test$(EXESUF) + check-qtest-i386-y += tests/fw_cfg-test$(EXESUF) + check-qtest-i386-y += tests/drive_del-test$(EXESUF) +@@ -272,8 +272,8 @@ check-qtest-i386-y += tests/tco-test$(EXESUF) + gcov-files-i386-y += hw/watchdog/watchdog.c hw/watchdog/wdt_ib700.c + check-qtest-i386-y += $(check-qtest-pci-y) + gcov-files-i386-y += $(gcov-files-pci-y) +-check-qtest-i386-y += tests/vmxnet3-test$(EXESUF) +-gcov-files-i386-y += hw/net/vmxnet3.c ++#check-qtest-i386-y += tests/vmxnet3-test$(EXESUF) ++#gcov-files-i386-y += hw/net/vmxnet3.c + gcov-files-i386-y += hw/net/net_rx_pkt.c + gcov-files-i386-y += hw/net/net_tx_pkt.c + check-qtest-i386-y += tests/pvpanic-test$(EXESUF) +@@ -282,8 +282,8 @@ check-qtest-i386-y += tests/i82801b11-test$(EXESUF) + gcov-files-i386-y += hw/pci-bridge/i82801b11.c + check-qtest-i386-y += tests/ioh3420-test$(EXESUF) + gcov-files-i386-y += hw/pci-bridge/ioh3420.c +-check-qtest-i386-y += tests/usb-hcd-ohci-test$(EXESUF) +-gcov-files-i386-y += hw/usb/hcd-ohci.c ++#check-qtest-i386-y += tests/usb-hcd-ohci-test$(EXESUF) ++#gcov-files-i386-y += hw/usb/hcd-ohci.c + check-qtest-i386-y += tests/usb-hcd-uhci-test$(EXESUF) + gcov-files-i386-y += hw/usb/hcd-uhci.c + check-qtest-i386-y += tests/usb-hcd-ehci-test$(EXESUF) +@@ -311,7 +311,7 @@ check-qtest-i386-y += tests/migration-test$(EXESUF) + check-qtest-i386-y += tests/test-x86-cpuid-compat$(EXESUF) + 
check-qtest-i386-y += tests/numa-test$(EXESUF) + check-qtest-x86_64-y += $(check-qtest-i386-y) +-check-qtest-x86_64-y += tests/sdhci-test$(EXESUF) ++#check-qtest-x86_64-y += tests/sdhci-test$(EXESUF) + gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c + gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y)) + +@@ -332,34 +332,35 @@ check-qtest-mips64el-y = tests/endianness-test$(EXESUF) + check-qtest-moxie-y = tests/boot-serial-test$(EXESUF) + + check-qtest-ppc-y = tests/endianness-test$(EXESUF) +-check-qtest-ppc-y += tests/boot-order-test$(EXESUF) ++#check-qtest-ppc-y += tests/boot-order-test$(EXESUF) + check-qtest-ppc-y += tests/prom-env-test$(EXESUF) + check-qtest-ppc-y += tests/drive_del-test$(EXESUF) + check-qtest-ppc-y += tests/boot-serial-test$(EXESUF) +-check-qtest-ppc-y += tests/m48t59-test$(EXESUF) +-gcov-files-ppc-y += hw/timer/m48t59.c ++#check-qtest-ppc-y += tests/m48t59-test$(EXESUF) ++#gcov-files-ppc-y += hw/timer/m48t59.c + + check-qtest-ppc64-y = $(check-qtest-ppc-y) + gcov-files-ppc64-y = $(subst ppc-softmmu/,ppc64-softmmu/,$(gcov-files-ppc-y)) + check-qtest-ppc64-y += tests/spapr-phb-test$(EXESUF) + gcov-files-ppc64-y += ppc64-softmmu/hw/ppc/spapr_pci.c +-check-qtest-ppc64-y += tests/pnv-xscom-test$(EXESUF) ++#check-qtest-ppc64-y += tests/pnv-xscom-test$(EXESUF) + check-qtest-ppc64-y += tests/migration-test$(EXESUF) + check-qtest-ppc64-y += tests/rtas-test$(EXESUF) + check-qtest-ppc64-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) +-check-qtest-ppc64-y += tests/usb-hcd-ohci-test$(EXESUF) +-gcov-files-ppc64-y += hw/usb/hcd-ohci.c +-check-qtest-ppc64-y += tests/usb-hcd-uhci-test$(EXESUF) +-gcov-files-ppc64-y += hw/usb/hcd-uhci.c ++#check-qtest-ppc64-y += tests/usb-hcd-ohci-test$(EXESUF) ++#gcov-files-ppc64-y += hw/usb/hcd-ohci.c ++#check-qtest-ppc64-y += tests/usb-hcd-uhci-test$(EXESUF) ++#gcov-files-ppc64-y += hw/usb/hcd-uhci.c + check-qtest-ppc64-y += tests/usb-hcd-xhci-test$(EXESUF) + gcov-files-ppc64-y += 
hw/usb/hcd-xhci.c + check-qtest-ppc64-y += $(check-qtest-virtio-y) +-check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) +-check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) +-check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF) ++#check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) ++#check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) ++#check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF) + check-qtest-ppc64-y += tests/display-vga-test$(EXESUF) + check-qtest-ppc64-y += tests/numa-test$(EXESUF) +-check-qtest-ppc64-$(CONFIG_IVSHMEM) += tests/ivshmem-test$(EXESUF) ++check-qtest-ppc64-$(CONFIG_IVSHMEM_DEVICE) += tests/ivshmem-test$(EXESUF) ++gcov-files-ppc64-$(CONFIG_IVSHMEM_DEVICE) += hw/misc/ivshmem.c + check-qtest-ppc64-y += tests/cpu-plug-test$(EXESUF) + + check-qtest-sh4-y = tests/endianness-test$(EXESUF) +@@ -388,7 +389,7 @@ check-qtest-arm-y += tests/boot-serial-test$(EXESUF) + check-qtest-arm-y += tests/sdhci-test$(EXESUF) + + check-qtest-aarch64-y = tests/numa-test$(EXESUF) +-check-qtest-aarch64-y += tests/sdhci-test$(EXESUF) ++#check-qtest-aarch64-y += tests/sdhci-test$(EXESUF) + check-qtest-aarch64-y += tests/boot-serial-test$(EXESUF) + + check-qtest-microblazeel-y = $(check-qtest-microblaze-y) +@@ -777,15 +778,15 @@ tests/endianness-test$(EXESUF): tests/endianness-test.o + tests/spapr-phb-test$(EXESUF): tests/spapr-phb-test.o $(libqos-obj-y) + tests/prom-env-test$(EXESUF): tests/prom-env-test.o $(libqos-obj-y) + tests/rtas-test$(EXESUF): tests/rtas-test.o $(libqos-spapr-obj-y) +-tests/fdc-test$(EXESUF): tests/fdc-test.o ++#tests/fdc-test$(EXESUF): tests/fdc-test.o + tests/ide-test$(EXESUF): tests/ide-test.o $(libqos-pc-obj-y) + tests/ahci-test$(EXESUF): tests/ahci-test.o $(libqos-pc-obj-y) +-tests/ipmi-kcs-test$(EXESUF): tests/ipmi-kcs-test.o +-tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o ++#tests/ipmi-kcs-test$(EXESUF): 
tests/ipmi-kcs-test.o ++#tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o + tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o + tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y) + tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y) +-tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ ++#tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ + tests/boot-sector.o tests/acpi-utils.o $(libqos-obj-y) + tests/pxe-test$(EXESUF): tests/pxe-test.o tests/boot-sector.o $(libqos-obj-y) + tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y) +@@ -798,11 +799,11 @@ tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y) + tests/e1000-test$(EXESUF): tests/e1000-test.o + tests/e1000e-test$(EXESUF): tests/e1000e-test.o $(libqos-pc-obj-y) + tests/rtl8139-test$(EXESUF): tests/rtl8139-test.o $(libqos-pc-obj-y) +-tests/pcnet-test$(EXESUF): tests/pcnet-test.o +-tests/pnv-xscom-test$(EXESUF): tests/pnv-xscom-test.o +-tests/eepro100-test$(EXESUF): tests/eepro100-test.o +-tests/vmxnet3-test$(EXESUF): tests/vmxnet3-test.o +-tests/ne2000-test$(EXESUF): tests/ne2000-test.o ++#tests/pcnet-test$(EXESUF): tests/pcnet-test.o ++#tests/pnv-xscom-test$(EXESUF): tests/pnv-xscom-test.o ++#tests/eepro100-test$(EXESUF): tests/eepro100-test.o ++#tests/vmxnet3-test$(EXESUF): tests/vmxnet3-test.o ++#tests/ne2000-test$(EXESUF): tests/ne2000-test.o + tests/wdt_ib700-test$(EXESUF): tests/wdt_ib700-test.o + tests/tco-test$(EXESUF): tests/tco-test.o $(libqos-pc-obj-y) + tests/virtio-balloon-test$(EXESUF): tests/virtio-balloon-test.o $(libqos-virtio-obj-y) +@@ -813,22 +814,22 @@ tests/virtio-scsi-test$(EXESUF): tests/virtio-scsi-test.o $(libqos-virtio-obj-y) + tests/virtio-9p-test$(EXESUF): tests/virtio-9p-test.o $(libqos-virtio-obj-y) + tests/virtio-serial-test$(EXESUF): tests/virtio-serial-test.o $(libqos-virtio-obj-y) + tests/virtio-console-test$(EXESUF): tests/virtio-console-test.o $(libqos-virtio-obj-y) 
+-tests/tpci200-test$(EXESUF): tests/tpci200-test.o ++#tests/tpci200-test$(EXESUF): tests/tpci200-test.o + tests/display-vga-test$(EXESUF): tests/display-vga-test.o +-tests/ipoctal232-test$(EXESUF): tests/ipoctal232-test.o ++#tests/ipoctal232-test$(EXESUF): tests/ipoctal232-test.o + tests/qom-test$(EXESUF): tests/qom-test.o + tests/test-hmp$(EXESUF): tests/test-hmp.o + tests/machine-none-test$(EXESUF): tests/machine-none-test.o + tests/drive_del-test$(EXESUF): tests/drive_del-test.o $(libqos-virtio-obj-y) + tests/qdev-monitor-test$(EXESUF): tests/qdev-monitor-test.o $(libqos-pc-obj-y) +-tests/nvme-test$(EXESUF): tests/nvme-test.o ++#tests/nvme-test$(EXESUF): tests/nvme-test.o + tests/pvpanic-test$(EXESUF): tests/pvpanic-test.o + tests/i82801b11-test$(EXESUF): tests/i82801b11-test.o + tests/ac97-test$(EXESUF): tests/ac97-test.o +-tests/es1370-test$(EXESUF): tests/es1370-test.o ++#tests/es1370-test$(EXESUF): tests/es1370-test.o + tests/intel-hda-test$(EXESUF): tests/intel-hda-test.o + tests/ioh3420-test$(EXESUF): tests/ioh3420-test.o +-tests/usb-hcd-ohci-test$(EXESUF): tests/usb-hcd-ohci-test.o $(libqos-usb-obj-y) ++#tests/usb-hcd-ohci-test$(EXESUF): tests/usb-hcd-ohci-test.o $(libqos-usb-obj-y) + tests/usb-hcd-uhci-test$(EXESUF): tests/usb-hcd-uhci-test.o $(libqos-usb-obj-y) + tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-usb-obj-y) + tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y) +@@ -841,19 +842,19 @@ tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_hel + tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y) + tests/test-keyval$(EXESUF): tests/test-keyval.o $(test-util-obj-y) $(test-qapi-obj-y) + tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y) +-tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y) +-tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y) 
+-tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y) ++#tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y) ++#tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y) ++#tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y) + tests/test-x86-cpuid-compat$(EXESUF): tests/test-x86-cpuid-compat.o $(qtest-obj-y) + tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y) $(libqos-spapr-obj-y) +-tests/megasas-test$(EXESUF): tests/megasas-test.o $(libqos-spapr-obj-y) $(libqos-pc-obj-y) ++#tests/megasas-test$(EXESUF): tests/megasas-test.o $(libqos-spapr-obj-y) $(libqos-pc-obj-y) + tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o $(test-util-obj-y) libvhost-user.a + tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y) + tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o + tests/test-qapi-util$(EXESUF): tests/test-qapi-util.o $(test-util-obj-y) + tests/numa-test$(EXESUF): tests/numa-test.o + tests/vmgenid-test$(EXESUF): tests/vmgenid-test.o tests/boot-sector.o tests/acpi-utils.o +-tests/sdhci-test$(EXESUF): tests/sdhci-test.o $(libqos-pc-obj-y) ++#tests/sdhci-test$(EXESUF): tests/sdhci-test.o $(libqos-pc-obj-y) + tests/cdrom-test$(EXESUF): tests/cdrom-test.o tests/boot-sector.o $(libqos-obj-y) + + tests/migration/stress$(EXESUF): tests/migration/stress.o +diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c +index 952a2e7..5217a39 100644 +--- a/tests/boot-serial-test.c ++++ b/tests/boot-serial-test.c +@@ -80,17 +80,21 @@ static testdef_t tests[] = { + { "ppc", "g3beige", "", "PowerPC,750" }, + { "ppc", "mac99", "", "PowerPC,G4" }, + { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "ppce500", "", "U-Boot" }, + { "ppc64", "prep", "-boot e", "Booting from device e" }, + { "ppc64", "40p", "-m 192", "Memory size: 192 MB" }, 
+ { "ppc64", "mac99", "", "PowerPC,970FX" }, ++#endif + { "ppc64", "pseries", "", "Open Firmware" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "powernv", "-cpu POWER8", "OPAL" }, + { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, ++#endif + { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "i386", "pc", "-device sga", "SGABIOS" }, + { "i386", "q35", "-device sga", "SGABIOS" }, +- { "x86_64", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, ++ { "x86_64", "pc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "x86_64", "q35", "-device sga", "SGABIOS" }, + { "sparc", "LX", "", "TMS390S10" }, + { "sparc", "SS-4", "", "MB86904" }, +diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c +index 5f39ba0..48b8d09 100644 +--- a/tests/cpu-plug-test.c ++++ b/tests/cpu-plug-test.c +@@ -192,7 +192,8 @@ static void add_pseries_test_case(const char *mname) + PlugTestData *data; + + if (!g_str_has_prefix(mname, "pseries-") || +- (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7)) { ++ (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7) || ++ strcmp(mname,"pseries-rhel7.2.0") == 0) { + return; + } + data = g_new(PlugTestData, 1); +diff --git a/tests/e1000-test.c b/tests/e1000-test.c +index 0c5fcdc..b830432 100644 +--- a/tests/e1000-test.c ++++ b/tests/e1000-test.c +@@ -29,8 +29,10 @@ static void test_device(gconstpointer data) + static const char *models[] = { + "e1000", + "e1000-82540em", ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + "e1000-82544gc", + "e1000-82545em", ++#endif + }; + + int main(int argc, char **argv) +diff --git a/tests/endianness-test.c b/tests/endianness-test.c +index 546e096..440353d 100644 +--- a/tests/endianness-test.c ++++ b/tests/endianness-test.c +@@ -37,10 +37,12 @@ static const TestCase test_cases[] = { + { "ppc", "g3beige", 0xfe000000, .bswap = true, .superio = "i82378" }, + { "ppc", "prep", 0x80000000, .bswap = true }, + { "ppc", "bamboo", 0xe8000000, .bswap = true, .superio = 
"i82378" }, ++#if 0 /* Disabled for RHEL, since ISA is not enabled */ + { "ppc64", "mac99", 0xf2000000, .bswap = true, .superio = "i82378" }, + { "ppc64", "pseries", (1ULL << 45), .bswap = true, .superio = "i82378" }, + { "ppc64", "pseries-2.7", 0x10080000000ULL, + .bswap = true, .superio = "i82378" }, ++#endif /* Disabled for RHEL, since ISA is not enabled */ + { "sh4", "r2d", 0xfe240000, .superio = "i82378" }, + { "sh4eb", "r2d", 0xfe240000, .bswap = true, .superio = "i82378" }, + { "sparc64", "sun4u", 0x1fe02000000LL, .bswap = true }, +diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c +index 8c867e6..cc9b6ec 100644 +--- a/tests/prom-env-test.c ++++ b/tests/prom-env-test.c +@@ -82,7 +82,9 @@ int main(int argc, char *argv[]) + if (!strcmp(arch, "ppc")) { + add_tests(ppc_machines); + } else if (!strcmp(arch, "ppc64")) { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + add_tests(ppc_machines); ++#endif + if (g_test_slow()) { + qtest_add_data_func("prom-env/pseries", "pseries", test_machine); + } +diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 +index ee9c820..c5cc0ee 100755 +--- a/tests/qemu-iotests/051 ++++ b/tests/qemu-iotests/051 +@@ -183,11 +183,11 @@ run_qemu -drive if=virtio + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive if=none,id=disk -device ide-cd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive if=none,id=disk -device ide-drive,drive=disk + run_qemu -drive if=none,id=disk -device ide-hd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; +@@ -212,11 +212,11 @@ run_qemu -drive 
file="$TEST_IMG",if=virtio,readonly=on + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-cd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-drive,drive=disk + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-hd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index b973dc8..f1059f6 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -77,7 +77,7 @@ + 068 rw auto quick + 069 rw auto quick + 070 rw auto quick +-071 rw auto quick ++# 071 rw auto quick -- requires whitelisted blkverify + 072 rw auto quick + 073 rw auto quick + 074 rw auto quick +@@ -105,7 +105,7 @@ + 096 rw auto quick + 097 rw auto backing + 098 rw auto backing quick +-099 rw auto quick ++# 099 rw auto quick -- requires whitelisted blkverify + # 100 was removed, do not reuse + 101 rw auto quick + 102 rw auto quick +diff --git a/tests/qom-test.c b/tests/qom-test.c +index e6f712c..ebd15fd 100644 +--- a/tests/qom-test.c ++++ b/tests/qom-test.c +@@ -16,7 +16,7 @@ + #include "libqtest.h" + + static const char *blacklist_x86[] = { +- "xenfv", "xenpv", NULL ++ "xenfv", "xenpv", "isapc", NULL + }; + + static const struct { +diff --git a/tests/test-x86-cpuid-compat.c 
b/tests/test-x86-cpuid-compat.c +index 84ce9c7..c1ee197 100644 +--- a/tests/test-x86-cpuid-compat.c ++++ b/tests/test-x86-cpuid-compat.c +@@ -306,6 +306,7 @@ int main(int argc, char **argv) + "-cpu 486,xlevel2=0xC0000002,+xstore", + "xlevel2", 0xC0000002); + ++#if 0 /* Disabled in Red Hat Enterprise Linux */ + /* Check compatibility of old machine-types that didn't + * auto-increase level/xlevel/xlevel2: */ + +@@ -356,6 +357,7 @@ int main(int argc, char **argv) + add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", + "-machine pc-i440fx-2.4 -cpu SandyBridge,+npt", + "xlevel", 0x80000008); ++#endif + + /* Test feature parsing */ + add_feature_test("x86/cpuid/features/plus", +diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c +index 5b1b681..85fa150 100644 +--- a/tests/usb-hcd-xhci-test.c ++++ b/tests/usb-hcd-xhci-test.c +@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) + usb_test_hotplug("xhci", 1, NULL); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void test_usb_uas_hotplug(void) + { + qtest_qmp_device_add("usb-uas", "uas", NULL); +@@ -34,6 +35,7 @@ static void test_usb_uas_hotplug(void) + qtest_qmp_device_del("scsihd"); + qtest_qmp_device_del("uas"); + } ++#endif + + static void test_usb_ccid_hotplug(void) + { +@@ -52,7 +54,9 @@ int main(int argc, char **argv) + + qtest_add_func("/xhci/pci/init", test_xhci_init); + qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); ++#endif + qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); + + qtest_start("-device nec-usb-xhci,id=xhci" +-- +1.8.3.1 + diff --git a/0004-Use-kvm-by-default.patch b/0012-Use-kvm-by-default.patch similarity index 92% rename from 0004-Use-kvm-by-default.patch rename to 0012-Use-kvm-by-default.patch index f19a64e..d6e2835 100644 --- a/0004-Use-kvm-by-default.patch +++ b/0012-Use-kvm-by-default.patch @@ -1,4 
+1,4 @@ -From 5a441b820faa4e6e9e6fc80cccc813a3c333b6c2 Mon Sep 17 00:00:00 2001 +From ce4cd21e28e1511e056877e3cc8dcf6f0b8c7baa Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 18 Dec 2014 06:27:49 +0100 Subject: Use kvm by default diff --git a/0005-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch similarity index 96% rename from 0005-vfio-cap-number-of-devices-that-can-be-assigned.patch rename to 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index 0d40ae5..9e22d48 100644 --- a/0005-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 0c57186334ab4ef7f04de604a8f13b39ad6578c8 Mon Sep 17 00:00:00 2001 +From 43a09e06e76cba94c6ecd448f51912362b42f94d Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned diff --git a/0006-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch similarity index 96% rename from 0006-Add-support-statement-to-help-output.patch rename to 0014-Add-support-statement-to-help-output.patch index b6ecf11..ea0d9ea 100644 --- a/0006-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From c2858d09461c6f69553e8b9d69804f243c2d08bb Mon Sep 17 00:00:00 2001 +From f8e7911bb97eb942a4eadad1731b7c59c43fd2eb Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output diff --git a/0007-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch similarity index 97% rename from 0007-globally-limit-the-maximum-number-of-CPUs.patch rename to 0015-globally-limit-the-maximum-number-of-CPUs.patch index b8af753..3543fec 100644 --- a/0007-globally-limit-the-maximum-number-of-CPUs.patch +++ 
b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From 36dda20ae7312b1db0b4060bb2420ab18e5f5483 Mon Sep 17 00:00:00 2001 +From 8413778453742aeb3ad6b38d5f4440a0dbabca7d Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs diff --git a/0008-Add-support-for-simpletrace.patch b/0016-Add-support-for-simpletrace.patch similarity index 98% rename from 0008-Add-support-for-simpletrace.patch rename to 0016-Add-support-for-simpletrace.patch index 2c660b8..4b945c5 100644 --- a/0008-Add-support-for-simpletrace.patch +++ b/0016-Add-support-for-simpletrace.patch @@ -1,4 +1,4 @@ -From 84763026a2e71d7b9f7fc9249ba25771724c272d Mon Sep 17 00:00:00 2001 +From f262acdee88f36b625fcbd5eb1cd66739428cca3 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 8 Oct 2015 09:50:17 +0200 Subject: Add support for simpletrace diff --git a/0009-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch similarity index 99% rename from 0009-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch rename to 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 93e2343..aca5827 100644 --- a/0009-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From 7f5450ae0077f13427a54bd2868c1986284839d2 Mon Sep 17 00:00:00 2001 +From 33e2c01c1b0b64a76d5193b60378d2329a86626b Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 14 Nov 2014 08:51:50 +0100 Subject: Use qemu-kvm in documentation instead of qemu-system- diff --git a/0010-usb-xhci-Fix-PCI-capability-order.patch b/0018-usb-xhci-Fix-PCI-capability-order.patch similarity index 97% rename from 0010-usb-xhci-Fix-PCI-capability-order.patch rename to 0018-usb-xhci-Fix-PCI-capability-order.patch index a3e6795..a44ef20 100644 --- 
a/0010-usb-xhci-Fix-PCI-capability-order.patch +++ b/0018-usb-xhci-Fix-PCI-capability-order.patch @@ -1,4 +1,4 @@ -From 268966c530da2d8e07e2c9034a82acd01335e2c2 Mon Sep 17 00:00:00 2001 +From 69912b533a88bda6377292231fb94475a674a90d Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 5 May 2017 19:06:14 +0200 Subject: usb-xhci: Fix PCI capability order diff --git a/0011-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch similarity index 97% rename from 0011-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch rename to 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index 886de8e..e5e93ce 100644 --- a/0011-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 126cb3f3717b266f27dc7c657da833779f9f3b54 Mon Sep 17 00:00:00 2001 +From a883dbcc1c55cab189ff4a48cbdd12c4b4246b9c Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] diff --git a/0012-linux-headers-asm-s390-kvm.h-header-sync.patch b/0020-linux-headers-asm-s390-kvm.h-header-sync.patch similarity index 97% rename from 0012-linux-headers-asm-s390-kvm.h-header-sync.patch rename to 0020-linux-headers-asm-s390-kvm.h-header-sync.patch index c0b4e09..cb0ee04 100644 --- a/0012-linux-headers-asm-s390-kvm.h-header-sync.patch +++ b/0020-linux-headers-asm-s390-kvm.h-header-sync.patch @@ -1,4 +1,4 @@ -From 811173cac3e80b6235de885b7b2ec4f9be3b4e31 Mon Sep 17 00:00:00 2001 +From f3d0b355f946ab87b281ef75ebfb52f7b7592f2a Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 9 Aug 2018 10:15:08 +0000 Subject: linux-headers: asm-s390/kvm.h header sync diff --git a/0013-s390x-Enable-KVM-huge-page-backing-support.patch b/0021-s390x-Enable-KVM-huge-page-backing-support.patch similarity index 98% rename from 
0013-s390x-Enable-KVM-huge-page-backing-support.patch rename to 0021-s390x-Enable-KVM-huge-page-backing-support.patch index 926fb13..636c94d 100644 --- a/0013-s390x-Enable-KVM-huge-page-backing-support.patch +++ b/0021-s390x-Enable-KVM-huge-page-backing-support.patch @@ -1,4 +1,4 @@ -From fa8eda01f21298e6bc50abb78775390b4bf3f954 Mon Sep 17 00:00:00 2001 +From 3b4526245dcb2daad3a6393b6b129f85f9e2c7a2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 6 Aug 2018 14:18:41 +0100 Subject: s390x: Enable KVM huge page backing support diff --git a/0014-s390x-kvm-add-etoken-facility.patch b/0022-s390x-kvm-add-etoken-facility.patch similarity index 99% rename from 0014-s390x-kvm-add-etoken-facility.patch rename to 0022-s390x-kvm-add-etoken-facility.patch index e035dbc..d56ba50 100644 --- a/0014-s390x-kvm-add-etoken-facility.patch +++ b/0022-s390x-kvm-add-etoken-facility.patch @@ -1,4 +1,4 @@ -From 4b36866031e559bc895e64ecb20417323cb03e3d Mon Sep 17 00:00:00 2001 +From 8eacbf0e8e26b2a8aa3de955a57a7a3cb680d922 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 9 Aug 2018 10:15:09 +0000 Subject: s390x/kvm: add etoken facility diff --git a/0015-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch b/0023-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch similarity index 96% rename from 0015-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch rename to 0023-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch index 988124d..01227d7 100644 --- a/0015-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch +++ b/0023-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch @@ -1,4 +1,4 @@ -From 79d0599b21b64f8a8107855e844b347d2cc138d9 Mon Sep 17 00:00:00 2001 +From 29df663d045345a8c498dc3966cc59dcf091a50d Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Tue, 7 Aug 2018 09:05:54 +0000 Subject: s390x/cpumodel: default enable bpb and ppa15 for z196 and later diff --git a/0016-i386-Fix-arch_query_cpu_model_expansion-leak.patch 
b/0024-i386-Fix-arch_query_cpu_model_expansion-leak.patch similarity index 98% rename from 0016-i386-Fix-arch_query_cpu_model_expansion-leak.patch rename to 0024-i386-Fix-arch_query_cpu_model_expansion-leak.patch index 5bed305..e2570c5 100644 --- a/0016-i386-Fix-arch_query_cpu_model_expansion-leak.patch +++ b/0024-i386-Fix-arch_query_cpu_model_expansion-leak.patch @@ -1,4 +1,4 @@ -From 786fb991b644eddb9f52fd04d377cc7a62685d59 Mon Sep 17 00:00:00 2001 +From 43b08a1e4bc47d810212f569cc0fc30eebfd7036 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Fri, 31 Aug 2018 13:59:22 +0100 Subject: i386: Fix arch_query_cpu_model_expansion() leak diff --git a/0017-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch b/0025-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch similarity index 96% rename from 0017-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch rename to 0025-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch index a731164..12692e8 100644 --- a/0017-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch +++ b/0025-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch @@ -1,4 +1,4 @@ -From 25abf99ebc7004999e79fa5e5b1370e4dfdaeed2 Mon Sep 17 00:00:00 2001 +From 628b10cd4d5cd8fde97dab66f143db78fe03398a Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Tue, 21 Aug 2018 19:15:41 +0100 Subject: i386: Disable TOPOEXT by default on "-cpu host" diff --git a/0018-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch b/0026-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch similarity index 98% rename from 0018-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch rename to 0026-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch index fc5784f..3f2736c 100644 --- a/0018-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch +++ b/0026-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch @@ -1,4 +1,4 @@ -From 49d4861ffc56cb233dacc1abcb2a5ec608e599ab Mon Sep 17 00:00:00 2001 +From 1ed2bb0d831983b68bcdecd057c2c5bfd419c304 Mon Sep 17 00:00:00 2001 
From: Jeffrey Cody Date: Wed, 26 Sep 2018 04:08:14 +0100 Subject: curl: Make sslverify=off disable host as well as peer verification. diff --git a/0019-migration-postcopy-Clear-have_listen_thread.patch b/0027-migration-postcopy-Clear-have_listen_thread.patch similarity index 96% rename from 0019-migration-postcopy-Clear-have_listen_thread.patch rename to 0027-migration-postcopy-Clear-have_listen_thread.patch index f220ad4..86157d9 100644 --- a/0019-migration-postcopy-Clear-have_listen_thread.patch +++ b/0027-migration-postcopy-Clear-have_listen_thread.patch @@ -1,4 +1,4 @@ -From 324493e716a2e5fa60b6b013d5df831b03f2a678 Mon Sep 17 00:00:00 2001 +From 096b7abf1d2755ad469e4bcb3dc6302021979814 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 1 Oct 2018 10:54:48 +0100 Subject: migration/postcopy: Clear have_listen_thread diff --git a/0020-migration-cleanup-in-error-paths-in-loadvm.patch b/0028-migration-cleanup-in-error-paths-in-loadvm.patch similarity index 96% rename from 0020-migration-cleanup-in-error-paths-in-loadvm.patch rename to 0028-migration-cleanup-in-error-paths-in-loadvm.patch index a0fea63..f576c82 100644 --- a/0020-migration-cleanup-in-error-paths-in-loadvm.patch +++ b/0028-migration-cleanup-in-error-paths-in-loadvm.patch @@ -1,4 +1,4 @@ -From 005c4cb023ffdcb8888c7453d263cab95d5b1b1c Mon Sep 17 00:00:00 2001 +From bff052b89b0c32c179d858bd8eed91e0d9f98db4 Mon Sep 17 00:00:00 2001 From: "Dr. 
David Alan Gilbert" Date: Mon, 1 Oct 2018 10:54:49 +0100 Subject: migration: cleanup in error paths in loadvm diff --git a/0021-jobs-change-start-callback-to-run-callback.patch b/0029-jobs-change-start-callback-to-run-callback.patch similarity index 99% rename from 0021-jobs-change-start-callback-to-run-callback.patch rename to 0029-jobs-change-start-callback-to-run-callback.patch index 93e8b27..0955ee2 100644 --- a/0021-jobs-change-start-callback-to-run-callback.patch +++ b/0029-jobs-change-start-callback-to-run-callback.patch @@ -1,4 +1,4 @@ -From 287cb50c08d64773470732be8a6a566bcdde4b75 Mon Sep 17 00:00:00 2001 +From 2999207ffd4de9f139922b444edba07b051d4a67 Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:07 +0100 Subject: jobs: change start callback to run callback diff --git a/0022-jobs-canonize-Error-object.patch b/0030-jobs-canonize-Error-object.patch similarity index 99% rename from 0022-jobs-canonize-Error-object.patch rename to 0030-jobs-canonize-Error-object.patch index ba09278..92dc0b8 100644 --- a/0022-jobs-canonize-Error-object.patch +++ b/0030-jobs-canonize-Error-object.patch @@ -1,4 +1,4 @@ -From 9dff1ec5bdde5e8bd8745d2e0697cc6e28c87214 Mon Sep 17 00:00:00 2001 +From df9702d737eea1720a10d350c24bdcc3f54bcba9 Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 29 Aug 2018 21:57:27 -0400 Subject: jobs: canonize Error object diff --git a/0023-jobs-add-exit-shim.patch b/0031-jobs-add-exit-shim.patch similarity index 98% rename from 0023-jobs-add-exit-shim.patch rename to 0031-jobs-add-exit-shim.patch index e8493e5..2a0ccb0 100644 --- a/0023-jobs-add-exit-shim.patch +++ b/0031-jobs-add-exit-shim.patch @@ -1,4 +1,4 @@ -From 29ae3509885eaa6d24ee82aa4cae47ddeda086db Mon Sep 17 00:00:00 2001 +From 17511eb281e005da6e617acd12c81a0a1fa1771d Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:09 +0100 Subject: jobs: add exit shim diff --git a/0024-block-commit-utilize-job_exit-shim.patch 
b/0032-block-commit-utilize-job_exit-shim.patch similarity index 98% rename from 0024-block-commit-utilize-job_exit-shim.patch rename to 0032-block-commit-utilize-job_exit-shim.patch index 2d4e3b9..3994481 100644 --- a/0024-block-commit-utilize-job_exit-shim.patch +++ b/0032-block-commit-utilize-job_exit-shim.patch @@ -1,4 +1,4 @@ -From 2207ab7e71d5d3c3806d60b3f483988a62566292 Mon Sep 17 00:00:00 2001 +From 912e8eaa87f8dab40466cf0d45c3290d02e6a9d5 Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:10 +0100 Subject: block/commit: utilize job_exit shim diff --git a/0025-block-mirror-utilize-job_exit-shim.patch b/0033-block-mirror-utilize-job_exit-shim.patch similarity index 98% rename from 0025-block-mirror-utilize-job_exit-shim.patch rename to 0033-block-mirror-utilize-job_exit-shim.patch index 833eead..65eb25a 100644 --- a/0025-block-mirror-utilize-job_exit-shim.patch +++ b/0033-block-mirror-utilize-job_exit-shim.patch @@ -1,4 +1,4 @@ -From f96869810df10ac28030a31d8cb1e39825133e94 Mon Sep 17 00:00:00 2001 +From 2322917770da98e175e7ae8bf0bb1a624ec3cebc Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 29 Aug 2018 21:57:30 -0400 Subject: block/mirror: utilize job_exit shim diff --git a/0026-jobs-utilize-job_exit-shim.patch b/0034-jobs-utilize-job_exit-shim.patch similarity index 99% rename from 0026-jobs-utilize-job_exit-shim.patch rename to 0034-jobs-utilize-job_exit-shim.patch index d5ca8e5..8b765d8 100644 --- a/0026-jobs-utilize-job_exit-shim.patch +++ b/0034-jobs-utilize-job_exit-shim.patch @@ -1,4 +1,4 @@ -From 5947e8781d9dffb069fcc570402f775f80068e63 Mon Sep 17 00:00:00 2001 +From 83d2840eeadd8a55b796eae5454783d42913963c Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:12 +0100 Subject: jobs: utilize job_exit shim diff --git a/0027-block-backup-make-function-variables-consistently-na.patch b/0035-block-backup-make-function-variables-consistently-na.patch similarity index 99% rename from 
0027-block-backup-make-function-variables-consistently-na.patch rename to 0035-block-backup-make-function-variables-consistently-na.patch index 2923dac..ab0af70 100644 --- a/0027-block-backup-make-function-variables-consistently-na.patch +++ b/0035-block-backup-make-function-variables-consistently-na.patch @@ -1,4 +1,4 @@ -From 3e86b802541a7230eda88a6bd7f17b411deab9fa Mon Sep 17 00:00:00 2001 +From b5532575bb8aa748dc066834d7ac150bbb6575a7 Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:13 +0100 Subject: block/backup: make function variables consistently named diff --git a/0028-jobs-remove-ret-argument-to-job_completed-privatize-.patch b/0036-jobs-remove-ret-argument-to-job_completed-privatize-.patch similarity index 98% rename from 0028-jobs-remove-ret-argument-to-job_completed-privatize-.patch rename to 0036-jobs-remove-ret-argument-to-job_completed-privatize-.patch index 070c907..6d2791c 100644 --- a/0028-jobs-remove-ret-argument-to-job_completed-privatize-.patch +++ b/0036-jobs-remove-ret-argument-to-job_completed-privatize-.patch @@ -1,4 +1,4 @@ -From 3141614c15fbcf6aee7af19069380aa6d186656b Mon Sep 17 00:00:00 2001 +From 7fe6d53387852907871d82997fbccc2cf774bdb4 Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:14 +0100 Subject: jobs: remove ret argument to job_completed; privatize it diff --git a/0029-jobs-remove-job_defer_to_main_loop.patch b/0037-jobs-remove-job_defer_to_main_loop.patch similarity index 98% rename from 0029-jobs-remove-job_defer_to_main_loop.patch rename to 0037-jobs-remove-job_defer_to_main_loop.patch index 3c302ce..2b0fec4 100644 --- a/0029-jobs-remove-job_defer_to_main_loop.patch +++ b/0037-jobs-remove-job_defer_to_main_loop.patch @@ -1,4 +1,4 @@ -From 73694b41a7e96fb364bdfd6fbad89c69dc2d1f73 Mon Sep 17 00:00:00 2001 +From 1827993a08cc8c86cc40ca9ccb7ef668261b2bc4 Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:15 +0100 Subject: jobs: remove job_defer_to_main_loop diff --git 
a/0030-block-commit-add-block-job-creation-flags.patch b/0038-block-commit-add-block-job-creation-flags.patch similarity index 98% rename from 0030-block-commit-add-block-job-creation-flags.patch rename to 0038-block-commit-add-block-job-creation-flags.patch index 315a78f..b145fc1 100644 --- a/0030-block-commit-add-block-job-creation-flags.patch +++ b/0038-block-commit-add-block-job-creation-flags.patch @@ -1,4 +1,4 @@ -From 8141d5f8ab70551c59fae63373a9562c99c8e00d Mon Sep 17 00:00:00 2001 +From 6c8da2ba018d7546a15c3917f52ad1cc2b5b133c Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:16 +0100 Subject: block/commit: add block job creation flags diff --git a/0031-block-mirror-add-block-job-creation-flags.patch b/0039-block-mirror-add-block-job-creation-flags.patch similarity index 98% rename from 0031-block-mirror-add-block-job-creation-flags.patch rename to 0039-block-mirror-add-block-job-creation-flags.patch index 088c370..fec813e 100644 --- a/0031-block-mirror-add-block-job-creation-flags.patch +++ b/0039-block-mirror-add-block-job-creation-flags.patch @@ -1,4 +1,4 @@ -From 8ac0fb4e4202e6321d57f1be01f4ca6e51a98687 Mon Sep 17 00:00:00 2001 +From d4f6cfe194df3236bf53b1093e0a7f98f0a5da0e Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:17 +0100 Subject: block/mirror: add block job creation flags diff --git a/0032-block-stream-add-block-job-creation-flags.patch b/0040-block-stream-add-block-job-creation-flags.patch similarity index 98% rename from 0032-block-stream-add-block-job-creation-flags.patch rename to 0040-block-stream-add-block-job-creation-flags.patch index 1dda670..224be80 100644 --- a/0032-block-stream-add-block-job-creation-flags.patch +++ b/0040-block-stream-add-block-job-creation-flags.patch @@ -1,4 +1,4 @@ -From 64569465b360642820193586116aa51ed0b356bd Mon Sep 17 00:00:00 2001 +From 4fd98648eb0df8157c1238a1cee36373278d44a5 Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:18 +0100 Subject: 
block/stream: add block job creation flags diff --git a/0033-block-commit-refactor-commit-to-use-job-callbacks.patch b/0041-block-commit-refactor-commit-to-use-job-callbacks.patch similarity index 99% rename from 0033-block-commit-refactor-commit-to-use-job-callbacks.patch rename to 0041-block-commit-refactor-commit-to-use-job-callbacks.patch index 2a5f69b..212513e 100644 --- a/0033-block-commit-refactor-commit-to-use-job-callbacks.patch +++ b/0041-block-commit-refactor-commit-to-use-job-callbacks.patch @@ -1,4 +1,4 @@ -From b0ac95edde586e808a1118c4b04c1608de8b5b6c Mon Sep 17 00:00:00 2001 +From b0b7d48f97dd97efacf93e5529d7597bd2280095 Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:19 +0100 Subject: block/commit: refactor commit to use job callbacks diff --git a/0034-block-mirror-don-t-install-backing-chain-on-abort.patch b/0042-block-mirror-don-t-install-backing-chain-on-abort.patch similarity index 96% rename from 0034-block-mirror-don-t-install-backing-chain-on-abort.patch rename to 0042-block-mirror-don-t-install-backing-chain-on-abort.patch index 241ae5f..8a2f14f 100644 --- a/0034-block-mirror-don-t-install-backing-chain-on-abort.patch +++ b/0042-block-mirror-don-t-install-backing-chain-on-abort.patch @@ -1,4 +1,4 @@ -From 7f155f96e9db0be97501f90e482a29d51779f887 Mon Sep 17 00:00:00 2001 +From e849bf276e59b282f3288b42abe9d6dff51dc678 Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:20 +0100 Subject: block/mirror: don't install backing chain on abort diff --git a/0035-block-mirror-conservative-mirror_exit-refactor.patch b/0043-block-mirror-conservative-mirror_exit-refactor.patch similarity index 98% rename from 0035-block-mirror-conservative-mirror_exit-refactor.patch rename to 0043-block-mirror-conservative-mirror_exit-refactor.patch index 1c34fec..b964981 100644 --- a/0035-block-mirror-conservative-mirror_exit-refactor.patch +++ b/0043-block-mirror-conservative-mirror_exit-refactor.patch @@ -1,4 +1,4 @@ -From 
8b394ff523e607060c80c6b647dbb89a2f73571d Mon Sep 17 00:00:00 2001 +From 430c298d6bf9a7c8b90ad30bc2cd445e5cd6dd50 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 6 Sep 2018 09:02:15 -0400 Subject: block/mirror: conservative mirror_exit refactor diff --git a/0036-block-stream-refactor-stream-to-use-job-callbacks.patch b/0044-block-stream-refactor-stream-to-use-job-callbacks.patch similarity index 97% rename from 0036-block-stream-refactor-stream-to-use-job-callbacks.patch rename to 0044-block-stream-refactor-stream-to-use-job-callbacks.patch index 4ff194d..c798419 100644 --- a/0036-block-stream-refactor-stream-to-use-job-callbacks.patch +++ b/0044-block-stream-refactor-stream-to-use-job-callbacks.patch @@ -1,4 +1,4 @@ -From 533c77ee076c0050b4c4deb26fda54c085a994ce Mon Sep 17 00:00:00 2001 +From 57ede8577bbecac73a2945ca5278662dfc019dca Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:22 +0100 Subject: block/stream: refactor stream to use job callbacks diff --git a/0037-tests-blockjob-replace-Blockjob-with-Job.patch b/0045-tests-blockjob-replace-Blockjob-with-Job.patch similarity index 98% rename from 0037-tests-blockjob-replace-Blockjob-with-Job.patch rename to 0045-tests-blockjob-replace-Blockjob-with-Job.patch index f408d83..3d9bf41 100644 --- a/0037-tests-blockjob-replace-Blockjob-with-Job.patch +++ b/0045-tests-blockjob-replace-Blockjob-with-Job.patch @@ -1,4 +1,4 @@ -From ac945e63cca25c453d472834c64aa3a4192729f9 Mon Sep 17 00:00:00 2001 +From 3817b0c67fb4636bacd9c4ebdef39f51b18e05c1 Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:23 +0100 Subject: tests/blockjob: replace Blockjob with Job diff --git a/0038-tests-test-blockjob-remove-exit-callback.patch b/0046-tests-test-blockjob-remove-exit-callback.patch similarity index 97% rename from 0038-tests-test-blockjob-remove-exit-callback.patch rename to 0046-tests-test-blockjob-remove-exit-callback.patch index 9bd1a7c..81856fb 100644 --- 
a/0038-tests-test-blockjob-remove-exit-callback.patch +++ b/0046-tests-test-blockjob-remove-exit-callback.patch @@ -1,4 +1,4 @@ -From 62fd56870fb6296f795c9fc7f5965d83a72dabac Mon Sep 17 00:00:00 2001 +From f641d3f6946af31724c578aa6f09ba883bb5fab3 Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:24 +0100 Subject: tests/test-blockjob: remove exit callback diff --git a/0039-tests-test-blockjob-txn-move-.exit-to-.clean.patch b/0047-tests-test-blockjob-txn-move-.exit-to-.clean.patch similarity index 96% rename from 0039-tests-test-blockjob-txn-move-.exit-to-.clean.patch rename to 0047-tests-test-blockjob-txn-move-.exit-to-.clean.patch index ef6db4f..b6cc4fd 100644 --- a/0039-tests-test-blockjob-txn-move-.exit-to-.clean.patch +++ b/0047-tests-test-blockjob-txn-move-.exit-to-.clean.patch @@ -1,4 +1,4 @@ -From 6247c4b10e3fb6c677947a503ddad961cb71faff Mon Sep 17 00:00:00 2001 +From 43b1e07411d06cd676f3f55e14e0ac1082a679d0 Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:25 +0100 Subject: tests/test-blockjob-txn: move .exit to .clean diff --git a/0040-jobs-remove-.exit-callback.patch b/0048-jobs-remove-.exit-callback.patch similarity index 98% rename from 0040-jobs-remove-.exit-callback.patch rename to 0048-jobs-remove-.exit-callback.patch index 00704a6..b4ece99 100644 --- a/0040-jobs-remove-.exit-callback.patch +++ b/0048-jobs-remove-.exit-callback.patch @@ -1,4 +1,4 @@ -From c2c10f4fac6757d292f8b3d9ac7723a718e596aa Mon Sep 17 00:00:00 2001 +From ea31341d12bc2080f7a1b606dcf578376d6a4637 Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:26 +0100 Subject: jobs: remove .exit callback diff --git a/0041-qapi-block-commit-expose-new-job-properties.patch b/0049-qapi-block-commit-expose-new-job-properties.patch similarity index 98% rename from 0041-qapi-block-commit-expose-new-job-properties.patch rename to 0049-qapi-block-commit-expose-new-job-properties.patch index a5ec394..97a192c 100644 --- 
a/0041-qapi-block-commit-expose-new-job-properties.patch +++ b/0049-qapi-block-commit-expose-new-job-properties.patch @@ -1,4 +1,4 @@ -From ce81bd3fa7316bcdee5e121e6ea71c7b2e1e81e1 Mon Sep 17 00:00:00 2001 +From 756c3ccf83d5612ca2b326a8fed8fdf1f7958adb Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:27 +0100 Subject: qapi/block-commit: expose new job properties diff --git a/0042-qapi-block-mirror-expose-new-job-properties.patch b/0050-qapi-block-mirror-expose-new-job-properties.patch similarity index 98% rename from 0042-qapi-block-mirror-expose-new-job-properties.patch rename to 0050-qapi-block-mirror-expose-new-job-properties.patch index 52f77cd..7f6443a 100644 --- a/0042-qapi-block-mirror-expose-new-job-properties.patch +++ b/0050-qapi-block-mirror-expose-new-job-properties.patch @@ -1,4 +1,4 @@ -From 318445193efc33c06e63e021a988814d49658a0f Mon Sep 17 00:00:00 2001 +From 254a2b41a647cf39abaa5d94f17aef62f035d30f Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 6 Sep 2018 09:02:22 -0400 Subject: qapi/block-mirror: expose new job properties diff --git a/0043-qapi-block-stream-expose-new-job-properties.patch b/0051-qapi-block-stream-expose-new-job-properties.patch similarity index 98% rename from 0043-qapi-block-stream-expose-new-job-properties.patch rename to 0051-qapi-block-stream-expose-new-job-properties.patch index 4e5a8fa..c55039a 100644 --- a/0043-qapi-block-stream-expose-new-job-properties.patch +++ b/0051-qapi-block-stream-expose-new-job-properties.patch @@ -1,4 +1,4 @@ -From 67fa4ccaffcd7e2698d30597f51093903aef4a5d Mon Sep 17 00:00:00 2001 +From 50990953696a8803f6b2b7ad71901c58c375eb8c Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:29 +0100 Subject: qapi/block-stream: expose new job properties diff --git a/0044-block-backup-qapi-documentation-fixup.patch b/0052-block-backup-qapi-documentation-fixup.patch similarity index 98% rename from 0044-block-backup-qapi-documentation-fixup.patch rename to 
0052-block-backup-qapi-documentation-fixup.patch index c8b3273..fb695b0 100644 --- a/0044-block-backup-qapi-documentation-fixup.patch +++ b/0052-block-backup-qapi-documentation-fixup.patch @@ -1,4 +1,4 @@ -From c104ce571b585040ca4d0c77419d2ca06c2087b8 Mon Sep 17 00:00:00 2001 +From 6ecfc87059e78892c868227319a91adea909e09e Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:30 +0100 Subject: block/backup: qapi documentation fixup diff --git a/0045-blockdev-document-transactional-shortcomings.patch b/0053-blockdev-document-transactional-shortcomings.patch similarity index 96% rename from 0045-blockdev-document-transactional-shortcomings.patch rename to 0053-blockdev-document-transactional-shortcomings.patch index 7562949..50e40af 100644 --- a/0045-blockdev-document-transactional-shortcomings.patch +++ b/0053-blockdev-document-transactional-shortcomings.patch @@ -1,4 +1,4 @@ -From 53dc1dce0b91a7ebb1c32d10a7482461c01326d6 Mon Sep 17 00:00:00 2001 +From 00a437d87c6bd8ec956b25fc0dffe8397ce475b8 Mon Sep 17 00:00:00 2001 From: John Snow Date: Tue, 25 Sep 2018 22:34:31 +0100 Subject: blockdev: document transactional shortcomings diff --git a/0054-seccomp-use-SIGSYS-signal-instead-of-killing-the-thr.patch b/0054-seccomp-use-SIGSYS-signal-instead-of-killing-the-thr.patch new file mode 100644 index 0000000..f7a741f --- /dev/null +++ b/0054-seccomp-use-SIGSYS-signal-instead-of-killing-the-thr.patch @@ -0,0 +1,67 @@ +From 5b9ccef27363b61223b31312062cde1210216985 Mon Sep 17 00:00:00 2001 +From: Eduardo Otubo +Date: Fri, 28 Sep 2018 07:56:36 +0100 +Subject: seccomp: use SIGSYS signal instead of killing the thread +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eduardo Otubo +Message-id: <20180928075639.16746-3-otubo@redhat.com> +Patchwork-id: 82314 +O-Subject: [RHEL-8 qemu-kvm PATCH 2/5] seccomp: use SIGSYS signal instead of killing the thread +Bugzilla: 1618356 +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Thomas Huth + +From: Marc-André Lureau + +commit 6f2231e9b0931e1998d9ed0c509adf7aedc02db2 +Author: Marc-André Lureau +Date: Wed Aug 22 19:02:47 2018 +0200 + + seccomp: use SIGSYS signal instead of killing the thread + + The seccomp action SCMP_ACT_KILL results in immediate termination of + the thread that made the bad system call. However, qemu being + multi-threaded, it keeps running. There is no easy way for parent + process / management layer (libvirt) to know about that situation. + + Instead, the default SIGSYS handler when invoked with SCMP_ACT_TRAP + will terminate the program and core dump. + + This may not be the most secure solution, but probably better than + just killing the offending thread. SCMP_ACT_KILL_PROCESS has been + added in Linux 4.14 to improve the situation, which I propose to use + by default if available in the next patch. + + Related to: + https://bugzilla.redhat.com/show_bug.cgi?id=1594456 + + Signed-off-by: Marc-André Lureau + Reviewed-by: Daniel P. Berrangé + Acked-by: Eduardo Otubo + +Signed-off-by: Eduardo Otubo +Signed-off-by: Danilo C. L. 
de Paula +--- + qemu-seccomp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/qemu-seccomp.c b/qemu-seccomp.c +index 9cd8eb9..b117a92 100644 +--- a/qemu-seccomp.c ++++ b/qemu-seccomp.c +@@ -125,7 +125,7 @@ static int seccomp_start(uint32_t seccomp_opts) + continue; + } + +- rc = seccomp_rule_add_array(ctx, SCMP_ACT_KILL, blacklist[i].num, ++ rc = seccomp_rule_add_array(ctx, SCMP_ACT_TRAP, blacklist[i].num, + blacklist[i].narg, blacklist[i].arg_cmp); + if (rc < 0) { + goto seccomp_return; +-- +1.8.3.1 + diff --git a/0055-seccomp-prefer-SCMP_ACT_KILL_PROCESS-if-available.patch b/0055-seccomp-prefer-SCMP_ACT_KILL_PROCESS-if-available.patch new file mode 100644 index 0000000..809c9c2 --- /dev/null +++ b/0055-seccomp-prefer-SCMP_ACT_KILL_PROCESS-if-available.patch @@ -0,0 +1,110 @@ +From 80574fd1c226ca5c8555b3bb37bc3fe121bbf69f Mon Sep 17 00:00:00 2001 +From: Eduardo Otubo +Date: Fri, 28 Sep 2018 07:56:37 +0100 +Subject: seccomp: prefer SCMP_ACT_KILL_PROCESS if available +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eduardo Otubo +Message-id: <20180928075639.16746-4-otubo@redhat.com> +Patchwork-id: 82315 +O-Subject: [RHEL-8 qemu-kvm PATCH 3/5] seccomp: prefer SCMP_ACT_KILL_PROCESS if available +Bugzilla: 1618356 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Thomas Huth + +From: Marc-André Lureau + +commit bda08a5764d470f101fa38635d30b41179a313e1 +Author: Marc-André Lureau +Date: Wed Aug 22 19:02:48 2018 +0200 + + seccomp: prefer SCMP_ACT_KILL_PROCESS if available + + The upcoming libseccomp release should have SCMP_ACT_KILL_PROCESS + action (https://github.com/seccomp/libseccomp/issues/96). + + SCMP_ACT_KILL_PROCESS is preferable to immediately terminate the + offending process, rather than having the SIGSYS handler running. 
+ + Use SECCOMP_GET_ACTION_AVAIL to check availability of kernel support, + as libseccomp will fallback on SCMP_ACT_KILL otherwise, and we still + prefer SCMP_ACT_TRAP. + + Signed-off-by: Marc-André Lureau + Reviewed-by: Daniel P. Berrangé + Acked-by: Eduardo Otubo + +Signed-off-by: Eduardo Otubo +Signed-off-by: Danilo C. L. de Paula +--- + qemu-seccomp.c | 31 ++++++++++++++++++++++++++++++- + 1 file changed, 30 insertions(+), 1 deletion(-) + +diff --git a/qemu-seccomp.c b/qemu-seccomp.c +index b117a92..f0c833f 100644 +--- a/qemu-seccomp.c ++++ b/qemu-seccomp.c +@@ -20,6 +20,7 @@ + #include + #include + #include "sysemu/seccomp.h" ++#include + + /* For some architectures (notably ARM) cacheflush is not supported until + * libseccomp 2.2.3, but configure enforces that we are using a more recent +@@ -107,12 +108,40 @@ static const struct QemuSeccompSyscall blacklist[] = { + { SCMP_SYS(sched_get_priority_min), QEMU_SECCOMP_SET_RESOURCECTL }, + }; + ++static inline __attribute__((unused)) int ++qemu_seccomp(unsigned int operation, unsigned int flags, void *args) ++{ ++#ifdef __NR_seccomp ++ return syscall(__NR_seccomp, operation, flags, args); ++#else ++ errno = ENOSYS; ++ return -1; ++#endif ++} ++ ++static uint32_t qemu_seccomp_get_kill_action(void) ++{ ++#if defined(SECCOMP_GET_ACTION_AVAIL) && defined(SCMP_ACT_KILL_PROCESS) && \ ++ defined(SECCOMP_RET_KILL_PROCESS) ++ { ++ uint32_t action = SECCOMP_RET_KILL_PROCESS; ++ ++ if (qemu_seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &action) == 0) { ++ return SCMP_ACT_KILL_PROCESS; ++ } ++ } ++#endif ++ ++ return SCMP_ACT_TRAP; ++} ++ + + static int seccomp_start(uint32_t seccomp_opts) + { + int rc = 0; + unsigned int i = 0; + scmp_filter_ctx ctx; ++ uint32_t action = qemu_seccomp_get_kill_action(); + + ctx = seccomp_init(SCMP_ACT_ALLOW); + if (ctx == NULL) { +@@ -125,7 +154,7 @@ static int seccomp_start(uint32_t seccomp_opts) + continue; + } + +- rc = seccomp_rule_add_array(ctx, SCMP_ACT_TRAP, blacklist[i].num, ++ rc = 
seccomp_rule_add_array(ctx, action, blacklist[i].num, + blacklist[i].narg, blacklist[i].arg_cmp); + if (rc < 0) { + goto seccomp_return; +-- +1.8.3.1 + diff --git a/0056-seccomp-set-the-seccomp-filter-to-all-threads.patch b/0056-seccomp-set-the-seccomp-filter-to-all-threads.patch new file mode 100644 index 0000000..b1e37ad --- /dev/null +++ b/0056-seccomp-set-the-seccomp-filter-to-all-threads.patch @@ -0,0 +1,77 @@ +From ef8bae877ca544af956f8314cdd702d1c62a9b15 Mon Sep 17 00:00:00 2001 +From: Eduardo Otubo +Date: Fri, 28 Sep 2018 07:56:39 +0100 +Subject: seccomp: set the seccomp filter to all threads +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eduardo Otubo +Message-id: <20180928075639.16746-6-otubo@redhat.com> +Patchwork-id: 82316 +O-Subject: [RHEL-8 qemu-kvm PATCH 5/5] seccomp: set the seccomp filter to all threads +Bugzilla: 1618356 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Thomas Huth + +From: Marc-André Lureau + +commit 70dfabeaa79ba4d7a3b699abe1a047c8012db114 +Author: Marc-André Lureau +Date: Wed Aug 22 19:02:50 2018 +0200 + + seccomp: set the seccomp filter to all threads + + When using "-seccomp on", the seccomp policy is only applied to the + main thread, the vcpu worker thread and other worker threads created + after seccomp policy is applied; the seccomp policy is not applied to + e.g. the RCU thread because it is created before the seccomp policy is + applied and SECCOMP_FILTER_FLAG_TSYNC isn't used. + + This can be verified with + for task in /proc/`pidof qemu`/task/*; do cat $task/status | grep Secc ; done + Seccomp: 2 + Seccomp: 0 + Seccomp: 0 + Seccomp: 2 + Seccomp: 2 + Seccomp: 2 + + Starting with libseccomp 2.2.0 and kernel >= 3.17, we can use + seccomp_attr_set(ctx, > SCMP_FLTATR_CTL_TSYNC, 1) to update the policy + on all threads. + + libseccomp requirement was bumped to 2.2.0 in previous patch. 
+ libseccomp should fail to set the filter if it can't honour + SCMP_FLTATR_CTL_TSYNC (untested), and thus -sandbox will now fail on + kernel < 3.17. + + Signed-off-by: Marc-André Lureau + Acked-by: Eduardo Otubo + +Signed-off-by: Eduardo Otubo +Signed-off-by: Danilo C. L. de Paula +--- + qemu-seccomp.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/qemu-seccomp.c b/qemu-seccomp.c +index f0c833f..4729eb1 100644 +--- a/qemu-seccomp.c ++++ b/qemu-seccomp.c +@@ -149,6 +149,11 @@ static int seccomp_start(uint32_t seccomp_opts) + goto seccomp_return; + } + ++ rc = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_TSYNC, 1); ++ if (rc != 0) { ++ goto seccomp_return; ++ } ++ + for (i = 0; i < ARRAY_SIZE(blacklist); i++) { + if (!(seccomp_opts & blacklist[i].set)) { + continue; +-- +1.8.3.1 + diff --git a/0057-memory-cleanup-side-effects-of-memory_region_init_fo.patch b/0057-memory-cleanup-side-effects-of-memory_region_init_fo.patch new file mode 100644 index 0000000..e866c28 --- /dev/null +++ b/0057-memory-cleanup-side-effects-of-memory_region_init_fo.patch @@ -0,0 +1,185 @@ +From da9c980b19783915f8675894b88da631f27dd34d Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 5 Oct 2018 12:59:47 +0100 +Subject: memory: cleanup side effects of memory_region_init_foo() on failure + +RH-Author: Igor Mammedov +Message-id: <1538744387-84898-1-git-send-email-imammedo@redhat.com> +Patchwork-id: 82391 +O-Subject: [RHEL-8 qemu-kvm PATCH] memory: cleanup side effects of memory_region_init_foo() on failure +Bugzilla: 1600365 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Pankaj Gupta +RH-Acked-by: Laszlo Ersek + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1600365 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=18658506 + +if MemoryRegion intialization fails it's left in semi-initialized state, +where it's size is not 0 and attached as child to owner object. 
+And this leds to crash in following use-case: + (monitor) object_add memory-backend-file,id=mem1,size=99999G,mem-path=/tmp/foo,discard-data=yes + memory.c:2083: memory_region_get_ram_ptr: Assertion `mr->ram_block' failed + Aborted (core dumped) +it happens due to assumption that memory region is intialized when + memory_region_size() != 0 +and therefore it's ok to access it in + file_backend_unparent() + if (memory_region_size() != 0) + memory_region_get_ram_ptr() + +which happens when object_add fails and unparents failed backend making +file_backend_unparent() access invalid memory region. + +Fix it by making sure that memory_region_init_foo() APIs cleanup externally +visible side effects on failure (like set size to 0 and unparenting object) + +Signed-off-by: Igor Mammedov +Message-Id: <1536064777-42312-1-git-send-email-imammedo@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 1cd3d492624da399d66c4c3e6a5eabb8f96bb0a2) +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + memory.c + due missing (cbfc01710 "memory, exec: switch file ram allocation functions to 'flags' parameters") + not related to the patch signature mismatch of + qemu_ram_alloc_from_file()/qemu_ram_alloc_from_fd() +--- + memory.c | 48 ++++++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 42 insertions(+), 6 deletions(-) + +diff --git a/memory.c b/memory.c +index e9cd446..88c75d8 100644 +--- a/memory.c ++++ b/memory.c +@@ -1518,12 +1518,18 @@ void memory_region_init_ram_shared_nomigrate(MemoryRegion *mr, + bool share, + Error **errp) + { ++ Error *err = NULL; + memory_region_init(mr, owner, name, size); + mr->ram = true; + mr->terminates = true; + mr->destructor = memory_region_destructor_ram; +- mr->ram_block = qemu_ram_alloc(size, share, mr, errp); ++ mr->ram_block = qemu_ram_alloc(size, share, mr, &err); + mr->dirty_log_mask = tcg_enabled() ? 
(1 << DIRTY_MEMORY_CODE) : 0; ++ if (err) { ++ mr->size = int128_zero(); ++ object_unparent(OBJECT(mr)); ++ error_propagate(errp, err); ++ } + } + + void memory_region_init_resizeable_ram(MemoryRegion *mr, +@@ -1536,13 +1542,19 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, + void *host), + Error **errp) + { ++ Error *err = NULL; + memory_region_init(mr, owner, name, size); + mr->ram = true; + mr->terminates = true; + mr->destructor = memory_region_destructor_ram; + mr->ram_block = qemu_ram_alloc_resizeable(size, max_size, resized, +- mr, errp); ++ mr, &err); + mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; ++ if (err) { ++ mr->size = int128_zero(); ++ object_unparent(OBJECT(mr)); ++ error_propagate(errp, err); ++ } + } + + #ifdef __linux__ +@@ -1555,13 +1567,19 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, + const char *path, + Error **errp) + { ++ Error *err = NULL; + memory_region_init(mr, owner, name, size); + mr->ram = true; + mr->terminates = true; + mr->destructor = memory_region_destructor_ram; + mr->align = align; +- mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, errp); ++ mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, &err); + mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; ++ if (err) { ++ mr->size = int128_zero(); ++ object_unparent(OBJECT(mr)); ++ error_propagate(errp, err); ++ } + } + + void memory_region_init_ram_from_fd(MemoryRegion *mr, +@@ -1572,12 +1590,18 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr, + int fd, + Error **errp) + { ++ Error *err = NULL; + memory_region_init(mr, owner, name, size); + mr->ram = true; + mr->terminates = true; + mr->destructor = memory_region_destructor_ram; +- mr->ram_block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp); ++ mr->ram_block = qemu_ram_alloc_from_fd(size, mr, share, fd, &err); + mr->dirty_log_mask = tcg_enabled() ? 
(1 << DIRTY_MEMORY_CODE) : 0; ++ if (err) { ++ mr->size = int128_zero(); ++ object_unparent(OBJECT(mr)); ++ error_propagate(errp, err); ++ } + } + #endif + +@@ -1628,13 +1652,19 @@ void memory_region_init_rom_nomigrate(MemoryRegion *mr, + uint64_t size, + Error **errp) + { ++ Error *err = NULL; + memory_region_init(mr, owner, name, size); + mr->ram = true; + mr->readonly = true; + mr->terminates = true; + mr->destructor = memory_region_destructor_ram; +- mr->ram_block = qemu_ram_alloc(size, false, mr, errp); ++ mr->ram_block = qemu_ram_alloc(size, false, mr, &err); + mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; ++ if (err) { ++ mr->size = int128_zero(); ++ object_unparent(OBJECT(mr)); ++ error_propagate(errp, err); ++ } + } + + void memory_region_init_rom_device_nomigrate(MemoryRegion *mr, +@@ -1645,6 +1675,7 @@ void memory_region_init_rom_device_nomigrate(MemoryRegion *mr, + uint64_t size, + Error **errp) + { ++ Error *err = NULL; + assert(ops); + memory_region_init(mr, owner, name, size); + mr->ops = ops; +@@ -1652,7 +1683,12 @@ void memory_region_init_rom_device_nomigrate(MemoryRegion *mr, + mr->terminates = true; + mr->rom_device = true; + mr->destructor = memory_region_destructor_ram; +- mr->ram_block = qemu_ram_alloc(size, false, mr, errp); ++ mr->ram_block = qemu_ram_alloc(size, false, mr, &err); ++ if (err) { ++ mr->size = int128_zero(); ++ object_unparent(OBJECT(mr)); ++ error_propagate(errp, err); ++ } + } + + void memory_region_init_iommu(void *_iommu_mr, +-- +1.8.3.1 + diff --git a/0058-mirror-Fail-gracefully-for-source-target.patch b/0058-mirror-Fail-gracefully-for-source-target.patch new file mode 100644 index 0000000..56c3baf --- /dev/null +++ b/0058-mirror-Fail-gracefully-for-source-target.patch @@ -0,0 +1,87 @@ +From a96ed7a8374891516e626b797321d4be69cb071d Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 13:19:57 +0100 +Subject: mirror: Fail gracefully for source == target + +RH-Author: Kevin Wolf 
+Message-id: <20181010131957.23198-2-kwolf@redhat.com> +Patchwork-id: 82564 +O-Subject: [RHEL-8 qemu-kvm PATCH 1/1] mirror: Fail gracefully for source == target +Bugzilla: 1637963 +RH-Acked-by: John Snow +RH-Acked-by: Fam Zheng +RH-Acked-by: Stefan Hajnoczi + +blockdev-mirror with the same node for source and target segfaults +today: A node is in its own backing chain, so mirror_start_job() decides +that this is an active commit. When adding the intermediate nodes with +block_job_add_bdrv(), it starts the iteration through the subchain with +the backing file of source, though, so it never reaches target and +instead runs into NULL at the base. + +While we could fix that by starting with source itself, there is no +point in allowing mirroring a node into itself and I wouldn't be +surprised if this caused more problems later. + +So just check for this scenario and error out. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +(cherry picked from commit 86fae10c64d642256cf019e6829929fa0d259c7a) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/mirror.c | 5 +++++ + tests/qemu-iotests/041 | 6 ++++++ + tests/qemu-iotests/041.out | 4 ++-- + 3 files changed, 13 insertions(+), 2 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 7efba77..b61f99b 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -1516,6 +1516,11 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, + buf_size = DEFAULT_MIRROR_BUF_SIZE; + } + ++ if (bs == target) { ++ error_setg(errp, "Can't mirror node into itself"); ++ return; ++ } ++ + /* In the case of active commit, add dummy driver to provide consistent + * reads on the top, while disabling it in the intermediate nodes, and make + * the backing chain writable. 
*/ +diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 +index c20ac7d..9336ab6 100755 +--- a/tests/qemu-iotests/041 ++++ b/tests/qemu-iotests/041 +@@ -234,6 +234,12 @@ class TestSingleBlockdev(TestSingleDrive): + result = self.vm.qmp("blockdev-add", **args) + self.assert_qmp(result, 'return', {}) + ++ def test_mirror_to_self(self): ++ result = self.vm.qmp(self.qmp_cmd, job_id='job0', ++ device=self.qmp_target, sync='full', ++ target=self.qmp_target) ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ + test_large_cluster = None + test_image_not_found = None + test_small_buffer2 = None +diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out +index c28b392..e071d0b 100644 +--- a/tests/qemu-iotests/041.out ++++ b/tests/qemu-iotests/041.out +@@ -1,5 +1,5 @@ +-..................................................................................... ++........................................................................................ + ---------------------------------------------------------------------- +-Ran 85 tests ++Ran 88 tests + + OK +-- +1.8.3.1 + diff --git a/0059-commit-Add-top-node-base-node-options.patch b/0059-commit-Add-top-node-base-node-options.patch new file mode 100644 index 0000000..c3cde82 --- /dev/null +++ b/0059-commit-Add-top-node-base-node-options.patch @@ -0,0 +1,141 @@ +From 0086e14eef7fc78bc1254ee888bd7d720d6ee5b9 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 13:50:54 +0100 +Subject: commit: Add top-node/base-node options + +RH-Author: Kevin Wolf +Message-id: <20181010135055.3874-2-kwolf@redhat.com> +Patchwork-id: 82569 +O-Subject: [RHEL-8 qemu-kvm PATCH 1/2] commit: Add top-node/base-node options +Bugzilla: 1637970 +RH-Acked-by: John Snow +RH-Acked-by: Fam Zheng +RH-Acked-by: Stefan Hajnoczi + +The block-commit QMP command required specifying the top and base nodes +of the commit jobs using the file name of that node. 
While this works +in simple cases (local files with absolute paths), the file names +generated for more complicated setups can be hard to predict. + +The block-commit command has more problems than just this, so we want to +replace it altogether in the long run, but libvirt needs a reliable way +to address nodes now. So we don't want to wait for a new, cleaner +command, but just add the minimal thing needed right now. + +This adds two new options top-node and base-node to the command, which +allow specifying node names instead. They are mutually exclusive with +the old options. + +Signed-off-by: Kevin Wolf +(cherry picked from commit 3c605f4074ebeb97970eb660fb56a9cb06525923) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + blockdev.c | 32 ++++++++++++++++++++++++++++++-- + qapi/block-core.json | 24 ++++++++++++++++++------ + 2 files changed, 48 insertions(+), 8 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index d97202a..df256e6 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3324,7 +3324,9 @@ out: + } + + void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, ++ bool has_base_node, const char *base_node, + bool has_base, const char *base, ++ bool has_top_node, const char *top_node, + bool has_top, const char *top, + bool has_backing_file, const char *backing_file, + bool has_speed, int64_t speed, +@@ -3385,7 +3387,20 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, + /* default top_bs is the active layer */ + top_bs = bs; + +- if (has_top && top) { ++ if (has_top_node && has_top) { ++ error_setg(errp, "'top-node' and 'top' are mutually exclusive"); ++ goto out; ++ } else if (has_top_node) { ++ top_bs = bdrv_lookup_bs(NULL, top_node, errp); ++ if (top_bs == NULL) { ++ goto out; ++ } ++ if (!bdrv_chain_contains(bs, top_bs)) { ++ error_setg(errp, "'%s' is not in this backing file chain", ++ top_node); ++ goto out; ++ } ++ } else if (has_top && top) { + if (strcmp(bs->filename, 
top) != 0) { + top_bs = bdrv_find_backing_image(bs, top); + } +@@ -3398,7 +3413,20 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, + + assert(bdrv_get_aio_context(top_bs) == aio_context); + +- if (has_base && base) { ++ if (has_base_node && has_base) { ++ error_setg(errp, "'base-node' and 'base' are mutually exclusive"); ++ goto out; ++ } else if (has_base_node) { ++ base_bs = bdrv_lookup_bs(NULL, base_node, errp); ++ if (base_bs == NULL) { ++ goto out; ++ } ++ if (!bdrv_chain_contains(top_bs, base_bs)) { ++ error_setg(errp, "'%s' is not in this backing file chain", ++ base_node); ++ goto out; ++ } ++ } else if (has_base && base) { + base_bs = bdrv_find_backing_image(top_bs, base); + } else { + base_bs = bdrv_find_base(top_bs); +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 2953991..6f38dc0 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -1457,12 +1457,23 @@ + # + # @device: the device name or node-name of a root node + # +-# @base: The file name of the backing image to write data into. +-# If not specified, this is the deepest backing image. ++# @base-node: The node name of the backing image to write data into. ++# If not specified, this is the deepest backing image. ++# (since: 3.1) + # +-# @top: The file name of the backing image within the image chain, +-# which contains the topmost data to be committed down. If +-# not specified, this is the active layer. ++# @base: Same as @base-node, except that it is a file name rather than a node ++# name. This must be the exact filename string that was used to open the ++# node; other strings, even if addressing the same file, are not ++# accepted (deprecated, use @base-node instead) ++# ++# @top-node: The node name of the backing image within the image chain ++# which contains the topmost data to be committed down. If ++# not specified, this is the active layer. 
(since: 3.1) ++# ++# @top: Same as @top-node, except that it is a file name rather than a node ++# name. This must be the exact filename string that was used to open the ++# node; other strings, even if addressing the same file, are not ++# accepted (deprecated, use @top-node instead) + # + # @backing-file: The backing file string to write into the overlay + # image of 'top'. If 'top' is the active layer, +@@ -1531,7 +1542,8 @@ + # + ## + { 'command': 'block-commit', +- 'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', '*top': 'str', ++ 'data': { '*job-id': 'str', 'device': 'str', '*base-node': 'str', ++ '*base': 'str', '*top-node': 'str', '*top': 'str', + '*backing-file': 'str', '*speed': 'int', + '*filter-node-name': 'str', + '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } } +-- +1.8.3.1 + diff --git a/0060-qemu-iotests-Test-commit-with-top-node-base-node.patch b/0060-qemu-iotests-Test-commit-with-top-node-base-node.patch new file mode 100644 index 0000000..a593117 --- /dev/null +++ b/0060-qemu-iotests-Test-commit-with-top-node-base-node.patch @@ -0,0 +1,127 @@ +From bb9687c8dadef42d11f3606e68e956a7c60b2487 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 13:50:55 +0100 +Subject: qemu-iotests: Test commit with top-node/base-node + +RH-Author: Kevin Wolf +Message-id: <20181010135055.3874-3-kwolf@redhat.com> +Patchwork-id: 82568 +O-Subject: [RHEL-8 qemu-kvm PATCH 2/2] qemu-iotests: Test commit with top-node/base-node +Bugzilla: 1637970 +RH-Acked-by: John Snow +RH-Acked-by: Fam Zheng +RH-Acked-by: Stefan Hajnoczi + +This adds some tests for block-commit with the new options top-node and +base-node (taking node names) instead of top and base (taking file +names). + +Signed-off-by: Kevin Wolf +(cherry picked from commit d57177a48fc604e5427921bf20b22ee0e6d578b3) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L.
de Paula +--- + tests/qemu-iotests/040 | 52 ++++++++++++++++++++++++++++++++++++++++++++-- + tests/qemu-iotests/040.out | 4 ++-- + 2 files changed, 52 insertions(+), 4 deletions(-) + +diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 +index 1beb5e6..1cb1cee 100755 +--- a/tests/qemu-iotests/040 ++++ b/tests/qemu-iotests/040 +@@ -57,9 +57,12 @@ class ImageCommitTestCase(iotests.QMPTestCase): + self.assert_no_active_block_jobs() + self.vm.shutdown() + +- def run_commit_test(self, top, base, need_ready=False): ++ def run_commit_test(self, top, base, need_ready=False, node_names=False): + self.assert_no_active_block_jobs() +- result = self.vm.qmp('block-commit', device='drive0', top=top, base=base) ++ if node_names: ++ result = self.vm.qmp('block-commit', device='drive0', top_node=top, base_node=base) ++ else: ++ result = self.vm.qmp('block-commit', device='drive0', top=top, base=base) + self.assert_qmp(result, 'return', {}) + self.wait_for_complete(need_ready) + +@@ -101,6 +104,11 @@ class TestSingleDrive(ImageCommitTestCase): + self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xab 0 524288', backing_img).find("verification failed")) + self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed")) + ++ def test_commit_node(self): ++ self.run_commit_test("mid", "base", node_names=True) ++ self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xab 0 524288', backing_img).find("verification failed")) ++ self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed")) ++ + def test_device_not_found(self): + result = self.vm.qmp('block-commit', device='nonexistent', top='%s' % mid_img) + self.assert_qmp(result, 'error/class', 'DeviceNotFound') +@@ -123,6 +131,30 @@ class TestSingleDrive(ImageCommitTestCase): + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found') + ++ def 
test_top_node_invalid(self): ++ self.assert_no_active_block_jobs() ++ result = self.vm.qmp('block-commit', device='drive0', top_node='badfile', base_node='base') ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ self.assert_qmp(result, 'error/desc', "Cannot find device= nor node_name=badfile") ++ ++ def test_base_node_invalid(self): ++ self.assert_no_active_block_jobs() ++ result = self.vm.qmp('block-commit', device='drive0', top_node='mid', base_node='badfile') ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ self.assert_qmp(result, 'error/desc', "Cannot find device= nor node_name=badfile") ++ ++ def test_top_path_and_node(self): ++ self.assert_no_active_block_jobs() ++ result = self.vm.qmp('block-commit', device='drive0', top_node='mid', base_node='base', top='%s' % mid_img) ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ self.assert_qmp(result, 'error/desc', "'top-node' and 'top' are mutually exclusive") ++ ++ def test_base_path_and_node(self): ++ self.assert_no_active_block_jobs() ++ result = self.vm.qmp('block-commit', device='drive0', top_node='mid', base_node='base', base='%s' % backing_img) ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ self.assert_qmp(result, 'error/desc', "'base-node' and 'base' are mutually exclusive") ++ + def test_top_is_active(self): + self.run_commit_test(test_img, backing_img, need_ready=True) + self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xab 0 524288', backing_img).find("verification failed")) +@@ -139,6 +171,22 @@ class TestSingleDrive(ImageCommitTestCase): + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Base \'%s\' not found' % mid_img) + ++ def test_top_and_base_node_reversed(self): ++ self.assert_no_active_block_jobs() ++ result = self.vm.qmp('block-commit', device='drive0', top_node='base', base_node='top') ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ self.assert_qmp(result, 'error/desc', "'top' is 
not in this backing file chain") ++ ++ def test_top_node_in_wrong_chain(self): ++ self.assert_no_active_block_jobs() ++ ++ result = self.vm.qmp('blockdev-add', driver='null-co', node_name='null') ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.vm.qmp('block-commit', device='drive0', top_node='null', base_node='base') ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ self.assert_qmp(result, 'error/desc', "'null' is not in this backing file chain") ++ + # When the job is running on a BB that is automatically deleted on hot + # unplug, the job is cancelled when the device disappears + def test_hot_unplug(self): +diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out +index e20a75c..802ffaa 100644 +--- a/tests/qemu-iotests/040.out ++++ b/tests/qemu-iotests/040.out +@@ -1,5 +1,5 @@ +-............................. ++........................................... + ---------------------------------------------------------------------- +-Ran 29 tests ++Ran 43 tests + + OK +-- +1.8.3.1 + diff --git a/0061-block-for-jobs-do-not-clear-user_paused-until-after-.patch b/0061-block-for-jobs-do-not-clear-user_paused-until-after-.patch new file mode 100644 index 0000000..f3c3385 --- /dev/null +++ b/0061-block-for-jobs-do-not-clear-user_paused-until-after-.patch @@ -0,0 +1,59 @@ +From 0908cd5291828eca03bbba206f133a37b87c8b41 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Wed, 10 Oct 2018 20:50:58 +0100 +Subject: block: for jobs, do not clear user_paused until after the resume + +RH-Author: John Snow +Message-id: <20181010205100.17689-2-jsnow@redhat.com> +Patchwork-id: 82631 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 1/3] block: for jobs, do not clear user_paused until after the resume +Bugzilla: 1635583 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +From: Jeff Cody + +The function job_cancel_async() will always cause an assert for blockjob +user resume. 
We set job->user_paused to false, and then call +job->driver->user_resume(). In the case of blockjobs, this is the +block_job_user_resume() function. + +In that function, we assert that job.user_paused is set to true. +Unfortunately, right before calling this function, it has explicitly +been set to false. + +The fix is pretty simple: set job->user_paused to false only after the +job user_resume() function has been called. + +Reviewed-by: John Snow +Reviewed-by: Eric Blake +Signed-off-by: Jeff Cody +Message-id: bb183b77d8f2dd6bd67b8da559a90ac1e74b2052.1534868459.git.jcody@redhat.com +Signed-off-by: Jeff Cody +(cherry picked from commit e321c0597c7590499bacab239d7f86e257f96bcd) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +--- + job.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/job.c b/job.c +index 87c9aa4..bb322de 100644 +--- a/job.c ++++ b/job.c +@@ -705,10 +705,10 @@ static void job_cancel_async(Job *job, bool force) + { + if (job->user_paused) { + /* Do not call job_enter here, the caller will handle it. 
*/ +- job->user_paused = false; + if (job->driver->user_resume) { + job->driver->user_resume(job); + } ++ job->user_paused = false; + assert(job->pause_count > 0); + job->pause_count--; + } +-- +1.8.3.1 + diff --git a/0062-block-iotest-to-catch-abort-on-forced-blockjob-cance.patch b/0062-block-iotest-to-catch-abort-on-forced-blockjob-cance.patch new file mode 100644 index 0000000..5c30cf9 --- /dev/null +++ b/0062-block-iotest-to-catch-abort-on-forced-blockjob-cance.patch @@ -0,0 +1,173 @@ +From d26430360b5996c99c0e1dd95b4dbb48bd894944 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Wed, 10 Oct 2018 20:51:00 +0100 +Subject: block: iotest to catch abort on forced blockjob cancel + +RH-Author: John Snow +Message-id: <20181010205100.17689-4-jsnow@redhat.com> +Patchwork-id: 82632 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 3/3] block: iotest to catch abort on forced blockjob cancel +Bugzilla: 1635583 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +From: Jeff Cody + +Signed-off-by: Jeff Cody +Reviewed-by: John Snow +Message-id: df317f617fbe5affcf699cb8560e7b0c2e028a64.1534868459.git.jcody@redhat.com +Signed-off-by: Jeff Cody +(cherry picked from commit 26bf474ba92c76e61bea51726e22da6dfd185296) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/229 | 95 ++++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/229.out | 23 +++++++++++ + tests/qemu-iotests/group | 1 + + 3 files changed, 119 insertions(+) + create mode 100755 tests/qemu-iotests/229 + create mode 100644 tests/qemu-iotests/229.out + +diff --git a/tests/qemu-iotests/229 b/tests/qemu-iotests/229 +new file mode 100755 +index 0000000..ff851ec +--- /dev/null ++++ b/tests/qemu-iotests/229 +@@ -0,0 +1,95 @@ ++#!/bin/bash ++# ++# Test for force canceling a running blockjob that is paused in ++# an error state. ++# ++# Copyright (C) 2018 Red Hat, Inc. 
++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see <http://www.gnu.org/licenses/>. ++# ++ ++# creator ++owner=jcody@redhat.com ++ ++seq="$(basename $0)" ++echo "QA output created by $seq" ++ ++here="$PWD" ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_qemu ++ _cleanup_test_img ++ rm -f "$TEST_IMG" "$DEST_IMG" ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++. ./common.rc ++. ./common.filter ++.
./common.qemu ++ ++# Needs backing file and backing format support ++_supported_fmt qcow2 qed ++_supported_proto file ++_supported_os Linux ++ ++ ++DEST_IMG="$TEST_DIR/d.$IMGFMT" ++TEST_IMG="$TEST_DIR/b.$IMGFMT" ++ ++_make_test_img 2M ++ ++# destination for mirror will be too small, causing error ++TEST_IMG=$DEST_IMG _make_test_img 1M ++ ++$QEMU_IO -c 'write 0 2M' "$TEST_IMG" | _filter_qemu_io ++ ++_launch_qemu -drive id=testdisk,file="$TEST_IMG",format="$IMGFMT" ++ ++_send_qemu_cmd $QEMU_HANDLE \ ++ "{'execute': 'qmp_capabilities'}" \ ++ 'return' ++ ++echo ++echo '=== Starting drive-mirror, causing error & stop ===' ++echo ++ ++_send_qemu_cmd $QEMU_HANDLE \ ++ "{'execute': 'drive-mirror', ++ 'arguments': {'device': 'testdisk', ++ 'mode': 'absolute-paths', ++ 'format': '$IMGFMT', ++ 'target': '$DEST_IMG', ++ 'sync': 'full', ++ 'mode': 'existing', ++ 'on-source-error': 'stop', ++ 'on-target-error': 'stop' }}" \ ++ "JOB_STATUS_CHANGE.*pause" ++ ++echo ++echo '=== Force cancel job paused in error state ===' ++echo ++ ++success_or_failure="y" _send_qemu_cmd $QEMU_HANDLE \ ++ "{'execute': 'block-job-cancel', ++ 'arguments': { 'device': 'testdisk', ++ 'force': true}}" \ ++ "BLOCK_JOB_CANCELLED" "Assertion" ++ ++# success, all done ++echo "*** done" ++rm -f $seq.full ++status=0 +diff --git a/tests/qemu-iotests/229.out b/tests/qemu-iotests/229.out +new file mode 100644 +index 0000000..4c41128 +--- /dev/null ++++ b/tests/qemu-iotests/229.out +@@ -0,0 +1,23 @@ ++QA output created by 229 ++Formatting 'TEST_DIR/b.IMGFMT', fmt=IMGFMT size=2097152 ++Formatting 'TEST_DIR/d.IMGFMT', fmt=IMGFMT size=1048576 ++wrote 2097152/2097152 bytes at offset 0 ++2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++{"return": {}} ++ ++=== Starting drive-mirror, causing error & stop === ++ ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "testdisk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "testdisk"}} ++{"return": {}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_ERROR", "data": {"device": "testdisk", "operation": "write", "action": "stop"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "testdisk"}} ++ ++=== Force cancel job paused in error state === ++ ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "testdisk"}} ++{"return": {}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_ERROR", "data": {"device": "testdisk", "operation": "write", "action": "stop"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "testdisk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "testdisk", "len": 2097152, "offset": 1048576, "speed": 0, "type": "mirror"}} ++*** done +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index f1059f6..23ab4d3 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -225,3 +225,4 @@ + 225 rw auto quick + 226 auto quick + 227 auto quick ++229 auto quick +-- +1.8.3.1 + diff --git a/0063-Revert-hw-acpi-build-build-SRAT-memory-affinity-stru.patch b/0063-Revert-hw-acpi-build-build-SRAT-memory-affinity-stru.patch new file mode 100644 index 0000000..9776c47 --- /dev/null +++ b/0063-Revert-hw-acpi-build-build-SRAT-memory-affinity-stru.patch @@ -0,0 +1,117 @@ +From c0bedad9bd133c14096eeeae49877fbb9eb179c3 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Thu, 4 Oct 2018 10:31:31 +0100 +Subject: Revert "hw/acpi-build: build SRAT memory affinity structures for DIMM + devices" + +RH-Author: Igor Mammedov +Message-id: 
<1538649091-70517-1-git-send-email-imammedo@redhat.com> +Patchwork-id: 82373 +O-Subject: [RHEL8/virt-8.0.0 qemu-kvm PATCH] Revert "hw/acpi-build: build SRAT memory affinity structures for DIMM devices" +Bugzilla: 1609235 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Thomas Huth + +Since upstream commits + (0efd7e108 "pc: acpi: fix memory hotplug regression by reducing stub SRAT entry size") + (dbb6da8ba7 "pc: acpi: revert back to 1 SRAT entry for hotpluggable area") +haven't been backported to RHEL8, it's sufficient to revert commit + (848a1cc1e8 "hw/acpi-build: build SRAT memory affinity structures for DIMM devices") +for the result to match the current upstream state and fix the bug. + +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. de Paula + +Rebase notes (3.0.0): +- Replace hotplug_memory with device_memory in PCMachineState +--- + hw/i386/acpi-build.c | 65 ++++------------------------------------------------ + 1 file changed, 4 insertions(+), 61 deletions(-) + +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index be9bdb5..f95516c 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -2254,64 +2254,6 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog) + #define HOLE_640K_START (640 * KiB) + #define HOLE_640K_END (1 * MiB) + +-static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base, +- uint64_t len, int default_node) +-{ +- MemoryDeviceInfoList *info_list = qmp_memory_device_list(); +- MemoryDeviceInfoList *info; +- MemoryDeviceInfo *mi; +- PCDIMMDeviceInfo *di; +- uint64_t end = base + len, cur, size; +- bool is_nvdimm; +- AcpiSratMemoryAffinity *numamem; +- MemoryAffinityFlags flags; +- +- for (cur = base, info = info_list; +- cur < end; +- cur += size, info = info->next) { +- numamem = acpi_data_push(table_data, sizeof *numamem); +- +- if (!info) { +- /* +- * Entry is required for Windows to enable memory hotplug in OS +- * and for Linux to enable SWIOTLB
when booted with less than +- * 4G of RAM. Windows works better if the entry sets proximity +- * to the highest NUMA node in the machine at the end of the +- * reserved space. +- * Memory devices may override proximity set by this entry, +- * providing _PXM method if necessary. +- */ +- build_srat_memory(numamem, end - 1, 1, default_node, +- MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); +- break; +- } +- +- mi = info->value; +- is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM); +- di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data; +- +- if (cur < di->addr) { +- build_srat_memory(numamem, cur, di->addr - cur, default_node, +- MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); +- numamem = acpi_data_push(table_data, sizeof *numamem); +- } +- +- size = di->size; +- +- flags = MEM_AFFINITY_ENABLED; +- if (di->hotpluggable) { +- flags |= MEM_AFFINITY_HOTPLUGGABLE; +- } +- if (is_nvdimm) { +- flags |= MEM_AFFINITY_NON_VOLATILE; +- } +- +- build_srat_memory(numamem, di->addr, size, di->node, flags); +- } +- +- qapi_free_MemoryDeviceInfoList(info_list); +-} +- + static void + build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) + { +@@ -2418,9 +2360,10 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) + } + + if (hotplugabble_address_space_size) { +- build_srat_hotpluggable_memory(table_data, machine->device_memory->base, +- hotplugabble_address_space_size, +- pcms->numa_nodes - 1); ++ numamem = acpi_data_push(table_data, sizeof *numamem); ++ build_srat_memory(numamem, machine->device_memory->base, ++ hotplugabble_address_space_size, pcms->numa_nodes - 1, ++ MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); + } + + build_header(linker, table_data, +-- +1.8.3.1 + diff --git a/0064-aio-posix-Don-t-count-ctx-notifier-as-progress-when-.patch b/0064-aio-posix-Don-t-count-ctx-notifier-as-progress-when-.patch new file mode 100644 index 0000000..3fc21dd --- /dev/null +++ 
b/0064-aio-posix-Don-t-count-ctx-notifier-as-progress-when-.patch @@ -0,0 +1,48 @@ +From c476cf6c76298803fe896eb7c597085af3b73c12 Mon Sep 17 00:00:00 2001 +From: Fam Zheng +Date: Tue, 9 Oct 2018 08:16:47 +0100 +Subject: aio-posix: Don't count ctx->notifier as progress when polling + +RH-Author: Fam Zheng +Message-id: <20181009081651.15463-2-famz@redhat.com> +Patchwork-id: 82454 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 1/5] aio-posix: Don't count ctx->notifier as progress when polling +Bugzilla: 1623085 +RH-Acked-by: Thomas Huth +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Danilo de Paula + +BZ: 1623085 + +The same logic exists in fd polling. This change is especially important +to avoid busy loop once we limit aio_notify_accept() to blocking +aio_poll(). + +Cc: qemu-stable@nongnu.org +Signed-off-by: Fam Zheng +Message-Id: <20180809132259.18402-2-famz@redhat.com> +Signed-off-by: Fam Zheng +(cherry picked from commit 70232b5253a3c4e03ed1ac47ef9246a8ac66c6fa) +Signed-off-by: Fam Zheng +Signed-off-by: Danilo C. L. 
de Paula +--- + util/aio-posix.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/util/aio-posix.c b/util/aio-posix.c +index 118bf57..b5c7f46 100644 +--- a/util/aio-posix.c ++++ b/util/aio-posix.c +@@ -494,7 +494,8 @@ static bool run_poll_handlers_once(AioContext *ctx) + QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { + if (!node->deleted && node->io_poll && + aio_node_check(ctx, node->is_external) && +- node->io_poll(node->opaque)) { ++ node->io_poll(node->opaque) && ++ node->opaque != &ctx->notifier) { + progress = true; + } + +-- +1.8.3.1 + diff --git a/0065-aio-Do-aio_notify_accept-only-during-blocking-aio_po.patch b/0065-aio-Do-aio_notify_accept-only-during-blocking-aio_po.patch new file mode 100644 index 0000000..7b815ae --- /dev/null +++ b/0065-aio-Do-aio_notify_accept-only-during-blocking-aio_po.patch @@ -0,0 +1,124 @@ +From 1580d01151ceea428dc9a25dd3d83990a594e286 Mon Sep 17 00:00:00 2001 +From: Fam Zheng +Date: Tue, 9 Oct 2018 08:16:48 +0100 +Subject: aio: Do aio_notify_accept only during blocking aio_poll + +RH-Author: Fam Zheng +Message-id: <20181009081651.15463-3-famz@redhat.com> +Patchwork-id: 82450 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 2/5] aio: Do aio_notify_accept only during blocking aio_poll +Bugzilla: 1623085 +RH-Acked-by: Thomas Huth +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Danilo de Paula + +BZ: 1623085 + +An aio_notify() pairs with an aio_notify_accept(). The former should +happen in the main thread or a vCPU thread, and the latter should be +done in the IOThread. + +There is one rare case that the main thread or vCPU thread may "steal" +the aio_notify() event just raised by itself, in bdrv_set_aio_context() +[1]. The sequence is like this: + + main thread IO Thread + =============================================================== + bdrv_drained_begin() + aio_disable_external(ctx) + aio_poll(ctx, true) + ctx->notify_me += 2 + ... + bdrv_drained_end() + ... + aio_notify() + ... 
+ bdrv_set_aio_context() + aio_poll(ctx, false) +[1] aio_notify_accept(ctx) + ppoll() /* Hang! */ + +[1] is problematic. It will clear the ctx->notifier event so that +the blocked ppoll() will not return. + +(For the curious, this bug was noticed when booting a number of VMs +simultaneously in RHV. One or two of the VMs will hit this race +condition, making the VIRTIO device unresponsive to I/O commands. When +it hangs, Seabios is busy waiting for a read request to complete (read +MBR), right after initializing the virtio-blk-pci device, using 100% +guest CPU. See also https://bugzilla.redhat.com/show_bug.cgi?id=1562750 +for the original bug analysis.) + +aio_notify() only injects an event when ctx->notify_me is set, +correspondingly aio_notify_accept() is only useful when ctx->notify_me +_was_ set. Move the call to it into the "blocking" branch. This will +effectively skip [1] and fix the hang. + +Furthermore, blocking aio_poll is only allowed on home thread +(in_aio_context_home_thread), because otherwise two blocking +aio_poll()'s can steal each other's ctx->notifier event and cause +hanging just like described above. + +Cc: qemu-stable@nongnu.org +Suggested-by: Paolo Bonzini +Signed-off-by: Fam Zheng +Message-Id: <20180809132259.18402-3-famz@redhat.com> +Signed-off-by: Fam Zheng +(cherry picked from commit b37548fcd1b8ac2e88e185a395bef851f3fc4e65) +Signed-off-by: Fam Zheng +Signed-off-by: Danilo C. L. de Paula +--- + util/aio-posix.c | 4 ++-- + util/aio-win32.c | 3 ++- + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/util/aio-posix.c b/util/aio-posix.c +index b5c7f46..b5c609b 100644 +--- a/util/aio-posix.c ++++ b/util/aio-posix.c +@@ -591,6 +591,7 @@ bool aio_poll(AioContext *ctx, bool blocking) + * so disable the optimization now. 
+ */ + if (blocking) { ++ assert(in_aio_context_home_thread(ctx)); + atomic_add(&ctx->notify_me, 2); + } + +@@ -633,6 +634,7 @@ bool aio_poll(AioContext *ctx, bool blocking) + + if (blocking) { + atomic_sub(&ctx->notify_me, 2); ++ aio_notify_accept(ctx); + } + + /* Adjust polling time */ +@@ -676,8 +678,6 @@ bool aio_poll(AioContext *ctx, bool blocking) + } + } + +- aio_notify_accept(ctx); +- + /* if we have any readable fds, dispatch event */ + if (ret > 0) { + for (i = 0; i < npfd; i++) { +diff --git a/util/aio-win32.c b/util/aio-win32.c +index e676a8d..c58957c 100644 +--- a/util/aio-win32.c ++++ b/util/aio-win32.c +@@ -373,11 +373,12 @@ bool aio_poll(AioContext *ctx, bool blocking) + ret = WaitForMultipleObjects(count, events, FALSE, timeout); + if (blocking) { + assert(first); ++ assert(in_aio_context_home_thread(ctx)); + atomic_sub(&ctx->notify_me, 2); ++ aio_notify_accept(ctx); + } + + if (first) { +- aio_notify_accept(ctx); + progress |= aio_bh_poll(ctx); + first = false; + } +-- +1.8.3.1 + diff --git a/0066-aio-posix-fix-concurrent-access-to-poll_disable_cnt.patch b/0066-aio-posix-fix-concurrent-access-to-poll_disable_cnt.patch new file mode 100644 index 0000000..b1c677f --- /dev/null +++ b/0066-aio-posix-fix-concurrent-access-to-poll_disable_cnt.patch @@ -0,0 +1,122 @@ +From 07bbb6779b2a628b3e83b5474be550009aae034d Mon Sep 17 00:00:00 2001 +From: Fam Zheng +Date: Tue, 9 Oct 2018 08:16:49 +0100 +Subject: aio-posix: fix concurrent access to poll_disable_cnt + +RH-Author: Fam Zheng +Message-id: <20181009081651.15463-4-famz@redhat.com> +Patchwork-id: 82452 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 3/5] aio-posix: fix concurrent access to poll_disable_cnt +Bugzilla: 1632622 +RH-Acked-by: Thomas Huth +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Danilo de Paula + +From: Paolo Bonzini + +BZ: 1632622 + +It is valid for an aio_set_fd_handler to happen concurrently with +aio_poll. 
In that case, poll_disable_cnt can change under the heels +of aio_poll, and the assertion on poll_disable_cnt can fail in +run_poll_handlers. + +Therefore, this patch simply checks the counter on every polling +iteration. There are no particular needs for ordering, since the +polling loop is terminated anyway by aio_notify at the end of +aio_set_fd_handler. + +Signed-off-by: Paolo Bonzini +Message-Id: <20180912171040.1732-2-pbonzini@redhat.com> +Reviewed-by: Fam Zheng +Signed-off-by: Fam Zheng +(cherry picked from commit d7be5dd19c0df7f76e1b42f0c2cbbabefa1974cb) +Signed-off-by: Fam Zheng +Signed-off-by: Danilo C. L. de Paula +--- + util/aio-posix.c | 26 +++++++++++++++----------- + 1 file changed, 15 insertions(+), 11 deletions(-) + +diff --git a/util/aio-posix.c b/util/aio-posix.c +index b5c609b..9189033 100644 +--- a/util/aio-posix.c ++++ b/util/aio-posix.c +@@ -211,6 +211,7 @@ void aio_set_fd_handler(AioContext *ctx, + AioHandler *node; + bool is_new = false; + bool deleted = false; ++ int poll_disable_change; + + qemu_lockcnt_lock(&ctx->list_lock); + +@@ -244,11 +245,9 @@ void aio_set_fd_handler(AioContext *ctx, + QLIST_REMOVE(node, node); + deleted = true; + } +- +- if (!node->io_poll) { +- ctx->poll_disable_cnt--; +- } ++ poll_disable_change = -!node->io_poll; + } else { ++ poll_disable_change = !io_poll - (node && !node->io_poll); + if (node == NULL) { + /* Alloc and insert if it's not already there */ + node = g_new0(AioHandler, 1); +@@ -257,10 +256,6 @@ void aio_set_fd_handler(AioContext *ctx, + + g_source_add_poll(&ctx->source, &node->pfd); + is_new = true; +- +- ctx->poll_disable_cnt += !io_poll; +- } else { +- ctx->poll_disable_cnt += !io_poll - !node->io_poll; + } + + /* Update handler with latest information */ +@@ -274,6 +269,15 @@ void aio_set_fd_handler(AioContext *ctx, + node->pfd.events |= (io_write ? 
G_IO_OUT | G_IO_ERR : 0); + } + ++ /* No need to order poll_disable_cnt writes against other updates; ++ * the counter is only used to avoid wasting time and latency on ++ * iterated polling when the system call will be ultimately necessary. ++ * Changing handlers is a rare event, and a little wasted polling until ++ * the aio_notify below is not an issue. ++ */ ++ atomic_set(&ctx->poll_disable_cnt, ++ atomic_read(&ctx->poll_disable_cnt) + poll_disable_change); ++ + aio_epoll_update(ctx, node, is_new); + qemu_lockcnt_unlock(&ctx->list_lock); + aio_notify(ctx); +@@ -525,7 +529,6 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns) + + assert(ctx->notify_me); + assert(qemu_lockcnt_count(&ctx->list_lock) > 0); +- assert(ctx->poll_disable_cnt == 0); + + trace_run_poll_handlers_begin(ctx, max_ns); + +@@ -533,7 +536,8 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns) + + do { + progress = run_poll_handlers_once(ctx); +- } while (!progress && qemu_clock_get_ns(QEMU_CLOCK_REALTIME) < end_time); ++ } while (!progress && qemu_clock_get_ns(QEMU_CLOCK_REALTIME) < end_time ++ && !atomic_read(&ctx->poll_disable_cnt)); + + trace_run_poll_handlers_end(ctx, progress); + +@@ -552,7 +556,7 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns) + */ + static bool try_poll_mode(AioContext *ctx, bool blocking) + { +- if (blocking && ctx->poll_max_ns && ctx->poll_disable_cnt == 0) { ++ if (blocking && ctx->poll_max_ns && !atomic_read(&ctx->poll_disable_cnt)) { + /* See qemu_soonest_timeout() uint64_t hack */ + int64_t max_ns = MIN((uint64_t)aio_compute_timeout(ctx), + (uint64_t)ctx->poll_ns); +-- +1.8.3.1 + diff --git a/0067-aio-posix-compute-timeout-before-polling.patch b/0067-aio-posix-compute-timeout-before-polling.patch new file mode 100644 index 0000000..3a5c4fe --- /dev/null +++ b/0067-aio-posix-compute-timeout-before-polling.patch @@ -0,0 +1,186 @@ +From 44bb29739a1cfa471447d6c5880e7527399b146f Mon Sep 17 00:00:00 2001 +From: Fam Zheng 
+Date: Tue, 9 Oct 2018 08:16:50 +0100 +Subject: aio-posix: compute timeout before polling + +RH-Author: Fam Zheng +Message-id: <20181009081651.15463-5-famz@redhat.com> +Patchwork-id: 82453 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 4/5] aio-posix: compute timeout before polling +Bugzilla: 1632622 +RH-Acked-by: Thomas Huth +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Danilo de Paula + +From: Paolo Bonzini + +BZ: 1632622 + +This is a preparation for the next patch, and also a very small +optimization. Compute the timeout only once, before invoking +try_poll_mode, and adjust it in run_poll_handlers. The adjustment +is the polling time when polling fails, or zero (non-blocking) if +polling succeeds. + +Fixes: 70232b5253a3c4e03ed1ac47ef9246a8ac66c6fa +Signed-off-by: Paolo Bonzini +Message-Id: <20180912171040.1732-3-pbonzini@redhat.com> +Reviewed-by: Fam Zheng +Signed-off-by: Fam Zheng +(cherry picked from commit e30cffa04d52e35996569f1cfac111be19576bde) +Signed-off-by: Fam Zheng +Signed-off-by: Danilo C. L. 
de Paula +--- + util/aio-posix.c | 59 ++++++++++++++++++++++++++++++++----------------------- + util/trace-events | 4 ++-- + 2 files changed, 36 insertions(+), 27 deletions(-) + +diff --git a/util/aio-posix.c b/util/aio-posix.c +index 9189033..bb862e1 100644 +--- a/util/aio-posix.c ++++ b/util/aio-posix.c +@@ -490,7 +490,7 @@ static void add_pollfd(AioHandler *node) + npfd++; + } + +-static bool run_poll_handlers_once(AioContext *ctx) ++static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout) + { + bool progress = false; + AioHandler *node; +@@ -500,6 +500,7 @@ static bool run_poll_handlers_once(AioContext *ctx) + aio_node_check(ctx, node->is_external) && + node->io_poll(node->opaque) && + node->opaque != &ctx->notifier) { ++ *timeout = 0; + progress = true; + } + +@@ -522,31 +523,38 @@ static bool run_poll_handlers_once(AioContext *ctx) + * + * Returns: true if progress was made, false otherwise + */ +-static bool run_poll_handlers(AioContext *ctx, int64_t max_ns) ++static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout) + { + bool progress; +- int64_t end_time; ++ int64_t start_time, elapsed_time; + + assert(ctx->notify_me); + assert(qemu_lockcnt_count(&ctx->list_lock) > 0); + +- trace_run_poll_handlers_begin(ctx, max_ns); +- +- end_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + max_ns; ++ trace_run_poll_handlers_begin(ctx, max_ns, *timeout); + ++ start_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + do { +- progress = run_poll_handlers_once(ctx); +- } while (!progress && qemu_clock_get_ns(QEMU_CLOCK_REALTIME) < end_time ++ progress = run_poll_handlers_once(ctx, timeout); ++ elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time; ++ } while (!progress && elapsed_time < max_ns + && !atomic_read(&ctx->poll_disable_cnt)); + +- trace_run_poll_handlers_end(ctx, progress); ++ /* If time has passed with no successful polling, adjust *timeout to ++ * keep the same ending time. 
++ */ ++ if (*timeout != -1) { ++ *timeout -= MIN(*timeout, elapsed_time); ++ } + ++ trace_run_poll_handlers_end(ctx, progress, *timeout); + return progress; + } + + /* try_poll_mode: + * @ctx: the AioContext +- * @blocking: busy polling is only attempted when blocking is true ++ * @timeout: timeout for blocking wait, computed by the caller and updated if ++ * polling succeeds. + * + * ctx->notify_me must be non-zero so this function can detect aio_notify(). + * +@@ -554,19 +562,16 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns) + * + * Returns: true if progress was made, false otherwise + */ +-static bool try_poll_mode(AioContext *ctx, bool blocking) ++static bool try_poll_mode(AioContext *ctx, int64_t *timeout) + { +- if (blocking && ctx->poll_max_ns && !atomic_read(&ctx->poll_disable_cnt)) { +- /* See qemu_soonest_timeout() uint64_t hack */ +- int64_t max_ns = MIN((uint64_t)aio_compute_timeout(ctx), +- (uint64_t)ctx->poll_ns); ++ /* See qemu_soonest_timeout() uint64_t hack */ ++ int64_t max_ns = MIN((uint64_t)*timeout, (uint64_t)ctx->poll_ns); + +- if (max_ns) { +- poll_set_started(ctx, true); ++ if (max_ns && !atomic_read(&ctx->poll_disable_cnt)) { ++ poll_set_started(ctx, true); + +- if (run_poll_handlers(ctx, max_ns)) { +- return true; +- } ++ if (run_poll_handlers(ctx, max_ns, timeout)) { ++ return true; + } + } + +@@ -575,7 +580,7 @@ static bool try_poll_mode(AioContext *ctx, bool blocking) + /* Even if we don't run busy polling, try polling once in case it can make + * progress and the caller will be able to avoid ppoll(2)/epoll_wait(2). + */ +- return run_poll_handlers_once(ctx); ++ return run_poll_handlers_once(ctx, timeout); + } + + bool aio_poll(AioContext *ctx, bool blocking) +@@ -605,8 +610,14 @@ bool aio_poll(AioContext *ctx, bool blocking) + start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + } + +- progress = try_poll_mode(ctx, blocking); +- if (!progress) { ++ timeout = blocking ? 
aio_compute_timeout(ctx) : 0; ++ progress = try_poll_mode(ctx, &timeout); ++ assert(!(timeout && progress)); ++ ++ /* If polling is allowed, non-blocking aio_poll does not need the ++ * system call---a single round of run_poll_handlers_once suffices. ++ */ ++ if (timeout || atomic_read(&ctx->poll_disable_cnt)) { + assert(npfd == 0); + + /* fill pollfds */ +@@ -620,8 +631,6 @@ bool aio_poll(AioContext *ctx, bool blocking) + } + } + +- timeout = blocking ? aio_compute_timeout(ctx) : 0; +- + /* wait until next event */ + if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) { + AioHandler epoll_handler; +diff --git a/util/trace-events b/util/trace-events +index 4822434..79569b7 100644 +--- a/util/trace-events ++++ b/util/trace-events +@@ -1,8 +1,8 @@ + # See docs/devel/tracing.txt for syntax documentation. + + # util/aio-posix.c +-run_poll_handlers_begin(void *ctx, int64_t max_ns) "ctx %p max_ns %"PRId64 +-run_poll_handlers_end(void *ctx, bool progress) "ctx %p progress %d" ++run_poll_handlers_begin(void *ctx, int64_t max_ns, int64_t timeout) "ctx %p max_ns %"PRId64 " timeout %"PRId64 ++run_poll_handlers_end(void *ctx, bool progress, int64_t timeout) "ctx %p progress %d new timeout %"PRId64 + poll_shrink(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64 + poll_grow(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64 + +-- +1.8.3.1 + diff --git a/0068-aio-posix-do-skip-system-call-if-ctx-notifier-pollin.patch b/0068-aio-posix-do-skip-system-call-if-ctx-notifier-pollin.patch new file mode 100644 index 0000000..cf50d42 --- /dev/null +++ b/0068-aio-posix-do-skip-system-call-if-ctx-notifier-pollin.patch @@ -0,0 +1,64 @@ +From ea1db6ad3fcbcda2068d3aeb21c384d42004aaaf Mon Sep 17 00:00:00 2001 +From: Fam Zheng +Date: Tue, 9 Oct 2018 08:16:51 +0100 +Subject: aio-posix: do skip system call if ctx->notifier polling succeeds + +RH-Author: Fam Zheng +Message-id: <20181009081651.15463-6-famz@redhat.com> +Patchwork-id: 82449 +O-Subject: 
[RHEL8/rhel qemu-kvm PATCH 5/5] aio-posix: do skip system call if ctx->notifier polling succeeds +Bugzilla: 1632622 +RH-Acked-by: Thomas Huth +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Danilo de Paula + +From: Paolo Bonzini + +BZ: 1632622 + +Commit 70232b5253 ("aio-posix: Don't count ctx->notifier as progress when +polling", 2018-08-15), by not reporting progress, causes aio_poll to execute the +system call when polling succeeds because of ctx->notifier. This introduces +latency before the call to aio_bh_poll() and negates the advantages of +polling, unfortunately. + +The fix builds on the previous patch, separating the effect of polling on +the timeout from the progress reported to aio_poll(). ctx->notifier +does zero the timeout, causing the caller to skip the system call, +but it does not report progress, so that the bug fix of commit 70232b5253 +still stands. + +Fixes: 70232b5253a3c4e03ed1ac47ef9246a8ac66c6fa +Signed-off-by: Paolo Bonzini +Message-Id: <20180912171040.1732-4-pbonzini@redhat.com> +Reviewed-by: Fam Zheng +Signed-off-by: Fam Zheng +(cherry picked from commit cfeb35d6774b2e936046aa9923217818bd160299) +Signed-off-by: Fam Zheng +Signed-off-by: Danilo C. L. de Paula +--- + util/aio-posix.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/util/aio-posix.c b/util/aio-posix.c +index bb862e1..a959ff6 100644 +--- a/util/aio-posix.c ++++ b/util/aio-posix.c +@@ -498,10 +498,11 @@ static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout) + QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { + if (!node->deleted && node->io_poll && + aio_node_check(ctx, node->is_external) && +- node->io_poll(node->opaque) && +- node->opaque != &ctx->notifier) { ++ node->io_poll(node->opaque)) { + *timeout = 0; +- progress = true; ++ if (node->opaque != &ctx->notifier) { ++ progress = true; ++ } + } + + /* Caller handles freeing deleted nodes. Don't do it here.
*/ +-- +1.8.3.1 + diff --git a/0069-linux-headers-update.patch b/0069-linux-headers-update.patch new file mode 100644 index 0000000..a48ddbc --- /dev/null +++ b/0069-linux-headers-update.patch @@ -0,0 +1,202 @@ +From 7e13447e23269939c3d1267a957187a60fef36e9 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 15 Oct 2018 10:19:26 +0100 +Subject: linux-headers: update + +RH-Author: Thomas Huth +Message-id: <1539598771-16223-2-git-send-email-thuth@redhat.com> +Patchwork-id: 82696 +O-Subject: [RHEL-8 qemu-kvm PATCH 1/6] linux-headers: update +Bugzilla: 1508142 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jens Freimann + +From: Cornelia Huck + +Update to kvm/next commit dd5bd0a65ff6 ("Merge tag 'kvm-s390-next-4.20-1' +of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD") + +Signed-off-by: Cornelia Huck +(cherry picked from commit 8f3cd250a897213d39e621e3d824507b48158d42) +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + linux-headers/linux/kvm.h + linux-headers/linux/vhost.h + (simple contextual conflicts due to some missing patches in downstream) + +Signed-off-by: Thomas Huth +--- + include/standard-headers/linux/input.h | 9 +++++---- + linux-headers/asm-arm/kvm.h | 13 +++++++++++++ + linux-headers/asm-arm64/kvm.h | 13 +++++++++++++ + linux-headers/asm-s390/kvm.h | 2 ++ + linux-headers/asm-x86/kvm.h | 1 + + linux-headers/linux/kvm.h | 3 +++ + linux-headers/linux/vfio.h | 2 ++ + linux-headers/linux/vhost.h | 8 ++++++++ + 8 files changed, 47 insertions(+), 4 deletions(-) + +diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h +index 6d6128c..c0ad9fc 100644 +--- a/include/standard-headers/linux/input.h ++++ b/include/standard-headers/linux/input.h +@@ -267,10 +267,11 @@ struct input_mask { + /* + * MT_TOOL types + */ +-#define MT_TOOL_FINGER 0 +-#define MT_TOOL_PEN 1 +-#define MT_TOOL_PALM 2 +-#define MT_TOOL_MAX 2 ++#define MT_TOOL_FINGER 0x00 ++#define MT_TOOL_PEN 0x01 
++#define MT_TOOL_PALM 0x02 ++#define MT_TOOL_DIAL 0x0a ++#define MT_TOOL_MAX 0x0f + + /* + * Values describing the status of a force-feedback effect +diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h +index 72aa226..e1f8b74 100644 +--- a/linux-headers/asm-arm/kvm.h ++++ b/linux-headers/asm-arm/kvm.h +@@ -27,6 +27,7 @@ + #define __KVM_HAVE_GUEST_DEBUG + #define __KVM_HAVE_IRQ_LINE + #define __KVM_HAVE_READONLY_MEM ++#define __KVM_HAVE_VCPU_EVENTS + + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + +@@ -125,6 +126,18 @@ struct kvm_sync_regs { + struct kvm_arch_memory_slot { + }; + ++/* for KVM_GET/SET_VCPU_EVENTS */ ++struct kvm_vcpu_events { ++ struct { ++ __u8 serror_pending; ++ __u8 serror_has_esr; ++ /* Align it to 8 bytes */ ++ __u8 pad[6]; ++ __u64 serror_esr; ++ } exception; ++ __u32 reserved[12]; ++}; ++ + /* If you need to interpret the index values, here is the key: */ + #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 + #define KVM_REG_ARM_COPROC_SHIFT 16 +diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h +index 99cb9ad..e6a98c1 100644 +--- a/linux-headers/asm-arm64/kvm.h ++++ b/linux-headers/asm-arm64/kvm.h +@@ -39,6 +39,7 @@ + #define __KVM_HAVE_GUEST_DEBUG + #define __KVM_HAVE_IRQ_LINE + #define __KVM_HAVE_READONLY_MEM ++#define __KVM_HAVE_VCPU_EVENTS + + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + +@@ -154,6 +155,18 @@ struct kvm_sync_regs { + struct kvm_arch_memory_slot { + }; + ++/* for KVM_GET/SET_VCPU_EVENTS */ ++struct kvm_vcpu_events { ++ struct { ++ __u8 serror_pending; ++ __u8 serror_has_esr; ++ /* Align it to 8 bytes */ ++ __u8 pad[6]; ++ __u64 serror_esr; ++ } exception; ++ __u32 reserved[12]; ++}; ++ + /* If you need to interpret the index values, here is the key: */ + #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 + #define KVM_REG_ARM_COPROC_SHIFT 16 +diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h +index 1ab9901..0265482 100644 +--- a/linux-headers/asm-s390/kvm.h ++++ 
b/linux-headers/asm-s390/kvm.h +@@ -160,6 +160,8 @@ struct kvm_s390_vm_cpu_subfunc { + #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 + #define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2 + #define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3 ++#define KVM_S390_VM_CRYPTO_ENABLE_APIE 4 ++#define KVM_S390_VM_CRYPTO_DISABLE_APIE 5 + + /* kvm attributes for migration mode */ + #define KVM_S390_VM_MIGRATION_STOP 0 +diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h +index c535c2f..9bba973 100644 +--- a/linux-headers/asm-x86/kvm.h ++++ b/linux-headers/asm-x86/kvm.h +@@ -377,5 +377,6 @@ struct kvm_sync_regs { + + #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) + #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) ++#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) + + #endif /* _ASM_X86_KVM_H */ +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 2aae948..c4a5542 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -950,6 +950,9 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_HYPERV_EVENTFD 154 + #define KVM_CAP_HYPERV_TLBFLUSH 155 + #define KVM_CAP_S390_HPAGE_1M 156 ++#define KVM_CAP_NESTED_STATE 157 ++#define KVM_CAP_ARM_INJECT_SERROR_ESR 158 ++#define KVM_CAP_MSR_PLATFORM_INFO 159 + + #ifdef KVM_CAP_IRQ_ROUTING + +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index 3615a26..ceb6453 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -200,6 +200,7 @@ struct vfio_device_info { + #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */ + #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ + #define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ ++#define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */ + __u32 num_regions; /* Max region index + 1 */ + __u32 num_irqs; /* Max IRQ index + 1 */ + }; +@@ -215,6 +216,7 @@ struct vfio_device_info { + #define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform" + #define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" + 
#define VFIO_DEVICE_API_CCW_STRING "vfio-ccw" ++#define VFIO_DEVICE_API_AP_STRING "vfio-ap" + + /** + * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, +diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index e336395..3421624 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -160,6 +160,14 @@ struct vhost_memory { + #define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24, \ + struct vhost_vring_state) + ++/* Set or get vhost backend capability */ ++ ++/* Use message type V2 */ ++#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 ++ ++#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) ++#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) ++ + /* VHOST_NET specific defines */ + + /* Attach virtio net ring to a raw socket, or tap device. +-- +1.8.3.1 + diff --git a/0070-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch b/0070-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch new file mode 100644 index 0000000..dea147b --- /dev/null +++ b/0070-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch @@ -0,0 +1,148 @@ +From 9ceba72eb99b073a86b0aa529154de3e06330720 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 15 Oct 2018 10:19:27 +0100 +Subject: s390x/cpumodel: Set up CPU model for AP device support + +RH-Author: Thomas Huth +Message-id: <1539598771-16223-3-git-send-email-thuth@redhat.com> +Patchwork-id: 82694 +O-Subject: [RHEL-8 qemu-kvm PATCH 2/6] s390x/cpumodel: Set up CPU model for AP device support +Bugzilla: 1508142 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jens Freimann + +From: Tony Krowiak + +A new CPU model feature and two new CPU model facilities are +introduced to support AP devices for a KVM guest. + +CPU model features: + +1. The S390_FEAT_AP CPU model feature indicates whether AP + instructions are available to the guest. 
This feature will + be enabled only if the AP instructions are available on the + linux host as determined by the availability of the + KVM_S390_VM_CRYPTO_ENABLE_APIE VM attribute which is exposed + by KVM only if the AP instructions are available on the + host. + + This feature must be turned on from userspace to execute AP + instructions on the KVM guest. The QEMU command line to turn + this feature on looks something like this: + + qemu-system-s390x ... -cpu xxx,ap=on ... + + This feature will be supported for zEC12 and newer CPU models. + The feature will not be supported for older models because + there are few older systems on which to test and the older + crypto cards will be going out of service in the relatively + near future. + +CPU model facilities: + +1. The S390_FEAT_AP_QUERY_CONFIG_INFO feature indicates whether the + AP Query Configuration Information (QCI) facility is available + to the guest as determined by whether the facility is available + on the host. This feature will be exposed by KVM only if the + QCI facility is installed on the host. + +2. The S390_FEAT_AP_FACILITIES_TEST feature indicates whether the AP + Facility Test (APFT) facility is available to the guest as + determined by whether the facility is available on the host. + This feature will be exposed by KVM only if APFT is installed + on the host. + +Signed-off-by: Tony Krowiak +Tested-by: Pierre Morel +Reviewed-by: David Hildenbrand +Reviewed-by: Halil Pasic +Reviewed-by: Christian Borntraeger +Tested-by: Christian Borntraeger +Message-Id: <20181010170309.12045-3-akrowiak@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit c5cd17afddda89376712b315a41ede96b034e4c2) +Signed-off-by: Danilo C. L.
de Paula +--- + target/s390x/cpu_features.c | 3 +++ + target/s390x/cpu_features_def.h | 3 +++ + target/s390x/cpu_models.c | 2 ++ + target/s390x/gen-features.c | 3 +++ + 4 files changed, 11 insertions(+) + +diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c +index e05e6aa..0fbee27 100644 +--- a/target/s390x/cpu_features.c ++++ b/target/s390x/cpu_features.c +@@ -40,8 +40,10 @@ static const S390FeatDef s390_features[] = { + FEAT_INIT("srs", S390_FEAT_TYPE_STFL, 9, "Sense-running-status facility"), + FEAT_INIT("csske", S390_FEAT_TYPE_STFL, 10, "Conditional-SSKE facility"), + FEAT_INIT("ctop", S390_FEAT_TYPE_STFL, 11, "Configuration-topology facility"), ++ FEAT_INIT("apqci", S390_FEAT_TYPE_STFL, 12, "Query AP Configuration Information facility"), + FEAT_INIT("ipter", S390_FEAT_TYPE_STFL, 13, "IPTE-range facility"), + FEAT_INIT("nonqks", S390_FEAT_TYPE_STFL, 14, "Nonquiescing key-setting facility"), ++ FEAT_INIT("apft", S390_FEAT_TYPE_STFL, 15, "AP Facilities Test facility"), + FEAT_INIT("etf2", S390_FEAT_TYPE_STFL, 16, "Extended-translation facility 2"), + FEAT_INIT("msa-base", S390_FEAT_TYPE_STFL, 17, "Message-security-assist facility (excluding subfunctions)"), + FEAT_INIT("ldisp", S390_FEAT_TYPE_STFL, 18, "Long-displacement facility"), +@@ -130,6 +132,7 @@ static const S390FeatDef s390_features[] = { + + FEAT_INIT_MISC("dateh2", "DAT-enhancement facility 2"), + FEAT_INIT_MISC("cmm", "Collaborative-memory-management facility"), ++ FEAT_INIT_MISC("ap", "AP instructions installed"), + + FEAT_INIT("plo-cl", S390_FEAT_TYPE_PLO, 0, "PLO Compare and load (32 bit in general registers)"), + FEAT_INIT("plo-clg", S390_FEAT_TYPE_PLO, 1, "PLO Compare and load (64 bit in parameter list)"), +diff --git a/target/s390x/cpu_features_def.h b/target/s390x/cpu_features_def.h +index ac2c947..5fc7e7b 100644 +--- a/target/s390x/cpu_features_def.h ++++ b/target/s390x/cpu_features_def.h +@@ -27,8 +27,10 @@ typedef enum { + S390_FEAT_SENSE_RUNNING_STATUS, + 
S390_FEAT_CONDITIONAL_SSKE, + S390_FEAT_CONFIGURATION_TOPOLOGY, ++ S390_FEAT_AP_QUERY_CONFIG_INFO, + S390_FEAT_IPTE_RANGE, + S390_FEAT_NONQ_KEY_SETTING, ++ S390_FEAT_AP_FACILITIES_TEST, + S390_FEAT_EXTENDED_TRANSLATION_2, + S390_FEAT_MSA, + S390_FEAT_LONG_DISPLACEMENT, +@@ -119,6 +121,7 @@ typedef enum { + /* Misc */ + S390_FEAT_DAT_ENH_2, + S390_FEAT_CMM, ++ S390_FEAT_AP, + + /* PLO */ + S390_FEAT_PLO_CL, +diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c +index 9c469ff..a8722cd 100644 +--- a/target/s390x/cpu_models.c ++++ b/target/s390x/cpu_models.c +@@ -782,6 +782,8 @@ static void check_consistency(const S390CPUModel *model) + { S390_FEAT_PRNO_TRNG_QRTCR, S390_FEAT_MSA_EXT_5 }, + { S390_FEAT_PRNO_TRNG, S390_FEAT_MSA_EXT_5 }, + { S390_FEAT_SIE_KSS, S390_FEAT_SIE_F2 }, ++ { S390_FEAT_AP_QUERY_CONFIG_INFO, S390_FEAT_AP }, ++ { S390_FEAT_AP_FACILITIES_TEST, S390_FEAT_AP }, + }; + int i; + +diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c +index 5af042c..7302269 100644 +--- a/target/s390x/gen-features.c ++++ b/target/s390x/gen-features.c +@@ -447,6 +447,9 @@ static uint16_t full_GEN12_GA1[] = { + S390_FEAT_ADAPTER_INT_SUPPRESSION, + S390_FEAT_EDAT_2, + S390_FEAT_SIDE_EFFECT_ACCESS_ESOP2, ++ S390_FEAT_AP_QUERY_CONFIG_INFO, ++ S390_FEAT_AP_FACILITIES_TEST, ++ S390_FEAT_AP, + }; + + static uint16_t full_GEN12_GA2[] = { +-- +1.8.3.1 + diff --git a/0071-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch b/0071-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch new file mode 100644 index 0000000..47012cd --- /dev/null +++ b/0071-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch @@ -0,0 +1,89 @@ +From ef6a15cefa04a4f29d0d800d17caa9a37c40b05c Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 15 Oct 2018 10:19:28 +0100 +Subject: s390x/kvm: enable AP instruction interpretation for guest + +RH-Author: Thomas Huth +Message-id: <1539598771-16223-4-git-send-email-thuth@redhat.com> +Patchwork-id: 82697 
+O-Subject: [RHEL-8 qemu-kvm PATCH 3/6] s390x/kvm: enable AP instruction interpretation for guest +Bugzilla: 1508142 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jens Freimann + +From: Tony Krowiak + +Let's use the KVM_SET_DEVICE_ATTR ioctl to enable hardware +interpretation of AP instructions executed on the guest. +If the S390_FEAT_AP feature is switched on for the guest, +AP instructions must be interpreted by default; otherwise, +they will be intercepted. + +This attribute setting may be overridden by a device. For example, +a device may want to provide AP instructions to the guest (i.e., +S390_FEAT_AP turned on), but it may want to emulate them. In this +case, the AP instructions executed on the guest must be +intercepted; so when the device is realized, it must disable +interpretation. + +Signed-off-by: Tony Krowiak +Tested-by: Pierre Morel +Reviewed-by: David Hildenbrand +Reviewed-by: Thomas Huth +Reviewed-by: Christian Borntraeger +Acked-by: Halil Pasic +Tested-by: Christian Borntraeger +Message-Id: <20181010170309.12045-4-akrowiak@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 1d7db85b61cb9888b8ed8c8923343b468405b7a0) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/s390x/kvm.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index 71d90f2..d25e2e2 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -2290,11 +2290,26 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) + error_setg(errp, "KVM: host CPU model could not be identified"); + return; + } ++ /* for now, we can only provide the AP feature with HW support */ ++ if (kvm_vm_check_attr(kvm_state, KVM_S390_VM_CRYPTO, ++ KVM_S390_VM_CRYPTO_ENABLE_APIE)) { ++ set_bit(S390_FEAT_AP, model->features); ++ } + /* strip of features that are not part of the maximum model */ + bitmap_and(model->features, model->features, model->def->full_feat, + S390_FEAT_MAX); + } + ++static void kvm_s390_configure_apie(bool interpret) ++{ ++ uint64_t attr = interpret ? KVM_S390_VM_CRYPTO_ENABLE_APIE : ++ KVM_S390_VM_CRYPTO_DISABLE_APIE; ++ ++ if (kvm_vm_check_attr(kvm_state, KVM_S390_VM_CRYPTO, attr)) { ++ kvm_s390_set_attr(attr); ++ } ++} ++ + void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) + { + struct kvm_s390_vm_cpu_processor prop = { +@@ -2352,6 +2367,10 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) + if (test_bit(S390_FEAT_CMM, model->features)) { + kvm_s390_enable_cmma(); + } ++ ++ if (test_bit(S390_FEAT_AP, model->features)) { ++ kvm_s390_configure_apie(true); ++ } + } + + void kvm_s390_restart_interrupt(S390CPU *cpu) +-- +1.8.3.1 + diff --git a/0072-s390x-ap-base-Adjunct-Processor-AP-object-model.patch b/0072-s390x-ap-base-Adjunct-Processor-AP-object-model.patch new file mode 100644 index 0000000..9059681 --- /dev/null +++ b/0072-s390x-ap-base-Adjunct-Processor-AP-object-model.patch @@ -0,0 +1,281 @@ +From a57558fc97a82853d0c5e1e190297f7677598d5a Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 15 Oct 2018 10:19:29 +0100 +Subject: s390x/ap: base Adjunct Processor (AP) object model + +RH-Author: Thomas Huth 
+Message-id: <1539598771-16223-5-git-send-email-thuth@redhat.com> +Patchwork-id: 82695 +O-Subject: [RHEL-8 qemu-kvm PATCH 4/6] s390x/ap: base Adjunct Processor (AP) object model +Bugzilla: 1508142 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jens Freimann + +From: Tony Krowiak + +Introduces the base object model for virtualizing AP devices. + +Signed-off-by: Tony Krowiak +Tested-by: Pierre Morel +Acked-by: David Hildenbrand +Reviewed-by: Thomas Huth +Reviewed-by: Halil Pasic +Tested-by: Christian Borntraeger +Message-Id: <20181010170309.12045-5-akrowiak@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit a51b31535a8ec13997de29b357f7cc1dcd8a7f9c) +Signed-off-by: Danilo C. L. de Paula +--- + MAINTAINERS | 12 +++++++ + hw/s390x/Makefile.objs | 2 ++ + hw/s390x/ap-bridge.c | 78 ++++++++++++++++++++++++++++++++++++++++++++ + hw/s390x/ap-device.c | 38 +++++++++++++++++++++ + hw/s390x/s390-virtio-ccw.c | 4 +++ + include/hw/s390x/ap-bridge.h | 19 +++++++++++ + include/hw/s390x/ap-device.h | 22 +++++++++++++ + 7 files changed, 175 insertions(+) + create mode 100644 hw/s390x/ap-bridge.c + create mode 100644 hw/s390x/ap-device.c + create mode 100644 include/hw/s390x/ap-bridge.h + create mode 100644 include/hw/s390x/ap-device.h + +diff --git a/MAINTAINERS b/MAINTAINERS +index 666e936..d5b3c18 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -1184,6 +1184,18 @@ F: include/hw/s390x/s390-ccw.h + T: git git://github.com/cohuck/qemu.git s390-next + L: qemu-s390x@nongnu.org + ++vfio-ap ++M: Christian Borntraeger ++M: Tony Krowiak ++M: Halil Pasic ++M: Pierre Morel ++S: Supported ++F: hw/s390x/ap-device.c ++F: hw/s390x/ap-bridge.c ++F: include/hw/s390x/ap-device.h ++F: include/hw/s390x/ap-bridge.h ++L: qemu-s390x@nongnu.org ++ + vhost + M: Michael S. 
Tsirkin + S: Supported +diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs +index 93282f7..add89b1 100644 +--- a/hw/s390x/Makefile.objs ++++ b/hw/s390x/Makefile.objs +@@ -20,3 +20,5 @@ obj-$(CONFIG_TCG) += tod-qemu.o + obj-$(CONFIG_KVM) += s390-skeys-kvm.o + obj-$(CONFIG_KVM) += s390-stattrib-kvm.o + obj-y += s390-ccw.o ++obj-y += ap-device.o ++obj-y += ap-bridge.o +diff --git a/hw/s390x/ap-bridge.c b/hw/s390x/ap-bridge.c +new file mode 100644 +index 0000000..3795d30 +--- /dev/null ++++ b/hw/s390x/ap-bridge.c +@@ -0,0 +1,78 @@ ++/* ++ * ap bridge ++ * ++ * Copyright 2018 IBM Corp. ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. ++ */ ++#include "qemu/osdep.h" ++#include "qapi/error.h" ++#include "hw/sysbus.h" ++#include "qemu/bitops.h" ++#include "hw/s390x/ap-bridge.h" ++#include "cpu.h" ++ ++static char *ap_bus_get_dev_path(DeviceState *dev) ++{ ++ /* at most one */ ++ return g_strdup_printf("/1"); ++} ++ ++static void ap_bus_class_init(ObjectClass *oc, void *data) ++{ ++ BusClass *k = BUS_CLASS(oc); ++ ++ k->get_dev_path = ap_bus_get_dev_path; ++ /* More than one ap device does not make sense */ ++ k->max_dev = 1; ++} ++ ++static const TypeInfo ap_bus_info = { ++ .name = TYPE_AP_BUS, ++ .parent = TYPE_BUS, ++ .instance_size = 0, ++ .class_init = ap_bus_class_init, ++}; ++ ++void s390_init_ap(void) ++{ ++ DeviceState *dev; ++ ++ /* If no AP instructions then no need for AP bridge */ ++ if (!s390_has_feat(S390_FEAT_AP)) { ++ return; ++ } ++ ++ /* Create bridge device */ ++ dev = qdev_create(NULL, TYPE_AP_BRIDGE); ++ object_property_add_child(qdev_get_machine(), TYPE_AP_BRIDGE, ++ OBJECT(dev), NULL); ++ qdev_init_nofail(dev); ++ ++ /* Create bus on bridge device */ ++ qbus_create(TYPE_AP_BUS, dev, TYPE_AP_BUS); ++ } ++ ++static void ap_bridge_class_init(ObjectClass *oc, void *data) ++{ ++ DeviceClass *dc = DEVICE_CLASS(oc); ++ ++ 
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); ++} ++ ++static const TypeInfo ap_bridge_info = { ++ .name = TYPE_AP_BRIDGE, ++ .parent = TYPE_SYS_BUS_DEVICE, ++ .instance_size = 0, ++ .class_init = ap_bridge_class_init, ++}; ++ ++static void ap_register(void) ++{ ++ type_register_static(&ap_bridge_info); ++ type_register_static(&ap_bus_info); ++} ++ ++type_init(ap_register) +diff --git a/hw/s390x/ap-device.c b/hw/s390x/ap-device.c +new file mode 100644 +index 0000000..f5ac8db +--- /dev/null ++++ b/hw/s390x/ap-device.c +@@ -0,0 +1,38 @@ ++/* ++ * Adjunct Processor (AP) matrix device ++ * ++ * Copyright 2018 IBM Corp. ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. ++ */ ++#include "qemu/osdep.h" ++#include "qemu/module.h" ++#include "qapi/error.h" ++#include "hw/qdev.h" ++#include "hw/s390x/ap-device.h" ++ ++static void ap_class_init(ObjectClass *klass, void *data) ++{ ++ DeviceClass *dc = DEVICE_CLASS(klass); ++ ++ dc->desc = "AP device class"; ++ dc->hotpluggable = false; ++} ++ ++static const TypeInfo ap_device_info = { ++ .name = AP_DEVICE_TYPE, ++ .parent = TYPE_DEVICE, ++ .instance_size = sizeof(APDevice), ++ .class_size = sizeof(DeviceClass), ++ .class_init = ap_class_init, ++ .abstract = true, ++}; ++ ++static void ap_device_register(void) ++{ ++ type_register_static(&ap_device_info); ++} ++ ++type_init(ap_device_register) +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index cdf4558..a4b8b62 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -32,6 +32,7 @@ + #include "ipl.h" + #include "hw/s390x/s390-virtio-ccw.h" + #include "hw/s390x/css-bridge.h" ++#include "hw/s390x/ap-bridge.h" + #include "migration/register.h" + #include "cpu_models.h" + #include "hw/nmi.h" +@@ -263,6 +264,9 @@ static void ccw_init(MachineState *machine) + /* init the SIGP facility */ + s390_init_sigp(); + ++ /* 
create AP bridge and bus(es) */ ++ s390_init_ap(); ++ + /* get a BUS */ + css_bus = virtual_css_bus_init(); + s390_init_ipl_dev(machine->kernel_filename, machine->kernel_cmdline, +diff --git a/include/hw/s390x/ap-bridge.h b/include/hw/s390x/ap-bridge.h +new file mode 100644 +index 0000000..470e439 +--- /dev/null ++++ b/include/hw/s390x/ap-bridge.h +@@ -0,0 +1,19 @@ ++/* ++ * ap bridge ++ * ++ * Copyright 2018 IBM Corp. ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. ++ */ ++ ++#ifndef HW_S390X_AP_BRIDGE_H ++#define HW_S390X_AP_BRIDGE_H ++ ++#define TYPE_AP_BRIDGE "ap-bridge" ++#define TYPE_AP_BUS "ap-bus" ++ ++void s390_init_ap(void); ++ ++#endif +diff --git a/include/hw/s390x/ap-device.h b/include/hw/s390x/ap-device.h +new file mode 100644 +index 0000000..765e908 +--- /dev/null ++++ b/include/hw/s390x/ap-device.h +@@ -0,0 +1,22 @@ ++/* ++ * Adjunct Processor (AP) matrix device interfaces ++ * ++ * Copyright 2018 IBM Corp. ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. 
++ */ ++#ifndef HW_S390X_AP_DEVICE_H ++#define HW_S390X_AP_DEVICE_H ++ ++#define AP_DEVICE_TYPE "ap-device" ++ ++typedef struct APDevice { ++ DeviceState parent_obj; ++} APDevice; ++ ++#define AP_DEVICE(obj) \ ++ OBJECT_CHECK(APDevice, (obj), AP_DEVICE_TYPE) ++ ++#endif /* HW_S390X_AP_DEVICE_H */ +-- +1.8.3.1 + diff --git a/0073-s390x-vfio-ap-Introduce-VFIO-AP-device.patch b/0073-s390x-vfio-ap-Introduce-VFIO-AP-device.patch new file mode 100644 index 0000000..d4e8441 --- /dev/null +++ b/0073-s390x-vfio-ap-Introduce-VFIO-AP-device.patch @@ -0,0 +1,305 @@ +From 9f3a3325bb6859b1d3b46818a7d5b75c5d609f32 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 15 Oct 2018 10:19:30 +0100 +Subject: s390x/vfio: ap: Introduce VFIO AP device + +RH-Author: Thomas Huth +Message-id: <1539598771-16223-6-git-send-email-thuth@redhat.com> +Patchwork-id: 82700 +O-Subject: [RHEL-8 qemu-kvm PATCH 5/6] s390x/vfio: ap: Introduce VFIO AP device +Bugzilla: 1508142 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jens Freimann + +From: Tony Krowiak + +Introduces a VFIO based AP device. The device is defined via +the QEMU command line by specifying: + + -device vfio-ap,sysfsdev= + +There may be only one vfio-ap device configured for a guest. + +The mediated matrix device is created by the VFIO AP device +driver by writing a UUID to a sysfs attribute file (see +docs/vfio-ap.txt). The mediated matrix device will be named +after the UUID. Symbolic links to the $uuid are created in +many places, so the path to the mediated matrix device $uuid +can be specified in any of the following ways: + +/sys/devices/vfio_ap/matrix/$uuid +/sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough/devices/$uuid +/sys/bus/mdev/devices/$uuid +/sys/bus/mdev/drivers/vfio_mdev/$uuid + +When the vfio-ap device is realized, it acquires and opens the +VFIO iommu group to which the mediated matrix device is +bound. This causes a VFIO group notification event to be +signaled. 
The vfio_ap device driver's group notification +handler will get called at which time the device driver +will configure the the AP devices to which the guest will +be granted access. + +Signed-off-by: Tony Krowiak +Tested-by: Pierre Morel +Acked-by: Halil Pasic +Tested-by: Pierre Morel +Tested-by: Christian Borntraeger +Message-Id: <20181010170309.12045-6-akrowiak@linux.ibm.com> +Reviewed-by: Thomas Huth +[CH: added missing g_free and device category] +Signed-off-by: Cornelia Huck + +(cherry picked from commit 2fe2942cd6ddad8ddd40fe5d16d67599c28959d7) +Signed-off-by: Danilo C. L. de Paula +--- + MAINTAINERS | 2 + + default-configs/s390x-softmmu.mak | 1 + + hw/vfio/Makefile.objs | 1 + + hw/vfio/ap.c | 181 ++++++++++++++++++++++++++++++++++++++ + include/hw/vfio/vfio-common.h | 1 + + 5 files changed, 186 insertions(+) + create mode 100644 hw/vfio/ap.c + +diff --git a/MAINTAINERS b/MAINTAINERS +index d5b3c18..f2fa1b8 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -88,6 +88,7 @@ F: hw/char/terminal3270.c + F: hw/intc/s390_flic.c + F: hw/intc/s390_flic_kvm.c + F: hw/s390x/ ++F: hw/vfio/ap.c + F: hw/vfio/ccw.c + F: hw/watchdog/wdt_diag288.c + F: include/hw/s390x/ +@@ -1194,6 +1195,7 @@ F: hw/s390x/ap-device.c + F: hw/s390x/ap-bridge.c + F: include/hw/s390x/ap-device.h + F: include/hw/s390x/ap-bridge.h ++F: hw/vfio/ap.c + L: qemu-s390x@nongnu.org + + vhost +diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak +index 8b2db3e..49a59fc 100644 +--- a/default-configs/s390x-softmmu.mak ++++ b/default-configs/s390x-softmmu.mak +@@ -8,3 +8,4 @@ CONFIG_S390_FLIC_KVM=$(CONFIG_KVM) + # Disabled for Red Hat Enterprise Linux: + # CONFIG_VFIO_CCW=$(CONFIG_LINUX) + CONFIG_WDT_DIAG288=y ++CONFIG_VFIO_AP=$(CONFIG_LINUX) +diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs +index d38205b..53b4cbe 100644 +--- a/hw/vfio/Makefile.objs ++++ b/hw/vfio/Makefile.objs +@@ -5,4 +5,5 @@ obj-$(CONFIG_VFIO_CCW) += ccw.o + obj-$(CONFIG_VFIO_XGMAC) += 
calxeda-xgmac.o + obj-$(CONFIG_VFIO_AMD_XGBE) += amd-xgbe.o + obj-$(CONFIG_SOFTMMU) += spapr.o ++obj-$(CONFIG_VFIO_AP) += ap.o + endif +diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c +new file mode 100644 +index 0000000..3962bb7 +--- /dev/null ++++ b/hw/vfio/ap.c +@@ -0,0 +1,181 @@ ++/* ++ * VFIO based AP matrix device assignment ++ * ++ * Copyright 2018 IBM Corp. ++ * Author(s): Tony Krowiak ++ * Halil Pasic ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. ++ */ ++ ++#include ++#include ++#include "qemu/osdep.h" ++#include "qapi/error.h" ++#include "hw/sysbus.h" ++#include "hw/vfio/vfio.h" ++#include "hw/vfio/vfio-common.h" ++#include "hw/s390x/ap-device.h" ++#include "qemu/error-report.h" ++#include "qemu/queue.h" ++#include "qemu/option.h" ++#include "qemu/config-file.h" ++#include "cpu.h" ++#include "kvm_s390x.h" ++#include "sysemu/sysemu.h" ++#include "hw/s390x/ap-bridge.h" ++#include "exec/address-spaces.h" ++ ++#define VFIO_AP_DEVICE_TYPE "vfio-ap" ++ ++typedef struct VFIOAPDevice { ++ APDevice apdev; ++ VFIODevice vdev; ++} VFIOAPDevice; ++ ++#define VFIO_AP_DEVICE(obj) \ ++ OBJECT_CHECK(VFIOAPDevice, (obj), VFIO_AP_DEVICE_TYPE) ++ ++static void vfio_ap_compute_needs_reset(VFIODevice *vdev) ++{ ++ vdev->needs_reset = false; ++} ++ ++/* ++ * We don't need vfio_hot_reset_multi and vfio_eoi operations for ++ * vfio-ap device now. 
++ */ ++struct VFIODeviceOps vfio_ap_ops = { ++ .vfio_compute_needs_reset = vfio_ap_compute_needs_reset, ++}; ++ ++static void vfio_ap_put_device(VFIOAPDevice *vapdev) ++{ ++ g_free(vapdev->vdev.name); ++ vfio_put_base_device(&vapdev->vdev); ++} ++ ++static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp) ++{ ++ GError *gerror = NULL; ++ char *symlink, *group_path; ++ int groupid; ++ ++ symlink = g_strdup_printf("%s/iommu_group", vapdev->vdev.sysfsdev); ++ group_path = g_file_read_link(symlink, &gerror); ++ g_free(symlink); ++ ++ if (!group_path) { ++ error_setg(errp, "%s: no iommu_group found for %s: %s", ++ VFIO_AP_DEVICE_TYPE, vapdev->vdev.sysfsdev, gerror->message); ++ return NULL; ++ } ++ ++ if (sscanf(basename(group_path), "%d", &groupid) != 1) { ++ error_setg(errp, "vfio: failed to read %s", group_path); ++ g_free(group_path); ++ return NULL; ++ } ++ ++ g_free(group_path); ++ ++ return vfio_get_group(groupid, &address_space_memory, errp); ++} ++ ++static void vfio_ap_realize(DeviceState *dev, Error **errp) ++{ ++ int ret; ++ char *mdevid; ++ Error *local_err = NULL; ++ VFIOGroup *vfio_group; ++ APDevice *apdev = AP_DEVICE(dev); ++ VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); ++ ++ vfio_group = vfio_ap_get_group(vapdev, &local_err); ++ if (!vfio_group) { ++ goto out_err; ++ } ++ ++ vapdev->vdev.ops = &vfio_ap_ops; ++ vapdev->vdev.type = VFIO_DEVICE_TYPE_AP; ++ mdevid = basename(vapdev->vdev.sysfsdev); ++ vapdev->vdev.name = g_strdup_printf("%s", mdevid); ++ vapdev->vdev.dev = dev; ++ ++ ret = vfio_get_device(vfio_group, mdevid, &vapdev->vdev, &local_err); ++ if (ret) { ++ goto out_get_dev_err; ++ } ++ ++ return; ++ ++out_get_dev_err: ++ vfio_ap_put_device(vapdev); ++ vfio_put_group(vfio_group); ++out_err: ++ error_propagate(errp, local_err); ++} ++ ++static void vfio_ap_unrealize(DeviceState *dev, Error **errp) ++{ ++ APDevice *apdev = AP_DEVICE(dev); ++ VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); ++ VFIOGroup *group = vapdev->vdev.group; 
++ ++ vfio_ap_put_device(vapdev); ++ vfio_put_group(group); ++} ++ ++static Property vfio_ap_properties[] = { ++ DEFINE_PROP_STRING("sysfsdev", VFIOAPDevice, vdev.sysfsdev), ++ DEFINE_PROP_END_OF_LIST(), ++}; ++ ++static void vfio_ap_reset(DeviceState *dev) ++{ ++ int ret; ++ APDevice *apdev = AP_DEVICE(dev); ++ VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); ++ ++ ret = ioctl(vapdev->vdev.fd, VFIO_DEVICE_RESET); ++ if (ret) { ++ error_report("%s: failed to reset %s device: %s", __func__, ++ vapdev->vdev.name, strerror(ret)); ++ } ++} ++ ++static const VMStateDescription vfio_ap_vmstate = { ++ .name = VFIO_AP_DEVICE_TYPE, ++ .unmigratable = 1, ++}; ++ ++static void vfio_ap_class_init(ObjectClass *klass, void *data) ++{ ++ DeviceClass *dc = DEVICE_CLASS(klass); ++ ++ dc->props = vfio_ap_properties; ++ dc->vmsd = &vfio_ap_vmstate; ++ dc->desc = "VFIO-based AP device assignment"; ++ set_bit(DEVICE_CATEGORY_MISC, dc->categories); ++ dc->realize = vfio_ap_realize; ++ dc->unrealize = vfio_ap_unrealize; ++ dc->hotpluggable = false; ++ dc->reset = vfio_ap_reset; ++ dc->bus_type = TYPE_AP_BUS; ++} ++ ++static const TypeInfo vfio_ap_info = { ++ .name = VFIO_AP_DEVICE_TYPE, ++ .parent = AP_DEVICE_TYPE, ++ .instance_size = sizeof(VFIOAPDevice), ++ .class_init = vfio_ap_class_init, ++}; ++ ++static void vfio_ap_type_init(void) ++{ ++ type_register_static(&vfio_ap_info); ++} ++ ++type_init(vfio_ap_type_init) +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index a903692..1389da4 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -37,6 +37,7 @@ enum { + VFIO_DEVICE_TYPE_PCI = 0, + VFIO_DEVICE_TYPE_PLATFORM = 1, + VFIO_DEVICE_TYPE_CCW = 2, ++ VFIO_DEVICE_TYPE_AP = 3, + }; + + typedef struct VFIOMmap { +-- +1.8.3.1 + diff --git a/0074-s390-doc-detailed-specifications-for-AP-virtualizati.patch b/0074-s390-doc-detailed-specifications-for-AP-virtualizati.patch new file mode 100644 index 0000000..166fbc5 --- /dev/null +++ 
b/0074-s390-doc-detailed-specifications-for-AP-virtualizati.patch @@ -0,0 +1,889 @@ +From 8f59c31a8b0c4cde4bc92126d7102c1be9da97d4 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 15 Oct 2018 10:19:31 +0100 +Subject: s390: doc: detailed specifications for AP virtualization + +RH-Author: Thomas Huth +Message-id: <1539598771-16223-7-git-send-email-thuth@redhat.com> +Patchwork-id: 82699 +O-Subject: [RHEL-8 qemu-kvm PATCH 6/6] s390: doc: detailed specifications for AP virtualization +Bugzilla: 1508142 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jens Freimann + +From: Tony Krowiak + +This patch provides documentation describing the AP architecture and +design concepts behind the virtualization of AP devices. It also +includes an example of how to configure AP devices for exclusive +use of KVM guests. + +Signed-off-by: Tony Krowiak +Reviewed-by: Pierre Morel +Tested-by: Pierre Morel +Tested-by: Christian Borntraeger +Message-Id: <20181010170309.12045-7-akrowiak@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 694a8d703bfe06226a0574f5ec4af17a2b7060ef) +Signed-off-by: Danilo C. L. de Paula +--- + MAINTAINERS | 2 + + docs/vfio-ap.txt | 825 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 827 insertions(+) + create mode 100644 docs/vfio-ap.txt + +diff --git a/MAINTAINERS b/MAINTAINERS +index f2fa1b8..fdbfc04 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -96,6 +96,7 @@ F: include/hw/watchdog/wdt_diag288.h + F: pc-bios/s390-ccw/ + F: pc-bios/s390-ccw.img + F: target/s390x/ ++F: docs/vfio-ap.txt + K: ^Subject:.*(?i)s390x? 
+ T: git git://github.com/cohuck/qemu.git s390-next + L: qemu-s390x@nongnu.org +@@ -1196,6 +1197,7 @@ F: hw/s390x/ap-bridge.c + F: include/hw/s390x/ap-device.h + F: include/hw/s390x/ap-bridge.h + F: hw/vfio/ap.c ++F: docs/vfio-ap.txt + L: qemu-s390x@nongnu.org + + vhost +diff --git a/docs/vfio-ap.txt b/docs/vfio-ap.txt +new file mode 100644 +index 0000000..1233968 +--- /dev/null ++++ b/docs/vfio-ap.txt +@@ -0,0 +1,825 @@ ++Adjunct Processor (AP) Device ++============================= ++ ++Contents: ++========= ++* Introduction ++* AP Architectural Overview ++* Start Interpretive Execution (SIE) Instruction ++* AP Matrix Configuration on Linux Host ++* Starting a Linux Guest Configured with an AP Matrix ++* Example: Configure AP Matrices for Three Linux Guests ++ ++Introduction: ++============ ++The IBM Adjunct Processor (AP) Cryptographic Facility is comprised ++of three AP instructions and from 1 to 256 PCIe cryptographic adapter cards. ++These AP devices provide cryptographic functions to all CPUs assigned to a ++linux system running in an IBM Z system LPAR. ++ ++On s390x, AP adapter cards are exposed via the AP bus. This document ++describes how those cards may be made available to KVM guests using the ++VFIO mediated device framework. ++ ++AP Architectural Overview: ++========================= ++In order to understand the terminology used in the rest of this document, let's ++start with some definitions: ++ ++* AP adapter ++ ++ An AP adapter is an IBM Z adapter card that can perform cryptographic ++ functions. There can be from 0 to 256 adapters assigned to an LPAR depending ++ on the machine model. Adapters assigned to the LPAR in which a linux host is ++ running will be available to the linux host. Each adapter is identified by a ++ number from 0 to 255; however, the maximum adapter number allowed is ++ determined by machine model. When installed, an AP adapter is accessed by ++ AP instructions executed by any CPU. 
++ ++* AP domain ++ ++ An adapter is partitioned into domains. Each domain can be thought of as ++ a set of hardware registers for processing AP instructions. An adapter can ++ hold up to 256 domains; however, the maximum domain number allowed is ++ determined by machine model. Each domain is identified by a number from 0 to ++ 255. Domains can be further classified into two types: ++ ++ * Usage domains are domains that can be accessed directly to process AP ++ commands ++ ++ * Control domains are domains that are accessed indirectly by AP ++ commands sent to a usage domain to control or change the domain; for ++ example, to set a secure private key for the domain. ++ ++* AP Queue ++ ++ An AP queue is the means by which an AP command-request message is sent to an ++ AP usage domain inside a specific AP. An AP queue is identified by a tuple ++ comprised of an AP adapter ID (APID) and an AP queue index (APQI). The ++ APQI corresponds to a given usage domain number within the adapter. This tuple ++ forms an AP Queue Number (APQN) uniquely identifying an AP queue. AP ++ instructions include a field containing the APQN to identify the AP queue to ++ which the AP command-request message is to be sent for processing. ++ ++* AP Instructions: ++ ++ There are three AP instructions: ++ ++ * NQAP: to enqueue an AP command-request message to a queue ++ * DQAP: to dequeue an AP command-reply message from a queue ++ * PQAP: to administer the queues ++ ++ AP instructions identify the domain that is targeted to process the AP ++ command; this must be one of the usage domains. An AP command may modify a ++ domain that is not one of the usage domains, but the modified domain ++ must be one of the control domains. ++ ++Start Interpretive Execution (SIE) Instruction ++============================================== ++A KVM guest is started by executing the Start Interpretive Execution (SIE) ++instruction. 
The SIE state description is a control block that contains the ++state information for a KVM guest and is supplied as input to the SIE ++instruction. The SIE state description contains a satellite control block called ++the Crypto Control Block (CRYCB). The CRYCB contains three fields to identify ++the adapters, usage domains and control domains assigned to the KVM guest: ++ ++* The AP Mask (APM) field is a bit mask that identifies the AP adapters assigned ++ to the KVM guest. Each bit in the mask, from left to right, corresponds to ++ an APID from 0-255. If a bit is set, the corresponding adapter is valid for ++ use by the KVM guest. ++ ++* The AP Queue Mask (AQM) field is a bit mask identifying the AP usage domains ++ assigned to the KVM guest. Each bit in the mask, from left to right, ++ corresponds to an AP queue index (APQI) from 0-255. If a bit is set, the ++ corresponding queue is valid for use by the KVM guest. ++ ++* The AP Domain Mask (ADM) field is a bit mask that identifies the AP control domains ++ assigned to the KVM guest. The ADM bit mask controls which domains can be ++ changed by an AP command-request message sent to a usage domain from the ++ guest. Each bit in the mask, from left to right, corresponds to a domain from ++ 0-255. If a bit is set, the corresponding domain can be modified by an AP ++ command-request message sent to a usage domain. ++ ++If you recall from the description of an AP Queue, AP instructions include ++an APQN to identify the AP adapter and AP queue to which an AP command-request ++message is to be sent (NQAP and PQAP instructions), or from which a ++command-reply message is to be received (DQAP instruction). The validity of an ++APQN is defined by the matrix calculated from the APM and AQM; it is the ++cross product of all assigned adapter numbers (APM) with all assigned queue ++indexes (AQM). 
For example, if adapters 1 and 2 and usage domains 5 and 6 are ++assigned to a guest, the APQNs (1,5), (1,6), (2,5) and (2,6) will be valid for ++the guest. ++ ++The APQNs can provide secure key functionality - i.e., a private key is stored ++on the adapter card for each of its domains - so each APQN must be assigned to ++at most one guest or the linux host. ++ ++ Example 1: Valid configuration: ++ ------------------------------ ++ Guest1: adapters 1,2 domains 5,6 ++ Guest2: adapter 1,2 domain 7 ++ ++ This is valid because both guests have a unique set of APQNs: Guest1 has ++ APQNs (1,5), (1,6), (2,5) and (2,6); Guest2 has APQNs (1,7) and (2,7). ++ ++ Example 2: Valid configuration: ++ ------------------------------ ++ Guest1: adapters 1,2 domains 5,6 ++ Guest2: adapters 3,4 domains 5,6 ++ ++ This is also valid because both guests have a unique set of APQNs: ++ Guest1 has APQNs (1,5), (1,6), (2,5), (2,6); ++ Guest2 has APQNs (3,5), (3,6), (4,5), (4,6) ++ ++ Example 3: Invalid configuration: ++ -------------------------------- ++ Guest1: adapters 1,2 domains 5,6 ++ Guest2: adapter 1 domains 6,7 ++ ++ This is an invalid configuration because both guests have access to ++ APQN (1,6). ++ ++AP Matrix Configuration on Linux Host: ++===================================== ++A linux system is a guest of the LPAR in which it is running and has access to ++the AP resources configured for the LPAR. The LPAR's AP matrix is ++configured via its Activation Profile which can be edited on the HMC. When the ++linux system is started, the AP bus will detect the AP devices assigned to the ++LPAR and create the following in sysfs: ++ ++/sys/bus/ap ++... [devices] ++...... xx.yyyy ++...... ... ++...... cardxx ++...... ... 
++ ++Where: ++ cardxx is AP adapter number xx (in hex) ++....xx.yyyy is an APQN with xx specifying the APID and yyyy specifying the ++ APQI ++ ++For example, if AP adapters 5 and 6 and domains 4, 71 (0x47), 171 (0xab) and ++255 (0xff) are configured for the LPAR, the sysfs representation on the linux ++host system would look like this: ++ ++/sys/bus/ap ++... [devices] ++...... 05.0004 ++...... 05.0047 ++...... 05.00ab ++...... 05.00ff ++...... 06.0004 ++...... 06.0047 ++...... 06.00ab ++...... 06.00ff ++...... card05 ++...... card06 ++ ++A set of default device drivers are also created to control each type of AP ++device that can be assigned to the LPAR on which a linux host is running: ++ ++/sys/bus/ap ++... [drivers] ++...... [cex2acard] for Crypto Express 2/3 accelerator cards ++...... [cex2aqueue] for AP queues served by Crypto Express 2/3 ++ accelerator cards ++...... [cex4card] for Crypto Express 4/5/6 accelerator and coprocessor ++ cards ++...... [cex4queue] for AP queues served by Crypto Express 4/5/6 ++ accelerator and coprocessor cards ++...... [pcixcccard] for Crypto Express 2/3 coprocessor cards ++...... [pcixccqueue] for AP queues served by Crypto Express 2/3 ++ coprocessor cards ++ ++Binding AP devices to device drivers ++------------------------------------ ++There are two sysfs files that specify bitmasks marking a subset of the APQN ++range as 'usable by the default AP queue device drivers' or 'not usable by the ++default device drivers' and thus available for use by the alternate device ++driver(s). The sysfs locations of the masks are: ++ ++ /sys/bus/ap/apmask ++ /sys/bus/ap/aqmask ++ ++ The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs ++ (APID). Each bit in the mask, from left to right (i.e., from most significant ++ to least significant bit in big endian order), corresponds to an APID from ++ 0-255. 
If a bit is set, the APID is marked as usable only by the default AP ++ queue device drivers; otherwise, the APID is usable by the vfio_ap ++ device driver. ++ ++ The 'aqmask' is a 256-bit mask that identifies a set of AP queue indexes ++ (APQI). Each bit in the mask, from left to right (i.e., from most significant ++ to least significant bit in big endian order), corresponds to an APQI from ++ 0-255. If a bit is set, the APQI is marked as usable only by the default AP ++ queue device drivers; otherwise, the APQI is usable by the vfio_ap device ++ driver. ++ ++ Take, for example, the following mask: ++ ++ 0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff ++ ++ It indicates: ++ ++ 1, 2, 3, 4, 5, and 7-255 belong to the default drivers' pool, and 0 and 6 ++ belong to the vfio_ap device driver's pool. ++ ++ The APQN of each AP queue device assigned to the linux host is checked by the ++ AP bus against the set of APQNs derived from the cross product of APIDs ++ and APQIs marked as usable only by the default AP queue device drivers. If a ++ match is detected, only the default AP queue device drivers will be probed; ++ otherwise, the vfio_ap device driver will be probed. ++ ++ By default, the two masks are set to reserve all APQNs for use by the default ++ AP queue device drivers. There are two ways the default masks can be changed: ++ ++ 1. The sysfs mask files can be edited by echoing a string into the ++ respective sysfs mask file in one of two formats: ++ ++ * An absolute hex string starting with 0x - like "0x12345678" - sets ++ the mask. 
If the given string is shorter than the mask, it is padded ++ with 0s on the right; for example, specifying a mask value of 0x41 is ++ the same as specifying: ++ ++ 0x4100000000000000000000000000000000000000000000000000000000000000 ++ ++ Keep in mind that the mask reads from left to right (i.e., most ++ significant to least significant bit in big endian order), so the mask ++ above identifies device numbers 1 and 7 (01000001). ++ ++ If the string is longer than the mask, the operation is terminated with ++ an error (EINVAL). ++ ++ * Individual bits in the mask can be switched on and off by specifying ++ each bit number to be switched in a comma separated list. Each bit ++ number string must be prepended with a plus ('+') or minus ('-') to indicate ++ the corresponding bit is to be switched on ('+') or off ('-'). Some ++ valid values are: ++ ++ "+0" switches bit 0 on ++ "-13" switches bit 13 off ++ "+0x41" switches bit 65 on ++ "-0xff" switches bit 255 off ++ ++ The following example: ++ +0,-6,+0x47,-0xf0 ++ ++ Switches bits 0 and 71 (0x47) on ++ Switches bits 6 and 240 (0xf0) off ++ ++ Note that the bits not specified in the list remain as they were before ++ the operation. ++ ++ 2. The masks can also be changed at boot time via parameters on the kernel ++ command line like this: ++ ++ ap.apmask=0xffff ap.aqmask=0x40 ++ ++ This would create the following masks: ++ ++ apmask: ++ 0xffff000000000000000000000000000000000000000000000000000000000000 ++ ++ aqmask: ++ 0x4000000000000000000000000000000000000000000000000000000000000000 ++ ++ Resulting in these two pools: ++ ++ default drivers pool: adapter 0-15, domain 1 ++ alternate drivers pool: adapter 16-255, domains 0, 2-255 ++ ++Configuring an AP matrix for a linux guest. ++------------------------------------------ ++The sysfs interfaces for configuring an AP matrix for a guest are built on the ++VFIO mediated device framework. 
To configure an AP matrix for a guest, a ++mediated matrix device must first be created for the /sys/devices/vfio_ap/matrix ++device. When the vfio_ap device driver is loaded, it registers with the VFIO ++mediated device framework. When the driver registers, the sysfs interfaces for ++creating mediated matrix devices are created: ++ ++/sys/devices ++... [vfio_ap] ++......[matrix] ++......... [mdev_supported_types] ++............ [vfio_ap-passthrough] ++............... create ++............... [devices] ++ ++A mediated AP matrix device is created by writing a UUID to the attribute file ++named 'create', for example: ++ ++ uuidgen > create ++ ++ or ++ ++ echo $uuid > create ++ ++When a mediated AP matrix device is created, a sysfs directory named after ++the UUID is created in the 'devices' subdirectory: ++ ++/sys/devices ++... [vfio_ap] ++......[matrix] ++......... [mdev_supported_types] ++............ [vfio_ap-passthrough] ++............... create ++............... [devices] ++.................. [$uuid] ++ ++There will also be three sets of attribute files created in the mediated ++matrix device's sysfs directory to configure an AP matrix for the ++KVM guest: ++ ++/sys/devices ++... [vfio_ap] ++......[matrix] ++......... [mdev_supported_types] ++............ [vfio_ap-passthrough] ++............... create ++............... [devices] ++.................. [$uuid] ++..................... assign_adapter ++..................... assign_control_domain ++..................... assign_domain ++..................... matrix ++..................... unassign_adapter ++..................... unassign_control_domain ++..................... unassign_domain ++ ++assign_adapter ++ To assign an AP adapter to the mediated matrix device, its APID is written ++ to the 'assign_adapter' file. This may be done multiple times to assign more ++ than one adapter. The APID may be specified using conventional semantics ++ as a decimal, hexadecimal, or octal number. 
For example, to assign adapters ++ 4, 5 and 16 to a mediated matrix device in decimal, hexadecimal and octal ++ respectively: ++ ++ echo 4 > assign_adapter ++ echo 0x5 > assign_adapter ++ echo 020 > assign_adapter ++ ++ In order to successfully assign an adapter: ++ ++ * The adapter number specified must represent a value from 0 up to the ++ maximum adapter number allowed by the machine model. If an adapter number ++ higher than the maximum is specified, the operation will terminate with ++ an error (ENODEV). ++ ++ * All APQNs that can be derived from the adapter ID being assigned and the ++ IDs of the previously assigned domains must be bound to the vfio_ap device ++ driver. If no domains have yet been assigned, then there must be at least ++ one APQN with the specified APID bound to the vfio_ap driver. If no such ++ APQNs are bound to the driver, the operation will terminate with an ++ error (EADDRNOTAVAIL). ++ ++ No APQN that can be derived from the adapter ID and the IDs of the ++ previously assigned domains can be assigned to another mediated matrix ++ device. If an APQN is assigned to another mediated matrix device, the ++ operation will terminate with an error (EADDRINUSE). ++ ++unassign_adapter ++ To unassign an AP adapter, its APID is written to the 'unassign_adapter' ++ file. This may also be done multiple times to unassign more than one adapter. ++ ++assign_domain ++ To assign a usage domain, the domain number is written into the ++ 'assign_domain' file. This may be done multiple times to assign more than one ++ usage domain. The domain number is specified using conventional semantics as ++ a decimal, hexadecimal, or octal number. 
For example, to assign usage domains ++ 4, 8, and 71 to a mediated matrix device in decimal, hexadecimal and octal ++ respectively: ++ ++ echo 4 > assign_domain ++ echo 0x8 > assign_domain ++ echo 0107 > assign_domain ++ ++ In order to successfully assign a domain: ++ ++ * The domain number specified must represent a value from 0 up to the ++ maximum domain number allowed by the machine model. If a domain number ++ higher than the maximum is specified, the operation will terminate with ++ an error (ENODEV). ++ ++ * All APQNs that can be derived from the domain ID being assigned and the IDs ++ of the previously assigned adapters must be bound to the vfio_ap device ++ driver. If no adapters have yet been assigned, then there must be at least ++ one APQN with the specified APQI bound to the vfio_ap driver. If no such ++ APQNs are bound to the driver, the operation will terminate with an ++ error (EADDRNOTAVAIL). ++ ++ No APQN that can be derived from the domain ID being assigned and the IDs ++ of the previously assigned adapters can be assigned to another mediated ++ matrix device. If an APQN is assigned to another mediated matrix device, ++ the operation will terminate with an error (EADDRINUSE). ++ ++unassign_domain ++ To unassign a usage domain, the domain number is written into the ++ 'unassign_domain' file. This may be done multiple times to unassign more than ++ one usage domain. ++ ++assign_control_domain ++ To assign a control domain, the domain number is written into the ++ 'assign_control_domain' file. This may be done multiple times to ++ assign more than one control domain. The domain number may be specified using ++ conventional semantics as a decimal, hexadecimal, or octal number. 
For ++ example, to assign control domains 4, 8, and 71 to a mediated matrix device ++ in decimal, hexadecimal and octal respectively: ++ ++ echo 4 > assign_control_domain ++ echo 0x8 > assign_control_domain ++ echo 0107 > assign_control_domain ++ ++ In order to successfully assign a control domain, the domain number ++ specified must represent a value from 0 up to the maximum domain number ++ allowed by the machine model. If a control domain number higher than the ++ maximum is specified, the operation will terminate with an error (ENODEV). ++ ++unassign_control_domain ++ To unassign a control domain, the domain number is written into the ++ 'unassign_control_domain' file. This may be done multiple times to unassign more than ++ one control domain. ++ ++Notes: Hot plug/unplug is not currently supported for mediated AP matrix ++devices, so no changes to the AP matrix will be allowed while a guest using ++the mediated matrix device is running. Attempts to assign an adapter, ++domain or control domain will be rejected and an error (EBUSY) returned. ++ ++Starting a Linux Guest Configured with an AP Matrix: ++=================================================== ++To provide a mediated matrix device for use by a guest, the following option ++must be specified on the QEMU command line: ++ ++ -device vfio-ap,sysfsdev=$path-to-mdev ++ ++The sysfsdev parameter specifies the path to the mediated matrix device. ++There are a number of ways to specify this path: ++ ++/sys/devices/vfio_ap/matrix/$uuid ++/sys/bus/mdev/devices/$uuid ++/sys/bus/mdev/drivers/vfio_mdev/$uuid ++/sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough/devices/$uuid ++ ++When the linux guest is started, the guest will open the mediated ++matrix device's file descriptor to get information about the mediated matrix ++device.
The vfio_ap device driver will update the APM, AQM, and ADM fields in ++the guest's CRYCB with the adapter, usage domain and control domains assigned ++via the mediated matrix device's sysfs attribute files. Programs running on the ++linux guest will then: ++ ++1. Have direct access to the APQNs derived from the cross product of the AP ++ adapter numbers (APID) and queue indexes (APQI) specified in the APM and AQM ++ fields of the guest's CRYCB respectively. These APQNs identify the AP queues ++ that are valid for use by the guest; meaning, AP commands can be sent by the ++ guest to any of these queues for processing. ++ ++2. Have authorization to process AP commands to change a control domain ++ identified in the ADM field of the guest's CRYCB. The AP command must be sent ++ to a valid APQN (see 1 above). ++ ++CPU model features: ++ ++Three CPU model features are available for controlling guest access to AP ++facilities: ++ ++1. AP facilities feature ++ ++ The AP facilities feature indicates that AP facilities are installed on the ++ guest. This feature will be exposed for use only if the AP facilities ++ are installed on the host system. The feature is s390-specific and is ++ represented as a parameter of the -cpu option on the QEMU command line: ++ ++ qemu-system-s390x -cpu $model,ap=on|off ++ ++ Where: ++ ++ $model is the CPU model defined for the guest (defaults to the model of ++ the host system if not specified). ++ ++ ap=on|off indicates whether AP facilities are installed (on) or not ++ (off). The default for CPU models zEC12 or newer ++ is ap=on. AP facilities must be installed on the guest if a ++ vfio-ap device (-device vfio-ap,sysfsdev=$path) is configured ++ for the guest, or the guest will fail to start. ++ ++2. Query Configuration Information (QCI) facility ++ ++ The QCI facility is used by the AP bus running on the guest to query the ++ configuration of the AP facilities.
This facility will be available ++ only if the QCI facility is installed on the host system. The feature is ++ s390-specific and is represented as a parameter of the -cpu option on the ++ QEMU command line: ++ ++ qemu-system-s390x -cpu $model,apqci=on|off ++ ++ Where: ++ ++ $model is the CPU model defined for the guest ++ ++ apqci=on|off indicates whether the QCI facility is installed (on) or ++ not (off). The default for CPU models zEC12 or newer ++ is apqci=on; for older models, QCI will not be installed. ++ ++ If QCI is installed (apqci=on) but AP facilities are not ++ (ap=off), an error message will be logged, but the guest ++ will be allowed to start. It makes no sense to have QCI ++ installed if the AP facilities are not; this is considered ++ an invalid configuration. ++ ++ If the QCI facility is not installed, APQNs with an APQI ++ greater than 15 will not be detected by the AP bus ++ running on the guest. ++ ++3. Adjunct Processor Facility Test (APFT) facility ++ ++ The APFT facility is used by the AP bus running on the guest to test the ++ AP facilities available for a given AP queue. This facility will be available ++ only if the APFT facility is installed on the host system. The feature is ++ s390-specific and is represented as a parameter of the -cpu option on the ++ QEMU command line: ++ ++ qemu-system-s390x -cpu $model,apft=on|off ++ ++ Where: ++ ++ $model is the CPU model defined for the guest (defaults to the model of ++ the host system if not specified). ++ ++ apft=on|off indicates whether the APFT facility is installed (on) or ++ not (off). The default for CPU models zEC12 and ++ newer is apft=on; for older models, APFT will not be ++ installed. ++ ++ If APFT is installed (apft=on) but AP facilities are not ++ (ap=off), an error message will be logged, but the guest ++ will be allowed to start. It makes no sense to have APFT ++ installed if the AP facilities are not; this is considered ++ an invalid configuration.
++ ++ It also makes no sense to turn APFT off because the AP bus ++ running on the guest will not detect CEX4 and newer devices ++ without it. Since only CEX4 and newer devices are supported ++ for guest usage, no AP devices can be made accessible to a ++ guest started without APFT installed. ++ ++Example: Configure AP Matrixes for Three Linux Guests: ++===================================================== ++Let's now provide an example to illustrate how KVM guests may be given ++access to AP facilities. For this example, we will show how to configure ++three guests such that executing the lszcrypt command on the guests would ++look like this: ++ ++Guest1 ++------ ++CARD.DOMAIN TYPE MODE ++------------------------------ ++05 CEX5C CCA-Coproc ++05.0004 CEX5C CCA-Coproc ++05.00ab CEX5C CCA-Coproc ++06 CEX5A Accelerator ++06.0004 CEX5A Accelerator ++06.00ab CEX5A Accelerator ++ ++Guest2 ++------ ++CARD.DOMAIN TYPE MODE ++------------------------------ ++05 CEX5C CCA-Coproc ++05.0047 CEX5C CCA-Coproc ++05.00ff CEX5C CCA-Coproc ++ ++Guest3 ++------ ++CARD.DOMAIN TYPE MODE ++------------------------------ ++06 CEX5A Accelerator ++06.0047 CEX5A Accelerator ++06.00ff CEX5A Accelerator ++ ++These are the steps: ++ ++1. Install the vfio_ap module on the linux host.
The dependency chain for the ++ vfio_ap module is: ++ * iommu ++ * s390 ++ * zcrypt ++ * vfio ++ * vfio_mdev ++ * vfio_mdev_device ++ * KVM ++ ++ To build the vfio_ap module, the kernel build must be configured with the ++ following Kconfig elements selected: ++ * IOMMU_SUPPORT ++ * S390 ++ * ZCRYPT ++ * S390_AP_IOMMU ++ * VFIO ++ * VFIO_MDEV ++ * VFIO_MDEV_DEVICE ++ * KVM ++ ++ If using make menuconfig select the following to build the vfio_ap module: ++ -> Device Drivers ++ -> IOMMU Hardware Support ++ select S390 AP IOMMU Support ++ -> VFIO Non-Privileged userspace driver framework ++ -> Mediated device driver framework ++ -> VFIO driver for Mediated devices ++ -> I/O subsystem ++ -> VFIO support for AP devices ++ ++2. Secure the AP queues to be used by the three guests so that the host cannot ++ access them. To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, ++ 06.0004, 06.0047, 06.00ab, and 06.00ff for use by the vfio_ap device driver, ++ the corresponding APQNs must be removed from the default queue drivers pool ++ as follows: ++ ++ echo -5,-6 > /sys/bus/ap/apmask ++ ++ echo -4,-0x47,-0xab,-0xff > /sys/bus/ap/aqmask ++ ++ This will result in AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, ++ 06.0047, 06.00ab, and 06.00ff getting bound to the vfio_ap device driver. The ++ sysfs directory for the vfio_ap device driver will now contain symbolic links ++ to the AP queue devices bound to it: ++ ++ /sys/bus/ap ++ ... [drivers] ++ ...... [vfio_ap] ++ ......... [05.0004] ++ ......... [05.0047] ++ ......... [05.00ab] ++ ......... [05.00ff] ++ ......... [06.0004] ++ ......... [06.0047] ++ ......... [06.00ab] ++ ......... [06.00ff] ++ ++ Keep in mind that only type 10 and newer adapters (i.e., CEX4 and later) ++ can be bound to the vfio_ap device driver.
The reason for this is to ++ simplify the implementation by not needlessly complicating the design by ++ supporting older devices that will go out of service in the relatively near ++ future, and for which there are few older systems on which to test. ++ ++ The administrator, therefore, must take care to secure only AP queues that ++ can be bound to the vfio_ap device driver. The device type for a given AP ++ queue device can be read from the parent card's sysfs directory. For example, ++ to see the hardware type of the queue 05.0004: ++ ++ cat /sys/bus/ap/devices/card05/hwtype ++ ++ The hwtype must be 10 or higher (CEX4 or newer) in order to be bound to the ++ vfio_ap device driver. ++ ++3. Create the mediated devices needed to configure the AP matrixes for the ++ three guests and to provide an interface to the vfio_ap driver for ++ use by the guests: ++ ++ /sys/devices/vfio_ap/matrix/ ++ --- [mdev_supported_types] ++ ------ [vfio_ap-passthrough] (passthrough mediated matrix device type) ++ --------- create ++ --------- [devices] ++ ++ To create the mediated devices for the three guests: ++ ++ uuidgen > create ++ uuidgen > create ++ uuidgen > create ++ ++ or ++ ++ echo $uuid1 > create ++ echo $uuid2 > create ++ echo $uuid3 > create ++ ++ This will create three mediated devices in the [devices] subdirectory named ++ after the UUID used to create the mediated device. 
We'll call them $uuid1, ++ $uuid2 and $uuid3 and this is the sysfs directory structure after creation: ++ ++ /sys/devices/vfio_ap/matrix/ ++ --- [mdev_supported_types] ++ ------ [vfio_ap-passthrough] ++ --------- [devices] ++ ------------ [$uuid1] ++ --------------- assign_adapter ++ --------------- assign_control_domain ++ --------------- assign_domain ++ --------------- matrix ++ --------------- unassign_adapter ++ --------------- unassign_control_domain ++ --------------- unassign_domain ++ ++ ------------ [$uuid2] ++ --------------- assign_adapter ++ --------------- assign_control_domain ++ --------------- assign_domain ++ --------------- matrix ++ --------------- unassign_adapter ++ --------------- unassign_control_domain ++ --------------- unassign_domain ++ ++ ------------ [$uuid3] ++ --------------- assign_adapter ++ --------------- assign_control_domain ++ --------------- assign_domain ++ --------------- matrix ++ --------------- unassign_adapter ++ --------------- unassign_control_domain ++ --------------- unassign_domain ++ ++4. The administrator now needs to configure the matrixes for the mediated ++ devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3). ++ ++ This is how the matrix is configured for Guest1: ++ ++ echo 5 > assign_adapter ++ echo 6 > assign_adapter ++ echo 4 > assign_domain ++ echo 0xab > assign_domain ++ ++ Control domains can similarly be assigned using the assign_control_domain ++ sysfs file. ++ ++ If a mistake is made configuring an adapter, domain or control domain, ++ you can use the unassign_xxx interfaces to unassign the adapter, domain or ++ control domain. ++ ++ To display the matrix configuration for Guest1: ++ ++ cat matrix ++ ++ The output will display the APQNs in the format xx.yyyy, where xx is ++ the adapter number and yyyy is the domain number.
The output for Guest1 ++ will look like this: ++ ++ 05.0004 ++ 05.00ab ++ 06.0004 ++ 06.00ab ++ ++ This is how the matrix is configured for Guest2: ++ ++ echo 5 > assign_adapter ++ echo 0x47 > assign_domain ++ echo 0xff > assign_domain ++ ++ This is how the matrix is configured for Guest3: ++ ++ echo 6 > assign_adapter ++ echo 0x47 > assign_domain ++ echo 0xff > assign_domain ++ ++5. Start Guest1: ++ ++ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ ++ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ... ++ ++6. Start Guest2: ++ ++ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ ++ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ... ++ ++7. Start Guest3: ++ ++ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ ++ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ... ++ ++When the guest is shut down, the mediated matrix devices may be removed. ++ ++Using our example again, to remove the mediated matrix device $uuid1: ++ ++ /sys/devices/vfio_ap/matrix/ ++ --- [mdev_supported_types] ++ ------ [vfio_ap-passthrough] ++ --------- [devices] ++ ------------ [$uuid1] ++ --------------- remove ++ ++ ++ echo 1 > remove ++ ++ This will remove all of the mdev matrix device's sysfs structures including ++ the mdev device itself. To recreate and reconfigure the mdev matrix device, ++ all of the steps starting with step 3 will have to be performed again. Note ++ that the remove will fail if a guest using the mdev is still running. ++ ++ It is not necessary to remove an mdev matrix device, but one may want to ++ remove it if no guest will use it during the remaining lifetime of the linux ++ host. If the mdev matrix device is removed, one may want to also reconfigure ++ the pool of adapters and queues reserved for use by the default drivers.
++ ++Limitations ++=========== ++* The KVM/kernel interfaces do not provide a way to prevent restoring an APQN ++ to the default drivers pool of a queue that is still assigned to a mediated ++ device in use by a guest. It is incumbent upon the administrator to ++ ensure there is no mediated device in use by a guest to which the APQN is ++ assigned lest the host be given access to the private data of the AP queue ++ device, such as a private key configured specifically for the guest. ++ ++* Dynamically modifying the AP matrix for a running guest (which would amount to ++ hot(un)plug of AP devices for the guest) is currently not supported ++ ++* Live guest migration is not supported for guests using AP devices. +-- +1.8.3.1 + diff --git a/0075-vnc-call-sasl_server_init-only-when-required.patch b/0075-vnc-call-sasl_server_init-only-when-required.patch new file mode 100644 index 0000000..292688c --- /dev/null +++ b/0075-vnc-call-sasl_server_init-only-when-required.patch @@ -0,0 +1,89 @@ +From dbf0257cf3587d5580765cbd2040f370820fb5e3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Tue, 2 Oct 2018 12:34:03 +0100 +Subject: vnc: call sasl_server_init() only when required +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20181002123403.20747-2-marcandre.lureau@redhat.com> +Patchwork-id: 82356 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 1/1] vnc: call sasl_server_init() only when required +Bugzilla: 1609327 +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Thomas Huth +RH-Acked-by: Danilo de Paula + +VNC server is calling sasl_server_init() during startup of QEMU, even +if SASL auth has not been enabled. + +This may create undesirable warnings like "Could not find keytab file: +/etc/qemu/krb5.tab" when the user didn't configure SASL on host and +started VNC server. + +Instead, only initialize SASL when needed. 
Note that HMP/QMP "change +vnc" calls vnc_display_open() again, which will initialize SASL if +needed. + +Fix assignment in if condition, while touching this code. + +Related to: +https://bugzilla.redhat.com/show_bug.cgi?id=1609327 + +Signed-off-by: Marc-André Lureau +Reviewed-by: Daniel P. Berrangé +Message-id: 20180907063634.359-1-marcandre.lureau@redhat.com +Signed-off-by: Gerd Hoffmann + +(cherry picked from commit b5dc0d7d565048fcf2767060261d8385805aced1) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1609327 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=18601393 +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + ui/vnc.c + Due to "qemu"->"qemu-kvm" rename. + +Signed-off-by: Marc-André Lureau +--- + ui/vnc.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/ui/vnc.c b/ui/vnc.c +index 050c421..b3fe7d7 100644 +--- a/ui/vnc.c ++++ b/ui/vnc.c +@@ -3878,9 +3878,6 @@ void vnc_display_open(const char *id, Error **errp) + bool reverse = false; + const char *credid; + bool sasl = false; +-#ifdef CONFIG_VNC_SASL +- int saslErr; +-#endif + int acl = 0; + int lock_key_sync = 1; + int key_delay_ms; +@@ -4054,10 +4051,14 @@ void vnc_display_open(const char *id, Error **errp) + trace_vnc_auth_init(vd, 1, vd->ws_auth, vd->ws_subauth); + + #ifdef CONFIG_VNC_SASL +- if ((saslErr = sasl_server_init(NULL, "qemu-kvm")) != SASL_OK) { +- error_setg(errp, "Failed to initialize SASL auth: %s", +- sasl_errstring(saslErr, NULL, NULL)); +- goto fail; ++ if (sasl) { ++ int saslErr = sasl_server_init(NULL, "qemu-kvm"); ++ ++ if (saslErr != SASL_OK) { ++ error_setg(errp, "Failed to initialize SASL auth: %s", ++ sasl_errstring(saslErr, NULL, NULL)); ++ goto fail; ++ } + } + #endif + vd->lock_key_sync = lock_key_sync; +-- +1.8.3.1 + diff --git a/0076-nbd-server-fix-NBD_CMD_CACHE.patch b/0076-nbd-server-fix-NBD_CMD_CACHE.patch new file mode 100644 index 0000000..05c36f6 --- /dev/null +++ b/0076-nbd-server-fix-NBD_CMD_CACHE.patch 
@@ -0,0 +1,52 @@ +From c10de200e291af4a6a5cb41ac10e1ae7a2b9c5b2 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Wed, 10 Oct 2018 18:19:23 +0100 +Subject: nbd/server: fix NBD_CMD_CACHE + +RH-Author: John Snow +Message-id: <20181010181924.30470-2-jsnow@redhat.com> +Patchwork-id: 82576 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 1/2] nbd/server: fix NBD_CMD_CACHE +Bugzilla: 1636142 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Danilo de Paula +RH-Acked-by: Thomas Huth + +From: Vladimir Sementsov-Ogievskiy + +We should not go to structured-read branch on CACHE command, fix that. + +Bug introduced in bc37b06a5cde24 "nbd/server: introduce NBD_CMD_CACHE" +with the whole feature and affects 3.0.0 release. + +Signed-off-by: Vladimir Sementsov-Ogievskiy +CC: qemu-stable@nongnu.org +Message-Id: <20181003144738.70670-1-vsementsov@virtuozzo.com> +Reviewed-by: Eric Blake +[eblake: commit message typo fix] +Signed-off-by: Eric Blake +(cherry picked from commit 2f454defc23e1be78f2a96bad2877ce7829f61b4) +Signed-off-by: John Snow + +Signed-off-by: Danilo C. L. 
de Paula +--- + nbd/server.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/nbd/server.c b/nbd/server.c +index ea5fe0e..1ce3f44 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -2135,7 +2135,8 @@ static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request, + } + + if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) && +- request->len) { ++ request->len && request->type != NBD_CMD_CACHE) ++ { + return nbd_co_send_sparse_read(client, request->handle, request->from, + data, request->len, errp); + } +-- +1.8.3.1 + diff --git a/0077-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch b/0077-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch new file mode 100644 index 0000000..c876e37 --- /dev/null +++ b/0077-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch @@ -0,0 +1,96 @@ +From 24022cbbfd2230d4781a079d1856e0315895c8ce Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Wed, 10 Oct 2018 18:19:24 +0100 +Subject: nbd: fix NBD_FLAG_SEND_CACHE value + +RH-Author: John Snow +Message-id: <20181010181924.30470-3-jsnow@redhat.com> +Patchwork-id: 82578 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 2/2] nbd: fix NBD_FLAG_SEND_CACHE value +Bugzilla: 1636142 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Danilo de Paula +RH-Acked-by: Thomas Huth + +From: "Denis V. Lunev" + +Commit bc37b06a5 added NBD_CMD_CACHE support, but used the wrong value +for NBD_FLAG_SEND_CACHE flag for negotiation. That commit picked bit 8, +which had already been assigned by the NBD specification to mean +NBD_FLAG_CAN_MULTI_CONN, and which was already implemented in the +Linux kernel as a part of stable userspace-kernel API since 4.10: + +"bit 8, NBD_FLAG_CAN_MULTI_CONN: Indicates that the server operates +entirely without cache, or that the cache it uses is shared among all +connections to the given device. 
In particular, if this flag is +present, then the effects of NBD_CMD_FLUSH and NBD_CMD_FLAG_FUA +MUST be visible across all connections when the server sends its reply +to that command to the client. In the absence of this flag, clients +SHOULD NOT multiplex their commands over more than one connection to +the export. +... +bit 10, NBD_FLAG_SEND_CACHE: documents that the server understands +NBD_CMD_CACHE; however, note that server implementations exist +which support the command without advertising this bit, and +conversely that this bit does not guarantee that the command will +succeed or have an impact." + +Consequences: +- a client trying to use NBD_CMD_CACHE per the NBD spec will not +see the feature as available from a qemu 3.0 server (not fatal, +clients already have to be prepared for caching to not exist) +- a client accidentally coded to the qemu 3.0 bit value instead +of following the spec may interpret NBD_CMD_CACHE as being available +when it is not (probably not fatal, the spec says the server should +gracefully fail unknown commands, and that clients of NBD_CMD_CACHE +should be prepared for failure even when the feature is advertised); +such clients are unlikely (perhaps only in unreleased Virtuozzo code), +and will disappear over time +- a client prepared to use multiple connections based on +NBD_FLAG_CAN_MULTI_CONN may cause data corruption when it assumes +that caching is consistent when in reality qemu 3.0 did not have +a consistent cache. Partially mitigated by using read-only +connections (where nothing needs to be flushed, so caching is +indeed consistent) or when using qemu-nbd with the default -e 1 +(at most one client at a time); visible only when using -e 2 or +more for a writable export. + +Thus the commit fixes negotiation flag in QEMU according to the +specification. + +Signed-off-by: Denis V.
Lunev +CC: Vladimir Sementsov-Ogievskiy +CC: Valery Vdovin +CC: Eric Blake +CC: Paolo Bonzini +CC: qemu-stable@nongnu.org +Message-Id: <20181004100313.4253-1-den@openvz.org> +Reviewed-by: Eric Blake +[eblake: enhance commit message, add defines for unimplemented flags] +Signed-off-by: Eric Blake +(cherry picked from commit df91328adab8490367776d2b21b35d790a606120) +Signed-off-by: John Snow + +Signed-off-by: Danilo C. L. de Paula +--- + include/block/nbd.h | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/include/block/nbd.h b/include/block/nbd.h +index 4638c83..a53b0cf 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -135,7 +135,9 @@ typedef struct NBDExtent { + #define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */ + #define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */ + #define NBD_FLAG_SEND_DF (1 << 7) /* Send DF (Do not Fragment) */ +-#define NBD_FLAG_SEND_CACHE (1 << 8) /* Send CACHE (prefetch) */ ++#define NBD_FLAG_CAN_MULTI_CONN (1 << 8) /* Multi-client cache consistent */ ++#define NBD_FLAG_SEND_RESIZE (1 << 9) /* Send resize */ ++#define NBD_FLAG_SEND_CACHE (1 << 10) /* Send CACHE (prefetch) */ + + /* New-style handshake (global) flags, sent from server to client, and + control what will happen during handshake phase. 
*/ +-- +1.8.3.1 + diff --git a/0078-block-linux-aio-acquire-AioContext-before-qemu_laio_.patch b/0078-block-linux-aio-acquire-AioContext-before-qemu_laio_.patch new file mode 100644 index 0000000..0e324f3 --- /dev/null +++ b/0078-block-linux-aio-acquire-AioContext-before-qemu_laio_.patch @@ -0,0 +1,134 @@ +From ca570895f9825c8ed6691bb520341ac9e07bac5a Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:21:52 +0100 +Subject: block/linux-aio: acquire AioContext before + qemu_laio_process_completions + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-14-kwolf@redhat.com> +Patchwork-id: 82603 +O-Subject: [RHEL-8 qemu-kvm PATCH 23/44] block/linux-aio: acquire AioContext before qemu_laio_process_completions +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +From: Sergio Lopez + +In qemu_laio_process_completions_and_submit, the AioContext is acquired +before the ioq_submit iteration and after qemu_laio_process_completions, +but the latter is not thread safe either. + +This change avoids a number of random crashes when the Main Thread and +an IO Thread collide processing completions for the same AioContext. 
+This is an example of such crash: + + - The IO Thread is trying to acquire the AioContext at aio_co_enter, + which evidences that it didn't lock it before: + +Thread 3 (Thread 0x7fdfd8bd8700 (LWP 36743)): + #0 0x00007fdfe0dd542d in __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135 + #1 0x00007fdfe0dd0de6 in _L_lock_870 () at /lib64/libpthread.so.0 + #2 0x00007fdfe0dd0cdf in __GI___pthread_mutex_lock (mutex=mutex@entry=0x5631fde0e6c0) + at ../nptl/pthread_mutex_lock.c:114 + #3 0x00005631fc0603a7 in qemu_mutex_lock_impl (mutex=0x5631fde0e6c0, file=0x5631fc23520f "util/async.c", line=511) at util/qemu-thread-posix.c:66 + #4 0x00005631fc05b558 in aio_co_enter (ctx=0x5631fde0e660, co=0x7fdfcc0c2b40) at util/async.c:493 + #5 0x00005631fc05b5ac in aio_co_wake (co=) at util/async.c:478 + #6 0x00005631fbfc51ad in qemu_laio_process_completion (laiocb=) at block/linux-aio.c:104 + #7 0x00005631fbfc523c in qemu_laio_process_completions (s=s@entry=0x7fdfc0297670) + at block/linux-aio.c:222 + #8 0x00005631fbfc5499 in qemu_laio_process_completions_and_submit (s=0x7fdfc0297670) + at block/linux-aio.c:237 + #9 0x00005631fc05d978 in aio_dispatch_handlers (ctx=ctx@entry=0x5631fde0e660) at util/aio-posix.c:406 + #10 0x00005631fc05e3ea in aio_poll (ctx=0x5631fde0e660, blocking=blocking@entry=true) + at util/aio-posix.c:693 + #11 0x00005631fbd7ad96 in iothread_run (opaque=0x5631fde0e1c0) at iothread.c:64 + #12 0x00007fdfe0dcee25 in start_thread (arg=0x7fdfd8bd8700) at pthread_create.c:308 + #13 0x00007fdfe0afc34d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113 + + - The Main Thread is also processing completions from the same + AioContext, and crashes due to failed assertion at util/iov.c:78: + +Thread 1 (Thread 0x7fdfeb5eac80 (LWP 36740)): + #0 0x00007fdfe0a391f7 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56 + #1 0x00007fdfe0a3a8e8 in __GI_abort () at abort.c:90 + #2 0x00007fdfe0a32266 in __assert_fail_base 
(fmt=0x7fdfe0b84e68 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=assertion@entry=0x5631fc238ccb "offset == 0", file=file@entry=0x5631fc23698e "util/iov.c", line=line@entry=78, function=function@entry=0x5631fc236adc <__PRETTY_FUNCTION__.15220> "iov_memset") + at assert.c:92 + #3 0x00007fdfe0a32312 in __GI___assert_fail (assertion=assertion@entry=0x5631fc238ccb "offset == 0", file=file@entry=0x5631fc23698e "util/iov.c", line=line@entry=78, function=function@entry=0x5631fc236adc <__PRETTY_FUNCTION__.15220> "iov_memset") at assert.c:101 + #4 0x00005631fc065287 in iov_memset (iov=, iov_cnt=, offset=, offset@entry=65536, fillc=fillc@entry=0, bytes=15515191315812405248) at util/iov.c:78 + #5 0x00005631fc065a63 in qemu_iovec_memset (qiov=, offset=offset@entry=65536, fillc=fillc@entry=0, bytes=) at util/iov.c:410 + #6 0x00005631fbfc5178 in qemu_laio_process_completion (laiocb=0x7fdd920df630) at block/linux-aio.c:88 + #7 0x00005631fbfc523c in qemu_laio_process_completions (s=s@entry=0x7fdfc0297670) + at block/linux-aio.c:222 + #8 0x00005631fbfc5499 in qemu_laio_process_completions_and_submit (s=0x7fdfc0297670) + at block/linux-aio.c:237 + #9 0x00005631fbfc54ed in qemu_laio_poll_cb (opaque=) at block/linux-aio.c:272 + #10 0x00005631fc05d85e in run_poll_handlers_once (ctx=ctx@entry=0x5631fde0e660) at util/aio-posix.c:497 + #11 0x00005631fc05e2ca in aio_poll (blocking=false, ctx=0x5631fde0e660) at util/aio-posix.c:574 + #12 0x00005631fc05e2ca in aio_poll (ctx=0x5631fde0e660, blocking=blocking@entry=false) + at util/aio-posix.c:604 + #13 0x00005631fbfcb8a3 in bdrv_do_drained_begin (ignore_parent=, recursive=, bs=) at block/io.c:273 + #14 0x00005631fbfcb8a3 in bdrv_do_drained_begin (bs=0x5631fe8b6200, recursive=, parent=0x0, ignore_bds_parents=, poll=) at block/io.c:390 + #15 0x00005631fbfbcd2e in blk_drain (blk=0x5631fe83ac80) at block/block-backend.c:1590 + #16 0x00005631fbfbe138 in blk_remove_bs (blk=blk@entry=0x5631fe83ac80) at block/block-backend.c:774 + #17 
0x00005631fbfbe3d6 in blk_unref (blk=0x5631fe83ac80) at block/block-backend.c:401 + #18 0x00005631fbfbe3d6 in blk_unref (blk=0x5631fe83ac80) at block/block-backend.c:449 + #19 0x00005631fbfc9a69 in commit_complete (job=0x5631fe8b94b0, opaque=0x7fdfcc1bb080) + at block/commit.c:92 + #20 0x00005631fbf7d662 in job_defer_to_main_loop_bh (opaque=0x7fdfcc1b4560) at job.c:973 + #21 0x00005631fc05ad41 in aio_bh_poll (bh=0x7fdfcc01ad90) at util/async.c:90 + #22 0x00005631fc05ad41 in aio_bh_poll (ctx=ctx@entry=0x5631fddffdb0) at util/async.c:118 + #23 0x00005631fc05e210 in aio_dispatch (ctx=0x5631fddffdb0) at util/aio-posix.c:436 + #24 0x00005631fc05ac1e in aio_ctx_dispatch (source=, callback=, user_data=) at util/async.c:261 + #25 0x00007fdfeaae44c9 in g_main_context_dispatch (context=0x5631fde00140) at gmain.c:3201 + #26 0x00007fdfeaae44c9 in g_main_context_dispatch (context=context@entry=0x5631fde00140) at gmain.c:3854 + #27 0x00005631fc05d503 in main_loop_wait () at util/main-loop.c:215 + #28 0x00005631fc05d503 in main_loop_wait (timeout=) at util/main-loop.c:238 + #29 0x00005631fc05d503 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:497 + #30 0x00005631fbd81412 in main_loop () at vl.c:1866 + #31 0x00005631fbc18ff3 in main (argc=, argv=, envp=) + at vl.c:4647 + + - A closer examination shows that s->io_q.in_flight appears to have + gone backwards: + +(gdb) frame 7 + #7 0x00005631fbfc523c in qemu_laio_process_completions (s=s@entry=0x7fdfc0297670) + at block/linux-aio.c:222 +222 qemu_laio_process_completion(laiocb); +(gdb) p s +$2 = (LinuxAioState *) 0x7fdfc0297670 +(gdb) p *s +$3 = {aio_context = 0x5631fde0e660, ctx = 0x7fdfeb43b000, e = {rfd = 33, wfd = 33}, io_q = {plugged = 0, + in_queue = 0, in_flight = 4294967280, blocked = false, pending = {sqh_first = 0x0, + sqh_last = 0x7fdfc0297698}}, completion_bh = 0x7fdfc0280ef0, event_idx = 21, event_max = 241} +(gdb) p/x s->io_q.in_flight +$4 = 0xfffffff0 + +Signed-off-by: Sergio Lopez 
+Signed-off-by: Kevin Wolf +(cherry picked from commit e091f0e905a4481f347913420f327d427f18d9d4) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/linux-aio.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/linux-aio.c b/block/linux-aio.c +index 19eb922..217ce60 100644 +--- a/block/linux-aio.c ++++ b/block/linux-aio.c +@@ -234,9 +234,9 @@ static void qemu_laio_process_completions(LinuxAioState *s) + + static void qemu_laio_process_completions_and_submit(LinuxAioState *s) + { ++ aio_context_acquire(s->aio_context); + qemu_laio_process_completions(s); + +- aio_context_acquire(s->aio_context); + if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) { + ioq_submit(s); + } +-- +1.8.3.1 + diff --git a/0079-util-async-use-qemu_aio_coroutine_enter-in-co_schedu.patch b/0079-util-async-use-qemu_aio_coroutine_enter-in-co_schedu.patch new file mode 100644 index 0000000..7288227 --- /dev/null +++ b/0079-util-async-use-qemu_aio_coroutine_enter-in-co_schedu.patch @@ -0,0 +1,78 @@ +From faa3d5106cb296858227cc240e045ca16cb28c81 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:21:53 +0100 +Subject: util/async: use qemu_aio_coroutine_enter in co_schedule_bh_cb + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-15-kwolf@redhat.com> +Patchwork-id: 82604 +O-Subject: [RHEL-8 qemu-kvm PATCH 24/44] util/async: use qemu_aio_coroutine_enter in co_schedule_bh_cb +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +From: Sergio Lopez + +AIO Coroutines shouldn't be managed by an AioContext different than the +one assigned when they are created. aio_co_enter avoids entering a +coroutine from a different AioContext, calling aio_co_schedule instead. + +Scheduled coroutines are then entered by co_schedule_bh_cb using +qemu_coroutine_enter, which just calls qemu_aio_coroutine_enter with the +current AioContext obtained with qemu_get_current_aio_context.
+Eventually, co->ctx will be set to the AioContext passed as an argument +to qemu_aio_coroutine_enter. + +This means that, if an IO Thread's AioContext is being processed by the +Main Thread (due to aio_poll being called with a BDS AioContext, as it +happens in AIO_WAIT_WHILE among other places), the AioContext from some +coroutines may be wrongly replaced with the one from the Main Thread. + +This is the root cause behind some crashes, mainly triggered by the +drain code at block/io.c. The most common are these abort and failed +assertion: + +util/async.c:aio_co_schedule +456 if (scheduled) { +457 fprintf(stderr, +458 "%s: Co-routine was already scheduled in '%s'\n", +459 __func__, scheduled); +460 abort(); +461 } + +util/qemu-coroutine-lock.c: +286 assert(mutex->holder == self); + +But it's also known to cause random errors at different locations, and +even SIGSEGV with broken coroutine backtraces. + +By using qemu_aio_coroutine_enter directly in co_schedule_bh_cb, we can +pass the correct AioContext as an argument, making sure co->ctx is not +wrongly altered. + +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +(cherry picked from commit 6808ae0417131f8dbe7b051256dff7a16634dc1d) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L.
de Paula +--- + util/async.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/util/async.c b/util/async.c +index 05979f8..c10642a 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -400,7 +400,7 @@ static void co_schedule_bh_cb(void *opaque) + + /* Protected by write barrier in qemu_aio_coroutine_enter */ + atomic_set(&co->scheduled, NULL); +- qemu_coroutine_enter(co); ++ qemu_aio_coroutine_enter(ctx, co); + aio_context_release(ctx); + } + } +-- +1.8.3.1 + diff --git a/0080-job-Fix-nested-aio_poll-hanging-in-job_txn_apply.patch b/0080-job-Fix-nested-aio_poll-hanging-in-job_txn_apply.patch new file mode 100644 index 0000000..568e097 --- /dev/null +++ b/0080-job-Fix-nested-aio_poll-hanging-in-job_txn_apply.patch @@ -0,0 +1,105 @@ +From f78998e365809f77ed146ee2afdcf132b12c838c Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:21:54 +0100 +Subject: job: Fix nested aio_poll() hanging in job_txn_apply + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-16-kwolf@redhat.com> +Patchwork-id: 82605 +O-Subject: [RHEL-8 qemu-kvm PATCH 25/44] job: Fix nested aio_poll() hanging in job_txn_apply +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +From: Fam Zheng + +All callers have acquired ctx already. Doing that again results in +aio_poll() hang. This fixes the problem that a BDRV_POLL_WHILE() in the +callback cannot make progress because ctx is recursively locked, for +example, when drive-backup finishes. 
+ +There are two callers of job_finalize(): + + fam@lemon:~/work/qemu [master]$ git grep -w -A1 '^\s*job_finalize' + blockdev.c: job_finalize(&job->job, errp); + blockdev.c- aio_context_release(aio_context); + -- + job-qmp.c: job_finalize(job, errp); + job-qmp.c- aio_context_release(aio_context); + -- + tests/test-blockjob.c: job_finalize(&job->job, &error_abort); + tests/test-blockjob.c- assert(job->job.status == JOB_STATUS_CONCLUDED); + +Ignoring the test, it's easy to see both callers to job_finalize (and +job_do_finalize) have acquired the context. + +Cc: qemu-stable@nongnu.org +Reported-by: Gu Nini +Reviewed-by: Eric Blake +Signed-off-by: Fam Zheng +Signed-off-by: Kevin Wolf +(cherry picked from commit 49880165a44f26dc84651858750facdee31f2513) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + job.c | 18 +++++------------- + 1 file changed, 5 insertions(+), 13 deletions(-) + +diff --git a/job.c b/job.c +index bb322de..82b4692 100644 +--- a/job.c ++++ b/job.c +@@ -136,21 +136,13 @@ static void job_txn_del_job(Job *job) + } + } + +-static int job_txn_apply(JobTxn *txn, int fn(Job *), bool lock) ++static int job_txn_apply(JobTxn *txn, int fn(Job *)) + { +- AioContext *ctx; + Job *job, *next; + int rc = 0; + + QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) { +- if (lock) { +- ctx = job->aio_context; +- aio_context_acquire(ctx); +- } + rc = fn(job); +- if (lock) { +- aio_context_release(ctx); +- } + if (rc) { + break; + } +@@ -780,11 +772,11 @@ static void job_do_finalize(Job *job) + assert(job && job->txn); + + /* prepare the transaction to complete */ +- rc = job_txn_apply(job->txn, job_prepare, true); ++ rc = job_txn_apply(job->txn, job_prepare); + if (rc) { + job_completed_txn_abort(job); + } else { +- job_txn_apply(job->txn, job_finalize_single, true); ++ job_txn_apply(job->txn, job_finalize_single); + } + } + +@@ -830,10 +822,10 @@ static void job_completed_txn_success(Job *job) + assert(other_job->ret == 0); + } + +- 
job_txn_apply(txn, job_transition_to_pending, false); ++ job_txn_apply(txn, job_transition_to_pending); + + /* If no jobs need manual finalization, automatically do so */ +- if (job_txn_apply(txn, job_needs_finalize, false) == 0) { ++ if (job_txn_apply(txn, job_needs_finalize) == 0) { + job_do_finalize(job); + } + } +-- +1.8.3.1 + diff --git a/0081-job-Fix-missing-locking-due-to-mismerge.patch b/0081-job-Fix-missing-locking-due-to-mismerge.patch new file mode 100644 index 0000000..76074d2 --- /dev/null +++ b/0081-job-Fix-missing-locking-due-to-mismerge.patch @@ -0,0 +1,55 @@ +From bb58f00a6c09bd1fe9af6dabe9ea173adc406d7b Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:21:55 +0100 +Subject: job: Fix missing locking due to mismerge + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-17-kwolf@redhat.com> +Patchwork-id: 82607 +O-Subject: [RHEL-8 qemu-kvm PATCH 26/44] job: Fix missing locking due to mismerge +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +job_completed() had a problem with double locking that was recently +fixed independently by two different commits: + +"job: Fix nested aio_poll() hanging in job_txn_apply" +"jobs: add exit shim" + +One fix removed the first aio_context_acquire(), the other fix removed +the other one. Now we have a bug again and the code is run without any +locking. + +Add it back in one of the places. + +Signed-off-by: Kevin Wolf +Reviewed-by: Max Reitz +Reviewed-by: John Snow +(cherry picked from commit d1756c780b7879fb64e41135feac781d84a1f995) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + job.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/job.c b/job.c +index 82b4692..5c4e84f 100644 +--- a/job.c ++++ b/job.c +@@ -847,7 +847,11 @@ static void job_completed(Job *job) + static void job_exit(void *opaque) + { + Job *job = (Job *)opaque; ++ AioContext *ctx = job->aio_context; ++ ++ aio_context_acquire(ctx); + job_completed(job); ++ aio_context_release(ctx); + } + + /** +-- +1.8.3.1 + diff --git a/0082-blockjob-Wake-up-BDS-when-job-becomes-idle.patch b/0082-blockjob-Wake-up-BDS-when-job-becomes-idle.patch new file mode 100644 index 0000000..f51df38 --- /dev/null +++ b/0082-blockjob-Wake-up-BDS-when-job-becomes-idle.patch @@ -0,0 +1,161 @@ +From ac751d8909fa4b734fab48e27c0213df48ffd76b Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:21:56 +0100 +Subject: blockjob: Wake up BDS when job becomes idle + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-18-kwolf@redhat.com> +Patchwork-id: 82610 +O-Subject: [RHEL-8 qemu-kvm PATCH 27/44] blockjob: Wake up BDS when job becomes idle +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +In the context of draining a BDS, the .drained_poll callback of block +jobs is called. If this returns true (i.e. there is still some activity +pending), the drain operation may call aio_poll() with blocking=true to +wait for completion. + +As soon as the pending activity is completed and the job finally arrives +in a quiescent state (i.e. its coroutine either yields with busy=false +or terminates), the block job must notify the aio_poll() loop to wake +up, otherwise we get a deadlock if both are running in different +threads. + +Signed-off-by: Kevin Wolf +Reviewed-by: Fam Zheng +Reviewed-by: Max Reitz +(cherry picked from commit 34dc97b9a0e592bc466bdb0bbfe45d77304a72b6) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + blockjob.c | 18 ++++++++++++++++++ + include/block/blockjob.h | 13 +++++++++++++ + include/qemu/job.h | 3 +++ + job.c | 7 +++++++ + 4 files changed, 41 insertions(+) + +diff --git a/blockjob.c b/blockjob.c +index be5903a..8d27e8e 100644 +--- a/blockjob.c ++++ b/blockjob.c +@@ -221,6 +221,22 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + return 0; + } + ++void block_job_wakeup_all_bdrv(BlockJob *job) ++{ ++ GSList *l; ++ ++ for (l = job->nodes; l; l = l->next) { ++ BdrvChild *c = l->data; ++ bdrv_wakeup(c->bs); ++ } ++} ++ ++static void block_job_on_idle(Notifier *n, void *opaque) ++{ ++ BlockJob *job = opaque; ++ block_job_wakeup_all_bdrv(job); ++} ++ + bool block_job_is_internal(BlockJob *job) + { + return (job->job.id == NULL); +@@ -419,6 +435,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, + job->finalize_completed_notifier.notify = block_job_event_completed; + job->pending_notifier.notify = block_job_event_pending; + job->ready_notifier.notify = block_job_event_ready; ++ job->idle_notifier.notify = block_job_on_idle; + + notifier_list_add(&job->job.on_finalize_cancelled, + &job->finalize_cancelled_notifier); +@@ -426,6 +443,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, + &job->finalize_completed_notifier); + notifier_list_add(&job->job.on_pending, &job->pending_notifier); + notifier_list_add(&job->job.on_ready, &job->ready_notifier); ++ notifier_list_add(&job->job.on_idle, &job->idle_notifier); + + error_setg(&job->blocker, "block device is in use by block job: %s", + job_type_str(&job->job)); +diff --git a/include/block/blockjob.h b/include/block/blockjob.h +index 32c00b7..2290bbb 100644 +--- a/include/block/blockjob.h ++++ b/include/block/blockjob.h +@@ -70,6 +70,9 @@ typedef struct BlockJob { + /** Called when the job transitions to READY */ + Notifier ready_notifier; + ++ /** Called when the job coroutine yields or terminates */ ++ Notifier 
idle_notifier; ++ + /** BlockDriverStates that are involved in this block job */ + GSList *nodes; + } BlockJob; +@@ -119,6 +122,16 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + void block_job_remove_all_bdrv(BlockJob *job); + + /** ++ * block_job_wakeup_all_bdrv: ++ * @job: The block job ++ * ++ * Calls bdrv_wakeup() for all BlockDriverStates that have been added to the ++ * job. This function is to be called whenever child_job_drained_poll() would ++ * go from true to false to notify waiting drain requests. ++ */ ++void block_job_wakeup_all_bdrv(BlockJob *job); ++ ++/** + * block_job_set_speed: + * @job: The job to set the speed for. + * @speed: The new value +diff --git a/include/qemu/job.h b/include/qemu/job.h +index 5cb0681..b4a784d 100644 +--- a/include/qemu/job.h ++++ b/include/qemu/job.h +@@ -156,6 +156,9 @@ typedef struct Job { + /** Notifiers called when the job transitions to READY */ + NotifierList on_ready; + ++ /** Notifiers called when the job coroutine yields or terminates */ ++ NotifierList on_idle; ++ + /** Element of the list of jobs */ + QLIST_ENTRY(Job) job_list; + +diff --git a/job.c b/job.c +index 5c4e84f..48a767c 100644 +--- a/job.c ++++ b/job.c +@@ -402,6 +402,11 @@ static void job_event_ready(Job *job) + notifier_list_notify(&job->on_ready, job); + } + ++static void job_event_idle(Job *job) ++{ ++ notifier_list_notify(&job->on_idle, job); ++} ++ + void job_enter_cond(Job *job, bool(*fn)(Job *job)) + { + if (!job_started(job)) { +@@ -447,6 +452,7 @@ static void coroutine_fn job_do_yield(Job *job, uint64_t ns) + timer_mod(&job->sleep_timer, ns); + } + job->busy = false; ++ job_event_idle(job); + job_unlock(); + qemu_coroutine_yield(); + +@@ -865,6 +871,7 @@ static void coroutine_fn job_co_entry(void *opaque) + assert(job && job->driver && job->driver->run); + job_pause_point(job); + job->ret = job->driver->run(job, &job->err); ++ job_event_idle(job); + job->deferred_to_main_loop = true; + 
aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job); + } +-- +1.8.3.1 + diff --git a/0083-aio-wait-Increase-num_waiters-even-in-home-thread.patch b/0083-aio-wait-Increase-num_waiters-even-in-home-thread.patch new file mode 100644 index 0000000..14b67eb --- /dev/null +++ b/0083-aio-wait-Increase-num_waiters-even-in-home-thread.patch @@ -0,0 +1,64 @@ +From 0e651f939d3fd65071a8edc8090a777bdb45b921 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:21:57 +0100 +Subject: aio-wait: Increase num_waiters even in home thread + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-19-kwolf@redhat.com> +Patchwork-id: 82609 +O-Subject: [RHEL-8 qemu-kvm PATCH 28/44] aio-wait: Increase num_waiters even in home thread +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +Even if AIO_WAIT_WHILE() is called in the home context of the +AioContext, we still want to allow the condition to change depending on +other threads as long as they kick the AioWait. Specifically block jobs +can be running in an I/O thread and should then be able to kick a drain +in the main loop context. + +Signed-off-by: Kevin Wolf +Reviewed-by: Fam Zheng +(cherry picked from commit 486574483aba988c83b20e7d3f1ccd50c4c333d8) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + include/block/aio-wait.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h +index c85a62f..600fad1 100644 +--- a/include/block/aio-wait.h ++++ b/include/block/aio-wait.h +@@ -76,6 +76,8 @@ typedef struct { + bool waited_ = false; \ + AioWait *wait_ = (wait); \ + AioContext *ctx_ = (ctx); \ ++ /* Increment wait_->num_waiters before evaluating cond.
*/ \ ++ atomic_inc(&wait_->num_waiters); \ + if (ctx_ && in_aio_context_home_thread(ctx_)) { \ + while ((cond)) { \ + aio_poll(ctx_, true); \ +@@ -84,8 +86,6 @@ typedef struct { + } else { \ + assert(qemu_get_current_aio_context() == \ + qemu_get_aio_context()); \ +- /* Increment wait_->num_waiters before evaluating cond. */ \ +- atomic_inc(&wait_->num_waiters); \ + while ((cond)) { \ + if (ctx_) { \ + aio_context_release(ctx_); \ +@@ -96,8 +96,8 @@ typedef struct { + } \ + waited_ = true; \ + } \ +- atomic_dec(&wait_->num_waiters); \ + } \ ++ atomic_dec(&wait_->num_waiters); \ + waited_; }) + + /** +-- +1.8.3.1 + diff --git a/0084-test-bdrv-drain-Drain-with-block-jobs-in-an-I-O-thre.patch b/0084-test-bdrv-drain-Drain-with-block-jobs-in-an-I-O-thre.patch new file mode 100644 index 0000000..38d58ff --- /dev/null +++ b/0084-test-bdrv-drain-Drain-with-block-jobs-in-an-I-O-thre.patch @@ -0,0 +1,208 @@ +From 6d374393478f0d57ec8cd338342687d043565662 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:21:58 +0100 +Subject: test-bdrv-drain: Drain with block jobs in an I/O thread + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-20-kwolf@redhat.com> +Patchwork-id: 82608 +O-Subject: [RHEL-8 qemu-kvm PATCH 29/44] test-bdrv-drain: Drain with block jobs in an I/O thread +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +This extends the existing drain test with a block job to include +variants where the block job runs in a different AioContext. + +Signed-off-by: Kevin Wolf +Reviewed-by: Fam Zheng +(cherry picked from commit f62c172959cd2b6de4dd8ba782e855d64d94764b) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/test-bdrv-drain.c | 92 +++++++++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 86 insertions(+), 6 deletions(-) + +diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c +index 9bcb3c7..3cf3ba3 100644 +--- a/tests/test-bdrv-drain.c ++++ b/tests/test-bdrv-drain.c +@@ -174,6 +174,28 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) + } + } + ++static void do_drain_begin_unlocked(enum drain_type drain_type, BlockDriverState *bs) ++{ ++ if (drain_type != BDRV_DRAIN_ALL) { ++ aio_context_acquire(bdrv_get_aio_context(bs)); ++ } ++ do_drain_begin(drain_type, bs); ++ if (drain_type != BDRV_DRAIN_ALL) { ++ aio_context_release(bdrv_get_aio_context(bs)); ++ } ++} ++ ++static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *bs) ++{ ++ if (drain_type != BDRV_DRAIN_ALL) { ++ aio_context_acquire(bdrv_get_aio_context(bs)); ++ } ++ do_drain_end(drain_type, bs); ++ if (drain_type != BDRV_DRAIN_ALL) { ++ aio_context_release(bdrv_get_aio_context(bs)); ++ } ++} ++ + static void test_drv_cb_common(enum drain_type drain_type, bool recursive) + { + BlockBackend *blk; +@@ -785,11 +807,13 @@ BlockJobDriver test_job_driver = { + }, + }; + +-static void test_blockjob_common(enum drain_type drain_type) ++static void test_blockjob_common(enum drain_type drain_type, bool use_iothread) + { + BlockBackend *blk_src, *blk_target; + BlockDriverState *src, *target; + BlockJob *job; ++ IOThread *iothread = NULL; ++ AioContext *ctx; + int ret; + + src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR, +@@ -797,21 +821,31 @@ static void test_blockjob_common(enum drain_type drain_type) + blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_insert_bs(blk_src, src, &error_abort); + ++ if (use_iothread) { ++ iothread = iothread_new(); ++ ctx = iothread_get_aio_context(iothread); ++ blk_set_aio_context(blk_src, ctx); ++ } else { ++ ctx = qemu_get_aio_context(); ++ } ++ + target = 
bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR, + &error_abort); + blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_insert_bs(blk_target, target, &error_abort); + ++ aio_context_acquire(ctx); + job = block_job_create("job0", &test_job_driver, NULL, src, 0, BLK_PERM_ALL, + 0, 0, NULL, NULL, &error_abort); + block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort); + job_start(&job->job); ++ aio_context_release(ctx); + + g_assert_cmpint(job->job.pause_count, ==, 0); + g_assert_false(job->job.paused); + g_assert_true(job->job.busy); /* We're in job_sleep_ns() */ + +- do_drain_begin(drain_type, src); ++ do_drain_begin_unlocked(drain_type, src); + + if (drain_type == BDRV_DRAIN_ALL) { + /* bdrv_drain_all() drains both src and target */ +@@ -822,7 +856,14 @@ static void test_blockjob_common(enum drain_type drain_type) + g_assert_true(job->job.paused); + g_assert_false(job->job.busy); /* The job is paused */ + +- do_drain_end(drain_type, src); ++ do_drain_end_unlocked(drain_type, src); ++ ++ if (use_iothread) { ++ /* paused is reset in the I/O thread, wait for it */ ++ while (job->job.paused) { ++ aio_poll(qemu_get_aio_context(), false); ++ } ++ } + + g_assert_cmpint(job->job.pause_count, ==, 0); + g_assert_false(job->job.paused); +@@ -841,32 +882,64 @@ static void test_blockjob_common(enum drain_type drain_type) + + do_drain_end(drain_type, target); + ++ if (use_iothread) { ++ /* paused is reset in the I/O thread, wait for it */ ++ while (job->job.paused) { ++ aio_poll(qemu_get_aio_context(), false); ++ } ++ } ++ + g_assert_cmpint(job->job.pause_count, ==, 0); + g_assert_false(job->job.paused); + g_assert_true(job->job.busy); /* We're in job_sleep_ns() */ + ++ aio_context_acquire(ctx); + ret = job_complete_sync(&job->job, &error_abort); + g_assert_cmpint(ret, ==, 0); + ++ if (use_iothread) { ++ blk_set_aio_context(blk_src, qemu_get_aio_context()); ++ } ++ aio_context_release(ctx); ++ + blk_unref(blk_src); + blk_unref(blk_target); + 
bdrv_unref(src); + bdrv_unref(target); ++ ++ if (iothread) { ++ iothread_join(iothread); ++ } + } + + static void test_blockjob_drain_all(void) + { +- test_blockjob_common(BDRV_DRAIN_ALL); ++ test_blockjob_common(BDRV_DRAIN_ALL, false); + } + + static void test_blockjob_drain(void) + { +- test_blockjob_common(BDRV_DRAIN); ++ test_blockjob_common(BDRV_DRAIN, false); + } + + static void test_blockjob_drain_subtree(void) + { +- test_blockjob_common(BDRV_SUBTREE_DRAIN); ++ test_blockjob_common(BDRV_SUBTREE_DRAIN, false); ++} ++ ++static void test_blockjob_iothread_drain_all(void) ++{ ++ test_blockjob_common(BDRV_DRAIN_ALL, true); ++} ++ ++static void test_blockjob_iothread_drain(void) ++{ ++ test_blockjob_common(BDRV_DRAIN, true); ++} ++ ++static void test_blockjob_iothread_drain_subtree(void) ++{ ++ test_blockjob_common(BDRV_SUBTREE_DRAIN, true); + } + + +@@ -1337,6 +1410,13 @@ int main(int argc, char **argv) + g_test_add_func("/bdrv-drain/blockjob/drain_subtree", + test_blockjob_drain_subtree); + ++ g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all", ++ test_blockjob_iothread_drain_all); ++ g_test_add_func("/bdrv-drain/blockjob/iothread/drain", ++ test_blockjob_iothread_drain); ++ g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree", ++ test_blockjob_iothread_drain_subtree); ++ + g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain); + g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all); + g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); +-- +1.8.3.1 + diff --git a/0085-test-blockjob-Acquire-AioContext-around-job_cancel_s.patch b/0085-test-blockjob-Acquire-AioContext-around-job_cancel_s.patch new file mode 100644 index 0000000..976ce6a --- /dev/null +++ b/0085-test-blockjob-Acquire-AioContext-around-job_cancel_s.patch @@ -0,0 +1,86 @@ +From 99172abebcedfb48ca06d4c1bd0cd16372449600 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:21:59 +0100 +Subject: test-blockjob: Acquire 
AioContext around job_cancel_sync() + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-21-kwolf@redhat.com> +Patchwork-id: 82606 +O-Subject: [RHEL-8 qemu-kvm PATCH 30/44] test-blockjob: Acquire AioContext around job_cancel_sync() +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +All callers in QEMU proper hold the AioContext lock when calling +job_finish_sync(). test-blockjob should do the same when it calls the +function indirectly through job_cancel_sync(). + +Signed-off-by: Kevin Wolf +Reviewed-by: Fam Zheng +(cherry picked from commit 30c070a547322a5e41ce129d540bca3653b1a9c8) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + include/qemu/job.h | 6 ++++++ + tests/test-blockjob.c | 6 ++++++ + 2 files changed, 12 insertions(+) + +diff --git a/include/qemu/job.h b/include/qemu/job.h +index b4a784d..63c60ef 100644 +--- a/include/qemu/job.h ++++ b/include/qemu/job.h +@@ -524,6 +524,8 @@ void job_user_cancel(Job *job, bool force, Error **errp); + * + * Returns the return value from the job if the job actually completed + * during the call, or -ECANCELED if it was canceled. ++ * ++ * Callers must hold the AioContext lock of job->aio_context. + */ + int job_cancel_sync(Job *job); + +@@ -541,6 +543,8 @@ void job_cancel_sync_all(void); + * function). + * + * Returns the return value from the job. ++ * ++ * Callers must hold the AioContext lock of job->aio_context. + */ + int job_complete_sync(Job *job, Error **errp); + +@@ -566,6 +570,8 @@ void job_dismiss(Job **job, Error **errp); + * + * Returns 0 if the job is successfully completed, -ECANCELED if the job was + * cancelled before completing, and -errno in other error cases. ++ * ++ * Callers must hold the AioContext lock of job->aio_context. 
+ */ + int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp); + +diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c +index de4c1c2..652d1e8 100644 +--- a/tests/test-blockjob.c ++++ b/tests/test-blockjob.c +@@ -223,6 +223,10 @@ static void cancel_common(CancelJob *s) + BlockJob *job = &s->common; + BlockBackend *blk = s->blk; + JobStatus sts = job->job.status; ++ AioContext *ctx; ++ ++ ctx = job->job.aio_context; ++ aio_context_acquire(ctx); + + job_cancel_sync(&job->job); + if (sts != JOB_STATUS_CREATED && sts != JOB_STATUS_CONCLUDED) { +@@ -232,6 +236,8 @@ static void cancel_common(CancelJob *s) + assert(job->job.status == JOB_STATUS_NULL); + job_unref(&job->job); + destroy_blk(blk); ++ ++ aio_context_release(ctx); + } + + static void test_cancel_created(void) +-- +1.8.3.1 + diff --git a/0086-job-Use-AIO_WAIT_WHILE-in-job_finish_sync.patch b/0086-job-Use-AIO_WAIT_WHILE-in-job_finish_sync.patch new file mode 100644 index 0000000..b16aa60 --- /dev/null +++ b/0086-job-Use-AIO_WAIT_WHILE-in-job_finish_sync.patch @@ -0,0 +1,77 @@ +From 3f3282c8ffa29e3dbcf58618beefb36afe8ba79b Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:22:00 +0100 +Subject: job: Use AIO_WAIT_WHILE() in job_finish_sync() + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-22-kwolf@redhat.com> +Patchwork-id: 82612 +O-Subject: [RHEL-8 qemu-kvm PATCH 31/44] job: Use AIO_WAIT_WHILE() in job_finish_sync() +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +job_finish_sync() needs to release the AioContext lock of the job before +calling aio_poll(). Otherwise, callbacks called by aio_poll() would +possibly take the lock a second time and run into a deadlock with a +nested AIO_WAIT_WHILE() call. + +Also, job_drain() without aio_poll() isn't necessarily enough to make +progress on a job, it could depend on bottom halves to be executed. 
+ +Combine both open-coded while loops into a single AIO_WAIT_WHILE() call +that solves both of these problems. + +Signed-off-by: Kevin Wolf +Reviewed-by: Fam Zheng +Reviewed-by: Max Reitz +(cherry picked from commit de0fbe64806321fc3e6399bfab360553db87a41d) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + job.c | 14 ++++++-------- + 1 file changed, 6 insertions(+), 8 deletions(-) + +diff --git a/job.c b/job.c +index 48a767c..fa74558 100644 +--- a/job.c ++++ b/job.c +@@ -29,6 +29,7 @@ + #include "qemu/job.h" + #include "qemu/id.h" + #include "qemu/main-loop.h" ++#include "block/aio-wait.h" + #include "trace-root.h" + #include "qapi/qapi-events-job.h" + +@@ -962,6 +963,7 @@ void job_complete(Job *job, Error **errp) + int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp) + { + Error *local_err = NULL; ++ AioWait dummy_wait = {}; + int ret; + + job_ref(job); +@@ -974,14 +976,10 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp) + job_unref(job); + return -EBUSY; + } +- /* job_drain calls job_enter, and it should be enough to induce progress +- * until the job completes or moves to the main thread. */ +- while (!job->deferred_to_main_loop && !job_is_completed(job)) { +- job_drain(job); +- } +- while (!job_is_completed(job)) { +- aio_poll(qemu_get_aio_context(), true); +- } ++ ++ AIO_WAIT_WHILE(&dummy_wait, job->aio_context, ++ (job_drain(job), !job_is_completed(job))); ++ + ret = (job_is_cancelled(job) && job->ret == 0) ? 
-ECANCELED : job->ret; + job_unref(job); + return ret; +-- +1.8.3.1 + diff --git a/0087-test-bdrv-drain-Test-AIO_WAIT_WHILE-in-completion-ca.patch b/0087-test-bdrv-drain-Test-AIO_WAIT_WHILE-in-completion-ca.patch new file mode 100644 index 0000000..978a41d --- /dev/null +++ b/0087-test-bdrv-drain-Test-AIO_WAIT_WHILE-in-completion-ca.patch @@ -0,0 +1,59 @@ +From b9c555343b6567159effe1b3eb736fd1e02257bd Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:22:01 +0100 +Subject: test-bdrv-drain: Test AIO_WAIT_WHILE() in completion callback + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-23-kwolf@redhat.com> +Patchwork-id: 82611 +O-Subject: [RHEL-8 qemu-kvm PATCH 32/44] test-bdrv-drain: Test AIO_WAIT_WHILE() in completion callback +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +This is a regression test for a deadlock that occurred in block job +completion callbacks (via job_defer_to_main_loop) because the AioContext +lock was taken twice: once in job_finish_sync() and then again in +job_defer_to_main_loop_bh(). This would cause AIO_WAIT_WHILE() to hang. + +Signed-off-by: Kevin Wolf +Reviewed-by: Fam Zheng +(cherry picked from commit ae23dde9dd486e57e152a0ebc9802caddedc45fc) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/test-bdrv-drain.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c +index 3cf3ba3..05f3b55 100644 +--- a/tests/test-bdrv-drain.c ++++ b/tests/test-bdrv-drain.c +@@ -774,6 +774,15 @@ typedef struct TestBlockJob { + bool should_complete; + } TestBlockJob; + ++static int test_job_prepare(Job *job) ++{ ++ TestBlockJob *s = container_of(job, TestBlockJob, common.job); ++ ++ /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */ ++ blk_flush(s->common.blk); ++ return 0; ++} ++ + static int coroutine_fn test_job_run(Job *job, Error **errp) + { + TestBlockJob *s = container_of(job, TestBlockJob, common.job); +@@ -804,6 +813,7 @@ BlockJobDriver test_job_driver = { + .drain = block_job_drain, + .run = test_job_run, + .complete = test_job_complete, ++ .prepare = test_job_prepare, + }, + }; + +-- +1.8.3.1 + diff --git a/0088-block-Add-missing-locking-in-bdrv_co_drain_bh_cb.patch b/0088-block-Add-missing-locking-in-bdrv_co_drain_bh_cb.patch new file mode 100644 index 0000000..1d2abee --- /dev/null +++ b/0088-block-Add-missing-locking-in-bdrv_co_drain_bh_cb.patch @@ -0,0 +1,96 @@ +From 51c1069568d78941554c70f9084531c279899c83 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:22:02 +0100 +Subject: block: Add missing locking in bdrv_co_drain_bh_cb() + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-24-kwolf@redhat.com> +Patchwork-id: 82613 +O-Subject: [RHEL-8 qemu-kvm PATCH 33/44] block: Add missing locking in bdrv_co_drain_bh_cb() +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +bdrv_do_drained_begin/end() assume that they are called with the +AioContext lock of bs held. If we call drain functions from a coroutine +with the AioContext lock held, we yield and schedule a BH to move out of +coroutine context. 
This means that the lock for the home context of the +coroutine is released and must be re-acquired in the bottom half. + +Signed-off-by: Kevin Wolf +Reviewed-by: Max Reitz +(cherry picked from commit aa1361d54aac43094b98024b8b6c804eb6e41661) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/io.c | 15 +++++++++++++++ + include/qemu/coroutine.h | 5 +++++ + util/qemu-coroutine.c | 5 +++++ + 3 files changed, 25 insertions(+) + +diff --git a/block/io.c b/block/io.c +index 7100344..914ba78 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -288,6 +288,18 @@ static void bdrv_co_drain_bh_cb(void *opaque) + BlockDriverState *bs = data->bs; + + if (bs) { ++ AioContext *ctx = bdrv_get_aio_context(bs); ++ AioContext *co_ctx = qemu_coroutine_get_aio_context(co); ++ ++ /* ++ * When the coroutine yielded, the lock for its home context was ++ * released, so we need to re-acquire it here. If it explicitly ++ * acquired a different context, the lock is still held and we don't ++ * want to lock it a second time (or AIO_WAIT_WHILE() would hang). 
++ */ ++ if (ctx == co_ctx) { ++ aio_context_acquire(ctx); ++ } + bdrv_dec_in_flight(bs); + if (data->begin) { + bdrv_do_drained_begin(bs, data->recursive, data->parent, +@@ -296,6 +308,9 @@ static void bdrv_co_drain_bh_cb(void *opaque) + bdrv_do_drained_end(bs, data->recursive, data->parent, + data->ignore_bds_parents); + } ++ if (ctx == co_ctx) { ++ aio_context_release(ctx); ++ } + } else { + assert(data->begin); + bdrv_drain_all_begin(); +diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h +index 6f8a487..9801e7f 100644 +--- a/include/qemu/coroutine.h ++++ b/include/qemu/coroutine.h +@@ -90,6 +90,11 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co); + void coroutine_fn qemu_coroutine_yield(void); + + /** ++ * Get the AioContext of the given coroutine ++ */ ++AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co); ++ ++/** + * Get the currently executing coroutine + */ + Coroutine *coroutine_fn qemu_coroutine_self(void); +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index 1ba4191..2295928 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -198,3 +198,8 @@ bool qemu_coroutine_entered(Coroutine *co) + { + return co->caller; + } ++ ++AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co) ++{ ++ return co->ctx; ++} +-- +1.8.3.1 + diff --git a/0089-block-backend-Add-.drained_poll-callback.patch b/0089-block-backend-Add-.drained_poll-callback.patch new file mode 100644 index 0000000..64eae39 --- /dev/null +++ b/0089-block-backend-Add-.drained_poll-callback.patch @@ -0,0 +1,66 @@ +From ea3026a59a3772f84697af9b62b6272cfb41f40c Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:22:03 +0100 +Subject: block-backend: Add .drained_poll callback + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-25-kwolf@redhat.com> +Patchwork-id: 82614 +O-Subject: [RHEL-8 qemu-kvm PATCH 34/44] block-backend: Add .drained_poll callback +Bugzilla: 1637976 +RH-Acked-by: Max 
Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +A bdrv_drain operation must ensure that all parents are quiesced, this +includes BlockBackends. Otherwise, callbacks called by requests that are +completed on the BDS layer, but not quite yet on the BlockBackend layer +could still create new requests. + +Signed-off-by: Kevin Wolf +Reviewed-by: Fam Zheng +Reviewed-by: Max Reitz +(cherry picked from commit fe5258a503a87e69be37c9ac48799e293809386e) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/block-backend.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/block/block-backend.c b/block/block-backend.c +index f2f75a9..2b837d1 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -121,6 +121,7 @@ static void blk_root_inherit_options(int *child_flags, QDict *child_options, + abort(); + } + static void blk_root_drained_begin(BdrvChild *child); ++static bool blk_root_drained_poll(BdrvChild *child); + static void blk_root_drained_end(BdrvChild *child); + + static void blk_root_change_media(BdrvChild *child, bool load); +@@ -294,6 +295,7 @@ static const BdrvChildRole child_root = { + .get_parent_desc = blk_root_get_parent_desc, + + .drained_begin = blk_root_drained_begin, ++ .drained_poll = blk_root_drained_poll, + .drained_end = blk_root_drained_end, + + .activate = blk_root_activate, +@@ -2192,6 +2194,13 @@ static void blk_root_drained_begin(BdrvChild *child) + } + } + ++static bool blk_root_drained_poll(BdrvChild *child) ++{ ++ BlockBackend *blk = child->opaque; ++ assert(blk->quiesce_counter); ++ return !!blk->in_flight; ++} ++ + static void blk_root_drained_end(BdrvChild *child) + { + BlockBackend *blk = child->opaque; +-- +1.8.3.1 + diff --git a/0090-block-backend-Fix-potential-double-blk_delete.patch b/0090-block-backend-Fix-potential-double-blk_delete.patch new file mode 100644 index 0000000..56799a1 --- /dev/null +++ b/0090-block-backend-Fix-potential-double-blk_delete.patch @@ -0,0 +1,67 @@ 
+From 21a2ef76c6aa33f0058d149b1bfdde1d27ba1df4 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:22:04 +0100 +Subject: block-backend: Fix potential double blk_delete() + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-26-kwolf@redhat.com> +Patchwork-id: 82615 +O-Subject: [RHEL-8 qemu-kvm PATCH 35/44] block-backend: Fix potential double blk_delete() +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +blk_unref() first decreases the refcount of the BlockBackend and calls +blk_delete() if the refcount reaches zero. Requests can still be in +flight at this point, they are only drained during blk_delete(): + +At this point, arbitrary callbacks can run. If any callback takes a +temporary BlockBackend reference, it will first increase the refcount to +1 and then decrease it to 0 again, triggering another blk_delete(). This +will cause a use-after-free crash in the outer blk_delete(). + +Fix it by draining the BlockBackend before decreasing the refcount to 0. +Assert in blk_ref() that it never takes the first refcount (which would +mean that the BlockBackend is already being deleted). + +Signed-off-by: Kevin Wolf +Reviewed-by: Fam Zheng +Reviewed-by: Max Reitz +(cherry picked from commit 5ca9d21bd1c8eeb578d0964e31bd03d47c25773d) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/block-backend.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 2b837d1..94046f0 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -436,6 +436,7 @@ int blk_get_refcnt(BlockBackend *blk) + */ + void blk_ref(BlockBackend *blk) + { ++ assert(blk->refcnt > 0); + blk->refcnt++; + } + +@@ -448,7 +449,13 @@ void blk_unref(BlockBackend *blk) + { + if (blk) { + assert(blk->refcnt > 0); +- if (!--blk->refcnt) { ++ if (blk->refcnt > 1) { ++ blk->refcnt--; ++ } else { ++ blk_drain(blk); ++ /* blk_drain() cannot resurrect blk, nobody held a reference */ ++ assert(blk->refcnt == 1); ++ blk->refcnt = 0; + blk_delete(blk); + } + } +-- +1.8.3.1 + diff --git a/0091-block-backend-Decrease-in_flight-only-after-callback.patch b/0091-block-backend-Decrease-in_flight-only-after-callback.patch new file mode 100644 index 0000000..69805e9 --- /dev/null +++ b/0091-block-backend-Decrease-in_flight-only-after-callback.patch @@ -0,0 +1,74 @@ +From 91ae719381f75ed3554b0c5e1d8bf58583a9208f Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:22:05 +0100 +Subject: block-backend: Decrease in_flight only after callback + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-27-kwolf@redhat.com> +Patchwork-id: 82617 +O-Subject: [RHEL-8 qemu-kvm PATCH 36/44] block-backend: Decrease in_flight only after callback +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +Request callbacks can do pretty much anything, including operations that +will yield from the coroutine (such as draining the backend). In that +case, a decreased in_flight would be visible to other code and could +lead to a drain completing while the callback hasn't actually completed +yet. + +Note that reordering these operations forbids calling drain directly +inside an AIO callback. 
As Paolo explains, indirectly calling it is +okay: + +- Calling it through a coroutine is okay, because then + bdrv_drained_begin() goes through bdrv_co_yield_to_drain() and you + have in_flight=2 when bdrv_co_yield_to_drain() yields, then soon + in_flight=1 when the aio_co_wake() in the AIO callback completes, then + in_flight=0 after the bottom half starts. + +- Calling it through a bottom half would be okay too, as long as the AIO + callback remembers to do inc_in_flight/dec_in_flight just like + bdrv_co_yield_to_drain() and bdrv_co_drain_bh_cb() do + +A few more important cases that come to mind: + +- A coroutine that yields because of I/O is okay, with a sequence + similar to bdrv_co_yield_to_drain(). + +- A coroutine that yields with no I/O pending will correctly decrease + in_flight to zero before yielding. + +- Calling more AIO from the callback won't overflow the counter just + because of mutual recursion, because AIO functions always yield at + least once before invoking the callback. + +Signed-off-by: Kevin Wolf +Reviewed-by: Fam Zheng +Reviewed-by: Max Reitz +Reviewed-by: Paolo Bonzini +(cherry picked from commit 46aaf2a566e364a62315219255099cbf1c9b990d) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/block-backend.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 94046f0..9a3e060 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1341,8 +1341,8 @@ static const AIOCBInfo blk_aio_em_aiocb_info = { + static void blk_aio_complete(BlkAioEmAIOCB *acb) + { + if (acb->has_returned) { +- blk_dec_in_flight(acb->rwco.blk); + acb->common.cb(acb->common.opaque, acb->rwco.ret); ++ blk_dec_in_flight(acb->rwco.blk); + qemu_aio_unref(acb); + } + } +-- +1.8.3.1 + diff --git a/0092-blockjob-Lie-better-in-child_job_drained_poll.patch b/0092-blockjob-Lie-better-in-child_job_drained_poll.patch new file mode 100644 index 0000000..64f5aaa --- /dev/null +++ b/0092-blockjob-Lie-better-in-child_job_drained_poll.patch @@ -0,0 +1,104 @@ +From bc17446b1e7c9578a3e3079173891c93998dfa00 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:22:06 +0100 +Subject: blockjob: Lie better in child_job_drained_poll() + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-28-kwolf@redhat.com> +Patchwork-id: 82616 +O-Subject: [RHEL-8 qemu-kvm PATCH 37/44] blockjob: Lie better in child_job_drained_poll() +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +Block jobs claim in .drained_poll() that they are in a quiescent state +as soon as job->deferred_to_main_loop is true. This is obviously wrong, +they still have a completion BH to run. We only get away with this +because commit 91af091f923 added an unconditional aio_poll(false) to the +drain functions, but this is bypassing the regular drain mechanisms. + +However, just removing this and telling that the job is still active +doesn't work either: The completion callbacks themselves call drain +functions (directly, or indirectly with bdrv_reopen), so they would +deadlock then. 
+ +As a better lie, tell that the job is active as long as the BH is +pending, but falsely call it quiescent from the point in the BH when the +completion callback is called. At this point, nested drain calls won't +deadlock because they ignore the job, and outer drains will wait for the +job to really reach a quiescent state because the callback is already +running. + +Signed-off-by: Kevin Wolf +Reviewed-by: Max Reitz +(cherry picked from commit b5a7a0573530698ee448b063ac01d485e30446bd) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + blockjob.c | 2 +- + include/qemu/job.h | 3 +++ + job.c | 11 ++++++++++- + 3 files changed, 14 insertions(+), 2 deletions(-) + +diff --git a/blockjob.c b/blockjob.c +index 8d27e8e..617d86f 100644 +--- a/blockjob.c ++++ b/blockjob.c +@@ -164,7 +164,7 @@ static bool child_job_drained_poll(BdrvChild *c) + /* An inactive or completed job doesn't have any pending requests. Jobs + * with !job->busy are either already paused or have a pause point after + * being reentered, so no job driver code will run before they pause. */ +- if (!job->busy || job_is_completed(job) || job->deferred_to_main_loop) { ++ if (!job->busy || job_is_completed(job)) { + return false; + } + +diff --git a/include/qemu/job.h b/include/qemu/job.h +index 63c60ef..9e7cd1e 100644 +--- a/include/qemu/job.h ++++ b/include/qemu/job.h +@@ -76,6 +76,9 @@ typedef struct Job { + * Set to false by the job while the coroutine has yielded and may be + * re-entered by job_enter(). There may still be I/O or event loop activity + * pending. Accessed under block_job_mutex (in blockjob.c). ++ * ++ * When the job is deferred to the main loop, busy is true as long as the ++ * bottom half is still pending. 
+ */ + bool busy; + +diff --git a/job.c b/job.c +index fa74558..00a1cd1 100644 +--- a/job.c ++++ b/job.c +@@ -857,7 +857,16 @@ static void job_exit(void *opaque) + AioContext *ctx = job->aio_context; + + aio_context_acquire(ctx); ++ ++ /* This is a lie, we're not quiescent, but still doing the completion ++ * callbacks. However, completion callbacks tend to involve operations that ++ * drain block nodes, and if .drained_poll still returned true, we would ++ * deadlock. */ ++ job->busy = false; ++ job_event_idle(job); ++ + job_completed(job); ++ + aio_context_release(ctx); + } + +@@ -872,8 +881,8 @@ static void coroutine_fn job_co_entry(void *opaque) + assert(job && job->driver && job->driver->run); + job_pause_point(job); + job->ret = job->driver->run(job, &job->err); +- job_event_idle(job); + job->deferred_to_main_loop = true; ++ job->busy = true; + aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job); + } + +-- +1.8.3.1 + diff --git a/0093-block-Remove-aio_poll-in-bdrv_drain_poll-variants.patch b/0093-block-Remove-aio_poll-in-bdrv_drain_poll-variants.patch new file mode 100644 index 0000000..60cdc36 --- /dev/null +++ b/0093-block-Remove-aio_poll-in-bdrv_drain_poll-variants.patch @@ -0,0 +1,64 @@ +From ce7a9c21d6a43b736d5aa2041acbd5d1edca0070 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:22:07 +0100 +Subject: block: Remove aio_poll() in bdrv_drain_poll variants + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-29-kwolf@redhat.com> +Patchwork-id: 82619 +O-Subject: [RHEL-8 qemu-kvm PATCH 38/44] block: Remove aio_poll() in bdrv_drain_poll variants +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +bdrv_drain_poll_top_level() was buggy because it didn't release the +AioContext lock of the node to be drained before calling aio_poll(). +This way, callbacks called by aio_poll() would possibly take the lock a +second time and run into a deadlock with a nested AIO_WAIT_WHILE() call. 
+ +However, it turns out that the aio_poll() call isn't actually needed any +more. It was introduced in commit 91af091f923, which is effectively +reverted by this patch. The cases it was supposed to fix are now covered +by bdrv_drain_poll(), which waits for block jobs to reach a quiescent +state. + +Signed-off-by: Kevin Wolf +Reviewed-by: Fam Zheng +Reviewed-by: Max Reitz +(cherry picked from commit 4cf077b59fc73eec29f8b7d082919dbb278bdc86) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/io.c | 8 -------- + 1 file changed, 8 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 914ba78..8b81ff3 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -268,10 +268,6 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, + static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive, + BdrvChild *ignore_parent) + { +- /* Execute pending BHs first and check everything else only after the BHs +- * have executed. */ +- while (aio_poll(bs->aio_context, false)); +- + return bdrv_drain_poll(bs, recursive, ignore_parent, false); + } + +@@ -511,10 +507,6 @@ static bool bdrv_drain_all_poll(void) + BlockDriverState *bs = NULL; + bool result = false; + +- /* Execute pending BHs first (may modify the graph) and check everything +- * else only after the BHs have executed. */ +- while (aio_poll(qemu_get_aio_context(), false)); +- + /* bdrv_drain_poll() can't make changes to the graph and we are holding the + * main AioContext lock, so iterating bdrv_next_all_states() is safe. 
*/ + while ((bs = bdrv_next_all_states(bs))) { +-- +1.8.3.1 + diff --git a/0094-test-bdrv-drain-Test-nested-poll-in-bdrv_drain_poll_.patch b/0094-test-bdrv-drain-Test-nested-poll-in-bdrv_drain_poll_.patch new file mode 100644 index 0000000..8de1ae2 --- /dev/null +++ b/0094-test-bdrv-drain-Test-nested-poll-in-bdrv_drain_poll_.patch @@ -0,0 +1,63 @@ +From 6c315602205e494dd084a4692a06c16b0e233875 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:22:08 +0100 +Subject: test-bdrv-drain: Test nested poll in bdrv_drain_poll_top_level() + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-30-kwolf@redhat.com> +Patchwork-id: 82618 +O-Subject: [RHEL-8 qemu-kvm PATCH 39/44] test-bdrv-drain: Test nested poll in bdrv_drain_poll_top_level() +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +This is a regression test for a deadlock that could occur in callbacks +called from the aio_poll() in bdrv_drain_poll_top_level(). The +AioContext lock wasn't released and therefore would be taken a second +time in the callback. This would cause a possible AIO_WAIT_WHILE() in +the callback to hang. + +Signed-off-by: Kevin Wolf +Reviewed-by: Fam Zheng +(cherry picked from commit ecc1a5c790cf2c7732cb9755ca388c2fe108d1a1) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/test-bdrv-drain.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c +index 05f3b55..f4b57f7 100644 +--- a/tests/test-bdrv-drain.c ++++ b/tests/test-bdrv-drain.c +@@ -636,6 +636,17 @@ static void test_iothread_aio_cb(void *opaque, int ret) + qemu_event_set(&done_event); + } + ++static void test_iothread_main_thread_bh(void *opaque) ++{ ++ struct test_iothread_data *data = opaque; ++ ++ /* Test that the AioContext is not yet locked in a random BH that is ++ * executed during drain, otherwise this would deadlock. 
*/ ++ aio_context_acquire(bdrv_get_aio_context(data->bs)); ++ bdrv_flush(data->bs); ++ aio_context_release(bdrv_get_aio_context(data->bs)); ++} ++ + /* + * Starts an AIO request on a BDS that runs in the AioContext of iothread 1. + * The request involves a BH on iothread 2 before it can complete. +@@ -705,6 +716,8 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) + aio_context_acquire(ctx_a); + } + ++ aio_bh_schedule_oneshot(ctx_a, test_iothread_main_thread_bh, &data); ++ + /* The request is running on the IOThread a. Draining its block device + * will make sure that it has completed as far as the BDS is concerned, + * but the drain in this thread can continue immediately after +-- +1.8.3.1 + diff --git a/0095-job-Avoid-deadlocks-in-job_completed_txn_abort.patch b/0095-job-Avoid-deadlocks-in-job_completed_txn_abort.patch new file mode 100644 index 0000000..0b8a8fd --- /dev/null +++ b/0095-job-Avoid-deadlocks-in-job_completed_txn_abort.patch @@ -0,0 +1,85 @@ +From 287d4267dcb2d5f262dba7f6e7f35dcd294b622a Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:22:09 +0100 +Subject: job: Avoid deadlocks in job_completed_txn_abort() + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-31-kwolf@redhat.com> +Patchwork-id: 82622 +O-Subject: [RHEL-8 qemu-kvm PATCH 40/44] job: Avoid deadlocks in job_completed_txn_abort() +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +Amongst others, job_finalize_single() calls the .prepare/.commit/.abort +callbacks of the individual job driver. Recently, their use was adapted +for all block jobs so that they involve code calling AIO_WAIT_WHILE() +now. Such code must be called under the AioContext lock for the +respective job, but without holding any other AioContext lock. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Max Reitz +(cherry picked from commit 644f3a29bd4974aefd46d2adb5062d86063c8a50) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + job.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +diff --git a/job.c b/job.c +index 00a1cd1..0b02186 100644 +--- a/job.c ++++ b/job.c +@@ -718,6 +718,7 @@ static void job_cancel_async(Job *job, bool force) + + static void job_completed_txn_abort(Job *job) + { ++ AioContext *outer_ctx = job->aio_context; + AioContext *ctx; + JobTxn *txn = job->txn; + Job *other_job; +@@ -731,23 +732,26 @@ static void job_completed_txn_abort(Job *job) + txn->aborting = true; + job_txn_ref(txn); + +- /* We are the first failed job. Cancel other jobs. */ +- QLIST_FOREACH(other_job, &txn->jobs, txn_list) { +- ctx = other_job->aio_context; +- aio_context_acquire(ctx); +- } ++ /* We can only hold the single job's AioContext lock while calling ++ * job_finalize_single() because the finalization callbacks can involve ++ * calls of AIO_WAIT_WHILE(), which could deadlock otherwise. */ ++ aio_context_release(outer_ctx); + + /* Other jobs are effectively cancelled by us, set the status for + * them; this job, however, may or may not be cancelled, depending + * on the caller, so leave it. 
*/ + QLIST_FOREACH(other_job, &txn->jobs, txn_list) { + if (other_job != job) { ++ ctx = other_job->aio_context; ++ aio_context_acquire(ctx); + job_cancel_async(other_job, false); ++ aio_context_release(ctx); + } + } + while (!QLIST_EMPTY(&txn->jobs)) { + other_job = QLIST_FIRST(&txn->jobs); + ctx = other_job->aio_context; ++ aio_context_acquire(ctx); + if (!job_is_completed(other_job)) { + assert(job_is_cancelled(other_job)); + job_finish_sync(other_job, NULL, NULL); +@@ -756,6 +760,8 @@ static void job_completed_txn_abort(Job *job) + aio_context_release(ctx); + } + ++ aio_context_acquire(outer_ctx); ++ + job_txn_unref(txn); + } + +-- +1.8.3.1 + diff --git a/0096-test-bdrv-drain-AIO_WAIT_WHILE-in-job-.commit-.abort.patch b/0096-test-bdrv-drain-AIO_WAIT_WHILE-in-job-.commit-.abort.patch new file mode 100644 index 0000000..44cdf9f --- /dev/null +++ b/0096-test-bdrv-drain-AIO_WAIT_WHILE-in-job-.commit-.abort.patch @@ -0,0 +1,241 @@ +From 10fbd3c89739a1879f47f2a2256831ce5e1ae7ad Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:22:10 +0100 +Subject: test-bdrv-drain: AIO_WAIT_WHILE() in job .commit/.abort + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-32-kwolf@redhat.com> +Patchwork-id: 82620 +O-Subject: [RHEL-8 qemu-kvm PATCH 41/44] test-bdrv-drain: AIO_WAIT_WHILE() in job .commit/.abort +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +This adds tests for calling AIO_WAIT_WHILE() in the .commit and .abort +callbacks. Both reasons why .abort could be called for a single job are +tested: Either .run or .prepare could return an error. + +Signed-off-by: Kevin Wolf +Reviewed-by: Max Reitz +(cherry picked from commit d49725af46a7710cde02cc120b7f1e485154b483) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/test-bdrv-drain.c | 116 +++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 104 insertions(+), 12 deletions(-) + +diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c +index f4b57f7..d6202b2 100644 +--- a/tests/test-bdrv-drain.c ++++ b/tests/test-bdrv-drain.c +@@ -784,6 +784,8 @@ static void test_iothread_drain_subtree(void) + + typedef struct TestBlockJob { + BlockJob common; ++ int run_ret; ++ int prepare_ret; + bool should_complete; + } TestBlockJob; + +@@ -793,7 +795,23 @@ static int test_job_prepare(Job *job) + + /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */ + blk_flush(s->common.blk); +- return 0; ++ return s->prepare_ret; ++} ++ ++static void test_job_commit(Job *job) ++{ ++ TestBlockJob *s = container_of(job, TestBlockJob, common.job); ++ ++ /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */ ++ blk_flush(s->common.blk); ++} ++ ++static void test_job_abort(Job *job) ++{ ++ TestBlockJob *s = container_of(job, TestBlockJob, common.job); ++ ++ /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */ ++ blk_flush(s->common.blk); + } + + static int coroutine_fn test_job_run(Job *job, Error **errp) +@@ -809,7 +827,7 @@ static int coroutine_fn test_job_run(Job *job, Error **errp) + job_pause_point(&s->common.job); + } + +- return 0; ++ return s->run_ret; + } + + static void test_job_complete(Job *job, Error **errp) +@@ -827,14 +845,24 @@ BlockJobDriver test_job_driver = { + .run = test_job_run, + .complete = test_job_complete, + .prepare = test_job_prepare, ++ .commit = test_job_commit, ++ .abort = test_job_abort, + }, + }; + +-static void test_blockjob_common(enum drain_type drain_type, bool use_iothread) ++enum test_job_result { ++ TEST_JOB_SUCCESS, ++ TEST_JOB_FAIL_RUN, ++ TEST_JOB_FAIL_PREPARE, ++}; ++ ++static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, ++ enum test_job_result result) + { + BlockBackend *blk_src, *blk_target; + 
BlockDriverState *src, *target; + BlockJob *job; ++ TestBlockJob *tjob; + IOThread *iothread = NULL; + AioContext *ctx; + int ret; +@@ -858,9 +886,23 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread) + blk_insert_bs(blk_target, target, &error_abort); + + aio_context_acquire(ctx); +- job = block_job_create("job0", &test_job_driver, NULL, src, 0, BLK_PERM_ALL, +- 0, 0, NULL, NULL, &error_abort); ++ tjob = block_job_create("job0", &test_job_driver, NULL, src, ++ 0, BLK_PERM_ALL, ++ 0, 0, NULL, NULL, &error_abort); ++ job = &tjob->common; + block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort); ++ ++ switch (result) { ++ case TEST_JOB_SUCCESS: ++ break; ++ case TEST_JOB_FAIL_RUN: ++ tjob->run_ret = -EIO; ++ break; ++ case TEST_JOB_FAIL_PREPARE: ++ tjob->prepare_ret = -EIO; ++ break; ++ } ++ + job_start(&job->job); + aio_context_release(ctx); + +@@ -918,7 +960,7 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread) + + aio_context_acquire(ctx); + ret = job_complete_sync(&job->job, &error_abort); +- g_assert_cmpint(ret, ==, 0); ++ g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 
0 : -EIO)); + + if (use_iothread) { + blk_set_aio_context(blk_src, qemu_get_aio_context()); +@@ -937,32 +979,68 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread) + + static void test_blockjob_drain_all(void) + { +- test_blockjob_common(BDRV_DRAIN_ALL, false); ++ test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_SUCCESS); + } + + static void test_blockjob_drain(void) + { +- test_blockjob_common(BDRV_DRAIN, false); ++ test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS); + } + + static void test_blockjob_drain_subtree(void) + { +- test_blockjob_common(BDRV_SUBTREE_DRAIN, false); ++ test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS); ++} ++ ++static void test_blockjob_error_drain_all(void) ++{ ++ test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN); ++ test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_PREPARE); ++} ++ ++static void test_blockjob_error_drain(void) ++{ ++ test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_RUN); ++ test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE); ++} ++ ++static void test_blockjob_error_drain_subtree(void) ++{ ++ test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN); ++ test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE); + } + + static void test_blockjob_iothread_drain_all(void) + { +- test_blockjob_common(BDRV_DRAIN_ALL, true); ++ test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS); + } + + static void test_blockjob_iothread_drain(void) + { +- test_blockjob_common(BDRV_DRAIN, true); ++ test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS); + } + + static void test_blockjob_iothread_drain_subtree(void) + { +- test_blockjob_common(BDRV_SUBTREE_DRAIN, true); ++ test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS); ++} ++ ++static void test_blockjob_iothread_error_drain_all(void) ++{ ++ test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN); ++ test_blockjob_common(BDRV_DRAIN_ALL, true, 
TEST_JOB_FAIL_PREPARE); ++} ++ ++static void test_blockjob_iothread_error_drain(void) ++{ ++ test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_RUN); ++ test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE); ++} ++ ++static void test_blockjob_iothread_error_drain_subtree(void) ++{ ++ test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN); ++ test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE); + } + + +@@ -1433,6 +1511,13 @@ int main(int argc, char **argv) + g_test_add_func("/bdrv-drain/blockjob/drain_subtree", + test_blockjob_drain_subtree); + ++ g_test_add_func("/bdrv-drain/blockjob/error/drain_all", ++ test_blockjob_error_drain_all); ++ g_test_add_func("/bdrv-drain/blockjob/error/drain", ++ test_blockjob_error_drain); ++ g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree", ++ test_blockjob_error_drain_subtree); ++ + g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all", + test_blockjob_iothread_drain_all); + g_test_add_func("/bdrv-drain/blockjob/iothread/drain", +@@ -1440,6 +1525,13 @@ int main(int argc, char **argv) + g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree", + test_blockjob_iothread_drain_subtree); + ++ g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all", ++ test_blockjob_iothread_error_drain_all); ++ g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain", ++ test_blockjob_iothread_error_drain); ++ g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree", ++ test_blockjob_iothread_error_drain_subtree); ++ + g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain); + g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all); + g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); +-- +1.8.3.1 + diff --git a/0097-test-bdrv-drain-Fix-outdated-comments.patch b/0097-test-bdrv-drain-Fix-outdated-comments.patch new file mode 100644 index 0000000..08e50a7 --- /dev/null +++ b/0097-test-bdrv-drain-Fix-outdated-comments.patch @@ -0,0 
+1,69 @@ +From 1eaa60bc24cb3fecba8da61f21c44e6f4c9ee4c1 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:22:11 +0100 +Subject: test-bdrv-drain: Fix outdated comments + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-33-kwolf@redhat.com> +Patchwork-id: 82621 +O-Subject: [RHEL-8 qemu-kvm PATCH 42/44] test-bdrv-drain: Fix outdated comments +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +Commit 89bd030533e changed the test case from using job_sleep_ns() to +using qemu_co_sleep_ns() instead. Also, block_job_sleep_ns() became +job_sleep_ns() in commit 5d43e86e11f. + +In both cases, some comments in the test case were not updated. Do that +now. + +Reported-by: Max Reitz +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +(cherry picked from commit 5599c162c3bec2bc8f0123e4d5802a70d9984b3b) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/test-bdrv-drain.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c +index d6202b2..7e7ba9b 100644 +--- a/tests/test-bdrv-drain.c ++++ b/tests/test-bdrv-drain.c +@@ -820,9 +820,9 @@ static int coroutine_fn test_job_run(Job *job, Error **errp) + + job_transition_to_ready(&s->common.job); + while (!s->should_complete) { +- /* Avoid block_job_sleep_ns() because it marks the job as !busy. We +- * want to emulate some actual activity (probably some I/O) here so +- * that drain has to wait for this acitivity to stop. */ ++ /* Avoid job_sleep_ns() because it marks the job as !busy. We want to ++ * emulate some actual activity (probably some I/O) here so that drain ++ * has to wait for this activity to stop. 
*/ + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); + job_pause_point(&s->common.job); + } +@@ -908,7 +908,7 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, + + g_assert_cmpint(job->job.pause_count, ==, 0); + g_assert_false(job->job.paused); +- g_assert_true(job->job.busy); /* We're in job_sleep_ns() */ ++ g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ + + do_drain_begin_unlocked(drain_type, src); + +@@ -956,7 +956,7 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, + + g_assert_cmpint(job->job.pause_count, ==, 0); + g_assert_false(job->job.paused); +- g_assert_true(job->job.busy); /* We're in job_sleep_ns() */ ++ g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ + + aio_context_acquire(ctx); + ret = job_complete_sync(&job->job, &error_abort); +-- +1.8.3.1 + diff --git a/0098-block-Use-a-single-global-AioWait.patch b/0098-block-Use-a-single-global-AioWait.patch new file mode 100644 index 0000000..4fc26b8 --- /dev/null +++ b/0098-block-Use-a-single-global-AioWait.patch @@ -0,0 +1,367 @@ +From ea2355d819127ace6195e1d007bc305a49e7d465 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:22:12 +0100 +Subject: block: Use a single global AioWait + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-34-kwolf@redhat.com> +Patchwork-id: 82623 +O-Subject: [RHEL-8 qemu-kvm PATCH 43/44] block: Use a single global AioWait +Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +When draining a block node, we recurse to its parent and for subtree +drains also to its children. A single AIO_WAIT_WHILE() is then used to +wait for bdrv_drain_poll() to become false, which depends on all of the +nodes we recursed to. However, if the respective child or parent becomes +quiescent and calls bdrv_wakeup(), only the AioWait of the child/parent +is checked, while AIO_WAIT_WHILE() depends on the AioWait of the +original node. 
+ +Fix this by using a single AioWait for all callers of AIO_WAIT_WHILE(). + +This may mean that the draining thread gets a few more unnecessary +wakeups because an unrelated operation got completed, but we already +wake it up when something _could_ have changed rather than only if it +has certainly changed. + +Apart from that, drain is a slow path anyway. In theory it would be +possible to use wakeups more selectively and still correctly, but the +gains are likely not worth the additional complexity. In fact, this +patch is a nice simplification for some places in the code. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Max Reitz +(cherry picked from commit cfe29d8294e06420e15d4938421ae006c8ac49e7) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block.c | 5 ----- + block/block-backend.c | 11 ++++------- + block/io.c | 7 ++----- + blockjob.c | 13 +------------ + include/block/aio-wait.h | 22 +++++++++++----------- + include/block/block.h | 6 +----- + include/block/block_int.h | 3 --- + include/block/blockjob.h | 10 ---------- + job.c | 3 +-- + util/aio-wait.c | 11 ++++++----- + 10 files changed, 26 insertions(+), 65 deletions(-) + +diff --git a/block.c b/block.c +index 39f373e..9b55956 100644 +--- a/block.c ++++ b/block.c +@@ -4865,11 +4865,6 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs) + return bs ? bs->aio_context : qemu_get_aio_context(); + } + +-AioWait *bdrv_get_aio_wait(BlockDriverState *bs) +-{ +- return bs ? &bs->wait : NULL; +-} +- + void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co) + { + aio_co_enter(bdrv_get_aio_context(bs), co); +diff --git a/block/block-backend.c b/block/block-backend.c +index 9a3e060..723ab5a 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -88,7 +88,6 @@ struct BlockBackend { + * Accessed with atomic ops. 
+ */ + unsigned int in_flight; +- AioWait wait; + }; + + typedef struct BlockBackendAIOCB { +@@ -1300,7 +1299,7 @@ static void blk_inc_in_flight(BlockBackend *blk) + static void blk_dec_in_flight(BlockBackend *blk) + { + atomic_dec(&blk->in_flight); +- aio_wait_kick(&blk->wait); ++ aio_wait_kick(); + } + + static void error_callback_bh(void *opaque) +@@ -1601,9 +1600,8 @@ void blk_drain(BlockBackend *blk) + } + + /* We may have -ENOMEDIUM completions in flight */ +- AIO_WAIT_WHILE(&blk->wait, +- blk_get_aio_context(blk), +- atomic_mb_read(&blk->in_flight) > 0); ++ AIO_WAIT_WHILE(blk_get_aio_context(blk), ++ atomic_mb_read(&blk->in_flight) > 0); + + if (bs) { + bdrv_drained_end(bs); +@@ -1622,8 +1620,7 @@ void blk_drain_all(void) + aio_context_acquire(ctx); + + /* We may have -ENOMEDIUM completions in flight */ +- AIO_WAIT_WHILE(&blk->wait, ctx, +- atomic_mb_read(&blk->in_flight) > 0); ++ AIO_WAIT_WHILE(ctx, atomic_mb_read(&blk->in_flight) > 0); + + aio_context_release(ctx); + } +diff --git a/block/io.c b/block/io.c +index 8b81ff3..bd9d688 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -38,8 +38,6 @@ + /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ + #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) + +-static AioWait drain_all_aio_wait; +- + static void bdrv_parent_cb_resize(BlockDriverState *bs); + static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int bytes, BdrvRequestFlags flags); +@@ -557,7 +555,7 @@ void bdrv_drain_all_begin(void) + } + + /* Now poll the in-flight requests */ +- AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll()); ++ AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll()); + + while ((bs = bdrv_next_all_states(bs))) { + bdrv_drain_assert_idle(bs); +@@ -713,8 +711,7 @@ void bdrv_inc_in_flight(BlockDriverState *bs) + + void bdrv_wakeup(BlockDriverState *bs) + { +- aio_wait_kick(bdrv_get_aio_wait(bs)); +- aio_wait_kick(&drain_all_aio_wait); ++ aio_wait_kick(); + } + + void 
bdrv_dec_in_flight(BlockDriverState *bs) +diff --git a/blockjob.c b/blockjob.c +index 617d86f..06f2429 100644 +--- a/blockjob.c ++++ b/blockjob.c +@@ -221,20 +221,9 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + return 0; + } + +-void block_job_wakeup_all_bdrv(BlockJob *job) +-{ +- GSList *l; +- +- for (l = job->nodes; l; l = l->next) { +- BdrvChild *c = l->data; +- bdrv_wakeup(c->bs); +- } +-} +- + static void block_job_on_idle(Notifier *n, void *opaque) + { +- BlockJob *job = opaque; +- block_job_wakeup_all_bdrv(job); ++ aio_wait_kick(); + } + + bool block_job_is_internal(BlockJob *job) +diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h +index 600fad1..afd0ff7 100644 +--- a/include/block/aio-wait.h ++++ b/include/block/aio-wait.h +@@ -30,14 +30,15 @@ + /** + * AioWait: + * +- * An object that facilitates synchronous waiting on a condition. The main +- * loop can wait on an operation running in an IOThread as follows: ++ * An object that facilitates synchronous waiting on a condition. A single ++ * global AioWait object (global_aio_wait) is used internally. ++ * ++ * The main loop can wait on an operation running in an IOThread as follows: + * +- * AioWait *wait = ...; + * AioContext *ctx = ...; + * MyWork work = { .done = false }; + * schedule_my_work_in_iothread(ctx, &work); +- * AIO_WAIT_WHILE(wait, ctx, !work.done); ++ * AIO_WAIT_WHILE(ctx, !work.done); + * + * The IOThread must call aio_wait_kick() to notify the main loop when + * work.done changes: +@@ -46,7 +47,7 @@ + * { + * ... + * work.done = true; +- * aio_wait_kick(wait); ++ * aio_wait_kick(); + * } + */ + typedef struct { +@@ -54,9 +55,10 @@ typedef struct { + unsigned num_waiters; + } AioWait; + ++extern AioWait global_aio_wait; ++ + /** + * AIO_WAIT_WHILE: +- * @wait: the aio wait object + * @ctx: the aio context, or NULL if multiple aio contexts (for which the + * caller does not hold a lock) are involved in the polling condition. 
+ * @cond: wait while this conditional expression is true +@@ -72,9 +74,9 @@ typedef struct { + * wait on conditions between two IOThreads since that could lead to deadlock, + * go via the main loop instead. + */ +-#define AIO_WAIT_WHILE(wait, ctx, cond) ({ \ ++#define AIO_WAIT_WHILE(ctx, cond) ({ \ + bool waited_ = false; \ +- AioWait *wait_ = (wait); \ ++ AioWait *wait_ = &global_aio_wait; \ + AioContext *ctx_ = (ctx); \ + /* Increment wait_->num_waiters before evaluating cond. */ \ + atomic_inc(&wait_->num_waiters); \ +@@ -102,14 +104,12 @@ typedef struct { + + /** + * aio_wait_kick: +- * @wait: the aio wait object that should re-evaluate its condition +- * + * Wake up the main thread if it is waiting on AIO_WAIT_WHILE(). During + * synchronous operations performed in an IOThread, the main thread lets the + * IOThread's event loop run, waiting for the operation to complete. A + * aio_wait_kick() call will wake up the main thread. + */ +-void aio_wait_kick(AioWait *wait); ++void aio_wait_kick(void); + + /** + * aio_wait_bh_oneshot: +diff --git a/include/block/block.h b/include/block/block.h +index 4e0871a..4edc1e8 100644 +--- a/include/block/block.h ++++ b/include/block/block.h +@@ -410,13 +410,9 @@ void bdrv_drain_all_begin(void); + void bdrv_drain_all_end(void); + void bdrv_drain_all(void); + +-/* Returns NULL when bs == NULL */ +-AioWait *bdrv_get_aio_wait(BlockDriverState *bs); +- + #define BDRV_POLL_WHILE(bs, cond) ({ \ + BlockDriverState *bs_ = (bs); \ +- AIO_WAIT_WHILE(bdrv_get_aio_wait(bs_), \ +- bdrv_get_aio_context(bs_), \ ++ AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \ + cond); }) + + int bdrv_pdiscard(BdrvChild *child, int64_t offset, int bytes); +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 4000d2a..92ecbd8 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -794,9 +794,6 @@ struct BlockDriverState { + unsigned int in_flight; + unsigned int serialising_in_flight; + +- /* Kicked to signal main loop 
when a request completes. */ +- AioWait wait; +- + /* counter for nested bdrv_io_plug. + * Accessed with atomic ops. + */ +diff --git a/include/block/blockjob.h b/include/block/blockjob.h +index 2290bbb..ede0bd8 100644 +--- a/include/block/blockjob.h ++++ b/include/block/blockjob.h +@@ -122,16 +122,6 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + void block_job_remove_all_bdrv(BlockJob *job); + + /** +- * block_job_wakeup_all_bdrv: +- * @job: The block job +- * +- * Calls bdrv_wakeup() for all BlockDriverStates that have been added to the +- * job. This function is to be called whenever child_job_drained_poll() would +- * go from true to false to notify waiting drain requests. +- */ +-void block_job_wakeup_all_bdrv(BlockJob *job); +- +-/** + * block_job_set_speed: + * @job: The job to set the speed for. + * @speed: The new value +diff --git a/job.c b/job.c +index 0b02186..ed4da6f 100644 +--- a/job.c ++++ b/job.c +@@ -978,7 +978,6 @@ void job_complete(Job *job, Error **errp) + int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp) + { + Error *local_err = NULL; +- AioWait dummy_wait = {}; + int ret; + + job_ref(job); +@@ -992,7 +991,7 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp) + return -EBUSY; + } + +- AIO_WAIT_WHILE(&dummy_wait, job->aio_context, ++ AIO_WAIT_WHILE(job->aio_context, + (job_drain(job), !job_is_completed(job))); + + ret = (job_is_cancelled(job) && job->ret == 0) ? 
-ECANCELED : job->ret; +diff --git a/util/aio-wait.c b/util/aio-wait.c +index b8a8f86..b487749 100644 +--- a/util/aio-wait.c ++++ b/util/aio-wait.c +@@ -26,21 +26,22 @@ + #include "qemu/main-loop.h" + #include "block/aio-wait.h" + ++AioWait global_aio_wait; ++ + static void dummy_bh_cb(void *opaque) + { + /* The point is to make AIO_WAIT_WHILE()'s aio_poll() return */ + } + +-void aio_wait_kick(AioWait *wait) ++void aio_wait_kick(void) + { + /* The barrier (or an atomic op) is in the caller. */ +- if (atomic_read(&wait->num_waiters)) { ++ if (atomic_read(&global_aio_wait.num_waiters)) { + aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL); + } + } + + typedef struct { +- AioWait wait; + bool done; + QEMUBHFunc *cb; + void *opaque; +@@ -54,7 +55,7 @@ static void aio_wait_bh(void *opaque) + data->cb(data->opaque); + + data->done = true; +- aio_wait_kick(&data->wait); ++ aio_wait_kick(); + } + + void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) +@@ -67,5 +68,5 @@ void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + + aio_bh_schedule_oneshot(ctx, aio_wait_bh, &data); +- AIO_WAIT_WHILE(&data.wait, ctx, !data.done); ++ AIO_WAIT_WHILE(ctx, !data.done); + } +-- +1.8.3.1 + diff --git a/0099-test-bdrv-drain-Test-draining-job-source-child-and-p.patch b/0099-test-bdrv-drain-Test-draining-job-source-child-and-p.patch new file mode 100644 index 0000000..5533b5f --- /dev/null +++ b/0099-test-bdrv-drain-Test-draining-job-source-child-and-p.patch @@ -0,0 +1,198 @@ +From f31ce5e7d486c860d44cb103b672f81de9bc537c Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 10 Oct 2018 20:22:13 +0100 +Subject: test-bdrv-drain: Test draining job source child and parent + +RH-Author: Kevin Wolf +Message-id: <20181010202213.7372-35-kwolf@redhat.com> +Patchwork-id: 82624 +O-Subject: [RHEL-8 qemu-kvm PATCH 44/44] test-bdrv-drain: Test draining job source child and parent 
+Bugzilla: 1637976 +RH-Acked-by: Max Reitz +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +For the block job drain test, don't only test draining the source and +the target node, but create a backing chain for the source +(source_backing <- source <- source_overlay) and test draining each of +the nodes in it. + +When using iothreads, the source node (and therefore the job) is in a +different AioContext than the drain, which happens from the main +thread. This way, the main thread waits in AIO_WAIT_WHILE() for the +iothread to make progress and aio_wait_kick() is required to notify it. +The test validates that calling bdrv_wakeup() for a child or a parent +node will actually notify AIO_WAIT_WHILE() instead of letting it hang. + +Increase the sleep time a bit (to 1 ms) because the test case is racy +and with the shorter sleep, it didn't reproduce the bug it is supposed +to test for me under 'rr record -n'. + +This was because bdrv_drain_invoke_entry() (in the main thread) was only +called after the job had already reached the pause point, so we got a +bdrv_dec_in_flight() from the main thread and the additional +aio_wait_kick() when the job becomes idle (that we really wanted to test +here) wasn't even necessary any more to make progress. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Max Reitz +(cherry picked from commit d8b3afd597d54e496809b05ac39ac29a5799664f) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L.
de Paula +--- + tests/test-bdrv-drain.c | 77 ++++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 69 insertions(+), 8 deletions(-) + +diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c +index 7e7ba9b..8641b54 100644 +--- a/tests/test-bdrv-drain.c ++++ b/tests/test-bdrv-drain.c +@@ -786,6 +786,7 @@ typedef struct TestBlockJob { + BlockJob common; + int run_ret; + int prepare_ret; ++ bool running; + bool should_complete; + } TestBlockJob; + +@@ -818,12 +819,17 @@ static int coroutine_fn test_job_run(Job *job, Error **errp) + { + TestBlockJob *s = container_of(job, TestBlockJob, common.job); + ++ /* We are running the actual job code past the pause point in ++ * job_co_entry(). */ ++ s->running = true; ++ + job_transition_to_ready(&s->common.job); + while (!s->should_complete) { + /* Avoid job_sleep_ns() because it marks the job as !busy. We want to + * emulate some actual activity (probably some I/O) here so that drain + * has to wait for this activity to stop. */ +- qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); ++ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); ++ + job_pause_point(&s->common.job); + } + +@@ -856,11 +862,19 @@ enum test_job_result { + TEST_JOB_FAIL_PREPARE, + }; + +-static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, +- enum test_job_result result) ++enum test_job_drain_node { ++ TEST_JOB_DRAIN_SRC, ++ TEST_JOB_DRAIN_SRC_CHILD, ++ TEST_JOB_DRAIN_SRC_PARENT, ++}; ++ ++static void test_blockjob_common_drain_node(enum drain_type drain_type, ++ bool use_iothread, ++ enum test_job_result result, ++ enum test_job_drain_node drain_node) + { + BlockBackend *blk_src, *blk_target; +- BlockDriverState *src, *target; ++ BlockDriverState *src, *src_backing, *src_overlay, *target, *drain_bs; + BlockJob *job; + TestBlockJob *tjob; + IOThread *iothread = NULL; +@@ -869,8 +883,32 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, + + src = bdrv_new_open_driver(&bdrv_test, 
"source", BDRV_O_RDWR, + &error_abort); ++ src_backing = bdrv_new_open_driver(&bdrv_test, "source-backing", ++ BDRV_O_RDWR, &error_abort); ++ src_overlay = bdrv_new_open_driver(&bdrv_test, "source-overlay", ++ BDRV_O_RDWR, &error_abort); ++ ++ bdrv_set_backing_hd(src_overlay, src, &error_abort); ++ bdrv_unref(src); ++ bdrv_set_backing_hd(src, src_backing, &error_abort); ++ bdrv_unref(src_backing); ++ + blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); +- blk_insert_bs(blk_src, src, &error_abort); ++ blk_insert_bs(blk_src, src_overlay, &error_abort); ++ ++ switch (drain_node) { ++ case TEST_JOB_DRAIN_SRC: ++ drain_bs = src; ++ break; ++ case TEST_JOB_DRAIN_SRC_CHILD: ++ drain_bs = src_backing; ++ break; ++ case TEST_JOB_DRAIN_SRC_PARENT: ++ drain_bs = src_overlay; ++ break; ++ default: ++ g_assert_not_reached(); ++ } + + if (use_iothread) { + iothread = iothread_new(); +@@ -906,11 +944,21 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, + job_start(&job->job); + aio_context_release(ctx); + ++ if (use_iothread) { ++ /* job_co_entry() is run in the I/O thread, wait for the actual job ++ * code to start (we don't want to catch the job in the pause point in ++ * job_co_entry(). 
*/ ++ while (!tjob->running) { ++ aio_poll(qemu_get_aio_context(), false); ++ } ++ } ++ + g_assert_cmpint(job->job.pause_count, ==, 0); + g_assert_false(job->job.paused); ++ g_assert_true(tjob->running); + g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ + +- do_drain_begin_unlocked(drain_type, src); ++ do_drain_begin_unlocked(drain_type, drain_bs); + + if (drain_type == BDRV_DRAIN_ALL) { + /* bdrv_drain_all() drains both src and target */ +@@ -921,7 +969,7 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, + g_assert_true(job->job.paused); + g_assert_false(job->job.busy); /* The job is paused */ + +- do_drain_end_unlocked(drain_type, src); ++ do_drain_end_unlocked(drain_type, drain_bs); + + if (use_iothread) { + /* paused is reset in the I/O thread, wait for it */ +@@ -969,7 +1017,7 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, + + blk_unref(blk_src); + blk_unref(blk_target); +- bdrv_unref(src); ++ bdrv_unref(src_overlay); + bdrv_unref(target); + + if (iothread) { +@@ -977,6 +1025,19 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, + } + } + ++static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, ++ enum test_job_result result) ++{ ++ test_blockjob_common_drain_node(drain_type, use_iothread, result, ++ TEST_JOB_DRAIN_SRC); ++ test_blockjob_common_drain_node(drain_type, use_iothread, result, ++ TEST_JOB_DRAIN_SRC_CHILD); ++ if (drain_type == BDRV_SUBTREE_DRAIN) { ++ test_blockjob_common_drain_node(drain_type, use_iothread, result, ++ TEST_JOB_DRAIN_SRC_PARENT); ++ } ++} ++ + static void test_blockjob_drain_all(void) + { + test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_SUCCESS); +-- +1.8.3.1 + diff --git a/0100-block-rbd-pull-out-qemu_rbd_convert_options.patch b/0100-block-rbd-pull-out-qemu_rbd_convert_options.patch new file mode 100644 index 0000000..a88abfb --- /dev/null +++ 
b/0100-block-rbd-pull-out-qemu_rbd_convert_options.patch @@ -0,0 +1,95 @@ +From 5fcd80dc34d84739e75e6d1ec5e21ad73af14ff9 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Wed, 10 Oct 2018 20:30:12 +0100 +Subject: block/rbd: pull out qemu_rbd_convert_options + +RH-Author: John Snow +Message-id: <20181010203015.11719-2-jsnow@redhat.com> +Patchwork-id: 82627 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 1/4] block/rbd: pull out qemu_rbd_convert_options +Bugzilla: 1635585 +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Thomas Huth + +From: Jeff Cody + +Code movement to pull the conversion from Qdict to BlockdevOptionsRbd +into a helper function. + +Reviewed-by: Eric Blake +Reviewed-by: John Snow +Signed-off-by: Jeff Cody +Message-id: 5b49a980f2cde6610ab1df41bb0277d00b5db893.1536704901.git.jcody@redhat.com +Signed-off-by: Jeff Cody +(cherry picked from commit f24b03b56cdb28d753b4ff9ae210d555f14cb0d8) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +--- + block/rbd.c | 36 ++++++++++++++++++++++++------------ + 1 file changed, 24 insertions(+), 12 deletions(-) + +diff --git a/block/rbd.c b/block/rbd.c +index ca8e5bb..b199450 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -655,12 +655,34 @@ failed_opts: + return r; + } + ++static int qemu_rbd_convert_options(QDict *options, BlockdevOptionsRbd **opts, ++ Error **errp) ++{ ++ Visitor *v; ++ Error *local_err = NULL; ++ ++ /* Convert the remaining options into a QAPI object */ ++ v = qobject_input_visitor_new_flat_confused(options, errp); ++ if (!v) { ++ return -EINVAL; ++ } ++ ++ visit_type_BlockdevOptionsRbd(v, NULL, opts, &local_err); ++ visit_free(v); ++ ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ + static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) + { + BDRVRBDState *s = bs->opaque; + BlockdevOptionsRbd *opts = NULL; +- Visitor *v; + const QDictEntry *e; + Error *local_err = NULL; + 
char *keypairs, *secretid; +@@ -676,19 +698,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, + qdict_del(options, "password-secret"); + } + +- /* Convert the remaining options into a QAPI object */ +- v = qobject_input_visitor_new_flat_confused(options, errp); +- if (!v) { +- r = -EINVAL; +- goto out; +- } +- +- visit_type_BlockdevOptionsRbd(v, NULL, &opts, &local_err); +- visit_free(v); +- ++ r = qemu_rbd_convert_options(options, &opts, &local_err); + if (local_err) { + error_propagate(errp, local_err); +- r = -EINVAL; + goto out; + } + +-- +1.8.3.1 + diff --git a/0101-block-rbd-Attempt-to-parse-legacy-filenames.patch b/0101-block-rbd-Attempt-to-parse-legacy-filenames.patch new file mode 100644 index 0000000..24add45 --- /dev/null +++ b/0101-block-rbd-Attempt-to-parse-legacy-filenames.patch @@ -0,0 +1,120 @@ +From 6198ce651b242298fa6f5cc7ba79eb168789899c Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Wed, 10 Oct 2018 20:30:13 +0100 +Subject: block/rbd: Attempt to parse legacy filenames + +RH-Author: John Snow +Message-id: <20181010203015.11719-3-jsnow@redhat.com> +Patchwork-id: 82629 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 2/4] block/rbd: Attempt to parse legacy filenames +Bugzilla: 1635585 +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Thomas Huth + +From: Jeff Cody + +When we converted rbd to get rid of the older key/value-centric +encoding format, we broke compatibility with image files with backing +file strings encoded in the old format. + +This leaves a bit of an ugly conundrum, and a hacky solution. + +If the initial attempt to parse the "proper" options fails, it assumes +that we may have an older key/value encoded filename. Fall back to +attempting to parse the filename, and extract the required options from +it. If that fails, pass along the original error message. + +We do not support mixed modern usage alongside legacy keyvalue pair +usage. 
+ +A deprecation warning has been added, although care should be taken +when actually deprecating since the impact is not limited to +commandline or qapi usage, but also opening existing images. + +Reviewed-by: Eric Blake +Signed-off-by: Jeff Cody +Message-id: 15b332e5432ad069441f7275a46080f465d789a0.1536704901.git.jcody@redhat.com +Signed-off-by: Jeff Cody +(cherry picked from commit 084d1d13bdb753d558b991996e7686c077bd6d80) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +--- + block/rbd.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 52 insertions(+), 2 deletions(-) + +diff --git a/block/rbd.c b/block/rbd.c +index b199450..014c68d 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -678,6 +678,33 @@ static int qemu_rbd_convert_options(QDict *options, BlockdevOptionsRbd **opts, + return 0; + } + ++static int qemu_rbd_attempt_legacy_options(QDict *options, ++ BlockdevOptionsRbd **opts, ++ char **keypairs) ++{ ++ char *filename; ++ int r; ++ ++ filename = g_strdup(qdict_get_try_str(options, "filename")); ++ if (!filename) { ++ return -EINVAL; ++ } ++ qdict_del(options, "filename"); ++ ++ qemu_rbd_parse_filename(filename, options, NULL); ++ ++ /* keypairs freed by caller */ ++ *keypairs = g_strdup(qdict_get_try_str(options, "=keyvalue-pairs")); ++ if (*keypairs) { ++ qdict_del(options, "=keyvalue-pairs"); ++ } ++ ++ r = qemu_rbd_convert_options(options, opts, NULL); ++ ++ g_free(filename); ++ return r; ++} ++ + static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) + { +@@ -700,8 +727,31 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, + + r = qemu_rbd_convert_options(options, &opts, &local_err); + if (local_err) { +- error_propagate(errp, local_err); +- goto out; ++ /* If keypairs are present, that means some options are present in ++ * the modern option format. Don't attempt to parse legacy option ++ * formats, as we won't support mixed usage. 
*/ ++ if (keypairs) { ++ error_propagate(errp, local_err); ++ goto out; ++ } ++ ++ /* If the initial attempt to convert and process the options failed, ++ * we may be attempting to open an image file that has the rbd options ++ * specified in the older format consisting of all key/value pairs ++ * encoded in the filename. Go ahead and attempt to parse the ++ * filename, and see if we can pull out the required options. */ ++ r = qemu_rbd_attempt_legacy_options(options, &opts, &keypairs); ++ if (r < 0) { ++ /* Propagate the original error, not the legacy parsing fallback ++ * error, as the latter was just a best-effort attempt. */ ++ error_propagate(errp, local_err); ++ goto out; ++ } ++ /* Take care whenever deciding to actually deprecate; once this ability ++ * is removed, we will not be able to open any images with legacy-styled ++ * backing image strings. */ ++ error_report("RBD options encoded in the filename as keyvalue pairs " ++ "is deprecated"); + } + + /* Remove the processed options from the QDict (the visitor processes +-- +1.8.3.1 + diff --git a/0102-block-rbd-add-deprecation-documentation-for-filename.patch b/0102-block-rbd-add-deprecation-documentation-for-filename.patch new file mode 100644 index 0000000..fdd94ea --- /dev/null +++ b/0102-block-rbd-add-deprecation-documentation-for-filename.patch @@ -0,0 +1,59 @@ +From aed464e31c9f6d92aa67960dc1a0891461393305 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Tue, 11 Sep 2018 18:32:33 -0400 +Subject: block/rbd: add deprecation documentation for filename keyvalue pairs + +RH-Author: John Snow +Message-id: <20181010203015.11719-5-jsnow@redhat.com> +Patchwork-id: 82625 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 4/4] block/rbd: add deprecation docume +Bugzilla: 1635585 +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Thomas Huth + +From: Jeff Cody + +Signed-off-by: Jeff Cody +Message-id: 647f5b5ab7efd8bf567a504c832b1d2d6f719b23.1536704901.git.jcody@re +Signed-off-by: Jeff Cody +(cherry 
picked from commit 3bebd37e04f972775b1ece1bdda95451bc9fb14c) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: John Snow + +Rebase notes (3.0.0): +- Used upstream version +--- + qemu-deprecated.texi | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi +index 9920a85..cff0e8b 100644 +--- a/qemu-deprecated.texi ++++ b/qemu-deprecated.texi +@@ -227,6 +227,21 @@ from old QEMU versions anymore. A newer machine type should be used instead. + In order to prevent QEMU from automatically opening an image's backing + chain, use ``"backing": null'' instead. + ++@subsubsection rbd keyvalue pair encoded filenames: "" (since 3.1.0) ++ ++Options for ``rbd'' should be specified according to its runtime options, ++like other block drivers. Legacy parsing of keyvalue pair encoded ++filenames is useful to open images with the old format for backing files; ++These image files should be updated to use the current format. 
++ ++Example of legacy encoding: ++ ++@code{json:@{"file.driver":"rbd", "file.filename":"rbd:rbd/name"@}} ++ ++The above, converted to the current supported format: ++ ++@code{json:@{"file.driver":"rbd", "file.pool":"rbd", "file.image":"name"@}} ++ + @subsection vio-spapr-device device options + + @subsubsection "irq": "" (since 3.0.0) +-- +1.8.3.1 + diff --git a/0103-block-rbd-add-iotest-for-rbd-legacy-keyvalue-filenam.patch b/0103-block-rbd-add-iotest-for-rbd-legacy-keyvalue-filenam.patch new file mode 100644 index 0000000..397826f --- /dev/null +++ b/0103-block-rbd-add-iotest-for-rbd-legacy-keyvalue-filenam.patch @@ -0,0 +1,141 @@ +From 76eb6df2dfd755e7cbda2eb07df464d25f9d73c5 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Wed, 10 Oct 2018 20:30:14 +0100 +Subject: block/rbd: add iotest for rbd legacy keyvalue filename parsing + +RH-Author: John Snow +Message-id: <20181010203015.11719-4-jsnow@redhat.com> +Patchwork-id: 82628 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 3/4] block/rbd: add iotest for rbd legacy keyvalue filename parsing +Bugzilla: 1635585 +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Thomas Huth + +From: Jeff Cody + +This is a small test that will check for the ability to parse +both legacy and modern options for rbd. + +The way the test is set up is for failure to occur, but without +having to wait to timeout on a non-existent rbd server. The error +messages in the success path show that the arguments were parsed. + +The failure behavior prior to the patch series that has this test, is +qemu-img complaining about mandatory options (e.g. 'pool') not being +provided. + +Reviewed-by: Eric Blake +Signed-off-by: Jeff Cody +Message-id: f830580e339b974a83ed4870d11adcdc17f49a47.1536704901.git.jcody@redhat.com +Signed-off-by: Jeff Cody +(cherry picked from commit 66e6a735e97450ac50fcaf40f78600c688534cae) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. 
de Paula + +Conflicts: + tests/qemu-iotests/group: context (missing prior tests) +Signed-off-by: John Snow +--- + tests/qemu-iotests/231 | 62 ++++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/231.out | 9 +++++++ + tests/qemu-iotests/group | 1 + + 3 files changed, 72 insertions(+) + create mode 100755 tests/qemu-iotests/231 + create mode 100644 tests/qemu-iotests/231.out + +diff --git a/tests/qemu-iotests/231 b/tests/qemu-iotests/231 +new file mode 100755 +index 0000000..3e28370 +--- /dev/null ++++ b/tests/qemu-iotests/231 +@@ -0,0 +1,62 @@ ++#!/bin/bash ++# ++# Test legacy and modern option parsing for rbd/ceph. This will not ++# actually connect to a ceph server, but rather looks for the appropriate ++# error message that indicates we parsed the options correctly. ++# ++# Copyright (C) 2018 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++# creator ++owner=jcody@redhat.com ++ ++seq=`basename $0` ++echo "QA output created by $seq" ++ ++here=`pwd` ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ rm "${BOGUS_CONF}" ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++. ./common.rc ++. 
./common.filter ++ ++_supported_fmt generic ++_supported_proto rbd ++_supported_os Linux ++ ++BOGUS_CONF=${TEST_DIR}/ceph-$$.conf ++touch "${BOGUS_CONF}" ++ ++_filter_conf() ++{ ++ sed -e "s#$BOGUS_CONF#BOGUS_CONF#g" ++} ++ ++# We expect this to fail, with no monitor ip provided and a null conf file. Just want it ++# to fail in the right way. ++$QEMU_IMG info "json:{'file.driver':'rbd','file.filename':'rbd:rbd/bogus:conf=${BOGUS_CONF}'}" 2>&1 | _filter_conf ++$QEMU_IMG info "json:{'file.driver':'rbd','file.pool':'rbd','file.image':'bogus','file.conf':'${BOGUS_CONF}'}" 2>&1 | _filter_conf ++ ++# success, all done ++echo "*** done" ++rm -f $seq.full ++status=0 +diff --git a/tests/qemu-iotests/231.out b/tests/qemu-iotests/231.out +new file mode 100644 +index 0000000..579ba11 +--- /dev/null ++++ b/tests/qemu-iotests/231.out +@@ -0,0 +1,9 @@ ++QA output created by 231 ++qemu-img: RBD options encoded in the filename as keyvalue pairs is deprecated. Future versions may cease to parse these options in the future. ++unable to get monitor info from DNS SRV with service name: ceph-mon ++no monitors specified to connect to. ++qemu-img: Could not open 'json:{'file.driver':'rbd','file.filename':'rbd:rbd/bogus:conf=BOGUS_CONF'}': error connecting: No such file or directory ++unable to get monitor info from DNS SRV with service name: ceph-mon ++no monitors specified to connect to. 
++qemu-img: Could not open 'json:{'file.driver':'rbd','file.pool':'rbd','file.image':'bogus','file.conf':'BOGUS_CONF'}': error connecting: No such file or directory ++*** done +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index 23ab4d3..cc1ca7e 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -226,3 +226,4 @@ + 226 auto quick + 227 auto quick + 229 auto quick ++231 auto quick +-- +1.8.3.1 + diff --git a/0104-luks-Allow-share-rw-on.patch b/0104-luks-Allow-share-rw-on.patch new file mode 100644 index 0000000..bdb3f47 --- /dev/null +++ b/0104-luks-Allow-share-rw-on.patch @@ -0,0 +1,52 @@ +From 8a08519b748ec41c6f542f4ef9406647269db18c Mon Sep 17 00:00:00 2001 +From: Fam Zheng +Date: Fri, 28 Sep 2018 06:09:52 +0100 +Subject: luks: Allow share-rw=on +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Fam Zheng +Message-id: <20180928060952.8616-2-famz@redhat.com> +Patchwork-id: 82311 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 1/1] luks: Allow share-rw=on +Bugzilla: 1629701 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth + +Format drivers such as qcow2 don't allow sharing the same image between +two QEMU instances in order to prevent image corruptions, because of +metadata cache. The LUKS driver doesn't modify metadata except when +creating an image, so it is safe to relax the permission. This makes +share-rw=on property work on virtual devices. + +Suggested-by: Daniel P. Berrangé +Signed-off-by: Fam Zheng +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Kevin Wolf +(cherry picked from commit 497da8236ab2663a8108858ba7ea59aac21c5fe6) +Signed-off-by: Fam Zheng +Signed-off-by: Danilo C. L.
de Paula +--- + block/crypto.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/block/crypto.c b/block/crypto.c +index 146d81c..33ee01b 100644 +--- a/block/crypto.c ++++ b/block/crypto.c +@@ -627,7 +627,9 @@ BlockDriver bdrv_crypto_luks = { + .bdrv_probe = block_crypto_probe_luks, + .bdrv_open = block_crypto_open_luks, + .bdrv_close = block_crypto_close, +- .bdrv_child_perm = bdrv_format_default_perms, ++ /* This driver doesn't modify LUKS metadata except when creating image. ++ * Allow share-rw=on as a special case. */ ++ .bdrv_child_perm = bdrv_filter_default_perms, + .bdrv_co_create = block_crypto_co_create_luks, + .bdrv_co_create_opts = block_crypto_co_create_opts_luks, + .bdrv_co_truncate = block_crypto_co_truncate, +-- +1.8.3.1 + diff --git a/81-kvm-rhel.rules b/81-kvm-rhel.rules new file mode 100644 index 0000000..787cad6 --- /dev/null +++ b/81-kvm-rhel.rules @@ -0,0 +1 @@ +DEVPATH=="*/kvm", ACTION=="change", RUN+="/lib/udev/udev-kvm-check $env{COUNT} $env{EVENT}" diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 60b6e11..75804fb 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -5,7 +5,7 @@ %global have_spice 1 %global have_opengl 1 %global have_fdt 0 -%global have_gluster 0 +%global have_gluster 1 %global have_kvm_setup 0 %global have_seccomp 1 %global have_memlock_limits 0 @@ -56,6 +56,9 @@ %global requires_all_modules \ Requires: %{name}-block-curl = %{epoch}:%{version}-%{release} \ +%if %{have_gluster} \ +Requires: %{name}-block-gluster = %{epoch}:%{version}-%{release} \ +%endif \ Requires: %{name}-block-iscsi = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} @@ -69,7 +72,7 @@ Provides: %1-rhel = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.0.0 -Release: 1%{?dist} +Release: 2%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -106,54 +109,112 @@ Source30: kvm-s390x.conf Source31: kvm-x86.conf Source32: qemu-pr-helper.service Source33: qemu-pr-helper.socket +Source34: 81-kvm-rhel.rules +Source35: udev-kvm-check.c -Patch0001: 0001-Initial-redhat-build.patch -Patch0002: 0002-Enable-disable-devices-for-RHEL-7.patch -Patch0003: 0003-Add-RHEL-machine-types.patch -Patch0004: 0004-Use-kvm-by-default.patch -Patch0005: 0005-vfio-cap-number-of-devices-that-can-be-assigned.patch -Patch0006: 0006-Add-support-statement-to-help-output.patch -Patch0007: 0007-globally-limit-the-maximum-number-of-CPUs.patch -Patch0008: 0008-Add-support-for-simpletrace.patch -Patch0009: 0009-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch -Patch0010: 0010-usb-xhci-Fix-PCI-capability-order.patch -Patch0011: 0011-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch -Patch0012: 0012-linux-headers-asm-s390-kvm.h-header-sync.patch -Patch0013: 0013-s390x-Enable-KVM-huge-page-backing-support.patch -Patch0014: 0014-s390x-kvm-add-etoken-facility.patch -Patch0015: 0015-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch -Patch0016: 0016-i386-Fix-arch_query_cpu_model_expansion-leak.patch -Patch0017: 0017-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch -Patch0018: 0018-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch -Patch0019: 0019-migration-postcopy-Clear-have_listen_thread.patch -Patch0020: 0020-migration-cleanup-in-error-paths-in-loadvm.patch -Patch0021: 0021-jobs-change-start-callback-to-run-callback.patch -Patch0022: 0022-jobs-canonize-Error-object.patch -Patch0023: 0023-jobs-add-exit-shim.patch -Patch0024: 0024-block-commit-utilize-job_exit-shim.patch -Patch0025: 0025-block-mirror-utilize-job_exit-shim.patch -Patch0026: 0026-jobs-utilize-job_exit-shim.patch -Patch0027: 0027-block-backup-make-function-variables-consistently-na.patch -Patch0028: 0028-jobs-remove-ret-argument-to-job_completed-privatize-.patch 
-Patch0029: 0029-jobs-remove-job_defer_to_main_loop.patch -Patch0030: 0030-block-commit-add-block-job-creation-flags.patch -Patch0031: 0031-block-mirror-add-block-job-creation-flags.patch -Patch0032: 0032-block-stream-add-block-job-creation-flags.patch -Patch0033: 0033-block-commit-refactor-commit-to-use-job-callbacks.patch -Patch0034: 0034-block-mirror-don-t-install-backing-chain-on-abort.patch -Patch0035: 0035-block-mirror-conservative-mirror_exit-refactor.patch -Patch0036: 0036-block-stream-refactor-stream-to-use-job-callbacks.patch -Patch0037: 0037-tests-blockjob-replace-Blockjob-with-Job.patch -Patch0038: 0038-tests-test-blockjob-remove-exit-callback.patch -Patch0039: 0039-tests-test-blockjob-txn-move-.exit-to-.clean.patch -Patch0040: 0040-jobs-remove-.exit-callback.patch -Patch0041: 0041-qapi-block-commit-expose-new-job-properties.patch -Patch0042: 0042-qapi-block-mirror-expose-new-job-properties.patch -Patch0043: 0043-qapi-block-stream-expose-new-job-properties.patch -Patch0044: 0044-block-backup-qapi-documentation-fixup.patch -Patch0045: 0045-blockdev-document-transactional-shortcomings.patch +Patch0004: 0004-Initial-redhat-build.patch +Patch0005: 0005-Enable-disable-devices-for-RHEL-7.patch +Patch0006: 0006-Machine-type-related-general-changes.patch +Patch0007: 0007-Add-aarch64-machine-types.patch +Patch0008: 0008-Add-ppc64-machine-types.patch +Patch0009: 0009-Add-s390x-machine-types.patch +Patch0010: 0010-Add-x86_64-machine-types.patch +Patch0011: 0011-Enable-make-check.patch +Patch0012: 0012-Use-kvm-by-default.patch +Patch0013: 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +Patch0014: 0014-Add-support-statement-to-help-output.patch +Patch0015: 0015-globally-limit-the-maximum-number-of-CPUs.patch +Patch0016: 0016-Add-support-for-simpletrace.patch +Patch0017: 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0018: 0018-usb-xhci-Fix-PCI-capability-order.patch +Patch0019: 
0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +Patch0020: 0020-linux-headers-asm-s390-kvm.h-header-sync.patch +Patch0021: 0021-s390x-Enable-KVM-huge-page-backing-support.patch +Patch0022: 0022-s390x-kvm-add-etoken-facility.patch +Patch0023: 0023-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch +Patch0024: 0024-i386-Fix-arch_query_cpu_model_expansion-leak.patch +Patch0025: 0025-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch +Patch0026: 0026-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch +Patch0027: 0027-migration-postcopy-Clear-have_listen_thread.patch +Patch0028: 0028-migration-cleanup-in-error-paths-in-loadvm.patch +Patch0029: 0029-jobs-change-start-callback-to-run-callback.patch +Patch0030: 0030-jobs-canonize-Error-object.patch +Patch0031: 0031-jobs-add-exit-shim.patch +Patch0032: 0032-block-commit-utilize-job_exit-shim.patch +Patch0033: 0033-block-mirror-utilize-job_exit-shim.patch +Patch0034: 0034-jobs-utilize-job_exit-shim.patch +Patch0035: 0035-block-backup-make-function-variables-consistently-na.patch +Patch0036: 0036-jobs-remove-ret-argument-to-job_completed-privatize-.patch +Patch0037: 0037-jobs-remove-job_defer_to_main_loop.patch +Patch0038: 0038-block-commit-add-block-job-creation-flags.patch +Patch0039: 0039-block-mirror-add-block-job-creation-flags.patch +Patch0040: 0040-block-stream-add-block-job-creation-flags.patch +Patch0041: 0041-block-commit-refactor-commit-to-use-job-callbacks.patch +Patch0042: 0042-block-mirror-don-t-install-backing-chain-on-abort.patch +Patch0043: 0043-block-mirror-conservative-mirror_exit-refactor.patch +Patch0044: 0044-block-stream-refactor-stream-to-use-job-callbacks.patch +Patch0045: 0045-tests-blockjob-replace-Blockjob-with-Job.patch +Patch0046: 0046-tests-test-blockjob-remove-exit-callback.patch +Patch0047: 0047-tests-test-blockjob-txn-move-.exit-to-.clean.patch +Patch0048: 0048-jobs-remove-.exit-callback.patch +Patch0049: 0049-qapi-block-commit-expose-new-job-properties.patch 
+Patch0050: 0050-qapi-block-mirror-expose-new-job-properties.patch +Patch0051: 0051-qapi-block-stream-expose-new-job-properties.patch +Patch0052: 0052-block-backup-qapi-documentation-fixup.patch +Patch0053: 0053-blockdev-document-transactional-shortcomings.patch +Patch0054: 0054-seccomp-use-SIGSYS-signal-instead-of-killing-the-thr.patch +Patch0055: 0055-seccomp-prefer-SCMP_ACT_KILL_PROCESS-if-available.patch +Patch0056: 0056-seccomp-set-the-seccomp-filter-to-all-threads.patch +Patch0057: 0057-memory-cleanup-side-effects-of-memory_region_init_fo.patch +Patch0058: 0058-mirror-Fail-gracefully-for-source-target.patch +Patch0059: 0059-commit-Add-top-node-base-node-options.patch +Patch0060: 0060-qemu-iotests-Test-commit-with-top-node-base-node.patch +Patch0061: 0061-block-for-jobs-do-not-clear-user_paused-until-after-.patch +Patch0062: 0062-block-iotest-to-catch-abort-on-forced-blockjob-cance.patch +Patch0063: 0063-Revert-hw-acpi-build-build-SRAT-memory-affinity-stru.patch +Patch0064: 0064-aio-posix-Don-t-count-ctx-notifier-as-progress-when-.patch +Patch0065: 0065-aio-Do-aio_notify_accept-only-during-blocking-aio_po.patch +Patch0066: 0066-aio-posix-fix-concurrent-access-to-poll_disable_cnt.patch +Patch0067: 0067-aio-posix-compute-timeout-before-polling.patch +Patch0068: 0068-aio-posix-do-skip-system-call-if-ctx-notifier-pollin.patch +Patch0069: 0069-linux-headers-update.patch +Patch0070: 0070-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch +Patch0071: 0071-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch +Patch0072: 0072-s390x-ap-base-Adjunct-Processor-AP-object-model.patch +Patch0073: 0073-s390x-vfio-ap-Introduce-VFIO-AP-device.patch +Patch0074: 0074-s390-doc-detailed-specifications-for-AP-virtualizati.patch +Patch0075: 0075-vnc-call-sasl_server_init-only-when-required.patch +Patch0076: 0076-nbd-server-fix-NBD_CMD_CACHE.patch +Patch0077: 0077-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch +Patch0078: 
0078-block-linux-aio-acquire-AioContext-before-qemu_laio_.patch +Patch0079: 0079-util-async-use-qemu_aio_coroutine_enter-in-co_schedu.patch +Patch0080: 0080-job-Fix-nested-aio_poll-hanging-in-job_txn_apply.patch +Patch0081: 0081-job-Fix-missing-locking-due-to-mismerge.patch +Patch0082: 0082-blockjob-Wake-up-BDS-when-job-becomes-idle.patch +Patch0083: 0083-aio-wait-Increase-num_waiters-even-in-home-thread.patch +Patch0084: 0084-test-bdrv-drain-Drain-with-block-jobs-in-an-I-O-thre.patch +Patch0085: 0085-test-blockjob-Acquire-AioContext-around-job_cancel_s.patch +Patch0086: 0086-job-Use-AIO_WAIT_WHILE-in-job_finish_sync.patch +Patch0087: 0087-test-bdrv-drain-Test-AIO_WAIT_WHILE-in-completion-ca.patch +Patch0088: 0088-block-Add-missing-locking-in-bdrv_co_drain_bh_cb.patch +Patch0089: 0089-block-backend-Add-.drained_poll-callback.patch +Patch0090: 0090-block-backend-Fix-potential-double-blk_delete.patch +Patch0091: 0091-block-backend-Decrease-in_flight-only-after-callback.patch +Patch0092: 0092-blockjob-Lie-better-in-child_job_drained_poll.patch +Patch0093: 0093-block-Remove-aio_poll-in-bdrv_drain_poll-variants.patch +Patch0094: 0094-test-bdrv-drain-Test-nested-poll-in-bdrv_drain_poll_.patch +Patch0095: 0095-job-Avoid-deadlocks-in-job_completed_txn_abort.patch +Patch0096: 0096-test-bdrv-drain-AIO_WAIT_WHILE-in-job-.commit-.abort.patch +Patch0097: 0097-test-bdrv-drain-Fix-outdated-comments.patch +Patch0098: 0098-block-Use-a-single-global-AioWait.patch +Patch0099: 0099-test-bdrv-drain-Test-draining-job-source-child-and-p.patch +Patch0100: 0100-block-rbd-pull-out-qemu_rbd_convert_options.patch +Patch0101: 0101-block-rbd-Attempt-to-parse-legacy-filenames.patch +Patch0102: 0102-block-rbd-add-deprecation-documentation-for-filename.patch +Patch0103: 0103-block-rbd-add-iotest-for-rbd-legacy-keyvalue-filenam.patch +Patch0104: 0104-luks-Allow-share-rw-on.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -168,7 +229,7 @@ BuildRequires: pciutils-devel BuildRequires: 
libiscsi-devel BuildRequires: ncurses-devel BuildRequires: libattr-devel -BuildRequires: libusbx-devel >= 1.0.19 +BuildRequires: libusbx-devel >= 1.0.22 %if %{have_usbredir} BuildRequires: usbredir-devel >= 0.7.1 %endif @@ -376,6 +437,17 @@ Install this package if you want to access remote disks over http, https, ftp and other transports provided by the CURL library. +%if %{have_gluster} +%package block-gluster +Summary: QEMU Gluster block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description block-gluster +This package provides the additional Gluster block driver for QEMU. + +Install this package if you want to access remote Gluster storage. +%endif + + %package block-iscsi Summary: QEMU iSCSI block driver Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} @@ -411,9 +483,6 @@ the Secure Shell (SSH) protocol. %prep %setup -q -n qemu-%{version} -%patch0001 -p1 -%patch0002 -p1 -%patch0003 -p1 %patch0004 -p1 %patch0005 -p1 %patch0006 -p1 @@ -456,6 +525,65 @@ the Secure Shell (SSH) protocol. 
%patch0043 -p1 %patch0044 -p1 %patch0045 -p1 +%patch0046 -p1 +%patch0047 -p1 +%patch0048 -p1 +%patch0049 -p1 +%patch0050 -p1 +%patch0051 -p1 +%patch0052 -p1 +%patch0053 -p1 +%patch0054 -p1 +%patch0055 -p1 +%patch0056 -p1 +%patch0057 -p1 +%patch0058 -p1 +%patch0059 -p1 +%patch0060 -p1 +%patch0061 -p1 +%patch0062 -p1 +%patch0063 -p1 +%patch0064 -p1 +%patch0065 -p1 +%patch0066 -p1 +%patch0067 -p1 +%patch0068 -p1 +%patch0069 -p1 +%patch0070 -p1 +%patch0071 -p1 +%patch0072 -p1 +%patch0073 -p1 +%patch0074 -p1 +%patch0075 -p1 +%patch0076 -p1 +%patch0077 -p1 +%patch0078 -p1 +%patch0079 -p1 +%patch0080 -p1 +%patch0081 -p1 +%patch0082 -p1 +%patch0083 -p1 +%patch0084 -p1 +%patch0085 -p1 +%patch0086 -p1 +%patch0087 -p1 +%patch0088 -p1 +%patch0089 -p1 +%patch0090 -p1 +%patch0091 -p1 +%patch0092 -p1 +%patch0093 -p1 +%patch0094 -p1 +%patch0095 -p1 +%patch0096 -p1 +%patch0097 -p1 +%patch0098 -p1 +%patch0099 -p1 +%patch0100 -p1 +%patch0101 -p1 +%patch0102 -p1 +%patch0103 -p1 +%patch0104 -p1 %build %global buildarch %{kvm_target}-softmmu @@ -646,9 +774,11 @@ make V=1 %{?_smp_mflags} $buildldflags cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm gcc %{SOURCE6} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o ksmctl +gcc %{SOURCE35} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o udev-kvm-check %install -%define _udevdir %(pkg-config --variable=udevdir udev)/rules.d +%define _udevdir %(pkg-config --variable=udevdir udev) +%define _udevrulesdir %{_udevdir}/rules.d install -D -p -m 0644 %{SOURCE4} $RPM_BUILD_ROOT%{_unitdir}/ksm.service install -D -p -m 0644 %{SOURCE5} $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig/ksm @@ -669,9 +799,12 @@ install -D -p -m 0644 %{SOURCE26} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/vhost %endif mkdir -p $RPM_BUILD_ROOT%{_bindir}/ -mkdir -p $RPM_BUILD_ROOT%{_udevdir} - +mkdir -p $RPM_BUILD_ROOT%{_udevrulesdir}/ mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name} + +install -p -m 0755 udev-kvm-check $RPM_BUILD_ROOT%{_udevdir} +install -p -m 0644 %{SOURCE34} $RPM_BUILD_ROOT%{_udevrulesdir} + 
install -m 0644 scripts/dump-guest-memory.py \ $RPM_BUILD_ROOT%{_datadir}/%{name} @@ -685,7 +818,7 @@ mkdir -p $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset # Install qemu-guest-agent service and udev rules install -m 0644 %{_sourcedir}/qemu-guest-agent.service %{buildroot}%{_unitdir} install -m 0644 %{_sourcedir}/qemu-ga.sysconfig %{buildroot}%{_sysconfdir}/sysconfig/qemu-ga -install -m 0644 %{_sourcedir}/99-qemu-guest-agent.rules %{buildroot}%{_udevdir} +install -m 0644 %{_sourcedir}/99-qemu-guest-agent.rules %{buildroot}%{_udevrulesdir} # - the fsfreeze hook script: install -D --preserve-timestamps \ @@ -940,6 +1073,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %config(noreplace) %{_sysconfdir}/sysconfig/ksm %{_unitdir}/ksmtuned.service %{_sbindir}/ksmtuned +%{_udevdir}/udev-kvm-check +%{_udevrulesdir}/81-kvm-rhel.rules +%ghost %{_sysconfdir}/kvm %config(noreplace) %{_sysconfdir}/ksmtuned.conf %dir %{_sysconfdir}/%{name} %config(noreplace) %{_sysconfdir}/%{name}/bridge.conf @@ -1009,7 +1145,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_bindir}/qemu-ga %{_mandir}/man8/qemu-ga.8* %{_unitdir}/qemu-guest-agent.service -%{_udevdir}/99-qemu-guest-agent.rules +%{_udevrulesdir}/99-qemu-guest-agent.rules %config(noreplace) %{_sysconfdir}/sysconfig/qemu-ga %{_sysconfdir}/qemu-ga %{_datadir}/%{name}/qemu-ga @@ -1018,6 +1154,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %files block-curl %{_libdir}/qemu-kvm/block-curl.so +%if %{have_gluster} +%files block-gluster +%{_libdir}/qemu-kvm/block-gluster.so +%endif + %files block-iscsi %{_libdir}/qemu-kvm/block-iscsi.so @@ -1029,6 +1170,156 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Thu Nov 08 2018 Danilo Cesar Lemes de Paula - 3.0.0-2.el8 +- Mass import 2.12.0 fixes into 3.0 + +- kvm-luks-Allow-share-rw-on.patch [bz#1629701] +- kvm-redhat-reenable-gluster-support.patch [bz#1599340] +- kvm-redhat-bump-libusb-requirement.patch [bz#1627970] +- Resolves: 
bz#1599340 + (Reenable glusterfs in qemu-kvm once BZ#1567292 gets fixed) +- Resolves: bz#1627970 + (symbol lookup error: /usr/libexec/qemu-kvm: undefined symbol: libusb_set_option) +- Resolves: bz#1629701 + ("share-rw=on" does not work for luks format image - Fast Train) + +- kvm-block-rbd-pull-out-qemu_rbd_convert_options.patch [bz#1635585] +- kvm-block-rbd-Attempt-to-parse-legacy-filenames.patch [bz#1635585] +- kvm-block-rbd-add-deprecation-documentation-for-filename.patch [bz#1635585] +- kvm-block-rbd-add-iotest-for-rbd-legacy-keyvalue-filenam.patch [bz#1635585] +- Resolves: bz#1635585 + (rbd json format of 7.6 is incompatible with 7.5) + +- kvm-vnc-call-sasl_server_init-only-when-required.patch [bz#1609327] +- kvm-nbd-server-fix-NBD_CMD_CACHE.patch [bz#1636142] +- kvm-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch [bz#1636142] +- kvm-test-bdrv-drain-bdrv_drain-works-with-cross-AioConte.patch [bz#1637976] +- kvm-block-Use-bdrv_do_drain_begin-end-in-bdrv_drain_all.patch [bz#1637976] +- kvm-block-Remove-recursive-parameter-from-bdrv_drain_inv.patch [bz#1637976] +- kvm-block-Don-t-manually-poll-in-bdrv_drain_all.patch [bz#1637976] +- kvm-tests-test-bdrv-drain-bdrv_drain_all-works-in-corout.patch [bz#1637976] +- kvm-block-Avoid-unnecessary-aio_poll-in-AIO_WAIT_WHILE.patch [bz#1637976] +- kvm-block-Really-pause-block-jobs-on-drain.patch [bz#1637976] +- kvm-block-Remove-bdrv_drain_recurse.patch [bz#1637976] +- kvm-test-bdrv-drain-Add-test-for-node-deletion.patch [bz#1637976] +- kvm-block-Drain-recursively-with-a-single-BDRV_POLL_WHIL.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-node-deletion-in-subtree-recurs.patch [bz#1637976] +- kvm-block-Don-t-poll-in-parent-drain-callbacks.patch [bz#1637976] +- kvm-test-bdrv-drain-Graph-change-through-parent-callback.patch [bz#1637976] +- kvm-block-Defer-.bdrv_drain_begin-callback-to-polling-ph.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-that-bdrv_drain_invoke-doesn-t-.patch [bz#1637976] +- 
kvm-block-Allow-AIO_WAIT_WHILE-with-NULL-ctx.patch [bz#1637976] +- kvm-block-Move-bdrv_drain_all_begin-out-of-coroutine-con.patch [bz#1637976] +- kvm-block-ignore_bds_parents-parameter-for-drain-functio.patch [bz#1637976] +- kvm-block-Allow-graph-changes-in-bdrv_drain_all_begin-en.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-graph-changes-in-drain_all-sect.patch [bz#1637976] +- kvm-block-Poll-after-drain-on-attaching-a-node.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-bdrv_append-to-drained-node.patch [bz#1637976] +- kvm-block-linux-aio-acquire-AioContext-before-qemu_laio_.patch [bz#1637976] +- kvm-util-async-use-qemu_aio_coroutine_enter-in-co_schedu.patch [bz#1637976] +- kvm-job-Fix-nested-aio_poll-hanging-in-job_txn_apply.patch [bz#1637976] +- kvm-job-Fix-missing-locking-due-to-mismerge.patch [bz#1637976] +- kvm-blockjob-Wake-up-BDS-when-job-becomes-idle.patch [bz#1637976] +- kvm-aio-wait-Increase-num_waiters-even-in-home-thread.patch [bz#1637976] +- kvm-test-bdrv-drain-Drain-with-block-jobs-in-an-I-O-thre.patch [bz#1637976] +- kvm-test-blockjob-Acquire-AioContext-around-job_cancel_s.patch [bz#1637976] +- kvm-job-Use-AIO_WAIT_WHILE-in-job_finish_sync.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-AIO_WAIT_WHILE-in-completion-ca.patch [bz#1637976] +- kvm-block-Add-missing-locking-in-bdrv_co_drain_bh_cb.patch [bz#1637976] +- kvm-block-backend-Add-.drained_poll-callback.patch [bz#1637976] +- kvm-block-backend-Fix-potential-double-blk_delete.patch [bz#1637976] +- kvm-block-backend-Decrease-in_flight-only-after-callback.patch [bz#1637976] +- kvm-blockjob-Lie-better-in-child_job_drained_poll.patch [bz#1637976] +- kvm-block-Remove-aio_poll-in-bdrv_drain_poll-variants.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-nested-poll-in-bdrv_drain_poll_.patch [bz#1637976] +- kvm-job-Avoid-deadlocks-in-job_completed_txn_abort.patch [bz#1637976] +- kvm-test-bdrv-drain-AIO_WAIT_WHILE-in-job-.commit-.abort.patch [bz#1637976] +- kvm-test-bdrv-drain-Fix-outdated-comments.patch 
[bz#1637976] +- kvm-block-Use-a-single-global-AioWait.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-draining-job-source-child-and-p.patch [bz#1637976] +- kvm-qemu-img-Fix-assert-when-mapping-unaligned-raw-file.patch [bz#1639374] +- kvm-iotests-Add-test-221-to-catch-qemu-img-map-regressio.patch [bz#1639374] +- Resolves: bz#1609327 + (qemu-kvm[37046]: Could not find keytab file: /etc/qemu/krb5.tab: Unknown error 49408) +- Resolves: bz#1636142 + (qemu NBD_CMD_CACHE flaws impacting non-qemu NBD clients) +- Resolves: bz#1637976 + (Crashes and hangs with iothreads vs. block jobs) +- Resolves: bz#1639374 + (qemu-img map 'Aborted (core dumped)' when specifying a plain file) + +- kvm-linux-headers-update.patch [bz#1508142] +- kvm-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch [bz#1508142] +- kvm-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch [bz#1508142] +- kvm-s390x-ap-base-Adjunct-Processor-AP-object-model.patch [bz#1508142] +- kvm-s390x-vfio-ap-Introduce-VFIO-AP-device.patch [bz#1508142] +- kvm-s390-doc-detailed-specifications-for-AP-virtualizati.patch [bz#1508142] +- Resolves: bz#1508142 + ([IBM 8.0 FEAT] KVM: Guest-dedicated Crypto Adapters - qemu part) + +- kvm-Revert-hw-acpi-build-build-SRAT-memory-affinity-stru.patch [bz#1609235] +- kvm-add-udev-kvm-check.patch [bz#1552663] +- kvm-aio-posix-Don-t-count-ctx-notifier-as-progress-when-.patch [bz#1623085] +- kvm-aio-Do-aio_notify_accept-only-during-blocking-aio_po.patch [bz#1623085] +- kvm-aio-posix-fix-concurrent-access-to-poll_disable_cnt.patch [bz#1632622] +- kvm-aio-posix-compute-timeout-before-polling.patch [bz#1632622] +- kvm-aio-posix-do-skip-system-call-if-ctx-notifier-pollin.patch [bz#1632622] +- kvm-intel-iommu-send-PSI-always-even-if-across-PDEs.patch [bz#1450712] +- kvm-intel-iommu-remove-IntelIOMMUNotifierNode.patch [bz#1450712] +- kvm-intel-iommu-add-iommu-lock.patch [bz#1450712] +- kvm-intel-iommu-only-do-page-walk-for-MAP-notifiers.patch [bz#1450712] +- 
kvm-intel-iommu-introduce-vtd_page_walk_info.patch [bz#1450712] +- kvm-intel-iommu-pass-in-address-space-when-page-walk.patch [bz#1450712] +- kvm-intel-iommu-trace-domain-id-during-page-walk.patch [bz#1450712] +- kvm-util-implement-simple-iova-tree.patch [bz#1450712] +- kvm-intel-iommu-rework-the-page-walk-logic.patch [bz#1450712] +- kvm-i386-define-the-ssbd-CPUID-feature-bit-CVE-2018-3639.patch [bz#1633928] +- Resolves: bz#1450712 + (Booting nested guest with vIOMMU, the assigned network devices can not receive packets (qemu)) +- Resolves: bz#1552663 + (81-kvm-rhel.rules is no longer part of initscripts) +- Resolves: bz#1609235 + (Win2016 guest can't recognize pc-dimm hotplugged to node 0) +- Resolves: bz#1623085 + (VM doesn't boot from HD) +- Resolves: bz#1632622 + (~40% virtio_blk disk performance drop for win2012r2 guest when comparing qemu-kvm-rhev-2.12.0-9 with qemu-kvm-rhev-2.12.0-12) +- Resolves: bz#1633928 + (CVE-2018-3639 qemu-kvm: hw: cpu: speculative store bypass [rhel-8.0]) + +- kvm-block-for-jobs-do-not-clear-user_paused-until-after-.patch [bz#1635583] +- kvm-iotests-Add-failure-matching-to-common.qemu.patch [bz#1635583] +- kvm-block-iotest-to-catch-abort-on-forced-blockjob-cance.patch [bz#1635583] +- Resolves: bz#1635583 + (Quitting VM causes qemu core dump once the block mirror job paused for no enough target space) + +- kvm-check-Only-test-ivshm-when-it-is-compiled-in.patch [bz#1621817] +- kvm-Disable-ivshmem.patch [bz#1621817] +- kvm-mirror-Fail-gracefully-for-source-target.patch [bz#1637963] +- kvm-commit-Add-top-node-base-node-options.patch [bz#1637970] +- kvm-qemu-iotests-Test-commit-with-top-node-base-node.patch [bz#1637970] +- Resolves: bz#1621817 + (Disable IVSHMEM in RHEL 8) +- Resolves: bz#1637963 + (Segfault on 'blockdev-mirror' with same node as source and target) +- Resolves: bz#1637970 + (allow using node-names with block-commit) + +- kvm-redhat-make-the-plugins-executable.patch [bz#1638304] +- Resolves: bz#1638304 + (the driver 
packages lack all the library Requires) + +- kvm-seccomp-allow-sched_setscheduler-with-SCHED_IDLE-pol.patch [bz#1618356] +- kvm-seccomp-use-SIGSYS-signal-instead-of-killing-the-thr.patch [bz#1618356] +- kvm-seccomp-prefer-SCMP_ACT_KILL_PROCESS-if-available.patch [bz#1618356] +- kvm-configure-require-libseccomp-2.2.0.patch [bz#1618356] +- kvm-seccomp-set-the-seccomp-filter-to-all-threads.patch [bz#1618356] +- kvm-memory-cleanup-side-effects-of-memory_region_init_fo.patch [bz#1600365] +- Resolves: bz#1600365 + (QEMU core dumped when hotplug memory exceeding host hugepages and with discard-data=yes) +- Resolves: bz#1618356 + (qemu-kvm: Qemu: seccomp: blacklist is not applied to all threads [rhel-8]) + * Fri Oct 12 2018 Danilo Cesar Lemes de Paula - 3.0.0-1.el8 - Rebase on qemu-kvm 3.0.0 diff --git a/udev-kvm-check.c b/udev-kvm-check.c new file mode 100644 index 0000000..cb0ecba --- /dev/null +++ b/udev-kvm-check.c @@ -0,0 +1,172 @@ +/* + * udev-kvm-check.c + * + * Copyright 2018 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ *
+ */
+
+/*
+ * udev helper: invoked by 81-kvm-rhel.rules as
+ *     udev-kvm-check <count> <event>
+ * and logs the number of active guests to syslog.  When a THRESHOLD is
+ * configured in SYSCONFIG_KVM and a "create" event pushes the count above
+ * it, a subscription warning is logged instead.
+ *
+ * NOTE(review): the header names below were lost in extraction and have
+ * been restored from the functions this file uses; original order unknown.
+ */
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+
+#define DEFAULT 0
+#define FACILITY "kvm"
+#define SYSCONFIG_KVM "/etc/sysconfig/kvm"
+
+#define COUNT_MSG \
+    "%d %s now active"
+
+#define SUBSCRIPTION_MSG \
+    "%d %s now active; your Red Hat Enterprise Linux subscription" \
+    " limit is %d guests. Please review your Red Hat Enterprise Linux" \
+    " subscription agreement or contact your Red Hat" \
+    " support representative for more information. You" \
+    " may review the Red Hat Enterprise subscription" \
+    " limits at http://www.redhat.com/rhel-virt-limits"
+
+/*
+ * Scan @fp for the first line that, after optional leading whitespace,
+ * starts with "THRESHOLD=" immediately followed by a decimal number, and
+ * where that number is followed by '#', whitespace, a newline or EOF.
+ *
+ * Returns the parsed number, or DEFAULT if no such line exists.
+ *
+ * Implemented as a small goto-driven state machine (START, EOL, KEY,
+ * THRESHOLD) reading one character at a time.
+ */
+int get_threshold_from_file(FILE *fp)
+{
+    static const char key[] = "THRESHOLD=";
+    int pos = 0;
+    int thres;
+    int ch;
+
+start:
+    /* State START - at beginning of line, search for beginning of "THRESHOLD="
+     * string.
+     */
+    ch = getc(fp);
+    if (ch == EOF) {
+        return DEFAULT;
+    }
+    if (isspace(ch)) {
+        goto start;
+    }
+    if (ch == 'T') {
+        pos = 1;
+        goto key;
+    }
+    goto eol;
+
+eol:
+    /* State EOL - loop until end of line */
+    ch = getc(fp);
+    if (ch == EOF) {
+        return DEFAULT;
+    }
+    if (ch == '\n') {
+        goto start;
+    }
+    goto eol;
+
+key:
+    /* State KEY - match "THRESHOLD=" string, go to THRESHOLD if found */
+    ch = getc(fp);
+    if (ch == EOF) {
+        return DEFAULT;
+    }
+    if (ch == key[pos]) {
+        pos++;
+        if (key[pos] == 0) {
+            goto threshold;
+        } else {
+            goto key;
+        }
+    }
+    /* Push the mismatching character back: if it is the newline that ends
+     * this line, EOL must see it, otherwise the whole next line would be
+     * skipped.
+     */
+    ungetc(ch, fp);
+    goto eol;
+
+threshold:
+    /* State THRESHOLD - parse number using fscanf, expect comment or space
+     * or EOL.
+     */
+    ch = getc(fp);
+    if (ch == EOF) {
+        return DEFAULT;
+    }
+    if (!isdigit(ch)) {
+        /* Same as in KEY: let EOL see a newline that follows "THRESHOLD=" */
+        ungetc(ch, fp);
+        goto eol;
+    }
+    ungetc(ch, fp);
+    if (fscanf(fp, "%d", &thres) != 1) {
+        return DEFAULT;
+    }
+    ch = getc(fp);
+    if (ch == '#' || ch == EOF || ch == '\n' || isspace(ch)) {
+        return thres;
+    }
+    /* ch is neither newline nor EOF here, so EOL can consume the rest */
+    goto eol;
+}
+
+/*
+ * Read the guest threshold from SYSCONFIG_KVM.
+ * Returns DEFAULT if the file cannot be opened or holds no valid entry.
+ */
+int get_threshold(void)
+{
+    FILE *fp = fopen(SYSCONFIG_KVM, "r");
+    int val;
+
+    if (!fp) {
+        return DEFAULT;
+    }
+
+    val = get_threshold_from_file(fp);
+    fclose(fp);
+    return val;
+}
+
+/* Return the singular or plural noun matching @count. */
+const char *guest(int count)
+{
+    return (count == 1 ? "guest" : "guests");
+}
+
+/* Log the plain "N guest(s) now active" message at LOG_INFO. */
+void emit_count_message(int count)
+{
+    openlog(FACILITY, LOG_CONS, LOG_USER);
+    syslog(LOG_INFO, COUNT_MSG, count, guest(count));
+    closelog();
+}
+
+/* Log the subscription-limit warning at LOG_WARNING. */
+void emit_subscription_message(int count, int threshold)
+{
+    openlog(FACILITY, LOG_CONS, LOG_USER);
+    syslog(LOG_WARNING, SUBSCRIPTION_MSG, count, guest(count), threshold);
+    closelog();
+}
+
+/*
+ * argv[1] is the guest count, argv[2] the event name.
+ * Exits with status 1 when fewer than two arguments are given.
+ */
+int main(int argc, char **argv)
+{
+    int count, threshold;
+
+    if (argc < 3) {
+        exit(1);
+    }
+
+    count = atoi(argv[1]);
+    threshold = get_threshold();
+
+    if (!strcmp(argv[2], "create")) {
+        /* No threshold configured: just report the count.  Otherwise only
+         * speak up once the count exceeds the threshold.
+         */
+        if (threshold == 0) {
+            emit_count_message(count);
+        } else if (count > threshold) {
+            emit_subscription_message(count, threshold);
+        }
+    } else {
+        /* Any other event: report the count while at or above threshold */
+        if (count >= threshold) {
+            emit_count_message(count);
+        }
+    }
+
+    return 0;
+}
From a711492897fe1c48ddedda0fe902b6f27d59699c Mon Sep 17 00:00:00 2001
From: "Danilo C. L.
de Paula" Date: Thu, 29 Nov 2018 10:09:34 -0200 Subject: [PATCH 004/195] Rebase to qemu 3.1.0-RC2 --- .gitignore | 2 +- ...-Fix-compilation-issue-when-disable-.patch | 128 +++ ...d.patch => 0005-Initial-redhat-build.patch | 80 +- ...0006-Enable-disable-devices-for-RHEL.patch | 492 ++++------ ...Machine-type-related-general-changes.patch | 56 +- ...ch => 0008-Add-aarch64-machine-types.patch | 30 +- ...atch => 0009-Add-ppc64-machine-types.patch | 46 +- ...atch => 0010-Add-s390x-machine-types.patch | 23 +- ...tch => 0011-Add-x86_64-machine-types.patch | 46 +- 0011-Enable-make-check.patch | 498 ---------- 0012-Enable-make-check.patch | 298 ++++++ ...ult.patch => 0013-Use-kvm-by-default.patch | 11 +- ...mber-of-devices-that-can-be-assigned.patch | 25 +- ...Add-support-statement-to-help-output.patch | 10 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 89 -- ...lly-limit-the-maximum-number-of-CPUs.patch | 153 +++ ... => 0017-Add-support-for-simpletrace.patch | 23 +- ...documentation-instead-of-qemu-system.patch | 234 +++-- ...19-usb-xhci-Fix-PCI-capability-order.patch | 15 +- ...x-headers-asm-s390-kvm.h-header-sync.patch | 72 -- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 7 +- ...Enable-KVM-huge-page-backing-support.patch | 114 --- 0022-s390x-kvm-add-etoken-facility.patch | 190 ---- ...efault-enable-bpb-and-ppa15-for-z196.patch | 51 - ...-arch_query_cpu_model_expansion-leak.patch | 87 -- ...sable-TOPOEXT-by-default-on-cpu-host.patch | 54 -- ...ify-off-disable-host-as-well-as-peer.patch | 77 -- ...on-postcopy-Clear-have_listen_thread.patch | 51 - ...ion-cleanup-in-error-paths-in-loadvm.patch | 52 - ...hange-start-callback-to-run-callback.patch | 372 -------- 0030-jobs-canonize-Error-object.patch | 283 ------ 0031-jobs-add-exit-shim.patch | 108 --- 0032-block-commit-utilize-job_exit-shim.patch | 115 --- 0033-block-mirror-utilize-job_exit-shim.patch | 152 --- 0034-jobs-utilize-job_exit-shim.patch | 307 ------ ...e-function-variables-consistently-na.patch | 165 ---- 
...argument-to-job_completed-privatize-.patch | 153 --- 0037-jobs-remove-job_defer_to_main_loop.patch | 119 --- ...-commit-add-block-job-creation-flags.patch | 110 --- ...-mirror-add-block-job-creation-flags.patch | 100 -- ...-stream-add-block-job-creation-flags.patch | 100 -- ...refactor-commit-to-use-job-callbacks.patch | 180 ---- ...don-t-install-backing-chain-on-abort.patch | 45 - ...or-conservative-mirror_exit-refactor.patch | 136 --- ...refactor-stream-to-use-job-callbacks.patch | 94 -- ...s-blockjob-replace-Blockjob-with-Job.patch | 233 ----- ...s-test-blockjob-remove-exit-callback.patch | 88 -- ...st-blockjob-txn-move-.exit-to-.clean.patch | 53 -- 0048-jobs-remove-.exit-callback.patch | 156 --- ...ock-commit-expose-new-job-properties.patch | 90 -- ...ock-mirror-expose-new-job-properties.patch | 144 --- ...ock-stream-expose-new-job-properties.patch | 108 --- ...lock-backup-qapi-documentation-fixup.patch | 73 -- ...-document-transactional-shortcomings.patch | 53 -- ...YS-signal-instead-of-killing-the-thr.patch | 67 -- ...r-SCMP_ACT_KILL_PROCESS-if-available.patch | 110 --- ...et-the-seccomp-filter-to-all-threads.patch | 77 -- ...ide-effects-of-memory_region_init_fo.patch | 185 ---- ...or-Fail-gracefully-for-source-target.patch | 87 -- ...ommit-Add-top-node-base-node-options.patch | 141 --- ...-Test-commit-with-top-node-base-node.patch | 127 --- ...o-not-clear-user_paused-until-after-.patch | 59 -- ...catch-abort-on-forced-blockjob-cance.patch | 173 ---- ...uild-build-SRAT-memory-affinity-stru.patch | 117 --- ...count-ctx-notifier-as-progress-when-.patch | 48 - ...y_accept-only-during-blocking-aio_po.patch | 124 --- ...oncurrent-access-to-poll_disable_cnt.patch | 122 --- ...posix-compute-timeout-before-polling.patch | 186 ---- ...p-system-call-if-ctx-notifier-pollin.patch | 64 -- 0069-linux-headers-update.patch | 202 ---- ...et-up-CPU-model-for-AP-device-suppor.patch | 148 --- ...-AP-instruction-interpretation-for-g.patch | 89 -- 
...se-Adjunct-Processor-AP-object-model.patch | 281 ------ ...90x-vfio-ap-Introduce-VFIO-AP-device.patch | 305 ------ ...d-specifications-for-AP-virtualizati.patch | 889 ------------------ ...-sasl_server_init-only-when-required.patch | 89 -- 0076-nbd-server-fix-NBD_CMD_CACHE.patch | 52 - 0077-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch | 96 -- ...acquire-AioContext-before-qemu_laio_.patch | 134 --- ...emu_aio_coroutine_enter-in-co_schedu.patch | 78 -- ...ed-aio_poll-hanging-in-job_txn_apply.patch | 105 --- ...-Fix-missing-locking-due-to-mismerge.patch | 55 -- ...ob-Wake-up-BDS-when-job-becomes-idle.patch | 161 ---- ...ease-num_waiters-even-in-home-thread.patch | 64 -- ...Drain-with-block-jobs-in-an-I-O-thre.patch | 208 ---- ...quire-AioContext-around-job_cancel_s.patch | 86 -- ...se-AIO_WAIT_WHILE-in-job_finish_sync.patch | 77 -- ...Test-AIO_WAIT_WHILE-in-completion-ca.patch | 59 -- ...ssing-locking-in-bdrv_co_drain_bh_cb.patch | 96 -- ...k-backend-Add-.drained_poll-callback.patch | 66 -- ...kend-Fix-potential-double-blk_delete.patch | 67 -- ...crease-in_flight-only-after-callback.patch | 74 -- ...Lie-better-in-child_job_drained_poll.patch | 104 -- ...aio_poll-in-bdrv_drain_poll-variants.patch | 64 -- ...Test-nested-poll-in-bdrv_drain_poll_.patch | 63 -- ...deadlocks-in-job_completed_txn_abort.patch | 85 -- ...AIO_WAIT_WHILE-in-job-.commit-.abort.patch | 241 ----- ...est-bdrv-drain-Fix-outdated-comments.patch | 69 -- 0098-block-Use-a-single-global-AioWait.patch | 367 -------- ...Test-draining-job-source-child-and-p.patch | 198 ---- ...bd-pull-out-qemu_rbd_convert_options.patch | 95 -- ...bd-Attempt-to-parse-legacy-filenames.patch | 120 --- ...precation-documentation-for-filename.patch | 59 -- ...test-for-rbd-legacy-keyvalue-filenam.patch | 141 --- 0104-luks-Allow-share-rw-on.patch | 52 - qemu-kvm.spec | 229 +---- sources | 2 +- 107 files changed, 1155 insertions(+), 12453 deletions(-) create mode 100644 0001-migration-colo.c-Fix-compilation-issue-when-disable-.patch 
rename 0004-Initial-redhat-build.patch => 0005-Initial-redhat-build.patch (83%) rename 0005-Enable-disable-devices-for-RHEL-7.patch => 0006-Enable-disable-devices-for-RHEL.patch (74%) rename 0006-Machine-type-related-general-changes.patch => 0007-Machine-type-related-general-changes.patch (95%) rename 0007-Add-aarch64-machine-types.patch => 0008-Add-aarch64-machine-types.patch (92%) rename 0008-Add-ppc64-machine-types.patch => 0009-Add-ppc64-machine-types.patch (91%) rename 0009-Add-s390x-machine-types.patch => 0010-Add-s390x-machine-types.patch (78%) rename 0010-Add-x86_64-machine-types.patch => 0011-Add-x86_64-machine-types.patch (96%) delete mode 100644 0011-Enable-make-check.patch create mode 100644 0012-Enable-make-check.patch rename 0012-Use-kvm-by-default.patch => 0013-Use-kvm-by-default.patch (72%) rename 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch => 0014-vfio-cap-number-of-devices-that-can-be-assigned.patch (73%) rename 0014-Add-support-statement-to-help-output.patch => 0015-Add-support-statement-to-help-output.patch (82%) delete mode 100644 0015-globally-limit-the-maximum-number-of-CPUs.patch create mode 100644 0016-globally-limit-the-maximum-number-of-CPUs.patch rename 0016-Add-support-for-simpletrace.patch => 0017-Add-support-for-simpletrace.patch (86%) rename 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch => 0018-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch (85%) rename 0018-usb-xhci-Fix-PCI-capability-order.patch => 0019-usb-xhci-Fix-PCI-capability-order.patch (87%) delete mode 100644 0020-linux-headers-asm-s390-kvm.h-header-sync.patch rename 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch => 0020-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch (90%) delete mode 100644 0021-s390x-Enable-KVM-huge-page-backing-support.patch delete mode 100644 0022-s390x-kvm-add-etoken-facility.patch delete mode 100644 0023-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch delete mode 
100644 0024-i386-Fix-arch_query_cpu_model_expansion-leak.patch delete mode 100644 0025-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch delete mode 100644 0026-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch delete mode 100644 0027-migration-postcopy-Clear-have_listen_thread.patch delete mode 100644 0028-migration-cleanup-in-error-paths-in-loadvm.patch delete mode 100644 0029-jobs-change-start-callback-to-run-callback.patch delete mode 100644 0030-jobs-canonize-Error-object.patch delete mode 100644 0031-jobs-add-exit-shim.patch delete mode 100644 0032-block-commit-utilize-job_exit-shim.patch delete mode 100644 0033-block-mirror-utilize-job_exit-shim.patch delete mode 100644 0034-jobs-utilize-job_exit-shim.patch delete mode 100644 0035-block-backup-make-function-variables-consistently-na.patch delete mode 100644 0036-jobs-remove-ret-argument-to-job_completed-privatize-.patch delete mode 100644 0037-jobs-remove-job_defer_to_main_loop.patch delete mode 100644 0038-block-commit-add-block-job-creation-flags.patch delete mode 100644 0039-block-mirror-add-block-job-creation-flags.patch delete mode 100644 0040-block-stream-add-block-job-creation-flags.patch delete mode 100644 0041-block-commit-refactor-commit-to-use-job-callbacks.patch delete mode 100644 0042-block-mirror-don-t-install-backing-chain-on-abort.patch delete mode 100644 0043-block-mirror-conservative-mirror_exit-refactor.patch delete mode 100644 0044-block-stream-refactor-stream-to-use-job-callbacks.patch delete mode 100644 0045-tests-blockjob-replace-Blockjob-with-Job.patch delete mode 100644 0046-tests-test-blockjob-remove-exit-callback.patch delete mode 100644 0047-tests-test-blockjob-txn-move-.exit-to-.clean.patch delete mode 100644 0048-jobs-remove-.exit-callback.patch delete mode 100644 0049-qapi-block-commit-expose-new-job-properties.patch delete mode 100644 0050-qapi-block-mirror-expose-new-job-properties.patch delete mode 100644 0051-qapi-block-stream-expose-new-job-properties.patch delete 
mode 100644 0052-block-backup-qapi-documentation-fixup.patch delete mode 100644 0053-blockdev-document-transactional-shortcomings.patch delete mode 100644 0054-seccomp-use-SIGSYS-signal-instead-of-killing-the-thr.patch delete mode 100644 0055-seccomp-prefer-SCMP_ACT_KILL_PROCESS-if-available.patch delete mode 100644 0056-seccomp-set-the-seccomp-filter-to-all-threads.patch delete mode 100644 0057-memory-cleanup-side-effects-of-memory_region_init_fo.patch delete mode 100644 0058-mirror-Fail-gracefully-for-source-target.patch delete mode 100644 0059-commit-Add-top-node-base-node-options.patch delete mode 100644 0060-qemu-iotests-Test-commit-with-top-node-base-node.patch delete mode 100644 0061-block-for-jobs-do-not-clear-user_paused-until-after-.patch delete mode 100644 0062-block-iotest-to-catch-abort-on-forced-blockjob-cance.patch delete mode 100644 0063-Revert-hw-acpi-build-build-SRAT-memory-affinity-stru.patch delete mode 100644 0064-aio-posix-Don-t-count-ctx-notifier-as-progress-when-.patch delete mode 100644 0065-aio-Do-aio_notify_accept-only-during-blocking-aio_po.patch delete mode 100644 0066-aio-posix-fix-concurrent-access-to-poll_disable_cnt.patch delete mode 100644 0067-aio-posix-compute-timeout-before-polling.patch delete mode 100644 0068-aio-posix-do-skip-system-call-if-ctx-notifier-pollin.patch delete mode 100644 0069-linux-headers-update.patch delete mode 100644 0070-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch delete mode 100644 0071-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch delete mode 100644 0072-s390x-ap-base-Adjunct-Processor-AP-object-model.patch delete mode 100644 0073-s390x-vfio-ap-Introduce-VFIO-AP-device.patch delete mode 100644 0074-s390-doc-detailed-specifications-for-AP-virtualizati.patch delete mode 100644 0075-vnc-call-sasl_server_init-only-when-required.patch delete mode 100644 0076-nbd-server-fix-NBD_CMD_CACHE.patch delete mode 100644 0077-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch delete mode 100644 
0078-block-linux-aio-acquire-AioContext-before-qemu_laio_.patch delete mode 100644 0079-util-async-use-qemu_aio_coroutine_enter-in-co_schedu.patch delete mode 100644 0080-job-Fix-nested-aio_poll-hanging-in-job_txn_apply.patch delete mode 100644 0081-job-Fix-missing-locking-due-to-mismerge.patch delete mode 100644 0082-blockjob-Wake-up-BDS-when-job-becomes-idle.patch delete mode 100644 0083-aio-wait-Increase-num_waiters-even-in-home-thread.patch delete mode 100644 0084-test-bdrv-drain-Drain-with-block-jobs-in-an-I-O-thre.patch delete mode 100644 0085-test-blockjob-Acquire-AioContext-around-job_cancel_s.patch delete mode 100644 0086-job-Use-AIO_WAIT_WHILE-in-job_finish_sync.patch delete mode 100644 0087-test-bdrv-drain-Test-AIO_WAIT_WHILE-in-completion-ca.patch delete mode 100644 0088-block-Add-missing-locking-in-bdrv_co_drain_bh_cb.patch delete mode 100644 0089-block-backend-Add-.drained_poll-callback.patch delete mode 100644 0090-block-backend-Fix-potential-double-blk_delete.patch delete mode 100644 0091-block-backend-Decrease-in_flight-only-after-callback.patch delete mode 100644 0092-blockjob-Lie-better-in-child_job_drained_poll.patch delete mode 100644 0093-block-Remove-aio_poll-in-bdrv_drain_poll-variants.patch delete mode 100644 0094-test-bdrv-drain-Test-nested-poll-in-bdrv_drain_poll_.patch delete mode 100644 0095-job-Avoid-deadlocks-in-job_completed_txn_abort.patch delete mode 100644 0096-test-bdrv-drain-AIO_WAIT_WHILE-in-job-.commit-.abort.patch delete mode 100644 0097-test-bdrv-drain-Fix-outdated-comments.patch delete mode 100644 0098-block-Use-a-single-global-AioWait.patch delete mode 100644 0099-test-bdrv-drain-Test-draining-job-source-child-and-p.patch delete mode 100644 0100-block-rbd-pull-out-qemu_rbd_convert_options.patch delete mode 100644 0101-block-rbd-Attempt-to-parse-legacy-filenames.patch delete mode 100644 0102-block-rbd-add-deprecation-documentation-for-filename.patch delete mode 100644 
0103-block-rbd-add-iotest-for-rbd-legacy-keyvalue-filenam.patch delete mode 100644 0104-luks-Allow-share-rw-on.patch diff --git a/.gitignore b/.gitignore index caaa10d..220f7bc 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -/qemu-3.0.0.tar.xz +/qemu-3.1.0.tar.xz diff --git a/0001-migration-colo.c-Fix-compilation-issue-when-disable-.patch b/0001-migration-colo.c-Fix-compilation-issue-when-disable-.patch new file mode 100644 index 0000000..b2ca136 --- /dev/null +++ b/0001-migration-colo.c-Fix-compilation-issue-when-disable-.patch @@ -0,0 +1,128 @@ +From 5be2fefbc0999ada944c36a865b154d398e16e27 Mon Sep 17 00:00:00 2001 +From: Zhang Chen +Date: Thu, 1 Nov 2018 10:12:26 +0800 +Subject: migration/colo.c: Fix compilation issue when disable replication + +This compilation issue will occur when user use --disable-replication +to config Qemu. + +Reported-by: Thomas Huth +Signed-off-by: Zhang Chen +--- + migration/colo.c | 28 +++++++++++++++++++++++++++- + 1 file changed, 27 insertions(+), 1 deletion(-) + +diff --git a/migration/colo.c b/migration/colo.c +index 956ac23..fcff04c 100644 +--- a/migration/colo.c ++++ b/migration/colo.c +@@ -59,6 +59,8 @@ static bool colo_runstate_is_stopped(void) + + static void secondary_vm_do_failover(void) + { ++/* COLO needs enable block-replication */ ++#ifdef CONFIG_REPLICATION + int old_state; + MigrationIncomingState *mis = migration_incoming_get_current(); + Error *local_err = NULL; +@@ -121,10 +123,14 @@ static void secondary_vm_do_failover(void) + if (mis->migration_incoming_co) { + qemu_coroutine_enter(mis->migration_incoming_co); + } ++#else ++ abort(); ++#endif + } + + static void primary_vm_do_failover(void) + { ++#ifdef CONFIG_REPLICATION + MigrationState *s = migrate_get_current(); + int old_state; + Error *local_err = NULL; +@@ -165,6 +171,9 @@ static void primary_vm_do_failover(void) + + /* Notify COLO thread that failover work is finished */ + qemu_sem_post(&s->colo_exit_sem); ++#else ++ abort(); ++#endif + } + + 
COLOMode get_colo_mode(void) +@@ -415,11 +424,16 @@ static int colo_do_checkpoint_transaction(MigrationState *s, + /* Disable block migration */ + migrate_set_block_enabled(false, &local_err); + qemu_mutex_lock_iothread(); ++ ++#ifdef CONFIG_REPLICATION + replication_do_checkpoint_all(&local_err); + if (local_err) { + qemu_mutex_unlock_iothread(); + goto out; + } ++#else ++ abort(); ++#endif + + colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err); + if (local_err) { +@@ -523,11 +537,15 @@ static void colo_process_checkpoint(MigrationState *s) + object_unref(OBJECT(bioc)); + + qemu_mutex_lock_iothread(); ++#ifdef CONFIG_REPLICATION + replication_start_all(REPLICATION_MODE_PRIMARY, &local_err); + if (local_err) { + qemu_mutex_unlock_iothread(); + goto out; + } ++#else ++ abort(); ++#endif + + vm_start(); + qemu_mutex_unlock_iothread(); +@@ -690,11 +708,15 @@ void *colo_process_incoming_thread(void *opaque) + object_unref(OBJECT(bioc)); + + qemu_mutex_lock_iothread(); ++#ifdef CONFIG_REPLICATION + replication_start_all(REPLICATION_MODE_SECONDARY, &local_err); + if (local_err) { + qemu_mutex_unlock_iothread(); + goto out; + } ++#else ++ abort(); ++#endif + vm_start(); + trace_colo_vm_state_change("stop", "run"); + qemu_mutex_unlock_iothread(); +@@ -785,18 +807,22 @@ void *colo_process_incoming_thread(void *opaque) + goto out; + } + ++#ifdef CONFIG_REPLICATION + replication_get_error_all(&local_err); + if (local_err) { + qemu_mutex_unlock_iothread(); + goto out; + } ++ + /* discard colo disk buffer */ + replication_do_checkpoint_all(&local_err); + if (local_err) { + qemu_mutex_unlock_iothread(); + goto out; + } +- ++#else ++ abort(); ++#endif + /* Notify all filters of all NIC to do checkpoint */ + colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err); + +-- +1.8.3.1 + diff --git a/0004-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch similarity index 83% rename from 0004-Initial-redhat-build.patch rename to 
0005-Initial-redhat-build.patch index 06670c5..2e1f16c 100644 --- a/0004-Initial-redhat-build.patch +++ b/0005-Initial-redhat-build.patch @@ -1,41 +1,39 @@ -From a1f1313c0c96b2a159647aabc6a4b0f3a3f4424a Mon Sep 17 00:00:00 2001 +From f0cd0ed26f3a3ae0610fad93c9dde26b54910abb Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Thu, 8 Nov 2018 11:17:08 +0100 +Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build This patch introduces redhat build structure in redhat subdirectory. In addition, several issues are fixed in QEMU tree: -- Change of app name for sasl_server_init in VNC code from qemu to qemu-kvm - - As we use qemu-kvm as name in all places, this is updated to be consistent -- Man page renamed from qemu to qemu-kvm - - man page is installed using make install so we have to fix it in qemu tree -- Use "/share/qemu-kvm" as SHARE_SUFFIX - - We reconfigured our share to qemu-kvm to be consistent with used name + - Change of app name for sasl_server_init in VNC code from qemu to qemu-kvm + - As we use qemu-kvm as name in all places, this is updated to be consistent + - Man page renamed from qemu to qemu-kvm + - man page is installed using make install so we have to fix it in qemu tree + - Use "/share/qemu-kvm" as SHARE_SUFFIX + - We reconfigured our share to qemu-kvm to be consistent with used name -This commit is synchronized with qemu-kvm-2.12.0-42.el8 build. 
- -Signed-off-by: Miroslav Rezanina +This rebase includes changes up to qemu-kvm-2.12.0-42.el8 --- Makefile | 3 +- block/Makefile.objs | 2 +- block/vxhs.c | 119 ++- - configure | 40 +- + configure | 33 +- os-posix.c | 2 +- redhat/Makefile | 82 ++ redhat/Makefile.common | 49 ++ - redhat/qemu-kvm.spec.template | 1721 +++++++++++++++++++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 1723 +++++++++++++++++++++++++++++++++++++++++ ui/vnc.c | 2 +- - 9 files changed, 1972 insertions(+), 48 deletions(-) + 9 files changed, 1972 insertions(+), 43 deletions(-) create mode 100644 redhat/Makefile create mode 100644 redhat/Makefile.common create mode 100644 redhat/qemu-kvm.spec.template diff --git a/Makefile b/Makefile -index 2da686b..eb4c57a 100644 +index f294718..152821a 100644 --- a/Makefile +++ b/Makefile -@@ -501,6 +501,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM +@@ -503,6 +503,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM CAP_CFLAGS += -DCAPSTONE_HAS_ARM64 CAP_CFLAGS += -DCAPSTONE_HAS_POWERPC CAP_CFLAGS += -DCAPSTONE_HAS_X86 @@ -43,7 +41,7 @@ index 2da686b..eb4c57a 100644 subdir-capstone: .git-submodule-status $(call quiet-command,$(MAKE) -C $(SRC_PATH)/capstone CAPSTONE_SHARED=no BUILDDIR="$(BUILD_DIR)/capstone" CC="$(CC)" AR="$(AR)" LD="$(LD)" RANLIB="$(RANLIB)" CFLAGS="$(CAP_CFLAGS)" $(SUBDIR_MAKEFLAGS) $(BUILD_DIR)/capstone/$(LIBCAPSTONE)) -@@ -819,7 +820,7 @@ install-doc: $(DOCS) +@@ -830,7 +831,7 @@ install-doc: $(DOCS) $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)" ifdef CONFIG_POSIX $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" @@ -53,10 +51,10 @@ index 2da686b..eb4c57a 100644 $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) docs/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" diff --git a/block/Makefile.objs b/block/Makefile.objs -index c8337bf..cd1e309 100644 +index 46d585c..a244100 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs -@@ -21,7 +21,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o +@@ -30,7 
+30,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o block-obj-$(CONFIG_CURL) += curl.o block-obj-$(CONFIG_RBD) += rbd.o block-obj-$(CONFIG_GLUSTERFS) += gluster.o @@ -257,26 +255,10 @@ index 0cb0a00..9164b3e 100644 trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno); return -EIO; diff --git a/configure b/configure -index 2a7796e..0a27137 100755 +index 0a3c6a7..98b05c5 100755 --- a/configure +++ b/configure -@@ -2216,13 +2216,10 @@ fi - ########################################## - # libseccomp check - -+libseccomp_minver="2.2.0" - if test "$seccomp" != "no" ; then - case "$cpu" in -- i386|x86_64) -- libseccomp_minver="2.1.0" -- ;; -- mips) -- libseccomp_minver="2.2.0" -+ i386|x86_64|mips) - ;; - arm|aarch64) - libseccomp_minver="2.2.3" -@@ -3460,7 +3457,7 @@ fi +@@ -3459,7 +3459,7 @@ fi glib_req_ver=2.40 glib_modules=gthread-2.0 @@ -285,7 +267,7 @@ index 2a7796e..0a27137 100755 glib_modules="$glib_modules gmodule-export-2.0" fi -@@ -5435,33 +5432,6 @@ if compile_prog "" "" ; then +@@ -5494,33 +5494,6 @@ if compile_prog "" "" ; then fi ########################################## @@ -319,7 +301,7 @@ index 2a7796e..0a27137 100755 # check for _Static_assert() have_static_assert=no -@@ -6759,8 +6729,8 @@ if test "$pthread_setname_np" = "yes" ; then +@@ -6854,8 +6827,8 @@ if test "$pthread_setname_np" = "yes" ; then fi if test "$vxhs" = "yes" ; then @@ -329,9 +311,9 @@ index 2a7796e..0a27137 100755 + echo "VXHS_LIBS= -lssl" >> $config_host_mak fi - if test "$tcg_interpreter" = "yes"; then + if test "$libpmem" = "yes" ; then diff --git a/os-posix.c b/os-posix.c -index 9ce6f74..c4cfd0d 100644 +index 4bd80e4..ca13206 100644 --- a/os-posix.c +++ b/os-posix.c @@ -82,7 +82,7 @@ void os_setup_signal_handling(void) @@ -344,18 +326,18 @@ index 9ce6f74..c4cfd0d 100644 char *os_find_datadir(void) { diff --git a/ui/vnc.c b/ui/vnc.c -index 3596932..050c421 100644 +index 0c1b477..d7903a7 100644 --- a/ui/vnc.c +++ b/ui/vnc.c -@@ -4054,7 +4054,7 @@ void vnc_display_open(const char *id, 
Error **errp) - trace_vnc_auth_init(vd, 1, vd->ws_auth, vd->ws_subauth); +@@ -3962,7 +3962,7 @@ void vnc_display_open(const char *id, Error **errp) #ifdef CONFIG_VNC_SASL -- if ((saslErr = sasl_server_init(NULL, "qemu")) != SASL_OK) { -+ if ((saslErr = sasl_server_init(NULL, "qemu-kvm")) != SASL_OK) { - error_setg(errp, "Failed to initialize SASL auth: %s", - sasl_errstring(saslErr, NULL, NULL)); - goto fail; + if (sasl) { +- int saslErr = sasl_server_init(NULL, "qemu"); ++ int saslErr = sasl_server_init(NULL, "qemu-kvm"); + + if (saslErr != SASL_OK) { + error_setg(errp, "Failed to initialize SASL auth: %s", -- 1.8.3.1 diff --git a/0005-Enable-disable-devices-for-RHEL-7.patch b/0006-Enable-disable-devices-for-RHEL.patch similarity index 74% rename from 0005-Enable-disable-devices-for-RHEL-7.patch rename to 0006-Enable-disable-devices-for-RHEL.patch index ec26beb..6a367f2 100644 --- a/0005-Enable-disable-devices-for-RHEL-7.patch +++ b/0006-Enable-disable-devices-for-RHEL.patch @@ -1,88 +1,68 @@ -From b4a5b95153ca86eba72ff4a368a24ac31b77bbe5 Mon Sep 17 00:00:00 2001 +From 38eba79aaa865ffa3e85bfa56e644e0846731744 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 11 Jan 2016 11:53:33 +0100 -Subject: Enable/disable devices for RHEL 7 +Subject: Enable/disable devices for RHEL -This commit adds all changes related to changes in supported devices -up to qemu-kvm-2.12.0-42.el8. +This commit adds all changes related to changes in supported devices. 
Signed-off-by: Miroslav Rezanina -Rebase notes (3.0.0): -- Added CONFIG_SCSI -- Enabled SMMUV3 -- Using CONFIG_VIRTIO_CRYPTO to remove crypto -- Added CONFIG_VIRTIO_MMIO for aarch64 -- Disabled arvm7v.c compile -- Introduced virtio.mak -- Disabled make check -- Removed test changes (moved to next patch) - -Merged patches (3.0.0): -- a2721f6 Re-enable disabled Hyper-V enlightenments -- c670fa1 Disable aarch64 devices reappeared after 2.12 rebase -- 3ebdb95 Disable split-irq device -- d68f80c Disable AT24Cx i2c eeprom -- 3f953e9 Disable CAN bus devices -- 284c393 Disable new superio devices -- 747643c Disable new pvrdma device -- 0d4f38c s390x: Re-enable CONFIG_TERMINAL3270 -- 0f725e9 AArch64: Enable CONFIG_FW_CFG_DMA for aarch64 -- 67c5a8c Disable ivshmem +Rebase notes (qemu 3.1.0) +- spapr_rng disabled in default_config +- new hyperv.mak in default configs +- Move changes from x86_64-softmmu.mak to i386-softmmu.mak +- Added CONFIG_VIRTIO_MMIO to aarch64-softmmu.mak --- - default-configs/aarch64-softmmu.mak | 37 ++++++++++++++++++++++++++++-------- - default-configs/pci.mak | 38 ++++++++++++++++++------------------- - default-configs/ppc64-softmmu.mak | 25 ++++++++++++++++++------ + default-configs/aarch64-softmmu.mak | 40 +++++++++++++++++++++++++++---------- + default-configs/hyperv.mak | 2 +- + default-configs/i386-softmmu.mak | 26 ++++++++++++------------ + default-configs/pci.mak | 38 +++++++++++++++++------------------ + default-configs/ppc64-softmmu.mak | 28 +++++++++++++++++++------- default-configs/s390x-softmmu.mak | 5 +++-- default-configs/sound.mak | 8 ++++---- - default-configs/usb.mak | 14 +++++++------- + default-configs/usb.mak | 14 ++++++------- default-configs/virtio.mak | 5 ++--- - default-configs/x86_64-softmmu.mak | 28 +++++++++++++-------------- hw/acpi/ich9.c | 4 ++-- hw/arm/Makefile.objs | 2 +- hw/block/fdc.c | 1 + hw/char/serial-pci.c | 4 ++++ - hw/core/Makefile.objs | 9 +++++---- - hw/display/cirrus_vga.c | 2 ++ + hw/core/Makefile.objs | 10 
++++++---- + hw/display/cirrus_vga_isa.c | 2 ++ hw/i386/pc.c | 2 ++ hw/ide/piix.c | 5 ++++- hw/ide/via.c | 2 ++ hw/input/pckbd.c | 2 ++ hw/isa/Makefile.objs | 2 +- hw/misc/Makefile.objs | 2 +- - hw/misc/ivshmem.c | 11 +++++++++++ + hw/misc/ivshmem.c | 11 ++++++++++ hw/net/e1000.c | 2 ++ - hw/nvram/Makefile.objs | 2 +- hw/pci-host/piix.c | 4 ++++ - hw/ppc/Makefile.objs | 2 +- - hw/ppc/spapr.c | 3 ++- hw/ppc/spapr_cpu_core.c | 2 ++ hw/rdma/Makefile.objs | 3 ++- - hw/s390x/virtio-ccw.c | 8 ++++++++ hw/usb/ccid-card-emulated.c | 2 ++ - hw/vfio/Makefile.objs | 1 - + hw/vfio/Makefile.objs | 3 --- hw/vfio/pci-quirks.c | 5 +++++ hw/virtio/virtio-pci.c | 8 ++++---- qemu-options.hx | 5 ----- redhat/qemu-kvm.spec.template | 2 +- stubs/Makefile.objs | 1 + - stubs/ide-isa.c | 13 +++++++++++++ + stubs/ide-isa.c | 13 ++++++++++++ target/arm/cpu.c | 4 +++- - target/i386/cpu.c | 35 ++++++++++++++++++++++++++-------- - target/ppc/cpu-models.c | 17 ++++++++++++++++- + target/i386/cpu.c | 35 ++++++++++++++++++++++++-------- + target/ppc/cpu-models.c | 12 +++++++++++ target/s390x/cpu_models.c | 3 +++ target/s390x/kvm.c | 8 ++++++++ vl.c | 2 +- - 43 files changed, 241 insertions(+), 99 deletions(-) + 40 files changed, 229 insertions(+), 100 deletions(-) create mode 100644 stubs/ide-isa.c diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak -index 6f790f0..3f27540 100644 +index 4ea9add..221e266 100644 --- a/default-configs/aarch64-softmmu.mak +++ b/default-configs/aarch64-softmmu.mak -@@ -1,11 +1,32 @@ +@@ -1,12 +1,32 @@ # Default configuration for aarch64-softmmu - +- -# We support all the 32 bit boards so need all their config -include arm-softmmu.mak - @@ -91,12 +71,14 @@ index 6f790f0..3f27540 100644 -CONFIG_DPCD=y -CONFIG_XLNX_ZYNQMP=y -CONFIG_XLNX_ZYNQMP_ARM=y - CONFIG_ARM_SMMUV3=y +-CONFIG_XLNX_VERSAL=y +# CONFIG_AUX=y +# CONFIG_DDC=y +# CONFIG_DPCD=y +# CONFIG_XLNX_ZYNQMP=y +# CONFIG_XLNX_ZYNQMP_ARM=y ++# CONFIG_XLNX_VERSAL=y + 
CONFIG_ARM_SMMUV3=y +CONFIG_PCI=y +CONFIG_PCI_TESTDEV=y +CONFIG_VIRTIO_PCI=y @@ -121,8 +103,71 @@ index 6f790f0..3f27540 100644 +CONFIG_USB=y +CONFIG_I2C=y +CONFIG_FW_CFG_DMA=y +diff --git a/default-configs/hyperv.mak b/default-configs/hyperv.mak +index 5d0d9fd..fce5d91 100644 +--- a/default-configs/hyperv.mak ++++ b/default-configs/hyperv.mak +@@ -1,2 +1,2 @@ + CONFIG_HYPERV=$(CONFIG_KVM) +-CONFIG_HYPERV_TESTDEV=y ++#CONFIG_HYPERV_TESTDEV=y +diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak +index 64c998c..faea212 100644 +--- a/default-configs/i386-softmmu.mak ++++ b/default-configs/i386-softmmu.mak +@@ -5,20 +5,20 @@ include sound.mak + include usb.mak + include hyperv.mak + CONFIG_QXL=$(CONFIG_SPICE) +-CONFIG_VGA_ISA=y ++#CONFIG_VGA_ISA=y + CONFIG_VGA_CIRRUS=y +-CONFIG_VMWARE_VGA=y +-CONFIG_VMXNET3_PCI=y ++#CONFIG_VMWARE_VGA=y ++#CONFIG_VMXNET3_PCI=y + CONFIG_VIRTIO_VGA=y + CONFIG_VMMOUSE=y + CONFIG_IPMI=y +-CONFIG_IPMI_LOCAL=y +-CONFIG_IPMI_EXTERN=y +-CONFIG_ISA_IPMI_KCS=y +-CONFIG_ISA_IPMI_BT=y ++#CONFIG_IPMI_LOCAL=y ++#CONFIG_IPMI_EXTERN=y ++#CONFIG_ISA_IPMI_KCS=y ++#CONFIG_ISA_IPMI_BT=y + CONFIG_SERIAL=y + CONFIG_SERIAL_ISA=y +-CONFIG_PARALLEL=y ++#CONFIG_PARALLEL=y + CONFIG_I8254=y + CONFIG_PCSPK=y + CONFIG_PCKBD=y +@@ -30,11 +30,11 @@ CONFIG_ACPI_MEMORY_HOTPLUG=y + CONFIG_ACPI_CPU_HOTPLUG=y + CONFIG_APM=y + CONFIG_I8257=y +-CONFIG_IDE_ISA=y ++#CONFIG_IDE_ISA=y + CONFIG_IDE_PIIX=y +-CONFIG_NE2000_ISA=y +-CONFIG_HPET=y +-CONFIG_APPLESMC=y ++#CONFIG_NE2000_ISA=y ++#CONFIG_HPET=y ++#CONFIG_APPLESMC=y + CONFIG_I8259=y + CONFIG_PFLASH_CFI01=y + CONFIG_TPM_TIS=$(CONFIG_TPM) +@@ -66,4 +66,4 @@ CONFIG_FW_CFG_DMA=y + CONFIG_I2C=y + CONFIG_SEV=$(CONFIG_KVM) + CONFIG_VTD=y +-CONFIG_AMD_IOMMU=y ++#CONFIG_AMD_IOMMU=y diff --git a/default-configs/pci.mak b/default-configs/pci.mak -index de53d20..70e40ad 100644 +index 6c7be12..292b3f2 100644 --- a/default-configs/pci.mak +++ b/default-configs/pci.mak @@ -4,22 +4,22 @@ CONFIG_ISA_BUS=y @@ 
-158,7 +203,7 @@ index de53d20..70e40ad 100644 CONFIG_RTL8139_PCI=y CONFIG_E1000_PCI=y CONFIG_E1000E_PCI=y -@@ -27,22 +27,22 @@ CONFIG_IDE_CORE=y +@@ -27,23 +27,23 @@ CONFIG_IDE_CORE=y CONFIG_IDE_QDEV=y CONFIG_IDE_PCI=y CONFIG_AHCI=y @@ -186,19 +231,21 @@ index de53d20..70e40ad 100644 CONFIG_EDU=y CONFIG_VGA=y CONFIG_VGA_PCI=y + CONFIG_BOCHS_DISPLAY=y -CONFIG_IVSHMEM_DEVICE=$(CONFIG_IVSHMEM) -CONFIG_ROCKER=y +#CONFIG_IVSHMEM_DEVICE=$(CONFIG_IVSHMEM) +#CONFIG_ROCKER=y diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak -index b94af6c..30ca76d 100644 +index aec2855..a492986 100644 --- a/default-configs/ppc64-softmmu.mak +++ b/default-configs/ppc64-softmmu.mak -@@ -1,14 +1,27 @@ +@@ -1,14 +1,28 @@ # Default configuration for ppc64-softmmu -# Include all 32-bit boards -include ppc-softmmu.mak ++include sound.mak +include usb.mak +include virtio.mak + @@ -228,11 +275,17 @@ index b94af6c..30ca76d 100644 # For pSeries CONFIG_PSERIES=y +@@ -18,4 +32,4 @@ CONFIG_XICS_SPAPR=$(CONFIG_PSERIES) + CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM)) + CONFIG_MEM_DEVICE=y + CONFIG_DIMM=y +-CONFIG_SPAPR_RNG=y ++#CONFIG_SPAPR_RNG=y diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak -index d6b67d5..8b2db3e 100644 +index 5eef375..49a59fc 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak -@@ -1,9 +1,10 @@ +@@ -1,10 +1,11 @@ CONFIG_PCI=y -CONFIG_VIRTIO_PCI=$(CONFIG_PCI) +#CONFIG_VIRTIO_PCI=$(CONFIG_PCI) @@ -245,6 +298,7 @@ index d6b67d5..8b2db3e 100644 +# Disabled for Red Hat Enterprise Linux: +# CONFIG_VFIO_CCW=$(CONFIG_LINUX) CONFIG_WDT_DIAG288=y + CONFIG_VFIO_AP=$(CONFIG_LINUX) diff --git a/default-configs/sound.mak b/default-configs/sound.mak index 4f22c34..1bead9b 100644 --- a/default-configs/sound.mak @@ -298,69 +352,6 @@ index 1304849..6330e6b 100644 CONFIG_VIRTIO_GPU=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_NET=y -diff --git a/default-configs/x86_64-softmmu.mak 
b/default-configs/x86_64-softmmu.mak -index 0390b43..613fc50 100644 ---- a/default-configs/x86_64-softmmu.mak -+++ b/default-configs/x86_64-softmmu.mak -@@ -4,20 +4,20 @@ include pci.mak - include sound.mak - include usb.mak - CONFIG_QXL=$(CONFIG_SPICE) --CONFIG_VGA_ISA=y -+#CONFIG_VGA_ISA=y - CONFIG_VGA_CIRRUS=y --CONFIG_VMWARE_VGA=y --CONFIG_VMXNET3_PCI=y -+#CONFIG_VMWARE_VGA=y -+#CONFIG_VMXNET3_PCI=y - CONFIG_VIRTIO_VGA=y - CONFIG_VMMOUSE=y - CONFIG_IPMI=y --CONFIG_IPMI_LOCAL=y --CONFIG_IPMI_EXTERN=y --CONFIG_ISA_IPMI_KCS=y --CONFIG_ISA_IPMI_BT=y -+#CONFIG_IPMI_LOCAL=y -+#CONFIG_IPMI_EXTERN=y -+#CONFIG_ISA_IPMI_KCS=y -+#CONFIG_ISA_IPMI_BT=y - CONFIG_SERIAL=y - CONFIG_SERIAL_ISA=y --CONFIG_PARALLEL=y -+#CONFIG_PARALLEL=y - CONFIG_I8254=y - CONFIG_PCSPK=y - CONFIG_PCKBD=y -@@ -29,11 +29,11 @@ CONFIG_ACPI_MEMORY_HOTPLUG=y - CONFIG_ACPI_CPU_HOTPLUG=y - CONFIG_APM=y - CONFIG_I8257=y --CONFIG_IDE_ISA=y -+#CONFIG_IDE_ISA=y - CONFIG_IDE_PIIX=y --CONFIG_NE2000_ISA=y --CONFIG_HPET=y --CONFIG_APPLESMC=y -+#CONFIG_NE2000_ISA=y -+#CONFIG_HPET=y -+#CONFIG_APPLESMC=y - CONFIG_I8259=y - CONFIG_PFLASH_CFI01=y - CONFIG_TPM_TIS=$(CONFIG_TPM) -@@ -58,11 +58,11 @@ CONFIG_XIO3130=y - CONFIG_IOH3420=y - CONFIG_I82801B11=y - CONFIG_SMBIOS=y --CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM) -+#CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM) - CONFIG_PXB=y - CONFIG_ACPI_VMGENID=y - CONFIG_FW_CFG_DMA=y - CONFIG_I2C=y - CONFIG_SEV=$(CONFIG_KVM) - CONFIG_VTD=y --CONFIG_AMD_IOMMU=y -+#CONFIG_AMD_IOMMU=y diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index c5d8646..a4e87b8 100644 --- a/hw/acpi/ich9.c @@ -377,7 +368,7 @@ index c5d8646..a4e87b8 100644 object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs -index d51fcec..52ec91b 100644 +index 50c7b4a..149848e 100644 --- a/hw/arm/Makefile.objs +++ b/hw/arm/Makefile.objs @@ -16,7 +16,7 @@ obj-$(CONFIG_STRONGARM) += collie.o @@ -390,7 +381,7 @@ index d51fcec..52ec91b 100644 obj-$(CONFIG_PXA2XX) += pxa2xx.o 
pxa2xx_gpio.o pxa2xx_pic.o obj-$(CONFIG_DIGIC) += digic.o diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 2e9c1e1..f284df7 100644 +index 6f19f12..56b7aeb 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -599,6 +599,7 @@ static void floppy_drive_class_init(ObjectClass *klass, void *data) @@ -424,10 +415,10 @@ index cb0d04c..d426982 100644 static const TypeInfo serial_pci_info = { diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs -index eb88ca9..e967fb2 100644 +index a799c83..1c7ba0b 100644 --- a/hw/core/Makefile.objs +++ b/hw/core/Makefile.objs -@@ -16,10 +16,11 @@ common-obj-$(CONFIG_SOFTMMU) += machine.o +@@ -16,9 +16,11 @@ common-obj-$(CONFIG_SOFTMMU) += machine.o common-obj-$(CONFIG_SOFTMMU) += loader.o common-obj-$(CONFIG_FITLOADER) += loader-fit.o common-obj-$(CONFIG_SOFTMMU) += qdev-properties-system.o @@ -435,19 +426,19 @@ index eb88ca9..e967fb2 100644 -common-obj-$(CONFIG_SOFTMMU) += or-irq.o -common-obj-$(CONFIG_SOFTMMU) += split-irq.o +# Disabled in Red Hat Enterprise Linux -+# common-obj-$(CONFIG_SOFTMMU) += register.o -+# obj-$(CONFIG_SOFTMMU) += generic-loader.o -+# common-obj-$(CONFIG_SOFTMMU) += or-irq.o ++#common-obj-$(CONFIG_SOFTMMU) += register.o ++#obj-$(CONFIG_SOFTMMU) += generic-loader.o ++#common-obj-$(CONFIG_SOFTMMU) += or-irq.o +#common-obj-$(CONFIG_SOFTMMU) += split-irq.o common-obj-$(CONFIG_PLATFORM_BUS) += platform-bus.o - --obj-$(CONFIG_SOFTMMU) += generic-loader.o - obj-$(CONFIG_SOFTMMU) += null-machine.o -diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index 7583b18..9fd5665 100644 ---- a/hw/display/cirrus_vga.c -+++ b/hw/display/cirrus_vga.c -@@ -3075,6 +3075,8 @@ static void isa_cirrus_vga_class_init(ObjectClass *klass, void *data) +-common-obj-$(CONFIG_SOFTMMU) += generic-loader.o ++#common-obj-$(CONFIG_SOFTMMU) += generic-loader.o + common-obj-$(CONFIG_SOFTMMU) += null-machine.o +diff --git a/hw/display/cirrus_vga_isa.c b/hw/display/cirrus_vga_isa.c +index fa10b74..1cb607d 100644 +--- 
a/hw/display/cirrus_vga_isa.c ++++ b/hw/display/cirrus_vga_isa.c +@@ -81,6 +81,8 @@ static void isa_cirrus_vga_class_init(ObjectClass *klass, void *data) dc->realize = isa_cirrus_vga_realizefn; dc->props = isa_cirrus_vga_properties; set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories); @@ -457,10 +448,10 @@ index 7583b18..9fd5665 100644 static const TypeInfo isa_cirrus_vga_info = { diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 83a4444..11c287e 100644 +index f095725..567439e 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -1528,7 +1528,9 @@ static void pc_superio_init(ISABus *isa_bus, bool create_fdctrl, bool no_vmport) +@@ -1533,7 +1533,9 @@ static void pc_superio_init(ISABus *isa_bus, bool create_fdctrl, bool no_vmport) ISADevice *i8042, *port92, *vmmouse; serial_hds_isa_init(isa_bus, 0, MAX_ISA_SERIAL_PORTS); @@ -520,18 +511,18 @@ index 07c8801..c27a0f8 100644 static const TypeInfo i8042_info = { diff --git a/hw/isa/Makefile.objs b/hw/isa/Makefile.objs -index 83e06f6..7de4f44 100644 +index 9e106df..0828964 100644 --- a/hw/isa/Makefile.objs +++ b/hw/isa/Makefile.objs @@ -1,5 +1,5 @@ common-obj-$(CONFIG_ISA_BUS) += isa-bus.o --common-obj-$(CONFIG_ISA_BUS) += isa-superio.o smc37c669-superio.o -+#common-obj-$(CONFIG_ISA_BUS) += isa-superio.o smc37c669-superio.o +-common-obj-$(CONFIG_ISA_BUS) += isa-superio.o ++#common-obj-$(CONFIG_ISA_BUS) += isa-superio.o common-obj-$(CONFIG_APM) += apm.o common-obj-$(CONFIG_I82378) += i82378.o common-obj-$(CONFIG_PC87312) += pc87312.o diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs -index 9350900..9c2c404 100644 +index 680350b..ed543a6 100644 --- a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -9,7 +9,7 @@ common-obj-$(CONFIG_PCI_TESTDEV) += pci-testdev.o @@ -544,7 +535,7 @@ index 9350900..9c2c404 100644 # ARM devices diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c -index 6febbab..0786fb9 100644 +index ecfd10a..8059563 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -893,6 +893,13 @@ static void 
ivshmem_common_realize(PCIDevice *dev, Error **errp) @@ -561,7 +552,7 @@ index 6febbab..0786fb9 100644 pci_conf = dev->config; pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY; -@@ -1183,6 +1190,8 @@ static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data) +@@ -1179,6 +1186,8 @@ static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data) k->realize = ivshmem_doorbell_realize; dc->props = ivshmem_doorbell_properties; dc->vmsd = &ivshmem_doorbell_vmsd; @@ -570,7 +561,7 @@ index 6febbab..0786fb9 100644 } static const TypeInfo ivshmem_doorbell_info = { -@@ -1352,6 +1361,8 @@ static void ivshmem_class_init(ObjectClass *klass, void *data) +@@ -1349,6 +1358,8 @@ static void ivshmem_class_init(ObjectClass *klass, void *data) dc->desc = "Inter-VM shared memory (legacy)"; dc->props = ivshmem_properties; dc->vmsd = &ivshmem_vmsd; @@ -580,10 +571,10 @@ index 6febbab..0786fb9 100644 static const TypeInfo ivshmem_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index 13a9494..742cd0a 100644 +index 5e144cb..2e07880 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1768,6 +1768,7 @@ static const E1000Info e1000_devices[] = { +@@ -1778,6 +1778,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -591,7 +582,7 @@ index 13a9494..742cd0a 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1780,6 +1781,7 @@ static const E1000Info e1000_devices[] = { +@@ -1790,6 +1791,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -599,23 +590,11 @@ index 13a9494..742cd0a 100644 }; static void e1000_register_types(void) -diff --git a/hw/nvram/Makefile.objs b/hw/nvram/Makefile.objs -index a912d25..cbc8bba 100644 ---- a/hw/nvram/Makefile.objs -+++ b/hw/nvram/Makefile.objs -@@ -1,6 +1,6 @@ - common-obj-$(CONFIG_DS1225Y) += ds1225y.o - common-obj-y += eeprom93xx.o --common-obj-$(CONFIG_I2C) += eeprom_at24c.o 
-+#common-obj-$(CONFIG_I2C) += eeprom_at24c.o - common-obj-y += fw_cfg.o - common-obj-y += chrp_nvram.o - common-obj-$(CONFIG_MAC_NVRAM) += mac_nvram.o diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c -index 0e60834..3ce4b14 100644 +index d9c70f7..f294fbc 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c -@@ -787,6 +787,7 @@ static const TypeInfo i440fx_info = { +@@ -801,6 +801,7 @@ static const TypeInfo i440fx_info = { }, }; @@ -623,7 +602,7 @@ index 0e60834..3ce4b14 100644 /* IGD Passthrough Host Bridge. */ typedef struct { uint8_t offset; -@@ -870,6 +871,7 @@ static const TypeInfo igd_passthrough_i440fx_info = { +@@ -884,6 +885,7 @@ static const TypeInfo igd_passthrough_i440fx_info = { .instance_size = sizeof(PCII440FXState), .class_init = igd_passthrough_i440fx_class_init, }; @@ -631,7 +610,7 @@ index 0e60834..3ce4b14 100644 static const char *i440fx_pcihost_root_bus_path(PCIHostState *host_bridge, PCIBus *rootbus) -@@ -915,7 +917,9 @@ static const TypeInfo i440fx_pcihost_info = { +@@ -929,7 +931,9 @@ static const TypeInfo i440fx_pcihost_info = { static void i440fx_register_types(void) { type_register_static(&i440fx_info); @@ -641,45 +620,11 @@ index 0e60834..3ce4b14 100644 type_register_static(&piix3_pci_type_info); type_register_static(&piix3_info); type_register_static(&piix3_xen_info); -diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs -index bcab632..70e8780 100644 ---- a/hw/ppc/Makefile.objs -+++ b/hw/ppc/Makefile.objs -@@ -3,7 +3,7 @@ obj-y += ppc.o ppc_booke.o fdt.o - # IBM pSeries (sPAPR) - obj-$(CONFIG_PSERIES) += spapr.o spapr_caps.o spapr_vio.o spapr_events.o - obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o --obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o -+obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o - obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o - # IBM PowerNV - obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o pnv_psi.o pnv_occ.o pnv_bmc.o 
-diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 421b2dd..2f8c304 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -1303,6 +1303,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, - /* /vdevice */ - spapr_dt_vdevice(spapr->vio_bus, fdt); - -+#if 0 /* Disabled in Red Hat Enterprise Linux */ - if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)) { - ret = spapr_rng_populate_dt(fdt); - if (ret < 0) { -@@ -1310,7 +1311,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, - exit(1); - } - } -- -+#endif - QLIST_FOREACH(phb, &spapr->phbs, list) { - ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt); - if (ret < 0) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 993759d..fb29eec 100644 +index 2398ce6..63a7bb6 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -378,10 +378,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { +@@ -382,10 +382,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { .instance_size = sizeof(sPAPRCPUCore), .class_size = sizeof(sPAPRCPUCoreClass), }, @@ -693,70 +638,22 @@ index 993759d..fb29eec 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/rdma/Makefile.objs b/hw/rdma/Makefile.objs -index 3504c39..02ca2a9 100644 +index bd36cbf..e87e7e5 100644 --- a/hw/rdma/Makefile.objs +++ b/hw/rdma/Makefile.objs @@ -1,5 +1,6 @@ - ifeq ($(CONFIG_RDMA),y) + ifeq ($(CONFIG_PVRDMA),y) obj-$(CONFIG_PCI) += rdma_utils.o rdma_backend.o rdma_rm.o obj-$(CONFIG_PCI) += vmw/pvrdma_dev_ring.o vmw/pvrdma_cmd.o \ - vmw/pvrdma_qp_ops.o vmw/pvrdma_main.o + vmw/pvrdma_qp_ops.o +#obj-$(CONFIG_PCI) += vmw/pvrdma_main.o endif -diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c -index 7ddb378..b131781 100644 ---- a/hw/s390x/virtio-ccw.c -+++ b/hw/s390x/virtio-ccw.c -@@ -925,6 +925,8 @@ static void virtio_ccw_rng_realize(VirtioCcwDevice *ccw_dev, Error **errp) - NULL); - } - -+#if 0 /* Disabled in Red Hat Enterprise Linux */ -+ 
- static void virtio_ccw_crypto_realize(VirtioCcwDevice *ccw_dev, Error **errp) - { - VirtIOCryptoCcw *dev = VIRTIO_CRYPTO_CCW(ccw_dev); -@@ -942,6 +944,7 @@ static void virtio_ccw_crypto_realize(VirtioCcwDevice *ccw_dev, Error **errp) - OBJECT(dev->vdev.conf.cryptodev), "cryptodev", - NULL); - } -+#endif - - static void virtio_ccw_gpu_realize(VirtioCcwDevice *ccw_dev, Error **errp) - { -@@ -1532,6 +1535,8 @@ static const TypeInfo virtio_ccw_rng = { - .class_init = virtio_ccw_rng_class_init, - }; - -+#if 0 /* Disabled in Red Hat Enterprise Linux */ -+ - static Property virtio_ccw_crypto_properties[] = { - DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, - VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), -@@ -1568,6 +1573,7 @@ static const TypeInfo virtio_ccw_crypto = { - .instance_init = virtio_ccw_crypto_instance_init, - .class_init = virtio_ccw_crypto_class_init, - }; -+#endif - - static Property virtio_ccw_gpu_properties[] = { - DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, -@@ -1888,7 +1894,9 @@ static void virtio_ccw_register(void) - #ifdef CONFIG_VHOST_VSOCK - type_register_static(&vhost_vsock_ccw_info); - #endif -+#if 0 /* Disabled in Red Hat Enterprise Linux */ - type_register_static(&virtio_ccw_crypto); -+#endif - type_register_static(&virtio_ccw_gpu); - type_register_static(&virtio_ccw_input); - type_register_static(&virtio_ccw_input_hid); diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c -index 5c8b3c9..d1cbe54 100644 +index 25976ed..a793584 100644 --- a/hw/usb/ccid-card-emulated.c +++ b/hw/usb/ccid-card-emulated.c -@@ -585,6 +585,8 @@ static void emulated_class_initfn(ObjectClass *klass, void *data) +@@ -600,6 +600,8 @@ static void emulated_class_initfn(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_INPUT, dc->categories); dc->desc = "emulated smartcard"; dc->props = emulated_card_properties; @@ -766,19 +663,21 @@ index 5c8b3c9..d1cbe54 100644 static const TypeInfo emulated_card_info = { diff --git 
a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs -index a2e7a0a..d38205b 100644 +index 8b3f664..a6b6039 100644 --- a/hw/vfio/Makefile.objs +++ b/hw/vfio/Makefile.objs -@@ -2,7 +2,6 @@ ifeq ($(CONFIG_LINUX), y) +@@ -2,9 +2,6 @@ ifeq ($(CONFIG_LINUX), y) obj-$(CONFIG_SOFTMMU) += common.o obj-$(CONFIG_PCI) += pci.o pci-quirks.o display.o obj-$(CONFIG_VFIO_CCW) += ccw.o -obj-$(CONFIG_SOFTMMU) += platform.o - obj-$(CONFIG_VFIO_XGMAC) += calxeda-xgmac.o - obj-$(CONFIG_VFIO_AMD_XGBE) += amd-xgbe.o +-obj-$(CONFIG_VFIO_XGMAC) += calxeda-xgmac.o +-obj-$(CONFIG_VFIO_AMD_XGBE) += amd-xgbe.o obj-$(CONFIG_SOFTMMU) += spapr.o + obj-$(CONFIG_VFIO_AP) += ap.o + endif diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index 481fd08..1c588f5 100644 +index eae31c7..4a6e98e 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -1387,6 +1387,8 @@ static void vfio_pci_igd_lpc_bridge_class_init(ObjectClass *klass, void *data) @@ -801,7 +700,7 @@ index 481fd08..1c588f5 100644 /* * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index 3a01fe9..3567faf 100644 +index a954799..9a987cb 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -2003,7 +2003,7 @@ static const TypeInfo virtio_blk_pci_info = { @@ -841,10 +740,10 @@ index 3a01fe9..3567faf 100644 #endif #ifdef CONFIG_VHOST_VSOCK diff --git a/qemu-options.hx b/qemu-options.hx -index b1bf0f4..37f2aa8 100644 +index f7df472..cd2b25b 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -1811,11 +1811,6 @@ ETEXI +@@ -1741,11 +1741,6 @@ ETEXI DEF("no-hpet", 0, QEMU_OPTION_no_hpet, "-no-hpet disable HPET\n", QEMU_ARCH_I386) @@ -857,13 +756,13 @@ index b1bf0f4..37f2aa8 100644 DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs 
-index 53d3f32..b1360c4 100644 +index 5dd0aee..f27250e 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs -@@ -43,3 +43,4 @@ stub-obj-y += xen-common.o - stub-obj-y += xen-hvm.o +@@ -44,3 +44,4 @@ stub-obj-y += xen-hvm.o stub-obj-y += pci-host-piix.o stub-obj-y += ram-block.o + stub-obj-y += ramfb.o +stub-obj-y += ide-isa.o diff --git a/stubs/ide-isa.c b/stubs/ide-isa.c new file mode 100644 @@ -885,10 +784,10 @@ index 0000000..9fd50ef + abort(); +} diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 64a8005..bc8d09d 100644 +index 60411f6..d2ac5bb 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c -@@ -2029,7 +2029,9 @@ static void arm_cpu_register_types(void) +@@ -2149,7 +2149,9 @@ static void arm_cpu_register_types(void) type_register_static(&idau_interface_type_info); while (info->name) { @@ -900,10 +799,10 @@ index 64a8005..bc8d09d 100644 } diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 723e022..338ee37 100644 +index f81d35e..e9b9183 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1366,14 +1366,14 @@ static X86CPUDefinition builtin_x86_defs[] = { +@@ -1443,14 +1443,14 @@ static X86CPUDefinition builtin_x86_defs[] = { .family = 6, .model = 6, .stepping = 3, @@ -926,7 +825,7 @@ index 723e022..338ee37 100644 .features[FEAT_8000_0001_ECX] = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM, .xlevel = 0x8000000A, -@@ -1603,6 +1603,25 @@ static X86CPUDefinition builtin_x86_defs[] = { +@@ -1680,6 +1680,25 @@ static X86CPUDefinition builtin_x86_defs[] = { .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", }, { @@ -953,7 +852,7 @@ index 723e022..338ee37 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 6c9bfde..77cb298 100644 +index 7c75963..7f179ff 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -65,6 +65,7 @@ @@ -964,13 +863,22 @@ index 6c9bfde..77cb298 100644 /* Embedded PowerPC */ /* PowerPC 401 family */ POWERPC_DEF("401", CPU_POWERPC_401, 401, -@@ 
-739,10 +740,13 @@ +@@ -739,8 +740,10 @@ "PowerPC 7447A v1.2 (G4)") POWERPC_DEF("7457a_v1.2", CPU_POWERPC_74x7A_v12, 7455, "PowerPC 7457A v1.2 (G4)") +#endif /* 64 bits PowerPC */ #if defined (TARGET_PPC64) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + POWERPC_DEF("970_v2.2", CPU_POWERPC_970_v22, 970, + "PowerPC 970 v2.2") + POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970, +@@ -757,8 +760,11 @@ + "PowerPC 970MP v1.0") + POWERPC_DEF("970mp_v1.1", CPU_POWERPC_970MP_v11, 970, + "PowerPC 970MP v1.1") ++#endif +#if 0 /* Disabled for Red Hat Enterprise Linux */ POWERPC_DEF("power5+_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, "POWER5+ v2.1") @@ -978,32 +886,7 @@ index 6c9bfde..77cb298 100644 POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, "POWER7 v2.3") POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, -@@ -753,14 +757,17 @@ - "POWER8 v2.0") - POWERPC_DEF("power8nvl_v1.0", CPU_POWERPC_POWER8NVL_v10, POWER8, - "POWER8NVL v1.0") -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - POWERPC_DEF("970_v2.2", CPU_POWERPC_970_v22, 970, - "PowerPC 970 v2.2") -+#endif - - POWERPC_DEF("power9_v1.0", CPU_POWERPC_POWER9_DD1, POWER9, - "POWER9 v1.0") - POWERPC_DEF("power9_v2.0", CPU_POWERPC_POWER9_DD20, POWER9, - "POWER9 v2.0") - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970, - "PowerPC 970FX v1.0 (G5)") - POWERPC_DEF("970fx_v2.0", CPU_POWERPC_970FX_v20, 970, -@@ -775,12 +782,14 @@ - "PowerPC 970MP v1.0") - POWERPC_DEF("970mp_v1.1", CPU_POWERPC_970MP_v11, 970, - "PowerPC 970MP v1.1") -+#endif - #endif /* defined (TARGET_PPC64) */ - - /***************************************************************************/ +@@ -779,6 +785,7 @@ /* PowerPC CPU aliases */ PowerPCCPUAlias ppc_cpu_aliases[] = { @@ -1011,34 +894,31 @@ index 6c9bfde..77cb298 100644 { "403", "403gc" }, { "405", "405d4" }, { "405cr", "405crc" }, -@@ -939,20 +948,25 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -937,12 +944,15 @@ 
PowerPCCPUAlias ppc_cpu_aliases[] = { { "7447a", "7447a_v1.2" }, { "7457a", "7457a_v1.2" }, { "apollo7pm", "7457a_v1.0" }, +#endif #if defined(TARGET_PPC64) +#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "970", "970_v2.2" }, + { "970fx", "970fx_v3.1" }, + { "970mp", "970mp_v1.1" }, { "power5+", "power5+_v2.1" }, { "power5gs", "power5+_v2.1" }, +#endif { "power7", "power7_v2.3" }, { "power7+", "power7+_v2.1" }, { "power8e", "power8e_v2.1" }, - { "power8", "power8_v2.0" }, - { "power8nvl", "power8nvl_v1.0" }, +@@ -951,6 +961,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "power9", "power9_v2.0" }, -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { "970", "970_v2.2" }, - { "970fx", "970fx_v3.1" }, - { "970mp", "970mp_v1.1" }, #endif -- -+#endif + +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* Generic PowerPCs */ #if defined(TARGET_PPC64) { "ppc64", "970fx_v3.1" }, -@@ -960,5 +974,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -958,5 +969,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "ppc32", "604" }, { "ppc", "604" }, { "default", "604" }, @@ -1046,10 +926,10 @@ index 6c9bfde..77cb298 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 604898a..9c469ff 100644 +index 7c253ff..e73f812 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c -@@ -373,6 +373,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, +@@ -380,6 +380,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, (max_model->def->gen == model->def->gen && max_model->def->ec_ga < model->def->ec_ga)) { list_add_feat("type", unavailable); @@ -1060,10 +940,10 @@ index 604898a..9c469ff 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index d923cf4..bbcbeed 100644 +index 2ebf26a..3a0337a 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c -@@ -2277,6 +2277,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel 
*model, Error **errp) +@@ -2342,6 +2342,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } @@ -1079,10 +959,10 @@ index d923cf4..bbcbeed 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ diff --git a/vl.c b/vl.c -index 16b913f..4f96203 100644 +index fa25d1a..39d152a 100644 --- a/vl.c +++ b/vl.c -@@ -164,7 +164,7 @@ unsigned int max_cpus; +@@ -171,7 +171,7 @@ unsigned int max_cpus; int smp_cores = 1; int smp_threads = 1; int acpi_enabled = 1; diff --git a/0006-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch similarity index 95% rename from 0006-Machine-type-related-general-changes.patch rename to 0007-Machine-type-related-general-changes.patch index 7693f8a..62a0b14 100644 --- a/0006-Machine-type-related-general-changes.patch +++ b/0007-Machine-type-related-general-changes.patch @@ -1,6 +1,6 @@ -From e34179d713443601a16936e2e80b8fbd044429be Mon Sep 17 00:00:00 2001 +From c59789ec7d5213bda9aeb48aacef2e3e897fdf7e Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Thu, 8 Nov 2018 11:59:55 +0100 +Date: Fri, 19 Oct 2018 12:36:59 +0200 Subject: Machine type related general changes This patch is first part of original "Add RHEL machine types" patch we @@ -12,7 +12,7 @@ Signed-off-by: Miroslav Rezanina hw/acpi/ich9.c | 16 +++ hw/acpi/piix4.c | 6 +- hw/char/serial.c | 16 +++ - hw/display/cirrus_vga.c | 2 +- + hw/display/cirrus_vga_isa.c | 2 +- hw/display/vga-isa.c | 2 +- hw/net/e1000.c | 18 ++- hw/net/e1000e.c | 21 ++++ @@ -67,7 +67,7 @@ index a4e87b8..23a7baa 100644 ich9_pm_get_disable_s3, ich9_pm_set_disable_s3, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 6404af5..0f1f9e2 100644 +index e330f24..b213f65 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -310,7 +310,7 @@ static const VMStateDescription vmstate_cpuhp_state = { @@ -91,7 +91,7 @@ index 6404af5..0f1f9e2 100644 
DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_pci_hotplug, true), diff --git a/hw/char/serial.c b/hw/char/serial.c -index 251f40f..8e3520c 100644 +index 02463e3..a591387 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -30,6 +30,7 @@ @@ -145,11 +145,11 @@ index 251f40f..8e3520c 100644 return s->poll_msl >= 0; } -diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index 9fd5665..6910014 100644 ---- a/hw/display/cirrus_vga.c -+++ b/hw/display/cirrus_vga.c -@@ -3061,7 +3061,7 @@ static void isa_cirrus_vga_realizefn(DeviceState *dev, Error **errp) +diff --git a/hw/display/cirrus_vga_isa.c b/hw/display/cirrus_vga_isa.c +index 1cb607d..22678a3 100644 +--- a/hw/display/cirrus_vga_isa.c ++++ b/hw/display/cirrus_vga_isa.c +@@ -67,7 +67,7 @@ static void isa_cirrus_vga_realizefn(DeviceState *dev, Error **errp) static Property isa_cirrus_vga_properties[] = { DEFINE_PROP_UINT32("vgamem_mb", struct ISACirrusVGAState, @@ -172,10 +172,10 @@ index fa44242..7835c83 100644 }; diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index 742cd0a..7d568da 100644 +index 2e07880..e886e7c 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1663,6 +1663,16 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) +@@ -1673,6 +1673,16 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) pci_conf = pci_dev->config; @@ -192,7 +192,7 @@ index 742cd0a..7d568da 100644 /* TODO: RST# value should be 0, PCI spec 6.2.4 */ pci_conf[PCI_CACHE_LINE_SIZE] = 0x10; -@@ -1763,7 +1773,7 @@ static const TypeInfo e1000_base_info = { +@@ -1773,7 +1783,7 @@ static const TypeInfo e1000_base_info = { static const E1000Info e1000_devices[] = { { @@ -201,7 +201,7 @@ index 742cd0a..7d568da 100644 .device_id = E1000_DEV_ID_82540EM, .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, -@@ -1784,6 +1794,11 @@ static const E1000Info e1000_devices[] = { +@@ -1794,6 +1804,11 @@ static const E1000Info e1000_devices[] = { #endif }; @@ -213,7 +213,7 @@ 
index 742cd0a..7d568da 100644 static void e1000_register_types(void) { int i; -@@ -1801,6 +1816,7 @@ static void e1000_register_types(void) +@@ -1811,6 +1826,7 @@ static void e1000_register_types(void) type_register(&type_info); } @@ -296,7 +296,7 @@ index 510ddb3..f1de9e5 100644 e1000e_prop_disable_vnet, bool), DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 46daa16..05453e7 100644 +index 2342a09..0c916b7 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3174,7 +3174,7 @@ static int rtl8139_pre_save(void *opaque) @@ -319,7 +319,7 @@ index 46daa16..05453e7 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index a27e54b..144e6e9 100644 +index 9209394..43cf057 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -775,6 +775,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, @@ -344,18 +344,18 @@ index 6190b6f..ad2ad2d 100644 vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c -index 6f1f723..68c353f 100644 +index e4e4de8..3eced9c 100644 --- a/hw/timer/mc146818rtc.c +++ b/hw/timer/mc146818rtc.c -@@ -34,6 +34,7 @@ - #include "qapi/qapi-commands-misc.h" +@@ -35,6 +35,7 @@ #include "qapi/qapi-events-misc.h" #include "qapi/visitor.h" + #include "exec/address-spaces.h" +#include "migration/migration.h" #ifdef TARGET_I386 #include "hw/i386/apic.h" -@@ -839,6 +840,11 @@ static int rtc_post_load(void *opaque, int version_id) +@@ -841,6 +842,11 @@ static int rtc_post_load(void *opaque, int version_id) static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) { RTCState *s = (RTCState *)opaque; @@ -456,10 +456,10 @@ index 59aeb06..7b5cc25 100644 #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" diff --git a/include/hw/compat.h b/include/hw/compat.h -index c08f404..22262c7 100644 +index 6f4d5fc..f08cc7c 100644 --- 
a/include/hw/compat.h +++ b/include/hw/compat.h -@@ -282,4 +282,233 @@ +@@ -285,4 +285,233 @@ .value = "on",\ }, @@ -707,10 +707,10 @@ index a5080ad..b943ec9 100644 + #endif diff --git a/migration/migration.c b/migration/migration.c -index b7d9854..381039c 100644 +index b261c1e..fb425b5 100644 --- a/migration/migration.c +++ b/migration/migration.c -@@ -106,6 +106,8 @@ enum mig_rp_message_type { +@@ -105,6 +105,8 @@ enum mig_rp_message_type { MIG_RP_MSG_MAX }; @@ -720,10 +720,10 @@ index b7d9854..381039c 100644 migrations at once. For now we don't need to add dynamic creation of migration */ diff --git a/migration/migration.h b/migration/migration.h -index 64a7b33..405d984 100644 +index e413d4d..795238c 100644 --- a/migration/migration.h +++ b/migration/migration.h -@@ -288,6 +288,11 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); +@@ -292,6 +292,11 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); void dirty_bitmap_mig_before_vm_start(void); void init_dirty_bitmap_incoming_migration(void); @@ -736,7 +736,7 @@ index 64a7b33..405d984 100644 #define qemu_ram_foreach_block \ #warning "Use qemu_ram_foreach_block_migratable in migration code" diff --git a/qdev-monitor.c b/qdev-monitor.c -index 61e0300..f439b83 100644 +index 07147c6..47ea051 100644 --- a/qdev-monitor.c +++ b/qdev-monitor.c @@ -47,7 +47,6 @@ typedef struct QDevAlias diff --git a/0007-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch similarity index 92% rename from 0007-Add-aarch64-machine-types.patch rename to 0008-Add-aarch64-machine-types.patch index c546843..ede1ec5 100644 --- a/0007-Add-aarch64-machine-types.patch +++ b/0008-Add-aarch64-machine-types.patch @@ -1,6 +1,6 @@ -From 2c0d79871ccb5383b1a91e5fc9139b6f8e8ed8e0 Mon Sep 17 00:00:00 2001 +From 6df04926524e1a9f1178b53bf2b7b8978a6d5935 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Thu, 8 Nov 2018 12:00:54 +0100 +Date: Fri, 19 Oct 2018 12:53:31 +0200 
Subject: Add aarch64 machine types Adding changes to add RHEL machine types for aarch64 architecture. @@ -12,7 +12,7 @@ Signed-off-by: Miroslav Rezanina 2 files changed, 147 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 281ddcd..b02e4a0 100644 +index a2b8d8f..703f0dd 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -60,6 +60,7 @@ @@ -61,7 +61,7 @@ index 281ddcd..b02e4a0 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -1539,6 +1569,7 @@ static void machvirt_init(MachineState *machine) +@@ -1577,6 +1607,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -69,7 +69,7 @@ index 281ddcd..b02e4a0 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1567,6 +1598,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -1605,6 +1636,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -77,7 +77,7 @@ index 281ddcd..b02e4a0 100644 static bool virt_get_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1621,6 +1653,7 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp) +@@ -1659,6 +1691,7 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp) } } @@ -85,7 +85,7 @@ index 281ddcd..b02e4a0 100644 static char *virt_get_iommu(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1648,6 +1681,7 @@ static void virt_set_iommu(Object *obj, const char *value, Error **errp) +@@ -1686,6 +1719,7 @@ static void virt_set_iommu(Object *obj, const char *value, Error **errp) error_append_hint(errp, "Valid values are none, smmuv3.\n"); } } @@ -93,7 +93,7 @@ index 281ddcd..b02e4a0 100644 static CpuInstanceProperties virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) -@@ -1687,6 +1721,7 @@ static const 
CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) +@@ -1725,6 +1759,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) return ms->possible_cpus; } @@ -101,9 +101,9 @@ index 281ddcd..b02e4a0 100644 static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { -@@ -1835,6 +1870,9 @@ static void virt_machine_3_0_options(MachineClass *mc) - } - DEFINE_VIRT_MACHINE_AS_LATEST(3, 0) +@@ -1889,6 +1924,9 @@ DEFINE_VIRT_MACHINE(3, 0) + #define VIRT_COMPAT_2_12 \ + HW_COMPAT_2_12 +#define VIRT_COMPAT_2_12 \ + HW_COMPAT_2_12 @@ -111,7 +111,7 @@ index 281ddcd..b02e4a0 100644 static void virt_2_12_instance_init(Object *obj) { virt_3_0_instance_init(obj); -@@ -1960,3 +1998,89 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -2016,3 +2054,89 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -202,10 +202,10 @@ index 281ddcd..b02e4a0 100644 +} +DEFINE_RHEL_MACHINE_AS_LATEST(7, 6, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 9a870cc..2293315 100644 +index 4cc57a7..3237e97 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -128,6 +128,7 @@ typedef struct { +@@ -130,6 +130,7 @@ typedef struct { #define VIRT_ECAM_ID(high) (high ? 
VIRT_PCIE_ECAM_HIGH : VIRT_PCIE_ECAM) @@ -213,7 +213,7 @@ index 9a870cc..2293315 100644 #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") #define VIRT_MACHINE(obj) \ OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) -@@ -136,6 +137,27 @@ typedef struct { +@@ -138,6 +139,27 @@ typedef struct { #define VIRT_MACHINE_CLASS(klass) \ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) diff --git a/0008-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch similarity index 91% rename from 0008-Add-ppc64-machine-types.patch rename to 0009-Add-ppc64-machine-types.patch index dbe223e..2fd63fd 100644 --- a/0008-Add-ppc64-machine-types.patch +++ b/0009-Add-ppc64-machine-types.patch @@ -1,6 +1,6 @@ -From b6c41d9cfe7ae58455737c967f2e47d6bc99d21e Mon Sep 17 00:00:00 2001 +From 0f1a361c4bd8fc0874cc5d05e611fadb67524a1e Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Thu, 8 Nov 2018 12:01:38 +0100 +Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types Adding changes to add RHEL machine types for ppc64 architecture. 
@@ -15,26 +15,26 @@ Signed-off-by: Miroslav Rezanina 5 files changed, 279 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 2f8c304..b8bdb69 100644 +index 7afd1a1..76a4e83 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -4009,6 +4009,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) - smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; - smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */ +@@ -3906,6 +3906,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) + smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF; spapr_caps_add_properties(smc, &error_abort); + smc->irq = &spapr_irq_xics; + smc->has_power9_support = true; } static const TypeInfo spapr_machine_info = { -@@ -4059,6 +4060,7 @@ static const TypeInfo spapr_machine_info = { +@@ -3956,6 +3957,7 @@ static const TypeInfo spapr_machine_info = { } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - /* - * pseries-3.0 + /* + * pseries-3.1 */ -@@ -4248,6 +4250,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); +@@ -4169,6 +4171,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); .property = "pre-2.8-migration", \ .value = "on", \ }, @@ -42,7 +42,7 @@ index 2f8c304..b8bdb69 100644 static void phb_placement_2_7(sPAPRMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, -@@ -4298,6 +4301,7 @@ static void phb_placement_2_7(sPAPRMachineState *spapr, uint32_t index, +@@ -4219,6 +4222,7 @@ static void phb_placement_2_7(sPAPRMachineState *spapr, uint32_t index, */ } @@ -50,7 +50,7 @@ index 2f8c304..b8bdb69 100644 static void spapr_machine_2_7_instance_options(MachineState *machine) { sPAPRMachineState *spapr = SPAPR_MACHINE(machine); -@@ -4457,6 +4461,254 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -4378,6 +4382,254 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_1); } 
DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -306,10 +306,10 @@ index 2f8c304..b8bdb69 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index fb29eec..a081b01 100644 +index 63a7bb6..fcf6174 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -21,6 +21,7 @@ +@@ -22,6 +22,7 @@ #include "sysemu/numa.h" #include "sysemu/hw_accel.h" #include "qemu/error-report.h" @@ -317,15 +317,15 @@ index fb29eec..a081b01 100644 static void spapr_cpu_reset(void *opaque) { -@@ -212,6 +213,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr, - { +@@ -218,6 +219,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr, CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); Error *local_err = NULL; + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); if (local_err) { -@@ -224,6 +226,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr, +@@ -230,6 +232,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr, cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); kvmppc_set_papr(cpu); @@ -344,13 +344,13 @@ index fb29eec..a081b01 100644 spapr_cpu_reset(cpu); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 7e5de1a..330c370 100644 +index 6279711..d2370e5 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -101,6 +101,7 @@ struct sPAPRMachineClass { - bool dr_lmb_enabled; /* enable dynamic-reconfig/hotplug of LMBs */ - bool use_ohci_by_default; /* use USB-OHCI instead of XHCI */ +@@ -106,6 +106,7 @@ struct sPAPRMachineClass { bool pre_2_10_has_unused_icps; + bool legacy_irq_allocation; + + bool has_power9_support; void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, @@ -381,10 +381,10 @@ index 7de4bf3..3e2e353 100644 const CompatInfo *compat = 
compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index 4edcf62..532f0d5 100644 +index ab68abe..c559740 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1365,6 +1365,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) +@@ -1376,6 +1376,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) diff --git a/0009-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch similarity index 78% rename from 0009-Add-s390x-machine-types.patch rename to 0010-Add-s390x-machine-types.patch index 92b5cbd..7159bb4 100644 --- a/0009-Add-s390x-machine-types.patch +++ b/0010-Add-s390x-machine-types.patch @@ -1,20 +1,23 @@ -From 05b950dccdf9e8f58f3358730aa4705642d0196f Mon Sep 17 00:00:00 2001 +From a47c6d2b9d75dcb15810fcfedcddf5eadf0ec227 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Thu, 8 Nov 2018 12:02:37 +0100 +Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types Adding changes to add RHEL machine types for s390x architecture. 
Signed-off-by: Miroslav Rezanina + +Merged patches (3.1.0): +- 29df663 s390x/cpumodel: default enable bpb and ppa15 for z196 and later --- - hw/s390x/s390-virtio-ccw.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 45 insertions(+), 1 deletion(-) + hw/s390x/s390-virtio-ccw.c | 50 +++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 7983185..0f135c9 100644 +index a0615a8..04f4c1a 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -649,7 +649,7 @@ bool css_migration_enabled(void) +@@ -627,7 +627,7 @@ bool css_migration_enabled(void) { \ MachineClass *mc = MACHINE_CLASS(oc); \ ccw_machine_##suffix##_class_options(mc); \ @@ -23,7 +26,7 @@ index 7983185..0f135c9 100644 if (latest) { \ mc->alias = "s390-ccw-virtio"; \ mc->is_default = 1; \ -@@ -676,6 +676,8 @@ bool css_migration_enabled(void) +@@ -657,6 +657,8 @@ bool css_migration_enabled(void) #define CCW_COMPAT_2_12 \ HW_COMPAT_2_12 @@ -32,7 +35,7 @@ index 7983185..0f135c9 100644 #define CCW_COMPAT_2_11 \ HW_COMPAT_2_11 \ {\ -@@ -898,6 +900,48 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -894,6 +896,52 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) } DEFINE_CCW_MACHINE(2_4, "2.4", false); @@ -67,6 +70,10 @@ index 7983185..0f135c9 100644 + /* before 2.12 we emulated the very first z900, and RHEL 7.5 is + based on 2.10 */ + s390_set_qemu_cpu_model(0x2064, 7, 1, qemu_cpu_feat); ++ ++ /* bpb and ppa15 were only in the full model in RHEL 7.5 */ ++ s390_cpudef_featoff_greater(11, 1, S390_FEAT_PPA15); ++ s390_cpudef_featoff_greater(11, 1, S390_FEAT_BPB); +} + +static void ccw_machine_rhel750_class_options(MachineClass *mc) diff --git a/0010-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch similarity index 96% rename from 0010-Add-x86_64-machine-types.patch rename to 0011-Add-x86_64-machine-types.patch 
index 30ec2d5..84feac0 100644 --- a/0010-Add-x86_64-machine-types.patch +++ b/0011-Add-x86_64-machine-types.patch @@ -1,6 +1,6 @@ -From b95483e9a18050c7dac0e6c17b049f0733a409cd Mon Sep 17 00:00:00 2001 +From edae60c4f30697c3c859cc9c88f80c0ed3dc0f0e Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Thu, 8 Nov 2018 12:03:11 +0100 +Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types Adding changes to add RHEL machine types for x86_64 architecture. @@ -17,7 +17,7 @@ Signed-off-by: Miroslav Rezanina 7 files changed, 884 insertions(+), 7 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index e1ee8ae..be9bdb5 100644 +index 236a20e..3360da9 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -184,6 +184,9 @@ static void acpi_get_pm_info(AcpiPmInfo *pm) @@ -31,10 +31,10 @@ index e1ee8ae..be9bdb5 100644 } assert(obj); diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 11c287e..253d48d 100644 +index 567439e..a609332 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -1419,7 +1419,8 @@ void pc_memory_init(PCMachineState *pcms, +@@ -1424,7 +1424,8 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr = g_malloc(sizeof(*option_rom_mr)); memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, &error_fatal); @@ -44,7 +44,7 @@ index 11c287e..253d48d 100644 memory_region_set_readonly(option_rom_mr, true); } memory_region_add_subregion_overlap(rom_memory, -@@ -2387,6 +2388,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -2389,6 +2390,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->save_tsc_khz = true; pcmc->linuxboot_dma_enabled = true; assert(!mc->get_hotplug_handler); @@ -52,7 +52,7 @@ index 11c287e..253d48d 100644 mc->get_hotplug_handler = pc_get_hotpug_handler; mc->cpu_index_to_instance_props = pc_cpu_index_to_props; mc->get_default_cpu_node_id = pc_get_default_cpu_node_id; -@@ -2396,7 +2398,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) 
+@@ -2398,7 +2400,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->default_boot_order = "cad"; mc->hot_add_cpu = pc_hot_add_cpu; mc->block_default_type = IF_IDE; @@ -63,7 +63,7 @@ index 11c287e..253d48d 100644 hc->pre_plug = pc_machine_device_pre_plug_cb; hc->plug = pc_machine_device_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index dc09466..f0484ec 100644 +index 7092d6d..83c22ae 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -50,6 +50,7 @@ @@ -93,15 +93,15 @@ index dc09466..f0484ec 100644 static void pc_compat_2_3(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); -@@ -433,6 +435,7 @@ static void pc_i440fx_3_0_machine_options(MachineClass *m) +@@ -433,6 +435,7 @@ static void pc_i440fx_3_1_machine_options(MachineClass *m) pc_i440fx_machine_options(m); m->alias = "pc"; m->is_default = 1; + SET_MACHINE_COMPAT(m, PC_COMPAT_2_12); } - DEFINE_I440FX_MACHINE(v3_0, "pc-i440fx-3.0", NULL, -@@ -1148,3 +1151,190 @@ static void xenfv_machine_options(MachineClass *m) + DEFINE_I440FX_MACHINE(v3_1, "pc-i440fx-3.1", NULL, +@@ -1157,3 +1160,190 @@ static void xenfv_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init, xenfv_machine_options); #endif @@ -293,7 +293,7 @@ index dc09466..f0484ec 100644 +DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 532241e..c1024c5 100644 +index 4702bb1..163546e 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -145,8 +145,8 @@ static void pc_q35_init(MachineState *machine) @@ -315,15 +315,15 @@ index 532241e..c1024c5 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -315,6 +316,7 @@ static void pc_q35_3_0_machine_options(MachineClass *m) +@@ -315,6 +316,7 @@ static void pc_q35_3_1_machine_options(MachineClass *m) { pc_q35_machine_options(m); m->alias = "q35"; + SET_MACHINE_COMPAT(m, 
PC_COMPAT_2_12); } - DEFINE_Q35_MACHINE(v3_0, "pc-q35-3.0", NULL, -@@ -416,3 +418,90 @@ static void pc_q35_2_4_machine_options(MachineClass *m) + DEFINE_Q35_MACHINE(v3_1, "pc-q35-3.1", NULL, +@@ -425,3 +427,90 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -415,7 +415,7 @@ index 532241e..c1024c5 100644 +DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, + pc_q35_machine_rhel730_options); diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 6894f37..ef82513 100644 +index 136fe49..f8f35af 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -134,6 +134,9 @@ struct PCMachineClass { @@ -428,7 +428,7 @@ index 6894f37..ef82513 100644 }; #define TYPE_PC_MACHINE "generic-pc-machine" -@@ -960,4 +963,565 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); +@@ -976,4 +979,565 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); type_init(pc_machine_init_##suffix) extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); @@ -995,10 +995,10 @@ index 6894f37..ef82513 100644 + }, #endif diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 338ee37..051018a 100644 +index e9b9183..573de14 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1360,11 +1360,17 @@ static CPUCaches epyc_cache_info = { +@@ -1437,11 +1437,17 @@ static CPUCaches epyc_cache_info = { static X86CPUDefinition builtin_x86_defs[] = { { @@ -1017,7 +1017,7 @@ index 338ee37..051018a 100644 .stepping = 3, .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -@@ -2684,6 +2690,7 @@ static PropValue kvm_default_props[] = { +@@ -2934,6 +2940,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -1026,10 +1026,10 @@ index 338ee37..051018a 100644 }; diff --git a/target/i386/machine.c b/target/i386/machine.c 
-index 084c2c7..0c57c26 100644 +index 225b5d4..c60e1b8 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c -@@ -955,6 +955,26 @@ static const VMStateDescription vmstate_svm_npt = { +@@ -964,6 +964,26 @@ static const VMStateDescription vmstate_svm_npt = { } }; @@ -1056,7 +1056,7 @@ index 084c2c7..0c57c26 100644 VMStateDescription vmstate_x86_cpu = { .name = "cpu", .version_id = 12, -@@ -1080,6 +1100,7 @@ VMStateDescription vmstate_x86_cpu = { +@@ -1089,6 +1109,7 @@ VMStateDescription vmstate_x86_cpu = { &vmstate_msr_intel_pt, &vmstate_msr_virt_ssbd, &vmstate_svm_npt, diff --git a/0011-Enable-make-check.patch b/0011-Enable-make-check.patch deleted file mode 100644 index 1be86ed..0000000 --- a/0011-Enable-make-check.patch +++ /dev/null @@ -1,498 +0,0 @@ -From f4e3d697cb6a18301b1279c0b07896eb5b228aa9 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Thu, 8 Nov 2018 12:03:48 +0100 -Subject: Enable make check - -Fixing tests after device disabling and machine types changes and enabling -make check run during build. 
- -Signed-off-by: Miroslav Rezanina ---- - redhat/qemu-kvm.spec.template | 2 +- - tests/Makefile.include | 123 +++++++++++++++++++++--------------------- - tests/boot-serial-test.c | 6 ++- - tests/cpu-plug-test.c | 3 +- - tests/e1000-test.c | 2 + - tests/endianness-test.c | 2 + - tests/prom-env-test.c | 2 + - tests/qemu-iotests/051 | 12 ++--- - tests/qemu-iotests/group | 4 +- - tests/qom-test.c | 2 +- - tests/test-x86-cpuid-compat.c | 2 + - tests/usb-hcd-xhci-test.c | 4 ++ - 12 files changed, 91 insertions(+), 73 deletions(-) - -diff --git a/tests/Makefile.include b/tests/Makefile.include -index a492827..4b78396 100644 ---- a/tests/Makefile.include -+++ b/tests/Makefile.include -@@ -184,8 +184,8 @@ gcov-files-generic-y = qdev-monitor.c qmp.c - check-qtest-generic-y += tests/cdrom-test$(EXESUF) - - gcov-files-ipack-y += hw/ipack/ipack.c --check-qtest-ipack-y += tests/ipoctal232-test$(EXESUF) --gcov-files-ipack-y += hw/char/ipoctal232.c -+#check-qtest-ipack-y += tests/ipoctal232-test$(EXESUF) -+#gcov-files-ipack-y += hw/char/ipoctal232.c - - check-qtest-virtioserial-y += tests/virtio-console-test$(EXESUF) - gcov-files-virtioserial-y += hw/char/virtio-console.c -@@ -217,23 +217,23 @@ check-qtest-pci-y += tests/e1000e-test$(EXESUF) - gcov-files-pci-y += hw/net/e1000e.c hw/net/e1000e_core.c - check-qtest-pci-y += tests/rtl8139-test$(EXESUF) - gcov-files-pci-y += hw/net/rtl8139.c --check-qtest-pci-y += tests/pcnet-test$(EXESUF) --gcov-files-pci-y += hw/net/pcnet.c --gcov-files-pci-y += hw/net/pcnet-pci.c --check-qtest-pci-y += tests/eepro100-test$(EXESUF) --gcov-files-pci-y += hw/net/eepro100.c --check-qtest-pci-y += tests/ne2000-test$(EXESUF) --gcov-files-pci-y += hw/net/ne2000.c --check-qtest-pci-y += tests/nvme-test$(EXESUF) --gcov-files-pci-y += hw/block/nvme.c -+#check-qtest-pci-y += tests/pcnet-test$(EXESUF) -+#gcov-files-pci-y += hw/net/pcnet.c -+#gcov-files-pci-y += hw/net/pcnet-pci.c -+#check-qtest-pci-y += tests/eepro100-test$(EXESUF) -+#gcov-files-pci-y += 
hw/net/eepro100.c -+#check-qtest-pci-y += tests/ne2000-test$(EXESUF) -+#gcov-files-pci-y += hw/net/ne2000.c -+#check-qtest-pci-y += tests/nvme-test$(EXESUF) -+#gcov-files-pci-y += hw/block/nvme.c - check-qtest-pci-y += tests/ac97-test$(EXESUF) - gcov-files-pci-y += hw/audio/ac97.c --check-qtest-pci-y += tests/es1370-test$(EXESUF) --gcov-files-pci-y += hw/audio/es1370.c -+#check-qtest-pci-y += tests/es1370-test$(EXESUF) -+#gcov-files-pci-y += hw/audio/es1370.c - check-qtest-pci-y += $(check-qtest-virtio-y) - gcov-files-pci-y += $(gcov-files-virtio-y) hw/virtio/virtio-pci.c --check-qtest-pci-y += tests/tpci200-test$(EXESUF) --gcov-files-pci-y += hw/ipack/tpci200.c -+#check-qtest-pci-y += tests/tpci200-test$(EXESUF) -+#gcov-files-pci-y += hw/ipack/tpci200.c - check-qtest-pci-y += $(check-qtest-ipack-y) - gcov-files-pci-y += $(gcov-files-ipack-y) - check-qtest-pci-y += tests/display-vga-test$(EXESUF) -@@ -245,25 +245,25 @@ gcov-files-pci-y += hw/display/virtio-gpu-pci.c - gcov-files-pci-$(CONFIG_VIRTIO_VGA) += hw/display/virtio-vga.c - check-qtest-pci-y += tests/intel-hda-test$(EXESUF) - gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c --check-qtest-pci-$(CONFIG_IVSHMEM) += tests/ivshmem-test$(EXESUF) --gcov-files-pci-y += hw/misc/ivshmem.c --check-qtest-pci-y += tests/megasas-test$(EXESUF) --gcov-files-pci-y += hw/scsi/megasas.c -+check-qtest-pci-$(CONFIG_IVSHMEM_DEVICE) += tests/ivshmem-test$(EXESUF) -+gcov-files-pci-$(CONFIG_IVSHMEM_DEVICE) += hw/misc/ivshmem.c -+#check-qtest-pci-y += tests/megasas-test$(EXESUF) -+#gcov-files-pci-y += hw/scsi/megasas.c - - check-qtest-i386-y = tests/endianness-test$(EXESUF) --check-qtest-i386-y += tests/fdc-test$(EXESUF) --gcov-files-i386-y = hw/block/fdc.c -+#check-qtest-i386-y += tests/fdc-test$(EXESUF) -+#gcov-files-i386-y = hw/block/fdc.c - check-qtest-i386-y += tests/ide-test$(EXESUF) - check-qtest-i386-y += tests/ahci-test$(EXESUF) - check-qtest-i386-y += tests/hd-geo-test$(EXESUF) - gcov-files-i386-y += 
hw/block/hd-geometry.c - check-qtest-i386-y += tests/boot-order-test$(EXESUF) --check-qtest-i386-y += tests/bios-tables-test$(EXESUF) -+#check-qtest-i386-y += tests/bios-tables-test$(EXESUF) - check-qtest-i386-y += tests/boot-serial-test$(EXESUF) - check-qtest-i386-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) - check-qtest-i386-y += tests/rtc-test$(EXESUF) --check-qtest-i386-y += tests/ipmi-kcs-test$(EXESUF) --check-qtest-i386-y += tests/ipmi-bt-test$(EXESUF) -+#check-qtest-i386-y += tests/ipmi-kcs-test$(EXESUF) -+#check-qtest-i386-y += tests/ipmi-bt-test$(EXESUF) - check-qtest-i386-y += tests/i440fx-test$(EXESUF) - check-qtest-i386-y += tests/fw_cfg-test$(EXESUF) - check-qtest-i386-y += tests/drive_del-test$(EXESUF) -@@ -272,8 +272,8 @@ check-qtest-i386-y += tests/tco-test$(EXESUF) - gcov-files-i386-y += hw/watchdog/watchdog.c hw/watchdog/wdt_ib700.c - check-qtest-i386-y += $(check-qtest-pci-y) - gcov-files-i386-y += $(gcov-files-pci-y) --check-qtest-i386-y += tests/vmxnet3-test$(EXESUF) --gcov-files-i386-y += hw/net/vmxnet3.c -+#check-qtest-i386-y += tests/vmxnet3-test$(EXESUF) -+#gcov-files-i386-y += hw/net/vmxnet3.c - gcov-files-i386-y += hw/net/net_rx_pkt.c - gcov-files-i386-y += hw/net/net_tx_pkt.c - check-qtest-i386-y += tests/pvpanic-test$(EXESUF) -@@ -282,8 +282,8 @@ check-qtest-i386-y += tests/i82801b11-test$(EXESUF) - gcov-files-i386-y += hw/pci-bridge/i82801b11.c - check-qtest-i386-y += tests/ioh3420-test$(EXESUF) - gcov-files-i386-y += hw/pci-bridge/ioh3420.c --check-qtest-i386-y += tests/usb-hcd-ohci-test$(EXESUF) --gcov-files-i386-y += hw/usb/hcd-ohci.c -+#check-qtest-i386-y += tests/usb-hcd-ohci-test$(EXESUF) -+#gcov-files-i386-y += hw/usb/hcd-ohci.c - check-qtest-i386-y += tests/usb-hcd-uhci-test$(EXESUF) - gcov-files-i386-y += hw/usb/hcd-uhci.c - check-qtest-i386-y += tests/usb-hcd-ehci-test$(EXESUF) -@@ -311,7 +311,7 @@ check-qtest-i386-y += tests/migration-test$(EXESUF) - check-qtest-i386-y += tests/test-x86-cpuid-compat$(EXESUF) - 
check-qtest-i386-y += tests/numa-test$(EXESUF) - check-qtest-x86_64-y += $(check-qtest-i386-y) --check-qtest-x86_64-y += tests/sdhci-test$(EXESUF) -+#check-qtest-x86_64-y += tests/sdhci-test$(EXESUF) - gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c - gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y)) - -@@ -332,34 +332,35 @@ check-qtest-mips64el-y = tests/endianness-test$(EXESUF) - check-qtest-moxie-y = tests/boot-serial-test$(EXESUF) - - check-qtest-ppc-y = tests/endianness-test$(EXESUF) --check-qtest-ppc-y += tests/boot-order-test$(EXESUF) -+#check-qtest-ppc-y += tests/boot-order-test$(EXESUF) - check-qtest-ppc-y += tests/prom-env-test$(EXESUF) - check-qtest-ppc-y += tests/drive_del-test$(EXESUF) - check-qtest-ppc-y += tests/boot-serial-test$(EXESUF) --check-qtest-ppc-y += tests/m48t59-test$(EXESUF) --gcov-files-ppc-y += hw/timer/m48t59.c -+#check-qtest-ppc-y += tests/m48t59-test$(EXESUF) -+#gcov-files-ppc-y += hw/timer/m48t59.c - - check-qtest-ppc64-y = $(check-qtest-ppc-y) - gcov-files-ppc64-y = $(subst ppc-softmmu/,ppc64-softmmu/,$(gcov-files-ppc-y)) - check-qtest-ppc64-y += tests/spapr-phb-test$(EXESUF) - gcov-files-ppc64-y += ppc64-softmmu/hw/ppc/spapr_pci.c --check-qtest-ppc64-y += tests/pnv-xscom-test$(EXESUF) -+#check-qtest-ppc64-y += tests/pnv-xscom-test$(EXESUF) - check-qtest-ppc64-y += tests/migration-test$(EXESUF) - check-qtest-ppc64-y += tests/rtas-test$(EXESUF) - check-qtest-ppc64-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) --check-qtest-ppc64-y += tests/usb-hcd-ohci-test$(EXESUF) --gcov-files-ppc64-y += hw/usb/hcd-ohci.c --check-qtest-ppc64-y += tests/usb-hcd-uhci-test$(EXESUF) --gcov-files-ppc64-y += hw/usb/hcd-uhci.c -+#check-qtest-ppc64-y += tests/usb-hcd-ohci-test$(EXESUF) -+#gcov-files-ppc64-y += hw/usb/hcd-ohci.c -+#check-qtest-ppc64-y += tests/usb-hcd-uhci-test$(EXESUF) -+#gcov-files-ppc64-y += hw/usb/hcd-uhci.c - check-qtest-ppc64-y += tests/usb-hcd-xhci-test$(EXESUF) - gcov-files-ppc64-y += 
hw/usb/hcd-xhci.c - check-qtest-ppc64-y += $(check-qtest-virtio-y) --check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) --check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) --check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF) -+#check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) -+#check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) -+#check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF) - check-qtest-ppc64-y += tests/display-vga-test$(EXESUF) - check-qtest-ppc64-y += tests/numa-test$(EXESUF) --check-qtest-ppc64-$(CONFIG_IVSHMEM) += tests/ivshmem-test$(EXESUF) -+check-qtest-ppc64-$(CONFIG_IVSHMEM_DEVICE) += tests/ivshmem-test$(EXESUF) -+gcov-files-ppc64-$(CONFIG_IVSHMEM_DEVICE) += hw/misc/ivshmem.c - check-qtest-ppc64-y += tests/cpu-plug-test$(EXESUF) - - check-qtest-sh4-y = tests/endianness-test$(EXESUF) -@@ -388,7 +389,7 @@ check-qtest-arm-y += tests/boot-serial-test$(EXESUF) - check-qtest-arm-y += tests/sdhci-test$(EXESUF) - - check-qtest-aarch64-y = tests/numa-test$(EXESUF) --check-qtest-aarch64-y += tests/sdhci-test$(EXESUF) -+#check-qtest-aarch64-y += tests/sdhci-test$(EXESUF) - check-qtest-aarch64-y += tests/boot-serial-test$(EXESUF) - - check-qtest-microblazeel-y = $(check-qtest-microblaze-y) -@@ -777,15 +778,15 @@ tests/endianness-test$(EXESUF): tests/endianness-test.o - tests/spapr-phb-test$(EXESUF): tests/spapr-phb-test.o $(libqos-obj-y) - tests/prom-env-test$(EXESUF): tests/prom-env-test.o $(libqos-obj-y) - tests/rtas-test$(EXESUF): tests/rtas-test.o $(libqos-spapr-obj-y) --tests/fdc-test$(EXESUF): tests/fdc-test.o -+#tests/fdc-test$(EXESUF): tests/fdc-test.o - tests/ide-test$(EXESUF): tests/ide-test.o $(libqos-pc-obj-y) - tests/ahci-test$(EXESUF): tests/ahci-test.o $(libqos-pc-obj-y) --tests/ipmi-kcs-test$(EXESUF): tests/ipmi-kcs-test.o --tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o -+#tests/ipmi-kcs-test$(EXESUF): 
tests/ipmi-kcs-test.o -+#tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o - tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o - tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y) - tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y) --tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ -+#tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ - tests/boot-sector.o tests/acpi-utils.o $(libqos-obj-y) - tests/pxe-test$(EXESUF): tests/pxe-test.o tests/boot-sector.o $(libqos-obj-y) - tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y) -@@ -798,11 +799,11 @@ tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y) - tests/e1000-test$(EXESUF): tests/e1000-test.o - tests/e1000e-test$(EXESUF): tests/e1000e-test.o $(libqos-pc-obj-y) - tests/rtl8139-test$(EXESUF): tests/rtl8139-test.o $(libqos-pc-obj-y) --tests/pcnet-test$(EXESUF): tests/pcnet-test.o --tests/pnv-xscom-test$(EXESUF): tests/pnv-xscom-test.o --tests/eepro100-test$(EXESUF): tests/eepro100-test.o --tests/vmxnet3-test$(EXESUF): tests/vmxnet3-test.o --tests/ne2000-test$(EXESUF): tests/ne2000-test.o -+#tests/pcnet-test$(EXESUF): tests/pcnet-test.o -+#tests/pnv-xscom-test$(EXESUF): tests/pnv-xscom-test.o -+#tests/eepro100-test$(EXESUF): tests/eepro100-test.o -+#tests/vmxnet3-test$(EXESUF): tests/vmxnet3-test.o -+#tests/ne2000-test$(EXESUF): tests/ne2000-test.o - tests/wdt_ib700-test$(EXESUF): tests/wdt_ib700-test.o - tests/tco-test$(EXESUF): tests/tco-test.o $(libqos-pc-obj-y) - tests/virtio-balloon-test$(EXESUF): tests/virtio-balloon-test.o $(libqos-virtio-obj-y) -@@ -813,22 +814,22 @@ tests/virtio-scsi-test$(EXESUF): tests/virtio-scsi-test.o $(libqos-virtio-obj-y) - tests/virtio-9p-test$(EXESUF): tests/virtio-9p-test.o $(libqos-virtio-obj-y) - tests/virtio-serial-test$(EXESUF): tests/virtio-serial-test.o $(libqos-virtio-obj-y) - tests/virtio-console-test$(EXESUF): tests/virtio-console-test.o $(libqos-virtio-obj-y) 
--tests/tpci200-test$(EXESUF): tests/tpci200-test.o -+#tests/tpci200-test$(EXESUF): tests/tpci200-test.o - tests/display-vga-test$(EXESUF): tests/display-vga-test.o --tests/ipoctal232-test$(EXESUF): tests/ipoctal232-test.o -+#tests/ipoctal232-test$(EXESUF): tests/ipoctal232-test.o - tests/qom-test$(EXESUF): tests/qom-test.o - tests/test-hmp$(EXESUF): tests/test-hmp.o - tests/machine-none-test$(EXESUF): tests/machine-none-test.o - tests/drive_del-test$(EXESUF): tests/drive_del-test.o $(libqos-virtio-obj-y) - tests/qdev-monitor-test$(EXESUF): tests/qdev-monitor-test.o $(libqos-pc-obj-y) --tests/nvme-test$(EXESUF): tests/nvme-test.o -+#tests/nvme-test$(EXESUF): tests/nvme-test.o - tests/pvpanic-test$(EXESUF): tests/pvpanic-test.o - tests/i82801b11-test$(EXESUF): tests/i82801b11-test.o - tests/ac97-test$(EXESUF): tests/ac97-test.o --tests/es1370-test$(EXESUF): tests/es1370-test.o -+#tests/es1370-test$(EXESUF): tests/es1370-test.o - tests/intel-hda-test$(EXESUF): tests/intel-hda-test.o - tests/ioh3420-test$(EXESUF): tests/ioh3420-test.o --tests/usb-hcd-ohci-test$(EXESUF): tests/usb-hcd-ohci-test.o $(libqos-usb-obj-y) -+#tests/usb-hcd-ohci-test$(EXESUF): tests/usb-hcd-ohci-test.o $(libqos-usb-obj-y) - tests/usb-hcd-uhci-test$(EXESUF): tests/usb-hcd-uhci-test.o $(libqos-usb-obj-y) - tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-usb-obj-y) - tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y) -@@ -841,19 +842,19 @@ tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_hel - tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y) - tests/test-keyval$(EXESUF): tests/test-keyval.o $(test-util-obj-y) $(test-qapi-obj-y) - tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y) --tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y) --tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y) 
--tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y) -+#tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y) -+#tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y) -+#tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y) - tests/test-x86-cpuid-compat$(EXESUF): tests/test-x86-cpuid-compat.o $(qtest-obj-y) - tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y) $(libqos-spapr-obj-y) --tests/megasas-test$(EXESUF): tests/megasas-test.o $(libqos-spapr-obj-y) $(libqos-pc-obj-y) -+#tests/megasas-test$(EXESUF): tests/megasas-test.o $(libqos-spapr-obj-y) $(libqos-pc-obj-y) - tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o $(test-util-obj-y) libvhost-user.a - tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y) - tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o - tests/test-qapi-util$(EXESUF): tests/test-qapi-util.o $(test-util-obj-y) - tests/numa-test$(EXESUF): tests/numa-test.o - tests/vmgenid-test$(EXESUF): tests/vmgenid-test.o tests/boot-sector.o tests/acpi-utils.o --tests/sdhci-test$(EXESUF): tests/sdhci-test.o $(libqos-pc-obj-y) -+#tests/sdhci-test$(EXESUF): tests/sdhci-test.o $(libqos-pc-obj-y) - tests/cdrom-test$(EXESUF): tests/cdrom-test.o tests/boot-sector.o $(libqos-obj-y) - - tests/migration/stress$(EXESUF): tests/migration/stress.o -diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c -index 952a2e7..5217a39 100644 ---- a/tests/boot-serial-test.c -+++ b/tests/boot-serial-test.c -@@ -80,17 +80,21 @@ static testdef_t tests[] = { - { "ppc", "g3beige", "", "PowerPC,750" }, - { "ppc", "mac99", "", "PowerPC,G4" }, - { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { "ppc64", "ppce500", "", "U-Boot" }, - { "ppc64", "prep", "-boot e", "Booting from device e" }, - { "ppc64", "40p", "-m 192", "Memory size: 192 MB" }, 
- { "ppc64", "mac99", "", "PowerPC,970FX" }, -+#endif - { "ppc64", "pseries", "", "Open Firmware" }, -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { "ppc64", "powernv", "-cpu POWER8", "OPAL" }, - { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, -+#endif - { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, - { "i386", "pc", "-device sga", "SGABIOS" }, - { "i386", "q35", "-device sga", "SGABIOS" }, -- { "x86_64", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, -+ { "x86_64", "pc", "-cpu qemu32 -device sga", "SGABIOS" }, - { "x86_64", "q35", "-device sga", "SGABIOS" }, - { "sparc", "LX", "", "TMS390S10" }, - { "sparc", "SS-4", "", "MB86904" }, -diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c -index 5f39ba0..48b8d09 100644 ---- a/tests/cpu-plug-test.c -+++ b/tests/cpu-plug-test.c -@@ -192,7 +192,8 @@ static void add_pseries_test_case(const char *mname) - PlugTestData *data; - - if (!g_str_has_prefix(mname, "pseries-") || -- (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7)) { -+ (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7) || -+ strcmp(mname,"pseries-rhel7.2.0") == 0) { - return; - } - data = g_new(PlugTestData, 1); -diff --git a/tests/e1000-test.c b/tests/e1000-test.c -index 0c5fcdc..b830432 100644 ---- a/tests/e1000-test.c -+++ b/tests/e1000-test.c -@@ -29,8 +29,10 @@ static void test_device(gconstpointer data) - static const char *models[] = { - "e1000", - "e1000-82540em", -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - "e1000-82544gc", - "e1000-82545em", -+#endif - }; - - int main(int argc, char **argv) -diff --git a/tests/endianness-test.c b/tests/endianness-test.c -index 546e096..440353d 100644 ---- a/tests/endianness-test.c -+++ b/tests/endianness-test.c -@@ -37,10 +37,12 @@ static const TestCase test_cases[] = { - { "ppc", "g3beige", 0xfe000000, .bswap = true, .superio = "i82378" }, - { "ppc", "prep", 0x80000000, .bswap = true }, - { "ppc", "bamboo", 0xe8000000, .bswap = true, .superio = 
"i82378" }, -+#if 0 /* Disabled for RHEL, since ISA is not enabled */ - { "ppc64", "mac99", 0xf2000000, .bswap = true, .superio = "i82378" }, - { "ppc64", "pseries", (1ULL << 45), .bswap = true, .superio = "i82378" }, - { "ppc64", "pseries-2.7", 0x10080000000ULL, - .bswap = true, .superio = "i82378" }, -+#endif /* Disabled for RHEL, since ISA is not enabled */ - { "sh4", "r2d", 0xfe240000, .superio = "i82378" }, - { "sh4eb", "r2d", 0xfe240000, .bswap = true, .superio = "i82378" }, - { "sparc64", "sun4u", 0x1fe02000000LL, .bswap = true }, -diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c -index 8c867e6..cc9b6ec 100644 ---- a/tests/prom-env-test.c -+++ b/tests/prom-env-test.c -@@ -82,7 +82,9 @@ int main(int argc, char *argv[]) - if (!strcmp(arch, "ppc")) { - add_tests(ppc_machines); - } else if (!strcmp(arch, "ppc64")) { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - add_tests(ppc_machines); -+#endif - if (g_test_slow()) { - qtest_add_data_func("prom-env/pseries", "pseries", test_machine); - } -diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 -index ee9c820..c5cc0ee 100755 ---- a/tests/qemu-iotests/051 -+++ b/tests/qemu-iotests/051 -@@ -183,11 +183,11 @@ run_qemu -drive if=virtio - case "$QEMU_DEFAULT_MACHINE" in - pc) - run_qemu -drive if=none,id=disk -device ide-cd,drive=disk -- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk -+# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk - run_qemu -drive if=none,id=disk -device ide-drive,drive=disk - run_qemu -drive if=none,id=disk -device ide-hd,drive=disk -- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk -- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk -+# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk -+# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk - ;; - *) - ;; -@@ -212,11 +212,11 @@ run_qemu -drive 
file="$TEST_IMG",if=virtio,readonly=on - case "$QEMU_DEFAULT_MACHINE" in - pc) - run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-cd,drive=disk -- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk -+# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk - run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-drive,drive=disk - run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-hd,drive=disk -- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk -- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk -+# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk -+# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk - ;; - *) - ;; -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index b973dc8..f1059f6 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -77,7 +77,7 @@ - 068 rw auto quick - 069 rw auto quick - 070 rw auto quick --071 rw auto quick -+# 071 rw auto quick -- requires whitelisted blkverify - 072 rw auto quick - 073 rw auto quick - 074 rw auto quick -@@ -105,7 +105,7 @@ - 096 rw auto quick - 097 rw auto backing - 098 rw auto backing quick --099 rw auto quick -+# 099 rw auto quick -- requires whitelisted blkverify - # 100 was removed, do not reuse - 101 rw auto quick - 102 rw auto quick -diff --git a/tests/qom-test.c b/tests/qom-test.c -index e6f712c..ebd15fd 100644 ---- a/tests/qom-test.c -+++ b/tests/qom-test.c -@@ -16,7 +16,7 @@ - #include "libqtest.h" - - static const char *blacklist_x86[] = { -- "xenfv", "xenpv", NULL -+ "xenfv", "xenpv", "isapc", NULL - }; - - static const struct { -diff --git a/tests/test-x86-cpuid-compat.c 
b/tests/test-x86-cpuid-compat.c -index 84ce9c7..c1ee197 100644 ---- a/tests/test-x86-cpuid-compat.c -+++ b/tests/test-x86-cpuid-compat.c -@@ -306,6 +306,7 @@ int main(int argc, char **argv) - "-cpu 486,xlevel2=0xC0000002,+xstore", - "xlevel2", 0xC0000002); - -+#if 0 /* Disabled in Red Hat Enterprise Linux */ - /* Check compatibility of old machine-types that didn't - * auto-increase level/xlevel/xlevel2: */ - -@@ -356,6 +357,7 @@ int main(int argc, char **argv) - add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", - "-machine pc-i440fx-2.4 -cpu SandyBridge,+npt", - "xlevel", 0x80000008); -+#endif - - /* Test feature parsing */ - add_feature_test("x86/cpuid/features/plus", -diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c -index 5b1b681..85fa150 100644 ---- a/tests/usb-hcd-xhci-test.c -+++ b/tests/usb-hcd-xhci-test.c -@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) - usb_test_hotplug("xhci", 1, NULL); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void test_usb_uas_hotplug(void) - { - qtest_qmp_device_add("usb-uas", "uas", NULL); -@@ -34,6 +35,7 @@ static void test_usb_uas_hotplug(void) - qtest_qmp_device_del("scsihd"); - qtest_qmp_device_del("uas"); - } -+#endif - - static void test_usb_ccid_hotplug(void) - { -@@ -52,7 +54,9 @@ int main(int argc, char **argv) - - qtest_add_func("/xhci/pci/init", test_xhci_init); - qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); -+#endif - qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); - - qtest_start("-device nec-usb-xhci,id=xhci" --- -1.8.3.1 - diff --git a/0012-Enable-make-check.patch b/0012-Enable-make-check.patch new file mode 100644 index 0000000..23edace --- /dev/null +++ b/0012-Enable-make-check.patch @@ -0,0 +1,298 @@ +From 51a0ce09fb01c87cb9bd7f1fca850e8d5d573f5f Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: 
Fri, 19 Oct 2018 13:48:41 +0200 +Subject: Enable make check + +Fixing tests after device disabling and machine types changes and enabling +make check run during build. + +Signed-off-by: Miroslav Rezanina +--- + redhat/qemu-kvm.spec.template | 2 +- + tests/Makefile.include | 32 ++++++++++++++++---------------- + tests/boot-serial-test.c | 6 +++++- + tests/cpu-plug-test.c | 3 ++- + tests/e1000-test.c | 2 ++ + tests/prom-env-test.c | 2 ++ + tests/qemu-iotests/051 | 12 ++++++------ + tests/qemu-iotests/group | 4 ++-- + tests/test-x86-cpuid-compat.c | 2 ++ + tests/usb-hcd-xhci-test.c | 4 ++++ + 10 files changed, 42 insertions(+), 27 deletions(-) + +diff --git a/tests/Makefile.include b/tests/Makefile.include +index 613242b..baeb608 100644 +--- a/tests/Makefile.include ++++ b/tests/Makefile.include +@@ -171,20 +171,20 @@ check-qtest-pci-$(CONFIG_IPACK) += $(check-qtest-ipack-y) + check-qtest-pci-y += tests/display-vga-test$(EXESUF) + check-qtest-pci-$(CONFIG_HDA) += tests/intel-hda-test$(EXESUF) + check-qtest-pci-$(CONFIG_IVSHMEM_DEVICE) += tests/ivshmem-test$(EXESUF) +-check-qtest-pci-y += tests/megasas-test$(EXESUF) ++#check-qtest-pci-y += tests/megasas-test$(EXESUF) + + check-qtest-i386-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) +-check-qtest-i386-y += tests/fdc-test$(EXESUF) ++#check-qtest-i386-y += tests/fdc-test$(EXESUF) + check-qtest-i386-y += tests/ide-test$(EXESUF) + check-qtest-i386-y += tests/ahci-test$(EXESUF) + check-qtest-i386-y += tests/hd-geo-test$(EXESUF) + check-qtest-i386-y += tests/boot-order-test$(EXESUF) +-check-qtest-i386-y += tests/bios-tables-test$(EXESUF) ++#check-qtest-i386-y += tests/bios-tables-test$(EXESUF) + check-qtest-i386-$(CONFIG_SGA) += tests/boot-serial-test$(EXESUF) + check-qtest-i386-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) + check-qtest-i386-y += tests/rtc-test$(EXESUF) +-check-qtest-i386-y += tests/ipmi-kcs-test$(EXESUF) +-check-qtest-i386-y += tests/ipmi-bt-test$(EXESUF) ++#check-qtest-i386-y += 
tests/ipmi-kcs-test$(EXESUF) ++#check-qtest-i386-y += tests/ipmi-bt-test$(EXESUF) + check-qtest-i386-y += tests/i440fx-test$(EXESUF) + check-qtest-i386-y += tests/fw_cfg-test$(EXESUF) + check-qtest-i386-y += tests/drive_del-test$(EXESUF) +@@ -238,15 +238,15 @@ check-qtest-mips64el-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) + check-qtest-moxie-y += tests/boot-serial-test$(EXESUF) + + check-qtest-ppc-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) +-check-qtest-ppc-y += tests/boot-order-test$(EXESUF) ++#check-qtest-ppc-y += tests/boot-order-test$(EXESUF) + check-qtest-ppc-y += tests/prom-env-test$(EXESUF) + check-qtest-ppc-y += tests/drive_del-test$(EXESUF) + check-qtest-ppc-y += tests/boot-serial-test$(EXESUF) +-check-qtest-ppc-y += tests/m48t59-test$(EXESUF) ++#check-qtest-ppc-y += tests/m48t59-test$(EXESUF) + + check-qtest-ppc64-y += $(check-qtest-ppc-y) + check-qtest-ppc64-y += tests/spapr-phb-test$(EXESUF) +-check-qtest-ppc64-y += tests/pnv-xscom-test$(EXESUF) ++#check-qtest-ppc64-y += tests/pnv-xscom-test$(EXESUF) + check-qtest-ppc64-y += tests/migration-test$(EXESUF) + check-qtest-ppc64-y += tests/rtas-test$(EXESUF) + check-qtest-ppc64-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) +@@ -254,8 +254,8 @@ check-qtest-ppc64-$(CONFIG_USB_OHCI) += tests/usb-hcd-ohci-test$(EXESUF) + check-qtest-ppc64-$(CONFIG_USB_UHCI) += tests/usb-hcd-uhci-test$(EXESUF) + check-qtest-ppc64-$(CONFIG_USB_XHCI_NEC) += tests/usb-hcd-xhci-test$(EXESUF) + check-qtest-ppc64-y += $(check-qtest-virtio-y) +-check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) +-check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) ++#check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) ++#check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) + check-qtest-ppc64-$(CONFIG_RTL8139_PCI) += tests/test-filter-redirector$(EXESUF) + check-qtest-ppc64-y += tests/display-vga-test$(EXESUF) + check-qtest-ppc64-y += tests/numa-test$(EXESUF) +@@ 
-685,15 +685,15 @@ tests/endianness-test$(EXESUF): tests/endianness-test.o + tests/spapr-phb-test$(EXESUF): tests/spapr-phb-test.o $(libqos-obj-y) + tests/prom-env-test$(EXESUF): tests/prom-env-test.o $(libqos-obj-y) + tests/rtas-test$(EXESUF): tests/rtas-test.o $(libqos-spapr-obj-y) +-tests/fdc-test$(EXESUF): tests/fdc-test.o ++#tests/fdc-test$(EXESUF): tests/fdc-test.o + tests/ide-test$(EXESUF): tests/ide-test.o $(libqos-pc-obj-y) + tests/ahci-test$(EXESUF): tests/ahci-test.o $(libqos-pc-obj-y) +-tests/ipmi-kcs-test$(EXESUF): tests/ipmi-kcs-test.o +-tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o ++#tests/ipmi-kcs-test$(EXESUF): tests/ipmi-kcs-test.o ++#tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o + tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o + tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y) + tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y) +-tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ ++#tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ + tests/boot-sector.o tests/acpi-utils.o $(libqos-obj-y) + tests/pxe-test$(EXESUF): tests/pxe-test.o tests/boot-sector.o $(libqos-obj-y) + tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y) +@@ -707,7 +707,7 @@ tests/e1000-test$(EXESUF): tests/e1000-test.o + tests/e1000e-test$(EXESUF): tests/e1000e-test.o $(libqos-pc-obj-y) + tests/rtl8139-test$(EXESUF): tests/rtl8139-test.o $(libqos-pc-obj-y) + tests/pcnet-test$(EXESUF): tests/pcnet-test.o +-tests/pnv-xscom-test$(EXESUF): tests/pnv-xscom-test.o ++#tests/pnv-xscom-test$(EXESUF): tests/pnv-xscom-test.o + tests/eepro100-test$(EXESUF): tests/eepro100-test.o + tests/vmxnet3-test$(EXESUF): tests/vmxnet3-test.o + tests/ne2000-test$(EXESUF): tests/ne2000-test.o +@@ -755,7 +755,7 @@ tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y) + tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y) + tests/test-x86-cpuid-compat$(EXESUF): 
tests/test-x86-cpuid-compat.o $(qtest-obj-y) + tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y) $(libqos-spapr-obj-y) +-tests/megasas-test$(EXESUF): tests/megasas-test.o $(libqos-spapr-obj-y) $(libqos-pc-obj-y) ++#tests/megasas-test$(EXESUF): tests/megasas-test.o $(libqos-spapr-obj-y) $(libqos-pc-obj-y) + tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o $(test-util-obj-y) libvhost-user.a + tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y) + tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o +diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c +index 8ec6aed..6a533b9 100644 +--- a/tests/boot-serial-test.c ++++ b/tests/boot-serial-test.c +@@ -97,16 +97,20 @@ static testdef_t tests[] = { + { "ppc", "g3beige", "", "PowerPC,750" }, + { "ppc", "mac99", "", "PowerPC,G4" }, + { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "ppce500", "", "U-Boot" }, + { "ppc64", "40p", "-m 192", "Memory: 192M" }, + { "ppc64", "mac99", "", "PowerPC,970FX" }, ++#endif + { "ppc64", "pseries", "", "Open Firmware" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "powernv", "-cpu POWER8", "OPAL" }, + { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, ++#endif + { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "i386", "pc", "-device sga", "SGABIOS" }, + { "i386", "q35", "-device sga", "SGABIOS" }, +- { "x86_64", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, ++ { "x86_64", "pc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "x86_64", "q35", "-device sga", "SGABIOS" }, + { "sparc", "LX", "", "TMS390S10" }, + { "sparc", "SS-4", "", "MB86904" }, +diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c +index f4a677d..70a3d1d 100644 +--- a/tests/cpu-plug-test.c ++++ b/tests/cpu-plug-test.c +@@ -193,7 +193,8 @@ static void add_pseries_test_case(const char *mname) + PlugTestData *data; + + if 
(!g_str_has_prefix(mname, "pseries-") || +- (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7)) { ++ (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7) || ++ strcmp(mname,"pseries-rhel7.2.0") == 0) { + return; + } + data = g_new(PlugTestData, 1); +diff --git a/tests/e1000-test.c b/tests/e1000-test.c +index 0c5fcdc..b830432 100644 +--- a/tests/e1000-test.c ++++ b/tests/e1000-test.c +@@ -29,8 +29,10 @@ static void test_device(gconstpointer data) + static const char *models[] = { + "e1000", + "e1000-82540em", ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + "e1000-82544gc", + "e1000-82545em", ++#endif + }; + + int main(int argc, char **argv) +diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c +index 198d007..4bea07f 100644 +--- a/tests/prom-env-test.c ++++ b/tests/prom-env-test.c +@@ -82,7 +82,9 @@ int main(int argc, char *argv[]) + if (!strcmp(arch, "ppc")) { + add_tests(ppc_machines); + } else if (!strcmp(arch, "ppc64")) { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + add_tests(ppc_machines); ++#endif + if (g_test_slow()) { + qtest_add_data_func("prom-env/pseries", "pseries", test_machine); + } +diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 +index 32741d7..85ef52e 100755 +--- a/tests/qemu-iotests/051 ++++ b/tests/qemu-iotests/051 +@@ -182,11 +182,11 @@ run_qemu -drive if=virtio + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive if=none,id=disk -device ide-cd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive if=none,id=disk -device ide-drive,drive=disk + run_qemu -drive if=none,id=disk -device ide-hd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk ++# 
run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; +@@ -211,11 +211,11 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-cd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-drive,drive=disk + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-hd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index 2722103..ede8887 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -77,7 +77,7 @@ + 068 rw auto quick + 069 rw auto quick + 070 rw auto quick +-071 rw auto quick ++# 071 rw auto quick -- requires whitelisted blkverify + 072 rw auto quick + 073 rw auto quick + 074 rw auto quick +@@ -105,7 +105,7 @@ + 096 rw auto quick + 097 rw auto backing + 098 rw auto backing quick +-099 rw auto quick ++# 099 rw auto quick -- requires whitelisted blkverify + # 100 was removed, do not reuse + 101 rw auto quick + 102 rw auto quick +diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c +index e75b959..6b46b73 100644 +--- a/tests/test-x86-cpuid-compat.c ++++ b/tests/test-x86-cpuid-compat.c +@@ -300,6 +300,7 @@ int main(int 
argc, char **argv) + "-cpu 486,xlevel2=0xC0000002,+xstore", + "xlevel2", 0xC0000002); + ++#if 0 /* Disabled in Red Hat Enterprise Linux */ + /* Check compatibility of old machine-types that didn't + * auto-increase level/xlevel/xlevel2: */ + +@@ -350,6 +351,7 @@ int main(int argc, char **argv) + add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", + "-machine pc-i440fx-2.4 -cpu SandyBridge,+npt", + "xlevel", 0x80000008); ++#endif + + /* Test feature parsing */ + add_feature_test("x86/cpuid/features/plus", +diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c +index 9eb24b0..465ed26 100644 +--- a/tests/usb-hcd-xhci-test.c ++++ b/tests/usb-hcd-xhci-test.c +@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) + usb_test_hotplug("xhci", "1", NULL); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void test_usb_uas_hotplug(void) + { + qtest_qmp_device_add("usb-uas", "uas", "{}"); +@@ -34,6 +35,7 @@ static void test_usb_uas_hotplug(void) + qtest_qmp_device_del("scsihd"); + qtest_qmp_device_del("uas"); + } ++#endif + + static void test_usb_ccid_hotplug(void) + { +@@ -52,7 +54,9 @@ int main(int argc, char **argv) + + qtest_add_func("/xhci/pci/init", test_xhci_init); + qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); ++#endif + qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); + + qtest_start("-device nec-usb-xhci,id=xhci" +-- +1.8.3.1 + diff --git a/0012-Use-kvm-by-default.patch b/0013-Use-kvm-by-default.patch similarity index 72% rename from 0012-Use-kvm-by-default.patch rename to 0013-Use-kvm-by-default.patch index d6e2835..2c59bf6 100644 --- a/0012-Use-kvm-by-default.patch +++ b/0013-Use-kvm-by-default.patch @@ -1,4 +1,4 @@ -From ce4cd21e28e1511e056877e3cc8dcf6f0b8c7baa Mon Sep 17 00:00:00 2001 +From b91ee13e30cef65d02e3e0f9324931f1e2589426 Mon Sep 17 00:00:00 2001 From: Miroslav 
Rezanina Date: Thu, 18 Dec 2014 06:27:49 +0100 Subject: Use kvm by default @@ -8,6 +8,15 @@ Bugzilla: 906185 RHEL uses kvm accelerator by default, if available. Signed-off-by: Miroslav Rezanina + +Rebase notes (2.10.0) +- variable rename (upstream) + +Rebase notes (2.2.0): +- Move code from vl.c to accel.c + +(cherry picked from commit abcd662eb8e516ebe4a6b401e83a62f749491a15) +(cherry picked from commit eca6d5766d956c37e3f7f28d70903d357308c846) --- accel/accel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0014-vfio-cap-number-of-devices-that-can-be-assigned.patch similarity index 73% rename from 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch rename to 0014-vfio-cap-number-of-devices-that-can-be-assigned.patch index 9e22d48..c445c7f 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0014-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 43a09e06e76cba94c6ecd448f51912362b42f94d Mon Sep 17 00:00:00 2001 +From 3094b4ac400f54f26b837226f44fc0a18f0726e6 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -23,12 +23,21 @@ matches the number of slots on a PCI bus and is also a nice power of two. 
Signed-off-by: Bandan Das + +Rebase notes (2.8.0): +- removed return value for vfio_realize (commit 1a22aca) + +Merged patches (2.9.0): +- 17eb774 vfio: Use error_setg when reporting max assigned device overshoot + +(cherry picked from commit 9fa3c9fc6dfcde76d80db1aa601b2d577f72ceec) +(cherry picked from commit 3cb35556dc7d994f203d732fe952f95fcdb03c0a) --- hw/vfio/pci.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 6cbb8fa..59b3c0f 100644 +index 5c7bd96..598f771 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -36,6 +36,7 @@ @@ -37,15 +46,16 @@ index 6cbb8fa..59b3c0f 100644 #define MSIX_CAP_LENGTH 12 +#define MAX_DEV_ASSIGN_CMDLINE 32 - static void vfio_disable_interrupts(VFIOPCIDevice *vdev); - static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); -@@ -2809,7 +2810,19 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + #define TYPE_VFIO_PCI "vfio-pci" + #define PCI_VFIO(obj) OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI) +@@ -2811,9 +2812,21 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) ssize_t len; struct stat st; int groupid; - int i, ret; + int ret, i = 0; -+ + bool is_mdev; + + QLIST_FOREACH(group, &vfio_group_list, next) { + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { + i++; @@ -57,9 +67,10 @@ index 6cbb8fa..59b3c0f 100644 + "already attached", MAX_DEV_ASSIGN_CMDLINE); + return; + } - ++ if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || + ~vdev->host.slot || ~vdev->host.function)) { -- 1.8.3.1 diff --git a/0014-Add-support-statement-to-help-output.patch b/0015-Add-support-statement-to-help-output.patch similarity index 82% rename from 0014-Add-support-statement-to-help-output.patch rename to 0015-Add-support-statement-to-help-output.patch index ea0d9ea..6e991bb 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0015-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 
f8e7911bb97eb942a4eadad1731b7c59c43fd2eb Mon Sep 17 00:00:00 2001 +From 2bfcbb3ece3cda4cf977cb3983df84830bde90a3 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -16,15 +16,17 @@ Add support statement to -help output, reporting direct qemu-kvm usage as unsupported by Red Hat, and advising users to use libvirt instead. Signed-off-by: Eduardo Habkost +(cherry picked from commit 2a07700936e39856cc9f149c6a6517f0715536a6) +(cherry picked from commit 5dd2f4706e2fef945771949e59a8fcc1b5452de9) --- vl.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/vl.c b/vl.c -index 4f96203..43c4b78 100644 +index 39d152a..db628b8 100644 --- a/vl.c +++ b/vl.c -@@ -1876,9 +1876,17 @@ static void version(void) +@@ -1904,9 +1904,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -42,7 +44,7 @@ index 4f96203..43c4b78 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", error_get_progname()); -@@ -1895,6 +1903,7 @@ static void help(int exitcode) +@@ -1923,6 +1931,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch deleted file mode 100644 index 3543fec..0000000 --- a/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ /dev/null @@ -1,89 +0,0 @@ -From 8413778453742aeb3ad6b38d5f4440a0dbabca7d Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Tue, 21 Jan 2014 10:46:52 +0100 -Subject: globally limit the maximum number of CPUs - -We now globally limit the number of VCPUs. -Especially, there is no way one can specify more than -max_cpus VCPUs for a VM. - -This allows us the restore the ppc max_cpus limitation to the upstream -default and minimize the ppc hack in kvm-all.c. 
- -Signed-off-by: David Hildenbrand -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo Cesar Lemes de Paula ---- - accel/kvm/kvm-all.c | 12 ++++++++++++ - vl.c | 18 ++++++++++++++++++ - 2 files changed, 30 insertions(+) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index eb7db92..c2e7095 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -1586,6 +1586,18 @@ static int kvm_init(MachineState *ms) - soft_vcpus_limit = kvm_recommended_vcpus(s); - hard_vcpus_limit = kvm_max_vcpus(s); - -+#ifdef HOST_PPC64 -+ /* -+ * On POWER, the kernel advertises a soft limit based on the -+ * number of CPU threads on the host. We want to allow exceeding -+ * this for testing purposes, so we don't want to set hard limit -+ * to soft limit as on x86. -+ */ -+#else -+ /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */ -+ hard_vcpus_limit = soft_vcpus_limit; -+#endif -+ - while (nc->name) { - if (nc->num > soft_vcpus_limit) { - warn_report("Number of %s cpus requested (%d) exceeds " -diff --git a/vl.c b/vl.c -index 43c4b78..b50dbe4 100644 ---- a/vl.c -+++ b/vl.c -@@ -133,6 +133,8 @@ int main(int argc, char **argv) - - #define MAX_VIRTIO_CONSOLES 1 - -+#define RHEL_MAX_CPUS 384 -+ - static const char *data_dir[16]; - static int data_dir_idx; - const char *bios_name = NULL; -@@ -1430,6 +1432,20 @@ MachineClass *find_default_machine(void) - return mc; - } - -+/* Maximum number of CPUs limited for Red Hat Enterprise Linux */ -+static void limit_max_cpus_in_machines(void) -+{ -+ GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); -+ -+ for (el = machines; el; el = el->next) { -+ MachineClass *mc = el->data; -+ -+ if (mc->max_cpus > RHEL_MAX_CPUS) { -+ mc->max_cpus = RHEL_MAX_CPUS; -+ } -+ } -+} -+ - MachineInfoList *qmp_query_machines(Error **errp) - { - GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); -@@ -3993,6 +4009,8 @@ int main(int argc, char **argv, char **envp) - "mutually exclusive"); - exit(EXIT_FAILURE); - } -+ 
/* Maximum number of CPUs limited for Red Hat Enterprise Linux */ -+ limit_max_cpus_in_machines(); - - machine_class = select_machine(); - --- -1.8.3.1 - diff --git a/0016-globally-limit-the-maximum-number-of-CPUs.patch b/0016-globally-limit-the-maximum-number-of-CPUs.patch new file mode 100644 index 0000000..5c51d59 --- /dev/null +++ b/0016-globally-limit-the-maximum-number-of-CPUs.patch @@ -0,0 +1,153 @@ +From c214bfc318a3128dc92fe5017ca0dd54fc50ffed Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Tue, 21 Jan 2014 10:46:52 +0100 +Subject: globally limit the maximum number of CPUs + +We now globally limit the number of VCPUs. +Especially, there is no way one can specify more than +max_cpus VCPUs for a VM. + +This allows us the restore the ppc max_cpus limitation to the upstream +default and minimize the ppc hack in kvm-all.c. + +Signed-off-by: David Hildenbrand +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo Cesar Lemes de Paula + +Rebase notes (2.11.0): +- Removed CONFIG_RHV reference +- Update commit log + +Merged patches (2.11.0): +- 92fef14623 redhat: remove manual max_cpus limitations for ppc +- bb722e9eff redhat: globally limit the maximum number of CPUs +- fdeef3c1c7 RHEL: Set vcpus hard limit to 240 for Power +- 0584216921 Match POWER max cpus to x86 + +Signed-off-by: Andrew Jones +(cherry picked from commit a4ceb63bdc5cbac19f5f633ec761b9de0dedb55e) +(cherry picked from commit a1f26d85171b4d554225150053700e93ba6eba10) + +redhat: globally limit the maximum number of CPUs + +RH-Author: David Hildenbrand +Message-id: <20180109103253.24517-2-david@redhat.com> +Patchwork-id: 78531 +O-Subject: [RHEL-7.5 qemu-kvm-ma PATCH v2 1/2] redhat: globally limit the maximum number of CPUs +Bugzilla: 1527449 +RH-Acked-by: David Gibson +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck + +Upstream-status: n/a + +For RHEL, we support 240, for RHV up to 384 VCPUs. Let's limit this +globally instead of fixing up all machines. 
This way, we can easily +change (increase) the product specific levels later. + +Signed-off-by: David Hildenbrand +Signed-off-by: Miroslav Rezanina + +redhat: remove manual max_cpus limitations for ppc + +RH-Author: David Hildenbrand +Message-id: <20180109103253.24517-3-david@redhat.com> +Patchwork-id: 78532 +O-Subject: [RHEL-7.5 qemu-kvm-ma PATCH v2 2/2] redhat: remove manual max_cpus limitations for ppc +Bugzilla: 1527449 +RH-Acked-by: David Gibson +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck + +Upstream-status: n/a + +RH-Author: Andrew Jones +Message-id: <1390301212-15344-1-git-send-email-drjones@redhat.com> +Patchwork-id: 56862 +O-Subject: [RHEL7.0 qemu-kvm PATCH v6] use recommended max vcpu count +Bugzilla: 998708 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Marcelo Tosatti + +The recommended vcpu max limit (KVM_CAP_NR_VCPUS) should be used instead +of the actual max vcpu limit (KVM_CAP_MAX_VCPUS) to give an error. + +This commit matches the limit to current KVM_CAP_NR_VCPUS value. + +Conflicts: + vl.c +--- + accel/kvm/kvm-all.c | 12 ++++++++++++ + vl.c | 18 ++++++++++++++++++ + 2 files changed, 30 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 4880a05..a8f5d47 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -1627,6 +1627,18 @@ static int kvm_init(MachineState *ms) + soft_vcpus_limit = kvm_recommended_vcpus(s); + hard_vcpus_limit = kvm_max_vcpus(s); + ++#ifdef HOST_PPC64 ++ /* ++ * On POWER, the kernel advertises a soft limit based on the ++ * number of CPU threads on the host. We want to allow exceeding ++ * this for testing purposes, so we don't want to set hard limit ++ * to soft limit as on x86. 
++ */ ++#else ++ /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */ ++ hard_vcpus_limit = soft_vcpus_limit; ++#endif ++ + while (nc->name) { + if (nc->num > soft_vcpus_limit) { + warn_report("Number of %s cpus requested (%d) exceeds " +diff --git a/vl.c b/vl.c +index db628b8..7b0f19a 100644 +--- a/vl.c ++++ b/vl.c +@@ -133,6 +133,8 @@ int main(int argc, char **argv) + + #define MAX_VIRTIO_CONSOLES 1 + ++#define RHEL_MAX_CPUS 384 ++ + static const char *data_dir[16]; + static int data_dir_idx; + const char *bios_name = NULL; +@@ -1460,6 +1462,20 @@ MachineClass *find_default_machine(void) + return mc; + } + ++/* Maximum number of CPUs limited for Red Hat Enterprise Linux */ ++static void limit_max_cpus_in_machines(void) ++{ ++ GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); ++ ++ for (el = machines; el; el = el->next) { ++ MachineClass *mc = el->data; ++ ++ if (mc->max_cpus > RHEL_MAX_CPUS) { ++ mc->max_cpus = RHEL_MAX_CPUS; ++ } ++ } ++} ++ + MachineInfoList *qmp_query_machines(Error **errp) + { + GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); +@@ -4012,6 +4028,8 @@ int main(int argc, char **argv, char **envp) + "mutually exclusive"); + exit(EXIT_FAILURE); + } ++ /* Maximum number of CPUs limited for Red Hat Enterprise Linux */ ++ limit_max_cpus_in_machines(); + + configure_rtc(qemu_find_opts_singleton("rtc")); + +-- +1.8.3.1 + diff --git a/0016-Add-support-for-simpletrace.patch b/0017-Add-support-for-simpletrace.patch similarity index 86% rename from 0016-Add-support-for-simpletrace.patch rename to 0017-Add-support-for-simpletrace.patch index 4b945c5..3e295c3 100644 --- a/0016-Add-support-for-simpletrace.patch +++ b/0017-Add-support-for-simpletrace.patch @@ -1,4 +1,4 @@ -From f262acdee88f36b625fcbd5eb1cd66739428cca3 Mon Sep 17 00:00:00 2001 +From 30887ffc7e908ebed5381c08181cd6a2a6bc5e98 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 8 Oct 2015 09:50:17 +0200 Subject: Add support for simpletrace @@ -6,6 
+6,23 @@ Subject: Add support for simpletrace As simpletrace is upstream, we just need to properly handle it during rpmbuild. Signed-off-by: Miroslav Rezanina + +Rebase notes (weekly-180727): +- Fixed python 2 to python3 switch + +Rebase notes (2.9.0): +- Added group argument for tracetool.py (upstream) + +Rebase notes (2.8.0): +- Changed tracetool.py parameters + +Merged patches (2.3.0): +- db959d6 redhat/qemu-kvm.spec.template: Install qemu-kvm-simpletrace.stp +- 5292fc3 trace: add SystemTap init scripts for simpletrace bridge +- eda9e5e simpletrace: install simpletrace.py +- 85c4c8f trace: add systemtap-initscript README file to RPM + +(cherry picked from commit bfc1d7f3628f2ffbabbae71d57a506cea6663ddf) --- .gitignore | 2 ++ Makefile | 4 +++ @@ -19,10 +36,10 @@ Signed-off-by: Miroslav Rezanina create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp diff --git a/Makefile b/Makefile -index eb4c57a..6b6d3f6 100644 +index 152821a..8710720 100644 --- a/Makefile +++ b/Makefile -@@ -880,6 +880,10 @@ endif +@@ -892,6 +892,10 @@ endif $(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \ done $(INSTALL_DATA) $(BUILD_DIR)/trace-events-all "$(DESTDIR)$(qemu_datadir)/trace-events-all" diff --git a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0018-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch similarity index 85% rename from 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch rename to 0018-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index aca5827..109b97c 100644 --- a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0018-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From 33e2c01c1b0b64a76d5193b60378d2329a86626b Mon Sep 17 00:00:00 2001 +From d0656d8b2e0de42d04c224db36fe9c1ec015a9cc Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 14 Nov 2014 08:51:50 +0100 Subject: Use qemu-kvm in documentation instead of qemu-system- 
@@ -17,15 +17,39 @@ to reflect this change. Only architectures available in RHEL are updated. Signed-off-by: Miroslav Rezanina --- + docs/COLO-FT.txt | 4 +- docs/can.txt | 8 +-- docs/pr-manager.rst | 4 +- - docs/qemu-block-drivers.texi | 70 ++++++++++----------- + docs/qemu-block-drivers.texi | 70 +++++++++++----------- + docs/qemu-cpu-models.texi | 8 +-- docs/replay.txt | 4 +- docs/specs/tpm.txt | 8 +-- - qemu-doc.texi | 70 ++++++++++----------- - qemu-options.hx | 144 ++++++++++++++++++++++--------------------- - 7 files changed, 156 insertions(+), 152 deletions(-) + qemu-doc.texi | 70 +++++++++++----------- + qemu-options.hx | 140 ++++++++++++++++++++++--------------------- + 9 files changed, 160 insertions(+), 156 deletions(-) +diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt +index e2686bb..8c48f86 100644 +--- a/docs/COLO-FT.txt ++++ b/docs/COLO-FT.txt +@@ -147,7 +147,7 @@ in test procedure. + == Test procedure == + 1. Startup qemu + Primary: +-# qemu-system-x86_64 -accel kvm -m 2048 -smp 2 -qmp stdio -name primary \ ++# qemu-kvm -accel kvm -m 2048 -smp 2 -qmp stdio -name primary \ + -device piix3-usb-uhci -vnc :7 \ + -device usb-tablet -netdev tap,id=hn0,vhost=off \ + -device virtio-net-pci,id=net-pci0,netdev=hn0 \ +@@ -155,7 +155,7 @@ Primary: + children.0.file.filename=1.raw,\ + children.0.driver=raw -S + Secondary: +-# qemu-system-x86_64 -accel kvm -m 2048 -smp 2 -qmp stdio -name secondary \ ++# qemu-kvm -accel kvm -m 2048 -smp 2 -qmp stdio -name secondary \ + -device piix3-usb-uhci -vnc :7 \ + -device usb-tablet -netdev tap,id=hn0,vhost=off \ + -device virtio-net-pci,id=net-pci0,netdev=hn0 \ diff --git a/docs/can.txt b/docs/can.txt index 7ba23b2..4ae5690 100644 --- a/docs/can.txt @@ -287,8 +311,42 @@ index 38e9f34..2e71ec9 100644 @end example @var{host}:@var{bus}:@var{slot}.@var{func} is the NVMe controller's PCI device +diff --git a/docs/qemu-cpu-models.texi b/docs/qemu-cpu-models.texi +index 1935f98..93672de 100644 +--- a/docs/qemu-cpu-models.texi ++++ 
b/docs/qemu-cpu-models.texi +@@ -387,25 +387,25 @@ CPU models / features in QEMU and libvirt + @item Host passthrough + + @example +- $ qemu-system-x86_64 -cpu host ++ $ qemu-kvm -cpu host + @end example + + With feature customization: + + @example +- $ qemu-system-x86_64 -cpu host,-vmx,... ++ $ qemu-kvm -cpu host,-vmx,... + @end example + + @item Named CPU models + + @example +- $ qemu-system-x86_64 -cpu Westmere ++ $ qemu-kvm -cpu Westmere + @end example + + With feature customization: + + @example +- $ qemu-system-x86_64 -cpu Westmere,+pcid,... ++ $ qemu-kvm -cpu Westmere,+pcid,... + @end example + + @end table diff --git a/docs/replay.txt b/docs/replay.txt -index 2e21e9c..f1923e8 100644 +index 3497585..8d67ea7 100644 --- a/docs/replay.txt +++ b/docs/replay.txt @@ -25,7 +25,7 @@ Deterministic replay has the following features: @@ -310,10 +368,10 @@ index 2e21e9c..f1923e8 100644 -drive file=disk.qcow2,if=none,id=img-direct \ -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay \ diff --git a/docs/specs/tpm.txt b/docs/specs/tpm.txt -index 0e9bbeb..9320fbd 100644 +index 1af82bb..b2741ac 100644 --- a/docs/specs/tpm.txt +++ b/docs/specs/tpm.txt -@@ -98,7 +98,7 @@ QEMU files related to the TPM passthrough device: +@@ -113,7 +113,7 @@ QEMU files related to the TPM passthrough device: Command line to start QEMU with the TPM passthrough device using the host's hardware TPM /dev/tpm0: @@ -322,7 +380,7 @@ index 0e9bbeb..9320fbd 100644 -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ -tpmdev passthrough,id=tpm0,path=/dev/tpm0 \ -device tpm-tis,tpmdev=tpm0 test.img -@@ -164,7 +164,7 @@ swtpm socket --tpmstate dir=/tmp/mytpm1 \ +@@ -179,7 +179,7 @@ swtpm socket --tpmstate dir=/tmp/mytpm1 \ Command line to start QEMU with the TPM emulator device communicating with the swtpm: @@ -331,7 +389,7 @@ index 0e9bbeb..9320fbd 100644 -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ -tpmdev 
emulator,id=tpm0,chardev=chrtpm \ -@@ -222,7 +222,7 @@ swtpm socket --tpmstate dir=/tmp/mytpm1 \ +@@ -237,7 +237,7 @@ swtpm socket --tpmstate dir=/tmp/mytpm1 \ In a 2nd terminal start the VM: @@ -340,7 +398,7 @@ index 0e9bbeb..9320fbd 100644 -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ -tpmdev emulator,id=tpm0,chardev=chrtpm \ -@@ -255,7 +255,7 @@ swtpm socket --tpmstate dir=/tmp/mytpm1 \ +@@ -270,7 +270,7 @@ swtpm socket --tpmstate dir=/tmp/mytpm1 \ In the 2nd terminal restore the state of the VM using the additional '-incoming' option. @@ -350,10 +408,10 @@ index 0e9bbeb..9320fbd 100644 -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ -tpmdev emulator,id=tpm0,chardev=chrtpm \ diff --git a/qemu-doc.texi b/qemu-doc.texi -index abfd2db..5827bed 100644 +index f7ad1df..0e4b34a 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi -@@ -204,12 +204,12 @@ Note that, by default, GUS shares IRQ(7) with parallel ports and so +@@ -205,12 +205,12 @@ Note that, by default, GUS shares IRQ(7) with parallel ports and so QEMU must be told to not have parallel ports to have working GUS. @example @@ -368,7 +426,7 @@ index abfd2db..5827bed 100644 @end example Or some other unclaimed IRQ. -@@ -225,7 +225,7 @@ CS4231A is the chip used in Windows Sound System and GUSMAX products +@@ -226,7 +226,7 @@ CS4231A is the chip used in Windows Sound System and GUSMAX products Download and uncompress the linux image (@file{linux.img}) and type: @example @@ -377,7 +435,7 @@ index abfd2db..5827bed 100644 @end example Linux should boot and give you a prompt. -@@ -235,7 +235,7 @@ Linux should boot and give you a prompt. +@@ -236,7 +236,7 @@ Linux should boot and give you a prompt. @example @c man begin SYNOPSIS @@ -386,7 +444,7 @@ index abfd2db..5827bed 100644 @c man end @end example -@@ -275,21 +275,21 @@ is specified in seconds. The default is 0 which means no timeout. Libiscsi +@@ -276,21 +276,21 @@ is specified in seconds. 
The default is 0 which means no timeout. Libiscsi Example (without authentication): @example @@ -411,7 +469,7 @@ index abfd2db..5827bed 100644 @end example @item NBD -@@ -304,12 +304,12 @@ Syntax for specifying a NBD device using Unix Domain Sockets +@@ -305,12 +305,12 @@ Syntax for specifying a NBD device using Unix Domain Sockets Example for TCP @example @@ -426,7 +484,7 @@ index abfd2db..5827bed 100644 @end example @item SSH -@@ -317,8 +317,8 @@ QEMU supports SSH (Secure Shell) access to remote disks. +@@ -318,8 +318,8 @@ QEMU supports SSH (Secure Shell) access to remote disks. Examples: @example @@ -437,7 +495,7 @@ index abfd2db..5827bed 100644 @end example Currently authentication must be done using ssh-agent. Other -@@ -336,7 +336,7 @@ sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag] +@@ -337,7 +337,7 @@ sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag] Example @example @@ -446,7 +504,7 @@ index abfd2db..5827bed 100644 @end example See also @url{https://sheepdog.github.io/sheepdog/}. -@@ -362,17 +362,17 @@ JSON: +@@ -363,17 +363,17 @@ JSON: Example @example URI: @@ -467,7 +525,7 @@ index abfd2db..5827bed 100644 @ file.debug=9,file.logfile=/var/log/qemu-gluster.log, @ file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007, @ file.server.1.type=unix,file.server.1.socket=/var/run/glusterd.socket -@@ -437,9 +437,9 @@ of . +@@ -438,9 +438,9 @@ of . 
Example: boot from a remote Fedora 20 live ISO image @example @@ -479,7 +537,7 @@ index abfd2db..5827bed 100644 @end example Example: boot from a remote Fedora 20 cloud image using a local overlay for -@@ -447,7 +447,7 @@ writes, copy-on-read, and a readahead of 64k +@@ -448,7 +448,7 @@ writes, copy-on-read, and a readahead of 64k @example qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"http",, "file.url":"https://dl.fedoraproject.org/pub/fedora/linux/releases/20/Images/x86_64/Fedora-x86_64-20-20131211.1-sda.qcow2",, "file.readahead":"64k"@}' /tmp/Fedora-x86_64-20-20131211.1-sda.qcow2 @@ -488,7 +546,7 @@ index abfd2db..5827bed 100644 @end example Example: boot from an image stored on a VMware vSphere server with a self-signed -@@ -456,7 +456,7 @@ of 10 seconds. +@@ -457,7 +457,7 @@ of 10 seconds. @example qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"https",, "file.url":"https://user:password@@vsphere.example.com/folder/test/test-flat.vmdk?dcPath=Datacenter&dsName=datastore1",, "file.sslverify":"off",, "file.readahead":"64k",, "file.timeout":10@}' /tmp/test.qcow2 @@ -497,7 +555,7 @@ index abfd2db..5827bed 100644 @end example @end table -@@ -818,7 +818,7 @@ On Linux hosts, a shared memory device is available. The basic syntax +@@ -824,7 +824,7 @@ On Linux hosts, a shared memory device is available. The basic syntax is: @example @@ -506,7 +564,7 @@ index abfd2db..5827bed 100644 @end example where @var{hostmem} names a host memory backend. 
For a POSIX shared -@@ -839,7 +839,7 @@ memory server is: +@@ -845,7 +845,7 @@ memory server is: ivshmem-server -p @var{pidfile} -S @var{path} -m @var{shm-name} -l @var{shm-size} -n @var{vectors} # Then start your qemu instances with matching arguments @@ -515,7 +573,7 @@ index abfd2db..5827bed 100644 -chardev socket,path=@var{path},id=@var{id} @end example -@@ -864,7 +864,7 @@ Instead of specifying the using POSIX shm, you may specify +@@ -870,7 +870,7 @@ Instead of specifying the using POSIX shm, you may specify a memory backend that has hugepage support: @example @@ -524,7 +582,7 @@ index abfd2db..5827bed 100644 -device ivshmem-plain,memdev=mb1 @end example -@@ -880,7 +880,7 @@ kernel testing. +@@ -886,7 +886,7 @@ kernel testing. The syntax is: @example @@ -533,7 +591,7 @@ index abfd2db..5827bed 100644 @end example Use @option{-kernel} to provide the Linux kernel image and -@@ -895,7 +895,7 @@ If you do not need graphical output, you can disable it and redirect +@@ -901,7 +901,7 @@ If you do not need graphical output, you can disable it and redirect the virtual serial port and the QEMU monitor to the console with the @option{-nographic} option. The typical command line is: @example @@ -542,7 +600,7 @@ index abfd2db..5827bed 100644 -append "root=/dev/hda console=ttyS0" -nographic @end example -@@ -961,7 +961,7 @@ Network adapter that supports CDC ethernet and RNDIS protocols. @var{id} +@@ -967,7 +967,7 @@ Network adapter that supports CDC ethernet and RNDIS protocols. @var{id} specifies a netdev defined with @code{-netdev @dots{},id=@var{id}}. For instance, user-mode networking can be used with @example @@ -551,7 +609,7 @@ index abfd2db..5827bed 100644 @end example @item usb-ccid Smartcard reader device -@@ -980,7 +980,7 @@ no type is given, the HCI logic corresponds to @code{-bt hci,vlan=0}. +@@ -986,7 +986,7 @@ no type is given, the HCI logic corresponds to @code{-bt hci,vlan=0}. This USB device implements the USB Transport Layer of HCI. 
Example usage: @example @@ -560,7 +618,7 @@ index abfd2db..5827bed 100644 @end example @end table -@@ -1057,7 +1057,7 @@ For this setup it is recommended to restrict it to listen on a UNIX domain +@@ -1063,7 +1063,7 @@ For this setup it is recommended to restrict it to listen on a UNIX domain socket only. For example @example @@ -569,7 +627,7 @@ index abfd2db..5827bed 100644 @end example This ensures that only users on local box with read/write access to that -@@ -1080,7 +1080,7 @@ is running the password is set with the monitor. Until the monitor is used to +@@ -1086,7 +1086,7 @@ is running the password is set with the monitor. Until the monitor is used to set the password all clients will be rejected. @example @@ -578,34 +636,34 @@ index abfd2db..5827bed 100644 (qemu) change vnc password Password: ******** (qemu) -@@ -1097,7 +1097,7 @@ support provides a secure session, but no authentication. This allows any +@@ -1103,7 +1103,7 @@ support provides a secure session, but no authentication. This allows any client to connect, and provides an encrypted session. @example --qemu-system-i386 [...OPTIONS...] -vnc :1,tls,x509=/etc/pki/qemu -monitor stdio -+qemu-kvm [...OPTIONS...] -vnc :1,tls,x509=/etc/pki/qemu -monitor stdio +-qemu-system-i386 [...OPTIONS...] \ ++qemu-kvm [...OPTIONS...] \ + -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=no \ + -vnc :1,tls-creds=tls0 -monitor stdio @end example - - In the above example @code{/etc/pki/qemu} should contain at least three files, -@@ -1115,7 +1115,7 @@ then validate against the CA certificate. This is a good choice if deploying - in an environment with a private internal certificate authority. +@@ -1125,7 +1125,7 @@ same syntax as previously, but with @code{verify-peer} set to @code{yes} + instead. @example --qemu-system-i386 [...OPTIONS...] -vnc :1,tls,x509verify=/etc/pki/qemu -monitor stdio -+qemu-kvm [...OPTIONS...] 
-vnc :1,tls,x509verify=/etc/pki/qemu -monitor stdio +-qemu-system-i386 [...OPTIONS...] \ ++qemu-kvm [...OPTIONS...] \ + -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ + -vnc :1,tls-creds=tls0 -monitor stdio @end example - - -@@ -1126,7 +1126,7 @@ Finally, the previous method can be combined with VNC password authentication +@@ -1138,7 +1138,7 @@ Finally, the previous method can be combined with VNC password authentication to provide two layers of authentication for clients. @example --qemu-system-i386 [...OPTIONS...] -vnc :1,password,tls,x509verify=/etc/pki/qemu -monitor stdio -+qemu-kvm [...OPTIONS...] -vnc :1,password,tls,x509verify=/etc/pki/qemu -monitor stdio +-qemu-system-i386 [...OPTIONS...] \ ++qemu-kvm [...OPTIONS...] \ + -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ + -vnc :1,tls-creds=tls0,password -monitor stdio (qemu) change vnc password - Password: ******** - (qemu) -@@ -1149,7 +1149,7 @@ used for authentication, but assuming use of one supporting SSF, +@@ -1163,7 +1163,7 @@ used for authentication, but assuming use of one supporting SSF, then QEMU can be launched with: @example @@ -614,16 +672,16 @@ index abfd2db..5827bed 100644 @end example @node vnc_sec_certificate_sasl -@@ -1163,7 +1163,7 @@ credentials. This can be enabled, by combining the 'sasl' option +@@ -1177,7 +1177,7 @@ credentials. This can be enabled, by combining the 'sasl' option with the aforementioned TLS + x509 options: @example --qemu-system-i386 [...OPTIONS...] -vnc :1,tls,x509,sasl -monitor stdio -+qemu-kvm [...OPTIONS...] -vnc :1,tls,x509,sasl -monitor stdio +-qemu-system-i386 [...OPTIONS...] \ ++qemu-kvm [...OPTIONS...] 
\ + -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ + -vnc :1,tls-creds=tls0,sasl -monitor stdio @end example - - @node vnc_setup_sasl -@@ -1556,7 +1556,7 @@ QEMU has a primitive support to work with gdb, so that you can do +@@ -1572,7 +1572,7 @@ QEMU has a primitive support to work with gdb, so that you can do In order to use gdb, launch QEMU with the '-s' option. It will wait for a gdb connection: @example @@ -632,7 +690,7 @@ index abfd2db..5827bed 100644 -append "root=/dev/hda" Connected to host network interface: tun0 Waiting gdb connection on port 1234 -@@ -1802,7 +1802,7 @@ Set the initial VGA graphic mode. The default is 800x600x32. +@@ -1818,7 +1818,7 @@ Set the initial VGA graphic mode. The default is 800x600x32. Set OpenBIOS variables in NVRAM, for example: @example @@ -642,10 +700,10 @@ index abfd2db..5827bed 100644 -prom-env 'boot-args=conf=hd:2,\yaboot.conf' @end example diff --git a/qemu-options.hx b/qemu-options.hx -index 37f2aa8..41cb1f3 100644 +index cd2b25b..a65c63b 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -263,7 +263,7 @@ This option defines a free-form string that can be used to describe @var{fd}. +@@ -253,7 +253,7 @@ This option defines a free-form string that can be used to describe @var{fd}. You can open an image using pre-opened file descriptors from an fd set: @example @@ -654,7 +712,7 @@ index 37f2aa8..41cb1f3 100644 -add-fd fd=3,set=2,opaque="rdwr:/path/to/file" -add-fd fd=4,set=2,opaque="rdonly:/path/to/file" -drive file=/dev/fdset/2,index=0,media=disk -@@ -292,7 +292,7 @@ STEXI +@@ -282,7 +282,7 @@ STEXI Set default value of @var{driver}'s property @var{prop} to @var{value}, e.g.: @example @@ -663,7 +721,7 @@ index 37f2aa8..41cb1f3 100644 @end example In particular, you can use this to set driver properties for devices which are -@@ -346,11 +346,11 @@ bootindex options. The default is non-strict boot. +@@ -336,11 +336,11 @@ bootindex options. The default is non-strict boot. 
@example # try to boot from network first, then from hard disk @@ -678,7 +736,7 @@ index 37f2aa8..41cb1f3 100644 @end example Note: The legacy format '-boot @var{drives}' is still supported but its -@@ -379,7 +379,7 @@ For example, the following command-line sets the guest startup RAM size to +@@ -369,7 +369,7 @@ For example, the following command-line sets the guest startup RAM size to memory the guest can reach to 4GB: @example @@ -687,7 +745,7 @@ index 37f2aa8..41cb1f3 100644 @end example If @var{slots} and @var{maxmem} are not specified, memory hotplug won't -@@ -448,12 +448,12 @@ Enable audio and selected sound hardware. Use 'help' to print all +@@ -438,12 +438,12 @@ Enable audio and selected sound hardware. Use 'help' to print all available sound hardware. @example @@ -706,7 +764,7 @@ index 37f2aa8..41cb1f3 100644 @end example Note that Linux's i810_audio OSS kernel (for AC97) module might -@@ -946,21 +946,21 @@ is off. +@@ -918,21 +918,21 @@ is off. Instead of @option{-cdrom} you can use: @example @@ -734,7 +792,7 @@ index 37f2aa8..41cb1f3 100644 -add-fd fd=3,set=2,opaque="rdwr:/path/to/file" -add-fd fd=4,set=2,opaque="rdonly:/path/to/file" -drive file=/dev/fdset/2,index=0,media=disk -@@ -968,28 +968,28 @@ qemu-system-i386 +@@ -940,28 +940,28 @@ qemu-system-i386 You can connect a CDROM to the slave of ide0: @example @@ -769,7 +827,7 @@ index 37f2aa8..41cb1f3 100644 @end example ETEXI -@@ -2055,8 +2055,8 @@ The following two example do exactly the same, to show how @option{-nic} can +@@ -1975,8 +1975,8 @@ The following two example do exactly the same, to show how @option{-nic} can be used to shorten the command line length (note that the e1000 is the default on i386, so the @option{model=e1000} parameter could even be omitted here, too): @example @@ -780,7 +838,7 @@ index 37f2aa8..41cb1f3 100644 @end example @item -nic none -@@ -2127,7 +2127,7 @@ can not be resolved. +@@ -2047,7 +2047,7 @@ can not be resolved. 
Example: @example @@ -789,7 +847,7 @@ index 37f2aa8..41cb1f3 100644 @end example @item domainname=@var{domain} -@@ -2146,7 +2146,7 @@ a guest from a local directory. +@@ -2071,7 +2071,7 @@ a guest from a local directory. Example (using pxelinux): @example @@ -798,7 +856,7 @@ index 37f2aa8..41cb1f3 100644 -netdev user,id=n1,tftp=/path/to/tftp/files,bootfile=/pxelinux.0 @end example -@@ -2180,7 +2180,7 @@ screen 0, use the following: +@@ -2105,7 +2105,7 @@ screen 0, use the following: @example # on the host @@ -807,7 +865,7 @@ index 37f2aa8..41cb1f3 100644 # this host xterm should open in the guest X11 server xterm -display :1 @end example -@@ -2190,7 +2190,7 @@ the guest, use the following: +@@ -2115,7 +2115,7 @@ the guest, use the following: @example # on the host @@ -816,7 +874,7 @@ index 37f2aa8..41cb1f3 100644 telnet localhost 5555 @end example -@@ -2209,7 +2209,7 @@ lifetime, like in the following example: +@@ -2134,7 +2134,7 @@ lifetime, like in the following example: @example # open 10.10.1.1:4321 on bootup, connect 10.0.2.100:1234 to it whenever # the guest accesses it @@ -825,7 +883,7 @@ index 37f2aa8..41cb1f3 100644 @end example Or you can execute a command on every TCP connection established by the guest, -@@ -2218,7 +2218,7 @@ so that QEMU behaves similar to an inetd process for that virtual server: +@@ -2143,7 +2143,7 @@ so that QEMU behaves similar to an inetd process for that virtual server: @example # call "netcat 10.10.1.1 4321" on every TCP connection to 10.0.2.100:1234 # and connect the TCP stream to its stdin/stdout @@ -834,7 +892,7 @@ index 37f2aa8..41cb1f3 100644 @end example @end table -@@ -2250,21 +2250,22 @@ Examples: +@@ -2170,21 +2170,22 @@ Examples: @example #launch a QEMU instance with the default network script @@ -860,7 +918,7 @@ index 37f2aa8..41cb1f3 100644 -netdev tap,id=n1,"helper=/path/to/qemu-bridge-helper" @end example -@@ -2281,13 +2282,13 @@ Examples: +@@ -2201,13 +2202,13 @@ Examples: @example #launch a QEMU instance with the 
default network helper to #connect a TAP device to bridge br0 @@ -876,7 +934,7 @@ index 37f2aa8..41cb1f3 100644 @end example @item -netdev socket,id=@var{id}[,fd=@var{h}][,listen=[@var{host}]:@var{port}][,connect=@var{host}:@var{port}] -@@ -2302,13 +2303,13 @@ specifies an already opened TCP socket. +@@ -2222,13 +2223,13 @@ specifies an already opened TCP socket. Example: @example # launch a first QEMU instance @@ -896,7 +954,7 @@ index 37f2aa8..41cb1f3 100644 @end example @item -netdev socket,id=@var{id}[,fd=@var{h}][,mcast=@var{maddr}:@var{port}[,localaddr=@var{addr}]] -@@ -2331,23 +2332,23 @@ Use @option{fd=h} to specify an already opened UDP multicast socket. +@@ -2251,15 +2252,15 @@ Use @option{fd=h} to specify an already opened UDP multicast socket. Example: @example # launch one QEMU instance @@ -915,13 +973,11 @@ index 37f2aa8..41cb1f3 100644 + -netdev socket,id=n2,mcast=230.0.0.1:1234 # launch yet another QEMU instance on same "bus" -qemu-system-i386 linux.img \ -- -device e1000,netdev=n3,macaddr=52:54:00:12:34:58 \ -- -netdev socket,id=n3,mcast=230.0.0.1:1234 +qemu-kvm linux.img \ -+ -device e1000,netdev=n3,macaddr=52:54:00:12:34:58 \ -+ -netdev socket,id=n3,mcast=230.0.0.1:1234 + -device e1000,netdev=n3,mac=52:54:00:12:34:58 \ + -netdev socket,id=n3,mcast=230.0.0.1:1234 @end example - +@@ -2267,7 +2268,7 @@ qemu-system-i386 linux.img \ Example (User Mode Linux compat.): @example # launch QEMU instance (note mcast address selected is UML's default) @@ -930,7 +986,7 @@ index 37f2aa8..41cb1f3 100644 -device e1000,netdev=n1,mac=52:54:00:12:34:56 \ -netdev socket,id=n1,mcast=239.192.168.1:1102 # launch UML -@@ -2356,9 +2357,12 @@ qemu-system-i386 linux.img \ +@@ -2276,9 +2277,12 @@ qemu-system-i386 linux.img \ Example (send packets from host's 1.2.3.4): @example @@ -946,7 +1002,7 @@ index 37f2aa8..41cb1f3 100644 @end example @item -netdev 
l2tpv3,id=@var{id},src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}] -@@ -2416,7 +2420,7 @@ brctl addif br-lan vmtunnel0 +@@ -2336,7 +2340,7 @@ brctl addif br-lan vmtunnel0 # on 4.3.2.1 # launch QEMU instance - if your network has reorder or is very lossy add ,pincounter @@ -955,7 +1011,7 @@ index 37f2aa8..41cb1f3 100644 -netdev l2tpv3,id=n1,src=4.2.3.1,dst=1.2.3.4,udp,srcport=16384,dstport=16384,rxsession=0xffffffff,txsession=0xffffffff,counter @end example -@@ -2433,7 +2437,7 @@ Example: +@@ -2353,7 +2357,7 @@ Example: # launch vde switch vde_switch -F -sock /tmp/myswitch # launch QEMU instance @@ -964,7 +1020,7 @@ index 37f2aa8..41cb1f3 100644 @end example @item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n] -@@ -2447,11 +2451,11 @@ be created for multiqueue vhost-user. +@@ -2367,11 +2371,11 @@ be created for multiqueue vhost-user. Example: @example @@ -981,7 +1037,7 @@ index 37f2aa8..41cb1f3 100644 @end example @item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}] -@@ -2877,7 +2881,7 @@ and communicate. Requires the Linux @code{vhci} driver installed. Can +@@ -2804,7 +2808,7 @@ and communicate. Requires the Linux @code{vhci} driver installed. Can be used as following: @example @@ -990,7 +1046,7 @@ index 37f2aa8..41cb1f3 100644 @end example @item -bt device:@var{dev}[,vlan=@var{n}] -@@ -3322,14 +3326,14 @@ ETEXI +@@ -3253,14 +3257,14 @@ ETEXI DEF("realtime", HAS_ARG, QEMU_OPTION_realtime, "-realtime [mlock=on|off]\n" @@ -1008,7 +1064,7 @@ index 37f2aa8..41cb1f3 100644 (enabled by default). ETEXI -@@ -3367,7 +3371,7 @@ connections will likely be TCP-based, but also UDP, pseudo TTY, or even +@@ -3298,7 +3302,7 @@ connections will likely be TCP-based, but also UDP, pseudo TTY, or even stdio are reasonable use case. 
The latter is allowing to start QEMU from within gdb and establish the connection via a pipe: @example @@ -1017,7 +1073,7 @@ index 37f2aa8..41cb1f3 100644 @end example ETEXI -@@ -4299,7 +4303,7 @@ which specify the queue number of cryptodev backend, the default of +@@ -4247,7 +4251,7 @@ which specify the queue number of cryptodev backend, the default of @example @@ -1026,7 +1082,7 @@ index 37f2aa8..41cb1f3 100644 [...] \ -object cryptodev-backend-builtin,id=cryptodev0 \ -device virtio-crypto-pci,id=crypto0,cryptodev=cryptodev0 \ -@@ -4319,7 +4323,7 @@ of cryptodev backend for multiqueue vhost-user, the default of @var{queues} is 1 +@@ -4267,7 +4271,7 @@ of cryptodev backend for multiqueue vhost-user, the default of @var{queues} is 1 @example diff --git a/0018-usb-xhci-Fix-PCI-capability-order.patch b/0019-usb-xhci-Fix-PCI-capability-order.patch similarity index 87% rename from 0018-usb-xhci-Fix-PCI-capability-order.patch rename to 0019-usb-xhci-Fix-PCI-capability-order.patch index a44ef20..881fdd0 100644 --- a/0018-usb-xhci-Fix-PCI-capability-order.patch +++ b/0019-usb-xhci-Fix-PCI-capability-order.patch @@ -1,4 +1,4 @@ -From 69912b533a88bda6377292231fb94475a674a90d Mon Sep 17 00:00:00 2001 +From 0c8a71a5751106013f9ecfdc20f308cc1e44045b Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 5 May 2017 19:06:14 +0200 Subject: usb-xhci: Fix PCI capability order @@ -43,6 +43,19 @@ qemu-kvm: load of migration failed: Invalid argument Signed-off-by: Dr. 
David Alan Gilbert Signed-off-by: Miroslav Rezanina + +-- +Rebase notes (2.9.0): +- Change in assert condition (upstream) + +(cherry picked from commit aad727a5ecde1ad4935eb8427604d4df5a1f1f35) +(cherry picked from commit 2dd7402227e77d748a7375233ac9e7feab244bda) + +Conflicts: + hw/usb/hcd-xhci.c + +(cherry picked from commit a42f86dc906cc7d2c16d02bf125ed76847b469cb) +(cherry picked from commit 992ab2e4f6e15d3e51bc716763aa8d6f45c6d29d) --- hw/usb/hcd-xhci.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/0020-linux-headers-asm-s390-kvm.h-header-sync.patch b/0020-linux-headers-asm-s390-kvm.h-header-sync.patch deleted file mode 100644 index cb0ee04..0000000 --- a/0020-linux-headers-asm-s390-kvm.h-header-sync.patch +++ /dev/null @@ -1,72 +0,0 @@ -From f3d0b355f946ab87b281ef75ebfb52f7b7592f2a Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 9 Aug 2018 10:15:08 +0000 -Subject: linux-headers: asm-s390/kvm.h header sync - -RH-Author: Thomas Huth -Message-id: <1533813309-9643-2-git-send-email-thuth@redhat.com> -Patchwork-id: 81688 -O-Subject: [RHEL-8.0 qemu-kvm PATCH 1/2] linux-headers: asm-s390/kvm.h header sync -Bugzilla: 1612938 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck -RH-Acked-by: Jens Freimann - -This is a header sync with the linux uapi header. The corresponding -kernel commit id is a3da7b4a3be51f37f434f14e11e60491f098b6ea (in -the kvm/next branch) - -Signed-off-by: Thomas Huth - -Merged patches (3.0.0): -- 57332f1 linux-headers: Update to include KVM_CAP_S390_HPAGE_1M ---- - linux-headers/asm-s390/kvm.h | 5 ++++- - linux-headers/linux/kvm.h | 1 + - 2 files changed, 5 insertions(+), 1 deletion(-) - -diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h -index 11def14..1ab9901 100644 ---- a/linux-headers/asm-s390/kvm.h -+++ b/linux-headers/asm-s390/kvm.h -@@ -4,7 +4,7 @@ - /* - * KVM s390 specific structures and definitions - * -- * Copyright IBM Corp. 2008 -+ * Copyright IBM Corp. 
2008, 2018 - * - * Author(s): Carsten Otte - * Christian Borntraeger -@@ -225,6 +225,7 @@ struct kvm_guest_debug_arch { - #define KVM_SYNC_FPRS (1UL << 8) - #define KVM_SYNC_GSCB (1UL << 9) - #define KVM_SYNC_BPBC (1UL << 10) -+#define KVM_SYNC_ETOKEN (1UL << 11) - /* length and alignment of the sdnx as a power of two */ - #define SDNXC 8 - #define SDNXL (1UL << SDNXC) -@@ -258,6 +259,8 @@ struct kvm_sync_regs { - struct { - __u64 reserved1[2]; - __u64 gscb[4]; -+ __u64 etoken; -+ __u64 etoken_extension; - }; - }; - }; -diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h -index 98f389a..2aae948 100644 ---- a/linux-headers/linux/kvm.h -+++ b/linux-headers/linux/kvm.h -@@ -949,6 +949,7 @@ struct kvm_ppc_resize_hpt { - #define KVM_CAP_GET_MSR_FEATURES 153 - #define KVM_CAP_HYPERV_EVENTFD 154 - #define KVM_CAP_HYPERV_TLBFLUSH 155 -+#define KVM_CAP_S390_HPAGE_1M 156 - - #ifdef KVM_CAP_IRQ_ROUTING - --- -1.8.3.1 - diff --git a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0020-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch similarity index 90% rename from 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch rename to 0020-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index e5e93ce..af07bfd 100644 --- a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0020-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From a883dbcc1c55cab189ff4a48cbdd12c4b4246b9c Mon Sep 17 00:00:00 2001 +From 7d316f7aebb9634c3ac5b79f5b1198e27726a0fd Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] @@ -36,13 +36,16 @@ crashing. 
Signed-off-by: Fam Zheng Signed-off-by: Miroslav Rezanina +(cherry picked from commit b0caf00bbc35c7d89e02999bdce86e1f867728e8) +(cherry picked from commit c9c4f117d8b507c2f86035c282d537c0a327364f) +(cherry picked from commit 5d586bb2543337f0ff172c6ce942dba3acbcedff) Signed-off-by: Danilo C. L. de Paula --- hw/scsi/virtio-scsi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 5a3057d..52a3c1d 100644 +index 3aa9971..9f754c4 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -790,6 +790,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, diff --git a/0021-s390x-Enable-KVM-huge-page-backing-support.patch b/0021-s390x-Enable-KVM-huge-page-backing-support.patch deleted file mode 100644 index 636c94d..0000000 --- a/0021-s390x-Enable-KVM-huge-page-backing-support.patch +++ /dev/null @@ -1,114 +0,0 @@ -From 3b4526245dcb2daad3a6393b6b129f85f9e2c7a2 Mon Sep 17 00:00:00 2001 -From: David Hildenbrand -Date: Mon, 6 Aug 2018 14:18:41 +0100 -Subject: s390x: Enable KVM huge page backing support - -RH-Author: David Hildenbrand -Message-id: <20180806141842.23963-3-david@redhat.com> -Patchwork-id: 81645 -O-Subject: [RHEL-8.0 qemu-kvm PATCH v2 2/3] s390x: Enable KVM huge page backing support -Bugzilla: 1610906 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Acked-by: Paolo Bonzini - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1610906 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=17624600 -Upstream: N/A - -Kernel part is in kvm/next, scheduled for 4.19. Patch has been reviewed -upstream but cannot get picked up yet due to the outstanding linux -header sync. Conflict to upstream patch: We have no units.h, therefore -we have to unfold "4*KiB" and "1*MiB". - -QEMU has had huge page support for a longer time already, but KVM -memory management under s390x needed some changes to work with huge -backings. 
- -Now that we have support, let's enable it if requested and -available. Otherwise we now properly tell the user if there is no -support and back out instead of failing to run the VM later on. - -Signed-off-by: Janosch Frank -Reviewed-by: David Hildenbrand -Reviewed-by: Thomas Huth -Signed-off-by: David Hildenbrand -Signed-off-by: Danilo C. L. de Paula ---- - target/s390x/kvm.c | 34 ++++++++++++++++++++++++++++++++-- - 1 file changed, 32 insertions(+), 2 deletions(-) - -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index bbcbeed..c36ff36f 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -34,6 +34,7 @@ - #include "qapi/error.h" - #include "qemu/error-report.h" - #include "qemu/timer.h" -+#include "qemu/mmap-alloc.h" - #include "sysemu/sysemu.h" - #include "sysemu/hw_accel.h" - #include "hw/hw.h" -@@ -139,6 +140,7 @@ static int cap_mem_op; - static int cap_s390_irq; - static int cap_ri; - static int cap_gs; -+static int cap_hpage_1m; - - static int active_cmma; - -@@ -220,9 +222,9 @@ static void kvm_s390_enable_cmma(void) - .attr = KVM_S390_VM_MEM_ENABLE_CMMA, - }; - -- if (mem_path) { -+ if (cap_hpage_1m) { - warn_report("CMM will not be enabled because it is not " -- "compatible with hugetlbfs."); -+ "compatible with huge memory backings."); - return; - } - rc = kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, &attr); -@@ -281,10 +283,38 @@ void kvm_s390_crypto_reset(void) - } - } - -+static int kvm_s390_configure_mempath_backing(KVMState *s) -+{ -+ size_t path_psize = qemu_mempath_getpagesize(mem_path); -+ -+ if (path_psize == 4 * 1024) { -+ return 0; -+ } -+ -+ if (path_psize != 1024 * 1024) { -+ error_report("Memory backing with 2G pages was specified, " -+ "but KVM does not support this memory backing"); -+ return -EINVAL; -+ } -+ -+ if (kvm_vm_enable_cap(s, KVM_CAP_S390_HPAGE_1M, 0)) { -+ error_report("Memory backing with 1M pages was specified, " -+ "but KVM does not support this memory backing"); -+ return -EINVAL; -+ } -+ -+ cap_hpage_1m 
= 1; -+ return 0; -+} -+ - int kvm_arch_init(MachineState *ms, KVMState *s) - { - MachineClass *mc = MACHINE_GET_CLASS(ms); - -+ if (mem_path && kvm_s390_configure_mempath_backing(s)) { -+ return -EINVAL; -+ } -+ - mc->default_cpu_type = S390_CPU_TYPE_NAME("host"); - cap_sync_regs = kvm_check_extension(s, KVM_CAP_SYNC_REGS); - cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF); --- -1.8.3.1 - diff --git a/0022-s390x-kvm-add-etoken-facility.patch b/0022-s390x-kvm-add-etoken-facility.patch deleted file mode 100644 index d56ba50..0000000 --- a/0022-s390x-kvm-add-etoken-facility.patch +++ /dev/null @@ -1,190 +0,0 @@ -From 8eacbf0e8e26b2a8aa3de955a57a7a3cb680d922 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 9 Aug 2018 10:15:09 +0000 -Subject: s390x/kvm: add etoken facility - -RH-Author: Thomas Huth -Message-id: <1533813309-9643-3-git-send-email-thuth@redhat.com> -Patchwork-id: 81687 -O-Subject: [RHEL-8.0 qemu-kvm PATCH 2/2] s390x/kvm: add etoken facility -Bugzilla: 1612938 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck -RH-Acked-by: Jens Freimann - -Provide the etoken facility. We need to handle cpu model, migration and -clear reset. - -Signed-off-by: Christian Borntraeger -Acked-by: Janosch Frank -Signed-off-by: Thomas Huth ---- - target/s390x/cpu.h | 3 +++ - target/s390x/cpu_features.c | 3 ++- - target/s390x/cpu_features_def.h | 3 ++- - target/s390x/gen-features.c | 3 ++- - target/s390x/kvm.c | 11 +++++++++++ - target/s390x/machine.c | 20 +++++++++++++++++++- - 6 files changed, 39 insertions(+), 4 deletions(-) - -diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h -index 2c3dd2d..21b2f21 100644 ---- a/target/s390x/cpu.h -+++ b/target/s390x/cpu.h -@@ -2,6 +2,7 @@ - * S/390 virtual CPU header - * - * Copyright (c) 2009 Ulrich Hecht -+ * Copyright IBM Corp. 
2012, 2018 - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public -@@ -68,6 +69,8 @@ struct CPUS390XState { - uint32_t aregs[16]; /* access registers */ - uint8_t riccb[64]; /* runtime instrumentation control */ - uint64_t gscb[4]; /* guarded storage control */ -+ uint64_t etoken; /* etoken */ -+ uint64_t etoken_extension; /* etoken extension */ - - /* Fields up to this point are not cleared by initial CPU reset */ - struct {} start_initial_reset_fields; -diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c -index 3b9e274..e05e6aa 100644 ---- a/target/s390x/cpu_features.c -+++ b/target/s390x/cpu_features.c -@@ -1,7 +1,7 @@ - /* - * CPU features/facilities for s390x - * -- * Copyright 2016 IBM Corp. -+ * Copyright IBM Corp. 2016, 2018 - * - * Author(s): David Hildenbrand - * -@@ -106,6 +106,7 @@ static const S390FeatDef s390_features[] = { - FEAT_INIT("irbm", S390_FEAT_TYPE_STFL, 145, "Insert-reference-bits-multiple facility"), - FEAT_INIT("msa8-base", S390_FEAT_TYPE_STFL, 146, "Message-security-assist-extension-8 facility (excluding subfunctions)"), - FEAT_INIT("cmmnt", S390_FEAT_TYPE_STFL, 147, "CMM: ESSA-enhancement (no translate) facility"), -+ FEAT_INIT("etoken", S390_FEAT_TYPE_STFL, 156, "Etoken facility"), - - /* SCLP SCCB Byte 80 - 98 (bit numbers relative to byte-80) */ - FEAT_INIT("gsls", S390_FEAT_TYPE_SCLP_CONF_CHAR, 40, "SIE: Guest-storage-limit-suppression facility"), -diff --git a/target/s390x/cpu_features_def.h b/target/s390x/cpu_features_def.h -index 7c5915c..ac2c947 100644 ---- a/target/s390x/cpu_features_def.h -+++ b/target/s390x/cpu_features_def.h -@@ -1,7 +1,7 @@ - /* - * CPU features/facilities for s390 - * -- * Copyright 2016 IBM Corp. -+ * Copyright IBM Corp. 
2016, 2018 - * - * Author(s): Michael Mueller - * David Hildenbrand -@@ -93,6 +93,7 @@ typedef enum { - S390_FEAT_INSERT_REFERENCE_BITS_MULT, - S390_FEAT_MSA_EXT_8, - S390_FEAT_CMM_NT, -+ S390_FEAT_ETOKEN, - - /* Sclp Conf Char */ - S390_FEAT_SIE_GSLS, -diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c -index 6626b6f..5af042c 100644 ---- a/target/s390x/gen-features.c -+++ b/target/s390x/gen-features.c -@@ -1,7 +1,7 @@ - /* - * S390 feature list generator - * -- * Copyright 2016 IBM Corp. -+ * Copyright IBM Corp. 2016, 2018 - * - * Author(s): Michael Mueller - * David Hildenbrand -@@ -471,6 +471,7 @@ static uint16_t full_GEN14_GA1[] = { - S390_FEAT_GROUP_MSA_EXT_7, - S390_FEAT_GROUP_MSA_EXT_8, - S390_FEAT_CMM_NT, -+ S390_FEAT_ETOKEN, - S390_FEAT_HPMA2, - S390_FEAT_SIE_KSS, - S390_FEAT_GROUP_MULTIPLE_EPOCH_PTFF, -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index c36ff36f..71d90f2 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -523,6 +523,12 @@ int kvm_arch_put_registers(CPUState *cs, int level) - cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_BPBC; - } - -+ if (can_sync_regs(cs, KVM_SYNC_ETOKEN)) { -+ cs->kvm_run->s.regs.etoken = env->etoken; -+ cs->kvm_run->s.regs.etoken_extension = env->etoken_extension; -+ cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_ETOKEN; -+ } -+ - /* Finally the prefix */ - if (can_sync_regs(cs, KVM_SYNC_PREFIX)) { - cs->kvm_run->s.regs.prefix = env->psa; -@@ -637,6 +643,11 @@ int kvm_arch_get_registers(CPUState *cs) - env->bpbc = cs->kvm_run->s.regs.bpbc; - } - -+ if (can_sync_regs(cs, KVM_SYNC_ETOKEN)) { -+ env->etoken = cs->kvm_run->s.regs.etoken; -+ env->etoken_extension = cs->kvm_run->s.regs.etoken_extension; -+ } -+ - /* pfault parameters */ - if (can_sync_regs(cs, KVM_SYNC_PFAULT)) { - env->pfault_token = cs->kvm_run->s.regs.pft; -diff --git a/target/s390x/machine.c b/target/s390x/machine.c -index bd3230d..cb792aa 100644 ---- a/target/s390x/machine.c -+++ b/target/s390x/machine.c -@@ -1,7 +1,7 @@ - /* 
- * S390x machine definitions and functions - * -- * Copyright IBM Corp. 2014 -+ * Copyright IBM Corp. 2014, 2018 - * - * Authors: - * Thomas Huth -@@ -216,6 +216,23 @@ const VMStateDescription vmstate_bpbc = { - } - }; - -+static bool etoken_needed(void *opaque) -+{ -+ return s390_has_feat(S390_FEAT_ETOKEN); -+} -+ -+const VMStateDescription vmstate_etoken = { -+ .name = "cpu/etoken", -+ .version_id = 1, -+ .minimum_version_id = 1, -+ .needed = etoken_needed, -+ .fields = (VMStateField[]) { -+ VMSTATE_UINT64(env.etoken, S390CPU), -+ VMSTATE_UINT64(env.etoken_extension, S390CPU), -+ VMSTATE_END_OF_LIST() -+ } -+}; -+ - const VMStateDescription vmstate_s390_cpu = { - .name = "cpu", - .post_load = cpu_post_load, -@@ -251,6 +268,7 @@ const VMStateDescription vmstate_s390_cpu = { - &vmstate_exval, - &vmstate_gscb, - &vmstate_bpbc, -+ &vmstate_etoken, - NULL - }, - }; --- -1.8.3.1 - diff --git a/0023-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch b/0023-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch deleted file mode 100644 index 01227d7..0000000 --- a/0023-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 29df663d045345a8c498dc3966cc59dcf091a50d Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 7 Aug 2018 09:05:54 +0000 -Subject: s390x/cpumodel: default enable bpb and ppa15 for z196 and later - -RH-Author: Cornelia Huck -Message-id: <20180807100554.29643-3-cohuck@redhat.com> -Patchwork-id: 81660 -O-Subject: [qemu-kvm RHEL8/virt212 PATCH 2/2] s390x/cpumodel: default enable bpb and ppa15 for z196 and later -Bugzilla: 1595718 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Thomas Huth -RH-Acked-by: Jens Freimann - -Upstream: downstream version of 8727315111 ("s390x/cpumodel: default - enable bpb and ppa15 for z196 and later"); downstream does - not have the upstream machine types, instead we need to - turn off the bits for the RHEL 7.5 machine - -Most systems and host kernels provide the necessary 
building blocks for -bpb and ppa15. We can reverse the logic and default enable those -features, while still allowing to disable it via cpu model. - -So let us add bpb and ppa15 to z196 and later default CPU model for the -qemu rhel7.6.0 machine. (like -cpu z13). Older machine types (i.e. -s390-ccw-virtio-rhel7.5.0) will retain the old value and not provide those -bits in the default model. - -Signed-off-by: Cornelia Huck ---- - hw/s390x/s390-virtio-ccw.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 0f135c9..cdf4558 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -931,6 +931,10 @@ static void ccw_machine_rhel750_instance_options(MachineState *machine) - /* before 2.12 we emulated the very first z900, and RHEL 7.5 is - based on 2.10 */ - s390_set_qemu_cpu_model(0x2064, 7, 1, qemu_cpu_feat); -+ -+ /* bpb and ppa15 were only in the full model in RHEL 7.5 */ -+ s390_cpudef_featoff_greater(11, 1, S390_FEAT_PPA15); -+ s390_cpudef_featoff_greater(11, 1, S390_FEAT_BPB); - } - - static void ccw_machine_rhel750_class_options(MachineClass *mc) --- -1.8.3.1 - diff --git a/0024-i386-Fix-arch_query_cpu_model_expansion-leak.patch b/0024-i386-Fix-arch_query_cpu_model_expansion-leak.patch deleted file mode 100644 index e2570c5..0000000 --- a/0024-i386-Fix-arch_query_cpu_model_expansion-leak.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 43b08a1e4bc47d810212f569cc0fc30eebfd7036 Mon Sep 17 00:00:00 2001 -From: Markus Armbruster -Date: Fri, 31 Aug 2018 13:59:22 +0100 -Subject: i386: Fix arch_query_cpu_model_expansion() leak - -RH-Author: Markus Armbruster -Message-id: <20180831135922.6073-3-armbru@redhat.com> -Patchwork-id: 81980 -O-Subject: [qemu-kvm RHEL8/virt212 PATCH 2/2] i386: Fix arch_query_cpu_model_expansion() leak -Bugzilla: 1615717 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Miroslav Rezanina - -From: Eduardo Habkost - -Reported by Coverity: - 
-Error: RESOURCE_LEAK (CWE-772): [#def439] -qemu-2.12.0/target/i386/cpu.c:3179: alloc_fn: Storage is returned from allocation function "qdict_new". -qemu-2.12.0/qobject/qdict.c:34:5: alloc_fn: Storage is returned from allocation function "g_malloc0". -qemu-2.12.0/qobject/qdict.c:34:5: var_assign: Assigning: "qdict" = "g_malloc0(4120UL)". -qemu-2.12.0/qobject/qdict.c:37:5: return_alloc: Returning allocated memory "qdict". -qemu-2.12.0/target/i386/cpu.c:3179: var_assign: Assigning: "props" = storage returned from "qdict_new()". -qemu-2.12.0/target/i386/cpu.c:3217: leaked_storage: Variable "props" going out of scope leaks the storage it points to. - -This was introduced by commit b8097deb359b ("i386: Improve -query-cpu-model-expansion full mode"). - -The leak is only theoretical: if ret->model->props is set to -props, the qapi_free_CpuModelExpansionInfo() call will free props -too in case of errors. The only way for this to not happen is if -we enter the default branch of the switch statement, which would -never happen because all CpuModelExpansionType values are being -handled. - -It's still worth to change this to make the allocation logic -easier to follow and make the Coverity error go away. To make -everything simpler, initialize ret->model and ret->model->props -earlier in the function. - -While at it, remove redundant check for !prop because prop is -always initialized at the beginning of the function. - -Fixes: b8097deb359bbbd92592b9670adfe9e245b2d0bd -Signed-off-by: Eduardo Habkost -Message-Id: <20180816183509.8231-1-ehabkost@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit e38bf612477fca62b205ebd909b1372a7e45a8c0) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 9 +++------ - 1 file changed, 3 insertions(+), 6 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 051018a..71e2808 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -3784,6 +3784,9 @@ arch_query_cpu_model_expansion(CpuModelExpansionType type, - } - - props = qdict_new(); -+ ret->model = g_new0(CpuModelInfo, 1); -+ ret->model->props = QOBJECT(props); -+ ret->model->has_props = true; - - switch (type) { - case CPU_MODEL_EXPANSION_TYPE_STATIC: -@@ -3804,15 +3807,9 @@ arch_query_cpu_model_expansion(CpuModelExpansionType type, - goto out; - } - -- if (!props) { -- props = qdict_new(); -- } - x86_cpu_to_dict(xc, props); - -- ret->model = g_new0(CpuModelInfo, 1); - ret->model->name = g_strdup(base_name); -- ret->model->props = QOBJECT(props); -- ret->model->has_props = true; - - out: - object_unref(OBJECT(xc)); --- -1.8.3.1 - diff --git a/0025-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch b/0025-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch deleted file mode 100644 index 12692e8..0000000 --- a/0025-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 628b10cd4d5cd8fde97dab66f143db78fe03398a Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Tue, 21 Aug 2018 19:15:41 +0100 -Subject: i386: Disable TOPOEXT by default on "-cpu host" - -RH-Author: Eduardo Habkost -Message-id: <20180821191541.31916-2-ehabkost@redhat.com> -Patchwork-id: 81904 -O-Subject: [qemu-kvm RHEL8/virt212 PATCH v2 1/1] i386: Disable TOPOEXT by default on "-cpu host" -Bugzilla: 1619804 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Igor Mammedov - -Enabling TOPOEXT is always allowed, but it can't be enabled -blindly by "-cpu host" because it may make guests crash if the -rest of the cache topology information isn't provided or isn't -consistent. 
- -This addresses the bug reported at: -https://bugzilla.redhat.com/show_bug.cgi?id=1613277 - -Signed-off-by: Eduardo Habkost -Message-Id: <20180809221852.15285-1-ehabkost@redhat.com> -Tested-by: Richard W.M. Jones -Reviewed-by: Babu Moger -Signed-off-by: Eduardo Habkost -(cherry picked from commit 7210a02c58572b2686a3a8d610c6628f87864aed) -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 71e2808..198d578 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -849,6 +849,12 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - }, - .cpuid_eax = 0x80000001, .cpuid_reg = R_ECX, - .tcg_features = TCG_EXT3_FEATURES, -+ /* -+ * TOPOEXT is always allowed but can't be enabled blindly by -+ * "-cpu host", as it requires consistent cache topology info -+ * to be provided so it doesn't confuse guests. -+ */ -+ .no_autoenable_flags = CPUID_EXT3_TOPOEXT, - }, - [FEAT_C000_0001_EDX] = { - .feat_names = { --- -1.8.3.1 - diff --git a/0026-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch b/0026-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch deleted file mode 100644 index 3f2736c..0000000 --- a/0026-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 1ed2bb0d831983b68bcdecd057c2c5bfd419c304 Mon Sep 17 00:00:00 2001 -From: Jeffrey Cody -Date: Wed, 26 Sep 2018 04:08:14 +0100 -Subject: curl: Make sslverify=off disable host as well as peer verification. - -RH-Author: Jeffrey Cody -Message-id: <543d2f667af465dd809329fcba5175bc974d58d4.1537933576.git.jcody@redhat.com> -Patchwork-id: 82293 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 1/1] curl: Make sslverify=off disable host as well as peer verification. -Bugzilla: 1575925 -RH-Acked-by: Richard Jones -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: "Richard W.M. 
Jones" - -The sslverify setting is supposed to turn off all TLS certificate -checks in libcurl. However because of the way we use it, it only -turns off peer certificate authenticity checks -(CURLOPT_SSL_VERIFYPEER). This patch makes it also turn off the check -that the server name in the certificate is the same as the server -you're connecting to (CURLOPT_SSL_VERIFYHOST). - -We can use Google's server at 8.8.8.8 which happens to have a bad TLS -certificate to demonstrate this: - -$ ./qemu-img create -q -f qcow2 -b 'json: { "file.sslverify": "off", "file.driver": "https", "file.url": "https://8.8.8.8/foo" }' /var/tmp/file.qcow2 -qemu-img: /var/tmp/file.qcow2: CURL: Error opening file: SSL: no alternative certificate subject name matches target host name '8.8.8.8' -Could not open backing image to determine size. - -With this patch applied, qemu-img connects to the server regardless of -the bad certificate: - -$ ./qemu-img create -q -f qcow2 -b 'json: { "file.sslverify": "off", "file.driver": "https", "file.url": "https://8.8.8.8/foo" }' /var/tmp/file.qcow2 -qemu-img: /var/tmp/file.qcow2: CURL: Error opening file: The requested URL returned error: 404 Not Found - -(The 404 error is expected because 8.8.8.8 is not actually serving a -file called "/foo".) - -Of course the default (without sslverify=off) remains to always check -the certificate: - -$ ./qemu-img create -q -f qcow2 -b 'json: { "file.driver": "https", "file.url": "https://8.8.8.8/foo" }' /var/tmp/file.qcow2 -qemu-img: /var/tmp/file.qcow2: CURL: Error opening file: SSL: no alternative certificate subject name matches target host name '8.8.8.8' -Could not open backing image to determine size. - -Further information about the two settings is available here: - -https://curl.haxx.se/libcurl/c/CURLOPT_SSL_VERIFYPEER.html -https://curl.haxx.se/libcurl/c/CURLOPT_SSL_VERIFYHOST.html - -Signed-off-by: Richard W.M. 
Jones -Message-id: 20180914095622.19698-1-rjones@redhat.com -Signed-off-by: Jeff Cody -(cherry picked from commit 637fa44ab80c6b317adf1d117494325a95daad60) -Signed-off-by: Jeff Cody -Signed-off-by: Danilo C. L. de Paula ---- - block/curl.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/block/curl.c b/block/curl.c -index 229bb84..fabb2b4 100644 ---- a/block/curl.c -+++ b/block/curl.c -@@ -483,6 +483,8 @@ static int curl_init_state(BDRVCURLState *s, CURLState *state) - curl_easy_setopt(state->curl, CURLOPT_URL, s->url); - curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER, - (long) s->sslverify); -+ curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYHOST, -+ s->sslverify ? 2L : 0L); - if (s->cookie) { - curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie); - } --- -1.8.3.1 - diff --git a/0027-migration-postcopy-Clear-have_listen_thread.patch b/0027-migration-postcopy-Clear-have_listen_thread.patch deleted file mode 100644 index 86157d9..0000000 --- a/0027-migration-postcopy-Clear-have_listen_thread.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 096b7abf1d2755ad469e4bcb3dc6302021979814 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 1 Oct 2018 10:54:48 +0100 -Subject: migration/postcopy: Clear have_listen_thread - -RH-Author: Dr. David Alan Gilbert -Message-id: <20181001105449.41090-2-dgilbert@redhat.com> -Patchwork-id: 82326 -O-Subject: [RHEL-8.0 qemu-kvm PATCH 1/2] migration/postcopy: Clear have_listen_thread -Bugzilla: 1608765 -RH-Acked-by: Pankaj Gupta -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Laurent Vivier - -From: "Dr. David Alan Gilbert" - -Clear have_listen_thread when we exit the thread. -The fallout from this was that various things thought there was -an ongoing postcopy after the postcopy had finished. - -The case that failed was postcopy->savevm->loadvm. - -This corresponds to RH bug https://bugzilla.redhat.com/show_bug.cgi?id=1608765 - -Signed-off-by: Dr. 
David Alan Gilbert -Message-Id: <20180914170430.54271-2-dgilbert@redhat.com> -Reviewed-by: Peter Xu -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 9cf4bb8730c669c40550e635a9e2b8ee4f1664ca) - Manual merge due to context - -Signed-off-by: Danilo C. L. de Paula ---- - migration/savevm.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/migration/savevm.c b/migration/savevm.c -index 7f92567..762c4b2 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1676,6 +1676,7 @@ static void *postcopy_ram_listen_thread(void *opaque) - migration_incoming_state_destroy(); - qemu_loadvm_state_cleanup(); - -+ mis->have_listen_thread = false; - return NULL; - } - --- -1.8.3.1 - diff --git a/0028-migration-cleanup-in-error-paths-in-loadvm.patch b/0028-migration-cleanup-in-error-paths-in-loadvm.patch deleted file mode 100644 index f576c82..0000000 --- a/0028-migration-cleanup-in-error-paths-in-loadvm.patch +++ /dev/null @@ -1,52 +0,0 @@ -From bff052b89b0c32c179d858bd8eed91e0d9f98db4 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 1 Oct 2018 10:54:49 +0100 -Subject: migration: cleanup in error paths in loadvm - -RH-Author: Dr. David Alan Gilbert -Message-id: <20181001105449.41090-3-dgilbert@redhat.com> -Patchwork-id: 82325 -O-Subject: [RHEL-8.0 qemu-kvm PATCH 2/2] migration: cleanup in error paths in loadvm -Bugzilla: 1608765 -RH-Acked-by: Pankaj Gupta -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Laurent Vivier - -From: "Dr. David Alan Gilbert" - -There's a couple of error paths in qemu_loadvm_state -which happen early on but after we've initialised the -load state; that needs to be cleaned up otherwise -we can hit asserts if the state gets reinitialised later. - -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20180914170430.54271-3-dgilbert@redhat.com> -Reviewed-by: Peter Xu -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 096c83b7219c5a2145435afc8be750281e9cb447) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/savevm.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/migration/savevm.c b/migration/savevm.c -index 762c4b2..27e054d 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -2328,11 +2328,13 @@ int qemu_loadvm_state(QEMUFile *f) - if (migrate_get_current()->send_configuration) { - if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) { - error_report("Configuration section missing"); -+ qemu_loadvm_state_cleanup(); - return -EINVAL; - } - ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0); - - if (ret) { -+ qemu_loadvm_state_cleanup(); - return ret; - } - } --- -1.8.3.1 - diff --git a/0029-jobs-change-start-callback-to-run-callback.patch b/0029-jobs-change-start-callback-to-run-callback.patch deleted file mode 100644 index 0955ee2..0000000 --- a/0029-jobs-change-start-callback-to-run-callback.patch +++ /dev/null @@ -1,372 +0,0 @@ -From 2999207ffd4de9f139922b444edba07b051d4a67 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:07 +0100 -Subject: jobs: change start callback to run callback - -RH-Author: John Snow -Message-id: <20180925223431.24791-2-jsnow@redhat.com> -Patchwork-id: 82261 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 01/25] jobs: change start callback to run callback -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Presently we codify the entry point for a job as the "start" callback, -but a more apt name would be "run" to clarify the idea that when this -function returns we consider the job to have "finished," except for -any cleanup which occurs in separate callbacks later. - -As part of this clarification, change the signature to include an error -object and a return code. The error ptr is not yet used, and the return -code while captured, will be overwritten by actions in the job_completed -function. 
- -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20180830015734.19765-2-jsnow@redhat.com -Reviewed-by: Jeff Cody -Signed-off-by: Max Reitz -(cherry picked from commit f67432a2019caf05b57a146bf45c1024a5cb608e) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula ---- - block/backup.c | 7 ++++--- - block/commit.c | 7 ++++--- - block/create.c | 8 +++++--- - block/mirror.c | 10 ++++++---- - block/stream.c | 7 ++++--- - include/qemu/job.h | 2 +- - job.c | 6 +++--- - tests/test-bdrv-drain.c | 7 ++++--- - tests/test-blockjob-txn.c | 16 ++++++++-------- - tests/test-blockjob.c | 7 ++++--- - 10 files changed, 43 insertions(+), 34 deletions(-) - -diff --git a/block/backup.c b/block/backup.c -index 8630d32..5d47781 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -480,9 +480,9 @@ static void backup_incremental_init_copy_bitmap(BackupBlockJob *job) - bdrv_dirty_iter_free(dbi); - } - --static void coroutine_fn backup_run(void *opaque) -+static int coroutine_fn backup_run(Job *opaque_job, Error **errp) - { -- BackupBlockJob *job = opaque; -+ BackupBlockJob *job = container_of(opaque_job, BackupBlockJob, common.job); - BackupCompleteData *data; - BlockDriverState *bs = blk_bs(job->common.blk); - int64_t offset, nb_clusters; -@@ -587,6 +587,7 @@ static void coroutine_fn backup_run(void *opaque) - data = g_malloc(sizeof(*data)); - data->ret = ret; - job_defer_to_main_loop(&job->common.job, backup_complete, data); -+ return ret; - } - - static const BlockJobDriver backup_job_driver = { -@@ -596,7 +597,7 @@ static const BlockJobDriver backup_job_driver = { - .free = block_job_free, - .user_resume = block_job_user_resume, - .drain = block_job_drain, -- .start = backup_run, -+ .run = backup_run, - .commit = backup_commit, - .abort = backup_abort, - .clean = backup_clean, -diff --git a/block/commit.c b/block/commit.c -index e1814d9..905a1c5 100644 ---- a/block/commit.c -+++ b/block/commit.c -@@ -134,9 +134,9 @@ static void commit_complete(Job *job, void 
*opaque) - bdrv_unref(top); - } - --static void coroutine_fn commit_run(void *opaque) -+static int coroutine_fn commit_run(Job *job, Error **errp) - { -- CommitBlockJob *s = opaque; -+ CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); - CommitCompleteData *data; - int64_t offset; - uint64_t delay_ns = 0; -@@ -213,6 +213,7 @@ out: - data = g_malloc(sizeof(*data)); - data->ret = ret; - job_defer_to_main_loop(&s->common.job, commit_complete, data); -+ return ret; - } - - static const BlockJobDriver commit_job_driver = { -@@ -222,7 +223,7 @@ static const BlockJobDriver commit_job_driver = { - .free = block_job_free, - .user_resume = block_job_user_resume, - .drain = block_job_drain, -- .start = commit_run, -+ .run = commit_run, - }, - }; - -diff --git a/block/create.c b/block/create.c -index 915cd41..04733c3 100644 ---- a/block/create.c -+++ b/block/create.c -@@ -45,9 +45,9 @@ static void blockdev_create_complete(Job *job, void *opaque) - job_completed(job, s->ret, s->err); - } - --static void coroutine_fn blockdev_create_run(void *opaque) -+static int coroutine_fn blockdev_create_run(Job *job, Error **errp) - { -- BlockdevCreateJob *s = opaque; -+ BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common); - - job_progress_set_remaining(&s->common, 1); - s->ret = s->drv->bdrv_co_create(s->opts, &s->err); -@@ -55,12 +55,14 @@ static void coroutine_fn blockdev_create_run(void *opaque) - - qapi_free_BlockdevCreateOptions(s->opts); - job_defer_to_main_loop(&s->common, blockdev_create_complete, NULL); -+ -+ return s->ret; - } - - static const JobDriver blockdev_create_job_driver = { - .instance_size = sizeof(BlockdevCreateJob), - .job_type = JOB_TYPE_CREATE, -- .start = blockdev_create_run, -+ .run = blockdev_create_run, - }; - - void qmp_blockdev_create(const char *job_id, BlockdevCreateOptions *options, -diff --git a/block/mirror.c b/block/mirror.c -index b48c3f8..b3363e9 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -812,9 +812,9 @@ 
static int mirror_flush(MirrorBlockJob *s) - return ret; - } - --static void coroutine_fn mirror_run(void *opaque) -+static int coroutine_fn mirror_run(Job *job, Error **errp) - { -- MirrorBlockJob *s = opaque; -+ MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); - MirrorExitData *data; - BlockDriverState *bs = s->mirror_top_bs->backing->bs; - BlockDriverState *target_bs = blk_bs(s->target); -@@ -1041,7 +1041,9 @@ immediate_exit: - if (need_drain) { - bdrv_drained_begin(bs); - } -+ - job_defer_to_main_loop(&s->common.job, mirror_exit, data); -+ return ret; - } - - static void mirror_complete(Job *job, Error **errp) -@@ -1138,7 +1140,7 @@ static const BlockJobDriver mirror_job_driver = { - .free = block_job_free, - .user_resume = block_job_user_resume, - .drain = block_job_drain, -- .start = mirror_run, -+ .run = mirror_run, - .pause = mirror_pause, - .complete = mirror_complete, - }, -@@ -1154,7 +1156,7 @@ static const BlockJobDriver commit_active_job_driver = { - .free = block_job_free, - .user_resume = block_job_user_resume, - .drain = block_job_drain, -- .start = mirror_run, -+ .run = mirror_run, - .pause = mirror_pause, - .complete = mirror_complete, - }, -diff --git a/block/stream.c b/block/stream.c -index 9264b68..b4b987d 100644 ---- a/block/stream.c -+++ b/block/stream.c -@@ -97,9 +97,9 @@ out: - g_free(data); - } - --static void coroutine_fn stream_run(void *opaque) -+static int coroutine_fn stream_run(Job *job, Error **errp) - { -- StreamBlockJob *s = opaque; -+ StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); - StreamCompleteData *data; - BlockBackend *blk = s->common.blk; - BlockDriverState *bs = blk_bs(blk); -@@ -206,6 +206,7 @@ out: - data = g_malloc(sizeof(*data)); - data->ret = ret; - job_defer_to_main_loop(&s->common.job, stream_complete, data); -+ return ret; - } - - static const BlockJobDriver stream_job_driver = { -@@ -213,7 +214,7 @@ static const BlockJobDriver stream_job_driver = { - .instance_size = 
sizeof(StreamBlockJob), - .job_type = JOB_TYPE_STREAM, - .free = block_job_free, -- .start = stream_run, -+ .run = stream_run, - .user_resume = block_job_user_resume, - .drain = block_job_drain, - }, -diff --git a/include/qemu/job.h b/include/qemu/job.h -index 18c9223..9cf463d 100644 ---- a/include/qemu/job.h -+++ b/include/qemu/job.h -@@ -169,7 +169,7 @@ struct JobDriver { - JobType job_type; - - /** Mandatory: Entrypoint for the Coroutine. */ -- CoroutineEntry *start; -+ int coroutine_fn (*run)(Job *job, Error **errp); - - /** - * If the callback is not NULL, it will be invoked when the job transitions -diff --git a/job.c b/job.c -index fa671b4..898260b 100644 ---- a/job.c -+++ b/job.c -@@ -544,16 +544,16 @@ static void coroutine_fn job_co_entry(void *opaque) - { - Job *job = opaque; - -- assert(job && job->driver && job->driver->start); -+ assert(job && job->driver && job->driver->run); - job_pause_point(job); -- job->driver->start(job); -+ job->ret = job->driver->run(job, NULL); - } - - - void job_start(Job *job) - { - assert(job && !job_started(job) && job->paused && -- job->driver && job->driver->start); -+ job->driver && job->driver->run); - job->co = qemu_coroutine_create(job_co_entry, job); - job->pause_count--; - job->busy = true; -diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c -index 17bb850..a753386 100644 ---- a/tests/test-bdrv-drain.c -+++ b/tests/test-bdrv-drain.c -@@ -757,9 +757,9 @@ static void test_job_completed(Job *job, void *opaque) - job_completed(job, 0, NULL); - } - --static void coroutine_fn test_job_start(void *opaque) -+static int coroutine_fn test_job_run(Job *job, Error **errp) - { -- TestBlockJob *s = opaque; -+ TestBlockJob *s = container_of(job, TestBlockJob, common.job); - - job_transition_to_ready(&s->common.job); - while (!s->should_complete) { -@@ -771,6 +771,7 @@ static void coroutine_fn test_job_start(void *opaque) - } - - job_defer_to_main_loop(&s->common.job, test_job_completed, NULL); -+ return 0; - } - - 
static void test_job_complete(Job *job, Error **errp) -@@ -785,7 +786,7 @@ BlockJobDriver test_job_driver = { - .free = block_job_free, - .user_resume = block_job_user_resume, - .drain = block_job_drain, -- .start = test_job_start, -+ .run = test_job_run, - .complete = test_job_complete, - }, - }; -diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c -index 58d9b87..3194924 100644 ---- a/tests/test-blockjob-txn.c -+++ b/tests/test-blockjob-txn.c -@@ -38,25 +38,25 @@ static void test_block_job_complete(Job *job, void *opaque) - bdrv_unref(bs); - } - --static void coroutine_fn test_block_job_run(void *opaque) -+static int coroutine_fn test_block_job_run(Job *job, Error **errp) - { -- TestBlockJob *s = opaque; -- BlockJob *job = &s->common; -+ TestBlockJob *s = container_of(job, TestBlockJob, common.job); - - while (s->iterations--) { - if (s->use_timer) { -- job_sleep_ns(&job->job, 0); -+ job_sleep_ns(job, 0); - } else { -- job_yield(&job->job); -+ job_yield(job); - } - -- if (job_is_cancelled(&job->job)) { -+ if (job_is_cancelled(job)) { - break; - } - } - -- job_defer_to_main_loop(&job->job, test_block_job_complete, -+ job_defer_to_main_loop(job, test_block_job_complete, - (void *)(intptr_t)s->rc); -+ return s->rc; - } - - typedef struct { -@@ -80,7 +80,7 @@ static const BlockJobDriver test_block_job_driver = { - .free = block_job_free, - .user_resume = block_job_user_resume, - .drain = block_job_drain, -- .start = test_block_job_run, -+ .run = test_block_job_run, - }, - }; - -diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c -index cb42f06..b0462bf 100644 ---- a/tests/test-blockjob.c -+++ b/tests/test-blockjob.c -@@ -176,9 +176,9 @@ static void cancel_job_complete(Job *job, Error **errp) - s->should_complete = true; - } - --static void coroutine_fn cancel_job_start(void *opaque) -+static int coroutine_fn cancel_job_run(Job *job, Error **errp) - { -- CancelJob *s = opaque; -+ CancelJob *s = container_of(job, CancelJob, common.job); - - while 
(!s->should_complete) { - if (job_is_cancelled(&s->common.job)) { -@@ -194,6 +194,7 @@ static void coroutine_fn cancel_job_start(void *opaque) - - defer: - job_defer_to_main_loop(&s->common.job, cancel_job_completed, s); -+ return 0; - } - - static const BlockJobDriver test_cancel_driver = { -@@ -202,7 +203,7 @@ static const BlockJobDriver test_cancel_driver = { - .free = block_job_free, - .user_resume = block_job_user_resume, - .drain = block_job_drain, -- .start = cancel_job_start, -+ .run = cancel_job_run, - .complete = cancel_job_complete, - }, - }; --- -1.8.3.1 - diff --git a/0030-jobs-canonize-Error-object.patch b/0030-jobs-canonize-Error-object.patch deleted file mode 100644 index 92dc0b8..0000000 --- a/0030-jobs-canonize-Error-object.patch +++ /dev/null @@ -1,283 +0,0 @@ -From df9702d737eea1720a10d350c24bdcc3f54bcba9 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Wed, 29 Aug 2018 21:57:27 -0400 -Subject: jobs: canonize Error object - -RH-Author: John Snow -Message-id: <20180925223431.24791-3-jsnow@redhat.com> -Patchwork-id: 82262 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 02/25] jobs: canonize Error object -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Jobs presently use both an Error object in the case of the create job, -and char strings in the case of generic errors elsewhere. - -Unify the two paths as just j->err, and remove the extra argument from -job_completed. The integer error code for job_completed is kept for now, -to be removed shortly in a separate patch. 
- -Signed-off-by: John Snow -Message-id: 20180830015734.19765-3-jsnow@redhat.com -[mreitz: Dropped a superfluous g_strdup()] -Reviewed-by: Eric Blake -Signed-off-by: Max Reitz -(cherry picked from commit 3d1f8b07a4c241f81949eff507d9f3a8fd73b87b) -Signed-off-by: John Snow ---- - block/backup.c | 2 +- - block/commit.c | 2 +- - block/create.c | 5 ++--- - block/mirror.c | 2 +- - block/stream.c | 2 +- - include/qemu/job.h | 14 ++++++++------ - job-qmp.c | 5 +++-- - job.c | 18 ++++++------------ - tests/test-bdrv-drain.c | 2 +- - tests/test-blockjob-txn.c | 2 +- - tests/test-blockjob.c | 2 +- - 11 files changed, 26 insertions(+), 30 deletions(-) - -diff --git a/block/backup.c b/block/backup.c -index 5d47781..1e965d5 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -388,7 +388,7 @@ static void backup_complete(Job *job, void *opaque) - { - BackupCompleteData *data = opaque; - -- job_completed(job, data->ret, NULL); -+ job_completed(job, data->ret); - g_free(data); - } - -diff --git a/block/commit.c b/block/commit.c -index 905a1c5..af7579d 100644 ---- a/block/commit.c -+++ b/block/commit.c -@@ -117,7 +117,7 @@ static void commit_complete(Job *job, void *opaque) - * bdrv_set_backing_hd() to fail. 
*/ - block_job_remove_all_bdrv(bjob); - -- job_completed(job, ret, NULL); -+ job_completed(job, ret); - g_free(data); - - /* If bdrv_drop_intermediate() didn't already do that, remove the commit -diff --git a/block/create.c b/block/create.c -index 04733c3..26a385c 100644 ---- a/block/create.c -+++ b/block/create.c -@@ -35,14 +35,13 @@ typedef struct BlockdevCreateJob { - BlockDriver *drv; - BlockdevCreateOptions *opts; - int ret; -- Error *err; - } BlockdevCreateJob; - - static void blockdev_create_complete(Job *job, void *opaque) - { - BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common); - -- job_completed(job, s->ret, s->err); -+ job_completed(job, s->ret); - } - - static int coroutine_fn blockdev_create_run(Job *job, Error **errp) -@@ -50,7 +49,7 @@ static int coroutine_fn blockdev_create_run(Job *job, Error **errp) - BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common); - - job_progress_set_remaining(&s->common, 1); -- s->ret = s->drv->bdrv_co_create(s->opts, &s->err); -+ s->ret = s->drv->bdrv_co_create(s->opts, errp); - job_progress_update(&s->common, 1); - - qapi_free_BlockdevCreateOptions(s->opts); -diff --git a/block/mirror.c b/block/mirror.c -index b3363e9..6637f2b 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -710,7 +710,7 @@ static void mirror_exit(Job *job, void *opaque) - blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort); - - bs_opaque->job = NULL; -- job_completed(job, data->ret, NULL); -+ job_completed(job, data->ret); - - g_free(data); - bdrv_drained_end(src); -diff --git a/block/stream.c b/block/stream.c -index b4b987d..26a7753 100644 ---- a/block/stream.c -+++ b/block/stream.c -@@ -93,7 +93,7 @@ out: - } - - g_free(s->backing_file_str); -- job_completed(job, data->ret, NULL); -+ job_completed(job, data->ret); - g_free(data); - } - -diff --git a/include/qemu/job.h b/include/qemu/job.h -index 9cf463d..e0e9987 100644 ---- a/include/qemu/job.h -+++ b/include/qemu/job.h -@@ -124,12 +124,16 @@ typedef struct 
Job { - /** Estimated progress_current value at the completion of the job */ - int64_t progress_total; - -- /** Error string for a failed job (NULL if, and only if, job->ret == 0) */ -- char *error; -- - /** ret code passed to job_completed. */ - int ret; - -+ /** -+ * Error object for a failed job. -+ * If job->ret is nonzero and an error object was not set, it will be set -+ * to strerror(-job->ret) during job_completed. -+ */ -+ Error *err; -+ - /** The completion function that will be called when the job completes. */ - BlockCompletionFunc *cb; - -@@ -484,15 +488,13 @@ void job_transition_to_ready(Job *job); - /** - * @job: The job being completed. - * @ret: The status code. -- * @error: The error message for a failing job (only with @ret < 0). If @ret is -- * negative, but NULL is given for @error, strerror() is used. - * - * Marks @job as completed. If @ret is non-zero, the job transaction it is part - * of is aborted. If @ret is zero, the job moves into the WAITING state. If it - * is the last job to complete in its transaction, all jobs in the transaction - * move from WAITING to PENDING. - */ --void job_completed(Job *job, int ret, Error *error); -+void job_completed(Job *job, int ret); - - /** Asynchronously complete the specified @job. */ - void job_complete(Job *job, Error **errp); -diff --git a/job-qmp.c b/job-qmp.c -index 410775d..a969b2b 100644 ---- a/job-qmp.c -+++ b/job-qmp.c -@@ -146,8 +146,9 @@ static JobInfo *job_query_single(Job *job, Error **errp) - .status = job->status, - .current_progress = job->progress_current, - .total_progress = job->progress_total, -- .has_error = !!job->error, -- .error = g_strdup(job->error), -+ .has_error = !!job->err, -+ .error = job->err ? 
\ -+ g_strdup(error_get_pretty(job->err)) : NULL, - }; - - return info; -diff --git a/job.c b/job.c -index 898260b..276024a 100644 ---- a/job.c -+++ b/job.c -@@ -369,7 +369,7 @@ void job_unref(Job *job) - - QLIST_REMOVE(job, job_list); - -- g_free(job->error); -+ error_free(job->err); - g_free(job->id); - g_free(job); - } -@@ -546,7 +546,7 @@ static void coroutine_fn job_co_entry(void *opaque) - - assert(job && job->driver && job->driver->run); - job_pause_point(job); -- job->ret = job->driver->run(job, NULL); -+ job->ret = job->driver->run(job, &job->err); - } - - -@@ -666,8 +666,8 @@ static void job_update_rc(Job *job) - job->ret = -ECANCELED; - } - if (job->ret) { -- if (!job->error) { -- job->error = g_strdup(strerror(-job->ret)); -+ if (!job->err) { -+ error_setg(&job->err, "%s", strerror(-job->ret)); - } - job_state_transition(job, JOB_STATUS_ABORTING); - } -@@ -865,17 +865,11 @@ static void job_completed_txn_success(Job *job) - } - } - --void job_completed(Job *job, int ret, Error *error) -+void job_completed(Job *job, int ret) - { - assert(job && job->txn && !job_is_completed(job)); - - job->ret = ret; -- if (error) { -- assert(job->ret < 0); -- job->error = g_strdup(error_get_pretty(error)); -- error_free(error); -- } -- - job_update_rc(job); - trace_job_completed(job, ret, job->ret); - if (job->ret) { -@@ -893,7 +887,7 @@ void job_cancel(Job *job, bool force) - } - job_cancel_async(job, force); - if (!job_started(job)) { -- job_completed(job, -ECANCELED, NULL); -+ job_completed(job, -ECANCELED); - } else if (job->deferred_to_main_loop) { - job_completed_txn_abort(job); - } else { -diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c -index a753386..00604df 100644 ---- a/tests/test-bdrv-drain.c -+++ b/tests/test-bdrv-drain.c -@@ -754,7 +754,7 @@ typedef struct TestBlockJob { - - static void test_job_completed(Job *job, void *opaque) - { -- job_completed(job, 0, NULL); -+ job_completed(job, 0); - } - - static int coroutine_fn test_job_run(Job 
*job, Error **errp) -diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c -index 3194924..82cedee 100644 ---- a/tests/test-blockjob-txn.c -+++ b/tests/test-blockjob-txn.c -@@ -34,7 +34,7 @@ static void test_block_job_complete(Job *job, void *opaque) - rc = -ECANCELED; - } - -- job_completed(job, rc, NULL); -+ job_completed(job, rc); - bdrv_unref(bs); - } - -diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c -index b0462bf..408a226 100644 ---- a/tests/test-blockjob.c -+++ b/tests/test-blockjob.c -@@ -167,7 +167,7 @@ static void cancel_job_completed(Job *job, void *opaque) - { - CancelJob *s = opaque; - s->completed = true; -- job_completed(job, 0, NULL); -+ job_completed(job, 0); - } - - static void cancel_job_complete(Job *job, Error **errp) --- -1.8.3.1 - diff --git a/0031-jobs-add-exit-shim.patch b/0031-jobs-add-exit-shim.patch deleted file mode 100644 index 2a0ccb0..0000000 --- a/0031-jobs-add-exit-shim.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 17511eb281e005da6e617acd12c81a0a1fa1771d Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:09 +0100 -Subject: jobs: add exit shim - -RH-Author: John Snow -Message-id: <20180925223431.24791-4-jsnow@redhat.com> -Patchwork-id: 82273 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 03/25] jobs: add exit shim -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -All jobs do the same thing when they leave their running loop: -- Store the return code in a structure -- wait to receive this structure in the main thread -- signal job completion via job_completed - -Few jobs do anything beyond exactly this. Consolidate this exit -logic for a net reduction in SLOC. - -More seriously, when we utilize job_defer_to_main_loop_bh to call -a function that calls job_completed, job_finalize_single will run -in a context where it has recursively taken the aio_context lock, -which can cause hangs if it puts down a reference that causes a flush. 
- -You can observe this in practice by looking at mirror_exit's careful -placement of job_completed and bdrv_unref calls. - -If we centralize job exiting, we can signal job completion from outside -of the aio_context, which should allow for job cleanup code to run with -only one lock, which makes cleanup callbacks less tricky to write. - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20180830015734.19765-4-jsnow@redhat.com -Reviewed-by: Jeff Cody -Signed-off-by: Max Reitz -(cherry picked from commit 00359a71d45a414ee47d8e423104dc0afd24ec65) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula ---- - include/qemu/job.h | 11 +++++++++++ - job.c | 18 ++++++++++++++++++ - 2 files changed, 29 insertions(+) - -diff --git a/include/qemu/job.h b/include/qemu/job.h -index e0e9987..1144d67 100644 ---- a/include/qemu/job.h -+++ b/include/qemu/job.h -@@ -209,6 +209,17 @@ struct JobDriver { - void (*drain)(Job *job); - - /** -+ * If the callback is not NULL, exit will be invoked from the main thread -+ * when the job's coroutine has finished, but before transactional -+ * convergence; before @prepare or @abort. -+ * -+ * FIXME TODO: This callback is only temporary to transition remaining jobs -+ * to prepare/commit/abort/clean callbacks and will be removed before 3.1. -+ * is released. -+ */ -+ void (*exit)(Job *job); -+ -+ /** - * If the callback is not NULL, prepare will be invoked when all the jobs - * belonging to the same transaction complete; or upon this job's completion - * if it is not in a transaction. 
-diff --git a/job.c b/job.c -index 276024a..abe91af 100644 ---- a/job.c -+++ b/job.c -@@ -535,6 +535,18 @@ void job_drain(Job *job) - } - } - -+static void job_exit(void *opaque) -+{ -+ Job *job = (Job *)opaque; -+ AioContext *aio_context = job->aio_context; -+ -+ if (job->driver->exit) { -+ aio_context_acquire(aio_context); -+ job->driver->exit(job); -+ aio_context_release(aio_context); -+ } -+ job_completed(job, job->ret); -+} - - /** - * All jobs must allow a pause point before entering their job proper. This -@@ -547,6 +559,12 @@ static void coroutine_fn job_co_entry(void *opaque) - assert(job && job->driver && job->driver->run); - job_pause_point(job); - job->ret = job->driver->run(job, &job->err); -+ if (!job->deferred_to_main_loop) { -+ job->deferred_to_main_loop = true; -+ aio_bh_schedule_oneshot(qemu_get_aio_context(), -+ job_exit, -+ job); -+ } - } - - --- -1.8.3.1 - diff --git a/0032-block-commit-utilize-job_exit-shim.patch b/0032-block-commit-utilize-job_exit-shim.patch deleted file mode 100644 index 3994481..0000000 --- a/0032-block-commit-utilize-job_exit-shim.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 912e8eaa87f8dab40466cf0d45c3290d02e6a9d5 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:10 +0100 -Subject: block/commit: utilize job_exit shim - -RH-Author: John Snow -Message-id: <20180925223431.24791-5-jsnow@redhat.com> -Patchwork-id: 82265 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 04/25] block/commit: utilize job_exit shim -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Change the manual deferment to commit_complete into the implicit -callback to job_exit, renaming commit_complete to commit_exit. - -This conversion does change the timing of when job_completed is -called to after the bdrv_replace_node and bdrv_unref calls, which -could have implications for bjob->blk which will now be put down -after this cleanup. 
- -Kevin highlights that we did not take any permissions for that backend -at job creation time, so it is safe to reorder these operations. - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20180830015734.19765-5-jsnow@redhat.com -Reviewed-by: Jeff Cody -Signed-off-by: Max Reitz -(cherry picked from commit f369b48dc4095861223f9bc4329935599e03b1c5) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula ---- - block/commit.c | 22 +++++----------------- - 1 file changed, 5 insertions(+), 17 deletions(-) - -diff --git a/block/commit.c b/block/commit.c -index af7579d..25b3cb8 100644 ---- a/block/commit.c -+++ b/block/commit.c -@@ -68,19 +68,13 @@ static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base, - return 0; - } - --typedef struct { -- int ret; --} CommitCompleteData; -- --static void commit_complete(Job *job, void *opaque) -+static void commit_exit(Job *job) - { - CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); - BlockJob *bjob = &s->common; -- CommitCompleteData *data = opaque; - BlockDriverState *top = blk_bs(s->top); - BlockDriverState *base = blk_bs(s->base); - BlockDriverState *commit_top_bs = s->commit_top_bs; -- int ret = data->ret; - bool remove_commit_top_bs = false; - - /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */ -@@ -91,10 +85,10 @@ static void commit_complete(Job *job, void *opaque) - * the normal backing chain can be restored. */ - blk_unref(s->base); - -- if (!job_is_cancelled(job) && ret == 0) { -+ if (!job_is_cancelled(job) && job->ret == 0) { - /* success */ -- ret = bdrv_drop_intermediate(s->commit_top_bs, base, -- s->backing_file_str); -+ job->ret = bdrv_drop_intermediate(s->commit_top_bs, base, -+ s->backing_file_str); - } else { - /* XXX Can (or should) we somehow keep 'consistent read' blocked even - * after the failed/cancelled commit job is gone? 
If we already wrote -@@ -117,9 +111,6 @@ static void commit_complete(Job *job, void *opaque) - * bdrv_set_backing_hd() to fail. */ - block_job_remove_all_bdrv(bjob); - -- job_completed(job, ret); -- g_free(data); -- - /* If bdrv_drop_intermediate() didn't already do that, remove the commit - * filter driver from the backing chain. Do this as the final step so that - * the 'consistent read' permission can be granted. */ -@@ -137,7 +128,6 @@ static void commit_complete(Job *job, void *opaque) - static int coroutine_fn commit_run(Job *job, Error **errp) - { - CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); -- CommitCompleteData *data; - int64_t offset; - uint64_t delay_ns = 0; - int ret = 0; -@@ -210,9 +200,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp) - out: - qemu_vfree(buf); - -- data = g_malloc(sizeof(*data)); -- data->ret = ret; -- job_defer_to_main_loop(&s->common.job, commit_complete, data); - return ret; - } - -@@ -224,6 +211,7 @@ static const BlockJobDriver commit_job_driver = { - .user_resume = block_job_user_resume, - .drain = block_job_drain, - .run = commit_run, -+ .exit = commit_exit, - }, - }; - --- -1.8.3.1 - diff --git a/0033-block-mirror-utilize-job_exit-shim.patch b/0033-block-mirror-utilize-job_exit-shim.patch deleted file mode 100644 index 65eb25a..0000000 --- a/0033-block-mirror-utilize-job_exit-shim.patch +++ /dev/null @@ -1,152 +0,0 @@ -From 2322917770da98e175e7ae8bf0bb1a624ec3cebc Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Wed, 29 Aug 2018 21:57:30 -0400 -Subject: block/mirror: utilize job_exit shim - -RH-Author: John Snow -Message-id: <20180925223431.24791-6-jsnow@redhat.com> -Patchwork-id: 82269 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 05/25] block/mirror: utilize job_exit -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Change the manual deferment to mirror_exit into the implicit -callback to job_exit and the mirror_exit callback. 
- -This does change the order of some bdrv_unref calls and job_completed, -but thanks to the new context in which we call .exit, this is safe to -defer the possible flushing of any nodes to the job_finalize_single -cleanup stage. - -Signed-off-by: John Snow -Message-id: 20180830015734.19765-6-jsnow@redhat.com -Reviewed-by: Max Reitz -Reviewed-by: Jeff Cody -Signed-off-by: Max Reitz -(cherry picked from commit 7b508f6b7a38a8d9729772fa6e525da883fb120b) -Signed-off-by: John Snow ---- - block/mirror.c | 29 +++++++++++------------------ - 1 file changed, 11 insertions(+), 18 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 6637f2b..4a9558d 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -607,26 +607,22 @@ static void mirror_wait_for_all_io(MirrorBlockJob *s) - } - } - --typedef struct { -- int ret; --} MirrorExitData; -- --static void mirror_exit(Job *job, void *opaque) -+static void mirror_exit(Job *job) - { - MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); - BlockJob *bjob = &s->common; -- MirrorExitData *data = opaque; - MirrorBDSOpaque *bs_opaque = s->mirror_top_bs->opaque; - AioContext *replace_aio_context = NULL; - BlockDriverState *src = s->mirror_top_bs->backing->bs; - BlockDriverState *target_bs = blk_bs(s->target); - BlockDriverState *mirror_top_bs = s->mirror_top_bs; - Error *local_err = NULL; -+ int ret = job->ret; - - bdrv_release_dirty_bitmap(src, s->dirty_bitmap); - -- /* Make sure that the source BDS doesn't go away before we called -- * job_completed(). 
*/ -+ /* Make sure that the source BDS doesn't go away during bdrv_replace_node, -+ * before we can call bdrv_drained_end */ - bdrv_ref(src); - bdrv_ref(mirror_top_bs); - bdrv_ref(target_bs); -@@ -652,7 +648,7 @@ static void mirror_exit(Job *job, void *opaque) - bdrv_set_backing_hd(target_bs, backing, &local_err); - if (local_err) { - error_report_err(local_err); -- data->ret = -EPERM; -+ ret = -EPERM; - } - } - } -@@ -662,7 +658,7 @@ static void mirror_exit(Job *job, void *opaque) - aio_context_acquire(replace_aio_context); - } - -- if (s->should_complete && data->ret == 0) { -+ if (s->should_complete && ret == 0) { - BlockDriverState *to_replace = src; - if (s->to_replace) { - to_replace = s->to_replace; -@@ -679,7 +675,7 @@ static void mirror_exit(Job *job, void *opaque) - bdrv_drained_end(target_bs); - if (local_err) { - error_report_err(local_err); -- data->ret = -EPERM; -+ ret = -EPERM; - } - } - if (s->to_replace) { -@@ -710,12 +706,12 @@ static void mirror_exit(Job *job, void *opaque) - blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort); - - bs_opaque->job = NULL; -- job_completed(job, data->ret); - -- g_free(data); - bdrv_drained_end(src); - bdrv_unref(mirror_top_bs); - bdrv_unref(src); -+ -+ job->ret = ret; - } - - static void mirror_throttle(MirrorBlockJob *s) -@@ -815,7 +811,6 @@ static int mirror_flush(MirrorBlockJob *s) - static int coroutine_fn mirror_run(Job *job, Error **errp) - { - MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); -- MirrorExitData *data; - BlockDriverState *bs = s->mirror_top_bs->backing->bs; - BlockDriverState *target_bs = blk_bs(s->target); - bool need_drain = true; -@@ -1035,14 +1030,10 @@ immediate_exit: - g_free(s->in_flight_bitmap); - bdrv_dirty_iter_free(s->dbi); - -- data = g_malloc(sizeof(*data)); -- data->ret = ret; -- - if (need_drain) { - bdrv_drained_begin(bs); - } - -- job_defer_to_main_loop(&s->common.job, mirror_exit, data); - return ret; - } - -@@ -1141,6 +1132,7 @@ static const BlockJobDriver 
mirror_job_driver = { - .user_resume = block_job_user_resume, - .drain = block_job_drain, - .run = mirror_run, -+ .exit = mirror_exit, - .pause = mirror_pause, - .complete = mirror_complete, - }, -@@ -1157,6 +1149,7 @@ static const BlockJobDriver commit_active_job_driver = { - .user_resume = block_job_user_resume, - .drain = block_job_drain, - .run = mirror_run, -+ .exit = mirror_exit, - .pause = mirror_pause, - .complete = mirror_complete, - }, --- -1.8.3.1 - diff --git a/0034-jobs-utilize-job_exit-shim.patch b/0034-jobs-utilize-job_exit-shim.patch deleted file mode 100644 index 8b765d8..0000000 --- a/0034-jobs-utilize-job_exit-shim.patch +++ /dev/null @@ -1,307 +0,0 @@ -From 83d2840eeadd8a55b796eae5454783d42913963c Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:12 +0100 -Subject: jobs: utilize job_exit shim - -RH-Author: John Snow -Message-id: <20180925223431.24791-7-jsnow@redhat.com> -Patchwork-id: 82267 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 06/25] jobs: utilize job_exit shim -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Utilize the job_exit shim by not calling job_defer_to_main_loop, and -where applicable, converting the deferred callback into the job_exit -callback. - -This converts backup, stream, create, and the unit tests all at once. -Most of these jobs do not see any changes to the order in which they -clean up their resources, except the test-blockjob-txn test, which -now puts down its bs before job_completed is called. - -This is safe for the same reason the reordering in the mirror job is -safe, because job_completed no longer runs under two locks, making -the unref safe even if it causes a flush. - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20180830015734.19765-7-jsnow@redhat.com -Signed-off-by: Max Reitz -(cherry picked from commit eb23654dbe43b549ea2a9ebff9d8edf544d34a73) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. 
de Paula ---- - block/backup.c | 16 ---------------- - block/create.c | 14 +++----------- - block/stream.c | 22 +++++++--------------- - tests/test-bdrv-drain.c | 6 ------ - tests/test-blockjob-txn.c | 11 ++--------- - tests/test-blockjob.c | 10 ++++------ - 6 files changed, 16 insertions(+), 63 deletions(-) - -diff --git a/block/backup.c b/block/backup.c -index 1e965d5..a67b7fa 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -380,18 +380,6 @@ static BlockErrorAction backup_error_action(BackupBlockJob *job, - } - } - --typedef struct { -- int ret; --} BackupCompleteData; -- --static void backup_complete(Job *job, void *opaque) --{ -- BackupCompleteData *data = opaque; -- -- job_completed(job, data->ret); -- g_free(data); --} -- - static bool coroutine_fn yield_and_check(BackupBlockJob *job) - { - uint64_t delay_ns; -@@ -483,7 +471,6 @@ static void backup_incremental_init_copy_bitmap(BackupBlockJob *job) - static int coroutine_fn backup_run(Job *opaque_job, Error **errp) - { - BackupBlockJob *job = container_of(opaque_job, BackupBlockJob, common.job); -- BackupCompleteData *data; - BlockDriverState *bs = blk_bs(job->common.blk); - int64_t offset, nb_clusters; - int ret = 0; -@@ -584,9 +571,6 @@ static int coroutine_fn backup_run(Job *opaque_job, Error **errp) - qemu_co_rwlock_unlock(&job->flush_rwlock); - hbitmap_free(job->copy_bitmap); - -- data = g_malloc(sizeof(*data)); -- data->ret = ret; -- job_defer_to_main_loop(&job->common.job, backup_complete, data); - return ret; - } - -diff --git a/block/create.c b/block/create.c -index 26a385c..9534121 100644 ---- a/block/create.c -+++ b/block/create.c -@@ -34,28 +34,20 @@ typedef struct BlockdevCreateJob { - Job common; - BlockDriver *drv; - BlockdevCreateOptions *opts; -- int ret; - } BlockdevCreateJob; - --static void blockdev_create_complete(Job *job, void *opaque) --{ -- BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common); -- -- job_completed(job, s->ret); --} -- - static int coroutine_fn 
blockdev_create_run(Job *job, Error **errp) - { - BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common); -+ int ret; - - job_progress_set_remaining(&s->common, 1); -- s->ret = s->drv->bdrv_co_create(s->opts, errp); -+ ret = s->drv->bdrv_co_create(s->opts, errp); - job_progress_update(&s->common, 1); - - qapi_free_BlockdevCreateOptions(s->opts); -- job_defer_to_main_loop(&s->common, blockdev_create_complete, NULL); - -- return s->ret; -+ return ret; - } - - static const JobDriver blockdev_create_job_driver = { -diff --git a/block/stream.c b/block/stream.c -index 26a7753..67e1e72 100644 ---- a/block/stream.c -+++ b/block/stream.c -@@ -54,20 +54,16 @@ static int coroutine_fn stream_populate(BlockBackend *blk, - return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ); - } - --typedef struct { -- int ret; --} StreamCompleteData; -- --static void stream_complete(Job *job, void *opaque) -+static void stream_exit(Job *job) - { - StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); - BlockJob *bjob = &s->common; -- StreamCompleteData *data = opaque; - BlockDriverState *bs = blk_bs(bjob->blk); - BlockDriverState *base = s->base; - Error *local_err = NULL; -+ int ret = job->ret; - -- if (!job_is_cancelled(job) && bs->backing && data->ret == 0) { -+ if (!job_is_cancelled(job) && bs->backing && ret == 0) { - const char *base_id = NULL, *base_fmt = NULL; - if (base) { - base_id = s->backing_file_str; -@@ -75,11 +71,11 @@ static void stream_complete(Job *job, void *opaque) - base_fmt = base->drv->format_name; - } - } -- data->ret = bdrv_change_backing_file(bs, base_id, base_fmt); -+ ret = bdrv_change_backing_file(bs, base_id, base_fmt); - bdrv_set_backing_hd(bs, base, &local_err); - if (local_err) { - error_report_err(local_err); -- data->ret = -EPERM; -+ ret = -EPERM; - goto out; - } - } -@@ -93,14 +89,12 @@ out: - } - - g_free(s->backing_file_str); -- job_completed(job, data->ret); -- g_free(data); -+ job->ret = ret; - } - - static 
int coroutine_fn stream_run(Job *job, Error **errp) - { - StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); -- StreamCompleteData *data; - BlockBackend *blk = s->common.blk; - BlockDriverState *bs = blk_bs(blk); - BlockDriverState *base = s->base; -@@ -203,9 +197,6 @@ static int coroutine_fn stream_run(Job *job, Error **errp) - - out: - /* Modify backing chain and close BDSes in main loop */ -- data = g_malloc(sizeof(*data)); -- data->ret = ret; -- job_defer_to_main_loop(&s->common.job, stream_complete, data); - return ret; - } - -@@ -215,6 +206,7 @@ static const BlockJobDriver stream_job_driver = { - .job_type = JOB_TYPE_STREAM, - .free = block_job_free, - .run = stream_run, -+ .exit = stream_exit, - .user_resume = block_job_user_resume, - .drain = block_job_drain, - }, -diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c -index 00604df..9bcb3c7 100644 ---- a/tests/test-bdrv-drain.c -+++ b/tests/test-bdrv-drain.c -@@ -752,11 +752,6 @@ typedef struct TestBlockJob { - bool should_complete; - } TestBlockJob; - --static void test_job_completed(Job *job, void *opaque) --{ -- job_completed(job, 0); --} -- - static int coroutine_fn test_job_run(Job *job, Error **errp) - { - TestBlockJob *s = container_of(job, TestBlockJob, common.job); -@@ -770,7 +765,6 @@ static int coroutine_fn test_job_run(Job *job, Error **errp) - job_pause_point(&s->common.job); - } - -- job_defer_to_main_loop(&s->common.job, test_job_completed, NULL); - return 0; - } - -diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c -index 82cedee..ef29f35 100644 ---- a/tests/test-blockjob-txn.c -+++ b/tests/test-blockjob-txn.c -@@ -24,17 +24,11 @@ typedef struct { - int *result; - } TestBlockJob; - --static void test_block_job_complete(Job *job, void *opaque) -+static void test_block_job_exit(Job *job) - { - BlockJob *bjob = container_of(job, BlockJob, job); - BlockDriverState *bs = blk_bs(bjob->blk); -- int rc = (intptr_t)opaque; - -- if (job_is_cancelled(job)) { -- 
rc = -ECANCELED; -- } -- -- job_completed(job, rc); - bdrv_unref(bs); - } - -@@ -54,8 +48,6 @@ static int coroutine_fn test_block_job_run(Job *job, Error **errp) - } - } - -- job_defer_to_main_loop(job, test_block_job_complete, -- (void *)(intptr_t)s->rc); - return s->rc; - } - -@@ -81,6 +73,7 @@ static const BlockJobDriver test_block_job_driver = { - .user_resume = block_job_user_resume, - .drain = block_job_drain, - .run = test_block_job_run, -+ .exit = test_block_job_exit, - }, - }; - -diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c -index 408a226..ad4a65b 100644 ---- a/tests/test-blockjob.c -+++ b/tests/test-blockjob.c -@@ -163,11 +163,10 @@ typedef struct CancelJob { - bool completed; - } CancelJob; - --static void cancel_job_completed(Job *job, void *opaque) -+static void cancel_job_exit(Job *job) - { -- CancelJob *s = opaque; -+ CancelJob *s = container_of(job, CancelJob, common.job); - s->completed = true; -- job_completed(job, 0); - } - - static void cancel_job_complete(Job *job, Error **errp) -@@ -182,7 +181,7 @@ static int coroutine_fn cancel_job_run(Job *job, Error **errp) - - while (!s->should_complete) { - if (job_is_cancelled(&s->common.job)) { -- goto defer; -+ return 0; - } - - if (!job_is_ready(&s->common.job) && s->should_converge) { -@@ -192,8 +191,6 @@ static int coroutine_fn cancel_job_run(Job *job, Error **errp) - job_sleep_ns(&s->common.job, 100000); - } - -- defer: -- job_defer_to_main_loop(&s->common.job, cancel_job_completed, s); - return 0; - } - -@@ -204,6 +201,7 @@ static const BlockJobDriver test_cancel_driver = { - .user_resume = block_job_user_resume, - .drain = block_job_drain, - .run = cancel_job_run, -+ .exit = cancel_job_exit, - .complete = cancel_job_complete, - }, - }; --- -1.8.3.1 - diff --git a/0035-block-backup-make-function-variables-consistently-na.patch b/0035-block-backup-make-function-variables-consistently-na.patch deleted file mode 100644 index ab0af70..0000000 --- 
a/0035-block-backup-make-function-variables-consistently-na.patch +++ /dev/null @@ -1,165 +0,0 @@ -From b5532575bb8aa748dc066834d7ac150bbb6575a7 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:13 +0100 -Subject: block/backup: make function variables consistently named - -RH-Author: John Snow -Message-id: <20180925223431.24791-8-jsnow@redhat.com> -Patchwork-id: 82272 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 07/25] block/backup: make function variables consistently named -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Rename opaque_job to job to be consistent with other job implementations. -Rename 'job', the BackupBlockJob object, to 's' to also be consistent. - -Suggested-by: Eric Blake -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20180830015734.19765-8-jsnow@redhat.com -Signed-off-by: Max Reitz -(cherry picked from commit 6870277535493fea31761d8d11ec23add2de0fb0) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. 
de Paula ---- - block/backup.c | 62 +++++++++++++++++++++++++++++----------------------------- - 1 file changed, 31 insertions(+), 31 deletions(-) - -diff --git a/block/backup.c b/block/backup.c -index a67b7fa..4d084f6 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -468,59 +468,59 @@ static void backup_incremental_init_copy_bitmap(BackupBlockJob *job) - bdrv_dirty_iter_free(dbi); - } - --static int coroutine_fn backup_run(Job *opaque_job, Error **errp) -+static int coroutine_fn backup_run(Job *job, Error **errp) - { -- BackupBlockJob *job = container_of(opaque_job, BackupBlockJob, common.job); -- BlockDriverState *bs = blk_bs(job->common.blk); -+ BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); -+ BlockDriverState *bs = blk_bs(s->common.blk); - int64_t offset, nb_clusters; - int ret = 0; - -- QLIST_INIT(&job->inflight_reqs); -- qemu_co_rwlock_init(&job->flush_rwlock); -+ QLIST_INIT(&s->inflight_reqs); -+ qemu_co_rwlock_init(&s->flush_rwlock); - -- nb_clusters = DIV_ROUND_UP(job->len, job->cluster_size); -- job_progress_set_remaining(&job->common.job, job->len); -+ nb_clusters = DIV_ROUND_UP(s->len, s->cluster_size); -+ job_progress_set_remaining(job, s->len); - -- job->copy_bitmap = hbitmap_alloc(nb_clusters, 0); -- if (job->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { -- backup_incremental_init_copy_bitmap(job); -+ s->copy_bitmap = hbitmap_alloc(nb_clusters, 0); -+ if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { -+ backup_incremental_init_copy_bitmap(s); - } else { -- hbitmap_set(job->copy_bitmap, 0, nb_clusters); -+ hbitmap_set(s->copy_bitmap, 0, nb_clusters); - } - - -- job->before_write.notify = backup_before_write_notify; -- bdrv_add_before_write_notifier(bs, &job->before_write); -+ s->before_write.notify = backup_before_write_notify; -+ bdrv_add_before_write_notifier(bs, &s->before_write); - -- if (job->sync_mode == MIRROR_SYNC_MODE_NONE) { -+ if (s->sync_mode == MIRROR_SYNC_MODE_NONE) { - /* All bits are set in copy_bitmap to 
allow any cluster to be copied. - * This does not actually require them to be copied. */ -- while (!job_is_cancelled(&job->common.job)) { -+ while (!job_is_cancelled(job)) { - /* Yield until the job is cancelled. We just let our before_write - * notify callback service CoW requests. */ -- job_yield(&job->common.job); -+ job_yield(job); - } -- } else if (job->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { -- ret = backup_run_incremental(job); -+ } else if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { -+ ret = backup_run_incremental(s); - } else { - /* Both FULL and TOP SYNC_MODE's require copying.. */ -- for (offset = 0; offset < job->len; -- offset += job->cluster_size) { -+ for (offset = 0; offset < s->len; -+ offset += s->cluster_size) { - bool error_is_read; - int alloced = 0; - -- if (yield_and_check(job)) { -+ if (yield_and_check(s)) { - break; - } - -- if (job->sync_mode == MIRROR_SYNC_MODE_TOP) { -+ if (s->sync_mode == MIRROR_SYNC_MODE_TOP) { - int i; - int64_t n; - - /* Check to see if these blocks are already in the - * backing file. */ - -- for (i = 0; i < job->cluster_size;) { -+ for (i = 0; i < s->cluster_size;) { - /* bdrv_is_allocated() only returns true/false based - * on the first set of sectors it comes across that - * are are all in the same state. -@@ -529,7 +529,7 @@ static int coroutine_fn backup_run(Job *opaque_job, Error **errp) - * needed but at some point that is always the case. 
*/ - alloced = - bdrv_is_allocated(bs, offset + i, -- job->cluster_size - i, &n); -+ s->cluster_size - i, &n); - i += n; - - if (alloced || n == 0) { -@@ -547,29 +547,29 @@ static int coroutine_fn backup_run(Job *opaque_job, Error **errp) - if (alloced < 0) { - ret = alloced; - } else { -- ret = backup_do_cow(job, offset, job->cluster_size, -+ ret = backup_do_cow(s, offset, s->cluster_size, - &error_is_read, false); - } - if (ret < 0) { - /* Depending on error action, fail now or retry cluster */ - BlockErrorAction action = -- backup_error_action(job, error_is_read, -ret); -+ backup_error_action(s, error_is_read, -ret); - if (action == BLOCK_ERROR_ACTION_REPORT) { - break; - } else { -- offset -= job->cluster_size; -+ offset -= s->cluster_size; - continue; - } - } - } - } - -- notifier_with_return_remove(&job->before_write); -+ notifier_with_return_remove(&s->before_write); - - /* wait until pending backup_do_cow() calls have completed */ -- qemu_co_rwlock_wrlock(&job->flush_rwlock); -- qemu_co_rwlock_unlock(&job->flush_rwlock); -- hbitmap_free(job->copy_bitmap); -+ qemu_co_rwlock_wrlock(&s->flush_rwlock); -+ qemu_co_rwlock_unlock(&s->flush_rwlock); -+ hbitmap_free(s->copy_bitmap); - - return ret; - } --- -1.8.3.1 - diff --git a/0036-jobs-remove-ret-argument-to-job_completed-privatize-.patch b/0036-jobs-remove-ret-argument-to-job_completed-privatize-.patch deleted file mode 100644 index 6d2791c..0000000 --- a/0036-jobs-remove-ret-argument-to-job_completed-privatize-.patch +++ /dev/null @@ -1,153 +0,0 @@ -From 7fe6d53387852907871d82997fbccc2cf774bdb4 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:14 +0100 -Subject: jobs: remove ret argument to job_completed; privatize it - -RH-Author: John Snow -Message-id: <20180925223431.24791-9-jsnow@redhat.com> -Patchwork-id: 82271 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 08/25] jobs: remove ret argument to job_completed; privatize it -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz 
-RH-Acked-by: Kevin Wolf - -Jobs are now expected to return their retcode on the stack, from the -.run callback, so we can remove that argument. - -job_cancel does not need to set -ECANCELED because job_completed will -update the return code itself if the job was canceled. - -While we're here, make job_completed static to job.c and remove it from -job.h; move the documentation of return code to the .run() callback and -to the job->ret property, accordingly. - -Signed-off-by: John Snow -Message-id: 20180830015734.19765-9-jsnow@redhat.com -Reviewed-by: Max Reitz -Signed-off-by: Max Reitz -(cherry picked from commit 404ff28d6ae59fc1c24d631710d4063fc68aed03) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula ---- - include/qemu/job.h | 28 +++++++++++++++------------- - job.c | 11 ++++++----- - trace-events | 2 +- - 3 files changed, 22 insertions(+), 19 deletions(-) - -diff --git a/include/qemu/job.h b/include/qemu/job.h -index 1144d67..23395c1 100644 ---- a/include/qemu/job.h -+++ b/include/qemu/job.h -@@ -124,7 +124,11 @@ typedef struct Job { - /** Estimated progress_current value at the completion of the job */ - int64_t progress_total; - -- /** ret code passed to job_completed. */ -+ /** -+ * Return code from @run and/or @prepare callback(s). -+ * Not final until the job has reached the CONCLUDED status. -+ * 0 on success, -errno on failure. -+ */ - int ret; - - /** -@@ -172,7 +176,16 @@ struct JobDriver { - /** Enum describing the operation */ - JobType job_type; - -- /** Mandatory: Entrypoint for the Coroutine. */ -+ /** -+ * Mandatory: Entrypoint for the Coroutine. -+ * -+ * This callback will be invoked when moving from CREATED to RUNNING. -+ * -+ * If this callback returns nonzero, the job transaction it is part of is -+ * aborted. If it returns zero, the job moves into the WAITING state. If it -+ * is the last job to complete in its transaction, all jobs in the -+ * transaction move from WAITING to PENDING. 
-+ */ - int coroutine_fn (*run)(Job *job, Error **errp); - - /** -@@ -496,17 +509,6 @@ void job_early_fail(Job *job); - /** Moves the @job from RUNNING to READY */ - void job_transition_to_ready(Job *job); - --/** -- * @job: The job being completed. -- * @ret: The status code. -- * -- * Marks @job as completed. If @ret is non-zero, the job transaction it is part -- * of is aborted. If @ret is zero, the job moves into the WAITING state. If it -- * is the last job to complete in its transaction, all jobs in the transaction -- * move from WAITING to PENDING. -- */ --void job_completed(Job *job, int ret); -- - /** Asynchronously complete the specified @job. */ - void job_complete(Job *job, Error **errp); - -diff --git a/job.c b/job.c -index abe91af..61e091a 100644 ---- a/job.c -+++ b/job.c -@@ -535,6 +535,8 @@ void job_drain(Job *job) - } - } - -+static void job_completed(Job *job); -+ - static void job_exit(void *opaque) - { - Job *job = (Job *)opaque; -@@ -545,7 +547,7 @@ static void job_exit(void *opaque) - job->driver->exit(job); - aio_context_release(aio_context); - } -- job_completed(job, job->ret); -+ job_completed(job); - } - - /** -@@ -883,13 +885,12 @@ static void job_completed_txn_success(Job *job) - } - } - --void job_completed(Job *job, int ret) -+static void job_completed(Job *job) - { - assert(job && job->txn && !job_is_completed(job)); - -- job->ret = ret; - job_update_rc(job); -- trace_job_completed(job, ret, job->ret); -+ trace_job_completed(job, job->ret); - if (job->ret) { - job_completed_txn_abort(job); - } else { -@@ -905,7 +906,7 @@ void job_cancel(Job *job, bool force) - } - job_cancel_async(job, force); - if (!job_started(job)) { -- job_completed(job, -ECANCELED); -+ job_completed(job); - } else if (job->deferred_to_main_loop) { - job_completed_txn_abort(job); - } else { -diff --git a/trace-events b/trace-events -index c445f54..4fd2cb4 100644 ---- a/trace-events -+++ b/trace-events -@@ -107,7 +107,7 @@ gdbstub_err_checksum_incorrect(uint8_t 
expected, uint8_t got) "got command packe - # job.c - job_state_transition(void *job, int ret, const char *legal, const char *s0, const char *s1) "job %p (ret: %d) attempting %s transition (%s-->%s)" - job_apply_verb(void *job, const char *state, const char *verb, const char *legal) "job %p in state %s; applying verb %s (%s)" --job_completed(void *job, int ret, int jret) "job %p ret %d corrected ret %d" -+job_completed(void *job, int ret) "job %p ret %d" - - # job-qmp.c - qmp_job_cancel(void *job) "job %p" --- -1.8.3.1 - diff --git a/0037-jobs-remove-job_defer_to_main_loop.patch b/0037-jobs-remove-job_defer_to_main_loop.patch deleted file mode 100644 index 2b0fec4..0000000 --- a/0037-jobs-remove-job_defer_to_main_loop.patch +++ /dev/null @@ -1,119 +0,0 @@ -From 1827993a08cc8c86cc40ca9ccb7ef668261b2bc4 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:15 +0100 -Subject: jobs: remove job_defer_to_main_loop - -RH-Author: John Snow -Message-id: <20180925223431.24791-10-jsnow@redhat.com> -Patchwork-id: 82275 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 09/25] jobs: remove job_defer_to_main_loop -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Now that the job infrastructure is handling the job_completed call for -all implemented jobs, we can remove the interface that allowed jobs to -schedule their own completion. - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20180830015734.19765-10-jsnow@redhat.com -Signed-off-by: Max Reitz -(cherry picked from commit e21a1c9831fc80ae3f3c1affdfa43350035d8588) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. 
de Paula ---- - include/qemu/job.h | 17 ----------------- - job.c | 40 ++-------------------------------------- - 2 files changed, 2 insertions(+), 55 deletions(-) - -diff --git a/include/qemu/job.h b/include/qemu/job.h -index 23395c1..e0cff70 100644 ---- a/include/qemu/job.h -+++ b/include/qemu/job.h -@@ -568,23 +568,6 @@ void job_finalize(Job *job, Error **errp); - */ - void job_dismiss(Job **job, Error **errp); - --typedef void JobDeferToMainLoopFn(Job *job, void *opaque); -- --/** -- * @job: The job -- * @fn: The function to run in the main loop -- * @opaque: The opaque value that is passed to @fn -- * -- * This function must be called by the main job coroutine just before it -- * returns. @fn is executed in the main loop with the job AioContext acquired. -- * -- * Block jobs must call bdrv_unref(), bdrv_close(), and anything that uses -- * bdrv_drain_all() in the main loop. -- * -- * The @job AioContext is held while @fn executes. -- */ --void job_defer_to_main_loop(Job *job, JobDeferToMainLoopFn *fn, void *opaque); -- - /** - * Synchronously finishes the given @job. If @finish is given, it is called to - * trigger completion or cancellation of the job. 
-diff --git a/job.c b/job.c -index 61e091a..e8d7aee 100644 ---- a/job.c -+++ b/job.c -@@ -561,12 +561,8 @@ static void coroutine_fn job_co_entry(void *opaque) - assert(job && job->driver && job->driver->run); - job_pause_point(job); - job->ret = job->driver->run(job, &job->err); -- if (!job->deferred_to_main_loop) { -- job->deferred_to_main_loop = true; -- aio_bh_schedule_oneshot(qemu_get_aio_context(), -- job_exit, -- job); -- } -+ job->deferred_to_main_loop = true; -+ aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job); - } - - -@@ -969,38 +965,6 @@ void job_complete(Job *job, Error **errp) - job->driver->complete(job, errp); - } - -- --typedef struct { -- Job *job; -- JobDeferToMainLoopFn *fn; -- void *opaque; --} JobDeferToMainLoopData; -- --static void job_defer_to_main_loop_bh(void *opaque) --{ -- JobDeferToMainLoopData *data = opaque; -- Job *job = data->job; -- AioContext *aio_context = job->aio_context; -- -- aio_context_acquire(aio_context); -- data->fn(data->job, data->opaque); -- aio_context_release(aio_context); -- -- g_free(data); --} -- --void job_defer_to_main_loop(Job *job, JobDeferToMainLoopFn *fn, void *opaque) --{ -- JobDeferToMainLoopData *data = g_malloc(sizeof(*data)); -- data->job = job; -- data->fn = fn; -- data->opaque = opaque; -- job->deferred_to_main_loop = true; -- -- aio_bh_schedule_oneshot(qemu_get_aio_context(), -- job_defer_to_main_loop_bh, data); --} -- - int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp) - { - Error *local_err = NULL; --- -1.8.3.1 - diff --git a/0038-block-commit-add-block-job-creation-flags.patch b/0038-block-commit-add-block-job-creation-flags.patch deleted file mode 100644 index b145fc1..0000000 --- a/0038-block-commit-add-block-job-creation-flags.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 6c8da2ba018d7546a15c3917f52ad1cc2b5b133c Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:16 +0100 -Subject: block/commit: add block job creation flags - 
-RH-Author: John Snow -Message-id: <20180925223431.24791-11-jsnow@redhat.com> -Patchwork-id: 82264 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 10/25] block/commit: add block job creation flags -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Add support for taking and passing forward job creation flags. - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Reviewed-by: Jeff Cody -Message-id: 20180906130225.5118-2-jsnow@redhat.com -Signed-off-by: Max Reitz -(cherry picked from commit 5360782d0827854383097d560715d8d8027ee590) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula ---- - block/commit.c | 5 +++-- - blockdev.c | 7 ++++--- - include/block/block_int.h | 5 ++++- - 3 files changed, 11 insertions(+), 6 deletions(-) - -diff --git a/block/commit.c b/block/commit.c -index 25b3cb8..c737664 100644 ---- a/block/commit.c -+++ b/block/commit.c -@@ -254,7 +254,8 @@ static BlockDriver bdrv_commit_top = { - }; - - void commit_start(const char *job_id, BlockDriverState *bs, -- BlockDriverState *base, BlockDriverState *top, int64_t speed, -+ BlockDriverState *base, BlockDriverState *top, -+ int creation_flags, int64_t speed, - BlockdevOnError on_error, const char *backing_file_str, - const char *filter_node_name, Error **errp) - { -@@ -272,7 +273,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, - } - - s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL, -- speed, JOB_DEFAULT, NULL, NULL, errp); -+ speed, creation_flags, NULL, NULL, errp); - if (!s) { - return; - } -diff --git a/blockdev.c b/blockdev.c -index dcf8c8d..88ad8d9 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3324,6 +3324,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, - * BlockdevOnError change for blkmirror makes it in - */ - BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT; -+ int job_flags = JOB_DEFAULT; - - if (!has_speed) { - speed = 0; -@@ -3405,15 +3406,15 @@ void 
qmp_block_commit(bool has_job_id, const char *job_id, const char *device, - goto out; - } - commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, -- JOB_DEFAULT, speed, on_error, -+ job_flags, speed, on_error, - filter_node_name, NULL, NULL, false, &local_err); - } else { - BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs); - if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { - goto out; - } -- commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed, -- on_error, has_backing_file ? backing_file : NULL, -+ commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, job_flags, -+ speed, on_error, has_backing_file ? backing_file : NULL, - filter_node_name, &local_err); - } - if (local_err != NULL) { -diff --git a/include/block/block_int.h b/include/block/block_int.h -index 903b9c1..ffab0b4 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -980,6 +980,8 @@ void stream_start(const char *job_id, BlockDriverState *bs, - * @bs: Active block device. - * @top: Top block device to be committed. - * @base: Block device that will be written into, and become the new top. -+ * @creation_flags: Flags that control the behavior of the Job lifetime. -+ * See @BlockJobCreateFlags - * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @on_error: The action to take upon error. 
- * @backing_file_str: String to use as the backing file in @top's overlay -@@ -990,7 +992,8 @@ void stream_start(const char *job_id, BlockDriverState *bs, - * - */ - void commit_start(const char *job_id, BlockDriverState *bs, -- BlockDriverState *base, BlockDriverState *top, int64_t speed, -+ BlockDriverState *base, BlockDriverState *top, -+ int creation_flags, int64_t speed, - BlockdevOnError on_error, const char *backing_file_str, - const char *filter_node_name, Error **errp); - /** --- -1.8.3.1 - diff --git a/0039-block-mirror-add-block-job-creation-flags.patch b/0039-block-mirror-add-block-job-creation-flags.patch deleted file mode 100644 index fec813e..0000000 --- a/0039-block-mirror-add-block-job-creation-flags.patch +++ /dev/null @@ -1,100 +0,0 @@ -From d4f6cfe194df3236bf53b1093e0a7f98f0a5da0e Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:17 +0100 -Subject: block/mirror: add block job creation flags - -RH-Author: John Snow -Message-id: <20180925223431.24791-12-jsnow@redhat.com> -Patchwork-id: 82268 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 11/25] block/mirror: add block job creation flags -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Add support for taking and passing forward job creation flags. - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Reviewed-by: Jeff Cody -Message-id: 20180906130225.5118-3-jsnow@redhat.com -Signed-off-by: Max Reitz -(cherry picked from commit a1999b33488daba68a1bcd7c6fdf314ddeacc6a2) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. 
de Paula ---- - block/mirror.c | 5 +++-- - blockdev.c | 3 ++- - include/block/block_int.h | 5 ++++- - 3 files changed, 9 insertions(+), 4 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 4a9558d..cd13835 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -1639,7 +1639,8 @@ fail: - - void mirror_start(const char *job_id, BlockDriverState *bs, - BlockDriverState *target, const char *replaces, -- int64_t speed, uint32_t granularity, int64_t buf_size, -+ int creation_flags, int64_t speed, -+ uint32_t granularity, int64_t buf_size, - MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, -@@ -1655,7 +1656,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, - } - is_none_mode = mode == MIRROR_SYNC_MODE_NONE; - base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL; -- mirror_start_job(job_id, bs, JOB_DEFAULT, target, replaces, -+ mirror_start_job(job_id, bs, creation_flags, target, replaces, - speed, granularity, buf_size, backing_mode, - on_source_error, on_target_error, unmap, NULL, NULL, - &mirror_job_driver, is_none_mode, base, false, -diff --git a/blockdev.c b/blockdev.c -index 88ad8d9..d31750b 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3700,6 +3700,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, - bool has_copy_mode, MirrorCopyMode copy_mode, - Error **errp) - { -+ int job_flags = JOB_DEFAULT; - - if (!has_speed) { - speed = 0; -@@ -3752,7 +3753,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, - * and will allow to check whether the node still exist at mirror completion - */ - mirror_start(job_id, bs, target, -- has_replaces ? replaces : NULL, -+ has_replaces ? 
replaces : NULL, job_flags, - speed, granularity, buf_size, sync, backing_mode, - on_source_error, on_target_error, unmap, filter_node_name, - copy_mode, errp); -diff --git a/include/block/block_int.h b/include/block/block_int.h -index ffab0b4..b40f0bf 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -1029,6 +1029,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, - * @target: Block device to write to. - * @replaces: Block graph node name to replace once the mirror is done. Can - * only be used when full mirroring is selected. -+ * @creation_flags: Flags that control the behavior of the Job lifetime. -+ * See @BlockJobCreateFlags - * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @granularity: The chosen granularity for the dirty bitmap. - * @buf_size: The amount of data that can be in flight at one time. -@@ -1050,7 +1052,8 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, - */ - void mirror_start(const char *job_id, BlockDriverState *bs, - BlockDriverState *target, const char *replaces, -- int64_t speed, uint32_t granularity, int64_t buf_size, -+ int creation_flags, int64_t speed, -+ uint32_t granularity, int64_t buf_size, - MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, --- -1.8.3.1 - diff --git a/0040-block-stream-add-block-job-creation-flags.patch b/0040-block-stream-add-block-job-creation-flags.patch deleted file mode 100644 index 224be80..0000000 --- a/0040-block-stream-add-block-job-creation-flags.patch +++ /dev/null @@ -1,100 +0,0 @@ -From 4fd98648eb0df8157c1238a1cee36373278d44a5 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:18 +0100 -Subject: block/stream: add block job creation flags - -RH-Author: John Snow -Message-id: <20180925223431.24791-13-jsnow@redhat.com> -Patchwork-id: 82263 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 12/25] block/stream: add block job 
creation flags -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Add support for taking and passing forward job creation flags. - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Reviewed-by: Jeff Cody -Message-id: 20180906130225.5118-4-jsnow@redhat.com -Signed-off-by: Max Reitz -(cherry picked from commit cf6320df581e6cbde6a95075266859a8f9ba9d55) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula ---- - block/stream.c | 5 +++-- - blockdev.c | 3 ++- - include/block/block_int.h | 5 ++++- - 3 files changed, 9 insertions(+), 4 deletions(-) - -diff --git a/block/stream.c b/block/stream.c -index 67e1e72..700eb23 100644 ---- a/block/stream.c -+++ b/block/stream.c -@@ -214,7 +214,8 @@ static const BlockJobDriver stream_job_driver = { - - void stream_start(const char *job_id, BlockDriverState *bs, - BlockDriverState *base, const char *backing_file_str, -- int64_t speed, BlockdevOnError on_error, Error **errp) -+ int creation_flags, int64_t speed, -+ BlockdevOnError on_error, Error **errp) - { - StreamBlockJob *s; - BlockDriverState *iter; -@@ -236,7 +237,7 @@ void stream_start(const char *job_id, BlockDriverState *bs, - BLK_PERM_GRAPH_MOD, - BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | - BLK_PERM_WRITE, -- speed, JOB_DEFAULT, NULL, NULL, errp); -+ speed, creation_flags, NULL, NULL, errp); - if (!s) { - goto fail; - } -diff --git a/blockdev.c b/blockdev.c -index d31750b..c2e6402 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3233,6 +3233,7 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, - AioContext *aio_context; - Error *local_err = NULL; - const char *base_name = NULL; -+ int job_flags = JOB_DEFAULT; - - if (!has_on_error) { - on_error = BLOCKDEV_ON_ERROR_REPORT; -@@ -3295,7 +3296,7 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, - base_name = has_backing_file ? backing_file : base_name; - - stream_start(has_job_id ? 
job_id : NULL, bs, base_bs, base_name, -- has_speed ? speed : 0, on_error, &local_err); -+ job_flags, has_speed ? speed : 0, on_error, &local_err); - if (local_err) { - error_propagate(errp, local_err); - goto out; -diff --git a/include/block/block_int.h b/include/block/block_int.h -index b40f0bf..4000d2a 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -958,6 +958,8 @@ int is_windows_drive(const char *filename); - * flatten the whole backing file chain onto @bs. - * @backing_file_str: The file name that will be written to @bs as the - * the new backing file if the job completes. Ignored if @base is %NULL. -+ * @creation_flags: Flags that control the behavior of the Job lifetime. -+ * See @BlockJobCreateFlags - * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @on_error: The action to take upon error. - * @errp: Error object. -@@ -971,7 +973,8 @@ int is_windows_drive(const char *filename); - */ - void stream_start(const char *job_id, BlockDriverState *bs, - BlockDriverState *base, const char *backing_file_str, -- int64_t speed, BlockdevOnError on_error, Error **errp); -+ int creation_flags, int64_t speed, -+ BlockdevOnError on_error, Error **errp); - - /** - * commit_start: --- -1.8.3.1 - diff --git a/0041-block-commit-refactor-commit-to-use-job-callbacks.patch b/0041-block-commit-refactor-commit-to-use-job-callbacks.patch deleted file mode 100644 index 212513e..0000000 --- a/0041-block-commit-refactor-commit-to-use-job-callbacks.patch +++ /dev/null @@ -1,180 +0,0 @@ -From b0b7d48f97dd97efacf93e5529d7597bd2280095 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:19 +0100 -Subject: block/commit: refactor commit to use job callbacks - -RH-Author: John Snow -Message-id: <20180925223431.24791-14-jsnow@redhat.com> -Patchwork-id: 82279 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 13/25] block/commit: refactor commit to use job callbacks -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max 
Reitz -RH-Acked-by: Kevin Wolf - -Use the component callbacks; prepare, abort, and clean. - -NB: prepare is only called when the job has not yet failed; -and abort can be called after prepare. - -complete -> prepare -> abort -> clean -complete -> abort -> clean - -During refactor, a potential problem with bdrv_drop_intermediate -was identified, the patched behavior is no worse than the pre-patch -behavior, so leave a FIXME for now to be fixed in a future patch. - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20180906130225.5118-5-jsnow@redhat.com -Reviewed-by: Jeff Cody -Signed-off-by: Max Reitz -(cherry picked from commit 22dffcbec62ba918db690ed44beba4bd4e970bb9) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula ---- - block/commit.c | 92 ++++++++++++++++++++++++++++++++-------------------------- - 1 file changed, 51 insertions(+), 41 deletions(-) - -diff --git a/block/commit.c b/block/commit.c -index c737664..b387765 100644 ---- a/block/commit.c -+++ b/block/commit.c -@@ -36,6 +36,7 @@ typedef struct CommitBlockJob { - BlockDriverState *commit_top_bs; - BlockBackend *top; - BlockBackend *base; -+ BlockDriverState *base_bs; - BlockdevOnError on_error; - int base_flags; - char *backing_file_str; -@@ -68,61 +69,67 @@ static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base, - return 0; - } - --static void commit_exit(Job *job) -+static int commit_prepare(Job *job) - { - CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); -- BlockJob *bjob = &s->common; -- BlockDriverState *top = blk_bs(s->top); -- BlockDriverState *base = blk_bs(s->base); -- BlockDriverState *commit_top_bs = s->commit_top_bs; -- bool remove_commit_top_bs = false; -- -- /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */ -- bdrv_ref(top); -- bdrv_ref(commit_top_bs); - - /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before - * the normal backing chain can be restored. 
*/ - blk_unref(s->base); -+ s->base = NULL; -+ -+ /* FIXME: bdrv_drop_intermediate treats total failures and partial failures -+ * identically. Further work is needed to disambiguate these cases. */ -+ return bdrv_drop_intermediate(s->commit_top_bs, s->base_bs, -+ s->backing_file_str); -+} - -- if (!job_is_cancelled(job) && job->ret == 0) { -- /* success */ -- job->ret = bdrv_drop_intermediate(s->commit_top_bs, base, -- s->backing_file_str); -- } else { -- /* XXX Can (or should) we somehow keep 'consistent read' blocked even -- * after the failed/cancelled commit job is gone? If we already wrote -- * something to base, the intermediate images aren't valid any more. */ -- remove_commit_top_bs = true; -+static void commit_abort(Job *job) -+{ -+ CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); -+ BlockDriverState *top_bs = blk_bs(s->top); -+ -+ /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */ -+ bdrv_ref(top_bs); -+ bdrv_ref(s->commit_top_bs); -+ -+ if (s->base) { -+ blk_unref(s->base); - } - -+ /* free the blockers on the intermediate nodes so that bdrv_replace_nodes -+ * can succeed */ -+ block_job_remove_all_bdrv(&s->common); -+ -+ /* If bdrv_drop_intermediate() failed (or was not invoked), remove the -+ * commit filter driver from the backing chain now. Do this as the final -+ * step so that the 'consistent read' permission can be granted. -+ * -+ * XXX Can (or should) we somehow keep 'consistent read' blocked even -+ * after the failed/cancelled commit job is gone? If we already wrote -+ * something to base, the intermediate images aren't valid any more. 
*/ -+ bdrv_child_try_set_perm(s->commit_top_bs->backing, 0, BLK_PERM_ALL, -+ &error_abort); -+ bdrv_replace_node(s->commit_top_bs, backing_bs(s->commit_top_bs), -+ &error_abort); -+ -+ bdrv_unref(s->commit_top_bs); -+ bdrv_unref(top_bs); -+} -+ -+static void commit_clean(Job *job) -+{ -+ CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); -+ - /* restore base open flags here if appropriate (e.g., change the base back - * to r/o). These reopens do not need to be atomic, since we won't abort - * even on failure here */ -- if (s->base_flags != bdrv_get_flags(base)) { -- bdrv_reopen(base, s->base_flags, NULL); -+ if (s->base_flags != bdrv_get_flags(s->base_bs)) { -+ bdrv_reopen(s->base_bs, s->base_flags, NULL); - } -+ - g_free(s->backing_file_str); - blk_unref(s->top); -- -- /* If there is more than one reference to the job (e.g. if called from -- * job_finish_sync()), job_completed() won't free it and therefore the -- * blockers on the intermediate nodes remain. This would cause -- * bdrv_set_backing_hd() to fail. */ -- block_job_remove_all_bdrv(bjob); -- -- /* If bdrv_drop_intermediate() didn't already do that, remove the commit -- * filter driver from the backing chain. Do this as the final step so that -- * the 'consistent read' permission can be granted. 
*/ -- if (remove_commit_top_bs) { -- bdrv_child_try_set_perm(commit_top_bs->backing, 0, BLK_PERM_ALL, -- &error_abort); -- bdrv_replace_node(commit_top_bs, backing_bs(commit_top_bs), -- &error_abort); -- } -- -- bdrv_unref(commit_top_bs); -- bdrv_unref(top); - } - - static int coroutine_fn commit_run(Job *job, Error **errp) -@@ -211,7 +218,9 @@ static const BlockJobDriver commit_job_driver = { - .user_resume = block_job_user_resume, - .drain = block_job_drain, - .run = commit_run, -- .exit = commit_exit, -+ .prepare = commit_prepare, -+ .abort = commit_abort, -+ .clean = commit_clean - }, - }; - -@@ -350,6 +359,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, - if (ret < 0) { - goto fail; - } -+ s->base_bs = base; - - /* Required permissions are already taken with block_job_add_bdrv() */ - s->top = blk_new(0, BLK_PERM_ALL); --- -1.8.3.1 - diff --git a/0042-block-mirror-don-t-install-backing-chain-on-abort.patch b/0042-block-mirror-don-t-install-backing-chain-on-abort.patch deleted file mode 100644 index 8a2f14f..0000000 --- a/0042-block-mirror-don-t-install-backing-chain-on-abort.patch +++ /dev/null @@ -1,45 +0,0 @@ -From e849bf276e59b282f3288b42abe9d6dff51dc678 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:20 +0100 -Subject: block/mirror: don't install backing chain on abort - -RH-Author: John Snow -Message-id: <20180925223431.24791-15-jsnow@redhat.com> -Patchwork-id: 82277 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 14/25] block/mirror: don't install backing chain on abort -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -In cases where we abort the block/mirror job, there's no point in -installing the new backing chain before we finish aborting. 
- -Signed-off-by: John Snow -Message-id: 20180906130225.5118-6-jsnow@redhat.com -Reviewed-by: Jeff Cody -Reviewed-by: Max Reitz -Signed-off-by: Max Reitz -(cherry picked from commit c2924ceaa7f1866148e2847c969fc1902a2524fa) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula ---- - block/mirror.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/block/mirror.c b/block/mirror.c -index cd13835..19b57b8 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -642,7 +642,7 @@ static void mirror_exit(Job *job) - * required before it could become a backing file of target_bs. */ - bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, - &error_abort); -- if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { -+ if (ret == 0 && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { - BlockDriverState *backing = s->is_none_mode ? src : s->base; - if (backing_bs(target_bs) != backing) { - bdrv_set_backing_hd(target_bs, backing, &local_err); --- -1.8.3.1 - diff --git a/0043-block-mirror-conservative-mirror_exit-refactor.patch b/0043-block-mirror-conservative-mirror_exit-refactor.patch deleted file mode 100644 index b964981..0000000 --- a/0043-block-mirror-conservative-mirror_exit-refactor.patch +++ /dev/null @@ -1,136 +0,0 @@ -From 430c298d6bf9a7c8b90ad30bc2cd445e5cd6dd50 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Thu, 6 Sep 2018 09:02:15 -0400 -Subject: block/mirror: conservative mirror_exit refactor - -RH-Author: John Snow -Message-id: <20180925223431.24791-16-jsnow@redhat.com> -Patchwork-id: 82270 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 15/25] block/mirror: conservative mirr -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -For purposes of minimum code movement, refactor the mirror_exit -callback to use the post-finalization callbacks in a trivial way. 
- -Signed-off-by: John Snow -Message-id: 20180906130225.5118-7-jsnow@redhat.com -Reviewed-by: Jeff Cody -Reviewed-by: Max Reitz -[mreitz: Added comment for the mirror_exit() function] -Signed-off-by: Max Reitz -(cherry picked from commit 737efc1eda23b904fbe0e66b37715fb0e5c3e58b) -Signed-off-by: John Snow ---- - block/mirror.c | 44 +++++++++++++++++++++++++++++++++----------- - 1 file changed, 33 insertions(+), 11 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 19b57b8..7efba77 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -79,6 +79,7 @@ typedef struct MirrorBlockJob { - int max_iov; - bool initial_zeroing_ongoing; - int in_active_write_counter; -+ bool prepared; - } MirrorBlockJob; - - typedef struct MirrorBDSOpaque { -@@ -607,7 +608,12 @@ static void mirror_wait_for_all_io(MirrorBlockJob *s) - } - } - --static void mirror_exit(Job *job) -+/** -+ * mirror_exit_common: handle both abort() and prepare() cases. -+ * for .prepare, returns 0 on success and -errno on failure. -+ * for .abort cases, denoted by abort = true, MUST return 0. -+ */ -+static int mirror_exit_common(Job *job) - { - MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); - BlockJob *bjob = &s->common; -@@ -617,7 +623,13 @@ static void mirror_exit(Job *job) - BlockDriverState *target_bs = blk_bs(s->target); - BlockDriverState *mirror_top_bs = s->mirror_top_bs; - Error *local_err = NULL; -- int ret = job->ret; -+ bool abort = job->ret < 0; -+ int ret = 0; -+ -+ if (s->prepared) { -+ return 0; -+ } -+ s->prepared = true; - - bdrv_release_dirty_bitmap(src, s->dirty_bitmap); - -@@ -642,7 +654,7 @@ static void mirror_exit(Job *job) - * required before it could become a backing file of target_bs. */ - bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, - &error_abort); -- if (ret == 0 && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { -+ if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { - BlockDriverState *backing = s->is_none_mode ? 
src : s->base; - if (backing_bs(target_bs) != backing) { - bdrv_set_backing_hd(target_bs, backing, &local_err); -@@ -658,11 +670,8 @@ static void mirror_exit(Job *job) - aio_context_acquire(replace_aio_context); - } - -- if (s->should_complete && ret == 0) { -- BlockDriverState *to_replace = src; -- if (s->to_replace) { -- to_replace = s->to_replace; -- } -+ if (s->should_complete && !abort) { -+ BlockDriverState *to_replace = s->to_replace ?: src; - - if (bdrv_get_flags(target_bs) != bdrv_get_flags(to_replace)) { - bdrv_reopen(target_bs, bdrv_get_flags(to_replace), NULL); -@@ -711,7 +720,18 @@ static void mirror_exit(Job *job) - bdrv_unref(mirror_top_bs); - bdrv_unref(src); - -- job->ret = ret; -+ return ret; -+} -+ -+static int mirror_prepare(Job *job) -+{ -+ return mirror_exit_common(job); -+} -+ -+static void mirror_abort(Job *job) -+{ -+ int ret = mirror_exit_common(job); -+ assert(ret == 0); - } - - static void mirror_throttle(MirrorBlockJob *s) -@@ -1132,7 +1152,8 @@ static const BlockJobDriver mirror_job_driver = { - .user_resume = block_job_user_resume, - .drain = block_job_drain, - .run = mirror_run, -- .exit = mirror_exit, -+ .prepare = mirror_prepare, -+ .abort = mirror_abort, - .pause = mirror_pause, - .complete = mirror_complete, - }, -@@ -1149,7 +1170,8 @@ static const BlockJobDriver commit_active_job_driver = { - .user_resume = block_job_user_resume, - .drain = block_job_drain, - .run = mirror_run, -- .exit = mirror_exit, -+ .prepare = mirror_prepare, -+ .abort = mirror_abort, - .pause = mirror_pause, - .complete = mirror_complete, - }, --- -1.8.3.1 - diff --git a/0044-block-stream-refactor-stream-to-use-job-callbacks.patch b/0044-block-stream-refactor-stream-to-use-job-callbacks.patch deleted file mode 100644 index c798419..0000000 --- a/0044-block-stream-refactor-stream-to-use-job-callbacks.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 57ede8577bbecac73a2945ca5278662dfc019dca Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:22 
+0100 -Subject: block/stream: refactor stream to use job callbacks - -RH-Author: John Snow -Message-id: <20180925223431.24791-17-jsnow@redhat.com> -Patchwork-id: 82280 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 16/25] block/stream: refactor stream to use job callbacks -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20180906130225.5118-8-jsnow@redhat.com -Reviewed-by: Jeff Cody -Signed-off-by: Max Reitz -(cherry picked from commit 1b57488acf1beba157bcd8c926e596342bcb5c60) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula ---- - block/stream.c | 23 +++++++++++++++-------- - 1 file changed, 15 insertions(+), 8 deletions(-) - -diff --git a/block/stream.c b/block/stream.c -index 700eb23..81a7ec8 100644 ---- a/block/stream.c -+++ b/block/stream.c -@@ -54,16 +54,16 @@ static int coroutine_fn stream_populate(BlockBackend *blk, - return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ); - } - --static void stream_exit(Job *job) -+static int stream_prepare(Job *job) - { - StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); - BlockJob *bjob = &s->common; - BlockDriverState *bs = blk_bs(bjob->blk); - BlockDriverState *base = s->base; - Error *local_err = NULL; -- int ret = job->ret; -+ int ret = 0; - -- if (!job_is_cancelled(job) && bs->backing && ret == 0) { -+ if (bs->backing) { - const char *base_id = NULL, *base_fmt = NULL; - if (base) { - base_id = s->backing_file_str; -@@ -75,12 +75,19 @@ static void stream_exit(Job *job) - bdrv_set_backing_hd(bs, base, &local_err); - if (local_err) { - error_report_err(local_err); -- ret = -EPERM; -- goto out; -+ return -EPERM; - } - } - --out: -+ return ret; -+} -+ -+static void stream_clean(Job *job) -+{ -+ StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); -+ BlockJob *bjob = &s->common; -+ BlockDriverState *bs = blk_bs(bjob->blk); -+ - /* Reopen the image back in 
read-only mode if necessary */ - if (s->bs_flags != bdrv_get_flags(bs)) { - /* Give up write permissions before making it read-only */ -@@ -89,7 +96,6 @@ out: - } - - g_free(s->backing_file_str); -- job->ret = ret; - } - - static int coroutine_fn stream_run(Job *job, Error **errp) -@@ -206,7 +212,8 @@ static const BlockJobDriver stream_job_driver = { - .job_type = JOB_TYPE_STREAM, - .free = block_job_free, - .run = stream_run, -- .exit = stream_exit, -+ .prepare = stream_prepare, -+ .clean = stream_clean, - .user_resume = block_job_user_resume, - .drain = block_job_drain, - }, --- -1.8.3.1 - diff --git a/0045-tests-blockjob-replace-Blockjob-with-Job.patch b/0045-tests-blockjob-replace-Blockjob-with-Job.patch deleted file mode 100644 index 3d9bf41..0000000 --- a/0045-tests-blockjob-replace-Blockjob-with-Job.patch +++ /dev/null @@ -1,233 +0,0 @@ -From 3817b0c67fb4636bacd9c4ebdef39f51b18e05c1 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:23 +0100 -Subject: tests/blockjob: replace Blockjob with Job - -RH-Author: John Snow -Message-id: <20180925223431.24791-18-jsnow@redhat.com> -Patchwork-id: 82281 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 17/25] tests/blockjob: replace Blockjob with Job -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -These tests don't actually test blockjobs anymore, they test -generic Job lifetimes. Change the types accordingly. - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20180906130225.5118-9-jsnow@redhat.com -Reviewed-by: Jeff Cody -Signed-off-by: Max Reitz -(cherry picked from commit 0cc4643b01a0138543e886db8e3bf8a3f74ff8f9) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/test-blockjob.c | 98 ++++++++++++++++++++++++++------------------------- - 1 file changed, 50 insertions(+), 48 deletions(-) - -diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c -index ad4a65b..8e8b680 100644 ---- a/tests/test-blockjob.c -+++ b/tests/test-blockjob.c -@@ -206,18 +206,20 @@ static const BlockJobDriver test_cancel_driver = { - }, - }; - --static CancelJob *create_common(BlockJob **pjob) -+static CancelJob *create_common(Job **pjob) - { - BlockBackend *blk; -- BlockJob *job; -+ Job *job; -+ BlockJob *bjob; - CancelJob *s; - - blk = create_blk(NULL); -- job = mk_job(blk, "Steve", &test_cancel_driver, true, -- JOB_MANUAL_FINALIZE | JOB_MANUAL_DISMISS); -- job_ref(&job->job); -- assert(job->job.status == JOB_STATUS_CREATED); -- s = container_of(job, CancelJob, common); -+ bjob = mk_job(blk, "Steve", &test_cancel_driver, true, -+ JOB_MANUAL_FINALIZE | JOB_MANUAL_DISMISS); -+ job = &bjob->job; -+ job_ref(job); -+ assert(job->status == JOB_STATUS_CREATED); -+ s = container_of(bjob, CancelJob, common); - s->blk = blk; - - *pjob = job; -@@ -242,7 +244,7 @@ static void cancel_common(CancelJob *s) - - static void test_cancel_created(void) - { -- BlockJob *job; -+ Job *job; - CancelJob *s; - - s = create_common(&job); -@@ -251,119 +253,119 @@ static void test_cancel_created(void) - - static void test_cancel_running(void) - { -- BlockJob *job; -+ Job *job; - CancelJob *s; - - s = create_common(&job); - -- job_start(&job->job); -- assert(job->job.status == JOB_STATUS_RUNNING); -+ job_start(job); -+ assert(job->status == JOB_STATUS_RUNNING); - - cancel_common(s); - } - - static void test_cancel_paused(void) - { -- BlockJob *job; -+ Job *job; - CancelJob *s; - - s = create_common(&job); - -- job_start(&job->job); -- assert(job->job.status == JOB_STATUS_RUNNING); -+ job_start(job); -+ assert(job->status == JOB_STATUS_RUNNING); - -- job_user_pause(&job->job, &error_abort); -- job_enter(&job->job); -- assert(job->job.status == 
JOB_STATUS_PAUSED); -+ job_user_pause(job, &error_abort); -+ job_enter(job); -+ assert(job->status == JOB_STATUS_PAUSED); - - cancel_common(s); - } - - static void test_cancel_ready(void) - { -- BlockJob *job; -+ Job *job; - CancelJob *s; - - s = create_common(&job); - -- job_start(&job->job); -- assert(job->job.status == JOB_STATUS_RUNNING); -+ job_start(job); -+ assert(job->status == JOB_STATUS_RUNNING); - - s->should_converge = true; -- job_enter(&job->job); -- assert(job->job.status == JOB_STATUS_READY); -+ job_enter(job); -+ assert(job->status == JOB_STATUS_READY); - - cancel_common(s); - } - - static void test_cancel_standby(void) - { -- BlockJob *job; -+ Job *job; - CancelJob *s; - - s = create_common(&job); - -- job_start(&job->job); -- assert(job->job.status == JOB_STATUS_RUNNING); -+ job_start(job); -+ assert(job->status == JOB_STATUS_RUNNING); - - s->should_converge = true; -- job_enter(&job->job); -- assert(job->job.status == JOB_STATUS_READY); -+ job_enter(job); -+ assert(job->status == JOB_STATUS_READY); - -- job_user_pause(&job->job, &error_abort); -- job_enter(&job->job); -- assert(job->job.status == JOB_STATUS_STANDBY); -+ job_user_pause(job, &error_abort); -+ job_enter(job); -+ assert(job->status == JOB_STATUS_STANDBY); - - cancel_common(s); - } - - static void test_cancel_pending(void) - { -- BlockJob *job; -+ Job *job; - CancelJob *s; - - s = create_common(&job); - -- job_start(&job->job); -- assert(job->job.status == JOB_STATUS_RUNNING); -+ job_start(job); -+ assert(job->status == JOB_STATUS_RUNNING); - - s->should_converge = true; -- job_enter(&job->job); -- assert(job->job.status == JOB_STATUS_READY); -+ job_enter(job); -+ assert(job->status == JOB_STATUS_READY); - -- job_complete(&job->job, &error_abort); -- job_enter(&job->job); -+ job_complete(job, &error_abort); -+ job_enter(job); - while (!s->completed) { - aio_poll(qemu_get_aio_context(), true); - } -- assert(job->job.status == JOB_STATUS_PENDING); -+ assert(job->status == 
JOB_STATUS_PENDING); - - cancel_common(s); - } - - static void test_cancel_concluded(void) - { -- BlockJob *job; -+ Job *job; - CancelJob *s; - - s = create_common(&job); - -- job_start(&job->job); -- assert(job->job.status == JOB_STATUS_RUNNING); -+ job_start(job); -+ assert(job->status == JOB_STATUS_RUNNING); - - s->should_converge = true; -- job_enter(&job->job); -- assert(job->job.status == JOB_STATUS_READY); -+ job_enter(job); -+ assert(job->status == JOB_STATUS_READY); - -- job_complete(&job->job, &error_abort); -- job_enter(&job->job); -+ job_complete(job, &error_abort); -+ job_enter(job); - while (!s->completed) { - aio_poll(qemu_get_aio_context(), true); - } -- assert(job->job.status == JOB_STATUS_PENDING); -+ assert(job->status == JOB_STATUS_PENDING); - -- job_finalize(&job->job, &error_abort); -- assert(job->job.status == JOB_STATUS_CONCLUDED); -+ job_finalize(job, &error_abort); -+ assert(job->status == JOB_STATUS_CONCLUDED); - - cancel_common(s); - } --- -1.8.3.1 - diff --git a/0046-tests-test-blockjob-remove-exit-callback.patch b/0046-tests-test-blockjob-remove-exit-callback.patch deleted file mode 100644 index 81856fb..0000000 --- a/0046-tests-test-blockjob-remove-exit-callback.patch +++ /dev/null @@ -1,88 +0,0 @@ -From f641d3f6946af31724c578aa6f09ba883bb5fab3 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:24 +0100 -Subject: tests/test-blockjob: remove exit callback - -RH-Author: John Snow -Message-id: <20180925223431.24791-19-jsnow@redhat.com> -Patchwork-id: 82276 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 18/25] tests/test-blockjob: remove exit callback -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -We remove the exit callback and the completed boolean along with it. -We can simulate it just fine by waiting for the job to defer to the -main loop, and then giving it one final kick to get the main loop -portion to run. 
- -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20180906130225.5118-10-jsnow@redhat.com -Reviewed-by: Jeff Cody -Signed-off-by: Max Reitz -(cherry picked from commit 977d26fdbeb35d8d2d0f203f9556d44a353e0dfd) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula ---- - tests/test-blockjob.c | 16 ++++++---------- - 1 file changed, 6 insertions(+), 10 deletions(-) - -diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c -index 8e8b680..de4c1c2 100644 ---- a/tests/test-blockjob.c -+++ b/tests/test-blockjob.c -@@ -160,15 +160,8 @@ typedef struct CancelJob { - BlockBackend *blk; - bool should_converge; - bool should_complete; -- bool completed; - } CancelJob; - --static void cancel_job_exit(Job *job) --{ -- CancelJob *s = container_of(job, CancelJob, common.job); -- s->completed = true; --} -- - static void cancel_job_complete(Job *job, Error **errp) - { - CancelJob *s = container_of(job, CancelJob, common.job); -@@ -201,7 +194,6 @@ static const BlockJobDriver test_cancel_driver = { - .user_resume = block_job_user_resume, - .drain = block_job_drain, - .run = cancel_job_run, -- .exit = cancel_job_exit, - .complete = cancel_job_complete, - }, - }; -@@ -335,9 +327,11 @@ static void test_cancel_pending(void) - - job_complete(job, &error_abort); - job_enter(job); -- while (!s->completed) { -+ while (!job->deferred_to_main_loop) { - aio_poll(qemu_get_aio_context(), true); - } -+ assert(job->status == JOB_STATUS_READY); -+ aio_poll(qemu_get_aio_context(), true); - assert(job->status == JOB_STATUS_PENDING); - - cancel_common(s); -@@ -359,9 +353,11 @@ static void test_cancel_concluded(void) - - job_complete(job, &error_abort); - job_enter(job); -- while (!s->completed) { -+ while (!job->deferred_to_main_loop) { - aio_poll(qemu_get_aio_context(), true); - } -+ assert(job->status == JOB_STATUS_READY); -+ aio_poll(qemu_get_aio_context(), true); - assert(job->status == JOB_STATUS_PENDING); - - job_finalize(job, &error_abort); --- -1.8.3.1 - diff --git 
a/0047-tests-test-blockjob-txn-move-.exit-to-.clean.patch b/0047-tests-test-blockjob-txn-move-.exit-to-.clean.patch deleted file mode 100644 index b6cc4fd..0000000 --- a/0047-tests-test-blockjob-txn-move-.exit-to-.clean.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 43b1e07411d06cd676f3f55e14e0ac1082a679d0 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:25 +0100 -Subject: tests/test-blockjob-txn: move .exit to .clean - -RH-Author: John Snow -Message-id: <20180925223431.24791-20-jsnow@redhat.com> -Patchwork-id: 82282 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 19/25] tests/test-blockjob-txn: move .exit to .clean -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -The exit callback in this test actually only performs cleanup. - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20180906130225.5118-11-jsnow@redhat.com -Reviewed-by: Jeff Cody -Signed-off-by: Max Reitz -(cherry picked from commit e4dad4275d51b594c8abbe726a4927f6f388e427) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/test-blockjob-txn.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c -index ef29f35..86606f9 100644 ---- a/tests/test-blockjob-txn.c -+++ b/tests/test-blockjob-txn.c -@@ -24,7 +24,7 @@ typedef struct { - int *result; - } TestBlockJob; - --static void test_block_job_exit(Job *job) -+static void test_block_job_clean(Job *job) - { - BlockJob *bjob = container_of(job, BlockJob, job); - BlockDriverState *bs = blk_bs(bjob->blk); -@@ -73,7 +73,7 @@ static const BlockJobDriver test_block_job_driver = { - .user_resume = block_job_user_resume, - .drain = block_job_drain, - .run = test_block_job_run, -- .exit = test_block_job_exit, -+ .clean = test_block_job_clean, - }, - }; - --- -1.8.3.1 - diff --git a/0048-jobs-remove-.exit-callback.patch b/0048-jobs-remove-.exit-callback.patch deleted file mode 100644 index b4ece99..0000000 --- a/0048-jobs-remove-.exit-callback.patch +++ /dev/null @@ -1,156 +0,0 @@ -From ea31341d12bc2080f7a1b606dcf578376d6a4637 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:26 +0100 -Subject: jobs: remove .exit callback - -RH-Author: John Snow -Message-id: <20180925223431.24791-21-jsnow@redhat.com> -Patchwork-id: 82283 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 20/25] jobs: remove .exit callback -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Now that all of the jobs use the component finalization callbacks, -there's no use for the heavy-hammer .exit callback anymore. - -job_exit becomes a glorified type shim so that we can call -job_completed from aio_bh_schedule_oneshot. - -Move these three functions down into job.c to eliminate a -forward reference. 
- -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20180906130225.5118-12-jsnow@redhat.com -Reviewed-by: Jeff Cody -Signed-off-by: Max Reitz -(cherry picked from commit ccbfb3319aa265e71c16dac976ff857d0a5bcb4b) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula ---- - include/qemu/job.h | 11 -------- - job.c | 77 ++++++++++++++++++++++++------------------------------ - 2 files changed, 34 insertions(+), 54 deletions(-) - -diff --git a/include/qemu/job.h b/include/qemu/job.h -index e0cff70..5cb0681 100644 ---- a/include/qemu/job.h -+++ b/include/qemu/job.h -@@ -222,17 +222,6 @@ struct JobDriver { - void (*drain)(Job *job); - - /** -- * If the callback is not NULL, exit will be invoked from the main thread -- * when the job's coroutine has finished, but before transactional -- * convergence; before @prepare or @abort. -- * -- * FIXME TODO: This callback is only temporary to transition remaining jobs -- * to prepare/commit/abort/clean callbacks and will be removed before 3.1. -- * is released. -- */ -- void (*exit)(Job *job); -- -- /** - * If the callback is not NULL, prepare will be invoked when all the jobs - * belonging to the same transaction complete; or upon this job's completion - * if it is not in a transaction. -diff --git a/job.c b/job.c -index e8d7aee..87c9aa4 100644 ---- a/job.c -+++ b/job.c -@@ -535,49 +535,6 @@ void job_drain(Job *job) - } - } - --static void job_completed(Job *job); -- --static void job_exit(void *opaque) --{ -- Job *job = (Job *)opaque; -- AioContext *aio_context = job->aio_context; -- -- if (job->driver->exit) { -- aio_context_acquire(aio_context); -- job->driver->exit(job); -- aio_context_release(aio_context); -- } -- job_completed(job); --} -- --/** -- * All jobs must allow a pause point before entering their job proper. This -- * ensures that jobs can be paused prior to being started, then resumed later. 
-- */ --static void coroutine_fn job_co_entry(void *opaque) --{ -- Job *job = opaque; -- -- assert(job && job->driver && job->driver->run); -- job_pause_point(job); -- job->ret = job->driver->run(job, &job->err); -- job->deferred_to_main_loop = true; -- aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job); --} -- -- --void job_start(Job *job) --{ -- assert(job && !job_started(job) && job->paused && -- job->driver && job->driver->run); -- job->co = qemu_coroutine_create(job_co_entry, job); -- job->pause_count--; -- job->busy = true; -- job->paused = false; -- job_state_transition(job, JOB_STATUS_RUNNING); -- aio_co_enter(job->aio_context, job->co); --} -- - /* Assumes the block_job_mutex is held */ - static bool job_timer_not_pending(Job *job) - { -@@ -894,6 +851,40 @@ static void job_completed(Job *job) - } - } - -+/** Useful only as a type shim for aio_bh_schedule_oneshot. */ -+static void job_exit(void *opaque) -+{ -+ Job *job = (Job *)opaque; -+ job_completed(job); -+} -+ -+/** -+ * All jobs must allow a pause point before entering their job proper. This -+ * ensures that jobs can be paused prior to being started, then resumed later. 
-+ */ -+static void coroutine_fn job_co_entry(void *opaque) -+{ -+ Job *job = opaque; -+ -+ assert(job && job->driver && job->driver->run); -+ job_pause_point(job); -+ job->ret = job->driver->run(job, &job->err); -+ job->deferred_to_main_loop = true; -+ aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job); -+} -+ -+void job_start(Job *job) -+{ -+ assert(job && !job_started(job) && job->paused && -+ job->driver && job->driver->run); -+ job->co = qemu_coroutine_create(job_co_entry, job); -+ job->pause_count--; -+ job->busy = true; -+ job->paused = false; -+ job_state_transition(job, JOB_STATUS_RUNNING); -+ aio_co_enter(job->aio_context, job->co); -+} -+ - void job_cancel(Job *job, bool force) - { - if (job->status == JOB_STATUS_CONCLUDED) { --- -1.8.3.1 - diff --git a/0049-qapi-block-commit-expose-new-job-properties.patch b/0049-qapi-block-commit-expose-new-job-properties.patch deleted file mode 100644 index 97a192c..0000000 --- a/0049-qapi-block-commit-expose-new-job-properties.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 756c3ccf83d5612ca2b326a8fed8fdf1f7958adb Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:27 +0100 -Subject: qapi/block-commit: expose new job properties - -RH-Author: John Snow -Message-id: <20180925223431.24791-22-jsnow@redhat.com> -Patchwork-id: 82285 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 21/25] qapi/block-commit: expose new job properties -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20180906130225.5118-13-jsnow@redhat.com -Reviewed-by: Jeff Cody -Signed-off-by: Max Reitz -(cherry picked from commit 96fbf5345f60a87fab8e7ea79a2406f381027db9) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. 
de Paula ---- - blockdev.c | 8 ++++++++ - qapi/block-core.json | 16 +++++++++++++++- - 2 files changed, 23 insertions(+), 1 deletion(-) - -diff --git a/blockdev.c b/blockdev.c -index c2e6402..8efc47e 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3314,6 +3314,8 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, - bool has_backing_file, const char *backing_file, - bool has_speed, int64_t speed, - bool has_filter_node_name, const char *filter_node_name, -+ bool has_auto_finalize, bool auto_finalize, -+ bool has_auto_dismiss, bool auto_dismiss, - Error **errp) - { - BlockDriverState *bs; -@@ -3333,6 +3335,12 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, - if (!has_filter_node_name) { - filter_node_name = NULL; - } -+ if (has_auto_finalize && !auto_finalize) { -+ job_flags |= JOB_MANUAL_FINALIZE; -+ } -+ if (has_auto_dismiss && !auto_dismiss) { -+ job_flags |= JOB_MANUAL_DISMISS; -+ } - - /* Important Note: - * libvirt relies on the DeviceNotFound error class in order to probe for -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 5b9084a..ca7d1b3 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -1498,6 +1498,19 @@ - # above @top. If this option is not given, a node name is - # autogenerated. (Since: 2.9) - # -+# @auto-finalize: When false, this job will wait in a PENDING state after it has -+# finished its work, waiting for @block-job-finalize before -+# making any block graph changes. -+# When true, this job will automatically -+# perform its abort or commit actions. -+# Defaults to true. (Since 3.1) -+# -+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it -+# has completely ceased all work, and awaits @block-job-dismiss. -+# When true, this job will automatically disappear from the query -+# list without user intervention. -+# Defaults to true. 
(Since 3.1) -+# - # Returns: Nothing on success - # If commit or stream is already active on this device, DeviceInUse - # If @device does not exist, DeviceNotFound -@@ -1518,7 +1531,8 @@ - { 'command': 'block-commit', - 'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', '*top': 'str', - '*backing-file': 'str', '*speed': 'int', -- '*filter-node-name': 'str' } } -+ '*filter-node-name': 'str', -+ '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } } - - ## - # @drive-backup: --- -1.8.3.1 - diff --git a/0050-qapi-block-mirror-expose-new-job-properties.patch b/0050-qapi-block-mirror-expose-new-job-properties.patch deleted file mode 100644 index 7f6443a..0000000 --- a/0050-qapi-block-mirror-expose-new-job-properties.patch +++ /dev/null @@ -1,144 +0,0 @@ -From 254a2b41a647cf39abaa5d94f17aef62f035d30f Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Thu, 6 Sep 2018 09:02:22 -0400 -Subject: qapi/block-mirror: expose new job properties - -RH-Author: John Snow -Message-id: <20180925223431.24791-23-jsnow@redhat.com> -Patchwork-id: 82274 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 22/25] qapi/block-mirror: expose new j -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20180906130225.5118-14-jsnow@redhat.com -Reviewed-by: Jeff Cody -Signed-off-by: Max Reitz -(cherry picked from commit a6b58adec28ff43c0f29ff7c95cdd5d11e87cf61) -Signed-off-by: John Snow ---- - blockdev.c | 14 ++++++++++++++ - qapi/block-core.json | 30 ++++++++++++++++++++++++++++-- - 2 files changed, 42 insertions(+), 2 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 8efc47e..bbb3279 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3707,6 +3707,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, - bool has_filter_node_name, - const char *filter_node_name, - bool has_copy_mode, MirrorCopyMode copy_mode, -+ bool has_auto_finalize, bool auto_finalize, -+ bool 
has_auto_dismiss, bool auto_dismiss, - Error **errp) - { - int job_flags = JOB_DEFAULT; -@@ -3735,6 +3737,12 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, - if (!has_copy_mode) { - copy_mode = MIRROR_COPY_MODE_BACKGROUND; - } -+ if (has_auto_finalize && !auto_finalize) { -+ job_flags |= JOB_MANUAL_FINALIZE; -+ } -+ if (has_auto_dismiss && !auto_dismiss) { -+ job_flags |= JOB_MANUAL_DISMISS; -+ } - - if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity", -@@ -3912,6 +3920,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - arg->has_unmap, arg->unmap, - false, NULL, - arg->has_copy_mode, arg->copy_mode, -+ arg->has_auto_finalize, arg->auto_finalize, -+ arg->has_auto_dismiss, arg->auto_dismiss, - &local_err); - bdrv_unref(target_bs); - error_propagate(errp, local_err); -@@ -3933,6 +3943,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, - bool has_filter_node_name, - const char *filter_node_name, - bool has_copy_mode, MirrorCopyMode copy_mode, -+ bool has_auto_finalize, bool auto_finalize, -+ bool has_auto_dismiss, bool auto_dismiss, - Error **errp) - { - BlockDriverState *bs; -@@ -3966,6 +3978,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, - true, true, - has_filter_node_name, filter_node_name, - has_copy_mode, copy_mode, -+ has_auto_finalize, auto_finalize, -+ has_auto_dismiss, auto_dismiss, - &local_err); - error_propagate(errp, local_err); - -diff --git a/qapi/block-core.json b/qapi/block-core.json -index ca7d1b3..9193d49 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -1732,6 +1732,18 @@ - # @copy-mode: when to copy data to the destination; defaults to 'background' - # (Since: 3.0) - # -+# @auto-finalize: When false, this job will wait in a PENDING state after it has -+# finished its work, waiting for @block-job-finalize before -+# making any block graph changes. 
-+# When true, this job will automatically -+# perform its abort or commit actions. -+# Defaults to true. (Since 3.1) -+# -+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it -+# has completely ceased all work, and awaits @block-job-dismiss. -+# When true, this job will automatically disappear from the query -+# list without user intervention. -+# Defaults to true. (Since 3.1) - # Since: 1.3 - ## - { 'struct': 'DriveMirror', -@@ -1741,7 +1753,8 @@ - '*speed': 'int', '*granularity': 'uint32', - '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', - '*on-target-error': 'BlockdevOnError', -- '*unmap': 'bool', '*copy-mode': 'MirrorCopyMode' } } -+ '*unmap': 'bool', '*copy-mode': 'MirrorCopyMode', -+ '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } } - - ## - # @BlockDirtyBitmap: -@@ -2007,6 +2020,18 @@ - # @copy-mode: when to copy data to the destination; defaults to 'background' - # (Since: 3.0) - # -+# @auto-finalize: When false, this job will wait in a PENDING state after it has -+# finished its work, waiting for @block-job-finalize before -+# making any block graph changes. -+# When true, this job will automatically -+# perform its abort or commit actions. -+# Defaults to true. (Since 3.1) -+# -+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it -+# has completely ceased all work, and awaits @block-job-dismiss. -+# When true, this job will automatically disappear from the query -+# list without user intervention. -+# Defaults to true. (Since 3.1) - # Returns: nothing on success. 
- # - # Since: 2.6 -@@ -2028,7 +2053,8 @@ - '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', - '*on-target-error': 'BlockdevOnError', - '*filter-node-name': 'str', -- '*copy-mode': 'MirrorCopyMode' } } -+ '*copy-mode': 'MirrorCopyMode', -+ '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } } - - ## - # @block_set_io_throttle: --- -1.8.3.1 - diff --git a/0051-qapi-block-stream-expose-new-job-properties.patch b/0051-qapi-block-stream-expose-new-job-properties.patch deleted file mode 100644 index c55039a..0000000 --- a/0051-qapi-block-stream-expose-new-job-properties.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 50990953696a8803f6b2b7ad71901c58c375eb8c Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:29 +0100 -Subject: qapi/block-stream: expose new job properties - -RH-Author: John Snow -Message-id: <20180925223431.24791-24-jsnow@redhat.com> -Patchwork-id: 82278 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 23/25] qapi/block-stream: expose new job properties -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20180906130225.5118-15-jsnow@redhat.com -Reviewed-by: Jeff Cody -Signed-off-by: Max Reitz -(cherry picked from commit 241ca1ab78542f02e666636e0323bcfe3cb1d5e8) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. 
de Paula ---- - blockdev.c | 9 +++++++++ - hmp.c | 5 +++-- - qapi/block-core.json | 16 +++++++++++++++- - 3 files changed, 27 insertions(+), 3 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index bbb3279..806531d 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3226,6 +3226,8 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, - bool has_backing_file, const char *backing_file, - bool has_speed, int64_t speed, - bool has_on_error, BlockdevOnError on_error, -+ bool has_auto_finalize, bool auto_finalize, -+ bool has_auto_dismiss, bool auto_dismiss, - Error **errp) - { - BlockDriverState *bs, *iter; -@@ -3295,6 +3297,13 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, - /* backing_file string overrides base bs filename */ - base_name = has_backing_file ? backing_file : base_name; - -+ if (has_auto_finalize && !auto_finalize) { -+ job_flags |= JOB_MANUAL_FINALIZE; -+ } -+ if (has_auto_dismiss && !auto_dismiss) { -+ job_flags |= JOB_MANUAL_DISMISS; -+ } -+ - stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name, - job_flags, has_speed ? 
speed : 0, on_error, &local_err); - if (local_err) { -diff --git a/hmp.c b/hmp.c -index 2aafb50..e3c3ecd 100644 ---- a/hmp.c -+++ b/hmp.c -@@ -1865,8 +1865,9 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict) - int64_t speed = qdict_get_try_int(qdict, "speed", 0); - - qmp_block_stream(true, device, device, base != NULL, base, false, NULL, -- false, NULL, qdict_haskey(qdict, "speed"), speed, -- true, BLOCKDEV_ON_ERROR_REPORT, &error); -+ false, NULL, qdict_haskey(qdict, "speed"), speed, true, -+ BLOCKDEV_ON_ERROR_REPORT, false, false, false, false, -+ &error); - - hmp_handle_error(mon, &error); - } -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 9193d49..d1a9c3e 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -2320,6 +2320,19 @@ - # 'stop' and 'enospc' can only be used if the block device - # supports io-status (see BlockInfo). Since 1.3. - # -+# @auto-finalize: When false, this job will wait in a PENDING state after it has -+# finished its work, waiting for @block-job-finalize before -+# making any block graph changes. -+# When true, this job will automatically -+# perform its abort or commit actions. -+# Defaults to true. (Since 3.1) -+# -+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it -+# has completely ceased all work, and awaits @block-job-dismiss. -+# When true, this job will automatically disappear from the query -+# list without user intervention. -+# Defaults to true. (Since 3.1) -+# - # Returns: Nothing on success. If @device does not exist, DeviceNotFound. 
- # - # Since: 1.1 -@@ -2335,7 +2348,8 @@ - { 'command': 'block-stream', - 'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', - '*base-node': 'str', '*backing-file': 'str', '*speed': 'int', -- '*on-error': 'BlockdevOnError' } } -+ '*on-error': 'BlockdevOnError', -+ '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } } - - ## - # @block-job-set-speed: --- -1.8.3.1 - diff --git a/0052-block-backup-qapi-documentation-fixup.patch b/0052-block-backup-qapi-documentation-fixup.patch deleted file mode 100644 index fb695b0..0000000 --- a/0052-block-backup-qapi-documentation-fixup.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 6ecfc87059e78892c868227319a91adea909e09e Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:30 +0100 -Subject: block/backup: qapi documentation fixup - -RH-Author: John Snow -Message-id: <20180925223431.24791-25-jsnow@redhat.com> -Patchwork-id: 82284 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 24/25] block/backup: qapi documentation fixup -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Fix documentation to match the other jobs amended for 3.1. - -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20180906130225.5118-16-jsnow@redhat.com -Reviewed-by: Jeff Cody -Signed-off-by: Max Reitz -(cherry picked from commit dfaff2c37dfa52ab045cf87503e60ea56317230a) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula ---- - qapi/block-core.json | 18 ++++++++++-------- - 1 file changed, 10 insertions(+), 8 deletions(-) - -diff --git a/qapi/block-core.json b/qapi/block-core.json -index d1a9c3e..2953991 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -1272,13 +1272,14 @@ - # a different block device than @device). - # - # @auto-finalize: When false, this job will wait in a PENDING state after it has --# finished its work, waiting for @block-job-finalize. --# When true, this job will automatically perform its abort or --# commit actions. 
-+# finished its work, waiting for @block-job-finalize before -+# making any block graph changes. -+# When true, this job will automatically -+# perform its abort or commit actions. - # Defaults to true. (Since 2.12) - # - # @auto-dismiss: When false, this job will wait in a CONCLUDED state after it --# has completed ceased all work, and wait for @block-job-dismiss. -+# has completely ceased all work, and awaits @block-job-dismiss. - # When true, this job will automatically disappear from the query - # list without user intervention. - # Defaults to true. (Since 2.12) -@@ -1327,13 +1328,14 @@ - # a different block device than @device). - # - # @auto-finalize: When false, this job will wait in a PENDING state after it has --# finished its work, waiting for @block-job-finalize. --# When true, this job will automatically perform its abort or --# commit actions. -+# finished its work, waiting for @block-job-finalize before -+# making any block graph changes. -+# When true, this job will automatically -+# perform its abort or commit actions. - # Defaults to true. (Since 2.12) - # - # @auto-dismiss: When false, this job will wait in a CONCLUDED state after it --# has completed ceased all work, and wait for @block-job-dismiss. -+# has completely ceased all work, and awaits @block-job-dismiss. - # When true, this job will automatically disappear from the query - # list without user intervention. - # Defaults to true. 
(Since 2.12) --- -1.8.3.1 - diff --git a/0053-blockdev-document-transactional-shortcomings.patch b/0053-blockdev-document-transactional-shortcomings.patch deleted file mode 100644 index 50e40af..0000000 --- a/0053-blockdev-document-transactional-shortcomings.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 00a437d87c6bd8ec956b25fc0dffe8397ce475b8 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 25 Sep 2018 22:34:31 +0100 -Subject: blockdev: document transactional shortcomings - -RH-Author: John Snow -Message-id: <20180925223431.24791-26-jsnow@redhat.com> -Patchwork-id: 82286 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 25/25] blockdev: document transactional shortcomings -Bugzilla: 1632939 -RH-Acked-by: Jeffrey Cody -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -Presently only the backup job really guarantees what one would consider -transactional semantics. To guard against someone helpfully adding them -in the future, document that there are shortcomings in the model that -would need to be audited at that time. - -Signed-off-by: John Snow -Message-id: 20180906130225.5118-17-jsnow@redhat.com -Reviewed-by: Jeff Cody -Reviewed-by: Max Reitz -Signed-off-by: Max Reitz -(cherry picked from commit 66da04ddd3dcb8c61ee664b6faced132da002006) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula ---- - blockdev.c | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - -diff --git a/blockdev.c b/blockdev.c -index 806531d..d97202a 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -2292,7 +2292,13 @@ static const BlkActionOps actions[] = { - .instance_size = sizeof(BlockDirtyBitmapState), - .prepare = block_dirty_bitmap_disable_prepare, - .abort = block_dirty_bitmap_disable_abort, -- } -+ }, -+ /* Where are transactions for MIRROR, COMMIT and STREAM? -+ * Although these blockjobs use transaction callbacks like the backup job, -+ * these jobs do not necessarily adhere to transaction semantics. 
-+ * These jobs may not fully undo all of their actions on abort, nor do they -+ * necessarily work in transactions with more than one job in them. -+ */ - }; - - /** --- -1.8.3.1 - diff --git a/0054-seccomp-use-SIGSYS-signal-instead-of-killing-the-thr.patch b/0054-seccomp-use-SIGSYS-signal-instead-of-killing-the-thr.patch deleted file mode 100644 index f7a741f..0000000 --- a/0054-seccomp-use-SIGSYS-signal-instead-of-killing-the-thr.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 5b9ccef27363b61223b31312062cde1210216985 Mon Sep 17 00:00:00 2001 -From: Eduardo Otubo -Date: Fri, 28 Sep 2018 07:56:36 +0100 -Subject: seccomp: use SIGSYS signal instead of killing the thread -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eduardo Otubo -Message-id: <20180928075639.16746-3-otubo@redhat.com> -Patchwork-id: 82314 -O-Subject: [RHEL-8 qemu-kvm PATCH 2/5] seccomp: use SIGSYS signal instead of killing the thread -Bugzilla: 1618356 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Thomas Huth - -From: Marc-André Lureau - -commit 6f2231e9b0931e1998d9ed0c509adf7aedc02db2 -Author: Marc-André Lureau -Date: Wed Aug 22 19:02:47 2018 +0200 - - seccomp: use SIGSYS signal instead of killing the thread - - The seccomp action SCMP_ACT_KILL results in immediate termination of - the thread that made the bad system call. However, qemu being - multi-threaded, it keeps running. There is no easy way for parent - process / management layer (libvirt) to know about that situation. - - Instead, the default SIGSYS handler when invoked with SCMP_ACT_TRAP - will terminate the program and core dump. - - This may not be the most secure solution, but probably better than - just killing the offending thread. SCMP_ACT_KILL_PROCESS has been - added in Linux 4.14 to improve the situation, which I propose to use - by default if available in the next patch. 
- - Related to: - https://bugzilla.redhat.com/show_bug.cgi?id=1594456 - - Signed-off-by: Marc-André Lureau - Reviewed-by: Daniel P. Berrangé - Acked-by: Eduardo Otubo - -Signed-off-by: Eduardo Otubo -Signed-off-by: Danilo C. L. de Paula ---- - qemu-seccomp.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/qemu-seccomp.c b/qemu-seccomp.c -index 9cd8eb9..b117a92 100644 ---- a/qemu-seccomp.c -+++ b/qemu-seccomp.c -@@ -125,7 +125,7 @@ static int seccomp_start(uint32_t seccomp_opts) - continue; - } - -- rc = seccomp_rule_add_array(ctx, SCMP_ACT_KILL, blacklist[i].num, -+ rc = seccomp_rule_add_array(ctx, SCMP_ACT_TRAP, blacklist[i].num, - blacklist[i].narg, blacklist[i].arg_cmp); - if (rc < 0) { - goto seccomp_return; --- -1.8.3.1 - diff --git a/0055-seccomp-prefer-SCMP_ACT_KILL_PROCESS-if-available.patch b/0055-seccomp-prefer-SCMP_ACT_KILL_PROCESS-if-available.patch deleted file mode 100644 index 809c9c2..0000000 --- a/0055-seccomp-prefer-SCMP_ACT_KILL_PROCESS-if-available.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 80574fd1c226ca5c8555b3bb37bc3fe121bbf69f Mon Sep 17 00:00:00 2001 -From: Eduardo Otubo -Date: Fri, 28 Sep 2018 07:56:37 +0100 -Subject: seccomp: prefer SCMP_ACT_KILL_PROCESS if available -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eduardo Otubo -Message-id: <20180928075639.16746-4-otubo@redhat.com> -Patchwork-id: 82315 -O-Subject: [RHEL-8 qemu-kvm PATCH 3/5] seccomp: prefer SCMP_ACT_KILL_PROCESS if available -Bugzilla: 1618356 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Thomas Huth - -From: Marc-André Lureau - -commit bda08a5764d470f101fa38635d30b41179a313e1 -Author: Marc-André Lureau -Date: Wed Aug 22 19:02:48 2018 +0200 - - seccomp: prefer SCMP_ACT_KILL_PROCESS if available - - The upcoming libseccomp release should have SCMP_ACT_KILL_PROCESS - action (https://github.com/seccomp/libseccomp/issues/96). 
- - SCMP_ACT_KILL_PROCESS is preferable to immediately terminate the - offending process, rather than having the SIGSYS handler running. - - Use SECCOMP_GET_ACTION_AVAIL to check availability of kernel support, - as libseccomp will fallback on SCMP_ACT_KILL otherwise, and we still - prefer SCMP_ACT_TRAP. - - Signed-off-by: Marc-André Lureau - Reviewed-by: Daniel P. Berrangé - Acked-by: Eduardo Otubo - -Signed-off-by: Eduardo Otubo -Signed-off-by: Danilo C. L. de Paula ---- - qemu-seccomp.c | 31 ++++++++++++++++++++++++++++++- - 1 file changed, 30 insertions(+), 1 deletion(-) - -diff --git a/qemu-seccomp.c b/qemu-seccomp.c -index b117a92..f0c833f 100644 ---- a/qemu-seccomp.c -+++ b/qemu-seccomp.c -@@ -20,6 +20,7 @@ - #include - #include - #include "sysemu/seccomp.h" -+#include - - /* For some architectures (notably ARM) cacheflush is not supported until - * libseccomp 2.2.3, but configure enforces that we are using a more recent -@@ -107,12 +108,40 @@ static const struct QemuSeccompSyscall blacklist[] = { - { SCMP_SYS(sched_get_priority_min), QEMU_SECCOMP_SET_RESOURCECTL }, - }; - -+static inline __attribute__((unused)) int -+qemu_seccomp(unsigned int operation, unsigned int flags, void *args) -+{ -+#ifdef __NR_seccomp -+ return syscall(__NR_seccomp, operation, flags, args); -+#else -+ errno = ENOSYS; -+ return -1; -+#endif -+} -+ -+static uint32_t qemu_seccomp_get_kill_action(void) -+{ -+#if defined(SECCOMP_GET_ACTION_AVAIL) && defined(SCMP_ACT_KILL_PROCESS) && \ -+ defined(SECCOMP_RET_KILL_PROCESS) -+ { -+ uint32_t action = SECCOMP_RET_KILL_PROCESS; -+ -+ if (qemu_seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &action) == 0) { -+ return SCMP_ACT_KILL_PROCESS; -+ } -+ } -+#endif -+ -+ return SCMP_ACT_TRAP; -+} -+ - - static int seccomp_start(uint32_t seccomp_opts) - { - int rc = 0; - unsigned int i = 0; - scmp_filter_ctx ctx; -+ uint32_t action = qemu_seccomp_get_kill_action(); - - ctx = seccomp_init(SCMP_ACT_ALLOW); - if (ctx == NULL) { -@@ -125,7 +154,7 @@ static int 
seccomp_start(uint32_t seccomp_opts) - continue; - } - -- rc = seccomp_rule_add_array(ctx, SCMP_ACT_TRAP, blacklist[i].num, -+ rc = seccomp_rule_add_array(ctx, action, blacklist[i].num, - blacklist[i].narg, blacklist[i].arg_cmp); - if (rc < 0) { - goto seccomp_return; --- -1.8.3.1 - diff --git a/0056-seccomp-set-the-seccomp-filter-to-all-threads.patch b/0056-seccomp-set-the-seccomp-filter-to-all-threads.patch deleted file mode 100644 index b1e37ad..0000000 --- a/0056-seccomp-set-the-seccomp-filter-to-all-threads.patch +++ /dev/null @@ -1,77 +0,0 @@ -From ef8bae877ca544af956f8314cdd702d1c62a9b15 Mon Sep 17 00:00:00 2001 -From: Eduardo Otubo -Date: Fri, 28 Sep 2018 07:56:39 +0100 -Subject: seccomp: set the seccomp filter to all threads -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eduardo Otubo -Message-id: <20180928075639.16746-6-otubo@redhat.com> -Patchwork-id: 82316 -O-Subject: [RHEL-8 qemu-kvm PATCH 5/5] seccomp: set the seccomp filter to all threads -Bugzilla: 1618356 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Thomas Huth - -From: Marc-André Lureau - -commit 70dfabeaa79ba4d7a3b699abe1a047c8012db114 -Author: Marc-André Lureau -Date: Wed Aug 22 19:02:50 2018 +0200 - - seccomp: set the seccomp filter to all threads - - When using "-seccomp on", the seccomp policy is only applied to the - main thread, the vcpu worker thread and other worker threads created - after seccomp policy is applied; the seccomp policy is not applied to - e.g. the RCU thread because it is created before the seccomp policy is - applied and SECCOMP_FILTER_FLAG_TSYNC isn't used. 
- - This can be verified with - for task in /proc/`pidof qemu`/task/*; do cat $task/status | grep Secc ; done - Seccomp: 2 - Seccomp: 0 - Seccomp: 0 - Seccomp: 2 - Seccomp: 2 - Seccomp: 2 - - Starting with libseccomp 2.2.0 and kernel >= 3.17, we can use - seccomp_attr_set(ctx, > SCMP_FLTATR_CTL_TSYNC, 1) to update the policy - on all threads. - - libseccomp requirement was bumped to 2.2.0 in previous patch. - libseccomp should fail to set the filter if it can't honour - SCMP_FLTATR_CTL_TSYNC (untested), and thus -sandbox will now fail on - kernel < 3.17. - - Signed-off-by: Marc-André Lureau - Acked-by: Eduardo Otubo - -Signed-off-by: Eduardo Otubo -Signed-off-by: Danilo C. L. de Paula ---- - qemu-seccomp.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/qemu-seccomp.c b/qemu-seccomp.c -index f0c833f..4729eb1 100644 ---- a/qemu-seccomp.c -+++ b/qemu-seccomp.c -@@ -149,6 +149,11 @@ static int seccomp_start(uint32_t seccomp_opts) - goto seccomp_return; - } - -+ rc = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_TSYNC, 1); -+ if (rc != 0) { -+ goto seccomp_return; -+ } -+ - for (i = 0; i < ARRAY_SIZE(blacklist); i++) { - if (!(seccomp_opts & blacklist[i].set)) { - continue; --- -1.8.3.1 - diff --git a/0057-memory-cleanup-side-effects-of-memory_region_init_fo.patch b/0057-memory-cleanup-side-effects-of-memory_region_init_fo.patch deleted file mode 100644 index e866c28..0000000 --- a/0057-memory-cleanup-side-effects-of-memory_region_init_fo.patch +++ /dev/null @@ -1,185 +0,0 @@ -From da9c980b19783915f8675894b88da631f27dd34d Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Fri, 5 Oct 2018 12:59:47 +0100 -Subject: memory: cleanup side effects of memory_region_init_foo() on failure - -RH-Author: Igor Mammedov -Message-id: <1538744387-84898-1-git-send-email-imammedo@redhat.com> -Patchwork-id: 82391 -O-Subject: [RHEL-8 qemu-kvm PATCH] memory: cleanup side effects of memory_region_init_foo() on failure -Bugzilla: 1600365 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Dr. 
David Alan Gilbert -RH-Acked-by: Pankaj Gupta -RH-Acked-by: Laszlo Ersek - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1600365 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=18658506 - -if MemoryRegion intialization fails it's left in semi-initialized state, -where it's size is not 0 and attached as child to owner object. -And this leds to crash in following use-case: - (monitor) object_add memory-backend-file,id=mem1,size=99999G,mem-path=/tmp/foo,discard-data=yes - memory.c:2083: memory_region_get_ram_ptr: Assertion `mr->ram_block' failed - Aborted (core dumped) -it happens due to assumption that memory region is intialized when - memory_region_size() != 0 -and therefore it's ok to access it in - file_backend_unparent() - if (memory_region_size() != 0) - memory_region_get_ram_ptr() - -which happens when object_add fails and unparents failed backend making -file_backend_unparent() access invalid memory region. - -Fix it by making sure that memory_region_init_foo() APIs cleanup externally -visible side effects on failure (like set size to 0 and unparenting object) - -Signed-off-by: Igor Mammedov -Message-Id: <1536064777-42312-1-git-send-email-imammedo@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 1cd3d492624da399d66c4c3e6a5eabb8f96bb0a2) -Signed-off-by: Igor Mammedov -Signed-off-by: Danilo C. L. 
de Paula - -Conflicts: - memory.c - due missing (cbfc01710 "memory, exec: switch file ram allocation functions to 'flags' parameters") - not related to the patch signature mismatch of - qemu_ram_alloc_from_file()/qemu_ram_alloc_from_fd() ---- - memory.c | 48 ++++++++++++++++++++++++++++++++++++++++++------ - 1 file changed, 42 insertions(+), 6 deletions(-) - -diff --git a/memory.c b/memory.c -index e9cd446..88c75d8 100644 ---- a/memory.c -+++ b/memory.c -@@ -1518,12 +1518,18 @@ void memory_region_init_ram_shared_nomigrate(MemoryRegion *mr, - bool share, - Error **errp) - { -+ Error *err = NULL; - memory_region_init(mr, owner, name, size); - mr->ram = true; - mr->terminates = true; - mr->destructor = memory_region_destructor_ram; -- mr->ram_block = qemu_ram_alloc(size, share, mr, errp); -+ mr->ram_block = qemu_ram_alloc(size, share, mr, &err); - mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; -+ if (err) { -+ mr->size = int128_zero(); -+ object_unparent(OBJECT(mr)); -+ error_propagate(errp, err); -+ } - } - - void memory_region_init_resizeable_ram(MemoryRegion *mr, -@@ -1536,13 +1542,19 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, - void *host), - Error **errp) - { -+ Error *err = NULL; - memory_region_init(mr, owner, name, size); - mr->ram = true; - mr->terminates = true; - mr->destructor = memory_region_destructor_ram; - mr->ram_block = qemu_ram_alloc_resizeable(size, max_size, resized, -- mr, errp); -+ mr, &err); - mr->dirty_log_mask = tcg_enabled() ? 
(1 << DIRTY_MEMORY_CODE) : 0; -+ if (err) { -+ mr->size = int128_zero(); -+ object_unparent(OBJECT(mr)); -+ error_propagate(errp, err); -+ } - } - - #ifdef __linux__ -@@ -1555,13 +1567,19 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, - const char *path, - Error **errp) - { -+ Error *err = NULL; - memory_region_init(mr, owner, name, size); - mr->ram = true; - mr->terminates = true; - mr->destructor = memory_region_destructor_ram; - mr->align = align; -- mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, errp); -+ mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, &err); - mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; -+ if (err) { -+ mr->size = int128_zero(); -+ object_unparent(OBJECT(mr)); -+ error_propagate(errp, err); -+ } - } - - void memory_region_init_ram_from_fd(MemoryRegion *mr, -@@ -1572,12 +1590,18 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr, - int fd, - Error **errp) - { -+ Error *err = NULL; - memory_region_init(mr, owner, name, size); - mr->ram = true; - mr->terminates = true; - mr->destructor = memory_region_destructor_ram; -- mr->ram_block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp); -+ mr->ram_block = qemu_ram_alloc_from_fd(size, mr, share, fd, &err); - mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; -+ if (err) { -+ mr->size = int128_zero(); -+ object_unparent(OBJECT(mr)); -+ error_propagate(errp, err); -+ } - } - #endif - -@@ -1628,13 +1652,19 @@ void memory_region_init_rom_nomigrate(MemoryRegion *mr, - uint64_t size, - Error **errp) - { -+ Error *err = NULL; - memory_region_init(mr, owner, name, size); - mr->ram = true; - mr->readonly = true; - mr->terminates = true; - mr->destructor = memory_region_destructor_ram; -- mr->ram_block = qemu_ram_alloc(size, false, mr, errp); -+ mr->ram_block = qemu_ram_alloc(size, false, mr, &err); - mr->dirty_log_mask = tcg_enabled() ? 
(1 << DIRTY_MEMORY_CODE) : 0; -+ if (err) { -+ mr->size = int128_zero(); -+ object_unparent(OBJECT(mr)); -+ error_propagate(errp, err); -+ } - } - - void memory_region_init_rom_device_nomigrate(MemoryRegion *mr, -@@ -1645,6 +1675,7 @@ void memory_region_init_rom_device_nomigrate(MemoryRegion *mr, - uint64_t size, - Error **errp) - { -+ Error *err = NULL; - assert(ops); - memory_region_init(mr, owner, name, size); - mr->ops = ops; -@@ -1652,7 +1683,12 @@ void memory_region_init_rom_device_nomigrate(MemoryRegion *mr, - mr->terminates = true; - mr->rom_device = true; - mr->destructor = memory_region_destructor_ram; -- mr->ram_block = qemu_ram_alloc(size, false, mr, errp); -+ mr->ram_block = qemu_ram_alloc(size, false, mr, &err); -+ if (err) { -+ mr->size = int128_zero(); -+ object_unparent(OBJECT(mr)); -+ error_propagate(errp, err); -+ } - } - - void memory_region_init_iommu(void *_iommu_mr, --- -1.8.3.1 - diff --git a/0058-mirror-Fail-gracefully-for-source-target.patch b/0058-mirror-Fail-gracefully-for-source-target.patch deleted file mode 100644 index 56c3baf..0000000 --- a/0058-mirror-Fail-gracefully-for-source-target.patch +++ /dev/null @@ -1,87 +0,0 @@ -From a96ed7a8374891516e626b797321d4be69cb071d Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 13:19:57 +0100 -Subject: mirror: Fail gracefully for source == target - -RH-Author: Kevin Wolf -Message-id: <20181010131957.23198-2-kwolf@redhat.com> -Patchwork-id: 82564 -O-Subject: [RHEL-8 qemu-kvm PATCH 1/1] mirror: Fail gracefully for source == target -Bugzilla: 1637963 -RH-Acked-by: John Snow -RH-Acked-by: Fam Zheng -RH-Acked-by: Stefan Hajnoczi - -blockdev-mirror with the same node for source and target segfaults -today: A node is in its own backing chain, so mirror_start_job() decides -that this is an active commit. 
When adding the intermediate nodes with -block_job_add_bdrv(), it starts the iteration through the subchain with -the backing file of source, though, so it never reaches target and -instead runs into NULL at the base. - -While we could fix that by starting with source itself, there is no -point in allowing mirroring a node into itself and I wouldn't be -surprised if this caused more problems later. - -So just check for this scenario and error out. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -(cherry picked from commit 86fae10c64d642256cf019e6829929fa0d259c7a) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/mirror.c | 5 +++++ - tests/qemu-iotests/041 | 6 ++++++ - tests/qemu-iotests/041.out | 4 ++-- - 3 files changed, 13 insertions(+), 2 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 7efba77..b61f99b 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -1516,6 +1516,11 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, - buf_size = DEFAULT_MIRROR_BUF_SIZE; - } - -+ if (bs == target) { -+ error_setg(errp, "Can't mirror node into itself"); -+ return; -+ } -+ - /* In the case of active commit, add dummy driver to provide consistent - * reads on the top, while disabling it in the intermediate nodes, and make - * the backing chain writable. 
*/ -diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 -index c20ac7d..9336ab6 100755 ---- a/tests/qemu-iotests/041 -+++ b/tests/qemu-iotests/041 -@@ -234,6 +234,12 @@ class TestSingleBlockdev(TestSingleDrive): - result = self.vm.qmp("blockdev-add", **args) - self.assert_qmp(result, 'return', {}) - -+ def test_mirror_to_self(self): -+ result = self.vm.qmp(self.qmp_cmd, job_id='job0', -+ device=self.qmp_target, sync='full', -+ target=self.qmp_target) -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ - test_large_cluster = None - test_image_not_found = None - test_small_buffer2 = None -diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out -index c28b392..e071d0b 100644 ---- a/tests/qemu-iotests/041.out -+++ b/tests/qemu-iotests/041.out -@@ -1,5 +1,5 @@ --..................................................................................... -+........................................................................................ - ---------------------------------------------------------------------- --Ran 85 tests -+Ran 88 tests - - OK --- -1.8.3.1 - diff --git a/0059-commit-Add-top-node-base-node-options.patch b/0059-commit-Add-top-node-base-node-options.patch deleted file mode 100644 index c3cde82..0000000 --- a/0059-commit-Add-top-node-base-node-options.patch +++ /dev/null @@ -1,141 +0,0 @@ -From 0086e14eef7fc78bc1254ee888bd7d720d6ee5b9 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 13:50:54 +0100 -Subject: commit: Add top-node/base-node options - -RH-Author: Kevin Wolf -Message-id: <20181010135055.3874-2-kwolf@redhat.com> -Patchwork-id: 82569 -O-Subject: [RHEL-8 qemu-kvm PATCH 1/2] commit: Add top-node/base-node options -Bugzilla: 1637970 -RH-Acked-by: John Snow -RH-Acked-by: Fam Zheng -RH-Acked-by: Stefan Hajnoczi - -The block-commit QMP command required specifying the top and base nodes -of the commit jobs using the file name of that node. 
While this works -in simple cases (local files with absolute paths), the file names -generated for more complicated setups can be hard to predict. - -The block-commit command has more problems than just this, so we want to -replace it altogether in the long run, but libvirt needs a reliable way -to address nodes now. So we don't want to wait for a new, cleaner -command, but just add the minimal thing needed right now. - -This adds two new options top-node and base-node to the command, which -allow specifying node names instead. They are mutually exclusive with -the old options. - -Signed-off-by: Kevin Wolf -(cherry picked from commit 3c605f4074ebeb97970eb660fb56a9cb06525923) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - blockdev.c | 32 ++++++++++++++++++++++++++++++-- - qapi/block-core.json | 24 ++++++++++++++++++------ - 2 files changed, 48 insertions(+), 8 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index d97202a..df256e6 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3324,7 +3324,9 @@ out: - } - - void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, -+ bool has_base_node, const char *base_node, - bool has_base, const char *base, -+ bool has_top_node, const char *top_node, - bool has_top, const char *top, - bool has_backing_file, const char *backing_file, - bool has_speed, int64_t speed, -@@ -3385,7 +3387,20 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, - /* default top_bs is the active layer */ - top_bs = bs; - -- if (has_top && top) { -+ if (has_top_node && has_top) { -+ error_setg(errp, "'top-node' and 'top' are mutually exclusive"); -+ goto out; -+ } else if (has_top_node) { -+ top_bs = bdrv_lookup_bs(NULL, top_node, errp); -+ if (top_bs == NULL) { -+ goto out; -+ } -+ if (!bdrv_chain_contains(bs, top_bs)) { -+ error_setg(errp, "'%s' is not in this backing file chain", -+ top_node); -+ goto out; -+ } -+ } else if (has_top && top) { - if (strcmp(bs->filename, 
top) != 0) { - top_bs = bdrv_find_backing_image(bs, top); - } -@@ -3398,7 +3413,20 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, - - assert(bdrv_get_aio_context(top_bs) == aio_context); - -- if (has_base && base) { -+ if (has_base_node && has_base) { -+ error_setg(errp, "'base-node' and 'base' are mutually exclusive"); -+ goto out; -+ } else if (has_base_node) { -+ base_bs = bdrv_lookup_bs(NULL, base_node, errp); -+ if (base_bs == NULL) { -+ goto out; -+ } -+ if (!bdrv_chain_contains(top_bs, base_bs)) { -+ error_setg(errp, "'%s' is not in this backing file chain", -+ base_node); -+ goto out; -+ } -+ } else if (has_base && base) { - base_bs = bdrv_find_backing_image(top_bs, base); - } else { - base_bs = bdrv_find_base(top_bs); -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 2953991..6f38dc0 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -1457,12 +1457,23 @@ - # - # @device: the device name or node-name of a root node - # --# @base: The file name of the backing image to write data into. --# If not specified, this is the deepest backing image. -+# @base-node: The node name of the backing image to write data into. -+# If not specified, this is the deepest backing image. -+# (since: 3.1) - # --# @top: The file name of the backing image within the image chain, --# which contains the topmost data to be committed down. If --# not specified, this is the active layer. -+# @base: Same as @base-node, except that it is a file name rather than a node -+# name. This must be the exact filename string that was used to open the -+# node; other strings, even if addressing the same file, are not -+# accepted (deprecated, use @base-node instead) -+# -+# @top-node: The node name of the backing image within the image chain -+# which contains the topmost data to be committed down. If -+# not specified, this is the active layer. 
(since: 3.1) -+# -+# @top: Same as @top-node, except that it is a file name rather than a node -+# name. This must be the exact filename string that was used to open the -+# node; other strings, even if addressing the same file, are not -+# accepted (deprecated, use @base-node instead) - # - # @backing-file: The backing file string to write into the overlay - # image of 'top'. If 'top' is the active layer, -@@ -1531,7 +1542,8 @@ - # - ## - { 'command': 'block-commit', -- 'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', '*top': 'str', -+ 'data': { '*job-id': 'str', 'device': 'str', '*base-node': 'str', -+ '*base': 'str', '*top-node': 'str', '*top': 'str', - '*backing-file': 'str', '*speed': 'int', - '*filter-node-name': 'str', - '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } } --- -1.8.3.1 - diff --git a/0060-qemu-iotests-Test-commit-with-top-node-base-node.patch b/0060-qemu-iotests-Test-commit-with-top-node-base-node.patch deleted file mode 100644 index a593117..0000000 --- a/0060-qemu-iotests-Test-commit-with-top-node-base-node.patch +++ /dev/null @@ -1,127 +0,0 @@ -From bb9687c8dadef42d11f3606e68e956a7c60b2487 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 13:50:55 +0100 -Subject: qemu-iotests: Test commit with top-node/base-node - -RH-Author: Kevin Wolf -Message-id: <20181010135055.3874-3-kwolf@redhat.com> -Patchwork-id: 82568 -O-Subject: [RHEL-8 qemu-kvm PATCH 2/2] qemu-iotests: Test commit with top-node/base-node -Bugzilla: 1637970 -RH-Acked-by: John Snow -RH-Acked-by: Fam Zheng -RH-Acked-by: Stefan Hajnoczi - -This adds some tests for block-commit with the new options top-node and -base-node (taking node names) instead of top and base (taking file -names). - -Signed-off-by: Kevin Wolf -(cherry picked from commit d57177a48fc604e5427921bf20b22ee0e6d578b3) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/040 | 52 ++++++++++++++++++++++++++++++++++++++++++++-- - tests/qemu-iotests/040.out | 4 ++-- - 2 files changed, 52 insertions(+), 4 deletions(-) - -diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 -index 1beb5e6..1cb1cee 100755 ---- a/tests/qemu-iotests/040 -+++ b/tests/qemu-iotests/040 -@@ -57,9 +57,12 @@ class ImageCommitTestCase(iotests.QMPTestCase): - self.assert_no_active_block_jobs() - self.vm.shutdown() - -- def run_commit_test(self, top, base, need_ready=False): -+ def run_commit_test(self, top, base, need_ready=False, node_names=False): - self.assert_no_active_block_jobs() -- result = self.vm.qmp('block-commit', device='drive0', top=top, base=base) -+ if node_names: -+ result = self.vm.qmp('block-commit', device='drive0', top_node=top, base_node=base) -+ else: -+ result = self.vm.qmp('block-commit', device='drive0', top=top, base=base) - self.assert_qmp(result, 'return', {}) - self.wait_for_complete(need_ready) - -@@ -101,6 +104,11 @@ class TestSingleDrive(ImageCommitTestCase): - self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xab 0 524288', backing_img).find("verification failed")) - self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed")) - -+ def test_commit_node(self): -+ self.run_commit_test("mid", "base", node_names=True) -+ self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xab 0 524288', backing_img).find("verification failed")) -+ self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed")) -+ - def test_device_not_found(self): - result = self.vm.qmp('block-commit', device='nonexistent', top='%s' % mid_img) - self.assert_qmp(result, 'error/class', 'DeviceNotFound') -@@ -123,6 +131,30 @@ class TestSingleDrive(ImageCommitTestCase): - self.assert_qmp(result, 'error/class', 'GenericError') - self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found') - -+ def 
test_top_node_invalid(self): -+ self.assert_no_active_block_jobs() -+ result = self.vm.qmp('block-commit', device='drive0', top_node='badfile', base_node='base') -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ self.assert_qmp(result, 'error/desc', "Cannot find device= nor node_name=badfile") -+ -+ def test_base_node_invalid(self): -+ self.assert_no_active_block_jobs() -+ result = self.vm.qmp('block-commit', device='drive0', top_node='mid', base_node='badfile') -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ self.assert_qmp(result, 'error/desc', "Cannot find device= nor node_name=badfile") -+ -+ def test_top_path_and_node(self): -+ self.assert_no_active_block_jobs() -+ result = self.vm.qmp('block-commit', device='drive0', top_node='mid', base_node='base', top='%s' % mid_img) -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ self.assert_qmp(result, 'error/desc', "'top-node' and 'top' are mutually exclusive") -+ -+ def test_base_path_and_node(self): -+ self.assert_no_active_block_jobs() -+ result = self.vm.qmp('block-commit', device='drive0', top_node='mid', base_node='base', base='%s' % backing_img) -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ self.assert_qmp(result, 'error/desc', "'base-node' and 'base' are mutually exclusive") -+ - def test_top_is_active(self): - self.run_commit_test(test_img, backing_img, need_ready=True) - self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xab 0 524288', backing_img).find("verification failed")) -@@ -139,6 +171,22 @@ class TestSingleDrive(ImageCommitTestCase): - self.assert_qmp(result, 'error/class', 'GenericError') - self.assert_qmp(result, 'error/desc', 'Base \'%s\' not found' % mid_img) - -+ def test_top_and_base_node_reversed(self): -+ self.assert_no_active_block_jobs() -+ result = self.vm.qmp('block-commit', device='drive0', top_node='base', base_node='top') -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ self.assert_qmp(result, 'error/desc', "'top' is 
not in this backing file chain") -+ -+ def test_top_node_in_wrong_chain(self): -+ self.assert_no_active_block_jobs() -+ -+ result = self.vm.qmp('blockdev-add', driver='null-co', node_name='null') -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.vm.qmp('block-commit', device='drive0', top_node='null', base_node='base') -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ self.assert_qmp(result, 'error/desc', "'null' is not in this backing file chain") -+ - # When the job is running on a BB that is automatically deleted on hot - # unplug, the job is cancelled when the device disappears - def test_hot_unplug(self): -diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out -index e20a75c..802ffaa 100644 ---- a/tests/qemu-iotests/040.out -+++ b/tests/qemu-iotests/040.out -@@ -1,5 +1,5 @@ --............................. -+........................................... - ---------------------------------------------------------------------- --Ran 29 tests -+Ran 43 tests - - OK --- -1.8.3.1 - diff --git a/0061-block-for-jobs-do-not-clear-user_paused-until-after-.patch b/0061-block-for-jobs-do-not-clear-user_paused-until-after-.patch deleted file mode 100644 index f3c3385..0000000 --- a/0061-block-for-jobs-do-not-clear-user_paused-until-after-.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 0908cd5291828eca03bbba206f133a37b87c8b41 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Wed, 10 Oct 2018 20:50:58 +0100 -Subject: block: for jobs, do not clear user_paused until after the resume - -RH-Author: John Snow -Message-id: <20181010205100.17689-2-jsnow@redhat.com> -Patchwork-id: 82631 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 1/3] block: for jobs, do not clear user_paused until after the resume -Bugzilla: 1635583 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -From: Jeff Cody - -The function job_cancel_async() will always cause an assert for blockjob -user resume. 
We set job->user_paused to false, and then call -job->driver->user_resume(). In the case of blockjobs, this is the -block_job_user_resume() function. - -In that function, we assert that job.user_paused is set to true. -Unfortunately, right before calling this function, it has explicitly -been set to false. - -The fix is pretty simple: set job->user_paused to false only after the -job user_resume() function has been called. - -Reviewed-by: John Snow -Reviewed-by: Eric Blake -Signed-off-by: Jeff Cody -Message-id: bb183b77d8f2dd6bd67b8da559a90ac1e74b2052.1534868459.git.jcody@redhat.com -Signed-off-by: Jeff Cody -(cherry picked from commit e321c0597c7590499bacab239d7f86e257f96bcd) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula ---- - job.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/job.c b/job.c -index 87c9aa4..bb322de 100644 ---- a/job.c -+++ b/job.c -@@ -705,10 +705,10 @@ static void job_cancel_async(Job *job, bool force) - { - if (job->user_paused) { - /* Do not call job_enter here, the caller will handle it. 
*/ -- job->user_paused = false; - if (job->driver->user_resume) { - job->driver->user_resume(job); - } -+ job->user_paused = false; - assert(job->pause_count > 0); - job->pause_count--; - } --- -1.8.3.1 - diff --git a/0062-block-iotest-to-catch-abort-on-forced-blockjob-cance.patch b/0062-block-iotest-to-catch-abort-on-forced-blockjob-cance.patch deleted file mode 100644 index 5c30cf9..0000000 --- a/0062-block-iotest-to-catch-abort-on-forced-blockjob-cance.patch +++ /dev/null @@ -1,173 +0,0 @@ -From d26430360b5996c99c0e1dd95b4dbb48bd894944 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Wed, 10 Oct 2018 20:51:00 +0100 -Subject: block: iotest to catch abort on forced blockjob cancel - -RH-Author: John Snow -Message-id: <20181010205100.17689-4-jsnow@redhat.com> -Patchwork-id: 82632 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 3/3] block: iotest to catch abort on forced blockjob cancel -Bugzilla: 1635583 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -From: Jeff Cody - -Signed-off-by: Jeff Cody -Reviewed-by: John Snow -Message-id: df317f617fbe5affcf699cb8560e7b0c2e028a64.1534868459.git.jcody@redhat.com -Signed-off-by: Jeff Cody -(cherry picked from commit 26bf474ba92c76e61bea51726e22da6dfd185296) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/229 | 95 ++++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/229.out | 23 +++++++++++ - tests/qemu-iotests/group | 1 + - 3 files changed, 119 insertions(+) - create mode 100755 tests/qemu-iotests/229 - create mode 100644 tests/qemu-iotests/229.out - -diff --git a/tests/qemu-iotests/229 b/tests/qemu-iotests/229 -new file mode 100755 -index 0000000..ff851ec ---- /dev/null -+++ b/tests/qemu-iotests/229 -@@ -0,0 +1,95 @@ -+#!/bin/bash -+# -+# Test for force canceling a running blockjob that is paused in -+# an error state. -+# -+# Copyright (C) 2018 Red Hat, Inc. 
-+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+# creator -+owner=jcody@redhat.com -+ -+seq="$(basename $0)" -+echo "QA output created by $seq" -+ -+here="$PWD" -+status=1 # failure is the default! -+ -+_cleanup() -+{ -+ _cleanup_qemu -+ _cleanup_test_img -+ rm -f "$TEST_IMG" "$DEST_IMG" -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+. ./common.rc -+. ./common.filter -+. 
./common.qemu -+ -+# Needs backing file and backing format support -+_supported_fmt qcow2 qed -+_supported_proto file -+_supported_os Linux -+ -+ -+DEST_IMG="$TEST_DIR/d.$IMGFMT" -+TEST_IMG="$TEST_DIR/b.$IMGFMT" -+ -+_make_test_img 2M -+ -+# destination for mirror will be too small, causing error -+TEST_IMG=$DEST_IMG _make_test_img 1M -+ -+$QEMU_IO -c 'write 0 2M' "$TEST_IMG" | _filter_qemu_io -+ -+_launch_qemu -drive id=testdisk,file="$TEST_IMG",format="$IMGFMT" -+ -+_send_qemu_cmd $QEMU_HANDLE \ -+ "{'execute': 'qmp_capabilities'}" \ -+ 'return' -+ -+echo -+echo '=== Starting drive-mirror, causing error & stop ===' -+echo -+ -+_send_qemu_cmd $QEMU_HANDLE \ -+ "{'execute': 'drive-mirror', -+ 'arguments': {'device': 'testdisk', -+ 'mode': 'absolute-paths', -+ 'format': '$IMGFMT', -+ 'target': '$DEST_IMG', -+ 'sync': 'full', -+ 'mode': 'existing', -+ 'on-source-error': 'stop', -+ 'on-target-error': 'stop' }}" \ -+ "JOB_STATUS_CHANGE.*pause" -+ -+echo -+echo '=== Force cancel job paused in error state ===' -+echo -+ -+success_or_failure="y" _send_qemu_cmd $QEMU_HANDLE \ -+ "{'execute': 'block-job-cancel', -+ 'arguments': { 'device': 'testdisk', -+ 'force': true}}" \ -+ "BLOCK_JOB_CANCELLED" "Assertion" -+ -+# success, all done -+echo "*** done" -+rm -f $seq.full -+status=0 -diff --git a/tests/qemu-iotests/229.out b/tests/qemu-iotests/229.out -new file mode 100644 -index 0000000..4c41128 ---- /dev/null -+++ b/tests/qemu-iotests/229.out -@@ -0,0 +1,23 @@ -+QA output created by 229 -+Formatting 'TEST_DIR/b.IMGFMT', fmt=IMGFMT size=2097152 -+Formatting 'TEST_DIR/d.IMGFMT', fmt=IMGFMT size=1048576 -+wrote 2097152/2097152 bytes at offset 0 -+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+{"return": {}} -+ -+=== Starting drive-mirror, causing error & stop === -+ -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "testdisk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "testdisk"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_ERROR", "data": {"device": "testdisk", "operation": "write", "action": "stop"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "testdisk"}} -+ -+=== Force cancel job paused in error state === -+ -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "testdisk"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_ERROR", "data": {"device": "testdisk", "operation": "write", "action": "stop"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "testdisk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "testdisk", "len": 2097152, "offset": 1048576, "speed": 0, "type": "mirror"}} -+*** done -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index f1059f6..23ab4d3 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -225,3 +225,4 @@ - 225 rw auto quick - 226 auto quick - 227 auto quick -+229 auto quick --- -1.8.3.1 - diff --git a/0063-Revert-hw-acpi-build-build-SRAT-memory-affinity-stru.patch b/0063-Revert-hw-acpi-build-build-SRAT-memory-affinity-stru.patch deleted file mode 100644 index 9776c47..0000000 --- a/0063-Revert-hw-acpi-build-build-SRAT-memory-affinity-stru.patch +++ /dev/null @@ -1,117 +0,0 @@ -From c0bedad9bd133c14096eeeae49877fbb9eb179c3 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Thu, 4 Oct 2018 10:31:31 +0100 -Subject: Revert "hw/acpi-build: build SRAT memory affinity structures for DIMM - devices" - -RH-Author: Igor Mammedov -Message-id: 
<1538649091-70517-1-git-send-email-imammedo@redhat.com> -Patchwork-id: 82373 -O-Subject: [RHEL8/virt-8.0.0 qemu-kvm PATCH] Revert "hw/acpi-build: build SRAT memory affinity structures for DIMM devices" -Bugzilla: 1609235 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Thomas Huth - -Since upstream commits - (0efd7e108 "pc: acpi: fix memory hotplug regression by reducing stub SRAT entry size") - (dbb6da8ba7 "pc: acpi: revert back to 1 SRAT entry for hotpluggable area") -hasn't been backported to RHEL8, it's sufficient to revert commit - (848a1cc1e8 "hw/acpi-build: build SRAT memory affinity structures for DIMM devices") -for the result to match the current upstream state and fix the bug. - -Signed-off-by: Igor Mammedov -Signed-off-by: Danilo C. L. de Paula - -Rebase notes (3.0.0): -- Replace hotplug_memory with device_memory in PCMachineState ---- - hw/i386/acpi-build.c | 65 ++++------------------------------------------------ - 1 file changed, 4 insertions(+), 61 deletions(-) - -diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index be9bdb5..f95516c 100644 ---- a/hw/i386/acpi-build.c -+++ b/hw/i386/acpi-build.c -@@ -2254,64 +2254,6 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog) - #define HOLE_640K_START (640 * KiB) - #define HOLE_640K_END (1 * MiB) - --static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base, -- uint64_t len, int default_node) --{ -- MemoryDeviceInfoList *info_list = qmp_memory_device_list(); -- MemoryDeviceInfoList *info; -- MemoryDeviceInfo *mi; -- PCDIMMDeviceInfo *di; -- uint64_t end = base + len, cur, size; -- bool is_nvdimm; -- AcpiSratMemoryAffinity *numamem; -- MemoryAffinityFlags flags; -- -- for (cur = base, info = info_list; -- cur < end; -- cur += size, info = info->next) { -- numamem = acpi_data_push(table_data, sizeof *numamem); -- -- if (!info) { -- /* -- * Entry is required for Windows to enable memory hotplug in OS -- * and for Linux to enable SWIOTLB 
when booted with less than -- * 4G of RAM. Windows works better if the entry sets proximity -- * to the highest NUMA node in the machine at the end of the -- * reserved space. -- * Memory devices may override proximity set by this entry, -- * providing _PXM method if necessary. -- */ -- build_srat_memory(numamem, end - 1, 1, default_node, -- MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); -- break; -- } -- -- mi = info->value; -- is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM); -- di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data; -- -- if (cur < di->addr) { -- build_srat_memory(numamem, cur, di->addr - cur, default_node, -- MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); -- numamem = acpi_data_push(table_data, sizeof *numamem); -- } -- -- size = di->size; -- -- flags = MEM_AFFINITY_ENABLED; -- if (di->hotpluggable) { -- flags |= MEM_AFFINITY_HOTPLUGGABLE; -- } -- if (is_nvdimm) { -- flags |= MEM_AFFINITY_NON_VOLATILE; -- } -- -- build_srat_memory(numamem, di->addr, size, di->node, flags); -- } -- -- qapi_free_MemoryDeviceInfoList(info_list); --} -- - static void - build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) - { -@@ -2418,9 +2360,10 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) - } - - if (hotplugabble_address_space_size) { -- build_srat_hotpluggable_memory(table_data, machine->device_memory->base, -- hotplugabble_address_space_size, -- pcms->numa_nodes - 1); -+ numamem = acpi_data_push(table_data, sizeof *numamem); -+ build_srat_memory(numamem, machine->device_memory->base, -+ hotplugabble_address_space_size, pcms->numa_nodes - 1, -+ MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); - } - - build_header(linker, table_data, --- -1.8.3.1 - diff --git a/0064-aio-posix-Don-t-count-ctx-notifier-as-progress-when-.patch b/0064-aio-posix-Don-t-count-ctx-notifier-as-progress-when-.patch deleted file mode 100644 index 3fc21dd..0000000 --- 
a/0064-aio-posix-Don-t-count-ctx-notifier-as-progress-when-.patch +++ /dev/null @@ -1,48 +0,0 @@ -From c476cf6c76298803fe896eb7c597085af3b73c12 Mon Sep 17 00:00:00 2001 -From: Fam Zheng -Date: Tue, 9 Oct 2018 08:16:47 +0100 -Subject: aio-posix: Don't count ctx->notifier as progress when polling - -RH-Author: Fam Zheng -Message-id: <20181009081651.15463-2-famz@redhat.com> -Patchwork-id: 82454 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 1/5] aio-posix: Don't count ctx->notifier as progress when polling -Bugzilla: 1623085 -RH-Acked-by: Thomas Huth -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Danilo de Paula - -BZ: 1623085 - -The same logic exists in fd polling. This change is especially important -to avoid busy loop once we limit aio_notify_accept() to blocking -aio_poll(). - -Cc: qemu-stable@nongnu.org -Signed-off-by: Fam Zheng -Message-Id: <20180809132259.18402-2-famz@redhat.com> -Signed-off-by: Fam Zheng -(cherry picked from commit 70232b5253a3c4e03ed1ac47ef9246a8ac66c6fa) -Signed-off-by: Fam Zheng -Signed-off-by: Danilo C. L. 
de Paula ---- - util/aio-posix.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/util/aio-posix.c b/util/aio-posix.c -index 118bf57..b5c7f46 100644 ---- a/util/aio-posix.c -+++ b/util/aio-posix.c -@@ -494,7 +494,8 @@ static bool run_poll_handlers_once(AioContext *ctx) - QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { - if (!node->deleted && node->io_poll && - aio_node_check(ctx, node->is_external) && -- node->io_poll(node->opaque)) { -+ node->io_poll(node->opaque) && -+ node->opaque != &ctx->notifier) { - progress = true; - } - --- -1.8.3.1 - diff --git a/0065-aio-Do-aio_notify_accept-only-during-blocking-aio_po.patch b/0065-aio-Do-aio_notify_accept-only-during-blocking-aio_po.patch deleted file mode 100644 index 7b815ae..0000000 --- a/0065-aio-Do-aio_notify_accept-only-during-blocking-aio_po.patch +++ /dev/null @@ -1,124 +0,0 @@ -From 1580d01151ceea428dc9a25dd3d83990a594e286 Mon Sep 17 00:00:00 2001 -From: Fam Zheng -Date: Tue, 9 Oct 2018 08:16:48 +0100 -Subject: aio: Do aio_notify_accept only during blocking aio_poll - -RH-Author: Fam Zheng -Message-id: <20181009081651.15463-3-famz@redhat.com> -Patchwork-id: 82450 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 2/5] aio: Do aio_notify_accept only during blocking aio_poll -Bugzilla: 1623085 -RH-Acked-by: Thomas Huth -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Danilo de Paula - -BZ: 1623085 - -An aio_notify() pairs with an aio_notify_accept(). The former should -happen in the main thread or a vCPU thread, and the latter should be -done in the IOThread. - -There is one rare case that the main thread or vCPU thread may "steal" -the aio_notify() event just raised by itself, in bdrv_set_aio_context() -[1]. The sequence is like this: - - main thread IO Thread - =============================================================== - bdrv_drained_begin() - aio_disable_external(ctx) - aio_poll(ctx, true) - ctx->notify_me += 2 - ... - bdrv_drained_end() - ... - aio_notify() - ... 
- bdrv_set_aio_context() - aio_poll(ctx, false) -[1] aio_notify_accept(ctx) - ppoll() /* Hang! */ - -[1] is problematic. It will clear the ctx->notifier event so that -the blocked ppoll() will not return. - -(For the curious, this bug was noticed when booting a number of VMs -simultaneously in RHV. One or two of the VMs will hit this race -condition, making the VIRTIO device unresponsive to I/O commands. When -it hangs, Seabios is busy waiting for a read request to complete (read -MBR), right after initializing the virtio-blk-pci device, using 100% -guest CPU. See also https://bugzilla.redhat.com/show_bug.cgi?id=1562750 -for the original bug analysis.) - -aio_notify() only injects an event when ctx->notify_me is set, -correspondingly aio_notify_accept() is only useful when ctx->notify_me -_was_ set. Move the call to it into the "blocking" branch. This will -effectively skip [1] and fix the hang. - -Furthermore, blocking aio_poll is only allowed on home thread -(in_aio_context_home_thread), because otherwise two blocking -aio_poll()'s can steal each other's ctx->notifier event and cause -hanging just like described above. - -Cc: qemu-stable@nongnu.org -Suggested-by: Paolo Bonzini -Signed-off-by: Fam Zheng -Message-Id: <20180809132259.18402-3-famz@redhat.com> -Signed-off-by: Fam Zheng -(cherry picked from commit b37548fcd1b8ac2e88e185a395bef851f3fc4e65) -Signed-off-by: Fam Zheng -Signed-off-by: Danilo C. L. de Paula ---- - util/aio-posix.c | 4 ++-- - util/aio-win32.c | 3 ++- - 2 files changed, 4 insertions(+), 3 deletions(-) - -diff --git a/util/aio-posix.c b/util/aio-posix.c -index b5c7f46..b5c609b 100644 ---- a/util/aio-posix.c -+++ b/util/aio-posix.c -@@ -591,6 +591,7 @@ bool aio_poll(AioContext *ctx, bool blocking) - * so disable the optimization now. 
- */ - if (blocking) { -+ assert(in_aio_context_home_thread(ctx)); - atomic_add(&ctx->notify_me, 2); - } - -@@ -633,6 +634,7 @@ bool aio_poll(AioContext *ctx, bool blocking) - - if (blocking) { - atomic_sub(&ctx->notify_me, 2); -+ aio_notify_accept(ctx); - } - - /* Adjust polling time */ -@@ -676,8 +678,6 @@ bool aio_poll(AioContext *ctx, bool blocking) - } - } - -- aio_notify_accept(ctx); -- - /* if we have any readable fds, dispatch event */ - if (ret > 0) { - for (i = 0; i < npfd; i++) { -diff --git a/util/aio-win32.c b/util/aio-win32.c -index e676a8d..c58957c 100644 ---- a/util/aio-win32.c -+++ b/util/aio-win32.c -@@ -373,11 +373,12 @@ bool aio_poll(AioContext *ctx, bool blocking) - ret = WaitForMultipleObjects(count, events, FALSE, timeout); - if (blocking) { - assert(first); -+ assert(in_aio_context_home_thread(ctx)); - atomic_sub(&ctx->notify_me, 2); -+ aio_notify_accept(ctx); - } - - if (first) { -- aio_notify_accept(ctx); - progress |= aio_bh_poll(ctx); - first = false; - } --- -1.8.3.1 - diff --git a/0066-aio-posix-fix-concurrent-access-to-poll_disable_cnt.patch b/0066-aio-posix-fix-concurrent-access-to-poll_disable_cnt.patch deleted file mode 100644 index b1c677f..0000000 --- a/0066-aio-posix-fix-concurrent-access-to-poll_disable_cnt.patch +++ /dev/null @@ -1,122 +0,0 @@ -From 07bbb6779b2a628b3e83b5474be550009aae034d Mon Sep 17 00:00:00 2001 -From: Fam Zheng -Date: Tue, 9 Oct 2018 08:16:49 +0100 -Subject: aio-posix: fix concurrent access to poll_disable_cnt - -RH-Author: Fam Zheng -Message-id: <20181009081651.15463-4-famz@redhat.com> -Patchwork-id: 82452 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 3/5] aio-posix: fix concurrent access to poll_disable_cnt -Bugzilla: 1632622 -RH-Acked-by: Thomas Huth -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Danilo de Paula - -From: Paolo Bonzini - -BZ: 1632622 - -It is valid for an aio_set_fd_handler to happen concurrently with -aio_poll. 
In that case, poll_disable_cnt can change under the heels -of aio_poll, and the assertion on poll_disable_cnt can fail in -run_poll_handlers. - -Therefore, this patch simply checks the counter on every polling -iteration. There are no particular needs for ordering, since the -polling loop is terminated anyway by aio_notify at the end of -aio_set_fd_handler. - -Signed-off-by: Paolo Bonzini -Message-Id: <20180912171040.1732-2-pbonzini@redhat.com> -Reviewed-by: Fam Zheng -Signed-off-by: Fam Zheng -(cherry picked from commit d7be5dd19c0df7f76e1b42f0c2cbbabefa1974cb) -Signed-off-by: Fam Zheng -Signed-off-by: Danilo C. L. de Paula ---- - util/aio-posix.c | 26 +++++++++++++++----------- - 1 file changed, 15 insertions(+), 11 deletions(-) - -diff --git a/util/aio-posix.c b/util/aio-posix.c -index b5c609b..9189033 100644 ---- a/util/aio-posix.c -+++ b/util/aio-posix.c -@@ -211,6 +211,7 @@ void aio_set_fd_handler(AioContext *ctx, - AioHandler *node; - bool is_new = false; - bool deleted = false; -+ int poll_disable_change; - - qemu_lockcnt_lock(&ctx->list_lock); - -@@ -244,11 +245,9 @@ void aio_set_fd_handler(AioContext *ctx, - QLIST_REMOVE(node, node); - deleted = true; - } -- -- if (!node->io_poll) { -- ctx->poll_disable_cnt--; -- } -+ poll_disable_change = -!node->io_poll; - } else { -+ poll_disable_change = !io_poll - (node && !node->io_poll); - if (node == NULL) { - /* Alloc and insert if it's not already there */ - node = g_new0(AioHandler, 1); -@@ -257,10 +256,6 @@ void aio_set_fd_handler(AioContext *ctx, - - g_source_add_poll(&ctx->source, &node->pfd); - is_new = true; -- -- ctx->poll_disable_cnt += !io_poll; -- } else { -- ctx->poll_disable_cnt += !io_poll - !node->io_poll; - } - - /* Update handler with latest information */ -@@ -274,6 +269,15 @@ void aio_set_fd_handler(AioContext *ctx, - node->pfd.events |= (io_write ? 
G_IO_OUT | G_IO_ERR : 0); - } - -+ /* No need to order poll_disable_cnt writes against other updates; -+ * the counter is only used to avoid wasting time and latency on -+ * iterated polling when the system call will be ultimately necessary. -+ * Changing handlers is a rare event, and a little wasted polling until -+ * the aio_notify below is not an issue. -+ */ -+ atomic_set(&ctx->poll_disable_cnt, -+ atomic_read(&ctx->poll_disable_cnt) + poll_disable_change); -+ - aio_epoll_update(ctx, node, is_new); - qemu_lockcnt_unlock(&ctx->list_lock); - aio_notify(ctx); -@@ -525,7 +529,6 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns) - - assert(ctx->notify_me); - assert(qemu_lockcnt_count(&ctx->list_lock) > 0); -- assert(ctx->poll_disable_cnt == 0); - - trace_run_poll_handlers_begin(ctx, max_ns); - -@@ -533,7 +536,8 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns) - - do { - progress = run_poll_handlers_once(ctx); -- } while (!progress && qemu_clock_get_ns(QEMU_CLOCK_REALTIME) < end_time); -+ } while (!progress && qemu_clock_get_ns(QEMU_CLOCK_REALTIME) < end_time -+ && !atomic_read(&ctx->poll_disable_cnt)); - - trace_run_poll_handlers_end(ctx, progress); - -@@ -552,7 +556,7 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns) - */ - static bool try_poll_mode(AioContext *ctx, bool blocking) - { -- if (blocking && ctx->poll_max_ns && ctx->poll_disable_cnt == 0) { -+ if (blocking && ctx->poll_max_ns && !atomic_read(&ctx->poll_disable_cnt)) { - /* See qemu_soonest_timeout() uint64_t hack */ - int64_t max_ns = MIN((uint64_t)aio_compute_timeout(ctx), - (uint64_t)ctx->poll_ns); --- -1.8.3.1 - diff --git a/0067-aio-posix-compute-timeout-before-polling.patch b/0067-aio-posix-compute-timeout-before-polling.patch deleted file mode 100644 index 3a5c4fe..0000000 --- a/0067-aio-posix-compute-timeout-before-polling.patch +++ /dev/null @@ -1,186 +0,0 @@ -From 44bb29739a1cfa471447d6c5880e7527399b146f Mon Sep 17 00:00:00 2001 -From: Fam Zheng 
-Date: Tue, 9 Oct 2018 08:16:50 +0100 -Subject: aio-posix: compute timeout before polling - -RH-Author: Fam Zheng -Message-id: <20181009081651.15463-5-famz@redhat.com> -Patchwork-id: 82453 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 4/5] aio-posix: compute timeout before polling -Bugzilla: 1632622 -RH-Acked-by: Thomas Huth -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Danilo de Paula - -From: Paolo Bonzini - -BZ: 1632622 - -This is a preparation for the next patch, and also a very small -optimization. Compute the timeout only once, before invoking -try_poll_mode, and adjust it in run_poll_handlers. The adjustment -is the polling time when polling fails, or zero (non-blocking) if -polling succeeds. - -Fixes: 70232b5253a3c4e03ed1ac47ef9246a8ac66c6fa -Signed-off-by: Paolo Bonzini -Message-Id: <20180912171040.1732-3-pbonzini@redhat.com> -Reviewed-by: Fam Zheng -Signed-off-by: Fam Zheng -(cherry picked from commit e30cffa04d52e35996569f1cfac111be19576bde) -Signed-off-by: Fam Zheng -Signed-off-by: Danilo C. L. 
de Paula ---- - util/aio-posix.c | 59 ++++++++++++++++++++++++++++++++----------------------- - util/trace-events | 4 ++-- - 2 files changed, 36 insertions(+), 27 deletions(-) - -diff --git a/util/aio-posix.c b/util/aio-posix.c -index 9189033..bb862e1 100644 ---- a/util/aio-posix.c -+++ b/util/aio-posix.c -@@ -490,7 +490,7 @@ static void add_pollfd(AioHandler *node) - npfd++; - } - --static bool run_poll_handlers_once(AioContext *ctx) -+static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout) - { - bool progress = false; - AioHandler *node; -@@ -500,6 +500,7 @@ static bool run_poll_handlers_once(AioContext *ctx) - aio_node_check(ctx, node->is_external) && - node->io_poll(node->opaque) && - node->opaque != &ctx->notifier) { -+ *timeout = 0; - progress = true; - } - -@@ -522,31 +523,38 @@ static bool run_poll_handlers_once(AioContext *ctx) - * - * Returns: true if progress was made, false otherwise - */ --static bool run_poll_handlers(AioContext *ctx, int64_t max_ns) -+static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout) - { - bool progress; -- int64_t end_time; -+ int64_t start_time, elapsed_time; - - assert(ctx->notify_me); - assert(qemu_lockcnt_count(&ctx->list_lock) > 0); - -- trace_run_poll_handlers_begin(ctx, max_ns); -- -- end_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + max_ns; -+ trace_run_poll_handlers_begin(ctx, max_ns, *timeout); - -+ start_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - do { -- progress = run_poll_handlers_once(ctx); -- } while (!progress && qemu_clock_get_ns(QEMU_CLOCK_REALTIME) < end_time -+ progress = run_poll_handlers_once(ctx, timeout); -+ elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time; -+ } while (!progress && elapsed_time < max_ns - && !atomic_read(&ctx->poll_disable_cnt)); - -- trace_run_poll_handlers_end(ctx, progress); -+ /* If time has passed with no successful polling, adjust *timeout to -+ * keep the same ending time. 
-+ */ -+ if (*timeout != -1) { -+ *timeout -= MIN(*timeout, elapsed_time); -+ } - -+ trace_run_poll_handlers_end(ctx, progress, *timeout); - return progress; - } - - /* try_poll_mode: - * @ctx: the AioContext -- * @blocking: busy polling is only attempted when blocking is true -+ * @timeout: timeout for blocking wait, computed by the caller and updated if -+ * polling succeeds. - * - * ctx->notify_me must be non-zero so this function can detect aio_notify(). - * -@@ -554,19 +562,16 @@ static bool run_poll_handlers(AioContext *ctx, int64_t max_ns) - * - * Returns: true if progress was made, false otherwise - */ --static bool try_poll_mode(AioContext *ctx, bool blocking) -+static bool try_poll_mode(AioContext *ctx, int64_t *timeout) - { -- if (blocking && ctx->poll_max_ns && !atomic_read(&ctx->poll_disable_cnt)) { -- /* See qemu_soonest_timeout() uint64_t hack */ -- int64_t max_ns = MIN((uint64_t)aio_compute_timeout(ctx), -- (uint64_t)ctx->poll_ns); -+ /* See qemu_soonest_timeout() uint64_t hack */ -+ int64_t max_ns = MIN((uint64_t)*timeout, (uint64_t)ctx->poll_ns); - -- if (max_ns) { -- poll_set_started(ctx, true); -+ if (max_ns && !atomic_read(&ctx->poll_disable_cnt)) { -+ poll_set_started(ctx, true); - -- if (run_poll_handlers(ctx, max_ns)) { -- return true; -- } -+ if (run_poll_handlers(ctx, max_ns, timeout)) { -+ return true; - } - } - -@@ -575,7 +580,7 @@ static bool try_poll_mode(AioContext *ctx, bool blocking) - /* Even if we don't run busy polling, try polling once in case it can make - * progress and the caller will be able to avoid ppoll(2)/epoll_wait(2). - */ -- return run_poll_handlers_once(ctx); -+ return run_poll_handlers_once(ctx, timeout); - } - - bool aio_poll(AioContext *ctx, bool blocking) -@@ -605,8 +610,14 @@ bool aio_poll(AioContext *ctx, bool blocking) - start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - } - -- progress = try_poll_mode(ctx, blocking); -- if (!progress) { -+ timeout = blocking ? 
aio_compute_timeout(ctx) : 0; -+ progress = try_poll_mode(ctx, &timeout); -+ assert(!(timeout && progress)); -+ -+ /* If polling is allowed, non-blocking aio_poll does not need the -+ * system call---a single round of run_poll_handlers_once suffices. -+ */ -+ if (timeout || atomic_read(&ctx->poll_disable_cnt)) { - assert(npfd == 0); - - /* fill pollfds */ -@@ -620,8 +631,6 @@ bool aio_poll(AioContext *ctx, bool blocking) - } - } - -- timeout = blocking ? aio_compute_timeout(ctx) : 0; -- - /* wait until next event */ - if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) { - AioHandler epoll_handler; -diff --git a/util/trace-events b/util/trace-events -index 4822434..79569b7 100644 ---- a/util/trace-events -+++ b/util/trace-events -@@ -1,8 +1,8 @@ - # See docs/devel/tracing.txt for syntax documentation. - - # util/aio-posix.c --run_poll_handlers_begin(void *ctx, int64_t max_ns) "ctx %p max_ns %"PRId64 --run_poll_handlers_end(void *ctx, bool progress) "ctx %p progress %d" -+run_poll_handlers_begin(void *ctx, int64_t max_ns, int64_t timeout) "ctx %p max_ns %"PRId64 " timeout %"PRId64 -+run_poll_handlers_end(void *ctx, bool progress, int64_t timeout) "ctx %p progress %d new timeout %"PRId64 - poll_shrink(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64 - poll_grow(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64 - --- -1.8.3.1 - diff --git a/0068-aio-posix-do-skip-system-call-if-ctx-notifier-pollin.patch b/0068-aio-posix-do-skip-system-call-if-ctx-notifier-pollin.patch deleted file mode 100644 index cf50d42..0000000 --- a/0068-aio-posix-do-skip-system-call-if-ctx-notifier-pollin.patch +++ /dev/null @@ -1,64 +0,0 @@ -From ea1db6ad3fcbcda2068d3aeb21c384d42004aaaf Mon Sep 17 00:00:00 2001 -From: Fam Zheng -Date: Tue, 9 Oct 2018 08:16:51 +0100 -Subject: aio-posix: do skip system call if ctx->notifier polling succeeds - -RH-Author: Fam Zheng -Message-id: <20181009081651.15463-6-famz@redhat.com> -Patchwork-id: 82449 -O-Subject: 
[RHEL8/rhel qemu-kvm PATCH 5/5] aio-posix: do skip system call if ctx->notifier polling succeeds -Bugzilla: 1632622 -RH-Acked-by: Thomas Huth -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Danilo de Paula - -From: Paolo Bonzini - -BZ: 1632622 - -Commit 70232b5253 ("aio-posix: Don't count ctx->notifier as progress when -2018-08-15), by not reporting progress, causes aio_poll to execute the -system call when polling succeeds because of ctx->notifier. This introduces -latency before the call to aio_bh_poll() and negates the advantages of -polling, unfortunately. - -The fix builds on the previous patch, separating the effect of polling on -the timeout from the progress reported to aio_poll(). ctx->notifier -does zero the timeout, causing the caller to skip the system call, -but it does not report progress, so that the bug fix of commit 70232b5253 -still stands. - -Fixes: 70232b5253a3c4e03ed1ac47ef9246a8ac66c6fa -Signed-off-by: Paolo Bonzini -Message-Id: <20180912171040.1732-4-pbonzini@redhat.com> -Reviewed-by: Fam Zheng -Signed-off-by: Fam Zheng -(cherry picked from commit cfeb35d6774b2e936046aa9923217818bd160299) -Signed-off-by: Fam Zheng -Signed-off-by: Danilo C. L. de Paula ---- - util/aio-posix.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/util/aio-posix.c b/util/aio-posix.c -index bb862e1..a959ff6 100644 ---- a/util/aio-posix.c -+++ b/util/aio-posix.c -@@ -498,10 +498,11 @@ static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout) - QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { - if (!node->deleted && node->io_poll && - aio_node_check(ctx, node->is_external) && -- node->io_poll(node->opaque) && -- node->opaque != &ctx->notifier) { -+ node->io_poll(node->opaque)) { - *timeout = 0; -- progress = true; -+ if (node->opaque != &ctx->notifier) { -+ progress = true; -+ } - } - - /* Caller handles freeing deleted nodes. Don't do it here. 
*/ --- -1.8.3.1 - diff --git a/0069-linux-headers-update.patch b/0069-linux-headers-update.patch deleted file mode 100644 index a48ddbc..0000000 --- a/0069-linux-headers-update.patch +++ /dev/null @@ -1,202 +0,0 @@ -From 7e13447e23269939c3d1267a957187a60fef36e9 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Mon, 15 Oct 2018 10:19:26 +0100 -Subject: linux-headers: update - -RH-Author: Thomas Huth -Message-id: <1539598771-16223-2-git-send-email-thuth@redhat.com> -Patchwork-id: 82696 -O-Subject: [RHEL-8 qemu-kvm PATCH 1/6] linux-headers: update -Bugzilla: 1508142 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck -RH-Acked-by: Jens Freimann - -From: Cornelia Huck - -Update to kvm/next commit dd5bd0a65ff6 ("Merge tag 'kvm-s390-next-4.20-1' -of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD") - -Signed-off-by: Cornelia Huck -(cherry picked from commit 8f3cd250a897213d39e621e3d824507b48158d42) -Signed-off-by: Danilo C. L. de Paula - -Conflicts: - linux-headers/linux/kvm.h - linux-headers/linux/vhost.h - (simple contextual conflicts due to some missing patches in downstream) - -Signed-off-by: Thomas Huth ---- - include/standard-headers/linux/input.h | 9 +++++---- - linux-headers/asm-arm/kvm.h | 13 +++++++++++++ - linux-headers/asm-arm64/kvm.h | 13 +++++++++++++ - linux-headers/asm-s390/kvm.h | 2 ++ - linux-headers/asm-x86/kvm.h | 1 + - linux-headers/linux/kvm.h | 3 +++ - linux-headers/linux/vfio.h | 2 ++ - linux-headers/linux/vhost.h | 8 ++++++++ - 8 files changed, 47 insertions(+), 4 deletions(-) - -diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h -index 6d6128c..c0ad9fc 100644 ---- a/include/standard-headers/linux/input.h -+++ b/include/standard-headers/linux/input.h -@@ -267,10 +267,11 @@ struct input_mask { - /* - * MT_TOOL types - */ --#define MT_TOOL_FINGER 0 --#define MT_TOOL_PEN 1 --#define MT_TOOL_PALM 2 --#define MT_TOOL_MAX 2 -+#define MT_TOOL_FINGER 0x00 -+#define MT_TOOL_PEN 0x01 
-+#define MT_TOOL_PALM 0x02 -+#define MT_TOOL_DIAL 0x0a -+#define MT_TOOL_MAX 0x0f - - /* - * Values describing the status of a force-feedback effect -diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h -index 72aa226..e1f8b74 100644 ---- a/linux-headers/asm-arm/kvm.h -+++ b/linux-headers/asm-arm/kvm.h -@@ -27,6 +27,7 @@ - #define __KVM_HAVE_GUEST_DEBUG - #define __KVM_HAVE_IRQ_LINE - #define __KVM_HAVE_READONLY_MEM -+#define __KVM_HAVE_VCPU_EVENTS - - #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 - -@@ -125,6 +126,18 @@ struct kvm_sync_regs { - struct kvm_arch_memory_slot { - }; - -+/* for KVM_GET/SET_VCPU_EVENTS */ -+struct kvm_vcpu_events { -+ struct { -+ __u8 serror_pending; -+ __u8 serror_has_esr; -+ /* Align it to 8 bytes */ -+ __u8 pad[6]; -+ __u64 serror_esr; -+ } exception; -+ __u32 reserved[12]; -+}; -+ - /* If you need to interpret the index values, here is the key: */ - #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 - #define KVM_REG_ARM_COPROC_SHIFT 16 -diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h -index 99cb9ad..e6a98c1 100644 ---- a/linux-headers/asm-arm64/kvm.h -+++ b/linux-headers/asm-arm64/kvm.h -@@ -39,6 +39,7 @@ - #define __KVM_HAVE_GUEST_DEBUG - #define __KVM_HAVE_IRQ_LINE - #define __KVM_HAVE_READONLY_MEM -+#define __KVM_HAVE_VCPU_EVENTS - - #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 - -@@ -154,6 +155,18 @@ struct kvm_sync_regs { - struct kvm_arch_memory_slot { - }; - -+/* for KVM_GET/SET_VCPU_EVENTS */ -+struct kvm_vcpu_events { -+ struct { -+ __u8 serror_pending; -+ __u8 serror_has_esr; -+ /* Align it to 8 bytes */ -+ __u8 pad[6]; -+ __u64 serror_esr; -+ } exception; -+ __u32 reserved[12]; -+}; -+ - /* If you need to interpret the index values, here is the key: */ - #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 - #define KVM_REG_ARM_COPROC_SHIFT 16 -diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h -index 1ab9901..0265482 100644 ---- a/linux-headers/asm-s390/kvm.h -+++ 
b/linux-headers/asm-s390/kvm.h -@@ -160,6 +160,8 @@ struct kvm_s390_vm_cpu_subfunc { - #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 - #define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2 - #define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3 -+#define KVM_S390_VM_CRYPTO_ENABLE_APIE 4 -+#define KVM_S390_VM_CRYPTO_DISABLE_APIE 5 - - /* kvm attributes for migration mode */ - #define KVM_S390_VM_MIGRATION_STOP 0 -diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h -index c535c2f..9bba973 100644 ---- a/linux-headers/asm-x86/kvm.h -+++ b/linux-headers/asm-x86/kvm.h -@@ -377,5 +377,6 @@ struct kvm_sync_regs { - - #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) - #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) -+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) - - #endif /* _ASM_X86_KVM_H */ -diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h -index 2aae948..c4a5542 100644 ---- a/linux-headers/linux/kvm.h -+++ b/linux-headers/linux/kvm.h -@@ -950,6 +950,9 @@ struct kvm_ppc_resize_hpt { - #define KVM_CAP_HYPERV_EVENTFD 154 - #define KVM_CAP_HYPERV_TLBFLUSH 155 - #define KVM_CAP_S390_HPAGE_1M 156 -+#define KVM_CAP_NESTED_STATE 157 -+#define KVM_CAP_ARM_INJECT_SERROR_ESR 158 -+#define KVM_CAP_MSR_PLATFORM_INFO 159 - - #ifdef KVM_CAP_IRQ_ROUTING - -diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h -index 3615a26..ceb6453 100644 ---- a/linux-headers/linux/vfio.h -+++ b/linux-headers/linux/vfio.h -@@ -200,6 +200,7 @@ struct vfio_device_info { - #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */ - #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ - #define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ -+#define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */ - __u32 num_regions; /* Max region index + 1 */ - __u32 num_irqs; /* Max IRQ index + 1 */ - }; -@@ -215,6 +216,7 @@ struct vfio_device_info { - #define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform" - #define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" - 
#define VFIO_DEVICE_API_CCW_STRING "vfio-ccw" -+#define VFIO_DEVICE_API_AP_STRING "vfio-ap" - - /** - * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, -diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h -index e336395..3421624 100644 ---- a/linux-headers/linux/vhost.h -+++ b/linux-headers/linux/vhost.h -@@ -160,6 +160,14 @@ struct vhost_memory { - #define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24, \ - struct vhost_vring_state) - -+/* Set or get vhost backend capability */ -+ -+/* Use message type V2 */ -+#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 -+ -+#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) -+#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) -+ - /* VHOST_NET specific defines */ - - /* Attach virtio net ring to a raw socket, or tap device. --- -1.8.3.1 - diff --git a/0070-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch b/0070-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch deleted file mode 100644 index dea147b..0000000 --- a/0070-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch +++ /dev/null @@ -1,148 +0,0 @@ -From 9ceba72eb99b073a86b0aa529154de3e06330720 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Mon, 15 Oct 2018 10:19:27 +0100 -Subject: s390x/cpumodel: Set up CPU model for AP device support - -RH-Author: Thomas Huth -Message-id: <1539598771-16223-3-git-send-email-thuth@redhat.com> -Patchwork-id: 82694 -O-Subject: [RHEL-8 qemu-kvm PATCH 2/6] s390x/cpumodel: Set up CPU model for AP device support -Bugzilla: 1508142 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck -RH-Acked-by: Jens Freimann - -From: Tony Krowiak - -A new CPU model feature and two new CPU model facilities are -introduced to support AP devices for a KVM guest. - -CPU model features: - -1. The S390_FEAT_AP CPU model feature indicates whether AP - instructions are available to the guest. 
This feature will - be enabled only if the AP instructions are available on the - linux host as determined by the availability of the - KVM_S390_VM_CRYPTO_ENABLE_APIE VM attribute which is exposed - by KVM only if the AP instructions are available on the - host. - - This feature must be turned on from userspace to execute AP - instructions on the KVM guest. The QEMU command line to turn - this feature on looks something like this: - - qemu-system-s390x ... -cpu xxx,ap=on ... - - This feature will be supported for zEC12 and newer CPU models. - The feature will not be supported for older models because - there are few older systems on which to test and the older - crypto cards will be going out of service in the relatively - near future. - -CPU model facilities: - -1. The S390_FEAT_AP_QUERY_CONFIG_INFO feature indicates whether the - AP Query Configuration Information (QCI) facility is available - to the guest as determined by whether the facility is available - on the host. This feature will be exposed by KVM only if the - QCI facility is installed on the host. - -2. The S390_FEAT_AP_FACILITY_TEST feature indicates whether the AP - Facility Test (APFT) facility is available to the guest as - determined by whether the facility is available on the host. - This feature will be exposed by KVM only if APFT is installed - on the host. - -Signed-off-by: Tony Krowiak -Tested-by: Pierre Morel -Reviewed-by: David Hildenbrand -Reviewed-by: Halil Pasic -Reviewed-by: Christian Borntraeger -Tested-by: Christian Borntraeger -Message-Id: <20181010170309.12045-3-akrowiak@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit c5cd17afddda89376712b315a41ede96b034e4c2) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/s390x/cpu_features.c | 3 +++ - target/s390x/cpu_features_def.h | 3 +++ - target/s390x/cpu_models.c | 2 ++ - target/s390x/gen-features.c | 3 +++ - 4 files changed, 11 insertions(+) - -diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c -index e05e6aa..0fbee27 100644 ---- a/target/s390x/cpu_features.c -+++ b/target/s390x/cpu_features.c -@@ -40,8 +40,10 @@ static const S390FeatDef s390_features[] = { - FEAT_INIT("srs", S390_FEAT_TYPE_STFL, 9, "Sense-running-status facility"), - FEAT_INIT("csske", S390_FEAT_TYPE_STFL, 10, "Conditional-SSKE facility"), - FEAT_INIT("ctop", S390_FEAT_TYPE_STFL, 11, "Configuration-topology facility"), -+ FEAT_INIT("apqci", S390_FEAT_TYPE_STFL, 12, "Query AP Configuration Information facility"), - FEAT_INIT("ipter", S390_FEAT_TYPE_STFL, 13, "IPTE-range facility"), - FEAT_INIT("nonqks", S390_FEAT_TYPE_STFL, 14, "Nonquiescing key-setting facility"), -+ FEAT_INIT("apft", S390_FEAT_TYPE_STFL, 15, "AP Facilities Test facility"), - FEAT_INIT("etf2", S390_FEAT_TYPE_STFL, 16, "Extended-translation facility 2"), - FEAT_INIT("msa-base", S390_FEAT_TYPE_STFL, 17, "Message-security-assist facility (excluding subfunctions)"), - FEAT_INIT("ldisp", S390_FEAT_TYPE_STFL, 18, "Long-displacement facility"), -@@ -130,6 +132,7 @@ static const S390FeatDef s390_features[] = { - - FEAT_INIT_MISC("dateh2", "DAT-enhancement facility 2"), - FEAT_INIT_MISC("cmm", "Collaborative-memory-management facility"), -+ FEAT_INIT_MISC("ap", "AP instructions installed"), - - FEAT_INIT("plo-cl", S390_FEAT_TYPE_PLO, 0, "PLO Compare and load (32 bit in general registers)"), - FEAT_INIT("plo-clg", S390_FEAT_TYPE_PLO, 1, "PLO Compare and load (64 bit in parameter list)"), -diff --git a/target/s390x/cpu_features_def.h b/target/s390x/cpu_features_def.h -index ac2c947..5fc7e7b 100644 ---- a/target/s390x/cpu_features_def.h -+++ b/target/s390x/cpu_features_def.h -@@ -27,8 +27,10 @@ typedef enum { - S390_FEAT_SENSE_RUNNING_STATUS, - 
S390_FEAT_CONDITIONAL_SSKE, - S390_FEAT_CONFIGURATION_TOPOLOGY, -+ S390_FEAT_AP_QUERY_CONFIG_INFO, - S390_FEAT_IPTE_RANGE, - S390_FEAT_NONQ_KEY_SETTING, -+ S390_FEAT_AP_FACILITIES_TEST, - S390_FEAT_EXTENDED_TRANSLATION_2, - S390_FEAT_MSA, - S390_FEAT_LONG_DISPLACEMENT, -@@ -119,6 +121,7 @@ typedef enum { - /* Misc */ - S390_FEAT_DAT_ENH_2, - S390_FEAT_CMM, -+ S390_FEAT_AP, - - /* PLO */ - S390_FEAT_PLO_CL, -diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 9c469ff..a8722cd 100644 ---- a/target/s390x/cpu_models.c -+++ b/target/s390x/cpu_models.c -@@ -782,6 +782,8 @@ static void check_consistency(const S390CPUModel *model) - { S390_FEAT_PRNO_TRNG_QRTCR, S390_FEAT_MSA_EXT_5 }, - { S390_FEAT_PRNO_TRNG, S390_FEAT_MSA_EXT_5 }, - { S390_FEAT_SIE_KSS, S390_FEAT_SIE_F2 }, -+ { S390_FEAT_AP_QUERY_CONFIG_INFO, S390_FEAT_AP }, -+ { S390_FEAT_AP_FACILITIES_TEST, S390_FEAT_AP }, - }; - int i; - -diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c -index 5af042c..7302269 100644 ---- a/target/s390x/gen-features.c -+++ b/target/s390x/gen-features.c -@@ -447,6 +447,9 @@ static uint16_t full_GEN12_GA1[] = { - S390_FEAT_ADAPTER_INT_SUPPRESSION, - S390_FEAT_EDAT_2, - S390_FEAT_SIDE_EFFECT_ACCESS_ESOP2, -+ S390_FEAT_AP_QUERY_CONFIG_INFO, -+ S390_FEAT_AP_FACILITIES_TEST, -+ S390_FEAT_AP, - }; - - static uint16_t full_GEN12_GA2[] = { --- -1.8.3.1 - diff --git a/0071-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch b/0071-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch deleted file mode 100644 index 47012cd..0000000 --- a/0071-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch +++ /dev/null @@ -1,89 +0,0 @@ -From ef6a15cefa04a4f29d0d800d17caa9a37c40b05c Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Mon, 15 Oct 2018 10:19:28 +0100 -Subject: s390x/kvm: enable AP instruction interpretation for guest - -RH-Author: Thomas Huth -Message-id: <1539598771-16223-4-git-send-email-thuth@redhat.com> -Patchwork-id: 82697 
-O-Subject: [RHEL-8 qemu-kvm PATCH 3/6] s390x/kvm: enable AP instruction interpretation for guest -Bugzilla: 1508142 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck -RH-Acked-by: Jens Freimann - -From: Tony Krowiak - -Let's use the KVM_SET_DEVICE_ATTR ioctl to enable hardware -interpretation of AP instructions executed on the guest. -If the S390_FEAT_AP feature is switched on for the guest, -AP instructions must be interpreted by default; otherwise, -they will be intercepted. - -This attribute setting may be overridden by a device. For example, -a device may want to provide AP instructions to the guest (i.e., -S390_FEAT_AP turned on), but it may want to emulate them. In this -case, the AP instructions executed on the guest must be -intercepted; so when the device is realized, it must disable -interpretation. - -Signed-off-by: Tony Krowiak -Tested-by: Pierre Morel -Reviewed-by: David Hildenbrand -Reviewed-by: Thomas Huth -Reviewed-by: Christian Borntraeger -Acked-by: Halil Pasic -Tested-by: Christian Borntraeger -Message-Id: <20181010170309.12045-4-akrowiak@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 1d7db85b61cb9888b8ed8c8923343b468405b7a0) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/s390x/kvm.c | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 71d90f2..d25e2e2 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -2290,11 +2290,26 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) - error_setg(errp, "KVM: host CPU model could not be identified"); - return; - } -+ /* for now, we can only provide the AP feature with HW support */ -+ if (kvm_vm_check_attr(kvm_state, KVM_S390_VM_CRYPTO, -+ KVM_S390_VM_CRYPTO_ENABLE_APIE)) { -+ set_bit(S390_FEAT_AP, model->features); -+ } - /* strip of features that are not part of the maximum model */ - bitmap_and(model->features, model->features, model->def->full_feat, - S390_FEAT_MAX); - } - -+static void kvm_s390_configure_apie(bool interpret) -+{ -+ uint64_t attr = interpret ? KVM_S390_VM_CRYPTO_ENABLE_APIE : -+ KVM_S390_VM_CRYPTO_DISABLE_APIE; -+ -+ if (kvm_vm_check_attr(kvm_state, KVM_S390_VM_CRYPTO, attr)) { -+ kvm_s390_set_attr(attr); -+ } -+} -+ - void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) - { - struct kvm_s390_vm_cpu_processor prop = { -@@ -2352,6 +2367,10 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) - if (test_bit(S390_FEAT_CMM, model->features)) { - kvm_s390_enable_cmma(); - } -+ -+ if (test_bit(S390_FEAT_AP, model->features)) { -+ kvm_s390_configure_apie(true); -+ } - } - - void kvm_s390_restart_interrupt(S390CPU *cpu) --- -1.8.3.1 - diff --git a/0072-s390x-ap-base-Adjunct-Processor-AP-object-model.patch b/0072-s390x-ap-base-Adjunct-Processor-AP-object-model.patch deleted file mode 100644 index 9059681..0000000 --- a/0072-s390x-ap-base-Adjunct-Processor-AP-object-model.patch +++ /dev/null @@ -1,281 +0,0 @@ -From a57558fc97a82853d0c5e1e190297f7677598d5a Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Mon, 15 Oct 2018 10:19:29 +0100 -Subject: s390x/ap: base Adjunct Processor (AP) object model - -RH-Author: Thomas Huth 
-Message-id: <1539598771-16223-5-git-send-email-thuth@redhat.com> -Patchwork-id: 82695 -O-Subject: [RHEL-8 qemu-kvm PATCH 4/6] s390x/ap: base Adjunct Processor (AP) object model -Bugzilla: 1508142 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck -RH-Acked-by: Jens Freimann - -From: Tony Krowiak - -Introduces the base object model for virtualizing AP devices. - -Signed-off-by: Tony Krowiak -Tested-by: Pierre Morel -Acked-by: David Hildenbrand -Reviewed-by: Thomas Huth -Reviewed-by: Halil Pasic -Tested-by: Christian Borntraeger -Message-Id: <20181010170309.12045-5-akrowiak@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit a51b31535a8ec13997de29b357f7cc1dcd8a7f9c) -Signed-off-by: Danilo C. L. de Paula ---- - MAINTAINERS | 12 +++++++ - hw/s390x/Makefile.objs | 2 ++ - hw/s390x/ap-bridge.c | 78 ++++++++++++++++++++++++++++++++++++++++++++ - hw/s390x/ap-device.c | 38 +++++++++++++++++++++ - hw/s390x/s390-virtio-ccw.c | 4 +++ - include/hw/s390x/ap-bridge.h | 19 +++++++++++ - include/hw/s390x/ap-device.h | 22 +++++++++++++ - 7 files changed, 175 insertions(+) - create mode 100644 hw/s390x/ap-bridge.c - create mode 100644 hw/s390x/ap-device.c - create mode 100644 include/hw/s390x/ap-bridge.h - create mode 100644 include/hw/s390x/ap-device.h - -diff --git a/MAINTAINERS b/MAINTAINERS -index 666e936..d5b3c18 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -1184,6 +1184,18 @@ F: include/hw/s390x/s390-ccw.h - T: git git://github.com/cohuck/qemu.git s390-next - L: qemu-s390x@nongnu.org - -+vfio-ap -+M: Christian Borntraeger -+M: Tony Krowiak -+M: Halil Pasic -+M: Pierre Morel -+S: Supported -+F: hw/s390x/ap-device.c -+F: hw/s390x/ap-bridge.c -+F: include/hw/s390x/ap-device.h -+F: include/hw/s390x/ap-bridge.h -+L: qemu-s390x@nongnu.org -+ - vhost - M: Michael S. 
Tsirkin - S: Supported -diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs -index 93282f7..add89b1 100644 ---- a/hw/s390x/Makefile.objs -+++ b/hw/s390x/Makefile.objs -@@ -20,3 +20,5 @@ obj-$(CONFIG_TCG) += tod-qemu.o - obj-$(CONFIG_KVM) += s390-skeys-kvm.o - obj-$(CONFIG_KVM) += s390-stattrib-kvm.o - obj-y += s390-ccw.o -+obj-y += ap-device.o -+obj-y += ap-bridge.o -diff --git a/hw/s390x/ap-bridge.c b/hw/s390x/ap-bridge.c -new file mode 100644 -index 0000000..3795d30 ---- /dev/null -+++ b/hw/s390x/ap-bridge.c -@@ -0,0 +1,78 @@ -+/* -+ * ap bridge -+ * -+ * Copyright 2018 IBM Corp. -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or (at -+ * your option) any later version. See the COPYING file in the top-level -+ * directory. -+ */ -+#include "qemu/osdep.h" -+#include "qapi/error.h" -+#include "hw/sysbus.h" -+#include "qemu/bitops.h" -+#include "hw/s390x/ap-bridge.h" -+#include "cpu.h" -+ -+static char *ap_bus_get_dev_path(DeviceState *dev) -+{ -+ /* at most one */ -+ return g_strdup_printf("/1"); -+} -+ -+static void ap_bus_class_init(ObjectClass *oc, void *data) -+{ -+ BusClass *k = BUS_CLASS(oc); -+ -+ k->get_dev_path = ap_bus_get_dev_path; -+ /* More than one ap device does not make sense */ -+ k->max_dev = 1; -+} -+ -+static const TypeInfo ap_bus_info = { -+ .name = TYPE_AP_BUS, -+ .parent = TYPE_BUS, -+ .instance_size = 0, -+ .class_init = ap_bus_class_init, -+}; -+ -+void s390_init_ap(void) -+{ -+ DeviceState *dev; -+ -+ /* If no AP instructions then no need for AP bridge */ -+ if (!s390_has_feat(S390_FEAT_AP)) { -+ return; -+ } -+ -+ /* Create bridge device */ -+ dev = qdev_create(NULL, TYPE_AP_BRIDGE); -+ object_property_add_child(qdev_get_machine(), TYPE_AP_BRIDGE, -+ OBJECT(dev), NULL); -+ qdev_init_nofail(dev); -+ -+ /* Create bus on bridge device */ -+ qbus_create(TYPE_AP_BUS, dev, TYPE_AP_BUS); -+ } -+ -+static void ap_bridge_class_init(ObjectClass *oc, void *data) -+{ -+ DeviceClass *dc = DEVICE_CLASS(oc); -+ -+ 
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); -+} -+ -+static const TypeInfo ap_bridge_info = { -+ .name = TYPE_AP_BRIDGE, -+ .parent = TYPE_SYS_BUS_DEVICE, -+ .instance_size = 0, -+ .class_init = ap_bridge_class_init, -+}; -+ -+static void ap_register(void) -+{ -+ type_register_static(&ap_bridge_info); -+ type_register_static(&ap_bus_info); -+} -+ -+type_init(ap_register) -diff --git a/hw/s390x/ap-device.c b/hw/s390x/ap-device.c -new file mode 100644 -index 0000000..f5ac8db ---- /dev/null -+++ b/hw/s390x/ap-device.c -@@ -0,0 +1,38 @@ -+/* -+ * Adjunct Processor (AP) matrix device -+ * -+ * Copyright 2018 IBM Corp. -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or (at -+ * your option) any later version. See the COPYING file in the top-level -+ * directory. -+ */ -+#include "qemu/osdep.h" -+#include "qemu/module.h" -+#include "qapi/error.h" -+#include "hw/qdev.h" -+#include "hw/s390x/ap-device.h" -+ -+static void ap_class_init(ObjectClass *klass, void *data) -+{ -+ DeviceClass *dc = DEVICE_CLASS(klass); -+ -+ dc->desc = "AP device class"; -+ dc->hotpluggable = false; -+} -+ -+static const TypeInfo ap_device_info = { -+ .name = AP_DEVICE_TYPE, -+ .parent = TYPE_DEVICE, -+ .instance_size = sizeof(APDevice), -+ .class_size = sizeof(DeviceClass), -+ .class_init = ap_class_init, -+ .abstract = true, -+}; -+ -+static void ap_device_register(void) -+{ -+ type_register_static(&ap_device_info); -+} -+ -+type_init(ap_device_register) -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index cdf4558..a4b8b62 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -32,6 +32,7 @@ - #include "ipl.h" - #include "hw/s390x/s390-virtio-ccw.h" - #include "hw/s390x/css-bridge.h" -+#include "hw/s390x/ap-bridge.h" - #include "migration/register.h" - #include "cpu_models.h" - #include "hw/nmi.h" -@@ -263,6 +264,9 @@ static void ccw_init(MachineState *machine) - /* init the SIGP facility */ - s390_init_sigp(); - -+ /* 
create AP bridge and bus(es) */ -+ s390_init_ap(); -+ - /* get a BUS */ - css_bus = virtual_css_bus_init(); - s390_init_ipl_dev(machine->kernel_filename, machine->kernel_cmdline, -diff --git a/include/hw/s390x/ap-bridge.h b/include/hw/s390x/ap-bridge.h -new file mode 100644 -index 0000000..470e439 ---- /dev/null -+++ b/include/hw/s390x/ap-bridge.h -@@ -0,0 +1,19 @@ -+/* -+ * ap bridge -+ * -+ * Copyright 2018 IBM Corp. -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or (at -+ * your option) any later version. See the COPYING file in the top-level -+ * directory. -+ */ -+ -+#ifndef HW_S390X_AP_BRIDGE_H -+#define HW_S390X_AP_BRIDGE_H -+ -+#define TYPE_AP_BRIDGE "ap-bridge" -+#define TYPE_AP_BUS "ap-bus" -+ -+void s390_init_ap(void); -+ -+#endif -diff --git a/include/hw/s390x/ap-device.h b/include/hw/s390x/ap-device.h -new file mode 100644 -index 0000000..765e908 ---- /dev/null -+++ b/include/hw/s390x/ap-device.h -@@ -0,0 +1,22 @@ -+/* -+ * Adjunct Processor (AP) matrix device interfaces -+ * -+ * Copyright 2018 IBM Corp. -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or (at -+ * your option) any later version. See the COPYING file in the top-level -+ * directory. 
-+ */ -+#ifndef HW_S390X_AP_DEVICE_H -+#define HW_S390X_AP_DEVICE_H -+ -+#define AP_DEVICE_TYPE "ap-device" -+ -+typedef struct APDevice { -+ DeviceState parent_obj; -+} APDevice; -+ -+#define AP_DEVICE(obj) \ -+ OBJECT_CHECK(APDevice, (obj), AP_DEVICE_TYPE) -+ -+#endif /* HW_S390X_AP_DEVICE_H */ --- -1.8.3.1 - diff --git a/0073-s390x-vfio-ap-Introduce-VFIO-AP-device.patch b/0073-s390x-vfio-ap-Introduce-VFIO-AP-device.patch deleted file mode 100644 index d4e8441..0000000 --- a/0073-s390x-vfio-ap-Introduce-VFIO-AP-device.patch +++ /dev/null @@ -1,305 +0,0 @@ -From 9f3a3325bb6859b1d3b46818a7d5b75c5d609f32 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Mon, 15 Oct 2018 10:19:30 +0100 -Subject: s390x/vfio: ap: Introduce VFIO AP device - -RH-Author: Thomas Huth -Message-id: <1539598771-16223-6-git-send-email-thuth@redhat.com> -Patchwork-id: 82700 -O-Subject: [RHEL-8 qemu-kvm PATCH 5/6] s390x/vfio: ap: Introduce VFIO AP device -Bugzilla: 1508142 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck -RH-Acked-by: Jens Freimann - -From: Tony Krowiak - -Introduces a VFIO based AP device. The device is defined via -the QEMU command line by specifying: - - -device vfio-ap,sysfsdev= - -There may be only one vfio-ap device configured for a guest. - -The mediated matrix device is created by the VFIO AP device -driver by writing a UUID to a sysfs attribute file (see -docs/vfio-ap.txt). The mediated matrix device will be named -after the UUID. Symbolic links to the $uuid are created in -many places, so the path to the mediated matrix device $uuid -can be specified in any of the following ways: - -/sys/devices/vfio_ap/matrix/$uuid -/sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough/devices/$uuid -/sys/bus/mdev/devices/$uuid -/sys/bus/mdev/drivers/vfio_mdev/$uuid - -When the vfio-ap device is realized, it acquires and opens the -VFIO iommu group to which the mediated matrix device is -bound. This causes a VFIO group notification event to be -signaled. 
The vfio_ap device driver's group notification -handler will get called at which time the device driver -will configure the the AP devices to which the guest will -be granted access. - -Signed-off-by: Tony Krowiak -Tested-by: Pierre Morel -Acked-by: Halil Pasic -Tested-by: Pierre Morel -Tested-by: Christian Borntraeger -Message-Id: <20181010170309.12045-6-akrowiak@linux.ibm.com> -Reviewed-by: Thomas Huth -[CH: added missing g_free and device category] -Signed-off-by: Cornelia Huck - -(cherry picked from commit 2fe2942cd6ddad8ddd40fe5d16d67599c28959d7) -Signed-off-by: Danilo C. L. de Paula ---- - MAINTAINERS | 2 + - default-configs/s390x-softmmu.mak | 1 + - hw/vfio/Makefile.objs | 1 + - hw/vfio/ap.c | 181 ++++++++++++++++++++++++++++++++++++++ - include/hw/vfio/vfio-common.h | 1 + - 5 files changed, 186 insertions(+) - create mode 100644 hw/vfio/ap.c - -diff --git a/MAINTAINERS b/MAINTAINERS -index d5b3c18..f2fa1b8 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -88,6 +88,7 @@ F: hw/char/terminal3270.c - F: hw/intc/s390_flic.c - F: hw/intc/s390_flic_kvm.c - F: hw/s390x/ -+F: hw/vfio/ap.c - F: hw/vfio/ccw.c - F: hw/watchdog/wdt_diag288.c - F: include/hw/s390x/ -@@ -1194,6 +1195,7 @@ F: hw/s390x/ap-device.c - F: hw/s390x/ap-bridge.c - F: include/hw/s390x/ap-device.h - F: include/hw/s390x/ap-bridge.h -+F: hw/vfio/ap.c - L: qemu-s390x@nongnu.org - - vhost -diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak -index 8b2db3e..49a59fc 100644 ---- a/default-configs/s390x-softmmu.mak -+++ b/default-configs/s390x-softmmu.mak -@@ -8,3 +8,4 @@ CONFIG_S390_FLIC_KVM=$(CONFIG_KVM) - # Disabled for Red Hat Enterprise Linux: - # CONFIG_VFIO_CCW=$(CONFIG_LINUX) - CONFIG_WDT_DIAG288=y -+CONFIG_VFIO_AP=$(CONFIG_LINUX) -diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs -index d38205b..53b4cbe 100644 ---- a/hw/vfio/Makefile.objs -+++ b/hw/vfio/Makefile.objs -@@ -5,4 +5,5 @@ obj-$(CONFIG_VFIO_CCW) += ccw.o - obj-$(CONFIG_VFIO_XGMAC) += 
calxeda-xgmac.o - obj-$(CONFIG_VFIO_AMD_XGBE) += amd-xgbe.o - obj-$(CONFIG_SOFTMMU) += spapr.o -+obj-$(CONFIG_VFIO_AP) += ap.o - endif -diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c -new file mode 100644 -index 0000000..3962bb7 ---- /dev/null -+++ b/hw/vfio/ap.c -@@ -0,0 +1,181 @@ -+/* -+ * VFIO based AP matrix device assignment -+ * -+ * Copyright 2018 IBM Corp. -+ * Author(s): Tony Krowiak -+ * Halil Pasic -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or (at -+ * your option) any later version. See the COPYING file in the top-level -+ * directory. -+ */ -+ -+#include -+#include -+#include "qemu/osdep.h" -+#include "qapi/error.h" -+#include "hw/sysbus.h" -+#include "hw/vfio/vfio.h" -+#include "hw/vfio/vfio-common.h" -+#include "hw/s390x/ap-device.h" -+#include "qemu/error-report.h" -+#include "qemu/queue.h" -+#include "qemu/option.h" -+#include "qemu/config-file.h" -+#include "cpu.h" -+#include "kvm_s390x.h" -+#include "sysemu/sysemu.h" -+#include "hw/s390x/ap-bridge.h" -+#include "exec/address-spaces.h" -+ -+#define VFIO_AP_DEVICE_TYPE "vfio-ap" -+ -+typedef struct VFIOAPDevice { -+ APDevice apdev; -+ VFIODevice vdev; -+} VFIOAPDevice; -+ -+#define VFIO_AP_DEVICE(obj) \ -+ OBJECT_CHECK(VFIOAPDevice, (obj), VFIO_AP_DEVICE_TYPE) -+ -+static void vfio_ap_compute_needs_reset(VFIODevice *vdev) -+{ -+ vdev->needs_reset = false; -+} -+ -+/* -+ * We don't need vfio_hot_reset_multi and vfio_eoi operations for -+ * vfio-ap device now. 
-+ */ -+struct VFIODeviceOps vfio_ap_ops = { -+ .vfio_compute_needs_reset = vfio_ap_compute_needs_reset, -+}; -+ -+static void vfio_ap_put_device(VFIOAPDevice *vapdev) -+{ -+ g_free(vapdev->vdev.name); -+ vfio_put_base_device(&vapdev->vdev); -+} -+ -+static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp) -+{ -+ GError *gerror = NULL; -+ char *symlink, *group_path; -+ int groupid; -+ -+ symlink = g_strdup_printf("%s/iommu_group", vapdev->vdev.sysfsdev); -+ group_path = g_file_read_link(symlink, &gerror); -+ g_free(symlink); -+ -+ if (!group_path) { -+ error_setg(errp, "%s: no iommu_group found for %s: %s", -+ VFIO_AP_DEVICE_TYPE, vapdev->vdev.sysfsdev, gerror->message); -+ return NULL; -+ } -+ -+ if (sscanf(basename(group_path), "%d", &groupid) != 1) { -+ error_setg(errp, "vfio: failed to read %s", group_path); -+ g_free(group_path); -+ return NULL; -+ } -+ -+ g_free(group_path); -+ -+ return vfio_get_group(groupid, &address_space_memory, errp); -+} -+ -+static void vfio_ap_realize(DeviceState *dev, Error **errp) -+{ -+ int ret; -+ char *mdevid; -+ Error *local_err = NULL; -+ VFIOGroup *vfio_group; -+ APDevice *apdev = AP_DEVICE(dev); -+ VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); -+ -+ vfio_group = vfio_ap_get_group(vapdev, &local_err); -+ if (!vfio_group) { -+ goto out_err; -+ } -+ -+ vapdev->vdev.ops = &vfio_ap_ops; -+ vapdev->vdev.type = VFIO_DEVICE_TYPE_AP; -+ mdevid = basename(vapdev->vdev.sysfsdev); -+ vapdev->vdev.name = g_strdup_printf("%s", mdevid); -+ vapdev->vdev.dev = dev; -+ -+ ret = vfio_get_device(vfio_group, mdevid, &vapdev->vdev, &local_err); -+ if (ret) { -+ goto out_get_dev_err; -+ } -+ -+ return; -+ -+out_get_dev_err: -+ vfio_ap_put_device(vapdev); -+ vfio_put_group(vfio_group); -+out_err: -+ error_propagate(errp, local_err); -+} -+ -+static void vfio_ap_unrealize(DeviceState *dev, Error **errp) -+{ -+ APDevice *apdev = AP_DEVICE(dev); -+ VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); -+ VFIOGroup *group = vapdev->vdev.group; 
-+ -+ vfio_ap_put_device(vapdev); -+ vfio_put_group(group); -+} -+ -+static Property vfio_ap_properties[] = { -+ DEFINE_PROP_STRING("sysfsdev", VFIOAPDevice, vdev.sysfsdev), -+ DEFINE_PROP_END_OF_LIST(), -+}; -+ -+static void vfio_ap_reset(DeviceState *dev) -+{ -+ int ret; -+ APDevice *apdev = AP_DEVICE(dev); -+ VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); -+ -+ ret = ioctl(vapdev->vdev.fd, VFIO_DEVICE_RESET); -+ if (ret) { -+ error_report("%s: failed to reset %s device: %s", __func__, -+ vapdev->vdev.name, strerror(ret)); -+ } -+} -+ -+static const VMStateDescription vfio_ap_vmstate = { -+ .name = VFIO_AP_DEVICE_TYPE, -+ .unmigratable = 1, -+}; -+ -+static void vfio_ap_class_init(ObjectClass *klass, void *data) -+{ -+ DeviceClass *dc = DEVICE_CLASS(klass); -+ -+ dc->props = vfio_ap_properties; -+ dc->vmsd = &vfio_ap_vmstate; -+ dc->desc = "VFIO-based AP device assignment"; -+ set_bit(DEVICE_CATEGORY_MISC, dc->categories); -+ dc->realize = vfio_ap_realize; -+ dc->unrealize = vfio_ap_unrealize; -+ dc->hotpluggable = false; -+ dc->reset = vfio_ap_reset; -+ dc->bus_type = TYPE_AP_BUS; -+} -+ -+static const TypeInfo vfio_ap_info = { -+ .name = VFIO_AP_DEVICE_TYPE, -+ .parent = AP_DEVICE_TYPE, -+ .instance_size = sizeof(VFIOAPDevice), -+ .class_init = vfio_ap_class_init, -+}; -+ -+static void vfio_ap_type_init(void) -+{ -+ type_register_static(&vfio_ap_info); -+} -+ -+type_init(vfio_ap_type_init) -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index a903692..1389da4 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -37,6 +37,7 @@ enum { - VFIO_DEVICE_TYPE_PCI = 0, - VFIO_DEVICE_TYPE_PLATFORM = 1, - VFIO_DEVICE_TYPE_CCW = 2, -+ VFIO_DEVICE_TYPE_AP = 3, - }; - - typedef struct VFIOMmap { --- -1.8.3.1 - diff --git a/0074-s390-doc-detailed-specifications-for-AP-virtualizati.patch b/0074-s390-doc-detailed-specifications-for-AP-virtualizati.patch deleted file mode 100644 index 166fbc5..0000000 --- 
a/0074-s390-doc-detailed-specifications-for-AP-virtualizati.patch +++ /dev/null @@ -1,889 +0,0 @@ -From 8f59c31a8b0c4cde4bc92126d7102c1be9da97d4 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Mon, 15 Oct 2018 10:19:31 +0100 -Subject: s390: doc: detailed specifications for AP virtualization - -RH-Author: Thomas Huth -Message-id: <1539598771-16223-7-git-send-email-thuth@redhat.com> -Patchwork-id: 82699 -O-Subject: [RHEL-8 qemu-kvm PATCH 6/6] s390: doc: detailed specifications for AP virtualization -Bugzilla: 1508142 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck -RH-Acked-by: Jens Freimann - -From: Tony Krowiak - -This patch provides documentation describing the AP architecture and -design concepts behind the virtualization of AP devices. It also -includes an example of how to configure AP devices for exclusive -use of KVM guests. - -Signed-off-by: Tony Krowiak -Reviewed-by: Pierre Morel -Tested-by: Pierre Morel -Tested-by: Christian Borntraeger -Message-Id: <20181010170309.12045-7-akrowiak@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 694a8d703bfe06226a0574f5ec4af17a2b7060ef) -Signed-off-by: Danilo C. L. de Paula ---- - MAINTAINERS | 2 + - docs/vfio-ap.txt | 825 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 827 insertions(+) - create mode 100644 docs/vfio-ap.txt - -diff --git a/MAINTAINERS b/MAINTAINERS -index f2fa1b8..fdbfc04 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -96,6 +96,7 @@ F: include/hw/watchdog/wdt_diag288.h - F: pc-bios/s390-ccw/ - F: pc-bios/s390-ccw.img - F: target/s390x/ -+F: docs/vfio-ap.txt - K: ^Subject:.*(?i)s390x? 
- T: git git://github.com/cohuck/qemu.git s390-next - L: qemu-s390x@nongnu.org -@@ -1196,6 +1197,7 @@ F: hw/s390x/ap-bridge.c - F: include/hw/s390x/ap-device.h - F: include/hw/s390x/ap-bridge.h - F: hw/vfio/ap.c -+F: docs/vfio-ap.txt - L: qemu-s390x@nongnu.org - - vhost -diff --git a/docs/vfio-ap.txt b/docs/vfio-ap.txt -new file mode 100644 -index 0000000..1233968 ---- /dev/null -+++ b/docs/vfio-ap.txt -@@ -0,0 +1,825 @@ -+Adjunct Processor (AP) Device -+============================= -+ -+Contents: -+========= -+* Introduction -+* AP Architectural Overview -+* Start Interpretive Execution (SIE) Instruction -+* AP Matrix Configuration on Linux Host -+* Starting a Linux Guest Configured with an AP Matrix -+* Example: Configure AP Matrices for Three Linux Guests -+ -+Introduction: -+============ -+The IBM Adjunct Processor (AP) Cryptographic Facility is comprised -+of three AP instructions and from 1 to 256 PCIe cryptographic adapter cards. -+These AP devices provide cryptographic functions to all CPUs assigned to a -+linux system running in an IBM Z system LPAR. -+ -+On s390x, AP adapter cards are exposed via the AP bus. This document -+describes how those cards may be made available to KVM guests using the -+VFIO mediated device framework. -+ -+AP Architectural Overview: -+========================= -+In order understand the terminology used in the rest of this document, let's -+start with some definitions: -+ -+* AP adapter -+ -+ An AP adapter is an IBM Z adapter card that can perform cryptographic -+ functions. There can be from 0 to 256 adapters assigned to an LPAR depending -+ on the machine model. Adapters assigned to the LPAR in which a linux host is -+ running will be available to the linux host. Each adapter is identified by a -+ number from 0 to 255; however, the maximum adapter number allowed is -+ determined by machine model. When installed, an AP adapter is accessed by -+ AP instructions executed by any CPU. 
-+ -+* AP domain -+ -+ An adapter is partitioned into domains. Each domain can be thought of as -+ a set of hardware registers for processing AP instructions. An adapter can -+ hold up to 256 domains; however, the maximum domain number allowed is -+ determined by machine model. Each domain is identified by a number from 0 to -+ 255. Domains can be further classified into two types: -+ -+ * Usage domains are domains that can be accessed directly to process AP -+ commands -+ -+ * Control domains are domains that are accessed indirectly by AP -+ commands sent to a usage domain to control or change the domain; for -+ example, to set a secure private key for the domain. -+ -+* AP Queue -+ -+ An AP queue is the means by which an AP command-request message is sent to an -+ AP usage domain inside a specific AP. An AP queue is identified by a tuple -+ comprised of an AP adapter ID (APID) and an AP queue index (APQI). The -+ APQI corresponds to a given usage domain number within the adapter. This tuple -+ forms an AP Queue Number (APQN) uniquely identifying an AP queue. AP -+ instructions include a field containing the APQN to identify the AP queue to -+ which the AP command-request message is to be sent for processing. -+ -+* AP Instructions: -+ -+ There are three AP instructions: -+ -+ * NQAP: to enqueue an AP command-request message to a queue -+ * DQAP: to dequeue an AP command-reply message from a queue -+ * PQAP: to administer the queues -+ -+ AP instructions identify the domain that is targeted to process the AP -+ command; this must be one of the usage domains. An AP command may modify a -+ domain that is not one of the usage domains, but the modified domain -+ must be one of the control domains. -+ -+Start Interpretive Execution (SIE) Instruction -+============================================== -+A KVM guest is started by executing the Start Interpretive Execution (SIE) -+instruction. 
The SIE state description is a control block that contains the -+state information for a KVM guest and is supplied as input to the SIE -+instruction. The SIE state description contains a satellite control block called -+the Crypto Control Block (CRYCB). The CRYCB contains three fields to identify -+the adapters, usage domains and control domains assigned to the KVM guest: -+ -+* The AP Mask (APM) field is a bit mask that identifies the AP adapters assigned -+ to the KVM guest. Each bit in the mask, from left to right, corresponds to -+ an APID from 0-255. If a bit is set, the corresponding adapter is valid for -+ use by the KVM guest. -+ -+* The AP Queue Mask (AQM) field is a bit mask identifying the AP usage domains -+ assigned to the KVM guest. Each bit in the mask, from left to right, -+ corresponds to an AP queue index (APQI) from 0-255. If a bit is set, the -+ corresponding queue is valid for use by the KVM guest. -+ -+* The AP Domain Mask field is a bit mask that identifies the AP control domains -+ assigned to the KVM guest. The ADM bit mask controls which domains can be -+ changed by an AP command-request message sent to a usage domain from the -+ guest. Each bit in the mask, from left to right, corresponds to a domain from -+ 0-255. If a bit is set, the corresponding domain can be modified by an AP -+ command-request message sent to a usage domain. -+ -+If you recall from the description of an AP Queue, AP instructions include -+an APQN to identify the AP adapter and AP queue to which an AP command-request -+message is to be sent (NQAP and PQAP instructions), or from which a -+command-reply message is to be received (DQAP instruction). The validity of an -+APQN is defined by the matrix calculated from the APM and AQM; it is the -+cross product of all assigned adapter numbers (APM) with all assigned queue -+indexes (AQM). 
For example, if adapters 1 and 2 and usage domains 5 and 6 are -+assigned to a guest, the APQNs (1,5), (1,6), (2,5) and (2,6) will be valid for -+the guest. -+ -+The APQNs can provide secure key functionality - i.e., a private key is stored -+on the adapter card for each of its domains - so each APQN must be assigned to -+at most one guest or the linux host. -+ -+ Example 1: Valid configuration: -+ ------------------------------ -+ Guest1: adapters 1,2 domains 5,6 -+ Guest2: adapter 1,2 domain 7 -+ -+ This is valid because both guests have a unique set of APQNs: Guest1 has -+ APQNs (1,5), (1,6), (2,5) and (2,6); Guest2 has APQNs (1,7) and (2,7). -+ -+ Example 2: Valid configuration: -+ ------------------------------ -+ Guest1: adapters 1,2 domains 5,6 -+ Guest2: adapters 3,4 domains 5,6 -+ -+ This is also valid because both guests have a unique set of APQNs: -+ Guest1 has APQNs (1,5), (1,6), (2,5), (2,6); -+ Guest2 has APQNs (3,5), (3,6), (4,5), (4,6) -+ -+ Example 3: Invalid configuration: -+ -------------------------------- -+ Guest1: adapters 1,2 domains 5,6 -+ Guest2: adapter 1 domains 6,7 -+ -+ This is an invalid configuration because both guests have access to -+ APQN (1,6). -+ -+AP Matrix Configuration on Linux Host: -+===================================== -+A linux system is a guest of the LPAR in which it is running and has access to -+the AP resources configured for the LPAR. The LPAR's AP matrix is -+configured via its Activation Profile which can be edited on the HMC. When the -+linux system is started, the AP bus will detect the AP devices assigned to the -+LPAR and create the following in sysfs: -+ -+/sys/bus/ap -+... [devices] -+...... xx.yyyy -+...... ... -+...... cardxx -+...... ... 
-+ -+Where: -+ cardxx is AP adapter number xx (in hex) -+....xx.yyyy is an APQN with xx specifying the APID and yyyy specifying the -+ APQI -+ -+For example, if AP adapters 5 and 6 and domains 4, 71 (0x47), 171 (0xab) and -+255 (0xff) are configured for the LPAR, the sysfs representation on the linux -+host system would look like this: -+ -+/sys/bus/ap -+... [devices] -+...... 05.0004 -+...... 05.0047 -+...... 05.00ab -+...... 05.00ff -+...... 06.0004 -+...... 06.0047 -+...... 06.00ab -+...... 06.00ff -+...... card05 -+...... card06 -+ -+A set of default device drivers are also created to control each type of AP -+device that can be assigned to the LPAR on which a linux host is running: -+ -+/sys/bus/ap -+... [drivers] -+...... [cex2acard] for Crypto Express 2/3 accelerator cards -+...... [cex2aqueue] for AP queues served by Crypto Express 2/3 -+ accelerator cards -+...... [cex4card] for Crypto Express 4/5/6 accelerator and coprocessor -+ cards -+...... [cex4queue] for AP queues served by Crypto Express 4/5/6 -+ accelerator and coprocessor cards -+...... [pcixcccard] for Crypto Express 2/3 coprocessor cards -+...... [pcixccqueue] for AP queues served by Crypto Express 2/3 -+ coprocessor cards -+ -+Binding AP devices to device drivers -+------------------------------------ -+There are two sysfs files that specify bitmasks marking a subset of the APQN -+range as 'usable by the default AP queue device drivers' or 'not usable by the -+default device drivers' and thus available for use by the alternate device -+driver(s). The sysfs locations of the masks are: -+ -+ /sys/bus/ap/apmask -+ /sys/bus/ap/aqmask -+ -+ The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs -+ (APID). Each bit in the mask, from left to right (i.e., from most significant -+ to least significant bit in big endian order), corresponds to an APID from -+ 0-255. 
If a bit is set, the APID is marked as usable only by the default AP -+ queue device drivers; otherwise, the APID is usable by the vfio_ap -+ device driver. -+ -+ The 'aqmask' is a 256-bit mask that identifies a set of AP queue indexes -+ (APQI). Each bit in the mask, from left to right (i.e., from most significant -+ to least significant bit in big endian order), corresponds to an APQI from -+ 0-255. If a bit is set, the APQI is marked as usable only by the default AP -+ queue device drivers; otherwise, the APQI is usable by the vfio_ap device -+ driver. -+ -+ Take, for example, the following mask: -+ -+ 0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff -+ -+ It indicates: -+ -+ 1, 2, 3, 4, 5, and 7-255 belong to the default drivers' pool, and 0 and 6 -+ belong to the vfio_ap device driver's pool. -+ -+ The APQN of each AP queue device assigned to the linux host is checked by the -+ AP bus against the set of APQNs derived from the cross product of APIDs -+ and APQIs marked as usable only by the default AP queue device drivers. If a -+ match is detected, only the default AP queue device drivers will be probed; -+ otherwise, the vfio_ap device driver will be probed. -+ -+ By default, the two masks are set to reserve all APQNs for use by the default -+ AP queue device drivers. There are two ways the default masks can be changed: -+ -+ 1. The sysfs mask files can be edited by echoing a string into the -+ respective sysfs mask file in one of two formats: -+ -+ * An absolute hex string starting with 0x - like "0x12345678" - sets -+ the mask. 
If the given string is shorter than the mask, it is padded -+ with 0s on the right; for example, specifying a mask value of 0x41 is -+ the same as specifying: -+ -+ 0x4100000000000000000000000000000000000000000000000000000000000000 -+ -+ Keep in mind that the mask reads from left to right (i.e., most -+ significant to least significant bit in big endian order), so the mask -+ above identifies device numbers 1 and 7 (01000001). -+ -+ If the string is longer than the mask, the operation is terminated with -+ an error (EINVAL). -+ -+ * Individual bits in the mask can be switched on and off by specifying -+ each bit number to be switched in a comma separated list. Each bit -+ number string must be prepended with a ('+') or minus ('-') to indicate -+ the corresponding bit is to be switched on ('+') or off ('-'). Some -+ valid values are: -+ -+ "+0" switches bit 0 on -+ "-13" switches bit 13 off -+ "+0x41" switches bit 65 on -+ "-0xff" switches bit 255 off -+ -+ The following example: -+ +0,-6,+0x47,-0xf0 -+ -+ Switches bits 0 and 71 (0x47) on -+ Switches bits 6 and 240 (0xf0) off -+ -+ Note that the bits not specified in the list remain as they were before -+ the operation. -+ -+ 2. The masks can also be changed at boot time via parameters on the kernel -+ command line like this: -+ -+ ap.apmask=0xffff ap.aqmask=0x40 -+ -+ This would create the following masks: -+ -+ apmask: -+ 0xffff000000000000000000000000000000000000000000000000000000000000 -+ -+ aqmask: -+ 0x4000000000000000000000000000000000000000000000000000000000000000 -+ -+ Resulting in these two pools: -+ -+ default drivers pool: adapter 0-15, domain 1 -+ alternate drivers pool: adapter 16-255, domains 0, 2-255 -+ -+Configuring an AP matrix for a linux guest. -+------------------------------------------ -+The sysfs interfaces for configuring an AP matrix for a guest are built on the -+VFIO mediated device framework. 
To configure an AP matrix for a guest, a -+mediated matrix device must first be created for the /sys/devices/vfio_ap/matrix -+device. When the vfio_ap device driver is loaded, it registers with the VFIO -+mediated device framework. When the driver registers, the sysfs interfaces for -+creating mediated matrix devices is created: -+ -+/sys/devices -+... [vfio_ap] -+......[matrix] -+......... [mdev_supported_types] -+............ [vfio_ap-passthrough] -+............... create -+............... [devices] -+ -+A mediated AP matrix device is created by writing a UUID to the attribute file -+named 'create', for example: -+ -+ uuidgen > create -+ -+ or -+ -+ echo $uuid > create -+ -+When a mediated AP matrix device is created, a sysfs directory named after -+the UUID is created in the 'devices' subdirectory: -+ -+/sys/devices -+... [vfio_ap] -+......[matrix] -+......... [mdev_supported_types] -+............ [vfio_ap-passthrough] -+............... create -+............... [devices] -+.................. [$uuid] -+ -+There will also be three sets of attribute files created in the mediated -+matrix device's sysfs directory to configure an AP matrix for the -+KVM guest: -+ -+/sys/devices -+... [vfio_ap] -+......[matrix] -+......... [mdev_supported_types] -+............ [vfio_ap-passthrough] -+............... create -+............... [devices] -+.................. [$uuid] -+..................... assign_adapter -+..................... assign_control_domain -+..................... assign_domain -+..................... matrix -+..................... unassign_adapter -+..................... unassign_control_domain -+..................... unassign_domain -+ -+assign_adapter -+ To assign an AP adapter to the mediated matrix device, its APID is written -+ to the 'assign_adapter' file. This may be done multiple times to assign more -+ than one adapter. The APID may be specified using conventional semantics -+ as a decimal, hexadecimal, or octal number. 
For example, to assign adapters -+ 4, 5 and 16 to a mediated matrix device in decimal, hexadecimal and octal -+ respectively: -+ -+ echo 4 > assign_adapter -+ echo 0x5 > assign_adapter -+ echo 020 > assign_adapter -+ -+ In order to successfully assign an adapter: -+ -+ * The adapter number specified must represent a value from 0 up to the -+ maximum adapter number allowed by the machine model. If an adapter number -+ higher than the maximum is specified, the operation will terminate with -+ an error (ENODEV). -+ -+ * All APQNs that can be derived from the adapter ID being assigned and the -+ IDs of the previously assigned domains must be bound to the vfio_ap device -+ driver. If no domains have yet been assigned, then there must be at least -+ one APQN with the specified APID bound to the vfio_ap driver. If no such -+ APQNs are bound to the driver, the operation will terminate with an -+ error (EADDRNOTAVAIL). -+ -+ No APQN that can be derived from the adapter ID and the IDs of the -+ previously assigned domains can be assigned to another mediated matrix -+ device. If an APQN is assigned to another mediated matrix device, the -+ operation will terminate with an error (EADDRINUSE). -+ -+unassign_adapter -+ To unassign an AP adapter, its APID is written to the 'unassign_adapter' -+ file. This may also be done multiple times to unassign more than one adapter. -+ -+assign_domain -+ To assign a usage domain, the domain number is written into the -+ 'assign_domain' file. This may be done multiple times to assign more than one -+ usage domain. The domain number is specified using conventional semantics as -+ a decimal, hexadecimal, or octal number. 
For example, to assign usage domains -+ 4, 8, and 71 to a mediated matrix device in decimal, hexadecimal and octal -+ respectively: -+ -+ echo 4 > assign_domain -+ echo 0x8 > assign_domain -+ echo 0107 > assign_domain -+ -+ In order to successfully assign a domain: -+ -+ * The domain number specified must represent a value from 0 up to the -+ maximum domain number allowed by the machine model. If a domain number -+ higher than the maximum is specified, the operation will terminate with -+ an error (ENODEV). -+ -+ * All APQNs that can be derived from the domain ID being assigned and the IDs -+ of the previously assigned adapters must be bound to the vfio_ap device -+ driver. If no domains have yet been assigned, then there must be at least -+ one APQN with the specified APQI bound to the vfio_ap driver. If no such -+ APQNs are bound to the driver, the operation will terminate with an -+ error (EADDRNOTAVAIL). -+ -+ No APQN that can be derived from the domain ID being assigned and the IDs -+ of the previously assigned adapters can be assigned to another mediated -+ matrix device. If an APQN is assigned to another mediated matrix device, -+ the operation will terminate with an error (EADDRINUSE). -+ -+unassign_domain -+ To unassign a usage domain, the domain number is written into the -+ 'unassign_domain' file. This may be done multiple times to unassign more than -+ one usage domain. -+ -+assign_control_domain -+ To assign a control domain, the domain number is written into the -+ 'assign_control_domain' file. This may be done multiple times to -+ assign more than one control domain. The domain number may be specified using -+ conventional semantics as a decimal, hexadecimal, or octal number. 
For -+ example, to assign control domains 4, 8, and 71 to a mediated matrix device -+ in decimal, hexadecimal and octal respectively: -+ -+ echo 4 > assign_domain -+ echo 0x8 > assign_domain -+ echo 0107 > assign_domain -+ -+ In order to successfully assign a control domain, the domain number -+ specified must represent a value from 0 up to the maximum domain number -+ allowed by the machine model. If a control domain number higher than the -+ maximum is specified, the operation will terminate with an error (ENODEV). -+ -+unassign_control_domain -+ To unassign a control domain, the domain number is written into the -+ 'unassign_domain' file. This may be done multiple times to unassign more than -+ one control domain. -+ -+Notes: Hot plug/unplug is not currently supported for mediated AP matrix -+devices, so no changes to the AP matrix will be allowed while a guest using -+the mediated matrix device is running. Attempts to assign an adapter, -+domain or control domain will be rejected and an error (EBUSY) returned. -+ -+Starting a Linux Guest Configured with an AP Matrix: -+=================================================== -+To provide a mediated matrix device for use by a guest, the following option -+must be specified on the QEMU command line: -+ -+ -device vfio_ap,sysfsdev=$path-to-mdev -+ -+The sysfsdev parameter specifies the path to the mediated matrix device. -+There are a number of ways to specify this path: -+ -+/sys/devices/vfio_ap/matrix/$uuid -+/sys/bus/mdev/devices/$uuid -+/sys/bus/mdev/drivers/vfio_mdev/$uuid -+/sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough/devices/$uuid -+ -+When the linux guest is started, the guest will open the mediated -+matrix device's file descriptor to get information about the mediated matrix -+device. 
The vfio_ap device driver will update the APM, AQM, and ADM fields in -+the guest's CRYCB with the adapter, usage domain and control domains assigned -+via the mediated matrix device's sysfs attribute files. Programs running on the -+linux guest will then: -+ -+1. Have direct access to the APQNs derived from the cross product of the AP -+ adapter numbers (APID) and queue indexes (APQI) specified in the APM and AQM -+ fields of the guest's CRYCB respectively. These APQNs identify the AP queues -+ that are valid for use by the guest; meaning, AP commands can be sent by the -+ guest to any of these queues for processing. -+ -+2. Have authorization to process AP commands to change a control domain -+ identified in the ADM field of the guest's CRYCB. The AP command must be sent -+ to a valid APQN (see 1 above). -+ -+CPU model features: -+ -+Three CPU model features are available for controlling guest access to AP -+facilities: -+ -+1. AP facilities feature -+ -+ The AP facilities feature indicates that AP facilities are installed on the -+ guest. This feature will be exposed for use only if the AP facilities -+ are installed on the host system. The feature is s390-specific and is -+ represented as a parameter of the -cpu option on the QEMU command line: -+ -+ qemu-system-s390x -cpu $model,ap=on|off -+ -+ Where: -+ -+ $model is the CPU model defined for the guest (defaults to the model of -+ the host system if not specified). -+ -+ ap=on|off indicates whether AP facilities are installed (on) or not -+ (off). The default for CPU models zEC12 or newer -+ is ap=on. AP facilities must be installed on the guest if a -+ vfio-ap device (-device vfio-ap,sysfsdev=$path) is configured -+ for the guest, or the guest will fail to start. -+ -+2. Query Configuration Information (QCI) facility -+ -+ The QCI facility is used by the AP bus running on the guest to query the -+ configuration of the AP facilities.
This facility will be available -+ only if the QCI facility is installed on the host system. The feature is -+ s390-specific and is represented as a parameter of the -cpu option on the -+ QEMU command line: -+ -+ qemu-system-s390x -cpu $model,apqci=on|off -+ -+ Where: -+ -+ $model is the CPU model defined for the guest -+ -+ apqci=on|off indicates whether the QCI facility is installed (on) or -+ not (off). The default for CPU models zEC12 or newer -+ is apqci=on; for older models, QCI will not be installed. -+ -+ If QCI is installed (apqci=on) but AP facilities are not -+ (ap=off), an error message will be logged, but the guest -+ will be allowed to start. It makes no sense to have QCI -+ installed if the AP facilities are not; this is considered -+ an invalid configuration. -+ -+ If the QCI facility is not installed, APQNs with an APQI -+ greater than 15 will not be detected by the AP bus -+ running on the guest. -+ -+3. Adjunct Process Facility Test (APFT) facility -+ -+ The APFT facility is used by the AP bus running on the guest to test the -+ AP facilities available for a given AP queue. This facility will be available -+ only if the APFT facility is installed on the host system. The feature is -+ s390-specific and is represented as a parameter of the -cpu option on the -+ QEMU command line: -+ -+ qemu-system-s390x -cpu $model,apft=on|off -+ -+ Where: -+ -+ $model is the CPU model defined for the guest (defaults to the model of -+ the host system if not specified). -+ -+ apft=on|off indicates whether the APFT facility is installed (on) or -+ not (off). The default for CPU models zEC12 and -+ newer is apft=on; for older models, APFT will not be -+ installed. -+ -+ If APFT is installed (apft=on) but AP facilities are not -+ (ap=off), an error message will be logged, but the guest -+ will be allowed to start. It makes no sense to have APFT -+ installed if the AP facilities are not; this is considered -+ an invalid configuration.
-+ -+ It also makes no sense to turn APFT off because the AP bus -+ running on the guest will not detect CEX4 and newer devices -+ without it. Since only CEX4 and newer devices are supported -+ for guest usage, no AP devices can be made accessible to a -+ guest started without APFT installed. -+ -+Example: Configure AP Matrixes for Three Linux Guests: -+===================================================== -+Let's now provide an example to illustrate how KVM guests may be given -+access to AP facilities. For this example, we will show how to configure -+three guests such that executing the lszcrypt command on the guests would -+look like this: -+ -+Guest1 -+------ -+CARD.DOMAIN TYPE MODE -+------------------------------ -+05 CEX5C CCA-Coproc -+05.0004 CEX5C CCA-Coproc -+05.00ab CEX5C CCA-Coproc -+06 CEX5A Accelerator -+06.0004 CEX5A Accelerator -+06.00ab CEX5A Accelerator -+ -+Guest2 -+------ -+CARD.DOMAIN TYPE MODE -+------------------------------ -+05 CEX5A Accelerator -+05.0047 CEX5A Accelerator -+05.00ff CEX5A Accelerator -+ -+Guest3 -+------ -+CARD.DOMAIN TYPE MODE -+------------------------------ -+06 CEX5A Accelerator -+06.0047 CEX5A Accelerator -+06.00ff CEX5A Accelerator -+ -+These are the steps: -+ -+1. Install the vfio_ap module on the linux host.
The dependency chain for the -+ vfio_ap module is: -+ * iommu -+ * s390 -+ * zcrypt -+ * vfio -+ * vfio_mdev -+ * vfio_mdev_device -+ * KVM -+ -+ To build the vfio_ap module, the kernel build must be configured with the -+ following Kconfig elements selected: -+ * IOMMU_SUPPORT -+ * S390 -+ * ZCRYPT -+ * S390_AP_IOMMU -+ * VFIO -+ * VFIO_MDEV -+ * VFIO_MDEV_DEVICE -+ * KVM -+ -+ If using make menuconfig select the following to build the vfio_ap module: -+ -> Device Drivers -+ -> IOMMU Hardware Support -+ select S390 AP IOMMU Support -+ -> VFIO Non-Privileged userspace driver framework -+ -> Mediated device driver framework -+ -> VFIO driver for Mediated devices -+ -> I/O subsystem -+ -> VFIO support for AP devices -+ -+2. Secure the AP queues to be used by the three guests so that the host can not -+ access them. To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, -+ 06.0004, 06.0047, 06.00ab, and 06.00ff for use by the vfio_ap device driver, -+ the corresponding APQNs must be removed from the default queue drivers pool -+ as follows: -+ -+ echo -5,-6 > /sys/bus/ap/apmask -+ -+ echo -4,-0x47,-0xab,-0xff > /sys/bus/ap/aqmask -+ -+ This will result in AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, -+ 06.0047, 06.00ab, and 06.00ff getting bound to the vfio_ap device driver. The -+ sysfs directory for the vfio_ap device driver will now contain symbolic links -+ to the AP queue devices bound to it: -+ -+ /sys/bus/ap -+ ... [drivers] -+ ...... [vfio_ap] -+ ......... [05.0004] -+ ......... [05.0047] -+ ......... [05.00ab] -+ ......... [05.00ff] -+ ......... [06.0004] -+ ......... [06.0047] -+ ......... [06.00ab] -+ ......... [06.00ff] -+ -+ Keep in mind that only type 10 and newer adapters (i.e., CEX4 and later) -+ can be bound to the vfio_ap device driver.
The reason for this is to -+ simplify the implementation by not needlessly complicating the design by -+ supporting older devices that will go out of service in the relatively near -+ future, and for which there are few older systems on which to test. -+ -+ The administrator, therefore, must take care to secure only AP queues that -+ can be bound to the vfio_ap device driver. The device type for a given AP -+ queue device can be read from the parent card's sysfs directory. For example, -+ to see the hardware type of the queue 05.0004: -+ -+ cat /sys/bus/ap/devices/card05/hwtype -+ -+ The hwtype must be 10 or higher (CEX4 or newer) in order to be bound to the -+ vfio_ap device driver. -+ -+3. Create the mediated devices needed to configure the AP matrixes for the -+ three guests and to provide an interface to the vfio_ap driver for -+ use by the guests: -+ -+ /sys/devices/vfio_ap/matrix/ -+ --- [mdev_supported_types] -+ ------ [vfio_ap-passthrough] (passthrough mediated matrix device type) -+ --------- create -+ --------- [devices] -+ -+ To create the mediated devices for the three guests: -+ -+ uuidgen > create -+ uuidgen > create -+ uuidgen > create -+ -+ or -+ -+ echo $uuid1 > create -+ echo $uuid2 > create -+ echo $uuid3 > create -+ -+ This will create three mediated devices in the [devices] subdirectory named -+ after the UUID used to create the mediated device. 
We'll call them $uuid1, -+ $uuid2 and $uuid3 and this is the sysfs directory structure after creation: -+ -+ /sys/devices/vfio_ap/matrix/ -+ --- [mdev_supported_types] -+ ------ [vfio_ap-passthrough] -+ --------- [devices] -+ ------------ [$uuid1] -+ --------------- assign_adapter -+ --------------- assign_control_domain -+ --------------- assign_domain -+ --------------- matrix -+ --------------- unassign_adapter -+ --------------- unassign_control_domain -+ --------------- unassign_domain -+ -+ ------------ [$uuid2] -+ --------------- assign_adapter -+ --------------- assign_control_domain -+ --------------- assign_domain -+ --------------- matrix -+ --------------- unassign_adapter -+ ----------------unassign_control_domain -+ ----------------unassign_domain -+ -+ ------------ [$uuid3] -+ --------------- assign_adapter -+ --------------- assign_control_domain -+ --------------- assign_domain -+ --------------- matrix -+ --------------- unassign_adapter -+ ----------------unassign_control_domain -+ ----------------unassign_domain -+ -+4. The administrator now needs to configure the matrixes for the mediated -+ devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3). -+ -+ This is how the matrix is configured for Guest1: -+ -+ echo 5 > assign_adapter -+ echo 6 > assign_adapter -+ echo 4 > assign_domain -+ echo 0xab > assign_domain -+ -+ Control domains can similarly be assigned using the assign_control_domain -+ sysfs file. -+ -+ If a mistake is made configuring an adapter, domain or control domain, -+ you can use the unassign_xxx interfaces to unassign the adapter, domain or -+ control domain. -+ -+ To display the matrix configuration for Guest1: -+ -+ cat matrix -+ -+ The output will display the APQNs in the format xx.yyyy, where xx is -+ the adapter number and yyyy is the domain number. 
The output for Guest1 -+ will look like this: -+ -+ 05.0004 -+ 05.00ab -+ 06.0004 -+ 06.00ab -+ -+ This is how the matrix is configured for Guest2: -+ -+ echo 5 > assign_adapter -+ echo 0x47 > assign_domain -+ echo 0xff > assign_domain -+ -+ This is how the matrix is configured for Guest3: -+ -+ echo 6 > assign_adapter -+ echo 0x47 > assign_domain -+ echo 0xff > assign_domain -+ -+5. Start Guest1: -+ -+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ -+ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ... -+ -+6. Start Guest2: -+ -+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ -+ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ... -+ -+7. Start Guest3: -+ -+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \ -+ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ... -+ -+When the guest is shut down, the mediated matrix devices may be removed. -+ -+Using our example again, to remove the mediated matrix device $uuid1: -+ -+ /sys/devices/vfio_ap/matrix/ -+ --- [mdev_supported_types] -+ ------ [vfio_ap-passthrough] -+ --------- [devices] -+ ------------ [$uuid1] -+ --------------- remove -+ -+ -+ echo 1 > remove -+ -+ This will remove all of the mdev matrix device's sysfs structures including -+ the mdev device itself. To recreate and reconfigure the mdev matrix device, -+ all of the steps starting with step 3 will have to be performed again. Note -+ that the remove will fail if a guest using the mdev is still running. -+ -+ It is not necessary to remove an mdev matrix device, but one may want to -+ remove it if no guest will use it during the remaining lifetime of the linux -+ host. If the mdev matrix device is removed, one may want to also reconfigure -+ the pool of adapters and queues reserved for use by the default drivers.
-+ -+Limitations -+=========== -+* The KVM/kernel interfaces do not provide a way to prevent restoring an APQN -+ to the default drivers pool of a queue that is still assigned to a mediated -+ device in use by a guest. It is incumbent upon the administrator to -+ ensure there is no mediated device in use by a guest to which the APQN is -+ assigned lest the host be given access to the private data of the AP queue -+ device, such as a private key configured specifically for the guest. -+ -+* Dynamically modifying the AP matrix for a running guest (which would amount to -+ hot(un)plug of AP devices for the guest) is currently not supported -+ -+* Live guest migration is not supported for guests using AP devices. --- -1.8.3.1 - diff --git a/0075-vnc-call-sasl_server_init-only-when-required.patch b/0075-vnc-call-sasl_server_init-only-when-required.patch deleted file mode 100644 index 292688c..0000000 --- a/0075-vnc-call-sasl_server_init-only-when-required.patch +++ /dev/null @@ -1,89 +0,0 @@ -From dbf0257cf3587d5580765cbd2040f370820fb5e3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Tue, 2 Oct 2018 12:34:03 +0100 -Subject: vnc: call sasl_server_init() only when required -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20181002123403.20747-2-marcandre.lureau@redhat.com> -Patchwork-id: 82356 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 1/1] vnc: call sasl_server_init() only when required -Bugzilla: 1609327 -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Thomas Huth -RH-Acked-by: Danilo de Paula - -VNC server is calling sasl_server_init() during startup of QEMU, even -if SASL auth has not been enabled. - -This may create undesirable warnings like "Could not find keytab file: -/etc/qemu/krb5.tab" when the user didn't configure SASL on host and -started VNC server. - -Instead, only initialize SASL when needed. 
Note that HMP/QMP "change -vnc" calls vnc_display_open() again, which will initialize SASL if -needed. - -Fix assignment in if condition, while touching this code. - -Related to: -https://bugzilla.redhat.com/show_bug.cgi?id=1609327 - -Signed-off-by: Marc-André Lureau -Reviewed-by: Daniel P. Berrangé -Message-id: 20180907063634.359-1-marcandre.lureau@redhat.com -Signed-off-by: Gerd Hoffmann - -(cherry picked from commit b5dc0d7d565048fcf2767060261d8385805aced1) - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1609327 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=18601393 -Signed-off-by: Danilo C. L. de Paula - -Conflicts: - ui/vnc.c - Due to "qemu"->"qemu-kvm" rename. - -Signed-off-by: Marc-André Lureau ---- - ui/vnc.c | 15 ++++++++------- - 1 file changed, 8 insertions(+), 7 deletions(-) - -diff --git a/ui/vnc.c b/ui/vnc.c -index 050c421..b3fe7d7 100644 ---- a/ui/vnc.c -+++ b/ui/vnc.c -@@ -3878,9 +3878,6 @@ void vnc_display_open(const char *id, Error **errp) - bool reverse = false; - const char *credid; - bool sasl = false; --#ifdef CONFIG_VNC_SASL -- int saslErr; --#endif - int acl = 0; - int lock_key_sync = 1; - int key_delay_ms; -@@ -4054,10 +4051,14 @@ void vnc_display_open(const char *id, Error **errp) - trace_vnc_auth_init(vd, 1, vd->ws_auth, vd->ws_subauth); - - #ifdef CONFIG_VNC_SASL -- if ((saslErr = sasl_server_init(NULL, "qemu-kvm")) != SASL_OK) { -- error_setg(errp, "Failed to initialize SASL auth: %s", -- sasl_errstring(saslErr, NULL, NULL)); -- goto fail; -+ if (sasl) { -+ int saslErr = sasl_server_init(NULL, "qemu-kvm"); -+ -+ if (saslErr != SASL_OK) { -+ error_setg(errp, "Failed to initialize SASL auth: %s", -+ sasl_errstring(saslErr, NULL, NULL)); -+ goto fail; -+ } - } - #endif - vd->lock_key_sync = lock_key_sync; --- -1.8.3.1 - diff --git a/0076-nbd-server-fix-NBD_CMD_CACHE.patch b/0076-nbd-server-fix-NBD_CMD_CACHE.patch deleted file mode 100644 index 05c36f6..0000000 --- a/0076-nbd-server-fix-NBD_CMD_CACHE.patch +++ 
/dev/null @@ -1,52 +0,0 @@ -From c10de200e291af4a6a5cb41ac10e1ae7a2b9c5b2 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Wed, 10 Oct 2018 18:19:23 +0100 -Subject: nbd/server: fix NBD_CMD_CACHE - -RH-Author: John Snow -Message-id: <20181010181924.30470-2-jsnow@redhat.com> -Patchwork-id: 82576 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 1/2] nbd/server: fix NBD_CMD_CACHE -Bugzilla: 1636142 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Danilo de Paula -RH-Acked-by: Thomas Huth - -From: Vladimir Sementsov-Ogievskiy - -We should not go to structured-read branch on CACHE command, fix that. - -Bug introduced in bc37b06a5cde24 "nbd/server: introduce NBD_CMD_CACHE" -with the whole feature and affects 3.0.0 release. - -Signed-off-by: Vladimir Sementsov-Ogievskiy -CC: qemu-stable@nongnu.org -Message-Id: <20181003144738.70670-1-vsementsov@virtuozzo.com> -Reviewed-by: Eric Blake -[eblake: commit message typo fix] -Signed-off-by: Eric Blake -(cherry picked from commit 2f454defc23e1be78f2a96bad2877ce7829f61b4) -Signed-off-by: John Snow - -Signed-off-by: Danilo C. L. 
de Paula ---- - nbd/server.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/nbd/server.c b/nbd/server.c -index ea5fe0e..1ce3f44 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -2135,7 +2135,8 @@ static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request, - } - - if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) && -- request->len) { -+ request->len && request->type != NBD_CMD_CACHE) -+ { - return nbd_co_send_sparse_read(client, request->handle, request->from, - data, request->len, errp); - } --- -1.8.3.1 - diff --git a/0077-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch b/0077-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch deleted file mode 100644 index c876e37..0000000 --- a/0077-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 24022cbbfd2230d4781a079d1856e0315895c8ce Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Wed, 10 Oct 2018 18:19:24 +0100 -Subject: nbd: fix NBD_FLAG_SEND_CACHE value - -RH-Author: John Snow -Message-id: <20181010181924.30470-3-jsnow@redhat.com> -Patchwork-id: 82578 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 2/2] nbd: fix NBD_FLAG_SEND_CACHE value -Bugzilla: 1636142 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Danilo de Paula -RH-Acked-by: Thomas Huth - -From: "Denis V. Lunev" - -Commit bc37b06a5 added NBD_CMD_CACHE support, but used the wrong value -for NBD_FLAG_SEND_CACHE flag for negotiation. That commit picked bit 8, -which had already been assigned by the NBD specification to mean -NBD_FLAG_CAN_MULTI_CONN, and which was already implemented in the -Linux kernel as a part of stable userspace-kernel API since 4.10: - -"bit 8, NBD_FLAG_CAN_MULTI_CONN: Indicates that the server operates -entirely without cache, or that the cache it uses is shared among all -connections to the given device. 
In particular, if this flag is -present, then the effects of NBD_CMD_FLUSH and NBD_CMD_FLAG_FUA -MUST be visible across all connections when the server sends its reply -to that command to the client. In the absense of this flag, clients -SHOULD NOT multiplex their commands over more than one connection to -the export. -... -bit 10, NBD_FLAG_SEND_CACHE: documents that the server understands -NBD_CMD_CACHE; however, note that server implementations exist -which support the command without advertising this bit, and -conversely that this bit does not guarantee that the command will -succeed or have an impact." - -Consequences: -- a client trying to use NBD_CMD_CACHE per the NBD spec will not -see the feature as available from a qemu 3.0 server (not fatal, -clients already have to be prepared for caching to not exist) -- a client accidentally coded to the qemu 3.0 bit value instead -of following the spec may interpret NBD_CMD_CACHE as being available -when it is not (probably not fatal, the spec says the server should -gracefully fail unknown commands, and that clients of NBD_CMD_CACHE -should be prepared for failure even when the feature is advertised); -such clients are unlikely (perhaps only in unreleased Virtuozzo code), -and will disappear over time -- a client prepared to use multiple connections based on -NBD_FLAG_CAN_MULTI_CONN may cause data corruption when it assumes -that caching is consistent when in reality qemu 3.0 did not have -a consistent cache. Partially mitigated by using read-only -connections (where nothing needs to be flushed, so caching is -indeed consistent) or when using qemu-nbd with the default -e 1 -(at most one client at a time); visible only when using -e 2 or -more for a writable export. - -Thus the commit fixes negotiation flag in QEMU according to the -specification. - -Signed-off-by: Denis V. 
Lunev -CC: Vladimir Sementsov-Ogievskiy -CC: Valery Vdovin -CC: Eric Blake -CC: Paolo Bonzini -CC: qemu-stable@nongnu.org -Message-Id: <20181004100313.4253-1-den@openvz.org> -Reviewed-by: Eric Blake -[eblake: enhance commit message, add defines for unimplemented flags] -Signed-off-by: Eric Blake -(cherry picked from commit df91328adab8490367776d2b21b35d790a606120) -Signed-off-by: John Snow - -Signed-off-by: Danilo C. L. de Paula ---- - include/block/nbd.h | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/include/block/nbd.h b/include/block/nbd.h -index 4638c83..a53b0cf 100644 ---- a/include/block/nbd.h -+++ b/include/block/nbd.h -@@ -135,7 +135,9 @@ typedef struct NBDExtent { - #define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */ - #define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */ - #define NBD_FLAG_SEND_DF (1 << 7) /* Send DF (Do not Fragment) */ --#define NBD_FLAG_SEND_CACHE (1 << 8) /* Send CACHE (prefetch) */ -+#define NBD_FLAG_CAN_MULTI_CONN (1 << 8) /* Multi-client cache consistent */ -+#define NBD_FLAG_SEND_RESIZE (1 << 9) /* Send resize */ -+#define NBD_FLAG_SEND_CACHE (1 << 10) /* Send CACHE (prefetch) */ - - /* New-style handshake (global) flags, sent from server to client, and - control what will happen during handshake phase. 
*/ --- -1.8.3.1 - diff --git a/0078-block-linux-aio-acquire-AioContext-before-qemu_laio_.patch b/0078-block-linux-aio-acquire-AioContext-before-qemu_laio_.patch deleted file mode 100644 index 0e324f3..0000000 --- a/0078-block-linux-aio-acquire-AioContext-before-qemu_laio_.patch +++ /dev/null @@ -1,134 +0,0 @@ -From ca570895f9825c8ed6691bb520341ac9e07bac5a Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:21:52 +0100 -Subject: block/linux-aio: acquire AioContext before - qemu_laio_process_completions - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-14-kwolf@redhat.com> -Patchwork-id: 82603 -O-Subject: [RHEL-8 qemu-kvm PATCH 23/44] block/linux-aio: acquire AioContext before qemu_laio_process_completions -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -From: Sergio Lopez - -In qemu_laio_process_completions_and_submit, the AioContext is acquired -before the ioq_submit iteration and after qemu_laio_process_completions, -but the latter is not thread safe either. - -This change avoids a number of random crashes when the Main Thread and -an IO Thread collide processing completions for the same AioContext. 
-This is an example of such crash: - - - The IO Thread is trying to acquire the AioContext at aio_co_enter, - which evidences that it didn't lock it before: - -Thread 3 (Thread 0x7fdfd8bd8700 (LWP 36743)): - #0 0x00007fdfe0dd542d in __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135 - #1 0x00007fdfe0dd0de6 in _L_lock_870 () at /lib64/libpthread.so.0 - #2 0x00007fdfe0dd0cdf in __GI___pthread_mutex_lock (mutex=mutex@entry=0x5631fde0e6c0) - at ../nptl/pthread_mutex_lock.c:114 - #3 0x00005631fc0603a7 in qemu_mutex_lock_impl (mutex=0x5631fde0e6c0, file=0x5631fc23520f "util/async.c", line=511) at util/qemu-thread-posix.c:66 - #4 0x00005631fc05b558 in aio_co_enter (ctx=0x5631fde0e660, co=0x7fdfcc0c2b40) at util/async.c:493 - #5 0x00005631fc05b5ac in aio_co_wake (co=) at util/async.c:478 - #6 0x00005631fbfc51ad in qemu_laio_process_completion (laiocb=) at block/linux-aio.c:104 - #7 0x00005631fbfc523c in qemu_laio_process_completions (s=s@entry=0x7fdfc0297670) - at block/linux-aio.c:222 - #8 0x00005631fbfc5499 in qemu_laio_process_completions_and_submit (s=0x7fdfc0297670) - at block/linux-aio.c:237 - #9 0x00005631fc05d978 in aio_dispatch_handlers (ctx=ctx@entry=0x5631fde0e660) at util/aio-posix.c:406 - #10 0x00005631fc05e3ea in aio_poll (ctx=0x5631fde0e660, blocking=blocking@entry=true) - at util/aio-posix.c:693 - #11 0x00005631fbd7ad96 in iothread_run (opaque=0x5631fde0e1c0) at iothread.c:64 - #12 0x00007fdfe0dcee25 in start_thread (arg=0x7fdfd8bd8700) at pthread_create.c:308 - #13 0x00007fdfe0afc34d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113 - - - The Main Thread is also processing completions from the same - AioContext, and crashes due to failed assertion at util/iov.c:78: - -Thread 1 (Thread 0x7fdfeb5eac80 (LWP 36740)): - #0 0x00007fdfe0a391f7 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56 - #1 0x00007fdfe0a3a8e8 in __GI_abort () at abort.c:90 - #2 0x00007fdfe0a32266 in __assert_fail_base 
(fmt=0x7fdfe0b84e68 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=assertion@entry=0x5631fc238ccb "offset == 0", file=file@entry=0x5631fc23698e "util/iov.c", line=line@entry=78, function=function@entry=0x5631fc236adc <__PRETTY_FUNCTION__.15220> "iov_memset") - at assert.c:92 - #3 0x00007fdfe0a32312 in __GI___assert_fail (assertion=assertion@entry=0x5631fc238ccb "offset == 0", file=file@entry=0x5631fc23698e "util/iov.c", line=line@entry=78, function=function@entry=0x5631fc236adc <__PRETTY_FUNCTION__.15220> "iov_memset") at assert.c:101 - #4 0x00005631fc065287 in iov_memset (iov=, iov_cnt=, offset=, offset@entry=65536, fillc=fillc@entry=0, bytes=15515191315812405248) at util/iov.c:78 - #5 0x00005631fc065a63 in qemu_iovec_memset (qiov=, offset=offset@entry=65536, fillc=fillc@entry=0, bytes=) at util/iov.c:410 - #6 0x00005631fbfc5178 in qemu_laio_process_completion (laiocb=0x7fdd920df630) at block/linux-aio.c:88 - #7 0x00005631fbfc523c in qemu_laio_process_completions (s=s@entry=0x7fdfc0297670) - at block/linux-aio.c:222 - #8 0x00005631fbfc5499 in qemu_laio_process_completions_and_submit (s=0x7fdfc0297670) - at block/linux-aio.c:237 - #9 0x00005631fbfc54ed in qemu_laio_poll_cb (opaque=) at block/linux-aio.c:272 - #10 0x00005631fc05d85e in run_poll_handlers_once (ctx=ctx@entry=0x5631fde0e660) at util/aio-posix.c:497 - #11 0x00005631fc05e2ca in aio_poll (blocking=false, ctx=0x5631fde0e660) at util/aio-posix.c:574 - #12 0x00005631fc05e2ca in aio_poll (ctx=0x5631fde0e660, blocking=blocking@entry=false) - at util/aio-posix.c:604 - #13 0x00005631fbfcb8a3 in bdrv_do_drained_begin (ignore_parent=, recursive=, bs=) at block/io.c:273 - #14 0x00005631fbfcb8a3 in bdrv_do_drained_begin (bs=0x5631fe8b6200, recursive=, parent=0x0, ignore_bds_parents=, poll=) at block/io.c:390 - #15 0x00005631fbfbcd2e in blk_drain (blk=0x5631fe83ac80) at block/block-backend.c:1590 - #16 0x00005631fbfbe138 in blk_remove_bs (blk=blk@entry=0x5631fe83ac80) at block/block-backend.c:774 - #17 
0x00005631fbfbe3d6 in blk_unref (blk=0x5631fe83ac80) at block/block-backend.c:401 - #18 0x00005631fbfbe3d6 in blk_unref (blk=0x5631fe83ac80) at block/block-backend.c:449 - #19 0x00005631fbfc9a69 in commit_complete (job=0x5631fe8b94b0, opaque=0x7fdfcc1bb080) - at block/commit.c:92 - #20 0x00005631fbf7d662 in job_defer_to_main_loop_bh (opaque=0x7fdfcc1b4560) at job.c:973 - #21 0x00005631fc05ad41 in aio_bh_poll (bh=0x7fdfcc01ad90) at util/async.c:90 - #22 0x00005631fc05ad41 in aio_bh_poll (ctx=ctx@entry=0x5631fddffdb0) at util/async.c:118 - #23 0x00005631fc05e210 in aio_dispatch (ctx=0x5631fddffdb0) at util/aio-posix.c:436 - #24 0x00005631fc05ac1e in aio_ctx_dispatch (source=, callback=, user_data=) at util/async.c:261 - #25 0x00007fdfeaae44c9 in g_main_context_dispatch (context=0x5631fde00140) at gmain.c:3201 - #26 0x00007fdfeaae44c9 in g_main_context_dispatch (context=context@entry=0x5631fde00140) at gmain.c:3854 - #27 0x00005631fc05d503 in main_loop_wait () at util/main-loop.c:215 - #28 0x00005631fc05d503 in main_loop_wait (timeout=) at util/main-loop.c:238 - #29 0x00005631fc05d503 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:497 - #30 0x00005631fbd81412 in main_loop () at vl.c:1866 - #31 0x00005631fbc18ff3 in main (argc=, argv=, envp=) - at vl.c:4647 - - - A closer examination shows that s->io_q.in_flight appears to have - gone backwards: - -(gdb) frame 7 - #7 0x00005631fbfc523c in qemu_laio_process_completions (s=s@entry=0x7fdfc0297670) - at block/linux-aio.c:222 -222 qemu_laio_process_completion(laiocb); -(gdb) p s -$2 = (LinuxAioState *) 0x7fdfc0297670 -(gdb) p *s -$3 = {aio_context = 0x5631fde0e660, ctx = 0x7fdfeb43b000, e = {rfd = 33, wfd = 33}, io_q = {plugged = 0, - in_queue = 0, in_flight = 4294967280, blocked = false, pending = {sqh_first = 0x0, - sqh_last = 0x7fdfc0297698}}, completion_bh = 0x7fdfc0280ef0, event_idx = 21, event_max = 241} -(gdb) p/x s->io_q.in_flight -$4 = 0xfffffff0 - -Signed-off-by: Sergio Lopez 
-Signed-off-by: Kevin Wolf -(cherry picked from commit e091f0e905a4481f347913420f327d427f18d9d4) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/linux-aio.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/block/linux-aio.c b/block/linux-aio.c -index 19eb922..217ce60 100644 ---- a/block/linux-aio.c -+++ b/block/linux-aio.c -@@ -234,9 +234,9 @@ static void qemu_laio_process_completions(LinuxAioState *s) - - static void qemu_laio_process_completions_and_submit(LinuxAioState *s) - { -+ aio_context_acquire(s->aio_context); - qemu_laio_process_completions(s); - -- aio_context_acquire(s->aio_context); - if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) { - ioq_submit(s); - } --- -1.8.3.1 - diff --git a/0079-util-async-use-qemu_aio_coroutine_enter-in-co_schedu.patch b/0079-util-async-use-qemu_aio_coroutine_enter-in-co_schedu.patch deleted file mode 100644 index 7288227..0000000 --- a/0079-util-async-use-qemu_aio_coroutine_enter-in-co_schedu.patch +++ /dev/null @@ -1,78 +0,0 @@ -From faa3d5106cb296858227cc240e045ca16cb28c81 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:21:53 +0100 -Subject: util/async: use qemu_aio_coroutine_enter in co_schedule_bh_cb - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-15-kwolf@redhat.com> -Patchwork-id: 82604 -O-Subject: [RHEL-8 qemu-kvm PATCH 24/44] util/async: use qemu_aio_coroutine_enter in co_schedule_bh_cb -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -From: Sergio Lopez - -AIO Coroutines shouldn't by managed by an AioContext different than the -one assigned when they are created. aio_co_enter avoids entering a -coroutine from a different AioContext, calling aio_co_schedule instead. - -Scheduled coroutines are then entered by co_schedule_bh_cb using -qemu_coroutine_enter, which just calls qemu_aio_coroutine_enter with the -current AioContext obtained with qemu_get_current_aio_context. 
-Eventually, co->ctx will be set to the AioContext passed as an argument -to qemu_aio_coroutine_enter. - -This means that, if an IO Thread's AioConext is being processed by the -Main Thread (due to aio_poll being called with a BDS AioContext, as it -happens in AIO_WAIT_WHILE among other places), the AioContext from some -coroutines may be wrongly replaced with the one from the Main Thread. - -This is the root cause behind some crashes, mainly triggered by the -drain code at block/io.c. The most common are these abort and failed -assertion: - -util/async.c:aio_co_schedule -456 if (scheduled) { -457 fprintf(stderr, -458 "%s: Co-routine was already scheduled in '%s'\n", -459 __func__, scheduled); -460 abort(); -461 } - -util/qemu-coroutine-lock.c: -286 assert(mutex->holder == self); - -But it's also known to cause random errors at different locations, and -even SIGSEGV with broken coroutine backtraces. - -By using qemu_aio_coroutine_enter directly in co_schedule_bh_cb, we can -pass the correct AioContext as an argument, making sure co->ctx is not -wrongly altered. - -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf -(cherry picked from commit 6808ae0417131f8dbe7b051256dff7a16634dc1d) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - util/async.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/util/async.c b/util/async.c -index 05979f8..c10642a 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -400,7 +400,7 @@ static void co_schedule_bh_cb(void *opaque) - - /* Protected by write barrier in qemu_aio_coroutine_enter */ - atomic_set(&co->scheduled, NULL); -- qemu_coroutine_enter(co); -+ qemu_aio_coroutine_enter(ctx, co); - aio_context_release(ctx); - } - } --- -1.8.3.1 - diff --git a/0080-job-Fix-nested-aio_poll-hanging-in-job_txn_apply.patch b/0080-job-Fix-nested-aio_poll-hanging-in-job_txn_apply.patch deleted file mode 100644 index 568e097..0000000 --- a/0080-job-Fix-nested-aio_poll-hanging-in-job_txn_apply.patch +++ /dev/null @@ -1,105 +0,0 @@ -From f78998e365809f77ed146ee2afdcf132b12c838c Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:21:54 +0100 -Subject: job: Fix nested aio_poll() hanging in job_txn_apply - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-16-kwolf@redhat.com> -Patchwork-id: 82605 -O-Subject: [RHEL-8 qemu-kvm PATCH 25/44] job: Fix nested aio_poll() hanging in job_txn_apply -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -From: Fam Zheng - -All callers have acquired ctx already. Doing that again results in -aio_poll() hang. This fixes the problem that a BDRV_POLL_WHILE() in the -callback cannot make progress because ctx is recursively locked, for -example, when drive-backup finishes. 
- -There are two callers of job_finalize(): - - fam@lemon:~/work/qemu [master]$ git grep -w -A1 '^\s*job_finalize' - blockdev.c: job_finalize(&job->job, errp); - blockdev.c- aio_context_release(aio_context); - -- - job-qmp.c: job_finalize(job, errp); - job-qmp.c- aio_context_release(aio_context); - -- - tests/test-blockjob.c: job_finalize(&job->job, &error_abort); - tests/test-blockjob.c- assert(job->job.status == JOB_STATUS_CONCLUDED); - -Ignoring the test, it's easy to see both callers to job_finalize (and -job_do_finalize) have acquired the context. - -Cc: qemu-stable@nongnu.org -Reported-by: Gu Nini -Reviewed-by: Eric Blake -Signed-off-by: Fam Zheng -Signed-off-by: Kevin Wolf -(cherry picked from commit 49880165a44f26dc84651858750facdee31f2513) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - job.c | 18 +++++------------- - 1 file changed, 5 insertions(+), 13 deletions(-) - -diff --git a/job.c b/job.c -index bb322de..82b4692 100644 ---- a/job.c -+++ b/job.c -@@ -136,21 +136,13 @@ static void job_txn_del_job(Job *job) - } - } - --static int job_txn_apply(JobTxn *txn, int fn(Job *), bool lock) -+static int job_txn_apply(JobTxn *txn, int fn(Job *)) - { -- AioContext *ctx; - Job *job, *next; - int rc = 0; - - QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) { -- if (lock) { -- ctx = job->aio_context; -- aio_context_acquire(ctx); -- } - rc = fn(job); -- if (lock) { -- aio_context_release(ctx); -- } - if (rc) { - break; - } -@@ -780,11 +772,11 @@ static void job_do_finalize(Job *job) - assert(job && job->txn); - - /* prepare the transaction to complete */ -- rc = job_txn_apply(job->txn, job_prepare, true); -+ rc = job_txn_apply(job->txn, job_prepare); - if (rc) { - job_completed_txn_abort(job); - } else { -- job_txn_apply(job->txn, job_finalize_single, true); -+ job_txn_apply(job->txn, job_finalize_single); - } - } - -@@ -830,10 +822,10 @@ static void job_completed_txn_success(Job *job) - assert(other_job->ret == 0); - } - -- 
job_txn_apply(txn, job_transition_to_pending, false); -+ job_txn_apply(txn, job_transition_to_pending); - - /* If no jobs need manual finalization, automatically do so */ -- if (job_txn_apply(txn, job_needs_finalize, false) == 0) { -+ if (job_txn_apply(txn, job_needs_finalize) == 0) { - job_do_finalize(job); - } - } --- -1.8.3.1 - diff --git a/0081-job-Fix-missing-locking-due-to-mismerge.patch b/0081-job-Fix-missing-locking-due-to-mismerge.patch deleted file mode 100644 index 76074d2..0000000 --- a/0081-job-Fix-missing-locking-due-to-mismerge.patch +++ /dev/null @@ -1,55 +0,0 @@ -From bb58f00a6c09bd1fe9af6dabe9ea173adc406d7b Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:21:55 +0100 -Subject: job: Fix missing locking due to mismerge - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-17-kwolf@redhat.com> -Patchwork-id: 82607 -O-Subject: [RHEL-8 qemu-kvm PATCH 26/44] job: Fix missing locking due to mismerge -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -job_completed() had a problem with double locking that was recently -fixed independently by two different commits: - -"job: Fix nested aio_poll() hanging in job_txn_apply" -"jobs: add exit shim" - -One fix removed the first aio_context_acquire(), the other fix removed -the other one. Now we have a bug again and the code is run without any -locking. - -Add it back in one of the places. - -Signed-off-by: Kevin Wolf -Reviewed-by: Max Reitz -Reviewed-by: John Snow -(cherry picked from commit d1756c780b7879fb64e41135feac781d84a1f995) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - job.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/job.c b/job.c -index 82b4692..5c4e84f 100644 ---- a/job.c -+++ b/job.c -@@ -847,7 +847,11 @@ static void job_completed(Job *job) - static void job_exit(void *opaque) - { - Job *job = (Job *)opaque; -+ AioContext *ctx = job->aio_context; -+ -+ aio_context_acquire(ctx); - job_completed(job); -+ aio_context_release(ctx); - } - - /** --- -1.8.3.1 - diff --git a/0082-blockjob-Wake-up-BDS-when-job-becomes-idle.patch b/0082-blockjob-Wake-up-BDS-when-job-becomes-idle.patch deleted file mode 100644 index f51df38..0000000 --- a/0082-blockjob-Wake-up-BDS-when-job-becomes-idle.patch +++ /dev/null @@ -1,161 +0,0 @@ -From ac751d8909fa4b734fab48e27c0213df48ffd76b Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:21:56 +0100 -Subject: blockjob: Wake up BDS when job becomes idle - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-18-kwolf@redhat.com> -Patchwork-id: 82610 -O-Subject: [RHEL-8 qemu-kvm PATCH 27/44] blockjob: Wake up BDS when job becomes idle -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -In the context of draining a BDS, the .drained_poll callback of block -jobs is called. If this returns true (i.e. there is still some activity -pending), the drain operation may call aio_poll() with blocking=true to -wait for completion. - -As soon as the pending activity is completed and the job finally arrives -in a quiescent state (i.e. its coroutine either yields with busy=false -or terminates), the block job must notify the aio_poll() loop to wake -up, otherwise we get a deadlock if both are running in different -threads. - -Signed-off-by: Kevin Wolf -Reviewed-by: Fam Zheng -Reviewed-by: Max Reitz -(cherry picked from commit 34dc97b9a0e592bc466bdb0bbfe45d77304a72b6) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - blockjob.c | 18 ++++++++++++++++++ - include/block/blockjob.h | 13 +++++++++++++ - include/qemu/job.h | 3 +++ - job.c | 7 +++++++ - 4 files changed, 41 insertions(+) - -diff --git a/blockjob.c b/blockjob.c -index be5903a..8d27e8e 100644 ---- a/blockjob.c -+++ b/blockjob.c -@@ -221,6 +221,22 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, - return 0; - } - -+void block_job_wakeup_all_bdrv(BlockJob *job) -+{ -+ GSList *l; -+ -+ for (l = job->nodes; l; l = l->next) { -+ BdrvChild *c = l->data; -+ bdrv_wakeup(c->bs); -+ } -+} -+ -+static void block_job_on_idle(Notifier *n, void *opaque) -+{ -+ BlockJob *job = opaque; -+ block_job_wakeup_all_bdrv(job); -+} -+ - bool block_job_is_internal(BlockJob *job) - { - return (job->job.id == NULL); -@@ -419,6 +435,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, - job->finalize_completed_notifier.notify = block_job_event_completed; - job->pending_notifier.notify = block_job_event_pending; - job->ready_notifier.notify = block_job_event_ready; -+ job->idle_notifier.notify = block_job_on_idle; - - notifier_list_add(&job->job.on_finalize_cancelled, - &job->finalize_cancelled_notifier); -@@ -426,6 +443,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, - &job->finalize_completed_notifier); - notifier_list_add(&job->job.on_pending, &job->pending_notifier); - notifier_list_add(&job->job.on_ready, &job->ready_notifier); -+ notifier_list_add(&job->job.on_idle, &job->idle_notifier); - - error_setg(&job->blocker, "block device is in use by block job: %s", - job_type_str(&job->job)); -diff --git a/include/block/blockjob.h b/include/block/blockjob.h -index 32c00b7..2290bbb 100644 ---- a/include/block/blockjob.h -+++ b/include/block/blockjob.h -@@ -70,6 +70,9 @@ typedef struct BlockJob { - /** Called when the job transitions to READY */ - Notifier ready_notifier; - -+ /** Called when the job coroutine yields or terminates */ -+ Notifier 
idle_notifier; -+ - /** BlockDriverStates that are involved in this block job */ - GSList *nodes; - } BlockJob; -@@ -119,6 +122,16 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, - void block_job_remove_all_bdrv(BlockJob *job); - - /** -+ * block_job_wakeup_all_bdrv: -+ * @job: The block job -+ * -+ * Calls bdrv_wakeup() for all BlockDriverStates that have been added to the -+ * job. This function is to be called whenever child_job_drained_poll() would -+ * go from true to false to notify waiting drain requests. -+ */ -+void block_job_wakeup_all_bdrv(BlockJob *job); -+ -+/** - * block_job_set_speed: - * @job: The job to set the speed for. - * @speed: The new value -diff --git a/include/qemu/job.h b/include/qemu/job.h -index 5cb0681..b4a784d 100644 ---- a/include/qemu/job.h -+++ b/include/qemu/job.h -@@ -156,6 +156,9 @@ typedef struct Job { - /** Notifiers called when the job transitions to READY */ - NotifierList on_ready; - -+ /** Notifiers called when the job coroutine yields or terminates */ -+ NotifierList on_idle; -+ - /** Element of the list of jobs */ - QLIST_ENTRY(Job) job_list; - -diff --git a/job.c b/job.c -index 5c4e84f..48a767c 100644 ---- a/job.c -+++ b/job.c -@@ -402,6 +402,11 @@ static void job_event_ready(Job *job) - notifier_list_notify(&job->on_ready, job); - } - -+static void job_event_idle(Job *job) -+{ -+ notifier_list_notify(&job->on_idle, job); -+} -+ - void job_enter_cond(Job *job, bool(*fn)(Job *job)) - { - if (!job_started(job)) { -@@ -447,6 +452,7 @@ static void coroutine_fn job_do_yield(Job *job, uint64_t ns) - timer_mod(&job->sleep_timer, ns); - } - job->busy = false; -+ job_event_idle(job); - job_unlock(); - qemu_coroutine_yield(); - -@@ -865,6 +871,7 @@ static void coroutine_fn job_co_entry(void *opaque) - assert(job && job->driver && job->driver->run); - job_pause_point(job); - job->ret = job->driver->run(job, &job->err); -+ job_event_idle(job); - job->deferred_to_main_loop = true; - 
aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job); - } --- -1.8.3.1 - diff --git a/0083-aio-wait-Increase-num_waiters-even-in-home-thread.patch b/0083-aio-wait-Increase-num_waiters-even-in-home-thread.patch deleted file mode 100644 index 14b67eb..0000000 --- a/0083-aio-wait-Increase-num_waiters-even-in-home-thread.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 0e651f939d3fd65071a8edc8090a777bdb45b921 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:21:57 +0100 -Subject: aio-wait: Increase num_waiters even in home thread - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-19-kwolf@redhat.com> -Patchwork-id: 82609 -O-Subject: [RHEL-8 qemu-kvm PATCH 28/44] aio-wait: Increase num_waiters even in home thread -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -Even if AIO_WAIT_WHILE() is called in the home context of the -AioContext, we still want to allow the condition to change depending on -other threads as long as they kick the AioWait. Specfically block jobs -can be running in an I/O thread and should then be able to kick a drain -in the main loop context. - -Signed-off-by: Kevin Wolf -Reviewed-by: Fam Zheng -(cherry picked from commit 486574483aba988c83b20e7d3f1ccd50c4c333d8) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - include/block/aio-wait.h | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h -index c85a62f..600fad1 100644 ---- a/include/block/aio-wait.h -+++ b/include/block/aio-wait.h -@@ -76,6 +76,8 @@ typedef struct { - bool waited_ = false; \ - AioWait *wait_ = (wait); \ - AioContext *ctx_ = (ctx); \ -+ /* Increment wait_->num_waiters before evaluating cond. 
*/ \ -+ atomic_inc(&wait_->num_waiters); \ - if (ctx_ && in_aio_context_home_thread(ctx_)) { \ - while ((cond)) { \ - aio_poll(ctx_, true); \ -@@ -84,8 +86,6 @@ typedef struct { - } else { \ - assert(qemu_get_current_aio_context() == \ - qemu_get_aio_context()); \ -- /* Increment wait_->num_waiters before evaluating cond. */ \ -- atomic_inc(&wait_->num_waiters); \ - while ((cond)) { \ - if (ctx_) { \ - aio_context_release(ctx_); \ -@@ -96,8 +96,8 @@ typedef struct { - } \ - waited_ = true; \ - } \ -- atomic_dec(&wait_->num_waiters); \ - } \ -+ atomic_dec(&wait_->num_waiters); \ - waited_; }) - - /** --- -1.8.3.1 - diff --git a/0084-test-bdrv-drain-Drain-with-block-jobs-in-an-I-O-thre.patch b/0084-test-bdrv-drain-Drain-with-block-jobs-in-an-I-O-thre.patch deleted file mode 100644 index 38d58ff..0000000 --- a/0084-test-bdrv-drain-Drain-with-block-jobs-in-an-I-O-thre.patch +++ /dev/null @@ -1,208 +0,0 @@ -From 6d374393478f0d57ec8cd338342687d043565662 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:21:58 +0100 -Subject: test-bdrv-drain: Drain with block jobs in an I/O thread - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-20-kwolf@redhat.com> -Patchwork-id: 82608 -O-Subject: [RHEL-8 qemu-kvm PATCH 29/44] test-bdrv-drain: Drain with block jobs in an I/O thread -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -This extends the existing drain test with a block job to include -variants where the block job runs in a different AioContext. - -Signed-off-by: Kevin Wolf -Reviewed-by: Fam Zheng -(cherry picked from commit f62c172959cd2b6de4dd8ba782e855d64d94764b) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/test-bdrv-drain.c | 92 +++++++++++++++++++++++++++++++++++++++++++++---- - 1 file changed, 86 insertions(+), 6 deletions(-) - -diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c -index 9bcb3c7..3cf3ba3 100644 ---- a/tests/test-bdrv-drain.c -+++ b/tests/test-bdrv-drain.c -@@ -174,6 +174,28 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) - } - } - -+static void do_drain_begin_unlocked(enum drain_type drain_type, BlockDriverState *bs) -+{ -+ if (drain_type != BDRV_DRAIN_ALL) { -+ aio_context_acquire(bdrv_get_aio_context(bs)); -+ } -+ do_drain_begin(drain_type, bs); -+ if (drain_type != BDRV_DRAIN_ALL) { -+ aio_context_release(bdrv_get_aio_context(bs)); -+ } -+} -+ -+static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *bs) -+{ -+ if (drain_type != BDRV_DRAIN_ALL) { -+ aio_context_acquire(bdrv_get_aio_context(bs)); -+ } -+ do_drain_end(drain_type, bs); -+ if (drain_type != BDRV_DRAIN_ALL) { -+ aio_context_release(bdrv_get_aio_context(bs)); -+ } -+} -+ - static void test_drv_cb_common(enum drain_type drain_type, bool recursive) - { - BlockBackend *blk; -@@ -785,11 +807,13 @@ BlockJobDriver test_job_driver = { - }, - }; - --static void test_blockjob_common(enum drain_type drain_type) -+static void test_blockjob_common(enum drain_type drain_type, bool use_iothread) - { - BlockBackend *blk_src, *blk_target; - BlockDriverState *src, *target; - BlockJob *job; -+ IOThread *iothread = NULL; -+ AioContext *ctx; - int ret; - - src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR, -@@ -797,21 +821,31 @@ static void test_blockjob_common(enum drain_type drain_type) - blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); - blk_insert_bs(blk_src, src, &error_abort); - -+ if (use_iothread) { -+ iothread = iothread_new(); -+ ctx = iothread_get_aio_context(iothread); -+ blk_set_aio_context(blk_src, ctx); -+ } else { -+ ctx = qemu_get_aio_context(); -+ } -+ - target = 
bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR, - &error_abort); - blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); - blk_insert_bs(blk_target, target, &error_abort); - -+ aio_context_acquire(ctx); - job = block_job_create("job0", &test_job_driver, NULL, src, 0, BLK_PERM_ALL, - 0, 0, NULL, NULL, &error_abort); - block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort); - job_start(&job->job); -+ aio_context_release(ctx); - - g_assert_cmpint(job->job.pause_count, ==, 0); - g_assert_false(job->job.paused); - g_assert_true(job->job.busy); /* We're in job_sleep_ns() */ - -- do_drain_begin(drain_type, src); -+ do_drain_begin_unlocked(drain_type, src); - - if (drain_type == BDRV_DRAIN_ALL) { - /* bdrv_drain_all() drains both src and target */ -@@ -822,7 +856,14 @@ static void test_blockjob_common(enum drain_type drain_type) - g_assert_true(job->job.paused); - g_assert_false(job->job.busy); /* The job is paused */ - -- do_drain_end(drain_type, src); -+ do_drain_end_unlocked(drain_type, src); -+ -+ if (use_iothread) { -+ /* paused is reset in the I/O thread, wait for it */ -+ while (job->job.paused) { -+ aio_poll(qemu_get_aio_context(), false); -+ } -+ } - - g_assert_cmpint(job->job.pause_count, ==, 0); - g_assert_false(job->job.paused); -@@ -841,32 +882,64 @@ static void test_blockjob_common(enum drain_type drain_type) - - do_drain_end(drain_type, target); - -+ if (use_iothread) { -+ /* paused is reset in the I/O thread, wait for it */ -+ while (job->job.paused) { -+ aio_poll(qemu_get_aio_context(), false); -+ } -+ } -+ - g_assert_cmpint(job->job.pause_count, ==, 0); - g_assert_false(job->job.paused); - g_assert_true(job->job.busy); /* We're in job_sleep_ns() */ - -+ aio_context_acquire(ctx); - ret = job_complete_sync(&job->job, &error_abort); - g_assert_cmpint(ret, ==, 0); - -+ if (use_iothread) { -+ blk_set_aio_context(blk_src, qemu_get_aio_context()); -+ } -+ aio_context_release(ctx); -+ - blk_unref(blk_src); - blk_unref(blk_target); - 
bdrv_unref(src); - bdrv_unref(target); -+ -+ if (iothread) { -+ iothread_join(iothread); -+ } - } - - static void test_blockjob_drain_all(void) - { -- test_blockjob_common(BDRV_DRAIN_ALL); -+ test_blockjob_common(BDRV_DRAIN_ALL, false); - } - - static void test_blockjob_drain(void) - { -- test_blockjob_common(BDRV_DRAIN); -+ test_blockjob_common(BDRV_DRAIN, false); - } - - static void test_blockjob_drain_subtree(void) - { -- test_blockjob_common(BDRV_SUBTREE_DRAIN); -+ test_blockjob_common(BDRV_SUBTREE_DRAIN, false); -+} -+ -+static void test_blockjob_iothread_drain_all(void) -+{ -+ test_blockjob_common(BDRV_DRAIN_ALL, true); -+} -+ -+static void test_blockjob_iothread_drain(void) -+{ -+ test_blockjob_common(BDRV_DRAIN, true); -+} -+ -+static void test_blockjob_iothread_drain_subtree(void) -+{ -+ test_blockjob_common(BDRV_SUBTREE_DRAIN, true); - } - - -@@ -1337,6 +1410,13 @@ int main(int argc, char **argv) - g_test_add_func("/bdrv-drain/blockjob/drain_subtree", - test_blockjob_drain_subtree); - -+ g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all", -+ test_blockjob_iothread_drain_all); -+ g_test_add_func("/bdrv-drain/blockjob/iothread/drain", -+ test_blockjob_iothread_drain); -+ g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree", -+ test_blockjob_iothread_drain_subtree); -+ - g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain); - g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all); - g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); --- -1.8.3.1 - diff --git a/0085-test-blockjob-Acquire-AioContext-around-job_cancel_s.patch b/0085-test-blockjob-Acquire-AioContext-around-job_cancel_s.patch deleted file mode 100644 index 976ce6a..0000000 --- a/0085-test-blockjob-Acquire-AioContext-around-job_cancel_s.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 99172abebcedfb48ca06d4c1bd0cd16372449600 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:21:59 +0100 -Subject: test-blockjob: Acquire 
AioContext around job_cancel_sync() - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-21-kwolf@redhat.com> -Patchwork-id: 82606 -O-Subject: [RHEL-8 qemu-kvm PATCH 30/44] test-blockjob: Acquire AioContext around job_cancel_sync() -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -All callers in QEMU proper hold the AioContext lock when calling -job_finish_sync(). test-blockjob should do the same when it calls the -function indirectly through job_cancel_sync(). - -Signed-off-by: Kevin Wolf -Reviewed-by: Fam Zheng -(cherry picked from commit 30c070a547322a5e41ce129d540bca3653b1a9c8) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - include/qemu/job.h | 6 ++++++ - tests/test-blockjob.c | 6 ++++++ - 2 files changed, 12 insertions(+) - -diff --git a/include/qemu/job.h b/include/qemu/job.h -index b4a784d..63c60ef 100644 ---- a/include/qemu/job.h -+++ b/include/qemu/job.h -@@ -524,6 +524,8 @@ void job_user_cancel(Job *job, bool force, Error **errp); - * - * Returns the return value from the job if the job actually completed - * during the call, or -ECANCELED if it was canceled. -+ * -+ * Callers must hold the AioContext lock of job->aio_context. - */ - int job_cancel_sync(Job *job); - -@@ -541,6 +543,8 @@ void job_cancel_sync_all(void); - * function). - * - * Returns the return value from the job. -+ * -+ * Callers must hold the AioContext lock of job->aio_context. - */ - int job_complete_sync(Job *job, Error **errp); - -@@ -566,6 +570,8 @@ void job_dismiss(Job **job, Error **errp); - * - * Returns 0 if the job is successfully completed, -ECANCELED if the job was - * cancelled before completing, and -errno in other error cases. -+ * -+ * Callers must hold the AioContext lock of job->aio_context. 
- */ - int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp); - -diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c -index de4c1c2..652d1e8 100644 ---- a/tests/test-blockjob.c -+++ b/tests/test-blockjob.c -@@ -223,6 +223,10 @@ static void cancel_common(CancelJob *s) - BlockJob *job = &s->common; - BlockBackend *blk = s->blk; - JobStatus sts = job->job.status; -+ AioContext *ctx; -+ -+ ctx = job->job.aio_context; -+ aio_context_acquire(ctx); - - job_cancel_sync(&job->job); - if (sts != JOB_STATUS_CREATED && sts != JOB_STATUS_CONCLUDED) { -@@ -232,6 +236,8 @@ static void cancel_common(CancelJob *s) - assert(job->job.status == JOB_STATUS_NULL); - job_unref(&job->job); - destroy_blk(blk); -+ -+ aio_context_release(ctx); - } - - static void test_cancel_created(void) --- -1.8.3.1 - diff --git a/0086-job-Use-AIO_WAIT_WHILE-in-job_finish_sync.patch b/0086-job-Use-AIO_WAIT_WHILE-in-job_finish_sync.patch deleted file mode 100644 index b16aa60..0000000 --- a/0086-job-Use-AIO_WAIT_WHILE-in-job_finish_sync.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 3f3282c8ffa29e3dbcf58618beefb36afe8ba79b Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:22:00 +0100 -Subject: job: Use AIO_WAIT_WHILE() in job_finish_sync() - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-22-kwolf@redhat.com> -Patchwork-id: 82612 -O-Subject: [RHEL-8 qemu-kvm PATCH 31/44] job: Use AIO_WAIT_WHILE() in job_finish_sync() -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -job_finish_sync() needs to release the AioContext lock of the job before -calling aio_poll(). Otherwise, callbacks called by aio_poll() would -possibly take the lock a second time and run into a deadlock with a -nested AIO_WAIT_WHILE() call. - -Also, job_drain() without aio_poll() isn't necessarily enough to make -progress on a job, it could depend on bottom halves to be executed. 
- -Combine both open-coded while loops into a single AIO_WAIT_WHILE() call -that solves both of these problems. - -Signed-off-by: Kevin Wolf -Reviewed-by: Fam Zheng -Reviewed-by: Max Reitz -(cherry picked from commit de0fbe64806321fc3e6399bfab360553db87a41d) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - job.c | 14 ++++++-------- - 1 file changed, 6 insertions(+), 8 deletions(-) - -diff --git a/job.c b/job.c -index 48a767c..fa74558 100644 ---- a/job.c -+++ b/job.c -@@ -29,6 +29,7 @@ - #include "qemu/job.h" - #include "qemu/id.h" - #include "qemu/main-loop.h" -+#include "block/aio-wait.h" - #include "trace-root.h" - #include "qapi/qapi-events-job.h" - -@@ -962,6 +963,7 @@ void job_complete(Job *job, Error **errp) - int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp) - { - Error *local_err = NULL; -+ AioWait dummy_wait = {}; - int ret; - - job_ref(job); -@@ -974,14 +976,10 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp) - job_unref(job); - return -EBUSY; - } -- /* job_drain calls job_enter, and it should be enough to induce progress -- * until the job completes or moves to the main thread. */ -- while (!job->deferred_to_main_loop && !job_is_completed(job)) { -- job_drain(job); -- } -- while (!job_is_completed(job)) { -- aio_poll(qemu_get_aio_context(), true); -- } -+ -+ AIO_WAIT_WHILE(&dummy_wait, job->aio_context, -+ (job_drain(job), !job_is_completed(job))); -+ - ret = (job_is_cancelled(job) && job->ret == 0) ? 
-ECANCELED : job->ret; - job_unref(job); - return ret; --- -1.8.3.1 - diff --git a/0087-test-bdrv-drain-Test-AIO_WAIT_WHILE-in-completion-ca.patch b/0087-test-bdrv-drain-Test-AIO_WAIT_WHILE-in-completion-ca.patch deleted file mode 100644 index 978a41d..0000000 --- a/0087-test-bdrv-drain-Test-AIO_WAIT_WHILE-in-completion-ca.patch +++ /dev/null @@ -1,59 +0,0 @@ -From b9c555343b6567159effe1b3eb736fd1e02257bd Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:22:01 +0100 -Subject: test-bdrv-drain: Test AIO_WAIT_WHILE() in completion callback - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-23-kwolf@redhat.com> -Patchwork-id: 82611 -O-Subject: [RHEL-8 qemu-kvm PATCH 32/44] test-bdrv-drain: Test AIO_WAIT_WHILE() in completion callback -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -This is a regression test for a deadlock that occurred in block job -completion callbacks (via job_defer_to_main_loop) because the AioContext -lock was taken twice: once in job_finish_sync() and then again in -job_defer_to_main_loop_bh(). This would cause AIO_WAIT_WHILE() to hang. - -Signed-off-by: Kevin Wolf -Reviewed-by: Fam Zheng -(cherry picked from commit ae23dde9dd486e57e152a0ebc9802caddedc45fc) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/test-bdrv-drain.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c -index 3cf3ba3..05f3b55 100644 ---- a/tests/test-bdrv-drain.c -+++ b/tests/test-bdrv-drain.c -@@ -774,6 +774,15 @@ typedef struct TestBlockJob { - bool should_complete; - } TestBlockJob; - -+static int test_job_prepare(Job *job) -+{ -+ TestBlockJob *s = container_of(job, TestBlockJob, common.job); -+ -+ /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */ -+ blk_flush(s->common.blk); -+ return 0; -+} -+ - static int coroutine_fn test_job_run(Job *job, Error **errp) - { - TestBlockJob *s = container_of(job, TestBlockJob, common.job); -@@ -804,6 +813,7 @@ BlockJobDriver test_job_driver = { - .drain = block_job_drain, - .run = test_job_run, - .complete = test_job_complete, -+ .prepare = test_job_prepare, - }, - }; - --- -1.8.3.1 - diff --git a/0088-block-Add-missing-locking-in-bdrv_co_drain_bh_cb.patch b/0088-block-Add-missing-locking-in-bdrv_co_drain_bh_cb.patch deleted file mode 100644 index 1d2abee..0000000 --- a/0088-block-Add-missing-locking-in-bdrv_co_drain_bh_cb.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 51c1069568d78941554c70f9084531c279899c83 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:22:02 +0100 -Subject: block: Add missing locking in bdrv_co_drain_bh_cb() - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-24-kwolf@redhat.com> -Patchwork-id: 82613 -O-Subject: [RHEL-8 qemu-kvm PATCH 33/44] block: Add missing locking in bdrv_co_drain_bh_cb() -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -bdrv_do_drained_begin/end() assume that they are called with the -AioContext lock of bs held. If we call drain functions from a coroutine -with the AioContext lock held, we yield and schedule a BH to move out of -coroutine context. 
This means that the lock for the home context of the -coroutine is released and must be re-acquired in the bottom half. - -Signed-off-by: Kevin Wolf -Reviewed-by: Max Reitz -(cherry picked from commit aa1361d54aac43094b98024b8b6c804eb6e41661) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/io.c | 15 +++++++++++++++ - include/qemu/coroutine.h | 5 +++++ - util/qemu-coroutine.c | 5 +++++ - 3 files changed, 25 insertions(+) - -diff --git a/block/io.c b/block/io.c -index 7100344..914ba78 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -288,6 +288,18 @@ static void bdrv_co_drain_bh_cb(void *opaque) - BlockDriverState *bs = data->bs; - - if (bs) { -+ AioContext *ctx = bdrv_get_aio_context(bs); -+ AioContext *co_ctx = qemu_coroutine_get_aio_context(co); -+ -+ /* -+ * When the coroutine yielded, the lock for its home context was -+ * released, so we need to re-acquire it here. If it explicitly -+ * acquired a different context, the lock is still held and we don't -+ * want to lock it a second time (or AIO_WAIT_WHILE() would hang). 
-+ */ -+ if (ctx == co_ctx) { -+ aio_context_acquire(ctx); -+ } - bdrv_dec_in_flight(bs); - if (data->begin) { - bdrv_do_drained_begin(bs, data->recursive, data->parent, -@@ -296,6 +308,9 @@ static void bdrv_co_drain_bh_cb(void *opaque) - bdrv_do_drained_end(bs, data->recursive, data->parent, - data->ignore_bds_parents); - } -+ if (ctx == co_ctx) { -+ aio_context_release(ctx); -+ } - } else { - assert(data->begin); - bdrv_drain_all_begin(); -diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h -index 6f8a487..9801e7f 100644 ---- a/include/qemu/coroutine.h -+++ b/include/qemu/coroutine.h -@@ -90,6 +90,11 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co); - void coroutine_fn qemu_coroutine_yield(void); - - /** -+ * Get the AioContext of the given coroutine -+ */ -+AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co); -+ -+/** - * Get the currently executing coroutine - */ - Coroutine *coroutine_fn qemu_coroutine_self(void); -diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c -index 1ba4191..2295928 100644 ---- a/util/qemu-coroutine.c -+++ b/util/qemu-coroutine.c -@@ -198,3 +198,8 @@ bool qemu_coroutine_entered(Coroutine *co) - { - return co->caller; - } -+ -+AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co) -+{ -+ return co->ctx; -+} --- -1.8.3.1 - diff --git a/0089-block-backend-Add-.drained_poll-callback.patch b/0089-block-backend-Add-.drained_poll-callback.patch deleted file mode 100644 index 64eae39..0000000 --- a/0089-block-backend-Add-.drained_poll-callback.patch +++ /dev/null @@ -1,66 +0,0 @@ -From ea3026a59a3772f84697af9b62b6272cfb41f40c Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:22:03 +0100 -Subject: block-backend: Add .drained_poll callback - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-25-kwolf@redhat.com> -Patchwork-id: 82614 -O-Subject: [RHEL-8 qemu-kvm PATCH 34/44] block-backend: Add .drained_poll callback -Bugzilla: 1637976 -RH-Acked-by: Max 
Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -A bdrv_drain operation must ensure that all parents are quiesced, this -includes BlockBackends. Otherwise, callbacks called by requests that are -completed on the BDS layer, but not quite yet on the BlockBackend layer -could still create new requests. - -Signed-off-by: Kevin Wolf -Reviewed-by: Fam Zheng -Reviewed-by: Max Reitz -(cherry picked from commit fe5258a503a87e69be37c9ac48799e293809386e) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/block-backend.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/block/block-backend.c b/block/block-backend.c -index f2f75a9..2b837d1 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -121,6 +121,7 @@ static void blk_root_inherit_options(int *child_flags, QDict *child_options, - abort(); - } - static void blk_root_drained_begin(BdrvChild *child); -+static bool blk_root_drained_poll(BdrvChild *child); - static void blk_root_drained_end(BdrvChild *child); - - static void blk_root_change_media(BdrvChild *child, bool load); -@@ -294,6 +295,7 @@ static const BdrvChildRole child_root = { - .get_parent_desc = blk_root_get_parent_desc, - - .drained_begin = blk_root_drained_begin, -+ .drained_poll = blk_root_drained_poll, - .drained_end = blk_root_drained_end, - - .activate = blk_root_activate, -@@ -2192,6 +2194,13 @@ static void blk_root_drained_begin(BdrvChild *child) - } - } - -+static bool blk_root_drained_poll(BdrvChild *child) -+{ -+ BlockBackend *blk = child->opaque; -+ assert(blk->quiesce_counter); -+ return !!blk->in_flight; -+} -+ - static void blk_root_drained_end(BdrvChild *child) - { - BlockBackend *blk = child->opaque; --- -1.8.3.1 - diff --git a/0090-block-backend-Fix-potential-double-blk_delete.patch b/0090-block-backend-Fix-potential-double-blk_delete.patch deleted file mode 100644 index 56799a1..0000000 --- a/0090-block-backend-Fix-potential-double-blk_delete.patch +++ /dev/null @@ -1,67 +0,0 @@ 
-From 21a2ef76c6aa33f0058d149b1bfdde1d27ba1df4 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:22:04 +0100 -Subject: block-backend: Fix potential double blk_delete() - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-26-kwolf@redhat.com> -Patchwork-id: 82615 -O-Subject: [RHEL-8 qemu-kvm PATCH 35/44] block-backend: Fix potential double blk_delete() -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -blk_unref() first decreases the refcount of the BlockBackend and calls -blk_delete() if the refcount reaches zero. Requests can still be in -flight at this point, they are only drained during blk_delete(): - -At this point, arbitrary callbacks can run. If any callback takes a -temporary BlockBackend reference, it will first increase the refcount to -1 and then decrease it to 0 again, triggering another blk_delete(). This -will cause a use-after-free crash in the outer blk_delete(). - -Fix it by draining the BlockBackend before decreasing to refcount to 0. -Assert in blk_ref() that it never takes the first refcount (which would -mean that the BlockBackend is already being deleted). - -Signed-off-by: Kevin Wolf -Reviewed-by: Fam Zheng -Reviewed-by: Max Reitz -(cherry picked from commit 5ca9d21bd1c8eeb578d0964e31bd03d47c25773d) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/block-backend.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 2b837d1..94046f0 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -436,6 +436,7 @@ int blk_get_refcnt(BlockBackend *blk) - */ - void blk_ref(BlockBackend *blk) - { -+ assert(blk->refcnt > 0); - blk->refcnt++; - } - -@@ -448,7 +449,13 @@ void blk_unref(BlockBackend *blk) - { - if (blk) { - assert(blk->refcnt > 0); -- if (!--blk->refcnt) { -+ if (blk->refcnt > 1) { -+ blk->refcnt--; -+ } else { -+ blk_drain(blk); -+ /* blk_drain() cannot resurrect blk, nobody held a reference */ -+ assert(blk->refcnt == 1); -+ blk->refcnt = 0; - blk_delete(blk); - } - } --- -1.8.3.1 - diff --git a/0091-block-backend-Decrease-in_flight-only-after-callback.patch b/0091-block-backend-Decrease-in_flight-only-after-callback.patch deleted file mode 100644 index 69805e9..0000000 --- a/0091-block-backend-Decrease-in_flight-only-after-callback.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 91ae719381f75ed3554b0c5e1d8bf58583a9208f Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:22:05 +0100 -Subject: block-backend: Decrease in_flight only after callback - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-27-kwolf@redhat.com> -Patchwork-id: 82617 -O-Subject: [RHEL-8 qemu-kvm PATCH 36/44] block-backend: Decrease in_flight only after callback -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -Request callbacks can do pretty much anything, including operations that -will yield from the coroutine (such as draining the backend). In that -case, a decreased in_flight would be visible to other code and could -lead to a drain completing while the callback hasn't actually completed -yet. - -Note that reordering these operations forbids calling drain directly -inside an AIO callback. 
As Paolo explains, indirectly calling it is -okay: - -- Calling it through a coroutine is okay, because then - bdrv_drained_begin() goes through bdrv_co_yield_to_drain() and you - have in_flight=2 when bdrv_co_yield_to_drain() yields, then soon - in_flight=1 when the aio_co_wake() in the AIO callback completes, then - in_flight=0 after the bottom half starts. - -- Calling it through a bottom half would be okay too, as long as the AIO - callback remembers to do inc_in_flight/dec_in_flight just like - bdrv_co_yield_to_drain() and bdrv_co_drain_bh_cb() do - -A few more important cases that come to mind: - -- A coroutine that yields because of I/O is okay, with a sequence - similar to bdrv_co_yield_to_drain(). - -- A coroutine that yields with no I/O pending will correctly decrease - in_flight to zero before yielding. - -- Calling more AIO from the callback won't overflow the counter just - because of mutual recursion, because AIO functions always yield at - least once before invoking the callback. - -Signed-off-by: Kevin Wolf -Reviewed-by: Fam Zheng -Reviewed-by: Max Reitz -Reviewed-by: Paolo Bonzini -(cherry picked from commit 46aaf2a566e364a62315219255099cbf1c9b990d) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/block-backend.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 94046f0..9a3e060 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -1341,8 +1341,8 @@ static const AIOCBInfo blk_aio_em_aiocb_info = { - static void blk_aio_complete(BlkAioEmAIOCB *acb) - { - if (acb->has_returned) { -- blk_dec_in_flight(acb->rwco.blk); - acb->common.cb(acb->common.opaque, acb->rwco.ret); -+ blk_dec_in_flight(acb->rwco.blk); - qemu_aio_unref(acb); - } - } --- -1.8.3.1 - diff --git a/0092-blockjob-Lie-better-in-child_job_drained_poll.patch b/0092-blockjob-Lie-better-in-child_job_drained_poll.patch deleted file mode 100644 index 64f5aaa..0000000 --- a/0092-blockjob-Lie-better-in-child_job_drained_poll.patch +++ /dev/null @@ -1,104 +0,0 @@ -From bc17446b1e7c9578a3e3079173891c93998dfa00 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:22:06 +0100 -Subject: blockjob: Lie better in child_job_drained_poll() - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-28-kwolf@redhat.com> -Patchwork-id: 82616 -O-Subject: [RHEL-8 qemu-kvm PATCH 37/44] blockjob: Lie better in child_job_drained_poll() -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -Block jobs claim in .drained_poll() that they are in a quiescent state -as soon as job->deferred_to_main_loop is true. This is obviously wrong, -they still have a completion BH to run. We only get away with this -because commit 91af091f923 added an unconditional aio_poll(false) to the -drain functions, but this is bypassing the regular drain mechanisms. - -However, just removing this and telling that the job is still active -doesn't work either: The completion callbacks themselves call drain -functions (directly, or indirectly with bdrv_reopen), so they would -deadlock then. 
- -As a better lie, tell that the job is active as long as the BH is -pending, but falsely call it quiescent from the point in the BH when the -completion callback is called. At this point, nested drain calls won't -deadlock because they ignore the job, and outer drains will wait for the -job to really reach a quiescent state because the callback is already -running. - -Signed-off-by: Kevin Wolf -Reviewed-by: Max Reitz -(cherry picked from commit b5a7a0573530698ee448b063ac01d485e30446bd) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - blockjob.c | 2 +- - include/qemu/job.h | 3 +++ - job.c | 11 ++++++++++- - 3 files changed, 14 insertions(+), 2 deletions(-) - -diff --git a/blockjob.c b/blockjob.c -index 8d27e8e..617d86f 100644 ---- a/blockjob.c -+++ b/blockjob.c -@@ -164,7 +164,7 @@ static bool child_job_drained_poll(BdrvChild *c) - /* An inactive or completed job doesn't have any pending requests. Jobs - * with !job->busy are either already paused or have a pause point after - * being reentered, so no job driver code will run before they pause. */ -- if (!job->busy || job_is_completed(job) || job->deferred_to_main_loop) { -+ if (!job->busy || job_is_completed(job)) { - return false; - } - -diff --git a/include/qemu/job.h b/include/qemu/job.h -index 63c60ef..9e7cd1e 100644 ---- a/include/qemu/job.h -+++ b/include/qemu/job.h -@@ -76,6 +76,9 @@ typedef struct Job { - * Set to false by the job while the coroutine has yielded and may be - * re-entered by job_enter(). There may still be I/O or event loop activity - * pending. Accessed under block_job_mutex (in blockjob.c). -+ * -+ * When the job is deferred to the main loop, busy is true as long as the -+ * bottom half is still pending. 
- */ - bool busy; - -diff --git a/job.c b/job.c -index fa74558..00a1cd1 100644 ---- a/job.c -+++ b/job.c -@@ -857,7 +857,16 @@ static void job_exit(void *opaque) - AioContext *ctx = job->aio_context; - - aio_context_acquire(ctx); -+ -+ /* This is a lie, we're not quiescent, but still doing the completion -+ * callbacks. However, completion callbacks tend to involve operations that -+ * drain block nodes, and if .drained_poll still returned true, we would -+ * deadlock. */ -+ job->busy = false; -+ job_event_idle(job); -+ - job_completed(job); -+ - aio_context_release(ctx); - } - -@@ -872,8 +881,8 @@ static void coroutine_fn job_co_entry(void *opaque) - assert(job && job->driver && job->driver->run); - job_pause_point(job); - job->ret = job->driver->run(job, &job->err); -- job_event_idle(job); - job->deferred_to_main_loop = true; -+ job->busy = true; - aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job); - } - --- -1.8.3.1 - diff --git a/0093-block-Remove-aio_poll-in-bdrv_drain_poll-variants.patch b/0093-block-Remove-aio_poll-in-bdrv_drain_poll-variants.patch deleted file mode 100644 index 60cdc36..0000000 --- a/0093-block-Remove-aio_poll-in-bdrv_drain_poll-variants.patch +++ /dev/null @@ -1,64 +0,0 @@ -From ce7a9c21d6a43b736d5aa2041acbd5d1edca0070 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:22:07 +0100 -Subject: block: Remove aio_poll() in bdrv_drain_poll variants - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-29-kwolf@redhat.com> -Patchwork-id: 82619 -O-Subject: [RHEL-8 qemu-kvm PATCH 38/44] block: Remove aio_poll() in bdrv_drain_poll variants -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -bdrv_drain_poll_top_level() was buggy because it didn't release the -AioContext lock of the node to be drained before calling aio_poll(). -This way, callbacks called by aio_poll() would possibly take the lock a -second time and run into a deadlock with a nested AIO_WAIT_WHILE() call. 
- -However, it turns out that the aio_poll() call isn't actually needed any -more. It was introduced in commit 91af091f923, which is effectively -reverted by this patch. The cases it was supposed to fix are now covered -by bdrv_drain_poll(), which waits for block jobs to reach a quiescent -state. - -Signed-off-by: Kevin Wolf -Reviewed-by: Fam Zheng -Reviewed-by: Max Reitz -(cherry picked from commit 4cf077b59fc73eec29f8b7d082919dbb278bdc86) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/io.c | 8 -------- - 1 file changed, 8 deletions(-) - -diff --git a/block/io.c b/block/io.c -index 914ba78..8b81ff3 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -268,10 +268,6 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, - static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive, - BdrvChild *ignore_parent) - { -- /* Execute pending BHs first and check everything else only after the BHs -- * have executed. */ -- while (aio_poll(bs->aio_context, false)); -- - return bdrv_drain_poll(bs, recursive, ignore_parent, false); - } - -@@ -511,10 +507,6 @@ static bool bdrv_drain_all_poll(void) - BlockDriverState *bs = NULL; - bool result = false; - -- /* Execute pending BHs first (may modify the graph) and check everything -- * else only after the BHs have executed. */ -- while (aio_poll(qemu_get_aio_context(), false)); -- - /* bdrv_drain_poll() can't make changes to the graph and we are holding the - * main AioContext lock, so iterating bdrv_next_all_states() is safe. 
*/ - while ((bs = bdrv_next_all_states(bs))) { --- -1.8.3.1 - diff --git a/0094-test-bdrv-drain-Test-nested-poll-in-bdrv_drain_poll_.patch b/0094-test-bdrv-drain-Test-nested-poll-in-bdrv_drain_poll_.patch deleted file mode 100644 index 8de1ae2..0000000 --- a/0094-test-bdrv-drain-Test-nested-poll-in-bdrv_drain_poll_.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 6c315602205e494dd084a4692a06c16b0e233875 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:22:08 +0100 -Subject: test-bdrv-drain: Test nested poll in bdrv_drain_poll_top_level() - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-30-kwolf@redhat.com> -Patchwork-id: 82618 -O-Subject: [RHEL-8 qemu-kvm PATCH 39/44] test-bdrv-drain: Test nested poll in bdrv_drain_poll_top_level() -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -This is a regression test for a deadlock that could occur in callbacks -called from the aio_poll() in bdrv_drain_poll_top_level(). The -AioContext lock wasn't released and therefore would be taken a second -time in the callback. This would cause a possible AIO_WAIT_WHILE() in -the callback to hang. - -Signed-off-by: Kevin Wolf -Reviewed-by: Fam Zheng -(cherry picked from commit ecc1a5c790cf2c7732cb9755ca388c2fe108d1a1) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/test-bdrv-drain.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c -index 05f3b55..f4b57f7 100644 ---- a/tests/test-bdrv-drain.c -+++ b/tests/test-bdrv-drain.c -@@ -636,6 +636,17 @@ static void test_iothread_aio_cb(void *opaque, int ret) - qemu_event_set(&done_event); - } - -+static void test_iothread_main_thread_bh(void *opaque) -+{ -+ struct test_iothread_data *data = opaque; -+ -+ /* Test that the AioContext is not yet locked in a random BH that is -+ * executed during drain, otherwise this would deadlock. 
*/ -+ aio_context_acquire(bdrv_get_aio_context(data->bs)); -+ bdrv_flush(data->bs); -+ aio_context_release(bdrv_get_aio_context(data->bs)); -+} -+ - /* - * Starts an AIO request on a BDS that runs in the AioContext of iothread 1. - * The request involves a BH on iothread 2 before it can complete. -@@ -705,6 +716,8 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) - aio_context_acquire(ctx_a); - } - -+ aio_bh_schedule_oneshot(ctx_a, test_iothread_main_thread_bh, &data); -+ - /* The request is running on the IOThread a. Draining its block device - * will make sure that it has completed as far as the BDS is concerned, - * but the drain in this thread can continue immediately after --- -1.8.3.1 - diff --git a/0095-job-Avoid-deadlocks-in-job_completed_txn_abort.patch b/0095-job-Avoid-deadlocks-in-job_completed_txn_abort.patch deleted file mode 100644 index 0b8a8fd..0000000 --- a/0095-job-Avoid-deadlocks-in-job_completed_txn_abort.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 287d4267dcb2d5f262dba7f6e7f35dcd294b622a Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:22:09 +0100 -Subject: job: Avoid deadlocks in job_completed_txn_abort() - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-31-kwolf@redhat.com> -Patchwork-id: 82622 -O-Subject: [RHEL-8 qemu-kvm PATCH 40/44] job: Avoid deadlocks in job_completed_txn_abort() -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -Amongst others, job_finalize_single() calls the .prepare/.commit/.abort -callbacks of the individual job driver. Recently, their use was adapted -for all block jobs so that they involve code calling AIO_WAIT_WHILE() -now. Such code must be called under the AioContext lock for the -respective job, but without holding any other AioContext lock. 
- -Signed-off-by: Kevin Wolf -Reviewed-by: Max Reitz -(cherry picked from commit 644f3a29bd4974aefd46d2adb5062d86063c8a50) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - job.c | 16 +++++++++++----- - 1 file changed, 11 insertions(+), 5 deletions(-) - -diff --git a/job.c b/job.c -index 00a1cd1..0b02186 100644 ---- a/job.c -+++ b/job.c -@@ -718,6 +718,7 @@ static void job_cancel_async(Job *job, bool force) - - static void job_completed_txn_abort(Job *job) - { -+ AioContext *outer_ctx = job->aio_context; - AioContext *ctx; - JobTxn *txn = job->txn; - Job *other_job; -@@ -731,23 +732,26 @@ static void job_completed_txn_abort(Job *job) - txn->aborting = true; - job_txn_ref(txn); - -- /* We are the first failed job. Cancel other jobs. */ -- QLIST_FOREACH(other_job, &txn->jobs, txn_list) { -- ctx = other_job->aio_context; -- aio_context_acquire(ctx); -- } -+ /* We can only hold the single job's AioContext lock while calling -+ * job_finalize_single() because the finalization callbacks can involve -+ * calls of AIO_WAIT_WHILE(), which could deadlock otherwise. */ -+ aio_context_release(outer_ctx); - - /* Other jobs are effectively cancelled by us, set the status for - * them; this job, however, may or may not be cancelled, depending - * on the caller, so leave it. 
*/ - QLIST_FOREACH(other_job, &txn->jobs, txn_list) { - if (other_job != job) { -+ ctx = other_job->aio_context; -+ aio_context_acquire(ctx); - job_cancel_async(other_job, false); -+ aio_context_release(ctx); - } - } - while (!QLIST_EMPTY(&txn->jobs)) { - other_job = QLIST_FIRST(&txn->jobs); - ctx = other_job->aio_context; -+ aio_context_acquire(ctx); - if (!job_is_completed(other_job)) { - assert(job_is_cancelled(other_job)); - job_finish_sync(other_job, NULL, NULL); -@@ -756,6 +760,8 @@ static void job_completed_txn_abort(Job *job) - aio_context_release(ctx); - } - -+ aio_context_acquire(outer_ctx); -+ - job_txn_unref(txn); - } - --- -1.8.3.1 - diff --git a/0096-test-bdrv-drain-AIO_WAIT_WHILE-in-job-.commit-.abort.patch b/0096-test-bdrv-drain-AIO_WAIT_WHILE-in-job-.commit-.abort.patch deleted file mode 100644 index 44cdf9f..0000000 --- a/0096-test-bdrv-drain-AIO_WAIT_WHILE-in-job-.commit-.abort.patch +++ /dev/null @@ -1,241 +0,0 @@ -From 10fbd3c89739a1879f47f2a2256831ce5e1ae7ad Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:22:10 +0100 -Subject: test-bdrv-drain: AIO_WAIT_WHILE() in job .commit/.abort - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-32-kwolf@redhat.com> -Patchwork-id: 82620 -O-Subject: [RHEL-8 qemu-kvm PATCH 41/44] test-bdrv-drain: AIO_WAIT_WHILE() in job .commit/.abort -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -This adds tests for calling AIO_WAIT_WHILE() in the .commit and .abort -callbacks. Both reasons why .abort could be called for a single job are -tested: Either .run or .prepare could return an error. - -Signed-off-by: Kevin Wolf -Reviewed-by: Max Reitz -(cherry picked from commit d49725af46a7710cde02cc120b7f1e485154b483) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/test-bdrv-drain.c | 116 +++++++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 104 insertions(+), 12 deletions(-) - -diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c -index f4b57f7..d6202b2 100644 ---- a/tests/test-bdrv-drain.c -+++ b/tests/test-bdrv-drain.c -@@ -784,6 +784,8 @@ static void test_iothread_drain_subtree(void) - - typedef struct TestBlockJob { - BlockJob common; -+ int run_ret; -+ int prepare_ret; - bool should_complete; - } TestBlockJob; - -@@ -793,7 +795,23 @@ static int test_job_prepare(Job *job) - - /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */ - blk_flush(s->common.blk); -- return 0; -+ return s->prepare_ret; -+} -+ -+static void test_job_commit(Job *job) -+{ -+ TestBlockJob *s = container_of(job, TestBlockJob, common.job); -+ -+ /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */ -+ blk_flush(s->common.blk); -+} -+ -+static void test_job_abort(Job *job) -+{ -+ TestBlockJob *s = container_of(job, TestBlockJob, common.job); -+ -+ /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */ -+ blk_flush(s->common.blk); - } - - static int coroutine_fn test_job_run(Job *job, Error **errp) -@@ -809,7 +827,7 @@ static int coroutine_fn test_job_run(Job *job, Error **errp) - job_pause_point(&s->common.job); - } - -- return 0; -+ return s->run_ret; - } - - static void test_job_complete(Job *job, Error **errp) -@@ -827,14 +845,24 @@ BlockJobDriver test_job_driver = { - .run = test_job_run, - .complete = test_job_complete, - .prepare = test_job_prepare, -+ .commit = test_job_commit, -+ .abort = test_job_abort, - }, - }; - --static void test_blockjob_common(enum drain_type drain_type, bool use_iothread) -+enum test_job_result { -+ TEST_JOB_SUCCESS, -+ TEST_JOB_FAIL_RUN, -+ TEST_JOB_FAIL_PREPARE, -+}; -+ -+static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, -+ enum test_job_result result) - { - BlockBackend *blk_src, *blk_target; - 
BlockDriverState *src, *target; - BlockJob *job; -+ TestBlockJob *tjob; - IOThread *iothread = NULL; - AioContext *ctx; - int ret; -@@ -858,9 +886,23 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread) - blk_insert_bs(blk_target, target, &error_abort); - - aio_context_acquire(ctx); -- job = block_job_create("job0", &test_job_driver, NULL, src, 0, BLK_PERM_ALL, -- 0, 0, NULL, NULL, &error_abort); -+ tjob = block_job_create("job0", &test_job_driver, NULL, src, -+ 0, BLK_PERM_ALL, -+ 0, 0, NULL, NULL, &error_abort); -+ job = &tjob->common; - block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort); -+ -+ switch (result) { -+ case TEST_JOB_SUCCESS: -+ break; -+ case TEST_JOB_FAIL_RUN: -+ tjob->run_ret = -EIO; -+ break; -+ case TEST_JOB_FAIL_PREPARE: -+ tjob->prepare_ret = -EIO; -+ break; -+ } -+ - job_start(&job->job); - aio_context_release(ctx); - -@@ -918,7 +960,7 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread) - - aio_context_acquire(ctx); - ret = job_complete_sync(&job->job, &error_abort); -- g_assert_cmpint(ret, ==, 0); -+ g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 
0 : -EIO)); - - if (use_iothread) { - blk_set_aio_context(blk_src, qemu_get_aio_context()); -@@ -937,32 +979,68 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread) - - static void test_blockjob_drain_all(void) - { -- test_blockjob_common(BDRV_DRAIN_ALL, false); -+ test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_SUCCESS); - } - - static void test_blockjob_drain(void) - { -- test_blockjob_common(BDRV_DRAIN, false); -+ test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS); - } - - static void test_blockjob_drain_subtree(void) - { -- test_blockjob_common(BDRV_SUBTREE_DRAIN, false); -+ test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS); -+} -+ -+static void test_blockjob_error_drain_all(void) -+{ -+ test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN); -+ test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_PREPARE); -+} -+ -+static void test_blockjob_error_drain(void) -+{ -+ test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_RUN); -+ test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE); -+} -+ -+static void test_blockjob_error_drain_subtree(void) -+{ -+ test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN); -+ test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE); - } - - static void test_blockjob_iothread_drain_all(void) - { -- test_blockjob_common(BDRV_DRAIN_ALL, true); -+ test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS); - } - - static void test_blockjob_iothread_drain(void) - { -- test_blockjob_common(BDRV_DRAIN, true); -+ test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS); - } - - static void test_blockjob_iothread_drain_subtree(void) - { -- test_blockjob_common(BDRV_SUBTREE_DRAIN, true); -+ test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS); -+} -+ -+static void test_blockjob_iothread_error_drain_all(void) -+{ -+ test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN); -+ test_blockjob_common(BDRV_DRAIN_ALL, true, 
TEST_JOB_FAIL_PREPARE); -+} -+ -+static void test_blockjob_iothread_error_drain(void) -+{ -+ test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_RUN); -+ test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE); -+} -+ -+static void test_blockjob_iothread_error_drain_subtree(void) -+{ -+ test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN); -+ test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE); - } - - -@@ -1433,6 +1511,13 @@ int main(int argc, char **argv) - g_test_add_func("/bdrv-drain/blockjob/drain_subtree", - test_blockjob_drain_subtree); - -+ g_test_add_func("/bdrv-drain/blockjob/error/drain_all", -+ test_blockjob_error_drain_all); -+ g_test_add_func("/bdrv-drain/blockjob/error/drain", -+ test_blockjob_error_drain); -+ g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree", -+ test_blockjob_error_drain_subtree); -+ - g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all", - test_blockjob_iothread_drain_all); - g_test_add_func("/bdrv-drain/blockjob/iothread/drain", -@@ -1440,6 +1525,13 @@ int main(int argc, char **argv) - g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree", - test_blockjob_iothread_drain_subtree); - -+ g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all", -+ test_blockjob_iothread_error_drain_all); -+ g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain", -+ test_blockjob_iothread_error_drain); -+ g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree", -+ test_blockjob_iothread_error_drain_subtree); -+ - g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain); - g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all); - g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); --- -1.8.3.1 - diff --git a/0097-test-bdrv-drain-Fix-outdated-comments.patch b/0097-test-bdrv-drain-Fix-outdated-comments.patch deleted file mode 100644 index 08e50a7..0000000 --- a/0097-test-bdrv-drain-Fix-outdated-comments.patch +++ /dev/null @@ 
-1,69 +0,0 @@ -From 1eaa60bc24cb3fecba8da61f21c44e6f4c9ee4c1 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:22:11 +0100 -Subject: test-bdrv-drain: Fix outdated comments - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-33-kwolf@redhat.com> -Patchwork-id: 82621 -O-Subject: [RHEL-8 qemu-kvm PATCH 42/44] test-bdrv-drain: Fix outdated comments -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -Commit 89bd030533e changed the test case from using job_sleep_ns() to -using qemu_co_sleep_ns() instead. Also, block_job_sleep_ns() became -job_sleep_ns() in commit 5d43e86e11f. - -In both cases, some comments in the test case were not updated. Do that -now. - -Reported-by: Max Reitz -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -(cherry picked from commit 5599c162c3bec2bc8f0123e4d5802a70d9984b3b) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/test-bdrv-drain.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c -index d6202b2..7e7ba9b 100644 ---- a/tests/test-bdrv-drain.c -+++ b/tests/test-bdrv-drain.c -@@ -820,9 +820,9 @@ static int coroutine_fn test_job_run(Job *job, Error **errp) - - job_transition_to_ready(&s->common.job); - while (!s->should_complete) { -- /* Avoid block_job_sleep_ns() because it marks the job as !busy. We -- * want to emulate some actual activity (probably some I/O) here so -- * that drain has to wait for this acitivity to stop. */ -+ /* Avoid job_sleep_ns() because it marks the job as !busy. We want to -+ * emulate some actual activity (probably some I/O) here so that drain -+ * has to wait for this activity to stop. 
*/ - qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); - job_pause_point(&s->common.job); - } -@@ -908,7 +908,7 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, - - g_assert_cmpint(job->job.pause_count, ==, 0); - g_assert_false(job->job.paused); -- g_assert_true(job->job.busy); /* We're in job_sleep_ns() */ -+ g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ - - do_drain_begin_unlocked(drain_type, src); - -@@ -956,7 +956,7 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, - - g_assert_cmpint(job->job.pause_count, ==, 0); - g_assert_false(job->job.paused); -- g_assert_true(job->job.busy); /* We're in job_sleep_ns() */ -+ g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ - - aio_context_acquire(ctx); - ret = job_complete_sync(&job->job, &error_abort); --- -1.8.3.1 - diff --git a/0098-block-Use-a-single-global-AioWait.patch b/0098-block-Use-a-single-global-AioWait.patch deleted file mode 100644 index 4fc26b8..0000000 --- a/0098-block-Use-a-single-global-AioWait.patch +++ /dev/null @@ -1,367 +0,0 @@ -From ea2355d819127ace6195e1d007bc305a49e7d465 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:22:12 +0100 -Subject: block: Use a single global AioWait - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-34-kwolf@redhat.com> -Patchwork-id: 82623 -O-Subject: [RHEL-8 qemu-kvm PATCH 43/44] block: Use a single global AioWait -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -When draining a block node, we recurse to its parent and for subtree -drains also to its children. A single AIO_WAIT_WHILE() is then used to -wait for bdrv_drain_poll() to become true, which depends on all of the -nodes we recursed to. However, if the respective child or parent becomes -quiescent and calls bdrv_wakeup(), only the AioWait of the child/parent -is checked, while AIO_WAIT_WHILE() depends on the AioWait of the -original node. 
- -Fix this by using a single AioWait for all callers of AIO_WAIT_WHILE(). - -This may mean that the draining thread gets a few more unnecessary -wakeups because an unrelated operation got completed, but we already -wake it up when something _could_ have changed rather than only if it -has certainly changed. - -Apart from that, drain is a slow path anyway. In theory it would be -possible to use wakeups more selectively and still correctly, but the -gains are likely not worth the additional complexity. In fact, this -patch is a nice simplification for some places in the code. - -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Max Reitz -(cherry picked from commit cfe29d8294e06420e15d4938421ae006c8ac49e7) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block.c | 5 ----- - block/block-backend.c | 11 ++++------- - block/io.c | 7 ++----- - blockjob.c | 13 +------------ - include/block/aio-wait.h | 22 +++++++++++----------- - include/block/block.h | 6 +----- - include/block/block_int.h | 3 --- - include/block/blockjob.h | 10 ---------- - job.c | 3 +-- - util/aio-wait.c | 11 ++++++----- - 10 files changed, 26 insertions(+), 65 deletions(-) - -diff --git a/block.c b/block.c -index 39f373e..9b55956 100644 ---- a/block.c -+++ b/block.c -@@ -4865,11 +4865,6 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs) - return bs ? bs->aio_context : qemu_get_aio_context(); - } - --AioWait *bdrv_get_aio_wait(BlockDriverState *bs) --{ -- return bs ? &bs->wait : NULL; --} -- - void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co) - { - aio_co_enter(bdrv_get_aio_context(bs), co); -diff --git a/block/block-backend.c b/block/block-backend.c -index 9a3e060..723ab5a 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -88,7 +88,6 @@ struct BlockBackend { - * Accessed with atomic ops. 
- */ - unsigned int in_flight; -- AioWait wait; - }; - - typedef struct BlockBackendAIOCB { -@@ -1300,7 +1299,7 @@ static void blk_inc_in_flight(BlockBackend *blk) - static void blk_dec_in_flight(BlockBackend *blk) - { - atomic_dec(&blk->in_flight); -- aio_wait_kick(&blk->wait); -+ aio_wait_kick(); - } - - static void error_callback_bh(void *opaque) -@@ -1601,9 +1600,8 @@ void blk_drain(BlockBackend *blk) - } - - /* We may have -ENOMEDIUM completions in flight */ -- AIO_WAIT_WHILE(&blk->wait, -- blk_get_aio_context(blk), -- atomic_mb_read(&blk->in_flight) > 0); -+ AIO_WAIT_WHILE(blk_get_aio_context(blk), -+ atomic_mb_read(&blk->in_flight) > 0); - - if (bs) { - bdrv_drained_end(bs); -@@ -1622,8 +1620,7 @@ void blk_drain_all(void) - aio_context_acquire(ctx); - - /* We may have -ENOMEDIUM completions in flight */ -- AIO_WAIT_WHILE(&blk->wait, ctx, -- atomic_mb_read(&blk->in_flight) > 0); -+ AIO_WAIT_WHILE(ctx, atomic_mb_read(&blk->in_flight) > 0); - - aio_context_release(ctx); - } -diff --git a/block/io.c b/block/io.c -index 8b81ff3..bd9d688 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -38,8 +38,6 @@ - /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ - #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) - --static AioWait drain_all_aio_wait; -- - static void bdrv_parent_cb_resize(BlockDriverState *bs); - static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int bytes, BdrvRequestFlags flags); -@@ -557,7 +555,7 @@ void bdrv_drain_all_begin(void) - } - - /* Now poll the in-flight requests */ -- AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll()); -+ AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll()); - - while ((bs = bdrv_next_all_states(bs))) { - bdrv_drain_assert_idle(bs); -@@ -713,8 +711,7 @@ void bdrv_inc_in_flight(BlockDriverState *bs) - - void bdrv_wakeup(BlockDriverState *bs) - { -- aio_wait_kick(bdrv_get_aio_wait(bs)); -- aio_wait_kick(&drain_all_aio_wait); -+ aio_wait_kick(); - } - - void 
bdrv_dec_in_flight(BlockDriverState *bs) -diff --git a/blockjob.c b/blockjob.c -index 617d86f..06f2429 100644 ---- a/blockjob.c -+++ b/blockjob.c -@@ -221,20 +221,9 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, - return 0; - } - --void block_job_wakeup_all_bdrv(BlockJob *job) --{ -- GSList *l; -- -- for (l = job->nodes; l; l = l->next) { -- BdrvChild *c = l->data; -- bdrv_wakeup(c->bs); -- } --} -- - static void block_job_on_idle(Notifier *n, void *opaque) - { -- BlockJob *job = opaque; -- block_job_wakeup_all_bdrv(job); -+ aio_wait_kick(); - } - - bool block_job_is_internal(BlockJob *job) -diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h -index 600fad1..afd0ff7 100644 ---- a/include/block/aio-wait.h -+++ b/include/block/aio-wait.h -@@ -30,14 +30,15 @@ - /** - * AioWait: - * -- * An object that facilitates synchronous waiting on a condition. The main -- * loop can wait on an operation running in an IOThread as follows: -+ * An object that facilitates synchronous waiting on a condition. A single -+ * global AioWait object (global_aio_wait) is used internally. -+ * -+ * The main loop can wait on an operation running in an IOThread as follows: - * -- * AioWait *wait = ...; - * AioContext *ctx = ...; - * MyWork work = { .done = false }; - * schedule_my_work_in_iothread(ctx, &work); -- * AIO_WAIT_WHILE(wait, ctx, !work.done); -+ * AIO_WAIT_WHILE(ctx, !work.done); - * - * The IOThread must call aio_wait_kick() to notify the main loop when - * work.done changes: -@@ -46,7 +47,7 @@ - * { - * ... - * work.done = true; -- * aio_wait_kick(wait); -+ * aio_wait_kick(); - * } - */ - typedef struct { -@@ -54,9 +55,10 @@ typedef struct { - unsigned num_waiters; - } AioWait; - -+extern AioWait global_aio_wait; -+ - /** - * AIO_WAIT_WHILE: -- * @wait: the aio wait object - * @ctx: the aio context, or NULL if multiple aio contexts (for which the - * caller does not hold a lock) are involved in the polling condition. 
- * @cond: wait while this conditional expression is true -@@ -72,9 +74,9 @@ typedef struct { - * wait on conditions between two IOThreads since that could lead to deadlock, - * go via the main loop instead. - */ --#define AIO_WAIT_WHILE(wait, ctx, cond) ({ \ -+#define AIO_WAIT_WHILE(ctx, cond) ({ \ - bool waited_ = false; \ -- AioWait *wait_ = (wait); \ -+ AioWait *wait_ = &global_aio_wait; \ - AioContext *ctx_ = (ctx); \ - /* Increment wait_->num_waiters before evaluating cond. */ \ - atomic_inc(&wait_->num_waiters); \ -@@ -102,14 +104,12 @@ typedef struct { - - /** - * aio_wait_kick: -- * @wait: the aio wait object that should re-evaluate its condition -- * - * Wake up the main thread if it is waiting on AIO_WAIT_WHILE(). During - * synchronous operations performed in an IOThread, the main thread lets the - * IOThread's event loop run, waiting for the operation to complete. A - * aio_wait_kick() call will wake up the main thread. - */ --void aio_wait_kick(AioWait *wait); -+void aio_wait_kick(void); - - /** - * aio_wait_bh_oneshot: -diff --git a/include/block/block.h b/include/block/block.h -index 4e0871a..4edc1e8 100644 ---- a/include/block/block.h -+++ b/include/block/block.h -@@ -410,13 +410,9 @@ void bdrv_drain_all_begin(void); - void bdrv_drain_all_end(void); - void bdrv_drain_all(void); - --/* Returns NULL when bs == NULL */ --AioWait *bdrv_get_aio_wait(BlockDriverState *bs); -- - #define BDRV_POLL_WHILE(bs, cond) ({ \ - BlockDriverState *bs_ = (bs); \ -- AIO_WAIT_WHILE(bdrv_get_aio_wait(bs_), \ -- bdrv_get_aio_context(bs_), \ -+ AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \ - cond); }) - - int bdrv_pdiscard(BdrvChild *child, int64_t offset, int bytes); -diff --git a/include/block/block_int.h b/include/block/block_int.h -index 4000d2a..92ecbd8 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -794,9 +794,6 @@ struct BlockDriverState { - unsigned int in_flight; - unsigned int serialising_in_flight; - -- /* Kicked to signal main loop 
when a request completes. */ -- AioWait wait; -- - /* counter for nested bdrv_io_plug. - * Accessed with atomic ops. - */ -diff --git a/include/block/blockjob.h b/include/block/blockjob.h -index 2290bbb..ede0bd8 100644 ---- a/include/block/blockjob.h -+++ b/include/block/blockjob.h -@@ -122,16 +122,6 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, - void block_job_remove_all_bdrv(BlockJob *job); - - /** -- * block_job_wakeup_all_bdrv: -- * @job: The block job -- * -- * Calls bdrv_wakeup() for all BlockDriverStates that have been added to the -- * job. This function is to be called whenever child_job_drained_poll() would -- * go from true to false to notify waiting drain requests. -- */ --void block_job_wakeup_all_bdrv(BlockJob *job); -- --/** - * block_job_set_speed: - * @job: The job to set the speed for. - * @speed: The new value -diff --git a/job.c b/job.c -index 0b02186..ed4da6f 100644 ---- a/job.c -+++ b/job.c -@@ -978,7 +978,6 @@ void job_complete(Job *job, Error **errp) - int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp) - { - Error *local_err = NULL; -- AioWait dummy_wait = {}; - int ret; - - job_ref(job); -@@ -992,7 +991,7 @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp) - return -EBUSY; - } - -- AIO_WAIT_WHILE(&dummy_wait, job->aio_context, -+ AIO_WAIT_WHILE(job->aio_context, - (job_drain(job), !job_is_completed(job))); - - ret = (job_is_cancelled(job) && job->ret == 0) ? 
-ECANCELED : job->ret; -diff --git a/util/aio-wait.c b/util/aio-wait.c -index b8a8f86..b487749 100644 ---- a/util/aio-wait.c -+++ b/util/aio-wait.c -@@ -26,21 +26,22 @@ - #include "qemu/main-loop.h" - #include "block/aio-wait.h" - -+AioWait global_aio_wait; -+ - static void dummy_bh_cb(void *opaque) - { - /* The point is to make AIO_WAIT_WHILE()'s aio_poll() return */ - } - --void aio_wait_kick(AioWait *wait) -+void aio_wait_kick(void) - { - /* The barrier (or an atomic op) is in the caller. */ -- if (atomic_read(&wait->num_waiters)) { -+ if (atomic_read(&global_aio_wait.num_waiters)) { - aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL); - } - } - - typedef struct { -- AioWait wait; - bool done; - QEMUBHFunc *cb; - void *opaque; -@@ -54,7 +55,7 @@ static void aio_wait_bh(void *opaque) - data->cb(data->opaque); - - data->done = true; -- aio_wait_kick(&data->wait); -+ aio_wait_kick(); - } - - void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) -@@ -67,5 +68,5 @@ void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) - assert(qemu_get_current_aio_context() == qemu_get_aio_context()); - - aio_bh_schedule_oneshot(ctx, aio_wait_bh, &data); -- AIO_WAIT_WHILE(&data.wait, ctx, !data.done); -+ AIO_WAIT_WHILE(ctx, !data.done); - } --- -1.8.3.1 - diff --git a/0099-test-bdrv-drain-Test-draining-job-source-child-and-p.patch b/0099-test-bdrv-drain-Test-draining-job-source-child-and-p.patch deleted file mode 100644 index 5533b5f..0000000 --- a/0099-test-bdrv-drain-Test-draining-job-source-child-and-p.patch +++ /dev/null @@ -1,198 +0,0 @@ -From f31ce5e7d486c860d44cb103b672f81de9bc537c Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 10 Oct 2018 20:22:13 +0100 -Subject: test-bdrv-drain: Test draining job source child and parent - -RH-Author: Kevin Wolf -Message-id: <20181010202213.7372-35-kwolf@redhat.com> -Patchwork-id: 82624 -O-Subject: [RHEL-8 qemu-kvm PATCH 44/44] test-bdrv-drain: Test draining job source child and 
parent -Bugzilla: 1637976 -RH-Acked-by: Max Reitz -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -For the block job drain test, don't only test draining the source and -the target node, but create a backing chain for the source -(source_backing <- source <- source_overlay) and test draining each of -the nodes in it. - -When using iothreads, the source node (and therefore the job) is in a -different AioContext than the drain, which happens from the main -thread. This way, the main thread waits in AIO_WAIT_WHILE() for the -iothread to make process and aio_wait_kick() is required to notify it. -The test validates that calling bdrv_wakeup() for a child or a parent -node will actually notify AIO_WAIT_WHILE() instead of letting it hang. - -Increase the sleep time a bit (to 1 ms) because the test case is racy -and with the shorter sleep, it didn't reproduce the bug it is supposed -to test for me under 'rr record -n'. - -This was because bdrv_drain_invoke_entry() (in the main thread) was only -called after the job had already reached the pause point, so we got a -bdrv_dec_in_flight() from the main thread and the additional -aio_wait_kick() when the job becomes idle (that we really wanted to test -here) wasn't even necessary any more to make progress. - -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Max Reitz -(cherry picked from commit d8b3afd597d54e496809b05ac39ac29a5799664f) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/test-bdrv-drain.c | 77 ++++++++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 69 insertions(+), 8 deletions(-) - -diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c -index 7e7ba9b..8641b54 100644 ---- a/tests/test-bdrv-drain.c -+++ b/tests/test-bdrv-drain.c -@@ -786,6 +786,7 @@ typedef struct TestBlockJob { - BlockJob common; - int run_ret; - int prepare_ret; -+ bool running; - bool should_complete; - } TestBlockJob; - -@@ -818,12 +819,17 @@ static int coroutine_fn test_job_run(Job *job, Error **errp) - { - TestBlockJob *s = container_of(job, TestBlockJob, common.job); - -+ /* We are running the actual job code past the pause point in -+ * job_co_entry(). */ -+ s->running = true; -+ - job_transition_to_ready(&s->common.job); - while (!s->should_complete) { - /* Avoid job_sleep_ns() because it marks the job as !busy. We want to - * emulate some actual activity (probably some I/O) here so that drain - * has to wait for this activity to stop. */ -- qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); -+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); -+ - job_pause_point(&s->common.job); - } - -@@ -856,11 +862,19 @@ enum test_job_result { - TEST_JOB_FAIL_PREPARE, - }; - --static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, -- enum test_job_result result) -+enum test_job_drain_node { -+ TEST_JOB_DRAIN_SRC, -+ TEST_JOB_DRAIN_SRC_CHILD, -+ TEST_JOB_DRAIN_SRC_PARENT, -+}; -+ -+static void test_blockjob_common_drain_node(enum drain_type drain_type, -+ bool use_iothread, -+ enum test_job_result result, -+ enum test_job_drain_node drain_node) - { - BlockBackend *blk_src, *blk_target; -- BlockDriverState *src, *target; -+ BlockDriverState *src, *src_backing, *src_overlay, *target, *drain_bs; - BlockJob *job; - TestBlockJob *tjob; - IOThread *iothread = NULL; -@@ -869,8 +883,32 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, - - src = bdrv_new_open_driver(&bdrv_test, 
"source", BDRV_O_RDWR, - &error_abort); -+ src_backing = bdrv_new_open_driver(&bdrv_test, "source-backing", -+ BDRV_O_RDWR, &error_abort); -+ src_overlay = bdrv_new_open_driver(&bdrv_test, "source-overlay", -+ BDRV_O_RDWR, &error_abort); -+ -+ bdrv_set_backing_hd(src_overlay, src, &error_abort); -+ bdrv_unref(src); -+ bdrv_set_backing_hd(src, src_backing, &error_abort); -+ bdrv_unref(src_backing); -+ - blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); -- blk_insert_bs(blk_src, src, &error_abort); -+ blk_insert_bs(blk_src, src_overlay, &error_abort); -+ -+ switch (drain_node) { -+ case TEST_JOB_DRAIN_SRC: -+ drain_bs = src; -+ break; -+ case TEST_JOB_DRAIN_SRC_CHILD: -+ drain_bs = src_backing; -+ break; -+ case TEST_JOB_DRAIN_SRC_PARENT: -+ drain_bs = src_overlay; -+ break; -+ default: -+ g_assert_not_reached(); -+ } - - if (use_iothread) { - iothread = iothread_new(); -@@ -906,11 +944,21 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, - job_start(&job->job); - aio_context_release(ctx); - -+ if (use_iothread) { -+ /* job_co_entry() is run in the I/O thread, wait for the actual job -+ * code to start (we don't want to catch the job in the pause point in -+ * job_co_entry(). 
*/ -+ while (!tjob->running) { -+ aio_poll(qemu_get_aio_context(), false); -+ } -+ } -+ - g_assert_cmpint(job->job.pause_count, ==, 0); - g_assert_false(job->job.paused); -+ g_assert_true(tjob->running); - g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ - -- do_drain_begin_unlocked(drain_type, src); -+ do_drain_begin_unlocked(drain_type, drain_bs); - - if (drain_type == BDRV_DRAIN_ALL) { - /* bdrv_drain_all() drains both src and target */ -@@ -921,7 +969,7 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, - g_assert_true(job->job.paused); - g_assert_false(job->job.busy); /* The job is paused */ - -- do_drain_end_unlocked(drain_type, src); -+ do_drain_end_unlocked(drain_type, drain_bs); - - if (use_iothread) { - /* paused is reset in the I/O thread, wait for it */ -@@ -969,7 +1017,7 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, - - blk_unref(blk_src); - blk_unref(blk_target); -- bdrv_unref(src); -+ bdrv_unref(src_overlay); - bdrv_unref(target); - - if (iothread) { -@@ -977,6 +1025,19 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, - } - } - -+static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, -+ enum test_job_result result) -+{ -+ test_blockjob_common_drain_node(drain_type, use_iothread, result, -+ TEST_JOB_DRAIN_SRC); -+ test_blockjob_common_drain_node(drain_type, use_iothread, result, -+ TEST_JOB_DRAIN_SRC_CHILD); -+ if (drain_type == BDRV_SUBTREE_DRAIN) { -+ test_blockjob_common_drain_node(drain_type, use_iothread, result, -+ TEST_JOB_DRAIN_SRC_PARENT); -+ } -+} -+ - static void test_blockjob_drain_all(void) - { - test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_SUCCESS); --- -1.8.3.1 - diff --git a/0100-block-rbd-pull-out-qemu_rbd_convert_options.patch b/0100-block-rbd-pull-out-qemu_rbd_convert_options.patch deleted file mode 100644 index a88abfb..0000000 --- 
a/0100-block-rbd-pull-out-qemu_rbd_convert_options.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 5fcd80dc34d84739e75e6d1ec5e21ad73af14ff9 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Wed, 10 Oct 2018 20:30:12 +0100 -Subject: block/rbd: pull out qemu_rbd_convert_options - -RH-Author: John Snow -Message-id: <20181010203015.11719-2-jsnow@redhat.com> -Patchwork-id: 82627 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 1/4] block/rbd: pull out qemu_rbd_convert_options -Bugzilla: 1635585 -RH-Acked-by: Markus Armbruster -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth - -From: Jeff Cody - -Code movement to pull the conversion from Qdict to BlockdevOptionsRbd -into a helper function. - -Reviewed-by: Eric Blake -Reviewed-by: John Snow -Signed-off-by: Jeff Cody -Message-id: 5b49a980f2cde6610ab1df41bb0277d00b5db893.1536704901.git.jcody@redhat.com -Signed-off-by: Jeff Cody -(cherry picked from commit f24b03b56cdb28d753b4ff9ae210d555f14cb0d8) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula ---- - block/rbd.c | 36 ++++++++++++++++++++++++------------ - 1 file changed, 24 insertions(+), 12 deletions(-) - -diff --git a/block/rbd.c b/block/rbd.c -index ca8e5bb..b199450 100644 ---- a/block/rbd.c -+++ b/block/rbd.c -@@ -655,12 +655,34 @@ failed_opts: - return r; - } - -+static int qemu_rbd_convert_options(QDict *options, BlockdevOptionsRbd **opts, -+ Error **errp) -+{ -+ Visitor *v; -+ Error *local_err = NULL; -+ -+ /* Convert the remaining options into a QAPI object */ -+ v = qobject_input_visitor_new_flat_confused(options, errp); -+ if (!v) { -+ return -EINVAL; -+ } -+ -+ visit_type_BlockdevOptionsRbd(v, NULL, opts, &local_err); -+ visit_free(v); -+ -+ if (local_err) { -+ error_propagate(errp, local_err); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ - static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, - Error **errp) - { - BDRVRBDState *s = bs->opaque; - BlockdevOptionsRbd *opts = NULL; -- Visitor *v; - const QDictEntry *e; - Error *local_err 
= NULL; - char *keypairs, *secretid; -@@ -676,19 +698,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, - qdict_del(options, "password-secret"); - } - -- /* Convert the remaining options into a QAPI object */ -- v = qobject_input_visitor_new_flat_confused(options, errp); -- if (!v) { -- r = -EINVAL; -- goto out; -- } -- -- visit_type_BlockdevOptionsRbd(v, NULL, &opts, &local_err); -- visit_free(v); -- -+ r = qemu_rbd_convert_options(options, &opts, &local_err); - if (local_err) { - error_propagate(errp, local_err); -- r = -EINVAL; - goto out; - } - --- -1.8.3.1 - diff --git a/0101-block-rbd-Attempt-to-parse-legacy-filenames.patch b/0101-block-rbd-Attempt-to-parse-legacy-filenames.patch deleted file mode 100644 index 24add45..0000000 --- a/0101-block-rbd-Attempt-to-parse-legacy-filenames.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 6198ce651b242298fa6f5cc7ba79eb168789899c Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Wed, 10 Oct 2018 20:30:13 +0100 -Subject: block/rbd: Attempt to parse legacy filenames - -RH-Author: John Snow -Message-id: <20181010203015.11719-3-jsnow@redhat.com> -Patchwork-id: 82629 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 2/4] block/rbd: Attempt to parse legacy filenames -Bugzilla: 1635585 -RH-Acked-by: Markus Armbruster -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth - -From: Jeff Cody - -When we converted rbd to get rid of the older key/value-centric -encoding format, we broke compatibility with image files with backing -file strings encoded in the old format. - -This leaves a bit of an ugly conundrum, and a hacky solution. - -If the initial attempt to parse the "proper" options fails, it assumes -that we may have an older key/value encoded filename. Fall back to -attempting to parse the filename, and extract the required options from -it. If that fails, pass along the original error message. - -We do not support mixed modern usage alongside legacy keyvalue pair -usage. 
- -A deprecation warning has been added, although care should be taken -when actually deprecating since the impact is not limited to -commandline or qapi usage, but also opening existing images. - -Reviewed-by: Eric Blake -Signed-off-by: Jeff Cody -Message-id: 15b332e5432ad069441f7275a46080f465d789a0.1536704901.git.jcody@redhat.com -Signed-off-by: Jeff Cody -(cherry picked from commit 084d1d13bdb753d558b991996e7686c077bd6d80) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula ---- - block/rbd.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 52 insertions(+), 2 deletions(-) - -diff --git a/block/rbd.c b/block/rbd.c -index b199450..014c68d 100644 ---- a/block/rbd.c -+++ b/block/rbd.c -@@ -678,6 +678,33 @@ static int qemu_rbd_convert_options(QDict *options, BlockdevOptionsRbd **opts, - return 0; - } - -+static int qemu_rbd_attempt_legacy_options(QDict *options, -+ BlockdevOptionsRbd **opts, -+ char **keypairs) -+{ -+ char *filename; -+ int r; -+ -+ filename = g_strdup(qdict_get_try_str(options, "filename")); -+ if (!filename) { -+ return -EINVAL; -+ } -+ qdict_del(options, "filename"); -+ -+ qemu_rbd_parse_filename(filename, options, NULL); -+ -+ /* keypairs freed by caller */ -+ *keypairs = g_strdup(qdict_get_try_str(options, "=keyvalue-pairs")); -+ if (*keypairs) { -+ qdict_del(options, "=keyvalue-pairs"); -+ } -+ -+ r = qemu_rbd_convert_options(options, opts, NULL); -+ -+ g_free(filename); -+ return r; -+} -+ - static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, - Error **errp) - { -@@ -700,8 +727,31 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, - - r = qemu_rbd_convert_options(options, &opts, &local_err); - if (local_err) { -- error_propagate(errp, local_err); -- goto out; -+ /* If keypairs are present, that means some options are present in -+ * the modern option format. Don't attempt to parse legacy option -+ * formats, as we won't support mixed usage. 
*/ -+ if (keypairs) { -+ error_propagate(errp, local_err); -+ goto out; -+ } -+ -+ /* If the initial attempt to convert and process the options failed, -+ * we may be attempting to open an image file that has the rbd options -+ * specified in the older format consisting of all key/value pairs -+ * encoded in the filename. Go ahead and attempt to parse the -+ * filename, and see if we can pull out the required options. */ -+ r = qemu_rbd_attempt_legacy_options(options, &opts, &keypairs); -+ if (r < 0) { -+ /* Propagate the original error, not the legacy parsing fallback -+ * error, as the latter was just a best-effort attempt. */ -+ error_propagate(errp, local_err); -+ goto out; -+ } -+ /* Take care whenever deciding to actually deprecate; once this ability -+ * is removed, we will not be able to open any images with legacy-styled -+ * backing image strings. */ -+ error_report("RBD options encoded in the filename as keyvalue pairs " -+ "is deprecated"); - } - - /* Remove the processed options from the QDict (the visitor processes --- -1.8.3.1 - diff --git a/0102-block-rbd-add-deprecation-documentation-for-filename.patch b/0102-block-rbd-add-deprecation-documentation-for-filename.patch deleted file mode 100644 index fdd94ea..0000000 --- a/0102-block-rbd-add-deprecation-documentation-for-filename.patch +++ /dev/null @@ -1,59 +0,0 @@ -From aed464e31c9f6d92aa67960dc1a0891461393305 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Tue, 11 Sep 2018 18:32:33 -0400 -Subject: block/rbd: add deprecation documentation for filename keyvalue pairs - -RH-Author: John Snow -Message-id: <20181010203015.11719-5-jsnow@redhat.com> -Patchwork-id: 82625 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 4/4] block/rbd: add deprecation docume -Bugzilla: 1635585 -RH-Acked-by: Markus Armbruster -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth - -From: Jeff Cody - -Signed-off-by: Jeff Cody -Message-id: 647f5b5ab7efd8bf567a504c832b1d2d6f719b23.1536704901.git.jcody@re -Signed-off-by: Jeff Cody 
-(cherry picked from commit 3bebd37e04f972775b1ece1bdda95451bc9fb14c) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: John Snow - -Rebase notes (3.0.0): -- Used upstream version ---- - qemu-deprecated.texi | 15 +++++++++++++++ - 1 file changed, 15 insertions(+) - -diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi -index 9920a85..cff0e8b 100644 ---- a/qemu-deprecated.texi -+++ b/qemu-deprecated.texi -@@ -227,6 +227,21 @@ from old QEMU versions anymore. A newer machine type should be used instead. - In order to prevent QEMU from automatically opening an image's backing - chain, use ``"backing": null'' instead. - -+@subsubsection rbd keyvalue pair encoded filenames: "" (since 3.1.0) -+ -+Options for ``rbd'' should be specified according to its runtime options, -+like other block drivers. Legacy parsing of keyvalue pair encoded -+filenames is useful to open images with the old format for backing files; -+These image files should be updated to use the current format. 
-+ -+Example of legacy encoding: -+ -+@code{json:@{"file.driver":"rbd", "file.filename":"rbd:rbd/name"@}} -+ -+The above, converted to the current supported format: -+ -+@code{json:@{"file.driver":"rbd", "file.pool":"rbd", "file.image":"name"@}} -+ - @subsection vio-spapr-device device options - - @subsubsection "irq": "" (since 3.0.0) --- -1.8.3.1 - diff --git a/0103-block-rbd-add-iotest-for-rbd-legacy-keyvalue-filenam.patch b/0103-block-rbd-add-iotest-for-rbd-legacy-keyvalue-filenam.patch deleted file mode 100644 index 397826f..0000000 --- a/0103-block-rbd-add-iotest-for-rbd-legacy-keyvalue-filenam.patch +++ /dev/null @@ -1,141 +0,0 @@ -From 76eb6df2dfd755e7cbda2eb07df464d25f9d73c5 Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Wed, 10 Oct 2018 20:30:14 +0100 -Subject: block/rbd: add iotest for rbd legacy keyvalue filename parsing - -RH-Author: John Snow -Message-id: <20181010203015.11719-4-jsnow@redhat.com> -Patchwork-id: 82628 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 3/4] block/rbd: add iotest for rbd legacy keyvalue filename parsing -Bugzilla: 1635585 -RH-Acked-by: Markus Armbruster -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth - -From: Jeff Cody - -This is a small test that will check for the ability to parse -both legacy and modern options for rbd. - -The way the test is set up is for failure to occur, but without -having to wait to timeout on a non-existent rbd server. The error -messages in the success path show that the arguments were parsed. - -The failure behavior prior to the patch series that has this test, is -qemu-img complaining about mandatory options (e.g. 'pool') not being -provided. - -Reviewed-by: Eric Blake -Signed-off-by: Jeff Cody -Message-id: f830580e339b974a83ed4870d11adcdc17f49a47.1536704901.git.jcody@redhat.com -Signed-off-by: Jeff Cody -(cherry picked from commit 66e6a735e97450ac50fcaf40f78600c688534cae) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. 
de Paula - -Conflicts: - tests/qemu-iotests/group: context (missing prior tests) -Signed-off-by: John Snow ---- - tests/qemu-iotests/231 | 62 ++++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/231.out | 9 +++++++ - tests/qemu-iotests/group | 1 + - 3 files changed, 72 insertions(+) - create mode 100755 tests/qemu-iotests/231 - create mode 100644 tests/qemu-iotests/231.out - -diff --git a/tests/qemu-iotests/231 b/tests/qemu-iotests/231 -new file mode 100755 -index 0000000..3e28370 ---- /dev/null -+++ b/tests/qemu-iotests/231 -@@ -0,0 +1,62 @@ -+#!/bin/bash -+# -+# Test legacy and modern option parsing for rbd/ceph. This will not -+# actually connect to a ceph server, but rather looks for the appropriate -+# error message that indicates we parsed the options correctly. -+# -+# Copyright (C) 2018 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+# creator -+owner=jcody@redhat.com -+ -+seq=`basename $0` -+echo "QA output created by $seq" -+ -+here=`pwd` -+status=1 # failure is the default! -+ -+_cleanup() -+{ -+ rm "${BOGUS_CONF}" -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+. ./common.rc -+. 
./common.filter -+ -+_supported_fmt generic -+_supported_proto rbd -+_supported_os Linux -+ -+BOGUS_CONF=${TEST_DIR}/ceph-$$.conf -+touch "${BOGUS_CONF}" -+ -+_filter_conf() -+{ -+ sed -e "s#$BOGUS_CONF#BOGUS_CONF#g" -+} -+ -+# We expect this to fail, with no monitor ip provided and a null conf file. Just want it -+# to fail in the right way. -+$QEMU_IMG info "json:{'file.driver':'rbd','file.filename':'rbd:rbd/bogus:conf=${BOGUS_CONF}'}" 2>&1 | _filter_conf -+$QEMU_IMG info "json:{'file.driver':'rbd','file.pool':'rbd','file.image':'bogus','file.conf':'${BOGUS_CONF}'}" 2>&1 | _filter_conf -+ -+# success, all done -+echo "*** done" -+rm -f $seq.full -+status=0 -diff --git a/tests/qemu-iotests/231.out b/tests/qemu-iotests/231.out -new file mode 100644 -index 0000000..579ba11 ---- /dev/null -+++ b/tests/qemu-iotests/231.out -@@ -0,0 +1,9 @@ -+QA output created by 231 -+qemu-img: RBD options encoded in the filename as keyvalue pairs is deprecated. Future versions may cease to parse these options in the future. -+unable to get monitor info from DNS SRV with service name: ceph-mon -+no monitors specified to connect to. -+qemu-img: Could not open 'json:{'file.driver':'rbd','file.filename':'rbd:rbd/bogus:conf=BOGUS_CONF'}': error connecting: No such file or directory -+unable to get monitor info from DNS SRV with service name: ceph-mon -+no monitors specified to connect to. 
-+qemu-img: Could not open 'json:{'file.driver':'rbd','file.pool':'rbd','file.image':'bogus','file.conf':'BOGUS_CONF'}': error connecting: No such file or directory -+*** done -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 23ab4d3..cc1ca7e 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -226,3 +226,4 @@ - 226 auto quick - 227 auto quick - 229 auto quick -+231 auto quick --- -1.8.3.1 - diff --git a/0104-luks-Allow-share-rw-on.patch b/0104-luks-Allow-share-rw-on.patch deleted file mode 100644 index bdb3f47..0000000 --- a/0104-luks-Allow-share-rw-on.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 8a08519b748ec41c6f542f4ef9406647269db18c Mon Sep 17 00:00:00 2001 -From: Fam Zheng -Date: Fri, 28 Sep 2018 06:09:52 +0100 -Subject: luks: Allow share-rw=on -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Fam Zheng -Message-id: <20180928060952.8616-2-famz@redhat.com> -Patchwork-id: 82311 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 1/1] luks: Allow share-rw=on -Bugzilla: 1629701 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth - -Format drivers such as qcow2 don't allow sharing the same image between -two QEMU instances in order to prevent image corruptions, because of -metadata cache. LUKS driver don't modify metadata except for when -creating image, so it is safe to relax the permission. This makes -share-rw=on property work on virtual devices. - -Suggested-by: Daniel P. Berrangé -Signed-off-by: Fam Zheng -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Kevin Wolf -(cherry picked from commit 497da8236ab2663a8108858ba7ea59aac21c5fe6) -Signed-off-by: Fam Zheng -Signed-off-by: Danilo C. L. 
de Paula ---- - block/crypto.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/block/crypto.c b/block/crypto.c -index 146d81c..33ee01b 100644 ---- a/block/crypto.c -+++ b/block/crypto.c -@@ -627,7 +627,9 @@ BlockDriver bdrv_crypto_luks = { - .bdrv_probe = block_crypto_probe_luks, - .bdrv_open = block_crypto_open_luks, - .bdrv_close = block_crypto_close, -- .bdrv_child_perm = bdrv_format_default_perms, -+ /* This driver doesn't modify LUKS metadata except when creating image. -+ * Allow share-rw=on as a special case. */ -+ .bdrv_child_perm = bdrv_filter_default_perms, - .bdrv_co_create = block_crypto_co_create_luks, - .bdrv_co_create_opts = block_crypto_co_create_opts_luks, - .bdrv_co_truncate = block_crypto_co_truncate, --- -1.8.3.1 - diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 75804fb..ef0e02b 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -71,8 +71,8 @@ Provides: %1-rhel = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 3.0.0 -Release: 2%{?dist} +Version: 3.1.0 +Release: 0%{?dist}.next.candidate # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -86,7 +86,7 @@ ExclusiveArch: x86_64 %{power64} aarch64 s390x %define _smp_mflags %{nil} %endif -Source0: http://wiki.qemu.org/download/qemu-3.0.0.tar.xz +Source0: http://wiki.qemu.org/download/qemu-3.1.0.tar.xz # KSM control scripts Source4: ksm.service @@ -114,107 +114,23 @@ Source35: udev-kvm-check.c -Patch0004: 0004-Initial-redhat-build.patch -Patch0005: 0005-Enable-disable-devices-for-RHEL-7.patch -Patch0006: 0006-Machine-type-related-general-changes.patch -Patch0007: 0007-Add-aarch64-machine-types.patch -Patch0008: 0008-Add-ppc64-machine-types.patch -Patch0009: 0009-Add-s390x-machine-types.patch -Patch0010: 0010-Add-x86_64-machine-types.patch -Patch0011: 0011-Enable-make-check.patch -Patch0012: 0012-Use-kvm-by-default.patch -Patch0013: 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch -Patch0014: 0014-Add-support-statement-to-help-output.patch -Patch0015: 0015-globally-limit-the-maximum-number-of-CPUs.patch -Patch0016: 0016-Add-support-for-simpletrace.patch -Patch0017: 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch -Patch0018: 0018-usb-xhci-Fix-PCI-capability-order.patch -Patch0019: 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch -Patch0020: 0020-linux-headers-asm-s390-kvm.h-header-sync.patch -Patch0021: 0021-s390x-Enable-KVM-huge-page-backing-support.patch -Patch0022: 0022-s390x-kvm-add-etoken-facility.patch -Patch0023: 0023-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch -Patch0024: 0024-i386-Fix-arch_query_cpu_model_expansion-leak.patch -Patch0025: 0025-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch -Patch0026: 0026-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch -Patch0027: 0027-migration-postcopy-Clear-have_listen_thread.patch -Patch0028: 0028-migration-cleanup-in-error-paths-in-loadvm.patch -Patch0029: 0029-jobs-change-start-callback-to-run-callback.patch -Patch0030: 0030-jobs-canonize-Error-object.patch 
-Patch0031: 0031-jobs-add-exit-shim.patch -Patch0032: 0032-block-commit-utilize-job_exit-shim.patch -Patch0033: 0033-block-mirror-utilize-job_exit-shim.patch -Patch0034: 0034-jobs-utilize-job_exit-shim.patch -Patch0035: 0035-block-backup-make-function-variables-consistently-na.patch -Patch0036: 0036-jobs-remove-ret-argument-to-job_completed-privatize-.patch -Patch0037: 0037-jobs-remove-job_defer_to_main_loop.patch -Patch0038: 0038-block-commit-add-block-job-creation-flags.patch -Patch0039: 0039-block-mirror-add-block-job-creation-flags.patch -Patch0040: 0040-block-stream-add-block-job-creation-flags.patch -Patch0041: 0041-block-commit-refactor-commit-to-use-job-callbacks.patch -Patch0042: 0042-block-mirror-don-t-install-backing-chain-on-abort.patch -Patch0043: 0043-block-mirror-conservative-mirror_exit-refactor.patch -Patch0044: 0044-block-stream-refactor-stream-to-use-job-callbacks.patch -Patch0045: 0045-tests-blockjob-replace-Blockjob-with-Job.patch -Patch0046: 0046-tests-test-blockjob-remove-exit-callback.patch -Patch0047: 0047-tests-test-blockjob-txn-move-.exit-to-.clean.patch -Patch0048: 0048-jobs-remove-.exit-callback.patch -Patch0049: 0049-qapi-block-commit-expose-new-job-properties.patch -Patch0050: 0050-qapi-block-mirror-expose-new-job-properties.patch -Patch0051: 0051-qapi-block-stream-expose-new-job-properties.patch -Patch0052: 0052-block-backup-qapi-documentation-fixup.patch -Patch0053: 0053-blockdev-document-transactional-shortcomings.patch -Patch0054: 0054-seccomp-use-SIGSYS-signal-instead-of-killing-the-thr.patch -Patch0055: 0055-seccomp-prefer-SCMP_ACT_KILL_PROCESS-if-available.patch -Patch0056: 0056-seccomp-set-the-seccomp-filter-to-all-threads.patch -Patch0057: 0057-memory-cleanup-side-effects-of-memory_region_init_fo.patch -Patch0058: 0058-mirror-Fail-gracefully-for-source-target.patch -Patch0059: 0059-commit-Add-top-node-base-node-options.patch -Patch0060: 0060-qemu-iotests-Test-commit-with-top-node-base-node.patch -Patch0061: 
0061-block-for-jobs-do-not-clear-user_paused-until-after-.patch -Patch0062: 0062-block-iotest-to-catch-abort-on-forced-blockjob-cance.patch -Patch0063: 0063-Revert-hw-acpi-build-build-SRAT-memory-affinity-stru.patch -Patch0064: 0064-aio-posix-Don-t-count-ctx-notifier-as-progress-when-.patch -Patch0065: 0065-aio-Do-aio_notify_accept-only-during-blocking-aio_po.patch -Patch0066: 0066-aio-posix-fix-concurrent-access-to-poll_disable_cnt.patch -Patch0067: 0067-aio-posix-compute-timeout-before-polling.patch -Patch0068: 0068-aio-posix-do-skip-system-call-if-ctx-notifier-pollin.patch -Patch0069: 0069-linux-headers-update.patch -Patch0070: 0070-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch -Patch0071: 0071-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch -Patch0072: 0072-s390x-ap-base-Adjunct-Processor-AP-object-model.patch -Patch0073: 0073-s390x-vfio-ap-Introduce-VFIO-AP-device.patch -Patch0074: 0074-s390-doc-detailed-specifications-for-AP-virtualizati.patch -Patch0075: 0075-vnc-call-sasl_server_init-only-when-required.patch -Patch0076: 0076-nbd-server-fix-NBD_CMD_CACHE.patch -Patch0077: 0077-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch -Patch0078: 0078-block-linux-aio-acquire-AioContext-before-qemu_laio_.patch -Patch0079: 0079-util-async-use-qemu_aio_coroutine_enter-in-co_schedu.patch -Patch0080: 0080-job-Fix-nested-aio_poll-hanging-in-job_txn_apply.patch -Patch0081: 0081-job-Fix-missing-locking-due-to-mismerge.patch -Patch0082: 0082-blockjob-Wake-up-BDS-when-job-becomes-idle.patch -Patch0083: 0083-aio-wait-Increase-num_waiters-even-in-home-thread.patch -Patch0084: 0084-test-bdrv-drain-Drain-with-block-jobs-in-an-I-O-thre.patch -Patch0085: 0085-test-blockjob-Acquire-AioContext-around-job_cancel_s.patch -Patch0086: 0086-job-Use-AIO_WAIT_WHILE-in-job_finish_sync.patch -Patch0087: 0087-test-bdrv-drain-Test-AIO_WAIT_WHILE-in-completion-ca.patch -Patch0088: 0088-block-Add-missing-locking-in-bdrv_co_drain_bh_cb.patch -Patch0089: 
0089-block-backend-Add-.drained_poll-callback.patch -Patch0090: 0090-block-backend-Fix-potential-double-blk_delete.patch -Patch0091: 0091-block-backend-Decrease-in_flight-only-after-callback.patch -Patch0092: 0092-blockjob-Lie-better-in-child_job_drained_poll.patch -Patch0093: 0093-block-Remove-aio_poll-in-bdrv_drain_poll-variants.patch -Patch0094: 0094-test-bdrv-drain-Test-nested-poll-in-bdrv_drain_poll_.patch -Patch0095: 0095-job-Avoid-deadlocks-in-job_completed_txn_abort.patch -Patch0096: 0096-test-bdrv-drain-AIO_WAIT_WHILE-in-job-.commit-.abort.patch -Patch0097: 0097-test-bdrv-drain-Fix-outdated-comments.patch -Patch0098: 0098-block-Use-a-single-global-AioWait.patch -Patch0099: 0099-test-bdrv-drain-Test-draining-job-source-child-and-p.patch -Patch0100: 0100-block-rbd-pull-out-qemu_rbd_convert_options.patch -Patch0101: 0101-block-rbd-Attempt-to-parse-legacy-filenames.patch -Patch0102: 0102-block-rbd-add-deprecation-documentation-for-filename.patch -Patch0103: 0103-block-rbd-add-iotest-for-rbd-legacy-keyvalue-filenam.patch -Patch0104: 0104-luks-Allow-share-rw-on.patch +Patch0001: 0001-migration-colo.c-Fix-compilation-issue-when-disable-.patch +Patch0005: 0005-Initial-redhat-build.patch +Patch0006: 0006-Enable-disable-devices-for-RHEL.patch +Patch0007: 0007-Machine-type-related-general-changes.patch +Patch0008: 0008-Add-aarch64-machine-types.patch +Patch0009: 0009-Add-ppc64-machine-types.patch +Patch0010: 0010-Add-s390x-machine-types.patch +Patch0011: 0011-Add-x86_64-machine-types.patch +Patch0012: 0012-Enable-make-check.patch +Patch0013: 0013-Use-kvm-by-default.patch +Patch0014: 0014-vfio-cap-number-of-devices-that-can-be-assigned.patch +Patch0015: 0015-Add-support-statement-to-help-output.patch +Patch0016: 0016-globally-limit-the-maximum-number-of-CPUs.patch +Patch0017: 0017-Add-support-for-simpletrace.patch +Patch0018: 0018-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0019: 0019-usb-xhci-Fix-PCI-capability-order.patch +Patch0020: 
0020-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -483,7 +399,7 @@ the Secure Shell (SSH) protocol. %prep %setup -q -n qemu-%{version} -%patch0004 -p1 +%patch0001 -p1 %patch0005 -p1 %patch0006 -p1 %patch0007 -p1 @@ -500,90 +416,6 @@ the Secure Shell (SSH) protocol. %patch0018 -p1 %patch0019 -p1 %patch0020 -p1 -%patch0021 -p1 -%patch0022 -p1 -%patch0023 -p1 -%patch0024 -p1 -%patch0025 -p1 -%patch0026 -p1 -%patch0027 -p1 -%patch0028 -p1 -%patch0029 -p1 -%patch0030 -p1 -%patch0031 -p1 -%patch0032 -p1 -%patch0033 -p1 -%patch0034 -p1 -%patch0035 -p1 -%patch0036 -p1 -%patch0037 -p1 -%patch0038 -p1 -%patch0039 -p1 -%patch0040 -p1 -%patch0041 -p1 -%patch0042 -p1 -%patch0043 -p1 -%patch0044 -p1 -%patch0045 -p1 -%patch0046 -p1 -%patch0047 -p1 -%patch0048 -p1 -%patch0049 -p1 -%patch0050 -p1 -%patch0051 -p1 -%patch0052 -p1 -%patch0053 -p1 -%patch0054 -p1 -%patch0055 -p1 -%patch0056 -p1 -%patch0057 -p1 -%patch0058 -p1 -%patch0059 -p1 -%patch0060 -p1 -%patch0061 -p1 -%patch0062 -p1 -%patch0063 -p1 -%patch0064 -p1 -%patch0065 -p1 -%patch0066 -p1 -%patch0067 -p1 -%patch0068 -p1 -%patch0069 -p1 -%patch0070 -p1 -%patch0071 -p1 -%patch0072 -p1 -%patch0073 -p1 -%patch0074 -p1 -%patch0075 -p1 -%patch0076 -p1 -%patch0077 -p1 -%patch0078 -p1 -%patch0079 -p1 -%patch0080 -p1 -%patch0081 -p1 -%patch0082 -p1 -%patch0083 -p1 -%patch0084 -p1 -%patch0085 -p1 -%patch0086 -p1 -%patch0087 -p1 -%patch0088 -p1 -%patch0089 -p1 -%patch0090 -p1 -%patch0091 -p1 -%patch0092 -p1 -%patch0093 -p1 -%patch0094 -p1 -%patch0095 -p1 -%patch0096 -p1 -%patch0097 -p1 -%patch0098 -p1 -%patch0099 -p1 -%patch0100 -p1 -%patch0101 -p1 -%patch0102 -p1 -%patch0103 -p1 -%patch0104 -p1 %build %global buildarch %{kvm_target}-softmmu @@ -1056,8 +888,10 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %doc %{qemudocdir}/qemu-qmp-ref.html %doc %{qemudocdir}/qemu-qmp-ref.txt %{_mandir}/man7/qemu-qmp-ref.7* +%{_mandir}/man7/qemu-cpu-models.7* 
%{_bindir}/qemu-keymap %{_bindir}/qemu-pr-helper +%{_bindir}/qemu-edid %{_unitdir}/qemu-pr-helper.service %{_unitdir}/qemu-pr-helper.socket %{_mandir}/man7/qemu-ga-ref.7* @@ -1170,9 +1004,10 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog -* Thu Nov 08 2018 Danilo Cesar Lemes de Paula - 3.0.0-2.el8 -- Mass import 2.12.0 fixes into 3.0 +* Mon Nov 05 2018 Danilo Cesar Lemes de Paula - 3.1.0-0.el8 +- Rebase qemu-kvm to qemu 3.1.0 RC2 +* Mon Nov 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-42.el8 - kvm-luks-Allow-share-rw-on.patch [bz#1629701] - kvm-redhat-reenable-gluster-support.patch [bz#1599340] - kvm-redhat-bump-libusb-requirement.patch [bz#1627970] @@ -1183,6 +1018,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - Resolves: bz#1629701 ("share-rw=on" does not work for luks format image - Fast Train) +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-41.el8 - kvm-block-rbd-pull-out-qemu_rbd_convert_options.patch [bz#1635585] - kvm-block-rbd-Attempt-to-parse-legacy-filenames.patch [bz#1635585] - kvm-block-rbd-add-deprecation-documentation-for-filename.patch [bz#1635585] @@ -1190,6 +1026,8 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - Resolves: bz#1635585 (rbd json format of 7.6 is incompatible with 7.5) +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-40.el8 + - kvm-vnc-call-sasl_server_init-only-when-required.patch [bz#1609327] - kvm-nbd-server-fix-NBD_CMD_CACHE.patch [bz#1636142] - kvm-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch [bz#1636142] @@ -1248,6 +1086,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - Resolves: bz#1639374 (qemu-img map 'Aborted (core dumped)' when specifying a plain file) +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - - kvm-linux-headers-update.patch [bz#1508142] - kvm-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch [bz#1508142] - kvm-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch [bz#1508142] @@ -1257,6 +1096,7 @@ useradd -r -u 107 -g qemu -G 
kvm -d / -s /sbin/nologin \ - Resolves: bz#1508142 ([IBM 8.0 FEAT] KVM: Guest-dedicated Crypto Adapters - qemu part) +* Mon Oct 15 2018 Danilo Cesar Lemes de Paula - 2.12.0-38.el8 - kvm-Revert-hw-acpi-build-build-SRAT-memory-affinity-stru.patch [bz#1609235] - kvm-add-udev-kvm-check.patch [bz#1552663] - kvm-aio-posix-Don-t-count-ctx-notifier-as-progress-when-.patch [bz#1623085] @@ -1287,12 +1127,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - Resolves: bz#1633928 (CVE-2018-3639 qemu-kvm: hw: cpu: speculative store bypass [rhel-8.0]) +* Fri Oct 12 2018 Danilo Cesar Lemes de Paula - 2.12.0-37.el8 - kvm-block-for-jobs-do-not-clear-user_paused-until-after-.patch [bz#1635583] - kvm-iotests-Add-failure-matching-to-common.qemu.patch [bz#1635583] - kvm-block-iotest-to-catch-abort-on-forced-blockjob-cance.patch [bz#1635583] - Resolves: bz#1635583 (Quitting VM causes qemu core dump once the block mirror job paused for no enough target space) +* Fri Oct 12 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-36 - kvm-check-Only-test-ivshm-when-it-is-compiled-in.patch [bz#1621817] - kvm-Disable-ivshmem.patch [bz#1621817] - kvm-mirror-Fail-gracefully-for-source-target.patch [bz#1637963] @@ -1305,10 +1147,12 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - Resolves: bz#1637970 (allow using node-names with block-commit) +* Thu Oct 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-35.el8 - kvm-redhat-make-the-plugins-executable.patch [bz#1638304] - Resolves: bz#1638304 (the driver packages lack all the library Requires) +* Thu Oct 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-34.el8 - kvm-seccomp-allow-sched_setscheduler-with-SCHED_IDLE-pol.patch [bz#1618356] - kvm-seccomp-use-SIGSYS-signal-instead-of-killing-the-thr.patch [bz#1618356] - kvm-seccomp-prefer-SCMP_ACT_KILL_PROCESS-if-available.patch [bz#1618356] @@ -1320,9 +1164,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - Resolves: bz#1618356 (qemu-kvm: Qemu: seccomp: blacklist is not applied 
to all threads [rhel-8]) -* Fri Oct 12 2018 Danilo Cesar Lemes de Paula - 3.0.0-1.el8 -- Rebase on qemu-kvm 3.0.0 - * Fri Oct 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-33.el8 - kvm-migration-postcopy-Clear-have_listen_thread.patch [bz#1608765] - kvm-migration-cleanup-in-error-paths-in-loadvm.patch [bz#1608765] diff --git a/sources b/sources index c7b3cb2..5fb3730 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-3.0.0.tar.xz) = a764302f50b9aca4134bbbc1f361b98e71240cdc7b25600dfe733bf4cf17bd86000bd28357697b08f3b656899dceb9e459350b8d55557817444ed5d7fa380a5a +SHA512 (qemu-3.1.0.tar.xz) = 610ec222853ebeea764a2c972418645dea7917e796a0f540d6c4a0f588244e6fdf6f5e6c214f161eb11fbc497f136a43e000fc85554ebb2cf8f10c846a186437 From fe3d84fd83d522d4a825a6dfc6a60d8443fb069e Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Thu, 13 Dec 2018 10:54:47 -0200 Subject: [PATCH 005/195] Rebase to qemu-kvm-3.1.0 --- ...-Fix-compilation-issue-when-disable-.patch | 128 --------- ...d.patch => 0004-Initial-redhat-build.patch | 32 ++- ...0005-Enable-disable-devices-for-RHEL.patch | 268 +++++++++++++----- ...Machine-type-related-general-changes.patch | 20 +- ...ch => 0007-Add-aarch64-machine-types.patch | 18 +- ...atch => 0008-Add-ppc64-machine-types.patch | 2 +- ...atch => 0009-Add-s390x-machine-types.patch | 2 +- ...tch => 0010-Add-x86_64-machine-types.patch | 73 +---- ...heck.patch => 0011-Enable-make-check.patch | 48 +--- ...ult.patch => 0012-Use-kvm-by-default.patch | 2 +- ...mber-of-devices-that-can-be-assigned.patch | 4 +- ...Add-support-statement-to-help-output.patch | 4 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 10 +- ... 
=> 0016-Add-support-for-simpletrace.patch | 2 +- ...documentation-instead-of-qemu-system.patch | 157 +--------- ...18-usb-xhci-Fix-PCI-capability-order.patch | 2 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 2 +- qemu-kvm.spec | 241 ++++++++++------ sources | 1 - 19 files changed, 427 insertions(+), 589 deletions(-) delete mode 100644 0001-migration-colo.c-Fix-compilation-issue-when-disable-.patch rename 0005-Initial-redhat-build.patch => 0004-Initial-redhat-build.patch (93%) rename 0006-Enable-disable-devices-for-RHEL.patch => 0005-Enable-disable-devices-for-RHEL.patch (82%) rename 0007-Machine-type-related-general-changes.patch => 0006-Machine-type-related-general-changes.patch (96%) rename 0008-Add-aarch64-machine-types.patch => 0007-Add-aarch64-machine-types.patch (94%) rename 0009-Add-ppc64-machine-types.patch => 0008-Add-ppc64-machine-types.patch (99%) rename 0010-Add-s390x-machine-types.patch => 0009-Add-s390x-machine-types.patch (98%) rename 0011-Add-x86_64-machine-types.patch => 0010-Add-x86_64-machine-types.patch (94%) rename 0012-Enable-make-check.patch => 0011-Enable-make-check.patch (79%) rename 0013-Use-kvm-by-default.patch => 0012-Use-kvm-by-default.patch (94%) rename 0014-vfio-cap-number-of-devices-that-can-be-assigned.patch => 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch (96%) rename 0015-Add-support-statement-to-help-output.patch => 0014-Add-support-statement-to-help-output.patch (94%) rename 0016-globally-limit-the-maximum-number-of-CPUs.patch => 0015-globally-limit-the-maximum-number-of-CPUs.patch (94%) rename 0017-Add-support-for-simpletrace.patch => 0016-Add-support-for-simpletrace.patch (98%) rename 0018-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch => 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch (85%) rename 0019-usb-xhci-Fix-PCI-capability-order.patch => 0018-usb-xhci-Fix-PCI-capability-order.patch (98%) rename 0020-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch => 
0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch (97%) delete mode 100644 sources diff --git a/0001-migration-colo.c-Fix-compilation-issue-when-disable-.patch b/0001-migration-colo.c-Fix-compilation-issue-when-disable-.patch deleted file mode 100644 index b2ca136..0000000 --- a/0001-migration-colo.c-Fix-compilation-issue-when-disable-.patch +++ /dev/null @@ -1,128 +0,0 @@ -From 5be2fefbc0999ada944c36a865b154d398e16e27 Mon Sep 17 00:00:00 2001 -From: Zhang Chen -Date: Thu, 1 Nov 2018 10:12:26 +0800 -Subject: migration/colo.c: Fix compilation issue when disable replication - -This compilation issue will occur when user use --disable-replication -to config Qemu. - -Reported-by: Thomas Huth -Signed-off-by: Zhang Chen ---- - migration/colo.c | 28 +++++++++++++++++++++++++++- - 1 file changed, 27 insertions(+), 1 deletion(-) - -diff --git a/migration/colo.c b/migration/colo.c -index 956ac23..fcff04c 100644 ---- a/migration/colo.c -+++ b/migration/colo.c -@@ -59,6 +59,8 @@ static bool colo_runstate_is_stopped(void) - - static void secondary_vm_do_failover(void) - { -+/* COLO needs enable block-replication */ -+#ifdef CONFIG_REPLICATION - int old_state; - MigrationIncomingState *mis = migration_incoming_get_current(); - Error *local_err = NULL; -@@ -121,10 +123,14 @@ static void secondary_vm_do_failover(void) - if (mis->migration_incoming_co) { - qemu_coroutine_enter(mis->migration_incoming_co); - } -+#else -+ abort(); -+#endif - } - - static void primary_vm_do_failover(void) - { -+#ifdef CONFIG_REPLICATION - MigrationState *s = migrate_get_current(); - int old_state; - Error *local_err = NULL; -@@ -165,6 +171,9 @@ static void primary_vm_do_failover(void) - - /* Notify COLO thread that failover work is finished */ - qemu_sem_post(&s->colo_exit_sem); -+#else -+ abort(); -+#endif - } - - COLOMode get_colo_mode(void) -@@ -415,11 +424,16 @@ static int colo_do_checkpoint_transaction(MigrationState *s, - /* Disable block migration */ - 
migrate_set_block_enabled(false, &local_err); - qemu_mutex_lock_iothread(); -+ -+#ifdef CONFIG_REPLICATION - replication_do_checkpoint_all(&local_err); - if (local_err) { - qemu_mutex_unlock_iothread(); - goto out; - } -+#else -+ abort(); -+#endif - - colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err); - if (local_err) { -@@ -523,11 +537,15 @@ static void colo_process_checkpoint(MigrationState *s) - object_unref(OBJECT(bioc)); - - qemu_mutex_lock_iothread(); -+#ifdef CONFIG_REPLICATION - replication_start_all(REPLICATION_MODE_PRIMARY, &local_err); - if (local_err) { - qemu_mutex_unlock_iothread(); - goto out; - } -+#else -+ abort(); -+#endif - - vm_start(); - qemu_mutex_unlock_iothread(); -@@ -690,11 +708,15 @@ void *colo_process_incoming_thread(void *opaque) - object_unref(OBJECT(bioc)); - - qemu_mutex_lock_iothread(); -+#ifdef CONFIG_REPLICATION - replication_start_all(REPLICATION_MODE_SECONDARY, &local_err); - if (local_err) { - qemu_mutex_unlock_iothread(); - goto out; - } -+#else -+ abort(); -+#endif - vm_start(); - trace_colo_vm_state_change("stop", "run"); - qemu_mutex_unlock_iothread(); -@@ -785,18 +807,22 @@ void *colo_process_incoming_thread(void *opaque) - goto out; - } - -+#ifdef CONFIG_REPLICATION - replication_get_error_all(&local_err); - if (local_err) { - qemu_mutex_unlock_iothread(); - goto out; - } -+ - /* discard colo disk buffer */ - replication_do_checkpoint_all(&local_err); - if (local_err) { - qemu_mutex_unlock_iothread(); - goto out; - } -- -+#else -+ abort(); -+#endif - /* Notify all filters of all NIC to do checkpoint */ - colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err); - --- -1.8.3.1 - diff --git a/0005-Initial-redhat-build.patch b/0004-Initial-redhat-build.patch similarity index 93% rename from 0005-Initial-redhat-build.patch rename to 0004-Initial-redhat-build.patch index 2e1f16c..38ad215 100644 --- a/0005-Initial-redhat-build.patch +++ b/0004-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From 
f0cd0ed26f3a3ae0610fad93c9dde26b54910abb Mon Sep 17 00:00:00 2001 +From 8ff84e7f181aa0f64b3b6178d9ac808c61f5bec8 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build @@ -13,18 +13,26 @@ several issues are fixed in QEMU tree: - Use "/share/qemu-kvm" as SHARE_SUFFIX - We reconfigured our share to qemu-kvm to be consistent with used name -This rebase includes changes up to qemu-kvm-2.12.0-42.el8 +This rebase includes changes up to qemu-kvm-2.12.0-47.el8 + +Rebase notes (3.1.0): +- added new configure options + +Merged patches (3.1.0): +- 01f0c9f RHEL8: Add disable configure options to qemu spec file +- Spec file cleanups --- - Makefile | 3 +- - block/Makefile.objs | 2 +- - block/vxhs.c | 119 ++- - configure | 33 +- - os-posix.c | 2 +- - redhat/Makefile | 82 ++ - redhat/Makefile.common | 49 ++ - redhat/qemu-kvm.spec.template | 1723 +++++++++++++++++++++++++++++++++++++++++ - ui/vnc.c | 2 +- - 9 files changed, 1972 insertions(+), 43 deletions(-) + Makefile | 3 +- + block/Makefile.objs | 2 +- + block/vxhs.c | 119 ++- + configure | 33 +- + os-posix.c | 2 +- + redhat/Makefile | 82 ++ + redhat/Makefile.common | 49 + + redhat/qemu-kvm.spec.template | 1813 +++++++++++++++++++++++++++++++++++++ + redhat/scripts/process-patches.sh | 7 +- + ui/vnc.c | 2 +- + 10 files changed, 2064 insertions(+), 48 deletions(-) create mode 100644 redhat/Makefile create mode 100644 redhat/Makefile.common create mode 100644 redhat/qemu-kvm.spec.template diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0005-Enable-disable-devices-for-RHEL.patch similarity index 82% rename from 0006-Enable-disable-devices-for-RHEL.patch rename to 0005-Enable-disable-devices-for-RHEL.patch index 6a367f2..246e148 100644 --- a/0006-Enable-disable-devices-for-RHEL.patch +++ b/0005-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 38eba79aaa865ffa3e85bfa56e644e0846731744 Mon Sep 17 00:00:00 2001 +From 
0533a6ee98fedfad9ca1466f5e6f5576169ed808 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 11 Jan 2016 11:53:33 +0100 Subject: Enable/disable devices for RHEL @@ -12,22 +12,33 @@ Rebase notes (qemu 3.1.0) - new hyperv.mak in default configs - Move changes from x86_64-softmmu.mak to i386-softmmu.mak - Added CONFIG_VIRTIO_MMIO to aarch64-softmmu.mak +- Removed config_vga_isa.c changes as no longer needed +- Removed new devices + +Merged patches (qemu 3.1.0): +- d51e082 Re-enable CONFIG_HYPERV_TESTDEV +- 4b889f3 Declare cirrus-vga as deprecated +- b579d32 Do not build bluetooth support +- 3eef52a Disable CONFIG_IPMI and CONFIG_I2C for ppc64 +- 9caf292 Disable CONFIG_CAN_BUS and CONFIG_CAN_SJA1000 --- - default-configs/aarch64-softmmu.mak | 40 +++++++++++++++++++++++++++---------- - default-configs/hyperv.mak | 2 +- - default-configs/i386-softmmu.mak | 26 ++++++++++++------------ - default-configs/pci.mak | 38 +++++++++++++++++------------------ - default-configs/ppc64-softmmu.mak | 28 +++++++++++++++++++------- + Makefile.objs | 4 ++-- + default-configs/aarch64-softmmu.mak | 40 ++++++++++++++++++++++++--------- + default-configs/i386-softmmu.mak | 26 +++++++++++----------- + default-configs/pci.mak | 44 ++++++++++++++++++------------------- + default-configs/ppc64-softmmu.mak | 30 ++++++++++++++++++------- default-configs/s390x-softmmu.mak | 5 +++-- - default-configs/sound.mak | 8 ++++---- - default-configs/usb.mak | 14 ++++++------- - default-configs/virtio.mak | 5 ++--- + default-configs/sound.mak | 8 +++---- + default-configs/usb.mak | 14 ++++++------ + default-configs/virtio.mak | 6 ++--- hw/acpi/ich9.c | 4 ++-- hw/arm/Makefile.objs | 2 +- hw/block/fdc.c | 1 + + hw/bt/Makefile.objs | 4 ++-- hw/char/serial-pci.c | 4 ++++ - hw/core/Makefile.objs | 10 ++++++---- - hw/display/cirrus_vga_isa.c | 2 ++ + hw/core/Makefile.objs | 10 +++++---- + hw/display/Makefile.objs | 5 +++-- + hw/display/cirrus_vga.c | 3 +++ hw/i386/pc.c | 2 ++ hw/ide/piix.c | 5 ++++- 
hw/ide/via.c | 2 ++ @@ -38,24 +49,39 @@ Rebase notes (qemu 3.1.0) hw/net/e1000.c | 2 ++ hw/pci-host/piix.c | 4 ++++ hw/ppc/spapr_cpu_core.c | 2 ++ - hw/rdma/Makefile.objs | 3 ++- hw/usb/ccid-card-emulated.c | 2 ++ - hw/vfio/Makefile.objs | 3 --- + hw/vfio/Makefile.objs | 2 +- hw/vfio/pci-quirks.c | 5 +++++ - hw/virtio/virtio-pci.c | 8 ++++---- - qemu-options.hx | 5 ----- + hw/vfio/pci.c | 5 +++++ + hw/virtio/virtio-pci.c | 8 +++---- + qemu-options.hx | 7 ++---- redhat/qemu-kvm.spec.template | 2 +- stubs/Makefile.objs | 1 + - stubs/ide-isa.c | 13 ++++++++++++ + stubs/ide-isa.c | 13 +++++++++++ target/arm/cpu.c | 4 +++- - target/i386/cpu.c | 35 ++++++++++++++++++++++++-------- - target/ppc/cpu-models.c | 12 +++++++++++ + target/i386/cpu.c | 35 ++++++++++++++++++++++------- + target/ppc/cpu-models.c | 12 ++++++++++ target/s390x/cpu_models.c | 3 +++ - target/s390x/kvm.c | 8 ++++++++ - vl.c | 2 +- - 40 files changed, 229 insertions(+), 100 deletions(-) + target/s390x/kvm.c | 8 +++++++ + vl.c | 8 ++++++- + 42 files changed, 253 insertions(+), 106 deletions(-) create mode 100644 stubs/ide-isa.c +diff --git a/Makefile.objs b/Makefile.objs +index 1e1ff38..26d578e 100644 +--- a/Makefile.objs ++++ b/Makefile.objs +@@ -115,8 +115,8 @@ common-obj-y += replay/ + + common-obj-y += ui/ + common-obj-m += ui/ +-common-obj-y += bt-host.o bt-vhci.o +-bt-host.o-cflags := $(BLUEZ_CFLAGS) ++#common-obj-y += bt-host.o bt-vhci.o ++#bt-host.o-cflags := $(BLUEZ_CFLAGS) + + common-obj-y += dma-helpers.o + common-obj-y += vl.o diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak index 4ea9add..221e266 100644 --- a/default-configs/aarch64-softmmu.mak @@ -103,14 +129,6 @@ index 4ea9add..221e266 100644 +CONFIG_USB=y +CONFIG_I2C=y +CONFIG_FW_CFG_DMA=y -diff --git a/default-configs/hyperv.mak b/default-configs/hyperv.mak -index 5d0d9fd..fce5d91 100644 ---- a/default-configs/hyperv.mak -+++ b/default-configs/hyperv.mak -@@ -1,2 +1,2 @@ - CONFIG_HYPERV=$(CONFIG_KVM) 
--CONFIG_HYPERV_TESTDEV=y -+#CONFIG_HYPERV_TESTDEV=y diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index 64c998c..faea212 100644 --- a/default-configs/i386-softmmu.mak @@ -167,7 +185,7 @@ index 64c998c..faea212 100644 -CONFIG_AMD_IOMMU=y +#CONFIG_AMD_IOMMU=y diff --git a/default-configs/pci.mak b/default-configs/pci.mak -index 6c7be12..292b3f2 100644 +index 6c7be12..5929b5d 100644 --- a/default-configs/pci.mak +++ b/default-configs/pci.mak @@ -4,22 +4,22 @@ CONFIG_ISA_BUS=y @@ -214,10 +232,12 @@ index 6c7be12..292b3f2 100644 CONFIG_SERIAL=y CONFIG_SERIAL_ISA=y CONFIG_SERIAL_PCI=y - CONFIG_CAN_BUS=y - CONFIG_CAN_SJA1000=y +-CONFIG_CAN_BUS=y +-CONFIG_CAN_SJA1000=y -CONFIG_CAN_PCI=y -CONFIG_IPACK=y ++#CONFIG_CAN_BUS=y ++#CONFIG_CAN_SJA1000=y +#CONFIG_CAN_PCI=y +#CONFIG_IPACK=y CONFIG_WDT_IB6300ESB=y @@ -231,13 +251,14 @@ index 6c7be12..292b3f2 100644 CONFIG_EDU=y CONFIG_VGA=y CONFIG_VGA_PCI=y - CONFIG_BOCHS_DISPLAY=y +-CONFIG_BOCHS_DISPLAY=y -CONFIG_IVSHMEM_DEVICE=$(CONFIG_IVSHMEM) -CONFIG_ROCKER=y ++#CONFIG_BOCHS_DISPLAY=y +#CONFIG_IVSHMEM_DEVICE=$(CONFIG_IVSHMEM) +#CONFIG_ROCKER=y diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak -index aec2855..a492986 100644 +index aec2855..242d277 100644 --- a/default-configs/ppc64-softmmu.mak +++ b/default-configs/ppc64-softmmu.mak @@ -1,14 +1,28 @@ @@ -260,15 +281,16 @@ index aec2855..a492986 100644 +CONFIG_VGA=y +CONFIG_VGA_PCI=y +CONFIG_SERIAL=y -+CONFIG_I2C=y ++#CONFIG_I2C=y # For PowerNV -CONFIG_POWERNV=y -+#CONFIG_POWERNV=y - CONFIG_IPMI=y +-CONFIG_IPMI=y -CONFIG_IPMI_LOCAL=y -CONFIG_IPMI_EXTERN=y -CONFIG_ISA_IPMI_BT=y ++#CONFIG_POWERNV=y ++#CONFIG_IPMI=y +#CONFIG_IPMI_LOCAL=y +#CONFIG_IPMI_EXTERN=y +#CONFIG_ISA_IPMI_BT=y @@ -336,10 +358,10 @@ index e42cfea..cef6c0b 100644 +#CONFIG_USB_NETWORK=y +#CONFIG_USB_BLUETOOTH=y diff --git a/default-configs/virtio.mak b/default-configs/virtio.mak -index 1304849..6330e6b 100644 +index 1304849..b4f4743 100644 --- 
a/default-configs/virtio.mak +++ b/default-configs/virtio.mak -@@ -1,10 +1,9 @@ +@@ -1,10 +1,10 @@ -CONFIG_VHOST_USER_SCSI=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) -CONFIG_VHOST_USER_BLK=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) +#CONFIG_VHOST_USER_SCSI=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) @@ -349,6 +371,7 @@ index 1304849..6330e6b 100644 CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_CRYPTO=y ++#CONFIG_VIRTIO_CRYPTO=y CONFIG_VIRTIO_GPU=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_NET=y @@ -392,6 +415,16 @@ index 6f19f12..56b7aeb 100644 } static const TypeInfo floppy_drive_info = { +diff --git a/hw/bt/Makefile.objs b/hw/bt/Makefile.objs +index 867a7d2..e678e9e 100644 +--- a/hw/bt/Makefile.objs ++++ b/hw/bt/Makefile.objs +@@ -1,3 +1,3 @@ +-common-obj-y += core.o l2cap.o sdp.o hci.o hid.o +-common-obj-y += hci-csr.o ++#common-obj-y += core.o l2cap.o sdp.o hci.o hid.o ++#common-obj-y += hci-csr.o + diff --git a/hw/char/serial-pci.c b/hw/char/serial-pci.c index cb0d04c..d426982 100644 --- a/hw/char/serial-pci.c @@ -434,19 +467,35 @@ index a799c83..1c7ba0b 100644 -common-obj-$(CONFIG_SOFTMMU) += generic-loader.o +#common-obj-$(CONFIG_SOFTMMU) += generic-loader.o common-obj-$(CONFIG_SOFTMMU) += null-machine.o -diff --git a/hw/display/cirrus_vga_isa.c b/hw/display/cirrus_vga_isa.c -index fa10b74..1cb607d 100644 ---- a/hw/display/cirrus_vga_isa.c -+++ b/hw/display/cirrus_vga_isa.c -@@ -81,6 +81,8 @@ static void isa_cirrus_vga_class_init(ObjectClass *klass, void *data) - dc->realize = isa_cirrus_vga_realizefn; - dc->props = isa_cirrus_vga_properties; - set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories); -+ /* Disabled for Red Hat Enterprise Linux: */ -+ dc->user_creatable = false; - } +diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs +index 97acd5b..e39b87c 100644 +--- a/hw/display/Makefile.objs ++++ b/hw/display/Makefile.objs +@@ -1,7 +1,8 @@ + common-obj-y += edid-generate.o - static const TypeInfo isa_cirrus_vga_info = { 
+-common-obj-$(CONFIG_FW_CFG_DMA) += ramfb.o +-common-obj-$(CONFIG_FW_CFG_DMA) += ramfb-standalone.o ++# Disabled for Red Hat Enterprise Linux ++#common-obj-$(CONFIG_FW_CFG_DMA) += ramfb.o ++#common-obj-$(CONFIG_FW_CFG_DMA) += ramfb-standalone.o + + common-obj-$(CONFIG_ADS7846) += ads7846.o + common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o +diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c +index a0e7146..058a8e0 100644 +--- a/hw/display/cirrus_vga.c ++++ b/hw/display/cirrus_vga.c +@@ -2967,6 +2967,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); + int16_t device_id = pc->device_id; + ++ warn_report("'cirrus-vga' is deprecated, " ++ "please use a different VGA card instead"); ++ + /* follow real hardware, cirrus card emulated has 4 MB video memory. + Also accept 8 MB/16 MB for backward compatibility. */ + if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && diff --git a/hw/i386/pc.c b/hw/i386/pc.c index f095725..567439e 100644 --- a/hw/i386/pc.c @@ -637,18 +686,6 @@ index 2398ce6..63a7bb6 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), -diff --git a/hw/rdma/Makefile.objs b/hw/rdma/Makefile.objs -index bd36cbf..e87e7e5 100644 ---- a/hw/rdma/Makefile.objs -+++ b/hw/rdma/Makefile.objs -@@ -1,5 +1,6 @@ - ifeq ($(CONFIG_PVRDMA),y) - obj-$(CONFIG_PCI) += rdma_utils.o rdma_backend.o rdma_rm.o - obj-$(CONFIG_PCI) += vmw/pvrdma_dev_ring.o vmw/pvrdma_cmd.o \ -- vmw/pvrdma_qp_ops.o vmw/pvrdma_main.o -+ vmw/pvrdma_qp_ops.o -+#obj-$(CONFIG_PCI) += vmw/pvrdma_main.o - endif diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c index 25976ed..a793584 100644 --- a/hw/usb/ccid-card-emulated.c @@ -663,19 +700,18 @@ index 25976ed..a793584 100644 static const TypeInfo emulated_card_info = { diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs -index 8b3f664..a6b6039 100644 +index 
8b3f664..7e5c0ad 100644 --- a/hw/vfio/Makefile.objs +++ b/hw/vfio/Makefile.objs -@@ -2,9 +2,6 @@ ifeq ($(CONFIG_LINUX), y) +@@ -2,7 +2,7 @@ ifeq ($(CONFIG_LINUX), y) obj-$(CONFIG_SOFTMMU) += common.o obj-$(CONFIG_PCI) += pci.o pci-quirks.o display.o obj-$(CONFIG_VFIO_CCW) += ccw.o -obj-$(CONFIG_SOFTMMU) += platform.o --obj-$(CONFIG_VFIO_XGMAC) += calxeda-xgmac.o --obj-$(CONFIG_VFIO_AMD_XGBE) += amd-xgbe.o ++#obj-$(CONFIG_SOFTMMU) += platform.o + obj-$(CONFIG_VFIO_XGMAC) += calxeda-xgmac.o + obj-$(CONFIG_VFIO_AMD_XGBE) += amd-xgbe.o obj-$(CONFIG_SOFTMMU) += spapr.o - obj-$(CONFIG_VFIO_AP) += ap.o - endif diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index eae31c7..4a6e98e 100644 --- a/hw/vfio/pci-quirks.c @@ -699,6 +735,34 @@ index eae31c7..4a6e98e 100644 /* * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 5c7bd96..6e0000c 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3262,6 +3262,7 @@ static const TypeInfo vfio_pci_dev_info = { + }, + }; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static Property vfio_pci_dev_nohotplug_properties[] = { + DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false), + DEFINE_PROP_END_OF_LIST(), +@@ -3281,11 +3282,15 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { + .instance_size = sizeof(VFIOPCIDevice), + .class_init = vfio_pci_nohotplug_dev_class_init, + }; ++#endif + + static void register_vfio_pci_dev_type(void) + { + type_register_static(&vfio_pci_dev_info); ++ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + type_register_static(&vfio_pci_nohotplug_dev_info); ++#endif + } + + type_init(register_vfio_pci_dev_type) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index a954799..9a987cb 100644 --- a/hw/virtio/virtio-pci.c @@ -740,7 +804,7 @@ index a954799..9a987cb 100644 #endif #ifdef CONFIG_VHOST_VSOCK diff --git a/qemu-options.hx b/qemu-options.hx -index f7df472..cd2b25b 100644 +index 
269eda7..5e13aa9 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -1741,11 +1741,6 @@ ETEXI @@ -755,6 +819,22 @@ index f7df472..cd2b25b 100644 DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" +@@ -2749,6 +2744,7 @@ STEXI + ETEXI + DEFHEADING() + ++#if 0 + DEFHEADING(Bluetooth(R) options:) + STEXI + @table @option +@@ -2827,6 +2823,7 @@ STEXI + @end table + ETEXI + DEFHEADING() ++#endif + + #ifdef CONFIG_TPM + DEFHEADING(TPM device options:) diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index 5dd0aee..f27250e 100644 --- a/stubs/Makefile.objs @@ -959,10 +1039,10 @@ index 2ebf26a..3a0337a 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ diff --git a/vl.c b/vl.c -index fa25d1a..39d152a 100644 +index a5ae5f2..22bd99c 100644 --- a/vl.c +++ b/vl.c -@@ -171,7 +171,7 @@ unsigned int max_cpus; +@@ -172,7 +172,7 @@ unsigned int max_cpus; int smp_cores = 1; int smp_threads = 1; int acpi_enabled = 1; @@ -971,6 +1051,50 @@ index fa25d1a..39d152a 100644 int fd_bootchk = 1; static int no_reboot; int no_shutdown = 0; +@@ -937,6 +937,7 @@ static void configure_rtc(QemuOpts *opts) + } + } + ++#if 0 // Disabled for Red Hat Enterprise Linux + /***********************************************************/ + /* Bluetooth support */ + static int nb_hcis; +@@ -1058,6 +1059,7 @@ static int bt_parse(const char *opt) + error_report("bad bluetooth parameter '%s'", opt); + return 1; + } ++#endif + + static int parse_name(void *opaque, QemuOpts *opts, Error **errp) + { +@@ -3273,6 +3275,7 @@ int main(int argc, char **argv, char **envp) + } + break; + #endif ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + case QEMU_OPTION_bt: + warn_report("The bluetooth subsystem is deprecated and will " + "be removed soon. 
If the bluetooth subsystem is " +@@ -3280,6 +3283,7 @@ int main(int argc, char **argv, char **envp) + "qemu-devel@nongnu.org with your usecase."); + add_device_config(DEV_BT, optarg); + break; ++#endif + case QEMU_OPTION_audio_help: + AUD_help (); + exit (0); +@@ -4417,9 +4421,11 @@ int main(int argc, char **argv, char **envp) + + tpm_init(); + ++#if 0 // Disabled for Red Hat Enterprise Linux + /* init the bluetooth world */ + if (foreach_device_config(DEV_BT, bt_parse)) + exit(1); ++#endif + + if (!xen_enabled()) { + /* On 32-bit hosts, QEMU is limited by virtual address space */ -- 1.8.3.1 diff --git a/0007-Machine-type-related-general-changes.patch b/0006-Machine-type-related-general-changes.patch similarity index 96% rename from 0007-Machine-type-related-general-changes.patch rename to 0006-Machine-type-related-general-changes.patch index 62a0b14..66f2920 100644 --- a/0007-Machine-type-related-general-changes.patch +++ b/0006-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From c59789ec7d5213bda9aeb48aacef2e3e897fdf7e Mon Sep 17 00:00:00 2001 +From 01e49fb467fd831c62f3640e546e313298a7c5c0 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:36:59 +0200 Subject: Machine type related general changes @@ -12,7 +12,6 @@ Signed-off-by: Miroslav Rezanina hw/acpi/ich9.c | 16 +++ hw/acpi/piix4.c | 6 +- hw/char/serial.c | 16 +++ - hw/display/cirrus_vga_isa.c | 2 +- hw/display/vga-isa.c | 2 +- hw/net/e1000.c | 18 ++- hw/net/e1000e.c | 21 ++++ @@ -30,7 +29,7 @@ Signed-off-by: Miroslav Rezanina migration/migration.h | 5 + qdev-monitor.c | 1 - scripts/vmstate-static-checker.py | 1 - - 21 files changed, 354 insertions(+), 11 deletions(-) + 20 files changed, 353 insertions(+), 10 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index a4e87b8..23a7baa 100644 @@ -145,19 +144,6 @@ index 02463e3..a591387 100644 return s->poll_msl >= 0; } -diff --git a/hw/display/cirrus_vga_isa.c b/hw/display/cirrus_vga_isa.c -index 1cb607d..22678a3 100644 
---- a/hw/display/cirrus_vga_isa.c -+++ b/hw/display/cirrus_vga_isa.c -@@ -67,7 +67,7 @@ static void isa_cirrus_vga_realizefn(DeviceState *dev, Error **errp) - - static Property isa_cirrus_vga_properties[] = { - DEFINE_PROP_UINT32("vgamem_mb", struct ISACirrusVGAState, -- cirrus_vga.vga.vram_size_mb, 4), -+ cirrus_vga.vga.vram_size_mb, 16), - DEFINE_PROP_BOOL("blitter", struct ISACirrusVGAState, - cirrus_vga.enable_blitter, true), - DEFINE_PROP_END_OF_LIST(), diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c index fa44242..7835c83 100644 --- a/hw/display/vga-isa.c @@ -707,7 +693,7 @@ index a5080ad..b943ec9 100644 + #endif diff --git a/migration/migration.c b/migration/migration.c -index b261c1e..fb425b5 100644 +index 49ffb99..0d9cb7a 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -105,6 +105,8 @@ enum mig_rp_message_type { diff --git a/0008-Add-aarch64-machine-types.patch b/0007-Add-aarch64-machine-types.patch similarity index 94% rename from 0008-Add-aarch64-machine-types.patch rename to 0007-Add-aarch64-machine-types.patch index ede1ec5..1c0ca0c 100644 --- a/0008-Add-aarch64-machine-types.patch +++ b/0007-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 6df04926524e1a9f1178b53bf2b7b8978a6d5935 Mon Sep 17 00:00:00 2001 +From 68e46b1a3bc650bc35ccc49606c3f31e29165512 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -12,7 +12,7 @@ Signed-off-by: Miroslav Rezanina 2 files changed, 147 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index a2b8d8f..703f0dd 100644 +index f69e7eb..84a86c1 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -60,6 +60,7 @@ @@ -61,7 +61,7 @@ index a2b8d8f..703f0dd 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -1577,6 +1607,7 @@ static void machvirt_init(MachineState *machine) +@@ -1578,6 +1608,7 @@ static void machvirt_init(MachineState *machine) 
qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -69,7 +69,7 @@ index a2b8d8f..703f0dd 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1605,6 +1636,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -1606,6 +1637,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -77,7 +77,7 @@ index a2b8d8f..703f0dd 100644 static bool virt_get_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1659,6 +1691,7 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp) +@@ -1660,6 +1692,7 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp) } } @@ -85,7 +85,7 @@ index a2b8d8f..703f0dd 100644 static char *virt_get_iommu(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1686,6 +1719,7 @@ static void virt_set_iommu(Object *obj, const char *value, Error **errp) +@@ -1687,6 +1720,7 @@ static void virt_set_iommu(Object *obj, const char *value, Error **errp) error_append_hint(errp, "Valid values are none, smmuv3.\n"); } } @@ -93,7 +93,7 @@ index a2b8d8f..703f0dd 100644 static CpuInstanceProperties virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) -@@ -1725,6 +1759,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) +@@ -1726,6 +1760,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) return ms->possible_cpus; } @@ -101,7 +101,7 @@ index a2b8d8f..703f0dd 100644 static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { -@@ -1889,6 +1924,9 @@ DEFINE_VIRT_MACHINE(3, 0) +@@ -1890,6 +1925,9 @@ DEFINE_VIRT_MACHINE(3, 0) #define VIRT_COMPAT_2_12 \ HW_COMPAT_2_12 @@ -111,7 +111,7 @@ index a2b8d8f..703f0dd 100644 static void virt_2_12_instance_init(Object *obj) { virt_3_0_instance_init(obj); -@@ -2016,3 +2054,89 @@ static void 
virt_machine_2_6_options(MachineClass *mc) +@@ -2017,3 +2055,89 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) diff --git a/0009-Add-ppc64-machine-types.patch b/0008-Add-ppc64-machine-types.patch similarity index 99% rename from 0009-Add-ppc64-machine-types.patch rename to 0008-Add-ppc64-machine-types.patch index 2fd63fd..d38031f 100644 --- a/0009-Add-ppc64-machine-types.patch +++ b/0008-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 0f1a361c4bd8fc0874cc5d05e611fadb67524a1e Mon Sep 17 00:00:00 2001 +From 4f9094b11eb831317879d9c6108f6f706546fea5 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types diff --git a/0010-Add-s390x-machine-types.patch b/0009-Add-s390x-machine-types.patch similarity index 98% rename from 0010-Add-s390x-machine-types.patch rename to 0009-Add-s390x-machine-types.patch index 7159bb4..1fef71c 100644 --- a/0010-Add-s390x-machine-types.patch +++ b/0009-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From a47c6d2b9d75dcb15810fcfedcddf5eadf0ec227 Mon Sep 17 00:00:00 2001 +From 07b2731f23166ca13bace5faacd35ab5129fb878 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types diff --git a/0011-Add-x86_64-machine-types.patch b/0010-Add-x86_64-machine-types.patch similarity index 94% rename from 0011-Add-x86_64-machine-types.patch rename to 0010-Add-x86_64-machine-types.patch index 84feac0..53d27bf 100644 --- a/0011-Add-x86_64-machine-types.patch +++ b/0010-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From edae60c4f30697c3c859cc9c88f80c0ed3dc0f0e Mon Sep 17 00:00:00 2001 +From a4f172dc05fc5b2fc28cd1a2121b70a3d4549ab2 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -6,15 +6,17 @@ Subject: Add x86_64 machine types Adding changes to add RHEL machine types for x86_64 architecture. 
Signed-off-by: Miroslav Rezanina + +Rebase notes (3.1.0): +- Removed xsave changes --- - hw/i386/acpi-build.c | 3 + - hw/i386/pc.c | 7 +- - hw/i386/pc_piix.c | 194 ++++++++++++++++- - hw/i386/pc_q35.c | 93 ++++++++- - include/hw/i386/pc.h | 564 ++++++++++++++++++++++++++++++++++++++++++++++++++ - target/i386/cpu.c | 9 +- - target/i386/machine.c | 21 ++ - 7 files changed, 884 insertions(+), 7 deletions(-) + hw/i386/acpi-build.c | 3 + + hw/i386/pc.c | 7 +- + hw/i386/pc_piix.c | 191 ++++++++++++++++- + hw/i386/pc_q35.c | 93 ++++++++- + include/hw/i386/pc.h | 564 +++++++++++++++++++++++++++++++++++++++++++++++++++ + target/i386/cpu.c | 9 +- + 6 files changed, 860 insertions(+), 7 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 236a20e..3360da9 100644 @@ -63,7 +65,7 @@ index 567439e..a609332 100644 hc->pre_plug = pc_machine_device_pre_plug_cb; hc->plug = pc_machine_device_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 7092d6d..83c22ae 100644 +index 7092d6d..af9eb8c 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -50,6 +50,7 @@ @@ -93,20 +95,10 @@ index 7092d6d..83c22ae 100644 static void pc_compat_2_3(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); -@@ -433,6 +435,7 @@ static void pc_i440fx_3_1_machine_options(MachineClass *m) - pc_i440fx_machine_options(m); - m->alias = "pc"; - m->is_default = 1; -+ SET_MACHINE_COMPAT(m, PC_COMPAT_2_12); - } - - DEFINE_I440FX_MACHINE(v3_1, "pc-i440fx-3.1", NULL, -@@ -1157,3 +1160,190 @@ static void xenfv_machine_options(MachineClass *m) +@@ -1157,3 +1159,188 @@ static void xenfv_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init, xenfv_machine_options); #endif -+machine_init(pc_machine_init); -+ +#endif /* Disabled for Red Hat Enterprise Linux */ + +/* Red Hat Enterprise Linux machine types */ @@ -1025,45 +1017,6 @@ index e9b9183..573de14 100644 { NULL, NULL }, }; -diff --git a/target/i386/machine.c b/target/i386/machine.c 
-index 225b5d4..c60e1b8 100644 ---- a/target/i386/machine.c -+++ b/target/i386/machine.c -@@ -964,6 +964,26 @@ static const VMStateDescription vmstate_svm_npt = { - } - }; - -+static bool vmstate_xsave_needed(void *opaque) -+{ -+ /* The xsave state is already on the main "cpu" section */ -+ return false; -+} -+ -+static const VMStateDescription vmstate_xsave ={ -+ .name = "cpu/xsave", -+ .version_id = 1, -+ .minimum_version_id = 1, -+ .minimum_version_id_old = 1, -+ .needed = vmstate_xsave_needed, -+ .fields = (VMStateField []) { -+ VMSTATE_UINT64_V(env.xcr0, X86CPU, 1), -+ VMSTATE_UINT64_V(env.xstate_bv, X86CPU, 1), -+ VMSTATE_YMMH_REGS_VARS(env.xmm_regs, X86CPU, CPU_NB_REGS, 1), -+ VMSTATE_END_OF_LIST() -+ } -+}; -+ - VMStateDescription vmstate_x86_cpu = { - .name = "cpu", - .version_id = 12, -@@ -1089,6 +1109,7 @@ VMStateDescription vmstate_x86_cpu = { - &vmstate_msr_intel_pt, - &vmstate_msr_virt_ssbd, - &vmstate_svm_npt, -+ &vmstate_xsave, - NULL - } - }; -- 1.8.3.1 diff --git a/0012-Enable-make-check.patch b/0011-Enable-make-check.patch similarity index 79% rename from 0012-Enable-make-check.patch rename to 0011-Enable-make-check.patch index 23edace..79897f5 100644 --- a/0012-Enable-make-check.patch +++ b/0011-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 51a0ce09fb01c87cb9bd7f1fca850e8d5d573f5f Mon Sep 17 00:00:00 2001 +From 5a614332800ab5367d0be248e665250fe92964ba Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:48:41 +0200 Subject: Enable make check @@ -9,7 +9,7 @@ make check run during build. 
Signed-off-by: Miroslav Rezanina --- redhat/qemu-kvm.spec.template | 2 +- - tests/Makefile.include | 32 ++++++++++++++++---------------- + tests/Makefile.include | 20 ++++++++++---------- tests/boot-serial-test.c | 6 +++++- tests/cpu-plug-test.c | 3 ++- tests/e1000-test.c | 2 ++ @@ -18,10 +18,10 @@ Signed-off-by: Miroslav Rezanina tests/qemu-iotests/group | 4 ++-- tests/test-x86-cpuid-compat.c | 2 ++ tests/usb-hcd-xhci-test.c | 4 ++++ - 10 files changed, 42 insertions(+), 27 deletions(-) + 10 files changed, 36 insertions(+), 21 deletions(-) diff --git a/tests/Makefile.include b/tests/Makefile.include -index 613242b..baeb608 100644 +index fb0b449..f2305a5 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -171,20 +171,20 @@ check-qtest-pci-$(CONFIG_IPACK) += $(check-qtest-ipack-y) @@ -80,44 +80,6 @@ index 613242b..baeb608 100644 check-qtest-ppc64-$(CONFIG_RTL8139_PCI) += tests/test-filter-redirector$(EXESUF) check-qtest-ppc64-y += tests/display-vga-test$(EXESUF) check-qtest-ppc64-y += tests/numa-test$(EXESUF) -@@ -685,15 +685,15 @@ tests/endianness-test$(EXESUF): tests/endianness-test.o - tests/spapr-phb-test$(EXESUF): tests/spapr-phb-test.o $(libqos-obj-y) - tests/prom-env-test$(EXESUF): tests/prom-env-test.o $(libqos-obj-y) - tests/rtas-test$(EXESUF): tests/rtas-test.o $(libqos-spapr-obj-y) --tests/fdc-test$(EXESUF): tests/fdc-test.o -+#tests/fdc-test$(EXESUF): tests/fdc-test.o - tests/ide-test$(EXESUF): tests/ide-test.o $(libqos-pc-obj-y) - tests/ahci-test$(EXESUF): tests/ahci-test.o $(libqos-pc-obj-y) --tests/ipmi-kcs-test$(EXESUF): tests/ipmi-kcs-test.o --tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o -+#tests/ipmi-kcs-test$(EXESUF): tests/ipmi-kcs-test.o -+#tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o - tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o - tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y) - tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y) --tests/bios-tables-test$(EXESUF): 
tests/bios-tables-test.o \ -+#tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ - tests/boot-sector.o tests/acpi-utils.o $(libqos-obj-y) - tests/pxe-test$(EXESUF): tests/pxe-test.o tests/boot-sector.o $(libqos-obj-y) - tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y) -@@ -707,7 +707,7 @@ tests/e1000-test$(EXESUF): tests/e1000-test.o - tests/e1000e-test$(EXESUF): tests/e1000e-test.o $(libqos-pc-obj-y) - tests/rtl8139-test$(EXESUF): tests/rtl8139-test.o $(libqos-pc-obj-y) - tests/pcnet-test$(EXESUF): tests/pcnet-test.o --tests/pnv-xscom-test$(EXESUF): tests/pnv-xscom-test.o -+#tests/pnv-xscom-test$(EXESUF): tests/pnv-xscom-test.o - tests/eepro100-test$(EXESUF): tests/eepro100-test.o - tests/vmxnet3-test$(EXESUF): tests/vmxnet3-test.o - tests/ne2000-test$(EXESUF): tests/ne2000-test.o -@@ -755,7 +755,7 @@ tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y) - tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y) - tests/test-x86-cpuid-compat$(EXESUF): tests/test-x86-cpuid-compat.o $(qtest-obj-y) - tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y) $(libqos-spapr-obj-y) --tests/megasas-test$(EXESUF): tests/megasas-test.o $(libqos-spapr-obj-y) $(libqos-pc-obj-y) -+#tests/megasas-test$(EXESUF): tests/megasas-test.o $(libqos-spapr-obj-y) $(libqos-pc-obj-y) - tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o $(test-util-obj-y) libvhost-user.a - tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y) - tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c index 8ec6aed..6a533b9 100644 --- a/tests/boot-serial-test.c @@ -222,7 +184,7 @@ index 32741d7..85ef52e 100755 *) ;; diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 2722103..ede8887 100644 +index 61a6d98..05996ae 100644 --- a/tests/qemu-iotests/group +++ 
b/tests/qemu-iotests/group @@ -77,7 +77,7 @@ diff --git a/0013-Use-kvm-by-default.patch b/0012-Use-kvm-by-default.patch similarity index 94% rename from 0013-Use-kvm-by-default.patch rename to 0012-Use-kvm-by-default.patch index 2c59bf6..b0bc077 100644 --- a/0013-Use-kvm-by-default.patch +++ b/0012-Use-kvm-by-default.patch @@ -1,4 +1,4 @@ -From b91ee13e30cef65d02e3e0f9324931f1e2589426 Mon Sep 17 00:00:00 2001 +From 445df6a548c20d21c3275d91bcd96c6b0fde9c97 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 18 Dec 2014 06:27:49 +0100 Subject: Use kvm by default diff --git a/0014-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch similarity index 96% rename from 0014-vfio-cap-number-of-devices-that-can-be-assigned.patch rename to 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index c445c7f..fbd961f 100644 --- a/0014-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 3094b4ac400f54f26b837226f44fc0a18f0726e6 Mon Sep 17 00:00:00 2001 +From 0b8b44ec1fc3a0d17f13c07ac4c7351769a63300 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -37,7 +37,7 @@ Merged patches (2.9.0): 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 5c7bd96..598f771 100644 +index 6e0000c..d242c62 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -36,6 +36,7 @@ diff --git a/0015-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch similarity index 94% rename from 0015-Add-support-statement-to-help-output.patch rename to 0014-Add-support-statement-to-help-output.patch index 6e991bb..1d2f7f3 100644 --- a/0015-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 
2bfcbb3ece3cda4cf977cb3983df84830bde90a3 Mon Sep 17 00:00:00 2001 +From f06750384e9e241c7cc6f14b6fdedea3c4237790 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -23,7 +23,7 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/vl.c b/vl.c -index 39d152a..db628b8 100644 +index 22bd99c..74b32c4 100644 --- a/vl.c +++ b/vl.c @@ -1904,9 +1904,17 @@ static void version(void) diff --git a/0016-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch similarity index 94% rename from 0016-globally-limit-the-maximum-number-of-CPUs.patch rename to 0015-globally-limit-the-maximum-number-of-CPUs.patch index 5c51d59..2c77287 100644 --- a/0016-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From c214bfc318a3128dc92fe5017ca0dd54fc50ffed Mon Sep 17 00:00:00 2001 +From b64599f8f72d89ac75557608f1890abb8effb88b Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -106,10 +106,10 @@ index 4880a05..a8f5d47 100644 if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " diff --git a/vl.c b/vl.c -index db628b8..7b0f19a 100644 +index 74b32c4..0a9bdc2 100644 --- a/vl.c +++ b/vl.c -@@ -133,6 +133,8 @@ int main(int argc, char **argv) +@@ -134,6 +134,8 @@ int main(int argc, char **argv) #define MAX_VIRTIO_CONSOLES 1 @@ -118,7 +118,7 @@ index db628b8..7b0f19a 100644 static const char *data_dir[16]; static int data_dir_idx; const char *bios_name = NULL; -@@ -1460,6 +1462,20 @@ MachineClass *find_default_machine(void) +@@ -1463,6 +1465,20 @@ MachineClass *find_default_machine(void) return mc; } @@ -139,7 +139,7 @@ index db628b8..7b0f19a 100644 MachineInfoList *qmp_query_machines(Error **errp) { GSList *el, *machines = object_class_get_list(TYPE_MACHINE, 
false); -@@ -4012,6 +4028,8 @@ int main(int argc, char **argv, char **envp) +@@ -4021,6 +4037,8 @@ int main(int argc, char **argv, char **envp) "mutually exclusive"); exit(EXIT_FAILURE); } diff --git a/0017-Add-support-for-simpletrace.patch b/0016-Add-support-for-simpletrace.patch similarity index 98% rename from 0017-Add-support-for-simpletrace.patch rename to 0016-Add-support-for-simpletrace.patch index 3e295c3..5db59b4 100644 --- a/0017-Add-support-for-simpletrace.patch +++ b/0016-Add-support-for-simpletrace.patch @@ -1,4 +1,4 @@ -From 30887ffc7e908ebed5381c08181cd6a2a6bc5e98 Mon Sep 17 00:00:00 2001 +From 490f864563ddcb367e6cf58d1be4a9cbed3a5e2b Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 8 Oct 2015 09:50:17 +0200 Subject: Add support for simpletrace diff --git a/0018-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch similarity index 85% rename from 0018-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch rename to 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 109b97c..824bf82 100644 --- a/0018-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,6 +1,6 @@ -From d0656d8b2e0de42d04c224db36fe9c1ec015a9cc Mon Sep 17 00:00:00 2001 +From 081800f80d82ce8d71a1075b3d888f2e298ad183 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Fri, 14 Nov 2014 08:51:50 +0100 +Date: Fri, 30 Nov 2018 09:11:03 +0100 Subject: Use qemu-kvm in documentation instead of qemu-system- Patchwork-id: 62380 @@ -17,89 +17,12 @@ to reflect this change. Only architectures available in RHEL are updated. 
Signed-off-by: Miroslav Rezanina --- - docs/COLO-FT.txt | 4 +- - docs/can.txt | 8 +-- - docs/pr-manager.rst | 4 +- docs/qemu-block-drivers.texi | 70 +++++++++++----------- docs/qemu-cpu-models.texi | 8 +-- - docs/replay.txt | 4 +- - docs/specs/tpm.txt | 8 +-- qemu-doc.texi | 70 +++++++++++----------- qemu-options.hx | 140 ++++++++++++++++++++++--------------------- - 9 files changed, 160 insertions(+), 156 deletions(-) + 4 files changed, 146 insertions(+), 142 deletions(-) -diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt -index e2686bb..8c48f86 100644 ---- a/docs/COLO-FT.txt -+++ b/docs/COLO-FT.txt -@@ -147,7 +147,7 @@ in test procedure. - == Test procedure == - 1. Startup qemu - Primary: --# qemu-system-x86_64 -accel kvm -m 2048 -smp 2 -qmp stdio -name primary \ -+# qemu-kvm -accel kvm -m 2048 -smp 2 -qmp stdio -name primary \ - -device piix3-usb-uhci -vnc :7 \ - -device usb-tablet -netdev tap,id=hn0,vhost=off \ - -device virtio-net-pci,id=net-pci0,netdev=hn0 \ -@@ -155,7 +155,7 @@ Primary: - children.0.file.filename=1.raw,\ - children.0.driver=raw -S - Secondary: --# qemu-system-x86_64 -accel kvm -m 2048 -smp 2 -qmp stdio -name secondary \ -+# qemu-kvm -accel kvm -m 2048 -smp 2 -qmp stdio -name secondary \ - -device piix3-usb-uhci -vnc :7 \ - -device usb-tablet -netdev tap,id=hn0,vhost=off \ - -device virtio-net-pci,id=net-pci0,netdev=hn0 \ -diff --git a/docs/can.txt b/docs/can.txt -index 7ba23b2..4ae5690 100644 ---- a/docs/can.txt -+++ b/docs/can.txt -@@ -50,9 +50,9 @@ CAN boards can be selected - The ''kvaser_pci'' board/device model is compatible with and has been tested with - ''kvaser_pci'' driver included in mainline Linux kernel. - The tested setup was Linux 4.9 kernel on the host and guest side. 
--Example for qemu-system-x86_64: -+Example for qemu-kvm (intel architecture): - -- qemu-system-x86_64 -accel kvm -kernel /boot/vmlinuz-4.9.0-4-amd64 \ -+ qemu-kvm -accel kvm -kernel /boot/vmlinuz-4.9.0-4-amd64 \ - -initrd ramdisk.cpio \ - -virtfs local,path=shareddir,security_model=none,mount_tag=shareddir \ - -object can-bus,id=canbus0 \ -@@ -60,9 +60,9 @@ Example for qemu-system-x86_64: - -device kvaser_pci,canbus=canbus0 \ - -nographic -append "console=ttyS0" - --Example for qemu-system-arm: -+Example for qemu-kvm (arm architecture): - -- qemu-system-arm -cpu arm1176 -m 256 -M versatilepb \ -+ qemu-kvm -cpu arm1176 -m 256 -M versatilepb \ - -kernel kernel-qemu-arm1176-versatilepb \ - -hda rpi-wheezy-overlay \ - -append "console=ttyAMA0 root=/dev/sda2 ro init=/sbin/init-overlay" \ -diff --git a/docs/pr-manager.rst b/docs/pr-manager.rst -index 9b1de19..45cb8be 100644 ---- a/docs/pr-manager.rst -+++ b/docs/pr-manager.rst -@@ -36,7 +36,7 @@ accepts the path to the helper program's Unix socket. 
For example, - the following command line defines a ``pr-manager-helper`` object and - attaches it to a SCSI passthrough device:: - -- $ qemu-system-x86_64 -+ $ qemu-kvm - -device virtio-scsi \ - -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock - -drive if=none,id=hd,driver=raw,file.filename=/dev/sdb,file.pr-manager=helper0 -@@ -44,7 +44,7 @@ attaches it to a SCSI passthrough device:: - - Alternatively, using ``-blockdev``:: - -- $ qemu-system-x86_64 -+ $ qemu-kvm - -device virtio-scsi \ - -object pr-manager-helper,id=helper0,path=/var/run/qemu-pr-helper.sock - -blockdev node-name=hd,driver=raw,file.driver=host_device,file.filename=/dev/sdb,file.pr-manager=helper0 diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi index 38e9f34..2e71ec9 100644 --- a/docs/qemu-block-drivers.texi @@ -345,68 +268,6 @@ index 1935f98..93672de 100644 @end example @end table -diff --git a/docs/replay.txt b/docs/replay.txt -index 3497585..8d67ea7 100644 ---- a/docs/replay.txt -+++ b/docs/replay.txt -@@ -25,7 +25,7 @@ Deterministic replay has the following features: - - Usage of the record/replay: - * First, record the execution with the following command line: -- qemu-system-i386 \ -+ qemu-kvm \ - -icount shift=7,rr=record,rrfile=replay.bin \ - -drive file=disk.qcow2,if=none,id=img-direct \ - -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay \ -@@ -33,7 +33,7 @@ Usage of the record/replay: - -netdev user,id=net1 -device rtl8139,netdev=net1 \ - -object filter-replay,id=replay,netdev=net1 - * After recording, you can replay it by using another command line: -- qemu-system-i386 \ -+ qemu-kvm \ - -icount shift=7,rr=replay,rrfile=replay.bin \ - -drive file=disk.qcow2,if=none,id=img-direct \ - -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay \ -diff --git a/docs/specs/tpm.txt b/docs/specs/tpm.txt -index 1af82bb..b2741ac 100644 ---- a/docs/specs/tpm.txt -+++ b/docs/specs/tpm.txt -@@ -113,7 +113,7 @@ QEMU files related to 
the TPM passthrough device: - Command line to start QEMU with the TPM passthrough device using the host's - hardware TPM /dev/tpm0: - --qemu-system-x86_64 -display sdl -accel kvm \ -+qemu-kvm -display vnc -accel kvm \ - -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ - -tpmdev passthrough,id=tpm0,path=/dev/tpm0 \ - -device tpm-tis,tpmdev=tpm0 test.img -@@ -179,7 +179,7 @@ swtpm socket --tpmstate dir=/tmp/mytpm1 \ - Command line to start QEMU with the TPM emulator device communicating with - the swtpm: - --qemu-system-x86_64 -display sdl -accel kvm \ -+qemu-kvm -display vnc -accel kvm \ - -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ - -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ - -tpmdev emulator,id=tpm0,chardev=chrtpm \ -@@ -237,7 +237,7 @@ swtpm socket --tpmstate dir=/tmp/mytpm1 \ - - In a 2nd terminal start the VM: - --qemu-system-x86_64 -display sdl -accel kvm \ -+qemu-kvm -display sdl -accel kvm \ - -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ - -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ - -tpmdev emulator,id=tpm0,chardev=chrtpm \ -@@ -270,7 +270,7 @@ swtpm socket --tpmstate dir=/tmp/mytpm1 \ - In the 2nd terminal restore the state of the VM using the additional - '-incoming' option. - --qemu-system-x86_64 -display sdl -accel kvm \ -+qemu-kvm -display sdl -accel kvm \ - -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ - -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ - -tpmdev emulator,id=tpm0,chardev=chrtpm \ diff --git a/qemu-doc.texi b/qemu-doc.texi index f7ad1df..0e4b34a 100644 --- a/qemu-doc.texi @@ -700,7 +561,7 @@ index f7ad1df..0e4b34a 100644 -prom-env 'boot-args=conf=hd:2,\yaboot.conf' @end example diff --git a/qemu-options.hx b/qemu-options.hx -index cd2b25b..a65c63b 100644 +index 5e13aa9..2a60e51 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -253,7 +253,7 @@ This option defines a free-form string that can be used to describe @var{fd}. 
@@ -1037,7 +898,7 @@ index cd2b25b..a65c63b 100644 @end example @item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}] -@@ -2804,7 +2808,7 @@ and communicate. Requires the Linux @code{vhci} driver installed. Can +@@ -2805,7 +2809,7 @@ and communicate. Requires the Linux @code{vhci} driver installed. Can be used as following: @example @@ -1046,7 +907,7 @@ index cd2b25b..a65c63b 100644 @end example @item -bt device:@var{dev}[,vlan=@var{n}] -@@ -3253,14 +3257,14 @@ ETEXI +@@ -3255,14 +3259,14 @@ ETEXI DEF("realtime", HAS_ARG, QEMU_OPTION_realtime, "-realtime [mlock=on|off]\n" @@ -1064,7 +925,7 @@ index cd2b25b..a65c63b 100644 (enabled by default). ETEXI -@@ -3298,7 +3302,7 @@ connections will likely be TCP-based, but also UDP, pseudo TTY, or even +@@ -3300,7 +3304,7 @@ connections will likely be TCP-based, but also UDP, pseudo TTY, or even stdio are reasonable use case. The latter is allowing to start QEMU from within gdb and establish the connection via a pipe: @example @@ -1073,7 +934,7 @@ index cd2b25b..a65c63b 100644 @end example ETEXI -@@ -4247,7 +4251,7 @@ which specify the queue number of cryptodev backend, the default of +@@ -4251,7 +4255,7 @@ which specify the queue number of cryptodev backend, the default of @example @@ -1082,7 +943,7 @@ index cd2b25b..a65c63b 100644 [...] 
\ -object cryptodev-backend-builtin,id=cryptodev0 \ -device virtio-crypto-pci,id=crypto0,cryptodev=cryptodev0 \ -@@ -4267,7 +4271,7 @@ of cryptodev backend for multiqueue vhost-user, the default of @var{queues} is 1 +@@ -4271,7 +4275,7 @@ of cryptodev backend for multiqueue vhost-user, the default of @var{queues} is 1 @example diff --git a/0019-usb-xhci-Fix-PCI-capability-order.patch b/0018-usb-xhci-Fix-PCI-capability-order.patch similarity index 98% rename from 0019-usb-xhci-Fix-PCI-capability-order.patch rename to 0018-usb-xhci-Fix-PCI-capability-order.patch index 881fdd0..6f18fde 100644 --- a/0019-usb-xhci-Fix-PCI-capability-order.patch +++ b/0018-usb-xhci-Fix-PCI-capability-order.patch @@ -1,4 +1,4 @@ -From 0c8a71a5751106013f9ecfdc20f308cc1e44045b Mon Sep 17 00:00:00 2001 +From a3d01620dcbc301493f6704265edddf78f6bfe1b Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 5 May 2017 19:06:14 +0200 Subject: usb-xhci: Fix PCI capability order diff --git a/0020-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch similarity index 97% rename from 0020-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch rename to 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index af07bfd..f3600dd 100644 --- a/0020-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 7d316f7aebb9634c3ac5b79f5b1198e27726a0fd Mon Sep 17 00:00:00 2001 +From 19f1b23aa923eab88c3fdda7b7fdc157c313e4eb Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] diff --git a/qemu-kvm.spec b/qemu-kvm.spec index ef0e02b..19362a2 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -7,11 +7,8 @@ %global have_fdt 0 %global have_gluster 1 %global have_kvm_setup 0 -%global have_seccomp 1 %global have_memlock_limits 0 %global 
have_vxhs 0 -%global have_vhost_user 1 -%global have_tcmalloc 0 %ifnarch %{ix86} x86_64 %global have_usbredir 0 @@ -64,15 +61,14 @@ Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} # Macro to properly setup RHEL/RHEV conflict handling -%define rhel_rhev_conflicts() \ -Conflicts: %1-ma \ -Conflicts: %1-rhev \ -Provides: %1-rhel = %{epoch}:%{version}-%{release} +%define rhev_ma_conflicts() \ +Obsoletes: %1-ma \ +Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 0%{?dist}.next.candidate +Release: 1%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -81,11 +77,6 @@ URL: http://www.qemu.org/ ExclusiveArch: x86_64 %{power64} aarch64 s390x -# OOM killer breaks builds with parallel make on s390x -%ifarch s390x - %define _smp_mflags %{nil} -%endif - Source0: http://wiki.qemu.org/download/qemu-3.1.0.tar.xz # KSM control scripts @@ -113,24 +104,22 @@ Source34: 81-kvm-rhel.rules Source35: udev-kvm-check.c - -Patch0001: 0001-migration-colo.c-Fix-compilation-issue-when-disable-.patch -Patch0005: 0005-Initial-redhat-build.patch -Patch0006: 0006-Enable-disable-devices-for-RHEL.patch -Patch0007: 0007-Machine-type-related-general-changes.patch -Patch0008: 0008-Add-aarch64-machine-types.patch -Patch0009: 0009-Add-ppc64-machine-types.patch -Patch0010: 0010-Add-s390x-machine-types.patch -Patch0011: 0011-Add-x86_64-machine-types.patch -Patch0012: 0012-Enable-make-check.patch -Patch0013: 0013-Use-kvm-by-default.patch -Patch0014: 0014-vfio-cap-number-of-devices-that-can-be-assigned.patch -Patch0015: 0015-Add-support-statement-to-help-output.patch -Patch0016: 0016-globally-limit-the-maximum-number-of-CPUs.patch -Patch0017: 0017-Add-support-for-simpletrace.patch -Patch0018: 0018-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch -Patch0019: 
0019-usb-xhci-Fix-PCI-capability-order.patch -Patch0020: 0020-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +Patch0004: 0004-Initial-redhat-build.patch +Patch0005: 0005-Enable-disable-devices-for-RHEL.patch +Patch0006: 0006-Machine-type-related-general-changes.patch +Patch0007: 0007-Add-aarch64-machine-types.patch +Patch0008: 0008-Add-ppc64-machine-types.patch +Patch0009: 0009-Add-s390x-machine-types.patch +Patch0010: 0010-Add-x86_64-machine-types.patch +Patch0011: 0011-Enable-make-check.patch +Patch0012: 0012-Use-kvm-by-default.patch +Patch0013: 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +Patch0014: 0014-Add-support-statement-to-help-output.patch +Patch0015: 0015-globally-limit-the-maximum-number-of-CPUs.patch +Patch0016: 0016-Add-support-for-simpletrace.patch +Patch0017: 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0018: 0018-usb-xhci-Fix-PCI-capability-order.patch +Patch0019: 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -157,9 +146,7 @@ BuildRequires: libcacard-devel # For smartcard NSS support BuildRequires: nss-devel %endif -%if %{have_seccomp} BuildRequires: libseccomp-devel >= 2.3.0 -%endif # For network block driver BuildRequires: libcurl-devel BuildRequires: libssh2-devel @@ -236,8 +223,7 @@ Requires: mesa-dri-drivers %endif Requires: qemu-kvm-core = %{epoch}:%{version}-%{release} -Conflicts: qemu-kvm-ma -Conflicts: qemu-kvm-rhev +%rhev_ma_conflicts qemu-kvm %{requires_all_modules} @@ -265,9 +251,7 @@ Requires: ipxe-roms-qemu >= 20170123-1 Requires: SLOF >= %{SLOF_gittagdate}-1.git%{SLOF_gittagcommit} %endif Requires: %{name}-common = %{epoch}:%{version}-%{release} -%if %{have_seccomp} Requires: libseccomp >= 2.3.0 -%endif # For compressed guest memory dumps Requires: lzo snappy %if %{have_gluster} @@ -284,7 +268,7 @@ Requires: libusbx >= 1.0.19 Requires: usbredir >= 0.7.1 %endif -%rhel_rhev_conflicts qemu-kvm +%rhev_ma_conflicts 
qemu-kvm %description -n qemu-kvm-core qemu-kvm is an open source virtualizer that provides hardware @@ -297,7 +281,7 @@ hardware for a full system such as a PC and its associated peripherals. Summary: QEMU command line tool for manipulating disk images Group: Development/Tools -%rhel_rhev_conflicts qemu-img +%rhev_ma_conflicts qemu-img %description -n qemu-img This package provides a command line tool for manipulating disk images. @@ -312,7 +296,7 @@ Requires(post): systemd-units Requires(preun): systemd-units Requires(postun): systemd-units -%rhel_rhev_conflicts qemu-kvm-common +%rhev_ma_conflicts qemu-kvm-common %description -n qemu-kvm-common qemu-kvm is an open source virtualizer that provides hardware emulation for @@ -327,11 +311,6 @@ Requires(post): systemd-units Requires(preun): systemd-units Requires(postun): systemd-units -# OOM killer breaks builds with parallel make on s390x -%ifarch s390x - %define _smp_mflags %{nil} -%endif - %description -n qemu-guest-agent qemu-kvm is an open source virtualizer that provides hardware emulation for the KVM hypervisor. @@ -397,25 +376,8 @@ the Secure Shell (SSH) protocol. 
%prep -%setup -q -n qemu-%{version} - -%patch0001 -p1 -%patch0005 -p1 -%patch0006 -p1 -%patch0007 -p1 -%patch0008 -p1 -%patch0009 -p1 -%patch0010 -p1 -%patch0011 -p1 -%patch0012 -p1 -%patch0013 -p1 -%patch0014 -p1 -%patch0015 -p1 -%patch0016 -p1 -%patch0017 -p1 -%patch0018 -p1 -%patch0019 -p1 -%patch0020 -p1 +%setup -n qemu-%{version} +%autopatch -p1 %build %global buildarch %{kvm_target}-softmmu @@ -467,11 +429,8 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" %else --disable-rdma \ %endif -%if 0%{have_seccomp} + --disable-pvrdma \ --enable-seccomp \ -%else - --disable-seccomp \ -%endif %if 0%{have_spice} --enable-spice \ --enable-smartcard \ @@ -489,20 +448,17 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" %else --disable-usb-redir \ %endif -%if 0%{have_tcmalloc} - --enable-tcmalloc \ -%else --disable-tcmalloc \ -%endif %if 0%{have_vxhs} --enable-vxhs \ %else --disable-vxhs \ %endif -%if 0%{have_vhost_user} --enable-vhost-user \ +%ifarch %{ix86} x86_64 + --enable-avx2 \ %else - --disable-vhost-user \ + --disable-avx2 \ %endif --python=%{__python3} \ --target-list="%{buildarch}" \ @@ -582,8 +538,16 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --enable-capstone \ --disable-git-update \ --disable-crypto-afalg \ - --disable-debug-mutex - + --disable-debug-mutex \ + --disable-bochs \ + --disable-cloop \ + --disable-dmg \ + --disable-qcow1 \ + --disable-vdi \ + --disable-vvfat \ + --disable-qed \ + --disable-parallels \ + --disable-sheepdog echo "config-host.mak contents:" echo "===" @@ -812,12 +776,6 @@ install -D -m 0644 %{SOURCE12} $RPM_BUILD_ROOT%{_sysconfdir}/%{name}/bridge.conf install -m 0644 %{_sourcedir}/qemu-pr-helper.service %{buildroot}%{_unitdir} install -m 0644 %{_sourcedir}/qemu-pr-helper.socket %{buildroot}%{_unitdir} -%if 0 -make %{?_smp_mflags} $buildldflags DESTDIR=$RPM_BUILD_ROOT install-libcacard - -find $RPM_BUILD_ROOT -name "libcacard.so*" -exec chmod +x \{\} \; -%endif - find $RPM_BUILD_ROOT -name '*.la' -or -name '*.a' | xargs rm -f # We need to make 
the block device modules executable else @@ -1004,8 +962,123 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog -* Mon Nov 05 2018 Danilo Cesar Lemes de Paula - 3.1.0-0.el8 -- Rebase qemu-kvm to qemu 3.1.0 RC2 +* Thu Dec 13 2018 Danilo Cesar Lemes de Paula - 3.1.0-1.el8 +- Rebase to qemu-kvm 3.1.0 + +* Tue Dec 11 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-47 +- kvm-Disable-CONFIG_IPMI-and-CONFIG_I2C-for-ppc64.patch [bz#1640044] +- kvm-Disable-CONFIG_CAN_BUS-and-CONFIG_CAN_SJA1000.patch [bz#1640042] +- Resolves: bz#1640042 + (Disable CONFIG_CAN_BUS and CONFIG_CAN_SJA1000 config switches) +- Resolves: bz#1640044 + (Disable CONFIG_I2C and CONFIG_IPMI in default-configs/ppc64-softmmu.mak) + +* Tue Dec 11 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-46 +- kvm-qcow2-Give-the-refcount-cache-the-minimum-possible-s.patch [bz#1656507] +- kvm-docs-Document-the-new-default-sizes-of-the-qcow2-cac.patch [bz#1656507] +- kvm-qcow2-Fix-Coverity-warning-when-calculating-the-refc.patch [bz#1656507] +- kvm-include-Add-IEC-binary-prefixes-in-qemu-units.h.patch [bz#1656507] +- kvm-qcow2-Options-documentation-fixes.patch [bz#1656507] +- kvm-include-Add-a-lookup-table-of-sizes.patch [bz#1656507] +- kvm-qcow2-Make-sizes-more-humanly-readable.patch [bz#1656507] +- kvm-qcow2-Avoid-duplication-in-setting-the-refcount-cach.patch [bz#1656507] +- kvm-qcow2-Assign-the-L2-cache-relatively-to-the-image-si.patch [bz#1656507] +- kvm-qcow2-Increase-the-default-upper-limit-on-the-L2-cac.patch [bz#1656507] +- kvm-qcow2-Resize-the-cache-upon-image-resizing.patch [bz#1656507] +- kvm-qcow2-Set-the-default-cache-clean-interval-to-10-min.patch [bz#1656507] +- kvm-qcow2-Explicit-number-replaced-by-a-constant.patch [bz#1656507] +- kvm-block-backend-Set-werror-rerror-defaults-in-blk_new.patch [bz#1657637] +- kvm-qcow2-Fix-cache-clean-interval-documentation.patch [bz#1656507] +- Resolves: bz#1656507 + ([RHEL.8] qcow2 cache is too small) +- Resolves: bz#1657637 + (Wrong werror 
default for -device drive=) + +* Thu Dec 06 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-45 +- kvm-target-ppc-add-basic-support-for-PTCR-on-POWER9.patch [bz#1639069] +- kvm-linux-headers-Update-for-nested-KVM-HV-downstream-on.patch [bz#1639069] +- kvm-target-ppc-Add-one-reg-id-for-ptcr.patch [bz#1639069] +- kvm-ppc-spapr_caps-Add-SPAPR_CAP_NESTED_KVM_HV.patch [bz#1639069] +- kvm-Re-enable-CONFIG_HYPERV_TESTDEV.patch [bz#1651195] +- kvm-qxl-use-guest_monitor_config-for-local-renderer.patch [bz#1610163] +- kvm-Declare-cirrus-vga-as-deprecated.patch [bz#1651994] +- kvm-Do-not-build-bluetooth-support.patch [bz#1654651] +- kvm-vfio-helpers-Fix-qemu_vfio_open_pci-crash.patch [bz#1645840] +- kvm-balloon-Allow-multiple-inhibit-users.patch [bz#1650272] +- kvm-Use-inhibit-to-prevent-ballooning-without-synchr.patch [bz#1650272] +- kvm-vfio-Inhibit-ballooning-based-on-group-attachment-to.patch [bz#1650272] +- kvm-vfio-ccw-pci-Allow-devices-to-opt-in-for-ballooning.patch [bz#1650272] +- kvm-vfio-pci-Handle-subsystem-realpath-returning-NULL.patch [bz#1650272] +- kvm-vfio-pci-Fix-failure-to-close-file-descriptor-on-err.patch [bz#1650272] +- kvm-postcopy-Synchronize-usage-of-the-balloon-inhibitor.patch [bz#1650272] +- Resolves: bz#1610163 + (guest shows border blurred screen with some resolutions when qemu boot with -device qxl-vga ,and guest on rhel7.6 has no such question) +- Resolves: bz#1639069 + ([IBM 8.0 FEAT] POWER9 - Nested virtualization in RHEL8.0 KVM for ppc64le - qemu-kvm side) +- Resolves: bz#1645840 + (Qemu core dump when hotplug nvme:// drive via -blockdev) +- Resolves: bz#1650272 + (Ballooning is incompatible with vfio assigned devices, but not prevented) +- Resolves: bz#1651195 + (Re-enable hyperv-testdev device) +- Resolves: bz#1651994 + (Declare the "Cirrus VGA" device emulation of QEMU as deprecated in RHEL8) +- Resolves: bz#1654651 + (Qemu: hw: bt: keep bt/* objects from building [rhel-8.0]) + +* Tue Nov 27 2018 Danilo Cesar Lemes de Paula - 
qemu-kvm-2.12.0-43 +- kvm-block-Make-more-block-drivers-compile-time-configura.patch [bz#1598842 bz#1598842] +- kvm-RHEL8-Add-disable-configure-options-to-qemu-spec-fil.patch [bz#1598842] +- Resolves: bz#1598842 + (Compile out unused block drivers) + +* Mon Nov 26 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-43 + +- kvm-configure-add-test-for-libudev.patch [bz#1636185] +- kvm-qga-linux-report-disk-serial-number.patch [bz#1636185] +- kvm-qga-linux-return-disk-device-in-guest-get-fsinfo.patch [bz#1636185] +- kvm-qemu-error-introduce-error-warn-_report_once.patch [bz#1625173] +- kvm-intel-iommu-start-to-use-error_report_once.patch [bz#1625173] +- kvm-intel-iommu-replace-more-vtd_err_-traces.patch [bz#1625173] +- kvm-intel_iommu-introduce-vtd_reset_caches.patch [bz#1625173] +- kvm-intel_iommu-better-handling-of-dmar-state-switch.patch [bz#1625173] +- kvm-intel_iommu-move-ce-fetching-out-when-sync-shadow.patch [bz#1625173 bz#1629616] +- kvm-intel_iommu-handle-invalid-ce-for-shadow-sync.patch [bz#1625173 bz#1629616] +- kvm-block-remove-bdrv_dirty_bitmap_make_anon.patch [bz#1518989] +- kvm-block-simplify-code-around-releasing-bitmaps.patch [bz#1518989] +- kvm-hbitmap-Add-advance-param-to-hbitmap_iter_next.patch [bz#1518989] +- kvm-test-hbitmap-Add-non-advancing-iter_next-tests.patch [bz#1518989] +- kvm-block-dirty-bitmap-Add-bdrv_dirty_iter_next_area.patch [bz#1518989] +- kvm-blockdev-backup-add-bitmap-argument.patch [bz#1518989] +- kvm-dirty-bitmap-switch-assert-fails-to-errors-in-bdrv_m.patch [bz#1518989] +- kvm-dirty-bitmap-rename-bdrv_undo_clear_dirty_bitmap.patch [bz#1518989] +- kvm-dirty-bitmap-make-it-possible-to-restore-bitmap-afte.patch [bz#1518989] +- kvm-blockdev-rename-block-dirty-bitmap-clear-transaction.patch [bz#1518989] +- kvm-qapi-add-transaction-support-for-x-block-dirty-bitma.patch [bz#1518989] +- kvm-block-dirty-bitmaps-add-user_locked-status-checker.patch [bz#1518989] +- kvm-block-dirty-bitmaps-fix-merge-permissions.patch [bz#1518989] +- 
kvm-block-dirty-bitmaps-allow-clear-on-disabled-bitmaps.patch [bz#1518989] +- kvm-block-dirty-bitmaps-prohibit-enable-disable-on-locke.patch [bz#1518989] +- kvm-block-backup-prohibit-backup-from-using-in-use-bitma.patch [bz#1518989] +- kvm-nbd-forbid-use-of-frozen-bitmaps.patch [bz#1518989] +- kvm-bitmap-Update-count-after-a-merge.patch [bz#1518989] +- kvm-iotests-169-drop-deprecated-autoload-parameter.patch [bz#1518989] +- kvm-block-qcow2-improve-error-message-in-qcow2_inactivat.patch [bz#1518989] +- kvm-bloc-qcow2-drop-dirty_bitmaps_loaded-state-variable.patch [bz#1518989] +- kvm-dirty-bitmaps-clean-up-bitmaps-loading-and-migration.patch [bz#1518989] +- kvm-iotests-improve-169.patch [bz#1518989] +- kvm-iotests-169-add-cases-for-source-vm-resuming.patch [bz#1518989] +- kvm-pc-dimm-turn-alignment-assert-into-check.patch [bz#1630116] +- Resolves: bz#1518989 + (RFE: QEMU Incremental live backup) +- Resolves: bz#1625173 + ([NVMe Device Assignment] Guest could not boot up with q35+iommu) +- Resolves: bz#1629616 + (boot guest with q35+vIOMMU+ device assignment, qemu terminal shows "qemu-kvm: VFIO_UNMAP_DMA: -22" when return assigned network devices from vfio driver to ixgbe in guest) +- Resolves: bz#1630116 + (pc_dimm_get_free_addr: assertion failed: (QEMU_ALIGN_UP(address_space_start, align) == address_space_start)) +- Resolves: bz#1636185 + ([RFE] Report disk device name and serial number (qemu-guest-agent on Linux)) * Mon Nov 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-42.el8 - kvm-luks-Allow-share-rw-on.patch [bz#1629701] diff --git a/sources b/sources deleted file mode 100644 index 5fb3730..0000000 --- a/sources +++ /dev/null @@ -1 +0,0 @@ -SHA512 (qemu-3.1.0.tar.xz) = 610ec222853ebeea764a2c972418645dea7917e796a0f540d6c4a0f588244e6fdf6f5e6c214f161eb11fbc497f136a43e000fc85554ebb2cf8f10c846a186437 From 56a8b5ca3ee5d94bf19451ed3f5b1547351f3caa Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Thu, 13 Dec 2018 11:51:16 -0200 Subject: [PATCH 006/195] Missing sources file --- sources | 1 + 1 file changed, 1 insertion(+) create mode 100644 sources diff --git a/sources b/sources new file mode 100644 index 0000000..d417a06 --- /dev/null +++ b/sources @@ -0,0 +1 @@ +SHA512 (qemu-3.1.0.tar.xz) = 7e8dae823937cfac2f0c60406bd3bdcb89df40313dab2a4bed327d5198f7fcc68ac8b31e44692caa09299cc71256ee0b8c17e4f49f78ada8043d424f5daf82fe From 1744d66d6e56f9627e286ff90df30d84ba4a7704 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Fri, 21 Dec 2018 19:29:30 +0000 Subject: [PATCH 007/195] * Fri Dec 21 2018 Danilo Cesar Lemes de Paula - 3.1.0-2.el8 - kvm-pc-7.5-compat-entries.patch [bz#1655820] - kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch [bz#1655820] - kvm-pc-PC_RHEL7_6_COMPAT.patch [bz#1655820] - kvm-pc-Add-compat-for-pc-i440fx-rhel7.6.0-machine-type.patch [bz#1655820] - kvm-pc-Add-pc-q35-8.0.0-machine-type.patch [bz#1655820] - kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch [bz#1655820] - kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch [bz#1659604] - kvm-Add-edk2-Requires-to-qemu-kvm.patch [bz#1660208] - Resolves: bz#1655820 (Can't migarate between rhel8 and rhel7 when guest has device "video") - Resolves: bz#1659604 (8->7 migration failed: qemu-kvm: error: failed to set MSR 0x4b564d02 to 0x27fc13285) - Resolves: bz#1660208 (qemu-kvm: Should depend on the architecture-appropriate guest firmware) --- ...M_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch | 119 ++++++++++++++++++ kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch | 66 ++++++++++ kvm-pc-7.5-compat-entries.patch | 55 ++++++++ ...for-pc-i440fx-rhel7.6.0-machine-type.patch | 41 ++++++ kvm-pc-Add-pc-q35-8.0.0-machine-type.patch | 73 +++++++++++ ...e-smi-count-off-to-PC_RHEL7_6_COMPAT.patch | 60 +++++++++ kvm-pc-PC_RHEL7_6_COMPAT.patch | 58 +++++++++ qemu-kvm.spec | 37 +++++- 8 files changed, 508 insertions(+), 1 deletion(-) create mode 100644 
kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch create mode 100644 kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch create mode 100644 kvm-pc-7.5-compat-entries.patch create mode 100644 kvm-pc-Add-compat-for-pc-i440fx-rhel7.6.0-machine-type.patch create mode 100644 kvm-pc-Add-pc-q35-8.0.0-machine-type.patch create mode 100644 kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch create mode 100644 kvm-pc-PC_RHEL7_6_COMPAT.patch diff --git a/kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch b/kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch new file mode 100644 index 0000000..bcf15cf --- /dev/null +++ b/kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch @@ -0,0 +1,119 @@ +From fece44d5054ef13f483d7531a8462cb7f8ff5b93 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Fri, 14 Dec 2018 19:33:40 +0000 +Subject: [PATCH 7/8] kvm: clear out KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT for + older machine types + +RH-Author: Bandan Das +Message-id: +Patchwork-id: 83523 +O-Subject: [RHEL8 qemu-kvm PATCH] kvm: clear out KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT for older machine types +Bugzilla: 1659604 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Pankaj Gupta +RH-Acked-by: Eduardo Habkost + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1659604 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=19521246 +Upstream: Not applicable +Branch: rhel8/master-3.1.0 on top of [RHEL8 qemu-kvm PATCH v3 0/5] 8.0.0 x86 machine types + +After the addition of support for async pf injection to L1, newer +hypervisors advertise the feature using bit 2 of the +MSR_KVM_ASYNC_PF_EN msr. However, this was reserved in older +hypervisors which results in an error during migration like so: + +qemu-kvm: error: failed to set MSR 0x4b564d02 to 0x27fc13285 +qemu-kvm: /builddir/build/BUILD/qemu-2.12.0/target/i386/kvm.c:1940: kvm_put_msrs: Assertion `ret == cpu->kvm_msr_buf->nmsrs' failed. 
+Aborted (core dumped) + +This patch introduces a new bool that is set for older machine types. +When set, Qemu's stored value clears out bit 2. This should be safe +because the guest can still enable it by writing to the MSR after +checking for support. A reset/migration for <7.6 machine type would +reset the bit though. + +Signed-off-by: Bandan Das +Signed-off-by: Danilo C. L. de Paula +--- + hw/i386/pc.c | 1 + + hw/i386/pc_piix.c | 1 + + hw/i386/pc_q35.c | 1 + + include/hw/boards.h | 2 ++ + target/i386/kvm.c | 4 ++++ + 5 files changed, 9 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index a609332..18268d3 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -2391,6 +2391,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->linuxboot_dma_enabled = true; + assert(!mc->get_hotplug_handler); + pcmc->pc_rom_ro = true; ++ mc->async_pf_vmexit_disable = false; + mc->get_hotplug_handler = pc_get_hotpug_handler; + mc->cpu_index_to_instance_props = pc_cpu_index_to_props; + mc->get_default_cpu_node_id = pc_get_default_cpu_node_id; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index efee5e7..46c494a 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1186,6 +1186,7 @@ static void pc_machine_rhel760_options(MachineClass *m) + { + pc_machine_rhel7_options(m); + m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; ++ m->async_pf_vmexit_disable = true; + SET_MACHINE_COMPAT(m, PC_RHEL7_6_COMPAT); + } + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 0b7223f..1810cf2 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -470,6 +470,7 @@ static void pc_q35_machine_rhel760_options(MachineClass *m) + pc_q35_machine_rhel800_options(m); + m->alias = NULL; + m->desc = "RHEL-7.6.0 PC (Q35 + ICH9, 2009)"; ++ m->async_pf_vmexit_disable = true; + SET_MACHINE_COMPAT(m, PC_RHEL7_6_COMPAT); + } + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index f82f284..27463fb 100644 +--- a/include/hw/boards.h ++++ 
b/include/hw/boards.h +@@ -204,6 +204,8 @@ struct MachineClass { + const char **valid_cpu_types; + strList *allowed_dynamic_sysbus_devices; + bool auto_enable_numa_with_memhp; ++ /* RHEL only */ ++ bool async_pf_vmexit_disable; + void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes, + int nb_nodes, ram_addr_t size); + bool ignore_boot_device_suffixes; +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index b2401d1..5b0ce82 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -2351,6 +2351,7 @@ static int kvm_get_msrs(X86CPU *cpu) + struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; + int ret, i; + uint64_t mtrr_top_bits; ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + + kvm_msr_buf_reset(cpu); + +@@ -2648,6 +2649,9 @@ static int kvm_get_msrs(X86CPU *cpu) + break; + case MSR_KVM_ASYNC_PF_EN: + env->async_pf_en_msr = msrs[i].data; ++ if (mc->async_pf_vmexit_disable) { ++ env->async_pf_en_msr &= ~(1ULL << 2); ++ } + break; + case MSR_KVM_PV_EOI_EN: + env->pv_eoi_en_msr = msrs[i].data; +-- +1.8.3.1 + diff --git a/kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch b/kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch new file mode 100644 index 0000000..f366d96 --- /dev/null +++ b/kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch @@ -0,0 +1,66 @@ +From a75b0a6a7ef6e14e3b65b34bbc6935491d3b016e Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 14 Dec 2018 17:02:07 +0000 +Subject: [PATCH 2/8] compat: Generic HW_COMPAT_RHEL7_6 + +RH-Author: Dr. David Alan Gilbert +Message-id: <20181214170211.14267-3-dgilbert@redhat.com> +Patchwork-id: 83516 +O-Subject: [RHEL8 qemu-kvm PATCH v3 2/6] compat: Generic HW_COMPAT_RHEL7_6 +Bugzilla: 1655820 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck + +From: "Dr. David Alan Gilbert" + +Add the HW_COMPAT_RHEL7_6 macro based on the 2.12 and 3.0 macros. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. 
de Paula +--- + include/hw/compat.h | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/include/hw/compat.h b/include/hw/compat.h +index f08cc7c..40db243 100644 +--- a/include/hw/compat.h ++++ b/include/hw/compat.h +@@ -513,5 +513,34 @@ + .value = "off",\ + }, + ++/* The same as HW_COMPAT_3_0 + HW_COMPAT_2_12 ++ * except that ++ * there's nothing in 3_0 ++ * migration.decompress-error-check=off was in 7.5 from bz 1584139 ++ * ++ */ ++#define HW_COMPAT_RHEL7_6 \ ++ { /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ \ ++ .driver = "hda-audio",\ ++ .property = "use-timer",\ ++ .value = "false",\ ++ },{ /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ \ ++ .driver = "cirrus-vga",\ ++ .property = "global-vmstate",\ ++ .value = "true",\ ++ },{ /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ \ ++ .driver = "VGA",\ ++ .property = "global-vmstate",\ ++ .value = "true",\ ++ },{ /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ \ ++ .driver = "vmware-svga",\ ++ .property = "global-vmstate",\ ++ .value = "true",\ ++ },{ /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ \ ++ .driver = "qxl-vga",\ ++ .property = "global-vmstate",\ ++ .value = "true",\ ++ }, ++ + + #endif /* HW_COMPAT_H */ +-- +1.8.3.1 + diff --git a/kvm-pc-7.5-compat-entries.patch b/kvm-pc-7.5-compat-entries.patch new file mode 100644 index 0000000..5a1d439 --- /dev/null +++ b/kvm-pc-7.5-compat-entries.patch @@ -0,0 +1,55 @@ +From d89b308fb81b12ff2f452739d103cfee8043f94a Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 14 Dec 2018 17:02:06 +0000 +Subject: [PATCH 1/8] pc: 7.5 compat entries + +RH-Author: Dr. David Alan Gilbert +Message-id: <20181214170211.14267-2-dgilbert@redhat.com> +Patchwork-id: 83515 +O-Subject: [RHEL8 qemu-kvm PATCH v3 1/6] pc: 7.5 compat entries +Bugzilla: 1655820 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth + +From: "Dr. 
David Alan Gilbert" + +These are missing entries that are already in 7.6 from commits +64b860ac7db and 2f039646554 but are missing in the 3.1 rebase. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. de Paula +--- + include/hw/i386/pc.h | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index f8f35af..efcb208 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -1008,6 +1008,22 @@ extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); + .driver = "Skylake-Server" "-" TYPE_X86_CPU,\ + .property = "clflushopt",\ + .value = "off",\ ++ },{ /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_12 */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "legacy-cache",\ ++ .value = "on",\ ++ },{ /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_12 */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "topoext",\ ++ .value = "off",\ ++ },{ /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_12 */ \ ++ .driver = "EPYC-" TYPE_X86_CPU,\ ++ .property = "xlevel",\ ++ .value = stringify(0x8000000a),\ ++ },{ /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_12 */ \ ++ .driver = "EPYC-IBPB-" TYPE_X86_CPU,\ ++ .property = "xlevel",\ ++ .value = stringify(0x8000000a),\ + }, + + +-- +1.8.3.1 + diff --git a/kvm-pc-Add-compat-for-pc-i440fx-rhel7.6.0-machine-type.patch b/kvm-pc-Add-compat-for-pc-i440fx-rhel7.6.0-machine-type.patch new file mode 100644 index 0000000..f1c639b --- /dev/null +++ b/kvm-pc-Add-compat-for-pc-i440fx-rhel7.6.0-machine-type.patch @@ -0,0 +1,41 @@ +From 5121d3ce3d4a7932e8b62b7eb4c70aed802c9abd Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 14 Dec 2018 17:02:09 +0000 +Subject: [PATCH 4/8] pc: Add compat for pc-i440fx-rhel7.6.0 machine type + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20181214170211.14267-5-dgilbert@redhat.com> +Patchwork-id: 83517 +O-Subject: [RHEL8 qemu-kvm PATCH v3 4/6] pc: Add compat for pc-i440fx-rhel7.6.0 machine type +Bugzilla: 1655820 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth + +From: "Dr. David Alan Gilbert" + +Use the PC_RHEL7_6_COMPAT macro for our i440fx machine type. +We're not adding new RHEL8 i440 machine types at this time, so it's +just a matter of keeping the current i440fx machine types compatible. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. de Paula +--- + hw/i386/pc_piix.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index af9eb8c..efee5e7 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1186,6 +1186,7 @@ static void pc_machine_rhel760_options(MachineClass *m) + { + pc_machine_rhel7_options(m); + m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_6_COMPAT); + } + + DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, +-- +1.8.3.1 + diff --git a/kvm-pc-Add-pc-q35-8.0.0-machine-type.patch b/kvm-pc-Add-pc-q35-8.0.0-machine-type.patch new file mode 100644 index 0000000..785dfda --- /dev/null +++ b/kvm-pc-Add-pc-q35-8.0.0-machine-type.patch @@ -0,0 +1,73 @@ +From b9db46a8df4b0eb25bf71205b188b903ff61936c Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 14 Dec 2018 17:02:10 +0000 +Subject: [PATCH 5/8] pc: Add pc-q35-8.0.0 machine type + +RH-Author: Dr. David Alan Gilbert +Message-id: <20181214170211.14267-6-dgilbert@redhat.com> +Patchwork-id: 83518 +O-Subject: [RHEL8 qemu-kvm PATCH v3 5/6] pc: Add pc-q35-8.0.0 machine type +Bugzilla: 1655820 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth + +From: "Dr. David Alan Gilbert" + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/i386/pc_q35.c | 22 +++++++++++++++++++--- + 1 file changed, 19 insertions(+), 3 deletions(-) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 163546e..0b7223f 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -431,8 +431,8 @@ DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, + + /* Red Hat Enterprise Linux machine types */ + +-/* Options for the latest rhel7 q35 machine type */ +-static void pc_q35_machine_rhel7_options(MachineClass *m) ++/* Options for the latest rhel q35 machine type */ ++static void pc_q35_machine_rhel_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pcmc->default_nic_model = "e1000e"; +@@ -446,6 +446,20 @@ static void pc_q35_machine_rhel7_options(MachineClass *m) + SET_MACHINE_COMPAT(m, PC_RHEL_COMPAT); + } + ++static void pc_q35_init_rhel800(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel800_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.0.0 PC (Q35 + ICH9, 2009)"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel800, "pc-q35-rhel8.0.0", pc_q35_init_rhel800, ++ pc_q35_machine_rhel800_options); ++ + static void pc_q35_init_rhel760(MachineState *machine) + { + pc_q35_init(machine); +@@ -453,8 +467,10 @@ static void pc_q35_init_rhel760(MachineState *machine) + + static void pc_q35_machine_rhel760_options(MachineClass *m) + { +- pc_q35_machine_rhel7_options(m); ++ pc_q35_machine_rhel800_options(m); ++ m->alias = NULL; + m->desc = "RHEL-7.6.0 PC (Q35 + ICH9, 2009)"; ++ SET_MACHINE_COMPAT(m, PC_RHEL7_6_COMPAT); + } + + DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, +-- +1.8.3.1 + diff --git a/kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch b/kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch new file mode 100644 index 0000000..e328b9a --- /dev/null +++ b/kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch @@ -0,0 +1,60 @@ +From 
f58ebc7e755e7baa122b906e061feb4de10bbe4c Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 14 Dec 2018 17:02:11 +0000 +Subject: [PATCH 6/8] pc: Add x-migrate-smi-count=off to PC_RHEL7_6_COMPAT + +RH-Author: Dr. David Alan Gilbert +Message-id: <20181214170211.14267-7-dgilbert@redhat.com> +Patchwork-id: 83514 +O-Subject: [RHEL8 qemu-kvm PATCH v3 6/6] pc: Add x-migrate-smi-count=off to PC_RHEL7_6_COMPAT +Bugzilla: 1655820 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth + +From: "Dr. David Alan Gilbert" + +MSR_SMI_COUNT started being migrated in QEMU 2.12 and in the 2.12 +release this broke back migration to earlier versions; however +that didn't cause a problem on RHEL because it also relied on newer +kernel features that RHEL 7.* doesn't have. + +QEMU 3.0 got a fix (in PC_COMPAT_2_11) to fix the 2.12->earlier +breakage, but given the kernel dependency, it makes more sense +for us to tie it to 8.* machine types and keep the feature off for +all 7.* machine types. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. de Paula +--- + include/hw/i386/pc.h | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 5533863..426a975 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -1000,6 +1000,9 @@ extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); + + /* Similar to PC_COMPAT_3_0 + PC_COMPAT_2_12, but: + * all of the 2_12 stuff was already in 7.6 from bz 1481253 ++ * x-migrate-smi-count comes from PC_COMPAT_2_11 but ++ * is really tied to kernel version so keep it off on 7.x ++ * machine types irrespective of host. 
+ */ + #define PC_RHEL7_6_COMPAT \ + HW_COMPAT_RHEL7_6 \ +@@ -1015,6 +1018,10 @@ extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); + .driver = "Skylake-Server-IBRS" "-" TYPE_X86_CPU,\ + .property = "pku",\ + .value = "off",\ ++ },{ /* PC_RHEL7_6_COMPAT from PC_COMPAT_2_11 */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "x-migrate-smi-count",\ ++ .value = "off",\ + }, + + /* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: +-- +1.8.3.1 + diff --git a/kvm-pc-PC_RHEL7_6_COMPAT.patch b/kvm-pc-PC_RHEL7_6_COMPAT.patch new file mode 100644 index 0000000..457ceaf --- /dev/null +++ b/kvm-pc-PC_RHEL7_6_COMPAT.patch @@ -0,0 +1,58 @@ +From 83fd182901d50d150416afaa1236c3b798b320e7 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 14 Dec 2018 17:02:08 +0000 +Subject: [PATCH 3/8] pc: PC_RHEL7_6_COMPAT + +RH-Author: Dr. David Alan Gilbert +Message-id: <20181214170211.14267-4-dgilbert@redhat.com> +Patchwork-id: 83513 +O-Subject: [RHEL8 qemu-kvm PATCH v3 3/6] pc: PC_RHEL7_6_COMPAT +Bugzilla: 1655820 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth + +From: "Dr. David Alan Gilbert" + +Add the PC_RHEL7_6_COMPAT macro derived from the 3.0 and 2.12 +upstream macros. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. 
de Paula +--- + include/hw/i386/pc.h | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index efcb208..5533863 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -998,6 +998,25 @@ extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); + .value = "on",\ + }, + ++/* Similar to PC_COMPAT_3_0 + PC_COMPAT_2_12, but: ++ * all of the 2_12 stuff was already in 7.6 from bz 1481253 ++ */ ++#define PC_RHEL7_6_COMPAT \ ++ HW_COMPAT_RHEL7_6 \ ++ { /* PC_RHEL7_6_COMPAT from PC_COMPAT_3_0 */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "x-hv-synic-kvm-only",\ ++ .value = "on",\ ++ },{ /* PC_RHEL7_6_COMPAT from PC_COMPAT_3_0 */ \ ++ .driver = "Skylake-Server" "-" TYPE_X86_CPU,\ ++ .property = "pku",\ ++ .value = "off",\ ++ },{ /* PC_RHEL7_6_COMPAT from PC_COMPAT_3_0 */ \ ++ .driver = "Skylake-Server-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "pku",\ ++ .value = "off",\ ++ }, ++ + /* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: + * - x-hv-max-vps was backported to 7.5 + * - x-pci-hole64-fix was backported to 7.5 +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 19362a2..7990bc6 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 1%{?dist} +Release: 2%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -120,6 +120,20 @@ Patch0016: 0016-Add-support-for-simpletrace.patch Patch0017: 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0018: 0018-usb-xhci-Fix-PCI-capability-order.patch Patch0019: 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +# For bz#1655820 - Can't migarate between rhel8 and rhel7 when guest has device "video" +Patch20: kvm-pc-7.5-compat-entries.patch +# For bz#1655820 - Can't migarate between rhel8 and rhel7 when guest has device "video" +Patch21: kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch +# For bz#1655820 - Can't migarate between rhel8 and rhel7 when guest has device "video" +Patch22: kvm-pc-PC_RHEL7_6_COMPAT.patch +# For bz#1655820 - Can't migarate between rhel8 and rhel7 when guest has device "video" +Patch23: kvm-pc-Add-compat-for-pc-i440fx-rhel7.6.0-machine-type.patch +# For bz#1655820 - Can't migarate between rhel8 and rhel7 when guest has device "video" +Patch24: kvm-pc-Add-pc-q35-8.0.0-machine-type.patch +# For bz#1655820 - Can't migarate between rhel8 and rhel7 when guest has device "video" +Patch25: kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch +# For bz#1659604 - 8->7 migration failed: qemu-kvm: error: failed to set MSR 0x4b564d02 to 0x27fc13285 +Patch26: kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -242,7 +256,12 @@ Requires: qemu-img = %{epoch}:%{version}-%{release} %ifarch %{ix86} x86_64 Requires: seabios-bin >= 1.10.2-1 Requires: sgabios-bin +Requires: edk2-ovmf %endif +%ifarch aarch64 +Requires: edk2-aarch64 +%endif + %ifnarch aarch64 s390x Requires: seavgabios-bin >= 1.10.2-1 Requires: ipxe-roms-qemu >= 20170123-1 @@ -962,6 +981,22 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Fri Dec 21 2018 Danilo Cesar Lemes de Paula - 3.1.0-2.el8 +- kvm-pc-7.5-compat-entries.patch [bz#1655820] +- 
kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch [bz#1655820] +- kvm-pc-PC_RHEL7_6_COMPAT.patch [bz#1655820] +- kvm-pc-Add-compat-for-pc-i440fx-rhel7.6.0-machine-type.patch [bz#1655820] +- kvm-pc-Add-pc-q35-8.0.0-machine-type.patch [bz#1655820] +- kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch [bz#1655820] +- kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch [bz#1659604] +- kvm-Add-edk2-Requires-to-qemu-kvm.patch [bz#1660208] +- Resolves: bz#1655820 + (Can't migarate between rhel8 and rhel7 when guest has device "video") +- Resolves: bz#1659604 + (8->7 migration failed: qemu-kvm: error: failed to set MSR 0x4b564d02 to 0x27fc13285) +- Resolves: bz#1660208 + (qemu-kvm: Should depend on the architecture-appropriate guest firmware) + * Thu Dec 13 2018 Danilo Cesar Lemes de Paula - 3.1.0-1.el8 - Rebase to qemu-kvm 3.1.0 From bfacc7d6323c0e258d1b94be058f5047743c32b1 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Thu, 3 Jan 2019 14:45:54 +0000 Subject: [PATCH 008/195] * Thu Jan 03 2019 Danilo Cesar Lemes de Paula - 3.1.0-3.el8 - kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch [bz#1656508] - Resolves: bz#1656508 (Machine types for qemu-kvm based on rebase to qemu-3.1 (ppc64le)) --- ...efine-pseries-rhel8.0.0-machine-type.patch | 101 ++++++++++++++++++ qemu-kvm.spec | 9 +- 2 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch diff --git a/kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch b/kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch new file mode 100644 index 0000000..bee8f1d --- /dev/null +++ b/kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch @@ -0,0 +1,101 @@ +From 6eddc4b5d380a5a21b17ca5424a10619ff42ad56 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 20 Dec 2018 14:48:58 +0000 +Subject: [PATCH] redhat: define pseries-rhel8.0.0 machine type + +RH-Author: Laurent Vivier +Message-id: <20181220144858.2598-1-lvivier@redhat.com> 
+Patchwork-id: 83723 +O-Subject: [RHEL8/rhel qemu-kvm PATCH] redhat: define pseries-rhel8.0.0 machine type +Bugzilla: 1656508 +RH-Acked-by: Thomas Huth +RH-Acked-by: David Gibson +RH-Acked-by: Serhii Popovych + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1656508 +BRANCH: rhel8/master-3.1.0 +UPSTREAM: downstream only +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=19589897 +BASED-ON: <20181214170211.14267-1-dgilbert@redhat.com> + [RHEL8 qemu-kvm PATCH v3 0/6] 8.0.0 x86 machine types + +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 47 insertions(+), 2 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 76a4e83..91d38f9 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4385,19 +4385,64 @@ DEFINE_SPAPR_MACHINE(2_1, "2.1", false); + #endif + + /* ++ * pseries-rhel8.0.0 ++ */ ++ ++static void spapr_machine_rhel800_instance_options(MachineState *machine) ++{ ++} ++ ++static void spapr_machine_rhel800_class_options(MachineClass *mc) ++{ ++ /* Defaults for the latest behaviour inherited from the base class */ ++} ++ ++DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", true); ++ ++/* + * pseries-rhel7.6.0 ++ * like SPAPR_COMPAT_2_12 and SPAPR_COMPAT_3_0 ++ * SPAPR_COMPAT_3_0 is empty + */ ++#define SPAPR_COMPAT_RHEL7_6 \ ++ HW_COMPAT_RHEL7_6 \ ++ { \ ++ .driver = TYPE_POWERPC_CPU, \ ++ .property = "pre-3.0-migration", \ ++ .value = "on", \ ++ }, \ ++ { \ ++ .driver = TYPE_SPAPR_CPU_CORE, \ ++ .property = "pre-3.0-migration", \ ++ .value = "on", \ ++ }, + + static void spapr_machine_rhel760_instance_options(MachineState *machine) + { ++ spapr_machine_rhel800_instance_options(machine); + } + + static void spapr_machine_rhel760_class_options(MachineClass *mc) + { +- /* Defaults for the latest behaviour inherited from the base class */ ++ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ 
spapr_machine_rhel800_class_options(mc); ++ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_6); ++ ++ /* from spapr_machine_3_0_class_options() */ ++ smc->legacy_irq_allocation = true; ++ smc->irq = &spapr_irq_xics_legacy; ++ ++ /* from spapr_machine_2_12_class_options() */ ++ /* We depend on kvm_enabled() to choose a default value for the ++ * hpt-max-page-size capability. Of course we can't do it here ++ * because this is too early and the HW accelerator isn't initialzed ++ * yet. Postpone this to machine init (see default_caps_with_cpu()). ++ */ ++ smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0; + } + +-DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", true); ++DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", false); + + /* + * pseries-rhel7.6.0-sxxm +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 7990bc6..0cd861e 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 2%{?dist} +Release: 3%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -134,6 +134,8 @@ Patch24: kvm-pc-Add-pc-q35-8.0.0-machine-type.patch Patch25: kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch # For bz#1659604 - 8->7 migration failed: qemu-kvm: error: failed to set MSR 0x4b564d02 to 0x27fc13285 Patch26: kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch +# For bz#1656508 - Machine types for qemu-kvm based on rebase to qemu-3.1 (ppc64le) +Patch27: kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -981,6 +983,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Thu Jan 03 2019 Danilo Cesar Lemes de Paula - 3.1.0-3.el8 +- kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch [bz#1656508] +- Resolves: bz#1656508 + (Machine types for qemu-kvm based on rebase to qemu-3.1 (ppc64le)) + * Fri Dec 21 2018 Danilo Cesar Lemes de Paula - 3.1.0-2.el8 - kvm-pc-7.5-compat-entries.patch [bz#1655820] - kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch [bz#1655820] From 1b2adb97f32594ecc88eebb52624fbe09a759b60 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Fri, 11 Jan 2019 13:46:39 +0000 Subject: [PATCH 009/195] * Fri Jan 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-4.el8 - kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch [bz#1656510] - kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch [bz#1661967] - kvm-redhat-Fixing-.gitpublish-to-include-AV-information.patch [] - Resolves: bz#1656510 (Machine types for qemu-kvm based on rebase to qemu-3.1 (s390x)) - Resolves: bz#1661967 (Kernel prints the message "VPHN is not supported. 
Disabling polling...") --- ...rtio-ccw-Add-machine-types-for-RHEL8.patch | 106 +++++++++++++++ ...Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch | 124 ++++++++++++++++++ qemu-kvm.spec | 14 +- 3 files changed, 243 insertions(+), 1 deletion(-) create mode 100644 kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch create mode 100644 kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch diff --git a/kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch b/kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch new file mode 100644 index 0000000..592fe51 --- /dev/null +++ b/kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch @@ -0,0 +1,106 @@ +From 6c200d665b8730ea86104e7aea2d59035b1398e5 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Fri, 21 Dec 2018 14:08:56 +0000 +Subject: [PATCH 1/3] hw/s390x/s390-virtio-ccw: Add machine types for RHEL8.0.0 + +RH-Author: David Hildenbrand +Message-id: <20181221150856.26324-3-david@redhat.com> +Patchwork-id: 83740 +O-Subject: [RHEL8 qemu-kvm PATCH v3 2/2] hw/s390x/s390-virtio-ccw: Add machine types for RHEL8.0.0 +Bugzilla: 1656510 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth +RH-Acked-by: Dr. David Alan Gilbert + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1656510 +Upstream: n/a (downstream only) +Branch: rhel8/master-3.1.0 + +Downstream s390x machine types for the Advanced Virtualization module. + +s390mc->hpage_1m_allowed has to stay enabled for the rhel7.6.0 machine, +because RHEL 8 supports huge pages. For RHEL 7.6-alt, this is fenced +using a different mechanism (bail out if huge pages are used right from +the start). + +Signed-off-by: Thomas Huth +Signed-off-by: David Hildenbrand +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/s390x/s390-virtio-ccw.c | 26 +++++++++++++++++++++++--- + 1 file changed, 23 insertions(+), 3 deletions(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 04f4c1a..776a6d6 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -651,14 +651,14 @@ bool css_migration_enabled(void) + } \ + type_init(ccw_machine_register_##suffix) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ ++ + #define CCW_COMPAT_3_0 \ + HW_COMPAT_3_0 + + #define CCW_COMPAT_2_12 \ + HW_COMPAT_2_12 + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ +- + #define CCW_COMPAT_2_11 \ + HW_COMPAT_2_11 \ + {\ +@@ -899,6 +899,13 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); + #else + + /* ++ * like CCW_COMPAT_2_12 + CCW_COMPAT_3_0 (which are empty), but includes ++ * HW_COMPAT_RHEL7_6 instead of HW_COMPAT_2_11 and HW_COMPAT_3_0 ++ */ ++#define CCW_COMPAT_RHEL7_6 \ ++ HW_COMPAT_RHEL7_6 ++ ++/* + * like CCW_COMPAT_2_11, but includes HW_COMPAT_RHEL7_5 (derived from + * HW_COMPAT_2_11 and HW_COMPAT_2_10) instead of HW_COMPAT_2_11 + */ +@@ -910,14 +917,26 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); + .value = "off",\ + }, + ++static void ccw_machine_rhel800_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel800_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel800, "rhel8.0.0", true); ++ + static void ccw_machine_rhel760_instance_options(MachineState *machine) + { ++ ccw_machine_rhel800_instance_options(machine); + } + + static void ccw_machine_rhel760_class_options(MachineClass *mc) + { ++ ccw_machine_rhel800_class_options(mc); ++ SET_MACHINE_COMPAT(mc, CCW_COMPAT_RHEL7_6); + } +-DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", true); ++DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", false); + + static void ccw_machine_rhel750_instance_options(MachineState *machine) + { +@@ -937,6 +956,7 @@ static void ccw_machine_rhel750_class_options(MachineClass *mc) + { + ccw_machine_rhel760_class_options(mc); + 
SET_MACHINE_COMPAT(mc, CCW_COMPAT_RHEL7_5); ++ S390_MACHINE_CLASS(mc)->hpage_1m_allowed = false; + } + DEFINE_CCW_MACHINE(rhel750, "rhel7.5.0", false); + +-- +1.8.3.1 + diff --git a/kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch b/kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch new file mode 100644 index 0000000..48f2d8b --- /dev/null +++ b/kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch @@ -0,0 +1,124 @@ +From 41c461465fb5b0d23f5826cd4b62a78f25607420 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 3 Jan 2019 12:51:40 +0000 +Subject: [PATCH 2/3] spapr: Add H-Call H_HOME_NODE_ASSOCIATIVITY + +RH-Author: Laurent Vivier +Message-id: <20190103135140.13948-2-lvivier@redhat.com> +Patchwork-id: 83848 +O-Subject: [RHEL8/rhel qemu-kvm PATCH 1/1] spapr: Add H-Call H_HOME_NODE_ASSOCIATIVITY +Bugzilla: 1661967 +RH-Acked-by: Serhii Popovych +RH-Acked-by: Thomas Huth +RH-Acked-by: David Gibson + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1661967 + +H_HOME_NODE_ASSOCIATIVITY H-Call returns the associativity domain +designation associated with the identifier input parameter + +This fixes a crash when we try to hotplug a CPU in memory-less and +CPU-less numa node. In this case, the kernel tries to online the +node, but without the information provided by this h-call, the node id, +it cannot and the CPU is started while the node is not onlined. + +It also removes the warning message from the kernel: + VPHN is not supported. Disabling polling.. + +Signed-off-by: Laurent Vivier +Reviewed-by: Greg Kurz +Signed-off-by: David Gibson +(cherry picked from commit c24ba3d0a34f68ad2c6bf1a15bc43770005f6cc0) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/spapr.c | 1 + + hw/ppc/spapr_hcall.c | 40 ++++++++++++++++++++++++++++++++++++++++ + include/hw/ppc/spapr.h | 1 + + 3 files changed, 42 insertions(+) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 91d38f9..d5d2eb4 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -1051,6 +1051,7 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt) + add_str(hypertas, "hcall-sprg0"); + add_str(hypertas, "hcall-copy"); + add_str(hypertas, "hcall-debug"); ++ add_str(hypertas, "hcall-vphn"); + add_str(qemu_hypertas, "hcall-memop1"); + + if (!kvm_enabled() || kvmppc_spapr_use_multitce()) { +diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c +index ae913d0..f131c7e 100644 +--- a/hw/ppc/spapr_hcall.c ++++ b/hw/ppc/spapr_hcall.c +@@ -1663,6 +1663,42 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + return H_SUCCESS; + } + ++static target_ulong h_home_node_associativity(PowerPCCPU *cpu, ++ sPAPRMachineState *spapr, ++ target_ulong opcode, ++ target_ulong *args) ++{ ++ target_ulong flags = args[0]; ++ target_ulong procno = args[1]; ++ PowerPCCPU *tcpu; ++ int idx; ++ ++ /* only support procno from H_REGISTER_VPA */ ++ if (flags != 0x1) { ++ return H_FUNCTION; ++ } ++ ++ tcpu = spapr_find_cpu(procno); ++ if (tcpu == NULL) { ++ return H_P2; ++ } ++ ++ /* sequence is the same as in the "ibm,associativity" property */ ++ ++ idx = 0; ++#define ASSOCIATIVITY(a, b) (((uint64_t)(a) << 32) | \ ++ ((uint64_t)(b) & 0xffffffff)) ++ args[idx++] = ASSOCIATIVITY(0, 0); ++ args[idx++] = ASSOCIATIVITY(0, tcpu->node_id); ++ args[idx++] = ASSOCIATIVITY(procno, -1); ++ for ( ; idx < 6; idx++) { ++ args[idx] = -1; ++ } ++#undef ASSOCIATIVITY ++ ++ return H_SUCCESS; ++} ++ + static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, +@@ -1822,6 +1858,10 @@ static void hypercall_register_types(void) + + /* ibm,client-architecture-support support */ + 
spapr_register_hypercall(KVMPPC_H_CAS, h_client_architecture_support); ++ ++ /* Virtual Processor Home Node */ ++ spapr_register_hypercall(H_HOME_NODE_ASSOCIATIVITY, ++ h_home_node_associativity); + } + + type_init(hypercall_register_types) +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index d2370e5..0cc123e 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -442,6 +442,7 @@ struct sPAPRMachineState { + #define H_GET_EM_PARMS 0x2B8 + #define H_SET_MPP 0x2D0 + #define H_GET_MPP 0x2D4 ++#define H_HOME_NODE_ASSOCIATIVITY 0x2EC + #define H_XIRR_X 0x2FC + #define H_RANDOM 0x300 + #define H_SET_MODE 0x31C +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 0cd861e..cdc387b 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 3%{?dist} +Release: 4%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -136,6 +136,10 @@ Patch25: kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch Patch26: kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch # For bz#1656508 - Machine types for qemu-kvm based on rebase to qemu-3.1 (ppc64le) Patch27: kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch +# For bz#1656510 - Machine types for qemu-kvm based on rebase to qemu-3.1 (s390x) +Patch28: kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch +# For bz#1661967 - Kernel prints the message "VPHN is not supported. Disabling polling..." 
+Patch29: kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -983,6 +987,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Fri Jan 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-4.el8 +- kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch [bz#1656510] +- kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch [bz#1661967] +- Resolves: bz#1656510 + (Machine types for qemu-kvm based on rebase to qemu-3.1 (s390x)) +- Resolves: bz#1661967 + (Kernel prints the message "VPHN is not supported. Disabling polling...") + * Thu Jan 03 2019 Danilo Cesar Lemes de Paula - 3.1.0-3.el8 - kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch [bz#1656508] - Resolves: bz#1656508 From 9781800bc34cbcd52797e3d7d6dd24f41410cb3e Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Mon, 21 Jan 2019 21:16:33 +0000 Subject: [PATCH 010/195] * Mon Jan 21 2019 Danilo Cesar Lemes de Paula - 3.1.0-5.el8 - kvm-virtio-Helper-for-registering-virtio-device-types.patch [bz#1648023] - kvm-virtio-Provide-version-specific-variants-of-virtio-P.patch [bz#1648023] - kvm-globals-Allow-global-properties-to-be-optional.patch [bz#1648023] - kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch [bz#1648023] - kvm-aarch64-Add-virt-rhel8.0.0-machine-type-for-ARM.patch [bz#1656504] - kvm-aarch64-Set-virt-rhel8.0.0-max_cpus-to-512.patch [bz#1656504] - kvm-aarch64-Use-256MB-ECAM-region-by-default.patch [bz#1656504] - Resolves: bz#1648023 (Provide separate device types for transitional virtio PCI devices - Fast Train) - Resolves: bz#1656504 (Machine types for qemu-kvm based on rebase to qemu-3.1 (aarch64)) --- ...-virt-rhel8.0.0-machine-type-for-ARM.patch | 70 +++ ...4-Set-virt-rhel8.0.0-max_cpus-to-512.patch | 50 ++ ...h64-Use-256MB-ECAM-region-by-default.patch | 38 ++ ...low-global-properties-to-be-optional.patch | 84 +++ ...-for-registering-virtio-device-types.patch | 553 ++++++++++++++++++ 
...ble-legacy-disable-modern-compat-pro.patch | 73 +++ ...ersion-specific-variants-of-virtio-P.patch | 519 ++++++++++++++++ qemu-kvm.spec | 29 +- 8 files changed, 1415 insertions(+), 1 deletion(-) create mode 100644 kvm-aarch64-Add-virt-rhel8.0.0-machine-type-for-ARM.patch create mode 100644 kvm-aarch64-Set-virt-rhel8.0.0-max_cpus-to-512.patch create mode 100644 kvm-aarch64-Use-256MB-ECAM-region-by-default.patch create mode 100644 kvm-globals-Allow-global-properties-to-be-optional.patch create mode 100644 kvm-virtio-Helper-for-registering-virtio-device-types.patch create mode 100644 kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch create mode 100644 kvm-virtio-Provide-version-specific-variants-of-virtio-P.patch diff --git a/kvm-aarch64-Add-virt-rhel8.0.0-machine-type-for-ARM.patch b/kvm-aarch64-Add-virt-rhel8.0.0-machine-type-for-ARM.patch new file mode 100644 index 0000000..be3b71a --- /dev/null +++ b/kvm-aarch64-Add-virt-rhel8.0.0-machine-type-for-ARM.patch @@ -0,0 +1,70 @@ +From 7bfdb4cd2b490d6da5648b1bfeacf9078b4b3ecb Mon Sep 17 00:00:00 2001 +From: Wei Huang +Date: Thu, 17 Jan 2019 17:33:55 +0000 +Subject: [PATCH 5/7] aarch64: Add virt-rhel8.0.0 machine type for ARM + +RH-Author: Wei Huang +Message-id: <20190117173357.31514-2-wei@redhat.com> +Patchwork-id: 84037 +O-Subject: [RHEL8 qemu-kvm PATCH v3 1/3] aarch64: Add virt-rhel8.0.0 machine type for ARM +Bugzilla: 1656504 +RH-Acked-by: Andrew Jones +RH-Acked-by: Auger Eric +RH-Acked-by: Igor Mammedov +RH-Acked-by: Laszlo Ersek + +This patch adds a new machine type, virt-rhel8.0.0, for QEMU fast train +to replace old virt-rhel7.6.0 because virt-rhel7.6.0 was defined for +an unsupported preview product. Note that we explicitly disable +256MB ECAM region in virt-rhel8.0.0 to match what virt-rhel7.6.0 offered. + +Signed-off-by: Wei Huang +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/arm/virt.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 84a86c1..156721a 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2094,7 +2094,7 @@ static void rhel_machine_init(void) + } + type_init(rhel_machine_init); + +-static void rhel760_virt_instance_init(Object *obj) ++static void rhel800_virt_instance_init(Object *obj) + { + VirtMachineState *vms = VIRT_MACHINE(obj); + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); +@@ -2119,6 +2119,8 @@ static void rhel760_virt_instance_init(Object *obj) + "Set GIC version. " + "Valid values are 2, 3 and host", NULL); + ++ vms->highmem_ecam = !vmc->no_highmem_ecam; ++ + if (vmc->no_its) { + vms->its = false; + } else { +@@ -2132,12 +2134,16 @@ static void rhel760_virt_instance_init(Object *obj) + NULL); + } + ++ /* IOMMU is disabled by default and non-configurable for RHEL */ ++ vms->iommu = VIRT_IOMMU_NONE; ++ + vms->memmap=a15memmap; + vms->irqmap=a15irqmap; + } + +-static void rhel760_virt_options(MachineClass *mc) ++static void rhel800_virt_options(MachineClass *mc) + { + SET_MACHINE_COMPAT(mc, ARM_RHEL_COMPAT); ++ vmc->no_highmem_ecam = true; + } +-DEFINE_RHEL_MACHINE_AS_LATEST(7, 6, 0) ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 0, 0) +-- +1.8.3.1 + diff --git a/kvm-aarch64-Set-virt-rhel8.0.0-max_cpus-to-512.patch b/kvm-aarch64-Set-virt-rhel8.0.0-max_cpus-to-512.patch new file mode 100644 index 0000000..76eae19 --- /dev/null +++ b/kvm-aarch64-Set-virt-rhel8.0.0-max_cpus-to-512.patch @@ -0,0 +1,50 @@ +From 3433e6920a4aaa2177f3503ef08256a58f866b33 Mon Sep 17 00:00:00 2001 +From: Wei Huang +Date: Thu, 17 Jan 2019 17:33:56 +0000 +Subject: [PATCH 6/7] aarch64: Set virt-rhel8.0.0 max_cpus to 512 + +RH-Author: Wei Huang +Message-id: <20190117173357.31514-3-wei@redhat.com> +Patchwork-id: 84038 +O-Subject: [RHEL8 qemu-kvm PATCH v3 2/3] aarch64: Set virt-rhel8.0.0 max_cpus to 512 +Bugzilla: 1656504 +RH-Acked-by: Andrew Jones +RH-Acked-by: 
Auger Eric +RH-Acked-by: Igor Mammedov +RH-Acked-by: Laszlo Ersek + +This patch increases max_cpus of virt-rhel8.0.0 from 255 to 512. +This decision is made due to a recent trend of ARM servers Red Hat +received. Currently Red Hat has HPE Apollo machines that contains 256 +cpu cores. It is reasonable to expect that we will see new machines +with > 256 cores very soon. We have verified this patch partially with a +256-vcpu VM on Apollo and expect 512 vcpus will work as well. + +Signed-off-by: Wei Huang +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/virt.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 156721a..21965e4 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2063,11 +2063,11 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + + mc->family = "virt-rhel-Z"; + mc->init = machvirt_init; +- /* Start max_cpus at the maximum QEMU supports. We'll further restrict +- * it later in machvirt_init, where we have more information about the ++ /* Start with max_cpus set to 512, which is the maximum supported by KVM. ++ * The value may be reduced later when we have more information about the + * configuration of the particular instance. 
+ */ +- mc->max_cpus = 255; ++ mc->max_cpus = 512; + mc->block_default_type = IF_VIRTIO; + mc->no_cdrom = 1; + mc->pci_allow_0_address = true; +-- +1.8.3.1 + diff --git a/kvm-aarch64-Use-256MB-ECAM-region-by-default.patch b/kvm-aarch64-Use-256MB-ECAM-region-by-default.patch new file mode 100644 index 0000000..759adcc --- /dev/null +++ b/kvm-aarch64-Use-256MB-ECAM-region-by-default.patch @@ -0,0 +1,38 @@ +From 4d20863ae382c9ce2a8b7f88aee7a1d1228112a7 Mon Sep 17 00:00:00 2001 +From: Wei Huang +Date: Thu, 17 Jan 2019 17:33:57 +0000 +Subject: [PATCH 7/7] aarch64: Use 256MB ECAM region by default + +RH-Author: Wei Huang +Message-id: <20190117173357.31514-4-wei@redhat.com> +Patchwork-id: 84039 +O-Subject: [RHEL8 qemu-kvm PATCH v3 3/3] aarch64: Use 256MB ECAM region by default +Bugzilla: 1656504 +RH-Acked-by: Andrew Jones +RH-Acked-by: Auger Eric +RH-Acked-by: Igor Mammedov +RH-Acked-by: Laszlo Ersek + +This patch turns on 256MB ECAM region to support a larger number (up to +256) of PCIe buses on virt-rhel8.0.0. + +Signed-off-by: Wei Huang +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/arm/virt.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 21965e4..bee8f10 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2144,6 +2144,5 @@ static void rhel800_virt_instance_init(Object *obj) + static void rhel800_virt_options(MachineClass *mc) + { + SET_MACHINE_COMPAT(mc, ARM_RHEL_COMPAT); +- vmc->no_highmem_ecam = true; + } + DEFINE_RHEL_MACHINE_AS_LATEST(8, 0, 0) +-- +1.8.3.1 + diff --git a/kvm-globals-Allow-global-properties-to-be-optional.patch b/kvm-globals-Allow-global-properties-to-be-optional.patch new file mode 100644 index 0000000..b039c64 --- /dev/null +++ b/kvm-globals-Allow-global-properties-to-be-optional.patch @@ -0,0 +1,84 @@ +From c63a58b4e1d3db52301bec072ac8025216731f35 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Wed, 16 Jan 2019 23:18:18 +0000 +Subject: [PATCH 3/7] globals: Allow global properties to be optional +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eduardo Habkost +Message-id: <20190116231819.29310-4-ehabkost@redhat.com> +Patchwork-id: 84029 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 3/4] globals: Allow global properties to be optional +Bugzilla: 1648023 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth +RH-Acked-by: Laurent Vivier +RH-Acked-by: Michael S. Tsirkin + +Upstream tree: git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git +Upstream commit: d7741743f4f3d2683d1bb6938f88dc0167c21afa + +Making some global properties optional will let us simplify +compat code when a given property works on most (but not all) +subclasses of a given type. + +Device types will be able to opt out from optional compat +properties by simply not registering those properties. 
+ +Backport conflict notes: + Patching qdev_prop_set_globals(), because our downstream tree + still doesn't have object_apply_global_props() from commit + ea9ce8934c5d ("hw: apply accel compat properties without + touching globals") + +Signed-off-by: Eduardo Habkost +Reviewed-by: Cornelia Huck +Reviewed-by: Marc-André Lureau +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. de Paula +--- + hw/core/qdev-properties.c | 3 +++ + include/hw/qdev-core.h | 3 +++ + 2 files changed, 6 insertions(+) + +diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c +index 35072de..2023c1a 100644 +--- a/hw/core/qdev-properties.c ++++ b/hw/core/qdev-properties.c +@@ -1255,6 +1255,9 @@ void qdev_prop_set_globals(DeviceState *dev) + if (object_dynamic_cast(OBJECT(dev), prop->driver) == NULL) { + continue; + } ++ if (prop->optional && !object_property_find(OBJECT(dev), prop->property, NULL)) { ++ continue; ++ } + prop->used = true; + object_property_parse(OBJECT(dev), prop->value, prop->property, &err); + if (err != NULL) { +diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h +index a24d0dd..a10f60f 100644 +--- a/include/hw/qdev-core.h ++++ b/include/hw/qdev-core.h +@@ -252,6 +252,8 @@ struct PropertyInfo { + * @user_provided: Set to true if property comes from user-provided config + * (command-line or config file). + * @used: Set to true if property was used when initializing a device. ++ * @optional: If set to true, GlobalProperty will be skipped without errors ++ * if the property doesn't exist. + * @errp: Error destination, used like first argument of error_setg() + * in case property setting fails later. If @errp is NULL, we + * print warnings instead of ignoring errors silently. 
For +@@ -264,6 +266,7 @@ typedef struct GlobalProperty { + const char *value; + bool user_provided; + bool used; ++ bool optional; + Error **errp; + } GlobalProperty; + +-- +1.8.3.1 + diff --git a/kvm-virtio-Helper-for-registering-virtio-device-types.patch b/kvm-virtio-Helper-for-registering-virtio-device-types.patch new file mode 100644 index 0000000..c0dae87 --- /dev/null +++ b/kvm-virtio-Helper-for-registering-virtio-device-types.patch @@ -0,0 +1,553 @@ +From 8a7371fb25d2072d60b029eb092a083cfa064afb Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Wed, 16 Jan 2019 23:18:16 +0000 +Subject: [PATCH 1/7] virtio: Helper for registering virtio device types + +RH-Author: Eduardo Habkost +Message-id: <20190116231819.29310-2-ehabkost@redhat.com> +Patchwork-id: 84027 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/4] virtio: Helper for registering virtio device types +Bugzilla: 1648023 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth +RH-Acked-by: Laurent Vivier +RH-Acked-by: Michael S. Tsirkin + +Introduce a helper for registering different flavours of virtio +devices. Convert code to use the helper, but keep only the +existing generic types. Transitional and non-transitional device +types will be added by another patch. + +Backport conflicts: + hw/virtio/virtio-pci.c: because our downstream tree doesn't + have commit e6a74868d92f ("build-sys: add --disable-vhost-user") + +Acked-by: Andrea Bolognani +Reviewed-by: Cornelia Huck +Signed-off-by: Eduardo Habkost +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit a4ee4c8baa37154f42b4dc6a13fee79268d15238) +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/display/virtio-gpu-pci.c | 7 +- + hw/display/virtio-vga.c | 7 +- + hw/virtio/virtio-crypto-pci.c | 7 +- + hw/virtio/virtio-pci.c | 231 ++++++++++++++++++++++++++++++------------ + hw/virtio/virtio-pci.h | 54 ++++++++++ + 5 files changed, 228 insertions(+), 78 deletions(-) + +diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c +index cece4aa..faf76a8 100644 +--- a/hw/display/virtio-gpu-pci.c ++++ b/hw/display/virtio-gpu-pci.c +@@ -69,9 +69,8 @@ static void virtio_gpu_initfn(Object *obj) + TYPE_VIRTIO_GPU); + } + +-static const TypeInfo virtio_gpu_pci_info = { +- .name = TYPE_VIRTIO_GPU_PCI, +- .parent = TYPE_VIRTIO_PCI, ++static const VirtioPCIDeviceTypeInfo virtio_gpu_pci_info = { ++ .generic_name = TYPE_VIRTIO_GPU_PCI, + .instance_size = sizeof(VirtIOGPUPCI), + .instance_init = virtio_gpu_initfn, + .class_init = virtio_gpu_pci_class_init, +@@ -79,6 +78,6 @@ static const TypeInfo virtio_gpu_pci_info = { + + static void virtio_gpu_pci_register_types(void) + { +- type_register_static(&virtio_gpu_pci_info); ++ virtio_pci_types_register(&virtio_gpu_pci_info); + } + type_init(virtio_gpu_pci_register_types) +diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c +index ab2e369..8db4d91 100644 +--- a/hw/display/virtio-vga.c ++++ b/hw/display/virtio-vga.c +@@ -207,9 +207,8 @@ static void virtio_vga_inst_initfn(Object *obj) + TYPE_VIRTIO_GPU); + } + +-static TypeInfo virtio_vga_info = { +- .name = TYPE_VIRTIO_VGA, +- .parent = TYPE_VIRTIO_PCI, ++static VirtioPCIDeviceTypeInfo virtio_vga_info = { ++ .generic_name = TYPE_VIRTIO_VGA, + .instance_size = sizeof(struct VirtIOVGA), + .instance_init = virtio_vga_inst_initfn, + .class_init = virtio_vga_class_init, +@@ -217,7 +216,7 @@ static TypeInfo virtio_vga_info = { + + static void virtio_vga_register_types(void) + { +- type_register_static(&virtio_vga_info); ++ virtio_pci_types_register(&virtio_vga_info); + } + + type_init(virtio_vga_register_types) +diff --git 
a/hw/virtio/virtio-crypto-pci.c b/hw/virtio/virtio-crypto-pci.c +index bf64996..8cc3fa3 100644 +--- a/hw/virtio/virtio-crypto-pci.c ++++ b/hw/virtio/virtio-crypto-pci.c +@@ -64,9 +64,8 @@ static void virtio_crypto_initfn(Object *obj) + TYPE_VIRTIO_CRYPTO); + } + +-static const TypeInfo virtio_crypto_pci_info = { +- .name = TYPE_VIRTIO_CRYPTO_PCI, +- .parent = TYPE_VIRTIO_PCI, ++static const VirtioPCIDeviceTypeInfo virtio_crypto_pci_info = { ++ .generic_name = TYPE_VIRTIO_CRYPTO_PCI, + .instance_size = sizeof(VirtIOCryptoPCI), + .instance_init = virtio_crypto_initfn, + .class_init = virtio_crypto_pci_class_init, +@@ -74,6 +73,6 @@ static const TypeInfo virtio_crypto_pci_info = { + + static void virtio_crypto_pci_register_types(void) + { +- type_register_static(&virtio_crypto_pci_info); ++ virtio_pci_types_register(&virtio_crypto_pci_info); + } + type_init(virtio_crypto_pci_register_types) +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index 9a987cb..bee3509 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -1119,9 +1119,8 @@ static void virtio_9p_pci_instance_init(Object *obj) + TYPE_VIRTIO_9P); + } + +-static const TypeInfo virtio_9p_pci_info = { +- .name = TYPE_VIRTIO_9P_PCI, +- .parent = TYPE_VIRTIO_PCI, ++static const VirtioPCIDeviceTypeInfo virtio_9p_pci_info = { ++ .generic_name = TYPE_VIRTIO_9P_PCI, + .instance_size = sizeof(V9fsPCIState), + .instance_init = virtio_9p_pci_instance_init, + .class_init = virtio_9p_pci_class_init, +@@ -1877,9 +1876,6 @@ static void virtio_pci_reset(DeviceState *qdev) + static Property virtio_pci_properties[] = { + DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags, + VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false), +- DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy, +- ON_OFF_AUTO_AUTO), +- DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false), + DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags, + 
VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true), + DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags, +@@ -1939,13 +1935,123 @@ static const TypeInfo virtio_pci_info = { + .class_init = virtio_pci_class_init, + .class_size = sizeof(VirtioPCIClass), + .abstract = true, +- .interfaces = (InterfaceInfo[]) { +- { INTERFACE_PCIE_DEVICE }, +- { INTERFACE_CONVENTIONAL_PCI_DEVICE }, +- { } +- }, + }; + ++static Property virtio_pci_generic_properties[] = { ++ DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy, ++ ON_OFF_AUTO_AUTO), ++ DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false), ++ DEFINE_PROP_END_OF_LIST(), ++}; ++ ++static void virtio_pci_base_class_init(ObjectClass *klass, void *data) ++{ ++ const VirtioPCIDeviceTypeInfo *t = data; ++ if (t->class_init) { ++ t->class_init(klass, NULL); ++ } ++} ++ ++static void virtio_pci_generic_class_init(ObjectClass *klass, void *data) ++{ ++ DeviceClass *dc = DEVICE_CLASS(klass); ++ ++ dc->props = virtio_pci_generic_properties; ++} ++ ++/* Used when the generic type and the base type is the same */ ++static void virtio_pci_generic_base_class_init(ObjectClass *klass, void *data) ++{ ++ virtio_pci_base_class_init(klass, data); ++ virtio_pci_generic_class_init(klass, NULL); ++} ++ ++static void virtio_pci_transitional_instance_init(Object *obj) ++{ ++ VirtIOPCIProxy *proxy = VIRTIO_PCI(obj); ++ ++ proxy->disable_legacy = ON_OFF_AUTO_OFF; ++ proxy->disable_modern = false; ++} ++ ++static void virtio_pci_non_transitional_instance_init(Object *obj) ++{ ++ VirtIOPCIProxy *proxy = VIRTIO_PCI(obj); ++ ++ proxy->disable_legacy = ON_OFF_AUTO_ON; ++ proxy->disable_modern = false; ++} ++ ++void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) ++{ ++ TypeInfo base_type_info = { ++ .name = t->base_name, ++ .parent = t->parent ? 
t->parent : TYPE_VIRTIO_PCI, ++ .instance_size = t->instance_size, ++ .instance_init = t->instance_init, ++ .class_init = virtio_pci_base_class_init, ++ .class_data = (void *)t, ++ .abstract = true, ++ }; ++ TypeInfo generic_type_info = { ++ .name = t->generic_name, ++ .parent = base_type_info.name, ++ .class_init = virtio_pci_generic_class_init, ++ .interfaces = (InterfaceInfo[]) { ++ { INTERFACE_PCIE_DEVICE }, ++ { INTERFACE_CONVENTIONAL_PCI_DEVICE }, ++ { } ++ }, ++ }; ++ ++ if (!base_type_info.name) { ++ /* No base type -> register a single generic device type */ ++ base_type_info.name = t->generic_name; ++ base_type_info.class_init = virtio_pci_generic_base_class_init; ++ base_type_info.interfaces = generic_type_info.interfaces; ++ base_type_info.abstract = false; ++ generic_type_info.name = NULL; ++ assert(!t->non_transitional_name); ++ assert(!t->transitional_name); ++ } ++ ++ type_register(&base_type_info); ++ if (generic_type_info.name) { ++ type_register(&generic_type_info); ++ } ++ ++ if (t->non_transitional_name) { ++ const TypeInfo non_transitional_type_info = { ++ .name = t->non_transitional_name, ++ .parent = base_type_info.name, ++ .instance_init = virtio_pci_non_transitional_instance_init, ++ .interfaces = (InterfaceInfo[]) { ++ { INTERFACE_PCIE_DEVICE }, ++ { INTERFACE_CONVENTIONAL_PCI_DEVICE }, ++ { } ++ }, ++ }; ++ type_register(&non_transitional_type_info); ++ } ++ ++ if (t->transitional_name) { ++ const TypeInfo transitional_type_info = { ++ .name = t->transitional_name, ++ .parent = base_type_info.name, ++ .instance_init = virtio_pci_transitional_instance_init, ++ .interfaces = (InterfaceInfo[]) { ++ /* ++ * Transitional virtio devices work only as Conventional PCI ++ * devices because they require PIO ports. 
++ */ ++ { INTERFACE_CONVENTIONAL_PCI_DEVICE }, ++ { } ++ }, ++ }; ++ type_register(&transitional_type_info); ++ } ++} ++ + /* virtio-blk-pci */ + + static Property virtio_blk_pci_properties[] = { +@@ -1995,9 +2101,8 @@ static void virtio_blk_pci_instance_init(Object *obj) + "bootindex", &error_abort); + } + +-static const TypeInfo virtio_blk_pci_info = { +- .name = TYPE_VIRTIO_BLK_PCI, +- .parent = TYPE_VIRTIO_PCI, ++static const VirtioPCIDeviceTypeInfo virtio_blk_pci_info = { ++ .generic_name = TYPE_VIRTIO_BLK_PCI, + .instance_size = sizeof(VirtIOBlkPCI), + .instance_init = virtio_blk_pci_instance_init, + .class_init = virtio_blk_pci_class_init, +@@ -2051,9 +2156,8 @@ static void vhost_user_blk_pci_instance_init(Object *obj) + "bootindex", &error_abort); + } + +-static const TypeInfo vhost_user_blk_pci_info = { +- .name = TYPE_VHOST_USER_BLK_PCI, +- .parent = TYPE_VIRTIO_PCI, ++static const VirtioPCIDeviceTypeInfo vhost_user_blk_pci_info = { ++ .generic_name = TYPE_VHOST_USER_BLK_PCI, + .instance_size = sizeof(VHostUserBlkPCI), + .instance_init = vhost_user_blk_pci_instance_init, + .class_init = vhost_user_blk_pci_class_init, +@@ -2119,9 +2223,8 @@ static void virtio_scsi_pci_instance_init(Object *obj) + TYPE_VIRTIO_SCSI); + } + +-static const TypeInfo virtio_scsi_pci_info = { +- .name = TYPE_VIRTIO_SCSI_PCI, +- .parent = TYPE_VIRTIO_PCI, ++static const VirtioPCIDeviceTypeInfo virtio_scsi_pci_info = { ++ .generic_name = TYPE_VIRTIO_SCSI_PCI, + .instance_size = sizeof(VirtIOSCSIPCI), + .instance_init = virtio_scsi_pci_instance_init, + .class_init = virtio_scsi_pci_class_init, +@@ -2174,9 +2277,8 @@ static void vhost_scsi_pci_instance_init(Object *obj) + "bootindex", &error_abort); + } + +-static const TypeInfo vhost_scsi_pci_info = { +- .name = TYPE_VHOST_SCSI_PCI, +- .parent = TYPE_VIRTIO_PCI, ++static const VirtioPCIDeviceTypeInfo vhost_scsi_pci_info = { ++ .generic_name = TYPE_VHOST_SCSI_PCI, + .instance_size = sizeof(VHostSCSIPCI), + .instance_init = 
vhost_scsi_pci_instance_init, + .class_init = vhost_scsi_pci_class_init, +@@ -2229,9 +2331,8 @@ static void vhost_user_scsi_pci_instance_init(Object *obj) + "bootindex", &error_abort); + } + +-static const TypeInfo vhost_user_scsi_pci_info = { +- .name = TYPE_VHOST_USER_SCSI_PCI, +- .parent = TYPE_VIRTIO_PCI, ++static const VirtioPCIDeviceTypeInfo vhost_user_scsi_pci_info = { ++ .generic_name = TYPE_VHOST_USER_SCSI_PCI, + .instance_size = sizeof(VHostUserSCSIPCI), + .instance_init = vhost_user_scsi_pci_instance_init, + .class_init = vhost_user_scsi_pci_class_init, +@@ -2277,9 +2378,8 @@ static void vhost_vsock_pci_instance_init(Object *obj) + TYPE_VHOST_VSOCK); + } + +-static const TypeInfo vhost_vsock_pci_info = { +- .name = TYPE_VHOST_VSOCK_PCI, +- .parent = TYPE_VIRTIO_PCI, ++static const VirtioPCIDeviceTypeInfo vhost_vsock_pci_info = { ++ .generic_name = TYPE_VHOST_VSOCK_PCI, + .instance_size = sizeof(VHostVSockPCI), + .instance_init = vhost_vsock_pci_instance_init, + .class_init = vhost_vsock_pci_class_init, +@@ -2334,9 +2434,8 @@ static void virtio_balloon_pci_instance_init(Object *obj) + "guest-stats-polling-interval", &error_abort); + } + +-static const TypeInfo virtio_balloon_pci_info = { +- .name = TYPE_VIRTIO_BALLOON_PCI, +- .parent = TYPE_VIRTIO_PCI, ++static const VirtioPCIDeviceTypeInfo virtio_balloon_pci_info = { ++ .generic_name = TYPE_VIRTIO_BALLOON_PCI, + .instance_size = sizeof(VirtIOBalloonPCI), + .instance_init = virtio_balloon_pci_instance_init, + .class_init = virtio_balloon_pci_class_init, +@@ -2407,9 +2506,8 @@ static void virtio_serial_pci_instance_init(Object *obj) + TYPE_VIRTIO_SERIAL); + } + +-static const TypeInfo virtio_serial_pci_info = { +- .name = TYPE_VIRTIO_SERIAL_PCI, +- .parent = TYPE_VIRTIO_PCI, ++static const VirtioPCIDeviceTypeInfo virtio_serial_pci_info = { ++ .generic_name = TYPE_VIRTIO_SERIAL_PCI, + .instance_size = sizeof(VirtIOSerialPCI), + .instance_init = virtio_serial_pci_instance_init, + .class_init = 
virtio_serial_pci_class_init, +@@ -2462,9 +2560,8 @@ static void virtio_net_pci_instance_init(Object *obj) + "bootindex", &error_abort); + } + +-static const TypeInfo virtio_net_pci_info = { +- .name = TYPE_VIRTIO_NET_PCI, +- .parent = TYPE_VIRTIO_PCI, ++static const VirtioPCIDeviceTypeInfo virtio_net_pci_info = { ++ .generic_name = TYPE_VIRTIO_NET_PCI, + .instance_size = sizeof(VirtIONetPCI), + .instance_init = virtio_net_pci_instance_init, + .class_init = virtio_net_pci_class_init, +@@ -2513,9 +2610,8 @@ static void virtio_rng_initfn(Object *obj) + TYPE_VIRTIO_RNG); + } + +-static const TypeInfo virtio_rng_pci_info = { +- .name = TYPE_VIRTIO_RNG_PCI, +- .parent = TYPE_VIRTIO_PCI, ++static const VirtioPCIDeviceTypeInfo virtio_rng_pci_info = { ++ .generic_name = TYPE_VIRTIO_RNG_PCI, + .instance_size = sizeof(VirtIORngPCI), + .instance_init = virtio_rng_initfn, + .class_init = virtio_rng_pci_class_init, +@@ -2605,24 +2701,24 @@ static const TypeInfo virtio_input_hid_pci_info = { + .abstract = true, + }; + +-static const TypeInfo virtio_keyboard_pci_info = { +- .name = TYPE_VIRTIO_KEYBOARD_PCI, ++static const VirtioPCIDeviceTypeInfo virtio_keyboard_pci_info = { ++ .generic_name = TYPE_VIRTIO_KEYBOARD_PCI, + .parent = TYPE_VIRTIO_INPUT_HID_PCI, + .class_init = virtio_input_hid_kbd_pci_class_init, + .instance_size = sizeof(VirtIOInputHIDPCI), + .instance_init = virtio_keyboard_initfn, + }; + +-static const TypeInfo virtio_mouse_pci_info = { +- .name = TYPE_VIRTIO_MOUSE_PCI, ++static const VirtioPCIDeviceTypeInfo virtio_mouse_pci_info = { ++ .generic_name = TYPE_VIRTIO_MOUSE_PCI, + .parent = TYPE_VIRTIO_INPUT_HID_PCI, + .class_init = virtio_input_hid_mouse_pci_class_init, + .instance_size = sizeof(VirtIOInputHIDPCI), + .instance_init = virtio_mouse_initfn, + }; + +-static const TypeInfo virtio_tablet_pci_info = { +- .name = TYPE_VIRTIO_TABLET_PCI, ++static const VirtioPCIDeviceTypeInfo virtio_tablet_pci_info = { ++ .generic_name = TYPE_VIRTIO_TABLET_PCI, + .parent = 
TYPE_VIRTIO_INPUT_HID_PCI, + .instance_size = sizeof(VirtIOInputHIDPCI), + .instance_init = virtio_tablet_initfn, +@@ -2637,8 +2733,8 @@ static void virtio_host_initfn(Object *obj) + TYPE_VIRTIO_INPUT_HOST); + } + +-static const TypeInfo virtio_host_pci_info = { +- .name = TYPE_VIRTIO_INPUT_HOST_PCI, ++static const VirtioPCIDeviceTypeInfo virtio_host_pci_info = { ++ .generic_name = TYPE_VIRTIO_INPUT_HOST_PCI, + .parent = TYPE_VIRTIO_INPUT_PCI, + .instance_size = sizeof(VirtIOInputHostPCI), + .instance_init = virtio_host_initfn, +@@ -2692,36 +2788,39 @@ static const TypeInfo virtio_pci_bus_info = { + + static void virtio_pci_register_types(void) + { +- type_register_static(&virtio_rng_pci_info); ++ /* Base types: */ ++ type_register_static(&virtio_pci_bus_info); ++ type_register_static(&virtio_pci_info); + type_register_static(&virtio_input_pci_info); + type_register_static(&virtio_input_hid_pci_info); +- type_register_static(&virtio_keyboard_pci_info); +- type_register_static(&virtio_mouse_pci_info); +- type_register_static(&virtio_tablet_pci_info); ++ ++ /* Implementations: */ ++ virtio_pci_types_register(&virtio_rng_pci_info); ++ virtio_pci_types_register(&virtio_keyboard_pci_info); ++ virtio_pci_types_register(&virtio_mouse_pci_info); ++ virtio_pci_types_register(&virtio_tablet_pci_info); + #ifdef CONFIG_LINUX +- type_register_static(&virtio_host_pci_info); ++ virtio_pci_types_register(&virtio_host_pci_info); + #endif +- type_register_static(&virtio_pci_bus_info); +- type_register_static(&virtio_pci_info); + #ifdef CONFIG_VIRTFS +- type_register_static(&virtio_9p_pci_info); ++ virtio_pci_types_register(&virtio_9p_pci_info); + #endif +- type_register_static(&virtio_blk_pci_info); ++ virtio_pci_types_register(&virtio_blk_pci_info); + #if defined(CONFIG_VHOST_USER_BLK) +- type_register_static(&vhost_user_blk_pci_info); ++ virtio_pci_types_register(&vhost_user_blk_pci_info); + #endif +- type_register_static(&virtio_scsi_pci_info); +- 
type_register_static(&virtio_balloon_pci_info); +- type_register_static(&virtio_serial_pci_info); +- type_register_static(&virtio_net_pci_info); ++ virtio_pci_types_register(&virtio_scsi_pci_info); ++ virtio_pci_types_register(&virtio_balloon_pci_info); ++ virtio_pci_types_register(&virtio_serial_pci_info); ++ virtio_pci_types_register(&virtio_net_pci_info); + #ifdef CONFIG_VHOST_SCSI +- type_register_static(&vhost_scsi_pci_info); ++ virtio_pci_types_register(&vhost_scsi_pci_info); + #endif + #if defined(CONFIG_VHOST_USER_SCSI) +- type_register_static(&vhost_user_scsi_pci_info); ++ virtio_pci_types_register(&vhost_user_scsi_pci_info); + #endif + #ifdef CONFIG_VHOST_VSOCK +- type_register_static(&vhost_vsock_pci_info); ++ virtio_pci_types_register(&vhost_vsock_pci_info); + #endif + } + +diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h +index 813082b..8cd5466 100644 +--- a/hw/virtio/virtio-pci.h ++++ b/hw/virtio/virtio-pci.h +@@ -417,4 +417,58 @@ struct VirtIOCryptoPCI { + /* Virtio ABI version, if we increment this, we break the guest driver. */ + #define VIRTIO_PCI_ABI_VERSION 0 + ++/* Input for virtio_pci_types_register() */ ++typedef struct VirtioPCIDeviceTypeInfo { ++ /* ++ * Common base class for the subclasses below. ++ * ++ * Required only if transitional_name or non_transitional_name is set. ++ * ++ * We need a separate base type instead of making all types ++ * inherit from generic_name for two reasons: ++ * 1) generic_name implements INTERFACE_PCIE_DEVICE, but ++ * transitional_name does not. ++ * 2) generic_name has the "disable-legacy" and "disable-modern" ++ * properties, transitional_name and non_transitional name don't. ++ */ ++ const char *base_name; ++ /* ++ * Generic device type. Optional. ++ * ++ * Supports both transitional and non-transitional modes, ++ * using the disable-legacy and disable-modern properties. ++ * If disable-legacy=auto, (non-)transitional mode is selected ++ * depending on the bus where the device is plugged. 
++ * ++ * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE, ++ * but PCI Express is supported only in non-transitional mode. ++ * ++ * The only type implemented by QEMU 3.1 and older. ++ */ ++ const char *generic_name; ++ /* ++ * The transitional device type. Optional. ++ * ++ * Implements INTERFACE_CONVENTIONAL_PCI_DEVICE only. ++ */ ++ const char *transitional_name; ++ /* ++ * The non-transitional device type. Optional. ++ * ++ * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE. ++ */ ++ const char *non_transitional_name; ++ ++ /* Parent type. If NULL, TYPE_VIRTIO_PCI is used */ ++ const char *parent; ++ ++ /* Same as TypeInfo fields: */ ++ size_t instance_size; ++ void (*instance_init)(Object *obj); ++ void (*class_init)(ObjectClass *klass, void *data); ++} VirtioPCIDeviceTypeInfo; ++ ++/* Register virtio-pci type(s). @t must be static. */ ++void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t); ++ + #endif +-- +1.8.3.1 + diff --git a/kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch b/kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch new file mode 100644 index 0000000..c17fc0f --- /dev/null +++ b/kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch @@ -0,0 +1,73 @@ +From cbac773c0218dc23f2c434f2d04e3e3cc1d081a9 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Wed, 16 Jan 2019 23:18:19 +0000 +Subject: [PATCH 4/7] virtio: Make disable-legacy/disable-modern compat + properties optional + +RH-Author: Eduardo Habkost +Message-id: <20190116231819.29310-5-ehabkost@redhat.com> +Patchwork-id: 84030 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 4/4] virtio: Make disable-legacy/disable-modern compat properties optional +Bugzilla: 1648023 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth +RH-Acked-by: Laurent Vivier +RH-Acked-by: Michael S.
Tsirkin + +Upstream tree: git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git +Upstream commit: 53921bfdce3f8fffcc22338633855247fb7b7a74 + +The disable-legacy and disable-modern properties apply only to +some virtio-pci devices. Make those properties optional. + +This fixes the crash introduced by commit f6e501a28ef9 ("virtio: Provide +version-specific variants of virtio PCI devices"): + + $ qemu-system-x86_64 -machine pc-i440fx-2.6 \ + -device virtio-net-pci-non-transitional + Unexpected error in object_property_find() at qom/object.c:1092: + qemu-system-x86_64: -device virtio-net-pci-non-transitional: can't apply \ + global virtio-pci.disable-modern=on: Property '.disable-modern' not found + Aborted (core dumped) + +Backport notes: + HW_COMPAT_RHEL7_2 (include/hw/compat.h) being patched instead + of hw_compat_2_6 (hw/core/machine.c). Our downstream tree + doesn't have the equivalent of commit ff8f261f113b ("compat: + replace PC_COMPAT_2_6 & HW_COMPAT_2_6 macros") + +Reported-by: Thomas Huth +Fixes: f6e501a28ef9 ("virtio: Provide version-specific variants of virtio PCI devices") +Signed-off-by: Eduardo Habkost +Reviewed-by: Cornelia Huck +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 1b0ba10643110f9590c0da0167d0cb1db48ca2e4) +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. 
de Paula +--- + include/hw/compat.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/include/hw/compat.h b/include/hw/compat.h +index 40db243..aeb2f73 100644 +--- a/include/hw/compat.h ++++ b/include/hw/compat.h +@@ -380,10 +380,14 @@ + .driver = "virtio-pci",\ + .property = "disable-modern",\ + .value = "on",\ ++ /* Optional because not all virtio-pci devices support legacy mode */ \ ++ .optional = true,\ + },{ /* HW_COMPAT_RHEL7_2 */ \ + .driver = "virtio-pci",\ + .property = "disable-legacy",\ + .value = "off",\ ++ /* Optional because not all virtio-pci devices support legacy mode */ \ ++ .optional = true,\ + },{ /* HW_COMPAT_RHEL7_2 */ \ + .driver = TYPE_PCI_DEVICE,\ + .property = "x-pcie-lnksta-dllla",\ +-- +1.8.3.1 + diff --git a/kvm-virtio-Provide-version-specific-variants-of-virtio-P.patch b/kvm-virtio-Provide-version-specific-variants-of-virtio-P.patch new file mode 100644 index 0000000..c093144 --- /dev/null +++ b/kvm-virtio-Provide-version-specific-variants-of-virtio-P.patch @@ -0,0 +1,519 @@ +From 14004eb28d1e573365ed21ed09afad85d239c769 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Wed, 16 Jan 2019 23:18:17 +0000 +Subject: [PATCH 2/7] virtio: Provide version-specific variants of virtio PCI + devices + +RH-Author: Eduardo Habkost +Message-id: <20190116231819.29310-3-ehabkost@redhat.com> +Patchwork-id: 84028 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 2/4] virtio: Provide version-specific variants of virtio PCI devices +Bugzilla: 1648023 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth +RH-Acked-by: Laurent Vivier +RH-Acked-by: Michael S. Tsirkin + +Many of the current virtio-*-pci device types actually represent +3 different types of devices: +* virtio 1.0 non-transitional devices +* virtio 1.0 transitional devices +* virtio 0.9 ("legacy device" in virtio 1.0 terminology) + +That would be just an annoyance if it didn't break our device/bus +compatibility QMP interfaces. 
With these multi-purpose device +types, there's no way to tell management software that +transitional devices and legacy devices require a Conventional +PCI bus. + +The multi-purpose device types would also prevent us from telling +management software what's the PCI vendor/device ID for them, +because their PCI IDs change at runtime depending on the bus +where they were plugged. + +This patch adds separate device types for each of those virtio +device flavors: + +- virtio-*-pci: the existing multi-purpose device types + - Configurable using `disable-legacy` and `disable-modern` + properties + - Legacy driver support is automatically enabled/disabled + depending on the bus where it is plugged + - Supports Conventional PCI and PCI Express buses + (but Conventional PCI is incompatible with + disable-legacy=off) + - Changes PCI vendor/device IDs at runtime +- virtio-*-pci-transitional: virtio-1.0 device supporting legacy drivers + - Supports Conventional PCI buses only, because + it has a PIO BAR +- virtio-*-pci-non-transitional: modern-only + - Supports both Conventional PCI and PCI Express buses + +The existing TYPE_* macros for these types will point to an +abstract base type, so existing casts in the code will keep +working for all variants. + +A simple test script (tests/acceptance/virtio_version.py) is +included, to check if the new device types are equivalent to +using the `disable-legacy` and `disable-modern` options. + +Acked-by: Andrea Bolognani +Reviewed-by: Cornelia Huck +Signed-off-by: Eduardo Habkost +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit f6e501a28ef9b69f6df6252160aa87876cc92a1a) +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/virtio/virtio-pci.c | 60 ++++++++++--- + hw/virtio/virtio-pci.h | 24 ++--- + tests/acceptance/virtio_version.py | 176 +++++++++++++++++++++++++++++++++++++ + 3 files changed, 236 insertions(+), 24 deletions(-) + create mode 100644 tests/acceptance/virtio_version.py + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index bee3509..68cffcd 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -1120,7 +1120,10 @@ static void virtio_9p_pci_instance_init(Object *obj) + } + + static const VirtioPCIDeviceTypeInfo virtio_9p_pci_info = { +- .generic_name = TYPE_VIRTIO_9P_PCI, ++ .base_name = TYPE_VIRTIO_9P_PCI, ++ .generic_name = "virtio-9p-pci", ++ .transitional_name = "virtio-9p-pci-transitional", ++ .non_transitional_name = "virtio-9p-pci-non-transitional", + .instance_size = sizeof(V9fsPCIState), + .instance_init = virtio_9p_pci_instance_init, + .class_init = virtio_9p_pci_class_init, +@@ -2102,7 +2105,10 @@ static void virtio_blk_pci_instance_init(Object *obj) + } + + static const VirtioPCIDeviceTypeInfo virtio_blk_pci_info = { +- .generic_name = TYPE_VIRTIO_BLK_PCI, ++ .base_name = TYPE_VIRTIO_BLK_PCI, ++ .generic_name = "virtio-blk-pci", ++ .transitional_name = "virtio-blk-pci-transitional", ++ .non_transitional_name = "virtio-blk-pci-non-transitional", + .instance_size = sizeof(VirtIOBlkPCI), + .instance_init = virtio_blk_pci_instance_init, + .class_init = virtio_blk_pci_class_init, +@@ -2157,7 +2163,10 @@ static void vhost_user_blk_pci_instance_init(Object *obj) + } + + static const VirtioPCIDeviceTypeInfo vhost_user_blk_pci_info = { +- .generic_name = TYPE_VHOST_USER_BLK_PCI, ++ .base_name = TYPE_VHOST_USER_BLK_PCI, ++ .generic_name = "vhost-user-blk-pci", ++ .transitional_name = "vhost-user-blk-pci-transitional", ++ .non_transitional_name = "vhost-user-blk-pci-non-transitional", + .instance_size = sizeof(VHostUserBlkPCI), + .instance_init = vhost_user_blk_pci_instance_init, + .class_init = 
vhost_user_blk_pci_class_init, +@@ -2224,7 +2233,10 @@ static void virtio_scsi_pci_instance_init(Object *obj) + } + + static const VirtioPCIDeviceTypeInfo virtio_scsi_pci_info = { +- .generic_name = TYPE_VIRTIO_SCSI_PCI, ++ .base_name = TYPE_VIRTIO_SCSI_PCI, ++ .generic_name = "virtio-scsi-pci", ++ .transitional_name = "virtio-scsi-pci-transitional", ++ .non_transitional_name = "virtio-scsi-pci-non-transitional", + .instance_size = sizeof(VirtIOSCSIPCI), + .instance_init = virtio_scsi_pci_instance_init, + .class_init = virtio_scsi_pci_class_init, +@@ -2278,7 +2290,10 @@ static void vhost_scsi_pci_instance_init(Object *obj) + } + + static const VirtioPCIDeviceTypeInfo vhost_scsi_pci_info = { +- .generic_name = TYPE_VHOST_SCSI_PCI, ++ .base_name = TYPE_VHOST_SCSI_PCI, ++ .generic_name = "vhost-scsi-pci", ++ .transitional_name = "vhost-scsi-pci-transitional", ++ .non_transitional_name = "vhost-scsi-pci-non-transitional", + .instance_size = sizeof(VHostSCSIPCI), + .instance_init = vhost_scsi_pci_instance_init, + .class_init = vhost_scsi_pci_class_init, +@@ -2332,7 +2347,10 @@ static void vhost_user_scsi_pci_instance_init(Object *obj) + } + + static const VirtioPCIDeviceTypeInfo vhost_user_scsi_pci_info = { +- .generic_name = TYPE_VHOST_USER_SCSI_PCI, ++ .base_name = TYPE_VHOST_USER_SCSI_PCI, ++ .generic_name = "vhost-user-scsi-pci", ++ .transitional_name = "vhost-user-scsi-pci-transitional", ++ .non_transitional_name = "vhost-user-scsi-pci-non-transitional", + .instance_size = sizeof(VHostUserSCSIPCI), + .instance_init = vhost_user_scsi_pci_instance_init, + .class_init = vhost_user_scsi_pci_class_init, +@@ -2379,7 +2397,10 @@ static void vhost_vsock_pci_instance_init(Object *obj) + } + + static const VirtioPCIDeviceTypeInfo vhost_vsock_pci_info = { +- .generic_name = TYPE_VHOST_VSOCK_PCI, ++ .base_name = TYPE_VHOST_VSOCK_PCI, ++ .generic_name = "vhost-vsock-pci", ++ .transitional_name = "vhost-vsock-pci-transitional", ++ .non_transitional_name = 
"vhost-vsock-pci-non-transitional", + .instance_size = sizeof(VHostVSockPCI), + .instance_init = vhost_vsock_pci_instance_init, + .class_init = vhost_vsock_pci_class_init, +@@ -2435,7 +2456,10 @@ static void virtio_balloon_pci_instance_init(Object *obj) + } + + static const VirtioPCIDeviceTypeInfo virtio_balloon_pci_info = { +- .generic_name = TYPE_VIRTIO_BALLOON_PCI, ++ .base_name = TYPE_VIRTIO_BALLOON_PCI, ++ .generic_name = "virtio-balloon-pci", ++ .transitional_name = "virtio-balloon-pci-transitional", ++ .non_transitional_name = "virtio-balloon-pci-non-transitional", + .instance_size = sizeof(VirtIOBalloonPCI), + .instance_init = virtio_balloon_pci_instance_init, + .class_init = virtio_balloon_pci_class_init, +@@ -2507,7 +2531,10 @@ static void virtio_serial_pci_instance_init(Object *obj) + } + + static const VirtioPCIDeviceTypeInfo virtio_serial_pci_info = { +- .generic_name = TYPE_VIRTIO_SERIAL_PCI, ++ .base_name = TYPE_VIRTIO_SERIAL_PCI, ++ .generic_name = "virtio-serial-pci", ++ .transitional_name = "virtio-serial-pci-transitional", ++ .non_transitional_name = "virtio-serial-pci-non-transitional", + .instance_size = sizeof(VirtIOSerialPCI), + .instance_init = virtio_serial_pci_instance_init, + .class_init = virtio_serial_pci_class_init, +@@ -2561,7 +2588,10 @@ static void virtio_net_pci_instance_init(Object *obj) + } + + static const VirtioPCIDeviceTypeInfo virtio_net_pci_info = { +- .generic_name = TYPE_VIRTIO_NET_PCI, ++ .base_name = TYPE_VIRTIO_NET_PCI, ++ .generic_name = "virtio-net-pci", ++ .transitional_name = "virtio-net-pci-transitional", ++ .non_transitional_name = "virtio-net-pci-non-transitional", + .instance_size = sizeof(VirtIONetPCI), + .instance_init = virtio_net_pci_instance_init, + .class_init = virtio_net_pci_class_init, +@@ -2611,7 +2641,10 @@ static void virtio_rng_initfn(Object *obj) + } + + static const VirtioPCIDeviceTypeInfo virtio_rng_pci_info = { +- .generic_name = TYPE_VIRTIO_RNG_PCI, ++ .base_name = TYPE_VIRTIO_RNG_PCI, ++ 
.generic_name = "virtio-rng-pci", ++ .transitional_name = "virtio-rng-pci-transitional", ++ .non_transitional_name = "virtio-rng-pci-non-transitional", + .instance_size = sizeof(VirtIORngPCI), + .instance_init = virtio_rng_initfn, + .class_init = virtio_rng_pci_class_init, +@@ -2734,7 +2767,10 @@ static void virtio_host_initfn(Object *obj) + } + + static const VirtioPCIDeviceTypeInfo virtio_host_pci_info = { +- .generic_name = TYPE_VIRTIO_INPUT_HOST_PCI, ++ .base_name = TYPE_VIRTIO_INPUT_HOST_PCI, ++ .generic_name = "virtio-input-host-pci", ++ .transitional_name = "virtio-input-host-pci-transitional", ++ .non_transitional_name = "virtio-input-host-pci-non-transitional", + .parent = TYPE_VIRTIO_INPUT_PCI, + .instance_size = sizeof(VirtIOInputHostPCI), + .instance_init = virtio_host_initfn, +diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h +index 8cd5466..29b4216 100644 +--- a/hw/virtio/virtio-pci.h ++++ b/hw/virtio/virtio-pci.h +@@ -216,7 +216,7 @@ static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy) + /* + * virtio-scsi-pci: This extends VirtioPCIProxy. + */ +-#define TYPE_VIRTIO_SCSI_PCI "virtio-scsi-pci" ++#define TYPE_VIRTIO_SCSI_PCI "virtio-scsi-pci-base" + #define VIRTIO_SCSI_PCI(obj) \ + OBJECT_CHECK(VirtIOSCSIPCI, (obj), TYPE_VIRTIO_SCSI_PCI) + +@@ -229,7 +229,7 @@ struct VirtIOSCSIPCI { + /* + * vhost-scsi-pci: This extends VirtioPCIProxy. + */ +-#define TYPE_VHOST_SCSI_PCI "vhost-scsi-pci" ++#define TYPE_VHOST_SCSI_PCI "vhost-scsi-pci-base" + #define VHOST_SCSI_PCI(obj) \ + OBJECT_CHECK(VHostSCSIPCI, (obj), TYPE_VHOST_SCSI_PCI) + +@@ -239,7 +239,7 @@ struct VHostSCSIPCI { + }; + #endif + +-#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci" ++#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci-base" + #define VHOST_USER_SCSI_PCI(obj) \ + OBJECT_CHECK(VHostUserSCSIPCI, (obj), TYPE_VHOST_USER_SCSI_PCI) + +@@ -252,7 +252,7 @@ struct VHostUserSCSIPCI { + /* + * vhost-user-blk-pci: This extends VirtioPCIProxy. 
+ */ +-#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci" ++#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci-base" + #define VHOST_USER_BLK_PCI(obj) \ + OBJECT_CHECK(VHostUserBlkPCI, (obj), TYPE_VHOST_USER_BLK_PCI) + +@@ -265,7 +265,7 @@ struct VHostUserBlkPCI { + /* + * virtio-blk-pci: This extends VirtioPCIProxy. + */ +-#define TYPE_VIRTIO_BLK_PCI "virtio-blk-pci" ++#define TYPE_VIRTIO_BLK_PCI "virtio-blk-pci-base" + #define VIRTIO_BLK_PCI(obj) \ + OBJECT_CHECK(VirtIOBlkPCI, (obj), TYPE_VIRTIO_BLK_PCI) + +@@ -277,7 +277,7 @@ struct VirtIOBlkPCI { + /* + * virtio-balloon-pci: This extends VirtioPCIProxy. + */ +-#define TYPE_VIRTIO_BALLOON_PCI "virtio-balloon-pci" ++#define TYPE_VIRTIO_BALLOON_PCI "virtio-balloon-pci-base" + #define VIRTIO_BALLOON_PCI(obj) \ + OBJECT_CHECK(VirtIOBalloonPCI, (obj), TYPE_VIRTIO_BALLOON_PCI) + +@@ -289,7 +289,7 @@ struct VirtIOBalloonPCI { + /* + * virtio-serial-pci: This extends VirtioPCIProxy. + */ +-#define TYPE_VIRTIO_SERIAL_PCI "virtio-serial-pci" ++#define TYPE_VIRTIO_SERIAL_PCI "virtio-serial-pci-base" + #define VIRTIO_SERIAL_PCI(obj) \ + OBJECT_CHECK(VirtIOSerialPCI, (obj), TYPE_VIRTIO_SERIAL_PCI) + +@@ -301,7 +301,7 @@ struct VirtIOSerialPCI { + /* + * virtio-net-pci: This extends VirtioPCIProxy. + */ +-#define TYPE_VIRTIO_NET_PCI "virtio-net-pci" ++#define TYPE_VIRTIO_NET_PCI "virtio-net-pci-base" + #define VIRTIO_NET_PCI(obj) \ + OBJECT_CHECK(VirtIONetPCI, (obj), TYPE_VIRTIO_NET_PCI) + +@@ -316,7 +316,7 @@ struct VirtIONetPCI { + + #ifdef CONFIG_VIRTFS + +-#define TYPE_VIRTIO_9P_PCI "virtio-9p-pci" ++#define TYPE_VIRTIO_9P_PCI "virtio-9p-pci-base" + #define VIRTIO_9P_PCI(obj) \ + OBJECT_CHECK(V9fsPCIState, (obj), TYPE_VIRTIO_9P_PCI) + +@@ -330,7 +330,7 @@ typedef struct V9fsPCIState { + /* + * virtio-rng-pci: This extends VirtioPCIProxy. 
+ */ +-#define TYPE_VIRTIO_RNG_PCI "virtio-rng-pci" ++#define TYPE_VIRTIO_RNG_PCI "virtio-rng-pci-base" + #define VIRTIO_RNG_PCI(obj) \ + OBJECT_CHECK(VirtIORngPCI, (obj), TYPE_VIRTIO_RNG_PCI) + +@@ -365,7 +365,7 @@ struct VirtIOInputHIDPCI { + + #ifdef CONFIG_LINUX + +-#define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci" ++#define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci-base" + #define VIRTIO_INPUT_HOST_PCI(obj) \ + OBJECT_CHECK(VirtIOInputHostPCI, (obj), TYPE_VIRTIO_INPUT_HOST_PCI) + +@@ -392,7 +392,7 @@ struct VirtIOGPUPCI { + /* + * vhost-vsock-pci: This extends VirtioPCIProxy. + */ +-#define TYPE_VHOST_VSOCK_PCI "vhost-vsock-pci" ++#define TYPE_VHOST_VSOCK_PCI "vhost-vsock-pci-base" + #define VHOST_VSOCK_PCI(obj) \ + OBJECT_CHECK(VHostVSockPCI, (obj), TYPE_VHOST_VSOCK_PCI) + +diff --git a/tests/acceptance/virtio_version.py b/tests/acceptance/virtio_version.py +new file mode 100644 +index 0000000..ce99025 +--- /dev/null ++++ b/tests/acceptance/virtio_version.py +@@ -0,0 +1,176 @@ ++""" ++Check compatibility of virtio device types ++""" ++# Copyright (c) 2018 Red Hat, Inc. ++# ++# Author: ++# Eduardo Habkost ++# ++# This work is licensed under the terms of the GNU GPL, version 2 or ++# later. See the COPYING file in the top-level directory. 
++import sys ++import os ++ ++sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..", "scripts")) ++from qemu import QEMUMachine ++from avocado_qemu import Test ++ ++# Virtio Device IDs: ++VIRTIO_NET = 1 ++VIRTIO_BLOCK = 2 ++VIRTIO_CONSOLE = 3 ++VIRTIO_RNG = 4 ++VIRTIO_BALLOON = 5 ++VIRTIO_RPMSG = 7 ++VIRTIO_SCSI = 8 ++VIRTIO_9P = 9 ++VIRTIO_RPROC_SERIAL = 11 ++VIRTIO_CAIF = 12 ++VIRTIO_GPU = 16 ++VIRTIO_INPUT = 18 ++VIRTIO_VSOCK = 19 ++VIRTIO_CRYPTO = 20 ++ ++PCI_VENDOR_ID_REDHAT_QUMRANET = 0x1af4 ++ ++# Device IDs for legacy/transitional devices: ++PCI_LEGACY_DEVICE_IDS = { ++ VIRTIO_NET: 0x1000, ++ VIRTIO_BLOCK: 0x1001, ++ VIRTIO_BALLOON: 0x1002, ++ VIRTIO_CONSOLE: 0x1003, ++ VIRTIO_SCSI: 0x1004, ++ VIRTIO_RNG: 0x1005, ++ VIRTIO_9P: 0x1009, ++ VIRTIO_VSOCK: 0x1012, ++} ++ ++def pci_modern_device_id(virtio_devid): ++ return virtio_devid + 0x1040 ++ ++def devtype_implements(vm, devtype, implements): ++ return devtype in [d['name'] for d in vm.command('qom-list-types', implements=implements)] ++ ++def get_pci_interfaces(vm, devtype): ++ interfaces = ('pci-express-device', 'conventional-pci-device') ++ return [i for i in interfaces if devtype_implements(vm, devtype, i)] ++ ++class VirtioVersionCheck(Test): ++ """ ++ Check if virtio-version-specific device types result in the ++ same device tree created by `disable-modern` and ++ `disable-legacy`. 
++ ++ :avocado: enable ++ :avocado: tags=x86_64 ++ """ ++ ++ # just in case there are failures, show larger diff: ++ maxDiff = 4096 ++ ++ def run_device(self, devtype, opts=None, machine='pc'): ++ """ ++ Run QEMU with `-device DEVTYPE`, return device info from `query-pci` ++ """ ++ with QEMUMachine(self.qemu_bin) as vm: ++ vm.set_machine(machine) ++ if opts: ++ devtype += ',' + opts ++ vm.add_args('-device', '%s,id=devfortest' % (devtype)) ++ vm.add_args('-S') ++ vm.launch() ++ ++ pcibuses = vm.command('query-pci') ++ alldevs = [dev for bus in pcibuses for dev in bus['devices']] ++ devfortest = [dev for dev in alldevs ++ if dev['qdev_id'] == 'devfortest'] ++ return devfortest[0], get_pci_interfaces(vm, devtype) ++ ++ ++ def assert_devids(self, dev, devid, non_transitional=False): ++ self.assertEqual(dev['id']['vendor'], PCI_VENDOR_ID_REDHAT_QUMRANET) ++ self.assertEqual(dev['id']['device'], devid) ++ if non_transitional: ++ self.assertTrue(0x1040 <= dev['id']['device'] <= 0x107f) ++ self.assertGreaterEqual(dev['id']['subsystem'], 0x40) ++ ++ def check_all_variants(self, qemu_devtype, virtio_devid): ++ """Check if a virtio device type and its variants behave as expected""" ++ # Force modern mode: ++ dev_modern, _ = self.run_device(qemu_devtype, ++ 'disable-modern=off,disable-legacy=on') ++ self.assert_devids(dev_modern, pci_modern_device_id(virtio_devid), ++ non_transitional=True) ++ ++ # -non-transitional device types should be 100% equivalent to ++ # ,disable-modern=off,disable-legacy=on ++ dev_1_0, nt_ifaces = self.run_device('%s-non-transitional' % (qemu_devtype)) ++ self.assertEqual(dev_modern, dev_1_0) ++ ++ # Force transitional mode: ++ dev_trans, _ = self.run_device(qemu_devtype, ++ 'disable-modern=off,disable-legacy=off') ++ self.assert_devids(dev_trans, PCI_LEGACY_DEVICE_IDS[virtio_devid]) ++ ++ # Force legacy mode: ++ dev_legacy, _ = self.run_device(qemu_devtype, ++ 'disable-modern=on,disable-legacy=off') ++ self.assert_devids(dev_legacy, 
PCI_LEGACY_DEVICE_IDS[virtio_devid]) ++ ++ # No options: default to transitional on PC machine-type: ++ no_opts_pc, generic_ifaces = self.run_device(qemu_devtype) ++ self.assertEqual(dev_trans, no_opts_pc) ++ ++ #TODO: check if plugging on a PCI Express bus will make the ++ # device non-transitional ++ #no_opts_q35 = self.run_device(qemu_devtype, machine='q35') ++ #self.assertEqual(dev_modern, no_opts_q35) ++ ++ # -transitional device types should be 100% equivalent to ++ # ,disable-modern=off,disable-legacy=off ++ dev_trans, trans_ifaces = self.run_device('%s-transitional' % (qemu_devtype)) ++ self.assertEqual(dev_trans, dev_trans) ++ ++ # ensure the interface information is correct: ++ self.assertIn('conventional-pci-device', generic_ifaces) ++ self.assertIn('pci-express-device', generic_ifaces) ++ ++ self.assertIn('conventional-pci-device', nt_ifaces) ++ self.assertIn('pci-express-device', nt_ifaces) ++ ++ self.assertIn('conventional-pci-device', trans_ifaces) ++ self.assertNotIn('pci-express-device', trans_ifaces) ++ ++ ++ def test_conventional_devs(self): ++ self.check_all_variants('virtio-net-pci', VIRTIO_NET) ++ # virtio-blk requires 'driver' parameter ++ #self.check_all_variants('virtio-blk-pci', VIRTIO_BLOCK) ++ self.check_all_variants('virtio-serial-pci', VIRTIO_CONSOLE) ++ self.check_all_variants('virtio-rng-pci', VIRTIO_RNG) ++ self.check_all_variants('virtio-balloon-pci', VIRTIO_BALLOON) ++ self.check_all_variants('virtio-scsi-pci', VIRTIO_SCSI) ++ # virtio-9p requires 'fsdev' parameter ++ #self.check_all_variants('virtio-9p-pci', VIRTIO_9P) ++ ++ def check_modern_only(self, qemu_devtype, virtio_devid): ++ """Check if a modern-only virtio device type behaves as expected""" ++ # Force modern mode: ++ dev_modern, _ = self.run_device(qemu_devtype, ++ 'disable-modern=off,disable-legacy=on') ++ self.assert_devids(dev_modern, pci_modern_device_id(virtio_devid), ++ non_transitional=True) ++ ++ # No options: should be modern anyway ++ dev_no_opts, ifaces = 
self.run_device(qemu_devtype) ++ self.assertEqual(dev_modern, dev_no_opts) ++ ++ self.assertIn('conventional-pci-device', ifaces) ++ self.assertIn('pci-express-device', ifaces) ++ ++ def test_modern_only_devs(self): ++ self.check_modern_only('virtio-vga', VIRTIO_GPU) ++ self.check_modern_only('virtio-gpu-pci', VIRTIO_GPU) ++ self.check_modern_only('virtio-mouse-pci', VIRTIO_INPUT) ++ self.check_modern_only('virtio-tablet-pci', VIRTIO_INPUT) ++ self.check_modern_only('virtio-keyboard-pci', VIRTIO_INPUT) +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index cdc387b..f285fc6 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 4%{?dist} +Release: 5%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -140,6 +140,20 @@ Patch27: kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch Patch28: kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch # For bz#1661967 - Kernel prints the message "VPHN is not supported. Disabling polling..." 
Patch29: kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch +# For bz#1648023 - Provide separate device types for transitional virtio PCI devices - Fast Train +Patch30: kvm-virtio-Helper-for-registering-virtio-device-types.patch +# For bz#1648023 - Provide separate device types for transitional virtio PCI devices - Fast Train +Patch31: kvm-virtio-Provide-version-specific-variants-of-virtio-P.patch +# For bz#1648023 - Provide separate device types for transitional virtio PCI devices - Fast Train +Patch32: kvm-globals-Allow-global-properties-to-be-optional.patch +# For bz#1648023 - Provide separate device types for transitional virtio PCI devices - Fast Train +Patch33: kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch +# For bz#1656504 - Machine types for qemu-kvm based on rebase to qemu-3.1 (aarch64) +Patch34: kvm-aarch64-Add-virt-rhel8.0.0-machine-type-for-ARM.patch +# For bz#1656504 - Machine types for qemu-kvm based on rebase to qemu-3.1 (aarch64) +Patch35: kvm-aarch64-Set-virt-rhel8.0.0-max_cpus-to-512.patch +# For bz#1656504 - Machine types for qemu-kvm based on rebase to qemu-3.1 (aarch64) +Patch36: kvm-aarch64-Use-256MB-ECAM-region-by-default.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -987,6 +1001,19 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Jan 21 2019 Danilo Cesar Lemes de Paula - 3.1.0-5.el8 +- kvm-virtio-Helper-for-registering-virtio-device-types.patch [bz#1648023] +- kvm-virtio-Provide-version-specific-variants-of-virtio-P.patch [bz#1648023] +- kvm-globals-Allow-global-properties-to-be-optional.patch [bz#1648023] +- kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch [bz#1648023] +- kvm-aarch64-Add-virt-rhel8.0.0-machine-type-for-ARM.patch [bz#1656504] +- kvm-aarch64-Set-virt-rhel8.0.0-max_cpus-to-512.patch [bz#1656504] +- kvm-aarch64-Use-256MB-ECAM-region-by-default.patch [bz#1656504] +- Resolves: bz#1648023 + (Provide separate device types for transitional virtio PCI 
devices - Fast Train) +- Resolves: bz#1656504 + (Machine types for qemu-kvm based on rebase to qemu-3.1 (aarch64)) + * Fri Jan 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-4.el8 - kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch [bz#1656510] - kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch [bz#1661967] From a9595f021e726b216626bb21274c11c62c4d638d Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Wed, 23 Jan 2019 17:22:30 +0000 Subject: [PATCH 011/195] * Wed Jan 23 2019 Danilo Cesar Lemes de Paula - 3.1.0-6.el8 - kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch [bz#1653114] - kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch [bz#1668205] - Resolves: bz#1653114 (Incorrect NUMA nodes passed to qemu-kvm guest in ibm,max-associativity-domains property) - Resolves: bz#1668205 (Guest quit with error when hotunplug cpu) --- ...ignore-ESRCH-in-qemu_cpu_kick_thread.patch | 62 ++++++++ ...x-associativity-domains-property-num.patch | 149 ++++++++++++++++++ qemu-kvm.spec | 14 +- 3 files changed, 224 insertions(+), 1 deletion(-) create mode 100644 kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch create mode 100644 kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch diff --git a/kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch b/kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch new file mode 100644 index 0000000..fe79e80 --- /dev/null +++ b/kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch @@ -0,0 +1,62 @@ +From 5f57f764ebb7451c71ffa04130ad2f2e4cb531e8 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Wed, 23 Jan 2019 09:15:22 +0000 +Subject: [PATCH 2/2] cpus: ignore ESRCH in qemu_cpu_kick_thread() + +RH-Author: Laurent Vivier +Message-id: <20190123091522.17581-1-lvivier@redhat.com> +Patchwork-id: 84092 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH v2] cpus: ignore ESRCH in qemu_cpu_kick_thread() +Bugzilla: 1668205 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Thomas Huth +RH-Acked-by: Serhii Popovych +RH-Acked-by: 
Laszlo Ersek + +We can have a race condition between qemu_cpu_kick_thread() and +qemu_kvm_cpu_thread_fn() when we hotunplug a CPU. In this case, +qemu_cpu_kick_thread() can try to kick a thread that is exiting. +pthread_kill() returns an error and qemu is stopped by an exit(1). + + qemu:qemu_cpu_kick_thread: No such process + +We can safely ignore this error. + +Signed-off-by: Laurent Vivier +Signed-off-by: Paolo Bonzini +(cherry picked from commit e9979ef245549b8e1fd240ec9937271c7fda0b57) +Signed-off-by: Laurent Vivier + +BRANCH: rhel8/master-3.1.0 +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1668205 +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=19905615 +UPSTREAM: In maintainer pull request + git://github.com/bonzini/qemu.git tags/for-upstream +TEST: Upstream version tested by QE +--- +v2: add BRANCH: tag to make virt-ci-maint-team happy + + cpus.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +Signed-off-by: Danilo C. L. de Paula +--- + cpus.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/cpus.c b/cpus.c +index 0ddeeef..4717490 100644 +--- a/cpus.c ++++ b/cpus.c +@@ -1778,7 +1778,7 @@ static void qemu_cpu_kick_thread(CPUState *cpu) + } + cpu->thread_kicked = true; + err = pthread_kill(cpu->thread->thread, SIG_IPI); +- if (err) { ++ if (err && err != ESRCH) { + fprintf(stderr, "qemu:%s: %s", __func__, strerror(err)); + exit(1); + } +-- +1.8.3.1 + diff --git a/kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch b/kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch new file mode 100644 index 0000000..bf01cdf --- /dev/null +++ b/kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch @@ -0,0 +1,149 @@ +From f39913b42600b838c415f6fb561be940bea265dd Mon Sep 17 00:00:00 2001 +From: Serhii Popovych +Date: Wed, 9 Jan 2019 13:31:49 +0000 +Subject: [PATCH 1/2] spapr: Fix ibm, max-associativity-domains property number + of nodes + +RH-Author: Serhii Popovych +Message-id: 
<1547040709-797-1-git-send-email-spopovyc@redhat.com> +Patchwork-id: 83920 +O-Subject: [RHEL-8.0 qemu-kvm PATCH v2] spapr: Fix ibm, max-associativity-domains property number of nodes +Bugzilla: 1653114 +RH-Acked-by: Laurent Vivier +RH-Acked-by: David Gibson +RH-Acked-by: Thomas Huth + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1653114 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=19727263 +Branch: rhel8/master-3.1.0 +Upstream: Merged +Testing: Build and boot tested on rhel-7.6 with steps described in + comment 0. Issue no longer reproducible. + +Laurent Vivier reported off by one with maximum number of NUMA nodes +provided by qemu-kvm being less by one than required according to +description of "ibm,max-associativity-domains" property in LoPAPR. + +It appears that I incorrectly treated LoPAPR description of this +property assuming it provides last valid domain (NUMA node here) +instead of maximum number of domains. + + ### Before hot-add + + (qemu) info numa + 3 nodes + node 0 cpus: 0 + node 0 size: 0 MB + node 0 plugged: 0 MB + node 1 cpus: + node 1 size: 1024 MB + node 1 plugged: 0 MB + node 2 cpus: + node 2 size: 0 MB + node 2 plugged: 0 MB + + $ numactl -H + available: 2 nodes (0-1) + node 0 cpus: 0 + node 0 size: 0 MB + node 0 free: 0 MB + node 1 cpus: + node 1 size: 999 MB + node 1 free: 658 MB + node distances: + node 0 1 + 0: 10 40 + 1: 40 10 + + ### Hot-add + + (qemu) object_add memory-backend-ram,id=mem0,size=1G + (qemu) device_add pc-dimm,id=dimm1,memdev=mem0,node=2 + (qemu) [ 87.704898] pseries-hotplug-mem: Attempting to hot-add 4 ... + + [ 87.705128] lpar: Attempting to resize HPT to shift 21 + ... 
+ + ### After hot-add + + (qemu) info numa + 3 nodes + node 0 cpus: 0 + node 0 size: 0 MB + node 0 plugged: 0 MB + node 1 cpus: + node 1 size: 1024 MB + node 1 plugged: 0 MB + node 2 cpus: + node 2 size: 1024 MB + node 2 plugged: 1024 MB + + $ numactl -H + available: 2 nodes (0-1) + ^^^^^^^^^^^^^^^^^^^^^^^^ + Still only two nodes (and memory hot-added to node 0 below) + node 0 cpus: 0 + node 0 size: 1024 MB + node 0 free: 1021 MB + node 1 cpus: + node 1 size: 999 MB + node 1 free: 658 MB + node distances: + node 0 1 + 0: 10 40 + 1: 40 10 + +After fix applied numactl(8) reports 3 nodes available and memory +plugged into node 2 as expected. + +>From David Gibson: +------------------ + Qemu makes a distinction between "non NUMA" (nb_numa_nodes == 0) and + "NUMA with one node" (nb_numa_nodes == 1). But from a PAPR guest's + point of view these are equivalent. I don't want to present two + different cases to the guest when we don't need to, so even though the + guest can handle it, I'd prefer we put a '1' here for both the + nb_numa_nodes == 0 and nb_numa_nodes == 1 case. + +This consolidates everything discussed previously on mailing list. + +Fixes: da9f80fbad21 ("spapr: Add ibm,max-associativity-domains property") +Reported-by: Laurent Vivier +Signed-off-by: Serhii Popovych + +Signed-off-by: David Gibson +Reviewed-by: Greg Kurz +Reviewed-by: Laurent Vivier +(cherry picked from commit 3908a24fcb83913079d315de0ca6d598e8616dbb) +Signed-off-by: Serhii Popovych +--- +v2: + Rebased against rhel8/qemu-kvm-3.1.0 for RHEL Advanced Virtualization + product. + + Added "Branch:" tag to commit message as suggested by Laurent Vivier. + + hw/ppc/spapr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/spapr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index d5d2eb4..bd2abb7 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -1033,7 +1033,7 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt) + cpu_to_be32(0), + cpu_to_be32(0), + cpu_to_be32(0), +- cpu_to_be32(nb_numa_nodes ? nb_numa_nodes - 1 : 0), ++ cpu_to_be32(nb_numa_nodes ? nb_numa_nodes : 1), + }; + + _FDT(rtas = fdt_add_subnode(fdt, 0, "rtas")); +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index f285fc6..5b8b7f0 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 5%{?dist} +Release: 6%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -154,6 +154,10 @@ Patch34: kvm-aarch64-Add-virt-rhel8.0.0-machine-type-for-ARM.patch Patch35: kvm-aarch64-Set-virt-rhel8.0.0-max_cpus-to-512.patch # For bz#1656504 - Machine types for qemu-kvm based on rebase to qemu-3.1 (aarch64) Patch36: kvm-aarch64-Use-256MB-ECAM-region-by-default.patch +# For bz#1653114 - Incorrect NUMA nodes passed to qemu-kvm guest in ibm,max-associativity-domains property +Patch37: kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch +# For bz#1668205 - Guest quit with error when hotunplug cpu +Patch38: kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1001,6 +1005,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Wed Jan 23 2019 Danilo Cesar Lemes de Paula - 3.1.0-6.el8 +- kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch [bz#1653114] +- kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch [bz#1668205] +- Resolves: bz#1653114 + (Incorrect NUMA nodes passed to qemu-kvm guest in ibm,max-associativity-domains property) +- Resolves: 
bz#1668205 + (Guest quit with error when hotunplug cpu) + * Mon Jan 21 2019 Danilo Cesar Lemes de Paula - 3.1.0-5.el8 - kvm-virtio-Helper-for-registering-virtio-device-types.patch [bz#1648023] - kvm-virtio-Provide-version-specific-variants-of-virtio-P.patch [bz#1648023] From aa76ca88ee4bf7fd3fb0c96c38bd2becee609bab Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Thu, 24 Jan 2019 18:00:01 +0000 Subject: [PATCH 012/195] * Thu Jan 24 2019 Danilo Cesar Lemes de Paula - 3.1.0-7.el8 - kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch [bz#1653511] - kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch [bz#1653511] - Resolves: bz#1653511 (qemu doesn't report all support cpu features which cause libvirt cannot get the support status of hv_tlbflush) --- ...omment-explaining-why-.feat_names-ar.patch | 48 ++++ ...HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch | 211 ++++++++++++++++++ qemu-kvm.spec | 12 +- 3 files changed, 270 insertions(+), 1 deletion(-) create mode 100644 kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch create mode 100644 kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch diff --git a/kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch b/kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch new file mode 100644 index 0000000..38b6d88 --- /dev/null +++ b/kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch @@ -0,0 +1,48 @@ +From 61470c276a7785f3615da564f15a5c2368354638 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Fri, 18 Jan 2019 11:57:05 +0000 +Subject: [PATCH 2/2] i386/kvm: add a comment explaining why .feat_names are + commented out for Hyper-V feature bits + +RH-Author: Vitaly Kuznetsov +Message-id: <20190118115705.19731-3-vkuznets@redhat.com> +Patchwork-id: 84048 +O-Subject: [RHEL8 qemu-kvm PATCH 2/2] i386/kvm: add a comment explaining why .feat_names are commented out for Hyper-V feature bits +Bugzilla: 1653511 +RH-Acked-by: Mohammed Gamal +RH-Acked-by: 
Eduardo Otubo +RH-Acked-by: Eduardo Habkost + +Hyper-V .feat_names are, unlike hardware features, commented out and it is +not obvious why we do that. Document the current status quo. + +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20181221141604.16935-1-vkuznets@redhat.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit abd5fc4c862d033a989552914149f01c9476bb16) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 460fe06..8570b25 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -929,6 +929,13 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + */ + .no_autoenable_flags = ~0U, + }, ++ /* ++ * .feat_names are commented out for Hyper-V enlightenments because we ++ * don't want to have two different ways for enabling them on QEMU command ++ * line. Some features (e.g. "hyperv_time", "hyperv_vapic", ...) require ++ * enabling several feature bits simultaneously, exposing these bits ++ * individually may just confuse guests. 
++ */ + [FEAT_HYPERV_EAX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { +-- +1.8.3.1 + diff --git a/kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch b/kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch new file mode 100644 index 0000000..b39a55f --- /dev/null +++ b/kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch @@ -0,0 +1,211 @@ +From 4d58784a4a507fa1070b330846d941f91bb9abdc Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Fri, 18 Jan 2019 11:57:04 +0000 +Subject: [PATCH 1/2] i386/kvm: expose HV_CPUID_ENLIGHTMENT_INFO.EAX and + HV_CPUID_NESTED_FEATURES.EAX as feature words + +RH-Author: Vitaly Kuznetsov +Message-id: <20190118115705.19731-2-vkuznets@redhat.com> +Patchwork-id: 84046 +O-Subject: [RHEL8 qemu-kvm PATCH 1/2] i386/kvm: expose HV_CPUID_ENLIGHTMENT_INFO.EAX and HV_CPUID_NESTED_FEATURES.EAX as feature words +Bugzilla: 1653511 +RH-Acked-by: Mohammed Gamal +RH-Acked-by: Eduardo Otubo +RH-Acked-by: Eduardo Habkost + +It was found that QMP users of QEMU (e.g. libvirt) may need +HV_CPUID_ENLIGHTMENT_INFO.EAX/HV_CPUID_NESTED_FEATURES.EAX information. In +particular, 'hv_tlbflush' and 'hv_evmcs' enlightenments are only exposed in +HV_CPUID_ENLIGHTMENT_INFO.EAX. + +HV_CPUID_NESTED_FEATURES.EAX is exposed for two reasons: convenience +(we don't need to export it from hyperv_handle_properties() and as +future-proof for Enlightened MSR-Bitmap, PV EPT invalidation and +direct virtual flush features. + +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20181126135958.20956-1-vkuznets@redhat.com> +Reviewed-by: Roman Kagan +Signed-off-by: Eduardo Habkost +(cherry picked from commit a2b107dbbd342ff2077aa5af705efaf68c375459) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 30 ++++++++++++++++++++ + target/i386/cpu.h | 2 ++ + target/i386/kvm.c | 85 +++++++++++++++++++++++++++++-------------------------- + 3 files changed, 77 insertions(+), 40 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 573de14..460fe06 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -980,6 +980,36 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + }, + .cpuid = { .eax = 0x40000003, .reg = R_EDX, }, + }, ++ [FEAT_HV_RECOMM_EAX] = { ++ .type = CPUID_FEATURE_WORD, ++ .feat_names = { ++ NULL /* hv_recommend_pv_as_switch */, ++ NULL /* hv_recommend_pv_tlbflush_local */, ++ NULL /* hv_recommend_pv_tlbflush_remote */, ++ NULL /* hv_recommend_msr_apic_access */, ++ NULL /* hv_recommend_msr_reset */, ++ NULL /* hv_recommend_relaxed_timing */, ++ NULL /* hv_recommend_dma_remapping */, ++ NULL /* hv_recommend_int_remapping */, ++ NULL /* hv_recommend_x2apic_msrs */, ++ NULL /* hv_recommend_autoeoi_deprecation */, ++ NULL /* hv_recommend_pv_ipi */, ++ NULL /* hv_recommend_ex_hypercalls */, ++ NULL /* hv_hypervisor_is_nested */, ++ NULL /* hv_recommend_int_mbec */, ++ NULL /* hv_recommend_evmcs */, ++ NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ }, ++ .cpuid = { .eax = 0x40000004, .reg = R_EAX, }, ++ }, ++ [FEAT_HV_NESTED_EAX] = { ++ .type = CPUID_FEATURE_WORD, ++ .cpuid = { .eax = 0x4000000A, .reg = R_EAX, }, ++ }, + [FEAT_SVM] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 9c52d0c..dd88151 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -497,6 +497,8 @@ typedef enum FeatureWord { + FEAT_HYPERV_EAX, /* CPUID[4000_0003].EAX */ + FEAT_HYPERV_EBX, /* CPUID[4000_0003].EBX */ + FEAT_HYPERV_EDX, /* CPUID[4000_0003].EDX */ ++ FEAT_HV_RECOMM_EAX, /* CPUID[4000_0004].EAX */ ++ FEAT_HV_NESTED_EAX, /* CPUID[4000_000A].EAX */ + FEAT_SVM, /* 
CPUID[8000_000A].EDX */ + FEAT_XSAVE, /* CPUID[EAX=0xd,ECX=1].EAX */ + FEAT_6_EAX, /* CPUID[6].EAX */ +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 5b0ce82..0c9a5e4 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -798,6 +798,48 @@ static int hyperv_handle_properties(CPUState *cs) + } + env->features[FEAT_HYPERV_EAX] |= HV_SYNTIMERS_AVAILABLE; + } ++ if (cpu->hyperv_relaxed_timing) { ++ env->features[FEAT_HV_RECOMM_EAX] |= HV_RELAXED_TIMING_RECOMMENDED; ++ } ++ if (cpu->hyperv_vapic) { ++ env->features[FEAT_HV_RECOMM_EAX] |= HV_APIC_ACCESS_RECOMMENDED; ++ } ++ if (cpu->hyperv_tlbflush) { ++ if (kvm_check_extension(cs->kvm_state, ++ KVM_CAP_HYPERV_TLBFLUSH) <= 0) { ++ fprintf(stderr, "Hyper-V TLB flush support " ++ "(requested by 'hv-tlbflush' cpu flag) " ++ " is not supported by kernel\n"); ++ return -ENOSYS; ++ } ++ env->features[FEAT_HV_RECOMM_EAX] |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; ++ env->features[FEAT_HV_RECOMM_EAX] |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; ++ } ++ if (cpu->hyperv_ipi) { ++ if (kvm_check_extension(cs->kvm_state, ++ KVM_CAP_HYPERV_SEND_IPI) <= 0) { ++ fprintf(stderr, "Hyper-V IPI send support " ++ "(requested by 'hv-ipi' cpu flag) " ++ " is not supported by kernel\n"); ++ return -ENOSYS; ++ } ++ env->features[FEAT_HV_RECOMM_EAX] |= HV_CLUSTER_IPI_RECOMMENDED; ++ env->features[FEAT_HV_RECOMM_EAX] |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; ++ } ++ if (cpu->hyperv_evmcs) { ++ uint16_t evmcs_version; ++ ++ if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, ++ (uintptr_t)&evmcs_version)) { ++ fprintf(stderr, "Hyper-V Enlightened VMCS " ++ "(requested by 'hv-evmcs' cpu flag) " ++ "is not supported by kernel\n"); ++ return -ENOSYS; ++ } ++ env->features[FEAT_HV_RECOMM_EAX] |= HV_ENLIGHTENED_VMCS_RECOMMENDED; ++ env->features[FEAT_HV_NESTED_EAX] = evmcs_version; ++ } ++ + return 0; + } + +@@ -871,7 +913,6 @@ int kvm_arch_init_vcpu(CPUState *cs) + uint32_t unused; + struct kvm_cpuid_entry2 *c; + uint32_t signature[3]; +- 
uint16_t evmcs_version; + int kvm_base = KVM_CPUID_SIGNATURE; + int r; + Error *local_err = NULL; +@@ -946,44 +987,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + + c = &cpuid_data.entries[cpuid_i++]; + c->function = HV_CPUID_ENLIGHTMENT_INFO; +- if (cpu->hyperv_relaxed_timing) { +- c->eax |= HV_RELAXED_TIMING_RECOMMENDED; +- } +- if (cpu->hyperv_vapic) { +- c->eax |= HV_APIC_ACCESS_RECOMMENDED; +- } +- if (cpu->hyperv_tlbflush) { +- if (kvm_check_extension(cs->kvm_state, +- KVM_CAP_HYPERV_TLBFLUSH) <= 0) { +- fprintf(stderr, "Hyper-V TLB flush support " +- "(requested by 'hv-tlbflush' cpu flag) " +- " is not supported by kernel\n"); +- return -ENOSYS; +- } +- c->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; +- c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; +- } +- if (cpu->hyperv_ipi) { +- if (kvm_check_extension(cs->kvm_state, +- KVM_CAP_HYPERV_SEND_IPI) <= 0) { +- fprintf(stderr, "Hyper-V IPI send support " +- "(requested by 'hv-ipi' cpu flag) " +- " is not supported by kernel\n"); +- return -ENOSYS; +- } +- c->eax |= HV_CLUSTER_IPI_RECOMMENDED; +- c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; +- } +- if (cpu->hyperv_evmcs) { +- if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, +- (uintptr_t)&evmcs_version)) { +- fprintf(stderr, "Hyper-V Enlightened VMCS " +- "(requested by 'hv-evmcs' cpu flag) " +- "is not supported by kernel\n"); +- return -ENOSYS; +- } +- c->eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED; +- } ++ ++ c->eax = env->features[FEAT_HV_RECOMM_EAX]; + c->ebx = cpu->hyperv_spinlock_attempts; + + c = &cpuid_data.entries[cpuid_i++]; +@@ -1007,7 +1012,7 @@ int kvm_arch_init_vcpu(CPUState *cs) + + c = &cpuid_data.entries[cpuid_i++]; + c->function = HV_CPUID_NESTED_FEATURES; +- c->eax = evmcs_version; ++ c->eax = env->features[FEAT_HV_NESTED_EAX]; + } + } + +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 5b8b7f0..e722aaa 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and 
virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 6%{?dist} +Release: 7%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -158,6 +158,10 @@ Patch36: kvm-aarch64-Use-256MB-ECAM-region-by-default.patch Patch37: kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch # For bz#1668205 - Guest quit with error when hotunplug cpu Patch38: kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch +# For bz#1653511 - qemu doesn't report all support cpu features which cause libvirt cannot get the support status of hv_tlbflush +Patch39: kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch +# For bz#1653511 - qemu doesn't report all support cpu features which cause libvirt cannot get the support status of hv_tlbflush +Patch40: kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1005,6 +1009,12 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Thu Jan 24 2019 Danilo Cesar Lemes de Paula - 3.1.0-7.el8 +- kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch [bz#1653511] +- kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch [bz#1653511] +- Resolves: bz#1653511 + (qemu doesn't report all support cpu features which cause libvirt cannot get the support status of hv_tlbflush) + * Wed Jan 23 2019 Danilo Cesar Lemes de Paula - 3.1.0-6.el8 - kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch [bz#1653114] - kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch [bz#1668205] From 2f332438f939b4c464609a4abb1c5d75aabf66ca Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Mon, 28 Jan 2019 14:41:10 +0000 Subject: [PATCH 013/195] * Mon Jan 28 2019 Danilo Cesar Lemes de Paula - 3.1.0-8.el8 - kvm-migration-rdma-unregister-fd-handler.patch [bz#1666601] - Resolves: bz#1666601 ([q35] dst qemu core dumped when do rdma migration with Mellanox IB QDR card) --- ...migration-rdma-unregister-fd-handler.patch | 53 +++++++++++++++++++ qemu-kvm.spec | 9 +++- 2 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 kvm-migration-rdma-unregister-fd-handler.patch diff --git a/kvm-migration-rdma-unregister-fd-handler.patch b/kvm-migration-rdma-unregister-fd-handler.patch new file mode 100644 index 0000000..f76b244 --- /dev/null +++ b/kvm-migration-rdma-unregister-fd-handler.patch @@ -0,0 +1,53 @@ +From 7e65f4bfb4737b32ace64a5b53fa8915a7d28565 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 24 Jan 2019 16:41:10 +0000 +Subject: [PATCH] migration/rdma: unregister fd handler +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20190124164110.18787-2-dgilbert@redhat.com> +Patchwork-id: 84110 +O-Subject: [RHEL-8.0 qemu-kvm PATCH 1/1] migration/rdma: unregister fd handler +Bugzilla: 1666601 +RH-Acked-by: Peter Xu +RH-Acked-by: Pankaj Gupta +RH-Acked-by: Philippe Mathieu-Daudé + +From: "Dr. David Alan Gilbert" + +Unregister the fd handler before we destroy the channel, +otherwise we've got a race where we might land in the +fd handler just as we're closing the device. + +(The race is quite data dependent, you just have to have +the right set of devices for it to trigger). + +Corresponds to RH bz: https://bugzilla.redhat.com/show_bug.cgi?id=1666601 + +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20190122173111.29821-1-dgilbert@redhat.com> +Reviewed-by: Peter Xu +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit fbbaacab2758cb3f32a07524710533b1d6422be4) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/rdma.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/migration/rdma.c b/migration/rdma.c +index 9b2e7e1..54a3c11 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -2321,6 +2321,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) + rdma->connected = false; + } + ++ qemu_set_fd_handler(rdma->channel->fd, NULL, NULL, NULL); + g_free(rdma->dest_blocks); + rdma->dest_blocks = NULL; + +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index e722aaa..2537dd3 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 7%{?dist} +Release: 8%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -162,6 +162,8 @@ Patch38: kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch Patch39: kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch # For bz#1653511 - qemu doesn't report all support cpu features which cause libvirt cannot get the support status of hv_tlbflush Patch40: kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch +# For bz#1666601 - [q35] dst qemu core dumped when do rdma migration with Mellanox IB QDR card +Patch41: kvm-migration-rdma-unregister-fd-handler.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1009,6 +1011,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Jan 28 2019 Danilo Cesar Lemes de Paula - 3.1.0-8.el8 +- kvm-migration-rdma-unregister-fd-handler.patch [bz#1666601] +- Resolves: bz#1666601 + ([q35] dst qemu core dumped when do rdma migration with Mellanox IB QDR card) + * Thu Jan 24 2019 Danilo Cesar Lemes de Paula - 3.1.0-7.el8 - kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch [bz#1653511] - kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch [bz#1653511] From 18677cc3dcec15e9b037defe895fed6bace1c5dc Mon Sep 17 
00:00:00 2001 From: "Danilo C. L. de Paula" Date: Tue, 29 Jan 2019 13:07:08 +0000 Subject: [PATCH 014/195] * Tue Jan 29 2019 Danilo Cesar Lemes de Paula - 3.1.0-9.el8 - kvm-migration-rdma-unregister-fd-handler.patch [bz#1666601] - kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch [bz#1659127] - kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch [bz#1659127] - Resolves: bz#1659127 (Stress guest and stop it, then do live migration, guest hit call trace on destination end) - Resolves: bz#1666601 ([q35] dst qemu core dumped when do rdma migration with Mellanox IB QDR card) --- kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch | 50 ++++ ...migration-rdma-unregister-fd-handler.patch | 4 +- ...ly-stop-the-KVM-TOD-while-the-guest-.patch | 250 ++++++++++++++++++ qemu-kvm.spec | 15 +- 4 files changed, 316 insertions(+), 3 deletions(-) create mode 100644 kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch create mode 100644 kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch diff --git a/kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch b/kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch new file mode 100644 index 0000000..29e394d --- /dev/null +++ b/kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch @@ -0,0 +1,50 @@ +From c6ac9501471c3c931367b1967ad97ecfc498249c Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Fri, 21 Dec 2018 15:33:17 +0000 +Subject: [PATCH 3/3] hw/s390x: Fix bad mask in time2tod() + +RH-Author: David Hildenbrand +Message-id: <20181221153317.27647-3-david@redhat.com> +Patchwork-id: 83743 +O-Subject: [RHEL-8.0 qemu-kvm v2 PATCH 2/2] hw/s390x: Fix bad mask in time2tod() +Bugzilla: 1659127 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth +RH-Acked-by: Laurent Vivier + +Since "s390x/tcg: avoid overflows in time2tod/tod2time", the +time2tod() function tries to deal with the 9 uppermost bits in the +time value, but uses the wrong mask for this: 0xff80000000000000 should +be used instead of 0xff10000000000000 here. 
+ +Fixes: 14055ce53c2d901d826ffad7fb7d6bb8ab46bdfd +Cc: qemu-stable@nongnu.org +Signed-off-by: Thomas Huth +Message-Id: <1544792887-14575-1-git-send-email-thuth@redhat.com> +Reviewed-by: David Hildenbrand +[CH: tweaked commit message] +Signed-off-by: Cornelia Huck +(cherry picked from commit aba7a5a2de3dba5917024df25441f715b9249e31) +Signed-off-by: David Hildenbrand + +Signed-off-by: Danilo C. L. de Paula +--- + include/hw/s390x/tod.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/hw/s390x/tod.h b/include/hw/s390x/tod.h +index cbd7552..47ef9de 100644 +--- a/include/hw/s390x/tod.h ++++ b/include/hw/s390x/tod.h +@@ -56,7 +56,7 @@ typedef struct S390TODClass { + /* Converts ns to s390's clock format */ + static inline uint64_t time2tod(uint64_t ns) + { +- return (ns << 9) / 125 + (((ns & 0xff10000000000000ull) / 125) << 9); ++ return (ns << 9) / 125 + (((ns & 0xff80000000000000ull) / 125) << 9); + } + + /* Converts s390's clock format to ns */ +-- +1.8.3.1 + diff --git a/kvm-migration-rdma-unregister-fd-handler.patch b/kvm-migration-rdma-unregister-fd-handler.patch index f76b244..b6281a3 100644 --- a/kvm-migration-rdma-unregister-fd-handler.patch +++ b/kvm-migration-rdma-unregister-fd-handler.patch @@ -1,7 +1,7 @@ -From 7e65f4bfb4737b32ace64a5b53fa8915a7d28565 Mon Sep 17 00:00:00 2001 +From 93cfdba0a95999ba3d44afe5c15cc3810446f11b Mon Sep 17 00:00:00 2001 From: "Dr. 
David Alan Gilbert" Date: Thu, 24 Jan 2019 16:41:10 +0000 -Subject: [PATCH] migration/rdma: unregister fd handler +Subject: [PATCH 1/3] migration/rdma: unregister fd handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit diff --git a/kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch b/kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch new file mode 100644 index 0000000..787c637 --- /dev/null +++ b/kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch @@ -0,0 +1,250 @@ +From 04a18cff27e2a0c93682adbdd2013c8f1473d520 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Fri, 21 Dec 2018 15:33:16 +0000 +Subject: [PATCH 2/3] s390x/tod: Properly stop the KVM TOD while the guest is + not running + +RH-Author: David Hildenbrand +Message-id: <20181221153317.27647-2-david@redhat.com> +Patchwork-id: 83741 +O-Subject: [RHEL-8.0 qemu-kvm v2 PATCH 1/2] s390x/tod: Properly stop the KVM TOD while the guest is not running +Bugzilla: 1659127 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth +RH-Acked-by: Laurent Vivier + +Just like on other architectures, we should stop the clock while the guest +is not running. This is already properly done for TCG. Right now, doing an +offline migration (stop, migrate, cont) can easily trigger stalls in the +guest. + +Even doing a + (hmp) stop + ... wait 2 minutes ... + (hmp) cont +will already trigger stalls. + +So whenever the guest stops, backup the KVM TOD. When continuing to run +the guest, restore the KVM TOD. + +One special case is starting a simple VM: Reading the TOD from KVM to +stop it right away until the guest is actually started means that the +time of any simple VM will already differ to the host time. We can +simply leave the TOD running and the guest won't be able to recognize +it. + +For migration, we actually want to keep the TOD stopped until really +starting the guest. 
To be able to catch most errors, we should however +try to set the TOD in addition to simply storing it. So we can still +catch basic migration problems. + +If anything goes wrong while backing up/restoring the TOD, we have to +ignore it (but print a warning). This is then basically a fallback to +old behavior (TOD remains running). + +I tested this very basically with an initrd: + 1. Start a simple VM. Observed that the TOD is kept running. Old + behavior. + 2. Ordinary live migration. Observed that the TOD is temporarily + stopped on the destination when setting the new value and + correctly started when finally starting the guest. + 3. Offline live migration. (stop, migrate, cont). Observed that the + TOD will be stopped on the source with the "stop" command. On the + destination, the TOD is temporarily stopped when setting the new + value and correctly started when finally starting the guest via + "cont". + 4. Simple stop/cont correctly stops/starts the TOD. (multiple stops + or conts in a row have no effect, so works as expected) + +In the future, we might want to send the guest a special kind of time sync +interrupt under some conditions, so it can synchronize its tod to the +host tod. This is interesting for migration scenarios but also when we +get time sync interrupts ourselves. This however will most probably have +to be handled in KVM (e.g. when the tods differ too much) and is not +desired e.g. when debugging the guest (single stepping should not +result in permanent time syncs). I consider something like that an add-on +on top of this basic "don't break the guest" handling. + +Signed-off-by: David Hildenbrand +Message-Id: <20181130094957.4121-1-david@redhat.com> +Acked-by: Christian Borntraeger +Reviewed-by: Thomas Huth +Signed-off-by: Cornelia Huck +(cherry picked from commit 9bc9d3d1ae3bcd1caaad1946494726b52f58b291) +Signed-off-by: David Hildenbrand +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/s390x/tod-kvm.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++- + include/hw/s390x/tod.h | 8 +++- + 2 files changed, 107 insertions(+), 3 deletions(-) + +diff --git a/hw/s390x/tod-kvm.c b/hw/s390x/tod-kvm.c +index df564ab..2456bf7 100644 +--- a/hw/s390x/tod-kvm.c ++++ b/hw/s390x/tod-kvm.c +@@ -10,10 +10,11 @@ + + #include "qemu/osdep.h" + #include "qapi/error.h" ++#include "sysemu/sysemu.h" + #include "hw/s390x/tod.h" + #include "kvm_s390x.h" + +-static void kvm_s390_tod_get(const S390TODState *td, S390TOD *tod, Error **errp) ++static void kvm_s390_get_tod_raw(S390TOD *tod, Error **errp) + { + int r; + +@@ -27,7 +28,17 @@ static void kvm_s390_tod_get(const S390TODState *td, S390TOD *tod, Error **errp) + } + } + +-static void kvm_s390_tod_set(S390TODState *td, const S390TOD *tod, Error **errp) ++static void kvm_s390_tod_get(const S390TODState *td, S390TOD *tod, Error **errp) ++{ ++ if (td->stopped) { ++ *tod = td->base; ++ return; ++ } ++ ++ kvm_s390_get_tod_raw(tod, errp); ++} ++ ++static void kvm_s390_set_tod_raw(const S390TOD *tod, Error **errp) + { + int r; + +@@ -41,18 +52,105 @@ static void kvm_s390_tod_set(S390TODState *td, const S390TOD *tod, Error **errp) + } + } + ++static void kvm_s390_tod_set(S390TODState *td, const S390TOD *tod, Error **errp) ++{ ++ Error *local_err = NULL; ++ ++ /* ++ * Somebody (e.g. migration) set the TOD. We'll store it into KVM to ++ * properly detect errors now but take a look at the runstate to decide ++ * whether really to keep the tod running. E.g. during migration, this ++ * is the point where we want to stop the initially running TOD to fire ++ * it back up when actually starting the migrated guest. 
++ */ ++ kvm_s390_set_tod_raw(tod, &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ ++ if (runstate_is_running()) { ++ td->stopped = false; ++ } else { ++ td->stopped = true; ++ td->base = *tod; ++ } ++} ++ ++static void kvm_s390_tod_vm_state_change(void *opaque, int running, ++ RunState state) ++{ ++ S390TODState *td = opaque; ++ Error *local_err = NULL; ++ ++ if (running && td->stopped) { ++ /* Set the old TOD when running the VM - start the TOD clock. */ ++ kvm_s390_set_tod_raw(&td->base, &local_err); ++ if (local_err) { ++ warn_report_err(local_err); ++ } ++ /* Treat errors like the TOD was running all the time. */ ++ td->stopped = false; ++ } else if (!running && !td->stopped) { ++ /* Store the TOD when stopping the VM - stop the TOD clock. */ ++ kvm_s390_get_tod_raw(&td->base, &local_err); ++ if (local_err) { ++ /* Keep the TOD running in case we could not back it up. */ ++ warn_report_err(local_err); ++ } else { ++ td->stopped = true; ++ } ++ } ++} ++ ++static void kvm_s390_tod_realize(DeviceState *dev, Error **errp) ++{ ++ S390TODState *td = S390_TOD(dev); ++ S390TODClass *tdc = S390_TOD_GET_CLASS(td); ++ Error *local_err = NULL; ++ ++ tdc->parent_realize(dev, &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ ++ /* ++ * We need to know when the VM gets started/stopped to start/stop the TOD. ++ * As we can never have more than one TOD instance (and that will never be ++ * removed), registering here and never unregistering is good enough. 
++ */ ++ qemu_add_vm_change_state_handler(kvm_s390_tod_vm_state_change, td); ++} ++ + static void kvm_s390_tod_class_init(ObjectClass *oc, void *data) + { + S390TODClass *tdc = S390_TOD_CLASS(oc); + ++ device_class_set_parent_realize(DEVICE_CLASS(oc), kvm_s390_tod_realize, ++ &tdc->parent_realize); + tdc->get = kvm_s390_tod_get; + tdc->set = kvm_s390_tod_set; + } + ++static void kvm_s390_tod_init(Object *obj) ++{ ++ S390TODState *td = S390_TOD(obj); ++ ++ /* ++ * The TOD is initially running (value stored in KVM). Avoid needless ++ * loading/storing of the TOD when starting a simple VM, so let it ++ * run although the (never started) VM is stopped. For migration, we ++ * will properly set the TOD later. ++ */ ++ td->stopped = false; ++} ++ + static TypeInfo kvm_s390_tod_info = { + .name = TYPE_KVM_S390_TOD, + .parent = TYPE_S390_TOD, + .instance_size = sizeof(S390TODState), ++ .instance_init = kvm_s390_tod_init, + .class_init = kvm_s390_tod_class_init, + .class_size = sizeof(S390TODClass), + }; +diff --git a/include/hw/s390x/tod.h b/include/hw/s390x/tod.h +index 413c0d7..cbd7552 100644 +--- a/include/hw/s390x/tod.h ++++ b/include/hw/s390x/tod.h +@@ -31,13 +31,19 @@ typedef struct S390TODState { + /* private */ + DeviceState parent_obj; + +- /* unused by KVM implementation */ ++ /* ++ * Used by TCG to remember the time base. Used by KVM to backup the TOD ++ * while the TOD is stopped. ++ */ + S390TOD base; ++ /* Used by KVM to remember if the TOD is stopped and base is valid. 
*/ ++ bool stopped; + } S390TODState; + + typedef struct S390TODClass { + /* private */ + DeviceClass parent_class; ++ void (*parent_realize)(DeviceState *dev, Error **errp); + + /* public */ + void (*get)(const S390TODState *td, S390TOD *tod, Error **errp); +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 2537dd3..0729955 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 8%{?dist} +Release: 9%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -164,6 +164,10 @@ Patch39: kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch Patch40: kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch # For bz#1666601 - [q35] dst qemu core dumped when do rdma migration with Mellanox IB QDR card Patch41: kvm-migration-rdma-unregister-fd-handler.patch +# For bz#1659127 - Stress guest and stop it, then do live migration, guest hit call trace on destination end +Patch42: kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch +# For bz#1659127 - Stress guest and stop it, then do live migration, guest hit call trace on destination end +Patch43: kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1011,6 +1015,15 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Jan 29 2019 Danilo Cesar Lemes de Paula - 3.1.0-9.el8 +- kvm-migration-rdma-unregister-fd-handler.patch [bz#1666601] +- kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch [bz#1659127] +- kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch [bz#1659127] +- Resolves: bz#1659127 + (Stress guest and stop it, then do live migration, guest hit call trace on destination end) +- Resolves: bz#1666601 + ([q35] dst qemu core dumped when do rdma migration with Mellanox IB QDR card) + * Mon Jan 28 2019 Danilo Cesar 
Lemes de Paula - 3.1.0-8.el8 - kvm-migration-rdma-unregister-fd-handler.patch [bz#1666601] - Resolves: bz#1666601 From d9dd3f8dd3918252024700b88a65067d4daf609f Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Tue, 29 Jan 2019 14:28:22 +0000 Subject: [PATCH 015/195] * Tue Jan 29 2019 Danilo Cesar Lemes de Paula - 3.1.0-10.el8 - kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch [bz#1655947] - kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch [bz#1655947] - Resolves: bz#1655947 (qemu-kvm core dumped after unplug the device which was set io throttling parameters) --- ...for-throttling-tgm-unregister-iothre.patch | 122 ++++++++++++++++++ ...-fix-restart-coroutine-iothread-race.patch | 120 +++++++++++++++++ qemu-kvm.spec | 12 +- 3 files changed, 253 insertions(+), 1 deletion(-) create mode 100644 kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch create mode 100644 kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch diff --git a/kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch b/kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch new file mode 100644 index 0000000..6da2540 --- /dev/null +++ b/kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch @@ -0,0 +1,122 @@ +From 91ae068923b70fc62c8504f7c77e42829b4c2e18 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 29 Jan 2019 07:02:50 +0000 +Subject: [PATCH 2/2] iotests: add 238 for throttling tgm unregister iothread + segfault + +RH-Author: Stefan Hajnoczi +Message-id: <20190129070250.22709-3-stefanha@redhat.com> +Patchwork-id: 84138 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 2/2] iotests: add 238 for throttling tgm unregister iothread segfault +Bugzilla: 1655947 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Thomas Huth +RH-Acked-by: Laurent Vivier + +Hot-unplug a scsi-hd using an iothread. The previous patch fixes a +segfault in this scenario. + +This patch adds a regression test. 
+ +Suggested-by: Alberto Garcia +Suggested-by: Kevin Wolf +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Alberto Garcia +Message-id: 20190114133257.30299-3-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 202277f43d544779b7a63123a51c54c3a16b74ad) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + tests/qemu-iotests/group + + Context conflict because downstream doesn't have 236. This patch adds + 238 and doesn't depend on 236. + +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/238 | 47 ++++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/238.out | 6 ++++++ + tests/qemu-iotests/group | 1 + + 3 files changed, 54 insertions(+) + create mode 100755 tests/qemu-iotests/238 + create mode 100644 tests/qemu-iotests/238.out + +diff --git a/tests/qemu-iotests/238 b/tests/qemu-iotests/238 +new file mode 100755 +index 0000000..f81ee11 +--- /dev/null ++++ b/tests/qemu-iotests/238 +@@ -0,0 +1,47 @@ ++#!/usr/bin/env python ++# ++# Regression test for throttle group member unregister segfault with iothread ++# ++# Copyright (c) 2019 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . 
++# ++ ++import sys ++import os ++import iotests ++from iotests import log ++ ++sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts')) ++ ++from qemu import QEMUMachine ++ ++if iotests.qemu_default_machine == 's390-ccw-virtio': ++ virtio_scsi_device = 'virtio-scsi-ccw' ++else: ++ virtio_scsi_device = 'virtio-scsi-pci' ++ ++vm = QEMUMachine(iotests.qemu_prog) ++vm.add_args('-machine', 'accel=kvm') ++vm.launch() ++ ++log(vm.qmp('blockdev-add', node_name='hd0', driver='null-co')) ++log(vm.qmp('object-add', qom_type='iothread', id='iothread0')) ++log(vm.qmp('device_add', id='scsi0', driver=virtio_scsi_device, iothread='iothread0')) ++log(vm.qmp('device_add', id='scsi-hd0', driver='scsi-hd', drive='hd0')) ++log(vm.qmp('block_set_io_throttle', id='scsi-hd0', bps=0, bps_rd=0, bps_wr=0, ++ iops=1000, iops_rd=0, iops_wr=0, conv_keys=False)) ++log(vm.qmp('device_del', id='scsi-hd0')) ++ ++vm.shutdown() +diff --git a/tests/qemu-iotests/238.out b/tests/qemu-iotests/238.out +new file mode 100644 +index 0000000..4de840b +--- /dev/null ++++ b/tests/qemu-iotests/238.out +@@ -0,0 +1,6 @@ ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index 05996ae..268fefa 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -233,3 +233,4 @@ + 233 auto quick + 234 auto quick migration + 235 auto quick ++238 auto quick +-- +1.8.3.1 + diff --git a/kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch b/kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch new file mode 100644 index 0000000..42ff2e7 --- /dev/null +++ b/kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch @@ -0,0 +1,120 @@ +From 02287430957782ffb1db0d7d17693a73925ea02f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 29 Jan 2019 07:02:49 +0000 +Subject: [PATCH 1/2] throttle-groups: fix restart coroutine iothread race + +RH-Author: 
Stefan Hajnoczi +Message-id: <20190129070250.22709-2-stefanha@redhat.com> +Patchwork-id: 84139 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/2] throttle-groups: fix restart coroutine iothread race +Bugzilla: 1655947 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Thomas Huth +RH-Acked-by: Laurent Vivier + +The following QMP command leads to a crash when iothreads are used: + + { 'execute': 'device_del', 'arguments': {'id': 'data'} } + +The backtrace involves the queue restart coroutine where +tgm->throttle_state is a NULL pointer because +throttle_group_unregister_tgm() has already been called: + + (gdb) bt full + #0 0x00005585a7a3b378 in qemu_mutex_lock_impl (mutex=0xffffffffffffffd0, file=0x5585a7bb3d54 "block/throttle-groups.c", line=412) at util/qemu-thread-posix.c:64 + err = + __PRETTY_FUNCTION__ = "qemu_mutex_lock_impl" + __func__ = "qemu_mutex_lock_impl" + #1 0x00005585a79be074 in throttle_group_restart_queue_entry (opaque=0x5585a9de4eb0) at block/throttle-groups.c:412 + _f = + data = 0x5585a9de4eb0 + tgm = 0x5585a9079440 + ts = 0x0 + tg = 0xffffffffffffff98 + is_write = false + empty_queue = 255 + +This coroutine should not execute in the iothread after the throttle +group member has been unregistered! + +The root cause is that the device_del code path schedules the restart +coroutine in the iothread while holding the AioContext lock. Therefore +the iothread cannot execute the coroutine until after device_del +releases the lock - by this time it's too late. + +This patch adds a reference count to ThrottleGroupMember so we can +synchronously wait for restart coroutines to complete. Once they are +done it is safe to unregister the ThrottleGroupMember. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Alberto Garcia +Message-id: 20190114133257.30299-2-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit bc19a0a6e4505390f99d3c593ebaf11b7962cc59) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. 
de Paula +--- + block/throttle-groups.c | 9 +++++++++ + include/block/throttle-groups.h | 5 +++++ + 2 files changed, 14 insertions(+) + +diff --git a/block/throttle-groups.c b/block/throttle-groups.c +index 5d8213a..a5a2037 100644 +--- a/block/throttle-groups.c ++++ b/block/throttle-groups.c +@@ -415,6 +415,9 @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque) + } + + g_free(data); ++ ++ atomic_dec(&tgm->restart_pending); ++ aio_wait_kick(); + } + + static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write) +@@ -430,6 +433,8 @@ static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write + * be no timer pending on this tgm at this point */ + assert(!timer_pending(tgm->throttle_timers.timers[is_write])); + ++ atomic_inc(&tgm->restart_pending); ++ + co = qemu_coroutine_create(throttle_group_restart_queue_entry, rd); + aio_co_enter(tgm->aio_context, co); + } +@@ -538,6 +543,7 @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm, + + tgm->throttle_state = ts; + tgm->aio_context = ctx; ++ atomic_set(&tgm->restart_pending, 0); + + qemu_mutex_lock(&tg->lock); + /* If the ThrottleGroup is new set this ThrottleGroupMember as the token */ +@@ -584,6 +590,9 @@ void throttle_group_unregister_tgm(ThrottleGroupMember *tgm) + return; + } + ++ /* Wait for throttle_group_restart_queue_entry() coroutines to finish */ ++ AIO_WAIT_WHILE(tgm->aio_context, atomic_read(&tgm->restart_pending) > 0); ++ + qemu_mutex_lock(&tg->lock); + for (i = 0; i < 2; i++) { + assert(tgm->pending_reqs[i] == 0); +diff --git a/include/block/throttle-groups.h b/include/block/throttle-groups.h +index e2fd051..712a8e6 100644 +--- a/include/block/throttle-groups.h ++++ b/include/block/throttle-groups.h +@@ -43,6 +43,11 @@ typedef struct ThrottleGroupMember { + */ + unsigned int io_limits_disabled; + ++ /* Number of pending throttle_group_restart_queue_entry() coroutines. ++ * Accessed with atomic operations. 
++ */ ++ unsigned int restart_pending; ++ + /* The following fields are protected by the ThrottleGroup lock. + * See the ThrottleGroup documentation for details. + * throttle_state tells us if I/O limits are configured. */ +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 0729955..974abee 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 9%{?dist} +Release: 10%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -168,6 +168,10 @@ Patch41: kvm-migration-rdma-unregister-fd-handler.patch Patch42: kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch # For bz#1659127 - Stress guest and stop it, then do live migration, guest hit call trace on destination end Patch43: kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch +# For bz#1655947 - qemu-kvm core dumped after unplug the device which was set io throttling parameters +Patch44: kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch +# For bz#1655947 - qemu-kvm core dumped after unplug the device which was set io throttling parameters +Patch45: kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1015,6 +1019,12 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Jan 29 2019 Danilo Cesar Lemes de Paula - 3.1.0-10.el8 +- kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch [bz#1655947] +- kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch [bz#1655947] +- Resolves: bz#1655947 + (qemu-kvm core dumped after unplug the device which was set io throttling parameters) + * Tue Jan 29 2019 Danilo Cesar Lemes de Paula - 3.1.0-9.el8 - kvm-migration-rdma-unregister-fd-handler.patch [bz#1666601] - kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch [bz#1659127] From 
903966e69c12395482a079f21839959de391e40f Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Thu, 31 Jan 2019 20:40:05 +0000 Subject: [PATCH 016/195] * Thu Jan 31 2019 Danilo Cesar Lemes de Paula - 3.1.0-11.el8 - kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch [bz#1644985] - kvm-json-Fix-handling-when-not-interpolating.patch [bz#1668244] - Resolves: bz#1644985 (The "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong - Fast Train) - Resolves: bz#1668244 (qemu-img: /var/tmp/v2vovl9951f8.qcow2: CURL: Error opening file: The requested URL returned error: 404 Not Found) --- ...x-fsfreeze-hook-path-in-the-man-page.patch | 52 ++++++++ ...-Fix-handling-when-not-interpolating.patch | 122 ++++++++++++++++++ qemu-kvm.spec | 14 +- 3 files changed, 187 insertions(+), 1 deletion(-) create mode 100644 kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch create mode 100644 kvm-json-Fix-handling-when-not-interpolating.patch diff --git a/kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch b/kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch new file mode 100644 index 0000000..0f72600 --- /dev/null +++ b/kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch @@ -0,0 +1,52 @@ +From cd667708de23ea78479cc68dd8ae4c9b429a4ec8 Mon Sep 17 00:00:00 2001 +From: Danilo de Paula +Date: Thu, 31 Jan 2019 00:16:44 +0000 +Subject: [PATCH 1/2] Fix fsfreeze-hook path in the man page. + +RH-Author: Danilo de Paula +Message-id: <20190131001644.6237-2-ddepaula@redhat.com> +Patchwork-id: 84152 +O-Subject: [RHEL8/rhel + RHEL8/AV qemu-kvm PATCH 1/1] Fix fsfreeze-hook path in the man page. +Bugzilla: 1644985 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Thomas Huth + +fsfreeze-hook final PATH is defined in redhat/qemu-kvm.spec.template +since it's being copied in the install section. + +In that way, even with the configuration changes proposed by Marc +in qemu-devel [1], the information would still be different. 
+ +Wanting to avoid having to re-write some part of the building system +including a new option, or use [1] and call ./configure again with --with-confsuffix=/qemu-ga, +I decided to manually edit this directly in the file. + +To play safe, an alternative is to do this in the spec file with something like +sed -i '/\/etc\/qemu\/fsfreeze-hook/,${s//\/etc\/qemu-ga\/fsfreeze-hook//;b};$q1' qemu-ga.texi +In case the original file changes during a rebase, sed would fail and +the problem could be detected. But I believe this would make maintaining this even harder. + +[1] http://lists.gnu.org/archive/html/qemu-devel/2018-11/msg05088.html + +Signed-off-by: Danilo C. L. de Paula +--- + qemu-ga.texi | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/qemu-ga.texi b/qemu-ga.texi +index 4c7a8fd..3105e30 100644 +--- a/qemu-ga.texi ++++ b/qemu-ga.texi +@@ -58,7 +58,7 @@ file format). + Enable fsfreeze hook. Accepts an optional argument that specifies + script to run on freeze/thaw. Script will be called with + 'freeze'/'thaw' arguments accordingly (default is +- @samp{/etc/qemu/fsfreeze-hook}). If using -F with an argument, do ++ @samp{/etc/qemu-ga/fsfreeze-hook}). If using -F with an argument, do + not follow -F with a space (for example: + @samp{-F/var/run/fsfreezehook.sh}). 
+ +-- +1.8.3.1 + diff --git a/kvm-json-Fix-handling-when-not-interpolating.patch b/kvm-json-Fix-handling-when-not-interpolating.patch new file mode 100644 index 0000000..e589371 --- /dev/null +++ b/kvm-json-Fix-handling-when-not-interpolating.patch @@ -0,0 +1,122 @@ +From ee704181e5f2dd1ebc6a2de0f9e750a11541cd47 Mon Sep 17 00:00:00 2001 +From: Markus Armbruster +Date: Thu, 31 Jan 2019 14:28:01 +0000 +Subject: [PATCH 2/2] json: Fix % handling when not interpolating + +RH-Author: Markus Armbruster +Message-id: <20190131142801.15268-2-armbru@redhat.com> +Patchwork-id: 84158 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/1] json: Fix % handling when not interpolating +Bugzilla: 1668244 +RH-Acked-by: Richard Jones +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Danilo de Paula + +From: Christophe Fergeau + +Commit 8bca4613 added support for %% in json strings when interpolating, +but in doing so broke handling of % when not interpolating. + +When parse_string() is fed a string token containing '%', it skips the +'%' regardless of ctxt->ap, i.e. even it's not interpolating. If the +'%' is the string's last character, it fails an assertion. Else, it +"merely" swallows the '%'. + +Fix parse_string() to handle '%' specially only when interpolating. + +To gauge the bug's impact, let's review non-interpolating users of this +parser, i.e. code passing NULL context to json_message_parser_init(): + +* tests/check-qjson.c, tests/test-qobject-input-visitor.c, + tests/test-visitor-serialization.c + + Plenty of tests, but we still failed to cover the buggy case. + +* monitor.c: QMP input + +* qga/main.c: QGA input + +* qobject_from_json(): + + - qobject-input-visitor.c: JSON command line option arguments of + -display and -blockdev + + Reproducer: -blockdev '{"%"}' + + - block.c: JSON pseudo-filenames starting with "json:" + + Reproducer: https://bugzilla.redhat.com/show_bug.cgi?id=1668244#c3 + + - block/rbd.c: JSON key pairs + + Pseudo-filenames starting with "rbd:". 
+ +Command line, QMP and QGA input are trusted. + +Filenames are trusted when they come from command line, QMP or HMP. +They are untrusted when they come from from image file headers. +Example: QCOW2 backing file name. Note that this is *not* the security +boundary between host and guest. It's the boundary between host and an +image file from an untrusted source. + +Neither failing an assertion nor skipping a character in a filename of +your choice looks exploitable. Note that we don't support compiling +with NDEBUG. + +Fixes: 8bca4613e6cddd948895b8db3def05950463495b +Cc: qemu-stable@nongnu.org +Signed-off-by: Christophe Fergeau +Message-Id: <20190102140535.11512-1-cfergeau@redhat.com> +Reviewed-by: Eric Blake +Tested-by: Richard W.M. Jones +[Commit message extended to discuss impact] +Signed-off-by: Markus Armbruster +(cherry picked from commit bbc0586ced6e9ffdfd29d89fcc917b3d90ac3938) + +Signed-off-by: Danilo C. L. de Paula +--- + qobject/json-parser.c | 10 ++++++---- + tests/check-qjson.c | 5 +++++ + 2 files changed, 11 insertions(+), 4 deletions(-) + +diff --git a/qobject/json-parser.c b/qobject/json-parser.c +index 5a840df..53e91cb 100644 +--- a/qobject/json-parser.c ++++ b/qobject/json-parser.c +@@ -208,11 +208,13 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) + } + break; + case '%': +- if (ctxt->ap && ptr[1] != '%') { +- parse_error(ctxt, token, "can't interpolate into string"); +- goto out; ++ if (ctxt->ap) { ++ if (ptr[1] != '%') { ++ parse_error(ctxt, token, "can't interpolate into string"); ++ goto out; ++ } ++ ptr++; + } +- ptr++; + /* fall through */ + default: + cp = mod_utf8_codepoint(ptr, 6, &end); +diff --git a/tests/check-qjson.c b/tests/check-qjson.c +index d876a7a..fa2afcc 100644 +--- a/tests/check-qjson.c ++++ b/tests/check-qjson.c +@@ -176,6 +176,11 @@ static void utf8_string(void) + "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5", + "\\u03BA\\u1F79\\u03C3\\u03BC\\u03B5", + }, ++ /* '%' character when not 
interpolating */ ++ { ++ "100%", ++ "100%", ++ }, + /* 2 Boundary condition test cases */ + /* 2.1 First possible sequence of a certain length */ + /* +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 974abee..95b3e86 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 10%{?dist} +Release: 11%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -172,6 +172,10 @@ Patch43: kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch Patch44: kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch # For bz#1655947 - qemu-kvm core dumped after unplug the device which was set io throttling parameters Patch45: kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch +# For bz#1644985 - The "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong - Fast Train +Patch46: kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch +# For bz#1668244 - qemu-img: /var/tmp/v2vovl9951f8.qcow2: CURL: Error opening file: The requested URL returned error: 404 Not Found +Patch47: kvm-json-Fix-handling-when-not-interpolating.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1019,6 +1023,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Thu Jan 31 2019 Danilo Cesar Lemes de Paula - 3.1.0-11.el8 +- kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch [bz#1644985] +- kvm-json-Fix-handling-when-not-interpolating.patch [bz#1668244] +- Resolves: bz#1644985 + (The "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong - Fast Train) +- Resolves: bz#1668244 + (qemu-img: /var/tmp/v2vovl9951f8.qcow2: CURL: Error opening file: The requested URL returned error: 404 Not Found) + * Tue Jan 29 2019 Danilo Cesar Lemes de Paula - 3.1.0-10.el8 - 
kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch [bz#1655947] - kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch [bz#1655947] From df2f32921cc6f9a6cf724b47979c3bb0103766a3 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Fri, 8 Feb 2019 10:09:02 -0200 Subject: [PATCH 017/195] Removing kvm-fix-fsfreeze-hook path commit Removing kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch [bz#1644985] --- qemu-kvm.spec | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 95b3e86..040639c 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 11%{?dist} +Release: 12%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -172,8 +172,6 @@ Patch43: kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch Patch44: kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch # For bz#1655947 - qemu-kvm core dumped after unplug the device which was set io throttling parameters Patch45: kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch -# For bz#1644985 - The "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong - Fast Train -Patch46: kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch # For bz#1668244 - qemu-img: /var/tmp/v2vovl9951f8.qcow2: CURL: Error opening file: The requested URL returned error: 404 Not Found Patch47: kvm-json-Fix-handling-when-not-interpolating.patch @@ -1023,6 +1021,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Thu Jan 31 2019 Danilo Cesar Lemes de Paula - 3.1.0-12.el8 +- Removing kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch [bz#1644985] + * Thu Jan 31 2019 Danilo Cesar Lemes de Paula - 3.1.0-11.el8 - kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch [bz#1644985] - 
kvm-json-Fix-handling-when-not-interpolating.patch [bz#1668244] From 1cbaf605abad0e7676fe626c3709fb27272e7efe Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Fri, 8 Feb 2019 12:18:59 +0000 Subject: [PATCH 018/195] * Fri Feb 08 2019 Danilo Cesar Lemes de Paula - 3.1.0-13.el8 - kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch [bz#1665896] - kvm-scsi-disk-Don-t-use-empty-string-as-device-id.patch [bz#1668248] - kvm-scsi-disk-Add-device_id-property.patch [bz#1668248] - Resolves: bz#1665896 (VNC unix listener socket is deleted after first client quits) - Resolves: bz#1668248 ("An unknown error has occurred" when using cdrom to install the system with two blockdev disks.(when choose installation destination)) --- ...-client-doesn-t-unlink-server-socket.patch | 294 ++++++++++++++++++ kvm-scsi-disk-Add-device_id-property.patch | 98 ++++++ ...-Don-t-use-empty-string-as-device-id.patch | 71 +++++ qemu-kvm.spec | 15 +- 4 files changed, 477 insertions(+), 1 deletion(-) create mode 100644 kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch create mode 100644 kvm-scsi-disk-Add-device_id-property.patch create mode 100644 kvm-scsi-disk-Don-t-use-empty-string-as-device-id.patch diff --git a/kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch b/kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch new file mode 100644 index 0000000..860e1a4 --- /dev/null +++ b/kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch @@ -0,0 +1,294 @@ +From dea7d39cce3b1da16de0bfb47a028f770547098a Mon Sep 17 00:00:00 2001 +From: "Daniel P. Berrange" +Date: Tue, 29 Jan 2019 13:58:57 +0000 +Subject: [PATCH 1/3] io: ensure UNIX client doesn't unlink server socket +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. 
Berrange +Message-id: <20190129135857.10581-2-berrange@redhat.com> +Patchwork-id: 84141 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/1] io: ensure UNIX client doesn't unlink server socket +Bugzilla: 1665896 +RH-Acked-by: John Snow +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé + +The qio_channel_socket_close method for was mistakenly unlinking the +UNIX server socket, even if the channel was a client connection. This +was not noticed with chardevs, since they never call close, but with the +VNC server, this caused the VNC server socket to be deleted after the +first client quit. + +The qio_channel_socket_close method also needlessly reimplemented the +logic that already exists in socket_listen_cleanup(). Just call that +method directly, for listen sockets only. + +This fixes a regression introduced in QEMU 3.0.0 with + + commit d66f78e1eaa832f73c771d9df1b606fe75d52a50 + Author: Pavel Balaev + Date: Mon May 21 19:17:35 2018 +0300 + + Delete AF_UNIX socket after close + +Fixes launchpad #1795100 + +Reviewed-by: Eric Blake +Signed-off-by: Daniel P. Berrangé +(cherry picked from commit 73564c407caedf992a1c688b5fea776a8b56ba2a) +Signed-off-by: Danilo C. L. 
de Paula +--- + io/channel-socket.c | 19 ++-------- + tests/test-io-channel-socket.c | 86 +++++++++++++++++++++++++++++++++++++----- + 2 files changed, 80 insertions(+), 25 deletions(-) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index b50e63a..bc5f80e 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -688,10 +688,13 @@ qio_channel_socket_close(QIOChannel *ioc, + int rc = 0; + + if (sioc->fd != -1) { +- SocketAddress *addr = socket_local_address(sioc->fd, errp); + #ifdef WIN32 + WSAEventSelect(sioc->fd, NULL, 0); + #endif ++ if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_LISTEN)) { ++ socket_listen_cleanup(sioc->fd, errp); ++ } ++ + if (closesocket(sioc->fd) < 0) { + sioc->fd = -1; + error_setg_errno(errp, errno, +@@ -699,20 +702,6 @@ qio_channel_socket_close(QIOChannel *ioc, + return -1; + } + sioc->fd = -1; +- +- if (addr && addr->type == SOCKET_ADDRESS_TYPE_UNIX +- && addr->u.q_unix.path) { +- if (unlink(addr->u.q_unix.path) < 0 && errno != ENOENT) { +- error_setg_errno(errp, errno, +- "Failed to unlink socket %s", +- addr->u.q_unix.path); +- rc = -1; +- } +- } +- +- if (addr) { +- qapi_free_SocketAddress(addr); +- } + } + return rc; + } +diff --git a/tests/test-io-channel-socket.c b/tests/test-io-channel-socket.c +index 0597213..c253ae3 100644 +--- a/tests/test-io-channel-socket.c ++++ b/tests/test-io-channel-socket.c +@@ -49,6 +49,7 @@ static void test_io_channel_set_socket_bufs(QIOChannel *src, + + static void test_io_channel_setup_sync(SocketAddress *listen_addr, + SocketAddress *connect_addr, ++ QIOChannel **srv, + QIOChannel **src, + QIOChannel **dst) + { +@@ -78,7 +79,7 @@ static void test_io_channel_setup_sync(SocketAddress *listen_addr, + + test_io_channel_set_socket_bufs(*src, *dst); + +- object_unref(OBJECT(lioc)); ++ *srv = QIO_CHANNEL(lioc); + } + + +@@ -99,6 +100,7 @@ static void test_io_channel_complete(QIOTask *task, + + static void test_io_channel_setup_async(SocketAddress *listen_addr, + SocketAddress 
*connect_addr, ++ QIOChannel **srv, + QIOChannel **src, + QIOChannel **dst) + { +@@ -146,21 +148,34 @@ static void test_io_channel_setup_async(SocketAddress *listen_addr, + qio_channel_set_delay(*src, false); + test_io_channel_set_socket_bufs(*src, *dst); + +- object_unref(OBJECT(lioc)); ++ *srv = QIO_CHANNEL(lioc); + + g_main_loop_unref(data.loop); + } + + ++static void test_io_channel_socket_path_exists(SocketAddress *addr, ++ bool expectExists) ++{ ++ if (addr->type != SOCKET_ADDRESS_TYPE_UNIX) { ++ return; ++ } ++ ++ g_assert(g_file_test(addr->u.q_unix.path, ++ G_FILE_TEST_EXISTS) == expectExists); ++} ++ ++ + static void test_io_channel(bool async, + SocketAddress *listen_addr, + SocketAddress *connect_addr, + bool passFD) + { +- QIOChannel *src, *dst; ++ QIOChannel *src, *dst, *srv; + QIOChannelTest *test; + if (async) { +- test_io_channel_setup_async(listen_addr, connect_addr, &src, &dst); ++ test_io_channel_setup_async(listen_addr, connect_addr, ++ &srv, &src, &dst); + + g_assert(!passFD || + qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_FD_PASS)); +@@ -169,14 +184,25 @@ static void test_io_channel(bool async, + g_assert(qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_SHUTDOWN)); + g_assert(qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_SHUTDOWN)); + ++ test_io_channel_socket_path_exists(listen_addr, true); ++ + test = qio_channel_test_new(); + qio_channel_test_run_threads(test, true, src, dst); + qio_channel_test_validate(test); + ++ test_io_channel_socket_path_exists(listen_addr, true); ++ ++ /* unref without close, to ensure finalize() cleans up */ ++ + object_unref(OBJECT(src)); + object_unref(OBJECT(dst)); ++ test_io_channel_socket_path_exists(listen_addr, true); + +- test_io_channel_setup_async(listen_addr, connect_addr, &src, &dst); ++ object_unref(OBJECT(srv)); ++ test_io_channel_socket_path_exists(listen_addr, false); ++ ++ test_io_channel_setup_async(listen_addr, connect_addr, ++ &srv, &src, &dst); + + g_assert(!passFD || + 
qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_FD_PASS)); +@@ -189,10 +215,24 @@ static void test_io_channel(bool async, + qio_channel_test_run_threads(test, false, src, dst); + qio_channel_test_validate(test); + ++ /* close before unref, to ensure finalize copes with already closed */ ++ ++ qio_channel_close(src, &error_abort); ++ qio_channel_close(dst, &error_abort); ++ test_io_channel_socket_path_exists(listen_addr, true); ++ + object_unref(OBJECT(src)); + object_unref(OBJECT(dst)); ++ test_io_channel_socket_path_exists(listen_addr, true); ++ ++ qio_channel_close(srv, &error_abort); ++ test_io_channel_socket_path_exists(listen_addr, false); ++ ++ object_unref(OBJECT(srv)); ++ test_io_channel_socket_path_exists(listen_addr, false); + } else { +- test_io_channel_setup_sync(listen_addr, connect_addr, &src, &dst); ++ test_io_channel_setup_sync(listen_addr, connect_addr, ++ &srv, &src, &dst); + + g_assert(!passFD || + qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_FD_PASS)); +@@ -201,14 +241,25 @@ static void test_io_channel(bool async, + g_assert(qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_SHUTDOWN)); + g_assert(qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_SHUTDOWN)); + ++ test_io_channel_socket_path_exists(listen_addr, true); ++ + test = qio_channel_test_new(); + qio_channel_test_run_threads(test, true, src, dst); + qio_channel_test_validate(test); + ++ test_io_channel_socket_path_exists(listen_addr, true); ++ ++ /* unref without close, to ensure finalize() cleans up */ ++ + object_unref(OBJECT(src)); + object_unref(OBJECT(dst)); ++ test_io_channel_socket_path_exists(listen_addr, true); ++ ++ object_unref(OBJECT(srv)); ++ test_io_channel_socket_path_exists(listen_addr, false); + +- test_io_channel_setup_sync(listen_addr, connect_addr, &src, &dst); ++ test_io_channel_setup_sync(listen_addr, connect_addr, ++ &srv, &src, &dst); + + g_assert(!passFD || + qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_FD_PASS)); +@@ -221,8 +272,23 @@ static void 
test_io_channel(bool async, + qio_channel_test_run_threads(test, false, src, dst); + qio_channel_test_validate(test); + ++ test_io_channel_socket_path_exists(listen_addr, true); ++ ++ /* close before unref, to ensure finalize copes with already closed */ ++ ++ qio_channel_close(src, &error_abort); ++ qio_channel_close(dst, &error_abort); ++ test_io_channel_socket_path_exists(listen_addr, true); ++ + object_unref(OBJECT(src)); + object_unref(OBJECT(dst)); ++ test_io_channel_socket_path_exists(listen_addr, true); ++ ++ qio_channel_close(srv, &error_abort); ++ test_io_channel_socket_path_exists(listen_addr, false); ++ ++ object_unref(OBJECT(srv)); ++ test_io_channel_socket_path_exists(listen_addr, false); + } + } + +@@ -316,7 +382,6 @@ static void test_io_channel_unix(bool async) + + qapi_free_SocketAddress(listen_addr); + qapi_free_SocketAddress(connect_addr); +- g_assert(g_file_test(TEST_SOCKET, G_FILE_TEST_EXISTS) == FALSE); + } + + +@@ -335,7 +400,7 @@ static void test_io_channel_unix_fd_pass(void) + { + SocketAddress *listen_addr = g_new0(SocketAddress, 1); + SocketAddress *connect_addr = g_new0(SocketAddress, 1); +- QIOChannel *src, *dst; ++ QIOChannel *src, *dst, *srv; + int testfd; + int fdsend[3]; + int *fdrecv = NULL; +@@ -359,7 +424,7 @@ static void test_io_channel_unix_fd_pass(void) + connect_addr->type = SOCKET_ADDRESS_TYPE_UNIX; + connect_addr->u.q_unix.path = g_strdup(TEST_SOCKET); + +- test_io_channel_setup_sync(listen_addr, connect_addr, &src, &dst); ++ test_io_channel_setup_sync(listen_addr, connect_addr, &srv, &src, &dst); + + memcpy(bufsend, "Hello World", G_N_ELEMENTS(bufsend)); + +@@ -412,6 +477,7 @@ static void test_io_channel_unix_fd_pass(void) + + object_unref(OBJECT(src)); + object_unref(OBJECT(dst)); ++ object_unref(OBJECT(srv)); + qapi_free_SocketAddress(listen_addr); + qapi_free_SocketAddress(connect_addr); + unlink(TEST_SOCKET); +-- +1.8.3.1 + diff --git a/kvm-scsi-disk-Add-device_id-property.patch 
b/kvm-scsi-disk-Add-device_id-property.patch new file mode 100644 index 0000000..96d7d9b --- /dev/null +++ b/kvm-scsi-disk-Add-device_id-property.patch @@ -0,0 +1,98 @@ +From c9f6e5639cc9d7b1d336b55ccacb6673933a3864 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 6 Feb 2019 15:58:29 +0000 +Subject: [PATCH 3/3] scsi-disk: Add device_id property + +RH-Author: Kevin Wolf +Message-id: <20190206155829.14641-3-kwolf@redhat.com> +Patchwork-id: 84254 +O-Subject: [RHEL-7.7/8.0-AV qemu-kvm-rhev PATCH 2/2] scsi-disk: Add device_id property +Bugzilla: 1668248 +RH-Acked-by: Max Reitz +RH-Acked-by: Thomas Huth +RH-Acked-by: Paolo Bonzini + +The new device_id property specifies which value to use for the vendor +specific designator in the Device Identification VPD page. + +In particular, this is necessary for libvirt to maintain guest ABI +compatibility when no serial number is given and a VM is switched from +-drive (where the BlockBackend name is used) to -blockdev (where the +vendor specific designator is left out by default). + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +(cherry picked from commit 7471a649fc3a391dd497297013fb2525ca9821ba) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + hw/scsi/scsi-disk.c | 24 ++++++++++++++++-------- + 1 file changed, 16 insertions(+), 8 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 93eef40..e74e1e7 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -104,6 +104,7 @@ typedef struct SCSIDiskState + char *serial; + char *vendor; + char *product; ++ char *device_id; + bool tray_open; + bool tray_locked; + /* +@@ -642,13 +643,8 @@ static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf) + + case 0x83: /* Device identification page, mandatory */ + { +- const char *str = s->serial ?: blk_name(s->qdev.conf.blk); +- int max_len = s->serial ? 20 : 255 - 8; +- int id_len = strlen(str); ++ int id_len = s->device_id ? 
MIN(strlen(s->device_id), 255 - 8) : 0; + +- if (id_len > max_len) { +- id_len = max_len; +- } + DPRINTF("Inquiry EVPD[Device identification] " + "buffer size %zd\n", req->cmd.xfer); + +@@ -657,7 +653,7 @@ static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf) + outbuf[buflen++] = 0; /* not officially assigned */ + outbuf[buflen++] = 0; /* reserved */ + outbuf[buflen++] = id_len; /* length of data following */ +- memcpy(outbuf + buflen, str, id_len); ++ memcpy(outbuf + buflen, s->device_id, id_len); + buflen += id_len; + } + +@@ -2363,6 +2359,16 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) + if (!s->vendor) { + s->vendor = g_strdup("QEMU"); + } ++ if (!s->device_id) { ++ if (s->serial) { ++ s->device_id = g_strdup_printf("%.20s", s->serial); ++ } else { ++ const char *str = blk_name(s->qdev.conf.blk); ++ if (str && *str) { ++ s->device_id = g_strdup(str); ++ } ++ } ++ } + + if (blk_is_sg(s->qdev.conf.blk)) { + error_setg(errp, "unwanted /dev/sg*"); +@@ -2904,7 +2910,9 @@ static const TypeInfo scsi_disk_base_info = { + DEFINE_PROP_STRING("ver", SCSIDiskState, version), \ + DEFINE_PROP_STRING("serial", SCSIDiskState, serial), \ + DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor), \ +- DEFINE_PROP_STRING("product", SCSIDiskState, product) ++ DEFINE_PROP_STRING("product", SCSIDiskState, product), \ ++ DEFINE_PROP_STRING("device_id", SCSIDiskState, device_id) ++ + + static Property scsi_hd_properties[] = { + DEFINE_SCSI_DISK_PROPERTIES(), +-- +1.8.3.1 + diff --git a/kvm-scsi-disk-Don-t-use-empty-string-as-device-id.patch b/kvm-scsi-disk-Don-t-use-empty-string-as-device-id.patch new file mode 100644 index 0000000..c08ea92 --- /dev/null +++ b/kvm-scsi-disk-Don-t-use-empty-string-as-device-id.patch @@ -0,0 +1,71 @@ +From 18d600a76319abe59dc4b5e371e5807c089f9159 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 6 Feb 2019 15:58:28 +0000 +Subject: [PATCH 2/3] scsi-disk: Don't use empty string as device id +MIME-Version: 1.0 
+Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +Message-id: <20190206155829.14641-2-kwolf@redhat.com> +Patchwork-id: 84253 +O-Subject: [RHEL-7.7/8.0-AV qemu-kvm-rhev PATCH 1/2] scsi-disk: Don't use empty string as device id +Bugzilla: 1668248 +RH-Acked-by: Max Reitz +RH-Acked-by: Thomas Huth +RH-Acked-by: Paolo Bonzini + +scsi-disk includes in the Device Identification VPD page, depending on +configuration amongst others, a vendor specific designator that consists +either of the serial number if given or the BlockBackend name (which is +a host detail that better shouldn't have been leaked to the guest, but +now we have to maintain it for compatibility). + +With anonymous BlockBackends, i.e. scsi-disk devices constructed with +drive=, and no serial number explicitly specified, this ends +up as an empty string. If this happens to more than one disk, we have +accidentally signalled to the OS that this is a multipath setup, which +is obviously not what was intended. + +Instead of using an empty string for the vendor specific designator, +simply leave out that designator, which makes Linux detect such setups +as separate disks again. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Philippe Mathieu-Daudé +(cherry picked from commit a8f58afcdb86e266e06c9dc41a71605e570244c3) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/scsi/scsi-disk.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 0e9027c..93eef40 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -652,12 +652,14 @@ static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf) + DPRINTF("Inquiry EVPD[Device identification] " + "buffer size %zd\n", req->cmd.xfer); + +- outbuf[buflen++] = 0x2; /* ASCII */ +- outbuf[buflen++] = 0; /* not officially assigned */ +- outbuf[buflen++] = 0; /* reserved */ +- outbuf[buflen++] = id_len; /* length of data following */ +- memcpy(outbuf + buflen, str, id_len); +- buflen += id_len; ++ if (id_len) { ++ outbuf[buflen++] = 0x2; /* ASCII */ ++ outbuf[buflen++] = 0; /* not officially assigned */ ++ outbuf[buflen++] = 0; /* reserved */ ++ outbuf[buflen++] = id_len; /* length of data following */ ++ memcpy(outbuf + buflen, str, id_len); ++ buflen += id_len; ++ } + + if (s->qdev.wwn) { + outbuf[buflen++] = 0x1; /* Binary */ +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 040639c..f08edde 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -174,6 +174,12 @@ Patch44: kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch Patch45: kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch # For bz#1668244 - qemu-img: /var/tmp/v2vovl9951f8.qcow2: CURL: Error opening file: The requested URL returned error: 404 Not Found Patch47: kvm-json-Fix-handling-when-not-interpolating.patch +# For bz#1665896 - VNC unix listener socket is deleted after first client quits +Patch48: kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch +# For bz#1668248 - "An unknown error has occurred" when using cdrom to install the system with two blockdev disks.(when choose installation destination) +Patch49: kvm-scsi-disk-Don-t-use-empty-string-as-device-id.patch +# For bz#1668248 - "An unknown error has occurred" when using cdrom to install the system with two blockdev 
disks.(when choose installation destination) +Patch50: kvm-scsi-disk-Add-device_id-property.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1021,8 +1027,15 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog -* Thu Jan 31 2019 Danilo Cesar Lemes de Paula - 3.1.0-12.el8 +* Fri Feb 08 2019 Danilo Cesar Lemes de Paula - 3.1.0-12.el8 - Removing kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch [bz#1644985] +- kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch [bz#1665896] +- kvm-scsi-disk-Don-t-use-empty-string-as-device-id.patch [bz#1668248] +- kvm-scsi-disk-Add-device_id-property.patch [bz#1668248] +- Resolves: bz#1665896 + (VNC unix listener socket is deleted after first client quits) +- Resolves: bz#1668248 + ("An unknown error has occurred" when using cdrom to install the system with two blockdev disks.(when choose installation destination)) * Thu Jan 31 2019 Danilo Cesar Lemes de Paula - 3.1.0-11.el8 - kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch [bz#1644985] From f02b1b6cc15493bbb00383d6a719123206c64cca Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Fri, 8 Feb 2019 12:28:39 +0000 Subject: [PATCH 019/195] Removing unused patch This patch has been dropped from the spec file in the latest change, but not removed from git. --- ...x-fsfreeze-hook-path-in-the-man-page.patch | 52 ------------------- 1 file changed, 52 deletions(-) delete mode 100644 kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch diff --git a/kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch b/kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch deleted file mode 100644 index 0f72600..0000000 --- a/kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch +++ /dev/null @@ -1,52 +0,0 @@ -From cd667708de23ea78479cc68dd8ae4c9b429a4ec8 Mon Sep 17 00:00:00 2001 -From: Danilo de Paula -Date: Thu, 31 Jan 2019 00:16:44 +0000 -Subject: [PATCH 1/2] Fix fsfreeze-hook path in the man page. 
- -RH-Author: Danilo de Paula -Message-id: <20190131001644.6237-2-ddepaula@redhat.com> -Patchwork-id: 84152 -O-Subject: [RHEL8/rhel + RHEL8/AV qemu-kvm PATCH 1/1] Fix fsfreeze-hook path in the man page. -Bugzilla: 1644985 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth - -fsfreeze-hook final PATH is defined in redhat/qemu-kvm.spec.template -since it's being copied in the install section. - -In that way, even with the configuration changes proposed by Marc -in qemu-devel [1], the information would still be different. - -Wanting to avoid having to re-write some part of the building system -including a new option, or use [1] and call ./configure again with --with-confsuffix=/qemu-ga, -I decided to manually edit this directly in the file. - -To play safe, an alternative is to do this in the spec file with something like -sed -i '/\/etc\/qemu\/fsfreeze-hook/,${s//\/etc\/qemu-ga\/fsfreeze-hook//;b};$q1' qemu-ga.texi -In case the original file changes during a rebase, sed would fail and -the problem could be detected. But I believe this would make maintaining this even harder. - -[1] http://lists.gnu.org/archive/html/qemu-devel/2018-11/msg05088.html - -Signed-off-by: Danilo C. L. de Paula ---- - qemu-ga.texi | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/qemu-ga.texi b/qemu-ga.texi -index 4c7a8fd..3105e30 100644 ---- a/qemu-ga.texi -+++ b/qemu-ga.texi -@@ -58,7 +58,7 @@ file format). - Enable fsfreeze hook. Accepts an optional argument that specifies - script to run on freeze/thaw. Script will be called with - 'freeze'/'thaw' arguments accordingly (default is -- @samp{/etc/qemu/fsfreeze-hook}). If using -F with an argument, do -+ @samp{/etc/qemu-ga/fsfreeze-hook}). If using -F with an argument, do - not follow -F with a space (for example: - @samp{-F/var/run/fsfreezehook.sh}). - --- -1.8.3.1 - From b705db35b769f63d0583be2dcd0943047182c8db Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Mon, 11 Feb 2019 16:09:01 +0000 Subject: [PATCH 020/195] * Mon Feb 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-13.el8 - kvm-Acceptance-tests-add-Linux-initrd-checking-test.patch [bz#1669922] - kvm-mmap-alloc-unfold-qemu_ram_mmap.patch [bz#1671519] - kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch [bz#1671519] - kvm-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch [bz#1653590] - kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch [bz#1673014] - kvm-virtio-scsi-Move-BlockBackend-back-to-the-main-AioCo.patch [bz#1656276 bz#1662508] - kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch [bz#1656276 bz#1662508] - kvm-virtio-scsi-Forbid-devices-with-different-iothreads-.patch [bz#1656276 bz#1662508] - Resolves: bz#1653590 ([Fast train]had better stop qemu immediately while guest was making use of an improper page size) - Resolves: bz#1656276 (qemu-kvm core dumped after hotplug the deleted disk with iothread parameter) - Resolves: bz#1662508 (Qemu core dump when start guest with two disks using same drive) - Resolves: bz#1669922 (Backport avocado-qemu tests for QEMU 3.1) - Resolves: bz#1671519 (RHEL8.0 Snapshot3 - qemu doesn't free up hugepage memory when hotplug/hotunplug using memory-backend-file (qemu-kvm)) - Resolves: bz#1673014 (Local VM and migrated VM on the same host can run with same RAW file as visual disk source while without shareable configured or lock manager enabled) --- ...tests-add-Linux-initrd-checking-test.patch | 105 ++++++++++ ...e-at-least-64kiB-pages-for-downstrea.patch | 64 ++++++ ...date_cache-error-path-for-parent-act.patch | 69 +++++++ ...hugetlbfs-misaligned-length-in-ppc64.patch | 177 +++++++++++++++++ kvm-mmap-alloc-unfold-qemu_ram_mmap.patch | 138 +++++++++++++ ...ire-the-AioContext-in-scsi_-_realize.patch | 187 ++++++++++++++++++ ...id-devices-with-different-iothreads-.patch | 116 +++++++++++ ...-BlockBackend-back-to-the-main-AioCo.patch | 186 +++++++++++++++++ qemu-kvm.spec | 43 
+++- 9 files changed, 1084 insertions(+), 1 deletion(-) create mode 100644 kvm-Acceptance-tests-add-Linux-initrd-checking-test.patch create mode 100644 kvm-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch create mode 100644 kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch create mode 100644 kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch create mode 100644 kvm-mmap-alloc-unfold-qemu_ram_mmap.patch create mode 100644 kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch create mode 100644 kvm-virtio-scsi-Forbid-devices-with-different-iothreads-.patch create mode 100644 kvm-virtio-scsi-Move-BlockBackend-back-to-the-main-AioCo.patch diff --git a/kvm-Acceptance-tests-add-Linux-initrd-checking-test.patch b/kvm-Acceptance-tests-add-Linux-initrd-checking-test.patch new file mode 100644 index 0000000..99f762c --- /dev/null +++ b/kvm-Acceptance-tests-add-Linux-initrd-checking-test.patch @@ -0,0 +1,105 @@ +From ba8f254594f7033183cda79b47e2c7a5c1f5bcfe Mon Sep 17 00:00:00 2001 +From: Yash Mankad +Date: Mon, 4 Feb 2019 19:14:22 +0000 +Subject: [PATCH 1/8] Acceptance tests: add Linux initrd checking test +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Yash Mankad +Message-id: +Patchwork-id: 84209 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/1] Acceptance tests: add Linux initrd checking test +Bugzilla: 1669922 +RH-Acked-by: Wainer dos Santos Moschetta +RH-Acked-by: Thomas Huth +RH-Acked-by: Laszlo Ersek + +From: Wainer dos Santos Moschetta + +QEMU used to exits with a not accurate error message when +an initrd > 2GiB was passed. That was fixed on patch: + + commit f3839fda5771596152b75dd1e1a6d050e6e6e380 + Author: Li Zhijian + Date: Thu Sep 13 18:07:13 2018 +0800 + + change get_image_size return type to int64_t + +This change adds a regression test for that fix. 
It starts +QEMU with a 2GiB dummy initrd, and checks that it evaluates the +file size correctly and prints an accurate message. + +Signed-off-by: Wainer dos Santos Moschetta +Reviewed-by: Caio Carrara +Reviewed-by: Cleber Rosa +Reviewed-by: Eduardo Habkost +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Philippe Mathieu-Daudé +Message-Id: <20181109182153.5390-1-wainersm@redhat.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 82d4c923a580751c86dc0852a7cc8e369a78e8ad) +Signed-off-by: Yash Mankad +Signed-off-by: Danilo C. L. de Paula +--- + tests/acceptance/linux_initrd.py | 48 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 48 insertions(+) + create mode 100644 tests/acceptance/linux_initrd.py + +diff --git a/tests/acceptance/linux_initrd.py b/tests/acceptance/linux_initrd.py +new file mode 100644 +index 0000000..737355c +--- /dev/null ++++ b/tests/acceptance/linux_initrd.py +@@ -0,0 +1,48 @@ ++# Linux initrd acceptance test. ++# ++# Copyright (c) 2018 Red Hat, Inc. ++# ++# Author: ++# Wainer dos Santos Moschetta ++# ++# This work is licensed under the terms of the GNU GPL, version 2 or ++# later. See the COPYING file in the top-level directory. ++ ++import tempfile ++from avocado.utils.process import run ++ ++from avocado_qemu import Test ++ ++ ++class LinuxInitrd(Test): ++ """ ++ Checks QEMU evaluates correctly the initrd file passed as -initrd option. ++ ++ :avocado: enable ++ :avocado: tags=x86_64 ++ """ ++ ++ timeout = 60 ++ ++ def test_with_2gib_file_should_exit_error_msg(self): ++ """ ++ Pretends to boot QEMU with an initrd file with size of 2GiB ++ and expect it exits with error message. 
++ """ ++ kernel_url = ('https://mirrors.kernel.org/fedora/releases/28/' ++ 'Everything/x86_64/os/images/pxeboot/vmlinuz') ++ kernel_hash = '238e083e114c48200f80d889f7e32eeb2793e02a' ++ kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) ++ max_size = 2 * (1024 ** 3) - 1 ++ ++ with tempfile.NamedTemporaryFile() as initrd: ++ initrd.seek(max_size) ++ initrd.write(b'\0') ++ initrd.flush() ++ cmd = "%s -kernel %s -initrd %s" % (self.qemu_bin, kernel_path, ++ initrd.name) ++ res = run(cmd, ignore_status=True) ++ self.assertEqual(res.exit_status, 1) ++ expected_msg = r'.*initrd is too large.*max: \d+, need %s.*' % ( ++ max_size + 1) ++ self.assertRegex(res.stderr_text, expected_msg) +-- +1.8.3.1 + diff --git a/kvm-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/kvm-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch new file mode 100644 index 0000000..47fa62c --- /dev/null +++ b/kvm-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -0,0 +1,64 @@ +From b5ede4ceec64a7b812f40457771948420cd3780f Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Wed, 6 Feb 2019 03:58:56 +0000 +Subject: [PATCH 4/8] BZ1653590: Require at least 64kiB pages for downstream + guests & hosts + +RH-Author: David Gibson +Message-id: <20190206035856.19058-1-dgibson@redhat.com> +Patchwork-id: 84246 +O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts +Bugzilla: 1653590 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Serhii Popovych +RH-Acked-by: Thomas Huth + +Most current POWER guests require 64kiB page support, so that's the default +for the cap-hpt-max-pagesize option in qemu which limits available guest +page sizes. We warn if the value is set smaller than that, but don't +outright fail upstream, because we need to allow for the possibility of +guest (and/or host) kernels configured for 4kiB page sizes. 
+ +Downstream, however, we simply don't support 4kiB pagesize configured +kernels in guest or host, so we can have qemu simply error out in this +situation. + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1653590 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=20089737 +Branch: rhel8/master-3.1.0 +Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified + it failed immediately with a qemu error + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr_caps.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c +index 64f98ae..ace7325 100644 +--- a/hw/ppc/spapr_caps.c ++++ b/hw/ppc/spapr_caps.c +@@ -325,12 +325,19 @@ void spapr_check_pagesize(sPAPRMachineState *spapr, hwaddr pagesize, + static void cap_hpt_maxpagesize_apply(sPAPRMachineState *spapr, + uint8_t val, Error **errp) + { ++#if 0 /* disabled for RHEL */ + if (val < 12) { + error_setg(errp, "Require at least 4kiB hpt-max-page-size"); + return; + } else if (val < 16) { + warn_report("Many guests require at least 64kiB hpt-max-page-size"); + } ++#else /* Only page sizes >=64kiB supported for RHEL */ ++ if (val < 16) { ++ error_setg(errp, "Require at least 64kiB hpt-max-page-size"); ++ return; ++ } ++#endif + + spapr_check_pagesize(spapr, qemu_getrampagesize(), errp); + } +-- +1.8.3.1 + diff --git a/kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch b/kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch new file mode 100644 index 0000000..357b528 --- /dev/null +++ b/kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch @@ -0,0 +1,69 @@ +From d6445c856c6199938eccbd73721c0c8257604557 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 6 Feb 2019 15:13:14 +0000 +Subject: [PATCH 5/8] block: Fix invalidate_cache error path for parent + activation + +RH-Author: Kevin Wolf +Message-id: <20190206151314.4789-2-kwolf@redhat.com> +Patchwork-id: 84251 
+O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/1] block: Fix invalidate_cache error path for parent activation +Bugzilla: 1673014 +RH-Acked-by: John Snow +RH-Acked-by: Markus Armbruster +RH-Acked-by: Max Reitz + +bdrv_co_invalidate_cache() clears the BDRV_O_INACTIVE flag before +actually activating a node so that the correct permissions etc. are +taken. In case of errors, the flag must be restored so that the next +call to bdrv_co_invalidate_cache() retries activation. + +Restoring the flag was missing in the error path for a failed +parent->role->activate() call. The consequence is that this attempt to +activate all images correctly fails because we still set errp, however +on the next attempt BDRV_O_INACTIVE is already clear, so we return +success without actually retrying the failed action. + +An example where this is observable in practice is migration to a QEMU +instance that has a raw format block node attached to a guest device +with share-rw=off (the default) while another process holds +BLK_PERM_WRITE for the same image. In this case, all activation steps +before parent->role->activate() succeed because raw can tolerate other +writers to the image. Only the parent callback (in particular +blk_root_activate()) tries to implement the share-rw=on property and +requests exclusive write permissions. This fails when the migration +completes and correctly displays an error. However, a manual 'cont' will +incorrectly resume the VM without calling blk_root_activate() again. + +This case is described in more detail in the following bug report: +https://bugzilla.redhat.com/show_bug.cgi?id=1531888 + +Fix this by correctly restoring the BDRV_O_INACTIVE flag in the error +path. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Tested-by: Markus Armbruster +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit 78fc3b3a26c145eebcdee992988644974b243a74) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/block.c b/block.c +index 811239c..1ec4512 100644 +--- a/block.c ++++ b/block.c +@@ -4553,6 +4553,7 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, + if (parent->role->activate) { + parent->role->activate(parent, &local_err); + if (local_err) { ++ bs->open_flags |= BDRV_O_INACTIVE; + error_propagate(errp, local_err); + return; + } +-- +1.8.3.1 + diff --git a/kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch b/kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch new file mode 100644 index 0000000..7bf4e7f --- /dev/null +++ b/kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch @@ -0,0 +1,177 @@ +From 1a283b8cdd349b9085488a516f26f453c8591ce2 Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Tue, 5 Feb 2019 04:47:57 +0000 +Subject: [PATCH 3/8] mmap-alloc: fix hugetlbfs misaligned length in ppc64 + +RH-Author: David Gibson +Message-id: <20190205044757.13591-3-dgibson@redhat.com> +Patchwork-id: 84233 +O-Subject: [RHELAV-8.0 qemu-kvm PATCH 2/2] mmap-alloc: fix hugetlbfs misaligned length in ppc64 +Bugzilla: 1671519 +RH-Acked-by: Pankaj Gupta +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth + +From: Murilo Opsfelder Araujo + +The commit 7197fb4058bcb68986bae2bb2c04d6370f3e7218 ("util/mmap-alloc: +fix hugetlb support on ppc64") fixed Huge TLB mappings on ppc64. + +However, we still need to consider the underlying huge page size +during munmap() because it requires that both address and length be a +multiple of the underlying huge page size for Huge TLB mappings. +Quote from "Huge page (Huge TLB) mappings" paragraph under NOTES +section of the munmap(2) manual: + + "For munmap(), addr and length must both be a multiple of the + underlying huge page size." 
+ +On ppc64, the munmap() in qemu_ram_munmap() does not work for Huge TLB +mappings because the mapped segment can be aligned with the underlying +huge page size, not aligned with the native system page size, as +returned by getpagesize(). + +This has the side effect of not releasing huge pages back to the pool +after a hugetlbfs file-backed memory device is hot-unplugged. + +This patch fixes the situation in qemu_ram_mmap() and +qemu_ram_munmap() by considering the underlying page size on ppc64. + +After this patch, memory hot-unplug releases huge pages back to the +pool. + +Fixes: 7197fb4058bcb68986bae2bb2c04d6370f3e7218 +Signed-off-by: Murilo Opsfelder Araujo +Reviewed-by: Greg Kurz +Signed-off-by: David Gibson +(cherry picked from commit 53adb9d43e1abba187387a51f238e878e934c647) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1671519 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + exec.c | 4 ++-- + include/qemu/mmap-alloc.h | 2 +- + util/mmap-alloc.c | 22 ++++++++++++++++------ + util/oslib-posix.c | 2 +- + 4 files changed, 20 insertions(+), 10 deletions(-) + +diff --git a/exec.c b/exec.c +index bb6170d..38eaf0f 100644 +--- a/exec.c ++++ b/exec.c +@@ -1870,7 +1870,7 @@ static void *file_ram_alloc(RAMBlock *block, + if (mem_prealloc) { + os_mem_prealloc(fd, area, memory, smp_cpus, errp); + if (errp && *errp) { +- qemu_ram_munmap(area, memory); ++ qemu_ram_munmap(fd, area, memory); + return NULL; + } + } +@@ -2391,7 +2391,7 @@ static void reclaim_ramblock(RAMBlock *block) + xen_invalidate_map_cache_entry(block->host); + #ifndef _WIN32 + } else if (block->fd >= 0) { +- qemu_ram_munmap(block->host, block->max_length); ++ qemu_ram_munmap(block->fd, block->host, block->max_length); + close(block->fd); + #endif + } else { +diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h +index 50385e3..ef04f0e 100644 +--- a/include/qemu/mmap-alloc.h ++++ b/include/qemu/mmap-alloc.h +@@ -9,6 +9,6 @@ size_t 
qemu_mempath_getpagesize(const char *mem_path); + + void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared); + +-void qemu_ram_munmap(void *ptr, size_t size); ++void qemu_ram_munmap(int fd, void *ptr, size_t size); + + #endif +diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c +index f71ea03..8565885 100644 +--- a/util/mmap-alloc.c ++++ b/util/mmap-alloc.c +@@ -80,6 +80,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) + int flags; + int guardfd; + size_t offset; ++ size_t pagesize; + size_t total; + void *guardptr; + void *ptr; +@@ -100,7 +101,8 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) + * anonymous memory is OK. + */ + flags = MAP_PRIVATE; +- if (fd == -1 || qemu_fd_getpagesize(fd) == getpagesize()) { ++ pagesize = qemu_fd_getpagesize(fd); ++ if (fd == -1 || pagesize == getpagesize()) { + guardfd = -1; + flags |= MAP_ANONYMOUS; + } else { +@@ -109,6 +111,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) + } + #else + guardfd = -1; ++ pagesize = getpagesize(); + flags = MAP_PRIVATE | MAP_ANONYMOUS; + #endif + +@@ -120,7 +123,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) + + assert(is_power_of_2(align)); + /* Always align to host page size */ +- assert(align >= getpagesize()); ++ assert(align >= pagesize); + + flags = MAP_FIXED; + flags |= fd == -1 ? MAP_ANONYMOUS : 0; +@@ -143,17 +146,24 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) + * a guard page guarding against potential buffer overflows. 
+ */ + total -= offset; +- if (total > size + getpagesize()) { +- munmap(ptr + size + getpagesize(), total - size - getpagesize()); ++ if (total > size + pagesize) { ++ munmap(ptr + size + pagesize, total - size - pagesize); + } + + return ptr; + } + +-void qemu_ram_munmap(void *ptr, size_t size) ++void qemu_ram_munmap(int fd, void *ptr, size_t size) + { ++ size_t pagesize; ++ + if (ptr) { + /* Unmap both the RAM block and the guard page */ +- munmap(ptr, size + getpagesize()); ++#if defined(__powerpc64__) && defined(__linux__) ++ pagesize = qemu_fd_getpagesize(fd); ++#else ++ pagesize = getpagesize(); ++#endif ++ munmap(ptr, size + pagesize); + } + } +diff --git a/util/oslib-posix.c b/util/oslib-posix.c +index c1bee2a..97b2f3b 100644 +--- a/util/oslib-posix.c ++++ b/util/oslib-posix.c +@@ -226,7 +226,7 @@ void qemu_vfree(void *ptr) + void qemu_anon_ram_free(void *ptr, size_t size) + { + trace_qemu_anon_ram_free(ptr, size); +- qemu_ram_munmap(ptr, size); ++ qemu_ram_munmap(-1, ptr, size); + } + + void qemu_set_block(int fd) +-- +1.8.3.1 + diff --git a/kvm-mmap-alloc-unfold-qemu_ram_mmap.patch b/kvm-mmap-alloc-unfold-qemu_ram_mmap.patch new file mode 100644 index 0000000..3a6dd86 --- /dev/null +++ b/kvm-mmap-alloc-unfold-qemu_ram_mmap.patch @@ -0,0 +1,138 @@ +From 2215d38d1231284cced64d94b4430e92c9e2c017 Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Tue, 5 Feb 2019 04:47:56 +0000 +Subject: [PATCH 2/8] mmap-alloc: unfold qemu_ram_mmap() + +RH-Author: David Gibson +Message-id: <20190205044757.13591-2-dgibson@redhat.com> +Patchwork-id: 84234 +O-Subject: [RHELAV-8.0 qemu-kvm PATCH 1/2] mmap-alloc: unfold qemu_ram_mmap() +Bugzilla: 1671519 +RH-Acked-by: Pankaj Gupta +RH-Acked-by: Laurent Vivier +RH-Acked-by: Serhii Popovych + +From: Murilo Opsfelder Araujo + +Unfold parts of qemu_ram_mmap() for the sake of understanding, moving +declarations to the top, and keeping architecture-specifics in the +ifdef-else blocks. No changes in the function behaviour. 
+ +Give ptr and ptr1 meaningful names: + ptr -> guardptr : pointer to the PROT_NONE guard region + ptr1 -> ptr : pointer to the mapped memory returned to caller + +Signed-off-by: Murilo Opsfelder Araujo +Reviewed-by: Greg Kurz +Signed-off-by: David Gibson +(cherry picked from commit 2044c3e7116eeac0449dcb4a4130cc8f8b9310da) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1671519 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + util/mmap-alloc.c | 53 ++++++++++++++++++++++++++++++++++------------------- + 1 file changed, 34 insertions(+), 19 deletions(-) + +diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c +index fd329ec..f71ea03 100644 +--- a/util/mmap-alloc.c ++++ b/util/mmap-alloc.c +@@ -77,11 +77,19 @@ size_t qemu_mempath_getpagesize(const char *mem_path) + + void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) + { ++ int flags; ++ int guardfd; ++ size_t offset; ++ size_t total; ++ void *guardptr; ++ void *ptr; ++ + /* + * Note: this always allocates at least one extra page of virtual address + * space, even if size is already aligned. + */ +- size_t total = size + align; ++ total = size + align; ++ + #if defined(__powerpc64__) && defined(__linux__) + /* On ppc64 mappings in the same segment (aka slice) must share the same + * page size. Since we will be re-allocating part of this segment +@@ -91,16 +99,22 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) + * We do this unless we are using the system page size, in which case + * anonymous memory is OK. + */ +- int anonfd = fd == -1 || qemu_fd_getpagesize(fd) == getpagesize() ? -1 : fd; +- int flags = anonfd == -1 ? 
MAP_ANONYMOUS : MAP_NORESERVE; +- void *ptr = mmap(0, total, PROT_NONE, flags | MAP_PRIVATE, anonfd, 0); ++ flags = MAP_PRIVATE; ++ if (fd == -1 || qemu_fd_getpagesize(fd) == getpagesize()) { ++ guardfd = -1; ++ flags |= MAP_ANONYMOUS; ++ } else { ++ guardfd = fd; ++ flags |= MAP_NORESERVE; ++ } + #else +- void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); ++ guardfd = -1; ++ flags = MAP_PRIVATE | MAP_ANONYMOUS; + #endif +- size_t offset; +- void *ptr1; + +- if (ptr == MAP_FAILED) { ++ guardptr = mmap(0, total, PROT_NONE, flags, guardfd, 0); ++ ++ if (guardptr == MAP_FAILED) { + return MAP_FAILED; + } + +@@ -108,19 +122,20 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) + /* Always align to host page size */ + assert(align >= getpagesize()); + +- offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; +- ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, +- MAP_FIXED | +- (fd == -1 ? MAP_ANONYMOUS : 0) | +- (shared ? MAP_SHARED : MAP_PRIVATE), +- fd, 0); +- if (ptr1 == MAP_FAILED) { +- munmap(ptr, total); ++ flags = MAP_FIXED; ++ flags |= fd == -1 ? MAP_ANONYMOUS : 0; ++ flags |= shared ? 
MAP_SHARED : MAP_PRIVATE; ++ offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr; ++ ++ ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE, flags, fd, 0); ++ ++ if (ptr == MAP_FAILED) { ++ munmap(guardptr, total); + return MAP_FAILED; + } + + if (offset > 0) { +- munmap(ptr, offset); ++ munmap(guardptr, offset); + } + + /* +@@ -129,10 +144,10 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) + */ + total -= offset; + if (total > size + getpagesize()) { +- munmap(ptr1 + size + getpagesize(), total - size - getpagesize()); ++ munmap(ptr + size + getpagesize(), total - size - getpagesize()); + } + +- return ptr1; ++ return ptr; + } + + void qemu_ram_munmap(void *ptr, size_t size) +-- +1.8.3.1 + diff --git a/kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch b/kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch new file mode 100644 index 0000000..da2e60b --- /dev/null +++ b/kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch @@ -0,0 +1,187 @@ +From acf3927aef42a4a380fa1366b2eca8a8f2b44bc4 Mon Sep 17 00:00:00 2001 +From: Markus Armbruster +Date: Thu, 7 Feb 2019 12:18:18 +0000 +Subject: [PATCH 7/8] scsi-disk: Acquire the AioContext in scsi_*_realize() + +RH-Author: Markus Armbruster +Message-id: <20190207121819.20092-3-armbru@redhat.com> +Patchwork-id: 84291 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 2/3] scsi-disk: Acquire the AioContext in scsi_*_realize() +Bugzilla: 1656276 1662508 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +From: Alberto Garcia + +This fixes a crash when attaching two disks with the same blockdev to +a SCSI device that is using iothreads. Test case included. + +Signed-off-by: Alberto Garcia +Signed-off-by: Kevin Wolf +(cherry picked from commit 3ff35ba391134e4e43ab96152deb38a62e62f858) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/scsi/scsi-disk.c | 23 ++++++++++++++++++++--- + tests/qemu-iotests/240 | 18 ++++++++++++++++++ + tests/qemu-iotests/240.out | 16 ++++++++++++++++ + 3 files changed, 54 insertions(+), 3 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index e74e1e7..e6db6d7 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -2389,10 +2389,13 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) + static void scsi_hd_realize(SCSIDevice *dev, Error **errp) + { + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); ++ AioContext *ctx = NULL; + /* can happen for devices without drive. The error message for missing + * backend will be issued in scsi_realize + */ + if (s->qdev.conf.blk) { ++ ctx = blk_get_aio_context(s->qdev.conf.blk); ++ aio_context_acquire(ctx); + blkconf_blocksizes(&s->qdev.conf); + } + s->qdev.blocksize = s->qdev.conf.logical_block_size; +@@ -2401,11 +2404,15 @@ static void scsi_hd_realize(SCSIDevice *dev, Error **errp) + s->product = g_strdup("QEMU HARDDISK"); + } + scsi_realize(&s->qdev, errp); ++ if (ctx) { ++ aio_context_release(ctx); ++ } + } + + static void scsi_cd_realize(SCSIDevice *dev, Error **errp) + { + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); ++ AioContext *ctx; + int ret; + + if (!dev->conf.blk) { +@@ -2416,6 +2423,8 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) + assert(ret == 0); + } + ++ ctx = blk_get_aio_context(dev->conf.blk); ++ aio_context_acquire(ctx); + s->qdev.blocksize = 2048; + s->qdev.type = TYPE_ROM; + s->features |= 1 << SCSI_DISK_F_REMOVABLE; +@@ -2423,6 +2432,7 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) + s->product = g_strdup("QEMU CD-ROM"); + } + scsi_realize(&s->qdev, errp); ++ aio_context_release(ctx); + } + + static void scsi_disk_realize(SCSIDevice *dev, Error **errp) +@@ -2561,6 +2571,7 @@ static int get_device_type(SCSIDiskState *s) + static void scsi_block_realize(SCSIDevice *dev, Error **errp) + { + SCSIDiskState *s 
= DO_UPCAST(SCSIDiskState, qdev, dev); ++ AioContext *ctx; + int sg_version; + int rc; + +@@ -2575,6 +2586,9 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) + "be removed in a future version"); + } + ++ ctx = blk_get_aio_context(s->qdev.conf.blk); ++ aio_context_acquire(ctx); ++ + /* check we are using a driver managing SG_IO (version 3 and after) */ + rc = blk_ioctl(s->qdev.conf.blk, SG_GET_VERSION_NUM, &sg_version); + if (rc < 0) { +@@ -2582,18 +2596,18 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) + if (rc != -EPERM) { + error_append_hint(errp, "Is this a SCSI device?\n"); + } +- return; ++ goto out; + } + if (sg_version < 30000) { + error_setg(errp, "scsi generic interface too old"); +- return; ++ goto out; + } + + /* get device type from INQUIRY data */ + rc = get_device_type(s); + if (rc < 0) { + error_setg(errp, "INQUIRY failed"); +- return; ++ goto out; + } + + /* Make a guess for the block size, we'll fix it when the guest sends. +@@ -2613,6 +2627,9 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) + + scsi_realize(&s->qdev, errp); + scsi_generic_read_device_inquiry(&s->qdev); ++ ++out: ++ aio_context_release(ctx); + } + + typedef struct SCSIBlockReq { +diff --git a/tests/qemu-iotests/240 b/tests/qemu-iotests/240 +index ead7ee0..5d499c9 100755 +--- a/tests/qemu-iotests/240 ++++ b/tests/qemu-iotests/240 +@@ -83,6 +83,24 @@ run_qemu < +Date: Thu, 7 Feb 2019 12:18:19 +0000 +Subject: [PATCH 8/8] virtio-scsi: Forbid devices with different iothreads + sharing a blockdev + +RH-Author: Markus Armbruster +Message-id: <20190207121819.20092-4-armbru@redhat.com> +Patchwork-id: 84289 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 3/3] virtio-scsi: Forbid devices with different iothreads sharing a blockdev +Bugzilla: 1656276 1662508 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +From: Alberto Garcia + +This patch forbids attaching a disk to a SCSI device if its using a +different AioContext. 
Test case included. + +Signed-off-by: Alberto Garcia +Signed-off-by: Kevin Wolf +(cherry picked from commit eb97813ff5fd5bdffc8ed9f5be5a3a50eae70a2c) +Signed-off-by: Danilo C. L. de Paula +--- + hw/scsi/virtio-scsi.c | 7 +++++++ + tests/qemu-iotests/240 | 22 ++++++++++++++++++++++ + tests/qemu-iotests/240.out | 20 ++++++++++++++++++++ + 3 files changed, 49 insertions(+) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index f0d4f10..66df30d 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -800,9 +800,16 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + return; + } + if (s->ctx && !s->dataplane_fenced) { ++ AioContext *ctx; + if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; + } ++ ctx = blk_get_aio_context(sd->conf.blk); ++ if (ctx != s->ctx && ctx != qemu_get_aio_context()) { ++ error_setg(errp, "Cannot attach a blockdev that is using " ++ "a different iothread"); ++ return; ++ } + virtio_scsi_acquire(s); + blk_set_aio_context(sd->conf.blk, s->ctx); + virtio_scsi_release(s); +diff --git a/tests/qemu-iotests/240 b/tests/qemu-iotests/240 +index 5d499c9..65cc3b3 100755 +--- a/tests/qemu-iotests/240 ++++ b/tests/qemu-iotests/240 +@@ -101,6 +101,28 @@ run_qemu < +Date: Thu, 7 Feb 2019 12:18:17 +0000 +Subject: [PATCH 6/8] virtio-scsi: Move BlockBackend back to the main + AioContext on unplug + +RH-Author: Markus Armbruster +Message-id: <20190207121819.20092-2-armbru@redhat.com> +Patchwork-id: 84290 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/3] virtio-scsi: Move BlockBackend back to the main AioContext on unplug +Bugzilla: 1656276 1662508 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +From: Alberto Garcia + +This fixes a crash when attaching a disk to a SCSI device using +iothreads, then detaching it and reattaching it again. Test case +included. 
+ +Signed-off-by: Alberto Garcia +Signed-off-by: Kevin Wolf +(cherry picked from commit a6f230c8d13a7ff3a0c7f1097412f44bfd9eff0b) +[Trivial conflict in tests/qemu-iotests/group due to lack of commit +1c4e7b640b resolved] +Signed-off-by: Markus Armbruster + +Signed-off-by: Danilo C. L. de Paula +--- + hw/scsi/virtio-scsi.c | 6 ++++ + tests/qemu-iotests/240 | 89 ++++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/240.out | 18 ++++++++++ + tests/qemu-iotests/group | 1 + + 4 files changed, 114 insertions(+) + create mode 100755 tests/qemu-iotests/240 + create mode 100644 tests/qemu-iotests/240.out + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 9f754c4..f0d4f10 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -833,6 +833,12 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, + virtio_scsi_release(s); + } + ++ if (s->ctx) { ++ virtio_scsi_acquire(s); ++ blk_set_aio_context(sd->conf.blk, qemu_get_aio_context()); ++ virtio_scsi_release(s); ++ } ++ + qdev_simple_device_unplug_cb(hotplug_dev, dev, errp); + } + +diff --git a/tests/qemu-iotests/240 b/tests/qemu-iotests/240 +new file mode 100755 +index 0000000..ead7ee0 +--- /dev/null ++++ b/tests/qemu-iotests/240 +@@ -0,0 +1,89 @@ ++#!/bin/bash ++# ++# Test hot plugging and unplugging with iothreads ++# ++# Copyright (C) 2019 Igalia, S.L. ++# Author: Alberto Garcia ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. 
++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++# creator ++owner=berto@igalia.com ++ ++seq=`basename $0` ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++# get standard environment, filters and checks ++. ./common.rc ++. ./common.filter ++ ++_supported_fmt generic ++_supported_proto generic ++_supported_os Linux ++ ++do_run_qemu() ++{ ++ echo Testing: "$@" ++ $QEMU -nographic -qmp stdio -serial none "$@" ++ echo ++} ++ ++# Remove QMP events from (pretty-printed) output. Doesn't handle ++# nested dicts correctly, but we don't get any of those in this test. ++_filter_qmp_events() ++{ ++ tr '\n' '\t' | sed -e \ ++ 's/{\s*"timestamp":\s*{[^}]*},\s*"event":[^,}]*\(,\s*"data":\s*{[^}]*}\)\?\s*}\s*//g' \ ++ | tr '\t' '\n' ++} ++ ++run_qemu() ++{ ++ do_run_qemu "$@" 2>&1 | _filter_qmp | _filter_qmp_events ++} ++ ++case "$QEMU_DEFAULT_MACHINE" in ++ s390-ccw-virtio) ++ virtio_scsi=virtio-scsi-ccw ++ ;; ++ *) ++ virtio_scsi=virtio-scsi-pci ++ ;; ++esac ++ ++echo ++echo === Unplug a SCSI disk and then plug it again === ++echo ++ ++run_qemu < - 3.1.0-13.el8 +- kvm-Acceptance-tests-add-Linux-initrd-checking-test.patch [bz#1669922] +- kvm-mmap-alloc-unfold-qemu_ram_mmap.patch [bz#1671519] +- kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch [bz#1671519] +- kvm-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch [bz#1653590] +- kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch [bz#1673014] +- kvm-virtio-scsi-Move-BlockBackend-back-to-the-main-AioCo.patch [bz#1656276 bz#1662508] +- kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch [bz#1656276 bz#1662508] +- kvm-virtio-scsi-Forbid-devices-with-different-iothreads-.patch [bz#1656276 bz#1662508] +- Resolves: bz#1653590 + ([Fast train]had better stop qemu immediately while guest was making use of an improper page size) +- Resolves: bz#1656276 + (qemu-kvm core dumped after hotplug the 
deleted disk with iothread parameter) +- Resolves: bz#1662508 + (Qemu core dump when start guest with two disks using same drive) +- Resolves: bz#1669922 + (Backport avocado-qemu tests for QEMU 3.1) +- Resolves: bz#1671519 + (RHEL8.0 Snapshot3 - qemu doesn't free up hugepage memory when hotplug/hotunplug using memory-backend-file (qemu-kvm)) +- Resolves: bz#1673014 + (Local VM and migrated VM on the same host can run with same RAW file as visual disk source while without shareable configured or lock manager enabled) + * Fri Feb 08 2019 Danilo Cesar Lemes de Paula - 3.1.0-12.el8 - Removing kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch [bz#1644985] - kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch [bz#1665896] From be7cf91b151212a7510d43564abec957144739d3 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Tue, 12 Feb 2019 14:09:04 +0000 Subject: [PATCH 021/195] * Tue Feb 12 2019 Danilo Cesar Lemes de Paula - 3.1.0-14.el8 - kvm-doc-fix-the-configuration-path.patch [bz#1644985] - Resolves: bz#1644985 (The "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong - Fast Train) --- kvm-doc-fix-the-configuration-path.patch | 80 ++++++++++++++++++++++++ qemu-kvm.spec | 9 ++- 2 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 kvm-doc-fix-the-configuration-path.patch diff --git a/kvm-doc-fix-the-configuration-path.patch b/kvm-doc-fix-the-configuration-path.patch new file mode 100644 index 0000000..dc729c1 --- /dev/null +++ b/kvm-doc-fix-the-configuration-path.patch @@ -0,0 +1,80 @@ +From a02908747e85b4064da8acabe724f9202ab17fba Mon Sep 17 00:00:00 2001 +From: Danilo de Paula +Date: Fri, 8 Feb 2019 11:51:14 +0000 +Subject: [PATCH] doc: fix the configuration path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Danilo de Paula +Message-id: <20190208115114.24850-2-ddepaula@redhat.com> +Patchwork-id: 84320 +O-Subject: [RHEL8/rhel qemu-kvm PATCH v2 
1/1] doc: fix the configuration path +Bugzilla: 1644985 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Wainer dos Santos Moschetta + +From: Marc-André Lureau + +Use a CONFDIR variable to show the configured sysconf path in the +generated documentations (html, man pages etc). + +Related to: +https://bugzilla.redhat.com/show_bug.cgi?id=1644985 + +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + Makefile | 9 ++++++--- + qemu-ga.texi | 4 ++-- + 2 files changed, 8 insertions(+), 5 deletions(-) + +diff --git a/Makefile b/Makefile +index 8710720..24e9ef0 100644 +--- a/Makefile ++++ b/Makefile +@@ -937,11 +937,14 @@ ui/shader.o: $(SRC_PATH)/ui/shader.c \ + MAKEINFO=makeinfo + MAKEINFOINCLUDES= -I docs -I $( $@,"GEN","$@") ++docs/version.texi: $(SRC_PATH)/VERSION config-host.mak ++ $(call quiet-command,(\ ++ echo "@set VERSION $(VERSION)" && \ ++ echo "@set CONFDIR $(qemu_confdir)" \ ++ )> $@,"GEN","$@") + + %.html: %.texi docs/version.texi + $(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \ +diff --git a/qemu-ga.texi b/qemu-ga.texi +index 4c7a8fd..f00ad83 100644 +--- a/qemu-ga.texi ++++ b/qemu-ga.texi +@@ -30,7 +30,7 @@ set user's password + @end itemize + + qemu-ga will read a system configuration file on startup (located at +-@file{/etc/qemu/qemu-ga.conf} by default), then parse remaining ++@file{@value{CONFDIR}/qemu-ga.conf} by default), then parse remaining + configuration options on the command line. For the same key, the last + option wins, but the lists accumulate (see below for configuration + file format). +@@ -58,7 +58,7 @@ file format). + Enable fsfreeze hook. Accepts an optional argument that specifies + script to run on freeze/thaw. Script will be called with + 'freeze'/'thaw' arguments accordingly (default is +- @samp{/etc/qemu/fsfreeze-hook}). If using -F with an argument, do ++ @samp{@value{CONFDIR}/fsfreeze-hook}). 
If using -F with an argument, do + not follow -F with a space (for example: + @samp{-F/var/run/fsfreezehook.sh}). + +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 73b1c92..36163fe 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 13%{?dist} +Release: 14%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -199,6 +199,8 @@ Patch57: kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch # For bz#1656276 - qemu-kvm core dumped after hotplug the deleted disk with iothread parameter # For bz#1662508 - Qemu core dump when start guest with two disks using same drive Patch58: kvm-virtio-scsi-Forbid-devices-with-different-iothreads-.patch +# For bz#1644985 - The "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong - Fast Train +Patch59: kvm-doc-fix-the-configuration-path.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1046,6 +1048,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Feb 12 2019 Danilo Cesar Lemes de Paula - 3.1.0-14.el8 +- kvm-doc-fix-the-configuration-path.patch [bz#1644985] +- Resolves: bz#1644985 + (The "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong - Fast Train) + * Mon Feb 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-13.el8 - kvm-Acceptance-tests-add-Linux-initrd-checking-test.patch [bz#1669922] - kvm-mmap-alloc-unfold-qemu_ram_mmap.patch [bz#1671519] From 876f46e0cfbdad116778be370eeb5901d26054e1 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Wed, 13 Feb 2019 21:25:57 +0000 Subject: [PATCH 022/195] * Wed Feb 13 2019 Danilo Cesar Lemes de Paula - 3.1.0-15.el8 - kvm-Add-raw-qcow2-nbd-and-luks-iotests-to-run-during-the.patch [bz#1664855] - kvm-Introduce-the-qemu-kvm-tests-rpm.patch [bz#1669924] - Resolves: bz#1664855 (Run iotests in qemu-kvm build %check phase) - Resolves: bz#1669924 (qemu-kvm packaging: Package the avocado_qemu tests and qemu-iotests in a new rpm) --- README.tests | 39 +++++++++++++++++++++++++++++++++++ qemu-kvm.spec | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 README.tests diff --git a/README.tests b/README.tests new file mode 100644 index 0000000..9932773 --- /dev/null +++ b/README.tests @@ -0,0 +1,39 @@ +qemu-kvm-tests README +===================== + +The qemu-kvm-tests rpm contains tests that can be used to verify the +functionality of the installed qemu-kvm package + +When installed, the files from this rpm will be arranged in the following +directory structure + +tests-src/ +├── README +├── scripts +│   ├── qemu.py +│   └── qmp +└── tests + ├── acceptance + ├── Makefile.include + └── qemu-iotests + +The tests/ directory within the tests-src/ directory is setup to remain a copy +of a subset of the tests/ directory from the QEMU source tree + +The avocado_qemu tests and qemu-iotests, along with files required for the +execution of the avocado_qemu tests (scripts/qemu.py and scripts/qmp/) will be +installed in a new location - /usr/lib64/qemu-kvm/tests-src/ + +avocado_qemu tests: +The avocado_qemu tests can be executed by running the following avocado command: +avocado run -p qemu_bin=/usr/libexec/qemu-kvm /usr/lib64/qemu-kvm/tests/acceptance/ +Avocado needs to be installed separately using either pip or from source as +Avocado is not being packaged for RHEL-8. 
+ +qemu-iotests: +symlinks to corresponding binaries need to be created for QEMU_PROG, +QEMU_IO_PROG, QEMU_IMG_PROG, and QEMU_NBD_PROG before the iotests can be +executed. + +The primary purpose of this package is to make these tests available to be +executed as gating tests for the virt module in the RHEL-8 OSCI environment. diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 36163fe..f0e6a60 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 14%{?dist} +Release: 15%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -102,6 +102,7 @@ Source32: qemu-pr-helper.service Source33: qemu-pr-helper.socket Source34: 81-kvm-rhel.rules Source35: udev-kvm-check.c +Source36: README.tests Patch0004: 0004-Initial-redhat-build.patch @@ -406,6 +407,18 @@ with the host over a virtio-serial channel named "org.qemu.guest_agent.0" This package does not need to be installed on the host OS. +%package tests +Summary: tests for the qemu-kvm package +Requires: %{name} = %{epoch}:%{version}-%{release} + +%define testsdir %{_libdir}/%{name}/tests-src + +%description tests +The qemu-kvm-tests rpm contains tests that can be used to verify +the functionality of the installed qemu-kvm package + +Install this package if you want access to the avocado_qemu +tests, or qemu-iotests. 
%package block-curl Summary: QEMU CURL block driver @@ -684,12 +697,36 @@ mkdir -p $RPM_BUILD_ROOT%{_bindir}/ mkdir -p $RPM_BUILD_ROOT%{_udevrulesdir}/ mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name} +# Create new directories and put them all under tests-src +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/ +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/acceptance +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests +mkdir -p $RPM_BUILD_ROOT%{testsdir}/scripts +mkdir -p $RPM_BUILD_ROOT%{testsdir}/scripts/qmp + install -p -m 0755 udev-kvm-check $RPM_BUILD_ROOT%{_udevdir} install -p -m 0644 %{SOURCE34} $RPM_BUILD_ROOT%{_udevrulesdir} install -m 0644 scripts/dump-guest-memory.py \ $RPM_BUILD_ROOT%{_datadir}/%{name} +# Install avocado_qemu tests +cp -R tests/acceptance/* $RPM_BUILD_ROOT%{testsdir}/tests/acceptance/ + +# Install qemu.py and qmp/ scripts required to run avocado_qemu tests +install -p -m 0644 scripts/qemu.py $RPM_BUILD_ROOT%{testsdir}/scripts/ +cp -R scripts/qmp/* $RPM_BUILD_ROOT%{testsdir}/scripts/qmp +install -p -m 0755 tests/Makefile.include $RPM_BUILD_ROOT%{testsdir}/tests/ + +# Install qemu-iotests +cp -R tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ +# Avoid ambiguous 'python' interpreter name +find $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env python+%{__python3}+' {} \; +find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env python+%{__python3}+' {} \; +find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/python+%{__python3}+' {} \; + +install -p -m 0644 %{SOURCE36} $RPM_BUILD_ROOT%{testsdir}/README + make DESTDIR=$RPM_BUILD_ROOT \ sharedir="%{_datadir}/%{name}" \ datadir="%{_datadir}/%{name}" \ @@ -870,6 +907,12 @@ chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/block-*.so %check export DIFF=diff; make check V=1 +pushd tests/qemu-iotests +./check -v -raw 001 002 003 004 005 008 009 010 011 012 021 
025 032 033 045 048 052 063 077 086 101 104 106 120 132 140 143 145 147 150 152 157 159 160 162 170 171 175 181 184 194 205 208 218 221 222 226 227 232 +./check -v -qcow2 001 002 003 004 005 007 008 009 010 011 012 013 017 018 019 020 021 022 024 025 027 028 029 031 032 033 034 035 036 037 038 039 042 043 046 047 048 049 050 052 053 054 056 057 058 060 061 062 063 065 066 068 069 072 073 074 080 085 086 087 089 090 091 095 096 097 098 102 103 104 105 107 108 110 111 114 117 120 122 126 127 130 132 133 134 137 138 140 141 142 143 144 145 147 150 151 152 154 156 157 158 159 162 165 170 174 176 177 179 181 184 187 188 189 190 191 194 195 196 198 201 202 203 204 205 206 208 209 214 216 217 218 222 223 226 227 232 +./check -v -luks 001 002 003 004 005 008 009 010 011 012 021 032 033 048 052 140 143 145 157 162 174 181 184 208 218 227 +./check -v -nbd 001 002 003 004 005 008 009 010 011 021 032 033 045 077 094 104 119 123 132 143 145 147 151 152 162 181 184 194 205 208 218 222 +popd %post -n qemu-kvm-core # load kvm modules now, so we can make sure no reboot is needed. 
@@ -1029,6 +1072,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/%{name}/qemu-ga %dir %{_localstatedir}/log/qemu-ga +%files tests +%{testsdir} + %files block-curl %{_libdir}/qemu-kvm/block-curl.so @@ -1048,6 +1094,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Wed Feb 13 2019 Danilo Cesar Lemes de Paula - 3.1.0-15.el8 +- kvm-Add-raw-qcow2-nbd-and-luks-iotests-to-run-during-the.patch [bz#1664855] +- kvm-Introduce-the-qemu-kvm-tests-rpm.patch [bz#1669924] +- Resolves: bz#1664855 + (Run iotests in qemu-kvm build %check phase) +- Resolves: bz#1669924 + (qemu-kvm packaging: Package the avocado_qemu tests and qemu-iotests in a new rpm) + * Tue Feb 12 2019 Danilo Cesar Lemes de Paula - 3.1.0-14.el8 - kvm-doc-fix-the-configuration-path.patch [bz#1644985] - Resolves: bz#1644985 From dd7d9e5a432b7b8bd135587dcb643bf7c6706b81 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Mon, 25 Feb 2019 18:26:37 +0000 Subject: [PATCH 023/195] * Mon Feb 25 2019 Danilo Cesar Lemes de Paula - 3.1.0-16.el8 - kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch [bz#1664997] - kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch [bz#1664997] - Resolves: bz#1664997 (Restrict floppy device to RHEL-7 machine types) --- ...ppy-controllers-to-RHEL-7-machine-ty.patch | 55 +++++++++++++++++++ ...nstream-disablement-of-device-floppy.patch | 41 ++++++++++++++ qemu-kvm.spec | 12 +++- 3 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch create mode 100644 kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch diff --git a/kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch b/kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch new file mode 100644 index 0000000..51368ef --- /dev/null +++ b/kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch @@ -0,0 +1,55 @@ +From f869cc062302515f4d031305584386ead0d32714 Mon 
Sep 17 00:00:00 2001 +From: Markus Armbruster +Date: Thu, 21 Feb 2019 09:11:01 +0000 +Subject: [PATCH 2/2] fdc: Restrict floppy controllers to RHEL-7 machine types + +RH-Author: Markus Armbruster +Message-id: <20190221091101.31999-3-armbru@redhat.com> +Patchwork-id: 84693 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH v2 2/2] fdc: Restrict floppy controllers to RHEL-7 machine types +Bugzilla: 1664997 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Thomas Huth + +Make floppy controllers' realize() fail except with RHEL-7 machine +types. The "is a RHEL-7 machine type" test is a bit of a hack: it +looks for "-rhel7." in the machine type name. + +Signed-off-by: Markus Armbruster +Signed-off-by: Danilo C. L. de Paula +--- + hw/block/fdc.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index 6f19f12..9ece2db 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -42,6 +42,8 @@ + #include "qemu/log.h" + #include "trace.h" + ++#include "hw/boards.h" ++ + /********************************************************/ + /* debug Floppy devices */ + +@@ -2629,6 +2631,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, + int i, j; + static int command_tables_inited = 0; + ++ /* Restricted for Red Hat Enterprise Linux: */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (!strstr(mc->name, "-rhel7.")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { + error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); + } +-- +1.8.3.1 + diff --git a/kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch b/kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch new file mode 100644 index 0000000..15baaa9 --- /dev/null +++ b/kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch @@ -0,0 +1,41 @@ +From 
20a51f6e5ebc56707554a52e2fb6a61bf6511315 Mon Sep 17 00:00:00 2001 +From: Markus Armbruster +Date: Thu, 21 Feb 2019 09:11:00 +0000 +Subject: [PATCH 1/2] fdc: Revert downstream disablement of device "floppy" + +RH-Author: Markus Armbruster +Message-id: <20190221091101.31999-2-armbru@redhat.com> +Patchwork-id: 84691 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH v2 1/2] fdc: Revert downstream disablement of device "floppy" +Bugzilla: 1664997 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Thomas Huth + +Board code creates floppy devices on behalf of -drive if=floppy,... +When they got qdevifified, they also became available with -device. +We made it unavailable downstream as per our policy to permit new +devices only when we have a use for them (commit 0533a6ee98f). We now +have a use: we need it to move from -drive to -blockdev. + +Signed-off-by: Markus Armbruster +Signed-off-by: Danilo C. L. de Paula +--- + hw/block/fdc.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index 56b7aeb..6f19f12 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -599,7 +599,6 @@ static void floppy_drive_class_init(ObjectClass *klass, void *data) + k->bus_type = TYPE_FLOPPY_BUS; + k->props = floppy_drive_properties; + k->desc = "virtual floppy drive"; +- k->user_creatable = false; /* RH state preserve */ + } + + static const TypeInfo floppy_drive_info = { +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index f0e6a60..9f6ae4a 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 15%{?dist} +Release: 16%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -202,6 +202,10 @@ Patch57: kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch Patch58: kvm-virtio-scsi-Forbid-devices-with-different-iothreads-.patch # For bz#1644985 - The "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong - Fast Train Patch59: kvm-doc-fix-the-configuration-path.patch +# For bz#1664997 - Restrict floppy device to RHEL-7 machine types +Patch60: kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch +# For bz#1664997 - Restrict floppy device to RHEL-7 machine types +Patch61: kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1094,6 +1098,12 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Feb 25 2019 Danilo Cesar Lemes de Paula - 3.1.0-16.el8 +- kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch [bz#1664997] +- kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch [bz#1664997] +- Resolves: bz#1664997 + (Restrict floppy device to RHEL-7 machine types) + * Wed Feb 13 2019 Danilo Cesar Lemes de Paula - 3.1.0-15.el8 - kvm-Add-raw-qcow2-nbd-and-luks-iotests-to-run-during-the.patch [bz#1664855] - kvm-Introduce-the-qemu-kvm-tests-rpm.patch [bz#1669924] From dd688447c452c031df35695695721f8a61ba8613 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Tue, 26 Feb 2019 12:21:07 +0000 Subject: [PATCH 024/195] * Tue Feb 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-17.el8 - kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch [bz#1678968] - Resolves: bz#1678968 (-blockdev: auto-read-only is ineffective for drivers on read-only whitelist) --- ...o-read-only-for-ro-whitelist-drivers.patch | 66 +++++++++++++++++++ qemu-kvm.spec | 9 ++- 2 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch diff --git a/kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch b/kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch new file mode 100644 index 0000000..50392ca --- /dev/null +++ b/kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch @@ -0,0 +1,66 @@ +From fa7a2c6b323882bb64d0015b842f05d6078bbe48 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 20 Feb 2019 10:37:05 +0000 +Subject: [PATCH] block: Apply auto-read-only for ro-whitelist drivers + +RH-Author: Kevin Wolf +Message-id: <20190220103705.22630-2-kwolf@redhat.com> +Patchwork-id: 84561 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/1] block: Apply auto-read-only for ro-whitelist drivers +Bugzilla: 1678968 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +If QEMU was configured with a driver in --block-drv-ro-whitelist, trying +to use that driver read-write resulted in an error message even if +auto-read-only=on was set. + +Consider auto-read-only=on for the whitelist checking and use it to +automatically degrade to read-only for block drivers on the read-only +whitelist. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit 8be25de64315ef768353eb61f2b2bf6cddc34230) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) + +diff --git a/block.c b/block.c +index 1ec4512..f1f6924 100644 +--- a/block.c ++++ b/block.c +@@ -1442,13 +1442,19 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, + bs->read_only = !(bs->open_flags & BDRV_O_RDWR); + + if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { +- error_setg(errp, +- !bs->read_only && bdrv_is_whitelisted(drv, true) +- ? "Driver '%s' can only be used for read-only devices" +- : "Driver '%s' is not whitelisted", +- drv->format_name); +- ret = -ENOTSUP; +- goto fail_opts; ++ if (!bs->read_only && bdrv_is_whitelisted(drv, true)) { ++ ret = bdrv_apply_auto_read_only(bs, NULL, NULL); ++ } else { ++ ret = -ENOTSUP; ++ } ++ if (ret < 0) { ++ error_setg(errp, ++ !bs->read_only && bdrv_is_whitelisted(drv, true) ++ ? "Driver '%s' can only be used for read-only devices" ++ : "Driver '%s' is not whitelisted", ++ drv->format_name); ++ goto fail_opts; ++ } + } + + /* bdrv_new() and bdrv_close() make it so */ +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 9f6ae4a..ca5bb95 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 16%{?dist} +Release: 17%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -206,6 +206,8 @@ Patch59: kvm-doc-fix-the-configuration-path.patch Patch60: kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch # For bz#1664997 - Restrict floppy device to RHEL-7 machine types Patch61: kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch +# For bz#1678968 - -blockdev: auto-read-only is ineffective for drivers on read-only whitelist +Patch62: kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1098,6 +1100,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Feb 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-17.el8 +- kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch [bz#1678968] +- Resolves: bz#1678968 + (-blockdev: auto-read-only is ineffective for drivers on read-only whitelist) + * Mon Feb 25 2019 Danilo Cesar Lemes de Paula - 3.1.0-16.el8 - kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch [bz#1664997] - kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch [bz#1664997] From 5a65f8223271bfc06a666b10261c9da08f3572f1 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Tue, 26 Feb 2019 17:50:29 +0000 Subject: [PATCH 025/195] * Tue Feb 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-18.el8 - kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch [bz#1661030] - kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch [bz#1661515] - kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch [bz#1661515] - kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch [bz#1661515] - Resolves: bz#1661030 (Remove MPX support from 8.0 machine types) - Resolves: bz#1661515 (Remove PCONFIG and INTEL_PT from Icelake-* CPU models) --- ...evert-i386-Add-CPUID-bit-for-PCONFIG.patch | 57 +++++++ ...INTEL_PT-CPUID-bit-from-named-CPU-mo.patch | 64 ++++++++ ...new-CPUID-PCONFIG-from-Icelake-Serve.patch | 48 ++++++ ...able-MPX-support-on-named-CPU-models.patch | 153 ++++++++++++++++++ qemu-kvm.spec | 20 ++- 5 files changed, 341 insertions(+), 1 deletion(-) create mode 100644 kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch create mode 100644 kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch create mode 100644 kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch create mode 100644 kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch diff --git a/kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch b/kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch new file mode 100644 index 0000000..6e0906c --- /dev/null +++ b/kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch @@ -0,0 +1,57 @@ +From da2d528c3cffe22bd1b90b446a045376e4370845 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Sat, 16 Feb 2019 00:00:50 +0000 +Subject: [PATCH 4/4] Revert "i386: Add CPUID bit for PCONFIG" + +RH-Author: Paolo Bonzini +Message-id: <1550275250-41719-4-git-send-email-pbonzini@redhat.com> +Patchwork-id: 84524 +O-Subject: [rhel-av-8.0.0 qemu-kvm PATCH 3/3] Revert "i386: Add CPUID bit for PCONFIG" +Bugzilla: 1661515 +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: Igor Mammedov +RH-Acked-by: Stefano Garzarella + +From: Robert Hoo + +This reverts commit 5131dc433df54b37e8e918d8fba7fe10344e7a7b. +For new instruction 'PCONFIG' will not be exposed to guest. + +Signed-off-by: Robert Hoo +Message-Id: <1545227081-213696-3-git-send-email-robert.hu@linux.intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 712f807e1965c8f1f1da5bbec2b92a8c540e6631) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 2 +- + target/i386/cpu.h | 1 - + 2 files changed, 1 insertion(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 169a2ce..d990070 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1077,7 +1077,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +- NULL, NULL, "pconfig", NULL, ++ NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, "spec-ctrl", NULL, + NULL, "arch-capabilities", NULL, "ssbd", +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index dd88151..26412f1 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -692,7 +692,6 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; + + #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */ + #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation Single Precision */ +-#define CPUID_7_0_EDX_PCONFIG (1U << 18) /* Platform Configuration */ + #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) /* Speculation Control */ + #define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29) /*Arch Capabilities*/ + #define CPUID_7_0_EDX_SPEC_CTRL_SSBD (1U << 31) /* Speculative Store Bypass Disable */ +-- +1.8.3.1 + diff --git a/kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch b/kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch new file mode 100644 index 0000000..2370ef4 --- /dev/null +++ 
b/kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch @@ -0,0 +1,64 @@ +From adf78309059e3346dddac518601f88f348ec7758 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Sat, 16 Feb 2019 00:00:49 +0000 +Subject: [PATCH 3/4] i386: remove the 'INTEL_PT' CPUID bit from named CPU + models + +RH-Author: Paolo Bonzini +Message-id: <1550275250-41719-3-git-send-email-pbonzini@redhat.com> +Patchwork-id: 84522 +O-Subject: [rhel-av-8.0.0 qemu-kvm PATCH 2/3] i386: remove the 'INTEL_PT' CPUID bit from named CPU models +Bugzilla: 1661515 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Igor Mammedov +RH-Acked-by: Stefano Garzarella + +Processor tracing is not yet implemented for KVM and it will be an +opt in feature requiring a special module parameter. +Disable it, because it is wrong to enable it by default and +it is impossible that no one has ever used it. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4c257911dcc7c4189768e9651755c849ce9db4e8) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 7b63900..169a2ce 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -2555,8 +2555,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | +- CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT | +- CPUID_7_0_EBX_INTEL_PT, ++ CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE | + CPUID_7_0_ECX_AVX512VNNI, +@@ -2608,7 +2607,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | + CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | + CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | +- CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_INTEL_PT, ++ CPUID_7_0_EBX_SMAP, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_OSPKE | CPUID_7_0_ECX_VBMI2 | CPUID_7_0_ECX_GFNI | +@@ -2666,8 +2665,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | +- CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT | +- CPUID_7_0_EBX_INTEL_PT, ++ CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_OSPKE | CPUID_7_0_ECX_VBMI2 | CPUID_7_0_ECX_GFNI | +-- +1.8.3.1 + diff --git a/kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch b/kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch new file mode 100644 index 0000000..dc9dee0 --- /dev/null +++ b/kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch @@ -0,0 +1,48 @@ +From 
9fc28ea52c88d603e85fa806a708b53b373f511e Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Sat, 16 Feb 2019 00:00:48 +0000 +Subject: [PATCH 2/4] i386: remove the new CPUID 'PCONFIG' from Icelake-Server + CPU model + +RH-Author: Paolo Bonzini +Message-id: <1550275250-41719-2-git-send-email-pbonzini@redhat.com> +Patchwork-id: 84526 +O-Subject: [rhel-av-8.0.0 qemu-kvm PATCH 1/3] i386: remove the new CPUID 'PCONFIG' from Icelake-Server CPU model +Bugzilla: 1661515 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Igor Mammedov +RH-Acked-by: Stefano Garzarella + +From: Robert Hoo + +PCONFIG is not available to guests; it must be specifically enabled +using the PCONFIG_ENABLE execution control. Disable it, because +no one can ever use it. + +Signed-off-by: Robert Hoo +Message-Id: <1545227081-213696-2-git-send-email-robert.hu@linux.intel.com> +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +(cherry picked from commit 76e5a4d58357b9d077afccf7f7c82e17f733b722) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index dbcf632..7b63900 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -2675,8 +2675,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57, + .features[FEAT_7_0_EDX] = +- CPUID_7_0_EDX_PCONFIG | CPUID_7_0_EDX_SPEC_CTRL | +- CPUID_7_0_EDX_SPEC_CTRL_SSBD, ++ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_SPEC_CTRL_SSBD, + /* Missing: XSAVES (not supported by some Linux versions, + * including v4.1 to v4.12). 
+ * KVM doesn't yet expose any XSAVES state save component, +-- +1.8.3.1 + diff --git a/kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch b/kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch new file mode 100644 index 0000000..03d72b1 --- /dev/null +++ b/kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch @@ -0,0 +1,153 @@ +From 18cf0d751c615e83243e13f3170508289cd78457 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Sat, 16 Feb 2019 00:01:45 +0000 +Subject: [PATCH 1/4] target/i386: Disable MPX support on named CPU models +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <1550275305-42020-1-git-send-email-pbonzini@redhat.com> +Patchwork-id: 84525 +O-Subject: [rhel-av-8.0.0 qemu-kvm PATCH] target/i386: Disable MPX support on named CPU models +Bugzilla: 1661030 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Igor Mammedov +RH-Acked-by: Stefano Garzarella + +Bugzilla: 1661030 + +Brew build: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=20232369 + +MPX support is being phased out by Intel; GCC has dropped it, Linux +is also going to do that. Even though KVM will have special code +to support MPX after the kernel proper stops enabling it in XCR0, +we probably also want to deprecate that in a few years. As a start, +do not enable it by default for any named CPU model starting with +the 4.0 machine types; this include Skylake, Icelake and Cascadelake. + +Signed-off-by: Paolo Bonzini +Message-Id: <20181220121100.21554-1-pbonzini@redhat.com> +Reviewed-by:   Wainer dos Santos Moschetta +Signed-off-by: Eduardo Habkost +(cherry picked from commit ecb85fe48cacb2f8740186e81f2f38a2e02bd963) +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + hw/i386/pc.c [old-style global properties] + +Signed-off-by: Danilo C. L. 
de Paula +--- + include/hw/i386/pc.h | 28 ++++++++++++++++++++++++++++ + target/i386/cpu.c | 14 +++++++------- + 2 files changed, 35 insertions(+), 7 deletions(-) + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 426a975..782d728 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -1022,6 +1022,34 @@ extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); + .driver = TYPE_X86_CPU,\ + .property = "x-migrate-smi-count",\ + .value = "off",\ ++ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ \ ++ .driver = "Skylake-Client" "-" TYPE_X86_CPU,\ ++ .property = "mpx",\ ++ .value = "on",\ ++ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ \ ++ .driver = "Skylake-Client-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "mpx",\ ++ .value = "on",\ ++ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ \ ++ .driver = "Skylake-Server" "-" TYPE_X86_CPU,\ ++ .property = "mpx",\ ++ .value = "on",\ ++ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ \ ++ .driver = "Skylake-Server-IBRS" "-" TYPE_X86_CPU,\ ++ .property = "mpx",\ ++ .value = "on",\ ++ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ \ ++ .driver = "Cascadelake-Server" "-" TYPE_X86_CPU,\ ++ .property = "mpx",\ ++ .value = "on",\ ++ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ \ ++ .driver = "Icelake-Client" "-" TYPE_X86_CPU,\ ++ .property = "mpx",\ ++ .value = "on",\ ++ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ \ ++ .driver = "Icelake-Server" "-" TYPE_X86_CPU,\ ++ .property = "mpx",\ ++ .value = "on",\ + }, + + /* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 8570b25..dbcf632 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -2358,7 +2358,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | + CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | + CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | +- CPUID_7_0_EBX_SMAP | 
CPUID_7_0_EBX_MPX, ++ CPUID_7_0_EBX_SMAP, + /* Missing: XSAVES (not supported by some Linux versions, + * including v4.1 to v4.12). + * KVM doesn't yet expose any XSAVES state save component, +@@ -2405,7 +2405,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | + CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | + CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | +- CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX, ++ CPUID_7_0_EBX_SMAP, + /* Missing: XSAVES (not supported by some Linux versions, + * including v4.1 to v4.12). + * KVM doesn't yet expose any XSAVES state save component, +@@ -2450,7 +2450,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | + CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | + CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | +- CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | + CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, +@@ -2502,7 +2502,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | + CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | + CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | +- CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | + CPUID_7_0_EBX_AVX512VL, +@@ -2552,7 +2552,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | + CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | + CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | +- 
CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | + CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT | +@@ -2608,7 +2608,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | + CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | + CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | +- CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_INTEL_PT, ++ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_INTEL_PT, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_OSPKE | CPUID_7_0_ECX_VBMI2 | CPUID_7_0_ECX_GFNI | +@@ -2663,7 +2663,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | + CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | + CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | +- CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | + CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT | +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index ca5bb95..5a43298 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 17%{?dist} +Release: 18%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -208,6 +208,14 @@ Patch60: kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch Patch61: kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch # For bz#1678968 - -blockdev: auto-read-only is ineffective for drivers on read-only whitelist Patch62: kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch +# For bz#1661030 - Remove MPX support from 8.0 machine types +Patch63: kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch +# For bz#1661515 - Remove PCONFIG and INTEL_PT from Icelake-* CPU models +Patch64: kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch +# For bz#1661515 - Remove PCONFIG and INTEL_PT from Icelake-* CPU models +Patch65: kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch +# For bz#1661515 - Remove PCONFIG and INTEL_PT from Icelake-* CPU models +Patch66: kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1100,6 +1108,16 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Feb 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-18.el8 +- kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch [bz#1661030] +- kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch [bz#1661515] +- kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch [bz#1661515] +- kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch [bz#1661515] +- Resolves: bz#1661030 + (Remove MPX support from 8.0 machine types) +- Resolves: bz#1661515 + (Remove PCONFIG and INTEL_PT from Icelake-* CPU models) + * Tue Feb 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-17.el8 - kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch [bz#1678968] - Resolves: bz#1678968 From b1acd1afc1d9ecde85a3547eef2abeacfe7caa35 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Fri, 15 Mar 2019 00:25:00 +0000 Subject: [PATCH 026/195] * Fri Mar 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-19.el8 - kvm-migration-Fix-cancel-state.patch [bz#1608649] - kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch [bz#1608649] - Resolves: bz#1608649 (Query-migrate get "failed" status after migrate-cancel) --- kvm-migration-Fix-cancel-state.patch | 75 +++++++++++++++++++ ...dma-Fix-qemu_rdma_cleanup-null-check.patch | 54 +++++++++++++ qemu-kvm.spec | 12 ++- 3 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 kvm-migration-Fix-cancel-state.patch create mode 100644 kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch diff --git a/kvm-migration-Fix-cancel-state.patch b/kvm-migration-Fix-cancel-state.patch new file mode 100644 index 0000000..618acba --- /dev/null +++ b/kvm-migration-Fix-cancel-state.patch @@ -0,0 +1,75 @@ +From 31566a415b69d58fdf09f05e362685fcc3aee00b Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 7 Mar 2019 12:26:21 +0000 +Subject: [PATCH 1/2] migration: Fix cancel state + +RH-Author: Dr. David Alan Gilbert +Message-id: <20190307122622.9387-2-dgilbert@redhat.com> +Patchwork-id: 84820 +O-Subject: [RHEL-8.0 qemu-kvm AV PATCH 1/2] migration: Fix cancel state +Bugzilla: 1608649 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Pankaj Gupta + +From: "Dr. David Alan Gilbert" + +During a cancelled migration there's a race where the fd can +go into an error state before we get back around the migration loop +and migration_detect_error transitions from cancelling->failed. + +Check for cancelled/cancelling and don't change the state. + +Red Hat bug: https://bugzilla.redhat.com/show_bug.cgi?id=1608649 + +Fixes: b23c2ade250 +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20190219195928.12289-1-dgilbert@redhat.com> +Signed-off-by: Dr. 
David Alan Gilbert +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +(cherry picked from commit c3c5eae6ac69d9f7d4618407aa6c8ac5edc2267c) +Signed-off-by: Danilo C. L. de Paula +--- + migration/migration.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 0d9cb7a..ecdf01d 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2824,6 +2824,13 @@ static MigThrError postcopy_pause(MigrationState *s) + static MigThrError migration_detect_error(MigrationState *s) + { + int ret; ++ int state = s->state; ++ ++ if (state == MIGRATION_STATUS_CANCELLING || ++ state == MIGRATION_STATUS_CANCELLED) { ++ /* End the migration, but don't set the state to failed */ ++ return MIG_THR_ERR_FATAL; ++ } + + /* Try to detect any file errors */ + ret = qemu_file_get_error(s->to_dst_file); +@@ -2833,7 +2840,7 @@ static MigThrError migration_detect_error(MigrationState *s) + return MIG_THR_ERR_NONE; + } + +- if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) { ++ if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) { + /* + * For postcopy, we allow the network to be down for a + * while. After that, it can be continued by a +@@ -2845,7 +2852,7 @@ static MigThrError migration_detect_error(MigrationState *s) + * For precopy (or postcopy with error outside IO), we fail + * with no time. + */ +- migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); ++ migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED); + trace_migration_thread_file_err(); + + /* Time to stop the migration, now. */ +-- +1.8.3.1 + diff --git a/kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch b/kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch new file mode 100644 index 0000000..8f99d0e --- /dev/null +++ b/kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch @@ -0,0 +1,54 @@ +From aa5367b5e81cdc3cc3dc8dbc31daef6acecc1f97 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Thu, 7 Mar 2019 12:26:22 +0000 +Subject: [PATCH 2/2] migration/rdma: Fix qemu_rdma_cleanup null check +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20190307122622.9387-3-dgilbert@redhat.com> +Patchwork-id: 84819 +O-Subject: [RHEL-8.0 qemu-kvm AV PATCH 2/2] migration/rdma: Fix qemu_rdma_cleanup null check +Bugzilla: 1608649 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Pankaj Gupta + +From: "Dr. David Alan Gilbert" + +If the migration fails before the channel is open (e.g. a bad +address) we end up in the cleanup with rdma->channel==NULL. + +Spotted by Coverity: CID 1398634 +Fixes: fbbaacab2758cb3f32a0 +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20190214185351.5927-1-dgilbert@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Peter Xu +Reviewed-by: Philippe Mathieu-Daudé +(cherry picked from commit cf75e2684938413f0bbe95f5a4b7db5c845e42c8) + +Signed-off-by: Danilo C. L. de Paula +--- + migration/rdma.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/migration/rdma.c b/migration/rdma.c +index 54a3c11..9fa3b17 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -2321,7 +2321,9 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) + rdma->connected = false; + } + +- qemu_set_fd_handler(rdma->channel->fd, NULL, NULL, NULL); ++ if (rdma->channel) { ++ qemu_set_fd_handler(rdma->channel->fd, NULL, NULL, NULL); ++ } + g_free(rdma->dest_blocks); + rdma->dest_blocks = NULL; + +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 5a43298..ebb0766 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 18%{?dist} +Release: 19%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -216,6 +216,10 @@ Patch64: kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch Patch65: kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch # For bz#1661515 - Remove PCONFIG and INTEL_PT from Icelake-* CPU models Patch66: kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch +# For bz#1608649 - Query-migrate get "failed" status after migrate-cancel +Patch67: kvm-migration-Fix-cancel-state.patch +# For bz#1608649 - Query-migrate get "failed" status after migrate-cancel +Patch68: kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1108,6 +1112,12 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Fri Mar 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-19.el8 +- kvm-migration-Fix-cancel-state.patch [bz#1608649] +- kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch [bz#1608649] +- Resolves: bz#1608649 + (Query-migrate get "failed" status after migrate-cancel) + * Tue Feb 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-18.el8 - kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch [bz#1661030] - kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch [bz#1661515] From 9374e45db47cceef88fc3afb29b5fdf660bfe505 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Fri, 15 Mar 2019 14:37:42 +0000 Subject: [PATCH 027/195] * Fri Mar 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-20.el8 - kvm-i386-Add-stibp-flag-name.patch [bz#1686260] - Resolves: bz#1686260 (stibp is missing on qemu 3.0 and qemu 3.1) --- kvm-i386-Add-stibp-flag-name.patch | 51 ++++++++++++++++++++++++++++++ qemu-kvm.spec | 9 +++++- 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 kvm-i386-Add-stibp-flag-name.patch diff --git a/kvm-i386-Add-stibp-flag-name.patch b/kvm-i386-Add-stibp-flag-name.patch new file mode 100644 index 0000000..b286de7 --- /dev/null +++ b/kvm-i386-Add-stibp-flag-name.patch @@ -0,0 +1,51 @@ +From 730ab8e3a8e9a703f2b2374b8f55429dd6b2254c Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Thu, 14 Mar 2019 19:41:28 +0000 +Subject: [PATCH] i386: Add "stibp" flag name + +RH-Author: Eduardo Habkost +Message-id: <20190314194128.15795-2-ehabkost@redhat.com> +Patchwork-id: 84870 +O-Subject: [RHEL-AV-8.0.0 qemu-kvm PATCH 1/1] i386: Add "stibp" flag name +Bugzilla: 1686260 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Igor Mammedov +RH-Acked-by: Danilo de Paula + +The STIBP flag may be supported by the host KVM module, so QEMU +can allow it to be configured manually, and it can be exposed to +guests when using "-cpu host". + +No additional migration code is required because the whole +contents of spec_ctrl is already migrated in the "cpu/spec_ctrl" +section. + +Corresponding KVM patch was submitted at: +https://lore.kernel.org/lkml/20181205191956.31480-1-ehabkost@redhat.com/ + +Signed-off-by: Eduardo Habkost +Message-Id: <20181210180250.31299-1-ehabkost@redhat.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 0e8916582991b9fd0b94850a8444b8b80d0a0955) +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index d990070..c115572 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1079,7 +1079,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +- NULL, NULL, "spec-ctrl", NULL, ++ NULL, NULL, "spec-ctrl", "stibp", + NULL, "arch-capabilities", NULL, "ssbd", + }, + .cpuid = { +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index ebb0766..ae8e0a4 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 19%{?dist} +Release: 20%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -220,6 +220,8 @@ Patch66: kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch Patch67: kvm-migration-Fix-cancel-state.patch # For bz#1608649 - Query-migrate get "failed" status after migrate-cancel Patch68: kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch +# For bz#1686260 - stibp is missing on qemu 3.0 and qemu 3.1 +Patch69: kvm-i386-Add-stibp-flag-name.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1112,6 +1114,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Fri Mar 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-20.el8 +- kvm-i386-Add-stibp-flag-name.patch [bz#1686260] +- Resolves: bz#1686260 + (stibp is missing on qemu 3.0 and qemu 3.1) + * Fri Mar 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-19.el8 - kvm-migration-Fix-cancel-state.patch [bz#1608649] - kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch [bz#1608649] From 797ce578a4b29084d3455d4fa3eca0667d330875 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Thu, 11 Apr 2019 18:06:32 +0100 Subject: [PATCH 028/195] * Thu Apr 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-21.el8 - kvm-Remove-7-qcow2-and-luks-iotests-that-are-taking-25-s.patch [bz#1683473] - kvm-spapr-fix-out-of-bounds-write-in-spapr_populate_drme.patch [bz#1674438] - kvm-qcow2-include-LUKS-payload-overhead-in-qemu-img-meas.patch [bz#1655065] - kvm-iotests-add-LUKS-payload-overhead-to-178-qemu-img-me.patch [bz#1655065] - kvm-vnc-detect-and-optimize-pageflips.patch [bz#1666206] - kvm-Load-kvm-module-during-boot.patch [bz#1676907 bz#1685995] - kvm-hostmem-file-reject-invalid-pmem-file-sizes.patch [bz#1669053] - kvm-iotests-Fix-test-200-on-s390x-without-virtio-pci.patch [bz#1687582] - kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch [bz#1652572] - Resolves: bz#1652572 (QEMU core dumped if stop nfs service during migration) - Resolves: bz#1655065 ([rhel.8.0][fast train]'qemu-img measure' size does not match the real allocated size for luks-inside-qcow2 image) - Resolves: bz#1666206 (vnc server should detect page-flips and avoid sending fullscreen updates then.) 
- Resolves: bz#1669053 (Guest call trace when boot with nvdimm device backed by /dev/dax) - Resolves: bz#1674438 (RHEL8.0 - Guest reboot fails after memory hotplug multiple times (kvm)) - Resolves: bz#1676907 (/dev/kvm device exists but kernel module is not loaded on boot up causing VM start to fail in libvirt) - Resolves: bz#1683473 (Remove 7 qcow2 & luks iotests from rhel8 fast train build %check phase) - Resolves: bz#1685995 (/dev/kvm device exists but kernel module is not loaded on boot up causing VM start to fail in libvirt) - Resolves: bz#1687582 (QEMU IOTEST 200 fails with 'virtio-scsi-pci is not a valid device model name') --- ...le-posix-do-not-fail-on-unlock-bytes.patch | 58 ++++++ ...-file-reject-invalid-pmem-file-sizes.patch | 183 ++++++++++++++++++ ...test-200-on-s390x-without-virtio-pci.patch | 62 ++++++ ...-payload-overhead-to-178-qemu-img-me.patch | 113 +++++++++++ ...KS-payload-overhead-in-qemu-img-meas.patch | 148 ++++++++++++++ kvm-setup | 9 + ...-bounds-write-in-spapr_populate_drme.patch | 74 +++++++ kvm-vnc-detect-and-optimize-pageflips.patch | 95 +++++++++ qemu-kvm.spec | 56 +++++- 9 files changed, 795 insertions(+), 3 deletions(-) create mode 100644 kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch create mode 100644 kvm-hostmem-file-reject-invalid-pmem-file-sizes.patch create mode 100644 kvm-iotests-Fix-test-200-on-s390x-without-virtio-pci.patch create mode 100644 kvm-iotests-add-LUKS-payload-overhead-to-178-qemu-img-me.patch create mode 100644 kvm-qcow2-include-LUKS-payload-overhead-in-qemu-img-meas.patch create mode 100644 kvm-spapr-fix-out-of-bounds-write-in-spapr_populate_drme.patch create mode 100644 kvm-vnc-detect-and-optimize-pageflips.patch diff --git a/kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch b/kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch new file mode 100644 index 0000000..75e4362 --- /dev/null +++ b/kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch @@ -0,0 +1,58 @@ +From 
cc7dbe3ae92a2bb1557df184493a331fe2381003 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Wed, 3 Apr 2019 17:53:52 +0100 +Subject: [PATCH 9/9] block/file-posix: do not fail on unlock bytes + +RH-Author: Max Reitz +Message-id: <20190403175352.27439-2-mreitz@redhat.com> +Patchwork-id: 85408 +O-Subject: [RHEL-AV-8.0.1 qemu-kvm PATCH 1/1] block/file-posix: do not fail on unlock bytes +Bugzilla: 1652572 +RH-Acked-by: John Snow +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +From: Vladimir Sementsov-Ogievskiy + +bdrv_replace_child() calls bdrv_check_perm() with error_abort on +loosening permissions. However file-locking operations may fail even +in this case, for example on NFS. And this leads to Qemu crash. + +Let's avoid such errors. Note, that we ignore such things anyway on +permission update commit and abort. + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 696aaaed579ac5bf5fa336216909b46d3d8f07a8) +Signed-off-by: Max Reitz +Signed-off-by: Danilo C. L. de Paula +--- + block/file-posix.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 07bbdab..f0af144 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -812,6 +812,18 @@ static int raw_handle_perm_lock(BlockDriverState *bs, + + switch (op) { + case RAW_PL_PREPARE: ++ if ((s->perm | new_perm) == s->perm && ++ (s->shared_perm & new_shared) == s->shared_perm) ++ { ++ /* ++ * We are going to unlock bytes, it should not fail. If it fail due ++ * to some fs-dependent permission-unrelated reasons (which occurs ++ * sometimes on NFS and leads to abort in bdrv_replace_child) we ++ * can't prevent such errors by any check here. And we ignore them ++ * anyway in ABORT and COMMIT. 
++ */ ++ return 0; ++ } + ret = raw_apply_lock_bytes(s, s->fd, s->perm | new_perm, + ~s->shared_perm | ~new_shared, + false, errp); +-- +1.8.3.1 + diff --git a/kvm-hostmem-file-reject-invalid-pmem-file-sizes.patch b/kvm-hostmem-file-reject-invalid-pmem-file-sizes.patch new file mode 100644 index 0000000..5be56ee --- /dev/null +++ b/kvm-hostmem-file-reject-invalid-pmem-file-sizes.patch @@ -0,0 +1,183 @@ +From 1b9f228788eb2d7f50961241e28f7a9afadc62ab Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 21 Mar 2019 10:56:08 +0000 +Subject: [PATCH 7/9] hostmem-file: reject invalid pmem file sizes + +RH-Author: Stefan Hajnoczi +Message-id: <20190321105608.29960-2-stefanha@redhat.com> +Patchwork-id: 85081 +O-Subject: [RHEL-AV-8.0.1 qemu-kvm PATCH 1/1] hostmem-file: reject invalid pmem file sizes +Bugzilla: 1669053 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Pankaj Gupta +RH-Acked-by: Igor Mammedov + +Guests started with NVDIMMs larger than the underlying host file produce +confusing errors inside the guest. This happens because the guest +accesses pages beyond the end of the file. + +Check the pmem file size on startup and print a clear error message if +the size is invalid. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1669053 +Cc: Wei Yang +Cc: Zhang Yi +Cc: Eduardo Habkost +Cc: Igor Mammedov +Signed-off-by: Stefan Hajnoczi +Message-Id: <20190214031004.32522-3-stefanha@redhat.com> +Reviewed-by: Wei Yang +Reviewed-by: Igor Mammedov +Reviewed-by: Pankaj Gupta +Signed-off-by: Eduardo Habkost +(cherry picked from commit 314aec4a6e06844937f1677f6cba21981005f389) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. 
de Paula +--- + backends/hostmem-file.c | 23 +++++++++++++++++++++ + include/qemu/osdep.h | 13 ++++++++++++ + util/oslib-posix.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ + util/oslib-win32.c | 5 +++++ + 4 files changed, 94 insertions(+) + +diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c +index 6630021..8d85d56 100644 +--- a/backends/hostmem-file.c ++++ b/backends/hostmem-file.c +@@ -57,6 +57,29 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + #ifndef CONFIG_POSIX + error_setg(errp, "-mem-path not supported on this host"); + #else ++ ++ /* ++ * Verify pmem file size since starting a guest with an incorrect size ++ * leads to confusing failures inside the guest. ++ */ ++ if (fb->is_pmem) { ++ Error *local_err = NULL; ++ uint64_t size; ++ ++ size = qemu_get_pmem_size(fb->mem_path, &local_err); ++ if (!size) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ ++ if (backend->size > size) { ++ error_setg(errp, "size property %" PRIu64 " is larger than " ++ "pmem file \"%s\" size %" PRIu64, backend->size, ++ fb->mem_path, size); ++ return; ++ } ++ } ++ + backend->force_prealloc = mem_prealloc; + path = object_get_canonical_path(OBJECT(backend)); + memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), +diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h +index 3bf48bc..c68a85b 100644 +--- a/include/qemu/osdep.h ++++ b/include/qemu/osdep.h +@@ -553,6 +553,19 @@ void os_mem_prealloc(int fd, char *area, size_t sz, int smp_cpus, + Error **errp); + + /** ++ * qemu_get_pmem_size: ++ * @filename: path to a pmem file ++ * @errp: pointer to a NULL-initialized error object ++ * ++ * Determine the size of a persistent memory file. Besides supporting files on ++ * DAX file systems, this function also supports Linux devdax character ++ * devices. 
++ * ++ * Returns: the size or 0 on failure ++ */ ++uint64_t qemu_get_pmem_size(const char *filename, Error **errp); ++ ++/** + * qemu_get_pid_name: + * @pid: pid of a process + * +diff --git a/util/oslib-posix.c b/util/oslib-posix.c +index 97b2f3b..b173fc0 100644 +--- a/util/oslib-posix.c ++++ b/util/oslib-posix.c +@@ -496,6 +496,59 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, + } + } + ++uint64_t qemu_get_pmem_size(const char *filename, Error **errp) ++{ ++ struct stat st; ++ ++ if (stat(filename, &st) < 0) { ++ error_setg(errp, "unable to stat pmem file \"%s\"", filename); ++ return 0; ++ } ++ ++#if defined(__linux__) ++ /* Special handling for devdax character devices */ ++ if (S_ISCHR(st.st_mode)) { ++ char *subsystem_path = NULL; ++ char *subsystem = NULL; ++ char *size_path = NULL; ++ char *size_str = NULL; ++ uint64_t ret = 0; ++ ++ subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem", ++ major(st.st_rdev), minor(st.st_rdev)); ++ subsystem = g_file_read_link(subsystem_path, NULL); ++ if (!subsystem) { ++ error_setg(errp, "unable to read subsystem for pmem file \"%s\"", ++ filename); ++ goto devdax_err; ++ } ++ ++ if (!g_str_has_suffix(subsystem, "/dax")) { ++ error_setg(errp, "pmem file \"%s\" is not a dax device", filename); ++ goto devdax_err; ++ } ++ ++ size_path = g_strdup_printf("/sys/dev/char/%d:%d/size", ++ major(st.st_rdev), minor(st.st_rdev)); ++ if (!g_file_get_contents(size_path, &size_str, NULL, NULL)) { ++ error_setg(errp, "unable to read size for pmem file \"%s\"", ++ size_path); ++ goto devdax_err; ++ } ++ ++ ret = g_ascii_strtoull(size_str, NULL, 0); ++ ++devdax_err: ++ g_free(size_str); ++ g_free(size_path); ++ g_free(subsystem); ++ g_free(subsystem_path); ++ return ret; ++ } ++#endif /* defined(__linux__) */ ++ ++ return st.st_size; ++} + + char *qemu_get_pid_name(pid_t pid) + { +diff --git a/util/oslib-win32.c b/util/oslib-win32.c +index b4c17f5..bd633af 100644 +--- a/util/oslib-win32.c ++++ 
b/util/oslib-win32.c +@@ -560,6 +560,11 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, + } + } + ++uint64_t qemu_get_pmem_size(const char *filename, Error **errp) ++{ ++ error_setg(errp, "pmem support not available"); ++ return 0; ++} + + char *qemu_get_pid_name(pid_t pid) + { +-- +1.8.3.1 + diff --git a/kvm-iotests-Fix-test-200-on-s390x-without-virtio-pci.patch b/kvm-iotests-Fix-test-200-on-s390x-without-virtio-pci.patch new file mode 100644 index 0000000..9766ea7 --- /dev/null +++ b/kvm-iotests-Fix-test-200-on-s390x-without-virtio-pci.patch @@ -0,0 +1,62 @@ +From 887d5afffeff844b1284b380e53f178f68e15087 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 3 Apr 2019 10:17:09 +0100 +Subject: [PATCH 8/9] iotests: Fix test 200 on s390x without virtio-pci + +RH-Author: Thomas Huth +Message-id: <20190403101709.3284-2-thuth@redhat.com> +Patchwork-id: 85312 +O-Subject: [RHEL-AV-8.0.1 qemu-kvm PATCH 1/1] iotests: Fix test 200 on s390x without virtio-pci +Bugzilla: 1687582 +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +virtio-pci is optional on s390x, e.g. in downstream RHEL builds, it +is disabled. On s390x, virtio-ccw should be used instead. Other tests +like 051 or 240 already use virtio-scsi-ccw instead of virtio-scsi-pci +on s390x, so let's do the same here and always use virtio-scsi-ccw on +s390x. + +Signed-off-by: Thomas Huth +Reviewed-by: John Snow +Signed-off-by: Kevin Wolf +(cherry picked from commit e0a59749efc246646bb208e553489b894450cbcd) +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/200 | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/tests/qemu-iotests/200 b/tests/qemu-iotests/200 +index b9ebd5a..8301ff5 100755 +--- a/tests/qemu-iotests/200 ++++ b/tests/qemu-iotests/200 +@@ -52,13 +52,21 @@ ${QEMU_IMG} create -f $IMGFMT -F $IMGFMT "${TEST_IMG}" -b "${BACKING_IMG}" 512M + + ${QEMU_IO} -c "write -P 0xa5 512 300M" "${BACKING_IMG}" | _filter_qemu_io + ++case "$QEMU_DEFAULT_MACHINE" in ++ s390-ccw-virtio) ++ virtio_scsi="-device virtio-scsi-ccw,id=scsi0,iothread=iothread0" ++ ;; ++ *) ++ virtio_scsi="-device pci-bridge,id=bridge1,chassis_nr=1,bus=pci.0 ++ -device virtio-scsi-pci,bus=bridge1,addr=0x1f,id=scsi0,iothread=iothread0" ++ ;; ++esac ++ + echo + echo === Starting QEMU VM === + echo + qemu_comm_method="qmp" +-_launch_qemu -device pci-bridge,id=bridge1,chassis_nr=1,bus=pci.0 \ +- -object iothread,id=iothread0 \ +- -device virtio-scsi-pci,bus=bridge1,addr=0x1f,id=scsi0,iothread=iothread0 \ ++_launch_qemu -object iothread,id=iothread0 $virtio_scsi \ + -drive file="${TEST_IMG}",media=disk,if=none,cache=$CACHEMODE,id=drive_sysdisk,format=$IMGFMT \ + -device scsi-hd,drive=drive_sysdisk,bus=scsi0.0,id=sysdisk,bootindex=0 + h1=$QEMU_HANDLE +-- +1.8.3.1 + diff --git a/kvm-iotests-add-LUKS-payload-overhead-to-178-qemu-img-me.patch b/kvm-iotests-add-LUKS-payload-overhead-to-178-qemu-img-me.patch new file mode 100644 index 0000000..074fc32 --- /dev/null +++ b/kvm-iotests-add-LUKS-payload-overhead-to-178-qemu-img-me.patch @@ -0,0 +1,113 @@ +From dcce446158c042fd0aa54a6ebcc61c00f8d4759e Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 4 Mar 2019 08:54:27 +0000 +Subject: [PATCH 4/9] iotests: add LUKS payload overhead to 178 qemu-img + measure test +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +Message-id: <20190304085427.8148-3-stefanha@redhat.com> +Patchwork-id: 84777 +O-Subject: [RHEL-8.0/AV 
qemu-kvm PATCH 2/2] iotests: add LUKS payload overhead to 178 qemu-img measure test +Bugzilla: 1655065 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf + +The previous patch includes the LUKS payload overhead into the qemu-img +measure calculation for qcow2. Update qemu-iotests 178 to exercise this +new code path. + +Reviewed-by: Max Reitz +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Stefan Hajnoczi +Message-id: 20190218104525.23674-3-stefanha@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit 0482098608b83b559bc1802e4c612051b51f6c4c) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/178 | 8 ++++++++ + tests/qemu-iotests/178.out.qcow2 | 24 ++++++++++++++++++++++++ + 2 files changed, 32 insertions(+) + +diff --git a/tests/qemu-iotests/178 b/tests/qemu-iotests/178 +index 3f4b4a4..927bf06 100755 +--- a/tests/qemu-iotests/178 ++++ b/tests/qemu-iotests/178 +@@ -142,6 +142,14 @@ for ofmt in human json; do + # The backing file doesn't need to exist :) + $QEMU_IMG measure --output=$ofmt -o backing_file=x \ + -f "$fmt" -O "$IMGFMT" "$TEST_IMG" ++ ++ echo ++ echo "== $fmt input image and LUKS encryption ==" ++ echo ++ $QEMU_IMG measure --output=$ofmt \ ++ --object secret,id=sec0,data=base \ ++ -o encrypt.format=luks,encrypt.key-secret=sec0,encrypt.iter-time=10 \ ++ -f "$fmt" -O "$IMGFMT" "$TEST_IMG" + fi + + echo +diff --git a/tests/qemu-iotests/178.out.qcow2 b/tests/qemu-iotests/178.out.qcow2 +index d42d4a4..55a8dc9 100644 +--- a/tests/qemu-iotests/178.out.qcow2 ++++ b/tests/qemu-iotests/178.out.qcow2 +@@ -68,6 +68,11 @@ converted image file size in bytes: 458752 + required size: 1074135040 + fully allocated size: 1074135040 + ++== qcow2 input image and LUKS encryption == ++ ++required size: 2686976 ++fully allocated size: 1076232192 ++ + == qcow2 input image and preallocation (human) == + + required size: 1074135040 +@@ -114,6 +119,11 @@ converted image file 
size in bytes: 524288 + required size: 1074135040 + fully allocated size: 1074135040 + ++== raw input image and LUKS encryption == ++ ++required size: 2686976 ++fully allocated size: 1076232192 ++ + == raw input image and preallocation (human) == + + required size: 1074135040 +@@ -205,6 +215,13 @@ converted image file size in bytes: 458752 + "fully-allocated": 1074135040 + } + ++== qcow2 input image and LUKS encryption == ++ ++{ ++ "required": 2686976, ++ "fully-allocated": 1076232192 ++} ++ + == qcow2 input image and preallocation (json) == + + { +@@ -263,6 +280,13 @@ converted image file size in bytes: 524288 + "fully-allocated": 1074135040 + } + ++== raw input image and LUKS encryption == ++ ++{ ++ "required": 2686976, ++ "fully-allocated": 1076232192 ++} ++ + == raw input image and preallocation (json) == + + { +-- +1.8.3.1 + diff --git a/kvm-qcow2-include-LUKS-payload-overhead-in-qemu-img-meas.patch b/kvm-qcow2-include-LUKS-payload-overhead-in-qemu-img-meas.patch new file mode 100644 index 0000000..aa3582e --- /dev/null +++ b/kvm-qcow2-include-LUKS-payload-overhead-in-qemu-img-meas.patch @@ -0,0 +1,148 @@ +From e5d1850f80df1b548b1c3bdc0914e7790702d543 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 4 Mar 2019 08:54:26 +0000 +Subject: [PATCH 3/9] qcow2: include LUKS payload overhead in qemu-img measure +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +Message-id: <20190304085427.8148-2-stefanha@redhat.com> +Patchwork-id: 84778 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/2] qcow2: include LUKS payload overhead in qemu-img measure +Bugzilla: 1655065 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf + +LUKS encryption reserves clusters for its own payload data. The size of +this area must be included in the qemu-img measure calculation so that +we arrive at the correct minimum required image size. 
+ +(Ab)use the qcrypto_block_create() API to determine the payload +overhead. We discard the payload data that qcrypto thinks will be +written to the image. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Max Reitz +Message-id: 20190218104525.23674-2-stefanha@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit 61914f8906fabbae26372a576d9dd988c5e22b75) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. de Paula +--- + block/qcow2.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 71 insertions(+), 1 deletion(-) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 991d6ac..1b41e4c 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -4113,6 +4113,60 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) + return ret; + } + ++static ssize_t qcow2_measure_crypto_hdr_init_func(QCryptoBlock *block, ++ size_t headerlen, void *opaque, Error **errp) ++{ ++ size_t *headerlenp = opaque; ++ ++ /* Stash away the payload size */ ++ *headerlenp = headerlen; ++ return 0; ++} ++ ++static ssize_t qcow2_measure_crypto_hdr_write_func(QCryptoBlock *block, ++ size_t offset, const uint8_t *buf, size_t buflen, ++ void *opaque, Error **errp) ++{ ++ /* Discard the bytes, we're not actually writing to an image */ ++ return buflen; ++} ++ ++/* Determine the number of bytes for the LUKS payload */ ++static bool qcow2_measure_luks_headerlen(QemuOpts *opts, size_t *len, ++ Error **errp) ++{ ++ QDict *opts_qdict; ++ QDict *cryptoopts_qdict; ++ QCryptoBlockCreateOptions *cryptoopts; ++ QCryptoBlock *crypto; ++ ++ /* Extract "encrypt." 
options into a qdict */ ++ opts_qdict = qemu_opts_to_qdict(opts, NULL); ++ qdict_extract_subqdict(opts_qdict, &cryptoopts_qdict, "encrypt."); ++ qobject_unref(opts_qdict); ++ ++ /* Build QCryptoBlockCreateOptions object from qdict */ ++ qdict_put_str(cryptoopts_qdict, "format", "luks"); ++ cryptoopts = block_crypto_create_opts_init(cryptoopts_qdict, errp); ++ qobject_unref(cryptoopts_qdict); ++ if (!cryptoopts) { ++ return false; ++ } ++ ++ /* Fake LUKS creation in order to determine the payload size */ ++ crypto = qcrypto_block_create(cryptoopts, "encrypt.", ++ qcow2_measure_crypto_hdr_init_func, ++ qcow2_measure_crypto_hdr_write_func, ++ len, errp); ++ qapi_free_QCryptoBlockCreateOptions(cryptoopts); ++ if (!crypto) { ++ return false; ++ } ++ ++ qcrypto_block_free(crypto); ++ return true; ++} ++ + static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, + Error **errp) + { +@@ -4122,11 +4176,13 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, + uint64_t virtual_size; /* disk size as seen by guest */ + uint64_t refcount_bits; + uint64_t l2_tables; ++ uint64_t luks_payload_size = 0; + size_t cluster_size; + int version; + char *optstr; + PreallocMode prealloc; + bool has_backing_file; ++ bool has_luks; + + /* Parse image creation options */ + cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err); +@@ -4156,6 +4212,20 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, + has_backing_file = !!optstr; + g_free(optstr); + ++ optstr = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT); ++ has_luks = optstr && strcmp(optstr, "luks") == 0; ++ g_free(optstr); ++ ++ if (has_luks) { ++ size_t headerlen; ++ ++ if (!qcow2_measure_luks_headerlen(opts, &headerlen, &local_err)) { ++ goto err; ++ } ++ ++ luks_payload_size = ROUND_UP(headerlen, cluster_size); ++ } ++ + virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); + virtual_size = ROUND_UP(virtual_size, cluster_size); + 
+@@ -4226,7 +4296,7 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, + info = g_new(BlockMeasureInfo, 1); + info->fully_allocated = + qcow2_calc_prealloc_size(virtual_size, cluster_size, +- ctz32(refcount_bits)); ++ ctz32(refcount_bits)) + luks_payload_size; + + /* Remove data clusters that are not required. This overestimates the + * required size because metadata needed for the fully allocated file is +-- +1.8.3.1 + diff --git a/kvm-setup b/kvm-setup index abbd587..3bfedf6 100644 --- a/kvm-setup +++ b/kvm-setup @@ -31,10 +31,19 @@ kvm_setup_powerpc () { fi } +kvm_setup_s390x () { + if grep -q "^features.*sie" /proc/cpuinfo; then + modprobe kvm + fi +} + case $(uname -m) in ppc64|ppc64le) kvm_setup_powerpc ;; + s390x) + kvm_setup_s390x + ;; esac exit 0 diff --git a/kvm-spapr-fix-out-of-bounds-write-in-spapr_populate_drme.patch b/kvm-spapr-fix-out-of-bounds-write-in-spapr_populate_drme.patch new file mode 100644 index 0000000..48fec33 --- /dev/null +++ b/kvm-spapr-fix-out-of-bounds-write-in-spapr_populate_drme.patch @@ -0,0 +1,74 @@ +From 2744bd7eb7955e7ae995a48784760e48c33c1e73 Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Wed, 27 Feb 2019 04:54:34 +0000 +Subject: [PATCH 2/9] spapr: fix out of bounds write in spapr_populate_drmem_v2 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Gibson +Message-id: <20190227045434.23465-1-dgibson@redhat.com> +Patchwork-id: 84720 +O-Subject: [RHELAV-8.1 qemu-kvm PATCH] spapr: fix out of bounds write in spapr_populate_drmem_v2 +Bugzilla: 1674438 +RH-Acked-by: Thomas Huth +RH-Acked-by: Serhii Popovych +RH-Acked-by: Philippe Mathieu-Daudé + +From: Fabiano Rosas + +buf_len is uint8_t which is not large enough to hold the result of: + + nr_entries * sizeof(struct sPAPRDrconfCellV2) + sizeof(uint32_t); + +for a nr_entries greater than 10. 
+ +This causes the allocated buffer 'int_buf' to be smaller than expected +and we eventually overwrite some of glibc's control structures (see +"chunk" in https://sourceware.org/glibc/wiki/MallocInternals) + +The following error is seen while trying to free int_buf: + + "free(): invalid next size (fast)" + +Fixes: a324d6f166 "spapr: Support ibm,dynamic-memory-v2 property" +Signed-off-by: Fabiano Rosas +Message-Id: <20190213172926.21740-1-farosas@linux.ibm.com> +Reviewed-by: Greg Kurz +Signed-off-by: David Gibson +(cherry picked from commit cc941111a5bc5f498185fa3824c3b6579c7d45ad) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1674438 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=20382722 +Branch: rhel8/master-3.1.0 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index bd2abb7..c1478bf 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -688,14 +688,14 @@ static int spapr_populate_drmem_v2(sPAPRMachineState *spapr, void *fdt, + int offset, MemoryDeviceInfoList *dimms) + { + MachineState *machine = MACHINE(spapr); +- uint8_t *int_buf, *cur_index, buf_len; ++ uint8_t *int_buf, *cur_index; + int ret; + uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; + uint64_t addr, cur_addr, size; + uint32_t nr_boot_lmbs = (machine->device_memory->base / lmb_size); + uint64_t mem_end = machine->device_memory->base + + memory_region_size(&machine->device_memory->mr); +- uint32_t node, nr_entries = 0; ++ uint32_t node, buf_len, nr_entries = 0; + sPAPRDRConnector *drc; + DrconfCellQueue *elem, *next; + MemoryDeviceInfoList *info; +-- +1.8.3.1 + diff --git a/kvm-vnc-detect-and-optimize-pageflips.patch b/kvm-vnc-detect-and-optimize-pageflips.patch new file mode 100644 index 0000000..d738230 --- /dev/null +++ b/kvm-vnc-detect-and-optimize-pageflips.patch @@ -0,0 +1,95 @@ +From 
c233fdd40580baf460b34655aa72a9a489b4501b Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Thu, 7 Mar 2019 09:11:50 +0000 +Subject: [PATCH 5/9] vnc: detect and optimize pageflips +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Gerd Hoffmann +Message-id: <20190307091150.6551-2-kraxel@redhat.com> +Patchwork-id: 84816 +O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/1] vnc: detect and optimize pageflips +Bugzilla: 1666206 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: John Snow + +When size and format of the display surface stays the same we can just +tag the guest display as dirty and be done with it. + +There is no need to resize the vnc server display or to touch the +vnc client dirty bits. On the next refresh cycle +vnc_refresh_server_surface() will check for actual display content +changes and update the client dirty bits as needed. + +The desktop resize and framebuffer format notifications to the vnc +client will be skipped too. + +Signed-off-by: Gerd Hoffmann +Reviewed-by: Daniel P. Berrangé +Message-id: 20190116101049.8929-1-kraxel@redhat.com +(cherry picked from commit 61e77a5f0c788495566aecb437bcf6b2cf9cda97) +Signed-off-by: Danilo C. L. 
de Paula +--- + ui/vnc.c | 25 ++++++++++++++++++++++--- + 1 file changed, 22 insertions(+), 3 deletions(-) + +diff --git a/ui/vnc.c b/ui/vnc.c +index d7903a7..765bdc5 100644 +--- a/ui/vnc.c ++++ b/ui/vnc.c +@@ -742,6 +742,17 @@ static void vnc_update_server_surface(VncDisplay *vd) + width, height); + } + ++static bool vnc_check_pageflip(DisplaySurface *s1, ++ DisplaySurface *s2) ++{ ++ return (s1 != NULL && ++ s2 != NULL && ++ surface_width(s1) == surface_width(s2) && ++ surface_height(s1) == surface_height(s2) && ++ surface_format(s1) == surface_format(s2)); ++ ++} ++ + static void vnc_dpy_switch(DisplayChangeListener *dcl, + DisplaySurface *surface) + { +@@ -749,6 +760,7 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl, + "Display output is not active."; + static DisplaySurface *placeholder; + VncDisplay *vd = container_of(dcl, VncDisplay, dcl); ++ bool pageflip = vnc_check_pageflip(vd->ds, surface); + VncState *vs; + + if (surface == NULL) { +@@ -761,14 +773,21 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl, + vnc_abort_display_jobs(vd); + vd->ds = surface; + +- /* server surface */ +- vnc_update_server_surface(vd); +- + /* guest surface */ + qemu_pixman_image_unref(vd->guest.fb); + vd->guest.fb = pixman_image_ref(surface->image); + vd->guest.format = surface->format; + ++ if (pageflip) { ++ vnc_set_area_dirty(vd->guest.dirty, vd, 0, 0, ++ surface_width(surface), ++ surface_height(surface)); ++ return; ++ } ++ ++ /* server surface */ ++ vnc_update_server_surface(vd); ++ + QTAILQ_FOREACH(vs, &vd->clients, next) { + vnc_colordepth(vs); + vnc_desktop_resize(vs); +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index ae8e0a4..9ed199c 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -39,6 +39,7 @@ %endif %ifarch s390x %global kvm_target s390x + %global have_kvm_setup 1 %endif %ifarch ppc %global kvm_target ppc @@ -68,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 
20%{?dist} +Release: 21%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -222,6 +223,20 @@ Patch67: kvm-migration-Fix-cancel-state.patch Patch68: kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch # For bz#1686260 - stibp is missing on qemu 3.0 and qemu 3.1 Patch69: kvm-i386-Add-stibp-flag-name.patch +# For bz#1674438 - RHEL8.0 - Guest reboot fails after memory hotplug multiple times (kvm) +Patch71: kvm-spapr-fix-out-of-bounds-write-in-spapr_populate_drme.patch +# For bz#1655065 - [rhel.8.0][fast train]'qemu-img measure' size does not match the real allocated size for luks-inside-qcow2 image +Patch72: kvm-qcow2-include-LUKS-payload-overhead-in-qemu-img-meas.patch +# For bz#1655065 - [rhel.8.0][fast train]'qemu-img measure' size does not match the real allocated size for luks-inside-qcow2 image +Patch73: kvm-iotests-add-LUKS-payload-overhead-to-178-qemu-img-me.patch +# For bz#1666206 - vnc server should detect page-flips and avoid sending fullscreen updates then. 
+Patch74: kvm-vnc-detect-and-optimize-pageflips.patch +# For bz#1669053 - Guest call trace when boot with nvdimm device backed by /dev/dax +Patch76: kvm-hostmem-file-reject-invalid-pmem-file-sizes.patch +# For bz#1687582 - QEMU IOTEST 200 fails with 'virtio-scsi-pci is not a valid device model name' +Patch77: kvm-iotests-Fix-test-200-on-s390x-without-virtio-pci.patch +# For bz#1652572 - QEMU core dumped if stop nfs service during migration +Patch78: kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -366,6 +381,7 @@ Requires: glusterfs-api >= 3.6.0 %endif %if %{have_kvm_setup} Requires(post): systemd-units +Requires(preun): systemd-units %ifarch %{power64} Requires: powerpc-utils %endif @@ -929,8 +945,8 @@ chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/block-*.so export DIFF=diff; make check V=1 pushd tests/qemu-iotests ./check -v -raw 001 002 003 004 005 008 009 010 011 012 021 025 032 033 045 048 052 063 077 086 101 104 106 120 132 140 143 145 147 150 152 157 159 160 162 170 171 175 181 184 194 205 208 218 221 222 226 227 232 -./check -v -qcow2 001 002 003 004 005 007 008 009 010 011 012 013 017 018 019 020 021 022 024 025 027 028 029 031 032 033 034 035 036 037 038 039 042 043 046 047 048 049 050 052 053 054 056 057 058 060 061 062 063 065 066 068 069 072 073 074 080 085 086 087 089 090 091 095 096 097 098 102 103 104 105 107 108 110 111 114 117 120 122 126 127 130 132 133 134 137 138 140 141 142 143 144 145 147 150 151 152 154 156 157 158 159 162 165 170 174 176 177 179 181 184 187 188 189 190 191 194 195 196 198 201 202 203 204 205 206 208 209 214 216 217 218 222 223 226 227 232 -./check -v -luks 001 002 003 004 005 008 009 010 011 012 021 032 033 048 052 140 143 145 157 162 174 181 184 208 218 227 +./check -v -qcow2 001 002 003 004 005 007 008 009 010 011 012 017 018 019 020 021 022 024 025 027 028 029 031 032 033 034 035 036 037 038 039 042 043 046 047 048 049 050 052 053 054 056 057 058 062 063 065 
066 068 069 072 073 074 080 085 086 087 089 090 091 095 096 097 098 102 103 104 105 107 108 110 111 114 117 120 126 127 130 132 133 134 137 138 140 141 142 143 144 145 147 150 151 152 156 157 158 159 162 165 170 174 177 179 181 184 187 188 189 190 191 194 195 196 198 201 202 203 204 205 206 208 209 214 216 217 218 222 223 226 227 232 +./check -v -luks 001 002 003 004 005 008 009 010 011 012 021 032 033 052 140 143 145 157 162 174 181 184 208 218 227 ./check -v -nbd 001 002 003 004 005 008 009 010 011 021 032 033 045 077 094 104 119 123 132 143 145 147 151 152 162 181 184 194 205 208 218 222 popd @@ -948,6 +964,11 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : fi %endif +%if %{have_kvm_setup} +%preun -n qemu-kvm-core +%systemd_preun kvm-setup.service +%endif + %post -n qemu-kvm-common %systemd_post ksm.service %systemd_post ksmtuned.service @@ -1114,6 +1135,35 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Thu Apr 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-21.el8 +- kvm-Remove-7-qcow2-and-luks-iotests-that-are-taking-25-s.patch [bz#1683473] +- kvm-spapr-fix-out-of-bounds-write-in-spapr_populate_drme.patch [bz#1674438] +- kvm-qcow2-include-LUKS-payload-overhead-in-qemu-img-meas.patch [bz#1655065] +- kvm-iotests-add-LUKS-payload-overhead-to-178-qemu-img-me.patch [bz#1655065] +- kvm-vnc-detect-and-optimize-pageflips.patch [bz#1666206] +- kvm-Load-kvm-module-during-boot.patch [bz#1676907 bz#1685995] +- kvm-hostmem-file-reject-invalid-pmem-file-sizes.patch [bz#1669053] +- kvm-iotests-Fix-test-200-on-s390x-without-virtio-pci.patch [bz#1687582] +- kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch [bz#1652572] +- Resolves: bz#1652572 + (QEMU core dumped if stop nfs service during migration) +- Resolves: bz#1655065 + ([rhel.8.0][fast train]'qemu-img measure' size does not match the real allocated size for luks-inside-qcow2 image) +- Resolves: bz#1666206 + (vnc server should detect page-flips and avoid sending fullscreen 
updates then.) +- Resolves: bz#1669053 + (Guest call trace when boot with nvdimm device backed by /dev/dax) +- Resolves: bz#1674438 + (RHEL8.0 - Guest reboot fails after memory hotplug multiple times (kvm)) +- Resolves: bz#1676907 + (/dev/kvm device exists but kernel module is not loaded on boot up causing VM start to fail in libvirt) +- Resolves: bz#1683473 + (Remove 7 qcow2 & luks iotests from rhel8 fast train build %check phase) +- Resolves: bz#1685995 + (/dev/kvm device exists but kernel module is not loaded on boot up causing VM start to fail in libvirt) +- Resolves: bz#1687582 + (QEMU IOTEST 200 fails with 'virtio-scsi-pci is not a valid device model name') + * Fri Mar 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-20.el8 - kvm-i386-Add-stibp-flag-name.patch [bz#1686260] - Resolves: bz#1686260 From dfe0f200488d76100d2209f886c9f5897e60e420 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Mon, 15 Apr 2019 13:31:59 +0100 Subject: [PATCH 029/195] * Mon Apr 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-22.el8 - kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch [bz#1687578] - kvm-i386-Make-arch_capabilities-migratable.patch [bz#1687578] - Resolves: bz#1687578 (Incorrect CVE vulnerabilities reported on Cascade Lake cpus) --- ...86-Make-arch_capabilities-migratable.patch | 42 ++++++++++++ ...-arch_capabilities-if-MSR-can-t-be-s.patch | 68 +++++++++++++++++++ qemu-kvm.spec | 12 +++- 3 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 kvm-i386-Make-arch_capabilities-migratable.patch create mode 100644 kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch diff --git a/kvm-i386-Make-arch_capabilities-migratable.patch b/kvm-i386-Make-arch_capabilities-migratable.patch new file mode 100644 index 0000000..afcc947 --- /dev/null +++ b/kvm-i386-Make-arch_capabilities-migratable.patch @@ -0,0 +1,42 @@ +From f906636aa5024f6e64e2a1802b2eca448085d06a Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Wed, 10 Apr 2019 20:50:03 +0100 
+Subject: [PATCH 2/2] i386: Make arch_capabilities migratable + +RH-Author: Eduardo Habkost +Message-id: <20190410205003.18916-3-ehabkost@redhat.com> +Patchwork-id: 85551 +O-Subject: [RHEL-AV-8.0.1 qemu-kvm PATCH 2/2] i386: Make arch_capabilities migratable +Bugzilla: 1687578 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Igor Mammedov + +Now that kvm_arch_get_supported_cpuid() will only return +arch_capabilities if QEMU is able to initialize the MSR properly, +we know that the feature is safely migratable. + +Signed-off-by: Eduardo Habkost +Message-Id: <20190125220606.4864-3-ehabkost@redhat.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 014018e19b3c54dd1bf5072bc912ceffea40abe8) +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index c115572..d92c128 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1088,7 +1088,6 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .reg = R_EDX, + }, + .tcg_features = TCG_7_0_EDX_FEATURES, +- .unmigratable_flags = CPUID_7_0_EDX_ARCH_CAPABILITIES, + }, + [FEAT_8000_0007_EDX] = { + .type = CPUID_FEATURE_WORD, +-- +1.8.3.1 + diff --git a/kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch b/kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch new file mode 100644 index 0000000..ce9be0b --- /dev/null +++ b/kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch @@ -0,0 +1,68 @@ +From 03f812fa6ea821f5d1c968ab6fc0fb92054f9a1b Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Wed, 10 Apr 2019 20:50:02 +0100 +Subject: [PATCH 1/2] i386: kvm: Disable arch_capabilities if MSR can't be set + +RH-Author: Eduardo Habkost +Message-id: <20190410205003.18916-2-ehabkost@redhat.com> +Patchwork-id: 85550 +O-Subject: [RHEL-AV-8.0.1 qemu-kvm PATCH 1/2] i386: kvm: Disable arch_capabilities if MSR can't be set +Bugzilla: 1687578 +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Igor Mammedov + +KVM has two bugs in the handling of MSR_IA32_ARCH_CAPABILITIES: + +1) Linux commit 1eaafe91a0df ("kvm: x86: IA32_ARCH_CAPABILITIES + is always supported") makes GET_SUPPORTED_CPUID return + arch_capabilities even if running on SVM. This makes "-cpu + host,migratable=off" incorrectly expose arch_capabilities on CPUID on + AMD hosts (where the MSR is not emulated by KVM). + +2) KVM_GET_MSR_INDEX_LIST does not return MSR_IA32_ARCH_CAPABILITIES if + the MSR is not supported by the host CPU. This makes QEMU not + initialize the MSR properly at kvm_put_msrs() on those hosts. + +Work around both bugs on the QEMU side, by checking if the MSR +was returned by KVM_GET_MSR_INDEX_LIST before returning the +feature flag on kvm_arch_get_supported_cpuid(). + +This has the unfortunate side effect of making arch_capabilities +unavailable on hosts without hardware support for the MSR until bug #2 +is fixed on KVM, but I can't see another way to work around bug #1 +without that side effect. + +Signed-off-by: Eduardo Habkost +Message-Id: <20190125220606.4864-2-ehabkost@redhat.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 485b1d256bcb0874bcde0223727c159b6837e6f8) +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/kvm.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 0c9a5e4..720948a 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -389,6 +389,15 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, + if (host_tsx_blacklisted()) { + ret &= ~(CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_HLE); + } ++ } else if (function == 7 && index == 0 && reg == R_EDX) { ++ /* ++ * Linux v4.17-v4.20 incorrectly return ARCH_CAPABILITIES on SVM hosts. ++ * We can detect the bug by checking if MSR_IA32_ARCH_CAPABILITIES is ++ * returned by KVM_GET_MSR_INDEX_LIST. 
++ */ ++ if (!has_msr_arch_capabs) { ++ ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES; ++ } + } else if (function == 0x80000001 && reg == R_ECX) { + /* + * It's safe to enable TOPOEXT even if it's not returned by +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 9ed199c..9a4c503 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 21%{?dist} +Release: 22%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -237,6 +237,10 @@ Patch76: kvm-hostmem-file-reject-invalid-pmem-file-sizes.patch Patch77: kvm-iotests-Fix-test-200-on-s390x-without-virtio-pci.patch # For bz#1652572 - QEMU core dumped if stop nfs service during migration Patch78: kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch +# For bz#1687578 - Incorrect CVE vulnerabilities reported on Cascade Lake cpus +Patch79: kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch +# For bz#1687578 - Incorrect CVE vulnerabilities reported on Cascade Lake cpus +Patch80: kvm-i386-Make-arch_capabilities-migratable.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1135,6 +1139,12 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Apr 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-22.el8 +- kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch [bz#1687578] +- kvm-i386-Make-arch_capabilities-migratable.patch [bz#1687578] +- Resolves: bz#1687578 + (Incorrect CVE vulnerabilities reported on Cascade Lake cpus) + * Thu Apr 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-21.el8 - kvm-Remove-7-qcow2-and-luks-iotests-that-are-taking-25-s.patch [bz#1683473] - kvm-spapr-fix-out-of-bounds-write-in-spapr_populate_drme.patch [bz#1674438] From 7d4fc9ff6849c3fcfcf5e27f94d1206908cf951d Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Tue, 23 Apr 2019 00:12:35 +0100 Subject: [PATCH 031/195] * Tue Apr 23 2019 Danilo Cesar Lemes de Paula - 3.1.0-23.el8 - kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch [bz#1693173] - Resolves: bz#1693173 (CVE-2018-20815 qemu-kvm: QEMU: device_tree: heap buffer overflow while loading device tree blob [rhel-av-8]) --- ...integer-overflowing-in-load_device_t.patch | 60 +++++++++++++++++++ qemu-kvm.spec | 9 ++- 2 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch diff --git a/kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch b/kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch new file mode 100644 index 0000000..3ff218b --- /dev/null +++ b/kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch @@ -0,0 +1,60 @@ +From 1aefd1b8a1dbbf63d28901081102b31455f96290 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Mon, 15 Apr 2019 12:22:02 +0100 +Subject: [PATCH] device_tree: Fix integer overflowing in load_device_tree() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Sergio Lopez Pascual +Message-id: <20190415122202.52108-2-slp@redhat.com> +Patchwork-id: 85670 +O-Subject: [RHEL-AV-8.0.1 qemu-kvm PATCH 1/1] device_tree: Fix integer overflowing in load_device_tree() +Bugzilla: 1693173 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella + +From: Markus Armbruster + +If the value of get_image_size() exceeds INT_MAX / 2 - 10000, the +computation of @dt_size overflows to a negative number, which then +gets converted to a very large size_t for g_malloc0() and +load_image_size(). In the (fortunately improbable) case g_malloc0() +succeeds and load_image_size() survives, we'd assign the negative +number to *sizep. What that would do to the callers I can't say, but +it's unlikely to be good. 
+ +Fix by rejecting images whose size would overflow. + +Reported-by: Kurtis Miller +Signed-off-by: Markus Armbruster +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Alistair Francis +Message-Id: <20190409174018.25798-1-armbru@redhat.com> +(cherry picked from 065e6298a75164b4347682b63381dbe752c2b156) +Signed-off-by: Sergio Lopez + +Signed-off-by: Danilo C. L. de Paula +--- + device_tree.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/device_tree.c b/device_tree.c +index 6d9c972..fe61027 100644 +--- a/device_tree.c ++++ b/device_tree.c +@@ -84,6 +84,10 @@ void *load_device_tree(const char *filename_path, int *sizep) + filename_path); + goto fail; + } ++ if (dt_size > INT_MAX / 2 - 10000) { ++ error_report("Device tree file '%s' is too large", filename_path); ++ goto fail; ++ } + + /* Expand to 2x size to give enough room for manipulation. */ + dt_size += 10000; +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 9a4c503..cac22f3 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 22%{?dist} +Release: 23%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -241,6 +241,8 @@ Patch78: kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch Patch79: kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch # For bz#1687578 - Incorrect CVE vulnerabilities reported on Cascade Lake cpus Patch80: kvm-i386-Make-arch_capabilities-migratable.patch +# For bz#1693173 - CVE-2018-20815 qemu-kvm: QEMU: device_tree: heap buffer overflow while loading device tree blob [rhel-av-8] +Patch81: kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1139,6 +1141,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Apr 23 2019 Danilo Cesar Lemes de Paula - 3.1.0-23.el8 +- kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch [bz#1693173] +- Resolves: bz#1693173 + (CVE-2018-20815 qemu-kvm: QEMU: device_tree: heap buffer overflow while loading device tree blob [rhel-av-8]) + * Mon Apr 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-22.el8 - kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch [bz#1687578] - kvm-i386-Make-arch_capabilities-migratable.patch [bz#1687578] From c82fe81190a6446b2474364410817bb7e46280d7 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Fri, 26 Apr 2019 14:54:36 +0100 Subject: [PATCH 032/195] * Fri Apr 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-24.el8 - kvm-x86-host-phys-bits-limit-option.patch [bz#1688915] - kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch [bz#1688915] - Resolves: bz#1688915 ([Intel 8.0 Alpha] physical bits should <= 48 when host with 5level paging &EPT5 and qemu command with "-cpu qemu64" parameters.) 
--- ...ys-bits-limit-48-on-rhel-machine-typ.patch | 57 +++++++++++ kvm-x86-host-phys-bits-limit-option.patch | 97 +++++++++++++++++++ qemu-kvm.spec | 12 ++- 3 files changed, 165 insertions(+), 1 deletion(-) create mode 100644 kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch create mode 100644 kvm-x86-host-phys-bits-limit-option.patch diff --git a/kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch b/kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch new file mode 100644 index 0000000..09ab876 --- /dev/null +++ b/kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch @@ -0,0 +1,57 @@ +From e204c887357f2d2ee1df5436a7d7f68b227c4b64 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Fri, 22 Mar 2019 17:45:15 +0000 +Subject: [PATCH 2/2] rhel: Set host-phys-bits-limit=48 on rhel machine-types + +RH-Author: plai@redhat.com +Message-id: <1553276715-26278-3-git-send-email-plai@redhat.com> +Patchwork-id: 85126 +O-Subject: [RHEL8.0 qemu-kvm PATCH 2/2] rhel: Set host-phys-bits-limit=48 on rhel machine-types +Bugzilla: 1688915 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Michael S. Tsirkin + +From: Eduardo Habkost + +Upstream status: not applicable + +Currently we use the host physical address size by default on +VMs. This was a good default on most cases, but this is not the +case on host CPUs supporting 5-level EPT. On those cases, we +want VMs to use 4-level EPT by default. + +Ensure VMs will use 4-level EPT by default, by limiting physical +address bits to 48. + +Not applicable upstream because upstream doesn't set +host-phys-bits=on by default. + +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. de Paula +(cherry picked from commit 01a2ecb4c38fe4a35455ea706e76984ee8d5a769) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L. 
de Paula +--- + include/hw/i386/pc.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 782d728..de25407 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -992,6 +992,11 @@ extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); + .property = "host-phys-bits",\ + .value = "on",\ + },\ ++ { /* PC_RHEL_COMPAT */ \ ++ .driver = TYPE_X86_CPU,\ ++ .property = "host-phys-bits-limit",\ ++ .value = "48",\ ++ },\ + { /* PC_RHEL_COMPAT bz 1508330 */ \ + .driver = "vfio-pci",\ + .property = "x-no-geforce-quirks",\ +-- +1.8.3.1 + diff --git a/kvm-x86-host-phys-bits-limit-option.patch b/kvm-x86-host-phys-bits-limit-option.patch new file mode 100644 index 0000000..4060a98 --- /dev/null +++ b/kvm-x86-host-phys-bits-limit-option.patch @@ -0,0 +1,97 @@ +From 5588db6fc02b530c73e51b43a0119562aa93f51d Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Fri, 22 Mar 2019 17:45:14 +0000 +Subject: [PATCH 1/2] x86: host-phys-bits-limit option + +RH-Author: plai@redhat.com +Message-id: <1553276715-26278-2-git-send-email-plai@redhat.com> +Patchwork-id: 85128 +O-Subject: [RHEL8.0 qemu-kvm PATCH 1/2] x86: host-phys-bits-limit option +Bugzilla: 1688915 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Michael S. Tsirkin + +From: Eduardo Habkost + +Some downstream distributions of QEMU set host-phys-bits=on by +default. This worked very well for most use cases, because +phys-bits really didn't have huge consequences. The only +difference was on the CPUID data seen by guests, and on the +handling of reserved bits. + +This changed in KVM commit 855feb673640 ("KVM: MMU: Add 5 level +EPT & Shadow page table support"). Now choosing a large +phys-bits value for a VM has bigger impact: it will make KVM use +5-level EPT even when it's not really necessary. This means +using the host phys-bits value may not be the best choice. 
+ +Management software could address this problem by manually +configuring phys-bits depending on the size of the VM and the +amount of MMIO address space required for hotplug. But this is +not trivial to implement. + +However, there's another workaround that would work for most +cases: keep using the host phys-bits value, but only if it's +smaller than 48. This patch makes this possible by introducing a +new "-cpu" option: "host-phys-bits-limit". Management software +or users can make sure they will always use 4-level EPT using: +"host-phys-bits=on,host-phys-bits-limit=48". + +This behavior is still not enabled by default because QEMU +doesn't enable host-phys-bits=on by default. But users, +management software, or downstream distributions may choose to +change their defaults using the new option. + +Signed-off-by: Eduardo Habkost +Message-Id: <20181211192527.13254-1-ehabkost@redhat.com> +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. de Paula +(cherry picked from commit b6a062c64f9639558a88f46edc3dd76b54b26bb5) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 5 +++++ + target/i386/cpu.h | 3 +++ + 2 files changed, 8 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index d92c128..0c2e1c7 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5211,6 +5211,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + if (cpu->host_phys_bits) { + /* The user asked for us to use the host physical bits */ + cpu->phys_bits = host_phys_bits; ++ if (cpu->host_phys_bits_limit && ++ cpu->phys_bits > cpu->host_phys_bits_limit) { ++ cpu->phys_bits = cpu->host_phys_bits_limit; ++ } + } + + /* Print a warning if the user set it to a value that's not the +@@ -5798,6 +5802,7 @@ static Property x86_cpu_properties[] = { + DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), + DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0), + DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), ++ DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0), + DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), + DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, UINT32_MAX), + DEFINE_PROP_UINT32("xlevel", X86CPU, env.cpuid_xlevel, UINT32_MAX), +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 26412f1..db49f44 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -1458,6 +1458,9 @@ struct X86CPU { + /* if true override the phys_bits value with a value read from the host */ + bool host_phys_bits; + ++ /* if set, limit maximum value for phys_bits when host_phys_bits is true */ ++ uint8_t host_phys_bits_limit; ++ + /* Stop SMI delivery for migration compatibility with old machines */ + bool kvm_no_smi_migration; + +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index cac22f3..890e320 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 3.1.0 -Release: 23%{?dist} +Release: 24%{?dist} # Epoch because we pushed a 
qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -243,6 +243,10 @@ Patch79: kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch Patch80: kvm-i386-Make-arch_capabilities-migratable.patch # For bz#1693173 - CVE-2018-20815 qemu-kvm: QEMU: device_tree: heap buffer overflow while loading device tree blob [rhel-av-8] Patch81: kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch +# For bz#1688915 - [Intel 8.0 Alpha] physical bits should <= 48 when host with 5level paging &EPT5 and qemu command with "-cpu qemu64" parameters. +Patch82: kvm-x86-host-phys-bits-limit-option.patch +# For bz#1688915 - [Intel 8.0 Alpha] physical bits should <= 48 when host with 5level paging &EPT5 and qemu command with "-cpu qemu64" parameters. +Patch83: kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1141,6 +1145,12 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Fri Apr 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-24.el8 +- kvm-x86-host-phys-bits-limit-option.patch [bz#1688915] +- kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch [bz#1688915] +- Resolves: bz#1688915 + ([Intel 8.0 Alpha] physical bits should <= 48 when host with 5level paging &EPT5 and qemu command with "-cpu qemu64" parameters.) + * Tue Apr 23 2019 Danilo Cesar Lemes de Paula - 3.1.0-23.el8 - kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch [bz#1693173] - Resolves: bz#1693173 From 837556b8d05cf352121fb3ea1e4da881ab15d8bd Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Tue, 7 May 2019 18:00:36 -0300 Subject: [PATCH 033/195] Rebase to qemu-4.0.0 This is the rebase to qemu-kvm-4.0.0, based on qemu-kvm-3.1.0-23 commits --- .gitignore | 1 + 0004-Initial-redhat-build.patch | 66 +- 0005-Enable-disable-devices-for-RHEL.patch | 914 +++++------ ...Machine-type-related-general-changes.patch | 702 ++++---- 0007-Add-aarch64-machine-types.patch | 117 +- 0008-Add-ppc64-machine-types.patch | 307 ++-- 0009-Add-s390x-machine-types.patch | 77 +- 0010-Add-x86_64-machine-types.patch | 1406 ++++++++++------- 0011-Enable-make-check.patch | 124 +- 0012-Use-kvm-by-default.patch | 41 - ...mber-of-devices-that-can-be-assigned.patch | 9 +- ...Add-support-statement-to-help-output.patch | 11 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 18 +- ... => 0015-Add-support-for-simpletrace.patch | 21 +- ...documentation-instead-of-qemu-system.patch | 125 +- ...17-usb-xhci-Fix-PCI-capability-order.patch | 11 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 10 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 16 +- ... 
0020-doc-fix-the-configuration-path.patch | 12 +- ...tests-add-Linux-initrd-checking-test.patch | 105 -- ...evert-i386-Add-CPUID-bit-for-PCONFIG.patch | 57 - ...-virt-rhel8.0.0-machine-type-for-ARM.patch | 70 - ...4-Set-virt-rhel8.0.0-max_cpus-to-512.patch | 50 - ...h64-Use-256MB-ECAM-region-by-default.patch | 38 - ...o-read-only-for-ro-whitelist-drivers.patch | 66 - ...date_cache-error-path-for-parent-act.patch | 69 - ...le-posix-do-not-fail-on-unlock-bytes.patch | 58 - ...M_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch | 119 -- kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch | 66 - ...ignore-ESRCH-in-qemu_cpu_kick_thread.patch | 62 - ...integer-overflowing-in-load_device_t.patch | 60 - ...ppy-controllers-to-RHEL-7-machine-ty.patch | 55 - ...nstream-disablement-of-device-floppy.patch | 41 - ...low-global-properties-to-be-optional.patch | 84 - ...-file-reject-invalid-pmem-file-sizes.patch | 183 --- kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch | 50 - ...rtio-ccw-Add-machine-types-for-RHEL8.patch | 106 -- kvm-i386-Add-stibp-flag-name.patch | 51 - ...86-Make-arch_capabilities-migratable.patch | 42 - ...-arch_capabilities-if-MSR-can-t-be-s.patch | 68 - ...omment-explaining-why-.feat_names-ar.patch | 48 - ...HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch | 211 --- ...INTEL_PT-CPUID-bit-from-named-CPU-mo.patch | 64 - ...new-CPUID-PCONFIG-from-Icelake-Serve.patch | 48 - ...-client-doesn-t-unlink-server-socket.patch | 294 ---- ...test-200-on-s390x-without-virtio-pci.patch | 62 - ...for-throttling-tgm-unregister-iothre.patch | 122 -- ...-payload-overhead-to-178-qemu-img-me.patch | 113 -- ...-Fix-handling-when-not-interpolating.patch | 122 -- kvm-migration-Fix-cancel-state.patch | 75 - ...dma-Fix-qemu_rdma_cleanup-null-check.patch | 54 - ...migration-rdma-unregister-fd-handler.patch | 53 - ...hugetlbfs-misaligned-length-in-ppc64.patch | 177 --- kvm-mmap-alloc-unfold-qemu_ram_mmap.patch | 138 -- kvm-pc-7.5-compat-entries.patch | 55 - ...for-pc-i440fx-rhel7.6.0-machine-type.patch | 41 - 
kvm-pc-Add-pc-q35-8.0.0-machine-type.patch | 73 - ...e-smi-count-off-to-PC_RHEL7_6_COMPAT.patch | 60 - kvm-pc-PC_RHEL7_6_COMPAT.patch | 58 - ...KS-payload-overhead-in-qemu-img-meas.patch | 148 -- ...efine-pseries-rhel8.0.0-machine-type.patch | 101 -- ...ys-bits-limit-48-on-rhel-machine-typ.patch | 57 - ...ly-stop-the-KVM-TOD-while-the-guest-.patch | 250 --- ...ire-the-AioContext-in-scsi_-_realize.patch | 187 --- kvm-scsi-disk-Add-device_id-property.patch | 98 -- ...-Don-t-use-empty-string-as-device-id.patch | 71 - ...Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch | 124 -- ...x-associativity-domains-property-num.patch | 149 -- ...-bounds-write-in-spapr_populate_drme.patch | 74 - ...able-MPX-support-on-named-CPU-models.patch | 153 -- ...-fix-restart-coroutine-iothread-race.patch | 120 -- ...-for-registering-virtio-device-types.patch | 553 ------- ...ble-legacy-disable-modern-compat-pro.patch | 73 - ...ersion-specific-variants-of-virtio-P.patch | 519 ------ ...id-devices-with-different-iothreads-.patch | 116 -- ...-BlockBackend-back-to-the-main-AioCo.patch | 186 --- kvm-vnc-detect-and-optimize-pageflips.patch | 95 -- kvm-x86-host-phys-bits-limit-option.patch | 97 -- qemu-kvm.spec | 227 +-- sources | 2 +- 80 files changed, 2142 insertions(+), 8614 deletions(-) delete mode 100644 0012-Use-kvm-by-default.patch rename 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch => 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch (91%) rename 0014-Add-support-statement-to-help-output.patch => 0013-Add-support-statement-to-help-output.patch (86%) rename 0015-globally-limit-the-maximum-number-of-CPUs.patch => 0014-globally-limit-the-maximum-number-of-CPUs.patch (91%) rename 0016-Add-support-for-simpletrace.patch => 0015-Add-support-for-simpletrace.patch (91%) rename 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch => 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch (91%) rename 0018-usb-xhci-Fix-PCI-capability-order.patch => 
0017-usb-xhci-Fix-PCI-capability-order.patch (91%) rename 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch => 0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch (93%) rename kvm-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch => 0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch (80%) rename kvm-doc-fix-the-configuration-path.patch => 0020-doc-fix-the-configuration-path.patch (92%) delete mode 100644 kvm-Acceptance-tests-add-Linux-initrd-checking-test.patch delete mode 100644 kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch delete mode 100644 kvm-aarch64-Add-virt-rhel8.0.0-machine-type-for-ARM.patch delete mode 100644 kvm-aarch64-Set-virt-rhel8.0.0-max_cpus-to-512.patch delete mode 100644 kvm-aarch64-Use-256MB-ECAM-region-by-default.patch delete mode 100644 kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch delete mode 100644 kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch delete mode 100644 kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch delete mode 100644 kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch delete mode 100644 kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch delete mode 100644 kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch delete mode 100644 kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch delete mode 100644 kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch delete mode 100644 kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch delete mode 100644 kvm-globals-Allow-global-properties-to-be-optional.patch delete mode 100644 kvm-hostmem-file-reject-invalid-pmem-file-sizes.patch delete mode 100644 kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch delete mode 100644 kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch delete mode 100644 kvm-i386-Add-stibp-flag-name.patch delete mode 100644 kvm-i386-Make-arch_capabilities-migratable.patch delete mode 100644 kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch delete 
mode 100644 kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch delete mode 100644 kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch delete mode 100644 kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch delete mode 100644 kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch delete mode 100644 kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch delete mode 100644 kvm-iotests-Fix-test-200-on-s390x-without-virtio-pci.patch delete mode 100644 kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch delete mode 100644 kvm-iotests-add-LUKS-payload-overhead-to-178-qemu-img-me.patch delete mode 100644 kvm-json-Fix-handling-when-not-interpolating.patch delete mode 100644 kvm-migration-Fix-cancel-state.patch delete mode 100644 kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch delete mode 100644 kvm-migration-rdma-unregister-fd-handler.patch delete mode 100644 kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch delete mode 100644 kvm-mmap-alloc-unfold-qemu_ram_mmap.patch delete mode 100644 kvm-pc-7.5-compat-entries.patch delete mode 100644 kvm-pc-Add-compat-for-pc-i440fx-rhel7.6.0-machine-type.patch delete mode 100644 kvm-pc-Add-pc-q35-8.0.0-machine-type.patch delete mode 100644 kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch delete mode 100644 kvm-pc-PC_RHEL7_6_COMPAT.patch delete mode 100644 kvm-qcow2-include-LUKS-payload-overhead-in-qemu-img-meas.patch delete mode 100644 kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch delete mode 100644 kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch delete mode 100644 kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch delete mode 100644 kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch delete mode 100644 kvm-scsi-disk-Add-device_id-property.patch delete mode 100644 kvm-scsi-disk-Don-t-use-empty-string-as-device-id.patch delete mode 100644 kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch delete mode 
100644 kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch delete mode 100644 kvm-spapr-fix-out-of-bounds-write-in-spapr_populate_drme.patch delete mode 100644 kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch delete mode 100644 kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch delete mode 100644 kvm-virtio-Helper-for-registering-virtio-device-types.patch delete mode 100644 kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch delete mode 100644 kvm-virtio-Provide-version-specific-variants-of-virtio-P.patch delete mode 100644 kvm-virtio-scsi-Forbid-devices-with-different-iothreads-.patch delete mode 100644 kvm-virtio-scsi-Move-BlockBackend-back-to-the-main-AioCo.patch delete mode 100644 kvm-vnc-detect-and-optimize-pageflips.patch delete mode 100644 kvm-x86-host-phys-bits-limit-option.patch diff --git a/.gitignore b/.gitignore index 220f7bc..678d910 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /qemu-3.1.0.tar.xz +/qemu-4.0.0.tar.xz diff --git a/0004-Initial-redhat-build.patch b/0004-Initial-redhat-build.patch index 38ad215..75b88d3 100644 --- a/0004-Initial-redhat-build.patch +++ b/0004-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From 8ff84e7f181aa0f64b3b6178d9ac808c61f5bec8 Mon Sep 17 00:00:00 2001 +From 223a694c9878013afa2ae9024cb35fbc3a334174 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build @@ -13,35 +13,57 @@ several issues are fixed in QEMU tree: - Use "/share/qemu-kvm" as SHARE_SUFFIX - We reconfigured our share to qemu-kvm to be consistent with used name -This rebase includes changes up to qemu-kvm-2.12.0-47.el8 +This rebase includes changes up to qemu-kvm-3.1.0-23.el8 Rebase notes (3.1.0): - added new configure options +Rebase notes (4.0.0): +- Added dependency to perl-Test-Harness (upstream) +- Added dependency to python3-sphinx (upstream) +- Change location of icons (upstream) +- Remove .desktop file (added upstream) +- Added 
qemu-trace-stap (added upstream) +- Removed elf2dmp (added upstream) +- Remove .buildinfo +- Added pvh.bin rom (added upstream) +- Added interop documentation files +- Use python module instead of qemu.py (upstream) + Merged patches (3.1.0): - 01f0c9f RHEL8: Add disable configure options to qemu spec file - Spec file cleanups + +Merged patches (4.0.0): +- aa4297c Add edk2 Requires to qemu-kvm +- d124ff5779 Fixing brew build target +- eb204b5 Introduce the qemu-kvm-tests rpm +- 223cf0c Load kvm module during boot (partial) + +Signed-off-by: Danilo C. L. de Paula --- Makefile | 3 +- block/Makefile.objs | 2 +- - block/vxhs.c | 119 ++- + block/vxhs.c | 119 +- configure | 33 +- os-posix.c | 2 +- redhat/Makefile | 82 ++ - redhat/Makefile.common | 49 + - redhat/qemu-kvm.spec.template | 1813 +++++++++++++++++++++++++++++++++++++ + redhat/Makefile.common | 51 + + redhat/README.tests | 39 + + redhat/qemu-kvm.spec.template | 2082 +++++++++++++++++++++++++++++ redhat/scripts/process-patches.sh | 7 +- ui/vnc.c | 2 +- - 10 files changed, 2064 insertions(+), 48 deletions(-) + 11 files changed, 2374 insertions(+), 48 deletions(-) create mode 100644 redhat/Makefile create mode 100644 redhat/Makefile.common + create mode 100644 redhat/README.tests create mode 100644 redhat/qemu-kvm.spec.template diff --git a/Makefile b/Makefile -index f294718..152821a 100644 +index 04a0d45050..05f62eab3c 100644 --- a/Makefile +++ b/Makefile -@@ -503,6 +503,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM +@@ -470,6 +470,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM CAP_CFLAGS += -DCAPSTONE_HAS_ARM64 CAP_CFLAGS += -DCAPSTONE_HAS_POWERPC CAP_CFLAGS += -DCAPSTONE_HAS_X86 @@ -49,7 +71,7 @@ index f294718..152821a 100644 subdir-capstone: .git-submodule-status $(call quiet-command,$(MAKE) -C $(SRC_PATH)/capstone CAPSTONE_SHARED=no BUILDDIR="$(BUILD_DIR)/capstone" CC="$(CC)" AR="$(AR)" LD="$(LD)" RANLIB="$(RANLIB)" CFLAGS="$(CAP_CFLAGS)" $(SUBDIR_MAKEFLAGS) $(BUILD_DIR)/capstone/$(LIBCAPSTONE)) -@@ -830,7 +831,7 @@ 
install-doc: $(DOCS) +@@ -749,7 +750,7 @@ install-doc: $(DOCS) install-sphinxdocs $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)" ifdef CONFIG_POSIX $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" @@ -59,7 +81,7 @@ index f294718..152821a 100644 $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) docs/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" diff --git a/block/Makefile.objs b/block/Makefile.objs -index 46d585c..a244100 100644 +index 7a81892a52..f4cf03bed9 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -30,7 +30,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o @@ -72,7 +94,7 @@ index 46d585c..a244100 100644 block-obj-y += accounting.o dirty-bitmap.o block-obj-y += write-threshold.o diff --git a/block/vxhs.c b/block/vxhs.c -index 0cb0a00..9164b3e 100644 +index 2e18229ba4..3dbb9544bc 100644 --- a/block/vxhs.c +++ b/block/vxhs.c @@ -9,7 +9,8 @@ @@ -263,10 +285,10 @@ index 0cb0a00..9164b3e 100644 trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno); return -EIO; diff --git a/configure b/configure -index 0a3c6a7..98b05c5 100755 +index 1c563a7027..eb0a0dde86 100755 --- a/configure +++ b/configure -@@ -3459,7 +3459,7 @@ fi +@@ -3612,7 +3612,7 @@ fi glib_req_ver=2.40 glib_modules=gthread-2.0 @@ -275,10 +297,11 @@ index 0a3c6a7..98b05c5 100755 glib_modules="$glib_modules gmodule-export-2.0" fi -@@ -5494,33 +5494,6 @@ if compile_prog "" "" ; then +@@ -5755,33 +5755,6 @@ if compile_prog "" "" ; then + have_sysmacros=yes fi - ########################################## +-########################################## -# Veritas HyperScale block driver VxHS -# Check if libvxhs is installed - @@ -305,11 +328,10 @@ index 0a3c6a7..98b05c5 100755 - fi -fi - --########################################## + ########################################## # check for _Static_assert() - have_static_assert=no -@@ -6854,8 +6827,8 @@ if test "$pthread_setname_np" = "yes" ; then +@@ -7218,8 +7191,8 @@ elif test 
"$pthread_setname_np_wo_tid" = "yes" ; then fi if test "$vxhs" = "yes" ; then @@ -321,7 +343,7 @@ index 0a3c6a7..98b05c5 100755 if test "$libpmem" = "yes" ; then diff --git a/os-posix.c b/os-posix.c -index 4bd80e4..ca13206 100644 +index 4bd80e44e6..ca13206b31 100644 --- a/os-posix.c +++ b/os-posix.c @@ -82,7 +82,7 @@ void os_setup_signal_handling(void) @@ -334,10 +356,10 @@ index 4bd80e4..ca13206 100644 char *os_find_datadir(void) { diff --git a/ui/vnc.c b/ui/vnc.c -index 0c1b477..d7903a7 100644 +index 1871422e1d..8226524c16 100644 --- a/ui/vnc.c +++ b/ui/vnc.c -@@ -3962,7 +3962,7 @@ void vnc_display_open(const char *id, Error **errp) +@@ -3982,7 +3982,7 @@ void vnc_display_open(const char *id, Error **errp) #ifdef CONFIG_VNC_SASL if (sasl) { @@ -347,5 +369,5 @@ index 0c1b477..d7903a7 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -1.8.3.1 +2.20.1 diff --git a/0005-Enable-disable-devices-for-RHEL.patch b/0005-Enable-disable-devices-for-RHEL.patch index 246e148..d5b0ae8 100644 --- a/0005-Enable-disable-devices-for-RHEL.patch +++ b/0005-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 0533a6ee98fedfad9ca1466f5e6f5576169ed808 Mon Sep 17 00:00:00 2001 +From 8ee745d1dc16e5cd0f9eb18ed0671ad00e789501 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 11 Jan 2016 11:53:33 +0100 Subject: Enable/disable devices for RHEL @@ -15,63 +15,77 @@ Rebase notes (qemu 3.1.0) - Removed config_vga_isa.c changes as no longer needed - Removed new devices +Rebase notes (4.0.0): +- Added CONFIG_PCI_EXPRESS_GENERIC_BRIDGE for aarch64-softmmu.mak +- Added CONFIG_ARM_VIRT for aarch64-softmmu.mak +- Switch to KConfig (upstream) + - Using device whitelist + without-defualt-devices option + Merged patches (qemu 3.1.0): - d51e082 Re-enable CONFIG_HYPERV_TESTDEV - 4b889f3 Declare cirrus-vga as deprecated - b579d32 Do not build bluetooth support - 3eef52a Disable CONFIG_IPMI and CONFIG_I2C for ppc64 - 9caf292 Disable CONFIG_CAN_BUS and 
CONFIG_CAN_SJA1000 + +Merged patches (weekly-190301): +- 20a51f6 fdc: Revert downstream disablement of device "floppy" +- f869cc0 fdc: Restrict floppy controllers to RHEL-7 machine types + +Signed-off-by: Danilo C. L. de Paula --- - Makefile.objs | 4 ++-- - default-configs/aarch64-softmmu.mak | 40 ++++++++++++++++++++++++--------- - default-configs/i386-softmmu.mak | 26 +++++++++++----------- - default-configs/pci.mak | 44 ++++++++++++++++++------------------- - default-configs/ppc64-softmmu.mak | 30 ++++++++++++++++++------- - default-configs/s390x-softmmu.mak | 5 +++-- - default-configs/sound.mak | 8 +++---- - default-configs/usb.mak | 14 ++++++------ - default-configs/virtio.mak | 6 ++--- - hw/acpi/ich9.c | 4 ++-- - hw/arm/Makefile.objs | 2 +- - hw/block/fdc.c | 1 + - hw/bt/Makefile.objs | 4 ++-- - hw/char/serial-pci.c | 4 ++++ - hw/core/Makefile.objs | 10 +++++---- - hw/display/Makefile.objs | 5 +++-- - hw/display/cirrus_vga.c | 3 +++ - hw/i386/pc.c | 2 ++ - hw/ide/piix.c | 5 ++++- - hw/ide/via.c | 2 ++ - hw/input/pckbd.c | 2 ++ - hw/isa/Makefile.objs | 2 +- - hw/misc/Makefile.objs | 2 +- - hw/misc/ivshmem.c | 11 ++++++++++ - hw/net/e1000.c | 2 ++ - hw/pci-host/piix.c | 4 ++++ - hw/ppc/spapr_cpu_core.c | 2 ++ - hw/usb/ccid-card-emulated.c | 2 ++ - hw/vfio/Makefile.objs | 2 +- - hw/vfio/pci-quirks.c | 5 +++++ - hw/vfio/pci.c | 5 +++++ - hw/virtio/virtio-pci.c | 8 +++---- - qemu-options.hx | 7 ++---- - redhat/qemu-kvm.spec.template | 2 +- - stubs/Makefile.objs | 1 + - stubs/ide-isa.c | 13 +++++++++++ - target/arm/cpu.c | 4 +++- - target/i386/cpu.c | 35 ++++++++++++++++++++++------- - target/ppc/cpu-models.c | 12 ++++++++++ - target/s390x/cpu_models.c | 3 +++ - target/s390x/kvm.c | 8 +++++++ - vl.c | 8 ++++++- - 42 files changed, 253 insertions(+), 106 deletions(-) + Makefile.objs | 4 +- + default-configs/aarch64-rh-devices.mak | 40 +++++++++++ + default-configs/aarch64-softmmu.mak | 17 ++--- + default-configs/ppc64-rh-devices.mak | 30 +++++++++ + 
default-configs/ppc64-softmmu.mak | 8 ++- + default-configs/rh-virtio.mak | 10 +++ + default-configs/s390x-rh-devices.mak | 15 +++++ + default-configs/s390x-softmmu.mak | 4 +- + default-configs/x86_64-rh-devices.mak | 93 ++++++++++++++++++++++++++ + default-configs/x86_64-softmmu.mak | 4 +- + hw/acpi/ich9.c | 4 +- + hw/arm/Makefile.objs | 2 +- + hw/block/fdc.c | 10 +++ + hw/bt/Makefile.objs | 4 +- + hw/char/serial-pci.c | 4 ++ + hw/core/Makefile.objs | 9 +-- + hw/cpu/Makefile.objs | 3 +- + hw/display/Makefile.objs | 5 +- + hw/display/cirrus_vga.c | 3 + + hw/ide/piix.c | 5 +- + hw/input/pckbd.c | 2 + + hw/isa/Makefile.objs | 2 +- + hw/misc/Makefile.objs | 2 +- + hw/net/e1000.c | 2 + + hw/pci-host/piix.c | 4 ++ + hw/ppc/spapr_cpu_core.c | 2 + + hw/usb/ccid-card-emulated.c | 2 + + hw/vfio/pci-quirks.c | 5 ++ + hw/vfio/pci.c | 5 ++ + qemu-options.hx | 7 +- + redhat/qemu-kvm.spec.template | 6 +- + stubs/Makefile.objs | 1 + + stubs/ide-isa.c | 13 ++++ + target/arm/cpu.c | 4 +- + target/i386/cpu.c | 35 +++++++--- + target/ppc/cpu-models.c | 12 ++++ + target/s390x/cpu_models.c | 3 + + target/s390x/kvm.c | 8 +++ + vl.c | 8 ++- + 39 files changed, 348 insertions(+), 49 deletions(-) + create mode 100644 default-configs/aarch64-rh-devices.mak + create mode 100644 default-configs/ppc64-rh-devices.mak + create mode 100644 default-configs/rh-virtio.mak + create mode 100644 default-configs/s390x-rh-devices.mak + create mode 100644 default-configs/x86_64-rh-devices.mak create mode 100644 stubs/ide-isa.c diff --git a/Makefile.objs b/Makefile.objs -index 1e1ff38..26d578e 100644 +index cf065de5ed..0b78970763 100644 --- a/Makefile.objs +++ b/Makefile.objs -@@ -115,8 +115,8 @@ common-obj-y += replay/ +@@ -63,8 +63,8 @@ common-obj-y += replay/ common-obj-y += ui/ common-obj-m += ui/ @@ -82,13 +96,65 @@ index 1e1ff38..26d578e 100644 common-obj-y += dma-helpers.o common-obj-y += vl.o +diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak +new file 
mode 100644 +index 0000000000..13ce7c7987 +--- /dev/null ++++ b/default-configs/aarch64-rh-devices.mak +@@ -0,0 +1,40 @@ ++include rh-virtio.mak ++ ++CONFIG_ACPI=y ++CONFIG_ARM_GIC=y ++CONFIG_ARM_GIC_KVM=y ++CONFIG_ARM_SMMUV3=y ++CONFIG_ARM_V7M=y ++CONFIG_ARM_VIRT=y ++CONFIG_CAN_BUS=y ++CONFIG_CAN_SJA1000=y ++CONFIG_EDID=y ++CONFIG_FW_CFG_DMA=y ++CONFIG_GPIO_KEY=y ++CONFIG_I2C=y ++CONFIG_IOH3420=y ++CONFIG_IVSHMEM=y ++CONFIG_KVM=y ++CONFIG_LINUX=y ++CONFIG_PCI=y ++CONFIG_PCIE_PORT=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_EXPRESS=y ++CONFIG_PCI_EXPRESS_GENERIC_BRIDGE=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PFLASH_CFI01=y ++CONFIG_PL011=y ++CONFIG_PL031=y ++CONFIG_PL061=y ++CONFIG_PLATFORM_BUS=y ++CONFIG_SCSI=y ++CONFIG_SMBIOS=y ++CONFIG_SMBUS_EEPROM=y ++CONFIG_USB=y ++CONFIG_USB_XHCI=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_MMIO=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_XIO3130=y diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak -index 4ea9add..221e266 100644 +index 4ea9add003..cfff806b50 100644 --- a/default-configs/aarch64-softmmu.mak +++ b/default-configs/aarch64-softmmu.mak -@@ -1,12 +1,32 @@ +@@ -1,12 +1,9 @@ # Default configuration for aarch64-softmmu -- ++# CONFIG_AUX=y ++# CONFIG_DDC=y ++# CONFIG_DPCD=y ++# CONFIG_XLNX_ZYNQMP=y ++# CONFIG_XLNX_ZYNQMP_ARM=y ++# CONFIG_XLNX_VERSAL=y + -# We support all the 32 bit boards so need all their config -include arm-softmmu.mak - @@ -98,285 +164,225 @@ index 4ea9add..221e266 100644 -CONFIG_XLNX_ZYNQMP=y -CONFIG_XLNX_ZYNQMP_ARM=y -CONFIG_XLNX_VERSAL=y -+# CONFIG_AUX=y -+# CONFIG_DDC=y -+# CONFIG_DPCD=y -+# CONFIG_XLNX_ZYNQMP=y -+# CONFIG_XLNX_ZYNQMP_ARM=y -+# CONFIG_XLNX_VERSAL=y - CONFIG_ARM_SMMUV3=y -+CONFIG_PCI=y -+CONFIG_PCI_TESTDEV=y -+CONFIG_VIRTIO_PCI=y -+CONFIG_VIRTIO_MMIO=y -+include virtio.mak -+CONFIG_ARM_GIC=y -+CONFIG_ARM_GIC_KVM=$(CONFIG_KVM) -+CONFIG_PL011=y -+CONFIG_PL031=y -+CONFIG_PFLASH_CFI01=y -+CONFIG_PCI_GENERIC=y -+CONFIG_ACPI=y 
-+CONFIG_PLATFORM_BUS=y -+CONFIG_SMBIOS=y -+CONFIG_PL061=y -+CONFIG_GPIO_KEY=y -+CONFIG_ARM_V7M=y -+CONFIG_PCIE_PORT=y -+CONFIG_XIO3130=y -+CONFIG_IOH3420=y -+CONFIG_USB_XHCI=y -+CONFIG_USB=y -+CONFIG_I2C=y -+CONFIG_FW_CFG_DMA=y -diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak -index 64c998c..faea212 100644 ---- a/default-configs/i386-softmmu.mak -+++ b/default-configs/i386-softmmu.mak -@@ -5,20 +5,20 @@ include sound.mak - include usb.mak - include hyperv.mak - CONFIG_QXL=$(CONFIG_SPICE) --CONFIG_VGA_ISA=y -+#CONFIG_VGA_ISA=y - CONFIG_VGA_CIRRUS=y --CONFIG_VMWARE_VGA=y --CONFIG_VMXNET3_PCI=y -+#CONFIG_VMWARE_VGA=y -+#CONFIG_VMXNET3_PCI=y - CONFIG_VIRTIO_VGA=y - CONFIG_VMMOUSE=y - CONFIG_IPMI=y --CONFIG_IPMI_LOCAL=y --CONFIG_IPMI_EXTERN=y --CONFIG_ISA_IPMI_KCS=y --CONFIG_ISA_IPMI_BT=y -+#CONFIG_IPMI_LOCAL=y -+#CONFIG_IPMI_EXTERN=y -+#CONFIG_ISA_IPMI_KCS=y -+#CONFIG_ISA_IPMI_BT=y - CONFIG_SERIAL=y - CONFIG_SERIAL_ISA=y --CONFIG_PARALLEL=y -+#CONFIG_PARALLEL=y - CONFIG_I8254=y - CONFIG_PCSPK=y - CONFIG_PCKBD=y -@@ -30,11 +30,11 @@ CONFIG_ACPI_MEMORY_HOTPLUG=y - CONFIG_ACPI_CPU_HOTPLUG=y - CONFIG_APM=y - CONFIG_I8257=y --CONFIG_IDE_ISA=y -+#CONFIG_IDE_ISA=y - CONFIG_IDE_PIIX=y --CONFIG_NE2000_ISA=y --CONFIG_HPET=y --CONFIG_APPLESMC=y -+#CONFIG_NE2000_ISA=y -+#CONFIG_HPET=y -+#CONFIG_APPLESMC=y - CONFIG_I8259=y - CONFIG_PFLASH_CFI01=y - CONFIG_TPM_TIS=$(CONFIG_TPM) -@@ -66,4 +66,4 @@ CONFIG_FW_CFG_DMA=y - CONFIG_I2C=y - CONFIG_SEV=$(CONFIG_KVM) - CONFIG_VTD=y --CONFIG_AMD_IOMMU=y -+#CONFIG_AMD_IOMMU=y -diff --git a/default-configs/pci.mak b/default-configs/pci.mak -index 6c7be12..5929b5d 100644 ---- a/default-configs/pci.mak -+++ b/default-configs/pci.mak -@@ -4,22 +4,22 @@ CONFIG_ISA_BUS=y - CONFIG_VIRTIO_PCI=y - include virtio.mak - CONFIG_USB_UHCI=y --CONFIG_USB_OHCI=y -+#CONFIG_USB_OHCI=y - CONFIG_USB_EHCI=y - CONFIG_USB_XHCI=y - CONFIG_USB_XHCI_NEC=y --CONFIG_NE2000_PCI=y --CONFIG_EEPRO100_PCI=y --CONFIG_PCNET_PCI=y 
--CONFIG_PCNET_COMMON=y -+#CONFIG_NE2000_PCI=y -+#CONFIG_EEPRO100_PCI=y -+#CONFIG_PCNET_PCI=y -+#CONFIG_PCNET_COMMON=y - CONFIG_AC97=y - CONFIG_HDA=y --CONFIG_ES1370=y -+#CONFIG_ES1370=y - CONFIG_SCSI=y --CONFIG_LSI_SCSI_PCI=y --CONFIG_VMW_PVSCSI_SCSI_PCI=y --CONFIG_MEGASAS_SCSI_PCI=y --CONFIG_MPTSAS_SCSI_PCI=y -+#CONFIG_LSI_SCSI_PCI=y -+#CONFIG_VMW_PVSCSI_SCSI_PCI=y -+#CONFIG_MEGASAS_SCSI_PCI=y -+#CONFIG_MPTSAS_SCSI_PCI=y - CONFIG_RTL8139_PCI=y - CONFIG_E1000_PCI=y - CONFIG_E1000E_PCI=y -@@ -27,23 +27,23 @@ CONFIG_IDE_CORE=y - CONFIG_IDE_QDEV=y - CONFIG_IDE_PCI=y - CONFIG_AHCI=y --CONFIG_ESP=y --CONFIG_ESP_PCI=y -+#CONFIG_ESP=y -+#CONFIG_ESP_PCI=y - CONFIG_SERIAL=y - CONFIG_SERIAL_ISA=y - CONFIG_SERIAL_PCI=y --CONFIG_CAN_BUS=y --CONFIG_CAN_SJA1000=y --CONFIG_CAN_PCI=y --CONFIG_IPACK=y -+#CONFIG_CAN_BUS=y -+#CONFIG_CAN_SJA1000=y -+#CONFIG_CAN_PCI=y -+#CONFIG_IPACK=y - CONFIG_WDT_IB6300ESB=y - CONFIG_PCI_TESTDEV=y --CONFIG_NVME_PCI=y --CONFIG_SD=y --CONFIG_SDHCI=y -+#CONFIG_NVME_PCI=y -+#CONFIG_SD=y -+#CONFIG_SDHCI=y - CONFIG_EDU=y - CONFIG_VGA=y - CONFIG_VGA_PCI=y --CONFIG_BOCHS_DISPLAY=y --CONFIG_IVSHMEM_DEVICE=$(CONFIG_IVSHMEM) --CONFIG_ROCKER=y -+#CONFIG_BOCHS_DISPLAY=y -+#CONFIG_IVSHMEM_DEVICE=$(CONFIG_IVSHMEM) -+#CONFIG_ROCKER=y -diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak -index aec2855..242d277 100644 ---- a/default-configs/ppc64-softmmu.mak -+++ b/default-configs/ppc64-softmmu.mak -@@ -1,14 +1,28 @@ - # Default configuration for ppc64-softmmu - --# Include all 32-bit boards --include ppc-softmmu.mak -+include sound.mak -+include usb.mak -+include virtio.mak +-CONFIG_ARM_SMMUV3=y ++include aarch64-rh-devices.mak +diff --git a/default-configs/ppc64-rh-devices.mak b/default-configs/ppc64-rh-devices.mak +new file mode 100644 +index 0000000000..3be1750736 +--- /dev/null ++++ b/default-configs/ppc64-rh-devices.mak +@@ -0,0 +1,30 @@ ++include rh-virtio.mak + -+## PCI configuration - cut down from the defaults in pci.mak 
++CONFIG_DIMM=y ++CONFIG_MEM_DEVICE=y +CONFIG_PCI=y -+CONFIG_VIRTIO_PCI=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PSERIES=y ++CONFIG_SCSI=y ++CONFIG_SPAPR_VSCSI=y ++CONFIG_TEST_DEVICES=y ++CONFIG_USB=y ++CONFIG_USB_OHCI=y ++CONFIG_USB_SMARTCARD=y ++CONFIG_USB_STORAGE_BOT=y +CONFIG_USB_XHCI=y +CONFIG_USB_XHCI_NEC=y -+CONFIG_WDT_IB6300ESB=y -+CONFIG_PCI_TESTDEV=y -+CONFIG_USB_OHCI=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y +CONFIG_VGA=y +CONFIG_VGA_PCI=y -+CONFIG_SERIAL=y -+#CONFIG_I2C=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_VGA=y ++CONFIG_WDT_IB6300ESB=y ++CONFIG_XICS=y ++CONFIG_XICS_KVM=y ++CONFIG_XICS_SPAPR=y ++CONFIG_XIVE=y ++CONFIG_XIVE_SPAPR=y +diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak +index cca52665d9..fec354f327 100644 +--- a/default-configs/ppc64-softmmu.mak ++++ b/default-configs/ppc64-softmmu.mak +@@ -1,10 +1,12 @@ + # Default configuration for ppc64-softmmu + + # Include all 32-bit boards +-include ppc-softmmu.mak ++#include ppc-softmmu.mak # For PowerNV -CONFIG_POWERNV=y --CONFIG_IPMI=y --CONFIG_IPMI_LOCAL=y --CONFIG_IPMI_EXTERN=y --CONFIG_ISA_IPMI_BT=y +#CONFIG_POWERNV=y -+#CONFIG_IPMI=y -+#CONFIG_IPMI_LOCAL=y -+#CONFIG_IPMI_EXTERN=y -+#CONFIG_ISA_IPMI_BT=y # For pSeries - CONFIG_PSERIES=y -@@ -18,4 +32,4 @@ CONFIG_XICS_SPAPR=$(CONFIG_PSERIES) - CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM)) - CONFIG_MEM_DEVICE=y - CONFIG_DIMM=y --CONFIG_SPAPR_RNG=y -+#CONFIG_SPAPR_RNG=y +-CONFIG_PSERIES=y ++#CONFIG_PSERIES=y ++ ++include ppc64-rh-devices.mak +diff --git a/default-configs/rh-virtio.mak b/default-configs/rh-virtio.mak +new file mode 100644 +index 0000000000..94ede1b5f6 +--- /dev/null ++++ b/default-configs/rh-virtio.mak +@@ -0,0 +1,10 @@ ++CONFIG_VIRTIO=y ++CONFIG_VIRTIO_BALLOON=y ++CONFIG_VIRTIO_BLK=y ++CONFIG_VIRTIO_GPU=y ++CONFIG_VIRTIO_INPUT=y ++CONFIG_VIRTIO_INPUT_HOST=y ++CONFIG_VIRTIO_NET=y ++CONFIG_VIRTIO_RNG=y ++CONFIG_VIRTIO_SCSI=y ++CONFIG_VIRTIO_SERIAL=y 
+diff --git a/default-configs/s390x-rh-devices.mak b/default-configs/s390x-rh-devices.mak +new file mode 100644 +index 0000000000..c3c73fe752 +--- /dev/null ++++ b/default-configs/s390x-rh-devices.mak +@@ -0,0 +1,15 @@ ++include rh-virtio.mak ++ ++CONFIG_PCI=y ++CONFIG_S390_CCW_VIRTIO=y ++CONFIG_S390_FLIC=y ++CONFIG_S390_FLIC_KVM=y ++CONFIG_SCLPCONSOLE=y ++CONFIG_SCSI=y ++CONFIG_TERMINAL3270=y ++CONFIG_VFIO=y ++CONFIG_VFIO_AP=y ++CONFIG_VFIO_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_CCW=y ++CONFIG_WDT_DIAG288=y diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak -index 5eef375..49a59fc 100644 +index f2287a133f..3e2e388e91 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak -@@ -1,10 +1,11 @@ - CONFIG_PCI=y --CONFIG_VIRTIO_PCI=$(CONFIG_PCI) -+#CONFIG_VIRTIO_PCI=$(CONFIG_PCI) - include virtio.mak - CONFIG_SCLPCONSOLE=y - CONFIG_TERMINAL3270=y - CONFIG_S390_FLIC=y - CONFIG_S390_FLIC_KVM=$(CONFIG_KVM) --CONFIG_VFIO_CCW=$(CONFIG_LINUX) -+# Disabled for Red Hat Enterprise Linux: -+# CONFIG_VFIO_CCW=$(CONFIG_LINUX) - CONFIG_WDT_DIAG288=y - CONFIG_VFIO_AP=$(CONFIG_LINUX) -diff --git a/default-configs/sound.mak b/default-configs/sound.mak -index 4f22c34..1bead9b 100644 ---- a/default-configs/sound.mak -+++ b/default-configs/sound.mak -@@ -1,4 +1,4 @@ --CONFIG_SB16=y --CONFIG_ADLIB=y --CONFIG_GUS=y --CONFIG_CS4231A=y -+#CONFIG_SB16=y -+#CONFIG_ADLIB=y -+#CONFIG_GUS=y -+#CONFIG_CS4231A=y -diff --git a/default-configs/usb.mak b/default-configs/usb.mak -index e42cfea..cef6c0b 100644 ---- a/default-configs/usb.mak -+++ b/default-configs/usb.mak -@@ -1,11 +1,11 @@ - CONFIG_USB=y --CONFIG_USB_TABLET_WACOM=y -+#CONFIG_USB_TABLET_WACOM=y - CONFIG_USB_STORAGE_BOT=y --CONFIG_USB_STORAGE_UAS=y --CONFIG_USB_STORAGE_MTP=y -+#CONFIG_USB_STORAGE_UAS=y -+#CONFIG_USB_STORAGE_MTP=y - CONFIG_SCSI=y - CONFIG_USB_SMARTCARD=y --CONFIG_USB_AUDIO=y --CONFIG_USB_SERIAL=y --CONFIG_USB_NETWORK=y --CONFIG_USB_BLUETOOTH=y 
-+#CONFIG_USB_AUDIO=y -+#CONFIG_USB_SERIAL=y -+#CONFIG_USB_NETWORK=y -+#CONFIG_USB_BLUETOOTH=y -diff --git a/default-configs/virtio.mak b/default-configs/virtio.mak -index 1304849..b4f4743 100644 ---- a/default-configs/virtio.mak -+++ b/default-configs/virtio.mak -@@ -1,10 +1,10 @@ --CONFIG_VHOST_USER_SCSI=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) --CONFIG_VHOST_USER_BLK=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) -+#CONFIG_VHOST_USER_SCSI=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) -+#CONFIG_VHOST_USER_BLK=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) - CONFIG_VIRTIO=y - CONFIG_VIRTIO_9P=y - CONFIG_VIRTIO_BALLOON=y - CONFIG_VIRTIO_BLK=y --CONFIG_VIRTIO_CRYPTO=y -+#CONFIG_VIRTIO_CRYPTO=y - CONFIG_VIRTIO_GPU=y - CONFIG_VIRTIO_INPUT=y - CONFIG_VIRTIO_NET=y +@@ -10,4 +10,6 @@ + + # Boards: + # +-CONFIG_S390_CCW_VIRTIO=y ++#CONFIG_S390_CCW_VIRTIO=y ++ ++include s390x-rh-devices.mak +diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak +new file mode 100644 +index 0000000000..01b5363c79 +--- /dev/null ++++ b/default-configs/x86_64-rh-devices.mak +@@ -0,0 +1,93 @@ ++include rh-virtio.mak ++ ++CONFIG_AC97=y ++CONFIG_ACPI=y ++CONFIG_ACPI_CPU_HOTPLUG=y ++CONFIG_ACPI_MEMORY_HOTPLUG=y ++CONFIG_ACPI_NVDIMM=y ++CONFIG_ACPI_SMBUS=y ++CONFIG_ACPI_VMGENID=y ++CONFIG_ACPI_X86=y ++CONFIG_ACPI_X86_ICH=y ++CONFIG_AHCI=y ++CONFIG_APIC=y ++CONFIG_APM=y ++CONFIG_DIMM=y ++CONFIG_E1000E_PCI_EXPRESS=y ++CONFIG_E1000_PCI=y ++CONFIG_EDU=y ++CONFIG_FDC=y ++CONFIG_FW_CFG_DMA=y ++CONFIG_HDA=y ++CONFIG_HYPERV=y ++CONFIG_HYPERV_TESTDEV=y ++CONFIG_I2C=y ++CONFIG_I440FX=y ++CONFIG_I8254=y ++CONFIG_I8257=y ++CONFIG_I8259=y ++CONFIG_I82801B11=y ++CONFIG_IDE_CORE=y ++CONFIG_IDE_PCI=y ++CONFIG_IDE_PIIX=y ++CONFIG_IDE_QDEV=y ++CONFIG_IOAPIC=y ++CONFIG_IOH3420=y ++CONFIG_ISA_BUS=y ++CONFIG_ISA_DEBUG=y ++CONFIG_ISA_TESTDEV=y ++CONFIG_LPC_ICH9=y ++CONFIG_MC146818RTC=y ++CONFIG_MEM_DEVICE=y ++CONFIG_NVDIMM=y ++CONFIG_OPENGL=y ++CONFIG_PAM=y 
++CONFIG_PC=y ++CONFIG_PCI=y ++CONFIG_PCIE_PORT=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_EXPRESS=y ++CONFIG_PCI_EXPRESS_Q35=y ++CONFIG_PCI_PIIX=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PCKBD=y ++CONFIG_PCSPK=y ++CONFIG_PC_ACPI=y ++CONFIG_PC_PCI=y ++CONFIG_PFLASH_CFI01=y ++CONFIG_PVPANIC=y ++CONFIG_PXB=y ++CONFIG_Q35=y ++CONFIG_QXL=y ++CONFIG_RTL8139_PCI=y ++CONFIG_SCSI=y ++CONFIG_SERIAL=y ++CONFIG_SERIAL_ISA=y ++CONFIG_SERIAL_PCI=y ++CONFIG_SEV=y ++CONFIG_SGA=y ++CONFIG_SMBIOS=y ++CONFIG_SMBUS_EEPROM=y ++CONFIG_SPICE=y ++CONFIG_TEST_DEVICES=y ++CONFIG_USB=y ++CONFIG_USB_EHCI=y ++CONFIG_USB_SMARTCARD=y ++CONFIG_USB_STORAGE_BOT=y ++CONFIG_USB_UHCI=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_NEC=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VGA=y ++CONFIG_VGA_CIRRUS=y ++CONFIG_VGA_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_VGA=y ++CONFIG_VMMOUSE=y ++CONFIG_VMPORT=y ++CONFIG_VTD=y ++CONFIG_WDT_IB6300ESB=y ++CONFIG_WDT_IB700=y ++CONFIG_XIO3130=y +diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak +index 64b2ee2960..b5de7e5279 100644 +--- a/default-configs/x86_64-softmmu.mak ++++ b/default-configs/x86_64-softmmu.mak +@@ -1,3 +1,5 @@ + # Default configuration for x86_64-softmmu + +-include i386-softmmu.mak ++#include i386-softmmu.mak ++ ++include x86_64-rh-devices.mak diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index c5d8646..a4e87b8 100644 +index e53dfe1ee3..168a713eff 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -446,8 +446,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) @@ -391,10 +397,10 @@ index c5d8646..a4e87b8 100644 object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs -index 50c7b4a..149848e 100644 +index fa57c7c770..75cf31650c 100644 --- a/hw/arm/Makefile.objs +++ b/hw/arm/Makefile.objs -@@ -16,7 +16,7 @@ obj-$(CONFIG_STRONGARM) += collie.o +@@ -17,7 +17,7 @@ obj-$(CONFIG_STRONGARM) += collie.o 
obj-$(CONFIG_VERSATILE) += vexpress.o versatilepb.o obj-$(CONFIG_ZYNQ) += xilinx_zynq.o @@ -404,19 +410,35 @@ index 50c7b4a..149848e 100644 obj-$(CONFIG_PXA2XX) += pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o obj-$(CONFIG_DIGIC) += digic.o diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 6f19f12..56b7aeb 100644 +index 6f19f127a5..9ece2dbbd2 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c -@@ -599,6 +599,7 @@ static void floppy_drive_class_init(ObjectClass *klass, void *data) - k->bus_type = TYPE_FLOPPY_BUS; - k->props = floppy_drive_properties; - k->desc = "virtual floppy drive"; -+ k->user_creatable = false; /* RH state preserve */ - } +@@ -42,6 +42,8 @@ + #include "qemu/log.h" + #include "trace.h" - static const TypeInfo floppy_drive_info = { ++#include "hw/boards.h" ++ + /********************************************************/ + /* debug Floppy devices */ + +@@ -2629,6 +2631,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, + int i, j; + static int command_tables_inited = 0; + ++ /* Restricted for Red Hat Enterprise Linux: */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (!strstr(mc->name, "-rhel7.")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { + error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); + } diff --git a/hw/bt/Makefile.objs b/hw/bt/Makefile.objs -index 867a7d2..e678e9e 100644 +index 867a7d2e8a..e678e9ee3c 100644 --- a/hw/bt/Makefile.objs +++ b/hw/bt/Makefile.objs @@ -1,3 +1,3 @@ @@ -426,7 +448,7 @@ index 867a7d2..e678e9e 100644 +#common-obj-y += hci-csr.o diff --git a/hw/char/serial-pci.c b/hw/char/serial-pci.c -index cb0d04c..d426982 100644 +index cb0d04c1d9..d426982df7 100644 --- a/hw/char/serial-pci.c +++ b/hw/char/serial-pci.c @@ -228,6 +228,8 @@ static void multi_2x_serial_pci_class_initfn(ObjectClass *klass, void *data) @@ -448,10 +470,10 @@ 
index cb0d04c..d426982 100644 static const TypeInfo serial_pci_info = { diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs -index a799c83..1c7ba0b 100644 +index a799c83815..0bcb4d50d0 100644 --- a/hw/core/Makefile.objs +++ b/hw/core/Makefile.objs -@@ -16,9 +16,11 @@ common-obj-$(CONFIG_SOFTMMU) += machine.o +@@ -16,9 +16,10 @@ common-obj-$(CONFIG_SOFTMMU) += machine.o common-obj-$(CONFIG_SOFTMMU) += loader.o common-obj-$(CONFIG_FITLOADER) += loader-fit.o common-obj-$(CONFIG_SOFTMMU) += qdev-properties-system.o @@ -460,19 +482,29 @@ index a799c83..1c7ba0b 100644 -common-obj-$(CONFIG_SOFTMMU) += split-irq.o +# Disabled in Red Hat Enterprise Linux +#common-obj-$(CONFIG_SOFTMMU) += register.o -+#obj-$(CONFIG_SOFTMMU) += generic-loader.o +#common-obj-$(CONFIG_SOFTMMU) += or-irq.o +#common-obj-$(CONFIG_SOFTMMU) += split-irq.o common-obj-$(CONFIG_PLATFORM_BUS) += platform-bus.o -common-obj-$(CONFIG_SOFTMMU) += generic-loader.o +#common-obj-$(CONFIG_SOFTMMU) += generic-loader.o common-obj-$(CONFIG_SOFTMMU) += null-machine.o +diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs +index 8db9e8a7b3..87d4bdc27d 100644 +--- a/hw/cpu/Makefile.objs ++++ b/hw/cpu/Makefile.objs +@@ -2,4 +2,5 @@ obj-$(CONFIG_ARM11MPCORE) += arm11mpcore.o + obj-$(CONFIG_REALVIEW) += realview_mpcore.o + obj-$(CONFIG_A9MPCORE) += a9mpcore.o + obj-$(CONFIG_A15MPCORE) += a15mpcore.o +-common-obj-y += core.o cluster.o ++common-obj-y += core.o ++# cluster.o diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs -index 97acd5b..e39b87c 100644 +index dbd453ab1b..c3cefab578 100644 --- a/hw/display/Makefile.objs +++ b/hw/display/Makefile.objs @@ -1,7 +1,8 @@ - common-obj-y += edid-generate.o + common-obj-$(CONFIG_EDID) += edid-generate.o edid-region.o -common-obj-$(CONFIG_FW_CFG_DMA) += ramfb.o -common-obj-$(CONFIG_FW_CFG_DMA) += ramfb-standalone.o @@ -483,7 +515,7 @@ index 97acd5b..e39b87c 100644 common-obj-$(CONFIG_ADS7846) += ads7846.o common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o 
diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index a0e7146..058a8e0 100644 +index a0e71469f4..058a8e0f4a 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c @@ -2967,6 +2967,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) @@ -496,25 +528,11 @@ index a0e7146..058a8e0 100644 /* follow real hardware, cirrus card emulated has 4 MB video memory. Also accept 8 MB/16 MB for backward compatibility. */ if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index f095725..567439e 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -1533,7 +1533,9 @@ static void pc_superio_init(ISABus *isa_bus, bool create_fdctrl, bool no_vmport) - ISADevice *i8042, *port92, *vmmouse; - - serial_hds_isa_init(isa_bus, 0, MAX_ISA_SERIAL_PORTS); -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - parallel_hds_isa_init(isa_bus, MAX_PARALLEL_PORTS); -+#endif - - for (i = 0; i < MAX_FD; i++) { - fd[i] = drive_get(IF_FLOPPY, 0, i); diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index a3afe1f..6de12ca 100644 +index 885c16e938..d19c107f1e 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -253,7 +253,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -252,7 +252,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -524,7 +542,7 @@ index a3afe1f..6de12ca 100644 } static const TypeInfo piix3_ide_info = { -@@ -280,6 +281,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -279,6 +280,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -533,34 +551,21 @@ index a3afe1f..6de12ca 100644 } static const TypeInfo piix4_ide_info = { -diff --git a/hw/ide/via.c b/hw/ide/via.c -index 
238f038..e4a5e6d 100644 ---- a/hw/ide/via.c -+++ b/hw/ide/via.c -@@ -216,6 +216,8 @@ static void via_ide_class_init(ObjectClass *klass, void *data) - k->revision = 0x06; - k->class_id = PCI_CLASS_STORAGE_IDE; - set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); -+ /* Disabled for Red Hat Enterprise Linux: */ -+ dc->user_creatable = false; - } - - static const TypeInfo via_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index 07c8801..c27a0f8 100644 +index 47a606f5e3..562a9bc0a6 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c -@@ -574,6 +574,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) - +@@ -568,6 +568,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) dc->realize = i8042_realizefn; dc->vmsd = &vmstate_kbd_isa; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + /* Disabled for Red Hat Enterprise Linux: */ + dc->user_creatable = false; } static const TypeInfo i8042_info = { diff --git a/hw/isa/Makefile.objs b/hw/isa/Makefile.objs -index 9e106df..0828964 100644 +index 9e106df186..0828964014 100644 --- a/hw/isa/Makefile.objs +++ b/hw/isa/Makefile.objs @@ -1,5 +1,5 @@ @@ -571,7 +576,7 @@ index 9e106df..0828964 100644 common-obj-$(CONFIG_I82378) += i82378.o common-obj-$(CONFIG_PC87312) += pc87312.o diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs -index 680350b..ed543a6 100644 +index c71e07ae35..a5c3ff8617 100644 --- a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -9,7 +9,7 @@ common-obj-$(CONFIG_PCI_TESTDEV) += pci-testdev.o @@ -583,47 +588,11 @@ index 680350b..ed543a6 100644 common-obj-$(CONFIG_FW_CFG_DMA) += vmcoreinfo.o # ARM devices -diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c -index ecfd10a..8059563 100644 ---- a/hw/misc/ivshmem.c -+++ b/hw/misc/ivshmem.c -@@ -893,6 +893,13 @@ static void ivshmem_common_realize(PCIDevice *dev, Error **errp) - return; - } - -+ /* Migration disabled for Red Hat Enterprise Linux: */ -+ if (s->master == ON_OFF_AUTO_ON) { -+ error_setg(errp, "master=on 
is not supported"); -+ return; -+ } -+ s->master = ON_OFF_AUTO_OFF; -+ - pci_conf = dev->config; - pci_conf[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY; - -@@ -1179,6 +1186,8 @@ static void ivshmem_doorbell_class_init(ObjectClass *klass, void *data) - k->realize = ivshmem_doorbell_realize; - dc->props = ivshmem_doorbell_properties; - dc->vmsd = &ivshmem_doorbell_vmsd; -+ /* Disabled for Red Hat Enterprise Linux: */ -+ dc->user_creatable = false; - } - - static const TypeInfo ivshmem_doorbell_info = { -@@ -1349,6 +1358,8 @@ static void ivshmem_class_init(ObjectClass *klass, void *data) - dc->desc = "Inter-VM shared memory (legacy)"; - dc->props = ivshmem_properties; - dc->vmsd = &ivshmem_vmsd; -+ /* Disabled for Red Hat Enterprise Linux: */ -+ dc->user_creatable = false; - } - - static const TypeInfo ivshmem_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index 5e144cb..2e07880 100644 +index 9b39bccfb2..6ac19216df 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1778,6 +1778,7 @@ static const E1000Info e1000_devices[] = { +@@ -1798,6 +1798,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -631,7 +600,7 @@ index 5e144cb..2e07880 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1790,6 +1791,7 @@ static const E1000Info e1000_devices[] = { +@@ -1810,6 +1811,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -640,7 +609,7 @@ index 5e144cb..2e07880 100644 static void e1000_register_types(void) diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c -index d9c70f7..f294fbc 100644 +index d9c70f7ce6..f294fbce6a 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -801,6 +801,7 @@ static const TypeInfo i440fx_info = { @@ -670,12 +639,12 @@ index d9c70f7..f294fbc 100644 type_register_static(&piix3_info); type_register_static(&piix3_xen_info); diff --git a/hw/ppc/spapr_cpu_core.c 
b/hw/ppc/spapr_cpu_core.c -index 2398ce6..63a7bb6 100644 +index f04e06cdf6..c664969b5b 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -382,10 +382,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { - .instance_size = sizeof(sPAPRCPUCore), - .class_size = sizeof(sPAPRCPUCoreClass), +@@ -385,10 +385,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { + .instance_size = sizeof(SpaprCpuCore), + .class_size = sizeof(SpaprCpuCoreClass), }, +#if 0 /* Disabled for Red Hat Enterprise Linux */ DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"), @@ -687,10 +656,10 @@ index 2398ce6..63a7bb6 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c -index 25976ed..a793584 100644 +index 963373ba95..6771930154 100644 --- a/hw/usb/ccid-card-emulated.c +++ b/hw/usb/ccid-card-emulated.c -@@ -600,6 +600,8 @@ static void emulated_class_initfn(ObjectClass *klass, void *data) +@@ -602,6 +602,8 @@ static void emulated_class_initfn(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_INPUT, dc->categories); dc->desc = "emulated smartcard"; dc->props = emulated_card_properties; @@ -699,24 +668,11 @@ index 25976ed..a793584 100644 } static const TypeInfo emulated_card_info = { -diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs -index 8b3f664..7e5c0ad 100644 ---- a/hw/vfio/Makefile.objs -+++ b/hw/vfio/Makefile.objs -@@ -2,7 +2,7 @@ ifeq ($(CONFIG_LINUX), y) - obj-$(CONFIG_SOFTMMU) += common.o - obj-$(CONFIG_PCI) += pci.o pci-quirks.o display.o - obj-$(CONFIG_VFIO_CCW) += ccw.o --obj-$(CONFIG_SOFTMMU) += platform.o -+#obj-$(CONFIG_SOFTMMU) += platform.o - obj-$(CONFIG_VFIO_XGMAC) += calxeda-xgmac.o - obj-$(CONFIG_VFIO_AMD_XGBE) += amd-xgbe.o - obj-$(CONFIG_SOFTMMU) += spapr.o diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index eae31c7..4a6e98e 100644 +index 40a12001f5..921d694e2f 100644 --- a/hw/vfio/pci-quirks.c +++ 
b/hw/vfio/pci-quirks.c -@@ -1387,6 +1387,8 @@ static void vfio_pci_igd_lpc_bridge_class_init(ObjectClass *klass, void *data) +@@ -1385,6 +1385,8 @@ static void vfio_pci_igd_lpc_bridge_class_init(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->desc = "VFIO dummy ISA/LPC bridge for IGD assignment"; dc->hotpluggable = false; @@ -725,7 +681,7 @@ index eae31c7..4a6e98e 100644 k->realize = vfio_pci_igd_lpc_bridge_realize; k->class_id = PCI_CLASS_BRIDGE_ISA; } -@@ -1580,6 +1582,9 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) +@@ -1578,6 +1580,9 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) 0, PCI_DEVFN(0x2, 0))) { return; } @@ -736,10 +692,10 @@ index eae31c7..4a6e98e 100644 /* * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 5c7bd96..6e0000c 100644 +index 504019c458..13badcd6ed 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -3262,6 +3262,7 @@ static const TypeInfo vfio_pci_dev_info = { +@@ -3269,6 +3269,7 @@ static const TypeInfo vfio_pci_dev_info = { }, }; @@ -747,7 +703,7 @@ index 5c7bd96..6e0000c 100644 static Property vfio_pci_dev_nohotplug_properties[] = { DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false), DEFINE_PROP_END_OF_LIST(), -@@ -3281,11 +3282,15 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { +@@ -3288,11 +3289,15 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { .instance_size = sizeof(VFIOPCIDevice), .class_init = vfio_pci_nohotplug_dev_class_init, }; @@ -763,51 +719,11 @@ index 5c7bd96..6e0000c 100644 } type_init(register_vfio_pci_dev_type) -diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index a954799..9a987cb 100644 ---- a/hw/virtio/virtio-pci.c -+++ b/hw/virtio/virtio-pci.c -@@ -2003,7 +2003,7 @@ static const TypeInfo virtio_blk_pci_info = { - .class_init = virtio_blk_pci_class_init, - }; - --#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) 
-+#if defined(CONFIG_VHOST_USER_BLK) - /* vhost-user-blk */ - - static Property vhost_user_blk_pci_properties[] = { -@@ -2183,7 +2183,7 @@ static const TypeInfo vhost_scsi_pci_info = { - }; - #endif - --#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) -+#if defined(CONFIG_VHOST_USER_BLK) - /* vhost-user-scsi-pci */ - static Property vhost_user_scsi_pci_properties[] = { - DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, -@@ -2707,7 +2707,7 @@ static void virtio_pci_register_types(void) - type_register_static(&virtio_9p_pci_info); - #endif - type_register_static(&virtio_blk_pci_info); --#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) -+#if defined(CONFIG_VHOST_USER_BLK) - type_register_static(&vhost_user_blk_pci_info); - #endif - type_register_static(&virtio_scsi_pci_info); -@@ -2717,7 +2717,7 @@ static void virtio_pci_register_types(void) - #ifdef CONFIG_VHOST_SCSI - type_register_static(&vhost_scsi_pci_info); - #endif --#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) -+#if defined(CONFIG_VHOST_USER_SCSI) - type_register_static(&vhost_user_scsi_pci_info); - #endif - #ifdef CONFIG_VHOST_VSOCK diff --git a/qemu-options.hx b/qemu-options.hx -index 269eda7..5e13aa9 100644 +index 08749a3391..0f55062546 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -1741,11 +1741,6 @@ ETEXI +@@ -1983,11 +1983,6 @@ ETEXI DEF("no-hpet", 0, QEMU_OPTION_no_hpet, "-no-hpet disable HPET\n", QEMU_ARCH_I386) @@ -819,7 +735,7 @@ index 269eda7..5e13aa9 100644 DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" -@@ -2749,6 +2744,7 @@ STEXI +@@ -2997,6 +2992,7 @@ STEXI ETEXI DEFHEADING() @@ -827,7 +743,7 @@ index 269eda7..5e13aa9 100644 DEFHEADING(Bluetooth(R) options:) STEXI @table @option -@@ -2827,6 +2823,7 @@ STEXI +@@ -3075,6 +3071,7 @@ STEXI @end table ETEXI DEFHEADING() @@ -836,17 +752,17 @@ index 
269eda7..5e13aa9 100644 #ifdef CONFIG_TPM DEFHEADING(TPM device options:) diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs -index 5dd0aee..f27250e 100644 +index 269dfa5832..205ebe21db 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs -@@ -44,3 +44,4 @@ stub-obj-y += xen-hvm.o +@@ -39,3 +39,4 @@ stub-obj-y += xen-hvm.o stub-obj-y += pci-host-piix.o stub-obj-y += ram-block.o stub-obj-y += ramfb.o +stub-obj-y += ide-isa.o diff --git a/stubs/ide-isa.c b/stubs/ide-isa.c new file mode 100644 -index 0000000..9fd50ef +index 0000000000..9fd50ef691 --- /dev/null +++ b/stubs/ide-isa.c @@ -0,0 +1,13 @@ @@ -864,10 +780,10 @@ index 0000000..9fd50ef + abort(); +} diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 60411f6..d2ac5bb 100644 +index 4155782197..2a19b96a92 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c -@@ -2149,7 +2149,9 @@ static void arm_cpu_register_types(void) +@@ -2270,7 +2270,9 @@ static void arm_cpu_register_types(void) type_register_static(&idau_interface_type_info); while (info->name) { @@ -879,10 +795,10 @@ index 60411f6..d2ac5bb 100644 } diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index f81d35e..e9b9183 100644 +index d6bb57d210..6616303782 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1443,14 +1443,14 @@ static X86CPUDefinition builtin_x86_defs[] = { +@@ -1480,14 +1480,14 @@ static X86CPUDefinition builtin_x86_defs[] = { .family = 6, .model = 6, .stepping = 3, @@ -905,10 +821,11 @@ index f81d35e..e9b9183 100644 .features[FEAT_8000_0001_ECX] = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM, .xlevel = 0x8000000A, -@@ -1680,6 +1680,25 @@ static X86CPUDefinition builtin_x86_defs[] = { +@@ -1716,6 +1716,25 @@ static X86CPUDefinition builtin_x86_defs[] = { + .xlevel = 0x80000008, .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", }, - { ++ { + .name = "cpu64-rhel6", + .level = 4, + .vendor = CPUID_VENDOR_AMD, @@ -927,12 +844,11 @@ index f81d35e..e9b9183 100644 + .xlevel = 0x8000000A, + .model_id = "QEMU Virtual CPU version 
(cpu64-rhel6)", + }, -+ { + { .name = "Conroe", .level = 10, - .vendor = CPUID_VENDOR_INTEL, diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 7c75963..7f179ff 100644 +index 7c75963e3c..7f179ff65e 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -65,6 +65,7 @@ @@ -1006,10 +922,10 @@ index 7c75963..7f179ff 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 7c253ff..e73f812 100644 +index eb125d4d0d..2ed9c1bd2c 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c -@@ -380,6 +380,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, +@@ -406,6 +406,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, (max_model->def->gen == model->def->gen && max_model->def->ec_ga < model->def->ec_ga)) { list_add_feat("type", unavailable); @@ -1020,10 +936,10 @@ index 7c253ff..e73f812 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 2ebf26a..3a0337a 100644 +index 19530fb94e..57e15404ff 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c -@@ -2342,6 +2342,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2340,6 +2340,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } @@ -1039,10 +955,10 @@ index 2ebf26a..3a0337a 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ diff --git a/vl.c b/vl.c -index a5ae5f2..22bd99c 100644 +index c696ad2a13..3ada215270 100644 --- a/vl.c +++ b/vl.c -@@ -172,7 +172,7 @@ unsigned int max_cpus; +@@ -167,7 +167,7 @@ unsigned int max_cpus; int smp_cores = 1; int smp_threads = 1; int acpi_enabled = 1; @@ -1051,7 +967,7 @@ index a5ae5f2..22bd99c 100644 int fd_bootchk = 1; static int no_reboot; int no_shutdown = 0; -@@ -937,6 +937,7 @@ static void configure_rtc(QemuOpts 
*opts) +@@ -933,6 +933,7 @@ static void configure_rtc(QemuOpts *opts) } } @@ -1059,7 +975,7 @@ index a5ae5f2..22bd99c 100644 /***********************************************************/ /* Bluetooth support */ static int nb_hcis; -@@ -1058,6 +1059,7 @@ static int bt_parse(const char *opt) +@@ -1054,6 +1055,7 @@ static int bt_parse(const char *opt) error_report("bad bluetooth parameter '%s'", opt); return 1; } @@ -1067,7 +983,7 @@ index a5ae5f2..22bd99c 100644 static int parse_name(void *opaque, QemuOpts *opts, Error **errp) { -@@ -3273,6 +3275,7 @@ int main(int argc, char **argv, char **envp) +@@ -3279,6 +3281,7 @@ int main(int argc, char **argv, char **envp) } break; #endif @@ -1075,15 +991,15 @@ index a5ae5f2..22bd99c 100644 case QEMU_OPTION_bt: warn_report("The bluetooth subsystem is deprecated and will " "be removed soon. If the bluetooth subsystem is " -@@ -3280,6 +3283,7 @@ int main(int argc, char **argv, char **envp) +@@ -3286,6 +3289,7 @@ int main(int argc, char **argv, char **envp) "qemu-devel@nongnu.org with your usecase."); add_device_config(DEV_BT, optarg); break; +#endif case QEMU_OPTION_audio_help: - AUD_help (); + audio_legacy_help(); exit (0); -@@ -4417,9 +4421,11 @@ int main(int argc, char **argv, char **envp) +@@ -4409,9 +4413,11 @@ int main(int argc, char **argv, char **envp) tpm_init(); @@ -1096,5 +1012,5 @@ index a5ae5f2..22bd99c 100644 if (!xen_enabled()) { /* On 32-bit hosts, QEMU is limited by virtual address space */ -- -1.8.3.1 +2.20.1 diff --git a/0006-Machine-type-related-general-changes.patch b/0006-Machine-type-related-general-changes.patch index 66f2920..18e5181 100644 --- a/0006-Machine-type-related-general-changes.patch +++ b/0006-Machine-type-related-general-changes.patch @@ -1,6 +1,6 @@ -From 01e49fb467fd831c62f3640e546e313298a7c5c0 Mon Sep 17 00:00:00 2001 +From 4f20f7503073886c51e82fbbdfe78d6c79ea5df7 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Fri, 19 Oct 2018 12:36:59 +0200 +Date: Fri, 11 Jan 2019 09:54:45 +0100 
Subject: Machine type related general changes This patch is first part of original "Add RHEL machine types" patch we @@ -8,31 +8,40 @@ split to allow easier review. It contains changes not related to any architecture. Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- Remove e1000 device duplication changes to reflect upstream solution +- Rewrite machine compat properties to upstream solution + +Merged patches (4.0.0): +- d4c0957 compat: Generic HW_COMPAT_RHEL7_6 +- cbac773 virtio: Make disable-legacy/disable-modern compat properties optional + +Signed-off-by: Danilo C. L. de Paula --- - hw/acpi/ich9.c | 16 +++ - hw/acpi/piix4.c | 6 +- - hw/char/serial.c | 16 +++ - hw/display/vga-isa.c | 2 +- - hw/net/e1000.c | 18 ++- - hw/net/e1000e.c | 21 ++++ - hw/net/rtl8139.c | 4 +- - hw/smbios/smbios.c | 1 + - hw/timer/i8254_common.c | 2 +- - hw/timer/mc146818rtc.c | 6 + - hw/usb/hcd-uhci.c | 4 +- - hw/usb/hcd-xhci.c | 20 ++++ - hw/usb/hcd-xhci.h | 2 + - include/hw/acpi/ich9.h | 3 + - include/hw/compat.h | 229 ++++++++++++++++++++++++++++++++++++++ - include/hw/usb.h | 4 + - migration/migration.c | 2 + - migration/migration.h | 5 + - qdev-monitor.c | 1 - - scripts/vmstate-static-checker.py | 1 - - 20 files changed, 353 insertions(+), 10 deletions(-) + hw/acpi/ich9.c | 16 +++ + hw/acpi/piix4.c | 6 +- + hw/char/serial.c | 16 +++ + hw/core/machine.c | 268 ++++++++++++++++++++++++++++++++++++++++ + hw/display/vga-isa.c | 2 +- + hw/net/e1000.c | 10 ++ + hw/net/e1000e.c | 21 ++++ + hw/net/rtl8139.c | 4 +- + hw/smbios/smbios.c | 1 + + hw/timer/i8254_common.c | 2 +- + hw/timer/mc146818rtc.c | 6 + + hw/usb/hcd-uhci.c | 4 +- + hw/usb/hcd-xhci.c | 20 +++ + hw/usb/hcd-xhci.h | 2 + + include/hw/acpi/ich9.h | 3 + + include/hw/boards.h | 18 +++ + include/hw/usb.h | 4 + + migration/migration.c | 2 + + migration/migration.h | 5 + + 19 files changed, 403 insertions(+), 7 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index a4e87b8..23a7baa 100644 +index 
168a713eff..0a6346f1cf 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -441,6 +441,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) @@ -66,10 +75,10 @@ index a4e87b8..23a7baa 100644 ich9_pm_get_disable_s3, ich9_pm_set_disable_s3, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index e330f24..b213f65 100644 +index 9c079d6834..d742777134 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -310,7 +310,7 @@ static const VMStateDescription vmstate_cpuhp_state = { +@@ -315,7 +315,7 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id) static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, @@ -78,7 +87,7 @@ index e330f24..b213f65 100644 .minimum_version_id_old = 1, .load_state_old = acpi_load_old, .post_load = vmstate_acpi_post_load, -@@ -670,8 +670,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) +@@ -684,8 +684,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) static Property piix4_pm_properties[] = { DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), @@ -90,7 +99,7 @@ index e330f24..b213f65 100644 DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_pci_hotplug, true), diff --git a/hw/char/serial.c b/hw/char/serial.c -index 02463e3..a591387 100644 +index 7c42a2abfc..ae63cc0104 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -30,6 +30,7 @@ @@ -144,8 +153,287 @@ index 02463e3..a591387 100644 return s->poll_msl >= 0; } +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 743fef2898..fd1594d1ad 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -24,6 +24,274 @@ + #include "hw/pci/pci.h" + #include "hw/mem/nvdimm.h" + ++/* Mostly like hw_compat_2_1 but: ++ * * we don't need virtio-scsi-pci since 7.0 already had that on ++ * * ++ * * RH: Note, qemu-extended-regs should have been enabled in the 7.1 ++ * * machine type, but was accidentally turned off in 7.2 onwards. 
++ * * ++ * */ ++GlobalProperty hw_compat_rhel_7_1[] = { ++ { /* COMPAT_RHEL7.1 */ ++ .driver = "intel-hda-generic", ++ .property = "old_msi_addr", ++ .value = "on", ++ },{ ++ .driver = "VGA", ++ .property = "qemu-extended-regs", ++ .value = "off", ++ },{ ++ .driver = "secondary-vga", ++ .property = "qemu-extended-regs", ++ .value = "off", ++ },{ ++ .driver = "usb-mouse", ++ .property = "usb_version", ++ .value = stringify(1), ++ },{ ++ .driver = "usb-kbd", ++ .property = "usb_version", ++ .value = stringify(1), ++ },{ ++ .driver = "virtio-pci", ++ .property = "virtio-pci-bus-master-bug-migration", ++ .value = "on", ++ },{ ++ .driver = "virtio-blk-pci", ++ .property = "any_layout", ++ .value = "off", ++ },{ ++ .driver = "virtio-serial-pci", ++ .property = "any_layout", ++ .value = "off", ++ },{ ++ .driver = "virtio-9p-pci", ++ .property = "any_layout", ++ .value = "off", ++ },{ ++ .driver = "virtio-rng-pci", ++ .property = "any_layout", ++ .value = "off", ++ },{ /* HW_COMPAT_RHEL7_1 - introduced with 2.10.0 */ ++ .driver = "migration", ++ .property = "send-configuration", ++ .value = "off", ++ }, ++}; ++const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); ++ ++/* Mostly like hw_compat_2_4 + 2_3 but: ++ * * we don't need "any_layout" as it has been backported to 7.2 ++ * */ ++ ++GlobalProperty hw_compat_rhel_7_2[] = { ++ { ++ .driver = "virtio-blk-device", ++ .property = "scsi", ++ .value = "true", ++ },{ ++ .driver = "e1000-82540em", ++ .property = "extra_mac_registers", ++ .value = "off", ++ },{ ++ .driver = "virtio-pci", ++ .property = "x-disable-pcie", ++ .value = "on", ++ },{ ++ .driver = "virtio-pci", ++ .property = "migrate-extra", ++ .value = "off", ++ },{ /* HW_COMPAT_RHEL7_2 */ ++ .driver = "fw_cfg_mem", ++ .property = "dma_enabled", ++ .value = "off", ++ },{ /* HW_COMPAT_RHEL7_2 */ ++ .driver = "fw_cfg_io", ++ .property = "dma_enabled", ++ .value = "off", ++ },{ /* HW_COMPAT_RHEL7_2 */ ++ .driver = "isa-fdc", ++ .property = "fallback", 
++ .value = "144", ++ },{ /* HW_COMPAT_RHEL7_2 */ ++ .driver = "virtio-pci", ++ .property = "disable-modern", ++ .value = "on", ++ .optional = true, ++ },{ /* HW_COMPAT_RHEL7_2 */ ++ .driver = "virtio-pci", ++ .property = "disable-legacy", ++ .value = "off", ++ .optional = true, ++ },{ /* HW_COMPAT_RHEL7_2 */ ++ .driver = TYPE_PCI_DEVICE, ++ .property = "x-pcie-lnksta-dllla", ++ .value = "off", ++ },{ /* HW_COMPAT_RHEL7_2 */ ++ .driver = "virtio-pci", ++ .property = "page-per-vq", ++ .value = "on", ++ },{ /* HW_COMPAT_RHEL7_2 - introduced with 2.10.0 */ ++ .driver = "migration", ++ .property = "send-section-footer", ++ .value = "off", ++ },{ /* HW_COMPAT_RHEL7_2 - introduced with 2.10.0 */ ++ .driver = "migration", ++ .property = "store-global-state", ++ .value = "off", ++ }, ++}; ++const size_t hw_compat_rhel_7_2_len = G_N_ELEMENTS(hw_compat_rhel_7_2); ++ ++/* Mostly like HW_COMPAT_2_6 + HW_COMPAT_2_7 + HW_COMPAT_2_8 except ++ * * disable-modern, disable-legacy, page-per-vq have already been ++ * * backported to RHEL7.3 ++ * */ ++GlobalProperty hw_compat_rhel_7_3[] = { ++ { /* HW_COMPAT_RHEL7_3 */ ++ .driver = "virtio-mmio", ++ .property = "format_transport_address", ++ .value = "off", ++ },{ /* HW_COMPAT_RHEL7_3 */ ++ .driver = "virtio-serial-device", ++ .property = "emergency-write", ++ .value = "off", ++ },{ /* HW_COMPAT_RHEL7_3 */ ++ .driver = "ioapic", ++ .property = "version", ++ .value = "0x11", ++ },{ /* HW_COMPAT_RHEL7_3 */ ++ .driver = "intel-iommu", ++ .property = "x-buggy-eim", ++ .value = "true", ++ },{ /* HW_COMPAT_RHEL7_3 */ ++ .driver = "virtio-pci", ++ .property = "x-ignore-backend-features", ++ .value = "on", ++ },{ /* HW_COMPAT_RHEL7_3 */ ++ .driver = "fw_cfg_mem", ++ .property = "x-file-slots", ++ .value = stringify(0x10), ++ },{ /* HW_COMPAT_RHEL7_3 */ ++ .driver = "fw_cfg_io", ++ .property = "x-file-slots", ++ .value = stringify(0x10), ++ },{ /* HW_COMPAT_RHEL7_3 */ ++ .driver = "pflash_cfi01", ++ .property = "old-multiple-chip-handling", ++ 
.value = "on", ++ },{ /* HW_COMPAT_RHEL7_3 */ ++ .driver = TYPE_PCI_DEVICE, ++ .property = "x-pcie-extcap-init", ++ .value = "off", ++ },{ /* HW_COMPAT_RHEL7_3 */ ++ .driver = "virtio-pci", ++ .property = "x-pcie-deverr-init", ++ .value = "off", ++ },{ /* HW_COMPAT_RHEL7_3 */ ++ .driver = "virtio-pci", ++ .property = "x-pcie-lnkctl-init", ++ .value = "off", ++ },{ /* HW_COMPAT_RHEL7_3 */ ++ .driver = "virtio-pci", ++ .property = "x-pcie-pm-init", ++ .value = "off", ++ },{ /* HW_COMPAT_RHEL7_3 */ ++ .driver = "virtio-net-device", ++ .property = "x-mtu-bypass-backend", ++ .value = "off", ++ },{ /* HW_COMPAT_RHEL7_3 */ ++ .driver = "e1000e", ++ .property = "__redhat_e1000e_7_3_intr_state", ++ .value = "on", ++ }, ++}; ++const size_t hw_compat_rhel_7_3_len = G_N_ELEMENTS(hw_compat_rhel_7_3); ++ ++/* Mostly like hw_compat_2_9 except ++ * * x-mtu-bypass-backend, x-migrate-msix has already been ++ * * backported to RHEL7.4. shpc was already on in 7.4. ++ * */ ++GlobalProperty hw_compat_rhel_7_4[] = { ++ { /* HW_COMPAT_RHEL7_4 */ ++ .driver = "intel-iommu", ++ .property = "pt", ++ .value = "off", ++ }, ++}; ++const size_t hw_compat_rhel_7_4_len = G_N_ELEMENTS(hw_compat_rhel_7_4); ++ ++/* The same as hw_compat_2_11 + hw_compat_2_10 */ ++GlobalProperty hw_compat_rhel_7_5[] = { ++ { /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ ++ .driver = "hpet", ++ .property = "hpet-offset-saved", ++ .value = "false", ++ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ ++ .driver = "virtio-blk-pci", ++ .property = "vectors", ++ .value = "2", ++ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ ++ .driver = "vhost-user-blk-pci", ++ .property = "vectors", ++ .value = "2", ++ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 but ++ bz 1608778 modified for our naming */ ++ .driver = "e1000-82540em", ++ .property = "migrate_tso_props", ++ .value = "off", ++ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_10 */ ++ .driver = "virtio-mouse-device", ++ .property = "wheel-axis", ++ .value = "false", ++ },{ /* 
HW_COMPAT_RHEL7_5 from HW_COMPAT_2_10 */ ++ .driver = "virtio-tablet-device", ++ .property = "wheel-axis", ++ .value = "false", ++ },{ /* HW_COMPAT_RHEL7_5 */ ++ .driver = "cirrus-vga", ++ .property = "vgamem_mb", ++ .value = "16", ++ },{ /* HW_COMPAT_RHEL7_5 */ ++ .driver = "migration", ++ .property = "decompress-error-check", ++ .value = "off", ++ }, ++}; ++const size_t hw_compat_rhel_7_5_len = G_N_ELEMENTS(hw_compat_rhel_7_5); ++ ++/* The same as hw_compat_3_0 + hw_compat_2_12 ++ * * except that ++ * * there's nothing in 3_0 ++ * * migration.decompress-error-check=off was in 7.5 from bz 1584139 ++ * * ++ * */ ++GlobalProperty hw_compat_rhel_7_6[] = { ++ { /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ ++ .driver = "hda-audio", ++ .property = "use-timer", ++ .value = "false", ++ },{ /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ ++ .driver = "cirrus-vga", ++ .property = "global-vmstate", ++ .value = "true", ++ },{ /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ ++ .driver = "VGA", ++ .property = "global-vmstate", ++ .value = "true", ++ },{ /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ ++ .driver = "vmware-svga", ++ .property = "global-vmstate", ++ .value = "true", ++ },{ /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ ++ .driver = "qxl-vga", ++ .property = "global-vmstate", ++ .value = "true", ++ }, ++}; ++const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); ++ ++ + GlobalProperty hw_compat_3_1[] = { + { "pcie-root-port", "x-speed", "2_5" }, + { "pcie-root-port", "x-width", "1" }, diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index fa44242..7835c83 100644 +index fa44242e0d..7835c8380a 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c @@ -80,7 +80,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) @@ -158,10 +446,10 @@ index fa44242..7835c83 100644 }; diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index 2e07880..e886e7c 100644 +index 6ac19216df..a4de04ab89 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1673,6 
+1673,16 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) +@@ -1691,6 +1691,16 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) pci_conf = pci_dev->config; @@ -178,37 +466,8 @@ index 2e07880..e886e7c 100644 /* TODO: RST# value should be 0, PCI spec 6.2.4 */ pci_conf[PCI_CACHE_LINE_SIZE] = 0x10; -@@ -1773,7 +1783,7 @@ static const TypeInfo e1000_base_info = { - - static const E1000Info e1000_devices[] = { - { -- .name = "e1000", -+ .name = "e1000-82540em", - .device_id = E1000_DEV_ID_82540EM, - .revision = 0x03, - .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, -@@ -1794,6 +1804,11 @@ static const E1000Info e1000_devices[] = { - #endif - }; - -+static const TypeInfo e1000_default_info = { -+ .name = "e1000", -+ .parent = "e1000-82540em", -+}; -+ - static void e1000_register_types(void) - { - int i; -@@ -1811,6 +1826,7 @@ static void e1000_register_types(void) - - type_register(&type_info); - } -+ type_register_static(&e1000_default_info); - } - - type_init(e1000_register_types) diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index 510ddb3..f1de9e5 100644 +index 510ddb3897..f1de9e5058 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -75,6 +75,11 @@ typedef struct E1000EState { @@ -282,7 +541,7 @@ index 510ddb3..f1de9e5 100644 e1000e_prop_disable_vnet, bool), DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 2342a09..0c916b7 100644 +index 2342a095e3..0c916b7a21 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3174,7 +3174,7 @@ static int rtl8139_pre_save(void *opaque) @@ -305,7 +564,7 @@ index 2342a09..0c916b7 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 9209394..43cf057 100644 +index 47be9071fa..3ea04e621e 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -775,6 +775,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, @@ -317,7 +576,7 @@ index 
9209394..43cf057 100644 SMBIOS_SET_DEFAULT(type2.product, product); SMBIOS_SET_DEFAULT(type2.version, version); diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index 6190b6f..ad2ad2d 100644 +index 6190b6fc5d..ad2ad2d09f 100644 --- a/hw/timer/i8254_common.c +++ b/hw/timer/i8254_common.c @@ -268,7 +268,7 @@ static const VMStateDescription vmstate_pit_common = { @@ -330,11 +589,11 @@ index 6190b6f..ad2ad2d 100644 vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c -index e4e4de8..3eced9c 100644 +index 513f105e62..10a3d44be1 100644 --- a/hw/timer/mc146818rtc.c +++ b/hw/timer/mc146818rtc.c @@ -35,6 +35,7 @@ - #include "qapi/qapi-events-misc.h" + #include "qapi/qapi-events-target.h" #include "qapi/visitor.h" #include "exec/address-spaces.h" +#include "migration/migration.h" @@ -354,10 +613,10 @@ index e4e4de8..3eced9c 100644 } diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 836b11f..9d7b9df 100644 +index 09df29ff9c..95542290ff 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c -@@ -1214,12 +1214,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) +@@ -1216,12 +1216,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) UHCIState *s = UHCI(dev); uint8_t *pci_conf = s->dev.config; int i; @@ -374,10 +633,10 @@ index 836b11f..9d7b9df 100644 if (s->masterbus) { USBPort *ports[NB_PORTS]; diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 8f1a01a..ca19474 100644 +index ec28bee319..ad351a7b6d 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c -@@ -3560,9 +3560,27 @@ static const VMStateDescription vmstate_xhci_slot = { +@@ -3580,9 +3580,27 @@ static const VMStateDescription vmstate_xhci_slot = { } }; @@ -405,7 +664,7 @@ index 8f1a01a..ca19474 100644 .fields = (VMStateField[]) { VMSTATE_UINT32(type, XHCIEvent), VMSTATE_UINT32(ccode, XHCIEvent), -@@ -3571,6 +3589,8 @@ static const VMStateDescription 
vmstate_xhci_event = { +@@ -3591,6 +3609,8 @@ static const VMStateDescription vmstate_xhci_event = { VMSTATE_UINT32(flags, XHCIEvent), VMSTATE_UINT8(slotid, XHCIEvent), VMSTATE_UINT8(epid, XHCIEvent), @@ -415,10 +674,10 @@ index 8f1a01a..ca19474 100644 } }; diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h -index fc36a4c..89d4cf7 100644 +index 240caa4e51..11bd05dfd5 100644 --- a/hw/usb/hcd-xhci.h +++ b/hw/usb/hcd-xhci.h -@@ -153,6 +153,8 @@ typedef struct XHCIEvent { +@@ -154,6 +154,8 @@ typedef struct XHCIEvent { uint32_t flags; uint8_t slotid; uint8_t epid; @@ -428,7 +687,7 @@ index fc36a4c..89d4cf7 100644 typedef struct XHCIInterrupter { diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h -index 59aeb06..7b5cc25 100644 +index 41568d1837..1a23ccc412 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -61,6 +61,9 @@ typedef struct ICH9LPCPMRegs { @@ -441,249 +700,38 @@ index 59aeb06..7b5cc25 100644 } ICH9LPCPMRegs; #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" -diff --git a/include/hw/compat.h b/include/hw/compat.h -index 6f4d5fc..f08cc7c 100644 ---- a/include/hw/compat.h -+++ b/include/hw/compat.h -@@ -285,4 +285,233 @@ - .value = "on",\ - }, +diff --git a/include/hw/boards.h b/include/hw/boards.h +index e231860666..456e4a944c 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -335,4 +335,22 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; -+/* Mostly like HW_COMPAT_2_1 but: -+ * we don't need virtio-scsi-pci since 7.0 already had that on -+ * -+ * RH: Note, qemu-extended-regs should have been enabled in the 7.1 -+ * machine type, but was accidentally turned off in 7.2 onwards. 
-+ * -+ */ -+#define HW_COMPAT_RHEL7_1 \ -+ { /* COMPAT_RHEL7.1 */ \ -+ .driver = "intel-hda-generic",\ -+ .property = "old_msi_addr",\ -+ .value = "on",\ -+ },{\ -+ .driver = "VGA",\ -+ .property = "qemu-extended-regs",\ -+ .value = "off",\ -+ },{\ -+ .driver = "secondary-vga",\ -+ .property = "qemu-extended-regs",\ -+ .value = "off",\ -+ },{\ -+ .driver = "usb-mouse",\ -+ .property = "usb_version",\ -+ .value = stringify(1),\ -+ },{\ -+ .driver = "usb-kbd",\ -+ .property = "usb_version",\ -+ .value = stringify(1),\ -+ },{\ -+ .driver = "virtio-pci",\ -+ .property = "virtio-pci-bus-master-bug-migration",\ -+ .value = "on",\ -+ },{\ -+ .driver = "virtio-blk-pci",\ -+ .property = "any_layout",\ -+ .value = "off",\ -+ },{\ -+ .driver = "virtio-balloon-pci",\ -+ .property = "any_layout",\ -+ .value = "off",\ -+ },{\ -+ .driver = "virtio-serial-pci",\ -+ .property = "any_layout",\ -+ .value = "off",\ -+ },{\ -+ .driver = "virtio-9p-pci",\ -+ .property = "any_layout",\ -+ .value = "off",\ -+ },{\ -+ .driver = "virtio-rng-pci",\ -+ .property = "any_layout",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_1 - introduced with 2.10.0 */ \ -+ .driver = "migration",\ -+ .property = "send-configuration",\ -+ .value = "off",\ -+ }, ++extern GlobalProperty hw_compat_rhel_7_6[]; ++extern const size_t hw_compat_rhel_7_6_len; + -+/* Mostly like HW_COMPAT_2_4 + 2_3 but: -+ * we don't need "any_layout" as it has been backported to 7.2 -+ */ ++extern GlobalProperty hw_compat_rhel_7_5[]; ++extern const size_t hw_compat_rhel_7_5_len; + -+#define HW_COMPAT_RHEL7_2 \ -+ {\ -+ .driver = "virtio-blk-device",\ -+ .property = "scsi",\ -+ .value = "true",\ -+ },{\ -+ .driver = "e1000-82540em",\ -+ .property = "extra_mac_registers",\ -+ .value = "off",\ -+ },{\ -+ .driver = "virtio-pci",\ -+ .property = "x-disable-pcie",\ -+ .value = "on",\ -+ },{\ -+ .driver = "virtio-pci",\ -+ .property = "migrate-extra",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_2 */ \ -+ .driver = "fw_cfg_mem",\ -+ 
.property = "dma_enabled",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_2 */ \ -+ .driver = "fw_cfg_io",\ -+ .property = "dma_enabled",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_2 */ \ -+ .driver = "isa-fdc",\ -+ .property = "fallback",\ -+ .value = "144",\ -+ },{ /* HW_COMPAT_RHEL7_2 */ \ -+ .driver = "virtio-pci",\ -+ .property = "disable-modern",\ -+ .value = "on",\ -+ },{ /* HW_COMPAT_RHEL7_2 */ \ -+ .driver = "virtio-pci",\ -+ .property = "disable-legacy",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_2 */ \ -+ .driver = TYPE_PCI_DEVICE,\ -+ .property = "x-pcie-lnksta-dllla",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_2 */ \ -+ .driver = "virtio-pci",\ -+ .property = "page-per-vq",\ -+ .value = "on",\ -+ },{ /* HW_COMPAT_RHEL7_2 - introduced with 2.10.0 */ \ -+ .driver = "migration",\ -+ .property = "send-section-footer",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_2 - introduced with 2.10.0 */ \ -+ .driver = "migration",\ -+ .property = "store-global-state",\ -+ .value = "off",\ -+ }, ++extern GlobalProperty hw_compat_rhel_7_4[]; ++extern const size_t hw_compat_rhel_7_4_len; + -+/* Mostly like HW_COMPAT_2_6 + HW_COMPAT_2_7 + HW_COMPAT_2_8 except -+ * disable-modern, disable-legacy, page-per-vq have already been -+ * backported to RHEL7.3 -+ */ -+#define HW_COMPAT_RHEL7_3 \ -+ { /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "virtio-mmio",\ -+ .property = "format_transport_address",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "virtio-serial-device",\ -+ .property = "emergency-write",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "ioapic",\ -+ .property = "version",\ -+ .value = "0x11",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "intel-iommu",\ -+ .property = "x-buggy-eim",\ -+ .value = "true",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "virtio-pci",\ -+ .property = "x-ignore-backend-features",\ -+ .value = "on",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "fw_cfg_mem",\ -+ .property = 
"x-file-slots",\ -+ .value = stringify(0x10),\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "fw_cfg_io",\ -+ .property = "x-file-slots",\ -+ .value = stringify(0x10),\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "pflash_cfi01",\ -+ .property = "old-multiple-chip-handling",\ -+ .value = "on",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = TYPE_PCI_DEVICE,\ -+ .property = "x-pcie-extcap-init",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "virtio-pci",\ -+ .property = "x-pcie-deverr-init",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "virtio-pci",\ -+ .property = "x-pcie-lnkctl-init",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "virtio-pci",\ -+ .property = "x-pcie-pm-init",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "virtio-net-device",\ -+ .property = "x-mtu-bypass-backend",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_3 */ \ -+ .driver = "e1000e",\ -+ .property = "__redhat_e1000e_7_3_intr_state",\ -+ .value = "on",\ -+ }, ++extern GlobalProperty hw_compat_rhel_7_3[]; ++extern const size_t hw_compat_rhel_7_3_len; + -+/* Mostly like HW_COMPAT_2_9 except -+ * x-mtu-bypass-backend, x-migrate-msix has already been -+ * backported to RHEL7.4. shpc was already on in 7.4. 
-+ */ -+#define HW_COMPAT_RHEL7_4 \ -+ { /* HW_COMPAT_RHEL7_4 */ \ -+ .driver = "intel-iommu",\ -+ .property = "pt",\ -+ .value = "off",\ -+ }, ++extern GlobalProperty hw_compat_rhel_7_2[]; ++extern const size_t hw_compat_rhel_7_2_len; + -+/* The same as HW_COMPAT_2_11 + HW_COMPAT_2_10 */ -+#define HW_COMPAT_RHEL7_5 \ -+ { /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ \ -+ .driver = "hpet",\ -+ .property = "hpet-offset-saved",\ -+ .value = "false",\ -+ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ \ -+ .driver = "virtio-blk-pci",\ -+ .property = "vectors",\ -+ .value = "2",\ -+ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ \ -+ .driver = "vhost-user-blk-pci",\ -+ .property = "vectors",\ -+ .value = "2",\ -+ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 but \ -+ bz 1608778 modified for our naming */ \ -+ .driver = "e1000-82540em",\ -+ .property = "migrate_tso_props",\ -+ .value = "off",\ -+ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_10 */ \ -+ .driver = "virtio-mouse-device",\ -+ .property = "wheel-axis",\ -+ .value = "false",\ -+ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_10 */ \ -+ .driver = "virtio-tablet-device",\ -+ .property = "wheel-axis",\ -+ .value = "false",\ -+ },{ /* HW_COMPAT_RHEL7_5 */ \ -+ .driver = "cirrus-vga",\ -+ .property = "vgamem_mb",\ -+ .value = "16",\ -+ },{ /* HW_COMPAT_RHEL7_5 */ \ -+ .driver = "migration",\ -+ .property = "decompress-error-check",\ -+ .value = "off",\ -+ }, ++extern GlobalProperty hw_compat_rhel_7_1[]; ++extern const size_t hw_compat_rhel_7_1_len; + -+ - #endif /* HW_COMPAT_H */ + #endif diff --git a/include/hw/usb.h b/include/hw/usb.h -index a5080ad..b943ec9 100644 +index c21f41c8a9..71502b0bad 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h -@@ -606,4 +606,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, +@@ -604,4 +604,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, uint8_t interface_class, uint8_t interface_subclass, uint8_t interface_protocol); @@ -693,10 +741,10 @@ index 
a5080ad..b943ec9 100644 + #endif diff --git a/migration/migration.c b/migration/migration.c -index 49ffb99..0d9cb7a 100644 +index 609e0df5d0..a160172a0c 100644 --- a/migration/migration.c +++ b/migration/migration.c -@@ -105,6 +105,8 @@ enum mig_rp_message_type { +@@ -116,6 +116,8 @@ enum mig_rp_message_type { MIG_RP_MSG_MAX }; @@ -706,13 +754,13 @@ index 49ffb99..0d9cb7a 100644 migrations at once. For now we don't need to add dynamic creation of migration */ diff --git a/migration/migration.h b/migration/migration.h -index e413d4d..795238c 100644 +index 438f17edad..c793dcc50b 100644 --- a/migration/migration.h +++ b/migration/migration.h -@@ -292,6 +292,11 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); +@@ -307,6 +307,11 @@ void init_dirty_bitmap_incoming_migration(void); + void migrate_add_address(SocketAddress *address); - void dirty_bitmap_mig_before_vm_start(void); - void init_dirty_bitmap_incoming_migration(void); + int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); +/* + * Disables a load of subsections that were added in 2.2/rh7.2 for backwards + * migration compatibility. @@ -720,31 +768,7 @@ index e413d4d..795238c 100644 +extern bool migrate_pre_2_2; #define qemu_ram_foreach_block \ - #warning "Use qemu_ram_foreach_block_migratable in migration code" -diff --git a/qdev-monitor.c b/qdev-monitor.c -index 07147c6..47ea051 100644 ---- a/qdev-monitor.c -+++ b/qdev-monitor.c -@@ -47,7 +47,6 @@ typedef struct QDevAlias - - /* Please keep this table sorted by typename. 
*/ - static const QDevAlias qdev_alias_table[] = { -- { "e1000", "e1000-82540em" }, - { "ich9-ahci", "ahci" }, - { "lsi53c895a", "lsi" }, - { "virtio-9p-ccw", "virtio-9p", QEMU_ARCH_S390X }, -diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py -index d346728..4bca2bf 100755 ---- a/scripts/vmstate-static-checker.py -+++ b/scripts/vmstate-static-checker.py -@@ -105,7 +105,6 @@ def get_changed_sec_name(sec): - # Section names can change -- see commit 292b1634 for an example. - changes = { - "ICH9 LPC": "ICH9-LPC", -- "e1000-82540em": "e1000", - } - - for item in changes: + #warning "Use foreach_not_ignored_block in migration code" -- -1.8.3.1 +2.20.1 diff --git a/0007-Add-aarch64-machine-types.patch b/0007-Add-aarch64-machine-types.patch index 1c0ca0c..374c94c 100644 --- a/0007-Add-aarch64-machine-types.patch +++ b/0007-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 68e46b1a3bc650bc35ccc49606c3f31e29165512 Mon Sep 17 00:00:00 2001 +From b2c73bd35f8c0fa536004d18275ffcfe63096622 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -6,24 +6,37 @@ Subject: Add aarch64 machine types Adding changes to add RHEL machine types for aarch64 architecture. Signed-off-by: Miroslav Rezanina + +Rebase changes (weekly-190111): +- Use upstream compat handling + +Rebase changes (weekly-190308): +- Removed a15memmap (upstream) + +Merged patches (weekly-190125): +- 7bfdb4c aarch64: Add virt-rhel8.0.0 machine type for ARM +- 3433e69 aarch64: Set virt-rhel8.0.0 max_cpus to 512 +- 4d20863 aarch64: Use 256MB ECAM region by default + +Signed-off-by: Danilo C. L. 
de Paula --- - hw/arm/virt.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++++- - include/hw/arm/virt.h | 22 +++++++++ - 2 files changed, 147 insertions(+), 1 deletion(-) + hw/arm/virt.c | 140 +++++++++++++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 11 ++++ + 2 files changed, 150 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index f69e7eb..84a86c1 100644 +index ce2664a30b..5602d9f6b0 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -60,6 +60,7 @@ - #include "standard-headers/linux/input.h" - #include "hw/arm/smmuv3.h" +@@ -62,6 +62,7 @@ + #include "hw/acpi/acpi.h" + #include "target/arm/internals.h" -+#if 0 /* disabled Red Hat Enterprise Linux */ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -87,7 +88,36 @@ +@@ -88,7 +89,49 @@ DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) @@ -58,18 +71,31 @@ index f69e7eb..84a86c1 100644 + DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, true) +#define DEFINE_RHEL_MACHINE(major, minor, subminor) \ + DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) ++ ++/* This variable is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. 
++ */ ++GlobalProperty arm_rhel_compat[] = { ++ { ++ .driver = "virtio-net-pci", ++ .property = "romfile", ++ .value = "", ++ }, ++}; ++const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -1578,6 +1608,7 @@ static void machvirt_init(MachineState *machine) +@@ -1658,6 +1701,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } -+#if 0 /* disabled for RHEL */ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1606,6 +1637,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -1686,6 +1730,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -77,41 +103,31 @@ index f69e7eb..84a86c1 100644 static bool virt_get_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1660,6 +1692,7 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp) +@@ -1740,6 +1785,7 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp) } } -+#if 0 ++#if 0 /* Disabled for Red Hat Enterprise Linux */ static char *virt_get_iommu(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1687,6 +1720,7 @@ static void virt_set_iommu(Object *obj, const char *value, Error **errp) +@@ -1767,6 +1813,7 @@ static void virt_set_iommu(Object *obj, const char *value, Error **errp) error_append_hint(errp, "Valid values are none, smmuv3.\n"); } } -+#endif ++#endif /* disabled for RHEL */ static CpuInstanceProperties virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) -@@ -1726,6 +1760,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) +@@ -1806,6 +1853,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) return 
ms->possible_cpus; } -+#if 0 /* disabled for RHEL */ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { -@@ -1890,6 +1925,9 @@ DEFINE_VIRT_MACHINE(3, 0) - #define VIRT_COMPAT_2_12 \ - HW_COMPAT_2_12 - -+#define VIRT_COMPAT_2_12 \ -+ HW_COMPAT_2_12 -+ - static void virt_2_12_instance_init(Object *obj) - { - virt_3_0_instance_init(obj); -@@ -2017,3 +2055,89 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -2071,3 +2119,93 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -123,11 +139,11 @@ index f69e7eb..84a86c1 100644 + + mc->family = "virt-rhel-Z"; + mc->init = machvirt_init; -+ /* Start max_cpus at the maximum QEMU supports. We'll further restrict -+ * it later in machvirt_init, where we have more information about the ++ /* Start with max_cpus set to 512, which is the maximum supported by KVM. ++ * The value may be reduced later when we have more information about the + * configuration of the particular instance. + */ -+ mc->max_cpus = 255; ++ mc->max_cpus = 512; + mc->block_default_type = IF_VIRTIO; + mc->no_cdrom = 1; + mc->pci_allow_0_address = true; @@ -154,7 +170,7 @@ index f69e7eb..84a86c1 100644 +} +type_init(rhel_machine_init); + -+static void rhel760_virt_instance_init(Object *obj) ++static void rhel800_virt_instance_init(Object *obj) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); @@ -179,6 +195,8 @@ index f69e7eb..84a86c1 100644 + "Set GIC version. 
" + "Valid values are 2, 3 and host", NULL); + ++ vms->highmem_ecam = !vmc->no_highmem_ecam; ++ + if (vmc->no_its) { + vms->its = false; + } else { @@ -192,28 +210,30 @@ index f69e7eb..84a86c1 100644 + NULL); + } + -+ vms->memmap=a15memmap; ++ /* IOMMU is disabled by default and non-configurable for RHEL */ ++ vms->iommu = VIRT_IOMMU_NONE; ++ + vms->irqmap=a15irqmap; +} + -+static void rhel760_virt_options(MachineClass *mc) ++static void rhel800_virt_options(MachineClass *mc) +{ -+ SET_MACHINE_COMPAT(mc, ARM_RHEL_COMPAT); ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); +} -+DEFINE_RHEL_MACHINE_AS_LATEST(7, 6, 0) ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 0, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 4cc57a7..3237e97 100644 +index 507517c603..e49f872b1c 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -130,6 +130,7 @@ typedef struct { +@@ -136,6 +136,7 @@ typedef struct { - #define VIRT_ECAM_ID(high) (high ? VIRT_PCIE_ECAM_HIGH : VIRT_PCIE_ECAM) + #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) +#if 0 /* disabled for Red Hat Enterprise Linux */ #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") #define VIRT_MACHINE(obj) \ OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) -@@ -138,6 +139,27 @@ typedef struct { +@@ -144,6 +145,16 @@ typedef struct { #define VIRT_MACHINE_CLASS(klass) \ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) @@ -226,21 +246,10 @@ index 4cc57a7..3237e97 100644 +#define VIRT_MACHINE_CLASS(klass) \ + OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_RHEL_MACHINE) +#endif -+ -+/* This macro is for changes to properties that are RHEL specific, -+ * different to the current upstream and to be applied to the latest -+ * machine type. 
-+ */ -+#define ARM_RHEL_COMPAT \ -+ {\ -+ .driver = "virtio-net-pci",\ -+ .property = "romfile",\ -+ .value = "",\ -+ }, + void virt_acpi_setup(VirtMachineState *vms); /* Return the number of used redistributor regions */ -- -1.8.3.1 +2.20.1 diff --git a/0008-Add-ppc64-machine-types.patch b/0008-Add-ppc64-machine-types.patch index d38031f..3463404 100644 --- a/0008-Add-ppc64-machine-types.patch +++ b/0008-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 4f9094b11eb831317879d9c6108f6f706546fea5 Mon Sep 17 00:00:00 2001 +From 373a3f8f11227ba6bce10dab17ddfb6caffc75cf Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -6,8 +6,25 @@ Subject: Add ppc64 machine types Adding changes to add RHEL machine types for ppc64 architecture. Signed-off-by: Miroslav Rezanina + +Rebase changes (weekly-190111): +- remove instance options and use upstream solution +- Use upstream compat handling +- Replace SPAPR_PCI_2_7_MMIO_WIN_SIZE with value (changed upstream) + +Rebase changes (weekly-190104): +- re-add handling of instance_options (removed upstream) +- Use p8 as default for rhel machine types (p9 default upstream) + +Rebase changes (weekly-190315): +- sPAPRMachineClass renamed to SpaprMachineClass (upstream) + +Merged patches (weekly-190104): +- 467d59a redhat: define pseries-rhel8.0.0 machine type + +Signed-off-by: Danilo C. L. 
de Paula --- - hw/ppc/spapr.c | 252 ++++++++++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr.c | 252 ++++++++++++++++++++++++++++++++++++++++ hw/ppc/spapr_cpu_core.c | 13 +++ include/hw/ppc/spapr.h | 1 + target/ppc/compat.c | 13 ++- @@ -15,75 +32,112 @@ Signed-off-by: Miroslav Rezanina 5 files changed, 279 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 7afd1a1..76a4e83 100644 +index b52b82d298..daa59203ea 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -3906,6 +3906,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) - smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF; +@@ -4300,6 +4300,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) spapr_caps_add_properties(smc, &error_abort); smc->irq = &spapr_irq_xics; + smc->dr_phb_enabled = true; + smc->has_power9_support = true; } static const TypeInfo spapr_machine_info = { -@@ -3956,6 +3957,7 @@ static const TypeInfo spapr_machine_info = { +@@ -4344,6 +4345,7 @@ static const TypeInfo spapr_machine_info = { } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - /* - * pseries-3.1 + /* + * pseries-4.0 + */ +@@ -4499,6 +4501,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); + /* + * pseries-2.7 */ -@@ -4169,6 +4171,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); - .property = "pre-2.8-migration", \ - .value = "on", \ - }, +#endif - static void phb_placement_2_7(sPAPRMachineState *spapr, uint32_t index, + static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, -@@ -4219,6 +4222,7 @@ static void phb_placement_2_7(sPAPRMachineState *spapr, uint32_t index, +@@ -4549,6 +4552,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, */ } +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void spapr_machine_2_7_instance_options(MachineState *machine) + static void spapr_machine_2_7_class_options(MachineClass *mc) { - 
sPAPRMachineState *spapr = SPAPR_MACHINE(machine); -@@ -4378,6 +4382,254 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) - SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_1); + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +@@ -4663,6 +4667,254 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) + compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); +#endif + +/* -+ * pseries-rhel7.6.0 ++ * pseries-rhel8.0.0 + */ + -+static void spapr_machine_rhel760_instance_options(MachineState *machine) -+{ -+} -+ -+static void spapr_machine_rhel760_class_options(MachineClass *mc) ++static void spapr_machine_rhel800_class_options(MachineClass *mc) +{ + /* Defaults for the latest behaviour inherited from the base class */ +} + -+DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", true); ++DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", true); ++ ++/* ++ * pseries-rhel7.6.0 ++ * like spapr_compat_2_12 and spapr_compat_3_0 ++ * spapr_compat_0 is empty ++ */ ++GlobalProperty spapr_compat_rhel7_6[] = { ++ { ++ .driver = TYPE_POWERPC_CPU, ++ .property = "pre-3.0-migration", ++ .value = "on", ++ }, ++ { ++ .driver = TYPE_SPAPR_CPU_CORE, ++ .property = "pre-3.0-migration", ++ .value = "on", ++ }, ++}; ++const size_t spapr_compat_rhel7_6_len = G_N_ELEMENTS(spapr_compat_rhel7_6); ++ ++ ++static void spapr_machine_rhel760_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel800_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_6, spapr_compat_rhel7_6_len); ++ ++ /* from spapr_machine_3_0_class_options() */ ++ smc->legacy_irq_allocation = true; ++ smc->irq = &spapr_irq_xics_legacy; ++ ++ /* from spapr_machine_2_12_class_options() */ ++ /* We depend on kvm_enabled() to choose a default value for the ++ * hpt-max-page-size capability. 
Of course we can't do it here ++ * because this is too early and the HW accelerator isn't initialzed ++ * yet. Postpone this to machine init (see default_caps_with_cpu()). ++ */ ++ smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0; ++ /* Defaults for the latest behaviour inherited from the base class */ ++ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", false); + +/* + * pseries-rhel7.6.0-sxxm + * + * pseries-rhel7.6.0 with speculative execution exploit mitigations enabled by default + */ -+static void spapr_machine_rhel760sxxm_instance_options(MachineState *machine) -+{ -+ spapr_machine_rhel760_instance_options(machine); -+} + +static void spapr_machine_rhel760sxxm_class_options(MachineClass *mc) +{ -+ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_rhel760_class_options(mc); + smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; @@ -93,24 +147,11 @@ index 7afd1a1..76a4e83 100644 + +DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false); + -+/* -+ * pseries-rhel7.5.0 -+ * like SPAPR_COMPAT_2_11 and SPAPR_COMPAT_2_10 -+ * SPAPR_CAP_HTM already enabled in 7.4 -+ * -+ */ -+#define SPAPR_COMPAT_RHEL7_5 \ -+ HW_COMPAT_RHEL7_5 \ -+ -+static void spapr_machine_rhel750_instance_options(MachineState *machine) -+{ -+ spapr_machine_rhel760_instance_options(machine); -+} -+ +static void spapr_machine_rhel750_class_options(MachineClass *mc) +{ + spapr_machine_rhel760_class_options(mc); -+ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_5); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ +} + +DEFINE_SPAPR_MACHINE(rhel750, "rhel7.5.0", false); @@ -120,14 +161,10 @@ index 7afd1a1..76a4e83 100644 + * + * pseries-rhel7.5.0 with speculative execution exploit mitigations enabled by default + */ -+static void spapr_machine_rhel750sxxm_instance_options(MachineState *machine) -+{ -+ 
spapr_machine_rhel750_instance_options(machine); -+} + +static void spapr_machine_rhel750sxxm_class_options(MachineClass *mc) +{ -+ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_rhel750_class_options(mc); + smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; @@ -139,28 +176,24 @@ index 7afd1a1..76a4e83 100644 + +/* + * pseries-rhel7.4.0 -+ * like SPAPR_COMPAT_2_9 ++ * like spapr_compat_2_9 + */ -+ -+#define SPAPR_COMPAT_RHEL7_4 \ -+ HW_COMPAT_RHEL7_4 \ -+ { \ -+ .driver = TYPE_POWERPC_CPU, \ -+ .property = "pre-2.10-migration", \ -+ .value = "on", \ -+ }, \ -+ -+static void spapr_machine_rhel740_instance_options(MachineState *machine) -+{ -+ spapr_machine_rhel750_instance_options(machine); -+} ++GlobalProperty spapr_compat_rhel7_4[] = { ++ { ++ .driver = TYPE_POWERPC_CPU, ++ .property = "pre-2.10-migration", ++ .value = "on", ++ }, ++}; ++const size_t spapr_compat_rhel7_4_len = G_N_ELEMENTS(spapr_compat_rhel7_4); + +static void spapr_machine_rhel740_class_options(MachineClass *mc) +{ -+ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_rhel750_class_options(mc); -+ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_4); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_4, spapr_compat_rhel7_4_len); + mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram; + smc->has_power9_support = false; + smc->pre_2_10_has_unused_icps = true; @@ -175,14 +208,10 @@ index 7afd1a1..76a4e83 100644 + * + * pseries-rhel7.4.0 with speculative execution exploit mitigations enabled by default + */ -+static void spapr_machine_rhel740sxxm_instance_options(MachineState *machine) -+{ -+ spapr_machine_rhel740_instance_options(machine); -+} + +static void spapr_machine_rhel740sxxm_class_options(MachineClass *mc) +{ -+ sPAPRMachineClass *smc = 
SPAPR_MACHINE_CLASS(mc); ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_rhel740_class_options(mc); + smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; @@ -194,51 +223,47 @@ index 7afd1a1..76a4e83 100644 + +/* + * pseries-rhel7.3.0 -+ * like SPAPR_COMPAT_2_6/_2_7/_2_8 but "ddw" has been backported to RHEL7_3 ++ * like spapr_compat_2_6/_2_7/_2_8 but "ddw" has been backported to RHEL7_3 + */ -+#define SPAPR_COMPAT_RHEL7_3 \ -+ HW_COMPAT_RHEL7_3 \ -+ { \ -+ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ -+ .property = "mem_win_size", \ -+ .value = stringify(SPAPR_PCI_2_7_MMIO_WIN_SIZE),\ -+ }, \ -+ { \ -+ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ -+ .property = "mem64_win_size", \ -+ .value = "0", \ -+ }, \ -+ { \ -+ .driver = TYPE_POWERPC_CPU, \ -+ .property = "pre-2.8-migration", \ -+ .value = "on", \ -+ }, \ -+ { \ -+ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ -+ .property = "pre-2.8-migration", \ -+ .value = "on", \ -+ }, \ -+ { \ -+ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ -+ .property = "pcie-extended-configuration-space",\ -+ .value = "off", \ ++GlobalProperty spapr_compat_rhel7_3[] = { ++ { ++ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, ++ .property = "mem_win_size", ++ .value = "0xf80000000", + }, -+ -+static void spapr_machine_rhel730_instance_options(MachineState *machine) -+{ -+ sPAPRMachineState *spapr = SPAPR_MACHINE(machine); -+ -+ spapr_machine_rhel740_instance_options(machine); -+ spapr->use_hotplug_event_source = false; -+} ++ { ++ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, ++ .property = "mem64_win_size", ++ .value = "0", ++ }, ++ { ++ .driver = TYPE_POWERPC_CPU, ++ .property = "pre-2.8-migration", ++ .value = "on", ++ }, ++ { ++ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, ++ .property = "pre-2.8-migration", ++ .value = "on", ++ }, ++ { ++ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, ++ .property = "pcie-extended-configuration-space", ++ .value = "off", ++ }, ++}; ++const size_t spapr_compat_rhel7_3_len = G_N_ELEMENTS(spapr_compat_rhel7_3); + +static 
void spapr_machine_rhel730_class_options(MachineClass *mc) +{ -+ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_rhel740_class_options(mc); + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power7_v2.3"); -+ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_3); ++ mc->default_machine_opts = "modern-hotplug-events=off"; ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_3, spapr_compat_rhel7_3_len); ++ + smc->phb_placement = phb_placement_2_7; +} + @@ -249,14 +274,10 @@ index 7afd1a1..76a4e83 100644 + * + * pseries-rhel7.3.0 with speculative execution exploit mitigations enabled by default + */ -+static void spapr_machine_rhel730sxxm_instance_options(MachineState *machine) -+{ -+ spapr_machine_rhel730_instance_options(machine); -+} + +static void spapr_machine_rhel730sxxm_class_options(MachineClass *mc) +{ -+ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_rhel730_class_options(mc); + smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; @@ -269,36 +290,32 @@ index 7afd1a1..76a4e83 100644 +/* + * pseries-rhel7.2.0 + */ -+/* Should be like SPAPR_COMPAT_2_5 + 2_4 + 2_3, but "dynamic-reconfiguration" ++/* Should be like spapr_compat_2_5 + 2_4 + 2_3, but "dynamic-reconfiguration" + * has been backported to RHEL7_2 so we don't need it here. 
+ */ + -+#define SPAPR_COMPAT_RHEL7_2 \ -+ HW_COMPAT_RHEL7_2 \ -+ { \ -+ .driver = "spapr-vlan", \ -+ .property = "use-rx-buffer-pools", \ -+ .value = "off", \ -+ },{ \ -+ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE,\ -+ .property = "ddw",\ -+ .value = stringify(off),\ ++GlobalProperty spapr_compat_rhel7_2[] = { ++ { ++ .driver = "spapr-vlan", ++ .property = "use-rx-buffer-pools", ++ .value = "off", ++ },{ ++ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, ++ .property = "ddw", ++ .value = "off", + }, -+ -+ -+static void spapr_machine_rhel720_instance_options(MachineState *machine) -+{ -+ spapr_machine_rhel730_instance_options(machine); -+} ++}; ++const size_t spapr_compat_rhel7_2_len = G_N_ELEMENTS(spapr_compat_rhel7_2); + +static void spapr_machine_rhel720_class_options(MachineClass *mc) +{ -+ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_rhel730_class_options(mc); + smc->use_ohci_by_default = true; + mc->has_hotpluggable_cpus = NULL; -+ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_2); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_2, spapr_compat_rhel7_2_len); +} + +DEFINE_SPAPR_MACHINE(rhel720, "rhel7.2.0", false); @@ -306,10 +323,10 @@ index 7afd1a1..76a4e83 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 63a7bb6..fcf6174 100644 +index c664969b5b..7556debbef 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -22,6 +22,7 @@ +@@ -21,6 +21,7 @@ #include "sysemu/numa.h" #include "sysemu/hw_accel.h" #include "qemu/error-report.h" @@ -317,15 +334,15 @@ index 63a7bb6..fcf6174 100644 static void spapr_cpu_reset(void *opaque) { -@@ -218,6 +219,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr, +@@ -222,6 +223,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, CPUPPCState 
*env = &cpu->env; CPUState *cs = CPU(cpu); Error *local_err = NULL; -+ sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); ++ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); if (local_err) { -@@ -230,6 +232,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, sPAPRMachineState *spapr, +@@ -234,6 +236,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); kvmppc_set_papr(cpu); @@ -344,19 +361,19 @@ index 63a7bb6..fcf6174 100644 spapr_cpu_reset(cpu); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 6279711..d2370e5 100644 +index 5ea8081041..8737516ada 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -106,6 +106,7 @@ struct sPAPRMachineClass { - bool pre_2_10_has_unused_icps; +@@ -120,6 +120,7 @@ struct SpaprMachineClass { bool legacy_irq_allocation; + bool broken_host_serial_model; /* present real host info to the guest */ + bool has_power9_support; - void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index, + void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, diff --git a/target/ppc/compat.c b/target/ppc/compat.c -index 7de4bf3..3e2e353 100644 +index 7de4bf3122..3e2e35342d 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -105,8 +105,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) @@ -381,10 +398,10 @@ index 7de4bf3..3e2e353 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index ab68abe..c559740 100644 +index 0707177584..60cc41fd53 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1376,6 +1376,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) +@@ -1381,6 +1381,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool 
ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -393,5 +410,5 @@ index ab68abe..c559740 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, -- -1.8.3.1 +2.20.1 diff --git a/0009-Add-s390x-machine-types.patch b/0009-Add-s390x-machine-types.patch index 1fef71c..206c29b 100644 --- a/0009-Add-s390x-machine-types.patch +++ b/0009-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 07b2731f23166ca13bace5faacd35ab5129fb878 Mon Sep 17 00:00:00 2001 +From 4f7991558861584776c7c61dd56f339ed453372d Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -7,17 +7,25 @@ Adding changes to add RHEL machine types for s390x architecture. Signed-off-by: Miroslav Rezanina +Rebase changes (weekly-190111): +- Use upstream compat handling + Merged patches (3.1.0): - 29df663 s390x/cpumodel: default enable bpb and ppa15 for z196 and later + +Merged patches (weekly-190118): +- 6c200d665b hw/s390x/s390-virtio-ccw: Add machine types for RHEL8.0.0 + +Signed-off-by: Danilo C. L. 
de Paula --- - hw/s390x/s390-virtio-ccw.c | 50 +++++++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 49 insertions(+), 1 deletion(-) + hw/s390x/s390-virtio-ccw.c | 73 +++++++++++++++++++++++++++++++++++++- + 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index a0615a8..04f4c1a 100644 +index d11069b860..8c672dfd02 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -627,7 +627,7 @@ bool css_migration_enabled(void) +@@ -622,7 +622,7 @@ bool css_migration_enabled(void) { \ MachineClass *mc = MACHINE_CLASS(oc); \ ccw_machine_##suffix##_class_options(mc); \ @@ -26,41 +34,62 @@ index a0615a8..04f4c1a 100644 if (latest) { \ mc->alias = "s390-ccw-virtio"; \ mc->is_default = 1; \ -@@ -657,6 +657,8 @@ bool css_migration_enabled(void) - #define CCW_COMPAT_2_12 \ - HW_COMPAT_2_12 +@@ -646,6 +646,9 @@ bool css_migration_enabled(void) + } \ + type_init(ccw_machine_register_##suffix) ++ +#if 0 /* Disabled for Red Hat Enterprise Linux */ + - #define CCW_COMPAT_2_11 \ - HW_COMPAT_2_11 \ - {\ -@@ -894,6 +896,52 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + static void ccw_machine_4_0_instance_options(MachineState *machine) + { + } +@@ -846,6 +849,74 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) } DEFINE_CCW_MACHINE(2_4, "2.4", false); +#else + +/* -+ * like CCW_COMPAT_2_11, but includes HW_COMPAT_RHEL7_5 (derived from -+ * HW_COMPAT_2_11 and HW_COMPAT_2_10) instead of HW_COMPAT_2_11 ++ * like CCW_COMPAT_2_12 + CCW_COMPAT_3_0 (which are empty), but includes ++ * HW_COMPAT_RHEL7_6 instead of HW_COMPAT_2_11 and HW_COMPAT_3_0 + */ -+#define CCW_COMPAT_RHEL7_5 \ -+ HW_COMPAT_RHEL7_5 \ -+ {\ -+ .driver = TYPE_SCLP_EVENT_FACILITY,\ -+ .property = "allow_all_mask_sizes",\ -+ .value = "off",\ ++ ++GlobalProperty ccw_compat_rhel_7_6[] = ++{ ++}; ++const size_t ccw_compat_rhel_7_6_len = G_N_ELEMENTS(ccw_compat_rhel_7_6); ++ ++GlobalProperty 
ccw_compat_rhel_7_5[] = { ++ { ++ .driver = TYPE_SCLP_EVENT_FACILITY, ++ .property = "allow_all_mask_sizes", ++ .value = "off", + }, ++}; ++const size_t ccw_compat_rhel_7_5_len = G_N_ELEMENTS(ccw_compat_rhel_7_5); ++ ++static void ccw_machine_rhel800_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel800_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel800, "rhel8.0.0", true); + +static void ccw_machine_rhel760_instance_options(MachineState *machine) +{ ++ ccw_machine_rhel800_instance_options(machine); +} + +static void ccw_machine_rhel760_class_options(MachineClass *mc) +{ ++ ccw_machine_rhel800_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(mc->compat_props, ccw_compat_rhel_7_6, ccw_compat_rhel_7_6_len); +} -+DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", true); ++DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", false); + +static void ccw_machine_rhel750_instance_options(MachineState *machine) +{ @@ -79,7 +108,9 @@ index a0615a8..04f4c1a 100644 +static void ccw_machine_rhel750_class_options(MachineClass *mc) +{ + ccw_machine_rhel760_class_options(mc); -+ SET_MACHINE_COMPAT(mc, CCW_COMPAT_RHEL7_5); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(mc->compat_props, ccw_compat_rhel_7_5, ccw_compat_rhel_7_5_len); ++ S390_MACHINE_CLASS(mc)->hpage_1m_allowed = false; +} +DEFINE_CCW_MACHINE(rhel750, "rhel7.5.0", false); + @@ -89,5 +120,5 @@ index a0615a8..04f4c1a 100644 { type_register_static(&ccw_machine_info); -- -1.8.3.1 +2.20.1 diff --git a/0010-Add-x86_64-machine-types.patch b/0010-Add-x86_64-machine-types.patch index 53d27bf..92d8384 100644 --- a/0010-Add-x86_64-machine-types.patch +++ b/0010-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From a4f172dc05fc5b2fc28cd1a2121b70a3d4549ab2 Mon Sep 17 00:00:00 2001 +From 3c0a8f41da22fe3b88b6f4811b58b0fec6d09d8e Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina 
Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -7,22 +7,40 @@ Adding changes to add RHEL machine types for x86_64 architecture. Signed-off-by: Miroslav Rezanina +Rebase changes (weekly-190111): +- Use upstream compat handling + Rebase notes (3.1.0): - Removed xsave changes + +Merged patches (weekly-190104): +- f4dc802 pc: 7.5 compat entries +- 456ed3e pc: PC_RHEL7_6_COMPAT +- 04119ee pc: Add compat for pc-i440fx-rhel7.6.0 machine type +- b3b3687 pc: Add pc-q35-8.0.0 machine type +- 8d46fc6 pc: Add x-migrate-smi-count=off to PC_RHEL7_6_COMPAT +- 1de7949 kvm: clear out KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT for older machine types + +Merged patches (weekly-190301): +- 18cf0d7 target/i386: Disable MPX support on named CPU models (partially) + +Signed-off-by: Danilo C. L. de Paula --- hw/i386/acpi-build.c | 3 + - hw/i386/pc.c | 7 +- - hw/i386/pc_piix.c | 191 ++++++++++++++++- - hw/i386/pc_q35.c | 93 ++++++++- - include/hw/i386/pc.h | 564 +++++++++++++++++++++++++++++++++++++++++++++++++++ + hw/i386/pc.c | 645 ++++++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 200 +++++++++++++- + hw/i386/pc_q35.c | 114 +++++++- + include/hw/boards.h | 2 + + include/hw/i386/pc.h | 27 ++ target/i386/cpu.c | 9 +- - 6 files changed, 860 insertions(+), 7 deletions(-) + target/i386/kvm.c | 4 + + 8 files changed, 997 insertions(+), 7 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index 236a20e..3360da9 100644 +index 416da318ae..4cad5645b2 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c -@@ -184,6 +184,9 @@ static void acpi_get_pm_info(AcpiPmInfo *pm) +@@ -190,6 +190,9 @@ static void acpi_get_pm_info(AcpiPmInfo *pm) pm->fadt.reset_reg = r; pm->fadt.reset_val = 0xf; pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; @@ -33,10 +51,654 @@ index 236a20e..3360da9 100644 } assert(obj); diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 567439e..a609332 100644 +index f2c15bf1f2..03497eed9a 100644 --- a/hw/i386/pc.c +++ 
b/hw/i386/pc.c -@@ -1424,7 +1424,8 @@ void pc_memory_init(PCMachineState *pcms, +@@ -350,6 +350,643 @@ GlobalProperty pc_compat_1_4[] = { + }; + const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); + ++/* This macro is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. ++ */ ++GlobalProperty pc_rhel_compat[] = { ++ { /* PC_RHEL_COMPAT */ ++ .driver = TYPE_X86_CPU, ++ .property = "host-phys-bits", ++ .value = "on", ++ }, ++ { /* PC_RHEL_COMPAT bz 1508330 */ ++ .driver = "vfio-pci", ++ .property = "x-no-geforce-quirks", ++ .value = "on", ++ }, ++}; ++const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); ++ ++/* Similar to PC_COMPAT_3_0 + PC_COMPAT_2_12, but: ++ * all of the 2_12 stuff was already in 7.6 from bz 1481253 ++ * x-migrate-smi-count comes from PC_COMPAT_2_11 but ++ * is really tied to kernel version so keep it off on 7.x ++ * machine types irrespective of host. ++ */ ++GlobalProperty pc_rhel_7_6_compat[] = { ++ { /* PC_RHEL7_6_COMPAT from PC_COMPAT_3_0 */ ++ .driver = TYPE_X86_CPU, ++ .property = "x-hv-synic-kvm-only", ++ .value = "on", ++ },{ /* PC_RHEL7_6_COMPAT from PC_COMPAT_3_0 */ ++ .driver = "Skylake-Server" "-" TYPE_X86_CPU, ++ .property = "pku", ++ .value = "off", ++ },{ /* PC_RHEL7_6_COMPAT from PC_COMPAT_3_0 */ ++ .driver = "Skylake-Server-IBRS" "-" TYPE_X86_CPU, ++ .property = "pku", ++ .value = "off", ++ },{ /* PC_RHEL7_6_COMPAT from PC_COMPAT_2_11 */ ++ .driver = TYPE_X86_CPU, ++ .property = "x-migrate-smi-count", ++ .value = "off", ++ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ ++ .driver = "Skylake-Client" "-" TYPE_X86_CPU, ++ .property = "mpx", ++ .value = "on", ++ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ ++ .driver = "Skylake-Client-IBRS" "-" TYPE_X86_CPU, ++ .property = "mpx", ++ .value = "on", ++ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ ++ .driver = "Skylake-Server" "-" TYPE_X86_CPU, ++ .property = "mpx", ++ .value 
= "on", ++ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ ++ .driver = "Skylake-Server-IBRS" "-" TYPE_X86_CPU, ++ .property = "mpx", ++ .value = "on", ++ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ ++ .driver = "Cascadelake-Server" "-" TYPE_X86_CPU, ++ .property = "mpx", ++ .value = "on", ++ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ ++ .driver = "Icelake-Client" "-" TYPE_X86_CPU, ++ .property = "mpx", ++ .value = "on", ++ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ ++ .driver = "Icelake-Server" "-" TYPE_X86_CPU, ++ .property = "mpx", ++ .value = "on", ++ }, ++}; ++const size_t pc_rhel_7_6_compat_len = G_N_ELEMENTS(pc_rhel_7_6_compat); ++ ++/* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: ++ * - x-hv-max-vps was backported to 7.5 ++ * - x-pci-hole64-fix was backported to 7.5 ++ */ ++GlobalProperty pc_rhel_7_5_compat[] = { ++ { /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_11 */ ++ .driver = "Skylake-Server" "-" TYPE_X86_CPU, ++ .property = "clflushopt", ++ .value = "off", ++ },{ /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_12 */ ++ .driver = TYPE_X86_CPU, ++ .property = "legacy-cache", ++ .value = "on", ++ },{ /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_12 */ ++ .driver = TYPE_X86_CPU, ++ .property = "topoext", ++ .value = "off", ++ },{ /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_12 */ ++ .driver = "EPYC-" TYPE_X86_CPU, ++ .property = "xlevel", ++ .value = stringify(0x8000000a), ++ },{ /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_12 */ ++ .driver = "EPYC-IBPB-" TYPE_X86_CPU, ++ .property = "xlevel", ++ .value = stringify(0x8000000a), ++ }, ++}; ++const size_t pc_rhel_7_5_compat_len = G_N_ELEMENTS(pc_rhel_7_5_compat); ++ ++ ++GlobalProperty pc_rhel_7_4_compat[] = { ++ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_9 */ ++ .driver = "mch", ++ .property = "extended-tseg-mbytes", ++ .value = stringify(0), ++ }, ++ { /* PC_RHEL7_4_COMPAT bz 1489800 */ ++ .driver = "ICH9-LPC", ++ .property = "__com.redhat_force-rev1-fadt", ++ .value = "on", ++ }, ++ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ ++ 
.driver = "i440FX-pcihost", ++ .property = "x-pci-hole64-fix", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ ++ .driver = "q35-pcihost", ++ .property = "x-pci-hole64-fix", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ ++ .driver = TYPE_X86_CPU, ++ .property = "x-hv-max-vps", ++ .value = "0x40", ++ }, ++}; ++const size_t pc_rhel_7_4_compat_len = G_N_ELEMENTS(pc_rhel_7_4_compat); ++ ++GlobalProperty pc_rhel_7_3_compat[] = { ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ ++ .driver = "kvmclock", ++ .property = "x-mach-use-reliable-get-clock", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ ++ .driver = TYPE_X86_CPU, ++ .property = "l3-cache", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ ++ .driver = TYPE_X86_CPU, ++ .property = "full-cpuid-auto-level", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ ++ .driver = "Opteron_G3" "-" TYPE_X86_CPU, ++ .property = "family", ++ .value = "15", ++ }, ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ ++ .driver = "Opteron_G3" "-" TYPE_X86_CPU, ++ .property = "model", ++ .value = "6", ++ }, ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ ++ .driver = "Opteron_G3" "-" TYPE_X86_CPU, ++ .property = "stepping", ++ .value = "1", ++ }, ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ ++ .driver = "isa-pcspk", ++ .property = "migrate", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_6 */ ++ .driver = TYPE_X86_CPU, ++ .property = "cpuid-0xb", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ ++ .driver = "ICH9-LPC", ++ .property = "x-smi-broadcast", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ ++ .driver = TYPE_X86_CPU, ++ .property = "vmware-cpuid-freq", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ ++ .driver = "Haswell-" TYPE_X86_CPU, ++ .property = "stepping", ++ .value = "1", ++ }, ++ {/* PC_RHEL7_3_COMPAT 
from PC_COMPAT_2_3 added in 2.9 */ ++ .driver = TYPE_X86_CPU, ++ .property = "kvm-no-smi-migration", ++ .value = "on", ++ }, ++}; ++const size_t pc_rhel_7_3_compat_len = G_N_ELEMENTS(pc_rhel_7_3_compat); ++ ++GlobalProperty pc_rhel_7_2_compat[] = { ++ { ++ .driver = "phenom" "-" TYPE_X86_CPU, ++ .property = "rdtscp", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_2_COMPAT */ ++ .driver = "qemu64" "-" TYPE_X86_CPU, ++ .property = "sse4a", ++ .value = "on", ++ }, ++ { /* PC_RHEL7_2_COMPAT */ ++ .driver = "qemu64" "-" TYPE_X86_CPU, ++ .property = "abm", ++ .value = "on", ++ }, ++ { /* PC_RHEL7_2_COMPAT */ ++ .driver = "Haswell-" TYPE_X86_CPU, ++ .property = "abm", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ ++ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU, ++ .property = "abm", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_2_COMPAT */ ++ .driver = "Haswell-noTSX-" TYPE_X86_CPU, ++ .property = "abm", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ ++ .driver = "Haswell-noTSX-IBRS" "-" TYPE_X86_CPU, ++ .property = "abm", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_2_COMPAT */ ++ .driver = "Broadwell-" TYPE_X86_CPU, ++ .property = "abm", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ ++ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU, ++ .property = "abm", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_2_COMPAT */ ++ .driver = "Broadwell-noTSX-" TYPE_X86_CPU, ++ .property = "abm", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ ++ .driver = "Broadwell-noTSX-IBRS" "-" TYPE_X86_CPU, ++ .property = "abm", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_2_COMPAT */ ++ .driver = "host" "-" TYPE_X86_CPU, ++ .property = "host-cache-info", ++ .value = "on", ++ }, ++ { /* PC_RHEL7_2_COMPAT */ ++ .driver = TYPE_X86_CPU, ++ .property = "check", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_2_COMPAT */ ++ .driver = "qemu32" "-" TYPE_X86_CPU, ++ .property = "popcnt", ++ 
.value = "on", ++ }, ++ { /* PC_RHEL7_2_COMPAT */ ++ .driver = TYPE_X86_CPU, ++ .property = "arat", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_2_COMPAT */ ++ .driver = "usb-redir", ++ .property = "streams", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_2_COMPAT */ ++ .driver = TYPE_X86_CPU, ++ .property = "fill-mtrr-mask", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_2_COMPAT */ ++ .driver = "apic-common", ++ .property = "legacy-instance-id", ++ .value = "on", ++ }, ++}; ++const size_t pc_rhel_7_2_compat_len = G_N_ELEMENTS(pc_rhel_7_2_compat); ++ ++GlobalProperty pc_rhel_7_1_compat[] = { ++ { ++ .driver = "kvm64" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { ++ .driver = "kvm32" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { ++ .driver = "Conroe" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { ++ .driver = "Penryn" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { ++ .driver = "Nehalem" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ ++ .driver = "Nehalem-IBRS" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { ++ .driver = "Westmere" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ ++ .driver = "Westmere-IBRS" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { ++ .driver = "SandyBridge" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ ++ .driver = "SandyBridge-IBRS" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { ++ .driver = "Haswell" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ ++ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { ++ .driver = "Broadwell" "-" TYPE_X86_CPU, ++ 
.property = "vme", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ ++ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { ++ .driver = "Opteron_G1" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { ++ .driver = "Opteron_G2" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { ++ .driver = "Opteron_G3" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { ++ .driver = "Opteron_G4" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { ++ .driver = "Opteron_G5" "-" TYPE_X86_CPU, ++ .property = "vme", ++ .value = "off", ++ }, ++ { ++ .driver = "Haswell" "-" TYPE_X86_CPU, ++ .property = "f16c", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ ++ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU, ++ .property = "f16c", ++ .value = "off", ++ }, ++ { ++ .driver = "Haswell" "-" TYPE_X86_CPU, ++ .property = "rdrand", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ ++ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU, ++ .property = "rdrand", ++ .value = "off", ++ }, ++ { ++ .driver = "Broadwell" "-" TYPE_X86_CPU, ++ .property = "f16c", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ ++ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU, ++ .property = "f16c", ++ .value = "off", ++ }, ++ { ++ .driver = "Broadwell" "-" TYPE_X86_CPU, ++ .property = "rdrand", ++ .value = "off", ++ }, ++ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ ++ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU, ++ .property = "rdrand", ++ .value = "off", ++ }, ++ { ++ .driver = "coreduo" "-" TYPE_X86_CPU, ++ .property = "vmx", ++ .value = "on", ++ }, ++ { ++ .driver = "core2duo" "-" TYPE_X86_CPU, ++ .property = "vmx", ++ .value = "on", ++ }, ++ { /* PC_RHEL7_1_COMPAT */ ++ .driver = "qemu64" "-" TYPE_X86_CPU, ++ .property = "min-level", ++ .value = stringify(4), ++ },{ /* 
PC_RHEL7_1_COMPAT */ ++ .driver = "kvm64" "-" TYPE_X86_CPU, ++ .property = "min-level", ++ .value = stringify(5), ++ },{ /* PC_RHEL7_1_COMPAT */ ++ .driver = "pentium3" "-" TYPE_X86_CPU, ++ .property = "min-level", ++ .value = stringify(2), ++ },{ /* PC_RHEL7_1_COMPAT */ ++ .driver = "n270" "-" TYPE_X86_CPU, ++ .property = "min-level", ++ .value = stringify(5), ++ },{ /* PC_RHEL7_1_COMPAT */ ++ .driver = "Conroe" "-" TYPE_X86_CPU, ++ .property = "min-level", ++ .value = stringify(4), ++ },{ /* PC_RHEL7_1_COMPAT */ ++ .driver = "Penryn" "-" TYPE_X86_CPU, ++ .property = "min-level", ++ .value = stringify(4), ++ },{ /* PC_RHEL7_1_COMPAT */ ++ .driver = "Nehalem" "-" TYPE_X86_CPU, ++ .property = "min-level", ++ .value = stringify(4), ++ },{ /* PC_RHEL7_1_COMPAT */ ++ .driver = "n270" "-" TYPE_X86_CPU, ++ .property = "min-xlevel", ++ .value = stringify(0x8000000a), ++ },{ /* PC_RHEL7_1_COMPAT */ ++ .driver = "Penryn" "-" TYPE_X86_CPU, ++ .property = "min-xlevel", ++ .value = stringify(0x8000000a), ++ },{ /* PC_RHEL7_1_COMPAT */ ++ .driver = "Conroe" "-" TYPE_X86_CPU, ++ .property = "min-xlevel", ++ .value = stringify(0x8000000a), ++ },{ /* PC_RHEL7_1_COMPAT */ ++ .driver = "Nehalem" "-" TYPE_X86_CPU, ++ .property = "min-xlevel", ++ .value = stringify(0x8000000a), ++ },{ /* PC_RHEL7_1_COMPAT */ ++ .driver = "Westmere" "-" TYPE_X86_CPU, ++ .property = "min-xlevel", ++ .value = stringify(0x8000000a), ++ },{ /* PC_RHEL7_1_COMPAT */ ++ .driver = "SandyBridge" "-" TYPE_X86_CPU, ++ .property = "min-xlevel", ++ .value = stringify(0x8000000a), ++ },{ /* PC_RHEL7_1_COMPAT */ ++ .driver = "IvyBridge" "-" TYPE_X86_CPU, ++ .property = "min-xlevel", ++ .value = stringify(0x8000000a), ++ },{ /* PC_RHEL7_1_COMPAT */ ++ .driver = "Haswell" "-" TYPE_X86_CPU, ++ .property = "min-xlevel", ++ .value = stringify(0x8000000a), ++ },{ /* PC_RHEL7_1_COMPAT */ ++ .driver = "Haswell-noTSX" "-" TYPE_X86_CPU, ++ .property = "min-xlevel", ++ .value = stringify(0x8000000a), ++ },{ /* PC_RHEL7_1_COMPAT 
*/ ++ .driver = "Broadwell" "-" TYPE_X86_CPU, ++ .property = "min-xlevel", ++ .value = stringify(0x8000000a), ++ },{ /* PC_RHEL7_1_COMPAT */ ++ .driver = "Broadwell-noTSX" "-" TYPE_X86_CPU, ++ .property = "min-xlevel", ++ .value = stringify(0x8000000a), ++ }, ++}; ++const size_t pc_rhel_7_1_compat_len = G_N_ELEMENTS(pc_rhel_7_1_compat); ++ ++/* ++ * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine ++ * types as the PC_COMPAT_* do for upstream types. ++ * PC_RHEL_7_*_COMPAT apply both to i440fx and q35 types. ++ */ ++ ++/* ++ * RHEL-7 is based on QEMU 1.5.3, so this needs the PC_COMPAT_* ++ * between our base and 1.5, less stuff backported to RHEL-7.0 ++ * (usb-device.msos-desc), less stuff for devices we changed ++ * (qemu64-x86_64-cpu) or don't support (hpet, pci-serial-2x, ++ * pci-serial-4x) in 7.0. ++ */ ++GlobalProperty pc_rhel_7_0_compat[] = { ++ { ++ .driver = "virtio-scsi-pci", ++ .property = "any_layout", ++ .value = "off", ++ },{ ++ .driver = "PIIX4_PM", ++ .property = "memory-hotplug-support", ++ .value = "off", ++ },{ ++ .driver = "apic", ++ .property = "version", ++ .value = stringify(0x11), ++ },{ ++ .driver = "nec-usb-xhci", ++ .property = "superspeed-ports-first", ++ .value = "off", ++ },{ ++ .driver = "nec-usb-xhci", ++ .property = "force-pcie-endcap", ++ .value = "on", ++ },{ ++ .driver = "pci-serial", ++ .property = "prog_if", ++ .value = stringify(0), ++ },{ ++ .driver = "virtio-net-pci", ++ .property = "guest_announce", ++ .value = "off", ++ },{ ++ .driver = "ICH9-LPC", ++ .property = "memory-hotplug-support", ++ .value = "off", ++ },{ ++ .driver = "xio3130-downstream", ++ .property = COMPAT_PROP_PCP, ++ .value = "off", ++ },{ ++ .driver = "ioh3420", ++ .property = COMPAT_PROP_PCP, ++ .value = "off", ++ },{ ++ .driver = "PIIX4_PM", ++ .property = "acpi-pci-hotplug-with-bridge-support", ++ .value = "off", ++ },{ ++ .driver = "e1000", ++ .property = "mitigation", ++ .value = "off", ++ },{ ++ .driver = "virtio-net-pci", ++ .property 
= "ctrl_guest_offloads", ++ .value = "off", ++ }, ++ { ++ .driver = "Conroe" "-" TYPE_X86_CPU, ++ .property = "x2apic", ++ .value = "on", ++ }, ++ { ++ .driver = "Penryn" "-" TYPE_X86_CPU, ++ .property = "x2apic", ++ .value = "on", ++ }, ++ { ++ .driver = "Nehalem" "-" TYPE_X86_CPU, ++ .property = "x2apic", ++ .value = "on", ++ }, ++ { /* PC_RHEL7_0_COMPAT (copied from the entry above) */ ++ .driver = "Nehalem-IBRS" "-" TYPE_X86_CPU, ++ .property = "x2apic", ++ .value = "on", ++ }, ++ { ++ .driver = "Westmere" "-" TYPE_X86_CPU, ++ .property = "x2apic", ++ .value = "on", ++ }, ++ { /* PC_RHEL7_0_COMPAT (copied from the entry above) */ ++ .driver = "Westmere-IBRS" "-" TYPE_X86_CPU, ++ .property = "x2apic", ++ .value = "on", ++ }, ++ { ++ .driver = "Opteron_G1" "-" TYPE_X86_CPU, ++ .property = "x2apic", ++ .value = "on", ++ }, ++ { ++ .driver = "Opteron_G2" "-" TYPE_X86_CPU, ++ .property = "x2apic", ++ .value = "on", ++ }, ++ { ++ .driver = "Opteron_G3" "-" TYPE_X86_CPU, ++ .property = "x2apic", ++ .value = "on", ++ }, ++ { ++ .driver = "Opteron_G4" "-" TYPE_X86_CPU, ++ .property = "x2apic", ++ .value = "on", ++ }, ++ { ++ .driver = "Opteron_G5" "-" TYPE_X86_CPU, ++ .property = "x2apic", ++ .value = "on", ++ } ++}; ++const size_t pc_rhel_7_0_compat_len = G_N_ELEMENTS(pc_rhel_7_0_compat); ++ + void gsi_handler(void *opaque, int n, int level) + { + GSIState *s = opaque; +@@ -1814,7 +2451,8 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr = g_malloc(sizeof(*option_rom_mr)); memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, &error_fatal); @@ -46,15 +708,16 @@ index 567439e..a609332 100644 memory_region_set_readonly(option_rom_mr, true); } memory_region_add_subregion_overlap(rom_memory, -@@ -2389,6 +2390,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - pcmc->save_tsc_khz = true; +@@ -2731,6 +3369,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->linuxboot_dma_enabled = true; + pcmc->pvh_enabled = true; 
assert(!mc->get_hotplug_handler); + pcmc->pc_rom_ro = true; - mc->get_hotplug_handler = pc_get_hotpug_handler; ++ mc->async_pf_vmexit_disable = false; + mc->get_hotplug_handler = pc_get_hotplug_handler; mc->cpu_index_to_instance_props = pc_cpu_index_to_props; mc->get_default_cpu_node_id = pc_get_default_cpu_node_id; -@@ -2398,7 +2400,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -2740,7 +3380,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->default_boot_order = "cad"; mc->hot_add_cpu = pc_hot_add_cpu; mc->block_default_type = IF_IDE; @@ -65,7 +728,7 @@ index 567439e..a609332 100644 hc->pre_plug = pc_machine_device_pre_plug_cb; hc->plug = pc_machine_device_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 7092d6d..af9eb8c 100644 +index 8ad8e885c6..37907fe76a 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -50,6 +50,7 @@ @@ -87,15 +750,15 @@ index 7092d6d..af9eb8c 100644 pcmc->smbios_uuid_encoded, SMBIOS_ENTRY_POINT_21); } -@@ -309,6 +310,7 @@ static void pc_init1(MachineState *machine, +@@ -310,6 +311,7 @@ static void pc_init1(MachineState *machine, * HW_COMPAT_*, PC_COMPAT_*, or * pc_*_machine_options(). 
*/ +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void pc_compat_2_3(MachineState *machine) + static void pc_compat_2_3_fn(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); -@@ -1157,3 +1159,188 @@ static void xenfv_machine_options(MachineClass *m) +@@ -998,3 +1000,197 @@ static void xenfv_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init, xenfv_machine_options); #endif @@ -111,7 +774,8 @@ index 7092d6d..af9eb8c 100644 + m->default_machine_opts = "firmware=bios-256k.bin"; + pcmc->default_nic_model = "e1000"; + m->default_display = "std"; -+ SET_MACHINE_COMPAT(m, PC_RHEL_COMPAT); ++ m->no_parallel = 1; ++ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + m->alias = "pc"; + m->is_default = 1; +} @@ -126,6 +790,9 @@ index 7092d6d..af9eb8c 100644 +{ + pc_machine_rhel7_options(m); + m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; ++ m->async_pf_vmexit_disable = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); +} + +DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, @@ -144,7 +811,8 @@ index 7092d6d..af9eb8c 100644 + m->is_default = 0; + m->desc = "RHEL 7.5.0 PC (i440FX + PIIX, 1996)"; + m->auto_enable_numa_with_memhp = false; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_5_COMPAT); ++ compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); +} + +DEFINE_PC_MACHINE(rhel750, "pc-i440fx-rhel7.5.0", pc_init_rhel750, @@ -163,7 +831,8 @@ index 7092d6d..af9eb8c 100644 + m->desc = "RHEL 7.4.0 PC (i440FX + PIIX, 1996)"; + m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; + pcmc->pc_rom_ro = false; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_4_COMPAT); ++ compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(m->compat_props, 
pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); +} + +DEFINE_PC_MACHINE(rhel740, "pc-i440fx-rhel7.4.0", pc_init_rhel740, @@ -181,7 +850,8 @@ index 7092d6d..af9eb8c 100644 + pc_machine_rhel740_options(m); + m->desc = "RHEL 7.3.0 PC (i440FX + PIIX, 1996)"; + pcmc->linuxboot_dma_enabled = false; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_3_COMPAT); ++ compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); +} + +DEFINE_PC_MACHINE(rhel730, "pc-i440fx-rhel7.3.0", pc_init_rhel730, @@ -205,7 +875,8 @@ index 7092d6d..af9eb8c 100644 + /* Note: broken_reserved_end was already in 7.2 */ + /* From pc_i440fx_2_6_machine_options */ + pcmc->legacy_cpu_hotplug = true; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_2_COMPAT); ++ compat_props_add(m->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); ++ compat_props_add(m->compat_props, pc_rhel_7_2_compat, pc_rhel_7_2_compat_len); +} + +DEFINE_PC_MACHINE(rhel720, "pc-i440fx-rhel7.2.0", pc_init_rhel720, @@ -245,7 +916,8 @@ index 7092d6d..af9eb8c 100644 + m->family = "pc_piix_Y"; + m->desc = "RHEL 7.1.0 PC (i440FX + PIIX, 1996)"; + m->default_display = "cirrus"; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_1_COMPAT); ++ compat_props_add(m->compat_props, hw_compat_rhel_7_1, hw_compat_rhel_7_1_len); ++ compat_props_add(m->compat_props, pc_rhel_7_1_compat, pc_rhel_7_1_compat_len); +} + +DEFINE_PC_MACHINE(rhel710, "pc-i440fx-rhel7.1.0", pc_init_rhel710, @@ -279,16 +951,16 @@ index 7092d6d..af9eb8c 100644 + pc_machine_rhel710_options(m); + m->family = "pc_piix_Y"; + m->desc = "RHEL 7.0.0 PC (i440FX + PIIX, 1996)"; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_0_COMPAT); ++ compat_props_add(m->compat_props, pc_rhel_7_0_compat, pc_rhel_7_0_compat_len); +} + +DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 4702bb1..163546e 100644 +index 372c6b73be..4b7df48919 100644 --- 
a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -145,8 +145,8 @@ static void pc_q35_init(MachineState *machine) +@@ -198,8 +198,8 @@ static void pc_q35_init(MachineState *machine) if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ @@ -299,7 +971,7 @@ index 4702bb1..163546e 100644 pcmc->smbios_uuid_encoded, SMBIOS_ENTRY_POINT_21); } -@@ -294,6 +294,7 @@ static void pc_q35_init(MachineState *machine) +@@ -347,6 +347,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -307,15 +979,7 @@ index 4702bb1..163546e 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -315,6 +316,7 @@ static void pc_q35_3_1_machine_options(MachineClass *m) - { - pc_q35_machine_options(m); - m->alias = "q35"; -+ SET_MACHINE_COMPAT(m, PC_COMPAT_2_12); - } - - DEFINE_Q35_MACHINE(v3_1, "pc-q35-3.1", NULL, -@@ -425,3 +427,90 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -507,3 +508,112 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -323,8 +987,8 @@ index 4702bb1..163546e 100644 + +/* Red Hat Enterprise Linux machine types */ + -+/* Options for the latest rhel7 q35 machine type */ -+static void pc_q35_machine_rhel7_options(MachineClass *m) ++/* Options for the latest rhel q35 machine type */ ++static void pc_q35_machine_rhel_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pcmc->default_nic_model = "e1000e"; @@ -332,12 +996,27 @@ index 4702bb1..163546e 100644 + m->default_machine_opts = "firmware=bios-256k.bin"; + m->default_display = "std"; + m->no_floppy = 1; ++ m->no_parallel = 1; + machine_class_allow_dynamic_sysbus_dev(m, TYPE_SYS_BUS_DEVICE); + m->alias = "q35"; + m->max_cpus = 384; -+ SET_MACHINE_COMPAT(m, PC_RHEL_COMPAT); ++ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); +} + ++static void 
pc_q35_init_rhel800(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel800_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.0.0 PC (Q35 + ICH9, 2009)"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel800, "pc-q35-rhel8.0.0", pc_q35_init_rhel800, ++ pc_q35_machine_rhel800_options); ++ +static void pc_q35_init_rhel760(MachineState *machine) +{ + pc_q35_init(machine); @@ -345,8 +1024,12 @@ index 4702bb1..163546e 100644 + +static void pc_q35_machine_rhel760_options(MachineClass *m) +{ -+ pc_q35_machine_rhel7_options(m); ++ pc_q35_machine_rhel800_options(m); ++ m->alias = NULL; + m->desc = "RHEL-7.6.0 PC (Q35 + ICH9, 2009)"; ++ m->async_pf_vmexit_disable = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); +} + +DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, @@ -365,7 +1048,8 @@ index 4702bb1..163546e 100644 + m->desc = "RHEL-7.5.0 PC (Q35 + ICH9, 2009)"; + m->auto_enable_numa_with_memhp = false; + pcmc->default_nic_model = "e1000"; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_5_COMPAT); ++ compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); +} + +DEFINE_PC_MACHINE(q35_rhel750, "pc-q35-rhel7.5.0", pc_q35_init_rhel750, @@ -383,7 +1067,8 @@ index 4702bb1..163546e 100644 + m->desc = "RHEL-7.4.0 PC (Q35 + ICH9, 2009)"; + m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; + pcmc->pc_rom_ro = false; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_4_COMPAT); ++ compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); +} + +DEFINE_PC_MACHINE(q35_rhel740, "pc-q35-rhel7.4.0", pc_q35_init_rhel740, @@ -401,596 +1086,75 @@ index 4702bb1..163546e 100644 + m->desc = "RHEL-7.3.0 PC (Q35 + 
ICH9, 2009)"; + m->max_cpus = 255; + pcmc->linuxboot_dma_enabled = false; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_3_COMPAT); ++ compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); +} + +DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, + pc_q35_machine_rhel730_options); +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 456e4a944c..b292365bfa 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -206,6 +206,8 @@ struct MachineClass { + const char **valid_cpu_types; + strList *allowed_dynamic_sysbus_devices; + bool auto_enable_numa_with_memhp; ++ /* RHEL only */ ++ bool async_pf_vmexit_disable; + void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes, + int nb_nodes, ram_addr_t size); + bool ignore_boot_device_suffixes; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 136fe49..f8f35af 100644 +index ca65ef18af..b066f0a539 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -134,6 +134,9 @@ struct PCMachineClass { +@@ -134,6 +134,9 @@ typedef struct PCMachineClass { - /* use DMA capable linuxboot option rom */ - bool linuxboot_dma_enabled; + /* use PVH to load kernels that support this feature */ + bool pvh_enabled; + + /* RH only, see bz 1489800 */ + bool pc_rom_ro; - }; + } PCMachineClass; #define TYPE_PC_MACHINE "generic-pc-machine" -@@ -976,4 +979,565 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); - type_init(pc_machine_init_##suffix) +@@ -350,6 +353,30 @@ extern const size_t pc_compat_1_5_len; + extern GlobalProperty pc_compat_1_4[]; + extern const size_t pc_compat_1_4_len; - extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); ++extern GlobalProperty pc_rhel_compat[]; ++extern const size_t pc_rhel_compat_len; + -+/* See include/hw/compat.h for shared compatibility lists */ ++extern GlobalProperty pc_rhel_7_6_compat[]; ++extern const 
size_t pc_rhel_7_6_compat_len; + -+/* This macro is for changes to properties that are RHEL specific, -+ * different to the current upstream and to be applied to the latest -+ * machine type. -+ */ -+#define PC_RHEL_COMPAT \ -+ { /* PC_RHEL_COMPAT */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "host-phys-bits",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL_COMPAT bz 1508330 */ \ -+ .driver = "vfio-pci",\ -+ .property = "x-no-geforce-quirks",\ -+ .value = "on",\ -+ }, ++extern GlobalProperty pc_rhel_7_5_compat[]; ++extern const size_t pc_rhel_7_5_compat_len; + -+/* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: -+ * - x-hv-max-vps was backported to 7.5 -+ * - x-pci-hole64-fix was backported to 7.5 -+ */ -+#define PC_RHEL7_5_COMPAT \ -+ HW_COMPAT_RHEL7_5 \ -+ { /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_11 */ \ -+ .driver = "Skylake-Server" "-" TYPE_X86_CPU,\ -+ .property = "clflushopt",\ -+ .value = "off",\ -+ }, ++extern GlobalProperty pc_rhel_7_4_compat[]; ++extern const size_t pc_rhel_7_4_compat_len; + ++extern GlobalProperty pc_rhel_7_3_compat[]; ++extern const size_t pc_rhel_7_3_compat_len; + -+#define PC_RHEL7_4_COMPAT \ -+ HW_COMPAT_RHEL7_4 \ -+ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_9 */ \ -+ .driver = "mch",\ -+ .property = "extended-tseg-mbytes",\ -+ .value = stringify(0),\ -+ },\ -+ { /* PC_RHEL7_4_COMPAT bz 1489800 */ \ -+ .driver = "ICH9-LPC",\ -+ .property = "__com.redhat_force-rev1-fadt",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ \ -+ .driver = "i440FX-pcihost",\ -+ .property = "x-pci-hole64-fix",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ \ -+ .driver = "q35-pcihost",\ -+ .property = "x-pci-hole64-fix",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "x-hv-max-vps",\ -+ .value = "0x40",\ -+ }, ++extern GlobalProperty pc_rhel_7_2_compat[]; ++extern const size_t pc_rhel_7_2_compat_len; + -+#define PC_RHEL7_3_COMPAT \ 
-+ HW_COMPAT_RHEL7_3 \ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ \ -+ .driver = "kvmclock",\ -+ .property = "x-mach-use-reliable-get-clock",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "l3-cache",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "full-cpuid-auto-level",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ -+ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ -+ .property = "family",\ -+ .value = "15",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ -+ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ -+ .property = "model",\ -+ .value = "6",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ -+ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ -+ .property = "stepping",\ -+ .value = "1",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ \ -+ .driver = "isa-pcspk",\ -+ .property = "migrate",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_6 */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "cpuid-0xb",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ \ -+ .driver = "ICH9-LPC",\ -+ .property = "x-smi-broadcast",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "vmware-cpuid-freq",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ \ -+ .driver = "Haswell-" TYPE_X86_CPU,\ -+ .property = "stepping",\ -+ .value = "1",\ -+ },\ -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_3 added in 2.9 */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "kvm-no-smi-migration",\ -+ .value = "on",\ -+ }, ++extern GlobalProperty pc_rhel_7_1_compat[]; ++extern const size_t pc_rhel_7_1_compat_len; + -+#define PC_RHEL7_2_COMPAT \ -+ HW_COMPAT_RHEL7_2 \ -+ {\ -+ .driver = "phenom" "-" TYPE_X86_CPU,\ -+ .property = "rdtscp",\ -+ .value = "off",\ -+ },\ -+ { /* 
PC_RHEL7_2_COMPAT */ \ -+ .driver = "qemu64" "-" TYPE_X86_CPU,\ -+ .property = "sse4a",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "qemu64" "-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "Haswell-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ \ -+ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "Haswell-noTSX-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ \ -+ .driver = "Haswell-noTSX-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "Broadwell-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ \ -+ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "Broadwell-noTSX-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ \ -+ .driver = "Broadwell-noTSX-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "abm",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "host" "-" TYPE_X86_CPU,\ -+ .property = "host-cache-info",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "check",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "qemu32" "-" TYPE_X86_CPU,\ -+ .property = "popcnt",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "arat",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "usb-redir",\ -+ .property = "streams",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ 
-+ .driver = TYPE_X86_CPU,\ -+ .property = "fill-mtrr-mask",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_2_COMPAT */ \ -+ .driver = "apic-common",\ -+ .property = "legacy-instance-id",\ -+ .value = "on",\ -+ }, ++extern GlobalProperty pc_rhel_7_0_compat[]; ++extern const size_t pc_rhel_7_0_compat_len; + -+ -+ -+#define PC_RHEL7_1_COMPAT \ -+ HW_COMPAT_RHEL7_1 \ -+ {\ -+ .driver = "kvm64" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "kvm32" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Conroe" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Penryn" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Nehalem" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = "Nehalem-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Westmere" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = "Westmere-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "SandyBridge" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = "SandyBridge-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Haswell" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Broadwell" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU,\ -+ 
.property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Opteron_G1" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Opteron_G2" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Opteron_G4" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Opteron_G5" "-" TYPE_X86_CPU,\ -+ .property = "vme",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Haswell" "-" TYPE_X86_CPU,\ -+ .property = "f16c",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "f16c",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Haswell" "-" TYPE_X86_CPU,\ -+ .property = "rdrand",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "rdrand",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Broadwell" "-" TYPE_X86_CPU,\ -+ .property = "f16c",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "f16c",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "Broadwell" "-" TYPE_X86_CPU,\ -+ .property = "rdrand",\ -+ .value = "off",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ \ -+ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "rdrand",\ -+ .value = "off",\ -+ },\ -+ {\ -+ .driver = "coreduo" "-" TYPE_X86_CPU,\ -+ .property = "vmx",\ -+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "core2duo" "-" TYPE_X86_CPU,\ -+ .property = "vmx",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "qemu64" "-" TYPE_X86_CPU,\ -+ .property = "min-level",\ -+ .value = stringify(4),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "kvm64" "-" TYPE_X86_CPU,\ -+ 
.property = "min-level",\ -+ .value = stringify(5),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "pentium3" "-" TYPE_X86_CPU,\ -+ .property = "min-level",\ -+ .value = stringify(2),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "n270" "-" TYPE_X86_CPU,\ -+ .property = "min-level",\ -+ .value = stringify(5),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Conroe" "-" TYPE_X86_CPU,\ -+ .property = "min-level",\ -+ .value = stringify(4),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Penryn" "-" TYPE_X86_CPU,\ -+ .property = "min-level",\ -+ .value = stringify(4),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Nehalem" "-" TYPE_X86_CPU,\ -+ .property = "min-level",\ -+ .value = stringify(4),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "n270" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Penryn" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Conroe" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Nehalem" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Westmere" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "SandyBridge" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "IvyBridge" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Haswell" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Haswell-noTSX" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* 
PC_RHEL7_1_COMPAT */ \ -+ .driver = "Broadwell" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_1_COMPAT */ \ -+ .driver = "Broadwell-noTSX" "-" TYPE_X86_CPU,\ -+ .property = "min-xlevel",\ -+ .value = stringify(0x8000000a),\ -+ }, -+ -+/* -+ * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine -+ * types as the PC_COMPAT_* do for upstream types. -+ * PC_RHEL_7_*_COMPAT apply both to i440fx and q35 types. -+ */ -+ -+/* -+ * RHEL-7 is based on QEMU 1.5.3, so this needs the PC_COMPAT_* -+ * between our base and 1.5, less stuff backported to RHEL-7.0 -+ * (usb-device.msos-desc), less stuff for devices we changed -+ * (qemu64-x86_64-cpu) or don't support (hpet, pci-serial-2x, -+ * pci-serial-4x) in 7.0. -+ */ -+#define PC_RHEL7_0_COMPAT \ -+ {\ -+ .driver = "virtio-scsi-pci",\ -+ .property = "any_layout",\ -+ .value = "off",\ -+ },{\ -+ .driver = "PIIX4_PM",\ -+ .property = "memory-hotplug-support",\ -+ .value = "off",\ -+ },{\ -+ .driver = "apic",\ -+ .property = "version",\ -+ .value = stringify(0x11),\ -+ },{\ -+ .driver = "nec-usb-xhci",\ -+ .property = "superspeed-ports-first",\ -+ .value = "off",\ -+ },{\ -+ .driver = "nec-usb-xhci",\ -+ .property = "force-pcie-endcap",\ -+ .value = "on",\ -+ },{\ -+ .driver = "pci-serial",\ -+ .property = "prog_if",\ -+ .value = stringify(0),\ -+ },{\ -+ .driver = "virtio-net-pci",\ -+ .property = "guest_announce",\ -+ .value = "off",\ -+ },{\ -+ .driver = "ICH9-LPC",\ -+ .property = "memory-hotplug-support",\ -+ .value = "off",\ -+ },{\ -+ .driver = "xio3130-downstream",\ -+ .property = COMPAT_PROP_PCP,\ -+ .value = "off",\ -+ },{\ -+ .driver = "ioh3420",\ -+ .property = COMPAT_PROP_PCP,\ -+ .value = "off",\ -+ },{\ -+ .driver = "PIIX4_PM",\ -+ .property = "acpi-pci-hotplug-with-bridge-support",\ -+ .value = "off",\ -+ },{\ -+ .driver = "e1000",\ -+ .property = "mitigation",\ -+ .value = "off",\ -+ },{ \ -+ .driver = "virtio-net-pci", \ -+ .property = 
"ctrl_guest_offloads", \ -+ .value = "off", \ -+ },\ -+ {\ -+ .driver = "Conroe" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "Penryn" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "Nehalem" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL7_0_COMPAT (copied from the entry above) */ \ -+ .driver = "Nehalem-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "Westmere" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ { /* PC_RHEL7_0_COMPAT (copied from the entry above) */ \ -+ .driver = "Westmere-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "Opteron_G1" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "Opteron_G2" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "Opteron_G4" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ },\ -+ {\ -+ .driver = "Opteron_G5" "-" TYPE_X86_CPU,\ -+ .property = "x2apic",\ -+ .value = "on",\ -+ }, - #endif + /* Helper for setting model-id for CPU models that changed model-id + * depending on QEMU versions up to QEMU 2.4. 
+ */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index e9b9183..573de14 100644 +index 6616303782..6472cd21f8 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1437,11 +1437,17 @@ static CPUCaches epyc_cache_info = { +@@ -1474,11 +1474,17 @@ static CPUCaches epyc_cache_info = { static X86CPUDefinition builtin_x86_defs[] = { { @@ -1009,7 +1173,7 @@ index e9b9183..573de14 100644 .stepping = 3, .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -@@ -2934,6 +2940,7 @@ static PropValue kvm_default_props[] = { +@@ -2973,6 +2979,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -1017,6 +1181,28 @@ index e9b9183..573de14 100644 { NULL, NULL }, }; +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 3b29ce5c0d..478942187a 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -2373,6 +2373,7 @@ static int kvm_get_msrs(X86CPU *cpu) + struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; + int ret, i; + uint64_t mtrr_top_bits; ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + + kvm_msr_buf_reset(cpu); + +@@ -2670,6 +2671,9 @@ static int kvm_get_msrs(X86CPU *cpu) + break; + case MSR_KVM_ASYNC_PF_EN: + env->async_pf_en_msr = msrs[i].data; ++ if (mc->async_pf_vmexit_disable) { ++ env->async_pf_en_msr &= ~(1ULL << 2); ++ } + break; + case MSR_KVM_PV_EOI_EN: + env->pv_eoi_en_msr = msrs[i].data; -- -1.8.3.1 +2.20.1 diff --git a/0011-Enable-make-check.patch b/0011-Enable-make-check.patch index 79897f5..74c8add 100644 --- a/0011-Enable-make-check.patch +++ b/0011-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 5a614332800ab5367d0be248e665250fe92964ba Mon Sep 17 00:00:00 2001 +From 0442a72cf9da8f79fe26b08999f2dca900af6ad0 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:48:41 +0200 Subject: Enable make check @@ -7,34 +7,35 @@ Fixing tests after device disabling and machine types 
changes and enabling make check run during build. Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- Remove testing for pseries-2.7 in endianess test +- Disable device-plug-test on s390x as it use disabled device +- Do not run cpu-plug-tests on 7.3 and older machine types + +Merged patches (4.0.0: +- f7ffd13 Remove 7 qcow2 and luks iotests that are taking > 25 sec to run during the fast train build proce + +Signed-off-by: Danilo C. L. de Paula --- - redhat/qemu-kvm.spec.template | 2 +- - tests/Makefile.include | 20 ++++++++++---------- + redhat/qemu-kvm.spec.template | 8 +++++++- + tests/Makefile.include | 10 +++++----- tests/boot-serial-test.c | 6 +++++- - tests/cpu-plug-test.c | 3 ++- + tests/cpu-plug-test.c | 4 ++-- tests/e1000-test.c | 2 ++ + tests/endianness-test.c | 2 ++ tests/prom-env-test.c | 2 ++ tests/qemu-iotests/051 | 12 ++++++------ tests/qemu-iotests/group | 4 ++-- tests/test-x86-cpuid-compat.c | 2 ++ tests/usb-hcd-xhci-test.c | 4 ++++ - 10 files changed, 36 insertions(+), 21 deletions(-) + 11 files changed, 39 insertions(+), 17 deletions(-) diff --git a/tests/Makefile.include b/tests/Makefile.include -index fb0b449..f2305a5 100644 +index 36fc73fef5..e8f5fbc2c6 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include -@@ -171,20 +171,20 @@ check-qtest-pci-$(CONFIG_IPACK) += $(check-qtest-ipack-y) - check-qtest-pci-y += tests/display-vga-test$(EXESUF) - check-qtest-pci-$(CONFIG_HDA) += tests/intel-hda-test$(EXESUF) - check-qtest-pci-$(CONFIG_IVSHMEM_DEVICE) += tests/ivshmem-test$(EXESUF) --check-qtest-pci-y += tests/megasas-test$(EXESUF) -+#check-qtest-pci-y += tests/megasas-test$(EXESUF) - - check-qtest-i386-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) --check-qtest-i386-y += tests/fdc-test$(EXESUF) -+#check-qtest-i386-y += tests/fdc-test$(EXESUF) - check-qtest-i386-y += tests/ide-test$(EXESUF) +@@ -162,7 +162,7 @@ check-qtest-i386-y += tests/ide-test$(EXESUF) check-qtest-i386-y += tests/ahci-test$(EXESUF) 
check-qtest-i386-y += tests/hd-geo-test$(EXESUF) check-qtest-i386-y += tests/boot-order-test$(EXESUF) @@ -43,14 +44,7 @@ index fb0b449..f2305a5 100644 check-qtest-i386-$(CONFIG_SGA) += tests/boot-serial-test$(EXESUF) check-qtest-i386-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) check-qtest-i386-y += tests/rtc-test$(EXESUF) --check-qtest-i386-y += tests/ipmi-kcs-test$(EXESUF) --check-qtest-i386-y += tests/ipmi-bt-test$(EXESUF) -+#check-qtest-i386-y += tests/ipmi-kcs-test$(EXESUF) -+#check-qtest-i386-y += tests/ipmi-bt-test$(EXESUF) - check-qtest-i386-y += tests/i440fx-test$(EXESUF) - check-qtest-i386-y += tests/fw_cfg-test$(EXESUF) - check-qtest-i386-y += tests/drive_del-test$(EXESUF) -@@ -238,15 +238,15 @@ check-qtest-mips64el-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) +@@ -221,7 +221,7 @@ check-qtest-mips64el-$(CONFIG_VGA) += tests/display-vga-test$(EXESUF) check-qtest-moxie-y += tests/boot-serial-test$(EXESUF) check-qtest-ppc-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) @@ -59,32 +53,31 @@ index fb0b449..f2305a5 100644 check-qtest-ppc-y += tests/prom-env-test$(EXESUF) check-qtest-ppc-y += tests/drive_del-test$(EXESUF) check-qtest-ppc-y += tests/boot-serial-test$(EXESUF) --check-qtest-ppc-y += tests/m48t59-test$(EXESUF) -+#check-qtest-ppc-y += tests/m48t59-test$(EXESUF) - - check-qtest-ppc64-y += $(check-qtest-ppc-y) - check-qtest-ppc64-y += tests/spapr-phb-test$(EXESUF) --check-qtest-ppc64-y += tests/pnv-xscom-test$(EXESUF) -+#check-qtest-ppc64-y += tests/pnv-xscom-test$(EXESUF) - check-qtest-ppc64-y += tests/migration-test$(EXESUF) - check-qtest-ppc64-y += tests/rtas-test$(EXESUF) +@@ -235,8 +235,8 @@ check-qtest-ppc64-$(CONFIG_PSERIES) += tests/rtas-test$(EXESUF) check-qtest-ppc64-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) -@@ -254,8 +254,8 @@ check-qtest-ppc64-$(CONFIG_USB_OHCI) += tests/usb-hcd-ohci-test$(EXESUF) check-qtest-ppc64-$(CONFIG_USB_UHCI) += tests/usb-hcd-uhci-test$(EXESUF) check-qtest-ppc64-$(CONFIG_USB_XHCI_NEC) += 
tests/usb-hcd-xhci-test$(EXESUF) - check-qtest-ppc64-y += $(check-qtest-virtio-y) -check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) -check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) +#check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) +#check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) check-qtest-ppc64-$(CONFIG_RTL8139_PCI) += tests/test-filter-redirector$(EXESUF) - check-qtest-ppc64-y += tests/display-vga-test$(EXESUF) + check-qtest-ppc64-$(CONFIG_VGA) += tests/display-vga-test$(EXESUF) check-qtest-ppc64-y += tests/numa-test$(EXESUF) +@@ -278,7 +278,7 @@ check-qtest-s390x-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) + check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) + check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF) + check-qtest-s390x-y += tests/drive_del-test$(EXESUF) +-check-qtest-s390x-y += tests/device-plug-test$(EXESUF) ++#check-qtest-s390x-y += tests/device-plug-test$(EXESUF) + check-qtest-s390x-y += tests/virtio-ccw-test$(EXESUF) + check-qtest-s390x-y += tests/cpu-plug-test$(EXESUF) + check-qtest-s390x-y += tests/migration-test$(EXESUF) diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c -index 8ec6aed..6a533b9 100644 +index c591748aaf..c5873f8b24 100644 --- a/tests/boot-serial-test.c +++ b/tests/boot-serial-test.c -@@ -97,16 +97,20 @@ static testdef_t tests[] = { +@@ -97,18 +97,22 @@ static testdef_t tests[] = { { "ppc", "g3beige", "", "PowerPC,750" }, { "ppc", "mac99", "", "PowerPC,G4" }, { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, @@ -93,7 +86,9 @@ index 8ec6aed..6a533b9 100644 { "ppc64", "40p", "-m 192", "Memory: 192M" }, { "ppc64", "mac99", "", "PowerPC,970FX" }, +#endif - { "ppc64", "pseries", "", "Open Firmware" }, + { "ppc64", "pseries", + "-machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken", + "Open Firmware" }, +#if 0 /* Disabled for Red Hat Enterprise Linux */ { "ppc64", "powernv", "-cpu 
POWER8", "OPAL" }, { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, @@ -107,39 +102,56 @@ index 8ec6aed..6a533b9 100644 { "sparc", "LX", "", "TMS390S10" }, { "sparc", "SS-4", "", "MB86904" }, diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c -index f4a677d..70a3d1d 100644 +index 668f00144e..453ca8e583 100644 --- a/tests/cpu-plug-test.c +++ b/tests/cpu-plug-test.c -@@ -193,7 +193,8 @@ static void add_pseries_test_case(const char *mname) +@@ -190,8 +190,8 @@ static void add_pseries_test_case(const char *mname) + char *path; PlugTestData *data; - if (!g_str_has_prefix(mname, "pseries-") || +- if (!g_str_has_prefix(mname, "pseries-") || - (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7)) { -+ (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7) || -+ strcmp(mname,"pseries-rhel7.2.0") == 0) { ++ if (!g_str_has_prefix(mname, "pseries-rhel") || ++ (g_str_has_prefix(mname, "pseries-rhel7.") && atoi(&mname[14]) < 4)) { return; } data = g_new(PlugTestData, 1); diff --git a/tests/e1000-test.c b/tests/e1000-test.c -index 0c5fcdc..b830432 100644 +index 9e67916169..adbd07f997 100644 --- a/tests/e1000-test.c +++ b/tests/e1000-test.c -@@ -29,8 +29,10 @@ static void test_device(gconstpointer data) +@@ -21,9 +21,11 @@ struct QE1000 { + static const char *models[] = { "e1000", - "e1000-82540em", +#if 0 /* Disabled for Red Hat Enterprise Linux */ + "e1000-82540em", "e1000-82544gc", "e1000-82545em", +#endif }; - int main(int argc, char **argv) + static void *e1000_get_driver(void *obj, const char *interface) +diff --git a/tests/endianness-test.c b/tests/endianness-test.c +index 58527952a5..1d729ec7f2 100644 +--- a/tests/endianness-test.c ++++ b/tests/endianness-test.c +@@ -39,8 +39,10 @@ static const TestCase test_cases[] = { + { "ppc", "bamboo", 0xe8000000, .bswap = true, .superio = "i82378" }, + { "ppc64", "mac99", 0xf2000000, .bswap = true, .superio = "i82378" }, + { "ppc64", "pseries", (1ULL << 45), .bswap = true, .superio = "i82378" }, 
++#if 0 /* Disabled in Red Hat Enterprise Linux */ + { "ppc64", "pseries-2.7", 0x10080000000ULL, + .bswap = true, .superio = "i82378" }, ++#endif + { "sh4", "r2d", 0xfe240000, .superio = "i82378" }, + { "sh4eb", "r2d", 0xfe240000, .bswap = true, .superio = "i82378" }, + { "sparc64", "sun4u", 0x1fe02000000LL, .bswap = true }, diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c -index 198d007..4bea07f 100644 +index 61bc1d1e7b..aeb76b0bc3 100644 --- a/tests/prom-env-test.c +++ b/tests/prom-env-test.c -@@ -82,7 +82,9 @@ int main(int argc, char *argv[]) +@@ -88,7 +88,9 @@ int main(int argc, char *argv[]) if (!strcmp(arch, "ppc")) { add_tests(ppc_machines); } else if (!strcmp(arch, "ppc64")) { @@ -150,7 +162,7 @@ index 198d007..4bea07f 100644 qtest_add_data_func("prom-env/pseries", "pseries", test_machine); } diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 -index 32741d7..85ef52e 100755 +index 02ac960da4..29f13440c0 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -182,11 +182,11 @@ run_qemu -drive if=virtio @@ -184,7 +196,7 @@ index 32741d7..85ef52e 100755 *) ;; diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 61a6d98..05996ae 100644 +index bae7718380..0db5e68af1 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -77,7 +77,7 @@ @@ -206,7 +218,7 @@ index 61a6d98..05996ae 100644 101 rw auto quick 102 rw auto quick diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c -index e75b959..6b46b73 100644 +index e75b959950..6b46b73dd0 100644 --- a/tests/test-x86-cpuid-compat.c +++ b/tests/test-x86-cpuid-compat.c @@ -300,6 +300,7 @@ int main(int argc, char **argv) @@ -226,7 +238,7 @@ index e75b959..6b46b73 100644 /* Test feature parsing */ add_feature_test("x86/cpuid/features/plus", diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c -index 9eb24b0..465ed26 100644 +index 9eb24b00e4..465ed26dfc 100644 --- a/tests/usb-hcd-xhci-test.c +++ b/tests/usb-hcd-xhci-test.c 
@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) @@ -256,5 +268,5 @@ index 9eb24b0..465ed26 100644 qtest_start("-device nec-usb-xhci,id=xhci" -- -1.8.3.1 +2.20.1 diff --git a/0012-Use-kvm-by-default.patch b/0012-Use-kvm-by-default.patch deleted file mode 100644 index b0bc077..0000000 --- a/0012-Use-kvm-by-default.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 445df6a548c20d21c3275d91bcd96c6b0fde9c97 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Thu, 18 Dec 2014 06:27:49 +0100 -Subject: Use kvm by default - -Bugzilla: 906185 - -RHEL uses kvm accelerator by default, if available. - -Signed-off-by: Miroslav Rezanina - -Rebase notes (2.10.0) -- variable rename (upstream) - -Rebase notes (2.2.0): -- Move code from vl.c to accel.c - -(cherry picked from commit abcd662eb8e516ebe4a6b401e83a62f749491a15) -(cherry picked from commit eca6d5766d956c37e3f7f28d70903d357308c846) ---- - accel/accel.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/accel/accel.c b/accel/accel.c -index 966b2d8..e8ca7bb 100644 ---- a/accel/accel.c -+++ b/accel/accel.c -@@ -79,8 +79,8 @@ void configure_accelerator(MachineState *ms) - - accel = qemu_opt_get(qemu_get_machine_opts(), "accel"); - if (accel == NULL) { -- /* Use the default "accelerator", tcg */ -- accel = "tcg"; -+ /* RHEL uses kvm as the default accelerator, fallback to tcg */ -+ accel = "kvm:tcg"; - } - - accel_list = g_strsplit(accel, ":", 0); --- -1.8.3.1 - diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch similarity index 91% rename from 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch rename to 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch index fbd961f..7a3fc9d 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 0b8b44ec1fc3a0d17f13c07ac4c7351769a63300 Mon Sep 17 00:00:00 2001 +From 
b0626d1bb4f6ec6fba1973ebb26a297e79e95437 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -32,12 +32,13 @@ Merged patches (2.9.0): (cherry picked from commit 9fa3c9fc6dfcde76d80db1aa601b2d577f72ceec) (cherry picked from commit 3cb35556dc7d994f203d732fe952f95fcdb03c0a) +Signed-off-by: Danilo C. L. de Paula --- hw/vfio/pci.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 6e0000c..d242c62 100644 +index 13badcd6ed..7c998afc0f 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -36,6 +36,7 @@ @@ -48,7 +49,7 @@ index 6e0000c..d242c62 100644 #define TYPE_VFIO_PCI "vfio-pci" #define PCI_VFIO(obj) OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI) -@@ -2811,9 +2812,21 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) +@@ -2806,9 +2807,21 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) ssize_t len; struct stat st; int groupid; @@ -72,5 +73,5 @@ index 6e0000c..d242c62 100644 if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -- -1.8.3.1 +2.20.1 diff --git a/0014-Add-support-statement-to-help-output.patch b/0013-Add-support-statement-to-help-output.patch similarity index 86% rename from 0014-Add-support-statement-to-help-output.patch rename to 0013-Add-support-statement-to-help-output.patch index 1d2f7f3..d77418a 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0013-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From f06750384e9e241c7cc6f14b6fdedea3c4237790 Mon Sep 17 00:00:00 2001 +From 6ae79fe200eab13f75050a10b48cea3b5bf21aab Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -18,15 +18,16 @@ as unsupported by Red Hat, and advising users to use libvirt instead. 
Signed-off-by: Eduardo Habkost (cherry picked from commit 2a07700936e39856cc9f149c6a6517f0715536a6) (cherry picked from commit 5dd2f4706e2fef945771949e59a8fcc1b5452de9) +Signed-off-by: Danilo C. L. de Paula --- vl.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/vl.c b/vl.c -index 22bd99c..74b32c4 100644 +index 3ada215270..32886ebf7f 100644 --- a/vl.c +++ b/vl.c -@@ -1904,9 +1904,17 @@ static void version(void) +@@ -1982,9 +1982,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -44,7 +45,7 @@ index 22bd99c..74b32c4 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", error_get_progname()); -@@ -1923,6 +1931,7 @@ static void help(int exitcode) +@@ -2001,6 +2009,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); @@ -53,5 +54,5 @@ index 22bd99c..74b32c4 100644 } -- -1.8.3.1 +2.20.1 diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0014-globally-limit-the-maximum-number-of-CPUs.patch similarity index 91% rename from 0015-globally-limit-the-maximum-number-of-CPUs.patch rename to 0014-globally-limit-the-maximum-number-of-CPUs.patch index 2c77287..f7e0b07 100644 --- a/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0014-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From b64599f8f72d89ac75557608f1890abb8effb88b Mon Sep 17 00:00:00 2001 +From 4d15d2010db402f5910a9b7e4c147a9a3e2f2604 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -77,16 +77,18 @@ This commit matches the limit to current KVM_CAP_NR_VCPUS value. Conflicts: vl.c + +Signed-off-by: Danilo C. L. 
de Paula --- accel/kvm/kvm-all.c | 12 ++++++++++++ vl.c | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index 4880a05..a8f5d47 100644 +index 241db496c3..003c0abee2 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c -@@ -1627,6 +1627,18 @@ static int kvm_init(MachineState *ms) +@@ -1630,6 +1630,18 @@ static int kvm_init(MachineState *ms) soft_vcpus_limit = kvm_recommended_vcpus(s); hard_vcpus_limit = kvm_max_vcpus(s); @@ -106,10 +108,10 @@ index 4880a05..a8f5d47 100644 if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " diff --git a/vl.c b/vl.c -index 74b32c4..0a9bdc2 100644 +index 32886ebf7f..2b95925f9b 100644 --- a/vl.c +++ b/vl.c -@@ -134,6 +134,8 @@ int main(int argc, char **argv) +@@ -131,6 +131,8 @@ int main(int argc, char **argv) #define MAX_VIRTIO_CONSOLES 1 @@ -118,7 +120,7 @@ index 74b32c4..0a9bdc2 100644 static const char *data_dir[16]; static int data_dir_idx; const char *bios_name = NULL; -@@ -1463,6 +1465,20 @@ MachineClass *find_default_machine(void) +@@ -1508,6 +1510,20 @@ MachineClass *find_default_machine(void) return mc; } @@ -139,7 +141,7 @@ index 74b32c4..0a9bdc2 100644 MachineInfoList *qmp_query_machines(Error **errp) { GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); -@@ -4021,6 +4037,8 @@ int main(int argc, char **argv, char **envp) +@@ -3994,6 +4010,8 @@ int main(int argc, char **argv, char **envp) "mutually exclusive"); exit(EXIT_FAILURE); } @@ -149,5 +151,5 @@ index 74b32c4..0a9bdc2 100644 configure_rtc(qemu_find_opts_singleton("rtc")); -- -1.8.3.1 +2.20.1 diff --git a/0016-Add-support-for-simpletrace.patch b/0015-Add-support-for-simpletrace.patch similarity index 91% rename from 0016-Add-support-for-simpletrace.patch rename to 0015-Add-support-for-simpletrace.patch index 5db59b4..3a51b93 100644 --- a/0016-Add-support-for-simpletrace.patch +++ b/0015-Add-support-for-simpletrace.patch @@ -1,4 +1,4 @@ -From 
490f864563ddcb367e6cf58d1be4a9cbed3a5e2b Mon Sep 17 00:00:00 2001 +From 75da6e38041efca51934a46a4d481eaac57d4b1e Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 8 Oct 2015 09:50:17 +0200 Subject: Add support for simpletrace @@ -23,23 +23,24 @@ Merged patches (2.3.0): - 85c4c8f trace: add systemtap-initscript README file to RPM (cherry picked from commit bfc1d7f3628f2ffbabbae71d57a506cea6663ddf) +Signed-off-by: Danilo C. L. de Paula --- .gitignore | 2 ++ Makefile | 4 +++ - README.systemtap | 43 +++++++++++++++++++++++++++++++++ - redhat/qemu-kvm.spec.template | 29 ++++++++++++++++++++-- + README.systemtap | 43 +++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 26 ++++++++++++++- scripts/systemtap/conf.d/qemu_kvm.conf | 4 +++ scripts/systemtap/script.d/qemu_kvm.stp | 1 + - 6 files changed, 81 insertions(+), 2 deletions(-) + 6 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 README.systemtap create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp diff --git a/Makefile b/Makefile -index 152821a..8710720 100644 +index 05f62eab3c..41a77a90af 100644 --- a/Makefile +++ b/Makefile -@@ -892,6 +892,10 @@ endif +@@ -832,6 +832,10 @@ endif $(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \ done $(INSTALL_DATA) $(BUILD_DIR)/trace-events-all "$(DESTDIR)$(qemu_datadir)/trace-events-all" @@ -52,7 +53,7 @@ index 152821a..8710720 100644 done diff --git a/README.systemtap b/README.systemtap new file mode 100644 -index 0000000..ad913fc +index 0000000000..ad913fc990 --- /dev/null +++ b/README.systemtap @@ -0,0 +1,43 @@ @@ -101,7 +102,7 @@ index 0000000..ad913fc + # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf new file mode 100644 -index 0000000..372d816 +index 0000000000..372d8160a4 --- /dev/null +++ 
b/scripts/systemtap/conf.d/qemu_kvm.conf @@ -0,0 +1,4 @@ @@ -111,11 +112,11 @@ index 0000000..372d816 +qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp new file mode 100644 -index 0000000..c04abf9 +index 0000000000..c04abf9449 --- /dev/null +++ b/scripts/systemtap/script.d/qemu_kvm.stp @@ -0,0 +1 @@ +probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} -- -1.8.3.1 +2.20.1 diff --git a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch similarity index 91% rename from 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch rename to 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 824bf82..640e92a 100644 --- a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From 081800f80d82ce8d71a1075b3d888f2e298ad183 Mon Sep 17 00:00:00 2001 +From ec354bd88aaaf383bdb2177f94dec39b930cbf90 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 30 Nov 2018 09:11:03 +0100 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -16,15 +16,16 @@ We change the name and location of qemu-kvm binaries. Update documentation to reflect this change. Only architectures available in RHEL are updated. Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula --- - docs/qemu-block-drivers.texi | 70 +++++++++++----------- - docs/qemu-cpu-models.texi | 8 +-- - qemu-doc.texi | 70 +++++++++++----------- - qemu-options.hx | 140 ++++++++++++++++++++++--------------------- + docs/qemu-block-drivers.texi | 70 +++++++++--------- + docs/qemu-cpu-models.texi | 8 +- + qemu-doc.texi | 70 +++++++++--------- + qemu-options.hx | 140 ++++++++++++++++++----------------- 4 files changed, 146 insertions(+), 142 deletions(-) diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi -index 38e9f34..2e71ec9 100644 +index da06a9bc83..03c6705505 100644 --- a/docs/qemu-block-drivers.texi +++ b/docs/qemu-block-drivers.texi @@ -405,7 +405,7 @@ QEMU can automatically create a virtual FAT disk image from a @@ -235,10 +236,10 @@ index 38e9f34..2e71ec9 100644 @var{host}:@var{bus}:@var{slot}.@var{func} is the NVMe controller's PCI device diff --git a/docs/qemu-cpu-models.texi b/docs/qemu-cpu-models.texi -index 1935f98..93672de 100644 +index 23c11dc86f..8697254ad4 100644 --- a/docs/qemu-cpu-models.texi +++ b/docs/qemu-cpu-models.texi -@@ -387,25 +387,25 @@ CPU models / features in QEMU and libvirt +@@ -566,25 +566,25 @@ CPU models / features in QEMU and libvirt @item Host passthrough @example @@ -269,10 +270,10 @@ index 1935f98..93672de 100644 @end table diff --git a/qemu-doc.texi b/qemu-doc.texi -index f7ad1df..0e4b34a 100644 +index ae3c3f9632..a8cd3660bc 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi -@@ -205,12 +205,12 @@ Note that, by default, GUS shares IRQ(7) with parallel ports and so +@@ -206,12 +206,12 @@ Note that, by default, GUS shares IRQ(7) with parallel ports and so QEMU must be told to not have parallel ports to have working GUS. @example @@ -287,7 +288,7 @@ index f7ad1df..0e4b34a 100644 @end example Or some other unclaimed IRQ. 
-@@ -226,7 +226,7 @@ CS4231A is the chip used in Windows Sound System and GUSMAX products +@@ -227,7 +227,7 @@ CS4231A is the chip used in Windows Sound System and GUSMAX products Download and uncompress the linux image (@file{linux.img}) and type: @example @@ -296,7 +297,7 @@ index f7ad1df..0e4b34a 100644 @end example Linux should boot and give you a prompt. -@@ -236,7 +236,7 @@ Linux should boot and give you a prompt. +@@ -237,7 +237,7 @@ Linux should boot and give you a prompt. @example @c man begin SYNOPSIS @@ -305,7 +306,7 @@ index f7ad1df..0e4b34a 100644 @c man end @end example -@@ -276,21 +276,21 @@ is specified in seconds. The default is 0 which means no timeout. Libiscsi +@@ -277,21 +277,21 @@ is specified in seconds. The default is 0 which means no timeout. Libiscsi Example (without authentication): @example @@ -330,7 +331,7 @@ index f7ad1df..0e4b34a 100644 @end example @item NBD -@@ -305,12 +305,12 @@ Syntax for specifying a NBD device using Unix Domain Sockets +@@ -306,12 +306,12 @@ Syntax for specifying a NBD device using Unix Domain Sockets Example for TCP @example @@ -345,7 +346,7 @@ index f7ad1df..0e4b34a 100644 @end example @item SSH -@@ -318,8 +318,8 @@ QEMU supports SSH (Secure Shell) access to remote disks. +@@ -319,8 +319,8 @@ QEMU supports SSH (Secure Shell) access to remote disks. Examples: @example @@ -356,7 +357,7 @@ index f7ad1df..0e4b34a 100644 @end example Currently authentication must be done using ssh-agent. Other -@@ -337,7 +337,7 @@ sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag] +@@ -338,7 +338,7 @@ sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag] Example @example @@ -365,7 +366,7 @@ index f7ad1df..0e4b34a 100644 @end example See also @url{https://sheepdog.github.io/sheepdog/}. 
-@@ -363,17 +363,17 @@ JSON: +@@ -364,17 +364,17 @@ JSON: Example @example URI: @@ -386,7 +387,7 @@ index f7ad1df..0e4b34a 100644 @ file.debug=9,file.logfile=/var/log/qemu-gluster.log, @ file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007, @ file.server.1.type=unix,file.server.1.socket=/var/run/glusterd.socket -@@ -438,9 +438,9 @@ of . +@@ -439,9 +439,9 @@ of . Example: boot from a remote Fedora 20 live ISO image @example @@ -398,7 +399,7 @@ index f7ad1df..0e4b34a 100644 @end example Example: boot from a remote Fedora 20 cloud image using a local overlay for -@@ -448,7 +448,7 @@ writes, copy-on-read, and a readahead of 64k +@@ -449,7 +449,7 @@ writes, copy-on-read, and a readahead of 64k @example qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"http",, "file.url":"https://dl.fedoraproject.org/pub/fedora/linux/releases/20/Images/x86_64/Fedora-x86_64-20-20131211.1-sda.qcow2",, "file.readahead":"64k"@}' /tmp/Fedora-x86_64-20-20131211.1-sda.qcow2 @@ -407,7 +408,7 @@ index f7ad1df..0e4b34a 100644 @end example Example: boot from an image stored on a VMware vSphere server with a self-signed -@@ -457,7 +457,7 @@ of 10 seconds. +@@ -458,7 +458,7 @@ of 10 seconds. @example qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"https",, "file.url":"https://user:password@@vsphere.example.com/folder/test/test-flat.vmdk?dcPath=Datacenter&dsName=datastore1",, "file.sslverify":"off",, "file.readahead":"64k",, "file.timeout":10@}' /tmp/test.qcow2 @@ -416,7 +417,7 @@ index f7ad1df..0e4b34a 100644 @end example @end table -@@ -824,7 +824,7 @@ On Linux hosts, a shared memory device is available. The basic syntax +@@ -825,7 +825,7 @@ On Linux hosts, a shared memory device is available. The basic syntax is: @example @@ -425,7 +426,7 @@ index f7ad1df..0e4b34a 100644 @end example where @var{hostmem} names a host memory backend. 
For a POSIX shared -@@ -845,7 +845,7 @@ memory server is: +@@ -846,7 +846,7 @@ memory server is: ivshmem-server -p @var{pidfile} -S @var{path} -m @var{shm-name} -l @var{shm-size} -n @var{vectors} # Then start your qemu instances with matching arguments @@ -434,7 +435,7 @@ index f7ad1df..0e4b34a 100644 -chardev socket,path=@var{path},id=@var{id} @end example -@@ -870,7 +870,7 @@ Instead of specifying the using POSIX shm, you may specify +@@ -871,7 +871,7 @@ Instead of specifying the using POSIX shm, you may specify a memory backend that has hugepage support: @example @@ -443,7 +444,7 @@ index f7ad1df..0e4b34a 100644 -device ivshmem-plain,memdev=mb1 @end example -@@ -886,7 +886,7 @@ kernel testing. +@@ -887,7 +887,7 @@ kernel testing. The syntax is: @example @@ -452,7 +453,7 @@ index f7ad1df..0e4b34a 100644 @end example Use @option{-kernel} to provide the Linux kernel image and -@@ -901,7 +901,7 @@ If you do not need graphical output, you can disable it and redirect +@@ -902,7 +902,7 @@ If you do not need graphical output, you can disable it and redirect the virtual serial port and the QEMU monitor to the console with the @option{-nographic} option. The typical command line is: @example @@ -461,7 +462,7 @@ index f7ad1df..0e4b34a 100644 -append "root=/dev/hda console=ttyS0" -nographic @end example -@@ -967,7 +967,7 @@ Network adapter that supports CDC ethernet and RNDIS protocols. @var{id} +@@ -968,7 +968,7 @@ Network adapter that supports CDC ethernet and RNDIS protocols. @var{id} specifies a netdev defined with @code{-netdev @dots{},id=@var{id}}. For instance, user-mode networking can be used with @example @@ -470,7 +471,7 @@ index f7ad1df..0e4b34a 100644 @end example @item usb-ccid Smartcard reader device -@@ -986,7 +986,7 @@ no type is given, the HCI logic corresponds to @code{-bt hci,vlan=0}. +@@ -987,7 +987,7 @@ no type is given, the HCI logic corresponds to @code{-bt hci,vlan=0}. This USB device implements the USB Transport Layer of HCI. 
Example usage: @example @@ -479,7 +480,7 @@ index f7ad1df..0e4b34a 100644 @end example @end table -@@ -1063,7 +1063,7 @@ For this setup it is recommended to restrict it to listen on a UNIX domain +@@ -1064,7 +1064,7 @@ For this setup it is recommended to restrict it to listen on a UNIX domain socket only. For example @example @@ -488,7 +489,7 @@ index f7ad1df..0e4b34a 100644 @end example This ensures that only users on local box with read/write access to that -@@ -1086,7 +1086,7 @@ is running the password is set with the monitor. Until the monitor is used to +@@ -1087,7 +1087,7 @@ is running the password is set with the monitor. Until the monitor is used to set the password all clients will be rejected. @example @@ -497,7 +498,7 @@ index f7ad1df..0e4b34a 100644 (qemu) change vnc password Password: ******** (qemu) -@@ -1103,7 +1103,7 @@ support provides a secure session, but no authentication. This allows any +@@ -1104,7 +1104,7 @@ support provides a secure session, but no authentication. This allows any client to connect, and provides an encrypted session. @example @@ -506,7 +507,7 @@ index f7ad1df..0e4b34a 100644 -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=no \ -vnc :1,tls-creds=tls0 -monitor stdio @end example -@@ -1125,7 +1125,7 @@ same syntax as previously, but with @code{verify-peer} set to @code{yes} +@@ -1126,7 +1126,7 @@ same syntax as previously, but with @code{verify-peer} set to @code{yes} instead. @example @@ -515,7 +516,7 @@ index f7ad1df..0e4b34a 100644 -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ -vnc :1,tls-creds=tls0 -monitor stdio @end example -@@ -1138,7 +1138,7 @@ Finally, the previous method can be combined with VNC password authentication +@@ -1139,7 +1139,7 @@ Finally, the previous method can be combined with VNC password authentication to provide two layers of authentication for clients. 
@example @@ -524,7 +525,7 @@ index f7ad1df..0e4b34a 100644 -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ -vnc :1,tls-creds=tls0,password -monitor stdio (qemu) change vnc password -@@ -1163,7 +1163,7 @@ used for authentication, but assuming use of one supporting SSF, +@@ -1164,7 +1164,7 @@ used for authentication, but assuming use of one supporting SSF, then QEMU can be launched with: @example @@ -533,7 +534,7 @@ index f7ad1df..0e4b34a 100644 @end example @node vnc_sec_certificate_sasl -@@ -1177,7 +1177,7 @@ credentials. This can be enabled, by combining the 'sasl' option +@@ -1178,7 +1178,7 @@ credentials. This can be enabled, by combining the 'sasl' option with the aforementioned TLS + x509 options: @example @@ -542,7 +543,7 @@ index f7ad1df..0e4b34a 100644 -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ -vnc :1,tls-creds=tls0,sasl -monitor stdio @end example -@@ -1572,7 +1572,7 @@ QEMU has a primitive support to work with gdb, so that you can do +@@ -1573,7 +1573,7 @@ QEMU has a primitive support to work with gdb, so that you can do In order to use gdb, launch QEMU with the '-s' option. It will wait for a gdb connection: @example @@ -551,7 +552,7 @@ index f7ad1df..0e4b34a 100644 -append "root=/dev/hda" Connected to host network interface: tun0 Waiting gdb connection on port 1234 -@@ -1818,7 +1818,7 @@ Set the initial VGA graphic mode. The default is 800x600x32. +@@ -1819,7 +1819,7 @@ Set the initial VGA graphic mode. The default is 800x600x32. Set OpenBIOS variables in NVRAM, for example: @example @@ -561,7 +562,7 @@ index f7ad1df..0e4b34a 100644 -prom-env 'boot-args=conf=hd:2,\yaboot.conf' @end example diff --git a/qemu-options.hx b/qemu-options.hx -index 5e13aa9..2a60e51 100644 +index 0f55062546..7ec1a3b099 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -253,7 +253,7 @@ This option defines a free-form string that can be used to describe @var{fd}. 
@@ -606,7 +607,7 @@ index 5e13aa9..2a60e51 100644 @end example If @var{slots} and @var{maxmem} are not specified, memory hotplug won't -@@ -438,12 +438,12 @@ Enable audio and selected sound hardware. Use 'help' to print all +@@ -668,12 +668,12 @@ Enable audio and selected sound hardware. Use 'help' to print all available sound hardware. @example @@ -625,7 +626,7 @@ index 5e13aa9..2a60e51 100644 @end example Note that Linux's i810_audio OSS kernel (for AC97) module might -@@ -918,21 +918,21 @@ is off. +@@ -1148,21 +1148,21 @@ is off. Instead of @option{-cdrom} you can use: @example @@ -653,7 +654,7 @@ index 5e13aa9..2a60e51 100644 -add-fd fd=3,set=2,opaque="rdwr:/path/to/file" -add-fd fd=4,set=2,opaque="rdonly:/path/to/file" -drive file=/dev/fdset/2,index=0,media=disk -@@ -940,28 +940,28 @@ qemu-system-i386 +@@ -1170,28 +1170,28 @@ qemu-system-i386 You can connect a CDROM to the slave of ide0: @example @@ -688,7 +689,7 @@ index 5e13aa9..2a60e51 100644 @end example ETEXI -@@ -1975,8 +1975,8 @@ The following two example do exactly the same, to show how @option{-nic} can +@@ -2217,8 +2217,8 @@ The following two example do exactly the same, to show how @option{-nic} can be used to shorten the command line length (note that the e1000 is the default on i386, so the @option{model=e1000} parameter could even be omitted here, too): @example @@ -699,7 +700,7 @@ index 5e13aa9..2a60e51 100644 @end example @item -nic none -@@ -2047,7 +2047,7 @@ can not be resolved. +@@ -2289,7 +2289,7 @@ can not be resolved. Example: @example @@ -708,7 +709,7 @@ index 5e13aa9..2a60e51 100644 @end example @item domainname=@var{domain} -@@ -2071,7 +2071,7 @@ a guest from a local directory. +@@ -2313,7 +2313,7 @@ a guest from a local directory. 
Example (using pxelinux): @example @@ -717,7 +718,7 @@ index 5e13aa9..2a60e51 100644 -netdev user,id=n1,tftp=/path/to/tftp/files,bootfile=/pxelinux.0 @end example -@@ -2105,7 +2105,7 @@ screen 0, use the following: +@@ -2347,7 +2347,7 @@ screen 0, use the following: @example # on the host @@ -726,7 +727,7 @@ index 5e13aa9..2a60e51 100644 # this host xterm should open in the guest X11 server xterm -display :1 @end example -@@ -2115,7 +2115,7 @@ the guest, use the following: +@@ -2357,7 +2357,7 @@ the guest, use the following: @example # on the host @@ -735,7 +736,7 @@ index 5e13aa9..2a60e51 100644 telnet localhost 5555 @end example -@@ -2134,7 +2134,7 @@ lifetime, like in the following example: +@@ -2376,7 +2376,7 @@ lifetime, like in the following example: @example # open 10.10.1.1:4321 on bootup, connect 10.0.2.100:1234 to it whenever # the guest accesses it @@ -744,7 +745,7 @@ index 5e13aa9..2a60e51 100644 @end example Or you can execute a command on every TCP connection established by the guest, -@@ -2143,7 +2143,7 @@ so that QEMU behaves similar to an inetd process for that virtual server: +@@ -2385,7 +2385,7 @@ so that QEMU behaves similar to an inetd process for that virtual server: @example # call "netcat 10.10.1.1 4321" on every TCP connection to 10.0.2.100:1234 # and connect the TCP stream to its stdin/stdout @@ -753,7 +754,7 @@ index 5e13aa9..2a60e51 100644 @end example @end table -@@ -2170,21 +2170,22 @@ Examples: +@@ -2412,21 +2412,22 @@ Examples: @example #launch a QEMU instance with the default network script @@ -779,7 +780,7 @@ index 5e13aa9..2a60e51 100644 -netdev tap,id=n1,"helper=/path/to/qemu-bridge-helper" @end example -@@ -2201,13 +2202,13 @@ Examples: +@@ -2443,13 +2444,13 @@ Examples: @example #launch a QEMU instance with the default network helper to #connect a TAP device to bridge br0 @@ -795,7 +796,7 @@ index 5e13aa9..2a60e51 100644 @end example @item -netdev 
socket,id=@var{id}[,fd=@var{h}][,listen=[@var{host}]:@var{port}][,connect=@var{host}:@var{port}] -@@ -2222,13 +2223,13 @@ specifies an already opened TCP socket. +@@ -2464,13 +2465,13 @@ specifies an already opened TCP socket. Example: @example # launch a first QEMU instance @@ -815,7 +816,7 @@ index 5e13aa9..2a60e51 100644 @end example @item -netdev socket,id=@var{id}[,fd=@var{h}][,mcast=@var{maddr}:@var{port}[,localaddr=@var{addr}]] -@@ -2251,15 +2252,15 @@ Use @option{fd=h} to specify an already opened UDP multicast socket. +@@ -2493,15 +2494,15 @@ Use @option{fd=h} to specify an already opened UDP multicast socket. Example: @example # launch one QEMU instance @@ -838,7 +839,7 @@ index 5e13aa9..2a60e51 100644 -device e1000,netdev=n3,mac=52:54:00:12:34:58 \ -netdev socket,id=n3,mcast=230.0.0.1:1234 @end example -@@ -2267,7 +2268,7 @@ qemu-system-i386 linux.img \ +@@ -2509,7 +2510,7 @@ qemu-system-i386 linux.img \ Example (User Mode Linux compat.): @example # launch QEMU instance (note mcast address selected is UML's default) @@ -847,7 +848,7 @@ index 5e13aa9..2a60e51 100644 -device e1000,netdev=n1,mac=52:54:00:12:34:56 \ -netdev socket,id=n1,mcast=239.192.168.1:1102 # launch UML -@@ -2276,9 +2277,12 @@ qemu-system-i386 linux.img \ +@@ -2518,9 +2519,12 @@ qemu-system-i386 linux.img \ Example (send packets from host's 1.2.3.4): @example @@ -863,7 +864,7 @@ index 5e13aa9..2a60e51 100644 @end example @item -netdev l2tpv3,id=@var{id},src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}] -@@ -2336,7 +2340,7 @@ brctl addif br-lan vmtunnel0 +@@ -2578,7 +2582,7 @@ brctl addif br-lan vmtunnel0 # on 4.3.2.1 # launch QEMU instance - if your network has reorder or is very lossy add ,pincounter @@ -872,7 +873,7 @@ index 5e13aa9..2a60e51 100644 -netdev 
l2tpv3,id=n1,src=4.2.3.1,dst=1.2.3.4,udp,srcport=16384,dstport=16384,rxsession=0xffffffff,txsession=0xffffffff,counter @end example -@@ -2353,7 +2357,7 @@ Example: +@@ -2595,7 +2599,7 @@ Example: # launch vde switch vde_switch -F -sock /tmp/myswitch # launch QEMU instance @@ -881,7 +882,7 @@ index 5e13aa9..2a60e51 100644 @end example @item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n] -@@ -2367,11 +2371,11 @@ be created for multiqueue vhost-user. +@@ -2609,11 +2613,11 @@ be created for multiqueue vhost-user. Example: @example @@ -898,7 +899,7 @@ index 5e13aa9..2a60e51 100644 @end example @item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}] -@@ -2805,7 +2809,7 @@ and communicate. Requires the Linux @code{vhci} driver installed. Can +@@ -3053,7 +3057,7 @@ and communicate. Requires the Linux @code{vhci} driver installed. Can be used as following: @example @@ -907,7 +908,7 @@ index 5e13aa9..2a60e51 100644 @end example @item -bt device:@var{dev}[,vlan=@var{n}] -@@ -3255,14 +3259,14 @@ ETEXI +@@ -3503,14 +3507,14 @@ ETEXI DEF("realtime", HAS_ARG, QEMU_OPTION_realtime, "-realtime [mlock=on|off]\n" @@ -925,7 +926,7 @@ index 5e13aa9..2a60e51 100644 (enabled by default). ETEXI -@@ -3300,7 +3304,7 @@ connections will likely be TCP-based, but also UDP, pseudo TTY, or even +@@ -3548,7 +3552,7 @@ connections will likely be TCP-based, but also UDP, pseudo TTY, or even stdio are reasonable use case. The latter is allowing to start QEMU from within gdb and establish the connection via a pipe: @example @@ -934,7 +935,7 @@ index 5e13aa9..2a60e51 100644 @end example ETEXI -@@ -4251,7 +4255,7 @@ which specify the queue number of cryptodev backend, the default of +@@ -4467,7 +4471,7 @@ which specify the queue number of cryptodev backend, the default of @example @@ -943,7 +944,7 @@ index 5e13aa9..2a60e51 100644 [...] 
\ -object cryptodev-backend-builtin,id=cryptodev0 \ -device virtio-crypto-pci,id=crypto0,cryptodev=cryptodev0 \ -@@ -4271,7 +4275,7 @@ of cryptodev backend for multiqueue vhost-user, the default of @var{queues} is 1 +@@ -4487,7 +4491,7 @@ of cryptodev backend for multiqueue vhost-user, the default of @var{queues} is 1 @example @@ -953,5 +954,5 @@ index 5e13aa9..2a60e51 100644 -chardev socket,id=chardev0,path=/path/to/socket \ -object cryptodev-vhost-user,id=cryptodev0,chardev=chardev0 \ -- -1.8.3.1 +2.20.1 diff --git a/0018-usb-xhci-Fix-PCI-capability-order.patch b/0017-usb-xhci-Fix-PCI-capability-order.patch similarity index 91% rename from 0018-usb-xhci-Fix-PCI-capability-order.patch rename to 0017-usb-xhci-Fix-PCI-capability-order.patch index 6f18fde..b5e74a1 100644 --- a/0018-usb-xhci-Fix-PCI-capability-order.patch +++ b/0017-usb-xhci-Fix-PCI-capability-order.patch @@ -1,4 +1,4 @@ -From a3d01620dcbc301493f6704265edddf78f6bfe1b Mon Sep 17 00:00:00 2001 +From a4af398c3849751bc240c4f8f1f0cf2fcf57d21e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 5 May 2017 19:06:14 +0200 Subject: usb-xhci: Fix PCI capability order @@ -56,15 +56,16 @@ Conflicts: (cherry picked from commit a42f86dc906cc7d2c16d02bf125ed76847b469cb) (cherry picked from commit 992ab2e4f6e15d3e51bc716763aa8d6f45c6d29d) +Signed-off-by: Danilo C. L. 
de Paula --- hw/usb/hcd-xhci.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index ca19474..45fcce3 100644 +index ad351a7b6d..43a47539aa 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c -@@ -3373,6 +3373,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) +@@ -3393,6 +3393,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) xhci->max_pstreams_mask = 0; } @@ -77,7 +78,7 @@ index ca19474..45fcce3 100644 if (xhci->msi != ON_OFF_AUTO_OFF) { ret = msi_init(dev, 0x70, xhci->numintrs, true, false, &err); /* Any error other than -ENOTSUP(board's MSI support is broken) -@@ -3421,12 +3427,6 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) +@@ -3441,12 +3447,6 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64, &xhci->mem); @@ -91,5 +92,5 @@ index ca19474..45fcce3 100644 /* TODO check for errors, and should fail when msix=on */ msix_init(dev, xhci->numintrs, -- -1.8.3.1 +2.20.1 diff --git a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch similarity index 93% rename from 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch rename to 0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index f3600dd..c1e39f5 100644 --- a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 19f1b23aa923eab88c3fdda7b7fdc157c313e4eb Mon Sep 17 00:00:00 2001 +From bf7fc2d5907b351ddfc85416d48e47f66024ccbe Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] @@ -45,10 +45,10 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 3aa9971..9f754c4 100644 +index 839f120256..fd4b5c46ec 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c -@@ -790,6 +790,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, +@@ -796,6 +796,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, VirtIOSCSI *s = VIRTIO_SCSI(vdev); SCSIDevice *sd = SCSI_DEVICE(dev); @@ -62,8 +62,8 @@ index 3aa9971..9f754c4 100644 + return; + } if (s->ctx && !s->dataplane_fenced) { + AioContext *ctx; if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { - return; -- -1.8.3.1 +2.20.1 diff --git a/kvm-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch similarity index 80% rename from kvm-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch rename to 0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index 47fa62c..6061ba9 100644 --- a/kvm-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,8 +1,7 @@ -From b5ede4ceec64a7b812f40457771948420cd3780f Mon Sep 17 00:00:00 2001 +From 581909cc6cb4e8286126721b76a79e93a9880e1f Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 -Subject: [PATCH 4/8] BZ1653590: Require at least 64kiB pages for downstream - guests & hosts +Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts RH-Author: David Gibson Message-id: <20190206035856.19058-1-dgibson@redhat.com> @@ -23,9 +22,6 @@ Downstream, however, we simply don't support 4kiB pagesize configured kernels in guest or host, so we can have qemu simply error out in this situation. 
-Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1653590 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=20089737 -Branch: rhel8/master-3.1.0 Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified it failed immediately with a qemu error @@ -36,11 +32,11 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index 64f98ae..ace7325 100644 +index edc5ed0e0c..f59ab398d4 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c -@@ -325,12 +325,19 @@ void spapr_check_pagesize(sPAPRMachineState *spapr, hwaddr pagesize, - static void cap_hpt_maxpagesize_apply(sPAPRMachineState *spapr, +@@ -340,12 +340,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, + static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { +#if 0 /* disabled for RHEL */ @@ -60,5 +56,5 @@ index 64f98ae..ace7325 100644 spapr_check_pagesize(spapr, qemu_getrampagesize(), errp); } -- -1.8.3.1 +2.20.1 diff --git a/kvm-doc-fix-the-configuration-path.patch b/0020-doc-fix-the-configuration-path.patch similarity index 92% rename from kvm-doc-fix-the-configuration-path.patch rename to 0020-doc-fix-the-configuration-path.patch index dc729c1..f4eea28 100644 --- a/kvm-doc-fix-the-configuration-path.patch +++ b/0020-doc-fix-the-configuration-path.patch @@ -1,7 +1,7 @@ -From a02908747e85b4064da8acabe724f9202ab17fba Mon Sep 17 00:00:00 2001 +From b1799db735fe51f8b31c565814d79c7ec4fc82dc Mon Sep 17 00:00:00 2001 From: Danilo de Paula Date: Fri, 8 Feb 2019 11:51:14 +0000 -Subject: [PATCH] doc: fix the configuration path +Subject: doc: fix the configuration path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @@ -32,10 +32,10 @@ Signed-off-by: Danilo C. L. 
de Paula 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile -index 8710720..24e9ef0 100644 +index 41a77a90af..20c39c41a8 100644 --- a/Makefile +++ b/Makefile -@@ -937,11 +937,14 @@ ui/shader.o: $(SRC_PATH)/ui/shader.c \ +@@ -877,11 +877,14 @@ ui/shader.o: $(SRC_PATH)/ui/shader.c \ MAKEINFO=makeinfo MAKEINFOINCLUDES= -I docs -I $( -Date: Mon, 4 Feb 2019 19:14:22 +0000 -Subject: [PATCH 1/8] Acceptance tests: add Linux initrd checking test -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Yash Mankad -Message-id: -Patchwork-id: 84209 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/1] Acceptance tests: add Linux initrd checking test -Bugzilla: 1669922 -RH-Acked-by: Wainer dos Santos Moschetta -RH-Acked-by: Thomas Huth -RH-Acked-by: Laszlo Ersek - -From: Wainer dos Santos Moschetta - -QEMU used to exits with a not accurate error message when -an initrd > 2GiB was passed. That was fixed on patch: - - commit f3839fda5771596152b75dd1e1a6d050e6e6e380 - Author: Li Zhijian - Date: Thu Sep 13 18:07:13 2018 +0800 - - change get_image_size return type to int64_t - -This change adds a regression test for that fix. It starts -QEMU with a 2GiB dummy initrd, and checks that it evaluates the -file size correctly and prints an accurate message. - -Signed-off-by: Wainer dos Santos Moschetta -Reviewed-by: Caio Carrara -Reviewed-by: Cleber Rosa -Reviewed-by: Eduardo Habkost -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Philippe Mathieu-Daudé -Message-Id: <20181109182153.5390-1-wainersm@redhat.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit 82d4c923a580751c86dc0852a7cc8e369a78e8ad) -Signed-off-by: Yash Mankad -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/acceptance/linux_initrd.py | 48 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 48 insertions(+) - create mode 100644 tests/acceptance/linux_initrd.py - -diff --git a/tests/acceptance/linux_initrd.py b/tests/acceptance/linux_initrd.py -new file mode 100644 -index 0000000..737355c ---- /dev/null -+++ b/tests/acceptance/linux_initrd.py -@@ -0,0 +1,48 @@ -+# Linux initrd acceptance test. -+# -+# Copyright (c) 2018 Red Hat, Inc. -+# -+# Author: -+# Wainer dos Santos Moschetta -+# -+# This work is licensed under the terms of the GNU GPL, version 2 or -+# later. See the COPYING file in the top-level directory. -+ -+import tempfile -+from avocado.utils.process import run -+ -+from avocado_qemu import Test -+ -+ -+class LinuxInitrd(Test): -+ """ -+ Checks QEMU evaluates correctly the initrd file passed as -initrd option. -+ -+ :avocado: enable -+ :avocado: tags=x86_64 -+ """ -+ -+ timeout = 60 -+ -+ def test_with_2gib_file_should_exit_error_msg(self): -+ """ -+ Pretends to boot QEMU with an initrd file with size of 2GiB -+ and expect it exits with error message. 
-+ """ -+ kernel_url = ('https://mirrors.kernel.org/fedora/releases/28/' -+ 'Everything/x86_64/os/images/pxeboot/vmlinuz') -+ kernel_hash = '238e083e114c48200f80d889f7e32eeb2793e02a' -+ kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) -+ max_size = 2 * (1024 ** 3) - 1 -+ -+ with tempfile.NamedTemporaryFile() as initrd: -+ initrd.seek(max_size) -+ initrd.write(b'\0') -+ initrd.flush() -+ cmd = "%s -kernel %s -initrd %s" % (self.qemu_bin, kernel_path, -+ initrd.name) -+ res = run(cmd, ignore_status=True) -+ self.assertEqual(res.exit_status, 1) -+ expected_msg = r'.*initrd is too large.*max: \d+, need %s.*' % ( -+ max_size + 1) -+ self.assertRegex(res.stderr_text, expected_msg) --- -1.8.3.1 - diff --git a/kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch b/kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch deleted file mode 100644 index 6e0906c..0000000 --- a/kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch +++ /dev/null @@ -1,57 +0,0 @@ -From da2d528c3cffe22bd1b90b446a045376e4370845 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Sat, 16 Feb 2019 00:00:50 +0000 -Subject: [PATCH 4/4] Revert "i386: Add CPUID bit for PCONFIG" - -RH-Author: Paolo Bonzini -Message-id: <1550275250-41719-4-git-send-email-pbonzini@redhat.com> -Patchwork-id: 84524 -O-Subject: [rhel-av-8.0.0 qemu-kvm PATCH 3/3] Revert "i386: Add CPUID bit for PCONFIG" -Bugzilla: 1661515 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Igor Mammedov -RH-Acked-by: Stefano Garzarella - -From: Robert Hoo - -This reverts commit 5131dc433df54b37e8e918d8fba7fe10344e7a7b. -For new instruction 'PCONFIG' will not be exposed to guest. - -Signed-off-by: Robert Hoo -Message-Id: <1545227081-213696-3-git-send-email-robert.hu@linux.intel.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 712f807e1965c8f1f1da5bbec2b92a8c540e6631) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 2 +- - target/i386/cpu.h | 1 - - 2 files changed, 1 insertion(+), 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 169a2ce..d990070 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1077,7 +1077,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -- NULL, NULL, "pconfig", NULL, -+ NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, "spec-ctrl", NULL, - NULL, "arch-capabilities", NULL, "ssbd", -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index dd88151..26412f1 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -692,7 +692,6 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; - - #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */ - #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation Single Precision */ --#define CPUID_7_0_EDX_PCONFIG (1U << 18) /* Platform Configuration */ - #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) /* Speculation Control */ - #define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29) /*Arch Capabilities*/ - #define CPUID_7_0_EDX_SPEC_CTRL_SSBD (1U << 31) /* Speculative Store Bypass Disable */ --- -1.8.3.1 - diff --git a/kvm-aarch64-Add-virt-rhel8.0.0-machine-type-for-ARM.patch b/kvm-aarch64-Add-virt-rhel8.0.0-machine-type-for-ARM.patch deleted file mode 100644 index be3b71a..0000000 --- a/kvm-aarch64-Add-virt-rhel8.0.0-machine-type-for-ARM.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 7bfdb4cd2b490d6da5648b1bfeacf9078b4b3ecb Mon Sep 17 00:00:00 2001 -From: Wei Huang -Date: Thu, 17 Jan 2019 17:33:55 +0000 -Subject: [PATCH 5/7] aarch64: Add virt-rhel8.0.0 machine type for ARM - -RH-Author: Wei Huang -Message-id: <20190117173357.31514-2-wei@redhat.com> -Patchwork-id: 84037 -O-Subject: [RHEL8 qemu-kvm PATCH v3 1/3] aarch64: Add virt-rhel8.0.0 machine type for ARM -Bugzilla: 1656504 -RH-Acked-by: Andrew Jones 
-RH-Acked-by: Auger Eric -RH-Acked-by: Igor Mammedov -RH-Acked-by: Laszlo Ersek - -This patch adds a new machine type, virt-rhel8.0.0, for QEMU fast train -to replace old virt-rhel7.6.0 because virt-rhel7.6.0 was defined for -an unsupported preview product. Note that we explicitly disable -256MB ECAM region in virt-rhel8.0.0 to match what virt-rhel7.6.0 offered. - -Signed-off-by: Wei Huang -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/virt.c | 12 +++++++++--- - 1 file changed, 9 insertions(+), 3 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 84a86c1..156721a 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2094,7 +2094,7 @@ static void rhel_machine_init(void) - } - type_init(rhel_machine_init); - --static void rhel760_virt_instance_init(Object *obj) -+static void rhel800_virt_instance_init(Object *obj) - { - VirtMachineState *vms = VIRT_MACHINE(obj); - VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); -@@ -2119,6 +2119,8 @@ static void rhel760_virt_instance_init(Object *obj) - "Set GIC version. 
" - "Valid values are 2, 3 and host", NULL); - -+ vms->highmem_ecam = !vmc->no_highmem_ecam; -+ - if (vmc->no_its) { - vms->its = false; - } else { -@@ -2132,12 +2134,16 @@ static void rhel760_virt_instance_init(Object *obj) - NULL); - } - -+ /* IOMMU is disabled by default and non-configurable for RHEL */ -+ vms->iommu = VIRT_IOMMU_NONE; -+ - vms->memmap=a15memmap; - vms->irqmap=a15irqmap; - } - --static void rhel760_virt_options(MachineClass *mc) -+static void rhel800_virt_options(MachineClass *mc) - { - SET_MACHINE_COMPAT(mc, ARM_RHEL_COMPAT); -+ vmc->no_highmem_ecam = true; - } --DEFINE_RHEL_MACHINE_AS_LATEST(7, 6, 0) -+DEFINE_RHEL_MACHINE_AS_LATEST(8, 0, 0) --- -1.8.3.1 - diff --git a/kvm-aarch64-Set-virt-rhel8.0.0-max_cpus-to-512.patch b/kvm-aarch64-Set-virt-rhel8.0.0-max_cpus-to-512.patch deleted file mode 100644 index 76eae19..0000000 --- a/kvm-aarch64-Set-virt-rhel8.0.0-max_cpus-to-512.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 3433e6920a4aaa2177f3503ef08256a58f866b33 Mon Sep 17 00:00:00 2001 -From: Wei Huang -Date: Thu, 17 Jan 2019 17:33:56 +0000 -Subject: [PATCH 6/7] aarch64: Set virt-rhel8.0.0 max_cpus to 512 - -RH-Author: Wei Huang -Message-id: <20190117173357.31514-3-wei@redhat.com> -Patchwork-id: 84038 -O-Subject: [RHEL8 qemu-kvm PATCH v3 2/3] aarch64: Set virt-rhel8.0.0 max_cpus to 512 -Bugzilla: 1656504 -RH-Acked-by: Andrew Jones -RH-Acked-by: Auger Eric -RH-Acked-by: Igor Mammedov -RH-Acked-by: Laszlo Ersek - -This patch increases max_cpus of virt-rhel8.0.0 from 255 to 512. -This decision is made due to a recent trend of ARM servers Red Hat -received. Currently Red Hat has HPE Apollo machines that contains 256 -cpu cores. It is reasonable to expect that we will see new machines -with > 256 cores very soon. We have verified this patch partially with a -256-vcpu VM on Apollo and expect 512 vcpus will work as well. - -Signed-off-by: Wei Huang -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/arm/virt.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 156721a..21965e4 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2063,11 +2063,11 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - - mc->family = "virt-rhel-Z"; - mc->init = machvirt_init; -- /* Start max_cpus at the maximum QEMU supports. We'll further restrict -- * it later in machvirt_init, where we have more information about the -+ /* Start with max_cpus set to 512, which is the maximum supported by KVM. -+ * The value may be reduced later when we have more information about the - * configuration of the particular instance. - */ -- mc->max_cpus = 255; -+ mc->max_cpus = 512; - mc->block_default_type = IF_VIRTIO; - mc->no_cdrom = 1; - mc->pci_allow_0_address = true; --- -1.8.3.1 - diff --git a/kvm-aarch64-Use-256MB-ECAM-region-by-default.patch b/kvm-aarch64-Use-256MB-ECAM-region-by-default.patch deleted file mode 100644 index 759adcc..0000000 --- a/kvm-aarch64-Use-256MB-ECAM-region-by-default.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 4d20863ae382c9ce2a8b7f88aee7a1d1228112a7 Mon Sep 17 00:00:00 2001 -From: Wei Huang -Date: Thu, 17 Jan 2019 17:33:57 +0000 -Subject: [PATCH 7/7] aarch64: Use 256MB ECAM region by default - -RH-Author: Wei Huang -Message-id: <20190117173357.31514-4-wei@redhat.com> -Patchwork-id: 84039 -O-Subject: [RHEL8 qemu-kvm PATCH v3 3/3] aarch64: Use 256MB ECAM region by default -Bugzilla: 1656504 -RH-Acked-by: Andrew Jones -RH-Acked-by: Auger Eric -RH-Acked-by: Igor Mammedov -RH-Acked-by: Laszlo Ersek - -This patch turns on 256MB ECAM region to support a larger number (up to -256) of PCIe buses on virt-rhel8.0.0. - -Signed-off-by: Wei Huang -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/arm/virt.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 21965e4..bee8f10 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2144,6 +2144,5 @@ static void rhel800_virt_instance_init(Object *obj) - static void rhel800_virt_options(MachineClass *mc) - { - SET_MACHINE_COMPAT(mc, ARM_RHEL_COMPAT); -- vmc->no_highmem_ecam = true; - } - DEFINE_RHEL_MACHINE_AS_LATEST(8, 0, 0) --- -1.8.3.1 - diff --git a/kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch b/kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch deleted file mode 100644 index 50392ca..0000000 --- a/kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch +++ /dev/null @@ -1,66 +0,0 @@ -From fa7a2c6b323882bb64d0015b842f05d6078bbe48 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 20 Feb 2019 10:37:05 +0000 -Subject: [PATCH] block: Apply auto-read-only for ro-whitelist drivers - -RH-Author: Kevin Wolf -Message-id: <20190220103705.22630-2-kwolf@redhat.com> -Patchwork-id: 84561 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/1] block: Apply auto-read-only for ro-whitelist drivers -Bugzilla: 1678968 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz -RH-Acked-by: Stefano Garzarella - -If QEMU was configured with a driver in --block-drv-ro-whitelist, trying -to use that driver read-write resulted in an error message even if -auto-read-only=on was set. - -Consider auto-read-only=on for the whitelist checking and use it to -automatically degrade to read-only for block drivers on the read-only -whitelist. - -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 8be25de64315ef768353eb61f2b2bf6cddc34230) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 20 +++++++++++++------- - 1 file changed, 13 insertions(+), 7 deletions(-) - -diff --git a/block.c b/block.c -index 1ec4512..f1f6924 100644 ---- a/block.c -+++ b/block.c -@@ -1442,13 +1442,19 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, - bs->read_only = !(bs->open_flags & BDRV_O_RDWR); - - if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { -- error_setg(errp, -- !bs->read_only && bdrv_is_whitelisted(drv, true) -- ? "Driver '%s' can only be used for read-only devices" -- : "Driver '%s' is not whitelisted", -- drv->format_name); -- ret = -ENOTSUP; -- goto fail_opts; -+ if (!bs->read_only && bdrv_is_whitelisted(drv, true)) { -+ ret = bdrv_apply_auto_read_only(bs, NULL, NULL); -+ } else { -+ ret = -ENOTSUP; -+ } -+ if (ret < 0) { -+ error_setg(errp, -+ !bs->read_only && bdrv_is_whitelisted(drv, true) -+ ? "Driver '%s' can only be used for read-only devices" -+ : "Driver '%s' is not whitelisted", -+ drv->format_name); -+ goto fail_opts; -+ } - } - - /* bdrv_new() and bdrv_close() make it so */ --- -1.8.3.1 - diff --git a/kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch b/kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch deleted file mode 100644 index 357b528..0000000 --- a/kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch +++ /dev/null @@ -1,69 +0,0 @@ -From d6445c856c6199938eccbd73721c0c8257604557 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 6 Feb 2019 15:13:14 +0000 -Subject: [PATCH 5/8] block: Fix invalidate_cache error path for parent - activation - -RH-Author: Kevin Wolf -Message-id: <20190206151314.4789-2-kwolf@redhat.com> -Patchwork-id: 84251 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/1] block: Fix invalidate_cache error path for parent activation -Bugzilla: 1673014 -RH-Acked-by: John Snow -RH-Acked-by: Markus Armbruster -RH-Acked-by: Max Reitz - -bdrv_co_invalidate_cache() clears the BDRV_O_INACTIVE flag before -actually activating a node so 
that the correct permissions etc. are -taken. In case of errors, the flag must be restored so that the next -call to bdrv_co_invalidate_cache() retries activation. - -Restoring the flag was missing in the error path for a failed -parent->role->activate() call. The consequence is that this attempt to -activate all images correctly fails because we still set errp, however -on the next attempt BDRV_O_INACTIVE is already clear, so we return -success without actually retrying the failed action. - -An example where this is observable in practice is migration to a QEMU -instance that has a raw format block node attached to a guest device -with share-rw=off (the default) while another process holds -BLK_PERM_WRITE for the same image. In this case, all activation steps -before parent->role->activate() succeed because raw can tolerate other -writers to the image. Only the parent callback (in particular -blk_root_activate()) tries to implement the share-rw=on property and -requests exclusive write permissions. This fails when the migration -completes and correctly displays an error. However, a manual 'cont' will -incorrectly resume the VM without calling blk_root_activate() again. - -This case is described in more detail in the following bug report: -https://bugzilla.redhat.com/show_bug.cgi?id=1531888 - -Fix this by correctly restoring the BDRV_O_INACTIVE flag in the error -path. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Tested-by: Markus Armbruster -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 78fc3b3a26c145eebcdee992988644974b243a74) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/block.c b/block.c -index 811239c..1ec4512 100644 ---- a/block.c -+++ b/block.c -@@ -4553,6 +4553,7 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, - if (parent->role->activate) { - parent->role->activate(parent, &local_err); - if (local_err) { -+ bs->open_flags |= BDRV_O_INACTIVE; - error_propagate(errp, local_err); - return; - } --- -1.8.3.1 - diff --git a/kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch b/kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch deleted file mode 100644 index 75e4362..0000000 --- a/kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch +++ /dev/null @@ -1,58 +0,0 @@ -From cc7dbe3ae92a2bb1557df184493a331fe2381003 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Wed, 3 Apr 2019 17:53:52 +0100 -Subject: [PATCH 9/9] block/file-posix: do not fail on unlock bytes - -RH-Author: Max Reitz -Message-id: <20190403175352.27439-2-mreitz@redhat.com> -Patchwork-id: 85408 -O-Subject: [RHEL-AV-8.0.1 qemu-kvm PATCH 1/1] block/file-posix: do not fail on unlock bytes -Bugzilla: 1652572 -RH-Acked-by: John Snow -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi - -From: Vladimir Sementsov-Ogievskiy - -bdrv_replace_child() calls bdrv_check_perm() with error_abort on -loosening permissions. However file-locking operations may fail even -in this case, for example on NFS. And this leads to Qemu crash. - -Let's avoid such errors. Note, that we ignore such things anyway on -permission update commit and abort. - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 696aaaed579ac5bf5fa336216909b46d3d8f07a8) -Signed-off-by: Max Reitz -Signed-off-by: Danilo C. L. 
de Paula ---- - block/file-posix.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 07bbdab..f0af144 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -812,6 +812,18 @@ static int raw_handle_perm_lock(BlockDriverState *bs, - - switch (op) { - case RAW_PL_PREPARE: -+ if ((s->perm | new_perm) == s->perm && -+ (s->shared_perm & new_shared) == s->shared_perm) -+ { -+ /* -+ * We are going to unlock bytes, it should not fail. If it fail due -+ * to some fs-dependent permission-unrelated reasons (which occurs -+ * sometimes on NFS and leads to abort in bdrv_replace_child) we -+ * can't prevent such errors by any check here. And we ignore them -+ * anyway in ABORT and COMMIT. -+ */ -+ return 0; -+ } - ret = raw_apply_lock_bytes(s, s->fd, s->perm | new_perm, - ~s->shared_perm | ~new_shared, - false, errp); --- -1.8.3.1 - diff --git a/kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch b/kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch deleted file mode 100644 index bcf15cf..0000000 --- a/kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch +++ /dev/null @@ -1,119 +0,0 @@ -From fece44d5054ef13f483d7531a8462cb7f8ff5b93 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Fri, 14 Dec 2018 19:33:40 +0000 -Subject: [PATCH 7/8] kvm: clear out KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT for - older machine types - -RH-Author: Bandan Das -Message-id: -Patchwork-id: 83523 -O-Subject: [RHEL8 qemu-kvm PATCH] kvm: clear out KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT for older machine types -Bugzilla: 1659604 -RH-Acked-by: Dr. 
David Alan Gilbert -RH-Acked-by: Pankaj Gupta -RH-Acked-by: Eduardo Habkost - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1659604 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=19521246 -Upstream: Not applicable -Branch: rhel8/master-3.1.0 on top of [RHEL8 qemu-kvm PATCH v3 0/5] 8.0.0 x86 machine types - -After the addition of support for async pf injection to L1, newer -hypervisors advertise the feature using bit 2 of the -MSR_KVM_ASYNC_PF_EN msr. However, this was reserved in older -hypervisors which results in an error during migration like so: - -qemu-kvm: error: failed to set MSR 0x4b564d02 to 0x27fc13285 -qemu-kvm: /builddir/build/BUILD/qemu-2.12.0/target/i386/kvm.c:1940: kvm_put_msrs: Assertion `ret == cpu->kvm_msr_buf->nmsrs' failed. -Aborted (core dumped) - -This patch introduces a new bool that is set for older machine types. -When set, Qemu's stored value clears out bit 2. This should be safe -because the guest can still enable it by writing to the MSR after -checking for support. A reset/migration for <7.6 machine type would -reset the bit though. - -Signed-off-by: Bandan Das -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/i386/pc.c | 1 + - hw/i386/pc_piix.c | 1 + - hw/i386/pc_q35.c | 1 + - include/hw/boards.h | 2 ++ - target/i386/kvm.c | 4 ++++ - 5 files changed, 9 insertions(+) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index a609332..18268d3 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -2391,6 +2391,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - pcmc->linuxboot_dma_enabled = true; - assert(!mc->get_hotplug_handler); - pcmc->pc_rom_ro = true; -+ mc->async_pf_vmexit_disable = false; - mc->get_hotplug_handler = pc_get_hotpug_handler; - mc->cpu_index_to_instance_props = pc_cpu_index_to_props; - mc->get_default_cpu_node_id = pc_get_default_cpu_node_id; -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index efee5e7..46c494a 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1186,6 +1186,7 @@ static void pc_machine_rhel760_options(MachineClass *m) - { - pc_machine_rhel7_options(m); - m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; -+ m->async_pf_vmexit_disable = true; - SET_MACHINE_COMPAT(m, PC_RHEL7_6_COMPAT); - } - -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 0b7223f..1810cf2 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -470,6 +470,7 @@ static void pc_q35_machine_rhel760_options(MachineClass *m) - pc_q35_machine_rhel800_options(m); - m->alias = NULL; - m->desc = "RHEL-7.6.0 PC (Q35 + ICH9, 2009)"; -+ m->async_pf_vmexit_disable = true; - SET_MACHINE_COMPAT(m, PC_RHEL7_6_COMPAT); - } - -diff --git a/include/hw/boards.h b/include/hw/boards.h -index f82f284..27463fb 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -204,6 +204,8 @@ struct MachineClass { - const char **valid_cpu_types; - strList *allowed_dynamic_sysbus_devices; - bool auto_enable_numa_with_memhp; -+ /* RHEL only */ -+ bool async_pf_vmexit_disable; - void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes, - int nb_nodes, ram_addr_t size); - bool ignore_boot_device_suffixes; -diff --git a/target/i386/kvm.c 
b/target/i386/kvm.c -index b2401d1..5b0ce82 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -2351,6 +2351,7 @@ static int kvm_get_msrs(X86CPU *cpu) - struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; - int ret, i; - uint64_t mtrr_top_bits; -+ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); - - kvm_msr_buf_reset(cpu); - -@@ -2648,6 +2649,9 @@ static int kvm_get_msrs(X86CPU *cpu) - break; - case MSR_KVM_ASYNC_PF_EN: - env->async_pf_en_msr = msrs[i].data; -+ if (mc->async_pf_vmexit_disable) { -+ env->async_pf_en_msr &= ~(1ULL << 2); -+ } - break; - case MSR_KVM_PV_EOI_EN: - env->pv_eoi_en_msr = msrs[i].data; --- -1.8.3.1 - diff --git a/kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch b/kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch deleted file mode 100644 index f366d96..0000000 --- a/kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch +++ /dev/null @@ -1,66 +0,0 @@ -From a75b0a6a7ef6e14e3b65b34bbc6935491d3b016e Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 14 Dec 2018 17:02:07 +0000 -Subject: [PATCH 2/8] compat: Generic HW_COMPAT_RHEL7_6 - -RH-Author: Dr. David Alan Gilbert -Message-id: <20181214170211.14267-3-dgilbert@redhat.com> -Patchwork-id: 83516 -O-Subject: [RHEL8 qemu-kvm PATCH v3 2/6] compat: Generic HW_COMPAT_RHEL7_6 -Bugzilla: 1655820 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck - -From: "Dr. David Alan Gilbert" - -Add the HW_COMPAT_RHEL7_6 macro based on the 2.12 and 3.0 macros. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. 
de Paula ---- - include/hw/compat.h | 29 +++++++++++++++++++++++++++++ - 1 file changed, 29 insertions(+) - -diff --git a/include/hw/compat.h b/include/hw/compat.h -index f08cc7c..40db243 100644 ---- a/include/hw/compat.h -+++ b/include/hw/compat.h -@@ -513,5 +513,34 @@ - .value = "off",\ - }, - -+/* The same as HW_COMPAT_3_0 + HW_COMPAT_2_12 -+ * except that -+ * there's nothing in 3_0 -+ * migration.decompress-error-check=off was in 7.5 from bz 1584139 -+ * -+ */ -+#define HW_COMPAT_RHEL7_6 \ -+ { /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ \ -+ .driver = "hda-audio",\ -+ .property = "use-timer",\ -+ .value = "false",\ -+ },{ /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ \ -+ .driver = "cirrus-vga",\ -+ .property = "global-vmstate",\ -+ .value = "true",\ -+ },{ /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ \ -+ .driver = "VGA",\ -+ .property = "global-vmstate",\ -+ .value = "true",\ -+ },{ /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ \ -+ .driver = "vmware-svga",\ -+ .property = "global-vmstate",\ -+ .value = "true",\ -+ },{ /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ \ -+ .driver = "qxl-vga",\ -+ .property = "global-vmstate",\ -+ .value = "true",\ -+ }, -+ - - #endif /* HW_COMPAT_H */ --- -1.8.3.1 - diff --git a/kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch b/kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch deleted file mode 100644 index fe79e80..0000000 --- a/kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 5f57f764ebb7451c71ffa04130ad2f2e4cb531e8 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Wed, 23 Jan 2019 09:15:22 +0000 -Subject: [PATCH 2/2] cpus: ignore ESRCH in qemu_cpu_kick_thread() - -RH-Author: Laurent Vivier -Message-id: <20190123091522.17581-1-lvivier@redhat.com> -Patchwork-id: 84092 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH v2] cpus: ignore ESRCH in qemu_cpu_kick_thread() -Bugzilla: 1668205 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Thomas Huth -RH-Acked-by: Serhii Popovych -RH-Acked-by: Laszlo Ersek 
- -We can have a race condition between qemu_cpu_kick_thread() and -qemu_kvm_cpu_thread_fn() when we hotunplug a CPU. In this case, -qemu_cpu_kick_thread() can try to kick a thread that is exiting. -pthread_kill() returns an error and qemu is stopped by an exit(1). - - qemu:qemu_cpu_kick_thread: No such process - -We can ignore safely this error. - -Signed-off-by: Laurent Vivier -Signed-off-by: Paolo Bonzini -(cherry picked from commit e9979ef245549b8e1fd240ec9937271c7fda0b57) -Signed-off-by: Laurent Vivier - -BRANCH: rhel8/master-3.1.0 -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1668205 -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=19905615 -UPSTREAM: In maintainer pull request - git://github.com/bonzini/qemu.git tags/for-upstream -TEST: Upstream version tested by QE ---- -v2: add BRANCH: tag to make happy virt-ci-maint-team - - cpus.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -Signed-off-by: Danilo C. L. de Paula ---- - cpus.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/cpus.c b/cpus.c -index 0ddeeef..4717490 100644 ---- a/cpus.c -+++ b/cpus.c -@@ -1778,7 +1778,7 @@ static void qemu_cpu_kick_thread(CPUState *cpu) - } - cpu->thread_kicked = true; - err = pthread_kill(cpu->thread->thread, SIG_IPI); -- if (err) { -+ if (err && err != ESRCH) { - fprintf(stderr, "qemu:%s: %s", __func__, strerror(err)); - exit(1); - } --- -1.8.3.1 - diff --git a/kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch b/kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch deleted file mode 100644 index 3ff218b..0000000 --- a/kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 1aefd1b8a1dbbf63d28901081102b31455f96290 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Mon, 15 Apr 2019 12:22:02 +0100 -Subject: [PATCH] device_tree: Fix integer overflowing in load_device_tree() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -RH-Author: Sergio Lopez Pascual -Message-id: <20190415122202.52108-2-slp@redhat.com> -Patchwork-id: 85670 -O-Subject: [RHEL-AV-8.0.1 qemu-kvm PATCH 1/1] device_tree: Fix integer overflowing in load_device_tree() -Bugzilla: 1693173 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella - -From: Markus Armbruster - -If the value of get_image_size() exceeds INT_MAX / 2 - 10000, the -computation of @dt_size overflows to a negative number, which then -gets converted to a very large size_t for g_malloc0() and -load_image_size(). In the (fortunately improbable) case g_malloc0() -succeeds and load_image_size() survives, we'd assign the negative -number to *sizep. What that would do to the callers I can't say, but -it's unlikely to be good. - -Fix by rejecting images whose size would overflow. - -Reported-by: Kurtis Miller -Signed-off-by: Markus Armbruster -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Alistair Francis -Message-Id: <20190409174018.25798-1-armbru@redhat.com> -(cherry picked from 065e6298a75164b4347682b63381dbe752c2b156) -Signed-off-by: Sergio Lopez - -Signed-off-by: Danilo C. L. de Paula ---- - device_tree.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/device_tree.c b/device_tree.c -index 6d9c972..fe61027 100644 ---- a/device_tree.c -+++ b/device_tree.c -@@ -84,6 +84,10 @@ void *load_device_tree(const char *filename_path, int *sizep) - filename_path); - goto fail; - } -+ if (dt_size > INT_MAX / 2 - 10000) { -+ error_report("Device tree file '%s' is too large", filename_path); -+ goto fail; -+ } - - /* Expand to 2x size to give enough room for manipulation. 
*/ - dt_size += 10000; --- -1.8.3.1 - diff --git a/kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch b/kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch deleted file mode 100644 index 51368ef..0000000 --- a/kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch +++ /dev/null @@ -1,55 +0,0 @@ -From f869cc062302515f4d031305584386ead0d32714 Mon Sep 17 00:00:00 2001 -From: Markus Armbruster -Date: Thu, 21 Feb 2019 09:11:01 +0000 -Subject: [PATCH 2/2] fdc: Restrict floppy controllers to RHEL-7 machine types - -RH-Author: Markus Armbruster -Message-id: <20190221091101.31999-3-armbru@redhat.com> -Patchwork-id: 84693 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH v2 2/2] fdc: Restrict floppy controllers to RHEL-7 machine types -Bugzilla: 1664997 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Thomas Huth - -Make floppy controllers' realize() fail except with RHEL-7 machine -types. The "is a RHEL-7 machine type" test is a bit of a hack: it -looks for "-rhel7." in the machine type name. - -Signed-off-by: Markus Armbruster -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/block/fdc.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 6f19f12..9ece2db 100644 ---- a/hw/block/fdc.c -+++ b/hw/block/fdc.c -@@ -42,6 +42,8 @@ - #include "qemu/log.h" - #include "trace.h" - -+#include "hw/boards.h" -+ - /********************************************************/ - /* debug Floppy devices */ - -@@ -2629,6 +2631,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, - int i, j; - static int command_tables_inited = 0; - -+ /* Restricted for Red Hat Enterprise Linux: */ -+ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); -+ if (!strstr(mc->name, "-rhel7.")) { -+ error_setg(errp, "Device %s is not supported with machine type %s", -+ object_get_typename(OBJECT(dev)), mc->name); -+ return; -+ } -+ - if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { - error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); - } --- -1.8.3.1 - diff --git a/kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch b/kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch deleted file mode 100644 index 15baaa9..0000000 --- a/kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 20a51f6e5ebc56707554a52e2fb6a61bf6511315 Mon Sep 17 00:00:00 2001 -From: Markus Armbruster -Date: Thu, 21 Feb 2019 09:11:00 +0000 -Subject: [PATCH 1/2] fdc: Revert downstream disablement of device "floppy" - -RH-Author: Markus Armbruster -Message-id: <20190221091101.31999-2-armbru@redhat.com> -Patchwork-id: 84691 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH v2 1/2] fdc: Revert downstream disablement of device "floppy" -Bugzilla: 1664997 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Thomas Huth - -Board code creates floppy devices on behalf of -drive if=floppy,... -When they got qdevifified, they also became available with -device. 
-We made it unavailable downstream as per our policy to permit new -devices only when we have a use for them (commit 0533a6ee98f). We now -have a use: we need it to move from -drive to -blockdev. - -Signed-off-by: Markus Armbruster -Signed-off-by: Danilo C. L. de Paula ---- - hw/block/fdc.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 56b7aeb..6f19f12 100644 ---- a/hw/block/fdc.c -+++ b/hw/block/fdc.c -@@ -599,7 +599,6 @@ static void floppy_drive_class_init(ObjectClass *klass, void *data) - k->bus_type = TYPE_FLOPPY_BUS; - k->props = floppy_drive_properties; - k->desc = "virtual floppy drive"; -- k->user_creatable = false; /* RH state preserve */ - } - - static const TypeInfo floppy_drive_info = { --- -1.8.3.1 - diff --git a/kvm-globals-Allow-global-properties-to-be-optional.patch b/kvm-globals-Allow-global-properties-to-be-optional.patch deleted file mode 100644 index b039c64..0000000 --- a/kvm-globals-Allow-global-properties-to-be-optional.patch +++ /dev/null @@ -1,84 +0,0 @@ -From c63a58b4e1d3db52301bec072ac8025216731f35 Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Wed, 16 Jan 2019 23:18:18 +0000 -Subject: [PATCH 3/7] globals: Allow global properties to be optional -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eduardo Habkost -Message-id: <20190116231819.29310-4-ehabkost@redhat.com> -Patchwork-id: 84029 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 3/4] globals: Allow global properties to be optional -Bugzilla: 1648023 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth -RH-Acked-by: Laurent Vivier -RH-Acked-by: Michael S. Tsirkin - -Upstream tree: git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git -Upstream commit: d7741743f4f3d2683d1bb6938f88dc0167c21afa - -Making some global properties optional will let us simplify -compat code when a given property works on most (but not all) -subclasses of a given type. 
- -Device types will be able to opt out from optional compat -properties by simply not registering those properties. - -Backport conflict notes: - Patching qdev_prop_set_globals(), because our downstream tree - still doesn't have object_apply_global_props() from commit - ea9ce8934c5d ("hw: apply accel compat properties without - touching globals") - -Signed-off-by: Eduardo Habkost -Reviewed-by: Cornelia Huck -Reviewed-by: Marc-André Lureau -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. de Paula ---- - hw/core/qdev-properties.c | 3 +++ - include/hw/qdev-core.h | 3 +++ - 2 files changed, 6 insertions(+) - -diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c -index 35072de..2023c1a 100644 ---- a/hw/core/qdev-properties.c -+++ b/hw/core/qdev-properties.c -@@ -1255,6 +1255,9 @@ void qdev_prop_set_globals(DeviceState *dev) - if (object_dynamic_cast(OBJECT(dev), prop->driver) == NULL) { - continue; - } -+ if (prop->optional && !object_property_find(OBJECT(dev), prop->property, NULL)) { -+ continue; -+ } - prop->used = true; - object_property_parse(OBJECT(dev), prop->value, prop->property, &err); - if (err != NULL) { -diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h -index a24d0dd..a10f60f 100644 ---- a/include/hw/qdev-core.h -+++ b/include/hw/qdev-core.h -@@ -252,6 +252,8 @@ struct PropertyInfo { - * @user_provided: Set to true if property comes from user-provided config - * (command-line or config file). - * @used: Set to true if property was used when initializing a device. -+ * @optional: If set to true, GlobalProperty will be skipped without errors -+ * if the property doesn't exist. - * @errp: Error destination, used like first argument of error_setg() - * in case property setting fails later. If @errp is NULL, we - * print warnings instead of ignoring errors silently. 
For -@@ -264,6 +266,7 @@ typedef struct GlobalProperty { - const char *value; - bool user_provided; - bool used; -+ bool optional; - Error **errp; - } GlobalProperty; - --- -1.8.3.1 - diff --git a/kvm-hostmem-file-reject-invalid-pmem-file-sizes.patch b/kvm-hostmem-file-reject-invalid-pmem-file-sizes.patch deleted file mode 100644 index 5be56ee..0000000 --- a/kvm-hostmem-file-reject-invalid-pmem-file-sizes.patch +++ /dev/null @@ -1,183 +0,0 @@ -From 1b9f228788eb2d7f50961241e28f7a9afadc62ab Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 21 Mar 2019 10:56:08 +0000 -Subject: [PATCH 7/9] hostmem-file: reject invalid pmem file sizes - -RH-Author: Stefan Hajnoczi -Message-id: <20190321105608.29960-2-stefanha@redhat.com> -Patchwork-id: 85081 -O-Subject: [RHEL-AV-8.0.1 qemu-kvm PATCH 1/1] hostmem-file: reject invalid pmem file sizes -Bugzilla: 1669053 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Pankaj Gupta -RH-Acked-by: Igor Mammedov - -Guests started with NVDIMMs larger than the underlying host file produce -confusing errors inside the guest. This happens because the guest -accesses pages beyond the end of the file. - -Check the pmem file size on startup and print a clear error message if -the size is invalid. - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1669053 -Cc: Wei Yang -Cc: Zhang Yi -Cc: Eduardo Habkost -Cc: Igor Mammedov -Signed-off-by: Stefan Hajnoczi -Message-Id: <20190214031004.32522-3-stefanha@redhat.com> -Reviewed-by: Wei Yang -Reviewed-by: Igor Mammedov -Reviewed-by: Pankaj Gupta -Signed-off-by: Eduardo Habkost -(cherry picked from commit 314aec4a6e06844937f1677f6cba21981005f389) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. 
de Paula ---- - backends/hostmem-file.c | 23 +++++++++++++++++++++ - include/qemu/osdep.h | 13 ++++++++++++ - util/oslib-posix.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ - util/oslib-win32.c | 5 +++++ - 4 files changed, 94 insertions(+) - -diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c -index 6630021..8d85d56 100644 ---- a/backends/hostmem-file.c -+++ b/backends/hostmem-file.c -@@ -57,6 +57,29 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) - #ifndef CONFIG_POSIX - error_setg(errp, "-mem-path not supported on this host"); - #else -+ -+ /* -+ * Verify pmem file size since starting a guest with an incorrect size -+ * leads to confusing failures inside the guest. -+ */ -+ if (fb->is_pmem) { -+ Error *local_err = NULL; -+ uint64_t size; -+ -+ size = qemu_get_pmem_size(fb->mem_path, &local_err); -+ if (!size) { -+ error_propagate(errp, local_err); -+ return; -+ } -+ -+ if (backend->size > size) { -+ error_setg(errp, "size property %" PRIu64 " is larger than " -+ "pmem file \"%s\" size %" PRIu64, backend->size, -+ fb->mem_path, size); -+ return; -+ } -+ } -+ - backend->force_prealloc = mem_prealloc; - path = object_get_canonical_path(OBJECT(backend)); - memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), -diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h -index 3bf48bc..c68a85b 100644 ---- a/include/qemu/osdep.h -+++ b/include/qemu/osdep.h -@@ -553,6 +553,19 @@ void os_mem_prealloc(int fd, char *area, size_t sz, int smp_cpus, - Error **errp); - - /** -+ * qemu_get_pmem_size: -+ * @filename: path to a pmem file -+ * @errp: pointer to a NULL-initialized error object -+ * -+ * Determine the size of a persistent memory file. Besides supporting files on -+ * DAX file systems, this function also supports Linux devdax character -+ * devices. 
-+ * -+ * Returns: the size or 0 on failure -+ */ -+uint64_t qemu_get_pmem_size(const char *filename, Error **errp); -+ -+/** - * qemu_get_pid_name: - * @pid: pid of a process - * -diff --git a/util/oslib-posix.c b/util/oslib-posix.c -index 97b2f3b..b173fc0 100644 ---- a/util/oslib-posix.c -+++ b/util/oslib-posix.c -@@ -496,6 +496,59 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, - } - } - -+uint64_t qemu_get_pmem_size(const char *filename, Error **errp) -+{ -+ struct stat st; -+ -+ if (stat(filename, &st) < 0) { -+ error_setg(errp, "unable to stat pmem file \"%s\"", filename); -+ return 0; -+ } -+ -+#if defined(__linux__) -+ /* Special handling for devdax character devices */ -+ if (S_ISCHR(st.st_mode)) { -+ char *subsystem_path = NULL; -+ char *subsystem = NULL; -+ char *size_path = NULL; -+ char *size_str = NULL; -+ uint64_t ret = 0; -+ -+ subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem", -+ major(st.st_rdev), minor(st.st_rdev)); -+ subsystem = g_file_read_link(subsystem_path, NULL); -+ if (!subsystem) { -+ error_setg(errp, "unable to read subsystem for pmem file \"%s\"", -+ filename); -+ goto devdax_err; -+ } -+ -+ if (!g_str_has_suffix(subsystem, "/dax")) { -+ error_setg(errp, "pmem file \"%s\" is not a dax device", filename); -+ goto devdax_err; -+ } -+ -+ size_path = g_strdup_printf("/sys/dev/char/%d:%d/size", -+ major(st.st_rdev), minor(st.st_rdev)); -+ if (!g_file_get_contents(size_path, &size_str, NULL, NULL)) { -+ error_setg(errp, "unable to read size for pmem file \"%s\"", -+ size_path); -+ goto devdax_err; -+ } -+ -+ ret = g_ascii_strtoull(size_str, NULL, 0); -+ -+devdax_err: -+ g_free(size_str); -+ g_free(size_path); -+ g_free(subsystem); -+ g_free(subsystem_path); -+ return ret; -+ } -+#endif /* defined(__linux__) */ -+ -+ return st.st_size; -+} - - char *qemu_get_pid_name(pid_t pid) - { -diff --git a/util/oslib-win32.c b/util/oslib-win32.c -index b4c17f5..bd633af 100644 ---- a/util/oslib-win32.c -+++ 
b/util/oslib-win32.c -@@ -560,6 +560,11 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, - } - } - -+uint64_t qemu_get_pmem_size(const char *filename, Error **errp) -+{ -+ error_setg(errp, "pmem support not available"); -+ return 0; -+} - - char *qemu_get_pid_name(pid_t pid) - { --- -1.8.3.1 - diff --git a/kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch b/kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch deleted file mode 100644 index 29e394d..0000000 --- a/kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch +++ /dev/null @@ -1,50 +0,0 @@ -From c6ac9501471c3c931367b1967ad97ecfc498249c Mon Sep 17 00:00:00 2001 -From: David Hildenbrand -Date: Fri, 21 Dec 2018 15:33:17 +0000 -Subject: [PATCH 3/3] hw/s390x: Fix bad mask in time2tod() - -RH-Author: David Hildenbrand -Message-id: <20181221153317.27647-3-david@redhat.com> -Patchwork-id: 83743 -O-Subject: [RHEL-8.0 qemu-kvm v2 PATCH 2/2] hw/s390x: Fix bad mask in time2tod() -Bugzilla: 1659127 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth -RH-Acked-by: Laurent Vivier - -Since "s390x/tcg: avoid overflows in time2tod/tod2time", the -time2tod() function tries to deal with the 9 uppermost bits in the -time value, but uses the wrong mask for this: 0xff80000000000000 should -be used instead of 0xff10000000000000 here. - -Fixes: 14055ce53c2d901d826ffad7fb7d6bb8ab46bdfd -Cc: qemu-stable@nongnu.org -Signed-off-by: Thomas Huth -Message-Id: <1544792887-14575-1-git-send-email-thuth@redhat.com> -Reviewed-by: David Hildenbrand -[CH: tweaked commit message] -Signed-off-by: Cornelia Huck -(cherry picked from commit aba7a5a2de3dba5917024df25441f715b9249e31) -Signed-off-by: David Hildenbrand - -Signed-off-by: Danilo C. L. 
de Paula ---- - include/hw/s390x/tod.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/hw/s390x/tod.h b/include/hw/s390x/tod.h -index cbd7552..47ef9de 100644 ---- a/include/hw/s390x/tod.h -+++ b/include/hw/s390x/tod.h -@@ -56,7 +56,7 @@ typedef struct S390TODClass { - /* Converts ns to s390's clock format */ - static inline uint64_t time2tod(uint64_t ns) - { -- return (ns << 9) / 125 + (((ns & 0xff10000000000000ull) / 125) << 9); -+ return (ns << 9) / 125 + (((ns & 0xff80000000000000ull) / 125) << 9); - } - - /* Converts s390's clock format to ns */ --- -1.8.3.1 - diff --git a/kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch b/kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch deleted file mode 100644 index 592fe51..0000000 --- a/kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 6c200d665b8730ea86104e7aea2d59035b1398e5 Mon Sep 17 00:00:00 2001 -From: David Hildenbrand -Date: Fri, 21 Dec 2018 14:08:56 +0000 -Subject: [PATCH 1/3] hw/s390x/s390-virtio-ccw: Add machine types for RHEL8.0.0 - -RH-Author: David Hildenbrand -Message-id: <20181221150856.26324-3-david@redhat.com> -Patchwork-id: 83740 -O-Subject: [RHEL8 qemu-kvm PATCH v3 2/2] hw/s390x/s390-virtio-ccw: Add machine types for RHEL8.0.0 -Bugzilla: 1656510 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth -RH-Acked-by: Dr. David Alan Gilbert - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1656510 -Upstream: n/a (downstream only) -Branch: rhel8/master-3.1.0 - -Downstream s390x machine types for the Advanced Virtualization module. - -s390mc->hpage_1m_allowed has to stay enabled for the rhel7.6.0 machine, -because RHEL 8 supports huge pages. For RHEL 7.6-alt, this is fenced -using a different mechanism (bail out if huge pages are used right from -the start). - -Signed-off-by: Thomas Huth -Signed-off-by: David Hildenbrand -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/s390x/s390-virtio-ccw.c | 26 +++++++++++++++++++++++--- - 1 file changed, 23 insertions(+), 3 deletions(-) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 04f4c1a..776a6d6 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -651,14 +651,14 @@ bool css_migration_enabled(void) - } \ - type_init(ccw_machine_register_##suffix) - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ -+ - #define CCW_COMPAT_3_0 \ - HW_COMPAT_3_0 - - #define CCW_COMPAT_2_12 \ - HW_COMPAT_2_12 - --#if 0 /* Disabled for Red Hat Enterprise Linux */ -- - #define CCW_COMPAT_2_11 \ - HW_COMPAT_2_11 \ - {\ -@@ -899,6 +899,13 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); - #else - - /* -+ * like CCW_COMPAT_2_12 + CCW_COMPAT_3_0 (which are empty), but includes -+ * HW_COMPAT_RHEL7_6 instead of HW_COMPAT_2_11 and HW_COMPAT_3_0 -+ */ -+#define CCW_COMPAT_RHEL7_6 \ -+ HW_COMPAT_RHEL7_6 -+ -+/* - * like CCW_COMPAT_2_11, but includes HW_COMPAT_RHEL7_5 (derived from - * HW_COMPAT_2_11 and HW_COMPAT_2_10) instead of HW_COMPAT_2_11 - */ -@@ -910,14 +917,26 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); - .value = "off",\ - }, - -+static void ccw_machine_rhel800_instance_options(MachineState *machine) -+{ -+} -+ -+static void ccw_machine_rhel800_class_options(MachineClass *mc) -+{ -+} -+DEFINE_CCW_MACHINE(rhel800, "rhel8.0.0", true); -+ - static void ccw_machine_rhel760_instance_options(MachineState *machine) - { -+ ccw_machine_rhel800_instance_options(machine); - } - - static void ccw_machine_rhel760_class_options(MachineClass *mc) - { -+ ccw_machine_rhel800_class_options(mc); -+ SET_MACHINE_COMPAT(mc, CCW_COMPAT_RHEL7_6); - } --DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", true); -+DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", false); - - static void ccw_machine_rhel750_instance_options(MachineState *machine) - { -@@ -937,6 +956,7 @@ static void ccw_machine_rhel750_class_options(MachineClass *mc) - { - ccw_machine_rhel760_class_options(mc); - 
SET_MACHINE_COMPAT(mc, CCW_COMPAT_RHEL7_5); -+ S390_MACHINE_CLASS(mc)->hpage_1m_allowed = false; - } - DEFINE_CCW_MACHINE(rhel750, "rhel7.5.0", false); - --- -1.8.3.1 - diff --git a/kvm-i386-Add-stibp-flag-name.patch b/kvm-i386-Add-stibp-flag-name.patch deleted file mode 100644 index b286de7..0000000 --- a/kvm-i386-Add-stibp-flag-name.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 730ab8e3a8e9a703f2b2374b8f55429dd6b2254c Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Thu, 14 Mar 2019 19:41:28 +0000 -Subject: [PATCH] i386: Add "stibp" flag name - -RH-Author: Eduardo Habkost -Message-id: <20190314194128.15795-2-ehabkost@redhat.com> -Patchwork-id: 84870 -O-Subject: [RHEL-AV-8.0.0 qemu-kvm PATCH 1/1] i386: Add "stibp" flag name -Bugzilla: 1686260 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Igor Mammedov -RH-Acked-by: Danilo de Paula - -The STIBP flag may be supported by the host KVM module, so QEMU -can allow it to be configured manually, and it can be exposed to -guests when using "-cpu host". - -No additional migration code is required because the whole -contents of spec_ctrl is already migrated in the "cpu/spec_ctrl" -section. - -Corresponding KVM patch was submitted at: -https://lore.kernel.org/lkml/20181205191956.31480-1-ehabkost@redhat.com/ - -Signed-off-by: Eduardo Habkost -Message-Id: <20181210180250.31299-1-ehabkost@redhat.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit 0e8916582991b9fd0b94850a8444b8b80d0a0955) -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index d990070..c115572 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1079,7 +1079,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -- NULL, NULL, "spec-ctrl", NULL, -+ NULL, NULL, "spec-ctrl", "stibp", - NULL, "arch-capabilities", NULL, "ssbd", - }, - .cpuid = { --- -1.8.3.1 - diff --git a/kvm-i386-Make-arch_capabilities-migratable.patch b/kvm-i386-Make-arch_capabilities-migratable.patch deleted file mode 100644 index afcc947..0000000 --- a/kvm-i386-Make-arch_capabilities-migratable.patch +++ /dev/null @@ -1,42 +0,0 @@ -From f906636aa5024f6e64e2a1802b2eca448085d06a Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Wed, 10 Apr 2019 20:50:03 +0100 -Subject: [PATCH 2/2] i386: Make arch_capabilities migratable - -RH-Author: Eduardo Habkost -Message-id: <20190410205003.18916-3-ehabkost@redhat.com> -Patchwork-id: 85551 -O-Subject: [RHEL-AV-8.0.1 qemu-kvm PATCH 2/2] i386: Make arch_capabilities migratable -Bugzilla: 1687578 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Igor Mammedov - -Now that kvm_arch_get_supported_cpuid() will only return -arch_capabilities if QEMU is able to initialize the MSR properly, -we know that the feature is safely migratable. - -Signed-off-by: Eduardo Habkost -Message-Id: <20190125220606.4864-3-ehabkost@redhat.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit 014018e19b3c54dd1bf5072bc912ceffea40abe8) -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index c115572..d92c128 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1088,7 +1088,6 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - .reg = R_EDX, - }, - .tcg_features = TCG_7_0_EDX_FEATURES, -- .unmigratable_flags = CPUID_7_0_EDX_ARCH_CAPABILITIES, - }, - [FEAT_8000_0007_EDX] = { - .type = CPUID_FEATURE_WORD, --- -1.8.3.1 - diff --git a/kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch b/kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch deleted file mode 100644 index ce9be0b..0000000 --- a/kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 03f812fa6ea821f5d1c968ab6fc0fb92054f9a1b Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Wed, 10 Apr 2019 20:50:02 +0100 -Subject: [PATCH 1/2] i386: kvm: Disable arch_capabilities if MSR can't be set - -RH-Author: Eduardo Habkost -Message-id: <20190410205003.18916-2-ehabkost@redhat.com> -Patchwork-id: 85550 -O-Subject: [RHEL-AV-8.0.1 qemu-kvm PATCH 1/2] i386: kvm: Disable arch_capabilities if MSR can't be set -Bugzilla: 1687578 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Igor Mammedov - -KVM has two bugs in the handling of MSR_IA32_ARCH_CAPABILITIES: - -1) Linux commit commit 1eaafe91a0df ("kvm: x86: IA32_ARCH_CAPABILITIES - is always supported") makes GET_SUPPORTED_CPUID return - arch_capabilities even if running on SVM. This makes "-cpu - host,migratable=off" incorrectly expose arch_capabilities on CPUID on - AMD hosts (where the MSR is not emulated by KVM). - -2) KVM_GET_MSR_INDEX_LIST does not return MSR_IA32_ARCH_CAPABILITIES if - the MSR is not supported by the host CPU. This makes QEMU not - initialize the MSR properly at kvm_put_msrs() on those hosts. 
- -Work around both bugs on the QEMU side, by checking if the MSR -was returned by KVM_GET_MSR_INDEX_LIST before returning the -feature flag on kvm_arch_get_supported_cpuid(). - -This has the unfortunate side effect of making arch_capabilities -unavailable on hosts without hardware support for the MSR until bug #2 -is fixed on KVM, but I can't see another way to work around bug #1 -without that side effect. - -Signed-off-by: Eduardo Habkost -Message-Id: <20190125220606.4864-2-ehabkost@redhat.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit 485b1d256bcb0874bcde0223727c159b6837e6f8) -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/kvm.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 0c9a5e4..720948a 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -389,6 +389,15 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, - if (host_tsx_blacklisted()) { - ret &= ~(CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_HLE); - } -+ } else if (function == 7 && index == 0 && reg == R_EDX) { -+ /* -+ * Linux v4.17-v4.20 incorrectly return ARCH_CAPABILITIES on SVM hosts. -+ * We can detect the bug by checking if MSR_IA32_ARCH_CAPABILITIES is -+ * returned by KVM_GET_MSR_INDEX_LIST. 
-+ */ -+ if (!has_msr_arch_capabs) { -+ ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES; -+ } - } else if (function == 0x80000001 && reg == R_ECX) { - /* - * It's safe to enable TOPOEXT even if it's not returned by --- -1.8.3.1 - diff --git a/kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch b/kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch deleted file mode 100644 index 38b6d88..0000000 --- a/kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 61470c276a7785f3615da564f15a5c2368354638 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Fri, 18 Jan 2019 11:57:05 +0000 -Subject: [PATCH 2/2] i386/kvm: add a comment explaining why .feat_names are - commented out for Hyper-V feature bits - -RH-Author: Vitaly Kuznetsov -Message-id: <20190118115705.19731-3-vkuznets@redhat.com> -Patchwork-id: 84048 -O-Subject: [RHEL8 qemu-kvm PATCH 2/2] i386/kvm: add a comment explaining why .feat_names are commented out for Hyper-V feature bits -Bugzilla: 1653511 -RH-Acked-by: Mohammed Gamal -RH-Acked-by: Eduardo Otubo -RH-Acked-by: Eduardo Habkost - -Hyper-V .feat_names are, unlike hardware features, commented out and it is -not obvious why we do that. Document the current status quo. - -Signed-off-by: Vitaly Kuznetsov -Message-Id: <20181221141604.16935-1-vkuznets@redhat.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit abd5fc4c862d033a989552914149f01c9476bb16) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 460fe06..8570b25 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -929,6 +929,13 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - */ - .no_autoenable_flags = ~0U, - }, -+ /* -+ * .feat_names are commented out for Hyper-V enlightenments because we -+ * don't want to have two different ways for enabling them on QEMU command -+ * line. 
Some features (e.g. "hyperv_time", "hyperv_vapic", ...) require -+ * enabling several feature bits simultaneously, exposing these bits -+ * individually may just confuse guests. -+ */ - [FEAT_HYPERV_EAX] = { - .type = CPUID_FEATURE_WORD, - .feat_names = { --- -1.8.3.1 - diff --git a/kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch b/kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch deleted file mode 100644 index b39a55f..0000000 --- a/kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch +++ /dev/null @@ -1,211 +0,0 @@ -From 4d58784a4a507fa1070b330846d941f91bb9abdc Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Fri, 18 Jan 2019 11:57:04 +0000 -Subject: [PATCH 1/2] i386/kvm: expose HV_CPUID_ENLIGHTMENT_INFO.EAX and - HV_CPUID_NESTED_FEATURES.EAX as feature words - -RH-Author: Vitaly Kuznetsov -Message-id: <20190118115705.19731-2-vkuznets@redhat.com> -Patchwork-id: 84046 -O-Subject: [RHEL8 qemu-kvm PATCH 1/2] i386/kvm: expose HV_CPUID_ENLIGHTMENT_INFO.EAX and HV_CPUID_NESTED_FEATURES.EAX as feature words -Bugzilla: 1653511 -RH-Acked-by: Mohammed Gamal -RH-Acked-by: Eduardo Otubo -RH-Acked-by: Eduardo Habkost - -It was found that QMP users of QEMU (e.g. libvirt) may need -HV_CPUID_ENLIGHTMENT_INFO.EAX/HV_CPUID_NESTED_FEATURES.EAX information. In -particular, 'hv_tlbflush' and 'hv_evmcs' enlightenments are only exposed in -HV_CPUID_ENLIGHTMENT_INFO.EAX. - -HV_CPUID_NESTED_FEATURES.EAX is exposed for two reasons: convenience -(we don't need to export it from hyperv_handle_properties() and as -future-proof for Enlightened MSR-Bitmap, PV EPT invalidation and -direct virtual flush features. - -Signed-off-by: Vitaly Kuznetsov -Message-Id: <20181126135958.20956-1-vkuznets@redhat.com> -Reviewed-by: Roman Kagan -Signed-off-by: Eduardo Habkost -(cherry picked from commit a2b107dbbd342ff2077aa5af705efaf68c375459) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 30 ++++++++++++++++++++ - target/i386/cpu.h | 2 ++ - target/i386/kvm.c | 85 +++++++++++++++++++++++++++++-------------------------- - 3 files changed, 77 insertions(+), 40 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 573de14..460fe06 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -980,6 +980,36 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - }, - .cpuid = { .eax = 0x40000003, .reg = R_EDX, }, - }, -+ [FEAT_HV_RECOMM_EAX] = { -+ .type = CPUID_FEATURE_WORD, -+ .feat_names = { -+ NULL /* hv_recommend_pv_as_switch */, -+ NULL /* hv_recommend_pv_tlbflush_local */, -+ NULL /* hv_recommend_pv_tlbflush_remote */, -+ NULL /* hv_recommend_msr_apic_access */, -+ NULL /* hv_recommend_msr_reset */, -+ NULL /* hv_recommend_relaxed_timing */, -+ NULL /* hv_recommend_dma_remapping */, -+ NULL /* hv_recommend_int_remapping */, -+ NULL /* hv_recommend_x2apic_msrs */, -+ NULL /* hv_recommend_autoeoi_deprecation */, -+ NULL /* hv_recommend_pv_ipi */, -+ NULL /* hv_recommend_ex_hypercalls */, -+ NULL /* hv_hypervisor_is_nested */, -+ NULL /* hv_recommend_int_mbec */, -+ NULL /* hv_recommend_evmcs */, -+ NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ }, -+ .cpuid = { .eax = 0x40000004, .reg = R_EAX, }, -+ }, -+ [FEAT_HV_NESTED_EAX] = { -+ .type = CPUID_FEATURE_WORD, -+ .cpuid = { .eax = 0x4000000A, .reg = R_EAX, }, -+ }, - [FEAT_SVM] = { - .type = CPUID_FEATURE_WORD, - .feat_names = { -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 9c52d0c..dd88151 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -497,6 +497,8 @@ typedef enum FeatureWord { - FEAT_HYPERV_EAX, /* CPUID[4000_0003].EAX */ - FEAT_HYPERV_EBX, /* CPUID[4000_0003].EBX */ - FEAT_HYPERV_EDX, /* CPUID[4000_0003].EDX */ -+ FEAT_HV_RECOMM_EAX, /* CPUID[4000_0004].EAX */ -+ FEAT_HV_NESTED_EAX, /* CPUID[4000_000A].EAX */ - FEAT_SVM, /* 
CPUID[8000_000A].EDX */ - FEAT_XSAVE, /* CPUID[EAX=0xd,ECX=1].EAX */ - FEAT_6_EAX, /* CPUID[6].EAX */ -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 5b0ce82..0c9a5e4 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -798,6 +798,48 @@ static int hyperv_handle_properties(CPUState *cs) - } - env->features[FEAT_HYPERV_EAX] |= HV_SYNTIMERS_AVAILABLE; - } -+ if (cpu->hyperv_relaxed_timing) { -+ env->features[FEAT_HV_RECOMM_EAX] |= HV_RELAXED_TIMING_RECOMMENDED; -+ } -+ if (cpu->hyperv_vapic) { -+ env->features[FEAT_HV_RECOMM_EAX] |= HV_APIC_ACCESS_RECOMMENDED; -+ } -+ if (cpu->hyperv_tlbflush) { -+ if (kvm_check_extension(cs->kvm_state, -+ KVM_CAP_HYPERV_TLBFLUSH) <= 0) { -+ fprintf(stderr, "Hyper-V TLB flush support " -+ "(requested by 'hv-tlbflush' cpu flag) " -+ " is not supported by kernel\n"); -+ return -ENOSYS; -+ } -+ env->features[FEAT_HV_RECOMM_EAX] |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; -+ env->features[FEAT_HV_RECOMM_EAX] |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; -+ } -+ if (cpu->hyperv_ipi) { -+ if (kvm_check_extension(cs->kvm_state, -+ KVM_CAP_HYPERV_SEND_IPI) <= 0) { -+ fprintf(stderr, "Hyper-V IPI send support " -+ "(requested by 'hv-ipi' cpu flag) " -+ " is not supported by kernel\n"); -+ return -ENOSYS; -+ } -+ env->features[FEAT_HV_RECOMM_EAX] |= HV_CLUSTER_IPI_RECOMMENDED; -+ env->features[FEAT_HV_RECOMM_EAX] |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; -+ } -+ if (cpu->hyperv_evmcs) { -+ uint16_t evmcs_version; -+ -+ if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, -+ (uintptr_t)&evmcs_version)) { -+ fprintf(stderr, "Hyper-V Enlightened VMCS " -+ "(requested by 'hv-evmcs' cpu flag) " -+ "is not supported by kernel\n"); -+ return -ENOSYS; -+ } -+ env->features[FEAT_HV_RECOMM_EAX] |= HV_ENLIGHTENED_VMCS_RECOMMENDED; -+ env->features[FEAT_HV_NESTED_EAX] = evmcs_version; -+ } -+ - return 0; - } - -@@ -871,7 +913,6 @@ int kvm_arch_init_vcpu(CPUState *cs) - uint32_t unused; - struct kvm_cpuid_entry2 *c; - uint32_t signature[3]; -- 
uint16_t evmcs_version; - int kvm_base = KVM_CPUID_SIGNATURE; - int r; - Error *local_err = NULL; -@@ -946,44 +987,8 @@ int kvm_arch_init_vcpu(CPUState *cs) - - c = &cpuid_data.entries[cpuid_i++]; - c->function = HV_CPUID_ENLIGHTMENT_INFO; -- if (cpu->hyperv_relaxed_timing) { -- c->eax |= HV_RELAXED_TIMING_RECOMMENDED; -- } -- if (cpu->hyperv_vapic) { -- c->eax |= HV_APIC_ACCESS_RECOMMENDED; -- } -- if (cpu->hyperv_tlbflush) { -- if (kvm_check_extension(cs->kvm_state, -- KVM_CAP_HYPERV_TLBFLUSH) <= 0) { -- fprintf(stderr, "Hyper-V TLB flush support " -- "(requested by 'hv-tlbflush' cpu flag) " -- " is not supported by kernel\n"); -- return -ENOSYS; -- } -- c->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; -- c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; -- } -- if (cpu->hyperv_ipi) { -- if (kvm_check_extension(cs->kvm_state, -- KVM_CAP_HYPERV_SEND_IPI) <= 0) { -- fprintf(stderr, "Hyper-V IPI send support " -- "(requested by 'hv-ipi' cpu flag) " -- " is not supported by kernel\n"); -- return -ENOSYS; -- } -- c->eax |= HV_CLUSTER_IPI_RECOMMENDED; -- c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; -- } -- if (cpu->hyperv_evmcs) { -- if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, -- (uintptr_t)&evmcs_version)) { -- fprintf(stderr, "Hyper-V Enlightened VMCS " -- "(requested by 'hv-evmcs' cpu flag) " -- "is not supported by kernel\n"); -- return -ENOSYS; -- } -- c->eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED; -- } -+ -+ c->eax = env->features[FEAT_HV_RECOMM_EAX]; - c->ebx = cpu->hyperv_spinlock_attempts; - - c = &cpuid_data.entries[cpuid_i++]; -@@ -1007,7 +1012,7 @@ int kvm_arch_init_vcpu(CPUState *cs) - - c = &cpuid_data.entries[cpuid_i++]; - c->function = HV_CPUID_NESTED_FEATURES; -- c->eax = evmcs_version; -+ c->eax = env->features[FEAT_HV_NESTED_EAX]; - } - } - --- -1.8.3.1 - diff --git a/kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch b/kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch deleted file mode 100644 index 2370ef4..0000000 --- 
a/kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch +++ /dev/null @@ -1,64 +0,0 @@ -From adf78309059e3346dddac518601f88f348ec7758 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Sat, 16 Feb 2019 00:00:49 +0000 -Subject: [PATCH 3/4] i386: remove the 'INTEL_PT' CPUID bit from named CPU - models - -RH-Author: Paolo Bonzini -Message-id: <1550275250-41719-3-git-send-email-pbonzini@redhat.com> -Patchwork-id: 84522 -O-Subject: [rhel-av-8.0.0 qemu-kvm PATCH 2/3] i386: remove the 'INTEL_PT' CPUID bit from named CPU models -Bugzilla: 1661515 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Igor Mammedov -RH-Acked-by: Stefano Garzarella - -Processor tracing is not yet implemented for KVM and it will be an -opt in feature requiring a special module parameter. -Disable it, because it is wrong to enable it by default and -it is impossible that no one has ever used it. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Paolo Bonzini -(cherry picked from commit 4c257911dcc7c4189768e9651755c849ce9db4e8) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 8 +++----- - 1 file changed, 3 insertions(+), 5 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 7b63900..169a2ce 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -2555,8 +2555,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | - CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | - CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | -- CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT | -- CPUID_7_0_EBX_INTEL_PT, -+ CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, - .features[FEAT_7_0_ECX] = - CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE | - CPUID_7_0_ECX_AVX512VNNI, -@@ -2608,7 +2607,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | - CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | - CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | -- CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_INTEL_PT, -+ CPUID_7_0_EBX_SMAP, - .features[FEAT_7_0_ECX] = - CPUID_7_0_ECX_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | - CPUID_7_0_ECX_OSPKE | CPUID_7_0_ECX_VBMI2 | CPUID_7_0_ECX_GFNI | -@@ -2666,8 +2665,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | - CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | - CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | -- CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT | -- CPUID_7_0_EBX_INTEL_PT, -+ CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, - .features[FEAT_7_0_ECX] = - CPUID_7_0_ECX_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | - CPUID_7_0_ECX_OSPKE | CPUID_7_0_ECX_VBMI2 | CPUID_7_0_ECX_GFNI | --- -1.8.3.1 - diff --git a/kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch b/kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch deleted file mode 100644 index dc9dee0..0000000 --- a/kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch +++ /dev/null @@ -1,48 +0,0 @@ 
-From 9fc28ea52c88d603e85fa806a708b53b373f511e Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Sat, 16 Feb 2019 00:00:48 +0000 -Subject: [PATCH 2/4] i386: remove the new CPUID 'PCONFIG' from Icelake-Server - CPU model - -RH-Author: Paolo Bonzini -Message-id: <1550275250-41719-2-git-send-email-pbonzini@redhat.com> -Patchwork-id: 84526 -O-Subject: [rhel-av-8.0.0 qemu-kvm PATCH 1/3] i386: remove the new CPUID 'PCONFIG' from Icelake-Server CPU model -Bugzilla: 1661515 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Igor Mammedov -RH-Acked-by: Stefano Garzarella - -From: Robert Hoo - -PCONFIG is not available to guests; it must be specifically enabled -using the PCONFIG_ENABLE execution control. Disable it, because -no one can ever use it. - -Signed-off-by: Robert Hoo -Message-Id: <1545227081-213696-2-git-send-email-robert.hu@linux.intel.com> -Cc: qemu-stable@nongnu.org -Signed-off-by: Paolo Bonzini -(cherry picked from commit 76e5a4d58357b9d077afccf7f7c82e17f733b722) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index dbcf632..7b63900 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -2675,8 +2675,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | - CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57, - .features[FEAT_7_0_EDX] = -- CPUID_7_0_EDX_PCONFIG | CPUID_7_0_EDX_SPEC_CTRL | -- CPUID_7_0_EDX_SPEC_CTRL_SSBD, -+ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_SPEC_CTRL_SSBD, - /* Missing: XSAVES (not supported by some Linux versions, - * including v4.1 to v4.12). 
- * KVM doesn't yet expose any XSAVES state save component, --- -1.8.3.1 - diff --git a/kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch b/kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch deleted file mode 100644 index 860e1a4..0000000 --- a/kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch +++ /dev/null @@ -1,294 +0,0 @@ -From dea7d39cce3b1da16de0bfb47a028f770547098a Mon Sep 17 00:00:00 2001 -From: "Daniel P. Berrange" -Date: Tue, 29 Jan 2019 13:58:57 +0000 -Subject: [PATCH 1/3] io: ensure UNIX client doesn't unlink server socket -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrange -Message-id: <20190129135857.10581-2-berrange@redhat.com> -Patchwork-id: 84141 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/1] io: ensure UNIX client doesn't unlink server socket -Bugzilla: 1665896 -RH-Acked-by: John Snow -RH-Acked-by: Thomas Huth -RH-Acked-by: Philippe Mathieu-Daudé - -The qio_channel_socket_close method for was mistakenly unlinking the -UNIX server socket, even if the channel was a client connection. This -was not noticed with chardevs, since they never call close, but with the -VNC server, this caused the VNC server socket to be deleted after the -first client quit. - -The qio_channel_socket_close method also needlessly reimplemented the -logic that already exists in socket_listen_cleanup(). Just call that -method directly, for listen sockets only. - -This fixes a regression introduced in QEMU 3.0.0 with - - commit d66f78e1eaa832f73c771d9df1b606fe75d52a50 - Author: Pavel Balaev - Date: Mon May 21 19:17:35 2018 +0300 - - Delete AF_UNIX socket after close - -Fixes launchpad #1795100 - -Reviewed-by: Eric Blake -Signed-off-by: Daniel P. Berrangé -(cherry picked from commit 73564c407caedf992a1c688b5fea776a8b56ba2a) -Signed-off-by: Danilo C. L. 
de Paula ---- - io/channel-socket.c | 19 ++-------- - tests/test-io-channel-socket.c | 86 +++++++++++++++++++++++++++++++++++++----- - 2 files changed, 80 insertions(+), 25 deletions(-) - -diff --git a/io/channel-socket.c b/io/channel-socket.c -index b50e63a..bc5f80e 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -688,10 +688,13 @@ qio_channel_socket_close(QIOChannel *ioc, - int rc = 0; - - if (sioc->fd != -1) { -- SocketAddress *addr = socket_local_address(sioc->fd, errp); - #ifdef WIN32 - WSAEventSelect(sioc->fd, NULL, 0); - #endif -+ if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_LISTEN)) { -+ socket_listen_cleanup(sioc->fd, errp); -+ } -+ - if (closesocket(sioc->fd) < 0) { - sioc->fd = -1; - error_setg_errno(errp, errno, -@@ -699,20 +702,6 @@ qio_channel_socket_close(QIOChannel *ioc, - return -1; - } - sioc->fd = -1; -- -- if (addr && addr->type == SOCKET_ADDRESS_TYPE_UNIX -- && addr->u.q_unix.path) { -- if (unlink(addr->u.q_unix.path) < 0 && errno != ENOENT) { -- error_setg_errno(errp, errno, -- "Failed to unlink socket %s", -- addr->u.q_unix.path); -- rc = -1; -- } -- } -- -- if (addr) { -- qapi_free_SocketAddress(addr); -- } - } - return rc; - } -diff --git a/tests/test-io-channel-socket.c b/tests/test-io-channel-socket.c -index 0597213..c253ae3 100644 ---- a/tests/test-io-channel-socket.c -+++ b/tests/test-io-channel-socket.c -@@ -49,6 +49,7 @@ static void test_io_channel_set_socket_bufs(QIOChannel *src, - - static void test_io_channel_setup_sync(SocketAddress *listen_addr, - SocketAddress *connect_addr, -+ QIOChannel **srv, - QIOChannel **src, - QIOChannel **dst) - { -@@ -78,7 +79,7 @@ static void test_io_channel_setup_sync(SocketAddress *listen_addr, - - test_io_channel_set_socket_bufs(*src, *dst); - -- object_unref(OBJECT(lioc)); -+ *srv = QIO_CHANNEL(lioc); - } - - -@@ -99,6 +100,7 @@ static void test_io_channel_complete(QIOTask *task, - - static void test_io_channel_setup_async(SocketAddress *listen_addr, - SocketAddress 
*connect_addr, -+ QIOChannel **srv, - QIOChannel **src, - QIOChannel **dst) - { -@@ -146,21 +148,34 @@ static void test_io_channel_setup_async(SocketAddress *listen_addr, - qio_channel_set_delay(*src, false); - test_io_channel_set_socket_bufs(*src, *dst); - -- object_unref(OBJECT(lioc)); -+ *srv = QIO_CHANNEL(lioc); - - g_main_loop_unref(data.loop); - } - - -+static void test_io_channel_socket_path_exists(SocketAddress *addr, -+ bool expectExists) -+{ -+ if (addr->type != SOCKET_ADDRESS_TYPE_UNIX) { -+ return; -+ } -+ -+ g_assert(g_file_test(addr->u.q_unix.path, -+ G_FILE_TEST_EXISTS) == expectExists); -+} -+ -+ - static void test_io_channel(bool async, - SocketAddress *listen_addr, - SocketAddress *connect_addr, - bool passFD) - { -- QIOChannel *src, *dst; -+ QIOChannel *src, *dst, *srv; - QIOChannelTest *test; - if (async) { -- test_io_channel_setup_async(listen_addr, connect_addr, &src, &dst); -+ test_io_channel_setup_async(listen_addr, connect_addr, -+ &srv, &src, &dst); - - g_assert(!passFD || - qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_FD_PASS)); -@@ -169,14 +184,25 @@ static void test_io_channel(bool async, - g_assert(qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_SHUTDOWN)); - g_assert(qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_SHUTDOWN)); - -+ test_io_channel_socket_path_exists(listen_addr, true); -+ - test = qio_channel_test_new(); - qio_channel_test_run_threads(test, true, src, dst); - qio_channel_test_validate(test); - -+ test_io_channel_socket_path_exists(listen_addr, true); -+ -+ /* unref without close, to ensure finalize() cleans up */ -+ - object_unref(OBJECT(src)); - object_unref(OBJECT(dst)); -+ test_io_channel_socket_path_exists(listen_addr, true); - -- test_io_channel_setup_async(listen_addr, connect_addr, &src, &dst); -+ object_unref(OBJECT(srv)); -+ test_io_channel_socket_path_exists(listen_addr, false); -+ -+ test_io_channel_setup_async(listen_addr, connect_addr, -+ &srv, &src, &dst); - - g_assert(!passFD || - 
qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_FD_PASS)); -@@ -189,10 +215,24 @@ static void test_io_channel(bool async, - qio_channel_test_run_threads(test, false, src, dst); - qio_channel_test_validate(test); - -+ /* close before unref, to ensure finalize copes with already closed */ -+ -+ qio_channel_close(src, &error_abort); -+ qio_channel_close(dst, &error_abort); -+ test_io_channel_socket_path_exists(listen_addr, true); -+ - object_unref(OBJECT(src)); - object_unref(OBJECT(dst)); -+ test_io_channel_socket_path_exists(listen_addr, true); -+ -+ qio_channel_close(srv, &error_abort); -+ test_io_channel_socket_path_exists(listen_addr, false); -+ -+ object_unref(OBJECT(srv)); -+ test_io_channel_socket_path_exists(listen_addr, false); - } else { -- test_io_channel_setup_sync(listen_addr, connect_addr, &src, &dst); -+ test_io_channel_setup_sync(listen_addr, connect_addr, -+ &srv, &src, &dst); - - g_assert(!passFD || - qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_FD_PASS)); -@@ -201,14 +241,25 @@ static void test_io_channel(bool async, - g_assert(qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_SHUTDOWN)); - g_assert(qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_SHUTDOWN)); - -+ test_io_channel_socket_path_exists(listen_addr, true); -+ - test = qio_channel_test_new(); - qio_channel_test_run_threads(test, true, src, dst); - qio_channel_test_validate(test); - -+ test_io_channel_socket_path_exists(listen_addr, true); -+ -+ /* unref without close, to ensure finalize() cleans up */ -+ - object_unref(OBJECT(src)); - object_unref(OBJECT(dst)); -+ test_io_channel_socket_path_exists(listen_addr, true); -+ -+ object_unref(OBJECT(srv)); -+ test_io_channel_socket_path_exists(listen_addr, false); - -- test_io_channel_setup_sync(listen_addr, connect_addr, &src, &dst); -+ test_io_channel_setup_sync(listen_addr, connect_addr, -+ &srv, &src, &dst); - - g_assert(!passFD || - qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_FD_PASS)); -@@ -221,8 +272,23 @@ static void 
test_io_channel(bool async, - qio_channel_test_run_threads(test, false, src, dst); - qio_channel_test_validate(test); - -+ test_io_channel_socket_path_exists(listen_addr, true); -+ -+ /* close before unref, to ensure finalize copes with already closed */ -+ -+ qio_channel_close(src, &error_abort); -+ qio_channel_close(dst, &error_abort); -+ test_io_channel_socket_path_exists(listen_addr, true); -+ - object_unref(OBJECT(src)); - object_unref(OBJECT(dst)); -+ test_io_channel_socket_path_exists(listen_addr, true); -+ -+ qio_channel_close(srv, &error_abort); -+ test_io_channel_socket_path_exists(listen_addr, false); -+ -+ object_unref(OBJECT(srv)); -+ test_io_channel_socket_path_exists(listen_addr, false); - } - } - -@@ -316,7 +382,6 @@ static void test_io_channel_unix(bool async) - - qapi_free_SocketAddress(listen_addr); - qapi_free_SocketAddress(connect_addr); -- g_assert(g_file_test(TEST_SOCKET, G_FILE_TEST_EXISTS) == FALSE); - } - - -@@ -335,7 +400,7 @@ static void test_io_channel_unix_fd_pass(void) - { - SocketAddress *listen_addr = g_new0(SocketAddress, 1); - SocketAddress *connect_addr = g_new0(SocketAddress, 1); -- QIOChannel *src, *dst; -+ QIOChannel *src, *dst, *srv; - int testfd; - int fdsend[3]; - int *fdrecv = NULL; -@@ -359,7 +424,7 @@ static void test_io_channel_unix_fd_pass(void) - connect_addr->type = SOCKET_ADDRESS_TYPE_UNIX; - connect_addr->u.q_unix.path = g_strdup(TEST_SOCKET); - -- test_io_channel_setup_sync(listen_addr, connect_addr, &src, &dst); -+ test_io_channel_setup_sync(listen_addr, connect_addr, &srv, &src, &dst); - - memcpy(bufsend, "Hello World", G_N_ELEMENTS(bufsend)); - -@@ -412,6 +477,7 @@ static void test_io_channel_unix_fd_pass(void) - - object_unref(OBJECT(src)); - object_unref(OBJECT(dst)); -+ object_unref(OBJECT(srv)); - qapi_free_SocketAddress(listen_addr); - qapi_free_SocketAddress(connect_addr); - unlink(TEST_SOCKET); --- -1.8.3.1 - diff --git a/kvm-iotests-Fix-test-200-on-s390x-without-virtio-pci.patch 
b/kvm-iotests-Fix-test-200-on-s390x-without-virtio-pci.patch deleted file mode 100644 index 9766ea7..0000000 --- a/kvm-iotests-Fix-test-200-on-s390x-without-virtio-pci.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 887d5afffeff844b1284b380e53f178f68e15087 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 3 Apr 2019 10:17:09 +0100 -Subject: [PATCH 8/9] iotests: Fix test 200 on s390x without virtio-pci - -RH-Author: Thomas Huth -Message-id: <20190403101709.3284-2-thuth@redhat.com> -Patchwork-id: 85312 -O-Subject: [RHEL-AV-8.0.1 qemu-kvm PATCH 1/1] iotests: Fix test 200 on s390x without virtio-pci -Bugzilla: 1687582 -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -virtio-pci is optional on s390x, e.g. in downstream RHEL builds, it -is disabled. On s390x, virtio-ccw should be used instead. Other tests -like 051 or 240 already use virtio-scsi-ccw instead of virtio-scsi-pci -on s390x, so let's do the same here and always use virtio-scsi-ccw on -s390x. - -Signed-off-by: Thomas Huth -Reviewed-by: John Snow -Signed-off-by: Kevin Wolf -(cherry picked from commit e0a59749efc246646bb208e553489b894450cbcd) -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/200 | 14 +++++++++++--- - 1 file changed, 11 insertions(+), 3 deletions(-) - -diff --git a/tests/qemu-iotests/200 b/tests/qemu-iotests/200 -index b9ebd5a..8301ff5 100755 ---- a/tests/qemu-iotests/200 -+++ b/tests/qemu-iotests/200 -@@ -52,13 +52,21 @@ ${QEMU_IMG} create -f $IMGFMT -F $IMGFMT "${TEST_IMG}" -b "${BACKING_IMG}" 512M - - ${QEMU_IO} -c "write -P 0xa5 512 300M" "${BACKING_IMG}" | _filter_qemu_io - -+case "$QEMU_DEFAULT_MACHINE" in -+ s390-ccw-virtio) -+ virtio_scsi="-device virtio-scsi-ccw,id=scsi0,iothread=iothread0" -+ ;; -+ *) -+ virtio_scsi="-device pci-bridge,id=bridge1,chassis_nr=1,bus=pci.0 -+ -device virtio-scsi-pci,bus=bridge1,addr=0x1f,id=scsi0,iothread=iothread0" -+ ;; -+esac -+ - echo - echo === Starting QEMU VM === - echo - qemu_comm_method="qmp" --_launch_qemu -device pci-bridge,id=bridge1,chassis_nr=1,bus=pci.0 \ -- -object iothread,id=iothread0 \ -- -device virtio-scsi-pci,bus=bridge1,addr=0x1f,id=scsi0,iothread=iothread0 \ -+_launch_qemu -object iothread,id=iothread0 $virtio_scsi \ - -drive file="${TEST_IMG}",media=disk,if=none,cache=$CACHEMODE,id=drive_sysdisk,format=$IMGFMT \ - -device scsi-hd,drive=drive_sysdisk,bus=scsi0.0,id=sysdisk,bootindex=0 - h1=$QEMU_HANDLE --- -1.8.3.1 - diff --git a/kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch b/kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch deleted file mode 100644 index 6da2540..0000000 --- a/kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch +++ /dev/null @@ -1,122 +0,0 @@ -From 91ae068923b70fc62c8504f7c77e42829b4c2e18 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 29 Jan 2019 07:02:50 +0000 -Subject: [PATCH 2/2] iotests: add 238 for throttling tgm unregister iothread - segfault - -RH-Author: Stefan Hajnoczi -Message-id: <20190129070250.22709-3-stefanha@redhat.com> -Patchwork-id: 84138 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 2/2] iotests: add 238 for throttling tgm unregister iothread segfault 
-Bugzilla: 1655947 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Thomas Huth -RH-Acked-by: Laurent Vivier - -Hot-unplug a scsi-hd using an iothread. The previous patch fixes a -segfault in this scenario. - -This patch adds a regression test. - -Suggested-by: Alberto Garcia -Suggested-by: Kevin Wolf -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Alberto Garcia -Message-id: 20190114133257.30299-3-stefanha@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 202277f43d544779b7a63123a51c54c3a16b74ad) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. de Paula - -Conflicts: - tests/qemu-iotests/group - - Context conflict because downstream doesn't have 236. This patch adds - 238 and doesn't depend on 236. - -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/238 | 47 ++++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/238.out | 6 ++++++ - tests/qemu-iotests/group | 1 + - 3 files changed, 54 insertions(+) - create mode 100755 tests/qemu-iotests/238 - create mode 100644 tests/qemu-iotests/238.out - -diff --git a/tests/qemu-iotests/238 b/tests/qemu-iotests/238 -new file mode 100755 -index 0000000..f81ee11 ---- /dev/null -+++ b/tests/qemu-iotests/238 -@@ -0,0 +1,47 @@ -+#!/usr/bin/env python -+# -+# Regression test for throttle group member unregister segfault with iothread -+# -+# Copyright (c) 2019 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. 
If not, see . -+# -+ -+import sys -+import os -+import iotests -+from iotests import log -+ -+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts')) -+ -+from qemu import QEMUMachine -+ -+if iotests.qemu_default_machine == 's390-ccw-virtio': -+ virtio_scsi_device = 'virtio-scsi-ccw' -+else: -+ virtio_scsi_device = 'virtio-scsi-pci' -+ -+vm = QEMUMachine(iotests.qemu_prog) -+vm.add_args('-machine', 'accel=kvm') -+vm.launch() -+ -+log(vm.qmp('blockdev-add', node_name='hd0', driver='null-co')) -+log(vm.qmp('object-add', qom_type='iothread', id='iothread0')) -+log(vm.qmp('device_add', id='scsi0', driver=virtio_scsi_device, iothread='iothread0')) -+log(vm.qmp('device_add', id='scsi-hd0', driver='scsi-hd', drive='hd0')) -+log(vm.qmp('block_set_io_throttle', id='scsi-hd0', bps=0, bps_rd=0, bps_wr=0, -+ iops=1000, iops_rd=0, iops_wr=0, conv_keys=False)) -+log(vm.qmp('device_del', id='scsi-hd0')) -+ -+vm.shutdown() -diff --git a/tests/qemu-iotests/238.out b/tests/qemu-iotests/238.out -new file mode 100644 -index 0000000..4de840b ---- /dev/null -+++ b/tests/qemu-iotests/238.out -@@ -0,0 +1,6 @@ -+{"return": {}} -+{"return": {}} -+{"return": {}} -+{"return": {}} -+{"return": {}} -+{"return": {}} -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 05996ae..268fefa 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -233,3 +233,4 @@ - 233 auto quick - 234 auto quick migration - 235 auto quick -+238 auto quick --- -1.8.3.1 - diff --git a/kvm-iotests-add-LUKS-payload-overhead-to-178-qemu-img-me.patch b/kvm-iotests-add-LUKS-payload-overhead-to-178-qemu-img-me.patch deleted file mode 100644 index 074fc32..0000000 --- a/kvm-iotests-add-LUKS-payload-overhead-to-178-qemu-img-me.patch +++ /dev/null @@ -1,113 +0,0 @@ -From dcce446158c042fd0aa54a6ebcc61c00f8d4759e Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 4 Mar 2019 08:54:27 +0000 -Subject: [PATCH 4/9] iotests: add LUKS payload overhead to 178 qemu-img 
- measure test -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -Message-id: <20190304085427.8148-3-stefanha@redhat.com> -Patchwork-id: 84777 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 2/2] iotests: add LUKS payload overhead to 178 qemu-img measure test -Bugzilla: 1655065 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf - -The previous patch includes the LUKS payload overhead into the qemu-img -measure calculation for qcow2. Update qemu-iotests 178 to exercise this -new code path. - -Reviewed-by: Max Reitz -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Stefan Hajnoczi -Message-id: 20190218104525.23674-3-stefanha@redhat.com -Signed-off-by: Max Reitz -(cherry picked from commit 0482098608b83b559bc1802e4c612051b51f6c4c) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/178 | 8 ++++++++ - tests/qemu-iotests/178.out.qcow2 | 24 ++++++++++++++++++++++++ - 2 files changed, 32 insertions(+) - -diff --git a/tests/qemu-iotests/178 b/tests/qemu-iotests/178 -index 3f4b4a4..927bf06 100755 ---- a/tests/qemu-iotests/178 -+++ b/tests/qemu-iotests/178 -@@ -142,6 +142,14 @@ for ofmt in human json; do - # The backing file doesn't need to exist :) - $QEMU_IMG measure --output=$ofmt -o backing_file=x \ - -f "$fmt" -O "$IMGFMT" "$TEST_IMG" -+ -+ echo -+ echo "== $fmt input image and LUKS encryption ==" -+ echo -+ $QEMU_IMG measure --output=$ofmt \ -+ --object secret,id=sec0,data=base \ -+ -o encrypt.format=luks,encrypt.key-secret=sec0,encrypt.iter-time=10 \ -+ -f "$fmt" -O "$IMGFMT" "$TEST_IMG" - fi - - echo -diff --git a/tests/qemu-iotests/178.out.qcow2 b/tests/qemu-iotests/178.out.qcow2 -index d42d4a4..55a8dc9 100644 ---- a/tests/qemu-iotests/178.out.qcow2 -+++ b/tests/qemu-iotests/178.out.qcow2 -@@ -68,6 +68,11 @@ converted image file size in bytes: 458752 - required size: 1074135040 - fully allocated size: 1074135040 
- -+== qcow2 input image and LUKS encryption == -+ -+required size: 2686976 -+fully allocated size: 1076232192 -+ - == qcow2 input image and preallocation (human) == - - required size: 1074135040 -@@ -114,6 +119,11 @@ converted image file size in bytes: 524288 - required size: 1074135040 - fully allocated size: 1074135040 - -+== raw input image and LUKS encryption == -+ -+required size: 2686976 -+fully allocated size: 1076232192 -+ - == raw input image and preallocation (human) == - - required size: 1074135040 -@@ -205,6 +215,13 @@ converted image file size in bytes: 458752 - "fully-allocated": 1074135040 - } - -+== qcow2 input image and LUKS encryption == -+ -+{ -+ "required": 2686976, -+ "fully-allocated": 1076232192 -+} -+ - == qcow2 input image and preallocation (json) == - - { -@@ -263,6 +280,13 @@ converted image file size in bytes: 524288 - "fully-allocated": 1074135040 - } - -+== raw input image and LUKS encryption == -+ -+{ -+ "required": 2686976, -+ "fully-allocated": 1076232192 -+} -+ - == raw input image and preallocation (json) == - - { --- -1.8.3.1 - diff --git a/kvm-json-Fix-handling-when-not-interpolating.patch b/kvm-json-Fix-handling-when-not-interpolating.patch deleted file mode 100644 index e589371..0000000 --- a/kvm-json-Fix-handling-when-not-interpolating.patch +++ /dev/null @@ -1,122 +0,0 @@ -From ee704181e5f2dd1ebc6a2de0f9e750a11541cd47 Mon Sep 17 00:00:00 2001 -From: Markus Armbruster -Date: Thu, 31 Jan 2019 14:28:01 +0000 -Subject: [PATCH 2/2] json: Fix % handling when not interpolating - -RH-Author: Markus Armbruster -Message-id: <20190131142801.15268-2-armbru@redhat.com> -Patchwork-id: 84158 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/1] json: Fix % handling when not interpolating -Bugzilla: 1668244 -RH-Acked-by: Richard Jones -RH-Acked-by: Daniel P. 
Berrange -RH-Acked-by: Danilo de Paula - -From: Christophe Fergeau - -Commit 8bca4613 added support for %% in json strings when interpolating, -but in doing so broke handling of % when not interpolating. - -When parse_string() is fed a string token containing '%', it skips the -'%' regardless of ctxt->ap, i.e. even it's not interpolating. If the -'%' is the string's last character, it fails an assertion. Else, it -"merely" swallows the '%'. - -Fix parse_string() to handle '%' specially only when interpolating. - -To gauge the bug's impact, let's review non-interpolating users of this -parser, i.e. code passing NULL context to json_message_parser_init(): - -* tests/check-qjson.c, tests/test-qobject-input-visitor.c, - tests/test-visitor-serialization.c - - Plenty of tests, but we still failed to cover the buggy case. - -* monitor.c: QMP input - -* qga/main.c: QGA input - -* qobject_from_json(): - - - qobject-input-visitor.c: JSON command line option arguments of - -display and -blockdev - - Reproducer: -blockdev '{"%"}' - - - block.c: JSON pseudo-filenames starting with "json:" - - Reproducer: https://bugzilla.redhat.com/show_bug.cgi?id=1668244#c3 - - - block/rbd.c: JSON key pairs - - Pseudo-filenames starting with "rbd:". - -Command line, QMP and QGA input are trusted. - -Filenames are trusted when they come from command line, QMP or HMP. -They are untrusted when they come from from image file headers. -Example: QCOW2 backing file name. Note that this is *not* the security -boundary between host and guest. It's the boundary between host and an -image file from an untrusted source. - -Neither failing an assertion nor skipping a character in a filename of -your choice looks exploitable. Note that we don't support compiling -with NDEBUG. - -Fixes: 8bca4613e6cddd948895b8db3def05950463495b -Cc: qemu-stable@nongnu.org -Signed-off-by: Christophe Fergeau -Message-Id: <20190102140535.11512-1-cfergeau@redhat.com> -Reviewed-by: Eric Blake -Tested-by: Richard W.M. 
Jones -[Commit message extended to discuss impact] -Signed-off-by: Markus Armbruster -(cherry picked from commit bbc0586ced6e9ffdfd29d89fcc917b3d90ac3938) - -Signed-off-by: Danilo C. L. de Paula ---- - qobject/json-parser.c | 10 ++++++---- - tests/check-qjson.c | 5 +++++ - 2 files changed, 11 insertions(+), 4 deletions(-) - -diff --git a/qobject/json-parser.c b/qobject/json-parser.c -index 5a840df..53e91cb 100644 ---- a/qobject/json-parser.c -+++ b/qobject/json-parser.c -@@ -208,11 +208,13 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token) - } - break; - case '%': -- if (ctxt->ap && ptr[1] != '%') { -- parse_error(ctxt, token, "can't interpolate into string"); -- goto out; -+ if (ctxt->ap) { -+ if (ptr[1] != '%') { -+ parse_error(ctxt, token, "can't interpolate into string"); -+ goto out; -+ } -+ ptr++; - } -- ptr++; - /* fall through */ - default: - cp = mod_utf8_codepoint(ptr, 6, &end); -diff --git a/tests/check-qjson.c b/tests/check-qjson.c -index d876a7a..fa2afcc 100644 ---- a/tests/check-qjson.c -+++ b/tests/check-qjson.c -@@ -176,6 +176,11 @@ static void utf8_string(void) - "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5", - "\\u03BA\\u1F79\\u03C3\\u03BC\\u03B5", - }, -+ /* '%' character when not interpolating */ -+ { -+ "100%", -+ "100%", -+ }, - /* 2 Boundary condition test cases */ - /* 2.1 First possible sequence of a certain length */ - /* --- -1.8.3.1 - diff --git a/kvm-migration-Fix-cancel-state.patch b/kvm-migration-Fix-cancel-state.patch deleted file mode 100644 index 618acba..0000000 --- a/kvm-migration-Fix-cancel-state.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 31566a415b69d58fdf09f05e362685fcc3aee00b Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 7 Mar 2019 12:26:21 +0000 -Subject: [PATCH 1/2] migration: Fix cancel state - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20190307122622.9387-2-dgilbert@redhat.com> -Patchwork-id: 84820 -O-Subject: [RHEL-8.0 qemu-kvm AV PATCH 1/2] migration: Fix cancel state -Bugzilla: 1608649 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Pankaj Gupta - -From: "Dr. David Alan Gilbert" - -During a cancelled migration there's a race where the fd can -go into an error state before we get back around the migration loop -and migration_detect_error transitions from cancelling->failed. - -Check for cancelled/cancelling and don't change the state. - -Red Hat bug: https://bugzilla.redhat.com/show_bug.cgi?id=1608649 - -Fixes: b23c2ade250 -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20190219195928.12289-1-dgilbert@redhat.com> -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Peter Xu -Reviewed-by: Juan Quintela -(cherry picked from commit c3c5eae6ac69d9f7d4618407aa6c8ac5edc2267c) -Signed-off-by: Danilo C. L. de Paula ---- - migration/migration.c | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 0d9cb7a..ecdf01d 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2824,6 +2824,13 @@ static MigThrError postcopy_pause(MigrationState *s) - static MigThrError migration_detect_error(MigrationState *s) - { - int ret; -+ int state = s->state; -+ -+ if (state == MIGRATION_STATUS_CANCELLING || -+ state == MIGRATION_STATUS_CANCELLED) { -+ /* End the migration, but don't set the state to failed */ -+ return MIG_THR_ERR_FATAL; -+ } - - /* Try to detect any file errors */ - ret = qemu_file_get_error(s->to_dst_file); -@@ -2833,7 +2840,7 @@ static MigThrError migration_detect_error(MigrationState *s) - return MIG_THR_ERR_NONE; - } - -- if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) { -+ if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) { - /* - * For postcopy, we allow the network to be down for a - * while. 
After that, it can be continued by a -@@ -2845,7 +2852,7 @@ static MigThrError migration_detect_error(MigrationState *s) - * For precopy (or postcopy with error outside IO), we fail - * with no time. - */ -- migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); -+ migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED); - trace_migration_thread_file_err(); - - /* Time to stop the migration, now. */ --- -1.8.3.1 - diff --git a/kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch b/kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch deleted file mode 100644 index 8f99d0e..0000000 --- a/kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch +++ /dev/null @@ -1,54 +0,0 @@ -From aa5367b5e81cdc3cc3dc8dbc31daef6acecc1f97 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 7 Mar 2019 12:26:22 +0000 -Subject: [PATCH 2/2] migration/rdma: Fix qemu_rdma_cleanup null check -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20190307122622.9387-3-dgilbert@redhat.com> -Patchwork-id: 84819 -O-Subject: [RHEL-8.0 qemu-kvm AV PATCH 2/2] migration/rdma: Fix qemu_rdma_cleanup null check -Bugzilla: 1608649 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Pankaj Gupta - -From: "Dr. David Alan Gilbert" - -If the migration fails before the channel is open (e.g. a bad -address) we end up in the cleanup with rdma->channel==NULL. - -Spotted by Coverity: CID 1398634 -Fixes: fbbaacab2758cb3f32a0 -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20190214185351.5927-1-dgilbert@redhat.com> -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Peter Xu -Reviewed-by: Philippe Mathieu-Daudé -(cherry picked from commit cf75e2684938413f0bbe95f5a4b7db5c845e42c8) - -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/rdma.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/migration/rdma.c b/migration/rdma.c -index 54a3c11..9fa3b17 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -2321,7 +2321,9 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) - rdma->connected = false; - } - -- qemu_set_fd_handler(rdma->channel->fd, NULL, NULL, NULL); -+ if (rdma->channel) { -+ qemu_set_fd_handler(rdma->channel->fd, NULL, NULL, NULL); -+ } - g_free(rdma->dest_blocks); - rdma->dest_blocks = NULL; - --- -1.8.3.1 - diff --git a/kvm-migration-rdma-unregister-fd-handler.patch b/kvm-migration-rdma-unregister-fd-handler.patch deleted file mode 100644 index b6281a3..0000000 --- a/kvm-migration-rdma-unregister-fd-handler.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 93cfdba0a95999ba3d44afe5c15cc3810446f11b Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 24 Jan 2019 16:41:10 +0000 -Subject: [PATCH 1/3] migration/rdma: unregister fd handler -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20190124164110.18787-2-dgilbert@redhat.com> -Patchwork-id: 84110 -O-Subject: [RHEL-8.0 qemu-kvm PATCH 1/1] migration/rdma: unregister fd handler -Bugzilla: 1666601 -RH-Acked-by: Peter Xu -RH-Acked-by: Pankaj Gupta -RH-Acked-by: Philippe Mathieu-Daudé - -From: "Dr. David Alan Gilbert" - -Unregister the fd handler before we destroy the channel, -otherwise we've got a race where we might land in the -fd handler just as we're closing the device. - -(The race is quite data dependent, you just have to have -the right set of devices for it to trigger). - -Corresponds to RH bz: https://bugzilla.redhat.com/show_bug.cgi?id=1666601 - -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20190122173111.29821-1-dgilbert@redhat.com> -Reviewed-by: Peter Xu -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit fbbaacab2758cb3f32a07524710533b1d6422be4) -Signed-off-by: Danilo C. L. de Paula ---- - migration/rdma.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/migration/rdma.c b/migration/rdma.c -index 9b2e7e1..54a3c11 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -2321,6 +2321,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) - rdma->connected = false; - } - -+ qemu_set_fd_handler(rdma->channel->fd, NULL, NULL, NULL); - g_free(rdma->dest_blocks); - rdma->dest_blocks = NULL; - --- -1.8.3.1 - diff --git a/kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch b/kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch deleted file mode 100644 index 7bf4e7f..0000000 --- a/kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch +++ /dev/null @@ -1,177 +0,0 @@ -From 1a283b8cdd349b9085488a516f26f453c8591ce2 Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Tue, 5 Feb 2019 04:47:57 +0000 -Subject: [PATCH 3/8] mmap-alloc: fix hugetlbfs misaligned length in ppc64 - -RH-Author: David Gibson -Message-id: <20190205044757.13591-3-dgibson@redhat.com> -Patchwork-id: 84233 -O-Subject: [RHELAV-8.0 qemu-kvm PATCH 2/2] mmap-alloc: fix hugetlbfs misaligned length in ppc64 -Bugzilla: 1671519 -RH-Acked-by: Pankaj Gupta -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth - -From: Murilo Opsfelder Araujo - -The commit 7197fb4058bcb68986bae2bb2c04d6370f3e7218 ("util/mmap-alloc: -fix hugetlb support on ppc64") fixed Huge TLB mappings on ppc64. - -However, we still need to consider the underlying huge page size -during munmap() because it requires that both address and length be a -multiple of the underlying huge page size for Huge TLB mappings. -Quote from "Huge page (Huge TLB) mappings" paragraph under NOTES -section of the munmap(2) manual: - - "For munmap(), addr and length must both be a multiple of the - underlying huge page size." 
- -On ppc64, the munmap() in qemu_ram_munmap() does not work for Huge TLB -mappings because the mapped segment can be aligned with the underlying -huge page size, not aligned with the native system page size, as -returned by getpagesize(). - -This has the side effect of not releasing huge pages back to the pool -after a hugetlbfs file-backed memory device is hot-unplugged. - -This patch fixes the situation in qemu_ram_mmap() and -qemu_ram_munmap() by considering the underlying page size on ppc64. - -After this patch, memory hot-unplug releases huge pages back to the -pool. - -Fixes: 7197fb4058bcb68986bae2bb2c04d6370f3e7218 -Signed-off-by: Murilo Opsfelder Araujo -Reviewed-by: Greg Kurz -Signed-off-by: David Gibson -(cherry picked from commit 53adb9d43e1abba187387a51f238e878e934c647) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1671519 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. de Paula ---- - exec.c | 4 ++-- - include/qemu/mmap-alloc.h | 2 +- - util/mmap-alloc.c | 22 ++++++++++++++++------ - util/oslib-posix.c | 2 +- - 4 files changed, 20 insertions(+), 10 deletions(-) - -diff --git a/exec.c b/exec.c -index bb6170d..38eaf0f 100644 ---- a/exec.c -+++ b/exec.c -@@ -1870,7 +1870,7 @@ static void *file_ram_alloc(RAMBlock *block, - if (mem_prealloc) { - os_mem_prealloc(fd, area, memory, smp_cpus, errp); - if (errp && *errp) { -- qemu_ram_munmap(area, memory); -+ qemu_ram_munmap(fd, area, memory); - return NULL; - } - } -@@ -2391,7 +2391,7 @@ static void reclaim_ramblock(RAMBlock *block) - xen_invalidate_map_cache_entry(block->host); - #ifndef _WIN32 - } else if (block->fd >= 0) { -- qemu_ram_munmap(block->host, block->max_length); -+ qemu_ram_munmap(block->fd, block->host, block->max_length); - close(block->fd); - #endif - } else { -diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h -index 50385e3..ef04f0e 100644 ---- a/include/qemu/mmap-alloc.h -+++ b/include/qemu/mmap-alloc.h -@@ -9,6 +9,6 @@ size_t 
qemu_mempath_getpagesize(const char *mem_path); - - void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared); - --void qemu_ram_munmap(void *ptr, size_t size); -+void qemu_ram_munmap(int fd, void *ptr, size_t size); - - #endif -diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c -index f71ea03..8565885 100644 ---- a/util/mmap-alloc.c -+++ b/util/mmap-alloc.c -@@ -80,6 +80,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) - int flags; - int guardfd; - size_t offset; -+ size_t pagesize; - size_t total; - void *guardptr; - void *ptr; -@@ -100,7 +101,8 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) - * anonymous memory is OK. - */ - flags = MAP_PRIVATE; -- if (fd == -1 || qemu_fd_getpagesize(fd) == getpagesize()) { -+ pagesize = qemu_fd_getpagesize(fd); -+ if (fd == -1 || pagesize == getpagesize()) { - guardfd = -1; - flags |= MAP_ANONYMOUS; - } else { -@@ -109,6 +111,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) - } - #else - guardfd = -1; -+ pagesize = getpagesize(); - flags = MAP_PRIVATE | MAP_ANONYMOUS; - #endif - -@@ -120,7 +123,7 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) - - assert(is_power_of_2(align)); - /* Always align to host page size */ -- assert(align >= getpagesize()); -+ assert(align >= pagesize); - - flags = MAP_FIXED; - flags |= fd == -1 ? MAP_ANONYMOUS : 0; -@@ -143,17 +146,24 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) - * a guard page guarding against potential buffer overflows. 
- */ - total -= offset; -- if (total > size + getpagesize()) { -- munmap(ptr + size + getpagesize(), total - size - getpagesize()); -+ if (total > size + pagesize) { -+ munmap(ptr + size + pagesize, total - size - pagesize); - } - - return ptr; - } - --void qemu_ram_munmap(void *ptr, size_t size) -+void qemu_ram_munmap(int fd, void *ptr, size_t size) - { -+ size_t pagesize; -+ - if (ptr) { - /* Unmap both the RAM block and the guard page */ -- munmap(ptr, size + getpagesize()); -+#if defined(__powerpc64__) && defined(__linux__) -+ pagesize = qemu_fd_getpagesize(fd); -+#else -+ pagesize = getpagesize(); -+#endif -+ munmap(ptr, size + pagesize); - } - } -diff --git a/util/oslib-posix.c b/util/oslib-posix.c -index c1bee2a..97b2f3b 100644 ---- a/util/oslib-posix.c -+++ b/util/oslib-posix.c -@@ -226,7 +226,7 @@ void qemu_vfree(void *ptr) - void qemu_anon_ram_free(void *ptr, size_t size) - { - trace_qemu_anon_ram_free(ptr, size); -- qemu_ram_munmap(ptr, size); -+ qemu_ram_munmap(-1, ptr, size); - } - - void qemu_set_block(int fd) --- -1.8.3.1 - diff --git a/kvm-mmap-alloc-unfold-qemu_ram_mmap.patch b/kvm-mmap-alloc-unfold-qemu_ram_mmap.patch deleted file mode 100644 index 3a6dd86..0000000 --- a/kvm-mmap-alloc-unfold-qemu_ram_mmap.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 2215d38d1231284cced64d94b4430e92c9e2c017 Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Tue, 5 Feb 2019 04:47:56 +0000 -Subject: [PATCH 2/8] mmap-alloc: unfold qemu_ram_mmap() - -RH-Author: David Gibson -Message-id: <20190205044757.13591-2-dgibson@redhat.com> -Patchwork-id: 84234 -O-Subject: [RHELAV-8.0 qemu-kvm PATCH 1/2] mmap-alloc: unfold qemu_ram_mmap() -Bugzilla: 1671519 -RH-Acked-by: Pankaj Gupta -RH-Acked-by: Laurent Vivier -RH-Acked-by: Serhii Popovych - -From: Murilo Opsfelder Araujo - -Unfold parts of qemu_ram_mmap() for the sake of understanding, moving -declarations to the top, and keeping architecture-specifics in the -ifdef-else blocks. No changes in the function behaviour. 
- -Give ptr and ptr1 meaningful names: - ptr -> guardptr : pointer to the PROT_NONE guard region - ptr1 -> ptr : pointer to the mapped memory returned to caller - -Signed-off-by: Murilo Opsfelder Araujo -Reviewed-by: Greg Kurz -Signed-off-by: David Gibson -(cherry picked from commit 2044c3e7116eeac0449dcb4a4130cc8f8b9310da) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1671519 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. de Paula ---- - util/mmap-alloc.c | 53 ++++++++++++++++++++++++++++++++++------------------- - 1 file changed, 34 insertions(+), 19 deletions(-) - -diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c -index fd329ec..f71ea03 100644 ---- a/util/mmap-alloc.c -+++ b/util/mmap-alloc.c -@@ -77,11 +77,19 @@ size_t qemu_mempath_getpagesize(const char *mem_path) - - void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) - { -+ int flags; -+ int guardfd; -+ size_t offset; -+ size_t total; -+ void *guardptr; -+ void *ptr; -+ - /* - * Note: this always allocates at least one extra page of virtual address - * space, even if size is already aligned. - */ -- size_t total = size + align; -+ total = size + align; -+ - #if defined(__powerpc64__) && defined(__linux__) - /* On ppc64 mappings in the same segment (aka slice) must share the same - * page size. Since we will be re-allocating part of this segment -@@ -91,16 +99,22 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) - * We do this unless we are using the system page size, in which case - * anonymous memory is OK. - */ -- int anonfd = fd == -1 || qemu_fd_getpagesize(fd) == getpagesize() ? -1 : fd; -- int flags = anonfd == -1 ? 
MAP_ANONYMOUS : MAP_NORESERVE; -- void *ptr = mmap(0, total, PROT_NONE, flags | MAP_PRIVATE, anonfd, 0); -+ flags = MAP_PRIVATE; -+ if (fd == -1 || qemu_fd_getpagesize(fd) == getpagesize()) { -+ guardfd = -1; -+ flags |= MAP_ANONYMOUS; -+ } else { -+ guardfd = fd; -+ flags |= MAP_NORESERVE; -+ } - #else -- void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); -+ guardfd = -1; -+ flags = MAP_PRIVATE | MAP_ANONYMOUS; - #endif -- size_t offset; -- void *ptr1; - -- if (ptr == MAP_FAILED) { -+ guardptr = mmap(0, total, PROT_NONE, flags, guardfd, 0); -+ -+ if (guardptr == MAP_FAILED) { - return MAP_FAILED; - } - -@@ -108,19 +122,20 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) - /* Always align to host page size */ - assert(align >= getpagesize()); - -- offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; -- ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, -- MAP_FIXED | -- (fd == -1 ? MAP_ANONYMOUS : 0) | -- (shared ? MAP_SHARED : MAP_PRIVATE), -- fd, 0); -- if (ptr1 == MAP_FAILED) { -- munmap(ptr, total); -+ flags = MAP_FIXED; -+ flags |= fd == -1 ? MAP_ANONYMOUS : 0; -+ flags |= shared ? 
MAP_SHARED : MAP_PRIVATE; -+ offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr; -+ -+ ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE, flags, fd, 0); -+ -+ if (ptr == MAP_FAILED) { -+ munmap(guardptr, total); - return MAP_FAILED; - } - - if (offset > 0) { -- munmap(ptr, offset); -+ munmap(guardptr, offset); - } - - /* -@@ -129,10 +144,10 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) - */ - total -= offset; - if (total > size + getpagesize()) { -- munmap(ptr1 + size + getpagesize(), total - size - getpagesize()); -+ munmap(ptr + size + getpagesize(), total - size - getpagesize()); - } - -- return ptr1; -+ return ptr; - } - - void qemu_ram_munmap(void *ptr, size_t size) --- -1.8.3.1 - diff --git a/kvm-pc-7.5-compat-entries.patch b/kvm-pc-7.5-compat-entries.patch deleted file mode 100644 index 5a1d439..0000000 --- a/kvm-pc-7.5-compat-entries.patch +++ /dev/null @@ -1,55 +0,0 @@ -From d89b308fb81b12ff2f452739d103cfee8043f94a Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 14 Dec 2018 17:02:06 +0000 -Subject: [PATCH 1/8] pc: 7.5 compat entries - -RH-Author: Dr. David Alan Gilbert -Message-id: <20181214170211.14267-2-dgilbert@redhat.com> -Patchwork-id: 83515 -O-Subject: [RHEL8 qemu-kvm PATCH v3 1/6] pc: 7.5 compat entries -Bugzilla: 1655820 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth - -From: "Dr. David Alan Gilbert" - -These are missing entries that are already in 7.6 from commits -64b860ac7db and 2f039646554 but are missing in the 3.1 rebase. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. 
de Paula ---- - include/hw/i386/pc.h | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index f8f35af..efcb208 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -1008,6 +1008,22 @@ extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); - .driver = "Skylake-Server" "-" TYPE_X86_CPU,\ - .property = "clflushopt",\ - .value = "off",\ -+ },{ /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_12 */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "legacy-cache",\ -+ .value = "on",\ -+ },{ /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_12 */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "topoext",\ -+ .value = "off",\ -+ },{ /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_12 */ \ -+ .driver = "EPYC-" TYPE_X86_CPU,\ -+ .property = "xlevel",\ -+ .value = stringify(0x8000000a),\ -+ },{ /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_12 */ \ -+ .driver = "EPYC-IBPB-" TYPE_X86_CPU,\ -+ .property = "xlevel",\ -+ .value = stringify(0x8000000a),\ - }, - - --- -1.8.3.1 - diff --git a/kvm-pc-Add-compat-for-pc-i440fx-rhel7.6.0-machine-type.patch b/kvm-pc-Add-compat-for-pc-i440fx-rhel7.6.0-machine-type.patch deleted file mode 100644 index f1c639b..0000000 --- a/kvm-pc-Add-compat-for-pc-i440fx-rhel7.6.0-machine-type.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 5121d3ce3d4a7932e8b62b7eb4c70aed802c9abd Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 14 Dec 2018 17:02:09 +0000 -Subject: [PATCH 4/8] pc: Add compat for pc-i440fx-rhel7.6.0 machine type - -RH-Author: Dr. David Alan Gilbert -Message-id: <20181214170211.14267-5-dgilbert@redhat.com> -Patchwork-id: 83517 -O-Subject: [RHEL8 qemu-kvm PATCH v3 4/6] pc: Add compat for pc-i440fx-rhel7.6.0 machine type -Bugzilla: 1655820 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth - -From: "Dr. David Alan Gilbert" - -Use the PC_RHEL7_6_COMPAT macro for our i440fx machine type. 
-We're not adding new RHEL8 i440 machine types at this time, so it's -just a matter of keeping the current i440fx machine types compatible. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. de Paula ---- - hw/i386/pc_piix.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index af9eb8c..efee5e7 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1186,6 +1186,7 @@ static void pc_machine_rhel760_options(MachineClass *m) - { - pc_machine_rhel7_options(m); - m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_6_COMPAT); - } - - DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, --- -1.8.3.1 - diff --git a/kvm-pc-Add-pc-q35-8.0.0-machine-type.patch b/kvm-pc-Add-pc-q35-8.0.0-machine-type.patch deleted file mode 100644 index 785dfda..0000000 --- a/kvm-pc-Add-pc-q35-8.0.0-machine-type.patch +++ /dev/null @@ -1,73 +0,0 @@ -From b9db46a8df4b0eb25bf71205b188b903ff61936c Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 14 Dec 2018 17:02:10 +0000 -Subject: [PATCH 5/8] pc: Add pc-q35-8.0.0 machine type - -RH-Author: Dr. David Alan Gilbert -Message-id: <20181214170211.14267-6-dgilbert@redhat.com> -Patchwork-id: 83518 -O-Subject: [RHEL8 qemu-kvm PATCH v3 5/6] pc: Add pc-q35-8.0.0 machine type -Bugzilla: 1655820 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth - -From: "Dr. David Alan Gilbert" - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/i386/pc_q35.c | 22 +++++++++++++++++++--- - 1 file changed, 19 insertions(+), 3 deletions(-) - -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 163546e..0b7223f 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -431,8 +431,8 @@ DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, - - /* Red Hat Enterprise Linux machine types */ - --/* Options for the latest rhel7 q35 machine type */ --static void pc_q35_machine_rhel7_options(MachineClass *m) -+/* Options for the latest rhel q35 machine type */ -+static void pc_q35_machine_rhel_options(MachineClass *m) - { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pcmc->default_nic_model = "e1000e"; -@@ -446,6 +446,20 @@ static void pc_q35_machine_rhel7_options(MachineClass *m) - SET_MACHINE_COMPAT(m, PC_RHEL_COMPAT); - } - -+static void pc_q35_init_rhel800(MachineState *machine) -+{ -+ pc_q35_init(machine); -+} -+ -+static void pc_q35_machine_rhel800_options(MachineClass *m) -+{ -+ pc_q35_machine_rhel_options(m); -+ m->desc = "RHEL-8.0.0 PC (Q35 + ICH9, 2009)"; -+} -+ -+DEFINE_PC_MACHINE(q35_rhel800, "pc-q35-rhel8.0.0", pc_q35_init_rhel800, -+ pc_q35_machine_rhel800_options); -+ - static void pc_q35_init_rhel760(MachineState *machine) - { - pc_q35_init(machine); -@@ -453,8 +467,10 @@ static void pc_q35_init_rhel760(MachineState *machine) - - static void pc_q35_machine_rhel760_options(MachineClass *m) - { -- pc_q35_machine_rhel7_options(m); -+ pc_q35_machine_rhel800_options(m); -+ m->alias = NULL; - m->desc = "RHEL-7.6.0 PC (Q35 + ICH9, 2009)"; -+ SET_MACHINE_COMPAT(m, PC_RHEL7_6_COMPAT); - } - - DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, --- -1.8.3.1 - diff --git a/kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch b/kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch deleted file mode 100644 index e328b9a..0000000 --- a/kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 
f58ebc7e755e7baa122b906e061feb4de10bbe4c Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 14 Dec 2018 17:02:11 +0000 -Subject: [PATCH 6/8] pc: Add x-migrate-smi-count=off to PC_RHEL7_6_COMPAT - -RH-Author: Dr. David Alan Gilbert -Message-id: <20181214170211.14267-7-dgilbert@redhat.com> -Patchwork-id: 83514 -O-Subject: [RHEL8 qemu-kvm PATCH v3 6/6] pc: Add x-migrate-smi-count=off to PC_RHEL7_6_COMPAT -Bugzilla: 1655820 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth - -From: "Dr. David Alan Gilbert" - -MSR_SMI_COUNT started being migrated in QEMU 2.12 and in the 2.12 -release this broke back migration to earlier versions; however -that didn't cause a problem on RHEL because it also relied on newer -kernel features that RHEL 7.* doesn't have. - -QEMU 3.0 got a fix (in PC_COMPAT_2_11) to fix the 2.12->earlier -breakage, but given the kernel dependency, it makes more sense -for us to tie it to 8.* machine types and keep the feature off for -all 7.* machine types. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. de Paula ---- - include/hw/i386/pc.h | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 5533863..426a975 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -1000,6 +1000,9 @@ extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); - - /* Similar to PC_COMPAT_3_0 + PC_COMPAT_2_12, but: - * all of the 2_12 stuff was already in 7.6 from bz 1481253 -+ * x-migrate-smi-count comes from PC_COMPAT_2_11 but -+ * is really tied to kernel version so keep it off on 7.x -+ * machine types irrespective of host. 
- */ - #define PC_RHEL7_6_COMPAT \ - HW_COMPAT_RHEL7_6 \ -@@ -1015,6 +1018,10 @@ extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); - .driver = "Skylake-Server-IBRS" "-" TYPE_X86_CPU,\ - .property = "pku",\ - .value = "off",\ -+ },{ /* PC_RHEL7_6_COMPAT from PC_COMPAT_2_11 */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "x-migrate-smi-count",\ -+ .value = "off",\ - }, - - /* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: --- -1.8.3.1 - diff --git a/kvm-pc-PC_RHEL7_6_COMPAT.patch b/kvm-pc-PC_RHEL7_6_COMPAT.patch deleted file mode 100644 index 457ceaf..0000000 --- a/kvm-pc-PC_RHEL7_6_COMPAT.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 83fd182901d50d150416afaa1236c3b798b320e7 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 14 Dec 2018 17:02:08 +0000 -Subject: [PATCH 3/8] pc: PC_RHEL7_6_COMPAT - -RH-Author: Dr. David Alan Gilbert -Message-id: <20181214170211.14267-4-dgilbert@redhat.com> -Patchwork-id: 83513 -O-Subject: [RHEL8 qemu-kvm PATCH v3 3/6] pc: PC_RHEL7_6_COMPAT -Bugzilla: 1655820 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth - -From: "Dr. David Alan Gilbert" - -Add the PC_RHEL7_6_COMPAT macro derived from the 3.0 and 2.12 -upstream macros. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. 
de Paula ---- - include/hw/i386/pc.h | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index efcb208..5533863 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -998,6 +998,25 @@ extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); - .value = "on",\ - }, - -+/* Similar to PC_COMPAT_3_0 + PC_COMPAT_2_12, but: -+ * all of the 2_12 stuff was already in 7.6 from bz 1481253 -+ */ -+#define PC_RHEL7_6_COMPAT \ -+ HW_COMPAT_RHEL7_6 \ -+ { /* PC_RHEL7_6_COMPAT from PC_COMPAT_3_0 */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "x-hv-synic-kvm-only",\ -+ .value = "on",\ -+ },{ /* PC_RHEL7_6_COMPAT from PC_COMPAT_3_0 */ \ -+ .driver = "Skylake-Server" "-" TYPE_X86_CPU,\ -+ .property = "pku",\ -+ .value = "off",\ -+ },{ /* PC_RHEL7_6_COMPAT from PC_COMPAT_3_0 */ \ -+ .driver = "Skylake-Server-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "pku",\ -+ .value = "off",\ -+ }, -+ - /* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: - * - x-hv-max-vps was backported to 7.5 - * - x-pci-hole64-fix was backported to 7.5 --- -1.8.3.1 - diff --git a/kvm-qcow2-include-LUKS-payload-overhead-in-qemu-img-meas.patch b/kvm-qcow2-include-LUKS-payload-overhead-in-qemu-img-meas.patch deleted file mode 100644 index aa3582e..0000000 --- a/kvm-qcow2-include-LUKS-payload-overhead-in-qemu-img-meas.patch +++ /dev/null @@ -1,148 +0,0 @@ -From e5d1850f80df1b548b1c3bdc0914e7790702d543 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 4 Mar 2019 08:54:26 +0000 -Subject: [PATCH 3/9] qcow2: include LUKS payload overhead in qemu-img measure -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -Message-id: <20190304085427.8148-2-stefanha@redhat.com> -Patchwork-id: 84778 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/2] qcow2: include LUKS payload overhead in qemu-img measure -Bugzilla: 1655065 -RH-Acked-by: Philippe Mathieu-Daudé 
-RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf - -LUKS encryption reserves clusters for its own payload data. The size of -this area must be included in the qemu-img measure calculation so that -we arrive at the correct minimum required image size. - -(Ab)use the qcrypto_block_create() API to determine the payload -overhead. We discard the payload data that qcrypto thinks will be -written to the image. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Max Reitz -Message-id: 20190218104525.23674-2-stefanha@redhat.com -Signed-off-by: Max Reitz -(cherry picked from commit 61914f8906fabbae26372a576d9dd988c5e22b75) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. de Paula ---- - block/qcow2.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 71 insertions(+), 1 deletion(-) - -diff --git a/block/qcow2.c b/block/qcow2.c -index 991d6ac..1b41e4c 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -4113,6 +4113,60 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) - return ret; - } - -+static ssize_t qcow2_measure_crypto_hdr_init_func(QCryptoBlock *block, -+ size_t headerlen, void *opaque, Error **errp) -+{ -+ size_t *headerlenp = opaque; -+ -+ /* Stash away the payload size */ -+ *headerlenp = headerlen; -+ return 0; -+} -+ -+static ssize_t qcow2_measure_crypto_hdr_write_func(QCryptoBlock *block, -+ size_t offset, const uint8_t *buf, size_t buflen, -+ void *opaque, Error **errp) -+{ -+ /* Discard the bytes, we're not actually writing to an image */ -+ return buflen; -+} -+ -+/* Determine the number of bytes for the LUKS payload */ -+static bool qcow2_measure_luks_headerlen(QemuOpts *opts, size_t *len, -+ Error **errp) -+{ -+ QDict *opts_qdict; -+ QDict *cryptoopts_qdict; -+ QCryptoBlockCreateOptions *cryptoopts; -+ QCryptoBlock *crypto; -+ -+ /* Extract "encrypt." 
options into a qdict */ -+ opts_qdict = qemu_opts_to_qdict(opts, NULL); -+ qdict_extract_subqdict(opts_qdict, &cryptoopts_qdict, "encrypt."); -+ qobject_unref(opts_qdict); -+ -+ /* Build QCryptoBlockCreateOptions object from qdict */ -+ qdict_put_str(cryptoopts_qdict, "format", "luks"); -+ cryptoopts = block_crypto_create_opts_init(cryptoopts_qdict, errp); -+ qobject_unref(cryptoopts_qdict); -+ if (!cryptoopts) { -+ return false; -+ } -+ -+ /* Fake LUKS creation in order to determine the payload size */ -+ crypto = qcrypto_block_create(cryptoopts, "encrypt.", -+ qcow2_measure_crypto_hdr_init_func, -+ qcow2_measure_crypto_hdr_write_func, -+ len, errp); -+ qapi_free_QCryptoBlockCreateOptions(cryptoopts); -+ if (!crypto) { -+ return false; -+ } -+ -+ qcrypto_block_free(crypto); -+ return true; -+} -+ - static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, - Error **errp) - { -@@ -4122,11 +4176,13 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, - uint64_t virtual_size; /* disk size as seen by guest */ - uint64_t refcount_bits; - uint64_t l2_tables; -+ uint64_t luks_payload_size = 0; - size_t cluster_size; - int version; - char *optstr; - PreallocMode prealloc; - bool has_backing_file; -+ bool has_luks; - - /* Parse image creation options */ - cluster_size = qcow2_opt_get_cluster_size_del(opts, &local_err); -@@ -4156,6 +4212,20 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, - has_backing_file = !!optstr; - g_free(optstr); - -+ optstr = qemu_opt_get_del(opts, BLOCK_OPT_ENCRYPT_FORMAT); -+ has_luks = optstr && strcmp(optstr, "luks") == 0; -+ g_free(optstr); -+ -+ if (has_luks) { -+ size_t headerlen; -+ -+ if (!qcow2_measure_luks_headerlen(opts, &headerlen, &local_err)) { -+ goto err; -+ } -+ -+ luks_payload_size = ROUND_UP(headerlen, cluster_size); -+ } -+ - virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); - virtual_size = ROUND_UP(virtual_size, cluster_size); - 
-@@ -4226,7 +4296,7 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs, - info = g_new(BlockMeasureInfo, 1); - info->fully_allocated = - qcow2_calc_prealloc_size(virtual_size, cluster_size, -- ctz32(refcount_bits)); -+ ctz32(refcount_bits)) + luks_payload_size; - - /* Remove data clusters that are not required. This overestimates the - * required size because metadata needed for the fully allocated file is --- -1.8.3.1 - diff --git a/kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch b/kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch deleted file mode 100644 index bee8f1d..0000000 --- a/kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 6eddc4b5d380a5a21b17ca5424a10619ff42ad56 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 20 Dec 2018 14:48:58 +0000 -Subject: [PATCH] redhat: define pseries-rhel8.0.0 machine type - -RH-Author: Laurent Vivier -Message-id: <20181220144858.2598-1-lvivier@redhat.com> -Patchwork-id: 83723 -O-Subject: [RHEL8/rhel qemu-kvm PATCH] redhat: define pseries-rhel8.0.0 machine type -Bugzilla: 1656508 -RH-Acked-by: Thomas Huth -RH-Acked-by: David Gibson -RH-Acked-by: Serhii Popovych - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1656508 -BRANCH: rhel8/master-3.1.0 -UPSTREAM: downstream only -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=19589897 -BASED-ON: <20181214170211.14267-1-dgilbert@redhat.com> - [RHEL8 qemu-kvm PATCH v3 0/6] 8.0.0 x86 machine types - -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/spapr.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 47 insertions(+), 2 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 76a4e83..91d38f9 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -4385,19 +4385,64 @@ DEFINE_SPAPR_MACHINE(2_1, "2.1", false); - #endif - - /* -+ * pseries-rhel8.0.0 -+ */ -+ -+static void spapr_machine_rhel800_instance_options(MachineState *machine) -+{ -+} -+ -+static void spapr_machine_rhel800_class_options(MachineClass *mc) -+{ -+ /* Defaults for the latest behaviour inherited from the base class */ -+} -+ -+DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", true); -+ -+/* - * pseries-rhel7.6.0 -+ * like SPAPR_COMPAT_2_12 and SPAPR_COMPAT_3_0 -+ * SPAPR_COMPAT_3_0 is empty - */ -+#define SPAPR_COMPAT_RHEL7_6 \ -+ HW_COMPAT_RHEL7_6 \ -+ { \ -+ .driver = TYPE_POWERPC_CPU, \ -+ .property = "pre-3.0-migration", \ -+ .value = "on", \ -+ }, \ -+ { \ -+ .driver = TYPE_SPAPR_CPU_CORE, \ -+ .property = "pre-3.0-migration", \ -+ .value = "on", \ -+ }, - - static void spapr_machine_rhel760_instance_options(MachineState *machine) - { -+ spapr_machine_rhel800_instance_options(machine); - } - - static void spapr_machine_rhel760_class_options(MachineClass *mc) - { -- /* Defaults for the latest behaviour inherited from the base class */ -+ sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel800_class_options(mc); -+ SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_RHEL7_6); -+ -+ /* from spapr_machine_3_0_class_options() */ -+ smc->legacy_irq_allocation = true; -+ smc->irq = &spapr_irq_xics_legacy; -+ -+ /* from spapr_machine_2_12_class_options() */ -+ /* We depend on kvm_enabled() to choose a default value for the -+ * hpt-max-page-size capability. Of course we can't do it here -+ * because this is too early and the HW accelerator isn't initialzed -+ * yet. Postpone this to machine init (see default_caps_with_cpu()). 
-+ */ -+ smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0; - } - --DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", true); -+DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", false); - - /* - * pseries-rhel7.6.0-sxxm --- -1.8.3.1 - diff --git a/kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch b/kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch deleted file mode 100644 index 09ab876..0000000 --- a/kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch +++ /dev/null @@ -1,57 +0,0 @@ -From e204c887357f2d2ee1df5436a7d7f68b227c4b64 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Fri, 22 Mar 2019 17:45:15 +0000 -Subject: [PATCH 2/2] rhel: Set host-phys-bits-limit=48 on rhel machine-types - -RH-Author: plai@redhat.com -Message-id: <1553276715-26278-3-git-send-email-plai@redhat.com> -Patchwork-id: 85126 -O-Subject: [RHEL8.0 qemu-kvm PATCH 2/2] rhel: Set host-phys-bits-limit=48 on rhel machine-types -Bugzilla: 1688915 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Michael S. Tsirkin - -From: Eduardo Habkost - -Upstream status: not applicable - -Currently we use the host physical address size by default on -VMs. This was a good default on most cases, but this is not the -case on host CPUs supporting 5-level EPT. On those cases, we -want VMs to use 4-level EPT by default. - -Ensure VMs will use 4-level EPT by default, by limiting physical -address bits to 48. - -Not applicable upstream because upstream doesn't set -host-phys-bits=on by default. - -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. de Paula -(cherry picked from commit 01a2ecb4c38fe4a35455ea706e76984ee8d5a769) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. 
de Paula ---- - include/hw/i386/pc.h | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 782d728..de25407 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -992,6 +992,11 @@ extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); - .property = "host-phys-bits",\ - .value = "on",\ - },\ -+ { /* PC_RHEL_COMPAT */ \ -+ .driver = TYPE_X86_CPU,\ -+ .property = "host-phys-bits-limit",\ -+ .value = "48",\ -+ },\ - { /* PC_RHEL_COMPAT bz 1508330 */ \ - .driver = "vfio-pci",\ - .property = "x-no-geforce-quirks",\ --- -1.8.3.1 - diff --git a/kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch b/kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch deleted file mode 100644 index 787c637..0000000 --- a/kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch +++ /dev/null @@ -1,250 +0,0 @@ -From 04a18cff27e2a0c93682adbdd2013c8f1473d520 Mon Sep 17 00:00:00 2001 -From: David Hildenbrand -Date: Fri, 21 Dec 2018 15:33:16 +0000 -Subject: [PATCH 2/3] s390x/tod: Properly stop the KVM TOD while the guest is - not running - -RH-Author: David Hildenbrand -Message-id: <20181221153317.27647-2-david@redhat.com> -Patchwork-id: 83741 -O-Subject: [RHEL-8.0 qemu-kvm v2 PATCH 1/2] s390x/tod: Properly stop the KVM TOD while the guest is not running -Bugzilla: 1659127 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth -RH-Acked-by: Laurent Vivier - -Just like on other architectures, we should stop the clock while the guest -is not running. This is already properly done for TCG. Right now, doing an -offline migration (stop, migrate, cont) can easily trigger stalls in the -guest. - -Even doing a - (hmp) stop - ... wait 2 minutes ... - (hmp) cont -will already trigger stalls. - -So whenever the guest stops, backup the KVM TOD. When continuing to run -the guest, restore the KVM TOD. 
- -One special case is starting a simple VM: Reading the TOD from KVM to -stop it right away until the guest is actually started means that the -time of any simple VM will already differ to the host time. We can -simply leave the TOD running and the guest won't be able to recognize -it. - -For migration, we actually want to keep the TOD stopped until really -starting the guest. To be able to catch most errors, we should however -try to set the TOD in addition to simply storing it. So we can still -catch basic migration problems. - -If anything goes wrong while backing up/restoring the TOD, we have to -ignore it (but print a warning). This is then basically a fallback to -old behavior (TOD remains running). - -I tested this very basically with an initrd: - 1. Start a simple VM. Observed that the TOD is kept running. Old - behavior. - 2. Ordinary live migration. Observed that the TOD is temporarily - stopped on the destination when setting the new value and - correctly started when finally starting the guest. - 3. Offline live migration. (stop, migrate, cont). Observed that the - TOD will be stopped on the source with the "stop" command. On the - destination, the TOD is temporarily stopped when setting the new - value and correctly started when finally starting the guest via - "cont". - 4. Simple stop/cont correctly stops/starts the TOD. (multiple stops - or conts in a row have no effect, so works as expected) - -In the future, we might want to send the guest a special kind of time sync -interrupt under some conditions, so it can synchronize its tod to the -host tod. This is interesting for migration scenarios but also when we -get time sync interrupts ourselves. This however will most probably have -to be handled in KVM (e.g. when the tods differ too much) and is not -desired e.g. when debugging the guest (single stepping should not -result in permanent time syncs). I consider something like that an add-on -on top of this basic "don't break the guest" handling. 
- -Signed-off-by: David Hildenbrand -Message-Id: <20181130094957.4121-1-david@redhat.com> -Acked-by: Christian Borntraeger -Reviewed-by: Thomas Huth -Signed-off-by: Cornelia Huck -(cherry picked from commit 9bc9d3d1ae3bcd1caaad1946494726b52f58b291) -Signed-off-by: David Hildenbrand -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/tod-kvm.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++- - include/hw/s390x/tod.h | 8 +++- - 2 files changed, 107 insertions(+), 3 deletions(-) - -diff --git a/hw/s390x/tod-kvm.c b/hw/s390x/tod-kvm.c -index df564ab..2456bf7 100644 ---- a/hw/s390x/tod-kvm.c -+++ b/hw/s390x/tod-kvm.c -@@ -10,10 +10,11 @@ - - #include "qemu/osdep.h" - #include "qapi/error.h" -+#include "sysemu/sysemu.h" - #include "hw/s390x/tod.h" - #include "kvm_s390x.h" - --static void kvm_s390_tod_get(const S390TODState *td, S390TOD *tod, Error **errp) -+static void kvm_s390_get_tod_raw(S390TOD *tod, Error **errp) - { - int r; - -@@ -27,7 +28,17 @@ static void kvm_s390_tod_get(const S390TODState *td, S390TOD *tod, Error **errp) - } - } - --static void kvm_s390_tod_set(S390TODState *td, const S390TOD *tod, Error **errp) -+static void kvm_s390_tod_get(const S390TODState *td, S390TOD *tod, Error **errp) -+{ -+ if (td->stopped) { -+ *tod = td->base; -+ return; -+ } -+ -+ kvm_s390_get_tod_raw(tod, errp); -+} -+ -+static void kvm_s390_set_tod_raw(const S390TOD *tod, Error **errp) - { - int r; - -@@ -41,18 +52,105 @@ static void kvm_s390_tod_set(S390TODState *td, const S390TOD *tod, Error **errp) - } - } - -+static void kvm_s390_tod_set(S390TODState *td, const S390TOD *tod, Error **errp) -+{ -+ Error *local_err = NULL; -+ -+ /* -+ * Somebody (e.g. migration) set the TOD. We'll store it into KVM to -+ * properly detect errors now but take a look at the runstate to decide -+ * whether really to keep the tod running. E.g. 
during migration, this -+ * is the point where we want to stop the initially running TOD to fire -+ * it back up when actually starting the migrated guest. -+ */ -+ kvm_s390_set_tod_raw(tod, &local_err); -+ if (local_err) { -+ error_propagate(errp, local_err); -+ return; -+ } -+ -+ if (runstate_is_running()) { -+ td->stopped = false; -+ } else { -+ td->stopped = true; -+ td->base = *tod; -+ } -+} -+ -+static void kvm_s390_tod_vm_state_change(void *opaque, int running, -+ RunState state) -+{ -+ S390TODState *td = opaque; -+ Error *local_err = NULL; -+ -+ if (running && td->stopped) { -+ /* Set the old TOD when running the VM - start the TOD clock. */ -+ kvm_s390_set_tod_raw(&td->base, &local_err); -+ if (local_err) { -+ warn_report_err(local_err); -+ } -+ /* Treat errors like the TOD was running all the time. */ -+ td->stopped = false; -+ } else if (!running && !td->stopped) { -+ /* Store the TOD when stopping the VM - stop the TOD clock. */ -+ kvm_s390_get_tod_raw(&td->base, &local_err); -+ if (local_err) { -+ /* Keep the TOD running in case we could not back it up. */ -+ warn_report_err(local_err); -+ } else { -+ td->stopped = true; -+ } -+ } -+} -+ -+static void kvm_s390_tod_realize(DeviceState *dev, Error **errp) -+{ -+ S390TODState *td = S390_TOD(dev); -+ S390TODClass *tdc = S390_TOD_GET_CLASS(td); -+ Error *local_err = NULL; -+ -+ tdc->parent_realize(dev, &local_err); -+ if (local_err) { -+ error_propagate(errp, local_err); -+ return; -+ } -+ -+ /* -+ * We need to know when the VM gets started/stopped to start/stop the TOD. -+ * As we can never have more than one TOD instance (and that will never be -+ * removed), registering here and never unregistering is good enough. 
-+ */ -+ qemu_add_vm_change_state_handler(kvm_s390_tod_vm_state_change, td); -+} -+ - static void kvm_s390_tod_class_init(ObjectClass *oc, void *data) - { - S390TODClass *tdc = S390_TOD_CLASS(oc); - -+ device_class_set_parent_realize(DEVICE_CLASS(oc), kvm_s390_tod_realize, -+ &tdc->parent_realize); - tdc->get = kvm_s390_tod_get; - tdc->set = kvm_s390_tod_set; - } - -+static void kvm_s390_tod_init(Object *obj) -+{ -+ S390TODState *td = S390_TOD(obj); -+ -+ /* -+ * The TOD is initially running (value stored in KVM). Avoid needless -+ * loading/storing of the TOD when starting a simple VM, so let it -+ * run although the (never started) VM is stopped. For migration, we -+ * will properly set the TOD later. -+ */ -+ td->stopped = false; -+} -+ - static TypeInfo kvm_s390_tod_info = { - .name = TYPE_KVM_S390_TOD, - .parent = TYPE_S390_TOD, - .instance_size = sizeof(S390TODState), -+ .instance_init = kvm_s390_tod_init, - .class_init = kvm_s390_tod_class_init, - .class_size = sizeof(S390TODClass), - }; -diff --git a/include/hw/s390x/tod.h b/include/hw/s390x/tod.h -index 413c0d7..cbd7552 100644 ---- a/include/hw/s390x/tod.h -+++ b/include/hw/s390x/tod.h -@@ -31,13 +31,19 @@ typedef struct S390TODState { - /* private */ - DeviceState parent_obj; - -- /* unused by KVM implementation */ -+ /* -+ * Used by TCG to remember the time base. Used by KVM to backup the TOD -+ * while the TOD is stopped. -+ */ - S390TOD base; -+ /* Used by KVM to remember if the TOD is stopped and base is valid. 
*/ -+ bool stopped; - } S390TODState; - - typedef struct S390TODClass { - /* private */ - DeviceClass parent_class; -+ void (*parent_realize)(DeviceState *dev, Error **errp); - - /* public */ - void (*get)(const S390TODState *td, S390TOD *tod, Error **errp); --- -1.8.3.1 - diff --git a/kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch b/kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch deleted file mode 100644 index da2e60b..0000000 --- a/kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch +++ /dev/null @@ -1,187 +0,0 @@ -From acf3927aef42a4a380fa1366b2eca8a8f2b44bc4 Mon Sep 17 00:00:00 2001 -From: Markus Armbruster -Date: Thu, 7 Feb 2019 12:18:18 +0000 -Subject: [PATCH 7/8] scsi-disk: Acquire the AioContext in scsi_*_realize() - -RH-Author: Markus Armbruster -Message-id: <20190207121819.20092-3-armbru@redhat.com> -Patchwork-id: 84291 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 2/3] scsi-disk: Acquire the AioContext in scsi_*_realize() -Bugzilla: 1656276 1662508 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -From: Alberto Garcia - -This fixes a crash when attaching two disks with the same blockdev to -a SCSI device that is using iothreads. Test case included. - -Signed-off-by: Alberto Garcia -Signed-off-by: Kevin Wolf -(cherry picked from commit 3ff35ba391134e4e43ab96152deb38a62e62f858) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/scsi/scsi-disk.c | 23 ++++++++++++++++++++--- - tests/qemu-iotests/240 | 18 ++++++++++++++++++ - tests/qemu-iotests/240.out | 16 ++++++++++++++++ - 3 files changed, 54 insertions(+), 3 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index e74e1e7..e6db6d7 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -2389,10 +2389,13 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) - static void scsi_hd_realize(SCSIDevice *dev, Error **errp) - { - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); -+ AioContext *ctx = NULL; - /* can happen for devices without drive. The error message for missing - * backend will be issued in scsi_realize - */ - if (s->qdev.conf.blk) { -+ ctx = blk_get_aio_context(s->qdev.conf.blk); -+ aio_context_acquire(ctx); - blkconf_blocksizes(&s->qdev.conf); - } - s->qdev.blocksize = s->qdev.conf.logical_block_size; -@@ -2401,11 +2404,15 @@ static void scsi_hd_realize(SCSIDevice *dev, Error **errp) - s->product = g_strdup("QEMU HARDDISK"); - } - scsi_realize(&s->qdev, errp); -+ if (ctx) { -+ aio_context_release(ctx); -+ } - } - - static void scsi_cd_realize(SCSIDevice *dev, Error **errp) - { - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); -+ AioContext *ctx; - int ret; - - if (!dev->conf.blk) { -@@ -2416,6 +2423,8 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) - assert(ret == 0); - } - -+ ctx = blk_get_aio_context(dev->conf.blk); -+ aio_context_acquire(ctx); - s->qdev.blocksize = 2048; - s->qdev.type = TYPE_ROM; - s->features |= 1 << SCSI_DISK_F_REMOVABLE; -@@ -2423,6 +2432,7 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) - s->product = g_strdup("QEMU CD-ROM"); - } - scsi_realize(&s->qdev, errp); -+ aio_context_release(ctx); - } - - static void scsi_disk_realize(SCSIDevice *dev, Error **errp) -@@ -2561,6 +2571,7 @@ static int get_device_type(SCSIDiskState *s) - static void scsi_block_realize(SCSIDevice *dev, Error **errp) - { - SCSIDiskState *s 
= DO_UPCAST(SCSIDiskState, qdev, dev); -+ AioContext *ctx; - int sg_version; - int rc; - -@@ -2575,6 +2586,9 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) - "be removed in a future version"); - } - -+ ctx = blk_get_aio_context(s->qdev.conf.blk); -+ aio_context_acquire(ctx); -+ - /* check we are using a driver managing SG_IO (version 3 and after) */ - rc = blk_ioctl(s->qdev.conf.blk, SG_GET_VERSION_NUM, &sg_version); - if (rc < 0) { -@@ -2582,18 +2596,18 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) - if (rc != -EPERM) { - error_append_hint(errp, "Is this a SCSI device?\n"); - } -- return; -+ goto out; - } - if (sg_version < 30000) { - error_setg(errp, "scsi generic interface too old"); -- return; -+ goto out; - } - - /* get device type from INQUIRY data */ - rc = get_device_type(s); - if (rc < 0) { - error_setg(errp, "INQUIRY failed"); -- return; -+ goto out; - } - - /* Make a guess for the block size, we'll fix it when the guest sends. -@@ -2613,6 +2627,9 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) - - scsi_realize(&s->qdev, errp); - scsi_generic_read_device_inquiry(&s->qdev); -+ -+out: -+ aio_context_release(ctx); - } - - typedef struct SCSIBlockReq { -diff --git a/tests/qemu-iotests/240 b/tests/qemu-iotests/240 -index ead7ee0..5d499c9 100755 ---- a/tests/qemu-iotests/240 -+++ b/tests/qemu-iotests/240 -@@ -83,6 +83,24 @@ run_qemu < -Date: Wed, 6 Feb 2019 15:58:29 +0000 -Subject: [PATCH 3/3] scsi-disk: Add device_id property - -RH-Author: Kevin Wolf -Message-id: <20190206155829.14641-3-kwolf@redhat.com> -Patchwork-id: 84254 -O-Subject: [RHEL-7.7/8.0-AV qemu-kvm-rhev PATCH 2/2] scsi-disk: Add device_id property -Bugzilla: 1668248 -RH-Acked-by: Max Reitz -RH-Acked-by: Thomas Huth -RH-Acked-by: Paolo Bonzini - -The new device_id property specifies which value to use for the vendor -specific designator in the Device Identification VPD page. 
- -In particular, this is necessary for libvirt to maintain guest ABI -compatibility when no serial number is given and a VM is switched from --drive (where the BlockBackend name is used) to -blockdev (where the -vendor specific designator is left out by default). - -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -(cherry picked from commit 7471a649fc3a391dd497297013fb2525ca9821ba) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - hw/scsi/scsi-disk.c | 24 ++++++++++++++++-------- - 1 file changed, 16 insertions(+), 8 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index 93eef40..e74e1e7 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -104,6 +104,7 @@ typedef struct SCSIDiskState - char *serial; - char *vendor; - char *product; -+ char *device_id; - bool tray_open; - bool tray_locked; - /* -@@ -642,13 +643,8 @@ static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf) - - case 0x83: /* Device identification page, mandatory */ - { -- const char *str = s->serial ?: blk_name(s->qdev.conf.blk); -- int max_len = s->serial ? 20 : 255 - 8; -- int id_len = strlen(str); -+ int id_len = s->device_id ? 
MIN(strlen(s->device_id), 255 - 8) : 0; - -- if (id_len > max_len) { -- id_len = max_len; -- } - DPRINTF("Inquiry EVPD[Device identification] " - "buffer size %zd\n", req->cmd.xfer); - -@@ -657,7 +653,7 @@ static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf) - outbuf[buflen++] = 0; /* not officially assigned */ - outbuf[buflen++] = 0; /* reserved */ - outbuf[buflen++] = id_len; /* length of data following */ -- memcpy(outbuf + buflen, str, id_len); -+ memcpy(outbuf + buflen, s->device_id, id_len); - buflen += id_len; - } - -@@ -2363,6 +2359,16 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) - if (!s->vendor) { - s->vendor = g_strdup("QEMU"); - } -+ if (!s->device_id) { -+ if (s->serial) { -+ s->device_id = g_strdup_printf("%.20s", s->serial); -+ } else { -+ const char *str = blk_name(s->qdev.conf.blk); -+ if (str && *str) { -+ s->device_id = g_strdup(str); -+ } -+ } -+ } - - if (blk_is_sg(s->qdev.conf.blk)) { - error_setg(errp, "unwanted /dev/sg*"); -@@ -2904,7 +2910,9 @@ static const TypeInfo scsi_disk_base_info = { - DEFINE_PROP_STRING("ver", SCSIDiskState, version), \ - DEFINE_PROP_STRING("serial", SCSIDiskState, serial), \ - DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor), \ -- DEFINE_PROP_STRING("product", SCSIDiskState, product) -+ DEFINE_PROP_STRING("product", SCSIDiskState, product), \ -+ DEFINE_PROP_STRING("device_id", SCSIDiskState, device_id) -+ - - static Property scsi_hd_properties[] = { - DEFINE_SCSI_DISK_PROPERTIES(), --- -1.8.3.1 - diff --git a/kvm-scsi-disk-Don-t-use-empty-string-as-device-id.patch b/kvm-scsi-disk-Don-t-use-empty-string-as-device-id.patch deleted file mode 100644 index c08ea92..0000000 --- a/kvm-scsi-disk-Don-t-use-empty-string-as-device-id.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 18d600a76319abe59dc4b5e371e5807c089f9159 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 6 Feb 2019 15:58:28 +0000 -Subject: [PATCH 2/3] scsi-disk: Don't use empty string as device id -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Kevin Wolf -Message-id: <20190206155829.14641-2-kwolf@redhat.com> -Patchwork-id: 84253 -O-Subject: [RHEL-7.7/8.0-AV qemu-kvm-rhev PATCH 1/2] scsi-disk: Don't use empty string as device id -Bugzilla: 1668248 -RH-Acked-by: Max Reitz -RH-Acked-by: Thomas Huth -RH-Acked-by: Paolo Bonzini - -scsi-disk includes in the Device Identification VPD page, depending on -configuration amongst others, a vendor specific designator that consists -either of the serial number if given or the BlockBackend name (which is -a host detail that better shouldn't have been leaked to the guest, but -now we have to maintain it for compatibility). - -With anonymous BlockBackends, i.e. scsi-disk devices constructed with -drive=, and no serial number explicitly specified, this ends -up as an empty string. If this happens to more than one disk, we have -accidentally signalled to the OS that this is a multipath setup, which -is obviously not what was intended. - -Instead of using an empty string for the vendor specific designator, -simply leave out that designator, which makes Linux detect such setups -as separate disks again. - -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Philippe Mathieu-Daudé -(cherry picked from commit a8f58afcdb86e266e06c9dc41a71605e570244c3) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/scsi/scsi-disk.c | 14 ++++++++------ - 1 file changed, 8 insertions(+), 6 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index 0e9027c..93eef40 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -652,12 +652,14 @@ static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf) - DPRINTF("Inquiry EVPD[Device identification] " - "buffer size %zd\n", req->cmd.xfer); - -- outbuf[buflen++] = 0x2; /* ASCII */ -- outbuf[buflen++] = 0; /* not officially assigned */ -- outbuf[buflen++] = 0; /* reserved */ -- outbuf[buflen++] = id_len; /* length of data following */ -- memcpy(outbuf + buflen, str, id_len); -- buflen += id_len; -+ if (id_len) { -+ outbuf[buflen++] = 0x2; /* ASCII */ -+ outbuf[buflen++] = 0; /* not officially assigned */ -+ outbuf[buflen++] = 0; /* reserved */ -+ outbuf[buflen++] = id_len; /* length of data following */ -+ memcpy(outbuf + buflen, str, id_len); -+ buflen += id_len; -+ } - - if (s->qdev.wwn) { - outbuf[buflen++] = 0x1; /* Binary */ --- -1.8.3.1 - diff --git a/kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch b/kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch deleted file mode 100644 index 48f2d8b..0000000 --- a/kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch +++ /dev/null @@ -1,124 +0,0 @@ -From 41c461465fb5b0d23f5826cd4b62a78f25607420 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 3 Jan 2019 12:51:40 +0000 -Subject: [PATCH 2/3] spapr: Add H-Call H_HOME_NODE_ASSOCIATIVITY - -RH-Author: Laurent Vivier -Message-id: <20190103135140.13948-2-lvivier@redhat.com> -Patchwork-id: 83848 -O-Subject: [RHEL8/rhel qemu-kvm PATCH 1/1] spapr: Add H-Call H_HOME_NODE_ASSOCIATIVITY -Bugzilla: 1661967 -RH-Acked-by: Serhii Popovych -RH-Acked-by: Thomas Huth -RH-Acked-by: David Gibson - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1661967 - -H_HOME_NODE_ASSOCIATIVITY H-Call returns the associativity domain -designation associated with the identifier input parameter - -This 
fixes a crash when we try to hotplug a CPU in memory-less and -CPU-less numa node. In this case, the kernel tries to online the -node, but without the information provided by this h-call, the node id, -it cannot and the CPU is started while the node is not onlined. - -It also removes the warning message from the kernel: - VPHN is not supported. Disabling polling.. - -Signed-off-by: Laurent Vivier -Reviewed-by: Greg Kurz -Signed-off-by: David Gibson -(cherry picked from commit c24ba3d0a34f68ad2c6bf1a15bc43770005f6cc0) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr.c | 1 + - hw/ppc/spapr_hcall.c | 40 ++++++++++++++++++++++++++++++++++++++++ - include/hw/ppc/spapr.h | 1 + - 3 files changed, 42 insertions(+) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 91d38f9..d5d2eb4 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -1051,6 +1051,7 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt) - add_str(hypertas, "hcall-sprg0"); - add_str(hypertas, "hcall-copy"); - add_str(hypertas, "hcall-debug"); -+ add_str(hypertas, "hcall-vphn"); - add_str(qemu_hypertas, "hcall-memop1"); - - if (!kvm_enabled() || kvmppc_spapr_use_multitce()) { -diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c -index ae913d0..f131c7e 100644 ---- a/hw/ppc/spapr_hcall.c -+++ b/hw/ppc/spapr_hcall.c -@@ -1663,6 +1663,42 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, - return H_SUCCESS; - } - -+static target_ulong h_home_node_associativity(PowerPCCPU *cpu, -+ sPAPRMachineState *spapr, -+ target_ulong opcode, -+ target_ulong *args) -+{ -+ target_ulong flags = args[0]; -+ target_ulong procno = args[1]; -+ PowerPCCPU *tcpu; -+ int idx; -+ -+ /* only support procno from H_REGISTER_VPA */ -+ if (flags != 0x1) { -+ return H_FUNCTION; -+ } -+ -+ tcpu = spapr_find_cpu(procno); -+ if (tcpu == NULL) { -+ return H_P2; -+ } -+ -+ /* sequence is the same as in the "ibm,associativity" property */ -+ -+ idx = 0; -+#define 
ASSOCIATIVITY(a, b) (((uint64_t)(a) << 32) | \ -+ ((uint64_t)(b) & 0xffffffff)) -+ args[idx++] = ASSOCIATIVITY(0, 0); -+ args[idx++] = ASSOCIATIVITY(0, tcpu->node_id); -+ args[idx++] = ASSOCIATIVITY(procno, -1); -+ for ( ; idx < 6; idx++) { -+ args[idx] = -1; -+ } -+#undef ASSOCIATIVITY -+ -+ return H_SUCCESS; -+} -+ - static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu, - sPAPRMachineState *spapr, - target_ulong opcode, -@@ -1822,6 +1858,10 @@ static void hypercall_register_types(void) - - /* ibm,client-architecture-support support */ - spapr_register_hypercall(KVMPPC_H_CAS, h_client_architecture_support); -+ -+ /* Virtual Processor Home Node */ -+ spapr_register_hypercall(H_HOME_NODE_ASSOCIATIVITY, -+ h_home_node_associativity); - } - - type_init(hypercall_register_types) -diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index d2370e5..0cc123e 100644 ---- a/include/hw/ppc/spapr.h -+++ b/include/hw/ppc/spapr.h -@@ -442,6 +442,7 @@ struct sPAPRMachineState { - #define H_GET_EM_PARMS 0x2B8 - #define H_SET_MPP 0x2D0 - #define H_GET_MPP 0x2D4 -+#define H_HOME_NODE_ASSOCIATIVITY 0x2EC - #define H_XIRR_X 0x2FC - #define H_RANDOM 0x300 - #define H_SET_MODE 0x31C --- -1.8.3.1 - diff --git a/kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch b/kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch deleted file mode 100644 index bf01cdf..0000000 --- a/kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch +++ /dev/null @@ -1,149 +0,0 @@ -From f39913b42600b838c415f6fb561be940bea265dd Mon Sep 17 00:00:00 2001 -From: Serhii Popovych -Date: Wed, 9 Jan 2019 13:31:49 +0000 -Subject: [PATCH 1/2] spapr: Fix ibm, max-associativity-domains property number - of nodes - -RH-Author: Serhii Popovych -Message-id: <1547040709-797-1-git-send-email-spopovyc@redhat.com> -Patchwork-id: 83920 -O-Subject: [RHEL-8.0 qemu-kvm PATCH v2] spapr: Fix ibm, max-associativity-domains property number of nodes -Bugzilla: 1653114 -RH-Acked-by: Laurent 
Vivier -RH-Acked-by: David Gibson -RH-Acked-by: Thomas Huth - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1653114 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=19727263 -Branch: rhel8/master-3.1.0 -Upstream: Merged -Testing: Build and boot tested on rhel-7.6 with steps described in - comment 0. Issue no longer reproducible. - -Laurent Vivier reported off by one with maximum number of NUMA nodes -provided by qemu-kvm being less by one than required according to -description of "ibm,max-associativity-domains" property in LoPAPR. - -It appears that I incorrectly treated LoPAPR description of this -property assuming it provides last valid domain (NUMA node here) -instead of maximum number of domains. - - ### Before hot-add - - (qemu) info numa - 3 nodes - node 0 cpus: 0 - node 0 size: 0 MB - node 0 plugged: 0 MB - node 1 cpus: - node 1 size: 1024 MB - node 1 plugged: 0 MB - node 2 cpus: - node 2 size: 0 MB - node 2 plugged: 0 MB - - $ numactl -H - available: 2 nodes (0-1) - node 0 cpus: 0 - node 0 size: 0 MB - node 0 free: 0 MB - node 1 cpus: - node 1 size: 999 MB - node 1 free: 658 MB - node distances: - node 0 1 - 0: 10 40 - 1: 40 10 - - ### Hot-add - - (qemu) object_add memory-backend-ram,id=mem0,size=1G - (qemu) device_add pc-dimm,id=dimm1,memdev=mem0,node=2 - (qemu) [ 87.704898] pseries-hotplug-mem: Attempting to hot-add 4 ... - - [ 87.705128] lpar: Attempting to resize HPT to shift 21 - ... 
- - ### After hot-add - - (qemu) info numa - 3 nodes - node 0 cpus: 0 - node 0 size: 0 MB - node 0 plugged: 0 MB - node 1 cpus: - node 1 size: 1024 MB - node 1 plugged: 0 MB - node 2 cpus: - node 2 size: 1024 MB - node 2 plugged: 1024 MB - - $ numactl -H - available: 2 nodes (0-1) - ^^^^^^^^^^^^^^^^^^^^^^^^ - Still only two nodes (and memory hot-added to node 0 below) - node 0 cpus: 0 - node 0 size: 1024 MB - node 0 free: 1021 MB - node 1 cpus: - node 1 size: 999 MB - node 1 free: 658 MB - node distances: - node 0 1 - 0: 10 40 - 1: 40 10 - -After fix applied numactl(8) reports 3 nodes available and memory -plugged into node 2 as expected. - ->From David Gibson: ------------------- - Qemu makes a distinction between "non NUMA" (nb_numa_nodes == 0) and - "NUMA with one node" (nb_numa_nodes == 1). But from a PAPR guests's - point of view these are equivalent. I don't want to present two - different cases to the guest when we don't need to, so even though the - guest can handle it, I'd prefer we put a '1' here for both the - nb_numa_nodes == 0 and nb_numa_nodes == 1 case. - -This consolidates everything discussed previously on mailing list. - -Fixes: da9f80fbad21 ("spapr: Add ibm,max-associativity-domains property") -Reported-by: Laurent Vivier -Signed-off-by: Serhii Popovych - -Signed-off-by: David Gibson -Reviewed-by: Greg Kurz -Reviewed-by: Laurent Vivier -(cherry picked from commit 3908a24fcb83913079d315de0ca6d598e8616dbb) -Signed-off-by: Serhii Popovych ---- -v2: - Rebased against rhel8/qemu-kvm-3.1.0 for RHEL Advanced Virtualization - product. - - Added "Brach:" tag to commint message as suggested by Laurent Vivier. - - hw/ppc/spapr.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/spapr.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index d5d2eb4..bd2abb7 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -1033,7 +1033,7 @@ static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt) - cpu_to_be32(0), - cpu_to_be32(0), - cpu_to_be32(0), -- cpu_to_be32(nb_numa_nodes ? nb_numa_nodes - 1 : 0), -+ cpu_to_be32(nb_numa_nodes ? nb_numa_nodes : 1), - }; - - _FDT(rtas = fdt_add_subnode(fdt, 0, "rtas")); --- -1.8.3.1 - diff --git a/kvm-spapr-fix-out-of-bounds-write-in-spapr_populate_drme.patch b/kvm-spapr-fix-out-of-bounds-write-in-spapr_populate_drme.patch deleted file mode 100644 index 48fec33..0000000 --- a/kvm-spapr-fix-out-of-bounds-write-in-spapr_populate_drme.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 2744bd7eb7955e7ae995a48784760e48c33c1e73 Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Wed, 27 Feb 2019 04:54:34 +0000 -Subject: [PATCH 2/9] spapr: fix out of bounds write in spapr_populate_drmem_v2 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Gibson -Message-id: <20190227045434.23465-1-dgibson@redhat.com> -Patchwork-id: 84720 -O-Subject: [RHELAV-8.1 qemu-kvm PATCH] spapr: fix out of bounds write in spapr_populate_drmem_v2 -Bugzilla: 1674438 -RH-Acked-by: Thomas Huth -RH-Acked-by: Serhii Popovych -RH-Acked-by: Philippe Mathieu-Daudé - -From: Fabiano Rosas - -buf_len is uint8_t which is not large enough to hold the result of: - - nr_entries * sizeof(struct sPAPRDrconfCellV2) + sizeof(uint32_t); - -for a nr_entries greater than 10. 
- -This causes the allocated buffer 'int_buf' to be smaller than expected -and we eventually overwrite some of glibc's control structures (see -"chunk" in https://sourceware.org/glibc/wiki/MallocInternals) - -The following error is seen while trying to free int_buf: - - "free(): invalid next size (fast)" - -Fixes: a324d6f166 "spapr: Support ibm,dynamic-memory-v2 property" -Signed-off-by: Fabiano Rosas -Message-Id: <20190213172926.21740-1-farosas@linux.ibm.com> -Reviewed-by: Greg Kurz -Signed-off-by: David Gibson -(cherry picked from commit cc941111a5bc5f498185fa3824c3b6579c7d45ad) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1674438 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=20382722 -Branch: rhel8/master-3.1.0 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index bd2abb7..c1478bf 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -688,14 +688,14 @@ static int spapr_populate_drmem_v2(sPAPRMachineState *spapr, void *fdt, - int offset, MemoryDeviceInfoList *dimms) - { - MachineState *machine = MACHINE(spapr); -- uint8_t *int_buf, *cur_index, buf_len; -+ uint8_t *int_buf, *cur_index; - int ret; - uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; - uint64_t addr, cur_addr, size; - uint32_t nr_boot_lmbs = (machine->device_memory->base / lmb_size); - uint64_t mem_end = machine->device_memory->base + - memory_region_size(&machine->device_memory->mr); -- uint32_t node, nr_entries = 0; -+ uint32_t node, buf_len, nr_entries = 0; - sPAPRDRConnector *drc; - DrconfCellQueue *elem, *next; - MemoryDeviceInfoList *info; --- -1.8.3.1 - diff --git a/kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch b/kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch deleted file mode 100644 index 03d72b1..0000000 --- a/kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch +++ /dev/null 
@@ -1,153 +0,0 @@ -From 18cf0d751c615e83243e13f3170508289cd78457 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Sat, 16 Feb 2019 00:01:45 +0000 -Subject: [PATCH 1/4] target/i386: Disable MPX support on named CPU models -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <1550275305-42020-1-git-send-email-pbonzini@redhat.com> -Patchwork-id: 84525 -O-Subject: [rhel-av-8.0.0 qemu-kvm PATCH] target/i386: Disable MPX support on named CPU models -Bugzilla: 1661030 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Igor Mammedov -RH-Acked-by: Stefano Garzarella - -Bugzilla: 1661030 - -Brew build: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=20232369 - -MPX support is being phased out by Intel; GCC has dropped it, Linux -is also going to do that. Even though KVM will have special code -to support MPX after the kernel proper stops enabling it in XCR0, -we probably also want to deprecate that in a few years. As a start, -do not enable it by default for any named CPU model starting with -the 4.0 machine types; this include Skylake, Icelake and Cascadelake. - -Signed-off-by: Paolo Bonzini -Message-Id: <20181220121100.21554-1-pbonzini@redhat.com> -Reviewed-by:   Wainer dos Santos Moschetta -Signed-off-by: Eduardo Habkost -(cherry picked from commit ecb85fe48cacb2f8740186e81f2f38a2e02bd963) -Signed-off-by: Danilo C. L. de Paula - -Conflicts: - hw/i386/pc.c [old-style global properties] - -Signed-off-by: Danilo C. L. 
de Paula ---- - include/hw/i386/pc.h | 28 ++++++++++++++++++++++++++++ - target/i386/cpu.c | 14 +++++++------- - 2 files changed, 35 insertions(+), 7 deletions(-) - -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 426a975..782d728 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -1022,6 +1022,34 @@ extern void igd_passthrough_isa_bridge_create(PCIBus *bus, uint16_t gpu_dev_id); - .driver = TYPE_X86_CPU,\ - .property = "x-migrate-smi-count",\ - .value = "off",\ -+ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ \ -+ .driver = "Skylake-Client" "-" TYPE_X86_CPU,\ -+ .property = "mpx",\ -+ .value = "on",\ -+ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ \ -+ .driver = "Skylake-Client-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "mpx",\ -+ .value = "on",\ -+ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ \ -+ .driver = "Skylake-Server" "-" TYPE_X86_CPU,\ -+ .property = "mpx",\ -+ .value = "on",\ -+ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ \ -+ .driver = "Skylake-Server-IBRS" "-" TYPE_X86_CPU,\ -+ .property = "mpx",\ -+ .value = "on",\ -+ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ \ -+ .driver = "Cascadelake-Server" "-" TYPE_X86_CPU,\ -+ .property = "mpx",\ -+ .value = "on",\ -+ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ \ -+ .driver = "Icelake-Client" "-" TYPE_X86_CPU,\ -+ .property = "mpx",\ -+ .value = "on",\ -+ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ \ -+ .driver = "Icelake-Server" "-" TYPE_X86_CPU,\ -+ .property = "mpx",\ -+ .value = "on",\ - }, - - /* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 8570b25..dbcf632 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -2358,7 +2358,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | - CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | - CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | -- CPUID_7_0_EBX_SMAP | 
CPUID_7_0_EBX_MPX, -+ CPUID_7_0_EBX_SMAP, - /* Missing: XSAVES (not supported by some Linux versions, - * including v4.1 to v4.12). - * KVM doesn't yet expose any XSAVES state save component, -@@ -2405,7 +2405,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | - CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | - CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | -- CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX, -+ CPUID_7_0_EBX_SMAP, - /* Missing: XSAVES (not supported by some Linux versions, - * including v4.1 to v4.12). - * KVM doesn't yet expose any XSAVES state save component, -@@ -2450,7 +2450,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | - CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | - CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | -- CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | -+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | - CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | - CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | - CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, -@@ -2502,7 +2502,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | - CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | - CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | -- CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | -+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | - CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | - CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | - CPUID_7_0_EBX_AVX512VL, -@@ -2552,7 +2552,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | - CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | - CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | -- 
CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | -+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | - CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | - CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | - CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT | -@@ -2608,7 +2608,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | - CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | - CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | -- CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_INTEL_PT, -+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_INTEL_PT, - .features[FEAT_7_0_ECX] = - CPUID_7_0_ECX_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | - CPUID_7_0_ECX_OSPKE | CPUID_7_0_ECX_VBMI2 | CPUID_7_0_ECX_GFNI | -@@ -2663,7 +2663,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | - CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | - CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | -- CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | -+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | - CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | - CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | - CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT | --- -1.8.3.1 - diff --git a/kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch b/kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch deleted file mode 100644 index 42ff2e7..0000000 --- a/kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 02287430957782ffb1db0d7d17693a73925ea02f Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 29 Jan 2019 07:02:49 +0000 -Subject: [PATCH 1/2] throttle-groups: fix restart coroutine iothread race - -RH-Author: Stefan Hajnoczi -Message-id: <20190129070250.22709-2-stefanha@redhat.com> -Patchwork-id: 84139 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 
1/2] throttle-groups: fix restart coroutine iothread race -Bugzilla: 1655947 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Thomas Huth -RH-Acked-by: Laurent Vivier - -The following QMP command leads to a crash when iothreads are used: - - { 'execute': 'device_del', 'arguments': {'id': 'data'} } - -The backtrace involves the queue restart coroutine where -tgm->throttle_state is a NULL pointer because -throttle_group_unregister_tgm() has already been called: - - (gdb) bt full - #0 0x00005585a7a3b378 in qemu_mutex_lock_impl (mutex=0xffffffffffffffd0, file=0x5585a7bb3d54 "block/throttle-groups.c", line=412) at util/qemu-thread-posix.c:64 - err = - __PRETTY_FUNCTION__ = "qemu_mutex_lock_impl" - __func__ = "qemu_mutex_lock_impl" - #1 0x00005585a79be074 in throttle_group_restart_queue_entry (opaque=0x5585a9de4eb0) at block/throttle-groups.c:412 - _f = - data = 0x5585a9de4eb0 - tgm = 0x5585a9079440 - ts = 0x0 - tg = 0xffffffffffffff98 - is_write = false - empty_queue = 255 - -This coroutine should not execute in the iothread after the throttle -group member has been unregistered! - -The root cause is that the device_del code path schedules the restart -coroutine in the iothread while holding the AioContext lock. Therefore -the iothread cannot execute the coroutine until after device_del -releases the lock - by this time it's too late. - -This patch adds a reference count to ThrottleGroupMember so we can -synchronously wait for restart coroutines to complete. Once they are -done it is safe to unregister the ThrottleGroupMember. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Alberto Garcia -Message-id: 20190114133257.30299-2-stefanha@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit bc19a0a6e4505390f99d3c593ebaf11b7962cc59) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. 
de Paula ---- - block/throttle-groups.c | 9 +++++++++ - include/block/throttle-groups.h | 5 +++++ - 2 files changed, 14 insertions(+) - -diff --git a/block/throttle-groups.c b/block/throttle-groups.c -index 5d8213a..a5a2037 100644 ---- a/block/throttle-groups.c -+++ b/block/throttle-groups.c -@@ -415,6 +415,9 @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque) - } - - g_free(data); -+ -+ atomic_dec(&tgm->restart_pending); -+ aio_wait_kick(); - } - - static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write) -@@ -430,6 +433,8 @@ static void throttle_group_restart_queue(ThrottleGroupMember *tgm, bool is_write - * be no timer pending on this tgm at this point */ - assert(!timer_pending(tgm->throttle_timers.timers[is_write])); - -+ atomic_inc(&tgm->restart_pending); -+ - co = qemu_coroutine_create(throttle_group_restart_queue_entry, rd); - aio_co_enter(tgm->aio_context, co); - } -@@ -538,6 +543,7 @@ void throttle_group_register_tgm(ThrottleGroupMember *tgm, - - tgm->throttle_state = ts; - tgm->aio_context = ctx; -+ atomic_set(&tgm->restart_pending, 0); - - qemu_mutex_lock(&tg->lock); - /* If the ThrottleGroup is new set this ThrottleGroupMember as the token */ -@@ -584,6 +590,9 @@ void throttle_group_unregister_tgm(ThrottleGroupMember *tgm) - return; - } - -+ /* Wait for throttle_group_restart_queue_entry() coroutines to finish */ -+ AIO_WAIT_WHILE(tgm->aio_context, atomic_read(&tgm->restart_pending) > 0); -+ - qemu_mutex_lock(&tg->lock); - for (i = 0; i < 2; i++) { - assert(tgm->pending_reqs[i] == 0); -diff --git a/include/block/throttle-groups.h b/include/block/throttle-groups.h -index e2fd051..712a8e6 100644 ---- a/include/block/throttle-groups.h -+++ b/include/block/throttle-groups.h -@@ -43,6 +43,11 @@ typedef struct ThrottleGroupMember { - */ - unsigned int io_limits_disabled; - -+ /* Number of pending throttle_group_restart_queue_entry() coroutines. -+ * Accessed with atomic operations. 
-+ */ -+ unsigned int restart_pending; -+ - /* The following fields are protected by the ThrottleGroup lock. - * See the ThrottleGroup documentation for details. - * throttle_state tells us if I/O limits are configured. */ --- -1.8.3.1 - diff --git a/kvm-virtio-Helper-for-registering-virtio-device-types.patch b/kvm-virtio-Helper-for-registering-virtio-device-types.patch deleted file mode 100644 index c0dae87..0000000 --- a/kvm-virtio-Helper-for-registering-virtio-device-types.patch +++ /dev/null @@ -1,553 +0,0 @@ -From 8a7371fb25d2072d60b029eb092a083cfa064afb Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Wed, 16 Jan 2019 23:18:16 +0000 -Subject: [PATCH 1/7] virtio: Helper for registering virtio device types - -RH-Author: Eduardo Habkost -Message-id: <20190116231819.29310-2-ehabkost@redhat.com> -Patchwork-id: 84027 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/4] virtio: Helper for registering virtio device types -Bugzilla: 1648023 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth -RH-Acked-by: Laurent Vivier -RH-Acked-by: Michael S. Tsirkin - -Introduce a helper for registering different flavours of virtio -devices. Convert code to use the helper, but keep only the -existing generic types. Transitional and non-transitional device -types will be added by another patch. - -Backport conflicts: - hw/virtio/virtio-pci.c: because our downstream tree doesn't - have commit e6a74868d92f ("build-sys: add --disable-vhost-user") - -Acked-by: Andrea Bolognani -Reviewed-by: Cornelia Huck -Signed-off-by: Eduardo Habkost -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit a4ee4c8baa37154f42b4dc6a13fee79268d15238) -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/display/virtio-gpu-pci.c | 7 +- - hw/display/virtio-vga.c | 7 +- - hw/virtio/virtio-crypto-pci.c | 7 +- - hw/virtio/virtio-pci.c | 231 ++++++++++++++++++++++++++++++------------ - hw/virtio/virtio-pci.h | 54 ++++++++++ - 5 files changed, 228 insertions(+), 78 deletions(-) - -diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c -index cece4aa..faf76a8 100644 ---- a/hw/display/virtio-gpu-pci.c -+++ b/hw/display/virtio-gpu-pci.c -@@ -69,9 +69,8 @@ static void virtio_gpu_initfn(Object *obj) - TYPE_VIRTIO_GPU); - } - --static const TypeInfo virtio_gpu_pci_info = { -- .name = TYPE_VIRTIO_GPU_PCI, -- .parent = TYPE_VIRTIO_PCI, -+static const VirtioPCIDeviceTypeInfo virtio_gpu_pci_info = { -+ .generic_name = TYPE_VIRTIO_GPU_PCI, - .instance_size = sizeof(VirtIOGPUPCI), - .instance_init = virtio_gpu_initfn, - .class_init = virtio_gpu_pci_class_init, -@@ -79,6 +78,6 @@ static const TypeInfo virtio_gpu_pci_info = { - - static void virtio_gpu_pci_register_types(void) - { -- type_register_static(&virtio_gpu_pci_info); -+ virtio_pci_types_register(&virtio_gpu_pci_info); - } - type_init(virtio_gpu_pci_register_types) -diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c -index ab2e369..8db4d91 100644 ---- a/hw/display/virtio-vga.c -+++ b/hw/display/virtio-vga.c -@@ -207,9 +207,8 @@ static void virtio_vga_inst_initfn(Object *obj) - TYPE_VIRTIO_GPU); - } - --static TypeInfo virtio_vga_info = { -- .name = TYPE_VIRTIO_VGA, -- .parent = TYPE_VIRTIO_PCI, -+static VirtioPCIDeviceTypeInfo virtio_vga_info = { -+ .generic_name = TYPE_VIRTIO_VGA, - .instance_size = sizeof(struct VirtIOVGA), - .instance_init = virtio_vga_inst_initfn, - .class_init = virtio_vga_class_init, -@@ -217,7 +216,7 @@ static TypeInfo virtio_vga_info = { - - static void virtio_vga_register_types(void) - { -- type_register_static(&virtio_vga_info); -+ virtio_pci_types_register(&virtio_vga_info); - } - - type_init(virtio_vga_register_types) -diff --git 
a/hw/virtio/virtio-crypto-pci.c b/hw/virtio/virtio-crypto-pci.c -index bf64996..8cc3fa3 100644 ---- a/hw/virtio/virtio-crypto-pci.c -+++ b/hw/virtio/virtio-crypto-pci.c -@@ -64,9 +64,8 @@ static void virtio_crypto_initfn(Object *obj) - TYPE_VIRTIO_CRYPTO); - } - --static const TypeInfo virtio_crypto_pci_info = { -- .name = TYPE_VIRTIO_CRYPTO_PCI, -- .parent = TYPE_VIRTIO_PCI, -+static const VirtioPCIDeviceTypeInfo virtio_crypto_pci_info = { -+ .generic_name = TYPE_VIRTIO_CRYPTO_PCI, - .instance_size = sizeof(VirtIOCryptoPCI), - .instance_init = virtio_crypto_initfn, - .class_init = virtio_crypto_pci_class_init, -@@ -74,6 +73,6 @@ static const TypeInfo virtio_crypto_pci_info = { - - static void virtio_crypto_pci_register_types(void) - { -- type_register_static(&virtio_crypto_pci_info); -+ virtio_pci_types_register(&virtio_crypto_pci_info); - } - type_init(virtio_crypto_pci_register_types) -diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index 9a987cb..bee3509 100644 ---- a/hw/virtio/virtio-pci.c -+++ b/hw/virtio/virtio-pci.c -@@ -1119,9 +1119,8 @@ static void virtio_9p_pci_instance_init(Object *obj) - TYPE_VIRTIO_9P); - } - --static const TypeInfo virtio_9p_pci_info = { -- .name = TYPE_VIRTIO_9P_PCI, -- .parent = TYPE_VIRTIO_PCI, -+static const VirtioPCIDeviceTypeInfo virtio_9p_pci_info = { -+ .generic_name = TYPE_VIRTIO_9P_PCI, - .instance_size = sizeof(V9fsPCIState), - .instance_init = virtio_9p_pci_instance_init, - .class_init = virtio_9p_pci_class_init, -@@ -1877,9 +1876,6 @@ static void virtio_pci_reset(DeviceState *qdev) - static Property virtio_pci_properties[] = { - DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags, - VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false), -- DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy, -- ON_OFF_AUTO_AUTO), -- DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false), - DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags, - 
VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true), - DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags, -@@ -1939,13 +1935,123 @@ static const TypeInfo virtio_pci_info = { - .class_init = virtio_pci_class_init, - .class_size = sizeof(VirtioPCIClass), - .abstract = true, -- .interfaces = (InterfaceInfo[]) { -- { INTERFACE_PCIE_DEVICE }, -- { INTERFACE_CONVENTIONAL_PCI_DEVICE }, -- { } -- }, - }; - -+static Property virtio_pci_generic_properties[] = { -+ DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy, -+ ON_OFF_AUTO_AUTO), -+ DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false), -+ DEFINE_PROP_END_OF_LIST(), -+}; -+ -+static void virtio_pci_base_class_init(ObjectClass *klass, void *data) -+{ -+ const VirtioPCIDeviceTypeInfo *t = data; -+ if (t->class_init) { -+ t->class_init(klass, NULL); -+ } -+} -+ -+static void virtio_pci_generic_class_init(ObjectClass *klass, void *data) -+{ -+ DeviceClass *dc = DEVICE_CLASS(klass); -+ -+ dc->props = virtio_pci_generic_properties; -+} -+ -+/* Used when the generic type and the base type is the same */ -+static void virtio_pci_generic_base_class_init(ObjectClass *klass, void *data) -+{ -+ virtio_pci_base_class_init(klass, data); -+ virtio_pci_generic_class_init(klass, NULL); -+} -+ -+static void virtio_pci_transitional_instance_init(Object *obj) -+{ -+ VirtIOPCIProxy *proxy = VIRTIO_PCI(obj); -+ -+ proxy->disable_legacy = ON_OFF_AUTO_OFF; -+ proxy->disable_modern = false; -+} -+ -+static void virtio_pci_non_transitional_instance_init(Object *obj) -+{ -+ VirtIOPCIProxy *proxy = VIRTIO_PCI(obj); -+ -+ proxy->disable_legacy = ON_OFF_AUTO_ON; -+ proxy->disable_modern = false; -+} -+ -+void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) -+{ -+ TypeInfo base_type_info = { -+ .name = t->base_name, -+ .parent = t->parent ? 
t->parent : TYPE_VIRTIO_PCI, -+ .instance_size = t->instance_size, -+ .instance_init = t->instance_init, -+ .class_init = virtio_pci_base_class_init, -+ .class_data = (void *)t, -+ .abstract = true, -+ }; -+ TypeInfo generic_type_info = { -+ .name = t->generic_name, -+ .parent = base_type_info.name, -+ .class_init = virtio_pci_generic_class_init, -+ .interfaces = (InterfaceInfo[]) { -+ { INTERFACE_PCIE_DEVICE }, -+ { INTERFACE_CONVENTIONAL_PCI_DEVICE }, -+ { } -+ }, -+ }; -+ -+ if (!base_type_info.name) { -+ /* No base type -> register a single generic device type */ -+ base_type_info.name = t->generic_name; -+ base_type_info.class_init = virtio_pci_generic_base_class_init; -+ base_type_info.interfaces = generic_type_info.interfaces; -+ base_type_info.abstract = false; -+ generic_type_info.name = NULL; -+ assert(!t->non_transitional_name); -+ assert(!t->transitional_name); -+ } -+ -+ type_register(&base_type_info); -+ if (generic_type_info.name) { -+ type_register(&generic_type_info); -+ } -+ -+ if (t->non_transitional_name) { -+ const TypeInfo non_transitional_type_info = { -+ .name = t->non_transitional_name, -+ .parent = base_type_info.name, -+ .instance_init = virtio_pci_non_transitional_instance_init, -+ .interfaces = (InterfaceInfo[]) { -+ { INTERFACE_PCIE_DEVICE }, -+ { INTERFACE_CONVENTIONAL_PCI_DEVICE }, -+ { } -+ }, -+ }; -+ type_register(&non_transitional_type_info); -+ } -+ -+ if (t->transitional_name) { -+ const TypeInfo transitional_type_info = { -+ .name = t->transitional_name, -+ .parent = base_type_info.name, -+ .instance_init = virtio_pci_transitional_instance_init, -+ .interfaces = (InterfaceInfo[]) { -+ /* -+ * Transitional virtio devices work only as Conventional PCI -+ * devices because they require PIO ports. 
-+ */ -+ { INTERFACE_CONVENTIONAL_PCI_DEVICE }, -+ { } -+ }, -+ }; -+ type_register(&transitional_type_info); -+ } -+} -+ - /* virtio-blk-pci */ - - static Property virtio_blk_pci_properties[] = { -@@ -1995,9 +2101,8 @@ static void virtio_blk_pci_instance_init(Object *obj) - "bootindex", &error_abort); - } - --static const TypeInfo virtio_blk_pci_info = { -- .name = TYPE_VIRTIO_BLK_PCI, -- .parent = TYPE_VIRTIO_PCI, -+static const VirtioPCIDeviceTypeInfo virtio_blk_pci_info = { -+ .generic_name = TYPE_VIRTIO_BLK_PCI, - .instance_size = sizeof(VirtIOBlkPCI), - .instance_init = virtio_blk_pci_instance_init, - .class_init = virtio_blk_pci_class_init, -@@ -2051,9 +2156,8 @@ static void vhost_user_blk_pci_instance_init(Object *obj) - "bootindex", &error_abort); - } - --static const TypeInfo vhost_user_blk_pci_info = { -- .name = TYPE_VHOST_USER_BLK_PCI, -- .parent = TYPE_VIRTIO_PCI, -+static const VirtioPCIDeviceTypeInfo vhost_user_blk_pci_info = { -+ .generic_name = TYPE_VHOST_USER_BLK_PCI, - .instance_size = sizeof(VHostUserBlkPCI), - .instance_init = vhost_user_blk_pci_instance_init, - .class_init = vhost_user_blk_pci_class_init, -@@ -2119,9 +2223,8 @@ static void virtio_scsi_pci_instance_init(Object *obj) - TYPE_VIRTIO_SCSI); - } - --static const TypeInfo virtio_scsi_pci_info = { -- .name = TYPE_VIRTIO_SCSI_PCI, -- .parent = TYPE_VIRTIO_PCI, -+static const VirtioPCIDeviceTypeInfo virtio_scsi_pci_info = { -+ .generic_name = TYPE_VIRTIO_SCSI_PCI, - .instance_size = sizeof(VirtIOSCSIPCI), - .instance_init = virtio_scsi_pci_instance_init, - .class_init = virtio_scsi_pci_class_init, -@@ -2174,9 +2277,8 @@ static void vhost_scsi_pci_instance_init(Object *obj) - "bootindex", &error_abort); - } - --static const TypeInfo vhost_scsi_pci_info = { -- .name = TYPE_VHOST_SCSI_PCI, -- .parent = TYPE_VIRTIO_PCI, -+static const VirtioPCIDeviceTypeInfo vhost_scsi_pci_info = { -+ .generic_name = TYPE_VHOST_SCSI_PCI, - .instance_size = sizeof(VHostSCSIPCI), - .instance_init = 
vhost_scsi_pci_instance_init, - .class_init = vhost_scsi_pci_class_init, -@@ -2229,9 +2331,8 @@ static void vhost_user_scsi_pci_instance_init(Object *obj) - "bootindex", &error_abort); - } - --static const TypeInfo vhost_user_scsi_pci_info = { -- .name = TYPE_VHOST_USER_SCSI_PCI, -- .parent = TYPE_VIRTIO_PCI, -+static const VirtioPCIDeviceTypeInfo vhost_user_scsi_pci_info = { -+ .generic_name = TYPE_VHOST_USER_SCSI_PCI, - .instance_size = sizeof(VHostUserSCSIPCI), - .instance_init = vhost_user_scsi_pci_instance_init, - .class_init = vhost_user_scsi_pci_class_init, -@@ -2277,9 +2378,8 @@ static void vhost_vsock_pci_instance_init(Object *obj) - TYPE_VHOST_VSOCK); - } - --static const TypeInfo vhost_vsock_pci_info = { -- .name = TYPE_VHOST_VSOCK_PCI, -- .parent = TYPE_VIRTIO_PCI, -+static const VirtioPCIDeviceTypeInfo vhost_vsock_pci_info = { -+ .generic_name = TYPE_VHOST_VSOCK_PCI, - .instance_size = sizeof(VHostVSockPCI), - .instance_init = vhost_vsock_pci_instance_init, - .class_init = vhost_vsock_pci_class_init, -@@ -2334,9 +2434,8 @@ static void virtio_balloon_pci_instance_init(Object *obj) - "guest-stats-polling-interval", &error_abort); - } - --static const TypeInfo virtio_balloon_pci_info = { -- .name = TYPE_VIRTIO_BALLOON_PCI, -- .parent = TYPE_VIRTIO_PCI, -+static const VirtioPCIDeviceTypeInfo virtio_balloon_pci_info = { -+ .generic_name = TYPE_VIRTIO_BALLOON_PCI, - .instance_size = sizeof(VirtIOBalloonPCI), - .instance_init = virtio_balloon_pci_instance_init, - .class_init = virtio_balloon_pci_class_init, -@@ -2407,9 +2506,8 @@ static void virtio_serial_pci_instance_init(Object *obj) - TYPE_VIRTIO_SERIAL); - } - --static const TypeInfo virtio_serial_pci_info = { -- .name = TYPE_VIRTIO_SERIAL_PCI, -- .parent = TYPE_VIRTIO_PCI, -+static const VirtioPCIDeviceTypeInfo virtio_serial_pci_info = { -+ .generic_name = TYPE_VIRTIO_SERIAL_PCI, - .instance_size = sizeof(VirtIOSerialPCI), - .instance_init = virtio_serial_pci_instance_init, - .class_init = 
virtio_serial_pci_class_init, -@@ -2462,9 +2560,8 @@ static void virtio_net_pci_instance_init(Object *obj) - "bootindex", &error_abort); - } - --static const TypeInfo virtio_net_pci_info = { -- .name = TYPE_VIRTIO_NET_PCI, -- .parent = TYPE_VIRTIO_PCI, -+static const VirtioPCIDeviceTypeInfo virtio_net_pci_info = { -+ .generic_name = TYPE_VIRTIO_NET_PCI, - .instance_size = sizeof(VirtIONetPCI), - .instance_init = virtio_net_pci_instance_init, - .class_init = virtio_net_pci_class_init, -@@ -2513,9 +2610,8 @@ static void virtio_rng_initfn(Object *obj) - TYPE_VIRTIO_RNG); - } - --static const TypeInfo virtio_rng_pci_info = { -- .name = TYPE_VIRTIO_RNG_PCI, -- .parent = TYPE_VIRTIO_PCI, -+static const VirtioPCIDeviceTypeInfo virtio_rng_pci_info = { -+ .generic_name = TYPE_VIRTIO_RNG_PCI, - .instance_size = sizeof(VirtIORngPCI), - .instance_init = virtio_rng_initfn, - .class_init = virtio_rng_pci_class_init, -@@ -2605,24 +2701,24 @@ static const TypeInfo virtio_input_hid_pci_info = { - .abstract = true, - }; - --static const TypeInfo virtio_keyboard_pci_info = { -- .name = TYPE_VIRTIO_KEYBOARD_PCI, -+static const VirtioPCIDeviceTypeInfo virtio_keyboard_pci_info = { -+ .generic_name = TYPE_VIRTIO_KEYBOARD_PCI, - .parent = TYPE_VIRTIO_INPUT_HID_PCI, - .class_init = virtio_input_hid_kbd_pci_class_init, - .instance_size = sizeof(VirtIOInputHIDPCI), - .instance_init = virtio_keyboard_initfn, - }; - --static const TypeInfo virtio_mouse_pci_info = { -- .name = TYPE_VIRTIO_MOUSE_PCI, -+static const VirtioPCIDeviceTypeInfo virtio_mouse_pci_info = { -+ .generic_name = TYPE_VIRTIO_MOUSE_PCI, - .parent = TYPE_VIRTIO_INPUT_HID_PCI, - .class_init = virtio_input_hid_mouse_pci_class_init, - .instance_size = sizeof(VirtIOInputHIDPCI), - .instance_init = virtio_mouse_initfn, - }; - --static const TypeInfo virtio_tablet_pci_info = { -- .name = TYPE_VIRTIO_TABLET_PCI, -+static const VirtioPCIDeviceTypeInfo virtio_tablet_pci_info = { -+ .generic_name = TYPE_VIRTIO_TABLET_PCI, - .parent = 
TYPE_VIRTIO_INPUT_HID_PCI, - .instance_size = sizeof(VirtIOInputHIDPCI), - .instance_init = virtio_tablet_initfn, -@@ -2637,8 +2733,8 @@ static void virtio_host_initfn(Object *obj) - TYPE_VIRTIO_INPUT_HOST); - } - --static const TypeInfo virtio_host_pci_info = { -- .name = TYPE_VIRTIO_INPUT_HOST_PCI, -+static const VirtioPCIDeviceTypeInfo virtio_host_pci_info = { -+ .generic_name = TYPE_VIRTIO_INPUT_HOST_PCI, - .parent = TYPE_VIRTIO_INPUT_PCI, - .instance_size = sizeof(VirtIOInputHostPCI), - .instance_init = virtio_host_initfn, -@@ -2692,36 +2788,39 @@ static const TypeInfo virtio_pci_bus_info = { - - static void virtio_pci_register_types(void) - { -- type_register_static(&virtio_rng_pci_info); -+ /* Base types: */ -+ type_register_static(&virtio_pci_bus_info); -+ type_register_static(&virtio_pci_info); - type_register_static(&virtio_input_pci_info); - type_register_static(&virtio_input_hid_pci_info); -- type_register_static(&virtio_keyboard_pci_info); -- type_register_static(&virtio_mouse_pci_info); -- type_register_static(&virtio_tablet_pci_info); -+ -+ /* Implementations: */ -+ virtio_pci_types_register(&virtio_rng_pci_info); -+ virtio_pci_types_register(&virtio_keyboard_pci_info); -+ virtio_pci_types_register(&virtio_mouse_pci_info); -+ virtio_pci_types_register(&virtio_tablet_pci_info); - #ifdef CONFIG_LINUX -- type_register_static(&virtio_host_pci_info); -+ virtio_pci_types_register(&virtio_host_pci_info); - #endif -- type_register_static(&virtio_pci_bus_info); -- type_register_static(&virtio_pci_info); - #ifdef CONFIG_VIRTFS -- type_register_static(&virtio_9p_pci_info); -+ virtio_pci_types_register(&virtio_9p_pci_info); - #endif -- type_register_static(&virtio_blk_pci_info); -+ virtio_pci_types_register(&virtio_blk_pci_info); - #if defined(CONFIG_VHOST_USER_BLK) -- type_register_static(&vhost_user_blk_pci_info); -+ virtio_pci_types_register(&vhost_user_blk_pci_info); - #endif -- type_register_static(&virtio_scsi_pci_info); -- 
type_register_static(&virtio_balloon_pci_info); -- type_register_static(&virtio_serial_pci_info); -- type_register_static(&virtio_net_pci_info); -+ virtio_pci_types_register(&virtio_scsi_pci_info); -+ virtio_pci_types_register(&virtio_balloon_pci_info); -+ virtio_pci_types_register(&virtio_serial_pci_info); -+ virtio_pci_types_register(&virtio_net_pci_info); - #ifdef CONFIG_VHOST_SCSI -- type_register_static(&vhost_scsi_pci_info); -+ virtio_pci_types_register(&vhost_scsi_pci_info); - #endif - #if defined(CONFIG_VHOST_USER_SCSI) -- type_register_static(&vhost_user_scsi_pci_info); -+ virtio_pci_types_register(&vhost_user_scsi_pci_info); - #endif - #ifdef CONFIG_VHOST_VSOCK -- type_register_static(&vhost_vsock_pci_info); -+ virtio_pci_types_register(&vhost_vsock_pci_info); - #endif - } - -diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h -index 813082b..8cd5466 100644 ---- a/hw/virtio/virtio-pci.h -+++ b/hw/virtio/virtio-pci.h -@@ -417,4 +417,58 @@ struct VirtIOCryptoPCI { - /* Virtio ABI version, if we increment this, we break the guest driver. */ - #define VIRTIO_PCI_ABI_VERSION 0 - -+/* Input for virtio_pci_types_register() */ -+typedef struct VirtioPCIDeviceTypeInfo { -+ /* -+ * Common base class for the subclasses below. -+ * -+ * Required only if transitional_name or non_transitional_name is set. -+ * -+ * We need a separate base type instead of making all types -+ * inherit from generic_name for two reasons: -+ * 1) generic_name implements INTERFACE_PCIE_DEVICE, but -+ * transitional_name does not. -+ * 2) generic_name has the "disable-legacy" and "disable-modern" -+ * properties, transitional_name and non_transitional name don't. -+ */ -+ const char *base_name; -+ /* -+ * Generic device type. Optional. -+ * -+ * Supports both transitional and non-transitional modes, -+ * using the disable-legacy and disable-modern properties. -+ * If disable-legacy=auto, (non-)transitional mode is selected -+ * depending on the bus where the device is plugged. 
-+ * -+ * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE, -+ * but PCI Express is supported only in non-transitional mode. -+ * -+ * The only type implemented by QEMU 3.1 and older. -+ */ -+ const char *generic_name; -+ /* -+ * The transitional device type. Optional. -+ * -+ * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE. -+ */ -+ const char *transitional_name; -+ /* -+ * The non-transitional device type. Optional. -+ * -+ * Implements INTERFACE_CONVENTIONAL_PCI_DEVICE only. -+ */ -+ const char *non_transitional_name; -+ -+ /* Parent type. If NULL, TYPE_VIRTIO_PCI is used */ -+ const char *parent; -+ -+ /* Same as TypeInfo fields: */ -+ size_t instance_size; -+ void (*instance_init)(Object *obj); -+ void (*class_init)(ObjectClass *klass, void *data); -+} VirtioPCIDeviceTypeInfo; -+ -+/* Register virtio-pci type(s). @t must be static. */ -+void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t); -+ - #endif --- -1.8.3.1 - diff --git a/kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch b/kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch deleted file mode 100644 index c17fc0f..0000000 --- a/kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch +++ /dev/null @@ -1,73 +0,0 @@ -From cbac773c0218dc23f2c434f2d04e3e3cc1d081a9 Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Wed, 16 Jan 2019 23:18:19 +0000 -Subject: [PATCH 4/7] virtio: Make disable-legacy/disable-modern compat - properties optional - -RH-Author: Eduardo Habkost -Message-id: <20190116231819.29310-5-ehabkost@redhat.com> -Patchwork-id: 84030 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 4/4] virtio: Make disable-legacy/disable-modern compat properties optional -Bugzilla: 1648023 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth -RH-Acked-by: Laurent Vivier -RH-Acked-by: Michael S. 
Tsirkin - -Upstream tree: git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git -Upstream commit: 53921bfdce3f8fffcc22338633855247fb7b7a74 - -The disable-legacy and disable-modern properties apply only to -some virtio-pci devices. Make those properties optional. - -This fixes the crash introduced by commit f6e501a28ef9 ("virtio: Provide -version-specific variants of virtio PCI devices"): - - $ qemu-system-x86_64 -machine pc-i440fx-2.6 \ - -device virtio-net-pci-non-transitional - Unexpected error in object_property_find() at qom/object.c:1092: - qemu-system-x86_64: -device virtio-net-pci-non-transitional: can't apply \ - global virtio-pci.disable-modern=on: Property '.disable-modern' not found - Aborted (core dumped) - -Backport notes: - HW_COMPAT_RHEL7_2 (include/hw/compat.h) being patched instead - of hw_compat_2_6 (hw/core/machine.c). Our downstream tree - doesn't have the equivalent of commit ff8f261f113b ("compat: - replace PC_COMPAT_2_6 & HW_COMPAT_2_6 macros") - -Reported-by: Thomas Huth -Fixes: f6e501a28ef9 ("virtio: Provide version-specific variants of virtio PCI devices") -Signed-off-by: Eduardo Habkost -Reviewed-by: Cornelia Huck -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 1b0ba10643110f9590c0da0167d0cb1db48ca2e4) -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. 
de Paula ---- - include/hw/compat.h | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/include/hw/compat.h b/include/hw/compat.h -index 40db243..aeb2f73 100644 ---- a/include/hw/compat.h -+++ b/include/hw/compat.h -@@ -380,10 +380,14 @@ - .driver = "virtio-pci",\ - .property = "disable-modern",\ - .value = "on",\ -+ /* Optional because not all virtio-pci devices support legacy mode */ \ -+ .optional = true,\ - },{ /* HW_COMPAT_RHEL7_2 */ \ - .driver = "virtio-pci",\ - .property = "disable-legacy",\ - .value = "off",\ -+ /* Optional because not all virtio-pci devices support legacy mode */ \ -+ .optional = true,\ - },{ /* HW_COMPAT_RHEL7_2 */ \ - .driver = TYPE_PCI_DEVICE,\ - .property = "x-pcie-lnksta-dllla",\ --- -1.8.3.1 - diff --git a/kvm-virtio-Provide-version-specific-variants-of-virtio-P.patch b/kvm-virtio-Provide-version-specific-variants-of-virtio-P.patch deleted file mode 100644 index c093144..0000000 --- a/kvm-virtio-Provide-version-specific-variants-of-virtio-P.patch +++ /dev/null @@ -1,519 +0,0 @@ -From 14004eb28d1e573365ed21ed09afad85d239c769 Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Wed, 16 Jan 2019 23:18:17 +0000 -Subject: [PATCH 2/7] virtio: Provide version-specific variants of virtio PCI - devices - -RH-Author: Eduardo Habkost -Message-id: <20190116231819.29310-3-ehabkost@redhat.com> -Patchwork-id: 84028 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 2/4] virtio: Provide version-specific variants of virtio PCI devices -Bugzilla: 1648023 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth -RH-Acked-by: Laurent Vivier -RH-Acked-by: Michael S. Tsirkin - -Many of the current virtio-*-pci device types actually represent -3 different types of devices: -* virtio 1.0 non-transitional devices -* virtio 1.0 transitional devices -* virtio 0.9 ("legacy device" in virtio 1.0 terminology) - -That would be just an annoyance if it didn't break our device/bus -compatibility QMP interfaces. 
With these multi-purpose device -types, there's no way to tell management software that -transitional devices and legacy devices require a Conventional -PCI bus. - -The multi-purpose device types would also prevent us from telling -management software what's the PCI vendor/device ID for them, -because their PCI IDs change at runtime depending on the bus -where they were plugged. - -This patch adds separate device types for each of those virtio -device flavors: - -- virtio-*-pci: the existing multi-purpose device types - - Configurable using `disable-legacy` and `disable-modern` - properties - - Legacy driver support is automatically enabled/disabled - depending on the bus where it is plugged - - Supports Conventional PCI and PCI Express buses - (but Conventional PCI is incompatible with - disable-legacy=off) - - Changes PCI vendor/device IDs at runtime -- virtio-*-pci-transitional: virtio-1.0 device supporting legacy drivers - - Supports Conventional PCI buses only, because - it has a PIO BAR -- virtio-*-pci-non-transitional: modern-only - - Supports both Conventional PCI and PCI Express buses - -The existing TYPE_* macros for these types will point to an -abstract base type, so existing casts in the code will keep -working for all variants. - -A simple test script (tests/acceptance/virtio_version.py) is -included, to check if the new device types are equivalent to -using the `disable-legacy` and `disable-modern` options. - -Acked-by: Andrea Bolognani -Reviewed-by: Cornelia Huck -Signed-off-by: Eduardo Habkost -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit f6e501a28ef9b69f6df6252160aa87876cc92a1a) -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/virtio/virtio-pci.c | 60 ++++++++++--- - hw/virtio/virtio-pci.h | 24 ++--- - tests/acceptance/virtio_version.py | 176 +++++++++++++++++++++++++++++++++++++ - 3 files changed, 236 insertions(+), 24 deletions(-) - create mode 100644 tests/acceptance/virtio_version.py - -diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index bee3509..68cffcd 100644 ---- a/hw/virtio/virtio-pci.c -+++ b/hw/virtio/virtio-pci.c -@@ -1120,7 +1120,10 @@ static void virtio_9p_pci_instance_init(Object *obj) - } - - static const VirtioPCIDeviceTypeInfo virtio_9p_pci_info = { -- .generic_name = TYPE_VIRTIO_9P_PCI, -+ .base_name = TYPE_VIRTIO_9P_PCI, -+ .generic_name = "virtio-9p-pci", -+ .transitional_name = "virtio-9p-pci-transitional", -+ .non_transitional_name = "virtio-9p-pci-non-transitional", - .instance_size = sizeof(V9fsPCIState), - .instance_init = virtio_9p_pci_instance_init, - .class_init = virtio_9p_pci_class_init, -@@ -2102,7 +2105,10 @@ static void virtio_blk_pci_instance_init(Object *obj) - } - - static const VirtioPCIDeviceTypeInfo virtio_blk_pci_info = { -- .generic_name = TYPE_VIRTIO_BLK_PCI, -+ .base_name = TYPE_VIRTIO_BLK_PCI, -+ .generic_name = "virtio-blk-pci", -+ .transitional_name = "virtio-blk-pci-transitional", -+ .non_transitional_name = "virtio-blk-pci-non-transitional", - .instance_size = sizeof(VirtIOBlkPCI), - .instance_init = virtio_blk_pci_instance_init, - .class_init = virtio_blk_pci_class_init, -@@ -2157,7 +2163,10 @@ static void vhost_user_blk_pci_instance_init(Object *obj) - } - - static const VirtioPCIDeviceTypeInfo vhost_user_blk_pci_info = { -- .generic_name = TYPE_VHOST_USER_BLK_PCI, -+ .base_name = TYPE_VHOST_USER_BLK_PCI, -+ .generic_name = "vhost-user-blk-pci", -+ .transitional_name = "vhost-user-blk-pci-transitional", -+ .non_transitional_name = "vhost-user-blk-pci-non-transitional", - .instance_size = sizeof(VHostUserBlkPCI), - .instance_init = vhost_user_blk_pci_instance_init, - .class_init = 
vhost_user_blk_pci_class_init, -@@ -2224,7 +2233,10 @@ static void virtio_scsi_pci_instance_init(Object *obj) - } - - static const VirtioPCIDeviceTypeInfo virtio_scsi_pci_info = { -- .generic_name = TYPE_VIRTIO_SCSI_PCI, -+ .base_name = TYPE_VIRTIO_SCSI_PCI, -+ .generic_name = "virtio-scsi-pci", -+ .transitional_name = "virtio-scsi-pci-transitional", -+ .non_transitional_name = "virtio-scsi-pci-non-transitional", - .instance_size = sizeof(VirtIOSCSIPCI), - .instance_init = virtio_scsi_pci_instance_init, - .class_init = virtio_scsi_pci_class_init, -@@ -2278,7 +2290,10 @@ static void vhost_scsi_pci_instance_init(Object *obj) - } - - static const VirtioPCIDeviceTypeInfo vhost_scsi_pci_info = { -- .generic_name = TYPE_VHOST_SCSI_PCI, -+ .base_name = TYPE_VHOST_SCSI_PCI, -+ .generic_name = "vhost-scsi-pci", -+ .transitional_name = "vhost-scsi-pci-transitional", -+ .non_transitional_name = "vhost-scsi-pci-non-transitional", - .instance_size = sizeof(VHostSCSIPCI), - .instance_init = vhost_scsi_pci_instance_init, - .class_init = vhost_scsi_pci_class_init, -@@ -2332,7 +2347,10 @@ static void vhost_user_scsi_pci_instance_init(Object *obj) - } - - static const VirtioPCIDeviceTypeInfo vhost_user_scsi_pci_info = { -- .generic_name = TYPE_VHOST_USER_SCSI_PCI, -+ .base_name = TYPE_VHOST_USER_SCSI_PCI, -+ .generic_name = "vhost-user-scsi-pci", -+ .transitional_name = "vhost-user-scsi-pci-transitional", -+ .non_transitional_name = "vhost-user-scsi-pci-non-transitional", - .instance_size = sizeof(VHostUserSCSIPCI), - .instance_init = vhost_user_scsi_pci_instance_init, - .class_init = vhost_user_scsi_pci_class_init, -@@ -2379,7 +2397,10 @@ static void vhost_vsock_pci_instance_init(Object *obj) - } - - static const VirtioPCIDeviceTypeInfo vhost_vsock_pci_info = { -- .generic_name = TYPE_VHOST_VSOCK_PCI, -+ .base_name = TYPE_VHOST_VSOCK_PCI, -+ .generic_name = "vhost-vsock-pci", -+ .transitional_name = "vhost-vsock-pci-transitional", -+ .non_transitional_name = 
"vhost-vsock-pci-non-transitional", - .instance_size = sizeof(VHostVSockPCI), - .instance_init = vhost_vsock_pci_instance_init, - .class_init = vhost_vsock_pci_class_init, -@@ -2435,7 +2456,10 @@ static void virtio_balloon_pci_instance_init(Object *obj) - } - - static const VirtioPCIDeviceTypeInfo virtio_balloon_pci_info = { -- .generic_name = TYPE_VIRTIO_BALLOON_PCI, -+ .base_name = TYPE_VIRTIO_BALLOON_PCI, -+ .generic_name = "virtio-balloon-pci", -+ .transitional_name = "virtio-balloon-pci-transitional", -+ .non_transitional_name = "virtio-balloon-pci-non-transitional", - .instance_size = sizeof(VirtIOBalloonPCI), - .instance_init = virtio_balloon_pci_instance_init, - .class_init = virtio_balloon_pci_class_init, -@@ -2507,7 +2531,10 @@ static void virtio_serial_pci_instance_init(Object *obj) - } - - static const VirtioPCIDeviceTypeInfo virtio_serial_pci_info = { -- .generic_name = TYPE_VIRTIO_SERIAL_PCI, -+ .base_name = TYPE_VIRTIO_SERIAL_PCI, -+ .generic_name = "virtio-serial-pci", -+ .transitional_name = "virtio-serial-pci-transitional", -+ .non_transitional_name = "virtio-serial-pci-non-transitional", - .instance_size = sizeof(VirtIOSerialPCI), - .instance_init = virtio_serial_pci_instance_init, - .class_init = virtio_serial_pci_class_init, -@@ -2561,7 +2588,10 @@ static void virtio_net_pci_instance_init(Object *obj) - } - - static const VirtioPCIDeviceTypeInfo virtio_net_pci_info = { -- .generic_name = TYPE_VIRTIO_NET_PCI, -+ .base_name = TYPE_VIRTIO_NET_PCI, -+ .generic_name = "virtio-net-pci", -+ .transitional_name = "virtio-net-pci-transitional", -+ .non_transitional_name = "virtio-net-pci-non-transitional", - .instance_size = sizeof(VirtIONetPCI), - .instance_init = virtio_net_pci_instance_init, - .class_init = virtio_net_pci_class_init, -@@ -2611,7 +2641,10 @@ static void virtio_rng_initfn(Object *obj) - } - - static const VirtioPCIDeviceTypeInfo virtio_rng_pci_info = { -- .generic_name = TYPE_VIRTIO_RNG_PCI, -+ .base_name = TYPE_VIRTIO_RNG_PCI, -+ 
.generic_name = "virtio-rng-pci", -+ .transitional_name = "virtio-rng-pci-transitional", -+ .non_transitional_name = "virtio-rng-pci-non-transitional", - .instance_size = sizeof(VirtIORngPCI), - .instance_init = virtio_rng_initfn, - .class_init = virtio_rng_pci_class_init, -@@ -2734,7 +2767,10 @@ static void virtio_host_initfn(Object *obj) - } - - static const VirtioPCIDeviceTypeInfo virtio_host_pci_info = { -- .generic_name = TYPE_VIRTIO_INPUT_HOST_PCI, -+ .base_name = TYPE_VIRTIO_INPUT_HOST_PCI, -+ .generic_name = "virtio-input-host-pci", -+ .transitional_name = "virtio-input-host-pci-transitional", -+ .non_transitional_name = "virtio-input-host-pci-non-transitional", - .parent = TYPE_VIRTIO_INPUT_PCI, - .instance_size = sizeof(VirtIOInputHostPCI), - .instance_init = virtio_host_initfn, -diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h -index 8cd5466..29b4216 100644 ---- a/hw/virtio/virtio-pci.h -+++ b/hw/virtio/virtio-pci.h -@@ -216,7 +216,7 @@ static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy) - /* - * virtio-scsi-pci: This extends VirtioPCIProxy. - */ --#define TYPE_VIRTIO_SCSI_PCI "virtio-scsi-pci" -+#define TYPE_VIRTIO_SCSI_PCI "virtio-scsi-pci-base" - #define VIRTIO_SCSI_PCI(obj) \ - OBJECT_CHECK(VirtIOSCSIPCI, (obj), TYPE_VIRTIO_SCSI_PCI) - -@@ -229,7 +229,7 @@ struct VirtIOSCSIPCI { - /* - * vhost-scsi-pci: This extends VirtioPCIProxy. - */ --#define TYPE_VHOST_SCSI_PCI "vhost-scsi-pci" -+#define TYPE_VHOST_SCSI_PCI "vhost-scsi-pci-base" - #define VHOST_SCSI_PCI(obj) \ - OBJECT_CHECK(VHostSCSIPCI, (obj), TYPE_VHOST_SCSI_PCI) - -@@ -239,7 +239,7 @@ struct VHostSCSIPCI { - }; - #endif - --#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci" -+#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci-base" - #define VHOST_USER_SCSI_PCI(obj) \ - OBJECT_CHECK(VHostUserSCSIPCI, (obj), TYPE_VHOST_USER_SCSI_PCI) - -@@ -252,7 +252,7 @@ struct VHostUserSCSIPCI { - /* - * vhost-user-blk-pci: This extends VirtioPCIProxy. 
- */ --#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci" -+#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci-base" - #define VHOST_USER_BLK_PCI(obj) \ - OBJECT_CHECK(VHostUserBlkPCI, (obj), TYPE_VHOST_USER_BLK_PCI) - -@@ -265,7 +265,7 @@ struct VHostUserBlkPCI { - /* - * virtio-blk-pci: This extends VirtioPCIProxy. - */ --#define TYPE_VIRTIO_BLK_PCI "virtio-blk-pci" -+#define TYPE_VIRTIO_BLK_PCI "virtio-blk-pci-base" - #define VIRTIO_BLK_PCI(obj) \ - OBJECT_CHECK(VirtIOBlkPCI, (obj), TYPE_VIRTIO_BLK_PCI) - -@@ -277,7 +277,7 @@ struct VirtIOBlkPCI { - /* - * virtio-balloon-pci: This extends VirtioPCIProxy. - */ --#define TYPE_VIRTIO_BALLOON_PCI "virtio-balloon-pci" -+#define TYPE_VIRTIO_BALLOON_PCI "virtio-balloon-pci-base" - #define VIRTIO_BALLOON_PCI(obj) \ - OBJECT_CHECK(VirtIOBalloonPCI, (obj), TYPE_VIRTIO_BALLOON_PCI) - -@@ -289,7 +289,7 @@ struct VirtIOBalloonPCI { - /* - * virtio-serial-pci: This extends VirtioPCIProxy. - */ --#define TYPE_VIRTIO_SERIAL_PCI "virtio-serial-pci" -+#define TYPE_VIRTIO_SERIAL_PCI "virtio-serial-pci-base" - #define VIRTIO_SERIAL_PCI(obj) \ - OBJECT_CHECK(VirtIOSerialPCI, (obj), TYPE_VIRTIO_SERIAL_PCI) - -@@ -301,7 +301,7 @@ struct VirtIOSerialPCI { - /* - * virtio-net-pci: This extends VirtioPCIProxy. - */ --#define TYPE_VIRTIO_NET_PCI "virtio-net-pci" -+#define TYPE_VIRTIO_NET_PCI "virtio-net-pci-base" - #define VIRTIO_NET_PCI(obj) \ - OBJECT_CHECK(VirtIONetPCI, (obj), TYPE_VIRTIO_NET_PCI) - -@@ -316,7 +316,7 @@ struct VirtIONetPCI { - - #ifdef CONFIG_VIRTFS - --#define TYPE_VIRTIO_9P_PCI "virtio-9p-pci" -+#define TYPE_VIRTIO_9P_PCI "virtio-9p-pci-base" - #define VIRTIO_9P_PCI(obj) \ - OBJECT_CHECK(V9fsPCIState, (obj), TYPE_VIRTIO_9P_PCI) - -@@ -330,7 +330,7 @@ typedef struct V9fsPCIState { - /* - * virtio-rng-pci: This extends VirtioPCIProxy. 
- */ --#define TYPE_VIRTIO_RNG_PCI "virtio-rng-pci" -+#define TYPE_VIRTIO_RNG_PCI "virtio-rng-pci-base" - #define VIRTIO_RNG_PCI(obj) \ - OBJECT_CHECK(VirtIORngPCI, (obj), TYPE_VIRTIO_RNG_PCI) - -@@ -365,7 +365,7 @@ struct VirtIOInputHIDPCI { - - #ifdef CONFIG_LINUX - --#define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci" -+#define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci-base" - #define VIRTIO_INPUT_HOST_PCI(obj) \ - OBJECT_CHECK(VirtIOInputHostPCI, (obj), TYPE_VIRTIO_INPUT_HOST_PCI) - -@@ -392,7 +392,7 @@ struct VirtIOGPUPCI { - /* - * vhost-vsock-pci: This extends VirtioPCIProxy. - */ --#define TYPE_VHOST_VSOCK_PCI "vhost-vsock-pci" -+#define TYPE_VHOST_VSOCK_PCI "vhost-vsock-pci-base" - #define VHOST_VSOCK_PCI(obj) \ - OBJECT_CHECK(VHostVSockPCI, (obj), TYPE_VHOST_VSOCK_PCI) - -diff --git a/tests/acceptance/virtio_version.py b/tests/acceptance/virtio_version.py -new file mode 100644 -index 0000000..ce99025 ---- /dev/null -+++ b/tests/acceptance/virtio_version.py -@@ -0,0 +1,176 @@ -+""" -+Check compatibility of virtio device types -+""" -+# Copyright (c) 2018 Red Hat, Inc. -+# -+# Author: -+# Eduardo Habkost -+# -+# This work is licensed under the terms of the GNU GPL, version 2 or -+# later. See the COPYING file in the top-level directory. 
-+import sys -+import os -+ -+sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..", "scripts")) -+from qemu import QEMUMachine -+from avocado_qemu import Test -+ -+# Virtio Device IDs: -+VIRTIO_NET = 1 -+VIRTIO_BLOCK = 2 -+VIRTIO_CONSOLE = 3 -+VIRTIO_RNG = 4 -+VIRTIO_BALLOON = 5 -+VIRTIO_RPMSG = 7 -+VIRTIO_SCSI = 8 -+VIRTIO_9P = 9 -+VIRTIO_RPROC_SERIAL = 11 -+VIRTIO_CAIF = 12 -+VIRTIO_GPU = 16 -+VIRTIO_INPUT = 18 -+VIRTIO_VSOCK = 19 -+VIRTIO_CRYPTO = 20 -+ -+PCI_VENDOR_ID_REDHAT_QUMRANET = 0x1af4 -+ -+# Device IDs for legacy/transitional devices: -+PCI_LEGACY_DEVICE_IDS = { -+ VIRTIO_NET: 0x1000, -+ VIRTIO_BLOCK: 0x1001, -+ VIRTIO_BALLOON: 0x1002, -+ VIRTIO_CONSOLE: 0x1003, -+ VIRTIO_SCSI: 0x1004, -+ VIRTIO_RNG: 0x1005, -+ VIRTIO_9P: 0x1009, -+ VIRTIO_VSOCK: 0x1012, -+} -+ -+def pci_modern_device_id(virtio_devid): -+ return virtio_devid + 0x1040 -+ -+def devtype_implements(vm, devtype, implements): -+ return devtype in [d['name'] for d in vm.command('qom-list-types', implements=implements)] -+ -+def get_pci_interfaces(vm, devtype): -+ interfaces = ('pci-express-device', 'conventional-pci-device') -+ return [i for i in interfaces if devtype_implements(vm, devtype, i)] -+ -+class VirtioVersionCheck(Test): -+ """ -+ Check if virtio-version-specific device types result in the -+ same device tree created by `disable-modern` and -+ `disable-legacy`. 
-+ -+ :avocado: enable -+ :avocado: tags=x86_64 -+ """ -+ -+ # just in case there are failures, show larger diff: -+ maxDiff = 4096 -+ -+ def run_device(self, devtype, opts=None, machine='pc'): -+ """ -+ Run QEMU with `-device DEVTYPE`, return device info from `query-pci` -+ """ -+ with QEMUMachine(self.qemu_bin) as vm: -+ vm.set_machine(machine) -+ if opts: -+ devtype += ',' + opts -+ vm.add_args('-device', '%s,id=devfortest' % (devtype)) -+ vm.add_args('-S') -+ vm.launch() -+ -+ pcibuses = vm.command('query-pci') -+ alldevs = [dev for bus in pcibuses for dev in bus['devices']] -+ devfortest = [dev for dev in alldevs -+ if dev['qdev_id'] == 'devfortest'] -+ return devfortest[0], get_pci_interfaces(vm, devtype) -+ -+ -+ def assert_devids(self, dev, devid, non_transitional=False): -+ self.assertEqual(dev['id']['vendor'], PCI_VENDOR_ID_REDHAT_QUMRANET) -+ self.assertEqual(dev['id']['device'], devid) -+ if non_transitional: -+ self.assertTrue(0x1040 <= dev['id']['device'] <= 0x107f) -+ self.assertGreaterEqual(dev['id']['subsystem'], 0x40) -+ -+ def check_all_variants(self, qemu_devtype, virtio_devid): -+ """Check if a virtio device type and its variants behave as expected""" -+ # Force modern mode: -+ dev_modern, _ = self.run_device(qemu_devtype, -+ 'disable-modern=off,disable-legacy=on') -+ self.assert_devids(dev_modern, pci_modern_device_id(virtio_devid), -+ non_transitional=True) -+ -+ # -non-transitional device types should be 100% equivalent to -+ # ,disable-modern=off,disable-legacy=on -+ dev_1_0, nt_ifaces = self.run_device('%s-non-transitional' % (qemu_devtype)) -+ self.assertEqual(dev_modern, dev_1_0) -+ -+ # Force transitional mode: -+ dev_trans, _ = self.run_device(qemu_devtype, -+ 'disable-modern=off,disable-legacy=off') -+ self.assert_devids(dev_trans, PCI_LEGACY_DEVICE_IDS[virtio_devid]) -+ -+ # Force legacy mode: -+ dev_legacy, _ = self.run_device(qemu_devtype, -+ 'disable-modern=on,disable-legacy=off') -+ self.assert_devids(dev_legacy, 
PCI_LEGACY_DEVICE_IDS[virtio_devid]) -+ -+ # No options: default to transitional on PC machine-type: -+ no_opts_pc, generic_ifaces = self.run_device(qemu_devtype) -+ self.assertEqual(dev_trans, no_opts_pc) -+ -+ #TODO: check if plugging on a PCI Express bus will make the -+ # device non-transitional -+ #no_opts_q35 = self.run_device(qemu_devtype, machine='q35') -+ #self.assertEqual(dev_modern, no_opts_q35) -+ -+ # -transitional device types should be 100% equivalent to -+ # ,disable-modern=off,disable-legacy=off -+ dev_trans, trans_ifaces = self.run_device('%s-transitional' % (qemu_devtype)) -+ self.assertEqual(dev_trans, dev_trans) -+ -+ # ensure the interface information is correct: -+ self.assertIn('conventional-pci-device', generic_ifaces) -+ self.assertIn('pci-express-device', generic_ifaces) -+ -+ self.assertIn('conventional-pci-device', nt_ifaces) -+ self.assertIn('pci-express-device', nt_ifaces) -+ -+ self.assertIn('conventional-pci-device', trans_ifaces) -+ self.assertNotIn('pci-express-device', trans_ifaces) -+ -+ -+ def test_conventional_devs(self): -+ self.check_all_variants('virtio-net-pci', VIRTIO_NET) -+ # virtio-blk requires 'driver' parameter -+ #self.check_all_variants('virtio-blk-pci', VIRTIO_BLOCK) -+ self.check_all_variants('virtio-serial-pci', VIRTIO_CONSOLE) -+ self.check_all_variants('virtio-rng-pci', VIRTIO_RNG) -+ self.check_all_variants('virtio-balloon-pci', VIRTIO_BALLOON) -+ self.check_all_variants('virtio-scsi-pci', VIRTIO_SCSI) -+ # virtio-9p requires 'fsdev' parameter -+ #self.check_all_variants('virtio-9p-pci', VIRTIO_9P) -+ -+ def check_modern_only(self, qemu_devtype, virtio_devid): -+ """Check if a modern-only virtio device type behaves as expected""" -+ # Force modern mode: -+ dev_modern, _ = self.run_device(qemu_devtype, -+ 'disable-modern=off,disable-legacy=on') -+ self.assert_devids(dev_modern, pci_modern_device_id(virtio_devid), -+ non_transitional=True) -+ -+ # No options: should be modern anyway -+ dev_no_opts, ifaces = 
self.run_device(qemu_devtype) -+ self.assertEqual(dev_modern, dev_no_opts) -+ -+ self.assertIn('conventional-pci-device', ifaces) -+ self.assertIn('pci-express-device', ifaces) -+ -+ def test_modern_only_devs(self): -+ self.check_modern_only('virtio-vga', VIRTIO_GPU) -+ self.check_modern_only('virtio-gpu-pci', VIRTIO_GPU) -+ self.check_modern_only('virtio-mouse-pci', VIRTIO_INPUT) -+ self.check_modern_only('virtio-tablet-pci', VIRTIO_INPUT) -+ self.check_modern_only('virtio-keyboard-pci', VIRTIO_INPUT) --- -1.8.3.1 - diff --git a/kvm-virtio-scsi-Forbid-devices-with-different-iothreads-.patch b/kvm-virtio-scsi-Forbid-devices-with-different-iothreads-.patch deleted file mode 100644 index cf57da8..0000000 --- a/kvm-virtio-scsi-Forbid-devices-with-different-iothreads-.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 5910975bcc4445e366399a438b811cdc8c3dd87e Mon Sep 17 00:00:00 2001 -From: Markus Armbruster -Date: Thu, 7 Feb 2019 12:18:19 +0000 -Subject: [PATCH 8/8] virtio-scsi: Forbid devices with different iothreads - sharing a blockdev - -RH-Author: Markus Armbruster -Message-id: <20190207121819.20092-4-armbru@redhat.com> -Patchwork-id: 84289 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 3/3] virtio-scsi: Forbid devices with different iothreads sharing a blockdev -Bugzilla: 1656276 1662508 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -From: Alberto Garcia - -This patch forbids attaching a disk to a SCSI device if its using a -different AioContext. Test case included. - -Signed-off-by: Alberto Garcia -Signed-off-by: Kevin Wolf -(cherry picked from commit eb97813ff5fd5bdffc8ed9f5be5a3a50eae70a2c) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/scsi/virtio-scsi.c | 7 +++++++ - tests/qemu-iotests/240 | 22 ++++++++++++++++++++++ - tests/qemu-iotests/240.out | 20 ++++++++++++++++++++ - 3 files changed, 49 insertions(+) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index f0d4f10..66df30d 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -800,9 +800,16 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, - return; - } - if (s->ctx && !s->dataplane_fenced) { -+ AioContext *ctx; - if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { - return; - } -+ ctx = blk_get_aio_context(sd->conf.blk); -+ if (ctx != s->ctx && ctx != qemu_get_aio_context()) { -+ error_setg(errp, "Cannot attach a blockdev that is using " -+ "a different iothread"); -+ return; -+ } - virtio_scsi_acquire(s); - blk_set_aio_context(sd->conf.blk, s->ctx); - virtio_scsi_release(s); -diff --git a/tests/qemu-iotests/240 b/tests/qemu-iotests/240 -index 5d499c9..65cc3b3 100755 ---- a/tests/qemu-iotests/240 -+++ b/tests/qemu-iotests/240 -@@ -101,6 +101,28 @@ run_qemu < -Date: Thu, 7 Feb 2019 12:18:17 +0000 -Subject: [PATCH 6/8] virtio-scsi: Move BlockBackend back to the main - AioContext on unplug - -RH-Author: Markus Armbruster -Message-id: <20190207121819.20092-2-armbru@redhat.com> -Patchwork-id: 84290 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/3] virtio-scsi: Move BlockBackend back to the main AioContext on unplug -Bugzilla: 1656276 1662508 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -From: Alberto Garcia - -This fixes a crash when attaching a disk to a SCSI device using -iothreads, then detaching it and reattaching it again. Test case -included. 
- -Signed-off-by: Alberto Garcia -Signed-off-by: Kevin Wolf -(cherry picked from commit a6f230c8d13a7ff3a0c7f1097412f44bfd9eff0b) -[Trivial conflict in tests/qemu-iotests/group due to lack of commit -1c4e7b640b resolved] -Signed-off-by: Markus Armbruster - -Signed-off-by: Danilo C. L. de Paula ---- - hw/scsi/virtio-scsi.c | 6 ++++ - tests/qemu-iotests/240 | 89 ++++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/240.out | 18 ++++++++++ - tests/qemu-iotests/group | 1 + - 4 files changed, 114 insertions(+) - create mode 100755 tests/qemu-iotests/240 - create mode 100644 tests/qemu-iotests/240.out - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 9f754c4..f0d4f10 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -833,6 +833,12 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, - virtio_scsi_release(s); - } - -+ if (s->ctx) { -+ virtio_scsi_acquire(s); -+ blk_set_aio_context(sd->conf.blk, qemu_get_aio_context()); -+ virtio_scsi_release(s); -+ } -+ - qdev_simple_device_unplug_cb(hotplug_dev, dev, errp); - } - -diff --git a/tests/qemu-iotests/240 b/tests/qemu-iotests/240 -new file mode 100755 -index 0000000..ead7ee0 ---- /dev/null -+++ b/tests/qemu-iotests/240 -@@ -0,0 +1,89 @@ -+#!/bin/bash -+# -+# Test hot plugging and unplugging with iothreads -+# -+# Copyright (C) 2019 Igalia, S.L. -+# Author: Alberto Garcia -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. 
-+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+# creator -+owner=berto@igalia.com -+ -+seq=`basename $0` -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! -+ -+# get standard environment, filters and checks -+. ./common.rc -+. ./common.filter -+ -+_supported_fmt generic -+_supported_proto generic -+_supported_os Linux -+ -+do_run_qemu() -+{ -+ echo Testing: "$@" -+ $QEMU -nographic -qmp stdio -serial none "$@" -+ echo -+} -+ -+# Remove QMP events from (pretty-printed) output. Doesn't handle -+# nested dicts correctly, but we don't get any of those in this test. -+_filter_qmp_events() -+{ -+ tr '\n' '\t' | sed -e \ -+ 's/{\s*"timestamp":\s*{[^}]*},\s*"event":[^,}]*\(,\s*"data":\s*{[^}]*}\)\?\s*}\s*//g' \ -+ | tr '\t' '\n' -+} -+ -+run_qemu() -+{ -+ do_run_qemu "$@" 2>&1 | _filter_qmp | _filter_qmp_events -+} -+ -+case "$QEMU_DEFAULT_MACHINE" in -+ s390-ccw-virtio) -+ virtio_scsi=virtio-scsi-ccw -+ ;; -+ *) -+ virtio_scsi=virtio-scsi-pci -+ ;; -+esac -+ -+echo -+echo === Unplug a SCSI disk and then plug it again === -+echo -+ -+run_qemu < -Date: Thu, 7 Mar 2019 09:11:50 +0000 -Subject: [PATCH 5/9] vnc: detect and optimize pageflips -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Gerd Hoffmann -Message-id: <20190307091150.6551-2-kraxel@redhat.com> -Patchwork-id: 84816 -O-Subject: [RHEL-8.0/AV qemu-kvm PATCH 1/1] vnc: detect and optimize pageflips -Bugzilla: 1666206 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: John Snow - -When size and format of the display surface stays the same we can just -tag the guest display as dirty and be done with it. - -There is no need need to resize the vnc server display or to touch the -vnc client dirty bits. 
On the next refresh cycle -vnc_refresh_server_surface() will check for actual display content -changes and update the client dirty bits as needed. - -The desktop resize and framebuffer format notifications to the vnc -client will be skipped too. - -Signed-off-by: Gerd Hoffmann -Reviewed-by: Daniel P. Berrangé -Message-id: 20190116101049.8929-1-kraxel@redhat.com -(cherry picked from commit 61e77a5f0c788495566aecb437bcf6b2cf9cda97) -Signed-off-by: Danilo C. L. de Paula ---- - ui/vnc.c | 25 ++++++++++++++++++++++--- - 1 file changed, 22 insertions(+), 3 deletions(-) - -diff --git a/ui/vnc.c b/ui/vnc.c -index d7903a7..765bdc5 100644 ---- a/ui/vnc.c -+++ b/ui/vnc.c -@@ -742,6 +742,17 @@ static void vnc_update_server_surface(VncDisplay *vd) - width, height); - } - -+static bool vnc_check_pageflip(DisplaySurface *s1, -+ DisplaySurface *s2) -+{ -+ return (s1 != NULL && -+ s2 != NULL && -+ surface_width(s1) == surface_width(s2) && -+ surface_height(s1) == surface_height(s2) && -+ surface_format(s1) == surface_format(s2)); -+ -+} -+ - static void vnc_dpy_switch(DisplayChangeListener *dcl, - DisplaySurface *surface) - { -@@ -749,6 +760,7 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl, - "Display output is not active."; - static DisplaySurface *placeholder; - VncDisplay *vd = container_of(dcl, VncDisplay, dcl); -+ bool pageflip = vnc_check_pageflip(vd->ds, surface); - VncState *vs; - - if (surface == NULL) { -@@ -761,14 +773,21 @@ static void vnc_dpy_switch(DisplayChangeListener *dcl, - vnc_abort_display_jobs(vd); - vd->ds = surface; - -- /* server surface */ -- vnc_update_server_surface(vd); -- - /* guest surface */ - qemu_pixman_image_unref(vd->guest.fb); - vd->guest.fb = pixman_image_ref(surface->image); - vd->guest.format = surface->format; - -+ if (pageflip) { -+ vnc_set_area_dirty(vd->guest.dirty, vd, 0, 0, -+ surface_width(surface), -+ surface_height(surface)); -+ return; -+ } -+ -+ /* server surface */ -+ vnc_update_server_surface(vd); -+ - QTAILQ_FOREACH(vs, 
&vd->clients, next) { - vnc_colordepth(vs); - vnc_desktop_resize(vs); --- -1.8.3.1 - diff --git a/kvm-x86-host-phys-bits-limit-option.patch b/kvm-x86-host-phys-bits-limit-option.patch deleted file mode 100644 index 4060a98..0000000 --- a/kvm-x86-host-phys-bits-limit-option.patch +++ /dev/null @@ -1,97 +0,0 @@ -From 5588db6fc02b530c73e51b43a0119562aa93f51d Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Fri, 22 Mar 2019 17:45:14 +0000 -Subject: [PATCH 1/2] x86: host-phys-bits-limit option - -RH-Author: plai@redhat.com -Message-id: <1553276715-26278-2-git-send-email-plai@redhat.com> -Patchwork-id: 85128 -O-Subject: [RHEL8.0 qemu-kvm PATCH 1/2] x86: host-phys-bits-limit option -Bugzilla: 1688915 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Michael S. Tsirkin - -From: Eduardo Habkost - -Some downstream distributions of QEMU set host-phys-bits=on by -default. This worked very well for most use cases, because -phys-bits really didn't have huge consequences. The only -difference was on the CPUID data seen by guests, and on the -handling of reserved bits. - -This changed in KVM commit 855feb673640 ("KVM: MMU: Add 5 level -EPT & Shadow page table support"). Now choosing a large -phys-bits value for a VM has bigger impact: it will make KVM use -5-level EPT even when it's not really necessary. This means -using the host phys-bits value may not be the best choice. - -Management software could address this problem by manually -configuring phys-bits depending on the size of the VM and the -amount of MMIO address space required for hotplug. But this is -not trivial to implement. - -However, there's another workaround that would work for most -cases: keep using the host phys-bits value, but only if it's -smaller than 48. This patch makes this possible by introducing a -new "-cpu" option: "host-phys-bits-limit". 
Management software -or users can make sure they will always use 4-level EPT using: -"host-phys-bits=on,host-phys-bits-limit=48". - -This behavior is still not enabled by default because QEMU -doesn't enable host-phys-bits=on by default. But users, -management software, or downstream distributions may choose to -change their defaults using the new option. - -Signed-off-by: Eduardo Habkost -Message-Id: <20181211192527.13254-1-ehabkost@redhat.com> -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. de Paula -(cherry picked from commit b6a062c64f9639558a88f46edc3dd76b54b26bb5) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 5 +++++ - target/i386/cpu.h | 3 +++ - 2 files changed, 8 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index d92c128..0c2e1c7 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -5211,6 +5211,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) - if (cpu->host_phys_bits) { - /* The user asked for us to use the host physical bits */ - cpu->phys_bits = host_phys_bits; -+ if (cpu->host_phys_bits_limit && -+ cpu->phys_bits > cpu->host_phys_bits_limit) { -+ cpu->phys_bits = cpu->host_phys_bits_limit; -+ } - } - - /* Print a warning if the user set it to a value that's not the -@@ -5798,6 +5802,7 @@ static Property x86_cpu_properties[] = { - DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), - DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0), - DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), -+ DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0), - DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), - DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, UINT32_MAX), - DEFINE_PROP_UINT32("xlevel", X86CPU, env.cpuid_xlevel, UINT32_MAX), -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 26412f1..db49f44 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -1458,6 +1458,9 @@ struct 
X86CPU { - /* if true override the phys_bits value with a value read from the host */ - bool host_phys_bits; - -+ /* if set, limit maximum value for phys_bits when host_phys_bits is true */ -+ uint8_t host_phys_bits_limit; -+ - /* Stop SMI delivery for migration compatibility with old machines */ - bool kvm_no_smi_migration; - --- -1.8.3.1 - diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 890e320..1bbb2d0 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,8 +68,8 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 3.1.0 -Release: 24%{?dist} +Version: 4.0.0 +Release: 0%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -78,7 +78,7 @@ URL: http://www.qemu.org/ ExclusiveArch: x86_64 %{power64} aarch64 s390x -Source0: http://wiki.qemu.org/download/qemu-3.1.0.tar.xz +Source0: http://wiki.qemu.org/download/qemu-4.0.0.tar.xz # KSM control scripts Source4: ksm.service @@ -114,139 +114,15 @@ Patch0008: 0008-Add-ppc64-machine-types.patch Patch0009: 0009-Add-s390x-machine-types.patch Patch0010: 0010-Add-x86_64-machine-types.patch Patch0011: 0011-Enable-make-check.patch -Patch0012: 0012-Use-kvm-by-default.patch -Patch0013: 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch -Patch0014: 0014-Add-support-statement-to-help-output.patch -Patch0015: 0015-globally-limit-the-maximum-number-of-CPUs.patch -Patch0016: 0016-Add-support-for-simpletrace.patch -Patch0017: 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch -Patch0018: 0018-usb-xhci-Fix-PCI-capability-order.patch -Patch0019: 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch -# For bz#1655820 - Can't migarate between rhel8 and rhel7 when guest has device "video" -Patch20: kvm-pc-7.5-compat-entries.patch -# For bz#1655820 - Can't migarate between rhel8 and rhel7 when guest has device "video" -Patch21: kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch -# For bz#1655820 
- Can't migarate between rhel8 and rhel7 when guest has device "video" -Patch22: kvm-pc-PC_RHEL7_6_COMPAT.patch -# For bz#1655820 - Can't migarate between rhel8 and rhel7 when guest has device "video" -Patch23: kvm-pc-Add-compat-for-pc-i440fx-rhel7.6.0-machine-type.patch -# For bz#1655820 - Can't migarate between rhel8 and rhel7 when guest has device "video" -Patch24: kvm-pc-Add-pc-q35-8.0.0-machine-type.patch -# For bz#1655820 - Can't migarate between rhel8 and rhel7 when guest has device "video" -Patch25: kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch -# For bz#1659604 - 8->7 migration failed: qemu-kvm: error: failed to set MSR 0x4b564d02 to 0x27fc13285 -Patch26: kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch -# For bz#1656508 - Machine types for qemu-kvm based on rebase to qemu-3.1 (ppc64le) -Patch27: kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch -# For bz#1656510 - Machine types for qemu-kvm based on rebase to qemu-3.1 (s390x) -Patch28: kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch -# For bz#1661967 - Kernel prints the message "VPHN is not supported. Disabling polling..." 
-Patch29: kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch -# For bz#1648023 - Provide separate device types for transitional virtio PCI devices - Fast Train -Patch30: kvm-virtio-Helper-for-registering-virtio-device-types.patch -# For bz#1648023 - Provide separate device types for transitional virtio PCI devices - Fast Train -Patch31: kvm-virtio-Provide-version-specific-variants-of-virtio-P.patch -# For bz#1648023 - Provide separate device types for transitional virtio PCI devices - Fast Train -Patch32: kvm-globals-Allow-global-properties-to-be-optional.patch -# For bz#1648023 - Provide separate device types for transitional virtio PCI devices - Fast Train -Patch33: kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch -# For bz#1656504 - Machine types for qemu-kvm based on rebase to qemu-3.1 (aarch64) -Patch34: kvm-aarch64-Add-virt-rhel8.0.0-machine-type-for-ARM.patch -# For bz#1656504 - Machine types for qemu-kvm based on rebase to qemu-3.1 (aarch64) -Patch35: kvm-aarch64-Set-virt-rhel8.0.0-max_cpus-to-512.patch -# For bz#1656504 - Machine types for qemu-kvm based on rebase to qemu-3.1 (aarch64) -Patch36: kvm-aarch64-Use-256MB-ECAM-region-by-default.patch -# For bz#1653114 - Incorrect NUMA nodes passed to qemu-kvm guest in ibm,max-associativity-domains property -Patch37: kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch -# For bz#1668205 - Guest quit with error when hotunplug cpu -Patch38: kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch -# For bz#1653511 - qemu doesn't report all support cpu features which cause libvirt cannot get the support status of hv_tlbflush -Patch39: kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch -# For bz#1653511 - qemu doesn't report all support cpu features which cause libvirt cannot get the support status of hv_tlbflush -Patch40: kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch -# For bz#1666601 - [q35] dst qemu core dumped when do rdma migration with Mellanox IB QDR card 
-Patch41: kvm-migration-rdma-unregister-fd-handler.patch -# For bz#1659127 - Stress guest and stop it, then do live migration, guest hit call trace on destination end -Patch42: kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch -# For bz#1659127 - Stress guest and stop it, then do live migration, guest hit call trace on destination end -Patch43: kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch -# For bz#1655947 - qemu-kvm core dumped after unplug the device which was set io throttling parameters -Patch44: kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch -# For bz#1655947 - qemu-kvm core dumped after unplug the device which was set io throttling parameters -Patch45: kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch -# For bz#1668244 - qemu-img: /var/tmp/v2vovl9951f8.qcow2: CURL: Error opening file: The requested URL returned error: 404 Not Found -Patch47: kvm-json-Fix-handling-when-not-interpolating.patch -# For bz#1665896 - VNC unix listener socket is deleted after first client quits -Patch48: kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch -# For bz#1668248 - "An unknown error has occurred" when using cdrom to install the system with two blockdev disks.(when choose installation destination) -Patch49: kvm-scsi-disk-Don-t-use-empty-string-as-device-id.patch -# For bz#1668248 - "An unknown error has occurred" when using cdrom to install the system with two blockdev disks.(when choose installation destination) -Patch50: kvm-scsi-disk-Add-device_id-property.patch -# For bz#1669922 - Backport avocado-qemu tests for QEMU 3.1 -Patch51: kvm-Acceptance-tests-add-Linux-initrd-checking-test.patch -# For bz#1671519 - RHEL8.0 Snapshot3 - qemu doesn't free up hugepage memory when hotplug/hotunplug using memory-backend-file (qemu-kvm) -Patch52: kvm-mmap-alloc-unfold-qemu_ram_mmap.patch -# For bz#1671519 - RHEL8.0 Snapshot3 - qemu doesn't free up hugepage memory when hotplug/hotunplug using memory-backend-file (qemu-kvm) -Patch53: 
kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch -# For bz#1653590 - [Fast train]had better stop qemu immediately while guest was making use of an improper page size -Patch54: kvm-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -# For bz#1673014 - Local VM and migrated VM on the same host can run with same RAW file as visual disk source while without shareable configured or lock manager enabled -Patch55: kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch -# For bz#1656276 - qemu-kvm core dumped after hotplug the deleted disk with iothread parameter -# For bz#1662508 - Qemu core dump when start guest with two disks using same drive -Patch56: kvm-virtio-scsi-Move-BlockBackend-back-to-the-main-AioCo.patch -# For bz#1656276 - qemu-kvm core dumped after hotplug the deleted disk with iothread parameter -# For bz#1662508 - Qemu core dump when start guest with two disks using same drive -Patch57: kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch -# For bz#1656276 - qemu-kvm core dumped after hotplug the deleted disk with iothread parameter -# For bz#1662508 - Qemu core dump when start guest with two disks using same drive -Patch58: kvm-virtio-scsi-Forbid-devices-with-different-iothreads-.patch -# For bz#1644985 - The "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong - Fast Train -Patch59: kvm-doc-fix-the-configuration-path.patch -# For bz#1664997 - Restrict floppy device to RHEL-7 machine types -Patch60: kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch -# For bz#1664997 - Restrict floppy device to RHEL-7 machine types -Patch61: kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch -# For bz#1678968 - -blockdev: auto-read-only is ineffective for drivers on read-only whitelist -Patch62: kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch -# For bz#1661030 - Remove MPX support from 8.0 machine types -Patch63: 
kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch -# For bz#1661515 - Remove PCONFIG and INTEL_PT from Icelake-* CPU models -Patch64: kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch -# For bz#1661515 - Remove PCONFIG and INTEL_PT from Icelake-* CPU models -Patch65: kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch -# For bz#1661515 - Remove PCONFIG and INTEL_PT from Icelake-* CPU models -Patch66: kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch -# For bz#1608649 - Query-migrate get "failed" status after migrate-cancel -Patch67: kvm-migration-Fix-cancel-state.patch -# For bz#1608649 - Query-migrate get "failed" status after migrate-cancel -Patch68: kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch -# For bz#1686260 - stibp is missing on qemu 3.0 and qemu 3.1 -Patch69: kvm-i386-Add-stibp-flag-name.patch -# For bz#1674438 - RHEL8.0 - Guest reboot fails after memory hotplug multiple times (kvm) -Patch71: kvm-spapr-fix-out-of-bounds-write-in-spapr_populate_drme.patch -# For bz#1655065 - [rhel.8.0][fast train]'qemu-img measure' size does not match the real allocated size for luks-inside-qcow2 image -Patch72: kvm-qcow2-include-LUKS-payload-overhead-in-qemu-img-meas.patch -# For bz#1655065 - [rhel.8.0][fast train]'qemu-img measure' size does not match the real allocated size for luks-inside-qcow2 image -Patch73: kvm-iotests-add-LUKS-payload-overhead-to-178-qemu-img-me.patch -# For bz#1666206 - vnc server should detect page-flips and avoid sending fullscreen updates then. 
-Patch74: kvm-vnc-detect-and-optimize-pageflips.patch -# For bz#1669053 - Guest call trace when boot with nvdimm device backed by /dev/dax -Patch76: kvm-hostmem-file-reject-invalid-pmem-file-sizes.patch -# For bz#1687582 - QEMU IOTEST 200 fails with 'virtio-scsi-pci is not a valid device model name' -Patch77: kvm-iotests-Fix-test-200-on-s390x-without-virtio-pci.patch -# For bz#1652572 - QEMU core dumped if stop nfs service during migration -Patch78: kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch -# For bz#1687578 - Incorrect CVE vulnerabilities reported on Cascade Lake cpus -Patch79: kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch -# For bz#1687578 - Incorrect CVE vulnerabilities reported on Cascade Lake cpus -Patch80: kvm-i386-Make-arch_capabilities-migratable.patch -# For bz#1693173 - CVE-2018-20815 qemu-kvm: QEMU: device_tree: heap buffer overflow while loading device tree blob [rhel-av-8] -Patch81: kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch -# For bz#1688915 - [Intel 8.0 Alpha] physical bits should <= 48 when host with 5level paging &EPT5 and qemu command with "-cpu qemu64" parameters. -Patch82: kvm-x86-host-phys-bits-limit-option.patch -# For bz#1688915 - [Intel 8.0 Alpha] physical bits should <= 48 when host with 5level paging &EPT5 and qemu command with "-cpu qemu64" parameters. 
-Patch83: kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch +Patch0012: 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +Patch0013: 0013-Add-support-statement-to-help-output.patch +Patch0014: 0014-globally-limit-the-maximum-number-of-CPUs.patch +Patch0015: 0015-Add-support-for-simpletrace.patch +Patch0016: 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0017: 0017-usb-xhci-Fix-PCI-capability-order.patch +Patch0018: 0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +Patch0019: 0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +Patch0020: 0020-doc-fix-the-configuration-path.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -304,6 +180,7 @@ BuildRequires: pixman-devel # Documentation requirement BuildRequires: perl-podlators BuildRequires: texinfo +BuildRequires: python3-sphinx # For rdma %if 0%{?have_librdma} BuildRequires: rdma-core-devel @@ -349,6 +226,8 @@ Requires: mesa-libEGL Requires: mesa-dri-drivers %endif +BuildRequires: perl-Test-Harness + Requires: qemu-kvm-core = %{epoch}:%{version}-%{release} %rhev_ma_conflicts qemu-kvm @@ -692,7 +571,9 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --disable-vvfat \ --disable-qed \ --disable-parallels \ - --disable-sheepdog + --disable-sheepdog \ + --without-default-devices + echo "config-host.mak contents:" echo "===" @@ -702,15 +583,17 @@ echo "===" make V=1 %{?_smp_mflags} $buildldflags # Setup back compat qemu-kvm binary -%{__python3} scripts/tracetool.py --backend dtrace --format stap --group=all \ - --binary %{_libexecdir}/qemu-kvm --target-name %{kvm_target} \ - --target-type system --probe-prefix \ - qemu.kvm trace-events-all > qemu-kvm.stp +%{__python3} scripts/tracetool.py --backend dtrace --format stap \ + --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ + trace-events-all > qemu-kvm.stp + +%{__python3} scripts/tracetool.py --backends=dtrace --format=log-stap \ + --group=all --binary 
%{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ + trace-events-all > qemu-kvm-log.stp %{__python3} scripts/tracetool.py --backend dtrace --format simpletrace-stap \ - --group=all --binary %{_libexecdir}/qemu-kvm --target-name %{kvm_target} \ - --target-type system --probe-prefix \ - qemu.kvm trace-events-all > qemu-kvm-simpletrace.stp + --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ + trace-events-all > qemu-kvm-simpletrace.stp cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm @@ -744,10 +627,10 @@ mkdir -p $RPM_BUILD_ROOT%{_udevrulesdir}/ mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name} # Create new directories and put them all under tests-src -mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/ +mkdir -p $RPM_BUILD_ROOT%{testsdir}/python +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/acceptance mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests -mkdir -p $RPM_BUILD_ROOT%{testsdir}/scripts mkdir -p $RPM_BUILD_ROOT%{testsdir}/scripts/qmp install -p -m 0755 udev-kvm-check $RPM_BUILD_ROOT%{_udevdir} @@ -760,7 +643,7 @@ install -m 0644 scripts/dump-guest-memory.py \ cp -R tests/acceptance/* $RPM_BUILD_ROOT%{testsdir}/tests/acceptance/ # Install qemu.py and qmp/ scripts required to run avocado_qemu tests -install -p -m 0644 scripts/qemu.py $RPM_BUILD_ROOT%{testsdir}/scripts/ +cp -R python/qemu $RPM_BUILD_ROOT%{testsdir}/python cp -R scripts/qmp/* $RPM_BUILD_ROOT%{testsdir}/scripts/qmp install -p -m 0755 tests/Makefile.include $RPM_BUILD_ROOT%{testsdir}/tests/ @@ -811,11 +694,18 @@ install -m 0644 qemu-ga.8 ${RPM_BUILD_ROOT}%{_mandir}/man8/ install -m 0755 qemu-kvm $RPM_BUILD_ROOT%{_libexecdir}/ install -m 0644 qemu-kvm.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -m 0644 qemu-kvm-log.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ install -m 0644 qemu-kvm-simpletrace.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +rm $RPM_BUILD_ROOT/%{_datadir}/applications/qemu.desktop rm 
$RPM_BUILD_ROOT%{_bindir}/qemu-system-%{kvm_target} rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}.stp rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-simpletrace.stp +rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-log.stp +rm $RPM_BUILD_ROOT%{_bindir}/elf2dmp + +# Mangle qemu-kvm-stap +sed -i -e '1 s/python/python3/' $RPM_BUILD_ROOT%{_bindir}/qemu-trace-stap # Install simpletrace install -m 0755 scripts/simpletrace.py $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py @@ -872,6 +762,7 @@ rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot-sam460-20100605.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/kvmvapic.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/linuxboot.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/multiboot.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/pvh.bin %endif # Remove sparc files @@ -951,11 +842,14 @@ find $RPM_BUILD_ROOT -name '*.la' -or -name '*.a' | xargs rm -f # RPM won't pick up their dependencies. 
chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/block-*.so +# Remove buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/interop/.buildinfo + %check export DIFF=diff; make check V=1 pushd tests/qemu-iotests ./check -v -raw 001 002 003 004 005 008 009 010 011 012 021 025 032 033 045 048 052 063 077 086 101 104 106 120 132 140 143 145 147 150 152 157 159 160 162 170 171 175 181 184 194 205 208 218 221 222 226 227 232 -./check -v -qcow2 001 002 003 004 005 007 008 009 010 011 012 017 018 019 020 021 022 024 025 027 028 029 031 032 033 034 035 036 037 038 039 042 043 046 047 048 049 050 052 053 054 056 057 058 062 063 065 066 068 069 072 073 074 080 085 086 087 089 090 091 095 096 097 098 102 103 104 105 107 108 110 111 114 117 120 126 127 130 132 133 134 137 138 140 141 142 143 144 145 147 150 151 152 156 157 158 159 162 165 170 174 177 179 181 184 187 188 189 190 191 194 195 196 198 201 202 203 204 205 206 208 209 214 216 217 218 222 223 226 227 232 +./check -v -qcow2 001 002 003 004 005 007 008 009 010 011 012 017 018 019 020 021 022 024 025 027 028 029 031 032 033 034 035 036 037 038 039 042 043 046 047 048 049 050 052 053 054 056 057 058 062 063 065 066 068 069 072 073 074 080 085 086 087 089 090 091 095 096 097 098 102 103 104 105 107 108 110 111 114 117 120 126 127 130 132 133 134 137 138 140 141 142 143 144 145 147 150 151 152 156 157 158 159 162 165 170 174 177 179 181 184 187 188 189 190 191 194 195 196 198 201 202 203 204 205 206 208 209 214 216 217 218 222 226 227 232 ./check -v -luks 001 002 003 004 005 008 009 010 011 012 021 032 033 052 140 143 145 157 162 174 181 184 208 218 227 ./check -v -nbd 001 002 003 004 005 008 009 010 011 021 032 033 045 077 094 104 119 123 132 143 145 147 151 152 162 181 184 194 205 208 218 222 popd @@ -997,14 +891,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %systemd_postun_with_restart ksm.service %systemd_postun_with_restart ksmtuned.service -%global qemu_kvm_files \ -%{_libexecdir}/qemu-kvm \ 
-%{_datadir}/systemtap/tapset/qemu-kvm.stp \ -%{_datadir}/%{name}/trace-events-all \ -%{_datadir}/systemtap/tapset/qemu-kvm-simpletrace.stp \ -%{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp \ -%{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf - %files # Deliberately empty @@ -1025,11 +911,13 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %doc %{qemudocdir}/qemu-ga-ref.txt %doc %{qemudocdir}/qemu-qmp-ref.html %doc %{qemudocdir}/qemu-qmp-ref.txt +%doc %{qemudocdir}/interop/* %{_mandir}/man7/qemu-qmp-ref.7* %{_mandir}/man7/qemu-cpu-models.7* %{_bindir}/qemu-keymap %{_bindir}/qemu-pr-helper %{_bindir}/qemu-edid +%{_bindir}/qemu-trace-stap %{_unitdir}/qemu-pr-helper.service %{_unitdir}/qemu-pr-helper.socket %{_mandir}/man7/qemu-ga-ref.7* @@ -1037,6 +925,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %dir %{_datadir}/%{name}/ %{_datadir}/%{name}/keymaps/ %{_mandir}/man1/%{name}.1* +%{_mandir}/man1/qemu-trace-stap.1* %{_mandir}/man7/qemu-block-drivers.7* %attr(4755, -, -) %{_libexecdir}/qemu-bridge-helper %config(noreplace) %{_sysconfdir}/sasl2/%{name}.conf @@ -1067,6 +956,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/%{name}/multiboot.bin %{_datadir}/%{name}/kvmvapic.bin %{_datadir}/%{name}/sgabios.bin + %{_datadir}/%{name}/pvh.bin %endif %ifarch s390x %{_datadir}/%{name}/s390-ccw.img @@ -1086,14 +976,19 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/%{name}/efi-rtl8139.rom %{_datadir}/%{name}/efi-ne2k_pci.rom %endif -%{_datadir}/%{name}/qemu-icon.bmp -%{_datadir}/%{name}/qemu_logo_no_text.svg +%{_datadir}/icons/* %{_datadir}/%{name}/linuxboot_dma.bin %{_datadir}/%{name}/dump-guest-memory.py* %ifarch %{power64} %{_datadir}/%{name}/spapr-rtas.bin %endif -%{?qemu_kvm_files:} +%{_libexecdir}/qemu-kvm +%{_datadir}/systemtap/tapset/qemu-kvm.stp +%{_datadir}/systemtap/tapset/qemu-kvm-log.stp +%{_datadir}/%{name}/trace-events-all +%{_datadir}/systemtap/tapset/qemu-kvm-simpletrace.stp 
+%{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp +%{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf %if 0%{have_kvm_setup} %{_prefix}/lib/systemd/kvm-setup %{_unitdir}/kvm-setup.service @@ -1145,11 +1040,8 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog -* Fri Apr 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-24.el8 -- kvm-x86-host-phys-bits-limit-option.patch [bz#1688915] -- kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch [bz#1688915] -- Resolves: bz#1688915 - ([Intel 8.0 Alpha] physical bits should <= 48 when host with 5level paging &EPT5 and qemu command with "-cpu qemu64" parameters.) +* Tue May 7 2019 Danilo Cesar Lemes de Paula - 4.0.0-0.el8 +- Rebase qemu-kvm to 4.0.0 * Tue Apr 23 2019 Danilo Cesar Lemes de Paula - 3.1.0-23.el8 - kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch [bz#1693173] @@ -1259,7 +1151,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ (Local VM and migrated VM on the same host can run with same RAW file as visual disk source while without shareable configured or lock manager enabled) * Fri Feb 08 2019 Danilo Cesar Lemes de Paula - 3.1.0-12.el8 -- Removing kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch [bz#1644985] - kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch [bz#1665896] - kvm-scsi-disk-Don-t-use-empty-string-as-device-id.patch [bz#1668248] - kvm-scsi-disk-Add-device_id-property.patch [bz#1668248] @@ -1291,11 +1182,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - Resolves: bz#1666601 ([q35] dst qemu core dumped when do rdma migration with Mellanox IB QDR card) -* Mon Jan 28 2019 Danilo Cesar Lemes de Paula - 3.1.0-8.el8 -- kvm-migration-rdma-unregister-fd-handler.patch [bz#1666601] -- Resolves: bz#1666601 - ([q35] dst qemu core dumped when do rdma migration with Mellanox IB QDR card) - * Thu Jan 24 2019 Danilo Cesar Lemes de Paula - 3.1.0-7.el8 - kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch [bz#1653511] - 
kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch [bz#1653511] @@ -1326,6 +1212,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ * Fri Jan 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-4.el8 - kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch [bz#1656510] - kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch [bz#1661967] +- kvm-redhat-Fixing-.gitpublish-to-include-AV-information.patch [] - Resolves: bz#1656510 (Machine types for qemu-kvm based on rebase to qemu-3.1 (s390x)) - Resolves: bz#1661967 diff --git a/sources b/sources index d417a06..e43b6a0 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-3.1.0.tar.xz) = 7e8dae823937cfac2f0c60406bd3bdcb89df40313dab2a4bed327d5198f7fcc68ac8b31e44692caa09299cc71256ee0b8c17e4f49f78ada8043d424f5daf82fe +SHA512 (qemu-4.0.0.tar.xz) = 952e94194ce9e64c15388c59035cb31fb9f761d30095c2fb9441012b609c18c9976285727b93bf37b95e15675802d73f8e1c4619ebecd23606675bb503646b13 From 4b3f8a9dfe0ad9f9fcb11d369bc611e4472285c1 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Thu, 16 May 2019 15:50:01 -0300 Subject: [PATCH 034/195] * Thu May 16 2019 Danilo Cesar Lemes de Paula - 4.0.0-1.el8 - 4.0.0 temporary rebase - Resolves: bz#1705235 (Rebase qemu-kvm for RHEL-AV 8.1.0) --- ...ys-bits-limit-48-on-rhel-machine-typ.patch | 62 +++++++++++++++++++ 0022-redhat-Post-rebase-synchronization.patch | 36 +++++++++++ qemu-kvm.spec | 38 ++++++++++-- 3 files changed, 132 insertions(+), 4 deletions(-) create mode 100644 0021-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch create mode 100644 0022-redhat-Post-rebase-synchronization.patch diff --git a/0021-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch b/0021-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch new file mode 100644 index 0000000..2146485 --- /dev/null +++ b/0021-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch @@ -0,0 +1,62 @@ +From 26606676d9d88c3f301fb8b3d39941ceff267af5 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Mon, 13 May 2019 16:27:20 +0200 +Subject: rhel: Set host-phys-bits-limit=48 on rhel machine-types + +RH-Author: plai@redhat.com +Message-id: <1553276715-26278-3-git-send-email-plai@redhat.com> +Patchwork-id: 85126 +O-Subject: [RHEL8.0 qemu-kvm PATCH 2/2] rhel: Set host-phys-bits-limit=48 on rhel machine-types +Bugzilla: 1688915 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Michael S. Tsirkin + +From: Eduardo Habkost + +Upstream status: not applicable + +Currently we use the host physical address size by default on +VMs. This was a good default on most cases, but this is not the +case on host CPUs supporting 5-level EPT. On those cases, we +want VMs to use 4-level EPT by default. + +Ensure VMs will use 4-level EPT by default, by limiting physical +address bits to 48. + +Not applicable upstream because upstream doesn't set +host-phys-bits=on by default. + +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. 
de Paula +(cherry picked from commit 01a2ecb4c38fe4a35455ea706e76984ee8d5a769) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L. de Paula + +Rebase notes (4.0.0): +- update to no compat format + +Signed-off-by: Danilo C. L. de Paula +--- + hw/i386/pc.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 03497eed9a..91a4c5833a 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -360,6 +360,11 @@ GlobalProperty pc_rhel_compat[] = { + .property = "host-phys-bits", + .value = "on", + }, ++ { /* PC_RHEL_COMPAT */ ++ .driver = TYPE_X86_CPU, ++ .property = "host-phys-bits-limit", ++ .value = "48", ++ }, + { /* PC_RHEL_COMPAT bz 1508330 */ + .driver = "vfio-pci", + .property = "x-no-geforce-quirks", +-- +2.20.1 + diff --git a/0022-redhat-Post-rebase-synchronization.patch b/0022-redhat-Post-rebase-synchronization.patch new file mode 100644 index 0000000..001c982 --- /dev/null +++ b/0022-redhat-Post-rebase-synchronization.patch @@ -0,0 +1,36 @@ +From bbdbdb6ebc696a8bef420b6bd39fb3f5bbe0d5d4 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Mon, 13 May 2019 16:32:48 +0200 +Subject: redhat: Post rebase synchronization + +We based rebase on qemu-kvm-3.1.0-23.el8. However, 8.0.1 version moved +to qemu-kvm-3.1.0-25.el8. Adding missing changes. + +Merged-patches (4.0.0): +- ebb6e97 redhat: Fix LOCALVERSION creation +- b0ab0cc redhat: enable tpmdev passthrough (not disabling tests) +- 7cb3c4a Enable libpmem to support nvdimm + +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula +--- + configure | 4 ++++ + redhat/Makefile | 4 ++-- + redhat/qemu-kvm.spec.template | 28 +++++++++++++++++++++++++++- + 3 files changed, 33 insertions(+), 3 deletions(-) + +diff --git a/configure b/configure +index eb0a0dde86..d6d5912ee8 100755 +--- a/configure ++++ b/configure +@@ -2411,6 +2411,10 @@ if test "$seccomp" != "no" ; then + seccomp="no" + fi + fi ++ ++# RHEL8-specific, only passthrough for now, rhbz#1688312 ++tpm_emulator=no ++ + ########################################## + # xen probe + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 1bbb2d0..d95f994 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.0.0 -Release: 0%{?dist} +Release: 1%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -123,6 +123,8 @@ Patch0017: 0017-usb-xhci-Fix-PCI-capability-order.patch Patch0018: 0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0019: 0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch Patch0020: 0020-doc-fix-the-configuration-path.patch +Patch0021: 0021-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch +Patch0022: 0022-redhat-Post-rebase-synchronization.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -208,6 +210,10 @@ BuildRequires: systemd-devel BuildRequires: libcap-ng-devel BuildRequires: diffutils +%ifarch x86_64 +BuildRequires: libpmem-devel +Requires: libpmem +%endif # qemu-keymap BuildRequires: pkgconfig(xkbcommon) @@ -477,6 +483,11 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --enable-vxhs \ %else --disable-vxhs \ +%endif +%ifarch x86_64 + --enable-libpmem \ +%else + --disable-libpmem \ %endif --enable-vhost-user \ %ifarch %{ix86} x86_64 @@ -515,7 +526,7 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --enable-snappy \ --disable-sparse \ --disable-strip \ - --disable-tpm \ + --enable-tpm \ 
--enable-trace-backend=dtrace \ --disable-vde \ --disable-vhost-scsi \ @@ -1040,8 +1051,27 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog -* Tue May 7 2019 Danilo Cesar Lemes de Paula - 4.0.0-0.el8 -- Rebase qemu-kvm to 4.0.0 +* Thu May 16 2019 Danilo Cesar Lemes de Paula - 4.0.0-1.el8 +- 4.0.0 temporary rebase +- Resolves: bz#1705235 + (Rebase qemu-kvm for RHEL-AV 8.1.0) + +* Mon May 06 2019 Danilo Cesar Lemes de Paula - 3.1.0-25.el8 +- kvm-redhat-enable-tpmdev-passthrough.patch [bz#1688312] +- kvm-exec-Only-count-mapped-memory-backends-for-qemu_getr.patch [bz#1680492] +- kvm-Enable-libpmem-to-support-nvdimm.patch [bz#1705149] +- Resolves: bz#1680492 + (Qemu quits suddenly while system_reset after hot-plugging unsupported memory by compatible guest on P9 with 1G huge page set) +- Resolves: bz#1688312 + ([RFE] enable TPM passthrough at compile time (qemu-kvm)) +- Resolves: bz#1705149 + (libpmem support is not enabled in qemu-kvm) + +* Fri Apr 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-24.el8 +- kvm-x86-host-phys-bits-limit-option.patch [bz#1688915] +- kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch [bz#1688915] +- Resolves: bz#1688915 + ([Intel 8.0 Alpha] physical bits should <= 48 when host with 5level paging &EPT5 and qemu command with "-cpu qemu64" parameters.) * Tue Apr 23 2019 Danilo Cesar Lemes de Paula - 3.1.0-23.el8 - kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch [bz#1693173] From 46b360cff9be76f5f51e8f7096dc5234e0a8ea94 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Sat, 25 May 2019 04:15:39 +0100 Subject: [PATCH 035/195] * Sat May 25 2019 Danilo Cesar Lemes de Paula - 4.0.0-2.el8 - kvm-target-i386-define-md-clear-bit.patch [bz#1703297 bz#1703304 bz#1703310 bz#1707274] - Resolves: bz#1703297 (CVE-2018-12126 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Store Buffer Data Sampling (MSBDS) [rhel-av-8]) - Resolves: bz#1703304 (CVE-2018-12130 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Fill Buffer Data Sampling (MFBDS) [rhel-av-8]) - Resolves: bz#1703310 (CVE-2018-12127 virt:8.0.0/qemu-kvm: hardware: Micro-architectural Load Port Data Sampling - Information Leak (MLPDS) [rhel-av-8]) - Resolves: bz#1707274 (CVE-2019-11091 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Data Sampling Uncacheable Memory (MDSUM) [rhel-av-8.1.0]) --- kvm-target-i386-define-md-clear-bit.patch | 58 +++++++++++++++++++++++ qemu-kvm.spec | 20 +++++++- 2 files changed, 76 insertions(+), 2 deletions(-) create mode 100644 kvm-target-i386-define-md-clear-bit.patch diff --git a/kvm-target-i386-define-md-clear-bit.patch b/kvm-target-i386-define-md-clear-bit.patch new file mode 100644 index 0000000..5bb1258 --- /dev/null +++ b/kvm-target-i386-define-md-clear-bit.patch @@ -0,0 +1,58 @@ +From 790c94e3240549de61881aebb94d4d933252e540 Mon Sep 17 00:00:00 2001 +From: Danilo de Paula +Date: Mon, 20 May 2019 18:29:57 +0100 +Subject: [PATCH] target/i386: define md-clear bit + +RH-Author: Danilo de Paula +Message-id: <20190520182957.26425-1-ddepaula@redhat.com> +Patchwork-id: 88110 +O-Subject: [RHEL-8 + RHEL-AV qemu-kvm PATCH] target/i386: define md-clear bit +Bugzilla: 1703297 1703304 1703310 1707274 +RH-Acked-by: Daniel P. 
Berrange +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: John Snow + +From: Paolo Bonzini + +BZ: 1703310 +BZ: 1703304 +BZ: 1703297 +BZ: 1707274 +branch: rhel-av-8.1.0/master-4.0.0 + +BZ: 1705851 +BZ: 1704542 +BZ: 1704538 +BZ: 1704534 +branch: rhel-av-8.0.1 + +BZ: 1703308 +BZ: 1703302 +branch: rhel-8.1.0 + +md-clear is a new CPUID bit which is set when microcode provides the +mechanism to invoke a flush of various exploitable CPU buffers by invoking +the VERW instruction. + +Signed-off-by: Paolo Bonzini +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 6472cd2..3886464 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1076,7 +1076,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .feat_names = { + NULL, NULL, "avx512-4vnniw", "avx512-4fmaps", + NULL, NULL, NULL, NULL, +- NULL, NULL, NULL, NULL, ++ NULL, NULL, "md-clear", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index d95f994..d4ebc85 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.0.0 -Release: 1%{?dist} +Release: 2%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -125,6 +125,11 @@ Patch0019: 0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch Patch0020: 0020-doc-fix-the-configuration-path.patch Patch0021: 0021-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch Patch0022: 0022-redhat-Post-rebase-synchronization.patch +# For bz#1703297 - CVE-2018-12126 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Store Buffer Data Sampling (MSBDS) [rhel-av-8] +# For bz#1703304 - CVE-2018-12130 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Fill Buffer Data Sampling (MFBDS) [rhel-av-8] +# For bz#1703310 - CVE-2018-12127 virt:8.0.0/qemu-kvm: hardware: Micro-architectural Load Port Data Sampling - Information Leak (MLPDS) [rhel-av-8] +# For bz#1707274 - CVE-2019-11091 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Data Sampling Uncacheable Memory (MDSUM) [rhel-av-8.1.0] +Patch23: kvm-target-i386-define-md-clear-bit.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -859,7 +864,7 @@ rm -rf $RPM_BUILD_ROOT%{qemudocdir}/interop/.buildinfo %check export DIFF=diff; make check V=1 pushd tests/qemu-iotests -./check -v -raw 001 002 003 004 005 008 009 010 011 012 021 025 032 033 045 048 052 063 077 086 101 104 106 120 132 140 143 145 147 150 152 157 159 160 162 170 171 175 181 184 194 205 208 218 221 222 226 227 232 +./check -v -raw 001 002 003 004 005 008 009 010 011 012 021 025 032 033 045 048 052 063 077 086 101 104 106 120 132 140 143 145 147 150 152 157 159 160 162 170 171 175 181 184 194 208 218 221 222 226 227 232 ./check -v -qcow2 001 002 003 004 005 007 008 009 010 011 012 017 018 019 020 021 022 024 025 027 028 029 031 032 033 034 035 036 037 038 039 042 043 046 047 048 049 050 052 053 054 056 057 058 062 063 065 066 068 069 072 073 074 080 085 086 087 089 090 091 095 096 097 098 102 103 104 105 107 108 110 111 114 117 120 126 127 130 132 133 134 137 138 140 141 142 143 144 145 147 150 151 152 156 157 158 159 162 165 170 
174 177 179 181 184 187 188 189 190 191 194 195 196 198 201 202 203 204 205 206 208 209 214 216 217 218 222 226 227 232 ./check -v -luks 001 002 003 004 005 008 009 010 011 012 021 032 033 052 140 143 145 157 162 174 181 184 208 218 227 ./check -v -nbd 001 002 003 004 005 008 009 010 011 021 032 033 045 077 094 104 119 123 132 143 145 147 151 152 162 181 184 194 205 208 218 222 @@ -1051,6 +1056,17 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Sat May 25 2019 Danilo Cesar Lemes de Paula - 4.0.0-2.el8 +- kvm-target-i386-define-md-clear-bit.patch [bz#1703297 bz#1703304 bz#1703310 bz#1707274] +- Resolves: bz#1703297 + (CVE-2018-12126 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Store Buffer Data Sampling (MSBDS) [rhel-av-8]) +- Resolves: bz#1703304 + (CVE-2018-12130 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Fill Buffer Data Sampling (MFBDS) [rhel-av-8]) +- Resolves: bz#1703310 + (CVE-2018-12127 virt:8.0.0/qemu-kvm: hardware: Micro-architectural Load Port Data Sampling - Information Leak (MLPDS) [rhel-av-8]) +- Resolves: bz#1707274 + (CVE-2019-11091 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Data Sampling Uncacheable Memory (MDSUM) [rhel-av-8.1.0]) + * Thu May 16 2019 Danilo Cesar Lemes de Paula - 4.0.0-1.el8 - 4.0.0 temporary rebase - Resolves: bz#1705235 From 115fdcbf37edac902ab9accd5d4b6cfb2c903110 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Tue, 28 May 2019 22:25:31 +0100 Subject: [PATCH 036/195] * Tue May 28 2019 Danilo Cesar Lemes de Paula - 4.0.0-3.el8 - kvm-redhat-fix-cut-n-paste-garbage-in-hw_compat-comments.patch [bz#1709726] - kvm-compat-Generic-hw_compat_rhel_8_0.patch [bz#1709726] - kvm-redhat-sync-pseries-rhel7.6.0-with-rhel-av-8.0.1.patch [bz#1709726] - kvm-redhat-define-pseries-rhel8.1.0-machine-type.patch [bz#1709726] - Resolves: bz#1709726 (Forward and backward migration failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'") --- kvm-compat-Generic-hw_compat_rhel_8_0.patch | 79 +++++++++++++ ...efine-pseries-rhel8.1.0-machine-type.patch | 73 ++++++++++++ ...-paste-garbage-in-hw_compat-comments.patch | 104 ++++++++++++++++++ ...pseries-rhel7.6.0-with-rhel-av-8.0.1.patch | 54 +++++++++ qemu-kvm.spec | 18 ++- 5 files changed, 327 insertions(+), 1 deletion(-) create mode 100644 kvm-compat-Generic-hw_compat_rhel_8_0.patch create mode 100644 kvm-redhat-define-pseries-rhel8.1.0-machine-type.patch create mode 100644 kvm-redhat-fix-cut-n-paste-garbage-in-hw_compat-comments.patch create mode 100644 kvm-redhat-sync-pseries-rhel7.6.0-with-rhel-av-8.0.1.patch diff --git a/kvm-compat-Generic-hw_compat_rhel_8_0.patch b/kvm-compat-Generic-hw_compat_rhel_8_0.patch new file mode 100644 index 0000000..017b400 --- /dev/null +++ b/kvm-compat-Generic-hw_compat_rhel_8_0.patch @@ -0,0 +1,79 @@ +From f19738e4bf8b9d5fb44ef30042cdad31b92edb22 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 24 May 2019 12:13:02 +0100 +Subject: [PATCH 2/4] compat: Generic hw_compat_rhel_8_0 + +RH-Author: Laurent Vivier +Message-id: <20190524121304.15530-3-lvivier@redhat.com> +Patchwork-id: 88201 +O-Subject: [RHEL-AV qemu-kvm PATCH v4 2/4] compat: Generic hw_compat_rhel_8_0 +Bugzilla: 1709726 +RH-Acked-by: David Gibson +RH-Acked-by: Cornelia Huck +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/core/machine.c | 30 ++++++++++++++++++++++++++++++ + include/hw/boards.h | 3 +++ + 2 files changed, 33 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index e808151..c4aa354 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -291,6 +291,36 @@ GlobalProperty hw_compat_rhel_7_6[] = { + }; + const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); + ++/* The same as hw_compat_3_1 ++ * format of array has been changed by: ++ * 6c36bddf5340 ("machine: Use shorter format for GlobalProperty arrays") ++ */ ++GlobalProperty hw_compat_rhel_8_0[] = { ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-speed", "2_5" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-width", "1" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "memory-backend-file", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "memory-backend-memfd", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-crb", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-tis", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-kbd", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-mouse", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-tablet", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "virtio-blk-device", "discard", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "virtio-blk-device", "write-zeroes", "false" }, ++}; ++const size_t hw_compat_rhel_8_0_len = G_N_ELEMENTS(hw_compat_rhel_8_0); ++ + + GlobalProperty hw_compat_3_1[] = { + { "pcie-root-port", "x-speed", "2_5" }, +diff --git a/include/hw/boards.h b/include/hw/boards.h +index b292365..f159e9e 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -337,6 +337,9 @@ extern const size_t 
hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_0[]; ++extern const size_t hw_compat_rhel_8_0_len; ++ + extern GlobalProperty hw_compat_rhel_7_6[]; + extern const size_t hw_compat_rhel_7_6_len; + +-- +1.8.3.1 + diff --git a/kvm-redhat-define-pseries-rhel8.1.0-machine-type.patch b/kvm-redhat-define-pseries-rhel8.1.0-machine-type.patch new file mode 100644 index 0000000..9c65c32 --- /dev/null +++ b/kvm-redhat-define-pseries-rhel8.1.0-machine-type.patch @@ -0,0 +1,73 @@ +From 89f01dab29d3c9a833c9c36a8f2151b6f97696c7 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 24 May 2019 12:13:04 +0100 +Subject: [PATCH 4/4] redhat: define pseries-rhel8.1.0 machine type + +RH-Author: Laurent Vivier +Message-id: <20190524121304.15530-5-lvivier@redhat.com> +Patchwork-id: 88203 +O-Subject: [RHEL-AV qemu-kvm PATCH v4 4/4] redhat: define pseries-rhel8.1.0 machine type +Bugzilla: 1709726 +RH-Acked-by: David Gibson +RH-Acked-by: Cornelia Huck +RH-Acked-by: Dr. David Alan Gilbert + +update pseries-rhel8.0.0 like pseries-3.1 except +for SPAPR_CAP_CFPC, SPAPR_CAP_SBBC, SPAPR_CAP_IBS +that have already been backported. + +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/spapr.c | 28 ++++++++++++++++++++++++++-- + 1 file changed, 26 insertions(+), 2 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 67c9e01..8adcb97 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4670,15 +4670,39 @@ DEFINE_SPAPR_MACHINE(2_1, "2.1", false); + #endif + + /* ++ * pseries-rhel8.1.0 ++ */ ++ ++static void spapr_machine_rhel810_class_options(MachineClass *mc) ++{ ++ /* Defaults for the latest behaviour inherited from the base class */ ++} ++ ++DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", true); ++ ++/* + * pseries-rhel8.0.0 ++ * like spapr_compat_3_1 ++ * except SPAPR_CAP_CFPC, SPAPR_CAP_SBBC and SPAPR_CAP_IBS ++ * that have been backported to pseries-rhel8.0.0 + */ + + static void spapr_machine_rhel800_class_options(MachineClass *mc) + { +- /* Defaults for the latest behaviour inherited from the base class */ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel810_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, ++ hw_compat_rhel_8_0_len); ++ ++ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); ++ smc->update_dt_enabled = false; ++ smc->dr_phb_enabled = false; ++ smc->broken_host_serial_model = true; ++ smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; + } + +-DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", true); ++DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", false); + + /* + * pseries-rhel7.6.0 +-- +1.8.3.1 + diff --git a/kvm-redhat-fix-cut-n-paste-garbage-in-hw_compat-comments.patch b/kvm-redhat-fix-cut-n-paste-garbage-in-hw_compat-comments.patch new file mode 100644 index 0000000..f039cc5 --- /dev/null +++ b/kvm-redhat-fix-cut-n-paste-garbage-in-hw_compat-comments.patch @@ -0,0 +1,104 @@ +From 479ad3075d0d2832f5327b917c8279feaae5f6d3 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 24 May 2019 12:13:01 +0100 +Subject: [PATCH 1/4] redhat: fix cut'n'paste garbage in hw_compat comments +MIME-Version: 1.0 +Content-Type: 
text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +Message-id: <20190524121304.15530-2-lvivier@redhat.com> +Patchwork-id: 88200 +O-Subject: [RHEL-AV qemu-kvm PATCH v4 1/4] redhat: fix cut'n'paste garbage in hw_compat comments +Bugzilla: 1709726 +RH-Acked-by: David Gibson +RH-Acked-by: Cornelia Huck +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Philippe Mathieu-Daudé + +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + hw/core/machine.c | 38 +++++++++++++++++++------------------- + 1 file changed, 19 insertions(+), 19 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index fd1594d..e808151 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -25,12 +25,12 @@ + #include "hw/mem/nvdimm.h" + + /* Mostly like hw_compat_2_1 but: +- * * we don't need virtio-scsi-pci since 7.0 already had that on +- * * +- * * RH: Note, qemu-extended-regs should have been enabled in the 7.1 +- * * machine type, but was accidentally turned off in 7.2 onwards. +- * * +- * */ ++ * we don't need virtio-scsi-pci since 7.0 already had that on ++ * ++ * RH: Note, qemu-extended-regs should have been enabled in the 7.1 ++ * machine type, but was accidentally turned off in 7.2 onwards. 
++ * ++ */ + GlobalProperty hw_compat_rhel_7_1[] = { + { /* COMPAT_RHEL7.1 */ + .driver = "intel-hda-generic", +@@ -81,8 +81,8 @@ GlobalProperty hw_compat_rhel_7_1[] = { + const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); + + /* Mostly like hw_compat_2_4 + 2_3 but: +- * * we don't need "any_layout" as it has been backported to 7.2 +- * */ ++ * we don't need "any_layout" as it has been backported to 7.2 ++ */ + + GlobalProperty hw_compat_rhel_7_2[] = { + { +@@ -144,9 +144,9 @@ GlobalProperty hw_compat_rhel_7_2[] = { + const size_t hw_compat_rhel_7_2_len = G_N_ELEMENTS(hw_compat_rhel_7_2); + + /* Mostly like HW_COMPAT_2_6 + HW_COMPAT_2_7 + HW_COMPAT_2_8 except +- * * disable-modern, disable-legacy, page-per-vq have already been +- * * backported to RHEL7.3 +- * */ ++ * disable-modern, disable-legacy, page-per-vq have already been ++ * backported to RHEL7.3 ++ */ + GlobalProperty hw_compat_rhel_7_3[] = { + { /* HW_COMPAT_RHEL7_3 */ + .driver = "virtio-mmio", +@@ -209,9 +209,9 @@ GlobalProperty hw_compat_rhel_7_3[] = { + const size_t hw_compat_rhel_7_3_len = G_N_ELEMENTS(hw_compat_rhel_7_3); + + /* Mostly like hw_compat_2_9 except +- * * x-mtu-bypass-backend, x-migrate-msix has already been +- * * backported to RHEL7.4. shpc was already on in 7.4. +- * */ ++ * x-mtu-bypass-backend, x-migrate-msix has already been ++ * backported to RHEL7.4. shpc was already on in 7.4. 
++ */ + GlobalProperty hw_compat_rhel_7_4[] = { + { /* HW_COMPAT_RHEL7_4 */ + .driver = "intel-iommu", +@@ -261,11 +261,11 @@ GlobalProperty hw_compat_rhel_7_5[] = { + const size_t hw_compat_rhel_7_5_len = G_N_ELEMENTS(hw_compat_rhel_7_5); + + /* The same as hw_compat_3_0 + hw_compat_2_12 +- * * except that +- * * there's nothing in 3_0 +- * * migration.decompress-error-check=off was in 7.5 from bz 1584139 +- * * +- * */ ++ * except that ++ * there's nothing in 3_0 ++ * migration.decompress-error-check=off was in 7.5 from bz 1584139 ++ * ++ */ + GlobalProperty hw_compat_rhel_7_6[] = { + { /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ + .driver = "hda-audio", +-- +1.8.3.1 + diff --git a/kvm-redhat-sync-pseries-rhel7.6.0-with-rhel-av-8.0.1.patch b/kvm-redhat-sync-pseries-rhel7.6.0-with-rhel-av-8.0.1.patch new file mode 100644 index 0000000..3d890ba --- /dev/null +++ b/kvm-redhat-sync-pseries-rhel7.6.0-with-rhel-av-8.0.1.patch @@ -0,0 +1,54 @@ +From 2511c637a95e2a5f2a471c03a4dafcb14fbe4db8 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 24 May 2019 12:13:03 +0100 +Subject: [PATCH 3/4] redhat: sync pseries-rhel7.6.0 with rhel-av-8.0.1 + +RH-Author: Laurent Vivier +Message-id: <20190524121304.15530-4-lvivier@redhat.com> +Patchwork-id: 88199 +O-Subject: [RHEL-AV qemu-kvm PATCH v4 3/4] redhat: sync pseries-rhel7.6.0 with rhel-av-8.0.1 +Bugzilla: 1709726 +RH-Acked-by: David Gibson +RH-Acked-by: Cornelia Huck +RH-Acked-by: Dr. David Alan Gilbert + +The default values for SPAPR_CAP_CFPC, SPAPR_CAP_SBBC and +SPAPR_CAP_IBS are SPAPR_CAP_WORKAROUND in pseries-4.0 and +SPAPR_CAP_BROKEN in pseries-3.1, but the change has been +backported to pseries-rhel8.0.0 in rhel-av-8.0.1 so it has +to be reset to SPAPR_CAP_BROKEN in pseries-rhe7.6.0 rather +than in pseries-rhel8.0.0. + +This patch also removes default_cpu_type to POWER8 as it will +be set in pseries-rhel8.0.0 (POWER9 appears with pseries-4.0) + +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/spapr.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index daa5920..67c9e01 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4719,8 +4719,14 @@ static void spapr_machine_rhel760_class_options(MachineClass *mc) + * yet. Postpone this to machine init (see default_caps_with_cpu()). + */ + smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0; +- /* Defaults for the latest behaviour inherited from the base class */ +- mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); ++ ++ /* SPAPR_CAP_WORKAROUND enabled in pseries-rhel800 by ++ * f21757edc554 ++ * "Enable mitigations by default for pseries-4.0 machine type") ++ */ ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; + } + + DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", false); +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index d4ebc85..4701316 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.0.0 -Release: 2%{?dist} +Release: 3%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -130,6 +130,14 @@ Patch0022: 0022-redhat-Post-rebase-synchronization.patch # For bz#1703310 - CVE-2018-12127 virt:8.0.0/qemu-kvm: hardware: Micro-architectural Load Port Data Sampling - Information Leak (MLPDS) [rhel-av-8] # For bz#1707274 - CVE-2019-11091 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Data Sampling Uncacheable Memory (MDSUM) [rhel-av-8.1.0] Patch23: kvm-target-i386-define-md-clear-bit.patch +# For bz#1709726 - Forward and backward migration failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'" +Patch24: kvm-redhat-fix-cut-n-paste-garbage-in-hw_compat-comments.patch +# For bz#1709726 - Forward and backward migration failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'" +Patch25: kvm-compat-Generic-hw_compat_rhel_8_0.patch +# For bz#1709726 - Forward and backward migration failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'" +Patch26: kvm-redhat-sync-pseries-rhel7.6.0-with-rhel-av-8.0.1.patch +# For bz#1709726 - Forward and backward migration failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'" +Patch27: kvm-redhat-define-pseries-rhel8.1.0-machine-type.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1056,6 +1064,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue May 28 2019 Danilo Cesar Lemes de Paula - 4.0.0-3.el8 +- kvm-redhat-fix-cut-n-paste-garbage-in-hw_compat-comments.patch [bz#1709726] +- kvm-compat-Generic-hw_compat_rhel_8_0.patch [bz#1709726] +- kvm-redhat-sync-pseries-rhel7.6.0-with-rhel-av-8.0.1.patch [bz#1709726] +- kvm-redhat-define-pseries-rhel8.1.0-machine-type.patch [bz#1709726] +- Resolves: bz#1709726 + (Forward and backward migration failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'") + * Sat May 25 2019 Danilo Cesar Lemes de Paula - 4.0.0-2.el8 - 
kvm-target-i386-define-md-clear-bit.patch [bz#1703297 bz#1703304 bz#1703310 bz#1707274] - Resolves: bz#1703297 From 0544bace3c4a87653491ec8bab337fd30e365455 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Tue, 11 Jun 2019 22:15:20 +0100 Subject: [PATCH 037/195] * Tue Jun 11 2019 Danilo Cesar Lemes de Paula - 4.0.0-4.el8 - kvm-Disable-VXHS-support.patch [bz#1714937] - kvm-aarch64-Add-virt-rhel8.1.0-machine-type-for-ARM.patch [bz#1713735] - kvm-aarch64-Allow-ARM-VIRT-iommu-option-in-RHEL8.1-machi.patch [bz#1713735] - kvm-usb-call-reset-handler-before-updating-state.patch [bz#1713679] - kvm-usb-host-skip-reset-for-untouched-devices.patch [bz#1713679] - kvm-usb-host-avoid-libusb_set_configuration-calls.patch [bz#1713679] - kvm-aarch64-Compile-out-IOH3420.patch [bz#1627283] - kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch [bz#1714891] - kvm-vl-Document-why-objects-are-delayed.patch [bz#1714891] - Resolves: bz#1627283 (Compile out IOH3420 on aarch64) - Resolves: bz#1713679 (Detached device when trying to upgrade USB device firmware when in doing USB Passthrough via QEMU) - Resolves: bz#1713735 (Allow ARM VIRT iommu option in RHEL8.1 machine) - Resolves: bz#1714891 (Guest with persistent reservation manager for a disk fails to start) - Resolves: bz#1714937 (Disable VXHS support) --- kvm-Disable-VXHS-support.patch | 293 ++++++++++++++++++ ...-virt-rhel8.1.0-machine-type-for-ARM.patch | 56 ++++ ...M-VIRT-iommu-option-in-RHEL8.1-machi.patch | 66 ++++ kvm-aarch64-Compile-out-IOH3420.patch | 54 ++++ ...-reset-handler-before-updating-state.patch | 47 +++ ...avoid-libusb_set_configuration-calls.patch | 68 ++++ ...ost-skip-reset-for-untouched-devices.patch | 46 +++ kvm-vl-Document-why-objects-are-delayed.patch | 66 ++++ ...ckdev-persistent-reservation-managem.patch | 66 ++++ qemu-kvm.spec | 52 +++- 10 files changed, 803 insertions(+), 11 deletions(-) create mode 100644 kvm-Disable-VXHS-support.patch create mode 100644 
kvm-aarch64-Add-virt-rhel8.1.0-machine-type-for-ARM.patch create mode 100644 kvm-aarch64-Allow-ARM-VIRT-iommu-option-in-RHEL8.1-machi.patch create mode 100644 kvm-aarch64-Compile-out-IOH3420.patch create mode 100644 kvm-usb-call-reset-handler-before-updating-state.patch create mode 100644 kvm-usb-host-avoid-libusb_set_configuration-calls.patch create mode 100644 kvm-usb-host-skip-reset-for-untouched-devices.patch create mode 100644 kvm-vl-Document-why-objects-are-delayed.patch create mode 100644 kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch diff --git a/kvm-Disable-VXHS-support.patch b/kvm-Disable-VXHS-support.patch new file mode 100644 index 0000000..75a89ad --- /dev/null +++ b/kvm-Disable-VXHS-support.patch @@ -0,0 +1,293 @@ +From 517e4a2f490ff56c8b3fbf9a56c8d4a6e167c2b6 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 29 May 2019 16:09:59 +0100 +Subject: [PATCH 1/9] Disable VXHS support + +RH-Author: Miroslav Rezanina +Message-id: <1559146199-30110-1-git-send-email-mrezanin@redhat.com> +Patchwork-id: 88273 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH] Disable VXHS support +Bugzilla: 1714937 +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Danilo de Paula + +From: Miroslav Rezanina + +As we ended our partnership with Veritas, we do not support VXHS anymore. +Reverting our downstream changes included in "Initial redhat commit". + +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula +--- + block/Makefile.objs | 2 +- + block/vxhs.c | 119 ++++-------------------------------------- + configure | 33 ++++++++++-- + redhat/qemu-kvm.spec.template | 11 +--- + 4 files changed, 41 insertions(+), 124 deletions(-) + +diff --git a/block/Makefile.objs b/block/Makefile.objs +index f4cf03b..7a81892 100644 +--- a/block/Makefile.objs ++++ b/block/Makefile.objs +@@ -30,7 +30,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o + block-obj-$(CONFIG_CURL) += curl.o + block-obj-$(CONFIG_RBD) += rbd.o + block-obj-$(CONFIG_GLUSTERFS) += gluster.o +-#block-obj-$(CONFIG_VXHS) += vxhs.o ++block-obj-$(CONFIG_VXHS) += vxhs.o + block-obj-$(CONFIG_LIBSSH2) += ssh.o + block-obj-y += accounting.o dirty-bitmap.o + block-obj-y += write-threshold.o +diff --git a/block/vxhs.c b/block/vxhs.c +index 3dbb954..2e18229 100644 +--- a/block/vxhs.c ++++ b/block/vxhs.c +@@ -9,8 +9,7 @@ + */ + + #include "qemu/osdep.h" +-#include "block/vxhs_shim.h" +-#include ++#include + #include + #include "block/block_int.h" + #include "block/qdict.h" +@@ -60,97 +59,6 @@ typedef struct BDRVVXHSState { + char *tlscredsid; /* tlscredsid */ + } BDRVVXHSState; + +-#define LIBVXHS_FULL_PATHNAME "/usr/lib64/qemu/libvxhs.so.1" +-static bool libvxhs_loaded; +-static GModule *libvxhs_handle; +- +-static LibVXHSFuncs libvxhs; +- +-typedef struct LibVXHSSymbols { +- const char *name; +- gpointer *addr; +-} LibVXHSSymbols; +- +-static LibVXHSSymbols libvxhs_symbols[] = { +- {"iio_init", (gpointer *) &libvxhs.iio_init}, +- {"iio_fini", (gpointer *) &libvxhs.iio_fini}, +- {"iio_min_version", (gpointer *) &libvxhs.iio_min_version}, +- {"iio_max_version", (gpointer *) &libvxhs.iio_max_version}, +- {"iio_open", (gpointer *) &libvxhs.iio_open}, +- {"iio_close", (gpointer *) &libvxhs.iio_close}, +- {"iio_writev", (gpointer *) &libvxhs.iio_writev}, +- {"iio_readv", (gpointer *) &libvxhs.iio_readv}, +- {"iio_ioctl", (gpointer *) &libvxhs.iio_ioctl}, +- {NULL} +-}; +- +-static void bdrv_vxhs_set_funcs(GModule *handle, Error 
**errp) +-{ +- int i = 0; +- while (libvxhs_symbols[i].name) { +- const char *name = libvxhs_symbols[i].name; +- if (!g_module_symbol(handle, name, libvxhs_symbols[i].addr)) { +- error_setg(errp, "%s could not be loaded from libvxhs: %s", +- name, g_module_error()); +- return; +- } +- ++i; +- } +-} +- +-static void bdrv_vxhs_load_libs(Error **errp) +-{ +- Error *local_err = NULL; +- int32_t ver; +- +- if (libvxhs_loaded) { +- return; +- } +- +- if (!g_module_supported()) { +- error_setg(errp, "modules are not supported on this platform: %s", +- g_module_error()); +- return; +- } +- +- libvxhs_handle = g_module_open(LIBVXHS_FULL_PATHNAME, +- G_MODULE_BIND_LAZY | G_MODULE_BIND_LOCAL); +- if (!libvxhs_handle) { +- error_setg(errp, "The VXHS library from Veritas might not be installed " +- "correctly (%s)", g_module_error()); +- return; +- } +- +- g_module_make_resident(libvxhs_handle); +- +- bdrv_vxhs_set_funcs(libvxhs_handle, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- return; +- } +- +- /* Now check to see if the libvxhs we are using here is supported +- * by the loaded version */ +- +- ver = (*libvxhs.iio_min_version)(); +- if (ver > QNIO_VERSION) { +- error_setg(errp, "Trying to use libvxhs version %"PRId32" API, but " +- "only %"PRId32" or newer is supported by %s", +- QNIO_VERSION, ver, LIBVXHS_FULL_PATHNAME); +- return; +- } +- +- ver = (*libvxhs.iio_max_version)(); +- if (ver < QNIO_VERSION) { +- error_setg(errp, "Trying to use libvxhs version %"PRId32" API, but " +- "only %"PRId32" or earlier is supported by %s", +- QNIO_VERSION, ver, LIBVXHS_FULL_PATHNAME); +- return; +- } +- +- libvxhs_loaded = true; +-} +- + static void vxhs_complete_aio_bh(void *opaque) + { + VXHSAIOCB *acb = opaque; +@@ -318,7 +226,7 @@ static void vxhs_refresh_limits(BlockDriverState *bs, Error **errp) + static int vxhs_init_and_ref(void) + { + if (vxhs_ref++ == 0) { +- if ((*libvxhs.iio_init)(QNIO_VERSION, vxhs_iio_callback)) { ++ if (iio_init(QNIO_VERSION, 
vxhs_iio_callback)) { + return -ENODEV; + } + } +@@ -328,7 +236,7 @@ static int vxhs_init_and_ref(void) + static void vxhs_unref(void) + { + if (--vxhs_ref == 0) { +- (*libvxhs.iio_fini)(); ++ iio_fini(); + } + } + +@@ -398,17 +306,8 @@ static int vxhs_open(BlockDriverState *bs, QDict *options, + char *client_key = NULL; + char *client_cert = NULL; + +- bdrv_vxhs_load_libs(&local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- /* on error, cannot cleanup because the iio_fini() function +- * is not loaded */ +- return -EINVAL; +- } +- + ret = vxhs_init_and_ref(); + if (ret < 0) { +- error_setg(&local_err, "libvxhs iio_init() failed"); + ret = -EINVAL; + goto out; + } +@@ -493,8 +392,8 @@ static int vxhs_open(BlockDriverState *bs, QDict *options, + /* + * Open qnio channel to storage agent if not opened before + */ +- dev_handlep = (*libvxhs.iio_open)(of_vsa_addr, s->vdisk_guid, 0, +- cacert, client_key, client_cert); ++ dev_handlep = iio_open(of_vsa_addr, s->vdisk_guid, 0, ++ cacert, client_key, client_cert); + if (dev_handlep == NULL) { + trace_vxhs_open_iio_open(of_vsa_addr); + ret = -ENODEV; +@@ -554,11 +453,11 @@ static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, uint64_t offset, + + switch (iodir) { + case VDISK_AIO_WRITE: +- ret = (*libvxhs.iio_writev)(dev_handle, acb, qiov->iov, qiov->niov, ++ ret = iio_writev(dev_handle, acb, qiov->iov, qiov->niov, + offset, size, iio_flags); + break; + case VDISK_AIO_READ: +- ret = (*libvxhs.iio_writev)(dev_handle, acb, qiov->iov, qiov->niov, ++ ret = iio_readv(dev_handle, acb, qiov->iov, qiov->niov, + offset, size, iio_flags); + break; + default: +@@ -607,7 +506,7 @@ static void vxhs_close(BlockDriverState *bs) + * Close vDisk device + */ + if (s->vdisk_hostinfo.dev_handle) { +- (*libvxhs.iio_close)(s->vdisk_hostinfo.dev_handle); ++ iio_close(s->vdisk_hostinfo.dev_handle); + s->vdisk_hostinfo.dev_handle = NULL; + } + +@@ -629,7 +528,7 @@ static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s) + int ret = 
0; + void *dev_handle = s->vdisk_hostinfo.dev_handle; + +- ret = (*libvxhs.iio_ioctl)(dev_handle, IOR_VDISK_STAT, &vdisk_size, 0); ++ ret = iio_ioctl(dev_handle, IOR_VDISK_STAT, &vdisk_size, 0); + if (ret < 0) { + trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno); + return -EIO; +diff --git a/configure b/configure +index d6d5912..8cb6740 100755 +--- a/configure ++++ b/configure +@@ -3616,7 +3616,7 @@ fi + + glib_req_ver=2.40 + glib_modules=gthread-2.0 +-if test "$modules" = yes -o "$vxhs" = yes; then ++if test "$modules" = yes; then + glib_modules="$glib_modules gmodule-export-2.0" + fi + +@@ -5760,6 +5760,33 @@ if compile_prog "" "" ; then + fi + + ########################################## ++# Veritas HyperScale block driver VxHS ++# Check if libvxhs is installed ++ ++if test "$vxhs" != "no" ; then ++ cat > $TMPC < ++#include ++ ++void *vxhs_callback; ++ ++int main(void) { ++ iio_init(QNIO_VERSION, vxhs_callback); ++ return 0; ++} ++EOF ++ vxhs_libs="-lvxhs -lssl" ++ if compile_prog "" "$vxhs_libs" ; then ++ vxhs=yes ++ else ++ if test "$vxhs" = "yes" ; then ++ feature_not_found "vxhs block device" "Install libvxhs See github" ++ fi ++ vxhs=no ++ fi ++fi ++ ++########################################## + # check for _Static_assert() + + have_static_assert=no +@@ -7195,8 +7222,8 @@ elif test "$pthread_setname_np_wo_tid" = "yes" ; then + fi + + if test "$vxhs" = "yes" ; then +- echo "CONFIG_VXHS=m" >> $config_host_mak +- echo "VXHS_LIBS= -lssl" >> $config_host_mak ++ echo "CONFIG_VXHS=y" >> $config_host_mak ++ echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak + fi + + if test "$libpmem" = "yes" ; then +-- +1.8.3.1 + diff --git a/kvm-aarch64-Add-virt-rhel8.1.0-machine-type-for-ARM.patch b/kvm-aarch64-Add-virt-rhel8.1.0-machine-type-for-ARM.patch new file mode 100644 index 0000000..db57f7b --- /dev/null +++ b/kvm-aarch64-Add-virt-rhel8.1.0-machine-type-for-ARM.patch @@ -0,0 +1,56 @@ +From c3e39ef14e99f903b95fa051936f40ebd6c35484 Mon Sep 17 00:00:00 2001 +From: 
Auger Eric +Date: Mon, 3 Jun 2019 19:17:39 +0100 +Subject: [PATCH 2/9] aarch64: Add virt-rhel8.1.0 machine type for ARM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Auger Eric +Message-id: <20190603191740.32665-2-eric.auger@redhat.com> +Patchwork-id: 88461 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/2] aarch64: Add virt-rhel8.1.0 machine type for ARM +Bugzilla: 1713735 +RH-Acked-by: Andrew Jones +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Philippe Mathieu-Daudé + +This patch adds a new machine type, virt-rhel8.1.0, for QEMU rhel-av. +This machine type is based off 4.0 with the exception that it removes +support for dynamic sysbus devices: VFIO_CALXEDA_XGMAC, VFIO_AMD_XGBE, and +RAMFB_DEVICE because downstream RHEL doesn't support these devices. IOMMU +instantiation still is disabled but will be enabled in subsequent patches. + +Signed-off-by: Eric Auger +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/virt.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 5602d9f..9316a8d 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2158,7 +2158,7 @@ static void rhel_machine_init(void) + } + type_init(rhel_machine_init); + +-static void rhel800_virt_instance_init(Object *obj) ++static void rhel810_virt_instance_init(Object *obj) + { + VirtMachineState *vms = VIRT_MACHINE(obj); + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); +@@ -2204,8 +2204,8 @@ static void rhel800_virt_instance_init(Object *obj) + vms->irqmap=a15irqmap; + } + +-static void rhel800_virt_options(MachineClass *mc) ++static void rhel810_virt_options(MachineClass *mc) + { + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); + } +-DEFINE_RHEL_MACHINE_AS_LATEST(8, 0, 0) ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 1, 0) +-- +1.8.3.1 + diff --git a/kvm-aarch64-Allow-ARM-VIRT-iommu-option-in-RHEL8.1-machi.patch 
b/kvm-aarch64-Allow-ARM-VIRT-iommu-option-in-RHEL8.1-machi.patch new file mode 100644 index 0000000..d25318a --- /dev/null +++ b/kvm-aarch64-Allow-ARM-VIRT-iommu-option-in-RHEL8.1-machi.patch @@ -0,0 +1,66 @@ +From 59a46d11cad715b082d30c0de33317466a9bab9e Mon Sep 17 00:00:00 2001 +From: Auger Eric +Date: Mon, 3 Jun 2019 19:17:40 +0100 +Subject: [PATCH 3/9] aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Auger Eric +Message-id: <20190603191740.32665-3-eric.auger@redhat.com> +Patchwork-id: 88460 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/2] aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine +Bugzilla: 1713735 +RH-Acked-by: Andrew Jones +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Philippe Mathieu-Daudé + +This patch restores the ARM VIRT iommu option historically +introduced in qemu 3.0 and disabled in rhel8.0 (RHBZ#1656504 / +commit 7bfdb4cd2b49) as we did not have support of the feature +at libvirt level. + +Signed-off-by: Eric Auger +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/arm/virt.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 9316a8d..670fa10 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1785,7 +1785,6 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp) + } + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static char *virt_get_iommu(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -1813,7 +1812,6 @@ static void virt_set_iommu(Object *obj, const char *value, Error **errp) + error_append_hint(errp, "Valid values are none, smmuv3.\n"); + } + } +-#endif /* disabled for RHEL */ + + static CpuInstanceProperties + virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) +@@ -2198,8 +2196,13 @@ static void rhel810_virt_instance_init(Object *obj) + NULL); + } + +- /* IOMMU is disabled by default and non-configurable for RHEL */ ++ /* Default disallows iommu instantiation */ + vms->iommu = VIRT_IOMMU_NONE; ++ object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu, NULL); ++ object_property_set_description(obj, "iommu", ++ "Set the IOMMU type. 
" ++ "Valid values are none and smmuv3", ++ NULL); + + vms->irqmap=a15irqmap; + } +-- +1.8.3.1 + diff --git a/kvm-aarch64-Compile-out-IOH3420.patch b/kvm-aarch64-Compile-out-IOH3420.patch new file mode 100644 index 0000000..baf434c --- /dev/null +++ b/kvm-aarch64-Compile-out-IOH3420.patch @@ -0,0 +1,54 @@ +From 59097210f25bfcabc82c9989d3c083a6febeadd6 Mon Sep 17 00:00:00 2001 +From: Auger Eric +Date: Tue, 4 Jun 2019 15:23:26 +0100 +Subject: [PATCH 7/9] aarch64: Compile out IOH3420 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Auger Eric +Message-id: <20190604152326.18510-1-eric.auger@redhat.com> +Patchwork-id: 88525 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v2] aarch64: Compile out IOH3420 +Bugzilla: 1627283 +RH-Acked-by: Andrew Jones +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Laszlo Ersek + +BZ: 1627283 +BRANCH: rhel-av-8.1.0/master-4.0.0 +UPSTREAM: N/A +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=21989313 +TEST: On Sabre, no ioh3420 available + +IOH3420 is not used on aarch64 or arm as we prefer the generic root port +so let's compile it out. + +Signed-off-by: Eric Auger + +--- + +v1 -> v2 +- do not touch arm-softmmu.mak + +Signed-off-by: Danilo C. L. 
de Paula +--- + default-configs/aarch64-rh-devices.mak | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak +index 13ce7c7..224e29b 100644 +--- a/default-configs/aarch64-rh-devices.mak ++++ b/default-configs/aarch64-rh-devices.mak +@@ -12,7 +12,6 @@ CONFIG_EDID=y + CONFIG_FW_CFG_DMA=y + CONFIG_GPIO_KEY=y + CONFIG_I2C=y +-CONFIG_IOH3420=y + CONFIG_IVSHMEM=y + CONFIG_KVM=y + CONFIG_LINUX=y +-- +1.8.3.1 + diff --git a/kvm-usb-call-reset-handler-before-updating-state.patch b/kvm-usb-call-reset-handler-before-updating-state.patch new file mode 100644 index 0000000..53806c6 --- /dev/null +++ b/kvm-usb-call-reset-handler-before-updating-state.patch @@ -0,0 +1,47 @@ +From 646f497c8e1dcea5c1ec8731693e3f06be8f6cc2 Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Tue, 4 Jun 2019 07:13:39 +0100 +Subject: [PATCH 4/9] usb: call reset handler before updating state +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Gerd Hoffmann +Message-id: <20190604071341.3432-2-kraxel@redhat.com> +Patchwork-id: 88478 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/3] usb: call reset handler before updating state +Bugzilla: 1713679 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Max Reitz + +That way the device reset handler can see what +the before-reset state of the device is. + +Signed-off-by: Gerd Hoffmann +Message-id: 20190522094702.17619-2-kraxel@redhat.com +(cherry picked from commit 7ed4657396add28382081a15557c78cd480c1cf1) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/usb/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/usb/core.c b/hw/usb/core.c +index 8fbd9c7..3ab48a1 100644 +--- a/hw/usb/core.c ++++ b/hw/usb/core.c +@@ -87,10 +87,10 @@ void usb_device_reset(USBDevice *dev) + if (dev == NULL || !dev->attached) { + return; + } ++ usb_device_handle_reset(dev); + dev->remote_wakeup = 0; + dev->addr = 0; + dev->state = USB_STATE_DEFAULT; +- usb_device_handle_reset(dev); + } + + void usb_wakeup(USBEndpoint *ep, unsigned int stream) +-- +1.8.3.1 + diff --git a/kvm-usb-host-avoid-libusb_set_configuration-calls.patch b/kvm-usb-host-avoid-libusb_set_configuration-calls.patch new file mode 100644 index 0000000..cfde8ce --- /dev/null +++ b/kvm-usb-host-avoid-libusb_set_configuration-calls.patch @@ -0,0 +1,68 @@ +From a92cfe1254f947c32f15ebb1f81b825076c5625e Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Tue, 4 Jun 2019 07:13:41 +0100 +Subject: [PATCH 6/9] usb-host: avoid libusb_set_configuration calls +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Gerd Hoffmann +Message-id: <20190604071341.3432-4-kraxel@redhat.com> +Patchwork-id: 88477 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 3/3] usb-host: avoid libusb_set_configuration calls +Bugzilla: 1713679 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Max Reitz + +Seems some devices become confused when we call +libusb_set_configuration(). So before calling the function check +whenever the device has multiple configurations in the first place, and +in case it hasn't (which is the case for the majority of devices) simply +skip the call as it will have no effect anyway. + +Signed-off-by: Gerd Hoffmann +Message-id: 20190522094702.17619-4-kraxel@redhat.com +(cherry picked from commit bfe44898848614cfcb3a269bc965afbe1f0f331c) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/usb/host-libusb.c | 18 ++++++++++-------- + 1 file changed, 10 insertions(+), 8 deletions(-) + +diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c +index 4e9a45a..4f765d7 100644 +--- a/hw/usb/host-libusb.c ++++ b/hw/usb/host-libusb.c +@@ -1225,19 +1225,21 @@ static void usb_host_set_address(USBHostDevice *s, int addr) + + static void usb_host_set_config(USBHostDevice *s, int config, USBPacket *p) + { +- int rc; ++ int rc = 0; + + trace_usb_host_set_config(s->bus_num, s->addr, config); + + usb_host_release_interfaces(s); +- rc = libusb_set_configuration(s->dh, config); +- if (rc != 0) { +- usb_host_libusb_error("libusb_set_configuration", rc); +- p->status = USB_RET_STALL; +- if (rc == LIBUSB_ERROR_NO_DEVICE) { +- usb_host_nodev(s); ++ if (s->ddesc.bNumConfigurations != 1) { ++ rc = libusb_set_configuration(s->dh, config); ++ if (rc != 0) { ++ usb_host_libusb_error("libusb_set_configuration", rc); ++ p->status = USB_RET_STALL; ++ if (rc == LIBUSB_ERROR_NO_DEVICE) { ++ usb_host_nodev(s); ++ } ++ return; + } +- return; + } + p->status = usb_host_claim_interfaces(s, config); + if (p->status != USB_RET_SUCCESS) { +-- +1.8.3.1 + diff --git a/kvm-usb-host-skip-reset-for-untouched-devices.patch b/kvm-usb-host-skip-reset-for-untouched-devices.patch new file mode 100644 index 0000000..9968baf --- /dev/null +++ b/kvm-usb-host-skip-reset-for-untouched-devices.patch @@ -0,0 +1,46 @@ +From 507b4bb7a63544c72e8ef1713ada9ff7c2e0024f Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Tue, 4 Jun 2019 07:13:40 +0100 +Subject: [PATCH 5/9] usb-host: skip reset for untouched devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Gerd Hoffmann +Message-id: <20190604071341.3432-3-kraxel@redhat.com> +Patchwork-id: 88479 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/3] usb-host: skip reset for untouched devices +Bugzilla: 1713679 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: Max Reitz + +If the guest didn't talk to the device yet, skip the reset. +Without this usb-host devices get reset a number of times +at boot time for no good reason. + +Signed-off-by: Gerd Hoffmann +Message-id: 20190522094702.17619-3-kraxel@redhat.com +(cherry picked from commit 65f14ab98da1da920f98ee8734dc1588b01d6b2b) +Signed-off-by: Danilo C. L. de Paula +--- + hw/usb/host-libusb.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c +index 67b7465..4e9a45a 100644 +--- a/hw/usb/host-libusb.c ++++ b/hw/usb/host-libusb.c +@@ -1459,6 +1459,9 @@ static void usb_host_handle_reset(USBDevice *udev) + if (!s->allow_guest_reset) { + return; + } ++ if (udev->addr == 0) { ++ return; ++ } + + trace_usb_host_reset(s->bus_num, s->addr); + +-- +1.8.3.1 + diff --git a/kvm-vl-Document-why-objects-are-delayed.patch b/kvm-vl-Document-why-objects-are-delayed.patch new file mode 100644 index 0000000..7cd695d --- /dev/null +++ b/kvm-vl-Document-why-objects-are-delayed.patch @@ -0,0 +1,66 @@ +From d5a193a4b838b00b064d441da5c038a5ff251f8b Mon Sep 17 00:00:00 2001 +From: Markus Armbruster +Date: Thu, 6 Jun 2019 19:31:39 +0100 +Subject: [PATCH 9/9] vl: Document why objects are delayed +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Markus Armbruster +Message-id: <20190606193139.31976-3-armbru@redhat.com> +Patchwork-id: 88610 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/2] vl: Document why objects are delayed +Bugzilla: 1714891 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Philippe Mathieu-Daudé + +Objects should not be "delayed" without a reason, as the previous +commit demonstrates. The remaining ones have reasons. State them, +and demand future ones come with such a statement.
+ +Signed-off-by: Markus Armbruster +Message-Id: <20190604151251.9903-3-armbru@redhat.com> +Reviewed-by: Michal Privoznik +Signed-off-by: Paolo Bonzini +(cherry picked from commit edfb4389c26cbfd873707306024130bda6049780) +Signed-off-by: Danilo C. L. de Paula +--- + vl.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/vl.c b/vl.c +index 627e37d..686c639 100644 +--- a/vl.c ++++ b/vl.c +@@ -2852,19 +2852,25 @@ static bool object_create_initial(const char *type, QemuOpts *opts) + exit(0); + } + ++ /* ++ * Objects should not be made "delayed" without a reason. If you ++ * add one, state the reason in a comment! ++ */ ++ ++ /* Reason: rng-egd property "chardev" */ + if (g_str_equal(type, "rng-egd")) { + return false; + } + + #if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) ++ /* Reason: cryptodev-vhost-user property "chardev" */ + if (g_str_equal(type, "cryptodev-vhost-user")) { + return false; + } + #endif + + /* +- * return false for concrete netfilters since +- * they depend on netdevs already existing ++ * Reason: filter-* property "netdev" etc. 
+ */ + if (g_str_equal(type, "filter-buffer") || + g_str_equal(type, "filter-dump") || +-- +1.8.3.1 + diff --git a/kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch b/kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch new file mode 100644 index 0000000..5f9841f --- /dev/null +++ b/kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch @@ -0,0 +1,66 @@ +From 6473a5d45b4ae75d5eef64b7b5dcd6735f498fb3 Mon Sep 17 00:00:00 2001 +From: Markus Armbruster +Date: Thu, 6 Jun 2019 19:31:38 +0100 +Subject: [PATCH 8/9] vl: Fix -drive / -blockdev persistent reservation + management + +RH-Author: Markus Armbruster +Message-id: <20190606193139.31976-2-armbru@redhat.com> +Patchwork-id: 88609 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/2] vl: Fix -drive / -blockdev persistent reservation management +Bugzilla: 1714891 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eduardo Habkost + +qemu-system-FOO's main() acts on command line arguments in its own +idiosyncratic order. There's not much method to its madness. +Whenever we find a case where one kind of command line argument needs +to refer to something created for another kind later, we rejigger the +order. + +Recent commit cda4aa9a5a "vl: Create block backends before setting +machine properties" was such a rejigger. Block backends are now +created before "delayed" objects. This broke persistent reservation +management. Reproducer: + + $ qemu-system-x86_64 -object pr-manager-helper,id=pr-helper0,path=/tmp/pr-helper0.sock-drive -drive file=/dev/mapper/crypt,file.pr-manager=pr-helper0,format=raw,if=none,id=drive-scsi0-0-0-2 + qemu-system-x86_64: -drive file=/dev/mapper/crypt,file.pr-manager=pr-helper0,format=raw,if=none,id=drive-scsi0-0-0-2: No persistent reservation manager with id 'pr-helper0' + +The delayed pr-manager-helper object is created too late for use by +-drive or -blockdev. Normal objects are still created in time. 
+ +pr-manager-helper has always been a delayed object (commit 7c9e527659 +"scsi, file-posix: add support for persistent reservation +management"). Turns out there's no real reason for that. Make it a +normal object. + +Fixes: cda4aa9a5a08777cf13e164c0543bd4888b8adce +Signed-off-by: Markus Armbruster +Message-Id: <20190604151251.9903-2-armbru@redhat.com> +Reviewed-by: Michal Privoznik +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9ea18ed25a36527167e9676f25d983df5e7f76e6) +Signed-off-by: Danilo C. L. de Paula +--- + vl.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/vl.c b/vl.c +index 2b95925..627e37d 100644 +--- a/vl.c ++++ b/vl.c +@@ -2852,8 +2852,7 @@ static bool object_create_initial(const char *type, QemuOpts *opts) + exit(0); + } + +- if (g_str_equal(type, "rng-egd") || +- g_str_has_prefix(type, "pr-manager-")) { ++ if (g_str_equal(type, "rng-egd")) { + return false; + } + +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 4701316..a8fa99d 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -8,7 +8,6 @@ %global have_gluster 1 %global have_kvm_setup 0 %global have_memlock_limits 0 -%global have_vxhs 0 %ifnarch %{ix86} x86_64 %global have_usbredir 0 @@ -25,7 +24,6 @@ %endif %ifarch x86_64 %global kvm_target x86_64 - %global have_vxhs 1 %else %global have_spice 0 %global have_opengl 0 @@ -69,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.0.0 -Release: 3%{?dist} +Release: 4%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -138,6 +136,24 @@ Patch25: kvm-compat-Generic-hw_compat_rhel_8_0.patch Patch26: kvm-redhat-sync-pseries-rhel7.6.0-with-rhel-av-8.0.1.patch # For bz#1709726 - Forward and backward migration failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'" Patch27: kvm-redhat-define-pseries-rhel8.1.0-machine-type.patch +# For bz#1714937 - Disable VXHS support +Patch28: kvm-Disable-VXHS-support.patch +# For bz#1713735 - Allow ARM VIRT iommu option in RHEL8.1 machine +Patch29: kvm-aarch64-Add-virt-rhel8.1.0-machine-type-for-ARM.patch +# For bz#1713735 - Allow ARM VIRT iommu option in RHEL8.1 machine +Patch30: kvm-aarch64-Allow-ARM-VIRT-iommu-option-in-RHEL8.1-machi.patch +# For bz#1713679 - Detached device when trying to upgrade USB device firmware when in doing USB Passthrough via QEMU +Patch31: kvm-usb-call-reset-handler-before-updating-state.patch +# For bz#1713679 - Detached device when trying to upgrade USB device firmware when in doing USB Passthrough via QEMU +Patch32: kvm-usb-host-skip-reset-for-untouched-devices.patch +# For bz#1713679 - Detached device when trying to upgrade USB device firmware when in doing USB Passthrough via QEMU +Patch33: kvm-usb-host-avoid-libusb_set_configuration-calls.patch +# For bz#1627283 - Compile out IOH3420 on aarch64 +Patch34: kvm-aarch64-Compile-out-IOH3420.patch +# For bz#1714891 - Guest with persistent reservation manager for a disk fails to start +Patch35: kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch +# For bz#1714891 - Guest with persistent reservation manager for a disk fails to start +Patch36: kvm-vl-Document-why-objects-are-delayed.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -430,9 +446,6 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" %global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle -%if 0%{have_vxhs} - %global 
block_drivers_list %{block_drivers_list},vxhs -%endif %if 0%{have_gluster} %global block_drivers_list %{block_drivers_list},gluster %endif @@ -492,11 +505,6 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --disable-usb-redir \ %endif --disable-tcmalloc \ -%if 0%{have_vxhs} - --enable-vxhs \ -%else - --disable-vxhs \ -%endif %ifarch x86_64 --enable-libpmem \ %else @@ -543,6 +551,7 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --enable-trace-backend=dtrace \ --disable-vde \ --disable-vhost-scsi \ + --disable-vxhs \ --disable-virtfs \ --disable-vnc-jpeg \ --disable-vte \ @@ -1064,6 +1073,27 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Jun 11 2019 Danilo Cesar Lemes de Paula - 4.0.0-4.el8 +- kvm-Disable-VXHS-support.patch [bz#1714937] +- kvm-aarch64-Add-virt-rhel8.1.0-machine-type-for-ARM.patch [bz#1713735] +- kvm-aarch64-Allow-ARM-VIRT-iommu-option-in-RHEL8.1-machi.patch [bz#1713735] +- kvm-usb-call-reset-handler-before-updating-state.patch [bz#1713679] +- kvm-usb-host-skip-reset-for-untouched-devices.patch [bz#1713679] +- kvm-usb-host-avoid-libusb_set_configuration-calls.patch [bz#1713679] +- kvm-aarch64-Compile-out-IOH3420.patch [bz#1627283] +- kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch [bz#1714891] +- kvm-vl-Document-why-objects-are-delayed.patch [bz#1714891] +- Resolves: bz#1627283 + (Compile out IOH3420 on aarch64) +- Resolves: bz#1713679 + (Detached device when trying to upgrade USB device firmware when in doing USB Passthrough via QEMU) +- Resolves: bz#1713735 + (Allow ARM VIRT iommu option in RHEL8.1 machine) +- Resolves: bz#1714891 + (Guest with persistent reservation manager for a disk fails to start) +- Resolves: bz#1714937 + (Disable VXHS support) + * Tue May 28 2019 Danilo Cesar Lemes de Paula - 4.0.0-3.el8 - kvm-redhat-fix-cut-n-paste-garbage-in-hw_compat-comments.patch [bz#1709726] - kvm-compat-Generic-hw_compat_rhel_8_0.patch [bz#1709726] From 0ba0561a8b38983705bc7bf10cf77a3ed2e96a5b Mon Sep 17 00:00:00 
2001 From: Miroslav Rezanina Date: Mon, 8 Jul 2019 14:20:34 +0200 Subject: [PATCH 038/195] * Mon Jul 08 2019 Miroslav Rezanina - 4.0.0-5.el8 - kvm-qemu-kvm.spec-bump-libseccomp-2.4.0.patch [bz#1720306] - kvm-qxl-check-release-info-object.patch [bz#1712717] - kvm-target-i386-add-MDS-NO-feature.patch [bz#1722839] - kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch [bz#1588356] - kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch [bz#1588356] - kvm-rh-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch [bz#1707118] - Resolves: bz#1588356 (qemu crashed on the source host when do storage migration with source qcow2 disk created by 'qemu-img') - Resolves: bz#1707118 (enable device: bochs-display (QEMU)) - Resolves: bz#1712717 (CVE-2019-12155 qemu-kvm: QEMU: qxl: null pointer dereference while releasing spice resources [rhel-av-8]) - Resolves: bz#1720306 (VM failed to start with error "failed to install seccomp syscall filter in the kernel") - Resolves: bz#1722839 ([Intel 8.1 FEAT] MDS_NO exposure to guest - Fast Train) --- ...osix-Unaligned-O_DIRECT-block-status.patch | 98 ++++++++++ ...t-unaligned-raw-images-with-O_DIRECT.patch | 182 ++++++++++++++++++ kvm-qxl-check-release-info-object.patch | 51 +++++ ...h-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch | 43 +++++ kvm-target-i386-add-MDS-NO-feature.patch | 51 +++++ qemu-kvm.spec | 38 +++- 6 files changed, 460 insertions(+), 3 deletions(-) create mode 100644 kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch create mode 100644 kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch create mode 100644 kvm-qxl-check-release-info-object.patch create mode 100644 kvm-rh-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch create mode 100644 kvm-target-i386-add-MDS-NO-feature.patch diff --git a/kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch b/kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch new file mode 100644 index 0000000..68c7e07 --- /dev/null +++ 
b/kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch @@ -0,0 +1,98 @@ +From cff152749afe8b045db50fdd065756a217efa6e9 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Tue, 25 Jun 2019 21:07:09 +0200 +Subject: [PATCH 4/6] block/file-posix: Unaligned O_DIRECT block-status + +RH-Author: Max Reitz +Message-id: <20190625210710.20946-2-mreitz@redhat.com> +Patchwork-id: 88945 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/2] block/file-posix: Unaligned O_DIRECT block-status +Bugzilla: 1588356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: John Snow +RH-Acked-by: Stefano Garzarella + +Currently, qemu crashes whenever someone queries the block status of an +unaligned image tail of an O_DIRECT image: +$ echo > foo +$ qemu-img map --image-opts driver=file,filename=foo,cache.direct=on +Offset Length Mapped to File +qemu-img: block/io.c:2093: bdrv_co_block_status: Assertion `*pnum && +QEMU_IS_ALIGNED(*pnum, align) && align > offset - aligned_offset' +failed. + +This is because bdrv_co_block_status() checks that the result returned +by the driver's implementation is aligned to the request_alignment, but +file-posix can fail to do so, which is actually mentioned in a comment +there: "[...] possibly including a partial sector at EOF". + +Fix this by rounding up those partial sectors. + +There are two possible alternative fixes: +(1) We could refuse to open unaligned image files with O_DIRECT + altogether. That sounds reasonable until you realize that qcow2 + does not necessarily fill up its metadata clusters, and that nobody + runs qemu-img create with O_DIRECT. Therefore, unpreallocated qcow2 + files usually have an unaligned image tail. + +(2) bdrv_co_block_status() could ignore unaligned tails. It actually + throws away everything past the EOF already, so that sounds + reasonable.
+ Unfortunately, the block layer knows file lengths only with a + granularity of BDRV_SECTOR_SIZE, so bdrv_co_block_status() usually + would have to guess whether its file length information is inexact + or whether the driver is broken. + +Fixing what raw_co_block_status() returns is the safest thing to do. + +There seems to be no other block driver that sets request_alignment and +does not make sure that it always returns aligned values. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 9c3db310ff0b7473272ae8dce5e04e2f8a825390) +Signed-off-by: Max Reitz +Signed-off-by: Miroslav Rezanina +--- + block/file-posix.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 1cf4ee4..c185f34 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -2475,6 +2475,8 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, + off_t data = 0, hole = 0; + int ret; + ++ assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment)); ++ + ret = fd_open(bs); + if (ret < 0) { + return ret; +@@ -2500,6 +2502,20 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, + /* On a data extent, compute bytes to the end of the extent, + * possibly including a partial sector at EOF. */ + *pnum = MIN(bytes, hole - offset); ++ ++ /* ++ * We are not allowed to return partial sectors, though, so ++ * round up if necessary. ++ */ ++ if (!QEMU_IS_ALIGNED(*pnum, bs->bl.request_alignment)) { ++ int64_t file_length = raw_getlength(bs); ++ if (file_length > 0) { ++ /* Ignore errors, this is just a safeguard */ ++ assert(hole == file_length); ++ } ++ *pnum = ROUND_UP(*pnum, bs->bl.request_alignment); ++ } ++ + ret = BDRV_BLOCK_DATA; + } else { + /* On a hole, compute bytes to the beginning of the next extent. 
*/ +-- +1.8.3.1 + diff --git a/kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch b/kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch new file mode 100644 index 0000000..8c47593 --- /dev/null +++ b/kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch @@ -0,0 +1,182 @@ +From d986fc898ca8a20b486afe92dc0c7b370f482366 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Tue, 25 Jun 2019 21:07:10 +0200 +Subject: [PATCH 5/6] iotests: Test unaligned raw images with O_DIRECT + +RH-Author: Max Reitz +Message-id: <20190625210710.20946-3-mreitz@redhat.com> +Patchwork-id: 88946 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/2] iotests: Test unaligned raw images with O_DIRECT +Bugzilla: 1588356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: John Snow +RH-Acked-by: Stefano Garzarella + +We already have 221 for accesses through the page cache, but it is +better to create a new file for O_DIRECT instead of integrating those +test cases into 221. This way, we can make use of +_supported_cache_modes (and _default_cache_mode) so the test is +automatically skipped on filesystems that do not support O_DIRECT. + +As part of the split, add _supported_cache_modes to 221. With that, it +no longer fails when run with -c none or -c directsync. + +Signed-off-by: Max Reitz +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 2fab30c80b33cdc6157c7efe6207e54b6835cf92) +Signed-off-by: Max Reitz +Signed-off-by: Miroslav Rezanina +--- + tests/qemu-iotests/221 | 4 +++ + tests/qemu-iotests/253 | 84 ++++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/253.out | 14 ++++++++ + tests/qemu-iotests/group | 1 + + 4 files changed, 103 insertions(+) + create mode 100755 tests/qemu-iotests/253 + create mode 100644 tests/qemu-iotests/253.out + +diff --git a/tests/qemu-iotests/221 b/tests/qemu-iotests/221 +index 808cd9a..92c9b13 100755 +--- a/tests/qemu-iotests/221 ++++ b/tests/qemu-iotests/221 +@@ -1,6 +1,7 @@ + #!/usr/bin/env bash + # + # Test qemu-img vs. 
unaligned images ++# (See also 253, which is the O_DIRECT version) + # + # Copyright (C) 2018 Red Hat, Inc. + # +@@ -37,6 +38,9 @@ _supported_fmt raw + _supported_proto file + _supported_os Linux + ++_default_cache_mode writeback ++_supported_cache_modes writeback writethrough unsafe ++ + echo + echo "=== Check mapping of unaligned raw image ===" + echo +diff --git a/tests/qemu-iotests/253 b/tests/qemu-iotests/253 +new file mode 100755 +index 0000000..d88d5af +--- /dev/null ++++ b/tests/qemu-iotests/253 +@@ -0,0 +1,84 @@ ++#!/usr/bin/env bash ++# ++# Test qemu-img vs. unaligned images; O_DIRECT version ++# (Originates from 221) ++# ++# Copyright (C) 2019 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++seq="$(basename $0)" ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_test_img ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++. ./common.rc ++. 
./common.filter ++ ++_supported_fmt raw ++_supported_proto file ++_supported_os Linux ++ ++_default_cache_mode none ++_supported_cache_modes none directsync ++ ++echo ++echo "=== Check mapping of unaligned raw image ===" ++echo ++ ++# We do not know how large a physical sector is, but it is certainly ++# going to be a factor of 1 MB ++size=$((1 * 1024 * 1024 - 1)) ++ ++# qemu-img create rounds size up to BDRV_SECTOR_SIZE ++_make_test_img $size ++$QEMU_IMG map --output=json --image-opts \ ++ "driver=$IMGFMT,file.driver=file,file.filename=$TEST_IMG,cache.direct=on" \ ++ | _filter_qemu_img_map ++ ++# so we resize it and check again ++truncate --size=$size "$TEST_IMG" ++$QEMU_IMG map --output=json --image-opts \ ++ "driver=$IMGFMT,file.driver=file,file.filename=$TEST_IMG,cache.direct=on" \ ++ | _filter_qemu_img_map ++ ++# qemu-io with O_DIRECT always writes whole physical sectors. Again, ++# we do not know how large a physical sector is, so we just start ++# writing from a 64 kB boundary, which should always be aligned. ++offset=$((1 * 1024 * 1024 - 64 * 1024)) ++$QEMU_IO -c "w $offset $((size - offset))" "$TEST_IMG" | _filter_qemu_io ++$QEMU_IMG map --output=json --image-opts \ ++ "driver=$IMGFMT,file.driver=file,file.filename=$TEST_IMG,cache.direct=on" \ ++ | _filter_qemu_img_map ++ ++# Resize it and check again -- contrary to 221, we may not get partial ++# sectors here, so there should be only two areas (one zero, one ++# data). 
++truncate --size=$size "$TEST_IMG" ++$QEMU_IMG map --output=json --image-opts \ ++ "driver=$IMGFMT,file.driver=file,file.filename=$TEST_IMG,cache.direct=on" \ ++ | _filter_qemu_img_map ++ ++# success, all done ++echo '*** done' ++rm -f $seq.full ++status=0 +diff --git a/tests/qemu-iotests/253.out b/tests/qemu-iotests/253.out +new file mode 100644 +index 0000000..607c0ba +--- /dev/null ++++ b/tests/qemu-iotests/253.out +@@ -0,0 +1,14 @@ ++QA output created by 253 ++ ++=== Check mapping of unaligned raw image === ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048575 ++[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] ++[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] ++wrote 65535/65535 bytes at offset 983040 ++63.999 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, ++{ "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] ++[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, ++{ "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] ++*** done +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index 0db5e68..3ea739d 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -248,3 +248,4 @@ + 246 rw auto quick + 247 rw auto quick + 248 rw auto quick ++253 rw auto quick +-- +1.8.3.1 + diff --git a/kvm-qxl-check-release-info-object.patch b/kvm-qxl-check-release-info-object.patch new file mode 100644 index 0000000..10e0147 --- /dev/null +++ b/kvm-qxl-check-release-info-object.patch @@ -0,0 +1,51 @@ +From b779db9ffd8626b74f969a7c2484239715f2d9e8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Wed, 19 Jun 2019 17:18:47 +0200 +Subject: [PATCH 2/6] qxl: check release info object +MIME-Version: 1.0 
+Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Philippe Mathieu-Daudé +Message-id: <20190619171847.32603-2-philmd@redhat.com> +Patchwork-id: 88739 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] qxl: check release info object +Bugzilla: 1712717 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Stefan Hajnoczi + +From: Prasad J Pandit + +When releasing spice resources in release_resource() routine, +if release info object 'ext.info' is null, it leads to null +pointer dereference. Add check to avoid it. + +Reported-by: Bugs SysSec +Signed-off-by: Prasad J Pandit +Message-id: 20190425063534.32747-1-ppandit@redhat.com +Signed-off-by: Gerd Hoffmann +(cherry picked from commit d52680fc932efb8a2f334cc6993e705ed1e31e99) +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Miroslav Rezanina +--- + hw/display/qxl.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/display/qxl.c b/hw/display/qxl.c +index c8ce578..632923a 100644 +--- a/hw/display/qxl.c ++++ b/hw/display/qxl.c +@@ -777,6 +777,9 @@ static void interface_release_resource(QXLInstance *sin, + QXLReleaseRing *ring; + uint64_t *item, id; + ++ if (!ext.info) { ++ return; ++ } + if (ext.group_id == MEMSLOT_GROUP_HOST) { + /* host group -> vga mode update request */ + QXLCommandExt *cmdext = (void *)(intptr_t)(ext.info->id); +-- +1.8.3.1 + diff --git a/kvm-rh-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch b/kvm-rh-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch new file mode 100644 index 0000000..20f0f65 --- /dev/null +++ b/kvm-rh-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch @@ -0,0 +1,43 @@ +From 27b7c444c3a568e87647f5386fbfc2c0f2b1ff9b Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Fri, 28 Jun 2019 08:34:00 +0200 +Subject: [PATCH 6/6] rh: set CONFIG_BOCHS_DISPLAY=y for x86 + +RH-Author: Gerd Hoffmann +Message-id: <20190628083400.7016-2-kraxel@redhat.com> +Patchwork-id: 88989 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v2 1/1] rh: set CONFIG_BOCHS_DISPLAY=y for 
x86 +Bugzilla: 1707118 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Danilo de Paula + +-device bochs-display can replace -device VGA for cases where +legacy vga emulation is not needed. That is the case for UEFI +guests (they use EFI GOP for boot display). seabios guest can +work too with some quirks. + +Main advantage: Much simpler device emulation -> reduced attack surface. + +Signed-off-by: Gerd Hoffmann +Signed-off-by: Miroslav Rezanina +--- + default-configs/x86_64-rh-devices.mak | 1 + + redhat/qemu-kvm.spec.template | 4 ++++ + 2 files changed, 5 insertions(+) + +diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak +index 01b5363..05ad6cf 100644 +--- a/default-configs/x86_64-rh-devices.mak ++++ b/default-configs/x86_64-rh-devices.mak +@@ -12,6 +12,7 @@ CONFIG_ACPI_X86_ICH=y + CONFIG_AHCI=y + CONFIG_APIC=y + CONFIG_APM=y ++CONFIG_BOCHS_DISPLAY=y + CONFIG_DIMM=y + CONFIG_E1000E_PCI_EXPRESS=y + CONFIG_E1000_PCI=y +-- +1.8.3.1 + diff --git a/kvm-target-i386-add-MDS-NO-feature.patch b/kvm-target-i386-add-MDS-NO-feature.patch new file mode 100644 index 0000000..9fec966 --- /dev/null +++ b/kvm-target-i386-add-MDS-NO-feature.patch @@ -0,0 +1,51 @@ +From dd19ddadfbabc54415977cd0b9b3f520a87988ad Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Sun, 23 Jun 2019 15:19:17 +0200 +Subject: [PATCH 3/6] target/i386: add MDS-NO feature + +RH-Author: Paolo Bonzini +Message-id: <20190623151917.7942-1-pbonzini@redhat.com> +Patchwork-id: 88873 +O-Subject: [RHEL-AV-8.1.0 PATCH qemu-kvm] target/i386: add MDS-NO feature +Bugzilla: 1722839 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Miroslav Rezanina + +Bugzilla: 1722839 + +Brew build: 22317828 + +Microarchitectural Data Sampling is a hardware vulnerability which allows +unprivileged speculative access to data which is available in various CPU +internal buffers.
+ +Some Intel processors use the ARCH_CAP_MDS_NO bit in the +IA32_ARCH_CAPABILITIES +MSR to report that they are not vulnerable, make it available to guests. + +Signed-off-by: Paolo Bonzini +Message-Id: <20190516185320.28340-1-pbonzini@redhat.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 20140a82c67467f53814ca197403d5e1b561a5e5) +Signed-off-by: Miroslav Rezanina +--- + target/i386/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 3886464..2e73821 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1183,7 +1183,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + "rdctl-no", "ibrs-all", "rsba", "skip-l1dfl-vmentry", +- "ssb-no", NULL, NULL, NULL, ++ "ssb-no", "mds-no", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index a8fa99d..3e46a8a 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.0.0 -Release: 4%{?dist} +Release: 5%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -154,6 +154,16 @@ Patch34: kvm-aarch64-Compile-out-IOH3420.patch Patch35: kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch # For bz#1714891 - Guest with persistent reservation manager for a disk fails to start Patch36: kvm-vl-Document-why-objects-are-delayed.patch +# For bz#1712717 - CVE-2019-12155 qemu-kvm: QEMU: qxl: null pointer dereference while releasing spice resources [rhel-av-8] +Patch37: kvm-qxl-check-release-info-object.patch +# For bz#1722839 - [Intel 8.1 FEAT] MDS_NO exposure to guest - Fast Train +Patch38: kvm-target-i386-add-MDS-NO-feature.patch +# For bz#1588356 - qemu crashed on the source host when do storage migration with source qcow2 disk created by 'qemu-img' +Patch39: kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch +# For bz#1588356 - qemu crashed on the source host when do storage migration with source qcow2 disk created by 'qemu-img' +Patch40: kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch +# For bz#1707118 - enable device: bochs-display (QEMU) +Patch41: kvm-rh-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -180,7 +190,7 @@ BuildRequires: libcacard-devel # For smartcard NSS support BuildRequires: nss-devel %endif -BuildRequires: libseccomp-devel >= 2.3.0 +BuildRequires: libseccomp-devel >= 2.4.0 # For network block driver BuildRequires: libcurl-devel BuildRequires: libssh2-devel @@ -297,7 +307,7 @@ Requires: ipxe-roms-qemu >= 20170123-1 Requires: SLOF >= %{SLOF_gittagdate}-1.git%{SLOF_gittagcommit} %endif Requires: %{name}-common = %{epoch}:%{version}-%{release} -Requires: libseccomp >= 2.3.0 +Requires: libseccomp >= 2.4.0 # For compressed guest memory dumps Requires: lzo snappy %if %{have_gluster} @@ -845,6 +855,8 @@ rom_link() { rom_link ../seavgabios/vgabios-stdvga.bin vgabios-stdvga.bin rom_link ../seavgabios/vgabios-vmware.bin vgabios-vmware.bin rom_link 
../seavgabios/vgabios-virtio.bin vgabios-virtio.bin + rom_link ../seavgabios/vgabios-ramfb.bin vgabios-ramfb.bin + rom_link ../seavgabios/vgabios-bochs-display.bin vgabios-bochs-display.bin %endif %ifarch x86_64 rom_link ../seabios/bios.bin bios.bin @@ -1002,6 +1014,8 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/%{name}/vgabios-stdvga.bin %{_datadir}/%{name}/vgabios-vmware.bin %{_datadir}/%{name}/vgabios-virtio.bin + %{_datadir}/%{name}/vgabios-ramfb.bin + %{_datadir}/%{name}/vgabios-bochs-display.bin %{_datadir}/%{name}/efi-e1000.rom %{_datadir}/%{name}/efi-e1000e.rom %{_datadir}/%{name}/efi-virtio.rom @@ -1073,6 +1087,24 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Jul 08 2019 Miroslav Rezanina - 4.0.0-5.el8 +- kvm-qemu-kvm.spec-bump-libseccomp-2.4.0.patch [bz#1720306] +- kvm-qxl-check-release-info-object.patch [bz#1712717] +- kvm-target-i386-add-MDS-NO-feature.patch [bz#1722839] +- kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch [bz#1588356] +- kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch [bz#1588356] +- kvm-rh-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch [bz#1707118] +- Resolves: bz#1588356 + (qemu crashed on the source host when do storage migration with source qcow2 disk created by 'qemu-img') +- Resolves: bz#1707118 + (enable device: bochs-display (QEMU)) +- Resolves: bz#1712717 + (CVE-2019-12155 qemu-kvm: QEMU: qxl: null pointer dereference while releasing spice resources [rhel-av-8]) +- Resolves: bz#1720306 + (VM failed to start with error "failed to install seccomp syscall filter in the kernel") +- Resolves: bz#1722839 + ([Intel 8.1 FEAT] MDS_NO exposure to guest - Fast Train) + * Tue Jun 11 2019 Danilo Cesar Lemes de Paula - 4.0.0-4.el8 - kvm-Disable-VXHS-support.patch [bz#1714937] - kvm-aarch64-Add-virt-rhel8.1.0-machine-type-for-ARM.patch [bz#1713735] From b3fbad895777bff127fa4a8619968e0cd2eb1dcc Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Tue, 23 Jul 2019 15:53:44 +0100 Subject: [PATCH 039/195] * Tue Jul 23 2019 Danilo Cesar Lemes de Paula - 4.0.0-6.el8 - kvm-x86_64-rh-devices-add-missing-TPM-passthrough.patch [bz#1519013] - kvm-x86_64-rh-devices-enable-TPM-emulation.patch [bz#1519013] - kvm-vfio-increase-the-cap-on-number-of-assigned-devices-.patch [bz#1719823] - Resolves: bz#1519013 ([RFE] QEMU Software TPM support (vTPM, or TPM emulation)) - Resolves: bz#1719823 ([RHEL 8.1] [RFE] increase the maximum of vfio devices to more than 32 in qemu-kvm) --- ...e-cap-on-number-of-assigned-devices-.patch | 111 ++++++++++++++++++ ...-devices-add-missing-TPM-passthrough.patch | 40 +++++++ ...6_64-rh-devices-enable-TPM-emulation.patch | 53 +++++++++ qemu-kvm.spec | 17 ++- 4 files changed, 220 insertions(+), 1 deletion(-) create mode 100644 kvm-vfio-increase-the-cap-on-number-of-assigned-devices-.patch create mode 100644 kvm-x86_64-rh-devices-add-missing-TPM-passthrough.patch create mode 100644 kvm-x86_64-rh-devices-enable-TPM-emulation.patch diff --git a/kvm-vfio-increase-the-cap-on-number-of-assigned-devices-.patch b/kvm-vfio-increase-the-cap-on-number-of-assigned-devices-.patch new file mode 100644 index 0000000..61a853a --- /dev/null +++ b/kvm-vfio-increase-the-cap-on-number-of-assigned-devices-.patch @@ -0,0 +1,111 @@ +From 2b89558946fc396c6ecb10249b69960d2a74e18f Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Wed, 12 Jun 2019 16:56:23 +0100 +Subject: [PATCH 3/3] vfio: increase the cap on number of assigned devices to + 64 + +RH-Author: Bandan Das +Message-id: +Patchwork-id: 88653 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH] vfio: increase the cap on number of assigned devices to 64 +Bugzilla: 1719823 +RH-Acked-by: Alex Williamson +RH-Acked-by: Auger Eric +RH-Acked-by: Laszlo Ersek + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1719823 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=22124174 +Branch: rhel-av-8.1.0/master-4.0.0 +Upstrea: N/A, the device limit 
change is downstream only + +In addition to bumping up the limit, also add a property for +future-proofing. This needs to be set for every assigned device +or via "global": -global vfio-pci.x-assigned-device-limit + +RHEL Notes: +For each vm using vfio, there is at least a container fd. For +each assigned device, there is likely a group fd, a device fd, +an error signaling fd and a device request fd. Assuming SR-IOV +VFs, vectors/device considering MSI/MSI-X could be ~3-5. Therefore, +we have ~14 file descriptors per device or 897 for 64 devices. +The default open fd limit is 1024 on Linux but libvirt bumps it to +8192 and the qemu process inherits that value as well. + +Signed-off-by: Bandan Das +Signed-off-by: Danilo C. L. de Paula +--- + hw/vfio/pci.c | 20 +++++++++++++++++--- + hw/vfio/pci.h | 1 + + 2 files changed, 18 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 7c998af..7c0d93a 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -36,11 +36,13 @@ + #include "qapi/error.h" + + #define MSIX_CAP_LENGTH 12 +-#define MAX_DEV_ASSIGN_CMDLINE 32 + + #define TYPE_VFIO_PCI "vfio-pci" + #define PCI_VFIO(obj) OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI) + ++/* RHEL only: Set once for the first assigned dev */ ++static uint16_t device_limit; ++ + static void vfio_disable_interrupts(VFIOPCIDevice *vdev); + static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); + +@@ -2810,15 +2812,24 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + int ret, i = 0; + bool is_mdev; + ++ if (device_limit && device_limit != vdev->assigned_device_limit) { ++ error_setg(errp, "Assigned device limit has been redefined. 
" ++ "Old:%d, New:%d", ++ device_limit, vdev->assigned_device_limit); ++ return; ++ } else { ++ device_limit = vdev->assigned_device_limit; ++ } ++ + QLIST_FOREACH(group, &vfio_group_list, next) { + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { + i++; + } + } + +- if (i >= MAX_DEV_ASSIGN_CMDLINE) { ++ if (i >= vdev->assigned_device_limit) { + error_setg(errp, "Maximum supported vfio devices (%d) " +- "already attached", MAX_DEV_ASSIGN_CMDLINE); ++ "already attached", vdev->assigned_device_limit); + return; + } + +@@ -3223,6 +3234,9 @@ static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), + DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, + no_geforce_quirks, false), ++ /* RHEL only */ ++ DEFINE_PROP_UINT16("x-assigned-device-limit", VFIOPCIDevice, ++ assigned_device_limit, 64), + DEFINE_PROP_BOOL("x-no-kvm-ioeventfd", VFIOPCIDevice, no_kvm_ioeventfd, + false), + DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index c11c3f1..29a8add 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -136,6 +136,7 @@ typedef struct VFIOPCIDevice { + EventNotifier err_notifier; + EventNotifier req_notifier; + int (*resetfn)(struct VFIOPCIDevice *); ++ uint16_t assigned_device_limit; + uint32_t vendor_id; + uint32_t device_id; + uint32_t sub_vendor_id; +-- +1.8.3.1 + diff --git a/kvm-x86_64-rh-devices-add-missing-TPM-passthrough.patch b/kvm-x86_64-rh-devices-add-missing-TPM-passthrough.patch new file mode 100644 index 0000000..0f52f10 --- /dev/null +++ b/kvm-x86_64-rh-devices-add-missing-TPM-passthrough.patch @@ -0,0 +1,40 @@ +From 495a27daa8ca91bb357a065c986552c3375eda82 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 24 May 2019 18:40:01 +0100 +Subject: [PATCH 1/3] x86_64-rh-devices: add missing TPM passthrough +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 
8bit + +RH-Author: Marc-André Lureau +Message-id: <20190524184002.14050-2-marcandre.lureau@redhat.com> +Patchwork-id: 88230 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/2] x86_64-rh-devices: add missing TPM passthrough +Bugzilla: 1519013 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Dr. David Alan Gilbert + +The TPM passthrough support got lost with 4.0 rebase, due to +configure/Kconfig changes. + +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + default-configs/x86_64-rh-devices.mak | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak +index 05ad6cf..8079fa7 100644 +--- a/default-configs/x86_64-rh-devices.mak ++++ b/default-configs/x86_64-rh-devices.mak +@@ -92,3 +92,6 @@ CONFIG_VTD=y + CONFIG_WDT_IB6300ESB=y + CONFIG_WDT_IB700=y + CONFIG_XIO3130=y ++CONFIG_TPM_CRB=y ++CONFIG_TPM_TIS=y ++CONFIG_TPM_PASSTHROUGH=y +-- +1.8.3.1 + diff --git a/kvm-x86_64-rh-devices-enable-TPM-emulation.patch b/kvm-x86_64-rh-devices-enable-TPM-emulation.patch new file mode 100644 index 0000000..e5c7beb --- /dev/null +++ b/kvm-x86_64-rh-devices-enable-TPM-emulation.patch @@ -0,0 +1,53 @@ +From e1fe9feada882ece852c66f123535a98ea2230ce Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 24 May 2019 18:40:02 +0100 +Subject: [PATCH 2/3] x86_64-rh-devices: enable TPM emulation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20190524184002.14050-3-marcandre.lureau@redhat.com> +Patchwork-id: 88229 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/2] x86_64-rh-devices: enable TPM emulation +Bugzilla: 1519013 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Dr. David Alan Gilbert + +Remove the useless & misleading configure lines. + +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. 
de Paula +--- + configure | 3 --- + default-configs/x86_64-rh-devices.mak | 1 + + 2 files changed, 1 insertion(+), 3 deletions(-) + +diff --git a/configure b/configure +index 8cb6740..638c881 100755 +--- a/configure ++++ b/configure +@@ -2412,9 +2412,6 @@ if test "$seccomp" != "no" ; then + fi + fi + +-# RHEL8-specific, only passthrough for now, rhbz#1688312 +-tpm_emulator=no +- + ########################################## + # xen probe + +diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak +index 8079fa7..906b42d 100644 +--- a/default-configs/x86_64-rh-devices.mak ++++ b/default-configs/x86_64-rh-devices.mak +@@ -94,4 +94,5 @@ CONFIG_WDT_IB700=y + CONFIG_XIO3130=y + CONFIG_TPM_CRB=y + CONFIG_TPM_TIS=y ++CONFIG_TPM_EMULATOR=y + CONFIG_TPM_PASSTHROUGH=y +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 3e46a8a..16c7a39 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.0.0 -Release: 5%{?dist} +Release: 6%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -164,6 +164,12 @@ Patch39: kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch Patch40: kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch # For bz#1707118 - enable device: bochs-display (QEMU) Patch41: kvm-rh-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch +# For bz#1519013 - [RFE] QEMU Software TPM support (vTPM, or TPM emulation) +Patch42: kvm-x86_64-rh-devices-add-missing-TPM-passthrough.patch +# For bz#1519013 - [RFE] QEMU Software TPM support (vTPM, or TPM emulation) +Patch43: kvm-x86_64-rh-devices-enable-TPM-emulation.patch +# For bz#1719823 - [RHEL 8.1] [RFE] increase the maximum of vfio devices to more than 32 in qemu-kvm +Patch44: kvm-vfio-increase-the-cap-on-number-of-assigned-devices-.patch BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -1087,6 +1093,15 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Jul 23 2019 Danilo Cesar Lemes de Paula - 4.0.0-6.el8 +- kvm-x86_64-rh-devices-add-missing-TPM-passthrough.patch [bz#1519013] +- kvm-x86_64-rh-devices-enable-TPM-emulation.patch [bz#1519013] +- kvm-vfio-increase-the-cap-on-number-of-assigned-devices-.patch [bz#1719823] +- Resolves: bz#1519013 + ([RFE] QEMU Software TPM support (vTPM, or TPM emulation)) +- Resolves: bz#1719823 + ([RHEL 8.1] [RFE] increase the maximum of vfio devices to more than 32 in qemu-kvm) + * Mon Jul 08 2019 Miroslav Rezanina - 4.0.0-5.el8 - kvm-qemu-kvm.spec-bump-libseccomp-2.4.0.patch [bz#1720306] - kvm-qxl-check-release-info-object.patch [bz#1712717] From 41331165db65bf92024132555f41cbcd329c74f1 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 15 Aug 2019 06:45:41 +0200 Subject: [PATCH 040/195] * Wed Aug 14 2019 Miroslav Rezanina - 4.1.0-1.el8 - Rebase to qemu 4.1.0 rc4 [bz#1705235] - Resolves: bz#1705235 (Rebase qemu-kvm for RHEL-AV 8.1.0) --- .gitignore | 1 + 0004-Initial-redhat-build.patch | 319 +------ 
0005-Enable-disable-devices-for-RHEL.patch | 391 ++++----- ...Machine-type-related-general-changes.patch | 550 +++++------- 0007-Add-aarch64-machine-types.patch | 69 +- 0008-Add-ppc64-machine-types.patch | 159 ++-- 0009-Add-s390x-machine-types.patch | 34 +- 0010-Add-x86_64-machine-types.patch | 823 +++++------------- 0011-Enable-make-check.patch | 93 +- ...mber-of-devices-that-can-be-assigned.patch | 66 +- ...Add-support-statement-to-help-output.patch | 11 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 27 +- 0015-Add-support-for-simpletrace.patch | 29 +- ...documentation-instead-of-qemu-system.patch | 133 ++- 0017-usb-xhci-Fix-PCI-capability-order.patch | 11 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 12 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 10 +- 0020-doc-fix-the-configuration-path.patch | 80 -- ...ys-bits-limit-48-on-rhel-machine-typ.patch | 62 -- 0022-redhat-Post-rebase-synchronization.patch | 36 - kvm-Disable-VXHS-support.patch | 293 ------- ...-virt-rhel8.1.0-machine-type-for-ARM.patch | 56 -- ...M-VIRT-iommu-option-in-RHEL8.1-machi.patch | 66 -- kvm-aarch64-Compile-out-IOH3420.patch | 54 -- ...osix-Unaligned-O_DIRECT-block-status.patch | 98 --- kvm-compat-Generic-hw_compat_rhel_8_0.patch | 79 -- ...t-unaligned-raw-images-with-O_DIRECT.patch | 182 ---- kvm-qxl-check-release-info-object.patch | 51 -- ...efine-pseries-rhel8.1.0-machine-type.patch | 73 -- ...-paste-garbage-in-hw_compat-comments.patch | 104 --- ...pseries-rhel7.6.0-with-rhel-av-8.0.1.patch | 54 -- ...h-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch | 43 - kvm-target-i386-add-MDS-NO-feature.patch | 51 -- kvm-target-i386-define-md-clear-bit.patch | 58 -- ...-reset-handler-before-updating-state.patch | 47 - ...avoid-libusb_set_configuration-calls.patch | 68 -- ...ost-skip-reset-for-untouched-devices.patch | 46 - ...e-cap-on-number-of-assigned-devices-.patch | 111 --- kvm-vl-Document-why-objects-are-delayed.patch | 66 -- ...ckdev-persistent-reservation-managem.patch | 66 -- 
...-devices-add-missing-TPM-passthrough.patch | 40 - ...6_64-rh-devices-enable-TPM-emulation.patch | 53 -- kvm.modules | 18 - qemu-kvm.spec | 107 +-- sources | 2 +- 45 files changed, 1004 insertions(+), 3798 deletions(-) delete mode 100644 0020-doc-fix-the-configuration-path.patch delete mode 100644 0021-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch delete mode 100644 0022-redhat-Post-rebase-synchronization.patch delete mode 100644 kvm-Disable-VXHS-support.patch delete mode 100644 kvm-aarch64-Add-virt-rhel8.1.0-machine-type-for-ARM.patch delete mode 100644 kvm-aarch64-Allow-ARM-VIRT-iommu-option-in-RHEL8.1-machi.patch delete mode 100644 kvm-aarch64-Compile-out-IOH3420.patch delete mode 100644 kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch delete mode 100644 kvm-compat-Generic-hw_compat_rhel_8_0.patch delete mode 100644 kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch delete mode 100644 kvm-qxl-check-release-info-object.patch delete mode 100644 kvm-redhat-define-pseries-rhel8.1.0-machine-type.patch delete mode 100644 kvm-redhat-fix-cut-n-paste-garbage-in-hw_compat-comments.patch delete mode 100644 kvm-redhat-sync-pseries-rhel7.6.0-with-rhel-av-8.0.1.patch delete mode 100644 kvm-rh-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch delete mode 100644 kvm-target-i386-add-MDS-NO-feature.patch delete mode 100644 kvm-target-i386-define-md-clear-bit.patch delete mode 100644 kvm-usb-call-reset-handler-before-updating-state.patch delete mode 100644 kvm-usb-host-avoid-libusb_set_configuration-calls.patch delete mode 100644 kvm-usb-host-skip-reset-for-untouched-devices.patch delete mode 100644 kvm-vfio-increase-the-cap-on-number-of-assigned-devices-.patch delete mode 100644 kvm-vl-Document-why-objects-are-delayed.patch delete mode 100644 kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch delete mode 100644 kvm-x86_64-rh-devices-add-missing-TPM-passthrough.patch delete mode 100644 kvm-x86_64-rh-devices-enable-TPM-emulation.patch delete mode 
100644 kvm.modules diff --git a/.gitignore b/.gitignore index 678d910..f08ebba 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /qemu-3.1.0.tar.xz /qemu-4.0.0.tar.xz +/qemu-4.1.0-rc4.tar.xz diff --git a/0004-Initial-redhat-build.patch b/0004-Initial-redhat-build.patch index 75b88d3..882f7ef 100644 --- a/0004-Initial-redhat-build.patch +++ b/0004-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From 223a694c9878013afa2ae9024cb35fbc3a334174 Mon Sep 17 00:00:00 2001 +From d7ed59eb82b87992582d05933bbf68107962ba43 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build @@ -13,7 +13,7 @@ several issues are fixed in QEMU tree: - Use "/share/qemu-kvm" as SHARE_SUFFIX - We reconfigured our share to qemu-kvm to be consistent with used name -This rebase includes changes up to qemu-kvm-3.1.0-23.el8 +This rebase includes changes up to qemu-kvm-4.0.0-6.el8 Rebase notes (3.1.0): - added new configure options @@ -30,6 +30,20 @@ Rebase notes (4.0.0): - Added interop documentation files - Use python module instead of qemu.py (upstream) +Rebase notes (4.1.0-rc0): +- Remove edk2 files generated by build +- Switch to rhel-8.1-candidate build target +- Remove specs documentation +- Switched from libssh2 to libssh +- Add rc0 tarball usage hacks + +Rebase notes (4.1.0-rc1): +- Added BuildRequires for wget, rpm-build and python3-sphinx + +Rebase notes (4.1.0-rc2): +- Removed new unpacked files +- Update configure line to use new options + Merged patches (3.1.0): - 01f0c9f RHEL8: Add disable configure options to qemu spec file - Spec file cleanups @@ -40,38 +54,45 @@ Merged patches (4.0.0): - eb204b5 Introduce the qemu-kvm-tests rpm - 223cf0c Load kvm module during boot (partial) -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (4.1.0-rc0): +- ebb6e97 redhat: Fix LOCALVERSION creation +- b0ab0cc redhat: enable tpmdev passthrough (not disabling tests) +- 7cb3c4a Enable libpmem to support nvdimm +- 8943607 qemu-kvm.spec: bump libseccomp >= 2.4.0 +- 27b7c44 rh: set CONFIG_BOCHS_DISPLAY=y for x86 (partial) + +Merged patches (4.1.0-rc3): +- e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) --- + .gitignore | 1 + Makefile | 3 +- - block/Makefile.objs | 2 +- - block/vxhs.c | 119 +- - configure | 33 +- + configure | 1 + os-posix.c | 2 +- redhat/Makefile | 82 ++ redhat/Makefile.common | 51 + redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 2082 +++++++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 2202 +++++++++++++++++++++++++++++++++++++ redhat/scripts/process-patches.sh | 7 +- ui/vnc.c | 2 +- - 11 files changed, 2374 insertions(+), 48 deletions(-) + 10 files changed, 2382 insertions(+), 8 deletions(-) create mode 100644 redhat/Makefile create mode 100644 redhat/Makefile.common create mode 100644 redhat/README.tests create mode 100644 redhat/qemu-kvm.spec.template diff --git a/Makefile b/Makefile -index 04a0d45050..05f62eab3c 100644 +index 85862fb..288a5ac 100644 --- a/Makefile +++ b/Makefile -@@ -470,6 +470,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM +@@ -493,6 +493,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM CAP_CFLAGS += -DCAPSTONE_HAS_ARM64 CAP_CFLAGS += -DCAPSTONE_HAS_POWERPC CAP_CFLAGS += -DCAPSTONE_HAS_X86 +CAP_CFLAGS += -Wp,-D_GLIBCXX_ASSERTIONS - subdir-capstone: .git-submodule-status - $(call quiet-command,$(MAKE) -C $(SRC_PATH)/capstone CAPSTONE_SHARED=no BUILDDIR="$(BUILD_DIR)/capstone" CC="$(CC)" AR="$(AR)" LD="$(LD)" RANLIB="$(RANLIB)" CFLAGS="$(CAP_CFLAGS)" $(SUBDIR_MAKEFLAGS) $(BUILD_DIR)/capstone/$(LIBCAPSTONE)) -@@ -749,7 +750,7 @@ install-doc: $(DOCS) install-sphinxdocs + .PHONY: capstone/all + capstone/all: .git-submodule-status +@@ -804,7 +805,7 @@ install-doc: $(DOCS) install-sphinxdocs $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt 
"$(DESTDIR)$(qemu_docdir)" ifdef CONFIG_POSIX $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" @@ -80,273 +101,23 @@ index 04a0d45050..05f62eab3c 100644 $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) docs/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" -diff --git a/block/Makefile.objs b/block/Makefile.objs -index 7a81892a52..f4cf03bed9 100644 ---- a/block/Makefile.objs -+++ b/block/Makefile.objs -@@ -30,7 +30,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o - block-obj-$(CONFIG_CURL) += curl.o - block-obj-$(CONFIG_RBD) += rbd.o - block-obj-$(CONFIG_GLUSTERFS) += gluster.o --block-obj-$(CONFIG_VXHS) += vxhs.o -+#block-obj-$(CONFIG_VXHS) += vxhs.o - block-obj-$(CONFIG_LIBSSH2) += ssh.o - block-obj-y += accounting.o dirty-bitmap.o - block-obj-y += write-threshold.o -diff --git a/block/vxhs.c b/block/vxhs.c -index 2e18229ba4..3dbb9544bc 100644 ---- a/block/vxhs.c -+++ b/block/vxhs.c -@@ -9,7 +9,8 @@ - */ - - #include "qemu/osdep.h" --#include -+#include "block/vxhs_shim.h" -+#include - #include - #include "block/block_int.h" - #include "block/qdict.h" -@@ -59,6 +60,97 @@ typedef struct BDRVVXHSState { - char *tlscredsid; /* tlscredsid */ - } BDRVVXHSState; - -+#define LIBVXHS_FULL_PATHNAME "/usr/lib64/qemu/libvxhs.so.1" -+static bool libvxhs_loaded; -+static GModule *libvxhs_handle; -+ -+static LibVXHSFuncs libvxhs; -+ -+typedef struct LibVXHSSymbols { -+ const char *name; -+ gpointer *addr; -+} LibVXHSSymbols; -+ -+static LibVXHSSymbols libvxhs_symbols[] = { -+ {"iio_init", (gpointer *) &libvxhs.iio_init}, -+ {"iio_fini", (gpointer *) &libvxhs.iio_fini}, -+ {"iio_min_version", (gpointer *) &libvxhs.iio_min_version}, -+ {"iio_max_version", (gpointer *) &libvxhs.iio_max_version}, -+ {"iio_open", (gpointer *) &libvxhs.iio_open}, -+ {"iio_close", (gpointer *) &libvxhs.iio_close}, -+ {"iio_writev", (gpointer *) &libvxhs.iio_writev}, -+ {"iio_readv", (gpointer *) &libvxhs.iio_readv}, -+ {"iio_ioctl", 
(gpointer *) &libvxhs.iio_ioctl}, -+ {NULL} -+}; -+ -+static void bdrv_vxhs_set_funcs(GModule *handle, Error **errp) -+{ -+ int i = 0; -+ while (libvxhs_symbols[i].name) { -+ const char *name = libvxhs_symbols[i].name; -+ if (!g_module_symbol(handle, name, libvxhs_symbols[i].addr)) { -+ error_setg(errp, "%s could not be loaded from libvxhs: %s", -+ name, g_module_error()); -+ return; -+ } -+ ++i; -+ } -+} -+ -+static void bdrv_vxhs_load_libs(Error **errp) -+{ -+ Error *local_err = NULL; -+ int32_t ver; -+ -+ if (libvxhs_loaded) { -+ return; -+ } -+ -+ if (!g_module_supported()) { -+ error_setg(errp, "modules are not supported on this platform: %s", -+ g_module_error()); -+ return; -+ } -+ -+ libvxhs_handle = g_module_open(LIBVXHS_FULL_PATHNAME, -+ G_MODULE_BIND_LAZY | G_MODULE_BIND_LOCAL); -+ if (!libvxhs_handle) { -+ error_setg(errp, "The VXHS library from Veritas might not be installed " -+ "correctly (%s)", g_module_error()); -+ return; -+ } -+ -+ g_module_make_resident(libvxhs_handle); -+ -+ bdrv_vxhs_set_funcs(libvxhs_handle, &local_err); -+ if (local_err) { -+ error_propagate(errp, local_err); -+ return; -+ } -+ -+ /* Now check to see if the libvxhs we are using here is supported -+ * by the loaded version */ -+ -+ ver = (*libvxhs.iio_min_version)(); -+ if (ver > QNIO_VERSION) { -+ error_setg(errp, "Trying to use libvxhs version %"PRId32" API, but " -+ "only %"PRId32" or newer is supported by %s", -+ QNIO_VERSION, ver, LIBVXHS_FULL_PATHNAME); -+ return; -+ } -+ -+ ver = (*libvxhs.iio_max_version)(); -+ if (ver < QNIO_VERSION) { -+ error_setg(errp, "Trying to use libvxhs version %"PRId32" API, but " -+ "only %"PRId32" or earlier is supported by %s", -+ QNIO_VERSION, ver, LIBVXHS_FULL_PATHNAME); -+ return; -+ } -+ -+ libvxhs_loaded = true; -+} -+ - static void vxhs_complete_aio_bh(void *opaque) - { - VXHSAIOCB *acb = opaque; -@@ -226,7 +318,7 @@ static void vxhs_refresh_limits(BlockDriverState *bs, Error **errp) - static int vxhs_init_and_ref(void) - { - if 
(vxhs_ref++ == 0) { -- if (iio_init(QNIO_VERSION, vxhs_iio_callback)) { -+ if ((*libvxhs.iio_init)(QNIO_VERSION, vxhs_iio_callback)) { - return -ENODEV; - } - } -@@ -236,7 +328,7 @@ static int vxhs_init_and_ref(void) - static void vxhs_unref(void) - { - if (--vxhs_ref == 0) { -- iio_fini(); -+ (*libvxhs.iio_fini)(); - } - } - -@@ -306,8 +398,17 @@ static int vxhs_open(BlockDriverState *bs, QDict *options, - char *client_key = NULL; - char *client_cert = NULL; - -+ bdrv_vxhs_load_libs(&local_err); -+ if (local_err) { -+ error_propagate(errp, local_err); -+ /* on error, cannot cleanup because the iio_fini() function -+ * is not loaded */ -+ return -EINVAL; -+ } -+ - ret = vxhs_init_and_ref(); - if (ret < 0) { -+ error_setg(&local_err, "libvxhs iio_init() failed"); - ret = -EINVAL; - goto out; - } -@@ -392,8 +493,8 @@ static int vxhs_open(BlockDriverState *bs, QDict *options, - /* - * Open qnio channel to storage agent if not opened before - */ -- dev_handlep = iio_open(of_vsa_addr, s->vdisk_guid, 0, -- cacert, client_key, client_cert); -+ dev_handlep = (*libvxhs.iio_open)(of_vsa_addr, s->vdisk_guid, 0, -+ cacert, client_key, client_cert); - if (dev_handlep == NULL) { - trace_vxhs_open_iio_open(of_vsa_addr); - ret = -ENODEV; -@@ -453,11 +554,11 @@ static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, uint64_t offset, - - switch (iodir) { - case VDISK_AIO_WRITE: -- ret = iio_writev(dev_handle, acb, qiov->iov, qiov->niov, -+ ret = (*libvxhs.iio_writev)(dev_handle, acb, qiov->iov, qiov->niov, - offset, size, iio_flags); - break; - case VDISK_AIO_READ: -- ret = iio_readv(dev_handle, acb, qiov->iov, qiov->niov, -+ ret = (*libvxhs.iio_writev)(dev_handle, acb, qiov->iov, qiov->niov, - offset, size, iio_flags); - break; - default: -@@ -506,7 +607,7 @@ static void vxhs_close(BlockDriverState *bs) - * Close vDisk device - */ - if (s->vdisk_hostinfo.dev_handle) { -- iio_close(s->vdisk_hostinfo.dev_handle); -+ (*libvxhs.iio_close)(s->vdisk_hostinfo.dev_handle); - 
s->vdisk_hostinfo.dev_handle = NULL; - } - -@@ -528,7 +629,7 @@ static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s) - int ret = 0; - void *dev_handle = s->vdisk_hostinfo.dev_handle; - -- ret = iio_ioctl(dev_handle, IOR_VDISK_STAT, &vdisk_size, 0); -+ ret = (*libvxhs.iio_ioctl)(dev_handle, IOR_VDISK_STAT, &vdisk_size, 0); - if (ret < 0) { - trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno); - return -EIO; diff --git a/configure b/configure -index 1c563a7027..eb0a0dde86 100755 +index 714e7fb..4ecc861 100755 --- a/configure +++ b/configure -@@ -3612,7 +3612,7 @@ fi - - glib_req_ver=2.40 - glib_modules=gthread-2.0 --if test "$modules" = yes; then -+if test "$modules" = yes -o "$vxhs" = yes; then - glib_modules="$glib_modules gmodule-export-2.0" +@@ -2424,6 +2424,7 @@ if test "$seccomp" != "no" ; then + seccomp="no" + fi fi - -@@ -5755,33 +5755,6 @@ if compile_prog "" "" ; then - have_sysmacros=yes - fi - --########################################## --# Veritas HyperScale block driver VxHS --# Check if libvxhs is installed -- --if test "$vxhs" != "no" ; then -- cat > $TMPC < --#include -- --void *vxhs_callback; -- --int main(void) { -- iio_init(QNIO_VERSION, vxhs_callback); -- return 0; --} --EOF -- vxhs_libs="-lvxhs -lssl" -- if compile_prog "" "$vxhs_libs" ; then -- vxhs=yes -- else -- if test "$vxhs" = "yes" ; then -- feature_not_found "vxhs block device" "Install libvxhs See github" -- fi -- vxhs=no -- fi --fi -- ++ ########################################## - # check for _Static_assert() + # xen probe -@@ -7218,8 +7191,8 @@ elif test "$pthread_setname_np_wo_tid" = "yes" ; then - fi - - if test "$vxhs" = "yes" ; then -- echo "CONFIG_VXHS=y" >> $config_host_mak -- echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak -+ echo "CONFIG_VXHS=m" >> $config_host_mak -+ echo "VXHS_LIBS= -lssl" >> $config_host_mak - fi - - if test "$libpmem" = "yes" ; then diff --git a/os-posix.c b/os-posix.c -index 4bd80e44e6..ca13206b31 100644 +index 3ba7df8..ff26068 100644 --- 
a/os-posix.c +++ b/os-posix.c -@@ -82,7 +82,7 @@ void os_setup_signal_handling(void) +@@ -83,7 +83,7 @@ void os_setup_signal_handling(void) /* Find a likely location for support files using the location of the binary. For installed binaries this will be "$bindir/../share/qemu". When running from the build tree this will be "$bindir/../pc-bios". */ @@ -356,10 +127,10 @@ index 4bd80e44e6..ca13206b31 100644 char *os_find_datadir(void) { diff --git a/ui/vnc.c b/ui/vnc.c -index 1871422e1d..8226524c16 100644 +index 38f92bf..933dc36 100644 --- a/ui/vnc.c +++ b/ui/vnc.c -@@ -3982,7 +3982,7 @@ void vnc_display_open(const char *id, Error **errp) +@@ -3976,7 +3976,7 @@ void vnc_display_open(const char *id, Error **errp) #ifdef CONFIG_VNC_SASL if (sasl) { @@ -369,5 +140,5 @@ index 1871422e1d..8226524c16 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -2.20.1 +1.8.3.1 diff --git a/0005-Enable-disable-devices-for-RHEL.patch b/0005-Enable-disable-devices-for-RHEL.patch index d5b0ae8..6ae6fc7 100644 --- a/0005-Enable-disable-devices-for-RHEL.patch +++ b/0005-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 8ee745d1dc16e5cd0f9eb18ed0671ad00e789501 Mon Sep 17 00:00:00 2001 +From 1421c61010f1de5e7381c107963839e17ea5b43a Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 11 Jan 2016 11:53:33 +0100 Subject: Enable/disable devices for RHEL @@ -21,6 +21,22 @@ Rebase notes (4.0.0): - Switch to KConfig (upstream) - Using device whitelist + without-defualt-devices option +Rebase notes (4.1.0-rc0): +- Added CONFIG_USB_OHCI_PCI for ppc64 +- Added CONFIG_XIVE_KVM for ppc64 +- Added CONFIG_ACPI_PCI for x86_64 +- Added CONFIG_SEMIHOSTING for aarch64 + +Rebase notes (4.1.0-rc1): +- Cleanup aarch64 devices +- Do not build a15mpcore.c + +Rebase notes (4.1.0-rc2): +- Removed ide-isa.c stub file + +Rebase notes (4.1.0-rc3): +- Use CONFIG_USB_EHCI_PCI on x86_64 (new upstream) + Merged patches (qemu 3.1.0): - d51e082 Re-enable 
CONFIG_HYPERV_TESTDEV - 4b889f3 Declare cirrus-vga as deprecated @@ -28,64 +44,64 @@ Merged patches (qemu 3.1.0): - 3eef52a Disable CONFIG_IPMI and CONFIG_I2C for ppc64 - 9caf292 Disable CONFIG_CAN_BUS and CONFIG_CAN_SJA1000 -Merged patches (weekly-190301): +Merged patches (4.1.0-rc0): - 20a51f6 fdc: Revert downstream disablement of device "floppy" - f869cc0 fdc: Restrict floppy controllers to RHEL-7 machine types +- 5909721 aarch64: Compile out IOH3420 +- 27b7c44 rh: set CONFIG_BOCHS_DISPLAY=y for x86 (partial) -Signed-off-by: Danilo C. L. de Paula +Merged patches (4.1.0-rc3): +- 495a27d x86_64-rh-devices: add missing TPM passthrough +- e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) --- - Makefile.objs | 4 +- - default-configs/aarch64-rh-devices.mak | 40 +++++++++++ - default-configs/aarch64-softmmu.mak | 17 ++--- - default-configs/ppc64-rh-devices.mak | 30 +++++++++ - default-configs/ppc64-softmmu.mak | 8 ++- - default-configs/rh-virtio.mak | 10 +++ - default-configs/s390x-rh-devices.mak | 15 +++++ - default-configs/s390x-softmmu.mak | 4 +- - default-configs/x86_64-rh-devices.mak | 93 ++++++++++++++++++++++++++ - default-configs/x86_64-softmmu.mak | 4 +- - hw/acpi/ich9.c | 4 +- - hw/arm/Makefile.objs | 2 +- - hw/block/fdc.c | 10 +++ - hw/bt/Makefile.objs | 4 +- - hw/char/serial-pci.c | 4 ++ - hw/core/Makefile.objs | 9 +-- - hw/cpu/Makefile.objs | 3 +- - hw/display/Makefile.objs | 5 +- - hw/display/cirrus_vga.c | 3 + - hw/ide/piix.c | 5 +- - hw/input/pckbd.c | 2 + - hw/isa/Makefile.objs | 2 +- - hw/misc/Makefile.objs | 2 +- - hw/net/e1000.c | 2 + - hw/pci-host/piix.c | 4 ++ - hw/ppc/spapr_cpu_core.c | 2 + - hw/usb/ccid-card-emulated.c | 2 + - hw/vfio/pci-quirks.c | 5 ++ - hw/vfio/pci.c | 5 ++ - qemu-options.hx | 7 +- - redhat/qemu-kvm.spec.template | 6 +- - stubs/Makefile.objs | 1 + - stubs/ide-isa.c | 13 ++++ - target/arm/cpu.c | 4 +- - target/i386/cpu.c | 35 +++++++--- - target/ppc/cpu-models.c | 12 ++++ - target/s390x/cpu_models.c | 3 + - 
target/s390x/kvm.c | 8 +++ - vl.c | 8 ++- - 39 files changed, 348 insertions(+), 49 deletions(-) + Makefile.objs | 4 +- + default-configs/aarch64-rh-devices.mak | 20 +++++++ + default-configs/aarch64-softmmu.mak | 10 ++-- + default-configs/ppc64-rh-devices.mak | 32 +++++++++++ + default-configs/ppc64-softmmu.mak | 8 ++- + default-configs/rh-virtio.mak | 10 ++++ + default-configs/s390x-rh-devices.mak | 15 +++++ + default-configs/s390x-softmmu.mak | 4 +- + default-configs/x86_64-rh-devices.mak | 100 +++++++++++++++++++++++++++++++++ + default-configs/x86_64-softmmu.mak | 4 +- + hw/acpi/ich9.c | 4 +- + hw/arm/Makefile.objs | 2 +- + hw/block/fdc.c | 10 ++++ + hw/bt/Makefile.objs | 4 +- + hw/core/Makefile.objs | 9 +-- + hw/cpu/Makefile.objs | 5 +- + hw/display/Makefile.objs | 5 +- + hw/display/cirrus_vga.c | 3 + + hw/ide/piix.c | 5 +- + hw/input/pckbd.c | 2 + + hw/isa/Makefile.objs | 2 +- + hw/misc/Makefile.objs | 2 +- + hw/net/e1000.c | 2 + + hw/pci-host/piix.c | 4 ++ + hw/ppc/spapr_cpu_core.c | 2 + + hw/usb/ccid-card-emulated.c | 2 + + hw/vfio/pci-quirks.c | 5 ++ + hw/vfio/pci.c | 5 ++ + qemu-options.hx | 7 +-- + redhat/qemu-kvm.spec.template | 5 +- + target/arm/cpu.c | 4 +- + target/i386/cpu.c | 35 +++++++++--- + target/ppc/cpu-models.c | 10 ++++ + target/s390x/cpu_models.c | 3 + + target/s390x/kvm.c | 8 +++ + vl.c | 8 ++- + 36 files changed, 316 insertions(+), 44 deletions(-) create mode 100644 default-configs/aarch64-rh-devices.mak create mode 100644 default-configs/ppc64-rh-devices.mak create mode 100644 default-configs/rh-virtio.mak create mode 100644 default-configs/s390x-rh-devices.mak create mode 100644 default-configs/x86_64-rh-devices.mak - create mode 100644 stubs/ide-isa.c diff --git a/Makefile.objs b/Makefile.objs -index cf065de5ed..0b78970763 100644 +index 6a143dc..8e96af1 100644 --- a/Makefile.objs +++ b/Makefile.objs -@@ -63,8 +63,8 @@ common-obj-y += replay/ +@@ -65,8 +65,8 @@ common-obj-y += replay/ common-obj-y += ui/ common-obj-m += ui/ @@ -98,80 
+114,55 @@ index cf065de5ed..0b78970763 100644 common-obj-y += vl.o diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak new file mode 100644 -index 0000000000..13ce7c7987 +index 0000000..a1ed641 --- /dev/null +++ b/default-configs/aarch64-rh-devices.mak -@@ -0,0 +1,40 @@ +@@ -0,0 +1,20 @@ +include rh-virtio.mak + -+CONFIG_ACPI=y -+CONFIG_ARM_GIC=y +CONFIG_ARM_GIC_KVM=y +CONFIG_ARM_SMMUV3=y +CONFIG_ARM_V7M=y +CONFIG_ARM_VIRT=y -+CONFIG_CAN_BUS=y -+CONFIG_CAN_SJA1000=y +CONFIG_EDID=y -+CONFIG_FW_CFG_DMA=y -+CONFIG_GPIO_KEY=y -+CONFIG_I2C=y -+CONFIG_IOH3420=y -+CONFIG_IVSHMEM=y -+CONFIG_KVM=y -+CONFIG_LINUX=y -+CONFIG_PCI=y +CONFIG_PCIE_PORT=y +CONFIG_PCI_DEVICES=y -+CONFIG_PCI_EXPRESS=y -+CONFIG_PCI_EXPRESS_GENERIC_BRIDGE=y +CONFIG_PCI_TESTDEV=y +CONFIG_PFLASH_CFI01=y -+CONFIG_PL011=y -+CONFIG_PL031=y -+CONFIG_PL061=y -+CONFIG_PLATFORM_BUS=y +CONFIG_SCSI=y -+CONFIG_SMBIOS=y -+CONFIG_SMBUS_EEPROM=y ++CONFIG_SEMIHOSTING=y +CONFIG_USB=y +CONFIG_USB_XHCI=y +CONFIG_VFIO=y +CONFIG_VFIO_PCI=y -+CONFIG_VHOST_USER=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_PCI=y +CONFIG_XIO3130=y diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak -index 4ea9add003..cfff806b50 100644 +index 958b1e0..8f6867d 100644 --- a/default-configs/aarch64-softmmu.mak +++ b/default-configs/aarch64-softmmu.mak -@@ -1,12 +1,9 @@ +@@ -1,8 +1,10 @@ # Default configuration for aarch64-softmmu -+# CONFIG_AUX=y -+# CONFIG_DDC=y -+# CONFIG_DPCD=y -+# CONFIG_XLNX_ZYNQMP=y -+# CONFIG_XLNX_ZYNQMP_ARM=y -+# CONFIG_XLNX_VERSAL=y --# We support all the 32 bit boards so need all their config + # We support all the 32 bit boards so need all their config -include arm-softmmu.mak -- --CONFIG_AUX=y --CONFIG_DDC=y --CONFIG_DPCD=y --CONFIG_XLNX_ZYNQMP=y ++#include arm-softmmu.mak + -CONFIG_XLNX_ZYNQMP_ARM=y -CONFIG_XLNX_VERSAL=y --CONFIG_ARM_SMMUV3=y +-CONFIG_SBSA_REF=y ++#CONFIG_XLNX_ZYNQMP_ARM=y ++#CONFIG_XLNX_VERSAL=y ++#CONFIG_SBSA_REF=y ++ +include 
aarch64-rh-devices.mak diff --git a/default-configs/ppc64-rh-devices.mak b/default-configs/ppc64-rh-devices.mak new file mode 100644 -index 0000000000..3be1750736 +index 0000000..35f2106 --- /dev/null +++ b/default-configs/ppc64-rh-devices.mak -@@ -0,0 +1,30 @@ +@@ -0,0 +1,32 @@ +include rh-virtio.mak + +CONFIG_DIMM=y @@ -185,6 +176,7 @@ index 0000000000..3be1750736 +CONFIG_TEST_DEVICES=y +CONFIG_USB=y +CONFIG_USB_OHCI=y ++CONFIG_USB_OHCI_PCI=y +CONFIG_USB_SMARTCARD=y +CONFIG_USB_STORAGE_BOT=y +CONFIG_USB_XHCI=y @@ -202,8 +194,9 @@ index 0000000000..3be1750736 +CONFIG_XICS_SPAPR=y +CONFIG_XIVE=y +CONFIG_XIVE_SPAPR=y ++CONFIG_XIVE_KVM=y diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak -index cca52665d9..fec354f327 100644 +index cca5266..fec354f 100644 --- a/default-configs/ppc64-softmmu.mak +++ b/default-configs/ppc64-softmmu.mak @@ -1,10 +1,12 @@ @@ -224,7 +217,7 @@ index cca52665d9..fec354f327 100644 +include ppc64-rh-devices.mak diff --git a/default-configs/rh-virtio.mak b/default-configs/rh-virtio.mak new file mode 100644 -index 0000000000..94ede1b5f6 +index 0000000..94ede1b --- /dev/null +++ b/default-configs/rh-virtio.mak @@ -0,0 +1,10 @@ @@ -240,7 +233,7 @@ index 0000000000..94ede1b5f6 +CONFIG_VIRTIO_SERIAL=y diff --git a/default-configs/s390x-rh-devices.mak b/default-configs/s390x-rh-devices.mak new file mode 100644 -index 0000000000..c3c73fe752 +index 0000000..c3c73fe --- /dev/null +++ b/default-configs/s390x-rh-devices.mak @@ -0,0 +1,15 @@ @@ -260,7 +253,7 @@ index 0000000000..c3c73fe752 +CONFIG_VIRTIO_CCW=y +CONFIG_WDT_DIAG288=y diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak -index f2287a133f..3e2e388e91 100644 +index f2287a1..3e2e388 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak @@ -10,4 +10,6 @@ @@ -273,14 +266,15 @@ index f2287a133f..3e2e388e91 100644 +include s390x-rh-devices.mak diff --git a/default-configs/x86_64-rh-devices.mak 
b/default-configs/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..01b5363c79 +index 0000000..6b5d68e --- /dev/null +++ b/default-configs/x86_64-rh-devices.mak -@@ -0,0 +1,93 @@ +@@ -0,0 +1,100 @@ +include rh-virtio.mak + +CONFIG_AC97=y +CONFIG_ACPI=y ++CONFIG_ACPI_PCI=y +CONFIG_ACPI_CPU_HOTPLUG=y +CONFIG_ACPI_MEMORY_HOTPLUG=y +CONFIG_ACPI_NVDIMM=y @@ -291,6 +285,7 @@ index 0000000000..01b5363c79 +CONFIG_AHCI=y +CONFIG_APIC=y +CONFIG_APM=y ++CONFIG_BOCHS_DISPLAY=y +CONFIG_DIMM=y +CONFIG_E1000E_PCI_EXPRESS=y +CONFIG_E1000_PCI=y @@ -351,6 +346,7 @@ index 0000000000..01b5363c79 +CONFIG_TEST_DEVICES=y +CONFIG_USB=y +CONFIG_USB_EHCI=y ++CONFIG_USB_EHCI_PCI=y +CONFIG_USB_SMARTCARD=y +CONFIG_USB_STORAGE_BOT=y +CONFIG_USB_UHCI=y @@ -370,8 +366,12 @@ index 0000000000..01b5363c79 +CONFIG_WDT_IB6300ESB=y +CONFIG_WDT_IB700=y +CONFIG_XIO3130=y ++CONFIG_TPM_CRB=y ++CONFIG_TPM_TIS=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_PASSTHROUGH=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak -index 64b2ee2960..b5de7e5279 100644 +index 64b2ee2..b5de7e5 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -1,3 +1,5 @@ @@ -382,7 +382,7 @@ index 64b2ee2960..b5de7e5279 100644 + +include x86_64-rh-devices.mak diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index e53dfe1ee3..168a713eff 100644 +index e53dfe1..168a713 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -446,8 +446,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) @@ -397,12 +397,12 @@ index e53dfe1ee3..168a713eff 100644 object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs -index fa57c7c770..75cf31650c 100644 +index 43ce8d5..13fc950 100644 --- a/hw/arm/Makefile.objs +++ b/hw/arm/Makefile.objs -@@ -17,7 +17,7 @@ obj-$(CONFIG_STRONGARM) += collie.o - obj-$(CONFIG_VERSATILE) += vexpress.o versatilepb.o +@@ -27,7 +27,7 @@ obj-$(CONFIG_VEXPRESS) += vexpress.o 
obj-$(CONFIG_ZYNQ) += xilinx_zynq.o + obj-$(CONFIG_SABRELITE) += sabrelite.o -obj-$(CONFIG_ARM_V7M) += armv7m.o +#obj-$(CONFIG_ARM_V7M) += armv7m.o @@ -410,11 +410,11 @@ index fa57c7c770..75cf31650c 100644 obj-$(CONFIG_PXA2XX) += pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o obj-$(CONFIG_DIGIC) += digic.o diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 6f19f127a5..9ece2dbbd2 100644 +index 9b24cb9..440b53b 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c -@@ -42,6 +42,8 @@ - #include "qemu/log.h" +@@ -43,6 +43,8 @@ + #include "qemu/module.h" #include "trace.h" +#include "hw/boards.h" @@ -422,7 +422,7 @@ index 6f19f127a5..9ece2dbbd2 100644 /********************************************************/ /* debug Floppy devices */ -@@ -2629,6 +2631,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, +@@ -2635,6 +2637,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, int i, j; static int command_tables_inited = 0; @@ -438,7 +438,7 @@ index 6f19f127a5..9ece2dbbd2 100644 error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); } diff --git a/hw/bt/Makefile.objs b/hw/bt/Makefile.objs -index 867a7d2e8a..e678e9ee3c 100644 +index 867a7d2..e678e9e 100644 --- a/hw/bt/Makefile.objs +++ b/hw/bt/Makefile.objs @@ -1,3 +1,3 @@ @@ -447,33 +447,11 @@ index 867a7d2e8a..e678e9ee3c 100644 +#common-obj-y += core.o l2cap.o sdp.o hci.o hid.o +#common-obj-y += hci-csr.o -diff --git a/hw/char/serial-pci.c b/hw/char/serial-pci.c -index cb0d04c1d9..d426982df7 100644 ---- a/hw/char/serial-pci.c -+++ b/hw/char/serial-pci.c -@@ -228,6 +228,8 @@ static void multi_2x_serial_pci_class_initfn(ObjectClass *klass, void *data) - dc->vmsd = &vmstate_pci_multi_serial; - dc->props = multi_2x_serial_pci_properties; - set_bit(DEVICE_CATEGORY_INPUT, dc->categories); -+ /* Disabled for Red Hat Enterprise Linux: */ -+ dc->user_creatable = false; - } - - static void multi_4x_serial_pci_class_initfn(ObjectClass *klass, void *data) -@@ -243,6 +245,8 @@ static void 
multi_4x_serial_pci_class_initfn(ObjectClass *klass, void *data) - dc->vmsd = &vmstate_pci_multi_serial; - dc->props = multi_4x_serial_pci_properties; - set_bit(DEVICE_CATEGORY_INPUT, dc->categories); -+ /* Disabled for Red Hat Enterprise Linux: */ -+ dc->user_creatable = false; - } - - static const TypeInfo serial_pci_info = { diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs -index a799c83815..0bcb4d50d0 100644 +index f8481d9..bab9c2d 100644 --- a/hw/core/Makefile.objs +++ b/hw/core/Makefile.objs -@@ -16,9 +16,10 @@ common-obj-$(CONFIG_SOFTMMU) += machine.o +@@ -17,11 +17,12 @@ common-obj-$(CONFIG_SOFTMMU) += machine.o common-obj-$(CONFIG_SOFTMMU) += loader.o common-obj-$(CONFIG_FITLOADER) += loader-fit.o common-obj-$(CONFIG_SOFTMMU) += qdev-properties-system.o @@ -488,22 +466,27 @@ index a799c83815..0bcb4d50d0 100644 -common-obj-$(CONFIG_SOFTMMU) += generic-loader.o +#common-obj-$(CONFIG_SOFTMMU) += generic-loader.o common-obj-$(CONFIG_SOFTMMU) += null-machine.o + + obj-$(CONFIG_SOFTMMU) += machine-qmp-cmds.o diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs -index 8db9e8a7b3..87d4bdc27d 100644 +index 8db9e8a..1601ea9 100644 --- a/hw/cpu/Makefile.objs +++ b/hw/cpu/Makefile.objs -@@ -2,4 +2,5 @@ obj-$(CONFIG_ARM11MPCORE) += arm11mpcore.o +@@ -1,5 +1,6 @@ + obj-$(CONFIG_ARM11MPCORE) += arm11mpcore.o obj-$(CONFIG_REALVIEW) += realview_mpcore.o obj-$(CONFIG_A9MPCORE) += a9mpcore.o - obj-$(CONFIG_A15MPCORE) += a15mpcore.o +-obj-$(CONFIG_A15MPCORE) += a15mpcore.o -common-obj-y += core.o cluster.o ++#obj-$(CONFIG_A15MPCORE) += a15mpcore.o +common-obj-y += core.o +# cluster.o diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs -index dbd453ab1b..c3cefab578 100644 +index a64998f..88a60b3 100644 --- a/hw/display/Makefile.objs +++ b/hw/display/Makefile.objs -@@ -1,7 +1,8 @@ +@@ -1,8 +1,9 @@ + common-obj-$(CONFIG_DDC) += i2c-ddc.o common-obj-$(CONFIG_EDID) += edid-generate.o edid-region.o -common-obj-$(CONFIG_FW_CFG_DMA) += ramfb.o @@ -515,10 
+498,10 @@ index dbd453ab1b..c3cefab578 100644 common-obj-$(CONFIG_ADS7846) += ads7846.o common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index a0e71469f4..058a8e0f4a 100644 +index 2e4911a..49c16c8 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c -@@ -2967,6 +2967,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) +@@ -2973,6 +2973,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); int16_t device_id = pc->device_id; @@ -529,10 +512,10 @@ index a0e71469f4..058a8e0f4a 100644 Also accept 8 MB/16 MB for backward compatibility. */ if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index 885c16e938..d19c107f1e 100644 +index b97e555..55b30e6 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -252,7 +252,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -253,7 +253,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -542,7 +525,7 @@ index 885c16e938..d19c107f1e 100644 } static const TypeInfo piix3_ide_info = { -@@ -279,6 +280,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -280,6 +281,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -552,7 +535,7 @@ index 885c16e938..d19c107f1e 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index 47a606f5e3..562a9bc0a6 100644 +index 47a606f..562a9bc 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c @@ -568,6 +568,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) @@ -565,7 +548,7 @@ index 47a606f5e3..562a9bc0a6 100644 
static const TypeInfo i8042_info = { diff --git a/hw/isa/Makefile.objs b/hw/isa/Makefile.objs -index 9e106df186..0828964014 100644 +index 9e106df..0828964 100644 --- a/hw/isa/Makefile.objs +++ b/hw/isa/Makefile.objs @@ -1,5 +1,5 @@ @@ -576,7 +559,7 @@ index 9e106df186..0828964014 100644 common-obj-$(CONFIG_I82378) += i82378.o common-obj-$(CONFIG_PC87312) += pc87312.o diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs -index c71e07ae35..a5c3ff8617 100644 +index e9aab51..17f9422 100644 --- a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -9,7 +9,7 @@ common-obj-$(CONFIG_PCI_TESTDEV) += pci-testdev.o @@ -589,10 +572,10 @@ index c71e07ae35..a5c3ff8617 100644 # ARM devices diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index 9b39bccfb2..6ac19216df 100644 +index a023ceb..15d6c7d 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1798,6 +1798,7 @@ static const E1000Info e1000_devices[] = { +@@ -1794,6 +1794,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -600,7 +583,7 @@ index 9b39bccfb2..6ac19216df 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1810,6 +1811,7 @@ static const E1000Info e1000_devices[] = { +@@ -1806,6 +1807,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -609,7 +592,7 @@ index 9b39bccfb2..6ac19216df 100644 static void e1000_register_types(void) diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c -index d9c70f7ce6..f294fbce6a 100644 +index d9c70f7..f294fbc 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -801,6 +801,7 @@ static const TypeInfo i440fx_info = { @@ -639,10 +622,10 @@ index d9c70f7ce6..f294fbce6a 100644 type_register_static(&piix3_info); type_register_static(&piix3_xen_info); diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index f04e06cdf6..c664969b5b 100644 +index 5621fb9..b91a106 100644 --- a/hw/ppc/spapr_cpu_core.c +++ 
b/hw/ppc/spapr_cpu_core.c -@@ -385,10 +385,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { +@@ -387,10 +387,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { .instance_size = sizeof(SpaprCpuCore), .class_size = sizeof(SpaprCpuCoreClass), }, @@ -656,10 +639,10 @@ index f04e06cdf6..c664969b5b 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c -index 963373ba95..6771930154 100644 +index e20f8ed..0ddc26c 100644 --- a/hw/usb/ccid-card-emulated.c +++ b/hw/usb/ccid-card-emulated.c -@@ -602,6 +602,8 @@ static void emulated_class_initfn(ObjectClass *klass, void *data) +@@ -603,6 +603,8 @@ static void emulated_class_initfn(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_INPUT, dc->categories); dc->desc = "emulated smartcard"; dc->props = emulated_card_properties; @@ -669,10 +652,10 @@ index 963373ba95..6771930154 100644 static const TypeInfo emulated_card_info = { diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index 40a12001f5..921d694e2f 100644 +index b35a640..e8e7480 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c -@@ -1385,6 +1385,8 @@ static void vfio_pci_igd_lpc_bridge_class_init(ObjectClass *klass, void *data) +@@ -1386,6 +1386,8 @@ static void vfio_pci_igd_lpc_bridge_class_init(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->desc = "VFIO dummy ISA/LPC bridge for IGD assignment"; dc->hotpluggable = false; @@ -681,7 +664,7 @@ index 40a12001f5..921d694e2f 100644 k->realize = vfio_pci_igd_lpc_bridge_realize; k->class_id = PCI_CLASS_BRIDGE_ISA; } -@@ -1578,6 +1580,9 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) +@@ -1579,6 +1581,9 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) 0, PCI_DEVFN(0x2, 0))) { return; } @@ -692,10 +675,10 @@ index 40a12001f5..921d694e2f 100644 /* * We need to create an LPC/ISA bridge at PCI bus 
address 00:1f.0 that we diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 504019c458..13badcd6ed 100644 +index d7a4e18..7c474a9 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -3269,6 +3269,7 @@ static const TypeInfo vfio_pci_dev_info = { +@@ -3180,6 +3180,7 @@ static const TypeInfo vfio_pci_dev_info = { }, }; @@ -703,7 +686,7 @@ index 504019c458..13badcd6ed 100644 static Property vfio_pci_dev_nohotplug_properties[] = { DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false), DEFINE_PROP_END_OF_LIST(), -@@ -3288,11 +3289,15 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { +@@ -3199,11 +3200,15 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { .instance_size = sizeof(VFIOPCIDevice), .class_init = vfio_pci_nohotplug_dev_class_init, }; @@ -720,10 +703,10 @@ index 504019c458..13badcd6ed 100644 type_init(register_vfio_pci_dev_type) diff --git a/qemu-options.hx b/qemu-options.hx -index 08749a3391..0f55062546 100644 +index 9621e93..6873f9e 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -1983,11 +1983,6 @@ ETEXI +@@ -2024,11 +2024,6 @@ ETEXI DEF("no-hpet", 0, QEMU_OPTION_no_hpet, "-no-hpet disable HPET\n", QEMU_ARCH_I386) @@ -735,7 +718,7 @@ index 08749a3391..0f55062546 100644 DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" -@@ -2997,6 +2992,7 @@ STEXI +@@ -3038,6 +3033,7 @@ STEXI ETEXI DEFHEADING() @@ -743,7 +726,7 @@ index 08749a3391..0f55062546 100644 DEFHEADING(Bluetooth(R) options:) STEXI @table @option -@@ -3075,6 +3071,7 @@ STEXI +@@ -3116,6 +3112,7 @@ STEXI @end table ETEXI DEFHEADING() @@ -751,39 +734,11 @@ index 08749a3391..0f55062546 100644 #ifdef CONFIG_TPM DEFHEADING(TPM device options:) -diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs -index 269dfa5832..205ebe21db 100644 ---- a/stubs/Makefile.objs -+++ b/stubs/Makefile.objs -@@ -39,3 +39,4 @@ stub-obj-y += xen-hvm.o - 
stub-obj-y += pci-host-piix.o - stub-obj-y += ram-block.o - stub-obj-y += ramfb.o -+stub-obj-y += ide-isa.o -diff --git a/stubs/ide-isa.c b/stubs/ide-isa.c -new file mode 100644 -index 0000000000..9fd50ef691 ---- /dev/null -+++ b/stubs/ide-isa.c -@@ -0,0 +1,13 @@ -+#include "qemu/osdep.h" -+#include "hw/ide.h" -+#include -+ -+ISADevice *isa_ide_init(ISABus *bus, int iobase, int iobase2, int isairq, -+ DriveInfo *hd0, DriveInfo *hd1) -+{ -+ /* -+ * In theory the real isa_ide_init() function can return NULL, but no -+ * caller actually checks for that. Make sure we go out with a clear bang. -+ */ -+ abort(); -+} diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 4155782197..2a19b96a92 100644 +index ec2ab95..7e7ccee 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c -@@ -2270,7 +2270,9 @@ static void arm_cpu_register_types(void) +@@ -2702,7 +2702,9 @@ static void arm_cpu_register_types(void) type_register_static(&idau_interface_type_info); while (info->name) { @@ -795,10 +750,10 @@ index 4155782197..2a19b96a92 100644 } diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index d6bb57d210..6616303782 100644 +index 19751e3..47a1236 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1480,14 +1480,14 @@ static X86CPUDefinition builtin_x86_defs[] = { +@@ -1554,14 +1554,14 @@ static X86CPUDefinition builtin_x86_defs[] = { .family = 6, .model = 6, .stepping = 3, @@ -821,11 +776,10 @@ index d6bb57d210..6616303782 100644 .features[FEAT_8000_0001_ECX] = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM, .xlevel = 0x8000000A, -@@ -1716,6 +1716,25 @@ static X86CPUDefinition builtin_x86_defs[] = { - .xlevel = 0x80000008, +@@ -1791,6 +1791,25 @@ static X86CPUDefinition builtin_x86_defs[] = { .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", }, -+ { + { + .name = "cpu64-rhel6", + .level = 4, + .vendor = CPUID_VENDOR_AMD, @@ -844,14 +798,15 @@ index d6bb57d210..6616303782 100644 + .xlevel = 0x8000000A, + .model_id = "QEMU Virtual CPU version (cpu64-rhel6)", + }, - { ++ { .name = 
"Conroe", .level = 10, + .vendor = CPUID_VENDOR_INTEL, diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 7c75963e3c..7f179ff65e 100644 +index 086548e..1bbf378 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c -@@ -65,6 +65,7 @@ +@@ -66,6 +66,7 @@ #define POWERPC_DEF(_name, _pvr, _type, _desc) \ POWERPC_DEF_SVR(_name, _desc, _pvr, POWERPC_SVR_NONE, _type) @@ -859,30 +814,26 @@ index 7c75963e3c..7f179ff65e 100644 /* Embedded PowerPC */ /* PowerPC 401 family */ POWERPC_DEF("401", CPU_POWERPC_401, 401, -@@ -739,8 +740,10 @@ +@@ -740,8 +741,10 @@ "PowerPC 7447A v1.2 (G4)") POWERPC_DEF("7457a_v1.2", CPU_POWERPC_74x7A_v12, 7455, "PowerPC 7457A v1.2 (G4)") +#endif /* 64 bits PowerPC */ - #if defined (TARGET_PPC64) + #if defined(TARGET_PPC64) +#if 0 /* Disabled for Red Hat Enterprise Linux */ POWERPC_DEF("970_v2.2", CPU_POWERPC_970_v22, 970, "PowerPC 970 v2.2") POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970, -@@ -757,8 +760,11 @@ - "PowerPC 970MP v1.0") - POWERPC_DEF("970mp_v1.1", CPU_POWERPC_970MP_v11, 970, +@@ -760,6 +763,7 @@ "PowerPC 970MP v1.1") -+#endif -+#if 0 /* Disabled for Red Hat Enterprise Linux */ POWERPC_DEF("power5+_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, "POWER5+ v2.1") +#endif POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, "POWER7 v2.3") POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, -@@ -779,6 +785,7 @@ +@@ -780,6 +784,7 @@ /* PowerPC CPU aliases */ PowerPCCPUAlias ppc_cpu_aliases[] = { @@ -890,7 +841,7 @@ index 7c75963e3c..7f179ff65e 100644 { "403", "403gc" }, { "405", "405d4" }, { "405cr", "405crc" }, -@@ -937,12 +944,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -938,12 +943,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "7447a", "7447a_v1.2" }, { "7457a", "7457a_v1.2" }, { "apollo7pm", "7457a_v1.0" }, @@ -906,7 +857,7 @@ index 7c75963e3c..7f179ff65e 100644 { "power7", "power7_v2.3" }, { "power7+", "power7+_v2.1" }, { "power8e", "power8e_v2.1" }, -@@ -951,6 +961,7 @@ PowerPCCPUAlias 
ppc_cpu_aliases[] = { +@@ -952,6 +960,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "power9", "power9_v2.0" }, #endif @@ -914,7 +865,7 @@ index 7c75963e3c..7f179ff65e 100644 /* Generic PowerPCs */ #if defined(TARGET_PPC64) { "ppc64", "970fx_v3.1" }, -@@ -958,5 +969,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -959,5 +968,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "ppc32", "604" }, { "ppc", "604" }, { "default", "604" }, @@ -922,10 +873,10 @@ index 7c75963e3c..7f179ff65e 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index eb125d4d0d..2ed9c1bd2c 100644 +index 1d16d7d..47188ed 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c -@@ -406,6 +406,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, +@@ -404,6 +404,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, (max_model->def->gen == model->def->gen && max_model->def->ec_ga < model->def->ec_ga)) { list_add_feat("type", unavailable); @@ -936,10 +887,10 @@ index eb125d4d0d..2ed9c1bd2c 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 19530fb94e..57e15404ff 100644 +index 6e814c2..153d092 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c -@@ -2340,6 +2340,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2363,6 +2363,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } @@ -955,19 +906,19 @@ index 19530fb94e..57e15404ff 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ diff --git a/vl.c b/vl.c -index c696ad2a13..3ada215270 100644 +index b426b32..f9166f5 100644 --- a/vl.c +++ b/vl.c -@@ -167,7 +167,7 @@ unsigned int max_cpus; - int smp_cores = 1; - int smp_threads = 1; +@@ -164,7 +164,7 @@ Chardev *parallel_hds[MAX_PARALLEL_PORTS]; + int win2k_install_hack = 0; 
+ int singlestep = 0; int acpi_enabled = 1; -int no_hpet = 0; +int no_hpet = 1; /* Always disabled for Red Hat Enterprise Linux */ int fd_bootchk = 1; static int no_reboot; int no_shutdown = 0; -@@ -933,6 +933,7 @@ static void configure_rtc(QemuOpts *opts) +@@ -912,6 +912,7 @@ static void configure_rtc(QemuOpts *opts) } } @@ -975,7 +926,7 @@ index c696ad2a13..3ada215270 100644 /***********************************************************/ /* Bluetooth support */ static int nb_hcis; -@@ -1054,6 +1055,7 @@ static int bt_parse(const char *opt) +@@ -1033,6 +1034,7 @@ static int bt_parse(const char *opt) error_report("bad bluetooth parameter '%s'", opt); return 1; } @@ -983,7 +934,7 @@ index c696ad2a13..3ada215270 100644 static int parse_name(void *opaque, QemuOpts *opts, Error **errp) { -@@ -3279,6 +3281,7 @@ int main(int argc, char **argv, char **envp) +@@ -3149,6 +3151,7 @@ int main(int argc, char **argv, char **envp) } break; #endif @@ -991,7 +942,7 @@ index c696ad2a13..3ada215270 100644 case QEMU_OPTION_bt: warn_report("The bluetooth subsystem is deprecated and will " "be removed soon. 
If the bluetooth subsystem is " -@@ -3286,6 +3289,7 @@ int main(int argc, char **argv, char **envp) +@@ -3156,6 +3159,7 @@ int main(int argc, char **argv, char **envp) "qemu-devel@nongnu.org with your usecase."); add_device_config(DEV_BT, optarg); break; @@ -999,7 +950,7 @@ index c696ad2a13..3ada215270 100644 case QEMU_OPTION_audio_help: audio_legacy_help(); exit (0); -@@ -4409,9 +4413,11 @@ int main(int argc, char **argv, char **envp) +@@ -4284,9 +4288,11 @@ int main(int argc, char **argv, char **envp) tpm_init(); @@ -1012,5 +963,5 @@ index c696ad2a13..3ada215270 100644 if (!xen_enabled()) { /* On 32-bit hosts, QEMU is limited by virtual address space */ -- -2.20.1 +1.8.3.1 diff --git a/0006-Machine-type-related-general-changes.patch b/0006-Machine-type-related-general-changes.patch index 18e5181..42f26fa 100644 --- a/0006-Machine-type-related-general-changes.patch +++ b/0006-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From 4f20f7503073886c51e82fbbdfe78d6c79ea5df7 Mon Sep 17 00:00:00 2001 +From 3d84e663576b363fc4cb808752cebd7d9766cdce Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -13,35 +13,49 @@ Rebase changes (4.0.0): - Remove e1000 device duplication changes to reflect upstream solution - Rewrite machine compat properties to upstream solution +Rebase changes (4.1.0-rc0): +- Removed optional flag for machine compat properties (upstream) + +Rebase changes (4.1.0-rc1): +- Remove c3e002cb chunk from hw/net/e1000.c + +Rebase changes (4.1.0-rc2): +- Reorder compat structures +- Use one format for compat scructures + +Rebase changes (4.1.0-rc4): +- Added compat for virtio-balloon-pci.any_layout for rhel71 + Merged patches (4.0.0): - d4c0957 compat: Generic HW_COMPAT_RHEL7_6 - cbac773 virtio: Make disable-legacy/disable-modern compat properties optional -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (4.0.0-rc0): +- 479ad30 redhat: fix cut'n'paste garbage in hw_compat comments +- f19738e compat: Generic hw_compat_rhel_8_0 --- - hw/acpi/ich9.c | 16 +++ + hw/acpi/ich9.c | 16 ++++++ hw/acpi/piix4.c | 6 +- - hw/char/serial.c | 16 +++ - hw/core/machine.c | 268 ++++++++++++++++++++++++++++++++++++++++ + hw/char/serial.c | 16 ++++++ + hw/core/machine.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- - hw/net/e1000.c | 10 ++ - hw/net/e1000e.c | 21 ++++ + hw/net/e1000e.c | 21 +++++++ hw/net/rtl8139.c | 4 +- hw/smbios/smbios.c | 1 + hw/timer/i8254_common.c | 2 +- - hw/timer/mc146818rtc.c | 6 + + hw/timer/mc146818rtc.c | 6 ++ hw/usb/hcd-uhci.c | 4 +- - hw/usb/hcd-xhci.c | 20 +++ + hw/usb/hcd-xhci.c | 20 +++++++ hw/usb/hcd-xhci.h | 2 + include/hw/acpi/ich9.h | 3 + - include/hw/boards.h | 18 +++ - include/hw/usb.h | 4 + + include/hw/boards.h | 21 +++++++ + include/hw/usb.h | 4 ++ migration/migration.c | 2 + - migration/migration.h | 5 + - 19 files changed, 403 insertions(+), 7 deletions(-) + migration/migration.h | 5 ++ + 18 files changed, 274 insertions(+), 7 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 168a713eff..0a6346f1cf 100644 +index 168a713..0a6346f 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -441,6 +441,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) @@ -75,10 +89,10 @@ index 168a713eff..0a6346f1cf 100644 ich9_pm_get_disable_s3, ich9_pm_set_disable_s3, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 9c079d6834..d742777134 100644 +index ec4e186..0d2c8e4 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -315,7 +315,7 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id) +@@ -306,7 +306,7 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id) static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, @@ -87,7 +101,7 @@ index 9c079d6834..d742777134 100644 .minimum_version_id_old = 1, 
.load_state_old = acpi_load_old, .post_load = vmstate_acpi_post_load, -@@ -684,8 +684,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) +@@ -663,8 +663,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) static Property piix4_pm_properties[] = { DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), @@ -99,7 +113,7 @@ index 9c079d6834..d742777134 100644 DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_pci_hotplug, true), diff --git a/hw/char/serial.c b/hw/char/serial.c -index 7c42a2abfc..ae63cc0104 100644 +index 7c42a2a..ae63cc0 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -30,6 +30,7 @@ @@ -154,289 +168,167 @@ index 7c42a2abfc..ae63cc0104 100644 } diff --git a/hw/core/machine.c b/hw/core/machine.c -index 743fef2898..fd1594d1ad 100644 +index 32d1ca9..f30afe0 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -24,6 +24,274 @@ +@@ -27,6 +27,152 @@ #include "hw/pci/pci.h" #include "hw/mem/nvdimm.h" -+/* Mostly like hw_compat_2_1 but: -+ * * we don't need virtio-scsi-pci since 7.0 already had that on -+ * * -+ * * RH: Note, qemu-extended-regs should have been enabled in the 7.1 -+ * * machine type, but was accidentally turned off in 7.2 onwards. 
-+ * * -+ * */ -+GlobalProperty hw_compat_rhel_7_1[] = { -+ { /* COMPAT_RHEL7.1 */ -+ .driver = "intel-hda-generic", -+ .property = "old_msi_addr", -+ .value = "on", -+ },{ -+ .driver = "VGA", -+ .property = "qemu-extended-regs", -+ .value = "off", -+ },{ -+ .driver = "secondary-vga", -+ .property = "qemu-extended-regs", -+ .value = "off", -+ },{ -+ .driver = "usb-mouse", -+ .property = "usb_version", -+ .value = stringify(1), -+ },{ -+ .driver = "usb-kbd", -+ .property = "usb_version", -+ .value = stringify(1), -+ },{ -+ .driver = "virtio-pci", -+ .property = "virtio-pci-bus-master-bug-migration", -+ .value = "on", -+ },{ -+ .driver = "virtio-blk-pci", -+ .property = "any_layout", -+ .value = "off", -+ },{ -+ .driver = "virtio-serial-pci", -+ .property = "any_layout", -+ .value = "off", -+ },{ -+ .driver = "virtio-9p-pci", -+ .property = "any_layout", -+ .value = "off", -+ },{ -+ .driver = "virtio-rng-pci", -+ .property = "any_layout", -+ .value = "off", -+ },{ /* HW_COMPAT_RHEL7_1 - introduced with 2.10.0 */ -+ .driver = "migration", -+ .property = "send-configuration", -+ .value = "off", -+ }, ++/* The same as hw_compat_3_1 ++ * format of array has been changed by: ++ * 6c36bddf5340 ("machine: Use shorter format for GlobalProperty arrays") ++ */ ++GlobalProperty hw_compat_rhel_8_0[] = { ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-speed", "2_5" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-width", "1" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "memory-backend-file", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "memory-backend-memfd", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-crb", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-tis", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-kbd", "serial", "42" }, ++ /* 
hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-mouse", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-tablet", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "virtio-blk-device", "discard", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "virtio-blk-device", "write-zeroes", "false" }, +}; -+const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); ++const size_t hw_compat_rhel_8_0_len = G_N_ELEMENTS(hw_compat_rhel_8_0); ++ ++/* The same as hw_compat_3_0 + hw_compat_2_12 ++ * except that ++ * there's nothing in 3_0 ++ * migration.decompress-error-check=off was in 7.5 from bz 1584139 ++ */ ++GlobalProperty hw_compat_rhel_7_6[] = { ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "hda-audio", "use-timer", "false" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "cirrus-vga", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "VGA", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "vmware-svga", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "qxl-vga", "global-vmstate", "true" }, ++}; ++const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); ++ ++/* The same as hw_compat_2_11 + hw_compat_2_10 */ ++GlobalProperty hw_compat_rhel_7_5[] = { ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "hpet", "hpet-offset-saved", "false" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "virtio-blk-pci", "vectors", "2" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "vhost-user-blk-pci", "vectors", "2" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 ++ bz 1608778 modified for our naming */ ++ { "e1000-82540em", "migrate_tso_props", "off" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_10 */ ++ { "virtio-mouse-device", "wheel-axis", "false" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_10 */ ++ { "virtio-tablet-device", "wheel-axis", "false" }, ++ { "cirrus-vga", 
"vgamem_mb", "16" }, ++ { "migration", "decompress-error-check", "off" }, ++}; ++const size_t hw_compat_rhel_7_5_len = G_N_ELEMENTS(hw_compat_rhel_7_5); ++ ++/* Mostly like hw_compat_2_9 except ++ * x-mtu-bypass-backend, x-migrate-msix has already been ++ * backported to RHEL7.4. shpc was already on in 7.4. ++ */ ++GlobalProperty hw_compat_rhel_7_4[] = { ++ { "intel-iommu", "pt", "off" }, ++}; ++ ++const size_t hw_compat_rhel_7_4_len = G_N_ELEMENTS(hw_compat_rhel_7_4); ++/* Mostly like HW_COMPAT_2_6 + HW_COMPAT_2_7 + HW_COMPAT_2_8 except ++ * disable-modern, disable-legacy, page-per-vq have already been ++ * backported to RHEL7.3 ++ */ ++GlobalProperty hw_compat_rhel_7_3[] = { ++ { "virtio-mmio", "format_transport_address", "off" }, ++ { "virtio-serial-device", "emergency-write", "off" }, ++ { "ioapic", "version", "0x11" }, ++ { "intel-iommu", "x-buggy-eim", "true" }, ++ { "virtio-pci", "x-ignore-backend-features", "on" }, ++ { "fw_cfg_mem", "x-file-slots", stringify(0x10) }, ++ { "fw_cfg_io", "x-file-slots", stringify(0x10) }, ++ { "pflash_cfi01", "old-multiple-chip-handling", "on" }, ++ { TYPE_PCI_DEVICE, "x-pcie-extcap-init", "off" }, ++ { "virtio-pci", "x-pcie-deverr-init", "off" }, ++ { "virtio-pci", "x-pcie-lnkctl-init", "off" }, ++ { "virtio-pci", "x-pcie-pm-init", "off" }, ++ { "virtio-net-device", "x-mtu-bypass-backend", "off" }, ++ { "e1000e", "__redhat_e1000e_7_3_intr_state", "on" }, ++}; ++const size_t hw_compat_rhel_7_3_len = G_N_ELEMENTS(hw_compat_rhel_7_3); + +/* Mostly like hw_compat_2_4 + 2_3 but: -+ * * we don't need "any_layout" as it has been backported to 7.2 -+ * */ -+ ++ * we don't need "any_layout" as it has been backported to 7.2 ++ */ +GlobalProperty hw_compat_rhel_7_2[] = { -+ { -+ .driver = "virtio-blk-device", -+ .property = "scsi", -+ .value = "true", -+ },{ -+ .driver = "e1000-82540em", -+ .property = "extra_mac_registers", -+ .value = "off", -+ },{ -+ .driver = "virtio-pci", -+ .property = "x-disable-pcie", -+ .value = "on", -+ },{ 
-+ .driver = "virtio-pci", -+ .property = "migrate-extra", -+ .value = "off", -+ },{ /* HW_COMPAT_RHEL7_2 */ -+ .driver = "fw_cfg_mem", -+ .property = "dma_enabled", -+ .value = "off", -+ },{ /* HW_COMPAT_RHEL7_2 */ -+ .driver = "fw_cfg_io", -+ .property = "dma_enabled", -+ .value = "off", -+ },{ /* HW_COMPAT_RHEL7_2 */ -+ .driver = "isa-fdc", -+ .property = "fallback", -+ .value = "144", -+ },{ /* HW_COMPAT_RHEL7_2 */ -+ .driver = "virtio-pci", -+ .property = "disable-modern", -+ .value = "on", -+ .optional = true, -+ },{ /* HW_COMPAT_RHEL7_2 */ -+ .driver = "virtio-pci", -+ .property = "disable-legacy", -+ .value = "off", -+ .optional = true, -+ },{ /* HW_COMPAT_RHEL7_2 */ -+ .driver = TYPE_PCI_DEVICE, -+ .property = "x-pcie-lnksta-dllla", -+ .value = "off", -+ },{ /* HW_COMPAT_RHEL7_2 */ -+ .driver = "virtio-pci", -+ .property = "page-per-vq", -+ .value = "on", -+ },{ /* HW_COMPAT_RHEL7_2 - introduced with 2.10.0 */ -+ .driver = "migration", -+ .property = "send-section-footer", -+ .value = "off", -+ },{ /* HW_COMPAT_RHEL7_2 - introduced with 2.10.0 */ -+ .driver = "migration", -+ .property = "store-global-state", -+ .value = "off", ++ { "virtio-blk-device", "scsi", "true" }, ++ { "e1000-82540em", "extra_mac_registers", "off" }, ++ { "virtio-pci", "x-disable-pcie", "on" }, ++ { "virtio-pci", "migrate-extra", "off" }, ++ { "fw_cfg_mem", "dma_enabled", "off" }, ++ { "fw_cfg_io", "dma_enabled", "off" }, ++ { "isa-fdc", "fallback", "144" }, ++ { "virtio-pci", "disable-modern", "on" }, ++ { "virtio-pci", "disable-legacy", "off" }, ++ { TYPE_PCI_DEVICE, "x-pcie-lnksta-dllla", "off" }, ++ { "virtio-pci", "page-per-vq", "on" }, ++ /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ ++ { "migration", "send-section-footer", "off" }, ++ /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ ++ { "migration", "store-global-state", "off", + }, +}; +const size_t hw_compat_rhel_7_2_len = G_N_ELEMENTS(hw_compat_rhel_7_2); + -+/* Mostly like HW_COMPAT_2_6 + HW_COMPAT_2_7 + 
HW_COMPAT_2_8 except -+ * * disable-modern, disable-legacy, page-per-vq have already been -+ * * backported to RHEL7.3 -+ * */ -+GlobalProperty hw_compat_rhel_7_3[] = { -+ { /* HW_COMPAT_RHEL7_3 */ -+ .driver = "virtio-mmio", -+ .property = "format_transport_address", -+ .value = "off", -+ },{ /* HW_COMPAT_RHEL7_3 */ -+ .driver = "virtio-serial-device", -+ .property = "emergency-write", -+ .value = "off", -+ },{ /* HW_COMPAT_RHEL7_3 */ -+ .driver = "ioapic", -+ .property = "version", -+ .value = "0x11", -+ },{ /* HW_COMPAT_RHEL7_3 */ -+ .driver = "intel-iommu", -+ .property = "x-buggy-eim", -+ .value = "true", -+ },{ /* HW_COMPAT_RHEL7_3 */ -+ .driver = "virtio-pci", -+ .property = "x-ignore-backend-features", -+ .value = "on", -+ },{ /* HW_COMPAT_RHEL7_3 */ -+ .driver = "fw_cfg_mem", -+ .property = "x-file-slots", -+ .value = stringify(0x10), -+ },{ /* HW_COMPAT_RHEL7_3 */ -+ .driver = "fw_cfg_io", -+ .property = "x-file-slots", -+ .value = stringify(0x10), -+ },{ /* HW_COMPAT_RHEL7_3 */ -+ .driver = "pflash_cfi01", -+ .property = "old-multiple-chip-handling", -+ .value = "on", -+ },{ /* HW_COMPAT_RHEL7_3 */ -+ .driver = TYPE_PCI_DEVICE, -+ .property = "x-pcie-extcap-init", -+ .value = "off", -+ },{ /* HW_COMPAT_RHEL7_3 */ -+ .driver = "virtio-pci", -+ .property = "x-pcie-deverr-init", -+ .value = "off", -+ },{ /* HW_COMPAT_RHEL7_3 */ -+ .driver = "virtio-pci", -+ .property = "x-pcie-lnkctl-init", -+ .value = "off", -+ },{ /* HW_COMPAT_RHEL7_3 */ -+ .driver = "virtio-pci", -+ .property = "x-pcie-pm-init", -+ .value = "off", -+ },{ /* HW_COMPAT_RHEL7_3 */ -+ .driver = "virtio-net-device", -+ .property = "x-mtu-bypass-backend", -+ .value = "off", -+ },{ /* HW_COMPAT_RHEL7_3 */ -+ .driver = "e1000e", -+ .property = "__redhat_e1000e_7_3_intr_state", -+ .value = "on", -+ }, ++/* Mostly like hw_compat_2_1 but: ++ * we don't need virtio-scsi-pci since 7.0 already had that on ++ * ++ * RH: Note, qemu-extended-regs should have been enabled in the 7.1 ++ * machine type, but 
was accidentally turned off in 7.2 onwards. ++ */ ++GlobalProperty hw_compat_rhel_7_1[] = { ++ { "intel-hda-generic", "old_msi_addr", "on" }, ++ { "VGA", "qemu-extended-regs", "off" }, ++ { "secondary-vga", "qemu-extended-regs", "off" }, ++ { "usb-mouse", "usb_version", stringify(1) }, ++ { "usb-kbd", "usb_version", stringify(1) }, ++ { "virtio-pci", "virtio-pci-bus-master-bug-migration", "on" }, ++ { "virtio-blk-pci", "any_layout", "off" }, ++ { "virtio-balloon-pci", "any_layout", "off" }, ++ { "virtio-serial-pci", "any_layout", "off" }, ++ { "virtio-9p-pci", "any_layout", "off" }, ++ { "virtio-rng-pci", "any_layout", "off" }, ++ /* HW_COMPAT_RHEL7_1 - introduced with 2.10.0 */ ++ { "migration", "send-configuration", "off" }, +}; -+const size_t hw_compat_rhel_7_3_len = G_N_ELEMENTS(hw_compat_rhel_7_3); ++const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); + -+/* Mostly like hw_compat_2_9 except -+ * * x-mtu-bypass-backend, x-migrate-msix has already been -+ * * backported to RHEL7.4. shpc was already on in 7.4. 
-+ * */ -+GlobalProperty hw_compat_rhel_7_4[] = { -+ { /* HW_COMPAT_RHEL7_4 */ -+ .driver = "intel-iommu", -+ .property = "pt", -+ .value = "off", -+ }, -+}; -+const size_t hw_compat_rhel_7_4_len = G_N_ELEMENTS(hw_compat_rhel_7_4); -+ -+/* The same as hw_compat_2_11 + hw_compat_2_10 */ -+GlobalProperty hw_compat_rhel_7_5[] = { -+ { /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ -+ .driver = "hpet", -+ .property = "hpet-offset-saved", -+ .value = "false", -+ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ -+ .driver = "virtio-blk-pci", -+ .property = "vectors", -+ .value = "2", -+ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 */ -+ .driver = "vhost-user-blk-pci", -+ .property = "vectors", -+ .value = "2", -+ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_11 but -+ bz 1608778 modified for our naming */ -+ .driver = "e1000-82540em", -+ .property = "migrate_tso_props", -+ .value = "off", -+ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_10 */ -+ .driver = "virtio-mouse-device", -+ .property = "wheel-axis", -+ .value = "false", -+ },{ /* HW_COMPAT_RHEL7_5 from HW_COMPAT_2_10 */ -+ .driver = "virtio-tablet-device", -+ .property = "wheel-axis", -+ .value = "false", -+ },{ /* HW_COMPAT_RHEL7_5 */ -+ .driver = "cirrus-vga", -+ .property = "vgamem_mb", -+ .value = "16", -+ },{ /* HW_COMPAT_RHEL7_5 */ -+ .driver = "migration", -+ .property = "decompress-error-check", -+ .value = "off", -+ }, -+}; -+const size_t hw_compat_rhel_7_5_len = G_N_ELEMENTS(hw_compat_rhel_7_5); -+ -+/* The same as hw_compat_3_0 + hw_compat_2_12 -+ * * except that -+ * * there's nothing in 3_0 -+ * * migration.decompress-error-check=off was in 7.5 from bz 1584139 -+ * * -+ * */ -+GlobalProperty hw_compat_rhel_7_6[] = { -+ { /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ -+ .driver = "hda-audio", -+ .property = "use-timer", -+ .value = "false", -+ },{ /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ -+ .driver = "cirrus-vga", -+ .property = "global-vmstate", -+ .value = "true", -+ },{ /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 
*/ -+ .driver = "VGA", -+ .property = "global-vmstate", -+ .value = "true", -+ },{ /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ -+ .driver = "vmware-svga", -+ .property = "global-vmstate", -+ .value = "true", -+ },{ /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ -+ .driver = "qxl-vga", -+ .property = "global-vmstate", -+ .value = "true", -+ }, -+}; -+const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); -+ -+ - GlobalProperty hw_compat_3_1[] = { - { "pcie-root-port", "x-speed", "2_5" }, - { "pcie-root-port", "x-width", "1" }, + GlobalProperty hw_compat_4_0[] = { + { "VGA", "edid", "false" }, + { "secondary-vga", "edid", "false" }, diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index fa44242e0d..7835c8380a 100644 +index 945952a..db0681e 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c -@@ -80,7 +80,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) +@@ -82,7 +82,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) } static Property vga_isa_properties[] = { @@ -445,32 +337,11 @@ index fa44242e0d..7835c8380a 100644 DEFINE_PROP_END_OF_LIST(), }; -diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index 6ac19216df..a4de04ab89 100644 ---- a/hw/net/e1000.c -+++ b/hw/net/e1000.c -@@ -1691,6 +1691,16 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) - - pci_conf = pci_dev->config; - -+ if (!(d->compat_flags & E1000_FLAG_AUTONEG)) { -+ /* -+ * We have no capabilities, so capability list bit should normally be 0. -+ * Keep it on for compat machine types to avoid breaking migration. -+ * HACK: abuse E1000_FLAG_AUTONEG, which is off exactly for -+ * the machine types that need this. 
-+ */ -+ pci_set_word(pci_conf + PCI_STATUS, PCI_STATUS_CAP_LIST); -+ } -+ - /* TODO: RST# value should be 0, PCI spec 6.2.4 */ - pci_conf[PCI_CACHE_LINE_SIZE] = 0x10; - diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index 510ddb3897..f1de9e5058 100644 +index 581f7d0..9a8b1ed 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c -@@ -75,6 +75,11 @@ typedef struct E1000EState { +@@ -76,6 +76,11 @@ typedef struct E1000EState { E1000ECore core; @@ -482,7 +353,7 @@ index 510ddb3897..f1de9e5058 100644 } E1000EState; #define E1000E_MMIO_IDX 0 -@@ -90,6 +95,10 @@ typedef struct E1000EState { +@@ -91,6 +96,10 @@ typedef struct E1000EState { #define E1000E_MSIX_TABLE (0x0000) #define E1000E_MSIX_PBA (0x2000) @@ -493,7 +364,7 @@ index 510ddb3897..f1de9e5058 100644 static uint64_t e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) { -@@ -301,6 +310,8 @@ e1000e_init_msix(E1000EState *s) +@@ -302,6 +311,8 @@ e1000e_init_msix(E1000EState *s) } else { if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { msix_uninit(d, &s->msix, &s->msix); @@ -502,7 +373,7 @@ index 510ddb3897..f1de9e5058 100644 } } } -@@ -472,6 +483,8 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) +@@ -473,6 +484,8 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) ret = msi_init(PCI_DEVICE(s), 0xD0, 1, true, false, NULL); if (ret) { trace_e1000e_msi_init_fail(ret); @@ -511,7 +382,7 @@ index 510ddb3897..f1de9e5058 100644 } if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, -@@ -595,6 +608,11 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { +@@ -596,6 +609,11 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ e1000e_vmstate_intr_timer, E1000IntrDelayTimer) @@ -523,7 +394,7 @@ index 510ddb3897..f1de9e5058 100644 static const VMStateDescription e1000e_vmstate = { .name = "e1000e", .version_id = 1, -@@ -606,6 +624,7 @@ static const VMStateDescription e1000e_vmstate = { +@@ -607,6 +625,7 @@ static 
const VMStateDescription e1000e_vmstate = { VMSTATE_MSIX(parent_obj, E1000EState), VMSTATE_UINT32(ioaddr, E1000EState), @@ -531,7 +402,7 @@ index 510ddb3897..f1de9e5058 100644 VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), VMSTATE_UINT8(core.rx_desc_len, E1000EState), VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, -@@ -654,6 +673,8 @@ static PropertyInfo e1000e_prop_disable_vnet, +@@ -655,6 +674,8 @@ static PropertyInfo e1000e_prop_disable_vnet, static Property e1000e_properties[] = { DEFINE_NIC_PROPERTIES(E1000EState, conf), @@ -541,10 +412,10 @@ index 510ddb3897..f1de9e5058 100644 e1000e_prop_disable_vnet, bool), DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 2342a095e3..0c916b7a21 100644 +index 0927317..0cead11 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c -@@ -3174,7 +3174,7 @@ static int rtl8139_pre_save(void *opaque) +@@ -3176,7 +3176,7 @@ static int rtl8139_pre_save(void *opaque) static const VMStateDescription vmstate_rtl8139 = { .name = "rtl8139", @@ -553,7 +424,7 @@ index 2342a095e3..0c916b7a21 100644 .minimum_version_id = 3, .post_load = rtl8139_post_load, .pre_save = rtl8139_pre_save, -@@ -3255,7 +3255,9 @@ static const VMStateDescription vmstate_rtl8139 = { +@@ -3257,7 +3257,9 @@ static const VMStateDescription vmstate_rtl8139 = { VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), @@ -564,10 +435,10 @@ index 2342a095e3..0c916b7a21 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 47be9071fa..3ea04e621e 100644 +index 7bcd67b..dd0bc2b 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c -@@ -775,6 +775,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, +@@ -778,6 +778,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, 
SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); SMBIOS_SET_DEFAULT(type1.product, product); SMBIOS_SET_DEFAULT(type1.version, version); @@ -576,10 +447,10 @@ index 47be9071fa..3ea04e621e 100644 SMBIOS_SET_DEFAULT(type2.product, product); SMBIOS_SET_DEFAULT(type2.version, version); diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index 6190b6fc5d..ad2ad2d09f 100644 +index 76ca6ec..e7c87a9 100644 --- a/hw/timer/i8254_common.c +++ b/hw/timer/i8254_common.c -@@ -268,7 +268,7 @@ static const VMStateDescription vmstate_pit_common = { +@@ -270,7 +270,7 @@ static const VMStateDescription vmstate_pit_common = { .pre_save = pit_dispatch_pre_save, .post_load = pit_dispatch_post_load, .fields = (VMStateField[]) { @@ -589,18 +460,18 @@ index 6190b6fc5d..ad2ad2d09f 100644 vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c -index 513f105e62..10a3d44be1 100644 +index ce4550b..777efe9 100644 --- a/hw/timer/mc146818rtc.c +++ b/hw/timer/mc146818rtc.c -@@ -35,6 +35,7 @@ - #include "qapi/qapi-events-target.h" +@@ -37,6 +37,7 @@ + #include "qapi/qapi-events-misc-target.h" #include "qapi/visitor.h" #include "exec/address-spaces.h" +#include "migration/migration.h" #ifdef TARGET_I386 #include "hw/i386/apic.h" -@@ -841,6 +842,11 @@ static int rtc_post_load(void *opaque, int version_id) +@@ -843,6 +844,11 @@ static int rtc_post_load(void *opaque, int version_id) static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) { RTCState *s = (RTCState *)opaque; @@ -613,10 +484,10 @@ index 513f105e62..10a3d44be1 100644 } diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 09df29ff9c..95542290ff 100644 +index 98bd5cf..2fd2a9b 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c -@@ -1216,12 +1216,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) +@@ -1218,12 +1218,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) UHCIState *s = 
UHCI(dev); uint8_t *pci_conf = s->dev.config; int i; @@ -633,10 +504,10 @@ index 09df29ff9c..95542290ff 100644 if (s->masterbus) { USBPort *ports[NB_PORTS]; diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index ec28bee319..ad351a7b6d 100644 +index 5894a18..9854fae 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c -@@ -3580,9 +3580,27 @@ static const VMStateDescription vmstate_xhci_slot = { +@@ -3584,9 +3584,27 @@ static const VMStateDescription vmstate_xhci_slot = { } }; @@ -664,7 +535,7 @@ index ec28bee319..ad351a7b6d 100644 .fields = (VMStateField[]) { VMSTATE_UINT32(type, XHCIEvent), VMSTATE_UINT32(ccode, XHCIEvent), -@@ -3591,6 +3609,8 @@ static const VMStateDescription vmstate_xhci_event = { +@@ -3595,6 +3613,8 @@ static const VMStateDescription vmstate_xhci_event = { VMSTATE_UINT32(flags, XHCIEvent), VMSTATE_UINT8(slotid, XHCIEvent), VMSTATE_UINT8(epid, XHCIEvent), @@ -674,10 +545,10 @@ index ec28bee319..ad351a7b6d 100644 } }; diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h -index 240caa4e51..11bd05dfd5 100644 +index 2fad4df..f554b67 100644 --- a/hw/usb/hcd-xhci.h +++ b/hw/usb/hcd-xhci.h -@@ -154,6 +154,8 @@ typedef struct XHCIEvent { +@@ -157,6 +157,8 @@ typedef struct XHCIEvent { uint32_t flags; uint8_t slotid; uint8_t epid; @@ -687,7 +558,7 @@ index 240caa4e51..11bd05dfd5 100644 typedef struct XHCIInterrupter { diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h -index 41568d1837..1a23ccc412 100644 +index 41568d1..1a23ccc 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -61,6 +61,9 @@ typedef struct ICH9LPCPMRegs { @@ -701,13 +572,16 @@ index 41568d1837..1a23ccc412 100644 #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" diff --git a/include/hw/boards.h b/include/hw/boards.h -index e231860666..456e4a944c 100644 +index a71d1a5..521c625 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -335,4 +335,22 @@ extern const size_t hw_compat_2_2_len; +@@ -362,4 +362,25 @@ extern const size_t hw_compat_2_2_len; extern 
GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; ++extern GlobalProperty hw_compat_rhel_8_0[]; ++extern const size_t hw_compat_rhel_8_0_len; ++ +extern GlobalProperty hw_compat_rhel_7_6[]; +extern const size_t hw_compat_rhel_7_6_len; + @@ -728,7 +602,7 @@ index e231860666..456e4a944c 100644 + #endif diff --git a/include/hw/usb.h b/include/hw/usb.h -index c21f41c8a9..71502b0bad 100644 +index c21f41c..71502b0 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h @@ -604,4 +604,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, @@ -741,7 +615,7 @@ index c21f41c8a9..71502b0bad 100644 + #endif diff --git a/migration/migration.c b/migration/migration.c -index 609e0df5d0..a160172a0c 100644 +index 8a607fe..beffbfd 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -116,6 +116,8 @@ enum mig_rp_message_type { @@ -754,10 +628,10 @@ index 609e0df5d0..a160172a0c 100644 migrations at once. For now we don't need to add dynamic creation of migration */ diff --git a/migration/migration.h b/migration/migration.h -index 438f17edad..c793dcc50b 100644 +index 1fdd7b2..b89e901 100644 --- a/migration/migration.h +++ b/migration/migration.h -@@ -307,6 +307,11 @@ void init_dirty_bitmap_incoming_migration(void); +@@ -332,6 +332,11 @@ void init_dirty_bitmap_incoming_migration(void); void migrate_add_address(SocketAddress *address); int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); @@ -770,5 +644,5 @@ index 438f17edad..c793dcc50b 100644 #define qemu_ram_foreach_block \ #warning "Use foreach_not_ignored_block in migration code" -- -2.20.1 +1.8.3.1 diff --git a/0007-Add-aarch64-machine-types.patch b/0007-Add-aarch64-machine-types.patch index 374c94c..4195926 100644 --- a/0007-Add-aarch64-machine-types.patch +++ b/0007-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From b2c73bd35f8c0fa536004d18275ffcfe63096622 Mon Sep 17 00:00:00 2001 +From 47a1c8ecb7b0b3846fe5fcefcbb912cd31d291c7 Mon Sep 17 00:00:00 2001 From: Miroslav 
Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -7,28 +7,31 @@ Adding changes to add RHEL machine types for aarch64 architecture. Signed-off-by: Miroslav Rezanina -Rebase changes (weekly-190111): +Rebase changes (4.0.0): - Use upstream compat handling -Rebase changes (weekly-190308): +Rebase changes (4.1.0-rc0): - Removed a15memmap (upstream) +- Use virt_flash_create in rhel800_virt_instance_init -Merged patches (weekly-190125): +Merged patches (4.0.0): - 7bfdb4c aarch64: Add virt-rhel8.0.0 machine type for ARM - 3433e69 aarch64: Set virt-rhel8.0.0 max_cpus to 512 - 4d20863 aarch64: Use 256MB ECAM region by default -Signed-off-by: Danilo C. L. de Paula +Merged patches (4.1.0-rc0): +- c3e39ef aarch64: Add virt-rhel8.1.0 machine type for ARM +- 59a46d1 aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine --- - hw/arm/virt.c | 140 +++++++++++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++- include/hw/arm/virt.h | 11 ++++ - 2 files changed, 150 insertions(+), 1 deletion(-) + 2 files changed, 154 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index ce2664a30b..5602d9f6b0 100644 +index d9496c9..51fb5f8 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -62,6 +62,7 @@ +@@ -65,6 +65,7 @@ #include "hw/acpi/acpi.h" #include "target/arm/internals.h" @@ -36,7 +39,7 @@ index ce2664a30b..5602d9f6b0 100644 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -88,7 +89,49 @@ +@@ -91,7 +92,49 @@ DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) @@ -87,7 +90,7 @@ index ce2664a30b..5602d9f6b0 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -1658,6 +1701,7 @@ static void machvirt_init(MachineState *machine) +@@ -1722,6 +1765,7 @@ static 
void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -95,7 +98,7 @@ index ce2664a30b..5602d9f6b0 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1686,6 +1730,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -1750,6 +1794,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -103,23 +106,7 @@ index ce2664a30b..5602d9f6b0 100644 static bool virt_get_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1740,6 +1785,7 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp) - } - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static char *virt_get_iommu(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1767,6 +1813,7 @@ static void virt_set_iommu(Object *obj, const char *value, Error **errp) - error_append_hint(errp, "Valid values are none, smmuv3.\n"); - } - } -+#endif /* disabled for RHEL */ - - static CpuInstanceProperties - virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) -@@ -1806,6 +1853,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) +@@ -1871,6 +1916,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) return ms->possible_cpus; } @@ -127,7 +114,7 @@ index ce2664a30b..5602d9f6b0 100644 static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { -@@ -2071,3 +2119,93 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -2146,3 +2192,99 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -170,7 +157,7 @@ index ce2664a30b..5602d9f6b0 100644 +} +type_init(rhel_machine_init); + -+static void rhel800_virt_instance_init(Object *obj) ++static void rhel810_virt_instance_init(Object *obj) +{ + VirtMachineState *vms = 
VIRT_MACHINE(obj); + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); @@ -210,22 +197,28 @@ index ce2664a30b..5602d9f6b0 100644 + NULL); + } + -+ /* IOMMU is disabled by default and non-configurable for RHEL */ ++ /* Default disallows iommu instantiation */ + vms->iommu = VIRT_IOMMU_NONE; ++ object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu, NULL); ++ object_property_set_description(obj, "iommu", ++ "Set the IOMMU type. " ++ "Valid values are none and smmuv3", ++ NULL); + + vms->irqmap=a15irqmap; ++ virt_flash_create(vms); +} + -+static void rhel800_virt_options(MachineClass *mc) ++static void rhel810_virt_options(MachineClass *mc) +{ + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); +} -+DEFINE_RHEL_MACHINE_AS_LATEST(8, 0, 0) ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 1, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 507517c603..e49f872b1c 100644 +index a720942..1baa48d 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -136,6 +136,7 @@ typedef struct { +@@ -137,6 +137,7 @@ typedef struct { #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -233,7 +226,7 @@ index 507517c603..e49f872b1c 100644 #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") #define VIRT_MACHINE(obj) \ OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) -@@ -144,6 +145,16 @@ typedef struct { +@@ -145,6 +146,16 @@ typedef struct { #define VIRT_MACHINE_CLASS(klass) \ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) @@ -251,5 +244,5 @@ index 507517c603..e49f872b1c 100644 /* Return the number of used redistributor regions */ -- -2.20.1 +1.8.3.1 diff --git a/0008-Add-ppc64-machine-types.patch b/0008-Add-ppc64-machine-types.patch index 3463404..be2cde0 100644 --- a/0008-Add-ppc64-machine-types.patch +++ b/0008-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 373a3f8f11227ba6bce10dab17ddfb6caffc75cf Mon Sep 17 00:00:00 2001 +From 9c73e7109477fecb0477bd6d53e94080eca30e64 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -7,51 +7,53 @@ Adding changes to add RHEL machine types for ppc64 architecture. Signed-off-by: Miroslav Rezanina -Rebase changes (weekly-190111): +Rebase changes (4.0.0): - remove instance options and use upstream solution - Use upstream compat handling - Replace SPAPR_PCI_2_7_MMIO_WIN_SIZE with value (changed upstream) - -Rebase changes (weekly-190104): - re-add handling of instance_options (removed upstream) - Use p8 as default for rhel machine types (p9 default upstream) - -Rebase changes (weekly-190315): - sPAPRMachineClass renamed to SpaprMachineClass (upstream) -Merged patches (weekly-190104): +Rebase changes (4.1.0-rc2): +- Update format for compat structures + +Merged patches (4.0.0): - 467d59a redhat: define pseries-rhel8.0.0 machine type -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (4.1.0-rc0): +- f21757edc target/ppc/spapr: Enable mitigations by default for pseries-4.0 machine type +- 2511c63 redhat: sync pseries-rhel7.6.0 with rhel-av-8.0.1 +- 89f01da redhat: define pseries-rhel8.1.0 machine type --- - hw/ppc/spapr.c | 252 ++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr.c | 243 ++++++++++++++++++++++++++++++++++++++++++++++++ hw/ppc/spapr_cpu_core.c | 13 +++ include/hw/ppc/spapr.h | 1 + target/ppc/compat.c | 13 ++- target/ppc/cpu.h | 1 + - 5 files changed, 279 insertions(+), 1 deletion(-) + 5 files changed, 270 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index b52b82d298..daa59203ea 100644 +index 821f0d4..ab64d43 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -4300,6 +4300,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4382,6 +4382,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) spapr_caps_add_properties(smc, &error_abort); - smc->irq = &spapr_irq_xics; + smc->irq = &spapr_irq_dual; smc->dr_phb_enabled = true; + smc->has_power9_support = true; } static const TypeInfo spapr_machine_info = { -@@ -4344,6 +4345,7 @@ static const TypeInfo spapr_machine_info = { +@@ -4426,6 +4427,7 @@ static const TypeInfo spapr_machine_info = { } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-4.0 + * pseries-4.1 */ -@@ -4499,6 +4501,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); +@@ -4609,6 +4611,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); /* * pseries-2.7 */ @@ -59,30 +61,54 @@ index b52b82d298..daa59203ea 100644 static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, -@@ -4549,6 +4552,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, - */ +@@ -4663,6 +4666,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, + *nv2atsd = 0; } +#if 0 /* Disabled for Red Hat Enterprise Linux */ static 
void spapr_machine_2_7_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4663,6 +4667,254 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -4777,6 +4781,245 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); +#endif + +/* -+ * pseries-rhel8.0.0 ++ * pseries-rhel8.1.0 + */ + -+static void spapr_machine_rhel800_class_options(MachineClass *mc) ++static void spapr_machine_rhel810_class_options(MachineClass *mc) +{ + /* Defaults for the latest behaviour inherited from the base class */ +} + -+DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", true); ++DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", true); ++ ++/* ++ * pseries-rhel8.0.0 ++ * like spapr_compat_3_1 ++ * except SPAPR_CAP_CFPC, SPAPR_CAP_SBBC and SPAPR_CAP_IBS ++ * that have been backported to pseries-rhel8.0.0 ++ */ ++ ++static void spapr_machine_rhel800_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel810_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, ++ hw_compat_rhel_8_0_len); ++ ++ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); ++ smc->update_dt_enabled = false; ++ smc->dr_phb_enabled = false; ++ smc->broken_host_serial_model = true; ++ smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", false); + +/* + * pseries-rhel7.6.0 @@ -90,16 +116,8 @@ index b52b82d298..daa59203ea 100644 + * spapr_compat_0 is empty + */ +GlobalProperty spapr_compat_rhel7_6[] = { -+ { -+ .driver = TYPE_POWERPC_CPU, -+ .property = "pre-3.0-migration", -+ .value = "on", -+ }, -+ { -+ .driver = TYPE_SPAPR_CPU_CORE, -+ .property = "pre-3.0-migration", -+ .value = "on", -+ }, ++ { TYPE_POWERPC_CPU, "pre-3.0-migration", "on" }, ++ { TYPE_SPAPR_CPU_CORE, "pre-3.0-migration", "on" }, +}; +const 
size_t spapr_compat_rhel7_6_len = G_N_ELEMENTS(spapr_compat_rhel7_6); + @@ -123,8 +141,14 @@ index b52b82d298..daa59203ea 100644 + * yet. Postpone this to machine init (see default_caps_with_cpu()). + */ + smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0; -+ /* Defaults for the latest behaviour inherited from the base class */ -+ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); ++ ++ /* SPAPR_CAP_WORKAROUND enabled in pseries-rhel800 by ++ * f21757edc554 ++ * "Enable mitigations by default for pseries-4.0 machine type") ++ */ ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; +} + +DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", false); @@ -179,11 +203,7 @@ index b52b82d298..daa59203ea 100644 + * like spapr_compat_2_9 + */ +GlobalProperty spapr_compat_rhel7_4[] = { -+ { -+ .driver = TYPE_POWERPC_CPU, -+ .property = "pre-2.10-migration", -+ .value = "on", -+ }, ++ { TYPE_POWERPC_CPU, "pre-2.10-migration", "on" }, +}; +const size_t spapr_compat_rhel7_4_len = G_N_ELEMENTS(spapr_compat_rhel7_4); + @@ -226,31 +246,11 @@ index b52b82d298..daa59203ea 100644 + * like spapr_compat_2_6/_2_7/_2_8 but "ddw" has been backported to RHEL7_3 + */ +GlobalProperty spapr_compat_rhel7_3[] = { -+ { -+ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, -+ .property = "mem_win_size", -+ .value = "0xf80000000", -+ }, -+ { -+ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, -+ .property = "mem64_win_size", -+ .value = "0", -+ }, -+ { -+ .driver = TYPE_POWERPC_CPU, -+ .property = "pre-2.8-migration", -+ .value = "on", -+ }, -+ { -+ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, -+ .property = "pre-2.8-migration", -+ .value = "on", -+ }, -+ { -+ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, -+ .property = "pcie-extended-configuration-space", -+ .value = "off", -+ }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem_win_size", "0xf80000000" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem64_win_size", "0" }, 
++ { TYPE_POWERPC_CPU, "pre-2.8-migration", "on" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-2.8-migration", "on" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pcie-extended-configuration-space", "off" }, +}; +const size_t spapr_compat_rhel7_3_len = G_N_ELEMENTS(spapr_compat_rhel7_3); + @@ -295,15 +295,8 @@ index b52b82d298..daa59203ea 100644 + */ + +GlobalProperty spapr_compat_rhel7_2[] = { -+ { -+ .driver = "spapr-vlan", -+ .property = "use-rx-buffer-pools", -+ .value = "off", -+ },{ -+ .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, -+ .property = "ddw", -+ .value = "off", -+ }, ++ { "spapr-vlan", "use-rx-buffer-pools", "off" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "ddw", "off" }, +}; +const size_t spapr_compat_rhel7_2_len = G_N_ELEMENTS(spapr_compat_rhel7_2); + @@ -323,7 +316,7 @@ index b52b82d298..daa59203ea 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index c664969b5b..7556debbef 100644 +index b91a106..29a3c7d 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -21,6 +21,7 @@ @@ -334,7 +327,7 @@ index c664969b5b..7556debbef 100644 static void spapr_cpu_reset(void *opaque) { -@@ -222,6 +223,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -224,6 +225,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); Error *local_err = NULL; @@ -342,7 +335,7 @@ index c664969b5b..7556debbef 100644 object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); if (local_err) { -@@ -234,6 +236,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -236,6 +238,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); kvmppc_set_papr(cpu); @@ -361,19 +354,19 @@ index c664969b5b..7556debbef 100644 spapr_cpu_reset(cpu); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 
5ea8081041..8737516ada 100644 +index 60553d3..b0ba32e 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -120,6 +120,7 @@ struct SpaprMachineClass { - bool legacy_irq_allocation; +@@ -121,6 +121,7 @@ struct SpaprMachineClass { bool broken_host_serial_model; /* present real host info to the guest */ + bool pre_4_1_migration; /* don't migrate hpt-max-page-size */ + bool has_power9_support; void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, diff --git a/target/ppc/compat.c b/target/ppc/compat.c -index 7de4bf3122..3e2e35342d 100644 +index 7de4bf3..3e2e353 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -105,8 +105,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) @@ -398,10 +391,10 @@ index 7de4bf3122..3e2e35342d 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index 0707177584..60cc41fd53 100644 +index c9beba2..76cb7c2 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1381,6 +1381,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool ifetch) +@@ -1350,6 +1350,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -410,5 +403,5 @@ index 0707177584..60cc41fd53 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, -- -2.20.1 +1.8.3.1 diff --git a/0009-Add-s390x-machine-types.patch b/0009-Add-s390x-machine-types.patch index 206c29b..767272b 100644 --- a/0009-Add-s390x-machine-types.patch +++ b/0009-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 4f7991558861584776c7c61dd56f339ed453372d Mon Sep 17 00:00:00 2001 +From e9de3d4a98a79e820be14e2a752e2ebb20e341d4 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -13,19 
+13,17 @@ Rebase changes (weekly-190111): Merged patches (3.1.0): - 29df663 s390x/cpumodel: default enable bpb and ppa15 for z196 and later -Merged patches (weekly-190118): +Merged patches (4.1.0-rc0): - 6c200d665b hw/s390x/s390-virtio-ccw: Add machine types for RHEL8.0.0 - -Signed-off-by: Danilo C. L. de Paula --- - hw/s390x/s390-virtio-ccw.c | 73 +++++++++++++++++++++++++++++++++++++- - 1 file changed, 72 insertions(+), 1 deletion(-) + hw/s390x/s390-virtio-ccw.c | 67 +++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index d11069b860..8c672dfd02 100644 +index 5b6a9a4..4d8c2ec 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -622,7 +622,7 @@ bool css_migration_enabled(void) +@@ -636,7 +636,7 @@ bool css_migration_enabled(void) { \ MachineClass *mc = MACHINE_CLASS(oc); \ ccw_machine_##suffix##_class_options(mc); \ @@ -34,22 +32,19 @@ index d11069b860..8c672dfd02 100644 if (latest) { \ mc->alias = "s390-ccw-virtio"; \ mc->is_default = 1; \ -@@ -646,6 +646,9 @@ bool css_migration_enabled(void) +@@ -660,6 +660,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) -+ +#if 0 /* Disabled for Red Hat Enterprise Linux */ -+ - static void ccw_machine_4_0_instance_options(MachineState *machine) + static void ccw_machine_4_1_instance_options(MachineState *machine) { } -@@ -846,6 +849,74 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -873,6 +874,70 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); - -+#else -+ ++#endif +/* + * like CCW_COMPAT_2_12 + CCW_COMPAT_3_0 (which are empty), but includes + * HW_COMPAT_RHEL7_6 instead of HW_COMPAT_2_11 and HW_COMPAT_3_0 @@ -113,12 +108,9 @@ index d11069b860..8c672dfd02 100644 + S390_MACHINE_CLASS(mc)->hpage_1m_allowed = false; 
+} +DEFINE_CCW_MACHINE(rhel750, "rhel7.5.0", false); -+ -+#endif -+ + static void ccw_machine_register_types(void) { - type_register_static(&ccw_machine_info); -- -2.20.1 +1.8.3.1 diff --git a/0010-Add-x86_64-machine-types.patch b/0010-Add-x86_64-machine-types.patch index 92d8384..0a6f333 100644 --- a/0010-Add-x86_64-machine-types.patch +++ b/0010-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 3c0a8f41da22fe3b88b6f4811b58b0fec6d09d8e Mon Sep 17 00:00:00 2001 +From 83831de11704d0f48ab56fec1f434ae9396e9cc1 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -7,40 +7,40 @@ Adding changes to add RHEL machine types for x86_64 architecture. Signed-off-by: Miroslav Rezanina -Rebase changes (weekly-190111): +Rebase changes (qemu-4.0.0): - Use upstream compat handling Rebase notes (3.1.0): - Removed xsave changes -Merged patches (weekly-190104): +Rebase notes (4.1.0-rc2): +- Updated format for compat structures + +Merged patches (4.1.0-rc0): - f4dc802 pc: 7.5 compat entries - 456ed3e pc: PC_RHEL7_6_COMPAT - 04119ee pc: Add compat for pc-i440fx-rhel7.6.0 machine type - b3b3687 pc: Add pc-q35-8.0.0 machine type - 8d46fc6 pc: Add x-migrate-smi-count=off to PC_RHEL7_6_COMPAT - 1de7949 kvm: clear out KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT for older machine types - -Merged patches (weekly-190301): - 18cf0d7 target/i386: Disable MPX support on named CPU models (partialy) - -Signed-off-by: Danilo C. L. 
de Paula +- 2660667 rhel: Set host-phys-bits-limit=48 on rhel machine-types --- hw/i386/acpi-build.c | 3 + - hw/i386/pc.c | 645 ++++++++++++++++++++++++++++++++++++++++++- - hw/i386/pc_piix.c | 200 +++++++++++++- - hw/i386/pc_q35.c | 114 +++++++- + hw/i386/pc.c | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 200 ++++++++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_q35.c | 114 +++++++++++++++++++++++++- include/hw/boards.h | 2 + - include/hw/i386/pc.h | 27 ++ + include/hw/i386/pc.h | 27 ++++++ target/i386/cpu.c | 9 +- target/i386/kvm.c | 4 + - 8 files changed, 997 insertions(+), 7 deletions(-) + 8 files changed, 578 insertions(+), 7 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index 416da318ae..4cad5645b2 100644 +index f3fdfef..1d64f0b 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c -@@ -190,6 +190,9 @@ static void acpi_get_pm_info(AcpiPmInfo *pm) +@@ -203,6 +203,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) pm->fadt.reset_reg = r; pm->fadt.reset_val = 0xf; pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; @@ -49,12 +49,12 @@ index 416da318ae..4cad5645b2 100644 + pm->fadt.rev = 1; pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; } - assert(obj); + diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index f2c15bf1f2..03497eed9a 100644 +index 549c437..9ded0db 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -350,6 +350,643 @@ GlobalProperty pc_compat_1_4[] = { +@@ -354,6 +354,224 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -63,16 +63,10 @@ index f2c15bf1f2..03497eed9a 100644 + * machine type. 
+ */ +GlobalProperty pc_rhel_compat[] = { -+ { /* PC_RHEL_COMPAT */ -+ .driver = TYPE_X86_CPU, -+ .property = "host-phys-bits", -+ .value = "on", -+ }, -+ { /* PC_RHEL_COMPAT bz 1508330 */ -+ .driver = "vfio-pci", -+ .property = "x-no-geforce-quirks", -+ .value = "on", -+ }, ++ { TYPE_X86_CPU, "host-phys-bits", "on" }, ++ { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, ++ /* bz 1508330 */ ++ { "vfio-pci", "x-no-geforce-quirks", "on" }, +}; +const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + @@ -83,51 +77,28 @@ index f2c15bf1f2..03497eed9a 100644 + * machine types irrespective of host. + */ +GlobalProperty pc_rhel_7_6_compat[] = { -+ { /* PC_RHEL7_6_COMPAT from PC_COMPAT_3_0 */ -+ .driver = TYPE_X86_CPU, -+ .property = "x-hv-synic-kvm-only", -+ .value = "on", -+ },{ /* PC_RHEL7_6_COMPAT from PC_COMPAT_3_0 */ -+ .driver = "Skylake-Server" "-" TYPE_X86_CPU, -+ .property = "pku", -+ .value = "off", -+ },{ /* PC_RHEL7_6_COMPAT from PC_COMPAT_3_0 */ -+ .driver = "Skylake-Server-IBRS" "-" TYPE_X86_CPU, -+ .property = "pku", -+ .value = "off", -+ },{ /* PC_RHEL7_6_COMPAT from PC_COMPAT_2_11 */ -+ .driver = TYPE_X86_CPU, -+ .property = "x-migrate-smi-count", -+ .value = "off", -+ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ -+ .driver = "Skylake-Client" "-" TYPE_X86_CPU, -+ .property = "mpx", -+ .value = "on", -+ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ -+ .driver = "Skylake-Client-IBRS" "-" TYPE_X86_CPU, -+ .property = "mpx", -+ .value = "on", -+ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ -+ .driver = "Skylake-Server" "-" TYPE_X86_CPU, -+ .property = "mpx", -+ .value = "on", -+ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ -+ .driver = "Skylake-Server-IBRS" "-" TYPE_X86_CPU, -+ .property = "mpx", -+ .value = "on", -+ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ -+ .driver = "Cascadelake-Server" "-" TYPE_X86_CPU, -+ .property = "mpx", -+ .value = "on", -+ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ -+ .driver = "Icelake-Client" "-" 
TYPE_X86_CPU, -+ .property = "mpx", -+ .value = "on", -+ },{ /* PC_RHEL7_6_COMPAT from pc_compat_3_1 */ -+ .driver = "Icelake-Server" "-" TYPE_X86_CPU, -+ .property = "mpx", -+ .value = "on", -+ }, ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { TYPE_X86_CPU, "x-hv-synic-kvm-only", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "pku", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "pku", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { TYPE_X86_CPU, "x-migrate-smi-count", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Cascadelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, +}; +const size_t pc_rhel_7_6_compat_len = G_N_ELEMENTS(pc_rhel_7_6_compat); + @@ -136,437 +107,131 @@ index f2c15bf1f2..03497eed9a 100644 + * - x-pci-hole64-fix was backported to 7.5 + */ +GlobalProperty pc_rhel_7_5_compat[] = { -+ { /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_11 */ -+ .driver = "Skylake-Server" "-" TYPE_X86_CPU, -+ .property = "clflushopt", -+ .value = "off", -+ },{ /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_12 */ -+ .driver = TYPE_X86_CPU, -+ .property = "legacy-cache", -+ .value = "on", -+ },{ /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_12 */ -+ .driver = TYPE_X86_CPU, -+ .property = "topoext", -+ .value = "off", -+ },{ /* PC_RHEL7_5_COMPAT from 
PC_COMPAT_2_12 */ -+ .driver = "EPYC-" TYPE_X86_CPU, -+ .property = "xlevel", -+ .value = stringify(0x8000000a), -+ },{ /* PC_RHEL7_5_COMPAT from PC_COMPAT_2_12 */ -+ .driver = "EPYC-IBPB-" TYPE_X86_CPU, -+ .property = "xlevel", -+ .value = stringify(0x8000000a), -+ }, ++ /* pc_rhel_7_5_compat from pc_compat_2_11 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "clflushopt", "off" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { TYPE_X86_CPU, "legacy-cache", "on" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { TYPE_X86_CPU, "topoext", "off" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { "EPYC-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { "EPYC-IBPB-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, +}; +const size_t pc_rhel_7_5_compat_len = G_N_ELEMENTS(pc_rhel_7_5_compat); + -+ +GlobalProperty pc_rhel_7_4_compat[] = { -+ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_9 */ -+ .driver = "mch", -+ .property = "extended-tseg-mbytes", -+ .value = stringify(0), -+ }, -+ { /* PC_RHEL7_4_COMPAT bz 1489800 */ -+ .driver = "ICH9-LPC", -+ .property = "__com.redhat_force-rev1-fadt", -+ .value = "on", -+ }, -+ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ -+ .driver = "i440FX-pcihost", -+ .property = "x-pci-hole64-fix", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ -+ .driver = "q35-pcihost", -+ .property = "x-pci-hole64-fix", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_4_COMPAT from PC_COMPAT_2_10 */ -+ .driver = TYPE_X86_CPU, -+ .property = "x-hv-max-vps", -+ .value = "0x40", -+ }, ++ /* pc_rhel_7_4_compat from pc_compat_2_9 */ ++ { "mch", "extended-tseg-mbytes", stringify(0) }, ++ /* bz 1489800 */ ++ { "ICH9-LPC", "__com.redhat_force-rev1-fadt", "on" }, ++ /* pc_rhel_7_4_compat from pc_compat_2_10 */ ++ { "i440FX-pcihost", "x-pci-hole64-fix", "off" }, ++ /* pc_rhel_7_4_compat from pc_compat_2_10 */ ++ { "q35-pcihost", "x-pci-hole64-fix", "off" }, ++ /* pc_rhel_7_4_compat from 
pc_compat_2_10 */ ++ { TYPE_X86_CPU, "x-hv-max-vps", "0x40" }, +}; +const size_t pc_rhel_7_4_compat_len = G_N_ELEMENTS(pc_rhel_7_4_compat); + +GlobalProperty pc_rhel_7_3_compat[] = { -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ -+ .driver = "kvmclock", -+ .property = "x-mach-use-reliable-get-clock", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ -+ .driver = TYPE_X86_CPU, -+ .property = "l3-cache", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ -+ .driver = TYPE_X86_CPU, -+ .property = "full-cpuid-auto-level", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ -+ .driver = "Opteron_G3" "-" TYPE_X86_CPU, -+ .property = "family", -+ .value = "15", -+ }, -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ -+ .driver = "Opteron_G3" "-" TYPE_X86_CPU, -+ .property = "model", -+ .value = "6", -+ }, -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ -+ .driver = "Opteron_G3" "-" TYPE_X86_CPU, -+ .property = "stepping", -+ .value = "1", -+ }, -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_7 */ -+ .driver = "isa-pcspk", -+ .property = "migrate", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_6 */ -+ .driver = TYPE_X86_CPU, -+ .property = "cpuid-0xb", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ -+ .driver = "ICH9-LPC", -+ .property = "x-smi-broadcast", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ -+ .driver = TYPE_X86_CPU, -+ .property = "vmware-cpuid-freq", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_3_COMPAT from PC_COMPAT_2_8 */ -+ .driver = "Haswell-" TYPE_X86_CPU, -+ .property = "stepping", -+ .value = "1", -+ }, -+ {/* PC_RHEL7_3_COMPAT from PC_COMPAT_2_3 added in 2.9 */ -+ .driver = TYPE_X86_CPU, -+ .property = "kvm-no-smi-migration", -+ .value = "on", -+ }, ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "kvmclock", "x-mach-use-reliable-get-clock", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { TYPE_X86_CPU, 
"l3-cache", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { TYPE_X86_CPU, "full-cpuid-auto-level", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "family", "15" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "model", "6" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "stepping", "1" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "isa-pcspk", "migrate", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_6 */ ++ { TYPE_X86_CPU, "cpuid-0xb", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "ICH9-LPC", "x-smi-broadcast", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { TYPE_X86_CPU, "vmware-cpuid-freq", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "Haswell-" TYPE_X86_CPU, "stepping", "1" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_3 added in 2.9*/ ++ { TYPE_X86_CPU, "kvm-no-smi-migration", "on" }, +}; +const size_t pc_rhel_7_3_compat_len = G_N_ELEMENTS(pc_rhel_7_3_compat); + +GlobalProperty pc_rhel_7_2_compat[] = { -+ { -+ .driver = "phenom" "-" TYPE_X86_CPU, -+ .property = "rdtscp", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_2_COMPAT */ -+ .driver = "qemu64" "-" TYPE_X86_CPU, -+ .property = "sse4a", -+ .value = "on", -+ }, -+ { /* PC_RHEL7_2_COMPAT */ -+ .driver = "qemu64" "-" TYPE_X86_CPU, -+ .property = "abm", -+ .value = "on", -+ }, -+ { /* PC_RHEL7_2_COMPAT */ -+ .driver = "Haswell-" TYPE_X86_CPU, -+ .property = "abm", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ -+ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU, -+ .property = "abm", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_2_COMPAT */ -+ .driver = "Haswell-noTSX-" TYPE_X86_CPU, -+ .property = "abm", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ -+ .driver = "Haswell-noTSX-IBRS" "-" TYPE_X86_CPU, -+ .property = "abm", -+ .value = "off", -+ }, -+ { /* 
PC_RHEL7_2_COMPAT */ -+ .driver = "Broadwell-" TYPE_X86_CPU, -+ .property = "abm", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ -+ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU, -+ .property = "abm", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_2_COMPAT */ -+ .driver = "Broadwell-noTSX-" TYPE_X86_CPU, -+ .property = "abm", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_2_COMPAT (copied from the entry above) */ -+ .driver = "Broadwell-noTSX-IBRS" "-" TYPE_X86_CPU, -+ .property = "abm", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_2_COMPAT */ -+ .driver = "host" "-" TYPE_X86_CPU, -+ .property = "host-cache-info", -+ .value = "on", -+ }, -+ { /* PC_RHEL7_2_COMPAT */ -+ .driver = TYPE_X86_CPU, -+ .property = "check", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_2_COMPAT */ -+ .driver = "qemu32" "-" TYPE_X86_CPU, -+ .property = "popcnt", -+ .value = "on", -+ }, -+ { /* PC_RHEL7_2_COMPAT */ -+ .driver = TYPE_X86_CPU, -+ .property = "arat", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_2_COMPAT */ -+ .driver = "usb-redir", -+ .property = "streams", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_2_COMPAT */ -+ .driver = TYPE_X86_CPU, -+ .property = "fill-mtrr-mask", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_2_COMPAT */ -+ .driver = "apic-common", -+ .property = "legacy-instance-id", -+ .value = "on", -+ }, ++ { "phenom" "-" TYPE_X86_CPU, "rdtscp", "off"}, ++ { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" }, ++ { "qemu64" "-" TYPE_X86_CPU, "abm", "on" }, ++ { "Haswell-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-noTSX-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" }, ++ { TYPE_X86_CPU, "check", 
"off" }, ++ { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" }, ++ { TYPE_X86_CPU, "arat", "off" }, ++ { "usb-redir", "streams", "off" }, ++ { TYPE_X86_CPU, "fill-mtrr-mask", "off" }, ++ { "apic-common", "legacy-instance-id", "on" }, +}; +const size_t pc_rhel_7_2_compat_len = G_N_ELEMENTS(pc_rhel_7_2_compat); + +GlobalProperty pc_rhel_7_1_compat[] = { -+ { -+ .driver = "kvm64" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { -+ .driver = "kvm32" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { -+ .driver = "Conroe" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { -+ .driver = "Penryn" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { -+ .driver = "Nehalem" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ -+ .driver = "Nehalem-IBRS" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { -+ .driver = "Westmere" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ -+ .driver = "Westmere-IBRS" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { -+ .driver = "SandyBridge" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ -+ .driver = "SandyBridge-IBRS" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { -+ .driver = "Haswell" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ -+ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { -+ .driver = "Broadwell" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ -+ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { -+ .driver = "Opteron_G1" "-" 
TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { -+ .driver = "Opteron_G2" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { -+ .driver = "Opteron_G3" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { -+ .driver = "Opteron_G4" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { -+ .driver = "Opteron_G5" "-" TYPE_X86_CPU, -+ .property = "vme", -+ .value = "off", -+ }, -+ { -+ .driver = "Haswell" "-" TYPE_X86_CPU, -+ .property = "f16c", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ -+ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU, -+ .property = "f16c", -+ .value = "off", -+ }, -+ { -+ .driver = "Haswell" "-" TYPE_X86_CPU, -+ .property = "rdrand", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ -+ .driver = "Haswell-IBRS" "-" TYPE_X86_CPU, -+ .property = "rdrand", -+ .value = "off", -+ }, -+ { -+ .driver = "Broadwell" "-" TYPE_X86_CPU, -+ .property = "f16c", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ -+ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU, -+ .property = "f16c", -+ .value = "off", -+ }, -+ { -+ .driver = "Broadwell" "-" TYPE_X86_CPU, -+ .property = "rdrand", -+ .value = "off", -+ }, -+ { /* PC_RHEL7_1_COMPAT (copied from the entry above) */ -+ .driver = "Broadwell-IBRS" "-" TYPE_X86_CPU, -+ .property = "rdrand", -+ .value = "off", -+ }, -+ { -+ .driver = "coreduo" "-" TYPE_X86_CPU, -+ .property = "vmx", -+ .value = "on", -+ }, -+ { -+ .driver = "core2duo" "-" TYPE_X86_CPU, -+ .property = "vmx", -+ .value = "on", -+ }, -+ { /* PC_RHEL7_1_COMPAT */ -+ .driver = "qemu64" "-" TYPE_X86_CPU, -+ .property = "min-level", -+ .value = stringify(4), -+ },{ /* PC_RHEL7_1_COMPAT */ -+ .driver = "kvm64" "-" TYPE_X86_CPU, -+ .property = "min-level", -+ .value = stringify(5), -+ },{ /* PC_RHEL7_1_COMPAT */ -+ .driver = "pentium3" "-" TYPE_X86_CPU, -+ .property = "min-level", -+ .value = 
stringify(2), -+ },{ /* PC_RHEL7_1_COMPAT */ -+ .driver = "n270" "-" TYPE_X86_CPU, -+ .property = "min-level", -+ .value = stringify(5), -+ },{ /* PC_RHEL7_1_COMPAT */ -+ .driver = "Conroe" "-" TYPE_X86_CPU, -+ .property = "min-level", -+ .value = stringify(4), -+ },{ /* PC_RHEL7_1_COMPAT */ -+ .driver = "Penryn" "-" TYPE_X86_CPU, -+ .property = "min-level", -+ .value = stringify(4), -+ },{ /* PC_RHEL7_1_COMPAT */ -+ .driver = "Nehalem" "-" TYPE_X86_CPU, -+ .property = "min-level", -+ .value = stringify(4), -+ },{ /* PC_RHEL7_1_COMPAT */ -+ .driver = "n270" "-" TYPE_X86_CPU, -+ .property = "min-xlevel", -+ .value = stringify(0x8000000a), -+ },{ /* PC_RHEL7_1_COMPAT */ -+ .driver = "Penryn" "-" TYPE_X86_CPU, -+ .property = "min-xlevel", -+ .value = stringify(0x8000000a), -+ },{ /* PC_RHEL7_1_COMPAT */ -+ .driver = "Conroe" "-" TYPE_X86_CPU, -+ .property = "min-xlevel", -+ .value = stringify(0x8000000a), -+ },{ /* PC_RHEL7_1_COMPAT */ -+ .driver = "Nehalem" "-" TYPE_X86_CPU, -+ .property = "min-xlevel", -+ .value = stringify(0x8000000a), -+ },{ /* PC_RHEL7_1_COMPAT */ -+ .driver = "Westmere" "-" TYPE_X86_CPU, -+ .property = "min-xlevel", -+ .value = stringify(0x8000000a), -+ },{ /* PC_RHEL7_1_COMPAT */ -+ .driver = "SandyBridge" "-" TYPE_X86_CPU, -+ .property = "min-xlevel", -+ .value = stringify(0x8000000a), -+ },{ /* PC_RHEL7_1_COMPAT */ -+ .driver = "IvyBridge" "-" TYPE_X86_CPU, -+ .property = "min-xlevel", -+ .value = stringify(0x8000000a), -+ },{ /* PC_RHEL7_1_COMPAT */ -+ .driver = "Haswell" "-" TYPE_X86_CPU, -+ .property = "min-xlevel", -+ .value = stringify(0x8000000a), -+ },{ /* PC_RHEL7_1_COMPAT */ -+ .driver = "Haswell-noTSX" "-" TYPE_X86_CPU, -+ .property = "min-xlevel", -+ .value = stringify(0x8000000a), -+ },{ /* PC_RHEL7_1_COMPAT */ -+ .driver = "Broadwell" "-" TYPE_X86_CPU, -+ .property = "min-xlevel", -+ .value = stringify(0x8000000a), -+ },{ /* PC_RHEL7_1_COMPAT */ -+ .driver = "Broadwell-noTSX" "-" TYPE_X86_CPU, -+ .property = "min-xlevel", -+ 
.value = stringify(0x8000000a), -+ }, ++ { "kvm64" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "kvm32" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Conroe" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Penryn" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Nehalem" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Nehalem-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Westmere" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Westmere-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "SandyBridge" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "SandyBridge-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G1" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G2" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G3" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G4" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G5" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Haswell" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "coreduo" "-" TYPE_X86_CPU, "vmx", "on" }, ++ { "core2duo" "-" TYPE_X86_CPU, "vmx", "on" }, ++ { "qemu64" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "kvm64" "-" TYPE_X86_CPU, "min-level", stringify(5) }, ++ { "pentium3" "-" TYPE_X86_CPU, "min-level", stringify(2) }, ++ { "n270" "-" TYPE_X86_CPU, "min-level", stringify(5) }, ++ { "Conroe" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "Penryn" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "Nehalem" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "n270" "-" 
TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Penryn" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Conroe" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Nehalem" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Westmere" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "SandyBridge" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "IvyBridge" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Haswell" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Haswell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Broadwell" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Broadwell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, +}; +const size_t pc_rhel_7_1_compat_len = G_N_ELEMENTS(pc_rhel_7_1_compat); + @@ -584,121 +249,37 @@ index f2c15bf1f2..03497eed9a 100644 + * pci-serial-4x) in 7.0. + */ +GlobalProperty pc_rhel_7_0_compat[] = { -+ { -+ .driver = "virtio-scsi-pci", -+ .property = "any_layout", -+ .value = "off", -+ },{ -+ .driver = "PIIX4_PM", -+ .property = "memory-hotplug-support", -+ .value = "off", -+ },{ -+ .driver = "apic", -+ .property = "version", -+ .value = stringify(0x11), -+ },{ -+ .driver = "nec-usb-xhci", -+ .property = "superspeed-ports-first", -+ .value = "off", -+ },{ -+ .driver = "nec-usb-xhci", -+ .property = "force-pcie-endcap", -+ .value = "on", -+ },{ -+ .driver = "pci-serial", -+ .property = "prog_if", -+ .value = stringify(0), -+ },{ -+ .driver = "virtio-net-pci", -+ .property = "guest_announce", -+ .value = "off", -+ },{ -+ .driver = "ICH9-LPC", -+ .property = "memory-hotplug-support", -+ .value = "off", -+ },{ -+ .driver = "xio3130-downstream", -+ .property = COMPAT_PROP_PCP, -+ .value = "off", -+ },{ -+ .driver = "ioh3420", -+ .property = COMPAT_PROP_PCP, -+ .value = "off", -+ },{ -+ .driver = "PIIX4_PM", -+ .property = "acpi-pci-hotplug-with-bridge-support", -+ .value = "off", 
-+ },{ -+ .driver = "e1000", -+ .property = "mitigation", -+ .value = "off", -+ },{ -+ .driver = "virtio-net-pci", -+ .property = "ctrl_guest_offloads", -+ .value = "off", -+ }, -+ { -+ .driver = "Conroe" "-" TYPE_X86_CPU, -+ .property = "x2apic", -+ .value = "on", -+ }, -+ { -+ .driver = "Penryn" "-" TYPE_X86_CPU, -+ .property = "x2apic", -+ .value = "on", -+ }, -+ { -+ .driver = "Nehalem" "-" TYPE_X86_CPU, -+ .property = "x2apic", -+ .value = "on", -+ }, -+ { /* PC_RHEL7_0_COMPAT (copied from the entry above) */ -+ .driver = "Nehalem-IBRS" "-" TYPE_X86_CPU, -+ .property = "x2apic", -+ .value = "on", -+ }, -+ { -+ .driver = "Westmere" "-" TYPE_X86_CPU, -+ .property = "x2apic", -+ .value = "on", -+ }, -+ { /* PC_RHEL7_0_COMPAT (copied from the entry above) */ -+ .driver = "Westmere-IBRS" "-" TYPE_X86_CPU, -+ .property = "x2apic", -+ .value = "on", -+ }, -+ { -+ .driver = "Opteron_G1" "-" TYPE_X86_CPU, -+ .property = "x2apic", -+ .value = "on", -+ }, -+ { -+ .driver = "Opteron_G2" "-" TYPE_X86_CPU, -+ .property = "x2apic", -+ .value = "on", -+ }, -+ { -+ .driver = "Opteron_G3" "-" TYPE_X86_CPU, -+ .property = "x2apic", -+ .value = "on", -+ }, -+ { -+ .driver = "Opteron_G4" "-" TYPE_X86_CPU, -+ .property = "x2apic", -+ .value = "on", -+ }, -+ { -+ .driver = "Opteron_G5" "-" TYPE_X86_CPU, -+ .property = "x2apic", -+ .value = "on", -+ } ++ { "virtio-scsi-pci", "any_layout", "off" }, ++ { "PIIX4_PM", "memory-hotplug-support", "off" }, ++ { "apic", "version", stringify(0x11) }, ++ { "nec-usb-xhci", "superspeed-ports-first", "off" }, ++ { "nec-usb-xhci", "force-pcie-endcap", "on" }, ++ { "pci-serial", "prog_if", stringify(0) }, ++ { "virtio-net-pci", "guest_announce", "off" }, ++ { "ICH9-LPC", "memory-hotplug-support", "off" }, ++ { "xio3130-downstream", COMPAT_PROP_PCP, "off" }, ++ { "ioh3420", COMPAT_PROP_PCP, "off" }, ++ { "PIIX4_PM", "acpi-pci-hotplug-with-bridge-support", "off" }, ++ { "e1000", "mitigation", "off" }, ++ { "virtio-net-pci", "ctrl_guest_offloads", 
"off" }, ++ { "Conroe" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Penryn" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Nehalem" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Nehalem-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Westmere" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Westmere-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G1" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G2" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G3" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G4" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G5" "-" TYPE_X86_CPU, "x2apic", "on" }, +}; +const size_t pc_rhel_7_0_compat_len = G_N_ELEMENTS(pc_rhel_7_0_compat); + void gsi_handler(void *opaque, int n, int level) { GSIState *s = opaque; -@@ -1814,7 +2451,8 @@ void pc_memory_init(PCMachineState *pcms, +@@ -1901,7 +2119,8 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr = g_malloc(sizeof(*option_rom_mr)); memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, &error_fatal); @@ -708,7 +289,7 @@ index f2c15bf1f2..03497eed9a 100644 memory_region_set_readonly(option_rom_mr, true); } memory_region_add_subregion_overlap(rom_memory, -@@ -2731,6 +3369,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -2928,6 +3147,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->linuxboot_dma_enabled = true; pcmc->pvh_enabled = true; assert(!mc->get_hotplug_handler); @@ -717,9 +298,9 @@ index f2c15bf1f2..03497eed9a 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->cpu_index_to_instance_props = pc_cpu_index_to_props; mc->get_default_cpu_node_id = pc_get_default_cpu_node_id; -@@ -2740,7 +3380,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - mc->default_boot_order = "cad"; +@@ -2938,7 +3159,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->hot_add_cpu = pc_hot_add_cpu; + mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; - mc->max_cpus = 255; + /* 240: max CPU 
count for RHEL */ @@ -728,10 +309,10 @@ index f2c15bf1f2..03497eed9a 100644 hc->pre_plug = pc_machine_device_pre_plug_cb; hc->plug = pc_machine_device_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 8ad8e885c6..37907fe76a 100644 +index c2280c7..c86c48c 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -50,6 +50,7 @@ +@@ -51,6 +51,7 @@ #include "cpu.h" #include "qapi/error.h" #include "qemu/error-report.h" @@ -739,7 +320,7 @@ index 8ad8e885c6..37907fe76a 100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -170,8 +171,8 @@ static void pc_init1(MachineState *machine, +@@ -173,8 +174,8 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ @@ -750,7 +331,7 @@ index 8ad8e885c6..37907fe76a 100644 pcmc->smbios_uuid_encoded, SMBIOS_ENTRY_POINT_21); } -@@ -310,6 +311,7 @@ static void pc_init1(MachineState *machine, +@@ -316,6 +317,7 @@ else { * HW_COMPAT_*, PC_COMPAT_*, or * pc_*_machine_options(). 
*/ @@ -758,7 +339,7 @@ index 8ad8e885c6..37907fe76a 100644 static void pc_compat_2_3_fn(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); -@@ -998,3 +1000,197 @@ static void xenfv_machine_options(MachineClass *m) +@@ -1022,3 +1024,197 @@ static void xenfv_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init, xenfv_machine_options); #endif @@ -957,7 +538,7 @@ index 8ad8e885c6..37907fe76a 100644 +DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 372c6b73be..4b7df48919 100644 +index 397e1fd..4959ed3 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -198,8 +198,8 @@ static void pc_q35_init(MachineState *machine) @@ -979,7 +560,7 @@ index 372c6b73be..4b7df48919 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -507,3 +508,112 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -538,3 +539,112 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -1093,10 +674,10 @@ index 372c6b73be..4b7df48919 100644 +DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, + pc_q35_machine_rhel730_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index 456e4a944c..b292365bfa 100644 +index 521c625..b4a8c4a 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -206,6 +206,8 @@ struct MachineClass { +@@ -214,6 +214,8 @@ struct MachineClass { const char **valid_cpu_types; strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; @@ -1106,20 +687,20 @@ index 456e4a944c..b292365bfa 100644 int nb_nodes, ram_addr_t size); bool ignore_boot_device_suffixes; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index ca65ef18af..b066f0a539 100644 +index 859b64c..605cc71 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h 
-@@ -134,6 +134,9 @@ typedef struct PCMachineClass { +@@ -142,6 +142,9 @@ typedef struct PCMachineClass { - /* use PVH to load kernels that support this feature */ - bool pvh_enabled; -+ + /* Enables contiguous-apic-ID mode */ + bool compat_apic_id_mode; ++ + /* RH only, see bz 1489800 */ + bool pc_rom_ro; } PCMachineClass; #define TYPE_PC_MACHINE "generic-pc-machine" -@@ -350,6 +353,30 @@ extern const size_t pc_compat_1_5_len; +@@ -362,6 +365,30 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; @@ -1151,10 +732,10 @@ index ca65ef18af..b066f0a539 100644 * depending on QEMU versions up to QEMU 2.4. */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 6616303782..6472cd21f8 100644 +index 47a1236..cd71a09 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1474,11 +1474,17 @@ static CPUCaches epyc_cache_info = { +@@ -1548,11 +1548,17 @@ static CPUCaches epyc_cache_info = { static X86CPUDefinition builtin_x86_defs[] = { { @@ -1173,7 +754,7 @@ index 6616303782..6472cd21f8 100644 .stepping = 3, .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -@@ -2973,6 +2979,7 @@ static PropValue kvm_default_props[] = { +@@ -2861,6 +2867,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -1182,10 +763,10 @@ index 6616303782..6472cd21f8 100644 }; diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 3b29ce5c0d..478942187a 100644 +index dbbb137..da5a5ef 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c -@@ -2373,6 +2373,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -2805,6 +2805,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -1193,7 +774,7 @@ index 3b29ce5c0d..478942187a 100644 kvm_msr_buf_reset(cpu); -@@ -2670,6 +2671,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3102,6 
+3103,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -1204,5 +785,5 @@ index 3b29ce5c0d..478942187a 100644 case MSR_KVM_PV_EOI_EN: env->pv_eoi_en_msr = msrs[i].data; -- -2.20.1 +1.8.3.1 diff --git a/0011-Enable-make-check.patch b/0011-Enable-make-check.patch index 74c8add..a7a4882 100644 --- a/0011-Enable-make-check.patch +++ b/0011-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 0442a72cf9da8f79fe26b08999f2dca900af6ad0 Mon Sep 17 00:00:00 2001 +From c1635c2c93f0efe82a6ab1dc973402882882382c Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:48:41 +0200 Subject: Enable make check @@ -13,29 +13,38 @@ Rebase changes (4.0.0): - Disable device-plug-test on s390x as it use disabled device - Do not run cpu-plug-tests on 7.3 and older machine types -Merged patches (4.0.0: +Rebase changes (4.1.0-rc0): +- removed iotests 068 + +Rebase changes (4.1.0-rc1): +- remove all 205 tests (unstable) + +Merged patches (4.0.0): - f7ffd13 Remove 7 qcow2 and luks iotests that are taking > 25 sec to run during the fast train build proce -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (4.1.0-rc0): +- 41288ff redhat: Remove raw iotest 205 + +Conflicts: + tests/qemu-iotests/group --- redhat/qemu-kvm.spec.template | 8 +++++++- tests/Makefile.include | 10 +++++----- tests/boot-serial-test.c | 6 +++++- tests/cpu-plug-test.c | 4 ++-- tests/e1000-test.c | 2 ++ - tests/endianness-test.c | 2 ++ - tests/prom-env-test.c | 2 ++ + tests/prom-env-test.c | 4 ++++ tests/qemu-iotests/051 | 12 ++++++------ tests/qemu-iotests/group | 4 ++-- tests/test-x86-cpuid-compat.c | 2 ++ tests/usb-hcd-xhci-test.c | 4 ++++ - 11 files changed, 39 insertions(+), 17 deletions(-) + 10 files changed, 39 insertions(+), 17 deletions(-) diff --git a/tests/Makefile.include b/tests/Makefile.include -index 36fc73fef5..e8f5fbc2c6 100644 +index fd7fdb8..d3da940 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include -@@ -162,7 +162,7 @@ check-qtest-i386-y += tests/ide-test$(EXESUF) +@@ -164,7 +164,7 @@ check-qtest-i386-y += tests/ide-test$(EXESUF) check-qtest-i386-y += tests/ahci-test$(EXESUF) check-qtest-i386-y += tests/hd-geo-test$(EXESUF) check-qtest-i386-y += tests/boot-order-test$(EXESUF) @@ -44,7 +53,7 @@ index 36fc73fef5..e8f5fbc2c6 100644 check-qtest-i386-$(CONFIG_SGA) += tests/boot-serial-test$(EXESUF) check-qtest-i386-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) check-qtest-i386-y += tests/rtc-test$(EXESUF) -@@ -221,7 +221,7 @@ check-qtest-mips64el-$(CONFIG_VGA) += tests/display-vga-test$(EXESUF) +@@ -223,7 +223,7 @@ check-qtest-mips64el-$(CONFIG_VGA) += tests/display-vga-test$(EXESUF) check-qtest-moxie-y += tests/boot-serial-test$(EXESUF) check-qtest-ppc-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) @@ -53,7 +62,7 @@ index 36fc73fef5..e8f5fbc2c6 100644 check-qtest-ppc-y += tests/prom-env-test$(EXESUF) check-qtest-ppc-y += tests/drive_del-test$(EXESUF) check-qtest-ppc-y += tests/boot-serial-test$(EXESUF) -@@ -235,8 +235,8 @@ check-qtest-ppc64-$(CONFIG_PSERIES) += tests/rtas-test$(EXESUF) +@@ -237,8 +237,8 @@ 
check-qtest-ppc64-$(CONFIG_PSERIES) += tests/rtas-test$(EXESUF) check-qtest-ppc64-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) check-qtest-ppc64-$(CONFIG_USB_UHCI) += tests/usb-hcd-uhci-test$(EXESUF) check-qtest-ppc64-$(CONFIG_USB_XHCI_NEC) += tests/usb-hcd-xhci-test$(EXESUF) @@ -64,7 +73,7 @@ index 36fc73fef5..e8f5fbc2c6 100644 check-qtest-ppc64-$(CONFIG_RTL8139_PCI) += tests/test-filter-redirector$(EXESUF) check-qtest-ppc64-$(CONFIG_VGA) += tests/display-vga-test$(EXESUF) check-qtest-ppc64-y += tests/numa-test$(EXESUF) -@@ -278,7 +278,7 @@ check-qtest-s390x-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) +@@ -282,7 +282,7 @@ check-qtest-s390x-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF) check-qtest-s390x-y += tests/drive_del-test$(EXESUF) @@ -74,7 +83,7 @@ index 36fc73fef5..e8f5fbc2c6 100644 check-qtest-s390x-y += tests/cpu-plug-test$(EXESUF) check-qtest-s390x-y += tests/migration-test$(EXESUF) diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c -index c591748aaf..c5873f8b24 100644 +index 24852d4..dce5860 100644 --- a/tests/boot-serial-test.c +++ b/tests/boot-serial-test.c @@ -97,18 +97,22 @@ static testdef_t tests[] = { @@ -102,7 +111,7 @@ index c591748aaf..c5873f8b24 100644 { "sparc", "LX", "", "TMS390S10" }, { "sparc", "SS-4", "", "MB86904" }, diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c -index 668f00144e..453ca8e583 100644 +index 668f001..453ca8e 100644 --- a/tests/cpu-plug-test.c +++ b/tests/cpu-plug-test.c @@ -190,8 +190,8 @@ static void add_pseries_test_case(const char *mname) @@ -117,10 +126,10 @@ index 668f00144e..453ca8e583 100644 } data = g_new(PlugTestData, 1); diff --git a/tests/e1000-test.c b/tests/e1000-test.c -index 9e67916169..adbd07f997 100644 +index c387984..c89112d 100644 --- a/tests/e1000-test.c +++ b/tests/e1000-test.c -@@ -21,9 +21,11 @@ struct QE1000 { +@@ -22,9 
+22,11 @@ struct QE1000 { static const char *models[] = { "e1000", @@ -132,40 +141,30 @@ index 9e67916169..adbd07f997 100644 }; static void *e1000_get_driver(void *obj, const char *interface) -diff --git a/tests/endianness-test.c b/tests/endianness-test.c -index 58527952a5..1d729ec7f2 100644 ---- a/tests/endianness-test.c -+++ b/tests/endianness-test.c -@@ -39,8 +39,10 @@ static const TestCase test_cases[] = { - { "ppc", "bamboo", 0xe8000000, .bswap = true, .superio = "i82378" }, - { "ppc64", "mac99", 0xf2000000, .bswap = true, .superio = "i82378" }, - { "ppc64", "pseries", (1ULL << 45), .bswap = true, .superio = "i82378" }, -+#if 0 /* Disabled in Red Hat Enterprise Linux */ - { "ppc64", "pseries-2.7", 0x10080000000ULL, - .bswap = true, .superio = "i82378" }, -+#endif - { "sh4", "r2d", 0xfe240000, .superio = "i82378" }, - { "sh4eb", "r2d", 0xfe240000, .bswap = true, .superio = "i82378" }, - { "sparc64", "sun4u", 0x1fe02000000LL, .bswap = true }, diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c -index 61bc1d1e7b..aeb76b0bc3 100644 +index 61bc1d1..028d45c 100644 --- a/tests/prom-env-test.c +++ b/tests/prom-env-test.c -@@ -88,7 +88,9 @@ int main(int argc, char *argv[]) +@@ -88,10 +88,14 @@ int main(int argc, char *argv[]) if (!strcmp(arch, "ppc")) { add_tests(ppc_machines); } else if (!strcmp(arch, "ppc64")) { +#if 0 /* Disabled for Red Hat Enterprise Linux */ add_tests(ppc_machines); -+#endif if (g_test_slow()) { ++#endif qtest_add_data_func("prom-env/pseries", "pseries", test_machine); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ } ++#endif + } else if (!strcmp(arch, "sparc")) { + add_tests(sparc_machines); + } else if (!strcmp(arch, "sparc64")) { diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 -index 02ac960da4..29f13440c0 100755 +index ce942a5..f64429e 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 -@@ -182,11 +182,11 @@ run_qemu -drive if=virtio +@@ -181,11 +181,11 @@ run_qemu -drive if=virtio case 
"$QEMU_DEFAULT_MACHINE" in pc) run_qemu -drive if=none,id=disk -device ide-cd,drive=disk @@ -180,7 +179,7 @@ index 02ac960da4..29f13440c0 100755 ;; *) ;; -@@ -211,11 +211,11 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on +@@ -234,11 +234,11 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on case "$QEMU_DEFAULT_MACHINE" in pc) run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-cd,drive=disk @@ -196,29 +195,29 @@ index 02ac960da4..29f13440c0 100755 *) ;; diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index bae7718380..0db5e68af1 100644 +index f13e5f2..813db25 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group -@@ -77,7 +77,7 @@ - 068 rw auto quick +@@ -92,7 +92,7 @@ + 068 rw quick 069 rw auto quick - 070 rw auto quick + 070 rw quick -071 rw auto quick +# 071 rw auto quick -- requires whitelisted blkverify 072 rw auto quick 073 rw auto quick 074 rw auto quick -@@ -105,7 +105,7 @@ - 096 rw auto quick +@@ -120,7 +120,7 @@ + 096 rw quick 097 rw auto backing 098 rw auto backing quick -099 rw auto quick +# 099 rw auto quick -- requires whitelisted blkverify # 100 was removed, do not reuse - 101 rw auto quick - 102 rw auto quick + 101 rw quick + 102 rw quick diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c -index e75b959950..6b46b73dd0 100644 +index e75b959..6b46b73 100644 --- a/tests/test-x86-cpuid-compat.c +++ b/tests/test-x86-cpuid-compat.c @@ -300,6 +300,7 @@ int main(int argc, char **argv) @@ -238,7 +237,7 @@ index e75b959950..6b46b73dd0 100644 /* Test feature parsing */ add_feature_test("x86/cpuid/features/plus", diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c -index 9eb24b00e4..465ed26dfc 100644 +index 9eb24b0..465ed26 100644 --- a/tests/usb-hcd-xhci-test.c +++ b/tests/usb-hcd-xhci-test.c @@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) @@ -268,5 +267,5 @@ index 9eb24b00e4..465ed26dfc 100644 qtest_start("-device nec-usb-xhci,id=xhci" -- 
-2.20.1 +1.8.3.1 diff --git a/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch index 7a3fc9d..42b8e91 100644 --- a/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From b0626d1bb4f6ec6fba1973ebb26a297e79e95437 Mon Sep 17 00:00:00 2001 +From ce492d69e627db370aef7907409e88b83e71619b Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -30,26 +30,31 @@ Rebase notes (2.8.0): Merged patches (2.9.0): - 17eb774 vfio: Use error_setg when reporting max assigned device overshoot + Merged patches (4.1.0-rc3): +- 2b89558 vfio: increase the cap on number of assigned devices to 64 + (cherry picked from commit 9fa3c9fc6dfcde76d80db1aa601b2d577f72ceec) (cherry picked from commit 3cb35556dc7d994f203d732fe952f95fcdb03c0a) -Signed-off-by: Danilo C. L. de Paula --- - hw/vfio/pci.c | 15 ++++++++++++++- - 1 file changed, 14 insertions(+), 1 deletion(-) + hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- + hw/vfio/pci.h | 1 + + 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 13badcd6ed..7c998afc0f 100644 +index 7c474a9..bb9f28e 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -36,6 +36,7 @@ - #include "qapi/error.h" +@@ -41,6 +41,9 @@ - #define MSIX_CAP_LENGTH 12 -+#define MAX_DEV_ASSIGN_CMDLINE 32 + #define TYPE_VIFO_PCI_NOHOTPLUG "vfio-pci-nohotplug" - #define TYPE_VFIO_PCI "vfio-pci" - #define PCI_VFIO(obj) OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI) -@@ -2806,9 +2807,21 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) ++/* RHEL only: Set once for the first assigned dev */ ++static uint16_t device_limit; ++ + static void vfio_disable_interrupts(VFIOPCIDevice *vdev); + static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); + +@@ -2703,9 +2706,30 @@ static void 
vfio_realize(PCIDevice *pdev, Error **errp) ssize_t len; struct stat st; int groupid; @@ -57,21 +62,52 @@ index 13badcd6ed..7c998afc0f 100644 + int ret, i = 0; bool is_mdev; ++ if (device_limit && device_limit != vdev->assigned_device_limit) { ++ error_setg(errp, "Assigned device limit has been redefined. " ++ "Old:%d, New:%d", ++ device_limit, vdev->assigned_device_limit); ++ return; ++ } else { ++ device_limit = vdev->assigned_device_limit; ++ } ++ + QLIST_FOREACH(group, &vfio_group_list, next) { + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { + i++; + } + } + -+ if (i >= MAX_DEV_ASSIGN_CMDLINE) { ++ if (i >= vdev->assigned_device_limit) { + error_setg(errp, "Maximum supported vfio devices (%d) " -+ "already attached", MAX_DEV_ASSIGN_CMDLINE); ++ "already attached", vdev->assigned_device_limit); + return; + } + if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { +@@ -3121,6 +3145,9 @@ static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), + DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, + no_geforce_quirks, false), ++ /* RHEL only */ ++ DEFINE_PROP_UINT16("x-assigned-device-limit", VFIOPCIDevice, ++ assigned_device_limit, 64), + DEFINE_PROP_BOOL("x-no-kvm-ioeventfd", VFIOPCIDevice, no_kvm_ioeventfd, + false), + DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 834a90d..205954e 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -135,6 +135,7 @@ typedef struct VFIOPCIDevice { + EventNotifier err_notifier; + EventNotifier req_notifier; + int (*resetfn)(struct VFIOPCIDevice *); ++ uint16_t assigned_device_limit; + uint32_t vendor_id; + uint32_t device_id; + uint32_t sub_vendor_id; -- -2.20.1 +1.8.3.1 diff --git a/0013-Add-support-statement-to-help-output.patch b/0013-Add-support-statement-to-help-output.patch index d77418a..421b5b5 100644 
--- a/0013-Add-support-statement-to-help-output.patch +++ b/0013-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 6ae79fe200eab13f75050a10b48cea3b5bf21aab Mon Sep 17 00:00:00 2001 +From c219069712b16be5664bfa6a4fcec4aa261d21c8 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -18,16 +18,15 @@ as unsupported by Red Hat, and advising users to use libvirt instead. Signed-off-by: Eduardo Habkost (cherry picked from commit 2a07700936e39856cc9f149c6a6517f0715536a6) (cherry picked from commit 5dd2f4706e2fef945771949e59a8fcc1b5452de9) -Signed-off-by: Danilo C. L. de Paula --- vl.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/vl.c b/vl.c -index 3ada215270..32886ebf7f 100644 +index f9166f5..dd1fadf 100644 --- a/vl.c +++ b/vl.c -@@ -1982,9 +1982,17 @@ static void version(void) +@@ -1803,9 +1803,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -45,7 +44,7 @@ index 3ada215270..32886ebf7f 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", error_get_progname()); -@@ -2001,6 +2009,7 @@ static void help(int exitcode) +@@ -1822,6 +1830,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); @@ -54,5 +53,5 @@ index 3ada215270..32886ebf7f 100644 } -- -2.20.1 +1.8.3.1 diff --git a/0014-globally-limit-the-maximum-number-of-CPUs.patch b/0014-globally-limit-the-maximum-number-of-CPUs.patch index f7e0b07..84fa5b6 100644 --- a/0014-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0014-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From 4d15d2010db402f5910a9b7e4c147a9a3e2f2604 Mon Sep 17 00:00:00 2001 +From 29def10374e810c82ef7e01bcdac0c0e77b42aec Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -74,21 +74,16 @@ The recommended vcpu max limit (KVM_CAP_NR_VCPUS) should be used instead of 
the actual max vcpu limit (KVM_CAP_MAX_VCPUS) to give an error. This commit matches the limit to current KVM_CAP_NR_VCPUS value. - -Conflicts: - vl.c - -Signed-off-by: Danilo C. L. de Paula --- accel/kvm/kvm-all.c | 12 ++++++++++++ vl.c | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index 241db496c3..003c0abee2 100644 +index f450f25..2d850df 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c -@@ -1630,6 +1630,18 @@ static int kvm_init(MachineState *ms) +@@ -1869,6 +1869,18 @@ static int kvm_init(MachineState *ms) soft_vcpus_limit = kvm_recommended_vcpus(s); hard_vcpus_limit = kvm_max_vcpus(s); @@ -108,10 +103,10 @@ index 241db496c3..003c0abee2 100644 if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " diff --git a/vl.c b/vl.c -index 32886ebf7f..2b95925f9b 100644 +index dd1fadf..7c52255 100644 --- a/vl.c +++ b/vl.c -@@ -131,6 +131,8 @@ int main(int argc, char **argv) +@@ -132,6 +132,8 @@ int main(int argc, char **argv) #define MAX_VIRTIO_CONSOLES 1 @@ -120,8 +115,8 @@ index 32886ebf7f..2b95925f9b 100644 static const char *data_dir[16]; static int data_dir_idx; const char *bios_name = NULL; -@@ -1508,6 +1510,20 @@ MachineClass *find_default_machine(void) - return mc; +@@ -1337,6 +1339,20 @@ static MachineClass *find_default_machine(GSList *machines) + return NULL; } +/* Maximum number of CPUs limited for Red Hat Enterprise Linux */ @@ -138,10 +133,10 @@ index 32886ebf7f..2b95925f9b 100644 + } +} + - MachineInfoList *qmp_query_machines(Error **errp) + static int machine_help_func(QemuOpts *opts, MachineState *machine) { - GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); -@@ -3994,6 +4010,8 @@ int main(int argc, char **argv, char **envp) + ObjectProperty *prop; +@@ -3864,6 +3880,8 @@ int main(int argc, char **argv, char **envp) "mutually exclusive"); exit(EXIT_FAILURE); } @@ -151,5 +146,5 @@ index 32886ebf7f..2b95925f9b 100644 
configure_rtc(qemu_find_opts_singleton("rtc")); -- -2.20.1 +1.8.3.1 diff --git a/0015-Add-support-for-simpletrace.patch b/0015-Add-support-for-simpletrace.patch index 3a51b93..a845db4 100644 --- a/0015-Add-support-for-simpletrace.patch +++ b/0015-Add-support-for-simpletrace.patch @@ -1,4 +1,4 @@ -From 75da6e38041efca51934a46a4d481eaac57d4b1e Mon Sep 17 00:00:00 2001 +From 1e856ec9fb79314305380aba4c89c5d8987616a9 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 8 Oct 2015 09:50:17 +0200 Subject: Add support for simpletrace @@ -7,7 +7,7 @@ As simpletrace is upstream, we just need to properly handle it during rpmbuild. Signed-off-by: Miroslav Rezanina -Rebase notes (weekly-180727): +Rebase notes (3.1.0): - Fixed python 2 to python3 switch Rebase notes (2.9.0): @@ -21,14 +21,11 @@ Merged patches (2.3.0): - 5292fc3 trace: add SystemTap init scripts for simpletrace bridge - eda9e5e simpletrace: install simpletrace.py - 85c4c8f trace: add systemtap-initscript README file to RPM - -(cherry picked from commit bfc1d7f3628f2ffbabbae71d57a506cea6663ddf) -Signed-off-by: Danilo C. L. de Paula --- .gitignore | 2 ++ Makefile | 4 +++ - README.systemtap | 43 +++++++++++++++++++++++++ - redhat/qemu-kvm.spec.template | 26 ++++++++++++++- + README.systemtap | 43 +++++++++++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 26 +++++++++++++++++++- scripts/systemtap/conf.d/qemu_kvm.conf | 4 +++ scripts/systemtap/script.d/qemu_kvm.stp | 1 + 6 files changed, 79 insertions(+), 1 deletion(-) @@ -37,10 +34,10 @@ Signed-off-by: Danilo C. L. 
de Paula create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp diff --git a/Makefile b/Makefile -index 05f62eab3c..41a77a90af 100644 +index 288a5ac..8caca6c 100644 --- a/Makefile +++ b/Makefile -@@ -832,6 +832,10 @@ endif +@@ -910,6 +910,10 @@ endif $(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \ done $(INSTALL_DATA) $(BUILD_DIR)/trace-events-all "$(DESTDIR)$(qemu_datadir)/trace-events-all" @@ -48,12 +45,12 @@ index 05f62eab3c..41a77a90af 100644 + $(INSTALL_DATA) $(SRC_PATH)/scripts/systemtap/script.d/qemu_kvm.stp "$(DESTDIR)$(qemu_datadir)/systemtap/script.d/" + $(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/systemtap/conf.d" + $(INSTALL_DATA) $(SRC_PATH)/scripts/systemtap/conf.d/qemu_kvm.conf "$(DESTDIR)$(qemu_datadir)/systemtap/conf.d/" - for d in $(TARGET_DIRS); do \ - $(MAKE) $(SUBDIR_MAKEFLAGS) TARGET_DIR=$$d/ -C $$d $@ || exit 1 ; \ - done + + .PHONY: ctags + ctags: diff --git a/README.systemtap b/README.systemtap new file mode 100644 -index 0000000000..ad913fc990 +index 0000000..ad913fc --- /dev/null +++ b/README.systemtap @@ -0,0 +1,43 @@ @@ -102,7 +99,7 @@ index 0000000000..ad913fc990 + # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf new file mode 100644 -index 0000000000..372d8160a4 +index 0000000..372d816 --- /dev/null +++ b/scripts/systemtap/conf.d/qemu_kvm.conf @@ -0,0 +1,4 @@ @@ -112,11 +109,11 @@ index 0000000000..372d8160a4 +qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp new file mode 100644 -index 0000000000..c04abf9449 +index 0000000..c04abf9 --- /dev/null +++ b/scripts/systemtap/script.d/qemu_kvm.stp @@ -0,0 +1 @@ +probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} -- -2.20.1 +1.8.3.1 diff 
--git a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 640e92a..35095e1 100644 --- a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From ec354bd88aaaf383bdb2177f94dec39b930cbf90 Mon Sep 17 00:00:00 2001 +From 6653aed7b06fd81e60633f7ff81d2d2cd35777fd Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 30 Nov 2018 09:11:03 +0100 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -16,16 +16,15 @@ We change the name and location of qemu-kvm binaries. Update documentation to reflect this change. Only architectures available in RHEL are updated. Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. de Paula --- - docs/qemu-block-drivers.texi | 70 +++++++++--------- - docs/qemu-cpu-models.texi | 8 +- - qemu-doc.texi | 70 +++++++++--------- - qemu-options.hx | 140 ++++++++++++++++++----------------- + docs/qemu-block-drivers.texi | 70 +++++++++++----------- + docs/qemu-cpu-models.texi | 8 +-- + qemu-doc.texi | 70 +++++++++++----------- + qemu-options.hx | 140 ++++++++++++++++++++++--------------------- 4 files changed, 146 insertions(+), 142 deletions(-) diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi -index da06a9bc83..03c6705505 100644 +index 91ab0ec..35cc3d8 100644 --- a/docs/qemu-block-drivers.texi +++ b/docs/qemu-block-drivers.texi @@ -405,7 +405,7 @@ QEMU can automatically create a virtual FAT disk image from a @@ -236,10 +235,10 @@ index da06a9bc83..03c6705505 100644 @var{host}:@var{bus}:@var{slot}.@var{func} is the NVMe controller's PCI device diff --git a/docs/qemu-cpu-models.texi b/docs/qemu-cpu-models.texi -index 23c11dc86f..8697254ad4 100644 +index ad040cf..0de3229 100644 --- a/docs/qemu-cpu-models.texi +++ b/docs/qemu-cpu-models.texi -@@ -566,25 +566,25 @@ CPU models / features in QEMU and libvirt +@@ -578,25 
+578,25 @@ CPU models / features in QEMU and libvirt @item Host passthrough @example @@ -270,10 +269,10 @@ index 23c11dc86f..8697254ad4 100644 @end table diff --git a/qemu-doc.texi b/qemu-doc.texi -index ae3c3f9632..a8cd3660bc 100644 +index 577d1e8..44427bb 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi -@@ -206,12 +206,12 @@ Note that, by default, GUS shares IRQ(7) with parallel ports and so +@@ -207,12 +207,12 @@ Note that, by default, GUS shares IRQ(7) with parallel ports and so QEMU must be told to not have parallel ports to have working GUS. @example @@ -288,7 +287,7 @@ index ae3c3f9632..a8cd3660bc 100644 @end example Or some other unclaimed IRQ. -@@ -227,7 +227,7 @@ CS4231A is the chip used in Windows Sound System and GUSMAX products +@@ -228,7 +228,7 @@ CS4231A is the chip used in Windows Sound System and GUSMAX products Download and uncompress the linux image (@file{linux.img}) and type: @example @@ -297,7 +296,7 @@ index ae3c3f9632..a8cd3660bc 100644 @end example Linux should boot and give you a prompt. -@@ -237,7 +237,7 @@ Linux should boot and give you a prompt. +@@ -238,7 +238,7 @@ Linux should boot and give you a prompt. @example @c man begin SYNOPSIS @@ -306,7 +305,7 @@ index ae3c3f9632..a8cd3660bc 100644 @c man end @end example -@@ -277,21 +277,21 @@ is specified in seconds. The default is 0 which means no timeout. Libiscsi +@@ -278,21 +278,21 @@ is specified in seconds. The default is 0 which means no timeout. Libiscsi Example (without authentication): @example @@ -331,7 +330,7 @@ index ae3c3f9632..a8cd3660bc 100644 @end example @item NBD -@@ -306,12 +306,12 @@ Syntax for specifying a NBD device using Unix Domain Sockets +@@ -307,12 +307,12 @@ Syntax for specifying a NBD device using Unix Domain Sockets Example for TCP @example @@ -346,7 +345,7 @@ index ae3c3f9632..a8cd3660bc 100644 @end example @item SSH -@@ -319,8 +319,8 @@ QEMU supports SSH (Secure Shell) access to remote disks. 
+@@ -320,8 +320,8 @@ QEMU supports SSH (Secure Shell) access to remote disks. Examples: @example @@ -357,7 +356,7 @@ index ae3c3f9632..a8cd3660bc 100644 @end example Currently authentication must be done using ssh-agent. Other -@@ -338,7 +338,7 @@ sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag] +@@ -339,7 +339,7 @@ sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag] Example @example @@ -366,7 +365,7 @@ index ae3c3f9632..a8cd3660bc 100644 @end example See also @url{https://sheepdog.github.io/sheepdog/}. -@@ -364,17 +364,17 @@ JSON: +@@ -365,17 +365,17 @@ JSON: Example @example URI: @@ -387,7 +386,7 @@ index ae3c3f9632..a8cd3660bc 100644 @ file.debug=9,file.logfile=/var/log/qemu-gluster.log, @ file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007, @ file.server.1.type=unix,file.server.1.socket=/var/run/glusterd.socket -@@ -439,9 +439,9 @@ of . +@@ -440,9 +440,9 @@ of . Example: boot from a remote Fedora 20 live ISO image @example @@ -399,7 +398,7 @@ index ae3c3f9632..a8cd3660bc 100644 @end example Example: boot from a remote Fedora 20 cloud image using a local overlay for -@@ -449,7 +449,7 @@ writes, copy-on-read, and a readahead of 64k +@@ -450,7 +450,7 @@ writes, copy-on-read, and a readahead of 64k @example qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"http",, "file.url":"https://dl.fedoraproject.org/pub/fedora/linux/releases/20/Images/x86_64/Fedora-x86_64-20-20131211.1-sda.qcow2",, "file.readahead":"64k"@}' /tmp/Fedora-x86_64-20-20131211.1-sda.qcow2 @@ -408,7 +407,7 @@ index ae3c3f9632..a8cd3660bc 100644 @end example Example: boot from an image stored on a VMware vSphere server with a self-signed -@@ -458,7 +458,7 @@ of 10 seconds. +@@ -459,7 +459,7 @@ of 10 seconds. 
@example qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"https",, "file.url":"https://user:password@@vsphere.example.com/folder/test/test-flat.vmdk?dcPath=Datacenter&dsName=datastore1",, "file.sslverify":"off",, "file.readahead":"64k",, "file.timeout":10@}' /tmp/test.qcow2 @@ -417,7 +416,7 @@ index ae3c3f9632..a8cd3660bc 100644 @end example @end table -@@ -825,7 +825,7 @@ On Linux hosts, a shared memory device is available. The basic syntax +@@ -826,7 +826,7 @@ On Linux hosts, a shared memory device is available. The basic syntax is: @example @@ -426,7 +425,7 @@ index ae3c3f9632..a8cd3660bc 100644 @end example where @var{hostmem} names a host memory backend. For a POSIX shared -@@ -846,7 +846,7 @@ memory server is: +@@ -847,7 +847,7 @@ memory server is: ivshmem-server -p @var{pidfile} -S @var{path} -m @var{shm-name} -l @var{shm-size} -n @var{vectors} # Then start your qemu instances with matching arguments @@ -435,7 +434,7 @@ index ae3c3f9632..a8cd3660bc 100644 -chardev socket,path=@var{path},id=@var{id} @end example -@@ -871,7 +871,7 @@ Instead of specifying the using POSIX shm, you may specify +@@ -872,7 +872,7 @@ Instead of specifying the using POSIX shm, you may specify a memory backend that has hugepage support: @example @@ -444,7 +443,7 @@ index ae3c3f9632..a8cd3660bc 100644 -device ivshmem-plain,memdev=mb1 @end example -@@ -887,7 +887,7 @@ kernel testing. +@@ -888,7 +888,7 @@ kernel testing. The syntax is: @example @@ -453,7 +452,7 @@ index ae3c3f9632..a8cd3660bc 100644 @end example Use @option{-kernel} to provide the Linux kernel image and -@@ -902,7 +902,7 @@ If you do not need graphical output, you can disable it and redirect +@@ -903,7 +903,7 @@ If you do not need graphical output, you can disable it and redirect the virtual serial port and the QEMU monitor to the console with the @option{-nographic} option. 
The typical command line is: @example @@ -462,7 +461,7 @@ index ae3c3f9632..a8cd3660bc 100644 -append "root=/dev/hda console=ttyS0" -nographic @end example -@@ -968,7 +968,7 @@ Network adapter that supports CDC ethernet and RNDIS protocols. @var{id} +@@ -969,7 +969,7 @@ Network adapter that supports CDC ethernet and RNDIS protocols. @var{id} specifies a netdev defined with @code{-netdev @dots{},id=@var{id}}. For instance, user-mode networking can be used with @example @@ -471,7 +470,7 @@ index ae3c3f9632..a8cd3660bc 100644 @end example @item usb-ccid Smartcard reader device -@@ -987,7 +987,7 @@ no type is given, the HCI logic corresponds to @code{-bt hci,vlan=0}. +@@ -988,7 +988,7 @@ no type is given, the HCI logic corresponds to @code{-bt hci,vlan=0}. This USB device implements the USB Transport Layer of HCI. Example usage: @example @@ -480,7 +479,7 @@ index ae3c3f9632..a8cd3660bc 100644 @end example @end table -@@ -1064,7 +1064,7 @@ For this setup it is recommended to restrict it to listen on a UNIX domain +@@ -1065,7 +1065,7 @@ For this setup it is recommended to restrict it to listen on a UNIX domain socket only. For example @example @@ -489,7 +488,7 @@ index ae3c3f9632..a8cd3660bc 100644 @end example This ensures that only users on local box with read/write access to that -@@ -1087,7 +1087,7 @@ is running the password is set with the monitor. Until the monitor is used to +@@ -1088,7 +1088,7 @@ is running the password is set with the monitor. Until the monitor is used to set the password all clients will be rejected. @example @@ -498,7 +497,7 @@ index ae3c3f9632..a8cd3660bc 100644 (qemu) change vnc password Password: ******** (qemu) -@@ -1104,7 +1104,7 @@ support provides a secure session, but no authentication. This allows any +@@ -1105,7 +1105,7 @@ support provides a secure session, but no authentication. This allows any client to connect, and provides an encrypted session. 
@example @@ -507,7 +506,7 @@ index ae3c3f9632..a8cd3660bc 100644 -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=no \ -vnc :1,tls-creds=tls0 -monitor stdio @end example -@@ -1126,7 +1126,7 @@ same syntax as previously, but with @code{verify-peer} set to @code{yes} +@@ -1127,7 +1127,7 @@ same syntax as previously, but with @code{verify-peer} set to @code{yes} instead. @example @@ -516,7 +515,7 @@ index ae3c3f9632..a8cd3660bc 100644 -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ -vnc :1,tls-creds=tls0 -monitor stdio @end example -@@ -1139,7 +1139,7 @@ Finally, the previous method can be combined with VNC password authentication +@@ -1140,7 +1140,7 @@ Finally, the previous method can be combined with VNC password authentication to provide two layers of authentication for clients. @example @@ -525,7 +524,7 @@ index ae3c3f9632..a8cd3660bc 100644 -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ -vnc :1,tls-creds=tls0,password -monitor stdio (qemu) change vnc password -@@ -1164,7 +1164,7 @@ used for authentication, but assuming use of one supporting SSF, +@@ -1165,7 +1165,7 @@ used for authentication, but assuming use of one supporting SSF, then QEMU can be launched with: @example @@ -534,7 +533,7 @@ index ae3c3f9632..a8cd3660bc 100644 @end example @node vnc_sec_certificate_sasl -@@ -1178,7 +1178,7 @@ credentials. This can be enabled, by combining the 'sasl' option +@@ -1179,7 +1179,7 @@ credentials. 
This can be enabled, by combining the 'sasl' option with the aforementioned TLS + x509 options: @example @@ -543,7 +542,7 @@ index ae3c3f9632..a8cd3660bc 100644 -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ -vnc :1,tls-creds=tls0,sasl -monitor stdio @end example -@@ -1573,7 +1573,7 @@ QEMU has a primitive support to work with gdb, so that you can do +@@ -1574,7 +1574,7 @@ QEMU has a primitive support to work with gdb, so that you can do In order to use gdb, launch QEMU with the '-s' option. It will wait for a gdb connection: @example @@ -552,7 +551,7 @@ index ae3c3f9632..a8cd3660bc 100644 -append "root=/dev/hda" Connected to host network interface: tun0 Waiting gdb connection on port 1234 -@@ -1819,7 +1819,7 @@ Set the initial VGA graphic mode. The default is 800x600x32. +@@ -1820,7 +1820,7 @@ Set the initial VGA graphic mode. The default is 800x600x32. Set OpenBIOS variables in NVRAM, for example: @example @@ -562,10 +561,10 @@ index ae3c3f9632..a8cd3660bc 100644 -prom-env 'boot-args=conf=hd:2,\yaboot.conf' @end example diff --git a/qemu-options.hx b/qemu-options.hx -index 0f55062546..7ec1a3b099 100644 +index 6873f9e..9f323ec 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -253,7 +253,7 @@ This option defines a free-form string that can be used to describe @var{fd}. +@@ -254,7 +254,7 @@ This option defines a free-form string that can be used to describe @var{fd}. 
You can open an image using pre-opened file descriptors from an fd set: @example @@ -574,7 +573,7 @@ index 0f55062546..7ec1a3b099 100644 -add-fd fd=3,set=2,opaque="rdwr:/path/to/file" -add-fd fd=4,set=2,opaque="rdonly:/path/to/file" -drive file=/dev/fdset/2,index=0,media=disk -@@ -282,7 +282,7 @@ STEXI +@@ -283,7 +283,7 @@ STEXI Set default value of @var{driver}'s property @var{prop} to @var{value}, e.g.: @example @@ -583,7 +582,7 @@ index 0f55062546..7ec1a3b099 100644 @end example In particular, you can use this to set driver properties for devices which are -@@ -336,11 +336,11 @@ bootindex options. The default is non-strict boot. +@@ -337,11 +337,11 @@ bootindex options. The default is non-strict boot. @example # try to boot from network first, then from hard disk @@ -598,7 +597,7 @@ index 0f55062546..7ec1a3b099 100644 @end example Note: The legacy format '-boot @var{drives}' is still supported but its -@@ -369,7 +369,7 @@ For example, the following command-line sets the guest startup RAM size to +@@ -370,7 +370,7 @@ For example, the following command-line sets the guest startup RAM size to memory the guest can reach to 4GB: @example @@ -607,7 +606,7 @@ index 0f55062546..7ec1a3b099 100644 @end example If @var{slots} and @var{maxmem} are not specified, memory hotplug won't -@@ -668,12 +668,12 @@ Enable audio and selected sound hardware. Use 'help' to print all +@@ -669,12 +669,12 @@ Enable audio and selected sound hardware. Use 'help' to print all available sound hardware. @example @@ -626,7 +625,7 @@ index 0f55062546..7ec1a3b099 100644 @end example Note that Linux's i810_audio OSS kernel (for AC97) module might -@@ -1148,21 +1148,21 @@ is off. +@@ -1149,21 +1149,21 @@ is off. 
Instead of @option{-cdrom} you can use: @example @@ -654,7 +653,7 @@ index 0f55062546..7ec1a3b099 100644 -add-fd fd=3,set=2,opaque="rdwr:/path/to/file" -add-fd fd=4,set=2,opaque="rdonly:/path/to/file" -drive file=/dev/fdset/2,index=0,media=disk -@@ -1170,28 +1170,28 @@ qemu-system-i386 +@@ -1171,28 +1171,28 @@ qemu-system-i386 You can connect a CDROM to the slave of ide0: @example @@ -689,7 +688,7 @@ index 0f55062546..7ec1a3b099 100644 @end example ETEXI -@@ -2217,8 +2217,8 @@ The following two example do exactly the same, to show how @option{-nic} can +@@ -2258,8 +2258,8 @@ The following two example do exactly the same, to show how @option{-nic} can be used to shorten the command line length (note that the e1000 is the default on i386, so the @option{model=e1000} parameter could even be omitted here, too): @example @@ -700,7 +699,7 @@ index 0f55062546..7ec1a3b099 100644 @end example @item -nic none -@@ -2289,7 +2289,7 @@ can not be resolved. +@@ -2330,7 +2330,7 @@ can not be resolved. Example: @example @@ -709,7 +708,7 @@ index 0f55062546..7ec1a3b099 100644 @end example @item domainname=@var{domain} -@@ -2313,7 +2313,7 @@ a guest from a local directory. +@@ -2354,7 +2354,7 @@ a guest from a local directory. 
Example (using pxelinux): @example @@ -718,7 +717,7 @@ index 0f55062546..7ec1a3b099 100644 -netdev user,id=n1,tftp=/path/to/tftp/files,bootfile=/pxelinux.0 @end example -@@ -2347,7 +2347,7 @@ screen 0, use the following: +@@ -2388,7 +2388,7 @@ screen 0, use the following: @example # on the host @@ -727,7 +726,7 @@ index 0f55062546..7ec1a3b099 100644 # this host xterm should open in the guest X11 server xterm -display :1 @end example -@@ -2357,7 +2357,7 @@ the guest, use the following: +@@ -2398,7 +2398,7 @@ the guest, use the following: @example # on the host @@ -736,7 +735,7 @@ index 0f55062546..7ec1a3b099 100644 telnet localhost 5555 @end example -@@ -2376,7 +2376,7 @@ lifetime, like in the following example: +@@ -2417,7 +2417,7 @@ lifetime, like in the following example: @example # open 10.10.1.1:4321 on bootup, connect 10.0.2.100:1234 to it whenever # the guest accesses it @@ -745,7 +744,7 @@ index 0f55062546..7ec1a3b099 100644 @end example Or you can execute a command on every TCP connection established by the guest, -@@ -2385,7 +2385,7 @@ so that QEMU behaves similar to an inetd process for that virtual server: +@@ -2426,7 +2426,7 @@ so that QEMU behaves similar to an inetd process for that virtual server: @example # call "netcat 10.10.1.1 4321" on every TCP connection to 10.0.2.100:1234 # and connect the TCP stream to its stdin/stdout @@ -754,7 +753,7 @@ index 0f55062546..7ec1a3b099 100644 @end example @end table -@@ -2412,21 +2412,22 @@ Examples: +@@ -2453,21 +2453,22 @@ Examples: @example #launch a QEMU instance with the default network script @@ -780,7 +779,7 @@ index 0f55062546..7ec1a3b099 100644 -netdev tap,id=n1,"helper=/path/to/qemu-bridge-helper" @end example -@@ -2443,13 +2444,13 @@ Examples: +@@ -2484,13 +2485,13 @@ Examples: @example #launch a QEMU instance with the default network helper to #connect a TAP device to bridge br0 @@ -796,7 +795,7 @@ index 0f55062546..7ec1a3b099 100644 @end example @item -netdev 
socket,id=@var{id}[,fd=@var{h}][,listen=[@var{host}]:@var{port}][,connect=@var{host}:@var{port}] -@@ -2464,13 +2465,13 @@ specifies an already opened TCP socket. +@@ -2505,13 +2506,13 @@ specifies an already opened TCP socket. Example: @example # launch a first QEMU instance @@ -816,7 +815,7 @@ index 0f55062546..7ec1a3b099 100644 @end example @item -netdev socket,id=@var{id}[,fd=@var{h}][,mcast=@var{maddr}:@var{port}[,localaddr=@var{addr}]] -@@ -2493,15 +2494,15 @@ Use @option{fd=h} to specify an already opened UDP multicast socket. +@@ -2534,15 +2535,15 @@ Use @option{fd=h} to specify an already opened UDP multicast socket. Example: @example # launch one QEMU instance @@ -839,7 +838,7 @@ index 0f55062546..7ec1a3b099 100644 -device e1000,netdev=n3,mac=52:54:00:12:34:58 \ -netdev socket,id=n3,mcast=230.0.0.1:1234 @end example -@@ -2509,7 +2510,7 @@ qemu-system-i386 linux.img \ +@@ -2550,7 +2551,7 @@ qemu-system-i386 linux.img \ Example (User Mode Linux compat.): @example # launch QEMU instance (note mcast address selected is UML's default) @@ -848,7 +847,7 @@ index 0f55062546..7ec1a3b099 100644 -device e1000,netdev=n1,mac=52:54:00:12:34:56 \ -netdev socket,id=n1,mcast=239.192.168.1:1102 # launch UML -@@ -2518,9 +2519,12 @@ qemu-system-i386 linux.img \ +@@ -2559,9 +2560,12 @@ qemu-system-i386 linux.img \ Example (send packets from host's 1.2.3.4): @example @@ -864,7 +863,7 @@ index 0f55062546..7ec1a3b099 100644 @end example @item -netdev l2tpv3,id=@var{id},src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}] -@@ -2578,7 +2582,7 @@ brctl addif br-lan vmtunnel0 +@@ -2619,7 +2623,7 @@ brctl addif br-lan vmtunnel0 # on 4.3.2.1 # launch QEMU instance - if your network has reorder or is very lossy add ,pincounter @@ -873,7 +872,7 @@ index 0f55062546..7ec1a3b099 100644 
-netdev l2tpv3,id=n1,src=4.2.3.1,dst=1.2.3.4,udp,srcport=16384,dstport=16384,rxsession=0xffffffff,txsession=0xffffffff,counter @end example -@@ -2595,7 +2599,7 @@ Example: +@@ -2636,7 +2640,7 @@ Example: # launch vde switch vde_switch -F -sock /tmp/myswitch # launch QEMU instance @@ -882,7 +881,7 @@ index 0f55062546..7ec1a3b099 100644 @end example @item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n] -@@ -2609,11 +2613,11 @@ be created for multiqueue vhost-user. +@@ -2650,11 +2654,11 @@ be created for multiqueue vhost-user. Example: @example @@ -899,7 +898,7 @@ index 0f55062546..7ec1a3b099 100644 @end example @item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}] -@@ -3053,7 +3057,7 @@ and communicate. Requires the Linux @code{vhci} driver installed. Can +@@ -3094,7 +3098,7 @@ and communicate. Requires the Linux @code{vhci} driver installed. Can be used as following: @example @@ -908,7 +907,7 @@ index 0f55062546..7ec1a3b099 100644 @end example @item -bt device:@var{dev}[,vlan=@var{n}] -@@ -3503,14 +3507,14 @@ ETEXI +@@ -3544,14 +3548,14 @@ ETEXI DEF("realtime", HAS_ARG, QEMU_OPTION_realtime, "-realtime [mlock=on|off]\n" @@ -926,7 +925,7 @@ index 0f55062546..7ec1a3b099 100644 (enabled by default). ETEXI -@@ -3548,7 +3552,7 @@ connections will likely be TCP-based, but also UDP, pseudo TTY, or even +@@ -3589,7 +3593,7 @@ connections will likely be TCP-based, but also UDP, pseudo TTY, or even stdio are reasonable use case. The latter is allowing to start QEMU from within gdb and establish the connection via a pipe: @example @@ -935,7 +934,7 @@ index 0f55062546..7ec1a3b099 100644 @end example ETEXI -@@ -4467,7 +4471,7 @@ which specify the queue number of cryptodev backend, the default of +@@ -4559,7 +4563,7 @@ which specify the queue number of cryptodev backend, the default of @example @@ -944,7 +943,7 @@ index 0f55062546..7ec1a3b099 100644 [...] 
\ -object cryptodev-backend-builtin,id=cryptodev0 \ -device virtio-crypto-pci,id=crypto0,cryptodev=cryptodev0 \ -@@ -4487,7 +4491,7 @@ of cryptodev backend for multiqueue vhost-user, the default of @var{queues} is 1 +@@ -4579,7 +4583,7 @@ of cryptodev backend for multiqueue vhost-user, the default of @var{queues} is 1 @example @@ -954,5 +953,5 @@ index 0f55062546..7ec1a3b099 100644 -chardev socket,id=chardev0,path=/path/to/socket \ -object cryptodev-vhost-user,id=cryptodev0,chardev=chardev0 \ -- -2.20.1 +1.8.3.1 diff --git a/0017-usb-xhci-Fix-PCI-capability-order.patch b/0017-usb-xhci-Fix-PCI-capability-order.patch index b5e74a1..eea2b60 100644 --- a/0017-usb-xhci-Fix-PCI-capability-order.patch +++ b/0017-usb-xhci-Fix-PCI-capability-order.patch @@ -1,4 +1,4 @@ -From a4af398c3849751bc240c4f8f1f0cf2fcf57d21e Mon Sep 17 00:00:00 2001 +From 3d4a21ffdf876acfd5a8546a2f3e9ab2bfcf2ff9 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 5 May 2017 19:06:14 +0200 Subject: usb-xhci: Fix PCI capability order @@ -56,16 +56,15 @@ Conflicts: (cherry picked from commit a42f86dc906cc7d2c16d02bf125ed76847b469cb) (cherry picked from commit 992ab2e4f6e15d3e51bc716763aa8d6f45c6d29d) -Signed-off-by: Danilo C. L. 
de Paula --- hw/usb/hcd-xhci.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index ad351a7b6d..43a47539aa 100644 +index 9854fae..adfacac 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c -@@ -3393,6 +3393,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) +@@ -3397,6 +3397,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) xhci->max_pstreams_mask = 0; } @@ -78,7 +77,7 @@ index ad351a7b6d..43a47539aa 100644 if (xhci->msi != ON_OFF_AUTO_OFF) { ret = msi_init(dev, 0x70, xhci->numintrs, true, false, &err); /* Any error other than -ENOTSUP(board's MSI support is broken) -@@ -3441,12 +3447,6 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) +@@ -3445,12 +3451,6 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64, &xhci->mem); @@ -92,5 +91,5 @@ index ad351a7b6d..43a47539aa 100644 /* TODO check for errors, and should fail when msix=on */ msix_init(dev, xhci->numintrs, -- -2.20.1 +1.8.3.1 diff --git a/0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index c1e39f5..585ef69 100644 --- a/0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From bf7fc2d5907b351ddfc85416d48e47f66024ccbe Mon Sep 17 00:00:00 2001 +From 5ba84e6e9b843cc224735e293da35fc218da27fb Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] @@ -45,12 +45,12 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 839f120256..fd4b5c46ec 100644 +index 8b9e5e2..9df5494 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c -@@ -796,6 +796,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, - VirtIOSCSI *s = VIRTIO_SCSI(vdev); +@@ -805,6 +805,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, SCSIDevice *sd = SCSI_DEVICE(dev); + int ret; + /* XXX: Remove this check once block backend is capable of handling + * AioContext change upon eject/insert. @@ -62,8 +62,8 @@ index 839f120256..fd4b5c46ec 100644 + return; + } if (s->ctx && !s->dataplane_fenced) { - AioContext *ctx; if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; -- -2.20.1 +1.8.3.1 diff --git a/0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index 6061ba9..b932c83 100644 --- a/0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From 581909cc6cb4e8286126721b76a79e93a9880e1f Mon Sep 17 00:00:00 2001 +From 4653a397c083e29b2a54ade8b17c977f224a3b80 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts @@ -32,10 +32,10 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index edc5ed0e0c..f59ab398d4 100644 +index bbb001f..40cd9dc 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c -@@ -340,12 +340,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, +@@ -343,12 +343,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { @@ -53,8 +53,8 @@ index edc5ed0e0c..f59ab398d4 100644 + } +#endif - spapr_check_pagesize(spapr, qemu_getrampagesize(), errp); + spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); } -- -2.20.1 +1.8.3.1 diff --git a/0020-doc-fix-the-configuration-path.patch b/0020-doc-fix-the-configuration-path.patch deleted file mode 100644 index f4eea28..0000000 --- a/0020-doc-fix-the-configuration-path.patch +++ /dev/null @@ -1,80 +0,0 @@ -From b1799db735fe51f8b31c565814d79c7ec4fc82dc Mon Sep 17 00:00:00 2001 -From: Danilo de Paula -Date: Fri, 8 Feb 2019 11:51:14 +0000 -Subject: doc: fix the configuration path -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Danilo de Paula -Message-id: <20190208115114.24850-2-ddepaula@redhat.com> -Patchwork-id: 84320 -O-Subject: [RHEL8/rhel qemu-kvm PATCH v2 1/1] doc: fix the configuration path -Bugzilla: 1644985 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Wainer dos Santos Moschetta - -From: Marc-André Lureau - -Use a CONFDIR variable to show the configured sysconf path in the -generated documentations (html, man pages etc). - -Related to: -https://bugzilla.redhat.com/show_bug.cgi?id=1644985 - -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. 
de Paula ---- - Makefile | 9 ++++++--- - qemu-ga.texi | 4 ++-- - 2 files changed, 8 insertions(+), 5 deletions(-) - -diff --git a/Makefile b/Makefile -index 41a77a90af..20c39c41a8 100644 ---- a/Makefile -+++ b/Makefile -@@ -877,11 +877,14 @@ ui/shader.o: $(SRC_PATH)/ui/shader.c \ - MAKEINFO=makeinfo - MAKEINFOINCLUDES= -I docs -I $( $@,"GEN","$@") -+docs/version.texi: $(SRC_PATH)/VERSION config-host.mak -+ $(call quiet-command,(\ -+ echo "@set VERSION $(VERSION)" && \ -+ echo "@set CONFDIR $(qemu_confdir)" \ -+ )> $@,"GEN","$@") - - %.html: %.texi docs/version.texi - $(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --no-headers \ -diff --git a/qemu-ga.texi b/qemu-ga.texi -index 4c7a8fd163..f00ad830f2 100644 ---- a/qemu-ga.texi -+++ b/qemu-ga.texi -@@ -30,7 +30,7 @@ set user's password - @end itemize - - qemu-ga will read a system configuration file on startup (located at --@file{/etc/qemu/qemu-ga.conf} by default), then parse remaining -+@file{@value{CONFDIR}/qemu-ga.conf} by default), then parse remaining - configuration options on the command line. For the same key, the last - option wins, but the lists accumulate (see below for configuration - file format). -@@ -58,7 +58,7 @@ file format). - Enable fsfreeze hook. Accepts an optional argument that specifies - script to run on freeze/thaw. Script will be called with - 'freeze'/'thaw' arguments accordingly (default is -- @samp{/etc/qemu/fsfreeze-hook}). If using -F with an argument, do -+ @samp{@value{CONFDIR}/fsfreeze-hook}). If using -F with an argument, do - not follow -F with a space (for example: - @samp{-F/var/run/fsfreezehook.sh}). 
- --- -2.20.1 - diff --git a/0021-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch b/0021-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch deleted file mode 100644 index 2146485..0000000 --- a/0021-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 26606676d9d88c3f301fb8b3d39941ceff267af5 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Mon, 13 May 2019 16:27:20 +0200 -Subject: rhel: Set host-phys-bits-limit=48 on rhel machine-types - -RH-Author: plai@redhat.com -Message-id: <1553276715-26278-3-git-send-email-plai@redhat.com> -Patchwork-id: 85126 -O-Subject: [RHEL8.0 qemu-kvm PATCH 2/2] rhel: Set host-phys-bits-limit=48 on rhel machine-types -Bugzilla: 1688915 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Michael S. Tsirkin - -From: Eduardo Habkost - -Upstream status: not applicable - -Currently we use the host physical address size by default on -VMs. This was a good default on most cases, but this is not the -case on host CPUs supporting 5-level EPT. On those cases, we -want VMs to use 4-level EPT by default. - -Ensure VMs will use 4-level EPT by default, by limiting physical -address bits to 48. - -Not applicable upstream because upstream doesn't set -host-phys-bits=on by default. - -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. de Paula -(cherry picked from commit 01a2ecb4c38fe4a35455ea706e76984ee8d5a769) -Signed-off-by: Paul Lai -Signed-off-by: Danilo C. L. de Paula - -Rebase notes (4.0.0): -- update to no compat format - -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/i386/pc.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 03497eed9a..91a4c5833a 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -360,6 +360,11 @@ GlobalProperty pc_rhel_compat[] = { - .property = "host-phys-bits", - .value = "on", - }, -+ { /* PC_RHEL_COMPAT */ -+ .driver = TYPE_X86_CPU, -+ .property = "host-phys-bits-limit", -+ .value = "48", -+ }, - { /* PC_RHEL_COMPAT bz 1508330 */ - .driver = "vfio-pci", - .property = "x-no-geforce-quirks", --- -2.20.1 - diff --git a/0022-redhat-Post-rebase-synchronization.patch b/0022-redhat-Post-rebase-synchronization.patch deleted file mode 100644 index 001c982..0000000 --- a/0022-redhat-Post-rebase-synchronization.patch +++ /dev/null @@ -1,36 +0,0 @@ -From bbdbdb6ebc696a8bef420b6bd39fb3f5bbe0d5d4 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Mon, 13 May 2019 16:32:48 +0200 -Subject: redhat: Post rebase synchronization - -We based rebase on qemu-kvm-3.1.0-23.el8. However, 8.0.1 version moved -to qemu-kvm-3.1.0-25.el8. Adding missing changes. - -Merged-patches (4.0.0): -- ebb6e97 redhat: Fix LOCALVERSION creation -- b0ab0cc redhat: enable tpmdev passthrough (not disabling tests) -- 7cb3c4a Enable libpmem to support nvdimm - -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. 
de Paula ---- - configure | 4 ++++ - redhat/Makefile | 4 ++-- - redhat/qemu-kvm.spec.template | 28 +++++++++++++++++++++++++++- - 3 files changed, 33 insertions(+), 3 deletions(-) - -diff --git a/configure b/configure -index eb0a0dde86..d6d5912ee8 100755 ---- a/configure -+++ b/configure -@@ -2411,6 +2411,10 @@ if test "$seccomp" != "no" ; then - seccomp="no" - fi - fi -+ -+# RHEL8-specific, only passthrough for now, rhbz#1688312 -+tpm_emulator=no -+ - ########################################## - # xen probe - diff --git a/kvm-Disable-VXHS-support.patch b/kvm-Disable-VXHS-support.patch deleted file mode 100644 index 75a89ad..0000000 --- a/kvm-Disable-VXHS-support.patch +++ /dev/null @@ -1,293 +0,0 @@ -From 517e4a2f490ff56c8b3fbf9a56c8d4a6e167c2b6 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 29 May 2019 16:09:59 +0100 -Subject: [PATCH 1/9] Disable VXHS support - -RH-Author: Miroslav Rezanina -Message-id: <1559146199-30110-1-git-send-email-mrezanin@redhat.com> -Patchwork-id: 88273 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH] Disable VXHS support -Bugzilla: 1714937 -RH-Acked-by: Markus Armbruster -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Danilo de Paula - -From: Miroslav Rezanina - -As we ended our partnership with Veritas, we do not support VXHS anymore. -Reverting our downstream changes included in "Initial redhat commit". - -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. 
de Paula ---- - block/Makefile.objs | 2 +- - block/vxhs.c | 119 ++++-------------------------------------- - configure | 33 ++++++++++-- - redhat/qemu-kvm.spec.template | 11 +--- - 4 files changed, 41 insertions(+), 124 deletions(-) - -diff --git a/block/Makefile.objs b/block/Makefile.objs -index f4cf03b..7a81892 100644 ---- a/block/Makefile.objs -+++ b/block/Makefile.objs -@@ -30,7 +30,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o - block-obj-$(CONFIG_CURL) += curl.o - block-obj-$(CONFIG_RBD) += rbd.o - block-obj-$(CONFIG_GLUSTERFS) += gluster.o --#block-obj-$(CONFIG_VXHS) += vxhs.o -+block-obj-$(CONFIG_VXHS) += vxhs.o - block-obj-$(CONFIG_LIBSSH2) += ssh.o - block-obj-y += accounting.o dirty-bitmap.o - block-obj-y += write-threshold.o -diff --git a/block/vxhs.c b/block/vxhs.c -index 3dbb954..2e18229 100644 ---- a/block/vxhs.c -+++ b/block/vxhs.c -@@ -9,8 +9,7 @@ - */ - - #include "qemu/osdep.h" --#include "block/vxhs_shim.h" --#include -+#include - #include - #include "block/block_int.h" - #include "block/qdict.h" -@@ -60,97 +59,6 @@ typedef struct BDRVVXHSState { - char *tlscredsid; /* tlscredsid */ - } BDRVVXHSState; - --#define LIBVXHS_FULL_PATHNAME "/usr/lib64/qemu/libvxhs.so.1" --static bool libvxhs_loaded; --static GModule *libvxhs_handle; -- --static LibVXHSFuncs libvxhs; -- --typedef struct LibVXHSSymbols { -- const char *name; -- gpointer *addr; --} LibVXHSSymbols; -- --static LibVXHSSymbols libvxhs_symbols[] = { -- {"iio_init", (gpointer *) &libvxhs.iio_init}, -- {"iio_fini", (gpointer *) &libvxhs.iio_fini}, -- {"iio_min_version", (gpointer *) &libvxhs.iio_min_version}, -- {"iio_max_version", (gpointer *) &libvxhs.iio_max_version}, -- {"iio_open", (gpointer *) &libvxhs.iio_open}, -- {"iio_close", (gpointer *) &libvxhs.iio_close}, -- {"iio_writev", (gpointer *) &libvxhs.iio_writev}, -- {"iio_readv", (gpointer *) &libvxhs.iio_readv}, -- {"iio_ioctl", (gpointer *) &libvxhs.iio_ioctl}, -- {NULL} --}; -- --static void bdrv_vxhs_set_funcs(GModule *handle, Error 
**errp) --{ -- int i = 0; -- while (libvxhs_symbols[i].name) { -- const char *name = libvxhs_symbols[i].name; -- if (!g_module_symbol(handle, name, libvxhs_symbols[i].addr)) { -- error_setg(errp, "%s could not be loaded from libvxhs: %s", -- name, g_module_error()); -- return; -- } -- ++i; -- } --} -- --static void bdrv_vxhs_load_libs(Error **errp) --{ -- Error *local_err = NULL; -- int32_t ver; -- -- if (libvxhs_loaded) { -- return; -- } -- -- if (!g_module_supported()) { -- error_setg(errp, "modules are not supported on this platform: %s", -- g_module_error()); -- return; -- } -- -- libvxhs_handle = g_module_open(LIBVXHS_FULL_PATHNAME, -- G_MODULE_BIND_LAZY | G_MODULE_BIND_LOCAL); -- if (!libvxhs_handle) { -- error_setg(errp, "The VXHS library from Veritas might not be installed " -- "correctly (%s)", g_module_error()); -- return; -- } -- -- g_module_make_resident(libvxhs_handle); -- -- bdrv_vxhs_set_funcs(libvxhs_handle, &local_err); -- if (local_err) { -- error_propagate(errp, local_err); -- return; -- } -- -- /* Now check to see if the libvxhs we are using here is supported -- * by the loaded version */ -- -- ver = (*libvxhs.iio_min_version)(); -- if (ver > QNIO_VERSION) { -- error_setg(errp, "Trying to use libvxhs version %"PRId32" API, but " -- "only %"PRId32" or newer is supported by %s", -- QNIO_VERSION, ver, LIBVXHS_FULL_PATHNAME); -- return; -- } -- -- ver = (*libvxhs.iio_max_version)(); -- if (ver < QNIO_VERSION) { -- error_setg(errp, "Trying to use libvxhs version %"PRId32" API, but " -- "only %"PRId32" or earlier is supported by %s", -- QNIO_VERSION, ver, LIBVXHS_FULL_PATHNAME); -- return; -- } -- -- libvxhs_loaded = true; --} -- - static void vxhs_complete_aio_bh(void *opaque) - { - VXHSAIOCB *acb = opaque; -@@ -318,7 +226,7 @@ static void vxhs_refresh_limits(BlockDriverState *bs, Error **errp) - static int vxhs_init_and_ref(void) - { - if (vxhs_ref++ == 0) { -- if ((*libvxhs.iio_init)(QNIO_VERSION, vxhs_iio_callback)) { -+ if (iio_init(QNIO_VERSION, 
vxhs_iio_callback)) { - return -ENODEV; - } - } -@@ -328,7 +236,7 @@ static int vxhs_init_and_ref(void) - static void vxhs_unref(void) - { - if (--vxhs_ref == 0) { -- (*libvxhs.iio_fini)(); -+ iio_fini(); - } - } - -@@ -398,17 +306,8 @@ static int vxhs_open(BlockDriverState *bs, QDict *options, - char *client_key = NULL; - char *client_cert = NULL; - -- bdrv_vxhs_load_libs(&local_err); -- if (local_err) { -- error_propagate(errp, local_err); -- /* on error, cannot cleanup because the iio_fini() function -- * is not loaded */ -- return -EINVAL; -- } -- - ret = vxhs_init_and_ref(); - if (ret < 0) { -- error_setg(&local_err, "libvxhs iio_init() failed"); - ret = -EINVAL; - goto out; - } -@@ -493,8 +392,8 @@ static int vxhs_open(BlockDriverState *bs, QDict *options, - /* - * Open qnio channel to storage agent if not opened before - */ -- dev_handlep = (*libvxhs.iio_open)(of_vsa_addr, s->vdisk_guid, 0, -- cacert, client_key, client_cert); -+ dev_handlep = iio_open(of_vsa_addr, s->vdisk_guid, 0, -+ cacert, client_key, client_cert); - if (dev_handlep == NULL) { - trace_vxhs_open_iio_open(of_vsa_addr); - ret = -ENODEV; -@@ -554,11 +453,11 @@ static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, uint64_t offset, - - switch (iodir) { - case VDISK_AIO_WRITE: -- ret = (*libvxhs.iio_writev)(dev_handle, acb, qiov->iov, qiov->niov, -+ ret = iio_writev(dev_handle, acb, qiov->iov, qiov->niov, - offset, size, iio_flags); - break; - case VDISK_AIO_READ: -- ret = (*libvxhs.iio_writev)(dev_handle, acb, qiov->iov, qiov->niov, -+ ret = iio_readv(dev_handle, acb, qiov->iov, qiov->niov, - offset, size, iio_flags); - break; - default: -@@ -607,7 +506,7 @@ static void vxhs_close(BlockDriverState *bs) - * Close vDisk device - */ - if (s->vdisk_hostinfo.dev_handle) { -- (*libvxhs.iio_close)(s->vdisk_hostinfo.dev_handle); -+ iio_close(s->vdisk_hostinfo.dev_handle); - s->vdisk_hostinfo.dev_handle = NULL; - } - -@@ -629,7 +528,7 @@ static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s) - int ret = 
0; - void *dev_handle = s->vdisk_hostinfo.dev_handle; - -- ret = (*libvxhs.iio_ioctl)(dev_handle, IOR_VDISK_STAT, &vdisk_size, 0); -+ ret = iio_ioctl(dev_handle, IOR_VDISK_STAT, &vdisk_size, 0); - if (ret < 0) { - trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno); - return -EIO; -diff --git a/configure b/configure -index d6d5912..8cb6740 100755 ---- a/configure -+++ b/configure -@@ -3616,7 +3616,7 @@ fi - - glib_req_ver=2.40 - glib_modules=gthread-2.0 --if test "$modules" = yes -o "$vxhs" = yes; then -+if test "$modules" = yes; then - glib_modules="$glib_modules gmodule-export-2.0" - fi - -@@ -5760,6 +5760,33 @@ if compile_prog "" "" ; then - fi - - ########################################## -+# Veritas HyperScale block driver VxHS -+# Check if libvxhs is installed -+ -+if test "$vxhs" != "no" ; then -+ cat > $TMPC < -+#include -+ -+void *vxhs_callback; -+ -+int main(void) { -+ iio_init(QNIO_VERSION, vxhs_callback); -+ return 0; -+} -+EOF -+ vxhs_libs="-lvxhs -lssl" -+ if compile_prog "" "$vxhs_libs" ; then -+ vxhs=yes -+ else -+ if test "$vxhs" = "yes" ; then -+ feature_not_found "vxhs block device" "Install libvxhs See github" -+ fi -+ vxhs=no -+ fi -+fi -+ -+########################################## - # check for _Static_assert() - - have_static_assert=no -@@ -7195,8 +7222,8 @@ elif test "$pthread_setname_np_wo_tid" = "yes" ; then - fi - - if test "$vxhs" = "yes" ; then -- echo "CONFIG_VXHS=m" >> $config_host_mak -- echo "VXHS_LIBS= -lssl" >> $config_host_mak -+ echo "CONFIG_VXHS=y" >> $config_host_mak -+ echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak - fi - - if test "$libpmem" = "yes" ; then --- -1.8.3.1 - diff --git a/kvm-aarch64-Add-virt-rhel8.1.0-machine-type-for-ARM.patch b/kvm-aarch64-Add-virt-rhel8.1.0-machine-type-for-ARM.patch deleted file mode 100644 index db57f7b..0000000 --- a/kvm-aarch64-Add-virt-rhel8.1.0-machine-type-for-ARM.patch +++ /dev/null @@ -1,56 +0,0 @@ -From c3e39ef14e99f903b95fa051936f40ebd6c35484 Mon Sep 17 00:00:00 2001 
-From: Auger Eric -Date: Mon, 3 Jun 2019 19:17:39 +0100 -Subject: [PATCH 2/9] aarch64: Add virt-rhel8.1.0 machine type for ARM -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Auger Eric -Message-id: <20190603191740.32665-2-eric.auger@redhat.com> -Patchwork-id: 88461 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/2] aarch64: Add virt-rhel8.1.0 machine type for ARM -Bugzilla: 1713735 -RH-Acked-by: Andrew Jones -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Philippe Mathieu-Daudé - -This patch adds a new machine type, virt-rhel8.1.0, for QEMU rhel-av. -This machine type is based off 4.0 with the exception that it removes -support for dynamic sysbus devices: VFIO_CALXEDA_XGMAC, VFIO_AMD_XGBE, and -RAMFB_DEVICE because downstream RHEL doesn't support these devices. IOMMU -instantiation still is disabled but will be enabled in subsequent patches. - -Signed-off-by: Eric Auger -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/virt.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 5602d9f..9316a8d 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2158,7 +2158,7 @@ static void rhel_machine_init(void) - } - type_init(rhel_machine_init); - --static void rhel800_virt_instance_init(Object *obj) -+static void rhel810_virt_instance_init(Object *obj) - { - VirtMachineState *vms = VIRT_MACHINE(obj); - VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); -@@ -2204,8 +2204,8 @@ static void rhel800_virt_instance_init(Object *obj) - vms->irqmap=a15irqmap; - } - --static void rhel800_virt_options(MachineClass *mc) -+static void rhel810_virt_options(MachineClass *mc) - { - compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); - } --DEFINE_RHEL_MACHINE_AS_LATEST(8, 0, 0) -+DEFINE_RHEL_MACHINE_AS_LATEST(8, 1, 0) --- -1.8.3.1 - diff --git a/kvm-aarch64-Allow-ARM-VIRT-iommu-option-in-RHEL8.1-machi.patch 
b/kvm-aarch64-Allow-ARM-VIRT-iommu-option-in-RHEL8.1-machi.patch deleted file mode 100644 index d25318a..0000000 --- a/kvm-aarch64-Allow-ARM-VIRT-iommu-option-in-RHEL8.1-machi.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 59a46d11cad715b082d30c0de33317466a9bab9e Mon Sep 17 00:00:00 2001 -From: Auger Eric -Date: Mon, 3 Jun 2019 19:17:40 +0100 -Subject: [PATCH 3/9] aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Auger Eric -Message-id: <20190603191740.32665-3-eric.auger@redhat.com> -Patchwork-id: 88460 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/2] aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine -Bugzilla: 1713735 -RH-Acked-by: Andrew Jones -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Philippe Mathieu-Daudé - -This patch restores the ARM VIRT iommu option historically -introduced in qemu 3.0 and disabled in rhel8.0 (RHBZ#1656504 / -commit 7bfdb4cd2b49) as we did not have support of the feature -at libvirt level. - -Signed-off-by: Eric Auger -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/arm/virt.c | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 9316a8d..670fa10 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1785,7 +1785,6 @@ static void virt_set_gic_version(Object *obj, const char *value, Error **errp) - } - } - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - static char *virt_get_iommu(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1813,7 +1812,6 @@ static void virt_set_iommu(Object *obj, const char *value, Error **errp) - error_append_hint(errp, "Valid values are none, smmuv3.\n"); - } - } --#endif /* disabled for RHEL */ - - static CpuInstanceProperties - virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) -@@ -2198,8 +2196,13 @@ static void rhel810_virt_instance_init(Object *obj) - NULL); - } - -- /* IOMMU is disabled by default and non-configurable for RHEL */ -+ /* Default disallows iommu instantiation */ - vms->iommu = VIRT_IOMMU_NONE; -+ object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu, NULL); -+ object_property_set_description(obj, "iommu", -+ "Set the IOMMU type. 
" -+ "Valid values are none and smmuv3", -+ NULL); - - vms->irqmap=a15irqmap; - } --- -1.8.3.1 - diff --git a/kvm-aarch64-Compile-out-IOH3420.patch b/kvm-aarch64-Compile-out-IOH3420.patch deleted file mode 100644 index baf434c..0000000 --- a/kvm-aarch64-Compile-out-IOH3420.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 59097210f25bfcabc82c9989d3c083a6febeadd6 Mon Sep 17 00:00:00 2001 -From: Auger Eric -Date: Tue, 4 Jun 2019 15:23:26 +0100 -Subject: [PATCH 7/9] aarch64: Compile out IOH3420 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Auger Eric -Message-id: <20190604152326.18510-1-eric.auger@redhat.com> -Patchwork-id: 88525 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v2] aarch64: Compile out IOH3420 -Bugzilla: 1627283 -RH-Acked-by: Andrew Jones -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Laszlo Ersek - -BZ: 1627283 -BRANCH: rhel-av-8.1.0/master-4.0.0 -UPSTREAM: N/A -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=21989313 -TEST: On Sabre, no ioh3420 available - -IOH3420 is not used on aarch64 or arm as we prefer the generic root port -so let's compile it out. - -Signed-off-by: Eric Auger - ---- - -v1 -> v2 -- do not touch arm-softmmu.mak - -Signed-off-by: Danilo C. L. 
de Paula ---- - default-configs/aarch64-rh-devices.mak | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak -index 13ce7c7..224e29b 100644 ---- a/default-configs/aarch64-rh-devices.mak -+++ b/default-configs/aarch64-rh-devices.mak -@@ -12,7 +12,6 @@ CONFIG_EDID=y - CONFIG_FW_CFG_DMA=y - CONFIG_GPIO_KEY=y - CONFIG_I2C=y --CONFIG_IOH3420=y - CONFIG_IVSHMEM=y - CONFIG_KVM=y - CONFIG_LINUX=y --- -1.8.3.1 - diff --git a/kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch b/kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch deleted file mode 100644 index 68c7e07..0000000 --- a/kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch +++ /dev/null @@ -1,98 +0,0 @@ -From cff152749afe8b045db50fdd065756a217efa6e9 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Tue, 25 Jun 2019 21:07:09 +0200 -Subject: [PATCH 4/6] block/file-posix: Unaligned O_DIRECT block-status - -RH-Author: Max Reitz -Message-id: <20190625210710.20946-2-mreitz@redhat.com> -Patchwork-id: 88945 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/2] block/file-posix: Unaligned O_DIRECT block-status -Bugzilla: 1588356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: John Snow -RH-Acked-by: Stefano Garzarella - -Currently, qemu crashes whenever someone queries the block status of an -unaligned image tail of an O_DIRECT image: -$ echo > foo -$ qemu-img map --image-opts driver=file,filename=foo,cache.direct=on -Offset Length Mapped to File -qemu-img: block/io.c:2093: bdrv_co_block_status: Assertion `*pnum && -QEMU_IS_ALIGNED(*pnum, align) && align > offset - aligned_offset' -failed. - -This is because bdrv_co_block_status() checks that the result returned -by the driver's implementation is aligned to the request_alignment, but -file-posix can fail to do so, which is actually mentioned in a comment -there: "[...] possibly including a partial sector at EOF". - -Fix this by rounding up those partial sectors. 
- -There are two possible alternative fixes: -(1) We could refuse to open unaligned image files with O_DIRECT - altogether. That sounds reasonable until you realize that qcow2 - does necessarily not fill up its metadata clusters, and that nobody - runs qemu-img create with O_DIRECT. Therefore, unpreallocated qcow2 - files usually have an unaligned image tail. - -(2) bdrv_co_block_status() could ignore unaligned tails. It actually - throws away everything past the EOF already, so that sounds - reasonable. - Unfortunately, the block layer knows file lengths only with a - granularity of BDRV_SECTOR_SIZE, so bdrv_co_block_status() usually - would have to guess whether its file length information is inexact - or whether the driver is broken. - -Fixing what raw_co_block_status() returns is the safest thing to do. - -There seems to be no other block driver that sets request_alignment and -does not make sure that it always returns aligned values. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Max Reitz -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit 9c3db310ff0b7473272ae8dce5e04e2f8a825390) -Signed-off-by: Max Reitz -Signed-off-by: Miroslav Rezanina ---- - block/file-posix.c | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 1cf4ee4..c185f34 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -2475,6 +2475,8 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, - off_t data = 0, hole = 0; - int ret; - -+ assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment)); -+ - ret = fd_open(bs); - if (ret < 0) { - return ret; -@@ -2500,6 +2502,20 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs, - /* On a data extent, compute bytes to the end of the extent, - * possibly including a partial sector at EOF. 
*/ - *pnum = MIN(bytes, hole - offset); -+ -+ /* -+ * We are not allowed to return partial sectors, though, so -+ * round up if necessary. -+ */ -+ if (!QEMU_IS_ALIGNED(*pnum, bs->bl.request_alignment)) { -+ int64_t file_length = raw_getlength(bs); -+ if (file_length > 0) { -+ /* Ignore errors, this is just a safeguard */ -+ assert(hole == file_length); -+ } -+ *pnum = ROUND_UP(*pnum, bs->bl.request_alignment); -+ } -+ - ret = BDRV_BLOCK_DATA; - } else { - /* On a hole, compute bytes to the beginning of the next extent. */ --- -1.8.3.1 - diff --git a/kvm-compat-Generic-hw_compat_rhel_8_0.patch b/kvm-compat-Generic-hw_compat_rhel_8_0.patch deleted file mode 100644 index 017b400..0000000 --- a/kvm-compat-Generic-hw_compat_rhel_8_0.patch +++ /dev/null @@ -1,79 +0,0 @@ -From f19738e4bf8b9d5fb44ef30042cdad31b92edb22 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 24 May 2019 12:13:02 +0100 -Subject: [PATCH 2/4] compat: Generic hw_compat_rhel_8_0 - -RH-Author: Laurent Vivier -Message-id: <20190524121304.15530-3-lvivier@redhat.com> -Patchwork-id: 88201 -O-Subject: [RHEL-AV qemu-kvm PATCH v4 2/4] compat: Generic hw_compat_rhel_8_0 -Bugzilla: 1709726 -RH-Acked-by: David Gibson -RH-Acked-by: Cornelia Huck -RH-Acked-by: Dr. David Alan Gilbert - -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/core/machine.c | 30 ++++++++++++++++++++++++++++++ - include/hw/boards.h | 3 +++ - 2 files changed, 33 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index e808151..c4aa354 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -291,6 +291,36 @@ GlobalProperty hw_compat_rhel_7_6[] = { - }; - const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); - -+/* The same as hw_compat_3_1 -+ * format of array has been changed by: -+ * 6c36bddf5340 ("machine: Use shorter format for GlobalProperty arrays") -+ */ -+GlobalProperty hw_compat_rhel_8_0[] = { -+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ -+ { "pcie-root-port", "x-speed", "2_5" }, -+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ -+ { "pcie-root-port", "x-width", "1" }, -+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ -+ { "memory-backend-file", "x-use-canonical-path-for-ramblock-id", "true" }, -+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ -+ { "memory-backend-memfd", "x-use-canonical-path-for-ramblock-id", "true" }, -+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ -+ { "tpm-crb", "ppi", "false" }, -+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ -+ { "tpm-tis", "ppi", "false" }, -+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ -+ { "usb-kbd", "serial", "42" }, -+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ -+ { "usb-mouse", "serial", "42" }, -+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ -+ { "usb-tablet", "serial", "42" }, -+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ -+ { "virtio-blk-device", "discard", "false" }, -+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ -+ { "virtio-blk-device", "write-zeroes", "false" }, -+}; -+const size_t hw_compat_rhel_8_0_len = G_N_ELEMENTS(hw_compat_rhel_8_0); -+ - - GlobalProperty hw_compat_3_1[] = { - { "pcie-root-port", "x-speed", "2_5" }, -diff --git a/include/hw/boards.h b/include/hw/boards.h -index b292365..f159e9e 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -337,6 +337,9 @@ extern const size_t 
hw_compat_2_2_len; - extern GlobalProperty hw_compat_2_1[]; - extern const size_t hw_compat_2_1_len; - -+extern GlobalProperty hw_compat_rhel_8_0[]; -+extern const size_t hw_compat_rhel_8_0_len; -+ - extern GlobalProperty hw_compat_rhel_7_6[]; - extern const size_t hw_compat_rhel_7_6_len; - --- -1.8.3.1 - diff --git a/kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch b/kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch deleted file mode 100644 index 8c47593..0000000 --- a/kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch +++ /dev/null @@ -1,182 +0,0 @@ -From d986fc898ca8a20b486afe92dc0c7b370f482366 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Tue, 25 Jun 2019 21:07:10 +0200 -Subject: [PATCH 5/6] iotests: Test unaligned raw images with O_DIRECT - -RH-Author: Max Reitz -Message-id: <20190625210710.20946-3-mreitz@redhat.com> -Patchwork-id: 88946 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/2] iotests: Test unaligned raw images with O_DIRECT -Bugzilla: 1588356 -RH-Acked-by: Kevin Wolf -RH-Acked-by: John Snow -RH-Acked-by: Stefano Garzarella - -We already have 221 for accesses through the page cache, but it is -better to create a new file for O_DIRECT instead of integrating those -test cases into 221. This way, we can make use of -_supported_cache_modes (and _default_cache_mode) so the test is -automatically skipped on filesystems that do not support O_DIRECT. - -As part of the split, add _supported_cache_modes to 221. With that, it -no longer fails when run with -c none or -c directsync. 
- -Signed-off-by: Max Reitz -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit 2fab30c80b33cdc6157c7efe6207e54b6835cf92) -Signed-off-by: Max Reitz -Signed-off-by: Miroslav Rezanina ---- - tests/qemu-iotests/221 | 4 +++ - tests/qemu-iotests/253 | 84 ++++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/253.out | 14 ++++++++ - tests/qemu-iotests/group | 1 + - 4 files changed, 103 insertions(+) - create mode 100755 tests/qemu-iotests/253 - create mode 100644 tests/qemu-iotests/253.out - -diff --git a/tests/qemu-iotests/221 b/tests/qemu-iotests/221 -index 808cd9a..92c9b13 100755 ---- a/tests/qemu-iotests/221 -+++ b/tests/qemu-iotests/221 -@@ -1,6 +1,7 @@ - #!/usr/bin/env bash - # - # Test qemu-img vs. unaligned images -+# (See also 253, which is the O_DIRECT version) - # - # Copyright (C) 2018 Red Hat, Inc. - # -@@ -37,6 +38,9 @@ _supported_fmt raw - _supported_proto file - _supported_os Linux - -+_default_cache_mode writeback -+_supported_cache_modes writeback writethrough unsafe -+ - echo - echo "=== Check mapping of unaligned raw image ===" - echo -diff --git a/tests/qemu-iotests/253 b/tests/qemu-iotests/253 -new file mode 100755 -index 0000000..d88d5af ---- /dev/null -+++ b/tests/qemu-iotests/253 -@@ -0,0 +1,84 @@ -+#!/usr/bin/env bash -+# -+# Test qemu-img vs. unaligned images; O_DIRECT version -+# (Originates from 221) -+# -+# Copyright (C) 2019 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. 
-+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+seq="$(basename $0)" -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! -+ -+_cleanup() -+{ -+ _cleanup_test_img -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+. ./common.rc -+. ./common.filter -+ -+_supported_fmt raw -+_supported_proto file -+_supported_os Linux -+ -+_default_cache_mode none -+_supported_cache_modes none directsync -+ -+echo -+echo "=== Check mapping of unaligned raw image ===" -+echo -+ -+# We do not know how large a physical sector is, but it is certainly -+# going to be a factor of 1 MB -+size=$((1 * 1024 * 1024 - 1)) -+ -+# qemu-img create rounds size up to BDRV_SECTOR_SIZE -+_make_test_img $size -+$QEMU_IMG map --output=json --image-opts \ -+ "driver=$IMGFMT,file.driver=file,file.filename=$TEST_IMG,cache.direct=on" \ -+ | _filter_qemu_img_map -+ -+# so we resize it and check again -+truncate --size=$size "$TEST_IMG" -+$QEMU_IMG map --output=json --image-opts \ -+ "driver=$IMGFMT,file.driver=file,file.filename=$TEST_IMG,cache.direct=on" \ -+ | _filter_qemu_img_map -+ -+# qemu-io with O_DIRECT always writes whole physical sectors. Again, -+# we do not know how large a physical sector is, so we just start -+# writing from a 64 kB boundary, which should always be aligned. -+offset=$((1 * 1024 * 1024 - 64 * 1024)) -+$QEMU_IO -c "w $offset $((size - offset))" "$TEST_IMG" | _filter_qemu_io -+$QEMU_IMG map --output=json --image-opts \ -+ "driver=$IMGFMT,file.driver=file,file.filename=$TEST_IMG,cache.direct=on" \ -+ | _filter_qemu_img_map -+ -+# Resize it and check again -- contrary to 221, we may not get partial -+# sectors here, so there should be only two areas (one zero, one -+# data). 
-+truncate --size=$size "$TEST_IMG" -+$QEMU_IMG map --output=json --image-opts \ -+ "driver=$IMGFMT,file.driver=file,file.filename=$TEST_IMG,cache.direct=on" \ -+ | _filter_qemu_img_map -+ -+# success, all done -+echo '*** done' -+rm -f $seq.full -+status=0 -diff --git a/tests/qemu-iotests/253.out b/tests/qemu-iotests/253.out -new file mode 100644 -index 0000000..607c0ba ---- /dev/null -+++ b/tests/qemu-iotests/253.out -@@ -0,0 +1,14 @@ -+QA output created by 253 -+ -+=== Check mapping of unaligned raw image === -+ -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048575 -+[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] -+[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] -+wrote 65535/65535 bytes at offset 983040 -+63.999 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, -+{ "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] -+[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, -+{ "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] -+*** done -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 0db5e68..3ea739d 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -248,3 +248,4 @@ - 246 rw auto quick - 247 rw auto quick - 248 rw auto quick -+253 rw auto quick --- -1.8.3.1 - diff --git a/kvm-qxl-check-release-info-object.patch b/kvm-qxl-check-release-info-object.patch deleted file mode 100644 index 10e0147..0000000 --- a/kvm-qxl-check-release-info-object.patch +++ /dev/null @@ -1,51 +0,0 @@ -From b779db9ffd8626b74f969a7c2484239715f2d9e8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Wed, 19 Jun 2019 17:18:47 +0200 -Subject: [PATCH 2/6] qxl: check release info object -MIME-Version: 
1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Philippe Mathieu-Daudé -Message-id: <20190619171847.32603-2-philmd@redhat.com> -Patchwork-id: 88739 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] qxl: check release info object -Bugzilla: 1712717 -RH-Acked-by: Gerd Hoffmann -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Stefan Hajnoczi - -From: Prasad J Pandit - -When releasing spice resources in release_resource() routine, -if release info object 'ext.info' is null, it leads to null -pointer dereference. Add check to avoid it. - -Reported-by: Bugs SysSec -Signed-off-by: Prasad J Pandit -Message-id: 20190425063534.32747-1-ppandit@redhat.com -Signed-off-by: Gerd Hoffmann -(cherry picked from commit d52680fc932efb8a2f334cc6993e705ed1e31e99) -Signed-off-by: Philippe Mathieu-Daudé -Signed-off-by: Miroslav Rezanina ---- - hw/display/qxl.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/display/qxl.c b/hw/display/qxl.c -index c8ce578..632923a 100644 ---- a/hw/display/qxl.c -+++ b/hw/display/qxl.c -@@ -777,6 +777,9 @@ static void interface_release_resource(QXLInstance *sin, - QXLReleaseRing *ring; - uint64_t *item, id; - -+ if (!ext.info) { -+ return; -+ } - if (ext.group_id == MEMSLOT_GROUP_HOST) { - /* host group -> vga mode update request */ - QXLCommandExt *cmdext = (void *)(intptr_t)(ext.info->id); --- -1.8.3.1 - diff --git a/kvm-redhat-define-pseries-rhel8.1.0-machine-type.patch b/kvm-redhat-define-pseries-rhel8.1.0-machine-type.patch deleted file mode 100644 index 9c65c32..0000000 --- a/kvm-redhat-define-pseries-rhel8.1.0-machine-type.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 89f01dab29d3c9a833c9c36a8f2151b6f97696c7 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 24 May 2019 12:13:04 +0100 -Subject: [PATCH 4/4] redhat: define pseries-rhel8.1.0 machine type - -RH-Author: Laurent Vivier -Message-id: <20190524121304.15530-5-lvivier@redhat.com> -Patchwork-id: 88203 -O-Subject: [RHEL-AV qemu-kvm PATCH v4 4/4] 
redhat: define pseries-rhel8.1.0 machine type -Bugzilla: 1709726 -RH-Acked-by: David Gibson -RH-Acked-by: Cornelia Huck -RH-Acked-by: Dr. David Alan Gilbert - -update pseries-rhel8.0.0 like pseries-3.1 except -for SPAPR_CAP_CFPC, SPAPR_CAP_SBBC, SPAPR_CAP_IBS -that have already been backported. - -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr.c | 28 ++++++++++++++++++++++++++-- - 1 file changed, 26 insertions(+), 2 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 67c9e01..8adcb97 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -4670,15 +4670,39 @@ DEFINE_SPAPR_MACHINE(2_1, "2.1", false); - #endif - - /* -+ * pseries-rhel8.1.0 -+ */ -+ -+static void spapr_machine_rhel810_class_options(MachineClass *mc) -+{ -+ /* Defaults for the latest behaviour inherited from the base class */ -+} -+ -+DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", true); -+ -+/* - * pseries-rhel8.0.0 -+ * like spapr_compat_3_1 -+ * except SPAPR_CAP_CFPC, SPAPR_CAP_SBBC and SPAPR_CAP_IBS -+ * that have been backported to pseries-rhel8.0.0 - */ - - static void spapr_machine_rhel800_class_options(MachineClass *mc) - { -- /* Defaults for the latest behaviour inherited from the base class */ -+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel810_class_options(mc); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, -+ hw_compat_rhel_8_0_len); -+ -+ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); -+ smc->update_dt_enabled = false; -+ smc->dr_phb_enabled = false; -+ smc->broken_host_serial_model = true; -+ smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; - } - --DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", true); -+DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", false); - - /* - * pseries-rhel7.6.0 --- -1.8.3.1 - diff --git a/kvm-redhat-fix-cut-n-paste-garbage-in-hw_compat-comments.patch b/kvm-redhat-fix-cut-n-paste-garbage-in-hw_compat-comments.patch deleted file mode 100644 index 
f039cc5..0000000 --- a/kvm-redhat-fix-cut-n-paste-garbage-in-hw_compat-comments.patch +++ /dev/null @@ -1,104 +0,0 @@ -From 479ad3075d0d2832f5327b917c8279feaae5f6d3 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 24 May 2019 12:13:01 +0100 -Subject: [PATCH 1/4] redhat: fix cut'n'paste garbage in hw_compat comments -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -Message-id: <20190524121304.15530-2-lvivier@redhat.com> -Patchwork-id: 88200 -O-Subject: [RHEL-AV qemu-kvm PATCH v4 1/4] redhat: fix cut'n'paste garbage in hw_compat comments -Bugzilla: 1709726 -RH-Acked-by: David Gibson -RH-Acked-by: Cornelia Huck -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Philippe Mathieu-Daudé - -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/core/machine.c | 38 +++++++++++++++++++------------------- - 1 file changed, 19 insertions(+), 19 deletions(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index fd1594d..e808151 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -25,12 +25,12 @@ - #include "hw/mem/nvdimm.h" - - /* Mostly like hw_compat_2_1 but: -- * * we don't need virtio-scsi-pci since 7.0 already had that on -- * * -- * * RH: Note, qemu-extended-regs should have been enabled in the 7.1 -- * * machine type, but was accidentally turned off in 7.2 onwards. -- * * -- * */ -+ * we don't need virtio-scsi-pci since 7.0 already had that on -+ * -+ * RH: Note, qemu-extended-regs should have been enabled in the 7.1 -+ * machine type, but was accidentally turned off in 7.2 onwards. 
-+ * -+ */ - GlobalProperty hw_compat_rhel_7_1[] = { - { /* COMPAT_RHEL7.1 */ - .driver = "intel-hda-generic", -@@ -81,8 +81,8 @@ GlobalProperty hw_compat_rhel_7_1[] = { - const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); - - /* Mostly like hw_compat_2_4 + 2_3 but: -- * * we don't need "any_layout" as it has been backported to 7.2 -- * */ -+ * we don't need "any_layout" as it has been backported to 7.2 -+ */ - - GlobalProperty hw_compat_rhel_7_2[] = { - { -@@ -144,9 +144,9 @@ GlobalProperty hw_compat_rhel_7_2[] = { - const size_t hw_compat_rhel_7_2_len = G_N_ELEMENTS(hw_compat_rhel_7_2); - - /* Mostly like HW_COMPAT_2_6 + HW_COMPAT_2_7 + HW_COMPAT_2_8 except -- * * disable-modern, disable-legacy, page-per-vq have already been -- * * backported to RHEL7.3 -- * */ -+ * disable-modern, disable-legacy, page-per-vq have already been -+ * backported to RHEL7.3 -+ */ - GlobalProperty hw_compat_rhel_7_3[] = { - { /* HW_COMPAT_RHEL7_3 */ - .driver = "virtio-mmio", -@@ -209,9 +209,9 @@ GlobalProperty hw_compat_rhel_7_3[] = { - const size_t hw_compat_rhel_7_3_len = G_N_ELEMENTS(hw_compat_rhel_7_3); - - /* Mostly like hw_compat_2_9 except -- * * x-mtu-bypass-backend, x-migrate-msix has already been -- * * backported to RHEL7.4. shpc was already on in 7.4. -- * */ -+ * x-mtu-bypass-backend, x-migrate-msix has already been -+ * backported to RHEL7.4. shpc was already on in 7.4. 
-+ */ - GlobalProperty hw_compat_rhel_7_4[] = { - { /* HW_COMPAT_RHEL7_4 */ - .driver = "intel-iommu", -@@ -261,11 +261,11 @@ GlobalProperty hw_compat_rhel_7_5[] = { - const size_t hw_compat_rhel_7_5_len = G_N_ELEMENTS(hw_compat_rhel_7_5); - - /* The same as hw_compat_3_0 + hw_compat_2_12 -- * * except that -- * * there's nothing in 3_0 -- * * migration.decompress-error-check=off was in 7.5 from bz 1584139 -- * * -- * */ -+ * except that -+ * there's nothing in 3_0 -+ * migration.decompress-error-check=off was in 7.5 from bz 1584139 -+ * -+ */ - GlobalProperty hw_compat_rhel_7_6[] = { - { /* HW_COMPAT_RHEL7_6 from HW_COMPAT_2_12 */ - .driver = "hda-audio", --- -1.8.3.1 - diff --git a/kvm-redhat-sync-pseries-rhel7.6.0-with-rhel-av-8.0.1.patch b/kvm-redhat-sync-pseries-rhel7.6.0-with-rhel-av-8.0.1.patch deleted file mode 100644 index 3d890ba..0000000 --- a/kvm-redhat-sync-pseries-rhel7.6.0-with-rhel-av-8.0.1.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 2511c637a95e2a5f2a471c03a4dafcb14fbe4db8 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 24 May 2019 12:13:03 +0100 -Subject: [PATCH 3/4] redhat: sync pseries-rhel7.6.0 with rhel-av-8.0.1 - -RH-Author: Laurent Vivier -Message-id: <20190524121304.15530-4-lvivier@redhat.com> -Patchwork-id: 88199 -O-Subject: [RHEL-AV qemu-kvm PATCH v4 3/4] redhat: sync pseries-rhel7.6.0 with rhel-av-8.0.1 -Bugzilla: 1709726 -RH-Acked-by: David Gibson -RH-Acked-by: Cornelia Huck -RH-Acked-by: Dr. David Alan Gilbert - -The default values for SPAPR_CAP_CFPC, SPAPR_CAP_SBBC and -SPAPR_CAP_IBS are SPAPR_CAP_WORKAROUND in pseries-4.0 and -SPAPR_CAP_BROKEN in pseries-3.1, but the change has been -backported to pseries-rhel8.0.0 in rhel-av-8.0.1 so it has -to be reset to SPAPR_CAP_BROKEN in pseries-rhe7.6.0 rather -than in pseries-rhel8.0.0. - -This patch also removes default_cpu_type to POWER8 as it will -be set in pseries-rhel8.0.0 (POWER9 appears with pseries-4.0) - -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/spapr.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index daa5920..67c9e01 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -4719,8 +4719,14 @@ static void spapr_machine_rhel760_class_options(MachineClass *mc) - * yet. Postpone this to machine init (see default_caps_with_cpu()). - */ - smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0; -- /* Defaults for the latest behaviour inherited from the base class */ -- mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); -+ -+ /* SPAPR_CAP_WORKAROUND enabled in pseries-rhel800 by -+ * f21757edc554 -+ * "Enable mitigations by default for pseries-4.0 machine type") -+ */ -+ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN; -+ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN; -+ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; - } - - DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", false); --- -1.8.3.1 - diff --git a/kvm-rh-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch b/kvm-rh-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch deleted file mode 100644 index 20f0f65..0000000 --- a/kvm-rh-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 27b7c444c3a568e87647f5386fbfc2c0f2b1ff9b Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Fri, 28 Jun 2019 08:34:00 +0200 -Subject: [PATCH 6/6] rh: set CONFIG_BOCHS_DISPLAY=y for x86 - -RH-Author: Gerd Hoffmann -Message-id: <20190628083400.7016-2-kraxel@redhat.com> -Patchwork-id: 88989 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v2 1/1] rh: set CONFIG_BOCHS_DISPLAY=y for x86 -Bugzilla: 1707118 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Danilo de Paula - --device bochs-display can replace -device VGA for cases where -legacy vga emulation is not needed. That is the case for UEFI -guests (they use EFI GOP for boot display). seabios guest can -work too with some quirks. 
- -Main advantage: Much simpler device emulation -> reduced attach surface. - -Signed-off-by: Gerd Hoffmann -Signed-off-by: Miroslav Rezanina ---- - default-configs/x86_64-rh-devices.mak | 1 + - redhat/qemu-kvm.spec.template | 4 ++++ - 2 files changed, 5 insertions(+) - -diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak -index 01b5363..05ad6cf 100644 ---- a/default-configs/x86_64-rh-devices.mak -+++ b/default-configs/x86_64-rh-devices.mak -@@ -12,6 +12,7 @@ CONFIG_ACPI_X86_ICH=y - CONFIG_AHCI=y - CONFIG_APIC=y - CONFIG_APM=y -+CONFIG_BOCHS_DISPLAY=y - CONFIG_DIMM=y - CONFIG_E1000E_PCI_EXPRESS=y - CONFIG_E1000_PCI=y --- -1.8.3.1 - diff --git a/kvm-target-i386-add-MDS-NO-feature.patch b/kvm-target-i386-add-MDS-NO-feature.patch deleted file mode 100644 index 9fec966..0000000 --- a/kvm-target-i386-add-MDS-NO-feature.patch +++ /dev/null @@ -1,51 +0,0 @@ -From dd19ddadfbabc54415977cd0b9b3f520a87988ad Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Sun, 23 Jun 2019 15:19:17 +0200 -Subject: [PATCH 3/6] target/i386: add MDS-NO feature - -RH-Author: Paolo Bonzini -Message-id: <20190623151917.7942-1-pbonzini@redhat.com> -Patchwork-id: 88873 -O-Subject: [RHEL-AV-8.1.0 PATCH qemu-kvm] target/i386: add MDS-NO feature -Bugzilla: 1722839 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Miroslav Rezanina - -Bugzilla: 1722839 - -Brew build: 22317828 - -Microarchitectural Data Sampling is a hardware vulnerability which allows -unprivileged speculative access to data which is available in various CPU -internal buffers. - -Some Intel processors use the ARCH_CAP_MDS_NO bit in the -IA32_ARCH_CAPABILITIES -MSR to report that they are not vulnerable, make it available to guests. 
- -Signed-off-by: Paolo Bonzini -Message-Id: <20190516185320.28340-1-pbonzini@redhat.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit 20140a82c67467f53814ca197403d5e1b561a5e5) -Signed-off-by: Miroslav Rezanina ---- - target/i386/cpu.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 3886464..2e73821 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1183,7 +1183,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - .type = MSR_FEATURE_WORD, - .feat_names = { - "rdctl-no", "ibrs-all", "rsba", "skip-l1dfl-vmentry", -- "ssb-no", NULL, NULL, NULL, -+ "ssb-no", "mds-no", NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, --- -1.8.3.1 - diff --git a/kvm-target-i386-define-md-clear-bit.patch b/kvm-target-i386-define-md-clear-bit.patch deleted file mode 100644 index 5bb1258..0000000 --- a/kvm-target-i386-define-md-clear-bit.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 790c94e3240549de61881aebb94d4d933252e540 Mon Sep 17 00:00:00 2001 -From: Danilo de Paula -Date: Mon, 20 May 2019 18:29:57 +0100 -Subject: [PATCH] target/i386: define md-clear bit - -RH-Author: Danilo de Paula -Message-id: <20190520182957.26425-1-ddepaula@redhat.com> -Patchwork-id: 88110 -O-Subject: [RHEL-8 + RHEL-AV qemu-kvm PATCH] target/i386: define md-clear bit -Bugzilla: 1703297 1703304 1703310 1707274 -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: John Snow - -From: Paolo Bonzini - -BZ: 1703310 -BZ: 1703304 -BZ: 1703297 -BZ: 1707274 -branch: rhel-av-8.1.0/master-4.0.0 - -BZ: 1705851 -BZ: 1704542 -BZ: 1704538 -BZ: 1704534 -branch: rhel-av-8.0.1 - -BZ: 1703308 -BZ: 1703302 -branch: rhel-8.1.0 - -md-clear is a new CPUID bit which is set when microcode provides the -mechanism to invoke a flush of various exploitable CPU buffers by invoking -the VERW instruction. - -Signed-off-by: Paolo Bonzini -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 6472cd2..3886464 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1076,7 +1076,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - .feat_names = { - NULL, NULL, "avx512-4vnniw", "avx512-4fmaps", - NULL, NULL, NULL, NULL, -- NULL, NULL, NULL, NULL, -+ NULL, NULL, "md-clear", NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, --- -1.8.3.1 - diff --git a/kvm-usb-call-reset-handler-before-updating-state.patch b/kvm-usb-call-reset-handler-before-updating-state.patch deleted file mode 100644 index 53806c6..0000000 --- a/kvm-usb-call-reset-handler-before-updating-state.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 646f497c8e1dcea5c1ec8731693e3f06be8f6cc2 Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Tue, 4 Jun 2019 07:13:39 +0100 -Subject: [PATCH 4/9] usb: call reset handler before updating state -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Gerd Hoffmann -Message-id: <20190604071341.3432-2-kraxel@redhat.com> -Patchwork-id: 88478 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/3] usb: call reset handler before updating state -Bugzilla: 1713679 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Max Reitz - -That way the device reset handler can see what -the before-reset state of the device is. - -Signed-off-by: Gerd Hoffmann -Message-id: 20190522094702.17619-2-kraxel@redhat.com -(cherry picked from commit 7ed4657396add28382081a15557c78cd480c1cf1) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/usb/core.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/usb/core.c b/hw/usb/core.c -index 8fbd9c7..3ab48a1 100644 ---- a/hw/usb/core.c -+++ b/hw/usb/core.c -@@ -87,10 +87,10 @@ void usb_device_reset(USBDevice *dev) - if (dev == NULL || !dev->attached) { - return; - } -+ usb_device_handle_reset(dev); - dev->remote_wakeup = 0; - dev->addr = 0; - dev->state = USB_STATE_DEFAULT; -- usb_device_handle_reset(dev); - } - - void usb_wakeup(USBEndpoint *ep, unsigned int stream) --- -1.8.3.1 - diff --git a/kvm-usb-host-avoid-libusb_set_configuration-calls.patch b/kvm-usb-host-avoid-libusb_set_configuration-calls.patch deleted file mode 100644 index cfde8ce..0000000 --- a/kvm-usb-host-avoid-libusb_set_configuration-calls.patch +++ /dev/null @@ -1,68 +0,0 @@ -From a92cfe1254f947c32f15ebb1f81b825076c5625e Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Tue, 4 Jun 2019 07:13:41 +0100 -Subject: [PATCH 6/9] usb-host: avoid libusb_set_configuration calls -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Gerd Hoffmann -Message-id: <20190604071341.3432-4-kraxel@redhat.com> -Patchwork-id: 88477 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 3/3] usb-host: avoid libusb_set_configuration calls -Bugzilla: 1713679 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Max Reitz - -Seems some devices become confused when we call -libusb_set_configuration(). So before calling the function check -whenever the device has multiple configurations in the first place, and -in case it hasn't (which is the case for the majority of devices) simply -skip the call as it will have no effect anyway. - -Signed-off-by: Gerd Hoffmann -Message-id: 20190522094702.17619-4-kraxel@redhat.com -(cherry picked from commit bfe44898848614cfcb3a269bc965afbe1f0f331c) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/usb/host-libusb.c | 18 ++++++++++-------- - 1 file changed, 10 insertions(+), 8 deletions(-) - -diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c -index 4e9a45a..4f765d7 100644 ---- a/hw/usb/host-libusb.c -+++ b/hw/usb/host-libusb.c -@@ -1225,19 +1225,21 @@ static void usb_host_set_address(USBHostDevice *s, int addr) - - static void usb_host_set_config(USBHostDevice *s, int config, USBPacket *p) - { -- int rc; -+ int rc = 0; - - trace_usb_host_set_config(s->bus_num, s->addr, config); - - usb_host_release_interfaces(s); -- rc = libusb_set_configuration(s->dh, config); -- if (rc != 0) { -- usb_host_libusb_error("libusb_set_configuration", rc); -- p->status = USB_RET_STALL; -- if (rc == LIBUSB_ERROR_NO_DEVICE) { -- usb_host_nodev(s); -+ if (s->ddesc.bNumConfigurations != 1) { -+ rc = libusb_set_configuration(s->dh, config); -+ if (rc != 0) { -+ usb_host_libusb_error("libusb_set_configuration", rc); -+ p->status = USB_RET_STALL; -+ if (rc == LIBUSB_ERROR_NO_DEVICE) { -+ usb_host_nodev(s); -+ } -+ return; - } -- return; - } - p->status = usb_host_claim_interfaces(s, config); - if (p->status != USB_RET_SUCCESS) { --- -1.8.3.1 - diff --git a/kvm-usb-host-skip-reset-for-untouched-devices.patch b/kvm-usb-host-skip-reset-for-untouched-devices.patch deleted file mode 100644 index 9968baf..0000000 --- a/kvm-usb-host-skip-reset-for-untouched-devices.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 507b4bb7a63544c72e8ef1713ada9ff7c2e0024f Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Tue, 4 Jun 2019 07:13:40 +0100 -Subject: [PATCH 5/9] usb-host: skip reset for untouched devices -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Gerd Hoffmann -Message-id: <20190604071341.3432-3-kraxel@redhat.com> -Patchwork-id: 88479 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/3] usb-host: skip reset for untouched devices -Bugzilla: 1713679 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Dr. 
David Alan Gilbert -RH-Acked-by: Max Reitz - -If the guest didn't talk to the device yet, skip the reset. -Without this usb-host devices get resetted a number of times -at boot time for no good reason. - -Signed-off-by: Gerd Hoffmann -Message-id: 20190522094702.17619-3-kraxel@redhat.com -(cherry picked from commit 65f14ab98da1da920f98ee8734dc1588b01d6b2b) -Signed-off-by: Danilo C. L. de Paula ---- - hw/usb/host-libusb.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c -index 67b7465..4e9a45a 100644 ---- a/hw/usb/host-libusb.c -+++ b/hw/usb/host-libusb.c -@@ -1459,6 +1459,9 @@ static void usb_host_handle_reset(USBDevice *udev) - if (!s->allow_guest_reset) { - return; - } -+ if (udev->addr == 0) { -+ return; -+ } - - trace_usb_host_reset(s->bus_num, s->addr); - --- -1.8.3.1 - diff --git a/kvm-vfio-increase-the-cap-on-number-of-assigned-devices-.patch b/kvm-vfio-increase-the-cap-on-number-of-assigned-devices-.patch deleted file mode 100644 index 61a853a..0000000 --- a/kvm-vfio-increase-the-cap-on-number-of-assigned-devices-.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 2b89558946fc396c6ecb10249b69960d2a74e18f Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 12 Jun 2019 16:56:23 +0100 -Subject: [PATCH 3/3] vfio: increase the cap on number of assigned devices to - 64 - -RH-Author: Bandan Das -Message-id: -Patchwork-id: 88653 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH] vfio: increase the cap on number of assigned devices to 64 -Bugzilla: 1719823 -RH-Acked-by: Alex Williamson -RH-Acked-by: Auger Eric -RH-Acked-by: Laszlo Ersek - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1719823 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=22124174 -Branch: rhel-av-8.1.0/master-4.0.0 -Upstrea: N/A, the device limit change is downstream only - -In addition to bumping up the limit, also add a property for -future-proofing. 
This needs to be set for every assigned device -or via "global": -global vfio-pci.x-assigned-device-limit - -RHEL Notes: -For each vm using vfio, there is at least a container fd. For -each assigned device, there is likely a group fd, a device fd, -an error signaling fd and a device request fd. Assuming SR-IOV -VFs, vectors/device considering MSI/MSI-X could be ~3-5. Therefore, -we have ~14 file descriptors per device or 897 for 64 devices. -The default open fd limit is 1024 on Linux but libvirt bumps it to -8192 and the qemu process inherits that value as well. - -Signed-off-by: Bandan Das -Signed-off-by: Danilo C. L. de Paula ---- - hw/vfio/pci.c | 20 +++++++++++++++++--- - hw/vfio/pci.h | 1 + - 2 files changed, 18 insertions(+), 3 deletions(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 7c998af..7c0d93a 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -36,11 +36,13 @@ - #include "qapi/error.h" - - #define MSIX_CAP_LENGTH 12 --#define MAX_DEV_ASSIGN_CMDLINE 32 - - #define TYPE_VFIO_PCI "vfio-pci" - #define PCI_VFIO(obj) OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI) - -+/* RHEL only: Set once for the first assigned dev */ -+static uint16_t device_limit; -+ - static void vfio_disable_interrupts(VFIOPCIDevice *vdev); - static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); - -@@ -2810,15 +2812,24 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - int ret, i = 0; - bool is_mdev; - -+ if (device_limit && device_limit != vdev->assigned_device_limit) { -+ error_setg(errp, "Assigned device limit has been redefined. 
" -+ "Old:%d, New:%d", -+ device_limit, vdev->assigned_device_limit); -+ return; -+ } else { -+ device_limit = vdev->assigned_device_limit; -+ } -+ - QLIST_FOREACH(group, &vfio_group_list, next) { - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { - i++; - } - } - -- if (i >= MAX_DEV_ASSIGN_CMDLINE) { -+ if (i >= vdev->assigned_device_limit) { - error_setg(errp, "Maximum supported vfio devices (%d) " -- "already attached", MAX_DEV_ASSIGN_CMDLINE); -+ "already attached", vdev->assigned_device_limit); - return; - } - -@@ -3223,6 +3234,9 @@ static Property vfio_pci_dev_properties[] = { - DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), - DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, - no_geforce_quirks, false), -+ /* RHEL only */ -+ DEFINE_PROP_UINT16("x-assigned-device-limit", VFIOPCIDevice, -+ assigned_device_limit, 64), - DEFINE_PROP_BOOL("x-no-kvm-ioeventfd", VFIOPCIDevice, no_kvm_ioeventfd, - false), - DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, -diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index c11c3f1..29a8add 100644 ---- a/hw/vfio/pci.h -+++ b/hw/vfio/pci.h -@@ -136,6 +136,7 @@ typedef struct VFIOPCIDevice { - EventNotifier err_notifier; - EventNotifier req_notifier; - int (*resetfn)(struct VFIOPCIDevice *); -+ uint16_t assigned_device_limit; - uint32_t vendor_id; - uint32_t device_id; - uint32_t sub_vendor_id; --- -1.8.3.1 - diff --git a/kvm-vl-Document-why-objects-are-delayed.patch b/kvm-vl-Document-why-objects-are-delayed.patch deleted file mode 100644 index 7cd695d..0000000 --- a/kvm-vl-Document-why-objects-are-delayed.patch +++ /dev/null @@ -1,66 +0,0 @@ -From d5a193a4b838b00b064d441da5c038a5ff251f8b Mon Sep 17 00:00:00 2001 -From: Markus Armbruster -Date: Thu, 6 Jun 2019 19:31:39 +0100 -Subject: [PATCH 9/9] vl: Document why objects are delayed -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Markus Armbruster -Message-id: 
<20190606193139.31976-3-armbru@redhat.com> -Patchwork-id: 88610 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/2] vl: Document why objects are delayed -Bugzilla: 1714891 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Philippe Mathieu-Daudé - -Objects should not be "delayed" without a reason, as the previous -commit demonstrates. The remaining ones have reasons. State them. -and demand future ones come with such a statement. - -Signed-off-by: Markus Armbruster -Message-Id: <20190604151251.9903-3-armbru@redhat.com> -Reviewed-by: Michal Privoznik -Signed-off-by: Paolo Bonzini -(cherry picked from commit edfb4389c26cbfd873707306024130bda6049780) -Signed-off-by: Danilo C. L. de Paula ---- - vl.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/vl.c b/vl.c -index 627e37d..686c639 100644 ---- a/vl.c -+++ b/vl.c -@@ -2852,19 +2852,25 @@ static bool object_create_initial(const char *type, QemuOpts *opts) - exit(0); - } - -+ /* -+ * Objects should not be made "delayed" without a reason. If you -+ * add one, state the reason in a comment! -+ */ -+ -+ /* Reason: rng-egd property "chardev" */ - if (g_str_equal(type, "rng-egd")) { - return false; - } - - #if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) -+ /* Reason: cryptodev-vhost-user property "chardev" */ - if (g_str_equal(type, "cryptodev-vhost-user")) { - return false; - } - #endif - - /* -- * return false for concrete netfilters since -- * they depend on netdevs already existing -+ * Reason: filter-* property "netdev" etc. 
- */ - if (g_str_equal(type, "filter-buffer") || - g_str_equal(type, "filter-dump") || --- -1.8.3.1 - diff --git a/kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch b/kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch deleted file mode 100644 index 5f9841f..0000000 --- a/kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 6473a5d45b4ae75d5eef64b7b5dcd6735f498fb3 Mon Sep 17 00:00:00 2001 -From: Markus Armbruster -Date: Thu, 6 Jun 2019 19:31:38 +0100 -Subject: [PATCH 8/9] vl: Fix -drive / -blockdev persistent reservation - management - -RH-Author: Markus Armbruster -Message-id: <20190606193139.31976-2-armbru@redhat.com> -Patchwork-id: 88609 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/2] vl: Fix -drive / -blockdev persistent reservation management -Bugzilla: 1714891 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Eduardo Habkost - -qemu-system-FOO's main() acts on command line arguments in its own -idiosyncratic order. There's not much method to its madness. -Whenever we find a case where one kind of command line argument needs -to refer to something created for another kind later, we rejigger the -order. - -Recent commit cda4aa9a5a "vl: Create block backends before setting -machine properties" was such a rejigger. Block backends are now -created before "delayed" objects. This broke persistent reservation -management. Reproducer: - - $ qemu-system-x86_64 -object pr-manager-helper,id=pr-helper0,path=/tmp/pr-helper0.sock-drive -drive file=/dev/mapper/crypt,file.pr-manager=pr-helper0,format=raw,if=none,id=drive-scsi0-0-0-2 - qemu-system-x86_64: -drive file=/dev/mapper/crypt,file.pr-manager=pr-helper0,format=raw,if=none,id=drive-scsi0-0-0-2: No persistent reservation manager with id 'pr-helper0' - -The delayed pr-manager-helper object is created too late for use by --drive or -blockdev. Normal objects are still created in time. 
- -pr-manager-helper has always been a delayed object (commit 7c9e527659 -"scsi, file-posix: add support for persistent reservation -management"). Turns out there's no real reason for that. Make it a -normal object. - -Fixes: cda4aa9a5a08777cf13e164c0543bd4888b8adce -Signed-off-by: Markus Armbruster -Message-Id: <20190604151251.9903-2-armbru@redhat.com> -Reviewed-by: Michal Privoznik -Cc: qemu-stable@nongnu.org -Signed-off-by: Paolo Bonzini -(cherry picked from commit 9ea18ed25a36527167e9676f25d983df5e7f76e6) -Signed-off-by: Danilo C. L. de Paula ---- - vl.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/vl.c b/vl.c -index 2b95925..627e37d 100644 ---- a/vl.c -+++ b/vl.c -@@ -2852,8 +2852,7 @@ static bool object_create_initial(const char *type, QemuOpts *opts) - exit(0); - } - -- if (g_str_equal(type, "rng-egd") || -- g_str_has_prefix(type, "pr-manager-")) { -+ if (g_str_equal(type, "rng-egd")) { - return false; - } - --- -1.8.3.1 - diff --git a/kvm-x86_64-rh-devices-add-missing-TPM-passthrough.patch b/kvm-x86_64-rh-devices-add-missing-TPM-passthrough.patch deleted file mode 100644 index 0f52f10..0000000 --- a/kvm-x86_64-rh-devices-add-missing-TPM-passthrough.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 495a27daa8ca91bb357a065c986552c3375eda82 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Fri, 24 May 2019 18:40:01 +0100 -Subject: [PATCH 1/3] x86_64-rh-devices: add missing TPM passthrough -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20190524184002.14050-2-marcandre.lureau@redhat.com> -Patchwork-id: 88230 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/2] x86_64-rh-devices: add missing TPM passthrough -Bugzilla: 1519013 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Dr. David Alan Gilbert - -The TPM passthrough support got lost with 4.0 rebase, due to -configure/Kconfig changes. 
- -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. de Paula ---- - default-configs/x86_64-rh-devices.mak | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak -index 05ad6cf..8079fa7 100644 ---- a/default-configs/x86_64-rh-devices.mak -+++ b/default-configs/x86_64-rh-devices.mak -@@ -92,3 +92,6 @@ CONFIG_VTD=y - CONFIG_WDT_IB6300ESB=y - CONFIG_WDT_IB700=y - CONFIG_XIO3130=y -+CONFIG_TPM_CRB=y -+CONFIG_TPM_TIS=y -+CONFIG_TPM_PASSTHROUGH=y --- -1.8.3.1 - diff --git a/kvm-x86_64-rh-devices-enable-TPM-emulation.patch b/kvm-x86_64-rh-devices-enable-TPM-emulation.patch deleted file mode 100644 index e5c7beb..0000000 --- a/kvm-x86_64-rh-devices-enable-TPM-emulation.patch +++ /dev/null @@ -1,53 +0,0 @@ -From e1fe9feada882ece852c66f123535a98ea2230ce Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Fri, 24 May 2019 18:40:02 +0100 -Subject: [PATCH 2/3] x86_64-rh-devices: enable TPM emulation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20190524184002.14050-3-marcandre.lureau@redhat.com> -Patchwork-id: 88229 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/2] x86_64-rh-devices: enable TPM emulation -Bugzilla: 1519013 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Dr. David Alan Gilbert - -Remove the useless & misleading configure lines. - -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. 
de Paula ---- - configure | 3 --- - default-configs/x86_64-rh-devices.mak | 1 + - 2 files changed, 1 insertion(+), 3 deletions(-) - -diff --git a/configure b/configure -index 8cb6740..638c881 100755 ---- a/configure -+++ b/configure -@@ -2412,9 +2412,6 @@ if test "$seccomp" != "no" ; then - fi - fi - --# RHEL8-specific, only passthrough for now, rhbz#1688312 --tpm_emulator=no -- - ########################################## - # xen probe - -diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak -index 8079fa7..906b42d 100644 ---- a/default-configs/x86_64-rh-devices.mak -+++ b/default-configs/x86_64-rh-devices.mak -@@ -94,4 +94,5 @@ CONFIG_WDT_IB700=y - CONFIG_XIO3130=y - CONFIG_TPM_CRB=y - CONFIG_TPM_TIS=y -+CONFIG_TPM_EMULATOR=y - CONFIG_TPM_PASSTHROUGH=y --- -1.8.3.1 - diff --git a/kvm.modules b/kvm.modules deleted file mode 100644 index b9d9646..0000000 --- a/kvm.modules +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/sh - -case $(uname -m) in - ppc64) - grep OPAL /proc/cpuinfo >/dev/null 2>&1 && opal=1 - - modprobe -b kvm >/dev/null 2>&1 - modprobe -b kvm-pr >/dev/null 2>&1 && kvm=1 - if [ "$opal" ]; then - modprobe -b kvm-hv >/dev/null 2>&1 - fi - ;; - s390x) - modprobe -b kvm >/dev/null 2>&1 && kvm=1 - ;; -esac - -exit 0 diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 16c7a39..4bdff57 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -66,8 +66,8 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 4.0.0 -Release: 6%{?dist} +Version: 4.1.0 +Release: 1%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -76,7 +76,7 @@ URL: http://www.qemu.org/ ExclusiveArch: x86_64 %{power64} aarch64 s390x -Source0: http://wiki.qemu.org/download/qemu-4.0.0.tar.xz +Source0: http://wiki.qemu.org/download/qemu-4.1.0-rc4.tar.xz # KSM control scripts Source4: ksm.service @@ -120,57 +120,9 @@ Patch0016: 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0017: 0017-usb-xhci-Fix-PCI-capability-order.patch Patch0018: 0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0019: 0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -Patch0020: 0020-doc-fix-the-configuration-path.patch -Patch0021: 0021-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch -Patch0022: 0022-redhat-Post-rebase-synchronization.patch -# For bz#1703297 - CVE-2018-12126 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Store Buffer Data Sampling (MSBDS) [rhel-av-8] -# For bz#1703304 - CVE-2018-12130 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Fill Buffer Data Sampling (MFBDS) [rhel-av-8] -# For bz#1703310 - CVE-2018-12127 virt:8.0.0/qemu-kvm: hardware: Micro-architectural Load Port Data Sampling - Information Leak (MLPDS) [rhel-av-8] -# For bz#1707274 - CVE-2019-11091 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Data Sampling Uncacheable Memory (MDSUM) [rhel-av-8.1.0] -Patch23: kvm-target-i386-define-md-clear-bit.patch -# For bz#1709726 - Forward and backward migration failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'" -Patch24: kvm-redhat-fix-cut-n-paste-garbage-in-hw_compat-comments.patch -# For bz#1709726 - Forward and backward migration failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'" -Patch25: kvm-compat-Generic-hw_compat_rhel_8_0.patch -# For bz#1709726 - Forward and backward migration failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'" -Patch26: 
kvm-redhat-sync-pseries-rhel7.6.0-with-rhel-av-8.0.1.patch -# For bz#1709726 - Forward and backward migration failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'" -Patch27: kvm-redhat-define-pseries-rhel8.1.0-machine-type.patch -# For bz#1714937 - Disable VXHS support -Patch28: kvm-Disable-VXHS-support.patch -# For bz#1713735 - Allow ARM VIRT iommu option in RHEL8.1 machine -Patch29: kvm-aarch64-Add-virt-rhel8.1.0-machine-type-for-ARM.patch -# For bz#1713735 - Allow ARM VIRT iommu option in RHEL8.1 machine -Patch30: kvm-aarch64-Allow-ARM-VIRT-iommu-option-in-RHEL8.1-machi.patch -# For bz#1713679 - Detached device when trying to upgrade USB device firmware when in doing USB Passthrough via QEMU -Patch31: kvm-usb-call-reset-handler-before-updating-state.patch -# For bz#1713679 - Detached device when trying to upgrade USB device firmware when in doing USB Passthrough via QEMU -Patch32: kvm-usb-host-skip-reset-for-untouched-devices.patch -# For bz#1713679 - Detached device when trying to upgrade USB device firmware when in doing USB Passthrough via QEMU -Patch33: kvm-usb-host-avoid-libusb_set_configuration-calls.patch -# For bz#1627283 - Compile out IOH3420 on aarch64 -Patch34: kvm-aarch64-Compile-out-IOH3420.patch -# For bz#1714891 - Guest with persistent reservation manager for a disk fails to start -Patch35: kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch -# For bz#1714891 - Guest with persistent reservation manager for a disk fails to start -Patch36: kvm-vl-Document-why-objects-are-delayed.patch -# For bz#1712717 - CVE-2019-12155 qemu-kvm: QEMU: qxl: null pointer dereference while releasing spice resources [rhel-av-8] -Patch37: kvm-qxl-check-release-info-object.patch -# For bz#1722839 - [Intel 8.1 FEAT] MDS_NO exposure to guest - Fast Train -Patch38: kvm-target-i386-add-MDS-NO-feature.patch -# For bz#1588356 - qemu crashed on the source host when do storage migration with source qcow2 disk created by 'qemu-img' 
-Patch39: kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch -# For bz#1588356 - qemu crashed on the source host when do storage migration with source qcow2 disk created by 'qemu-img' -Patch40: kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch -# For bz#1707118 - enable device: bochs-display (QEMU) -Patch41: kvm-rh-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch -# For bz#1519013 - [RFE] QEMU Software TPM support (vTPM, or TPM emulation) -Patch42: kvm-x86_64-rh-devices-add-missing-TPM-passthrough.patch -# For bz#1519013 - [RFE] QEMU Software TPM support (vTPM, or TPM emulation) -Patch43: kvm-x86_64-rh-devices-enable-TPM-emulation.patch -# For bz#1719823 - [RHEL 8.1] [RFE] increase the maximum of vfio devices to more than 32 in qemu-kvm -Patch44: kvm-vfio-increase-the-cap-on-number-of-assigned-devices-.patch +BuildRequires: wget +BuildRequires: rpm-build BuildRequires: zlib-devel BuildRequires: glib2-devel BuildRequires: which @@ -189,6 +141,7 @@ BuildRequires: libusbx-devel >= 1.0.22 BuildRequires: usbredir-devel >= 0.7.1 %endif BuildRequires: texinfo +BuildRequires: python3-sphinx %if %{have_spice} BuildRequires: spice-protocol >= 0.12.12 BuildRequires: spice-server-devel >= 0.12.8 @@ -199,7 +152,7 @@ BuildRequires: nss-devel BuildRequires: libseccomp-devel >= 2.4.0 # For network block driver BuildRequires: libcurl-devel -BuildRequires: libssh2-devel +BuildRequires: libssh-devel BuildRequires: librados-devel BuildRequires: librbd-devel %if %{have_gluster} @@ -451,7 +404,7 @@ the Secure Shell (SSH) protocol. 
%prep -%setup -n qemu-%{version} +%setup -n qemu-%{version}-rc4 %autopatch -p1 %build @@ -551,7 +504,7 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --enable-kvm \ --enable-libiscsi \ --disable-libnfs \ - --enable-libssh2 \ + --enable-libssh \ --enable-libusb \ --disable-bzip2 \ --enable-linux-aio \ @@ -621,9 +574,12 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --disable-qed \ --disable-parallels \ --disable-sheepdog \ + --disable-auth-pam \ + --enable-iconv \ + --disable-lzfse \ + --enable-vhost-kernel \ --without-default-devices - echo "config-host.mak contents:" echo "===" cat config-host.mak @@ -796,6 +752,15 @@ rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/hppa-firmware.img rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/canyonlands.dtb rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot-sam460-20100605.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/firmware +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-*.fd +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-licenses.txt + +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-virt-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-sifive_u-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-virt-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu-nsis.bmp + %ifarch s390x # Use the s390-ccw.img that we've just built, not the pre-built one install -m 0644 pc-bios/s390-ccw/s390-ccw.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ @@ -896,13 +861,16 @@ chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/block-*.so # Remove buildinfo rm -rf $RPM_BUILD_ROOT%{qemudocdir}/interop/.buildinfo +# Remove spec +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/specs + %check export DIFF=diff; make check V=1 pushd tests/qemu-iotests ./check -v -raw 001 002 003 004 005 008 009 010 011 012 021 025 032 033 045 048 052 063 077 086 101 104 106 120 132 140 143 145 147 150 152 157 159 160 162 170 171 175 181 184 194 208 218 221 222 226 227 232 -./check -v -qcow2 001 002 003 004 005 007 008 009 010 011 012 
017 018 019 020 021 022 024 025 027 028 029 031 032 033 034 035 036 037 038 039 042 043 046 047 048 049 050 052 053 054 056 057 058 062 063 065 066 068 069 072 073 074 080 085 086 087 089 090 091 095 096 097 098 102 103 104 105 107 108 110 111 114 117 120 126 127 130 132 133 134 137 138 140 141 142 143 144 145 147 150 151 152 156 157 158 159 162 165 170 174 177 179 181 184 187 188 189 190 191 194 195 196 198 201 202 203 204 205 206 208 209 214 216 217 218 222 226 227 232 +./check -v -qcow2 001 002 003 004 005 007 008 009 010 011 012 017 018 019 020 021 022 024 025 027 028 029 031 032 033 034 035 036 037 038 039 042 043 046 047 048 049 050 052 053 054 056 057 058 062 063 065 066 069 072 073 074 080 085 086 087 089 090 091 095 096 097 098 102 103 104 105 107 108 110 111 114 117 120 126 127 130 132 133 134 137 138 140 141 142 143 144 145 147 150 151 152 156 157 158 159 162 165 170 174 177 179 181 184 187 188 189 190 191 194 195 196 198 201 202 203 204 206 208 209 214 216 217 218 222 226 227 232 ./check -v -luks 001 002 003 004 005 008 009 010 011 012 021 032 033 052 140 143 145 157 162 174 181 184 208 218 227 -./check -v -nbd 001 002 003 004 005 008 009 010 011 021 032 033 045 077 094 104 119 123 132 143 145 147 151 152 162 181 184 194 205 208 218 222 +./check -v -nbd 001 002 003 004 005 008 009 010 011 021 032 033 045 077 094 104 119 123 132 143 145 147 151 152 162 181 184 194 208 218 222 popd %post -n qemu-kvm-core @@ -1093,6 +1061,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Wed Aug 14 2019 Miroslav Rezanina - 4.1.0-1.el8 +- Rebase to qemu 4.1.0 rc4 [bz#1705235] +- Resolves: bz#1705235 + (Rebase qemu-kvm for RHEL-AV 8.1.0) + * Tue Jul 23 2019 Danilo Cesar Lemes de Paula - 4.0.0-6.el8 - kvm-x86_64-rh-devices-add-missing-TPM-passthrough.patch [bz#1519013] - kvm-x86_64-rh-devices-enable-TPM-emulation.patch [bz#1519013] @@ -1160,10 +1133,18 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - Resolves: bz#1707274 (CVE-2019-11091 
virt:8.0.0/qemu-kvm: hardware: Microarchitectural Data Sampling Uncacheable Memory (MDSUM) [rhel-av-8.1.0]) -* Thu May 16 2019 Danilo Cesar Lemes de Paula - 4.0.0-1.el8 -- 4.0.0 temporary rebase -- Resolves: bz#1705235 - (Rebase qemu-kvm for RHEL-AV 8.1.0) +* Wed May 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-26.el8 +- kvm-target-ppc-spapr-Add-SPAPR_CAP_LARGE_DECREMENTER.patch [bz#1698711] +- kvm-target-ppc-spapr-Add-workaround-option-to-SPAPR_CAP_.patch [bz#1698711] +- kvm-target-ppc-spapr-Add-SPAPR_CAP_CCF_ASSIST.patch [bz#1698711] +- kvm-target-ppc-tcg-make-spapr_caps-apply-cap-cfpc-sbbc-i.patch [bz#1698711] +- kvm-target-ppc-spapr-Enable-mitigations-by-default-for-p.patch [bz#1698711] +- kvm-slirp-ensure-there-is-enough-space-in-mbuf-to-null-t.patch [bz#1693076] +- kvm-slirp-don-t-manipulate-so_rcv-in-tcp_emu.patch [bz#1693076] +- Resolves: bz#1693076 + (CVE-2019-6778 qemu-kvm: QEMU: slirp: heap buffer overflow in tcp_emu() [rhel-av-8]) +- Resolves: bz#1698711 + (Enable Spectre / Meltdown mitigations by default in pseries-rhel8.0.0 machine type) * Mon May 06 2019 Danilo Cesar Lemes de Paula - 3.1.0-25.el8 - kvm-redhat-enable-tpmdev-passthrough.patch [bz#1688312] diff --git a/sources b/sources index e43b6a0..3c58d7d 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-4.0.0.tar.xz) = 952e94194ce9e64c15388c59035cb31fb9f761d30095c2fb9441012b609c18c9976285727b93bf37b95e15675802d73f8e1c4619ebecd23606675bb503646b13 +SHA512 (qemu-4.1.0-rc4.tar.xz) = 349e1e5281fa62201bfd332b1e8f628488302eeac0211a3464551a3ac0f3212e6e816d2e046f07e5fa84ec6f86954615c6215f722af995c0a0cb07c2190ddba9 From beffde47a893723ec5dfb00b0a286ad67893401e Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Mon, 19 Aug 2019 15:50:32 +0100 Subject: [PATCH 041/195] * Mon Aug 19 2019 Danilo Cesar Lemes de Paula - 4.1.0-2.el8 - kvm-spec-Update-seavgabios-dependency.patch [bz#1725664] - kvm-pc-Don-t-make-die-id-mandatory-unless-necessary.patch [bz#1741451] - kvm-display-bochs-fix-pcie-support.patch [bz#1733977 bz#1740692] - kvm-spapr-Reset-CAS-IRQ-subsystem-after-devices.patch [bz#1733977] - kvm-spapr-xive-Fix-migration-of-hot-plugged-CPUs.patch [bz#1733977] - kvm-riscv-roms-Fix-make-rules-for-building-sifive_u-bios.patch [bz#1733977 bz#1740692] - kvm-Update-version-for-v4.1.0-release.patch [bz#1733977 bz#1740692] - Resolves: bz#1725664 (Update seabios dependency) - Resolves: bz#1733977 (Qemu core dumped: /home/ngu/qemu/hw/intc/xics_kvm.c:321: ics_kvm_set_irq: Assertion `kernel_xics_fd != -1' failed) - Resolves: bz#1740692 (Backport QEMU 4.1.0 rc5 & ga patches) - Resolves: bz#1741451 (Failed to hot-plug vcpus) --- kvm-Update-version-for-v4.1.0-release.patch | 42 ++++++ kvm-display-bochs-fix-pcie-support.patch | 86 +++++++++++ ...ke-die-id-mandatory-unless-necessary.patch | 115 +++++++++++++++ ...ake-rules-for-building-sifive_u-bios.patch | 50 +++++++ ...eset-CAS-IRQ-subsystem-after-devices.patch | 125 ++++++++++++++++ ...ve-Fix-migration-of-hot-plugged-CPUs.patch | 135 ++++++++++++++++++ kvm-spec-Update-seavgabios-dependency.patch | 48 +++++++ qemu-kvm.spec | 36 ++++- 8 files changed, 635 insertions(+), 2 deletions(-) create mode 100644 kvm-Update-version-for-v4.1.0-release.patch create mode 100644 kvm-display-bochs-fix-pcie-support.patch create mode 100644 kvm-pc-Don-t-make-die-id-mandatory-unless-necessary.patch create mode 100644 kvm-riscv-roms-Fix-make-rules-for-building-sifive_u-bios.patch create mode 100644 kvm-spapr-Reset-CAS-IRQ-subsystem-after-devices.patch create mode 100644 kvm-spapr-xive-Fix-migration-of-hot-plugged-CPUs.patch create mode 100644 kvm-spec-Update-seavgabios-dependency.patch diff --git a/kvm-Update-version-for-v4.1.0-release.patch 
b/kvm-Update-version-for-v4.1.0-release.patch new file mode 100644 index 0000000..dcd36bc --- /dev/null +++ b/kvm-Update-version-for-v4.1.0-release.patch @@ -0,0 +1,42 @@ +From cf5e1c094d045cbbb9fd58fd9ec9c8fe8342eb4f Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Mon, 19 Aug 2019 08:54:19 +0100 +Subject: [PATCH 7/7] Update version for v4.1.0 release +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +Message-id: <7fdfb1e847814fc97d2ef85172e75e2af70b5cae.1566204425.git.mrezanin@redhat.com> +Patchwork-id: 90055 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 5/5] Update version for v4.1.0 release +Bugzilla: 1733977 1740692 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Yash Mankad +RH-Acked-by: Danilo de Paula + +From: Peter Maydell + +Signed-off-by: Peter Maydell +(cherry picked from commit 9e06029aea3b2eca1d5261352e695edc1e7d7b8b) +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + VERSION - we do not backport rc5 version change + +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula +--- + VERSION | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/VERSION b/VERSION +index 8048f65..ee74734 100644 +--- a/VERSION ++++ b/VERSION +@@ -1 +1 @@ +-4.0.94 ++4.1.0 +-- +1.8.3.1 + diff --git a/kvm-display-bochs-fix-pcie-support.patch b/kvm-display-bochs-fix-pcie-support.patch new file mode 100644 index 0000000..dfdf84b --- /dev/null +++ b/kvm-display-bochs-fix-pcie-support.patch @@ -0,0 +1,86 @@ +From 8ba44c5854ab6e6feac8bc979f07ae241825673d Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Mon, 19 Aug 2019 08:54:15 +0100 +Subject: [PATCH 3/7] display/bochs: fix pcie support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +Message-id: +Patchwork-id: 90060 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 1/5] display/bochs: fix pcie support +Bugzilla: 1733977 1740692 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Yash Mankad +RH-Acked-by: Danilo de Paula + +From: Gerd Hoffmann + +Set QEMU_PCI_CAP_EXPRESS unconditionally in init(), then clear it in +realize() in case the device is not connected to a PCIe bus. + +This makes sure the pci config space allocation is big enough, so +accessing the PCIe extended config space doesn't overflow the pci +config space buffer. + +PCI(e) config space is guest writable. Writes are limited by +write mask (which probably is also filled with random stuff), +so the guest can only flip enabled bits. But I suspect it +still might be exploitable, so rather serious because it might +be a host escape for the guest. On the other hand the device +is probably not yet in widespread use. + +(For a QEMU version without this commit, a mitigation for the +bug is available: use "-device bochs-display" as a conventional pci +device only.) 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Gerd Hoffmann +Message-id: 20190812065221.20907-2-kraxel@redhat.com +Reviewed-by: Alex Williamson +Reviewed-by: Paolo Bonzini +Signed-off-by: Peter Maydell +(cherry picked from commit 5e7bcdcfe69ce0fad66012b2cfb2035003c37eef) + +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. de Paula +--- + hw/display/bochs-display.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/hw/display/bochs-display.c b/hw/display/bochs-display.c +index 582133d..8e83b51 100644 +--- a/hw/display/bochs-display.c ++++ b/hw/display/bochs-display.c +@@ -297,9 +297,10 @@ static void bochs_display_realize(PCIDevice *dev, Error **errp) + } + + if (pci_bus_is_express(pci_get_bus(dev))) { +- dev->cap_present |= QEMU_PCI_CAP_EXPRESS; + ret = pcie_endpoint_cap_init(dev, 0x80); + assert(ret > 0); ++ } else { ++ dev->cap_present &= ~QEMU_PCI_CAP_EXPRESS; + } + + memory_region_set_log(&s->vram, true, DIRTY_MEMORY_VGA); +@@ -322,11 +323,15 @@ static void bochs_display_set_big_endian_fb(Object *obj, bool value, + + static void bochs_display_init(Object *obj) + { ++ PCIDevice *dev = PCI_DEVICE(obj); ++ + /* Expose framebuffer byteorder via QOM */ + object_property_add_bool(obj, "big-endian-framebuffer", + bochs_display_get_big_endian_fb, + bochs_display_set_big_endian_fb, + NULL); ++ ++ dev->cap_present |= QEMU_PCI_CAP_EXPRESS; + } + + static void bochs_display_exit(PCIDevice *dev) +-- +1.8.3.1 + diff --git a/kvm-pc-Don-t-make-die-id-mandatory-unless-necessary.patch b/kvm-pc-Don-t-make-die-id-mandatory-unless-necessary.patch new file mode 100644 index 0000000..fa8a1bb --- /dev/null +++ b/kvm-pc-Don-t-make-die-id-mandatory-unless-necessary.patch @@ -0,0 +1,115 @@ +From 7909ad1654df63be6321af36f1ef436a990ab6df Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Fri, 16 Aug 2019 17:16:33 +0100 +Subject: [PATCH 2/7] pc: Don't make die-id mandatory unless necessary + +RH-Author: Eduardo Habkost +Message-id: 
<20190816171633.26797-2-ehabkost@redhat.com> +Patchwork-id: 90038 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v2 1/1] pc: Don't make die-id mandatory unless necessary +Bugzilla: 1741451 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Danilo de Paula +RH-Acked-by: Erik Skultety +RH-Acked-by: Miroslav Rezanina + +We have this issue reported when using libvirt to hotplug CPUs: +https://bugzilla.redhat.com/show_bug.cgi?id=1741451 + +Basically, libvirt is not copying die-id from +query-hotpluggable-cpus, but die-id is now mandatory. + +We could blame libvirt and say it is not following the documented +interface, because we have this buried in the QAPI schema +documentation: + +> Note: currently there are 5 properties that could be present +> but management should be prepared to pass through other +> properties with device_add command to allow for future +> interface extension. This also requires the filed names to be kept in +> sync with the properties passed to -device/device_add. + +But I don't think this would be reasonable from us. We can just +make QEMU more flexible and let die-id to be omitted when there's +no ambiguity. This will allow us to keep compatibility with +existing libvirt versions. + +Test case included to ensure we don't break this again. + +Fixes: commit 176d2cda0dee ("i386/cpu: Consolidate die-id validity in smp context") +Signed-off-by: Eduardo Habkost +--- +Changes v1 -> v2: +* v1 was "pc: Don't make CPU properties mandatory unless necessary" +* Make only die-id optional (Igor Mammedov) + +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/i386/pc.c | 8 ++++++++ + tests/acceptance/pc_cpu_hotplug_props.py | 35 ++++++++++++++++++++++++++++++++ + 2 files changed, 43 insertions(+) + create mode 100644 tests/acceptance/pc_cpu_hotplug_props.py + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 9ded0db..b3d2d1e 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -2622,6 +2622,14 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, + int max_socket = (ms->smp.max_cpus - 1) / + smp_threads / smp_cores / pcms->smp_dies; + ++ /* ++ * die-id was optional in QEMU 4.0 and older, so keep it optional ++ * if there's only one die per socket. ++ */ ++ if (cpu->die_id < 0 && pcms->smp_dies == 1) { ++ cpu->die_id = 0; ++ } ++ + if (cpu->socket_id < 0) { + error_setg(errp, "CPU socket-id is not set"); + return; +diff --git a/tests/acceptance/pc_cpu_hotplug_props.py b/tests/acceptance/pc_cpu_hotplug_props.py +new file mode 100644 +index 0000000..08b7e63 +--- /dev/null ++++ b/tests/acceptance/pc_cpu_hotplug_props.py +@@ -0,0 +1,35 @@ ++# ++# Ensure CPU die-id can be omitted on -device ++# ++# Copyright (c) 2019 Red Hat Inc ++# ++# Author: ++# Eduardo Habkost ++# ++# This library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2 of the License, or (at your option) any later version. ++# ++# This library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with this library; if not, see . 
++# ++ ++from avocado_qemu import Test ++ ++class OmittedCPUProps(Test): ++ """ ++ :avocado: tags=arch:x86_64 ++ """ ++ def test_no_die_id(self): ++ self.vm.add_args('-nodefaults', '-S') ++ self.vm.add_args('-smp', '1,sockets=2,cores=2,threads=2,maxcpus=8') ++ self.vm.add_args('-cpu', 'qemu64') ++ self.vm.add_args('-device', 'qemu64-x86_64-cpu,socket-id=1,core-id=0,thread-id=0') ++ self.vm.launch() ++ self.assertEquals(len(self.vm.command('query-cpus')), 2) +-- +1.8.3.1 + diff --git a/kvm-riscv-roms-Fix-make-rules-for-building-sifive_u-bios.patch b/kvm-riscv-roms-Fix-make-rules-for-building-sifive_u-bios.patch new file mode 100644 index 0000000..c3d3a29 --- /dev/null +++ b/kvm-riscv-roms-Fix-make-rules-for-building-sifive_u-bios.patch @@ -0,0 +1,50 @@ +From 21412b5480ec8432fb47fad1a460512424d53dce Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Mon, 19 Aug 2019 08:54:18 +0100 +Subject: [PATCH 6/7] riscv: roms: Fix make rules for building sifive_u bios +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +Message-id: <87c86632af83319c3c4a7ab01f2cf6de7e455ef6.1566204425.git.mrezanin@redhat.com> +Patchwork-id: 90059 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 4/5] riscv: roms: Fix make rules for building sifive_u bios +Bugzilla: 1733977 1740692 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Yash Mankad +RH-Acked-by: Danilo de Paula + +From: Bin Meng + +Currently the make rules are wrongly using qemu/virt opensbi image +for sifive_u machine. Correct it. + +Signed-off-by: Bin Meng +Reviewed-by: Chih-Min Chao +Reviewed-by: Alistair Francis +Message-id: 1564812484-20385-1-git-send-email-bmeng.cn@gmail.com +Signed-off-by: Peter Maydell +(cherry picked from commit 02db1be1d074d75ad1c9bd1e9681f1b0b6299cee) +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula +--- + roms/Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/roms/Makefile b/roms/Makefile +index dc70fb5..775c963 100644 +--- a/roms/Makefile ++++ b/roms/Makefile +@@ -183,7 +183,7 @@ opensbi64-sifive_u: + $(MAKE) -C opensbi \ + CROSS_COMPILE=$(riscv64_cross_prefix) \ + PLATFORM="qemu/sifive_u" +- cp opensbi/build/platform/qemu/virt/firmware/fw_jump.bin ../pc-bios/opensbi-riscv64-sifive_u-fw_jump.bin ++ cp opensbi/build/platform/qemu/sifive_u/firmware/fw_jump.bin ../pc-bios/opensbi-riscv64-sifive_u-fw_jump.bin + + clean: + rm -rf seabios/.config seabios/out seabios/builds +-- +1.8.3.1 + diff --git a/kvm-spapr-Reset-CAS-IRQ-subsystem-after-devices.patch b/kvm-spapr-Reset-CAS-IRQ-subsystem-after-devices.patch new file mode 100644 index 0000000..4842a54 --- /dev/null +++ b/kvm-spapr-Reset-CAS-IRQ-subsystem-after-devices.patch @@ -0,0 +1,125 @@ +From 2edb7c1181fb69e410ffc688986a12d36899f976 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Mon, 19 Aug 2019 08:54:16 +0100 +Subject: [PATCH 4/7] spapr: Reset CAS & IRQ subsystem after devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +Message-id: <9b7c319c271fa2c8cda410e87aef985d8c180049.1566204425.git.mrezanin@redhat.com> +Patchwork-id: 90057 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 2/5] spapr: Reset CAS & IRQ subsystem after devices +Bugzilla: 1733977 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Yash Mankad +RH-Acked-by: Danilo de Paula + +From: David Gibson + +Bugzilla: 1733977 + +This fixes a nasty regression in qemu-4.1 for the 'pseries' machine, +caused by the new "dual" interrupt controller model. Specifically, +qemu can crash when used with KVM if a 'system_reset' is requested +while there's active I/O in the guest. + +The problem is that in spapr_machine_reset() we: + +1. Reset the CAS vector state + spapr_ovec_cleanup(spapr->ov5_cas); + +2. 
Reset all devices + qemu_devices_reset() + +3. Reset the irq subsystem + spapr_irq_reset(); + +However (1) implicitly changes the interrupt delivery mode, because +whether we're using XICS or XIVE depends on the CAS state. We don't +properly initialize the new irq mode until (3) though - in particular +setting up the KVM devices. + +During (2), we can temporarily drop the BQL allowing some irqs to be +delivered which will go to an irq system that's not properly set up. + +Specifically, if the previous guest was in (KVM) XIVE mode, the CAS +reset will put us back in XICS mode. kvm_kernel_irqchip() still +returns true, because XIVE was using KVM, however XICs doesn't have +its KVM components intialized and kernel_xics_fd == -1. When the irq +is delivered it goes via ics_kvm_set_irq() which assert()s that +kernel_xics_fd != -1. + +This change addresses the problem by delaying the CAS reset until +after the devices reset. The device reset should quiesce all the +devices so we won't get irqs delivered while we mess around with the +IRQ. The CAS reset and irq re-initialize should also now be under the +same BQL critical section so nothing else should be able to interrupt +it either. + +We also move the spapr_irq_msi_reset() used in one of the legacy irq +modes, since it logically makes sense at the same point as the +spapr_irq_reset() (it's essentially an equivalent operation for older +machine types). Since we don't need to switch between different +interrupt controllers for those old machine types it shouldn't +actually be broken in those cases though. + +Cc: Cédric Le Goater + +Fixes: b2e22477 "spapr: add a 'reset' method to the sPAPR IRQ backend" +Fixes: 13db0cd9 "spapr: introduce a new sPAPR IRQ backend supporting + XIVE and XICS" +Signed-off-by: David Gibson +(cherry picked from commit 25c9780d38d4494f8610371d883865cf40b35dd6) + +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/spapr.c | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index ab64d43..669eae1 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -1727,6 +1727,18 @@ static void spapr_machine_reset(MachineState *machine) + } + + /* ++ * NVLink2-connected GPU RAM needs to be placed on a separate NUMA node. ++ * We assign a new numa ID per GPU in spapr_pci_collect_nvgpu() which is ++ * called from vPHB reset handler so we initialize the counter here. ++ * If no NUMA is configured from the QEMU side, we start from 1 as GPU RAM ++ * must be equally distant from any other node. ++ * The final value of spapr->gpu_numa_id is going to be written to ++ * max-associativity-domains in spapr_build_fdt(). ++ */ ++ spapr->gpu_numa_id = MAX(1, nb_numa_nodes); ++ qemu_devices_reset(); ++ ++ /* + * If this reset wasn't generated by CAS, we should reset our + * negotiated options and start from scratch + */ +@@ -1742,18 +1754,6 @@ static void spapr_machine_reset(MachineState *machine) + } + + /* +- * NVLink2-connected GPU RAM needs to be placed on a separate NUMA node. +- * We assign a new numa ID per GPU in spapr_pci_collect_nvgpu() which is +- * called from vPHB reset handler so we initialize the counter here. +- * If no NUMA is configured from the QEMU side, we start from 1 as GPU RAM +- * must be equally distant from any other node. +- * The final value of spapr->gpu_numa_id is going to be written to +- * max-associativity-domains in spapr_build_fdt(). +- */ +- spapr->gpu_numa_id = MAX(1, nb_numa_nodes); +- qemu_devices_reset(); +- +- /* + * This is fixing some of the default configuration of the XIVE + * devices. To be called after the reset of the machine devices. 
+ */ +-- +1.8.3.1 + diff --git a/kvm-spapr-xive-Fix-migration-of-hot-plugged-CPUs.patch b/kvm-spapr-xive-Fix-migration-of-hot-plugged-CPUs.patch new file mode 100644 index 0000000..3cc18fb --- /dev/null +++ b/kvm-spapr-xive-Fix-migration-of-hot-plugged-CPUs.patch @@ -0,0 +1,135 @@ +From 1a1ee2ef4c4c1dc310929e5d752a64eeb3f5245b Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Mon, 19 Aug 2019 08:54:17 +0100 +Subject: [PATCH 5/7] spapr/xive: Fix migration of hot-plugged CPUs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +Message-id: <7cdadb2fe39b9edfc4c281f4efb6f5afa0b1503f.1566204425.git.mrezanin@redhat.com> +Patchwork-id: 90058 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 3/5] spapr/xive: Fix migration of hot-plugged CPUs +Bugzilla: 1733977 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Yash Mankad +RH-Acked-by: Danilo de Paula + +From: Cédric Le Goater + +Bugzilla: 1733977 + +The migration sequence of a guest using the XIVE exploitation mode +relies on the fact that the states of all devices are restored before +the machine is. This is not true for hot-plug devices such as CPUs +which state come after the machine. This breaks migration because the +thread interrupt context registers are not correctly set. + +Fix migration of hotplugged CPUs by restoring their context in the +'post_load' handler of the XiveTCTX model. + +Fixes: 277dd3d7712a ("spapr/xive: add migration support for KVM") +Signed-off-by: Cédric Le Goater +Message-Id: <20190813064853.29310-1-clg@kaod.org> +Signed-off-by: David Gibson +(cherry picked from commit 310cda5b5e9df642b19a0e9c504368ffba3b3ab9) +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/intc/spapr_xive_kvm.c | 19 +++++++++++++++++-- + hw/intc/xive.c | 21 ++++++++++++++++++++- + include/hw/ppc/xive.h | 1 + + 3 files changed, 38 insertions(+), 3 deletions(-) + +diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c +index 3bf8e7a..8898615 100644 +--- a/hw/intc/spapr_xive_kvm.c ++++ b/hw/intc/spapr_xive_kvm.c +@@ -72,11 +72,17 @@ static void kvm_cpu_disable_all(void) + * XIVE Thread Interrupt Management context (KVM) + */ + +-static void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp) ++void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp) + { ++ SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive; + uint64_t state[2]; + int ret; + ++ /* The KVM XIVE device is not in use yet */ ++ if (xive->fd == -1) { ++ return; ++ } ++ + /* word0 and word1 of the OS ring. */ + state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]); + +@@ -655,7 +661,16 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id) + } + } + +- /* Restore the thread interrupt contexts */ ++ /* ++ * Restore the thread interrupt contexts of initial CPUs. ++ * ++ * The context of hotplugged CPUs is restored later, by the ++ * 'post_load' handler of the XiveTCTX model because they are not ++ * available at the time the SpaprXive 'post_load' method is ++ * called. We can not restore the context of all CPUs in the ++ * 'post_load' handler of XiveTCTX because the machine is not ++ * necessarily connected to the KVM device at that time. 
++ */ + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + +diff --git a/hw/intc/xive.c b/hw/intc/xive.c +index cf77bdb..da148e9 100644 +--- a/hw/intc/xive.c ++++ b/hw/intc/xive.c +@@ -615,12 +615,31 @@ static int vmstate_xive_tctx_pre_save(void *opaque) + return 0; + } + ++static int vmstate_xive_tctx_post_load(void *opaque, int version_id) ++{ ++ Error *local_err = NULL; ++ ++ if (kvm_irqchip_in_kernel()) { ++ /* ++ * Required for hotplugged CPU, for which the state comes ++ * after all states of the machine. ++ */ ++ kvmppc_xive_cpu_set_state(XIVE_TCTX(opaque), &local_err); ++ if (local_err) { ++ error_report_err(local_err); ++ return -1; ++ } ++ } ++ ++ return 0; ++} ++ + static const VMStateDescription vmstate_xive_tctx = { + .name = TYPE_XIVE_TCTX, + .version_id = 1, + .minimum_version_id = 1, + .pre_save = vmstate_xive_tctx_pre_save, +- .post_load = NULL, /* handled by the sPAPRxive model */ ++ .post_load = vmstate_xive_tctx_post_load, + .fields = (VMStateField[]) { + VMSTATE_BUFFER(regs, XiveTCTX), + VMSTATE_END_OF_LIST() +diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h +index 55c53c7..7363351 100644 +--- a/include/hw/ppc/xive.h ++++ b/include/hw/ppc/xive.h +@@ -438,5 +438,6 @@ void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val); + void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp); + void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp); + void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp); ++void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp); + + #endif /* PPC_XIVE_H */ +-- +1.8.3.1 + diff --git a/kvm-spec-Update-seavgabios-dependency.patch b/kvm-spec-Update-seavgabios-dependency.patch new file mode 100644 index 0000000..ee00575 --- /dev/null +++ b/kvm-spec-Update-seavgabios-dependency.patch @@ -0,0 +1,48 @@ +From a6e6495fc1369e1c9f8db872411eea7e0a4f2a67 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Tue, 9 Jul 2019 12:57:07 +0100 +Subject: [PATCH 1/7] spec: Update 
seavgabios dependency + +RH-Author: Miroslav Rezanina +Message-id: <1562677027-9619-1-git-send-email-mrezanin@redhat.com> +Patchwork-id: 89436 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH] spec: Update seavgabios dependency +Bugzilla: 1725664 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Danilo de Paula +RH-Acked-by: Wainer dos Santos Moschetta + +From: Miroslav Rezanina + +Branch: rhel-av-8.1.0/master-4.0.0 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1725664 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=22574916 +Upstream: n/a + +We introduced new vga bios files (vgabios-ramfb.bin and +vgabios-bochs-display.bin) recently. To prevent dangling symlinks, +we bump required seavgabios-bin package to version adding these +bios files. + +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. de Paula +--- + redhat/qemu-kvm.spec.template | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/qemu-kvm.spec b/qemu-kvm.spec +index e6c0980..a43ebc8 100644 +--- a/qemu-kvm.spec ++++ b/qemu-kvm.spec +@@ -244,7 +244,7 @@ Requires: edk2-aarch64 + %endif + + %ifnarch aarch64 s390x +-Requires: seavgabios-bin >= 1.10.2-1 ++Requires: seavgabios-bin >= 1.12.0-3 + Requires: ipxe-roms-qemu >= 20170123-1 + %endif + %ifarch %{power64} +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 4bdff57..93b6b7e 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.1.0 -Release: 1%{?dist} +Release: 2%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -120,6 +120,21 @@ Patch0016: 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0017: 0017-usb-xhci-Fix-PCI-capability-order.patch Patch0018: 0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0019: 0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +# For bz#1741451 - Failed to hot-plug vcpus +Patch21: kvm-pc-Don-t-make-die-id-mandatory-unless-necessary.patch +# For bz#1733977 - Qemu core dumped: /home/ngu/qemu/hw/intc/xics_kvm.c:321: ics_kvm_set_irq: Assertion `kernel_xics_fd != -1' failed +# For bz#1740692 - Backport QEMU 4.1.0 rc5 & ga patches +Patch22: kvm-display-bochs-fix-pcie-support.patch +# For bz#1733977 - Qemu core dumped: /home/ngu/qemu/hw/intc/xics_kvm.c:321: ics_kvm_set_irq: Assertion `kernel_xics_fd != -1' failed +Patch23: kvm-spapr-Reset-CAS-IRQ-subsystem-after-devices.patch +# For bz#1733977 - Qemu core dumped: /home/ngu/qemu/hw/intc/xics_kvm.c:321: ics_kvm_set_irq: Assertion `kernel_xics_fd != -1' failed +Patch24: kvm-spapr-xive-Fix-migration-of-hot-plugged-CPUs.patch +# For bz#1733977 - Qemu core dumped: /home/ngu/qemu/hw/intc/xics_kvm.c:321: ics_kvm_set_irq: Assertion `kernel_xics_fd != -1' failed +# For bz#1740692 - Backport QEMU 4.1.0 rc5 & ga patches +Patch25: kvm-riscv-roms-Fix-make-rules-for-building-sifive_u-bios.patch +# For bz#1733977 - Qemu core dumped: /home/ngu/qemu/hw/intc/xics_kvm.c:321: ics_kvm_set_irq: Assertion `kernel_xics_fd != -1' failed +# For bz#1740692 - Backport QEMU 4.1.0 rc5 & ga patches +Patch26: kvm-Update-version-for-v4.1.0-release.patch BuildRequires: wget BuildRequires: rpm-build @@ -259,7 +274,7 @@ Requires: edk2-aarch64 %endif %ifnarch aarch64 s390x -Requires: seavgabios-bin >= 1.10.2-1 +Requires: seavgabios-bin >= 1.12.0-3 Requires: ipxe-roms-qemu >= 20170123-1 %endif %ifarch %{power64} @@ -1061,6 +1076,23 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon 
Aug 19 2019 Danilo Cesar Lemes de Paula - 4.1.0-2.el8 +- kvm-spec-Update-seavgabios-dependency.patch [bz#1725664] +- kvm-pc-Don-t-make-die-id-mandatory-unless-necessary.patch [bz#1741451] +- kvm-display-bochs-fix-pcie-support.patch [bz#1733977 bz#1740692] +- kvm-spapr-Reset-CAS-IRQ-subsystem-after-devices.patch [bz#1733977] +- kvm-spapr-xive-Fix-migration-of-hot-plugged-CPUs.patch [bz#1733977] +- kvm-riscv-roms-Fix-make-rules-for-building-sifive_u-bios.patch [bz#1733977 bz#1740692] +- kvm-Update-version-for-v4.1.0-release.patch [bz#1733977 bz#1740692] +- Resolves: bz#1725664 + (Update seabios dependency) +- Resolves: bz#1733977 + (Qemu core dumped: /home/ngu/qemu/hw/intc/xics_kvm.c:321: ics_kvm_set_irq: Assertion `kernel_xics_fd != -1' failed) +- Resolves: bz#1740692 + (Backport QEMU 4.1.0 rc5 & ga patches) +- Resolves: bz#1741451 + (Failed to hot-plug vcpus) + * Wed Aug 14 2019 Miroslav Rezanina - 4.1.0-1.el8 - Rebase to qemu 4.1.0 rc4 [bz#1705235] - Resolves: bz#1705235 From b190fee1347a5e02016a1cfdfdd798efb79b9878 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Mon, 19 Aug 2019 22:26:36 +0100 Subject: [PATCH 042/195] Update qemu-kvm with 4.1.0 patches Since qemu-kvm tree is synchronized with 4.1.0 now, it's easier to handle this as 4.1.0 and not as rc4 - Resolves: bz#1740692 --- .gitignore | 1 + 0004-Initial-redhat-build.patch | 16 ++- 0005-Enable-disable-devices-for-RHEL.patch | 114 +++++++-------- ...Machine-type-related-general-changes.patch | 54 +++---- 0007-Add-aarch64-machine-types.patch | 12 +- 0008-Add-ppc64-machine-types.patch | 18 +-- 0009-Add-s390x-machine-types.patch | 10 +- 0010-Add-x86_64-machine-types.patch | 28 ++-- 0011-Enable-make-check.patch | 24 ++-- ...mber-of-devices-that-can-be-assigned.patch | 9 +- ...Add-support-statement-to-help-output.patch | 7 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 10 +- 0015-Add-support-for-simpletrace.patch | 18 +-- ...documentation-instead-of-qemu-system.patch | 21 +-- 0017-usb-xhci-Fix-PCI-capability-order.patch | 7 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 6 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 6 +- ...ke-die-id-mandatory-unless-necessary.patch | 14 +- kvm-Update-version-for-v4.1.0-release.patch | 42 ------ kvm-display-bochs-fix-pcie-support.patch | 86 ----------- ...ake-rules-for-building-sifive_u-bios.patch | 50 ------- ...eset-CAS-IRQ-subsystem-after-devices.patch | 125 ---------------- ...ve-Fix-migration-of-hot-plugged-CPUs.patch | 135 ------------------ kvm-spec-Update-seavgabios-dependency.patch | 48 ------- kvm.modules | 18 +++ qemu-kvm.spec | 20 +-- sources | 2 +- 27 files changed, 222 insertions(+), 679 deletions(-) rename kvm-pc-Don-t-make-die-id-mandatory-unless-necessary.patch => 0020-pc-Don-t-make-die-id-mandatory-unless-necessary.patch (93%) delete mode 100644 kvm-Update-version-for-v4.1.0-release.patch delete mode 100644 kvm-display-bochs-fix-pcie-support.patch delete mode 100644 kvm-riscv-roms-Fix-make-rules-for-building-sifive_u-bios.patch delete mode 100644 
kvm-spapr-Reset-CAS-IRQ-subsystem-after-devices.patch delete mode 100644 kvm-spapr-xive-Fix-migration-of-hot-plugged-CPUs.patch delete mode 100644 kvm-spec-Update-seavgabios-dependency.patch create mode 100644 kvm.modules diff --git a/.gitignore b/.gitignore index f08ebba..514e53e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /qemu-3.1.0.tar.xz /qemu-4.0.0.tar.xz /qemu-4.1.0-rc4.tar.xz +/qemu-4.1.0.tar.xz diff --git a/0004-Initial-redhat-build.patch b/0004-Initial-redhat-build.patch index 882f7ef..ae67911 100644 --- a/0004-Initial-redhat-build.patch +++ b/0004-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From d7ed59eb82b87992582d05933bbf68107962ba43 Mon Sep 17 00:00:00 2001 +From faeb6cfa3f274d32bcaee546b9fb5f577f895c34 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build @@ -63,6 +63,8 @@ Merged patches (4.1.0-rc0): Merged patches (4.1.0-rc3): - e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) + +Signed-off-by: Danilo C. L. 
de Paula --- .gitignore | 1 + Makefile | 3 +- @@ -71,7 +73,7 @@ Merged patches (4.1.0-rc3): redhat/Makefile | 82 ++ redhat/Makefile.common | 51 + redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 2202 +++++++++++++++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 2202 +++++++++++++++++++++++++++++ redhat/scripts/process-patches.sh | 7 +- ui/vnc.c | 2 +- 10 files changed, 2382 insertions(+), 8 deletions(-) @@ -81,7 +83,7 @@ Merged patches (4.1.0-rc3): create mode 100644 redhat/qemu-kvm.spec.template diff --git a/Makefile b/Makefile -index 85862fb..288a5ac 100644 +index 85862fb81a..288a5ac28a 100644 --- a/Makefile +++ b/Makefile @@ -493,6 +493,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM @@ -102,7 +104,7 @@ index 85862fb..288a5ac 100644 $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) docs/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" diff --git a/configure b/configure -index 714e7fb..4ecc861 100755 +index 714e7fb6a1..4ecc861a43 100755 --- a/configure +++ b/configure @@ -2424,6 +2424,7 @@ if test "$seccomp" != "no" ; then @@ -114,7 +116,7 @@ index 714e7fb..4ecc861 100755 # xen probe diff --git a/os-posix.c b/os-posix.c -index 3ba7df8..ff26068 100644 +index 3ba7df8d75..ff26068d89 100644 --- a/os-posix.c +++ b/os-posix.c @@ -83,7 +83,7 @@ void os_setup_signal_handling(void) @@ -127,7 +129,7 @@ index 3ba7df8..ff26068 100644 char *os_find_datadir(void) { diff --git a/ui/vnc.c b/ui/vnc.c -index 38f92bf..933dc36 100644 +index 38f92bfca3..933dc36db5 100644 --- a/ui/vnc.c +++ b/ui/vnc.c @@ -3976,7 +3976,7 @@ void vnc_display_open(const char *id, Error **errp) @@ -140,5 +142,5 @@ index 38f92bf..933dc36 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -1.8.3.1 +2.20.1 diff --git a/0005-Enable-disable-devices-for-RHEL.patch b/0005-Enable-disable-devices-for-RHEL.patch index 6ae6fc7..8fa1c6d 100644 --- a/0005-Enable-disable-devices-for-RHEL.patch +++ 
b/0005-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 1421c61010f1de5e7381c107963839e17ea5b43a Mon Sep 17 00:00:00 2001 +From 36dad4518633c16a975df51d4d3a475c346b8aed Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 11 Jan 2016 11:53:33 +0100 Subject: Enable/disable devices for RHEL @@ -53,22 +53,24 @@ Merged patches (4.1.0-rc0): Merged patches (4.1.0-rc3): - 495a27d x86_64-rh-devices: add missing TPM passthrough - e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) + +Signed-off-by: Danilo C. L. de Paula --- Makefile.objs | 4 +- - default-configs/aarch64-rh-devices.mak | 20 +++++++ - default-configs/aarch64-softmmu.mak | 10 ++-- - default-configs/ppc64-rh-devices.mak | 32 +++++++++++ - default-configs/ppc64-softmmu.mak | 8 ++- - default-configs/rh-virtio.mak | 10 ++++ - default-configs/s390x-rh-devices.mak | 15 +++++ + default-configs/aarch64-rh-devices.mak | 20 +++++ + default-configs/aarch64-softmmu.mak | 10 ++- + default-configs/ppc64-rh-devices.mak | 32 ++++++++ + default-configs/ppc64-softmmu.mak | 8 +- + default-configs/rh-virtio.mak | 10 +++ + default-configs/s390x-rh-devices.mak | 15 ++++ default-configs/s390x-softmmu.mak | 4 +- - default-configs/x86_64-rh-devices.mak | 100 +++++++++++++++++++++++++++++++++ + default-configs/x86_64-rh-devices.mak | 100 +++++++++++++++++++++++++ default-configs/x86_64-softmmu.mak | 4 +- hw/acpi/ich9.c | 4 +- hw/arm/Makefile.objs | 2 +- - hw/block/fdc.c | 10 ++++ + hw/block/fdc.c | 10 +++ hw/bt/Makefile.objs | 4 +- - hw/core/Makefile.objs | 9 +-- + hw/core/Makefile.objs | 9 ++- hw/cpu/Makefile.objs | 5 +- hw/display/Makefile.objs | 5 +- hw/display/cirrus_vga.c | 3 + @@ -77,19 +79,19 @@ Merged patches (4.1.0-rc3): hw/isa/Makefile.objs | 2 +- hw/misc/Makefile.objs | 2 +- hw/net/e1000.c | 2 + - hw/pci-host/piix.c | 4 ++ + hw/pci-host/piix.c | 4 + hw/ppc/spapr_cpu_core.c | 2 + hw/usb/ccid-card-emulated.c | 2 + hw/vfio/pci-quirks.c | 5 ++ hw/vfio/pci.c | 5 ++ - qemu-options.hx | 7 +-- + 
qemu-options.hx | 7 +- redhat/qemu-kvm.spec.template | 5 +- target/arm/cpu.c | 4 +- - target/i386/cpu.c | 35 +++++++++--- - target/ppc/cpu-models.c | 10 ++++ + target/i386/cpu.c | 35 +++++++-- + target/ppc/cpu-models.c | 10 +++ target/s390x/cpu_models.c | 3 + - target/s390x/kvm.c | 8 +++ - vl.c | 8 ++- + target/s390x/kvm.c | 8 ++ + vl.c | 8 +- 36 files changed, 316 insertions(+), 44 deletions(-) create mode 100644 default-configs/aarch64-rh-devices.mak create mode 100644 default-configs/ppc64-rh-devices.mak @@ -98,7 +100,7 @@ Merged patches (4.1.0-rc3): create mode 100644 default-configs/x86_64-rh-devices.mak diff --git a/Makefile.objs b/Makefile.objs -index 6a143dc..8e96af1 100644 +index 6a143dcd57..8e96af153d 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -65,8 +65,8 @@ common-obj-y += replay/ @@ -114,7 +116,7 @@ index 6a143dc..8e96af1 100644 common-obj-y += vl.o diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak new file mode 100644 -index 0000000..a1ed641 +index 0000000000..a1ed641174 --- /dev/null +++ b/default-configs/aarch64-rh-devices.mak @@ -0,0 +1,20 @@ @@ -139,7 +141,7 @@ index 0000000..a1ed641 +CONFIG_VIRTIO_PCI=y +CONFIG_XIO3130=y diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak -index 958b1e0..8f6867d 100644 +index 958b1e08e4..8f6867d48a 100644 --- a/default-configs/aarch64-softmmu.mak +++ b/default-configs/aarch64-softmmu.mak @@ -1,8 +1,10 @@ @@ -159,7 +161,7 @@ index 958b1e0..8f6867d 100644 +include aarch64-rh-devices.mak diff --git a/default-configs/ppc64-rh-devices.mak b/default-configs/ppc64-rh-devices.mak new file mode 100644 -index 0000000..35f2106 +index 0000000000..35f2106d06 --- /dev/null +++ b/default-configs/ppc64-rh-devices.mak @@ -0,0 +1,32 @@ @@ -196,7 +198,7 @@ index 0000000..35f2106 +CONFIG_XIVE_SPAPR=y +CONFIG_XIVE_KVM=y diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak -index cca5266..fec354f 100644 +index 
cca52665d9..fec354f327 100644 --- a/default-configs/ppc64-softmmu.mak +++ b/default-configs/ppc64-softmmu.mak @@ -1,10 +1,12 @@ @@ -217,7 +219,7 @@ index cca5266..fec354f 100644 +include ppc64-rh-devices.mak diff --git a/default-configs/rh-virtio.mak b/default-configs/rh-virtio.mak new file mode 100644 -index 0000000..94ede1b +index 0000000000..94ede1b5f6 --- /dev/null +++ b/default-configs/rh-virtio.mak @@ -0,0 +1,10 @@ @@ -233,7 +235,7 @@ index 0000000..94ede1b +CONFIG_VIRTIO_SERIAL=y diff --git a/default-configs/s390x-rh-devices.mak b/default-configs/s390x-rh-devices.mak new file mode 100644 -index 0000000..c3c73fe +index 0000000000..c3c73fe752 --- /dev/null +++ b/default-configs/s390x-rh-devices.mak @@ -0,0 +1,15 @@ @@ -253,7 +255,7 @@ index 0000000..c3c73fe +CONFIG_VIRTIO_CCW=y +CONFIG_WDT_DIAG288=y diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak -index f2287a1..3e2e388 100644 +index f2287a133f..3e2e388e91 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak @@ -10,4 +10,6 @@ @@ -266,7 +268,7 @@ index f2287a1..3e2e388 100644 +include s390x-rh-devices.mak diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak new file mode 100644 -index 0000000..6b5d68e +index 0000000000..6b5d68e155 --- /dev/null +++ b/default-configs/x86_64-rh-devices.mak @@ -0,0 +1,100 @@ @@ -371,7 +373,7 @@ index 0000000..6b5d68e +CONFIG_TPM_EMULATOR=y +CONFIG_TPM_PASSTHROUGH=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak -index 64b2ee2..b5de7e5 100644 +index 64b2ee2960..b5de7e5279 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -1,3 +1,5 @@ @@ -382,7 +384,7 @@ index 64b2ee2..b5de7e5 100644 + +include x86_64-rh-devices.mak diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index e53dfe1..168a713 100644 +index e53dfe1ee3..168a713eff 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -446,8 +446,8 @@ void 
ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) @@ -397,7 +399,7 @@ index e53dfe1..168a713 100644 object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs -index 43ce8d5..13fc950 100644 +index 43ce8d5b19..13fc9502ff 100644 --- a/hw/arm/Makefile.objs +++ b/hw/arm/Makefile.objs @@ -27,7 +27,7 @@ obj-$(CONFIG_VEXPRESS) += vexpress.o @@ -410,7 +412,7 @@ index 43ce8d5..13fc950 100644 obj-$(CONFIG_PXA2XX) += pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o obj-$(CONFIG_DIGIC) += digic.o diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 9b24cb9..440b53b 100644 +index 9b24cb9b85..440b53b60c 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -43,6 +43,8 @@ @@ -438,7 +440,7 @@ index 9b24cb9..440b53b 100644 error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); } diff --git a/hw/bt/Makefile.objs b/hw/bt/Makefile.objs -index 867a7d2..e678e9e 100644 +index 867a7d2e8a..e678e9ee3c 100644 --- a/hw/bt/Makefile.objs +++ b/hw/bt/Makefile.objs @@ -1,3 +1,3 @@ @@ -448,7 +450,7 @@ index 867a7d2..e678e9e 100644 +#common-obj-y += hci-csr.o diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs -index f8481d9..bab9c2d 100644 +index f8481d959f..bab9c2d443 100644 --- a/hw/core/Makefile.objs +++ b/hw/core/Makefile.objs @@ -17,11 +17,12 @@ common-obj-$(CONFIG_SOFTMMU) += machine.o @@ -469,7 +471,7 @@ index f8481d9..bab9c2d 100644 obj-$(CONFIG_SOFTMMU) += machine-qmp-cmds.o diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs -index 8db9e8a..1601ea9 100644 +index 8db9e8a7b3..1601ea93c7 100644 --- a/hw/cpu/Makefile.objs +++ b/hw/cpu/Makefile.objs @@ -1,5 +1,6 @@ @@ -482,7 +484,7 @@ index 8db9e8a..1601ea9 100644 +common-obj-y += core.o +# cluster.o diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs -index a64998f..88a60b3 100644 +index a64998fc7b..88a60b36c5 100644 --- a/hw/display/Makefile.objs +++ b/hw/display/Makefile.objs @@ -1,8 +1,9 @@ @@ -498,7 +500,7 @@ index a64998f..88a60b3 100644 
common-obj-$(CONFIG_ADS7846) += ads7846.o common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index 2e4911a..49c16c8 100644 +index 2e4911a1e3..49c16c8f8b 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c @@ -2973,6 +2973,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) @@ -512,7 +514,7 @@ index 2e4911a..49c16c8 100644 Also accept 8 MB/16 MB for backward compatibility. */ if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index b97e555..55b30e6 100644 +index b97e555072..55b30e65ae 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c @@ -253,7 +253,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) @@ -535,7 +537,7 @@ index b97e555..55b30e6 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index 47a606f..562a9bc 100644 +index 47a606f5e3..562a9bc0a6 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c @@ -568,6 +568,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) @@ -548,7 +550,7 @@ index 47a606f..562a9bc 100644 static const TypeInfo i8042_info = { diff --git a/hw/isa/Makefile.objs b/hw/isa/Makefile.objs -index 9e106df..0828964 100644 +index 9e106df186..0828964014 100644 --- a/hw/isa/Makefile.objs +++ b/hw/isa/Makefile.objs @@ -1,5 +1,5 @@ @@ -559,7 +561,7 @@ index 9e106df..0828964 100644 common-obj-$(CONFIG_I82378) += i82378.o common-obj-$(CONFIG_PC87312) += pc87312.o diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs -index e9aab51..17f9422 100644 +index e9aab519a1..17f94225a6 100644 --- a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -9,7 +9,7 @@ common-obj-$(CONFIG_PCI_TESTDEV) += pci-testdev.o @@ -572,7 +574,7 @@ index e9aab51..17f9422 100644 # ARM devices diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index a023ceb..15d6c7d 100644 +index a023ceb27c..15d6c7d3be 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -1794,6 
+1794,7 @@ static const E1000Info e1000_devices[] = { @@ -592,7 +594,7 @@ index a023ceb..15d6c7d 100644 static void e1000_register_types(void) diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c -index d9c70f7..f294fbc 100644 +index d9c70f7ce6..f294fbce6a 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -801,6 +801,7 @@ static const TypeInfo i440fx_info = { @@ -622,7 +624,7 @@ index d9c70f7..f294fbc 100644 type_register_static(&piix3_info); type_register_static(&piix3_xen_info); diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 5621fb9..b91a106 100644 +index 5621fb9a3d..b91a106074 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -387,10 +387,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { @@ -639,7 +641,7 @@ index 5621fb9..b91a106 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c -index e20f8ed..0ddc26c 100644 +index e20f8ed293..0ddc26cb6c 100644 --- a/hw/usb/ccid-card-emulated.c +++ b/hw/usb/ccid-card-emulated.c @@ -603,6 +603,8 @@ static void emulated_class_initfn(ObjectClass *klass, void *data) @@ -652,7 +654,7 @@ index e20f8ed..0ddc26c 100644 static const TypeInfo emulated_card_info = { diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index b35a640..e8e7480 100644 +index b35a640030..e8e7480c51 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -1386,6 +1386,8 @@ static void vfio_pci_igd_lpc_bridge_class_init(ObjectClass *klass, void *data) @@ -675,7 +677,7 @@ index b35a640..e8e7480 100644 /* * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index d7a4e18..7c474a9 100644 +index d7a4e1875c..7c474a9d4a 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3180,6 +3180,7 @@ static const TypeInfo vfio_pci_dev_info = { @@ -703,7 +705,7 @@ index d7a4e18..7c474a9 100644 type_init(register_vfio_pci_dev_type) 
diff --git a/qemu-options.hx b/qemu-options.hx -index 9621e93..6873f9e 100644 +index 9621e934c0..6873f9e674 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2024,11 +2024,6 @@ ETEXI @@ -735,7 +737,7 @@ index 9621e93..6873f9e 100644 #ifdef CONFIG_TPM DEFHEADING(TPM device options:) diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index ec2ab95..7e7ccee 100644 +index ec2ab95dbe..7e7ccee45d 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -2702,7 +2702,9 @@ static void arm_cpu_register_types(void) @@ -750,7 +752,7 @@ index ec2ab95..7e7ccee 100644 } diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 19751e3..47a1236 100644 +index 19751e37a7..47a1236e9f 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1554,14 +1554,14 @@ static X86CPUDefinition builtin_x86_defs[] = { @@ -776,10 +778,11 @@ index 19751e3..47a1236 100644 .features[FEAT_8000_0001_ECX] = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM, .xlevel = 0x8000000A, -@@ -1791,6 +1791,25 @@ static X86CPUDefinition builtin_x86_defs[] = { +@@ -1790,6 +1790,25 @@ static X86CPUDefinition builtin_x86_defs[] = { + .xlevel = 0x80000008, .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", }, - { ++ { + .name = "cpu64-rhel6", + .level = 4, + .vendor = CPUID_VENDOR_AMD, @@ -798,12 +801,11 @@ index 19751e3..47a1236 100644 + .xlevel = 0x8000000A, + .model_id = "QEMU Virtual CPU version (cpu64-rhel6)", + }, -+ { + { .name = "Conroe", .level = 10, - .vendor = CPUID_VENDOR_INTEL, diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 086548e..1bbf378 100644 +index 086548e9b9..1bbf378c18 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -873,7 +875,7 @@ index 086548e..1bbf378 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 1d16d7d..47188ed 100644 +index 1d16d7d5e7..47188eddf4 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -404,6 +404,9 @@ static void check_unavailable_features(const 
S390CPUModel *max_model, @@ -887,7 +889,7 @@ index 1d16d7d..47188ed 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 6e814c2..153d092 100644 +index 6e814c230b..153d092d62 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -2363,6 +2363,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) @@ -906,7 +908,7 @@ index 6e814c2..153d092 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ diff --git a/vl.c b/vl.c -index b426b32..f9166f5 100644 +index b426b32134..f9166f509b 100644 --- a/vl.c +++ b/vl.c @@ -164,7 +164,7 @@ Chardev *parallel_hds[MAX_PARALLEL_PORTS]; @@ -963,5 +965,5 @@ index b426b32..f9166f5 100644 if (!xen_enabled()) { /* On 32-bit hosts, QEMU is limited by virtual address space */ -- -1.8.3.1 +2.20.1 diff --git a/0006-Machine-type-related-general-changes.patch b/0006-Machine-type-related-general-changes.patch index 42f26fa..0490225 100644 --- a/0006-Machine-type-related-general-changes.patch +++ b/0006-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From 3d84e663576b363fc4cb808752cebd7d9766cdce Mon Sep 17 00:00:00 2001 +From 34edf0da6480f60393083de194d1a04cd2cfe5c7 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -33,29 +33,31 @@ Merged patches (4.0.0): Merged patches (4.0.0-rc0): - 479ad30 redhat: fix cut'n'paste garbage in hw_compat comments - f19738e compat: Generic hw_compat_rhel_8_0 + +Signed-off-by: Danilo C. L. 
de Paula --- - hw/acpi/ich9.c | 16 ++++++ + hw/acpi/ich9.c | 16 +++++ hw/acpi/piix4.c | 6 +- - hw/char/serial.c | 16 ++++++ - hw/core/machine.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++ + hw/char/serial.c | 16 +++++ + hw/core/machine.c | 146 ++++++++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- - hw/net/e1000e.c | 21 +++++++ + hw/net/e1000e.c | 21 ++++++ hw/net/rtl8139.c | 4 +- hw/smbios/smbios.c | 1 + hw/timer/i8254_common.c | 2 +- hw/timer/mc146818rtc.c | 6 ++ hw/usb/hcd-uhci.c | 4 +- - hw/usb/hcd-xhci.c | 20 +++++++ + hw/usb/hcd-xhci.c | 20 ++++++ hw/usb/hcd-xhci.h | 2 + include/hw/acpi/ich9.h | 3 + - include/hw/boards.h | 21 +++++++ + include/hw/boards.h | 21 ++++++ include/hw/usb.h | 4 ++ migration/migration.c | 2 + migration/migration.h | 5 ++ 18 files changed, 274 insertions(+), 7 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 168a713..0a6346f 100644 +index 168a713eff..0a6346f1cf 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -441,6 +441,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) @@ -89,7 +91,7 @@ index 168a713..0a6346f 100644 ich9_pm_get_disable_s3, ich9_pm_set_disable_s3, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index ec4e186..0d2c8e4 100644 +index ec4e186cec..0d2c8e4fe3 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -306,7 +306,7 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id) @@ -113,7 +115,7 @@ index ec4e186..0d2c8e4 100644 DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_pci_hotplug, true), diff --git a/hw/char/serial.c b/hw/char/serial.c -index 7c42a2a..ae63cc0 100644 +index 7c42a2abfc..ae63cc0104 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -30,6 +30,7 @@ @@ -168,7 +170,7 @@ index 7c42a2a..ae63cc0 100644 } diff --git a/hw/core/machine.c b/hw/core/machine.c -index 32d1ca9..f30afe0 100644 +index 32d1ca9abc..f30afe0f0b 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -27,6 +27,152 
@@ @@ -325,7 +327,7 @@ index 32d1ca9..f30afe0 100644 { "VGA", "edid", "false" }, { "secondary-vga", "edid", "false" }, diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index 945952a..db0681e 100644 +index 945952a9b0..db0681e891 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c @@ -82,7 +82,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) @@ -338,7 +340,7 @@ index 945952a..db0681e 100644 }; diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index 581f7d0..9a8b1ed 100644 +index 581f7d03d5..9a8b1ed805 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -76,6 +76,11 @@ typedef struct E1000EState { @@ -412,7 +414,7 @@ index 581f7d0..9a8b1ed 100644 e1000e_prop_disable_vnet, bool), DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 0927317..0cead11 100644 +index 09273171e5..0cead119ab 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3176,7 +3176,7 @@ static int rtl8139_pre_save(void *opaque) @@ -435,7 +437,7 @@ index 0927317..0cead11 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 7bcd67b..dd0bc2b 100644 +index 7bcd67b098..dd0bc2b977 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -778,6 +778,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, @@ -447,7 +449,7 @@ index 7bcd67b..dd0bc2b 100644 SMBIOS_SET_DEFAULT(type2.product, product); SMBIOS_SET_DEFAULT(type2.version, version); diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index 76ca6ec..e7c87a9 100644 +index 76ca6ec444..e7c87a9c67 100644 --- a/hw/timer/i8254_common.c +++ b/hw/timer/i8254_common.c @@ -270,7 +270,7 @@ static const VMStateDescription vmstate_pit_common = { @@ -460,7 +462,7 @@ index 76ca6ec..e7c87a9 100644 vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c -index ce4550b..777efe9 
100644 +index ce4550b6f2..777efe9819 100644 --- a/hw/timer/mc146818rtc.c +++ b/hw/timer/mc146818rtc.c @@ -37,6 +37,7 @@ @@ -484,7 +486,7 @@ index ce4550b..777efe9 100644 } diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 98bd5cf..2fd2a9b 100644 +index 98bd5cf49d..2fd2a9bbf0 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c @@ -1218,12 +1218,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) @@ -504,7 +506,7 @@ index 98bd5cf..2fd2a9b 100644 if (s->masterbus) { USBPort *ports[NB_PORTS]; diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 5894a18..9854fae 100644 +index 5894a18663..9854fae583 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -3584,9 +3584,27 @@ static const VMStateDescription vmstate_xhci_slot = { @@ -545,7 +547,7 @@ index 5894a18..9854fae 100644 } }; diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h -index 2fad4df..f554b67 100644 +index 2fad4df2a7..f554b671e3 100644 --- a/hw/usb/hcd-xhci.h +++ b/hw/usb/hcd-xhci.h @@ -157,6 +157,8 @@ typedef struct XHCIEvent { @@ -558,7 +560,7 @@ index 2fad4df..f554b67 100644 typedef struct XHCIInterrupter { diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h -index 41568d1..1a23ccc 100644 +index 41568d1837..1a23ccc412 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -61,6 +61,9 @@ typedef struct ICH9LPCPMRegs { @@ -572,7 +574,7 @@ index 41568d1..1a23ccc 100644 #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" diff --git a/include/hw/boards.h b/include/hw/boards.h -index a71d1a5..521c625 100644 +index a71d1a53a5..521c6252bf 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -362,4 +362,25 @@ extern const size_t hw_compat_2_2_len; @@ -602,7 +604,7 @@ index a71d1a5..521c625 100644 + #endif diff --git a/include/hw/usb.h b/include/hw/usb.h -index c21f41c..71502b0 100644 +index c21f41c8a9..71502b0bad 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h @@ -604,4 +604,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, @@ -615,7 
+617,7 @@ index c21f41c..71502b0 100644 + #endif diff --git a/migration/migration.c b/migration/migration.c -index 8a607fe..beffbfd 100644 +index 8a607fe1e2..beffbfd6e1 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -116,6 +116,8 @@ enum mig_rp_message_type { @@ -628,7 +630,7 @@ index 8a607fe..beffbfd 100644 migrations at once. For now we don't need to add dynamic creation of migration */ diff --git a/migration/migration.h b/migration/migration.h -index 1fdd7b2..b89e901 100644 +index 1fdd7b21fd..b89e90199c 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -332,6 +332,11 @@ void init_dirty_bitmap_incoming_migration(void); @@ -644,5 +646,5 @@ index 1fdd7b2..b89e901 100644 #define qemu_ram_foreach_block \ #warning "Use foreach_not_ignored_block in migration code" -- -1.8.3.1 +2.20.1 diff --git a/0007-Add-aarch64-machine-types.patch b/0007-Add-aarch64-machine-types.patch index 4195926..578f879 100644 --- a/0007-Add-aarch64-machine-types.patch +++ b/0007-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 47a1c8ecb7b0b3846fe5fcefcbb912cd31d291c7 Mon Sep 17 00:00:00 2001 +From cddb8f9ba14dba3bc5d37241924e84340a2b1f6a Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -22,13 +22,15 @@ Merged patches (4.0.0): Merged patches (4.1.0-rc0): - c3e39ef aarch64: Add virt-rhel8.1.0 machine type for ARM - 59a46d1 aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine + +Signed-off-by: Danilo C. L. 
de Paula --- - hw/arm/virt.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 144 +++++++++++++++++++++++++++++++++++++++++- include/hw/arm/virt.h | 11 ++++ 2 files changed, 154 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d9496c9..51fb5f8 100644 +index d9496c9363..51fb5f82b4 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -65,6 +65,7 @@ @@ -215,7 +217,7 @@ index d9496c9..51fb5f8 100644 +} +DEFINE_RHEL_MACHINE_AS_LATEST(8, 1, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index a720942..1baa48d 100644 +index a72094204e..1baa48d46e 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -137,6 +137,7 @@ typedef struct { @@ -244,5 +246,5 @@ index a720942..1baa48d 100644 /* Return the number of used redistributor regions */ -- -1.8.3.1 +2.20.1 diff --git a/0008-Add-ppc64-machine-types.patch b/0008-Add-ppc64-machine-types.patch index be2cde0..95f4901 100644 --- a/0008-Add-ppc64-machine-types.patch +++ b/0008-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 9c73e7109477fecb0477bd6d53e94080eca30e64 Mon Sep 17 00:00:00 2001 +From 765586a71ea9b01f414e4662d11ea77288976f31 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -25,8 +25,10 @@ Merged patches (4.1.0-rc0): - f21757edc target/ppc/spapr: Enable mitigations by default for pseries-4.0 machine type - 2511c63 redhat: sync pseries-rhel7.6.0 with rhel-av-8.0.1 - 89f01da redhat: define pseries-rhel8.1.0 machine type + +Signed-off-by: Danilo C. L. 
de Paula --- - hw/ppc/spapr.c | 243 ++++++++++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr.c | 243 ++++++++++++++++++++++++++++++++++++++++ hw/ppc/spapr_cpu_core.c | 13 +++ include/hw/ppc/spapr.h | 1 + target/ppc/compat.c | 13 ++- @@ -34,7 +36,7 @@ Merged patches (4.1.0-rc0): 5 files changed, 270 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 821f0d4..ab64d43 100644 +index 12ed4b065c..669eae100e 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -4382,6 +4382,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) @@ -316,7 +318,7 @@ index 821f0d4..ab64d43 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index b91a106..29a3c7d 100644 +index b91a106074..29a3c7d8ee 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -21,6 +21,7 @@ @@ -354,7 +356,7 @@ index b91a106..29a3c7d 100644 spapr_cpu_reset(cpu); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 60553d3..b0ba32e 100644 +index 60553d32c4..b0ba32e6dd 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -121,6 +121,7 @@ struct SpaprMachineClass { @@ -366,7 +368,7 @@ index 60553d3..b0ba32e 100644 uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, diff --git a/target/ppc/compat.c b/target/ppc/compat.c -index 7de4bf3..3e2e353 100644 +index 7de4bf3122..3e2e35342d 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -105,8 +105,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) @@ -391,7 +393,7 @@ index 7de4bf3..3e2e353 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index c9beba2..76cb7c2 100644 +index c9beba2a5c..76cb7c2e37 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1350,6 +1350,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) @@ -403,5 +405,5 @@ index 
c9beba2..76cb7c2 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, -- -1.8.3.1 +2.20.1 diff --git a/0009-Add-s390x-machine-types.patch b/0009-Add-s390x-machine-types.patch index 767272b..3c353ee 100644 --- a/0009-Add-s390x-machine-types.patch +++ b/0009-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From e9de3d4a98a79e820be14e2a752e2ebb20e341d4 Mon Sep 17 00:00:00 2001 +From 2ae1e5940185a169886f2492f97fc98a625da889 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -15,12 +15,14 @@ Merged patches (3.1.0): Merged patches (4.1.0-rc0): - 6c200d665b hw/s390x/s390-virtio-ccw: Add machine types for RHEL8.0.0 + +Signed-off-by: Danilo C. L. de Paula --- - hw/s390x/s390-virtio-ccw.c | 67 +++++++++++++++++++++++++++++++++++++++++++++- + hw/s390x/s390-virtio-ccw.c | 67 +++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 5b6a9a4..4d8c2ec 100644 +index 5b6a9a4e55..4d8c2ec22a 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -636,7 +636,7 @@ bool css_migration_enabled(void) @@ -112,5 +114,5 @@ index 5b6a9a4..4d8c2ec 100644 static void ccw_machine_register_types(void) { -- -1.8.3.1 +2.20.1 diff --git a/0010-Add-x86_64-machine-types.patch b/0010-Add-x86_64-machine-types.patch index 0a6f333..23a3ebe 100644 --- a/0010-Add-x86_64-machine-types.patch +++ b/0010-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 83831de11704d0f48ab56fec1f434ae9396e9cc1 Mon Sep 17 00:00:00 2001 +From 728953bda3fcd8bc06026b59b3b86191fb7787c0 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -25,11 +25,13 @@ Merged patches (4.1.0-rc0): - 1de7949 kvm: clear out KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT for older machine types - 18cf0d7 target/i386: Disable 
MPX support on named CPU models (partialy) - 2660667 rhel: Set host-phys-bits-limit=48 on rhel machine-types + +Signed-off-by: Danilo C. L. de Paula --- hw/i386/acpi-build.c | 3 + - hw/i386/pc.c | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++- - hw/i386/pc_piix.c | 200 ++++++++++++++++++++++++++++++++++++++++++++- - hw/i386/pc_q35.c | 114 +++++++++++++++++++++++++- + hw/i386/pc.c | 226 ++++++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 200 +++++++++++++++++++++++++++++++++++++- + hw/i386/pc_q35.c | 114 +++++++++++++++++++++- include/hw/boards.h | 2 + include/hw/i386/pc.h | 27 ++++++ target/i386/cpu.c | 9 +- @@ -37,7 +39,7 @@ Merged patches (4.1.0-rc0): 8 files changed, 578 insertions(+), 7 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index f3fdfef..1d64f0b 100644 +index f3fdfefcd5..1d64f0bad2 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -203,6 +203,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) @@ -51,7 +53,7 @@ index f3fdfef..1d64f0b 100644 } diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 549c437..9ded0db 100644 +index 549c437050..9ded0db80d 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -354,6 +354,224 @@ GlobalProperty pc_compat_1_4[] = { @@ -309,7 +311,7 @@ index 549c437..9ded0db 100644 hc->pre_plug = pc_machine_device_pre_plug_cb; hc->plug = pc_machine_device_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index c2280c7..c86c48c 100644 +index c2280c72ef..c86c48c092 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -51,6 +51,7 @@ @@ -538,7 +540,7 @@ index c2280c7..c86c48c 100644 +DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 397e1fd..4959ed3 100644 +index 397e1fdd2f..4959ed329f 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -198,8 +198,8 @@ static void pc_q35_init(MachineState *machine) @@ -674,7 +676,7 @@ index 397e1fd..4959ed3 
100644 +DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, + pc_q35_machine_rhel730_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index 521c625..b4a8c4a 100644 +index 521c6252bf..b4a8c4ab10 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -214,6 +214,8 @@ struct MachineClass { @@ -687,7 +689,7 @@ index 521c625..b4a8c4a 100644 int nb_nodes, ram_addr_t size); bool ignore_boot_device_suffixes; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 859b64c..605cc71 100644 +index 859b64c51d..605cc714d3 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -142,6 +142,9 @@ typedef struct PCMachineClass { @@ -732,7 +734,7 @@ index 859b64c..605cc71 100644 * depending on QEMU versions up to QEMU 2.4. */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 47a1236..cd71a09 100644 +index 47a1236e9f..cd71a09b33 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1548,11 +1548,17 @@ static CPUCaches epyc_cache_info = { @@ -763,7 +765,7 @@ index 47a1236..cd71a09 100644 }; diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index dbbb137..da5a5ef 100644 +index dbbb13772a..da5a5ef5f3 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -2805,6 +2805,7 @@ static int kvm_get_msrs(X86CPU *cpu) @@ -785,5 +787,5 @@ index dbbb137..da5a5ef 100644 case MSR_KVM_PV_EOI_EN: env->pv_eoi_en_msr = msrs[i].data; -- -1.8.3.1 +2.20.1 diff --git a/0011-Enable-make-check.patch b/0011-Enable-make-check.patch index a7a4882..6497683 100644 --- a/0011-Enable-make-check.patch +++ b/0011-Enable-make-check.patch @@ -1,4 +1,4 @@ -From c1635c2c93f0efe82a6ab1dc973402882882382c Mon Sep 17 00:00:00 2001 +From 70e14e50ec8652a4243111dc293fe09ebcef8de1 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:48:41 +0200 Subject: Enable make check @@ -27,6 +27,8 @@ Merged patches (4.1.0-rc0): Conflicts: tests/qemu-iotests/group + +Signed-off-by: Danilo C. L. 
de Paula --- redhat/qemu-kvm.spec.template | 8 +++++++- tests/Makefile.include | 10 +++++----- @@ -41,7 +43,7 @@ Conflicts: 10 files changed, 39 insertions(+), 17 deletions(-) diff --git a/tests/Makefile.include b/tests/Makefile.include -index fd7fdb8..d3da940 100644 +index fd7fdb8658..d3da940f8c 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -164,7 +164,7 @@ check-qtest-i386-y += tests/ide-test$(EXESUF) @@ -83,7 +85,7 @@ index fd7fdb8..d3da940 100644 check-qtest-s390x-y += tests/cpu-plug-test$(EXESUF) check-qtest-s390x-y += tests/migration-test$(EXESUF) diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c -index 24852d4..dce5860 100644 +index 24852d4c7d..dce5860d99 100644 --- a/tests/boot-serial-test.c +++ b/tests/boot-serial-test.c @@ -97,18 +97,22 @@ static testdef_t tests[] = { @@ -111,7 +113,7 @@ index 24852d4..dce5860 100644 { "sparc", "LX", "", "TMS390S10" }, { "sparc", "SS-4", "", "MB86904" }, diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c -index 668f001..453ca8e 100644 +index 668f00144e..453ca8e583 100644 --- a/tests/cpu-plug-test.c +++ b/tests/cpu-plug-test.c @@ -190,8 +190,8 @@ static void add_pseries_test_case(const char *mname) @@ -126,7 +128,7 @@ index 668f001..453ca8e 100644 } data = g_new(PlugTestData, 1); diff --git a/tests/e1000-test.c b/tests/e1000-test.c -index c387984..c89112d 100644 +index c387984ef6..c89112d6f8 100644 --- a/tests/e1000-test.c +++ b/tests/e1000-test.c @@ -22,9 +22,11 @@ struct QE1000 { @@ -142,7 +144,7 @@ index c387984..c89112d 100644 static void *e1000_get_driver(void *obj, const char *interface) diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c -index 61bc1d1..028d45c 100644 +index 61bc1d1e7b..028d45c7d7 100644 --- a/tests/prom-env-test.c +++ b/tests/prom-env-test.c @@ -88,10 +88,14 @@ int main(int argc, char *argv[]) @@ -161,7 +163,7 @@ index 61bc1d1..028d45c 100644 add_tests(sparc_machines); } else if (!strcmp(arch, "sparc64")) { diff --git a/tests/qemu-iotests/051 
b/tests/qemu-iotests/051 -index ce942a5..f64429e 100755 +index ce942a5444..f64429e21f 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -181,11 +181,11 @@ run_qemu -drive if=virtio @@ -195,7 +197,7 @@ index ce942a5..f64429e 100755 *) ;; diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index f13e5f2..813db25 100644 +index f13e5f2e23..813db2565b 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -92,7 +92,7 @@ @@ -217,7 +219,7 @@ index f13e5f2..813db25 100644 101 rw quick 102 rw quick diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c -index e75b959..6b46b73 100644 +index e75b959950..6b46b73dd0 100644 --- a/tests/test-x86-cpuid-compat.c +++ b/tests/test-x86-cpuid-compat.c @@ -300,6 +300,7 @@ int main(int argc, char **argv) @@ -237,7 +239,7 @@ index e75b959..6b46b73 100644 /* Test feature parsing */ add_feature_test("x86/cpuid/features/plus", diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c -index 9eb24b0..465ed26 100644 +index 9eb24b00e4..465ed26dfc 100644 --- a/tests/usb-hcd-xhci-test.c +++ b/tests/usb-hcd-xhci-test.c @@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) @@ -267,5 +269,5 @@ index 9eb24b0..465ed26 100644 qtest_start("-device nec-usb-xhci,id=xhci" -- -1.8.3.1 +2.20.1 diff --git a/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch index 42b8e91..02030ab 100644 --- a/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From ce492d69e627db370aef7907409e88b83e71619b Mon Sep 17 00:00:00 2001 +From 59f231a2ac7d8915f905cec514de580bbbf039c0 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -35,13 +35,14 @@ Merged patches (2.9.0): (cherry picked from commit 9fa3c9fc6dfcde76d80db1aa601b2d577f72ceec) (cherry picked from 
commit 3cb35556dc7d994f203d732fe952f95fcdb03c0a) +Signed-off-by: Danilo C. L. de Paula --- hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- hw/vfio/pci.h | 1 + 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 7c474a9..bb9f28e 100644 +index 7c474a9d4a..bb9f28ed95 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -41,6 +41,9 @@ @@ -97,7 +98,7 @@ index 7c474a9..bb9f28e 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 834a90d..205954e 100644 +index 834a90d646..205954e9f8 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -135,6 +135,7 @@ typedef struct VFIOPCIDevice { @@ -109,5 +110,5 @@ index 834a90d..205954e 100644 uint32_t device_id; uint32_t sub_vendor_id; -- -1.8.3.1 +2.20.1 diff --git a/0013-Add-support-statement-to-help-output.patch b/0013-Add-support-statement-to-help-output.patch index 421b5b5..b93abbf 100644 --- a/0013-Add-support-statement-to-help-output.patch +++ b/0013-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From c219069712b16be5664bfa6a4fcec4aa261d21c8 Mon Sep 17 00:00:00 2001 +From 9176ac2bc13e273deab798ec95fc2a01f35db43d Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -18,12 +18,13 @@ as unsupported by Red Hat, and advising users to use libvirt instead. Signed-off-by: Eduardo Habkost (cherry picked from commit 2a07700936e39856cc9f149c6a6517f0715536a6) (cherry picked from commit 5dd2f4706e2fef945771949e59a8fcc1b5452de9) +Signed-off-by: Danilo C. L. 
de Paula --- vl.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/vl.c b/vl.c -index f9166f5..dd1fadf 100644 +index f9166f509b..dd1fadfe08 100644 --- a/vl.c +++ b/vl.c @@ -1803,9 +1803,17 @@ static void version(void) @@ -53,5 +54,5 @@ index f9166f5..dd1fadf 100644 } -- -1.8.3.1 +2.20.1 diff --git a/0014-globally-limit-the-maximum-number-of-CPUs.patch b/0014-globally-limit-the-maximum-number-of-CPUs.patch index 84fa5b6..136895d 100644 --- a/0014-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0014-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From 29def10374e810c82ef7e01bcdac0c0e77b42aec Mon Sep 17 00:00:00 2001 +From 11b7001f3eb0c4de20695821eaee5589c4c3ac32 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -74,13 +74,15 @@ The recommended vcpu max limit (KVM_CAP_NR_VCPUS) should be used instead of the actual max vcpu limit (KVM_CAP_MAX_VCPUS) to give an error. This commit matches the limit to current KVM_CAP_NR_VCPUS value. + +Signed-off-by: Danilo C. L. 
de Paula --- accel/kvm/kvm-all.c | 12 ++++++++++++ vl.c | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index f450f25..2d850df 100644 +index f450f25295..2d850df9e0 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -1869,6 +1869,18 @@ static int kvm_init(MachineState *ms) @@ -103,7 +105,7 @@ index f450f25..2d850df 100644 if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " diff --git a/vl.c b/vl.c -index dd1fadf..7c52255 100644 +index dd1fadfe08..7c52255141 100644 --- a/vl.c +++ b/vl.c @@ -132,6 +132,8 @@ int main(int argc, char **argv) @@ -146,5 +148,5 @@ index dd1fadf..7c52255 100644 configure_rtc(qemu_find_opts_singleton("rtc")); -- -1.8.3.1 +2.20.1 diff --git a/0015-Add-support-for-simpletrace.patch b/0015-Add-support-for-simpletrace.patch index a845db4..6fc5c29 100644 --- a/0015-Add-support-for-simpletrace.patch +++ b/0015-Add-support-for-simpletrace.patch @@ -1,4 +1,4 @@ -From 1e856ec9fb79314305380aba4c89c5d8987616a9 Mon Sep 17 00:00:00 2001 +From c32f0e6a6369ce5bc5c337b1fa158c43c067ca6f Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 8 Oct 2015 09:50:17 +0200 Subject: Add support for simpletrace @@ -21,11 +21,13 @@ Merged patches (2.3.0): - 5292fc3 trace: add SystemTap init scripts for simpletrace bridge - eda9e5e simpletrace: install simpletrace.py - 85c4c8f trace: add systemtap-initscript README file to RPM + +Signed-off-by: Danilo C. L. 
de Paula --- .gitignore | 2 ++ Makefile | 4 +++ - README.systemtap | 43 +++++++++++++++++++++++++++++++++ - redhat/qemu-kvm.spec.template | 26 +++++++++++++++++++- + README.systemtap | 43 +++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 26 ++++++++++++++- scripts/systemtap/conf.d/qemu_kvm.conf | 4 +++ scripts/systemtap/script.d/qemu_kvm.stp | 1 + 6 files changed, 79 insertions(+), 1 deletion(-) @@ -34,7 +36,7 @@ Merged patches (2.3.0): create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp diff --git a/Makefile b/Makefile -index 288a5ac..8caca6c 100644 +index 288a5ac28a..8caca6cfc0 100644 --- a/Makefile +++ b/Makefile @@ -910,6 +910,10 @@ endif @@ -50,7 +52,7 @@ index 288a5ac..8caca6c 100644 ctags: diff --git a/README.systemtap b/README.systemtap new file mode 100644 -index 0000000..ad913fc +index 0000000000..ad913fc990 --- /dev/null +++ b/README.systemtap @@ -0,0 +1,43 @@ @@ -99,7 +101,7 @@ index 0000000..ad913fc + # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf new file mode 100644 -index 0000000..372d816 +index 0000000000..372d8160a4 --- /dev/null +++ b/scripts/systemtap/conf.d/qemu_kvm.conf @@ -0,0 +1,4 @@ @@ -109,11 +111,11 @@ index 0000000..372d816 +qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp new file mode 100644 -index 0000000..c04abf9 +index 0000000000..c04abf9449 --- /dev/null +++ b/scripts/systemtap/script.d/qemu_kvm.stp @@ -0,0 +1 @@ +probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} -- -1.8.3.1 +2.20.1 diff --git a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 35095e1..f6a6a9d 100644 --- 
a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From 6653aed7b06fd81e60633f7ff81d2d2cd35777fd Mon Sep 17 00:00:00 2001 +From 70a20c03876ba38ba94a5e8e6e1d848e60ef42eb Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 30 Nov 2018 09:11:03 +0100 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -16,15 +16,16 @@ We change the name and location of qemu-kvm binaries. Update documentation to reflect this change. Only architectures available in RHEL are updated. Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. de Paula --- - docs/qemu-block-drivers.texi | 70 +++++++++++----------- - docs/qemu-cpu-models.texi | 8 +-- - qemu-doc.texi | 70 +++++++++++----------- - qemu-options.hx | 140 ++++++++++++++++++++++--------------------- + docs/qemu-block-drivers.texi | 70 +++++++++--------- + docs/qemu-cpu-models.texi | 8 +- + qemu-doc.texi | 70 +++++++++--------- + qemu-options.hx | 140 ++++++++++++++++++----------------- 4 files changed, 146 insertions(+), 142 deletions(-) diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi -index 91ab0ec..35cc3d8 100644 +index 91ab0eceae..35cc3d838c 100644 --- a/docs/qemu-block-drivers.texi +++ b/docs/qemu-block-drivers.texi @@ -405,7 +405,7 @@ QEMU can automatically create a virtual FAT disk image from a @@ -235,7 +236,7 @@ index 91ab0ec..35cc3d8 100644 @var{host}:@var{bus}:@var{slot}.@var{func} is the NVMe controller's PCI device diff --git a/docs/qemu-cpu-models.texi b/docs/qemu-cpu-models.texi -index ad040cf..0de3229 100644 +index ad040cfc98..0de3229e43 100644 --- a/docs/qemu-cpu-models.texi +++ b/docs/qemu-cpu-models.texi @@ -578,25 +578,25 @@ CPU models / features in QEMU and libvirt @@ -269,7 +270,7 @@ index ad040cf..0de3229 100644 @end table diff --git a/qemu-doc.texi b/qemu-doc.texi -index 577d1e8..44427bb 100644 +index 577d1e8376..44427bb0e1 100644 --- a/qemu-doc.texi +++ 
b/qemu-doc.texi @@ -207,12 +207,12 @@ Note that, by default, GUS shares IRQ(7) with parallel ports and so @@ -561,7 +562,7 @@ index 577d1e8..44427bb 100644 -prom-env 'boot-args=conf=hd:2,\yaboot.conf' @end example diff --git a/qemu-options.hx b/qemu-options.hx -index 6873f9e..9f323ec 100644 +index 6873f9e674..9f323ec2a2 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -254,7 +254,7 @@ This option defines a free-form string that can be used to describe @var{fd}. @@ -953,5 +954,5 @@ index 6873f9e..9f323ec 100644 -chardev socket,id=chardev0,path=/path/to/socket \ -object cryptodev-vhost-user,id=cryptodev0,chardev=chardev0 \ -- -1.8.3.1 +2.20.1 diff --git a/0017-usb-xhci-Fix-PCI-capability-order.patch b/0017-usb-xhci-Fix-PCI-capability-order.patch index eea2b60..a74cc82 100644 --- a/0017-usb-xhci-Fix-PCI-capability-order.patch +++ b/0017-usb-xhci-Fix-PCI-capability-order.patch @@ -1,4 +1,4 @@ -From 3d4a21ffdf876acfd5a8546a2f3e9ab2bfcf2ff9 Mon Sep 17 00:00:00 2001 +From 4ebea24b67b02abb2fbbd67c3334496315d75b6f Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 5 May 2017 19:06:14 +0200 Subject: usb-xhci: Fix PCI capability order @@ -56,12 +56,13 @@ Conflicts: (cherry picked from commit a42f86dc906cc7d2c16d02bf125ed76847b469cb) (cherry picked from commit 992ab2e4f6e15d3e51bc716763aa8d6f45c6d29d) +Signed-off-by: Danilo C. L. 
de Paula --- hw/usb/hcd-xhci.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 9854fae..adfacac 100644 +index 9854fae583..adfacace65 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -3397,6 +3397,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) @@ -91,5 +92,5 @@ index 9854fae..adfacac 100644 /* TODO check for errors, and should fail when msix=on */ msix_init(dev, xhci->numintrs, -- -1.8.3.1 +2.20.1 diff --git a/0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index 585ef69..3b3ae2c 100644 --- a/0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 5ba84e6e9b843cc224735e293da35fc218da27fb Mon Sep 17 00:00:00 2001 +From 1a030226ff750613c7f567150f6bf97b902b0151 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] @@ -45,7 +45,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 8b9e5e2..9df5494 100644 +index 8b9e5e2b49..9df5494398 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -805,6 +805,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, @@ -65,5 +65,5 @@ index 8b9e5e2..9df5494 100644 if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { return; -- -1.8.3.1 +2.20.1 diff --git a/0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index b932c83..a0260c2 100644 --- a/0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From 4653a397c083e29b2a54ade8b17c977f224a3b80 Mon Sep 17 00:00:00 2001 +From e0daf3e497f1f82a0d8c45a9d26e9982a6f866ac Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts @@ -32,7 +32,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index bbb001f..40cd9dc 100644 +index bbb001f84a..40cd9dcde6 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -343,12 +343,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, @@ -56,5 +56,5 @@ index bbb001f..40cd9dc 100644 spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); } -- -1.8.3.1 +2.20.1 diff --git a/kvm-pc-Don-t-make-die-id-mandatory-unless-necessary.patch b/0020-pc-Don-t-make-die-id-mandatory-unless-necessary.patch similarity index 93% rename from kvm-pc-Don-t-make-die-id-mandatory-unless-necessary.patch rename to 0020-pc-Don-t-make-die-id-mandatory-unless-necessary.patch index fa8a1bb..b527a07 100644 --- a/kvm-pc-Don-t-make-die-id-mandatory-unless-necessary.patch +++ b/0020-pc-Don-t-make-die-id-mandatory-unless-necessary.patch @@ -1,7 +1,7 @@ -From 7909ad1654df63be6321af36f1ef436a990ab6df Mon Sep 17 00:00:00 2001 +From 7d5c2ef35d0bd7eb90fac2f40225bcfb4a46421d Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Fri, 16 Aug 2019 17:16:33 +0100 -Subject: [PATCH 2/7] pc: Don't make die-id mandatory unless necessary +Subject: pc: Don't make die-id mandatory unless necessary RH-Author: Eduardo Habkost Message-id: <20190816171633.26797-2-ehabkost@redhat.com> @@ -45,13 +45,13 @@ Changes v1 -> v2: Signed-off-by: Danilo C. L. 
de Paula --- - hw/i386/pc.c | 8 ++++++++ - tests/acceptance/pc_cpu_hotplug_props.py | 35 ++++++++++++++++++++++++++++++++ + hw/i386/pc.c | 8 ++++++ + tests/acceptance/pc_cpu_hotplug_props.py | 35 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 tests/acceptance/pc_cpu_hotplug_props.py diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 9ded0db..b3d2d1e 100644 +index 9ded0db80d..b3d2d1e88a 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -2622,6 +2622,14 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, @@ -71,7 +71,7 @@ index 9ded0db..b3d2d1e 100644 return; diff --git a/tests/acceptance/pc_cpu_hotplug_props.py b/tests/acceptance/pc_cpu_hotplug_props.py new file mode 100644 -index 0000000..08b7e63 +index 0000000000..08b7e632c6 --- /dev/null +++ b/tests/acceptance/pc_cpu_hotplug_props.py @@ -0,0 +1,35 @@ @@ -111,5 +111,5 @@ index 0000000..08b7e63 + self.vm.launch() + self.assertEquals(len(self.vm.command('query-cpus')), 2) -- -1.8.3.1 +2.20.1 diff --git a/kvm-Update-version-for-v4.1.0-release.patch b/kvm-Update-version-for-v4.1.0-release.patch deleted file mode 100644 index dcd36bc..0000000 --- a/kvm-Update-version-for-v4.1.0-release.patch +++ /dev/null @@ -1,42 +0,0 @@ -From cf5e1c094d045cbbb9fd58fd9ec9c8fe8342eb4f Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Mon, 19 Aug 2019 08:54:19 +0100 -Subject: [PATCH 7/7] Update version for v4.1.0 release -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -Message-id: <7fdfb1e847814fc97d2ef85172e75e2af70b5cae.1566204425.git.mrezanin@redhat.com> -Patchwork-id: 90055 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 5/5] Update version for v4.1.0 release -Bugzilla: 1733977 1740692 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Yash Mankad -RH-Acked-by: Danilo de Paula - -From: Peter Maydell - -Signed-off-by: Peter Maydell -(cherry picked from commit 9e06029aea3b2eca1d5261352e695edc1e7d7b8b) -Signed-off-by: Danilo C. 
L. de Paula - -Conflicts: - VERSION - we do not backport rc5 version change - -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. de Paula ---- - VERSION | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/VERSION b/VERSION -index 8048f65..ee74734 100644 ---- a/VERSION -+++ b/VERSION -@@ -1 +1 @@ --4.0.94 -+4.1.0 --- -1.8.3.1 - diff --git a/kvm-display-bochs-fix-pcie-support.patch b/kvm-display-bochs-fix-pcie-support.patch deleted file mode 100644 index dfdf84b..0000000 --- a/kvm-display-bochs-fix-pcie-support.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 8ba44c5854ab6e6feac8bc979f07ae241825673d Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Mon, 19 Aug 2019 08:54:15 +0100 -Subject: [PATCH 3/7] display/bochs: fix pcie support -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -Message-id: -Patchwork-id: 90060 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 1/5] display/bochs: fix pcie support -Bugzilla: 1733977 1740692 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Yash Mankad -RH-Acked-by: Danilo de Paula - -From: Gerd Hoffmann - -Set QEMU_PCI_CAP_EXPRESS unconditionally in init(), then clear it in -realize() in case the device is not connected to a PCIe bus. - -This makes sure the pci config space allocation is big enough, so -accessing the PCIe extended config space doesn't overflow the pci -config space buffer. - -PCI(e) config space is guest writable. Writes are limited by -write mask (which probably is also filled with random stuff), -so the guest can only flip enabled bits. But I suspect it -still might be exploitable, so rather serious because it might -be a host escape for the guest. On the other hand the device -is probably not yet in widespread use. - -(For a QEMU version without this commit, a mitigation for the -bug is available: use "-device bochs-display" as a conventional pci -device only.) 
- -Cc: qemu-stable@nongnu.org -Signed-off-by: Gerd Hoffmann -Message-id: 20190812065221.20907-2-kraxel@redhat.com -Reviewed-by: Alex Williamson -Reviewed-by: Paolo Bonzini -Signed-off-by: Peter Maydell -(cherry picked from commit 5e7bcdcfe69ce0fad66012b2cfb2035003c37eef) - -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. de Paula ---- - hw/display/bochs-display.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/hw/display/bochs-display.c b/hw/display/bochs-display.c -index 582133d..8e83b51 100644 ---- a/hw/display/bochs-display.c -+++ b/hw/display/bochs-display.c -@@ -297,9 +297,10 @@ static void bochs_display_realize(PCIDevice *dev, Error **errp) - } - - if (pci_bus_is_express(pci_get_bus(dev))) { -- dev->cap_present |= QEMU_PCI_CAP_EXPRESS; - ret = pcie_endpoint_cap_init(dev, 0x80); - assert(ret > 0); -+ } else { -+ dev->cap_present &= ~QEMU_PCI_CAP_EXPRESS; - } - - memory_region_set_log(&s->vram, true, DIRTY_MEMORY_VGA); -@@ -322,11 +323,15 @@ static void bochs_display_set_big_endian_fb(Object *obj, bool value, - - static void bochs_display_init(Object *obj) - { -+ PCIDevice *dev = PCI_DEVICE(obj); -+ - /* Expose framebuffer byteorder via QOM */ - object_property_add_bool(obj, "big-endian-framebuffer", - bochs_display_get_big_endian_fb, - bochs_display_set_big_endian_fb, - NULL); -+ -+ dev->cap_present |= QEMU_PCI_CAP_EXPRESS; - } - - static void bochs_display_exit(PCIDevice *dev) --- -1.8.3.1 - diff --git a/kvm-riscv-roms-Fix-make-rules-for-building-sifive_u-bios.patch b/kvm-riscv-roms-Fix-make-rules-for-building-sifive_u-bios.patch deleted file mode 100644 index c3d3a29..0000000 --- a/kvm-riscv-roms-Fix-make-rules-for-building-sifive_u-bios.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 21412b5480ec8432fb47fad1a460512424d53dce Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Mon, 19 Aug 2019 08:54:18 +0100 -Subject: [PATCH 6/7] riscv: roms: Fix make rules for building sifive_u bios -MIME-Version: 1.0 -Content-Type: 
text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -Message-id: <87c86632af83319c3c4a7ab01f2cf6de7e455ef6.1566204425.git.mrezanin@redhat.com> -Patchwork-id: 90059 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 4/5] riscv: roms: Fix make rules for building sifive_u bios -Bugzilla: 1733977 1740692 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Yash Mankad -RH-Acked-by: Danilo de Paula - -From: Bin Meng - -Currently the make rules are wrongly using qemu/virt opensbi image -for sifive_u machine. Correct it. - -Signed-off-by: Bin Meng -Reviewed-by: Chih-Min Chao -Reviewed-by: Alistair Francis -Message-id: 1564812484-20385-1-git-send-email-bmeng.cn@gmail.com -Signed-off-by: Peter Maydell -(cherry picked from commit 02db1be1d074d75ad1c9bd1e9681f1b0b6299cee) -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. de Paula ---- - roms/Makefile | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/roms/Makefile b/roms/Makefile -index dc70fb5..775c963 100644 ---- a/roms/Makefile -+++ b/roms/Makefile -@@ -183,7 +183,7 @@ opensbi64-sifive_u: - $(MAKE) -C opensbi \ - CROSS_COMPILE=$(riscv64_cross_prefix) \ - PLATFORM="qemu/sifive_u" -- cp opensbi/build/platform/qemu/virt/firmware/fw_jump.bin ../pc-bios/opensbi-riscv64-sifive_u-fw_jump.bin -+ cp opensbi/build/platform/qemu/sifive_u/firmware/fw_jump.bin ../pc-bios/opensbi-riscv64-sifive_u-fw_jump.bin - - clean: - rm -rf seabios/.config seabios/out seabios/builds --- -1.8.3.1 - diff --git a/kvm-spapr-Reset-CAS-IRQ-subsystem-after-devices.patch b/kvm-spapr-Reset-CAS-IRQ-subsystem-after-devices.patch deleted file mode 100644 index 4842a54..0000000 --- a/kvm-spapr-Reset-CAS-IRQ-subsystem-after-devices.patch +++ /dev/null @@ -1,125 +0,0 @@ -From 2edb7c1181fb69e410ffc688986a12d36899f976 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Mon, 19 Aug 2019 08:54:16 +0100 -Subject: [PATCH 4/7] spapr: Reset CAS & IRQ subsystem after devices -MIME-Version: 1.0 -Content-Type: 
text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -Message-id: <9b7c319c271fa2c8cda410e87aef985d8c180049.1566204425.git.mrezanin@redhat.com> -Patchwork-id: 90057 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 2/5] spapr: Reset CAS & IRQ subsystem after devices -Bugzilla: 1733977 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Yash Mankad -RH-Acked-by: Danilo de Paula - -From: David Gibson - -Bugzilla: 1733977 - -This fixes a nasty regression in qemu-4.1 for the 'pseries' machine, -caused by the new "dual" interrupt controller model. Specifically, -qemu can crash when used with KVM if a 'system_reset' is requested -while there's active I/O in the guest. - -The problem is that in spapr_machine_reset() we: - -1. Reset the CAS vector state - spapr_ovec_cleanup(spapr->ov5_cas); - -2. Reset all devices - qemu_devices_reset() - -3. Reset the irq subsystem - spapr_irq_reset(); - -However (1) implicitly changes the interrupt delivery mode, because -whether we're using XICS or XIVE depends on the CAS state. We don't -properly initialize the new irq mode until (3) though - in particular -setting up the KVM devices. - -During (2), we can temporarily drop the BQL allowing some irqs to be -delivered which will go to an irq system that's not properly set up. - -Specifically, if the previous guest was in (KVM) XIVE mode, the CAS -reset will put us back in XICS mode. kvm_kernel_irqchip() still -returns true, because XIVE was using KVM, however XICs doesn't have -its KVM components intialized and kernel_xics_fd == -1. When the irq -is delivered it goes via ics_kvm_set_irq() which assert()s that -kernel_xics_fd != -1. - -This change addresses the problem by delaying the CAS reset until -after the devices reset. The device reset should quiesce all the -devices so we won't get irqs delivered while we mess around with the -IRQ. 
The CAS reset and irq re-initialize should also now be under the -same BQL critical section so nothing else should be able to interrupt -it either. - -We also move the spapr_irq_msi_reset() used in one of the legacy irq -modes, since it logically makes sense at the same point as the -spapr_irq_reset() (it's essentially an equivalent operation for older -machine types). Since we don't need to switch between different -interrupt controllers for those old machine types it shouldn't -actually be broken in those cases though. - -Cc: Cédric Le Goater - -Fixes: b2e22477 "spapr: add a 'reset' method to the sPAPR IRQ backend" -Fixes: 13db0cd9 "spapr: introduce a new sPAPR IRQ backend supporting - XIVE and XICS" -Signed-off-by: David Gibson -(cherry picked from commit 25c9780d38d4494f8610371d883865cf40b35dd6) - -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr.c | 24 ++++++++++++------------ - 1 file changed, 12 insertions(+), 12 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index ab64d43..669eae1 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -1727,6 +1727,18 @@ static void spapr_machine_reset(MachineState *machine) - } - - /* -+ * NVLink2-connected GPU RAM needs to be placed on a separate NUMA node. -+ * We assign a new numa ID per GPU in spapr_pci_collect_nvgpu() which is -+ * called from vPHB reset handler so we initialize the counter here. -+ * If no NUMA is configured from the QEMU side, we start from 1 as GPU RAM -+ * must be equally distant from any other node. -+ * The final value of spapr->gpu_numa_id is going to be written to -+ * max-associativity-domains in spapr_build_fdt(). 
-+ */ -+ spapr->gpu_numa_id = MAX(1, nb_numa_nodes); -+ qemu_devices_reset(); -+ -+ /* - * If this reset wasn't generated by CAS, we should reset our - * negotiated options and start from scratch - */ -@@ -1742,18 +1754,6 @@ static void spapr_machine_reset(MachineState *machine) - } - - /* -- * NVLink2-connected GPU RAM needs to be placed on a separate NUMA node. -- * We assign a new numa ID per GPU in spapr_pci_collect_nvgpu() which is -- * called from vPHB reset handler so we initialize the counter here. -- * If no NUMA is configured from the QEMU side, we start from 1 as GPU RAM -- * must be equally distant from any other node. -- * The final value of spapr->gpu_numa_id is going to be written to -- * max-associativity-domains in spapr_build_fdt(). -- */ -- spapr->gpu_numa_id = MAX(1, nb_numa_nodes); -- qemu_devices_reset(); -- -- /* - * This is fixing some of the default configuration of the XIVE - * devices. To be called after the reset of the machine devices. - */ --- -1.8.3.1 - diff --git a/kvm-spapr-xive-Fix-migration-of-hot-plugged-CPUs.patch b/kvm-spapr-xive-Fix-migration-of-hot-plugged-CPUs.patch deleted file mode 100644 index 3cc18fb..0000000 --- a/kvm-spapr-xive-Fix-migration-of-hot-plugged-CPUs.patch +++ /dev/null @@ -1,135 +0,0 @@ -From 1a1ee2ef4c4c1dc310929e5d752a64eeb3f5245b Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Mon, 19 Aug 2019 08:54:17 +0100 -Subject: [PATCH 5/7] spapr/xive: Fix migration of hot-plugged CPUs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -Message-id: <7cdadb2fe39b9edfc4c281f4efb6f5afa0b1503f.1566204425.git.mrezanin@redhat.com> -Patchwork-id: 90058 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 3/5] spapr/xive: Fix migration of hot-plugged CPUs -Bugzilla: 1733977 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Yash Mankad -RH-Acked-by: Danilo de Paula - -From: Cédric Le Goater - -Bugzilla: 1733977 - -The migration sequence of a guest using 
the XIVE exploitation mode -relies on the fact that the states of all devices are restored before -the machine is. This is not true for hot-plug devices such as CPUs -which state come after the machine. This breaks migration because the -thread interrupt context registers are not correctly set. - -Fix migration of hotplugged CPUs by restoring their context in the -'post_load' handler of the XiveTCTX model. - -Fixes: 277dd3d7712a ("spapr/xive: add migration support for KVM") -Signed-off-by: Cédric Le Goater -Message-Id: <20190813064853.29310-1-clg@kaod.org> -Signed-off-by: David Gibson -(cherry picked from commit 310cda5b5e9df642b19a0e9c504368ffba3b3ab9) -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. de Paula ---- - hw/intc/spapr_xive_kvm.c | 19 +++++++++++++++++-- - hw/intc/xive.c | 21 ++++++++++++++++++++- - include/hw/ppc/xive.h | 1 + - 3 files changed, 38 insertions(+), 3 deletions(-) - -diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c -index 3bf8e7a..8898615 100644 ---- a/hw/intc/spapr_xive_kvm.c -+++ b/hw/intc/spapr_xive_kvm.c -@@ -72,11 +72,17 @@ static void kvm_cpu_disable_all(void) - * XIVE Thread Interrupt Management context (KVM) - */ - --static void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp) -+void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp) - { -+ SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive; - uint64_t state[2]; - int ret; - -+ /* The KVM XIVE device is not in use yet */ -+ if (xive->fd == -1) { -+ return; -+ } -+ - /* word0 and word1 of the OS ring. */ - state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]); - -@@ -655,7 +661,16 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id) - } - } - -- /* Restore the thread interrupt contexts */ -+ /* -+ * Restore the thread interrupt contexts of initial CPUs. 
-+ * -+ * The context of hotplugged CPUs is restored later, by the -+ * 'post_load' handler of the XiveTCTX model because they are not -+ * available at the time the SpaprXive 'post_load' method is -+ * called. We can not restore the context of all CPUs in the -+ * 'post_load' handler of XiveTCTX because the machine is not -+ * necessarily connected to the KVM device at that time. -+ */ - CPU_FOREACH(cs) { - PowerPCCPU *cpu = POWERPC_CPU(cs); - -diff --git a/hw/intc/xive.c b/hw/intc/xive.c -index cf77bdb..da148e9 100644 ---- a/hw/intc/xive.c -+++ b/hw/intc/xive.c -@@ -615,12 +615,31 @@ static int vmstate_xive_tctx_pre_save(void *opaque) - return 0; - } - -+static int vmstate_xive_tctx_post_load(void *opaque, int version_id) -+{ -+ Error *local_err = NULL; -+ -+ if (kvm_irqchip_in_kernel()) { -+ /* -+ * Required for hotplugged CPU, for which the state comes -+ * after all states of the machine. -+ */ -+ kvmppc_xive_cpu_set_state(XIVE_TCTX(opaque), &local_err); -+ if (local_err) { -+ error_report_err(local_err); -+ return -1; -+ } -+ } -+ -+ return 0; -+} -+ - static const VMStateDescription vmstate_xive_tctx = { - .name = TYPE_XIVE_TCTX, - .version_id = 1, - .minimum_version_id = 1, - .pre_save = vmstate_xive_tctx_pre_save, -- .post_load = NULL, /* handled by the sPAPRxive model */ -+ .post_load = vmstate_xive_tctx_post_load, - .fields = (VMStateField[]) { - VMSTATE_BUFFER(regs, XiveTCTX), - VMSTATE_END_OF_LIST() -diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h -index 55c53c7..7363351 100644 ---- a/include/hw/ppc/xive.h -+++ b/include/hw/ppc/xive.h -@@ -438,5 +438,6 @@ void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val); - void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp); - void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp); - void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp); -+void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp); - - #endif /* PPC_XIVE_H */ --- -1.8.3.1 - diff --git 
a/kvm-spec-Update-seavgabios-dependency.patch b/kvm-spec-Update-seavgabios-dependency.patch deleted file mode 100644 index ee00575..0000000 --- a/kvm-spec-Update-seavgabios-dependency.patch +++ /dev/null @@ -1,48 +0,0 @@ -From a6e6495fc1369e1c9f8db872411eea7e0a4f2a67 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Tue, 9 Jul 2019 12:57:07 +0100 -Subject: [PATCH 1/7] spec: Update seavgabios dependency - -RH-Author: Miroslav Rezanina -Message-id: <1562677027-9619-1-git-send-email-mrezanin@redhat.com> -Patchwork-id: 89436 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH] spec: Update seavgabios dependency -Bugzilla: 1725664 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Danilo de Paula -RH-Acked-by: Wainer dos Santos Moschetta - -From: Miroslav Rezanina - -Branch: rhel-av-8.1.0/master-4.0.0 -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1725664 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=22574916 -Upstream: n/a - -We introduced new vga bios files (vgabios-ramfb.bin and -vgabios-bochs-display.bin) recently. To prevent dangling symlinks, -we bump required seavgabios-bin package to version adding these -bios files. - -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. 
de Paula ---- - redhat/qemu-kvm.spec.template | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/qemu-kvm.spec b/qemu-kvm.spec -index e6c0980..a43ebc8 100644 ---- a/qemu-kvm.spec -+++ b/qemu-kvm.spec -@@ -244,7 +244,7 @@ Requires: edk2-aarch64 - %endif - - %ifnarch aarch64 s390x --Requires: seavgabios-bin >= 1.10.2-1 -+Requires: seavgabios-bin >= 1.12.0-3 - Requires: ipxe-roms-qemu >= 20170123-1 - %endif - %ifarch %{power64} --- -1.8.3.1 - diff --git a/kvm.modules b/kvm.modules new file mode 100644 index 0000000..b9d9646 --- /dev/null +++ b/kvm.modules @@ -0,0 +1,18 @@ +#!/bin/sh + +case $(uname -m) in + ppc64) + grep OPAL /proc/cpuinfo >/dev/null 2>&1 && opal=1 + + modprobe -b kvm >/dev/null 2>&1 + modprobe -b kvm-pr >/dev/null 2>&1 && kvm=1 + if [ "$opal" ]; then + modprobe -b kvm-hv >/dev/null 2>&1 + fi + ;; + s390x) + modprobe -b kvm >/dev/null 2>&1 && kvm=1 + ;; +esac + +exit 0 diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 93b6b7e..2af9ec5 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -76,7 +76,7 @@ URL: http://www.qemu.org/ ExclusiveArch: x86_64 %{power64} aarch64 s390x -Source0: http://wiki.qemu.org/download/qemu-4.1.0-rc4.tar.xz +Source0: http://wiki.qemu.org/download/qemu-4.1.0.tar.xz # KSM control scripts Source4: ksm.service @@ -120,21 +120,7 @@ Patch0016: 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0017: 0017-usb-xhci-Fix-PCI-capability-order.patch Patch0018: 0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0019: 0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -# For bz#1741451 - Failed to hot-plug vcpus -Patch21: kvm-pc-Don-t-make-die-id-mandatory-unless-necessary.patch -# For bz#1733977 - Qemu core dumped: /home/ngu/qemu/hw/intc/xics_kvm.c:321: ics_kvm_set_irq: Assertion `kernel_xics_fd != -1' failed -# For bz#1740692 - Backport QEMU 4.1.0 rc5 & ga patches -Patch22: kvm-display-bochs-fix-pcie-support.patch -# For bz#1733977 - Qemu core dumped: 
/home/ngu/qemu/hw/intc/xics_kvm.c:321: ics_kvm_set_irq: Assertion `kernel_xics_fd != -1' failed -Patch23: kvm-spapr-Reset-CAS-IRQ-subsystem-after-devices.patch -# For bz#1733977 - Qemu core dumped: /home/ngu/qemu/hw/intc/xics_kvm.c:321: ics_kvm_set_irq: Assertion `kernel_xics_fd != -1' failed -Patch24: kvm-spapr-xive-Fix-migration-of-hot-plugged-CPUs.patch -# For bz#1733977 - Qemu core dumped: /home/ngu/qemu/hw/intc/xics_kvm.c:321: ics_kvm_set_irq: Assertion `kernel_xics_fd != -1' failed -# For bz#1740692 - Backport QEMU 4.1.0 rc5 & ga patches -Patch25: kvm-riscv-roms-Fix-make-rules-for-building-sifive_u-bios.patch -# For bz#1733977 - Qemu core dumped: /home/ngu/qemu/hw/intc/xics_kvm.c:321: ics_kvm_set_irq: Assertion `kernel_xics_fd != -1' failed -# For bz#1740692 - Backport QEMU 4.1.0 rc5 & ga patches -Patch26: kvm-Update-version-for-v4.1.0-release.patch +Patch0020: 0020-pc-Don-t-make-die-id-mandatory-unless-necessary.patch BuildRequires: wget BuildRequires: rpm-build @@ -419,7 +405,7 @@ the Secure Shell (SSH) protocol. %prep -%setup -n qemu-%{version}-rc4 +%setup -n qemu-%{version} %autopatch -p1 %build diff --git a/sources b/sources index 3c58d7d..b509b83 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-4.1.0-rc4.tar.xz) = 349e1e5281fa62201bfd332b1e8f628488302eeac0211a3464551a3ac0f3212e6e816d2e046f07e5fa84ec6f86954615c6215f722af995c0a0cb07c2190ddba9 +SHA512 (qemu-4.1.0.tar.xz) = 82fd51702a7b9b1b00b2f1bd3b4a832b80249018dbba1add0b0a73e7d4bee452afd45574b4d8df7ce4477d8711f3bda4ca072a1a6de25895c93eb21cf78fc4b2 From 4312e9a49588f468055903440551082cb702ab6e Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Mon, 19 Aug 2019 23:03:56 +0100 Subject: [PATCH 043/195] * Mon Aug 19 2019 Danilo Cesar Lemes de Paula - 4.1.0-3.el8 - kvm-x86-machine-types-pc_rhel_8_0_compat.patch [bz#1719649] - kvm-x86-machine-types-q35-Fixup-units_per_default_bus.patch [bz#1719649] - kvm-x86-machine-types-Fixup-dynamic-sysbus-entries.patch [bz#1719649] - kvm-x86-machine-types-add-pc-q35-rhel8.1.0.patch [bz#1719649] - kvm-machine-types-Update-hw_compat_rhel_8_0-from-hw_comp.patch [bz#1719649] - kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch [bz#1719649] - Resolves: bz#1719649 (8.1 machine type for x86) --- ...date-hw_compat_rhel_8_0-from-hw_comp.patch | 59 ++++++++++ ...ble-legacy-disable-modern-compat-pro.patch | 48 ++++++++ ...e-types-Fixup-dynamic-sysbus-entries.patch | 60 ++++++++++ ...6-machine-types-add-pc-q35-rhel8.1.0.patch | 103 ++++++++++++++++++ ...x86-machine-types-pc_rhel_8_0_compat.patch | 88 +++++++++++++++ ...ypes-q35-Fixup-units_per_default_bus.patch | 57 ++++++++++ qemu-kvm.spec | 24 +++- 7 files changed, 438 insertions(+), 1 deletion(-) create mode 100644 kvm-machine-types-Update-hw_compat_rhel_8_0-from-hw_comp.patch create mode 100644 kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch create mode 100644 kvm-x86-machine-types-Fixup-dynamic-sysbus-entries.patch create mode 100644 kvm-x86-machine-types-add-pc-q35-rhel8.1.0.patch create mode 100644 kvm-x86-machine-types-pc_rhel_8_0_compat.patch create mode 100644 kvm-x86-machine-types-q35-Fixup-units_per_default_bus.patch diff --git a/kvm-machine-types-Update-hw_compat_rhel_8_0-from-hw_comp.patch b/kvm-machine-types-Update-hw_compat_rhel_8_0-from-hw_comp.patch new file mode 100644 index 0000000..b6d3807 --- /dev/null +++ b/kvm-machine-types-Update-hw_compat_rhel_8_0-from-hw_comp.patch @@ -0,0 +1,59 @@ +From 9f2bfaaa73a8fb2790ebc4fb16f5d3a629b39898 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Wed, 31 Jul 2019 15:08:14 +0100 +Subject: [PATCH 5/6] machine types: Update hw_compat_rhel_8_0 from + hw_compat_4_0 + +RH-Author: Dr. David Alan Gilbert +Message-id: <20190731150814.29571-6-dgilbert@redhat.com> +Patchwork-id: 89817 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v3 5/5] machine types: Update hw_compat_rhel_8_0 from hw_compat_4_0 +Bugzilla: 1719649 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Markus Armbruster +RH-Acked-by: Cornelia Huck + +From: "Dr. David Alan Gilbert" + +Pull in the 4_0 compat entries into hw_compat_rhel_8_0 + +We don't need pl031's migrate-tick-offset because it's an ARM only +device and we don't have compatible types for Aarch yet. + +We don't have to copy virtio-balloon-device.qemu-4-0-config-size=false from +hw_compat_3_1[], since it immediately got reverted in hw_compat_4_0[]. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. de Paula +--- + hw/core/machine.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index f30afe0..e2f812a 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -54,6 +54,20 @@ GlobalProperty hw_compat_rhel_8_0[] = { + { "virtio-blk-device", "discard", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_3_1 */ + { "virtio-blk-device", "write-zeroes", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "VGA", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "secondary-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "bochs-display", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-gpu-pci", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-device", "use-started", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */ ++ { "pcie-root-port-base", "disable-acs", "true" }, + }; + 
const size_t hw_compat_rhel_8_0_len = G_N_ELEMENTS(hw_compat_rhel_8_0); + +-- +1.8.3.1 + diff --git a/kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch b/kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch new file mode 100644 index 0000000..b59bdfe --- /dev/null +++ b/kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch @@ -0,0 +1,48 @@ +From ca4a5e85de406a495512d544c3b2187ac3654e97 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 1 Aug 2019 10:26:47 +0100 +Subject: [PATCH 6/6] virtio: Make disable-legacy/disable-modern compat + properties optional + +RH-Author: Dr. David Alan Gilbert +Message-id: <20190801102647.14173-1-dgilbert@redhat.com> +Patchwork-id: 89849 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v3 6/5] virtio: Make disable-legacy/disable-modern compat properties optional +Bugzilla: 1719649 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Markus Armbruster +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Eduardo Habkost + +From: "Dr. David Alan Gilbert" + +Upstream 53921bfdce3 by Eduardo made some hw_compat_2_6 entries optional +to fix a bug where non-transitional devices failed to be created on +old upstream machine types. Do the same fix to our old downstream +machines. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/core/machine.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index e2f812a..c796b54 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -152,8 +152,9 @@ GlobalProperty hw_compat_rhel_7_2[] = { + { "fw_cfg_mem", "dma_enabled", "off" }, + { "fw_cfg_io", "dma_enabled", "off" }, + { "isa-fdc", "fallback", "144" }, +- { "virtio-pci", "disable-modern", "on" }, +- { "virtio-pci", "disable-legacy", "off" }, ++ /* Optional because not all virtio-pci devices support legacy mode */ ++ { "virtio-pci", "disable-modern", "on", .optional = true }, ++ { "virtio-pci", "disable-legacy", "off", .optional = true }, + { TYPE_PCI_DEVICE, "x-pcie-lnksta-dllla", "off" }, + { "virtio-pci", "page-per-vq", "on" }, + /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ +-- +1.8.3.1 + diff --git a/kvm-x86-machine-types-Fixup-dynamic-sysbus-entries.patch b/kvm-x86-machine-types-Fixup-dynamic-sysbus-entries.patch new file mode 100644 index 0000000..0ac47c3 --- /dev/null +++ b/kvm-x86-machine-types-Fixup-dynamic-sysbus-entries.patch @@ -0,0 +1,60 @@ +From 6df1559af7cd65e3faf7c61a2bb8f02667767ad6 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 31 Jul 2019 15:08:12 +0100 +Subject: [PATCH 3/6] x86 machine types: Fixup dynamic sysbus entries + +RH-Author: Dr. David Alan Gilbert +Message-id: <20190731150814.29571-4-dgilbert@redhat.com> +Patchwork-id: 89814 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v3 3/5] x86 machine types: Fixup dynamic sysbus entries +Bugzilla: 1719649 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Markus Armbruster +RH-Acked-by: Cornelia Huck + +From: "Dr. David Alan Gilbert" + +We're missing a couple of upstream changes, add them for +consistency: + + v2.11.0-824-gef18310d54 Shouldn't have any effect + v2.12.0-1411-g94692dcd71 Should allow us to use RAMFB if we enable + it + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/i386/pc_piix.c | 1 + + hw/i386/pc_q35.c | 4 +++- + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index c86c48c..3b9ba95 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1037,6 +1037,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + pcmc->default_nic_model = "e1000"; + m->default_display = "std"; + m->no_parallel = 1; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + m->alias = "pc"; + m->is_default = 1; +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 068813d..edf8e54 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -554,7 +554,9 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + m->default_display = "std"; + m->no_floppy = 1; + m->no_parallel = 1; +- machine_class_allow_dynamic_sysbus_dev(m, TYPE_SYS_BUS_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); + m->alias = "q35"; + m->max_cpus = 384; + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); +-- +1.8.3.1 + diff --git a/kvm-x86-machine-types-add-pc-q35-rhel8.1.0.patch b/kvm-x86-machine-types-add-pc-q35-rhel8.1.0.patch new file mode 100644 index 0000000..73eb148 --- /dev/null +++ b/kvm-x86-machine-types-add-pc-q35-rhel8.1.0.patch @@ -0,0 +1,103 @@ +From 0784125ba3ccd72a590d210cf3f52d80e96b4263 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 31 Jul 2019 15:08:13 +0100 +Subject: [PATCH 4/6] x86 machine types: add pc-q35-rhel8.1.0 + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20190731150814.29571-5-dgilbert@redhat.com> +Patchwork-id: 89813 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v3 4/5] x86 machine types: add pc-q35-rhel8.1.0 +Bugzilla: 1719649 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Markus Armbruster +RH-Acked-by: Cornelia Huck + +From: "Dr. David Alan Gilbert" + +Create the 8.1.0 machine type for q35 and update the _options +functions to keep compatibility. + +Note: + We don't have to copy the kernel_irqchip_split from 4_0 since it + immediately got reverted in 4_0_1 + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. de Paula +--- + hw/i386/pc_piix.c | 6 ++++++ + hw/i386/pc_q35.c | 24 +++++++++++++++++++++++- + 2 files changed, 29 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 3b9ba95..bf6b444 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1051,9 +1051,15 @@ static void pc_init_rhel760(MachineState *machine) + + static void pc_machine_rhel760_options(MachineClass *m) + { ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_machine_rhel7_options(m); + m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; + m->async_pf_vmexit_disable = true; ++ m->smbus_no_migration_support = true; ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); + compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); + } +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index edf8e54..b6d0bb3 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -554,6 +554,7 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + m->default_display = "std"; + m->no_floppy = 1; + m->no_parallel = 1; ++ pcmc->default_cpu_version = 1; + 
machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); + machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); + machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); +@@ -562,6 +563,20 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + } + ++static void pc_q35_init_rhel810(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel810_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel810, "pc-q35-rhel8.1.0", pc_q35_init_rhel810, ++ pc_q35_machine_rhel810_options); ++ + static void pc_q35_init_rhel800(MachineState *machine) + { + pc_q35_init(machine); +@@ -569,8 +584,15 @@ static void pc_q35_init_rhel800(MachineState *machine) + + static void pc_q35_machine_rhel800_options(MachineClass *m) + { +- pc_q35_machine_rhel_options(m); ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel810_options(m); + m->desc = "RHEL-8.0.0 PC (Q35 + ICH9, 2009)"; ++ m->smbus_no_migration_support = true; ++ m->alias = NULL; ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); + } + + DEFINE_PC_MACHINE(q35_rhel800, "pc-q35-rhel8.0.0", pc_q35_init_rhel800, +-- +1.8.3.1 + diff --git a/kvm-x86-machine-types-pc_rhel_8_0_compat.patch b/kvm-x86-machine-types-pc_rhel_8_0_compat.patch new file mode 100644 index 0000000..cb2371f --- /dev/null +++ b/kvm-x86-machine-types-pc_rhel_8_0_compat.patch @@ -0,0 +1,88 @@ +From e42808c29bdcebe62cdb5cdb1de4dc0910dd21d9 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 31 Jul 2019 15:08:10 +0100 +Subject: [PATCH 1/6] x86 machine types: pc_rhel_8_0_compat + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20190731150814.29571-2-dgilbert@redhat.com> +Patchwork-id: 89816 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v3 1/5] x86 machine types: pc_rhel_8_0_compat +Bugzilla: 1719649 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Markus Armbruster +RH-Acked-by: Cornelia Huck + +From: "Dr. David Alan Gilbert" + +Create the pc_rhel_8_0_compat array based off pc_compat_3_1. +It's the same except for a chunk of mpx=on entries that +we already put in the pc_rhel_7_6_compat. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. de Paula +--- + hw/i386/pc.c | 33 +++++++++++++++++++++++++++++++++ + include/hw/i386/pc.h | 3 +++ + 2 files changed, 36 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index b3d2d1e..f19fed4 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -366,6 +366,39 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_8_0_compat[] = { ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "intel-iommu", "dma-drain", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* 
pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /** The mpx=on entries from pc_compat_3_1 are in pc_rhel_7_6_compat **/ ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Cascadelake-Server" "-" TYPE_X86_CPU, "stepping", "5" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { TYPE_X86_CPU, "x-intel-pt-auto-level", "off" }, ++}; ++const size_t pc_rhel_8_0_compat_len = G_N_ELEMENTS(pc_rhel_8_0_compat); ++ + /* Similar to PC_COMPAT_3_0 + PC_COMPAT_2_12, but: + * all of the 2_12 stuff was already in 7.6 from bz 1481253 + * x-migrate-smi-count comes from PC_COMPAT_2_11 but +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 605cc71..2f24333 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -368,6 +368,9 @@ extern const size_t pc_compat_1_4_len; + extern GlobalProperty pc_rhel_compat[]; + extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_8_0_compat[]; ++extern const size_t pc_rhel_8_0_compat_len; ++ + extern GlobalProperty pc_rhel_7_6_compat[]; + extern const size_t pc_rhel_7_6_compat_len; + +-- +1.8.3.1 + diff --git a/kvm-x86-machine-types-q35-Fixup-units_per_default_bus.patch b/kvm-x86-machine-types-q35-Fixup-units_per_default_bus.patch new file mode 100644 index 0000000..781c3af --- /dev/null +++ b/kvm-x86-machine-types-q35-Fixup-units_per_default_bus.patch @@ -0,0 +1,57 @@ +From 9de83a880cf0e397db7c8bfdbf009f137c8eaf8a Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 31 Jul 2019 15:08:11 +0100 +Subject: [PATCH 2/6] x86 machine types: q35: Fixup units_per_default_bus + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20190731150814.29571-3-dgilbert@redhat.com> +Patchwork-id: 89818 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v3 2/5] x86 machine types: q35: Fixup units_per_default_bus +Bugzilla: 1719649 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Markus Armbruster +RH-Acked-by: Cornelia Huck + +x86 machine types: q35: Fixup units_per_default_bus + +We omitted the line: + m->units_per_default_bus = 1; + +in our rebase from 2.1.2 (which doesn't have ->units_per_default_bus) +to 2.3.0 (which does). Specifically, in commit ed6d215ef93. + +It's safe for us to add, because: + a) It changes the behaviour when you don't specify a bus/device + number, however libvirt always specifies it, so it's always + safe downstream for us with libvirt which we require. + + b) The behaviour change isn't actually seen by the guest. i.e. + the change from having two SATA devices from: + ide0-hd0, ide0-hd1 + to + ide0-hd0, ide1-hd0 + + is hidden because by the time it gets through the SATA code + it ends up back as two single SATA devices on their own bus. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. de Paula +--- + hw/i386/pc_q35.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 4959ed3..068813d 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -549,6 +549,7 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pcmc->default_nic_model = "e1000e"; + m->family = "pc_q35_Z"; ++ m->units_per_default_bus = 1; + m->default_machine_opts = "firmware=bios-256k.bin"; + m->default_display = "std"; + m->no_floppy = 1; +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 2af9ec5..0f0e249 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.1.0 -Release: 2%{?dist} +Release: 3%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -121,6 +121,18 @@ Patch0017: 0017-usb-xhci-Fix-PCI-capability-order.patch Patch0018: 0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0019: 0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch Patch0020: 0020-pc-Don-t-make-die-id-mandatory-unless-necessary.patch +# For bz#1719649 - 8.1 machine type for x86 +Patch21: kvm-x86-machine-types-pc_rhel_8_0_compat.patch +# For bz#1719649 - 8.1 machine type for x86 +Patch22: kvm-x86-machine-types-q35-Fixup-units_per_default_bus.patch +# For bz#1719649 - 8.1 machine type for x86 +Patch23: kvm-x86-machine-types-Fixup-dynamic-sysbus-entries.patch +# For bz#1719649 - 8.1 machine type for x86 +Patch24: kvm-x86-machine-types-add-pc-q35-rhel8.1.0.patch +# For bz#1719649 - 8.1 machine type for x86 +Patch25: kvm-machine-types-Update-hw_compat_rhel_8_0-from-hw_comp.patch +# For bz#1719649 - 8.1 machine type for x86 +Patch26: kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch BuildRequires: wget BuildRequires: rpm-build @@ -1062,6 +1074,16 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Aug 19 2019 Danilo Cesar Lemes de Paula - 4.1.0-3.el8 +- kvm-x86-machine-types-pc_rhel_8_0_compat.patch [bz#1719649] +- kvm-x86-machine-types-q35-Fixup-units_per_default_bus.patch [bz#1719649] +- kvm-x86-machine-types-Fixup-dynamic-sysbus-entries.patch [bz#1719649] +- kvm-x86-machine-types-add-pc-q35-rhel8.1.0.patch [bz#1719649] +- kvm-machine-types-Update-hw_compat_rhel_8_0-from-hw_comp.patch [bz#1719649] +- kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch [bz#1719649] +- Resolves: bz#1719649 + (8.1 machine type for x86) + * Mon Aug 19 2019 Danilo Cesar Lemes de Paula - 4.1.0-2.el8 - kvm-spec-Update-seavgabios-dependency.patch [bz#1725664] - kvm-pc-Don-t-make-die-id-mandatory-unless-necessary.patch [bz#1741451] From 9d9148b01fa30598dfce07e56cad46e36cc4e523 Mon Sep 17 00:00:00 2001 From: 
"Danilo C. L. de Paula" Date: Tue, 20 Aug 2019 01:58:06 +0100 Subject: [PATCH 044/195] * Tue Aug 20 2019 Danilo Cesar Lemes de Paula - 4.1.0-4.el8 - kvm-RHEL-disable-hostmem-memfd.patch [bz#1738626 bz#1740797] - Resolves: bz#1738626 (Disable memfd in QEMU) - Resolves: bz#1740797 (Disable memfd in QEMU) --- kvm-RHEL-disable-hostmem-memfd.patch | 56 ++++++++++++++++++++++++++++ qemu-kvm.spec | 12 +++++- 2 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 kvm-RHEL-disable-hostmem-memfd.patch diff --git a/kvm-RHEL-disable-hostmem-memfd.patch b/kvm-RHEL-disable-hostmem-memfd.patch new file mode 100644 index 0000000..39a0607 --- /dev/null +++ b/kvm-RHEL-disable-hostmem-memfd.patch @@ -0,0 +1,56 @@ +From f7587ddb9a2731bf678a24156b6285dda79a4b2b Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Mon, 19 Aug 2019 21:18:27 -0300 +Subject: [PATCH] RHEL: disable hostmem-memfd +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20190814085210.18462-1-marcandre.lureau@redhat.com> +Patchwork-id: 89974 +O-Subject: [RHEL-8.1.0 qemu-kvm PATCH] RHEL: disable hostmem-memfd +Bugzilla: 1738626 1740797 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: John Snow + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1740797 +BRANCH: rhel8/rhel-8.1.0 +UPSTREAM: n/a (downstream only) +BREW: 23060214 + +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. 
de Paula +--- + backends/Makefile.objs | 3 ++- + util/memfd.c | 2 +- + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/backends/Makefile.objs b/backends/Makefile.objs +index 981e8e1..ad54cd6 100644 +--- a/backends/Makefile.objs ++++ b/backends/Makefile.objs +@@ -16,4 +16,5 @@ endif + + common-obj-$(call land,$(CONFIG_VHOST_USER),$(CONFIG_VIRTIO)) += vhost-user.o + +-common-obj-$(CONFIG_LINUX) += hostmem-memfd.o ++# RHEL: disable memfd ++# common-obj-$(CONFIG_LINUX) += hostmem-memfd.o +diff --git a/util/memfd.c b/util/memfd.c +index 00334e5..e2320af 100644 +--- a/util/memfd.c ++++ b/util/memfd.c +@@ -193,7 +193,7 @@ bool qemu_memfd_alloc_check(void) + */ + bool qemu_memfd_check(unsigned int flags) + { +-#ifdef CONFIG_LINUX ++#if 0 /* RHEL: memfd support disabled */ + int mfd = memfd_create("test", flags | MFD_CLOEXEC); + + if (mfd >= 0) { +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 0f0e249..bae899c 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.1.0 -Release: 3%{?dist} +Release: 4%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -133,6 +133,9 @@ Patch24: kvm-x86-machine-types-add-pc-q35-rhel8.1.0.patch Patch25: kvm-machine-types-Update-hw_compat_rhel_8_0-from-hw_comp.patch # For bz#1719649 - 8.1 machine type for x86 Patch26: kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch +# For bz#1738626 - Disable memfd in QEMU +# For bz#1740797 - Disable memfd in QEMU +Patch27: kvm-RHEL-disable-hostmem-memfd.patch BuildRequires: wget BuildRequires: rpm-build @@ -1074,6 +1077,13 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Aug 20 2019 Danilo Cesar Lemes de Paula - 4.1.0-4.el8 +- kvm-RHEL-disable-hostmem-memfd.patch [bz#1738626 bz#1740797] +- Resolves: bz#1738626 + (Disable memfd in QEMU) +- Resolves: bz#1740797 + (Disable memfd in QEMU) + * Mon Aug 19 2019 Danilo Cesar Lemes de Paula - 4.1.0-3.el8 - kvm-x86-machine-types-pc_rhel_8_0_compat.patch [bz#1719649] - kvm-x86-machine-types-q35-Fixup-units_per_default_bus.patch [bz#1719649] From 37b050692f69795b3b31ac85247fe3e0104b2aeb Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Tue, 27 Aug 2019 20:51:16 +0100 Subject: [PATCH 045/195] * Tue Aug 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-5.el8 - kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch [bz#1693772] - kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch [bz#1693772] - kvm-enable-virgl.patch [bz#1559740] - kvm-redhat-update-pseries-rhel8.1.0-machine-type.patch [bz#1744170] - kvm-Do-not-run-iotests-on-brew-build.patch [bz#1742197 bz#1742819] - Resolves: bz#1559740 ([RFE] Enable virgl as TechPreview (qemu)) - Resolves: bz#1693772 ([IBM zKVM] RHEL AV 8.1.0 machine type update for s390x) - Resolves: bz#1742197 (Remove iotests from qemu-kvm builds [RHEL AV 8.1.0]) - Resolves: bz#1742819 (Remove iotests from qemu-kvm builds [RHEL 8.1.0]) - Resolves: bz#1744170 ([IBM Power] New 8.1.0 machine type for pseries) --- ...-proper-compatibility-options-for-th.patch | 64 ++++++++++++++++ ...ame-s390-ccw-virtio-rhel8.0.0-to-s39.patch | 64 ++++++++++++++++ ...pdate-pseries-rhel8.1.0-machine-type.patch | 73 +++++++++++++++++++ qemu-kvm.spec | 34 +++++++-- 4 files changed, 228 insertions(+), 7 deletions(-) create mode 100644 kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch create mode 100644 kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch create mode 100644 kvm-redhat-update-pseries-rhel8.1.0-machine-type.patch diff --git a/kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch b/kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch new file mode 100644 index 0000000..8ad7329 --- /dev/null +++ b/kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch @@ -0,0 +1,64 @@ +From a9b22e8663f190e4a845815864e78ef61b68f2a4 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 20 Aug 2019 09:24:41 +0100 +Subject: [PATCH 2/5] redhat: s390x: Add proper compatibility options for the + -rhel7.6.0 machine + +RH-Author: Thomas Huth +Message-id: <20190820092441.28201-3-thuth@redhat.com> +Patchwork-id: 90078 +O-Subject: 
[RHEL-AV-8.1.0 qemu-kvm PATCH 2/2] redhat: s390x: Add proper compatibility options for the -rhel7.6.0 machine +Bugzilla: 1693772 +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Dr. David Alan Gilbert + +Bugzilla: 1693772 +Upstream: n/a (downstream only) + +Since we skipped the -rhel8.0.0 machine on s390x, we have to add the +related compatibility properties now to the -rhel7.6.0 machine type +instead. + +Also the "multiple epoch" facility was disabled on the z14GA1 CPU +in 7.6, so we also have to adjust our machine type here accordingly. + +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/s390-virtio-ccw.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 2f4cd14..ebbde05 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -905,12 +905,25 @@ DEFINE_CCW_MACHINE(rhel810, "rhel8.1.0", true); + + static void ccw_machine_rhel760_instance_options(MachineState *machine) + { ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V3_1 }; ++ + ccw_machine_rhel810_instance_options(machine); ++ ++ s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat); ++ ++ /* The multiple-epoch facility was not available with rhel7.6.0 on z14GA1 */ ++ s390_cpudef_featoff(14, 1, S390_FEAT_MULTIPLE_EPOCH); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QSIE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QTOUE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOUE); + } + + static void ccw_machine_rhel760_class_options(MachineClass *mc) + { + ccw_machine_rhel810_class_options(mc); ++ /* We never published the s390x version of RHEL8.0 AV, so add this here */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); + compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); + compat_props_add(mc->compat_props, 
ccw_compat_rhel_7_6, ccw_compat_rhel_7_6_len); + } +-- +1.8.3.1 + diff --git a/kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch b/kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch new file mode 100644 index 0000000..4c9e563 --- /dev/null +++ b/kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch @@ -0,0 +1,64 @@ +From fb192e5feac9ed77672e4acbfaec3bdad4d7684a Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 20 Aug 2019 09:24:40 +0100 +Subject: [PATCH 1/5] redhat: s390x: Rename s390-ccw-virtio-rhel8.0.0 to + s390-ccw-virtio-rhel8.1.0 + +RH-Author: Thomas Huth +Message-id: <20190820092441.28201-2-thuth@redhat.com> +Patchwork-id: 90077 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/2] redhat: s390x: Rename s390-ccw-virtio-rhel8.0.0 to s390-ccw-virtio-rhel8.1.0 +Bugzilla: 1693772 +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Dr. David Alan Gilbert + +Bugzilla: 1693772 +Upstream: n/a + +We did not ship RHEL AV 8.0 for s390x to customers, so we do not need +to maintain the s390-ccw-virtio-rhel8.0.0 machine. Rename it to +s390-ccw-virtio-rhel8.1.0 instead. + +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/s390x/s390-virtio-ccw.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 4d8c2ec..2f4cd14 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -894,23 +894,23 @@ GlobalProperty ccw_compat_rhel_7_5[] = { + }; + const size_t ccw_compat_rhel_7_5_len = G_N_ELEMENTS(ccw_compat_rhel_7_5); + +-static void ccw_machine_rhel800_instance_options(MachineState *machine) ++static void ccw_machine_rhel810_instance_options(MachineState *machine) + { + } + +-static void ccw_machine_rhel800_class_options(MachineClass *mc) ++static void ccw_machine_rhel810_class_options(MachineClass *mc) + { + } +-DEFINE_CCW_MACHINE(rhel800, "rhel8.0.0", true); ++DEFINE_CCW_MACHINE(rhel810, "rhel8.1.0", true); + + static void ccw_machine_rhel760_instance_options(MachineState *machine) + { +- ccw_machine_rhel800_instance_options(machine); ++ ccw_machine_rhel810_instance_options(machine); + } + + static void ccw_machine_rhel760_class_options(MachineClass *mc) + { +- ccw_machine_rhel800_class_options(mc); ++ ccw_machine_rhel810_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); + compat_props_add(mc->compat_props, ccw_compat_rhel_7_6, ccw_compat_rhel_7_6_len); + } +-- +1.8.3.1 + diff --git a/kvm-redhat-update-pseries-rhel8.1.0-machine-type.patch b/kvm-redhat-update-pseries-rhel8.1.0-machine-type.patch new file mode 100644 index 0000000..29c686a --- /dev/null +++ b/kvm-redhat-update-pseries-rhel8.1.0-machine-type.patch @@ -0,0 +1,73 @@ +From bcba7281bbb2351fab69498c54fcda4e6154fa91 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 23 Aug 2019 09:13:02 +0100 +Subject: [PATCH 4/5] redhat: update pseries-rhel8.1.0 machine type + +RH-Author: Laurent Vivier +Message-id: <20190823091302.8970-1-lvivier@redhat.com> +Patchwork-id: 90137 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH] redhat: update pseries-rhel8.1.0 
machine type +Bugzilla: 1744170 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Gibson + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1744170 +BRANCH: rhel-av-8.1.0/master-4.1.0 +UPSTREAM: downstream only +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=23160686 + +pseries-rhel8.1.0 has been created based on pseries-4.0, +we need to update it now we are based on pseries-4.1 + +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 669eae1..9b500d2 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4437,6 +4437,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) + } + + DEFINE_SPAPR_MACHINE(4_1, "4.1", true); ++#endif + + /* + * pseries-4.0 +@@ -4453,6 +4454,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, + *nv2atsd = 0; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void spapr_machine_4_0_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +@@ -4796,7 +4798,7 @@ DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", true); + + /* + * pseries-rhel8.0.0 +- * like spapr_compat_3_1 ++ * like pseries-3.1 and pseries-4.0 + * except SPAPR_CAP_CFPC, SPAPR_CAP_SBBC and SPAPR_CAP_IBS + * that have been backported to pseries-rhel8.0.0 + */ +@@ -4809,6 +4811,12 @@ static void spapr_machine_rhel800_class_options(MachineClass *mc) + compat_props_add(mc->compat_props, hw_compat_rhel_8_0, + hw_compat_rhel_8_0_len); + ++ /* pseries-4.0 */ ++ smc->phb_placement = phb_placement_4_0; ++ smc->irq = &spapr_irq_xics; ++ smc->pre_4_1_migration = true; ++ ++ /* pseries-3.1 */ + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); + smc->update_dt_enabled = false; + smc->dr_phb_enabled = false; +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index bae899c..22fa3d6 100644 
--- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.1.0 -Release: 4%{?dist} +Release: 5%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -136,6 +136,12 @@ Patch26: kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch # For bz#1738626 - Disable memfd in QEMU # For bz#1740797 - Disable memfd in QEMU Patch27: kvm-RHEL-disable-hostmem-memfd.patch +# For bz#1693772 - [IBM zKVM] RHEL AV 8.1.0 machine type update for s390x +Patch28: kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch +# For bz#1693772 - [IBM zKVM] RHEL AV 8.1.0 machine type update for s390x +Patch29: kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch +# For bz#1744170 - [IBM Power] New 8.1.0 machine type for pseries +Patch31: kvm-redhat-update-pseries-rhel8.1.0-machine-type.patch BuildRequires: wget BuildRequires: rpm-build @@ -162,6 +168,7 @@ BuildRequires: python3-sphinx BuildRequires: spice-protocol >= 0.12.12 BuildRequires: spice-server-devel >= 0.12.8 BuildRequires: libcacard-devel +BuildRequires: virglrenderer-devel # For smartcard NSS support BuildRequires: nss-devel %endif @@ -475,9 +482,11 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" %if 0%{have_spice} --enable-spice \ --enable-smartcard \ + --enable-virglrenderer \ %else --disable-spice \ --disable-smartcard \ + --disable-virglrenderer \ %endif %if 0%{have_opengl} --enable-opengl \ @@ -882,12 +891,6 @@ rm -rf $RPM_BUILD_ROOT%{qemudocdir}/specs %check export DIFF=diff; make check V=1 -pushd tests/qemu-iotests -./check -v -raw 001 002 003 004 005 008 009 010 011 012 021 025 032 033 045 048 052 063 077 086 101 104 106 120 132 140 143 145 147 150 152 157 159 160 162 170 171 175 181 184 194 208 218 221 222 226 227 232 -./check -v -qcow2 001 002 003 004 005 007 008 009 010 011 012 017 018 019 020 021 022 024 025 027 028 029 031 032 
033 034 035 036 037 038 039 042 043 046 047 048 049 050 052 053 054 056 057 058 062 063 065 066 069 072 073 074 080 085 086 087 089 090 091 095 096 097 098 102 103 104 105 107 108 110 111 114 117 120 126 127 130 132 133 134 137 138 140 141 142 143 144 145 147 150 151 152 156 157 158 159 162 165 170 174 177 179 181 184 187 188 189 190 191 194 195 196 198 201 202 203 204 206 208 209 214 216 217 218 222 226 227 232 -./check -v -luks 001 002 003 004 005 008 009 010 011 012 021 032 033 052 140 143 145 157 162 174 181 184 208 218 227 -./check -v -nbd 001 002 003 004 005 008 009 010 011 021 032 033 045 077 094 104 119 123 132 143 145 147 151 152 162 181 184 194 208 218 222 -popd %post -n qemu-kvm-core # load kvm modules now, so we can make sure no reboot is needed. @@ -1077,6 +1080,23 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Aug 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-5.el8 +- kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch [bz#1693772] +- kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch [bz#1693772] +- kvm-enable-virgl.patch [bz#1559740] +- kvm-redhat-update-pseries-rhel8.1.0-machine-type.patch [bz#1744170] +- kvm-Do-not-run-iotests-on-brew-build.patch [bz#1742197 bz#1742819] +- Resolves: bz#1559740 + ([RFE] Enable virgl as TechPreview (qemu)) +- Resolves: bz#1693772 + ([IBM zKVM] RHEL AV 8.1.0 machine type update for s390x) +- Resolves: bz#1742197 + (Remove iotests from qemu-kvm builds [RHEL AV 8.1.0]) +- Resolves: bz#1742819 + (Remove iotests from qemu-kvm builds [RHEL 8.1.0]) +- Resolves: bz#1744170 + ([IBM Power] New 8.1.0 machine type for pseries) + * Tue Aug 20 2019 Danilo Cesar Lemes de Paula - 4.1.0-4.el8 - kvm-RHEL-disable-hostmem-memfd.patch [bz#1738626 bz#1740797] - Resolves: bz#1738626 From 549f70b1e39fed590dfc8880ca3a2c49aa7661d6 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Wed, 4 Sep 2019 17:07:50 +0100 Subject: [PATCH 046/195] * Wed Sep 04 2019 Danilo Cesar Lemes de Paula - 4.1.0-6.el8 - kvm-memory-Refactor-memory_region_clear_coalescing.patch [bz#1743142] - kvm-memory-Split-zones-when-do-coalesced_io_del.patch [bz#1743142] - kvm-memory-Remove-has_coalesced_range-counter.patch [bz#1743142] - kvm-memory-Fix-up-memory_region_-add-del-_coalescing.patch [bz#1743142] - kvm-enable-virgl-for-real-this-time.patch [bz#1559740] - Resolves: bz#1559740 ([RFE] Enable virgl as TechPreview (qemu)) - Resolves: bz#1743142 (Boot guest with multiple e1000 devices, qemu will crash after several guest reboots: kvm_mem_ioeventfd_add: error adding ioeventfd: No space left on device (28)) --- ...p-memory_region_-add-del-_coalescing.patch | 118 +++++++++++++++++ ...actor-memory_region_clear_coalescing.patch | 64 +++++++++ ...y-Remove-has_coalesced_range-counter.patch | 96 ++++++++++++++ ...Split-zones-when-do-coalesced_io_del.patch | 123 ++++++++++++++++++ qemu-kvm.spec | 26 +++- 5 files changed, 425 insertions(+), 2 deletions(-) create mode 100644 kvm-memory-Fix-up-memory_region_-add-del-_coalescing.patch create mode 100644 kvm-memory-Refactor-memory_region_clear_coalescing.patch create mode 100644 kvm-memory-Remove-has_coalesced_range-counter.patch create mode 100644 kvm-memory-Split-zones-when-do-coalesced_io_del.patch diff --git a/kvm-memory-Fix-up-memory_region_-add-del-_coalescing.patch b/kvm-memory-Fix-up-memory_region_-add-del-_coalescing.patch new file mode 100644 index 0000000..021125e --- /dev/null +++ b/kvm-memory-Fix-up-memory_region_-add-del-_coalescing.patch @@ -0,0 +1,118 @@ +From b9b48ed46d2b0a3dd6e8406946eb0516ec75a004 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 23 Aug 2019 06:14:31 +0100 +Subject: [PATCH 4/5] memory: Fix up memory_region_{add|del}_coalescing +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +Message-id: 
<20190823061431.31759-5-peterx@redhat.com> +Patchwork-id: 90136 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 4/4] memory: Fix up memory_region_{add|del}_coalescing +Bugzilla: 1743142 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Auger Eric +RH-Acked-by: Philippe Mathieu-Daudé + +The old memory_region_{add|clear}_coalescing() has some defects +because they both changed mr->coalesced before updating the regions +using memory_region_update_coalesced_range_as(). Then when the +regions were updated in memory_region_update_coalesced_range_as() the +mr->coalesced will always be either one more or one less. So: + +- For memory_region_add_coalescing: it'll always trying to remove the + newly added coalesced region while it shouldn't, and, + +- For memory_region_clear_coalescing: when it calls the update there + will be no coalesced ranges on mr->coalesced because they were all + removed before hand so the update will probably do nothing for real. + +Let's fix this. Now we've got flat_range_coalesced_io_notify() to +notify a single CoalescedMemoryRange instance change, so use it in the +existing memory_region_update_coalesced_range() logic by only notify +either an addition or deletion. Then we hammer both the +memory_region_{add|clear}_coalescing() to use it. + +Fixes: 3ac7d43a6fbb5d4a3 +Signed-off-by: Peter Xu +Message-Id: <20190820141328.10009-5-peterx@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit b960fc1796fb078c21121abf01499603b66b3f57) +Signed-off-by: Peter Xu +Signed-off-by: Danilo C. L. 
de Paula +--- + memory.c | 36 +++++++++++++++++------------------- + 1 file changed, 17 insertions(+), 19 deletions(-) + +diff --git a/memory.c b/memory.c +index c7cd43f..2f15180 100644 +--- a/memory.c ++++ b/memory.c +@@ -2238,27 +2238,26 @@ void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize, Error **errp + qemu_ram_resize(mr->ram_block, newsize, errp); + } + +-static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpace *as) ++/* ++ * Call proper memory listeners about the change on the newly ++ * added/removed CoalescedMemoryRange. ++ */ ++static void memory_region_update_coalesced_range(MemoryRegion *mr, ++ CoalescedMemoryRange *cmr, ++ bool add) + { ++ AddressSpace *as; + FlatView *view; + FlatRange *fr; + +- view = address_space_get_flatview(as); +- FOR_EACH_FLAT_RANGE(fr, view) { +- if (fr->mr == mr) { +- flat_range_coalesced_io_del(fr, as); +- flat_range_coalesced_io_add(fr, as); +- } +- } +- flatview_unref(view); +-} +- +-static void memory_region_update_coalesced_range(MemoryRegion *mr) +-{ +- AddressSpace *as; +- + QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { +- memory_region_update_coalesced_range_as(mr, as); ++ view = address_space_get_flatview(as); ++ FOR_EACH_FLAT_RANGE(fr, view) { ++ if (fr->mr == mr) { ++ flat_range_coalesced_io_notify(fr, as, cmr, add); ++ } ++ } ++ flatview_unref(view); + } + } + +@@ -2276,7 +2275,7 @@ void memory_region_add_coalescing(MemoryRegion *mr, + + cmr->addr = addrrange_make(int128_make64(offset), int128_make64(size)); + QTAILQ_INSERT_TAIL(&mr->coalesced, cmr, link); +- memory_region_update_coalesced_range(mr); ++ memory_region_update_coalesced_range(mr, cmr, true); + memory_region_set_flush_coalesced(mr); + } + +@@ -2294,10 +2293,9 @@ void memory_region_clear_coalescing(MemoryRegion *mr) + while (!QTAILQ_EMPTY(&mr->coalesced)) { + cmr = QTAILQ_FIRST(&mr->coalesced); + QTAILQ_REMOVE(&mr->coalesced, cmr, link); ++ memory_region_update_coalesced_range(mr, cmr, false); + 
g_free(cmr); + } +- +- memory_region_update_coalesced_range(mr); + } + + void memory_region_set_flush_coalesced(MemoryRegion *mr) +-- +1.8.3.1 + diff --git a/kvm-memory-Refactor-memory_region_clear_coalescing.patch b/kvm-memory-Refactor-memory_region_clear_coalescing.patch new file mode 100644 index 0000000..6ea7ce5 --- /dev/null +++ b/kvm-memory-Refactor-memory_region_clear_coalescing.patch @@ -0,0 +1,64 @@ +From 134ab69ffdfb7e45a0be385595036d0427928306 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 23 Aug 2019 06:14:28 +0100 +Subject: [PATCH 1/5] memory: Refactor memory_region_clear_coalescing +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +Message-id: <20190823061431.31759-2-peterx@redhat.com> +Patchwork-id: 90134 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 1/4] memory: Refactor memory_region_clear_coalescing +Bugzilla: 1743142 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Auger Eric +RH-Acked-by: Philippe Mathieu-Daudé + +Removing the update variable and quit earlier if the memory region has +no coalesced range. This prepares for the next patch. + +Fixes: 3ac7d43a6fbb5d4a3 +Signed-off-by: Peter Xu +Message-Id: <20190820141328.10009-4-peterx@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9c1aa1c235c770d84462d482460a96e957e95b9c) +Signed-off-by: Peter Xu +Signed-off-by: Danilo C. L. 
de Paula +--- + memory.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/memory.c b/memory.c +index 5d8c9a9..9f40742 100644 +--- a/memory.c ++++ b/memory.c +@@ -2276,7 +2276,10 @@ void memory_region_add_coalescing(MemoryRegion *mr, + void memory_region_clear_coalescing(MemoryRegion *mr) + { + CoalescedMemoryRange *cmr; +- bool updated = false; ++ ++ if (QTAILQ_EMPTY(&mr->coalesced)) { ++ return; ++ } + + qemu_flush_coalesced_mmio_buffer(); + mr->flush_coalesced_mmio = false; +@@ -2285,12 +2288,9 @@ void memory_region_clear_coalescing(MemoryRegion *mr) + cmr = QTAILQ_FIRST(&mr->coalesced); + QTAILQ_REMOVE(&mr->coalesced, cmr, link); + g_free(cmr); +- updated = true; + } + +- if (updated) { +- memory_region_update_coalesced_range(mr); +- } ++ memory_region_update_coalesced_range(mr); + } + + void memory_region_set_flush_coalesced(MemoryRegion *mr) +-- +1.8.3.1 + diff --git a/kvm-memory-Remove-has_coalesced_range-counter.patch b/kvm-memory-Remove-has_coalesced_range-counter.patch new file mode 100644 index 0000000..d98baa2 --- /dev/null +++ b/kvm-memory-Remove-has_coalesced_range-counter.patch @@ -0,0 +1,96 @@ +From c1db31bce6d2e5f49e34a2e7282e50bea3f92278 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 23 Aug 2019 06:14:30 +0100 +Subject: [PATCH 3/5] memory: Remove has_coalesced_range counter +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +Message-id: <20190823061431.31759-4-peterx@redhat.com> +Patchwork-id: 90135 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 3/4] memory: Remove has_coalesced_range counter +Bugzilla: 1743142 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Auger Eric +RH-Acked-by: Philippe Mathieu-Daudé + +The has_coalesced_range could potentially be problematic in that it +only works for additions of coalesced mmio ranges but not deletions. 
+The reason is that has_coalesced_range information can be lost when +the FlatView updates the topology again when the updated region is not +covering the coalesced regions. When that happens, due to +flatrange_equal() is not checking against has_coalesced_range, the new +FlatRange will be seen as the same one as the old and the new +instance (whose has_coalesced_range will be zero) will replace the old +instance (whose has_coalesced_range _could_ be non-zero). + +The counter was originally used to make sure every FlatRange will only +notify once for coalesced_io_{add|del} memory listeners, because each +FlatRange can be used by multiple address spaces, so logically +speaking it could be called multiple times. However we should not +limit that, because memory listeners should will only be registered +with specific address space rather than multiple address spaces. + +So let's fix this up by simply removing the whole has_coalesced_range. + +Fixes: 3ac7d43a6fbb5d4a3 +Signed-off-by: Peter Xu +Message-Id: <20190820141328.10009-3-peterx@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 264ef5a5c52c249ff51a16d141fc03df71714a13) +Signed-off-by: Peter Xu +Signed-off-by: Danilo C. L. de Paula +--- + memory.c | 14 -------------- + 1 file changed, 14 deletions(-) + +diff --git a/memory.c b/memory.c +index 7b24cb8..c7cd43f 100644 +--- a/memory.c ++++ b/memory.c +@@ -217,7 +217,6 @@ struct FlatRange { + bool romd_mode; + bool readonly; + bool nonvolatile; +- int has_coalesced_range; + }; + + #define FOR_EACH_FLAT_RANGE(var, view) \ +@@ -654,7 +653,6 @@ static void render_memory_region(FlatView *view, + fr.romd_mode = mr->romd_mode; + fr.readonly = readonly; + fr.nonvolatile = nonvolatile; +- fr.has_coalesced_range = 0; + + /* Render the region itself into any gaps left by the current view. 
*/ + for (i = 0; i < view->nr && int128_nz(remain); ++i) { +@@ -888,14 +886,6 @@ static void flat_range_coalesced_io_del(FlatRange *fr, AddressSpace *as) + { + CoalescedMemoryRange *cmr; + +- if (!fr->has_coalesced_range) { +- return; +- } +- +- if (--fr->has_coalesced_range > 0) { +- return; +- } +- + QTAILQ_FOREACH(cmr, &fr->mr->coalesced, link) { + flat_range_coalesced_io_notify(fr, as, cmr, false); + } +@@ -910,10 +900,6 @@ static void flat_range_coalesced_io_add(FlatRange *fr, AddressSpace *as) + return; + } + +- if (fr->has_coalesced_range++) { +- return; +- } +- + QTAILQ_FOREACH(cmr, &mr->coalesced, link) { + flat_range_coalesced_io_notify(fr, as, cmr, true); + } +-- +1.8.3.1 + diff --git a/kvm-memory-Split-zones-when-do-coalesced_io_del.patch b/kvm-memory-Split-zones-when-do-coalesced_io_del.patch new file mode 100644 index 0000000..498cd26 --- /dev/null +++ b/kvm-memory-Split-zones-when-do-coalesced_io_del.patch @@ -0,0 +1,123 @@ +From bdd5394047f7fbecac82d067b9e67db8a20c49d2 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 23 Aug 2019 06:14:29 +0100 +Subject: [PATCH 2/5] memory: Split zones when do coalesced_io_del() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +Message-id: <20190823061431.31759-3-peterx@redhat.com> +Patchwork-id: 90133 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 2/4] memory: Split zones when do coalesced_io_del() +Bugzilla: 1743142 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Auger Eric +RH-Acked-by: Philippe Mathieu-Daudé + +It is a workaround of current KVM's KVM_UNREGISTER_COALESCED_MMIO +interface. The kernel interface only allows to unregister an mmio +device with exactly the zone size when registered, or any smaller zone +that is included in the device mmio zone. It does not support the +userspace to specify a very large zone to remove all the small mmio +devices within the zone covered. 
+ +Logically speaking it would be nicer to fix this from KVM side, though +in all cases we still need to coop with old kernels so let's do this. + +Fixes: 3ac7d43a6fbb5d4a3 +Signed-off-by: Peter Xu +Message-Id: <20190820141328.10009-2-peterx@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 23f1174aac4181f86bb7e13ca8bc2d4a0bdf1e5c) +Signed-off-by: Peter Xu +Signed-off-by: Danilo C. L. de Paula +--- + memory.c | 49 +++++++++++++++++++++++++++++++++++-------------- + 1 file changed, 35 insertions(+), 14 deletions(-) + +diff --git a/memory.c b/memory.c +index 9f40742..7b24cb8 100644 +--- a/memory.c ++++ b/memory.c +@@ -855,8 +855,39 @@ static void address_space_update_ioeventfds(AddressSpace *as) + flatview_unref(view); + } + ++/* ++ * Notify the memory listeners about the coalesced IO change events of ++ * range `cmr'. Only the part that has intersection of the specified ++ * FlatRange will be sent. ++ */ ++static void flat_range_coalesced_io_notify(FlatRange *fr, AddressSpace *as, ++ CoalescedMemoryRange *cmr, bool add) ++{ ++ AddrRange tmp; ++ ++ tmp = addrrange_shift(cmr->addr, ++ int128_sub(fr->addr.start, ++ int128_make64(fr->offset_in_region))); ++ if (!addrrange_intersects(tmp, fr->addr)) { ++ return; ++ } ++ tmp = addrrange_intersection(tmp, fr->addr); ++ ++ if (add) { ++ MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, coalesced_io_add, ++ int128_get64(tmp.start), ++ int128_get64(tmp.size)); ++ } else { ++ MEMORY_LISTENER_UPDATE_REGION(fr, as, Reverse, coalesced_io_del, ++ int128_get64(tmp.start), ++ int128_get64(tmp.size)); ++ } ++} ++ + static void flat_range_coalesced_io_del(FlatRange *fr, AddressSpace *as) + { ++ CoalescedMemoryRange *cmr; ++ + if (!fr->has_coalesced_range) { + return; + } +@@ -865,16 +896,15 @@ static void flat_range_coalesced_io_del(FlatRange *fr, AddressSpace *as) + return; + } + +- MEMORY_LISTENER_UPDATE_REGION(fr, as, Reverse, coalesced_io_del, +- int128_get64(fr->addr.start), +- int128_get64(fr->addr.size)); ++ 
QTAILQ_FOREACH(cmr, &fr->mr->coalesced, link) { ++ flat_range_coalesced_io_notify(fr, as, cmr, false); ++ } + } + + static void flat_range_coalesced_io_add(FlatRange *fr, AddressSpace *as) + { + MemoryRegion *mr = fr->mr; + CoalescedMemoryRange *cmr; +- AddrRange tmp; + + if (QTAILQ_EMPTY(&mr->coalesced)) { + return; +@@ -885,16 +915,7 @@ static void flat_range_coalesced_io_add(FlatRange *fr, AddressSpace *as) + } + + QTAILQ_FOREACH(cmr, &mr->coalesced, link) { +- tmp = addrrange_shift(cmr->addr, +- int128_sub(fr->addr.start, +- int128_make64(fr->offset_in_region))); +- if (!addrrange_intersects(tmp, fr->addr)) { +- continue; +- } +- tmp = addrrange_intersection(tmp, fr->addr); +- MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, coalesced_io_add, +- int128_get64(tmp.start), +- int128_get64(tmp.size)); ++ flat_range_coalesced_io_notify(fr, as, cmr, true); + } + } + +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 22fa3d6..e1168f4 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.1.0 -Release: 5%{?dist} +Release: 6%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -142,6 +142,14 @@ Patch28: kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch Patch29: kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch # For bz#1744170 - [IBM Power] New 8.1.0 machine type for pseries Patch31: kvm-redhat-update-pseries-rhel8.1.0-machine-type.patch +# For bz#1743142 - Boot guest with multiple e1000 devices, qemu will crash after several guest reboots: kvm_mem_ioeventfd_add: error adding ioeventfd: No space left on device (28) +Patch32: kvm-memory-Refactor-memory_region_clear_coalescing.patch +# For bz#1743142 - Boot guest with multiple e1000 devices, qemu will crash after several guest reboots: kvm_mem_ioeventfd_add: error adding ioeventfd: No space left on device (28) +Patch33: kvm-memory-Split-zones-when-do-coalesced_io_del.patch +# For bz#1743142 - Boot guest with multiple e1000 devices, qemu will crash after several guest reboots: kvm_mem_ioeventfd_add: error adding ioeventfd: No space left on device (28) +Patch34: kvm-memory-Remove-has_coalesced_range-counter.patch +# For bz#1743142 - Boot guest with multiple e1000 devices, qemu will crash after several guest reboots: kvm_mem_ioeventfd_add: error adding ioeventfd: No space left on device (28) +Patch35: kvm-memory-Fix-up-memory_region_-add-del-_coalescing.patch BuildRequires: wget BuildRequires: rpm-build @@ -575,7 +583,6 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --enable-vhost-vsock \ --enable-vnc \ --enable-mpath \ - --disable-virglrenderer \ --disable-xen-pci-passthrough \ --enable-tcg \ --with-git=git \ @@ -1037,6 +1044,10 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %if 0%{have_memlock_limits} %{_sysconfdir}/security/limits.d/95-kvm-memlock.conf %endif +%if %{have_spice} +%{_libexecdir}/vhost-user-gpu +%{_datadir}/%{name}/vhost-user/50-qemu-gpu.json +%endif %files -n qemu-img %defattr(-,root,root) @@ -1080,6 +1091,17 @@ useradd -r -u 107 -g qemu -G kvm -d / -s 
/sbin/nologin \ %changelog +* Wed Sep 04 2019 Danilo Cesar Lemes de Paula - 4.1.0-6.el8 +- kvm-memory-Refactor-memory_region_clear_coalescing.patch [bz#1743142] +- kvm-memory-Split-zones-when-do-coalesced_io_del.patch [bz#1743142] +- kvm-memory-Remove-has_coalesced_range-counter.patch [bz#1743142] +- kvm-memory-Fix-up-memory_region_-add-del-_coalescing.patch [bz#1743142] +- kvm-enable-virgl-for-real-this-time.patch [bz#1559740] +- Resolves: bz#1559740 + ([RFE] Enable virgl as TechPreview (qemu)) +- Resolves: bz#1743142 + (Boot guest with multiple e1000 devices, qemu will crash after several guest reboots: kvm_mem_ioeventfd_add: error adding ioeventfd: No space left on device (28)) + * Tue Aug 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-5.el8 - kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch [bz#1693772] - kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch [bz#1693772] From 1442ce82435b714cab29846ae7ffc4e8eb021ef2 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Fri, 6 Sep 2019 00:33:12 +0100 Subject: [PATCH 047/195] * Fri Sep 06 2019 Danilo Cesar Lemes de Paula - 4.1.0-7.el8 - kvm-trace-Clarify-DTrace-SystemTap-help-message.patch [bz#1516220] - kvm-socket-Add-backlog-parameter-to-socket_listen.patch [bz#1726898] - kvm-socket-Add-num-connections-to-qio_channel_socket_syn.patch [bz#1726898] - kvm-socket-Add-num-connections-to-qio_channel_socket_asy.patch [bz#1726898] - kvm-socket-Add-num-connections-to-qio_net_listener_open_.patch [bz#1726898] - kvm-multifd-Use-number-of-channels-as-listen-backlog.patch [bz#1726898] - kvm-pseries-Fix-compat_pvr-on-reset.patch [bz#1744107] - kvm-spapr-Set-compat-mode-in-spapr_core_plug.patch [bz#1744107] - Resolves: bz#1516220 (-trace help prints an incomplete list of trace events) - Resolves: bz#1726898 (Parallel migration fails with error "Unable to write to socket: Connection reset by peer" now and then) - Resolves: bz#1744107 (Migration from P8(qemu4.1) to P9(qemu4.1), after migration, qemu 
crash on destination with error message "qemu-kvm: error while loading state for instance 0x1 of device 'cpu'") --- ...number-of-channels-as-listen-backlog.patch | 50 +++ kvm-pseries-Fix-compat_pvr-on-reset.patch | 80 +++++ ...d-backlog-parameter-to-socket_listen.patch | 295 ++++++++++++++++++ ...onnections-to-qio_channel_socket_asy.patch | 144 +++++++++ ...onnections-to-qio_channel_socket_syn.patch | 173 ++++++++++ ...onnections-to-qio_net_listener_open_.patch | 151 +++++++++ ...r-Set-compat-mode-in-spapr_core_plug.patch | 127 ++++++++ ...larify-DTrace-SystemTap-help-message.patch | 80 +++++ qemu-kvm.spec | 34 +- 9 files changed, 1133 insertions(+), 1 deletion(-) create mode 100644 kvm-multifd-Use-number-of-channels-as-listen-backlog.patch create mode 100644 kvm-pseries-Fix-compat_pvr-on-reset.patch create mode 100644 kvm-socket-Add-backlog-parameter-to-socket_listen.patch create mode 100644 kvm-socket-Add-num-connections-to-qio_channel_socket_asy.patch create mode 100644 kvm-socket-Add-num-connections-to-qio_channel_socket_syn.patch create mode 100644 kvm-socket-Add-num-connections-to-qio_net_listener_open_.patch create mode 100644 kvm-spapr-Set-compat-mode-in-spapr_core_plug.patch create mode 100644 kvm-trace-Clarify-DTrace-SystemTap-help-message.patch diff --git a/kvm-multifd-Use-number-of-channels-as-listen-backlog.patch b/kvm-multifd-Use-number-of-channels-as-listen-backlog.patch new file mode 100644 index 0000000..d48d556 --- /dev/null +++ b/kvm-multifd-Use-number-of-channels-as-listen-backlog.patch @@ -0,0 +1,50 @@ +From 8c93ef106ecf2aa0604ffe7fee3d628b88c2b015 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 4 Sep 2019 10:26:06 +0100 +Subject: [PATCH 6/8] multifd: Use number of channels as listen backlog +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Juan Quintela +Message-id: <20190904102606.15744-6-quintela@redhat.com> +Patchwork-id: 90273 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 5/5] 
multifd: Use number of channels as listen backlog +Bugzilla: 1726898 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Danilo de Paula + +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Juan Quintela +(cherry picked from commit 0705e56496d2b155b5796c6b28d4110e5bcbd5d8) +Signed-off-by: Juan Quintela +Signed-off-by: Danilo C. L. de Paula +--- + migration/socket.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/migration/socket.c b/migration/socket.c +index e63f5e1..97c9efd 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -178,10 +178,15 @@ static void socket_start_incoming_migration(SocketAddress *saddr, + { + QIONetListener *listener = qio_net_listener_new(); + size_t i; ++ int num = 1; + + qio_net_listener_set_name(listener, "migration-socket-listener"); + +- if (qio_net_listener_open_sync(listener, saddr, 1, errp) < 0) { ++ if (migrate_use_multifd()) { ++ num = migrate_multifd_channels(); ++ } ++ ++ if (qio_net_listener_open_sync(listener, saddr, num, errp) < 0) { + object_unref(OBJECT(listener)); + return; + } +-- +1.8.3.1 + diff --git a/kvm-pseries-Fix-compat_pvr-on-reset.patch b/kvm-pseries-Fix-compat_pvr-on-reset.patch new file mode 100644 index 0000000..f03db5e --- /dev/null +++ b/kvm-pseries-Fix-compat_pvr-on-reset.patch @@ -0,0 +1,80 @@ +From 88cfbf2cc3e93a276f9d036850265eb8c2f5310c Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Wed, 4 Sep 2019 10:31:38 +0100 +Subject: [PATCH 7/8] pseries: Fix compat_pvr on reset + +RH-Author: Laurent Vivier +Message-id: <20190904103139.29870-2-lvivier@redhat.com> +Patchwork-id: 90274 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/2] pseries: Fix compat_pvr on reset +Bugzilla: 1744107 +RH-Acked-by: John Snow +RH-Acked-by: David Gibson +RH-Acked-by: Thomas Huth + +If we migrate a P8 machine to a P9 machine, the migration fails on +destination with: + + error while loading state for instance 0x1 of device 'cpu' + load of migration failed: Operation not
permitted + +This is caused because the compat_pvr field is only present for the first +CPU. +Originally, spapr_machine_reset() calls ppc_set_compat() to set the value +max_compat_pvr for the first cpu and this was propagated to all CPUs by +spapr_cpu_reset(). Now, as spapr_cpu_reset() is called before that, the +value is not propagated to all CPUs and the migration fails. + +To fix that, propagate the new value to all CPUs in spapr_machine_reset(). + +Fixes: 25c9780d38d4 ("spapr: Reset CAS & IRQ subsystem after devices") +Signed-off-by: Laurent Vivier +Message-Id: <20190826090812.19080-1-lvivier@redhat.com> +Reviewed-by: Greg Kurz +Signed-off-by: David Gibson +(cherry picked from commit ce03a193e1172ff7d4b3303ec7472dc29183db8c) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1744107 +BRANCH: rhel-av-8.1.0/master-4.1.0 +UPSTREAM: Merged +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr.c | 8 +++++++- + hw/ppc/spapr_cpu_core.c | 2 ++ + 2 files changed, 9 insertions(+), 1 deletion(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 9b500d2..30bf7bb 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -1746,7 +1746,13 @@ static void spapr_machine_reset(MachineState *machine) + spapr_ovec_cleanup(spapr->ov5_cas); + spapr->ov5_cas = spapr_ovec_new(); + +- ppc_set_compat(first_ppc_cpu, spapr->max_compat_pvr, &error_fatal); ++ /* ++ * reset compat_pvr for all CPUs ++ * as qemu_devices_reset() is called before this, ++ * it can't be propagated by spapr_cpu_reset() ++ * from the first CPU to all the others ++ */ ++ ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal); + } + + if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index 29a3c7d..ae43c57 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -41,6 +41,8 @@ static void spapr_cpu_reset(void *opaque) + + /* Set compatibility mode to match the boot CPU, which was either set + * by the machine reset code 
or by CAS. This should never fail. ++ * At startup the value is already set for all the CPUs ++ * but we need this when we hotplug a new CPU + */ + ppc_set_compat(cpu, POWERPC_CPU(first_cpu)->compat_pvr, &error_abort); + +-- +1.8.3.1 + diff --git a/kvm-socket-Add-backlog-parameter-to-socket_listen.patch b/kvm-socket-Add-backlog-parameter-to-socket_listen.patch new file mode 100644 index 0000000..f3dfe2d --- /dev/null +++ b/kvm-socket-Add-backlog-parameter-to-socket_listen.patch @@ -0,0 +1,295 @@ +From fce683618bc605eaedfdcea0db974734c111a2e9 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 4 Sep 2019 10:26:02 +0100 +Subject: [PATCH 2/8] socket: Add backlog parameter to socket_listen +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Juan Quintela +Message-id: <20190904102606.15744-2-quintela@redhat.com> +Patchwork-id: 90270 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 1/5] socket: Add backlog parameter to socket_listen +Bugzilla: 1726898 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Danilo de Paula + +Current parameter was always one. We continue with that value for now +in all callers. + +Signed-off-by: Juan Quintela +Reviewed-by: Daniel P. Berrangé +--- +Moved trace to socket_listen +(cherry picked from commit e5b6353cf25c99c3f08bf51e29933352f7140e8f) +Signed-off-by: Juan Quintela + +Signed-off-by: Danilo C. L. 
de Paula +--- + include/qemu/sockets.h | 2 +- + io/channel-socket.c | 2 +- + qga/channel-posix.c | 2 +- + tests/test-util-sockets.c | 12 ++++++------ + util/qemu-sockets.c | 33 ++++++++++++++++++++++----------- + util/trace-events | 3 +++ + 6 files changed, 34 insertions(+), 20 deletions(-) + +diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h +index 8140fea..57cd049 100644 +--- a/include/qemu/sockets.h ++++ b/include/qemu/sockets.h +@@ -41,7 +41,7 @@ int unix_connect(const char *path, Error **errp); + + SocketAddress *socket_parse(const char *str, Error **errp); + int socket_connect(SocketAddress *addr, Error **errp); +-int socket_listen(SocketAddress *addr, Error **errp); ++int socket_listen(SocketAddress *addr, int num, Error **errp); + void socket_listen_cleanup(int fd, Error **errp); + int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp); + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index bec3d93..a533c8b 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -202,7 +202,7 @@ int qio_channel_socket_listen_sync(QIOChannelSocket *ioc, + int fd; + + trace_qio_channel_socket_listen_sync(ioc, addr); +- fd = socket_listen(addr, errp); ++ fd = socket_listen(addr, 1, errp); + if (fd < 0) { + trace_qio_channel_socket_listen_fail(ioc); + return -1; +diff --git a/qga/channel-posix.c b/qga/channel-posix.c +index 5a925a9..8fc205a 100644 +--- a/qga/channel-posix.c ++++ b/qga/channel-posix.c +@@ -215,7 +215,7 @@ static gboolean ga_channel_open(GAChannel *c, const gchar *path, + return false; + } + +- fd = socket_listen(addr, &local_err); ++ fd = socket_listen(addr, 1, &local_err); + qapi_free_SocketAddress(addr); + if (local_err != NULL) { + g_critical("%s", error_get_pretty(local_err)); +diff --git a/tests/test-util-sockets.c b/tests/test-util-sockets.c +index f1ebffe..c8e1893 100644 +--- a/tests/test-util-sockets.c ++++ b/tests/test-util-sockets.c +@@ -93,7 +93,7 @@ static void test_socket_fd_pass_name_good(void) + 
g_assert_cmpint(fd, !=, mon_fd); + close(fd); + +- fd = socket_listen(&addr, &error_abort); ++ fd = socket_listen(&addr, 1, &error_abort); + g_assert_cmpint(fd, !=, -1); + g_assert_cmpint(fd, !=, mon_fd); + close(fd); +@@ -124,7 +124,7 @@ static void test_socket_fd_pass_name_bad(void) + g_assert_cmpint(fd, ==, -1); + error_free_or_abort(&err); + +- fd = socket_listen(&addr, &err); ++ fd = socket_listen(&addr, 1, &err); + g_assert_cmpint(fd, ==, -1); + error_free_or_abort(&err); + +@@ -151,7 +151,7 @@ static void test_socket_fd_pass_name_nomon(void) + g_assert_cmpint(fd, ==, -1); + error_free_or_abort(&err); + +- fd = socket_listen(&addr, &err); ++ fd = socket_listen(&addr, 1, &err); + g_assert_cmpint(fd, ==, -1); + error_free_or_abort(&err); + +@@ -174,7 +174,7 @@ static void test_socket_fd_pass_num_good(void) + fd = socket_connect(&addr, &error_abort); + g_assert_cmpint(fd, ==, sfd); + +- fd = socket_listen(&addr, &error_abort); ++ fd = socket_listen(&addr, 1, &error_abort); + g_assert_cmpint(fd, ==, sfd); + + g_free(addr.u.fd.str); +@@ -197,7 +197,7 @@ static void test_socket_fd_pass_num_bad(void) + g_assert_cmpint(fd, ==, -1); + error_free_or_abort(&err); + +- fd = socket_listen(&addr, &err); ++ fd = socket_listen(&addr, 1, &err); + g_assert_cmpint(fd, ==, -1); + error_free_or_abort(&err); + +@@ -220,7 +220,7 @@ static void test_socket_fd_pass_num_nocli(void) + g_assert_cmpint(fd, ==, -1); + error_free_or_abort(&err); + +- fd = socket_listen(&addr, &err); ++ fd = socket_listen(&addr, 1, &err); + g_assert_cmpint(fd, ==, -1); + error_free_or_abort(&err); + +diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c +index a5092db..a39ada4 100644 +--- a/util/qemu-sockets.c ++++ b/util/qemu-sockets.c +@@ -31,6 +31,7 @@ + #include "qapi/qobject-input-visitor.h" + #include "qapi/qobject-output-visitor.h" + #include "qemu/cutils.h" ++#include "trace.h" + + #ifndef AI_ADDRCONFIG + # define AI_ADDRCONFIG 0 +@@ -207,6 +208,7 @@ static int try_bind(int socket, 
InetSocketAddress *saddr, struct addrinfo *e) + + static int inet_listen_saddr(InetSocketAddress *saddr, + int port_offset, ++ int num, + Error **errp) + { + struct addrinfo ai,*res,*e; +@@ -303,7 +305,7 @@ static int inet_listen_saddr(InetSocketAddress *saddr, + goto listen_failed; + } + } else { +- if (!listen(slisten, 1)) { ++ if (!listen(slisten, num)) { + goto listen_ok; + } + if (errno != EADDRINUSE) { +@@ -746,6 +748,7 @@ static int vsock_connect_saddr(VsockSocketAddress *vaddr, Error **errp) + } + + static int vsock_listen_saddr(VsockSocketAddress *vaddr, ++ int num, + Error **errp) + { + struct sockaddr_vm svm; +@@ -767,7 +770,7 @@ static int vsock_listen_saddr(VsockSocketAddress *vaddr, + return -1; + } + +- if (listen(slisten, 1) != 0) { ++ if (listen(slisten, num) != 0) { + error_setg_errno(errp, errno, "Failed to listen on socket"); + closesocket(slisten); + return -1; +@@ -808,6 +811,7 @@ static int vsock_connect_saddr(VsockSocketAddress *vaddr, Error **errp) + } + + static int vsock_listen_saddr(VsockSocketAddress *vaddr, ++ int num, + Error **errp) + { + vsock_unsupported(errp); +@@ -825,6 +829,7 @@ static int vsock_parse(VsockSocketAddress *addr, const char *str, + #ifndef _WIN32 + + static int unix_listen_saddr(UnixSocketAddress *saddr, ++ int num, + Error **errp) + { + struct sockaddr_un un; +@@ -886,7 +891,7 @@ static int unix_listen_saddr(UnixSocketAddress *saddr, + error_setg_errno(errp, errno, "Failed to bind socket to %s", path); + goto err; + } +- if (listen(sock, 1) < 0) { ++ if (listen(sock, num) < 0) { + error_setg_errno(errp, errno, "Failed to listen on socket"); + goto err; + } +@@ -953,6 +958,7 @@ static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp) + #else + + static int unix_listen_saddr(UnixSocketAddress *saddr, ++ int num, + Error **errp) + { + error_setg(errp, "unix sockets are not available on windows"); +@@ -976,7 +982,7 @@ int unix_listen(const char *str, Error **errp) + + saddr = g_new0(UnixSocketAddress, 1); 
+ saddr->path = g_strdup(str); +- sock = unix_listen_saddr(saddr, errp); ++ sock = unix_listen_saddr(saddr, 1, errp); + qapi_free_UnixSocketAddress(saddr); + return sock; + } +@@ -1033,9 +1039,13 @@ fail: + return NULL; + } + +-static int socket_get_fd(const char *fdstr, Error **errp) ++static int socket_get_fd(const char *fdstr, int num, Error **errp) + { + int fd; ++ if (num != 1) { ++ error_setg_errno(errp, EINVAL, "socket_get_fd: too many connections"); ++ return -1; ++ } + if (cur_mon) { + fd = monitor_get_fd(cur_mon, fdstr, errp); + if (fd < 0) { +@@ -1071,7 +1081,7 @@ int socket_connect(SocketAddress *addr, Error **errp) + break; + + case SOCKET_ADDRESS_TYPE_FD: +- fd = socket_get_fd(addr->u.fd.str, errp); ++ fd = socket_get_fd(addr->u.fd.str, 1, errp); + break; + + case SOCKET_ADDRESS_TYPE_VSOCK: +@@ -1084,25 +1094,26 @@ int socket_connect(SocketAddress *addr, Error **errp) + return fd; + } + +-int socket_listen(SocketAddress *addr, Error **errp) ++int socket_listen(SocketAddress *addr, int num, Error **errp) + { + int fd; + ++ trace_socket_listen(num); + switch (addr->type) { + case SOCKET_ADDRESS_TYPE_INET: +- fd = inet_listen_saddr(&addr->u.inet, 0, errp); ++ fd = inet_listen_saddr(&addr->u.inet, 0, num, errp); + break; + + case SOCKET_ADDRESS_TYPE_UNIX: +- fd = unix_listen_saddr(&addr->u.q_unix, errp); ++ fd = unix_listen_saddr(&addr->u.q_unix, num, errp); + break; + + case SOCKET_ADDRESS_TYPE_FD: +- fd = socket_get_fd(addr->u.fd.str, errp); ++ fd = socket_get_fd(addr->u.fd.str, num, errp); + break; + + case SOCKET_ADDRESS_TYPE_VSOCK: +- fd = vsock_listen_saddr(&addr->u.vsock, errp); ++ fd = vsock_listen_saddr(&addr->u.vsock, num, errp); + break; + + default: +diff --git a/util/trace-events b/util/trace-events +index 9dbd237..83b6639 100644 +--- a/util/trace-events ++++ b/util/trace-events +@@ -64,6 +64,9 @@ lockcnt_futex_wait(const void *lockcnt, int val) "lockcnt %p waiting on %d" + lockcnt_futex_wait_resume(const void *lockcnt, int new) "lockcnt %p 
after wait: %d" + lockcnt_futex_wake(const void *lockcnt) "lockcnt %p waking up one waiter" + ++# qemu-sockets.c ++socket_listen(int num) "backlog: %d" ++ + # qemu-thread-common.h + qemu_mutex_lock(void *mutex, const char *file, const int line) "waiting on mutex %p (%s:%d)" + qemu_mutex_locked(void *mutex, const char *file, const int line) "taken mutex %p (%s:%d)" +-- +1.8.3.1 + diff --git a/kvm-socket-Add-num-connections-to-qio_channel_socket_asy.patch b/kvm-socket-Add-num-connections-to-qio_channel_socket_asy.patch new file mode 100644 index 0000000..0512189 --- /dev/null +++ b/kvm-socket-Add-num-connections-to-qio_channel_socket_asy.patch @@ -0,0 +1,144 @@ +From d2bb195f057fd21444644d3996551fe8775043e5 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 4 Sep 2019 10:26:04 +0100 +Subject: [PATCH 4/8] socket: Add num connections to qio_channel_socket_async() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Juan Quintela +Message-id: <20190904102606.15744-4-quintela@redhat.com> +Patchwork-id: 90271 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 3/5] socket: Add num connections to qio_channel_socket_async() +Bugzilla: 1726898 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Danilo de Paula + +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Juan Quintela +(cherry picked from commit 7959e29ea0d6100038367beff9a0da0c83b322a2) +Signed-off-by: Juan Quintela +Signed-off-by: Danilo C. L. 
de Paula +--- + include/io/channel-socket.h | 2 ++ + io/channel-socket.c | 30 +++++++++++++++++++++++------- + io/trace-events | 2 +- + tests/test-io-channel-socket.c | 2 +- + 4 files changed, 27 insertions(+), 9 deletions(-) + +diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h +index ed88e5b..777ff59 100644 +--- a/include/io/channel-socket.h ++++ b/include/io/channel-socket.h +@@ -140,6 +140,7 @@ int qio_channel_socket_listen_sync(QIOChannelSocket *ioc, + * qio_channel_socket_listen_async: + * @ioc: the socket channel object + * @addr: the address to listen to ++ * @num: the expected ammount of connections + * @callback: the function to invoke on completion + * @opaque: user data to pass to @callback + * @destroy: the function to free @opaque +@@ -155,6 +156,7 @@ int qio_channel_socket_listen_sync(QIOChannelSocket *ioc, + */ + void qio_channel_socket_listen_async(QIOChannelSocket *ioc, + SocketAddress *addr, ++ int num, + QIOTaskFunc callback, + gpointer opaque, + GDestroyNotify destroy, +diff --git a/io/channel-socket.c b/io/channel-socket.c +index 6258c25..b74f5b9 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -220,14 +220,27 @@ int qio_channel_socket_listen_sync(QIOChannelSocket *ioc, + } + + ++struct QIOChannelListenWorkerData { ++ SocketAddress *addr; ++ int num; /* amount of expected connections */ ++}; ++ ++static void qio_channel_listen_worker_free(gpointer opaque) ++{ ++ struct QIOChannelListenWorkerData *data = opaque; ++ ++ qapi_free_SocketAddress(data->addr); ++ g_free(data); ++} ++ + static void qio_channel_socket_listen_worker(QIOTask *task, + gpointer opaque) + { + QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(qio_task_get_source(task)); +- SocketAddress *addr = opaque; ++ struct QIOChannelListenWorkerData *data = opaque; + Error *err = NULL; + +- qio_channel_socket_listen_sync(ioc, addr, 1, &err); ++ qio_channel_socket_listen_sync(ioc, data->addr, data->num, &err); + + qio_task_set_error(task, err); + } +@@ 
-235,6 +248,7 @@ static void qio_channel_socket_listen_worker(QIOTask *task, + + void qio_channel_socket_listen_async(QIOChannelSocket *ioc, + SocketAddress *addr, ++ int num, + QIOTaskFunc callback, + gpointer opaque, + GDestroyNotify destroy, +@@ -242,16 +256,18 @@ void qio_channel_socket_listen_async(QIOChannelSocket *ioc, + { + QIOTask *task = qio_task_new( + OBJECT(ioc), callback, opaque, destroy); +- SocketAddress *addrCopy; ++ struct QIOChannelListenWorkerData *data; + +- addrCopy = QAPI_CLONE(SocketAddress, addr); ++ data = g_new0(struct QIOChannelListenWorkerData, 1); ++ data->addr = QAPI_CLONE(SocketAddress, addr); ++ data->num = num; + + /* socket_listen() blocks in DNS lookups, so we must use a thread */ +- trace_qio_channel_socket_listen_async(ioc, addr); ++ trace_qio_channel_socket_listen_async(ioc, addr, num); + qio_task_run_in_thread(task, + qio_channel_socket_listen_worker, +- addrCopy, +- (GDestroyNotify)qapi_free_SocketAddress, ++ data, ++ qio_channel_listen_worker_free, + context); + } + +diff --git a/io/trace-events b/io/trace-events +index 2e6aa1d..d7bc70b 100644 +--- a/io/trace-events ++++ b/io/trace-events +@@ -18,7 +18,7 @@ qio_channel_socket_connect_async(void *ioc, void *addr) "Socket connect async io + qio_channel_socket_connect_fail(void *ioc) "Socket connect fail ioc=%p" + qio_channel_socket_connect_complete(void *ioc, int fd) "Socket connect complete ioc=%p fd=%d" + qio_channel_socket_listen_sync(void *ioc, void *addr, int num) "Socket listen sync ioc=%p addr=%p num=%d" +-qio_channel_socket_listen_async(void *ioc, void *addr) "Socket listen async ioc=%p addr=%p" ++qio_channel_socket_listen_async(void *ioc, void *addr, int num) "Socket listen async ioc=%p addr=%p num=%d" + qio_channel_socket_listen_fail(void *ioc) "Socket listen fail ioc=%p" + qio_channel_socket_listen_complete(void *ioc, int fd) "Socket listen complete ioc=%p fd=%d" + qio_channel_socket_dgram_sync(void *ioc, void *localAddr, void *remoteAddr) "Socket dgram sync ioc=%p 
localAddr=%p remoteAddr=%p" +diff --git a/tests/test-io-channel-socket.c b/tests/test-io-channel-socket.c +index 6eebcee..50235c1 100644 +--- a/tests/test-io-channel-socket.c ++++ b/tests/test-io-channel-socket.c +@@ -113,7 +113,7 @@ static void test_io_channel_setup_async(SocketAddress *listen_addr, + + lioc = qio_channel_socket_new(); + qio_channel_socket_listen_async( +- lioc, listen_addr, ++ lioc, listen_addr, 1, + test_io_channel_complete, &data, NULL, NULL); + + g_main_loop_run(data.loop); +-- +1.8.3.1 + diff --git a/kvm-socket-Add-num-connections-to-qio_channel_socket_syn.patch b/kvm-socket-Add-num-connections-to-qio_channel_socket_syn.patch new file mode 100644 index 0000000..1cac5e6 --- /dev/null +++ b/kvm-socket-Add-num-connections-to-qio_channel_socket_syn.patch @@ -0,0 +1,173 @@ +From 307f4596dc70dcabac9da3ec680d377e1df21397 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 4 Sep 2019 10:26:03 +0100 +Subject: [PATCH 3/8] socket: Add num connections to qio_channel_socket_sync() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Juan Quintela +Message-id: <20190904102606.15744-3-quintela@redhat.com> +Patchwork-id: 90272 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 2/5] socket: Add num connections to qio_channel_socket_sync() +Bugzilla: 1726898 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Danilo de Paula + +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Juan Quintela +(cherry picked from commit 4e2d8bf6f143138ad121545a7cf4525e36040039) +Signed-off-by: Juan Quintela +Signed-off-by: Danilo C. L. 
de Paula +--- + include/io/channel-socket.h | 2 ++ + io/channel-socket.c | 7 ++++--- + io/net-listener.c | 2 +- + io/trace-events | 2 +- + scsi/qemu-pr-helper.c | 3 ++- + tests/test-char.c | 4 ++-- + tests/test-io-channel-socket.c | 2 +- + tests/tpm-emu.c | 2 +- + 8 files changed, 14 insertions(+), 10 deletions(-) + +diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h +index d7134d2..ed88e5b 100644 +--- a/include/io/channel-socket.h ++++ b/include/io/channel-socket.h +@@ -123,6 +123,7 @@ void qio_channel_socket_connect_async(QIOChannelSocket *ioc, + * qio_channel_socket_listen_sync: + * @ioc: the socket channel object + * @addr: the address to listen to ++ * @num: the expected ammount of connections + * @errp: pointer to a NULL-initialized error object + * + * Attempt to listen to the address @addr. This method +@@ -132,6 +133,7 @@ void qio_channel_socket_connect_async(QIOChannelSocket *ioc, + */ + int qio_channel_socket_listen_sync(QIOChannelSocket *ioc, + SocketAddress *addr, ++ int num, + Error **errp); + + /** +diff --git a/io/channel-socket.c b/io/channel-socket.c +index a533c8b..6258c25 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -197,12 +197,13 @@ void qio_channel_socket_connect_async(QIOChannelSocket *ioc, + + int qio_channel_socket_listen_sync(QIOChannelSocket *ioc, + SocketAddress *addr, ++ int num, + Error **errp) + { + int fd; + +- trace_qio_channel_socket_listen_sync(ioc, addr); +- fd = socket_listen(addr, 1, errp); ++ trace_qio_channel_socket_listen_sync(ioc, addr, num); ++ fd = socket_listen(addr, num, errp); + if (fd < 0) { + trace_qio_channel_socket_listen_fail(ioc); + return -1; +@@ -226,7 +227,7 @@ static void qio_channel_socket_listen_worker(QIOTask *task, + SocketAddress *addr = opaque; + Error *err = NULL; + +- qio_channel_socket_listen_sync(ioc, addr, &err); ++ qio_channel_socket_listen_sync(ioc, addr, 1, &err); + + qio_task_set_error(task, err); + } +diff --git a/io/net-listener.c b/io/net-listener.c 
+index d8cfe52..dc81150 100644 +--- a/io/net-listener.c ++++ b/io/net-listener.c +@@ -82,7 +82,7 @@ int qio_net_listener_open_sync(QIONetListener *listener, + for (i = 0; i < nresaddrs; i++) { + QIOChannelSocket *sioc = qio_channel_socket_new(); + +- if (qio_channel_socket_listen_sync(sioc, resaddrs[i], ++ if (qio_channel_socket_listen_sync(sioc, resaddrs[i], 1, + err ? NULL : &err) == 0) { + success = true; + +diff --git a/io/trace-events b/io/trace-events +index 3783905..2e6aa1d 100644 +--- a/io/trace-events ++++ b/io/trace-events +@@ -17,7 +17,7 @@ qio_channel_socket_connect_sync(void *ioc, void *addr) "Socket connect sync ioc= + qio_channel_socket_connect_async(void *ioc, void *addr) "Socket connect async ioc=%p addr=%p" + qio_channel_socket_connect_fail(void *ioc) "Socket connect fail ioc=%p" + qio_channel_socket_connect_complete(void *ioc, int fd) "Socket connect complete ioc=%p fd=%d" +-qio_channel_socket_listen_sync(void *ioc, void *addr) "Socket listen sync ioc=%p addr=%p" ++qio_channel_socket_listen_sync(void *ioc, void *addr, int num) "Socket listen sync ioc=%p addr=%p num=%d" + qio_channel_socket_listen_async(void *ioc, void *addr) "Socket listen async ioc=%p addr=%p" + qio_channel_socket_listen_fail(void *ioc) "Socket listen fail ioc=%p" + qio_channel_socket_listen_complete(void *ioc, int fd) "Socket listen complete ioc=%p fd=%d" +diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c +index a256ce4..a8a74d1 100644 +--- a/scsi/qemu-pr-helper.c ++++ b/scsi/qemu-pr-helper.c +@@ -1005,7 +1005,8 @@ int main(int argc, char **argv) + .u.q_unix.path = socket_path, + }; + server_ioc = qio_channel_socket_new(); +- if (qio_channel_socket_listen_sync(server_ioc, &saddr, &local_err) < 0) { ++ if (qio_channel_socket_listen_sync(server_ioc, &saddr, ++ 1, &local_err) < 0) { + object_unref(OBJECT(server_ioc)); + error_report_err(local_err); + return 1; +diff --git a/tests/test-char.c b/tests/test-char.c +index f9440cd..af131fc 100644 +--- a/tests/test-char.c ++++ 
b/tests/test-char.c +@@ -666,7 +666,7 @@ char_socket_addr_to_opt_str(SocketAddress *addr, bool fd_pass, + char *optstr; + g_assert(!reconnect); + if (is_listen) { +- qio_channel_socket_listen_sync(ioc, addr, &error_abort); ++ qio_channel_socket_listen_sync(ioc, addr, 1, &error_abort); + } else { + qio_channel_socket_connect_sync(ioc, addr, &error_abort); + } +@@ -891,7 +891,7 @@ static void char_socket_client_test(gconstpointer opaque) + */ + ioc = qio_channel_socket_new(); + g_assert_nonnull(ioc); +- qio_channel_socket_listen_sync(ioc, config->addr, &error_abort); ++ qio_channel_socket_listen_sync(ioc, config->addr, 1, &error_abort); + addr = qio_channel_socket_get_local_address(ioc, &error_abort); + g_assert_nonnull(addr); + +diff --git a/tests/test-io-channel-socket.c b/tests/test-io-channel-socket.c +index d2053c4..6eebcee 100644 +--- a/tests/test-io-channel-socket.c ++++ b/tests/test-io-channel-socket.c +@@ -57,7 +57,7 @@ static void test_io_channel_setup_sync(SocketAddress *listen_addr, + QIOChannelSocket *lioc; + + lioc = qio_channel_socket_new(); +- qio_channel_socket_listen_sync(lioc, listen_addr, &error_abort); ++ qio_channel_socket_listen_sync(lioc, listen_addr, 1, &error_abort); + + if (listen_addr->type == SOCKET_ADDRESS_TYPE_INET) { + SocketAddress *laddr = qio_channel_socket_get_local_address( +diff --git a/tests/tpm-emu.c b/tests/tpm-emu.c +index 125e697..c43ac4a 100644 +--- a/tests/tpm-emu.c ++++ b/tests/tpm-emu.c +@@ -76,7 +76,7 @@ void *tpm_emu_ctrl_thread(void *data) + QIOChannelSocket *lioc = qio_channel_socket_new(); + QIOChannel *ioc; + +- qio_channel_socket_listen_sync(lioc, s->addr, &error_abort); ++ qio_channel_socket_listen_sync(lioc, s->addr, 1, &error_abort); + + g_mutex_lock(&s->data_mutex); + s->data_cond_signal = true; +-- +1.8.3.1 + diff --git a/kvm-socket-Add-num-connections-to-qio_net_listener_open_.patch b/kvm-socket-Add-num-connections-to-qio_net_listener_open_.patch new file mode 100644 index 0000000..edadfe8 --- /dev/null +++ 
b/kvm-socket-Add-num-connections-to-qio_net_listener_open_.patch @@ -0,0 +1,151 @@ +From c7029ffd110bdd4bab6847cd485898dbc5acf5f3 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 4 Sep 2019 10:26:05 +0100 +Subject: [PATCH 5/8] socket: Add num connections to + qio_net_listener_open_sync() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Juan Quintela +Message-id: <20190904102606.15744-5-quintela@redhat.com> +Patchwork-id: 90269 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 4/5] socket: Add num connections to qio_net_listener_open_sync() +Bugzilla: 1726898 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Danilo de Paula + +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Juan Quintela +(cherry picked from commit fc8135c63021e8e442a7a894e5434f210614a73c) +Signed-off-by: Juan Quintela +Signed-off-by: Danilo C. L. de Paula +--- + blockdev-nbd.c | 2 +- + chardev/char-socket.c | 2 +- + include/io/net-listener.h | 2 ++ + io/net-listener.c | 3 ++- + migration/socket.c | 2 +- + qemu-nbd.c | 2 +- + ui/vnc.c | 4 ++-- + 7 files changed, 10 insertions(+), 7 deletions(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 66eebab..06041a2 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -102,7 +102,7 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds, + qio_net_listener_set_name(nbd_server->listener, + "nbd-listener"); + +- if (qio_net_listener_open_sync(nbd_server->listener, addr, errp) < 0) { ++ if (qio_net_listener_open_sync(nbd_server->listener, addr, 1, errp) < 0) { + goto error; + } + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 7ca5d97..8c7c9da 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -1160,7 +1160,7 @@ static int qmp_chardev_open_socket_server(Chardev *chr, + qio_net_listener_set_name(s->listener, name); + g_free(name); + +- if (qio_net_listener_open_sync(s->listener, s->addr, errp) < 0) { ++ if 
(qio_net_listener_open_sync(s->listener, s->addr, 1, errp) < 0) { + object_unref(OBJECT(s->listener)); + s->listener = NULL; + return -1; +diff --git a/include/io/net-listener.h b/include/io/net-listener.h +index 8081ac5..fb10170 100644 +--- a/include/io/net-listener.h ++++ b/include/io/net-listener.h +@@ -95,6 +95,7 @@ void qio_net_listener_set_name(QIONetListener *listener, + * qio_net_listener_open_sync: + * @listener: the network listener object + * @addr: the address to listen on ++ * @num: the amount of expected connections + * @errp: pointer to a NULL initialized error object + * + * Synchronously open a listening connection on all +@@ -104,6 +105,7 @@ void qio_net_listener_set_name(QIONetListener *listener, + */ + int qio_net_listener_open_sync(QIONetListener *listener, + SocketAddress *addr, ++ int num, + Error **errp); + + /** +diff --git a/io/net-listener.c b/io/net-listener.c +index dc81150..5d8a226 100644 +--- a/io/net-listener.c ++++ b/io/net-listener.c +@@ -62,6 +62,7 @@ static gboolean qio_net_listener_channel_func(QIOChannel *ioc, + + int qio_net_listener_open_sync(QIONetListener *listener, + SocketAddress *addr, ++ int num, + Error **errp) + { + QIODNSResolver *resolver = qio_dns_resolver_get_instance(); +@@ -82,7 +83,7 @@ int qio_net_listener_open_sync(QIONetListener *listener, + for (i = 0; i < nresaddrs; i++) { + QIOChannelSocket *sioc = qio_channel_socket_new(); + +- if (qio_channel_socket_listen_sync(sioc, resaddrs[i], 1, ++ if (qio_channel_socket_listen_sync(sioc, resaddrs[i], num, + err ? 
NULL : &err) == 0) { + success = true; + +diff --git a/migration/socket.c b/migration/socket.c +index 98efdc0..e63f5e1 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -181,7 +181,7 @@ static void socket_start_incoming_migration(SocketAddress *saddr, + + qio_net_listener_set_name(listener, "migration-socket-listener"); + +- if (qio_net_listener_open_sync(listener, saddr, errp) < 0) { ++ if (qio_net_listener_open_sync(listener, saddr, 1, errp) < 0) { + object_unref(OBJECT(listener)); + return; + } +diff --git a/qemu-nbd.c b/qemu-nbd.c +index a8cb39e..e6a52bf 100644 +--- a/qemu-nbd.c ++++ b/qemu-nbd.c +@@ -1054,7 +1054,7 @@ int main(int argc, char **argv) + server = qio_net_listener_new(); + if (socket_activation == 0) { + saddr = nbd_build_socket_address(sockpath, bindto, port); +- if (qio_net_listener_open_sync(server, saddr, &local_err) < 0) { ++ if (qio_net_listener_open_sync(server, saddr, 1, &local_err) < 0) { + object_unref(OBJECT(server)); + error_report_err(local_err); + exit(EXIT_FAILURE); +diff --git a/ui/vnc.c b/ui/vnc.c +index 933dc36..b042593 100644 +--- a/ui/vnc.c ++++ b/ui/vnc.c +@@ -3760,7 +3760,7 @@ static int vnc_display_listen(VncDisplay *vd, + qio_net_listener_set_name(vd->listener, "vnc-listen"); + for (i = 0; i < nsaddr; i++) { + if (qio_net_listener_open_sync(vd->listener, +- saddr[i], ++ saddr[i], 1, + errp) < 0) { + return -1; + } +@@ -3775,7 +3775,7 @@ static int vnc_display_listen(VncDisplay *vd, + qio_net_listener_set_name(vd->wslistener, "vnc-ws-listen"); + for (i = 0; i < nwsaddr; i++) { + if (qio_net_listener_open_sync(vd->wslistener, +- wsaddr[i], ++ wsaddr[i], 1, + errp) < 0) { + return -1; + } +-- +1.8.3.1 + diff --git a/kvm-spapr-Set-compat-mode-in-spapr_core_plug.patch b/kvm-spapr-Set-compat-mode-in-spapr_core_plug.patch new file mode 100644 index 0000000..f2b6090 --- /dev/null +++ b/kvm-spapr-Set-compat-mode-in-spapr_core_plug.patch @@ -0,0 +1,127 @@ +From 50cee68ce9dc31033969905cf0358d0f641d056a Mon Sep 17 00:00:00 
2001 +From: Laurent Vivier +Date: Wed, 4 Sep 2019 10:31:39 +0100 +Subject: [PATCH 8/8] spapr: Set compat mode in spapr_core_plug() + +RH-Author: Laurent Vivier +Message-id: <20190904103139.29870-3-lvivier@redhat.com> +Patchwork-id: 90276 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/2] spapr: Set compat mode in spapr_core_plug() +Bugzilla: 1744107 +RH-Acked-by: John Snow +RH-Acked-by: David Gibson +RH-Acked-by: Thomas Huth + +From: Greg Kurz + +A recent change in spapr_machine_reset() showed that resetting the compat +mode in spapr_machine_reset() for the boot vCPU and in spapr_cpu_reset() +for all other vCPUs was fragile. The fix was thus to reset the compat mode +for all vCPUs in spapr_machine_reset(), but we still have to propagate +it to hot-plugged CPUs. This is still performed from spapr_cpu_reset(), +hence resulting in ppc_set_compat() being called twice for every vCPU at +machine reset. Apart from wasting cycles, which isn't really an issue +during machine reset, this seems to indicate that spapr_cpu_reset() isn't +the best place to set the compat mode. + +A natural candidate for CPU-hotplug specific code is spapr_core_plug(). +Also, it sits in the same file as spapr_machine_reset() : this makes +it easier for someone who wants to know when the compat PVR is set. + +Call ppc_set_compat() from there. This doesn't need to be done for +initial vCPUs since the compat PVR is 0 and spapr_machine_reset() sets +the appropriate value later. No need to do this on manually added vCPUS +on the destination QEMU during migration since the compat PVR is +part of the migrated vCPU state. Both conditions can be checked with +spapr_drc_hotplugged(). 
+ +Signed-off-by: Greg Kurz +Message-Id: <156701285312.499757.7807417667750711711.stgit@bahia.lan> +Signed-off-by: David Gibson +(cherry picked from commit b1e815674343a171e51ce447495957e289091e9f) +Signed-off-by: Laurent Vivier + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1744107 +BRANCH: rhel-av-8.1.0/master-4.1.0 +UPSTREAM: Merged +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr.c | 24 ++++++++++++++++-------- + hw/ppc/spapr_cpu_core.c | 7 ------- + 2 files changed, 16 insertions(+), 15 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 30bf7bb..41a6070 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -1746,12 +1746,6 @@ static void spapr_machine_reset(MachineState *machine) + spapr_ovec_cleanup(spapr->ov5_cas); + spapr->ov5_cas = spapr_ovec_new(); + +- /* +- * reset compat_pvr for all CPUs +- * as qemu_devices_reset() is called before this, +- * it can't be propagated by spapr_cpu_reset() +- * from the first CPU to all the others +- */ + ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal); + } + +@@ -3826,6 +3820,7 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + CPUArchId *core_slot; + int index; + bool hotplugged = spapr_drc_hotplugged(dev); ++ int i; + + core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index); + if (!core_slot) { +@@ -3859,13 +3854,26 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + core_slot->cpu = OBJECT(dev); + + if (smc->pre_2_10_has_unused_icps) { +- int i; +- + for (i = 0; i < cc->nr_threads; i++) { + cs = CPU(core->threads[i]); + pre_2_10_vmstate_unregister_dummy_icp(cs->cpu_index); + } + } ++ ++ /* ++ * Set compatibility mode to match the boot CPU, which was either set ++ * by the machine reset code or by CAS. 
++ */ ++ if (hotplugged) { ++ for (i = 0; i < cc->nr_threads; i++) { ++ ppc_set_compat(core->threads[i], POWERPC_CPU(first_cpu)->compat_pvr, ++ &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ } ++ } + } + + static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index ae43c57..85f2746 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -39,13 +39,6 @@ static void spapr_cpu_reset(void *opaque) + * using an RTAS call */ + cs->halted = 1; + +- /* Set compatibility mode to match the boot CPU, which was either set +- * by the machine reset code or by CAS. This should never fail. +- * At startup the value is already set for all the CPUs +- * but we need this when we hotplug a new CPU +- */ +- ppc_set_compat(cpu, POWERPC_CPU(first_cpu)->compat_pvr, &error_abort); +- + env->spr[SPR_HIOR] = 0; + + lpcr = env->spr[SPR_LPCR]; +-- +1.8.3.1 + diff --git a/kvm-trace-Clarify-DTrace-SystemTap-help-message.patch b/kvm-trace-Clarify-DTrace-SystemTap-help-message.patch new file mode 100644 index 0000000..a3bfed4 --- /dev/null +++ b/kvm-trace-Clarify-DTrace-SystemTap-help-message.patch @@ -0,0 +1,80 @@ +From 5d45e3a5d2e2d929095489a37579c3b7fc059450 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Tue, 3 Sep 2019 14:21:10 +0100 +Subject: [PATCH 1/8] trace: Clarify DTrace/SystemTap help message +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Philippe Mathieu-Daudé +Message-id: <20190903142110.25673-2-philmd@redhat.com> +Patchwork-id: 90255 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] trace: Clarify DTrace/SystemTap help message +Bugzilla: 1516220 +RH-Acked-by: John Snow +RH-Acked-by: Peter Xu +RH-Acked-by: Stefan Hajnoczi + +Most tracing backends are implemented within QEMU, except the +DTrace/SystemTap backends. 
+ +One side effect is when running 'qemu -trace help', an incomplete +list of trace events is displayed when using the DTrace/SystemTap +backends. + +This is partly due to trace events registered as modules with +trace_init(), and since the events are not used within QEMU, +the linker optimizes and removes the unused modules (which is +OK in this particular case). +Currently only the events compiled in trace-root.o and in the +last trace.o member of libqemuutil.a are linked, resulting in +an incomplete list of events. + +To avoid confusion, improve the help message, recommending to +use the proper systemtap script to display the events list. + +Before: + + $ lm32-softmmu/qemu-system-lm32 -trace help 2>&1 | wc -l + 70 + +After: + + $ lm32-softmmu/qemu-system-lm32 -trace help + Run 'qemu-trace-stap list qemu-system-lm32' to print a list + of names of trace points with the DTrace/SystemTap backends. + + $ qemu-trace-stap list qemu-system-lm32 | wc -l + 1136 + +Signed-off-by: Philippe Mathieu-Daudé +Message-id: 20190823142203.5210-1-philmd@redhat.com +Message-Id: <20190823142203.5210-1-philmd@redhat.com> +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 9f591a5d95e1969969632ab44cf35e505c8ddc3b) +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Danilo C. L. 
de Paula +--- + trace/control.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/trace/control.c b/trace/control.c +index 43fb786..d9cafc1 100644 +--- a/trace/control.c ++++ b/trace/control.c +@@ -165,6 +165,12 @@ void trace_list_events(void) + while ((ev = trace_event_iter_next(&iter)) != NULL) { + fprintf(stderr, "%s\n", trace_event_get_name(ev)); + } ++#ifdef CONFIG_TRACE_DTRACE ++ fprintf(stderr, "This list of names of trace points may be incomplete " ++ "when using the DTrace/SystemTap backends.\n" ++ "Run 'qemu-trace-stap list %s' to print the full list.\n", ++ error_get_progname()); ++#endif + } + + static void do_trace_enable_events(const char *line_buf) +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index e1168f4..7618078 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.1.0 -Release: 6%{?dist} +Release: 7%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -150,6 +150,22 @@ Patch33: kvm-memory-Split-zones-when-do-coalesced_io_del.patch Patch34: kvm-memory-Remove-has_coalesced_range-counter.patch # For bz#1743142 - Boot guest with multiple e1000 devices, qemu will crash after several guest reboots: kvm_mem_ioeventfd_add: error adding ioeventfd: No space left on device (28) Patch35: kvm-memory-Fix-up-memory_region_-add-del-_coalescing.patch +# For bz#1516220 - -trace help prints an incomplete list of trace events +Patch36: kvm-trace-Clarify-DTrace-SystemTap-help-message.patch +# For bz#1726898 - Parallel migration fails with error "Unable to write to socket: Connection reset by peer" now and then +Patch37: kvm-socket-Add-backlog-parameter-to-socket_listen.patch +# For bz#1726898 - Parallel migration fails with error "Unable to write to socket: Connection reset by peer" now and then +Patch38: kvm-socket-Add-num-connections-to-qio_channel_socket_syn.patch +# For bz#1726898 - Parallel migration fails with error "Unable to write to socket: Connection reset by peer" now and then +Patch39: kvm-socket-Add-num-connections-to-qio_channel_socket_asy.patch +# For bz#1726898 - Parallel migration fails with error "Unable to write to socket: Connection reset by peer" now and then +Patch40: kvm-socket-Add-num-connections-to-qio_net_listener_open_.patch +# For bz#1726898 - Parallel migration fails with error "Unable to write to socket: Connection reset by peer" now and then +Patch41: kvm-multifd-Use-number-of-channels-as-listen-backlog.patch +# For bz#1744107 - Migration from P8(qemu4.1) to P9(qemu4.1), after migration, qemu crash on destination with error message "qemu-kvm: error while loading state for instance 0x1 of device 'cpu'" +Patch42: kvm-pseries-Fix-compat_pvr-on-reset.patch +# For bz#1744107 - Migration from P8(qemu4.1) to P9(qemu4.1), after migration, qemu crash on destination with error message "qemu-kvm: error while loading state for 
instance 0x1 of device 'cpu'" +Patch43: kvm-spapr-Set-compat-mode-in-spapr_core_plug.patch BuildRequires: wget BuildRequires: rpm-build @@ -1091,6 +1107,22 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Fri Sep 06 2019 Danilo Cesar Lemes de Paula - 4.1.0-7.el8 +- kvm-trace-Clarify-DTrace-SystemTap-help-message.patch [bz#1516220] +- kvm-socket-Add-backlog-parameter-to-socket_listen.patch [bz#1726898] +- kvm-socket-Add-num-connections-to-qio_channel_socket_syn.patch [bz#1726898] +- kvm-socket-Add-num-connections-to-qio_channel_socket_asy.patch [bz#1726898] +- kvm-socket-Add-num-connections-to-qio_net_listener_open_.patch [bz#1726898] +- kvm-multifd-Use-number-of-channels-as-listen-backlog.patch [bz#1726898] +- kvm-pseries-Fix-compat_pvr-on-reset.patch [bz#1744107] +- kvm-spapr-Set-compat-mode-in-spapr_core_plug.patch [bz#1744107] +- Resolves: bz#1516220 + (-trace help prints an incomplete list of trace events) +- Resolves: bz#1726898 + (Parallel migration fails with error "Unable to write to socket: Connection reset by peer" now and then) +- Resolves: bz#1744107 + (Migration from P8(qemu4.1) to P9(qemu4.1), after migration, qemu crash on destination with error message "qemu-kvm: error while loading state for instance 0x1 of device 'cpu'") + * Wed Sep 04 2019 Danilo Cesar Lemes de Paula - 4.1.0-6.el8 - kvm-memory-Refactor-memory_region_clear_coalescing.patch [bz#1743142] - kvm-memory-Split-zones-when-do-coalesced_io_del.patch [bz#1743142] From 1523562e21bc90eb6d69512ed0e0d0d77b0d6a59 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Mon, 9 Sep 2019 19:17:47 +0100 Subject: [PATCH 048/195] * Mon Sep 09 2019 Danilo Cesar Lemes de Paula - 4.1.0-8.el8 - kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch [bz#1747836] - kvm-ehci-fix-queue-dev-null-ptr-dereference.patch [bz#1746790] - kvm-spapr-Use-SHUTDOWN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch [bz#1743477] - kvm-file-posix-Handle-undetectable-alignment.patch [bz#1749134] - kvm-block-posix-Always-allocate-the-first-block.patch [bz#1749134] - kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch [bz#1749134] - Resolves: bz#1743477 (Since bd94bc06479a "spapr: change default interrupt mode to 'dual'", QEMU resets the machine to select the appropriate interrupt controller. And -no-reboot prevents that.) - Resolves: bz#1746790 (qemu core dump while migrate from RHEL7.6 to RHEL8.1) - Resolves: bz#1747836 (Call traces after guest migration due to incorrect handling of the timebase) - Resolves: bz#1749134 (I/O error when virtio-blk disk is backed by a raw image on 4k disk) --- ...osix-Always-allocate-the-first-block.patch | 386 ++++++++++++++++++ ...i-fix-queue-dev-null-ptr-dereference.patch | 50 +++ ...-posix-Handle-undetectable-alignment.patch | 129 ++++++ ...t-allocate_first_block-with-O_DIRECT.patch | 109 +++++ ...-re-read-the-clock-on-pre_save-in-ca.patch | 101 +++++ ...WN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch | 60 +++ qemu-kvm.spec | 30 +- 7 files changed, 864 insertions(+), 1 deletion(-) create mode 100644 kvm-block-posix-Always-allocate-the-first-block.patch create mode 100644 kvm-ehci-fix-queue-dev-null-ptr-dereference.patch create mode 100644 kvm-file-posix-Handle-undetectable-alignment.patch create mode 100644 kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch create mode 100644 kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch create mode 100644 kvm-spapr-Use-SHUTDOWN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch diff --git a/kvm-block-posix-Always-allocate-the-first-block.patch 
b/kvm-block-posix-Always-allocate-the-first-block.patch new file mode 100644 index 0000000..6b4229c --- /dev/null +++ b/kvm-block-posix-Always-allocate-the-first-block.patch @@ -0,0 +1,386 @@ +From 58dc8ae23325384b0d9494d203254dc2f6a99255 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Mon, 9 Sep 2019 07:38:21 +0100 +Subject: [PATCH 5/6] block: posix: Always allocate the first block + +RH-Author: Max Reitz +Message-id: <20190909073822.26191-3-mreitz@redhat.com> +Patchwork-id: 90333 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/3] block: posix: Always allocate the first block +Bugzilla: 1749134 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Thomas Huth +RH-Acked-by: Kevin Wolf + +From: Nir Soffer + +When creating an image with preallocation "off" or "falloc", the first +block of the image is typically not allocated. When using Gluster +storage backed by XFS filesystem, reading this block using direct I/O +succeeds regardless of request length, fooling alignment detection. + +In this case we fallback to a safe value (4096) instead of the optimal +value (512), which may lead to unneeded data copying when aligning +requests. Allocating the first block avoids the fallback. + +Since we allocate the first block even with preallocation=off, we no +longer create images with zero disk size: + + $ ./qemu-img create -f raw test.raw 1g + Formatting 'test.raw', fmt=raw size=1073741824 + + $ ls -lhs test.raw + 4.0K -rw-r--r--. 1 nsoffer nsoffer 1.0G Aug 16 23:48 test.raw + +And converting the image requires additional cluster: + + $ ./qemu-img measure -f raw -O qcow2 test.raw + required size: 458752 + fully allocated size: 1074135040 + +When using format like vmdk with multiple files per image, we allocate +one block per file: + + $ ./qemu-img create -f vmdk -o subformat=twoGbMaxExtentFlat test.vmdk 4g + Formatting 'test.vmdk', fmt=vmdk size=4294967296 compat6=off hwversion=undefined subformat=twoGbMaxExtentFlat + + $ ls -lhs test*.vmdk + 4.0K -rw-r--r--. 
1 nsoffer nsoffer 2.0G Aug 27 03:23 test-f001.vmdk + 4.0K -rw-r--r--. 1 nsoffer nsoffer 2.0G Aug 27 03:23 test-f002.vmdk + 4.0K -rw-r--r--. 1 nsoffer nsoffer 353 Aug 27 03:23 test.vmdk + +I did quick performance test for copying disks with qemu-img convert to +new raw target image to Gluster storage with sector size of 512 bytes: + + for i in $(seq 10); do + rm -f dst.raw + sleep 10 + time ./qemu-img convert -f raw -O raw -t none -T none src.raw dst.raw + done + +Here is a table comparing the total time spent: + +Type Before(s) After(s) Diff(%) +--------------------------------------- +real 530.028 469.123 -11.4 +user 17.204 10.768 -37.4 +sys 17.881 7.011 -60.7 + +We can see very clear improvement in CPU usage. + +Signed-off-by: Nir Soffer +Message-id: 20190827010528.8818-2-nsoffer@redhat.com +Reviewed-by: Max Reitz +Signed-off-by: Max Reitz +(cherry picked from commit 3a20013fbb26d2a1bd11ef148eefdb1508783787) +Signed-off-by: Max Reitz +Signed-off-by: Danilo C. L. de Paula +--- + block/file-posix.c | 51 ++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/059.out | 2 +- + tests/qemu-iotests/150.out | 11 --------- + tests/qemu-iotests/150.out.qcow2 | 11 +++++++++ + tests/qemu-iotests/150.out.raw | 12 ++++++++++ + tests/qemu-iotests/175 | 19 ++++++++++----- + tests/qemu-iotests/175.out | 8 +++---- + tests/qemu-iotests/178.out.qcow2 | 4 ++-- + tests/qemu-iotests/221.out | 12 ++++++---- + tests/qemu-iotests/253.out | 12 ++++++---- + 10 files changed, 110 insertions(+), 32 deletions(-) + delete mode 100644 tests/qemu-iotests/150.out + create mode 100644 tests/qemu-iotests/150.out.qcow2 + create mode 100644 tests/qemu-iotests/150.out.raw + +diff --git a/block/file-posix.c b/block/file-posix.c +index b8b4dad..8ea9889 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -1749,6 +1749,43 @@ static int handle_aiocb_discard(void *opaque) + return ret; + } + ++/* ++ * Help alignment probing by allocating the first block. 
++ * ++ * When reading with direct I/O from unallocated area on Gluster backed by XFS, ++ * reading succeeds regardless of request length. In this case we fallback to ++ * safe alignment which is not optimal. Allocating the first block avoids this ++ * fallback. ++ * ++ * fd may be opened with O_DIRECT, but we don't know the buffer alignment or ++ * request alignment, so we use safe values. ++ * ++ * Returns: 0 on success, -errno on failure. Since this is an optimization, ++ * caller may ignore failures. ++ */ ++static int allocate_first_block(int fd, size_t max_size) ++{ ++ size_t write_size = (max_size < MAX_BLOCKSIZE) ++ ? BDRV_SECTOR_SIZE ++ : MAX_BLOCKSIZE; ++ size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize()); ++ void *buf; ++ ssize_t n; ++ int ret; ++ ++ buf = qemu_memalign(max_align, write_size); ++ memset(buf, 0, write_size); ++ ++ do { ++ n = pwrite(fd, buf, write_size, 0); ++ } while (n == -1 && errno == EINTR); ++ ++ ret = (n == -1) ? -errno : 0; ++ ++ qemu_vfree(buf); ++ return ret; ++} ++ + static int handle_aiocb_truncate(void *opaque) + { + RawPosixAIOData *aiocb = opaque; +@@ -1788,6 +1825,17 @@ static int handle_aiocb_truncate(void *opaque) + /* posix_fallocate() doesn't set errno. */ + error_setg_errno(errp, -result, + "Could not preallocate new data"); ++ } else if (current_length == 0) { ++ /* ++ * posix_fallocate() uses fallocate() if the filesystem ++ * supports it, or fallback to manually writing zeroes. If ++ * fallocate() was used, unaligned reads from the fallocated ++ * area in raw_probe_alignment() will succeed, hence we need to ++ * allocate the first block. ++ * ++ * Optimize future alignment probing; ignore failures. 
++ */ ++ allocate_first_block(fd, offset); + } + } else { + result = 0; +@@ -1849,6 +1897,9 @@ static int handle_aiocb_truncate(void *opaque) + if (ftruncate(fd, offset) != 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not resize file"); ++ } else if (current_length == 0 && offset > current_length) { ++ /* Optimize future alignment probing; ignore failures. */ ++ allocate_first_block(fd, offset); + } + return result; + default: +diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out +index 4fab42a..fe3f861 100644 +--- a/tests/qemu-iotests/059.out ++++ b/tests/qemu-iotests/059.out +@@ -27,7 +27,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824000 subformat=twoGbMax + image: TEST_DIR/t.vmdk + file format: vmdk + virtual size: 0.977 TiB (1073741824000 bytes) +-disk size: 16 KiB ++disk size: 1.97 MiB + Format specific information: + cid: XXXXXXXX + parent cid: XXXXXXXX +diff --git a/tests/qemu-iotests/150.out b/tests/qemu-iotests/150.out +deleted file mode 100644 +index 2a54e8d..0000000 +--- a/tests/qemu-iotests/150.out ++++ /dev/null +@@ -1,11 +0,0 @@ +-QA output created by 150 +- +-=== Mapping sparse conversion === +- +-Offset Length File +- +-=== Mapping non-sparse conversion === +- +-Offset Length File +-0 0x100000 TEST_DIR/t.IMGFMT +-*** done +diff --git a/tests/qemu-iotests/150.out.qcow2 b/tests/qemu-iotests/150.out.qcow2 +new file mode 100644 +index 0000000..2a54e8d +--- /dev/null ++++ b/tests/qemu-iotests/150.out.qcow2 +@@ -0,0 +1,11 @@ ++QA output created by 150 ++ ++=== Mapping sparse conversion === ++ ++Offset Length File ++ ++=== Mapping non-sparse conversion === ++ ++Offset Length File ++0 0x100000 TEST_DIR/t.IMGFMT ++*** done +diff --git a/tests/qemu-iotests/150.out.raw b/tests/qemu-iotests/150.out.raw +new file mode 100644 +index 0000000..3cdc772 +--- /dev/null ++++ b/tests/qemu-iotests/150.out.raw +@@ -0,0 +1,12 @@ ++QA output created by 150 ++ ++=== Mapping sparse conversion === ++ ++Offset Length File ++0 
0x1000 TEST_DIR/t.IMGFMT ++ ++=== Mapping non-sparse conversion === ++ ++Offset Length File ++0 0x100000 TEST_DIR/t.IMGFMT ++*** done +diff --git a/tests/qemu-iotests/175 b/tests/qemu-iotests/175 +index 51e62c8..7ba28b3 100755 +--- a/tests/qemu-iotests/175 ++++ b/tests/qemu-iotests/175 +@@ -37,14 +37,16 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 + # the file size. This function hides the resulting difference in the + # stat -c '%b' output. + # Parameter 1: Number of blocks an empty file occupies +-# Parameter 2: Image size in bytes ++# Parameter 2: Minimal number of blocks in an image ++# Parameter 3: Image size in bytes + _filter_blocks() + { + extra_blocks=$1 +- img_size=$2 ++ min_blocks=$2 ++ img_size=$3 + +- sed -e "s/blocks=$extra_blocks\\(\$\\|[^0-9]\\)/nothing allocated/" \ +- -e "s/blocks=$((extra_blocks + img_size / 512))\\(\$\\|[^0-9]\\)/everything allocated/" ++ sed -e "s/blocks=$min_blocks\\(\$\\|[^0-9]\\)/min allocation/" \ ++ -e "s/blocks=$((extra_blocks + img_size / 512))\\(\$\\|[^0-9]\\)/max allocation/" + } + + # get standard environment, filters and checks +@@ -60,16 +62,21 @@ size=$((1 * 1024 * 1024)) + touch "$TEST_DIR/empty" + extra_blocks=$(stat -c '%b' "$TEST_DIR/empty") + ++# We always write the first byte; check how many blocks this filesystem ++# allocates to match empty image allocation. 
++printf "\0" > "$TEST_DIR/empty" ++min_blocks=$(stat -c '%b' "$TEST_DIR/empty") ++ + echo + echo "== creating image with default preallocation ==" + _make_test_img $size | _filter_imgfmt +-stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $size ++stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $min_blocks $size + + for mode in off full falloc; do + echo + echo "== creating image with preallocation $mode ==" + IMGOPTS=preallocation=$mode _make_test_img $size | _filter_imgfmt +- stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $size ++ stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $min_blocks $size + done + + # success, all done +diff --git a/tests/qemu-iotests/175.out b/tests/qemu-iotests/175.out +index 6d9a5ed..263e521 100644 +--- a/tests/qemu-iotests/175.out ++++ b/tests/qemu-iotests/175.out +@@ -2,17 +2,17 @@ QA output created by 175 + + == creating image with default preallocation == + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +-size=1048576, nothing allocated ++size=1048576, min allocation + + == creating image with preallocation off == + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=off +-size=1048576, nothing allocated ++size=1048576, min allocation + + == creating image with preallocation full == + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=full +-size=1048576, everything allocated ++size=1048576, max allocation + + == creating image with preallocation falloc == + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=falloc +-size=1048576, everything allocated ++size=1048576, max allocation + *** done +diff --git a/tests/qemu-iotests/178.out.qcow2 b/tests/qemu-iotests/178.out.qcow2 +index 55a8dc9..9e7d8c4 100644 +--- a/tests/qemu-iotests/178.out.qcow2 ++++ b/tests/qemu-iotests/178.out.qcow2 +@@ -101,7 +101,7 @@ converted image file size in bytes: 196608 + == raw input image with data (human) 
== + + Formatting 'TEST_DIR/t.qcow2', fmt=IMGFMT size=1073741824 +-required size: 393216 ++required size: 458752 + fully allocated size: 1074135040 + wrote 512/512 bytes at offset 512 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +@@ -257,7 +257,7 @@ converted image file size in bytes: 196608 + + Formatting 'TEST_DIR/t.qcow2', fmt=IMGFMT size=1073741824 + { +- "required": 393216, ++ "required": 458752, + "fully-allocated": 1074135040 + } + wrote 512/512 bytes at offset 512 +diff --git a/tests/qemu-iotests/221.out b/tests/qemu-iotests/221.out +index 9f9dd52..dca024a 100644 +--- a/tests/qemu-iotests/221.out ++++ b/tests/qemu-iotests/221.out +@@ -3,14 +3,18 @@ QA output created by 221 + === Check mapping of unaligned raw image === + + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65537 +-[{ "start": 0, "length": 66048, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] +-[{ "start": 0, "length": 66048, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 61952, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 61952, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] + wrote 1/1 bytes at offset 65536 + 1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +-[{ "start": 0, "length": 65536, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 61440, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, + { "start": 65536, "length": 1, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, + { "start": 65537, "length": 511, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] +-[{ "start": 0, "length": 65536, "depth": 0, "zero": 
true, "data": false, "offset": OFFSET}, ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 61440, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, + { "start": 65536, "length": 1, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, + { "start": 65537, "length": 511, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] + *** done +diff --git a/tests/qemu-iotests/253.out b/tests/qemu-iotests/253.out +index 607c0ba..3d08b30 100644 +--- a/tests/qemu-iotests/253.out ++++ b/tests/qemu-iotests/253.out +@@ -3,12 +3,16 @@ QA output created by 253 + === Check mapping of unaligned raw image === + + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048575 +-[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] +-[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 1044480, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 1044480, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] + wrote 65535/65535 bytes at offset 983040 + 63.999 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +-[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 978944, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, + { "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +-[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, 
++{ "start": 4096, "length": 978944, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, + { "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] + *** done +-- +1.8.3.1 + diff --git a/kvm-ehci-fix-queue-dev-null-ptr-dereference.patch b/kvm-ehci-fix-queue-dev-null-ptr-dereference.patch new file mode 100644 index 0000000..9adeaeb --- /dev/null +++ b/kvm-ehci-fix-queue-dev-null-ptr-dereference.patch @@ -0,0 +1,50 @@ +From 0b38614471dbc44b87a1d2449e602df50c3ff535 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 5 Sep 2019 08:50:37 +0100 +Subject: [PATCH 2/6] ehci: fix queue->dev null ptr dereference +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20190905085037.5648-2-dgilbert@redhat.com> +Patchwork-id: 90288 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 1/1] ehci: fix queue->dev null ptr dereference +Bugzilla: 1746790 +RH-Acked-by: Peter Xu +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Juan Quintela + +From: Gerd Hoffmann + +In case we don't have a device for an active queue, just skip +processing the queue (same we do for inactive queues) and log +a guest bug. + +Reported-by: Guenter Roeck +Signed-off-by: Gerd Hoffmann +Tested-by: Guenter Roeck +Message-id: 20190821085319.13711-1-kraxel@redhat.com +(cherry picked from commit 1be344b7ad25d572dadeee46d80f0103354352b2) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/usb/hcd-ehci.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c +index 62dab05..5f089f3 100644 +--- a/hw/usb/hcd-ehci.c ++++ b/hw/usb/hcd-ehci.c +@@ -1834,6 +1834,9 @@ static int ehci_state_fetchqtd(EHCIQueue *q) + ehci_set_state(q->ehci, q->async, EST_EXECUTING); + break; + } ++ } else if (q->dev == NULL) { ++ ehci_trace_guest_bug(q->ehci, "no device attached to queue"); ++ ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH); + } else { + p = ehci_alloc_packet(q); + p->qtdaddr = q->qtdaddr; +-- +1.8.3.1 + diff --git a/kvm-file-posix-Handle-undetectable-alignment.patch b/kvm-file-posix-Handle-undetectable-alignment.patch new file mode 100644 index 0000000..f5f883b --- /dev/null +++ b/kvm-file-posix-Handle-undetectable-alignment.patch @@ -0,0 +1,129 @@ +From 790cebc2a1d8de8d93b2a2a0ef19e31c767f4f1c Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Mon, 9 Sep 2019 07:38:20 +0100 +Subject: [PATCH 4/6] file-posix: Handle undetectable alignment + +RH-Author: Max Reitz +Message-id: <20190909073822.26191-2-mreitz@redhat.com> +Patchwork-id: 90332 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/3] file-posix: Handle undetectable alignment +Bugzilla: 1749134 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Thomas Huth +RH-Acked-by: Kevin Wolf + +From: Nir Soffer + +In some cases buf_align or request_alignment cannot be detected: + +1. With Gluster, buf_align cannot be detected since the actual I/O is + done on Gluster server, and qemu buffer alignment does not matter. + Since we don't have alignment requirement, buf_align=1 is the best + value. + +2. With local XFS filesystem, buf_align cannot be detected if reading + from unallocated area. In this we must align the buffer, but we don't + know what is the correct size. Using the wrong alignment results in + I/O error. + +3. With Gluster backed by XFS, request_alignment cannot be detected if + reading from unallocated area. 
In this case we need to use the + correct alignment, and failing to do so results in I/O errors. + +4. With NFS, the server does not use direct I/O, so both buf_align and + request_alignment cannot be detected. In this case we don't need any + alignment so we can use buf_align=1 and request_alignment=1. + +These cases seem to work when storage sector size is 512 bytes, because +the current code starts checking align=512. If the check succeeds +because alignment cannot be detected we use 512. But this does not work +for storage with 4k sector size. + +To determine if we can detect the alignment, we probe first with +align=1. If probing succeeds, maybe there is no alignment requirement +(cases 1, 4) or we are probing unallocated area (cases 2, 3). Since we +don't have any way to tell, we treat this as undetectable alignment. If +probing with align=1 fails with EINVAL, but probing with one of the +expected alignments succeeds, we know that we found a working alignment. + +Practically the alignment requirements are the same for buffer +alignment, buffer length, and offset in file. So in case we cannot +detect buf_align, we can use request alignment. If we cannot detect +request alignment, we can fall back to a safe value. To use this logic, +we probe first request alignment instead of buf_align. + +Here is a table showing the behaviour with current code (the value in +parentheses is the optimal value). + +Case Sector buf_align (opt) request_alignment (opt) result + +Signed-off-by: Danilo C. L.
de Paula +--- + block/file-posix.c | 36 +++++++++++++++++++++++++----------- + 1 file changed, 25 insertions(+), 11 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 4479cc7..b8b4dad 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -323,6 +323,7 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) + BDRVRawState *s = bs->opaque; + char *buf; + size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize()); ++ size_t alignments[] = {1, 512, 1024, 2048, 4096}; + + /* For SCSI generic devices the alignment is not really used. + With buffered I/O, we don't have any restrictions. */ +@@ -349,25 +350,38 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) + } + #endif + +- /* If we could not get the sizes so far, we can only guess them */ +- if (!s->buf_align) { ++ /* ++ * If we could not get the sizes so far, we can only guess them. First try ++ * to detect request alignment, since it is more likely to succeed. Then ++ * try to detect buf_align, which cannot be detected in some cases (e.g. ++ * Gluster). If buf_align cannot be detected, we fallback to the value of ++ * request_alignment. ++ */ ++ ++ if (!bs->bl.request_alignment) { ++ int i; + size_t align; +- buf = qemu_memalign(max_align, 2 * max_align); +- for (align = 512; align <= max_align; align <<= 1) { +- if (raw_is_io_aligned(fd, buf + align, max_align)) { +- s->buf_align = align; ++ buf = qemu_memalign(max_align, max_align); ++ for (i = 0; i < ARRAY_SIZE(alignments); i++) { ++ align = alignments[i]; ++ if (raw_is_io_aligned(fd, buf, align)) { ++ /* Fallback to safe value. */ ++ bs->bl.request_alignment = (align != 1) ? 
align : max_align; + break; + } + } + qemu_vfree(buf); + } + +- if (!bs->bl.request_alignment) { ++ if (!s->buf_align) { ++ int i; + size_t align; +- buf = qemu_memalign(s->buf_align, max_align); +- for (align = 512; align <= max_align; align <<= 1) { +- if (raw_is_io_aligned(fd, buf, align)) { +- bs->bl.request_alignment = align; ++ buf = qemu_memalign(max_align, 2 * max_align); ++ for (i = 0; i < ARRAY_SIZE(alignments); i++) { ++ align = alignments[i]; ++ if (raw_is_io_aligned(fd, buf + align, max_align)) { ++ /* Fallback to request_aligment. */ ++ s->buf_align = (align != 1) ? align : bs->bl.request_alignment; + break; + } + } +-- +1.8.3.1 + diff --git a/kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch b/kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch new file mode 100644 index 0000000..75c738d --- /dev/null +++ b/kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch @@ -0,0 +1,109 @@ +From b261b31812a3e89a9104fb33bb2339b1742ac494 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Mon, 9 Sep 2019 07:38:22 +0100 +Subject: [PATCH 6/6] iotests: Test allocate_first_block() with O_DIRECT + +RH-Author: Max Reitz +Message-id: <20190909073822.26191-4-mreitz@redhat.com> +Patchwork-id: 90334 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 3/3] iotests: Test allocate_first_block() with O_DIRECT +Bugzilla: 1749134 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Thomas Huth +RH-Acked-by: Kevin Wolf + +From: Nir Soffer + +Using block_resize we can test allocate_first_block() with file +descriptor opened with O_DIRECT, ensuring that it works for any size +larger than 4096 bytes. + +Testing smaller sizes is tricky as the result depends on the filesystem +used for testing. For example on NFS any size will work since O_DIRECT +does not require any alignment. 
+ +Signed-off-by: Nir Soffer +Reviewed-by: Max Reitz +Message-id: 20190827010528.8818-3-nsoffer@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit 7e3dc2ba9a11862d4e1a08325b9165f27a1b1e7c) +Signed-off-by: Max Reitz +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/175 | 28 ++++++++++++++++++++++++++++ + tests/qemu-iotests/175.out | 8 ++++++++ + 2 files changed, 36 insertions(+) + +diff --git a/tests/qemu-iotests/175 b/tests/qemu-iotests/175 +index 7ba28b3..55db280 100755 +--- a/tests/qemu-iotests/175 ++++ b/tests/qemu-iotests/175 +@@ -49,6 +49,23 @@ _filter_blocks() + -e "s/blocks=$((extra_blocks + img_size / 512))\\(\$\\|[^0-9]\\)/max allocation/" + } + ++# Resize image using block_resize. ++# Parameter 1: image path ++# Parameter 2: new size ++_block_resize() ++{ ++ local path=$1 ++ local size=$2 ++ ++ $QEMU -qmp stdio -nographic -nodefaults \ ++ -blockdev file,node-name=file,filename=$path,cache.direct=on \ ++ </dev/null ++ stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $min_blocks $new_size ++done ++ + # success, all done + echo "*** done" + rm -f $seq.full +diff --git a/tests/qemu-iotests/175.out b/tests/qemu-iotests/175.out +index 263e521..39c2ee0 100644 +--- a/tests/qemu-iotests/175.out ++++ b/tests/qemu-iotests/175.out +@@ -15,4 +15,12 @@ size=1048576, max allocation + == creating image with preallocation falloc == + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=falloc + size=1048576, max allocation ++ ++== resize empty image with block_resize == ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=0 ++size=4096, min allocation ++ ++== resize empty image with block_resize == ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=0 ++size=1048576, min allocation + *** done +-- +1.8.3.1 + diff --git a/kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch b/kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch new file mode 100644 index 0000000..604a4bc --- /dev/null +++ 
b/kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch @@ -0,0 +1,101 @@ +From 8b0a69f11052d271ef49db0051d79e7f1a6102be Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Mon, 2 Sep 2019 04:20:32 +0100 +Subject: [PATCH 1/6] migration: Do not re-read the clock on pre_save in case + of paused guest +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Gibson +Message-id: <20190902042032.25987-1-dgibson@redhat.com> +Patchwork-id: 90226 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH] migration: Do not re-read the clock on pre_save in case of paused guest +Bugzilla: 1747836 +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Laurent Vivier + +From: "Maxiwell S. Garcia" + +Re-read the timebase before migrate was ported from x86 commit: + 6053a86fe7bd: kvmclock: reduce kvmclock difference on migration + +The clock move makes the guest know about the paused time between +the stop and migrate commands. This is an issue in an already-paused +VM because some side effects, like process stalls, could happen +after migration. + +So, this patch checks the runstate of guest in the pre_save handler and +does not re-read the timebase in case of paused state (cold migration). + +Signed-off-by: Maxiwell S. Garcia +Message-Id: <20190711194702.26598-1-maxiwell@linux.ibm.com> +Signed-off-by: David Gibson +(cherry picked from commit d14f33976282a8744ca1bf1d64e73996c145aa3f) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1747836 +Branch: rhel8/rhel-av-8.1.0/master-4.1.0 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=23282250 +Testing: Booted guest with this qemu + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L.
de Paula +--- + hw/ppc/ppc.c | 13 +++++++++---- + target/ppc/cpu-qom.h | 1 + + 2 files changed, 10 insertions(+), 4 deletions(-) + +diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c +index a9e508c..8572e45 100644 +--- a/hw/ppc/ppc.c ++++ b/hw/ppc/ppc.c +@@ -1008,6 +1008,8 @@ static void timebase_save(PPCTimebase *tb) + * there is no need to update it from KVM here + */ + tb->guest_timebase = ticks + first_ppc_cpu->env.tb_env->tb_offset; ++ ++ tb->runstate_paused = runstate_check(RUN_STATE_PAUSED); + } + + static void timebase_load(PPCTimebase *tb) +@@ -1051,9 +1053,9 @@ void cpu_ppc_clock_vm_state_change(void *opaque, int running, + } + + /* +- * When migrating, read the clock just before migration, +- * so that the guest clock counts during the events +- * between: ++ * When migrating a running guest, read the clock just ++ * before migration, so that the guest clock counts ++ * during the events between: + * + * * vm_stop() + * * +@@ -1068,7 +1070,10 @@ static int timebase_pre_save(void *opaque) + { + PPCTimebase *tb = opaque; + +- timebase_save(tb); ++ /* guest_timebase won't be overridden in case of paused guest */ ++ if (!tb->runstate_paused) { ++ timebase_save(tb); ++ } + + return 0; + } +diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h +index be9b4c3..5fbcdee 100644 +--- a/target/ppc/cpu-qom.h ++++ b/target/ppc/cpu-qom.h +@@ -201,6 +201,7 @@ typedef struct PowerPCCPUClass { + typedef struct PPCTimebase { + uint64_t guest_timebase; + int64_t time_of_the_day_ns; ++ bool runstate_paused; + } PPCTimebase; + + extern const struct VMStateDescription vmstate_ppc_timebase; +-- +1.8.3.1 + diff --git a/kvm-spapr-Use-SHUTDOWN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch b/kvm-spapr-Use-SHUTDOWN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch new file mode 100644 index 0000000..8b3c06e --- /dev/null +++ b/kvm-spapr-Use-SHUTDOWN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch @@ -0,0 +1,60 @@ +From c8d3479746b17fcdf56b8afb3eccdba2c14578e8 Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: 
Fri, 6 Sep 2019 03:58:36 +0100 +Subject: [PATCH 3/6] spapr: Use SHUTDOWN_CAUSE_SUBSYSTEM_RESET for CAS reboots + +RH-Author: David Gibson +Message-id: <20190906035836.23689-1-dgibson@redhat.com> +Patchwork-id: 90293 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH] spapr: Use SHUTDOWN_CAUSE_SUBSYSTEM_RESET for CAS reboots +Bugzilla: 1743477 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth +RH-Acked-by: Danilo de Paula + +From: David Gibson + +The sPAPR platform includes feature negotiation between the guest and +platform. That sometimes requires reconfiguring the virtual hardware, and +in some cases that is a complex enough process that we trigger a system +reset to handle it. That interacts badly with -no-reboot - we trigger the +reboot, -no-reboot means we exit and so the guest never gets to try again. + +Eventually we want to get rid of CAS reboots entirely, since they're odd +and irritating for the user. But in the meantime we can fix the -no-reboot +problem by using SHUTDOWN_CAUSE_SUBSYSTEM_RESET which ignores -no-reboot +and seems to be designed for this sort of faux-reset for internal purposes +only. + +Signed-off-by: David Gibson +(cherry picked from commit 9146206eb26c1436c80a7c2ca1e4c5f86b27179d) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1743477 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=23395494 +Branch: rhel-av-8.1.0/master-4.1.0 +Testing: Started a guest and verified that -no-reboot no longer + prevents the CAS reboot to negotiate XIVE support from + completing + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/spapr_hcall.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c +index 6808d4c..687bb7b 100644 +--- a/hw/ppc/spapr_hcall.c ++++ b/hw/ppc/spapr_hcall.c +@@ -1672,7 +1672,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + spapr_ovec_cleanup(ov5_updates); + + if (spapr->cas_reboot) { +- qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); ++ qemu_system_reset_request(SHUTDOWN_CAUSE_SUBSYSTEM_RESET); + } + + return H_SUCCESS; +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 7618078..b087571 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.1.0 -Release: 7%{?dist} +Release: 8%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -166,6 +166,18 @@ Patch41: kvm-multifd-Use-number-of-channels-as-listen-backlog.patch Patch42: kvm-pseries-Fix-compat_pvr-on-reset.patch # For bz#1744107 - Migration from P8(qemu4.1) to P9(qemu4.1), after migration, qemu crash on destination with error message "qemu-kvm: error while loading state for instance 0x1 of device 'cpu'" Patch43: kvm-spapr-Set-compat-mode-in-spapr_core_plug.patch +# For bz#1747836 - Call traces after guest migration due to incorrect handling of the timebase +Patch44: kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch +# For bz#1746790 - qemu core dump while migrate from RHEL7.6 to RHEL8.1 +Patch45: kvm-ehci-fix-queue-dev-null-ptr-dereference.patch +# For bz#1743477 - Since bd94bc06479a "spapr: change default interrupt mode to 'dual'", QEMU resets the machine to select the appropriate interrupt controller. And -no-reboot prevents that. 
+Patch46: kvm-spapr-Use-SHUTDOWN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch +# For bz#1749134 - I/O error when virtio-blk disk is backed by a raw image on 4k disk +Patch47: kvm-file-posix-Handle-undetectable-alignment.patch +# For bz#1749134 - I/O error when virtio-blk disk is backed by a raw image on 4k disk +Patch48: kvm-block-posix-Always-allocate-the-first-block.patch +# For bz#1749134 - I/O error when virtio-blk disk is backed by a raw image on 4k disk +Patch49: kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch BuildRequires: wget BuildRequires: rpm-build @@ -1107,6 +1119,22 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Sep 09 2019 Danilo Cesar Lemes de Paula - 4.1.0-8.el8 +- kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch [bz#1747836] +- kvm-ehci-fix-queue-dev-null-ptr-dereference.patch [bz#1746790] +- kvm-spapr-Use-SHUTDOWN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch [bz#1743477] +- kvm-file-posix-Handle-undetectable-alignment.patch [bz#1749134] +- kvm-block-posix-Always-allocate-the-first-block.patch [bz#1749134] +- kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch [bz#1749134] +- Resolves: bz#1743477 + (Since bd94bc06479a "spapr: change default interrupt mode to 'dual'", QEMU resets the machine to select the appropriate interrupt controller. And -no-reboot prevents that.) +- Resolves: bz#1746790 + (qemu core dump while migrate from RHEL7.6 to RHEL8.1) +- Resolves: bz#1747836 + (Call traces after guest migration due to incorrect handling of the timebase) +- Resolves: bz#1749134 + (I/O error when virtio-blk disk is backed by a raw image on 4k disk) + * Fri Sep 06 2019 Danilo Cesar Lemes de Paula - 4.1.0-7.el8 - kvm-trace-Clarify-DTrace-SystemTap-help-message.patch [bz#1516220] - kvm-socket-Add-backlog-parameter-to-socket_listen.patch [bz#1726898] From 200e3560ab974816d225410fc5ff28afab4d2c76 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Tue, 10 Sep 2019 19:56:34 +0100 Subject: [PATCH 049/195] * Tue Sep 10 2019 Danilo Cesar Lemes de Paula - 4.1.0-9.el8 - kvm-migration-always-initialise-ram_counters-for-a-new-m.patch [bz#1734316] - kvm-migration-add-qemu_file_update_transfer-interface.patch [bz#1734316] - kvm-migration-add-speed-limit-for-multifd-migration.patch [bz#1734316] - kvm-migration-update-ram_counters-for-multifd-sync-packe.patch [bz#1734316] - kvm-spapr-pci-Consolidate-de-allocation-of-MSIs.patch [bz#1750200] - kvm-spapr-pci-Free-MSIs-during-reset.patch [bz#1750200] - Resolves: bz#1734316 (multifd migration does not honour speed limits, consumes entire bandwidth of NIC) - Resolves: bz#1750200 ([RHEL8.1][QEMU4.1]boot up guest with vf device,then system_reset guest,error prompt(qemu-kvm: Can't allocate MSIs for device 2800: IRQ 4904 is not free)) --- ...-qemu_file_update_transfer-interface.patch | 63 ++++++++ ...dd-speed-limit-for-multifd-migration.patch | 141 ++++++++++++++++++ ...-initialise-ram_counters-for-a-new-m.patch | 139 +++++++++++++++++ ...-ram_counters-for-multifd-sync-packe.patch | 47 ++++++ ...ci-Consolidate-de-allocation-of-MSIs.patch | 103 +++++++++++++ kvm-spapr-pci-Free-MSIs-during-reset.patch | 56 +++++++ qemu-kvm.spec | 26 +++- 7 files changed, 574 insertions(+), 1 deletion(-) create mode 100644 kvm-migration-add-qemu_file_update_transfer-interface.patch create mode 100644 kvm-migration-add-speed-limit-for-multifd-migration.patch create mode 100644 kvm-migration-always-initialise-ram_counters-for-a-new-m.patch create mode 100644 kvm-migration-update-ram_counters-for-multifd-sync-packe.patch create mode 100644 kvm-spapr-pci-Consolidate-de-allocation-of-MSIs.patch create mode 100644 kvm-spapr-pci-Free-MSIs-during-reset.patch diff --git a/kvm-migration-add-qemu_file_update_transfer-interface.patch b/kvm-migration-add-qemu_file_update_transfer-interface.patch new file mode 100644 index 0000000..db27e74 --- /dev/null +++ 
b/kvm-migration-add-qemu_file_update_transfer-interface.patch @@ -0,0 +1,63 @@ +From 1748253be11cbf12961274d4586671ce3803b006 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 4 Sep 2019 11:23:30 +0100 +Subject: [PATCH 2/6] migration: add qemu_file_update_transfer interface + +RH-Author: Juan Quintela +Message-id: <20190904112332.16160-3-quintela@redhat.com> +Patchwork-id: 90281 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH v2 2/4] migration: add qemu_file_update_transfer interface +Bugzilla: 1734316 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Danilo de Paula + +From: Ivan Ren + +Add qemu_file_update_transfer for just update bytes_xfer for speed +limitation. This will be used for further migration feature such as +multifd migration. + +Signed-off-by: Ivan Ren +Reviewed-by: Wei Yang +Reviewed-by: Juan Quintela +Message-Id: <1564464816-21804-2-git-send-email-ivanren@tencent.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 5d7d2558631b4421826c60046c606584c58ab76c) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/qemu-file.c | 5 +++++ + migration/qemu-file.h | 1 + + 2 files changed, 6 insertions(+) + +diff --git a/migration/qemu-file.c b/migration/qemu-file.c +index 0431585..18f4805 100644 +--- a/migration/qemu-file.c ++++ b/migration/qemu-file.c +@@ -615,6 +615,11 @@ void qemu_file_reset_rate_limit(QEMUFile *f) + f->bytes_xfer = 0; + } + ++void qemu_file_update_transfer(QEMUFile *f, int64_t len) ++{ ++ f->bytes_xfer += len; ++} ++ + void qemu_put_be16(QEMUFile *f, unsigned int v) + { + qemu_put_byte(f, v >> 8); +diff --git a/migration/qemu-file.h b/migration/qemu-file.h +index 13baf89..5de9fa2 100644 +--- a/migration/qemu-file.h ++++ b/migration/qemu-file.h +@@ -147,6 +147,7 @@ int qemu_peek_byte(QEMUFile *f, int offset); + void qemu_file_skip(QEMUFile *f, int size); + void qemu_update_position(QEMUFile *f, size_t size); + void qemu_file_reset_rate_limit(QEMUFile *f); ++void qemu_file_update_transfer(QEMUFile *f, int64_t len); + void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate); + int64_t qemu_file_get_rate_limit(QEMUFile *f); + void qemu_file_set_error(QEMUFile *f, int ret); +-- +1.8.3.1 + diff --git a/kvm-migration-add-speed-limit-for-multifd-migration.patch b/kvm-migration-add-speed-limit-for-multifd-migration.patch new file mode 100644 index 0000000..90adb74 --- /dev/null +++ b/kvm-migration-add-speed-limit-for-multifd-migration.patch @@ -0,0 +1,141 @@ +From d2ade4bec79bdfe6f0867b0672c6731bc1664b42 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 4 Sep 2019 11:23:31 +0100 +Subject: [PATCH 3/6] migration: add speed limit for multifd migration + +RH-Author: Juan Quintela +Message-id: <20190904112332.16160-4-quintela@redhat.com> +Patchwork-id: 90279 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH v2 3/4] migration: add speed limit for multifd migration +Bugzilla: 1734316 +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Danilo de Paula + +From: Ivan Ren + +Limit the speed of multifd migration through common speed limitation +qemu file. + +Signed-off-by: Ivan Ren +Message-Id: <1564464816-21804-3-git-send-email-ivanren@tencent.com> +Reviewed-by: Wei Yang +Reviewed-by: Juan Quintela +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 1b81c974ccfd536aceef840e220912b142a7dda0) +Signed-off-by: Juan Quintela +Signed-off-by: Danilo C. L. de Paula +--- + migration/ram.c | 22 ++++++++++++---------- + 1 file changed, 12 insertions(+), 10 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 889148d..88ddd2b 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -922,7 +922,7 @@ struct { + * false. + */ + +-static int multifd_send_pages(void) ++static int multifd_send_pages(RAMState *rs) + { + int i; + static int next_channel; +@@ -954,6 +954,7 @@ static int multifd_send_pages(void) + multifd_send_state->pages = p->pages; + p->pages = pages; + transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len; ++ qemu_file_update_transfer(rs->f, transferred); + ram_counters.multifd_bytes += transferred; + ram_counters.transferred += transferred;; + qemu_mutex_unlock(&p->mutex); +@@ -962,7 +963,7 @@ static int multifd_send_pages(void) + return 1; + } + +-static int multifd_queue_page(RAMBlock *block, ram_addr_t offset) ++static int multifd_queue_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) + { + MultiFDPages_t *pages = multifd_send_state->pages; + +@@ -981,12 +982,12 @@ static int multifd_queue_page(RAMBlock *block, ram_addr_t offset) + } + } + +- if (multifd_send_pages() < 0) { ++ if (multifd_send_pages(rs) < 0) { + return -1; + } + + if (pages->block != block) { +- return multifd_queue_page(block, offset); ++ return multifd_queue_page(rs, block, offset); + } + + return 1; +@@ -1054,7 +1055,7 @@ void multifd_save_cleanup(void) + multifd_send_state = NULL; + } + +-static void 
multifd_send_sync_main(void) ++static void multifd_send_sync_main(RAMState *rs) + { + int i; + +@@ -1062,7 +1063,7 @@ static void multifd_send_sync_main(void) + return; + } + if (multifd_send_state->pages->used) { +- if (multifd_send_pages() < 0) { ++ if (multifd_send_pages(rs) < 0) { + error_report("%s: multifd_send_pages fail", __func__); + return; + } +@@ -1083,6 +1084,7 @@ static void multifd_send_sync_main(void) + p->packet_num = multifd_send_state->packet_num++; + p->flags |= MULTIFD_FLAG_SYNC; + p->pending_job++; ++ qemu_file_update_transfer(rs->f, p->packet_len); + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); + } +@@ -2079,7 +2081,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) + static int ram_save_multifd_page(RAMState *rs, RAMBlock *block, + ram_addr_t offset) + { +- if (multifd_queue_page(block, offset) < 0) { ++ if (multifd_queue_page(rs, block, offset) < 0) { + return -1; + } + ram_counters.normal++; +@@ -3482,7 +3484,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + ram_control_before_iterate(f, RAM_CONTROL_SETUP); + ram_control_after_iterate(f, RAM_CONTROL_SETUP); + +- multifd_send_sync_main(); ++ multifd_send_sync_main(*rsp); + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + +@@ -3570,7 +3572,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_ROUND); + + out: +- multifd_send_sync_main(); ++ multifd_send_sync_main(rs); + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + ram_counters.transferred += 8; +@@ -3629,7 +3631,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + + rcu_read_unlock(); + +- multifd_send_sync_main(); ++ multifd_send_sync_main(rs); + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + +-- +1.8.3.1 + diff --git a/kvm-migration-always-initialise-ram_counters-for-a-new-m.patch b/kvm-migration-always-initialise-ram_counters-for-a-new-m.patch new file mode 100644 index 0000000..1280554 --- 
/dev/null +++ b/kvm-migration-always-initialise-ram_counters-for-a-new-m.patch @@ -0,0 +1,139 @@ +From 7ec124b3de4e7c7ef093955813a213d7a976d395 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 4 Sep 2019 11:23:29 +0100 +Subject: [PATCH 1/6] migration: always initialise ram_counters for a new + migration + +RH-Author: Juan Quintela +Message-id: <20190904112332.16160-2-quintela@redhat.com> +Patchwork-id: 90278 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH v2 1/4] migration: always initialise ram_counters for a new migration +Bugzilla: 1734316 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Danilo de Paula + +From: Ivan Ren + +This patch fixes a multifd migration bug in migration speed calculation; this +problem can be reproduced as follows: +1. start a vm and give a heavy memory write stress to prevent the vm from being + successfully migrated to destination +2. begin a migration with multifd +3. migrate for a long time [actually, this can be measured by transferred bytes] +4. migrate cancel +5. begin a new migration with multifd, the migration will directly run into + migration_completion phase + +Reason as follows: + +Migration updates bandwidth and s->threshold_size in function +migration_update_counters after BUFFER_DELAY time: + + current_bytes = migration_total_bytes(s); + transferred = current_bytes - s->iteration_initial_bytes; + time_spent = current_time - s->iteration_start_time; + bandwidth = (double)transferred / time_spent; + s->threshold_size = bandwidth * s->parameters.downtime_limit; + +In multifd migration, migration_total_bytes function returns +qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes. +s->iteration_initial_bytes will be initialized to 0 at every new migration, +but ram_counters is a global variable, and history migration data will be +accumulated.
So if the ram_counters.multifd_bytes is big enough, it may lead +pending_size >= s->threshold_size become false in migration_iteration_run +after the first migration_update_counters. + +Signed-off-by: Ivan Ren +Reviewed-by: Juan Quintela +Reviewed-by: Wei Yang +Suggested-by: Wei Yang +Message-Id: <1564741121-1840-1-git-send-email-ivanren@tencent.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 87f3bd8717cd88932de302e215f1da51bfb8051a) +Signed-off-by: Juan Quintela +Signed-off-by: Danilo C. L. de Paula +--- + migration/migration.c | 25 +++++++++++++++++++------ + migration/savevm.c | 1 + + 2 files changed, 20 insertions(+), 6 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index beffbfd..5299597 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1910,6 +1910,11 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + } + + migrate_init(s); ++ /* ++ * set ram_counters memory to zero for a ++ * new migration ++ */ ++ memset(&ram_counters, 0, sizeof(ram_counters)); + + return true; + } +@@ -3027,6 +3032,17 @@ static void migration_calculate_complete(MigrationState *s) + } + } + ++static void update_iteration_initial_status(MigrationState *s) ++{ ++ /* ++ * Update these three fields at the same time to avoid mismatch info lead ++ * wrong speed calculation. 
++ */ ++ s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); ++ s->iteration_initial_bytes = migration_total_bytes(s); ++ s->iteration_initial_pages = ram_get_total_transferred_pages(); ++} ++ + static void migration_update_counters(MigrationState *s, + int64_t current_time) + { +@@ -3062,9 +3078,7 @@ static void migration_update_counters(MigrationState *s, + + qemu_file_reset_rate_limit(s->to_dst_file); + +- s->iteration_start_time = current_time; +- s->iteration_initial_bytes = current_bytes; +- s->iteration_initial_pages = ram_get_total_transferred_pages(); ++ update_iteration_initial_status(s); + + trace_migrate_transferred(transferred, time_spent, + bandwidth, s->threshold_size); +@@ -3188,7 +3202,7 @@ static void *migration_thread(void *opaque) + rcu_register_thread(); + + object_ref(OBJECT(s)); +- s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); ++ update_iteration_initial_status(s); + + qemu_savevm_state_header(s->to_dst_file); + +@@ -3253,8 +3267,7 @@ static void *migration_thread(void *opaque) + * the local variables. 
This is important to avoid + * breaking transferred_bytes and bandwidth calculation + */ +- s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +- s->iteration_initial_bytes = 0; ++ update_iteration_initial_status(s); + } + + current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +diff --git a/migration/savevm.c b/migration/savevm.c +index 79ed44d..480c511 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1424,6 +1424,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + } + + migrate_init(ms); ++ memset(&ram_counters, 0, sizeof(ram_counters)); + ms->to_dst_file = f; + + qemu_mutex_unlock_iothread(); +-- +1.8.3.1 + diff --git a/kvm-migration-update-ram_counters-for-multifd-sync-packe.patch b/kvm-migration-update-ram_counters-for-multifd-sync-packe.patch new file mode 100644 index 0000000..b22d984 --- /dev/null +++ b/kvm-migration-update-ram_counters-for-multifd-sync-packe.patch @@ -0,0 +1,47 @@ +From 0e42e63ddcaddd8837ba5ba1056380d4590754ee Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 4 Sep 2019 11:23:32 +0100 +Subject: [PATCH 4/6] migration: update ram_counters for multifd sync packet + +RH-Author: Juan Quintela +Message-id: <20190904112332.16160-5-quintela@redhat.com> +Patchwork-id: 90280 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH v2 4/4] migration: update ram_counters for multifd sync packet +Bugzilla: 1734316 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Danilo de Paula + +From: Ivan Ren + +Multifd sync will send MULTIFD_FLAG_SYNC flag info to destination, add +these bytes to ram_counters record. + +Signed-off-by: Ivan Ren +Suggested-by: Wei Yang +Message-Id: <1564464816-21804-4-git-send-email-ivanren@tencent.com> +Reviewed-by: Juan Quintela +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 81507f6b7e87468f24ed5886559feda15fe2db0c) +Signed-off-by: Juan Quintela +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/ram.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 88ddd2b..20b6eeb 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1085,6 +1085,8 @@ static void multifd_send_sync_main(RAMState *rs) + p->flags |= MULTIFD_FLAG_SYNC; + p->pending_job++; + qemu_file_update_transfer(rs->f, p->packet_len); ++ ram_counters.multifd_bytes += p->packet_len; ++ ram_counters.transferred += p->packet_len; + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); + } +-- +1.8.3.1 + diff --git a/kvm-spapr-pci-Consolidate-de-allocation-of-MSIs.patch b/kvm-spapr-pci-Consolidate-de-allocation-of-MSIs.patch new file mode 100644 index 0000000..b9e727b --- /dev/null +++ b/kvm-spapr-pci-Consolidate-de-allocation-of-MSIs.patch @@ -0,0 +1,103 @@ +From b27062f4b3ddf47dea926026e5511f15d5b31320 Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Tue, 10 Sep 2019 07:04:27 +0100 +Subject: [PATCH 5/6] spapr/pci: Consolidate de-allocation of MSIs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Gibson +Message-id: <20190910070428.28628-2-dgibson@redhat.com> +Patchwork-id: 90362 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 1/2] spapr/pci: Consolidate de-allocation of MSIs +Bugzilla: 1750200 +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Laurent Vivier + +From: Greg Kurz + +When freeing MSIs, we need to: +- remove them from the machine's MSI bitmap +- remove them from the IC backend +- remove them from the PHB's MSI cache + +This is currently open coded in two places in rtas_ibm_change_msi(), +and we're about to need this in spapr_phb_reset() as well. Instead of +duplicating this code again, make it a destroy function for the PHB's +MSI cache. Removing an MSI device from the cache will call the destroy +function internally. 
+ +Signed-off-by: Greg Kurz +Message-Id: <156415227855.1064338.5657793835271464648.stgit@bahia.lan> +Reviewed-by: Cédric Le Goater +Signed-off-by: David Gibson +(cherry picked from commit 078eb6b05b7f962e43d8bc376e0b96cdd550c17a) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1750200 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr_pci.c | 24 +++++++++++++++--------- + 1 file changed, 15 insertions(+), 9 deletions(-) + +diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c +index 9003fe9..1ffcfae 100644 +--- a/hw/ppc/spapr_pci.c ++++ b/hw/ppc/spapr_pci.c +@@ -336,10 +336,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr, + return; + } + +- if (!smc->legacy_irq_allocation) { +- spapr_irq_msi_free(spapr, msi->first_irq, msi->num); +- } +- spapr_irq_free(spapr, msi->first_irq, msi->num); + if (msi_present(pdev)) { + spapr_msi_setmsg(pdev, 0, false, 0, 0); + } +@@ -409,10 +405,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr, + + /* Release previous MSIs */ + if (msi) { +- if (!smc->legacy_irq_allocation) { +- spapr_irq_msi_free(spapr, msi->first_irq, msi->num); +- } +- spapr_irq_free(spapr, msi->first_irq, msi->num); + g_hash_table_remove(phb->msi, &config_addr); + } + +@@ -1806,6 +1798,19 @@ static void spapr_phb_unrealize(DeviceState *dev, Error **errp) + memory_region_del_subregion(get_system_memory(), &sphb->mem32window); + } + ++static void spapr_phb_destroy_msi(gpointer opaque) ++{ ++ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); ++ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); ++ spapr_pci_msi *msi = opaque; ++ ++ if (!smc->legacy_irq_allocation) { ++ spapr_irq_msi_free(spapr, msi->first_irq, msi->num); ++ } ++ spapr_irq_free(spapr, msi->first_irq, msi->num); ++ g_free(msi); ++} ++ + static void spapr_phb_realize(DeviceState *dev, Error **errp) + { + /* We don't use SPAPR_MACHINE() in order to exit gracefully if the user +@@ 
-2017,7 +2022,8 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) + spapr_tce_get_iommu(tcet)); + } + +- sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free); ++ sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, ++ spapr_phb_destroy_msi); + return; + + unrealize: +-- +1.8.3.1 + diff --git a/kvm-spapr-pci-Free-MSIs-during-reset.patch b/kvm-spapr-pci-Free-MSIs-during-reset.patch new file mode 100644 index 0000000..7be103a --- /dev/null +++ b/kvm-spapr-pci-Free-MSIs-during-reset.patch @@ -0,0 +1,56 @@ +From 26879f41a890a93beabefebb19c399561013a615 Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Tue, 10 Sep 2019 07:04:28 +0100 +Subject: [PATCH 6/6] spapr/pci: Free MSIs during reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Gibson +Message-id: <20190910070428.28628-3-dgibson@redhat.com> +Patchwork-id: 90363 +O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 2/2] spapr/pci: Free MSIs during reset +Bugzilla: 1750200 +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Laurent Vivier + +From: Greg Kurz + +When the machine is reset, the MSI bitmap is cleared but the allocated +MSIs are not freed. Some operating systems, such as AIX, can detect the +previous configuration and assert. + +Empty the MSI cache, this performs the needed cleanup. + +Signed-off-by: Greg Kurz +Message-Id: <156415228410.1064338.4486161194061636096.stgit@bahia.lan> +Reviewed-by: Cédric Le Goater +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: David Gibson +(cherry picked from commit ea52074d3a1c5fbe70f3014dc1b1f2e7d5ced5de) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1750200 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/spapr_pci.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c +index 1ffcfae..128c981 100644 +--- a/hw/ppc/spapr_pci.c ++++ b/hw/ppc/spapr_pci.c +@@ -2078,6 +2078,8 @@ static void spapr_phb_reset(DeviceState *qdev) + if (spapr_phb_eeh_available(SPAPR_PCI_HOST_BRIDGE(qdev))) { + spapr_phb_vfio_reset(qdev); + } ++ ++ g_hash_table_remove_all(sphb->msi); + } + + static Property spapr_phb_properties[] = { +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index b087571..d5882f2 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.1.0 -Release: 8%{?dist} +Release: 9%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -178,6 +178,18 @@ Patch47: kvm-file-posix-Handle-undetectable-alignment.patch Patch48: kvm-block-posix-Always-allocate-the-first-block.patch # For bz#1749134 - I/O error when virtio-blk disk is backed by a raw image on 4k disk Patch49: kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch +# For bz#1734316 - multifd migration does not honour speed limits, consumes entire bandwidth of NIC +Patch50: kvm-migration-always-initialise-ram_counters-for-a-new-m.patch +# For bz#1734316 - multifd migration does not honour speed limits, consumes entire bandwidth of NIC +Patch51: kvm-migration-add-qemu_file_update_transfer-interface.patch +# For bz#1734316 - multifd migration does not honour speed limits, consumes entire bandwidth of NIC +Patch52: kvm-migration-add-speed-limit-for-multifd-migration.patch +# For bz#1734316 - multifd migration does not honour speed limits, consumes entire bandwidth of NIC +Patch53: kvm-migration-update-ram_counters-for-multifd-sync-packe.patch +# For bz#1750200 - [RHEL8.1][QEMU4.1]boot up guest with vf device,then system_reset guest,error prompt(qemu-kvm: Can't allocate 
MSIs for device 2800: IRQ 4904 is not free) +Patch54: kvm-spapr-pci-Consolidate-de-allocation-of-MSIs.patch +# For bz#1750200 - [RHEL8.1][QEMU4.1]boot up guest with vf device,then system_reset guest,error prompt(qemu-kvm: Can't allocate MSIs for device 2800: IRQ 4904 is not free) +Patch55: kvm-spapr-pci-Free-MSIs-during-reset.patch BuildRequires: wget BuildRequires: rpm-build @@ -1119,6 +1131,18 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Sep 10 2019 Danilo Cesar Lemes de Paula - 4.1.0-9.el8 +- kvm-migration-always-initialise-ram_counters-for-a-new-m.patch [bz#1734316] +- kvm-migration-add-qemu_file_update_transfer-interface.patch [bz#1734316] +- kvm-migration-add-speed-limit-for-multifd-migration.patch [bz#1734316] +- kvm-migration-update-ram_counters-for-multifd-sync-packe.patch [bz#1734316] +- kvm-spapr-pci-Consolidate-de-allocation-of-MSIs.patch [bz#1750200] +- kvm-spapr-pci-Free-MSIs-during-reset.patch [bz#1750200] +- Resolves: bz#1734316 + (multifd migration does not honour speed limits, consumes entire bandwidth of NIC) +- Resolves: bz#1750200 + ([RHEL8.1][QEMU4.1]boot up guest with vf device,then system_reset guest,error prompt(qemu-kvm: Can't allocate MSIs for device 2800: IRQ 4904 is not free)) + * Mon Sep 09 2019 Danilo Cesar Lemes de Paula - 4.1.0-8.el8 - kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch [bz#1747836] - kvm-ehci-fix-queue-dev-null-ptr-dereference.patch [bz#1746790] From a05903a90430157d986bd37a4151ebaefc4d49e4 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Mon, 16 Sep 2019 20:28:31 +0100 Subject: [PATCH 050/195] * Mon Sep 16 2019 Danilo Cesar Lemes de Paula - 4.1.0-10.el8 - kvm-spapr-xive-Mask-the-EAS-when-allocating-an-IRQ.patch [bz#1748725] - kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch [bz#1746267] - kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch [bz#1717321] - kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch [bz#1749737] - Resolves: bz#1717321 (qemu-kvm core dumped when repeat "system_reset" multiple times during guest boot) - Resolves: bz#1746267 (qemu coredump: qemu-kvm: block/create.c:68: qmp_blockdev_create: Assertion `drv' failed) - Resolves: bz#1748725 ([ppc][migration][v6.3-rc1-p1ce8930]basic migration failed with "qemu-kvm: KVM_SET_DEVICE_ATTR failed: Group 3 attr 0x0000000000001309: Device or resource busy") - Resolves: bz#1749737 (CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-av-8]) --- ...er-m_free-might-read-pointers-from-a.patch | 61 ++++++++++ ...not-abort-if-a-block-driver-is-not-a.patch | 107 ++++++++++++++++++ ...-Mask-the-EAS-when-allocating-an-IRQ.patch | 63 +++++++++++ ...l-the-pending-BH-when-the-dataplane-.patch | 92 +++++++++++++++ qemu-kvm.spec | 24 +++- 5 files changed, 346 insertions(+), 1 deletion(-) create mode 100644 kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch create mode 100644 kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch create mode 100644 kvm-spapr-xive-Mask-the-EAS-when-allocating-an-IRQ.patch create mode 100644 kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch diff --git a/kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch b/kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch new file mode 100644 index 0000000..a342691 --- /dev/null +++ b/kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch @@ -0,0 +1,61 @@ +From 01e95b17878444859b15e79f7690d32a3532907e Mon Sep 17 00:00:00 2001 +From: 
=?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Mon, 16 Sep 2019 17:07:00 +0100 +Subject: [PATCH 4/4] Using ip_deq after m_free might read pointers from an + allocation reuse. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Philippe Mathieu-Daudé +Message-id: <20190916170700.647-2-philmd@redhat.com> +Patchwork-id: 90470 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] Using ip_deq after m_free might read pointers from an allocation reuse. +Bugzilla: 1749737 +RH-Acked-by: Danilo de Paula +RH-Acked-by: John Snow + +From: Samuel Thibault + +This would be difficult to exploit, but that is still related with +CVE-2019-14378 which generates fragmented IP packets that would trigger this +issue and at least produce a DoS. + +Signed-off-by: Samuel Thibault +(cherry picked from libslirp commit c59279437eda91841b9d26079c70b8a540d41204) +Signed-off-by: Philippe Mathieu-Daudé + +Signed-off-by: Danilo C. L. de Paula +--- + slirp/src/ip_input.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c +index 8c75d91..df1c846 100644 +--- a/slirp/src/ip_input.c ++++ b/slirp/src/ip_input.c +@@ -292,6 +292,7 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) + */ + while (q != (struct ipasfrag *)&fp->frag_link && + ip->ip_off + ip->ip_len > q->ipf_off) { ++ struct ipasfrag *prev; + i = (ip->ip_off + ip->ip_len) - q->ipf_off; + if (i < q->ipf_len) { + q->ipf_len -= i; +@@ -299,9 +300,11 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) + m_adj(dtom(slirp, q), i); + break; + } ++ prev = q; + q = q->ipf_next; +- m_free(dtom(slirp, q->ipf_prev)); +- ip_deq(q->ipf_prev); ++ ip_deq(prev); ++ m_free(dtom(slirp, prev)); ++ + } + + insert: +-- +1.8.3.1 + diff --git a/kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch b/kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch new file mode 100644 index 
0000000..4df148c --- /dev/null +++ b/kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch @@ -0,0 +1,107 @@ +From df8fadfd9450c8709864db44c2f676d40f323f95 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Fri, 13 Sep 2019 14:12:25 +0100 +Subject: [PATCH 2/4] block/create: Do not abort if a block driver is not + available +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Philippe Mathieu-Daudé +Message-id: <20190913141225.12022-2-philmd@redhat.com> +Patchwork-id: 90451 +O-Subject: [RHEL-7.7 qemu-kvm-rhev + RHEL-AV-8.1.0 qemu-kvm PATCH v2 1/1] block/create: Do not abort if a block driver is not available +Bugzilla: 1746267 +RH-Acked-by: Kevin Wolf +RH-Acked-by: John Snow +RH-Acked-by: Stefan Hajnoczi + +The 'blockdev-create' QMP command was introduced as experimental +feature in commit b0292b851b8, using the assert() debug call. +It got promoted to 'stable' command in 3fb588a0f2c, but the +assert call was not removed. + +Some block drivers are optional, and bdrv_find_format() might +return a NULL value, triggering the assertion. + +Stable code is not expected to abort, so return an error instead. + +This is easily reproducible when libnfs is not installed: + + ./configure + [...] + module support no + Block whitelist (rw) + Block whitelist (ro) + libiscsi support yes + libnfs support no + [...] 
+ +Start QEMU: + + $ qemu-system-x86_64 -S -qmp unix:/tmp/qemu.qmp,server,nowait + +Send the 'blockdev-create' with the 'nfs' driver: + + $ ( cat << 'EOF' + {'execute': 'qmp_capabilities'} + {'execute': 'blockdev-create', 'arguments': {'job-id': 'x', 'options': {'size': 0, 'driver': 'nfs', 'location': {'path': '/', 'server': {'host': '::1', 'type': 'inet'}}}}, 'id': 'x'} + EOF + ) | socat STDIO UNIX:/tmp/qemu.qmp + {"QMP": {"version": {"qemu": {"micro": 50, "minor": 1, "major": 4}, "package": "v4.1.0-733-g89ea03a7dc"}, "capabilities": ["oob"]}} + {"return": {}} + +QEMU crashes: + + $ gdb qemu-system-x86_64 core + Program received signal SIGSEGV, Segmentation fault. + (gdb) bt + #0 0x00007ffff510957f in raise () at /lib64/libc.so.6 + #1 0x00007ffff50f3895 in abort () at /lib64/libc.so.6 + #2 0x00007ffff50f3769 in _nl_load_domain.cold.0 () at /lib64/libc.so.6 + #3 0x00007ffff5101a26 in .annobin_assert.c_end () at /lib64/libc.so.6 + #4 0x0000555555d7e1f1 in qmp_blockdev_create (job_id=0x555556baee40 "x", options=0x555557666610, errp=0x7fffffffc770) at block/create.c:69 + #5 0x0000555555c96b52 in qmp_marshal_blockdev_create (args=0x7fffdc003830, ret=0x7fffffffc7f8, errp=0x7fffffffc7f0) at qapi/qapi-commands-block-core.c:1314 + #6 0x0000555555deb0a0 in do_qmp_dispatch (cmds=0x55555645de70 , request=0x7fffdc005c70, allow_oob=false, errp=0x7fffffffc898) at qapi/qmp-dispatch.c:131 + #7 0x0000555555deb2a1 in qmp_dispatch (cmds=0x55555645de70 , request=0x7fffdc005c70, allow_oob=false) at qapi/qmp-dispatch.c:174 + +With this patch applied, QEMU returns a QMP error: + + {'execute': 'blockdev-create', 'arguments': {'job-id': 'x', 'options': {'size': 0, 'driver': 'nfs', 'location': {'path': '/', 'server': {'host': '::1', 'type': 'inet'}}}}, 'id': 'x'} + {"id": "x", "error": {"class": "GenericError", "desc": "Block driver 'nfs' not found or not supported"}} + +Cc: qemu-stable@nongnu.org +Reported-by: Xu Tian +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Eric Blake 
+Reviewed-by: John Snow +Signed-off-by: Kevin Wolf +(cherry picked from commit d90d5cae2b10efc0e8d0b3cc91ff16201853d3ba) +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Danilo C. L. de Paula +--- + block/create.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/block/create.c b/block/create.c +index 9534121..de5e97b 100644 +--- a/block/create.c ++++ b/block/create.c +@@ -63,9 +63,13 @@ void qmp_blockdev_create(const char *job_id, BlockdevCreateOptions *options, + const char *fmt = BlockdevDriver_str(options->driver); + BlockDriver *drv = bdrv_find_format(fmt); + ++ if (!drv) { ++ error_setg(errp, "Block driver '%s' not found or not supported", fmt); ++ return; ++ } ++ + /* If the driver is in the schema, we know that it exists. But it may not + * be whitelisted. */ +- assert(drv); + if (bdrv_uses_whitelist() && !bdrv_is_whitelisted(drv, false)) { + error_setg(errp, "Driver is not whitelisted"); + return; +-- +1.8.3.1 + diff --git a/kvm-spapr-xive-Mask-the-EAS-when-allocating-an-IRQ.patch b/kvm-spapr-xive-Mask-the-EAS-when-allocating-an-IRQ.patch new file mode 100644 index 0000000..493bf77 --- /dev/null +++ b/kvm-spapr-xive-Mask-the-EAS-when-allocating-an-IRQ.patch @@ -0,0 +1,63 @@ +From 6a7245ed7802dff5479228376a4119e095db33b2 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Wed, 11 Sep 2019 09:43:17 +0100 +Subject: [PATCH 1/4] spapr/xive: Mask the EAS when allocating an IRQ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +Message-id: <20190911094317.21266-1-lvivier@redhat.com> +Patchwork-id: 90392 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH] spapr/xive: Mask the EAS when allocating an IRQ +Bugzilla: 1748725 +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Danilo de Paula + +From: Cédric Le Goater + +If an IRQ is allocated and not configured, such as a MSI requested by +a PCI driver, it can be saved in its default state and 
possibly later +on restored using the same state. If not initially MASKED, KVM will +try to find a matching priority/target tuple for the interrupt and +fail to restore the VM because 0/0 is not a valid target. + +When allocating an IRQ number, the EAS should be set to a sane default: +VALID and MASKED. + +Reported-by: Satheesh Rajendran +Signed-off-by: Cédric Le Goater +Message-Id: <20190813164420.9829-1-clg@kaod.org> +Signed-off-by: David Gibson +(cherry picked from commit f55750e4e4fb35b6a12c81c485f16494e2c61ad2) +Signed-off-by: Laurent Vivier + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1748725 +BRANCH: rhel-av-8.1.0/master-4.1.0 +UPSTREAM: merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=23451934 +Signed-off-by: Danilo C. L. de Paula +--- + hw/intc/spapr_xive.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c +index 3ae311d..1f9c624 100644 +--- a/hw/intc/spapr_xive.c ++++ b/hw/intc/spapr_xive.c +@@ -534,7 +534,10 @@ bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi) + return false; + } + +- xive->eat[lisn].w |= cpu_to_be64(EAS_VALID); ++ /* ++ * Set default values when allocating an IRQ number ++ */ ++ xive->eat[lisn].w |= cpu_to_be64(EAS_VALID | EAS_MASKED); + if (lsi) { + xive_source_irq_set_lsi(xsrc, lisn); + } +-- +1.8.3.1 + diff --git a/kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch b/kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch new file mode 100644 index 0000000..3d61e36 --- /dev/null +++ b/kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch @@ -0,0 +1,92 @@ +From df7d91dda24b27c89ff8ce1b9cc72c7ed7350be2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Fri, 13 Sep 2019 14:16:25 +0100 +Subject: [PATCH 3/4] virtio-blk: Cancel the pending BH when the dataplane is + reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + 
+RH-Author: Philippe Mathieu-Daudé +Message-id: <20190913141625.12521-2-philmd@redhat.com> +Patchwork-id: 90453 +O-Subject: [RHEL-7.7.z qemu-kvm-rhev + RHEL-8.1.0 qemu-kvm + RHEL-AV-8.1.0 qemu-kvm PATCH v2 1/1] virtio-blk: Cancel the pending BH when the dataplane is reset +Bugzilla: 1717321 +RH-Acked-by: John Snow +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Danilo de Paula + +When 'system_reset' is called, the main loop clears the memory +region cache before the BH has a chance to execute. Later when +the deferred function is called, some assumptions that were +made when scheduling them are no longer true when they actually +execute. + +This is what happens using a virtio-blk device (fresh RHEL7.8 install): + + $ (sleep 12.3; echo system_reset; sleep 12.3; echo system_reset; sleep 1; echo q) \ + | qemu-system-x86_64 -m 4G -smp 8 -boot menu=on \ + -device virtio-blk-pci,id=image1,drive=drive_image1 \ + -drive file=/var/lib/libvirt/images/rhel78.qcow2,if=none,id=drive_image1,format=qcow2,cache=none \ + -device virtio-net-pci,netdev=net0,id=nic0,mac=52:54:00:c4:e7:84 \ + -netdev tap,id=net0,script=/bin/true,downscript=/bin/true,vhost=on \ + -monitor stdio -serial null -nographic + (qemu) system_reset + (qemu) system_reset + (qemu) qemu-system-x86_64: hw/virtio/virtio.c:225: vring_get_region_caches: Assertion `caches != NULL' failed. 
+ Aborted + + (gdb) bt + Thread 1 (Thread 0x7f109c17b680 (LWP 10939)): + #0 0x00005604083296d1 in vring_get_region_caches (vq=0x56040a24bdd0) at hw/virtio/virtio.c:227 + #1 0x000056040832972b in vring_avail_flags (vq=0x56040a24bdd0) at hw/virtio/virtio.c:235 + #2 0x000056040832d13d in virtio_should_notify (vdev=0x56040a240630, vq=0x56040a24bdd0) at hw/virtio/virtio.c:1648 + #3 0x000056040832d1f8 in virtio_notify_irqfd (vdev=0x56040a240630, vq=0x56040a24bdd0) at hw/virtio/virtio.c:1662 + #4 0x00005604082d213d in notify_guest_bh (opaque=0x56040a243ec0) at hw/block/dataplane/virtio-blk.c:75 + #5 0x000056040883dc35 in aio_bh_call (bh=0x56040a243f10) at util/async.c:90 + #6 0x000056040883dccd in aio_bh_poll (ctx=0x560409161980) at util/async.c:118 + #7 0x0000560408842af7 in aio_dispatch (ctx=0x560409161980) at util/aio-posix.c:460 + #8 0x000056040883e068 in aio_ctx_dispatch (source=0x560409161980, callback=0x0, user_data=0x0) at util/async.c:261 + #9 0x00007f10a8fca06d in g_main_context_dispatch () at /lib64/libglib-2.0.so.0 + #10 0x0000560408841445 in glib_pollfds_poll () at util/main-loop.c:215 + #11 0x00005604088414bf in os_host_main_loop_wait (timeout=0) at util/main-loop.c:238 + #12 0x00005604088415c4 in main_loop_wait (nonblocking=0) at util/main-loop.c:514 + #13 0x0000560408416b1e in main_loop () at vl.c:1923 + #14 0x000056040841e0e8 in main (argc=20, argv=0x7ffc2c3f9c58, envp=0x7ffc2c3f9d00) at vl.c:4578 + +Fix this by cancelling the BH when the virtio dataplane is stopped. + +[This is version of the patch was modified as discussed with Philippe on +the mailing list thread. +--Stefan] + +Reported-by: Yihuang Yu +Suggested-by: Stefan Hajnoczi +Fixes: https://bugs.launchpad.net/qemu/+bug/1839428 +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20190816171503.24761-1-philmd@redhat.com> +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit ebb6ff25cd888a52a64a9adc3692541c6d1d9a42) +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/block/dataplane/virtio-blk.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index 158c78f..5fea76d 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -297,6 +297,9 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); + } + ++ qemu_bh_cancel(s->bh); ++ notify_guest_bh(s); /* final chance to notify guest */ ++ + /* Clean up guest notifier (irq) */ + k->set_guest_notifiers(qbus->parent, nvqs, false); + +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index d5882f2..6afdda6 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.1.0 -Release: 9%{?dist} +Release: 10%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -190,6 +190,14 @@ Patch53: kvm-migration-update-ram_counters-for-multifd-sync-packe.patch Patch54: kvm-spapr-pci-Consolidate-de-allocation-of-MSIs.patch # For bz#1750200 - [RHEL8.1][QEMU4.1]boot up guest with vf device,then system_reset guest,error prompt(qemu-kvm: Can't allocate MSIs for device 2800: IRQ 4904 is not free) Patch55: kvm-spapr-pci-Free-MSIs-during-reset.patch +# For bz#1748725 - [ppc][migration][v6.3-rc1-p1ce8930]basic migration failed with "qemu-kvm: KVM_SET_DEVICE_ATTR failed: Group 3 attr 0x0000000000001309: Device or resource busy" +Patch56: kvm-spapr-xive-Mask-the-EAS-when-allocating-an-IRQ.patch +# For bz#1746267 - qemu coredump: qemu-kvm: block/create.c:68: qmp_blockdev_create: Assertion `drv' failed +Patch57: kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch +# For bz#1717321 - qemu-kvm core dumped when repeat "system_reset" multiple times during guest boot +Patch58: kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch +# For 
bz#1749737 - CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-av-8] +Patch59: kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch BuildRequires: wget BuildRequires: rpm-build @@ -1131,6 +1139,20 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Sep 16 2019 Danilo Cesar Lemes de Paula - 4.1.0-10.el8 +- kvm-spapr-xive-Mask-the-EAS-when-allocating-an-IRQ.patch [bz#1748725] +- kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch [bz#1746267] +- kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch [bz#1717321] +- kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch [bz#1749737] +- Resolves: bz#1717321 + (qemu-kvm core dumped when repeat "system_reset" multiple times during guest boot) +- Resolves: bz#1746267 + (qemu coredump: qemu-kvm: block/create.c:68: qmp_blockdev_create: Assertion `drv' failed) +- Resolves: bz#1748725 + ([ppc][migration][v6.3-rc1-p1ce8930]basic migration failed with "qemu-kvm: KVM_SET_DEVICE_ATTR failed: Group 3 attr 0x0000000000001309: Device or resource busy") +- Resolves: bz#1749737 + (CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-av-8]) + * Tue Sep 10 2019 Danilo Cesar Lemes de Paula - 4.1.0-9.el8 - kvm-migration-always-initialise-ram_counters-for-a-new-m.patch [bz#1734316] - kvm-migration-add-qemu_file_update_transfer-interface.patch [bz#1734316] From 58b71dc108e0610a8d3eaee8ccbedc6c810d7576 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Mon, 23 Sep 2019 21:47:12 +0100 Subject: [PATCH 051/195] * Mon Sep 23 2019 Danilo Cesar Lemes de Paula - 4.1.0-11.el8 - kvm-blockjob-update-nodes-head-while-removing-all-bdrv.patch [bz#1746631] - kvm-hostmem-file-fix-pmem-file-size-check.patch [bz#1724008 bz#1736788] - kvm-memory-fetch-pmem-size-in-get_file_size.patch [bz#1724008 bz#1736788] - kvm-pr-manager-Fix-invalid-g_free-crash-bug.patch [bz#1753992] - Resolves: bz#1724008 (QEMU core dumped "memory_region_get_ram_ptr: Assertion `mr->ram_block' failed") - Resolves: bz#1736788 (QEMU core dumped if boot guest with nvdimm backed by /dev/dax0.0 and option pmem=off) - Resolves: bz#1746631 (Qemu core dump when do block commit under stress) - Resolves: bz#1753992 (core dump when testing persistent reservation in guest) --- ...e-nodes-head-while-removing-all-bdrv.patch | 72 +++++ ...ostmem-file-fix-pmem-file-size-check.patch | 70 +++++ ...ory-fetch-pmem-size-in-get_file_size.patch | 254 ++++++++++++++++++ ...manager-Fix-invalid-g_free-crash-bug.patch | 56 ++++ qemu-kvm.spec | 26 +- 5 files changed, 477 insertions(+), 1 deletion(-) create mode 100644 kvm-blockjob-update-nodes-head-while-removing-all-bdrv.patch create mode 100644 kvm-hostmem-file-fix-pmem-file-size-check.patch create mode 100644 kvm-memory-fetch-pmem-size-in-get_file_size.patch create mode 100644 kvm-pr-manager-Fix-invalid-g_free-crash-bug.patch diff --git a/kvm-blockjob-update-nodes-head-while-removing-all-bdrv.patch b/kvm-blockjob-update-nodes-head-while-removing-all-bdrv.patch new file mode 100644 index 0000000..5e0a568 --- /dev/null +++ b/kvm-blockjob-update-nodes-head-while-removing-all-bdrv.patch @@ -0,0 +1,72 @@ +From 49dd008d58d7527a98981d96106949b2913fb4d9 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Wed, 18 Sep 2019 11:34:14 +0100 +Subject: [PATCH 1/4] blockjob: update nodes head while removing all bdrv + +RH-Author: Sergio Lopez Pascual +Message-id: <20190918113414.24522-2-slp@redhat.com> +Patchwork-id: 90748 
+O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] blockjob: update nodes head while removing all bdrv +Bugzilla: 1746631 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf + +block_job_remove_all_bdrv() iterates through job->nodes, calling +bdrv_root_unref_child() for each entry. The call to the latter may +reach child_job_[can_]set_aio_ctx(), which will also attempt to +traverse job->nodes, potentially finding entries that were freed +on previous iterations. + +To avoid this situation, update job->nodes head on each iteration to +ensure that already freed entries are no longer linked to the list. + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1746631 +Signed-off-by: Sergio Lopez +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Message-id: 20190911100316.32282-1-mreitz@redhat.com +Reviewed-by: Sergio Lopez +Signed-off-by: Max Reitz +(cherry picked from commit d876bf676f5e7c6aa9ac64555e48cba8734ecb2f) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + blockjob.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +diff --git a/blockjob.c b/blockjob.c +index 20b7f55..74abb97 100644 +--- a/blockjob.c ++++ b/blockjob.c +@@ -186,14 +186,23 @@ static const BdrvChildRole child_job = { + + void block_job_remove_all_bdrv(BlockJob *job) + { +- GSList *l; +- for (l = job->nodes; l; l = l->next) { ++ /* ++ * bdrv_root_unref_child() may reach child_job_[can_]set_aio_ctx(), ++ * which will also traverse job->nodes, so consume the list one by ++ * one to make sure that such a concurrent access does not attempt ++ * to process an already freed BdrvChild. 
++ */ ++ while (job->nodes) { ++ GSList *l = job->nodes; + BdrvChild *c = l->data; ++ ++ job->nodes = l->next; ++ + bdrv_op_unblock_all(c->bs, job->blocker); + bdrv_root_unref_child(c); ++ ++ g_slist_free_1(l); + } +- g_slist_free(job->nodes); +- job->nodes = NULL; + } + + bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) +-- +1.8.3.1 + diff --git a/kvm-hostmem-file-fix-pmem-file-size-check.patch b/kvm-hostmem-file-fix-pmem-file-size-check.patch new file mode 100644 index 0000000..2fc5e43 --- /dev/null +++ b/kvm-hostmem-file-fix-pmem-file-size-check.patch @@ -0,0 +1,70 @@ +From c7c95a2f14d94eb8213ce7cab03acdef40fed093 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 18 Sep 2019 15:10:06 +0100 +Subject: [PATCH 2/4] hostmem-file: fix pmem file size check +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +Message-id: <20190918151007.27973-2-stefanha@redhat.com> +Patchwork-id: 90763 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/2] hostmem-file: fix pmem file size check +Bugzilla: 1724008 1736788 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eduardo Habkost + +Commit 314aec4a6e06844937f1677f6cba21981005f389 ("hostmem-file: reject +invalid pmem file sizes") added a file size check that verifies the +hostmem object's size parameter against the actual devdax pmem file. +This is useful because getting the size wrong results in confusing +errors inside the guest. + +However, the code doesn't work properly for files where struct +stat::st_size is zero. Hostmem-file's ->alloc() function returns early +without setting an Error, causing the following assertion failure: + + qemu/memory.c:2215: memory_region_get_ram_ptr: Assertion `mr->ram_block' failed. + +This patch handles the case where qemu_get_pmem_size() returns 0 but +there is no error. 
+ +Fixes: 314aec4a6e06844937f1677f6cba21981005f389 +Signed-off-by: Stefan Hajnoczi +Message-Id: <20190823135632.25010-1-stefanha@redhat.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 7faae95ebc966c2981b78cf7c25009dfa32d4b72) + +Note that this commit will be effectively reverted by the next one but I +backported it to avoid conflicts. + +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. de Paula +--- + backends/hostmem-file.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c +index 29e55c9..ecc15e3 100644 +--- a/backends/hostmem-file.c ++++ b/backends/hostmem-file.c +@@ -67,12 +67,12 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + uint64_t size; + + size = qemu_get_pmem_size(fb->mem_path, &local_err); +- if (!size) { ++ if (local_err) { + error_propagate(errp, local_err); + return; + } + +- if (backend->size > size) { ++ if (size && backend->size > size) { + error_setg(errp, "size property %" PRIu64 " is larger than " + "pmem file \"%s\" size %" PRIu64, backend->size, + fb->mem_path, size); +-- +1.8.3.1 + diff --git a/kvm-memory-fetch-pmem-size-in-get_file_size.patch b/kvm-memory-fetch-pmem-size-in-get_file_size.patch new file mode 100644 index 0000000..80a2e15 --- /dev/null +++ b/kvm-memory-fetch-pmem-size-in-get_file_size.patch @@ -0,0 +1,254 @@ +From 9d6d365abaea5e068f060b8a70d5b8fab43a9f7f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 18 Sep 2019 15:10:07 +0100 +Subject: [PATCH 3/4] memory: fetch pmem size in get_file_size() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +Message-id: <20190918151007.27973-3-stefanha@redhat.com> +Patchwork-id: 90762 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/2] memory: fetch pmem size in get_file_size() +Bugzilla: 1724008 1736788 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Igor 
Mammedov +RH-Acked-by: Eduardo Habkost + +Neither stat(2) nor lseek(2) report the size of Linux devdax pmem +character device nodes. Commit 314aec4a6e06844937f1677f6cba21981005f389 +("hostmem-file: reject invalid pmem file sizes") added code to +hostmem-file.c to fetch the size from sysfs and compare against the +user-provided size=NUM parameter: + + if (backend->size > size) { + error_setg(errp, "size property %" PRIu64 " is larger than " + "pmem file \"%s\" size %" PRIu64, backend->size, + fb->mem_path, size); + return; + } + +It turns out that exec.c:qemu_ram_alloc_from_fd() already has an +equivalent size check but it skips devdax pmem character devices because +lseek(2) returns 0: + + if (file_size > 0 && file_size < size) { + error_setg(errp, "backing store %s size 0x%" PRIx64 + " does not match 'size' option 0x" RAM_ADDR_FMT, + mem_path, file_size, size); + return NULL; + } + +This patch moves the devdax pmem file size code into get_file_size() so +that we check the memory size in a single place: +qemu_ram_alloc_from_fd(). This simplifies the code and makes it more +general. + +This also fixes the problem that hostmem-file only checks the devdax +pmem file size when the pmem=on parameter is given. An unchecked +size=NUM parameter can lead to SIGBUS in QEMU so we must always fetch +the file size for Linux devdax pmem character device nodes. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20190830093056.12572-1-stefanha@redhat.com> +Reviewed-by: Eduardo Habkost +Signed-off-by: Paolo Bonzini +(cherry picked from commit 72d41eb4b8f923de91e8f06dc20aa86b0a9155fb) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. 
de Paula +--- + backends/hostmem-file.c | 22 -------------------- + exec.c | 34 ++++++++++++++++++++++++++++++- + include/qemu/osdep.h | 13 ------------ + util/oslib-posix.c | 54 ------------------------------------------------- + util/oslib-win32.c | 6 ------ + 5 files changed, 33 insertions(+), 96 deletions(-) + +diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c +index ecc15e3..be64020 100644 +--- a/backends/hostmem-file.c ++++ b/backends/hostmem-file.c +@@ -58,28 +58,6 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + return; + } + +- /* +- * Verify pmem file size since starting a guest with an incorrect size +- * leads to confusing failures inside the guest. +- */ +- if (fb->is_pmem) { +- Error *local_err = NULL; +- uint64_t size; +- +- size = qemu_get_pmem_size(fb->mem_path, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- return; +- } +- +- if (size && backend->size > size) { +- error_setg(errp, "size property %" PRIu64 " is larger than " +- "pmem file \"%s\" size %" PRIu64, backend->size, +- fb->mem_path, size); +- return; +- } +- } +- + backend->force_prealloc = mem_prealloc; + name = host_memory_backend_get_name(backend); + memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), +diff --git a/exec.c b/exec.c +index 3e78de3..3c03edf 100644 +--- a/exec.c ++++ b/exec.c +@@ -1813,7 +1813,39 @@ long qemu_maxrampagesize(void) + #ifdef CONFIG_POSIX + static int64_t get_file_size(int fd) + { +- int64_t size = lseek(fd, 0, SEEK_END); ++ int64_t size; ++#if defined(__linux__) ++ struct stat st; ++ ++ if (fstat(fd, &st) < 0) { ++ return -errno; ++ } ++ ++ /* Special handling for devdax character devices */ ++ if (S_ISCHR(st.st_mode)) { ++ g_autofree char *subsystem_path = NULL; ++ g_autofree char *subsystem = NULL; ++ ++ subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem", ++ major(st.st_rdev), minor(st.st_rdev)); ++ subsystem = g_file_read_link(subsystem_path, NULL); ++ ++ if (subsystem 
&& g_str_has_suffix(subsystem, "/dax")) { ++ g_autofree char *size_path = NULL; ++ g_autofree char *size_str = NULL; ++ ++ size_path = g_strdup_printf("/sys/dev/char/%d:%d/size", ++ major(st.st_rdev), minor(st.st_rdev)); ++ ++ if (g_file_get_contents(size_path, &size_str, NULL, NULL)) { ++ return g_ascii_strtoll(size_str, NULL, 0); ++ } ++ } ++ } ++#endif /* defined(__linux__) */ ++ ++ /* st.st_size may be zero for special files yet lseek(2) works */ ++ size = lseek(fd, 0, SEEK_END); + if (size < 0) { + return -errno; + } +diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h +index af2b91f..c7d242f 100644 +--- a/include/qemu/osdep.h ++++ b/include/qemu/osdep.h +@@ -571,19 +571,6 @@ void os_mem_prealloc(int fd, char *area, size_t sz, int smp_cpus, + Error **errp); + + /** +- * qemu_get_pmem_size: +- * @filename: path to a pmem file +- * @errp: pointer to a NULL-initialized error object +- * +- * Determine the size of a persistent memory file. Besides supporting files on +- * DAX file systems, this function also supports Linux devdax character +- * devices. 
+- * +- * Returns: the size or 0 on failure +- */ +-uint64_t qemu_get_pmem_size(const char *filename, Error **errp); +- +-/** + * qemu_get_pid_name: + * @pid: pid of a process + * +diff --git a/util/oslib-posix.c b/util/oslib-posix.c +index fe0309c..d772da8 100644 +--- a/util/oslib-posix.c ++++ b/util/oslib-posix.c +@@ -513,60 +513,6 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, + } + } + +-uint64_t qemu_get_pmem_size(const char *filename, Error **errp) +-{ +- struct stat st; +- +- if (stat(filename, &st) < 0) { +- error_setg(errp, "unable to stat pmem file \"%s\"", filename); +- return 0; +- } +- +-#if defined(__linux__) +- /* Special handling for devdax character devices */ +- if (S_ISCHR(st.st_mode)) { +- char *subsystem_path = NULL; +- char *subsystem = NULL; +- char *size_path = NULL; +- char *size_str = NULL; +- uint64_t ret = 0; +- +- subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem", +- major(st.st_rdev), minor(st.st_rdev)); +- subsystem = g_file_read_link(subsystem_path, NULL); +- if (!subsystem) { +- error_setg(errp, "unable to read subsystem for pmem file \"%s\"", +- filename); +- goto devdax_err; +- } +- +- if (!g_str_has_suffix(subsystem, "/dax")) { +- error_setg(errp, "pmem file \"%s\" is not a dax device", filename); +- goto devdax_err; +- } +- +- size_path = g_strdup_printf("/sys/dev/char/%d:%d/size", +- major(st.st_rdev), minor(st.st_rdev)); +- if (!g_file_get_contents(size_path, &size_str, NULL, NULL)) { +- error_setg(errp, "unable to read size for pmem file \"%s\"", +- size_path); +- goto devdax_err; +- } +- +- ret = g_ascii_strtoull(size_str, NULL, 0); +- +-devdax_err: +- g_free(size_str); +- g_free(size_path); +- g_free(subsystem); +- g_free(subsystem_path); +- return ret; +- } +-#endif /* defined(__linux__) */ +- +- return st.st_size; +-} +- + char *qemu_get_pid_name(pid_t pid) + { + char *name = NULL; +diff --git a/util/oslib-win32.c b/util/oslib-win32.c +index 9583fb4..c62cd43 100644 +--- 
a/util/oslib-win32.c ++++ b/util/oslib-win32.c +@@ -562,12 +562,6 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, + } + } + +-uint64_t qemu_get_pmem_size(const char *filename, Error **errp) +-{ +- error_setg(errp, "pmem support not available"); +- return 0; +-} +- + char *qemu_get_pid_name(pid_t pid) + { + /* XXX Implement me */ +-- +1.8.3.1 + diff --git a/kvm-pr-manager-Fix-invalid-g_free-crash-bug.patch b/kvm-pr-manager-Fix-invalid-g_free-crash-bug.patch new file mode 100644 index 0000000..42c4cd9 --- /dev/null +++ b/kvm-pr-manager-Fix-invalid-g_free-crash-bug.patch @@ -0,0 +1,56 @@ +From 84728251439a3b73a57a8d72cc4d39307207cc01 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 20 Sep 2019 16:48:41 +0100 +Subject: [PATCH 4/4] pr-manager: Fix invalid g_free() crash bug +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20190920164841.10424-1-pbonzini@redhat.com> +Patchwork-id: 90824 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v2] pr-manager: Fix invalid g_free() crash bug +Bugzilla: 1753992 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Kevin Wolf +RH-Acked-by: Markus Armbruster + +From: Markus Armbruster + +BZ: 1753992 +BRANCH: rhel-av-8.1.0/master-4.1.0 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=23612762 +Upstream: 6b9d62c2a9e83bbad73fb61406f0ff69b46ff6f3 + +pr_manager_worker() passes its @opaque argument to g_free(). Wrong; +it points to pr_manager_worker()'s automatic @data. Broken when +commit 2f3a7ab39be converted @data from heap- to stack-allocated. Fix +by deleting the g_free(). + +Fixes: 2f3a7ab39bec4ba8022dc4d42ea641165b004e3e +Cc: qemu-stable@nongnu.org +Signed-off-by: Markus Armbruster +Reviewed-by: Philippe Mathieu-Daudé +Acked-by: Paolo Bonzini +Signed-off-by: Kevin Wolf +(cherry picked from commit 6b9d62c2a9e83bbad73fb61406f0ff69b46ff6f3) +Signed-off-by: Danilo C. L. 
de Paula +--- + scsi/pr-manager.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/scsi/pr-manager.c b/scsi/pr-manager.c +index ee43663..0c866e8 100644 +--- a/scsi/pr-manager.c ++++ b/scsi/pr-manager.c +@@ -39,7 +39,6 @@ static int pr_manager_worker(void *opaque) + int fd = data->fd; + int r; + +- g_free(data); + trace_pr_manager_run(fd, hdr->cmdp[0], hdr->cmdp[1]); + + /* The reference was taken in pr_manager_execute. */ +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 6afdda6..697bb0c 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.1.0 -Release: 10%{?dist} +Release: 11%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -198,6 +198,16 @@ Patch57: kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch Patch58: kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch # For bz#1749737 - CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-av-8] Patch59: kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch +# For bz#1746631 - Qemu core dump when do block commit under stress +Patch60: kvm-blockjob-update-nodes-head-while-removing-all-bdrv.patch +# For bz#1724008 - QEMU core dumped "memory_region_get_ram_ptr: Assertion `mr->ram_block' failed" +# For bz#1736788 - QEMU core dumped if boot guest with nvdimm backed by /dev/dax0.0 and option pmem=off +Patch61: kvm-hostmem-file-fix-pmem-file-size-check.patch +# For bz#1724008 - QEMU core dumped "memory_region_get_ram_ptr: Assertion `mr->ram_block' failed" +# For bz#1736788 - QEMU core dumped if boot guest with nvdimm backed by /dev/dax0.0 and option pmem=off +Patch62: kvm-memory-fetch-pmem-size-in-get_file_size.patch +# For bz#1753992 - core dump when testing persistent reservation in guest +Patch63: kvm-pr-manager-Fix-invalid-g_free-crash-bug.patch 
BuildRequires: wget BuildRequires: rpm-build @@ -1139,6 +1149,20 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Sep 23 2019 Danilo Cesar Lemes de Paula - 4.1.0-11.el8 +- kvm-blockjob-update-nodes-head-while-removing-all-bdrv.patch [bz#1746631] +- kvm-hostmem-file-fix-pmem-file-size-check.patch [bz#1724008 bz#1736788] +- kvm-memory-fetch-pmem-size-in-get_file_size.patch [bz#1724008 bz#1736788] +- kvm-pr-manager-Fix-invalid-g_free-crash-bug.patch [bz#1753992] +- Resolves: bz#1724008 + (QEMU core dumped "memory_region_get_ram_ptr: Assertion `mr->ram_block' failed") +- Resolves: bz#1736788 + (QEMU core dumped if boot guest with nvdimm backed by /dev/dax0.0 and option pmem=off) +- Resolves: bz#1746631 + (Qemu core dump when do block commit under stress) +- Resolves: bz#1753992 + (core dump when testing persistent reservation in guest) + * Mon Sep 16 2019 Danilo Cesar Lemes de Paula - 4.1.0-10.el8 - kvm-spapr-xive-Mask-the-EAS-when-allocating-an-IRQ.patch [bz#1748725] - kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch [bz#1746267] From 4172d6971a212b3597f1e112334d22785ea7f864 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Thu, 26 Sep 2019 00:35:11 +0100 Subject: [PATCH 052/195] * Thu Sep 26 2019 Danilo Cesar Lemes de Paula - 4.1.0-12.el8 - kvm-block-Use-QEMU_IS_ALIGNED.patch [bz#1745922] - kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch [bz#1745922] - kvm-block-qcow2-refactor-encryption-code.patch [bz#1745922] - kvm-qemu-iotests-Add-test-for-bz-1745922.patch [bz#1745922] - Resolves: bz#1745922 (Luks-inside-qcow2 snapshot cannot boot after 'qemu-img rebase') --- kvm-block-Use-QEMU_IS_ALIGNED.patch | 192 ++++++++++++++ ...corruption-introduced-by-commit-8ac0.patch | 78 ++++++ ...block-qcow2-refactor-encryption-code.patch | 234 ++++++++++++++++++ ...qemu-iotests-Add-test-for-bz-1745922.patch | 191 ++++++++++++++ qemu-kvm.spec | 18 +- 5 files changed, 712 insertions(+), 1 deletion(-) create mode 100644 kvm-block-Use-QEMU_IS_ALIGNED.patch create mode 100644 kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch create mode 100644 kvm-block-qcow2-refactor-encryption-code.patch create mode 100644 kvm-qemu-iotests-Add-test-for-bz-1745922.patch diff --git a/kvm-block-Use-QEMU_IS_ALIGNED.patch b/kvm-block-Use-QEMU_IS_ALIGNED.patch new file mode 100644 index 0000000..7d4e065 --- /dev/null +++ b/kvm-block-Use-QEMU_IS_ALIGNED.patch @@ -0,0 +1,192 @@ +From 1eb1c45037b1e1084ab601ac8461fabca162b479 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 24 Sep 2019 21:11:49 +0100 +Subject: [PATCH 1/4] block: Use QEMU_IS_ALIGNED + +RH-Author: Maxim Levitsky +Message-id: <20190924211152.13461-2-mlevitsk@redhat.com> +Patchwork-id: 90874 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v2 1/4] block: Use QEMU_IS_ALIGNED +Bugzilla: 1745922 +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz +RH-Acked-by: Danilo de Paula + +From: Nir Soffer + +Replace instances of: + + (n & (BDRV_SECTOR_SIZE - 1)) == 0 + +And: + + (n & ~BDRV_SECTOR_MASK) == 0 + +With: + + QEMU_IS_ALIGNED(n, BDRV_SECTOR_SIZE) + +Which reveals the intent of the code better, and makes it easier to +locate the 
code checking alignment. + +Signed-off-by: Nir Soffer +Message-id: 20190827185913.27427-2-nsoffer@redhat.com +Reviewed-by: John Snow +Signed-off-by: Max Reitz +(cherry picked from commit 1bbbf32d5fffe334531c315d7bd865fdfb67b6c5) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. de Paula +--- + block/bochs.c | 4 ++-- + block/cloop.c | 4 ++-- + block/dmg.c | 4 ++-- + block/io.c | 8 ++++---- + block/qcow2-cluster.c | 4 ++-- + block/qcow2.c | 4 ++-- + block/vvfat.c | 8 ++++---- + qemu-img.c | 2 +- + 8 files changed, 19 insertions(+), 19 deletions(-) + +diff --git a/block/bochs.c b/block/bochs.c +index 962f185..32bb83b 100644 +--- a/block/bochs.c ++++ b/block/bochs.c +@@ -248,8 +248,8 @@ bochs_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector local_qiov; + int ret; + +- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); +- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); ++ assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); ++ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + + qemu_iovec_init(&local_qiov, qiov->niov); + qemu_co_mutex_lock(&s->lock); +diff --git a/block/cloop.c b/block/cloop.c +index 384c973..4de9487 100644 +--- a/block/cloop.c ++++ b/block/cloop.c +@@ -253,8 +253,8 @@ cloop_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + int nb_sectors = bytes >> BDRV_SECTOR_BITS; + int ret, i; + +- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); +- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); ++ assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); ++ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + + qemu_co_mutex_lock(&s->lock); + +diff --git a/block/dmg.c b/block/dmg.c +index 45f6b28..4a045f2 100644 +--- a/block/dmg.c ++++ b/block/dmg.c +@@ -697,8 +697,8 @@ dmg_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + int nb_sectors = bytes >> BDRV_SECTOR_BITS; + int ret, i; + +- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); +- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); ++ 
assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); ++ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + + qemu_co_mutex_lock(&s->lock); + +diff --git a/block/io.c b/block/io.c +index 06305c6..54093fc 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -1079,8 +1079,8 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, + sector_num = offset >> BDRV_SECTOR_BITS; + nb_sectors = bytes >> BDRV_SECTOR_BITS; + +- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); +- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); ++ assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); ++ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + assert(bytes <= BDRV_REQUEST_MAX_BYTES); + assert(drv->bdrv_co_readv); + +@@ -1132,8 +1132,8 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, + sector_num = offset >> BDRV_SECTOR_BITS; + nb_sectors = bytes >> BDRV_SECTOR_BITS; + +- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); +- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); ++ assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); ++ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + assert(bytes <= BDRV_REQUEST_MAX_BYTES); + + assert(drv->bdrv_co_writev); +diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c +index cc5609e..f2de746 100644 +--- a/block/qcow2-cluster.c ++++ b/block/qcow2-cluster.c +@@ -470,8 +470,8 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs, + { + if (bytes && bs->encrypted) { + BDRVQcow2State *s = bs->opaque; +- assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0); +- assert((bytes & ~BDRV_SECTOR_MASK) == 0); ++ assert(QEMU_IS_ALIGNED(offset_in_cluster, BDRV_SECTOR_SIZE)); ++ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + assert(s->crypto); + if (qcow2_co_encrypt(bs, cluster_offset, + src_cluster_offset + offset_in_cluster, +diff --git a/block/qcow2.c b/block/qcow2.c +index 039bdc2..dc4302f 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -2071,8 +2071,8 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState 
*bs, uint64_t offset, + } + if (bs->encrypted) { + assert(s->crypto); +- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); +- assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0); ++ assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); ++ assert(QEMU_IS_ALIGNED(cur_bytes, BDRV_SECTOR_SIZE)); + if (qcow2_co_decrypt(bs, cluster_offset, offset, + cluster_data, cur_bytes) < 0) { + ret = -EIO; +diff --git a/block/vvfat.c b/block/vvfat.c +index f6c2880..019b8f1 100644 +--- a/block/vvfat.c ++++ b/block/vvfat.c +@@ -1547,8 +1547,8 @@ vvfat_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + int nb_sectors = bytes >> BDRV_SECTOR_BITS; + void *buf; + +- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); +- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); ++ assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); ++ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + + buf = g_try_malloc(bytes); + if (bytes && buf == NULL) { +@@ -3082,8 +3082,8 @@ vvfat_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + int nb_sectors = bytes >> BDRV_SECTOR_BITS; + void *buf; + +- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); +- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); ++ assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); ++ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + + buf = g_try_malloc(bytes); + if (bytes && buf == NULL) { +diff --git a/qemu-img.c b/qemu-img.c +index 7998377..940ae94 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -2138,7 +2138,7 @@ static int img_convert(int argc, char **argv) + int64_t sval; + + sval = cvtnum(optarg); +- if (sval < 0 || sval & (BDRV_SECTOR_SIZE - 1) || ++ if (sval < 0 || !QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) || + sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) { + error_report("Invalid buffer size for sparse output specified. " + "Valid sizes are multiples of %llu up to %llu. 
Select " +-- +1.8.3.1 + diff --git a/kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch b/kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch new file mode 100644 index 0000000..9cdcb3f --- /dev/null +++ b/kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch @@ -0,0 +1,78 @@ +From 554884a1fe9a271ad78771c37e4be5a8f84258fc Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 24 Sep 2019 21:11:50 +0100 +Subject: [PATCH 2/4] block/qcow2: Fix corruption introduced by commit + 8ac0f15f335 + +RH-Author: Maxim Levitsky +Message-id: <20190924211152.13461-3-mlevitsk@redhat.com> +Patchwork-id: 90878 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v2 2/4] block/qcow2: Fix corruption introduced by commit 8ac0f15f335 +Bugzilla: 1745922 +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz +RH-Acked-by: Danilo de Paula + +This fixes subtle corruption introduced by luks threaded encryption +in commit 8ac0f15f335 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1745922 + +The corruption happens when we do a write that + * writes to two or more unallocated clusters at once + * doesn't fully cover the first sector + * doesn't fully cover the last sector + * uses luks encryption + +In this case, when allocating the new clusters we COW both areas +prior to the write and after the write, and we encrypt them. + +The above mentioned commit accidentally made it so we encrypt the +second COW area using the physical cluster offset of the first area. + +The problem is that offset_in_cluster in do_perform_cow_encrypt +can be larger that the cluster size, thus cluster_offset +will no longer point to the start of the cluster at which encrypted +area starts. + +Next patch in this series will refactor the code to avoid all these +assumptions. + +In the bugreport that was triggered by rebasing a luks image to new, +zero filled base, which lot of such writes, and causes some files +with zero areas to contain garbage there instead. 
+But as described above it can happen elsewhere as well + +Signed-off-by: Maxim Levitsky +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-id: 20190915203655.21638-2-mlevitsk@redhat.com +Reviewed-by: Max Reitz +Signed-off-by: Max Reitz +(cherry picked from commit 38e7d54bdc518b5a05a922467304bcace2396945) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. de Paula +--- + block/qcow2-cluster.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c +index f2de746..11e5a92 100644 +--- a/block/qcow2-cluster.c ++++ b/block/qcow2-cluster.c +@@ -473,9 +473,10 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs, + assert(QEMU_IS_ALIGNED(offset_in_cluster, BDRV_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + assert(s->crypto); +- if (qcow2_co_encrypt(bs, cluster_offset, +- src_cluster_offset + offset_in_cluster, +- buffer, bytes) < 0) { ++ if (qcow2_co_encrypt(bs, ++ start_of_cluster(s, cluster_offset + offset_in_cluster), ++ src_cluster_offset + offset_in_cluster, ++ buffer, bytes) < 0) { + return false; + } + } +-- +1.8.3.1 + diff --git a/kvm-block-qcow2-refactor-encryption-code.patch b/kvm-block-qcow2-refactor-encryption-code.patch new file mode 100644 index 0000000..f32907a --- /dev/null +++ b/kvm-block-qcow2-refactor-encryption-code.patch @@ -0,0 +1,234 @@ +From 780fbdf04884188eca3d5891faa2b2417a88ef14 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 24 Sep 2019 21:11:51 +0100 +Subject: [PATCH 3/4] block/qcow2: refactor encryption code + +RH-Author: Maxim Levitsky +Message-id: <20190924211152.13461-4-mlevitsk@redhat.com> +Patchwork-id: 90876 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v2 3/4] block/qcow2: refactor encryption code +Bugzilla: 1745922 +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz +RH-Acked-by: Danilo de Paula + +* Change the qcow2_co_{encrypt|decrypt} to just receive full host and + guest offsets and use this function directly 
instead of calling + do_perform_cow_encrypt (which is removed by that patch). + +* Adjust qcow2_co_encdec to take full host and guest offsets as well. + +* Document the qcow2_co_{encrypt|decrypt} arguments + to prevent the bug fixed in former commit from hopefully + happening again. + +Signed-off-by: Maxim Levitsky +Message-id: 20190915203655.21638-3-mlevitsk@redhat.com +Reviewed-by: Vladimir Sementsov-Ogievskiy +[mreitz: Let perform_cow() return the error value returned by + qcow2_co_encrypt(), as proposed by Vladimir] +Signed-off-by: Max Reitz +(cherry picked from commit 603fbd076c76438b15ec842f0e2d1ba4867dfd00) +Signed-off-by: Maxim Levitsky + +Signed-off-by: Danilo C. L. de Paula +--- + block/qcow2-cluster.c | 41 +++++++++++---------------------- + block/qcow2-threads.c | 63 +++++++++++++++++++++++++++++++++++++++------------ + block/qcow2.c | 5 ++-- + block/qcow2.h | 8 +++---- + 4 files changed, 69 insertions(+), 48 deletions(-) + +diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c +index 11e5a92..b30fd12 100644 +--- a/block/qcow2-cluster.c ++++ b/block/qcow2-cluster.c +@@ -461,28 +461,6 @@ static int coroutine_fn do_perform_cow_read(BlockDriverState *bs, + return 0; + } + +-static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs, +- uint64_t src_cluster_offset, +- uint64_t cluster_offset, +- unsigned offset_in_cluster, +- uint8_t *buffer, +- unsigned bytes) +-{ +- if (bytes && bs->encrypted) { +- BDRVQcow2State *s = bs->opaque; +- assert(QEMU_IS_ALIGNED(offset_in_cluster, BDRV_SECTOR_SIZE)); +- assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); +- assert(s->crypto); +- if (qcow2_co_encrypt(bs, +- start_of_cluster(s, cluster_offset + offset_in_cluster), +- src_cluster_offset + offset_in_cluster, +- buffer, bytes) < 0) { +- return false; +- } +- } +- return true; +-} +- + static int coroutine_fn do_perform_cow_write(BlockDriverState *bs, + uint64_t cluster_offset, + unsigned offset_in_cluster, +@@ -887,12 +865,19 @@ static int 
perform_cow(BlockDriverState *bs, QCowL2Meta *m) + + /* Encrypt the data if necessary before writing it */ + if (bs->encrypted) { +- if (!do_perform_cow_encrypt(bs, m->offset, m->alloc_offset, +- start->offset, start_buffer, +- start->nb_bytes) || +- !do_perform_cow_encrypt(bs, m->offset, m->alloc_offset, +- end->offset, end_buffer, end->nb_bytes)) { +- ret = -EIO; ++ ret = qcow2_co_encrypt(bs, ++ m->alloc_offset + start->offset, ++ m->offset + start->offset, ++ start_buffer, start->nb_bytes); ++ if (ret < 0) { ++ goto fail; ++ } ++ ++ ret = qcow2_co_encrypt(bs, ++ m->alloc_offset + end->offset, ++ m->offset + end->offset, ++ end_buffer, end->nb_bytes); ++ if (ret < 0) { + goto fail; + } + } +diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c +index 3b1e63f..8f5a0d1 100644 +--- a/block/qcow2-threads.c ++++ b/block/qcow2-threads.c +@@ -234,35 +234,70 @@ static int qcow2_encdec_pool_func(void *opaque) + } + + static int coroutine_fn +-qcow2_co_encdec(BlockDriverState *bs, uint64_t file_cluster_offset, +- uint64_t offset, void *buf, size_t len, Qcow2EncDecFunc func) ++qcow2_co_encdec(BlockDriverState *bs, uint64_t host_offset, ++ uint64_t guest_offset, void *buf, size_t len, ++ Qcow2EncDecFunc func) + { + BDRVQcow2State *s = bs->opaque; + Qcow2EncDecData arg = { + .block = s->crypto, +- .offset = s->crypt_physical_offset ? +- file_cluster_offset + offset_into_cluster(s, offset) : +- offset, ++ .offset = s->crypt_physical_offset ? host_offset : guest_offset, + .buf = buf, + .len = len, + .func = func, + }; + +- return qcow2_co_process(bs, qcow2_encdec_pool_func, &arg); ++ assert(QEMU_IS_ALIGNED(guest_offset, BDRV_SECTOR_SIZE)); ++ assert(QEMU_IS_ALIGNED(host_offset, BDRV_SECTOR_SIZE)); ++ assert(QEMU_IS_ALIGNED(len, BDRV_SECTOR_SIZE)); ++ assert(s->crypto); ++ ++ return len == 0 ? 
0 : qcow2_co_process(bs, qcow2_encdec_pool_func, &arg); + } + ++/* ++ * qcow2_co_encrypt() ++ * ++ * Encrypts one or more contiguous aligned sectors ++ * ++ * @host_offset - underlying storage offset of the first sector of the ++ * data to be encrypted ++ * ++ * @guest_offset - guest (virtual) offset of the first sector of the ++ * data to be encrypted ++ * ++ * @buf - buffer with the data to encrypt, that after encryption ++ * will be written to the underlying storage device at ++ * @host_offset ++ * ++ * @len - length of the buffer (must be a BDRV_SECTOR_SIZE multiple) ++ * ++ * Depending on the encryption method, @host_offset and/or @guest_offset ++ * may be used for generating the initialization vector for ++ * encryption. ++ * ++ * Note that while the whole range must be aligned on sectors, it ++ * does not have to be aligned on clusters and can also cross cluster ++ * boundaries ++ */ + int coroutine_fn +-qcow2_co_encrypt(BlockDriverState *bs, uint64_t file_cluster_offset, +- uint64_t offset, void *buf, size_t len) ++qcow2_co_encrypt(BlockDriverState *bs, uint64_t host_offset, ++ uint64_t guest_offset, void *buf, size_t len) + { +- return qcow2_co_encdec(bs, file_cluster_offset, offset, buf, len, +- qcrypto_block_encrypt); ++ return qcow2_co_encdec(bs, host_offset, guest_offset, buf, len, ++ qcrypto_block_encrypt); + } + ++/* ++ * qcow2_co_decrypt() ++ * ++ * Decrypts one or more contiguous aligned sectors ++ * Similar to qcow2_co_encrypt ++ */ + int coroutine_fn +-qcow2_co_decrypt(BlockDriverState *bs, uint64_t file_cluster_offset, +- uint64_t offset, void *buf, size_t len) ++qcow2_co_decrypt(BlockDriverState *bs, uint64_t host_offset, ++ uint64_t guest_offset, void *buf, size_t len) + { +- return qcow2_co_encdec(bs, file_cluster_offset, offset, buf, len, +- qcrypto_block_decrypt); ++ return qcow2_co_encdec(bs, host_offset, guest_offset, buf, len, ++ qcrypto_block_decrypt); + } +diff --git a/block/qcow2.c b/block/qcow2.c +index dc4302f..d4c4f24 100644 +--- 
a/block/qcow2.c ++++ b/block/qcow2.c +@@ -2073,7 +2073,8 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, + assert(s->crypto); + assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(cur_bytes, BDRV_SECTOR_SIZE)); +- if (qcow2_co_decrypt(bs, cluster_offset, offset, ++ if (qcow2_co_decrypt(bs, cluster_offset + offset_in_cluster, ++ offset, + cluster_data, cur_bytes) < 0) { + ret = -EIO; + goto fail; +@@ -2288,7 +2289,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, + QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); + qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); + +- if (qcow2_co_encrypt(bs, cluster_offset, offset, ++ if (qcow2_co_encrypt(bs, cluster_offset + offset_in_cluster, offset, + cluster_data, cur_bytes) < 0) { + ret = -EIO; + goto out_unlocked; +diff --git a/block/qcow2.h b/block/qcow2.h +index fc1b0d3..b54e734 100644 +--- a/block/qcow2.h ++++ b/block/qcow2.h +@@ -757,10 +757,10 @@ ssize_t coroutine_fn + qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size, + const void *src, size_t src_size); + int coroutine_fn +-qcow2_co_encrypt(BlockDriverState *bs, uint64_t file_cluster_offset, +- uint64_t offset, void *buf, size_t len); ++qcow2_co_encrypt(BlockDriverState *bs, uint64_t host_offset, ++ uint64_t guest_offset, void *buf, size_t len); + int coroutine_fn +-qcow2_co_decrypt(BlockDriverState *bs, uint64_t file_cluster_offset, +- uint64_t offset, void *buf, size_t len); ++qcow2_co_decrypt(BlockDriverState *bs, uint64_t host_offset, ++ uint64_t guest_offset, void *buf, size_t len); + + #endif +-- +1.8.3.1 + diff --git a/kvm-qemu-iotests-Add-test-for-bz-1745922.patch b/kvm-qemu-iotests-Add-test-for-bz-1745922.patch new file mode 100644 index 0000000..00751f5 --- /dev/null +++ b/kvm-qemu-iotests-Add-test-for-bz-1745922.patch @@ -0,0 +1,191 @@ +From a888b935e29a08f0ace84906fee84b41a5f8b95d Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 
24 Sep 2019 21:11:52 +0100 +Subject: [PATCH 4/4] qemu-iotests: Add test for bz #1745922 + +RH-Author: Maxim Levitsky +Message-id: <20190924211152.13461-5-mlevitsk@redhat.com> +Patchwork-id: 90877 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v2 4/4] qemu-iotests: Add test for bz #1745922 +Bugzilla: 1745922 +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz +RH-Acked-by: Danilo de Paula + +Signed-off-by: Maxim Levitsky +Tested-by: Vladimir Sementsov-Ogievskiy +Message-id: 20190915203655.21638-4-mlevitsk@redhat.com +Reviewed-by: Max Reitz +Signed-off-by: Max Reitz +(cherry picked from commit 1825cc0783ccf0ec5d9f0b225a99b340bdd4c68f) +Signed-off-by: Maxim Levitsky + + Conflicts: + tests/qemu-iotests/group + usual conflicts with missing tests + +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/263 | 91 ++++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/263.out | 40 ++++++++++++++++++++ + tests/qemu-iotests/group | 1 + + 3 files changed, 132 insertions(+) + create mode 100755 tests/qemu-iotests/263 + create mode 100644 tests/qemu-iotests/263.out + +diff --git a/tests/qemu-iotests/263 b/tests/qemu-iotests/263 +new file mode 100755 +index 0000000..d2c030f +--- /dev/null ++++ b/tests/qemu-iotests/263 +@@ -0,0 +1,91 @@ ++#!/usr/bin/env bash ++# ++# Test encrypted write that crosses cluster boundary of two unallocated clusters ++# Based on 188 ++# ++# Copyright (C) 2019 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. 
++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++# creator ++owner=mlevitsk@redhat.com ++ ++seq=`basename $0` ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_test_img ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++. ./common.rc ++. ./common.filter ++ ++_supported_fmt qcow2 ++_supported_proto generic ++_supported_os Linux ++ ++ ++size=1M ++ ++SECRET="secret,id=sec0,data=astrochicken" ++QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT ++ ++ ++_run_test() ++{ ++ echo "== reading the whole image ==" ++ $QEMU_IO --object $SECRET -c "read -P 0 0 $size" --image-opts "$1" | _filter_qemu_io | _filter_testdir ++ ++ echo ++ echo "== write two 512 byte sectors on a cluster boundary ==" ++ $QEMU_IO --object $SECRET -c "write -P 0xAA 0xFE00 0x400" --image-opts "$1" | _filter_qemu_io | _filter_testdir ++ ++ echo ++ echo "== verify that the rest of the image is not changed ==" ++ $QEMU_IO --object $SECRET -c "read -P 0x00 0x00000 0xFE00" --image-opts "$1" | _filter_qemu_io | _filter_testdir ++ $QEMU_IO --object $SECRET -c "read -P 0xAA 0x0FE00 0x400" --image-opts "$1" | _filter_qemu_io | _filter_testdir ++ $QEMU_IO --object $SECRET -c "read -P 0x00 0x10200 0xEFE00" --image-opts "$1" | _filter_qemu_io | _filter_testdir ++ ++} ++ ++ ++echo ++echo "testing LUKS qcow2 encryption" ++echo ++ ++_make_test_img --object $SECRET -o "encrypt.format=luks,encrypt.key-secret=sec0,encrypt.iter-time=10,cluster_size=64K" $size ++_run_test "driver=$IMGFMT,encrypt.key-secret=sec0,file.filename=$TEST_IMG" ++_cleanup_test_img ++ ++echo ++echo "testing legacy AES qcow2 encryption" ++echo ++ ++ ++_make_test_img --object $SECRET -o "encrypt.format=aes,encrypt.key-secret=sec0,cluster_size=64K" $size ++_run_test "driver=$IMGFMT,encrypt.key-secret=sec0,file.filename=$TEST_IMG" ++_cleanup_test_img ++ ++ ++ ++# success, all done 
++echo "*** done" ++rm -f $seq.full ++status=0 +diff --git a/tests/qemu-iotests/263.out b/tests/qemu-iotests/263.out +new file mode 100644 +index 0000000..0c982c5 +--- /dev/null ++++ b/tests/qemu-iotests/263.out +@@ -0,0 +1,40 @@ ++QA output created by 263 ++ ++testing LUKS qcow2 encryption ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 encrypt.format=luks encrypt.key-secret=sec0 encrypt.iter-time=10 ++== reading the whole image == ++read 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== write two 512 byte sectors on a cluster boundary == ++wrote 1024/1024 bytes at offset 65024 ++1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== verify that the rest of the image is not changed == ++read 65024/65024 bytes at offset 0 ++63.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 1024/1024 bytes at offset 65024 ++1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 982528/982528 bytes at offset 66048 ++959.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++testing legacy AES qcow2 encryption ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 encrypt.format=aes encrypt.key-secret=sec0 ++== reading the whole image == ++read 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== write two 512 byte sectors on a cluster boundary == ++wrote 1024/1024 bytes at offset 65024 ++1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== verify that the rest of the image is not changed == ++read 65024/65024 bytes at offset 0 ++63.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 1024/1024 bytes at offset 65024 ++1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 982528/982528 bytes at offset 66048 ++959.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++*** done +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index 813db25..4a7e08f 100644 +--- a/tests/qemu-iotests/group 
++++ b/tests/qemu-iotests/group +@@ -271,3 +271,4 @@ + 254 rw backing quick + 255 rw quick + 256 rw quick ++263 rw quick +\ No newline at end of file +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 697bb0c..50ee372 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.1.0 -Release: 11%{?dist} +Release: 12%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -208,6 +208,14 @@ Patch61: kvm-hostmem-file-fix-pmem-file-size-check.patch Patch62: kvm-memory-fetch-pmem-size-in-get_file_size.patch # For bz#1753992 - core dump when testing persistent reservation in guest Patch63: kvm-pr-manager-Fix-invalid-g_free-crash-bug.patch +# For bz#1745922 - Luks-inside-qcow2 snapshot cannot boot after 'qemu-img rebase' +Patch64: kvm-block-Use-QEMU_IS_ALIGNED.patch +# For bz#1745922 - Luks-inside-qcow2 snapshot cannot boot after 'qemu-img rebase' +Patch65: kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch +# For bz#1745922 - Luks-inside-qcow2 snapshot cannot boot after 'qemu-img rebase' +Patch66: kvm-block-qcow2-refactor-encryption-code.patch +# For bz#1745922 - Luks-inside-qcow2 snapshot cannot boot after 'qemu-img rebase' +Patch67: kvm-qemu-iotests-Add-test-for-bz-1745922.patch BuildRequires: wget BuildRequires: rpm-build @@ -1149,6 +1157,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Thu Sep 26 2019 Danilo Cesar Lemes de Paula - 4.1.0-12.el8 +- kvm-block-Use-QEMU_IS_ALIGNED.patch [bz#1745922] +- kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch [bz#1745922] +- kvm-block-qcow2-refactor-encryption-code.patch [bz#1745922] +- kvm-qemu-iotests-Add-test-for-bz-1745922.patch [bz#1745922] +- Resolves: bz#1745922 + (Luks-inside-qcow2 snapshot cannot boot after 'qemu-img rebase') + * Mon Sep 23 2019 Danilo Cesar Lemes de Paula - 
4.1.0-11.el8 - kvm-blockjob-update-nodes-head-while-removing-all-bdrv.patch [bz#1746631] - kvm-hostmem-file-fix-pmem-file-size-check.patch [bz#1724008 bz#1736788] From 1eb8acbee7b4a107a9b97edb282ef096f37f11fc Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Fri, 27 Sep 2019 18:09:27 +0100 Subject: [PATCH 053/195] * Fri Sep 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-13.el8 - kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch [bz#1748253] - kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch [bz#1744955] - Resolves: bz#1744955 (Qemu hang when block resize a qcow2 image) - Resolves: bz#1748253 (QEMU crashes (core dump) when using the integrated NDB server with data-plane) --- ...h-client-channel-to-the-export-s-Aio.patch | 60 ++++++++++++ ...ule-virtio_notify_config-to-run-on-m.patch | 91 +++++++++++++++++++ qemu-kvm.spec | 14 ++- 3 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch create mode 100644 kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch diff --git a/kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch b/kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch new file mode 100644 index 0000000..1671218 --- /dev/null +++ b/kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch @@ -0,0 +1,60 @@ +From 394dd52ce4dbd69cd5eca9a9928c442650cc3fd2 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 27 Sep 2019 11:13:24 +0100 +Subject: [PATCH 1/2] nbd/server: attach client channel to the export's + AioContext + +RH-Author: Sergio Lopez Pascual +Message-id: <20190927111324.17949-2-slp@redhat.com> +Patchwork-id: 90905 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] nbd/server: attach client channel to the export's AioContext +Bugzilla: 1748253 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Max Reitz + +On creation, the export's AioContext is set to the same one as the 
+BlockBackend, while the AioContext in the client QIOChannel is left +untouched. + +As a result, when using data-plane, nbd_client_receive_next_request() +schedules coroutines in the IOThread AioContext, while the client's +QIOChannel is serviced from the main_loop, potentially triggering the +assertion at qio_channel_restart_[read|write]. + +To fix this, as soon as we have the export corresponding to the client, +we call qio_channel_attach_aio_context() to attach the QIOChannel +context to the export's AioContext. This matches with the logic at +blk_aio_attached(). + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1748253 +Signed-off-by: Sergio Lopez +Message-Id: <20190912110032.26395-1-slp@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Eric Blake +(cherry picked from commit b4961249af0403fa55aae57c4c8806b24f7a7b33) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + nbd/server.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/nbd/server.c b/nbd/server.c +index 10faedc..ea0353a 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -1296,6 +1296,11 @@ static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp) + return ret; + } + ++ /* Attach the channel to the same AioContext as the export */ ++ if (client->exp && client->exp->ctx) { ++ qio_channel_attach_aio_context(client->ioc, client->exp->ctx); ++ } ++ + assert(!client->optlen); + trace_nbd_negotiate_success(); + +-- +1.8.3.1 + diff --git a/kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch b/kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch new file mode 100644 index 0000000..2cf7fe8 --- /dev/null +++ b/kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch @@ -0,0 +1,91 @@ +From 6b292920dbdd463bb80b82bef2063623a8e2da17 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 27 Sep 2019 11:46:41 +0100 +Subject: [PATCH 2/2] virtio-blk: schedule virtio_notify_config to run on main + context +MIME-Version: 1.0 
+Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Sergio Lopez Pascual +Message-id: <20190927114641.20992-2-slp@redhat.com> +Patchwork-id: 90907 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] virtio-blk: schedule virtio_notify_config to run on main context +Bugzilla: 1744955 +RH-Acked-by: Eric Blake +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Max Reitz + +virtio_notify_config() needs to acquire the global mutex, which isn't +allowed from an iothread, and may lead to a deadlock like this: + + - main thread + * Has acquired: qemu_global_mutex. + * Is trying to acquire: iothread AioContext lock via + AIO_WAIT_WHILE (after aio_poll). + + - iothread + * Has acquired: AioContext lock. + * Is trying to acquire: qemu_global_mutex (via + virtio_notify_config->prepare_mmio_access). + +If virtio_blk_resize() is called from an iothread, schedule +virtio_notify_config() to be run in the main context BH. + +[Removed unnecessary newline as suggested by Kevin Wolf +. +--Stefan] + +Signed-off-by: Sergio Lopez +Reviewed-by: Kevin Wolf +Message-id: 20190916112411.21636-1-slp@redhat.com +Message-Id: <20190916112411.21636-1-slp@redhat.com> +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit f9a7e3698a737ee75a7b0af34203303df982550f) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/block/virtio-blk.c | 16 +++++++++++++++- + 1 file changed, 15 insertions(+), 1 deletion(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index cbb3729..0d9adcd 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -16,6 +16,7 @@ + #include "qemu/iov.h" + #include "qemu/module.h" + #include "qemu/error-report.h" ++#include "qemu/main-loop.h" + #include "trace.h" + #include "hw/block/block.h" + #include "sysemu/blockdev.h" +@@ -1082,11 +1083,24 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, + return 0; + } + ++static void virtio_resize_cb(void *opaque) ++{ ++ VirtIODevice *vdev = opaque; ++ ++ assert(qemu_get_current_aio_context() == qemu_get_aio_context()); ++ virtio_notify_config(vdev); ++} ++ + static void virtio_blk_resize(void *opaque) + { + VirtIODevice *vdev = VIRTIO_DEVICE(opaque); + +- virtio_notify_config(vdev); ++ /* ++ * virtio_notify_config() needs to acquire the global mutex, ++ * so it can't be called from an iothread. Instead, schedule ++ * it to be run in the main context BH. ++ */ ++ aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); + } + + static const BlockDevOps virtio_block_ops = { +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 50ee372..a631953 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.1.0 -Release: 12%{?dist} +Release: 13%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -216,6 +216,10 @@ Patch65: kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch Patch66: kvm-block-qcow2-refactor-encryption-code.patch # For bz#1745922 - Luks-inside-qcow2 snapshot cannot boot after 'qemu-img rebase' Patch67: kvm-qemu-iotests-Add-test-for-bz-1745922.patch +# For bz#1748253 - QEMU crashes (core dump) when using the integrated NDB server with data-plane +Patch68: kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch +# For bz#1744955 - Qemu hang when block resize a qcow2 image +Patch69: kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch BuildRequires: wget BuildRequires: rpm-build @@ -1157,6 +1161,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Fri Sep 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-13.el8 +- kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch [bz#1748253] +- kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch [bz#1744955] +- Resolves: bz#1744955 + (Qemu hang when block resize a qcow2 image) +- Resolves: bz#1748253 + (QEMU crashes (core dump) when using the integrated NDB server with data-plane) + * Thu Sep 26 2019 Danilo Cesar Lemes de Paula - 4.1.0-12.el8 - kvm-block-Use-QEMU_IS_ALIGNED.patch [bz#1745922] - kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch [bz#1745922] From 32a3ac0fa97203315d0ec244a1810b341b69acc4 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Tue, 12 Nov 2019 01:37:10 +0000 Subject: [PATCH 054/195] * Tue Nov 12 2019 Danilo Cesar Lemes de Paula - 4.1.0-14.el8 - kvm-blockdev-reduce-aio_context-locked-sections-in-bitma.patch [bz#1756413] - kvm-qapi-implement-block-dirty-bitmap-remove-transaction.patch [bz#1756413] - kvm-iotests-test-bitmap-moving-inside-254.patch [bz#1756413] - kvm-spapr-xive-skip-partially-initialized-vCPUs-in-prese.patch [bz#1754710] - kvm-nbd-Grab-aio-context-lock-in-more-places.patch [bz#1741094] - kvm-tests-Use-iothreads-during-iotest-223.patch [bz#1741094] - Resolves: bz#1741094 ([Upstream]Incremental backup: Qemu coredump when expose an active bitmap via pull mode(data plane enable)) - Resolves: bz#1754710 (qemu core dumped when hotpluging vcpus) - Resolves: bz#1756413 (backport support for transactionable block-dirty-bitmap-remove for incremental backup support) --- ...aio_context-locked-sections-in-bitma.patch | 122 ++++++++ ...otests-test-bitmap-moving-inside-254.patch | 209 +++++++++++++ ...Grab-aio-context-lock-in-more-places.patch | 200 +++++++++++++ ...lock-dirty-bitmap-remove-transaction.patch | 274 ++++++++++++++++++ ...partially-initialized-vCPUs-in-prese.patch | 65 +++++ ...ests-Use-iothreads-during-iotest-223.patch | 73 +++++ qemu-kvm.spec | 28 +- 7 files changed, 970 insertions(+), 1 deletion(-) create mode 100644 kvm-blockdev-reduce-aio_context-locked-sections-in-bitma.patch create mode 100644 kvm-iotests-test-bitmap-moving-inside-254.patch create mode 100644 kvm-nbd-Grab-aio-context-lock-in-more-places.patch create mode 100644 kvm-qapi-implement-block-dirty-bitmap-remove-transaction.patch create mode 100644 kvm-spapr-xive-skip-partially-initialized-vCPUs-in-prese.patch create mode 100644 kvm-tests-Use-iothreads-during-iotest-223.patch diff --git a/kvm-blockdev-reduce-aio_context-locked-sections-in-bitma.patch b/kvm-blockdev-reduce-aio_context-locked-sections-in-bitma.patch new file mode 100644 index 0000000..cf8117f --- /dev/null +++ 
b/kvm-blockdev-reduce-aio_context-locked-sections-in-bitma.patch @@ -0,0 +1,122 @@ +From 107ad619739795199df98c56d0ad4db14fec3722 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Fri, 27 Sep 2019 20:18:44 +0100 +Subject: [PATCH 1/6] blockdev: reduce aio_context locked sections in bitmap + add/remove + +RH-Author: John Snow +Message-id: <20190927201846.6823-2-jsnow@redhat.com> +Patchwork-id: 90908 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/3] blockdev: reduce aio_context locked sections in bitmap add/remove +Bugzilla: 1756413 +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +From: Vladimir Sementsov-Ogievskiy + +Commit 0a6c86d024c52 returned these locks back to add/remove +functionality, to protect from intersection of persistent bitmap +related IO with other IO. But other bitmap-related functions called +here are unrelated to the problem, and there are no needs to keep these +calls inside critical sections. + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: John Snow +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20190708220502.12977-2-jsnow@redhat.com +Signed-off-by: John Snow +(cherry picked from commit 2899f41eef2806cf8eb119811c9d6fcf15ce80f6) +Signed-off-by: John Snow +Signed-off-by: Danilo C. L. 
de Paula +--- + blockdev.c | 30 +++++++++++++----------------- + 1 file changed, 13 insertions(+), 17 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 4d141e9..0124825 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -2811,7 +2811,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + { + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; +- AioContext *aio_context = NULL; + + if (!name || name[0] == '\0') { + error_setg(errp, "Bitmap name cannot be empty"); +@@ -2847,16 +2846,20 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + } + + if (persistent) { +- aio_context = bdrv_get_aio_context(bs); ++ AioContext *aio_context = bdrv_get_aio_context(bs); ++ bool ok; ++ + aio_context_acquire(aio_context); +- if (!bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp)) { +- goto out; ++ ok = bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp); ++ aio_context_release(aio_context); ++ if (!ok) { ++ return; + } + } + + bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp); + if (bitmap == NULL) { +- goto out; ++ return; + } + + if (disabled) { +@@ -2864,10 +2867,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + } + + bdrv_dirty_bitmap_set_persistence(bitmap, persistent); +- out: +- if (aio_context) { +- aio_context_release(aio_context); +- } + } + + void qmp_block_dirty_bitmap_remove(const char *node, const char *name, +@@ -2875,8 +2874,6 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name, + { + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; +- Error *local_err = NULL; +- AioContext *aio_context = NULL; + + bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); + if (!bitmap || !bs) { +@@ -2889,20 +2886,19 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name, + } + + if (bdrv_dirty_bitmap_get_persistence(bitmap)) { +- aio_context = bdrv_get_aio_context(bs); ++ AioContext *aio_context = bdrv_get_aio_context(bs); ++ Error *local_err 
= NULL; ++ + aio_context_acquire(aio_context); + bdrv_remove_persistent_dirty_bitmap(bs, name, &local_err); ++ aio_context_release(aio_context); + if (local_err != NULL) { + error_propagate(errp, local_err); +- goto out; ++ return; + } + } + + bdrv_release_dirty_bitmap(bs, bitmap); +- out: +- if (aio_context) { +- aio_context_release(aio_context); +- } + } + + /** +-- +1.8.3.1 + diff --git a/kvm-iotests-test-bitmap-moving-inside-254.patch b/kvm-iotests-test-bitmap-moving-inside-254.patch new file mode 100644 index 0000000..064f7d5 --- /dev/null +++ b/kvm-iotests-test-bitmap-moving-inside-254.patch @@ -0,0 +1,209 @@ +From b15fa18e724e356bd889f0566d512daedb9a09dc Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Fri, 27 Sep 2019 20:18:46 +0100 +Subject: [PATCH 3/6] iotests: test bitmap moving inside 254 + +RH-Author: John Snow +Message-id: <20190927201846.6823-4-jsnow@redhat.com> +Patchwork-id: 90910 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 3/3] iotests: test bitmap moving inside 254 +Bugzilla: 1756413 +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +From: Vladimir Sementsov-Ogievskiy + +Test persistent bitmap copying with and without removal of original +bitmap. + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20190708220502.12977-4-jsnow@redhat.com +[Edited comment "bitmap1" --> "bitmap2" as per review. --js] +Signed-off-by: John Snow +(cherry picked from commit 3f7b2fa8cd476fe871ce1d996c640317730752a0) +Signed-off-by: John Snow + +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/254 | 30 +++++++++++++++-- + tests/qemu-iotests/254.out | 82 ++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 110 insertions(+), 2 deletions(-) + +diff --git a/tests/qemu-iotests/254 b/tests/qemu-iotests/254 +index 8edba91..09584f3 100755 +--- a/tests/qemu-iotests/254 ++++ b/tests/qemu-iotests/254 +@@ -1,6 +1,6 @@ + #!/usr/bin/env python + # +-# Test external snapshot with bitmap copying. ++# Test external snapshot with bitmap copying and moving. + # + # Copyright (c) 2019 Virtuozzo International GmbH. All rights reserved. + # +@@ -32,6 +32,10 @@ vm = iotests.VM().add_drive(disk, opts='node-name=base') + vm.launch() + + vm.qmp_log('block-dirty-bitmap-add', node='drive0', name='bitmap0') ++vm.qmp_log('block-dirty-bitmap-add', node='drive0', name='bitmap1', ++ persistent=True) ++vm.qmp_log('block-dirty-bitmap-add', node='drive0', name='bitmap2', ++ persistent=True) + + vm.hmp_qemu_io('drive0', 'write 0 512K') + +@@ -39,16 +43,38 @@ vm.qmp_log('transaction', indent=2, actions=[ + {'type': 'blockdev-snapshot-sync', + 'data': {'device': 'drive0', 'snapshot-file': top, + 'snapshot-node-name': 'snap'}}, ++ ++ # copy non-persistent bitmap0 + {'type': 'block-dirty-bitmap-add', + 'data': {'node': 'snap', 'name': 'bitmap0'}}, + {'type': 'block-dirty-bitmap-merge', + 'data': {'node': 'snap', 'target': 'bitmap0', +- 'bitmaps': [{'node': 'base', 'name': 'bitmap0'}]}} ++ 'bitmaps': [{'node': 'base', 'name': 'bitmap0'}]}}, ++ ++ # copy persistent bitmap1, original will be saved to base image ++ {'type': 'block-dirty-bitmap-add', ++ 'data': {'node': 'snap', 'name': 'bitmap1', 'persistent': True}}, ++ {'type': 'block-dirty-bitmap-merge', ++ 'data': {'node': 'snap', 'target': 'bitmap1', ++ 'bitmaps': [{'node': 'base', 'name': 'bitmap1'}]}}, ++ ++ # move persistent bitmap2, original will be removed and not saved ++ # to base image ++ {'type': 'block-dirty-bitmap-add', ++ 'data': {'node': 'snap', 'name': 'bitmap2', 'persistent': 
True}}, ++ {'type': 'block-dirty-bitmap-merge', ++ 'data': {'node': 'snap', 'target': 'bitmap2', ++ 'bitmaps': [{'node': 'base', 'name': 'bitmap2'}]}}, ++ {'type': 'block-dirty-bitmap-remove', ++ 'data': {'node': 'base', 'name': 'bitmap2'}} + ], filters=[iotests.filter_qmp_testfiles]) + + result = vm.qmp('query-block')['return'][0] + log("query-block: device = {}, node-name = {}, dirty-bitmaps:".format( + result['device'], result['inserted']['node-name'])) + log(result['dirty-bitmaps'], indent=2) ++log("\nbitmaps in backing image:") ++log(result['inserted']['image']['backing-image']['format-specific'] \ ++ ['data']['bitmaps'], indent=2) + + vm.shutdown() +diff --git a/tests/qemu-iotests/254.out b/tests/qemu-iotests/254.out +index d7394cf..d185c05 100644 +--- a/tests/qemu-iotests/254.out ++++ b/tests/qemu-iotests/254.out +@@ -1,5 +1,9 @@ + {"execute": "block-dirty-bitmap-add", "arguments": {"name": "bitmap0", "node": "drive0"}} + {"return": {}} ++{"execute": "block-dirty-bitmap-add", "arguments": {"name": "bitmap1", "node": "drive0", "persistent": true}} ++{"return": {}} ++{"execute": "block-dirty-bitmap-add", "arguments": {"name": "bitmap2", "node": "drive0", "persistent": true}} ++{"return": {}} + { + "execute": "transaction", + "arguments": { +@@ -31,6 +35,55 @@ + "target": "bitmap0" + }, + "type": "block-dirty-bitmap-merge" ++ }, ++ { ++ "data": { ++ "name": "bitmap1", ++ "node": "snap", ++ "persistent": true ++ }, ++ "type": "block-dirty-bitmap-add" ++ }, ++ { ++ "data": { ++ "bitmaps": [ ++ { ++ "name": "bitmap1", ++ "node": "base" ++ } ++ ], ++ "node": "snap", ++ "target": "bitmap1" ++ }, ++ "type": "block-dirty-bitmap-merge" ++ }, ++ { ++ "data": { ++ "name": "bitmap2", ++ "node": "snap", ++ "persistent": true ++ }, ++ "type": "block-dirty-bitmap-add" ++ }, ++ { ++ "data": { ++ "bitmaps": [ ++ { ++ "name": "bitmap2", ++ "node": "base" ++ } ++ ], ++ "node": "snap", ++ "target": "bitmap2" ++ }, ++ "type": "block-dirty-bitmap-merge" ++ }, ++ { ++ "data": { ++ 
"name": "bitmap2", ++ "node": "base" ++ }, ++ "type": "block-dirty-bitmap-remove" + } + ] + } +@@ -44,9 +97,38 @@ query-block: device = drive0, node-name = snap, dirty-bitmaps: + "busy": false, + "count": 524288, + "granularity": 65536, ++ "name": "bitmap2", ++ "persistent": true, ++ "recording": true, ++ "status": "active" ++ }, ++ { ++ "busy": false, ++ "count": 524288, ++ "granularity": 65536, ++ "name": "bitmap1", ++ "persistent": true, ++ "recording": true, ++ "status": "active" ++ }, ++ { ++ "busy": false, ++ "count": 524288, ++ "granularity": 65536, + "name": "bitmap0", + "persistent": false, + "recording": true, + "status": "active" + } + ] ++ ++bitmaps in backing image: ++[ ++ { ++ "flags": [ ++ "auto" ++ ], ++ "granularity": 65536, ++ "name": "bitmap1" ++ } ++] +-- +1.8.3.1 + diff --git a/kvm-nbd-Grab-aio-context-lock-in-more-places.patch b/kvm-nbd-Grab-aio-context-lock-in-more-places.patch new file mode 100644 index 0000000..46df547 --- /dev/null +++ b/kvm-nbd-Grab-aio-context-lock-in-more-places.patch @@ -0,0 +1,200 @@ +From 7cf87a669fa0dd580013b0ca5e4510f12aff2319 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 9 Oct 2019 14:10:07 +0100 +Subject: [PATCH 5/6] nbd: Grab aio context lock in more places + +RH-Author: Eric Blake +Message-id: <20191009141008.24439-2-eblake@redhat.com> +Patchwork-id: 91353 +O-Subject: [RHEL-AV-8.1.1 qemu-kvm PATCH 1/2] nbd: Grab aio context lock in more places +Bugzilla: 1741094 +RH-Acked-by: John Snow +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella + +When iothreads are in use, the failure to grab the aio context results +in an assertion failure when trying to unlock things during blk_unref, +when trying to unlock a mutex that was not locked. In short, all +calls to nbd_export_put need to done while within the correct aio +context. 
But since nbd_export_put can recursively reach itself via +nbd_export_close, and recursively grabbing the context would deadlock, +we can't do the context grab directly in those functions, but must do +so in their callers. + +Hoist the use of the correct aio_context from nbd_export_new() to its +caller qmp_nbd_server_add(). Then tweak qmp_nbd_server_remove(), +nbd_eject_notifier(), and nbd_export_close_all() to grab the right +context, so that all callers during qemu now own the context before +nbd_export_put() can call blk_unref(). + +Remaining uses in qemu-nbd don't matter (since that use case does not +support iothreads). + +Suggested-by: Kevin Wolf +Signed-off-by: Eric Blake +Message-Id: <20190917023917.32226-1-eblake@redhat.com> +Reviewed-by: Sergio Lopez +(cherry picked from commit 61bc846d8c58535af6884b637a4005dd6111ea95) +Signed-off-by: Eric Blake +Signed-off-by: Danilo C. L. de Paula +--- + blockdev-nbd.c | 14 ++++++++++++-- + include/block/nbd.h | 1 + + nbd/server.c | 22 ++++++++++++++++++---- + 3 files changed, 31 insertions(+), 6 deletions(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 06041a2..bed9370 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -152,6 +152,7 @@ void qmp_nbd_server_add(const char *device, bool has_name, const char *name, + BlockBackend *on_eject_blk; + NBDExport *exp; + int64_t len; ++ AioContext *aio_context; + + if (!nbd_server) { + error_setg(errp, "NBD server not running"); +@@ -174,11 +175,13 @@ void qmp_nbd_server_add(const char *device, bool has_name, const char *name, + return; + } + ++ aio_context = bdrv_get_aio_context(bs); ++ aio_context_acquire(aio_context); + len = bdrv_getlength(bs); + if (len < 0) { + error_setg_errno(errp, -len, + "Failed to determine the NBD export's length"); +- return; ++ goto out; + } + + if (!has_writable) { +@@ -192,13 +195,16 @@ void qmp_nbd_server_add(const char *device, bool has_name, const char *name, + writable ? 
0 : NBD_FLAG_READ_ONLY, + NULL, false, on_eject_blk, errp); + if (!exp) { +- return; ++ goto out; + } + + /* The list of named exports has a strong reference to this export now and + * our only way of accessing it is through nbd_export_find(), so we can drop + * the strong reference that is @exp. */ + nbd_export_put(exp); ++ ++ out: ++ aio_context_release(aio_context); + } + + void qmp_nbd_server_remove(const char *name, +@@ -206,6 +212,7 @@ void qmp_nbd_server_remove(const char *name, + Error **errp) + { + NBDExport *exp; ++ AioContext *aio_context; + + if (!nbd_server) { + error_setg(errp, "NBD server not running"); +@@ -222,7 +229,10 @@ void qmp_nbd_server_remove(const char *name, + mode = NBD_SERVER_REMOVE_MODE_SAFE; + } + ++ aio_context = nbd_export_aio_context(exp); ++ aio_context_acquire(aio_context); + nbd_export_remove(exp, mode, errp); ++ aio_context_release(aio_context); + } + + void qmp_nbd_server_stop(Error **errp) +diff --git a/include/block/nbd.h b/include/block/nbd.h +index bb9f5bc..82f9b9e 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -335,6 +335,7 @@ void nbd_export_put(NBDExport *exp); + + BlockBackend *nbd_export_get_blockdev(NBDExport *exp); + ++AioContext *nbd_export_aio_context(NBDExport *exp); + NBDExport *nbd_export_find(const char *name); + void nbd_export_close_all(void); + +diff --git a/nbd/server.c b/nbd/server.c +index ea0353a..81f8217 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -1460,7 +1460,12 @@ static void blk_aio_detach(void *opaque) + static void nbd_eject_notifier(Notifier *n, void *data) + { + NBDExport *exp = container_of(n, NBDExport, eject_notifier); ++ AioContext *aio_context; ++ ++ aio_context = exp->ctx; ++ aio_context_acquire(aio_context); + nbd_export_close(exp); ++ aio_context_release(aio_context); + } + + NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset, +@@ -1479,12 +1484,11 @@ NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset, + * NBD exports are used 
for non-shared storage migration. Make sure + * that BDRV_O_INACTIVE is cleared and the image is ready for write + * access since the export could be available before migration handover. ++ * ctx was acquired in the caller. + */ + assert(name); + ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); + bdrv_invalidate_cache(bs, NULL); +- aio_context_release(ctx); + + /* Don't allow resize while the NBD server is running, otherwise we don't + * care what happens with the node. */ +@@ -1492,7 +1496,7 @@ NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset, + if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) { + perm |= BLK_PERM_WRITE; + } +- blk = blk_new(bdrv_get_aio_context(bs), perm, ++ blk = blk_new(ctx, perm, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD); + ret = blk_insert_bs(blk, bs, errp); +@@ -1549,7 +1553,7 @@ NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset, + } + + exp->close = close; +- exp->ctx = blk_get_aio_context(blk); ++ exp->ctx = ctx; + blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); + + if (on_eject_blk) { +@@ -1582,6 +1586,12 @@ NBDExport *nbd_export_find(const char *name) + return NULL; + } + ++AioContext * ++nbd_export_aio_context(NBDExport *exp) ++{ ++ return exp->ctx; ++} ++ + void nbd_export_close(NBDExport *exp) + { + NBDClient *client, *next; +@@ -1676,9 +1686,13 @@ BlockBackend *nbd_export_get_blockdev(NBDExport *exp) + void nbd_export_close_all(void) + { + NBDExport *exp, *next; ++ AioContext *aio_context; + + QTAILQ_FOREACH_SAFE(exp, &exports, next, next) { ++ aio_context = exp->ctx; ++ aio_context_acquire(aio_context); + nbd_export_close(exp); ++ aio_context_release(aio_context); + } + } + +-- +1.8.3.1 + diff --git a/kvm-qapi-implement-block-dirty-bitmap-remove-transaction.patch b/kvm-qapi-implement-block-dirty-bitmap-remove-transaction.patch new file mode 100644 index 0000000..26141a3 --- /dev/null +++ 
b/kvm-qapi-implement-block-dirty-bitmap-remove-transaction.patch @@ -0,0 +1,274 @@ +From fd8ecebf0c0632e473bcb8bb08dc8311a5530dcf Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Fri, 27 Sep 2019 20:18:45 +0100 +Subject: [PATCH 2/6] qapi: implement block-dirty-bitmap-remove transaction + action + +RH-Author: John Snow +Message-id: <20190927201846.6823-3-jsnow@redhat.com> +Patchwork-id: 90911 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/3] qapi: implement block-dirty-bitmap-remove transaction action +Bugzilla: 1756413 +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +It is used to do transactional movement of the bitmap (which is +possible in conjunction with merge command). Transactional bitmap +movement is needed in scenarios with external snapshot, when we don't +want to leave copy of the bitmap in the base image. + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20190708220502.12977-3-jsnow@redhat.com +[Edited "since" version to 4.2 --js] +Signed-off-by: John Snow +(cherry picked from commit c4e4b0fa598ddc9cee6ba7a06899ce0a8dae6c61) +Signed-off-by: John Snow + +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 2 +- + block/dirty-bitmap.c | 15 ++++---- + blockdev.c | 79 ++++++++++++++++++++++++++++++++++++++---- + include/block/dirty-bitmap.h | 2 +- + migration/block-dirty-bitmap.c | 2 +- + qapi/transaction.json | 2 ++ + 6 files changed, 85 insertions(+), 17 deletions(-) + +diff --git a/block.c b/block.c +index cbd8da5..92a3e9f 100644 +--- a/block.c ++++ b/block.c +@@ -5334,7 +5334,7 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, + for (bm = bdrv_dirty_bitmap_next(bs, NULL); bm; + bm = bdrv_dirty_bitmap_next(bs, bm)) + { +- bdrv_dirty_bitmap_set_migration(bm, false); ++ bdrv_dirty_bitmap_skip_store(bm, false); + } + + ret = refresh_total_sectors(bs, bs->total_sectors); +diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c +index 95a9c2a..a308e1f 100644 +--- a/block/dirty-bitmap.c ++++ b/block/dirty-bitmap.c +@@ -48,10 +48,9 @@ struct BdrvDirtyBitmap { + bool inconsistent; /* bitmap is persistent, but inconsistent. + It cannot be used at all in any way, except + a QMP user can remove it. */ +- bool migration; /* Bitmap is selected for migration, it should +- not be stored on the next inactivation +- (persistent flag doesn't matter until next +- invalidation).*/ ++ bool skip_store; /* We are either migrating or deleting this ++ * bitmap; it should not be stored on the next ++ * inactivation. */ + QLIST_ENTRY(BdrvDirtyBitmap) list; + }; + +@@ -757,16 +756,16 @@ void bdrv_dirty_bitmap_set_inconsistent(BdrvDirtyBitmap *bitmap) + } + + /* Called with BQL taken. 
*/ +-void bdrv_dirty_bitmap_set_migration(BdrvDirtyBitmap *bitmap, bool migration) ++void bdrv_dirty_bitmap_skip_store(BdrvDirtyBitmap *bitmap, bool skip) + { + qemu_mutex_lock(bitmap->mutex); +- bitmap->migration = migration; ++ bitmap->skip_store = skip; + qemu_mutex_unlock(bitmap->mutex); + } + + bool bdrv_dirty_bitmap_get_persistence(BdrvDirtyBitmap *bitmap) + { +- return bitmap->persistent && !bitmap->migration; ++ return bitmap->persistent && !bitmap->skip_store; + } + + bool bdrv_dirty_bitmap_inconsistent(const BdrvDirtyBitmap *bitmap) +@@ -778,7 +777,7 @@ bool bdrv_has_changed_persistent_bitmaps(BlockDriverState *bs) + { + BdrvDirtyBitmap *bm; + QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { +- if (bm->persistent && !bm->readonly && !bm->migration) { ++ if (bm->persistent && !bm->readonly && !bm->skip_store) { + return true; + } + } +diff --git a/blockdev.c b/blockdev.c +index 0124825..800b3dc 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -2134,6 +2134,51 @@ static void block_dirty_bitmap_merge_prepare(BlkActionState *common, + errp); + } + ++static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( ++ const char *node, const char *name, bool release, ++ BlockDriverState **bitmap_bs, Error **errp); ++ ++static void block_dirty_bitmap_remove_prepare(BlkActionState *common, ++ Error **errp) ++{ ++ BlockDirtyBitmap *action; ++ BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, ++ common, common); ++ ++ if (action_check_completion_mode(common, errp) < 0) { ++ return; ++ } ++ ++ action = common->action->u.block_dirty_bitmap_remove.data; ++ ++ state->bitmap = do_block_dirty_bitmap_remove(action->node, action->name, ++ false, &state->bs, errp); ++ if (state->bitmap) { ++ bdrv_dirty_bitmap_skip_store(state->bitmap, true); ++ bdrv_dirty_bitmap_set_busy(state->bitmap, true); ++ } ++} ++ ++static void block_dirty_bitmap_remove_abort(BlkActionState *common) ++{ ++ BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, ++ common, common); ++ ++ if 
(state->bitmap) { ++ bdrv_dirty_bitmap_skip_store(state->bitmap, false); ++ bdrv_dirty_bitmap_set_busy(state->bitmap, false); ++ } ++} ++ ++static void block_dirty_bitmap_remove_commit(BlkActionState *common) ++{ ++ BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, ++ common, common); ++ ++ bdrv_dirty_bitmap_set_busy(state->bitmap, false); ++ bdrv_release_dirty_bitmap(state->bs, state->bitmap); ++} ++ + static void abort_prepare(BlkActionState *common, Error **errp) + { + error_setg(errp, "Transaction aborted using Abort action"); +@@ -2211,6 +2256,12 @@ static const BlkActionOps actions[] = { + .commit = block_dirty_bitmap_free_backup, + .abort = block_dirty_bitmap_restore, + }, ++ [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE] = { ++ .instance_size = sizeof(BlockDirtyBitmapState), ++ .prepare = block_dirty_bitmap_remove_prepare, ++ .commit = block_dirty_bitmap_remove_commit, ++ .abort = block_dirty_bitmap_remove_abort, ++ }, + /* Where are transactions for MIRROR, COMMIT and STREAM? + * Although these blockjobs use transaction callbacks like the backup job, + * these jobs do not necessarily adhere to transaction semantics. 
+@@ -2869,20 +2920,21 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + bdrv_dirty_bitmap_set_persistence(bitmap, persistent); + } + +-void qmp_block_dirty_bitmap_remove(const char *node, const char *name, +- Error **errp) ++static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( ++ const char *node, const char *name, bool release, ++ BlockDriverState **bitmap_bs, Error **errp) + { + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; + + bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); + if (!bitmap || !bs) { +- return; ++ return NULL; + } + + if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO, + errp)) { +- return; ++ return NULL; + } + + if (bdrv_dirty_bitmap_get_persistence(bitmap)) { +@@ -2892,13 +2944,28 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name, + aio_context_acquire(aio_context); + bdrv_remove_persistent_dirty_bitmap(bs, name, &local_err); + aio_context_release(aio_context); ++ + if (local_err != NULL) { + error_propagate(errp, local_err); +- return; ++ return NULL; + } + } + +- bdrv_release_dirty_bitmap(bs, bitmap); ++ if (release) { ++ bdrv_release_dirty_bitmap(bs, bitmap); ++ } ++ ++ if (bitmap_bs) { ++ *bitmap_bs = bs; ++ } ++ ++ return release ? 
NULL : bitmap; ++} ++ ++void qmp_block_dirty_bitmap_remove(const char *node, const char *name, ++ Error **errp) ++{ ++ do_block_dirty_bitmap_remove(node, name, true, NULL, errp); + } + + /** +diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h +index 62682eb..a21d54a 100644 +--- a/include/block/dirty-bitmap.h ++++ b/include/block/dirty-bitmap.h +@@ -83,7 +83,7 @@ void bdrv_dirty_bitmap_set_inconsistent(BdrvDirtyBitmap *bitmap); + void bdrv_dirty_bitmap_set_busy(BdrvDirtyBitmap *bitmap, bool busy); + void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src, + HBitmap **backup, Error **errp); +-void bdrv_dirty_bitmap_set_migration(BdrvDirtyBitmap *bitmap, bool migration); ++void bdrv_dirty_bitmap_skip_store(BdrvDirtyBitmap *bitmap, bool skip); + + /* Functions that require manual locking. */ + void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap); +diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c +index 4a896a0..d650ba4 100644 +--- a/migration/block-dirty-bitmap.c ++++ b/migration/block-dirty-bitmap.c +@@ -326,7 +326,7 @@ static int init_dirty_bitmap_migration(void) + + /* unset migration flags here, to not roll back it */ + QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) { +- bdrv_dirty_bitmap_set_migration(dbms->bitmap, true); ++ bdrv_dirty_bitmap_skip_store(dbms->bitmap, true); + } + + if (QSIMPLEQ_EMPTY(&dirty_bitmap_mig_state.dbms_list)) { +diff --git a/qapi/transaction.json b/qapi/transaction.json +index 95edb78..0590dbc 100644 +--- a/qapi/transaction.json ++++ b/qapi/transaction.json +@@ -45,6 +45,7 @@ + # + # - @abort: since 1.6 + # - @block-dirty-bitmap-add: since 2.5 ++# - @block-dirty-bitmap-remove: since 4.2 + # - @block-dirty-bitmap-clear: since 2.5 + # - @block-dirty-bitmap-enable: since 4.0 + # - @block-dirty-bitmap-disable: since 4.0 +@@ -61,6 +62,7 @@ + 'data': { + 'abort': 'Abort', + 'block-dirty-bitmap-add': 'BlockDirtyBitmapAdd', ++ 'block-dirty-bitmap-remove': 
'BlockDirtyBitmap', + 'block-dirty-bitmap-clear': 'BlockDirtyBitmap', + 'block-dirty-bitmap-enable': 'BlockDirtyBitmap', + 'block-dirty-bitmap-disable': 'BlockDirtyBitmap', +-- +1.8.3.1 + diff --git a/kvm-spapr-xive-skip-partially-initialized-vCPUs-in-prese.patch b/kvm-spapr-xive-skip-partially-initialized-vCPUs-in-prese.patch new file mode 100644 index 0000000..fa9b454 --- /dev/null +++ b/kvm-spapr-xive-skip-partially-initialized-vCPUs-in-prese.patch @@ -0,0 +1,65 @@ +From 3a7d0411addca79192ed60939f55ec019c27a72a Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Tue, 8 Oct 2019 05:08:36 +0100 +Subject: [PATCH 4/6] spapr/xive: skip partially initialized vCPUs in presenter +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Gibson +Message-id: <20191008050836.11479-1-dgibson@redhat.com> +Patchwork-id: 90994 +O-Subject: [RHEL-AV-8.1.1 qemu-kvm PATCH] spapr/xive: skip partially initialized vCPUs in presenter +Bugzilla: 1754710 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé + +From: Cédric Le Goater + +When vCPUs are hotplugged, they are added to the QEMU CPU list before +being fully realized. This can crash the XIVE presenter because the +'tctx' pointer is not necessarily initialized when looking for a +matching target. + +These vCPUs are not valid targets for the presenter. Skip them. + +Signed-off-by: Cédric Le Goater +Message-Id: <20191001085722.32755-1-clg@kaod.org> +Signed-off-by: David Gibson +Reviewed-by: Greg Kurz +(cherry picked from commit 627fa61746f70f7c799f08e9048bb6a482402138) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1754710 +Branch: rhel-av-8.1.1 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=23900462 +Testing: Could no longer reproduce bug with brewed qemu + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/intc/xive.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/intc/xive.c b/hw/intc/xive.c +index da148e9..8f639f6 100644 +--- a/hw/intc/xive.c ++++ b/hw/intc/xive.c +@@ -1345,6 +1345,14 @@ static bool xive_presenter_match(XiveRouter *xrtr, uint8_t format, + int ring; + + /* ++ * Skip partially initialized vCPUs. This can happen when ++ * vCPUs are hotplugged. ++ */ ++ if (!tctx) { ++ continue; ++ } ++ ++ /* + * HW checks that the CPU is enabled in the Physical Thread + * Enable Register (PTER). + */ +-- +1.8.3.1 + diff --git a/kvm-tests-Use-iothreads-during-iotest-223.patch b/kvm-tests-Use-iothreads-during-iotest-223.patch new file mode 100644 index 0000000..ea52932 --- /dev/null +++ b/kvm-tests-Use-iothreads-during-iotest-223.patch @@ -0,0 +1,73 @@ +From c03d23733166328e70f98504d7dfaa528e889633 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 9 Oct 2019 14:10:08 +0100 +Subject: [PATCH 6/6] tests: Use iothreads during iotest 223 + +RH-Author: Eric Blake +Message-id: <20191009141008.24439-3-eblake@redhat.com> +Patchwork-id: 91355 +O-Subject: [RHEL-AV-8.1.1 qemu-kvm PATCH 2/2] tests: Use iothreads during iotest 223 +Bugzilla: 1741094 +RH-Acked-by: John Snow +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella + +Doing so catches the bugs we just fixed with NBD not properly using +correct contexts. + +Signed-off-by: Eric Blake +Message-Id: <20190920220729.31801-1-eblake@redhat.com> +(cherry picked from commit 506902c6fa80210b002e30ff33794bfc718b15c6) +Signed-off-by: Eric Blake +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/223 | 6 ++++-- + tests/qemu-iotests/223.out | 1 + + 2 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/tests/qemu-iotests/223 b/tests/qemu-iotests/223 +index cc48e78..2ba3d81 100755 +--- a/tests/qemu-iotests/223 ++++ b/tests/qemu-iotests/223 +@@ -2,7 +2,7 @@ + # + # Test reading dirty bitmap over NBD + # +-# Copyright (C) 2018 Red Hat, Inc. 
++# Copyright (C) 2018-2019 Red Hat, Inc. + # + # This program is free software; you can redistribute it and/or modify + # it under the terms of the GNU General Public License as published by +@@ -109,7 +109,7 @@ echo + echo "=== End dirty bitmaps, and start serving image over NBD ===" + echo + +-_launch_qemu 2> >(_filter_nbd) ++_launch_qemu -object iothread,id=io0 2> >(_filter_nbd) + + # Intentionally provoke some errors as well, to check error handling + silent= +@@ -117,6 +117,8 @@ _send_qemu_cmd $QEMU_HANDLE '{"execute":"qmp_capabilities"}' "return" + _send_qemu_cmd $QEMU_HANDLE '{"execute":"blockdev-add", + "arguments":{"driver":"qcow2", "node-name":"n", + "file":{"driver":"file", "filename":"'"$TEST_IMG"'"}}}' "return" ++_send_qemu_cmd $QEMU_HANDLE '{"execute":"x-blockdev-set-iothread", ++ "arguments":{"node-name":"n", "iothread":"io0"}}' "return" + _send_qemu_cmd $QEMU_HANDLE '{"execute":"block-dirty-bitmap-disable", + "arguments":{"node":"n", "name":"b"}}' "return" + _send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-add", +diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out +index d5201b2..90cc4b6 100644 +--- a/tests/qemu-iotests/223.out ++++ b/tests/qemu-iotests/223.out +@@ -27,6 +27,7 @@ wrote 2097152/2097152 bytes at offset 2097152 + {"return": {}} + {"return": {}} + {"return": {}} ++{"return": {}} + {"error": {"class": "GenericError", "desc": "NBD server not running"}} + {"return": {}} + {"error": {"class": "GenericError", "desc": "NBD server already running"}} +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index a631953..d8c60ca 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.1.0 -Release: 13%{?dist} +Release: 14%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -220,6 +220,18 @@ Patch67: kvm-qemu-iotests-Add-test-for-bz-1745922.patch Patch68: kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch # For bz#1744955 - Qemu hang when block resize a qcow2 image Patch69: kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch +# For bz#1756413 - backport support for transactionable block-dirty-bitmap-remove for incremental backup support +Patch70: kvm-blockdev-reduce-aio_context-locked-sections-in-bitma.patch +# For bz#1756413 - backport support for transactionable block-dirty-bitmap-remove for incremental backup support +Patch71: kvm-qapi-implement-block-dirty-bitmap-remove-transaction.patch +# For bz#1756413 - backport support for transactionable block-dirty-bitmap-remove for incremental backup support +Patch72: kvm-iotests-test-bitmap-moving-inside-254.patch +# For bz#1754710 - qemu core dumped when hotpluging vcpus +Patch73: kvm-spapr-xive-skip-partially-initialized-vCPUs-in-prese.patch +# For bz#1741094 - [Upstream]Incremental backup: Qemu coredump when expose an active bitmap via pull mode(data plane enable) +Patch74: kvm-nbd-Grab-aio-context-lock-in-more-places.patch +# For bz#1741094 - [Upstream]Incremental backup: Qemu coredump when expose an active bitmap via pull mode(data plane enable) +Patch75: kvm-tests-Use-iothreads-during-iotest-223.patch BuildRequires: wget BuildRequires: rpm-build @@ -1161,6 +1173,20 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Nov 12 2019 Danilo Cesar Lemes de Paula - 4.1.0-14.el8 +- kvm-blockdev-reduce-aio_context-locked-sections-in-bitma.patch [bz#1756413] +- kvm-qapi-implement-block-dirty-bitmap-remove-transaction.patch [bz#1756413] +- kvm-iotests-test-bitmap-moving-inside-254.patch [bz#1756413] +- kvm-spapr-xive-skip-partially-initialized-vCPUs-in-prese.patch [bz#1754710] +- kvm-nbd-Grab-aio-context-lock-in-more-places.patch [bz#1741094] +- 
kvm-tests-Use-iothreads-during-iotest-223.patch [bz#1741094] +- Resolves: bz#1741094 + ([Upstream]Incremental backup: Qemu coredump when expose an active bitmap via pull mode(data plane enable)) +- Resolves: bz#1754710 + (qemu core dumped when hotpluging vcpus) +- Resolves: bz#1756413 + (backport support for transactionable block-dirty-bitmap-remove for incremental backup support) + * Fri Sep 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-13.el8 - kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch [bz#1748253] - kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch [bz#1744955] From 48cefcd942cfb315041a92846ba33a7e153791e3 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Fri, 15 Nov 2019 14:35:04 +0000 Subject: [PATCH 055/195] Rebase to qemu-kvm 4.2 --- .gitignore | 1 + ...d.patch => 0005-Initial-redhat-build.patch | 72 +- ...0006-Enable-disable-devices-for-RHEL.patch | 240 +++-- ...Machine-type-related-general-changes.patch | 212 ++-- ...ch => 0008-Add-aarch64-machine-types.patch | 42 +- ...atch => 0009-Add-ppc64-machine-types.patch | 89 +- ...atch => 0010-Add-s390x-machine-types.patch | 47 +- ...tch => 0011-Add-x86_64-machine-types.patch | 163 ++- ...heck.patch => 0012-Enable-make-check.patch | 88 +- ...mber-of-devices-that-can-be-assigned.patch | 16 +- ...Add-support-statement-to-help-output.patch | 10 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 16 +- ... 
=> 0016-Add-support-for-simpletrace.patch | 8 +- ...documentation-instead-of-qemu-system.patch | 958 ------------------ ...documentation-instead-of-qemu-system.patch | 118 +++ ...18-usb-xhci-Fix-PCI-capability-order.patch | 10 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 8 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 8 +- ...ke-die-id-mandatory-unless-necessary.patch | 115 --- ...er-m_free-might-read-pointers-from-a.patch | 10 +- kvm-RHEL-disable-hostmem-memfd.patch | 56 - kvm-block-Use-QEMU_IS_ALIGNED.patch | 192 ---- ...not-abort-if-a-block-driver-is-not-a.patch | 107 -- ...osix-Always-allocate-the-first-block.patch | 386 ------- ...corruption-introduced-by-commit-8ac0.patch | 78 -- ...block-qcow2-refactor-encryption-code.patch | 234 ----- ...aio_context-locked-sections-in-bitma.patch | 122 --- ...e-nodes-head-while-removing-all-bdrv.patch | 72 -- ...i-fix-queue-dev-null-ptr-dereference.patch | 50 - ...-posix-Handle-undetectable-alignment.patch | 129 --- ...ostmem-file-fix-pmem-file-size-check.patch | 70 -- ...t-allocate_first_block-with-O_DIRECT.patch | 109 -- ...otests-test-bitmap-moving-inside-254.patch | 209 ---- ...date-hw_compat_rhel_8_0-from-hw_comp.patch | 59 -- ...p-memory_region_-add-del-_coalescing.patch | 118 --- ...actor-memory_region_clear_coalescing.patch | 64 -- ...y-Remove-has_coalesced_range-counter.patch | 96 -- ...Split-zones-when-do-coalesced_io_del.patch | 123 --- ...ory-fetch-pmem-size-in-get_file_size.patch | 254 ----- ...-re-read-the-clock-on-pre_save-in-ca.patch | 101 -- ...-qemu_file_update_transfer-interface.patch | 63 -- ...dd-speed-limit-for-multifd-migration.patch | 141 --- ...-initialise-ram_counters-for-a-new-m.patch | 139 --- ...-ram_counters-for-multifd-sync-packe.patch | 47 - ...number-of-channels-as-listen-backlog.patch | 50 - ...Grab-aio-context-lock-in-more-places.patch | 200 ---- ...h-client-channel-to-the-export-s-Aio.patch | 60 -- ...manager-Fix-invalid-g_free-crash-bug.patch | 56 - 
kvm-pseries-Fix-compat_pvr-on-reset.patch | 80 -- ...lock-dirty-bitmap-remove-transaction.patch | 274 ----- ...qemu-iotests-Add-test-for-bz-1745922.patch | 191 ---- ...-proper-compatibility-options-for-th.patch | 64 -- ...ame-s390-ccw-virtio-rhel8.0.0-to-s39.patch | 64 -- ...pdate-pseries-rhel8.1.0-machine-type.patch | 73 -- ...d-backlog-parameter-to-socket_listen.patch | 295 ------ ...onnections-to-qio_channel_socket_asy.patch | 144 --- ...onnections-to-qio_channel_socket_syn.patch | 173 ---- ...onnections-to-qio_net_listener_open_.patch | 151 --- ...r-Set-compat-mode-in-spapr_core_plug.patch | 127 --- ...WN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch | 60 -- ...ci-Consolidate-de-allocation-of-MSIs.patch | 103 -- kvm-spapr-pci-Free-MSIs-during-reset.patch | 56 - ...-Mask-the-EAS-when-allocating-an-IRQ.patch | 63 -- ...partially-initialized-vCPUs-in-prese.patch | 65 -- ...ests-Use-iothreads-during-iotest-223.patch | 73 -- ...larify-DTrace-SystemTap-help-message.patch | 80 -- ...ble-legacy-disable-modern-compat-pro.patch | 48 - ...l-the-pending-BH-when-the-dataplane-.patch | 92 -- ...ule-virtio_notify_config-to-run-on-m.patch | 91 -- ...e-types-Fixup-dynamic-sysbus-entries.patch | 60 -- ...6-machine-types-add-pc-q35-rhel8.1.0.patch | 103 -- ...x86-machine-types-pc_rhel_8_0_compat.patch | 88 -- ...ypes-q35-Fixup-units_per_default_bus.patch | 57 -- qemu-kvm.spec | 195 +--- sources | 2 +- 75 files changed, 775 insertions(+), 7713 deletions(-) rename 0004-Initial-redhat-build.patch => 0005-Initial-redhat-build.patch (68%) rename 0005-Enable-disable-devices-for-RHEL.patch => 0006-Enable-disable-devices-for-RHEL.patch (84%) rename 0006-Machine-type-related-general-changes.patch => 0007-Machine-type-related-general-changes.patch (82%) rename 0007-Add-aarch64-machine-types.patch => 0008-Add-aarch64-machine-types.patch (90%) rename 0008-Add-ppc64-machine-types.patch => 0009-Add-ppc64-machine-types.patch (86%) rename 0009-Add-s390x-machine-types.patch => 
0010-Add-s390x-machine-types.patch (69%) rename 0010-Add-x86_64-machine-types.patch => 0011-Add-x86_64-machine-types.patch (83%) rename 0011-Enable-make-check.patch => 0012-Enable-make-check.patch (80%) rename 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch => 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch (91%) rename 0013-Add-support-statement-to-help-output.patch => 0014-Add-support-statement-to-help-output.patch (89%) rename 0014-globally-limit-the-maximum-number-of-CPUs.patch => 0015-globally-limit-the-maximum-number-of-CPUs.patch (92%) rename 0015-Add-support-for-simpletrace.patch => 0016-Add-support-for-simpletrace.patch (97%) delete mode 100644 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch create mode 100644 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch rename 0017-usb-xhci-Fix-PCI-capability-order.patch => 0018-usb-xhci-Fix-PCI-capability-order.patch (93%) rename 0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch => 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch (94%) rename 0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch => 0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch (92%) delete mode 100644 0020-pc-Don-t-make-die-id-mandatory-unless-necessary.patch rename kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch => 0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch (90%) delete mode 100644 kvm-RHEL-disable-hostmem-memfd.patch delete mode 100644 kvm-block-Use-QEMU_IS_ALIGNED.patch delete mode 100644 kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch delete mode 100644 kvm-block-posix-Always-allocate-the-first-block.patch delete mode 100644 kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch delete mode 100644 kvm-block-qcow2-refactor-encryption-code.patch delete mode 100644 kvm-blockdev-reduce-aio_context-locked-sections-in-bitma.patch delete mode 100644 
kvm-blockjob-update-nodes-head-while-removing-all-bdrv.patch delete mode 100644 kvm-ehci-fix-queue-dev-null-ptr-dereference.patch delete mode 100644 kvm-file-posix-Handle-undetectable-alignment.patch delete mode 100644 kvm-hostmem-file-fix-pmem-file-size-check.patch delete mode 100644 kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch delete mode 100644 kvm-iotests-test-bitmap-moving-inside-254.patch delete mode 100644 kvm-machine-types-Update-hw_compat_rhel_8_0-from-hw_comp.patch delete mode 100644 kvm-memory-Fix-up-memory_region_-add-del-_coalescing.patch delete mode 100644 kvm-memory-Refactor-memory_region_clear_coalescing.patch delete mode 100644 kvm-memory-Remove-has_coalesced_range-counter.patch delete mode 100644 kvm-memory-Split-zones-when-do-coalesced_io_del.patch delete mode 100644 kvm-memory-fetch-pmem-size-in-get_file_size.patch delete mode 100644 kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch delete mode 100644 kvm-migration-add-qemu_file_update_transfer-interface.patch delete mode 100644 kvm-migration-add-speed-limit-for-multifd-migration.patch delete mode 100644 kvm-migration-always-initialise-ram_counters-for-a-new-m.patch delete mode 100644 kvm-migration-update-ram_counters-for-multifd-sync-packe.patch delete mode 100644 kvm-multifd-Use-number-of-channels-as-listen-backlog.patch delete mode 100644 kvm-nbd-Grab-aio-context-lock-in-more-places.patch delete mode 100644 kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch delete mode 100644 kvm-pr-manager-Fix-invalid-g_free-crash-bug.patch delete mode 100644 kvm-pseries-Fix-compat_pvr-on-reset.patch delete mode 100644 kvm-qapi-implement-block-dirty-bitmap-remove-transaction.patch delete mode 100644 kvm-qemu-iotests-Add-test-for-bz-1745922.patch delete mode 100644 kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch delete mode 100644 kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch delete mode 100644 
kvm-redhat-update-pseries-rhel8.1.0-machine-type.patch delete mode 100644 kvm-socket-Add-backlog-parameter-to-socket_listen.patch delete mode 100644 kvm-socket-Add-num-connections-to-qio_channel_socket_asy.patch delete mode 100644 kvm-socket-Add-num-connections-to-qio_channel_socket_syn.patch delete mode 100644 kvm-socket-Add-num-connections-to-qio_net_listener_open_.patch delete mode 100644 kvm-spapr-Set-compat-mode-in-spapr_core_plug.patch delete mode 100644 kvm-spapr-Use-SHUTDOWN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch delete mode 100644 kvm-spapr-pci-Consolidate-de-allocation-of-MSIs.patch delete mode 100644 kvm-spapr-pci-Free-MSIs-during-reset.patch delete mode 100644 kvm-spapr-xive-Mask-the-EAS-when-allocating-an-IRQ.patch delete mode 100644 kvm-spapr-xive-skip-partially-initialized-vCPUs-in-prese.patch delete mode 100644 kvm-tests-Use-iothreads-during-iotest-223.patch delete mode 100644 kvm-trace-Clarify-DTrace-SystemTap-help-message.patch delete mode 100644 kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch delete mode 100644 kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch delete mode 100644 kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch delete mode 100644 kvm-x86-machine-types-Fixup-dynamic-sysbus-entries.patch delete mode 100644 kvm-x86-machine-types-add-pc-q35-rhel8.1.0.patch delete mode 100644 kvm-x86-machine-types-pc_rhel_8_0_compat.patch delete mode 100644 kvm-x86-machine-types-q35-Fixup-units_per_default_bus.patch diff --git a/.gitignore b/.gitignore index 514e53e..eed8b13 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ /qemu-4.0.0.tar.xz /qemu-4.1.0-rc4.tar.xz /qemu-4.1.0.tar.xz +/qemu-4.2.0-rc1.tar.xz diff --git a/0004-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch similarity index 68% rename from 0004-Initial-redhat-build.patch rename to 0005-Initial-redhat-build.patch index ae67911..a42274e 100644 --- a/0004-Initial-redhat-build.patch +++ b/0005-Initial-redhat-build.patch @@ -1,4 
+1,4 @@ -From faeb6cfa3f274d32bcaee546b9fb5f577f895c34 Mon Sep 17 00:00:00 2001 +From 8b38b3cb83404f47ba268958cec8121c674b8153 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build @@ -13,7 +13,7 @@ several issues are fixed in QEMU tree: - Use "/share/qemu-kvm" as SHARE_SUFFIX - We reconfigured our share to qemu-kvm to be consistent with used name -This rebase includes changes up to qemu-kvm-4.0.0-6.el8 +This rebase includes changes up to qemu-kvm-4.1.0-14.el8 Rebase notes (3.1.0): - added new configure options @@ -30,20 +30,32 @@ Rebase notes (4.0.0): - Added interop documentation files - Use python module instead of qemu.py (upstream) -Rebase notes (4.1.0-rc0): +Rebase notes (4.1.0): - Remove edk2 files generated by build - Switch to rhel-8.1-candidate build target - Remove specs documentation - Switched from libssh2 to libssh - Add rc0 tarball usage hacks - -Rebase notes (4.1.0-rc1): - Added BuildRequires for wget, rpm-build and python3-sphinx - -Rebase notes (4.1.0-rc2): - Removed new unpacked files - Update configure line to use new options +Rebase notes (weekly-190823): +- Disable iotest run during make check + +Rebase notes (weekly-190906): +- README renamed to README.rst (upstream) +- Removed ui-spice-app.so + +Rebase notes (weekly-190913): +- Added relevant changes from "505f7f4 redhat: Adding slirp to the exploded tree" + +Rebase notes (weekly-190927): +- Removed qemu-ga.8 install from spec file - installed by make + +Rebase notes (weekly-191011): +- Removed spapr-rtas.bin (upstream) + Merged patches (3.1.0): - 01f0c9f RHEL8: Add disable configure options to qemu spec file - Spec file cleanups @@ -54,39 +66,44 @@ Merged patches (4.0.0): - eb204b5 Introduce the qemu-kvm-tests rpm - 223cf0c Load kvm module during boot (partial) -Merged patches (4.1.0-rc0): +Merged patches (4.1.0): - ebb6e97 redhat: Fix LOCALVERSION creation - b0ab0cc redhat: enable tpmdev passthrough (not disabling tests) - 
7cb3c4a Enable libpmem to support nvdimm - 8943607 qemu-kvm.spec: bump libseccomp >= 2.4.0 - 27b7c44 rh: set CONFIG_BOCHS_DISPLAY=y for x86 (partial) - -Merged patches (4.1.0-rc3): - e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) +Merged patches (weekly-190830): +- 69e1fb2 enable virgla + +Merged patches (weekly-190906): +- d4f6115 enable virgl, for real this time ... + Signed-off-by: Danilo C. L. de Paula --- .gitignore | 1 + Makefile | 3 +- configure | 1 + os-posix.c | 2 +- - redhat/Makefile | 82 ++ + redhat/Makefile | 82 + redhat/Makefile.common | 51 + redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 2202 +++++++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 2369 +++++++++++++++++++++++++++++ redhat/scripts/process-patches.sh | 7 +- + tests/Makefile.include | 2 +- ui/vnc.c | 2 +- - 10 files changed, 2382 insertions(+), 8 deletions(-) + 11 files changed, 2550 insertions(+), 9 deletions(-) create mode 100644 redhat/Makefile create mode 100644 redhat/Makefile.common create mode 100644 redhat/README.tests create mode 100644 redhat/qemu-kvm.spec.template diff --git a/Makefile b/Makefile -index 85862fb81a..288a5ac28a 100644 +index b437a346d7..086727dbb9 100644 --- a/Makefile +++ b/Makefile -@@ -493,6 +493,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM +@@ -512,6 +512,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM CAP_CFLAGS += -DCAPSTONE_HAS_ARM64 CAP_CFLAGS += -DCAPSTONE_HAS_POWERPC CAP_CFLAGS += -DCAPSTONE_HAS_X86 @@ -94,7 +111,7 @@ index 85862fb81a..288a5ac28a 100644 .PHONY: capstone/all capstone/all: .git-submodule-status -@@ -804,7 +805,7 @@ install-doc: $(DOCS) install-sphinxdocs +@@ -826,7 +827,7 @@ install-doc: $(DOCS) install-sphinxdocs $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)" ifdef CONFIG_POSIX $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" @@ -104,7 +121,7 @@ index 85862fb81a..288a5ac28a 100644 $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) docs/qemu-block-drivers.7 
"$(DESTDIR)$(mandir)/man7" diff --git a/configure b/configure -index 714e7fb6a1..4ecc861a43 100755 +index 6099be1d84..16564f8ccc 100755 --- a/configure +++ b/configure @@ -2424,6 +2424,7 @@ if test "$seccomp" != "no" ; then @@ -116,7 +133,7 @@ index 714e7fb6a1..4ecc861a43 100755 # xen probe diff --git a/os-posix.c b/os-posix.c -index 3ba7df8d75..ff26068d89 100644 +index 86cffd2c7d..1c9f86768d 100644 --- a/os-posix.c +++ b/os-posix.c @@ -83,7 +83,7 @@ void os_setup_signal_handling(void) @@ -128,11 +145,24 @@ index 3ba7df8d75..ff26068d89 100644 #define BUILD_SUFFIX "/pc-bios" char *os_find_datadir(void) { +diff --git a/tests/Makefile.include b/tests/Makefile.include +index 8566f5f119..b483790cf3 100644 +--- a/tests/Makefile.include ++++ b/tests/Makefile.include +@@ -1194,7 +1194,7 @@ check-acceptance: check-venv $(TESTS_RESULTS_DIR) + check-qapi-schema: check-tests/qapi-schema/frontend check-tests/qapi-schema/doc-good.texi + check-qtest: $(patsubst %,check-qtest-%, $(QTEST_TARGETS)) + check-block: $(patsubst %,check-%, $(check-block-y)) +-check: check-block check-qapi-schema check-unit check-softfloat check-qtest check-decodetree ++check: check-qapi-schema check-unit check-softfloat check-qtest check-decodetree + check-clean: + rm -rf $(check-unit-y) tests/*.o $(QEMU_IOTESTS_HELPERS-y) + rm -rf $(sort $(foreach target,$(SYSEMU_TARGET_LIST), $(check-qtest-$(target)-y)) $(check-qtest-generic-y)) diff --git a/ui/vnc.c b/ui/vnc.c -index 38f92bfca3..933dc36db5 100644 +index 87b8045afe..ecf6276f5b 100644 --- a/ui/vnc.c +++ b/ui/vnc.c -@@ -3976,7 +3976,7 @@ void vnc_display_open(const char *id, Error **errp) +@@ -3987,7 +3987,7 @@ void vnc_display_open(const char *id, Error **errp) #ifdef CONFIG_VNC_SASL if (sasl) { @@ -142,5 +172,5 @@ index 38f92bfca3..933dc36db5 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -2.20.1 +2.21.0 diff --git a/0005-Enable-disable-devices-for-RHEL.patch b/0006-Enable-disable-devices-for-RHEL.patch 
similarity index 84% rename from 0005-Enable-disable-devices-for-RHEL.patch rename to 0006-Enable-disable-devices-for-RHEL.patch index 8fa1c6d..f6c3309 100644 --- a/0005-Enable-disable-devices-for-RHEL.patch +++ b/0006-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 36dad4518633c16a975df51d4d3a475c346b8aed Mon Sep 17 00:00:00 2001 +From 067b5ced8f6f2ee7cd44cfe8e17021974f403206 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 11 Jan 2016 11:53:33 +0100 Subject: Enable/disable devices for RHEL @@ -21,22 +21,20 @@ Rebase notes (4.0.0): - Switch to KConfig (upstream) - Using device whitelist + without-defualt-devices option -Rebase notes (4.1.0-rc0): +Rebase notes (4.1.0): - Added CONFIG_USB_OHCI_PCI for ppc64 - Added CONFIG_XIVE_KVM for ppc64 - Added CONFIG_ACPI_PCI for x86_64 - Added CONFIG_SEMIHOSTING for aarch64 - -Rebase notes (4.1.0-rc1): - Cleanup aarch64 devices - Do not build a15mpcore.c - -Rebase notes (4.1.0-rc2): - Removed ide-isa.c stub file - -Rebase notes (4.1.0-rc3): - Use CONFIG_USB_EHCI_PCI on x86_64 (new upstream) +Rebase notes (4.2.0-rc0): +- Use conditional build for isa-superio.c (upstream change) +- Rename PCI_PIIX to PCI_I440FX (upstream change) + Merged patches (qemu 3.1.0): - d51e082 Re-enable CONFIG_HYPERV_TESTDEV - 4b889f3 Declare cirrus-vga as deprecated @@ -44,19 +42,27 @@ Merged patches (qemu 3.1.0): - 3eef52a Disable CONFIG_IPMI and CONFIG_I2C for ppc64 - 9caf292 Disable CONFIG_CAN_BUS and CONFIG_CAN_SJA1000 -Merged patches (4.1.0-rc0): +Merged patches (4.1.0): - 20a51f6 fdc: Revert downstream disablement of device "floppy" - f869cc0 fdc: Restrict floppy controllers to RHEL-7 machine types - 5909721 aarch64: Compile out IOH3420 - 27b7c44 rh: set CONFIG_BOCHS_DISPLAY=y for x86 (partial) - -Merged patches (4.1.0-rc3): - 495a27d x86_64-rh-devices: add missing TPM passthrough - e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) +Merged patches (weekly-190823): +- f7587dd RHEL: disable hostmem-memfd + +Conflicts: 
+ hw/isa/Makefile.objs + +Conflicts: + hw/pci-host/i440fx.c + Signed-off-by: Danilo C. L. de Paula --- Makefile.objs | 4 +- + backends/Makefile.objs | 3 +- default-configs/aarch64-rh-devices.mak | 20 +++++ default-configs/aarch64-softmmu.mak | 10 ++- default-configs/ppc64-rh-devices.mak | 32 ++++++++ @@ -70,16 +76,14 @@ Signed-off-by: Danilo C. L. de Paula hw/arm/Makefile.objs | 2 +- hw/block/fdc.c | 10 +++ hw/bt/Makefile.objs | 4 +- - hw/core/Makefile.objs | 9 ++- hw/cpu/Makefile.objs | 5 +- hw/display/Makefile.objs | 5 +- hw/display/cirrus_vga.c | 3 + + hw/i386/pc_piix.c | 2 + hw/ide/piix.c | 5 +- hw/input/pckbd.c | 2 + - hw/isa/Makefile.objs | 2 +- - hw/misc/Makefile.objs | 2 +- hw/net/e1000.c | 2 + - hw/pci-host/piix.c | 4 + + hw/pci-host/i440fx.c | 4 + hw/ppc/spapr_cpu_core.c | 2 + hw/usb/ccid-card-emulated.c | 2 + hw/vfio/pci-quirks.c | 5 ++ @@ -91,8 +95,9 @@ Signed-off-by: Danilo C. L. de Paula target/ppc/cpu-models.c | 10 +++ target/s390x/cpu_models.c | 3 + target/s390x/kvm.c | 8 ++ + util/memfd.c | 2 +- vl.c | 8 +- - 36 files changed, 316 insertions(+), 44 deletions(-) + 36 files changed, 314 insertions(+), 40 deletions(-) create mode 100644 default-configs/aarch64-rh-devices.mak create mode 100644 default-configs/ppc64-rh-devices.mak create mode 100644 default-configs/rh-virtio.mak @@ -100,7 +105,7 @@ Signed-off-by: Danilo C. L. 
de Paula create mode 100644 default-configs/x86_64-rh-devices.mak diff --git a/Makefile.objs b/Makefile.objs -index 6a143dcd57..8e96af153d 100644 +index 11ba1a36bd..fcf63e1096 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -65,8 +65,8 @@ common-obj-y += replay/ @@ -114,6 +119,17 @@ index 6a143dcd57..8e96af153d 100644 common-obj-y += dma-helpers.o common-obj-y += vl.o +diff --git a/backends/Makefile.objs b/backends/Makefile.objs +index f0691116e8..f328d404bf 100644 +--- a/backends/Makefile.objs ++++ b/backends/Makefile.objs +@@ -16,4 +16,5 @@ endif + + common-obj-$(call land,$(CONFIG_VHOST_USER),$(CONFIG_VIRTIO)) += vhost-user.o + +-common-obj-$(CONFIG_LINUX) += hostmem-memfd.o ++# RHEL: disable memfd ++# common-obj-$(CONFIG_LINUX) += hostmem-memfd.o diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak new file mode 100644 index 0000000000..a1ed641174 @@ -268,7 +284,7 @@ index f2287a133f..3e2e388e91 100644 +include s390x-rh-devices.mak diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..6b5d68e155 +index 0000000000..d59b6d9bb5 --- /dev/null +++ b/default-configs/x86_64-rh-devices.mak @@ -0,0 +1,100 @@ @@ -324,7 +340,7 @@ index 0000000000..6b5d68e155 +CONFIG_PCI_DEVICES=y +CONFIG_PCI_EXPRESS=y +CONFIG_PCI_EXPRESS_Q35=y -+CONFIG_PCI_PIIX=y ++CONFIG_PCI_I440FX=y +CONFIG_PCI_TESTDEV=y +CONFIG_PCKBD=y +CONFIG_PCSPK=y @@ -384,10 +400,10 @@ index 64b2ee2960..b5de7e5279 100644 + +include x86_64-rh-devices.mak diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index e53dfe1ee3..168a713eff 100644 +index 2034dd749e..ab203ad448 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c -@@ -446,8 +446,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) +@@ -449,8 +449,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; pm->acpi_memory_hotplug.is_enabled = true; 
pm->cpu_hotplug_legacy = true; @@ -399,7 +415,7 @@ index e53dfe1ee3..168a713eff 100644 object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs -index 43ce8d5b19..13fc9502ff 100644 +index fe749f65fd..2aa1a9efdd 100644 --- a/hw/arm/Makefile.objs +++ b/hw/arm/Makefile.objs @@ -27,7 +27,7 @@ obj-$(CONFIG_VEXPRESS) += vexpress.o @@ -412,10 +428,10 @@ index 43ce8d5b19..13fc9502ff 100644 obj-$(CONFIG_PXA2XX) += pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o obj-$(CONFIG_DIGIC) += digic.o diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 9b24cb9b85..440b53b60c 100644 +index ac5d31e8c1..e925bac002 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c -@@ -43,6 +43,8 @@ +@@ -46,6 +46,8 @@ #include "qemu/module.h" #include "trace.h" @@ -424,7 +440,7 @@ index 9b24cb9b85..440b53b60c 100644 /********************************************************/ /* debug Floppy devices */ -@@ -2635,6 +2637,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, +@@ -2638,6 +2640,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, int i, j; static int command_tables_inited = 0; @@ -449,27 +465,6 @@ index 867a7d2e8a..e678e9ee3c 100644 +#common-obj-y += core.o l2cap.o sdp.o hci.o hid.o +#common-obj-y += hci-csr.o -diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs -index f8481d959f..bab9c2d443 100644 ---- a/hw/core/Makefile.objs -+++ b/hw/core/Makefile.objs -@@ -17,11 +17,12 @@ common-obj-$(CONFIG_SOFTMMU) += machine.o - common-obj-$(CONFIG_SOFTMMU) += loader.o - common-obj-$(CONFIG_FITLOADER) += loader-fit.o - common-obj-$(CONFIG_SOFTMMU) += qdev-properties-system.o --common-obj-$(CONFIG_SOFTMMU) += register.o --common-obj-$(CONFIG_SOFTMMU) += or-irq.o --common-obj-$(CONFIG_SOFTMMU) += split-irq.o -+# Disabled in Red Hat Enterprise Linux -+#common-obj-$(CONFIG_SOFTMMU) += register.o -+#common-obj-$(CONFIG_SOFTMMU) += or-irq.o -+#common-obj-$(CONFIG_SOFTMMU) += split-irq.o - 
common-obj-$(CONFIG_PLATFORM_BUS) += platform-bus.o --common-obj-$(CONFIG_SOFTMMU) += generic-loader.o -+#common-obj-$(CONFIG_SOFTMMU) += generic-loader.o - common-obj-$(CONFIG_SOFTMMU) += null-machine.o - - obj-$(CONFIG_SOFTMMU) += machine-qmp-cmds.o diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs index 8db9e8a7b3..1601ea93c7 100644 --- a/hw/cpu/Makefile.objs @@ -484,7 +479,7 @@ index 8db9e8a7b3..1601ea93c7 100644 +common-obj-y += core.o +# cluster.o diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs -index a64998fc7b..88a60b36c5 100644 +index f2182e3bef..3d0cda1b52 100644 --- a/hw/display/Makefile.objs +++ b/hw/display/Makefile.objs @@ -1,8 +1,9 @@ @@ -500,10 +495,10 @@ index a64998fc7b..88a60b36c5 100644 common-obj-$(CONFIG_ADS7846) += ads7846.o common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index 2e4911a1e3..49c16c8f8b 100644 +index cd283e53b4..93afa26fda 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c -@@ -2973,6 +2973,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) +@@ -2975,6 +2975,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); int16_t device_id = pc->device_id; @@ -513,11 +508,25 @@ index 2e4911a1e3..49c16c8f8b 100644 /* follow real hardware, cirrus card emulated has 4 MB video memory. Also accept 8 MB/16 MB for backward compatibility. 
*/ if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 2aefa3b8df..a19f8058ab 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -78,7 +78,9 @@ static void pc_init1(MachineState *machine, + X86MachineState *x86ms = X86_MACHINE(machine); + MemoryRegion *system_memory = get_system_memory(); + MemoryRegion *system_io = get_system_io(); ++#ifdef CONFIG_IDE_ISA + int i; ++#endif + PCIBus *pci_bus; + ISABus *isa_bus; + PCII440FXState *i440fx_state; diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index b97e555072..55b30e65ae 100644 +index db313dd3b1..e14858ca64 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -253,7 +253,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -251,7 +251,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -527,7 +536,7 @@ index b97e555072..55b30e65ae 100644 } static const TypeInfo piix3_ide_info = { -@@ -280,6 +281,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -279,6 +280,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -537,10 +546,10 @@ index b97e555072..55b30e65ae 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index 47a606f5e3..562a9bc0a6 100644 +index f0acfd86f7..390eb6579c 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c -@@ -568,6 +568,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) +@@ -571,6 +571,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) dc->realize = i8042_realizefn; dc->vmsd = &vmstate_kbd_isa; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); @@ -549,35 +558,11 @@ index 47a606f5e3..562a9bc0a6 100644 } static const TypeInfo 
i8042_info = { -diff --git a/hw/isa/Makefile.objs b/hw/isa/Makefile.objs -index 9e106df186..0828964014 100644 ---- a/hw/isa/Makefile.objs -+++ b/hw/isa/Makefile.objs -@@ -1,5 +1,5 @@ - common-obj-$(CONFIG_ISA_BUS) += isa-bus.o --common-obj-$(CONFIG_ISA_BUS) += isa-superio.o -+#common-obj-$(CONFIG_ISA_BUS) += isa-superio.o - common-obj-$(CONFIG_APM) += apm.o - common-obj-$(CONFIG_I82378) += i82378.o - common-obj-$(CONFIG_PC87312) += pc87312.o -diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs -index e9aab519a1..17f94225a6 100644 ---- a/hw/misc/Makefile.objs -+++ b/hw/misc/Makefile.objs -@@ -9,7 +9,7 @@ common-obj-$(CONFIG_PCI_TESTDEV) += pci-testdev.o - common-obj-$(CONFIG_EDU) += edu.o - common-obj-$(CONFIG_PCA9552) += pca9552.o - --common-obj-y += unimp.o -+#common-obj-y += unimp.o - common-obj-$(CONFIG_FW_CFG_DMA) += vmcoreinfo.o - - # ARM devices diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index a023ceb27c..15d6c7d3be 100644 +index a73f8d404e..fc73fdd6fa 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1794,6 +1794,7 @@ static const E1000Info e1000_devices[] = { +@@ -1795,6 +1795,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -585,7 +570,7 @@ index a023ceb27c..15d6c7d3be 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1806,6 +1807,7 @@ static const E1000Info e1000_devices[] = { +@@ -1807,6 +1808,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -593,11 +578,11 @@ index a023ceb27c..15d6c7d3be 100644 }; static void e1000_register_types(void) -diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c -index d9c70f7ce6..f294fbce6a 100644 ---- a/hw/pci-host/piix.c -+++ b/hw/pci-host/piix.c -@@ -801,6 +801,7 @@ static const TypeInfo i440fx_info = { +diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c +index f27131102d..17f10efae2 100644 +--- a/hw/pci-host/i440fx.c ++++ b/hw/pci-host/i440fx.c 
+@@ -386,6 +386,7 @@ static const TypeInfo i440fx_info = { }, }; @@ -605,7 +590,7 @@ index d9c70f7ce6..f294fbce6a 100644 /* IGD Passthrough Host Bridge. */ typedef struct { uint8_t offset; -@@ -884,6 +885,7 @@ static const TypeInfo igd_passthrough_i440fx_info = { +@@ -469,6 +470,7 @@ static const TypeInfo igd_passthrough_i440fx_info = { .instance_size = sizeof(PCII440FXState), .class_init = igd_passthrough_i440fx_class_init, }; @@ -613,21 +598,21 @@ index d9c70f7ce6..f294fbce6a 100644 static const char *i440fx_pcihost_root_bus_path(PCIHostState *host_bridge, PCIBus *rootbus) -@@ -929,7 +931,9 @@ static const TypeInfo i440fx_pcihost_info = { +@@ -514,7 +516,9 @@ static const TypeInfo i440fx_pcihost_info = { static void i440fx_register_types(void) { type_register_static(&i440fx_info); +#if 0 /* Disabled in Red Hat Enterprise Linux */ type_register_static(&igd_passthrough_i440fx_info); +#endif - type_register_static(&piix3_pci_type_info); - type_register_static(&piix3_info); - type_register_static(&piix3_xen_info); + type_register_static(&i440fx_pcihost_info); + } + diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 5621fb9a3d..b91a106074 100644 +index ef7b27a66d..ef034a1279 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -387,10 +387,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { +@@ -408,10 +408,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { .instance_size = sizeof(SpaprCpuCore), .class_size = sizeof(SpaprCpuCoreClass), }, @@ -641,10 +626,10 @@ index 5621fb9a3d..b91a106074 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c -index e20f8ed293..0ddc26cb6c 100644 +index 291e41db8a..1c0f190f1b 100644 --- a/hw/usb/ccid-card-emulated.c +++ b/hw/usb/ccid-card-emulated.c -@@ -603,6 +603,8 @@ static void emulated_class_initfn(ObjectClass *klass, void *data) +@@ -604,6 +604,8 @@ static void 
emulated_class_initfn(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_INPUT, dc->categories); dc->desc = "emulated smartcard"; dc->props = emulated_card_properties; @@ -654,10 +639,10 @@ index e20f8ed293..0ddc26cb6c 100644 static const TypeInfo emulated_card_info = { diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index b35a640030..e8e7480c51 100644 +index 136f3a9ad6..d761fcaf75 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c -@@ -1386,6 +1386,8 @@ static void vfio_pci_igd_lpc_bridge_class_init(ObjectClass *klass, void *data) +@@ -1391,6 +1391,8 @@ static void vfio_pci_igd_lpc_bridge_class_init(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->desc = "VFIO dummy ISA/LPC bridge for IGD assignment"; dc->hotpluggable = false; @@ -666,7 +651,7 @@ index b35a640030..e8e7480c51 100644 k->realize = vfio_pci_igd_lpc_bridge_realize; k->class_id = PCI_CLASS_BRIDGE_ISA; } -@@ -1579,6 +1581,9 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) +@@ -1584,6 +1586,9 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) 0, PCI_DEVFN(0x2, 0))) { return; } @@ -677,10 +662,10 @@ index b35a640030..e8e7480c51 100644 /* * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index d7a4e1875c..7c474a9d4a 100644 +index e6569a7968..5cff163334 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -3180,6 +3180,7 @@ static const TypeInfo vfio_pci_dev_info = { +@@ -3200,6 +3200,7 @@ static const TypeInfo vfio_pci_dev_info = { }, }; @@ -688,7 +673,7 @@ index d7a4e1875c..7c474a9d4a 100644 static Property vfio_pci_dev_nohotplug_properties[] = { DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false), DEFINE_PROP_END_OF_LIST(), -@@ -3199,11 +3200,15 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { +@@ -3219,11 +3220,15 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { .instance_size = sizeof(VFIOPCIDevice), .class_init = 
vfio_pci_nohotplug_dev_class_init, }; @@ -705,10 +690,10 @@ index d7a4e1875c..7c474a9d4a 100644 type_init(register_vfio_pci_dev_type) diff --git a/qemu-options.hx b/qemu-options.hx -index 9621e934c0..6873f9e674 100644 +index 65c9473b73..fc17aca631 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -2024,11 +2024,6 @@ ETEXI +@@ -2111,11 +2111,6 @@ ETEXI DEF("no-hpet", 0, QEMU_OPTION_no_hpet, "-no-hpet disable HPET\n", QEMU_ARCH_I386) @@ -720,7 +705,7 @@ index 9621e934c0..6873f9e674 100644 DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" -@@ -3038,6 +3033,7 @@ STEXI +@@ -3125,6 +3120,7 @@ STEXI ETEXI DEFHEADING() @@ -728,7 +713,7 @@ index 9621e934c0..6873f9e674 100644 DEFHEADING(Bluetooth(R) options:) STEXI @table @option -@@ -3116,6 +3112,7 @@ STEXI +@@ -3203,6 +3199,7 @@ STEXI @end table ETEXI DEFHEADING() @@ -737,10 +722,10 @@ index 9621e934c0..6873f9e674 100644 #ifdef CONFIG_TPM DEFHEADING(TPM device options:) diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index ec2ab95dbe..7e7ccee45d 100644 +index 7a4ac9339b..3788fc3c4a 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c -@@ -2702,7 +2702,9 @@ static void arm_cpu_register_types(void) +@@ -2744,7 +2744,9 @@ static void arm_cpu_register_types(void) type_register_static(&idau_interface_type_info); while (info->name) { @@ -752,10 +737,10 @@ index ec2ab95dbe..7e7ccee45d 100644 } diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 19751e37a7..47a1236e9f 100644 +index a624163ac2..ba5e9faeae 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1554,14 +1554,14 @@ static X86CPUDefinition builtin_x86_defs[] = { +@@ -1807,14 +1807,14 @@ static X86CPUDefinition builtin_x86_defs[] = { .family = 6, .model = 6, .stepping = 3, @@ -778,7 +763,7 @@ index 19751e37a7..47a1236e9f 100644 .features[FEAT_8000_0001_ECX] = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM, .xlevel = 
0x8000000A, -@@ -1790,6 +1790,25 @@ static X86CPUDefinition builtin_x86_defs[] = { +@@ -2043,6 +2043,25 @@ static X86CPUDefinition builtin_x86_defs[] = { .xlevel = 0x80000008, .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", }, @@ -875,7 +860,7 @@ index 086548e9b9..1bbf378c18 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 1d16d7d5e7..47188eddf4 100644 +index 7e92fb2e15..be718220d7 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -404,6 +404,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, @@ -889,10 +874,10 @@ index 1d16d7d5e7..47188eddf4 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 6e814c230b..153d092d62 100644 +index 0c9d14b4b1..a02d569537 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c -@@ -2363,6 +2363,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2387,6 +2387,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } @@ -907,11 +892,24 @@ index 6e814c230b..153d092d62 100644 prop.cpuid = s390_cpuid_from_cpu_model(model); prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ +diff --git a/util/memfd.c b/util/memfd.c +index 4a3c07e0be..3303ec9da4 100644 +--- a/util/memfd.c ++++ b/util/memfd.c +@@ -193,7 +193,7 @@ bool qemu_memfd_alloc_check(void) + */ + bool qemu_memfd_check(unsigned int flags) + { +-#ifdef CONFIG_LINUX ++#if 0 /* RHEL: memfd support disabled */ + int mfd = memfd_create("test", flags | MFD_CLOEXEC); + + if (mfd >= 0) { diff --git a/vl.c b/vl.c -index b426b32134..f9166f509b 100644 +index 6a65a64bfd..668a34577e 100644 --- a/vl.c +++ b/vl.c -@@ -164,7 +164,7 @@ Chardev *parallel_hds[MAX_PARALLEL_PORTS]; +@@ -166,7 +166,7 @@ Chardev *parallel_hds[MAX_PARALLEL_PORTS]; int win2k_install_hack = 0; int 
singlestep = 0; int acpi_enabled = 1; @@ -920,7 +918,7 @@ index b426b32134..f9166f509b 100644 int fd_bootchk = 1; static int no_reboot; int no_shutdown = 0; -@@ -912,6 +912,7 @@ static void configure_rtc(QemuOpts *opts) +@@ -914,6 +914,7 @@ static void configure_rtc(QemuOpts *opts) } } @@ -928,7 +926,7 @@ index b426b32134..f9166f509b 100644 /***********************************************************/ /* Bluetooth support */ static int nb_hcis; -@@ -1033,6 +1034,7 @@ static int bt_parse(const char *opt) +@@ -1035,6 +1036,7 @@ static int bt_parse(const char *opt) error_report("bad bluetooth parameter '%s'", opt); return 1; } @@ -936,7 +934,7 @@ index b426b32134..f9166f509b 100644 static int parse_name(void *opaque, QemuOpts *opts, Error **errp) { -@@ -3149,6 +3151,7 @@ int main(int argc, char **argv, char **envp) +@@ -3128,6 +3130,7 @@ int main(int argc, char **argv, char **envp) } break; #endif @@ -944,7 +942,7 @@ index b426b32134..f9166f509b 100644 case QEMU_OPTION_bt: warn_report("The bluetooth subsystem is deprecated and will " "be removed soon. 
If the bluetooth subsystem is " -@@ -3156,6 +3159,7 @@ int main(int argc, char **argv, char **envp) +@@ -3135,6 +3138,7 @@ int main(int argc, char **argv, char **envp) "qemu-devel@nongnu.org with your usecase."); add_device_config(DEV_BT, optarg); break; @@ -952,7 +950,7 @@ index b426b32134..f9166f509b 100644 case QEMU_OPTION_audio_help: audio_legacy_help(); exit (0); -@@ -4284,9 +4288,11 @@ int main(int argc, char **argv, char **envp) +@@ -4282,9 +4286,11 @@ int main(int argc, char **argv, char **envp) tpm_init(); @@ -965,5 +963,5 @@ index b426b32134..f9166f509b 100644 if (!xen_enabled()) { /* On 32-bit hosts, QEMU is limited by virtual address space */ -- -2.20.1 +2.21.0 diff --git a/0006-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch similarity index 82% rename from 0006-Machine-type-related-general-changes.patch rename to 0007-Machine-type-related-general-changes.patch index 0490225..d7fed30 100644 --- a/0006-Machine-type-related-general-changes.patch +++ b/0007-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From 34edf0da6480f60393083de194d1a04cd2cfe5c7 Mon Sep 17 00:00:00 2001 +From 701a0ad0e6220c5cf9d860e3689f79f8154274bd Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -13,54 +13,52 @@ Rebase changes (4.0.0): - Remove e1000 device duplication changes to reflect upstream solution - Rewrite machine compat properties to upstream solution -Rebase changes (4.1.0-rc0): +Rebase changes (4.1.0): - Removed optional flag for machine compat properties (upstream) - -Rebase changes (4.1.0-rc1): - Remove c3e002cb chunk from hw/net/e1000.c - -Rebase changes (4.1.0-rc2): - Reorder compat structures - Use one format for compat scructures - -Rebase changes (4.1.0-rc4): - Added compat for virtio-balloon-pci.any_layout for rhel71 Merged patches (4.0.0): - d4c0957 compat: Generic HW_COMPAT_RHEL7_6 - cbac773 virtio: Make 
disable-legacy/disable-modern compat properties optional -Merged patches (4.0.0-rc0): +Merged patches (4.1.0): - 479ad30 redhat: fix cut'n'paste garbage in hw_compat comments - f19738e compat: Generic hw_compat_rhel_8_0 +Merged patches (weekly-190823): +- 9f2bfaa machine types: Update hw_compat_rhel_8_0 from hw_compat_4_0 +- ca4a5e8 virtio: Make disable-legacy/disable-modern compat properties optional + Signed-off-by: Danilo C. L. de Paula --- - hw/acpi/ich9.c | 16 +++++ - hw/acpi/piix4.c | 6 +- - hw/char/serial.c | 16 +++++ - hw/core/machine.c | 146 ++++++++++++++++++++++++++++++++++++++++ + hw/acpi/ich9.c | 16 ++++ + hw/acpi/piix4.c | 5 +- + hw/char/serial.c | 16 ++++ + hw/core/machine.c | 161 ++++++++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- hw/net/e1000e.c | 21 ++++++ hw/net/rtl8139.c | 4 +- + hw/rtc/mc146818rtc.c | 6 ++ hw/smbios/smbios.c | 1 + hw/timer/i8254_common.c | 2 +- - hw/timer/mc146818rtc.c | 6 ++ hw/usb/hcd-uhci.c | 4 +- - hw/usb/hcd-xhci.c | 20 ++++++ + hw/usb/hcd-xhci.c | 20 +++++ hw/usb/hcd-xhci.h | 2 + include/hw/acpi/ich9.h | 3 + include/hw/boards.h | 21 ++++++ - include/hw/usb.h | 4 ++ + include/hw/usb.h | 4 + migration/migration.c | 2 + migration/migration.h | 5 ++ - 18 files changed, 274 insertions(+), 7 deletions(-) + 18 files changed, 289 insertions(+), 6 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 168a713eff..0a6346f1cf 100644 +index ab203ad448..7ec26884e8 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c -@@ -441,6 +441,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) +@@ -444,6 +444,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) s->pm.enable_tco = value; } @@ -79,7 +77,7 @@ index 168a713eff..0a6346f1cf 100644 void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) { static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; -@@ -465,6 +477,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) +@@ 
-468,6 +480,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) ich9_pm_get_cpu_hotplug_legacy, ich9_pm_set_cpu_hotplug_legacy, NULL); @@ -91,19 +89,18 @@ index 168a713eff..0a6346f1cf 100644 ich9_pm_get_disable_s3, ich9_pm_set_disable_s3, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index ec4e186cec..0d2c8e4fe3 100644 +index 93aec2dd2c..3a26193cbe 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -306,7 +306,7 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id) - static const VMStateDescription vmstate_acpi = { +@@ -274,6 +274,7 @@ static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, -- .minimum_version_id = 3, + .minimum_version_id = 3, + .minimum_version_id = 2, - .minimum_version_id_old = 1, - .load_state_old = acpi_load_old, .post_load = vmstate_acpi_post_load, -@@ -663,8 +663,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), +@@ -627,8 +628,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) static Property piix4_pm_properties[] = { DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), @@ -115,18 +112,18 @@ index ec4e186cec..0d2c8e4fe3 100644 DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_pci_hotplug, true), diff --git a/hw/char/serial.c b/hw/char/serial.c -index 7c42a2abfc..ae63cc0104 100644 +index b4aa250950..0012f0e44d 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c -@@ -30,6 +30,7 @@ - #include "qemu/timer.h" +@@ -34,6 +34,7 @@ + #include "sysemu/runstate.h" #include "qemu/error-report.h" #include "trace.h" +#include "migration/migration.h" //#define DEBUG_SERIAL -@@ -699,6 +700,9 @@ static int serial_post_load(void *opaque, int version_id) +@@ -703,6 +704,9 @@ static int serial_post_load(void *opaque, int version_id) static bool serial_thr_ipending_needed(void *opaque) { SerialState *s = opaque; 
@@ -136,7 +133,7 @@ index 7c42a2abfc..ae63cc0104 100644 if (s->ier & UART_IER_THRI) { bool expected_value = ((s->iir & UART_IIR_ID) == UART_IIR_THRI); -@@ -780,6 +784,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { +@@ -784,6 +788,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { static bool serial_fifo_timeout_timer_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -147,7 +144,7 @@ index 7c42a2abfc..ae63cc0104 100644 return timer_pending(s->fifo_timeout_timer); } -@@ -797,6 +805,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { +@@ -801,6 +809,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { static bool serial_timeout_ipending_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -158,7 +155,7 @@ index 7c42a2abfc..ae63cc0104 100644 return s->timeout_ipending != 0; } -@@ -814,6 +826,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { +@@ -818,6 +830,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { static bool serial_poll_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -170,10 +167,10 @@ index 7c42a2abfc..ae63cc0104 100644 } diff --git a/hw/core/machine.c b/hw/core/machine.c -index 32d1ca9abc..f30afe0f0b 100644 +index 1689ad3bf8..2b130bb585 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -27,6 +27,152 @@ +@@ -27,6 +27,167 @@ #include "hw/pci/pci.h" #include "hw/mem/nvdimm.h" @@ -204,6 +201,20 @@ index 32d1ca9abc..f30afe0f0b 100644 + { "virtio-blk-device", "discard", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_3_1 */ + { "virtio-blk-device", "write-zeroes", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "VGA", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "secondary-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "bochs-display", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { 
"virtio-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-gpu-pci", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-device", "use-started", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */ ++ { "pcie-root-port-base", "disable-acs", "true" }, +}; +const size_t hw_compat_rhel_8_0_len = G_N_ELEMENTS(hw_compat_rhel_8_0); + @@ -288,8 +299,9 @@ index 32d1ca9abc..f30afe0f0b 100644 + { "fw_cfg_mem", "dma_enabled", "off" }, + { "fw_cfg_io", "dma_enabled", "off" }, + { "isa-fdc", "fallback", "144" }, -+ { "virtio-pci", "disable-modern", "on" }, -+ { "virtio-pci", "disable-legacy", "off" }, ++ /* Optional because not all virtio-pci devices support legacy mode */ ++ { "virtio-pci", "disable-modern", "on", .optional = true }, ++ { "virtio-pci", "disable-legacy", "off", .optional = true }, + { TYPE_PCI_DEVICE, "x-pcie-lnksta-dllla", "off" }, + { "virtio-pci", "page-per-vq", "on" }, + /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ @@ -323,11 +335,11 @@ index 32d1ca9abc..f30afe0f0b 100644 +}; +const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); + - GlobalProperty hw_compat_4_0[] = { - { "VGA", "edid", "false" }, - { "secondary-vga", "edid", "false" }, + GlobalProperty hw_compat_4_1[] = { + { "virtio-pci", "x-pcie-flr-init", "off" }, + }; diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index 945952a9b0..db0681e891 100644 +index 873e5e9706..d1a2efe47e 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c @@ -82,7 +82,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) @@ -340,10 +352,10 @@ index 945952a9b0..db0681e891 100644 }; diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index 581f7d03d5..9a8b1ed805 100644 +index b69fd7d8ad..d8be50a1ce 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c -@@ -76,6 +76,11 @@ typedef struct E1000EState { +@@ -79,6 +79,11 @@ typedef struct E1000EState { E1000ECore core; @@ -355,7 +367,7 @@ index 
581f7d03d5..9a8b1ed805 100644 } E1000EState; #define E1000E_MMIO_IDX 0 -@@ -91,6 +96,10 @@ typedef struct E1000EState { +@@ -94,6 +99,10 @@ typedef struct E1000EState { #define E1000E_MSIX_TABLE (0x0000) #define E1000E_MSIX_PBA (0x2000) @@ -366,7 +378,7 @@ index 581f7d03d5..9a8b1ed805 100644 static uint64_t e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) { -@@ -302,6 +311,8 @@ e1000e_init_msix(E1000EState *s) +@@ -305,6 +314,8 @@ e1000e_init_msix(E1000EState *s) } else { if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { msix_uninit(d, &s->msix, &s->msix); @@ -375,7 +387,7 @@ index 581f7d03d5..9a8b1ed805 100644 } } } -@@ -473,6 +484,8 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) +@@ -476,6 +487,8 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) ret = msi_init(PCI_DEVICE(s), 0xD0, 1, true, false, NULL); if (ret) { trace_e1000e_msi_init_fail(ret); @@ -384,7 +396,7 @@ index 581f7d03d5..9a8b1ed805 100644 } if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, -@@ -596,6 +609,11 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { +@@ -599,6 +612,11 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ e1000e_vmstate_intr_timer, E1000IntrDelayTimer) @@ -396,7 +408,7 @@ index 581f7d03d5..9a8b1ed805 100644 static const VMStateDescription e1000e_vmstate = { .name = "e1000e", .version_id = 1, -@@ -607,6 +625,7 @@ static const VMStateDescription e1000e_vmstate = { +@@ -610,6 +628,7 @@ static const VMStateDescription e1000e_vmstate = { VMSTATE_MSIX(parent_obj, E1000EState), VMSTATE_UINT32(ioaddr, E1000EState), @@ -404,7 +416,7 @@ index 581f7d03d5..9a8b1ed805 100644 VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), VMSTATE_UINT8(core.rx_desc_len, E1000EState), VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, -@@ -655,6 +674,8 @@ static PropertyInfo e1000e_prop_disable_vnet, +@@ -658,6 +677,8 @@ static PropertyInfo e1000e_prop_disable_vnet, static 
Property e1000e_properties[] = { DEFINE_NIC_PROPERTIES(E1000EState, conf), @@ -414,10 +426,10 @@ index 581f7d03d5..9a8b1ed805 100644 e1000e_prop_disable_vnet, bool), DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 09273171e5..0cead119ab 100644 +index 88a97d756d..21d80e96cf 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c -@@ -3176,7 +3176,7 @@ static int rtl8139_pre_save(void *opaque) +@@ -3177,7 +3177,7 @@ static int rtl8139_pre_save(void *opaque) static const VMStateDescription vmstate_rtl8139 = { .name = "rtl8139", @@ -426,7 +438,7 @@ index 09273171e5..0cead119ab 100644 .minimum_version_id = 3, .post_load = rtl8139_post_load, .pre_save = rtl8139_pre_save, -@@ -3257,7 +3257,9 @@ static const VMStateDescription vmstate_rtl8139 = { +@@ -3258,7 +3258,9 @@ static const VMStateDescription vmstate_rtl8139 = { VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), @@ -436,44 +448,19 @@ index 09273171e5..0cead119ab 100644 VMSTATE_UINT16(tally_counters.TxAbt, RTL8139State), VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 7bcd67b098..dd0bc2b977 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -778,6 +778,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, - SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type1.product, product); - SMBIOS_SET_DEFAULT(type1.version, version); -+ SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); - SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type2.product, product); - SMBIOS_SET_DEFAULT(type2.version, version); -diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index 76ca6ec444..e7c87a9c67 100644 ---- a/hw/timer/i8254_common.c -+++ b/hw/timer/i8254_common.c -@@ -270,7 +270,7 @@ static 
const VMStateDescription vmstate_pit_common = { - .pre_save = pit_dispatch_pre_save, - .post_load = pit_dispatch_post_load, - .fields = (VMStateField[]) { -- VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3), -+ VMSTATE_UINT32(channels[0].irq_disabled, PITCommonState), /* qemu-kvm's v2 had 'flags' here */ - VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2, - vmstate_pit_channel, PITChannelState), - VMSTATE_INT64(channels[0].next_transition_time, -diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c -index ce4550b6f2..777efe9819 100644 ---- a/hw/timer/mc146818rtc.c -+++ b/hw/timer/mc146818rtc.c -@@ -37,6 +37,7 @@ - #include "qapi/qapi-events-misc-target.h" +diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c +index ee6bf82b40..d704213824 100644 +--- a/hw/rtc/mc146818rtc.c ++++ b/hw/rtc/mc146818rtc.c +@@ -42,6 +42,7 @@ #include "qapi/visitor.h" #include "exec/address-spaces.h" + #include "hw/rtc/mc146818rtc_regs.h" +#include "migration/migration.h" #ifdef TARGET_I386 - #include "hw/i386/apic.h" -@@ -843,6 +844,11 @@ static int rtc_post_load(void *opaque, int version_id) + #include "qapi/qapi-commands-misc-target.h" +@@ -819,6 +820,11 @@ static int rtc_post_load(void *opaque, int version_id) static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) { RTCState *s = (RTCState *)opaque; @@ -485,11 +472,36 @@ index ce4550b6f2..777efe9819 100644 return s->irq_reinject_on_ack_count != 0; } +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 11d476c4a2..e6e9355384 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -777,6 +777,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type1.product, product); + SMBIOS_SET_DEFAULT(type1.version, version); ++ SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); + SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type2.product, product); + 
SMBIOS_SET_DEFAULT(type2.version, version); +diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c +index 050875b497..32935da46c 100644 +--- a/hw/timer/i8254_common.c ++++ b/hw/timer/i8254_common.c +@@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = { + .pre_save = pit_dispatch_pre_save, + .post_load = pit_dispatch_post_load, + .fields = (VMStateField[]) { +- VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3), ++ VMSTATE_UINT32(channels[0].irq_disabled, PITCommonState), /* qemu-kvm's v2 had 'flags' here */ + VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2, + vmstate_pit_channel, PITChannelState), + VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 98bd5cf49d..2fd2a9bbf0 100644 +index 23507ad3b5..9fd87a7ad9 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c -@@ -1218,12 +1218,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) +@@ -1219,12 +1219,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) UHCIState *s = UHCI(dev); uint8_t *pci_conf = s->dev.config; int i; @@ -506,10 +518,10 @@ index 98bd5cf49d..2fd2a9bbf0 100644 if (s->masterbus) { USBPort *ports[NB_PORTS]; diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 5894a18663..9854fae583 100644 +index 80988bb305..8fed2eedd6 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c -@@ -3584,9 +3584,27 @@ static const VMStateDescription vmstate_xhci_slot = { +@@ -3590,9 +3590,27 @@ static const VMStateDescription vmstate_xhci_slot = { } }; @@ -537,7 +549,7 @@ index 5894a18663..9854fae583 100644 .fields = (VMStateField[]) { VMSTATE_UINT32(type, XHCIEvent), VMSTATE_UINT32(ccode, XHCIEvent), -@@ -3595,6 +3613,8 @@ static const VMStateDescription vmstate_xhci_event = { +@@ -3601,6 +3619,8 @@ static const VMStateDescription vmstate_xhci_event = { VMSTATE_UINT32(flags, XHCIEvent), VMSTATE_UINT8(slotid, XHCIEvent), VMSTATE_UINT8(epid, XHCIEvent), @@ -574,10 +586,10 @@ index 
41568d1837..1a23ccc412 100644 #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" diff --git a/include/hw/boards.h b/include/hw/boards.h -index a71d1a53a5..521c6252bf 100644 +index de45087f34..cc11116585 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -362,4 +362,25 @@ extern const size_t hw_compat_2_2_len; +@@ -377,4 +377,25 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; @@ -604,10 +616,10 @@ index a71d1a53a5..521c6252bf 100644 + #endif diff --git a/include/hw/usb.h b/include/hw/usb.h -index c21f41c8a9..71502b0bad 100644 +index c24d968a19..b353438ea0 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h -@@ -604,4 +604,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, +@@ -605,4 +605,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, uint8_t interface_class, uint8_t interface_subclass, uint8_t interface_protocol); @@ -617,10 +629,10 @@ index c21f41c8a9..71502b0bad 100644 + #endif diff --git a/migration/migration.c b/migration/migration.c -index 8a607fe1e2..beffbfd6e1 100644 +index 354ad072fa..30c53c623b 100644 --- a/migration/migration.c +++ b/migration/migration.c -@@ -116,6 +116,8 @@ enum mig_rp_message_type { +@@ -121,6 +121,8 @@ enum mig_rp_message_type { MIG_RP_MSG_MAX }; @@ -630,10 +642,10 @@ index 8a607fe1e2..beffbfd6e1 100644 migrations at once. 
For now we don't need to add dynamic creation of migration */ diff --git a/migration/migration.h b/migration/migration.h -index 1fdd7b21fd..b89e90199c 100644 +index 79b3dda146..0b1b0d4df5 100644 --- a/migration/migration.h +++ b/migration/migration.h -@@ -332,6 +332,11 @@ void init_dirty_bitmap_incoming_migration(void); +@@ -335,6 +335,11 @@ void init_dirty_bitmap_incoming_migration(void); void migrate_add_address(SocketAddress *address); int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); @@ -646,5 +658,5 @@ index 1fdd7b21fd..b89e90199c 100644 #define qemu_ram_foreach_block \ #warning "Use foreach_not_ignored_block in migration code" -- -2.20.1 +2.21.0 diff --git a/0007-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch similarity index 90% rename from 0007-Add-aarch64-machine-types.patch rename to 0008-Add-aarch64-machine-types.patch index 578f879..26a51d2 100644 --- a/0007-Add-aarch64-machine-types.patch +++ b/0008-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From cddb8f9ba14dba3bc5d37241924e84340a2b1f6a Mon Sep 17 00:00:00 2001 +From 5d044a17a88f2e6adc72e2b6579052e2a3e98e97 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -14,34 +14,37 @@ Rebase changes (4.1.0-rc0): - Removed a15memmap (upstream) - Use virt_flash_create in rhel800_virt_instance_init +Rebase changes (weekly-190913): +- Set numa_mem_supported + Merged patches (4.0.0): - 7bfdb4c aarch64: Add virt-rhel8.0.0 machine type for ARM - 3433e69 aarch64: Set virt-rhel8.0.0 max_cpus to 512 - 4d20863 aarch64: Use 256MB ECAM region by default -Merged patches (4.1.0-rc0): +Merged patches (4.1.0): - c3e39ef aarch64: Add virt-rhel8.1.0 machine type for ARM - 59a46d1 aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine Signed-off-by: Danilo C. L. 
de Paula --- - hw/arm/virt.c | 144 +++++++++++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 145 +++++++++++++++++++++++++++++++++++++++++- include/hw/arm/virt.h | 11 ++++ - 2 files changed, 154 insertions(+), 1 deletion(-) + 2 files changed, 155 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d9496c9363..51fb5f82b4 100644 +index d4bedc2607..1892378914 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -65,6 +65,7 @@ - #include "hw/acpi/acpi.h" - #include "target/arm/internals.h" +@@ -72,6 +72,7 @@ + #include "hw/mem/nvdimm.h" + #include "hw/acpi/generic_event_device.h" +#if 0 /* Disabled for Red Hat Enterprise Linux */ #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -91,7 +92,49 @@ +@@ -98,7 +99,49 @@ DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) @@ -92,7 +95,7 @@ index d9496c9363..51fb5f82b4 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -1722,6 +1765,7 @@ static void machvirt_init(MachineState *machine) +@@ -1763,6 +1806,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -100,7 +103,7 @@ index d9496c9363..51fb5f82b4 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1750,6 +1794,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -1791,6 +1835,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -108,15 +111,15 @@ index d9496c9363..51fb5f82b4 100644 static bool virt_get_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1871,6 +1916,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) +@@ -1912,6 +1957,7 @@ static const CPUArchIdList 
*virt_possible_cpu_arch_ids(MachineState *ms) return ms->possible_cpus; } +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) + static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) { -@@ -2146,3 +2192,99 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -2258,3 +2304,100 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -142,6 +145,7 @@ index d9496c9363..51fb5f82b4 100644 + mc->cpu_index_to_instance_props = virt_cpu_index_to_props; + mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a57"); + mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; ++ mc->numa_mem_supported = true; +} + +static const TypeInfo rhel_machine_info = { @@ -217,10 +221,10 @@ index d9496c9363..51fb5f82b4 100644 +} +DEFINE_RHEL_MACHINE_AS_LATEST(8, 1, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index a72094204e..1baa48d46e 100644 +index 0b41083e9d..53fdf16563 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -137,6 +137,7 @@ typedef struct { +@@ -142,6 +142,7 @@ typedef struct { #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -228,7 +232,7 @@ index a72094204e..1baa48d46e 100644 #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") #define VIRT_MACHINE(obj) \ OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) -@@ -145,6 +146,16 @@ typedef struct { +@@ -150,6 +151,16 @@ typedef struct { #define VIRT_MACHINE_CLASS(klass) \ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) @@ -246,5 +250,5 @@ index a72094204e..1baa48d46e 100644 /* Return the number of used redistributor regions */ -- -2.20.1 +2.21.0 diff --git a/0008-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch similarity index 86% rename from 0008-Add-ppc64-machine-types.patch rename to 0009-Add-ppc64-machine-types.patch index 95f4901..a47f115 100644 --- a/0008-Add-ppc64-machine-types.patch +++ b/0009-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 765586a71ea9b01f414e4662d11ea77288976f31 Mon Sep 17 00:00:00 2001 +From dbf2123f930a53e949cbeea7a272e453f3efe124 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -15,47 +15,66 @@ Rebase changes (4.0.0): - Use p8 as default for rhel machine types (p9 default upstream) - sPAPRMachineClass renamed to SpaprMachineClass (upstream) -Rebase changes (4.1.0-rc2): +Rebase changes (4.1.0): - Update format for compat structures Merged patches (4.0.0): - 467d59a redhat: define pseries-rhel8.0.0 machine type -Merged patches (4.1.0-rc0): +Merged patches (4.1.0): - f21757edc target/ppc/spapr: Enable mitigations by default for pseries-4.0 machine type - 2511c63 redhat: sync pseries-rhel7.6.0 with rhel-av-8.0.1 - 89f01da redhat: define pseries-rhel8.1.0 machine type +Merged patches (weekly-190830): +- bcba728 redhat: update pseries-rhel8.1.0 machine type + Signed-off-by: Danilo C. L. 
de Paula --- - hw/ppc/spapr.c | 243 ++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr.c | 251 ++++++++++++++++++++++++++++++++++++++++ hw/ppc/spapr_cpu_core.c | 13 +++ include/hw/ppc/spapr.h | 1 + target/ppc/compat.c | 13 ++- target/ppc/cpu.h | 1 + - 5 files changed, 270 insertions(+), 1 deletion(-) + 5 files changed, 278 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 12ed4b065c..669eae100e 100644 +index 94f9d27096..59ccd182d4 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -4382,6 +4382,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) - spapr_caps_add_properties(smc, &error_abort); - smc->irq = &spapr_irq_dual; - smc->dr_phb_enabled = true; +@@ -4442,6 +4442,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) + smc->linux_pci_probe = true; + smc->smp_threads_vsmt = true; + smc->nr_xirqs = SPAPR_NR_XIRQS; + smc->has_power9_support = true; } static const TypeInfo spapr_machine_info = { -@@ -4426,6 +4427,7 @@ static const TypeInfo spapr_machine_info = { +@@ -4486,6 +4487,7 @@ static const TypeInfo spapr_machine_info = { } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-4.1 + * pseries-4.2 */ -@@ -4609,6 +4611,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); +@@ -4515,6 +4517,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) + } + + DEFINE_SPAPR_MACHINE(4_1, "4.1", false); ++#endif + + /* + * pseries-4.0 +@@ -4531,6 +4534,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, + *nv2atsd = 0; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void spapr_machine_4_0_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +@@ -4690,6 +4694,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); /* * pseries-2.7 */ @@ -63,7 +82,7 @@ index 12ed4b065c..669eae100e 100644 static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, uint64_t 
*buid, hwaddr *pio, -@@ -4663,6 +4666,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, +@@ -4744,6 +4749,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; } @@ -71,7 +90,7 @@ index 12ed4b065c..669eae100e 100644 static void spapr_machine_2_7_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4777,6 +4781,245 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -4858,6 +4864,251 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -90,7 +109,7 @@ index 12ed4b065c..669eae100e 100644 + +/* + * pseries-rhel8.0.0 -+ * like spapr_compat_3_1 ++ * like pseries-3.1 and pseries-4.0 + * except SPAPR_CAP_CFPC, SPAPR_CAP_SBBC and SPAPR_CAP_IBS + * that have been backported to pseries-rhel8.0.0 + */ @@ -103,6 +122,12 @@ index 12ed4b065c..669eae100e 100644 + compat_props_add(mc->compat_props, hw_compat_rhel_8_0, + hw_compat_rhel_8_0_len); + ++ /* pseries-4.0 */ ++ smc->phb_placement = phb_placement_4_0; ++ smc->irq = &spapr_irq_xics; ++ smc->pre_4_1_migration = true; ++ ++ /* pseries-3.1 */ + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); + smc->update_dt_enabled = false; + smc->dr_phb_enabled = false; @@ -318,18 +343,18 @@ index 12ed4b065c..669eae100e 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index b91a106074..29a3c7d8ee 100644 +index ef034a1279..05f0a83128 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -21,6 +21,7 @@ - #include "sysemu/numa.h" +@@ -24,6 +24,7 @@ + #include "sysemu/reset.h" #include "sysemu/hw_accel.h" #include "qemu/error-report.h" +#include "cpu-models.h" - static void spapr_cpu_reset(void *opaque) + static void spapr_reset_vcpu(PowerPCCPU *cpu) { -@@ -224,6 +225,7 @@ static void 
spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -247,6 +248,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); Error *local_err = NULL; @@ -337,7 +362,7 @@ index b91a106074..29a3c7d8ee 100644 object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); if (local_err) { -@@ -236,6 +238,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -259,6 +261,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); kvmppc_set_papr(cpu); @@ -352,16 +377,16 @@ index b91a106074..29a3c7d8ee 100644 + return; + } + - qemu_register_reset(spapr_cpu_reset, cpu); - spapr_cpu_reset(cpu); - + if (spapr_irq_cpu_intc_create(spapr, cpu, &local_err) < 0) { + goto error_intc_create; + } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 60553d32c4..b0ba32e6dd 100644 +index d5ab5ea7b2..aa89cc4a95 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -121,6 +121,7 @@ struct SpaprMachineClass { - bool broken_host_serial_model; /* present real host info to the guest */ - bool pre_4_1_migration; /* don't migrate hpt-max-page-size */ +@@ -125,6 +125,7 @@ struct SpaprMachineClass { + bool linux_pci_probe; + bool smp_threads_vsmt; /* set VSMT to smp_threads by default */ + bool has_power9_support; void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, @@ -393,10 +418,10 @@ index 7de4bf3122..3e2e35342d 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index c9beba2a5c..76cb7c2e37 100644 +index e3e82327b7..5c53801cfd 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1350,6 +1350,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1367,6 +1367,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool 
ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -405,5 +430,5 @@ index c9beba2a5c..76cb7c2e37 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, -- -2.20.1 +2.21.0 diff --git a/0009-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch similarity index 69% rename from 0009-Add-s390x-machine-types.patch rename to 0010-Add-s390x-machine-types.patch index 3c353ee..d6a8633 100644 --- a/0009-Add-s390x-machine-types.patch +++ b/0010-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 2ae1e5940185a169886f2492f97fc98a625da889 Mon Sep 17 00:00:00 2001 +From 18847aab397e7480af49e3a5cd4f6e3b7deae361 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -13,19 +13,23 @@ Rebase changes (weekly-190111): Merged patches (3.1.0): - 29df663 s390x/cpumodel: default enable bpb and ppa15 for z196 and later -Merged patches (4.1.0-rc0): +Merged patches (4.1.0): - 6c200d665b hw/s390x/s390-virtio-ccw: Add machine types for RHEL8.0.0 +Merged patches (weekly-190830): +- fb192e5 redhat: s390x: Rename s390-ccw-virtio-rhel8.0.0 to s390-ccw-virtio-rhel8.1.0 +- a9b22e8 redhat: s390x: Add proper compatibility options for the -rhel7.6.0 machine + Signed-off-by: Danilo C. L. 
de Paula --- - hw/s390x/s390-virtio-ccw.c | 67 +++++++++++++++++++++++++++++++++++++- - 1 file changed, 66 insertions(+), 1 deletion(-) + hw/s390x/s390-virtio-ccw.c | 80 +++++++++++++++++++++++++++++++++++++- + 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 5b6a9a4e55..4d8c2ec22a 100644 +index d3edeef0ad..7bca634666 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -636,7 +636,7 @@ bool css_migration_enabled(void) +@@ -615,7 +615,7 @@ bool css_migration_enabled(void) { \ MachineClass *mc = MACHINE_CLASS(oc); \ ccw_machine_##suffix##_class_options(mc); \ @@ -34,15 +38,15 @@ index 5b6a9a4e55..4d8c2ec22a 100644 if (latest) { \ mc->alias = "s390-ccw-virtio"; \ mc->is_default = 1; \ -@@ -660,6 +660,7 @@ bool css_migration_enabled(void) +@@ -639,6 +639,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_4_1_instance_options(MachineState *machine) + static void ccw_machine_4_2_instance_options(MachineState *machine) { } -@@ -873,6 +874,70 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -866,6 +867,83 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); @@ -66,23 +70,36 @@ index 5b6a9a4e55..4d8c2ec22a 100644 +}; +const size_t ccw_compat_rhel_7_5_len = G_N_ELEMENTS(ccw_compat_rhel_7_5); + -+static void ccw_machine_rhel800_instance_options(MachineState *machine) ++static void ccw_machine_rhel810_instance_options(MachineState *machine) +{ +} + -+static void ccw_machine_rhel800_class_options(MachineClass *mc) ++static void ccw_machine_rhel810_class_options(MachineClass *mc) +{ +} -+DEFINE_CCW_MACHINE(rhel800, "rhel8.0.0", true); ++DEFINE_CCW_MACHINE(rhel810, "rhel8.1.0", true); + +static void 
ccw_machine_rhel760_instance_options(MachineState *machine) +{ -+ ccw_machine_rhel800_instance_options(machine); ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V3_1 }; ++ ++ ccw_machine_rhel810_instance_options(machine); ++ ++ s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat); ++ ++ /* The multiple-epoch facility was not available with rhel7.6.0 on z14GA1 */ ++ s390_cpudef_featoff(14, 1, S390_FEAT_MULTIPLE_EPOCH); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QSIE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QTOUE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOUE); +} + +static void ccw_machine_rhel760_class_options(MachineClass *mc) +{ -+ ccw_machine_rhel800_class_options(mc); ++ ccw_machine_rhel810_class_options(mc); ++ /* We never published the s390x version of RHEL8.0 AV, so add this here */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); + compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); + compat_props_add(mc->compat_props, ccw_compat_rhel_7_6, ccw_compat_rhel_7_6_len); +} @@ -114,5 +131,5 @@ index 5b6a9a4e55..4d8c2ec22a 100644 static void ccw_machine_register_types(void) { -- -2.20.1 +2.21.0 diff --git a/0010-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch similarity index 83% rename from 0010-Add-x86_64-machine-types.patch rename to 0011-Add-x86_64-machine-types.patch index 23a3ebe..a39e26b 100644 --- a/0010-Add-x86_64-machine-types.patch +++ b/0011-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 728953bda3fcd8bc06026b59b3b86191fb7787c0 Mon Sep 17 00:00:00 2001 +From 3f0ddfe3f8bc734af3f68884c01c58800ef42d0d Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -13,10 +13,10 @@ Rebase changes (qemu-4.0.0): Rebase notes (3.1.0): - Removed xsave changes -Rebase notes (4.1.0-rc2): +Rebase notes (4.1.0): - Updated format for 
compat structures -Merged patches (4.1.0-rc0): +Merged patches (4.1.0): - f4dc802 pc: 7.5 compat entries - 456ed3e pc: PC_RHEL7_6_COMPAT - 04119ee pc: Add compat for pc-i440fx-rhel7.6.0 machine type @@ -26,23 +26,30 @@ Merged patches (4.1.0-rc0): - 18cf0d7 target/i386: Disable MPX support on named CPU models (partialy) - 2660667 rhel: Set host-phys-bits-limit=48 on rhel machine-types +Merged patches (weekly-190823): +- 7d5c2ef pc: Don't make die-id mandatory unless necessary +- e42808c x86 machine types: pc_rhel_8_0_compat +- 9de83a8 x86 machine types: q35: Fixup units_per_default_bus +- 6df1559 x86 machine types: Fixup dynamic sysbus entries +- 0784125 x86 machine types: add pc-q35-rhel8.1.0 + Signed-off-by: Danilo C. L. de Paula --- hw/i386/acpi-build.c | 3 + - hw/i386/pc.c | 226 ++++++++++++++++++++++++++++++++++++++++++- - hw/i386/pc_piix.c | 200 +++++++++++++++++++++++++++++++++++++- - hw/i386/pc_q35.c | 114 +++++++++++++++++++++- + hw/i386/pc.c | 259 ++++++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 207 +++++++++++++++++++++++++++++++++- + hw/i386/pc_q35.c | 139 ++++++++++++++++++++++- include/hw/boards.h | 2 + - include/hw/i386/pc.h | 27 ++++++ + include/hw/i386/pc.h | 30 +++++ target/i386/cpu.c | 9 +- target/i386/kvm.c | 4 + - 8 files changed, 578 insertions(+), 7 deletions(-) + 8 files changed, 646 insertions(+), 7 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index f3fdfefcd5..1d64f0bad2 100644 +index 12ff55fcfb..64001893ab 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c -@@ -203,6 +203,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) +@@ -204,6 +204,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) pm->fadt.reset_reg = r; pm->fadt.reset_val = 0xf; pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; @@ -53,10 +60,10 @@ index f3fdfefcd5..1d64f0bad2 100644 } diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 549c437050..9ded0db80d 100644 +index 
96715f8a3f..677b63a37f 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -354,6 +354,224 @@ GlobalProperty pc_compat_1_4[] = { +@@ -344,6 +344,257 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -72,6 +79,39 @@ index 549c437050..9ded0db80d 100644 +}; +const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_8_0_compat[] = { ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "intel-iommu", "dma-drain", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /** The mpx=on entries from pc_compat_3_1 are in pc_rhel_7_6_compat **/ ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Cascadelake-Server" "-" TYPE_X86_CPU, "stepping", "5" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { TYPE_X86_CPU, "x-intel-pt-auto-level", "off" }, ++}; ++const size_t pc_rhel_8_0_compat_len = G_N_ELEMENTS(pc_rhel_8_0_compat); ++ +/* Similar to 
PC_COMPAT_3_0 + PC_COMPAT_2_12, but: + * all of the 2_12 stuff was already in 7.6 from bz 1481253 + * x-migrate-smi-count comes from PC_COMPAT_2_11 but @@ -281,7 +321,7 @@ index 549c437050..9ded0db80d 100644 void gsi_handler(void *opaque, int n, int level) { GSIState *s = opaque; -@@ -1901,7 +2119,8 @@ void pc_memory_init(PCMachineState *pcms, +@@ -1225,7 +1476,8 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr = g_malloc(sizeof(*option_rom_mr)); memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, &error_fatal); @@ -291,16 +331,16 @@ index 549c437050..9ded0db80d 100644 memory_region_set_readonly(option_rom_mr, true); } memory_region_add_subregion_overlap(rom_memory, -@@ -2928,6 +3147,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -2199,6 +2451,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->linuxboot_dma_enabled = true; pcmc->pvh_enabled = true; assert(!mc->get_hotplug_handler); + pcmc->pc_rom_ro = true; + mc->async_pf_vmexit_disable = false; mc->get_hotplug_handler = pc_get_hotplug_handler; - mc->cpu_index_to_instance_props = pc_cpu_index_to_props; - mc->get_default_cpu_node_id = pc_get_default_cpu_node_id; -@@ -2938,7 +3159,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->hotplug_allowed = pc_hotplug_allowed; + mc->cpu_index_to_instance_props = x86_cpu_index_to_props; +@@ -2210,7 +2464,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->hot_add_cpu = pc_hot_add_cpu; mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; @@ -308,13 +348,13 @@ index 549c437050..9ded0db80d 100644 + /* 240: max CPU count for RHEL */ + mc->max_cpus = 240; mc->reset = pc_machine_reset; + mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; - hc->plug = pc_machine_device_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index c2280c72ef..c86c48c092 100644 +index a19f8058ab..a66005a0ec 100644 --- a/hw/i386/pc_piix.c +++ 
b/hw/i386/pc_piix.c -@@ -51,6 +51,7 @@ +@@ -53,6 +53,7 @@ #include "cpu.h" #include "qapi/error.h" #include "qemu/error-report.h" @@ -322,7 +362,7 @@ index c2280c72ef..c86c48c092 100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -173,8 +174,8 @@ static void pc_init1(MachineState *machine, +@@ -176,8 +177,8 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ @@ -333,15 +373,15 @@ index c2280c72ef..c86c48c092 100644 pcmc->smbios_uuid_encoded, SMBIOS_ENTRY_POINT_21); } -@@ -316,6 +317,7 @@ else { - * HW_COMPAT_*, PC_COMPAT_*, or * pc_*_machine_options(). +@@ -309,6 +310,7 @@ else { + * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). */ +#if 0 /* Disabled for Red Hat Enterprise Linux */ static void pc_compat_2_3_fn(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); -@@ -1022,3 +1024,197 @@ static void xenfv_machine_options(MachineClass *m) +@@ -1028,3 +1030,204 @@ static void xenfv_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init, xenfv_machine_options); #endif @@ -358,6 +398,7 @@ index c2280c72ef..c86c48c092 100644 + pcmc->default_nic_model = "e1000"; + m->default_display = "std"; + m->no_parallel = 1; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + m->alias = "pc"; + m->is_default = 1; @@ -371,9 +412,15 @@ index c2280c72ef..c86c48c092 100644 + +static void pc_machine_rhel760_options(MachineClass *m) +{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_machine_rhel7_options(m); + m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; + m->async_pf_vmexit_disable = true; ++ m->smbus_no_migration_support = true; ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ 
compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); + compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); +} @@ -540,10 +587,10 @@ index c2280c72ef..c86c48c092 100644 +DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 397e1fdd2f..4959ed329f 100644 +index d51f524727..542947b032 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -198,8 +198,8 @@ static void pc_q35_init(MachineState *machine) +@@ -197,8 +197,8 @@ static void pc_q35_init(MachineState *machine) if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ @@ -554,7 +601,7 @@ index 397e1fdd2f..4959ed329f 100644 pcmc->smbios_uuid_encoded, SMBIOS_ENTRY_POINT_21); } -@@ -347,6 +347,7 @@ static void pc_q35_init(MachineState *machine) +@@ -330,6 +330,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -562,7 +609,7 @@ index 397e1fdd2f..4959ed329f 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -538,3 +539,112 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -533,3 +534,137 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -576,16 +623,34 @@ index 397e1fdd2f..4959ed329f 100644 + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pcmc->default_nic_model = "e1000e"; + m->family = "pc_q35_Z"; ++ m->units_per_default_bus = 1; + m->default_machine_opts = "firmware=bios-256k.bin"; + m->default_display = "std"; + m->no_floppy = 1; + m->no_parallel = 1; -+ machine_class_allow_dynamic_sysbus_dev(m, TYPE_SYS_BUS_DEVICE); ++ pcmc->default_cpu_version = 1; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); ++ 
machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); + m->alias = "q35"; + m->max_cpus = 384; + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); +} + ++static void pc_q35_init_rhel810(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel810_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel810, "pc-q35-rhel8.1.0", pc_q35_init_rhel810, ++ pc_q35_machine_rhel810_options); ++ +static void pc_q35_init_rhel800(MachineState *machine) +{ + pc_q35_init(machine); @@ -593,8 +658,15 @@ index 397e1fdd2f..4959ed329f 100644 + +static void pc_q35_machine_rhel800_options(MachineClass *m) +{ -+ pc_q35_machine_rhel_options(m); ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel810_options(m); + m->desc = "RHEL-8.0.0 PC (Q35 + ICH9, 2009)"; ++ m->smbus_no_migration_support = true; ++ m->alias = NULL; ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); +} + +DEFINE_PC_MACHINE(q35_rhel800, "pc-q35-rhel8.0.0", pc_q35_init_rhel800, @@ -676,10 +748,10 @@ index 397e1fdd2f..4959ed329f 100644 +DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, + pc_q35_machine_rhel730_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index 521c6252bf..b4a8c4ab10 100644 +index cc11116585..3cc126f0f4 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -214,6 +214,8 @@ struct MachineClass { +@@ -222,6 +222,8 @@ struct MachineClass { const char **valid_cpu_types; strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; @@ -689,26 +761,29 @@ index 521c6252bf..b4a8c4ab10 100644 int nb_nodes, ram_addr_t 
size); bool ignore_boot_device_suffixes; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 859b64c51d..605cc714d3 100644 +index e6fa8418ca..379ed968b3 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -142,6 +142,9 @@ typedef struct PCMachineClass { +@@ -126,6 +126,9 @@ typedef struct PCMachineClass { - /* Enables contiguous-apic-ID mode */ - bool compat_apic_id_mode; + /* use PVH to load kernels that support this feature */ + bool pvh_enabled; + + /* RH only, see bz 1489800 */ + bool pc_rom_ro; } PCMachineClass; #define TYPE_PC_MACHINE "generic-pc-machine" -@@ -362,6 +365,30 @@ extern const size_t pc_compat_1_5_len; +@@ -302,6 +305,33 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_8_0_compat[]; ++extern const size_t pc_rhel_8_0_compat_len; ++ +extern GlobalProperty pc_rhel_7_6_compat[]; +extern const size_t pc_rhel_7_6_compat_len; + @@ -734,10 +809,10 @@ index 859b64c51d..605cc714d3 100644 * depending on QEMU versions up to QEMU 2.4. 
*/ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 47a1236e9f..cd71a09b33 100644 +index ba5e9faeae..a4ae730ca5 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1548,11 +1548,17 @@ static CPUCaches epyc_cache_info = { +@@ -1801,11 +1801,17 @@ static CPUCaches epyc_cache_info = { static X86CPUDefinition builtin_x86_defs[] = { { @@ -756,7 +831,7 @@ index 47a1236e9f..cd71a09b33 100644 .stepping = 3, .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -@@ -2861,6 +2867,7 @@ static PropValue kvm_default_props[] = { +@@ -3173,6 +3179,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -765,10 +840,10 @@ index 47a1236e9f..cd71a09b33 100644 }; diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index dbbb13772a..da5a5ef5f3 100644 +index bfd09bd441..da312a4801 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c -@@ -2805,6 +2805,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3064,6 +3064,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -776,7 +851,7 @@ index dbbb13772a..da5a5ef5f3 100644 kvm_msr_buf_reset(cpu); -@@ -3102,6 +3103,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3370,6 +3371,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -787,5 +862,5 @@ index dbbb13772a..da5a5ef5f3 100644 case MSR_KVM_PV_EOI_EN: env->pv_eoi_en_msr = msrs[i].data; -- -2.20.1 +2.21.0 diff --git a/0011-Enable-make-check.patch b/0012-Enable-make-check.patch similarity index 80% rename from 0011-Enable-make-check.patch rename to 0012-Enable-make-check.patch index 6497683..5eed2b1 100644 --- a/0011-Enable-make-check.patch +++ b/0012-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 70e14e50ec8652a4243111dc293fe09ebcef8de1 Mon Sep 17 00:00:00 2001 +From e8c1a5f4768a41cabdcb54cfdbc1a5a4146ff1ad Mon 
Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:48:41 +0200 Subject: Enable make check @@ -19,34 +19,35 @@ Rebase changes (4.1.0-rc0): Rebase changes (4.1.0-rc1): - remove all 205 tests (unstable) +Rebase changes (4.2.0-rc0): +- partially disable hd-geo-test (requires lsi53c895a) + Merged patches (4.0.0): - f7ffd13 Remove 7 qcow2 and luks iotests that are taking > 25 sec to run during the fast train build proce Merged patches (4.1.0-rc0): - 41288ff redhat: Remove raw iotest 205 -Conflicts: - tests/qemu-iotests/group - Signed-off-by: Danilo C. L. de Paula --- - redhat/qemu-kvm.spec.template | 8 +++++++- + redhat/qemu-kvm.spec.template | 2 +- tests/Makefile.include | 10 +++++----- tests/boot-serial-test.c | 6 +++++- tests/cpu-plug-test.c | 4 ++-- tests/e1000-test.c | 2 ++ + tests/hd-geo-test.c | 4 ++++ tests/prom-env-test.c | 4 ++++ tests/qemu-iotests/051 | 12 ++++++------ tests/qemu-iotests/group | 4 ++-- tests/test-x86-cpuid-compat.c | 2 ++ tests/usb-hcd-xhci-test.c | 4 ++++ - 10 files changed, 39 insertions(+), 17 deletions(-) + 11 files changed, 37 insertions(+), 17 deletions(-) diff --git a/tests/Makefile.include b/tests/Makefile.include -index fd7fdb8658..d3da940f8c 100644 +index b483790cf3..53bdbdfee0 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include -@@ -164,7 +164,7 @@ check-qtest-i386-y += tests/ide-test$(EXESUF) +@@ -172,7 +172,7 @@ check-qtest-i386-y += tests/ide-test$(EXESUF) check-qtest-i386-y += tests/ahci-test$(EXESUF) check-qtest-i386-y += tests/hd-geo-test$(EXESUF) check-qtest-i386-y += tests/boot-order-test$(EXESUF) @@ -55,7 +56,7 @@ index fd7fdb8658..d3da940f8c 100644 check-qtest-i386-$(CONFIG_SGA) += tests/boot-serial-test$(EXESUF) check-qtest-i386-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) check-qtest-i386-y += tests/rtc-test$(EXESUF) -@@ -223,7 +223,7 @@ check-qtest-mips64el-$(CONFIG_VGA) += tests/display-vga-test$(EXESUF) +@@ -230,7 +230,7 @@ check-qtest-mips64el-$(CONFIG_VGA) += 
tests/display-vga-test$(EXESUF) check-qtest-moxie-y += tests/boot-serial-test$(EXESUF) check-qtest-ppc-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) @@ -64,7 +65,7 @@ index fd7fdb8658..d3da940f8c 100644 check-qtest-ppc-y += tests/prom-env-test$(EXESUF) check-qtest-ppc-y += tests/drive_del-test$(EXESUF) check-qtest-ppc-y += tests/boot-serial-test$(EXESUF) -@@ -237,8 +237,8 @@ check-qtest-ppc64-$(CONFIG_PSERIES) += tests/rtas-test$(EXESUF) +@@ -244,8 +244,8 @@ check-qtest-ppc64-$(CONFIG_PSERIES) += tests/rtas-test$(EXESUF) check-qtest-ppc64-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) check-qtest-ppc64-$(CONFIG_USB_UHCI) += tests/usb-hcd-uhci-test$(EXESUF) check-qtest-ppc64-$(CONFIG_USB_XHCI_NEC) += tests/usb-hcd-xhci-test$(EXESUF) @@ -75,7 +76,7 @@ index fd7fdb8658..d3da940f8c 100644 check-qtest-ppc64-$(CONFIG_RTL8139_PCI) += tests/test-filter-redirector$(EXESUF) check-qtest-ppc64-$(CONFIG_VGA) += tests/display-vga-test$(EXESUF) check-qtest-ppc64-y += tests/numa-test$(EXESUF) -@@ -282,7 +282,7 @@ check-qtest-s390x-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) +@@ -291,7 +291,7 @@ check-qtest-s390x-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF) check-qtest-s390x-y += tests/drive_del-test$(EXESUF) @@ -85,10 +86,10 @@ index fd7fdb8658..d3da940f8c 100644 check-qtest-s390x-y += tests/cpu-plug-test$(EXESUF) check-qtest-s390x-y += tests/migration-test$(EXESUF) diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c -index 24852d4c7d..dce5860d99 100644 +index d3a54a0ba5..33ce72b89c 100644 --- a/tests/boot-serial-test.c +++ b/tests/boot-serial-test.c -@@ -97,18 +97,22 @@ static testdef_t tests[] = { +@@ -108,19 +108,23 @@ static testdef_t tests[] = { { "ppc", "g3beige", "", "PowerPC,750" }, { "ppc", "mac99", "", "PowerPC,G4" }, { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, @@ -101,7 +102,8 @@ index 
24852d4c7d..dce5860d99 100644 "-machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken", "Open Firmware" }, +#if 0 /* Disabled for Red Hat Enterprise Linux */ - { "ppc64", "powernv", "-cpu POWER8", "OPAL" }, + { "ppc64", "powernv8", "", "OPAL" }, + { "ppc64", "powernv9", "", "OPAL" }, { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, +#endif { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, @@ -113,10 +115,10 @@ index 24852d4c7d..dce5860d99 100644 { "sparc", "LX", "", "TMS390S10" }, { "sparc", "SS-4", "", "MB86904" }, diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c -index 668f00144e..453ca8e583 100644 +index 30e514bbfb..a04beae1c6 100644 --- a/tests/cpu-plug-test.c +++ b/tests/cpu-plug-test.c -@@ -190,8 +190,8 @@ static void add_pseries_test_case(const char *mname) +@@ -185,8 +185,8 @@ static void add_pseries_test_case(const char *mname) char *path; PlugTestData *data; @@ -143,6 +145,38 @@ index c387984ef6..c89112d6f8 100644 }; static void *e1000_get_driver(void *obj, const char *interface) +diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c +index 7e86c5416c..cc068bad87 100644 +--- a/tests/hd-geo-test.c ++++ b/tests/hd-geo-test.c +@@ -732,6 +732,7 @@ static void test_override_ide(void) + test_override(args, expected); + } + ++#if 0 /* Require lsi53c895a - not supported on RHEL */ + static void test_override_scsi(void) + { + TestArgs *args = create_args(); +@@ -776,6 +777,7 @@ static void test_override_scsi_2_controllers(void) + add_scsi_disk(args, 3, 1, 0, 1, 2, 0, 1, 0); + test_override(args, expected); + } ++#endif + + static void test_override_virtio_blk(void) + { +@@ -951,9 +953,11 @@ int main(int argc, char **argv) + qtest_add_func("hd-geo/ide/device/user/chst", test_ide_device_user_chst); + if (have_qemu_img()) { + qtest_add_func("hd-geo/override/ide", test_override_ide); ++#if 0 /* Require lsi53c895a - not supported on RHEL */ + qtest_add_func("hd-geo/override/scsi", test_override_scsi); + 
qtest_add_func("hd-geo/override/scsi_2_controllers", + test_override_scsi_2_controllers); ++#endif + qtest_add_func("hd-geo/override/virtio_blk", test_override_virtio_blk); + qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); + qtest_add_func("hd-geo/override/scsi_hot_unplug", diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c index 61bc1d1e7b..028d45c7d7 100644 --- a/tests/prom-env-test.c @@ -163,7 +197,7 @@ index 61bc1d1e7b..028d45c7d7 100644 add_tests(sparc_machines); } else if (!strcmp(arch, "sparc64")) { diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 -index ce942a5444..f64429e21f 100755 +index 53bcdbc911..b387e0c233 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -181,11 +181,11 @@ run_qemu -drive if=virtio @@ -197,7 +231,7 @@ index ce942a5444..f64429e21f 100755 *) ;; diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index f13e5f2e23..813db2565b 100644 +index 065040398d..959fb52824 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -92,7 +92,7 @@ @@ -219,7 +253,7 @@ index f13e5f2e23..813db2565b 100644 101 rw quick 102 rw quick diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c -index e75b959950..6b46b73dd0 100644 +index 772287bdb4..e7c075ed98 100644 --- a/tests/test-x86-cpuid-compat.c +++ b/tests/test-x86-cpuid-compat.c @@ -300,6 +300,7 @@ int main(int argc, char **argv) @@ -239,26 +273,26 @@ index e75b959950..6b46b73dd0 100644 /* Test feature parsing */ add_feature_test("x86/cpuid/features/plus", diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c -index 9eb24b00e4..465ed26dfc 100644 +index 10ef9d2a91..3855873050 100644 --- a/tests/usb-hcd-xhci-test.c +++ b/tests/usb-hcd-xhci-test.c @@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) - usb_test_hotplug("xhci", "1", NULL); + usb_test_hotplug(global_qtest, "xhci", "1", NULL); } +#if 0 /* Disabled for Red Hat Enterprise Linux */ static void test_usb_uas_hotplug(void) { - 
qtest_qmp_device_add("usb-uas", "uas", "{}"); -@@ -34,6 +35,7 @@ static void test_usb_uas_hotplug(void) - qtest_qmp_device_del("scsihd"); - qtest_qmp_device_del("uas"); + QTestState *qts = global_qtest; +@@ -36,6 +37,7 @@ static void test_usb_uas_hotplug(void) + qtest_qmp_device_del(qts, "scsihd"); + qtest_qmp_device_del(qts, "uas"); } +#endif static void test_usb_ccid_hotplug(void) { -@@ -52,7 +54,9 @@ int main(int argc, char **argv) +@@ -56,7 +58,9 @@ int main(int argc, char **argv) qtest_add_func("/xhci/pci/init", test_xhci_init); qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); @@ -269,5 +303,5 @@ index 9eb24b00e4..465ed26dfc 100644 qtest_start("-device nec-usb-xhci,id=xhci" -- -2.20.1 +2.21.0 diff --git a/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch similarity index 91% rename from 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch rename to 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index 02030ab..b9fbacc 100644 --- a/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 59f231a2ac7d8915f905cec514de580bbbf039c0 Mon Sep 17 00:00:00 2001 +From 5ff7edc72cce5a04a816fd71b0198f2d530d6630 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -42,12 +42,12 @@ Signed-off-by: Danilo C. L. 
de Paula 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 7c474a9d4a..bb9f28ed95 100644 +index 5cff163334..5184011217 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -41,6 +41,9 @@ +@@ -47,6 +47,9 @@ - #define TYPE_VIFO_PCI_NOHOTPLUG "vfio-pci-nohotplug" + #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" +/* RHEL only: Set once for the first assigned dev */ +static uint16_t device_limit; @@ -55,7 +55,7 @@ index 7c474a9d4a..bb9f28ed95 100644 static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); -@@ -2703,9 +2706,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) +@@ -2710,9 +2713,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) ssize_t len; struct stat st; int groupid; @@ -87,7 +87,7 @@ index 7c474a9d4a..bb9f28ed95 100644 if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3121,6 +3145,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3147,6 +3171,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -98,7 +98,7 @@ index 7c474a9d4a..bb9f28ed95 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 834a90d646..205954e9f8 100644 +index b329d50338..b37c81d882 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -135,6 +135,7 @@ typedef struct VFIOPCIDevice { @@ -110,5 +110,5 @@ index 834a90d646..205954e9f8 100644 uint32_t device_id; uint32_t sub_vendor_id; -- -2.20.1 +2.21.0 diff --git a/0013-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch similarity index 89% rename from 0013-Add-support-statement-to-help-output.patch rename to 
0014-Add-support-statement-to-help-output.patch index b93abbf..3eb53c6 100644 --- a/0013-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 9176ac2bc13e273deab798ec95fc2a01f35db43d Mon Sep 17 00:00:00 2001 +From 339380067c7f385a9a6c368204e24193b630e78b Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -24,10 +24,10 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 9 insertions(+) diff --git a/vl.c b/vl.c -index f9166f509b..dd1fadfe08 100644 +index 668a34577e..9f3e7e7733 100644 --- a/vl.c +++ b/vl.c -@@ -1803,9 +1803,17 @@ static void version(void) +@@ -1822,9 +1822,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -45,7 +45,7 @@ index f9166f509b..dd1fadfe08 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", error_get_progname()); -@@ -1822,6 +1830,7 @@ static void help(int exitcode) +@@ -1841,6 +1849,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); @@ -54,5 +54,5 @@ index f9166f509b..dd1fadfe08 100644 } -- -2.20.1 +2.21.0 diff --git a/0014-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch similarity index 92% rename from 0014-globally-limit-the-maximum-number-of-CPUs.patch rename to 0015-globally-limit-the-maximum-number-of-CPUs.patch index 136895d..88320d0 100644 --- a/0014-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From 11b7001f3eb0c4de20695821eaee5589c4c3ac32 Mon Sep 17 00:00:00 2001 +From 25283fab966a911cbeb4d0d3524cb1e0f1f8f448 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -82,10 +82,10 @@ Signed-off-by: Danilo C. L. 
de Paula 2 files changed, 30 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index f450f25295..2d850df9e0 100644 +index 140b0bd8f6..7a4399c3ef 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c -@@ -1869,6 +1869,18 @@ static int kvm_init(MachineState *ms) +@@ -1925,6 +1925,18 @@ static int kvm_init(MachineState *ms) soft_vcpus_limit = kvm_recommended_vcpus(s); hard_vcpus_limit = kvm_max_vcpus(s); @@ -105,10 +105,10 @@ index f450f25295..2d850df9e0 100644 if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " diff --git a/vl.c b/vl.c -index dd1fadfe08..7c52255141 100644 +index 9f3e7e7733..1550aa2aaa 100644 --- a/vl.c +++ b/vl.c -@@ -132,6 +132,8 @@ int main(int argc, char **argv) +@@ -134,6 +134,8 @@ int main(int argc, char **argv) #define MAX_VIRTIO_CONSOLES 1 @@ -117,7 +117,7 @@ index dd1fadfe08..7c52255141 100644 static const char *data_dir[16]; static int data_dir_idx; const char *bios_name = NULL; -@@ -1337,6 +1339,20 @@ static MachineClass *find_default_machine(GSList *machines) +@@ -1339,6 +1341,20 @@ static MachineClass *find_default_machine(GSList *machines) return NULL; } @@ -138,7 +138,7 @@ index dd1fadfe08..7c52255141 100644 static int machine_help_func(QemuOpts *opts, MachineState *machine) { ObjectProperty *prop; -@@ -3864,6 +3880,8 @@ int main(int argc, char **argv, char **envp) +@@ -3857,6 +3873,8 @@ int main(int argc, char **argv, char **envp) "mutually exclusive"); exit(EXIT_FAILURE); } @@ -148,5 +148,5 @@ index dd1fadfe08..7c52255141 100644 configure_rtc(qemu_find_opts_singleton("rtc")); -- -2.20.1 +2.21.0 diff --git a/0015-Add-support-for-simpletrace.patch b/0016-Add-support-for-simpletrace.patch similarity index 97% rename from 0015-Add-support-for-simpletrace.patch rename to 0016-Add-support-for-simpletrace.patch index 6fc5c29..11a7c10 100644 --- a/0015-Add-support-for-simpletrace.patch +++ b/0016-Add-support-for-simpletrace.patch @@ -1,4 +1,4 @@ -From 
c32f0e6a6369ce5bc5c337b1fa158c43c067ca6f Mon Sep 17 00:00:00 2001 +From 850e66fc482eb897babd9e1f3d1216fd0f7c6382 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 8 Oct 2015 09:50:17 +0200 Subject: Add support for simpletrace @@ -36,10 +36,10 @@ Signed-off-by: Danilo C. L. de Paula create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp diff --git a/Makefile b/Makefile -index 288a5ac28a..8caca6cfc0 100644 +index 086727dbb9..4254950f7f 100644 --- a/Makefile +++ b/Makefile -@@ -910,6 +910,10 @@ endif +@@ -939,6 +939,10 @@ endif $(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \ done $(INSTALL_DATA) $(BUILD_DIR)/trace-events-all "$(DESTDIR)$(qemu_datadir)/trace-events-all" @@ -117,5 +117,5 @@ index 0000000000..c04abf9449 @@ -0,0 +1 @@ +probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} -- -2.20.1 +2.21.0 diff --git a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch deleted file mode 100644 index f6a6a9d..0000000 --- a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ /dev/null @@ -1,958 +0,0 @@ -From 70a20c03876ba38ba94a5e8e6e1d848e60ef42eb Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Fri, 30 Nov 2018 09:11:03 +0100 -Subject: Use qemu-kvm in documentation instead of qemu-system- - -Patchwork-id: 62380 -O-Subject: [RHEV-7.1 qemu-kvm-rhev PATCHv4] Use qemu-kvm in documentation instead of qemu-system-i386 -Bugzilla: 1140620 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Markus Armbruster -RH-Acked-by: Stefan Hajnoczi - -From: Miroslav Rezanina - -We change the name and location of qemu-kvm binaries. Update documentation -to reflect this change. Only architectures available in RHEL are updated. - -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. 
de Paula ---- - docs/qemu-block-drivers.texi | 70 +++++++++--------- - docs/qemu-cpu-models.texi | 8 +- - qemu-doc.texi | 70 +++++++++--------- - qemu-options.hx | 140 ++++++++++++++++++----------------- - 4 files changed, 146 insertions(+), 142 deletions(-) - -diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi -index 91ab0eceae..35cc3d838c 100644 ---- a/docs/qemu-block-drivers.texi -+++ b/docs/qemu-block-drivers.texi -@@ -405,7 +405,7 @@ QEMU can automatically create a virtual FAT disk image from a - directory tree. In order to use it, just type: - - @example --qemu-system-i386 linux.img -hdb fat:/my_directory -+qemu-kvm linux.img -hdb fat:/my_directory - @end example - - Then you access access to all the files in the @file{/my_directory} -@@ -415,14 +415,14 @@ them via SAMBA or NFS. The default access is @emph{read-only}. - Floppies can be emulated with the @code{:floppy:} option: - - @example --qemu-system-i386 linux.img -fda fat:floppy:/my_directory -+qemu-kvm linux.img -fda fat:floppy:/my_directory - @end example - - A read/write support is available for testing (beta stage) with the - @code{:rw:} option: - - @example --qemu-system-i386 linux.img -fda fat:floppy:rw:/my_directory -+qemu-kvm linux.img -fda fat:floppy:rw:/my_directory - @end example - - What you should @emph{never} do: -@@ -440,14 +440,14 @@ QEMU can access directly to block device exported using the Network Block Device - protocol. 
- - @example --qemu-system-i386 linux.img -hdb nbd://my_nbd_server.mydomain.org:1024/ -+qemu-kvm linux.img -hdb nbd://my_nbd_server.mydomain.org:1024/ - @end example - - If the NBD server is located on the same host, you can use an unix socket instead - of an inet socket: - - @example --qemu-system-i386 linux.img -hdb nbd+unix://?socket=/tmp/my_socket -+qemu-kvm linux.img -hdb nbd+unix://?socket=/tmp/my_socket - @end example - - In this case, the block device must be exported using qemu-nbd: -@@ -464,23 +464,23 @@ qemu-nbd --socket=/tmp/my_socket --share=2 my_disk.qcow2 - @noindent - and then you can use it with two guests: - @example --qemu-system-i386 linux1.img -hdb nbd+unix://?socket=/tmp/my_socket --qemu-system-i386 linux2.img -hdb nbd+unix://?socket=/tmp/my_socket -+qemu-kvm linux1.img -hdb nbd+unix://?socket=/tmp/my_socket -+qemu-kvm linux2.img -hdb nbd+unix://?socket=/tmp/my_socket - @end example - - If the nbd-server uses named exports (supported since NBD 2.9.18, or with QEMU's - own embedded NBD server), you must specify an export name in the URI: - @example --qemu-system-i386 -cdrom nbd://localhost/debian-500-ppc-netinst --qemu-system-i386 -cdrom nbd://localhost/openSUSE-11.1-ppc-netinst -+qemu-kvm -cdrom nbd://localhost/debian-500-ppc-netinst -+qemu-kvm -cdrom nbd://localhost/openSUSE-11.1-ppc-netinst - @end example - - The URI syntax for NBD is supported since QEMU 1.3. An alternative syntax is - also available. 
Here are some example of the older syntax: - @example --qemu-system-i386 linux.img -hdb nbd:my_nbd_server.mydomain.org:1024 --qemu-system-i386 linux2.img -hdb nbd:unix:/tmp/my_socket --qemu-system-i386 -cdrom nbd:localhost:10809:exportname=debian-500-ppc-netinst -+qemu-kvm linux.img -hdb nbd:my_nbd_server.mydomain.org:1024 -+qemu-kvm linux2.img -hdb nbd:unix:/tmp/my_socket -+qemu-kvm -cdrom nbd:localhost:10809:exportname=debian-500-ppc-netinst - @end example - - @node disk_images_sheepdog -@@ -505,7 +505,7 @@ qemu-img convert @var{filename} sheepdog:///@var{image} - - You can boot from the Sheepdog disk image with the command: - @example --qemu-system-i386 sheepdog:///@var{image} -+qemu-kvm sheepdog:///@var{image} - @end example - - You can also create a snapshot of the Sheepdog image like qcow2. -@@ -517,7 +517,7 @@ where @var{tag} is a tag name of the newly created snapshot. - To boot from the Sheepdog snapshot, specify the tag name of the - snapshot. - @example --qemu-system-i386 sheepdog:///@var{image}#@var{tag} -+qemu-kvm sheepdog:///@var{image}#@var{tag} - @end example - - You can create a cloned image from the existing snapshot. -@@ -530,14 +530,14 @@ is its tag name. - You can use an unix socket instead of an inet socket: - - @example --qemu-system-i386 sheepdog+unix:///@var{image}?socket=@var{path} -+qemu-kvm sheepdog+unix:///@var{image}?socket=@var{path} - @end example - - If the Sheepdog daemon doesn't run on the local host, you need to - specify one of the Sheepdog servers to connect to. 
- @example - qemu-img create sheepdog://@var{hostname}:@var{port}/@var{image} @var{size} --qemu-system-i386 sheepdog://@var{hostname}:@var{port}/@var{image} -+qemu-kvm sheepdog://@var{hostname}:@var{port}/@var{image} - @end example - - @node disk_images_iscsi -@@ -627,7 +627,7 @@ cat >iscsi.conf < /sys/bus/pci/devices/0000:06:0d.0/driver/unbind - # echo 1102 0002 > /sys/bus/pci/drivers/vfio-pci/new_id - --# qemu-system-x86_64 -drive file=nvme://@var{host}:@var{bus}:@var{slot}.@var{func}/@var{namespace} -+# qemu-kvm -drive file=nvme://@var{host}:@var{bus}:@var{slot}.@var{func}/@var{namespace} - @end example - - Alternative syntax using properties: - - @example --qemu-system-x86_64 -drive file.driver=nvme,file.device=@var{host}:@var{bus}:@var{slot}.@var{func},file.namespace=@var{namespace} -+qemu-kvm -drive file.driver=nvme,file.device=@var{host}:@var{bus}:@var{slot}.@var{func},file.namespace=@var{namespace} - @end example - - @var{host}:@var{bus}:@var{slot}.@var{func} is the NVMe controller's PCI device -diff --git a/docs/qemu-cpu-models.texi b/docs/qemu-cpu-models.texi -index ad040cfc98..0de3229e43 100644 ---- a/docs/qemu-cpu-models.texi -+++ b/docs/qemu-cpu-models.texi -@@ -578,25 +578,25 @@ CPU models / features in QEMU and libvirt - @item Host passthrough - - @example -- $ qemu-system-x86_64 -cpu host -+ $ qemu-kvm -cpu host - @end example - - With feature customization: - - @example -- $ qemu-system-x86_64 -cpu host,-vmx,... -+ $ qemu-kvm -cpu host,-vmx,... - @end example - - @item Named CPU models - - @example -- $ qemu-system-x86_64 -cpu Westmere -+ $ qemu-kvm -cpu Westmere - @end example - - With feature customization: - - @example -- $ qemu-system-x86_64 -cpu Westmere,+pcid,... -+ $ qemu-kvm -cpu Westmere,+pcid,... 
- @end example - - @end table -diff --git a/qemu-doc.texi b/qemu-doc.texi -index 577d1e8376..44427bb0e1 100644 ---- a/qemu-doc.texi -+++ b/qemu-doc.texi -@@ -207,12 +207,12 @@ Note that, by default, GUS shares IRQ(7) with parallel ports and so - QEMU must be told to not have parallel ports to have working GUS. - - @example --qemu-system-i386 dos.img -soundhw gus -parallel none -+qemu-kvm dos.img -soundhw gus -parallel none - @end example - - Alternatively: - @example --qemu-system-i386 dos.img -device gus,irq=5 -+qemu-kvm dos.img -device gus,irq=5 - @end example - - Or some other unclaimed IRQ. -@@ -228,7 +228,7 @@ CS4231A is the chip used in Windows Sound System and GUSMAX products - Download and uncompress the linux image (@file{linux.img}) and type: - - @example --qemu-system-i386 linux.img -+qemu-kvm linux.img - @end example - - Linux should boot and give you a prompt. -@@ -238,7 +238,7 @@ Linux should boot and give you a prompt. - - @example - @c man begin SYNOPSIS --@command{qemu-system-i386} [@var{options}] [@var{disk_image}] -+@command{qemu-kvm} [@var{options}] [@var{disk_image}] - @c man end - @end example - -@@ -278,21 +278,21 @@ is specified in seconds. The default is 0 which means no timeout. 
Libiscsi - - Example (without authentication): - @example --qemu-system-i386 -iscsi initiator-name=iqn.2001-04.com.example:my-initiator \ -+qemu-kvm -iscsi initiator-name=iqn.2001-04.com.example:my-initiator \ - -cdrom iscsi://192.0.2.1/iqn.2001-04.com.example/2 \ - -drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1 - @end example - - Example (CHAP username/password via URL): - @example --qemu-system-i386 -drive file=iscsi://user%password@@192.0.2.1/iqn.2001-04.com.example/1 -+qemu-kvm -drive file=iscsi://user%password@@192.0.2.1/iqn.2001-04.com.example/1 - @end example - - Example (CHAP username/password via environment variables): - @example - LIBISCSI_CHAP_USERNAME="user" \ - LIBISCSI_CHAP_PASSWORD="password" \ --qemu-system-i386 -drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1 -+qemu-kvm -drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1 - @end example - - @item NBD -@@ -307,12 +307,12 @@ Syntax for specifying a NBD device using Unix Domain Sockets - - Example for TCP - @example --qemu-system-i386 --drive file=nbd:192.0.2.1:30000 -+qemu-kvm --drive file=nbd:192.0.2.1:30000 - @end example - - Example for Unix Domain Sockets - @example --qemu-system-i386 --drive file=nbd:unix:/tmp/nbd-socket -+qemu-kvm --drive file=nbd:unix:/tmp/nbd-socket - @end example - - @item SSH -@@ -320,8 +320,8 @@ QEMU supports SSH (Secure Shell) access to remote disks. - - Examples: - @example --qemu-system-i386 -drive file=ssh://user@@host/path/to/disk.img --qemu-system-i386 -drive file.driver=ssh,file.user=user,file.host=host,file.port=22,file.path=/path/to/disk.img -+qemu-kvm -drive file=ssh://user@@host/path/to/disk.img -+qemu-kvm -drive file.driver=ssh,file.user=user,file.host=host,file.port=22,file.path=/path/to/disk.img - @end example - - Currently authentication must be done using ssh-agent. 
Other -@@ -339,7 +339,7 @@ sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag] - - Example - @example --qemu-system-i386 --drive file=sheepdog://192.0.2.1:30000/MyVirtualMachine -+qemu-kvm --drive file=sheepdog://192.0.2.1:30000/MyVirtualMachine - @end example - - See also @url{https://sheepdog.github.io/sheepdog/}. -@@ -365,17 +365,17 @@ JSON: - Example - @example - URI: --qemu-system-x86_64 --drive file=gluster://192.0.2.1/testvol/a.img, -+qemu-kvm --drive file=gluster://192.0.2.1/testvol/a.img, - @ file.debug=9,file.logfile=/var/log/qemu-gluster.log - - JSON: --qemu-system-x86_64 'json:@{"driver":"qcow2", -+qemu-kvm 'json:@{"driver":"qcow2", - @ "file":@{"driver":"gluster", - @ "volume":"testvol","path":"a.img", - @ "debug":9,"logfile":"/var/log/qemu-gluster.log", - @ "server":[@{"type":"tcp","host":"1.2.3.4","port":24007@}, - @ @{"type":"unix","socket":"/var/run/glusterd.socket"@}]@}@}' --qemu-system-x86_64 -drive driver=qcow2,file.driver=gluster,file.volume=testvol,file.path=/path/a.img, -+qemu-kvm -drive driver=qcow2,file.driver=gluster,file.volume=testvol,file.path=/path/a.img, - @ file.debug=9,file.logfile=/var/log/qemu-gluster.log, - @ file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007, - @ file.server.1.type=unix,file.server.1.socket=/var/run/glusterd.socket -@@ -440,9 +440,9 @@ of . 
- - Example: boot from a remote Fedora 20 live ISO image - @example --qemu-system-x86_64 --drive media=cdrom,file=http://dl.fedoraproject.org/pub/fedora/linux/releases/20/Live/x86_64/Fedora-Live-Desktop-x86_64-20-1.iso,readonly -+qemu-kvm --drive media=cdrom,file=http://dl.fedoraproject.org/pub/fedora/linux/releases/20/Live/x86_64/Fedora-Live-Desktop-x86_64-20-1.iso,readonly - --qemu-system-x86_64 --drive media=cdrom,file.driver=http,file.url=http://dl.fedoraproject.org/pub/fedora/linux/releases/20/Live/x86_64/Fedora-Live-Desktop-x86_64-20-1.iso,readonly -+qemu-kvm --drive media=cdrom,file.driver=http,file.url=http://dl.fedoraproject.org/pub/fedora/linux/releases/20/Live/x86_64/Fedora-Live-Desktop-x86_64-20-1.iso,readonly - @end example - - Example: boot from a remote Fedora 20 cloud image using a local overlay for -@@ -450,7 +450,7 @@ writes, copy-on-read, and a readahead of 64k - @example - qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"http",, "file.url":"https://dl.fedoraproject.org/pub/fedora/linux/releases/20/Images/x86_64/Fedora-x86_64-20-20131211.1-sda.qcow2",, "file.readahead":"64k"@}' /tmp/Fedora-x86_64-20-20131211.1-sda.qcow2 - --qemu-system-x86_64 -drive file=/tmp/Fedora-x86_64-20-20131211.1-sda.qcow2,copy-on-read=on -+qemu-kvm -drive file=/tmp/Fedora-x86_64-20-20131211.1-sda.qcow2,copy-on-read=on - @end example - - Example: boot from an image stored on a VMware vSphere server with a self-signed -@@ -459,7 +459,7 @@ of 10 seconds. - @example - qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"https",, "file.url":"https://user:password@@vsphere.example.com/folder/test/test-flat.vmdk?dcPath=Datacenter&dsName=datastore1",, "file.sslverify":"off",, "file.readahead":"64k",, "file.timeout":10@}' /tmp/test.qcow2 - --qemu-system-x86_64 -drive file=/tmp/test.qcow2 -+qemu-kvm -drive file=/tmp/test.qcow2 - @end example - - @end table -@@ -826,7 +826,7 @@ On Linux hosts, a shared memory device is available. 
The basic syntax - is: - - @example --qemu-system-x86_64 -device ivshmem-plain,memdev=@var{hostmem} -+qemu-kvm -device ivshmem-plain,memdev=@var{hostmem} - @end example - - where @var{hostmem} names a host memory backend. For a POSIX shared -@@ -847,7 +847,7 @@ memory server is: - ivshmem-server -p @var{pidfile} -S @var{path} -m @var{shm-name} -l @var{shm-size} -n @var{vectors} - - # Then start your qemu instances with matching arguments --qemu-system-x86_64 -device ivshmem-doorbell,vectors=@var{vectors},chardev=@var{id} -+qemu-kvm -device ivshmem-doorbell,vectors=@var{vectors},chardev=@var{id} - -chardev socket,path=@var{path},id=@var{id} - @end example - -@@ -872,7 +872,7 @@ Instead of specifying the using POSIX shm, you may specify - a memory backend that has hugepage support: - - @example --qemu-system-x86_64 -object memory-backend-file,size=1G,mem-path=/dev/hugepages/my-shmem-file,share,id=mb1 -+qemu-kvm -object memory-backend-file,size=1G,mem-path=/dev/hugepages/my-shmem-file,share,id=mb1 - -device ivshmem-plain,memdev=mb1 - @end example - -@@ -888,7 +888,7 @@ kernel testing. - - The syntax is: - @example --qemu-system-i386 -kernel arch/i386/boot/bzImage -hda root-2.4.20.img -append "root=/dev/hda" -+qemu-kvm -kernel arch/i386/boot/bzImage -hda root-2.4.20.img -append "root=/dev/hda" - @end example - - Use @option{-kernel} to provide the Linux kernel image and -@@ -903,7 +903,7 @@ If you do not need graphical output, you can disable it and redirect - the virtual serial port and the QEMU monitor to the console with the - @option{-nographic} option. The typical command line is: - @example --qemu-system-i386 -kernel arch/i386/boot/bzImage -hda root-2.4.20.img \ -+qemu-kvm -kernel arch/i386/boot/bzImage -hda root-2.4.20.img \ - -append "root=/dev/hda console=ttyS0" -nographic - @end example - -@@ -969,7 +969,7 @@ Network adapter that supports CDC ethernet and RNDIS protocols. @var{id} - specifies a netdev defined with @code{-netdev @dots{},id=@var{id}}. 
- For instance, user-mode networking can be used with - @example --qemu-system-i386 [...] -netdev user,id=net0 -device usb-net,netdev=net0 -+qemu-kvm [...] -netdev user,id=net0 -device usb-net,netdev=net0 - @end example - @item usb-ccid - Smartcard reader device -@@ -988,7 +988,7 @@ no type is given, the HCI logic corresponds to @code{-bt hci,vlan=0}. - This USB device implements the USB Transport Layer of HCI. Example - usage: - @example --@command{qemu-system-i386} [...@var{OPTIONS}...] @option{-usbdevice} bt:hci,vlan=3 @option{-bt} device:keyboard,vlan=3 -+@command{qemu-kvm} [...@var{OPTIONS}...] @option{-usbdevice} bt:hci,vlan=3 @option{-bt} device:keyboard,vlan=3 - @end example - @end table - -@@ -1065,7 +1065,7 @@ For this setup it is recommended to restrict it to listen on a UNIX domain - socket only. For example - - @example --qemu-system-i386 [...OPTIONS...] -vnc unix:/home/joebloggs/.qemu-myvm-vnc -+qemu-kvm [...OPTIONS...] -vnc unix:/home/joebloggs/.qemu-myvm-vnc - @end example - - This ensures that only users on local box with read/write access to that -@@ -1088,7 +1088,7 @@ is running the password is set with the monitor. Until the monitor is used to - set the password all clients will be rejected. - - @example --qemu-system-i386 [...OPTIONS...] -vnc :1,password -monitor stdio -+qemu-kvm [...OPTIONS...] -vnc :1,password -monitor stdio - (qemu) change vnc password - Password: ******** - (qemu) -@@ -1105,7 +1105,7 @@ support provides a secure session, but no authentication. This allows any - client to connect, and provides an encrypted session. - - @example --qemu-system-i386 [...OPTIONS...] \ -+qemu-kvm [...OPTIONS...] \ - -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=no \ - -vnc :1,tls-creds=tls0 -monitor stdio - @end example -@@ -1127,7 +1127,7 @@ same syntax as previously, but with @code{verify-peer} set to @code{yes} - instead. - - @example --qemu-system-i386 [...OPTIONS...] \ -+qemu-kvm [...OPTIONS...] 
\ - -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ - -vnc :1,tls-creds=tls0 -monitor stdio - @end example -@@ -1140,7 +1140,7 @@ Finally, the previous method can be combined with VNC password authentication - to provide two layers of authentication for clients. - - @example --qemu-system-i386 [...OPTIONS...] \ -+qemu-kvm [...OPTIONS...] \ - -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ - -vnc :1,tls-creds=tls0,password -monitor stdio - (qemu) change vnc password -@@ -1165,7 +1165,7 @@ used for authentication, but assuming use of one supporting SSF, - then QEMU can be launched with: - - @example --qemu-system-i386 [...OPTIONS...] -vnc :1,sasl -monitor stdio -+qemu-kvm [...OPTIONS...] -vnc :1,sasl -monitor stdio - @end example - - @node vnc_sec_certificate_sasl -@@ -1179,7 +1179,7 @@ credentials. This can be enabled, by combining the 'sasl' option - with the aforementioned TLS + x509 options: - - @example --qemu-system-i386 [...OPTIONS...] \ -+qemu-kvm [...OPTIONS...] \ - -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ - -vnc :1,tls-creds=tls0,sasl -monitor stdio - @end example -@@ -1574,7 +1574,7 @@ QEMU has a primitive support to work with gdb, so that you can do - In order to use gdb, launch QEMU with the '-s' option. It will wait for a - gdb connection: - @example --qemu-system-i386 -s -kernel arch/i386/boot/bzImage -hda root-2.4.20.img \ -+qemu-kvm -s -kernel arch/i386/boot/bzImage -hda root-2.4.20.img \ - -append "root=/dev/hda" - Connected to host network interface: tun0 - Waiting gdb connection on port 1234 -@@ -1820,7 +1820,7 @@ Set the initial VGA graphic mode. The default is 800x600x32. 
- Set OpenBIOS variables in NVRAM, for example: - - @example --qemu-system-ppc -prom-env 'auto-boot?=false' \ -+qemu-kvm -prom-env 'auto-boot?=false' \ - -prom-env 'boot-device=hd:2,\yaboot' \ - -prom-env 'boot-args=conf=hd:2,\yaboot.conf' - @end example -diff --git a/qemu-options.hx b/qemu-options.hx -index 6873f9e674..9f323ec2a2 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -254,7 +254,7 @@ This option defines a free-form string that can be used to describe @var{fd}. - - You can open an image using pre-opened file descriptors from an fd set: - @example --qemu-system-i386 -+qemu-kvm - -add-fd fd=3,set=2,opaque="rdwr:/path/to/file" - -add-fd fd=4,set=2,opaque="rdonly:/path/to/file" - -drive file=/dev/fdset/2,index=0,media=disk -@@ -283,7 +283,7 @@ STEXI - Set default value of @var{driver}'s property @var{prop} to @var{value}, e.g.: - - @example --qemu-system-i386 -global ide-hd.physical_block_size=4096 disk-image.img -+qemu-kvm -global ide-hd.physical_block_size=4096 disk-image.img - @end example - - In particular, you can use this to set driver properties for devices which are -@@ -337,11 +337,11 @@ bootindex options. The default is non-strict boot. - - @example - # try to boot from network first, then from hard disk --qemu-system-i386 -boot order=nc -+qemu-kvm -boot order=nc - # boot from CD-ROM first, switch back to default order after reboot --qemu-system-i386 -boot once=d -+qemu-kvm -boot once=d - # boot with a splash picture for 5 seconds. 
--qemu-system-i386 -boot menu=on,splash=/root/boot.bmp,splash-time=5000 -+qemu-kvm -boot menu=on,splash=/root/boot.bmp,splash-time=5000 - @end example - - Note: The legacy format '-boot @var{drives}' is still supported but its -@@ -370,7 +370,7 @@ For example, the following command-line sets the guest startup RAM size to - memory the guest can reach to 4GB: - - @example --qemu-system-x86_64 -m 1G,slots=3,maxmem=4G -+qemu-kvm -m 1G,slots=3,maxmem=4G - @end example - - If @var{slots} and @var{maxmem} are not specified, memory hotplug won't -@@ -669,12 +669,12 @@ Enable audio and selected sound hardware. Use 'help' to print all - available sound hardware. - - @example --qemu-system-i386 -soundhw sb16,adlib disk.img --qemu-system-i386 -soundhw es1370 disk.img --qemu-system-i386 -soundhw ac97 disk.img --qemu-system-i386 -soundhw hda disk.img --qemu-system-i386 -soundhw all disk.img --qemu-system-i386 -soundhw help -+qemu-kvm -soundhw sb16,adlib disk.img -+qemu-kvm -soundhw es1370 disk.img -+qemu-kvm -soundhw ac97 disk.img -+qemu-kvm -soundhw hda disk.img -+qemu-kvm -soundhw all disk.img -+qemu-kvm -soundhw help - @end example - - Note that Linux's i810_audio OSS kernel (for AC97) module might -@@ -1149,21 +1149,21 @@ is off. 
- - Instead of @option{-cdrom} you can use: - @example --qemu-system-i386 -drive file=file,index=2,media=cdrom -+qemu-kvm -drive file=file,index=2,media=cdrom - @end example - - Instead of @option{-hda}, @option{-hdb}, @option{-hdc}, @option{-hdd}, you can - use: - @example --qemu-system-i386 -drive file=file,index=0,media=disk --qemu-system-i386 -drive file=file,index=1,media=disk --qemu-system-i386 -drive file=file,index=2,media=disk --qemu-system-i386 -drive file=file,index=3,media=disk -+qemu-kvm -drive file=file,index=0,media=disk -+qemu-kvm -drive file=file,index=1,media=disk -+qemu-kvm -drive file=file,index=2,media=disk -+qemu-kvm -drive file=file,index=3,media=disk - @end example - - You can open an image using pre-opened file descriptors from an fd set: - @example --qemu-system-i386 -+qemu-kvm - -add-fd fd=3,set=2,opaque="rdwr:/path/to/file" - -add-fd fd=4,set=2,opaque="rdonly:/path/to/file" - -drive file=/dev/fdset/2,index=0,media=disk -@@ -1171,28 +1171,28 @@ qemu-system-i386 - - You can connect a CDROM to the slave of ide0: - @example --qemu-system-i386 -drive file=file,if=ide,index=1,media=cdrom -+qemu-kvm -drive file=file,if=ide,index=1,media=cdrom - @end example - - If you don't specify the "file=" argument, you define an empty drive: - @example --qemu-system-i386 -drive if=ide,index=1,media=cdrom -+qemu-kvm -drive if=ide,index=1,media=cdrom - @end example - - Instead of @option{-fda}, @option{-fdb}, you can use: - @example --qemu-system-i386 -drive file=file,index=0,if=floppy --qemu-system-i386 -drive file=file,index=1,if=floppy -+qemu-kvm -drive file=file,index=0,if=floppy -+qemu-kvm -drive file=file,index=1,if=floppy - @end example - - By default, @var{interface} is "ide" and @var{index} is automatically - incremented: - @example --qemu-system-i386 -drive file=a -drive file=b" -+qemu-kvm -drive file=a -drive file=b" - @end example - is interpreted like: - @example --qemu-system-i386 -hda a -hdb b -+qemu-kvm -hda a -hdb b - @end example - ETEXI - 
-@@ -2258,8 +2258,8 @@ The following two example do exactly the same, to show how @option{-nic} can - be used to shorten the command line length (note that the e1000 is the default - on i386, so the @option{model=e1000} parameter could even be omitted here, too): - @example --qemu-system-i386 -netdev user,id=n1,ipv6=off -device e1000,netdev=n1,mac=52:54:98:76:54:32 --qemu-system-i386 -nic user,ipv6=off,model=e1000,mac=52:54:98:76:54:32 -+qemu-kvm -netdev user,id=n1,ipv6=off -device e1000,netdev=n1,mac=52:54:98:76:54:32 -+qemu-kvm -nic user,ipv6=off,model=e1000,mac=52:54:98:76:54:32 - @end example - - @item -nic none -@@ -2330,7 +2330,7 @@ can not be resolved. - - Example: - @example --qemu-system-i386 -nic user,dnssearch=mgmt.example.org,dnssearch=example.org -+qemu-kvm -nic user,dnssearch=mgmt.example.org,dnssearch=example.org - @end example - - @item domainname=@var{domain} -@@ -2354,7 +2354,7 @@ a guest from a local directory. - - Example (using pxelinux): - @example --qemu-system-i386 -hda linux.img -boot n -device e1000,netdev=n1 \ -+qemu-kvm -hda linux.img -boot n -device e1000,netdev=n1 \ - -netdev user,id=n1,tftp=/path/to/tftp/files,bootfile=/pxelinux.0 - @end example - -@@ -2388,7 +2388,7 @@ screen 0, use the following: - - @example - # on the host --qemu-system-i386 -nic user,hostfwd=tcp:127.0.0.1:6001-:6000 -+qemu-kvm -nic user,hostfwd=tcp:127.0.0.1:6001-:6000 - # this host xterm should open in the guest X11 server - xterm -display :1 - @end example -@@ -2398,7 +2398,7 @@ the guest, use the following: - - @example - # on the host --qemu-system-i386 -nic user,hostfwd=tcp::5555-:23 -+qemu-kvm -nic user,hostfwd=tcp::5555-:23 - telnet localhost 5555 - @end example - -@@ -2417,7 +2417,7 @@ lifetime, like in the following example: - @example - # open 10.10.1.1:4321 on bootup, connect 10.0.2.100:1234 to it whenever - # the guest accesses it --qemu-system-i386 -nic user,guestfwd=tcp:10.0.2.100:1234-tcp:10.10.1.1:4321 -+qemu-kvm -nic 
user,guestfwd=tcp:10.0.2.100:1234-tcp:10.10.1.1:4321 - @end example - - Or you can execute a command on every TCP connection established by the guest, -@@ -2426,7 +2426,7 @@ so that QEMU behaves similar to an inetd process for that virtual server: - @example - # call "netcat 10.10.1.1 4321" on every TCP connection to 10.0.2.100:1234 - # and connect the TCP stream to its stdin/stdout --qemu-system-i386 -nic 'user,id=n1,guestfwd=tcp:10.0.2.100:1234-cmd:netcat 10.10.1.1 4321' -+qemu-kvm -nic 'user,id=n1,guestfwd=tcp:10.0.2.100:1234-cmd:netcat 10.10.1.1 4321' - @end example - - @end table -@@ -2453,21 +2453,22 @@ Examples: - - @example - #launch a QEMU instance with the default network script --qemu-system-i386 linux.img -nic tap -+qemu-kvm linux.img -nic tap - @end example - - @example - #launch a QEMU instance with two NICs, each one connected - #to a TAP device --qemu-system-i386 linux.img \ -+qemu-kvm linux.img \ - -netdev tap,id=nd0,ifname=tap0 -device e1000,netdev=nd0 \ - -netdev tap,id=nd1,ifname=tap1 -device rtl8139,netdev=nd1 -+ -net nic,vlan=1 -net tap,vlan=1,ifname=tap1 - @end example - - @example - #launch a QEMU instance with the default network helper to - #connect a TAP device to bridge br0 --qemu-system-i386 linux.img -device virtio-net-pci,netdev=n1 \ -+qemu-kvm linux.img -device virtio-net-pci,netdev=n1 \ - -netdev tap,id=n1,"helper=/path/to/qemu-bridge-helper" - @end example - -@@ -2484,13 +2485,13 @@ Examples: - @example - #launch a QEMU instance with the default network helper to - #connect a TAP device to bridge br0 --qemu-system-i386 linux.img -netdev bridge,id=n1 -device virtio-net,netdev=n1 -+qemu-kvm linux.img -netdev bridge,id=n1 -device virtio-net,netdev=n1 - @end example - - @example - #launch a QEMU instance with the default network helper to - #connect a TAP device to bridge qemubr0 --qemu-system-i386 linux.img -netdev bridge,br=qemubr0,id=n1 -device virtio-net,netdev=n1 -+qemu-kvm linux.img -netdev bridge,br=qemubr0,id=n1 -device 
virtio-net,netdev=n1 - @end example - - @item -netdev socket,id=@var{id}[,fd=@var{h}][,listen=[@var{host}]:@var{port}][,connect=@var{host}:@var{port}] -@@ -2505,13 +2506,13 @@ specifies an already opened TCP socket. - Example: - @example - # launch a first QEMU instance --qemu-system-i386 linux.img \ -- -device e1000,netdev=n1,mac=52:54:00:12:34:56 \ -- -netdev socket,id=n1,listen=:1234 -+qemu-kvm linux.img \ -+ -device e1000,netdev=n1,mac=52:54:00:12:34:56 \ -+ -netdev socket,id=n1,listen=:1234 - # connect the network of this instance to the network of the first instance --qemu-system-i386 linux.img \ -- -device e1000,netdev=n2,mac=52:54:00:12:34:57 \ -- -netdev socket,id=n2,connect=127.0.0.1:1234 -+qemu-kvm linux.img \ -+ -device e1000,netdev=n2,mac=52:54:00:12:34:57 \ -+ -netdev socket,id=n2,connect=127.0.0.1:1234 - @end example - - @item -netdev socket,id=@var{id}[,fd=@var{h}][,mcast=@var{maddr}:@var{port}[,localaddr=@var{addr}]] -@@ -2534,15 +2535,15 @@ Use @option{fd=h} to specify an already opened UDP multicast socket. 
- Example: - @example - # launch one QEMU instance --qemu-system-i386 linux.img \ -- -device e1000,netdev=n1,mac=52:54:00:12:34:56 \ -- -netdev socket,id=n1,mcast=230.0.0.1:1234 -+qemu-kvm linux.img \ -+ -device e1000,netdev=n1,mac=52:54:00:12:34:56 \ -+ -netdev socket,id=n1,mcast=230.0.0.1:1234 - # launch another QEMU instance on same "bus" --qemu-system-i386 linux.img \ -- -device e1000,netdev=n2,mac=52:54:00:12:34:57 \ -- -netdev socket,id=n2,mcast=230.0.0.1:1234 -+qemu-kvm linux.img \ -+ -device e1000,netdev=n2,mac=52:54:00:12:34:57 \ -+ -netdev socket,id=n2,mcast=230.0.0.1:1234 - # launch yet another QEMU instance on same "bus" --qemu-system-i386 linux.img \ -+qemu-kvm linux.img \ - -device e1000,netdev=n3,mac=52:54:00:12:34:58 \ - -netdev socket,id=n3,mcast=230.0.0.1:1234 - @end example -@@ -2550,7 +2551,7 @@ qemu-system-i386 linux.img \ - Example (User Mode Linux compat.): - @example - # launch QEMU instance (note mcast address selected is UML's default) --qemu-system-i386 linux.img \ -+qemu-kvm linux.img \ - -device e1000,netdev=n1,mac=52:54:00:12:34:56 \ - -netdev socket,id=n1,mcast=239.192.168.1:1102 - # launch UML -@@ -2559,9 +2560,12 @@ qemu-system-i386 linux.img \ - - Example (send packets from host's 1.2.3.4): - @example --qemu-system-i386 linux.img \ -- -device e1000,netdev=n1,mac=52:54:00:12:34:56 \ -- -netdev socket,id=n1,mcast=239.192.168.1:1102,localaddr=1.2.3.4 -+qemu-kvm linux.img \ -+ -device e1000,netdev=n1,mac=52:54:00:12:34:56 \ -+ -netdev socket,id=n1,mcast=239.192.168.1:1102,localaddr=1.2.3.4 -+qemu-kvm linux.img \ -+ -net nic,macaddr=52:54:00:12:34:56 \ -+ -net socket,mcast=239.192.168.1:1102,localaddr=1.2.3.4 - @end example - - @item -netdev l2tpv3,id=@var{id},src=@var{srcaddr},dst=@var{dstaddr}[,srcport=@var{srcport}][,dstport=@var{dstport}],txsession=@var{txsession}[,rxsession=@var{rxsession}][,ipv6][,udp][,cookie64][,counter][,pincounter][,txcookie=@var{txcookie}][,rxcookie=@var{rxcookie}][,offset=@var{offset}] -@@ -2619,7 +2623,7 @@ 
brctl addif br-lan vmtunnel0 - # on 4.3.2.1 - # launch QEMU instance - if your network has reorder or is very lossy add ,pincounter - --qemu-system-i386 linux.img -device e1000,netdev=n1 \ -+qemu-kvm linux.img -device e1000,netdev=n1 \ - -netdev l2tpv3,id=n1,src=4.2.3.1,dst=1.2.3.4,udp,srcport=16384,dstport=16384,rxsession=0xffffffff,txsession=0xffffffff,counter - - @end example -@@ -2636,7 +2640,7 @@ Example: - # launch vde switch - vde_switch -F -sock /tmp/myswitch - # launch QEMU instance --qemu-system-i386 linux.img -nic vde,sock=/tmp/myswitch -+qemu-kvm linux.img -nic vde,sock=/tmp/myswitch - @end example - - @item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n] -@@ -2650,11 +2654,11 @@ be created for multiqueue vhost-user. - - Example: - @example --qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ -- -numa node,memdev=mem \ -- -chardev socket,id=chr0,path=/path/to/socket \ -- -netdev type=vhost-user,id=net0,chardev=chr0 \ -- -device virtio-net-pci,netdev=net0 -+qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ -+ -numa node,memdev=mem \ -+ -chardev socket,id=chr0,path=/path/to/socket \ -+ -netdev type=vhost-user,id=net0,chardev=chr0 \ -+ -device virtio-net-pci,netdev=net0 - @end example - - @item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}] -@@ -3094,7 +3098,7 @@ and communicate. Requires the Linux @code{vhci} driver installed. Can - be used as following: - - @example --qemu-system-i386 [...OPTIONS...] -bt hci,vlan=5 -bt vhci,vlan=5 -+qemu-kvm [...OPTIONS...] 
-bt hci,vlan=5 -bt vhci,vlan=5 - @end example - - @item -bt device:@var{dev}[,vlan=@var{n}] -@@ -3544,14 +3548,14 @@ ETEXI - - DEF("realtime", HAS_ARG, QEMU_OPTION_realtime, - "-realtime [mlock=on|off]\n" -- " run qemu with realtime features\n" -+ " run qemu-kvm with realtime features\n" - " mlock=on|off controls mlock support (default: on)\n", - QEMU_ARCH_ALL) - STEXI - @item -realtime mlock=on|off - @findex -realtime --Run qemu with realtime features. --mlocking qemu and guest memory can be enabled via @option{mlock=on} -+Run qemu-kvm with realtime features. -+mlocking qemu-kvm and guest memory can be enabled via @option{mlock=on} - (enabled by default). - ETEXI - -@@ -3589,7 +3593,7 @@ connections will likely be TCP-based, but also UDP, pseudo TTY, or even - stdio are reasonable use case. The latter is allowing to start QEMU from - within gdb and establish the connection via a pipe: - @example --(gdb) target remote | exec qemu-system-i386 -gdb stdio ... -+(gdb) target remote | exec qemu-kvm -gdb stdio ... - @end example - ETEXI - -@@ -4559,7 +4563,7 @@ which specify the queue number of cryptodev backend, the default of - - @example - -- # qemu-system-x86_64 \ -+ # qemu-kvm \ - [...] \ - -object cryptodev-backend-builtin,id=cryptodev0 \ - -device virtio-crypto-pci,id=crypto0,cryptodev=cryptodev0 \ -@@ -4579,7 +4583,7 @@ of cryptodev backend for multiqueue vhost-user, the default of @var{queues} is 1 - - @example - -- # qemu-system-x86_64 \ -+ # qemu-kvm \ - [...] 
\ - -chardev socket,id=chardev0,path=/path/to/socket \ - -object cryptodev-vhost-user,id=cryptodev0,chardev=chardev0 \ --- -2.20.1 - diff --git a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch new file mode 100644 index 0000000..7d0d15c --- /dev/null +++ b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -0,0 +1,118 @@ +From dce5c0db33a1f7420254944c78962ca1887d3c08 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 30 Nov 2018 09:11:03 +0100 +Subject: Use qemu-kvm in documentation instead of qemu-system- + +Patchwork-id: 62380 +O-Subject: [RHEV-7.1 qemu-kvm-rhev PATCHv4] Use qemu-kvm in documentation instead of qemu-system-i386 +Bugzilla: 1140620 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi + +From: Miroslav Rezanina + +We change the name and location of qemu-kvm binaries. Update documentation +to reflect this change. Only architectures available in RHEL are updated. + +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula +--- + docs/qemu-block-drivers.texi | 2 +- + docs/qemu-cpu-models.texi | 2 +- + qemu-doc.texi | 6 +++--- + qemu-options.hx | 16 ++++++++-------- + 4 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi +index 2c7ea49c32..5d0afb3dee 100644 +--- a/docs/qemu-block-drivers.texi ++++ b/docs/qemu-block-drivers.texi +@@ -2,7 +2,7 @@ + QEMU block driver reference manual + @c man end + +-@set qemu_system qemu-system-x86_64 ++@set qemu_system qemu-kvm + + @c man begin DESCRIPTION + +diff --git a/docs/qemu-cpu-models.texi b/docs/qemu-cpu-models.texi +index f88a1def0d..c82cf8fab7 100644 +--- a/docs/qemu-cpu-models.texi ++++ b/docs/qemu-cpu-models.texi +@@ -2,7 +2,7 @@ + QEMU / KVM CPU model configuration + @c man end + +-@set qemu_system_x86 qemu-system-x86_64 ++@set qemu_system_x86 qemu-kvm + + @c man begin DESCRIPTION + +diff --git a/qemu-doc.texi b/qemu-doc.texi +index 3c5022050f..f770e86a90 100644 +--- a/qemu-doc.texi ++++ b/qemu-doc.texi +@@ -11,8 +11,8 @@ + @paragraphindent 0 + @c %**end of header + +-@set qemu_system qemu-system-x86_64 +-@set qemu_system_x86 qemu-system-x86_64 ++@set qemu_system qemu-kvm ++@set qemu_system_x86 qemu-kvm + + @ifinfo + @direntry +@@ -1826,7 +1826,7 @@ Set the initial VGA graphic mode. The default is 800x600x32. + Set OpenBIOS variables in NVRAM, for example: + + @example +-qemu-system-ppc -prom-env 'auto-boot?=false' \ ++qemu-kvm -prom-env 'auto-boot?=false' \ + -prom-env 'boot-device=hd:2,\yaboot' \ + -prom-env 'boot-args=conf=hd:2,\yaboot.conf' + @end example +diff --git a/qemu-options.hx b/qemu-options.hx +index fc17aca631..df1d27b6f2 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -2737,11 +2737,11 @@ be created for multiqueue vhost-user. 
+ + Example: + @example +-qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ +- -numa node,memdev=mem \ +- -chardev socket,id=chr0,path=/path/to/socket \ +- -netdev type=vhost-user,id=net0,chardev=chr0 \ +- -device virtio-net-pci,netdev=net0 ++qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ ++ -numa node,memdev=mem \ ++ -chardev socket,id=chr0,path=/path/to/socket \ ++ -netdev type=vhost-user,id=net0,chardev=chr0 \ ++ -device virtio-net-pci,netdev=net0 + @end example + + @item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}] +@@ -3631,14 +3631,14 @@ ETEXI + + DEF("realtime", HAS_ARG, QEMU_OPTION_realtime, + "-realtime [mlock=on|off]\n" +- " run qemu with realtime features\n" ++ " run qemu-kvm with realtime features\n" + " mlock=on|off controls mlock support (default: on)\n", + QEMU_ARCH_ALL) + STEXI + @item -realtime mlock=on|off + @findex -realtime +-Run qemu with realtime features. +-mlocking qemu and guest memory can be enabled via @option{mlock=on} ++Run qemu-kvm with realtime features. ++mlocking qemu-kvm and guest memory can be enabled via @option{mlock=on} + (enabled by default). + ETEXI + +-- +2.21.0 + diff --git a/0017-usb-xhci-Fix-PCI-capability-order.patch b/0018-usb-xhci-Fix-PCI-capability-order.patch similarity index 93% rename from 0017-usb-xhci-Fix-PCI-capability-order.patch rename to 0018-usb-xhci-Fix-PCI-capability-order.patch index a74cc82..1c36a55 100644 --- a/0017-usb-xhci-Fix-PCI-capability-order.patch +++ b/0018-usb-xhci-Fix-PCI-capability-order.patch @@ -1,4 +1,4 @@ -From 4ebea24b67b02abb2fbbd67c3334496315d75b6f Mon Sep 17 00:00:00 2001 +From 9fe2902d4c8f5cd5ad72af06c6bc54813e642e27 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 5 May 2017 19:06:14 +0200 Subject: usb-xhci: Fix PCI capability order @@ -62,10 +62,10 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 9854fae583..adfacace65 100644 +index 8fed2eedd6..d2b9744030 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c -@@ -3397,6 +3397,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) +@@ -3403,6 +3403,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) xhci->max_pstreams_mask = 0; } @@ -78,7 +78,7 @@ index 9854fae583..adfacace65 100644 if (xhci->msi != ON_OFF_AUTO_OFF) { ret = msi_init(dev, 0x70, xhci->numintrs, true, false, &err); /* Any error other than -ENOTSUP(board's MSI support is broken) -@@ -3445,12 +3451,6 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) +@@ -3451,12 +3457,6 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64, &xhci->mem); @@ -92,5 +92,5 @@ index 9854fae583..adfacace65 100644 /* TODO check for errors, and should fail when msix=on */ msix_init(dev, xhci->numintrs, -- -2.20.1 +2.21.0 diff --git a/0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch similarity index 94% rename from 0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch rename to 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index 3b3ae2c..2b73bec 100644 --- a/0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 1a030226ff750613c7f567150f6bf97b902b0151 Mon Sep 17 00:00:00 2001 +From e6fd66897236f8a3348235447ed32b8e5de109bb Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] @@ -45,10 +45,10 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 8b9e5e2b49..9df5494398 100644 +index e8b2b64d09..54108c0056 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c -@@ -805,6 +805,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, +@@ -808,6 +808,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, SCSIDevice *sd = SCSI_DEVICE(dev); int ret; @@ -65,5 +65,5 @@ index 8b9e5e2b49..9df5494398 100644 if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { return; -- -2.20.1 +2.21.0 diff --git a/0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch similarity index 92% rename from 0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch rename to 0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index a0260c2..27138fe 100644 --- a/0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From e0daf3e497f1f82a0d8c45a9d26e9982a6f866ac Mon Sep 17 00:00:00 2001 +From 8b3c1edc606bea84b5e52369ed8d211889a44b6c Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts @@ -32,10 +32,10 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index bbb001f84a..40cd9dcde6 100644 +index 481dfd2a27..805f38533e 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c -@@ -343,12 +343,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, +@@ -351,12 +351,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { @@ -56,5 +56,5 @@ index bbb001f84a..40cd9dcde6 100644 spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); } -- -2.20.1 +2.21.0 diff --git a/0020-pc-Don-t-make-die-id-mandatory-unless-necessary.patch b/0020-pc-Don-t-make-die-id-mandatory-unless-necessary.patch deleted file mode 100644 index b527a07..0000000 --- a/0020-pc-Don-t-make-die-id-mandatory-unless-necessary.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 7d5c2ef35d0bd7eb90fac2f40225bcfb4a46421d Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Fri, 16 Aug 2019 17:16:33 +0100 -Subject: pc: Don't make die-id mandatory unless necessary - -RH-Author: Eduardo Habkost -Message-id: <20190816171633.26797-2-ehabkost@redhat.com> -Patchwork-id: 90038 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v2 1/1] pc: Don't make die-id mandatory unless necessary -Bugzilla: 1741451 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Danilo de Paula -RH-Acked-by: Erik Skultety -RH-Acked-by: Miroslav Rezanina - -We have this issue reported when using libvirt to hotplug CPUs: -https://bugzilla.redhat.com/show_bug.cgi?id=1741451 - -Basically, libvirt is not copying die-id from -query-hotpluggable-cpus, but die-id is now mandatory. 
- -We could blame libvirt and say it is not following the documented -interface, because we have this buried in the QAPI schema -documentation: - -> Note: currently there are 5 properties that could be present -> but management should be prepared to pass through other -> properties with device_add command to allow for future -> interface extension. This also requires the filed names to be kept in -> sync with the properties passed to -device/device_add. - -But I don't think this would be reasonable from us. We can just -make QEMU more flexible and let die-id to be omitted when there's -no ambiguity. This will allow us to keep compatibility with -existing libvirt versions. - -Test case included to ensure we don't break this again. - -Fixes: commit 176d2cda0dee ("i386/cpu: Consolidate die-id validity in smp context") -Signed-off-by: Eduardo Habkost ---- -Changes v1 -> v2: -* v1 was "pc: Don't make CPU properties mandatory unless necessary" -* Make only die-id optional (Igor Mammedov) - -Signed-off-by: Danilo C. L. de Paula ---- - hw/i386/pc.c | 8 ++++++ - tests/acceptance/pc_cpu_hotplug_props.py | 35 ++++++++++++++++++++++++ - 2 files changed, 43 insertions(+) - create mode 100644 tests/acceptance/pc_cpu_hotplug_props.py - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 9ded0db80d..b3d2d1e88a 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -2622,6 +2622,14 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, - int max_socket = (ms->smp.max_cpus - 1) / - smp_threads / smp_cores / pcms->smp_dies; - -+ /* -+ * die-id was optional in QEMU 4.0 and older, so keep it optional -+ * if there's only one die per socket. 
-+ */ -+ if (cpu->die_id < 0 && pcms->smp_dies == 1) { -+ cpu->die_id = 0; -+ } -+ - if (cpu->socket_id < 0) { - error_setg(errp, "CPU socket-id is not set"); - return; -diff --git a/tests/acceptance/pc_cpu_hotplug_props.py b/tests/acceptance/pc_cpu_hotplug_props.py -new file mode 100644 -index 0000000000..08b7e632c6 ---- /dev/null -+++ b/tests/acceptance/pc_cpu_hotplug_props.py -@@ -0,0 +1,35 @@ -+# -+# Ensure CPU die-id can be omitted on -device -+# -+# Copyright (c) 2019 Red Hat Inc -+# -+# Author: -+# Eduardo Habkost -+# -+# This library is free software; you can redistribute it and/or -+# modify it under the terms of the GNU Lesser General Public -+# License as published by the Free Software Foundation; either -+# version 2 of the License, or (at your option) any later version. -+# -+# This library is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# Lesser General Public License for more details. -+# -+# You should have received a copy of the GNU Lesser General Public -+# License along with this library; if not, see . 
-+# -+ -+from avocado_qemu import Test -+ -+class OmittedCPUProps(Test): -+ """ -+ :avocado: tags=arch:x86_64 -+ """ -+ def test_no_die_id(self): -+ self.vm.add_args('-nodefaults', '-S') -+ self.vm.add_args('-smp', '1,sockets=2,cores=2,threads=2,maxcpus=8') -+ self.vm.add_args('-cpu', 'qemu64') -+ self.vm.add_args('-device', 'qemu64-x86_64-cpu,socket-id=1,core-id=0,thread-id=0') -+ self.vm.launch() -+ self.assertEquals(len(self.vm.command('query-cpus')), 2) --- -2.20.1 - diff --git a/kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch b/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch similarity index 90% rename from kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch rename to 0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch index a342691..10fe2ab 100644 --- a/kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch +++ b/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch @@ -1,8 +1,8 @@ -From 01e95b17878444859b15e79f7690d32a3532907e Mon Sep 17 00:00:00 2001 +From 9c91fc25dd4edd7447a342dd37b2fd8d3e2301f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 16 Sep 2019 17:07:00 +0100 -Subject: [PATCH 4/4] Using ip_deq after m_free might read pointers from an - allocation reuse. +Subject: Using ip_deq after m_free might read pointers from an allocation + reuse. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @@ -31,7 +31,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c -index 8c75d91..df1c846 100644 +index 8c75d91495..df1c846ade 100644 --- a/slirp/src/ip_input.c +++ b/slirp/src/ip_input.c @@ -292,6 +292,7 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) @@ -57,5 +57,5 @@ index 8c75d91..df1c846 100644 insert: -- -1.8.3.1 +2.21.0 diff --git a/kvm-RHEL-disable-hostmem-memfd.patch b/kvm-RHEL-disable-hostmem-memfd.patch deleted file mode 100644 index 39a0607..0000000 --- a/kvm-RHEL-disable-hostmem-memfd.patch +++ /dev/null @@ -1,56 +0,0 @@ -From f7587ddb9a2731bf678a24156b6285dda79a4b2b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Mon, 19 Aug 2019 21:18:27 -0300 -Subject: [PATCH] RHEL: disable hostmem-memfd -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20190814085210.18462-1-marcandre.lureau@redhat.com> -Patchwork-id: 89974 -O-Subject: [RHEL-8.1.0 qemu-kvm PATCH] RHEL: disable hostmem-memfd -Bugzilla: 1738626 1740797 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: John Snow - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1740797 -BRANCH: rhel8/rhel-8.1.0 -UPSTREAM: n/a (downstream only) -BREW: 23060214 - -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. 
de Paula ---- - backends/Makefile.objs | 3 ++- - util/memfd.c | 2 +- - 2 files changed, 3 insertions(+), 2 deletions(-) - -diff --git a/backends/Makefile.objs b/backends/Makefile.objs -index 981e8e1..ad54cd6 100644 ---- a/backends/Makefile.objs -+++ b/backends/Makefile.objs -@@ -16,4 +16,5 @@ endif - - common-obj-$(call land,$(CONFIG_VHOST_USER),$(CONFIG_VIRTIO)) += vhost-user.o - --common-obj-$(CONFIG_LINUX) += hostmem-memfd.o -+# RHEL: disable memfd -+# common-obj-$(CONFIG_LINUX) += hostmem-memfd.o -diff --git a/util/memfd.c b/util/memfd.c -index 00334e5..e2320af 100644 ---- a/util/memfd.c -+++ b/util/memfd.c -@@ -193,7 +193,7 @@ bool qemu_memfd_alloc_check(void) - */ - bool qemu_memfd_check(unsigned int flags) - { --#ifdef CONFIG_LINUX -+#if 0 /* RHEL: memfd support disabled */ - int mfd = memfd_create("test", flags | MFD_CLOEXEC); - - if (mfd >= 0) { --- -1.8.3.1 - diff --git a/kvm-block-Use-QEMU_IS_ALIGNED.patch b/kvm-block-Use-QEMU_IS_ALIGNED.patch deleted file mode 100644 index 7d4e065..0000000 --- a/kvm-block-Use-QEMU_IS_ALIGNED.patch +++ /dev/null @@ -1,192 +0,0 @@ -From 1eb1c45037b1e1084ab601ac8461fabca162b479 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Tue, 24 Sep 2019 21:11:49 +0100 -Subject: [PATCH 1/4] block: Use QEMU_IS_ALIGNED - -RH-Author: Maxim Levitsky -Message-id: <20190924211152.13461-2-mlevitsk@redhat.com> -Patchwork-id: 90874 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v2 1/4] block: Use QEMU_IS_ALIGNED -Bugzilla: 1745922 -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz -RH-Acked-by: Danilo de Paula - -From: Nir Soffer - -Replace instances of: - - (n & (BDRV_SECTOR_SIZE - 1)) == 0 - -And: - - (n & ~BDRV_SECTOR_MASK) == 0 - -With: - - QEMU_IS_ALIGNED(n, BDRV_SECTOR_SIZE) - -Which reveals the intent of the code better, and makes it easier to -locate the code checking alignment. 
- -Signed-off-by: Nir Soffer -Message-id: 20190827185913.27427-2-nsoffer@redhat.com -Reviewed-by: John Snow -Signed-off-by: Max Reitz -(cherry picked from commit 1bbbf32d5fffe334531c315d7bd865fdfb67b6c5) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. de Paula ---- - block/bochs.c | 4 ++-- - block/cloop.c | 4 ++-- - block/dmg.c | 4 ++-- - block/io.c | 8 ++++---- - block/qcow2-cluster.c | 4 ++-- - block/qcow2.c | 4 ++-- - block/vvfat.c | 8 ++++---- - qemu-img.c | 2 +- - 8 files changed, 19 insertions(+), 19 deletions(-) - -diff --git a/block/bochs.c b/block/bochs.c -index 962f185..32bb83b 100644 ---- a/block/bochs.c -+++ b/block/bochs.c -@@ -248,8 +248,8 @@ bochs_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - QEMUIOVector local_qiov; - int ret; - -- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); -- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); -+ assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); -+ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); - - qemu_iovec_init(&local_qiov, qiov->niov); - qemu_co_mutex_lock(&s->lock); -diff --git a/block/cloop.c b/block/cloop.c -index 384c973..4de9487 100644 ---- a/block/cloop.c -+++ b/block/cloop.c -@@ -253,8 +253,8 @@ cloop_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - int nb_sectors = bytes >> BDRV_SECTOR_BITS; - int ret, i; - -- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); -- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); -+ assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); -+ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); - - qemu_co_mutex_lock(&s->lock); - -diff --git a/block/dmg.c b/block/dmg.c -index 45f6b28..4a045f2 100644 ---- a/block/dmg.c -+++ b/block/dmg.c -@@ -697,8 +697,8 @@ dmg_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - int nb_sectors = bytes >> BDRV_SECTOR_BITS; - int ret, i; - -- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); -- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); -+ assert(QEMU_IS_ALIGNED(offset, 
BDRV_SECTOR_SIZE)); -+ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); - - qemu_co_mutex_lock(&s->lock); - -diff --git a/block/io.c b/block/io.c -index 06305c6..54093fc 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -1079,8 +1079,8 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, - sector_num = offset >> BDRV_SECTOR_BITS; - nb_sectors = bytes >> BDRV_SECTOR_BITS; - -- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); -- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); -+ assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); -+ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); - assert(bytes <= BDRV_REQUEST_MAX_BYTES); - assert(drv->bdrv_co_readv); - -@@ -1132,8 +1132,8 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, - sector_num = offset >> BDRV_SECTOR_BITS; - nb_sectors = bytes >> BDRV_SECTOR_BITS; - -- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); -- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); -+ assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); -+ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); - assert(bytes <= BDRV_REQUEST_MAX_BYTES); - - assert(drv->bdrv_co_writev); -diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c -index cc5609e..f2de746 100644 ---- a/block/qcow2-cluster.c -+++ b/block/qcow2-cluster.c -@@ -470,8 +470,8 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs, - { - if (bytes && bs->encrypted) { - BDRVQcow2State *s = bs->opaque; -- assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0); -- assert((bytes & ~BDRV_SECTOR_MASK) == 0); -+ assert(QEMU_IS_ALIGNED(offset_in_cluster, BDRV_SECTOR_SIZE)); -+ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); - assert(s->crypto); - if (qcow2_co_encrypt(bs, cluster_offset, - src_cluster_offset + offset_in_cluster, -diff --git a/block/qcow2.c b/block/qcow2.c -index 039bdc2..dc4302f 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -2071,8 +2071,8 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, - } - if 
(bs->encrypted) { - assert(s->crypto); -- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); -- assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0); -+ assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); -+ assert(QEMU_IS_ALIGNED(cur_bytes, BDRV_SECTOR_SIZE)); - if (qcow2_co_decrypt(bs, cluster_offset, offset, - cluster_data, cur_bytes) < 0) { - ret = -EIO; -diff --git a/block/vvfat.c b/block/vvfat.c -index f6c2880..019b8f1 100644 ---- a/block/vvfat.c -+++ b/block/vvfat.c -@@ -1547,8 +1547,8 @@ vvfat_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - int nb_sectors = bytes >> BDRV_SECTOR_BITS; - void *buf; - -- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); -- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); -+ assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); -+ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); - - buf = g_try_malloc(bytes); - if (bytes && buf == NULL) { -@@ -3082,8 +3082,8 @@ vvfat_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, - int nb_sectors = bytes >> BDRV_SECTOR_BITS; - void *buf; - -- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); -- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); -+ assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); -+ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); - - buf = g_try_malloc(bytes); - if (bytes && buf == NULL) { -diff --git a/qemu-img.c b/qemu-img.c -index 7998377..940ae94 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -2138,7 +2138,7 @@ static int img_convert(int argc, char **argv) - int64_t sval; - - sval = cvtnum(optarg); -- if (sval < 0 || sval & (BDRV_SECTOR_SIZE - 1) || -+ if (sval < 0 || !QEMU_IS_ALIGNED(sval, BDRV_SECTOR_SIZE) || - sval / BDRV_SECTOR_SIZE > MAX_BUF_SECTORS) { - error_report("Invalid buffer size for sparse output specified. " - "Valid sizes are multiples of %llu up to %llu. 
Select " --- -1.8.3.1 - diff --git a/kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch b/kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch deleted file mode 100644 index 4df148c..0000000 --- a/kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch +++ /dev/null @@ -1,107 +0,0 @@ -From df8fadfd9450c8709864db44c2f676d40f323f95 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Fri, 13 Sep 2019 14:12:25 +0100 -Subject: [PATCH 2/4] block/create: Do not abort if a block driver is not - available -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Philippe Mathieu-Daudé -Message-id: <20190913141225.12022-2-philmd@redhat.com> -Patchwork-id: 90451 -O-Subject: [RHEL-7.7 qemu-kvm-rhev + RHEL-AV-8.1.0 qemu-kvm PATCH v2 1/1] block/create: Do not abort if a block driver is not available -Bugzilla: 1746267 -RH-Acked-by: Kevin Wolf -RH-Acked-by: John Snow -RH-Acked-by: Stefan Hajnoczi - -The 'blockdev-create' QMP command was introduced as experimental -feature in commit b0292b851b8, using the assert() debug call. -It got promoted to 'stable' command in 3fb588a0f2c, but the -assert call was not removed. - -Some block drivers are optional, and bdrv_find_format() might -return a NULL value, triggering the assertion. - -Stable code is not expected to abort, so return an error instead. - -This is easily reproducible when libnfs is not installed: - - ./configure - [...] - module support no - Block whitelist (rw) - Block whitelist (ro) - libiscsi support yes - libnfs support no - [...] 
- -Start QEMU: - - $ qemu-system-x86_64 -S -qmp unix:/tmp/qemu.qmp,server,nowait - -Send the 'blockdev-create' with the 'nfs' driver: - - $ ( cat << 'EOF' - {'execute': 'qmp_capabilities'} - {'execute': 'blockdev-create', 'arguments': {'job-id': 'x', 'options': {'size': 0, 'driver': 'nfs', 'location': {'path': '/', 'server': {'host': '::1', 'type': 'inet'}}}}, 'id': 'x'} - EOF - ) | socat STDIO UNIX:/tmp/qemu.qmp - {"QMP": {"version": {"qemu": {"micro": 50, "minor": 1, "major": 4}, "package": "v4.1.0-733-g89ea03a7dc"}, "capabilities": ["oob"]}} - {"return": {}} - -QEMU crashes: - - $ gdb qemu-system-x86_64 core - Program received signal SIGSEGV, Segmentation fault. - (gdb) bt - #0 0x00007ffff510957f in raise () at /lib64/libc.so.6 - #1 0x00007ffff50f3895 in abort () at /lib64/libc.so.6 - #2 0x00007ffff50f3769 in _nl_load_domain.cold.0 () at /lib64/libc.so.6 - #3 0x00007ffff5101a26 in .annobin_assert.c_end () at /lib64/libc.so.6 - #4 0x0000555555d7e1f1 in qmp_blockdev_create (job_id=0x555556baee40 "x", options=0x555557666610, errp=0x7fffffffc770) at block/create.c:69 - #5 0x0000555555c96b52 in qmp_marshal_blockdev_create (args=0x7fffdc003830, ret=0x7fffffffc7f8, errp=0x7fffffffc7f0) at qapi/qapi-commands-block-core.c:1314 - #6 0x0000555555deb0a0 in do_qmp_dispatch (cmds=0x55555645de70 , request=0x7fffdc005c70, allow_oob=false, errp=0x7fffffffc898) at qapi/qmp-dispatch.c:131 - #7 0x0000555555deb2a1 in qmp_dispatch (cmds=0x55555645de70 , request=0x7fffdc005c70, allow_oob=false) at qapi/qmp-dispatch.c:174 - -With this patch applied, QEMU returns a QMP error: - - {'execute': 'blockdev-create', 'arguments': {'job-id': 'x', 'options': {'size': 0, 'driver': 'nfs', 'location': {'path': '/', 'server': {'host': '::1', 'type': 'inet'}}}}, 'id': 'x'} - {"id": "x", "error": {"class": "GenericError", "desc": "Block driver 'nfs' not found or not supported"}} - -Cc: qemu-stable@nongnu.org -Reported-by: Xu Tian -Signed-off-by: Philippe Mathieu-Daudé -Reviewed-by: Eric Blake 
-Reviewed-by: John Snow -Signed-off-by: Kevin Wolf -(cherry picked from commit d90d5cae2b10efc0e8d0b3cc91ff16201853d3ba) -Signed-off-by: Philippe Mathieu-Daudé -Signed-off-by: Danilo C. L. de Paula ---- - block/create.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/block/create.c b/block/create.c -index 9534121..de5e97b 100644 ---- a/block/create.c -+++ b/block/create.c -@@ -63,9 +63,13 @@ void qmp_blockdev_create(const char *job_id, BlockdevCreateOptions *options, - const char *fmt = BlockdevDriver_str(options->driver); - BlockDriver *drv = bdrv_find_format(fmt); - -+ if (!drv) { -+ error_setg(errp, "Block driver '%s' not found or not supported", fmt); -+ return; -+ } -+ - /* If the driver is in the schema, we know that it exists. But it may not - * be whitelisted. */ -- assert(drv); - if (bdrv_uses_whitelist() && !bdrv_is_whitelisted(drv, false)) { - error_setg(errp, "Driver is not whitelisted"); - return; --- -1.8.3.1 - diff --git a/kvm-block-posix-Always-allocate-the-first-block.patch b/kvm-block-posix-Always-allocate-the-first-block.patch deleted file mode 100644 index 6b4229c..0000000 --- a/kvm-block-posix-Always-allocate-the-first-block.patch +++ /dev/null @@ -1,386 +0,0 @@ -From 58dc8ae23325384b0d9494d203254dc2f6a99255 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Mon, 9 Sep 2019 07:38:21 +0100 -Subject: [PATCH 5/6] block: posix: Always allocate the first block - -RH-Author: Max Reitz -Message-id: <20190909073822.26191-3-mreitz@redhat.com> -Patchwork-id: 90333 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/3] block: posix: Always allocate the first block -Bugzilla: 1749134 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Thomas Huth -RH-Acked-by: Kevin Wolf - -From: Nir Soffer - -When creating an image with preallocation "off" or "falloc", the first -block of the image is typically not allocated. 
When using Gluster -storage backed by XFS filesystem, reading this block using direct I/O -succeeds regardless of request length, fooling alignment detection. - -In this case we fallback to a safe value (4096) instead of the optimal -value (512), which may lead to unneeded data copying when aligning -requests. Allocating the first block avoids the fallback. - -Since we allocate the first block even with preallocation=off, we no -longer create images with zero disk size: - - $ ./qemu-img create -f raw test.raw 1g - Formatting 'test.raw', fmt=raw size=1073741824 - - $ ls -lhs test.raw - 4.0K -rw-r--r--. 1 nsoffer nsoffer 1.0G Aug 16 23:48 test.raw - -And converting the image requires additional cluster: - - $ ./qemu-img measure -f raw -O qcow2 test.raw - required size: 458752 - fully allocated size: 1074135040 - -When using format like vmdk with multiple files per image, we allocate -one block per file: - - $ ./qemu-img create -f vmdk -o subformat=twoGbMaxExtentFlat test.vmdk 4g - Formatting 'test.vmdk', fmt=vmdk size=4294967296 compat6=off hwversion=undefined subformat=twoGbMaxExtentFlat - - $ ls -lhs test*.vmdk - 4.0K -rw-r--r--. 1 nsoffer nsoffer 2.0G Aug 27 03:23 test-f001.vmdk - 4.0K -rw-r--r--. 1 nsoffer nsoffer 2.0G Aug 27 03:23 test-f002.vmdk - 4.0K -rw-r--r--. 1 nsoffer nsoffer 353 Aug 27 03:23 test.vmdk - -I did quick performance test for copying disks with qemu-img convert to -new raw target image to Gluster storage with sector size of 512 bytes: - - for i in $(seq 10); do - rm -f dst.raw - sleep 10 - time ./qemu-img convert -f raw -O raw -t none -T none src.raw dst.raw - done - -Here is a table comparing the total time spent: - -Type Before(s) After(s) Diff(%) ---------------------------------------- -real 530.028 469.123 -11.4 -user 17.204 10.768 -37.4 -sys 17.881 7.011 -60.7 - -We can see very clear improvement in CPU usage. 
- -Signed-off-by: Nir Soffer -Message-id: 20190827010528.8818-2-nsoffer@redhat.com -Reviewed-by: Max Reitz -Signed-off-by: Max Reitz -(cherry picked from commit 3a20013fbb26d2a1bd11ef148eefdb1508783787) -Signed-off-by: Max Reitz -Signed-off-by: Danilo C. L. de Paula ---- - block/file-posix.c | 51 ++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/059.out | 2 +- - tests/qemu-iotests/150.out | 11 --------- - tests/qemu-iotests/150.out.qcow2 | 11 +++++++++ - tests/qemu-iotests/150.out.raw | 12 ++++++++++ - tests/qemu-iotests/175 | 19 ++++++++++----- - tests/qemu-iotests/175.out | 8 +++---- - tests/qemu-iotests/178.out.qcow2 | 4 ++-- - tests/qemu-iotests/221.out | 12 ++++++---- - tests/qemu-iotests/253.out | 12 ++++++---- - 10 files changed, 110 insertions(+), 32 deletions(-) - delete mode 100644 tests/qemu-iotests/150.out - create mode 100644 tests/qemu-iotests/150.out.qcow2 - create mode 100644 tests/qemu-iotests/150.out.raw - -diff --git a/block/file-posix.c b/block/file-posix.c -index b8b4dad..8ea9889 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -1749,6 +1749,43 @@ static int handle_aiocb_discard(void *opaque) - return ret; - } - -+/* -+ * Help alignment probing by allocating the first block. -+ * -+ * When reading with direct I/O from unallocated area on Gluster backed by XFS, -+ * reading succeeds regardless of request length. In this case we fallback to -+ * safe alignment which is not optimal. Allocating the first block avoids this -+ * fallback. -+ * -+ * fd may be opened with O_DIRECT, but we don't know the buffer alignment or -+ * request alignment, so we use safe values. -+ * -+ * Returns: 0 on success, -errno on failure. Since this is an optimization, -+ * caller may ignore failures. -+ */ -+static int allocate_first_block(int fd, size_t max_size) -+{ -+ size_t write_size = (max_size < MAX_BLOCKSIZE) -+ ? 
BDRV_SECTOR_SIZE -+ : MAX_BLOCKSIZE; -+ size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize()); -+ void *buf; -+ ssize_t n; -+ int ret; -+ -+ buf = qemu_memalign(max_align, write_size); -+ memset(buf, 0, write_size); -+ -+ do { -+ n = pwrite(fd, buf, write_size, 0); -+ } while (n == -1 && errno == EINTR); -+ -+ ret = (n == -1) ? -errno : 0; -+ -+ qemu_vfree(buf); -+ return ret; -+} -+ - static int handle_aiocb_truncate(void *opaque) - { - RawPosixAIOData *aiocb = opaque; -@@ -1788,6 +1825,17 @@ static int handle_aiocb_truncate(void *opaque) - /* posix_fallocate() doesn't set errno. */ - error_setg_errno(errp, -result, - "Could not preallocate new data"); -+ } else if (current_length == 0) { -+ /* -+ * posix_fallocate() uses fallocate() if the filesystem -+ * supports it, or fallback to manually writing zeroes. If -+ * fallocate() was used, unaligned reads from the fallocated -+ * area in raw_probe_alignment() will succeed, hence we need to -+ * allocate the first block. -+ * -+ * Optimize future alignment probing; ignore failures. -+ */ -+ allocate_first_block(fd, offset); - } - } else { - result = 0; -@@ -1849,6 +1897,9 @@ static int handle_aiocb_truncate(void *opaque) - if (ftruncate(fd, offset) != 0) { - result = -errno; - error_setg_errno(errp, -result, "Could not resize file"); -+ } else if (current_length == 0 && offset > current_length) { -+ /* Optimize future alignment probing; ignore failures. 
*/ -+ allocate_first_block(fd, offset); - } - return result; - default: -diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out -index 4fab42a..fe3f861 100644 ---- a/tests/qemu-iotests/059.out -+++ b/tests/qemu-iotests/059.out -@@ -27,7 +27,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824000 subformat=twoGbMax - image: TEST_DIR/t.vmdk - file format: vmdk - virtual size: 0.977 TiB (1073741824000 bytes) --disk size: 16 KiB -+disk size: 1.97 MiB - Format specific information: - cid: XXXXXXXX - parent cid: XXXXXXXX -diff --git a/tests/qemu-iotests/150.out b/tests/qemu-iotests/150.out -deleted file mode 100644 -index 2a54e8d..0000000 ---- a/tests/qemu-iotests/150.out -+++ /dev/null -@@ -1,11 +0,0 @@ --QA output created by 150 -- --=== Mapping sparse conversion === -- --Offset Length File -- --=== Mapping non-sparse conversion === -- --Offset Length File --0 0x100000 TEST_DIR/t.IMGFMT --*** done -diff --git a/tests/qemu-iotests/150.out.qcow2 b/tests/qemu-iotests/150.out.qcow2 -new file mode 100644 -index 0000000..2a54e8d ---- /dev/null -+++ b/tests/qemu-iotests/150.out.qcow2 -@@ -0,0 +1,11 @@ -+QA output created by 150 -+ -+=== Mapping sparse conversion === -+ -+Offset Length File -+ -+=== Mapping non-sparse conversion === -+ -+Offset Length File -+0 0x100000 TEST_DIR/t.IMGFMT -+*** done -diff --git a/tests/qemu-iotests/150.out.raw b/tests/qemu-iotests/150.out.raw -new file mode 100644 -index 0000000..3cdc772 ---- /dev/null -+++ b/tests/qemu-iotests/150.out.raw -@@ -0,0 +1,12 @@ -+QA output created by 150 -+ -+=== Mapping sparse conversion === -+ -+Offset Length File -+0 0x1000 TEST_DIR/t.IMGFMT -+ -+=== Mapping non-sparse conversion === -+ -+Offset Length File -+0 0x100000 TEST_DIR/t.IMGFMT -+*** done -diff --git a/tests/qemu-iotests/175 b/tests/qemu-iotests/175 -index 51e62c8..7ba28b3 100755 ---- a/tests/qemu-iotests/175 -+++ b/tests/qemu-iotests/175 -@@ -37,14 +37,16 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 - # the file size. 
This function hides the resulting difference in the - # stat -c '%b' output. - # Parameter 1: Number of blocks an empty file occupies --# Parameter 2: Image size in bytes -+# Parameter 2: Minimal number of blocks in an image -+# Parameter 3: Image size in bytes - _filter_blocks() - { - extra_blocks=$1 -- img_size=$2 -+ min_blocks=$2 -+ img_size=$3 - -- sed -e "s/blocks=$extra_blocks\\(\$\\|[^0-9]\\)/nothing allocated/" \ -- -e "s/blocks=$((extra_blocks + img_size / 512))\\(\$\\|[^0-9]\\)/everything allocated/" -+ sed -e "s/blocks=$min_blocks\\(\$\\|[^0-9]\\)/min allocation/" \ -+ -e "s/blocks=$((extra_blocks + img_size / 512))\\(\$\\|[^0-9]\\)/max allocation/" - } - - # get standard environment, filters and checks -@@ -60,16 +62,21 @@ size=$((1 * 1024 * 1024)) - touch "$TEST_DIR/empty" - extra_blocks=$(stat -c '%b' "$TEST_DIR/empty") - -+# We always write the first byte; check how many blocks this filesystem -+# allocates to match empty image alloation. -+printf "\0" > "$TEST_DIR/empty" -+min_blocks=$(stat -c '%b' "$TEST_DIR/empty") -+ - echo - echo "== creating image with default preallocation ==" - _make_test_img $size | _filter_imgfmt --stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $size -+stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $min_blocks $size - - for mode in off full falloc; do - echo - echo "== creating image with preallocation $mode ==" - IMGOPTS=preallocation=$mode _make_test_img $size | _filter_imgfmt -- stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $size -+ stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $min_blocks $size - done - - # success, all done -diff --git a/tests/qemu-iotests/175.out b/tests/qemu-iotests/175.out -index 6d9a5ed..263e521 100644 ---- a/tests/qemu-iotests/175.out -+++ b/tests/qemu-iotests/175.out -@@ -2,17 +2,17 @@ QA output created by 175 - - == creating image with default preallocation == - Formatting 'TEST_DIR/t.IMGFMT', 
fmt=IMGFMT size=1048576 --size=1048576, nothing allocated -+size=1048576, min allocation - - == creating image with preallocation off == - Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=off --size=1048576, nothing allocated -+size=1048576, min allocation - - == creating image with preallocation full == - Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=full --size=1048576, everything allocated -+size=1048576, max allocation - - == creating image with preallocation falloc == - Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=falloc --size=1048576, everything allocated -+size=1048576, max allocation - *** done -diff --git a/tests/qemu-iotests/178.out.qcow2 b/tests/qemu-iotests/178.out.qcow2 -index 55a8dc9..9e7d8c4 100644 ---- a/tests/qemu-iotests/178.out.qcow2 -+++ b/tests/qemu-iotests/178.out.qcow2 -@@ -101,7 +101,7 @@ converted image file size in bytes: 196608 - == raw input image with data (human) == - - Formatting 'TEST_DIR/t.qcow2', fmt=IMGFMT size=1073741824 --required size: 393216 -+required size: 458752 - fully allocated size: 1074135040 - wrote 512/512 bytes at offset 512 - 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -@@ -257,7 +257,7 @@ converted image file size in bytes: 196608 - - Formatting 'TEST_DIR/t.qcow2', fmt=IMGFMT size=1073741824 - { -- "required": 393216, -+ "required": 458752, - "fully-allocated": 1074135040 - } - wrote 512/512 bytes at offset 512 -diff --git a/tests/qemu-iotests/221.out b/tests/qemu-iotests/221.out -index 9f9dd52..dca024a 100644 ---- a/tests/qemu-iotests/221.out -+++ b/tests/qemu-iotests/221.out -@@ -3,14 +3,18 @@ QA output created by 221 - === Check mapping of unaligned raw image === - - Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65537 --[{ "start": 0, "length": 66048, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] --[{ "start": 0, "length": 66048, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] -+[{ "start": 0, 
"length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 4096, "length": 61952, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] -+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 4096, "length": 61952, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] - wrote 1/1 bytes at offset 65536 - 1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) --[{ "start": 0, "length": 65536, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, -+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 4096, "length": 61440, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, - { "start": 65536, "length": 1, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, - { "start": 65537, "length": 511, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] --[{ "start": 0, "length": 65536, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, -+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 4096, "length": 61440, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, - { "start": 65536, "length": 1, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, - { "start": 65537, "length": 511, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] - *** done -diff --git a/tests/qemu-iotests/253.out b/tests/qemu-iotests/253.out -index 607c0ba..3d08b30 100644 ---- a/tests/qemu-iotests/253.out -+++ b/tests/qemu-iotests/253.out -@@ -3,12 +3,16 @@ QA output created by 253 - === Check mapping of unaligned raw image === - - Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048575 --[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] --[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] -+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, 
"offset": OFFSET}, -+{ "start": 4096, "length": 1044480, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] -+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 4096, "length": 1044480, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] - wrote 65535/65535 bytes at offset 983040 - 63.999 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) --[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, -+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 4096, "length": 978944, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, - { "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] --[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, -+[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 4096, "length": 978944, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, - { "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] - *** done --- -1.8.3.1 - diff --git a/kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch b/kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch deleted file mode 100644 index 9cdcb3f..0000000 --- a/kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 554884a1fe9a271ad78771c37e4be5a8f84258fc Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Tue, 24 Sep 2019 21:11:50 +0100 -Subject: [PATCH 2/4] block/qcow2: Fix corruption introduced by commit - 8ac0f15f335 - -RH-Author: Maxim Levitsky -Message-id: <20190924211152.13461-3-mlevitsk@redhat.com> -Patchwork-id: 90878 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v2 2/4] block/qcow2: Fix corruption introduced by commit 8ac0f15f335 -Bugzilla: 1745922 -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz 
-RH-Acked-by: Danilo de Paula - -This fixes subtle corruption introduced by luks threaded encryption -in commit 8ac0f15f335 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1745922 - -The corruption happens when we do a write that - * writes to two or more unallocated clusters at once - * doesn't fully cover the first sector - * doesn't fully cover the last sector - * uses luks encryption - -In this case, when allocating the new clusters we COW both areas -prior to the write and after the write, and we encrypt them. - -The above mentioned commit accidentally made it so we encrypt the -second COW area using the physical cluster offset of the first area. - -The problem is that offset_in_cluster in do_perform_cow_encrypt -can be larger that the cluster size, thus cluster_offset -will no longer point to the start of the cluster at which encrypted -area starts. - -Next patch in this series will refactor the code to avoid all these -assumptions. - -In the bugreport that was triggered by rebasing a luks image to new, -zero filled base, which lot of such writes, and causes some files -with zero areas to contain garbage there instead. -But as described above it can happen elsewhere as well - -Signed-off-by: Maxim Levitsky -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-id: 20190915203655.21638-2-mlevitsk@redhat.com -Reviewed-by: Max Reitz -Signed-off-by: Max Reitz -(cherry picked from commit 38e7d54bdc518b5a05a922467304bcace2396945) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. 
de Paula ---- - block/qcow2-cluster.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c -index f2de746..11e5a92 100644 ---- a/block/qcow2-cluster.c -+++ b/block/qcow2-cluster.c -@@ -473,9 +473,10 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs, - assert(QEMU_IS_ALIGNED(offset_in_cluster, BDRV_SECTOR_SIZE)); - assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); - assert(s->crypto); -- if (qcow2_co_encrypt(bs, cluster_offset, -- src_cluster_offset + offset_in_cluster, -- buffer, bytes) < 0) { -+ if (qcow2_co_encrypt(bs, -+ start_of_cluster(s, cluster_offset + offset_in_cluster), -+ src_cluster_offset + offset_in_cluster, -+ buffer, bytes) < 0) { - return false; - } - } --- -1.8.3.1 - diff --git a/kvm-block-qcow2-refactor-encryption-code.patch b/kvm-block-qcow2-refactor-encryption-code.patch deleted file mode 100644 index f32907a..0000000 --- a/kvm-block-qcow2-refactor-encryption-code.patch +++ /dev/null @@ -1,234 +0,0 @@ -From 780fbdf04884188eca3d5891faa2b2417a88ef14 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Tue, 24 Sep 2019 21:11:51 +0100 -Subject: [PATCH 3/4] block/qcow2: refactor encryption code - -RH-Author: Maxim Levitsky -Message-id: <20190924211152.13461-4-mlevitsk@redhat.com> -Patchwork-id: 90876 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v2 3/4] block/qcow2: refactor encryption code -Bugzilla: 1745922 -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz -RH-Acked-by: Danilo de Paula - -* Change the qcow2_co_{encrypt|decrypt} to just receive full host and - guest offsets and use this function directly instead of calling - do_perform_cow_encrypt (which is removed by that patch). - -* Adjust qcow2_co_encdec to take full host and guest offsets as well. - -* Document the qcow2_co_{encrypt|decrypt} arguments - to prevent the bug fixed in former commit from hopefully - happening again. 
- -Signed-off-by: Maxim Levitsky -Message-id: 20190915203655.21638-3-mlevitsk@redhat.com -Reviewed-by: Vladimir Sementsov-Ogievskiy -[mreitz: Let perform_cow() return the error value returned by - qcow2_co_encrypt(), as proposed by Vladimir] -Signed-off-by: Max Reitz -(cherry picked from commit 603fbd076c76438b15ec842f0e2d1ba4867dfd00) -Signed-off-by: Maxim Levitsky - -Signed-off-by: Danilo C. L. de Paula ---- - block/qcow2-cluster.c | 41 +++++++++++---------------------- - block/qcow2-threads.c | 63 +++++++++++++++++++++++++++++++++++++++------------ - block/qcow2.c | 5 ++-- - block/qcow2.h | 8 +++---- - 4 files changed, 69 insertions(+), 48 deletions(-) - -diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c -index 11e5a92..b30fd12 100644 ---- a/block/qcow2-cluster.c -+++ b/block/qcow2-cluster.c -@@ -461,28 +461,6 @@ static int coroutine_fn do_perform_cow_read(BlockDriverState *bs, - return 0; - } - --static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs, -- uint64_t src_cluster_offset, -- uint64_t cluster_offset, -- unsigned offset_in_cluster, -- uint8_t *buffer, -- unsigned bytes) --{ -- if (bytes && bs->encrypted) { -- BDRVQcow2State *s = bs->opaque; -- assert(QEMU_IS_ALIGNED(offset_in_cluster, BDRV_SECTOR_SIZE)); -- assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); -- assert(s->crypto); -- if (qcow2_co_encrypt(bs, -- start_of_cluster(s, cluster_offset + offset_in_cluster), -- src_cluster_offset + offset_in_cluster, -- buffer, bytes) < 0) { -- return false; -- } -- } -- return true; --} -- - static int coroutine_fn do_perform_cow_write(BlockDriverState *bs, - uint64_t cluster_offset, - unsigned offset_in_cluster, -@@ -887,12 +865,19 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) - - /* Encrypt the data if necessary before writing it */ - if (bs->encrypted) { -- if (!do_perform_cow_encrypt(bs, m->offset, m->alloc_offset, -- start->offset, start_buffer, -- start->nb_bytes) || -- !do_perform_cow_encrypt(bs, m->offset, 
m->alloc_offset, -- end->offset, end_buffer, end->nb_bytes)) { -- ret = -EIO; -+ ret = qcow2_co_encrypt(bs, -+ m->alloc_offset + start->offset, -+ m->offset + start->offset, -+ start_buffer, start->nb_bytes); -+ if (ret < 0) { -+ goto fail; -+ } -+ -+ ret = qcow2_co_encrypt(bs, -+ m->alloc_offset + end->offset, -+ m->offset + end->offset, -+ end_buffer, end->nb_bytes); -+ if (ret < 0) { - goto fail; - } - } -diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c -index 3b1e63f..8f5a0d1 100644 ---- a/block/qcow2-threads.c -+++ b/block/qcow2-threads.c -@@ -234,35 +234,70 @@ static int qcow2_encdec_pool_func(void *opaque) - } - - static int coroutine_fn --qcow2_co_encdec(BlockDriverState *bs, uint64_t file_cluster_offset, -- uint64_t offset, void *buf, size_t len, Qcow2EncDecFunc func) -+qcow2_co_encdec(BlockDriverState *bs, uint64_t host_offset, -+ uint64_t guest_offset, void *buf, size_t len, -+ Qcow2EncDecFunc func) - { - BDRVQcow2State *s = bs->opaque; - Qcow2EncDecData arg = { - .block = s->crypto, -- .offset = s->crypt_physical_offset ? -- file_cluster_offset + offset_into_cluster(s, offset) : -- offset, -+ .offset = s->crypt_physical_offset ? host_offset : guest_offset, - .buf = buf, - .len = len, - .func = func, - }; - -- return qcow2_co_process(bs, qcow2_encdec_pool_func, &arg); -+ assert(QEMU_IS_ALIGNED(guest_offset, BDRV_SECTOR_SIZE)); -+ assert(QEMU_IS_ALIGNED(host_offset, BDRV_SECTOR_SIZE)); -+ assert(QEMU_IS_ALIGNED(len, BDRV_SECTOR_SIZE)); -+ assert(s->crypto); -+ -+ return len == 0 ? 
0 : qcow2_co_process(bs, qcow2_encdec_pool_func, &arg); - } - -+/* -+ * qcow2_co_encrypt() -+ * -+ * Encrypts one or more contiguous aligned sectors -+ * -+ * @host_offset - underlying storage offset of the first sector of the -+ * data to be encrypted -+ * -+ * @guest_offset - guest (virtual) offset of the first sector of the -+ * data to be encrypted -+ * -+ * @buf - buffer with the data to encrypt, that after encryption -+ * will be written to the underlying storage device at -+ * @host_offset -+ * -+ * @len - length of the buffer (must be a BDRV_SECTOR_SIZE multiple) -+ * -+ * Depending on the encryption method, @host_offset and/or @guest_offset -+ * may be used for generating the initialization vector for -+ * encryption. -+ * -+ * Note that while the whole range must be aligned on sectors, it -+ * does not have to be aligned on clusters and can also cross cluster -+ * boundaries -+ */ - int coroutine_fn --qcow2_co_encrypt(BlockDriverState *bs, uint64_t file_cluster_offset, -- uint64_t offset, void *buf, size_t len) -+qcow2_co_encrypt(BlockDriverState *bs, uint64_t host_offset, -+ uint64_t guest_offset, void *buf, size_t len) - { -- return qcow2_co_encdec(bs, file_cluster_offset, offset, buf, len, -- qcrypto_block_encrypt); -+ return qcow2_co_encdec(bs, host_offset, guest_offset, buf, len, -+ qcrypto_block_encrypt); - } - -+/* -+ * qcow2_co_decrypt() -+ * -+ * Decrypts one or more contiguous aligned sectors -+ * Similar to qcow2_co_encrypt -+ */ - int coroutine_fn --qcow2_co_decrypt(BlockDriverState *bs, uint64_t file_cluster_offset, -- uint64_t offset, void *buf, size_t len) -+qcow2_co_decrypt(BlockDriverState *bs, uint64_t host_offset, -+ uint64_t guest_offset, void *buf, size_t len) - { -- return qcow2_co_encdec(bs, file_cluster_offset, offset, buf, len, -- qcrypto_block_decrypt); -+ return qcow2_co_encdec(bs, host_offset, guest_offset, buf, len, -+ qcrypto_block_decrypt); - } -diff --git a/block/qcow2.c b/block/qcow2.c -index dc4302f..d4c4f24 100644 ---- 
a/block/qcow2.c -+++ b/block/qcow2.c -@@ -2073,7 +2073,8 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, - assert(s->crypto); - assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); - assert(QEMU_IS_ALIGNED(cur_bytes, BDRV_SECTOR_SIZE)); -- if (qcow2_co_decrypt(bs, cluster_offset, offset, -+ if (qcow2_co_decrypt(bs, cluster_offset + offset_in_cluster, -+ offset, - cluster_data, cur_bytes) < 0) { - ret = -EIO; - goto fail; -@@ -2288,7 +2289,7 @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, - QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); - qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); - -- if (qcow2_co_encrypt(bs, cluster_offset, offset, -+ if (qcow2_co_encrypt(bs, cluster_offset + offset_in_cluster, offset, - cluster_data, cur_bytes) < 0) { - ret = -EIO; - goto out_unlocked; -diff --git a/block/qcow2.h b/block/qcow2.h -index fc1b0d3..b54e734 100644 ---- a/block/qcow2.h -+++ b/block/qcow2.h -@@ -757,10 +757,10 @@ ssize_t coroutine_fn - qcow2_co_decompress(BlockDriverState *bs, void *dest, size_t dest_size, - const void *src, size_t src_size); - int coroutine_fn --qcow2_co_encrypt(BlockDriverState *bs, uint64_t file_cluster_offset, -- uint64_t offset, void *buf, size_t len); -+qcow2_co_encrypt(BlockDriverState *bs, uint64_t host_offset, -+ uint64_t guest_offset, void *buf, size_t len); - int coroutine_fn --qcow2_co_decrypt(BlockDriverState *bs, uint64_t file_cluster_offset, -- uint64_t offset, void *buf, size_t len); -+qcow2_co_decrypt(BlockDriverState *bs, uint64_t host_offset, -+ uint64_t guest_offset, void *buf, size_t len); - - #endif --- -1.8.3.1 - diff --git a/kvm-blockdev-reduce-aio_context-locked-sections-in-bitma.patch b/kvm-blockdev-reduce-aio_context-locked-sections-in-bitma.patch deleted file mode 100644 index cf8117f..0000000 --- a/kvm-blockdev-reduce-aio_context-locked-sections-in-bitma.patch +++ /dev/null @@ -1,122 +0,0 @@ -From 107ad619739795199df98c56d0ad4db14fec3722 Mon Sep 
17 00:00:00 2001 -From: John Snow -Date: Fri, 27 Sep 2019 20:18:44 +0100 -Subject: [PATCH 1/6] blockdev: reduce aio_context locked sections in bitmap - add/remove - -RH-Author: John Snow -Message-id: <20190927201846.6823-2-jsnow@redhat.com> -Patchwork-id: 90908 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/3] blockdev: reduce aio_context locked sections in bitmap add/remove -Bugzilla: 1756413 -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Max Reitz -RH-Acked-by: Stefano Garzarella - -From: Vladimir Sementsov-Ogievskiy - -Commit 0a6c86d024c52 returned these locks back to add/remove -functionality, to protect from intersection of persistent bitmap -related IO with other IO. But other bitmap-related functions called -here are unrelated to the problem, and there are no needs to keep these -calls inside critical sections. - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: John Snow -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20190708220502.12977-2-jsnow@redhat.com -Signed-off-by: John Snow -(cherry picked from commit 2899f41eef2806cf8eb119811c9d6fcf15ce80f6) -Signed-off-by: John Snow -Signed-off-by: Danilo C. L. 
de Paula ---- - blockdev.c | 30 +++++++++++++----------------- - 1 file changed, 13 insertions(+), 17 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 4d141e9..0124825 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -2811,7 +2811,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - { - BlockDriverState *bs; - BdrvDirtyBitmap *bitmap; -- AioContext *aio_context = NULL; - - if (!name || name[0] == '\0') { - error_setg(errp, "Bitmap name cannot be empty"); -@@ -2847,16 +2846,20 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - } - - if (persistent) { -- aio_context = bdrv_get_aio_context(bs); -+ AioContext *aio_context = bdrv_get_aio_context(bs); -+ bool ok; -+ - aio_context_acquire(aio_context); -- if (!bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp)) { -- goto out; -+ ok = bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp); -+ aio_context_release(aio_context); -+ if (!ok) { -+ return; - } - } - - bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp); - if (bitmap == NULL) { -- goto out; -+ return; - } - - if (disabled) { -@@ -2864,10 +2867,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - } - - bdrv_dirty_bitmap_set_persistence(bitmap, persistent); -- out: -- if (aio_context) { -- aio_context_release(aio_context); -- } - } - - void qmp_block_dirty_bitmap_remove(const char *node, const char *name, -@@ -2875,8 +2874,6 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name, - { - BlockDriverState *bs; - BdrvDirtyBitmap *bitmap; -- Error *local_err = NULL; -- AioContext *aio_context = NULL; - - bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); - if (!bitmap || !bs) { -@@ -2889,20 +2886,19 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name, - } - - if (bdrv_dirty_bitmap_get_persistence(bitmap)) { -- aio_context = bdrv_get_aio_context(bs); -+ AioContext *aio_context = bdrv_get_aio_context(bs); -+ Error *local_err 
= NULL; -+ - aio_context_acquire(aio_context); - bdrv_remove_persistent_dirty_bitmap(bs, name, &local_err); -+ aio_context_release(aio_context); - if (local_err != NULL) { - error_propagate(errp, local_err); -- goto out; -+ return; - } - } - - bdrv_release_dirty_bitmap(bs, bitmap); -- out: -- if (aio_context) { -- aio_context_release(aio_context); -- } - } - - /** --- -1.8.3.1 - diff --git a/kvm-blockjob-update-nodes-head-while-removing-all-bdrv.patch b/kvm-blockjob-update-nodes-head-while-removing-all-bdrv.patch deleted file mode 100644 index 5e0a568..0000000 --- a/kvm-blockjob-update-nodes-head-while-removing-all-bdrv.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 49dd008d58d7527a98981d96106949b2913fb4d9 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Wed, 18 Sep 2019 11:34:14 +0100 -Subject: [PATCH 1/4] blockjob: update nodes head while removing all bdrv - -RH-Author: Sergio Lopez Pascual -Message-id: <20190918113414.24522-2-slp@redhat.com> -Patchwork-id: 90748 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] blockjob: update nodes head while removing all bdrv -Bugzilla: 1746631 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf - -block_job_remove_all_bdrv() iterates through job->nodes, calling -bdrv_root_unref_child() for each entry. The call to the latter may -reach child_job_[can_]set_aio_ctx(), which will also attempt to -traverse job->nodes, potentially finding entries that where freed -on previous iterations. - -To avoid this situation, update job->nodes head on each iteration to -ensure that already freed entries are no longer linked to the list. - -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1746631 -Signed-off-by: Sergio Lopez -Cc: qemu-stable@nongnu.org -Signed-off-by: Max Reitz -Message-id: 20190911100316.32282-1-mreitz@redhat.com -Reviewed-by: Sergio Lopez -Signed-off-by: Max Reitz -(cherry picked from commit d876bf676f5e7c6aa9ac64555e48cba8734ecb2f) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. 
de Paula ---- - blockjob.c | 17 +++++++++++++---- - 1 file changed, 13 insertions(+), 4 deletions(-) - -diff --git a/blockjob.c b/blockjob.c -index 20b7f55..74abb97 100644 ---- a/blockjob.c -+++ b/blockjob.c -@@ -186,14 +186,23 @@ static const BdrvChildRole child_job = { - - void block_job_remove_all_bdrv(BlockJob *job) - { -- GSList *l; -- for (l = job->nodes; l; l = l->next) { -+ /* -+ * bdrv_root_unref_child() may reach child_job_[can_]set_aio_ctx(), -+ * which will also traverse job->nodes, so consume the list one by -+ * one to make sure that such a concurrent access does not attempt -+ * to process an already freed BdrvChild. -+ */ -+ while (job->nodes) { -+ GSList *l = job->nodes; - BdrvChild *c = l->data; -+ -+ job->nodes = l->next; -+ - bdrv_op_unblock_all(c->bs, job->blocker); - bdrv_root_unref_child(c); -+ -+ g_slist_free_1(l); - } -- g_slist_free(job->nodes); -- job->nodes = NULL; - } - - bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) --- -1.8.3.1 - diff --git a/kvm-ehci-fix-queue-dev-null-ptr-dereference.patch b/kvm-ehci-fix-queue-dev-null-ptr-dereference.patch deleted file mode 100644 index 9adeaeb..0000000 --- a/kvm-ehci-fix-queue-dev-null-ptr-dereference.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 0b38614471dbc44b87a1d2449e602df50c3ff535 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 5 Sep 2019 08:50:37 +0100 -Subject: [PATCH 2/6] ehci: fix queue->dev null ptr dereference -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20190905085037.5648-2-dgilbert@redhat.com> -Patchwork-id: 90288 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 1/1] ehci: fix queue->dev null ptr dereference -Bugzilla: 1746790 -RH-Acked-by: Peter Xu -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Juan Quintela - -From: Gerd Hoffmann - -In case we don't have a device for an active queue, just skip -processing the queue (same we do for inactive queues) and log -a guest bug. - -Reported-by: Guenter Roeck -Signed-off-by: Gerd Hoffmann -Tested-by: Guenter Roeck -Message-id: 20190821085319.13711-1-kraxel@redhat.com -(cherry picked from commit 1be344b7ad25d572dadeee46d80f0103354352b2) -Signed-off-by: Danilo C. L. de Paula ---- - hw/usb/hcd-ehci.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c -index 62dab05..5f089f3 100644 ---- a/hw/usb/hcd-ehci.c -+++ b/hw/usb/hcd-ehci.c -@@ -1834,6 +1834,9 @@ static int ehci_state_fetchqtd(EHCIQueue *q) - ehci_set_state(q->ehci, q->async, EST_EXECUTING); - break; - } -+ } else if (q->dev == NULL) { -+ ehci_trace_guest_bug(q->ehci, "no device attached to queue"); -+ ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH); - } else { - p = ehci_alloc_packet(q); - p->qtdaddr = q->qtdaddr; --- -1.8.3.1 - diff --git a/kvm-file-posix-Handle-undetectable-alignment.patch b/kvm-file-posix-Handle-undetectable-alignment.patch deleted file mode 100644 index f5f883b..0000000 --- a/kvm-file-posix-Handle-undetectable-alignment.patch +++ /dev/null @@ -1,129 +0,0 @@ -From 790cebc2a1d8de8d93b2a2a0ef19e31c767f4f1c Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Mon, 9 Sep 2019 07:38:20 +0100 -Subject: [PATCH 4/6] file-posix: Handle undetectable alignment - -RH-Author: Max Reitz -Message-id: <20190909073822.26191-2-mreitz@redhat.com> -Patchwork-id: 90332 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/3] file-posix: Handle undetectable alignment -Bugzilla: 1749134 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Thomas Huth -RH-Acked-by: 
Kevin Wolf - -From: Nir Soffer - -In some cases buf_align or request_alignment cannot be detected: - -1. With Gluster, buf_align cannot be detected since the actual I/O is - done on Gluster server, and qemu buffer alignment does not matter. - Since we don't have alignment requirement, buf_align=1 is the best - value. - -2. With local XFS filesystem, buf_align cannot be detected if reading - from unallocated area. In this we must align the buffer, but we don't - know what is the correct size. Using the wrong alignment results in - I/O error. - -3. With Gluster backed by XFS, request_alignment cannot be detected if - reading from unallocated area. In this case we need to use the - correct alignment, and failing to do so results in I/O errors. - -4. With NFS, the server does not use direct I/O, so both buf_align cannot - be detected. In this case we don't need any alignment so we can use - buf_align=1 and request_alignment=1. - -These cases seems to work when storage sector size is 512 bytes, because -the current code starts checking align=512. If the check succeeds -because alignment cannot be detected we use 512. But this does not work -for storage with 4k sector size. - -To determine if we can detect the alignment, we probe first with -align=1. If probing succeeds, maybe there are no alignment requirement -(cases 1, 4) or we are probing unallocated area (cases 2, 3). Since we -don't have any way to tell, we treat this as undetectable alignment. If -probing with align=1 fails with EINVAL, but probing with one of the -expected alignments succeeds, we know that we found a working alignment. - -Practically the alignment requirements are the same for buffer -alignment, buffer length, and offset in file. So in case we cannot -detect buf_align, we can use request alignment. If we cannot detect -request alignment, we can fallback to a safe value. To use this logic, -we probe first request alignment instead of buf_align. 
- -Here is a table showing the behaviour with current code (the value in -parenthesis is the optimal value). - -Case Sector buf_align (opt) request_alignment (opt) result - -Signed-off-by: Danilo C. L. de Paula ---- - block/file-posix.c | 36 +++++++++++++++++++++++++----------- - 1 file changed, 25 insertions(+), 11 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 4479cc7..b8b4dad 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -323,6 +323,7 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) - BDRVRawState *s = bs->opaque; - char *buf; - size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize()); -+ size_t alignments[] = {1, 512, 1024, 2048, 4096}; - - /* For SCSI generic devices the alignment is not really used. - With buffered I/O, we don't have any restrictions. */ -@@ -349,25 +350,38 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) - } - #endif - -- /* If we could not get the sizes so far, we can only guess them */ -- if (!s->buf_align) { -+ /* -+ * If we could not get the sizes so far, we can only guess them. First try -+ * to detect request alignment, since it is more likely to succeed. Then -+ * try to detect buf_align, which cannot be detected in some cases (e.g. -+ * Gluster). If buf_align cannot be detected, we fallback to the value of -+ * request_alignment. -+ */ -+ -+ if (!bs->bl.request_alignment) { -+ int i; - size_t align; -- buf = qemu_memalign(max_align, 2 * max_align); -- for (align = 512; align <= max_align; align <<= 1) { -- if (raw_is_io_aligned(fd, buf + align, max_align)) { -- s->buf_align = align; -+ buf = qemu_memalign(max_align, max_align); -+ for (i = 0; i < ARRAY_SIZE(alignments); i++) { -+ align = alignments[i]; -+ if (raw_is_io_aligned(fd, buf, align)) { -+ /* Fallback to safe value. */ -+ bs->bl.request_alignment = (align != 1) ? 
align : max_align; - break; - } - } - qemu_vfree(buf); - } - -- if (!bs->bl.request_alignment) { -+ if (!s->buf_align) { -+ int i; - size_t align; -- buf = qemu_memalign(s->buf_align, max_align); -- for (align = 512; align <= max_align; align <<= 1) { -- if (raw_is_io_aligned(fd, buf, align)) { -- bs->bl.request_alignment = align; -+ buf = qemu_memalign(max_align, 2 * max_align); -+ for (i = 0; i < ARRAY_SIZE(alignments); i++) { -+ align = alignments[i]; -+ if (raw_is_io_aligned(fd, buf + align, max_align)) { -+ /* Fallback to request_aligment. */ -+ s->buf_align = (align != 1) ? align : bs->bl.request_alignment; - break; - } - } --- -1.8.3.1 - diff --git a/kvm-hostmem-file-fix-pmem-file-size-check.patch b/kvm-hostmem-file-fix-pmem-file-size-check.patch deleted file mode 100644 index 2fc5e43..0000000 --- a/kvm-hostmem-file-fix-pmem-file-size-check.patch +++ /dev/null @@ -1,70 +0,0 @@ -From c7c95a2f14d94eb8213ce7cab03acdef40fed093 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Wed, 18 Sep 2019 15:10:06 +0100 -Subject: [PATCH 2/4] hostmem-file: fix pmem file size check -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -Message-id: <20190918151007.27973-2-stefanha@redhat.com> -Patchwork-id: 90763 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/2] hostmem-file: fix pmem file size check -Bugzilla: 1724008 1736788 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Igor Mammedov -RH-Acked-by: Eduardo Habkost - -Commit 314aec4a6e06844937f1677f6cba21981005f389 ("hostmem-file: reject -invalid pmem file sizes") added a file size check that verifies the -hostmem object's size parameter against the actual devdax pmem file. -This is useful because getting the size wrong results in confusing -errors inside the guest. - -However, the code doesn't work properly for files where struct -stat::st_size is zero. 
Hostmem-file's ->alloc() function returns early -without setting an Error, causing the following assertion failure: - - qemu/memory.c:2215: memory_region_get_ram_ptr: Assertion `mr->ram_block' failed. - -This patch handles the case where qemu_get_pmem_size() returns 0 but -there is no error. - -Fixes: 314aec4a6e06844937f1677f6cba21981005f389 -Signed-off-by: Stefan Hajnoczi -Message-Id: <20190823135632.25010-1-stefanha@redhat.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit 7faae95ebc966c2981b78cf7c25009dfa32d4b72) - -Note that this commit will be effectively reverted by the next one but I -backported it to avoid conflicts. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. de Paula ---- - backends/hostmem-file.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c -index 29e55c9..ecc15e3 100644 ---- a/backends/hostmem-file.c -+++ b/backends/hostmem-file.c -@@ -67,12 +67,12 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) - uint64_t size; - - size = qemu_get_pmem_size(fb->mem_path, &local_err); -- if (!size) { -+ if (local_err) { - error_propagate(errp, local_err); - return; - } - -- if (backend->size > size) { -+ if (size && backend->size > size) { - error_setg(errp, "size property %" PRIu64 " is larger than " - "pmem file \"%s\" size %" PRIu64, backend->size, - fb->mem_path, size); --- -1.8.3.1 - diff --git a/kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch b/kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch deleted file mode 100644 index 75c738d..0000000 --- a/kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch +++ /dev/null @@ -1,109 +0,0 @@ -From b261b31812a3e89a9104fb33bb2339b1742ac494 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Mon, 9 Sep 2019 07:38:22 +0100 -Subject: [PATCH 6/6] iotests: Test allocate_first_block() with O_DIRECT - -RH-Author: Max Reitz -Message-id: <20190909073822.26191-4-mreitz@redhat.com> 
-Patchwork-id: 90334 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 3/3] iotests: Test allocate_first_block() with O_DIRECT -Bugzilla: 1749134 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Thomas Huth -RH-Acked-by: Kevin Wolf - -From: Nir Soffer - -Using block_resize we can test allocate_first_block() with file -descriptor opened with O_DIRECT, ensuring that it works for any size -larger than 4096 bytes. - -Testing smaller sizes is tricky as the result depends on the filesystem -used for testing. For example on NFS any size will work since O_DIRECT -does not require any alignment. - -Signed-off-by: Nir Soffer -Reviewed-by: Max Reitz -Message-id: 20190827010528.8818-3-nsoffer@redhat.com -Signed-off-by: Max Reitz -(cherry picked from commit 7e3dc2ba9a11862d4e1a08325b9165f27a1b1e7c) -Signed-off-by: Max Reitz -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/175 | 28 ++++++++++++++++++++++++++++ - tests/qemu-iotests/175.out | 8 ++++++++ - 2 files changed, 36 insertions(+) - -diff --git a/tests/qemu-iotests/175 b/tests/qemu-iotests/175 -index 7ba28b3..55db280 100755 ---- a/tests/qemu-iotests/175 -+++ b/tests/qemu-iotests/175 -@@ -49,6 +49,23 @@ _filter_blocks() - -e "s/blocks=$((extra_blocks + img_size / 512))\\(\$\\|[^0-9]\\)/max allocation/" - } - -+# Resize image using block_resize. 
-+# Parameter 1: image path -+# Parameter 2: new size -+_block_resize() -+{ -+ local path=$1 -+ local size=$2 -+ -+ $QEMU -qmp stdio -nographic -nodefaults \ -+ -blockdev file,node-name=file,filename=$path,cache.direct=on \ -+ </dev/null -+ stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $min_blocks $new_size -+done -+ - # success, all done - echo "*** done" - rm -f $seq.full -diff --git a/tests/qemu-iotests/175.out b/tests/qemu-iotests/175.out -index 263e521..39c2ee0 100644 ---- a/tests/qemu-iotests/175.out -+++ b/tests/qemu-iotests/175.out -@@ -15,4 +15,12 @@ size=1048576, max allocation - == creating image with preallocation falloc == - Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=falloc - size=1048576, max allocation -+ -+== resize empty image with block_resize == -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=0 -+size=4096, min allocation -+ -+== resize empty image with block_resize == -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=0 -+size=1048576, min allocation - *** done --- -1.8.3.1 - diff --git a/kvm-iotests-test-bitmap-moving-inside-254.patch b/kvm-iotests-test-bitmap-moving-inside-254.patch deleted file mode 100644 index 064f7d5..0000000 --- a/kvm-iotests-test-bitmap-moving-inside-254.patch +++ /dev/null @@ -1,209 +0,0 @@ -From b15fa18e724e356bd889f0566d512daedb9a09dc Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Fri, 27 Sep 2019 20:18:46 +0100 -Subject: [PATCH 3/6] iotests: test bitmap moving inside 254 - -RH-Author: John Snow -Message-id: <20190927201846.6823-4-jsnow@redhat.com> -Patchwork-id: 90910 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 3/3] iotests: test bitmap moving inside 254 -Bugzilla: 1756413 -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Max Reitz -RH-Acked-by: Stefano Garzarella - -From: Vladimir Sementsov-Ogievskiy - -Test persistent bitmap copying with and without removal of original -bitmap. 
- -Signed-off-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20190708220502.12977-4-jsnow@redhat.com -[Edited comment "bitmap1" --> "bitmap2" as per review. --js] -Signed-off-by: John Snow -(cherry picked from commit 3f7b2fa8cd476fe871ce1d996c640317730752a0) -Signed-off-by: John Snow - -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/254 | 30 +++++++++++++++-- - tests/qemu-iotests/254.out | 82 ++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 110 insertions(+), 2 deletions(-) - -diff --git a/tests/qemu-iotests/254 b/tests/qemu-iotests/254 -index 8edba91..09584f3 100755 ---- a/tests/qemu-iotests/254 -+++ b/tests/qemu-iotests/254 -@@ -1,6 +1,6 @@ - #!/usr/bin/env python - # --# Test external snapshot with bitmap copying. -+# Test external snapshot with bitmap copying and moving. - # - # Copyright (c) 2019 Virtuozzo International GmbH. All rights reserved. - # -@@ -32,6 +32,10 @@ vm = iotests.VM().add_drive(disk, opts='node-name=base') - vm.launch() - - vm.qmp_log('block-dirty-bitmap-add', node='drive0', name='bitmap0') -+vm.qmp_log('block-dirty-bitmap-add', node='drive0', name='bitmap1', -+ persistent=True) -+vm.qmp_log('block-dirty-bitmap-add', node='drive0', name='bitmap2', -+ persistent=True) - - vm.hmp_qemu_io('drive0', 'write 0 512K') - -@@ -39,16 +43,38 @@ vm.qmp_log('transaction', indent=2, actions=[ - {'type': 'blockdev-snapshot-sync', - 'data': {'device': 'drive0', 'snapshot-file': top, - 'snapshot-node-name': 'snap'}}, -+ -+ # copy non-persistent bitmap0 - {'type': 'block-dirty-bitmap-add', - 'data': {'node': 'snap', 'name': 'bitmap0'}}, - {'type': 'block-dirty-bitmap-merge', - 'data': {'node': 'snap', 'target': 'bitmap0', -- 'bitmaps': [{'node': 'base', 'name': 'bitmap0'}]}} -+ 'bitmaps': [{'node': 'base', 'name': 'bitmap0'}]}}, -+ -+ # copy persistent bitmap1, original will be saved to base image -+ {'type': 'block-dirty-bitmap-add', -+ 'data': {'node': 'snap', 'name': 
'bitmap1', 'persistent': True}}, -+ {'type': 'block-dirty-bitmap-merge', -+ 'data': {'node': 'snap', 'target': 'bitmap1', -+ 'bitmaps': [{'node': 'base', 'name': 'bitmap1'}]}}, -+ -+ # move persistent bitmap2, original will be removed and not saved -+ # to base image -+ {'type': 'block-dirty-bitmap-add', -+ 'data': {'node': 'snap', 'name': 'bitmap2', 'persistent': True}}, -+ {'type': 'block-dirty-bitmap-merge', -+ 'data': {'node': 'snap', 'target': 'bitmap2', -+ 'bitmaps': [{'node': 'base', 'name': 'bitmap2'}]}}, -+ {'type': 'block-dirty-bitmap-remove', -+ 'data': {'node': 'base', 'name': 'bitmap2'}} - ], filters=[iotests.filter_qmp_testfiles]) - - result = vm.qmp('query-block')['return'][0] - log("query-block: device = {}, node-name = {}, dirty-bitmaps:".format( - result['device'], result['inserted']['node-name'])) - log(result['dirty-bitmaps'], indent=2) -+log("\nbitmaps in backing image:") -+log(result['inserted']['image']['backing-image']['format-specific'] \ -+ ['data']['bitmaps'], indent=2) - - vm.shutdown() -diff --git a/tests/qemu-iotests/254.out b/tests/qemu-iotests/254.out -index d7394cf..d185c05 100644 ---- a/tests/qemu-iotests/254.out -+++ b/tests/qemu-iotests/254.out -@@ -1,5 +1,9 @@ - {"execute": "block-dirty-bitmap-add", "arguments": {"name": "bitmap0", "node": "drive0"}} - {"return": {}} -+{"execute": "block-dirty-bitmap-add", "arguments": {"name": "bitmap1", "node": "drive0", "persistent": true}} -+{"return": {}} -+{"execute": "block-dirty-bitmap-add", "arguments": {"name": "bitmap2", "node": "drive0", "persistent": true}} -+{"return": {}} - { - "execute": "transaction", - "arguments": { -@@ -31,6 +35,55 @@ - "target": "bitmap0" - }, - "type": "block-dirty-bitmap-merge" -+ }, -+ { -+ "data": { -+ "name": "bitmap1", -+ "node": "snap", -+ "persistent": true -+ }, -+ "type": "block-dirty-bitmap-add" -+ }, -+ { -+ "data": { -+ "bitmaps": [ -+ { -+ "name": "bitmap1", -+ "node": "base" -+ } -+ ], -+ "node": "snap", -+ "target": "bitmap1" -+ }, -+ "type": 
"block-dirty-bitmap-merge" -+ }, -+ { -+ "data": { -+ "name": "bitmap2", -+ "node": "snap", -+ "persistent": true -+ }, -+ "type": "block-dirty-bitmap-add" -+ }, -+ { -+ "data": { -+ "bitmaps": [ -+ { -+ "name": "bitmap2", -+ "node": "base" -+ } -+ ], -+ "node": "snap", -+ "target": "bitmap2" -+ }, -+ "type": "block-dirty-bitmap-merge" -+ }, -+ { -+ "data": { -+ "name": "bitmap2", -+ "node": "base" -+ }, -+ "type": "block-dirty-bitmap-remove" - } - ] - } -@@ -44,9 +97,38 @@ query-block: device = drive0, node-name = snap, dirty-bitmaps: - "busy": false, - "count": 524288, - "granularity": 65536, -+ "name": "bitmap2", -+ "persistent": true, -+ "recording": true, -+ "status": "active" -+ }, -+ { -+ "busy": false, -+ "count": 524288, -+ "granularity": 65536, -+ "name": "bitmap1", -+ "persistent": true, -+ "recording": true, -+ "status": "active" -+ }, -+ { -+ "busy": false, -+ "count": 524288, -+ "granularity": 65536, - "name": "bitmap0", - "persistent": false, - "recording": true, - "status": "active" - } - ] -+ -+bitmaps in backing image: -+[ -+ { -+ "flags": [ -+ "auto" -+ ], -+ "granularity": 65536, -+ "name": "bitmap1" -+ } -+] --- -1.8.3.1 - diff --git a/kvm-machine-types-Update-hw_compat_rhel_8_0-from-hw_comp.patch b/kvm-machine-types-Update-hw_compat_rhel_8_0-from-hw_comp.patch deleted file mode 100644 index b6d3807..0000000 --- a/kvm-machine-types-Update-hw_compat_rhel_8_0-from-hw_comp.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 9f2bfaaa73a8fb2790ebc4fb16f5d3a629b39898 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Wed, 31 Jul 2019 15:08:14 +0100 -Subject: [PATCH 5/6] machine types: Update hw_compat_rhel_8_0 from - hw_compat_4_0 - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20190731150814.29571-6-dgilbert@redhat.com> -Patchwork-id: 89817 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v3 5/5] machine types: Update hw_compat_rhel_8_0 from hw_compat_4_0 -Bugzilla: 1719649 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Markus Armbruster -RH-Acked-by: Cornelia Huck - -From: "Dr. David Alan Gilbert" - -Pull in the 4_0 compat entries into hw_compat_rhel_8_0 - -We don't need pl031's migrate-tick-offset because it's an ARM only -device and we don't have compatible types for Aarch yet. - -We don't have to copy virtio-balloon-device.qemu-4-0-config-size=false from -hw_compat_3_1[], since it immediately got reverted in hw_compat_4_0[]. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. de Paula ---- - hw/core/machine.c | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index f30afe0..e2f812a 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -54,6 +54,20 @@ GlobalProperty hw_compat_rhel_8_0[] = { - { "virtio-blk-device", "discard", "false" }, - /* hw_compat_rhel_8_0 from hw_compat_3_1 */ - { "virtio-blk-device", "write-zeroes", "false" }, -+ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ -+ { "VGA", "edid", "false" }, -+ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ -+ { "secondary-vga", "edid", "false" }, -+ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ -+ { "bochs-display", "edid", "false" }, -+ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ -+ { "virtio-vga", "edid", "false" }, -+ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ -+ { "virtio-gpu-pci", "edid", "false" }, -+ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ -+ { "virtio-device", "use-started", "false" }, -+ /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */ -+ { "pcie-root-port-base", "disable-acs", "true" }, - }; - const size_t hw_compat_rhel_8_0_len = G_N_ELEMENTS(hw_compat_rhel_8_0); - --- -1.8.3.1 - diff --git a/kvm-memory-Fix-up-memory_region_-add-del-_coalescing.patch 
b/kvm-memory-Fix-up-memory_region_-add-del-_coalescing.patch deleted file mode 100644 index 021125e..0000000 --- a/kvm-memory-Fix-up-memory_region_-add-del-_coalescing.patch +++ /dev/null @@ -1,118 +0,0 @@ -From b9b48ed46d2b0a3dd6e8406946eb0516ec75a004 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Fri, 23 Aug 2019 06:14:31 +0100 -Subject: [PATCH 4/5] memory: Fix up memory_region_{add|del}_coalescing -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -Message-id: <20190823061431.31759-5-peterx@redhat.com> -Patchwork-id: 90136 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 4/4] memory: Fix up memory_region_{add|del}_coalescing -Bugzilla: 1743142 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Auger Eric -RH-Acked-by: Philippe Mathieu-Daudé - -The old memory_region_{add|clear}_coalescing() has some defects -because they both changed mr->coalesced before updating the regions -using memory_region_update_coalesced_range_as(). Then when the -regions were updated in memory_region_update_coalesced_range_as() the -mr->coalesced will always be either one more or one less. So: - -- For memory_region_add_coalescing: it'll always trying to remove the - newly added coalesced region while it shouldn't, and, - -- For memory_region_clear_coalescing: when it calls the update there - will be no coalesced ranges on mr->coalesced because they were all - removed before hand so the update will probably do nothing for real. - -Let's fix this. Now we've got flat_range_coalesced_io_notify() to -notify a single CoalescedMemoryRange instance change, so use it in the -existing memory_region_update_coalesced_range() logic by only notify -either an addition or deletion. Then we hammer both the -memory_region_{add|clear}_coalescing() to use it. 
- -Fixes: 3ac7d43a6fbb5d4a3 -Signed-off-by: Peter Xu -Message-Id: <20190820141328.10009-5-peterx@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit b960fc1796fb078c21121abf01499603b66b3f57) -Signed-off-by: Peter Xu -Signed-off-by: Danilo C. L. de Paula ---- - memory.c | 36 +++++++++++++++++------------------- - 1 file changed, 17 insertions(+), 19 deletions(-) - -diff --git a/memory.c b/memory.c -index c7cd43f..2f15180 100644 ---- a/memory.c -+++ b/memory.c -@@ -2238,27 +2238,26 @@ void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize, Error **errp - qemu_ram_resize(mr->ram_block, newsize, errp); - } - --static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpace *as) -+/* -+ * Call proper memory listeners about the change on the newly -+ * added/removed CoalescedMemoryRange. -+ */ -+static void memory_region_update_coalesced_range(MemoryRegion *mr, -+ CoalescedMemoryRange *cmr, -+ bool add) - { -+ AddressSpace *as; - FlatView *view; - FlatRange *fr; - -- view = address_space_get_flatview(as); -- FOR_EACH_FLAT_RANGE(fr, view) { -- if (fr->mr == mr) { -- flat_range_coalesced_io_del(fr, as); -- flat_range_coalesced_io_add(fr, as); -- } -- } -- flatview_unref(view); --} -- --static void memory_region_update_coalesced_range(MemoryRegion *mr) --{ -- AddressSpace *as; -- - QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { -- memory_region_update_coalesced_range_as(mr, as); -+ view = address_space_get_flatview(as); -+ FOR_EACH_FLAT_RANGE(fr, view) { -+ if (fr->mr == mr) { -+ flat_range_coalesced_io_notify(fr, as, cmr, add); -+ } -+ } -+ flatview_unref(view); - } - } - -@@ -2276,7 +2275,7 @@ void memory_region_add_coalescing(MemoryRegion *mr, - - cmr->addr = addrrange_make(int128_make64(offset), int128_make64(size)); - QTAILQ_INSERT_TAIL(&mr->coalesced, cmr, link); -- memory_region_update_coalesced_range(mr); -+ memory_region_update_coalesced_range(mr, cmr, true); - memory_region_set_flush_coalesced(mr); - } - 
-@@ -2294,10 +2293,9 @@ void memory_region_clear_coalescing(MemoryRegion *mr) - while (!QTAILQ_EMPTY(&mr->coalesced)) { - cmr = QTAILQ_FIRST(&mr->coalesced); - QTAILQ_REMOVE(&mr->coalesced, cmr, link); -+ memory_region_update_coalesced_range(mr, cmr, false); - g_free(cmr); - } -- -- memory_region_update_coalesced_range(mr); - } - - void memory_region_set_flush_coalesced(MemoryRegion *mr) --- -1.8.3.1 - diff --git a/kvm-memory-Refactor-memory_region_clear_coalescing.patch b/kvm-memory-Refactor-memory_region_clear_coalescing.patch deleted file mode 100644 index 6ea7ce5..0000000 --- a/kvm-memory-Refactor-memory_region_clear_coalescing.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 134ab69ffdfb7e45a0be385595036d0427928306 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Fri, 23 Aug 2019 06:14:28 +0100 -Subject: [PATCH 1/5] memory: Refactor memory_region_clear_coalescing -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -Message-id: <20190823061431.31759-2-peterx@redhat.com> -Patchwork-id: 90134 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 1/4] memory: Refactor memory_region_clear_coalescing -Bugzilla: 1743142 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Auger Eric -RH-Acked-by: Philippe Mathieu-Daudé - -Removing the update variable and quit earlier if the memory region has -no coalesced range. This prepares for the next patch. - -Fixes: 3ac7d43a6fbb5d4a3 -Signed-off-by: Peter Xu -Message-Id: <20190820141328.10009-4-peterx@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 9c1aa1c235c770d84462d482460a96e957e95b9c) -Signed-off-by: Peter Xu -Signed-off-by: Danilo C. L. 
de Paula ---- - memory.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/memory.c b/memory.c -index 5d8c9a9..9f40742 100644 ---- a/memory.c -+++ b/memory.c -@@ -2276,7 +2276,10 @@ void memory_region_add_coalescing(MemoryRegion *mr, - void memory_region_clear_coalescing(MemoryRegion *mr) - { - CoalescedMemoryRange *cmr; -- bool updated = false; -+ -+ if (QTAILQ_EMPTY(&mr->coalesced)) { -+ return; -+ } - - qemu_flush_coalesced_mmio_buffer(); - mr->flush_coalesced_mmio = false; -@@ -2285,12 +2288,9 @@ void memory_region_clear_coalescing(MemoryRegion *mr) - cmr = QTAILQ_FIRST(&mr->coalesced); - QTAILQ_REMOVE(&mr->coalesced, cmr, link); - g_free(cmr); -- updated = true; - } - -- if (updated) { -- memory_region_update_coalesced_range(mr); -- } -+ memory_region_update_coalesced_range(mr); - } - - void memory_region_set_flush_coalesced(MemoryRegion *mr) --- -1.8.3.1 - diff --git a/kvm-memory-Remove-has_coalesced_range-counter.patch b/kvm-memory-Remove-has_coalesced_range-counter.patch deleted file mode 100644 index d98baa2..0000000 --- a/kvm-memory-Remove-has_coalesced_range-counter.patch +++ /dev/null @@ -1,96 +0,0 @@ -From c1db31bce6d2e5f49e34a2e7282e50bea3f92278 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Fri, 23 Aug 2019 06:14:30 +0100 -Subject: [PATCH 3/5] memory: Remove has_coalesced_range counter -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -Message-id: <20190823061431.31759-4-peterx@redhat.com> -Patchwork-id: 90135 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 3/4] memory: Remove has_coalesced_range counter -Bugzilla: 1743142 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Auger Eric -RH-Acked-by: Philippe Mathieu-Daudé - -The has_coalesced_range could potentially be problematic in that it -only works for additions of coalesced mmio ranges but not deletions. 
-The reason is that has_coalesced_range information can be lost when -the FlatView updates the topology again when the updated region is not -covering the coalesced regions. When that happens, due to -flatrange_equal() is not checking against has_coalesced_range, the new -FlatRange will be seen as the same one as the old and the new -instance (whose has_coalesced_range will be zero) will replace the old -instance (whose has_coalesced_range _could_ be non-zero). - -The counter was originally used to make sure every FlatRange will only -notify once for coalesced_io_{add|del} memory listeners, because each -FlatRange can be used by multiple address spaces, so logically -speaking it could be called multiple times. However we should not -limit that, because memory listeners should will only be registered -with specific address space rather than multiple address spaces. - -So let's fix this up by simply removing the whole has_coalesced_range. - -Fixes: 3ac7d43a6fbb5d4a3 -Signed-off-by: Peter Xu -Message-Id: <20190820141328.10009-3-peterx@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 264ef5a5c52c249ff51a16d141fc03df71714a13) -Signed-off-by: Peter Xu -Signed-off-by: Danilo C. L. de Paula ---- - memory.c | 14 -------------- - 1 file changed, 14 deletions(-) - -diff --git a/memory.c b/memory.c -index 7b24cb8..c7cd43f 100644 ---- a/memory.c -+++ b/memory.c -@@ -217,7 +217,6 @@ struct FlatRange { - bool romd_mode; - bool readonly; - bool nonvolatile; -- int has_coalesced_range; - }; - - #define FOR_EACH_FLAT_RANGE(var, view) \ -@@ -654,7 +653,6 @@ static void render_memory_region(FlatView *view, - fr.romd_mode = mr->romd_mode; - fr.readonly = readonly; - fr.nonvolatile = nonvolatile; -- fr.has_coalesced_range = 0; - - /* Render the region itself into any gaps left by the current view. 
*/ - for (i = 0; i < view->nr && int128_nz(remain); ++i) { -@@ -888,14 +886,6 @@ static void flat_range_coalesced_io_del(FlatRange *fr, AddressSpace *as) - { - CoalescedMemoryRange *cmr; - -- if (!fr->has_coalesced_range) { -- return; -- } -- -- if (--fr->has_coalesced_range > 0) { -- return; -- } -- - QTAILQ_FOREACH(cmr, &fr->mr->coalesced, link) { - flat_range_coalesced_io_notify(fr, as, cmr, false); - } -@@ -910,10 +900,6 @@ static void flat_range_coalesced_io_add(FlatRange *fr, AddressSpace *as) - return; - } - -- if (fr->has_coalesced_range++) { -- return; -- } -- - QTAILQ_FOREACH(cmr, &mr->coalesced, link) { - flat_range_coalesced_io_notify(fr, as, cmr, true); - } --- -1.8.3.1 - diff --git a/kvm-memory-Split-zones-when-do-coalesced_io_del.patch b/kvm-memory-Split-zones-when-do-coalesced_io_del.patch deleted file mode 100644 index 498cd26..0000000 --- a/kvm-memory-Split-zones-when-do-coalesced_io_del.patch +++ /dev/null @@ -1,123 +0,0 @@ -From bdd5394047f7fbecac82d067b9e67db8a20c49d2 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Fri, 23 Aug 2019 06:14:29 +0100 -Subject: [PATCH 2/5] memory: Split zones when do coalesced_io_del() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -Message-id: <20190823061431.31759-3-peterx@redhat.com> -Patchwork-id: 90133 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 2/4] memory: Split zones when do coalesced_io_del() -Bugzilla: 1743142 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Auger Eric -RH-Acked-by: Philippe Mathieu-Daudé - -It is a workaround of current KVM's KVM_UNREGISTER_COALESCED_MMIO -interface. The kernel interface only allows to unregister an mmio -device with exactly the zone size when registered, or any smaller zone -that is included in the device mmio zone. It does not support the -userspace to specify a very large zone to remove all the small mmio -devices within the zone covered. 
- -Logically speaking it would be nicer to fix this from KVM side, though -in all cases we still need to coop with old kernels so let's do this. - -Fixes: 3ac7d43a6fbb5d4a3 -Signed-off-by: Peter Xu -Message-Id: <20190820141328.10009-2-peterx@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 23f1174aac4181f86bb7e13ca8bc2d4a0bdf1e5c) -Signed-off-by: Peter Xu -Signed-off-by: Danilo C. L. de Paula ---- - memory.c | 49 +++++++++++++++++++++++++++++++++++-------------- - 1 file changed, 35 insertions(+), 14 deletions(-) - -diff --git a/memory.c b/memory.c -index 9f40742..7b24cb8 100644 ---- a/memory.c -+++ b/memory.c -@@ -855,8 +855,39 @@ static void address_space_update_ioeventfds(AddressSpace *as) - flatview_unref(view); - } - -+/* -+ * Notify the memory listeners about the coalesced IO change events of -+ * range `cmr'. Only the part that has intersection of the specified -+ * FlatRange will be sent. -+ */ -+static void flat_range_coalesced_io_notify(FlatRange *fr, AddressSpace *as, -+ CoalescedMemoryRange *cmr, bool add) -+{ -+ AddrRange tmp; -+ -+ tmp = addrrange_shift(cmr->addr, -+ int128_sub(fr->addr.start, -+ int128_make64(fr->offset_in_region))); -+ if (!addrrange_intersects(tmp, fr->addr)) { -+ return; -+ } -+ tmp = addrrange_intersection(tmp, fr->addr); -+ -+ if (add) { -+ MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, coalesced_io_add, -+ int128_get64(tmp.start), -+ int128_get64(tmp.size)); -+ } else { -+ MEMORY_LISTENER_UPDATE_REGION(fr, as, Reverse, coalesced_io_del, -+ int128_get64(tmp.start), -+ int128_get64(tmp.size)); -+ } -+} -+ - static void flat_range_coalesced_io_del(FlatRange *fr, AddressSpace *as) - { -+ CoalescedMemoryRange *cmr; -+ - if (!fr->has_coalesced_range) { - return; - } -@@ -865,16 +896,15 @@ static void flat_range_coalesced_io_del(FlatRange *fr, AddressSpace *as) - return; - } - -- MEMORY_LISTENER_UPDATE_REGION(fr, as, Reverse, coalesced_io_del, -- int128_get64(fr->addr.start), -- int128_get64(fr->addr.size)); -+ 
QTAILQ_FOREACH(cmr, &fr->mr->coalesced, link) { -+ flat_range_coalesced_io_notify(fr, as, cmr, false); -+ } - } - - static void flat_range_coalesced_io_add(FlatRange *fr, AddressSpace *as) - { - MemoryRegion *mr = fr->mr; - CoalescedMemoryRange *cmr; -- AddrRange tmp; - - if (QTAILQ_EMPTY(&mr->coalesced)) { - return; -@@ -885,16 +915,7 @@ static void flat_range_coalesced_io_add(FlatRange *fr, AddressSpace *as) - } - - QTAILQ_FOREACH(cmr, &mr->coalesced, link) { -- tmp = addrrange_shift(cmr->addr, -- int128_sub(fr->addr.start, -- int128_make64(fr->offset_in_region))); -- if (!addrrange_intersects(tmp, fr->addr)) { -- continue; -- } -- tmp = addrrange_intersection(tmp, fr->addr); -- MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, coalesced_io_add, -- int128_get64(tmp.start), -- int128_get64(tmp.size)); -+ flat_range_coalesced_io_notify(fr, as, cmr, true); - } - } - --- -1.8.3.1 - diff --git a/kvm-memory-fetch-pmem-size-in-get_file_size.patch b/kvm-memory-fetch-pmem-size-in-get_file_size.patch deleted file mode 100644 index 80a2e15..0000000 --- a/kvm-memory-fetch-pmem-size-in-get_file_size.patch +++ /dev/null @@ -1,254 +0,0 @@ -From 9d6d365abaea5e068f060b8a70d5b8fab43a9f7f Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Wed, 18 Sep 2019 15:10:07 +0100 -Subject: [PATCH 3/4] memory: fetch pmem size in get_file_size() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -Message-id: <20190918151007.27973-3-stefanha@redhat.com> -Patchwork-id: 90762 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/2] memory: fetch pmem size in get_file_size() -Bugzilla: 1724008 1736788 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Igor Mammedov -RH-Acked-by: Eduardo Habkost - -Neither stat(2) nor lseek(2) report the size of Linux devdax pmem -character device nodes. 
Commit 314aec4a6e06844937f1677f6cba21981005f389 -("hostmem-file: reject invalid pmem file sizes") added code to -hostmem-file.c to fetch the size from sysfs and compare against the -user-provided size=NUM parameter: - - if (backend->size > size) { - error_setg(errp, "size property %" PRIu64 " is larger than " - "pmem file \"%s\" size %" PRIu64, backend->size, - fb->mem_path, size); - return; - } - -It turns out that exec.c:qemu_ram_alloc_from_fd() already has an -equivalent size check but it skips devdax pmem character devices because -lseek(2) returns 0: - - if (file_size > 0 && file_size < size) { - error_setg(errp, "backing store %s size 0x%" PRIx64 - " does not match 'size' option 0x" RAM_ADDR_FMT, - mem_path, file_size, size); - return NULL; - } - -This patch moves the devdax pmem file size code into get_file_size() so -that we check the memory size in a single place: -qemu_ram_alloc_from_fd(). This simplifies the code and makes it more -general. - -This also fixes the problem that hostmem-file only checks the devdax -pmem file size when the pmem=on parameter is given. An unchecked -size=NUM parameter can lead to SIGBUS in QEMU so we must always fetch -the file size for Linux devdax pmem character device nodes. - -Signed-off-by: Stefan Hajnoczi -Message-Id: <20190830093056.12572-1-stefanha@redhat.com> -Reviewed-by: Eduardo Habkost -Signed-off-by: Paolo Bonzini -(cherry picked from commit 72d41eb4b8f923de91e8f06dc20aa86b0a9155fb) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. 
de Paula ---- - backends/hostmem-file.c | 22 -------------------- - exec.c | 34 ++++++++++++++++++++++++++++++- - include/qemu/osdep.h | 13 ------------ - util/oslib-posix.c | 54 ------------------------------------------------- - util/oslib-win32.c | 6 ------ - 5 files changed, 33 insertions(+), 96 deletions(-) - -diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c -index ecc15e3..be64020 100644 ---- a/backends/hostmem-file.c -+++ b/backends/hostmem-file.c -@@ -58,28 +58,6 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) - return; - } - -- /* -- * Verify pmem file size since starting a guest with an incorrect size -- * leads to confusing failures inside the guest. -- */ -- if (fb->is_pmem) { -- Error *local_err = NULL; -- uint64_t size; -- -- size = qemu_get_pmem_size(fb->mem_path, &local_err); -- if (local_err) { -- error_propagate(errp, local_err); -- return; -- } -- -- if (size && backend->size > size) { -- error_setg(errp, "size property %" PRIu64 " is larger than " -- "pmem file \"%s\" size %" PRIu64, backend->size, -- fb->mem_path, size); -- return; -- } -- } -- - backend->force_prealloc = mem_prealloc; - name = host_memory_backend_get_name(backend); - memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), -diff --git a/exec.c b/exec.c -index 3e78de3..3c03edf 100644 ---- a/exec.c -+++ b/exec.c -@@ -1813,7 +1813,39 @@ long qemu_maxrampagesize(void) - #ifdef CONFIG_POSIX - static int64_t get_file_size(int fd) - { -- int64_t size = lseek(fd, 0, SEEK_END); -+ int64_t size; -+#if defined(__linux__) -+ struct stat st; -+ -+ if (fstat(fd, &st) < 0) { -+ return -errno; -+ } -+ -+ /* Special handling for devdax character devices */ -+ if (S_ISCHR(st.st_mode)) { -+ g_autofree char *subsystem_path = NULL; -+ g_autofree char *subsystem = NULL; -+ -+ subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem", -+ major(st.st_rdev), minor(st.st_rdev)); -+ subsystem = g_file_read_link(subsystem_path, NULL); -+ -+ if (subsystem 
&& g_str_has_suffix(subsystem, "/dax")) { -+ g_autofree char *size_path = NULL; -+ g_autofree char *size_str = NULL; -+ -+ size_path = g_strdup_printf("/sys/dev/char/%d:%d/size", -+ major(st.st_rdev), minor(st.st_rdev)); -+ -+ if (g_file_get_contents(size_path, &size_str, NULL, NULL)) { -+ return g_ascii_strtoll(size_str, NULL, 0); -+ } -+ } -+ } -+#endif /* defined(__linux__) */ -+ -+ /* st.st_size may be zero for special files yet lseek(2) works */ -+ size = lseek(fd, 0, SEEK_END); - if (size < 0) { - return -errno; - } -diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h -index af2b91f..c7d242f 100644 ---- a/include/qemu/osdep.h -+++ b/include/qemu/osdep.h -@@ -571,19 +571,6 @@ void os_mem_prealloc(int fd, char *area, size_t sz, int smp_cpus, - Error **errp); - - /** -- * qemu_get_pmem_size: -- * @filename: path to a pmem file -- * @errp: pointer to a NULL-initialized error object -- * -- * Determine the size of a persistent memory file. Besides supporting files on -- * DAX file systems, this function also supports Linux devdax character -- * devices. 
-- * -- * Returns: the size or 0 on failure -- */ --uint64_t qemu_get_pmem_size(const char *filename, Error **errp); -- --/** - * qemu_get_pid_name: - * @pid: pid of a process - * -diff --git a/util/oslib-posix.c b/util/oslib-posix.c -index fe0309c..d772da8 100644 ---- a/util/oslib-posix.c -+++ b/util/oslib-posix.c -@@ -513,60 +513,6 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, - } - } - --uint64_t qemu_get_pmem_size(const char *filename, Error **errp) --{ -- struct stat st; -- -- if (stat(filename, &st) < 0) { -- error_setg(errp, "unable to stat pmem file \"%s\"", filename); -- return 0; -- } -- --#if defined(__linux__) -- /* Special handling for devdax character devices */ -- if (S_ISCHR(st.st_mode)) { -- char *subsystem_path = NULL; -- char *subsystem = NULL; -- char *size_path = NULL; -- char *size_str = NULL; -- uint64_t ret = 0; -- -- subsystem_path = g_strdup_printf("/sys/dev/char/%d:%d/subsystem", -- major(st.st_rdev), minor(st.st_rdev)); -- subsystem = g_file_read_link(subsystem_path, NULL); -- if (!subsystem) { -- error_setg(errp, "unable to read subsystem for pmem file \"%s\"", -- filename); -- goto devdax_err; -- } -- -- if (!g_str_has_suffix(subsystem, "/dax")) { -- error_setg(errp, "pmem file \"%s\" is not a dax device", filename); -- goto devdax_err; -- } -- -- size_path = g_strdup_printf("/sys/dev/char/%d:%d/size", -- major(st.st_rdev), minor(st.st_rdev)); -- if (!g_file_get_contents(size_path, &size_str, NULL, NULL)) { -- error_setg(errp, "unable to read size for pmem file \"%s\"", -- size_path); -- goto devdax_err; -- } -- -- ret = g_ascii_strtoull(size_str, NULL, 0); -- --devdax_err: -- g_free(size_str); -- g_free(size_path); -- g_free(subsystem); -- g_free(subsystem_path); -- return ret; -- } --#endif /* defined(__linux__) */ -- -- return st.st_size; --} -- - char *qemu_get_pid_name(pid_t pid) - { - char *name = NULL; -diff --git a/util/oslib-win32.c b/util/oslib-win32.c -index 9583fb4..c62cd43 100644 ---- 
a/util/oslib-win32.c -+++ b/util/oslib-win32.c -@@ -562,12 +562,6 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus, - } - } - --uint64_t qemu_get_pmem_size(const char *filename, Error **errp) --{ -- error_setg(errp, "pmem support not available"); -- return 0; --} -- - char *qemu_get_pid_name(pid_t pid) - { - /* XXX Implement me */ --- -1.8.3.1 - diff --git a/kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch b/kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch deleted file mode 100644 index 604a4bc..0000000 --- a/kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 8b0a69f11052d271ef49db0051d79e7f1a6102be Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Mon, 2 Sep 2019 04:20:32 +0100 -Subject: [PATCH 1/6] migration: Do not re-read the clock on pre_save in case - of paused guest -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Gibson -Message-id: <20190902042032.25987-1-dgibson@redhat.com> -Patchwork-id: 90226 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH] migration: Do not re-read the clock on pre_save in case of paused guest -Bugzilla: 1747836 -RH-Acked-by: Thomas Huth -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laurent Vivier - -From: "Maxiwell S. Garcia" - -Re-read the timebase before migrate was ported from x86 commit: - 6053a86fe7bd: kvmclock: reduce kvmclock difference on migration - -The clock move makes the guest knows about the paused time between -the stop and migrate commands. This is an issue in an already-paused -VM because some side effects, like process stalls, could happen -after migration. - -So, this patch checks the runstate of guest in the pre_save handler and -do not re-reads the timebase in case of paused state (cold migration). - -Signed-off-by: Maxiwell S. 
Garcia -Message-Id: <20190711194702.26598-1-maxiwell@linux.ibm.com> -Signed-off-by: David Gibson -(cherry picked from commit d14f33976282a8744ca1bf1d64e73996c145aa3f) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1747836 -Branch: rhel8/rhel-av-8.1.0/master-4.1.0 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=23282250 -Testing: Booted guest with this qemu - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/ppc.c | 13 +++++++++---- - target/ppc/cpu-qom.h | 1 + - 2 files changed, 10 insertions(+), 4 deletions(-) - -diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c -index a9e508c..8572e45 100644 ---- a/hw/ppc/ppc.c -+++ b/hw/ppc/ppc.c -@@ -1008,6 +1008,8 @@ static void timebase_save(PPCTimebase *tb) - * there is no need to update it from KVM here - */ - tb->guest_timebase = ticks + first_ppc_cpu->env.tb_env->tb_offset; -+ -+ tb->runstate_paused = runstate_check(RUN_STATE_PAUSED); - } - - static void timebase_load(PPCTimebase *tb) -@@ -1051,9 +1053,9 @@ void cpu_ppc_clock_vm_state_change(void *opaque, int running, - } - - /* -- * When migrating, read the clock just before migration, -- * so that the guest clock counts during the events -- * between: -+ * When migrating a running guest, read the clock just -+ * before migration, so that the guest clock counts -+ * during the events between: - * - * * vm_stop() - * * -@@ -1068,7 +1070,10 @@ static int timebase_pre_save(void *opaque) - { - PPCTimebase *tb = opaque; - -- timebase_save(tb); -+ /* guest_timebase won't be overridden in case of paused guest */ -+ if (!tb->runstate_paused) { -+ timebase_save(tb); -+ } - - return 0; - } -diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h -index be9b4c3..5fbcdee 100644 ---- a/target/ppc/cpu-qom.h -+++ b/target/ppc/cpu-qom.h -@@ -201,6 +201,7 @@ typedef struct PowerPCCPUClass { - typedef struct PPCTimebase { - uint64_t guest_timebase; - int64_t time_of_the_day_ns; -+ bool runstate_paused; - } PPCTimebase; - - extern const 
struct VMStateDescription vmstate_ppc_timebase; --- -1.8.3.1 - diff --git a/kvm-migration-add-qemu_file_update_transfer-interface.patch b/kvm-migration-add-qemu_file_update_transfer-interface.patch deleted file mode 100644 index db27e74..0000000 --- a/kvm-migration-add-qemu_file_update_transfer-interface.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 1748253be11cbf12961274d4586671ce3803b006 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 4 Sep 2019 11:23:30 +0100 -Subject: [PATCH 2/6] migration: add qemu_file_update_transfer interface - -RH-Author: Juan Quintela -Message-id: <20190904112332.16160-3-quintela@redhat.com> -Patchwork-id: 90281 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH v2 2/4] migration: add qemu_file_update_transfer interface -Bugzilla: 1734316 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu -RH-Acked-by: Danilo de Paula - -From: Ivan Ren - -Add qemu_file_update_transfer for just update bytes_xfer for speed -limitation. This will be used for further migration feature such as -multifd migration. - -Signed-off-by: Ivan Ren -Reviewed-by: Wei Yang -Reviewed-by: Juan Quintela -Message-Id: <1564464816-21804-2-git-send-email-ivanren@tencent.com> -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 5d7d2558631b4421826c60046c606584c58ab76c) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/qemu-file.c | 5 +++++ - migration/qemu-file.h | 1 + - 2 files changed, 6 insertions(+) - -diff --git a/migration/qemu-file.c b/migration/qemu-file.c -index 0431585..18f4805 100644 ---- a/migration/qemu-file.c -+++ b/migration/qemu-file.c -@@ -615,6 +615,11 @@ void qemu_file_reset_rate_limit(QEMUFile *f) - f->bytes_xfer = 0; - } - -+void qemu_file_update_transfer(QEMUFile *f, int64_t len) -+{ -+ f->bytes_xfer += len; -+} -+ - void qemu_put_be16(QEMUFile *f, unsigned int v) - { - qemu_put_byte(f, v >> 8); -diff --git a/migration/qemu-file.h b/migration/qemu-file.h -index 13baf89..5de9fa2 100644 ---- a/migration/qemu-file.h -+++ b/migration/qemu-file.h -@@ -147,6 +147,7 @@ int qemu_peek_byte(QEMUFile *f, int offset); - void qemu_file_skip(QEMUFile *f, int size); - void qemu_update_position(QEMUFile *f, size_t size); - void qemu_file_reset_rate_limit(QEMUFile *f); -+void qemu_file_update_transfer(QEMUFile *f, int64_t len); - void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate); - int64_t qemu_file_get_rate_limit(QEMUFile *f); - void qemu_file_set_error(QEMUFile *f, int ret); --- -1.8.3.1 - diff --git a/kvm-migration-add-speed-limit-for-multifd-migration.patch b/kvm-migration-add-speed-limit-for-multifd-migration.patch deleted file mode 100644 index 90adb74..0000000 --- a/kvm-migration-add-speed-limit-for-multifd-migration.patch +++ /dev/null @@ -1,141 +0,0 @@ -From d2ade4bec79bdfe6f0867b0672c6731bc1664b42 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 4 Sep 2019 11:23:31 +0100 -Subject: [PATCH 3/6] migration: add speed limit for multifd migration - -RH-Author: Juan Quintela -Message-id: <20190904112332.16160-4-quintela@redhat.com> -Patchwork-id: 90279 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH v2 3/4] migration: add speed limit for multifd migration -Bugzilla: 1734316 -RH-Acked-by: Dr. 
David Alan Gilbert -RH-Acked-by: Peter Xu -RH-Acked-by: Danilo de Paula - -From: Ivan Ren - -Limit the speed of multifd migration through common speed limitation -qemu file. - -Signed-off-by: Ivan Ren -Message-Id: <1564464816-21804-3-git-send-email-ivanren@tencent.com> -Reviewed-by: Wei Yang -Reviewed-by: Juan Quintela -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 1b81c974ccfd536aceef840e220912b142a7dda0) -Signed-off-by: Juan Quintela -Signed-off-by: Danilo C. L. de Paula ---- - migration/ram.c | 22 ++++++++++++---------- - 1 file changed, 12 insertions(+), 10 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 889148d..88ddd2b 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -922,7 +922,7 @@ struct { - * false. - */ - --static int multifd_send_pages(void) -+static int multifd_send_pages(RAMState *rs) - { - int i; - static int next_channel; -@@ -954,6 +954,7 @@ static int multifd_send_pages(void) - multifd_send_state->pages = p->pages; - p->pages = pages; - transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len; -+ qemu_file_update_transfer(rs->f, transferred); - ram_counters.multifd_bytes += transferred; - ram_counters.transferred += transferred;; - qemu_mutex_unlock(&p->mutex); -@@ -962,7 +963,7 @@ static int multifd_send_pages(void) - return 1; - } - --static int multifd_queue_page(RAMBlock *block, ram_addr_t offset) -+static int multifd_queue_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) - { - MultiFDPages_t *pages = multifd_send_state->pages; - -@@ -981,12 +982,12 @@ static int multifd_queue_page(RAMBlock *block, ram_addr_t offset) - } - } - -- if (multifd_send_pages() < 0) { -+ if (multifd_send_pages(rs) < 0) { - return -1; - } - - if (pages->block != block) { -- return multifd_queue_page(block, offset); -+ return multifd_queue_page(rs, block, offset); - } - - return 1; -@@ -1054,7 +1055,7 @@ void multifd_save_cleanup(void) - multifd_send_state = NULL; - } - --static void 
multifd_send_sync_main(void) -+static void multifd_send_sync_main(RAMState *rs) - { - int i; - -@@ -1062,7 +1063,7 @@ static void multifd_send_sync_main(void) - return; - } - if (multifd_send_state->pages->used) { -- if (multifd_send_pages() < 0) { -+ if (multifd_send_pages(rs) < 0) { - error_report("%s: multifd_send_pages fail", __func__); - return; - } -@@ -1083,6 +1084,7 @@ static void multifd_send_sync_main(void) - p->packet_num = multifd_send_state->packet_num++; - p->flags |= MULTIFD_FLAG_SYNC; - p->pending_job++; -+ qemu_file_update_transfer(rs->f, p->packet_len); - qemu_mutex_unlock(&p->mutex); - qemu_sem_post(&p->sem); - } -@@ -2079,7 +2081,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) - static int ram_save_multifd_page(RAMState *rs, RAMBlock *block, - ram_addr_t offset) - { -- if (multifd_queue_page(block, offset) < 0) { -+ if (multifd_queue_page(rs, block, offset) < 0) { - return -1; - } - ram_counters.normal++; -@@ -3482,7 +3484,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) - ram_control_before_iterate(f, RAM_CONTROL_SETUP); - ram_control_after_iterate(f, RAM_CONTROL_SETUP); - -- multifd_send_sync_main(); -+ multifd_send_sync_main(*rsp); - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); - -@@ -3570,7 +3572,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - ram_control_after_iterate(f, RAM_CONTROL_ROUND); - - out: -- multifd_send_sync_main(); -+ multifd_send_sync_main(rs); - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); - ram_counters.transferred += 8; -@@ -3629,7 +3631,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) - - rcu_read_unlock(); - -- multifd_send_sync_main(); -+ multifd_send_sync_main(rs); - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); - --- -1.8.3.1 - diff --git a/kvm-migration-always-initialise-ram_counters-for-a-new-m.patch b/kvm-migration-always-initialise-ram_counters-for-a-new-m.patch deleted file mode 100644 index 1280554..0000000 --- 
a/kvm-migration-always-initialise-ram_counters-for-a-new-m.patch +++ /dev/null @@ -1,139 +0,0 @@ -From 7ec124b3de4e7c7ef093955813a213d7a976d395 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 4 Sep 2019 11:23:29 +0100 -Subject: [PATCH 1/6] migration: always initialise ram_counters for a new - migration - -RH-Author: Juan Quintela -Message-id: <20190904112332.16160-2-quintela@redhat.com> -Patchwork-id: 90278 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH v2 1/4] migration: always initialise ram_counters for a new migration -Bugzilla: 1734316 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu -RH-Acked-by: Danilo de Paula - -From: Ivan Ren - -This patch fix a multifd migration bug in migration speed calculation, this -problem can be reproduced as follows: -1. start a vm and give a heavy memory write stress to prevent the vm be - successfully migrated to destination -2. begin a migration with multifd -3. migrate for a long time [actually, this can be measured by transferred bytes] -4. migrate cancel -5. begin a new migration with multifd, the migration will directly run into - migration_completion phase - -Reason as follows: - -Migration update bandwidth and s->threshold_size in function -migration_update_counters after BUFFER_DELAY time: - - current_bytes = migration_total_bytes(s); - transferred = current_bytes - s->iteration_initial_bytes; - time_spent = current_time - s->iteration_start_time; - bandwidth = (double)transferred / time_spent; - s->threshold_size = bandwidth * s->parameters.downtime_limit; - -In multifd migration, migration_total_bytes function return -qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes. -s->iteration_initial_bytes will be initialized to 0 at every new migration, -but ram_counters is a global variable, and history migration data will be -accumulated. 
So if the ram_counters.multifd_bytes is big enough, it may lead -pending_size >= s->threshold_size become false in migration_iteration_run -after the first migration_update_counters. - -Signed-off-by: Ivan Ren -Reviewed-by: Juan Quintela -Reviewed-by: Wei Yang -Suggested-by: Wei Yang -Message-Id: <1564741121-1840-1-git-send-email-ivanren@tencent.com> -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 87f3bd8717cd88932de302e215f1da51bfb8051a) -Signed-off-by: Juan Quintela -Signed-off-by: Danilo C. L. de Paula ---- - migration/migration.c | 25 +++++++++++++++++++------ - migration/savevm.c | 1 + - 2 files changed, 20 insertions(+), 6 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index beffbfd..5299597 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1910,6 +1910,11 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - } - - migrate_init(s); -+ /* -+ * set ram_counters memory to zero for a -+ * new migration -+ */ -+ memset(&ram_counters, 0, sizeof(ram_counters)); - - return true; - } -@@ -3027,6 +3032,17 @@ static void migration_calculate_complete(MigrationState *s) - } - } - -+static void update_iteration_initial_status(MigrationState *s) -+{ -+ /* -+ * Update these three fields at the same time to avoid mismatch info lead -+ * wrong speed calculation. 
-+ */ -+ s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -+ s->iteration_initial_bytes = migration_total_bytes(s); -+ s->iteration_initial_pages = ram_get_total_transferred_pages(); -+} -+ - static void migration_update_counters(MigrationState *s, - int64_t current_time) - { -@@ -3062,9 +3078,7 @@ static void migration_update_counters(MigrationState *s, - - qemu_file_reset_rate_limit(s->to_dst_file); - -- s->iteration_start_time = current_time; -- s->iteration_initial_bytes = current_bytes; -- s->iteration_initial_pages = ram_get_total_transferred_pages(); -+ update_iteration_initial_status(s); - - trace_migrate_transferred(transferred, time_spent, - bandwidth, s->threshold_size); -@@ -3188,7 +3202,7 @@ static void *migration_thread(void *opaque) - rcu_register_thread(); - - object_ref(OBJECT(s)); -- s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -+ update_iteration_initial_status(s); - - qemu_savevm_state_header(s->to_dst_file); - -@@ -3253,8 +3267,7 @@ static void *migration_thread(void *opaque) - * the local variables. 
This is important to avoid - * breaking transferred_bytes and bandwidth calculation - */ -- s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -- s->iteration_initial_bytes = 0; -+ update_iteration_initial_status(s); - } - - current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -diff --git a/migration/savevm.c b/migration/savevm.c -index 79ed44d..480c511 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1424,6 +1424,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) - } - - migrate_init(ms); -+ memset(&ram_counters, 0, sizeof(ram_counters)); - ms->to_dst_file = f; - - qemu_mutex_unlock_iothread(); --- -1.8.3.1 - diff --git a/kvm-migration-update-ram_counters-for-multifd-sync-packe.patch b/kvm-migration-update-ram_counters-for-multifd-sync-packe.patch deleted file mode 100644 index b22d984..0000000 --- a/kvm-migration-update-ram_counters-for-multifd-sync-packe.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 0e42e63ddcaddd8837ba5ba1056380d4590754ee Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 4 Sep 2019 11:23:32 +0100 -Subject: [PATCH 4/6] migration: update ram_counters for multifd sync packet - -RH-Author: Juan Quintela -Message-id: <20190904112332.16160-5-quintela@redhat.com> -Patchwork-id: 90280 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH v2 4/4] migration: update ram_counters for multifd sync packet -Bugzilla: 1734316 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu -RH-Acked-by: Danilo de Paula - -From: Ivan Ren - -Multifd sync will send MULTIFD_FLAG_SYNC flag info to destination, add -these bytes to ram_counters record. - -Signed-off-by: Ivan Ren -Suggested-by: Wei Yang -Message-Id: <1564464816-21804-4-git-send-email-ivanren@tencent.com> -Reviewed-by: Juan Quintela -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 81507f6b7e87468f24ed5886559feda15fe2db0c) -Signed-off-by: Juan Quintela -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/migration/ram.c b/migration/ram.c -index 88ddd2b..20b6eeb 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1085,6 +1085,8 @@ static void multifd_send_sync_main(RAMState *rs) - p->flags |= MULTIFD_FLAG_SYNC; - p->pending_job++; - qemu_file_update_transfer(rs->f, p->packet_len); -+ ram_counters.multifd_bytes += p->packet_len; -+ ram_counters.transferred += p->packet_len; - qemu_mutex_unlock(&p->mutex); - qemu_sem_post(&p->sem); - } --- -1.8.3.1 - diff --git a/kvm-multifd-Use-number-of-channels-as-listen-backlog.patch b/kvm-multifd-Use-number-of-channels-as-listen-backlog.patch deleted file mode 100644 index d48d556..0000000 --- a/kvm-multifd-Use-number-of-channels-as-listen-backlog.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 8c93ef106ecf2aa0604ffe7fee3d628b88c2b015 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 4 Sep 2019 10:26:06 +0100 -Subject: [PATCH 6/8] multifd: Use number of channels as listen backlog -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Juan Quintela -Message-id: <20190904102606.15744-6-quintela@redhat.com> -Patchwork-id: 90273 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 5/5] multifd: Use number of channels as listen backlog -Bugzilla: 1726898 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu -RH-Acked-by: Danilo de Paula - -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Juan Quintela -(cherry picked from commit 0705e56496d2b155b5796c6b28d4110e5bcbd5d8) -Signed-off-by: Juan Quintela -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/socket.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/migration/socket.c b/migration/socket.c -index e63f5e1..97c9efd 100644 ---- a/migration/socket.c -+++ b/migration/socket.c -@@ -178,10 +178,15 @@ static void socket_start_incoming_migration(SocketAddress *saddr, - { - QIONetListener *listener = qio_net_listener_new(); - size_t i; -+ int num = 1; - - qio_net_listener_set_name(listener, "migration-socket-listener"); - -- if (qio_net_listener_open_sync(listener, saddr, 1, errp) < 0) { -+ if (migrate_use_multifd()) { -+ num = migrate_multifd_channels(); -+ } -+ -+ if (qio_net_listener_open_sync(listener, saddr, num, errp) < 0) { - object_unref(OBJECT(listener)); - return; - } --- -1.8.3.1 - diff --git a/kvm-nbd-Grab-aio-context-lock-in-more-places.patch b/kvm-nbd-Grab-aio-context-lock-in-more-places.patch deleted file mode 100644 index 46df547..0000000 --- a/kvm-nbd-Grab-aio-context-lock-in-more-places.patch +++ /dev/null @@ -1,200 +0,0 @@ -From 7cf87a669fa0dd580013b0ca5e4510f12aff2319 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Wed, 9 Oct 2019 14:10:07 +0100 -Subject: [PATCH 5/6] nbd: Grab aio context lock in more places - -RH-Author: Eric Blake -Message-id: <20191009141008.24439-2-eblake@redhat.com> -Patchwork-id: 91353 -O-Subject: [RHEL-AV-8.1.1 qemu-kvm PATCH 1/2] nbd: Grab aio context lock in more places -Bugzilla: 1741094 -RH-Acked-by: John Snow -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella - -When iothreads are in use, the failure to grab the aio context results -in an assertion failure when trying to unlock things during blk_unref, -when trying to unlock a mutex that was not locked. In short, all -calls to nbd_export_put need to done while within the correct aio -context. 
But since nbd_export_put can recursively reach itself via -nbd_export_close, and recursively grabbing the context would deadlock, -we can't do the context grab directly in those functions, but must do -so in their callers. - -Hoist the use of the correct aio_context from nbd_export_new() to its -caller qmp_nbd_server_add(). Then tweak qmp_nbd_server_remove(), -nbd_eject_notifier(), and nbd_esport_close_all() to grab the right -context, so that all callers during qemu now own the context before -nbd_export_put() can call blk_unref(). - -Remaining uses in qemu-nbd don't matter (since that use case does not -support iothreads). - -Suggested-by: Kevin Wolf -Signed-off-by: Eric Blake -Message-Id: <20190917023917.32226-1-eblake@redhat.com> -Reviewed-by: Sergio Lopez -(cherry picked from commit 61bc846d8c58535af6884b637a4005dd6111ea95) -Signed-off-by: Eric Blake -Signed-off-by: Danilo C. L. de Paula ---- - blockdev-nbd.c | 14 ++++++++++++-- - include/block/nbd.h | 1 + - nbd/server.c | 22 ++++++++++++++++++---- - 3 files changed, 31 insertions(+), 6 deletions(-) - -diff --git a/blockdev-nbd.c b/blockdev-nbd.c -index 06041a2..bed9370 100644 ---- a/blockdev-nbd.c -+++ b/blockdev-nbd.c -@@ -152,6 +152,7 @@ void qmp_nbd_server_add(const char *device, bool has_name, const char *name, - BlockBackend *on_eject_blk; - NBDExport *exp; - int64_t len; -+ AioContext *aio_context; - - if (!nbd_server) { - error_setg(errp, "NBD server not running"); -@@ -174,11 +175,13 @@ void qmp_nbd_server_add(const char *device, bool has_name, const char *name, - return; - } - -+ aio_context = bdrv_get_aio_context(bs); -+ aio_context_acquire(aio_context); - len = bdrv_getlength(bs); - if (len < 0) { - error_setg_errno(errp, -len, - "Failed to determine the NBD export's length"); -- return; -+ goto out; - } - - if (!has_writable) { -@@ -192,13 +195,16 @@ void qmp_nbd_server_add(const char *device, bool has_name, const char *name, - writable ? 
0 : NBD_FLAG_READ_ONLY, - NULL, false, on_eject_blk, errp); - if (!exp) { -- return; -+ goto out; - } - - /* The list of named exports has a strong reference to this export now and - * our only way of accessing it is through nbd_export_find(), so we can drop - * the strong reference that is @exp. */ - nbd_export_put(exp); -+ -+ out: -+ aio_context_release(aio_context); - } - - void qmp_nbd_server_remove(const char *name, -@@ -206,6 +212,7 @@ void qmp_nbd_server_remove(const char *name, - Error **errp) - { - NBDExport *exp; -+ AioContext *aio_context; - - if (!nbd_server) { - error_setg(errp, "NBD server not running"); -@@ -222,7 +229,10 @@ void qmp_nbd_server_remove(const char *name, - mode = NBD_SERVER_REMOVE_MODE_SAFE; - } - -+ aio_context = nbd_export_aio_context(exp); -+ aio_context_acquire(aio_context); - nbd_export_remove(exp, mode, errp); -+ aio_context_release(aio_context); - } - - void qmp_nbd_server_stop(Error **errp) -diff --git a/include/block/nbd.h b/include/block/nbd.h -index bb9f5bc..82f9b9e 100644 ---- a/include/block/nbd.h -+++ b/include/block/nbd.h -@@ -335,6 +335,7 @@ void nbd_export_put(NBDExport *exp); - - BlockBackend *nbd_export_get_blockdev(NBDExport *exp); - -+AioContext *nbd_export_aio_context(NBDExport *exp); - NBDExport *nbd_export_find(const char *name); - void nbd_export_close_all(void); - -diff --git a/nbd/server.c b/nbd/server.c -index ea0353a..81f8217 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -1460,7 +1460,12 @@ static void blk_aio_detach(void *opaque) - static void nbd_eject_notifier(Notifier *n, void *data) - { - NBDExport *exp = container_of(n, NBDExport, eject_notifier); -+ AioContext *aio_context; -+ -+ aio_context = exp->ctx; -+ aio_context_acquire(aio_context); - nbd_export_close(exp); -+ aio_context_release(aio_context); - } - - NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset, -@@ -1479,12 +1484,11 @@ NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset, - * NBD exports are used 
for non-shared storage migration. Make sure - * that BDRV_O_INACTIVE is cleared and the image is ready for write - * access since the export could be available before migration handover. -+ * ctx was acquired in the caller. - */ - assert(name); - ctx = bdrv_get_aio_context(bs); -- aio_context_acquire(ctx); - bdrv_invalidate_cache(bs, NULL); -- aio_context_release(ctx); - - /* Don't allow resize while the NBD server is running, otherwise we don't - * care what happens with the node. */ -@@ -1492,7 +1496,7 @@ NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset, - if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) { - perm |= BLK_PERM_WRITE; - } -- blk = blk_new(bdrv_get_aio_context(bs), perm, -+ blk = blk_new(ctx, perm, - BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | - BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD); - ret = blk_insert_bs(blk, bs, errp); -@@ -1549,7 +1553,7 @@ NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset, - } - - exp->close = close; -- exp->ctx = blk_get_aio_context(blk); -+ exp->ctx = ctx; - blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); - - if (on_eject_blk) { -@@ -1582,6 +1586,12 @@ NBDExport *nbd_export_find(const char *name) - return NULL; - } - -+AioContext * -+nbd_export_aio_context(NBDExport *exp) -+{ -+ return exp->ctx; -+} -+ - void nbd_export_close(NBDExport *exp) - { - NBDClient *client, *next; -@@ -1676,9 +1686,13 @@ BlockBackend *nbd_export_get_blockdev(NBDExport *exp) - void nbd_export_close_all(void) - { - NBDExport *exp, *next; -+ AioContext *aio_context; - - QTAILQ_FOREACH_SAFE(exp, &exports, next, next) { -+ aio_context = exp->ctx; -+ aio_context_acquire(aio_context); - nbd_export_close(exp); -+ aio_context_release(aio_context); - } - } - --- -1.8.3.1 - diff --git a/kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch b/kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch deleted file mode 100644 index 1671218..0000000 --- 
a/kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 394dd52ce4dbd69cd5eca9a9928c442650cc3fd2 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 27 Sep 2019 11:13:24 +0100 -Subject: [PATCH 1/2] nbd/server: attach client channel to the export's - AioContext - -RH-Author: Sergio Lopez Pascual -Message-id: <20190927111324.17949-2-slp@redhat.com> -Patchwork-id: 90905 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] nbd/server: attach client channel to the export's AioContext -Bugzilla: 1748253 -RH-Acked-by: Eric Blake -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Max Reitz - -On creation, the export's AioContext is set to the same one as the -BlockBackend, while the AioContext in the client QIOChannel is left -untouched. - -As a result, when using data-plane, nbd_client_receive_next_request() -schedules coroutines in the IOThread AioContext, while the client's -QIOChannel is serviced from the main_loop, potentially triggering the -assertion at qio_channel_restart_[read|write]. - -To fix this, as soon we have the export corresponding to the client, -we call qio_channel_attach_aio_context() to attach the QIOChannel -context to the export's AioContext. This matches with the logic at -blk_aio_attached(). - -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1748253 -Signed-off-by: Sergio Lopez -Message-Id: <20190912110032.26395-1-slp@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Eric Blake -(cherry picked from commit b4961249af0403fa55aae57c4c8806b24f7a7b33) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. 
de Paula ---- - nbd/server.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/nbd/server.c b/nbd/server.c -index 10faedc..ea0353a 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -1296,6 +1296,11 @@ static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp) - return ret; - } - -+ /* Attach the channel to the same AioContext as the export */ -+ if (client->exp && client->exp->ctx) { -+ qio_channel_attach_aio_context(client->ioc, client->exp->ctx); -+ } -+ - assert(!client->optlen); - trace_nbd_negotiate_success(); - --- -1.8.3.1 - diff --git a/kvm-pr-manager-Fix-invalid-g_free-crash-bug.patch b/kvm-pr-manager-Fix-invalid-g_free-crash-bug.patch deleted file mode 100644 index 42c4cd9..0000000 --- a/kvm-pr-manager-Fix-invalid-g_free-crash-bug.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 84728251439a3b73a57a8d72cc4d39307207cc01 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 20 Sep 2019 16:48:41 +0100 -Subject: [PATCH 4/4] pr-manager: Fix invalid g_free() crash bug -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20190920164841.10424-1-pbonzini@redhat.com> -Patchwork-id: 90824 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v2] pr-manager: Fix invalid g_free() crash bug -Bugzilla: 1753992 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Kevin Wolf -RH-Acked-by: Markus Armbruster - -From: Markus Armbruster - -BZ: 1753992 -BRANCH: rhel-av-8.1.0/master-4.1.0 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=23612762 -Upstream: 6b9d62c2a9e83bbad73fb61406f0ff69b46ff6f3 - -pr_manager_worker() passes its @opaque argument to g_free(). Wrong; -it points to pr_manager_worker()'s automatic @data. Broken when -commit 2f3a7ab39be converted @data from heap- to stack-allocated. Fix -by deleting the g_free(). 
- -Fixes: 2f3a7ab39bec4ba8022dc4d42ea641165b004e3e -Cc: qemu-stable@nongnu.org -Signed-off-by: Markus Armbruster -Reviewed-by: Philippe Mathieu-Daudé -Acked-by: Paolo Bonzini -Signed-off-by: Kevin Wolf -(cherry picked from commit 6b9d62c2a9e83bbad73fb61406f0ff69b46ff6f3) -Signed-off-by: Danilo C. L. de Paula ---- - scsi/pr-manager.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/scsi/pr-manager.c b/scsi/pr-manager.c -index ee43663..0c866e8 100644 ---- a/scsi/pr-manager.c -+++ b/scsi/pr-manager.c -@@ -39,7 +39,6 @@ static int pr_manager_worker(void *opaque) - int fd = data->fd; - int r; - -- g_free(data); - trace_pr_manager_run(fd, hdr->cmdp[0], hdr->cmdp[1]); - - /* The reference was taken in pr_manager_execute. */ --- -1.8.3.1 - diff --git a/kvm-pseries-Fix-compat_pvr-on-reset.patch b/kvm-pseries-Fix-compat_pvr-on-reset.patch deleted file mode 100644 index f03db5e..0000000 --- a/kvm-pseries-Fix-compat_pvr-on-reset.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 88cfbf2cc3e93a276f9d036850265eb8c2f5310c Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Wed, 4 Sep 2019 10:31:38 +0100 -Subject: [PATCH 7/8] pseries: Fix compat_pvr on reset - -RH-Author: Laurent Vivier -Message-id: <20190904103139.29870-2-lvivier@redhat.com> -Patchwork-id: 90274 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/2] pseries: Fix compat_pvr on reset -Bugzilla: 1744107 -RH-Acked-by: John Snow -RH-Acked-by: David Gibson -RH-Acked-by: Thomas Huth - -If we a migrate P8 machine to a P9 machine, the migration fails on -destination with: - - error while loading state for instance 0x1 of device 'cpu' - load of migration failed: Operation not permitted - -This is caused because the compat_pvr field is only present for the first -CPU. -Originally, spapr_machine_reset() calls ppc_set_compat() to set the value -max_compat_pvr for the first cpu and this was propagated to all CPUs by -spapr_cpu_reset(). 
Now, as spapr_cpu_reset() is called before that, the -value is not propagated to all CPUs and the migration fails. - -To fix that, propagate the new value to all CPUs in spapr_machine_reset(). - -Fixes: 25c9780d38d4 ("spapr: Reset CAS & IRQ subsystem after devices") -Signed-off-by: Laurent Vivier -Message-Id: <20190826090812.19080-1-lvivier@redhat.com> -Reviewed-by: Greg Kurz -Signed-off-by: David Gibson -(cherry picked from commit ce03a193e1172ff7d4b3303ec7472dc29183db8c) - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1744107 -BRANCH: rhel-av-8.1.0/master-4.1.0 -UPSTREAM: Merged -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr.c | 8 +++++++- - hw/ppc/spapr_cpu_core.c | 2 ++ - 2 files changed, 9 insertions(+), 1 deletion(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 9b500d2..30bf7bb 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -1746,7 +1746,13 @@ static void spapr_machine_reset(MachineState *machine) - spapr_ovec_cleanup(spapr->ov5_cas); - spapr->ov5_cas = spapr_ovec_new(); - -- ppc_set_compat(first_ppc_cpu, spapr->max_compat_pvr, &error_fatal); -+ /* -+ * reset compat_pvr for all CPUs -+ * as qemu_devices_reset() is called before this, -+ * it can't be propagated by spapr_cpu_reset() -+ * from the first CPU to all the others -+ */ -+ ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal); - } - - if (!SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { -diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 29a3c7d..ae43c57 100644 ---- a/hw/ppc/spapr_cpu_core.c -+++ b/hw/ppc/spapr_cpu_core.c -@@ -41,6 +41,8 @@ static void spapr_cpu_reset(void *opaque) - - /* Set compatibility mode to match the boot CPU, which was either set - * by the machine reset code or by CAS. This should never fail. 
-+ * At startup the value is already set for all the CPUs -+ * but we need this when we hotplug a new CPU - */ - ppc_set_compat(cpu, POWERPC_CPU(first_cpu)->compat_pvr, &error_abort); - --- -1.8.3.1 - diff --git a/kvm-qapi-implement-block-dirty-bitmap-remove-transaction.patch b/kvm-qapi-implement-block-dirty-bitmap-remove-transaction.patch deleted file mode 100644 index 26141a3..0000000 --- a/kvm-qapi-implement-block-dirty-bitmap-remove-transaction.patch +++ /dev/null @@ -1,274 +0,0 @@ -From fd8ecebf0c0632e473bcb8bb08dc8311a5530dcf Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Fri, 27 Sep 2019 20:18:45 +0100 -Subject: [PATCH 2/6] qapi: implement block-dirty-bitmap-remove transaction - action - -RH-Author: John Snow -Message-id: <20190927201846.6823-3-jsnow@redhat.com> -Patchwork-id: 90911 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/3] qapi: implement block-dirty-bitmap-remove transaction action -Bugzilla: 1756413 -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Max Reitz -RH-Acked-by: Stefano Garzarella - -It is used to do transactional movement of the bitmap (which is -possible in conjunction with merge command). Transactional bitmap -movement is needed in scenarios with external snapshot, when we don't -want to leave copy of the bitmap in the base image. - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: John Snow -Reviewed-by: Max Reitz -Message-id: 20190708220502.12977-3-jsnow@redhat.com -[Edited "since" version to 4.2 --js] -Signed-off-by: John Snow -(cherry picked from commit c4e4b0fa598ddc9cee6ba7a06899ce0a8dae6c61) -Signed-off-by: John Snow - -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 2 +- - block/dirty-bitmap.c | 15 ++++---- - blockdev.c | 79 ++++++++++++++++++++++++++++++++++++++---- - include/block/dirty-bitmap.h | 2 +- - migration/block-dirty-bitmap.c | 2 +- - qapi/transaction.json | 2 ++ - 6 files changed, 85 insertions(+), 17 deletions(-) - -diff --git a/block.c b/block.c -index cbd8da5..92a3e9f 100644 ---- a/block.c -+++ b/block.c -@@ -5334,7 +5334,7 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, - for (bm = bdrv_dirty_bitmap_next(bs, NULL); bm; - bm = bdrv_dirty_bitmap_next(bs, bm)) - { -- bdrv_dirty_bitmap_set_migration(bm, false); -+ bdrv_dirty_bitmap_skip_store(bm, false); - } - - ret = refresh_total_sectors(bs, bs->total_sectors); -diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c -index 95a9c2a..a308e1f 100644 ---- a/block/dirty-bitmap.c -+++ b/block/dirty-bitmap.c -@@ -48,10 +48,9 @@ struct BdrvDirtyBitmap { - bool inconsistent; /* bitmap is persistent, but inconsistent. - It cannot be used at all in any way, except - a QMP user can remove it. */ -- bool migration; /* Bitmap is selected for migration, it should -- not be stored on the next inactivation -- (persistent flag doesn't matter until next -- invalidation).*/ -+ bool skip_store; /* We are either migrating or deleting this -+ * bitmap; it should not be stored on the next -+ * inactivation. */ - QLIST_ENTRY(BdrvDirtyBitmap) list; - }; - -@@ -757,16 +756,16 @@ void bdrv_dirty_bitmap_set_inconsistent(BdrvDirtyBitmap *bitmap) - } - - /* Called with BQL taken. 
*/ --void bdrv_dirty_bitmap_set_migration(BdrvDirtyBitmap *bitmap, bool migration) -+void bdrv_dirty_bitmap_skip_store(BdrvDirtyBitmap *bitmap, bool skip) - { - qemu_mutex_lock(bitmap->mutex); -- bitmap->migration = migration; -+ bitmap->skip_store = skip; - qemu_mutex_unlock(bitmap->mutex); - } - - bool bdrv_dirty_bitmap_get_persistence(BdrvDirtyBitmap *bitmap) - { -- return bitmap->persistent && !bitmap->migration; -+ return bitmap->persistent && !bitmap->skip_store; - } - - bool bdrv_dirty_bitmap_inconsistent(const BdrvDirtyBitmap *bitmap) -@@ -778,7 +777,7 @@ bool bdrv_has_changed_persistent_bitmaps(BlockDriverState *bs) - { - BdrvDirtyBitmap *bm; - QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { -- if (bm->persistent && !bm->readonly && !bm->migration) { -+ if (bm->persistent && !bm->readonly && !bm->skip_store) { - return true; - } - } -diff --git a/blockdev.c b/blockdev.c -index 0124825..800b3dc 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -2134,6 +2134,51 @@ static void block_dirty_bitmap_merge_prepare(BlkActionState *common, - errp); - } - -+static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( -+ const char *node, const char *name, bool release, -+ BlockDriverState **bitmap_bs, Error **errp); -+ -+static void block_dirty_bitmap_remove_prepare(BlkActionState *common, -+ Error **errp) -+{ -+ BlockDirtyBitmap *action; -+ BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, -+ common, common); -+ -+ if (action_check_completion_mode(common, errp) < 0) { -+ return; -+ } -+ -+ action = common->action->u.block_dirty_bitmap_remove.data; -+ -+ state->bitmap = do_block_dirty_bitmap_remove(action->node, action->name, -+ false, &state->bs, errp); -+ if (state->bitmap) { -+ bdrv_dirty_bitmap_skip_store(state->bitmap, true); -+ bdrv_dirty_bitmap_set_busy(state->bitmap, true); -+ } -+} -+ -+static void block_dirty_bitmap_remove_abort(BlkActionState *common) -+{ -+ BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, -+ common, common); -+ -+ if 
(state->bitmap) { -+ bdrv_dirty_bitmap_skip_store(state->bitmap, false); -+ bdrv_dirty_bitmap_set_busy(state->bitmap, false); -+ } -+} -+ -+static void block_dirty_bitmap_remove_commit(BlkActionState *common) -+{ -+ BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState, -+ common, common); -+ -+ bdrv_dirty_bitmap_set_busy(state->bitmap, false); -+ bdrv_release_dirty_bitmap(state->bs, state->bitmap); -+} -+ - static void abort_prepare(BlkActionState *common, Error **errp) - { - error_setg(errp, "Transaction aborted using Abort action"); -@@ -2211,6 +2256,12 @@ static const BlkActionOps actions[] = { - .commit = block_dirty_bitmap_free_backup, - .abort = block_dirty_bitmap_restore, - }, -+ [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_REMOVE] = { -+ .instance_size = sizeof(BlockDirtyBitmapState), -+ .prepare = block_dirty_bitmap_remove_prepare, -+ .commit = block_dirty_bitmap_remove_commit, -+ .abort = block_dirty_bitmap_remove_abort, -+ }, - /* Where are transactions for MIRROR, COMMIT and STREAM? - * Although these blockjobs use transaction callbacks like the backup job, - * these jobs do not necessarily adhere to transaction semantics. 
-@@ -2869,20 +2920,21 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - bdrv_dirty_bitmap_set_persistence(bitmap, persistent); - } - --void qmp_block_dirty_bitmap_remove(const char *node, const char *name, -- Error **errp) -+static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( -+ const char *node, const char *name, bool release, -+ BlockDriverState **bitmap_bs, Error **errp) - { - BlockDriverState *bs; - BdrvDirtyBitmap *bitmap; - - bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); - if (!bitmap || !bs) { -- return; -+ return NULL; - } - - if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO, - errp)) { -- return; -+ return NULL; - } - - if (bdrv_dirty_bitmap_get_persistence(bitmap)) { -@@ -2892,13 +2944,28 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name, - aio_context_acquire(aio_context); - bdrv_remove_persistent_dirty_bitmap(bs, name, &local_err); - aio_context_release(aio_context); -+ - if (local_err != NULL) { - error_propagate(errp, local_err); -- return; -+ return NULL; - } - } - -- bdrv_release_dirty_bitmap(bs, bitmap); -+ if (release) { -+ bdrv_release_dirty_bitmap(bs, bitmap); -+ } -+ -+ if (bitmap_bs) { -+ *bitmap_bs = bs; -+ } -+ -+ return release ? 
NULL : bitmap; -+} -+ -+void qmp_block_dirty_bitmap_remove(const char *node, const char *name, -+ Error **errp) -+{ -+ do_block_dirty_bitmap_remove(node, name, true, NULL, errp); - } - - /** -diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h -index 62682eb..a21d54a 100644 ---- a/include/block/dirty-bitmap.h -+++ b/include/block/dirty-bitmap.h -@@ -83,7 +83,7 @@ void bdrv_dirty_bitmap_set_inconsistent(BdrvDirtyBitmap *bitmap); - void bdrv_dirty_bitmap_set_busy(BdrvDirtyBitmap *bitmap, bool busy); - void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src, - HBitmap **backup, Error **errp); --void bdrv_dirty_bitmap_set_migration(BdrvDirtyBitmap *bitmap, bool migration); -+void bdrv_dirty_bitmap_skip_store(BdrvDirtyBitmap *bitmap, bool skip); - - /* Functions that require manual locking. */ - void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap); -diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c -index 4a896a0..d650ba4 100644 ---- a/migration/block-dirty-bitmap.c -+++ b/migration/block-dirty-bitmap.c -@@ -326,7 +326,7 @@ static int init_dirty_bitmap_migration(void) - - /* unset migration flags here, to not roll back it */ - QSIMPLEQ_FOREACH(dbms, &dirty_bitmap_mig_state.dbms_list, entry) { -- bdrv_dirty_bitmap_set_migration(dbms->bitmap, true); -+ bdrv_dirty_bitmap_skip_store(dbms->bitmap, true); - } - - if (QSIMPLEQ_EMPTY(&dirty_bitmap_mig_state.dbms_list)) { -diff --git a/qapi/transaction.json b/qapi/transaction.json -index 95edb78..0590dbc 100644 ---- a/qapi/transaction.json -+++ b/qapi/transaction.json -@@ -45,6 +45,7 @@ - # - # - @abort: since 1.6 - # - @block-dirty-bitmap-add: since 2.5 -+# - @block-dirty-bitmap-remove: since 4.2 - # - @block-dirty-bitmap-clear: since 2.5 - # - @block-dirty-bitmap-enable: since 4.0 - # - @block-dirty-bitmap-disable: since 4.0 -@@ -61,6 +62,7 @@ - 'data': { - 'abort': 'Abort', - 'block-dirty-bitmap-add': 'BlockDirtyBitmapAdd', -+ 'block-dirty-bitmap-remove': 
'BlockDirtyBitmap', - 'block-dirty-bitmap-clear': 'BlockDirtyBitmap', - 'block-dirty-bitmap-enable': 'BlockDirtyBitmap', - 'block-dirty-bitmap-disable': 'BlockDirtyBitmap', --- -1.8.3.1 - diff --git a/kvm-qemu-iotests-Add-test-for-bz-1745922.patch b/kvm-qemu-iotests-Add-test-for-bz-1745922.patch deleted file mode 100644 index 00751f5..0000000 --- a/kvm-qemu-iotests-Add-test-for-bz-1745922.patch +++ /dev/null @@ -1,191 +0,0 @@ -From a888b935e29a08f0ace84906fee84b41a5f8b95d Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Tue, 24 Sep 2019 21:11:52 +0100 -Subject: [PATCH 4/4] qemu-iotests: Add test for bz #1745922 - -RH-Author: Maxim Levitsky -Message-id: <20190924211152.13461-5-mlevitsk@redhat.com> -Patchwork-id: 90877 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v2 4/4] qemu-iotests: Add test for bz #1745922 -Bugzilla: 1745922 -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz -RH-Acked-by: Danilo de Paula - -Signed-off-by: Maxim Levitsky -Tested-by: Vladimir Sementsov-Ogievskiy -Message-id: 20190915203655.21638-4-mlevitsk@redhat.com -Reviewed-by: Max Reitz -Signed-off-by: Max Reitz -(cherry picked from commit 1825cc0783ccf0ec5d9f0b225a99b340bdd4c68f) -Signed-off-by: Maxim Levitsky - - Conflicts: - tests/qemu-iotests/group - usual conflicts with missing tests - -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/263 | 91 ++++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/263.out | 40 ++++++++++++++++++++ - tests/qemu-iotests/group | 1 + - 3 files changed, 132 insertions(+) - create mode 100755 tests/qemu-iotests/263 - create mode 100644 tests/qemu-iotests/263.out - -diff --git a/tests/qemu-iotests/263 b/tests/qemu-iotests/263 -new file mode 100755 -index 0000000..d2c030f ---- /dev/null -+++ b/tests/qemu-iotests/263 -@@ -0,0 +1,91 @@ -+#!/usr/bin/env bash -+# -+# Test encrypted write that crosses cluster boundary of two unallocated clusters -+# Based on 188 -+# -+# Copyright (C) 2019 Red Hat, Inc. 
-+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+# creator -+owner=mlevitsk@redhat.com -+ -+seq=`basename $0` -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! -+ -+_cleanup() -+{ -+ _cleanup_test_img -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+. ./common.rc -+. ./common.filter -+ -+_supported_fmt qcow2 -+_supported_proto generic -+_supported_os Linux -+ -+ -+size=1M -+ -+SECRET="secret,id=sec0,data=astrochicken" -+QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT -+ -+ -+_run_test() -+{ -+ echo "== reading the whole image ==" -+ $QEMU_IO --object $SECRET -c "read -P 0 0 $size" --image-opts "$1" | _filter_qemu_io | _filter_testdir -+ -+ echo -+ echo "== write two 512 byte sectors on a cluster boundary ==" -+ $QEMU_IO --object $SECRET -c "write -P 0xAA 0xFE00 0x400" --image-opts "$1" | _filter_qemu_io | _filter_testdir -+ -+ echo -+ echo "== verify that the rest of the image is not changed ==" -+ $QEMU_IO --object $SECRET -c "read -P 0x00 0x00000 0xFE00" --image-opts "$1" | _filter_qemu_io | _filter_testdir -+ $QEMU_IO --object $SECRET -c "read -P 0xAA 0x0FE00 0x400" --image-opts "$1" | _filter_qemu_io | _filter_testdir -+ $QEMU_IO --object $SECRET -c "read -P 0x00 0x10200 0xEFE00" --image-opts "$1" | _filter_qemu_io | _filter_testdir -+ -+} -+ -+ -+echo -+echo "testing LUKS qcow2 encryption" -+echo -+ 
-+_make_test_img --object $SECRET -o "encrypt.format=luks,encrypt.key-secret=sec0,encrypt.iter-time=10,cluster_size=64K" $size -+_run_test "driver=$IMGFMT,encrypt.key-secret=sec0,file.filename=$TEST_IMG" -+_cleanup_test_img -+ -+echo -+echo "testing legacy AES qcow2 encryption" -+echo -+ -+ -+_make_test_img --object $SECRET -o "encrypt.format=aes,encrypt.key-secret=sec0,cluster_size=64K" $size -+_run_test "driver=$IMGFMT,encrypt.key-secret=sec0,file.filename=$TEST_IMG" -+_cleanup_test_img -+ -+ -+ -+# success, all done -+echo "*** done" -+rm -f $seq.full -+status=0 -diff --git a/tests/qemu-iotests/263.out b/tests/qemu-iotests/263.out -new file mode 100644 -index 0000000..0c982c5 ---- /dev/null -+++ b/tests/qemu-iotests/263.out -@@ -0,0 +1,40 @@ -+QA output created by 263 -+ -+testing LUKS qcow2 encryption -+ -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 encrypt.format=luks encrypt.key-secret=sec0 encrypt.iter-time=10 -+== reading the whole image == -+read 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== write two 512 byte sectors on a cluster boundary == -+wrote 1024/1024 bytes at offset 65024 -+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== verify that the rest of the image is not changed == -+read 65024/65024 bytes at offset 0 -+63.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 1024/1024 bytes at offset 65024 -+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 982528/982528 bytes at offset 66048 -+959.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+testing legacy AES qcow2 encryption -+ -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 encrypt.format=aes encrypt.key-secret=sec0 -+== reading the whole image == -+read 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== write two 512 byte sectors on a cluster boundary == -+wrote 1024/1024 bytes at offset 65024 -+1 KiB, X ops; XX:XX:XX.X (XXX 
YYY/sec and XXX ops/sec) -+ -+== verify that the rest of the image is not changed == -+read 65024/65024 bytes at offset 0 -+63.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 1024/1024 bytes at offset 65024 -+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 982528/982528 bytes at offset 66048 -+959.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+*** done -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 813db25..4a7e08f 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -271,3 +271,4 @@ - 254 rw backing quick - 255 rw quick - 256 rw quick -+263 rw quick -\ No newline at end of file --- -1.8.3.1 - diff --git a/kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch b/kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch deleted file mode 100644 index 8ad7329..0000000 --- a/kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch +++ /dev/null @@ -1,64 +0,0 @@ -From a9b22e8663f190e4a845815864e78ef61b68f2a4 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 20 Aug 2019 09:24:41 +0100 -Subject: [PATCH 2/5] redhat: s390x: Add proper compatibility options for the - -rhel7.6.0 machine - -RH-Author: Thomas Huth -Message-id: <20190820092441.28201-3-thuth@redhat.com> -Patchwork-id: 90078 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/2] redhat: s390x: Add proper compatibility options for the -rhel7.6.0 machine -Bugzilla: 1693772 -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Dr. David Alan Gilbert - -Bugzilla: 1693772 -Upstream: n/a (downstream only) - -Since we skipped the -rhel8.0.0 machine on s390x, we have to add the -related compatibility properties now to the -rhel7.6.0 machine type -instead. - -Also the "multiple epoch" facility was disabled on the z14GA1 CPU -in 7.6, so we also have to adjust our machine type here accordingly. - -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/s390x/s390-virtio-ccw.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 2f4cd14..ebbde05 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -905,12 +905,25 @@ DEFINE_CCW_MACHINE(rhel810, "rhel8.1.0", true); - - static void ccw_machine_rhel760_instance_options(MachineState *machine) - { -+ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V3_1 }; -+ - ccw_machine_rhel810_instance_options(machine); -+ -+ s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat); -+ -+ /* The multiple-epoch facility was not available with rhel7.6.0 on z14GA1 */ -+ s390_cpudef_featoff(14, 1, S390_FEAT_MULTIPLE_EPOCH); -+ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QSIE); -+ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QTOUE); -+ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOE); -+ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOUE); - } - - static void ccw_machine_rhel760_class_options(MachineClass *mc) - { - ccw_machine_rhel810_class_options(mc); -+ /* We never published the s390x version of RHEL8.0 AV, so add this here */ -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); - compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); - compat_props_add(mc->compat_props, ccw_compat_rhel_7_6, ccw_compat_rhel_7_6_len); - } --- -1.8.3.1 - diff --git a/kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch b/kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch deleted file mode 100644 index 4c9e563..0000000 --- a/kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch +++ /dev/null @@ -1,64 +0,0 @@ -From fb192e5feac9ed77672e4acbfaec3bdad4d7684a Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 20 Aug 2019 09:24:40 +0100 -Subject: [PATCH 1/5] redhat: s390x: Rename s390-ccw-virtio-rhel8.0.0 to - s390-ccw-virtio-rhel8.1.0 - -RH-Author: Thomas Huth -Message-id: 
<20190820092441.28201-2-thuth@redhat.com> -Patchwork-id: 90077 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/2] redhat: s390x: Rename s390-ccw-virtio-rhel8.0.0 to s390-ccw-virtio-rhel8.1.0 -Bugzilla: 1693772 -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Dr. David Alan Gilbert - -Bugzilla: 1693772 -Upstream: n/a - -We did not ship RHEL AV 8.0 for s390x to customers, so we do not need -to maintain the s390-ccw-virtio-rhel8.0.0 machine. Rename it to -s390-ccw-virtio-rhel8.1.0 instead. - -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/s390-virtio-ccw.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 4d8c2ec..2f4cd14 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -894,23 +894,23 @@ GlobalProperty ccw_compat_rhel_7_5[] = { - }; - const size_t ccw_compat_rhel_7_5_len = G_N_ELEMENTS(ccw_compat_rhel_7_5); - --static void ccw_machine_rhel800_instance_options(MachineState *machine) -+static void ccw_machine_rhel810_instance_options(MachineState *machine) - { - } - --static void ccw_machine_rhel800_class_options(MachineClass *mc) -+static void ccw_machine_rhel810_class_options(MachineClass *mc) - { - } --DEFINE_CCW_MACHINE(rhel800, "rhel8.0.0", true); -+DEFINE_CCW_MACHINE(rhel810, "rhel8.1.0", true); - - static void ccw_machine_rhel760_instance_options(MachineState *machine) - { -- ccw_machine_rhel800_instance_options(machine); -+ ccw_machine_rhel810_instance_options(machine); - } - - static void ccw_machine_rhel760_class_options(MachineClass *mc) - { -- ccw_machine_rhel800_class_options(mc); -+ ccw_machine_rhel810_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); - compat_props_add(mc->compat_props, ccw_compat_rhel_7_6, ccw_compat_rhel_7_6_len); - } --- -1.8.3.1 - diff --git a/kvm-redhat-update-pseries-rhel8.1.0-machine-type.patch 
b/kvm-redhat-update-pseries-rhel8.1.0-machine-type.patch deleted file mode 100644 index 29c686a..0000000 --- a/kvm-redhat-update-pseries-rhel8.1.0-machine-type.patch +++ /dev/null @@ -1,73 +0,0 @@ -From bcba7281bbb2351fab69498c54fcda4e6154fa91 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 23 Aug 2019 09:13:02 +0100 -Subject: [PATCH 4/5] redhat: update pseries-rhel8.1.0 machine type - -RH-Author: Laurent Vivier -Message-id: <20190823091302.8970-1-lvivier@redhat.com> -Patchwork-id: 90137 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH] redhat: update pseries-rhel8.1.0 machine type -Bugzilla: 1744170 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Gibson - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1744170 -BRANCH: rhel-av-8.1.0/master-4.1.0 -UPSTREAM: downstream only -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=23160686 - -pseries-rhel8.1.0 has been created based on pseries-4.0, -we need to update it now we are based on pseries-4.1 - -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/spapr.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 669eae1..9b500d2 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -4437,6 +4437,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) - } - - DEFINE_SPAPR_MACHINE(4_1, "4.1", true); -+#endif - - /* - * pseries-4.0 -@@ -4453,6 +4454,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, - *nv2atsd = 0; - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void spapr_machine_4_0_class_options(MachineClass *mc) - { - SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4796,7 +4798,7 @@ DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", true); - - /* - * pseries-rhel8.0.0 -- * like spapr_compat_3_1 -+ * like pseries-3.1 and pseries-4.0 - * except SPAPR_CAP_CFPC, SPAPR_CAP_SBBC and SPAPR_CAP_IBS - * that have been backported to pseries-rhel8.0.0 - */ -@@ -4809,6 +4811,12 @@ static void spapr_machine_rhel800_class_options(MachineClass *mc) - compat_props_add(mc->compat_props, hw_compat_rhel_8_0, - hw_compat_rhel_8_0_len); - -+ /* pseries-4.0 */ -+ smc->phb_placement = phb_placement_4_0; -+ smc->irq = &spapr_irq_xics; -+ smc->pre_4_1_migration = true; -+ -+ /* pseries-3.1 */ - mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); - smc->update_dt_enabled = false; - smc->dr_phb_enabled = false; --- -1.8.3.1 - diff --git a/kvm-socket-Add-backlog-parameter-to-socket_listen.patch b/kvm-socket-Add-backlog-parameter-to-socket_listen.patch deleted file mode 100644 index f3dfe2d..0000000 --- a/kvm-socket-Add-backlog-parameter-to-socket_listen.patch +++ /dev/null @@ -1,295 +0,0 @@ -From fce683618bc605eaedfdcea0db974734c111a2e9 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 4 Sep 2019 10:26:02 +0100 -Subject: [PATCH 2/8] socket: Add backlog parameter to socket_listen -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - 
-RH-Author: Juan Quintela -Message-id: <20190904102606.15744-2-quintela@redhat.com> -Patchwork-id: 90270 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 1/5] socket: Add backlog parameter to socket_listen -Bugzilla: 1726898 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu -RH-Acked-by: Danilo de Paula - -Current parameter was always one. We continue with that value for now -in all callers. - -Signed-off-by: Juan Quintela -Reviewed-by: Daniel P. Berrangé ---- -Moved trace to socket_listen -(cherry picked from commit e5b6353cf25c99c3f08bf51e29933352f7140e8f) -Signed-off-by: Juan Quintela - -Signed-off-by: Danilo C. L. de Paula ---- - include/qemu/sockets.h | 2 +- - io/channel-socket.c | 2 +- - qga/channel-posix.c | 2 +- - tests/test-util-sockets.c | 12 ++++++------ - util/qemu-sockets.c | 33 ++++++++++++++++++++++----------- - util/trace-events | 3 +++ - 6 files changed, 34 insertions(+), 20 deletions(-) - -diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h -index 8140fea..57cd049 100644 ---- a/include/qemu/sockets.h -+++ b/include/qemu/sockets.h -@@ -41,7 +41,7 @@ int unix_connect(const char *path, Error **errp); - - SocketAddress *socket_parse(const char *str, Error **errp); - int socket_connect(SocketAddress *addr, Error **errp); --int socket_listen(SocketAddress *addr, Error **errp); -+int socket_listen(SocketAddress *addr, int num, Error **errp); - void socket_listen_cleanup(int fd, Error **errp); - int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp); - -diff --git a/io/channel-socket.c b/io/channel-socket.c -index bec3d93..a533c8b 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -202,7 +202,7 @@ int qio_channel_socket_listen_sync(QIOChannelSocket *ioc, - int fd; - - trace_qio_channel_socket_listen_sync(ioc, addr); -- fd = socket_listen(addr, errp); -+ fd = socket_listen(addr, 1, errp); - if (fd < 0) { - trace_qio_channel_socket_listen_fail(ioc); - return -1; -diff --git a/qga/channel-posix.c 
b/qga/channel-posix.c -index 5a925a9..8fc205a 100644 ---- a/qga/channel-posix.c -+++ b/qga/channel-posix.c -@@ -215,7 +215,7 @@ static gboolean ga_channel_open(GAChannel *c, const gchar *path, - return false; - } - -- fd = socket_listen(addr, &local_err); -+ fd = socket_listen(addr, 1, &local_err); - qapi_free_SocketAddress(addr); - if (local_err != NULL) { - g_critical("%s", error_get_pretty(local_err)); -diff --git a/tests/test-util-sockets.c b/tests/test-util-sockets.c -index f1ebffe..c8e1893 100644 ---- a/tests/test-util-sockets.c -+++ b/tests/test-util-sockets.c -@@ -93,7 +93,7 @@ static void test_socket_fd_pass_name_good(void) - g_assert_cmpint(fd, !=, mon_fd); - close(fd); - -- fd = socket_listen(&addr, &error_abort); -+ fd = socket_listen(&addr, 1, &error_abort); - g_assert_cmpint(fd, !=, -1); - g_assert_cmpint(fd, !=, mon_fd); - close(fd); -@@ -124,7 +124,7 @@ static void test_socket_fd_pass_name_bad(void) - g_assert_cmpint(fd, ==, -1); - error_free_or_abort(&err); - -- fd = socket_listen(&addr, &err); -+ fd = socket_listen(&addr, 1, &err); - g_assert_cmpint(fd, ==, -1); - error_free_or_abort(&err); - -@@ -151,7 +151,7 @@ static void test_socket_fd_pass_name_nomon(void) - g_assert_cmpint(fd, ==, -1); - error_free_or_abort(&err); - -- fd = socket_listen(&addr, &err); -+ fd = socket_listen(&addr, 1, &err); - g_assert_cmpint(fd, ==, -1); - error_free_or_abort(&err); - -@@ -174,7 +174,7 @@ static void test_socket_fd_pass_num_good(void) - fd = socket_connect(&addr, &error_abort); - g_assert_cmpint(fd, ==, sfd); - -- fd = socket_listen(&addr, &error_abort); -+ fd = socket_listen(&addr, 1, &error_abort); - g_assert_cmpint(fd, ==, sfd); - - g_free(addr.u.fd.str); -@@ -197,7 +197,7 @@ static void test_socket_fd_pass_num_bad(void) - g_assert_cmpint(fd, ==, -1); - error_free_or_abort(&err); - -- fd = socket_listen(&addr, &err); -+ fd = socket_listen(&addr, 1, &err); - g_assert_cmpint(fd, ==, -1); - error_free_or_abort(&err); - -@@ -220,7 +220,7 @@ static void 
test_socket_fd_pass_num_nocli(void) - g_assert_cmpint(fd, ==, -1); - error_free_or_abort(&err); - -- fd = socket_listen(&addr, &err); -+ fd = socket_listen(&addr, 1, &err); - g_assert_cmpint(fd, ==, -1); - error_free_or_abort(&err); - -diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c -index a5092db..a39ada4 100644 ---- a/util/qemu-sockets.c -+++ b/util/qemu-sockets.c -@@ -31,6 +31,7 @@ - #include "qapi/qobject-input-visitor.h" - #include "qapi/qobject-output-visitor.h" - #include "qemu/cutils.h" -+#include "trace.h" - - #ifndef AI_ADDRCONFIG - # define AI_ADDRCONFIG 0 -@@ -207,6 +208,7 @@ static int try_bind(int socket, InetSocketAddress *saddr, struct addrinfo *e) - - static int inet_listen_saddr(InetSocketAddress *saddr, - int port_offset, -+ int num, - Error **errp) - { - struct addrinfo ai,*res,*e; -@@ -303,7 +305,7 @@ static int inet_listen_saddr(InetSocketAddress *saddr, - goto listen_failed; - } - } else { -- if (!listen(slisten, 1)) { -+ if (!listen(slisten, num)) { - goto listen_ok; - } - if (errno != EADDRINUSE) { -@@ -746,6 +748,7 @@ static int vsock_connect_saddr(VsockSocketAddress *vaddr, Error **errp) - } - - static int vsock_listen_saddr(VsockSocketAddress *vaddr, -+ int num, - Error **errp) - { - struct sockaddr_vm svm; -@@ -767,7 +770,7 @@ static int vsock_listen_saddr(VsockSocketAddress *vaddr, - return -1; - } - -- if (listen(slisten, 1) != 0) { -+ if (listen(slisten, num) != 0) { - error_setg_errno(errp, errno, "Failed to listen on socket"); - closesocket(slisten); - return -1; -@@ -808,6 +811,7 @@ static int vsock_connect_saddr(VsockSocketAddress *vaddr, Error **errp) - } - - static int vsock_listen_saddr(VsockSocketAddress *vaddr, -+ int num, - Error **errp) - { - vsock_unsupported(errp); -@@ -825,6 +829,7 @@ static int vsock_parse(VsockSocketAddress *addr, const char *str, - #ifndef _WIN32 - - static int unix_listen_saddr(UnixSocketAddress *saddr, -+ int num, - Error **errp) - { - struct sockaddr_un un; -@@ -886,7 +891,7 @@ static int 
unix_listen_saddr(UnixSocketAddress *saddr, - error_setg_errno(errp, errno, "Failed to bind socket to %s", path); - goto err; - } -- if (listen(sock, 1) < 0) { -+ if (listen(sock, num) < 0) { - error_setg_errno(errp, errno, "Failed to listen on socket"); - goto err; - } -@@ -953,6 +958,7 @@ static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp) - #else - - static int unix_listen_saddr(UnixSocketAddress *saddr, -+ int num, - Error **errp) - { - error_setg(errp, "unix sockets are not available on windows"); -@@ -976,7 +982,7 @@ int unix_listen(const char *str, Error **errp) - - saddr = g_new0(UnixSocketAddress, 1); - saddr->path = g_strdup(str); -- sock = unix_listen_saddr(saddr, errp); -+ sock = unix_listen_saddr(saddr, 1, errp); - qapi_free_UnixSocketAddress(saddr); - return sock; - } -@@ -1033,9 +1039,13 @@ fail: - return NULL; - } - --static int socket_get_fd(const char *fdstr, Error **errp) -+static int socket_get_fd(const char *fdstr, int num, Error **errp) - { - int fd; -+ if (num != 1) { -+ error_setg_errno(errp, EINVAL, "socket_get_fd: too many connections"); -+ return -1; -+ } - if (cur_mon) { - fd = monitor_get_fd(cur_mon, fdstr, errp); - if (fd < 0) { -@@ -1071,7 +1081,7 @@ int socket_connect(SocketAddress *addr, Error **errp) - break; - - case SOCKET_ADDRESS_TYPE_FD: -- fd = socket_get_fd(addr->u.fd.str, errp); -+ fd = socket_get_fd(addr->u.fd.str, 1, errp); - break; - - case SOCKET_ADDRESS_TYPE_VSOCK: -@@ -1084,25 +1094,26 @@ int socket_connect(SocketAddress *addr, Error **errp) - return fd; - } - --int socket_listen(SocketAddress *addr, Error **errp) -+int socket_listen(SocketAddress *addr, int num, Error **errp) - { - int fd; - -+ trace_socket_listen(num); - switch (addr->type) { - case SOCKET_ADDRESS_TYPE_INET: -- fd = inet_listen_saddr(&addr->u.inet, 0, errp); -+ fd = inet_listen_saddr(&addr->u.inet, 0, num, errp); - break; - - case SOCKET_ADDRESS_TYPE_UNIX: -- fd = unix_listen_saddr(&addr->u.q_unix, errp); -+ fd = 
unix_listen_saddr(&addr->u.q_unix, num, errp); - break; - - case SOCKET_ADDRESS_TYPE_FD: -- fd = socket_get_fd(addr->u.fd.str, errp); -+ fd = socket_get_fd(addr->u.fd.str, num, errp); - break; - - case SOCKET_ADDRESS_TYPE_VSOCK: -- fd = vsock_listen_saddr(&addr->u.vsock, errp); -+ fd = vsock_listen_saddr(&addr->u.vsock, num, errp); - break; - - default: -diff --git a/util/trace-events b/util/trace-events -index 9dbd237..83b6639 100644 ---- a/util/trace-events -+++ b/util/trace-events -@@ -64,6 +64,9 @@ lockcnt_futex_wait(const void *lockcnt, int val) "lockcnt %p waiting on %d" - lockcnt_futex_wait_resume(const void *lockcnt, int new) "lockcnt %p after wait: %d" - lockcnt_futex_wake(const void *lockcnt) "lockcnt %p waking up one waiter" - -+# qemu-sockets.c -+socket_listen(int num) "backlog: %d" -+ - # qemu-thread-common.h - qemu_mutex_lock(void *mutex, const char *file, const int line) "waiting on mutex %p (%s:%d)" - qemu_mutex_locked(void *mutex, const char *file, const int line) "taken mutex %p (%s:%d)" --- -1.8.3.1 - diff --git a/kvm-socket-Add-num-connections-to-qio_channel_socket_asy.patch b/kvm-socket-Add-num-connections-to-qio_channel_socket_asy.patch deleted file mode 100644 index 0512189..0000000 --- a/kvm-socket-Add-num-connections-to-qio_channel_socket_asy.patch +++ /dev/null @@ -1,144 +0,0 @@ -From d2bb195f057fd21444644d3996551fe8775043e5 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 4 Sep 2019 10:26:04 +0100 -Subject: [PATCH 4/8] socket: Add num connections to qio_channel_socket_async() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Juan Quintela -Message-id: <20190904102606.15744-4-quintela@redhat.com> -Patchwork-id: 90271 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 3/5] socket: Add num connections to qio_channel_socket_async() -Bugzilla: 1726898 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu -RH-Acked-by: Danilo de Paula - -Reviewed-by: Daniel P. 
Berrangé -Signed-off-by: Juan Quintela -(cherry picked from commit 7959e29ea0d6100038367beff9a0da0c83b322a2) -Signed-off-by: Juan Quintela -Signed-off-by: Danilo C. L. de Paula ---- - include/io/channel-socket.h | 2 ++ - io/channel-socket.c | 30 +++++++++++++++++++++++------- - io/trace-events | 2 +- - tests/test-io-channel-socket.c | 2 +- - 4 files changed, 27 insertions(+), 9 deletions(-) - -diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h -index ed88e5b..777ff59 100644 ---- a/include/io/channel-socket.h -+++ b/include/io/channel-socket.h -@@ -140,6 +140,7 @@ int qio_channel_socket_listen_sync(QIOChannelSocket *ioc, - * qio_channel_socket_listen_async: - * @ioc: the socket channel object - * @addr: the address to listen to -+ * @num: the expected ammount of connections - * @callback: the function to invoke on completion - * @opaque: user data to pass to @callback - * @destroy: the function to free @opaque -@@ -155,6 +156,7 @@ int qio_channel_socket_listen_sync(QIOChannelSocket *ioc, - */ - void qio_channel_socket_listen_async(QIOChannelSocket *ioc, - SocketAddress *addr, -+ int num, - QIOTaskFunc callback, - gpointer opaque, - GDestroyNotify destroy, -diff --git a/io/channel-socket.c b/io/channel-socket.c -index 6258c25..b74f5b9 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -220,14 +220,27 @@ int qio_channel_socket_listen_sync(QIOChannelSocket *ioc, - } - - -+struct QIOChannelListenWorkerData { -+ SocketAddress *addr; -+ int num; /* amount of expected connections */ -+}; -+ -+static void qio_channel_listen_worker_free(gpointer opaque) -+{ -+ struct QIOChannelListenWorkerData *data = opaque; -+ -+ qapi_free_SocketAddress(data->addr); -+ g_free(data); -+} -+ - static void qio_channel_socket_listen_worker(QIOTask *task, - gpointer opaque) - { - QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(qio_task_get_source(task)); -- SocketAddress *addr = opaque; -+ struct QIOChannelListenWorkerData *data = opaque; - Error *err = NULL; - -- 
qio_channel_socket_listen_sync(ioc, addr, 1, &err); -+ qio_channel_socket_listen_sync(ioc, data->addr, data->num, &err); - - qio_task_set_error(task, err); - } -@@ -235,6 +248,7 @@ static void qio_channel_socket_listen_worker(QIOTask *task, - - void qio_channel_socket_listen_async(QIOChannelSocket *ioc, - SocketAddress *addr, -+ int num, - QIOTaskFunc callback, - gpointer opaque, - GDestroyNotify destroy, -@@ -242,16 +256,18 @@ void qio_channel_socket_listen_async(QIOChannelSocket *ioc, - { - QIOTask *task = qio_task_new( - OBJECT(ioc), callback, opaque, destroy); -- SocketAddress *addrCopy; -+ struct QIOChannelListenWorkerData *data; - -- addrCopy = QAPI_CLONE(SocketAddress, addr); -+ data = g_new0(struct QIOChannelListenWorkerData, 1); -+ data->addr = QAPI_CLONE(SocketAddress, addr); -+ data->num = num; - - /* socket_listen() blocks in DNS lookups, so we must use a thread */ -- trace_qio_channel_socket_listen_async(ioc, addr); -+ trace_qio_channel_socket_listen_async(ioc, addr, num); - qio_task_run_in_thread(task, - qio_channel_socket_listen_worker, -- addrCopy, -- (GDestroyNotify)qapi_free_SocketAddress, -+ data, -+ qio_channel_listen_worker_free, - context); - } - -diff --git a/io/trace-events b/io/trace-events -index 2e6aa1d..d7bc70b 100644 ---- a/io/trace-events -+++ b/io/trace-events -@@ -18,7 +18,7 @@ qio_channel_socket_connect_async(void *ioc, void *addr) "Socket connect async io - qio_channel_socket_connect_fail(void *ioc) "Socket connect fail ioc=%p" - qio_channel_socket_connect_complete(void *ioc, int fd) "Socket connect complete ioc=%p fd=%d" - qio_channel_socket_listen_sync(void *ioc, void *addr, int num) "Socket listen sync ioc=%p addr=%p num=%d" --qio_channel_socket_listen_async(void *ioc, void *addr) "Socket listen async ioc=%p addr=%p" -+qio_channel_socket_listen_async(void *ioc, void *addr, int num) "Socket listen async ioc=%p addr=%p num=%d" - qio_channel_socket_listen_fail(void *ioc) "Socket listen fail ioc=%p" - 
qio_channel_socket_listen_complete(void *ioc, int fd) "Socket listen complete ioc=%p fd=%d" - qio_channel_socket_dgram_sync(void *ioc, void *localAddr, void *remoteAddr) "Socket dgram sync ioc=%p localAddr=%p remoteAddr=%p" -diff --git a/tests/test-io-channel-socket.c b/tests/test-io-channel-socket.c -index 6eebcee..50235c1 100644 ---- a/tests/test-io-channel-socket.c -+++ b/tests/test-io-channel-socket.c -@@ -113,7 +113,7 @@ static void test_io_channel_setup_async(SocketAddress *listen_addr, - - lioc = qio_channel_socket_new(); - qio_channel_socket_listen_async( -- lioc, listen_addr, -+ lioc, listen_addr, 1, - test_io_channel_complete, &data, NULL, NULL); - - g_main_loop_run(data.loop); --- -1.8.3.1 - diff --git a/kvm-socket-Add-num-connections-to-qio_channel_socket_syn.patch b/kvm-socket-Add-num-connections-to-qio_channel_socket_syn.patch deleted file mode 100644 index 1cac5e6..0000000 --- a/kvm-socket-Add-num-connections-to-qio_channel_socket_syn.patch +++ /dev/null @@ -1,173 +0,0 @@ -From 307f4596dc70dcabac9da3ec680d377e1df21397 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 4 Sep 2019 10:26:03 +0100 -Subject: [PATCH 3/8] socket: Add num connections to qio_channel_socket_sync() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Juan Quintela -Message-id: <20190904102606.15744-3-quintela@redhat.com> -Patchwork-id: 90272 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 2/5] socket: Add num connections to qio_channel_socket_sync() -Bugzilla: 1726898 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu -RH-Acked-by: Danilo de Paula - -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Juan Quintela -(cherry picked from commit 4e2d8bf6f143138ad121545a7cf4525e36040039) -Signed-off-by: Juan Quintela -Signed-off-by: Danilo C. L. 
de Paula ---- - include/io/channel-socket.h | 2 ++ - io/channel-socket.c | 7 ++++--- - io/net-listener.c | 2 +- - io/trace-events | 2 +- - scsi/qemu-pr-helper.c | 3 ++- - tests/test-char.c | 4 ++-- - tests/test-io-channel-socket.c | 2 +- - tests/tpm-emu.c | 2 +- - 8 files changed, 14 insertions(+), 10 deletions(-) - -diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h -index d7134d2..ed88e5b 100644 ---- a/include/io/channel-socket.h -+++ b/include/io/channel-socket.h -@@ -123,6 +123,7 @@ void qio_channel_socket_connect_async(QIOChannelSocket *ioc, - * qio_channel_socket_listen_sync: - * @ioc: the socket channel object - * @addr: the address to listen to -+ * @num: the expected ammount of connections - * @errp: pointer to a NULL-initialized error object - * - * Attempt to listen to the address @addr. This method -@@ -132,6 +133,7 @@ void qio_channel_socket_connect_async(QIOChannelSocket *ioc, - */ - int qio_channel_socket_listen_sync(QIOChannelSocket *ioc, - SocketAddress *addr, -+ int num, - Error **errp); - - /** -diff --git a/io/channel-socket.c b/io/channel-socket.c -index a533c8b..6258c25 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -197,12 +197,13 @@ void qio_channel_socket_connect_async(QIOChannelSocket *ioc, - - int qio_channel_socket_listen_sync(QIOChannelSocket *ioc, - SocketAddress *addr, -+ int num, - Error **errp) - { - int fd; - -- trace_qio_channel_socket_listen_sync(ioc, addr); -- fd = socket_listen(addr, 1, errp); -+ trace_qio_channel_socket_listen_sync(ioc, addr, num); -+ fd = socket_listen(addr, num, errp); - if (fd < 0) { - trace_qio_channel_socket_listen_fail(ioc); - return -1; -@@ -226,7 +227,7 @@ static void qio_channel_socket_listen_worker(QIOTask *task, - SocketAddress *addr = opaque; - Error *err = NULL; - -- qio_channel_socket_listen_sync(ioc, addr, &err); -+ qio_channel_socket_listen_sync(ioc, addr, 1, &err); - - qio_task_set_error(task, err); - } -diff --git a/io/net-listener.c b/io/net-listener.c 
-index d8cfe52..dc81150 100644 ---- a/io/net-listener.c -+++ b/io/net-listener.c -@@ -82,7 +82,7 @@ int qio_net_listener_open_sync(QIONetListener *listener, - for (i = 0; i < nresaddrs; i++) { - QIOChannelSocket *sioc = qio_channel_socket_new(); - -- if (qio_channel_socket_listen_sync(sioc, resaddrs[i], -+ if (qio_channel_socket_listen_sync(sioc, resaddrs[i], 1, - err ? NULL : &err) == 0) { - success = true; - -diff --git a/io/trace-events b/io/trace-events -index 3783905..2e6aa1d 100644 ---- a/io/trace-events -+++ b/io/trace-events -@@ -17,7 +17,7 @@ qio_channel_socket_connect_sync(void *ioc, void *addr) "Socket connect sync ioc= - qio_channel_socket_connect_async(void *ioc, void *addr) "Socket connect async ioc=%p addr=%p" - qio_channel_socket_connect_fail(void *ioc) "Socket connect fail ioc=%p" - qio_channel_socket_connect_complete(void *ioc, int fd) "Socket connect complete ioc=%p fd=%d" --qio_channel_socket_listen_sync(void *ioc, void *addr) "Socket listen sync ioc=%p addr=%p" -+qio_channel_socket_listen_sync(void *ioc, void *addr, int num) "Socket listen sync ioc=%p addr=%p num=%d" - qio_channel_socket_listen_async(void *ioc, void *addr) "Socket listen async ioc=%p addr=%p" - qio_channel_socket_listen_fail(void *ioc) "Socket listen fail ioc=%p" - qio_channel_socket_listen_complete(void *ioc, int fd) "Socket listen complete ioc=%p fd=%d" -diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c -index a256ce4..a8a74d1 100644 ---- a/scsi/qemu-pr-helper.c -+++ b/scsi/qemu-pr-helper.c -@@ -1005,7 +1005,8 @@ int main(int argc, char **argv) - .u.q_unix.path = socket_path, - }; - server_ioc = qio_channel_socket_new(); -- if (qio_channel_socket_listen_sync(server_ioc, &saddr, &local_err) < 0) { -+ if (qio_channel_socket_listen_sync(server_ioc, &saddr, -+ 1, &local_err) < 0) { - object_unref(OBJECT(server_ioc)); - error_report_err(local_err); - return 1; -diff --git a/tests/test-char.c b/tests/test-char.c -index f9440cd..af131fc 100644 ---- a/tests/test-char.c -+++ 
b/tests/test-char.c -@@ -666,7 +666,7 @@ char_socket_addr_to_opt_str(SocketAddress *addr, bool fd_pass, - char *optstr; - g_assert(!reconnect); - if (is_listen) { -- qio_channel_socket_listen_sync(ioc, addr, &error_abort); -+ qio_channel_socket_listen_sync(ioc, addr, 1, &error_abort); - } else { - qio_channel_socket_connect_sync(ioc, addr, &error_abort); - } -@@ -891,7 +891,7 @@ static void char_socket_client_test(gconstpointer opaque) - */ - ioc = qio_channel_socket_new(); - g_assert_nonnull(ioc); -- qio_channel_socket_listen_sync(ioc, config->addr, &error_abort); -+ qio_channel_socket_listen_sync(ioc, config->addr, 1, &error_abort); - addr = qio_channel_socket_get_local_address(ioc, &error_abort); - g_assert_nonnull(addr); - -diff --git a/tests/test-io-channel-socket.c b/tests/test-io-channel-socket.c -index d2053c4..6eebcee 100644 ---- a/tests/test-io-channel-socket.c -+++ b/tests/test-io-channel-socket.c -@@ -57,7 +57,7 @@ static void test_io_channel_setup_sync(SocketAddress *listen_addr, - QIOChannelSocket *lioc; - - lioc = qio_channel_socket_new(); -- qio_channel_socket_listen_sync(lioc, listen_addr, &error_abort); -+ qio_channel_socket_listen_sync(lioc, listen_addr, 1, &error_abort); - - if (listen_addr->type == SOCKET_ADDRESS_TYPE_INET) { - SocketAddress *laddr = qio_channel_socket_get_local_address( -diff --git a/tests/tpm-emu.c b/tests/tpm-emu.c -index 125e697..c43ac4a 100644 ---- a/tests/tpm-emu.c -+++ b/tests/tpm-emu.c -@@ -76,7 +76,7 @@ void *tpm_emu_ctrl_thread(void *data) - QIOChannelSocket *lioc = qio_channel_socket_new(); - QIOChannel *ioc; - -- qio_channel_socket_listen_sync(lioc, s->addr, &error_abort); -+ qio_channel_socket_listen_sync(lioc, s->addr, 1, &error_abort); - - g_mutex_lock(&s->data_mutex); - s->data_cond_signal = true; --- -1.8.3.1 - diff --git a/kvm-socket-Add-num-connections-to-qio_net_listener_open_.patch b/kvm-socket-Add-num-connections-to-qio_net_listener_open_.patch deleted file mode 100644 index edadfe8..0000000 --- 
a/kvm-socket-Add-num-connections-to-qio_net_listener_open_.patch +++ /dev/null @@ -1,151 +0,0 @@ -From c7029ffd110bdd4bab6847cd485898dbc5acf5f3 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 4 Sep 2019 10:26:05 +0100 -Subject: [PATCH 5/8] socket: Add num connections to - qio_net_listener_open_sync() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Juan Quintela -Message-id: <20190904102606.15744-5-quintela@redhat.com> -Patchwork-id: 90269 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 4/5] socket: Add num connections to qio_net_listener_open_sync() -Bugzilla: 1726898 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu -RH-Acked-by: Danilo de Paula - -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Juan Quintela -(cherry picked from commit fc8135c63021e8e442a7a894e5434f210614a73c) -Signed-off-by: Juan Quintela -Signed-off-by: Danilo C. L. de Paula ---- - blockdev-nbd.c | 2 +- - chardev/char-socket.c | 2 +- - include/io/net-listener.h | 2 ++ - io/net-listener.c | 3 ++- - migration/socket.c | 2 +- - qemu-nbd.c | 2 +- - ui/vnc.c | 4 ++-- - 7 files changed, 10 insertions(+), 7 deletions(-) - -diff --git a/blockdev-nbd.c b/blockdev-nbd.c -index 66eebab..06041a2 100644 ---- a/blockdev-nbd.c -+++ b/blockdev-nbd.c -@@ -102,7 +102,7 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds, - qio_net_listener_set_name(nbd_server->listener, - "nbd-listener"); - -- if (qio_net_listener_open_sync(nbd_server->listener, addr, errp) < 0) { -+ if (qio_net_listener_open_sync(nbd_server->listener, addr, 1, errp) < 0) { - goto error; - } - -diff --git a/chardev/char-socket.c b/chardev/char-socket.c -index 7ca5d97..8c7c9da 100644 ---- a/chardev/char-socket.c -+++ b/chardev/char-socket.c -@@ -1160,7 +1160,7 @@ static int qmp_chardev_open_socket_server(Chardev *chr, - qio_net_listener_set_name(s->listener, name); - g_free(name); - -- if (qio_net_listener_open_sync(s->listener, s->addr, errp) < 0) { -+ if 
(qio_net_listener_open_sync(s->listener, s->addr, 1, errp) < 0) { - object_unref(OBJECT(s->listener)); - s->listener = NULL; - return -1; -diff --git a/include/io/net-listener.h b/include/io/net-listener.h -index 8081ac5..fb10170 100644 ---- a/include/io/net-listener.h -+++ b/include/io/net-listener.h -@@ -95,6 +95,7 @@ void qio_net_listener_set_name(QIONetListener *listener, - * qio_net_listener_open_sync: - * @listener: the network listener object - * @addr: the address to listen on -+ * @num: the amount of expected connections - * @errp: pointer to a NULL initialized error object - * - * Synchronously open a listening connection on all -@@ -104,6 +105,7 @@ void qio_net_listener_set_name(QIONetListener *listener, - */ - int qio_net_listener_open_sync(QIONetListener *listener, - SocketAddress *addr, -+ int num, - Error **errp); - - /** -diff --git a/io/net-listener.c b/io/net-listener.c -index dc81150..5d8a226 100644 ---- a/io/net-listener.c -+++ b/io/net-listener.c -@@ -62,6 +62,7 @@ static gboolean qio_net_listener_channel_func(QIOChannel *ioc, - - int qio_net_listener_open_sync(QIONetListener *listener, - SocketAddress *addr, -+ int num, - Error **errp) - { - QIODNSResolver *resolver = qio_dns_resolver_get_instance(); -@@ -82,7 +83,7 @@ int qio_net_listener_open_sync(QIONetListener *listener, - for (i = 0; i < nresaddrs; i++) { - QIOChannelSocket *sioc = qio_channel_socket_new(); - -- if (qio_channel_socket_listen_sync(sioc, resaddrs[i], 1, -+ if (qio_channel_socket_listen_sync(sioc, resaddrs[i], num, - err ? 
NULL : &err) == 0) { - success = true; - -diff --git a/migration/socket.c b/migration/socket.c -index 98efdc0..e63f5e1 100644 ---- a/migration/socket.c -+++ b/migration/socket.c -@@ -181,7 +181,7 @@ static void socket_start_incoming_migration(SocketAddress *saddr, - - qio_net_listener_set_name(listener, "migration-socket-listener"); - -- if (qio_net_listener_open_sync(listener, saddr, errp) < 0) { -+ if (qio_net_listener_open_sync(listener, saddr, 1, errp) < 0) { - object_unref(OBJECT(listener)); - return; - } -diff --git a/qemu-nbd.c b/qemu-nbd.c -index a8cb39e..e6a52bf 100644 ---- a/qemu-nbd.c -+++ b/qemu-nbd.c -@@ -1054,7 +1054,7 @@ int main(int argc, char **argv) - server = qio_net_listener_new(); - if (socket_activation == 0) { - saddr = nbd_build_socket_address(sockpath, bindto, port); -- if (qio_net_listener_open_sync(server, saddr, &local_err) < 0) { -+ if (qio_net_listener_open_sync(server, saddr, 1, &local_err) < 0) { - object_unref(OBJECT(server)); - error_report_err(local_err); - exit(EXIT_FAILURE); -diff --git a/ui/vnc.c b/ui/vnc.c -index 933dc36..b042593 100644 ---- a/ui/vnc.c -+++ b/ui/vnc.c -@@ -3760,7 +3760,7 @@ static int vnc_display_listen(VncDisplay *vd, - qio_net_listener_set_name(vd->listener, "vnc-listen"); - for (i = 0; i < nsaddr; i++) { - if (qio_net_listener_open_sync(vd->listener, -- saddr[i], -+ saddr[i], 1, - errp) < 0) { - return -1; - } -@@ -3775,7 +3775,7 @@ static int vnc_display_listen(VncDisplay *vd, - qio_net_listener_set_name(vd->wslistener, "vnc-ws-listen"); - for (i = 0; i < nwsaddr; i++) { - if (qio_net_listener_open_sync(vd->wslistener, -- wsaddr[i], -+ wsaddr[i], 1, - errp) < 0) { - return -1; - } --- -1.8.3.1 - diff --git a/kvm-spapr-Set-compat-mode-in-spapr_core_plug.patch b/kvm-spapr-Set-compat-mode-in-spapr_core_plug.patch deleted file mode 100644 index f2b6090..0000000 --- a/kvm-spapr-Set-compat-mode-in-spapr_core_plug.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 50cee68ce9dc31033969905cf0358d0f641d056a Mon Sep 17 
00:00:00 2001 -From: Laurent Vivier -Date: Wed, 4 Sep 2019 10:31:39 +0100 -Subject: [PATCH 8/8] spapr: Set compat mode in spapr_core_plug() - -RH-Author: Laurent Vivier -Message-id: <20190904103139.29870-3-lvivier@redhat.com> -Patchwork-id: 90276 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 2/2] spapr: Set compat mode in spapr_core_plug() -Bugzilla: 1744107 -RH-Acked-by: John Snow -RH-Acked-by: David Gibson -RH-Acked-by: Thomas Huth - -From: Greg Kurz - -A recent change in spapr_machine_reset() showed that resetting the compat -mode in spapr_machine_reset() for the boot vCPU and in spapr_cpu_reset() -for all other vCPUs was fragile. The fix was thus to reset the compat mode -for all vCPUs in spapr_machine_reset(), but we still have to propagate -it to hot-plugged CPUs. This is still performed from spapr_cpu_reset(), -hence resulting in ppc_set_compat() being called twice for every vCPU at -machine reset. Apart from wasting cycles, which isn't really an issue -during machine reset, this seems to indicate that spapr_cpu_reset() isn't -the best place to set the compat mode. - -A natural candidate for CPU-hotplug specific code is spapr_core_plug(). -Also, it sits in the same file as spapr_machine_reset() : this makes -it easier for someone who wants to know when the compat PVR is set. - -Call ppc_set_compat() from there. This doesn't need to be done for -initial vCPUs since the compat PVR is 0 and spapr_machine_reset() sets -the appropriate value later. No need to do this on manually added vCPUS -on the destination QEMU during migration since the compat PVR is -part of the migrated vCPU state. Both conditions can be checked with -spapr_drc_hotplugged(). 
- -Signed-off-by: Greg Kurz -Message-Id: <156701285312.499757.7807417667750711711.stgit@bahia.lan> -Signed-off-by: David Gibson -(cherry picked from commit b1e815674343a171e51ce447495957e289091e9f) -Signed-off-by: Laurent Vivier - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1744107 -BRANCH: rhel-av-8.1.0/master-4.1.0 -UPSTREAM: Merged -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr.c | 24 ++++++++++++++++-------- - hw/ppc/spapr_cpu_core.c | 7 ------- - 2 files changed, 16 insertions(+), 15 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 30bf7bb..41a6070 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -1746,12 +1746,6 @@ static void spapr_machine_reset(MachineState *machine) - spapr_ovec_cleanup(spapr->ov5_cas); - spapr->ov5_cas = spapr_ovec_new(); - -- /* -- * reset compat_pvr for all CPUs -- * as qemu_devices_reset() is called before this, -- * it can't be propagated by spapr_cpu_reset() -- * from the first CPU to all the others -- */ - ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal); - } - -@@ -3826,6 +3820,7 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, - CPUArchId *core_slot; - int index; - bool hotplugged = spapr_drc_hotplugged(dev); -+ int i; - - core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index); - if (!core_slot) { -@@ -3859,13 +3854,26 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, - core_slot->cpu = OBJECT(dev); - - if (smc->pre_2_10_has_unused_icps) { -- int i; -- - for (i = 0; i < cc->nr_threads; i++) { - cs = CPU(core->threads[i]); - pre_2_10_vmstate_unregister_dummy_icp(cs->cpu_index); - } - } -+ -+ /* -+ * Set compatibility mode to match the boot CPU, which was either set -+ * by the machine reset code or by CAS. 
-+ */ -+ if (hotplugged) { -+ for (i = 0; i < cc->nr_threads; i++) { -+ ppc_set_compat(core->threads[i], POWERPC_CPU(first_cpu)->compat_pvr, -+ &local_err); -+ if (local_err) { -+ error_propagate(errp, local_err); -+ return; -+ } -+ } -+ } - } - - static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, -diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index ae43c57..85f2746 100644 ---- a/hw/ppc/spapr_cpu_core.c -+++ b/hw/ppc/spapr_cpu_core.c -@@ -39,13 +39,6 @@ static void spapr_cpu_reset(void *opaque) - * using an RTAS call */ - cs->halted = 1; - -- /* Set compatibility mode to match the boot CPU, which was either set -- * by the machine reset code or by CAS. This should never fail. -- * At startup the value is already set for all the CPUs -- * but we need this when we hotplug a new CPU -- */ -- ppc_set_compat(cpu, POWERPC_CPU(first_cpu)->compat_pvr, &error_abort); -- - env->spr[SPR_HIOR] = 0; - - lpcr = env->spr[SPR_LPCR]; --- -1.8.3.1 - diff --git a/kvm-spapr-Use-SHUTDOWN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch b/kvm-spapr-Use-SHUTDOWN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch deleted file mode 100644 index 8b3c06e..0000000 --- a/kvm-spapr-Use-SHUTDOWN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch +++ /dev/null @@ -1,60 +0,0 @@ -From c8d3479746b17fcdf56b8afb3eccdba2c14578e8 Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Fri, 6 Sep 2019 03:58:36 +0100 -Subject: [PATCH 3/6] spapr: Use SHUTDOWN_CAUSE_SUBSYSTEM_RESET for CAS reboots - -RH-Author: David Gibson -Message-id: <20190906035836.23689-1-dgibson@redhat.com> -Patchwork-id: 90293 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH] spapr: Use SHUTDOWN_CAUSE_SUBSYSTEM_RESET for CAS reboots -Bugzilla: 1743477 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth -RH-Acked-by: Danilo de Paula - -From: David Gibson - -The sPAPR platform includes feature negotiation between the guest and -platform. 
That sometimes requires reconfiguring the virtual hardware, and -in some cases that is a complex enough process that we trigger a system -reset to handle it. That interacts badly with -no-reboot - we trigger the -reboot, -no-reboot means we exit and so the guest never gets to try again. - -Eventually we want to get rid of CAS reboots entirely, since they're odd -and irritating for the user. But in the meantime we can fix the -no-reboot -problem by using SHUTDOWN_CAUSE_SUBSYSTEM_RESET which ignores -no-reboot -and seems to be designed for this sort of faux-reset for internal purposes -only. - -Signed-off-by: David Gibson -(cherry picked from commit 9146206eb26c1436c80a7c2ca1e4c5f86b27179d) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1743477 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=23395494 -Branch: rhel-av-8.1.0/master-4.1.0 -Testing: Started a guest and verified that -no-reboot no longer - prevents the CAS reboot to negotiate XIVE support from - completing - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/spapr_hcall.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c -index 6808d4c..687bb7b 100644 ---- a/hw/ppc/spapr_hcall.c -+++ b/hw/ppc/spapr_hcall.c -@@ -1672,7 +1672,7 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, - spapr_ovec_cleanup(ov5_updates); - - if (spapr->cas_reboot) { -- qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); -+ qemu_system_reset_request(SHUTDOWN_CAUSE_SUBSYSTEM_RESET); - } - - return H_SUCCESS; --- -1.8.3.1 - diff --git a/kvm-spapr-pci-Consolidate-de-allocation-of-MSIs.patch b/kvm-spapr-pci-Consolidate-de-allocation-of-MSIs.patch deleted file mode 100644 index b9e727b..0000000 --- a/kvm-spapr-pci-Consolidate-de-allocation-of-MSIs.patch +++ /dev/null @@ -1,103 +0,0 @@ -From b27062f4b3ddf47dea926026e5511f15d5b31320 Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Tue, 10 Sep 2019 07:04:27 +0100 -Subject: [PATCH 5/6] spapr/pci: Consolidate de-allocation of MSIs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Gibson -Message-id: <20190910070428.28628-2-dgibson@redhat.com> -Patchwork-id: 90362 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 1/2] spapr/pci: Consolidate de-allocation of MSIs -Bugzilla: 1750200 -RH-Acked-by: Thomas Huth -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laurent Vivier - -From: Greg Kurz - -When freeing MSIs, we need to: -- remove them from the machine's MSI bitmap -- remove them from the IC backend -- remove them from the PHB's MSI cache - -This is currently open coded in two places in rtas_ibm_change_msi(), -and we're about to need this in spapr_phb_reset() as well. Instead of -duplicating this code again, make it a destroy function for the PHB's -MSI cache. Removing an MSI device from the cache will call the destroy -function internally. 
- -Signed-off-by: Greg Kurz -Message-Id: <156415227855.1064338.5657793835271464648.stgit@bahia.lan> -Reviewed-by: Cédric Le Goater -Signed-off-by: David Gibson -(cherry picked from commit 078eb6b05b7f962e43d8bc376e0b96cdd550c17a) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1750200 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr_pci.c | 24 +++++++++++++++--------- - 1 file changed, 15 insertions(+), 9 deletions(-) - -diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c -index 9003fe9..1ffcfae 100644 ---- a/hw/ppc/spapr_pci.c -+++ b/hw/ppc/spapr_pci.c -@@ -336,10 +336,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr, - return; - } - -- if (!smc->legacy_irq_allocation) { -- spapr_irq_msi_free(spapr, msi->first_irq, msi->num); -- } -- spapr_irq_free(spapr, msi->first_irq, msi->num); - if (msi_present(pdev)) { - spapr_msi_setmsg(pdev, 0, false, 0, 0); - } -@@ -409,10 +405,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr, - - /* Release previous MSIs */ - if (msi) { -- if (!smc->legacy_irq_allocation) { -- spapr_irq_msi_free(spapr, msi->first_irq, msi->num); -- } -- spapr_irq_free(spapr, msi->first_irq, msi->num); - g_hash_table_remove(phb->msi, &config_addr); - } - -@@ -1806,6 +1798,19 @@ static void spapr_phb_unrealize(DeviceState *dev, Error **errp) - memory_region_del_subregion(get_system_memory(), &sphb->mem32window); - } - -+static void spapr_phb_destroy_msi(gpointer opaque) -+{ -+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); -+ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); -+ spapr_pci_msi *msi = opaque; -+ -+ if (!smc->legacy_irq_allocation) { -+ spapr_irq_msi_free(spapr, msi->first_irq, msi->num); -+ } -+ spapr_irq_free(spapr, msi->first_irq, msi->num); -+ g_free(msi); -+} -+ - static void spapr_phb_realize(DeviceState *dev, Error **errp) - { - /* We don't use SPAPR_MACHINE() in order to exit gracefully if the user -@@ 
-2017,7 +2022,8 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) - spapr_tce_get_iommu(tcet)); - } - -- sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free); -+ sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, -+ spapr_phb_destroy_msi); - return; - - unrealize: --- -1.8.3.1 - diff --git a/kvm-spapr-pci-Free-MSIs-during-reset.patch b/kvm-spapr-pci-Free-MSIs-during-reset.patch deleted file mode 100644 index 7be103a..0000000 --- a/kvm-spapr-pci-Free-MSIs-during-reset.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 26879f41a890a93beabefebb19c399561013a615 Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Tue, 10 Sep 2019 07:04:28 +0100 -Subject: [PATCH 6/6] spapr/pci: Free MSIs during reset -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Gibson -Message-id: <20190910070428.28628-3-dgibson@redhat.com> -Patchwork-id: 90363 -O-Subject: [RHEL-AV-8.1 qemu-kvm PATCH 2/2] spapr/pci: Free MSIs during reset -Bugzilla: 1750200 -RH-Acked-by: Thomas Huth -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laurent Vivier - -From: Greg Kurz - -When the machine is reset, the MSI bitmap is cleared but the allocated -MSIs are not freed. Some operating systems, such as AIX, can detect the -previous configuration and assert. - -Empty the MSI cache, this performs the needed cleanup. - -Signed-off-by: Greg Kurz -Message-Id: <156415228410.1064338.4486161194061636096.stgit@bahia.lan> -Reviewed-by: Cédric Le Goater -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: David Gibson -(cherry picked from commit ea52074d3a1c5fbe70f3014dc1b1f2e7d5ced5de) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1750200 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/spapr_pci.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c -index 1ffcfae..128c981 100644 ---- a/hw/ppc/spapr_pci.c -+++ b/hw/ppc/spapr_pci.c -@@ -2078,6 +2078,8 @@ static void spapr_phb_reset(DeviceState *qdev) - if (spapr_phb_eeh_available(SPAPR_PCI_HOST_BRIDGE(qdev))) { - spapr_phb_vfio_reset(qdev); - } -+ -+ g_hash_table_remove_all(sphb->msi); - } - - static Property spapr_phb_properties[] = { --- -1.8.3.1 - diff --git a/kvm-spapr-xive-Mask-the-EAS-when-allocating-an-IRQ.patch b/kvm-spapr-xive-Mask-the-EAS-when-allocating-an-IRQ.patch deleted file mode 100644 index 493bf77..0000000 --- a/kvm-spapr-xive-Mask-the-EAS-when-allocating-an-IRQ.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 6a7245ed7802dff5479228376a4119e095db33b2 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Wed, 11 Sep 2019 09:43:17 +0100 -Subject: [PATCH 1/4] spapr/xive: Mask the EAS when allocating an IRQ -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -Message-id: <20190911094317.21266-1-lvivier@redhat.com> -Patchwork-id: 90392 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH] spapr/xive: Mask the EAS when allocating an IRQ -Bugzilla: 1748725 -RH-Acked-by: Thomas Huth -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Danilo de Paula - -From: Cédric Le Goater - -If an IRQ is allocated and not configured, such as a MSI requested by -a PCI driver, it can be saved in its default state and possibly later -on restored using the same state. If not initially MASKED, KVM will -try to find a matching priority/target tuple for the interrupt and -fail to restore the VM because 0/0 is not a valid target. - -When allocating a IRQ number, the EAS should be set to a sane default : -VALID and MASKED. 
- -Reported-by: Satheesh Rajendran -Signed-off-by: Cédric Le Goater -Message-Id: <20190813164420.9829-1-clg@kaod.org> -Signed-off-by: David Gibson -(cherry picked from commit f55750e4e4fb35b6a12c81c485f16494e2c61ad2) -Signed-off-by: Laurent Vivier - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1748725 -BRANCH: rhel-av-8.1.0/master-4.1.0 -UPSTREAM: merged -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=23451934 -Signed-off-by: Danilo C. L. de Paula ---- - hw/intc/spapr_xive.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c -index 3ae311d..1f9c624 100644 ---- a/hw/intc/spapr_xive.c -+++ b/hw/intc/spapr_xive.c -@@ -534,7 +534,10 @@ bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi) - return false; - } - -- xive->eat[lisn].w |= cpu_to_be64(EAS_VALID); -+ /* -+ * Set default values when allocating an IRQ number -+ */ -+ xive->eat[lisn].w |= cpu_to_be64(EAS_VALID | EAS_MASKED); - if (lsi) { - xive_source_irq_set_lsi(xsrc, lisn); - } --- -1.8.3.1 - diff --git a/kvm-spapr-xive-skip-partially-initialized-vCPUs-in-prese.patch b/kvm-spapr-xive-skip-partially-initialized-vCPUs-in-prese.patch deleted file mode 100644 index fa9b454..0000000 --- a/kvm-spapr-xive-skip-partially-initialized-vCPUs-in-prese.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 3a7d0411addca79192ed60939f55ec019c27a72a Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Tue, 8 Oct 2019 05:08:36 +0100 -Subject: [PATCH 4/6] spapr/xive: skip partially initialized vCPUs in presenter -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Gibson -Message-id: <20191008050836.11479-1-dgibson@redhat.com> -Patchwork-id: 90994 -O-Subject: [RHEL-AV-8.1.1 qemu-kvm PATCH] spapr/xive: skip partially initialized vCPUs in presenter -Bugzilla: 1754710 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth -RH-Acked-by: Philippe Mathieu-Daudé - -From: Cédric Le 
Goater - -When vCPUs are hotplugged, they are added to the QEMU CPU list before -being fully realized. This can crash the XIVE presenter because the -'tctx' pointer is not necessarily initialized when looking for a -matching target. - -These vCPUs are not valid targets for the presenter. Skip them. - -Signed-off-by: Cédric Le Goater -Message-Id: <20191001085722.32755-1-clg@kaod.org> -Signed-off-by: David Gibson -Reviewed-by: Greg Kurz -(cherry picked from commit 627fa61746f70f7c799f08e9048bb6a482402138) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1754710 -Branch: rhel-av-8.1.1 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=23900462 -Testing: Could no longer reproduce bug with brewed qemu - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. de Paula ---- - hw/intc/xive.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/hw/intc/xive.c b/hw/intc/xive.c -index da148e9..8f639f6 100644 ---- a/hw/intc/xive.c -+++ b/hw/intc/xive.c -@@ -1345,6 +1345,14 @@ static bool xive_presenter_match(XiveRouter *xrtr, uint8_t format, - int ring; - - /* -+ * Skip partially initialized vCPUs. This can happen when -+ * vCPUs are hotplugged. -+ */ -+ if (!tctx) { -+ continue; -+ } -+ -+ /* - * HW checks that the CPU is enabled in the Physical Thread - * Enable Register (PTER). 
- */ --- -1.8.3.1 - diff --git a/kvm-tests-Use-iothreads-during-iotest-223.patch b/kvm-tests-Use-iothreads-during-iotest-223.patch deleted file mode 100644 index ea52932..0000000 --- a/kvm-tests-Use-iothreads-during-iotest-223.patch +++ /dev/null @@ -1,73 +0,0 @@ -From c03d23733166328e70f98504d7dfaa528e889633 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Wed, 9 Oct 2019 14:10:08 +0100 -Subject: [PATCH 6/6] tests: Use iothreads during iotest 223 - -RH-Author: Eric Blake -Message-id: <20191009141008.24439-3-eblake@redhat.com> -Patchwork-id: 91355 -O-Subject: [RHEL-AV-8.1.1 qemu-kvm PATCH 2/2] tests: Use iothreads during iotest 223 -Bugzilla: 1741094 -RH-Acked-by: John Snow -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella - -Doing so catches the bugs we just fixed with NBD not properly using -correct contexts. - -Signed-off-by: Eric Blake -Message-Id: <20190920220729.31801-1-eblake@redhat.com> -(cherry picked from commit 506902c6fa80210b002e30ff33794bfc718b15c6) -Signed-off-by: Eric Blake -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/223 | 6 ++++-- - tests/qemu-iotests/223.out | 1 + - 2 files changed, 5 insertions(+), 2 deletions(-) - -diff --git a/tests/qemu-iotests/223 b/tests/qemu-iotests/223 -index cc48e78..2ba3d81 100755 ---- a/tests/qemu-iotests/223 -+++ b/tests/qemu-iotests/223 -@@ -2,7 +2,7 @@ - # - # Test reading dirty bitmap over NBD - # --# Copyright (C) 2018 Red Hat, Inc. -+# Copyright (C) 2018-2019 Red Hat, Inc. 
- # - # This program is free software; you can redistribute it and/or modify - # it under the terms of the GNU General Public License as published by -@@ -109,7 +109,7 @@ echo - echo "=== End dirty bitmaps, and start serving image over NBD ===" - echo - --_launch_qemu 2> >(_filter_nbd) -+_launch_qemu -object iothread,id=io0 2> >(_filter_nbd) - - # Intentionally provoke some errors as well, to check error handling - silent= -@@ -117,6 +117,8 @@ _send_qemu_cmd $QEMU_HANDLE '{"execute":"qmp_capabilities"}' "return" - _send_qemu_cmd $QEMU_HANDLE '{"execute":"blockdev-add", - "arguments":{"driver":"qcow2", "node-name":"n", - "file":{"driver":"file", "filename":"'"$TEST_IMG"'"}}}' "return" -+_send_qemu_cmd $QEMU_HANDLE '{"execute":"x-blockdev-set-iothread", -+ "arguments":{"node-name":"n", "iothread":"io0"}}' "return" - _send_qemu_cmd $QEMU_HANDLE '{"execute":"block-dirty-bitmap-disable", - "arguments":{"node":"n", "name":"b"}}' "return" - _send_qemu_cmd $QEMU_HANDLE '{"execute":"nbd-server-add", -diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out -index d5201b2..90cc4b6 100644 ---- a/tests/qemu-iotests/223.out -+++ b/tests/qemu-iotests/223.out -@@ -27,6 +27,7 @@ wrote 2097152/2097152 bytes at offset 2097152 - {"return": {}} - {"return": {}} - {"return": {}} -+{"return": {}} - {"error": {"class": "GenericError", "desc": "NBD server not running"}} - {"return": {}} - {"error": {"class": "GenericError", "desc": "NBD server already running"}} --- -1.8.3.1 - diff --git a/kvm-trace-Clarify-DTrace-SystemTap-help-message.patch b/kvm-trace-Clarify-DTrace-SystemTap-help-message.patch deleted file mode 100644 index a3bfed4..0000000 --- a/kvm-trace-Clarify-DTrace-SystemTap-help-message.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 5d45e3a5d2e2d929095489a37579c3b7fc059450 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Tue, 3 Sep 2019 14:21:10 +0100 -Subject: [PATCH 1/8] trace: Clarify DTrace/SystemTap help message -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Philippe Mathieu-Daudé -Message-id: <20190903142110.25673-2-philmd@redhat.com> -Patchwork-id: 90255 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] trace: Clarify DTrace/SystemTap help message -Bugzilla: 1516220 -RH-Acked-by: John Snow -RH-Acked-by: Peter Xu -RH-Acked-by: Stefan Hajnoczi - -Most tracing backends are implemented within QEMU, except the -DTrace/SystemTap backends. - -One side effect is when running 'qemu -trace help', an incomplete -list of trace events is displayed when using the DTrace/SystemTap -backends. - -This is partly due to trace events registered as modules with -trace_init(), and since the events are not used within QEMU, -the linker optimize and remove the unused modules (which is -OK in this particular case). -Currently only the events compiled in trace-root.o and in the -last trace.o member of libqemuutil.a are linked, resulting in -an incomplete list of events. - -To avoid confusion, improve the help message, recommending to -use the proper systemtap script to display the events list. - -Before: - - $ lm32-softmmu/qemu-system-lm32 -trace help 2>&1 | wc -l - 70 - -After: - - $ lm32-softmmu/qemu-system-lm32 -trace help - Run 'qemu-trace-stap list qemu-system-lm32' to print a list - of names of trace points with the DTrace/SystemTap backends. - - $ qemu-trace-stap list qemu-system-lm32 | wc -l - 1136 - -Signed-off-by: Philippe Mathieu-Daudé -Message-id: 20190823142203.5210-1-philmd@redhat.com -Message-Id: <20190823142203.5210-1-philmd@redhat.com> -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 9f591a5d95e1969969632ab44cf35e505c8ddc3b) -Signed-off-by: Philippe Mathieu-Daudé -Signed-off-by: Danilo C. L. 
de Paula ---- - trace/control.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/trace/control.c b/trace/control.c -index 43fb786..d9cafc1 100644 ---- a/trace/control.c -+++ b/trace/control.c -@@ -165,6 +165,12 @@ void trace_list_events(void) - while ((ev = trace_event_iter_next(&iter)) != NULL) { - fprintf(stderr, "%s\n", trace_event_get_name(ev)); - } -+#ifdef CONFIG_TRACE_DTRACE -+ fprintf(stderr, "This list of names of trace points may be incomplete " -+ "when using the DTrace/SystemTap backends.\n" -+ "Run 'qemu-trace-stap list %s' to print the full list.\n", -+ error_get_progname()); -+#endif - } - - static void do_trace_enable_events(const char *line_buf) --- -1.8.3.1 - diff --git a/kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch b/kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch deleted file mode 100644 index b59bdfe..0000000 --- a/kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch +++ /dev/null @@ -1,48 +0,0 @@ -From ca4a5e85de406a495512d544c3b2187ac3654e97 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 1 Aug 2019 10:26:47 +0100 -Subject: [PATCH 6/6] virtio: Make disable-legacy/disable-modern compat - properties optional - -RH-Author: Dr. David Alan Gilbert -Message-id: <20190801102647.14173-1-dgilbert@redhat.com> -Patchwork-id: 89849 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v3 6/5] virtio: Make disable-legacy/disable-modern compat properties optional -Bugzilla: 1719649 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Markus Armbruster -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Eduardo Habkost - -From: "Dr. David Alan Gilbert" - -Upstream 53921bfdce3 by Eduardo made some hw_compat_2_6 entries optional -to fix a bug where non-transitional devices failed to be created on -old upstream machine types. Do the same fix to our old downstream -machines. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/core/machine.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index e2f812a..c796b54 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -152,8 +152,9 @@ GlobalProperty hw_compat_rhel_7_2[] = { - { "fw_cfg_mem", "dma_enabled", "off" }, - { "fw_cfg_io", "dma_enabled", "off" }, - { "isa-fdc", "fallback", "144" }, -- { "virtio-pci", "disable-modern", "on" }, -- { "virtio-pci", "disable-legacy", "off" }, -+ /* Optional because not all virtio-pci devices support legacy mode */ -+ { "virtio-pci", "disable-modern", "on", .optional = true }, -+ { "virtio-pci", "disable-legacy", "off", .optional = true }, - { TYPE_PCI_DEVICE, "x-pcie-lnksta-dllla", "off" }, - { "virtio-pci", "page-per-vq", "on" }, - /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ --- -1.8.3.1 - diff --git a/kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch b/kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch deleted file mode 100644 index 3d61e36..0000000 --- a/kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch +++ /dev/null @@ -1,92 +0,0 @@ -From df7d91dda24b27c89ff8ce1b9cc72c7ed7350be2 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Fri, 13 Sep 2019 14:16:25 +0100 -Subject: [PATCH 3/4] virtio-blk: Cancel the pending BH when the dataplane is - reset -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Philippe Mathieu-Daudé -Message-id: <20190913141625.12521-2-philmd@redhat.com> -Patchwork-id: 90453 -O-Subject: [RHEL-7.7.z qemu-kvm-rhev + RHEL-8.1.0 qemu-kvm + RHEL-AV-8.1.0 qemu-kvm PATCH v2 1/1] virtio-blk: Cancel the pending BH when the dataplane is reset -Bugzilla: 1717321 -RH-Acked-by: John Snow -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Danilo de Paula - -When 'system_reset' is called, the main loop clear the memory -region cache before the BH has a chance to execute. 
Later when -the deferred function is called, some assumptions that were -made when scheduling them are no longer true when they actually -execute. - -This is what happens using a virtio-blk device (fresh RHEL7.8 install): - - $ (sleep 12.3; echo system_reset; sleep 12.3; echo system_reset; sleep 1; echo q) \ - | qemu-system-x86_64 -m 4G -smp 8 -boot menu=on \ - -device virtio-blk-pci,id=image1,drive=drive_image1 \ - -drive file=/var/lib/libvirt/images/rhel78.qcow2,if=none,id=drive_image1,format=qcow2,cache=none \ - -device virtio-net-pci,netdev=net0,id=nic0,mac=52:54:00:c4:e7:84 \ - -netdev tap,id=net0,script=/bin/true,downscript=/bin/true,vhost=on \ - -monitor stdio -serial null -nographic - (qemu) system_reset - (qemu) system_reset - (qemu) qemu-system-x86_64: hw/virtio/virtio.c:225: vring_get_region_caches: Assertion `caches != NULL' failed. - Aborted - - (gdb) bt - Thread 1 (Thread 0x7f109c17b680 (LWP 10939)): - #0 0x00005604083296d1 in vring_get_region_caches (vq=0x56040a24bdd0) at hw/virtio/virtio.c:227 - #1 0x000056040832972b in vring_avail_flags (vq=0x56040a24bdd0) at hw/virtio/virtio.c:235 - #2 0x000056040832d13d in virtio_should_notify (vdev=0x56040a240630, vq=0x56040a24bdd0) at hw/virtio/virtio.c:1648 - #3 0x000056040832d1f8 in virtio_notify_irqfd (vdev=0x56040a240630, vq=0x56040a24bdd0) at hw/virtio/virtio.c:1662 - #4 0x00005604082d213d in notify_guest_bh (opaque=0x56040a243ec0) at hw/block/dataplane/virtio-blk.c:75 - #5 0x000056040883dc35 in aio_bh_call (bh=0x56040a243f10) at util/async.c:90 - #6 0x000056040883dccd in aio_bh_poll (ctx=0x560409161980) at util/async.c:118 - #7 0x0000560408842af7 in aio_dispatch (ctx=0x560409161980) at util/aio-posix.c:460 - #8 0x000056040883e068 in aio_ctx_dispatch (source=0x560409161980, callback=0x0, user_data=0x0) at util/async.c:261 - #9 0x00007f10a8fca06d in g_main_context_dispatch () at /lib64/libglib-2.0.so.0 - #10 0x0000560408841445 in glib_pollfds_poll () at util/main-loop.c:215 - #11 0x00005604088414bf in 
os_host_main_loop_wait (timeout=0) at util/main-loop.c:238 - #12 0x00005604088415c4 in main_loop_wait (nonblocking=0) at util/main-loop.c:514 - #13 0x0000560408416b1e in main_loop () at vl.c:1923 - #14 0x000056040841e0e8 in main (argc=20, argv=0x7ffc2c3f9c58, envp=0x7ffc2c3f9d00) at vl.c:4578 - -Fix this by cancelling the BH when the virtio dataplane is stopped. - -[This is version of the patch was modified as discussed with Philippe on -the mailing list thread. ---Stefan] - -Reported-by: Yihuang Yu -Suggested-by: Stefan Hajnoczi -Fixes: https://bugs.launchpad.net/qemu/+bug/1839428 -Signed-off-by: Philippe Mathieu-Daudé -Message-Id: <20190816171503.24761-1-philmd@redhat.com> -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit ebb6ff25cd888a52a64a9adc3692541c6d1d9a42) -Signed-off-by: Philippe Mathieu-Daudé -Signed-off-by: Danilo C. L. de Paula ---- - hw/block/dataplane/virtio-blk.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index 158c78f..5fea76d 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -297,6 +297,9 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); - } - -+ qemu_bh_cancel(s->bh); -+ notify_guest_bh(s); /* final chance to notify guest */ -+ - /* Clean up guest notifier (irq) */ - k->set_guest_notifiers(qbus->parent, nvqs, false); - --- -1.8.3.1 - diff --git a/kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch b/kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch deleted file mode 100644 index 2cf7fe8..0000000 --- a/kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch +++ /dev/null @@ -1,91 +0,0 @@ -From 6b292920dbdd463bb80b82bef2063623a8e2da17 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 27 Sep 2019 11:46:41 +0100 -Subject: [PATCH 2/2] virtio-blk: schedule virtio_notify_config to run on main - context -MIME-Version: 
1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Sergio Lopez Pascual -Message-id: <20190927114641.20992-2-slp@redhat.com> -Patchwork-id: 90907 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] virtio-blk: schedule virtio_notify_config to run on main context -Bugzilla: 1744955 -RH-Acked-by: Eric Blake -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Max Reitz - -virtio_notify_config() needs to acquire the global mutex, which isn't -allowed from an iothread, and may lead to a deadlock like this: - - - main thead - * Has acquired: qemu_global_mutex. - * Is trying the acquire: iothread AioContext lock via - AIO_WAIT_WHILE (after aio_poll). - - - iothread - * Has acquired: AioContext lock. - * Is trying to acquire: qemu_global_mutex (via - virtio_notify_config->prepare_mmio_access). - -If virtio_blk_resize() is called from an iothread, schedule -virtio_notify_config() to be run in the main context BH. - -[Removed unnecessary newline as suggested by Kevin Wolf -. ---Stefan] - -Signed-off-by: Sergio Lopez -Reviewed-by: Kevin Wolf -Message-id: 20190916112411.21636-1-slp@redhat.com -Message-Id: <20190916112411.21636-1-slp@redhat.com> -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit f9a7e3698a737ee75a7b0af34203303df982550f) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/block/virtio-blk.c | 16 +++++++++++++++- - 1 file changed, 15 insertions(+), 1 deletion(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index cbb3729..0d9adcd 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -16,6 +16,7 @@ - #include "qemu/iov.h" - #include "qemu/module.h" - #include "qemu/error-report.h" -+#include "qemu/main-loop.h" - #include "trace.h" - #include "hw/block/block.h" - #include "sysemu/blockdev.h" -@@ -1082,11 +1083,24 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, - return 0; - } - -+static void virtio_resize_cb(void *opaque) -+{ -+ VirtIODevice *vdev = opaque; -+ -+ assert(qemu_get_current_aio_context() == qemu_get_aio_context()); -+ virtio_notify_config(vdev); -+} -+ - static void virtio_blk_resize(void *opaque) - { - VirtIODevice *vdev = VIRTIO_DEVICE(opaque); - -- virtio_notify_config(vdev); -+ /* -+ * virtio_notify_config() needs to acquire the global mutex, -+ * so it can't be called from an iothread. Instead, schedule -+ * it to be run in the main context BH. -+ */ -+ aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); - } - - static const BlockDevOps virtio_block_ops = { --- -1.8.3.1 - diff --git a/kvm-x86-machine-types-Fixup-dynamic-sysbus-entries.patch b/kvm-x86-machine-types-Fixup-dynamic-sysbus-entries.patch deleted file mode 100644 index 0ac47c3..0000000 --- a/kvm-x86-machine-types-Fixup-dynamic-sysbus-entries.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 6df1559af7cd65e3faf7c61a2bb8f02667767ad6 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Wed, 31 Jul 2019 15:08:12 +0100 -Subject: [PATCH 3/6] x86 machine types: Fixup dynamic sysbus entries - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20190731150814.29571-4-dgilbert@redhat.com> -Patchwork-id: 89814 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v3 3/5] x86 machine types: Fixup dynamic sysbus entries -Bugzilla: 1719649 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Markus Armbruster -RH-Acked-by: Cornelia Huck - -From: "Dr. David Alan Gilbert" - -We're missing a couple of upstream changes, add them for -consistency: - - v2.11.0-824-gef18310d54 Shouldn't have any effect - v2.12.0-1411-g94692dcd71 Should allow us to use RAMFB if we enable - it - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. de Paula ---- - hw/i386/pc_piix.c | 1 + - hw/i386/pc_q35.c | 4 +++- - 2 files changed, 4 insertions(+), 1 deletion(-) - -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index c86c48c..3b9ba95 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1037,6 +1037,7 @@ static void pc_machine_rhel7_options(MachineClass *m) - pcmc->default_nic_model = "e1000"; - m->default_display = "std"; - m->no_parallel = 1; -+ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); - compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); - m->alias = "pc"; - m->is_default = 1; -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 068813d..edf8e54 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -554,7 +554,9 @@ static void pc_q35_machine_rhel_options(MachineClass *m) - m->default_display = "std"; - m->no_floppy = 1; - m->no_parallel = 1; -- machine_class_allow_dynamic_sysbus_dev(m, TYPE_SYS_BUS_DEVICE); -+ machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); -+ machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); -+ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); - m->alias = "q35"; - m->max_cpus = 384; - compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); --- -1.8.3.1 - diff --git a/kvm-x86-machine-types-add-pc-q35-rhel8.1.0.patch b/kvm-x86-machine-types-add-pc-q35-rhel8.1.0.patch 
deleted file mode 100644 index 73eb148..0000000 --- a/kvm-x86-machine-types-add-pc-q35-rhel8.1.0.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 0784125ba3ccd72a590d210cf3f52d80e96b4263 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Wed, 31 Jul 2019 15:08:13 +0100 -Subject: [PATCH 4/6] x86 machine types: add pc-q35-rhel8.1.0 - -RH-Author: Dr. David Alan Gilbert -Message-id: <20190731150814.29571-5-dgilbert@redhat.com> -Patchwork-id: 89813 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v3 4/5] x86 machine types: add pc-q35-rhel8.1.0 -Bugzilla: 1719649 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Markus Armbruster -RH-Acked-by: Cornelia Huck - -From: "Dr. David Alan Gilbert" - -Create the 8.1.0 machine type for q35 and update the _options -functions to keep compatibility. - -Note: - We don't have to copy the kernel_irqchip_split from 4_0 since it - immediately got reverted in 4_0_1 - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. de Paula ---- - hw/i386/pc_piix.c | 6 ++++++ - hw/i386/pc_q35.c | 24 +++++++++++++++++++++++- - 2 files changed, 29 insertions(+), 1 deletion(-) - -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 3b9ba95..bf6b444 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1051,9 +1051,15 @@ static void pc_init_rhel760(MachineState *machine) - - static void pc_machine_rhel760_options(MachineClass *m) - { -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_machine_rhel7_options(m); - m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; - m->async_pf_vmexit_disable = true; -+ m->smbus_no_migration_support = true; -+ pcmc->pvh_enabled = false; -+ pcmc->default_cpu_version = CPU_VERSION_LEGACY; -+ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); -+ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); - compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); - compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); 
- } -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index edf8e54..b6d0bb3 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -554,6 +554,7 @@ static void pc_q35_machine_rhel_options(MachineClass *m) - m->default_display = "std"; - m->no_floppy = 1; - m->no_parallel = 1; -+ pcmc->default_cpu_version = 1; - machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); - machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); - machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); -@@ -562,6 +563,20 @@ static void pc_q35_machine_rhel_options(MachineClass *m) - compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); - } - -+static void pc_q35_init_rhel810(MachineState *machine) -+{ -+ pc_q35_init(machine); -+} -+ -+static void pc_q35_machine_rhel810_options(MachineClass *m) -+{ -+ pc_q35_machine_rhel_options(m); -+ m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; -+} -+ -+DEFINE_PC_MACHINE(q35_rhel810, "pc-q35-rhel8.1.0", pc_q35_init_rhel810, -+ pc_q35_machine_rhel810_options); -+ - static void pc_q35_init_rhel800(MachineState *machine) - { - pc_q35_init(machine); -@@ -569,8 +584,15 @@ static void pc_q35_init_rhel800(MachineState *machine) - - static void pc_q35_machine_rhel800_options(MachineClass *m) - { -- pc_q35_machine_rhel_options(m); -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel810_options(m); - m->desc = "RHEL-8.0.0 PC (Q35 + ICH9, 2009)"; -+ m->smbus_no_migration_support = true; -+ m->alias = NULL; -+ pcmc->pvh_enabled = false; -+ pcmc->default_cpu_version = CPU_VERSION_LEGACY; -+ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); -+ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); - } - - DEFINE_PC_MACHINE(q35_rhel800, "pc-q35-rhel8.0.0", pc_q35_init_rhel800, --- -1.8.3.1 - diff --git a/kvm-x86-machine-types-pc_rhel_8_0_compat.patch b/kvm-x86-machine-types-pc_rhel_8_0_compat.patch deleted file mode 100644 index cb2371f..0000000 --- 
a/kvm-x86-machine-types-pc_rhel_8_0_compat.patch +++ /dev/null @@ -1,88 +0,0 @@ -From e42808c29bdcebe62cdb5cdb1de4dc0910dd21d9 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Wed, 31 Jul 2019 15:08:10 +0100 -Subject: [PATCH 1/6] x86 machine types: pc_rhel_8_0_compat - -RH-Author: Dr. David Alan Gilbert -Message-id: <20190731150814.29571-2-dgilbert@redhat.com> -Patchwork-id: 89816 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v3 1/5] x86 machine types: pc_rhel_8_0_compat -Bugzilla: 1719649 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Markus Armbruster -RH-Acked-by: Cornelia Huck - -From: "Dr. David Alan Gilbert" - -Create the pc_rhel_8_0_compat array based off pc_compat_3_1. -It's the same except for a chunk of mpx=on entries that -we already put in the pc_rhel_7_6_compat. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. de Paula ---- - hw/i386/pc.c | 33 +++++++++++++++++++++++++++++++++ - include/hw/i386/pc.h | 3 +++ - 2 files changed, 36 insertions(+) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index b3d2d1e..f19fed4 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -366,6 +366,39 @@ GlobalProperty pc_rhel_compat[] = { - }; - const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); - -+GlobalProperty pc_rhel_8_0_compat[] = { -+ /* pc_rhel_8_0_compat from pc_compat_3_1 */ -+ { "intel-iommu", "dma-drain", "off" }, -+ /* pc_rhel_8_0_compat from pc_compat_3_1 */ -+ { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, -+ /* pc_rhel_8_0_compat from pc_compat_3_1 */ -+ { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "off" }, -+ /* pc_rhel_8_0_compat from pc_compat_3_1 */ -+ { "Opteron_G4" "-" TYPE_X86_CPU, "npt", "off" }, -+ /* pc_rhel_8_0_compat from pc_compat_3_1 */ -+ { "Opteron_G4" "-" TYPE_X86_CPU, "nrip-save", "off" }, -+ /* pc_rhel_8_0_compat from pc_compat_3_1 */ -+ { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "off" }, -+ /* pc_rhel_8_0_compat from pc_compat_3_1 */ -+ { "Opteron_G5" "-" TYPE_X86_CPU, "npt", "off" }, -+ /* 
pc_rhel_8_0_compat from pc_compat_3_1 */ -+ { "Opteron_G5" "-" TYPE_X86_CPU, "nrip-save", "off" }, -+ /* pc_rhel_8_0_compat from pc_compat_3_1 */ -+ { "EPYC" "-" TYPE_X86_CPU, "npt", "off" }, -+ /* pc_rhel_8_0_compat from pc_compat_3_1 */ -+ { "EPYC" "-" TYPE_X86_CPU, "nrip-save", "off" }, -+ /* pc_rhel_8_0_compat from pc_compat_3_1 */ -+ { "EPYC-IBPB" "-" TYPE_X86_CPU, "npt", "off" }, -+ /* pc_rhel_8_0_compat from pc_compat_3_1 */ -+ { "EPYC-IBPB" "-" TYPE_X86_CPU, "nrip-save", "off" }, -+ /** The mpx=on entries from pc_compat_3_1 are in pc_rhel_7_6_compat **/ -+ /* pc_rhel_8_0_compat from pc_compat_3_1 */ -+ { "Cascadelake-Server" "-" TYPE_X86_CPU, "stepping", "5" }, -+ /* pc_rhel_8_0_compat from pc_compat_3_1 */ -+ { TYPE_X86_CPU, "x-intel-pt-auto-level", "off" }, -+}; -+const size_t pc_rhel_8_0_compat_len = G_N_ELEMENTS(pc_rhel_8_0_compat); -+ - /* Similar to PC_COMPAT_3_0 + PC_COMPAT_2_12, but: - * all of the 2_12 stuff was already in 7.6 from bz 1481253 - * x-migrate-smi-count comes from PC_COMPAT_2_11 but -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 605cc71..2f24333 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -368,6 +368,9 @@ extern const size_t pc_compat_1_4_len; - extern GlobalProperty pc_rhel_compat[]; - extern const size_t pc_rhel_compat_len; - -+extern GlobalProperty pc_rhel_8_0_compat[]; -+extern const size_t pc_rhel_8_0_compat_len; -+ - extern GlobalProperty pc_rhel_7_6_compat[]; - extern const size_t pc_rhel_7_6_compat_len; - --- -1.8.3.1 - diff --git a/kvm-x86-machine-types-q35-Fixup-units_per_default_bus.patch b/kvm-x86-machine-types-q35-Fixup-units_per_default_bus.patch deleted file mode 100644 index 781c3af..0000000 --- a/kvm-x86-machine-types-q35-Fixup-units_per_default_bus.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 9de83a880cf0e397db7c8bfdbf009f137c8eaf8a Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Wed, 31 Jul 2019 15:08:11 +0100 -Subject: [PATCH 2/6] x86 machine types: q35: Fixup units_per_default_bus - -RH-Author: Dr. David Alan Gilbert -Message-id: <20190731150814.29571-3-dgilbert@redhat.com> -Patchwork-id: 89818 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH v3 2/5] x86 machine types: q35: Fixup units_per_default_bus -Bugzilla: 1719649 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Markus Armbruster -RH-Acked-by: Cornelia Huck - -x86 machine types: q35: Fixup units_per_default_bus - -We omitted the line: - m->units_per_default_bus = 1; - -in our rebase from 2.1.2 (which doesn't have ->units_per_default_bus) -to 2.3.0 (which does). Specifically, in commit ed6d215ef93. - -It's safe for us to add, because: - a) It changes the behaviour when you don't specify a bus/device - number, however libvirt always specifies it, so it's always - safe downstream for us with libvirt which we require. - - b) The behaviour change isn't actually seen by the guest. i.e. - the change from having two SATA devices from: - ide0-hd0, ide0-hd1 - to - ide0-hd0, ide1-hd0 - - is hidden because by the time it gets through the SATA code - it ends up back as two single SATA devices on their own bus. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/i386/pc_q35.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 4959ed3..068813d 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -549,6 +549,7 @@ static void pc_q35_machine_rhel_options(MachineClass *m) - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pcmc->default_nic_model = "e1000e"; - m->family = "pc_q35_Z"; -+ m->units_per_default_bus = 1; - m->default_machine_opts = "firmware=bios-256k.bin"; - m->default_display = "std"; - m->no_floppy = 1; --- -1.8.3.1 - diff --git a/qemu-kvm.spec b/qemu-kvm.spec index d8c60ca..72a794f 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -1,5 +1,6 @@ %global SLOF_gittagdate 20170724 %global SLOF_gittagcommit 89f519f +%global rcversion -rc1 %global have_usbredir 1 %global have_spice 1 @@ -66,8 +67,8 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 4.1.0 -Release: 14%{?dist} +Version: 4.2.0 +Release: 0%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -76,7 +77,7 @@ URL: http://www.qemu.org/ ExclusiveArch: x86_64 %{power64} aarch64 s390x -Source0: http://wiki.qemu.org/download/qemu-4.1.0.tar.xz +Source0: http://wiki.qemu.org/download/qemu-4.2.0-rc1.tar.xz # KSM control scripts Source4: ksm.service @@ -104,134 +105,23 @@ Source35: udev-kvm-check.c Source36: README.tests -Patch0004: 0004-Initial-redhat-build.patch -Patch0005: 0005-Enable-disable-devices-for-RHEL.patch -Patch0006: 0006-Machine-type-related-general-changes.patch -Patch0007: 0007-Add-aarch64-machine-types.patch -Patch0008: 0008-Add-ppc64-machine-types.patch -Patch0009: 0009-Add-s390x-machine-types.patch -Patch0010: 0010-Add-x86_64-machine-types.patch -Patch0011: 0011-Enable-make-check.patch -Patch0012: 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch -Patch0013: 0013-Add-support-statement-to-help-output.patch -Patch0014: 0014-globally-limit-the-maximum-number-of-CPUs.patch -Patch0015: 0015-Add-support-for-simpletrace.patch -Patch0016: 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch -Patch0017: 0017-usb-xhci-Fix-PCI-capability-order.patch -Patch0018: 0018-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch -Patch0019: 0019-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -Patch0020: 0020-pc-Don-t-make-die-id-mandatory-unless-necessary.patch -# For bz#1719649 - 8.1 machine type for x86 -Patch21: kvm-x86-machine-types-pc_rhel_8_0_compat.patch -# For bz#1719649 - 8.1 machine type for x86 -Patch22: kvm-x86-machine-types-q35-Fixup-units_per_default_bus.patch -# For bz#1719649 - 8.1 machine type for x86 -Patch23: kvm-x86-machine-types-Fixup-dynamic-sysbus-entries.patch -# For bz#1719649 - 8.1 machine type for x86 -Patch24: kvm-x86-machine-types-add-pc-q35-rhel8.1.0.patch -# For bz#1719649 - 8.1 machine type for x86 -Patch25: kvm-machine-types-Update-hw_compat_rhel_8_0-from-hw_comp.patch -# For bz#1719649 - 8.1 machine type for x86 
-Patch26: kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch -# For bz#1738626 - Disable memfd in QEMU -# For bz#1740797 - Disable memfd in QEMU -Patch27: kvm-RHEL-disable-hostmem-memfd.patch -# For bz#1693772 - [IBM zKVM] RHEL AV 8.1.0 machine type update for s390x -Patch28: kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch -# For bz#1693772 - [IBM zKVM] RHEL AV 8.1.0 machine type update for s390x -Patch29: kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch -# For bz#1744170 - [IBM Power] New 8.1.0 machine type for pseries -Patch31: kvm-redhat-update-pseries-rhel8.1.0-machine-type.patch -# For bz#1743142 - Boot guest with multiple e1000 devices, qemu will crash after several guest reboots: kvm_mem_ioeventfd_add: error adding ioeventfd: No space left on device (28) -Patch32: kvm-memory-Refactor-memory_region_clear_coalescing.patch -# For bz#1743142 - Boot guest with multiple e1000 devices, qemu will crash after several guest reboots: kvm_mem_ioeventfd_add: error adding ioeventfd: No space left on device (28) -Patch33: kvm-memory-Split-zones-when-do-coalesced_io_del.patch -# For bz#1743142 - Boot guest with multiple e1000 devices, qemu will crash after several guest reboots: kvm_mem_ioeventfd_add: error adding ioeventfd: No space left on device (28) -Patch34: kvm-memory-Remove-has_coalesced_range-counter.patch -# For bz#1743142 - Boot guest with multiple e1000 devices, qemu will crash after several guest reboots: kvm_mem_ioeventfd_add: error adding ioeventfd: No space left on device (28) -Patch35: kvm-memory-Fix-up-memory_region_-add-del-_coalescing.patch -# For bz#1516220 - -trace help prints an incomplete list of trace events -Patch36: kvm-trace-Clarify-DTrace-SystemTap-help-message.patch -# For bz#1726898 - Parallel migration fails with error "Unable to write to socket: Connection reset by peer" now and then -Patch37: kvm-socket-Add-backlog-parameter-to-socket_listen.patch -# For bz#1726898 - Parallel migration fails with 
error "Unable to write to socket: Connection reset by peer" now and then -Patch38: kvm-socket-Add-num-connections-to-qio_channel_socket_syn.patch -# For bz#1726898 - Parallel migration fails with error "Unable to write to socket: Connection reset by peer" now and then -Patch39: kvm-socket-Add-num-connections-to-qio_channel_socket_asy.patch -# For bz#1726898 - Parallel migration fails with error "Unable to write to socket: Connection reset by peer" now and then -Patch40: kvm-socket-Add-num-connections-to-qio_net_listener_open_.patch -# For bz#1726898 - Parallel migration fails with error "Unable to write to socket: Connection reset by peer" now and then -Patch41: kvm-multifd-Use-number-of-channels-as-listen-backlog.patch -# For bz#1744107 - Migration from P8(qemu4.1) to P9(qemu4.1), after migration, qemu crash on destination with error message "qemu-kvm: error while loading state for instance 0x1 of device 'cpu'" -Patch42: kvm-pseries-Fix-compat_pvr-on-reset.patch -# For bz#1744107 - Migration from P8(qemu4.1) to P9(qemu4.1), after migration, qemu crash on destination with error message "qemu-kvm: error while loading state for instance 0x1 of device 'cpu'" -Patch43: kvm-spapr-Set-compat-mode-in-spapr_core_plug.patch -# For bz#1747836 - Call traces after guest migration due to incorrect handling of the timebase -Patch44: kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch -# For bz#1746790 - qemu core dump while migrate from RHEL7.6 to RHEL8.1 -Patch45: kvm-ehci-fix-queue-dev-null-ptr-dereference.patch -# For bz#1743477 - Since bd94bc06479a "spapr: change default interrupt mode to 'dual'", QEMU resets the machine to select the appropriate interrupt controller. And -no-reboot prevents that. 
-Patch46: kvm-spapr-Use-SHUTDOWN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch -# For bz#1749134 - I/O error when virtio-blk disk is backed by a raw image on 4k disk -Patch47: kvm-file-posix-Handle-undetectable-alignment.patch -# For bz#1749134 - I/O error when virtio-blk disk is backed by a raw image on 4k disk -Patch48: kvm-block-posix-Always-allocate-the-first-block.patch -# For bz#1749134 - I/O error when virtio-blk disk is backed by a raw image on 4k disk -Patch49: kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch -# For bz#1734316 - multifd migration does not honour speed limits, consumes entire bandwidth of NIC -Patch50: kvm-migration-always-initialise-ram_counters-for-a-new-m.patch -# For bz#1734316 - multifd migration does not honour speed limits, consumes entire bandwidth of NIC -Patch51: kvm-migration-add-qemu_file_update_transfer-interface.patch -# For bz#1734316 - multifd migration does not honour speed limits, consumes entire bandwidth of NIC -Patch52: kvm-migration-add-speed-limit-for-multifd-migration.patch -# For bz#1734316 - multifd migration does not honour speed limits, consumes entire bandwidth of NIC -Patch53: kvm-migration-update-ram_counters-for-multifd-sync-packe.patch -# For bz#1750200 - [RHEL8.1][QEMU4.1]boot up guest with vf device,then system_reset guest,error prompt(qemu-kvm: Can't allocate MSIs for device 2800: IRQ 4904 is not free) -Patch54: kvm-spapr-pci-Consolidate-de-allocation-of-MSIs.patch -# For bz#1750200 - [RHEL8.1][QEMU4.1]boot up guest with vf device,then system_reset guest,error prompt(qemu-kvm: Can't allocate MSIs for device 2800: IRQ 4904 is not free) -Patch55: kvm-spapr-pci-Free-MSIs-during-reset.patch -# For bz#1748725 - [ppc][migration][v6.3-rc1-p1ce8930]basic migration failed with "qemu-kvm: KVM_SET_DEVICE_ATTR failed: Group 3 attr 0x0000000000001309: Device or resource busy" -Patch56: kvm-spapr-xive-Mask-the-EAS-when-allocating-an-IRQ.patch -# For bz#1746267 - qemu coredump: qemu-kvm: block/create.c:68: 
qmp_blockdev_create: Assertion `drv' failed -Patch57: kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch -# For bz#1717321 - qemu-kvm core dumped when repeat "system_reset" multiple times during guest boot -Patch58: kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch -# For bz#1749737 - CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-av-8] -Patch59: kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch -# For bz#1746631 - Qemu core dump when do block commit under stress -Patch60: kvm-blockjob-update-nodes-head-while-removing-all-bdrv.patch -# For bz#1724008 - QEMU core dumped "memory_region_get_ram_ptr: Assertion `mr->ram_block' failed" -# For bz#1736788 - QEMU core dumped if boot guest with nvdimm backed by /dev/dax0.0 and option pmem=off -Patch61: kvm-hostmem-file-fix-pmem-file-size-check.patch -# For bz#1724008 - QEMU core dumped "memory_region_get_ram_ptr: Assertion `mr->ram_block' failed" -# For bz#1736788 - QEMU core dumped if boot guest with nvdimm backed by /dev/dax0.0 and option pmem=off -Patch62: kvm-memory-fetch-pmem-size-in-get_file_size.patch -# For bz#1753992 - core dump when testing persistent reservation in guest -Patch63: kvm-pr-manager-Fix-invalid-g_free-crash-bug.patch -# For bz#1745922 - Luks-inside-qcow2 snapshot cannot boot after 'qemu-img rebase' -Patch64: kvm-block-Use-QEMU_IS_ALIGNED.patch -# For bz#1745922 - Luks-inside-qcow2 snapshot cannot boot after 'qemu-img rebase' -Patch65: kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch -# For bz#1745922 - Luks-inside-qcow2 snapshot cannot boot after 'qemu-img rebase' -Patch66: kvm-block-qcow2-refactor-encryption-code.patch -# For bz#1745922 - Luks-inside-qcow2 snapshot cannot boot after 'qemu-img rebase' -Patch67: kvm-qemu-iotests-Add-test-for-bz-1745922.patch -# For bz#1748253 - QEMU crashes (core dump) when using the integrated NDB server with data-plane -Patch68: 
kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch -# For bz#1744955 - Qemu hang when block resize a qcow2 image -Patch69: kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch -# For bz#1756413 - backport support for transactionable block-dirty-bitmap-remove for incremental backup support -Patch70: kvm-blockdev-reduce-aio_context-locked-sections-in-bitma.patch -# For bz#1756413 - backport support for transactionable block-dirty-bitmap-remove for incremental backup support -Patch71: kvm-qapi-implement-block-dirty-bitmap-remove-transaction.patch -# For bz#1756413 - backport support for transactionable block-dirty-bitmap-remove for incremental backup support -Patch72: kvm-iotests-test-bitmap-moving-inside-254.patch -# For bz#1754710 - qemu core dumped when hotpluging vcpus -Patch73: kvm-spapr-xive-skip-partially-initialized-vCPUs-in-prese.patch -# For bz#1741094 - [Upstream]Incremental backup: Qemu coredump when expose an active bitmap via pull mode(data plane enable) -Patch74: kvm-nbd-Grab-aio-context-lock-in-more-places.patch -# For bz#1741094 - [Upstream]Incremental backup: Qemu coredump when expose an active bitmap via pull mode(data plane enable) -Patch75: kvm-tests-Use-iothreads-during-iotest-223.patch +Patch0005: 0005-Initial-redhat-build.patch +Patch0006: 0006-Enable-disable-devices-for-RHEL.patch +Patch0007: 0007-Machine-type-related-general-changes.patch +Patch0008: 0008-Add-aarch64-machine-types.patch +Patch0009: 0009-Add-ppc64-machine-types.patch +Patch0010: 0010-Add-s390x-machine-types.patch +Patch0011: 0011-Add-x86_64-machine-types.patch +Patch0012: 0012-Enable-make-check.patch +Patch0013: 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +Patch0014: 0014-Add-support-statement-to-help-output.patch +Patch0015: 0015-globally-limit-the-maximum-number-of-CPUs.patch +Patch0016: 0016-Add-support-for-simpletrace.patch +Patch0017: 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0018: 
0018-usb-xhci-Fix-PCI-capability-order.patch +Patch0019: 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +Patch0020: 0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +Patch0021: 0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch BuildRequires: wget BuildRequires: rpm-build @@ -517,7 +407,7 @@ the Secure Shell (SSH) protocol. %prep -%setup -n qemu-%{version} +%setup -n qemu-%{version}%{rcversion} %autopatch -p1 %build @@ -808,8 +698,6 @@ mkdir -p $RPM_BUILD_ROOT%{_bindir} install -c -m 0755 qemu-ga ${RPM_BUILD_ROOT}%{_bindir}/qemu-ga mkdir -p $RPM_BUILD_ROOT%{_mandir}/man8 -install -m 0644 qemu-ga.8 ${RPM_BUILD_ROOT}%{_mandir}/man8/ - install -m 0755 qemu-kvm $RPM_BUILD_ROOT%{_libexecdir}/ install -m 0644 qemu-kvm.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ @@ -838,7 +726,7 @@ mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format scripts/tracetool/format/*.py mkdir -p $RPM_BUILD_ROOT%{qemudocdir} -install -p -m 0644 -t ${RPM_BUILD_ROOT}%{qemudocdir} Changelog README README.systemtap COPYING COPYING.LIB LICENSE docs/interop/qmp-spec.txt +install -p -m 0644 -t ${RPM_BUILD_ROOT}%{qemudocdir} Changelog README.rst README.systemtap COPYING COPYING.LIB LICENSE docs/interop/qmp-spec.txt chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man1/* chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man8/* @@ -875,6 +763,8 @@ rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-sifive_u-fw_jump.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-virt-fw_jump.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu-nsis.bmp +rm -rf ${RPM_BUILD_ROOT}%{_libdir}/qemu-kvm/ui-spice-app.so + %ifarch s390x # Use the s390-ccw.img that we've just built, not the pre-built one install -m 0644 pc-bios/s390-ccw/s390-ccw.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ @@ -882,10 +772,6 @@ rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu-nsis.bmp rm -rf 
${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-netboot.img %endif -%ifnarch %{power64} - rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/spapr-rtas.bin -%endif - %ifnarch x86_64 rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/kvmvapic.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/linuxboot.bin @@ -1026,7 +912,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %defattr(-,root,root) %dir %{qemudocdir} %doc %{qemudocdir}/Changelog -%doc %{qemudocdir}/README +%doc %{qemudocdir}/README.rst %doc %{qemudocdir}/qemu-doc.html %doc %{qemudocdir}/COPYING %doc %{qemudocdir}/COPYING.LIB @@ -1108,9 +994,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/icons/* %{_datadir}/%{name}/linuxboot_dma.bin %{_datadir}/%{name}/dump-guest-memory.py* -%ifarch %{power64} - %{_datadir}/%{name}/spapr-rtas.bin -%endif %{_libexecdir}/qemu-kvm %{_datadir}/systemtap/tapset/qemu-kvm.stp %{_datadir}/systemtap/tapset/qemu-kvm-log.stp @@ -1141,7 +1024,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %files -n qemu-guest-agent %defattr(-,root,root,-) -%doc COPYING README +%doc COPYING README.rst %{_bindir}/qemu-ga %{_mandir}/man8/qemu-ga.8* %{_unitdir}/qemu-guest-agent.service @@ -1173,19 +1056,17 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog -* Tue Nov 12 2019 Danilo Cesar Lemes de Paula - 4.1.0-14.el8 -- kvm-blockdev-reduce-aio_context-locked-sections-in-bitma.patch [bz#1756413] -- kvm-qapi-implement-block-dirty-bitmap-remove-transaction.patch [bz#1756413] -- kvm-iotests-test-bitmap-moving-inside-254.patch [bz#1756413] -- kvm-spapr-xive-skip-partially-initialized-vCPUs-in-prese.patch [bz#1754710] -- kvm-nbd-Grab-aio-context-lock-in-more-places.patch [bz#1741094] -- kvm-tests-Use-iothreads-during-iotest-223.patch [bz#1741094] -- Resolves: bz#1741094 - ([Upstream]Incremental backup: Qemu coredump when expose an active bitmap via pull mode(data plane enable)) -- Resolves: bz#1754710 - (qemu core dumped when hotpluging vcpus) -- Resolves: 
bz#1756413 - (backport support for transactionable block-dirty-bitmap-remove for incremental backup support) +* Fri Nov 15 2019 Danilo Cesar Lemes de Paula - 4.2.0-0.el8 +- Rebase to 4.2 + +* Tue Oct 29 2019 Danilo Cesar Lemes de Paula - 4.1.0-14.el8 +- kvm-Revert-qcow2-skip-writing-zero-buffers-to-empty-COW-.patch [bz#1751934] +- kvm-coroutine-Add-qemu_co_mutex_assert_locked.patch [bz#1764721] +- kvm-qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch [bz#1764721] +- Resolves: bz#1751934 + (Fail to install guest when xfs is the host filesystem) +- Resolves: bz#1764721 + (qcow2 image corruption due to incorrect locking in preallocation detection) * Fri Sep 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-13.el8 - kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch [bz#1748253] diff --git a/sources b/sources index b509b83..310b89f 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-4.1.0.tar.xz) = 82fd51702a7b9b1b00b2f1bd3b4a832b80249018dbba1add0b0a73e7d4bee452afd45574b4d8df7ce4477d8711f3bda4ca072a1a6de25895c93eb21cf78fc4b2 +SHA512 (qemu-4.2.0-rc1.tar.xz) = 8ad5e0472fd384a9ba03b2e8fbb1e887169abb47a50a3f130b1943b39f45677a9e65ca5d1deb96338a5b3c3953db67f50e194a6763e9121c0eb5f620896162a9 From 320ac89d3fb7864c3d7d416ed8b71269591e5169 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Tue, 19 Nov 2019 11:49:41 +0000 Subject: [PATCH 057/195] * Tue Nov 19 2019 Danilo Cesar Lemes de Paula - 4.2.0-1.el8 - 0023-Temporarily-update-VERSION-to-8.2.0.patch [bz#1773397] - Resolves: bz#1773397 (QEMU emulator version is "4.1.91" for qemu-kvm-4.2.0-0.module+el8.2.0+471) - Resolves: bz#1773392 ([ppc] Need to rebase SLOF image for qemu-kvm-4.2) --- ...-Temporarily-update-VERSION-to-8.2.0.patch | 25 +++++++++++++++++++ qemu-kvm.spec | 14 ++++++++--- 2 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 0023-Temporarily-update-VERSION-to-8.2.0.patch diff --git a/0023-Temporarily-update-VERSION-to-8.2.0.patch b/0023-Temporarily-update-VERSION-to-8.2.0.patch new file mode 100644 index 0000000..1350598 --- /dev/null +++ b/0023-Temporarily-update-VERSION-to-8.2.0.patch @@ -0,0 +1,25 @@ +From 1afa318b8a37aa999221ad4afa01e14a242f7476 Mon Sep 17 00:00:00 2001 +From: "Danilo C. L. de Paula" +Date: Tue, 19 Nov 2019 08:41:33 -0300 +Subject: Temporarily update VERSION to 8.2.0 + +rhbz#1773397 + +This will change when the official release appears. +But, to make qemu-kvm --version happy, we need to fix this now. + +Signed-off-by: Danilo C. L. de Paula +--- + VERSION | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/VERSION b/VERSION +index cfcbdd75dd..6aba2b245a 100644 +--- a/VERSION ++++ b/VERSION +@@ -1 +1 @@ +-4.1.91 ++4.2.0 +-- +2.21.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 72a794f..5a91c0c 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -1,5 +1,5 @@ -%global SLOF_gittagdate 20170724 -%global SLOF_gittagcommit 89f519f +%global SLOF_gittagdate 20191022 +%global SLOF_gittagcommit 899d9883 %global rcversion -rc1 %global have_usbredir 1 @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 0%{?dist} +Release: 1%{?dist} # Epoch because we pushed a qemu-1.0 package.
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -122,6 +122,7 @@ Patch0018: 0018-usb-xhci-Fix-PCI-capability-order.patch Patch0019: 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0020: 0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch Patch0021: 0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch +Patch0023: 0023-Temporarily-update-VERSION-to-8.2.0.patch BuildRequires: wget BuildRequires: rpm-build @@ -1056,6 +1057,13 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Nov 19 2019 Danilo Cesar Lemes de Paula - 4.2.0-1.el8 +- 0023-Temporarily-update-VERSION-to-8.2.0.patch [bz#1773397] +- Resolves: bz#1773397 + (QEMU emulator version is "4.1.91" for qemu-kvm-4.2.0-0.module+el8.2.0+471) +- Resoves: bz#1773392 + ([ppc] Need to rebase SLOF image for qemu-kvm-4.2) + * Fri Nov 15 2019 Danilo Cesar Lemes de Paula - 4.2.0-0.el8 - Rebase to 4.2 From e9e1d3955adc27076154d9c899100c70dabe404b Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Mon, 9 Dec 2019 20:39:05 +0000 Subject: [PATCH 058/195] rebase to qemu-kvm-4.2.0-rc4 --- .gitignore | 1 + ...at-Adding-slirp-to-the-exploded-tree.patch | 16352 ++++++++++++++++ 0005-Initial-redhat-build.patch | 29 +- 0006-Enable-disable-devices-for-RHEL.patch | 245 +- ...Machine-type-related-general-changes.patch | 56 +- 0008-Add-aarch64-machine-types.patch | 54 +- 0009-Add-ppc64-machine-types.patch | 34 +- 0010-Add-s390x-machine-types.patch | 8 +- 0011-Add-x86_64-machine-types.patch | 58 +- 0012-Enable-make-check.patch | 24 +- ...mber-of-devices-that-can-be-assigned.patch | 12 +- ...Add-support-statement-to-help-output.patch | 6 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 10 +- 0016-Add-support-for-simpletrace.patch | 16 +- ...documentation-instead-of-qemu-system.patch | 14 +- 0018-usb-xhci-Fix-PCI-capability-order.patch | 6 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 6 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 6 +- ...er-m_free-might-read-pointers-from-a.patch | 6 +- ...-Temporarily-update-VERSION-to-8.2.0.patch | 11 +- qemu-kvm.spec | 18 +- sources | 2 +- 22 files changed, 16689 insertions(+), 285 deletions(-) create mode 100644 0001-redhat-Adding-slirp-to-the-exploded-tree.patch diff --git a/.gitignore b/.gitignore index eed8b13..356cc4d 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ /qemu-4.1.0-rc4.tar.xz /qemu-4.1.0.tar.xz /qemu-4.2.0-rc1.tar.xz +/qemu-4.2.0-rc4.tar.xz diff --git a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch new file mode 100644 index 0000000..218f66f --- /dev/null +++ b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch @@ -0,0 +1,16352 @@ +From 882cfbbb1d29840876617233781c95e821c203be Mon Sep 17 00:00:00 2001 +From: Danilo de Paula +Date: Sat, 7 Sep 2019 02:07:56 +0100 +Subject: redhat: Adding slirp to the exploded tree + +RH-Author: Danilo de Paula +Message-id: <20190907020756.8619-1-ddepaula@redhat.com> +Patchwork-id: 90309 
+O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] redhat: Adding slirp to the exploded tree +Bugzilla: +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Wainer dos Santos Moschetta + +Until qemu-kvm-3.1 slirp used to live as a regular folder in qemu-kvm. +After that it got moved into its own submodule. Which means it's not +part of the qemu-kvm git tree anymore. + +This passed unoticed for RHEL-AV-8.0.1 and 8.1.0 because qemu still ships +the code in the tarball. That's why scratch builds still works (it's based in +the tarball content). + +As we're receiving some CVE's against slirp, we need a way to patch +slirp in RHEL-8.1.0 without handling as a separate package (as we do for +firmwares). + +The simplest solution is to copy the slirp folder from the tarball into the +exploded tree. + +To be able to do that, I had to make some changes: + +slirp needs to be removed from .gitmodules, otherwise git complains +about files on it. + +Since "make -C redhat rh-brew" uses the tarball and apply all the +patches on top of it, we need to remove the folder from the tarball before applying +the patch (because we are actually re-applying them). + +We also need to use --ignore-submodule while generating the patches for +scratch-build, otherwise it will include some weird definition of the +slirp folder in the patch, something that /usr/bin/patch gets mad with. + +After that I compared the patch list, after and before this change, and +saw no major differences. + +This is an exploded-tree-only change and shouldn't be applied to dist-git. + +Signed-off-by: Danilo C. L. 
de Paula +--- + .gitmodules | 3 - + slirp/.clang-format | 58 ++ + slirp/.gitignore | 10 + + slirp/.gitlab-ci.yml | 24 + + slirp/COPYRIGHT | 62 ++ + slirp/Makefile | 60 ++ + slirp/README.md | 60 ++ + slirp/meson.build | 127 ++++ + slirp/src/arp_table.c | 91 +++ + slirp/src/bootp.c | 370 ++++++++++ + slirp/src/bootp.h | 129 ++++ + slirp/src/cksum.c | 179 +++++ + slirp/src/debug.h | 51 ++ + slirp/src/dhcpv6.c | 225 ++++++ + slirp/src/dhcpv6.h | 68 ++ + slirp/src/dnssearch.c | 311 ++++++++ + slirp/src/if.c | 213 ++++++ + slirp/src/if.h | 21 + + slirp/src/ip.h | 242 ++++++ + slirp/src/ip6.h | 214 ++++++ + slirp/src/ip6_icmp.c | 434 +++++++++++ + slirp/src/ip6_icmp.h | 219 ++++++ + slirp/src/ip6_input.c | 78 ++ + slirp/src/ip6_output.c | 39 + + slirp/src/ip_icmp.c | 477 ++++++++++++ + slirp/src/ip_icmp.h | 166 +++++ + slirp/src/ip_input.c | 462 ++++++++++++ + slirp/src/ip_output.c | 169 +++++ + slirp/src/libslirp-version.h.in | 23 + + slirp/src/libslirp.h | 119 +++ + slirp/src/libslirp.map | 21 + + slirp/src/main.h | 16 + + slirp/src/mbuf.c | 224 ++++++ + slirp/src/mbuf.h | 127 ++++ + slirp/src/misc.c | 298 ++++++++ + slirp/src/misc.h | 63 ++ + slirp/src/ncsi-pkt.h | 445 +++++++++++ + slirp/src/ncsi.c | 192 +++++ + slirp/src/ndp_table.c | 87 +++ + slirp/src/sbuf.c | 186 +++++ + slirp/src/sbuf.h | 27 + + slirp/src/slirp.c | 1112 ++++++++++++++++++++++++++++ + slirp/src/slirp.h | 273 +++++++ + slirp/src/socket.c | 935 ++++++++++++++++++++++++ + slirp/src/socket.h | 164 +++++ + slirp/src/state.c | 381 ++++++++++ + slirp/src/stream.c | 120 +++ + slirp/src/stream.h | 35 + + slirp/src/tcp.h | 181 +++++ + slirp/src/tcp_input.c | 1540 +++++++++++++++++++++++++++++++++++++++ + slirp/src/tcp_output.c | 516 +++++++++++++ + slirp/src/tcp_subr.c | 975 +++++++++++++++++++++++++ + slirp/src/tcp_timer.c | 286 ++++++++ + slirp/src/tcp_timer.h | 130 ++++ + slirp/src/tcp_var.h | 161 ++++ + slirp/src/tcpip.h | 104 +++ + slirp/src/tftp.c | 462 ++++++++++++ + slirp/src/tftp.h | 52 ++ + 
slirp/src/udp.c | 354 +++++++++ + slirp/src/udp.h | 90 +++ + slirp/src/udp6.c | 173 +++++ + slirp/src/util.c | 366 ++++++++++ + slirp/src/util.h | 180 +++++ + slirp/src/version.c | 11 + + slirp/src/vmstate.c | 445 +++++++++++ + slirp/src/vmstate.h | 391 ++++++++++ + 66 files changed, 15824 insertions(+), 3 deletions(-) + create mode 100644 slirp/.clang-format + create mode 100644 slirp/.gitignore + create mode 100644 slirp/.gitlab-ci.yml + create mode 100644 slirp/COPYRIGHT + create mode 100644 slirp/Makefile + create mode 100644 slirp/README.md + create mode 100644 slirp/meson.build + create mode 100644 slirp/src/arp_table.c + create mode 100644 slirp/src/bootp.c + create mode 100644 slirp/src/bootp.h + create mode 100644 slirp/src/cksum.c + create mode 100644 slirp/src/debug.h + create mode 100644 slirp/src/dhcpv6.c + create mode 100644 slirp/src/dhcpv6.h + create mode 100644 slirp/src/dnssearch.c + create mode 100644 slirp/src/if.c + create mode 100644 slirp/src/if.h + create mode 100644 slirp/src/ip.h + create mode 100644 slirp/src/ip6.h + create mode 100644 slirp/src/ip6_icmp.c + create mode 100644 slirp/src/ip6_icmp.h + create mode 100644 slirp/src/ip6_input.c + create mode 100644 slirp/src/ip6_output.c + create mode 100644 slirp/src/ip_icmp.c + create mode 100644 slirp/src/ip_icmp.h + create mode 100644 slirp/src/ip_input.c + create mode 100644 slirp/src/ip_output.c + create mode 100644 slirp/src/libslirp-version.h.in + create mode 100644 slirp/src/libslirp.h + create mode 100644 slirp/src/libslirp.map + create mode 100644 slirp/src/main.h + create mode 100644 slirp/src/mbuf.c + create mode 100644 slirp/src/mbuf.h + create mode 100644 slirp/src/misc.c + create mode 100644 slirp/src/misc.h + create mode 100644 slirp/src/ncsi-pkt.h + create mode 100644 slirp/src/ncsi.c + create mode 100644 slirp/src/ndp_table.c + create mode 100644 slirp/src/sbuf.c + create mode 100644 slirp/src/sbuf.h + create mode 100644 slirp/src/slirp.c + create mode 100644 
slirp/src/slirp.h + create mode 100644 slirp/src/socket.c + create mode 100644 slirp/src/socket.h + create mode 100644 slirp/src/state.c + create mode 100644 slirp/src/stream.c + create mode 100644 slirp/src/stream.h + create mode 100644 slirp/src/tcp.h + create mode 100644 slirp/src/tcp_input.c + create mode 100644 slirp/src/tcp_output.c + create mode 100644 slirp/src/tcp_subr.c + create mode 100644 slirp/src/tcp_timer.c + create mode 100644 slirp/src/tcp_timer.h + create mode 100644 slirp/src/tcp_var.h + create mode 100644 slirp/src/tcpip.h + create mode 100644 slirp/src/tftp.c + create mode 100644 slirp/src/tftp.h + create mode 100644 slirp/src/udp.c + create mode 100644 slirp/src/udp.h + create mode 100644 slirp/src/udp6.c + create mode 100644 slirp/src/util.c + create mode 100644 slirp/src/util.h + create mode 100644 slirp/src/version.c + create mode 100644 slirp/src/vmstate.c + create mode 100644 slirp/src/vmstate.h + +diff --git a/slirp/.clang-format b/slirp/.clang-format +new file mode 100644 +index 0000000..17fb49f +--- /dev/null ++++ b/slirp/.clang-format +@@ -0,0 +1,58 @@ ++# https://clang.llvm.org/docs/ClangFormat.html ++# https://clang.llvm.org/docs/ClangFormatStyleOptions.html ++--- ++Language: Cpp ++AlignAfterOpenBracket: Align ++AlignConsecutiveAssignments: false # although we like it, it creates churn ++AlignConsecutiveDeclarations: false ++AlignEscapedNewlinesLeft: true ++AlignOperands: true ++AlignTrailingComments: false # churn ++AllowAllParametersOfDeclarationOnNextLine: true ++AllowShortBlocksOnASingleLine: false ++AllowShortCaseLabelsOnASingleLine: false ++AllowShortFunctionsOnASingleLine: None ++AllowShortIfStatementsOnASingleLine: false ++AllowShortLoopsOnASingleLine: false ++AlwaysBreakAfterReturnType: None # AlwaysBreakAfterDefinitionReturnType is taken into account ++AlwaysBreakBeforeMultilineStrings: false ++BinPackArguments: true ++BinPackParameters: true ++BraceWrapping: ++ AfterControlStatement: false ++ AfterEnum: false ++ 
AfterFunction: true ++ AfterStruct: false ++ AfterUnion: false ++ BeforeElse: false ++ IndentBraces: false ++BreakBeforeBinaryOperators: None ++BreakBeforeBraces: Custom ++BreakBeforeTernaryOperators: false ++BreakStringLiterals: true ++ColumnLimit: 80 ++ContinuationIndentWidth: 4 ++Cpp11BracedListStyle: false ++DerivePointerAlignment: false ++DisableFormat: false ++IndentCaseLabels: false ++IndentWidth: 4 ++IndentWrappedFunctionNames: false ++KeepEmptyLinesAtTheStartOfBlocks: false ++MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? ++MacroBlockEnd: '.*_END$' ++MaxEmptyLinesToKeep: 2 ++PointerAlignment: Right ++ReflowComments: true ++SortIncludes: false ++SpaceAfterCStyleCast: false ++SpaceBeforeAssignmentOperators: true ++SpaceBeforeParens: ControlStatements ++SpaceInEmptyParentheses: false ++SpacesBeforeTrailingComments: 1 ++SpacesInContainerLiterals: true ++SpacesInParentheses: false ++SpacesInSquareBrackets: false ++Standard: Auto ++UseTab: Never ++... +diff --git a/slirp/COPYRIGHT b/slirp/COPYRIGHT +new file mode 100644 +index 0000000..ed49512 +--- /dev/null ++++ b/slirp/COPYRIGHT +@@ -0,0 +1,62 @@ ++Slirp was written by Danny Gasparovski. ++Copyright (c), 1995,1996 All Rights Reserved. ++ ++Slirp is free software; "free" as in you don't have to pay for it, and you ++are free to do whatever you want with it. I do not accept any donations, ++monetary or otherwise, for Slirp. Instead, I would ask you to pass this ++potential donation to your favorite charity. In fact, I encourage ++*everyone* who finds Slirp useful to make a small donation to their ++favorite charity (for example, GreenPeace). This is not a requirement, but ++a suggestion from someone who highly values the service they provide. ++ ++The copyright terms and conditions: ++ ++---BEGIN--- ++ ++ Copyright (c) 1995,1996 Danny Gasparovski. All rights reserved. 
++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ 1. Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ 2. Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ 3. Neither the name of the copyright holder nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, ++ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY ++ AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ++ DANNY GASPAROVSKI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, ++ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ++ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++---END--- ++ ++This basically means you can do anything you want with the software, except ++1) call it your own, and 2) claim warranty on it. There is no warranty for ++this software. None. Nada. If you lose a million dollars while using ++Slirp, that's your loss not mine. So, ***USE AT YOUR OWN RISK!***. ++ ++If these conditions cannot be met due to legal restrictions (E.g. where it ++is against the law to give out Software without warranty), you must cease ++using the software and delete all copies you have. 
++ ++Slirp uses code that is copyrighted by the following people/organizations: ++ ++Juha Pirkola. ++Gregory M. Christy. ++The Regents of the University of California. ++Carnegie Mellon University. ++The Australian National University. ++RSA Data Security, Inc. ++ ++Please read the top of each source file for the details on the various ++copyrights. +diff --git a/slirp/Makefile b/slirp/Makefile +new file mode 100644 +index 0000000..7f09879 +--- /dev/null ++++ b/slirp/Makefile +@@ -0,0 +1,60 @@ ++ROOT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) ++BUILD_DIR ?= . ++ ++LIBSLIRP = $(BUILD_DIR)/libslirp.a ++SLIRP_MAJOR_VERSION = 4 ++SLIRP_MINOR_VERSION = 0 ++SLIRP_MICRO_VERSION = 0 ++ ++all: $(LIBSLIRP) ++ ++SRCS := $(wildcard src/*.c) ++OBJS := $(SRCS:%.c=$(BUILD_DIR)/%.o) ++DEPS := $(OBJS:%.o=%.d) ++ ++INC_DIRS := $(BUILD_DIR)/src ++INC_FLAGS := $(addprefix -I,$(INC_DIRS)) ++ ++override CFLAGS += \ ++ -DG_LOG_DOMAIN='"Slirp"' \ ++ $(shell $(PKG_CONFIG) --cflags glib-2.0) \ ++ $(INC_FLAGS) \ ++ -MMD -MP ++override LDFLAGS += $(shell $(PKG_CONFIG) --libs glib-2.0) ++ ++$(BUILD_DIR)/src/libslirp-version.h: ++ @$(MKDIR_P) $(dir $@) ++ $(call quiet-command,cat $(ROOT_DIR)/src/libslirp-version.h.in | \ ++ sed 's/@SLIRP_MAJOR_VERSION@/$(SLIRP_MAJOR_VERSION)/' | \ ++ sed 's/@SLIRP_MINOR_VERSION@/$(SLIRP_MINOR_VERSION)/' | \ ++ sed 's/@SLIRP_MICRO_VERSION@/$(SLIRP_MICRO_VERSION)/' \ ++ > $@,"GEN","$@") ++ ++$(OBJS): $(BUILD_DIR)/src/libslirp-version.h ++ ++$(LIBSLIRP): $(OBJS) ++ ++.PHONY: clean ++ ++clean: ++ rm -r $(OBJS) $(DEPS) $(LIBSLIRP) $(BUILD_DIR)/src/libslirp-version.h ++ ++$(BUILD_DIR)/src/%.o: $(ROOT_DIR)/src/%.c ++ @$(MKDIR_P) $(dir $@) ++ $(call quiet-command,$(CC) $(CFLAGS) -c -o $@ $<,"CC","$@") ++ ++%.a: ++ $(call quiet-command,rm -f $@ && $(AR) rcs $@ $^,"AR","$@") ++ ++PKG_CONFIG ?= pkg-config ++MKDIR_P ?= mkdir -p ++quiet-command-run = $(if $(V),,$(if $2,printf " %-7s %s\n" $2 $3 && ))$1 ++quiet-@ = $(if $(V),,@) ++quiet-command = 
$(quiet-@)$(call quiet-command-run,$1,$2,$3) ++ ++print-%: ++ @echo '$*=$($*)' ++ ++.SUFFIXES: ++ ++-include $(DEPS) +diff --git a/slirp/README.md b/slirp/README.md +new file mode 100644 +index 0000000..dc11e5f +--- /dev/null ++++ b/slirp/README.md +@@ -0,0 +1,60 @@ ++# libslirp ++ ++libslirp is a user-mode networking library used by virtual machines, ++containers or various tools. ++ ++## Getting Started ++ ++### Prerequisites ++ ++A C compiler, make/meson and glib2 development libraries. ++ ++(see also [.gitlab-ci.yml](.gitlab-ci.yml) DEPS variable for the list ++of dependencies on Fedora) ++ ++### Building ++ ++You may build and install the shared library with meson: ++ ++``` sh ++meson build ++ninja -C build install ++``` ++And configure QEMU with --enable-slirp=system to link against it. ++ ++(QEMU may build with the submodule static library using --enable-slirp=git) ++ ++### Testing ++ ++Unfortunately, there are no automated tests available. ++ ++You may run QEMU ``-net user`` linked with your development version. ++ ++## Contributing ++ ++Feel free to open issues on the [project ++issues](https://gitlab.freedesktop.org/slirp/libslirp/issues) page. ++ ++You may clone the [gitlab ++project](https://gitlab.freedesktop.org/slirp/libslirp) and create a ++merge request. ++ ++Contributing with gitlab allows gitlab workflow, tracking issues, ++running CI etc. ++ ++Alternatively, you may send patches to slirp@lists.freedesktop.org ++mailing list. ++ ++## Versioning ++ ++We intend to use [libtool's ++versioning](https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html) ++for the shared libraries and use [SemVer](http://semver.org/) for ++project versions. ++ ++For the versions available, see the [tags on this ++repository](https://gitlab.freedesktop.org/slirp/libslirp/releases). ++ ++## License ++ ++See the [COPYRIGHT](COPYRIGHT) file for details. 
+diff --git a/slirp/meson.build b/slirp/meson.build +new file mode 100644 +index 0000000..94578dc +--- /dev/null ++++ b/slirp/meson.build +@@ -0,0 +1,127 @@ ++project('libslirp', 'c', ++ version : '4.0.0', ++ license : 'BSD-3-Clause', ++ default_options : ['warning_level=1', 'c_std=gnu99'] ++) ++ ++version = meson.project_version() ++varr = version.split('.') ++major_version = varr[0] ++minor_version = varr[1] ++micro_version = varr[2] ++ ++conf = configuration_data() ++conf.set('SLIRP_MAJOR_VERSION', major_version) ++conf.set('SLIRP_MINOR_VERSION', minor_version) ++conf.set('SLIRP_MICRO_VERSION', micro_version) ++ ++# libtool versioning - this applies to libslirp ++# ++# See http://sources.redhat.com/autobook/autobook/autobook_91.html#SEC91 for details ++# ++# - If interfaces have been changed or added, but binary compatibility ++# has been preserved, change: ++# CURRENT += 1 ++# REVISION = 0 ++# AGE += 1 ++# - If binary compatibility has been broken (eg removed or changed ++# interfaces), change: ++# CURRENT += 1 ++# REVISION = 0 ++# AGE = 0 ++# - If the interface is the same as the previous version, but bugs are ++# fixed, change: ++# REVISION += 1 ++lt_current = '0' ++lt_revision = '0' ++lt_age = '0' ++lt_version = '@0@.@1@.@2@'.format(lt_current, lt_age, lt_revision) ++ ++host_system = host_machine.system() ++ ++glib_dep = dependency('glib-2.0') ++ ++cc = meson.get_compiler('c') ++ ++platform_deps = [] ++ ++if host_system == 'windows' ++ platform_deps += [ ++ cc.find_library('ws2_32'), ++ cc.find_library('iphlpapi') ++ ] ++endif ++ ++cargs = [ ++ '-DG_LOG_DOMAIN="Slirp"', ++] ++ ++sources = [ ++ 'src/arp_table.c', ++ 'src/bootp.c', ++ 'src/cksum.c', ++ 'src/dhcpv6.c', ++ 'src/dnssearch.c', ++ 'src/if.c', ++ 'src/ip6_icmp.c', ++ 'src/ip6_input.c', ++ 'src/ip6_output.c', ++ 'src/ip_icmp.c', ++ 'src/ip_input.c', ++ 'src/ip_output.c', ++ 'src/mbuf.c', ++ 'src/misc.c', ++ 'src/ncsi.c', ++ 'src/ndp_table.c', ++ 'src/sbuf.c', ++ 'src/slirp.c', ++ 'src/socket.c', ++ 
'src/state.c', ++ 'src/stream.c', ++ 'src/tcp_input.c', ++ 'src/tcp_output.c', ++ 'src/tcp_subr.c', ++ 'src/tcp_timer.c', ++ 'src/tftp.c', ++ 'src/udp.c', ++ 'src/udp6.c', ++ 'src/util.c', ++ 'src/version.c', ++ 'src/vmstate.c', ++] ++ ++mapfile = 'src/libslirp.map' ++vflag = '-Wl,--version-script,@0@/@1@'.format(meson.current_source_dir(), mapfile) ++ ++configure_file( ++ input : 'src/libslirp-version.h.in', ++ output : 'libslirp-version.h', ++ install_dir : join_paths(get_option('includedir'), 'slirp'), ++ configuration : conf ++) ++ ++lib = shared_library('slirp', sources, ++ soversion : lt_current, ++ version : lt_version, ++ c_args : cargs, ++ link_args : vflag, ++ link_depends : mapfile, ++ dependencies : [glib_dep, platform_deps], ++ install : true ++) ++ ++install_headers(['src/libslirp.h'], subdir : 'slirp') ++ ++pkg = import('pkgconfig') ++ ++pkg.generate( ++ version : version, ++ libraries : lib, ++ requires : [ ++ 'glib-2.0', ++ ], ++ name : 'slirp', ++ description : 'User-space network stack', ++ filebase : 'slirp', ++ subdirs : 'slirp', ++) +diff --git a/slirp/src/arp_table.c b/slirp/src/arp_table.c +new file mode 100644 +index 0000000..022a52e +--- /dev/null ++++ b/slirp/src/arp_table.c +@@ -0,0 +1,91 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * ARP table ++ * ++ * Copyright (c) 2011 AdaCore ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++#include "slirp.h" ++ ++#include ++ ++void arp_table_add(Slirp *slirp, uint32_t ip_addr, uint8_t ethaddr[ETH_ALEN]) ++{ ++ const uint32_t broadcast_addr = ++ ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; ++ ArpTable *arptbl = &slirp->arp_table; ++ int i; ++ ++ DEBUG_CALL("arp_table_add"); ++ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); ++ DEBUG_ARG("hw addr = %02x:%02x:%02x:%02x:%02x:%02x", ethaddr[0], ethaddr[1], ++ ethaddr[2], ethaddr[3], ethaddr[4], ethaddr[5]); ++ ++ if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { ++ /* Do not register broadcast addresses */ ++ return; ++ } ++ ++ /* Search for an entry */ ++ for (i = 0; i < ARP_TABLE_SIZE; i++) { ++ if (arptbl->table[i].ar_sip == ip_addr) { ++ /* Update the entry */ ++ memcpy(arptbl->table[i].ar_sha, ethaddr, ETH_ALEN); ++ return; ++ } ++ } ++ ++ /* No entry found, create a new one */ ++ arptbl->table[arptbl->next_victim].ar_sip = ip_addr; ++ memcpy(arptbl->table[arptbl->next_victim].ar_sha, ethaddr, ETH_ALEN); ++ arptbl->next_victim = (arptbl->next_victim + 1) % ARP_TABLE_SIZE; ++} ++ ++bool arp_table_search(Slirp *slirp, uint32_t ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]) ++{ ++ const uint32_t broadcast_addr = ++ ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; ++ ArpTable *arptbl = &slirp->arp_table; ++ int i; ++ ++ DEBUG_CALL("arp_table_search"); ++ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); ++ ++ /* 
If broadcast address */ ++ if (ip_addr == 0xffffffff || ip_addr == broadcast_addr) { ++ /* return Ethernet broadcast address */ ++ memset(out_ethaddr, 0xff, ETH_ALEN); ++ return 1; ++ } ++ ++ for (i = 0; i < ARP_TABLE_SIZE; i++) { ++ if (arptbl->table[i].ar_sip == ip_addr) { ++ memcpy(out_ethaddr, arptbl->table[i].ar_sha, ETH_ALEN); ++ DEBUG_ARG("found hw addr = %02x:%02x:%02x:%02x:%02x:%02x", ++ out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], ++ out_ethaddr[3], out_ethaddr[4], out_ethaddr[5]); ++ return 1; ++ } ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c +new file mode 100644 +index 0000000..3f9ce25 +--- /dev/null ++++ b/slirp/src/bootp.c +@@ -0,0 +1,370 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * QEMU BOOTP/DHCP server ++ * ++ * Copyright (c) 2004 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "slirp.h" ++ ++#if defined(_WIN32) ++/* Windows ntohl() returns an u_long value. ++ * Add a type cast to match the format strings. */ ++#define ntohl(n) ((uint32_t)ntohl(n)) ++#endif ++ ++/* XXX: only DHCP is supported */ ++ ++#define LEASE_TIME (24 * 3600) ++ ++static const uint8_t rfc1533_cookie[] = { RFC1533_COOKIE }; ++ ++#define DPRINTF(fmt, ...) DEBUG_CALL(fmt, ##__VA_ARGS__) ++ ++static BOOTPClient *get_new_addr(Slirp *slirp, struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ BOOTPClient *bc; ++ int i; ++ ++ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { ++ bc = &slirp->bootp_clients[i]; ++ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) ++ goto found; ++ } ++ return NULL; ++found: ++ bc = &slirp->bootp_clients[i]; ++ bc->allocated = 1; ++ paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); ++ return bc; ++} ++ ++static BOOTPClient *request_addr(Slirp *slirp, const struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ uint32_t req_addr = ntohl(paddr->s_addr); ++ uint32_t dhcp_addr = ntohl(slirp->vdhcp_startaddr.s_addr); ++ BOOTPClient *bc; ++ ++ if (req_addr >= dhcp_addr && req_addr < (dhcp_addr + NB_BOOTP_CLIENTS)) { ++ bc = &slirp->bootp_clients[req_addr - dhcp_addr]; ++ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) { ++ bc->allocated = 1; ++ return bc; ++ } ++ } ++ return NULL; ++} ++ ++static BOOTPClient *find_addr(Slirp *slirp, struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ BOOTPClient *bc; ++ int i; ++ ++ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { ++ if (!memcmp(macaddr, slirp->bootp_clients[i].macaddr, 6)) ++ goto found; ++ } ++ return NULL; ++found: ++ bc = &slirp->bootp_clients[i]; ++ bc->allocated = 1; ++ paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); ++ return bc; ++} ++ ++static void dhcp_decode(const struct bootp_t *bp, int *pmsg_type, ++ struct in_addr *preq_addr) ++{ ++ const uint8_t *p, *p_end; ++ int len, tag; ++ ++ *pmsg_type = 0; ++ preq_addr->s_addr = htonl(0L); ++ ++ p = 
bp->bp_vend; ++ p_end = p + DHCP_OPT_LEN; ++ if (memcmp(p, rfc1533_cookie, 4) != 0) ++ return; ++ p += 4; ++ while (p < p_end) { ++ tag = p[0]; ++ if (tag == RFC1533_PAD) { ++ p++; ++ } else if (tag == RFC1533_END) { ++ break; ++ } else { ++ p++; ++ if (p >= p_end) ++ break; ++ len = *p++; ++ if (p + len > p_end) { ++ break; ++ } ++ DPRINTF("dhcp: tag=%d len=%d\n", tag, len); ++ ++ switch (tag) { ++ case RFC2132_MSG_TYPE: ++ if (len >= 1) ++ *pmsg_type = p[0]; ++ break; ++ case RFC2132_REQ_ADDR: ++ if (len >= 4) { ++ memcpy(&(preq_addr->s_addr), p, 4); ++ } ++ break; ++ default: ++ break; ++ } ++ p += len; ++ } ++ } ++ if (*pmsg_type == DHCPREQUEST && preq_addr->s_addr == htonl(0L) && ++ bp->bp_ciaddr.s_addr) { ++ memcpy(&(preq_addr->s_addr), &bp->bp_ciaddr, 4); ++ } ++} ++ ++static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) ++{ ++ BOOTPClient *bc = NULL; ++ struct mbuf *m; ++ struct bootp_t *rbp; ++ struct sockaddr_in saddr, daddr; ++ struct in_addr preq_addr; ++ int dhcp_msg_type, val; ++ uint8_t *q; ++ uint8_t *end; ++ uint8_t client_ethaddr[ETH_ALEN]; ++ ++ /* extract exact DHCP msg type */ ++ dhcp_decode(bp, &dhcp_msg_type, &preq_addr); ++ DPRINTF("bootp packet op=%d msgtype=%d", bp->bp_op, dhcp_msg_type); ++ if (preq_addr.s_addr != htonl(0L)) ++ DPRINTF(" req_addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); ++ else { ++ DPRINTF("\n"); ++ } ++ ++ if (dhcp_msg_type == 0) ++ dhcp_msg_type = DHCPREQUEST; /* Force reply for old BOOTP clients */ ++ ++ if (dhcp_msg_type != DHCPDISCOVER && dhcp_msg_type != DHCPREQUEST) ++ return; ++ ++ /* Get client's hardware address from bootp request */ ++ memcpy(client_ethaddr, bp->bp_hwaddr, ETH_ALEN); ++ ++ m = m_get(slirp); ++ if (!m) { ++ return; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ rbp = (struct bootp_t *)m->m_data; ++ m->m_data += sizeof(struct udpiphdr); ++ memset(rbp, 0, sizeof(struct bootp_t)); ++ ++ if (dhcp_msg_type == DHCPDISCOVER) { ++ if (preq_addr.s_addr != htonl(0L)) { ++ bc = request_addr(slirp, 
&preq_addr, client_ethaddr); ++ if (bc) { ++ daddr.sin_addr = preq_addr; ++ } ++ } ++ if (!bc) { ++ new_addr: ++ bc = get_new_addr(slirp, &daddr.sin_addr, client_ethaddr); ++ if (!bc) { ++ DPRINTF("no address left\n"); ++ return; ++ } ++ } ++ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); ++ } else if (preq_addr.s_addr != htonl(0L)) { ++ bc = request_addr(slirp, &preq_addr, client_ethaddr); ++ if (bc) { ++ daddr.sin_addr = preq_addr; ++ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); ++ } else { ++ /* DHCPNAKs should be sent to broadcast */ ++ daddr.sin_addr.s_addr = 0xffffffff; ++ } ++ } else { ++ bc = find_addr(slirp, &daddr.sin_addr, bp->bp_hwaddr); ++ if (!bc) { ++ /* if never assigned, behaves as if it was already ++ assigned (windows fix because it remembers its address) */ ++ goto new_addr; ++ } ++ } ++ ++ /* Update ARP table for this IP address */ ++ arp_table_add(slirp, daddr.sin_addr.s_addr, client_ethaddr); ++ ++ saddr.sin_addr = slirp->vhost_addr; ++ saddr.sin_port = htons(BOOTP_SERVER); ++ ++ daddr.sin_port = htons(BOOTP_CLIENT); ++ ++ rbp->bp_op = BOOTP_REPLY; ++ rbp->bp_xid = bp->bp_xid; ++ rbp->bp_htype = 1; ++ rbp->bp_hlen = 6; ++ memcpy(rbp->bp_hwaddr, bp->bp_hwaddr, ETH_ALEN); ++ ++ rbp->bp_yiaddr = daddr.sin_addr; /* Client IP address */ ++ rbp->bp_siaddr = saddr.sin_addr; /* Server IP address */ ++ ++ q = rbp->bp_vend; ++ end = (uint8_t *)&rbp[1]; ++ memcpy(q, rfc1533_cookie, 4); ++ q += 4; ++ ++ if (bc) { ++ DPRINTF("%s addr=%08" PRIx32 "\n", ++ (dhcp_msg_type == DHCPDISCOVER) ? 
"offered" : "ack'ed", ++ ntohl(daddr.sin_addr.s_addr)); ++ ++ if (dhcp_msg_type == DHCPDISCOVER) { ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPOFFER; ++ } else /* DHCPREQUEST */ { ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPACK; ++ } ++ ++ if (slirp->bootp_filename) ++ snprintf((char *)rbp->bp_file, sizeof(rbp->bp_file), "%s", ++ slirp->bootp_filename); ++ ++ *q++ = RFC2132_SRV_ID; ++ *q++ = 4; ++ memcpy(q, &saddr.sin_addr, 4); ++ q += 4; ++ ++ *q++ = RFC1533_NETMASK; ++ *q++ = 4; ++ memcpy(q, &slirp->vnetwork_mask, 4); ++ q += 4; ++ ++ if (!slirp->restricted) { ++ *q++ = RFC1533_GATEWAY; ++ *q++ = 4; ++ memcpy(q, &saddr.sin_addr, 4); ++ q += 4; ++ ++ *q++ = RFC1533_DNS; ++ *q++ = 4; ++ memcpy(q, &slirp->vnameserver_addr, 4); ++ q += 4; ++ } ++ ++ *q++ = RFC2132_LEASE_TIME; ++ *q++ = 4; ++ val = htonl(LEASE_TIME); ++ memcpy(q, &val, 4); ++ q += 4; ++ ++ if (*slirp->client_hostname) { ++ val = strlen(slirp->client_hostname); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting host name option."); ++ } else { ++ *q++ = RFC1533_HOSTNAME; ++ *q++ = val; ++ memcpy(q, slirp->client_hostname, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->vdomainname) { ++ val = strlen(slirp->vdomainname); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting domain name option."); ++ } else { ++ *q++ = RFC1533_DOMAINNAME; ++ *q++ = val; ++ memcpy(q, slirp->vdomainname, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->tftp_server_name) { ++ val = strlen(slirp->tftp_server_name); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting tftp-server-name option."); ++ } else { ++ *q++ = RFC2132_TFTP_SERVER_NAME; ++ *q++ = val; ++ memcpy(q, slirp->tftp_server_name, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->vdnssearch) { ++ val = slirp->vdnssearch_len; ++ if (q + val >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting domain-search option."); ++ } else { ++ memcpy(q, 
slirp->vdnssearch, val); ++ q += val; ++ } ++ } ++ } else { ++ static const char nak_msg[] = "requested address not available"; ++ ++ DPRINTF("nak'ed addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); ++ ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPNAK; ++ ++ *q++ = RFC2132_MESSAGE; ++ *q++ = sizeof(nak_msg) - 1; ++ memcpy(q, nak_msg, sizeof(nak_msg) - 1); ++ q += sizeof(nak_msg) - 1; ++ } ++ assert(q < end); ++ *q = ++RFC1533_END ++; ++ ++daddr.sin_addr.s_addr = 0xffffffffu; ++ ++m->m_len = sizeof(struct bootp_t) - sizeof(struct ip) - sizeof(struct udphdr); ++udp_output(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); ++} ++ ++void bootp_input(struct mbuf *m) ++{ ++ struct bootp_t *bp = mtod(m, struct bootp_t *); ++ ++ if (bp->bp_op == BOOTP_REQUEST) { ++ bootp_reply(m->slirp, bp); ++ } ++} +diff --git a/slirp/src/bootp.h b/slirp/src/bootp.h +new file mode 100644 +index 0000000..03ece9b +--- /dev/null ++++ b/slirp/src/bootp.h +@@ -0,0 +1,129 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* bootp/dhcp defines */ ++ ++#ifndef SLIRP_BOOTP_H ++#define SLIRP_BOOTP_H ++ ++#define BOOTP_SERVER 67 ++#define BOOTP_CLIENT 68 ++ ++#define BOOTP_REQUEST 1 ++#define BOOTP_REPLY 2 ++ ++#define RFC1533_COOKIE 99, 130, 83, 99 ++#define RFC1533_PAD 0 ++#define RFC1533_NETMASK 1 ++#define RFC1533_TIMEOFFSET 2 ++#define RFC1533_GATEWAY 3 ++#define RFC1533_TIMESERVER 4 ++#define RFC1533_IEN116NS 5 ++#define RFC1533_DNS 6 ++#define RFC1533_LOGSERVER 7 ++#define RFC1533_COOKIESERVER 8 ++#define RFC1533_LPRSERVER 9 ++#define RFC1533_IMPRESSSERVER 10 ++#define RFC1533_RESOURCESERVER 11 ++#define RFC1533_HOSTNAME 12 ++#define RFC1533_BOOTFILESIZE 13 ++#define RFC1533_MERITDUMPFILE 14 ++#define RFC1533_DOMAINNAME 15 ++#define RFC1533_SWAPSERVER 16 ++#define RFC1533_ROOTPATH 17 ++#define RFC1533_EXTENSIONPATH 18 ++#define RFC1533_IPFORWARDING 19 ++#define RFC1533_IPSOURCEROUTING 20 ++#define RFC1533_IPPOLICYFILTER 21 ++#define RFC1533_IPMAXREASSEMBLY 22 ++#define RFC1533_IPTTL 23 
++#define RFC1533_IPMTU 24 ++#define RFC1533_IPMTUPLATEAU 25 ++#define RFC1533_INTMTU 26 ++#define RFC1533_INTLOCALSUBNETS 27 ++#define RFC1533_INTBROADCAST 28 ++#define RFC1533_INTICMPDISCOVER 29 ++#define RFC1533_INTICMPRESPOND 30 ++#define RFC1533_INTROUTEDISCOVER 31 ++#define RFC1533_INTROUTESOLICIT 32 ++#define RFC1533_INTSTATICROUTES 33 ++#define RFC1533_LLTRAILERENCAP 34 ++#define RFC1533_LLARPCACHETMO 35 ++#define RFC1533_LLETHERNETENCAP 36 ++#define RFC1533_TCPTTL 37 ++#define RFC1533_TCPKEEPALIVETMO 38 ++#define RFC1533_TCPKEEPALIVEGB 39 ++#define RFC1533_NISDOMAIN 40 ++#define RFC1533_NISSERVER 41 ++#define RFC1533_NTPSERVER 42 ++#define RFC1533_VENDOR 43 ++#define RFC1533_NBNS 44 ++#define RFC1533_NBDD 45 ++#define RFC1533_NBNT 46 ++#define RFC1533_NBSCOPE 47 ++#define RFC1533_XFS 48 ++#define RFC1533_XDM 49 ++ ++#define RFC2132_REQ_ADDR 50 ++#define RFC2132_LEASE_TIME 51 ++#define RFC2132_MSG_TYPE 53 ++#define RFC2132_SRV_ID 54 ++#define RFC2132_PARAM_LIST 55 ++#define RFC2132_MESSAGE 56 ++#define RFC2132_MAX_SIZE 57 ++#define RFC2132_RENEWAL_TIME 58 ++#define RFC2132_REBIND_TIME 59 ++#define RFC2132_TFTP_SERVER_NAME 66 ++ ++#define DHCPDISCOVER 1 ++#define DHCPOFFER 2 ++#define DHCPREQUEST 3 ++#define DHCPACK 5 ++#define DHCPNAK 6 ++ ++#define RFC1533_VENDOR_MAJOR 0 ++#define RFC1533_VENDOR_MINOR 0 ++ ++#define RFC1533_VENDOR_MAGIC 128 ++#define RFC1533_VENDOR_ADDPARM 129 ++#define RFC1533_VENDOR_ETHDEV 130 ++#define RFC1533_VENDOR_HOWTO 132 ++#define RFC1533_VENDOR_MNUOPTS 160 ++#define RFC1533_VENDOR_SELECTION 176 ++#define RFC1533_VENDOR_MOTD 184 ++#define RFC1533_VENDOR_NUMOFMOTD 8 ++#define RFC1533_VENDOR_IMG 192 ++#define RFC1533_VENDOR_NUMOFIMG 16 ++ ++#define RFC1533_END 255 ++#define BOOTP_VENDOR_LEN 64 ++#define DHCP_OPT_LEN 312 ++ ++struct bootp_t { ++ struct ip ip; ++ struct udphdr udp; ++ uint8_t bp_op; ++ uint8_t bp_htype; ++ uint8_t bp_hlen; ++ uint8_t bp_hops; ++ uint32_t bp_xid; ++ uint16_t bp_secs; ++ uint16_t unused; ++ struct 
in_addr bp_ciaddr; ++ struct in_addr bp_yiaddr; ++ struct in_addr bp_siaddr; ++ struct in_addr bp_giaddr; ++ uint8_t bp_hwaddr[16]; ++ uint8_t bp_sname[64]; ++ uint8_t bp_file[128]; ++ uint8_t bp_vend[DHCP_OPT_LEN]; ++}; ++ ++typedef struct { ++ uint16_t allocated; ++ uint8_t macaddr[6]; ++} BOOTPClient; ++ ++#define NB_BOOTP_CLIENTS 16 ++ ++void bootp_input(struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/cksum.c b/slirp/src/cksum.c +new file mode 100644 +index 0000000..4d08380 +--- /dev/null ++++ b/slirp/src/cksum.c +@@ -0,0 +1,179 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1988, 1992, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 ++ * in_cksum.c,v 1.2 1994/08/02 07:48:16 davidg Exp ++ */ ++ ++#include "slirp.h" ++ ++/* ++ * Checksum routine for Internet Protocol family headers (Portable Version). ++ * ++ * This routine is very heavily used in the network ++ * code and should be modified for each CPU to be as fast as possible. ++ * ++ * XXX Since we will never span more than 1 mbuf, we can optimise this ++ */ ++ ++#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) ++#define REDUCE \ ++ { \ ++ l_util.l = sum; \ ++ sum = l_util.s[0] + l_util.s[1]; \ ++ (void)ADDCARRY(sum); \ ++ } ++ ++int cksum(struct mbuf *m, int len) ++{ ++ register uint16_t *w; ++ register int sum = 0; ++ register int mlen = 0; ++ int byte_swapped = 0; ++ ++ union { ++ uint8_t c[2]; ++ uint16_t s; ++ } s_util; ++ union { ++ uint16_t s[2]; ++ uint32_t l; ++ } l_util; ++ ++ if (m->m_len == 0) ++ goto cont; ++ w = mtod(m, uint16_t *); ++ ++ mlen = m->m_len; ++ ++ if (len < mlen) ++ mlen = len; ++ len -= mlen; ++ /* ++ * Force to even boundary. ++ */ ++ if ((1 & (uintptr_t)w) && (mlen > 0)) { ++ REDUCE; ++ sum <<= 8; ++ s_util.c[0] = *(uint8_t *)w; ++ w = (uint16_t *)((int8_t *)w + 1); ++ mlen--; ++ byte_swapped = 1; ++ } ++ /* ++ * Unroll the loop to make overhead from ++ * branches &c small. 
++ */ ++ while ((mlen -= 32) >= 0) { ++ sum += w[0]; ++ sum += w[1]; ++ sum += w[2]; ++ sum += w[3]; ++ sum += w[4]; ++ sum += w[5]; ++ sum += w[6]; ++ sum += w[7]; ++ sum += w[8]; ++ sum += w[9]; ++ sum += w[10]; ++ sum += w[11]; ++ sum += w[12]; ++ sum += w[13]; ++ sum += w[14]; ++ sum += w[15]; ++ w += 16; ++ } ++ mlen += 32; ++ while ((mlen -= 8) >= 0) { ++ sum += w[0]; ++ sum += w[1]; ++ sum += w[2]; ++ sum += w[3]; ++ w += 4; ++ } ++ mlen += 8; ++ if (mlen == 0 && byte_swapped == 0) ++ goto cont; ++ REDUCE; ++ while ((mlen -= 2) >= 0) { ++ sum += *w++; ++ } ++ ++ if (byte_swapped) { ++ REDUCE; ++ sum <<= 8; ++ if (mlen == -1) { ++ s_util.c[1] = *(uint8_t *)w; ++ sum += s_util.s; ++ mlen = 0; ++ } else ++ ++ mlen = -1; ++ } else if (mlen == -1) ++ s_util.c[0] = *(uint8_t *)w; ++ ++cont: ++ if (len) { ++ DEBUG_ERROR("cksum: out of data"); ++ DEBUG_ERROR(" len = %d", len); ++ } ++ if (mlen == -1) { ++ /* The last mbuf has odd # of bytes. Follow the ++ standard (the odd byte may be shifted left by 8 bits ++ or not as determined by endian-ness of the machine) */ ++ s_util.c[1] = 0; ++ sum += s_util.s; ++ } ++ REDUCE; ++ return (~sum & 0xffff); ++} ++ ++int ip6_cksum(struct mbuf *m) ++{ ++ /* TODO: Optimize this by being able to pass the ip6_pseudohdr to cksum ++ * separately from the mbuf */ ++ struct ip6 save_ip, *ip = mtod(m, struct ip6 *); ++ struct ip6_pseudohdr *ih = mtod(m, struct ip6_pseudohdr *); ++ int sum; ++ ++ save_ip = *ip; ++ ++ ih->ih_src = save_ip.ip_src; ++ ih->ih_dst = save_ip.ip_dst; ++ ih->ih_pl = htonl((uint32_t)ntohs(save_ip.ip_pl)); ++ ih->ih_zero_hi = 0; ++ ih->ih_zero_lo = 0; ++ ih->ih_nh = save_ip.ip_nh; ++ ++ sum = cksum(m, ((int)sizeof(struct ip6_pseudohdr)) + ntohl(ih->ih_pl)); ++ ++ *ip = save_ip; ++ ++ return sum; ++} +diff --git a/slirp/src/debug.h b/slirp/src/debug.h +new file mode 100644 +index 0000000..47712bd +--- /dev/null ++++ b/slirp/src/debug.h +@@ -0,0 +1,51 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * 
Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef DEBUG_H_ ++#define DEBUG_H_ ++ ++#define DBG_CALL (1 << 0) ++#define DBG_MISC (1 << 1) ++#define DBG_ERROR (1 << 2) ++#define DBG_TFTP (1 << 3) ++ ++extern int slirp_debug; ++ ++#define DEBUG_CALL(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ ++ g_debug(fmt "...", ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_ARG(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ ++ g_debug(" " fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_MISC(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_MISC)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_ERROR(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_ERROR)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_TFTP(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_TFTP)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#endif /* DEBUG_H_ */ +diff --git a/slirp/src/dhcpv6.c b/slirp/src/dhcpv6.c +new file mode 100644 +index 0000000..bc041d5 +--- /dev/null ++++ b/slirp/src/dhcpv6.c +@@ -0,0 +1,225 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * SLIRP stateless DHCPv6 ++ * ++ * We only support stateless DHCPv6, e.g. for network booting. ++ * See RFC 3315, RFC 3736, RFC 3646 and RFC 5970 for details. ++ * ++ * Copyright 2016 Thomas Huth, Red Hat Inc. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. 
Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#include "slirp.h" ++#include "dhcpv6.h" ++ ++/* DHCPv6 message types */ ++#define MSGTYPE_REPLY 7 ++#define MSGTYPE_INFO_REQUEST 11 ++ ++/* DHCPv6 option types */ ++#define OPTION_CLIENTID 1 ++#define OPTION_IAADDR 5 ++#define OPTION_ORO 6 ++#define OPTION_DNS_SERVERS 23 ++#define OPTION_BOOTFILE_URL 59 ++ ++struct requested_infos { ++ uint8_t *client_id; ++ int client_id_len; ++ bool want_dns; ++ bool want_boot_url; ++}; ++ ++/** ++ * Analyze the info request message sent by the client to see what data it ++ * provided and what it wants to have. The information is gathered in the ++ * "requested_infos" struct. Note that client_id (if provided) points into ++ * the odata region, thus the caller must keep odata valid as long as it ++ * needs to access the requested_infos struct. 
++ */ ++static int dhcpv6_parse_info_request(Slirp *slirp, uint8_t *odata, int olen, ++ struct requested_infos *ri) ++{ ++ int i, req_opt; ++ ++ while (olen > 4) { ++ /* Parse one option */ ++ int option = odata[0] << 8 | odata[1]; ++ int len = odata[2] << 8 | odata[3]; ++ ++ if (len + 4 > olen) { ++ slirp->cb->guest_error("Guest sent bad DHCPv6 packet!", ++ slirp->opaque); ++ return -E2BIG; ++ } ++ ++ switch (option) { ++ case OPTION_IAADDR: ++ /* According to RFC3315, we must discard requests with IA option */ ++ return -EINVAL; ++ case OPTION_CLIENTID: ++ if (len > 256) { ++ /* Avoid very long IDs which could cause problems later */ ++ return -E2BIG; ++ } ++ ri->client_id = odata + 4; ++ ri->client_id_len = len; ++ break; ++ case OPTION_ORO: /* Option request option */ ++ if (len & 1) { ++ return -EINVAL; ++ } ++ /* Check which options the client wants to have */ ++ for (i = 0; i < len; i += 2) { ++ req_opt = odata[4 + i] << 8 | odata[4 + i + 1]; ++ switch (req_opt) { ++ case OPTION_DNS_SERVERS: ++ ri->want_dns = true; ++ break; ++ case OPTION_BOOTFILE_URL: ++ ri->want_boot_url = true; ++ break; ++ default: ++ DEBUG_MISC("dhcpv6: Unsupported option request %d", ++ req_opt); ++ } ++ } ++ break; ++ default: ++ DEBUG_MISC("dhcpv6 info req: Unsupported option %d, len=%d", option, ++ len); ++ } ++ ++ odata += len + 4; ++ olen -= len + 4; ++ } ++ ++ return 0; ++} ++ ++ ++/** ++ * Handle information request messages ++ */ ++static void dhcpv6_info_request(Slirp *slirp, struct sockaddr_in6 *srcsas, ++ uint32_t xid, uint8_t *odata, int olen) ++{ ++ struct requested_infos ri = { NULL }; ++ struct sockaddr_in6 sa6, da6; ++ struct mbuf *m; ++ uint8_t *resp; ++ ++ if (dhcpv6_parse_info_request(slirp, odata, olen, &ri) < 0) { ++ return; ++ } ++ ++ m = m_get(slirp); ++ if (!m) { ++ return; ++ } ++ memset(m->m_data, 0, m->m_size); ++ m->m_data += IF_MAXLINKHDR; ++ resp = (uint8_t *)m->m_data + sizeof(struct ip6) + sizeof(struct udphdr); ++ ++ /* Fill in response */ ++ *resp++ = 
MSGTYPE_REPLY; ++ *resp++ = (uint8_t)(xid >> 16); ++ *resp++ = (uint8_t)(xid >> 8); ++ *resp++ = (uint8_t)xid; ++ ++ if (ri.client_id) { ++ *resp++ = OPTION_CLIENTID >> 8; /* option-code high byte */ ++ *resp++ = OPTION_CLIENTID; /* option-code low byte */ ++ *resp++ = ri.client_id_len >> 8; /* option-len high byte */ ++ *resp++ = ri.client_id_len; /* option-len low byte */ ++ memcpy(resp, ri.client_id, ri.client_id_len); ++ resp += ri.client_id_len; ++ } ++ if (ri.want_dns) { ++ *resp++ = OPTION_DNS_SERVERS >> 8; /* option-code high byte */ ++ *resp++ = OPTION_DNS_SERVERS; /* option-code low byte */ ++ *resp++ = 0; /* option-len high byte */ ++ *resp++ = 16; /* option-len low byte */ ++ memcpy(resp, &slirp->vnameserver_addr6, 16); ++ resp += 16; ++ } ++ if (ri.want_boot_url) { ++ uint8_t *sa = slirp->vhost_addr6.s6_addr; ++ int slen, smaxlen; ++ ++ *resp++ = OPTION_BOOTFILE_URL >> 8; /* option-code high byte */ ++ *resp++ = OPTION_BOOTFILE_URL; /* option-code low byte */ ++ smaxlen = (uint8_t *)m->m_data + IF_MTU - (resp + 2); ++ slen = snprintf((char *)resp + 2, smaxlen, ++ "tftp://[%02x%02x:%02x%02x:%02x%02x:%02x%02x:" ++ "%02x%02x:%02x%02x:%02x%02x:%02x%02x]/%s", ++ sa[0], sa[1], sa[2], sa[3], sa[4], sa[5], sa[6], sa[7], ++ sa[8], sa[9], sa[10], sa[11], sa[12], sa[13], sa[14], ++ sa[15], slirp->bootp_filename); ++ slen = MIN(slen, smaxlen); ++ *resp++ = slen >> 8; /* option-len high byte */ ++ *resp++ = slen; /* option-len low byte */ ++ resp += slen; ++ } ++ ++ sa6.sin6_addr = slirp->vhost_addr6; ++ sa6.sin6_port = DHCPV6_SERVER_PORT; ++ da6.sin6_addr = srcsas->sin6_addr; ++ da6.sin6_port = srcsas->sin6_port; ++ m->m_data += sizeof(struct ip6) + sizeof(struct udphdr); ++ m->m_len = resp - (uint8_t *)m->m_data; ++ udp6_output(NULL, m, &sa6, &da6); ++} ++ ++/** ++ * Handle DHCPv6 messages sent by the client ++ */ ++void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m) ++{ ++ uint8_t *data = (uint8_t *)m->m_data + sizeof(struct udphdr); ++ int data_len = 
m->m_len - sizeof(struct udphdr); ++ uint32_t xid; ++ ++ if (data_len < 4) { ++ return; ++ } ++ ++ xid = ntohl(*(uint32_t *)data) & 0xffffff; ++ ++ switch (data[0]) { ++ case MSGTYPE_INFO_REQUEST: ++ dhcpv6_info_request(m->slirp, srcsas, xid, &data[4], data_len - 4); ++ break; ++ default: ++ DEBUG_MISC("dhcpv6_input: Unsupported message type 0x%x", data[0]); ++ } ++} +diff --git a/slirp/src/dhcpv6.h b/slirp/src/dhcpv6.h +new file mode 100644 +index 0000000..d12c49b +--- /dev/null ++++ b/slirp/src/dhcpv6.h +@@ -0,0 +1,68 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Definitions and prototypes for SLIRP stateless DHCPv6 ++ * ++ * Copyright 2016 Thomas Huth, Red Hat Inc. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#ifndef SLIRP_DHCPV6_H ++#define SLIRP_DHCPV6_H ++ ++#define DHCPV6_SERVER_PORT 547 ++ ++#define ALLDHCP_MULTICAST \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01, \ ++ 0x00, \ ++ 0x02 \ ++ } \ ++ } ++ ++#define in6_dhcp_multicast(a) in6_equal(a, &(struct in6_addr)ALLDHCP_MULTICAST) ++ ++void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/dnssearch.c b/slirp/src/dnssearch.c +new file mode 100644 +index 0000000..18a6122 +--- /dev/null ++++ b/slirp/src/dnssearch.c +@@ -0,0 +1,311 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * Domain search option for DHCP (RFC 3397) ++ * ++ * Copyright (c) 2012 Klaus Stengel ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++#include "slirp.h" ++ ++static const uint8_t RFC3397_OPT_DOMAIN_SEARCH = 119; ++static const uint8_t MAX_OPT_LEN = 255; ++static const uint8_t OPT_HEADER_LEN = 2; ++static const uint8_t REFERENCE_LEN = 2; ++ ++struct compact_domain; ++ ++typedef struct compact_domain { ++ struct compact_domain *self; ++ struct compact_domain *refdom; ++ uint8_t *labels; ++ size_t len; ++ size_t common_octets; ++} CompactDomain; ++ ++static size_t domain_suffix_diffoff(const CompactDomain *a, ++ const CompactDomain *b) ++{ ++ size_t la = a->len, lb = b->len; ++ uint8_t *da = a->labels + la, *db = b->labels + lb; ++ size_t i, lm = (la < lb) ? 
la : lb; ++ ++ for (i = 0; i < lm; i++) { ++ da--; ++ db--; ++ if (*da != *db) { ++ break; ++ } ++ } ++ return i; ++} ++ ++static int domain_suffix_ord(const void *cva, const void *cvb) ++{ ++ const CompactDomain *a = cva, *b = cvb; ++ size_t la = a->len, lb = b->len; ++ size_t doff = domain_suffix_diffoff(a, b); ++ uint8_t ca = a->labels[la - doff]; ++ uint8_t cb = b->labels[lb - doff]; ++ ++ if (ca < cb) { ++ return -1; ++ } ++ if (ca > cb) { ++ return 1; ++ } ++ if (la < lb) { ++ return -1; ++ } ++ if (la > lb) { ++ return 1; ++ } ++ return 0; ++} ++ ++static size_t domain_common_label(CompactDomain *a, CompactDomain *b) ++{ ++ size_t res, doff = domain_suffix_diffoff(a, b); ++ uint8_t *first_eq_pos = a->labels + (a->len - doff); ++ uint8_t *label = a->labels; ++ ++ while (*label && label < first_eq_pos) { ++ label += *label + 1; ++ } ++ res = a->len - (label - a->labels); ++ /* only report if it can help to reduce the packet size */ ++ return (res > REFERENCE_LEN) ? res : 0; ++} ++ ++static void domain_fixup_order(CompactDomain *cd, size_t n) ++{ ++ size_t i; ++ ++ for (i = 0; i < n; i++) { ++ CompactDomain *cur = cd + i, *next = cd[i].self; ++ ++ while (!cur->common_octets) { ++ CompactDomain *tmp = next->self; /* backup target value */ ++ ++ next->self = cur; ++ cur->common_octets++; ++ ++ cur = next; ++ next = tmp; ++ } ++ } ++} ++ ++static void domain_mklabels(CompactDomain *cd, const char *input) ++{ ++ uint8_t *len_marker = cd->labels; ++ uint8_t *output = len_marker; /* pre-incremented */ ++ const char *in = input; ++ char cur_chr; ++ size_t len = 0; ++ ++ if (cd->len == 0) { ++ goto fail; ++ } ++ cd->len++; ++ ++ do { ++ cur_chr = *in++; ++ if (cur_chr == '.' 
|| cur_chr == '\0') { ++ len = output - len_marker; ++ if ((len == 0 && cur_chr == '.') || len >= 64) { ++ goto fail; ++ } ++ *len_marker = len; ++ ++ output++; ++ len_marker = output; ++ } else { ++ output++; ++ *output = cur_chr; ++ } ++ } while (cur_chr != '\0'); ++ ++ /* ensure proper zero-termination */ ++ if (len != 0) { ++ *len_marker = 0; ++ cd->len++; ++ } ++ return; ++ ++fail: ++ g_warning("failed to parse domain name '%s'\n", input); ++ cd->len = 0; ++} ++ ++static void domain_mkxrefs(CompactDomain *doms, CompactDomain *last, ++ size_t depth) ++{ ++ CompactDomain *i = doms, *target = doms; ++ ++ do { ++ if (i->labels < target->labels) { ++ target = i; ++ } ++ } while (i++ != last); ++ ++ for (i = doms; i != last; i++) { ++ CompactDomain *group_last; ++ size_t next_depth; ++ ++ if (i->common_octets == depth) { ++ continue; ++ } ++ ++ next_depth = -1; ++ for (group_last = i; group_last != last; group_last++) { ++ size_t co = group_last->common_octets; ++ if (co <= depth) { ++ break; ++ } ++ if (co < next_depth) { ++ next_depth = co; ++ } ++ } ++ domain_mkxrefs(i, group_last, next_depth); ++ ++ i = group_last; ++ if (i == last) { ++ break; ++ } ++ } ++ ++ if (depth == 0) { ++ return; ++ } ++ ++ i = doms; ++ do { ++ if (i != target && i->refdom == NULL) { ++ i->refdom = target; ++ i->common_octets = depth; ++ } ++ } while (i++ != last); ++} ++ ++static size_t domain_compactify(CompactDomain *domains, size_t n) ++{ ++ uint8_t *start = domains->self->labels, *outptr = start; ++ size_t i; ++ ++ for (i = 0; i < n; i++) { ++ CompactDomain *cd = domains[i].self; ++ CompactDomain *rd = cd->refdom; ++ ++ if (rd != NULL) { ++ size_t moff = (rd->labels - start) + (rd->len - cd->common_octets); ++ if (moff < 0x3FFFu) { ++ cd->len -= cd->common_octets - 2; ++ cd->labels[cd->len - 1] = moff & 0xFFu; ++ cd->labels[cd->len - 2] = 0xC0u | (moff >> 8); ++ } ++ } ++ ++ if (cd->labels != outptr) { ++ memmove(outptr, cd->labels, cd->len); ++ cd->labels = outptr; ++ } ++ outptr 
+= cd->len; ++ } ++ return outptr - start; ++} ++ ++int translate_dnssearch(Slirp *s, const char **names) ++{ ++ size_t blocks, bsrc_start, bsrc_end, bdst_start; ++ size_t i, num_domains, memreq = 0; ++ uint8_t *result = NULL, *outptr; ++ CompactDomain *domains = NULL; ++ const char **nameptr = names; ++ ++ while (*nameptr != NULL) { ++ nameptr++; ++ } ++ ++ num_domains = nameptr - names; ++ if (num_domains == 0) { ++ return -2; ++ } ++ ++ domains = g_malloc(num_domains * sizeof(*domains)); ++ ++ for (i = 0; i < num_domains; i++) { ++ size_t nlen = strlen(names[i]); ++ memreq += nlen + 2; /* 1 zero octet + 1 label length octet */ ++ domains[i].self = domains + i; ++ domains[i].len = nlen; ++ domains[i].common_octets = 0; ++ domains[i].refdom = NULL; ++ } ++ ++ /* reserve extra 2 header bytes for each 255 bytes of output */ ++ memreq += DIV_ROUND_UP(memreq, MAX_OPT_LEN) * OPT_HEADER_LEN; ++ result = g_malloc(memreq * sizeof(*result)); ++ ++ outptr = result; ++ for (i = 0; i < num_domains; i++) { ++ domains[i].labels = outptr; ++ domain_mklabels(domains + i, names[i]); ++ outptr += domains[i].len; ++ } ++ ++ if (outptr == result) { ++ g_free(domains); ++ g_free(result); ++ return -1; ++ } ++ ++ qsort(domains, num_domains, sizeof(*domains), domain_suffix_ord); ++ domain_fixup_order(domains, num_domains); ++ ++ for (i = 1; i < num_domains; i++) { ++ size_t cl = domain_common_label(domains + i - 1, domains + i); ++ domains[i - 1].common_octets = cl; ++ } ++ ++ domain_mkxrefs(domains, domains + num_domains - 1, 0); ++ memreq = domain_compactify(domains, num_domains); ++ ++ blocks = DIV_ROUND_UP(memreq, MAX_OPT_LEN); ++ bsrc_end = memreq; ++ bsrc_start = (blocks - 1) * MAX_OPT_LEN; ++ bdst_start = bsrc_start + blocks * OPT_HEADER_LEN; ++ memreq += blocks * OPT_HEADER_LEN; ++ ++ while (blocks--) { ++ size_t len = bsrc_end - bsrc_start; ++ memmove(result + bdst_start, result + bsrc_start, len); ++ result[bdst_start - 2] = RFC3397_OPT_DOMAIN_SEARCH; ++ result[bdst_start - 1] 
= len; ++ bsrc_end = bsrc_start; ++ bsrc_start -= MAX_OPT_LEN; ++ bdst_start -= MAX_OPT_LEN + OPT_HEADER_LEN; ++ } ++ ++ g_free(domains); ++ s->vdnssearch = result; ++ s->vdnssearch_len = memreq; ++ return 0; ++} +diff --git a/slirp/src/if.c b/slirp/src/if.c +new file mode 100644 +index 0000000..23190b5 +--- /dev/null ++++ b/slirp/src/if.c +@@ -0,0 +1,213 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++static void ifs_insque(struct mbuf *ifm, struct mbuf *ifmhead) ++{ ++ ifm->ifs_next = ifmhead->ifs_next; ++ ifmhead->ifs_next = ifm; ++ ifm->ifs_prev = ifmhead; ++ ifm->ifs_next->ifs_prev = ifm; ++} ++ ++static void ifs_remque(struct mbuf *ifm) ++{ ++ ifm->ifs_prev->ifs_next = ifm->ifs_next; ++ ifm->ifs_next->ifs_prev = ifm->ifs_prev; ++} ++ ++void if_init(Slirp *slirp) ++{ ++ slirp->if_fastq.qh_link = slirp->if_fastq.qh_rlink = &slirp->if_fastq; ++ slirp->if_batchq.qh_link = slirp->if_batchq.qh_rlink = &slirp->if_batchq; ++} ++ ++/* ++ * if_output: Queue packet into an output queue. ++ * There are 2 output queue's, if_fastq and if_batchq. ++ * Each output queue is a doubly linked list of double linked lists ++ * of mbufs, each list belonging to one "session" (socket). This ++ * way, we can output packets fairly by sending one packet from each ++ * session, instead of all the packets from one session, then all packets ++ * from the next session, etc. Packets on the if_fastq get absolute ++ * priority, but if one session hogs the link, it gets "downgraded" ++ * to the batchq until it runs out of packets, then it'll return ++ * to the fastq (eg. 
if the user does an ls -alR in a telnet session, ++ * it'll temporarily get downgraded to the batchq) ++ */ ++void if_output(struct socket *so, struct mbuf *ifm) ++{ ++ Slirp *slirp = ifm->slirp; ++ struct mbuf *ifq; ++ int on_fastq = 1; ++ ++ DEBUG_CALL("if_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("ifm = %p", ifm); ++ ++ /* ++ * First remove the mbuf from m_usedlist, ++ * since we're gonna use m_next and m_prev ourselves ++ * XXX Shouldn't need this, gotta change dtom() etc. ++ */ ++ if (ifm->m_flags & M_USEDLIST) { ++ remque(ifm); ++ ifm->m_flags &= ~M_USEDLIST; ++ } ++ ++ /* ++ * See if there's already a batchq list for this session. ++ * This can include an interactive session, which should go on fastq, ++ * but gets too greedy... hence it'll be downgraded from fastq to batchq. ++ * We mustn't put this packet back on the fastq (or we'll send it out of ++ * order) ++ * XXX add cache here? ++ */ ++ if (so) { ++ for (ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; ++ (struct quehead *)ifq != &slirp->if_batchq; ifq = ifq->ifq_prev) { ++ if (so == ifq->ifq_so) { ++ /* A match! */ ++ ifm->ifq_so = so; ++ ifs_insque(ifm, ifq->ifs_prev); ++ goto diddit; ++ } ++ } ++ } ++ ++ /* No match, check which queue to put it on */ ++ if (so && (so->so_iptos & IPTOS_LOWDELAY)) { ++ ifq = (struct mbuf *)slirp->if_fastq.qh_rlink; ++ on_fastq = 1; ++ /* ++ * Check if this packet is a part of the last ++ * packet's session ++ */ ++ if (ifq->ifq_so == so) { ++ ifm->ifq_so = so; ++ ifs_insque(ifm, ifq->ifs_prev); ++ goto diddit; ++ } ++ } else { ++ ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; ++ } ++ ++ /* Create a new doubly linked list for this session */ ++ ifm->ifq_so = so; ++ ifs_init(ifm); ++ insque(ifm, ifq); ++ ++diddit: ++ if (so) { ++ /* Update *_queued */ ++ so->so_queued++; ++ so->so_nqueued++; ++ /* ++ * Check if the interactive session should be downgraded to ++ * the batchq. 
A session is downgraded if it has queued 6 ++ * packets without pausing, and at least 3 of those packets ++ * have been sent over the link ++ * (XXX These are arbitrary numbers, probably not optimal..) ++ */ ++ if (on_fastq && ++ ((so->so_nqueued >= 6) && (so->so_nqueued - so->so_queued) >= 3)) { ++ /* Remove from current queue... */ ++ remque(ifm->ifs_next); ++ ++ /* ...And insert in the new. That'll teach ya! */ ++ insque(ifm->ifs_next, &slirp->if_batchq); ++ } ++ } ++ ++ /* ++ * This prevents us from malloc()ing too many mbufs ++ */ ++ if_start(ifm->slirp); ++} ++ ++/* ++ * Send one packet from each session. ++ * If there are packets on the fastq, they are sent FIFO, before ++ * everything else. Then we choose the first packet from each ++ * batchq session (socket) and send it. ++ * For example, if there are 3 ftp sessions fighting for bandwidth, ++ * one packet will be sent from the first session, then one packet ++ * from the second session, then one packet from the third. ++ */ ++void if_start(Slirp *slirp) ++{ ++ uint64_t now = slirp->cb->clock_get_ns(slirp->opaque); ++ bool from_batchq = false; ++ struct mbuf *ifm, *ifm_next, *ifqt; ++ ++ DEBUG_CALL("if_start"); ++ ++ if (slirp->if_start_busy) { ++ return; ++ } ++ slirp->if_start_busy = true; ++ ++ struct mbuf *batch_head = NULL; ++ if (slirp->if_batchq.qh_link != &slirp->if_batchq) { ++ batch_head = (struct mbuf *)slirp->if_batchq.qh_link; ++ } ++ ++ if (slirp->if_fastq.qh_link != &slirp->if_fastq) { ++ ifm_next = (struct mbuf *)slirp->if_fastq.qh_link; ++ } else if (batch_head) { ++ /* Nothing on fastq, pick up from batchq */ ++ ifm_next = batch_head; ++ from_batchq = true; ++ } else { ++ ifm_next = NULL; ++ } ++ ++ while (ifm_next) { ++ ifm = ifm_next; ++ ++ ifm_next = ifm->ifq_next; ++ if ((struct quehead *)ifm_next == &slirp->if_fastq) { ++ /* No more packets in fastq, switch to batchq */ ++ ifm_next = batch_head; ++ from_batchq = true; ++ } ++ if ((struct quehead *)ifm_next == &slirp->if_batchq) { ++ 
/* end of batchq */ ++ ifm_next = NULL; ++ } ++ ++ /* Try to send packet unless it already expired */ ++ if (ifm->expiration_date >= now && !if_encap(slirp, ifm)) { ++ /* Packet is delayed due to pending ARP or NDP resolution */ ++ continue; ++ } ++ ++ /* Remove it from the queue */ ++ ifqt = ifm->ifq_prev; ++ remque(ifm); ++ ++ /* If there are more packets for this session, re-queue them */ ++ if (ifm->ifs_next != ifm) { ++ struct mbuf *next = ifm->ifs_next; ++ ++ insque(next, ifqt); ++ ifs_remque(ifm); ++ if (!from_batchq) { ++ ifm_next = next; ++ } ++ } ++ ++ /* Update so_queued */ ++ if (ifm->ifq_so && --ifm->ifq_so->so_queued == 0) { ++ /* If there's no more queued, reset nqueued */ ++ ifm->ifq_so->so_nqueued = 0; ++ } ++ ++ m_free(ifm); ++ } ++ ++ slirp->if_start_busy = false; ++} +diff --git a/slirp/src/if.h b/slirp/src/if.h +new file mode 100644 +index 0000000..3288298 +--- /dev/null ++++ b/slirp/src/if.h +@@ -0,0 +1,21 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef IF_H ++#define IF_H ++ ++#define IF_COMPRESS 0x01 /* We want compression */ ++#define IF_NOCOMPRESS 0x02 /* Do not do compression */ ++#define IF_AUTOCOMP 0x04 /* Autodetect (default) */ ++#define IF_NOCIDCOMP 0x08 /* CID compression */ ++ ++#define IF_MTU 1500 ++#define IF_MRU 1500 ++#define IF_COMP IF_AUTOCOMP /* Flags for compression */ ++ ++/* 2 for alignment, 14 for ethernet */ ++#define IF_MAXLINKHDR (2 + ETH_HLEN) ++ ++#endif +diff --git a/slirp/src/ip.h b/slirp/src/ip.h +new file mode 100644 +index 0000000..e5d4aa8 +--- /dev/null ++++ b/slirp/src/ip.h +@@ -0,0 +1,242 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. 
Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. 
++ * ++ * @(#)ip.h 8.1 (Berkeley) 6/10/93 ++ * ip.h,v 1.3 1994/08/21 05:27:30 paul Exp ++ */ ++ ++#ifndef IP_H ++#define IP_H ++ ++#include <glib.h> ++ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++#undef NTOHL ++#undef NTOHS ++#undef HTONL ++#undef HTONS ++#define NTOHL(d) ++#define NTOHS(d) ++#define HTONL(d) ++#define HTONS(d) ++#else ++#ifndef NTOHL ++#define NTOHL(d) ((d) = ntohl((d))) ++#endif ++#ifndef NTOHS ++#define NTOHS(d) ((d) = ntohs((uint16_t)(d))) ++#endif ++#ifndef HTONL ++#define HTONL(d) ((d) = htonl((d))) ++#endif ++#ifndef HTONS ++#define HTONS(d) ((d) = htons((uint16_t)(d))) ++#endif ++#endif ++ ++typedef uint32_t n_long; /* long as received from the net */ ++ ++/* ++ * Definitions for internet protocol version 4. ++ * Per RFC 791, September 1981. ++ */ ++#define IPVERSION 4 ++ ++/* ++ * Structure of an internet header, naked of options. ++ */ ++struct ip { ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t ip_v : 4, /* version */ ++ ip_hl : 4; /* header length */ ++#else ++ uint8_t ip_hl : 4, /* header length */ ++ ip_v : 4; /* version */ ++#endif ++ uint8_t ip_tos; /* type of service */ ++ uint16_t ip_len; /* total length */ ++ uint16_t ip_id; /* identification */ ++ uint16_t ip_off; /* fragment offset field */ ++#define IP_DF 0x4000 /* don't fragment flag */ ++#define IP_MF 0x2000 /* more fragments flag */ ++#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ ++ uint8_t ip_ttl; /* time to live */ ++ uint8_t ip_p; /* protocol */ ++ uint16_t ip_sum; /* checksum */ ++ struct in_addr ip_src, ip_dst; /* source and dest address */ ++} SLIRP_PACKED; ++ ++#define IP_MAXPACKET 65535 /* maximum packet size */ ++ ++/* ++ * Definitions for IP type of service (ip_tos) ++ */ ++#define IPTOS_LOWDELAY 0x10 ++#define IPTOS_THROUGHPUT 0x08 ++#define IPTOS_RELIABILITY 0x04 ++ ++/* ++ * Definitions for options. 
++ */ ++#define IPOPT_COPIED(o) ((o)&0x80) ++#define IPOPT_CLASS(o) ((o)&0x60) ++#define IPOPT_NUMBER(o) ((o)&0x1f) ++ ++#define IPOPT_CONTROL 0x00 ++#define IPOPT_RESERVED1 0x20 ++#define IPOPT_DEBMEAS 0x40 ++#define IPOPT_RESERVED2 0x60 ++ ++#define IPOPT_EOL 0 /* end of option list */ ++#define IPOPT_NOP 1 /* no operation */ ++ ++#define IPOPT_RR 7 /* record packet route */ ++#define IPOPT_TS 68 /* timestamp */ ++#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */ ++#define IPOPT_LSRR 131 /* loose source route */ ++#define IPOPT_SATID 136 /* satnet id */ ++#define IPOPT_SSRR 137 /* strict source route */ ++ ++/* ++ * Offsets to fields in options other than EOL and NOP. ++ */ ++#define IPOPT_OPTVAL 0 /* option ID */ ++#define IPOPT_OLEN 1 /* option length */ ++#define IPOPT_OFFSET 2 /* offset within option */ ++#define IPOPT_MINOFF 4 /* min value of above */ ++ ++/* ++ * Time stamp option structure. ++ */ ++struct ip_timestamp { ++ uint8_t ipt_code; /* IPOPT_TS */ ++ uint8_t ipt_len; /* size of structure (variable) */ ++ uint8_t ipt_ptr; /* index of current entry */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t ipt_oflw : 4, /* overflow counter */ ++ ipt_flg : 4; /* flags, see below */ ++#else ++ uint8_t ipt_flg : 4, /* flags, see below */ ++ ipt_oflw : 4; /* overflow counter */ ++#endif ++ union ipt_timestamp { ++ n_long ipt_time[1]; ++ struct ipt_ta { ++ struct in_addr ipt_addr; ++ n_long ipt_time; ++ } ipt_ta[1]; ++ } ipt_timestamp; ++} SLIRP_PACKED; ++ ++/* flag bits for ipt_flg */ ++#define IPOPT_TS_TSONLY 0 /* timestamps only */ ++#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ ++#define IPOPT_TS_PRESPEC 3 /* specified modules only */ ++ ++/* bits for security (not byte swapped) */ ++#define IPOPT_SECUR_UNCLASS 0x0000 ++#define IPOPT_SECUR_CONFID 0xf135 ++#define IPOPT_SECUR_EFTO 0x789a ++#define IPOPT_SECUR_MMMM 0xbc4d ++#define IPOPT_SECUR_RESTR 0xaf13 ++#define IPOPT_SECUR_SECRET 0xd788 ++#define IPOPT_SECUR_TOPSECRET 0x6bc5 ++ ++/* ++ * 
Internet implementation parameters. ++ */ ++#define MAXTTL 255 /* maximum time to live (seconds) */ ++#define IPDEFTTL 64 /* default ttl, from RFC 1340 */ ++#define IPFRAGTTL 60 /* time to live for frags, slowhz */ ++#define IPTTLDEC 1 /* subtracted when forwarding */ ++ ++#define IP_MSS 576 /* default maximum segment size */ ++ ++#if GLIB_SIZEOF_VOID_P == 4 ++struct mbuf_ptr { ++ struct mbuf *mptr; ++ uint32_t dummy; ++} SLIRP_PACKED; ++#else ++struct mbuf_ptr { ++ struct mbuf *mptr; ++} SLIRP_PACKED; ++#endif ++struct qlink { ++ void *next, *prev; ++}; ++ ++/* ++ * Overlay for ip header used by other protocols (tcp, udp). ++ */ ++struct ipovly { ++ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ ++ uint8_t ih_x1; /* (unused) */ ++ uint8_t ih_pr; /* protocol */ ++ uint16_t ih_len; /* protocol length */ ++ struct in_addr ih_src; /* source internet address */ ++ struct in_addr ih_dst; /* destination internet address */ ++} SLIRP_PACKED; ++ ++/* ++ * Ip reassembly queue structure. Each fragment ++ * being reassembled is attached to one of these structures. ++ * They are timed out after ipq_ttl drops to 0, and may also ++ * be reclaimed if memory becomes tight. ++ * size 28 bytes ++ */ ++struct ipq { ++ struct qlink frag_link; /* to ip headers of fragments */ ++ struct qlink ip_link; /* to other reass headers */ ++ uint8_t ipq_ttl; /* time for reass q to live */ ++ uint8_t ipq_p; /* protocol of this fragment */ ++ uint16_t ipq_id; /* sequence id for reassembly */ ++ struct in_addr ipq_src, ipq_dst; ++}; ++ ++/* ++ * Ip header, when holding a fragment. 
++ * ++ * Note: ipf_link must be at same offset as frag_link above ++ */ ++struct ipasfrag { ++ struct qlink ipf_link; ++ struct ip ipf_ip; ++}; ++ ++G_STATIC_ASSERT(offsetof(struct ipq, frag_link) == ++ offsetof(struct ipasfrag, ipf_link)); ++ ++#define ipf_off ipf_ip.ip_off ++#define ipf_tos ipf_ip.ip_tos ++#define ipf_len ipf_ip.ip_len ++#define ipf_next ipf_link.next ++#define ipf_prev ipf_link.prev ++ ++#endif +diff --git a/slirp/src/ip6.h b/slirp/src/ip6.h +new file mode 100644 +index 0000000..0630309 +--- /dev/null ++++ b/slirp/src/ip6.h +@@ -0,0 +1,214 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#ifndef SLIRP_IP6_H ++#define SLIRP_IP6_H ++ ++#include <glib.h> ++#include <string.h> ++ ++#define ALLNODES_MULTICAST \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01 \ ++ } \ ++ } ++ ++#define SOLICITED_NODE_PREFIX \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01, \ ++ 0xff, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00 \ ++ } \ ++ } ++ ++#define LINKLOCAL_ADDR \ ++ { \ ++ .s6_addr = { \ ++ 0xfe, \ ++ 0x80, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x02 \ ++ } \ ++ } ++ ++#define ZERO_ADDR \ ++ { \ ++ .s6_addr = { \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00 \ ++ } \ ++ } ++ ++static inline bool in6_equal(const struct in6_addr *a, const struct in6_addr *b) ++{ ++ return memcmp(a, b, sizeof(*a)) == 0; ++} ++ ++static inline bool in6_equal_net(const struct in6_addr *a, ++ const struct in6_addr *b, int 
prefix_len) ++{ ++ if (memcmp(a, b, prefix_len / 8) != 0) { ++ return 0; ++ } ++ ++ if (prefix_len % 8 == 0) { ++ return 1; ++ } ++ ++ return a->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)) == ++ b->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)); ++} ++ ++static inline bool in6_equal_mach(const struct in6_addr *a, ++ const struct in6_addr *b, int prefix_len) ++{ ++ if (memcmp(&(a->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), ++ &(b->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), ++ 16 - DIV_ROUND_UP(prefix_len, 8)) != 0) { ++ return 0; ++ } ++ ++ if (prefix_len % 8 == 0) { ++ return 1; ++ } ++ ++ return (a->s6_addr[prefix_len / 8] & ++ ((1U << (8 - (prefix_len % 8))) - 1)) == ++ (b->s6_addr[prefix_len / 8] & ((1U << (8 - (prefix_len % 8))) - 1)); ++} ++ ++ ++#define in6_equal_router(a) \ ++ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ ++ in6_equal_mach(a, &slirp->vhost_addr6, slirp->vprefix_len)) || \ ++ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ ++ in6_equal_mach(a, &slirp->vhost_addr6, 64))) ++ ++#define in6_equal_dns(a) \ ++ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ ++ in6_equal_mach(a, &slirp->vnameserver_addr6, slirp->vprefix_len)) || \ ++ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ ++ in6_equal_mach(a, &slirp->vnameserver_addr6, 64))) ++ ++#define in6_equal_host(a) (in6_equal_router(a) || in6_equal_dns(a)) ++ ++#define in6_solicitednode_multicast(a) \ ++ (in6_equal_net(a, &(struct in6_addr)SOLICITED_NODE_PREFIX, 104)) ++ ++#define in6_zero(a) (in6_equal(a, &(struct in6_addr)ZERO_ADDR)) ++ ++/* Compute emulated host MAC address from its ipv6 address */ ++static inline void in6_compute_ethaddr(struct in6_addr ip, ++ uint8_t eth[ETH_ALEN]) ++{ ++ eth[0] = 0x52; ++ eth[1] = 0x56; ++ memcpy(&eth[2], &ip.s6_addr[16 - (ETH_ALEN - 2)], ETH_ALEN - 2); ++} ++ ++/* ++ * Definitions for internet protocol version 6. ++ * Per RFC 2460, December 1998. 
++ */ ++#define IP6VERSION 6 ++#define IP6_HOP_LIMIT 255 ++ ++/* ++ * Structure of an internet header, naked of options. ++ */ ++struct ip6 { ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint32_t ip_v : 4, /* version */ ++ ip_tc_hi : 4, /* traffic class */ ++ ip_tc_lo : 4, ip_fl_hi : 4, /* flow label */ ++ ip_fl_lo : 16; ++#else ++ uint32_t ip_tc_hi : 4, ip_v : 4, ip_fl_hi : 4, ip_tc_lo : 4, ip_fl_lo : 16; ++#endif ++ uint16_t ip_pl; /* payload length */ ++ uint8_t ip_nh; /* next header */ ++ uint8_t ip_hl; /* hop limit */ ++ struct in6_addr ip_src, ip_dst; /* source and dest address */ ++}; ++ ++/* ++ * IPv6 pseudo-header used by upper-layer protocols ++ */ ++struct ip6_pseudohdr { ++ struct in6_addr ih_src; /* source internet address */ ++ struct in6_addr ih_dst; /* destination internet address */ ++ uint32_t ih_pl; /* upper-layer packet length */ ++ uint16_t ih_zero_hi; /* zero */ ++ uint8_t ih_zero_lo; /* zero */ ++ uint8_t ih_nh; /* next header */ ++}; ++ ++/* ++ * We don't want to mark these ip6 structs as packed as they are naturally ++ * correctly aligned; instead assert that there is no stray padding. ++ * If we marked the struct as packed then we would be unable to take ++ * the address of any of the fields in it. ++ */ ++G_STATIC_ASSERT(sizeof(struct ip6) == 40); ++G_STATIC_ASSERT(sizeof(struct ip6_pseudohdr) == 40); ++ ++#endif +diff --git a/slirp/src/ip6_icmp.c b/slirp/src/ip6_icmp.c +new file mode 100644 +index 0000000..8512686 +--- /dev/null ++++ b/slirp/src/ip6_icmp.c +@@ -0,0 +1,434 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. 
++ */ ++ ++#include "slirp.h" ++#include "ip6_icmp.h" ++ ++#define NDP_Interval \ ++ g_rand_int_range(slirp->grand, NDP_MinRtrAdvInterval, NDP_MaxRtrAdvInterval) ++ ++static void ra_timer_handler(void *opaque) ++{ ++ Slirp *slirp = opaque; ++ ++ slirp->cb->timer_mod(slirp->ra_timer, ++ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + ++ NDP_Interval, ++ slirp->opaque); ++ ndp_send_ra(slirp); ++} ++ ++void icmp6_init(Slirp *slirp) ++{ ++ if (!slirp->in6_enabled) { ++ return; ++ } ++ ++ slirp->ra_timer = ++ slirp->cb->timer_new(ra_timer_handler, slirp, slirp->opaque); ++ slirp->cb->timer_mod(slirp->ra_timer, ++ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + ++ NDP_Interval, ++ slirp->opaque); ++} ++ ++void icmp6_cleanup(Slirp *slirp) ++{ ++ if (!slirp->in6_enabled) { ++ return; ++ } ++ ++ slirp->cb->timer_free(slirp->ra_timer, slirp->opaque); ++} ++ ++static void icmp6_send_echoreply(struct mbuf *m, Slirp *slirp, struct ip6 *ip, ++ struct icmp6 *icmp) ++{ ++ struct mbuf *t = m_get(slirp); ++ t->m_len = sizeof(struct ip6) + ntohs(ip->ip_pl); ++ memcpy(t->m_data, m->m_data, t->m_len); ++ ++ /* IPv6 Packet */ ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_dst = ip->ip_src; ++ rip->ip_src = ip->ip_dst; ++ ++ /* ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_ECHO_REPLY; ++ ricmp->icmp6_cksum = 0; ++ ++ /* Checksum */ ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code) ++{ ++ Slirp *slirp = m->slirp; ++ struct mbuf *t; ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ char addrstr[INET6_ADDRSTRLEN]; ++ ++ DEBUG_CALL("icmp6_send_error"); ++ DEBUG_ARG("type = %d, code = %d", type, code); ++ ++ if (IN6_IS_ADDR_MULTICAST(&ip->ip_src) || in6_zero(&ip->ip_src)) { ++ /* TODO icmp error? 
*/ ++ return; ++ } ++ ++ t = m_get(slirp); ++ ++ /* IPv6 packet */ ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR; ++ rip->ip_dst = ip->ip_src; ++ inet_ntop(AF_INET6, &rip->ip_dst, addrstr, INET6_ADDRSTRLEN); ++ DEBUG_ARG("target = %s", addrstr); ++ ++ rip->ip_nh = IPPROTO_ICMPV6; ++ const int error_data_len = ++ MIN(m->m_len, IF_MTU - (sizeof(struct ip6) + ICMP6_ERROR_MINLEN)); ++ rip->ip_pl = htons(ICMP6_ERROR_MINLEN + error_data_len); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = type; ++ ricmp->icmp6_code = code; ++ ricmp->icmp6_cksum = 0; ++ ++ switch (type) { ++ case ICMP6_UNREACH: ++ case ICMP6_TIMXCEED: ++ ricmp->icmp6_err.unused = 0; ++ break; ++ case ICMP6_TOOBIG: ++ ricmp->icmp6_err.mtu = htonl(IF_MTU); ++ break; ++ case ICMP6_PARAMPROB: ++ /* TODO: Handle this case */ ++ break; ++ default: ++ g_assert_not_reached(); ++ break; ++ } ++ t->m_data += ICMP6_ERROR_MINLEN; ++ memcpy(t->m_data, m->m_data, error_data_len); ++ ++ /* Checksum */ ++ t->m_data -= ICMP6_ERROR_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++/* ++ * Send NDP Router Advertisement ++ */ ++void ndp_send_ra(Slirp *slirp) ++{ ++ DEBUG_CALL("ndp_send_ra"); ++ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ size_t pl_size = 0; ++ struct in6_addr addr; ++ uint32_t scope_id; ++ ++ rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR; ++ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; ++ rip->ip_nh = IPPROTO_ICMPV6; ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_RA; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nra.chl = NDP_AdvCurHopLimit; ++ 
ricmp->icmp6_nra.M = NDP_AdvManagedFlag; ++ ricmp->icmp6_nra.O = NDP_AdvOtherConfigFlag; ++ ricmp->icmp6_nra.reserved = 0; ++ ricmp->icmp6_nra.lifetime = htons(NDP_AdvDefaultLifetime); ++ ricmp->icmp6_nra.reach_time = htonl(NDP_AdvReachableTime); ++ ricmp->icmp6_nra.retrans_time = htonl(NDP_AdvRetransTime); ++ t->m_data += ICMP6_NDP_RA_MINLEN; ++ pl_size += ICMP6_NDP_RA_MINLEN; ++ ++ /* Source link-layer address (NDP option) */ ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(rip->ip_src, opt->ndpopt_linklayer); ++ t->m_data += NDPOPT_LINKLAYER_LEN; ++ pl_size += NDPOPT_LINKLAYER_LEN; ++ ++ /* Prefix information (NDP option) */ ++ struct ndpopt *opt2 = mtod(t, struct ndpopt *); ++ opt2->ndpopt_type = NDPOPT_PREFIX_INFO; ++ opt2->ndpopt_len = NDPOPT_PREFIXINFO_LEN / 8; ++ opt2->ndpopt_prefixinfo.prefix_length = slirp->vprefix_len; ++ opt2->ndpopt_prefixinfo.L = 1; ++ opt2->ndpopt_prefixinfo.A = 1; ++ opt2->ndpopt_prefixinfo.reserved1 = 0; ++ opt2->ndpopt_prefixinfo.valid_lt = htonl(NDP_AdvValidLifetime); ++ opt2->ndpopt_prefixinfo.pref_lt = htonl(NDP_AdvPrefLifetime); ++ opt2->ndpopt_prefixinfo.reserved2 = 0; ++ opt2->ndpopt_prefixinfo.prefix = slirp->vprefix_addr6; ++ t->m_data += NDPOPT_PREFIXINFO_LEN; ++ pl_size += NDPOPT_PREFIXINFO_LEN; ++ ++ /* Recursive DNS server address (NDP option) */ ++ if (get_dns6_addr(&addr, &scope_id) >= 0) { ++ /* Host system does have an IPv6 DNS server, announce our proxy. 
*/ ++ struct ndpopt *opt3 = mtod(t, struct ndpopt *); ++ opt3->ndpopt_type = NDPOPT_RDNSS; ++ opt3->ndpopt_len = NDPOPT_RDNSS_LEN / 8; ++ opt3->ndpopt_rdnss.reserved = 0; ++ opt3->ndpopt_rdnss.lifetime = htonl(2 * NDP_MaxRtrAdvInterval); ++ opt3->ndpopt_rdnss.addr = slirp->vnameserver_addr6; ++ t->m_data += NDPOPT_RDNSS_LEN; ++ pl_size += NDPOPT_RDNSS_LEN; ++ } ++ ++ rip->ip_pl = htons(pl_size); ++ t->m_data -= sizeof(struct ip6) + pl_size; ++ t->m_len = sizeof(struct ip6) + pl_size; ++ ++ /* ICMPv6 Checksum */ ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++/* ++ * Send NDP Neighbor Solicitation ++ */ ++void ndp_send_ns(Slirp *slirp, struct in6_addr addr) ++{ ++ char addrstr[INET6_ADDRSTRLEN]; ++ ++ inet_ntop(AF_INET6, &addr, addrstr, INET6_ADDRSTRLEN); ++ ++ DEBUG_CALL("ndp_send_ns"); ++ DEBUG_ARG("target = %s", addrstr); ++ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = slirp->vhost_addr6; ++ rip->ip_dst = (struct in6_addr)SOLICITED_NODE_PREFIX; ++ memcpy(&rip->ip_dst.s6_addr[13], &addr.s6_addr[13], 3); ++ rip->ip_nh = IPPROTO_ICMPV6; ++ rip->ip_pl = htons(ICMP6_NDP_NS_MINLEN + NDPOPT_LINKLAYER_LEN); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_NS; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nns.reserved = 0; ++ ricmp->icmp6_nns.target = addr; ++ ++ /* Build NDP option */ ++ t->m_data += ICMP6_NDP_NS_MINLEN; ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(slirp->vhost_addr6, opt->ndpopt_linklayer); ++ ++ /* ICMPv6 Checksum */ ++ t->m_data -= ICMP6_NDP_NA_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ 
ip6_output(NULL, t, 1); ++} ++ ++/* ++ * Send NDP Neighbor Advertisement ++ */ ++static void ndp_send_na(Slirp *slirp, struct ip6 *ip, struct icmp6 *icmp) ++{ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = icmp->icmp6_nns.target; ++ if (in6_zero(&ip->ip_src)) { ++ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; ++ } else { ++ rip->ip_dst = ip->ip_src; ++ } ++ rip->ip_nh = IPPROTO_ICMPV6; ++ rip->ip_pl = htons(ICMP6_NDP_NA_MINLEN + NDPOPT_LINKLAYER_LEN); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_NA; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nna.R = NDP_IsRouter; ++ ricmp->icmp6_nna.S = !IN6_IS_ADDR_MULTICAST(&rip->ip_dst); ++ ricmp->icmp6_nna.O = 1; ++ ricmp->icmp6_nna.reserved_hi = 0; ++ ricmp->icmp6_nna.reserved_lo = 0; ++ ricmp->icmp6_nna.target = icmp->icmp6_nns.target; ++ ++ /* Build NDP option */ ++ t->m_data += ICMP6_NDP_NA_MINLEN; ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_TARGET; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(ricmp->icmp6_nna.target, opt->ndpopt_linklayer); ++ ++ /* ICMPv6 Checksum */ ++ t->m_data -= ICMP6_NDP_NA_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++/* ++ * Process a NDP message ++ */ ++static void ndp_input(struct mbuf *m, Slirp *slirp, struct ip6 *ip, ++ struct icmp6 *icmp) ++{ ++ m->m_len += ETH_HLEN; ++ m->m_data -= ETH_HLEN; ++ struct ethhdr *eth = mtod(m, struct ethhdr *); ++ m->m_len -= ETH_HLEN; ++ m->m_data += ETH_HLEN; ++ ++ switch (icmp->icmp6_type) { ++ case ICMP6_NDP_RS: ++ DEBUG_CALL(" type = Router Solicitation"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ ntohs(ip->ip_pl) >= 
ICMP6_NDP_RS_MINLEN) { ++ /* Gratuitous NDP */ ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ ++ ndp_send_ra(slirp); ++ } ++ break; ++ ++ case ICMP6_NDP_RA: ++ DEBUG_CALL(" type = Router Advertisement"); ++ slirp->cb->guest_error("Warning: guest sent NDP RA, but shouldn't", ++ slirp->opaque); ++ break; ++ ++ case ICMP6_NDP_NS: ++ DEBUG_CALL(" type = Neighbor Solicitation"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nns.target) && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_NS_MINLEN && ++ (!in6_zero(&ip->ip_src) || ++ in6_solicitednode_multicast(&ip->ip_dst))) { ++ if (in6_equal_host(&icmp->icmp6_nns.target)) { ++ /* Gratuitous NDP */ ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ ndp_send_na(slirp, ip, icmp); ++ } ++ } ++ break; ++ ++ case ICMP6_NDP_NA: ++ DEBUG_CALL(" type = Neighbor Advertisement"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_NA_MINLEN && ++ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nna.target) && ++ (!IN6_IS_ADDR_MULTICAST(&ip->ip_dst) || icmp->icmp6_nna.S == 0)) { ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ } ++ break; ++ ++ case ICMP6_NDP_REDIRECT: ++ DEBUG_CALL(" type = Redirect"); ++ slirp->cb->guest_error( ++ "Warning: guest sent NDP REDIRECT, but shouldn't", slirp->opaque); ++ break; ++ } ++} ++ ++/* ++ * Process a received ICMPv6 message. 
++ */ ++void icmp6_input(struct mbuf *m) ++{ ++ struct icmp6 *icmp; ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ Slirp *slirp = m->slirp; ++ int hlen = sizeof(struct ip6); ++ ++ DEBUG_CALL("icmp6_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (ntohs(ip->ip_pl) < ICMP6_MINLEN) { ++ goto end; ++ } ++ ++ if (ip6_cksum(m)) { ++ goto end; ++ } ++ ++ m->m_len -= hlen; ++ m->m_data += hlen; ++ icmp = mtod(m, struct icmp6 *); ++ m->m_len += hlen; ++ m->m_data -= hlen; ++ ++ DEBUG_ARG("icmp6_type = %d", icmp->icmp6_type); ++ switch (icmp->icmp6_type) { ++ case ICMP6_ECHO_REQUEST: ++ if (in6_equal_host(&ip->ip_dst)) { ++ icmp6_send_echoreply(m, slirp, ip, icmp); ++ } else { ++ /* TODO */ ++ g_critical("external icmpv6 not supported yet"); ++ } ++ break; ++ ++ case ICMP6_NDP_RS: ++ case ICMP6_NDP_RA: ++ case ICMP6_NDP_NS: ++ case ICMP6_NDP_NA: ++ case ICMP6_NDP_REDIRECT: ++ ndp_input(m, slirp, ip, icmp); ++ break; ++ ++ case ICMP6_UNREACH: ++ case ICMP6_TOOBIG: ++ case ICMP6_TIMXCEED: ++ case ICMP6_PARAMPROB: ++ /* XXX? report error? close socket? */ ++ default: ++ break; ++ } ++ ++end: ++ m_free(m); ++} +diff --git a/slirp/src/ip6_icmp.h b/slirp/src/ip6_icmp.h +new file mode 100644 +index 0000000..c37e60f +--- /dev/null ++++ b/slirp/src/ip6_icmp.h +@@ -0,0 +1,219 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#ifndef SLIRP_IP6_ICMP_H ++#define SLIRP_IP6_ICMP_H ++ ++/* ++ * Internet Control Message Protocol version 6 Definitions. ++ * Per RFC 4443, March 2006. ++ * ++ * Neighbor Discovery Protocol Definitions. ++ * Per RFC 4861, September 2007. 
++ */ ++ ++struct icmp6_echo { /* Echo Messages */ ++ uint16_t id; ++ uint16_t seq_num; ++}; ++ ++union icmp6_error_body { ++ uint32_t unused; ++ uint32_t pointer; ++ uint32_t mtu; ++}; ++ ++/* ++ * NDP Messages ++ */ ++struct ndp_rs { /* Router Solicitation Message */ ++ uint32_t reserved; ++}; ++ ++struct ndp_ra { /* Router Advertisement Message */ ++ uint8_t chl; /* Cur Hop Limit */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t M : 1, O : 1, reserved : 6; ++#else ++ uint8_t reserved : 6, O : 1, M : 1; ++#endif ++ uint16_t lifetime; /* Router Lifetime */ ++ uint32_t reach_time; /* Reachable Time */ ++ uint32_t retrans_time; /* Retrans Timer */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_ra) == 12); ++ ++struct ndp_ns { /* Neighbor Solicitation Message */ ++ uint32_t reserved; ++ struct in6_addr target; /* Target Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_ns) == 20); ++ ++struct ndp_na { /* Neighbor Advertisement Message */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint32_t R : 1, /* Router Flag */ ++ S : 1, /* Solicited Flag */ ++ O : 1, /* Override Flag */ ++ reserved_hi : 5, reserved_lo : 24; ++#else ++ uint32_t reserved_hi : 5, O : 1, S : 1, R : 1, reserved_lo : 24; ++#endif ++ struct in6_addr target; /* Target Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_na) == 20); ++ ++struct ndp_redirect { ++ uint32_t reserved; ++ struct in6_addr target; /* Target Address */ ++ struct in6_addr dest; /* Destination Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_redirect) == 36); ++ ++/* ++ * Structure of an icmpv6 header. 
++ */ ++struct icmp6 { ++ uint8_t icmp6_type; /* type of message, see below */ ++ uint8_t icmp6_code; /* type sub code */ ++ uint16_t icmp6_cksum; /* ones complement cksum of struct */ ++ union { ++ union icmp6_error_body error_body; ++ struct icmp6_echo echo; ++ struct ndp_rs ndp_rs; ++ struct ndp_ra ndp_ra; ++ struct ndp_ns ndp_ns; ++ struct ndp_na ndp_na; ++ struct ndp_redirect ndp_redirect; ++ } icmp6_body; ++#define icmp6_err icmp6_body.error_body ++#define icmp6_echo icmp6_body.echo ++#define icmp6_nrs icmp6_body.ndp_rs ++#define icmp6_nra icmp6_body.ndp_ra ++#define icmp6_nns icmp6_body.ndp_ns ++#define icmp6_nna icmp6_body.ndp_na ++#define icmp6_redirect icmp6_body.ndp_redirect ++}; ++ ++G_STATIC_ASSERT(sizeof(struct icmp6) == 40); ++ ++#define ICMP6_MINLEN 4 ++#define ICMP6_ERROR_MINLEN 8 ++#define ICMP6_ECHO_MINLEN 8 ++#define ICMP6_NDP_RS_MINLEN 8 ++#define ICMP6_NDP_RA_MINLEN 16 ++#define ICMP6_NDP_NS_MINLEN 24 ++#define ICMP6_NDP_NA_MINLEN 24 ++#define ICMP6_NDP_REDIRECT_MINLEN 40 ++ ++/* ++ * NDP Options ++ */ ++struct ndpopt { ++ uint8_t ndpopt_type; /* Option type */ ++ uint8_t ndpopt_len; /* /!\ In units of 8 octets */ ++ union { ++ unsigned char linklayer_addr[6]; /* Source/Target Link-layer */ ++#define ndpopt_linklayer ndpopt_body.linklayer_addr ++ struct prefixinfo { /* Prefix Information */ ++ uint8_t prefix_length; ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t L : 1, A : 1, reserved1 : 6; ++#else ++ uint8_t reserved1 : 6, A : 1, L : 1; ++#endif ++ uint32_t valid_lt; /* Valid Lifetime */ ++ uint32_t pref_lt; /* Preferred Lifetime */ ++ uint32_t reserved2; ++ struct in6_addr prefix; ++ } SLIRP_PACKED prefixinfo; ++#define ndpopt_prefixinfo ndpopt_body.prefixinfo ++ struct rdnss { ++ uint16_t reserved; ++ uint32_t lifetime; ++ struct in6_addr addr; ++ } SLIRP_PACKED rdnss; ++#define ndpopt_rdnss ndpopt_body.rdnss ++ } ndpopt_body; ++} SLIRP_PACKED; ++ ++/* NDP options type */ ++#define NDPOPT_LINKLAYER_SOURCE 1 /* Source Link-Layer Address */ 
++#define NDPOPT_LINKLAYER_TARGET 2 /* Target Link-Layer Address */ ++#define NDPOPT_PREFIX_INFO 3 /* Prefix Information */ ++#define NDPOPT_RDNSS 25 /* Recursive DNS Server Address */ ++ ++/* NDP options size, in octets. */ ++#define NDPOPT_LINKLAYER_LEN 8 ++#define NDPOPT_PREFIXINFO_LEN 32 ++#define NDPOPT_RDNSS_LEN 24 ++ ++/* ++ * Definition of type and code field values. ++ * Per https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xml ++ * Last Updated 2012-11-12 ++ */ ++ ++/* Errors */ ++#define ICMP6_UNREACH 1 /* Destination Unreachable */ ++#define ICMP6_UNREACH_NO_ROUTE 0 /* no route to dest */ ++#define ICMP6_UNREACH_DEST_PROHIB 1 /* com with dest prohibited */ ++#define ICMP6_UNREACH_SCOPE 2 /* beyond scope of src addr */ ++#define ICMP6_UNREACH_ADDRESS 3 /* address unreachable */ ++#define ICMP6_UNREACH_PORT 4 /* port unreachable */ ++#define ICMP6_UNREACH_SRC_FAIL 5 /* src addr failed */ ++#define ICMP6_UNREACH_REJECT_ROUTE 6 /* reject route to dest */ ++#define ICMP6_UNREACH_SRC_HDR_ERROR 7 /* error in src routing header */ ++#define ICMP6_TOOBIG 2 /* Packet Too Big */ ++#define ICMP6_TIMXCEED 3 /* Time Exceeded */ ++#define ICMP6_TIMXCEED_INTRANS 0 /* hop limit exceeded in transit */ ++#define ICMP6_TIMXCEED_REASS 1 /* ttl=0 in reass */ ++#define ICMP6_PARAMPROB 4 /* Parameter Problem */ ++#define ICMP6_PARAMPROB_HDR_FIELD 0 /* err header field */ ++#define ICMP6_PARAMPROB_NXTHDR_TYPE 1 /* unrecognized Next Header type */ ++#define ICMP6_PARAMPROB_IPV6_OPT 2 /* unrecognized IPv6 option */ ++ ++/* Informational Messages */ ++#define ICMP6_ECHO_REQUEST 128 /* Echo Request */ ++#define ICMP6_ECHO_REPLY 129 /* Echo Reply */ ++#define ICMP6_NDP_RS 133 /* Router Solicitation (NDP) */ ++#define ICMP6_NDP_RA 134 /* Router Advertisement (NDP) */ ++#define ICMP6_NDP_NS 135 /* Neighbor Solicitation (NDP) */ ++#define ICMP6_NDP_NA 136 /* Neighbor Advertisement (NDP) */ ++#define ICMP6_NDP_REDIRECT 137 /* Redirect Message (NDP) */ ++ ++/* ++ * 
Router Configuration Variables (rfc4861#section-6) ++ */ ++#define NDP_IsRouter 1 ++#define NDP_AdvSendAdvertisements 1 ++#define NDP_MaxRtrAdvInterval 600000 ++#define NDP_MinRtrAdvInterval \ ++ ((NDP_MaxRtrAdvInterval >= 9) ? NDP_MaxRtrAdvInterval / 3 : \ ++ NDP_MaxRtrAdvInterval) ++#define NDP_AdvManagedFlag 0 ++#define NDP_AdvOtherConfigFlag 0 ++#define NDP_AdvLinkMTU 0 ++#define NDP_AdvReachableTime 0 ++#define NDP_AdvRetransTime 0 ++#define NDP_AdvCurHopLimit 64 ++#define NDP_AdvDefaultLifetime ((3 * NDP_MaxRtrAdvInterval) / 1000) ++#define NDP_AdvValidLifetime 86400 ++#define NDP_AdvOnLinkFlag 1 ++#define NDP_AdvPrefLifetime 14400 ++#define NDP_AdvAutonomousFlag 1 ++ ++void icmp6_init(Slirp *slirp); ++void icmp6_cleanup(Slirp *slirp); ++void icmp6_input(struct mbuf *); ++void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code); ++void ndp_send_ra(Slirp *slirp); ++void ndp_send_ns(Slirp *slirp, struct in6_addr addr); ++ ++#endif +diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c +new file mode 100644 +index 0000000..d9d2b7e +--- /dev/null ++++ b/slirp/src/ip6_input.c +@@ -0,0 +1,78 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#include "slirp.h" ++#include "ip6_icmp.h" ++ ++/* ++ * IP initialization: fill in IP protocol switch table. ++ * All protocols not implemented in kernel go to raw IP protocol handler. 
++ */ ++void ip6_init(Slirp *slirp) ++{ ++ icmp6_init(slirp); ++} ++ ++void ip6_cleanup(Slirp *slirp) ++{ ++ icmp6_cleanup(slirp); ++} ++ ++void ip6_input(struct mbuf *m) ++{ ++ struct ip6 *ip6; ++ Slirp *slirp = m->slirp; ++ ++ if (!slirp->in6_enabled) { ++ goto bad; ++ } ++ ++ DEBUG_CALL("ip6_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (m->m_len < sizeof(struct ip6)) { ++ goto bad; ++ } ++ ++ ip6 = mtod(m, struct ip6 *); ++ ++ if (ip6->ip_v != IP6VERSION) { ++ goto bad; ++ } ++ ++ if (ntohs(ip6->ip_pl) > IF_MTU) { ++ icmp6_send_error(m, ICMP6_TOOBIG, 0); ++ goto bad; ++ } ++ ++ /* check ip_ttl for a correct ICMP reply */ ++ if (ip6->ip_hl == 0) { ++ icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); ++ goto bad; ++ } ++ ++ /* ++ * Switch out to protocol's input routine. ++ */ ++ switch (ip6->ip_nh) { ++ case IPPROTO_TCP: ++ NTOHS(ip6->ip_pl); ++ tcp_input(m, sizeof(struct ip6), (struct socket *)NULL, AF_INET6); ++ break; ++ case IPPROTO_UDP: ++ udp6_input(m); ++ break; ++ case IPPROTO_ICMPV6: ++ icmp6_input(m); ++ break; ++ default: ++ m_free(m); ++ } ++ return; ++bad: ++ m_free(m); ++} +diff --git a/slirp/src/ip6_output.c b/slirp/src/ip6_output.c +new file mode 100644 +index 0000000..b861106 +--- /dev/null ++++ b/slirp/src/ip6_output.c +@@ -0,0 +1,39 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#include "slirp.h" ++ ++/* Number of packets queued before we start sending ++ * (to prevent allocing too many mbufs) */ ++#define IF6_THRESH 10 ++ ++/* ++ * IPv6 output. 
The packet in mbuf chain m contains a IP header ++ */ ++int ip6_output(struct socket *so, struct mbuf *m, int fast) ++{ ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ ++ DEBUG_CALL("ip6_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ /* Fill IPv6 header */ ++ ip->ip_v = IP6VERSION; ++ ip->ip_hl = IP6_HOP_LIMIT; ++ ip->ip_tc_hi = 0; ++ ip->ip_tc_lo = 0; ++ ip->ip_fl_hi = 0; ++ ip->ip_fl_lo = 0; ++ ++ if (fast) { ++ if_encap(m->slirp, m); ++ } else { ++ if_output(so, m); ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/ip_icmp.c b/slirp/src/ip_icmp.c +new file mode 100644 +index 0000000..7590cff +--- /dev/null ++++ b/slirp/src/ip_icmp.c +@@ -0,0 +1,477 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 ++ * ip_icmp.c,v 1.7 1995/05/30 08:09:42 rgrimes Exp ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++#ifndef WITH_ICMP_ERROR_MSG ++#define WITH_ICMP_ERROR_MSG 0 ++#endif ++ ++/* The message sent when emulating PING */ ++/* Be nice and tell them it's just a pseudo-ping packet */ ++static const char icmp_ping_msg[] = ++ "This is a pseudo-PING packet used by Slirp to emulate ICMP ECHO-REQUEST " ++ "packets.\n"; ++ ++/* list of actions for icmp_send_error() on RX of an icmp message */ ++static const int icmp_flush[19] = { ++ /* ECHO REPLY (0) */ 0, ++ 1, ++ 1, ++ /* DEST UNREACH (3) */ 1, ++ /* SOURCE QUENCH (4)*/ 1, ++ /* REDIRECT (5) */ 1, ++ 1, ++ 1, ++ /* ECHO (8) */ 0, ++ /* ROUTERADVERT (9) */ 1, ++ /* ROUTERSOLICIT (10) */ 1, ++ /* TIME EXCEEDED (11) */ 1, ++ /* PARAMETER PROBLEM (12) */ 1, ++ /* TIMESTAMP (13) */ 0, ++ /* TIMESTAMP REPLY (14) */ 0, ++ /* INFO (15) */ 0, ++ /* INFO REPLY (16) */ 0, ++ /* ADDR MASK (17) */ 0, ++ /* ADDR MASK REPLY (18) */ 0 ++}; ++ ++void icmp_init(Slirp *slirp) ++{ ++ slirp->icmp.so_next = slirp->icmp.so_prev = &slirp->icmp; ++ slirp->icmp_last_so = &slirp->icmp; ++} ++ ++void icmp_cleanup(Slirp *slirp) ++{ ++ while (slirp->icmp.so_next != &slirp->icmp) { ++ icmp_detach(slirp->icmp.so_next); ++ } ++} ++ ++static int icmp_send(struct socket *so, struct mbuf *m, int hlen) ++{ ++ struct ip *ip = mtod(m, struct ip *); ++ struct sockaddr_in addr; 
++ ++ so->s = slirp_socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); ++ if (so->s == -1) { ++ return -1; ++ } ++ ++ so->so_m = m; ++ so->so_faddr = ip->ip_dst; ++ so->so_laddr = ip->ip_src; ++ so->so_iptos = ip->ip_tos; ++ so->so_type = IPPROTO_ICMP; ++ so->so_state = SS_ISFCONNECTED; ++ so->so_expire = curtime + SO_EXPIRE; ++ ++ addr.sin_family = AF_INET; ++ addr.sin_addr = so->so_faddr; ++ ++ insque(so, &so->slirp->icmp); ++ ++ if (sendto(so->s, m->m_data + hlen, m->m_len - hlen, 0, ++ (struct sockaddr *)&addr, sizeof(addr)) == -1) { ++ DEBUG_MISC("icmp_input icmp sendto tx errno = %d-%s", errno, ++ strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); ++ icmp_detach(so); ++ } ++ ++ return 0; ++} ++ ++void icmp_detach(struct socket *so) ++{ ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++} ++ ++/* ++ * Process a received ICMP message. ++ */ ++void icmp_input(struct mbuf *m, int hlen) ++{ ++ register struct icmp *icp; ++ register struct ip *ip = mtod(m, struct ip *); ++ int icmplen = ip->ip_len; ++ Slirp *slirp = m->slirp; ++ ++ DEBUG_CALL("icmp_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ /* ++ * Locate icmp structure in mbuf, and check ++ * that its not corrupted and of at least minimum length. 
++ */ ++ if (icmplen < ICMP_MINLEN) { /* min 8 bytes payload */ ++ freeit: ++ m_free(m); ++ goto end_error; ++ } ++ ++ m->m_len -= hlen; ++ m->m_data += hlen; ++ icp = mtod(m, struct icmp *); ++ if (cksum(m, icmplen)) { ++ goto freeit; ++ } ++ m->m_len += hlen; ++ m->m_data -= hlen; ++ ++ DEBUG_ARG("icmp_type = %d", icp->icmp_type); ++ switch (icp->icmp_type) { ++ case ICMP_ECHO: ++ ip->ip_len += hlen; /* since ip_input subtracts this */ ++ if (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || ++ ip->ip_dst.s_addr == slirp->vnameserver_addr.s_addr) { ++ icmp_reflect(m); ++ } else if (slirp->restricted) { ++ goto freeit; ++ } else { ++ struct socket *so; ++ struct sockaddr_storage addr; ++ so = socreate(slirp); ++ if (icmp_send(so, m, hlen) == 0) { ++ return; ++ } ++ if (udp_attach(so, AF_INET) == -1) { ++ DEBUG_MISC("icmp_input udp_attach errno = %d-%s", errno, ++ strerror(errno)); ++ sofree(so); ++ m_free(m); ++ goto end_error; ++ } ++ so->so_m = m; ++ so->so_ffamily = AF_INET; ++ so->so_faddr = ip->ip_dst; ++ so->so_fport = htons(7); ++ so->so_lfamily = AF_INET; ++ so->so_laddr = ip->ip_src; ++ so->so_lport = htons(9); ++ so->so_iptos = ip->ip_tos; ++ so->so_type = IPPROTO_ICMP; ++ so->so_state = SS_ISFCONNECTED; ++ ++ /* Send the packet */ ++ addr = so->fhost.ss; ++ sotranslate_out(so, &addr); ++ ++ if (sendto(so->s, icmp_ping_msg, strlen(icmp_ping_msg), 0, ++ (struct sockaddr *)&addr, sockaddr_size(&addr)) == -1) { ++ DEBUG_MISC("icmp_input udp sendto tx errno = %d-%s", errno, ++ strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, ++ strerror(errno)); ++ udp_detach(so); ++ } ++ } /* if ip->ip_dst.s_addr == alias_addr.s_addr */ ++ break; ++ case ICMP_UNREACH: ++ /* XXX? report error? close socket? 
*/ ++ case ICMP_TIMXCEED: ++ case ICMP_PARAMPROB: ++ case ICMP_SOURCEQUENCH: ++ case ICMP_TSTAMP: ++ case ICMP_MASKREQ: ++ case ICMP_REDIRECT: ++ m_free(m); ++ break; ++ ++ default: ++ m_free(m); ++ } /* switch */ ++ ++end_error: ++ /* m is m_free()'d xor put in a socket xor given to ip_send */ ++ return; ++} ++ ++ ++/* ++ * Send an ICMP message in response to a situation ++ * ++ * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. ++ * MAY send more (we do). MUST NOT change this header information. MUST NOT reply ++ * to a multicast/broadcast IP address. MUST NOT reply to a multicast/broadcast ++ * MAC address. MUST reply to only the first fragment. ++ */ ++/* ++ * Send ICMP_UNREACH back to the source regarding msrc. ++ * mbuf *msrc is used as a template, but is NOT m_free()'d. ++ * It is reported as the bad ip packet. The header should ++ * be fully correct and in host byte order. ++ * ICMP fragmentation is illegal. All machines must accept 576 bytes in one ++ * packet. 
The maximum payload is 576-20(ip hdr)-8(icmp hdr)=548 ++ */ ++ ++#define ICMP_MAXDATALEN (IP_MSS - 28) ++void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message) ++{ ++ unsigned hlen, shlen, s_ip_len; ++ register struct ip *ip; ++ register struct icmp *icp; ++ register struct mbuf *m; ++ ++ DEBUG_CALL("icmp_send_error"); ++ DEBUG_ARG("msrc = %p", msrc); ++ ++ if (type != ICMP_UNREACH && type != ICMP_TIMXCEED) ++ goto end_error; ++ ++ /* check msrc before anything dereferences it, the length trace included */ ++ if (!msrc) ++ goto end_error; ++ DEBUG_ARG("msrc_len = %d", msrc->m_len); ++ ip = mtod(msrc, struct ip *); ++ if (slirp_debug & DBG_MISC) { ++ char bufa[20], bufb[20]; ++ strcpy(bufa, inet_ntoa(ip->ip_src)); ++ strcpy(bufb, inet_ntoa(ip->ip_dst)); ++ DEBUG_MISC(" %.16s to %.16s", bufa, bufb); ++ } ++ if (ip->ip_off & IP_OFFMASK) ++ goto end_error; /* Only reply to fragment 0 */ ++ ++ /* Do not reply to source-only IPs (unsigned literal: 0xf << 28 overflows int) */ ++ if ((ip->ip_src.s_addr & htonl(~(0xfU << 28))) == 0) { ++ goto end_error; ++ } ++ ++ shlen = ip->ip_hl << 2; ++ s_ip_len = ip->ip_len; ++ if (ip->ip_p == IPPROTO_ICMP) { ++ icp = (struct icmp *)((char *)ip + shlen); ++ /* ++ * Assume any unknown ICMP type is an error. This isn't ++ * specified by the RFC, but think about it.. 
++ */ ++ if (icp->icmp_type > 18 || icmp_flush[icp->icmp_type]) ++ goto end_error; ++ } ++ ++ /* make a copy */ ++ m = m_get(msrc->slirp); ++ if (!m) { ++ goto end_error; ++ } ++ ++ { ++ int new_m_size; ++ new_m_size = ++ sizeof(struct ip) + ICMP_MINLEN + msrc->m_len + ICMP_MAXDATALEN; ++ if (new_m_size > m->m_size) ++ m_inc(m, new_m_size); ++ } ++ memcpy(m->m_data, msrc->m_data, msrc->m_len); ++ m->m_len = msrc->m_len; /* copy msrc to m */ ++ ++ /* make the header of the reply packet */ ++ ip = mtod(m, struct ip *); ++ hlen = sizeof(struct ip); /* no options in reply */ ++ ++ /* fill in icmp */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ ++ icp = mtod(m, struct icmp *); ++ ++ if (minsize) ++ s_ip_len = shlen + ICMP_MINLEN; /* return header+8b only */ ++ else if (s_ip_len > ICMP_MAXDATALEN) /* maximum size */ ++ s_ip_len = ICMP_MAXDATALEN; ++ ++ m->m_len = ICMP_MINLEN + s_ip_len; /* 8 bytes ICMP header */ ++ ++ /* min. size = 8+sizeof(struct ip)+8 */ ++ ++ icp->icmp_type = type; ++ icp->icmp_code = code; ++ icp->icmp_id = 0; ++ icp->icmp_seq = 0; ++ ++ memcpy(&icp->icmp_ip, msrc->m_data, s_ip_len); /* report the ip packet */ ++ HTONS(icp->icmp_ip.ip_len); ++ HTONS(icp->icmp_ip.ip_id); ++ HTONS(icp->icmp_ip.ip_off); ++ ++ if (message && WITH_ICMP_ERROR_MSG) { /* append message to ICMP packet */ ++ int message_len; ++ char *cpnt; ++ message_len = strlen(message); ++ if (message_len > ICMP_MAXDATALEN) ++ message_len = ICMP_MAXDATALEN; ++ cpnt = (char *)m->m_data + m->m_len; ++ memcpy(cpnt, message, message_len); ++ m->m_len += message_len; ++ } ++ ++ icp->icmp_cksum = 0; ++ icp->icmp_cksum = cksum(m, m->m_len); ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ /* fill in ip */ ++ ip->ip_hl = hlen >> 2; ++ ip->ip_len = m->m_len; ++ ++ ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */ ++ ++ ip->ip_ttl = MAXTTL; ++ ip->ip_p = IPPROTO_ICMP; ++ ip->ip_dst = ip->ip_src; /* ip addresses */ ++ ip->ip_src = m->slirp->vhost_addr; ++ ++ 
(void)ip_output((struct socket *)NULL, m); ++ ++end_error: ++ return; ++} ++#undef ICMP_MAXDATALEN ++ ++/* ++ * Reflect the ip packet back to the source ++ */ ++void icmp_reflect(struct mbuf *m) ++{ ++ register struct ip *ip = mtod(m, struct ip *); ++ int hlen = ip->ip_hl << 2; ++ int optlen = hlen - sizeof(struct ip); ++ register struct icmp *icp; ++ ++ /* ++ * Send an icmp packet back to the ip level, ++ * after supplying a checksum. ++ */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ icp = mtod(m, struct icmp *); ++ ++ icp->icmp_type = ICMP_ECHOREPLY; ++ icp->icmp_cksum = 0; ++ icp->icmp_cksum = cksum(m, ip->ip_len - hlen); ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ /* fill in ip */ ++ if (optlen > 0) { ++ /* ++ * Strip out original options by copying rest of first ++ * mbuf's data back, and adjust the IP length. ++ */ ++ memmove((char *)(ip + 1), (char *)ip + hlen, ++ (unsigned)(m->m_len - hlen)); ++ hlen -= optlen; ++ ip->ip_hl = hlen >> 2; ++ ip->ip_len -= optlen; ++ m->m_len -= optlen; ++ } ++ ++ ip->ip_ttl = MAXTTL; ++ { /* swap */ ++ struct in_addr icmp_dst; ++ icmp_dst = ip->ip_dst; ++ ip->ip_dst = ip->ip_src; ++ ip->ip_src = icmp_dst; ++ } ++ ++ (void)ip_output((struct socket *)NULL, m); ++} ++ ++void icmp_receive(struct socket *so) ++{ ++ struct mbuf *m = so->so_m; ++ struct ip *ip = mtod(m, struct ip *); ++ int hlen = ip->ip_hl << 2; ++ uint8_t error_code; ++ struct icmp *icp; ++ int id, len; ++ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ icp = mtod(m, struct icmp *); ++ ++ id = icp->icmp_id; ++ len = recv(so->s, icp, M_ROOM(m), 0); ++ /* ++ * The behavior of reading SOCK_DGRAM+IPPROTO_ICMP sockets is inconsistent ++ * between host OSes. On Linux, only the ICMP header and payload is ++ * included. On macOS/Darwin, the socket acts like a raw socket and ++ * includes the IP header as well. On other BSDs, SOCK_DGRAM+IPPROTO_ICMP ++ * sockets aren't supported at all, so we treat them like raw sockets. 
It ++ * isn't possible to detect this difference at runtime, so we must use an ++ * #ifdef to determine if we need to remove the IP header. ++ */ ++#ifdef CONFIG_BSD ++ if (len >= (int)sizeof(struct ip)) { /* signed compare: a failed recv() (-1) must not get here */ ++ struct ip *inner_ip = mtod(m, struct ip *); ++ int inner_hlen = inner_ip->ip_hl << 2; ++ if (inner_hlen > len) { ++ len = -1; ++ errno = EINVAL; /* errno values are positive */ ++ } else { ++ len -= inner_hlen; ++ memmove(icp, (unsigned char *)icp + inner_hlen, len); ++ } ++ } else if (len >= 0) { /* datagram too short for an IP header; on recv() failure keep its errno */ ++ len = -1; ++ errno = EINVAL; ++ } ++#endif ++ icp->icmp_id = id; ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ if (len == -1 || len == 0) { ++ if (errno == ENETUNREACH) { ++ error_code = ICMP_UNREACH_NET; ++ } else { ++ error_code = ICMP_UNREACH_HOST; ++ } ++ DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(so->so_m, ICMP_UNREACH, error_code, 0, strerror(errno)); ++ } else { ++ icmp_reflect(so->so_m); ++ so->so_m = NULL; /* Don't m_free() it again! */ ++ } ++ icmp_detach(so); ++} +diff --git a/slirp/src/ip_icmp.h b/slirp/src/ip_icmp.h +new file mode 100644 +index 0000000..84707db +--- /dev/null ++++ b/slirp/src/ip_icmp.h +@@ -0,0 +1,166 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93 ++ * ip_icmp.h,v 1.4 1995/05/30 08:09:43 rgrimes Exp ++ */ ++ ++#ifndef NETINET_IP_ICMP_H ++#define NETINET_IP_ICMP_H ++ ++/* ++ * Interface Control Message Protocol Definitions. ++ * Per RFC 792, September 1981. ++ */ ++ ++typedef uint32_t n_time; ++ ++/* ++ * Structure of an icmp header. 
++ */ ++struct icmp { ++ uint8_t icmp_type; /* type of message, see below */ ++ uint8_t icmp_code; /* type sub code */ ++ uint16_t icmp_cksum; /* ones complement cksum of struct */ ++ union { ++ uint8_t ih_pptr; /* ICMP_PARAMPROB */ ++ struct in_addr ih_gwaddr; /* ICMP_REDIRECT */ ++ struct ih_idseq { ++ uint16_t icd_id; ++ uint16_t icd_seq; ++ } ih_idseq; ++ int ih_void; ++ ++ /* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */ ++ struct ih_pmtu { ++ uint16_t ipm_void; ++ uint16_t ipm_nextmtu; ++ } ih_pmtu; ++ } icmp_hun; ++#define icmp_pptr icmp_hun.ih_pptr ++#define icmp_gwaddr icmp_hun.ih_gwaddr ++#define icmp_id icmp_hun.ih_idseq.icd_id ++#define icmp_seq icmp_hun.ih_idseq.icd_seq ++#define icmp_void icmp_hun.ih_void ++#define icmp_pmvoid icmp_hun.ih_pmtu.ipm_void ++#define icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu ++ union { ++ struct id_ts { ++ n_time its_otime; ++ n_time its_rtime; ++ n_time its_ttime; ++ } id_ts; ++ struct id_ip { ++ struct ip idi_ip; ++ /* options and then 64 bits of data */ ++ } id_ip; ++ uint32_t id_mask; ++ char id_data[1]; ++ } icmp_dun; ++#define icmp_otime icmp_dun.id_ts.its_otime ++#define icmp_rtime icmp_dun.id_ts.its_rtime ++#define icmp_ttime icmp_dun.id_ts.its_ttime ++#define icmp_ip icmp_dun.id_ip.idi_ip ++#define icmp_mask icmp_dun.id_mask ++#define icmp_data icmp_dun.id_data ++}; ++ ++/* ++ * Lower bounds on packet lengths for various types. ++ * For the error advice packets must first ensure that the ++ * packet is large enough to contain the returned ip header. ++ * Only then can we do the check to see if 64 bits of packet ++ * data have been returned, since we need to check the returned ++ * ip header length. 
++ */ ++#define ICMP_MINLEN 8 /* abs minimum */ ++#define ICMP_TSLEN (8 + 3 * sizeof(n_time)) /* timestamp */ ++#define ICMP_MASKLEN 12 /* address mask */ ++#define ICMP_ADVLENMIN (8 + sizeof(struct ip) + 8) /* min */ ++#define ICMP_ADVLEN(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8) ++/* N.B.: must separately check that ip_hl >= 5 */ ++ ++/* ++ * Definition of type and code field values. ++ */ ++#define ICMP_ECHOREPLY 0 /* echo reply */ ++#define ICMP_UNREACH 3 /* dest unreachable, codes: */ ++#define ICMP_UNREACH_NET 0 /* bad net */ ++#define ICMP_UNREACH_HOST 1 /* bad host */ ++#define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */ ++#define ICMP_UNREACH_PORT 3 /* bad port */ ++#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */ ++#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */ ++#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */ ++#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */ ++#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */ ++#define ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */ ++#define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */ ++#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */ ++#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */ ++#define ICMP_SOURCEQUENCH 4 /* packet lost, slow down */ ++#define ICMP_REDIRECT 5 /* shorter route, codes: */ ++#define ICMP_REDIRECT_NET 0 /* for network */ ++#define ICMP_REDIRECT_HOST 1 /* for host */ ++#define ICMP_REDIRECT_TOSNET 2 /* for tos and net */ ++#define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */ ++#define ICMP_ECHO 8 /* echo service */ ++#define ICMP_ROUTERADVERT 9 /* router advertisement */ ++#define ICMP_ROUTERSOLICIT 10 /* router solicitation */ ++#define ICMP_TIMXCEED 11 /* time exceeded, code: */ ++#define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */ ++#define ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */ ++#define ICMP_PARAMPROB 12 /* ip header bad */ ++#define ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. 
absent */ ++#define ICMP_TSTAMP 13 /* timestamp request */ ++#define ICMP_TSTAMPREPLY 14 /* timestamp reply */ ++#define ICMP_IREQ 15 /* information request */ ++#define ICMP_IREQREPLY 16 /* information reply */ ++#define ICMP_MASKREQ 17 /* address mask request */ ++#define ICMP_MASKREPLY 18 /* address mask reply */ ++ ++#define ICMP_MAXTYPE 18 ++ ++#define ICMP_INFOTYPE(type) \ ++ ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \ ++ (type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \ ++ (type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \ ++ (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \ ++ (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY) ++ ++void icmp_init(Slirp *slirp); ++void icmp_cleanup(Slirp *slirp); ++void icmp_input(struct mbuf *, int); ++void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message); ++void icmp_reflect(struct mbuf *); ++void icmp_receive(struct socket *so); ++void icmp_detach(struct socket *so); ++ ++#endif +diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c +new file mode 100644 +index 0000000..8c75d91 +--- /dev/null ++++ b/slirp/src/ip_input.c +@@ -0,0 +1,462 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 ++ * ip_input.c,v 1.11 1994/11/16 10:17:08 jkh Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP are ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp); ++static void ip_freef(Slirp *slirp, struct ipq *fp); ++static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev); ++static void ip_deq(register struct ipasfrag *p); ++ ++/* ++ * IP initialization: fill in IP protocol switch table. ++ * All protocols not implemented in kernel go to raw IP protocol handler. ++ */ ++void ip_init(Slirp *slirp) ++{ ++ slirp->ipq.ip_link.next = slirp->ipq.ip_link.prev = &slirp->ipq.ip_link; ++ udp_init(slirp); ++ tcp_init(slirp); ++ icmp_init(slirp); ++} ++ ++void ip_cleanup(Slirp *slirp) ++{ ++ udp_cleanup(slirp); ++ tcp_cleanup(slirp); ++ icmp_cleanup(slirp); ++} ++ ++/* ++ * Ip input routine. 
Checksum and byte swap header. If fragmented ++ * try to reassemble. Process options. Pass to next level. ++ */ ++void ip_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ register struct ip *ip; ++ int hlen; ++ ++ if (!slirp->in_enabled) { ++ goto bad; ++ } ++ ++ DEBUG_CALL("ip_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (m->m_len < sizeof(struct ip)) { ++ goto bad; ++ } ++ ++ ip = mtod(m, struct ip *); ++ ++ if (ip->ip_v != IPVERSION) { ++ goto bad; ++ } ++ ++ hlen = ip->ip_hl << 2; ++ if (hlen < sizeof(struct ip) || hlen > m->m_len) { /* min header length */ ++ goto bad; /* or packet too short */ ++ } ++ ++ /* keep ip header intact for ICMP reply ++ * ip->ip_sum = cksum(m, hlen); ++ * if (ip->ip_sum) { ++ */ ++ if (cksum(m, hlen)) { ++ goto bad; ++ } ++ ++ /* ++ * Convert fields to host representation. ++ */ ++ NTOHS(ip->ip_len); ++ if (ip->ip_len < hlen) { ++ goto bad; ++ } ++ NTOHS(ip->ip_id); ++ NTOHS(ip->ip_off); ++ ++ /* ++ * Check that the amount of data in the buffers ++ * is as at least much as the IP header would have us expect. ++ * Trim mbufs if longer than we expect. ++ * Drop packet if shorter than we expect. ++ */ ++ if (m->m_len < ip->ip_len) { ++ goto bad; ++ } ++ ++ /* Should drop packet if mbuf too long? hmmm... */ ++ if (m->m_len > ip->ip_len) ++ m_adj(m, ip->ip_len - m->m_len); ++ ++ /* check ip_ttl for a correct ICMP reply */ ++ if (ip->ip_ttl == 0) { ++ icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, "ttl"); ++ goto bad; ++ } ++ ++ /* ++ * If offset or IP_MF are set, must reassemble. ++ * Otherwise, nothing need be done. ++ * (We could look in the reassembly queue to see ++ * if the packet was previously fragmented, ++ * but it's not worth the time; just let them time out.) ++ * ++ * XXX This should fail, don't fragment yet ++ */ ++ if (ip->ip_off & ~IP_DF) { ++ register struct ipq *fp; ++ struct qlink *l; ++ /* ++ * Look for queue of fragments ++ * of this datagram. 
++ */ ++ for (l = slirp->ipq.ip_link.next; l != &slirp->ipq.ip_link; ++ l = l->next) { ++ fp = container_of(l, struct ipq, ip_link); ++ if (ip->ip_id == fp->ipq_id && ++ ip->ip_src.s_addr == fp->ipq_src.s_addr && ++ ip->ip_dst.s_addr == fp->ipq_dst.s_addr && ++ ip->ip_p == fp->ipq_p) ++ goto found; ++ } ++ fp = NULL; ++ found: ++ ++ /* ++ * Adjust ip_len to not reflect header, ++ * set ip_mff if more fragments are expected, ++ * convert offset of this to bytes. ++ */ ++ ip->ip_len -= hlen; ++ if (ip->ip_off & IP_MF) ++ ip->ip_tos |= 1; ++ else ++ ip->ip_tos &= ~1; ++ ++ ip->ip_off <<= 3; ++ ++ /* ++ * If datagram marked as having more fragments ++ * or if this is not the first fragment, ++ * attempt reassembly; if it succeeds, proceed. ++ */ ++ if (ip->ip_tos & 1 || ip->ip_off) { ++ ip = ip_reass(slirp, ip, fp); ++ if (ip == NULL) ++ return; ++ m = dtom(slirp, ip); ++ } else if (fp) ++ ip_freef(slirp, fp); ++ ++ } else ++ ip->ip_len -= hlen; ++ ++ /* ++ * Switch out to protocol's input routine. ++ */ ++ switch (ip->ip_p) { ++ case IPPROTO_TCP: ++ tcp_input(m, hlen, (struct socket *)NULL, AF_INET); ++ break; ++ case IPPROTO_UDP: ++ udp_input(m, hlen); ++ break; ++ case IPPROTO_ICMP: ++ icmp_input(m, hlen); ++ break; ++ default: ++ m_free(m); ++ } ++ return; ++bad: ++ m_free(m); ++} ++ ++#define iptofrag(P) ((struct ipasfrag *)(((char *)(P)) - sizeof(struct qlink))) ++#define fragtoip(P) ((struct ip *)(((char *)(P)) + sizeof(struct qlink))) ++/* ++ * Take incoming datagram fragment and try to ++ * reassemble it into whole datagram. If a chain for ++ * reassembly of this datagram already exists, then it ++ * is given as fp; otherwise have to make a chain. 
++ */ ++static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) ++{ ++ register struct mbuf *m = dtom(slirp, ip); ++ register struct ipasfrag *q; ++ int hlen = ip->ip_hl << 2; ++ int i, next; ++ ++ DEBUG_CALL("ip_reass"); ++ DEBUG_ARG("ip = %p", ip); ++ DEBUG_ARG("fp = %p", fp); ++ DEBUG_ARG("m = %p", m); ++ ++ /* ++ * Presence of header sizes in mbufs ++ * would confuse code below. ++ * Fragment m_data is concatenated. ++ */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ ++ /* ++ * If first fragment to arrive, create a reassembly queue. ++ */ ++ if (fp == NULL) { ++ struct mbuf *t = m_get(slirp); ++ ++ if (t == NULL) { ++ goto dropfrag; ++ } ++ fp = mtod(t, struct ipq *); ++ insque(&fp->ip_link, &slirp->ipq.ip_link); ++ fp->ipq_ttl = IPFRAGTTL; ++ fp->ipq_p = ip->ip_p; ++ fp->ipq_id = ip->ip_id; ++ fp->frag_link.next = fp->frag_link.prev = &fp->frag_link; ++ fp->ipq_src = ip->ip_src; ++ fp->ipq_dst = ip->ip_dst; ++ q = (struct ipasfrag *)fp; ++ goto insert; ++ } ++ ++ /* ++ * Find a segment which begins after this one does. ++ */ ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = q->ipf_next) ++ if (q->ipf_off > ip->ip_off) ++ break; ++ ++ /* ++ * If there is a preceding segment, it may provide some of ++ * our data already. If so, drop the data from the incoming ++ * segment. If it provides all of our data, drop us. ++ */ ++ if (q->ipf_prev != &fp->frag_link) { ++ struct ipasfrag *pq = q->ipf_prev; ++ i = pq->ipf_off + pq->ipf_len - ip->ip_off; ++ if (i > 0) { ++ if (i >= ip->ip_len) ++ goto dropfrag; ++ m_adj(dtom(slirp, ip), i); ++ ip->ip_off += i; ++ ip->ip_len -= i; ++ } ++ } ++ ++ /* ++ * While we overlap succeeding segments trim them or, ++ * if they are completely covered, dequeue them. 
++ */ ++ while (q != (struct ipasfrag *)&fp->frag_link && ++ ip->ip_off + ip->ip_len > q->ipf_off) { ++ i = (ip->ip_off + ip->ip_len) - q->ipf_off; ++ if (i < q->ipf_len) { ++ q->ipf_len -= i; ++ q->ipf_off += i; ++ m_adj(dtom(slirp, q), i); ++ break; ++ } ++ struct ipasfrag *covered = q; /* entirely overlapped by the new fragment */ ++ q = q->ipf_next; ++ ip_deq(covered); /* unlink while the fragment's mbuf is still alive */ ++ m_free(dtom(slirp, covered)); ++ } ++ ++insert: ++ /* ++ * Stick new segment in its place; ++ * check for complete reassembly. ++ */ ++ ip_enq(iptofrag(ip), q->ipf_prev); ++ next = 0; ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = q->ipf_next) { ++ if (q->ipf_off != next) ++ return NULL; ++ next += q->ipf_len; ++ } ++ if (((struct ipasfrag *)(q->ipf_prev))->ipf_tos & 1) ++ return NULL; ++ ++ /* ++ * Reassembly is complete; concatenate fragments. ++ */ ++ q = fp->frag_link.next; ++ m = dtom(slirp, q); ++ ++ int was_ext = m->m_flags & M_EXT; ++ ++ q = (struct ipasfrag *)q->ipf_next; ++ while (q != (struct ipasfrag *)&fp->frag_link) { ++ struct mbuf *t = dtom(slirp, q); ++ q = (struct ipasfrag *)q->ipf_next; ++ m_cat(m, t); ++ } ++ ++ /* ++ * Create header for new ip packet by ++ * modifying header of first packet; ++ * dequeue and discard fragment reassembly header. ++ * Make header visible. ++ */ ++ q = fp->frag_link.next; ++ ++ /* ++ * If the fragments concatenated to an mbuf that's ++ * bigger than the total size of the fragment, then an ++ * m_ext buffer was alloced. But fp->ipq_next points to ++ * the old buffer (in the mbuf), so we must point ip ++ * into the new buffer. 
++ */ ++ if (!was_ext && m->m_flags & M_EXT) { ++ int delta = (char *)q - m->m_dat; ++ q = (struct ipasfrag *)(m->m_ext + delta); ++ } ++ ++ ip = fragtoip(q); ++ ip->ip_len = next; ++ ip->ip_tos &= ~1; ++ ip->ip_src = fp->ipq_src; ++ ip->ip_dst = fp->ipq_dst; ++ remque(&fp->ip_link); ++ (void)m_free(dtom(slirp, fp)); ++ m->m_len += (ip->ip_hl << 2); ++ m->m_data -= (ip->ip_hl << 2); ++ ++ return ip; ++ ++dropfrag: ++ m_free(m); ++ return NULL; ++} ++ ++/* ++ * Free a fragment reassembly header and all ++ * associated datagrams. ++ */ ++static void ip_freef(Slirp *slirp, struct ipq *fp) ++{ ++ register struct ipasfrag *q, *p; ++ ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = p) { ++ p = q->ipf_next; ++ ip_deq(q); ++ m_free(dtom(slirp, q)); ++ } ++ remque(&fp->ip_link); ++ (void)m_free(dtom(slirp, fp)); ++} ++ ++/* ++ * Put an ip fragment on a reassembly chain. ++ * Like insque, but pointers in middle of structure. ++ */ ++static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev) ++{ ++ DEBUG_CALL("ip_enq"); ++ DEBUG_ARG("prev = %p", prev); ++ p->ipf_prev = prev; ++ p->ipf_next = prev->ipf_next; ++ ((struct ipasfrag *)(prev->ipf_next))->ipf_prev = p; ++ prev->ipf_next = p; ++} ++ ++/* ++ * To ip_enq as remque is to insque. ++ */ ++static void ip_deq(register struct ipasfrag *p) ++{ ++ ((struct ipasfrag *)(p->ipf_prev))->ipf_next = p->ipf_next; ++ ((struct ipasfrag *)(p->ipf_next))->ipf_prev = p->ipf_prev; ++} ++ ++/* ++ * IP timer processing; ++ * if a timer expires on a reassembly ++ * queue, discard it. 
++ */ ++void ip_slowtimo(Slirp *slirp) ++{ ++ struct qlink *l; ++ ++ DEBUG_CALL("ip_slowtimo"); ++ ++ l = slirp->ipq.ip_link.next; ++ ++ if (l == NULL) ++ return; ++ ++ while (l != &slirp->ipq.ip_link) { ++ struct ipq *fp = container_of(l, struct ipq, ip_link); ++ l = l->next; ++ if (--fp->ipq_ttl == 0) { ++ ip_freef(slirp, fp); ++ } ++ } ++} ++ ++/* ++ * Strip out IP options, at higher ++ * level protocol in the kernel. ++ * The options are overwritten in place by the payload that ++ * follows them; nothing is saved and nothing is returned. ++ * (XXX) should be deleted; last arg currently ignored. ++ */ ++void ip_stripoptions(register struct mbuf *m, struct mbuf *mopt) ++{ ++ register int i; ++ struct ip *ip = mtod(m, struct ip *); ++ register char *opts; ++ int olen; ++ ++ olen = (ip->ip_hl << 2) - sizeof(struct ip); ++ opts = (char *)(ip + 1); ++ i = m->m_len - (sizeof(struct ip) + olen); ++ memmove(opts, opts + olen, (unsigned)i); /* source and destination overlap when i > olen */ ++ m->m_len -= olen; ++ ++ ip->ip_hl = sizeof(struct ip) >> 2; ++} +diff --git a/slirp/src/ip_output.c b/slirp/src/ip_output.c +new file mode 100644 +index 0000000..f820359 +--- /dev/null ++++ b/slirp/src/ip_output.c +@@ -0,0 +1,169 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 ++ * ip_output.c,v 1.9 1994/11/16 10:17:10 jkh Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP are ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++/* Number of packets queued before we start sending ++ * (to prevent allocing too many mbufs) */ ++#define IF_THRESH 10 ++ ++/* ++ * IP output. The packet in mbuf chain m contains a skeletal IP ++ * header (with len, off, ttl, proto, tos, src, dst). ++ * The mbuf chain containing the packet will be freed. ++ * The mbuf opt, if present, will not be freed. ++ */ ++int ip_output(struct socket *so, struct mbuf *m0) ++{ ++ Slirp *slirp = m0->slirp; ++ register struct ip *ip; ++ register struct mbuf *m = m0; ++ register int hlen = sizeof(struct ip); ++ int len, off, error = 0; ++ ++ DEBUG_CALL("ip_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m0 = %p", m0); ++ ++ ip = mtod(m, struct ip *); ++ /* ++ * Fill in IP header. 
++ */ ++ ip->ip_v = IPVERSION; ++ ip->ip_off &= IP_DF; ++ ip->ip_id = htons(slirp->ip_id++); ++ ip->ip_hl = hlen >> 2; ++ ++ /* ++ * If small enough for interface, can just send directly. ++ */ ++ if ((uint16_t)ip->ip_len <= IF_MTU) { ++ ip->ip_len = htons((uint16_t)ip->ip_len); ++ ip->ip_off = htons((uint16_t)ip->ip_off); ++ ip->ip_sum = 0; ++ ip->ip_sum = cksum(m, hlen); ++ ++ if_output(so, m); ++ goto done; ++ } ++ ++ /* ++ * Too large for interface; fragment if possible. ++ * Must be able to put at least 8 bytes per fragment. ++ */ ++ if (ip->ip_off & IP_DF) { ++ error = -1; ++ goto bad; ++ } ++ ++ len = (IF_MTU - hlen) & ~7; /* ip databytes per packet */ ++ if (len < 8) { ++ error = -1; ++ goto bad; ++ } ++ ++ { ++ int mhlen, firstlen = len; ++ struct mbuf **mnext = &m->m_nextpkt; ++ ++ /* ++ * Loop through length of segment after first fragment, ++ * make new header and copy data of each part and link onto chain. ++ */ ++ m0 = m; ++ mhlen = sizeof(struct ip); ++ for (off = hlen + len; off < (uint16_t)ip->ip_len; off += len) { ++ register struct ip *mhip; ++ m = m_get(slirp); ++ if (m == NULL) { ++ error = -1; ++ goto sendorfree; ++ } ++ *mnext = m; /* link at once so sendorfree also releases it if m_copy fails */ ++ mnext = &m->m_nextpkt; ++ m->m_data += IF_MAXLINKHDR; ++ mhip = mtod(m, struct ip *); ++ *mhip = *ip; ++ ++ m->m_len = mhlen; ++ mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); ++ if (ip->ip_off & IP_MF) ++ mhip->ip_off |= IP_MF; ++ if (off + len >= (uint16_t)ip->ip_len) ++ len = (uint16_t)ip->ip_len - off; ++ else ++ mhip->ip_off |= IP_MF; ++ mhip->ip_len = htons((uint16_t)(len + mhlen)); ++ ++ if (m_copy(m, m0, off, len) < 0) { ++ error = -1; ++ goto sendorfree; ++ } ++ ++ mhip->ip_off = htons((uint16_t)mhip->ip_off); ++ mhip->ip_sum = 0; ++ mhip->ip_sum = cksum(m, mhlen); ++ } ++ /* ++ * Update first fragment by trimming what's been copied out ++ * and updating header, then send each fragment (in order). 
++ */ ++ m = m0; ++ m_adj(m, hlen + firstlen - (uint16_t)ip->ip_len); ++ ip->ip_len = htons((uint16_t)m->m_len); ++ ip->ip_off = htons((uint16_t)(ip->ip_off | IP_MF)); ++ ip->ip_sum = 0; ++ ip->ip_sum = cksum(m, hlen); ++ sendorfree: ++ for (m = m0; m; m = m0) { ++ m0 = m->m_nextpkt; ++ m->m_nextpkt = NULL; ++ if (error == 0) ++ if_output(so, m); ++ else ++ m_free(m); ++ } ++ } ++ ++done: ++ return (error); ++ ++bad: ++ m_free(m0); ++ goto done; ++} +diff --git a/slirp/src/libslirp-version.h.in b/slirp/src/libslirp-version.h.in +new file mode 100644 +index 0000000..59f7a46 +--- /dev/null ++++ b/slirp/src/libslirp-version.h.in +@@ -0,0 +1,23 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef LIBSLIRP_VERSION_H_ ++#define LIBSLIRP_VERSION_H_ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define SLIRP_MAJOR_VERSION @SLIRP_MAJOR_VERSION@ ++#define SLIRP_MINOR_VERSION @SLIRP_MINOR_VERSION@ ++#define SLIRP_MICRO_VERSION @SLIRP_MICRO_VERSION@ ++ ++#define SLIRP_CHECK_VERSION(major,minor,micro) \ ++ (SLIRP_MAJOR_VERSION > (major) || \ ++ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION > (minor)) || \ ++ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION == (minor) && \ ++ SLIRP_MICRO_VERSION >= (micro))) ++ ++#ifdef __cplusplus ++} /* extern "C" */ ++#endif ++ ++#endif /* LIBSLIRP_VERSION_H_ */ +diff --git a/slirp/src/libslirp.h b/slirp/src/libslirp.h +new file mode 100644 +index 0000000..9b2f611 +--- /dev/null ++++ b/slirp/src/libslirp.h +@@ -0,0 +1,119 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef LIBSLIRP_H ++#define LIBSLIRP_H ++ ++#include <stdint.h> ++#include <stdbool.h> ++#include <sys/types.h> ++ ++#ifdef _WIN32 ++#include <winsock2.h> ++#include <in6addr.h> ++#else ++#include <netinet/in.h> ++#include <arpa/inet.h> ++#endif ++ ++#include "libslirp-version.h" ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef struct Slirp Slirp; ++ ++enum { ++ SLIRP_POLL_IN = 1 << 0, ++ SLIRP_POLL_OUT = 1 << 1, ++ SLIRP_POLL_PRI = 1 << 2, ++ SLIRP_POLL_ERR = 1 << 3, ++ SLIRP_POLL_HUP = 1 << 4, ++}; ++ ++typedef ssize_t 
(*SlirpReadCb)(void *buf, size_t len, void *opaque); ++typedef ssize_t (*SlirpWriteCb)(const void *buf, size_t len, void *opaque); ++typedef void (*SlirpTimerCb)(void *opaque); ++typedef int (*SlirpAddPollCb)(int fd, int events, void *opaque); ++typedef int (*SlirpGetREventsCb)(int idx, void *opaque); ++ ++/* ++ * Callbacks from slirp ++ */ ++typedef struct SlirpCb { ++ /* ++ * Send an ethernet frame to the guest network. The opaque ++ * parameter is the one given to slirp_init(). The function ++ * doesn't need to send all the data and may return m_freelist.qh_link = slirp->m_freelist.qh_rlink = &slirp->m_freelist; ++ slirp->m_usedlist.qh_link = slirp->m_usedlist.qh_rlink = &slirp->m_usedlist; ++} ++ ++void m_cleanup(Slirp *slirp) ++{ ++ struct mbuf *m, *next; ++ ++ m = (struct mbuf *)slirp->m_usedlist.qh_link; ++ while ((struct quehead *)m != &slirp->m_usedlist) { ++ next = m->m_next; ++ if (m->m_flags & M_EXT) { ++ g_free(m->m_ext); ++ } ++ g_free(m); ++ m = next; ++ } ++ m = (struct mbuf *)slirp->m_freelist.qh_link; ++ while ((struct quehead *)m != &slirp->m_freelist) { ++ next = m->m_next; ++ g_free(m); ++ m = next; ++ } ++} ++ ++/* ++ * Get an mbuf from the free list, if there are none ++ * allocate one ++ * ++ * Because fragmentation can occur if we alloc new mbufs and ++ * free old mbufs, we mark all mbufs above mbuf_thresh as M_DOFREE, ++ * which tells m_free to actually g_free() it ++ */ ++struct mbuf *m_get(Slirp *slirp) ++{ ++ register struct mbuf *m; ++ int flags = 0; ++ ++ DEBUG_CALL("m_get"); ++ ++ if (slirp->m_freelist.qh_link == &slirp->m_freelist) { ++ m = g_malloc(SLIRP_MSIZE); ++ slirp->mbuf_alloced++; ++ if (slirp->mbuf_alloced > MBUF_THRESH) ++ flags = M_DOFREE; ++ m->slirp = slirp; ++ } else { ++ m = (struct mbuf *)slirp->m_freelist.qh_link; ++ remque(m); ++ } ++ ++ /* Insert it in the used list */ ++ insque(m, &slirp->m_usedlist); ++ m->m_flags = (flags | M_USEDLIST); ++ ++ /* Initialise it */ ++ m->m_size = SLIRP_MSIZE - offsetof(struct 
mbuf, m_dat); ++ m->m_data = m->m_dat; ++ m->m_len = 0; ++ m->m_nextpkt = NULL; ++ m->m_prevpkt = NULL; ++ m->resolution_requested = false; ++ m->expiration_date = (uint64_t)-1; ++ DEBUG_ARG("m = %p", m); ++ return m; ++} ++ ++void m_free(struct mbuf *m) ++{ ++ DEBUG_CALL("m_free"); ++ DEBUG_ARG("m = %p", m); ++ ++ if (m) { ++ /* Remove from m_usedlist */ ++ if (m->m_flags & M_USEDLIST) ++ remque(m); ++ ++ /* If it's M_EXT, free() it */ ++ if (m->m_flags & M_EXT) { ++ g_free(m->m_ext); ++ } ++ /* ++ * Either free() it or put it on the free list ++ */ ++ if (m->m_flags & M_DOFREE) { ++ m->slirp->mbuf_alloced--; ++ g_free(m); ++ } else if ((m->m_flags & M_FREELIST) == 0) { ++ insque(m, &m->slirp->m_freelist); ++ m->m_flags = M_FREELIST; /* Clobber other flags */ ++ } ++ } /* if(m) */ ++} ++ ++/* ++ * Copy data from one mbuf to the end of ++ * the other.. if result is too big for one mbuf, allocate ++ * an M_EXT data segment ++ */ ++void m_cat(struct mbuf *m, struct mbuf *n) ++{ ++ /* ++ * If there's no room, realloc ++ */ ++ if (M_FREEROOM(m) < n->m_len) ++ m_inc(m, m->m_len + n->m_len); ++ ++ memcpy(m->m_data + m->m_len, n->m_data, n->m_len); ++ m->m_len += n->m_len; ++ ++ m_free(n); ++} ++ ++ ++/* make m 'size' bytes large from m_data */ ++void m_inc(struct mbuf *m, int size) ++{ ++ int gapsize; ++ ++ /* some compilers throw up on gotos. This one we can fake. 
*/ ++ if (M_ROOM(m) > size) { ++ return; ++ } ++ ++ if (m->m_flags & M_EXT) { ++ gapsize = m->m_data - m->m_ext; ++ m->m_ext = g_realloc(m->m_ext, size + gapsize); ++ } else { ++ gapsize = m->m_data - m->m_dat; ++ m->m_ext = g_malloc(size + gapsize); ++ memcpy(m->m_ext, m->m_dat, m->m_size); ++ m->m_flags |= M_EXT; ++ } ++ ++ m->m_data = m->m_ext + gapsize; ++ m->m_size = size + gapsize; ++} ++ ++ ++void m_adj(struct mbuf *m, int len) ++{ ++ if (m == NULL) ++ return; ++ if (len >= 0) { ++ /* Trim from head */ ++ m->m_data += len; ++ m->m_len -= len; ++ } else { ++ /* Trim from tail */ ++ len = -len; ++ m->m_len -= len; ++ } ++} ++ ++ ++/* ++ * Copy len bytes from m, starting off bytes into n ++ */ ++int m_copy(struct mbuf *n, struct mbuf *m, int off, int len) ++{ ++ if (len > M_FREEROOM(n)) ++ return -1; ++ ++ memcpy((n->m_data + n->m_len), (m->m_data + off), len); ++ n->m_len += len; ++ return 0; ++} ++ ++ ++/* ++ * Given a pointer into an mbuf, return the mbuf ++ * XXX This is a kludge, I should eliminate the need for it ++ * Fortunately, it's not used often ++ */ ++struct mbuf *dtom(Slirp *slirp, void *dat) ++{ ++ struct mbuf *m; ++ ++ DEBUG_CALL("dtom"); ++ DEBUG_ARG("dat = %p", dat); ++ ++ /* bug corrected for M_EXT buffers */ ++ for (m = (struct mbuf *)slirp->m_usedlist.qh_link; ++ (struct quehead *)m != &slirp->m_usedlist; m = m->m_next) { ++ if (m->m_flags & M_EXT) { ++ if ((char *)dat >= m->m_ext && (char *)dat < (m->m_ext + m->m_size)) ++ return m; ++ } else { ++ if ((char *)dat >= m->m_dat && (char *)dat < (m->m_dat + m->m_size)) ++ return m; ++ } ++ } ++ ++ DEBUG_ERROR("dtom failed"); ++ ++ return (struct mbuf *)0; ++} +diff --git a/slirp/src/mbuf.h b/slirp/src/mbuf.h +new file mode 100644 +index 0000000..546e785 +--- /dev/null ++++ b/slirp/src/mbuf.h +@@ -0,0 +1,127 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. 
++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)mbuf.h 8.3 (Berkeley) 1/21/94 ++ * mbuf.h,v 1.9 1994/11/14 13:54:20 bde Exp ++ */ ++ ++#ifndef MBUF_H ++#define MBUF_H ++ ++/* ++ * Macros for type conversion ++ * mtod(m,t) - convert mbuf pointer to data pointer of correct type ++ */ ++#define mtod(m, t) ((t)(m)->m_data) ++ ++/* XXX About mbufs for slirp: ++ * Only one mbuf is ever used in a chain, for each "cell" of data. ++ * m_nextpkt points to the next packet, if fragmented. 
++ * If the data is too large, the M_EXT is used, and a larger block ++ * is alloced. Therefore, m_free[m] must check for M_EXT and if set ++ * free the m_ext. This is inefficient memory-wise, but who cares. ++ */ ++ ++/* ++ * mbufs allow to have a gap between the start of the allocated buffer (m_ext if ++ * M_EXT is set, m_dat otherwise) and the in-use data: ++ * ++ * |--gapsize----->|---m_len-------> ++ * |----------m_size------------------------------> ++ * |----M_ROOM--------------------> ++ * |-M_FREEROOM--> ++ * ++ * ^ ^ ^ ++ * m_dat/m_ext m_data end of buffer ++ */ ++ ++/* ++ * How much room is in the mbuf, from m_data to the end of the mbuf ++ */ ++#define M_ROOM(m) \ ++ ((m->m_flags & M_EXT) ? (((m)->m_ext + (m)->m_size) - (m)->m_data) : \ ++ (((m)->m_dat + (m)->m_size) - (m)->m_data)) ++ ++/* ++ * How much free room there is ++ */ ++#define M_FREEROOM(m) (M_ROOM(m) - (m)->m_len) ++ ++struct mbuf { ++ /* XXX should union some of these! */ ++ /* header at beginning of each mbuf: */ ++ struct mbuf *m_next; /* Linked list of mbufs */ ++ struct mbuf *m_prev; ++ struct mbuf *m_nextpkt; /* Next packet in queue/record */ ++ struct mbuf *m_prevpkt; /* Flags aren't used in the output queue */ ++ int m_flags; /* Misc flags */ ++ ++ int m_size; /* Size of mbuf, from m_dat or m_ext */ ++ struct socket *m_so; ++ ++ char *m_data; /* Current location of data */ ++ int m_len; /* Amount of data in this mbuf, from m_data */ ++ ++ Slirp *slirp; ++ bool resolution_requested; ++ uint64_t expiration_date; ++ char *m_ext; ++ /* start of dynamic buffer area, must be last element */ ++ char m_dat[]; ++}; ++ ++#define ifq_prev m_prev ++#define ifq_next m_next ++#define ifs_prev m_prevpkt ++#define ifs_next m_nextpkt ++#define ifq_so m_so ++ ++#define M_EXT 0x01 /* m_ext points to more (malloced) data */ ++#define M_FREELIST 0x02 /* mbuf is on free list */ ++#define M_USEDLIST 0x04 /* XXX mbuf is on used list (for dtom()) */ ++#define M_DOFREE \ ++ 0x08 /* when m_free is called on 
the mbuf, free() \ ++ * it rather than putting it on the free list */ ++ ++void m_init(Slirp *); ++void m_cleanup(Slirp *slirp); ++struct mbuf *m_get(Slirp *); ++void m_free(struct mbuf *); ++void m_cat(register struct mbuf *, register struct mbuf *); ++void m_inc(struct mbuf *, int); ++void m_adj(struct mbuf *, int); ++int m_copy(struct mbuf *, struct mbuf *, int, int); ++struct mbuf *dtom(Slirp *, void *); ++ ++static inline void ifs_init(struct mbuf *ifm) ++{ ++ ifm->ifs_next = ifm->ifs_prev = ifm; ++} ++ ++#endif +diff --git a/slirp/src/misc.c b/slirp/src/misc.c +new file mode 100644 +index 0000000..6675acc +--- /dev/null ++++ b/slirp/src/misc.c +@@ -0,0 +1,298 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++inline void insque(void *a, void *b) ++{ ++ register struct quehead *element = (struct quehead *)a; ++ register struct quehead *head = (struct quehead *)b; ++ element->qh_link = head->qh_link; ++ head->qh_link = (struct quehead *)element; ++ element->qh_rlink = (struct quehead *)head; ++ ((struct quehead *)(element->qh_link))->qh_rlink = ++ (struct quehead *)element; ++} ++ ++inline void remque(void *a) ++{ ++ register struct quehead *element = (struct quehead *)a; ++ ((struct quehead *)(element->qh_link))->qh_rlink = element->qh_rlink; ++ ((struct quehead *)(element->qh_rlink))->qh_link = element->qh_link; ++ element->qh_rlink = NULL; ++} ++ ++/* TODO: IPv6 */ ++struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, ++ void *opaque, struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = g_new0(struct gfwd_list, 1); ++ ++ f->write_cb = write_cb; ++ f->opaque = opaque; ++ f->ex_fport = port; ++ f->ex_addr = addr; ++ f->ex_next = *ex_ptr; ++ *ex_ptr = f; ++ ++ return f; ++} ++ ++struct gfwd_list *add_exec(struct gfwd_list **ex_ptr, const char *cmdline, ++ struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = add_guestfwd(ex_ptr, NULL, NULL, 
addr, port); ++ ++ f->ex_exec = g_strdup(cmdline); ++ ++ return f; ++} ++ ++static int slirp_socketpair_with_oob(int sv[2]) ++{ ++ struct sockaddr_in addr = { ++ .sin_family = AF_INET, ++ .sin_port = 0, ++ .sin_addr.s_addr = INADDR_ANY, ++ }; ++ socklen_t addrlen = sizeof(addr); ++ int ret, s; ++ ++ sv[1] = -1; ++ s = slirp_socket(AF_INET, SOCK_STREAM, 0); ++ if (s < 0 || bind(s, (struct sockaddr *)&addr, addrlen) < 0 || ++ listen(s, 1) < 0 || ++ getsockname(s, (struct sockaddr *)&addr, &addrlen) < 0) { ++ goto err; ++ } ++ ++ sv[1] = slirp_socket(AF_INET, SOCK_STREAM, 0); ++ if (sv[1] < 0) { ++ goto err; ++ } ++ /* ++ * This connect won't block because we've already listen()ed on ++ * the server end (even though we won't accept() the connection ++ * until later on). ++ */ ++ do { ++ ret = connect(sv[1], (struct sockaddr *)&addr, addrlen); ++ } while (ret < 0 && errno == EINTR); ++ if (ret < 0) { ++ goto err; ++ } ++ ++ do { ++ sv[0] = accept(s, (struct sockaddr *)&addr, &addrlen); ++ } while (sv[0] < 0 && errno == EINTR); ++ if (sv[0] < 0) { ++ goto err; ++ } ++ ++ closesocket(s); ++ return 0; ++ ++err: ++ g_critical("slirp_socketpair(): %s", strerror(errno)); ++ if (s >= 0) { ++ closesocket(s); ++ } ++ if (sv[1] >= 0) { ++ closesocket(sv[1]); ++ } ++ return -1; ++} ++ ++static void fork_exec_child_setup(gpointer data) ++{ ++#ifndef _WIN32 ++ setsid(); ++#endif ++} ++ ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ ++#if !GLIB_CHECK_VERSION(2, 58, 0) ++typedef struct SlirpGSpawnFds { ++ GSpawnChildSetupFunc child_setup; ++ gpointer user_data; ++ gint stdin_fd; ++ gint stdout_fd; ++ gint stderr_fd; ++} SlirpGSpawnFds; ++ ++static inline void slirp_gspawn_fds_setup(gpointer user_data) ++{ ++ SlirpGSpawnFds *q = (SlirpGSpawnFds *)user_data; ++ ++ dup2(q->stdin_fd, 0); ++ dup2(q->stdout_fd, 1); ++ dup2(q->stderr_fd, 2); ++ q->child_setup(q->user_data); ++} ++#endif ++ ++static inline gboolean 
++g_spawn_async_with_fds_slirp(const gchar *working_directory, gchar **argv, ++ gchar **envp, GSpawnFlags flags, ++ GSpawnChildSetupFunc child_setup, ++ gpointer user_data, GPid *child_pid, gint stdin_fd, ++ gint stdout_fd, gint stderr_fd, GError **error) ++{ ++#if GLIB_CHECK_VERSION(2, 58, 0) ++ return g_spawn_async_with_fds(working_directory, argv, envp, flags, ++ child_setup, user_data, child_pid, stdin_fd, ++ stdout_fd, stderr_fd, error); ++#else ++ SlirpGSpawnFds setup = { ++ .child_setup = child_setup, ++ .user_data = user_data, ++ .stdin_fd = stdin_fd, ++ .stdout_fd = stdout_fd, ++ .stderr_fd = stderr_fd, ++ }; ++ ++ return g_spawn_async(working_directory, argv, envp, flags, ++ slirp_gspawn_fds_setup, &setup, child_pid, error); ++#endif ++} ++ ++#define g_spawn_async_with_fds(wd, argv, env, f, c, d, p, ifd, ofd, efd, err) \ ++ g_spawn_async_with_fds_slirp(wd, argv, env, f, c, d, p, ifd, ofd, efd, err) ++ ++#pragma GCC diagnostic pop ++ ++int fork_exec(struct socket *so, const char *ex) ++{ ++ GError *err = NULL; ++ char **argv; ++ int opt, sp[2]; ++ ++ DEBUG_CALL("fork_exec"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("ex = %p", ex); ++ ++ if (slirp_socketpair_with_oob(sp) < 0) { ++ return 0; ++ } ++ ++ argv = g_strsplit(ex, " ", -1); ++ g_spawn_async_with_fds(NULL /* cwd */, argv, NULL /* env */, ++ G_SPAWN_SEARCH_PATH, fork_exec_child_setup, ++ NULL /* data */, NULL /* child_pid */, sp[1], sp[1], ++ sp[1], &err); ++ g_strfreev(argv); ++ ++ if (err) { ++ g_critical("fork_exec: %s", err->message); ++ g_error_free(err); ++ closesocket(sp[0]); ++ closesocket(sp[1]); ++ return 0; ++ } ++ ++ so->s = sp[0]; ++ closesocket(sp[1]); ++ slirp_socket_set_fast_reuse(so->s); ++ opt = 1; ++ setsockopt(so->s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ slirp_set_nonblock(so->s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ return 1; ++} ++ ++char *slirp_connection_info(Slirp *slirp) ++{ ++ GString *str = g_string_new(NULL); ++ const char *const 
tcpstates[] = { ++ [TCPS_CLOSED] = "CLOSED", [TCPS_LISTEN] = "LISTEN", ++ [TCPS_SYN_SENT] = "SYN_SENT", [TCPS_SYN_RECEIVED] = "SYN_RCVD", ++ [TCPS_ESTABLISHED] = "ESTABLISHED", [TCPS_CLOSE_WAIT] = "CLOSE_WAIT", ++ [TCPS_FIN_WAIT_1] = "FIN_WAIT_1", [TCPS_CLOSING] = "CLOSING", ++ [TCPS_LAST_ACK] = "LAST_ACK", [TCPS_FIN_WAIT_2] = "FIN_WAIT_2", ++ [TCPS_TIME_WAIT] = "TIME_WAIT", ++ }; ++ struct in_addr dst_addr; ++ struct sockaddr_in src; ++ socklen_t src_len; ++ uint16_t dst_port; ++ struct socket *so; ++ const char *state; ++ char buf[20]; ++ ++ g_string_append_printf(str, ++ " Protocol[State] FD Source Address Port " ++ "Dest. Address Port RecvQ SendQ\n"); ++ ++ /* TODO: IPv6 */ ++ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { ++ if (so->so_state & SS_HOSTFWD) { ++ state = "HOST_FORWARD"; ++ } else if (so->so_tcpcb) { ++ state = tcpstates[so->so_tcpcb->t_state]; ++ } else { ++ state = "NONE"; ++ } ++ if (so->so_state & (SS_HOSTFWD | SS_INCOMING)) { ++ src_len = sizeof(src); ++ getsockname(so->s, (struct sockaddr *)&src, &src_len); ++ dst_addr = so->so_laddr; ++ dst_port = so->so_lport; ++ } else { ++ src.sin_addr = so->so_laddr; ++ src.sin_port = so->so_lport; ++ dst_addr = so->so_faddr; ++ dst_port = so->so_fport; ++ } ++ snprintf(buf, sizeof(buf), " TCP[%s]", state); ++ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, ++ src.sin_addr.s_addr ? 
inet_ntoa(src.sin_addr) : ++ "*", ++ ntohs(src.sin_port)); ++ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), ++ ntohs(dst_port), so->so_rcv.sb_cc, ++ so->so_snd.sb_cc); ++ } ++ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so->so_next) { ++ if (so->so_state & SS_HOSTFWD) { ++ snprintf(buf, sizeof(buf), " UDP[HOST_FORWARD]"); ++ src_len = sizeof(src); ++ getsockname(so->s, (struct sockaddr *)&src, &src_len); ++ dst_addr = so->so_laddr; ++ dst_port = so->so_lport; ++ } else { ++ snprintf(buf, sizeof(buf), " UDP[%d sec]", ++ (so->so_expire - curtime) / 1000); ++ src.sin_addr = so->so_laddr; ++ src.sin_port = so->so_lport; ++ dst_addr = so->so_faddr; ++ dst_port = so->so_fport; ++ } ++ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, ++ src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : ++ "*", ++ ntohs(src.sin_port)); ++ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), ++ ntohs(dst_port), so->so_rcv.sb_cc, ++ so->so_snd.sb_cc); ++ } ++ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so->so_next) { ++ snprintf(buf, sizeof(buf), " ICMP[%d sec]", ++ (so->so_expire - curtime) / 1000); ++ src.sin_addr = so->so_laddr; ++ dst_addr = so->so_faddr; ++ g_string_append_printf(str, "%-19s %3d %15s - ", buf, so->s, ++ src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : ++ "*"); ++ g_string_append_printf(str, "%15s - %5d %5d\n", inet_ntoa(dst_addr), ++ so->so_rcv.sb_cc, so->so_snd.sb_cc); ++ } ++ ++ return g_string_free(str, FALSE); ++} +diff --git a/slirp/src/misc.h b/slirp/src/misc.h +new file mode 100644 +index 0000000..ccf8cf0 +--- /dev/null ++++ b/slirp/src/misc.h +@@ -0,0 +1,63 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#ifndef MISC_H ++#define MISC_H ++ ++#include "libslirp.h" ++ ++struct gfwd_list { ++ SlirpWriteCb write_cb; ++ void *opaque; ++ struct in_addr ex_addr; /* Server address */ ++ int ex_fport; /* Port to telnet to */ ++ char *ex_exec; /* Command line of what to exec */ ++ struct gfwd_list *ex_next; ++}; ++ ++#define EMU_NONE 0x0 ++ ++/* TCP emulations */ ++#define EMU_CTL 0x1 ++#define EMU_FTP 0x2 ++#define EMU_KSH 0x3 ++#define EMU_IRC 0x4 ++#define EMU_REALAUDIO 0x5 ++#define EMU_RLOGIN 0x6 ++#define EMU_IDENT 0x7 ++ ++#define EMU_NOCONNECT 0x10 /* Don't connect */ ++ ++struct tos_t { ++ uint16_t lport; ++ uint16_t fport; ++ uint8_t tos; ++ uint8_t emu; ++}; ++ ++struct emu_t { ++ uint16_t lport; ++ uint16_t fport; ++ uint8_t tos; ++ uint8_t emu; ++ struct emu_t *next; ++}; ++ ++struct slirp_quehead { ++ struct slirp_quehead *qh_link; ++ struct slirp_quehead *qh_rlink; ++}; ++ ++void slirp_insque(void *, void *); ++void slirp_remque(void *); ++int fork_exec(struct socket *so, const char *ex); ++ ++struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, ++ void *opaque, struct in_addr addr, int port); ++ ++struct gfwd_list *add_exec(struct gfwd_list **ex_ptr, const char *cmdline, ++ struct in_addr addr, int port); ++ ++#endif +diff --git a/slirp/src/ncsi-pkt.h b/slirp/src/ncsi-pkt.h +new file mode 100644 +index 0000000..7795ad8 +--- /dev/null ++++ b/slirp/src/ncsi-pkt.h +@@ -0,0 +1,445 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright Gavin Shan, IBM Corporation 2016. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. 
Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. 
++ */ ++ ++#ifndef NCSI_PKT_H ++#define NCSI_PKT_H ++ ++/* from linux/net/ncsi/ncsi-pkt.h */ ++#define __be32 uint32_t ++#define __be16 uint16_t ++ ++struct ncsi_pkt_hdr { ++ unsigned char mc_id; /* Management controller ID */ ++ unsigned char revision; /* NCSI version - 0x01 */ ++ unsigned char reserved; /* Reserved */ ++ unsigned char id; /* Packet sequence number */ ++ unsigned char type; /* Packet type */ ++ unsigned char channel; /* Network controller ID */ ++ __be16 length; /* Payload length */ ++ __be32 reserved1[2]; /* Reserved */ ++}; ++ ++struct ncsi_cmd_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++}; ++ ++struct ncsi_rsp_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++ __be16 code; /* Response code */ ++ __be16 reason; /* Response reason */ ++}; ++ ++struct ncsi_aen_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++ unsigned char reserved2[3]; /* Reserved */ ++ unsigned char type; /* AEN packet type */ ++}; ++ ++/* NCSI common command packet */ ++struct ncsi_cmd_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[26]; ++}; ++ ++struct ncsi_rsp_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Select Package */ ++struct ncsi_cmd_sp_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char hw_arbitration; /* HW arbitration */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Disable Channel */ ++struct ncsi_cmd_dc_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char ald; /* Allow link down */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Reset Channel */ ++struct ncsi_cmd_rc_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 reserved; 
/* Reserved */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* AEN Enable */ ++struct ncsi_cmd_ae_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char mc_id; /* MC ID */ ++ __be32 mode; /* AEN working mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++}; ++ ++/* Set Link */ ++struct ncsi_cmd_sl_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 mode; /* Link working mode */ ++ __be32 oem_mode; /* OEM link mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++}; ++ ++/* Set VLAN Filter */ ++struct ncsi_cmd_svf_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be16 reserved; /* Reserved */ ++ __be16 vlan; /* VLAN ID */ ++ __be16 reserved1; /* Reserved */ ++ unsigned char index; /* VLAN table index */ ++ unsigned char enable; /* Enable or disable */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[14]; ++}; ++ ++/* Enable VLAN */ ++struct ncsi_cmd_ev_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char mode; /* VLAN filter mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Set MAC Address */ ++struct ncsi_cmd_sma_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char mac[6]; /* MAC address */ ++ unsigned char index; /* MAC table index */ ++ unsigned char at_e; /* Addr type and operation */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++}; ++ ++/* Enable Broadcast Filter */ ++struct ncsi_cmd_ebf_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 mode; /* Filter mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Enable Global Multicast Filter */ ++struct ncsi_cmd_egmf_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 mode; /* Global MC mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; 
++}; ++ ++/* Set NCSI Flow Control */ ++struct ncsi_cmd_snfc_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char mode; /* Flow control mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Get Link Status */ ++struct ncsi_rsp_gls_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 status; /* Link status */ ++ __be32 other; /* Other indications */ ++ __be32 oem_status; /* OEM link status */ ++ __be32 checksum; ++ unsigned char pad[10]; ++}; ++ ++/* Get Version ID */ ++struct ncsi_rsp_gvi_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 ncsi_version; /* NCSI version */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char alpha2; /* NCSI version */ ++ unsigned char fw_name[12]; /* f/w name string */ ++ __be32 fw_version; /* f/w version */ ++ __be16 pci_ids[4]; /* PCI IDs */ ++ __be32 mf_id; /* Manufacture ID */ ++ __be32 checksum; ++}; ++ ++/* Get Capabilities */ ++struct ncsi_rsp_gc_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 cap; /* Capabilities */ ++ __be32 bc_cap; /* Broadcast cap */ ++ __be32 mc_cap; /* Multicast cap */ ++ __be32 buf_cap; /* Buffering cap */ ++ __be32 aen_cap; /* AEN cap */ ++ unsigned char vlan_cnt; /* VLAN filter count */ ++ unsigned char mixed_cnt; /* Mix filter count */ ++ unsigned char mc_cnt; /* MC filter count */ ++ unsigned char uc_cnt; /* UC filter count */ ++ unsigned char reserved[2]; /* Reserved */ ++ unsigned char vlan_mode; /* VLAN mode */ ++ unsigned char channel_cnt; /* Channel count */ ++ __be32 checksum; /* Checksum */ ++}; ++ ++/* Get Parameters */ ++struct ncsi_rsp_gp_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ unsigned char mac_cnt; /* Number of MAC addr */ ++ unsigned char reserved[2]; /* Reserved */ ++ unsigned char mac_enable; /* MAC addr enable flags */ ++ unsigned char vlan_cnt; /* VLAN tag count */ ++ unsigned char reserved1; /* 
Reserved */ ++ __be16 vlan_enable; /* VLAN tag enable flags */ ++ __be32 link_mode; /* Link setting */ ++ __be32 bc_mode; /* BC filter mode */ ++ __be32 valid_modes; /* Valid mode parameters */ ++ unsigned char vlan_mode; /* VLAN mode */ ++ unsigned char fc_mode; /* Flow control mode */ ++ unsigned char reserved2[2]; /* Reserved */ ++ __be32 aen_mode; /* AEN mode */ ++ unsigned char mac[6]; /* Supported MAC addr */ ++ __be16 vlan; /* Supported VLAN tags */ ++ __be32 checksum; /* Checksum */ ++}; ++ ++/* Get Controller Packet Statistics */ ++struct ncsi_rsp_gcps_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 cnt_hi; /* Counter cleared */ ++ __be32 cnt_lo; /* Counter cleared */ ++ __be32 rx_bytes; /* Rx bytes */ ++ __be32 tx_bytes; /* Tx bytes */ ++ __be32 rx_uc_pkts; /* Rx UC packets */ ++ __be32 rx_mc_pkts; /* Rx MC packets */ ++ __be32 rx_bc_pkts; /* Rx BC packets */ ++ __be32 tx_uc_pkts; /* Tx UC packets */ ++ __be32 tx_mc_pkts; /* Tx MC packets */ ++ __be32 tx_bc_pkts; /* Tx BC packets */ ++ __be32 fcs_err; /* FCS errors */ ++ __be32 align_err; /* Alignment errors */ ++ __be32 false_carrier; /* False carrier detection */ ++ __be32 runt_pkts; /* Rx runt packets */ ++ __be32 jabber_pkts; /* Rx jabber packets */ ++ __be32 rx_pause_xon; /* Rx pause XON frames */ ++ __be32 rx_pause_xoff; /* Rx XOFF frames */ ++ __be32 tx_pause_xon; /* Tx XON frames */ ++ __be32 tx_pause_xoff; /* Tx XOFF frames */ ++ __be32 tx_s_collision; /* Single collision frames */ ++ __be32 tx_m_collision; /* Multiple collision frames */ ++ __be32 l_collision; /* Late collision frames */ ++ __be32 e_collision; /* Excessive collision frames */ ++ __be32 rx_ctl_frames; /* Rx control frames */ ++ __be32 rx_64_frames; /* Rx 64-bytes frames */ ++ __be32 rx_127_frames; /* Rx 65-127 bytes frames */ ++ __be32 rx_255_frames; /* Rx 128-255 bytes frames */ ++ __be32 rx_511_frames; /* Rx 256-511 bytes frames */ ++ __be32 rx_1023_frames; /* Rx 512-1023 bytes frames */ ++ __be32 
rx_1522_frames; /* Rx 1024-1522 bytes frames */ ++ __be32 rx_9022_frames; /* Rx 1523-9022 bytes frames */ ++ __be32 tx_64_frames; /* Tx 64-bytes frames */ ++ __be32 tx_127_frames; /* Tx 65-127 bytes frames */ ++ __be32 tx_255_frames; /* Tx 128-255 bytes frames */ ++ __be32 tx_511_frames; /* Tx 256-511 bytes frames */ ++ __be32 tx_1023_frames; /* Tx 512-1023 bytes frames */ ++ __be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */ ++ __be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */ ++ __be32 rx_valid_bytes; /* Rx valid bytes */ ++ __be32 rx_runt_pkts; /* Rx error runt packets */ ++ __be32 rx_jabber_pkts; /* Rx error jabber packets */ ++ __be32 checksum; /* Checksum */ ++}; ++ ++/* Get NCSI Statistics */ ++struct ncsi_rsp_gns_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 rx_cmds; /* Rx NCSI commands */ ++ __be32 dropped_cmds; /* Dropped commands */ ++ __be32 cmd_type_errs; /* Command type errors */ ++ __be32 cmd_csum_errs; /* Command checksum errors */ ++ __be32 rx_pkts; /* Rx NCSI packets */ ++ __be32 tx_pkts; /* Tx NCSI packets */ ++ __be32 tx_aen_pkts; /* Tx AEN packets */ ++ __be32 checksum; /* Checksum */ ++}; ++ ++/* Get NCSI Pass-through Statistics */ ++struct ncsi_rsp_gnpts_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 tx_pkts; /* Tx packets */ ++ __be32 tx_dropped; /* Tx dropped packets */ ++ __be32 tx_channel_err; /* Tx channel errors */ ++ __be32 tx_us_err; /* Tx undersize errors */ ++ __be32 rx_pkts; /* Rx packets */ ++ __be32 rx_dropped; /* Rx dropped packets */ ++ __be32 rx_channel_err; /* Rx channel errors */ ++ __be32 rx_us_err; /* Rx undersize errors */ ++ __be32 rx_os_err; /* Rx oversize errors */ ++ __be32 checksum; /* Checksum */ ++}; ++ ++/* Get package status */ ++struct ncsi_rsp_gps_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 status; /* Hardware arbitration status */ ++ __be32 checksum; ++}; ++ ++/* Get package UUID */ ++struct ncsi_rsp_gpuuid_pkt { ++ struct 
ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ unsigned char uuid[16]; /* UUID */ ++ __be32 checksum; ++}; ++ ++/* AEN: Link State Change */ ++struct ncsi_aen_lsc_pkt { ++ struct ncsi_aen_pkt_hdr aen; /* AEN header */ ++ __be32 status; /* Link status */ ++ __be32 oem_status; /* OEM link status */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[14]; ++}; ++ ++/* AEN: Configuration Required */ ++struct ncsi_aen_cr_pkt { ++ struct ncsi_aen_pkt_hdr aen; /* AEN header */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* AEN: Host Network Controller Driver Status Change */ ++struct ncsi_aen_hncdsc_pkt { ++ struct ncsi_aen_pkt_hdr aen; /* AEN header */ ++ __be32 status; /* Status */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++}; ++ ++/* NCSI packet revision */ ++#define NCSI_PKT_REVISION 0x01 ++ ++/* NCSI packet commands */ ++#define NCSI_PKT_CMD_CIS 0x00 /* Clear Initial State */ ++#define NCSI_PKT_CMD_SP 0x01 /* Select Package */ ++#define NCSI_PKT_CMD_DP 0x02 /* Deselect Package */ ++#define NCSI_PKT_CMD_EC 0x03 /* Enable Channel */ ++#define NCSI_PKT_CMD_DC 0x04 /* Disable Channel */ ++#define NCSI_PKT_CMD_RC 0x05 /* Reset Channel */ ++#define NCSI_PKT_CMD_ECNT 0x06 /* Enable Channel Network Tx */ ++#define NCSI_PKT_CMD_DCNT 0x07 /* Disable Channel Network Tx */ ++#define NCSI_PKT_CMD_AE 0x08 /* AEN Enable */ ++#define NCSI_PKT_CMD_SL 0x09 /* Set Link */ ++#define NCSI_PKT_CMD_GLS 0x0a /* Get Link */ ++#define NCSI_PKT_CMD_SVF 0x0b /* Set VLAN Filter */ ++#define NCSI_PKT_CMD_EV 0x0c /* Enable VLAN */ ++#define NCSI_PKT_CMD_DV 0x0d /* Disable VLAN */ ++#define NCSI_PKT_CMD_SMA 0x0e /* Set MAC address */ ++#define NCSI_PKT_CMD_EBF 0x10 /* Enable Broadcast Filter */ ++#define NCSI_PKT_CMD_DBF 0x11 /* Disable Broadcast Filter */ ++#define NCSI_PKT_CMD_EGMF 0x12 /* Enable Global Multicast Filter */ ++#define NCSI_PKT_CMD_DGMF 0x13 /* Disable Global Multicast Filter */ ++#define NCSI_PKT_CMD_SNFC 0x14 /* Set NCSI Flow 
Control */ ++#define NCSI_PKT_CMD_GVI 0x15 /* Get Version ID */ ++#define NCSI_PKT_CMD_GC 0x16 /* Get Capabilities */ ++#define NCSI_PKT_CMD_GP 0x17 /* Get Parameters */ ++#define NCSI_PKT_CMD_GCPS 0x18 /* Get Controller Packet Statistics */ ++#define NCSI_PKT_CMD_GNS 0x19 /* Get NCSI Statistics */ ++#define NCSI_PKT_CMD_GNPTS 0x1a /* Get NCSI Pass-throu Statistics */ ++#define NCSI_PKT_CMD_GPS 0x1b /* Get package status */ ++#define NCSI_PKT_CMD_OEM 0x50 /* OEM */ ++#define NCSI_PKT_CMD_PLDM 0x51 /* PLDM request over NCSI over RBT */ ++#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */ ++ ++/* NCSI packet responses */ ++#define NCSI_PKT_RSP_CIS (NCSI_PKT_CMD_CIS + 0x80) ++#define NCSI_PKT_RSP_SP (NCSI_PKT_CMD_SP + 0x80) ++#define NCSI_PKT_RSP_DP (NCSI_PKT_CMD_DP + 0x80) ++#define NCSI_PKT_RSP_EC (NCSI_PKT_CMD_EC + 0x80) ++#define NCSI_PKT_RSP_DC (NCSI_PKT_CMD_DC + 0x80) ++#define NCSI_PKT_RSP_RC (NCSI_PKT_CMD_RC + 0x80) ++#define NCSI_PKT_RSP_ECNT (NCSI_PKT_CMD_ECNT + 0x80) ++#define NCSI_PKT_RSP_DCNT (NCSI_PKT_CMD_DCNT + 0x80) ++#define NCSI_PKT_RSP_AE (NCSI_PKT_CMD_AE + 0x80) ++#define NCSI_PKT_RSP_SL (NCSI_PKT_CMD_SL + 0x80) ++#define NCSI_PKT_RSP_GLS (NCSI_PKT_CMD_GLS + 0x80) ++#define NCSI_PKT_RSP_SVF (NCSI_PKT_CMD_SVF + 0x80) ++#define NCSI_PKT_RSP_EV (NCSI_PKT_CMD_EV + 0x80) ++#define NCSI_PKT_RSP_DV (NCSI_PKT_CMD_DV + 0x80) ++#define NCSI_PKT_RSP_SMA (NCSI_PKT_CMD_SMA + 0x80) ++#define NCSI_PKT_RSP_EBF (NCSI_PKT_CMD_EBF + 0x80) ++#define NCSI_PKT_RSP_DBF (NCSI_PKT_CMD_DBF + 0x80) ++#define NCSI_PKT_RSP_EGMF (NCSI_PKT_CMD_EGMF + 0x80) ++#define NCSI_PKT_RSP_DGMF (NCSI_PKT_CMD_DGMF + 0x80) ++#define NCSI_PKT_RSP_SNFC (NCSI_PKT_CMD_SNFC + 0x80) ++#define NCSI_PKT_RSP_GVI (NCSI_PKT_CMD_GVI + 0x80) ++#define NCSI_PKT_RSP_GC (NCSI_PKT_CMD_GC + 0x80) ++#define NCSI_PKT_RSP_GP (NCSI_PKT_CMD_GP + 0x80) ++#define NCSI_PKT_RSP_GCPS (NCSI_PKT_CMD_GCPS + 0x80) ++#define NCSI_PKT_RSP_GNS (NCSI_PKT_CMD_GNS + 0x80) ++#define NCSI_PKT_RSP_GNPTS (NCSI_PKT_CMD_GNPTS + 
0x80) ++#define NCSI_PKT_RSP_GPS (NCSI_PKT_CMD_GPS + 0x80) ++#define NCSI_PKT_RSP_OEM (NCSI_PKT_CMD_OEM + 0x80) ++#define NCSI_PKT_RSP_PLDM (NCSI_PKT_CMD_PLDM + 0x80) ++#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80) ++ ++/* NCSI response code/reason */ ++#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */ ++#define NCSI_PKT_RSP_C_FAILED 0x0001 /* Command Failed */ ++#define NCSI_PKT_RSP_C_UNAVAILABLE 0x0002 /* Command Unavailable */ ++#define NCSI_PKT_RSP_C_UNSUPPORTED 0x0003 /* Command Unsupported */ ++#define NCSI_PKT_RSP_R_NO_ERROR 0x0000 /* No Error */ ++#define NCSI_PKT_RSP_R_INTERFACE 0x0001 /* Interface not ready */ ++#define NCSI_PKT_RSP_R_PARAM 0x0002 /* Invalid Parameter */ ++#define NCSI_PKT_RSP_R_CHANNEL 0x0003 /* Channel not Ready */ ++#define NCSI_PKT_RSP_R_PACKAGE 0x0004 /* Package not Ready */ ++#define NCSI_PKT_RSP_R_LENGTH 0x0005 /* Invalid payload length */ ++#define NCSI_PKT_RSP_R_UNKNOWN 0x7fff /* Command type unsupported */ ++ ++/* NCSI AEN packet type */ ++#define NCSI_PKT_AEN 0xFF /* AEN Packet */ ++#define NCSI_PKT_AEN_LSC 0x00 /* Link status change */ ++#define NCSI_PKT_AEN_CR 0x01 /* Configuration required */ ++#define NCSI_PKT_AEN_HNCDSC 0x02 /* HNC driver status change */ ++ ++#endif /* NCSI_PKT_H */ +diff --git a/slirp/src/ncsi.c b/slirp/src/ncsi.c +new file mode 100644 +index 0000000..6864b73 +--- /dev/null ++++ b/slirp/src/ncsi.c +@@ -0,0 +1,192 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * NC-SI (Network Controller Sideband Interface) "echo" model ++ * ++ * Copyright (C) 2016-2018 IBM Corp. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. 
Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#include "slirp.h" ++ ++#include "ncsi-pkt.h" ++ ++static uint32_t ncsi_calculate_checksum(uint16_t *data, int len) ++{ ++ uint32_t checksum = 0; ++ int i; ++ ++ /* ++ * 32-bit unsigned sum of the NC-SI packet header and NC-SI packet ++ * payload interpreted as a series of 16-bit unsigned integer values. 
++ */ ++ for (i = 0; i < len; i++) { ++ checksum += htons(data[i]); ++ } ++ ++ checksum = (~checksum + 1); ++ return checksum; ++} ++ ++/* Get Capabilities */ ++static int ncsi_rsp_handler_gc(struct ncsi_rsp_pkt_hdr *rnh) ++{ ++ struct ncsi_rsp_gc_pkt *rsp = (struct ncsi_rsp_gc_pkt *)rnh; ++ ++ rsp->cap = htonl(~0); ++ rsp->bc_cap = htonl(~0); ++ rsp->mc_cap = htonl(~0); ++ rsp->buf_cap = htonl(~0); ++ rsp->aen_cap = htonl(~0); ++ rsp->vlan_mode = 0xff; ++ rsp->uc_cnt = 2; ++ return 0; ++} ++ ++/* Get Link status */ ++static int ncsi_rsp_handler_gls(struct ncsi_rsp_pkt_hdr *rnh) ++{ ++ struct ncsi_rsp_gls_pkt *rsp = (struct ncsi_rsp_gls_pkt *)rnh; ++ ++ rsp->status = htonl(0x1); ++ return 0; ++} ++ ++/* Get Parameters */ ++static int ncsi_rsp_handler_gp(struct ncsi_rsp_pkt_hdr *rnh) ++{ ++ struct ncsi_rsp_gp_pkt *rsp = (struct ncsi_rsp_gp_pkt *)rnh; ++ ++ /* no MAC address filters or VLAN filters on the channel */ ++ rsp->mac_cnt = 0; ++ rsp->mac_enable = 0; ++ rsp->vlan_cnt = 0; ++ rsp->vlan_enable = 0; ++ ++ return 0; ++} ++ ++static const struct ncsi_rsp_handler { ++ unsigned char type; ++ int payload; ++ int (*handler)(struct ncsi_rsp_pkt_hdr *rnh); ++} ncsi_rsp_handlers[] = { { NCSI_PKT_RSP_CIS, 4, NULL }, ++ { NCSI_PKT_RSP_SP, 4, NULL }, ++ { NCSI_PKT_RSP_DP, 4, NULL }, ++ { NCSI_PKT_RSP_EC, 4, NULL }, ++ { NCSI_PKT_RSP_DC, 4, NULL }, ++ { NCSI_PKT_RSP_RC, 4, NULL }, ++ { NCSI_PKT_RSP_ECNT, 4, NULL }, ++ { NCSI_PKT_RSP_DCNT, 4, NULL }, ++ { NCSI_PKT_RSP_AE, 4, NULL }, ++ { NCSI_PKT_RSP_SL, 4, NULL }, ++ { NCSI_PKT_RSP_GLS, 16, ncsi_rsp_handler_gls }, ++ { NCSI_PKT_RSP_SVF, 4, NULL }, ++ { NCSI_PKT_RSP_EV, 4, NULL }, ++ { NCSI_PKT_RSP_DV, 4, NULL }, ++ { NCSI_PKT_RSP_SMA, 4, NULL }, ++ { NCSI_PKT_RSP_EBF, 4, NULL }, ++ { NCSI_PKT_RSP_DBF, 4, NULL }, ++ { NCSI_PKT_RSP_EGMF, 4, NULL }, ++ { NCSI_PKT_RSP_DGMF, 4, NULL }, ++ { NCSI_PKT_RSP_SNFC, 4, NULL }, ++ { NCSI_PKT_RSP_GVI, 40, NULL }, ++ { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc }, ++ { NCSI_PKT_RSP_GP, 40, 
ncsi_rsp_handler_gp }, ++ { NCSI_PKT_RSP_GCPS, 172, NULL }, ++ { NCSI_PKT_RSP_GNS, 172, NULL }, ++ { NCSI_PKT_RSP_GNPTS, 172, NULL }, ++ { NCSI_PKT_RSP_GPS, 8, NULL }, ++ { NCSI_PKT_RSP_OEM, 0, NULL }, ++ { NCSI_PKT_RSP_PLDM, 0, NULL }, ++ { NCSI_PKT_RSP_GPUUID, 20, NULL } }; ++ ++/* ++ * packet format : ncsi header + payload + checksum ++ */ ++#define NCSI_MAX_PAYLOAD 172 ++#define NCSI_MAX_LEN (sizeof(struct ncsi_pkt_hdr) + NCSI_MAX_PAYLOAD + 4) ++ ++void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) ++{ ++ struct ncsi_pkt_hdr *nh = (struct ncsi_pkt_hdr *)(pkt + ETH_HLEN); ++ uint8_t ncsi_reply[ETH_HLEN + NCSI_MAX_LEN]; ++ struct ethhdr *reh = (struct ethhdr *)ncsi_reply; ++ struct ncsi_rsp_pkt_hdr *rnh = ++ (struct ncsi_rsp_pkt_hdr *)(ncsi_reply + ETH_HLEN); ++ const struct ncsi_rsp_handler *handler = NULL; ++ int i; ++ int ncsi_rsp_len = sizeof(*nh); ++ uint32_t checksum; ++ uint32_t *pchecksum; ++ ++ memset(ncsi_reply, 0, sizeof(ncsi_reply)); ++ ++ memset(reh->h_dest, 0xff, ETH_ALEN); ++ memset(reh->h_source, 0xff, ETH_ALEN); ++ reh->h_proto = htons(ETH_P_NCSI); ++ ++ for (i = 0; i < G_N_ELEMENTS(ncsi_rsp_handlers); i++) { ++ if (ncsi_rsp_handlers[i].type == nh->type + 0x80) { ++ handler = &ncsi_rsp_handlers[i]; ++ break; ++ } ++ } ++ ++ rnh->common.mc_id = nh->mc_id; ++ rnh->common.revision = NCSI_PKT_REVISION; ++ rnh->common.id = nh->id; ++ rnh->common.type = nh->type + 0x80; ++ rnh->common.channel = nh->channel; ++ ++ if (handler) { ++ rnh->common.length = htons(handler->payload); ++ rnh->code = htons(NCSI_PKT_RSP_C_COMPLETED); ++ rnh->reason = htons(NCSI_PKT_RSP_R_NO_ERROR); ++ ++ if (handler->handler) { ++ /* TODO: handle errors */ ++ handler->handler(rnh); ++ } ++ ncsi_rsp_len += handler->payload; ++ } else { ++ rnh->common.length = 0; ++ rnh->code = htons(NCSI_PKT_RSP_C_UNAVAILABLE); ++ rnh->reason = htons(NCSI_PKT_RSP_R_UNKNOWN); ++ } ++ ++ /* Add the optional checksum at the end of the frame. 
*/ ++ checksum = ncsi_calculate_checksum((uint16_t *)rnh, ncsi_rsp_len); ++ pchecksum = (uint32_t *)((void *)rnh + ncsi_rsp_len); ++ *pchecksum = htonl(checksum); ++ ncsi_rsp_len += 4; ++ ++ slirp_send_packet_all(slirp, ncsi_reply, ETH_HLEN + ncsi_rsp_len); ++} +diff --git a/slirp/src/ndp_table.c b/slirp/src/ndp_table.c +new file mode 100644 +index 0000000..110d6ea +--- /dev/null ++++ b/slirp/src/ndp_table.c +@@ -0,0 +1,87 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#include "slirp.h" ++ ++void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t ethaddr[ETH_ALEN]) ++{ ++ char addrstr[INET6_ADDRSTRLEN]; ++ NdpTable *ndp_table = &slirp->ndp_table; ++ int i; ++ ++ inet_ntop(AF_INET6, &(ip_addr), addrstr, INET6_ADDRSTRLEN); ++ ++ DEBUG_CALL("ndp_table_add"); ++ DEBUG_ARG("ip = %s", addrstr); ++ DEBUG_ARG("hw addr = %02x:%02x:%02x:%02x:%02x:%02x", ethaddr[0], ethaddr[1], ++ ethaddr[2], ethaddr[3], ethaddr[4], ethaddr[5]); ++ ++ if (IN6_IS_ADDR_MULTICAST(&ip_addr) || in6_zero(&ip_addr)) { ++ /* Do not register multicast or unspecified addresses */ ++ DEBUG_CALL(" abort: do not register multicast or unspecified address"); ++ return; ++ } ++ ++ /* Search for an entry */ ++ for (i = 0; i < NDP_TABLE_SIZE; i++) { ++ if (in6_equal(&ndp_table->table[i].ip_addr, &ip_addr)) { ++ DEBUG_CALL(" already in table: update the entry"); ++ /* Update the entry */ ++ memcpy(ndp_table->table[i].eth_addr, ethaddr, ETH_ALEN); ++ return; ++ } ++ } ++ ++ /* No entry found, create a new one */ ++ DEBUG_CALL(" create new entry"); ++ ndp_table->table[ndp_table->next_victim].ip_addr = ip_addr; ++ memcpy(ndp_table->table[ndp_table->next_victim].eth_addr, ethaddr, ++ ETH_ALEN); ++ ndp_table->next_victim = (ndp_table->next_victim + 1) % NDP_TABLE_SIZE; ++} ++ ++bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]) ++{ ++ char 
addrstr[INET6_ADDRSTRLEN]; ++ NdpTable *ndp_table = &slirp->ndp_table; ++ int i; ++ ++ inet_ntop(AF_INET6, &(ip_addr), addrstr, INET6_ADDRSTRLEN); ++ ++ DEBUG_CALL("ndp_table_search"); ++ DEBUG_ARG("ip = %s", addrstr); ++ ++ assert(!in6_zero(&ip_addr)); ++ ++ /* Multicast address: fec0::abcd:efgh/8 -> 33:33:ab:cd:ef:gh */ ++ if (IN6_IS_ADDR_MULTICAST(&ip_addr)) { ++ out_ethaddr[0] = 0x33; ++ out_ethaddr[1] = 0x33; ++ out_ethaddr[2] = ip_addr.s6_addr[12]; ++ out_ethaddr[3] = ip_addr.s6_addr[13]; ++ out_ethaddr[4] = ip_addr.s6_addr[14]; ++ out_ethaddr[5] = ip_addr.s6_addr[15]; ++ DEBUG_ARG("multicast addr = %02x:%02x:%02x:%02x:%02x:%02x", ++ out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], ++ out_ethaddr[3], out_ethaddr[4], out_ethaddr[5]); ++ return 1; ++ } ++ ++ for (i = 0; i < NDP_TABLE_SIZE; i++) { ++ if (in6_equal(&ndp_table->table[i].ip_addr, &ip_addr)) { ++ memcpy(out_ethaddr, ndp_table->table[i].eth_addr, ETH_ALEN); ++ DEBUG_ARG("found hw addr = %02x:%02x:%02x:%02x:%02x:%02x", ++ out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], ++ out_ethaddr[3], out_ethaddr[4], out_ethaddr[5]); ++ return 1; ++ } ++ } ++ ++ DEBUG_CALL(" ip not found in table"); ++ return 0; ++} +diff --git a/slirp/src/sbuf.c b/slirp/src/sbuf.c +new file mode 100644 +index 0000000..abced48 +--- /dev/null ++++ b/slirp/src/sbuf.c +@@ -0,0 +1,186 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#include "slirp.h" ++ ++static void sbappendsb(struct sbuf *sb, struct mbuf *m); ++ ++void sbfree(struct sbuf *sb) ++{ ++ free(sb->sb_data); ++} ++ ++bool sbdrop(struct sbuf *sb, int num) ++{ ++ int limit = sb->sb_datalen / 2; ++ ++ /* ++ * We can only drop how much we have ++ * This should never succeed ++ */ ++ if (num > sb->sb_cc) ++ num = sb->sb_cc; ++ sb->sb_cc -= num; ++ sb->sb_rptr += num; ++ if (sb->sb_rptr >= sb->sb_data + sb->sb_datalen) ++ sb->sb_rptr -= sb->sb_datalen; ++ ++ if (sb->sb_cc < limit && sb->sb_cc + num >= limit) { ++ return true; ++ } ++ ++ return false; ++} ++ ++void sbreserve(struct sbuf *sb, int size) ++{ ++ if (sb->sb_data) { ++ /* Already alloced, realloc if necessary */ ++ if (sb->sb_datalen != size) { ++ sb->sb_wptr = sb->sb_rptr = sb->sb_data = ++ (char *)realloc(sb->sb_data, size); ++ sb->sb_cc = 0; ++ if (sb->sb_wptr) ++ sb->sb_datalen = size; ++ else ++ sb->sb_datalen = 0; ++ } ++ } else { ++ sb->sb_wptr = sb->sb_rptr = sb->sb_data = (char *)malloc(size); ++ sb->sb_cc = 0; ++ if (sb->sb_wptr) ++ sb->sb_datalen = size; ++ else ++ sb->sb_datalen = 0; ++ } ++} ++ ++/* ++ * Try and write() to the socket, whatever doesn't get written ++ * append to the buffer... for a host with a fast net connection, ++ * this prevents an unnecessary copy of the data ++ * (the socket is non-blocking, so we won't hang) ++ */ ++void sbappend(struct socket *so, struct mbuf *m) ++{ ++ int ret = 0; ++ ++ DEBUG_CALL("sbappend"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m->m_len = %d", m->m_len); ++ ++ /* Shouldn't happen, but... e.g. 
foreign host closes connection */ ++ if (m->m_len <= 0) { ++ m_free(m); ++ return; ++ } ++ ++ /* ++ * If there is urgent data, call sosendoob ++ * if not all was sent, sowrite will take care of the rest ++ * (The rest of this function is just an optimisation) ++ */ ++ if (so->so_urgc) { ++ sbappendsb(&so->so_rcv, m); ++ m_free(m); ++ (void)sosendoob(so); ++ return; ++ } ++ ++ /* ++ * We only write if there's nothing in the buffer, ++ * ottherwise it'll arrive out of order, and hence corrupt ++ */ ++ if (!so->so_rcv.sb_cc) ++ ret = slirp_send(so, m->m_data, m->m_len, 0); ++ ++ if (ret <= 0) { ++ /* ++ * Nothing was written ++ * It's possible that the socket has closed, but ++ * we don't need to check because if it has closed, ++ * it will be detected in the normal way by soread() ++ */ ++ sbappendsb(&so->so_rcv, m); ++ } else if (ret != m->m_len) { ++ /* ++ * Something was written, but not everything.. ++ * sbappendsb the rest ++ */ ++ m->m_len -= ret; ++ m->m_data += ret; ++ sbappendsb(&so->so_rcv, m); ++ } /* else */ ++ /* Whatever happened, we free the mbuf */ ++ m_free(m); ++} ++ ++/* ++ * Copy the data from m into sb ++ * The caller is responsible to make sure there's enough room ++ */ ++static void sbappendsb(struct sbuf *sb, struct mbuf *m) ++{ ++ int len, n, nn; ++ ++ len = m->m_len; ++ ++ if (sb->sb_wptr < sb->sb_rptr) { ++ n = sb->sb_rptr - sb->sb_wptr; ++ if (n > len) ++ n = len; ++ memcpy(sb->sb_wptr, m->m_data, n); ++ } else { ++ /* Do the right edge first */ ++ n = sb->sb_data + sb->sb_datalen - sb->sb_wptr; ++ if (n > len) ++ n = len; ++ memcpy(sb->sb_wptr, m->m_data, n); ++ len -= n; ++ if (len) { ++ /* Now the left edge */ ++ nn = sb->sb_rptr - sb->sb_data; ++ if (nn > len) ++ nn = len; ++ memcpy(sb->sb_data, m->m_data + n, nn); ++ n += nn; ++ } ++ } ++ ++ sb->sb_cc += n; ++ sb->sb_wptr += n; ++ if (sb->sb_wptr >= sb->sb_data + sb->sb_datalen) ++ sb->sb_wptr -= sb->sb_datalen; ++} ++ ++/* ++ * Copy data from sbuf to a normal, straight buffer ++ * 
Don't update the sbuf rptr, this will be ++ * done in sbdrop when the data is acked ++ */ ++void sbcopy(struct sbuf *sb, int off, int len, char *to) ++{ ++ char *from; ++ ++ from = sb->sb_rptr + off; ++ if (from >= sb->sb_data + sb->sb_datalen) ++ from -= sb->sb_datalen; ++ ++ if (from < sb->sb_wptr) { ++ if (len > sb->sb_cc) ++ len = sb->sb_cc; ++ memcpy(to, from, len); ++ } else { ++ /* re-use off */ ++ off = (sb->sb_data + sb->sb_datalen) - from; ++ if (off > len) ++ off = len; ++ memcpy(to, from, off); ++ len -= off; ++ if (len) ++ memcpy(to + off, sb->sb_data, len); ++ } ++} +diff --git a/slirp/src/sbuf.h b/slirp/src/sbuf.h +new file mode 100644 +index 0000000..1eb9f9e +--- /dev/null ++++ b/slirp/src/sbuf.h +@@ -0,0 +1,27 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef SBUF_H ++#define SBUF_H ++ ++#define sbspace(sb) ((sb)->sb_datalen - (sb)->sb_cc) ++ ++struct sbuf { ++ uint32_t sb_cc; /* actual chars in buffer */ ++ uint32_t sb_datalen; /* Length of data */ ++ char *sb_wptr; /* write pointer. points to where the next ++ * bytes should be written in the sbuf */ ++ char *sb_rptr; /* read pointer. 
points to where the next ++ * byte should be read from the sbuf */ ++ char *sb_data; /* Actual data */ ++}; ++ ++void sbfree(struct sbuf *); ++bool sbdrop(struct sbuf *, int); ++void sbreserve(struct sbuf *, int); ++void sbappend(struct socket *, struct mbuf *); ++void sbcopy(struct sbuf *, int, int, char *); ++ ++#endif +diff --git a/slirp/src/slirp.c b/slirp/src/slirp.c +new file mode 100644 +index 0000000..b0194cb +--- /dev/null ++++ b/slirp/src/slirp.c +@@ -0,0 +1,1112 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * libslirp glue ++ * ++ * Copyright (c) 2004-2008 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "slirp.h" ++ ++ ++#ifndef _WIN32 ++#include ++#endif ++ ++int slirp_debug; ++ ++/* Define to 1 if you want KEEPALIVE timers */ ++bool slirp_do_keepalive; ++ ++/* host loopback address */ ++struct in_addr loopback_addr; ++/* host loopback network mask */ ++unsigned long loopback_mask; ++ ++/* emulated hosts use the MAC addr 52:55:IP:IP:IP:IP */ ++static const uint8_t special_ethaddr[ETH_ALEN] = { 0x52, 0x55, 0x00, ++ 0x00, 0x00, 0x00 }; ++ ++unsigned curtime; ++ ++static struct in_addr dns_addr; ++#ifndef _WIN32 ++static struct in6_addr dns6_addr; ++#endif ++static unsigned dns_addr_time; ++#ifndef _WIN32 ++static unsigned dns6_addr_time; ++#endif ++ ++#define TIMEOUT_FAST 2 /* milliseconds */ ++#define TIMEOUT_SLOW 499 /* milliseconds */ ++/* for the aging of certain requests like DNS */ ++#define TIMEOUT_DEFAULT 1000 /* milliseconds */ ++ ++#ifdef _WIN32 ++ ++int get_dns_addr(struct in_addr *pdns_addr) ++{ ++ FIXED_INFO *FixedInfo = NULL; ++ ULONG BufLen; ++ DWORD ret; ++ IP_ADDR_STRING *pIPAddr; ++ struct in_addr tmp_addr; ++ ++ if (dns_addr.s_addr != 0 && (curtime - dns_addr_time) < TIMEOUT_DEFAULT) { ++ *pdns_addr = dns_addr; ++ return 0; ++ } ++ ++ FixedInfo = (FIXED_INFO *)GlobalAlloc(GPTR, sizeof(FIXED_INFO)); ++ BufLen = sizeof(FIXED_INFO); ++ ++ if (ERROR_BUFFER_OVERFLOW == GetNetworkParams(FixedInfo, &BufLen)) { ++ if (FixedInfo) { ++ GlobalFree(FixedInfo); ++ FixedInfo = NULL; ++ } ++ FixedInfo = GlobalAlloc(GPTR, BufLen); ++ } ++ ++ if ((ret = GetNetworkParams(FixedInfo, &BufLen)) != ERROR_SUCCESS) { ++ printf("GetNetworkParams failed. 
ret = %08x\n", (unsigned)ret); ++ if (FixedInfo) { ++ GlobalFree(FixedInfo); ++ FixedInfo = NULL; ++ } ++ return -1; ++ } ++ ++ pIPAddr = &(FixedInfo->DnsServerList); ++ inet_aton(pIPAddr->IpAddress.String, &tmp_addr); ++ *pdns_addr = tmp_addr; ++ dns_addr = tmp_addr; ++ dns_addr_time = curtime; ++ if (FixedInfo) { ++ GlobalFree(FixedInfo); ++ FixedInfo = NULL; ++ } ++ return 0; ++} ++ ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) ++{ ++ return -1; ++} ++ ++static void winsock_cleanup(void) ++{ ++ WSACleanup(); ++} ++ ++#else ++ ++static int get_dns_addr_cached(void *pdns_addr, void *cached_addr, ++ socklen_t addrlen, struct stat *cached_stat, ++ unsigned *cached_time) ++{ ++ struct stat old_stat; ++ if (curtime - *cached_time < TIMEOUT_DEFAULT) { ++ memcpy(pdns_addr, cached_addr, addrlen); ++ return 0; ++ } ++ old_stat = *cached_stat; ++ if (stat("/etc/resolv.conf", cached_stat) != 0) { ++ return -1; ++ } ++ if (cached_stat->st_dev == old_stat.st_dev && ++ cached_stat->st_ino == old_stat.st_ino && ++ cached_stat->st_size == old_stat.st_size && ++ cached_stat->st_mtime == old_stat.st_mtime) { ++ memcpy(pdns_addr, cached_addr, addrlen); ++ return 0; ++ } ++ return 1; ++} ++ ++static int get_dns_addr_resolv_conf(int af, void *pdns_addr, void *cached_addr, ++ socklen_t addrlen, uint32_t *scope_id, ++ unsigned *cached_time) ++{ ++ char buff[512]; ++ char buff2[257]; ++ FILE *f; ++ int found = 0; ++ void *tmp_addr = alloca(addrlen); ++ unsigned if_index; ++ ++ f = fopen("/etc/resolv.conf", "r"); ++ if (!f) ++ return -1; ++ ++ DEBUG_MISC("IP address of your DNS(s):"); ++ while (fgets(buff, 512, f) != NULL) { ++ if (sscanf(buff, "nameserver%*[ \t]%256s", buff2) == 1) { ++ char *c = strchr(buff2, '%'); ++ if (c) { ++ if_index = if_nametoindex(c + 1); ++ *c = '\0'; ++ } else { ++ if_index = 0; ++ } ++ ++ if (!inet_pton(af, buff2, tmp_addr)) { ++ continue; ++ } ++ /* If it's the first one, set it to dns_addr */ ++ if (!found) { ++ memcpy(pdns_addr, 
tmp_addr, addrlen); ++ memcpy(cached_addr, tmp_addr, addrlen); ++ if (scope_id) { ++ *scope_id = if_index; ++ } ++ *cached_time = curtime; ++ } ++ ++ if (++found > 3) { ++ DEBUG_MISC(" (more)"); ++ break; ++ } else if (slirp_debug & DBG_MISC) { ++ char s[INET6_ADDRSTRLEN]; ++ const char *res = inet_ntop(af, tmp_addr, s, sizeof(s)); ++ if (!res) { ++ res = " (string conversion error)"; ++ } ++ DEBUG_MISC(" %s", res); ++ } ++ } ++ } ++ fclose(f); ++ if (!found) ++ return -1; ++ return 0; ++} ++ ++int get_dns_addr(struct in_addr *pdns_addr) ++{ ++ static struct stat dns_addr_stat; ++ ++ if (dns_addr.s_addr != 0) { ++ int ret; ++ ret = get_dns_addr_cached(pdns_addr, &dns_addr, sizeof(dns_addr), ++ &dns_addr_stat, &dns_addr_time); ++ if (ret <= 0) { ++ return ret; ++ } ++ } ++ return get_dns_addr_resolv_conf(AF_INET, pdns_addr, &dns_addr, ++ sizeof(dns_addr), NULL, &dns_addr_time); ++} ++ ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) ++{ ++ static struct stat dns6_addr_stat; ++ ++ if (!in6_zero(&dns6_addr)) { ++ int ret; ++ ret = get_dns_addr_cached(pdns6_addr, &dns6_addr, sizeof(dns6_addr), ++ &dns6_addr_stat, &dns6_addr_time); ++ if (ret <= 0) { ++ return ret; ++ } ++ } ++ return get_dns_addr_resolv_conf(AF_INET6, pdns6_addr, &dns6_addr, ++ sizeof(dns6_addr), scope_id, ++ &dns6_addr_time); ++} ++ ++#endif ++ ++static void slirp_init_once(void) ++{ ++ static int initialized; ++ const char *debug; ++#ifdef _WIN32 ++ WSADATA Data; ++#endif ++ ++ if (initialized) { ++ return; ++ } ++ initialized = 1; ++ ++#ifdef _WIN32 ++ WSAStartup(MAKEWORD(2, 0), &Data); ++ atexit(winsock_cleanup); ++#endif ++ ++ loopback_addr.s_addr = htonl(INADDR_LOOPBACK); ++ loopback_mask = htonl(IN_CLASSA_NET); ++ ++ debug = g_getenv("SLIRP_DEBUG"); ++ if (debug) { ++ const GDebugKey keys[] = { ++ { "call", DBG_CALL }, ++ { "misc", DBG_MISC }, ++ { "error", DBG_ERROR }, ++ { "tftp", DBG_TFTP }, ++ }; ++ slirp_debug = g_parse_debug_string(debug, keys, G_N_ELEMENTS(keys)); ++ } 
++} ++ ++Slirp *slirp_init(int restricted, bool in_enabled, struct in_addr vnetwork, ++ struct in_addr vnetmask, struct in_addr vhost, ++ bool in6_enabled, struct in6_addr vprefix_addr6, ++ uint8_t vprefix_len, struct in6_addr vhost6, ++ const char *vhostname, const char *tftp_server_name, ++ const char *tftp_path, const char *bootfile, ++ struct in_addr vdhcp_start, struct in_addr vnameserver, ++ struct in6_addr vnameserver6, const char **vdnssearch, ++ const char *vdomainname, const SlirpCb *callbacks, ++ void *opaque) ++{ ++ Slirp *slirp = g_malloc0(sizeof(Slirp)); ++ ++ slirp_init_once(); ++ ++ slirp->opaque = opaque; ++ slirp->cb = callbacks; ++ slirp->grand = g_rand_new(); ++ slirp->restricted = restricted; ++ ++ slirp->in_enabled = in_enabled; ++ slirp->in6_enabled = in6_enabled; ++ ++ if_init(slirp); ++ ip_init(slirp); ++ ip6_init(slirp); ++ ++ /* Initialise mbufs *after* setting the MTU */ ++ m_init(slirp); ++ ++ slirp->vnetwork_addr = vnetwork; ++ slirp->vnetwork_mask = vnetmask; ++ slirp->vhost_addr = vhost; ++ slirp->vprefix_addr6 = vprefix_addr6; ++ slirp->vprefix_len = vprefix_len; ++ slirp->vhost_addr6 = vhost6; ++ if (vhostname) { ++ slirp_pstrcpy(slirp->client_hostname, sizeof(slirp->client_hostname), ++ vhostname); ++ } ++ slirp->tftp_prefix = g_strdup(tftp_path); ++ slirp->bootp_filename = g_strdup(bootfile); ++ slirp->vdomainname = g_strdup(vdomainname); ++ slirp->vdhcp_startaddr = vdhcp_start; ++ slirp->vnameserver_addr = vnameserver; ++ slirp->vnameserver_addr6 = vnameserver6; ++ slirp->tftp_server_name = g_strdup(tftp_server_name); ++ ++ if (vdnssearch) { ++ translate_dnssearch(slirp, vdnssearch); ++ } ++ ++ return slirp; ++} ++ ++void slirp_cleanup(Slirp *slirp) ++{ ++ struct gfwd_list *e, *next; ++ ++ for (e = slirp->guestfwd_list; e; e = next) { ++ next = e->ex_next; ++ g_free(e->ex_exec); ++ g_free(e); ++ } ++ ++ ip_cleanup(slirp); ++ ip6_cleanup(slirp); ++ m_cleanup(slirp); ++ ++ g_rand_free(slirp->grand); ++ ++ 
g_free(slirp->vdnssearch); ++ g_free(slirp->tftp_prefix); ++ g_free(slirp->bootp_filename); ++ g_free(slirp->vdomainname); ++ g_free(slirp); ++} ++ ++#define CONN_CANFSEND(so) \ ++ (((so)->so_state & (SS_FCANTSENDMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED) ++#define CONN_CANFRCV(so) \ ++ (((so)->so_state & (SS_FCANTRCVMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED) ++ ++static void slirp_update_timeout(Slirp *slirp, uint32_t *timeout) ++{ ++ uint32_t t; ++ ++ if (*timeout <= TIMEOUT_FAST) { ++ return; ++ } ++ ++ t = MIN(1000, *timeout); ++ ++ /* If we have tcp timeout with slirp, then we will fill @timeout with ++ * more precise value. ++ */ ++ if (slirp->time_fasttimo) { ++ *timeout = TIMEOUT_FAST; ++ return; ++ } ++ if (slirp->do_slowtimo) { ++ t = MIN(TIMEOUT_SLOW, t); ++ } ++ *timeout = t; ++} ++ ++void slirp_pollfds_fill(Slirp *slirp, uint32_t *timeout, ++ SlirpAddPollCb add_poll, void *opaque) ++{ ++ struct socket *so, *so_next; ++ ++ /* ++ * First, TCP sockets ++ */ ++ ++ /* ++ * *_slowtimo needs calling if there are IP fragments ++ * in the fragment queue, or there are TCP connections active ++ */ ++ slirp->do_slowtimo = ((slirp->tcb.so_next != &slirp->tcb) || ++ (&slirp->ipq.ip_link != slirp->ipq.ip_link.next)); ++ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) { ++ int events = 0; ++ ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if we need a tcp_fasttimo ++ */ ++ if (slirp->time_fasttimo == 0 && so->so_tcpcb->t_flags & TF_DELACK) { ++ slirp->time_fasttimo = curtime; /* Flag when want a fasttimo */ ++ } ++ ++ /* ++ * NOFDREF can include still connecting to local-host, ++ * newly socreated() sockets etc. Don't want to select these. 
++ */ ++ if (so->so_state & SS_NOFDREF || so->s == -1) { ++ continue; ++ } ++ ++ /* ++ * Set for reading sockets which are accepting ++ */ ++ if (so->so_state & SS_FACCEPTCONN) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ continue; ++ } ++ ++ /* ++ * Set for writing sockets which are connecting ++ */ ++ if (so->so_state & SS_ISFCONNECTING) { ++ so->pollfds_idx = ++ add_poll(so->s, SLIRP_POLL_OUT | SLIRP_POLL_ERR, opaque); ++ continue; ++ } ++ ++ /* ++ * Set for writing if we are connected, can send more, and ++ * we have something to send ++ */ ++ if (CONN_CANFSEND(so) && so->so_rcv.sb_cc) { ++ events |= SLIRP_POLL_OUT | SLIRP_POLL_ERR; ++ } ++ ++ /* ++ * Set for reading (and urgent data) if we are connected, can ++ * receive more, and we have room for it XXX /2 ? ++ */ ++ if (CONN_CANFRCV(so) && ++ (so->so_snd.sb_cc < (so->so_snd.sb_datalen / 2))) { ++ events |= SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR | ++ SLIRP_POLL_PRI; ++ } ++ ++ if (events) { ++ so->pollfds_idx = add_poll(so->s, events, opaque); ++ } ++ } ++ ++ /* ++ * UDP sockets ++ */ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if it's timed out ++ */ ++ if (so->so_expire) { ++ if (so->so_expire <= curtime) { ++ udp_detach(so); ++ continue; ++ } else { ++ slirp->do_slowtimo = true; /* Let socket expire */ ++ } ++ } ++ ++ /* ++ * When UDP packets are received from over the ++ * link, they're sendto()'d straight away, so ++ * no need for setting for writing ++ * Limit the number of packets queued by this session ++ * to 4. Note that even though we try and limit this ++ * to 4 packets, the session could have more queued ++ * if the packets needed to be fragmented ++ * (XXX <= 4 ?) 
++ */ ++ if ((so->so_state & SS_ISFCONNECTED) && so->so_queued <= 4) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ } ++ } ++ ++ /* ++ * ICMP sockets ++ */ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if it's timed out ++ */ ++ if (so->so_expire) { ++ if (so->so_expire <= curtime) { ++ icmp_detach(so); ++ continue; ++ } else { ++ slirp->do_slowtimo = true; /* Let socket expire */ ++ } ++ } ++ ++ if (so->so_state & SS_ISFCONNECTED) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ } ++ } ++ ++ slirp_update_timeout(slirp, timeout); ++} ++ ++void slirp_pollfds_poll(Slirp *slirp, int select_error, ++ SlirpGetREventsCb get_revents, void *opaque) ++{ ++ struct socket *so, *so_next; ++ int ret; ++ ++ curtime = slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS; ++ ++ /* ++ * See if anything has timed out ++ */ ++ if (slirp->time_fasttimo && ++ ((curtime - slirp->time_fasttimo) >= TIMEOUT_FAST)) { ++ tcp_fasttimo(slirp); ++ slirp->time_fasttimo = 0; ++ } ++ if (slirp->do_slowtimo && ++ ((curtime - slirp->last_slowtimo) >= TIMEOUT_SLOW)) { ++ ip_slowtimo(slirp); ++ tcp_slowtimo(slirp); ++ slirp->last_slowtimo = curtime; ++ } ++ ++ /* ++ * Check sockets ++ */ ++ if (!select_error) { ++ /* ++ * Check TCP sockets ++ */ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->so_state & SS_NOFDREF || so->s == -1) { ++ continue; ++ } ++ ++ /* ++ * Check for URG data ++ * This will soread as well, so no need to ++ * test for SLIRP_POLL_IN below if this succeeds ++ */ ++ if (revents & SLIRP_POLL_PRI) { ++ ret = sorecvoob(so); ++ if (ret < 0) { ++ /* Socket error might have resulted in the socket being ++ 
* removed, do not try to do anything more with it. */ ++ continue; ++ } ++ } ++ /* ++ * Check sockets for reading ++ */ ++ else if (revents & ++ (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR)) { ++ /* ++ * Check for incoming connections ++ */ ++ if (so->so_state & SS_FACCEPTCONN) { ++ tcp_connect(so); ++ continue; ++ } /* else */ ++ ret = soread(so); ++ ++ /* Output it if we read something */ ++ if (ret > 0) { ++ tcp_output(sototcpcb(so)); ++ } ++ if (ret < 0) { ++ /* Socket error might have resulted in the socket being ++ * removed, do not try to do anything more with it. */ ++ continue; ++ } ++ } ++ ++ /* ++ * Check sockets for writing ++ */ ++ if (!(so->so_state & SS_NOFDREF) && ++ (revents & (SLIRP_POLL_OUT | SLIRP_POLL_ERR))) { ++ /* ++ * Check for non-blocking, still-connecting sockets ++ */ ++ if (so->so_state & SS_ISFCONNECTING) { ++ /* Connected */ ++ so->so_state &= ~SS_ISFCONNECTING; ++ ++ ret = send(so->s, (const void *)&ret, 0, 0); ++ if (ret < 0) { ++ /* XXXXX Must fix, zero bytes is a NOP */ ++ if (errno == EAGAIN || errno == EWOULDBLOCK || ++ errno == EINPROGRESS || errno == ENOTCONN) { ++ continue; ++ } ++ ++ /* else failed */ ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; ++ } ++ /* else so->so_state &= ~SS_ISFCONNECTING; */ ++ ++ /* ++ * Continue tcp_input ++ */ ++ tcp_input((struct mbuf *)NULL, sizeof(struct ip), so, ++ so->so_ffamily); ++ /* continue; */ ++ } else { ++ ret = sowrite(so); ++ if (ret > 0) { ++ /* Call tcp_output in case we need to send a window ++ * update to the guest, otherwise it will be stuck ++ * until it sends a window probe. */ ++ tcp_output(sototcpcb(so)); ++ } ++ } ++ } ++ } ++ ++ /* ++ * Now UDP sockets. ++ * Incoming packets are sent straight away, they're not buffered. ++ * Incoming UDP data isn't buffered either. 
++ */ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->s != -1 && ++ (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { ++ sorecvfrom(so); ++ } ++ } ++ ++ /* ++ * Check incoming ICMP relies. ++ */ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->s != -1 && ++ (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { ++ icmp_receive(so); ++ } ++ } ++ } ++ ++ if_start(slirp); ++} ++ ++static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) ++{ ++ struct slirp_arphdr *ah = (struct slirp_arphdr *)(pkt + ETH_HLEN); ++ uint8_t arp_reply[MAX(ETH_HLEN + sizeof(struct slirp_arphdr), 64)]; ++ struct ethhdr *reh = (struct ethhdr *)arp_reply; ++ struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_reply + ETH_HLEN); ++ int ar_op; ++ struct gfwd_list *ex_ptr; ++ ++ if (!slirp->in_enabled) { ++ return; ++ } ++ ++ ar_op = ntohs(ah->ar_op); ++ switch (ar_op) { ++ case ARPOP_REQUEST: ++ if (ah->ar_tip == ah->ar_sip) { ++ /* Gratuitous ARP */ ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ return; ++ } ++ ++ if ((ah->ar_tip & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ if (ah->ar_tip == slirp->vnameserver_addr.s_addr || ++ ah->ar_tip == slirp->vhost_addr.s_addr) ++ goto arp_ok; ++ /* TODO: IPv6 */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_addr.s_addr == ah->ar_tip) ++ goto arp_ok; ++ } ++ return; ++ arp_ok: ++ memset(arp_reply, 0, sizeof(arp_reply)); ++ ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ ++ /* ARP request for alias/dns mac address */ ++ memcpy(reh->h_dest, pkt + ETH_ALEN, ETH_ALEN); 
++ memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&reh->h_source[2], &ah->ar_tip, 4); ++ reh->h_proto = htons(ETH_P_ARP); ++ ++ rah->ar_hrd = htons(1); ++ rah->ar_pro = htons(ETH_P_IP); ++ rah->ar_hln = ETH_ALEN; ++ rah->ar_pln = 4; ++ rah->ar_op = htons(ARPOP_REPLY); ++ memcpy(rah->ar_sha, reh->h_source, ETH_ALEN); ++ rah->ar_sip = ah->ar_tip; ++ memcpy(rah->ar_tha, ah->ar_sha, ETH_ALEN); ++ rah->ar_tip = ah->ar_sip; ++ slirp_send_packet_all(slirp, arp_reply, sizeof(arp_reply)); ++ } ++ break; ++ case ARPOP_REPLY: ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ break; ++ default: ++ break; ++ } ++} ++ ++void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) ++{ ++ struct mbuf *m; ++ int proto; ++ ++ if (pkt_len < ETH_HLEN) ++ return; ++ ++ proto = (((uint16_t)pkt[12]) << 8) + pkt[13]; ++ switch (proto) { ++ case ETH_P_ARP: ++ arp_input(slirp, pkt, pkt_len); ++ break; ++ case ETH_P_IP: ++ case ETH_P_IPV6: ++ m = m_get(slirp); ++ if (!m) ++ return; ++ /* Note: we add 2 to align the IP header on 4 bytes, ++ * and add the margin for the tcpiphdr overhead */ ++ if (M_FREEROOM(m) < pkt_len + TCPIPHDR_DELTA + 2) { ++ m_inc(m, pkt_len + TCPIPHDR_DELTA + 2); ++ } ++ m->m_len = pkt_len + TCPIPHDR_DELTA + 2; ++ memcpy(m->m_data + TCPIPHDR_DELTA + 2, pkt, pkt_len); ++ ++ m->m_data += TCPIPHDR_DELTA + 2 + ETH_HLEN; ++ m->m_len -= TCPIPHDR_DELTA + 2 + ETH_HLEN; ++ ++ if (proto == ETH_P_IP) { ++ ip_input(m); ++ } else if (proto == ETH_P_IPV6) { ++ ip6_input(m); ++ } ++ break; ++ ++ case ETH_P_NCSI: ++ ncsi_input(slirp, pkt, pkt_len); ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++/* Prepare the IPv4 packet to be sent to the ethernet device. Returns 1 if no ++ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet ++ * is ready to go. 
++ */ ++static int if_encap4(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, ++ uint8_t ethaddr[ETH_ALEN]) ++{ ++ const struct ip *iph = (const struct ip *)ifm->m_data; ++ ++ if (iph->ip_dst.s_addr == 0) { ++ /* 0.0.0.0 can not be a destination address, something went wrong, ++ * avoid making it worse */ ++ return 1; ++ } ++ if (!arp_table_search(slirp, iph->ip_dst.s_addr, ethaddr)) { ++ uint8_t arp_req[ETH_HLEN + sizeof(struct slirp_arphdr)]; ++ struct ethhdr *reh = (struct ethhdr *)arp_req; ++ struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_req + ETH_HLEN); ++ ++ if (!ifm->resolution_requested) { ++ /* If the client addr is not known, send an ARP request */ ++ memset(reh->h_dest, 0xff, ETH_ALEN); ++ memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&reh->h_source[2], &slirp->vhost_addr, 4); ++ reh->h_proto = htons(ETH_P_ARP); ++ rah->ar_hrd = htons(1); ++ rah->ar_pro = htons(ETH_P_IP); ++ rah->ar_hln = ETH_ALEN; ++ rah->ar_pln = 4; ++ rah->ar_op = htons(ARPOP_REQUEST); ++ ++ /* source hw addr */ ++ memcpy(rah->ar_sha, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&rah->ar_sha[2], &slirp->vhost_addr, 4); ++ ++ /* source IP */ ++ rah->ar_sip = slirp->vhost_addr.s_addr; ++ ++ /* target hw addr (none) */ ++ memset(rah->ar_tha, 0, ETH_ALEN); ++ ++ /* target IP */ ++ rah->ar_tip = iph->ip_dst.s_addr; ++ slirp->client_ipaddr = iph->ip_dst; ++ slirp_send_packet_all(slirp, arp_req, sizeof(arp_req)); ++ ifm->resolution_requested = true; ++ ++ /* Expire request and drop outgoing packet after 1 second */ ++ ifm->expiration_date = ++ slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; ++ } ++ return 0; ++ } else { ++ memcpy(eh->h_source, special_ethaddr, ETH_ALEN - 4); ++ /* XXX: not correct */ ++ memcpy(&eh->h_source[2], &slirp->vhost_addr, 4); ++ eh->h_proto = htons(ETH_P_IP); ++ ++ /* Send this */ ++ return 2; ++ } ++} ++ ++/* Prepare the IPv6 packet to be sent to the ethernet device. 
Returns 1 if no ++ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet ++ * is ready to go. ++ */ ++static int if_encap6(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, ++ uint8_t ethaddr[ETH_ALEN]) ++{ ++ const struct ip6 *ip6h = mtod(ifm, const struct ip6 *); ++ if (!ndp_table_search(slirp, ip6h->ip_dst, ethaddr)) { ++ if (!ifm->resolution_requested) { ++ ndp_send_ns(slirp, ip6h->ip_dst); ++ ifm->resolution_requested = true; ++ ifm->expiration_date = ++ slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; ++ } ++ return 0; ++ } else { ++ eh->h_proto = htons(ETH_P_IPV6); ++ in6_compute_ethaddr(ip6h->ip_src, eh->h_source); ++ ++ /* Send this */ ++ return 2; ++ } ++} ++ ++/* Output the IP packet to the ethernet device. Returns 0 if the packet must be ++ * re-queued. ++ */ ++int if_encap(Slirp *slirp, struct mbuf *ifm) ++{ ++ uint8_t buf[1600]; ++ struct ethhdr *eh = (struct ethhdr *)buf; ++ uint8_t ethaddr[ETH_ALEN]; ++ const struct ip *iph = (const struct ip *)ifm->m_data; ++ int ret; ++ ++ if (ifm->m_len + ETH_HLEN > sizeof(buf)) { ++ return 1; ++ } ++ ++ switch (iph->ip_v) { ++ case IPVERSION: ++ ret = if_encap4(slirp, ifm, eh, ethaddr); ++ if (ret < 2) { ++ return ret; ++ } ++ break; ++ ++ case IP6VERSION: ++ ret = if_encap6(slirp, ifm, eh, ethaddr); ++ if (ret < 2) { ++ return ret; ++ } ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ break; ++ } ++ ++ memcpy(eh->h_dest, ethaddr, ETH_ALEN); ++ DEBUG_ARG("src = %02x:%02x:%02x:%02x:%02x:%02x", eh->h_source[0], ++ eh->h_source[1], eh->h_source[2], eh->h_source[3], ++ eh->h_source[4], eh->h_source[5]); ++ DEBUG_ARG("dst = %02x:%02x:%02x:%02x:%02x:%02x", eh->h_dest[0], ++ eh->h_dest[1], eh->h_dest[2], eh->h_dest[3], eh->h_dest[4], ++ eh->h_dest[5]); ++ memcpy(buf + sizeof(struct ethhdr), ifm->m_data, ifm->m_len); ++ slirp_send_packet_all(slirp, buf, ifm->m_len + ETH_HLEN); ++ return 1; ++} ++ ++/* Drop host forwarding rule, return 0 if found. 
*/ ++/* TODO: IPv6 */ ++int slirp_remove_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port) ++{ ++ struct socket *so; ++ struct socket *head = (is_udp ? &slirp->udb : &slirp->tcb); ++ struct sockaddr_in addr; ++ int port = htons(host_port); ++ socklen_t addr_len; ++ ++ for (so = head->so_next; so != head; so = so->so_next) { ++ addr_len = sizeof(addr); ++ if ((so->so_state & SS_HOSTFWD) && ++ getsockname(so->s, (struct sockaddr *)&addr, &addr_len) == 0 && ++ addr.sin_addr.s_addr == host_addr.s_addr && addr.sin_port == port) { ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++ return 0; ++ } ++ } ++ ++ return -1; ++} ++ ++/* TODO: IPv6 */ ++int slirp_add_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port, struct in_addr guest_addr, int guest_port) ++{ ++ if (!guest_addr.s_addr) { ++ guest_addr = slirp->vdhcp_startaddr; ++ } ++ if (is_udp) { ++ if (!udp_listen(slirp, host_addr.s_addr, htons(host_port), ++ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) ++ return -1; ++ } else { ++ if (!tcp_listen(slirp, host_addr.s_addr, htons(host_port), ++ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) ++ return -1; ++ } ++ return 0; ++} ++ ++/* TODO: IPv6 */ ++static bool check_guestfwd(Slirp *slirp, struct in_addr *guest_addr, ++ int guest_port) ++{ ++ struct gfwd_list *tmp_ptr; ++ ++ if (!guest_addr->s_addr) { ++ guest_addr->s_addr = slirp->vnetwork_addr.s_addr | ++ (htonl(0x0204) & ~slirp->vnetwork_mask.s_addr); ++ } ++ if ((guest_addr->s_addr & slirp->vnetwork_mask.s_addr) != ++ slirp->vnetwork_addr.s_addr || ++ guest_addr->s_addr == slirp->vhost_addr.s_addr || ++ guest_addr->s_addr == slirp->vnameserver_addr.s_addr) { ++ return false; ++ } ++ ++ /* check if the port is "bound" */ ++ for (tmp_ptr = slirp->guestfwd_list; tmp_ptr; tmp_ptr = tmp_ptr->ex_next) { ++ if (guest_port == tmp_ptr->ex_fport && ++ guest_addr->s_addr == tmp_ptr->ex_addr.s_addr) ++ return false; 
++ } ++ ++ return true; ++} ++ ++int slirp_add_exec(Slirp *slirp, const char *cmdline, ++ struct in_addr *guest_addr, int guest_port) ++{ ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_exec(&slirp->guestfwd_list, cmdline, *guest_addr, htons(guest_port)); ++ return 0; ++} ++ ++int slirp_add_guestfwd(Slirp *slirp, SlirpWriteCb write_cb, void *opaque, ++ struct in_addr *guest_addr, int guest_port) ++{ ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_guestfwd(&slirp->guestfwd_list, write_cb, opaque, *guest_addr, ++ htons(guest_port)); ++ return 0; ++} ++ ++ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags) ++{ ++ if (so->s == -1 && so->guestfwd) { ++ /* XXX this blocks entire thread. Rewrite to use ++ * qemu_chr_fe_write and background I/O callbacks */ ++ so->guestfwd->write_cb(buf, len, so->guestfwd->opaque); ++ return len; ++ } ++ ++ if (so->s == -1) { ++ /* ++ * This should in theory not happen but it is hard to be ++ * sure because some code paths will end up with so->s == -1 ++ * on a failure but don't dispose of the struct socket. ++ * Check specifically, so we don't pass -1 to send(). 
++ */ ++ errno = EBADF; ++ return -1; ++ } ++ ++ return send(so->s, buf, len, flags); ++} ++ ++struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ struct socket *so; ++ ++ /* TODO: IPv6 */ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { ++ if (so->so_faddr.s_addr == guest_addr.s_addr && ++ htons(so->so_fport) == guest_port) { ++ return so; ++ } ++ } ++ return NULL; ++} ++ ++size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ struct iovec iov[2]; ++ struct socket *so; ++ ++ so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); ++ ++ if (!so || so->so_state & SS_NOFDREF) { ++ return 0; ++ } ++ ++ if (!CONN_CANFRCV(so) || so->so_snd.sb_cc >= (so->so_snd.sb_datalen / 2)) { ++ return 0; ++ } ++ ++ return sopreprbuf(so, iov, NULL); ++} ++ ++void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port, ++ const uint8_t *buf, int size) ++{ ++ int ret; ++ struct socket *so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); ++ ++ if (!so) ++ return; ++ ++ ret = soreadbuf(so, (const char *)buf, size); ++ ++ if (ret > 0) ++ tcp_output(sototcpcb(so)); ++} ++ ++void slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len) ++{ ++ ssize_t ret = slirp->cb->send_packet(buf, len, slirp->opaque); ++ ++ if (ret < 0) { ++ g_critical("Failed to send packet, ret: %ld", (long)ret); ++ } else if (ret < len) { ++ DEBUG_ERROR("send_packet() didn't send all data: %ld < %lu", (long)ret, ++ (unsigned long)len); ++ } ++} +diff --git a/slirp/src/slirp.h b/slirp/src/slirp.h +new file mode 100644 +index 0000000..37ba6ed +--- /dev/null ++++ b/slirp/src/slirp.h +@@ -0,0 +1,273 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef SLIRP_H ++#define SLIRP_H ++ ++#ifdef _WIN32 ++ ++/* as defined in sdkddkver.h */ ++#ifndef _WIN32_WINNT ++#define _WIN32_WINNT 0x0600 /* Vista */ ++#endif ++/* reduces the number of implicitly included headers */ 
++#ifndef WIN32_LEAN_AND_MEAN ++#define WIN32_LEAN_AND_MEAN ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++ ++#else ++#if !defined(__HAIKU__) ++#define O_BINARY 0 ++#endif ++#endif ++ ++#ifndef _WIN32 ++#include ++#include ++#include ++#include ++#include ++#endif ++ ++#ifdef __APPLE__ ++#include ++#endif ++ ++/* Avoid conflicting with the libc insque() and remque(), which ++ have different prototypes. */ ++#define insque slirp_insque ++#define remque slirp_remque ++#define quehead slirp_quehead ++ ++#include "debug.h" ++#include "util.h" ++ ++#include "libslirp.h" ++#include "ip.h" ++#include "ip6.h" ++#include "tcp.h" ++#include "tcp_timer.h" ++#include "tcp_var.h" ++#include "tcpip.h" ++#include "udp.h" ++#include "ip_icmp.h" ++#include "ip6_icmp.h" ++#include "mbuf.h" ++#include "sbuf.h" ++#include "socket.h" ++#include "if.h" ++#include "main.h" ++#include "misc.h" ++ ++#include "bootp.h" ++#include "tftp.h" ++ ++#define ARPOP_REQUEST 1 /* ARP request */ ++#define ARPOP_REPLY 2 /* ARP reply */ ++ ++struct ethhdr { ++ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ ++ unsigned char h_source[ETH_ALEN]; /* source ether addr */ ++ unsigned short h_proto; /* packet type ID field */ ++}; ++ ++struct slirp_arphdr { ++ unsigned short ar_hrd; /* format of hardware address */ ++ unsigned short ar_pro; /* format of protocol address */ ++ unsigned char ar_hln; /* length of hardware address */ ++ unsigned char ar_pln; /* length of protocol address */ ++ unsigned short ar_op; /* ARP opcode (command) */ ++ ++ /* ++ * Ethernet looks like this : This bit is variable sized however... 
++ */ ++ unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */ ++ uint32_t ar_sip; /* sender IP address */ ++ unsigned char ar_tha[ETH_ALEN]; /* target hardware address */ ++ uint32_t ar_tip; /* target IP address */ ++} SLIRP_PACKED; ++ ++#define ARP_TABLE_SIZE 16 ++ ++typedef struct ArpTable { ++ struct slirp_arphdr table[ARP_TABLE_SIZE]; ++ int next_victim; ++} ArpTable; ++ ++void arp_table_add(Slirp *slirp, uint32_t ip_addr, uint8_t ethaddr[ETH_ALEN]); ++ ++bool arp_table_search(Slirp *slirp, uint32_t ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]); ++ ++struct ndpentry { ++ unsigned char eth_addr[ETH_ALEN]; /* sender hardware address */ ++ struct in6_addr ip_addr; /* sender IP address */ ++}; ++ ++#define NDP_TABLE_SIZE 16 ++ ++typedef struct NdpTable { ++ struct ndpentry table[NDP_TABLE_SIZE]; ++ int next_victim; ++} NdpTable; ++ ++void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t ethaddr[ETH_ALEN]); ++bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]); ++ ++struct Slirp { ++ unsigned time_fasttimo; ++ unsigned last_slowtimo; ++ bool do_slowtimo; ++ ++ bool in_enabled, in6_enabled; ++ ++ /* virtual network configuration */ ++ struct in_addr vnetwork_addr; ++ struct in_addr vnetwork_mask; ++ struct in_addr vhost_addr; ++ struct in6_addr vprefix_addr6; ++ uint8_t vprefix_len; ++ struct in6_addr vhost_addr6; ++ struct in_addr vdhcp_startaddr; ++ struct in_addr vnameserver_addr; ++ struct in6_addr vnameserver_addr6; ++ ++ struct in_addr client_ipaddr; ++ char client_hostname[33]; ++ ++ int restricted; ++ struct gfwd_list *guestfwd_list; ++ ++ /* mbuf states */ ++ struct quehead m_freelist; ++ struct quehead m_usedlist; ++ int mbuf_alloced; ++ ++ /* if states */ ++ struct quehead if_fastq; /* fast queue (for interactive data) */ ++ struct quehead if_batchq; /* queue for non-interactive data */ ++ bool if_start_busy; /* avoid if_start recursion */ ++ ++ /* ip states */ ++ struct ipq ipq; /* ip reass. 
queue */ ++ uint16_t ip_id; /* ip packet ctr, for ids */ ++ ++ /* bootp/dhcp states */ ++ BOOTPClient bootp_clients[NB_BOOTP_CLIENTS]; ++ char *bootp_filename; ++ size_t vdnssearch_len; ++ uint8_t *vdnssearch; ++ char *vdomainname; ++ ++ /* tcp states */ ++ struct socket tcb; ++ struct socket *tcp_last_so; ++ tcp_seq tcp_iss; /* tcp initial send seq # */ ++ uint32_t tcp_now; /* for RFC 1323 timestamps */ ++ ++ /* udp states */ ++ struct socket udb; ++ struct socket *udp_last_so; ++ ++ /* icmp states */ ++ struct socket icmp; ++ struct socket *icmp_last_so; ++ ++ /* tftp states */ ++ char *tftp_prefix; ++ struct tftp_session tftp_sessions[TFTP_SESSIONS_MAX]; ++ char *tftp_server_name; ++ ++ ArpTable arp_table; ++ NdpTable ndp_table; ++ ++ GRand *grand; ++ void *ra_timer; ++ ++ const SlirpCb *cb; ++ void *opaque; ++}; ++ ++void if_start(Slirp *); ++ ++int get_dns_addr(struct in_addr *pdns_addr); ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id); ++ ++/* ncsi.c */ ++void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); ++ ++#ifndef _WIN32 ++#include ++#endif ++ ++ ++extern bool slirp_do_keepalive; ++ ++#define TCP_MAXIDLE (TCPTV_KEEPCNT * TCPTV_KEEPINTVL) ++ ++/* dnssearch.c */ ++int translate_dnssearch(Slirp *s, const char **names); ++ ++/* cksum.c */ ++int cksum(struct mbuf *m, int len); ++int ip6_cksum(struct mbuf *m); ++ ++/* if.c */ ++void if_init(Slirp *); ++void if_output(struct socket *, struct mbuf *); ++ ++/* ip_input.c */ ++void ip_init(Slirp *); ++void ip_cleanup(Slirp *); ++void ip_input(struct mbuf *); ++void ip_slowtimo(Slirp *); ++void ip_stripoptions(register struct mbuf *, struct mbuf *); ++ ++/* ip_output.c */ ++int ip_output(struct socket *, struct mbuf *); ++ ++/* ip6_input.c */ ++void ip6_init(Slirp *); ++void ip6_cleanup(Slirp *); ++void ip6_input(struct mbuf *); ++ ++/* ip6_output */ ++int ip6_output(struct socket *, struct mbuf *, int fast); ++ ++/* tcp_input.c */ ++void tcp_input(register struct mbuf *, int, 
struct socket *, unsigned short af); ++int tcp_mss(register struct tcpcb *, unsigned); ++ ++/* tcp_output.c */ ++int tcp_output(register struct tcpcb *); ++void tcp_setpersist(register struct tcpcb *); ++ ++/* tcp_subr.c */ ++void tcp_init(Slirp *); ++void tcp_cleanup(Slirp *); ++void tcp_template(struct tcpcb *); ++void tcp_respond(struct tcpcb *, register struct tcpiphdr *, ++ register struct mbuf *, tcp_seq, tcp_seq, int, unsigned short); ++struct tcpcb *tcp_newtcpcb(struct socket *); ++struct tcpcb *tcp_close(register struct tcpcb *); ++void tcp_sockclosed(struct tcpcb *); ++int tcp_fconnect(struct socket *, unsigned short af); ++void tcp_connect(struct socket *); ++int tcp_attach(struct socket *); ++uint8_t tcp_tos(struct socket *); ++int tcp_emu(struct socket *, struct mbuf *); ++int tcp_ctl(struct socket *); ++struct tcpcb *tcp_drop(struct tcpcb *tp, int err); ++ ++struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port); ++ ++void slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len); ++ ++#endif +diff --git a/slirp/src/socket.c b/slirp/src/socket.c +new file mode 100644 +index 0000000..34daffc +--- /dev/null ++++ b/slirp/src/socket.c +@@ -0,0 +1,935 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++#ifdef __sun__ ++#include ++#endif ++ ++static void sofcantrcvmore(struct socket *so); ++static void sofcantsendmore(struct socket *so); ++ ++struct socket *solookup(struct socket **last, struct socket *head, ++ struct sockaddr_storage *lhost, ++ struct sockaddr_storage *fhost) ++{ ++ struct socket *so = *last; ++ ++ /* Optimisation */ ++ if (so != head && sockaddr_equal(&(so->lhost.ss), lhost) && ++ (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { ++ return so; ++ } ++ ++ for (so = head->so_next; so != head; so = so->so_next) { ++ if (sockaddr_equal(&(so->lhost.ss), lhost) && ++ (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { ++ *last = so; ++ return so; ++ } ++ } ++ ++ return (struct socket *)NULL; ++} ++ ++/* ++ * Create a new socket, initialise the fields ++ * It is the responsibility of the caller to ++ * insque() it into the correct linked-list ++ */ ++struct socket *socreate(Slirp *slirp) ++{ ++ struct socket *so = g_new(struct socket, 1); ++ ++ memset(so, 0, sizeof(struct socket)); ++ so->so_state = SS_NOFDREF; ++ so->s = -1; ++ so->slirp = slirp; ++ so->pollfds_idx = -1; ++ ++ return so; ++} ++ ++/* ++ * Remove references to so from the given message queue. 
++ */ ++static void soqfree(struct socket *so, struct quehead *qh) ++{ ++ struct mbuf *ifq; ++ ++ for (ifq = (struct mbuf *)qh->qh_link; (struct quehead *)ifq != qh; ++ ifq = ifq->ifq_next) { ++ if (ifq->ifq_so == so) { ++ struct mbuf *ifm; ++ ifq->ifq_so = NULL; ++ for (ifm = ifq->ifs_next; ifm != ifq; ifm = ifm->ifs_next) { ++ ifm->ifq_so = NULL; ++ } ++ } ++ } ++} ++ ++/* ++ * remque and free a socket, clobber cache ++ */ ++void sofree(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ ++ soqfree(so, &slirp->if_fastq); ++ soqfree(so, &slirp->if_batchq); ++ ++ if (so == slirp->tcp_last_so) { ++ slirp->tcp_last_so = &slirp->tcb; ++ } else if (so == slirp->udp_last_so) { ++ slirp->udp_last_so = &slirp->udb; ++ } else if (so == slirp->icmp_last_so) { ++ slirp->icmp_last_so = &slirp->icmp; ++ } ++ m_free(so->so_m); ++ ++ if (so->so_next && so->so_prev) ++ remque(so); /* crashes if so is not in a queue */ ++ ++ if (so->so_tcpcb) { ++ free(so->so_tcpcb); ++ } ++ g_free(so); ++} ++ ++size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np) ++{ ++ int n, lss, total; ++ struct sbuf *sb = &so->so_snd; ++ int len = sb->sb_datalen - sb->sb_cc; ++ int mss = so->so_tcpcb->t_maxseg; ++ ++ DEBUG_CALL("sopreprbuf"); ++ DEBUG_ARG("so = %p", so); ++ ++ if (len <= 0) ++ return 0; ++ ++ iov[0].iov_base = sb->sb_wptr; ++ iov[1].iov_base = NULL; ++ iov[1].iov_len = 0; ++ if (sb->sb_wptr < sb->sb_rptr) { ++ iov[0].iov_len = sb->sb_rptr - sb->sb_wptr; ++ /* Should never succeed, but... */ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ if (iov[0].iov_len > mss) ++ iov[0].iov_len -= iov[0].iov_len % mss; ++ n = 1; ++ } else { ++ iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr; ++ /* Should never succeed, but... 
*/ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ len -= iov[0].iov_len; ++ if (len) { ++ iov[1].iov_base = sb->sb_data; ++ iov[1].iov_len = sb->sb_rptr - sb->sb_data; ++ if (iov[1].iov_len > len) ++ iov[1].iov_len = len; ++ total = iov[0].iov_len + iov[1].iov_len; ++ if (total > mss) { ++ lss = total % mss; ++ if (iov[1].iov_len > lss) { ++ iov[1].iov_len -= lss; ++ n = 2; ++ } else { ++ lss -= iov[1].iov_len; ++ iov[0].iov_len -= lss; ++ n = 1; ++ } ++ } else ++ n = 2; ++ } else { ++ if (iov[0].iov_len > mss) ++ iov[0].iov_len -= iov[0].iov_len % mss; ++ n = 1; ++ } ++ } ++ if (np) ++ *np = n; ++ ++ return iov[0].iov_len + (n - 1) * iov[1].iov_len; ++} ++ ++/* ++ * Read from so's socket into sb_snd, updating all relevant sbuf fields ++ * NOTE: This will only be called if it is select()ed for reading, so ++ * a read() of 0 (or less) means it's disconnected ++ */ ++int soread(struct socket *so) ++{ ++ int n, nn; ++ size_t buf_len; ++ struct sbuf *sb = &so->so_snd; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("soread"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * No need to check if there's enough room to read. 
++ * soread wouldn't have been called if there weren't ++ */ ++ buf_len = sopreprbuf(so, iov, &n); ++ assert(buf_len != 0); ++ ++ nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, 0); ++ if (nn <= 0) { ++ if (nn < 0 && (errno == EINTR || errno == EAGAIN)) ++ return 0; ++ else { ++ int err; ++ socklen_t elen = sizeof err; ++ struct sockaddr_storage addr; ++ struct sockaddr *paddr = (struct sockaddr *)&addr; ++ socklen_t alen = sizeof addr; ++ ++ err = errno; ++ if (nn == 0) { ++ if (getpeername(so->s, paddr, &alen) < 0) { ++ err = errno; ++ } else { ++ getsockopt(so->s, SOL_SOCKET, SO_ERROR, &err, &elen); ++ } ++ } ++ ++ DEBUG_MISC(" --- soread() disconnected, nn = %d, errno = %d-%s", nn, ++ errno, strerror(errno)); ++ sofcantrcvmore(so); ++ ++ if (err == ECONNRESET || err == ECONNREFUSED || err == ENOTCONN || ++ err == EPIPE) { ++ tcp_drop(sototcpcb(so), err); ++ } else { ++ tcp_sockclosed(sototcpcb(so)); ++ } ++ return -1; ++ } ++ } ++ ++ /* ++ * If there was no error, try and read the second time round ++ * We read again if n = 2 (ie, there's another part of the buffer) ++ * and we read as much as we could in the first read ++ * We don't test for <= 0 this time, because there legitimately ++ * might not be any more data (since the socket is non-blocking), ++ * a close will be detected on next iteration. ++ * A return of -1 won't (shouldn't) happen, since it didn't happen above ++ */ ++ if (n == 2 && nn == iov[0].iov_len) { ++ int ret; ++ ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0); ++ if (ret > 0) ++ nn += ret; ++ } ++ ++ DEBUG_MISC(" ... 
read nn = %d bytes", nn); ++ ++ /* Update fields */ ++ sb->sb_cc += nn; ++ sb->sb_wptr += nn; ++ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_wptr -= sb->sb_datalen; ++ return nn; ++} ++ ++int soreadbuf(struct socket *so, const char *buf, int size) ++{ ++ int n, nn, copy = size; ++ struct sbuf *sb = &so->so_snd; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("soreadbuf"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * No need to check if there's enough room to read. ++ * soread wouldn't have been called if there weren't ++ */ ++ assert(size > 0); ++ if (sopreprbuf(so, iov, &n) < size) ++ goto err; ++ ++ nn = MIN(iov[0].iov_len, copy); ++ memcpy(iov[0].iov_base, buf, nn); ++ ++ copy -= nn; ++ buf += nn; ++ ++ if (copy == 0) ++ goto done; ++ ++ memcpy(iov[1].iov_base, buf, copy); ++ ++done: ++ /* Update fields */ ++ sb->sb_cc += size; ++ sb->sb_wptr += size; ++ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_wptr -= sb->sb_datalen; ++ return size; ++err: ++ ++ sofcantrcvmore(so); ++ tcp_sockclosed(sototcpcb(so)); ++ g_critical("soreadbuf buffer too small"); ++ return -1; ++} ++ ++/* ++ * Get urgent data ++ * ++ * When the socket is created, we set it SO_OOBINLINE, ++ * so when OOB data arrives, we soread() it and everything ++ * in the send buffer is sent as urgent data ++ */ ++int sorecvoob(struct socket *so) ++{ ++ struct tcpcb *tp = sototcpcb(so); ++ int ret; ++ ++ DEBUG_CALL("sorecvoob"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * We take a guess at how much urgent data has arrived. ++ * In most situations, when urgent data arrives, the next ++ * read() should get all the urgent data. This guess will ++ * be wrong however if more data arrives just after the ++ * urgent data, or the read() doesn't return all the ++ * urgent data. 
++ */ ++ ret = soread(so); ++ if (ret > 0) { ++ tp->snd_up = tp->snd_una + so->so_snd.sb_cc; ++ tp->t_force = 1; ++ tcp_output(tp); ++ tp->t_force = 0; ++ } ++ ++ return ret; ++} ++ ++/* ++ * Send urgent data ++ * There's a lot duplicated code here, but... ++ */ ++int sosendoob(struct socket *so) ++{ ++ struct sbuf *sb = &so->so_rcv; ++ char buff[2048]; /* XXX Shouldn't be sending more oob data than this */ ++ ++ int n; ++ ++ DEBUG_CALL("sosendoob"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc); ++ ++ if (so->so_urgc > 2048) ++ so->so_urgc = 2048; /* XXXX */ ++ ++ if (sb->sb_rptr < sb->sb_wptr) { ++ /* We can send it directly */ ++ n = slirp_send(so, sb->sb_rptr, so->so_urgc, ++ (MSG_OOB)); /* |MSG_DONTWAIT)); */ ++ } else { ++ /* ++ * Since there's no sendv or sendtov like writev, ++ * we must copy all data to a linear buffer then ++ * send it all ++ */ ++ uint32_t urgc = so->so_urgc; ++ int len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; ++ if (len > urgc) { ++ len = urgc; ++ } ++ memcpy(buff, sb->sb_rptr, len); ++ urgc -= len; ++ if (urgc) { ++ n = sb->sb_wptr - sb->sb_data; ++ if (n > urgc) { ++ n = urgc; ++ } ++ memcpy((buff + len), sb->sb_data, n); ++ len += n; ++ } ++ n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */ ++#ifdef DEBUG ++ if (n != len) { ++ DEBUG_ERROR("Didn't send all data urgently XXXXX"); ++ } ++#endif ++ } ++ ++ if (n < 0) { ++ return n; ++ } ++ so->so_urgc -= n; ++ DEBUG_MISC(" ---2 sent %d bytes urgent data, %d urgent bytes left", n, ++ so->so_urgc); ++ ++ sb->sb_cc -= n; ++ sb->sb_rptr += n; ++ if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_rptr -= sb->sb_datalen; ++ ++ return n; ++} ++ ++/* ++ * Write data from so_rcv to so's socket, updating the sbuf fields. ++ * Returns the non-urgent byte count sent (may be 0), or -1 after disconnecting. ++ */ ++int sowrite(struct socket *so) ++{ ++ int n, nn; ++ struct sbuf *sb = &so->so_rcv; ++ int len = sb->sb_cc; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("sowrite"); ++ DEBUG_ARG("so = %p", so); ++ ++ if
(so->so_urgc) { ++ uint32_t expected = so->so_urgc; ++ if ((int64_t)sosendoob(so) < (int64_t)expected) { ++ /* Treat an error (-1) or a short write as fatal, ++ * rather than continuing on and sending the urgent ++ * data as if it were non-urgent and leaving the ++ * so_urgc count wrong. Compared as int64_t so -1 stays negative. ++ */ ++ goto err_disconnected; ++ } ++ if (sb->sb_cc == 0) ++ return 0; ++ } ++ ++ /* ++ * No need to check if there's something to write, ++ * sowrite wouldn't have been called otherwise ++ */ ++ ++ iov[0].iov_base = sb->sb_rptr; ++ iov[1].iov_base = NULL; ++ iov[1].iov_len = 0; ++ if (sb->sb_rptr < sb->sb_wptr) { ++ iov[0].iov_len = sb->sb_wptr - sb->sb_rptr; ++ /* Should never succeed, but... */ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ n = 1; ++ } else { ++ iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ len -= iov[0].iov_len; ++ if (len) { ++ iov[1].iov_base = sb->sb_data; ++ iov[1].iov_len = sb->sb_wptr - sb->sb_data; ++ if (iov[1].iov_len > len) ++ iov[1].iov_len = len; ++ n = 2; ++ } else ++ n = 1; ++ } ++ /* Send the first region; the wrapped-around part, if any, follows */ ++ ++ nn = slirp_send(so, iov[0].iov_base, iov[0].iov_len, 0); ++ /* This should never happen, but people tell me it does *shrug* */ ++ if (nn < 0 && (errno == EAGAIN || errno == EINTR)) ++ return 0; ++ ++ if (nn <= 0) { ++ goto err_disconnected; ++ } ++ ++ if (n == 2 && nn == iov[0].iov_len) { ++ int ret; ++ ret = slirp_send(so, iov[1].iov_base, iov[1].iov_len, 0); ++ if (ret > 0) ++ nn += ret; ++ } ++ DEBUG_MISC(" ...
wrote nn = %d bytes", nn); ++ ++ /* Update sbuf */ ++ sb->sb_cc -= nn; ++ sb->sb_rptr += nn; ++ if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_rptr -= sb->sb_datalen; ++ ++ /* ++ * If in DRAIN mode, and there's no more data, set ++ * it CANTSENDMORE ++ */ ++ if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0) ++ sofcantsendmore(so); ++ ++ return nn; ++ ++err_disconnected: ++ DEBUG_MISC(" --- sowrite disconnected, so->so_state = %x, errno = %d", ++ so->so_state, errno); ++ sofcantsendmore(so); ++ tcp_sockclosed(sototcpcb(so)); ++ return -1; ++} ++ ++/* ++ * recvfrom() a UDP socket ++ */ ++void sorecvfrom(struct socket *so) ++{ ++ struct sockaddr_storage addr; ++ struct sockaddr_storage saddr, daddr; ++ socklen_t addrlen = sizeof(struct sockaddr_storage); ++ ++ DEBUG_CALL("sorecvfrom"); ++ DEBUG_ARG("so = %p", so); ++ ++ if (so->so_type == IPPROTO_ICMP) { /* This is a "ping" reply */ ++ char buff[256]; ++ int len; ++ ++ len = recvfrom(so->s, buff, 256, 0, (struct sockaddr *)&addr, &addrlen); ++ /* XXX Check if reply is "correct"? */ ++ ++ if (len == -1 || len == 0) { ++ uint8_t code = ICMP_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) ++ code = ICMP_UNREACH_HOST; ++ else if (errno == ENETUNREACH) ++ code = ICMP_UNREACH_NET; ++ ++ DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno)); ++ } else { ++ icmp_reflect(so->so_m); ++ so->so_m = NULL; /* Don't m_free() it again!
*/ ++ } ++ /* No need for this socket anymore, udp_detach it */ ++ udp_detach(so); ++ } else { /* A "normal" UDP packet */ ++ struct mbuf *m; ++ int len; ++#ifdef _WIN32 ++ unsigned long n; ++#else ++ int n; ++#endif ++ ++ if (ioctlsocket(so->s, FIONREAD, &n) != 0) { ++ DEBUG_MISC(" ioctlsocket errno = %d-%s\n", errno, strerror(errno)); ++ return; ++ } ++ if (n == 0) { ++ return; ++ } ++ ++ m = m_get(so->slirp); ++ if (!m) { ++ return; ++ } ++ switch (so->so_ffamily) { ++ case AF_INET: ++ m->m_data += IF_MAXLINKHDR + sizeof(struct udpiphdr); ++ break; ++ case AF_INET6: ++ m->m_data += ++ IF_MAXLINKHDR + sizeof(struct ip6) + sizeof(struct udphdr); ++ break; ++ default: ++ g_assert_not_reached(); ++ break; ++ } ++ ++ /* ++ * XXX Shouldn't FIONREAD packets destined for port 53, ++ * but I don't know the max packet size for DNS lookups ++ */ ++ len = M_FREEROOM(m); ++ /* if (so->so_fport != htons(53)) { */ ++ ++ if (n > len) { ++ n = (m->m_data - m->m_dat) + m->m_len + n + 1; ++ m_inc(m, n); ++ len = M_FREEROOM(m); ++ } ++ /* } */ ++ ++ m->m_len = recvfrom(so->s, m->m_data, len, 0, (struct sockaddr *)&addr, ++ &addrlen); ++ DEBUG_MISC(" did recvfrom %d, errno = %d-%s", m->m_len, errno, ++ strerror(errno)); ++ if (m->m_len < 0) { ++ /* Report error as ICMP */ ++ switch (so->so_lfamily) { ++ uint8_t code; ++ case AF_INET: ++ code = ICMP_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) { ++ code = ICMP_UNREACH_HOST; ++ } else if (errno == ENETUNREACH) { ++ code = ICMP_UNREACH_NET; ++ } ++ ++ DEBUG_MISC(" rx error, tx icmp ICMP_UNREACH:%i", code); ++ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, ++ strerror(errno)); ++ break; ++ case AF_INET6: ++ code = ICMP6_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) { ++ code = ICMP6_UNREACH_ADDRESS; ++ } else if (errno == ENETUNREACH) { ++ code = ICMP6_UNREACH_NO_ROUTE; ++ } ++ ++ DEBUG_MISC(" rx error, tx icmp6 ICMP_UNREACH:%i", code); ++ icmp6_send_error(so->so_m, ICMP6_UNREACH, code); ++ break; ++ default: ++ 
g_assert_not_reached(); ++ break; ++ } ++ m_free(m); ++ } else { ++ /* ++ * Hack: domain name lookup will be used the most for UDP, ++ * and since they'll only be used once there's no need ++ * for the 4 minute (or whatever) timeout... So we time them ++ * out much quicker (10 seconds for now...) ++ */ ++ if (so->so_expire) { ++ if (so->so_fport == htons(53)) ++ so->so_expire = curtime + SO_EXPIREFAST; ++ else ++ so->so_expire = curtime + SO_EXPIRE; ++ } ++ ++ /* ++ * If this packet was destined for CTL_ADDR, ++ * make it look like that's where it came from ++ */ ++ saddr = addr; ++ sotranslate_in(so, &saddr); ++ daddr = so->lhost.ss; ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ udp_output(so, m, (struct sockaddr_in *)&saddr, ++ (struct sockaddr_in *)&daddr, so->so_iptos); ++ break; ++ case AF_INET6: ++ udp6_output(so, m, (struct sockaddr_in6 *)&saddr, ++ (struct sockaddr_in6 *)&daddr); ++ break; ++ default: ++ g_assert_not_reached(); ++ break; ++ } ++ } /* rx error */ ++ } /* if ping packet */ ++} ++ ++/* ++ * sendto() a socket ++ */ ++int sosendto(struct socket *so, struct mbuf *m) ++{ ++ int ret; ++ struct sockaddr_storage addr; ++ ++ DEBUG_CALL("sosendto"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ addr = so->fhost.ss; ++ DEBUG_CALL(" sendto()ing)"); ++ sotranslate_out(so, &addr); ++ ++ /* Don't care what port we get */ ++ ret = sendto(so->s, m->m_data, m->m_len, 0, (struct sockaddr *)&addr, ++ sockaddr_size(&addr)); ++ if (ret < 0) ++ return -1; ++ ++ /* ++ * Kill the socket if there's no reply in 4 minutes, ++ * but only if it's an expirable socket ++ */ ++ if (so->so_expire) ++ so->so_expire = curtime + SO_EXPIRE; ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_ISFCONNECTED; /* So that it gets select()ed */ ++ return 0; ++} ++ ++/* ++ * Listen for incoming TCP connections ++ */ ++struct socket *tcp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, ++ uint32_t laddr, unsigned lport, int flags) ++{ ++ /* TODO: IPv6 */ 
++ struct sockaddr_in addr; ++ struct socket *so; ++ int s, opt = 1; ++ socklen_t addrlen = sizeof(addr); ++ memset(&addr, 0, addrlen); ++ ++ DEBUG_CALL("tcp_listen"); ++ DEBUG_ARG("haddr = %s", inet_ntoa((struct in_addr){ .s_addr = haddr })); ++ DEBUG_ARG("hport = %d", ntohs(hport)); ++ DEBUG_ARG("laddr = %s", inet_ntoa((struct in_addr){ .s_addr = laddr })); ++ DEBUG_ARG("lport = %d", ntohs(lport)); ++ DEBUG_ARG("flags = %x", flags); ++ ++ so = socreate(slirp); ++ ++ /* Don't tcp_attach... we don't need so_snd nor so_rcv */ ++ if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL) { ++ g_free(so); ++ return NULL; ++ } ++ insque(so, &slirp->tcb); ++ ++ /* ++ * SS_FACCEPTONCE sockets must time out. ++ */ ++ if (flags & SS_FACCEPTONCE) ++ so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT * 2; ++ ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= (SS_FACCEPTCONN | flags); ++ so->so_lfamily = AF_INET; ++ so->so_lport = lport; /* Kept in network format */ ++ so->so_laddr.s_addr = laddr; /* Ditto */ ++ ++ addr.sin_family = AF_INET; ++ addr.sin_addr.s_addr = haddr; ++ addr.sin_port = hport; ++ ++ if (((s = slirp_socket(AF_INET, SOCK_STREAM, 0)) < 0) || ++ (slirp_socket_set_fast_reuse(s) < 0) || ++ (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0) || ++ (listen(s, 1) < 0)) { ++ int tmperrno = errno; /* Don't clobber the real reason we failed */ ++ ++ if (s >= 0) { ++ closesocket(s); ++ } ++ sofree(so); ++ /* Restore the real errno */ ++#ifdef _WIN32 ++ WSASetLastError(tmperrno); ++#else ++ errno = tmperrno; ++#endif ++ return NULL; ++ } ++ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ opt = 1; ++ setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(int)); ++ ++ getsockname(s, (struct sockaddr *)&addr, &addrlen); ++ so->so_ffamily = AF_INET; ++ so->so_fport = addr.sin_port; ++ if (addr.sin_addr.s_addr == 0 || ++ addr.sin_addr.s_addr == loopback_addr.s_addr) ++ so->so_faddr = slirp->vhost_addr; ++ else ++ so->so_faddr = addr.sin_addr; ++ ++ so->s = s; ++ 
return so; ++} ++ ++/* ++ * Various session state calls ++ * XXX Should be #define's ++ * The socket state stuff needs work, these often get call 2 or 3 ++ * times each when only 1 was needed ++ */ ++void soisfconnecting(struct socket *so) ++{ ++ so->so_state &= ~(SS_NOFDREF | SS_ISFCONNECTED | SS_FCANTRCVMORE | ++ SS_FCANTSENDMORE | SS_FWDRAIN); ++ so->so_state |= SS_ISFCONNECTING; /* Clobber other states */ ++} ++ ++void soisfconnected(struct socket *so) ++{ ++ so->so_state &= ~(SS_ISFCONNECTING | SS_FWDRAIN | SS_NOFDREF); ++ so->so_state |= SS_ISFCONNECTED; /* Clobber other states */ ++} ++ ++static void sofcantrcvmore(struct socket *so) ++{ ++ if ((so->so_state & SS_NOFDREF) == 0) { ++ shutdown(so->s, 0); ++ } ++ so->so_state &= ~(SS_ISFCONNECTING); ++ if (so->so_state & SS_FCANTSENDMORE) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* Don't select it */ ++ } else { ++ so->so_state |= SS_FCANTRCVMORE; ++ } ++} ++ ++static void sofcantsendmore(struct socket *so) ++{ ++ if ((so->so_state & SS_NOFDREF) == 0) { ++ shutdown(so->s, 1); /* send FIN to fhost */ ++ } ++ so->so_state &= ~(SS_ISFCONNECTING); ++ if (so->so_state & SS_FCANTRCVMORE) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* as above */ ++ } else { ++ so->so_state |= SS_FCANTSENDMORE; ++ } ++} ++ ++/* ++ * Set write drain mode ++ * Set CANTSENDMORE once all data has been write()n ++ */ ++void sofwdrain(struct socket *so) ++{ ++ if (so->so_rcv.sb_cc) ++ so->so_state |= SS_FWDRAIN; ++ else ++ sofcantsendmore(so); ++} ++ ++/* ++ * Translate addr in host addr when it is a virtual address ++ */ ++void sotranslate_out(struct socket *so, struct sockaddr_storage *addr) ++{ ++ Slirp *slirp = so->slirp; ++ struct sockaddr_in *sin = (struct sockaddr_in *)addr; ++ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; ++ ++ switch (addr->ss_family) { ++ case AF_INET: ++ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) 
{ ++ /* It's an alias */ ++ if (so->so_faddr.s_addr == slirp->vnameserver_addr.s_addr) { ++ if (get_dns_addr(&sin->sin_addr) < 0) { ++ sin->sin_addr = loopback_addr; ++ } ++ } else { ++ sin->sin_addr = loopback_addr; ++ } ++ } ++ ++ DEBUG_MISC(" addr.sin_port=%d, addr.sin_addr.s_addr=%.16s", ++ ntohs(sin->sin_port), inet_ntoa(sin->sin_addr)); ++ break; ++ ++ case AF_INET6: ++ if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6, ++ slirp->vprefix_len)) { ++ if (in6_equal(&so->so_faddr6, &slirp->vnameserver_addr6)) { ++ uint32_t scope_id; ++ if (get_dns6_addr(&sin6->sin6_addr, &scope_id) >= 0) { ++ sin6->sin6_scope_id = scope_id; ++ } else { ++ sin6->sin6_addr = in6addr_loopback; ++ } ++ } else { ++ sin6->sin6_addr = in6addr_loopback; ++ } ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++void sotranslate_in(struct socket *so, struct sockaddr_storage *addr) ++{ ++ Slirp *slirp = so->slirp; ++ struct sockaddr_in *sin = (struct sockaddr_in *)addr; ++ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; ++ ++ switch (addr->ss_family) { ++ case AF_INET: ++ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ uint32_t inv_mask = ~slirp->vnetwork_mask.s_addr; ++ ++ if ((so->so_faddr.s_addr & inv_mask) == inv_mask) { ++ sin->sin_addr = slirp->vhost_addr; ++ } else if (sin->sin_addr.s_addr == loopback_addr.s_addr || ++ so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { ++ sin->sin_addr = so->so_faddr; ++ } ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6, ++ slirp->vprefix_len)) { ++ if (in6_equal(&sin6->sin6_addr, &in6addr_loopback) || ++ !in6_equal(&so->so_faddr6, &slirp->vhost_addr6)) { ++ sin6->sin6_addr = so->so_faddr6; ++ } ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++/* ++ * Translate connections from localhost to the real hostname ++ */ ++void sotranslate_accept(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ ++ switch (so->so_ffamily) { ++ case 
AF_INET: ++ if (so->so_faddr.s_addr == INADDR_ANY || ++ (so->so_faddr.s_addr & loopback_mask) == ++ (loopback_addr.s_addr & loopback_mask)) { ++ so->so_faddr = slirp->vhost_addr; ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_equal(&so->so_faddr6, &in6addr_any) || ++ in6_equal(&so->so_faddr6, &in6addr_loopback)) { ++ so->so_faddr6 = slirp->vhost_addr6; ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++void sodrop(struct socket *s, int num) ++{ ++ if (sbdrop(&s->so_snd, num)) { ++ s->slirp->cb->notify(s->slirp->opaque); ++ } ++} +diff --git a/slirp/src/socket.h b/slirp/src/socket.h +new file mode 100644 +index 0000000..d07f56d +--- /dev/null ++++ b/slirp/src/socket.h +@@ -0,0 +1,164 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef SLIRP_SOCKET_H ++#define SLIRP_SOCKET_H ++ ++#include "misc.h" ++ ++#define SO_EXPIRE 240000 ++#define SO_EXPIREFAST 10000 ++ ++/* ++ * Our socket structure ++ */ ++ ++union slirp_sockaddr { ++ struct sockaddr_storage ss; ++ struct sockaddr_in sin; ++ struct sockaddr_in6 sin6; ++}; ++ ++struct socket { ++ struct socket *so_next, *so_prev; /* For a linked list of sockets */ ++ ++ int s; /* The actual socket */ ++ struct gfwd_list *guestfwd; ++ ++ int pollfds_idx; /* GPollFD GArray index */ ++ ++ Slirp *slirp; /* managing slirp instance */ ++ ++ /* XXX union these with not-yet-used sbuf params */ ++ struct mbuf *so_m; /* Pointer to the original SYN packet, ++ * for non-blocking connect()'s, and ++ * PING reply's */ ++ struct tcpiphdr *so_ti; /* Pointer to the original ti within ++ * so_mconn, for non-blocking connections */ ++ uint32_t so_urgc; ++ union slirp_sockaddr fhost; /* Foreign host */ ++#define so_faddr fhost.sin.sin_addr ++#define so_fport fhost.sin.sin_port ++#define so_faddr6 fhost.sin6.sin6_addr ++#define so_fport6 fhost.sin6.sin6_port ++#define so_ffamily fhost.ss.ss_family ++ ++ union slirp_sockaddr lhost; /* Local host */ ++#define so_laddr 
lhost.sin.sin_addr ++#define so_lport lhost.sin.sin_port ++#define so_laddr6 lhost.sin6.sin6_addr ++#define so_lport6 lhost.sin6.sin6_port ++#define so_lfamily lhost.ss.ss_family ++ ++ uint8_t so_iptos; /* Type of service */ ++ uint8_t so_emu; /* Is the socket emulated? */ ++ ++ uint8_t so_type; /* Type of socket, UDP or TCP */ ++ int32_t so_state; /* internal state flags SS_*, below */ ++ ++ struct tcpcb *so_tcpcb; /* pointer to TCP protocol control block */ ++ unsigned so_expire; /* When the socket will expire */ ++ ++ int so_queued; /* Number of packets queued from this socket */ ++ int so_nqueued; /* Number of packets queued in a row ++ * Used to determine when to "downgrade" a session ++ * from fastq to batchq */ ++ ++ struct sbuf so_rcv; /* Receive buffer */ ++ struct sbuf so_snd; /* Send buffer */ ++}; ++ ++ ++/* ++ * Socket state bits. (peer means the host on the Internet, ++ * local host means the host on the other end of the modem) ++ */ ++#define SS_NOFDREF 0x001 /* No fd reference */ ++ ++#define SS_ISFCONNECTING \ ++ 0x002 /* Socket is connecting to peer (non-blocking connect()'s) */ ++#define SS_ISFCONNECTED 0x004 /* Socket is connected to peer */ ++#define SS_FCANTRCVMORE \ ++ 0x008 /* Socket can't receive more from peer (for half-closes) */ ++#define SS_FCANTSENDMORE \ ++ 0x010 /* Socket can't send more to peer (for half-closes) */ ++#define SS_FWDRAIN \ ++ 0x040 /* We received a FIN, drain data and set SS_FCANTSENDMORE */ ++ ++#define SS_CTL 0x080 ++#define SS_FACCEPTCONN \ ++ 0x100 /* Socket is accepting connections from a host on the internet */ ++#define SS_FACCEPTONCE \ ++ 0x200 /* If set, the SS_FACCEPTCONN socket will die after one accept */ ++ ++#define SS_PERSISTENT_MASK 0xf000 /* Unremovable state bits */ ++#define SS_HOSTFWD 0x1000 /* Socket describes host->guest forwarding */ ++#define SS_INCOMING \ ++ 0x2000 /* Connection was initiated by a host on the internet */ ++ ++static inline int sockaddr_equal(struct sockaddr_storage *a, ++ 
struct sockaddr_storage *b) ++{ ++ if (a->ss_family != b->ss_family) { ++ return 0; ++ } ++ ++ switch (a->ss_family) { ++ case AF_INET: { ++ struct sockaddr_in *a4 = (struct sockaddr_in *)a; ++ struct sockaddr_in *b4 = (struct sockaddr_in *)b; ++ return a4->sin_addr.s_addr == b4->sin_addr.s_addr && ++ a4->sin_port == b4->sin_port; ++ } ++ case AF_INET6: { ++ struct sockaddr_in6 *a6 = (struct sockaddr_in6 *)a; ++ struct sockaddr_in6 *b6 = (struct sockaddr_in6 *)b; ++ return (in6_equal(&a6->sin6_addr, &b6->sin6_addr) && ++ a6->sin6_port == b6->sin6_port); ++ } ++ default: ++ g_assert_not_reached(); ++ } ++ ++ return 0; ++} ++ ++static inline socklen_t sockaddr_size(struct sockaddr_storage *a) ++{ ++ switch (a->ss_family) { ++ case AF_INET: ++ return sizeof(struct sockaddr_in); ++ case AF_INET6: ++ return sizeof(struct sockaddr_in6); ++ default: ++ g_assert_not_reached(); ++ } ++} ++ ++struct socket *solookup(struct socket **, struct socket *, ++ struct sockaddr_storage *, struct sockaddr_storage *); ++struct socket *socreate(Slirp *); ++void sofree(struct socket *); ++int soread(struct socket *); ++int sorecvoob(struct socket *); ++int sosendoob(struct socket *); ++int sowrite(struct socket *); ++void sorecvfrom(struct socket *); ++int sosendto(struct socket *, struct mbuf *); ++struct socket *tcp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); ++void soisfconnecting(register struct socket *); ++void soisfconnected(register struct socket *); ++void sofwdrain(struct socket *); ++struct iovec; /* For win32 */ ++size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np); ++int soreadbuf(struct socket *so, const char *buf, int size); ++ ++void sotranslate_out(struct socket *, struct sockaddr_storage *); ++void sotranslate_in(struct socket *, struct sockaddr_storage *); ++void sotranslate_accept(struct socket *); ++void sodrop(struct socket *, int num); ++ ++ ++#endif /* SLIRP_SOCKET_H */ +diff --git a/slirp/src/state.c b/slirp/src/state.c +new file 
mode 100644 +index 0000000..4a9824e +--- /dev/null ++++ b/slirp/src/state.c +@@ -0,0 +1,381 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * libslirp ++ * ++ * Copyright (c) 2004-2008 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "slirp.h" ++#include "vmstate.h" ++#include "stream.h" ++ ++static int slirp_tcp_post_load(void *opaque, int version) ++{ ++ tcp_template((struct tcpcb *)opaque); ++ ++ return 0; ++} ++ ++static const VMStateDescription vmstate_slirp_tcp = { ++ .name = "slirp-tcp", ++ .version_id = 0, ++ .post_load = slirp_tcp_post_load, ++ .fields = (VMStateField[]){ VMSTATE_INT16(t_state, struct tcpcb), ++ VMSTATE_INT16_ARRAY(t_timer, struct tcpcb, ++ TCPT_NTIMERS), ++ VMSTATE_INT16(t_rxtshift, struct tcpcb), ++ VMSTATE_INT16(t_rxtcur, struct tcpcb), ++ VMSTATE_INT16(t_dupacks, struct tcpcb), ++ VMSTATE_UINT16(t_maxseg, struct tcpcb), ++ VMSTATE_UINT8(t_force, struct tcpcb), ++ VMSTATE_UINT16(t_flags, struct tcpcb), ++ VMSTATE_UINT32(snd_una, struct tcpcb), ++ VMSTATE_UINT32(snd_nxt, struct tcpcb), ++ VMSTATE_UINT32(snd_up, struct tcpcb), ++ VMSTATE_UINT32(snd_wl1, struct tcpcb), ++ VMSTATE_UINT32(snd_wl2, struct tcpcb), ++ VMSTATE_UINT32(iss, struct tcpcb), ++ VMSTATE_UINT32(snd_wnd, struct tcpcb), ++ VMSTATE_UINT32(rcv_wnd, struct tcpcb), ++ VMSTATE_UINT32(rcv_nxt, struct tcpcb), ++ VMSTATE_UINT32(rcv_up, struct tcpcb), ++ VMSTATE_UINT32(irs, struct tcpcb), ++ VMSTATE_UINT32(rcv_adv, struct tcpcb), ++ VMSTATE_UINT32(snd_max, struct tcpcb), ++ VMSTATE_UINT32(snd_cwnd, struct tcpcb), ++ VMSTATE_UINT32(snd_ssthresh, struct tcpcb), ++ VMSTATE_INT16(t_idle, struct tcpcb), ++ VMSTATE_INT16(t_rtt, struct tcpcb), ++ VMSTATE_UINT32(t_rtseq, struct tcpcb), ++ VMSTATE_INT16(t_srtt, struct tcpcb), ++ VMSTATE_INT16(t_rttvar, struct tcpcb), ++ VMSTATE_UINT16(t_rttmin, struct tcpcb), ++ VMSTATE_UINT32(max_sndwnd, struct tcpcb), ++ VMSTATE_UINT8(t_oobflags, struct tcpcb), ++ VMSTATE_UINT8(t_iobc, struct tcpcb), ++ VMSTATE_INT16(t_softerror, struct tcpcb), ++ VMSTATE_UINT8(snd_scale, struct tcpcb), ++ VMSTATE_UINT8(rcv_scale, struct tcpcb), ++ VMSTATE_UINT8(request_r_scale, struct tcpcb), ++ VMSTATE_UINT8(requested_s_scale, struct tcpcb), ++ VMSTATE_UINT32(ts_recent, struct 
tcpcb), ++ VMSTATE_UINT32(ts_recent_age, struct tcpcb), ++ VMSTATE_UINT32(last_ack_sent, struct tcpcb), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++/* The sbuf has a pair of pointers that are migrated as offsets; ++ * we calculate the offsets and restore the pointers using ++ * pre_save/post_load on a tmp structure. ++ */ ++struct sbuf_tmp { ++ struct sbuf *parent; ++ uint32_t roff, woff; ++}; ++ ++static int sbuf_tmp_pre_save(void *opaque) ++{ ++ struct sbuf_tmp *tmp = opaque; ++ tmp->woff = tmp->parent->sb_wptr - tmp->parent->sb_data; ++ tmp->roff = tmp->parent->sb_rptr - tmp->parent->sb_data; ++ ++ return 0; ++} ++ ++static int sbuf_tmp_post_load(void *opaque, int version) ++{ ++ struct sbuf_tmp *tmp = opaque; ++ uint32_t requested_len = tmp->parent->sb_datalen; ++ ++ /* Allocate the buffer space used by the field after the tmp */ ++ sbreserve(tmp->parent, tmp->parent->sb_datalen); ++ ++ if (tmp->parent->sb_datalen != requested_len) { ++ return -ENOMEM; ++ } ++ if (tmp->woff >= requested_len || tmp->roff >= requested_len) { ++ g_critical("invalid sbuf offsets r/w=%u/%u len=%u", tmp->roff, ++ tmp->woff, requested_len); ++ return -EINVAL; ++ } ++ ++ tmp->parent->sb_wptr = tmp->parent->sb_data + tmp->woff; ++ tmp->parent->sb_rptr = tmp->parent->sb_data + tmp->roff; ++ ++ return 0; ++} ++ ++ ++static const VMStateDescription vmstate_slirp_sbuf_tmp = { ++ .name = "slirp-sbuf-tmp", ++ .post_load = sbuf_tmp_post_load, ++ .pre_save = sbuf_tmp_pre_save, ++ .version_id = 0, ++ .fields = (VMStateField[]){ VMSTATE_UINT32(woff, struct sbuf_tmp), ++ VMSTATE_UINT32(roff, struct sbuf_tmp), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp_sbuf = { ++ .name = "slirp-sbuf", ++ .version_id = 0, ++ .fields = (VMStateField[]){ VMSTATE_UINT32(sb_cc, struct sbuf), ++ VMSTATE_UINT32(sb_datalen, struct sbuf), ++ VMSTATE_WITH_TMP(struct sbuf, struct sbuf_tmp, ++ vmstate_slirp_sbuf_tmp), ++ VMSTATE_VBUFFER_UINT32(sb_data, struct sbuf, 0, ++ NULL, sb_datalen), ++ 
VMSTATE_END_OF_LIST() } ++}; ++ ++static bool slirp_older_than_v4(void *opaque, int version_id) ++{ ++ return version_id < 4; ++} ++ ++static bool slirp_family_inet(void *opaque, int version_id) ++{ ++ union slirp_sockaddr *ssa = (union slirp_sockaddr *)opaque; ++ return ssa->ss.ss_family == AF_INET; ++} ++ ++static int slirp_socket_pre_load(void *opaque) ++{ ++ struct socket *so = opaque; ++ if (tcp_attach(so) < 0) { ++ return -ENOMEM; ++ } ++ /* Older versions don't load these fields */ ++ so->so_ffamily = AF_INET; ++ so->so_lfamily = AF_INET; ++ return 0; ++} ++ ++#ifndef _WIN32 ++#define VMSTATE_SIN4_ADDR(f, s, t) VMSTATE_UINT32_TEST(f, s, t) ++#else ++/* Win uses u_long rather than uint32_t - but it's still 32bits long */ ++#define VMSTATE_SIN4_ADDR(f, s, t) \ ++ VMSTATE_SINGLE_TEST(f, s, t, 0, slirp_vmstate_info_uint32, u_long) ++#endif ++ ++/* The OS provided ss_family field isn't that portable; it's size ++ * and type varies (16/8 bit, signed, unsigned) ++ * and the values it contains aren't fully portable. ++ */ ++typedef struct SS_FamilyTmpStruct { ++ union slirp_sockaddr *parent; ++ uint16_t portable_family; ++} SS_FamilyTmpStruct; ++ ++#define SS_FAMILY_MIG_IPV4 2 /* Linux, BSD, Win... 
*/ ++#define SS_FAMILY_MIG_IPV6 10 /* Linux */ ++#define SS_FAMILY_MIG_OTHER 0xffff ++ ++static int ss_family_pre_save(void *opaque) ++{ ++ SS_FamilyTmpStruct *tss = opaque; ++ ++ tss->portable_family = SS_FAMILY_MIG_OTHER; ++ ++ if (tss->parent->ss.ss_family == AF_INET) { ++ tss->portable_family = SS_FAMILY_MIG_IPV4; ++ } else if (tss->parent->ss.ss_family == AF_INET6) { ++ tss->portable_family = SS_FAMILY_MIG_IPV6; ++ } ++ ++ return 0; ++} ++ ++static int ss_family_post_load(void *opaque, int version_id) ++{ ++ SS_FamilyTmpStruct *tss = opaque; ++ ++ switch (tss->portable_family) { ++ case SS_FAMILY_MIG_IPV4: ++ tss->parent->ss.ss_family = AF_INET; ++ break; ++ case SS_FAMILY_MIG_IPV6: ++ case 23: /* compatibility: AF_INET6 from mingw */ ++ case 28: /* compatibility: AF_INET6 from FreeBSD sys/socket.h */ ++ tss->parent->ss.ss_family = AF_INET6; ++ break; ++ default: ++ g_critical("invalid ss_family type %x", tss->portable_family); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static const VMStateDescription vmstate_slirp_ss_family = { ++ .name = "slirp-socket-addr/ss_family", ++ .pre_save = ss_family_pre_save, ++ .post_load = ss_family_post_load, ++ .fields = ++ (VMStateField[]){ VMSTATE_UINT16(portable_family, SS_FamilyTmpStruct), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp_socket_addr = { ++ .name = "slirp-socket-addr", ++ .version_id = 4, ++ .fields = ++ (VMStateField[]){ ++ VMSTATE_WITH_TMP(union slirp_sockaddr, SS_FamilyTmpStruct, ++ vmstate_slirp_ss_family), ++ VMSTATE_SIN4_ADDR(sin.sin_addr.s_addr, union slirp_sockaddr, ++ slirp_family_inet), ++ VMSTATE_UINT16_TEST(sin.sin_port, union slirp_sockaddr, ++ slirp_family_inet), ++ ++#if 0 ++ /* Untested: Needs checking by someone with IPv6 test */ ++ VMSTATE_BUFFER_TEST(sin6.sin6_addr, union slirp_sockaddr, ++ slirp_family_inet6), ++ VMSTATE_UINT16_TEST(sin6.sin6_port, union slirp_sockaddr, ++ slirp_family_inet6), ++ VMSTATE_UINT32_TEST(sin6.sin6_flowinfo, union 
slirp_sockaddr, ++ slirp_family_inet6), ++ VMSTATE_UINT32_TEST(sin6.sin6_scope_id, union slirp_sockaddr, ++ slirp_family_inet6), ++#endif ++ ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp_socket = { ++ .name = "slirp-socket", ++ .version_id = 4, ++ .pre_load = slirp_socket_pre_load, ++ .fields = ++ (VMStateField[]){ ++ VMSTATE_UINT32(so_urgc, struct socket), ++ /* Pre-v4 versions */ ++ VMSTATE_SIN4_ADDR(so_faddr.s_addr, struct socket, ++ slirp_older_than_v4), ++ VMSTATE_SIN4_ADDR(so_laddr.s_addr, struct socket, ++ slirp_older_than_v4), ++ VMSTATE_UINT16_TEST(so_fport, struct socket, slirp_older_than_v4), ++ VMSTATE_UINT16_TEST(so_lport, struct socket, slirp_older_than_v4), ++ /* v4 and newer */ ++ VMSTATE_STRUCT(fhost, struct socket, 4, vmstate_slirp_socket_addr, ++ union slirp_sockaddr), ++ VMSTATE_STRUCT(lhost, struct socket, 4, vmstate_slirp_socket_addr, ++ union slirp_sockaddr), ++ ++ VMSTATE_UINT8(so_iptos, struct socket), ++ VMSTATE_UINT8(so_emu, struct socket), ++ VMSTATE_UINT8(so_type, struct socket), ++ VMSTATE_INT32(so_state, struct socket), ++ VMSTATE_STRUCT(so_rcv, struct socket, 0, vmstate_slirp_sbuf, ++ struct sbuf), ++ VMSTATE_STRUCT(so_snd, struct socket, 0, vmstate_slirp_sbuf, ++ struct sbuf), ++ VMSTATE_STRUCT_POINTER(so_tcpcb, struct socket, vmstate_slirp_tcp, ++ struct tcpcb), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp_bootp_client = { ++ .name = "slirp_bootpclient", ++ .fields = (VMStateField[]){ VMSTATE_UINT16(allocated, BOOTPClient), ++ VMSTATE_BUFFER(macaddr, BOOTPClient), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp = { ++ .name = "slirp", ++ .version_id = 4, ++ .fields = (VMStateField[]){ VMSTATE_UINT16_V(ip_id, Slirp, 2), ++ VMSTATE_STRUCT_ARRAY( ++ bootp_clients, Slirp, NB_BOOTP_CLIENTS, 3, ++ vmstate_slirp_bootp_client, BOOTPClient), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++void slirp_state_save(Slirp *slirp, SlirpWriteCb 
write_cb, void *opaque) ++{ ++ struct gfwd_list *ex_ptr; ++ SlirpOStream f = { ++ .write_cb = write_cb, ++ .opaque = opaque, ++ }; ++ /* One record per guestfwd entry that has a write_cb and a control socket. */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) ++ if (ex_ptr->write_cb) { ++ struct socket *so; ++ so = slirp_find_ctl_socket(slirp, ex_ptr->ex_addr, ++ ntohs(ex_ptr->ex_fport)); ++ if (!so) { ++ continue; ++ } ++ /* Any non-zero byte tells slirp_state_load() that a socket record follows. */ ++ slirp_ostream_write_u8(&f, 42); ++ slirp_vmstate_save_state(&f, &vmstate_slirp_socket, so); ++ } ++ slirp_ostream_write_u8(&f, 0); ++ /* The zero byte ends the socket list; the instance-wide state comes last. */ ++ slirp_vmstate_save_state(&f, &vmstate_slirp, slirp); ++} ++/* Load socket records until a zero byte, then vmstate_slirp (mirrors slirp_state_save). */ ++/* Returns a negative value on a load failure or a rejected socket, else the final load's result. */ ++int slirp_state_load(Slirp *slirp, int version_id, SlirpReadCb read_cb, ++ void *opaque) ++{ ++ struct gfwd_list *ex_ptr; ++ SlirpIStream f = { ++ .read_cb = read_cb, ++ .opaque = opaque, ++ }; ++ /* slirp_istream_read_u8() returns 0 on a failed read too, which also ends this loop. */ ++ while (slirp_istream_read_u8(&f)) { ++ int ret; ++ struct socket *so = socreate(slirp); ++ /* NOTE(review): the error returns below leave so as is - confirm it is released elsewhere. */ ++ ret = ++ slirp_vmstate_load_state(&f, &vmstate_slirp_socket, so, version_id); ++ if (ret < 0) { ++ return ret; ++ } ++ /* A loaded socket must lie inside the virtual network and match a write_cb guestfwd entry. */ ++ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) != ++ slirp->vnetwork_addr.s_addr) { ++ return -EINVAL; ++ } ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->write_cb && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr && ++ so->so_fport == ex_ptr->ex_fport) { ++ break; ++ } ++ } ++ if (!ex_ptr) { ++ return -EINVAL; ++ } ++ } ++ ++ return slirp_vmstate_load_state(&f, &vmstate_slirp, slirp, version_id); ++} ++/* Returns 4, the same value as .version_id of vmstate_slirp and vmstate_slirp_socket. */ ++int slirp_state_version(void) ++{ ++ return 4; ++} +diff --git a/slirp/src/stream.c b/slirp/src/stream.c +new file mode 100644 +index 0000000..6cf326f +--- /dev/null ++++ b/slirp/src/stream.c +@@ -0,0 +1,120 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * libslirp io streams ++ * ++ * Copyright (c) 2018 Red Hat, Inc.
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "stream.h" ++#include <glib.h> ++/* Read size bytes into buf via f->read_cb; true only if the callback reports exactly size. */ ++bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size) ++{ ++ return f->read_cb(buf, size, f->opaque) == size; ++} ++/* Write size bytes from buf via f->write_cb; true only if the callback reports exactly size. */ ++bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size) ++{ ++ return f->write_cb(buf, size, f->opaque) == size; ++} ++/* Read one byte; returns 0 when the read fails, so a failure looks like a stored 0. */ ++uint8_t slirp_istream_read_u8(SlirpIStream *f) ++{ ++ uint8_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return b; ++ } ++ ++ return 0; ++} ++/* Write one byte; returns the result of slirp_ostream_write(). */ ++bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b) ++{ ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++/* Read a big-endian uint16_t and return it in host order; 0 when the read fails. */ ++uint16_t slirp_istream_read_u16(SlirpIStream *f) ++{ ++ uint16_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GUINT16_FROM_BE(b); ++ } ++ ++ return 0; ++} ++/* Write b as a big-endian uint16_t; returns the result of slirp_ostream_write(). */ ++bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b) ++{ ++ b = GUINT16_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++/* Read a big-endian uint32_t and return it in host order; 0 when the read fails. */ ++uint32_t slirp_istream_read_u32(SlirpIStream *f) ++{ ++ uint32_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GUINT32_FROM_BE(b); ++ } ++ ++ return 0; ++} ++/* Write b as a big-endian uint32_t; returns the result of slirp_ostream_write(). */ ++bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b) ++{ ++ b = GUINT32_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++/* Read a big-endian int16_t and return it in host order; 0 when the read fails. */ ++int16_t slirp_istream_read_i16(SlirpIStream *f) ++{ ++ int16_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GINT16_FROM_BE(b); ++ } ++ ++ return 0; ++} ++/* Write b as a big-endian int16_t; returns the result of slirp_ostream_write(). */ ++bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b) ++{ ++ b = GINT16_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++/* Read a big-endian int32_t and return it in host order; 0 when the read fails. */ ++int32_t slirp_istream_read_i32(SlirpIStream *f) ++{ ++ int32_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GINT32_FROM_BE(b); ++ } ++ ++ return 0; ++} ++/* Write b as a big-endian int32_t; returns the result of slirp_ostream_write(). */ ++bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b) ++{ ++ b = GINT32_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} +diff --git a/slirp/src/stream.h b/slirp/src/stream.h +new file mode 100644 +index 0000000..08bb5b6 +--- /dev/null ++++
b/slirp/src/stream.h +@@ -0,0 +1,35 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef STREAM_H_ ++#define STREAM_H_ ++ ++#include "libslirp.h" ++/* Input stream: a read callback plus the opaque pointer handed back to it. */ ++typedef struct SlirpIStream { ++ SlirpReadCb read_cb; ++ void *opaque; /* passed as the last argument of read_cb */ ++} SlirpIStream; ++/* Output stream: a write callback plus the opaque pointer handed back to it. */ ++typedef struct SlirpOStream { ++ SlirpWriteCb write_cb; ++ void *opaque; /* passed as the last argument of write_cb */ ++} SlirpOStream; ++/* Raw transfers: each returns true only if the callback reports exactly size bytes. */ ++bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size); ++bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size); ++/* Typed helpers: 16- and 32-bit values are big-endian on the stream; readers return 0 when the read fails. */ ++uint8_t slirp_istream_read_u8(SlirpIStream *f); ++bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b); ++ ++uint16_t slirp_istream_read_u16(SlirpIStream *f); ++bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b); ++ ++uint32_t slirp_istream_read_u32(SlirpIStream *f); ++bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b); ++ ++int16_t slirp_istream_read_i16(SlirpIStream *f); ++bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b); ++ ++int32_t slirp_istream_read_i32(SlirpIStream *f); ++bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b); ++ ++#endif /* STREAM_H_ */ +diff --git a/slirp/src/tcp.h b/slirp/src/tcp.h +new file mode 100644 +index 0000000..22625e6 +--- /dev/null ++++ b/slirp/src/tcp.h +@@ -0,0 +1,181 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3.
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp.h 8.1 (Berkeley) 6/10/93 ++ * tcp.h,v 1.3 1994/08/21 05:27:34 paul Exp ++ */ ++ ++#ifndef TCP_H ++#define TCP_H ++ ++#include <glib.h> ++/* TCP sequence number; compare values with the SEQ_* macros in this header, which handle wraparound. */ ++typedef uint32_t tcp_seq; ++ ++#define PR_SLOWHZ 2 /* 2 slow timeouts per second (approx) */ ++#define PR_FASTHZ 5 /* 5 fast timeouts per second (not important) */ ++/* Sizes tcp_input() passes to sbreserve() for the send and receive buffers of a newly created socket. */ ++#define TCP_SNDSPACE 8192 ++#define TCP_RCVSPACE 8192 ++ ++/* ++ * TCP header. ++ * Per RFC 793, September, 1981.
++ */ ++#define tcphdr slirp_tcphdr /* the struct tag below is really slirp_tcphdr */ ++struct tcphdr { ++ uint16_t th_sport; /* source port */ ++ uint16_t th_dport; /* destination port */ ++ tcp_seq th_seq; /* sequence number */ ++ tcp_seq th_ack; /* acknowledgement number */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t th_off : 4, /* data offset */ ++ th_x2 : 4; /* (unused) */ ++#else ++ uint8_t th_x2 : 4, /* (unused) */ ++ th_off : 4; /* data offset */ ++#endif ++ uint8_t th_flags; /* control flags; presumably the TH_* bits below */ ++ uint16_t th_win; /* window */ ++ uint16_t th_sum; /* checksum */ ++ uint16_t th_urp; /* urgent pointer */ ++}; ++ ++#include "tcp_var.h" ++/* TCP control-flag bits (tested against ti_flags in tcp_input.c); skipped if TH_FIN is already defined. */ ++#ifndef TH_FIN ++#define TH_FIN 0x01 ++#define TH_SYN 0x02 ++#define TH_RST 0x04 ++#define TH_PUSH 0x08 ++#define TH_ACK 0x10 ++#define TH_URG 0x20 ++#endif ++ ++#ifndef TCPOPT_EOL ++#define TCPOPT_EOL 0 ++#define TCPOPT_NOP 1 ++#define TCPOPT_MAXSEG 2 ++#define TCPOPT_WINDOW 3 ++#define TCPOPT_SACK_PERMITTED 4 /* Experimental */ ++#define TCPOPT_SACK 5 /* Experimental */ ++#define TCPOPT_TIMESTAMP 8 ++/* Leading 32-bit word of a NOP-padded timestamp option: NOP, NOP, kind, length. */ ++#define TCPOPT_TSTAMP_HDR \ ++ (TCPOPT_NOP << 24 | TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 | \ ++ TCPOLEN_TIMESTAMP) ++#endif ++ ++#ifndef TCPOLEN_MAXSEG ++#define TCPOLEN_MAXSEG 4 ++#define TCPOLEN_WINDOW 3 ++#define TCPOLEN_SACK_PERMITTED 2 ++#define TCPOLEN_TIMESTAMP 10 ++#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP + 2) /* appendix A */ ++#endif ++ ++/* ++ * Default maximum segment size for TCP. ++ * With an IP MSS of 576, this is 536, ++ * but 512 is probably more convenient. ++ * This should be defined as MIN(512, IP_MSS - sizeof (struct tcpiphdr)). ++ * ++ * We make this 1460 because we only care about Ethernet in the qemu context. ++ */ ++#undef TCP_MSS ++#define TCP_MSS 1460 ++#undef TCP6_MSS ++#define TCP6_MSS 1440 ++ ++#undef TCP_MAXWIN ++#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */ ++ ++#undef TCP_MAX_WINSHIFT ++#define TCP_MAX_WINSHIFT 14 /* maximum window shift */ ++ ++/* ++ * User-settable options (used with setsockopt).
++ * ++ * We don't use the system headers on unix because we have conflicting ++ * local structures. We can't avoid the system definitions on Windows, ++ * so we undefine them. ++ */ ++#undef TCP_NODELAY ++#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ ++#undef TCP_MAXSEG ++ ++/* ++ * TCP FSM state definitions. ++ * Per RFC793, September, 1981. ++ */ ++ ++#define TCP_NSTATES 11 ++ ++#define TCPS_CLOSED 0 /* closed */ ++#define TCPS_LISTEN 1 /* listening for connection */ ++#define TCPS_SYN_SENT 2 /* active, have sent syn */ ++#define TCPS_SYN_RECEIVED 3 /* have sent and received syn */ ++/* states < TCPS_ESTABLISHED are those where connections not established */ ++#define TCPS_ESTABLISHED 4 /* established */ ++#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */ ++/* states > TCPS_CLOSE_WAIT are those where user has closed */ ++#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */ ++#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */ ++#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */ ++/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */ ++#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */ ++#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */ ++/* These predicates depend on the numeric order of the TCPS_* values; TCPS_HAVERCVDFIN holds only in TIME_WAIT, the highest state. */ ++#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED) ++#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED) ++#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT) ++ ++/* ++ * TCP sequence numbers are 32 bit integers operated ++ * on with modular arithmetic. These macros can be ++ * used to compare such integers. ++ */ ++#define SEQ_LT(a, b) ((int)((a) - (b)) < 0) ++#define SEQ_LEQ(a, b) ((int)((a) - (b)) <= 0) ++#define SEQ_GT(a, b) ((int)((a) - (b)) > 0) ++#define SEQ_GEQ(a, b) ((int)((a) - (b)) >= 0) ++ ++/* ++ * Macros to initialize tcp sequence numbers for ++ * send and receive from initial send and receive ++ * sequence numbers.
++ */ ++#define tcp_rcvseqinit(tp) (tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1 ++/* Both initializers expand to a bare chained assignment, so use them only as complete statements. */ ++#define tcp_sendseqinit(tp) \ ++ (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = (tp)->iss ++ ++#define TCP_ISSINCR (125 * 1024) /* increment for tcp_iss each second */ ++ ++#endif /* TCP_H */ +diff --git a/slirp/src/tcp_input.c b/slirp/src/tcp_input.c +new file mode 100644 +index 0000000..228c2aa +--- /dev/null ++++ b/slirp/src/tcp_input.c +@@ -0,0 +1,1540 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94 ++ * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++/* Presumably the duplicate-ACK count that triggers a retransmit in tcp_input(); confirm at the use site. */ ++#define TCPREXMTTHRESH 3 ++/* 24 days expressed in PR_SLOWHZ ticks: days * hours * minutes * seconds * ticks per second. */ ++#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) ++ ++/* for modulo comparisons of timestamps */ ++#define TSTMP_LT(a, b) ((int)((a) - (b)) < 0) ++#define TSTMP_GEQ(a, b) ((int)((a) - (b)) >= 0) ++ ++/* ++ * Insert segment ti into reassembly queue of tcp with ++ * control block tp. Return TH_FIN if reassembly now includes ++ * a segment with FIN. The macro form does the common case inline ++ * (segment is the next to be received on an established connection, ++ * and the queue is empty), avoiding linkage into and removal ++ * from the queue and repetition of various conversions. ++ * Set DELACK for segments received in order, but ack immediately ++ * when segments are out of order (so fast retransmit can work).
++ */ ++#define TCP_REASS(tp, ti, m, so, flags) \ ++ { \ ++ if ((ti)->ti_seq == (tp)->rcv_nxt && tcpfrag_list_empty(tp) && \ ++ (tp)->t_state == TCPS_ESTABLISHED) { \ ++ (tp)->t_flags |= TF_DELACK; \ ++ (tp)->rcv_nxt += (ti)->ti_len; \ ++ (flags) = (ti)->ti_flags & TH_FIN; \ ++ if ((so)->so_emu) { \ ++ if (tcp_emu((so), (m))) \ ++ sbappend((so), (m)); \ ++ } else \ ++ sbappend((so), (m)); \ ++ } else { \ ++ (flags) = tcp_reass((tp), (ti), (m)); \ ++ (tp)->t_flags |= TF_ACKNOW; \ ++ } \ ++ } ++/* Forward declarations of file-local helpers. */ ++static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, ++ struct tcpiphdr *ti); ++static void tcp_xmit_timer(register struct tcpcb *tp, int rtt); ++/* Queue segment ti (held in mbuf m) on the reassembly list of tp, trimming overlaps, then hand in-sequence data to the socket; returns the TH_FIN bit of the last segment delivered, or 0 when nothing is delivered. */ ++static int tcp_reass(register struct tcpcb *tp, register struct tcpiphdr *ti, ++ struct mbuf *m) ++{ ++ register struct tcpiphdr *q; ++ struct socket *so = tp->t_socket; ++ int flags; ++ ++ /* ++ * Call with ti==NULL after become established to ++ * force pre-ESTABLISHED data up to user socket. ++ */ ++ if (ti == NULL) ++ goto present; ++ ++ /* ++ * Find a segment which begins after this one does. ++ */ ++ for (q = tcpfrag_list_first(tp); !tcpfrag_list_end(q, tp); ++ q = tcpiphdr_next(q)) ++ if (SEQ_GT(q->ti_seq, ti->ti_seq)) ++ break; ++ ++ /* ++ * If there is a preceding segment, it may provide some of ++ * our data already. If so, drop the data from the incoming ++ * segment. If it provides all of our data, drop us. ++ */ ++ if (!tcpfrag_list_end(tcpiphdr_prev(q), tp)) { ++ register int i; ++ q = tcpiphdr_prev(q); ++ /* conversion to int (in i) handles seq wraparound */ ++ i = q->ti_seq + q->ti_len - ti->ti_seq; ++ if (i > 0) { ++ if (i >= ti->ti_len) { ++ m_free(m); ++ /* ++ * Try to present any queued data ++ * at the left window edge to the user. ++ * This is needed after the 3-WHS ++ * completes. ++ */ ++ goto present; /* ???
*/ ++ } ++ m_adj(m, i); ++ ti->ti_len -= i; ++ ti->ti_seq += i; ++ } ++ q = tcpiphdr_next(q); ++ } ++ ti->ti_mbuf = m; ++ ++ /* ++ * While we overlap succeeding segments trim them or, ++ * if they are completely covered, dequeue them. ++ */ ++ while (!tcpfrag_list_end(q, tp)) { ++ register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; ++ if (i <= 0) ++ break; ++ if (i < q->ti_len) { ++ q->ti_seq += i; ++ q->ti_len -= i; ++ m_adj(q->ti_mbuf, i); ++ break; ++ } ++ q = tcpiphdr_next(q); ++ m = tcpiphdr_prev(q)->ti_mbuf; ++ remque(tcpiphdr2qlink(tcpiphdr_prev(q))); ++ m_free(m); ++ } ++ ++ /* ++ * Stick new segment in its place. ++ */ ++ insque(tcpiphdr2qlink(ti), tcpiphdr2qlink(tcpiphdr_prev(q))); ++ ++present: ++ /* ++ * Present data to user, advancing rcv_nxt through ++ * completed sequence space. ++ */ ++ if (!TCPS_HAVEESTABLISHED(tp->t_state)) ++ return (0); ++ ti = tcpfrag_list_first(tp); ++ if (tcpfrag_list_end(ti, tp) || ti->ti_seq != tp->rcv_nxt) ++ return (0); ++ if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) /* cannot be true here: SYN_RECEIVED already failed TCPS_HAVEESTABLISHED above */ ++ return (0); ++ do { ++ tp->rcv_nxt += ti->ti_len; ++ flags = ti->ti_flags & TH_FIN; ++ remque(tcpiphdr2qlink(ti)); ++ m = ti->ti_mbuf; ++ ti = tcpiphdr_next(ti); ++ if (so->so_state & SS_FCANTSENDMORE) ++ m_free(m); ++ else { ++ if (so->so_emu) { ++ if (tcp_emu(so, m)) ++ sbappend(so, m); ++ } else ++ sbappend(so, m); ++ } ++ } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); /* NOTE(review): raw pointer test here, tcpfrag_list_end() elsewhere in this function; confirm both detect the list end */ ++ return (flags); ++} ++ ++/* ++ * TCP input routine, follows pages 65-76 of the ++ * protocol specification dated September, 1981 very closely.
++ */ ++void tcp_input(struct mbuf *m, int iphlen, struct socket *inso, ++ unsigned short af) ++{ ++ struct ip save_ip, *ip; ++ struct ip6 save_ip6, *ip6; ++ register struct tcpiphdr *ti; ++ char *optp = NULL; ++ int optlen = 0; ++ int len, tlen, off; ++ register struct tcpcb *tp = NULL; ++ register int tiflags; ++ struct socket *so = NULL; ++ int todrop, acked, ourfinisacked, needoutput = 0; ++ int iss = 0; ++ uint32_t tiwin; ++ int ret; ++ struct sockaddr_storage lhost, fhost; ++ struct sockaddr_in *lhost4, *fhost4; ++ struct sockaddr_in6 *lhost6, *fhost6; ++ struct gfwd_list *ex_ptr; ++ Slirp *slirp; ++ ++ DEBUG_CALL("tcp_input"); ++ DEBUG_ARG("m = %p iphlen = %2d inso = %p", m, iphlen, inso); ++ ++ /* ++ * If called with m == 0, then we're continuing the connect ++ */ ++ if (m == NULL) { ++ so = inso; ++ slirp = so->slirp; ++ ++ /* Re-set a few variables */ ++ tp = sototcpcb(so); ++ m = so->so_m; ++ so->so_m = NULL; ++ ti = so->so_ti; ++ tiwin = ti->ti_win; ++ tiflags = ti->ti_flags; ++ ++ goto cont_conn; ++ } ++ slirp = m->slirp; ++ ++ ip = mtod(m, struct ip *); ++ ip6 = mtod(m, struct ip6 *); ++ ++ switch (af) { ++ case AF_INET: ++ if (iphlen > sizeof(struct ip)) { ++ ip_stripoptions(m, (struct mbuf *)0); ++ iphlen = sizeof(struct ip); ++ } ++ /* XXX Check if too short */ ++ ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip = *ip; ++ save_ip.ip_len += iphlen; ++ ++ /* ++ * Get IP and TCP header together in first mbuf. ++ * Note: IP leaves IP header in first mbuf. ++ */ ++ m->m_data -= ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); ++ m->m_len += ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); ++ ti = mtod(m, struct tcpiphdr *); ++ ++ /* ++ * Checksum extended TCP header and data. 
++ */ ++ tlen = ip->ip_len; ++ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; ++ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ memset(&ti->ti, 0, sizeof(ti->ti)); ++ ti->ti_x0 = 0; ++ ti->ti_src = save_ip.ip_src; ++ ti->ti_dst = save_ip.ip_dst; ++ ti->ti_pr = save_ip.ip_p; ++ ti->ti_len = htons((uint16_t)tlen); ++ break; ++ ++ case AF_INET6: ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip6 = *ip6; ++ /* ++ * Get IP and TCP header together in first mbuf. ++ * Note: IP leaves IP header in first mbuf. ++ */ ++ m->m_data -= sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ m->m_len += sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ ti = mtod(m, struct tcpiphdr *); ++ ++ tlen = ip6->ip_pl; ++ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; ++ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ memset(&ti->ti, 0, sizeof(ti->ti)); ++ ti->ti_x0 = 0; ++ ti->ti_src6 = save_ip6.ip_src; ++ ti->ti_dst6 = save_ip6.ip_dst; ++ ti->ti_nh6 = save_ip6.ip_nh; ++ ti->ti_len = htons((uint16_t)tlen); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ len = ((sizeof(struct tcpiphdr) - sizeof(struct tcphdr)) + tlen); ++ if (cksum(m, len)) { ++ goto drop; ++ } ++ ++ /* ++ * Check that TCP offset makes sense, ++ * pull out TCP options and adjust length. XXX ++ */ ++ off = ti->ti_off << 2; ++ if (off < sizeof(struct tcphdr) || off > tlen) { ++ goto drop; ++ } ++ tlen -= off; ++ ti->ti_len = tlen; ++ if (off > sizeof(struct tcphdr)) { ++ optlen = off - sizeof(struct tcphdr); ++ optp = mtod(m, char *) + sizeof(struct tcpiphdr); ++ } ++ tiflags = ti->ti_flags; ++ ++ /* ++ * Convert TCP protocol specific fields to host format. ++ */ ++ NTOHL(ti->ti_seq); ++ NTOHL(ti->ti_ack); ++ NTOHS(ti->ti_win); ++ NTOHS(ti->ti_urp); ++ ++ /* ++ * Drop TCP, IP headers and TCP options. 
++ */ ++ m->m_data += sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ m->m_len -= sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ ++ /* ++ * Locate pcb for segment. ++ */ ++findso: ++ lhost.ss_family = af; ++ fhost.ss_family = af; ++ switch (af) { ++ case AF_INET: ++ lhost4 = (struct sockaddr_in *)&lhost; ++ lhost4->sin_addr = ti->ti_src; ++ lhost4->sin_port = ti->ti_sport; ++ fhost4 = (struct sockaddr_in *)&fhost; ++ fhost4->sin_addr = ti->ti_dst; ++ fhost4->sin_port = ti->ti_dport; ++ break; ++ case AF_INET6: ++ lhost6 = (struct sockaddr_in6 *)&lhost; ++ lhost6->sin6_addr = ti->ti_src6; ++ lhost6->sin6_port = ti->ti_sport; ++ fhost6 = (struct sockaddr_in6 *)&fhost; ++ fhost6->sin6_addr = ti->ti_dst6; ++ fhost6->sin6_port = ti->ti_dport; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ so = solookup(&slirp->tcp_last_so, &slirp->tcb, &lhost, &fhost); ++ ++ /* ++ * If the state is CLOSED (i.e., TCB does not exist) then ++ * all data in the incoming segment is discarded. ++ * If the TCB exists but is in CLOSED state, it is embryonic, ++ * but should either do a listen or a connect soon. ++ * ++ * state == CLOSED means we've done socreate() but haven't ++ * attached it to a protocol yet... ++ * ++ * XXX If a TCB does not exist, and the TH_SYN flag is ++ * the only flag set, then create a session, mark it ++ * as if it was LISTENING, and continue... ++ */ ++ if (so == NULL) { ++ /* TODO: IPv6 */ ++ if (slirp->restricted) { ++ /* Any hostfwds will have an existing socket, so we only get here ++ * for non-hostfwd connections. These should be dropped, unless it ++ * happens to be a guestfwd. 
++ */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == ti->ti_dport && ++ ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) { ++ break; ++ } ++ } ++ if (!ex_ptr) { ++ goto dropwithreset; ++ } ++ } ++ ++ if ((tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) != TH_SYN) ++ goto dropwithreset; ++ ++ so = socreate(slirp); ++ if (tcp_attach(so) < 0) { ++ g_free(so); /* Not sofree (if it failed, it's not insqued) */ ++ goto dropwithreset; ++ } ++ ++ sbreserve(&so->so_snd, TCP_SNDSPACE); ++ sbreserve(&so->so_rcv, TCP_RCVSPACE); ++ ++ so->lhost.ss = lhost; ++ so->fhost.ss = fhost; ++ ++ so->so_iptos = tcp_tos(so); ++ if (so->so_iptos == 0) { ++ switch (af) { ++ case AF_INET: ++ so->so_iptos = ((struct ip *)ti)->ip_tos; ++ break; ++ case AF_INET6: ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++ ++ tp = sototcpcb(so); ++ tp->t_state = TCPS_LISTEN; ++ } ++ ++ /* ++ * If this is a still-connecting socket, this probably ++ * a retransmit of the SYN. Whether it's a retransmit SYN ++ * or something else, we nuke it. ++ */ ++ if (so->so_state & SS_ISFCONNECTING) ++ goto drop; ++ ++ tp = sototcpcb(so); ++ ++ /* XXX Should never fail */ ++ if (tp == NULL) ++ goto dropwithreset; ++ if (tp->t_state == TCPS_CLOSED) ++ goto drop; ++ ++ tiwin = ti->ti_win; ++ ++ /* ++ * Segment received on connection. ++ * Reset idle time and keep-alive timer. ++ */ ++ tp->t_idle = 0; ++ if (slirp_do_keepalive) ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; ++ else ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; ++ ++ /* ++ * Process options if not in LISTEN state, ++ * else do it below (after getting remote address). ++ */ ++ if (optp && tp->t_state != TCPS_LISTEN) ++ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); ++ ++ /* ++ * Header prediction: check for the two common cases ++ * of a uni-directional data xfer. 
If the packet has ++ * no control flags, is in-sequence, the window didn't ++ * change and we're not retransmitting, it's a ++ * candidate. If the length is zero and the ack moved ++ * forward, we're the sender side of the xfer. Just ++ * free the data acked & wake any higher level process ++ * that was blocked waiting for space. If the length ++ * is non-zero and the ack didn't move, we're the ++ * receiver side. If we're getting packets in-order ++ * (the reassembly queue is empty), add the data to ++ * the socket buffer and note that we need a delayed ack. ++ * ++ * XXX Some of these tests are not needed ++ * eg: the tiwin == tp->snd_wnd prevents many more ++ * predictions.. with no *real* advantage.. ++ */ ++ if (tp->t_state == TCPS_ESTABLISHED && ++ (tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) == TH_ACK && ++ ti->ti_seq == tp->rcv_nxt && tiwin && tiwin == tp->snd_wnd && ++ tp->snd_nxt == tp->snd_max) { ++ if (ti->ti_len == 0) { ++ if (SEQ_GT(ti->ti_ack, tp->snd_una) && ++ SEQ_LEQ(ti->ti_ack, tp->snd_max) && ++ tp->snd_cwnd >= tp->snd_wnd) { ++ /* ++ * this is a pure ack for outstanding data. ++ */ ++ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ acked = ti->ti_ack - tp->snd_una; ++ sodrop(so, acked); ++ tp->snd_una = ti->ti_ack; ++ m_free(m); ++ ++ /* ++ * If all outstanding data are acked, stop ++ * retransmit timer, otherwise restart timer ++ * using current (possibly backed-off) value. ++ * If process is waiting for space, ++ * wakeup/selwakeup/signal. If data ++ * are ready to send, let tcp_output ++ * decide between more output or persist. ++ */ ++ if (tp->snd_una == tp->snd_max) ++ tp->t_timer[TCPT_REXMT] = 0; ++ else if (tp->t_timer[TCPT_PERSIST] == 0) ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ ++ /* ++ * This is called because sowwakeup might have ++ * put data into so_snd. Since we don't so sowwakeup, ++ * we don't need this.. XXX??? 
++ */ ++ if (so->so_snd.sb_cc) ++ (void)tcp_output(tp); ++ ++ return; ++ } ++ } else if (ti->ti_ack == tp->snd_una && tcpfrag_list_empty(tp) && ++ ti->ti_len <= sbspace(&so->so_rcv)) { ++ /* ++ * this is a pure, in-sequence data packet ++ * with nothing on the reassembly queue and ++ * we have enough buffer space to take it. ++ */ ++ tp->rcv_nxt += ti->ti_len; ++ /* ++ * Add data to socket buffer. ++ */ ++ if (so->so_emu) { ++ if (tcp_emu(so, m)) ++ sbappend(so, m); ++ } else ++ sbappend(so, m); ++ ++ /* ++ * If this is a short packet, then ACK now - with Nagel ++ * congestion avoidance sender won't send more until ++ * he gets an ACK. ++ * ++ * It is better to not delay acks at all to maximize ++ * TCP throughput. See RFC 2581. ++ */ ++ tp->t_flags |= TF_ACKNOW; ++ tcp_output(tp); ++ return; ++ } ++ } /* header prediction */ ++ /* ++ * Calculate amount of space in receive window, ++ * and then do TCP input processing. ++ * Receive window is amount of space in rcv queue, ++ * but not less than advertised window. ++ */ ++ { ++ int win; ++ win = sbspace(&so->so_rcv); ++ if (win < 0) ++ win = 0; ++ tp->rcv_wnd = MAX(win, (int)(tp->rcv_adv - tp->rcv_nxt)); ++ } ++ ++ switch (tp->t_state) { ++ /* ++ * If the state is LISTEN then ignore segment if it contains an RST. ++ * If the segment contains an ACK then it is bad and send a RST. ++ * If it does not contain a SYN then it is not interesting; drop it. ++ * Don't bother responding if the destination was a broadcast. ++ * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial ++ * tp->iss, and send a segment: ++ * ++ * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. ++ * Fill in remote peer address fields if not previously specified. ++ * Enter SYN_RECEIVED state, and process any other fields of this ++ * segment in this state. 
++ */ ++ case TCPS_LISTEN: { ++ if (tiflags & TH_RST) ++ goto drop; ++ if (tiflags & TH_ACK) ++ goto dropwithreset; ++ if ((tiflags & TH_SYN) == 0) ++ goto drop; ++ ++ /* ++ * This has way too many gotos... ++ * But a bit of spaghetti code never hurt anybody :) ++ */ ++ ++ /* ++ * If this is destined for the control address, then flag to ++ * tcp_ctl once connected, otherwise connect ++ */ ++ /* TODO: IPv6 */ ++ if (af == AF_INET && ++ (so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr && ++ so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) { ++ /* May be an add exec */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == so->so_fport && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { ++ so->so_state |= SS_CTL; ++ break; ++ } ++ } ++ if (so->so_state & SS_CTL) { ++ goto cont_input; ++ } ++ } ++ /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */ ++ } ++ ++ if (so->so_emu & EMU_NOCONNECT) { ++ so->so_emu &= ~EMU_NOCONNECT; ++ goto cont_input; ++ } ++ ++ if ((tcp_fconnect(so, so->so_ffamily) == -1) && (errno != EAGAIN) && ++ (errno != EINPROGRESS) && (errno != EWOULDBLOCK)) { ++ uint8_t code; ++ DEBUG_MISC(" tcp fconnect errno = %d-%s", errno, strerror(errno)); ++ if (errno == ECONNREFUSED) { ++ /* ACK the SYN, send RST to refuse the connection */ ++ tcp_respond(tp, ti, m, ti->ti_seq + 1, (tcp_seq)0, ++ TH_RST | TH_ACK, af); ++ } else { ++ switch (af) { ++ case AF_INET: ++ code = ICMP_UNREACH_NET; ++ if (errno == EHOSTUNREACH) { ++ code = ICMP_UNREACH_HOST; ++ } ++ break; ++ case AF_INET6: ++ code = ICMP6_UNREACH_NO_ROUTE; ++ if (errno == EHOSTUNREACH) { ++ code = ICMP6_UNREACH_ADDRESS; ++ } ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ HTONL(ti->ti_seq); /* restore tcp header */ ++ HTONL(ti->ti_ack); ++ HTONS(ti->ti_win); ++ HTONS(ti->ti_urp); ++ m->m_data -= ++ sizeof(struct tcpiphdr) + off - 
sizeof(struct tcphdr); ++ m->m_len += ++ sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ switch (af) { ++ case AF_INET: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct ip) - ++ sizeof(struct tcphdr); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct ip) - ++ sizeof(struct tcphdr); ++ *ip = save_ip; ++ icmp_send_error(m, ICMP_UNREACH, code, 0, strerror(errno)); ++ break; ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ m->m_len -= sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ *ip6 = save_ip6; ++ icmp6_send_error(m, ICMP6_UNREACH, code); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++ tcp_close(tp); ++ m_free(m); ++ } else { ++ /* ++ * Haven't connected yet, save the current mbuf ++ * and ti, and return ++ * XXX Some OS's don't tell us whether the connect() ++ * succeeded or not. So we must time it out. ++ */ ++ so->so_m = m; ++ so->so_ti = ti; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ tp->t_state = TCPS_SYN_RECEIVED; ++ /* ++ * Initialize receive sequence numbers now so that we can send a ++ * valid RST if the remote end rejects our connection. ++ */ ++ tp->irs = ti->ti_seq; ++ tcp_rcvseqinit(tp); ++ tcp_template(tp); ++ } ++ return; ++ ++ cont_conn: ++ /* m==NULL ++ * Check if the connect succeeded ++ */ ++ if (so->so_state & SS_NOFDREF) { ++ tp = tcp_close(tp); ++ goto dropwithreset; ++ } ++ cont_input: ++ tcp_template(tp); ++ ++ if (optp) ++ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); ++ ++ if (iss) ++ tp->iss = iss; ++ else ++ tp->iss = slirp->tcp_iss; ++ slirp->tcp_iss += TCP_ISSINCR / 2; ++ tp->irs = ti->ti_seq; ++ tcp_sendseqinit(tp); ++ tcp_rcvseqinit(tp); ++ tp->t_flags |= TF_ACKNOW; ++ tp->t_state = TCPS_SYN_RECEIVED; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ goto trimthenstep6; ++ } /* case TCPS_LISTEN */ ++ ++ /* ++ * If the state is SYN_SENT: ++ * if seg contains an ACK, but not for our SYN, drop the input. 
++ * if seg contains a RST, then drop the connection. ++ * if seg does not contain SYN, then drop it. ++ * Otherwise this is an acceptable SYN segment ++ * initialize tp->rcv_nxt and tp->irs ++ * if seg contains ack then advance tp->snd_una ++ * if SYN has been acked change to ESTABLISHED else SYN_RCVD state ++ * arrange for segment to be acked (eventually) ++ * continue processing rest of data/controls, beginning with URG ++ */ ++ case TCPS_SYN_SENT: ++ if ((tiflags & TH_ACK) && ++ (SEQ_LEQ(ti->ti_ack, tp->iss) || SEQ_GT(ti->ti_ack, tp->snd_max))) ++ goto dropwithreset; ++ ++ if (tiflags & TH_RST) { ++ if (tiflags & TH_ACK) { ++ tcp_drop(tp, 0); /* XXX Check t_softerror! */ ++ } ++ goto drop; ++ } ++ ++ if ((tiflags & TH_SYN) == 0) ++ goto drop; ++ if (tiflags & TH_ACK) { ++ tp->snd_una = ti->ti_ack; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) ++ tp->snd_nxt = tp->snd_una; ++ } ++ ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->irs = ti->ti_seq; ++ tcp_rcvseqinit(tp); ++ tp->t_flags |= TF_ACKNOW; ++ if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { ++ soisfconnected(so); ++ tp->t_state = TCPS_ESTABLISHED; ++ ++ (void)tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); ++ /* ++ * if we didn't have to retransmit the SYN, ++ * use its rtt as our initial srtt & rtt var. ++ */ ++ if (tp->t_rtt) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ } else ++ tp->t_state = TCPS_SYN_RECEIVED; ++ ++ trimthenstep6: ++ /* ++ * Advance ti->ti_seq to correspond to first data byte. ++ * If data, trim to stay within window, ++ * dropping FIN if necessary. ++ */ ++ ti->ti_seq++; ++ if (ti->ti_len > tp->rcv_wnd) { ++ todrop = ti->ti_len - tp->rcv_wnd; ++ m_adj(m, -todrop); ++ ti->ti_len = tp->rcv_wnd; ++ tiflags &= ~TH_FIN; ++ } ++ tp->snd_wl1 = ti->ti_seq - 1; ++ tp->rcv_up = ti->ti_seq; ++ goto step6; ++ } /* switch tp->t_state */ ++ /* ++ * States other than LISTEN or SYN_SENT. ++ * Check that at least some bytes of segment are within ++ * receive window. 
If segment begins before rcv_nxt, ++ * drop leading data (and SYN); if nothing left, just ack. ++ */ ++ todrop = tp->rcv_nxt - ti->ti_seq; ++ if (todrop > 0) { ++ if (tiflags & TH_SYN) { ++ tiflags &= ~TH_SYN; ++ ti->ti_seq++; ++ if (ti->ti_urp > 1) ++ ti->ti_urp--; ++ else ++ tiflags &= ~TH_URG; ++ todrop--; ++ } ++ /* ++ * Following if statement from Stevens, vol. 2, p. 960. ++ */ ++ if (todrop > ti->ti_len || ++ (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) { ++ /* ++ * Any valid FIN must be to the left of the window. ++ * At this point the FIN must be a duplicate or out ++ * of sequence; drop it. ++ */ ++ tiflags &= ~TH_FIN; ++ ++ /* ++ * Send an ACK to resynchronize and drop any data. ++ * But keep on processing for RST or ACK. ++ */ ++ tp->t_flags |= TF_ACKNOW; ++ todrop = ti->ti_len; ++ } ++ m_adj(m, todrop); ++ ti->ti_seq += todrop; ++ ti->ti_len -= todrop; ++ if (ti->ti_urp > todrop) ++ ti->ti_urp -= todrop; ++ else { ++ tiflags &= ~TH_URG; ++ ti->ti_urp = 0; ++ } ++ } ++ /* ++ * If new data are received on a connection after the ++ * user processes are gone, then RST the other end. ++ */ ++ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && ++ ti->ti_len) { ++ tp = tcp_close(tp); ++ goto dropwithreset; ++ } ++ ++ /* ++ * If segment ends after window, drop trailing data ++ * (and PUSH and FIN); if nothing left, just ACK. ++ */ ++ todrop = (ti->ti_seq + ti->ti_len) - (tp->rcv_nxt + tp->rcv_wnd); ++ if (todrop > 0) { ++ if (todrop >= ti->ti_len) { ++ /* ++ * If a new connection request is received ++ * while in TIME_WAIT, drop the old connection ++ * and start over if the sequence numbers ++ * are above the previous ones. ++ */ ++ if (tiflags & TH_SYN && tp->t_state == TCPS_TIME_WAIT && ++ SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { ++ iss = tp->rcv_nxt + TCP_ISSINCR; ++ tp = tcp_close(tp); ++ goto findso; ++ } ++ /* ++ * If window is closed can only take segments at ++ * window edge, and have to drop data and PUSH from ++ * incoming segments. 
Continue processing, but ++ * remember to ack. Otherwise, drop segment ++ * and ack. ++ */ ++ if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { ++ tp->t_flags |= TF_ACKNOW; ++ } else { ++ goto dropafterack; ++ } ++ } ++ m_adj(m, -todrop); ++ ti->ti_len -= todrop; ++ tiflags &= ~(TH_PUSH | TH_FIN); ++ } ++ ++ /* ++ * If the RST bit is set examine the state: ++ * SYN_RECEIVED STATE: ++ * If passive open, return to LISTEN state. ++ * If active open, inform user that connection was refused. ++ * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: ++ * Inform user that connection was reset, and close tcb. ++ * CLOSING, LAST_ACK, TIME_WAIT STATES ++ * Close the tcb. ++ */ ++ if (tiflags & TH_RST) ++ switch (tp->t_state) { ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ case TCPS_FIN_WAIT_1: ++ case TCPS_FIN_WAIT_2: ++ case TCPS_CLOSE_WAIT: ++ tp->t_state = TCPS_CLOSED; ++ tcp_close(tp); ++ goto drop; ++ ++ case TCPS_CLOSING: ++ case TCPS_LAST_ACK: ++ case TCPS_TIME_WAIT: ++ tcp_close(tp); ++ goto drop; ++ } ++ ++ /* ++ * If a SYN is in the window, then this is an ++ * error and we send an RST and drop the connection. ++ */ ++ if (tiflags & TH_SYN) { ++ tp = tcp_drop(tp, 0); ++ goto dropwithreset; ++ } ++ ++ /* ++ * If the ACK bit is off we drop the segment and return. ++ */ ++ if ((tiflags & TH_ACK) == 0) ++ goto drop; ++ ++ /* ++ * Ack processing. ++ */ ++ switch (tp->t_state) { ++ /* ++ * In SYN_RECEIVED state if the ack ACKs our SYN then enter ++ * ESTABLISHED state and continue processing, otherwise ++ * send an RST. una<=ack<=max ++ */ ++ case TCPS_SYN_RECEIVED: ++ ++ if (SEQ_GT(tp->snd_una, ti->ti_ack) || SEQ_GT(ti->ti_ack, tp->snd_max)) ++ goto dropwithreset; ++ tp->t_state = TCPS_ESTABLISHED; ++ /* ++ * The sent SYN is ack'ed with our sequence number +1 ++ * The first data byte already in the buffer will get ++ * lost if no correction is made. This is only needed for ++ * SS_CTL since the buffer is empty otherwise. 
++ * tp->snd_una++; or: ++ */ ++ tp->snd_una = ti->ti_ack; ++ if (so->so_state & SS_CTL) { ++ /* So tcp_ctl reports the right state */ ++ ret = tcp_ctl(so); ++ if (ret == 1) { ++ soisfconnected(so); ++ so->so_state &= ~SS_CTL; /* success XXX */ ++ } else if (ret == 2) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* CTL_CMD */ ++ } else { ++ needoutput = 1; ++ tp->t_state = TCPS_FIN_WAIT_1; ++ } ++ } else { ++ soisfconnected(so); ++ } ++ ++ (void)tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); ++ tp->snd_wl1 = ti->ti_seq - 1; ++ /* Avoid ack processing; snd_una==ti_ack => dup ack */ ++ goto synrx_to_est; ++ /* fall into ... */ ++ ++ /* ++ * In ESTABLISHED state: drop duplicate ACKs; ACK out of range ++ * ACKs. If the ack is in the range ++ * tp->snd_una < ti->ti_ack <= tp->snd_max ++ * then advance tp->snd_una to ti->ti_ack and drop ++ * data from the retransmission queue. If this ACK reflects ++ * more up to date window information we update our window information. ++ */ ++ case TCPS_ESTABLISHED: ++ case TCPS_FIN_WAIT_1: ++ case TCPS_FIN_WAIT_2: ++ case TCPS_CLOSE_WAIT: ++ case TCPS_CLOSING: ++ case TCPS_LAST_ACK: ++ case TCPS_TIME_WAIT: ++ ++ if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { ++ if (ti->ti_len == 0 && tiwin == tp->snd_wnd) { ++ DEBUG_MISC(" dup ack m = %p so = %p", m, so); ++ /* ++ * If we have outstanding data (other than ++ * a window probe), this is a completely ++ * duplicate ack (ie, window info didn't ++ * change), the ack is the biggest we've ++ * seen and we've seen exactly our rexmt ++ * threshold of them, assume a packet ++ * has been dropped and retransmit it. ++ * Kludge snd_nxt & the congestion ++ * window so we send only this one ++ * packet. ++ * ++ * We know we're losing at the current ++ * window size so do congestion avoidance ++ * (set ssthresh to half the current window ++ * and pull our congestion window back to ++ * the new ssthresh). 
++ * ++ * Dup acks mean that packets have left the ++ * network (they're now cached at the receiver) ++ * so bump cwnd by the amount in the receiver ++ * to keep a constant cwnd packets in the ++ * network. ++ */ ++ if (tp->t_timer[TCPT_REXMT] == 0 || ti->ti_ack != tp->snd_una) ++ tp->t_dupacks = 0; ++ else if (++tp->t_dupacks == TCPREXMTTHRESH) { ++ tcp_seq onxt = tp->snd_nxt; ++ unsigned win = ++ MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; ++ ++ if (win < 2) ++ win = 2; ++ tp->snd_ssthresh = win * tp->t_maxseg; ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->t_rtt = 0; ++ tp->snd_nxt = ti->ti_ack; ++ tp->snd_cwnd = tp->t_maxseg; ++ (void)tcp_output(tp); ++ tp->snd_cwnd = ++ tp->snd_ssthresh + tp->t_maxseg * tp->t_dupacks; ++ if (SEQ_GT(onxt, tp->snd_nxt)) ++ tp->snd_nxt = onxt; ++ goto drop; ++ } else if (tp->t_dupacks > TCPREXMTTHRESH) { ++ tp->snd_cwnd += tp->t_maxseg; ++ (void)tcp_output(tp); ++ goto drop; ++ } ++ } else ++ tp->t_dupacks = 0; ++ break; ++ } ++ synrx_to_est: ++ /* ++ * If the congestion window was inflated to account ++ * for the other side's cached packets, retract it. ++ */ ++ if (tp->t_dupacks > TCPREXMTTHRESH && tp->snd_cwnd > tp->snd_ssthresh) ++ tp->snd_cwnd = tp->snd_ssthresh; ++ tp->t_dupacks = 0; ++ if (SEQ_GT(ti->ti_ack, tp->snd_max)) { ++ goto dropafterack; ++ } ++ acked = ti->ti_ack - tp->snd_una; ++ ++ /* ++ * If transmit timer is running and timed sequence ++ * number was acked, update smoothed round trip time. ++ * Since we now have an rtt measurement, cancel the ++ * timer backoff (cf., Phil Karn's retransmit alg.). ++ * Recompute the initial retransmit timer. ++ */ ++ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ ++ /* ++ * If all outstanding data is acked, stop retransmit ++ * timer and remember to restart (more output or persist). ++ * If there is more data to be acked, restart retransmit ++ * timer, using current (possibly backed-off) value. 
++ */ ++ if (ti->ti_ack == tp->snd_max) { ++ tp->t_timer[TCPT_REXMT] = 0; ++ needoutput = 1; ++ } else if (tp->t_timer[TCPT_PERSIST] == 0) ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ /* ++ * When new data is acked, open the congestion window. ++ * If the window gives us less than ssthresh packets ++ * in flight, open exponentially (maxseg per packet). ++ * Otherwise open linearly: maxseg per window ++ * (maxseg^2 / cwnd per packet). ++ */ ++ { ++ register unsigned cw = tp->snd_cwnd; ++ register unsigned incr = tp->t_maxseg; ++ ++ if (cw > tp->snd_ssthresh) ++ incr = incr * incr / cw; ++ tp->snd_cwnd = MIN(cw + incr, TCP_MAXWIN << tp->snd_scale); ++ } ++ if (acked > so->so_snd.sb_cc) { ++ tp->snd_wnd -= so->so_snd.sb_cc; ++ sodrop(so, (int)so->so_snd.sb_cc); ++ ourfinisacked = 1; ++ } else { ++ sodrop(so, acked); ++ tp->snd_wnd -= acked; ++ ourfinisacked = 0; ++ } ++ tp->snd_una = ti->ti_ack; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) ++ tp->snd_nxt = tp->snd_una; ++ ++ switch (tp->t_state) { ++ /* ++ * In FIN_WAIT_1 STATE in addition to the processing ++ * for the ESTABLISHED state if our FIN is now acknowledged ++ * then enter FIN_WAIT_2. ++ */ ++ case TCPS_FIN_WAIT_1: ++ if (ourfinisacked) { ++ /* ++ * If we can't receive any more ++ * data, then closing user can proceed. ++ * Starting the timer is contrary to the ++ * specification, but if we don't get a FIN ++ * we'll hang forever. ++ */ ++ if (so->so_state & SS_FCANTRCVMORE) { ++ tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE; ++ } ++ tp->t_state = TCPS_FIN_WAIT_2; ++ } ++ break; ++ ++ /* ++ * In CLOSING STATE in addition to the processing for ++ * the ESTABLISHED state if the ACK acknowledges our FIN ++ * then enter the TIME-WAIT state, otherwise ignore ++ * the segment. 
++ */ ++ case TCPS_CLOSING: ++ if (ourfinisacked) { ++ tp->t_state = TCPS_TIME_WAIT; ++ tcp_canceltimers(tp); ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ } ++ break; ++ ++ /* ++ * In LAST_ACK, we may still be waiting for data to drain ++ * and/or to be acked, as well as for the ack of our FIN. ++ * If our FIN is now acknowledged, delete the TCB, ++ * enter the closed state and return. ++ */ ++ case TCPS_LAST_ACK: ++ if (ourfinisacked) { ++ tcp_close(tp); ++ goto drop; ++ } ++ break; ++ ++ /* ++ * In TIME_WAIT state the only thing that should arrive ++ * is a retransmission of the remote FIN. Acknowledge ++ * it and restart the finack timer. ++ */ ++ case TCPS_TIME_WAIT: ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ goto dropafterack; ++ } ++ } /* switch(tp->t_state) */ ++ ++step6: ++ /* ++ * Update window information. ++ * Don't look at window if no ACK: TAC's send garbage on first SYN. ++ */ ++ if ((tiflags & TH_ACK) && ++ (SEQ_LT(tp->snd_wl1, ti->ti_seq) || ++ (tp->snd_wl1 == ti->ti_seq && ++ (SEQ_LT(tp->snd_wl2, ti->ti_ack) || ++ (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) { ++ tp->snd_wnd = tiwin; ++ tp->snd_wl1 = ti->ti_seq; ++ tp->snd_wl2 = ti->ti_ack; ++ if (tp->snd_wnd > tp->max_sndwnd) ++ tp->max_sndwnd = tp->snd_wnd; ++ needoutput = 1; ++ } ++ ++ /* ++ * Process segments with URG. ++ */ ++ if ((tiflags & TH_URG) && ti->ti_urp && ++ TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ /* ++ * This is a kludge, but if we receive and accept ++ * random urgent pointers, we'll crash in ++ * soreceive. It's hard to imagine someone ++ * actually wanting to send this much urgent data. ++ */ ++ if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) { ++ ti->ti_urp = 0; ++ tiflags &= ~TH_URG; ++ goto dodata; ++ } ++ /* ++ * If this segment advances the known urgent pointer, ++ * then mark the data stream. This should not happen ++ * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since ++ * a FIN has been received from the remote side. 
++ * In these states we ignore the URG. ++ * ++ * According to RFC961 (Assigned Protocols), ++ * the urgent pointer points to the last octet ++ * of urgent data. We continue, however, ++ * to consider it to indicate the first octet ++ * of data past the urgent section as the original ++ * spec states (in one of two places). ++ */ ++ if (SEQ_GT(ti->ti_seq + ti->ti_urp, tp->rcv_up)) { ++ tp->rcv_up = ti->ti_seq + ti->ti_urp; ++ so->so_urgc = ++ so->so_rcv.sb_cc + (tp->rcv_up - tp->rcv_nxt); /* -1; */ ++ tp->rcv_up = ti->ti_seq + ti->ti_urp; ++ } ++ } else ++ /* ++ * If no out of band data is expected, ++ * pull receive urgent pointer along ++ * with the receive window. ++ */ ++ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) ++ tp->rcv_up = tp->rcv_nxt; ++dodata: ++ ++ /* ++ * If this is a small packet, then ACK now - with Nagel ++ * congestion avoidance sender won't send more until ++ * he gets an ACK. ++ */ ++ if (ti->ti_len && (unsigned)ti->ti_len <= 5 && ++ ((struct tcpiphdr_2 *)ti)->first_char == (char)27) { ++ tp->t_flags |= TF_ACKNOW; ++ } ++ ++ /* ++ * Process the segment text, merging it into the TCP sequencing queue, ++ * and arranging for acknowledgment of receipt if necessary. ++ * This process logically involves adjusting tp->rcv_wnd as data ++ * is presented to the user (this happens in tcp_usrreq.c, ++ * case PRU_RCVD). If a FIN has already been received on this ++ * connection then we just ignore the text. ++ */ ++ if ((ti->ti_len || (tiflags & TH_FIN)) && ++ TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ TCP_REASS(tp, ti, m, so, tiflags); ++ } else { ++ m_free(m); ++ tiflags &= ~TH_FIN; ++ } ++ ++ /* ++ * If FIN is received ACK the FIN and let the user know ++ * that the connection is closing. ++ */ ++ if (tiflags & TH_FIN) { ++ if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ /* ++ * If we receive a FIN we can't send more data, ++ * set it SS_FDRAIN ++ * Shutdown the socket if there is no rx data in the ++ * buffer. 
++ * soread() is called on completion of shutdown() and ++ * will got to TCPS_LAST_ACK, and use tcp_output() ++ * to send the FIN. ++ */ ++ sofwdrain(so); ++ ++ tp->t_flags |= TF_ACKNOW; ++ tp->rcv_nxt++; ++ } ++ switch (tp->t_state) { ++ /* ++ * In SYN_RECEIVED and ESTABLISHED STATES ++ * enter the CLOSE_WAIT state. ++ */ ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ if (so->so_emu == EMU_CTL) /* no shutdown on socket */ ++ tp->t_state = TCPS_LAST_ACK; ++ else ++ tp->t_state = TCPS_CLOSE_WAIT; ++ break; ++ ++ /* ++ * If still in FIN_WAIT_1 STATE FIN has not been acked so ++ * enter the CLOSING state. ++ */ ++ case TCPS_FIN_WAIT_1: ++ tp->t_state = TCPS_CLOSING; ++ break; ++ ++ /* ++ * In FIN_WAIT_2 state enter the TIME_WAIT state, ++ * starting the time-wait timer, turning off the other ++ * standard timers. ++ */ ++ case TCPS_FIN_WAIT_2: ++ tp->t_state = TCPS_TIME_WAIT; ++ tcp_canceltimers(tp); ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ break; ++ ++ /* ++ * In TIME_WAIT state restart the 2 MSL time_wait timer. ++ */ ++ case TCPS_TIME_WAIT: ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ break; ++ } ++ } ++ ++ /* ++ * Return any desired output. ++ */ ++ if (needoutput || (tp->t_flags & TF_ACKNOW)) { ++ (void)tcp_output(tp); ++ } ++ return; ++ ++dropafterack: ++ /* ++ * Generate an ACK dropping incoming segment if it occupies ++ * sequence space, where the ACK reflects our state. ++ */ ++ if (tiflags & TH_RST) ++ goto drop; ++ m_free(m); ++ tp->t_flags |= TF_ACKNOW; ++ (void)tcp_output(tp); ++ return; ++ ++dropwithreset: ++ /* reuses m if m!=NULL, m_free() unnecessary */ ++ if (tiflags & TH_ACK) ++ tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST, af); ++ else { ++ if (tiflags & TH_SYN) ++ ti->ti_len++; ++ tcp_respond(tp, ti, m, ti->ti_seq + ti->ti_len, (tcp_seq)0, ++ TH_RST | TH_ACK, af); ++ } ++ ++ return; ++ ++drop: ++ /* ++ * Drop space held by incoming segment and return. 
++ */ ++ m_free(m); ++} ++ ++static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, ++ struct tcpiphdr *ti) ++{ ++ uint16_t mss; ++ int opt, optlen; ++ ++ DEBUG_CALL("tcp_dooptions"); ++ DEBUG_ARG("tp = %p cnt=%i", tp, cnt); ++ ++ for (; cnt > 0; cnt -= optlen, cp += optlen) { ++ opt = cp[0]; ++ if (opt == TCPOPT_EOL) ++ break; ++ if (opt == TCPOPT_NOP) ++ optlen = 1; ++ else { ++ optlen = cp[1]; ++ if (optlen <= 0) ++ break; ++ } ++ switch (opt) { ++ default: ++ continue; ++ ++ case TCPOPT_MAXSEG: ++ if (optlen != TCPOLEN_MAXSEG) ++ continue; ++ if (!(ti->ti_flags & TH_SYN)) ++ continue; ++ memcpy((char *)&mss, (char *)cp + 2, sizeof(mss)); ++ NTOHS(mss); ++ (void)tcp_mss(tp, mss); /* sets t_maxseg */ ++ break; ++ } ++ } ++} ++ ++/* ++ * Collect new round-trip time estimate ++ * and update averages and current timeout. ++ */ ++ ++static void tcp_xmit_timer(register struct tcpcb *tp, int rtt) ++{ ++ register short delta; ++ ++ DEBUG_CALL("tcp_xmit_timer"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("rtt = %d", rtt); ++ ++ if (tp->t_srtt != 0) { ++ /* ++ * srtt is stored as fixed point with 3 bits after the ++ * binary point (i.e., scaled by 8). The following magic ++ * is equivalent to the smoothing algorithm in rfc793 with ++ * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed ++ * point). Adjust rtt to origin 0. ++ */ ++ delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT); ++ if ((tp->t_srtt += delta) <= 0) ++ tp->t_srtt = 1; ++ /* ++ * We accumulate a smoothed rtt variance (actually, a ++ * smoothed mean difference), then set the retransmit ++ * timer to smoothed rtt + 4 times the smoothed variance. ++ * rttvar is stored as fixed point with 2 bits after the ++ * binary point (scaled by 4). The following is ++ * equivalent to rfc793 smoothing with an alpha of .75 ++ * (rttvar = rttvar*3/4 + |delta| / 4). This replaces ++ * rfc793's wired-in beta. 
++ */ ++ if (delta < 0) ++ delta = -delta; ++ delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); ++ if ((tp->t_rttvar += delta) <= 0) ++ tp->t_rttvar = 1; ++ } else { ++ /* ++ * No rtt measurement yet - use the unsmoothed rtt. ++ * Set the variance to half the rtt (so our first ++ * retransmit happens at 3*rtt). ++ */ ++ tp->t_srtt = rtt << TCP_RTT_SHIFT; ++ tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); ++ } ++ tp->t_rtt = 0; ++ tp->t_rxtshift = 0; ++ ++ /* ++ * the retransmit should happen at rtt + 4 * rttvar. ++ * Because of the way we do the smoothing, srtt and rttvar ++ * will each average +1/2 tick of bias. When we compute ++ * the retransmit timer, we want 1/2 tick of rounding and ++ * 1 extra tick because of +-1/2 tick uncertainty in the ++ * firing of the timer. The bias will give us exactly the ++ * 1.5 tick we need. But, because the bias is ++ * statistical, we have to test that we don't drop below ++ * the minimum feasible timer (which is 2 ticks). ++ */ ++ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), (short)tp->t_rttmin, ++ TCPTV_REXMTMAX); /* XXX */ ++ ++ /* ++ * We received an ack for a packet that wasn't retransmitted; ++ * it is probably safe to discard any error indications we've ++ * received recently. This isn't quite right, but close enough ++ * for now (a route might have failed after we sent a segment, ++ * and the return path might not be symmetrical). ++ */ ++ tp->t_softerror = 0; ++} ++ ++/* ++ * Determine a reasonable value for maxseg size. ++ * If the route is known, check route for mtu. ++ * If none, use an mss that can be handled on the outgoing ++ * interface without forcing IP to fragment; if bigger than ++ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES ++ * to utilize large mbufs. 
If no route is found, route has no mtu, ++ * or the destination isn't local, use a default, hopefully conservative ++ * size (usually 512 or the default IP max size, but no more than the mtu ++ * of the interface), as we can't discover anything about intervening ++ * gateways or networks. We also initialize the congestion/slow start ++ * window to be a single segment if the destination isn't local. ++ * While looking at the routing entry, we also initialize other path-dependent ++ * parameters from pre-set or cached values in the routing entry. ++ */ ++ ++int tcp_mss(struct tcpcb *tp, unsigned offer) ++{ ++ struct socket *so = tp->t_socket; ++ int mss; ++ ++ DEBUG_CALL("tcp_mss"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("offer = %d", offer); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ mss = MIN(IF_MTU, IF_MRU) - sizeof(struct tcphdr) - sizeof(struct ip); ++ break; ++ case AF_INET6: ++ mss = MIN(IF_MTU, IF_MRU) - sizeof(struct tcphdr) - sizeof(struct ip6); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ if (offer) ++ mss = MIN(mss, offer); ++ mss = MAX(mss, 32); ++ if (mss < tp->t_maxseg || offer != 0) ++ tp->t_maxseg = mss; ++ ++ tp->snd_cwnd = mss; ++ ++ sbreserve(&so->so_snd, ++ TCP_SNDSPACE + ++ ((TCP_SNDSPACE % mss) ? (mss - (TCP_SNDSPACE % mss)) : 0)); ++ sbreserve(&so->so_rcv, ++ TCP_RCVSPACE + ++ ((TCP_RCVSPACE % mss) ? (mss - (TCP_RCVSPACE % mss)) : 0)); ++ ++ DEBUG_MISC(" returning mss = %d", mss); ++ ++ return mss; ++} +diff --git a/slirp/src/tcp_output.c b/slirp/src/tcp_output.c +new file mode 100644 +index 0000000..383fe31 +--- /dev/null ++++ b/slirp/src/tcp_output.c +@@ -0,0 +1,516 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. 
Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_output.c 8.3 (Berkeley) 12/30/93 ++ * tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++static const uint8_t tcp_outflags[TCP_NSTATES] = { ++ TH_RST | TH_ACK, 0, TH_SYN, TH_SYN | TH_ACK, ++ TH_ACK, TH_ACK, TH_FIN | TH_ACK, TH_FIN | TH_ACK, ++ TH_FIN | TH_ACK, TH_ACK, TH_ACK, ++}; ++ ++ ++#undef MAX_TCPOPTLEN ++#define MAX_TCPOPTLEN 32 /* max # bytes that go in options */ ++ ++/* ++ * Tcp output routine: figure out what should be sent and send it. 
++ */ ++int tcp_output(struct tcpcb *tp) ++{ ++ register struct socket *so = tp->t_socket; ++ register long len, win; ++ int off, flags, error; ++ register struct mbuf *m; ++ register struct tcpiphdr *ti, tcpiph_save; ++ struct ip *ip; ++ struct ip6 *ip6; ++ uint8_t opt[MAX_TCPOPTLEN]; ++ unsigned optlen, hdrlen; ++ int idle, sendalot; ++ ++ DEBUG_CALL("tcp_output"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ /* ++ * Determine length of data that should be transmitted, ++ * and flags that will be used. ++ * If there is some data or critical controls (SYN, RST) ++ * to send, then transmit; otherwise, investigate further. ++ */ ++ idle = (tp->snd_max == tp->snd_una); ++ if (idle && tp->t_idle >= tp->t_rxtcur) ++ /* ++ * We have been idle for "a while" and no acks are ++ * expected to clock out any data we send -- ++ * slow start to get ack "clock" running again. ++ */ ++ tp->snd_cwnd = tp->t_maxseg; ++again: ++ sendalot = 0; ++ off = tp->snd_nxt - tp->snd_una; ++ win = MIN(tp->snd_wnd, tp->snd_cwnd); ++ ++ flags = tcp_outflags[tp->t_state]; ++ ++ DEBUG_MISC(" --- tcp_output flags = 0x%x", flags); ++ ++ /* ++ * If in persist timeout with window of 0, send 1 byte. ++ * Otherwise, if window is small but nonzero ++ * and timer expired, we will send what we can ++ * and go to transmit state. ++ */ ++ if (tp->t_force) { ++ if (win == 0) { ++ /* ++ * If we still have some data to send, then ++ * clear the FIN bit. Usually this would ++ * happen below when it realizes that we ++ * aren't sending all the data. However, ++ * if we have exactly 1 byte of unset data, ++ * then it won't clear the FIN bit below, ++ * and if we are in persist state, we wind ++ * up sending the packet without recording ++ * that we sent the FIN bit. ++ * ++ * We can't just blindly clear the FIN bit, ++ * because if we don't have any more data ++ * to send then the probe will be the FIN ++ * itself. 
++ */ ++ if (off < so->so_snd.sb_cc) ++ flags &= ~TH_FIN; ++ win = 1; ++ } else { ++ tp->t_timer[TCPT_PERSIST] = 0; ++ tp->t_rxtshift = 0; ++ } ++ } ++ ++ len = MIN(so->so_snd.sb_cc, win) - off; ++ ++ if (len < 0) { ++ /* ++ * If FIN has been sent but not acked, ++ * but we haven't been called to retransmit, ++ * len will be -1. Otherwise, window shrank ++ * after we sent into it. If window shrank to 0, ++ * cancel pending retransmit and pull snd_nxt ++ * back to (closed) window. We will enter persist ++ * state below. If the window didn't close completely, ++ * just wait for an ACK. ++ */ ++ len = 0; ++ if (win == 0) { ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->snd_nxt = tp->snd_una; ++ } ++ } ++ ++ if (len > tp->t_maxseg) { ++ len = tp->t_maxseg; ++ sendalot = 1; ++ } ++ if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) ++ flags &= ~TH_FIN; ++ ++ win = sbspace(&so->so_rcv); ++ ++ /* ++ * Sender silly window avoidance. If connection is idle ++ * and can send all data, a maximum segment, ++ * at least a maximum default-size segment do it, ++ * or are forced, do it; otherwise don't bother. ++ * If peer's buffer is tiny, then send ++ * when window is at least half open. ++ * If retransmitting (possibly after persist timer forced us ++ * to send into a small window), then must resend. ++ */ ++ if (len) { ++ if (len == tp->t_maxseg) ++ goto send; ++ if ((1 || idle || tp->t_flags & TF_NODELAY) && ++ len + off >= so->so_snd.sb_cc) ++ goto send; ++ if (tp->t_force) ++ goto send; ++ if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) ++ goto send; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_max)) ++ goto send; ++ } ++ ++ /* ++ * Compare available window to amount of window ++ * known to peer (as advertised window less ++ * next expected input). If the difference is at least two ++ * max size segments, or at least 50% of the maximum possible ++ * window, then want to send a window update to peer. 
++ */ ++ if (win > 0) { ++ /* ++ * "adv" is the amount we can increase the window, ++ * taking into account that we are limited by ++ * TCP_MAXWIN << tp->rcv_scale. ++ */ ++ long adv = MIN(win, (long)TCP_MAXWIN << tp->rcv_scale) - ++ (tp->rcv_adv - tp->rcv_nxt); ++ ++ if (adv >= (long)(2 * tp->t_maxseg)) ++ goto send; ++ if (2 * adv >= (long)so->so_rcv.sb_datalen) ++ goto send; ++ } ++ ++ /* ++ * Send if we owe peer an ACK. ++ */ ++ if (tp->t_flags & TF_ACKNOW) ++ goto send; ++ if (flags & (TH_SYN | TH_RST)) ++ goto send; ++ if (SEQ_GT(tp->snd_up, tp->snd_una)) ++ goto send; ++ /* ++ * If our state indicates that FIN should be sent ++ * and we have not yet done so, or we're retransmitting the FIN, ++ * then we need to send. ++ */ ++ if (flags & TH_FIN && ++ ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) ++ goto send; ++ ++ /* ++ * TCP window updates are not reliable, rather a polling protocol ++ * using ``persist'' packets is used to insure receipt of window ++ * updates. The three ``states'' for the output side are: ++ * idle not doing retransmits or persists ++ * persisting to move a small or zero window ++ * (re)transmitting and thereby not persisting ++ * ++ * tp->t_timer[TCPT_PERSIST] ++ * is set when we are in persist state. ++ * tp->t_force ++ * is set when we are called to send a persist packet. ++ * tp->t_timer[TCPT_REXMT] ++ * is set when we are retransmitting ++ * The output side is idle when both timers are zero. ++ * ++ * If send window is too small, there is data to transmit, and no ++ * retransmit or persist is pending, then go to persist state. ++ * If nothing happens soon, send when timer expires: ++ * if window is nonzero, transmit what we can, ++ * otherwise force out a byte. ++ */ ++ if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && ++ tp->t_timer[TCPT_PERSIST] == 0) { ++ tp->t_rxtshift = 0; ++ tcp_setpersist(tp); ++ } ++ ++ /* ++ * No reason to send a segment, just return. 
++ */ ++ return (0); ++ ++send: ++ /* ++ * Before ESTABLISHED, force sending of initial options ++ * unless TCP set not to do any options. ++ * NOTE: we assume that the IP/TCP header plus TCP options ++ * always fit in a single mbuf, leaving room for a maximum ++ * link header, i.e. ++ * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN ++ */ ++ optlen = 0; ++ hdrlen = sizeof(struct tcpiphdr); ++ if (flags & TH_SYN) { ++ tp->snd_nxt = tp->iss; ++ if ((tp->t_flags & TF_NOOPT) == 0) { ++ uint16_t mss; ++ ++ opt[0] = TCPOPT_MAXSEG; ++ opt[1] = 4; ++ mss = htons((uint16_t)tcp_mss(tp, 0)); ++ memcpy((char *)(opt + 2), (char *)&mss, sizeof(mss)); ++ optlen = 4; ++ } ++ } ++ ++ hdrlen += optlen; ++ ++ /* ++ * Adjust data length if insertion of options will ++ * bump the packet length beyond the t_maxseg length. ++ */ ++ if (len > tp->t_maxseg - optlen) { ++ len = tp->t_maxseg - optlen; ++ sendalot = 1; ++ } ++ ++ /* ++ * Grab a header mbuf, attaching a copy of data to ++ * be transmitted, and initialize the header from ++ * the template for sends on this connection. ++ */ ++ if (len) { ++ m = m_get(so->slirp); ++ if (m == NULL) { ++ error = 1; ++ goto out; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m->m_len = hdrlen; ++ ++ sbcopy(&so->so_snd, off, (int)len, mtod(m, char *) + hdrlen); ++ m->m_len += len; ++ ++ /* ++ * If we're sending everything we've got, set PUSH. ++ * (This will keep happy those implementations which only ++ * give data to the user when a buffer fills or ++ * a PUSH comes in.) ++ */ ++ if (off + len == so->so_snd.sb_cc) ++ flags |= TH_PUSH; ++ } else { ++ m = m_get(so->slirp); ++ if (m == NULL) { ++ error = 1; ++ goto out; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m->m_len = hdrlen; ++ } ++ ++ ti = mtod(m, struct tcpiphdr *); ++ ++ memcpy((char *)ti, &tp->t_template, sizeof(struct tcpiphdr)); ++ ++ /* ++ * Fill in fields, remembering maximum advertised ++ * window for use in delaying messages about window sizes. 
++ * If resending a FIN, be sure not to use a new sequence number. ++ */ ++ if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && ++ tp->snd_nxt == tp->snd_max) ++ tp->snd_nxt--; ++ /* ++ * If we are doing retransmissions, then snd_nxt will ++ * not reflect the first unsent octet. For ACK only ++ * packets, we do not want the sequence number of the ++ * retransmitted packet, we want the sequence number ++ * of the next unsent octet. So, if there is no data ++ * (and no SYN or FIN), use snd_max instead of snd_nxt ++ * when filling in ti_seq. But if we are in persist ++ * state, snd_max might reflect one byte beyond the ++ * right edge of the window, so use snd_nxt in that ++ * case, since we know we aren't doing a retransmission. ++ * (retransmit and persist are mutually exclusive...) ++ */ ++ if (len || (flags & (TH_SYN | TH_FIN)) || tp->t_timer[TCPT_PERSIST]) ++ ti->ti_seq = htonl(tp->snd_nxt); ++ else ++ ti->ti_seq = htonl(tp->snd_max); ++ ti->ti_ack = htonl(tp->rcv_nxt); ++ if (optlen) { ++ memcpy((char *)(ti + 1), (char *)opt, optlen); ++ ti->ti_off = (sizeof(struct tcphdr) + optlen) >> 2; ++ } ++ ti->ti_flags = flags; ++ /* ++ * Calculate receive window. Don't shrink window, ++ * but avoid silly window syndrome. ++ */ ++ if (win < (long)(so->so_rcv.sb_datalen / 4) && win < (long)tp->t_maxseg) ++ win = 0; ++ if (win > (long)TCP_MAXWIN << tp->rcv_scale) ++ win = (long)TCP_MAXWIN << tp->rcv_scale; ++ if (win < (long)(tp->rcv_adv - tp->rcv_nxt)) ++ win = (long)(tp->rcv_adv - tp->rcv_nxt); ++ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); ++ ++ if (SEQ_GT(tp->snd_up, tp->snd_una)) { ++ ti->ti_urp = htons((uint16_t)(tp->snd_up - ntohl(ti->ti_seq))); ++ ti->ti_flags |= TH_URG; ++ } else ++ /* ++ * If no urgent pointer to send, then we pull ++ * the urgent pointer to the left edge of the send window ++ * so that it doesn't drift into the send window on sequence ++ * number wraparound. 
++ */ ++ tp->snd_up = tp->snd_una; /* drag it along */ ++ ++ /* ++ * Put TCP length in extended header, and then ++ * checksum extended header and data. ++ */ ++ if (len + optlen) ++ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + optlen + len)); ++ ti->ti_sum = cksum(m, (int)(hdrlen + len)); ++ ++ /* ++ * In transmit state, time the transmission and arrange for ++ * the retransmit. In persist state, just set snd_max. ++ */ ++ if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { ++ tcp_seq startseq = tp->snd_nxt; ++ ++ /* ++ * Advance snd_nxt over sequence space of this segment. ++ */ ++ if (flags & (TH_SYN | TH_FIN)) { ++ if (flags & TH_SYN) ++ tp->snd_nxt++; ++ if (flags & TH_FIN) { ++ tp->snd_nxt++; ++ tp->t_flags |= TF_SENTFIN; ++ } ++ } ++ tp->snd_nxt += len; ++ if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { ++ tp->snd_max = tp->snd_nxt; ++ /* ++ * Time this transmission if not a retransmission and ++ * not currently timing anything. ++ */ ++ if (tp->t_rtt == 0) { ++ tp->t_rtt = 1; ++ tp->t_rtseq = startseq; ++ } ++ } ++ ++ /* ++ * Set retransmit timer if not currently set, ++ * and not doing an ack or a keep-alive probe. ++ * Initial value for retransmit timer is smoothed ++ * round-trip time + 2 * round-trip time variance. ++ * Initialize shift counter which is used for backoff ++ * of retransmit time. ++ */ ++ if (tp->t_timer[TCPT_REXMT] == 0 && tp->snd_nxt != tp->snd_una) { ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ if (tp->t_timer[TCPT_PERSIST]) { ++ tp->t_timer[TCPT_PERSIST] = 0; ++ tp->t_rxtshift = 0; ++ } ++ } ++ } else if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) ++ tp->snd_max = tp->snd_nxt + len; ++ ++ /* ++ * Fill in IP length and desired time to live and ++ * send to IP level. There should be a better way ++ * to handle ttl and tos; we could keep them in ++ * the template, but need a way to checksum without them. ++ */ ++ m->m_len = hdrlen + len; /* XXX Needed? 
m_len should be correct */ ++ tcpiph_save = *mtod(m, struct tcpiphdr *); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ m->m_data += ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ m->m_len -= ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ ip = mtod(m, struct ip *); ++ ++ ip->ip_len = m->m_len; ++ ip->ip_dst = tcpiph_save.ti_dst; ++ ip->ip_src = tcpiph_save.ti_src; ++ ip->ip_p = tcpiph_save.ti_pr; ++ ++ ip->ip_ttl = IPDEFTTL; ++ ip->ip_tos = so->so_iptos; ++ error = ip_output(so, m); ++ break; ++ ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ ip6 = mtod(m, struct ip6 *); ++ ++ ip6->ip_pl = tcpiph_save.ti_len; ++ ip6->ip_dst = tcpiph_save.ti_dst6; ++ ip6->ip_src = tcpiph_save.ti_src6; ++ ip6->ip_nh = tcpiph_save.ti_nh6; ++ ++ error = ip6_output(so, m, 0); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ if (error) { ++ out: ++ return (error); ++ } ++ ++ /* ++ * Data sent (as far as we can tell). ++ * If this advertises a larger window than any other segment, ++ * then remember the size of the advertised window. ++ * Any pending ACK has now been sent. ++ */ ++ if (win > 0 && SEQ_GT(tp->rcv_nxt + win, tp->rcv_adv)) ++ tp->rcv_adv = tp->rcv_nxt + win; ++ tp->last_ack_sent = tp->rcv_nxt; ++ tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); ++ if (sendalot) ++ goto again; ++ ++ return (0); ++} ++ ++void tcp_setpersist(struct tcpcb *tp) ++{ ++ int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; ++ ++ /* ++ * Start/restart persistence timer. 
++ */ ++ TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], t * tcp_backoff[tp->t_rxtshift], ++ TCPTV_PERSMIN, TCPTV_PERSMAX); ++ if (tp->t_rxtshift < TCP_MAXRXTSHIFT) ++ tp->t_rxtshift++; ++} +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +new file mode 100644 +index 0000000..d6dd133 +--- /dev/null ++++ b/slirp/src/tcp_subr.c +@@ -0,0 +1,975 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93 ++ * tcp_subr.c,v 1.5 1994/10/08 22:39:58 phk Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++/* patchable/settable parameters for tcp */ ++/* Don't do rfc1323 performance enhancements */ ++#define TCP_DO_RFC1323 0 ++ ++/* ++ * Tcp initialization ++ */ ++void tcp_init(Slirp *slirp) ++{ ++ slirp->tcp_iss = 1; /* wrong */ ++ slirp->tcb.so_next = slirp->tcb.so_prev = &slirp->tcb; ++ slirp->tcp_last_so = &slirp->tcb; ++} ++ ++void tcp_cleanup(Slirp *slirp) ++{ ++ while (slirp->tcb.so_next != &slirp->tcb) { ++ tcp_close(sototcpcb(slirp->tcb.so_next)); ++ } ++} ++ ++/* ++ * Create template to be used to send tcp packets on a connection. ++ * Call after host entry created, fills ++ * in a skeletal tcp/ip header, minimizing the amount of work ++ * necessary when the connection is used. 
++ */ ++void tcp_template(struct tcpcb *tp) ++{ ++ struct socket *so = tp->t_socket; ++ register struct tcpiphdr *n = &tp->t_template; ++ ++ n->ti_mbuf = NULL; ++ memset(&n->ti, 0, sizeof(n->ti)); ++ n->ti_x0 = 0; ++ switch (so->so_ffamily) { ++ case AF_INET: ++ n->ti_pr = IPPROTO_TCP; ++ n->ti_len = htons(sizeof(struct tcphdr)); ++ n->ti_src = so->so_faddr; ++ n->ti_dst = so->so_laddr; ++ n->ti_sport = so->so_fport; ++ n->ti_dport = so->so_lport; ++ break; ++ ++ case AF_INET6: ++ n->ti_nh6 = IPPROTO_TCP; ++ n->ti_len = htons(sizeof(struct tcphdr)); ++ n->ti_src6 = so->so_faddr6; ++ n->ti_dst6 = so->so_laddr6; ++ n->ti_sport = so->so_fport6; ++ n->ti_dport = so->so_lport6; ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ n->ti_seq = 0; ++ n->ti_ack = 0; ++ n->ti_x2 = 0; ++ n->ti_off = 5; ++ n->ti_flags = 0; ++ n->ti_win = 0; ++ n->ti_sum = 0; ++ n->ti_urp = 0; ++} ++ ++/* ++ * Send a single message to the TCP at address specified by ++ * the given TCP/IP header. If m == 0, then we make a copy ++ * of the tcpiphdr at ti and send directly to the addressed host. ++ * This is used to force keep alive messages out using the TCP ++ * template for a connection tp->t_template. If flags are given ++ * then we send a message back to the TCP which originated the ++ * segment ti, and discard the mbuf containing it and any other ++ * attached mbufs. ++ * ++ * In any case the ack and sequence number of the transmitted ++ * segment are as specified by the parameters. 
++ */ ++void tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, ++ tcp_seq ack, tcp_seq seq, int flags, unsigned short af) ++{ ++ register int tlen; ++ int win = 0; ++ ++ DEBUG_CALL("tcp_respond"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("ti = %p", ti); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("ack = %u", ack); ++ DEBUG_ARG("seq = %u", seq); ++ DEBUG_ARG("flags = %x", flags); ++ ++ if (tp) ++ win = sbspace(&tp->t_socket->so_rcv); ++ if (m == NULL) { ++ if (!tp || (m = m_get(tp->t_socket->slirp)) == NULL) ++ return; ++ tlen = 0; ++ m->m_data += IF_MAXLINKHDR; ++ *mtod(m, struct tcpiphdr *) = *ti; ++ ti = mtod(m, struct tcpiphdr *); ++ switch (af) { ++ case AF_INET: ++ ti->ti.ti_i4.ih_x1 = 0; ++ break; ++ case AF_INET6: ++ ti->ti.ti_i6.ih_x1 = 0; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ flags = TH_ACK; ++ } else { ++ /* ++ * ti points into m so the next line is just making ++ * the mbuf point to ti ++ */ ++ m->m_data = (char *)ti; ++ ++ m->m_len = sizeof(struct tcpiphdr); ++ tlen = 0; ++#define xchg(a, b, type) \ ++ { \ ++ type t; \ ++ t = a; \ ++ a = b; \ ++ b = t; \ ++ } ++ switch (af) { ++ case AF_INET: ++ xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, uint32_t); ++ xchg(ti->ti_dport, ti->ti_sport, uint16_t); ++ break; ++ case AF_INET6: ++ xchg(ti->ti_dst6, ti->ti_src6, struct in6_addr); ++ xchg(ti->ti_dport, ti->ti_sport, uint16_t); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++#undef xchg ++ } ++ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + tlen)); ++ tlen += sizeof(struct tcpiphdr); ++ m->m_len = tlen; ++ ++ ti->ti_mbuf = NULL; ++ ti->ti_x0 = 0; ++ ti->ti_seq = htonl(seq); ++ ti->ti_ack = htonl(ack); ++ ti->ti_x2 = 0; ++ ti->ti_off = sizeof(struct tcphdr) >> 2; ++ ti->ti_flags = flags; ++ if (tp) ++ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); ++ else ++ ti->ti_win = htons((uint16_t)win); ++ ti->ti_urp = 0; ++ ti->ti_sum = 0; ++ ti->ti_sum = cksum(m, tlen); ++ ++ struct tcpiphdr tcpiph_save = *(mtod(m, 
struct tcpiphdr *)); ++ struct ip *ip; ++ struct ip6 *ip6; ++ ++ switch (af) { ++ case AF_INET: ++ m->m_data += ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ m->m_len -= ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ ip = mtod(m, struct ip *); ++ ip->ip_len = m->m_len; ++ ip->ip_dst = tcpiph_save.ti_dst; ++ ip->ip_src = tcpiph_save.ti_src; ++ ip->ip_p = tcpiph_save.ti_pr; ++ ++ if (flags & TH_RST) { ++ ip->ip_ttl = MAXTTL; ++ } else { ++ ip->ip_ttl = IPDEFTTL; ++ } ++ ++ ip_output(NULL, m); ++ break; ++ ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ ip6 = mtod(m, struct ip6 *); ++ ip6->ip_pl = tcpiph_save.ti_len; ++ ip6->ip_dst = tcpiph_save.ti_dst6; ++ ip6->ip_src = tcpiph_save.ti_src6; ++ ip6->ip_nh = tcpiph_save.ti_nh6; ++ ++ ip6_output(NULL, m, 0); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++} ++ ++/* ++ * Create a new TCP control block, making an ++ * empty reassembly queue and hooking it to the argument ++ * protocol control block. ++ */ ++struct tcpcb *tcp_newtcpcb(struct socket *so) ++{ ++ register struct tcpcb *tp; ++ ++ tp = (struct tcpcb *)malloc(sizeof(*tp)); ++ if (tp == NULL) ++ return ((struct tcpcb *)0); ++ ++ memset((char *)tp, 0, sizeof(struct tcpcb)); ++ tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; ++ tp->t_maxseg = (so->so_ffamily == AF_INET) ? TCP_MSS : TCP6_MSS; ++ ++ tp->t_flags = TCP_DO_RFC1323 ? (TF_REQ_SCALE | TF_REQ_TSTMP) : 0; ++ tp->t_socket = so; ++ ++ /* ++ * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no ++ * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives ++ * reasonable initial retransmit time. 
++ */ ++ tp->t_srtt = TCPTV_SRTTBASE; ++ tp->t_rttvar = TCPTV_SRTTDFLT << 2; ++ tp->t_rttmin = TCPTV_MIN; ++ ++ TCPT_RANGESET(tp->t_rxtcur, ++ ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1, ++ TCPTV_MIN, TCPTV_REXMTMAX); ++ ++ tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; ++ tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; ++ tp->t_state = TCPS_CLOSED; ++ ++ so->so_tcpcb = tp; ++ ++ return (tp); ++} ++ ++/* ++ * Drop a TCP connection, reporting ++ * the specified error. If connection is synchronized, ++ * then send a RST to peer. ++ */ ++struct tcpcb *tcp_drop(struct tcpcb *tp, int err) ++{ ++ DEBUG_CALL("tcp_drop"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("errno = %d", errno); ++ ++ if (TCPS_HAVERCVDSYN(tp->t_state)) { ++ tp->t_state = TCPS_CLOSED; ++ (void)tcp_output(tp); ++ } ++ return (tcp_close(tp)); ++} ++ ++/* ++ * Close a TCP control block: ++ * discard all space held by the tcp ++ * discard internet protocol block ++ * wake up any sleepers ++ */ ++struct tcpcb *tcp_close(struct tcpcb *tp) ++{ ++ register struct tcpiphdr *t; ++ struct socket *so = tp->t_socket; ++ Slirp *slirp = so->slirp; ++ register struct mbuf *m; ++ ++ DEBUG_CALL("tcp_close"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ /* free the reassembly queue, if any */ ++ t = tcpfrag_list_first(tp); ++ while (!tcpfrag_list_end(t, tp)) { ++ t = tcpiphdr_next(t); ++ m = tcpiphdr_prev(t)->ti_mbuf; ++ remque(tcpiphdr2qlink(tcpiphdr_prev(t))); ++ m_free(m); ++ } ++ free(tp); ++ so->so_tcpcb = NULL; ++ /* clobber input socket cache if we're closing the cached connection */ ++ if (so == slirp->tcp_last_so) ++ slirp->tcp_last_so = &slirp->tcb; ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sbfree(&so->so_rcv); ++ sbfree(&so->so_snd); ++ sofree(so); ++ return ((struct tcpcb *)0); ++} ++ ++/* ++ * TCP protocol interface to socket abstraction. 
++ */ ++ ++/* ++ * User issued close, and wish to trail through shutdown states: ++ * if never received SYN, just forget it. If got a SYN from peer, ++ * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. ++ * If already got a FIN from peer, then almost done; go to LAST_ACK ++ * state. In all other cases, have already sent FIN to peer (e.g. ++ * after PRU_SHUTDOWN), and just have to play tedious game waiting ++ * for peer to send FIN or not respond to keep-alives, etc. ++ * We can let the user exit from the close as soon as the FIN is acked. ++ */ ++void tcp_sockclosed(struct tcpcb *tp) ++{ ++ DEBUG_CALL("tcp_sockclosed"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ if (!tp) { ++ return; ++ } ++ ++ switch (tp->t_state) { ++ case TCPS_CLOSED: ++ case TCPS_LISTEN: ++ case TCPS_SYN_SENT: ++ tp->t_state = TCPS_CLOSED; ++ tp = tcp_close(tp); ++ break; ++ ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ tp->t_state = TCPS_FIN_WAIT_1; ++ break; ++ ++ case TCPS_CLOSE_WAIT: ++ tp->t_state = TCPS_LAST_ACK; ++ break; ++ } ++ tcp_output(tp); ++} ++ ++/* ++ * Connect to a host on the Internet ++ * Called by tcp_input ++ * Only do a connect, the tcp fields will be set in tcp_input ++ * return 0 if there's a result of the connect, ++ * else return -1 means we're still connecting ++ * The return value is almost always -1 since the socket is ++ * nonblocking. Connect returns after the SYN is sent, and does ++ * not wait for ACK+SYN. 
++ */ ++int tcp_fconnect(struct socket *so, unsigned short af) ++{ ++ int ret = 0; ++ ++ DEBUG_CALL("tcp_fconnect"); ++ DEBUG_ARG("so = %p", so); ++ ++ ret = so->s = slirp_socket(af, SOCK_STREAM, 0); ++ if (ret >= 0) { ++ int opt, s = so->s; ++ struct sockaddr_storage addr; ++ ++ slirp_set_nonblock(s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ slirp_socket_set_fast_reuse(s); ++ opt = 1; ++ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(opt)); ++ opt = 1; ++ setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(opt)); ++ ++ addr = so->fhost.ss; ++ DEBUG_CALL(" connect()ing"); ++ sotranslate_out(so, &addr); ++ ++ /* We don't care what port we get */ ++ ret = connect(s, (struct sockaddr *)&addr, sockaddr_size(&addr)); ++ ++ /* ++ * If it's not in progress, it failed, so we just return 0, ++ * without clearing SS_NOFDREF ++ */ ++ soisfconnecting(so); ++ } ++ ++ return (ret); ++} ++ ++/* ++ * Accept the socket and connect to the local-host ++ * ++ * We have a problem. The correct thing to do would be ++ * to first connect to the local-host, and only if the ++ * connection is accepted, then do an accept() here. ++ * But, a) we need to know who's trying to connect ++ * to the socket to be able to SYN the local-host, and ++ * b) we are already connected to the foreign host by ++ * the time it gets to accept(), so... We simply accept ++ * here and SYN the local-host. ++ */ ++void tcp_connect(struct socket *inso) ++{ ++ Slirp *slirp = inso->slirp; ++ struct socket *so; ++ struct sockaddr_storage addr; ++ socklen_t addrlen = sizeof(struct sockaddr_storage); ++ struct tcpcb *tp; ++ int s, opt; ++ ++ DEBUG_CALL("tcp_connect"); ++ DEBUG_ARG("inso = %p", inso); ++ ++ /* ++ * If it's an SS_ACCEPTONCE socket, no need to socreate() ++ * another socket, just use the accept() socket. 
++ */ ++ if (inso->so_state & SS_FACCEPTONCE) { ++ /* FACCEPTONCE already have a tcpcb */ ++ so = inso; ++ } else { ++ so = socreate(slirp); ++ if (tcp_attach(so) < 0) { ++ g_free(so); /* NOT sofree */ ++ return; ++ } ++ so->lhost = inso->lhost; ++ so->so_ffamily = inso->so_ffamily; ++ } ++ ++ tcp_mss(sototcpcb(so), 0); ++ ++ s = accept(inso->s, (struct sockaddr *)&addr, &addrlen); ++ if (s < 0) { ++ tcp_close(sototcpcb(so)); /* This will sofree() as well */ ++ return; ++ } ++ slirp_set_nonblock(s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ slirp_socket_set_fast_reuse(s); ++ opt = 1; ++ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ slirp_socket_set_nodelay(s); ++ ++ so->fhost.ss = addr; ++ sotranslate_accept(so); ++ ++ /* Close the accept() socket, set right state */ ++ if (inso->so_state & SS_FACCEPTONCE) { ++ /* If we only accept once, close the accept() socket */ ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ ++ /* Don't select it yet, even though we have an FD */ ++ /* if it's not FACCEPTONCE, it's already NOFDREF */ ++ so->so_state = SS_NOFDREF; ++ } ++ so->s = s; ++ so->so_state |= SS_INCOMING; ++ ++ so->so_iptos = tcp_tos(so); ++ tp = sototcpcb(so); ++ ++ tcp_template(tp); ++ ++ tp->t_state = TCPS_SYN_SENT; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ tp->iss = slirp->tcp_iss; ++ slirp->tcp_iss += TCP_ISSINCR / 2; ++ tcp_sendseqinit(tp); ++ tcp_output(tp); ++} ++ ++/* ++ * Attach a TCPCB to a socket. 
++ */ ++int tcp_attach(struct socket *so) ++{ ++ if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL) ++ return -1; ++ ++ insque(so, &so->slirp->tcb); ++ ++ return 0; ++} ++ ++/* ++ * Set the socket's type of service field ++ */ ++static const struct tos_t tcptos[] = { ++ { 0, 20, IPTOS_THROUGHPUT, 0 }, /* ftp data */ ++ { 21, 21, IPTOS_LOWDELAY, EMU_FTP }, /* ftp control */ ++ { 0, 23, IPTOS_LOWDELAY, 0 }, /* telnet */ ++ { 0, 80, IPTOS_THROUGHPUT, 0 }, /* WWW */ ++ { 0, 513, IPTOS_LOWDELAY, EMU_RLOGIN | EMU_NOCONNECT }, /* rlogin */ ++ { 0, 544, IPTOS_LOWDELAY, EMU_KSH }, /* kshell */ ++ { 0, 543, IPTOS_LOWDELAY, 0 }, /* klogin */ ++ { 0, 6667, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC */ ++ { 0, 6668, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC undernet */ ++ { 0, 7070, IPTOS_LOWDELAY, EMU_REALAUDIO }, /* RealAudio control */ ++ { 0, 113, IPTOS_LOWDELAY, EMU_IDENT }, /* identd protocol */ ++ { 0, 0, 0, 0 } ++}; ++ ++static struct emu_t *tcpemu = NULL; ++ ++/* ++ * Return TOS according to the above table ++ */ ++uint8_t tcp_tos(struct socket *so) ++{ ++ int i = 0; ++ struct emu_t *emup; ++ ++ while (tcptos[i].tos) { ++ if ((tcptos[i].fport && (ntohs(so->so_fport) == tcptos[i].fport)) || ++ (tcptos[i].lport && (ntohs(so->so_lport) == tcptos[i].lport))) { ++ so->so_emu = tcptos[i].emu; ++ return tcptos[i].tos; ++ } ++ i++; ++ } ++ ++ /* Nope, lets see if there's a user-added one */ ++ for (emup = tcpemu; emup; emup = emup->next) { ++ if ((emup->fport && (ntohs(so->so_fport) == emup->fport)) || ++ (emup->lport && (ntohs(so->so_lport) == emup->lport))) { ++ so->so_emu = emup->emu; ++ return emup->tos; ++ } ++ } ++ ++ return 0; ++} ++ ++/* ++ * Emulate programs that try and connect to us ++ * This includes ftp (the data connection is ++ * initiated by the server) and IRC (DCC CHAT and ++ * DCC SEND) for now ++ * ++ * NOTE: It's possible to crash SLiRP by sending it ++ * unstandard strings to emulate... 
if this is a problem, ++ * more checks are needed here ++ * ++ * XXX Assumes the whole command came in one packet ++ * ++ * XXX Some ftp clients will have their TOS set to ++ * LOWDELAY and so Nagel will kick in. Because of this, ++ * we'll get the first letter, followed by the rest, so ++ * we simply scan for ORT instead of PORT... ++ * DCC doesn't have this problem because there's other stuff ++ * in the packet before the DCC command. ++ * ++ * Return 1 if the mbuf m is still valid and should be ++ * sbappend()ed ++ * ++ * NOTE: if you return 0 you MUST m_free() the mbuf! ++ */ ++int tcp_emu(struct socket *so, struct mbuf *m) ++{ ++ Slirp *slirp = so->slirp; ++ unsigned n1, n2, n3, n4, n5, n6; ++ char buff[257]; ++ uint32_t laddr; ++ unsigned lport; ++ char *bptr; ++ ++ DEBUG_CALL("tcp_emu"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ switch (so->so_emu) { ++ int x, i; ++ ++ /* TODO: IPv6 */ ++ case EMU_IDENT: ++ /* ++ * Identification protocol as per rfc-1413 ++ */ ++ ++ { ++ struct socket *tmpso; ++ struct sockaddr_in addr; ++ socklen_t addrlen = sizeof(struct sockaddr_in); ++ char *eol = g_strstr_len(m->m_data, m->m_len, "\r\n"); ++ ++ if (!eol) { ++ return 1; ++ } ++ ++ *eol = '\0'; ++ if (sscanf(m->m_data, "%u%*[ ,]%u", &n1, &n2) == 2) { ++ HTONS(n1); ++ HTONS(n2); ++ /* n2 is the one on our host */ ++ for (tmpso = slirp->tcb.so_next; tmpso != &slirp->tcb; ++ tmpso = tmpso->so_next) { ++ if (tmpso->so_laddr.s_addr == so->so_laddr.s_addr && ++ tmpso->so_lport == n2 && ++ tmpso->so_faddr.s_addr == so->so_faddr.s_addr && ++ tmpso->so_fport == n1) { ++ if (getsockname(tmpso->s, (struct sockaddr *)&addr, ++ &addrlen) == 0) ++ n2 = addr.sin_port; ++ break; ++ } ++ } ++ NTOHS(n1); ++ NTOHS(n2); ++ m_inc(m, snprintf(NULL, 0, "%d,%d\r\n", n1, n2) + 1); ++ m->m_len = snprintf(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); ++ assert(m->m_len < M_ROOM(m)); ++ } else { ++ *eol = '\r'; ++ } ++ ++ return 1; ++ } ++ ++ case EMU_FTP: /* ftp */ ++ m_inc(m, 
m->m_len + 1); ++ *(m->m_data + m->m_len) = 0; /* NUL terminate for strstr */ ++ if ((bptr = (char *)strstr(m->m_data, "ORT")) != NULL) { ++ /* ++ * Need to emulate the PORT command ++ */ ++ x = sscanf(bptr, "ORT %u,%u,%u,%u,%u,%u\r\n%256[^\177]", &n1, &n2, ++ &n3, &n4, &n5, &n6, buff); ++ if (x < 6) ++ return 1; ++ ++ laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4)); ++ lport = htons((n5 << 8) | (n6)); ++ ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport, ++ SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ n6 = ntohs(so->so_fport); ++ ++ n5 = (n6 >> 8) & 0xff; ++ n6 &= 0xff; ++ ++ laddr = ntohl(so->so_faddr.s_addr); ++ ++ n1 = ((laddr >> 24) & 0xff); ++ n2 = ((laddr >> 16) & 0xff); ++ n3 = ((laddr >> 8) & 0xff); ++ n4 = (laddr & 0xff); ++ ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += snprintf(bptr, m->m_size - m->m_len, ++ "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, ++ n5, n6, x == 7 ? buff : ""); ++ return 1; ++ } else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) { ++ /* ++ * Need to emulate the PASV response ++ */ ++ x = sscanf( ++ bptr, ++ "27 Entering Passive Mode (%u,%u,%u,%u,%u,%u)\r\n%256[^\177]", ++ &n1, &n2, &n3, &n4, &n5, &n6, buff); ++ if (x < 6) ++ return 1; ++ ++ laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4)); ++ lport = htons((n5 << 8) | (n6)); ++ ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport, ++ SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ n6 = ntohs(so->so_fport); ++ ++ n5 = (n6 >> 8) & 0xff; ++ n6 &= 0xff; ++ ++ laddr = ntohl(so->so_faddr.s_addr); ++ ++ n1 = ((laddr >> 24) & 0xff); ++ n2 = ((laddr >> 16) & 0xff); ++ n3 = ((laddr >> 8) & 0xff); ++ n4 = (laddr & 0xff); ++ ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += ++ snprintf(bptr, m->m_size - m->m_len, ++ "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", ++ n1, n2, n3, n4, n5, n6, x == 7 ? 
buff : ""); ++ ++ return 1; ++ } ++ ++ return 1; ++ ++ case EMU_KSH: ++ /* ++ * The kshell (Kerberos rsh) and shell services both pass ++ * a local port port number to carry signals to the server ++ * and stderr to the client. It is passed at the beginning ++ * of the connection as a NUL-terminated decimal ASCII string. ++ */ ++ so->so_emu = 0; ++ for (lport = 0, i = 0; i < m->m_len - 1; ++i) { ++ if (m->m_data[i] < '0' || m->m_data[i] > '9') ++ return 1; /* invalid number */ ++ lport *= 10; ++ lport += m->m_data[i] - '0'; ++ } ++ if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && ++ (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, ++ htons(lport), SS_FACCEPTONCE)) != NULL) ++ m->m_len = ++ snprintf(m->m_data, m->m_size, "%d", ntohs(so->so_fport)) + 1; ++ return 1; ++ ++ case EMU_IRC: ++ /* ++ * Need to emulate DCC CHAT, DCC SEND and DCC MOVE ++ */ ++ m_inc(m, m->m_len + 1); ++ *(m->m_data + m->m_len) = 0; /* NULL terminate the string for strstr */ ++ if ((bptr = (char *)strstr(m->m_data, "DCC")) == NULL) ++ return 1; ++ ++ /* The %256s is for the broken mIRC */ ++ if (sscanf(bptr, "DCC CHAT %256s %u %u", buff, &laddr, &lport) == 3) { ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), ++ htons(lport), SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += snprintf(bptr, m->m_size, "DCC CHAT chat %lu %u%c\n", ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), 1); ++ } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, ++ &n1) == 4) { ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), ++ htons(lport), SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += ++ snprintf(bptr, m->m_size, "DCC SEND %s %lu %u %u%c\n", buff, ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), n1, 1); ++ } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, ++ &n1) == 4) { 
++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), ++ htons(lport), SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += ++ snprintf(bptr, m->m_size, "DCC MOVE %s %lu %u %u%c\n", buff, ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), n1, 1); ++ } ++ return 1; ++ ++ case EMU_REALAUDIO: ++ /* ++ * RealAudio emulation - JP. We must try to parse the incoming ++ * data and try to find the two characters that contain the ++ * port number. Then we redirect an udp port and replace the ++ * number with the real port we got. ++ * ++ * The 1.0 beta versions of the player are not supported ++ * any more. ++ * ++ * A typical packet for player version 1.0 (release version): ++ * ++ * 0000:50 4E 41 00 05 ++ * 0000:00 01 00 02 1B D7 00 00 67 E6 6C DC 63 00 12 50 ........g.l.c..P ++ * 0010:4E 43 4C 49 45 4E 54 20 31 30 31 20 41 4C 50 48 NCLIENT 101 ALPH ++ * 0020:41 6C 00 00 52 00 17 72 61 66 69 6C 65 73 2F 76 Al..R..rafiles/v ++ * 0030:6F 61 2F 65 6E 67 6C 69 73 68 5F 2E 72 61 79 42 oa/english_.rayB ++ * ++ * Now the port number 0x1BD7 is found at offset 0x04 of the ++ * Now the port number 0x1BD7 is found at offset 0x04 of the ++ * second packet. This time we received five bytes first and ++ * then the rest. You never know how many bytes you get. ++ * ++ * A typical packet for player version 2.0 (beta): ++ * ++ * 0000:50 4E 41 00 06 00 02 00 00 00 01 00 02 1B C1 00 PNA............. ++ * 0010:00 67 75 78 F5 63 00 0A 57 69 6E 32 2E 30 2E 30 .gux.c..Win2.0.0 ++ * 0020:2E 35 6C 00 00 52 00 1C 72 61 66 69 6C 65 73 2F .5l..R..rafiles/ ++ * 0030:77 65 62 73 69 74 65 2F 32 30 72 65 6C 65 61 73 website/20releas ++ * 0040:65 2E 72 61 79 53 00 00 06 36 42 e.rayS...6B ++ * ++ * Port number 0x1BC1 is found at offset 0x0d. ++ * ++ * This is just a horrible switch statement. Variable ra tells ++ * us where we're going. 
++ */ ++ ++ bptr = m->m_data; ++ while (bptr < m->m_data + m->m_len) { ++ uint16_t p; ++ static int ra = 0; ++ char ra_tbl[4]; ++ ++ ra_tbl[0] = 0x50; ++ ra_tbl[1] = 0x4e; ++ ra_tbl[2] = 0x41; ++ ra_tbl[3] = 0; ++ ++ switch (ra) { ++ case 0: ++ case 2: ++ case 3: ++ if (*bptr++ != ra_tbl[ra]) { ++ ra = 0; ++ continue; ++ } ++ break; ++ ++ case 1: ++ /* ++ * We may get 0x50 several times, ignore them ++ */ ++ if (*bptr == 0x50) { ++ ra = 1; ++ bptr++; ++ continue; ++ } else if (*bptr++ != ra_tbl[ra]) { ++ ra = 0; ++ continue; ++ } ++ break; ++ ++ case 4: ++ /* ++ * skip version number ++ */ ++ bptr++; ++ break; ++ ++ case 5: ++ /* ++ * The difference between versions 1.0 and ++ * 2.0 is here. For future versions of ++ * the player this may need to be modified. ++ */ ++ if (*(bptr + 1) == 0x02) ++ bptr += 8; ++ else ++ bptr += 4; ++ break; ++ ++ case 6: ++ /* This is the field containing the port ++ * number that RA-player is listening to. ++ */ ++ lport = (((uint8_t *)bptr)[0] << 8) + ((uint8_t *)bptr)[1]; ++ if (lport < 6970) ++ lport += 256; /* don't know why */ ++ if (lport < 6970 || lport > 7170) ++ return 1; /* failed */ ++ ++ /* try to get udp port between 6970 - 7170 */ ++ for (p = 6970; p < 7071; p++) { ++ if (udp_listen(slirp, INADDR_ANY, htons(p), ++ so->so_laddr.s_addr, htons(lport), ++ SS_FACCEPTONCE)) { ++ break; ++ } ++ } ++ if (p == 7071) ++ p = 0; ++ *(uint8_t *)bptr++ = (p >> 8) & 0xff; ++ *(uint8_t *)bptr = p & 0xff; ++ ra = 0; ++ return 1; /* port redirected, we're done */ ++ break; ++ ++ default: ++ ra = 0; ++ } ++ ra++; ++ } ++ return 1; ++ ++ default: ++ /* Ooops, not emulated, won't call tcp_emu again */ ++ so->so_emu = 0; ++ return 1; ++ } ++} ++ ++/* ++ * Do misc. config of SLiRP while its running. 
++ * Return 0 if this connections is to be closed, 1 otherwise, ++ * return 2 if this is a command-line connection ++ */ ++int tcp_ctl(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ struct sbuf *sb = &so->so_snd; ++ struct gfwd_list *ex_ptr; ++ ++ DEBUG_CALL("tcp_ctl"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* TODO: IPv6 */ ++ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { ++ /* Check if it's pty_exec */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == so->so_fport && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { ++ if (ex_ptr->write_cb) { ++ so->s = -1; ++ so->guestfwd = ex_ptr; ++ return 1; ++ } ++ DEBUG_MISC(" executing %s", ex_ptr->ex_exec); ++ return fork_exec(so, ex_ptr->ex_exec); ++ } ++ } ++ } ++ sb->sb_cc = ++ snprintf(sb->sb_wptr, sb->sb_datalen - (sb->sb_wptr - sb->sb_data), ++ "Error: No application configured.\r\n"); ++ sb->sb_wptr += sb->sb_cc; ++ return 0; ++} +diff --git a/slirp/src/tcp_timer.c b/slirp/src/tcp_timer.c +new file mode 100644 +index 0000000..102023e +--- /dev/null ++++ b/slirp/src/tcp_timer.c +@@ -0,0 +1,286 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93 ++ * tcp_timer.c,v 1.2 1994/08/02 07:49:10 davidg Exp ++ */ ++ ++#include "slirp.h" ++ ++static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer); ++ ++/* ++ * Fast timeout routine for processing delayed acks ++ */ ++void tcp_fasttimo(Slirp *slirp) ++{ ++ register struct socket *so; ++ register struct tcpcb *tp; ++ ++ DEBUG_CALL("tcp_fasttimo"); ++ ++ so = slirp->tcb.so_next; ++ if (so) ++ for (; so != &slirp->tcb; so = so->so_next) ++ if ((tp = (struct tcpcb *)so->so_tcpcb) && ++ (tp->t_flags & TF_DELACK)) { ++ tp->t_flags &= ~TF_DELACK; ++ tp->t_flags |= TF_ACKNOW; ++ (void)tcp_output(tp); ++ } ++} ++ ++/* ++ * Tcp protocol timeout routine called every 500 ms. ++ * Updates the timers in all active tcb's and ++ * causes finite state machine actions if timers expire. 
++ */ ++void tcp_slowtimo(Slirp *slirp) ++{ ++ register struct socket *ip, *ipnxt; ++ register struct tcpcb *tp; ++ register int i; ++ ++ DEBUG_CALL("tcp_slowtimo"); ++ ++ /* ++ * Search through tcb's and update active timers. ++ */ ++ ip = slirp->tcb.so_next; ++ if (ip == NULL) { ++ return; ++ } ++ for (; ip != &slirp->tcb; ip = ipnxt) { ++ ipnxt = ip->so_next; ++ tp = sototcpcb(ip); ++ if (tp == NULL) { ++ continue; ++ } ++ for (i = 0; i < TCPT_NTIMERS; i++) { ++ if (tp->t_timer[i] && --tp->t_timer[i] == 0) { ++ tcp_timers(tp, i); ++ if (ipnxt->so_prev != ip) ++ goto tpgone; ++ } ++ } ++ tp->t_idle++; ++ if (tp->t_rtt) ++ tp->t_rtt++; ++ tpgone:; ++ } ++ slirp->tcp_iss += TCP_ISSINCR / PR_SLOWHZ; /* increment iss */ ++ slirp->tcp_now++; /* for timestamps */ ++} ++ ++/* ++ * Cancel all timers for TCP tp. ++ */ ++void tcp_canceltimers(struct tcpcb *tp) ++{ ++ register int i; ++ ++ for (i = 0; i < TCPT_NTIMERS; i++) ++ tp->t_timer[i] = 0; ++} ++ ++const int tcp_backoff[TCP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, ++ 64, 64, 64, 64, 64, 64 }; ++ ++/* ++ * TCP timer processing. ++ */ ++static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer) ++{ ++ register int rexmt; ++ ++ DEBUG_CALL("tcp_timers"); ++ ++ switch (timer) { ++ /* ++ * 2 MSL timeout in shutdown went off. If we're closed but ++ * still waiting for peer to close and connection has been idle ++ * too long, or if 2MSL time is up from TIME_WAIT, delete connection ++ * control block. Otherwise, check again in a bit. ++ */ ++ case TCPT_2MSL: ++ if (tp->t_state != TCPS_TIME_WAIT && tp->t_idle <= TCP_MAXIDLE) ++ tp->t_timer[TCPT_2MSL] = TCPTV_KEEPINTVL; ++ else ++ tp = tcp_close(tp); ++ break; ++ ++ /* ++ * Retransmission timer went off. Message has not ++ * been acked within retransmit interval. Back off ++ * to a longer retransmit interval and retransmit one segment. 
++ */ ++ case TCPT_REXMT: ++ ++ /* ++ * XXXXX If a packet has timed out, then remove all the queued ++ * packets for that session. ++ */ ++ ++ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { ++ /* ++ * This is a hack to suit our terminal server here at the uni of ++ * canberra since they have trouble with zeroes... It usually lets ++ * them through unharmed, but under some conditions, it'll eat the ++ * zeros. If we keep retransmitting it, it'll keep eating the ++ * zeroes, so we keep retransmitting, and eventually the connection ++ * dies... (this only happens on incoming data) ++ * ++ * So, if we were gonna drop the connection from too many ++ * retransmits, don't... instead halve the t_maxseg, which might ++ * break up the NULLs and let them through ++ * ++ * *sigh* ++ */ ++ ++ tp->t_maxseg >>= 1; ++ if (tp->t_maxseg < 32) { ++ /* ++ * We tried our best, now the connection must die! ++ */ ++ tp->t_rxtshift = TCP_MAXRXTSHIFT; ++ tp = tcp_drop(tp, tp->t_softerror); ++ /* tp->t_softerror : ETIMEDOUT); */ /* XXX */ ++ return (tp); /* XXX */ ++ } ++ ++ /* ++ * Set rxtshift to 6, which is still at the maximum ++ * backoff time ++ */ ++ tp->t_rxtshift = 6; ++ } ++ rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; ++ TCPT_RANGESET(tp->t_rxtcur, rexmt, (short)tp->t_rttmin, ++ TCPTV_REXMTMAX); /* XXX */ ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ /* ++ * If losing, let the lower level know and try for ++ * a better route. Also, if we backed off this far, ++ * our srtt estimate is probably bogus. Clobber it ++ * so we'll take the next rtt measurement as our srtt; ++ * move the current srtt into rttvar to keep the current ++ * retransmit times until then. ++ */ ++ if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { ++ tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); ++ tp->t_srtt = 0; ++ } ++ tp->snd_nxt = tp->snd_una; ++ /* ++ * If timing a segment in this window, stop the timer. 
++ */ ++ tp->t_rtt = 0; ++ /* ++ * Close the congestion window down to one segment ++ * (we'll open it by one segment for each ack we get). ++ * Since we probably have a window's worth of unacked ++ * data accumulated, this "slow start" keeps us from ++ * dumping all that data as back-to-back packets (which ++ * might overwhelm an intermediate gateway). ++ * ++ * There are two phases to the opening: Initially we ++ * open by one mss on each ack. This makes the window ++ * size increase exponentially with time. If the ++ * window is larger than the path can handle, this ++ * exponential growth results in dropped packet(s) ++ * almost immediately. To get more time between ++ * drops but still "push" the network to take advantage ++ * of improving conditions, we switch from exponential ++ * to linear window opening at some threshold size. ++ * For a threshold, we use half the current window ++ * size, truncated to a multiple of the mss. ++ * ++ * (the minimum cwnd that will give us exponential ++ * growth is 2 mss. We don't allow the threshold ++ * to go below this.) ++ */ ++ { ++ unsigned win = MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; ++ if (win < 2) ++ win = 2; ++ tp->snd_cwnd = tp->t_maxseg; ++ tp->snd_ssthresh = win * tp->t_maxseg; ++ tp->t_dupacks = 0; ++ } ++ (void)tcp_output(tp); ++ break; ++ ++ /* ++ * Persistence timer into zero window. ++ * Force a byte to be output, if possible. ++ */ ++ case TCPT_PERSIST: ++ tcp_setpersist(tp); ++ tp->t_force = 1; ++ (void)tcp_output(tp); ++ tp->t_force = 0; ++ break; ++ ++ /* ++ * Keep-alive timer went off; send something ++ * or drop connection if idle for too long. 
++ */ ++ case TCPT_KEEP: ++ if (tp->t_state < TCPS_ESTABLISHED) ++ goto dropit; ++ ++ if (slirp_do_keepalive && tp->t_state <= TCPS_CLOSE_WAIT) { ++ if (tp->t_idle >= TCPTV_KEEP_IDLE + TCP_MAXIDLE) ++ goto dropit; ++ /* ++ * Send a packet designed to force a response ++ * if the peer is up and reachable: ++ * either an ACK if the connection is still alive, ++ * or an RST if the peer has closed the connection ++ * due to timeout or reboot. ++ * Using sequence number tp->snd_una-1 ++ * causes the transmitted zero-length segment ++ * to lie outside the receive window; ++ * by the protocol spec, this requires the ++ * correspondent TCP to respond. ++ */ ++ tcp_respond(tp, &tp->t_template, (struct mbuf *)NULL, tp->rcv_nxt, ++ tp->snd_una - 1, 0, tp->t_socket->so_ffamily); ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; ++ } else ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; ++ break; ++ ++ dropit: ++ tp = tcp_drop(tp, 0); ++ break; ++ } ++ ++ return (tp); ++} +diff --git a/slirp/src/tcp_timer.h b/slirp/src/tcp_timer.h +new file mode 100644 +index 0000000..584a559 +--- /dev/null ++++ b/slirp/src/tcp_timer.h +@@ -0,0 +1,130 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93 ++ * tcp_timer.h,v 1.4 1994/08/21 05:27:38 paul Exp ++ */ ++ ++#ifndef TCP_TIMER_H ++#define TCP_TIMER_H ++ ++/* ++ * Definitions of the TCP timers. These timers are counted ++ * down PR_SLOWHZ times a second. ++ */ ++#define TCPT_NTIMERS 4 ++ ++#define TCPT_REXMT 0 /* retransmit */ ++#define TCPT_PERSIST 1 /* retransmit persistence */ ++#define TCPT_KEEP 2 /* keep alive */ ++#define TCPT_2MSL 3 /* 2*msl quiet time timer */ ++ ++/* ++ * The TCPT_REXMT timer is used to force retransmissions. ++ * The TCP has the TCPT_REXMT timer set whenever segments ++ * have been sent for which ACKs are expected but not yet ++ * received. If an ACK is received which advances tp->snd_una, ++ * then the retransmit timer is cleared (if there are no more ++ * outstanding segments) or reset to the base value (if there ++ * are more ACKs expected). 
Whenever the retransmit timer goes off, ++ * we retransmit one unacknowledged segment, and do a backoff ++ * on the retransmit timer. ++ * ++ * The TCPT_PERSIST timer is used to keep window size information ++ * flowing even if the window goes shut. If all previous transmissions ++ * have been acknowledged (so that there are no retransmissions in progress), ++ * and the window is too small to bother sending anything, then we start ++ * the TCPT_PERSIST timer. When it expires, if the window is nonzero, ++ * we go to transmit state. Otherwise, at intervals send a single byte ++ * into the peer's window to force him to update our window information. ++ * We do this at most as often as TCPTV_PERSMIN time intervals, ++ * but no more frequently than the current estimate of round-trip ++ * packet time. The TCPT_PERSIST timer is cleared whenever we receive ++ * a window update from the peer. ++ * ++ * The TCPT_KEEP timer is used to keep connections alive. If a ++ * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time, ++ * but not yet established, then we drop the connection. Once the connection ++ * is established, if the connection is idle for TCPTV_KEEP_IDLE time ++ * (and keepalives have been enabled on the socket), we begin to probe ++ * the connection. We force the peer to send us a segment by sending: ++ * <SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK> ++ * This segment is (deliberately) outside the window, and should elicit ++ * an ack segment in response from the peer. If, despite the TCPT_KEEP ++ * initiated segments we cannot elicit a response from a peer in TCP_MAXIDLE ++ * amount of time probing, then we drop the connection. ++ */ ++ ++/* ++ * Time constants. ++ */ ++#define TCPTV_MSL (5 * PR_SLOWHZ) /* max seg lifetime (hah!) 
*/ ++ ++#define TCPTV_SRTTBASE \ ++ 0 /* base roundtrip time; \ ++ if 0, no idea yet */ ++#define TCPTV_SRTTDFLT (3 * PR_SLOWHZ) /* assumed RTT if no info */ ++ ++#define TCPTV_PERSMIN (5 * PR_SLOWHZ) /* retransmit persistence */ ++#define TCPTV_PERSMAX (60 * PR_SLOWHZ) /* maximum persist interval */ ++ ++#define TCPTV_KEEP_INIT (75 * PR_SLOWHZ) /* initial connect keep alive */ ++#define TCPTV_KEEP_IDLE (120 * 60 * PR_SLOWHZ) /* dflt time before probing */ ++#define TCPTV_KEEPINTVL (75 * PR_SLOWHZ) /* default probe interval */ ++#define TCPTV_KEEPCNT 8 /* max probes before drop */ ++ ++#define TCPTV_MIN (1 * PR_SLOWHZ) /* minimum allowable value */ ++#define TCPTV_REXMTMAX (12 * PR_SLOWHZ) /* max allowable REXMT value */ ++ ++#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ ++ ++#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ ++ ++ ++/* ++ * Force a time value to be in a certain range. ++ */ ++#define TCPT_RANGESET(tv, value, tvmin, tvmax) \ ++ { \ ++ (tv) = (value); \ ++ if ((tv) < (tvmin)) \ ++ (tv) = (tvmin); \ ++ else if ((tv) > (tvmax)) \ ++ (tv) = (tvmax); \ ++ } ++ ++extern const int tcp_backoff[]; ++ ++struct tcpcb; ++ ++void tcp_fasttimo(Slirp *); ++void tcp_slowtimo(Slirp *); ++void tcp_canceltimers(struct tcpcb *); ++ ++#endif +diff --git a/slirp/src/tcp_var.h b/slirp/src/tcp_var.h +new file mode 100644 +index 0000000..c8da8cb +--- /dev/null ++++ b/slirp/src/tcp_var.h +@@ -0,0 +1,161 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993, 1994 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. 
Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_var.h 8.3 (Berkeley) 4/10/94 ++ * tcp_var.h,v 1.3 1994/08/21 05:27:39 paul Exp ++ */ ++ ++#ifndef TCP_VAR_H ++#define TCP_VAR_H ++ ++#include "tcpip.h" ++#include "tcp_timer.h" ++ ++/* ++ * Tcp control block, one per tcp; fields: ++ */ ++struct tcpcb { ++ struct tcpiphdr *seg_next; /* sequencing queue */ ++ struct tcpiphdr *seg_prev; ++ short t_state; /* state of this connection */ ++ short t_timer[TCPT_NTIMERS]; /* tcp timers */ ++ short t_rxtshift; /* log(2) of rexmt exp. 
backoff */ ++ short t_rxtcur; /* current retransmit value */ ++ short t_dupacks; /* consecutive dup acks recd */ ++ uint16_t t_maxseg; /* maximum segment size */ ++ uint8_t t_force; /* 1 if forcing out a byte */ ++ uint16_t t_flags; ++#define TF_ACKNOW 0x0001 /* ack peer immediately */ ++#define TF_DELACK 0x0002 /* ack, but try to delay it */ ++#define TF_NODELAY 0x0004 /* don't delay packets to coalesce */ ++#define TF_NOOPT 0x0008 /* don't use tcp options */ ++#define TF_SENTFIN 0x0010 /* have sent FIN */ ++#define TF_REQ_SCALE 0x0020 /* have/will request window scaling */ ++#define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */ ++#define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */ ++#define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */ ++#define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */ ++ ++ struct tcpiphdr t_template; /* static skeletal packet for xmit */ ++ ++ struct socket *t_socket; /* back pointer to socket */ ++ /* ++ * The following fields are used as in the protocol specification. ++ * See RFC783, Dec. 1981, page 21. ++ */ ++ /* send sequence variables */ ++ tcp_seq snd_una; /* send unacknowledged */ ++ tcp_seq snd_nxt; /* send next */ ++ tcp_seq snd_up; /* send urgent pointer */ ++ tcp_seq snd_wl1; /* window update seg seq number */ ++ tcp_seq snd_wl2; /* window update seg ack number */ ++ tcp_seq iss; /* initial send sequence number */ ++ uint32_t snd_wnd; /* send window */ ++ /* receive sequence variables */ ++ uint32_t rcv_wnd; /* receive window */ ++ tcp_seq rcv_nxt; /* receive next */ ++ tcp_seq rcv_up; /* receive urgent pointer */ ++ tcp_seq irs; /* initial receive sequence number */ ++ /* ++ * Additional variables for this implementation. 
++ */ ++ /* receive variables */ ++ tcp_seq rcv_adv; /* advertised window */ ++ /* retransmit variables */ ++ tcp_seq snd_max; /* highest sequence number sent; ++ * used to recognize retransmits ++ */ ++ /* congestion control (for slow start, source quench, retransmit after loss) ++ */ ++ uint32_t snd_cwnd; /* congestion-controlled window */ ++ uint32_t snd_ssthresh; /* snd_cwnd size threshold for ++ * for slow start exponential to ++ * linear switch ++ */ ++ /* ++ * transmit timing stuff. See below for scale of srtt and rttvar. ++ * "Variance" is actually smoothed difference. ++ */ ++ short t_idle; /* inactivity time */ ++ short t_rtt; /* round trip time */ ++ tcp_seq t_rtseq; /* sequence number being timed */ ++ short t_srtt; /* smoothed round-trip time */ ++ short t_rttvar; /* variance in round-trip time */ ++ uint16_t t_rttmin; /* minimum rtt allowed */ ++ uint32_t max_sndwnd; /* largest window peer has offered */ ++ ++ /* out-of-band data */ ++ uint8_t t_oobflags; /* have some */ ++ uint8_t t_iobc; /* input character */ ++#define TCPOOB_HAVEDATA 0x01 ++#define TCPOOB_HADDATA 0x02 ++ short t_softerror; /* possible error not yet reported */ ++ ++ /* RFC 1323 variables */ ++ uint8_t snd_scale; /* window scaling for send window */ ++ uint8_t rcv_scale; /* window scaling for recv window */ ++ uint8_t request_r_scale; /* pending window scaling */ ++ uint8_t requested_s_scale; ++ uint32_t ts_recent; /* timestamp echo data */ ++ uint32_t ts_recent_age; /* when last updated */ ++ tcp_seq last_ack_sent; ++}; ++ ++#define sototcpcb(so) ((so)->so_tcpcb) ++ ++/* ++ * The smoothed round-trip time and estimated variance ++ * are stored as fixed point numbers scaled by the values below. ++ * For convenience, these scales are also used in smoothing the average ++ * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). ++ * With these scales, srtt has 3 bits to the right of the binary point, ++ * and thus an "ALPHA" of 0.875. 
rttvar has 2 bits to the right of the ++ * binary point, and is smoothed with an ALPHA of 0.75. ++ */ ++#define TCP_RTT_SCALE 8 /* multiplier for srtt; 3 bits frac. */ ++#define TCP_RTT_SHIFT 3 /* shift for srtt; 3 bits frac. */ ++#define TCP_RTTVAR_SCALE 4 /* multiplier for rttvar; 2 bits */ ++#define TCP_RTTVAR_SHIFT 2 /* multiplier for rttvar; 2 bits */ ++ ++/* ++ * The initial retransmission should happen at rtt + 4 * rttvar. ++ * Because of the way we do the smoothing, srtt and rttvar ++ * will each average +1/2 tick of bias. When we compute ++ * the retransmit timer, we want 1/2 tick of rounding and ++ * 1 extra tick because of +-1/2 tick uncertainty in the ++ * firing of the timer. The bias will give us exactly the ++ * 1.5 tick we need. But, because the bias is ++ * statistical, we have to test that we don't drop below ++ * the minimum feasible timer (which is 2 ticks). ++ * This macro assumes that the value of TCP_RTTVAR_SCALE ++ * is the same as the multiplier for rttvar. ++ */ ++#define TCP_REXMTVAL(tp) (((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar) ++ ++#endif +diff --git a/slirp/src/tcpip.h b/slirp/src/tcpip.h +new file mode 100644 +index 0000000..d3df021 +--- /dev/null ++++ b/slirp/src/tcpip.h +@@ -0,0 +1,104 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcpip.h 8.1 (Berkeley) 6/10/93 ++ * tcpip.h,v 1.3 1994/08/21 05:27:40 paul Exp ++ */ ++ ++#ifndef TCPIP_H ++#define TCPIP_H ++ ++/* ++ * Tcp+ip header, after ip options removed. 
++ */ ++struct tcpiphdr { ++ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ ++ union { ++ struct { ++ struct in_addr ih_src; /* source internet address */ ++ struct in_addr ih_dst; /* destination internet address */ ++ uint8_t ih_x1; /* (unused) */ ++ uint8_t ih_pr; /* protocol */ ++ } ti_i4; ++ struct { ++ struct in6_addr ih_src; ++ struct in6_addr ih_dst; ++ uint8_t ih_x1; ++ uint8_t ih_nh; ++ } ti_i6; ++ } ti; ++ uint16_t ti_x0; ++ uint16_t ti_len; /* protocol length */ ++ struct tcphdr ti_t; /* tcp header */ ++}; ++#define ti_mbuf ih_mbuf.mptr ++#define ti_pr ti.ti_i4.ih_pr ++#define ti_src ti.ti_i4.ih_src ++#define ti_dst ti.ti_i4.ih_dst ++#define ti_src6 ti.ti_i6.ih_src ++#define ti_dst6 ti.ti_i6.ih_dst ++#define ti_nh6 ti.ti_i6.ih_nh ++#define ti_sport ti_t.th_sport ++#define ti_dport ti_t.th_dport ++#define ti_seq ti_t.th_seq ++#define ti_ack ti_t.th_ack ++#define ti_x2 ti_t.th_x2 ++#define ti_off ti_t.th_off ++#define ti_flags ti_t.th_flags ++#define ti_win ti_t.th_win ++#define ti_sum ti_t.th_sum ++#define ti_urp ti_t.th_urp ++ ++#define tcpiphdr2qlink(T) \ ++ ((struct qlink *)(((char *)(T)) - sizeof(struct qlink))) ++#define qlink2tcpiphdr(Q) \ ++ ((struct tcpiphdr *)(((char *)(Q)) + sizeof(struct qlink))) ++#define tcpiphdr_next(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->next) ++#define tcpiphdr_prev(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->prev) ++#define tcpfrag_list_first(T) qlink2tcpiphdr((T)->seg_next) ++#define tcpfrag_list_end(F, T) (tcpiphdr2qlink(F) == (struct qlink *)(T)) ++#define tcpfrag_list_empty(T) ((T)->seg_next == (struct tcpiphdr *)(T)) ++ ++/* This is the difference between the size of a tcpiphdr structure, and the ++ * size of actual ip+tcp headers, rounded up since we need to align data. 
*/ ++#define TCPIPHDR_DELTA \ ++ (MAX(0, (sizeof(struct tcpiphdr) - sizeof(struct ip) - \ ++ sizeof(struct tcphdr) + 3) & \ ++ ~3)) ++ ++/* ++ * Just a clean way to get to the first byte ++ * of the packet ++ */ ++struct tcpiphdr_2 { ++ struct tcpiphdr dummy; ++ char first_char; ++}; ++ ++#endif +diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c +new file mode 100644 +index 0000000..093c2e0 +--- /dev/null ++++ b/slirp/src/tftp.c +@@ -0,0 +1,462 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * tftp.c - a simple, read-only tftp server for qemu ++ * ++ * Copyright (c) 2004 Magnus Damm ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++ ++#include "slirp.h" ++ ++#include ++#include ++#include ++ ++static inline int tftp_session_in_use(struct tftp_session *spt) ++{ ++ return (spt->slirp != NULL); ++} ++ ++static inline void tftp_session_update(struct tftp_session *spt) ++{ ++ spt->timestamp = curtime; ++} ++ ++static void tftp_session_terminate(struct tftp_session *spt) ++{ ++ if (spt->fd >= 0) { ++ close(spt->fd); ++ spt->fd = -1; ++ } ++ g_free(spt->filename); ++ spt->slirp = NULL; ++} ++ ++static int tftp_session_allocate(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftp_t *tp) ++{ ++ struct tftp_session *spt; ++ int k; ++ ++ for (k = 0; k < TFTP_SESSIONS_MAX; k++) { ++ spt = &slirp->tftp_sessions[k]; ++ ++ if (!tftp_session_in_use(spt)) ++ goto found; ++ ++ /* sessions time out after 5 inactive seconds */ ++ if ((int)(curtime - spt->timestamp) > 5000) { ++ tftp_session_terminate(spt); ++ goto found; ++ } ++ } ++ ++ return -1; ++ ++found: ++ memset(spt, 0, sizeof(*spt)); ++ memcpy(&spt->client_addr, srcsas, sockaddr_size(srcsas)); ++ spt->fd = -1; ++ spt->block_size = 512; ++ spt->client_port = tp->udp.uh_sport; ++ spt->slirp = slirp; ++ ++ tftp_session_update(spt); ++ ++ return k; ++} ++ ++static int tftp_session_find(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftp_t *tp) ++{ ++ struct tftp_session *spt; ++ int k; ++ ++ for (k = 0; k < TFTP_SESSIONS_MAX; k++) { ++ spt = &slirp->tftp_sessions[k]; ++ ++ if (tftp_session_in_use(spt)) { ++ if (sockaddr_equal(&spt->client_addr, srcsas)) { ++ if (spt->client_port == tp->udp.uh_sport) { ++ return k; ++ } ++ } ++ } ++ } ++ ++ return -1; ++} ++ ++static int tftp_read_data(struct tftp_session *spt, uint32_t block_nr, ++ uint8_t *buf, int len) ++{ ++ int bytes_read = 0; ++ ++ if (spt->fd < 0) { ++ spt->fd = open(spt->filename, O_RDONLY | O_BINARY); ++ } ++ ++ if (spt->fd < 0) { ++ return -1; ++ } ++ ++ if (len) { ++ lseek(spt->fd, block_nr * spt->block_size, SEEK_SET); ++ ++ bytes_read = read(spt->fd, buf, len); ++ } ++ 
++ return bytes_read; ++} ++ ++static struct tftp_t *tftp_prep_mbuf_data(struct tftp_session *spt, ++ struct mbuf *m) ++{ ++ struct tftp_t *tp; ++ ++ memset(m->m_data, 0, m->m_size); ++ ++ m->m_data += IF_MAXLINKHDR; ++ if (spt->client_addr.ss_family == AF_INET6) { ++ m->m_data += sizeof(struct ip6); ++ } else { ++ m->m_data += sizeof(struct ip); ++ } ++ tp = (void *)m->m_data; ++ m->m_data += sizeof(struct udphdr); ++ ++ return tp; ++} ++ ++static void tftp_udp_output(struct tftp_session *spt, struct mbuf *m, ++ struct tftp_t *recv_tp) ++{ ++ if (spt->client_addr.ss_family == AF_INET6) { ++ struct sockaddr_in6 sa6, da6; ++ ++ sa6.sin6_addr = spt->slirp->vhost_addr6; ++ sa6.sin6_port = recv_tp->udp.uh_dport; ++ da6.sin6_addr = ((struct sockaddr_in6 *)&spt->client_addr)->sin6_addr; ++ da6.sin6_port = spt->client_port; ++ ++ udp6_output(NULL, m, &sa6, &da6); ++ } else { ++ struct sockaddr_in sa4, da4; ++ ++ sa4.sin_addr = spt->slirp->vhost_addr; ++ sa4.sin_port = recv_tp->udp.uh_dport; ++ da4.sin_addr = ((struct sockaddr_in *)&spt->client_addr)->sin_addr; ++ da4.sin_port = spt->client_port; ++ ++ udp_output(NULL, m, &sa4, &da4, IPTOS_LOWDELAY); ++ } ++} ++ ++static int tftp_send_oack(struct tftp_session *spt, const char *keys[], ++ uint32_t values[], int nb, struct tftp_t *recv_tp) ++{ ++ struct mbuf *m; ++ struct tftp_t *tp; ++ int i, n = 0; ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) ++ return -1; ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->tp_op = htons(TFTP_OACK); ++ for (i = 0; i < nb; i++) { ++ n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%s", ++ keys[i]) + ++ 1; ++ n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%u", ++ values[i]) + ++ 1; ++ } ++ ++ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX + 2) + n - ++ sizeof(struct udphdr); ++ tftp_udp_output(spt, m, recv_tp); ++ ++ return 0; ++} ++ ++static void tftp_send_error(struct tftp_session *spt, uint16_t errorcode, ++ const char *msg, struct tftp_t *recv_tp) ++{ ++ struct 
mbuf *m; ++ struct tftp_t *tp; ++ ++ DEBUG_TFTP("tftp error msg: %s", msg); ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) { ++ goto out; ++ } ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->tp_op = htons(TFTP_ERROR); ++ tp->x.tp_error.tp_error_code = htons(errorcode); ++ slirp_pstrcpy((char *)tp->x.tp_error.tp_msg, sizeof(tp->x.tp_error.tp_msg), ++ msg); ++ ++ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX + 2) + 3 + ++ strlen(msg) - sizeof(struct udphdr); ++ tftp_udp_output(spt, m, recv_tp); ++ ++out: ++ tftp_session_terminate(spt); ++} ++ ++static void tftp_send_next_block(struct tftp_session *spt, ++ struct tftp_t *recv_tp) ++{ ++ struct mbuf *m; ++ struct tftp_t *tp; ++ int nobytes; ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) { ++ return; ++ } ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->tp_op = htons(TFTP_DATA); ++ tp->x.tp_data.tp_block_nr = htons((spt->block_nr + 1) & 0xffff); ++ ++ nobytes = tftp_read_data(spt, spt->block_nr, tp->x.tp_data.tp_buf, ++ spt->block_size); ++ ++ if (nobytes < 0) { ++ m_free(m); ++ ++ /* send "file not found" back; tp points into the released m, so reply via recv_tp */ ++ ++ tftp_send_error(spt, 1, "File not found", recv_tp); ++ ++ return; ++ } ++ ++ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX - nobytes) - ++ sizeof(struct udphdr); ++ tftp_udp_output(spt, m, recv_tp); ++ ++ if (nobytes == spt->block_size) { ++ tftp_session_update(spt); ++ } else { ++ tftp_session_terminate(spt); ++ } ++ ++ spt->block_nr++; ++} ++ ++static void tftp_handle_rrq(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftp_t *tp, int pktlen) ++{ ++ struct tftp_session *spt; ++ int s, k; ++ size_t prefix_len; ++ char *req_fname; ++ const char *option_name[2]; ++ uint32_t option_value[2]; ++ int nb_options = 0; ++ ++ /* check if a session already exists and if so terminate it */ ++ s = tftp_session_find(slirp, srcsas, tp); ++ if (s >= 0) { ++ tftp_session_terminate(&slirp->tftp_sessions[s]); ++ } ++ ++ s = tftp_session_allocate(slirp, srcsas, tp); ++ ++ if (s < 0) { ++ return; ++ 
} ++ ++ spt = &slirp->tftp_sessions[s]; ++ ++ /* unspecified prefix means service disabled */ ++ if (!slirp->tftp_prefix) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ /* skip header fields */ ++ k = 0; ++ pktlen -= offsetof(struct tftp_t, x.tp_buf); ++ ++ /* prepend tftp_prefix */ ++ prefix_len = strlen(slirp->tftp_prefix); ++ spt->filename = g_malloc(prefix_len + TFTP_FILENAME_MAX + 2); ++ memcpy(spt->filename, slirp->tftp_prefix, prefix_len); ++ spt->filename[prefix_len] = '/'; ++ ++ /* get name */ ++ req_fname = spt->filename + prefix_len + 1; ++ ++ while (1) { ++ if (k >= TFTP_FILENAME_MAX || k >= pktlen) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ req_fname[k] = tp->x.tp_buf[k]; ++ if (req_fname[k++] == '\0') { ++ break; ++ } ++ } ++ ++ DEBUG_TFTP("tftp rrq file: %s", req_fname); ++ ++ /* check mode */ ++ if ((pktlen - k) < 6) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ if (strcasecmp(&tp->x.tp_buf[k], "octet") != 0) { ++ tftp_send_error(spt, 4, "Unsupported transfer mode", tp); ++ return; ++ } ++ ++ k += 6; /* skipping octet */ ++ ++ /* do sanity checks on the filename */ ++ if (!strncmp(req_fname, "../", 3) || ++ req_fname[strlen(req_fname) - 1] == '/' || strstr(req_fname, "/../")) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ /* check if the file exists */ ++ if (tftp_read_data(spt, 0, NULL, 0) < 0) { ++ tftp_send_error(spt, 1, "File not found", tp); ++ return; ++ } ++ ++ if (tp->x.tp_buf[pktlen - 1] != 0) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ while (k < pktlen && nb_options < G_N_ELEMENTS(option_name)) { ++ const char *key, *value; ++ ++ key = &tp->x.tp_buf[k]; ++ k += strlen(key) + 1; ++ ++ if (k >= pktlen) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ value = &tp->x.tp_buf[k]; ++ k += strlen(value) + 1; ++ ++ if (strcasecmp(key, "tsize") == 0) { ++ int tsize = 
atoi(value); ++ struct stat stat_p; ++ ++ if (tsize == 0) { ++ if (stat(spt->filename, &stat_p) == 0) ++ tsize = stat_p.st_size; ++ else { ++ tftp_send_error(spt, 1, "File not found", tp); ++ return; ++ } ++ } ++ ++ option_name[nb_options] = "tsize"; ++ option_value[nb_options] = tsize; ++ nb_options++; ++ } else if (strcasecmp(key, "blksize") == 0) { ++ int blksize = atoi(value); ++ ++ /* Accept blksize up to our maximum size */ ++ if (blksize > 0) { ++ spt->block_size = MIN(blksize, TFTP_BLOCKSIZE_MAX); ++ option_name[nb_options] = "blksize"; ++ option_value[nb_options] = spt->block_size; ++ nb_options++; ++ } ++ } ++ } ++ ++ if (nb_options > 0) { ++ assert(nb_options <= G_N_ELEMENTS(option_name)); ++ tftp_send_oack(spt, option_name, option_value, nb_options, tp); ++ return; ++ } ++ ++ spt->block_nr = 0; ++ tftp_send_next_block(spt, tp); ++} ++ ++static void tftp_handle_ack(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftp_t *tp, int pktlen) ++{ ++ int s; ++ ++ s = tftp_session_find(slirp, srcsas, tp); ++ ++ if (s < 0) { ++ return; ++ } ++ ++ tftp_send_next_block(&slirp->tftp_sessions[s], tp); ++} ++ ++static void tftp_handle_error(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftp_t *tp, int pktlen) ++{ ++ int s; ++ ++ s = tftp_session_find(slirp, srcsas, tp); ++ ++ if (s < 0) { ++ return; ++ } ++ ++ tftp_session_terminate(&slirp->tftp_sessions[s]); ++} ++ ++void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m) ++{ ++ struct tftp_t *tp = (struct tftp_t *)m->m_data; ++ ++ switch (ntohs(tp->tp_op)) { ++ case TFTP_RRQ: ++ tftp_handle_rrq(m->slirp, srcsas, tp, m->m_len); ++ break; ++ ++ case TFTP_ACK: ++ tftp_handle_ack(m->slirp, srcsas, tp, m->m_len); ++ break; ++ ++ case TFTP_ERROR: ++ tftp_handle_error(m->slirp, srcsas, tp, m->m_len); ++ break; ++ } ++} +diff --git a/slirp/src/tftp.h b/slirp/src/tftp.h +new file mode 100644 +index 0000000..c47bb43 +--- /dev/null ++++ b/slirp/src/tftp.h +@@ -0,0 +1,52 @@ ++/* 
SPDX-License-Identifier: BSD-3-Clause */ ++/* tftp defines */ ++ ++#ifndef SLIRP_TFTP_H ++#define SLIRP_TFTP_H ++ ++#define TFTP_SESSIONS_MAX 20 ++ ++#define TFTP_SERVER 69 ++ ++#define TFTP_RRQ 1 ++#define TFTP_WRQ 2 ++#define TFTP_DATA 3 ++#define TFTP_ACK 4 ++#define TFTP_ERROR 5 ++#define TFTP_OACK 6 ++ ++#define TFTP_FILENAME_MAX 512 ++#define TFTP_BLOCKSIZE_MAX 1428 ++ ++struct tftp_t { ++ struct udphdr udp; ++ uint16_t tp_op; ++ union { ++ struct { ++ uint16_t tp_block_nr; ++ uint8_t tp_buf[TFTP_BLOCKSIZE_MAX]; ++ } tp_data; ++ struct { ++ uint16_t tp_error_code; ++ uint8_t tp_msg[TFTP_BLOCKSIZE_MAX]; ++ } tp_error; ++ char tp_buf[TFTP_BLOCKSIZE_MAX + 2]; ++ } x; ++} __attribute__((packed)); ++ ++struct tftp_session { ++ Slirp *slirp; ++ char *filename; ++ int fd; ++ uint16_t block_size; ++ ++ struct sockaddr_storage client_addr; ++ uint16_t client_port; ++ uint32_t block_nr; ++ ++ int timestamp; ++}; ++ ++void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/udp.c b/slirp/src/udp.c +new file mode 100644 +index 0000000..ae23ba4 +--- /dev/null ++++ b/slirp/src/udp.c +@@ -0,0 +1,354 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)udp_usrreq.c 8.4 (Berkeley) 1/21/94 ++ * udp_usrreq.c,v 1.4 1994/10/02 17:48:45 phk Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ * ++ * Please read the file COPYRIGHT for the ++ * terms and conditions of the copyright. 
++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++static uint8_t udp_tos(struct socket *so); ++ ++void udp_init(Slirp *slirp) ++{ ++ slirp->udb.so_next = slirp->udb.so_prev = &slirp->udb; ++ slirp->udp_last_so = &slirp->udb; ++} ++ ++void udp_cleanup(Slirp *slirp) ++{ ++ while (slirp->udb.so_next != &slirp->udb) { ++ udp_detach(slirp->udb.so_next); ++ } ++} ++ ++/* m->m_data points at ip packet header ++ * m->m_len length ip packet ++ * ip->ip_len length data (IPDU) ++ */ ++void udp_input(register struct mbuf *m, int iphlen) ++{ ++ Slirp *slirp = m->slirp; ++ register struct ip *ip; ++ register struct udphdr *uh; ++ int len; ++ struct ip save_ip; ++ struct socket *so; ++ struct sockaddr_storage lhost; ++ struct sockaddr_in *lhost4; ++ ++ DEBUG_CALL("udp_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("iphlen = %d", iphlen); ++ ++ /* ++ * Strip IP options, if any; should skip this, ++ * make available to user, and use on returned packets, ++ * but we don't yet have a way to check the checksum ++ * with options still present. ++ */ ++ if (iphlen > sizeof(struct ip)) { ++ ip_stripoptions(m, (struct mbuf *)0); ++ iphlen = sizeof(struct ip); ++ } ++ ++ /* ++ * Get IP and UDP header together in first mbuf. ++ */ ++ ip = mtod(m, struct ip *); ++ uh = (struct udphdr *)((char *)ip + iphlen); ++ ++ /* ++ * Make mbuf data length reflect UDP length. ++ * If not enough data to reflect UDP length, drop. ++ */ ++ len = ntohs((uint16_t)uh->uh_ulen); ++ ++ if (ip->ip_len != len) { ++ if (len > ip->ip_len) { ++ goto bad; ++ } ++ m_adj(m, len - ip->ip_len); ++ ip->ip_len = len; ++ } ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip = *ip; ++ save_ip.ip_len += iphlen; /* tcp_input subtracts this */ ++ ++ /* ++ * Checksum extended UDP header and data. 
++ */ ++ if (uh->uh_sum) { ++ memset(&((struct ipovly *)ip)->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ ((struct ipovly *)ip)->ih_x1 = 0; ++ ((struct ipovly *)ip)->ih_len = uh->uh_ulen; ++ if (cksum(m, len + sizeof(struct ip))) { ++ goto bad; ++ } ++ } ++ ++ lhost.ss_family = AF_INET; ++ lhost4 = (struct sockaddr_in *)&lhost; ++ lhost4->sin_addr = ip->ip_src; ++ lhost4->sin_port = uh->uh_sport; ++ ++ /* ++ * handle DHCP/BOOTP ++ */ ++ if (ntohs(uh->uh_dport) == BOOTP_SERVER && ++ (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || ++ ip->ip_dst.s_addr == 0xffffffff)) { ++ bootp_input(m); ++ goto bad; ++ } ++ ++ /* ++ * handle TFTP ++ */ ++ if (ntohs(uh->uh_dport) == TFTP_SERVER && ++ ip->ip_dst.s_addr == slirp->vhost_addr.s_addr) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ tftp_input(&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ if (slirp->restricted) { ++ goto bad; ++ } ++ ++ /* ++ * Locate pcb for datagram. ++ */ ++ so = solookup(&slirp->udp_last_so, &slirp->udb, &lhost, NULL); ++ ++ if (so == NULL) { ++ /* ++ * If there's no socket for this packet, ++ * create one ++ */ ++ so = socreate(slirp); ++ if (udp_attach(so, AF_INET) == -1) { ++ DEBUG_MISC(" udp_attach errno = %d-%s", errno, strerror(errno)); ++ sofree(so); ++ goto bad; ++ } ++ ++ /* ++ * Setup fields ++ */ ++ so->so_lfamily = AF_INET; ++ so->so_laddr = ip->ip_src; ++ so->so_lport = uh->uh_sport; ++ ++ if ((so->so_iptos = udp_tos(so)) == 0) ++ so->so_iptos = ip->ip_tos; ++ ++ /* ++ * XXXXX Here, check if it's in udpexec_list, ++ * and if it is, do the fork_exec() etc. ++ */ ++ } ++ ++ so->so_ffamily = AF_INET; ++ so->so_faddr = ip->ip_dst; /* XXX */ ++ so->so_fport = uh->uh_dport; /* XXX */ ++ ++ iphlen += sizeof(struct udphdr); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ ++ /* ++ * Now we sendto() the packet. 
++ */ ++ if (sosendto(so, m) == -1) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); ++ goto bad; ++ } ++ ++ m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ ++ ++ /* restore the orig mbuf packet */ ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ so->so_m = m; /* ICMP backup */ ++ ++ return; ++bad: ++ m_free(m); ++} ++ ++int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr, ++ struct sockaddr_in *daddr, int iptos) ++{ ++ register struct udpiphdr *ui; ++ int error = 0; ++ ++ DEBUG_CALL("udp_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("saddr = %s", inet_ntoa(saddr->sin_addr)); ++ DEBUG_ARG("daddr = %s", inet_ntoa(daddr->sin_addr)); ++ ++ /* ++ * Adjust for header ++ */ ++ m->m_data -= sizeof(struct udpiphdr); ++ m->m_len += sizeof(struct udpiphdr); ++ ++ /* ++ * Fill in mbuf with extended UDP header ++ * and addresses and length put into network format. ++ */ ++ ui = mtod(m, struct udpiphdr *); ++ memset(&ui->ui_i.ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ ui->ui_x1 = 0; ++ ui->ui_pr = IPPROTO_UDP; ++ ui->ui_len = htons(m->m_len - sizeof(struct ip)); ++ /* XXXXX Check for from-one-location sockets, or from-any-location sockets ++ */ ++ ui->ui_src = saddr->sin_addr; ++ ui->ui_dst = daddr->sin_addr; ++ ui->ui_sport = saddr->sin_port; ++ ui->ui_dport = daddr->sin_port; ++ ui->ui_ulen = ui->ui_len; ++ ++ /* ++ * Stuff checksum and output datagram. 
++ */ ++ ui->ui_sum = 0; ++ if ((ui->ui_sum = cksum(m, m->m_len)) == 0) ++ ui->ui_sum = 0xffff; ++ ((struct ip *)ui)->ip_len = m->m_len; ++ ++ ((struct ip *)ui)->ip_ttl = IPDEFTTL; ++ ((struct ip *)ui)->ip_tos = iptos; ++ ++ error = ip_output(so, m); ++ ++ return (error); ++} ++ ++int udp_attach(struct socket *so, unsigned short af) ++{ ++ so->s = slirp_socket(af, SOCK_DGRAM, 0); ++ if (so->s != -1) { ++ so->so_expire = curtime + SO_EXPIRE; ++ insque(so, &so->slirp->udb); ++ } ++ return (so->s); ++} ++ ++void udp_detach(struct socket *so) ++{ ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++} ++ ++static const struct tos_t udptos[] = { { 0, 53, IPTOS_LOWDELAY, 0 }, /* DNS */ ++ { 0, 0, 0, 0 } }; ++ ++static uint8_t udp_tos(struct socket *so) ++{ ++ int i = 0; ++ ++ while (udptos[i].tos) { ++ if ((udptos[i].fport && ntohs(so->so_fport) == udptos[i].fport) || ++ (udptos[i].lport && ntohs(so->so_lport) == udptos[i].lport)) { ++ so->so_emu = udptos[i].emu; ++ return udptos[i].tos; ++ } ++ i++; ++ } ++ ++ return 0; ++} ++ ++struct socket *udp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, ++ uint32_t laddr, unsigned lport, int flags) ++{ ++ /* TODO: IPv6 */ ++ struct sockaddr_in addr; ++ struct socket *so; ++ socklen_t addrlen = sizeof(struct sockaddr_in); ++ ++ so = socreate(slirp); ++ so->s = slirp_socket(AF_INET, SOCK_DGRAM, 0); ++ if (so->s < 0) { ++ sofree(so); ++ return NULL; ++ } ++ so->so_expire = curtime + SO_EXPIRE; ++ insque(so, &slirp->udb); ++ ++ addr.sin_family = AF_INET; ++ addr.sin_addr.s_addr = haddr; ++ addr.sin_port = hport; ++ ++ if (bind(so->s, (struct sockaddr *)&addr, addrlen) < 0) { ++ udp_detach(so); ++ return NULL; ++ } ++ slirp_socket_set_fast_reuse(so->s); ++ ++ getsockname(so->s, (struct sockaddr *)&addr, &addrlen); ++ so->fhost.sin = addr; ++ sotranslate_accept(so); ++ so->so_lfamily = AF_INET; ++ so->so_lport = lport; ++ so->so_laddr.s_addr = laddr; ++ if (flags != 
SS_FACCEPTONCE) ++ so->so_expire = 0; ++ ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_ISFCONNECTED | flags; ++ ++ return so; ++} +diff --git a/slirp/src/udp.h b/slirp/src/udp.h +new file mode 100644 +index 0000000..c3b83fd +--- /dev/null ++++ b/slirp/src/udp.h +@@ -0,0 +1,90 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. 
++ * ++ * @(#)udp.h 8.1 (Berkeley) 6/10/93 ++ * udp.h,v 1.3 1994/08/21 05:27:41 paul Exp ++ */ ++ ++#ifndef UDP_H ++#define UDP_H ++ ++#define UDP_TTL 0x60 ++#define UDP_UDPDATALEN 16192 ++ ++/* ++ * Udp protocol header. ++ * Per RFC 768, September, 1981. ++ */ ++struct udphdr { ++ uint16_t uh_sport; /* source port */ ++ uint16_t uh_dport; /* destination port */ ++ int16_t uh_ulen; /* udp length */ ++ uint16_t uh_sum; /* udp checksum */ ++}; ++ ++/* ++ * UDP kernel structures and variables. ++ */ ++struct udpiphdr { ++ struct ipovly ui_i; /* overlaid ip structure */ ++ struct udphdr ui_u; /* udp header */ ++}; ++#define ui_mbuf ui_i.ih_mbuf.mptr ++#define ui_x1 ui_i.ih_x1 ++#define ui_pr ui_i.ih_pr ++#define ui_len ui_i.ih_len ++#define ui_src ui_i.ih_src ++#define ui_dst ui_i.ih_dst ++#define ui_sport ui_u.uh_sport ++#define ui_dport ui_u.uh_dport ++#define ui_ulen ui_u.uh_ulen ++#define ui_sum ui_u.uh_sum ++ ++/* ++ * Names for UDP sysctl objects ++ */ ++#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */ ++#define UDPCTL_MAXID 2 ++ ++struct mbuf; ++ ++void udp_init(Slirp *); ++void udp_cleanup(Slirp *); ++void udp_input(register struct mbuf *, int); ++int udp_attach(struct socket *, unsigned short af); ++void udp_detach(struct socket *); ++struct socket *udp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); ++int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr, ++ struct sockaddr_in *daddr, int iptos); ++ ++void udp6_input(register struct mbuf *); ++int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, ++ struct sockaddr_in6 *daddr); ++ ++#endif +diff --git a/slirp/src/udp6.c b/slirp/src/udp6.c +new file mode 100644 +index 0000000..6f9486b +--- /dev/null ++++ b/slirp/src/udp6.c +@@ -0,0 +1,173 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron ++ */ ++ ++#include "slirp.h" ++#include "udp.h" ++#include "dhcpv6.h" ++ ++void udp6_input(struct mbuf *m) 
++{ ++ Slirp *slirp = m->slirp; ++ struct ip6 *ip, save_ip; ++ struct udphdr *uh; ++ int iphlen = sizeof(struct ip6); ++ int len; ++ struct socket *so; ++ struct sockaddr_in6 lhost; ++ ++ DEBUG_CALL("udp6_input"); ++ DEBUG_ARG("m = %p", m); ++ ++ if (slirp->restricted) { ++ goto bad; ++ } ++ ++ ip = mtod(m, struct ip6 *); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ uh = mtod(m, struct udphdr *); ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ ++ if (ip6_cksum(m)) { ++ goto bad; ++ } ++ ++ len = ntohs((uint16_t)uh->uh_ulen); ++ ++ /* ++ * Make mbuf data length reflect UDP length. ++ * If not enough data to reflect UDP length, drop. ++ */ ++ if (ntohs(ip->ip_pl) != len) { ++ if (len > ntohs(ip->ip_pl)) { ++ goto bad; ++ } ++ m_adj(m, len - ntohs(ip->ip_pl)); ++ ip->ip_pl = htons(len); ++ } ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip = *ip; ++ ++ /* Locate pcb for datagram. */ ++ lhost.sin6_family = AF_INET6; ++ lhost.sin6_addr = ip->ip_src; ++ lhost.sin6_port = uh->uh_sport; ++ ++ /* handle DHCPv6 */ ++ if (ntohs(uh->uh_dport) == DHCPV6_SERVER_PORT && ++ (in6_equal(&ip->ip_dst, &slirp->vhost_addr6) || ++ in6_dhcp_multicast(&ip->ip_dst))) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ dhcpv6_input(&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ /* handle TFTP */ ++ if (ntohs(uh->uh_dport) == TFTP_SERVER && ++ !memcmp(ip->ip_dst.s6_addr, slirp->vhost_addr6.s6_addr, 16)) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ tftp_input((struct sockaddr_storage *)&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ so = solookup(&slirp->udp_last_so, &slirp->udb, ++ (struct sockaddr_storage *)&lhost, NULL); ++ ++ if (so == NULL) { ++ /* If there's no socket for this packet, create one. 
*/ ++ so = socreate(slirp); ++ if (udp_attach(so, AF_INET6) == -1) { ++ DEBUG_MISC(" udp6_attach errno = %d-%s", errno, strerror(errno)); ++ sofree(so); ++ goto bad; ++ } ++ ++ /* Setup fields */ ++ so->so_lfamily = AF_INET6; ++ so->so_laddr6 = ip->ip_src; ++ so->so_lport6 = uh->uh_sport; ++ } ++ ++ so->so_ffamily = AF_INET6; ++ so->so_faddr6 = ip->ip_dst; /* XXX */ ++ so->so_fport6 = uh->uh_dport; /* XXX */ ++ ++ iphlen += sizeof(struct udphdr); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ ++ /* ++ * Now we sendto() the packet. ++ */ ++ if (sosendto(so, m) == -1) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); ++ icmp6_send_error(m, ICMP6_UNREACH, ICMP6_UNREACH_NO_ROUTE); ++ goto bad; ++ } ++ ++ m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ ++ ++ /* restore the orig mbuf packet */ ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ so->so_m = m; ++ ++ return; ++bad: ++ m_free(m); ++} ++ ++int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, ++ struct sockaddr_in6 *daddr) ++{ ++ struct ip6 *ip; ++ struct udphdr *uh; ++ ++ DEBUG_CALL("udp6_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ /* adjust for header */ ++ m->m_data -= sizeof(struct udphdr); ++ m->m_len += sizeof(struct udphdr); ++ uh = mtod(m, struct udphdr *); ++ m->m_data -= sizeof(struct ip6); ++ m->m_len += sizeof(struct ip6); ++ ip = mtod(m, struct ip6 *); ++ ++ /* Build IP header */ ++ ip->ip_pl = htons(m->m_len - sizeof(struct ip6)); ++ ip->ip_nh = IPPROTO_UDP; ++ ip->ip_src = saddr->sin6_addr; ++ ip->ip_dst = daddr->sin6_addr; ++ ++ /* Build UDP header */ ++ uh->uh_sport = saddr->sin6_port; ++ uh->uh_dport = daddr->sin6_port; ++ uh->uh_ulen = ip->ip_pl; ++ uh->uh_sum = 0; ++ uh->uh_sum = ip6_cksum(m); ++ if (uh->uh_sum == 0) { ++ uh->uh_sum = 0xffff; ++ } ++ ++ return ip6_output(so, m, 0); ++} +diff --git a/slirp/src/util.c 
b/slirp/src/util.c +new file mode 100644 +index 0000000..e596087 +--- /dev/null ++++ b/slirp/src/util.c +@@ -0,0 +1,366 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * util.c (mostly based on QEMU os-win32.c) ++ * ++ * Copyright (c) 2003-2008 Fabrice Bellard ++ * Copyright (c) 2010-2016 Red Hat, Inc. ++ * ++ * QEMU library functions for win32 which are shared between QEMU and ++ * the QEMU tools. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "util.h" ++ ++#include ++#include ++#include ++ ++#if defined(_WIN32) ++int slirp_inet_aton(const char *cp, struct in_addr *ia) ++{ ++ uint32_t addr = inet_addr(cp); ++ if (addr == 0xffffffff) { ++ return 0; ++ } ++ ia->s_addr = addr; ++ return 1; ++} ++#endif ++ ++void slirp_set_nonblock(int fd) ++{ ++#ifndef _WIN32 ++ int f; ++ f = fcntl(fd, F_GETFL); ++ assert(f != -1); ++ f = fcntl(fd, F_SETFL, f | O_NONBLOCK); ++ assert(f != -1); ++#else ++ unsigned long opt = 1; ++ ioctlsocket(fd, FIONBIO, &opt); ++#endif ++} ++ ++static void slirp_set_cloexec(int fd) ++{ ++#ifndef _WIN32 ++ int f; ++ f = fcntl(fd, F_GETFD); ++ assert(f != -1); ++ f = fcntl(fd, F_SETFD, f | FD_CLOEXEC); ++ assert(f != -1); ++#endif ++} ++ ++/* ++ * Opens a socket with FD_CLOEXEC set ++ */ ++int slirp_socket(int domain, int type, int protocol) ++{ ++ int ret; ++ ++#ifdef SOCK_CLOEXEC ++ ret = socket(domain, type | SOCK_CLOEXEC, protocol); ++ if (ret != -1 || errno != EINVAL) { ++ return ret; ++ } ++#endif ++ ret = socket(domain, type, protocol); ++ if (ret >= 0) { ++ slirp_set_cloexec(ret); ++ } ++ ++ return ret; ++} ++ ++#ifdef _WIN32 ++static int socket_error(void) ++{ ++ switch (WSAGetLastError()) { ++ case 0: ++ return 0; ++ case WSAEINTR: ++ return EINTR; ++ case WSAEINVAL: ++ return EINVAL; ++ case WSA_INVALID_HANDLE: ++ return EBADF; ++ case WSA_NOT_ENOUGH_MEMORY: ++ return ENOMEM; ++ case WSA_INVALID_PARAMETER: ++ return EINVAL; ++ case WSAENAMETOOLONG: ++ return ENAMETOOLONG; ++ case WSAENOTEMPTY: ++ return ENOTEMPTY; ++ case WSAEWOULDBLOCK: ++ /* not using EWOULDBLOCK as we don't want code to have ++ * to check both EWOULDBLOCK and EAGAIN */ ++ return EAGAIN; ++ case WSAEINPROGRESS: ++ return EINPROGRESS; ++ case WSAEALREADY: ++ return EALREADY; ++ case WSAENOTSOCK: ++ return ENOTSOCK; ++ case WSAEDESTADDRREQ: ++ return EDESTADDRREQ; ++ case WSAEMSGSIZE: ++ return EMSGSIZE; ++ case WSAEPROTOTYPE: ++ return EPROTOTYPE; ++ case WSAENOPROTOOPT: ++ return ENOPROTOOPT; ++ 
case WSAEPROTONOSUPPORT: ++ return EPROTONOSUPPORT; ++ case WSAEOPNOTSUPP: ++ return EOPNOTSUPP; ++ case WSAEAFNOSUPPORT: ++ return EAFNOSUPPORT; ++ case WSAEADDRINUSE: ++ return EADDRINUSE; ++ case WSAEADDRNOTAVAIL: ++ return EADDRNOTAVAIL; ++ case WSAENETDOWN: ++ return ENETDOWN; ++ case WSAENETUNREACH: ++ return ENETUNREACH; ++ case WSAENETRESET: ++ return ENETRESET; ++ case WSAECONNABORTED: ++ return ECONNABORTED; ++ case WSAECONNRESET: ++ return ECONNRESET; ++ case WSAENOBUFS: ++ return ENOBUFS; ++ case WSAEISCONN: ++ return EISCONN; ++ case WSAENOTCONN: ++ return ENOTCONN; ++ case WSAETIMEDOUT: ++ return ETIMEDOUT; ++ case WSAECONNREFUSED: ++ return ECONNREFUSED; ++ case WSAELOOP: ++ return ELOOP; ++ case WSAEHOSTUNREACH: ++ return EHOSTUNREACH; ++ default: ++ return EIO; ++ } ++} ++ ++#undef ioctlsocket ++int slirp_ioctlsocket_wrap(int fd, int req, void *val) ++{ ++ int ret; ++ ret = ioctlsocket(fd, req, val); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef closesocket ++int slirp_closesocket_wrap(int fd) ++{ ++ int ret; ++ ret = closesocket(fd); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef connect ++int slirp_connect_wrap(int sockfd, const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = connect(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef listen ++int slirp_listen_wrap(int sockfd, int backlog) ++{ ++ int ret; ++ ret = listen(sockfd, backlog); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef bind ++int slirp_bind_wrap(int sockfd, const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = bind(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef socket ++int slirp_socket_wrap(int domain, int type, int protocol) ++{ ++ int ret; ++ ret = socket(domain, type, protocol); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return 
ret; ++} ++ ++#undef accept ++int slirp_accept_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = accept(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef shutdown ++int slirp_shutdown_wrap(int sockfd, int how) ++{ ++ int ret; ++ ret = shutdown(sockfd, how); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getsockopt ++int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, ++ int *optlen) ++{ ++ int ret; ++ ret = getsockopt(sockfd, level, optname, optval, optlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef setsockopt ++int slirp_setsockopt_wrap(int sockfd, int level, int optname, ++ const void *optval, int optlen) ++{ ++ int ret; ++ ret = setsockopt(sockfd, level, optname, optval, optlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getpeername ++int slirp_getpeername_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = getpeername(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getsockname ++int slirp_getsockname_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = getsockname(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef send ++ssize_t slirp_send_wrap(int sockfd, const void *buf, size_t len, int flags) ++{ ++ int ret; ++ ret = send(sockfd, buf, len, flags); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef sendto ++ssize_t slirp_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, ++ const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = sendto(sockfd, buf, len, flags, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef recv ++ssize_t slirp_recv_wrap(int sockfd, void *buf, size_t len, int 
flags) ++{ ++ int ret; ++ ret = recv(sockfd, buf, len, flags); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef recvfrom ++ssize_t slirp_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, ++ struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = recvfrom(sockfd, buf, len, flags, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++#endif /* WIN32 */ ++ ++void slirp_pstrcpy(char *buf, int buf_size, const char *str) ++{ ++ int c; ++ char *q = buf; ++ ++ if (buf_size <= 0) ++ return; ++ ++ for (;;) { ++ c = *str++; ++ if (c == 0 || q >= buf + buf_size - 1) ++ break; ++ *q++ = c; ++ } ++ *q = '\0'; ++} +diff --git a/slirp/src/util.h b/slirp/src/util.h +new file mode 100644 +index 0000000..3c6223c +--- /dev/null ++++ b/slirp/src/util.h +@@ -0,0 +1,180 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * Copyright (c) 2003-2008 Fabrice Bellard ++ * Copyright (c) 2010-2019 Red Hat, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++#ifndef UTIL_H_ ++#define UTIL_H_ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef _WIN32 ++#include ++#include ++#else ++#include ++#include ++#include ++#endif ++ ++#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) ++#define SLIRP_PACKED __attribute__((gcc_struct, packed)) ++#else ++#define SLIRP_PACKED __attribute__((packed)) ++#endif ++ ++#ifndef DIV_ROUND_UP ++#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) ++#endif ++ ++#ifndef container_of ++#define container_of(ptr, type, member) \ ++ __extension__({ \ ++ void *__mptr = (void *)(ptr); \ ++ ((type *)(__mptr - offsetof(type, member))); \ ++ }) ++#endif ++ ++#if defined(_WIN32) /* CONFIG_IOVEC */ ++#if !defined(IOV_MAX) /* XXX: to avoid duplicate with QEMU osdep.h */ ++struct iovec { ++ void *iov_base; ++ size_t iov_len; ++}; ++#endif ++#else ++#include ++#endif ++ ++#define stringify(s) tostring(s) ++#define tostring(s) #s ++ ++#define SCALE_MS 1000000 ++ ++#define ETH_ALEN 6 ++#define ETH_HLEN 14 ++#define ETH_P_IP (0x0800) /* Internet Protocol packet */ ++#define ETH_P_ARP (0x0806) /* Address Resolution packet */ ++#define ETH_P_IPV6 (0x86dd) ++#define ETH_P_VLAN (0x8100) ++#define ETH_P_DVLAN (0x88a8) ++#define ETH_P_NCSI (0x88f8) ++#define ETH_P_UNKNOWN (0xffff) ++ ++/* FIXME: remove me when made standalone */ ++#ifdef _WIN32 ++#undef accept ++#undef bind ++#undef closesocket ++#undef connect ++#undef getpeername ++#undef getsockname ++#undef getsockopt ++#undef ioctlsocket ++#undef listen ++#undef recv ++#undef recvfrom ++#undef send ++#undef sendto ++#undef setsockopt ++#undef shutdown ++#undef socket ++#endif ++ ++#ifdef _WIN32 ++#define 
connect slirp_connect_wrap ++int slirp_connect_wrap(int fd, const struct sockaddr *addr, int addrlen); ++#define listen slirp_listen_wrap ++int slirp_listen_wrap(int fd, int backlog); ++#define bind slirp_bind_wrap ++int slirp_bind_wrap(int fd, const struct sockaddr *addr, int addrlen); ++#define socket slirp_socket_wrap ++int slirp_socket_wrap(int domain, int type, int protocol); ++#define accept slirp_accept_wrap ++int slirp_accept_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define shutdown slirp_shutdown_wrap ++int slirp_shutdown_wrap(int fd, int how); ++#define getpeername slirp_getpeername_wrap ++int slirp_getpeername_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define getsockname slirp_getsockname_wrap ++int slirp_getsockname_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define send slirp_send_wrap ++ssize_t slirp_send_wrap(int fd, const void *buf, size_t len, int flags); ++#define sendto slirp_sendto_wrap ++ssize_t slirp_sendto_wrap(int fd, const void *buf, size_t len, int flags, ++ const struct sockaddr *dest_addr, int addrlen); ++#define recv slirp_recv_wrap ++ssize_t slirp_recv_wrap(int fd, void *buf, size_t len, int flags); ++#define recvfrom slirp_recvfrom_wrap ++ssize_t slirp_recvfrom_wrap(int fd, void *buf, size_t len, int flags, ++ struct sockaddr *src_addr, int *addrlen); ++#define closesocket slirp_closesocket_wrap ++int slirp_closesocket_wrap(int fd); ++#define ioctlsocket slirp_ioctlsocket_wrap ++int slirp_ioctlsocket_wrap(int fd, int req, void *val); ++#define getsockopt slirp_getsockopt_wrap ++int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, ++ int *optlen); ++#define setsockopt slirp_setsockopt_wrap ++int slirp_setsockopt_wrap(int sockfd, int level, int optname, ++ const void *optval, int optlen); ++#define inet_aton slirp_inet_aton ++int slirp_inet_aton(const char *cp, struct in_addr *ia); ++#else ++#define closesocket(s) close(s) ++#define ioctlsocket(s, r, v) ioctl(s, r, v) ++#endif ++ 
++int slirp_socket(int domain, int type, int protocol); ++void slirp_set_nonblock(int fd); ++ ++static inline int slirp_socket_set_nodelay(int fd) ++{ ++ int v = 1; ++ return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v)); ++} ++ ++static inline int slirp_socket_set_fast_reuse(int fd) ++{ ++#ifndef _WIN32 ++ int v = 1; ++ return setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &v, sizeof(v)); ++#else ++ /* Enabling the reuse of an endpoint that was used by a socket still in ++ * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows ++ * fast reuse is the default and SO_REUSEADDR does strange things. So we ++ * don't have to do anything here. More info can be found at: ++ * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */ ++ return 0; ++#endif ++} ++ ++void slirp_pstrcpy(char *buf, int buf_size, const char *str); ++ ++#endif +diff --git a/slirp/src/version.c b/slirp/src/version.c +new file mode 100644 +index 0000000..a837323 +--- /dev/null ++++ b/slirp/src/version.c +@@ -0,0 +1,11 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#include "libslirp.h" ++#include "util.h" ++ ++const char * ++slirp_version_string(void) ++{ ++ return stringify(SLIRP_MAJOR_VERSION) "." ++ stringify(SLIRP_MINOR_VERSION) "." ++ stringify(SLIRP_MICRO_VERSION); ++} +diff --git a/slirp/src/vmstate.c b/slirp/src/vmstate.c +new file mode 100644 +index 0000000..8c544eb +--- /dev/null ++++ b/slirp/src/vmstate.c +@@ -0,0 +1,445 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * VMState interpreter ++ * ++ * Copyright (c) 2009-2018 Red Hat Inc ++ * ++ * Authors: ++ * Juan Quintela ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. 
Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. 
++ */ ++#include ++#include ++#include ++#include ++ ++#include "stream.h" ++#include "vmstate.h" ++ ++static int get_nullptr(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ if (slirp_istream_read_u8(f) == VMS_NULLPTR_MARKER) { ++ return 0; ++ } ++ g_warning("vmstate: get_nullptr expected VMS_NULLPTR_MARKER"); ++ return -EINVAL; ++} ++ ++static int put_nullptr(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++ ++{ ++ if (pv == NULL) { ++ slirp_ostream_write_u8(f, VMS_NULLPTR_MARKER); ++ return 0; ++ } ++ g_warning("vmstate: put_nullptr must be called with pv == NULL"); ++ return -EINVAL; ++} ++ ++const VMStateInfo slirp_vmstate_info_nullptr = { ++ .name = "uint64", ++ .get = get_nullptr, ++ .put = put_nullptr, ++}; ++ ++/* 8 bit unsigned int */ ++ ++static int get_uint8(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint8_t *v = pv; ++ *v = slirp_istream_read_u8(f); ++ return 0; ++} ++ ++static int put_uint8(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint8_t *v = pv; ++ slirp_ostream_write_u8(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint8 = { ++ .name = "uint8", ++ .get = get_uint8, ++ .put = put_uint8, ++}; ++ ++/* 16 bit unsigned int */ ++ ++static int get_uint16(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint16_t *v = pv; ++ *v = slirp_istream_read_u16(f); ++ return 0; ++} ++ ++static int put_uint16(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint16_t *v = pv; ++ slirp_ostream_write_u16(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint16 = { ++ .name = "uint16", ++ .get = get_uint16, ++ .put = put_uint16, ++}; ++ ++/* 32 bit unsigned int */ ++ ++static int get_uint32(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint32_t *v = pv; ++ *v = slirp_istream_read_u32(f); ++ return 0; ++} ++ ++static int 
put_uint32(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint32_t *v = pv; ++ slirp_ostream_write_u32(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint32 = { ++ .name = "uint32", ++ .get = get_uint32, ++ .put = put_uint32, ++}; ++ ++/* 16 bit int */ ++ ++static int get_int16(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int16_t *v = pv; ++ *v = slirp_istream_read_i16(f); ++ return 0; ++} ++ ++static int put_int16(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int16_t *v = pv; ++ slirp_ostream_write_i16(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_int16 = { ++ .name = "int16", ++ .get = get_int16, ++ .put = put_int16, ++}; ++ ++/* 32 bit int */ ++ ++static int get_int32(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int32_t *v = pv; ++ *v = slirp_istream_read_i32(f); ++ return 0; ++} ++ ++static int put_int32(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int32_t *v = pv; ++ slirp_ostream_write_i32(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_int32 = { ++ .name = "int32", ++ .get = get_int32, ++ .put = put_int32, ++}; ++ ++/* vmstate_info_tmp, see VMSTATE_WITH_TMP, the idea is that we allocate ++ * a temporary buffer and the pre_load/pre_save methods in the child vmsd ++ * copy stuff from the parent into the child and do calculations to fill ++ * in fields that don't really exist in the parent but need to be in the ++ * stream. 
++ */ ++static int get_tmp(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int ret; ++ const VMStateDescription *vmsd = field->vmsd; ++ int version_id = field->version_id; ++ void *tmp = g_malloc(size); ++ ++ /* Writes the parent field which is at the start of the tmp */ ++ *(void **)tmp = pv; ++ ret = slirp_vmstate_load_state(f, vmsd, tmp, version_id); ++ g_free(tmp); ++ return ret; ++} ++ ++static int put_tmp(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ const VMStateDescription *vmsd = field->vmsd; ++ void *tmp = g_malloc(size); ++ int ret; ++ ++ /* Writes the parent field which is at the start of the tmp */ ++ *(void **)tmp = pv; ++ ret = slirp_vmstate_save_state(f, vmsd, tmp); ++ g_free(tmp); ++ ++ return ret; ++} ++ ++const VMStateInfo slirp_vmstate_info_tmp = { ++ .name = "tmp", ++ .get = get_tmp, ++ .put = put_tmp, ++}; ++ ++/* uint8_t buffers */ ++ ++static int get_buffer(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ slirp_istream_read(f, pv, size); ++ return 0; ++} ++ ++static int put_buffer(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ slirp_ostream_write(f, pv, size); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_buffer = { ++ .name = "buffer", ++ .get = get_buffer, ++ .put = put_buffer, ++}; ++ ++static int vmstate_n_elems(void *opaque, const VMStateField *field) ++{ ++ int n_elems = 1; ++ ++ if (field->flags & VMS_ARRAY) { ++ n_elems = field->num; ++ } else if (field->flags & VMS_VARRAY_INT32) { ++ n_elems = *(int32_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT32) { ++ n_elems = *(uint32_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT16) { ++ n_elems = *(uint16_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT8) { ++ n_elems = *(uint8_t *)(opaque + field->num_offset); ++ } ++ ++ if (field->flags & VMS_MULTIPLY_ELEMENTS) { ++ 
n_elems *= field->num; ++ } ++ ++ return n_elems; ++} ++ ++static int vmstate_size(void *opaque, const VMStateField *field) ++{ ++ int size = field->size; ++ ++ if (field->flags & VMS_VBUFFER) { ++ size = *(int32_t *)(opaque + field->size_offset); ++ if (field->flags & VMS_MULTIPLY) { ++ size *= field->size; ++ } ++ } ++ ++ return size; ++} ++ ++static int vmstate_save_state_v(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id) ++{ ++ int ret = 0; ++ const VMStateField *field = vmsd->fields; ++ ++ if (vmsd->pre_save) { ++ ret = vmsd->pre_save(opaque); ++ if (ret) { ++ g_warning("pre-save failed: %s", vmsd->name); ++ return ret; ++ } ++ } ++ ++ while (field->name) { ++ if ((field->field_exists && field->field_exists(opaque, version_id)) || ++ (!field->field_exists && field->version_id <= version_id)) { ++ void *first_elem = opaque + field->offset; ++ int i, n_elems = vmstate_n_elems(opaque, field); ++ int size = vmstate_size(opaque, field); ++ ++ if (field->flags & VMS_POINTER) { ++ first_elem = *(void **)first_elem; ++ assert(first_elem || !n_elems || !size); ++ } ++ for (i = 0; i < n_elems; i++) { ++ void *curr_elem = first_elem + size * i; ++ ret = 0; ++ ++ if (field->flags & VMS_ARRAY_OF_POINTER) { ++ assert(curr_elem); ++ curr_elem = *(void **)curr_elem; ++ } ++ if (!curr_elem && size) { ++ /* if null pointer write placeholder and do not follow */ ++ assert(field->flags & VMS_ARRAY_OF_POINTER); ++ ret = slirp_vmstate_info_nullptr.put(f, curr_elem, size, ++ NULL); ++ } else if (field->flags & VMS_STRUCT) { ++ ret = slirp_vmstate_save_state(f, field->vmsd, curr_elem); ++ } else if (field->flags & VMS_VSTRUCT) { ++ ret = vmstate_save_state_v(f, field->vmsd, curr_elem, ++ field->struct_version_id); ++ } else { ++ ret = field->info->put(f, curr_elem, size, field); ++ } ++ if (ret) { ++ g_warning("Save of field %s/%s failed", vmsd->name, ++ field->name); ++ return ret; ++ } ++ } ++ } else { ++ if (field->flags & VMS_MUST_EXIST) { ++ 
g_warning("Output state validation failed: %s/%s", vmsd->name, ++ field->name); ++ assert(!(field->flags & VMS_MUST_EXIST)); ++ } ++ } ++ field++; ++ } ++ ++ return 0; ++} ++ ++int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque) ++{ ++ return vmstate_save_state_v(f, vmsd, opaque, vmsd->version_id); ++} ++ ++static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque) ++{ ++ if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) { ++ size_t size = vmstate_size(opaque, field); ++ size *= vmstate_n_elems(opaque, field); ++ if (size) { ++ *(void **)ptr = g_malloc(size); ++ } ++ } ++} ++ ++int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id) ++{ ++ VMStateField *field = vmsd->fields; ++ int ret = 0; ++ ++ if (version_id > vmsd->version_id) { ++ g_warning("%s: incoming version_id %d is too new " ++ "for local version_id %d", ++ vmsd->name, version_id, vmsd->version_id); ++ return -EINVAL; ++ } ++ if (vmsd->pre_load) { ++ int ret = vmsd->pre_load(opaque); ++ if (ret) { ++ return ret; ++ } ++ } ++ while (field->name) { ++ if ((field->field_exists && field->field_exists(opaque, version_id)) || ++ (!field->field_exists && field->version_id <= version_id)) { ++ void *first_elem = opaque + field->offset; ++ int i, n_elems = vmstate_n_elems(opaque, field); ++ int size = vmstate_size(opaque, field); ++ ++ vmstate_handle_alloc(first_elem, field, opaque); ++ if (field->flags & VMS_POINTER) { ++ first_elem = *(void **)first_elem; ++ assert(first_elem || !n_elems || !size); ++ } ++ for (i = 0; i < n_elems; i++) { ++ void *curr_elem = first_elem + size * i; ++ ++ if (field->flags & VMS_ARRAY_OF_POINTER) { ++ curr_elem = *(void **)curr_elem; ++ } ++ if (!curr_elem && size) { ++ /* if null pointer check placeholder and do not follow */ ++ assert(field->flags & VMS_ARRAY_OF_POINTER); ++ ret = slirp_vmstate_info_nullptr.get(f, curr_elem, size, ++ NULL); ++ } else if 
(field->flags & VMS_STRUCT) { ++ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, ++ field->vmsd->version_id); ++ } else if (field->flags & VMS_VSTRUCT) { ++ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, ++ field->struct_version_id); ++ } else { ++ ret = field->info->get(f, curr_elem, size, field); ++ } ++ if (ret < 0) { ++ g_warning("Failed to load %s:%s", vmsd->name, field->name); ++ return ret; ++ } ++ } ++ } else if (field->flags & VMS_MUST_EXIST) { ++ g_warning("Input validation failed: %s/%s", vmsd->name, ++ field->name); ++ return -1; ++ } ++ field++; ++ } ++ if (vmsd->post_load) { ++ ret = vmsd->post_load(opaque, version_id); ++ } ++ return ret; ++} +diff --git a/slirp/src/vmstate.h b/slirp/src/vmstate.h +new file mode 100644 +index 0000000..94c6a4b +--- /dev/null ++++ b/slirp/src/vmstate.h +@@ -0,0 +1,391 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * QEMU migration/snapshot declarations ++ * ++ * Copyright (c) 2009-2011 Red Hat, Inc. ++ * ++ * Original author: Juan Quintela ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. 
++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#ifndef VMSTATE_H_ ++#define VMSTATE_H_ ++ ++#include ++#include ++#include ++#include "slirp.h" ++#include "stream.h" ++ ++#define stringify(s) tostring(s) ++#define tostring(s) #s ++ ++typedef struct VMStateInfo VMStateInfo; ++typedef struct VMStateDescription VMStateDescription; ++typedef struct VMStateField VMStateField; ++ ++int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque); ++int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id); ++ ++/* VMStateInfo allows customized migration of objects that don't fit in ++ * any category in VMStateFlags. Additional information is always passed ++ * into get and put in terms of field and vmdesc parameters. However ++ * these two parameters should only be used in cases when customized ++ * handling is needed, such as QTAILQ. For primitive data types such as ++ * integer, field and vmdesc parameters should be ignored inside get/put. 
++ */ ++struct VMStateInfo { ++ const char *name; ++ int (*get)(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field); ++ int (*put)(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field); ++}; ++ ++enum VMStateFlags { ++ /* Ignored */ ++ VMS_SINGLE = 0x001, ++ ++ /* The struct member at opaque + VMStateField.offset is a pointer ++ * to the actual field (e.g. struct a { uint8_t *b; ++ * }). Dereference the pointer before using it as basis for ++ * further pointer arithmetic (see e.g. VMS_ARRAY). Does not ++ * affect the meaning of VMStateField.num_offset or ++ * VMStateField.size_offset; see VMS_VARRAY* and VMS_VBUFFER for ++ * those. */ ++ VMS_POINTER = 0x002, ++ ++ /* The field is an array of fixed size. VMStateField.num contains ++ * the number of entries in the array. The size of each entry is ++ * given by VMStateField.size and / or opaque + ++ * VMStateField.size_offset; see VMS_VBUFFER and ++ * VMS_MULTIPLY. Each array entry will be processed individually ++ * (VMStateField.info.get()/put() if VMS_STRUCT is not set, ++ * recursion into VMStateField.vmsd if VMS_STRUCT is set). May not ++ * be combined with VMS_VARRAY*. */ ++ VMS_ARRAY = 0x004, ++ ++ /* The field is itself a struct, containing one or more ++ * fields. Recurse into VMStateField.vmsd. Most useful in ++ * combination with VMS_ARRAY / VMS_VARRAY*, recursing into each ++ * array entry. */ ++ VMS_STRUCT = 0x008, ++ ++ /* The field is an array of variable size. The int32_t at opaque + ++ * VMStateField.num_offset contains the number of entries in the ++ * array. See the VMS_ARRAY description regarding array handling ++ * in general. May not be combined with VMS_ARRAY or any other ++ * VMS_VARRAY*. */ ++ VMS_VARRAY_INT32 = 0x010, ++ ++ /* Ignored */ ++ VMS_BUFFER = 0x020, ++ ++ /* The field is a (fixed-size or variable-size) array of pointers ++ * (e.g. struct a { uint8_t *b[]; }). Dereference each array entry ++ * before using it. 
Note: Does not imply any one of VMS_ARRAY / ++ * VMS_VARRAY*; these need to be set explicitly. */ ++ VMS_ARRAY_OF_POINTER = 0x040, ++ ++ /* The field is an array of variable size. The uint16_t at opaque ++ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT16 = 0x080, ++ ++ /* The size of the individual entries (a single array entry if ++ * VMS_ARRAY or any of VMS_VARRAY* are set, or the field itself if ++ * neither is set) is variable (i.e. not known at compile-time), ++ * but the same for all entries. Use the int32_t at opaque + ++ * VMStateField.size_offset (subject to VMS_MULTIPLY) to determine ++ * the size of each (and every) entry. */ ++ VMS_VBUFFER = 0x100, ++ ++ /* Multiply the entry size given by the int32_t at opaque + ++ * VMStateField.size_offset (see VMS_VBUFFER description) with ++ * VMStateField.size to determine the number of bytes to be ++ * allocated. Only valid in combination with VMS_VBUFFER. */ ++ VMS_MULTIPLY = 0x200, ++ ++ /* The field is an array of variable size. The uint8_t at opaque + ++ * VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT8 = 0x400, ++ ++ /* The field is an array of variable size. The uint32_t at opaque ++ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT32 = 0x800, ++ ++ /* Fail loading the serialised VM state if this field is missing ++ * from the input. 
*/ ++ VMS_MUST_EXIST = 0x1000, ++ ++ /* When loading serialised VM state, allocate memory for the ++ * (entire) field. Only valid in combination with ++ * VMS_POINTER. Note: Not all combinations with other flags are ++ * currently supported, e.g. VMS_ALLOC|VMS_ARRAY_OF_POINTER won't ++ * cause the individual entries to be allocated. */ ++ VMS_ALLOC = 0x2000, ++ ++ /* Multiply the number of entries given by the integer at opaque + ++ * VMStateField.num_offset (see VMS_VARRAY*) with VMStateField.num ++ * to determine the number of entries in the array. Only valid in ++ * combination with one of VMS_VARRAY*. */ ++ VMS_MULTIPLY_ELEMENTS = 0x4000, ++ ++ /* A structure field that is like VMS_STRUCT, but uses ++ * VMStateField.struct_version_id to tell which version of the ++ * structure we are referencing to use. */ ++ VMS_VSTRUCT = 0x8000, ++}; ++ ++struct VMStateField { ++ const char *name; ++ size_t offset; ++ size_t size; ++ size_t start; ++ int num; ++ size_t num_offset; ++ size_t size_offset; ++ const VMStateInfo *info; ++ enum VMStateFlags flags; ++ const VMStateDescription *vmsd; ++ int version_id; ++ int struct_version_id; ++ bool (*field_exists)(void *opaque, int version_id); ++}; ++ ++struct VMStateDescription { ++ const char *name; ++ int version_id; ++ int (*pre_load)(void *opaque); ++ int (*post_load)(void *opaque, int version_id); ++ int (*pre_save)(void *opaque); ++ VMStateField *fields; ++}; ++ ++ ++extern const VMStateInfo slirp_vmstate_info_int16; ++extern const VMStateInfo slirp_vmstate_info_int32; ++extern const VMStateInfo slirp_vmstate_info_uint8; ++extern const VMStateInfo slirp_vmstate_info_uint16; ++extern const VMStateInfo slirp_vmstate_info_uint32; ++ ++/** Put this in the stream when migrating a null pointer.*/ ++#define VMS_NULLPTR_MARKER (0x30U) /* '0' */ ++extern const VMStateInfo slirp_vmstate_info_nullptr; ++ ++extern const VMStateInfo slirp_vmstate_info_buffer; ++extern const VMStateInfo slirp_vmstate_info_tmp; ++ ++#define 
type_check_array(t1, t2, n) ((t1(*)[n])0 - (t2 *)0) ++#define type_check_pointer(t1, t2) ((t1 **)0 - (t2 *)0) ++#define typeof_field(type, field) typeof(((type *)0)->field) ++#define type_check(t1, t2) ((t1 *)0 - (t2 *)0) ++ ++#define vmstate_offset_value(_state, _field, _type) \ ++ (offsetof(_state, _field) + type_check(_type, typeof_field(_state, _field))) ++ ++#define vmstate_offset_pointer(_state, _field, _type) \ ++ (offsetof(_state, _field) + \ ++ type_check_pointer(_type, typeof_field(_state, _field))) ++ ++#define vmstate_offset_array(_state, _field, _type, _num) \ ++ (offsetof(_state, _field) + \ ++ type_check_array(_type, typeof_field(_state, _field), _num)) ++ ++#define vmstate_offset_buffer(_state, _field) \ ++ vmstate_offset_array(_state, _field, uint8_t, \ ++ sizeof(typeof_field(_state, _field))) ++ ++/* In the macros below, if there is a _version, that means the macro's ++ * field will be processed only if the version being received is >= ++ * the _version specified. In general, if you add a new field, you ++ * would increment the structure's version and put that version ++ * number into the new field so it would only be processed with the ++ * new version. ++ * ++ * In particular, for VMSTATE_STRUCT() and friends the _version does ++ * *NOT* pick the version of the sub-structure. It works just as ++ * specified above. The version of the top-level structure received ++ * is passed down to all sub-structures. This means that the ++ * sub-structures must have version that are compatible with all the ++ * structures that use them. ++ * ++ * If you want to specify the version of the sub-structure, use ++ * VMSTATE_VSTRUCT(), which allows the specific sub-structure version ++ * to be directly specified. 
++ */ ++ ++#define VMSTATE_SINGLE_TEST(_field, _state, _test, _version, _info, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .size = sizeof(_type), .info = &(_info), \ ++ .flags = VMS_SINGLE, \ ++ .offset = vmstate_offset_value(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_ARRAY(_field, _state, _num, _version, _info, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), .num = (_num), \ ++ .info = &(_info), .size = sizeof(_type), .flags = VMS_ARRAY, \ ++ .offset = vmstate_offset_array(_state, _field, _type, _num), \ ++ } ++ ++#define VMSTATE_STRUCT_TEST(_field, _state, _test, _version, _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .vmsd = &(_vmsd), .size = sizeof(_type), \ ++ .flags = VMS_STRUCT, \ ++ .offset = vmstate_offset_value(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_STRUCT_POINTER_V(_field, _state, _version, _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .vmsd = &(_vmsd), .size = sizeof(_type *), \ ++ .flags = VMS_STRUCT | VMS_POINTER, \ ++ .offset = vmstate_offset_pointer(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, _test, _version, \ ++ _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .num = (_num), .field_exists = (_test), \ ++ .version_id = (_version), .vmsd = &(_vmsd), .size = sizeof(_type), \ ++ .flags = VMS_STRUCT | VMS_ARRAY, \ ++ .offset = vmstate_offset_array(_state, _field, _type, _num), \ ++ } ++ ++#define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .size = (_size - _start), \ ++ .info = &slirp_vmstate_info_buffer, .flags = VMS_BUFFER, \ ++ .offset = vmstate_offset_buffer(_state, _field) + _start, \ ++ } ++ ++#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, 
_field_size) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), \ ++ .size_offset = vmstate_offset_value(_state, _field_size, uint32_t), \ ++ .info = &slirp_vmstate_info_buffer, \ ++ .flags = VMS_VBUFFER | VMS_POINTER, \ ++ .offset = offsetof(_state, _field), \ ++ } ++ ++#define QEMU_BUILD_BUG_ON_STRUCT(x) \ ++ struct { \ ++ int : (x) ? -1 : 1; \ ++ } ++ ++#define QEMU_BUILD_BUG_ON_ZERO(x) \ ++ (sizeof(QEMU_BUILD_BUG_ON_STRUCT(x)) - sizeof(QEMU_BUILD_BUG_ON_STRUCT(x))) ++ ++/* Allocate a temporary of type 'tmp_type', set tmp->parent to _state ++ * and execute the vmsd on the temporary. Note that we're working with ++ * the whole of _state here, not a field within it. ++ * We compile time check that: ++ * That _tmp_type contains a 'parent' member that's a pointer to the ++ * '_state' type ++ * That the pointer is right at the start of _tmp_type. ++ */ ++#define VMSTATE_WITH_TMP(_state, _tmp_type, _vmsd) \ ++ { \ ++ .name = "tmp", \ ++ .size = sizeof(_tmp_type) + \ ++ QEMU_BUILD_BUG_ON_ZERO(offsetof(_tmp_type, parent) != 0) + \ ++ type_check_pointer(_state, typeof_field(_tmp_type, parent)), \ ++ .vmsd = &(_vmsd), .info = &slirp_vmstate_info_tmp, \ ++ } ++ ++#define VMSTATE_SINGLE(_field, _state, _version, _info, _type) \ ++ VMSTATE_SINGLE_TEST(_field, _state, NULL, _version, _info, _type) ++ ++#define VMSTATE_STRUCT(_field, _state, _version, _vmsd, _type) \ ++ VMSTATE_STRUCT_TEST(_field, _state, NULL, _version, _vmsd, _type) ++ ++#define VMSTATE_STRUCT_POINTER(_field, _state, _vmsd, _type) \ ++ VMSTATE_STRUCT_POINTER_V(_field, _state, 0, _vmsd, _type) ++ ++#define VMSTATE_STRUCT_ARRAY(_field, _state, _num, _version, _vmsd, _type) \ ++ VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, NULL, _version, _vmsd, \ ++ _type) ++ ++#define VMSTATE_INT16_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int16, int16_t) ++#define VMSTATE_INT32_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int32, 
int32_t) ++ ++#define VMSTATE_UINT8_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint8, uint8_t) ++#define VMSTATE_UINT16_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint16, uint16_t) ++#define VMSTATE_UINT32_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint32, uint32_t) ++ ++#define VMSTATE_INT16(_f, _s) VMSTATE_INT16_V(_f, _s, 0) ++#define VMSTATE_INT32(_f, _s) VMSTATE_INT32_V(_f, _s, 0) ++ ++#define VMSTATE_UINT8(_f, _s) VMSTATE_UINT8_V(_f, _s, 0) ++#define VMSTATE_UINT16(_f, _s) VMSTATE_UINT16_V(_f, _s, 0) ++#define VMSTATE_UINT32(_f, _s) VMSTATE_UINT32_V(_f, _s, 0) ++ ++#define VMSTATE_UINT16_TEST(_f, _s, _t) \ ++ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint16, uint16_t) ++ ++#define VMSTATE_UINT32_TEST(_f, _s, _t) \ ++ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint32, uint32_t) ++ ++#define VMSTATE_INT16_ARRAY_V(_f, _s, _n, _v) \ ++ VMSTATE_ARRAY(_f, _s, _n, _v, slirp_vmstate_info_int16, int16_t) ++ ++#define VMSTATE_INT16_ARRAY(_f, _s, _n) VMSTATE_INT16_ARRAY_V(_f, _s, _n, 0) ++ ++#define VMSTATE_BUFFER_V(_f, _s, _v) \ ++ VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, 0, sizeof(typeof_field(_s, _f))) ++ ++#define VMSTATE_BUFFER(_f, _s) VMSTATE_BUFFER_V(_f, _s, 0) ++ ++#define VMSTATE_END_OF_LIST() \ ++ { \ ++ } ++ ++#endif +-- +1.8.3.1 + diff --git a/0005-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch index a42274e..705f594 100644 --- a/0005-Initial-redhat-build.patch +++ b/0005-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From 8b38b3cb83404f47ba268958cec8121c674b8153 Mon Sep 17 00:00:00 2001 +From 1285db562351e2233413d163bfef3ed002b10259 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build @@ -40,21 +40,14 @@ Rebase notes (4.1.0): - Removed new unpacked files - Update configure line to use new options -Rebase notes (weekly-190823): +Rebase notes (4.2.0): - Disable iotest run during make check - -Rebase 
notes (weekly-190906): - README renamed to README.rst (upstream) - Removed ui-spice-app.so - -Rebase notes (weekly-190913): - Added relevant changes from "505f7f4 redhat: Adding slirp to the exploded tree" - -Rebase notes (weekly-190927): - Removed qemu-ga.8 install from spec file - installed by make - -Rebase notes (weekly-191011): - Removed spapr-rtas.bin (upstream) +- Require newer SLOF (20191022) Merged patches (3.1.0): - 01f0c9f RHEL8: Add disable configure options to qemu spec file @@ -86,10 +79,10 @@ Signed-off-by: Danilo C. L. de Paula Makefile | 3 +- configure | 1 + os-posix.c | 2 +- - redhat/Makefile | 82 + + redhat/Makefile | 82 ++ redhat/Makefile.common | 51 + redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 2369 +++++++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 2369 +++++++++++++++++++++++++++++++++++++ redhat/scripts/process-patches.sh | 7 +- tests/Makefile.include | 2 +- ui/vnc.c | 2 +- @@ -100,7 +93,7 @@ Signed-off-by: Danilo C. L. de Paula create mode 100644 redhat/qemu-kvm.spec.template diff --git a/Makefile b/Makefile -index b437a346d7..086727dbb9 100644 +index b437a34..086727d 100644 --- a/Makefile +++ b/Makefile @@ -512,6 +512,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM @@ -121,7 +114,7 @@ index b437a346d7..086727dbb9 100644 $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) docs/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" diff --git a/configure b/configure -index 6099be1d84..16564f8ccc 100755 +index 6099be1..16564f8 100755 --- a/configure +++ b/configure @@ -2424,6 +2424,7 @@ if test "$seccomp" != "no" ; then @@ -133,7 +126,7 @@ index 6099be1d84..16564f8ccc 100755 # xen probe diff --git a/os-posix.c b/os-posix.c -index 86cffd2c7d..1c9f86768d 100644 +index 86cffd2..1c9f867 100644 --- a/os-posix.c +++ b/os-posix.c @@ -83,7 +83,7 @@ void os_setup_signal_handling(void) @@ -146,7 +139,7 @@ index 86cffd2c7d..1c9f86768d 100644 char *os_find_datadir(void) { diff --git a/tests/Makefile.include 
b/tests/Makefile.include -index 8566f5f119..b483790cf3 100644 +index 8566f5f..b483790 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -1194,7 +1194,7 @@ check-acceptance: check-venv $(TESTS_RESULTS_DIR) @@ -159,7 +152,7 @@ index 8566f5f119..b483790cf3 100644 rm -rf $(check-unit-y) tests/*.o $(QEMU_IOTESTS_HELPERS-y) rm -rf $(sort $(foreach target,$(SYSEMU_TARGET_LIST), $(check-qtest-$(target)-y)) $(check-qtest-generic-y)) diff --git a/ui/vnc.c b/ui/vnc.c -index 87b8045afe..ecf6276f5b 100644 +index 87b8045..ecf6276 100644 --- a/ui/vnc.c +++ b/ui/vnc.c @@ -3987,7 +3987,7 @@ void vnc_display_open(const char *id, Error **errp) @@ -172,5 +165,5 @@ index 87b8045afe..ecf6276f5b 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -2.21.0 +1.8.3.1 diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0006-Enable-disable-devices-for-RHEL.patch index f6c3309..465f9cd 100644 --- a/0006-Enable-disable-devices-for-RHEL.patch +++ b/0006-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 067b5ced8f6f2ee7cd44cfe8e17021974f403206 Mon Sep 17 00:00:00 2001 +From e75b1c31b76463b4b8f30cb6dbd23ded014e9269 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 11 Jan 2016 11:53:33 +0100 Subject: Enable/disable devices for RHEL @@ -35,6 +35,10 @@ Rebase notes (4.2.0-rc0): - Use conditional build for isa-superio.c (upstream change) - Rename PCI_PIIX to PCI_I440FX (upstream change) +Rebase notes (4.2.0-rc3): +- Disabled ccid-card-emulated (patch 92566) +- Disabled vfio-pci-igd-lpc-bridge (patch 92565) + Merged patches (qemu 3.1.0): - d51e082 Re-enable CONFIG_HYPERV_TESTDEV - 4b889f3 Declare cirrus-vga as deprecated @@ -53,51 +57,44 @@ Merged patches (4.1.0): Merged patches (weekly-190823): - f7587dd RHEL: disable hostmem-memfd -Conflicts: - hw/isa/Makefile.objs - -Conflicts: - hw/pci-host/i440fx.c - Signed-off-by: Danilo C. L. 
de Paula --- Makefile.objs | 4 +- backends/Makefile.objs | 3 +- - default-configs/aarch64-rh-devices.mak | 20 +++++ - default-configs/aarch64-softmmu.mak | 10 ++- - default-configs/ppc64-rh-devices.mak | 32 ++++++++ - default-configs/ppc64-softmmu.mak | 8 +- - default-configs/rh-virtio.mak | 10 +++ - default-configs/s390x-rh-devices.mak | 15 ++++ + default-configs/aarch64-rh-devices.mak | 20 +++++++ + default-configs/aarch64-softmmu.mak | 10 ++-- + default-configs/ppc64-rh-devices.mak | 32 +++++++++++ + default-configs/ppc64-softmmu.mak | 8 ++- + default-configs/rh-virtio.mak | 10 ++++ + default-configs/s390x-rh-devices.mak | 15 +++++ default-configs/s390x-softmmu.mak | 4 +- - default-configs/x86_64-rh-devices.mak | 100 +++++++++++++++++++++++++ + default-configs/x86_64-rh-devices.mak | 100 +++++++++++++++++++++++++++++++++ default-configs/x86_64-softmmu.mak | 4 +- hw/acpi/ich9.c | 4 +- hw/arm/Makefile.objs | 2 +- - hw/block/fdc.c | 10 +++ + hw/block/fdc.c | 10 ++++ hw/bt/Makefile.objs | 4 +- hw/cpu/Makefile.objs | 5 +- hw/display/Makefile.objs | 5 +- hw/display/cirrus_vga.c | 3 + - hw/i386/pc_piix.c | 2 + hw/ide/piix.c | 5 +- hw/input/pckbd.c | 2 + hw/net/e1000.c | 2 + - hw/pci-host/i440fx.c | 4 + + hw/pci-host/i440fx.c | 4 ++ hw/ppc/spapr_cpu_core.c | 2 + - hw/usb/ccid-card-emulated.c | 2 + - hw/vfio/pci-quirks.c | 5 ++ + hw/usb/Makefile.objs | 4 +- + hw/vfio/pci-quirks.c | 9 +++ hw/vfio/pci.c | 5 ++ - qemu-options.hx | 7 +- + qemu-options.hx | 7 +-- redhat/qemu-kvm.spec.template | 5 +- target/arm/cpu.c | 4 +- - target/i386/cpu.c | 35 +++++++-- - target/ppc/cpu-models.c | 10 +++ + target/i386/cpu.c | 35 +++++++++--- + target/ppc/cpu-models.c | 10 ++++ target/s390x/cpu_models.c | 3 + - target/s390x/kvm.c | 8 ++ + target/s390x/kvm.c | 8 +++ util/memfd.c | 2 +- - vl.c | 8 +- - 36 files changed, 314 insertions(+), 40 deletions(-) + vl.c | 8 ++- + 35 files changed, 317 insertions(+), 41 deletions(-) create mode 100644 default-configs/aarch64-rh-devices.mak create mode 
100644 default-configs/ppc64-rh-devices.mak create mode 100644 default-configs/rh-virtio.mak @@ -105,7 +102,7 @@ Signed-off-by: Danilo C. L. de Paula create mode 100644 default-configs/x86_64-rh-devices.mak diff --git a/Makefile.objs b/Makefile.objs -index 11ba1a36bd..fcf63e1096 100644 +index 11ba1a3..fcf63e1 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -65,8 +65,8 @@ common-obj-y += replay/ @@ -120,7 +117,7 @@ index 11ba1a36bd..fcf63e1096 100644 common-obj-y += dma-helpers.o common-obj-y += vl.o diff --git a/backends/Makefile.objs b/backends/Makefile.objs -index f0691116e8..f328d404bf 100644 +index f069111..f328d40 100644 --- a/backends/Makefile.objs +++ b/backends/Makefile.objs @@ -16,4 +16,5 @@ endif @@ -132,7 +129,7 @@ index f0691116e8..f328d404bf 100644 +# common-obj-$(CONFIG_LINUX) += hostmem-memfd.o diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak new file mode 100644 -index 0000000000..a1ed641174 +index 0000000..a1ed641 --- /dev/null +++ b/default-configs/aarch64-rh-devices.mak @@ -0,0 +1,20 @@ @@ -157,7 +154,7 @@ index 0000000000..a1ed641174 +CONFIG_VIRTIO_PCI=y +CONFIG_XIO3130=y diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak -index 958b1e08e4..8f6867d48a 100644 +index 958b1e0..8f6867d 100644 --- a/default-configs/aarch64-softmmu.mak +++ b/default-configs/aarch64-softmmu.mak @@ -1,8 +1,10 @@ @@ -177,7 +174,7 @@ index 958b1e08e4..8f6867d48a 100644 +include aarch64-rh-devices.mak diff --git a/default-configs/ppc64-rh-devices.mak b/default-configs/ppc64-rh-devices.mak new file mode 100644 -index 0000000000..35f2106d06 +index 0000000..35f2106 --- /dev/null +++ b/default-configs/ppc64-rh-devices.mak @@ -0,0 +1,32 @@ @@ -214,7 +211,7 @@ index 0000000000..35f2106d06 +CONFIG_XIVE_SPAPR=y +CONFIG_XIVE_KVM=y diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak -index cca52665d9..fec354f327 100644 +index cca5266..fec354f 100644 --- 
a/default-configs/ppc64-softmmu.mak +++ b/default-configs/ppc64-softmmu.mak @@ -1,10 +1,12 @@ @@ -235,7 +232,7 @@ index cca52665d9..fec354f327 100644 +include ppc64-rh-devices.mak diff --git a/default-configs/rh-virtio.mak b/default-configs/rh-virtio.mak new file mode 100644 -index 0000000000..94ede1b5f6 +index 0000000..94ede1b --- /dev/null +++ b/default-configs/rh-virtio.mak @@ -0,0 +1,10 @@ @@ -251,7 +248,7 @@ index 0000000000..94ede1b5f6 +CONFIG_VIRTIO_SERIAL=y diff --git a/default-configs/s390x-rh-devices.mak b/default-configs/s390x-rh-devices.mak new file mode 100644 -index 0000000000..c3c73fe752 +index 0000000..c3c73fe --- /dev/null +++ b/default-configs/s390x-rh-devices.mak @@ -0,0 +1,15 @@ @@ -271,7 +268,7 @@ index 0000000000..c3c73fe752 +CONFIG_VIRTIO_CCW=y +CONFIG_WDT_DIAG288=y diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak -index f2287a133f..3e2e388e91 100644 +index f2287a1..3e2e388 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak @@ -10,4 +10,6 @@ @@ -284,7 +281,7 @@ index f2287a133f..3e2e388e91 100644 +include s390x-rh-devices.mak diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..d59b6d9bb5 +index 0000000..d59b6d9 --- /dev/null +++ b/default-configs/x86_64-rh-devices.mak @@ -0,0 +1,100 @@ @@ -389,7 +386,7 @@ index 0000000000..d59b6d9bb5 +CONFIG_TPM_EMULATOR=y +CONFIG_TPM_PASSTHROUGH=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak -index 64b2ee2960..b5de7e5279 100644 +index 64b2ee2..b5de7e5 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -1,3 +1,5 @@ @@ -400,7 +397,7 @@ index 64b2ee2960..b5de7e5279 100644 + +include x86_64-rh-devices.mak diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 2034dd749e..ab203ad448 100644 +index 2034dd7..ab203ad 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -449,8 +449,8 @@ void 
ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) @@ -415,7 +412,7 @@ index 2034dd749e..ab203ad448 100644 object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs -index fe749f65fd..2aa1a9efdd 100644 +index fe749f6..2aa1a9e 100644 --- a/hw/arm/Makefile.objs +++ b/hw/arm/Makefile.objs @@ -27,7 +27,7 @@ obj-$(CONFIG_VEXPRESS) += vexpress.o @@ -428,7 +425,7 @@ index fe749f65fd..2aa1a9efdd 100644 obj-$(CONFIG_PXA2XX) += pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o obj-$(CONFIG_DIGIC) += digic.o diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index ac5d31e8c1..e925bac002 100644 +index ac5d31e..e925bac 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -46,6 +46,8 @@ @@ -456,7 +453,7 @@ index ac5d31e8c1..e925bac002 100644 error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); } diff --git a/hw/bt/Makefile.objs b/hw/bt/Makefile.objs -index 867a7d2e8a..e678e9ee3c 100644 +index 867a7d2..e678e9e 100644 --- a/hw/bt/Makefile.objs +++ b/hw/bt/Makefile.objs @@ -1,3 +1,3 @@ @@ -466,7 +463,7 @@ index 867a7d2e8a..e678e9ee3c 100644 +#common-obj-y += hci-csr.o diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs -index 8db9e8a7b3..1601ea93c7 100644 +index 8db9e8a..1601ea9 100644 --- a/hw/cpu/Makefile.objs +++ b/hw/cpu/Makefile.objs @@ -1,5 +1,6 @@ @@ -479,7 +476,7 @@ index 8db9e8a7b3..1601ea93c7 100644 +common-obj-y += core.o +# cluster.o diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs -index f2182e3bef..3d0cda1b52 100644 +index f2182e3..3d0cda1 100644 --- a/hw/display/Makefile.objs +++ b/hw/display/Makefile.objs @@ -1,8 +1,9 @@ @@ -495,7 +492,7 @@ index f2182e3bef..3d0cda1b52 100644 common-obj-$(CONFIG_ADS7846) += ads7846.o common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index cd283e53b4..93afa26fda 100644 +index cd283e5..93afa26 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c @@ -2975,6 +2975,9 @@ 
static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) @@ -508,22 +505,8 @@ index cd283e53b4..93afa26fda 100644 /* follow real hardware, cirrus card emulated has 4 MB video memory. Also accept 8 MB/16 MB for backward compatibility. */ if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 2aefa3b8df..a19f8058ab 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -78,7 +78,9 @@ static void pc_init1(MachineState *machine, - X86MachineState *x86ms = X86_MACHINE(machine); - MemoryRegion *system_memory = get_system_memory(); - MemoryRegion *system_io = get_system_io(); -+#ifdef CONFIG_IDE_ISA - int i; -+#endif - PCIBus *pci_bus; - ISABus *isa_bus; - PCII440FXState *i440fx_state; diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index db313dd3b1..e14858ca64 100644 +index db313dd..e14858c 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c @@ -251,7 +251,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) @@ -546,7 +529,7 @@ index db313dd3b1..e14858ca64 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index f0acfd86f7..390eb6579c 100644 +index f0acfd8..390eb65 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c @@ -571,6 +571,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) @@ -559,7 +542,7 @@ index f0acfd86f7..390eb6579c 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index a73f8d404e..fc73fdd6fa 100644 +index a73f8d4..fc73fdd 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -1795,6 +1795,7 @@ static const E1000Info e1000_devices[] = { @@ -579,7 +562,7 @@ index a73f8d404e..fc73fdd6fa 100644 static void e1000_register_types(void) diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c -index f27131102d..17f10efae2 100644 +index f271311..17f10ef 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -386,6 +386,7 @@ static const TypeInfo i440fx_info = { @@ 
-609,10 +592,10 @@ index f27131102d..17f10efae2 100644 } diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index ef7b27a66d..ef034a1279 100644 +index 8339c4c..301cd7b 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -408,10 +408,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { +@@ -403,10 +403,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { .instance_size = sizeof(SpaprCpuCore), .class_size = sizeof(SpaprCpuCoreClass), }, @@ -625,47 +608,91 @@ index ef7b27a66d..ef034a1279 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), -diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c -index 291e41db8a..1c0f190f1b 100644 ---- a/hw/usb/ccid-card-emulated.c -+++ b/hw/usb/ccid-card-emulated.c -@@ -604,6 +604,8 @@ static void emulated_class_initfn(ObjectClass *klass, void *data) - set_bit(DEVICE_CATEGORY_INPUT, dc->categories); - dc->desc = "emulated smartcard"; - dc->props = emulated_card_properties; -+ /* Disabled for Red Hat Enterprise Linux: */ -+ dc->user_creatable = false; - } - - static const TypeInfo emulated_card_info = { +diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs +index 303ac08..700a918 100644 +--- a/hw/usb/Makefile.objs ++++ b/hw/usb/Makefile.objs +@@ -30,7 +30,9 @@ common-obj-$(CONFIG_USB_BLUETOOTH) += dev-bluetooth.o + ifeq ($(CONFIG_USB_SMARTCARD),y) + common-obj-y += dev-smartcard-reader.o + common-obj-$(CONFIG_SMARTCARD) += smartcard.mo +-smartcard.mo-objs := ccid-card-passthru.o ccid-card-emulated.o ++# Disabled for Red Hat Enterprise Linux: ++# smartcard.mo-objs := ccid-card-passthru.o ccid-card-emulated.o ++smartcard.mo-objs := ccid-card-passthru.o + smartcard.mo-cflags := $(SMARTCARD_CFLAGS) + smartcard.mo-libs := $(SMARTCARD_LIBS) + endif diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index 136f3a9ad6..d761fcaf75 100644 +index 136f3a9..4505ffe 100644 --- 
a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c -@@ -1391,6 +1391,8 @@ static void vfio_pci_igd_lpc_bridge_class_init(ObjectClass *klass, void *data) - set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); - dc->desc = "VFIO dummy ISA/LPC bridge for IGD assignment"; - dc->hotpluggable = false; -+ /* Disabled in Red Hat Enterprise Linux */ -+ dc->user_creatable = false; - k->realize = vfio_pci_igd_lpc_bridge_realize; - k->class_id = PCI_CLASS_BRIDGE_ISA; +@@ -1166,6 +1166,7 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) + trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name); } -@@ -1584,6 +1586,9 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) - 0, PCI_DEVFN(0x2, 0))) { - return; - } -+ -+ /* Disabled in Red Hat Enterprise Linux */ -+ return; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* + * Intel IGD support + * +@@ -1239,6 +1240,7 @@ static int igd_gen(VFIOPCIDevice *vdev) + + return 8; /* Assume newer is compatible */ + } ++#endif + + typedef struct VFIOIGDQuirk { + struct VFIOPCIDevice *vdev; +@@ -1311,6 +1313,7 @@ typedef struct { + uint8_t len; + } IGDHostInfo; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static const IGDHostInfo igd_host_bridge_infos[] = { + {PCI_REVISION_ID, 2}, + {PCI_SUBSYSTEM_VENDOR_ID, 2}, +@@ -1559,9 +1562,11 @@ static const MemoryRegionOps vfio_igd_index_quirk = { + .write = vfio_igd_quirk_index_write, + .endianness = DEVICE_LITTLE_ENDIAN, + }; ++#endif + + static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) + { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + struct vfio_region_info *rom = NULL, *opregion = NULL, + *host = NULL, *lpc = NULL; + VFIOQuirk *quirk; +@@ -1572,6 +1577,7 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) + uint32_t gmch; + uint16_t cmd_orig, cmd; + Error *err = NULL; ++#endif + /* + * This must be an Intel VGA device at address 00:02.0 for us to even +@@ -1585,6 +1591,8 @@ static void 
vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) + return; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ ++ /* * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we + * can stuff host values into, so if there's already one there and it's not +@@ -1809,6 +1817,7 @@ out: + g_free(opregion); + g_free(host); + g_free(lpc); ++#endif + } + + /* diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index e6569a7968..5cff163334 100644 +index 2d40b39..c8534d3 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -3200,6 +3200,7 @@ static const TypeInfo vfio_pci_dev_info = { +@@ -3220,6 +3220,7 @@ static const TypeInfo vfio_pci_dev_info = { }, }; @@ -673,7 +700,7 @@ index e6569a7968..5cff163334 100644 static Property vfio_pci_dev_nohotplug_properties[] = { DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false), DEFINE_PROP_END_OF_LIST(), -@@ -3219,11 +3220,15 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { +@@ -3239,11 +3240,15 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { .instance_size = sizeof(VFIOPCIDevice), .class_init = vfio_pci_nohotplug_dev_class_init, }; @@ -690,7 +717,7 @@ index e6569a7968..5cff163334 100644 type_init(register_vfio_pci_dev_type) diff --git a/qemu-options.hx b/qemu-options.hx -index 65c9473b73..fc17aca631 100644 +index 65c9473..fc17aca 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2111,11 +2111,6 @@ ETEXI @@ -722,7 +749,7 @@ index 65c9473b73..fc17aca631 100644 #ifdef CONFIG_TPM DEFHEADING(TPM device options:) diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 7a4ac9339b..3788fc3c4a 100644 +index 7a4ac93..3788fc3 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -2744,7 +2744,9 @@ static void arm_cpu_register_types(void) @@ -737,10 +764,10 @@ index 7a4ac9339b..3788fc3c4a 100644 } diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index a624163ac2..ba5e9faeae 100644 +index 69f518a..1b7880a 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1807,14 +1807,14 @@ static 
X86CPUDefinition builtin_x86_defs[] = { +@@ -1835,14 +1835,14 @@ static X86CPUDefinition builtin_x86_defs[] = { .family = 6, .model = 6, .stepping = 3, @@ -763,11 +790,10 @@ index a624163ac2..ba5e9faeae 100644 .features[FEAT_8000_0001_ECX] = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM, .xlevel = 0x8000000A, -@@ -2043,6 +2043,25 @@ static X86CPUDefinition builtin_x86_defs[] = { - .xlevel = 0x80000008, +@@ -2129,6 +2129,25 @@ static X86CPUDefinition builtin_x86_defs[] = { .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", }, -+ { + { + .name = "cpu64-rhel6", + .level = 4, + .vendor = CPUID_VENDOR_AMD, @@ -786,11 +812,12 @@ index a624163ac2..ba5e9faeae 100644 + .xlevel = 0x8000000A, + .model_id = "QEMU Virtual CPU version (cpu64-rhel6)", + }, - { ++ { .name = "Conroe", .level = 10, + .vendor = CPUID_VENDOR_INTEL, diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 086548e9b9..1bbf378c18 100644 +index 086548e..1bbf378 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -860,7 +887,7 @@ index 086548e9b9..1bbf378c18 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 7e92fb2e15..be718220d7 100644 +index 7e92fb2..be71822 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -404,6 +404,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, @@ -874,7 +901,7 @@ index 7e92fb2e15..be718220d7 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 0c9d14b4b1..a02d569537 100644 +index 0c9d14b..a02d569 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -2387,6 +2387,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) @@ -893,7 +920,7 @@ index 0c9d14b4b1..a02d569537 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ diff --git a/util/memfd.c b/util/memfd.c -index 4a3c07e0be..3303ec9da4 100644 
+index 4a3c07e..3303ec9 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -193,7 +193,7 @@ bool qemu_memfd_alloc_check(void) @@ -906,7 +933,7 @@ index 4a3c07e0be..3303ec9da4 100644 if (mfd >= 0) { diff --git a/vl.c b/vl.c -index 6a65a64bfd..668a34577e 100644 +index 6a65a64..668a345 100644 --- a/vl.c +++ b/vl.c @@ -166,7 +166,7 @@ Chardev *parallel_hds[MAX_PARALLEL_PORTS]; @@ -963,5 +990,5 @@ index 6a65a64bfd..668a34577e 100644 if (!xen_enabled()) { /* On 32-bit hosts, QEMU is limited by virtual address space */ -- -2.21.0 +1.8.3.1 diff --git a/0007-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch index d7fed30..acea7d3 100644 --- a/0007-Machine-type-related-general-changes.patch +++ b/0007-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From 701a0ad0e6220c5cf9d860e3689f79f8154274bd Mon Sep 17 00:00:00 2001 +From c47eea7192e103a6cc57cd7d07421b8e684d3db5 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -34,28 +34,28 @@ Merged patches (weekly-190823): Signed-off-by: Danilo C. L. 
de Paula --- - hw/acpi/ich9.c | 16 ++++ + hw/acpi/ich9.c | 16 +++++ hw/acpi/piix4.c | 5 +- - hw/char/serial.c | 16 ++++ - hw/core/machine.c | 161 ++++++++++++++++++++++++++++++++++++++++ + hw/char/serial.c | 16 +++++ + hw/core/machine.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- - hw/net/e1000e.c | 21 ++++++ + hw/net/e1000e.c | 21 +++++++ hw/net/rtl8139.c | 4 +- hw/rtc/mc146818rtc.c | 6 ++ hw/smbios/smbios.c | 1 + hw/timer/i8254_common.c | 2 +- hw/usb/hcd-uhci.c | 4 +- - hw/usb/hcd-xhci.c | 20 +++++ + hw/usb/hcd-xhci.c | 20 ++++++ hw/usb/hcd-xhci.h | 2 + include/hw/acpi/ich9.h | 3 + - include/hw/boards.h | 21 ++++++ - include/hw/usb.h | 4 + + include/hw/boards.h | 21 +++++++ + include/hw/usb.h | 4 ++ migration/migration.c | 2 + migration/migration.h | 5 ++ 18 files changed, 289 insertions(+), 6 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index ab203ad448..7ec26884e8 100644 +index ab203ad..7ec2688 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -444,6 +444,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) @@ -89,7 +89,7 @@ index ab203ad448..7ec26884e8 100644 ich9_pm_get_disable_s3, ich9_pm_set_disable_s3, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 93aec2dd2c..3a26193cbe 100644 +index 93aec2d..3a26193 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -274,6 +274,7 @@ static const VMStateDescription vmstate_acpi = { @@ -112,7 +112,7 @@ index 93aec2dd2c..3a26193cbe 100644 DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_pci_hotplug, true), diff --git a/hw/char/serial.c b/hw/char/serial.c -index b4aa250950..0012f0e44d 100644 +index b4aa250..0012f0e 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -34,6 +34,7 @@ @@ -167,7 +167,7 @@ index b4aa250950..0012f0e44d 100644 } diff --git a/hw/core/machine.c b/hw/core/machine.c -index 1689ad3bf8..2b130bb585 100644 +index 1689ad3..2b130bb 100644 --- a/hw/core/machine.c +++ 
b/hw/core/machine.c @@ -27,6 +27,167 @@ @@ -339,7 +339,7 @@ index 1689ad3bf8..2b130bb585 100644 { "virtio-pci", "x-pcie-flr-init", "off" }, }; diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index 873e5e9706..d1a2efe47e 100644 +index 873e5e9..d1a2efe 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c @@ -82,7 +82,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) @@ -352,7 +352,7 @@ index 873e5e9706..d1a2efe47e 100644 }; diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index b69fd7d8ad..d8be50a1ce 100644 +index b69fd7d..d8be50a 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -79,6 +79,11 @@ typedef struct E1000EState { @@ -426,7 +426,7 @@ index b69fd7d8ad..d8be50a1ce 100644 e1000e_prop_disable_vnet, bool), DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 88a97d756d..21d80e96cf 100644 +index 88a97d7..21d80e9 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3177,7 +3177,7 @@ static int rtl8139_pre_save(void *opaque) @@ -449,7 +449,7 @@ index 88a97d756d..21d80e96cf 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c -index ee6bf82b40..d704213824 100644 +index 74ae74b..7382051 100644 --- a/hw/rtc/mc146818rtc.c +++ b/hw/rtc/mc146818rtc.c @@ -42,6 +42,7 @@ @@ -460,7 +460,7 @@ index ee6bf82b40..d704213824 100644 #ifdef TARGET_I386 #include "qapi/qapi-commands-misc-target.h" -@@ -819,6 +820,11 @@ static int rtc_post_load(void *opaque, int version_id) +@@ -820,6 +821,11 @@ static int rtc_post_load(void *opaque, int version_id) static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) { RTCState *s = (RTCState *)opaque; @@ -473,7 +473,7 @@ index ee6bf82b40..d704213824 100644 } diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 11d476c4a2..e6e9355384 100644 +index 11d476c..e6e9355 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -777,6 +777,7 @@ void 
smbios_set_defaults(const char *manufacturer, const char *product, @@ -485,7 +485,7 @@ index 11d476c4a2..e6e9355384 100644 SMBIOS_SET_DEFAULT(type2.product, product); SMBIOS_SET_DEFAULT(type2.version, version); diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index 050875b497..32935da46c 100644 +index 050875b..32935da 100644 --- a/hw/timer/i8254_common.c +++ b/hw/timer/i8254_common.c @@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = { @@ -498,7 +498,7 @@ index 050875b497..32935da46c 100644 vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 23507ad3b5..9fd87a7ad9 100644 +index 23507ad..9fd87a7 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c @@ -1219,12 +1219,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) @@ -518,7 +518,7 @@ index 23507ad3b5..9fd87a7ad9 100644 if (s->masterbus) { USBPort *ports[NB_PORTS]; diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 80988bb305..8fed2eedd6 100644 +index 80988bb..8fed2ee 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -3590,9 +3590,27 @@ static const VMStateDescription vmstate_xhci_slot = { @@ -559,7 +559,7 @@ index 80988bb305..8fed2eedd6 100644 } }; diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h -index 2fad4df2a7..f554b671e3 100644 +index 2fad4df..f554b67 100644 --- a/hw/usb/hcd-xhci.h +++ b/hw/usb/hcd-xhci.h @@ -157,6 +157,8 @@ typedef struct XHCIEvent { @@ -572,7 +572,7 @@ index 2fad4df2a7..f554b671e3 100644 typedef struct XHCIInterrupter { diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h -index 41568d1837..1a23ccc412 100644 +index 41568d1..1a23ccc 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -61,6 +61,9 @@ typedef struct ICH9LPCPMRegs { @@ -586,7 +586,7 @@ index 41568d1837..1a23ccc412 100644 #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" diff --git a/include/hw/boards.h b/include/hw/boards.h -index 
de45087f34..cc11116585 100644 +index de45087..cc11116 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -377,4 +377,25 @@ extern const size_t hw_compat_2_2_len; @@ -616,7 +616,7 @@ index de45087f34..cc11116585 100644 + #endif diff --git a/include/hw/usb.h b/include/hw/usb.h -index c24d968a19..b353438ea0 100644 +index c24d968..b353438 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h @@ -605,4 +605,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, @@ -629,7 +629,7 @@ index c24d968a19..b353438ea0 100644 + #endif diff --git a/migration/migration.c b/migration/migration.c -index 354ad072fa..30c53c623b 100644 +index 354ad07..30c53c6 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -121,6 +121,8 @@ enum mig_rp_message_type { @@ -642,7 +642,7 @@ index 354ad072fa..30c53c623b 100644 migrations at once. For now we don't need to add dynamic creation of migration */ diff --git a/migration/migration.h b/migration/migration.h -index 79b3dda146..0b1b0d4df5 100644 +index 79b3dda..0b1b0d4 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -335,6 +335,11 @@ void init_dirty_bitmap_incoming_migration(void); @@ -658,5 +658,5 @@ index 79b3dda146..0b1b0d4df5 100644 #define qemu_ram_foreach_block \ #warning "Use foreach_not_ignored_block in migration code" -- -2.21.0 +1.8.3.1 diff --git a/0008-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch index 26a51d2..5691082 100644 --- a/0008-Add-aarch64-machine-types.patch +++ b/0008-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 5d044a17a88f2e6adc72e2b6579052e2a3e98e97 Mon Sep 17 00:00:00 2001 +From e9fbad8d9e530189a029533f738bac62559e4d52 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -14,9 +14,15 @@ Rebase changes (4.1.0-rc0): - Removed a15memmap (upstream) - Use virt_flash_create in rhel800_virt_instance_init -Rebase changes (weekly-190913): +Rebase changes (4.2.0-rc0): - Set 
numa_mem_supported +Rebase notes (4.2.0-rc3): +- aarch64: Add virt-rhel8.2.0 machine type for ARM (patch 92246) +- aarch64: virt: Allow more than 1TB of RAM (patch 92249) +- aarch64: virt: Allow PCDIMM instantiation (patch 92247) +- aarch64: virt: Enhance the comment related to gic-version (patch 92248) + Merged patches (4.0.0): - 7bfdb4c aarch64: Add virt-rhel8.0.0 machine type for ARM - 3433e69 aarch64: Set virt-rhel8.0.0 max_cpus to 512 @@ -28,12 +34,12 @@ Merged patches (4.1.0): Signed-off-by: Danilo C. L. de Paula --- - hw/arm/virt.c | 145 +++++++++++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 161 +++++++++++++++++++++++++++++++++++++++++++++++++- include/hw/arm/virt.h | 11 ++++ - 2 files changed, 155 insertions(+), 1 deletion(-) + 2 files changed, 171 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d4bedc2607..1892378914 100644 +index d4bedc2..e108391 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -72,6 +72,7 @@ @@ -111,15 +117,15 @@ index d4bedc2607..1892378914 100644 static bool virt_get_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1912,6 +1957,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) - return ms->possible_cpus; +@@ -2022,6 +2067,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) + return requested_pa_size > 40 ? 
requested_pa_size : 0; } +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp) + static void virt_machine_class_init(ObjectClass *oc, void *data) { -@@ -2258,3 +2304,100 @@ static void virt_machine_2_6_options(MachineClass *mc) + MachineClass *mc = MACHINE_CLASS(oc); +@@ -2258,3 +2304,116 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -128,6 +134,7 @@ index d4bedc2607..1892378914 100644 +static void rhel_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); ++ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); + + mc->family = "virt-rhel-Z"; + mc->init = machvirt_init; @@ -145,7 +152,14 @@ index d4bedc2607..1892378914 100644 + mc->cpu_index_to_instance_props = virt_cpu_index_to_props; + mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a57"); + mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; ++ mc->kvm_type = virt_kvm_type; ++ assert(!mc->get_hotplug_handler); ++ mc->get_hotplug_handler = virt_machine_get_hotplug_handler; ++ hc->pre_plug = virt_machine_device_pre_plug_cb; ++ hc->plug = virt_machine_device_plug_cb; ++ hc->unplug_request = virt_machine_device_unplug_request_cb; + mc->numa_mem_supported = true; ++ mc->auto_enable_numa_with_memhp = true; +} + +static const TypeInfo rhel_machine_info = { @@ -155,6 +169,10 @@ index d4bedc2607..1892378914 100644 + .instance_size = sizeof(VirtMachineState), + .class_size = sizeof(VirtMachineClass), + .class_init = rhel_machine_class_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_HOTPLUG_HANDLER }, ++ { } ++ }, +}; + +static void rhel_machine_init(void) @@ -163,7 +181,7 @@ index d4bedc2607..1892378914 100644 +} +type_init(rhel_machine_init); + -+static void rhel810_virt_instance_init(Object *obj) ++static void rhel820_virt_instance_init(Object *obj) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + VirtMachineClass 
*vmc = VIRT_MACHINE_GET_CLASS(vms); @@ -180,7 +198,11 @@ index d4bedc2607..1892378914 100644 + "Set on/off to enable/disable using " + "physical address space above 32 bits", + NULL); -+ /* Default GIC type is still v2, but became configurable for RHEL */ ++ /* ++ * Default GIC type is still v2, but became configurable for RHEL. We ++ * keep v2 instead of max as TCG CI test cases require an MSI controller ++ * and there is no userspace ITS MSI emulation available. ++ */ + vms->gic_version = 2; + object_property_add_str(obj, "gic-version", virt_get_gic_version, + virt_set_gic_version, NULL); @@ -215,13 +237,13 @@ index d4bedc2607..1892378914 100644 + virt_flash_create(vms); +} + -+static void rhel810_virt_options(MachineClass *mc) ++static void rhel820_virt_options(MachineClass *mc) +{ + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); +} -+DEFINE_RHEL_MACHINE_AS_LATEST(8, 1, 0) ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 2, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 0b41083e9d..53fdf16563 100644 +index 0b41083..53fdf16 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -142,6 +142,7 @@ typedef struct { @@ -250,5 +272,5 @@ index 0b41083e9d..53fdf16563 100644 /* Return the number of used redistributor regions */ -- -2.21.0 +1.8.3.1 diff --git a/0009-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch index a47f115..ba4e0fb 100644 --- a/0009-Add-ppc64-machine-types.patch +++ b/0009-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From dbf2123f930a53e949cbeea7a272e453f3efe124 Mon Sep 17 00:00:00 2001 +From 4a62a0bf849ff8f533d3fc5bd3faec6bd09cf562 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -31,7 +31,7 @@ Merged patches (weekly-190830): Signed-off-by: Danilo C. L. 
de Paula --- - hw/ppc/spapr.c | 251 ++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr.c | 251 ++++++++++++++++++++++++++++++++++++++++++++++++ hw/ppc/spapr_cpu_core.c | 13 +++ include/hw/ppc/spapr.h | 1 + target/ppc/compat.c | 13 ++- @@ -39,10 +39,10 @@ Signed-off-by: Danilo C. L. de Paula 5 files changed, 278 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 94f9d27096..59ccd182d4 100644 +index e076f60..f3652ed 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -4442,6 +4442,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4447,6 +4447,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->linux_pci_probe = true; smc->smp_threads_vsmt = true; smc->nr_xirqs = SPAPR_NR_XIRQS; @@ -50,7 +50,7 @@ index 94f9d27096..59ccd182d4 100644 } static const TypeInfo spapr_machine_info = { -@@ -4486,6 +4487,7 @@ static const TypeInfo spapr_machine_info = { +@@ -4491,6 +4492,7 @@ static const TypeInfo spapr_machine_info = { } \ type_init(spapr_machine_register_##suffix) @@ -58,7 +58,7 @@ index 94f9d27096..59ccd182d4 100644 /* * pseries-4.2 */ -@@ -4515,6 +4517,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4520,6 +4522,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -66,7 +66,7 @@ index 94f9d27096..59ccd182d4 100644 /* * pseries-4.0 -@@ -4531,6 +4534,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -4536,6 +4539,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; } @@ -74,7 +74,7 @@ index 94f9d27096..59ccd182d4 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4690,6 +4694,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); +@@ -4695,6 +4699,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); /* * pseries-2.7 */ @@ -82,7 +82,7 @@ index 94f9d27096..59ccd182d4 100644 static 
void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, -@@ -4744,6 +4749,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, +@@ -4749,6 +4754,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; } @@ -90,7 +90,7 @@ index 94f9d27096..59ccd182d4 100644 static void spapr_machine_2_7_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4858,6 +4864,251 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -4863,6 +4869,251 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -343,7 +343,7 @@ index 94f9d27096..59ccd182d4 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index ef034a1279..05f0a83128 100644 +index 301cd7b..ba5a8fb 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -24,6 +24,7 @@ @@ -354,7 +354,7 @@ index ef034a1279..05f0a83128 100644 static void spapr_reset_vcpu(PowerPCCPU *cpu) { -@@ -247,6 +248,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -242,6 +243,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); Error *local_err = NULL; @@ -362,7 +362,7 @@ index ef034a1279..05f0a83128 100644 object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); if (local_err) { -@@ -259,6 +261,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -254,6 +256,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); kvmppc_set_papr(cpu); @@ -381,7 +381,7 @@ index ef034a1279..05f0a83128 100644 goto error_intc_create; } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h 
-index d5ab5ea7b2..aa89cc4a95 100644 +index d5ab5ea..aa89cc4 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -125,6 +125,7 @@ struct SpaprMachineClass { @@ -393,7 +393,7 @@ index d5ab5ea7b2..aa89cc4a95 100644 uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, diff --git a/target/ppc/compat.c b/target/ppc/compat.c -index 7de4bf3122..3e2e35342d 100644 +index 7de4bf3..3e2e353 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -105,8 +105,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) @@ -418,7 +418,7 @@ index 7de4bf3122..3e2e35342d 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index e3e82327b7..5c53801cfd 100644 +index e3e8232..5c53801 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1367,6 +1367,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) @@ -430,5 +430,5 @@ index e3e82327b7..5c53801cfd 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, -- -2.21.0 +1.8.3.1 diff --git a/0010-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch index d6a8633..9fb964a 100644 --- a/0010-Add-s390x-machine-types.patch +++ b/0010-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 18847aab397e7480af49e3a5cd4f6e3b7deae361 Mon Sep 17 00:00:00 2001 +From 88abdedad4c594c86eb2b92d490b676fa7494d6c Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -22,11 +22,11 @@ Merged patches (weekly-190830): Signed-off-by: Danilo C. L. 
de Paula --- - hw/s390x/s390-virtio-ccw.c | 80 +++++++++++++++++++++++++++++++++++++- + hw/s390x/s390-virtio-ccw.c | 80 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index d3edeef0ad..7bca634666 100644 +index d3edeef..7bca634 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -615,7 +615,7 @@ bool css_migration_enabled(void) @@ -131,5 +131,5 @@ index d3edeef0ad..7bca634666 100644 static void ccw_machine_register_types(void) { -- -2.21.0 +1.8.3.1 diff --git a/0011-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch index a39e26b..d7af8fd 100644 --- a/0011-Add-x86_64-machine-types.patch +++ b/0011-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 3f0ddfe3f8bc734af3f68884c01c58800ef42d0d Mon Sep 17 00:00:00 2001 +From aef82bbd0ad99c43bc0b97932cf87fd16303bf5e Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -16,6 +16,9 @@ Rebase notes (3.1.0): Rebase notes (4.1.0): - Updated format for compat structures +Rebase notes (4.2.0-rc2): +- Use X86MachineClass for save_tsc_khz (upstream change) + Merged patches (4.1.0): - f4dc802 pc: 7.5 compat entries - 456ed3e pc: PC_RHEL7_6_COMPAT @@ -36,17 +39,17 @@ Merged patches (weekly-190823): Signed-off-by: Danilo C. L. 
de Paula --- hw/i386/acpi-build.c | 3 + - hw/i386/pc.c | 259 ++++++++++++++++++++++++++++++++++++++++++- - hw/i386/pc_piix.c | 207 +++++++++++++++++++++++++++++++++- - hw/i386/pc_q35.c | 139 ++++++++++++++++++++++- + hw/i386/pc.c | 259 ++++++++++++++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 208 ++++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_q35.c | 139 ++++++++++++++++++++++++++- include/hw/boards.h | 2 + - include/hw/i386/pc.h | 30 +++++ + include/hw/i386/pc.h | 30 ++++++ target/i386/cpu.c | 9 +- target/i386/kvm.c | 4 + - 8 files changed, 646 insertions(+), 7 deletions(-) + 8 files changed, 647 insertions(+), 7 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index 12ff55fcfb..64001893ab 100644 +index 12ff55f..6400189 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -204,6 +204,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) @@ -60,7 +63,7 @@ index 12ff55fcfb..64001893ab 100644 } diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 96715f8a3f..677b63a37f 100644 +index ac08e63..28850ae 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -344,6 +344,257 @@ GlobalProperty pc_compat_1_4[] = { @@ -331,7 +334,7 @@ index 96715f8a3f..677b63a37f 100644 memory_region_set_readonly(option_rom_mr, true); } memory_region_add_subregion_overlap(rom_memory, -@@ -2199,6 +2451,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -2198,6 +2450,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->linuxboot_dma_enabled = true; pcmc->pvh_enabled = true; assert(!mc->get_hotplug_handler); @@ -340,7 +343,7 @@ index 96715f8a3f..677b63a37f 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -2210,7 +2464,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -2209,7 +2463,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) 
mc->hot_add_cpu = pc_hot_add_cpu; mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; @@ -351,7 +354,7 @@ index 96715f8a3f..677b63a37f 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index a19f8058ab..a66005a0ec 100644 +index 1bd70d1..7f2ee97 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -53,6 +53,7 @@ @@ -362,7 +365,7 @@ index a19f8058ab..a66005a0ec 100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -176,8 +177,8 @@ static void pc_init1(MachineState *machine, +@@ -173,8 +174,8 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ @@ -373,7 +376,7 @@ index a19f8058ab..a66005a0ec 100644 pcmc->smbios_uuid_encoded, SMBIOS_ENTRY_POINT_21); } -@@ -309,6 +310,7 @@ else { +@@ -307,6 +308,7 @@ else { * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). 
*/ @@ -381,7 +384,7 @@ index a19f8058ab..a66005a0ec 100644 static void pc_compat_2_3_fn(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); -@@ -1028,3 +1030,204 @@ static void xenfv_machine_options(MachineClass *m) +@@ -1026,3 +1028,205 @@ static void xenfv_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init, xenfv_machine_options); #endif @@ -497,10 +500,11 @@ index a19f8058ab..a66005a0ec 100644 +static void pc_machine_rhel720_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ X86MachineClass *x86mc = X86_MACHINE_CLASS(m); + pc_machine_rhel730_options(m); + m->desc = "RHEL 7.2.0 PC (i440FX + PIIX, 1996)"; + /* From pc_i440fx_2_5_machine_options */ -+ pcmc->save_tsc_khz = false; ++ x86mc->save_tsc_khz = false; + m->legacy_fw_cfg_order = 1; + /* Note: broken_reserved_end was already in 7.2 */ + /* From pc_i440fx_2_6_machine_options */ @@ -587,7 +591,7 @@ index a19f8058ab..a66005a0ec 100644 +DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index d51f524727..542947b032 100644 +index 385e5cf..4598db2 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -197,8 +197,8 @@ static void pc_q35_init(MachineState *machine) @@ -748,7 +752,7 @@ index d51f524727..542947b032 100644 +DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, + pc_q35_machine_rhel730_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index cc11116585..3cc126f0f4 100644 +index cc11116..3cc126f 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -222,6 +222,8 @@ struct MachineClass { @@ -761,10 +765,10 @@ index cc11116585..3cc126f0f4 100644 int nb_nodes, ram_addr_t size); bool ignore_boot_device_suffixes; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index e6fa8418ca..379ed968b3 100644 +index 1f86eba..dd680ae 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ 
-126,6 +126,9 @@ typedef struct PCMachineClass { +@@ -124,6 +124,9 @@ typedef struct PCMachineClass { /* use PVH to load kernels that support this feature */ bool pvh_enabled; @@ -774,7 +778,7 @@ index e6fa8418ca..379ed968b3 100644 } PCMachineClass; #define TYPE_PC_MACHINE "generic-pc-machine" -@@ -302,6 +305,33 @@ extern const size_t pc_compat_1_5_len; +@@ -300,6 +303,33 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; @@ -809,10 +813,10 @@ index e6fa8418ca..379ed968b3 100644 * depending on QEMU versions up to QEMU 2.4. */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index ba5e9faeae..a4ae730ca5 100644 +index 1b7880a..790db77 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1801,11 +1801,17 @@ static CPUCaches epyc_cache_info = { +@@ -1829,11 +1829,17 @@ static CPUCaches epyc_cache_info = { static X86CPUDefinition builtin_x86_defs[] = { { @@ -831,7 +835,7 @@ index ba5e9faeae..a4ae730ca5 100644 .stepping = 3, .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -@@ -3173,6 +3179,7 @@ static PropValue kvm_default_props[] = { +@@ -3932,6 +3938,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -840,10 +844,10 @@ index ba5e9faeae..a4ae730ca5 100644 }; diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index bfd09bd441..da312a4801 100644 +index bf16556..1b19797 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c -@@ -3064,6 +3064,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3071,6 +3071,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -851,7 +855,7 @@ index bfd09bd441..da312a4801 100644 kvm_msr_buf_reset(cpu); -@@ -3370,6 +3371,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3380,6 +3381,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case 
MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -862,5 +866,5 @@ index bfd09bd441..da312a4801 100644 case MSR_KVM_PV_EOI_EN: env->pv_eoi_en_msr = msrs[i].data; -- -2.21.0 +1.8.3.1 diff --git a/0012-Enable-make-check.patch b/0012-Enable-make-check.patch index 5eed2b1..c56a715 100644 --- a/0012-Enable-make-check.patch +++ b/0012-Enable-make-check.patch @@ -1,4 +1,4 @@ -From e8c1a5f4768a41cabdcb54cfdbc1a5a4146ff1ad Mon Sep 17 00:00:00 2001 +From 88d09fe5c46c80214f883bd097ca86a99ca1ca41 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:48:41 +0200 Subject: Enable make check @@ -44,7 +44,7 @@ Signed-off-by: Danilo C. L. de Paula 11 files changed, 37 insertions(+), 17 deletions(-) diff --git a/tests/Makefile.include b/tests/Makefile.include -index b483790cf3..53bdbdfee0 100644 +index b483790..53bdbdf 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -172,7 +172,7 @@ check-qtest-i386-y += tests/ide-test$(EXESUF) @@ -86,7 +86,7 @@ index b483790cf3..53bdbdfee0 100644 check-qtest-s390x-y += tests/cpu-plug-test$(EXESUF) check-qtest-s390x-y += tests/migration-test$(EXESUF) diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c -index d3a54a0ba5..33ce72b89c 100644 +index d3a54a0..33ce72b 100644 --- a/tests/boot-serial-test.c +++ b/tests/boot-serial-test.c @@ -108,19 +108,23 @@ static testdef_t tests[] = { @@ -115,7 +115,7 @@ index d3a54a0ba5..33ce72b89c 100644 { "sparc", "LX", "", "TMS390S10" }, { "sparc", "SS-4", "", "MB86904" }, diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c -index 30e514bbfb..a04beae1c6 100644 +index 30e514b..a04beae 100644 --- a/tests/cpu-plug-test.c +++ b/tests/cpu-plug-test.c @@ -185,8 +185,8 @@ static void add_pseries_test_case(const char *mname) @@ -130,7 +130,7 @@ index 30e514bbfb..a04beae1c6 100644 } data = g_new(PlugTestData, 1); diff --git a/tests/e1000-test.c b/tests/e1000-test.c -index c387984ef6..c89112d6f8 100644 +index c387984..c89112d 100644 --- a/tests/e1000-test.c 
+++ b/tests/e1000-test.c @@ -22,9 +22,11 @@ struct QE1000 { @@ -146,7 +146,7 @@ index c387984ef6..c89112d6f8 100644 static void *e1000_get_driver(void *obj, const char *interface) diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c -index 7e86c5416c..cc068bad87 100644 +index 7e86c54..cc068ba 100644 --- a/tests/hd-geo-test.c +++ b/tests/hd-geo-test.c @@ -732,6 +732,7 @@ static void test_override_ide(void) @@ -178,7 +178,7 @@ index 7e86c5416c..cc068bad87 100644 qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); qtest_add_func("hd-geo/override/scsi_hot_unplug", diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c -index 61bc1d1e7b..028d45c7d7 100644 +index 61bc1d1..028d45c 100644 --- a/tests/prom-env-test.c +++ b/tests/prom-env-test.c @@ -88,10 +88,14 @@ int main(int argc, char *argv[]) @@ -197,7 +197,7 @@ index 61bc1d1e7b..028d45c7d7 100644 add_tests(sparc_machines); } else if (!strcmp(arch, "sparc64")) { diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 -index 53bcdbc911..b387e0c233 100755 +index 53bcdbc..b387e0c 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -181,11 +181,11 @@ run_qemu -drive if=virtio @@ -231,7 +231,7 @@ index 53bcdbc911..b387e0c233 100755 *) ;; diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 065040398d..959fb52824 100644 +index 6b10a6a..06cc734 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -92,7 +92,7 @@ @@ -253,7 +253,7 @@ index 065040398d..959fb52824 100644 101 rw quick 102 rw quick diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c -index 772287bdb4..e7c075ed98 100644 +index 772287b..e7c075e 100644 --- a/tests/test-x86-cpuid-compat.c +++ b/tests/test-x86-cpuid-compat.c @@ -300,6 +300,7 @@ int main(int argc, char **argv) @@ -273,7 +273,7 @@ index 772287bdb4..e7c075ed98 100644 /* Test feature parsing */ add_feature_test("x86/cpuid/features/plus", diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c 
-index 10ef9d2a91..3855873050 100644 +index 10ef9d2..3855873 100644 --- a/tests/usb-hcd-xhci-test.c +++ b/tests/usb-hcd-xhci-test.c @@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) @@ -303,5 +303,5 @@ index 10ef9d2a91..3855873050 100644 qtest_start("-device nec-usb-xhci,id=xhci" -- -2.21.0 +1.8.3.1 diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index b9fbacc..ed8007a 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 5ff7edc72cce5a04a816fd71b0198f2d530d6630 Mon Sep 17 00:00:00 2001 +From 58e199b5cb9364f46b68d8cf618dff7564d427bb Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -42,7 +42,7 @@ Signed-off-by: Danilo C. L. de Paula 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 5cff163334..5184011217 100644 +index c8534d3..309535f 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -47,6 +47,9 @@ @@ -55,7 +55,7 @@ index 5cff163334..5184011217 100644 static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); -@@ -2710,9 +2713,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) +@@ -2722,9 +2725,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) ssize_t len; struct stat st; int groupid; @@ -87,7 +87,7 @@ index 5cff163334..5184011217 100644 if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3147,6 +3171,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3167,6 +3191,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ 
-98,7 +98,7 @@ index 5cff163334..5184011217 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index b329d50338..b37c81d882 100644 +index 35626cd..0cd4803 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -135,6 +135,7 @@ typedef struct VFIOPCIDevice { @@ -110,5 +110,5 @@ index b329d50338..b37c81d882 100644 uint32_t device_id; uint32_t sub_vendor_id; -- -2.21.0 +1.8.3.1 diff --git a/0014-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch index 3eb53c6..d52f32e 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 339380067c7f385a9a6c368204e24193b630e78b Mon Sep 17 00:00:00 2001 +From 73edca2e4885b3f2b65edb08b6cdb53d68494c13 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -24,7 +24,7 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 9 insertions(+) diff --git a/vl.c b/vl.c -index 668a34577e..9f3e7e7733 100644 +index 668a345..9f3e7e7 100644 --- a/vl.c +++ b/vl.c @@ -1822,9 +1822,17 @@ static void version(void) @@ -54,5 +54,5 @@ index 668a34577e..9f3e7e7733 100644 } -- -2.21.0 +1.8.3.1 diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch index 88320d0..936b929 100644 --- a/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From 25283fab966a911cbeb4d0d3524cb1e0f1f8f448 Mon Sep 17 00:00:00 2001 +From 9647d94ec17c7fa645336a881147a7e841cb78c2 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -82,10 +82,10 @@ Signed-off-by: Danilo C. L. 
de Paula 2 files changed, 30 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index 140b0bd8f6..7a4399c3ef 100644 +index ca00daa..dc3ed7f 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c -@@ -1925,6 +1925,18 @@ static int kvm_init(MachineState *ms) +@@ -1943,6 +1943,18 @@ static int kvm_init(MachineState *ms) soft_vcpus_limit = kvm_recommended_vcpus(s); hard_vcpus_limit = kvm_max_vcpus(s); @@ -105,7 +105,7 @@ index 140b0bd8f6..7a4399c3ef 100644 if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " diff --git a/vl.c b/vl.c -index 9f3e7e7733..1550aa2aaa 100644 +index 9f3e7e7..1550aa2 100644 --- a/vl.c +++ b/vl.c @@ -134,6 +134,8 @@ int main(int argc, char **argv) @@ -148,5 +148,5 @@ index 9f3e7e7733..1550aa2aaa 100644 configure_rtc(qemu_find_opts_singleton("rtc")); -- -2.21.0 +1.8.3.1 diff --git a/0016-Add-support-for-simpletrace.patch b/0016-Add-support-for-simpletrace.patch index 11a7c10..fda1b79 100644 --- a/0016-Add-support-for-simpletrace.patch +++ b/0016-Add-support-for-simpletrace.patch @@ -1,4 +1,4 @@ -From 850e66fc482eb897babd9e1f3d1216fd0f7c6382 Mon Sep 17 00:00:00 2001 +From d78f2713725382c792154ce482a1b03b749b909f Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 8 Oct 2015 09:50:17 +0200 Subject: Add support for simpletrace @@ -26,8 +26,8 @@ Signed-off-by: Danilo C. L. de Paula --- .gitignore | 2 ++ Makefile | 4 +++ - README.systemtap | 43 +++++++++++++++++++++++++ - redhat/qemu-kvm.spec.template | 26 ++++++++++++++- + README.systemtap | 43 +++++++++++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 26 +++++++++++++++++++- scripts/systemtap/conf.d/qemu_kvm.conf | 4 +++ scripts/systemtap/script.d/qemu_kvm.stp | 1 + 6 files changed, 79 insertions(+), 1 deletion(-) @@ -36,7 +36,7 @@ Signed-off-by: Danilo C. L. 
de Paula create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp diff --git a/Makefile b/Makefile -index 086727dbb9..4254950f7f 100644 +index 086727d..4254950 100644 --- a/Makefile +++ b/Makefile @@ -939,6 +939,10 @@ endif @@ -52,7 +52,7 @@ index 086727dbb9..4254950f7f 100644 ctags: diff --git a/README.systemtap b/README.systemtap new file mode 100644 -index 0000000000..ad913fc990 +index 0000000..ad913fc --- /dev/null +++ b/README.systemtap @@ -0,0 +1,43 @@ @@ -101,7 +101,7 @@ index 0000000000..ad913fc990 + # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf new file mode 100644 -index 0000000000..372d8160a4 +index 0000000..372d816 --- /dev/null +++ b/scripts/systemtap/conf.d/qemu_kvm.conf @@ -0,0 +1,4 @@ @@ -111,11 +111,11 @@ index 0000000000..372d8160a4 +qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp new file mode 100644 -index 0000000000..c04abf9449 +index 0000000..c04abf9 --- /dev/null +++ b/scripts/systemtap/script.d/qemu_kvm.stp @@ -0,0 +1 @@ +probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} -- -2.21.0 +1.8.3.1 diff --git a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 7d0d15c..707515d 100644 --- a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From dce5c0db33a1f7420254944c78962ca1887d3c08 Mon Sep 17 00:00:00 2001 +From 4dcf2dac71a39d2e5b44cf6a4a43abdd89a11c60 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 30 Nov 2018 09:11:03 +0100 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -25,7 +25,7 @@ Signed-off-by: Danilo C. L. 
de Paula 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi -index 2c7ea49c32..5d0afb3dee 100644 +index 2c7ea49..5d0afb3 100644 --- a/docs/qemu-block-drivers.texi +++ b/docs/qemu-block-drivers.texi @@ -2,7 +2,7 @@ @@ -38,7 +38,7 @@ index 2c7ea49c32..5d0afb3dee 100644 @c man begin DESCRIPTION diff --git a/docs/qemu-cpu-models.texi b/docs/qemu-cpu-models.texi -index f88a1def0d..c82cf8fab7 100644 +index f88a1de..c82cf8f 100644 --- a/docs/qemu-cpu-models.texi +++ b/docs/qemu-cpu-models.texi @@ -2,7 +2,7 @@ @@ -51,7 +51,7 @@ index f88a1def0d..c82cf8fab7 100644 @c man begin DESCRIPTION diff --git a/qemu-doc.texi b/qemu-doc.texi -index 3c5022050f..f770e86a90 100644 +index 3ddf5c0..d460f8d 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -11,8 +11,8 @@ @@ -65,7 +65,7 @@ index 3c5022050f..f770e86a90 100644 @ifinfo @direntry -@@ -1826,7 +1826,7 @@ Set the initial VGA graphic mode. The default is 800x600x32. +@@ -1827,7 +1827,7 @@ Set the initial VGA graphic mode. The default is 800x600x32. Set OpenBIOS variables in NVRAM, for example: @example @@ -75,7 +75,7 @@ index 3c5022050f..f770e86a90 100644 -prom-env 'boot-args=conf=hd:2,\yaboot.conf' @end example diff --git a/qemu-options.hx b/qemu-options.hx -index fc17aca631..df1d27b6f2 100644 +index fc17aca..df1d27b 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2737,11 +2737,11 @@ be created for multiqueue vhost-user. @@ -114,5 +114,5 @@ index fc17aca631..df1d27b6f2 100644 ETEXI -- -2.21.0 +1.8.3.1 diff --git a/0018-usb-xhci-Fix-PCI-capability-order.patch b/0018-usb-xhci-Fix-PCI-capability-order.patch index 1c36a55..4a49700 100644 --- a/0018-usb-xhci-Fix-PCI-capability-order.patch +++ b/0018-usb-xhci-Fix-PCI-capability-order.patch @@ -1,4 +1,4 @@ -From 9fe2902d4c8f5cd5ad72af06c6bc54813e642e27 Mon Sep 17 00:00:00 2001 +From eba5ef4e161aeb71df26b1c43577945ae3093f2b Mon Sep 17 00:00:00 2001 From: "Dr. 
David Alan Gilbert" Date: Fri, 5 May 2017 19:06:14 +0200 Subject: usb-xhci: Fix PCI capability order @@ -62,7 +62,7 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 8fed2eedd6..d2b9744030 100644 +index 8fed2ee..d2b9744 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -3403,6 +3403,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) @@ -92,5 +92,5 @@ index 8fed2eedd6..d2b9744030 100644 /* TODO check for errors, and should fail when msix=on */ msix_init(dev, xhci->numintrs, -- -2.21.0 +1.8.3.1 diff --git a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index 2b73bec..ffb678e 100644 --- a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From e6fd66897236f8a3348235447ed32b8e5de109bb Mon Sep 17 00:00:00 2001 +From 033166f31288a5104d4e55a828ce8d62c9ccca1c Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] @@ -45,7 +45,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index e8b2b64d09..54108c0056 100644 +index e8b2b64..54108c0 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -808,6 +808,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, @@ -65,5 +65,5 @@ index e8b2b64d09..54108c0056 100644 if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { return; -- -2.21.0 +1.8.3.1 diff --git a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index 27138fe..7940b26 100644 --- a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From 8b3c1edc606bea84b5e52369ed8d211889a44b6c Mon Sep 17 00:00:00 2001 +From 6ce9df118313e3b2d21c70994d0b42b33b59d27c Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts @@ -32,7 +32,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index 481dfd2a27..805f38533e 100644 +index 481dfd2..805f385 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -351,12 +351,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, @@ -56,5 +56,5 @@ index 481dfd2a27..805f38533e 100644 spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); } -- -2.21.0 +1.8.3.1 diff --git a/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch b/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch index 10fe2ab..5b8a57d 100644 --- a/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch +++ b/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch @@ -1,4 +1,4 @@ -From 9c91fc25dd4edd7447a342dd37b2fd8d3e2301f6 Mon Sep 17 00:00:00 2001 +From a0059f1ef0bfd6852f838491120adf1be20857cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 16 Sep 2019 17:07:00 +0100 Subject: Using ip_deq after m_free might read pointers from an allocation @@ -31,7 +31,7 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c -index 8c75d91495..df1c846ade 100644 +index 8c75d91..df1c846 100644 --- a/slirp/src/ip_input.c +++ b/slirp/src/ip_input.c @@ -292,6 +292,7 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) @@ -57,5 +57,5 @@ index 8c75d91495..df1c846ade 100644 insert: -- -2.21.0 +1.8.3.1 diff --git a/0023-Temporarily-update-VERSION-to-8.2.0.patch b/0023-Temporarily-update-VERSION-to-8.2.0.patch index 1350598..284d497 100644 --- a/0023-Temporarily-update-VERSION-to-8.2.0.patch +++ b/0023-Temporarily-update-VERSION-to-8.2.0.patch @@ -1,4 +1,4 @@ -From 1afa318b8a37aa999221ad4afa01e14a242f7476 Mon Sep 17 00:00:00 2001 +From 2a87b3e90278f47395975a77fc056d2f964f4725 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Tue, 19 Nov 2019 08:41:33 -0300 Subject: Temporarily update VERSION to 8.2.0 @@ -8,18 +8,21 @@ rhbz#1773397 This will change when the official release appears. But, to make qemu-kvm --version happy, we need to fix this now. +Conflicts: + VERSION + Signed-off-by: Danilo C. L. de Paula --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION -index cfcbdd75dd..6aba2b245a 100644 +index dcd8753..6aba2b2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ --4.1.91 +-4.1.94 +4.2.0 -- -2.21.0 +1.8.3.1 diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 5a91c0c..abdbd7f 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -1,6 +1,6 @@ %global SLOF_gittagdate 20191022 %global SLOF_gittagcommit 899d9883 -%global rcversion -rc1 +%global rcversion -rc4 %global have_usbredir 1 %global have_spice 1 @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 1%{?dist} +Release: 2%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -77,7 +77,7 @@ URL: http://www.qemu.org/ ExclusiveArch: x86_64 %{power64} aarch64 s390x -Source0: http://wiki.qemu.org/download/qemu-4.2.0-rc1.tar.xz +Source0: http://wiki.qemu.org/download/qemu-4.2.0-rc4.tar.xz # KSM control scripts Source4: ksm.service @@ -105,6 +105,7 @@ Source35: udev-kvm-check.c Source36: README.tests +Patch0001: 0001-redhat-Adding-slirp-to-the-exploded-tree.patch Patch0005: 0005-Initial-redhat-build.patch Patch0006: 0006-Enable-disable-devices-for-RHEL.patch Patch0007: 0007-Machine-type-related-general-changes.patch @@ -409,6 +410,9 @@ the Secure Shell (SSH) protocol. 
%prep %setup -n qemu-%{version}%{rcversion} +# Remove slirp content in scratchbuilds because it's being applyed as a patch +rm -fr slirp +mkdir slirp %autopatch -p1 %build @@ -1057,16 +1061,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog -* Tue Nov 19 2019 Danilo Cesar Lemes de Paula - 4.2.0-1.el8 + +* Tue Nov 19 2019 Danilo Cesar Lemes de Paula - 4.2.0-2.el8 - 0023-Temporarily-update-VERSION-to-8.2.0.patch [bz#1773397] - Resolves: bz#1773397 - (QEMU emulator version is "4.1.91" for qemu-kvm-4.2.0-0.module+el8.2.0+471) + (QEMU emulator version is "4.1.91" for qemu-kvm-4.2.0-0.module+el8.2.0+471 - Resoves: bz#1773392 ([ppc] Need to rebase SLOF image for qemu-kvm-4.2) -* Fri Nov 15 2019 Danilo Cesar Lemes de Paula - 4.2.0-0.el8 -- Rebase to 4.2 - * Tue Oct 29 2019 Danilo Cesar Lemes de Paula - 4.1.0-14.el8 - kvm-Revert-qcow2-skip-writing-zero-buffers-to-empty-COW-.patch [bz#1751934] - kvm-coroutine-Add-qemu_co_mutex_assert_locked.patch [bz#1764721] diff --git a/sources b/sources index 310b89f..dc76917 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-4.2.0-rc1.tar.xz) = 8ad5e0472fd384a9ba03b2e8fbb1e887169abb47a50a3f130b1943b39f45677a9e65ca5d1deb96338a5b3c3953db67f50e194a6763e9121c0eb5f620896162a9 +SHA512 (qemu-4.2.0-rc4.tar.xz) = 01d7fd8368b37ce9239b5884654962c947e2c597c0042b256bb9d1ebf2fd5159b1182e43094e966e54368c792b390f48ce12bce7ee878f5f3ab685fc118aa7db From 1261d6ce5b8d51fad6b865ef505ba9ba164757bb Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Fri, 13 Dec 2019 13:10:42 +0000 Subject: [PATCH 059/195] Rebase to qemu 4.2 - Rebase to qemu-4.2 - Resolves: bz#1783250 (rebase qemu-kvm to 4.2) --- .gitignore | 7 +- ...at-Adding-slirp-to-the-exploded-tree.patch | 16352 ---------------- 0005-Initial-redhat-build.patch | 26 +- 0006-Enable-disable-devices-for-RHEL.patch | 110 +- ...Machine-type-related-general-changes.patch | 75 +- 0008-Add-aarch64-machine-types.patch | 12 +- 0009-Add-ppc64-machine-types.patch | 61 +- 0010-Add-s390x-machine-types.patch | 57 +- 0011-Add-x86_64-machine-types.patch | 79 +- 0012-Enable-make-check.patch | 24 +- ...mber-of-devices-that-can-be-assigned.patch | 8 +- ...Add-support-statement-to-help-output.patch | 6 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 8 +- 0016-Add-support-for-simpletrace.patch | 16 +- ...documentation-instead-of-qemu-system.patch | 12 +- 0018-usb-xhci-Fix-PCI-capability-order.patch | 6 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 6 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 6 +- ...er-m_free-might-read-pointers-from-a.patch | 6 +- ...-Temporarily-update-VERSION-to-8.2.0.patch | 28 - qemu-kvm.spec | 87 +- sources | 2 +- 22 files changed, 362 insertions(+), 16632 deletions(-) delete mode 100644 0001-redhat-Adding-slirp-to-the-exploded-tree.patch delete mode 100644 0023-Temporarily-update-VERSION-to-8.2.0.patch diff --git a/.gitignore b/.gitignore index 356cc4d..a46bdee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1 @@ -/qemu-3.1.0.tar.xz -/qemu-4.0.0.tar.xz -/qemu-4.1.0-rc4.tar.xz -/qemu-4.1.0.tar.xz -/qemu-4.2.0-rc1.tar.xz -/qemu-4.2.0-rc4.tar.xz +/qemu-4.2.0.tar.xz diff --git a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch deleted file mode 100644 index 218f66f..0000000 --- a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch +++ /dev/null @@ -1,16352 +0,0 @@ -From 882cfbbb1d29840876617233781c95e821c203be Mon Sep 17 00:00:00 2001 -From: Danilo de Paula -Date: Sat, 
7 Sep 2019 02:07:56 +0100 -Subject: redhat: Adding slirp to the exploded tree - -RH-Author: Danilo de Paula -Message-id: <20190907020756.8619-1-ddepaula@redhat.com> -Patchwork-id: 90309 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] redhat: Adding slirp to the exploded tree -Bugzilla: -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Wainer dos Santos Moschetta - -Until qemu-kvm-3.1 slirp used to live as a regular folder in qemu-kvm. -After that it got moved into its own submodule. Which means it's not -part of the qemu-kvm git tree anymore. - -This passed unoticed for RHEL-AV-8.0.1 and 8.1.0 because qemu still ships -the code in the tarball. That's why scratch builds still works (it's based in -the tarball content). - -As we're receiving some CVE's against slirp, we need a way to patch -slirp in RHEL-8.1.0 without handling as a separate package (as we do for -firmwares). - -The simplest solution is to copy the slirp folder from the tarball into the -exploded tree. - -To be able to do that, I had to make some changes: - -slirp needs to be removed from .gitmodules, otherwise git complains -about files on it. - -Since "make -C redhat rh-brew" uses the tarball and apply all the -patches on top of it, we need to remove the folder from the tarball before applying -the patch (because we are actually re-applying them). - -We also need to use --ignore-submodule while generating the patches for -scratch-build, otherwise it will include some weird definition of the -slirp folder in the patch, something that /usr/bin/patch gets mad with. - -After that I compared the patch list, after and before this change, and -saw no major differences. - -This is an exploded-tree-only change and shouldn't be applied to dist-git. - -Signed-off-by: Danilo C. L. 
de Paula ---- - .gitmodules | 3 - - slirp/.clang-format | 58 ++ - slirp/.gitignore | 10 + - slirp/.gitlab-ci.yml | 24 + - slirp/COPYRIGHT | 62 ++ - slirp/Makefile | 60 ++ - slirp/README.md | 60 ++ - slirp/meson.build | 127 ++++ - slirp/src/arp_table.c | 91 +++ - slirp/src/bootp.c | 370 ++++++++++ - slirp/src/bootp.h | 129 ++++ - slirp/src/cksum.c | 179 +++++ - slirp/src/debug.h | 51 ++ - slirp/src/dhcpv6.c | 225 ++++++ - slirp/src/dhcpv6.h | 68 ++ - slirp/src/dnssearch.c | 311 ++++++++ - slirp/src/if.c | 213 ++++++ - slirp/src/if.h | 21 + - slirp/src/ip.h | 242 ++++++ - slirp/src/ip6.h | 214 ++++++ - slirp/src/ip6_icmp.c | 434 +++++++++++ - slirp/src/ip6_icmp.h | 219 ++++++ - slirp/src/ip6_input.c | 78 ++ - slirp/src/ip6_output.c | 39 + - slirp/src/ip_icmp.c | 477 ++++++++++++ - slirp/src/ip_icmp.h | 166 +++++ - slirp/src/ip_input.c | 462 ++++++++++++ - slirp/src/ip_output.c | 169 +++++ - slirp/src/libslirp-version.h.in | 23 + - slirp/src/libslirp.h | 119 +++ - slirp/src/libslirp.map | 21 + - slirp/src/main.h | 16 + - slirp/src/mbuf.c | 224 ++++++ - slirp/src/mbuf.h | 127 ++++ - slirp/src/misc.c | 298 ++++++++ - slirp/src/misc.h | 63 ++ - slirp/src/ncsi-pkt.h | 445 +++++++++++ - slirp/src/ncsi.c | 192 +++++ - slirp/src/ndp_table.c | 87 +++ - slirp/src/sbuf.c | 186 +++++ - slirp/src/sbuf.h | 27 + - slirp/src/slirp.c | 1112 ++++++++++++++++++++++++++++ - slirp/src/slirp.h | 273 +++++++ - slirp/src/socket.c | 935 ++++++++++++++++++++++++ - slirp/src/socket.h | 164 +++++ - slirp/src/state.c | 381 ++++++++++ - slirp/src/stream.c | 120 +++ - slirp/src/stream.h | 35 + - slirp/src/tcp.h | 181 +++++ - slirp/src/tcp_input.c | 1540 +++++++++++++++++++++++++++++++++++++++ - slirp/src/tcp_output.c | 516 +++++++++++++ - slirp/src/tcp_subr.c | 975 +++++++++++++++++++++++++ - slirp/src/tcp_timer.c | 286 ++++++++ - slirp/src/tcp_timer.h | 130 ++++ - slirp/src/tcp_var.h | 161 ++++ - slirp/src/tcpip.h | 104 +++ - slirp/src/tftp.c | 462 ++++++++++++ - slirp/src/tftp.h | 52 ++ - 
slirp/src/udp.c | 354 +++++++++ - slirp/src/udp.h | 90 +++ - slirp/src/udp6.c | 173 +++++ - slirp/src/util.c | 366 ++++++++++ - slirp/src/util.h | 180 +++++ - slirp/src/version.c | 11 + - slirp/src/vmstate.c | 445 +++++++++++ - slirp/src/vmstate.h | 391 ++++++++++ - 66 files changed, 15824 insertions(+), 3 deletions(-) - create mode 100644 slirp/.clang-format - create mode 100644 slirp/.gitignore - create mode 100644 slirp/.gitlab-ci.yml - create mode 100644 slirp/COPYRIGHT - create mode 100644 slirp/Makefile - create mode 100644 slirp/README.md - create mode 100644 slirp/meson.build - create mode 100644 slirp/src/arp_table.c - create mode 100644 slirp/src/bootp.c - create mode 100644 slirp/src/bootp.h - create mode 100644 slirp/src/cksum.c - create mode 100644 slirp/src/debug.h - create mode 100644 slirp/src/dhcpv6.c - create mode 100644 slirp/src/dhcpv6.h - create mode 100644 slirp/src/dnssearch.c - create mode 100644 slirp/src/if.c - create mode 100644 slirp/src/if.h - create mode 100644 slirp/src/ip.h - create mode 100644 slirp/src/ip6.h - create mode 100644 slirp/src/ip6_icmp.c - create mode 100644 slirp/src/ip6_icmp.h - create mode 100644 slirp/src/ip6_input.c - create mode 100644 slirp/src/ip6_output.c - create mode 100644 slirp/src/ip_icmp.c - create mode 100644 slirp/src/ip_icmp.h - create mode 100644 slirp/src/ip_input.c - create mode 100644 slirp/src/ip_output.c - create mode 100644 slirp/src/libslirp-version.h.in - create mode 100644 slirp/src/libslirp.h - create mode 100644 slirp/src/libslirp.map - create mode 100644 slirp/src/main.h - create mode 100644 slirp/src/mbuf.c - create mode 100644 slirp/src/mbuf.h - create mode 100644 slirp/src/misc.c - create mode 100644 slirp/src/misc.h - create mode 100644 slirp/src/ncsi-pkt.h - create mode 100644 slirp/src/ncsi.c - create mode 100644 slirp/src/ndp_table.c - create mode 100644 slirp/src/sbuf.c - create mode 100644 slirp/src/sbuf.h - create mode 100644 slirp/src/slirp.c - create mode 100644 
slirp/src/slirp.h - create mode 100644 slirp/src/socket.c - create mode 100644 slirp/src/socket.h - create mode 100644 slirp/src/state.c - create mode 100644 slirp/src/stream.c - create mode 100644 slirp/src/stream.h - create mode 100644 slirp/src/tcp.h - create mode 100644 slirp/src/tcp_input.c - create mode 100644 slirp/src/tcp_output.c - create mode 100644 slirp/src/tcp_subr.c - create mode 100644 slirp/src/tcp_timer.c - create mode 100644 slirp/src/tcp_timer.h - create mode 100644 slirp/src/tcp_var.h - create mode 100644 slirp/src/tcpip.h - create mode 100644 slirp/src/tftp.c - create mode 100644 slirp/src/tftp.h - create mode 100644 slirp/src/udp.c - create mode 100644 slirp/src/udp.h - create mode 100644 slirp/src/udp6.c - create mode 100644 slirp/src/util.c - create mode 100644 slirp/src/util.h - create mode 100644 slirp/src/version.c - create mode 100644 slirp/src/vmstate.c - create mode 100644 slirp/src/vmstate.h - -diff --git a/slirp/.clang-format b/slirp/.clang-format -new file mode 100644 -index 0000000..17fb49f ---- /dev/null -+++ b/slirp/.clang-format -@@ -0,0 +1,58 @@ -+# https://clang.llvm.org/docs/ClangFormat.html -+# https://clang.llvm.org/docs/ClangFormatStyleOptions.html -+--- -+Language: Cpp -+AlignAfterOpenBracket: Align -+AlignConsecutiveAssignments: false # although we like it, it creates churn -+AlignConsecutiveDeclarations: false -+AlignEscapedNewlinesLeft: true -+AlignOperands: true -+AlignTrailingComments: false # churn -+AllowAllParametersOfDeclarationOnNextLine: true -+AllowShortBlocksOnASingleLine: false -+AllowShortCaseLabelsOnASingleLine: false -+AllowShortFunctionsOnASingleLine: None -+AllowShortIfStatementsOnASingleLine: false -+AllowShortLoopsOnASingleLine: false -+AlwaysBreakAfterReturnType: None # AlwaysBreakAfterDefinitionReturnType is taken into account -+AlwaysBreakBeforeMultilineStrings: false -+BinPackArguments: true -+BinPackParameters: true -+BraceWrapping: -+ AfterControlStatement: false -+ AfterEnum: false -+ 
AfterFunction: true -+ AfterStruct: false -+ AfterUnion: false -+ BeforeElse: false -+ IndentBraces: false -+BreakBeforeBinaryOperators: None -+BreakBeforeBraces: Custom -+BreakBeforeTernaryOperators: false -+BreakStringLiterals: true -+ColumnLimit: 80 -+ContinuationIndentWidth: 4 -+Cpp11BracedListStyle: false -+DerivePointerAlignment: false -+DisableFormat: false -+IndentCaseLabels: false -+IndentWidth: 4 -+IndentWrappedFunctionNames: false -+KeepEmptyLinesAtTheStartOfBlocks: false -+MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? -+MacroBlockEnd: '.*_END$' -+MaxEmptyLinesToKeep: 2 -+PointerAlignment: Right -+ReflowComments: true -+SortIncludes: false -+SpaceAfterCStyleCast: false -+SpaceBeforeAssignmentOperators: true -+SpaceBeforeParens: ControlStatements -+SpaceInEmptyParentheses: false -+SpacesBeforeTrailingComments: 1 -+SpacesInContainerLiterals: true -+SpacesInParentheses: false -+SpacesInSquareBrackets: false -+Standard: Auto -+UseTab: Never -+... -diff --git a/slirp/COPYRIGHT b/slirp/COPYRIGHT -new file mode 100644 -index 0000000..ed49512 ---- /dev/null -+++ b/slirp/COPYRIGHT -@@ -0,0 +1,62 @@ -+Slirp was written by Danny Gasparovski. -+Copyright (c), 1995,1996 All Rights Reserved. -+ -+Slirp is free software; "free" as in you don't have to pay for it, and you -+are free to do whatever you want with it. I do not accept any donations, -+monetary or otherwise, for Slirp. Instead, I would ask you to pass this -+potential donation to your favorite charity. In fact, I encourage -+*everyone* who finds Slirp useful to make a small donation to their -+favorite charity (for example, GreenPeace). This is not a requirement, but -+a suggestion from someone who highly values the service they provide. -+ -+The copyright terms and conditions: -+ -+---BEGIN--- -+ -+ Copyright (c) 1995,1996 Danny Gasparovski. All rights reserved. 
-+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ 1. Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ 2. Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ 3. Neither the name of the copyright holder nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, -+ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY -+ AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -+ DANNY GASPAROVSKI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ -+---END--- -+ -+This basically means you can do anything you want with the software, except -+1) call it your own, and 2) claim warranty on it. There is no warranty for -+this software. None. Nada. If you lose a million dollars while using -+Slirp, that's your loss not mine. So, ***USE AT YOUR OWN RISK!***. -+ -+If these conditions cannot be met due to legal restrictions (E.g. where it -+is against the law to give out Software without warranty), you must cease -+using the software and delete all copies you have. 
-+ -+Slirp uses code that is copyrighted by the following people/organizations: -+ -+Juha Pirkola. -+Gregory M. Christy. -+The Regents of the University of California. -+Carnegie Mellon University. -+The Australian National University. -+RSA Data Security, Inc. -+ -+Please read the top of each source file for the details on the various -+copyrights. -diff --git a/slirp/Makefile b/slirp/Makefile -new file mode 100644 -index 0000000..7f09879 ---- /dev/null -+++ b/slirp/Makefile -@@ -0,0 +1,60 @@ -+ROOT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) -+BUILD_DIR ?= . -+ -+LIBSLIRP = $(BUILD_DIR)/libslirp.a -+SLIRP_MAJOR_VERSION = 4 -+SLIRP_MINOR_VERSION = 0 -+SLIRP_MICRO_VERSION = 0 -+ -+all: $(LIBSLIRP) -+ -+SRCS := $(wildcard src/*.c) -+OBJS := $(SRCS:%.c=$(BUILD_DIR)/%.o) -+DEPS := $(OBJS:%.o=%.d) -+ -+INC_DIRS := $(BUILD_DIR)/src -+INC_FLAGS := $(addprefix -I,$(INC_DIRS)) -+ -+override CFLAGS += \ -+ -DG_LOG_DOMAIN='"Slirp"' \ -+ $(shell $(PKG_CONFIG) --cflags glib-2.0) \ -+ $(INC_FLAGS) \ -+ -MMD -MP -+override LDFLAGS += $(shell $(PKG_CONFIG) --libs glib-2.0) -+ -+$(BUILD_DIR)/src/libslirp-version.h: -+ @$(MKDIR_P) $(dir $@) -+ $(call quiet-command,cat $(ROOT_DIR)/src/libslirp-version.h.in | \ -+ sed 's/@SLIRP_MAJOR_VERSION@/$(SLIRP_MAJOR_VERSION)/' | \ -+ sed 's/@SLIRP_MINOR_VERSION@/$(SLIRP_MINOR_VERSION)/' | \ -+ sed 's/@SLIRP_MICRO_VERSION@/$(SLIRP_MICRO_VERSION)/' \ -+ > $@,"GEN","$@") -+ -+$(OBJS): $(BUILD_DIR)/src/libslirp-version.h -+ -+$(LIBSLIRP): $(OBJS) -+ -+.PHONY: clean -+ -+clean: -+ rm -r $(OBJS) $(DEPS) $(LIBSLIRP) $(BUILD_DIR)/src/libslirp-version.h -+ -+$(BUILD_DIR)/src/%.o: $(ROOT_DIR)/src/%.c -+ @$(MKDIR_P) $(dir $@) -+ $(call quiet-command,$(CC) $(CFLAGS) -c -o $@ $<,"CC","$@") -+ -+%.a: -+ $(call quiet-command,rm -f $@ && $(AR) rcs $@ $^,"AR","$@") -+ -+PKG_CONFIG ?= pkg-config -+MKDIR_P ?= mkdir -p -+quiet-command-run = $(if $(V),,$(if $2,printf " %-7s %s\n" $2 $3 && ))$1 -+quiet-@ = $(if $(V),,@) -+quiet-command = 
$(quiet-@)$(call quiet-command-run,$1,$2,$3) -+ -+print-%: -+ @echo '$*=$($*)' -+ -+.SUFFIXES: -+ -+-include $(DEPS) -diff --git a/slirp/README.md b/slirp/README.md -new file mode 100644 -index 0000000..dc11e5f ---- /dev/null -+++ b/slirp/README.md -@@ -0,0 +1,60 @@ -+# libslirp -+ -+libslirp is a user-mode networking library used by virtual machines, -+containers or various tools. -+ -+## Getting Started -+ -+### Prerequisites -+ -+A C compiler, make/meson and glib2 development libraries. -+ -+(see also [.gitlab-ci.yml](.gitlab-ci.yml) DEPS variable for the list -+of dependencies on Fedora) -+ -+### Building -+ -+You may build and install the shared library with meson: -+ -+``` sh -+meson build -+ninja -C build install -+``` -+And configure QEMU with --enable-slirp=system to link against it. -+ -+(QEMU may build with the submodule static library using --enable-slirp=git) -+ -+### Testing -+ -+Unfortunately, there are no automated tests available. -+ -+You may run QEMU ``-net user`` linked with your development version. -+ -+## Contributing -+ -+Feel free to open issues on the [project -+issues](https://gitlab.freedesktop.org/slirp/libslirp/issues) page. -+ -+You may clone the [gitlab -+project](https://gitlab.freedesktop.org/slirp/libslirp) and create a -+merge request. -+ -+Contributing with gitlab allows gitlab workflow, tracking issues, -+running CI etc. -+ -+Alternatively, you may send patches to slirp@lists.freedesktop.org -+mailing list. -+ -+## Versioning -+ -+We intend to use [libtool's -+versioning](https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html) -+for the shared libraries and use [SemVer](http://semver.org/) for -+project versions. -+ -+For the versions available, see the [tags on this -+repository](https://gitlab.freedesktop.org/slirp/libslirp/releases). -+ -+## License -+ -+See the [COPYRIGHT](COPYRIGHT) file for details. 
-diff --git a/slirp/meson.build b/slirp/meson.build -new file mode 100644 -index 0000000..94578dc ---- /dev/null -+++ b/slirp/meson.build -@@ -0,0 +1,127 @@ -+project('libslirp', 'c', -+ version : '4.0.0', -+ license : 'BSD-3-Clause', -+ default_options : ['warning_level=1', 'c_std=gnu99'] -+) -+ -+version = meson.project_version() -+varr = version.split('.') -+major_version = varr[0] -+minor_version = varr[1] -+micro_version = varr[2] -+ -+conf = configuration_data() -+conf.set('SLIRP_MAJOR_VERSION', major_version) -+conf.set('SLIRP_MINOR_VERSION', minor_version) -+conf.set('SLIRP_MICRO_VERSION', micro_version) -+ -+# libtool versioning - this applies to libslirp -+# -+# See http://sources.redhat.com/autobook/autobook/autobook_91.html#SEC91 for details -+# -+# - If interfaces have been changed or added, but binary compatibility -+# has been preserved, change: -+# CURRENT += 1 -+# REVISION = 0 -+# AGE += 1 -+# - If binary compatibility has been broken (eg removed or changed -+# interfaces), change: -+# CURRENT += 1 -+# REVISION = 0 -+# AGE = 0 -+# - If the interface is the same as the previous version, but bugs are -+# fixed, change: -+# REVISION += 1 -+lt_current = '0' -+lt_revision = '0' -+lt_age = '0' -+lt_version = '@0@.@1@.@2@'.format(lt_current, lt_age, lt_revision) -+ -+host_system = host_machine.system() -+ -+glib_dep = dependency('glib-2.0') -+ -+cc = meson.get_compiler('c') -+ -+platform_deps = [] -+ -+if host_system == 'windows' -+ platform_deps += [ -+ cc.find_library('ws2_32'), -+ cc.find_library('iphlpapi') -+ ] -+endif -+ -+cargs = [ -+ '-DG_LOG_DOMAIN="Slirp"', -+] -+ -+sources = [ -+ 'src/arp_table.c', -+ 'src/bootp.c', -+ 'src/cksum.c', -+ 'src/dhcpv6.c', -+ 'src/dnssearch.c', -+ 'src/if.c', -+ 'src/ip6_icmp.c', -+ 'src/ip6_input.c', -+ 'src/ip6_output.c', -+ 'src/ip_icmp.c', -+ 'src/ip_input.c', -+ 'src/ip_output.c', -+ 'src/mbuf.c', -+ 'src/misc.c', -+ 'src/ncsi.c', -+ 'src/ndp_table.c', -+ 'src/sbuf.c', -+ 'src/slirp.c', -+ 'src/socket.c', -+ 
'src/state.c', -+ 'src/stream.c', -+ 'src/tcp_input.c', -+ 'src/tcp_output.c', -+ 'src/tcp_subr.c', -+ 'src/tcp_timer.c', -+ 'src/tftp.c', -+ 'src/udp.c', -+ 'src/udp6.c', -+ 'src/util.c', -+ 'src/version.c', -+ 'src/vmstate.c', -+] -+ -+mapfile = 'src/libslirp.map' -+vflag = '-Wl,--version-script,@0@/@1@'.format(meson.current_source_dir(), mapfile) -+ -+configure_file( -+ input : 'src/libslirp-version.h.in', -+ output : 'libslirp-version.h', -+ install_dir : join_paths(get_option('includedir'), 'slirp'), -+ configuration : conf -+) -+ -+lib = shared_library('slirp', sources, -+ soversion : lt_current, -+ version : lt_version, -+ c_args : cargs, -+ link_args : vflag, -+ link_depends : mapfile, -+ dependencies : [glib_dep, platform_deps], -+ install : true -+) -+ -+install_headers(['src/libslirp.h'], subdir : 'slirp') -+ -+pkg = import('pkgconfig') -+ -+pkg.generate( -+ version : version, -+ libraries : lib, -+ requires : [ -+ 'glib-2.0', -+ ], -+ name : 'slirp', -+ description : 'User-space network stack', -+ filebase : 'slirp', -+ subdirs : 'slirp', -+) -diff --git a/slirp/src/arp_table.c b/slirp/src/arp_table.c -new file mode 100644 -index 0000000..022a52e ---- /dev/null -+++ b/slirp/src/arp_table.c -@@ -0,0 +1,91 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * ARP table -+ * -+ * Copyright (c) 2011 AdaCore -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. 
-+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. -+ */ -+ -+#include "slirp.h" -+ -+#include -+ -+void arp_table_add(Slirp *slirp, uint32_t ip_addr, uint8_t ethaddr[ETH_ALEN]) -+{ -+ const uint32_t broadcast_addr = -+ ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; -+ ArpTable *arptbl = &slirp->arp_table; -+ int i; -+ -+ DEBUG_CALL("arp_table_add"); -+ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); -+ DEBUG_ARG("hw addr = %02x:%02x:%02x:%02x:%02x:%02x", ethaddr[0], ethaddr[1], -+ ethaddr[2], ethaddr[3], ethaddr[4], ethaddr[5]); -+ -+ if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { -+ /* Do not register broadcast addresses */ -+ return; -+ } -+ -+ /* Search for an entry */ -+ for (i = 0; i < ARP_TABLE_SIZE; i++) { -+ if (arptbl->table[i].ar_sip == ip_addr) { -+ /* Update the entry */ -+ memcpy(arptbl->table[i].ar_sha, ethaddr, ETH_ALEN); -+ return; -+ } -+ } -+ -+ /* No entry found, create a new one */ -+ arptbl->table[arptbl->next_victim].ar_sip = ip_addr; -+ memcpy(arptbl->table[arptbl->next_victim].ar_sha, ethaddr, ETH_ALEN); -+ arptbl->next_victim = (arptbl->next_victim + 1) % ARP_TABLE_SIZE; -+} -+ -+bool arp_table_search(Slirp *slirp, uint32_t ip_addr, -+ uint8_t out_ethaddr[ETH_ALEN]) -+{ -+ const uint32_t broadcast_addr = -+ ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; -+ ArpTable *arptbl = &slirp->arp_table; -+ int i; -+ -+ DEBUG_CALL("arp_table_search"); -+ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); -+ -+ /* 
If broadcast address */ -+ if (ip_addr == 0xffffffff || ip_addr == broadcast_addr) { -+ /* return Ethernet broadcast address */ -+ memset(out_ethaddr, 0xff, ETH_ALEN); -+ return 1; -+ } -+ -+ for (i = 0; i < ARP_TABLE_SIZE; i++) { -+ if (arptbl->table[i].ar_sip == ip_addr) { -+ memcpy(out_ethaddr, arptbl->table[i].ar_sha, ETH_ALEN); -+ DEBUG_ARG("found hw addr = %02x:%02x:%02x:%02x:%02x:%02x", -+ out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], -+ out_ethaddr[3], out_ethaddr[4], out_ethaddr[5]); -+ return 1; -+ } -+ } -+ -+ return 0; -+} -diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c -new file mode 100644 -index 0000000..3f9ce25 ---- /dev/null -+++ b/slirp/src/bootp.c -@@ -0,0 +1,370 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * QEMU BOOTP/DHCP server -+ * -+ * Copyright (c) 2004 Fabrice Bellard -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. 
-+ */ -+#include "slirp.h" -+ -+#if defined(_WIN32) -+/* Windows ntohl() returns an u_long value. -+ * Add a type cast to match the format strings. */ -+#define ntohl(n) ((uint32_t)ntohl(n)) -+#endif -+ -+/* XXX: only DHCP is supported */ -+ -+#define LEASE_TIME (24 * 3600) -+ -+static const uint8_t rfc1533_cookie[] = { RFC1533_COOKIE }; -+ -+#define DPRINTF(fmt, ...) DEBUG_CALL(fmt, ##__VA_ARGS__) -+ -+static BOOTPClient *get_new_addr(Slirp *slirp, struct in_addr *paddr, -+ const uint8_t *macaddr) -+{ -+ BOOTPClient *bc; -+ int i; -+ -+ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { -+ bc = &slirp->bootp_clients[i]; -+ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) -+ goto found; -+ } -+ return NULL; -+found: -+ bc = &slirp->bootp_clients[i]; -+ bc->allocated = 1; -+ paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); -+ return bc; -+} -+ -+static BOOTPClient *request_addr(Slirp *slirp, const struct in_addr *paddr, -+ const uint8_t *macaddr) -+{ -+ uint32_t req_addr = ntohl(paddr->s_addr); -+ uint32_t dhcp_addr = ntohl(slirp->vdhcp_startaddr.s_addr); -+ BOOTPClient *bc; -+ -+ if (req_addr >= dhcp_addr && req_addr < (dhcp_addr + NB_BOOTP_CLIENTS)) { -+ bc = &slirp->bootp_clients[req_addr - dhcp_addr]; -+ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) { -+ bc->allocated = 1; -+ return bc; -+ } -+ } -+ return NULL; -+} -+ -+static BOOTPClient *find_addr(Slirp *slirp, struct in_addr *paddr, -+ const uint8_t *macaddr) -+{ -+ BOOTPClient *bc; -+ int i; -+ -+ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { -+ if (!memcmp(macaddr, slirp->bootp_clients[i].macaddr, 6)) -+ goto found; -+ } -+ return NULL; -+found: -+ bc = &slirp->bootp_clients[i]; -+ bc->allocated = 1; -+ paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); -+ return bc; -+} -+ -+static void dhcp_decode(const struct bootp_t *bp, int *pmsg_type, -+ struct in_addr *preq_addr) -+{ -+ const uint8_t *p, *p_end; -+ int len, tag; -+ -+ *pmsg_type = 0; -+ preq_addr->s_addr = htonl(0L); -+ -+ p = 
bp->bp_vend; -+ p_end = p + DHCP_OPT_LEN; -+ if (memcmp(p, rfc1533_cookie, 4) != 0) -+ return; -+ p += 4; -+ while (p < p_end) { -+ tag = p[0]; -+ if (tag == RFC1533_PAD) { -+ p++; -+ } else if (tag == RFC1533_END) { -+ break; -+ } else { -+ p++; -+ if (p >= p_end) -+ break; -+ len = *p++; -+ if (p + len > p_end) { -+ break; -+ } -+ DPRINTF("dhcp: tag=%d len=%d\n", tag, len); -+ -+ switch (tag) { -+ case RFC2132_MSG_TYPE: -+ if (len >= 1) -+ *pmsg_type = p[0]; -+ break; -+ case RFC2132_REQ_ADDR: -+ if (len >= 4) { -+ memcpy(&(preq_addr->s_addr), p, 4); -+ } -+ break; -+ default: -+ break; -+ } -+ p += len; -+ } -+ } -+ if (*pmsg_type == DHCPREQUEST && preq_addr->s_addr == htonl(0L) && -+ bp->bp_ciaddr.s_addr) { -+ memcpy(&(preq_addr->s_addr), &bp->bp_ciaddr, 4); -+ } -+} -+ -+static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) -+{ -+ BOOTPClient *bc = NULL; -+ struct mbuf *m; -+ struct bootp_t *rbp; -+ struct sockaddr_in saddr, daddr; -+ struct in_addr preq_addr; -+ int dhcp_msg_type, val; -+ uint8_t *q; -+ uint8_t *end; -+ uint8_t client_ethaddr[ETH_ALEN]; -+ -+ /* extract exact DHCP msg type */ -+ dhcp_decode(bp, &dhcp_msg_type, &preq_addr); -+ DPRINTF("bootp packet op=%d msgtype=%d", bp->bp_op, dhcp_msg_type); -+ if (preq_addr.s_addr != htonl(0L)) -+ DPRINTF(" req_addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); -+ else { -+ DPRINTF("\n"); -+ } -+ -+ if (dhcp_msg_type == 0) -+ dhcp_msg_type = DHCPREQUEST; /* Force reply for old BOOTP clients */ -+ -+ if (dhcp_msg_type != DHCPDISCOVER && dhcp_msg_type != DHCPREQUEST) -+ return; -+ -+ /* Get client's hardware address from bootp request */ -+ memcpy(client_ethaddr, bp->bp_hwaddr, ETH_ALEN); -+ -+ m = m_get(slirp); -+ if (!m) { -+ return; -+ } -+ m->m_data += IF_MAXLINKHDR; -+ rbp = (struct bootp_t *)m->m_data; -+ m->m_data += sizeof(struct udpiphdr); -+ memset(rbp, 0, sizeof(struct bootp_t)); -+ -+ if (dhcp_msg_type == DHCPDISCOVER) { -+ if (preq_addr.s_addr != htonl(0L)) { -+ bc = request_addr(slirp, 
&preq_addr, client_ethaddr); -+ if (bc) { -+ daddr.sin_addr = preq_addr; -+ } -+ } -+ if (!bc) { -+ new_addr: -+ bc = get_new_addr(slirp, &daddr.sin_addr, client_ethaddr); -+ if (!bc) { -+ DPRINTF("no address left\n"); -+ return; -+ } -+ } -+ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); -+ } else if (preq_addr.s_addr != htonl(0L)) { -+ bc = request_addr(slirp, &preq_addr, client_ethaddr); -+ if (bc) { -+ daddr.sin_addr = preq_addr; -+ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); -+ } else { -+ /* DHCPNAKs should be sent to broadcast */ -+ daddr.sin_addr.s_addr = 0xffffffff; -+ } -+ } else { -+ bc = find_addr(slirp, &daddr.sin_addr, bp->bp_hwaddr); -+ if (!bc) { -+ /* if never assigned, behaves as if it was already -+ assigned (windows fix because it remembers its address) */ -+ goto new_addr; -+ } -+ } -+ -+ /* Update ARP table for this IP address */ -+ arp_table_add(slirp, daddr.sin_addr.s_addr, client_ethaddr); -+ -+ saddr.sin_addr = slirp->vhost_addr; -+ saddr.sin_port = htons(BOOTP_SERVER); -+ -+ daddr.sin_port = htons(BOOTP_CLIENT); -+ -+ rbp->bp_op = BOOTP_REPLY; -+ rbp->bp_xid = bp->bp_xid; -+ rbp->bp_htype = 1; -+ rbp->bp_hlen = 6; -+ memcpy(rbp->bp_hwaddr, bp->bp_hwaddr, ETH_ALEN); -+ -+ rbp->bp_yiaddr = daddr.sin_addr; /* Client IP address */ -+ rbp->bp_siaddr = saddr.sin_addr; /* Server IP address */ -+ -+ q = rbp->bp_vend; -+ end = (uint8_t *)&rbp[1]; -+ memcpy(q, rfc1533_cookie, 4); -+ q += 4; -+ -+ if (bc) { -+ DPRINTF("%s addr=%08" PRIx32 "\n", -+ (dhcp_msg_type == DHCPDISCOVER) ? 
"offered" : "ack'ed", -+ ntohl(daddr.sin_addr.s_addr)); -+ -+ if (dhcp_msg_type == DHCPDISCOVER) { -+ *q++ = RFC2132_MSG_TYPE; -+ *q++ = 1; -+ *q++ = DHCPOFFER; -+ } else /* DHCPREQUEST */ { -+ *q++ = RFC2132_MSG_TYPE; -+ *q++ = 1; -+ *q++ = DHCPACK; -+ } -+ -+ if (slirp->bootp_filename) -+ snprintf((char *)rbp->bp_file, sizeof(rbp->bp_file), "%s", -+ slirp->bootp_filename); -+ -+ *q++ = RFC2132_SRV_ID; -+ *q++ = 4; -+ memcpy(q, &saddr.sin_addr, 4); -+ q += 4; -+ -+ *q++ = RFC1533_NETMASK; -+ *q++ = 4; -+ memcpy(q, &slirp->vnetwork_mask, 4); -+ q += 4; -+ -+ if (!slirp->restricted) { -+ *q++ = RFC1533_GATEWAY; -+ *q++ = 4; -+ memcpy(q, &saddr.sin_addr, 4); -+ q += 4; -+ -+ *q++ = RFC1533_DNS; -+ *q++ = 4; -+ memcpy(q, &slirp->vnameserver_addr, 4); -+ q += 4; -+ } -+ -+ *q++ = RFC2132_LEASE_TIME; -+ *q++ = 4; -+ val = htonl(LEASE_TIME); -+ memcpy(q, &val, 4); -+ q += 4; -+ -+ if (*slirp->client_hostname) { -+ val = strlen(slirp->client_hostname); -+ if (q + val + 2 >= end) { -+ g_warning("DHCP packet size exceeded, " -+ "omitting host name option."); -+ } else { -+ *q++ = RFC1533_HOSTNAME; -+ *q++ = val; -+ memcpy(q, slirp->client_hostname, val); -+ q += val; -+ } -+ } -+ -+ if (slirp->vdomainname) { -+ val = strlen(slirp->vdomainname); -+ if (q + val + 2 >= end) { -+ g_warning("DHCP packet size exceeded, " -+ "omitting domain name option."); -+ } else { -+ *q++ = RFC1533_DOMAINNAME; -+ *q++ = val; -+ memcpy(q, slirp->vdomainname, val); -+ q += val; -+ } -+ } -+ -+ if (slirp->tftp_server_name) { -+ val = strlen(slirp->tftp_server_name); -+ if (q + val + 2 >= end) { -+ g_warning("DHCP packet size exceeded, " -+ "omitting tftp-server-name option."); -+ } else { -+ *q++ = RFC2132_TFTP_SERVER_NAME; -+ *q++ = val; -+ memcpy(q, slirp->tftp_server_name, val); -+ q += val; -+ } -+ } -+ -+ if (slirp->vdnssearch) { -+ val = slirp->vdnssearch_len; -+ if (q + val >= end) { -+ g_warning("DHCP packet size exceeded, " -+ "omitting domain-search option."); -+ } else { -+ memcpy(q, 
slirp->vdnssearch, val); -+ q += val; -+ } -+ } -+ } else { -+ static const char nak_msg[] = "requested address not available"; -+ -+ DPRINTF("nak'ed addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); -+ -+ *q++ = RFC2132_MSG_TYPE; -+ *q++ = 1; -+ *q++ = DHCPNAK; -+ -+ *q++ = RFC2132_MESSAGE; -+ *q++ = sizeof(nak_msg) - 1; -+ memcpy(q, nak_msg, sizeof(nak_msg) - 1); -+ q += sizeof(nak_msg) - 1; -+ } -+ assert(q < end); -+ *q = -+RFC1533_END -+; -+ -+daddr.sin_addr.s_addr = 0xffffffffu; -+ -+m->m_len = sizeof(struct bootp_t) - sizeof(struct ip) - sizeof(struct udphdr); -+udp_output(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); -+} -+ -+void bootp_input(struct mbuf *m) -+{ -+ struct bootp_t *bp = mtod(m, struct bootp_t *); -+ -+ if (bp->bp_op == BOOTP_REQUEST) { -+ bootp_reply(m->slirp, bp); -+ } -+} -diff --git a/slirp/src/bootp.h b/slirp/src/bootp.h -new file mode 100644 -index 0000000..03ece9b ---- /dev/null -+++ b/slirp/src/bootp.h -@@ -0,0 +1,129 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* bootp/dhcp defines */ -+ -+#ifndef SLIRP_BOOTP_H -+#define SLIRP_BOOTP_H -+ -+#define BOOTP_SERVER 67 -+#define BOOTP_CLIENT 68 -+ -+#define BOOTP_REQUEST 1 -+#define BOOTP_REPLY 2 -+ -+#define RFC1533_COOKIE 99, 130, 83, 99 -+#define RFC1533_PAD 0 -+#define RFC1533_NETMASK 1 -+#define RFC1533_TIMEOFFSET 2 -+#define RFC1533_GATEWAY 3 -+#define RFC1533_TIMESERVER 4 -+#define RFC1533_IEN116NS 5 -+#define RFC1533_DNS 6 -+#define RFC1533_LOGSERVER 7 -+#define RFC1533_COOKIESERVER 8 -+#define RFC1533_LPRSERVER 9 -+#define RFC1533_IMPRESSSERVER 10 -+#define RFC1533_RESOURCESERVER 11 -+#define RFC1533_HOSTNAME 12 -+#define RFC1533_BOOTFILESIZE 13 -+#define RFC1533_MERITDUMPFILE 14 -+#define RFC1533_DOMAINNAME 15 -+#define RFC1533_SWAPSERVER 16 -+#define RFC1533_ROOTPATH 17 -+#define RFC1533_EXTENSIONPATH 18 -+#define RFC1533_IPFORWARDING 19 -+#define RFC1533_IPSOURCEROUTING 20 -+#define RFC1533_IPPOLICYFILTER 21 -+#define RFC1533_IPMAXREASSEMBLY 22 -+#define RFC1533_IPTTL 23 
-+#define RFC1533_IPMTU 24 -+#define RFC1533_IPMTUPLATEAU 25 -+#define RFC1533_INTMTU 26 -+#define RFC1533_INTLOCALSUBNETS 27 -+#define RFC1533_INTBROADCAST 28 -+#define RFC1533_INTICMPDISCOVER 29 -+#define RFC1533_INTICMPRESPOND 30 -+#define RFC1533_INTROUTEDISCOVER 31 -+#define RFC1533_INTROUTESOLICIT 32 -+#define RFC1533_INTSTATICROUTES 33 -+#define RFC1533_LLTRAILERENCAP 34 -+#define RFC1533_LLARPCACHETMO 35 -+#define RFC1533_LLETHERNETENCAP 36 -+#define RFC1533_TCPTTL 37 -+#define RFC1533_TCPKEEPALIVETMO 38 -+#define RFC1533_TCPKEEPALIVEGB 39 -+#define RFC1533_NISDOMAIN 40 -+#define RFC1533_NISSERVER 41 -+#define RFC1533_NTPSERVER 42 -+#define RFC1533_VENDOR 43 -+#define RFC1533_NBNS 44 -+#define RFC1533_NBDD 45 -+#define RFC1533_NBNT 46 -+#define RFC1533_NBSCOPE 47 -+#define RFC1533_XFS 48 -+#define RFC1533_XDM 49 -+ -+#define RFC2132_REQ_ADDR 50 -+#define RFC2132_LEASE_TIME 51 -+#define RFC2132_MSG_TYPE 53 -+#define RFC2132_SRV_ID 54 -+#define RFC2132_PARAM_LIST 55 -+#define RFC2132_MESSAGE 56 -+#define RFC2132_MAX_SIZE 57 -+#define RFC2132_RENEWAL_TIME 58 -+#define RFC2132_REBIND_TIME 59 -+#define RFC2132_TFTP_SERVER_NAME 66 -+ -+#define DHCPDISCOVER 1 -+#define DHCPOFFER 2 -+#define DHCPREQUEST 3 -+#define DHCPACK 5 -+#define DHCPNAK 6 -+ -+#define RFC1533_VENDOR_MAJOR 0 -+#define RFC1533_VENDOR_MINOR 0 -+ -+#define RFC1533_VENDOR_MAGIC 128 -+#define RFC1533_VENDOR_ADDPARM 129 -+#define RFC1533_VENDOR_ETHDEV 130 -+#define RFC1533_VENDOR_HOWTO 132 -+#define RFC1533_VENDOR_MNUOPTS 160 -+#define RFC1533_VENDOR_SELECTION 176 -+#define RFC1533_VENDOR_MOTD 184 -+#define RFC1533_VENDOR_NUMOFMOTD 8 -+#define RFC1533_VENDOR_IMG 192 -+#define RFC1533_VENDOR_NUMOFIMG 16 -+ -+#define RFC1533_END 255 -+#define BOOTP_VENDOR_LEN 64 -+#define DHCP_OPT_LEN 312 -+ -+struct bootp_t { -+ struct ip ip; -+ struct udphdr udp; -+ uint8_t bp_op; -+ uint8_t bp_htype; -+ uint8_t bp_hlen; -+ uint8_t bp_hops; -+ uint32_t bp_xid; -+ uint16_t bp_secs; -+ uint16_t unused; -+ struct 
in_addr bp_ciaddr; -+ struct in_addr bp_yiaddr; -+ struct in_addr bp_siaddr; -+ struct in_addr bp_giaddr; -+ uint8_t bp_hwaddr[16]; -+ uint8_t bp_sname[64]; -+ uint8_t bp_file[128]; -+ uint8_t bp_vend[DHCP_OPT_LEN]; -+}; -+ -+typedef struct { -+ uint16_t allocated; -+ uint8_t macaddr[6]; -+} BOOTPClient; -+ -+#define NB_BOOTP_CLIENTS 16 -+ -+void bootp_input(struct mbuf *m); -+ -+#endif -diff --git a/slirp/src/cksum.c b/slirp/src/cksum.c -new file mode 100644 -index 0000000..4d08380 ---- /dev/null -+++ b/slirp/src/cksum.c -@@ -0,0 +1,179 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1988, 1992, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 -+ * in_cksum.c,v 1.2 1994/08/02 07:48:16 davidg Exp -+ */ -+ -+#include "slirp.h" -+ -+/* -+ * Checksum routine for Internet Protocol family headers (Portable Version). -+ * -+ * This routine is very heavily used in the network -+ * code and should be modified for each CPU to be as fast as possible. -+ * -+ * XXX Since we will never span more than 1 mbuf, we can optimise this -+ */ -+ -+#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) -+#define REDUCE \ -+ { \ -+ l_util.l = sum; \ -+ sum = l_util.s[0] + l_util.s[1]; \ -+ (void)ADDCARRY(sum); \ -+ } -+ -+int cksum(struct mbuf *m, int len) -+{ -+ register uint16_t *w; -+ register int sum = 0; -+ register int mlen = 0; -+ int byte_swapped = 0; -+ -+ union { -+ uint8_t c[2]; -+ uint16_t s; -+ } s_util; -+ union { -+ uint16_t s[2]; -+ uint32_t l; -+ } l_util; -+ -+ if (m->m_len == 0) -+ goto cont; -+ w = mtod(m, uint16_t *); -+ -+ mlen = m->m_len; -+ -+ if (len < mlen) -+ mlen = len; -+ len -= mlen; -+ /* -+ * Force to even boundary. -+ */ -+ if ((1 & (uintptr_t)w) && (mlen > 0)) { -+ REDUCE; -+ sum <<= 8; -+ s_util.c[0] = *(uint8_t *)w; -+ w = (uint16_t *)((int8_t *)w + 1); -+ mlen--; -+ byte_swapped = 1; -+ } -+ /* -+ * Unroll the loop to make overhead from -+ * branches &c small. 
-+ */ -+ while ((mlen -= 32) >= 0) { -+ sum += w[0]; -+ sum += w[1]; -+ sum += w[2]; -+ sum += w[3]; -+ sum += w[4]; -+ sum += w[5]; -+ sum += w[6]; -+ sum += w[7]; -+ sum += w[8]; -+ sum += w[9]; -+ sum += w[10]; -+ sum += w[11]; -+ sum += w[12]; -+ sum += w[13]; -+ sum += w[14]; -+ sum += w[15]; -+ w += 16; -+ } -+ mlen += 32; -+ while ((mlen -= 8) >= 0) { -+ sum += w[0]; -+ sum += w[1]; -+ sum += w[2]; -+ sum += w[3]; -+ w += 4; -+ } -+ mlen += 8; -+ if (mlen == 0 && byte_swapped == 0) -+ goto cont; -+ REDUCE; -+ while ((mlen -= 2) >= 0) { -+ sum += *w++; -+ } -+ -+ if (byte_swapped) { -+ REDUCE; -+ sum <<= 8; -+ if (mlen == -1) { -+ s_util.c[1] = *(uint8_t *)w; -+ sum += s_util.s; -+ mlen = 0; -+ } else -+ -+ mlen = -1; -+ } else if (mlen == -1) -+ s_util.c[0] = *(uint8_t *)w; -+ -+cont: -+ if (len) { -+ DEBUG_ERROR("cksum: out of data"); -+ DEBUG_ERROR(" len = %d", len); -+ } -+ if (mlen == -1) { -+ /* The last mbuf has odd # of bytes. Follow the -+ standard (the odd byte may be shifted left by 8 bits -+ or not as determined by endian-ness of the machine) */ -+ s_util.c[1] = 0; -+ sum += s_util.s; -+ } -+ REDUCE; -+ return (~sum & 0xffff); -+} -+ -+int ip6_cksum(struct mbuf *m) -+{ -+ /* TODO: Optimize this by being able to pass the ip6_pseudohdr to cksum -+ * separately from the mbuf */ -+ struct ip6 save_ip, *ip = mtod(m, struct ip6 *); -+ struct ip6_pseudohdr *ih = mtod(m, struct ip6_pseudohdr *); -+ int sum; -+ -+ save_ip = *ip; -+ -+ ih->ih_src = save_ip.ip_src; -+ ih->ih_dst = save_ip.ip_dst; -+ ih->ih_pl = htonl((uint32_t)ntohs(save_ip.ip_pl)); -+ ih->ih_zero_hi = 0; -+ ih->ih_zero_lo = 0; -+ ih->ih_nh = save_ip.ip_nh; -+ -+ sum = cksum(m, ((int)sizeof(struct ip6_pseudohdr)) + ntohl(ih->ih_pl)); -+ -+ *ip = save_ip; -+ -+ return sum; -+} -diff --git a/slirp/src/debug.h b/slirp/src/debug.h -new file mode 100644 -index 0000000..47712bd ---- /dev/null -+++ b/slirp/src/debug.h -@@ -0,0 +1,51 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * 
Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#ifndef DEBUG_H_ -+#define DEBUG_H_ -+ -+#define DBG_CALL (1 << 0) -+#define DBG_MISC (1 << 1) -+#define DBG_ERROR (1 << 2) -+#define DBG_TFTP (1 << 3) -+ -+extern int slirp_debug; -+ -+#define DEBUG_CALL(fmt, ...) \ -+ do { \ -+ if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ -+ g_debug(fmt "...", ##__VA_ARGS__); \ -+ } \ -+ } while (0) -+ -+#define DEBUG_ARG(fmt, ...) \ -+ do { \ -+ if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ -+ g_debug(" " fmt, ##__VA_ARGS__); \ -+ } \ -+ } while (0) -+ -+#define DEBUG_MISC(fmt, ...) \ -+ do { \ -+ if (G_UNLIKELY(slirp_debug & DBG_MISC)) { \ -+ g_debug(fmt, ##__VA_ARGS__); \ -+ } \ -+ } while (0) -+ -+#define DEBUG_ERROR(fmt, ...) \ -+ do { \ -+ if (G_UNLIKELY(slirp_debug & DBG_ERROR)) { \ -+ g_debug(fmt, ##__VA_ARGS__); \ -+ } \ -+ } while (0) -+ -+#define DEBUG_TFTP(fmt, ...) \ -+ do { \ -+ if (G_UNLIKELY(slirp_debug & DBG_TFTP)) { \ -+ g_debug(fmt, ##__VA_ARGS__); \ -+ } \ -+ } while (0) -+ -+#endif /* DEBUG_H_ */ -diff --git a/slirp/src/dhcpv6.c b/slirp/src/dhcpv6.c -new file mode 100644 -index 0000000..bc041d5 ---- /dev/null -+++ b/slirp/src/dhcpv6.c -@@ -0,0 +1,225 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * SLIRP stateless DHCPv6 -+ * -+ * We only support stateless DHCPv6, e.g. for network booting. -+ * See RFC 3315, RFC 3736, RFC 3646 and RFC 5970 for details. -+ * -+ * Copyright 2016 Thomas Huth, Red Hat Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * 1. Redistributions of source code must retain the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer. -+ * -+ * 2. Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer in the documentation and/or other materials provided -+ * with the distribution. -+ * -+ * 3. 
Neither the name of the copyright holder nor the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -+ * OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+ -+#include "slirp.h" -+#include "dhcpv6.h" -+ -+/* DHCPv6 message types */ -+#define MSGTYPE_REPLY 7 -+#define MSGTYPE_INFO_REQUEST 11 -+ -+/* DHCPv6 option types */ -+#define OPTION_CLIENTID 1 -+#define OPTION_IAADDR 5 -+#define OPTION_ORO 6 -+#define OPTION_DNS_SERVERS 23 -+#define OPTION_BOOTFILE_URL 59 -+ -+struct requested_infos { -+ uint8_t *client_id; -+ int client_id_len; -+ bool want_dns; -+ bool want_boot_url; -+}; -+ -+/** -+ * Analyze the info request message sent by the client to see what data it -+ * provided and what it wants to have. The information is gathered in the -+ * "requested_infos" struct. Note that client_id (if provided) points into -+ * the odata region, thus the caller must keep odata valid as long as it -+ * needs to access the requested_infos struct. 
-+ */ -+static int dhcpv6_parse_info_request(Slirp *slirp, uint8_t *odata, int olen, -+ struct requested_infos *ri) -+{ -+ int i, req_opt; -+ -+ while (olen > 4) { -+ /* Parse one option */ -+ int option = odata[0] << 8 | odata[1]; -+ int len = odata[2] << 8 | odata[3]; -+ -+ if (len + 4 > olen) { -+ slirp->cb->guest_error("Guest sent bad DHCPv6 packet!", -+ slirp->opaque); -+ return -E2BIG; -+ } -+ -+ switch (option) { -+ case OPTION_IAADDR: -+ /* According to RFC3315, we must discard requests with IA option */ -+ return -EINVAL; -+ case OPTION_CLIENTID: -+ if (len > 256) { -+ /* Avoid very long IDs which could cause problems later */ -+ return -E2BIG; -+ } -+ ri->client_id = odata + 4; -+ ri->client_id_len = len; -+ break; -+ case OPTION_ORO: /* Option request option */ -+ if (len & 1) { -+ return -EINVAL; -+ } -+ /* Check which options the client wants to have */ -+ for (i = 0; i < len; i += 2) { -+ req_opt = odata[4 + i] << 8 | odata[4 + i + 1]; -+ switch (req_opt) { -+ case OPTION_DNS_SERVERS: -+ ri->want_dns = true; -+ break; -+ case OPTION_BOOTFILE_URL: -+ ri->want_boot_url = true; -+ break; -+ default: -+ DEBUG_MISC("dhcpv6: Unsupported option request %d", -+ req_opt); -+ } -+ } -+ break; -+ default: -+ DEBUG_MISC("dhcpv6 info req: Unsupported option %d, len=%d", option, -+ len); -+ } -+ -+ odata += len + 4; -+ olen -= len + 4; -+ } -+ -+ return 0; -+} -+ -+ -+/** -+ * Handle information request messages -+ */ -+static void dhcpv6_info_request(Slirp *slirp, struct sockaddr_in6 *srcsas, -+ uint32_t xid, uint8_t *odata, int olen) -+{ -+ struct requested_infos ri = { NULL }; -+ struct sockaddr_in6 sa6, da6; -+ struct mbuf *m; -+ uint8_t *resp; -+ -+ if (dhcpv6_parse_info_request(slirp, odata, olen, &ri) < 0) { -+ return; -+ } -+ -+ m = m_get(slirp); -+ if (!m) { -+ return; -+ } -+ memset(m->m_data, 0, m->m_size); -+ m->m_data += IF_MAXLINKHDR; -+ resp = (uint8_t *)m->m_data + sizeof(struct ip6) + sizeof(struct udphdr); -+ -+ /* Fill in response */ -+ *resp++ = 
MSGTYPE_REPLY; -+ *resp++ = (uint8_t)(xid >> 16); -+ *resp++ = (uint8_t)(xid >> 8); -+ *resp++ = (uint8_t)xid; -+ -+ if (ri.client_id) { -+ *resp++ = OPTION_CLIENTID >> 8; /* option-code high byte */ -+ *resp++ = OPTION_CLIENTID; /* option-code low byte */ -+ *resp++ = ri.client_id_len >> 8; /* option-len high byte */ -+ *resp++ = ri.client_id_len; /* option-len low byte */ -+ memcpy(resp, ri.client_id, ri.client_id_len); -+ resp += ri.client_id_len; -+ } -+ if (ri.want_dns) { -+ *resp++ = OPTION_DNS_SERVERS >> 8; /* option-code high byte */ -+ *resp++ = OPTION_DNS_SERVERS; /* option-code low byte */ -+ *resp++ = 0; /* option-len high byte */ -+ *resp++ = 16; /* option-len low byte */ -+ memcpy(resp, &slirp->vnameserver_addr6, 16); -+ resp += 16; -+ } -+ if (ri.want_boot_url) { -+ uint8_t *sa = slirp->vhost_addr6.s6_addr; -+ int slen, smaxlen; -+ -+ *resp++ = OPTION_BOOTFILE_URL >> 8; /* option-code high byte */ -+ *resp++ = OPTION_BOOTFILE_URL; /* option-code low byte */ -+ smaxlen = (uint8_t *)m->m_data + IF_MTU - (resp + 2); -+ slen = snprintf((char *)resp + 2, smaxlen, -+ "tftp://[%02x%02x:%02x%02x:%02x%02x:%02x%02x:" -+ "%02x%02x:%02x%02x:%02x%02x:%02x%02x]/%s", -+ sa[0], sa[1], sa[2], sa[3], sa[4], sa[5], sa[6], sa[7], -+ sa[8], sa[9], sa[10], sa[11], sa[12], sa[13], sa[14], -+ sa[15], slirp->bootp_filename); -+ slen = MIN(slen, smaxlen); -+ *resp++ = slen >> 8; /* option-len high byte */ -+ *resp++ = slen; /* option-len low byte */ -+ resp += slen; -+ } -+ -+ sa6.sin6_addr = slirp->vhost_addr6; -+ sa6.sin6_port = DHCPV6_SERVER_PORT; -+ da6.sin6_addr = srcsas->sin6_addr; -+ da6.sin6_port = srcsas->sin6_port; -+ m->m_data += sizeof(struct ip6) + sizeof(struct udphdr); -+ m->m_len = resp - (uint8_t *)m->m_data; -+ udp6_output(NULL, m, &sa6, &da6); -+} -+ -+/** -+ * Handle DHCPv6 messages sent by the client -+ */ -+void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m) -+{ -+ uint8_t *data = (uint8_t *)m->m_data + sizeof(struct udphdr); -+ int data_len = 
m->m_len - sizeof(struct udphdr); -+ uint32_t xid; -+ -+ if (data_len < 4) { -+ return; -+ } -+ -+ xid = ntohl(*(uint32_t *)data) & 0xffffff; -+ -+ switch (data[0]) { -+ case MSGTYPE_INFO_REQUEST: -+ dhcpv6_info_request(m->slirp, srcsas, xid, &data[4], data_len - 4); -+ break; -+ default: -+ DEBUG_MISC("dhcpv6_input: Unsupported message type 0x%x", data[0]); -+ } -+} -diff --git a/slirp/src/dhcpv6.h b/slirp/src/dhcpv6.h -new file mode 100644 -index 0000000..d12c49b ---- /dev/null -+++ b/slirp/src/dhcpv6.h -@@ -0,0 +1,68 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Definitions and prototypes for SLIRP stateless DHCPv6 -+ * -+ * Copyright 2016 Thomas Huth, Red Hat Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * 1. Redistributions of source code must retain the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer. -+ * -+ * 2. Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer in the documentation and/or other materials provided -+ * with the distribution. -+ * -+ * 3. Neither the name of the copyright holder nor the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE -+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -+ * OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef SLIRP_DHCPV6_H -+#define SLIRP_DHCPV6_H -+ -+#define DHCPV6_SERVER_PORT 547 -+ -+#define ALLDHCP_MULTICAST \ -+ { \ -+ .s6_addr = { \ -+ 0xff, \ -+ 0x02, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x01, \ -+ 0x00, \ -+ 0x02 \ -+ } \ -+ } -+ -+#define in6_dhcp_multicast(a) in6_equal(a, &(struct in6_addr)ALLDHCP_MULTICAST) -+ -+void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m); -+ -+#endif -diff --git a/slirp/src/dnssearch.c b/slirp/src/dnssearch.c -new file mode 100644 -index 0000000..18a6122 ---- /dev/null -+++ b/slirp/src/dnssearch.c -@@ -0,0 +1,311 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * Domain search option for DHCP (RFC 3397) -+ * -+ * Copyright (c) 2012 Klaus Stengel -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. 
-+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. -+ */ -+ -+#include "slirp.h" -+ -+static const uint8_t RFC3397_OPT_DOMAIN_SEARCH = 119; -+static const uint8_t MAX_OPT_LEN = 255; -+static const uint8_t OPT_HEADER_LEN = 2; -+static const uint8_t REFERENCE_LEN = 2; -+ -+struct compact_domain; -+ -+typedef struct compact_domain { -+ struct compact_domain *self; -+ struct compact_domain *refdom; -+ uint8_t *labels; -+ size_t len; -+ size_t common_octets; -+} CompactDomain; -+ -+static size_t domain_suffix_diffoff(const CompactDomain *a, -+ const CompactDomain *b) -+{ -+ size_t la = a->len, lb = b->len; -+ uint8_t *da = a->labels + la, *db = b->labels + lb; -+ size_t i, lm = (la < lb) ? 
la : lb; -+ -+ for (i = 0; i < lm; i++) { -+ da--; -+ db--; -+ if (*da != *db) { -+ break; -+ } -+ } -+ return i; -+} -+ -+static int domain_suffix_ord(const void *cva, const void *cvb) -+{ -+ const CompactDomain *a = cva, *b = cvb; -+ size_t la = a->len, lb = b->len; -+ size_t doff = domain_suffix_diffoff(a, b); -+ uint8_t ca = a->labels[la - doff]; -+ uint8_t cb = b->labels[lb - doff]; -+ -+ if (ca < cb) { -+ return -1; -+ } -+ if (ca > cb) { -+ return 1; -+ } -+ if (la < lb) { -+ return -1; -+ } -+ if (la > lb) { -+ return 1; -+ } -+ return 0; -+} -+ -+static size_t domain_common_label(CompactDomain *a, CompactDomain *b) -+{ -+ size_t res, doff = domain_suffix_diffoff(a, b); -+ uint8_t *first_eq_pos = a->labels + (a->len - doff); -+ uint8_t *label = a->labels; -+ -+ while (*label && label < first_eq_pos) { -+ label += *label + 1; -+ } -+ res = a->len - (label - a->labels); -+ /* only report if it can help to reduce the packet size */ -+ return (res > REFERENCE_LEN) ? res : 0; -+} -+ -+static void domain_fixup_order(CompactDomain *cd, size_t n) -+{ -+ size_t i; -+ -+ for (i = 0; i < n; i++) { -+ CompactDomain *cur = cd + i, *next = cd[i].self; -+ -+ while (!cur->common_octets) { -+ CompactDomain *tmp = next->self; /* backup target value */ -+ -+ next->self = cur; -+ cur->common_octets++; -+ -+ cur = next; -+ next = tmp; -+ } -+ } -+} -+ -+static void domain_mklabels(CompactDomain *cd, const char *input) -+{ -+ uint8_t *len_marker = cd->labels; -+ uint8_t *output = len_marker; /* pre-incremented */ -+ const char *in = input; -+ char cur_chr; -+ size_t len = 0; -+ -+ if (cd->len == 0) { -+ goto fail; -+ } -+ cd->len++; -+ -+ do { -+ cur_chr = *in++; -+ if (cur_chr == '.' 
|| cur_chr == '\0') { -+ len = output - len_marker; -+ if ((len == 0 && cur_chr == '.') || len >= 64) { -+ goto fail; -+ } -+ *len_marker = len; -+ -+ output++; -+ len_marker = output; -+ } else { -+ output++; -+ *output = cur_chr; -+ } -+ } while (cur_chr != '\0'); -+ -+ /* ensure proper zero-termination */ -+ if (len != 0) { -+ *len_marker = 0; -+ cd->len++; -+ } -+ return; -+ -+fail: -+ g_warning("failed to parse domain name '%s'\n", input); -+ cd->len = 0; -+} -+ -+static void domain_mkxrefs(CompactDomain *doms, CompactDomain *last, -+ size_t depth) -+{ -+ CompactDomain *i = doms, *target = doms; -+ -+ do { -+ if (i->labels < target->labels) { -+ target = i; -+ } -+ } while (i++ != last); -+ -+ for (i = doms; i != last; i++) { -+ CompactDomain *group_last; -+ size_t next_depth; -+ -+ if (i->common_octets == depth) { -+ continue; -+ } -+ -+ next_depth = -1; -+ for (group_last = i; group_last != last; group_last++) { -+ size_t co = group_last->common_octets; -+ if (co <= depth) { -+ break; -+ } -+ if (co < next_depth) { -+ next_depth = co; -+ } -+ } -+ domain_mkxrefs(i, group_last, next_depth); -+ -+ i = group_last; -+ if (i == last) { -+ break; -+ } -+ } -+ -+ if (depth == 0) { -+ return; -+ } -+ -+ i = doms; -+ do { -+ if (i != target && i->refdom == NULL) { -+ i->refdom = target; -+ i->common_octets = depth; -+ } -+ } while (i++ != last); -+} -+ -+static size_t domain_compactify(CompactDomain *domains, size_t n) -+{ -+ uint8_t *start = domains->self->labels, *outptr = start; -+ size_t i; -+ -+ for (i = 0; i < n; i++) { -+ CompactDomain *cd = domains[i].self; -+ CompactDomain *rd = cd->refdom; -+ -+ if (rd != NULL) { -+ size_t moff = (rd->labels - start) + (rd->len - cd->common_octets); -+ if (moff < 0x3FFFu) { -+ cd->len -= cd->common_octets - 2; -+ cd->labels[cd->len - 1] = moff & 0xFFu; -+ cd->labels[cd->len - 2] = 0xC0u | (moff >> 8); -+ } -+ } -+ -+ if (cd->labels != outptr) { -+ memmove(outptr, cd->labels, cd->len); -+ cd->labels = outptr; -+ } -+ outptr 
+= cd->len; -+ } -+ return outptr - start; -+} -+ -+int translate_dnssearch(Slirp *s, const char **names) -+{ -+ size_t blocks, bsrc_start, bsrc_end, bdst_start; -+ size_t i, num_domains, memreq = 0; -+ uint8_t *result = NULL, *outptr; -+ CompactDomain *domains = NULL; -+ const char **nameptr = names; -+ -+ while (*nameptr != NULL) { -+ nameptr++; -+ } -+ -+ num_domains = nameptr - names; -+ if (num_domains == 0) { -+ return -2; -+ } -+ -+ domains = g_malloc(num_domains * sizeof(*domains)); -+ -+ for (i = 0; i < num_domains; i++) { -+ size_t nlen = strlen(names[i]); -+ memreq += nlen + 2; /* 1 zero octet + 1 label length octet */ -+ domains[i].self = domains + i; -+ domains[i].len = nlen; -+ domains[i].common_octets = 0; -+ domains[i].refdom = NULL; -+ } -+ -+ /* reserve extra 2 header bytes for each 255 bytes of output */ -+ memreq += DIV_ROUND_UP(memreq, MAX_OPT_LEN) * OPT_HEADER_LEN; -+ result = g_malloc(memreq * sizeof(*result)); -+ -+ outptr = result; -+ for (i = 0; i < num_domains; i++) { -+ domains[i].labels = outptr; -+ domain_mklabels(domains + i, names[i]); -+ outptr += domains[i].len; -+ } -+ -+ if (outptr == result) { -+ g_free(domains); -+ g_free(result); -+ return -1; -+ } -+ -+ qsort(domains, num_domains, sizeof(*domains), domain_suffix_ord); -+ domain_fixup_order(domains, num_domains); -+ -+ for (i = 1; i < num_domains; i++) { -+ size_t cl = domain_common_label(domains + i - 1, domains + i); -+ domains[i - 1].common_octets = cl; -+ } -+ -+ domain_mkxrefs(domains, domains + num_domains - 1, 0); -+ memreq = domain_compactify(domains, num_domains); -+ -+ blocks = DIV_ROUND_UP(memreq, MAX_OPT_LEN); -+ bsrc_end = memreq; -+ bsrc_start = (blocks - 1) * MAX_OPT_LEN; -+ bdst_start = bsrc_start + blocks * OPT_HEADER_LEN; -+ memreq += blocks * OPT_HEADER_LEN; -+ -+ while (blocks--) { -+ size_t len = bsrc_end - bsrc_start; -+ memmove(result + bdst_start, result + bsrc_start, len); -+ result[bdst_start - 2] = RFC3397_OPT_DOMAIN_SEARCH; -+ result[bdst_start - 1] 
= len; -+ bsrc_end = bsrc_start; -+ bsrc_start -= MAX_OPT_LEN; -+ bdst_start -= MAX_OPT_LEN + OPT_HEADER_LEN; -+ } -+ -+ g_free(domains); -+ s->vdnssearch = result; -+ s->vdnssearch_len = memreq; -+ return 0; -+} -diff --git a/slirp/src/if.c b/slirp/src/if.c -new file mode 100644 -index 0000000..23190b5 ---- /dev/null -+++ b/slirp/src/if.c -@@ -0,0 +1,213 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#include "slirp.h" -+ -+static void ifs_insque(struct mbuf *ifm, struct mbuf *ifmhead) -+{ -+ ifm->ifs_next = ifmhead->ifs_next; -+ ifmhead->ifs_next = ifm; -+ ifm->ifs_prev = ifmhead; -+ ifm->ifs_next->ifs_prev = ifm; -+} -+ -+static void ifs_remque(struct mbuf *ifm) -+{ -+ ifm->ifs_prev->ifs_next = ifm->ifs_next; -+ ifm->ifs_next->ifs_prev = ifm->ifs_prev; -+} -+ -+void if_init(Slirp *slirp) -+{ -+ slirp->if_fastq.qh_link = slirp->if_fastq.qh_rlink = &slirp->if_fastq; -+ slirp->if_batchq.qh_link = slirp->if_batchq.qh_rlink = &slirp->if_batchq; -+} -+ -+/* -+ * if_output: Queue packet into an output queue. -+ * There are 2 output queue's, if_fastq and if_batchq. -+ * Each output queue is a doubly linked list of double linked lists -+ * of mbufs, each list belonging to one "session" (socket). This -+ * way, we can output packets fairly by sending one packet from each -+ * session, instead of all the packets from one session, then all packets -+ * from the next session, etc. Packets on the if_fastq get absolute -+ * priority, but if one session hogs the link, it gets "downgraded" -+ * to the batchq until it runs out of packets, then it'll return -+ * to the fastq (eg. 
if the user does an ls -alR in a telnet session, -+ * it'll temporarily get downgraded to the batchq) -+ */ -+void if_output(struct socket *so, struct mbuf *ifm) -+{ -+ Slirp *slirp = ifm->slirp; -+ struct mbuf *ifq; -+ int on_fastq = 1; -+ -+ DEBUG_CALL("if_output"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("ifm = %p", ifm); -+ -+ /* -+ * First remove the mbuf from m_usedlist, -+ * since we're gonna use m_next and m_prev ourselves -+ * XXX Shouldn't need this, gotta change dtom() etc. -+ */ -+ if (ifm->m_flags & M_USEDLIST) { -+ remque(ifm); -+ ifm->m_flags &= ~M_USEDLIST; -+ } -+ -+ /* -+ * See if there's already a batchq list for this session. -+ * This can include an interactive session, which should go on fastq, -+ * but gets too greedy... hence it'll be downgraded from fastq to batchq. -+ * We mustn't put this packet back on the fastq (or we'll send it out of -+ * order) -+ * XXX add cache here? -+ */ -+ if (so) { -+ for (ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; -+ (struct quehead *)ifq != &slirp->if_batchq; ifq = ifq->ifq_prev) { -+ if (so == ifq->ifq_so) { -+ /* A match! */ -+ ifm->ifq_so = so; -+ ifs_insque(ifm, ifq->ifs_prev); -+ goto diddit; -+ } -+ } -+ } -+ -+ /* No match, check which queue to put it on */ -+ if (so && (so->so_iptos & IPTOS_LOWDELAY)) { -+ ifq = (struct mbuf *)slirp->if_fastq.qh_rlink; -+ on_fastq = 1; -+ /* -+ * Check if this packet is a part of the last -+ * packet's session -+ */ -+ if (ifq->ifq_so == so) { -+ ifm->ifq_so = so; -+ ifs_insque(ifm, ifq->ifs_prev); -+ goto diddit; -+ } -+ } else { -+ ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; -+ } -+ -+ /* Create a new doubly linked list for this session */ -+ ifm->ifq_so = so; -+ ifs_init(ifm); -+ insque(ifm, ifq); -+ -+diddit: -+ if (so) { -+ /* Update *_queued */ -+ so->so_queued++; -+ so->so_nqueued++; -+ /* -+ * Check if the interactive session should be downgraded to -+ * the batchq. 
A session is downgraded if it has queued 6 -+ * packets without pausing, and at least 3 of those packets -+ * have been sent over the link -+ * (XXX These are arbitrary numbers, probably not optimal..) -+ */ -+ if (on_fastq && -+ ((so->so_nqueued >= 6) && (so->so_nqueued - so->so_queued) >= 3)) { -+ /* Remove from current queue... */ -+ remque(ifm->ifs_next); -+ -+ /* ...And insert in the new. That'll teach ya! */ -+ insque(ifm->ifs_next, &slirp->if_batchq); -+ } -+ } -+ -+ /* -+ * This prevents us from malloc()ing too many mbufs -+ */ -+ if_start(ifm->slirp); -+} -+ -+/* -+ * Send one packet from each session. -+ * If there are packets on the fastq, they are sent FIFO, before -+ * everything else. Then we choose the first packet from each -+ * batchq session (socket) and send it. -+ * For example, if there are 3 ftp sessions fighting for bandwidth, -+ * one packet will be sent from the first session, then one packet -+ * from the second session, then one packet from the third. -+ */ -+void if_start(Slirp *slirp) -+{ -+ uint64_t now = slirp->cb->clock_get_ns(slirp->opaque); -+ bool from_batchq = false; -+ struct mbuf *ifm, *ifm_next, *ifqt; -+ -+ DEBUG_CALL("if_start"); -+ -+ if (slirp->if_start_busy) { -+ return; -+ } -+ slirp->if_start_busy = true; -+ -+ struct mbuf *batch_head = NULL; -+ if (slirp->if_batchq.qh_link != &slirp->if_batchq) { -+ batch_head = (struct mbuf *)slirp->if_batchq.qh_link; -+ } -+ -+ if (slirp->if_fastq.qh_link != &slirp->if_fastq) { -+ ifm_next = (struct mbuf *)slirp->if_fastq.qh_link; -+ } else if (batch_head) { -+ /* Nothing on fastq, pick up from batchq */ -+ ifm_next = batch_head; -+ from_batchq = true; -+ } else { -+ ifm_next = NULL; -+ } -+ -+ while (ifm_next) { -+ ifm = ifm_next; -+ -+ ifm_next = ifm->ifq_next; -+ if ((struct quehead *)ifm_next == &slirp->if_fastq) { -+ /* No more packets in fastq, switch to batchq */ -+ ifm_next = batch_head; -+ from_batchq = true; -+ } -+ if ((struct quehead *)ifm_next == &slirp->if_batchq) { -+ 
/* end of batchq */ -+ ifm_next = NULL; -+ } -+ -+ /* Try to send packet unless it already expired */ -+ if (ifm->expiration_date >= now && !if_encap(slirp, ifm)) { -+ /* Packet is delayed due to pending ARP or NDP resolution */ -+ continue; -+ } -+ -+ /* Remove it from the queue */ -+ ifqt = ifm->ifq_prev; -+ remque(ifm); -+ -+ /* If there are more packets for this session, re-queue them */ -+ if (ifm->ifs_next != ifm) { -+ struct mbuf *next = ifm->ifs_next; -+ -+ insque(next, ifqt); -+ ifs_remque(ifm); -+ if (!from_batchq) { -+ ifm_next = next; -+ } -+ } -+ -+ /* Update so_queued */ -+ if (ifm->ifq_so && --ifm->ifq_so->so_queued == 0) { -+ /* If there's no more queued, reset nqueued */ -+ ifm->ifq_so->so_nqueued = 0; -+ } -+ -+ m_free(ifm); -+ } -+ -+ slirp->if_start_busy = false; -+} -diff --git a/slirp/src/if.h b/slirp/src/if.h -new file mode 100644 -index 0000000..3288298 ---- /dev/null -+++ b/slirp/src/if.h -@@ -0,0 +1,21 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#ifndef IF_H -+#define IF_H -+ -+#define IF_COMPRESS 0x01 /* We want compression */ -+#define IF_NOCOMPRESS 0x02 /* Do not do compression */ -+#define IF_AUTOCOMP 0x04 /* Autodetect (default) */ -+#define IF_NOCIDCOMP 0x08 /* CID compression */ -+ -+#define IF_MTU 1500 -+#define IF_MRU 1500 -+#define IF_COMP IF_AUTOCOMP /* Flags for compression */ -+ -+/* 2 for alignment, 14 for ethernet */ -+#define IF_MAXLINKHDR (2 + ETH_HLEN) -+ -+#endif -diff --git a/slirp/src/ip.h b/slirp/src/ip.h -new file mode 100644 -index 0000000..e5d4aa8 ---- /dev/null -+++ b/slirp/src/ip.h -@@ -0,0 +1,242 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. 
Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. 
-+ * -+ * @(#)ip.h 8.1 (Berkeley) 6/10/93 -+ * ip.h,v 1.3 1994/08/21 05:27:30 paul Exp -+ */ -+ -+#ifndef IP_H -+#define IP_H -+ -+#include <glib.h> -+ -+#if G_BYTE_ORDER == G_BIG_ENDIAN -+#undef NTOHL -+#undef NTOHS -+#undef HTONL -+#undef HTONS -+#define NTOHL(d) -+#define NTOHS(d) -+#define HTONL(d) -+#define HTONS(d) -+#else -+#ifndef NTOHL -+#define NTOHL(d) ((d) = ntohl((d))) -+#endif -+#ifndef NTOHS -+#define NTOHS(d) ((d) = ntohs((uint16_t)(d))) -+#endif -+#ifndef HTONL -+#define HTONL(d) ((d) = htonl((d))) -+#endif -+#ifndef HTONS -+#define HTONS(d) ((d) = htons((uint16_t)(d))) -+#endif -+#endif -+ -+typedef uint32_t n_long; /* long as received from the net */ -+ -+/* -+ * Definitions for internet protocol version 4. -+ * Per RFC 791, September 1981. -+ */ -+#define IPVERSION 4 -+ -+/* -+ * Structure of an internet header, naked of options. -+ */ -+struct ip { -+#if G_BYTE_ORDER == G_BIG_ENDIAN -+ uint8_t ip_v : 4, /* version */ -+ ip_hl : 4; /* header length */ -+#else -+ uint8_t ip_hl : 4, /* header length */ -+ ip_v : 4; /* version */ -+#endif -+ uint8_t ip_tos; /* type of service */ -+ uint16_t ip_len; /* total length */ -+ uint16_t ip_id; /* identification */ -+ uint16_t ip_off; /* fragment offset field */ -+#define IP_DF 0x4000 /* don't fragment flag */ -+#define IP_MF 0x2000 /* more fragments flag */ -+#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ -+ uint8_t ip_ttl; /* time to live */ -+ uint8_t ip_p; /* protocol */ -+ uint16_t ip_sum; /* checksum */ -+ struct in_addr ip_src, ip_dst; /* source and dest address */ -+} SLIRP_PACKED; -+ -+#define IP_MAXPACKET 65535 /* maximum packet size */ -+ -+/* -+ * Definitions for IP type of service (ip_tos) -+ */ -+#define IPTOS_LOWDELAY 0x10 -+#define IPTOS_THROUGHPUT 0x08 -+#define IPTOS_RELIABILITY 0x04 -+ -+/* -+ * Definitions for options.
-+ */ -+#define IPOPT_COPIED(o) ((o)&0x80) -+#define IPOPT_CLASS(o) ((o)&0x60) -+#define IPOPT_NUMBER(o) ((o)&0x1f) -+ -+#define IPOPT_CONTROL 0x00 -+#define IPOPT_RESERVED1 0x20 -+#define IPOPT_DEBMEAS 0x40 -+#define IPOPT_RESERVED2 0x60 -+ -+#define IPOPT_EOL 0 /* end of option list */ -+#define IPOPT_NOP 1 /* no operation */ -+ -+#define IPOPT_RR 7 /* record packet route */ -+#define IPOPT_TS 68 /* timestamp */ -+#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */ -+#define IPOPT_LSRR 131 /* loose source route */ -+#define IPOPT_SATID 136 /* satnet id */ -+#define IPOPT_SSRR 137 /* strict source route */ -+ -+/* -+ * Offsets to fields in options other than EOL and NOP. -+ */ -+#define IPOPT_OPTVAL 0 /* option ID */ -+#define IPOPT_OLEN 1 /* option length */ -+#define IPOPT_OFFSET 2 /* offset within option */ -+#define IPOPT_MINOFF 4 /* min value of above */ -+ -+/* -+ * Time stamp option structure. -+ */ -+struct ip_timestamp { -+ uint8_t ipt_code; /* IPOPT_TS */ -+ uint8_t ipt_len; /* size of structure (variable) */ -+ uint8_t ipt_ptr; /* index of current entry */ -+#if G_BYTE_ORDER == G_BIG_ENDIAN -+ uint8_t ipt_oflw : 4, /* overflow counter */ -+ ipt_flg : 4; /* flags, see below */ -+#else -+ uint8_t ipt_flg : 4, /* flags, see below */ -+ ipt_oflw : 4; /* overflow counter */ -+#endif -+ union ipt_timestamp { -+ n_long ipt_time[1]; -+ struct ipt_ta { -+ struct in_addr ipt_addr; -+ n_long ipt_time; -+ } ipt_ta[1]; -+ } ipt_timestamp; -+} SLIRP_PACKED; -+ -+/* flag bits for ipt_flg */ -+#define IPOPT_TS_TSONLY 0 /* timestamps only */ -+#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ -+#define IPOPT_TS_PRESPEC 3 /* specified modules only */ -+ -+/* bits for security (not byte swapped) */ -+#define IPOPT_SECUR_UNCLASS 0x0000 -+#define IPOPT_SECUR_CONFID 0xf135 -+#define IPOPT_SECUR_EFTO 0x789a -+#define IPOPT_SECUR_MMMM 0xbc4d -+#define IPOPT_SECUR_RESTR 0xaf13 -+#define IPOPT_SECUR_SECRET 0xd788 -+#define IPOPT_SECUR_TOPSECRET 0x6bc5 -+ -+/* -+ * 
Internet implementation parameters. -+ */ -+#define MAXTTL 255 /* maximum time to live (seconds) */ -+#define IPDEFTTL 64 /* default ttl, from RFC 1340 */ -+#define IPFRAGTTL 60 /* time to live for frags, slowhz */ -+#define IPTTLDEC 1 /* subtracted when forwarding */ -+ -+#define IP_MSS 576 /* default maximum segment size */ -+ -+#if GLIB_SIZEOF_VOID_P == 4 -+struct mbuf_ptr { -+ struct mbuf *mptr; -+ uint32_t dummy; -+} SLIRP_PACKED; -+#else -+struct mbuf_ptr { -+ struct mbuf *mptr; -+} SLIRP_PACKED; -+#endif -+struct qlink { -+ void *next, *prev; -+}; -+ -+/* -+ * Overlay for ip header used by other protocols (tcp, udp). -+ */ -+struct ipovly { -+ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ -+ uint8_t ih_x1; /* (unused) */ -+ uint8_t ih_pr; /* protocol */ -+ uint16_t ih_len; /* protocol length */ -+ struct in_addr ih_src; /* source internet address */ -+ struct in_addr ih_dst; /* destination internet address */ -+} SLIRP_PACKED; -+ -+/* -+ * Ip reassembly queue structure. Each fragment -+ * being reassembled is attached to one of these structures. -+ * They are timed out after ipq_ttl drops to 0, and may also -+ * be reclaimed if memory becomes tight. -+ * size 28 bytes -+ */ -+struct ipq { -+ struct qlink frag_link; /* to ip headers of fragments */ -+ struct qlink ip_link; /* to other reass headers */ -+ uint8_t ipq_ttl; /* time for reass q to live */ -+ uint8_t ipq_p; /* protocol of this fragment */ -+ uint16_t ipq_id; /* sequence id for reassembly */ -+ struct in_addr ipq_src, ipq_dst; -+}; -+ -+/* -+ * Ip header, when holding a fragment. 
-+ * -+ * Note: ipf_link must be at same offset as frag_link above -+ */ -+struct ipasfrag { -+ struct qlink ipf_link; -+ struct ip ipf_ip; -+}; -+ -+G_STATIC_ASSERT(offsetof(struct ipq, frag_link) == -+ offsetof(struct ipasfrag, ipf_link)); -+ -+#define ipf_off ipf_ip.ip_off -+#define ipf_tos ipf_ip.ip_tos -+#define ipf_len ipf_ip.ip_len -+#define ipf_next ipf_link.next -+#define ipf_prev ipf_link.prev -+ -+#endif -diff --git a/slirp/src/ip6.h b/slirp/src/ip6.h -new file mode 100644 -index 0000000..0630309 ---- /dev/null -+++ b/slirp/src/ip6.h -@@ -0,0 +1,214 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 2013 -+ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. -+ */ -+ -+#ifndef SLIRP_IP6_H -+#define SLIRP_IP6_H -+ -+#include <glib.h> -+#include <string.h> -+ -+#define ALLNODES_MULTICAST \ -+ { \ -+ .s6_addr = { \ -+ 0xff, \ -+ 0x02, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x01 \ -+ } \ -+ } -+ -+#define SOLICITED_NODE_PREFIX \ -+ { \ -+ .s6_addr = { \ -+ 0xff, \ -+ 0x02, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x01, \ -+ 0xff, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00 \ -+ } \ -+ } -+ -+#define LINKLOCAL_ADDR \ -+ { \ -+ .s6_addr = { \ -+ 0xfe, \ -+ 0x80, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x02 \ -+ } \ -+ } -+ -+#define ZERO_ADDR \ -+ { \ -+ .s6_addr = { \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00 \ -+ } \ -+ } -+ -+static inline bool in6_equal(const struct in6_addr *a, const struct in6_addr *b) -+{ -+ return memcmp(a, b, sizeof(*a)) == 0; -+} -+ -+static inline bool in6_equal_net(const struct in6_addr *a, -+ const struct in6_addr *b, int
prefix_len) -+{ -+ if (memcmp(a, b, prefix_len / 8) != 0) { -+ return 0; -+ } -+ -+ if (prefix_len % 8 == 0) { -+ return 1; -+ } -+ -+ return a->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)) == -+ b->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)); -+} -+ -+static inline bool in6_equal_mach(const struct in6_addr *a, -+ const struct in6_addr *b, int prefix_len) -+{ -+ if (memcmp(&(a->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), -+ &(b->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), -+ 16 - DIV_ROUND_UP(prefix_len, 8)) != 0) { -+ return 0; -+ } -+ -+ if (prefix_len % 8 == 0) { -+ return 1; -+ } -+ -+ return (a->s6_addr[prefix_len / 8] & -+ ((1U << (8 - (prefix_len % 8))) - 1)) == -+ (b->s6_addr[prefix_len / 8] & ((1U << (8 - (prefix_len % 8))) - 1)); -+} -+ -+ -+#define in6_equal_router(a) \ -+ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ -+ in6_equal_mach(a, &slirp->vhost_addr6, slirp->vprefix_len)) || \ -+ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ -+ in6_equal_mach(a, &slirp->vhost_addr6, 64))) -+ -+#define in6_equal_dns(a) \ -+ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ -+ in6_equal_mach(a, &slirp->vnameserver_addr6, slirp->vprefix_len)) || \ -+ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ -+ in6_equal_mach(a, &slirp->vnameserver_addr6, 64))) -+ -+#define in6_equal_host(a) (in6_equal_router(a) || in6_equal_dns(a)) -+ -+#define in6_solicitednode_multicast(a) \ -+ (in6_equal_net(a, &(struct in6_addr)SOLICITED_NODE_PREFIX, 104)) -+ -+#define in6_zero(a) (in6_equal(a, &(struct in6_addr)ZERO_ADDR)) -+ -+/* Compute emulated host MAC address from its ipv6 address */ -+static inline void in6_compute_ethaddr(struct in6_addr ip, -+ uint8_t eth[ETH_ALEN]) -+{ -+ eth[0] = 0x52; -+ eth[1] = 0x56; -+ memcpy(&eth[2], &ip.s6_addr[16 - (ETH_ALEN - 2)], ETH_ALEN - 2); -+} -+ -+/* -+ * Definitions for internet protocol version 6. -+ * Per RFC 2460, December 1998.
-+ */ -+#define IP6VERSION 6 -+#define IP6_HOP_LIMIT 255 -+ -+/* -+ * Structure of an internet header, naked of options. -+ */ -+struct ip6 { -+#if G_BYTE_ORDER == G_BIG_ENDIAN -+ uint32_t ip_v : 4, /* version */ -+ ip_tc_hi : 4, /* traffic class */ -+ ip_tc_lo : 4, ip_fl_hi : 4, /* flow label */ -+ ip_fl_lo : 16; -+#else -+ uint32_t ip_tc_hi : 4, ip_v : 4, ip_fl_hi : 4, ip_tc_lo : 4, ip_fl_lo : 16; -+#endif -+ uint16_t ip_pl; /* payload length */ -+ uint8_t ip_nh; /* next header */ -+ uint8_t ip_hl; /* hop limit */ -+ struct in6_addr ip_src, ip_dst; /* source and dest address */ -+}; -+ -+/* -+ * IPv6 pseudo-header used by upper-layer protocols -+ */ -+struct ip6_pseudohdr { -+ struct in6_addr ih_src; /* source internet address */ -+ struct in6_addr ih_dst; /* destination internet address */ -+ uint32_t ih_pl; /* upper-layer packet length */ -+ uint16_t ih_zero_hi; /* zero */ -+ uint8_t ih_zero_lo; /* zero */ -+ uint8_t ih_nh; /* next header */ -+}; -+ -+/* -+ * We don't want to mark these ip6 structs as packed as they are naturally -+ * correctly aligned; instead assert that there is no stray padding. -+ * If we marked the struct as packed then we would be unable to take -+ * the address of any of the fields in it. -+ */ -+G_STATIC_ASSERT(sizeof(struct ip6) == 40); -+G_STATIC_ASSERT(sizeof(struct ip6_pseudohdr) == 40); -+ -+#endif -diff --git a/slirp/src/ip6_icmp.c b/slirp/src/ip6_icmp.c -new file mode 100644 -index 0000000..8512686 ---- /dev/null -+++ b/slirp/src/ip6_icmp.c -@@ -0,0 +1,434 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 2013 -+ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. 
-+ */ -+ -+#include "slirp.h" -+#include "ip6_icmp.h" -+ -+#define NDP_Interval \ -+ g_rand_int_range(slirp->grand, NDP_MinRtrAdvInterval, NDP_MaxRtrAdvInterval) -+ -+static void ra_timer_handler(void *opaque) -+{ -+ Slirp *slirp = opaque; -+ -+ slirp->cb->timer_mod(slirp->ra_timer, -+ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + -+ NDP_Interval, -+ slirp->opaque); -+ ndp_send_ra(slirp); -+} -+ -+void icmp6_init(Slirp *slirp) -+{ -+ if (!slirp->in6_enabled) { -+ return; -+ } -+ -+ slirp->ra_timer = -+ slirp->cb->timer_new(ra_timer_handler, slirp, slirp->opaque); -+ slirp->cb->timer_mod(slirp->ra_timer, -+ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + -+ NDP_Interval, -+ slirp->opaque); -+} -+ -+void icmp6_cleanup(Slirp *slirp) -+{ -+ if (!slirp->in6_enabled) { -+ return; -+ } -+ -+ slirp->cb->timer_free(slirp->ra_timer, slirp->opaque); -+} -+ -+static void icmp6_send_echoreply(struct mbuf *m, Slirp *slirp, struct ip6 *ip, -+ struct icmp6 *icmp) -+{ -+ struct mbuf *t = m_get(slirp); -+ t->m_len = sizeof(struct ip6) + ntohs(ip->ip_pl); -+ memcpy(t->m_data, m->m_data, t->m_len); -+ -+ /* IPv6 Packet */ -+ struct ip6 *rip = mtod(t, struct ip6 *); -+ rip->ip_dst = ip->ip_src; -+ rip->ip_src = ip->ip_dst; -+ -+ /* ICMPv6 packet */ -+ t->m_data += sizeof(struct ip6); -+ struct icmp6 *ricmp = mtod(t, struct icmp6 *); -+ ricmp->icmp6_type = ICMP6_ECHO_REPLY; -+ ricmp->icmp6_cksum = 0; -+ -+ /* Checksum */ -+ t->m_data -= sizeof(struct ip6); -+ ricmp->icmp6_cksum = ip6_cksum(t); -+ -+ ip6_output(NULL, t, 0); -+} -+ -+void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code) -+{ -+ Slirp *slirp = m->slirp; -+ struct mbuf *t; -+ struct ip6 *ip = mtod(m, struct ip6 *); -+ char addrstr[INET6_ADDRSTRLEN]; -+ -+ DEBUG_CALL("icmp6_send_error"); -+ DEBUG_ARG("type = %d, code = %d", type, code); -+ -+ if (IN6_IS_ADDR_MULTICAST(&ip->ip_src) || in6_zero(&ip->ip_src)) { -+ /* TODO icmp error? 
*/ -+ return; -+ } -+ -+ t = m_get(slirp); -+ -+ /* IPv6 packet */ -+ struct ip6 *rip = mtod(t, struct ip6 *); -+ rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR; -+ rip->ip_dst = ip->ip_src; -+ inet_ntop(AF_INET6, &rip->ip_dst, addrstr, INET6_ADDRSTRLEN); -+ DEBUG_ARG("target = %s", addrstr); -+ -+ rip->ip_nh = IPPROTO_ICMPV6; -+ const int error_data_len = -+ MIN(m->m_len, IF_MTU - (sizeof(struct ip6) + ICMP6_ERROR_MINLEN)); -+ rip->ip_pl = htons(ICMP6_ERROR_MINLEN + error_data_len); -+ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); -+ -+ /* ICMPv6 packet */ -+ t->m_data += sizeof(struct ip6); -+ struct icmp6 *ricmp = mtod(t, struct icmp6 *); -+ ricmp->icmp6_type = type; -+ ricmp->icmp6_code = code; -+ ricmp->icmp6_cksum = 0; -+ -+ switch (type) { -+ case ICMP6_UNREACH: -+ case ICMP6_TIMXCEED: -+ ricmp->icmp6_err.unused = 0; -+ break; -+ case ICMP6_TOOBIG: -+ ricmp->icmp6_err.mtu = htonl(IF_MTU); -+ break; -+ case ICMP6_PARAMPROB: -+ /* TODO: Handle this case */ -+ break; -+ default: -+ g_assert_not_reached(); -+ break; -+ } -+ t->m_data += ICMP6_ERROR_MINLEN; -+ memcpy(t->m_data, m->m_data, error_data_len); -+ -+ /* Checksum */ -+ t->m_data -= ICMP6_ERROR_MINLEN; -+ t->m_data -= sizeof(struct ip6); -+ ricmp->icmp6_cksum = ip6_cksum(t); -+ -+ ip6_output(NULL, t, 0); -+} -+ -+/* -+ * Send NDP Router Advertisement -+ */ -+void ndp_send_ra(Slirp *slirp) -+{ -+ DEBUG_CALL("ndp_send_ra"); -+ -+ /* Build IPv6 packet */ -+ struct mbuf *t = m_get(slirp); -+ struct ip6 *rip = mtod(t, struct ip6 *); -+ size_t pl_size = 0; -+ struct in6_addr addr; -+ uint32_t scope_id; -+ -+ rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR; -+ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; -+ rip->ip_nh = IPPROTO_ICMPV6; -+ -+ /* Build ICMPv6 packet */ -+ t->m_data += sizeof(struct ip6); -+ struct icmp6 *ricmp = mtod(t, struct icmp6 *); -+ ricmp->icmp6_type = ICMP6_NDP_RA; -+ ricmp->icmp6_code = 0; -+ ricmp->icmp6_cksum = 0; -+ -+ /* NDP */ -+ ricmp->icmp6_nra.chl = NDP_AdvCurHopLimit; -+ 
ricmp->icmp6_nra.M = NDP_AdvManagedFlag; -+ ricmp->icmp6_nra.O = NDP_AdvOtherConfigFlag; -+ ricmp->icmp6_nra.reserved = 0; -+ ricmp->icmp6_nra.lifetime = htons(NDP_AdvDefaultLifetime); -+ ricmp->icmp6_nra.reach_time = htonl(NDP_AdvReachableTime); -+ ricmp->icmp6_nra.retrans_time = htonl(NDP_AdvRetransTime); -+ t->m_data += ICMP6_NDP_RA_MINLEN; -+ pl_size += ICMP6_NDP_RA_MINLEN; -+ -+ /* Source link-layer address (NDP option) */ -+ struct ndpopt *opt = mtod(t, struct ndpopt *); -+ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; -+ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; -+ in6_compute_ethaddr(rip->ip_src, opt->ndpopt_linklayer); -+ t->m_data += NDPOPT_LINKLAYER_LEN; -+ pl_size += NDPOPT_LINKLAYER_LEN; -+ -+ /* Prefix information (NDP option) */ -+ struct ndpopt *opt2 = mtod(t, struct ndpopt *); -+ opt2->ndpopt_type = NDPOPT_PREFIX_INFO; -+ opt2->ndpopt_len = NDPOPT_PREFIXINFO_LEN / 8; -+ opt2->ndpopt_prefixinfo.prefix_length = slirp->vprefix_len; -+ opt2->ndpopt_prefixinfo.L = 1; -+ opt2->ndpopt_prefixinfo.A = 1; -+ opt2->ndpopt_prefixinfo.reserved1 = 0; -+ opt2->ndpopt_prefixinfo.valid_lt = htonl(NDP_AdvValidLifetime); -+ opt2->ndpopt_prefixinfo.pref_lt = htonl(NDP_AdvPrefLifetime); -+ opt2->ndpopt_prefixinfo.reserved2 = 0; -+ opt2->ndpopt_prefixinfo.prefix = slirp->vprefix_addr6; -+ t->m_data += NDPOPT_PREFIXINFO_LEN; -+ pl_size += NDPOPT_PREFIXINFO_LEN; -+ -+ /* Prefix information (NDP option) */ -+ if (get_dns6_addr(&addr, &scope_id) >= 0) { -+ /* Host system does have an IPv6 DNS server, announce our proxy. 
*/ -+ struct ndpopt *opt3 = mtod(t, struct ndpopt *); -+ opt3->ndpopt_type = NDPOPT_RDNSS; -+ opt3->ndpopt_len = NDPOPT_RDNSS_LEN / 8; -+ opt3->ndpopt_rdnss.reserved = 0; -+ opt3->ndpopt_rdnss.lifetime = htonl(2 * NDP_MaxRtrAdvInterval); -+ opt3->ndpopt_rdnss.addr = slirp->vnameserver_addr6; -+ t->m_data += NDPOPT_RDNSS_LEN; -+ pl_size += NDPOPT_RDNSS_LEN; -+ } -+ -+ rip->ip_pl = htons(pl_size); -+ t->m_data -= sizeof(struct ip6) + pl_size; -+ t->m_len = sizeof(struct ip6) + pl_size; -+ -+ /* ICMPv6 Checksum */ -+ ricmp->icmp6_cksum = ip6_cksum(t); -+ -+ ip6_output(NULL, t, 0); -+} -+ -+/* -+ * Send NDP Neighbor Solitication -+ */ -+void ndp_send_ns(Slirp *slirp, struct in6_addr addr) -+{ -+ char addrstr[INET6_ADDRSTRLEN]; -+ -+ inet_ntop(AF_INET6, &addr, addrstr, INET6_ADDRSTRLEN); -+ -+ DEBUG_CALL("ndp_send_ns"); -+ DEBUG_ARG("target = %s", addrstr); -+ -+ /* Build IPv6 packet */ -+ struct mbuf *t = m_get(slirp); -+ struct ip6 *rip = mtod(t, struct ip6 *); -+ rip->ip_src = slirp->vhost_addr6; -+ rip->ip_dst = (struct in6_addr)SOLICITED_NODE_PREFIX; -+ memcpy(&rip->ip_dst.s6_addr[13], &addr.s6_addr[13], 3); -+ rip->ip_nh = IPPROTO_ICMPV6; -+ rip->ip_pl = htons(ICMP6_NDP_NS_MINLEN + NDPOPT_LINKLAYER_LEN); -+ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); -+ -+ /* Build ICMPv6 packet */ -+ t->m_data += sizeof(struct ip6); -+ struct icmp6 *ricmp = mtod(t, struct icmp6 *); -+ ricmp->icmp6_type = ICMP6_NDP_NS; -+ ricmp->icmp6_code = 0; -+ ricmp->icmp6_cksum = 0; -+ -+ /* NDP */ -+ ricmp->icmp6_nns.reserved = 0; -+ ricmp->icmp6_nns.target = addr; -+ -+ /* Build NDP option */ -+ t->m_data += ICMP6_NDP_NS_MINLEN; -+ struct ndpopt *opt = mtod(t, struct ndpopt *); -+ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; -+ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; -+ in6_compute_ethaddr(slirp->vhost_addr6, opt->ndpopt_linklayer); -+ -+ /* ICMPv6 Checksum */ -+ t->m_data -= ICMP6_NDP_NA_MINLEN; -+ t->m_data -= sizeof(struct ip6); -+ ricmp->icmp6_cksum = ip6_cksum(t); -+ -+ 
ip6_output(NULL, t, 1); -+} -+ -+/* -+ * Send NDP Neighbor Advertisement -+ */ -+static void ndp_send_na(Slirp *slirp, struct ip6 *ip, struct icmp6 *icmp) -+{ -+ /* Build IPv6 packet */ -+ struct mbuf *t = m_get(slirp); -+ struct ip6 *rip = mtod(t, struct ip6 *); -+ rip->ip_src = icmp->icmp6_nns.target; -+ if (in6_zero(&ip->ip_src)) { -+ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; -+ } else { -+ rip->ip_dst = ip->ip_src; -+ } -+ rip->ip_nh = IPPROTO_ICMPV6; -+ rip->ip_pl = htons(ICMP6_NDP_NA_MINLEN + NDPOPT_LINKLAYER_LEN); -+ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); -+ -+ /* Build ICMPv6 packet */ -+ t->m_data += sizeof(struct ip6); -+ struct icmp6 *ricmp = mtod(t, struct icmp6 *); -+ ricmp->icmp6_type = ICMP6_NDP_NA; -+ ricmp->icmp6_code = 0; -+ ricmp->icmp6_cksum = 0; -+ -+ /* NDP */ -+ ricmp->icmp6_nna.R = NDP_IsRouter; -+ ricmp->icmp6_nna.S = !IN6_IS_ADDR_MULTICAST(&rip->ip_dst); -+ ricmp->icmp6_nna.O = 1; -+ ricmp->icmp6_nna.reserved_hi = 0; -+ ricmp->icmp6_nna.reserved_lo = 0; -+ ricmp->icmp6_nna.target = icmp->icmp6_nns.target; -+ -+ /* Build NDP option */ -+ t->m_data += ICMP6_NDP_NA_MINLEN; -+ struct ndpopt *opt = mtod(t, struct ndpopt *); -+ opt->ndpopt_type = NDPOPT_LINKLAYER_TARGET; -+ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; -+ in6_compute_ethaddr(ricmp->icmp6_nna.target, opt->ndpopt_linklayer); -+ -+ /* ICMPv6 Checksum */ -+ t->m_data -= ICMP6_NDP_NA_MINLEN; -+ t->m_data -= sizeof(struct ip6); -+ ricmp->icmp6_cksum = ip6_cksum(t); -+ -+ ip6_output(NULL, t, 0); -+} -+ -+/* -+ * Process a NDP message -+ */ -+static void ndp_input(struct mbuf *m, Slirp *slirp, struct ip6 *ip, -+ struct icmp6 *icmp) -+{ -+ m->m_len += ETH_HLEN; -+ m->m_data -= ETH_HLEN; -+ struct ethhdr *eth = mtod(m, struct ethhdr *); -+ m->m_len -= ETH_HLEN; -+ m->m_data += ETH_HLEN; -+ -+ switch (icmp->icmp6_type) { -+ case ICMP6_NDP_RS: -+ DEBUG_CALL(" type = Router Solicitation"); -+ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && -+ ntohs(ip->ip_pl) >= 
ICMP6_NDP_RS_MINLEN) { -+ /* Gratuitous NDP */ -+ ndp_table_add(slirp, ip->ip_src, eth->h_source); -+ -+ ndp_send_ra(slirp); -+ } -+ break; -+ -+ case ICMP6_NDP_RA: -+ DEBUG_CALL(" type = Router Advertisement"); -+ slirp->cb->guest_error("Warning: guest sent NDP RA, but shouldn't", -+ slirp->opaque); -+ break; -+ -+ case ICMP6_NDP_NS: -+ DEBUG_CALL(" type = Neighbor Solicitation"); -+ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && -+ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nns.target) && -+ ntohs(ip->ip_pl) >= ICMP6_NDP_NS_MINLEN && -+ (!in6_zero(&ip->ip_src) || -+ in6_solicitednode_multicast(&ip->ip_dst))) { -+ if (in6_equal_host(&icmp->icmp6_nns.target)) { -+ /* Gratuitous NDP */ -+ ndp_table_add(slirp, ip->ip_src, eth->h_source); -+ ndp_send_na(slirp, ip, icmp); -+ } -+ } -+ break; -+ -+ case ICMP6_NDP_NA: -+ DEBUG_CALL(" type = Neighbor Advertisement"); -+ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && -+ ntohs(ip->ip_pl) >= ICMP6_NDP_NA_MINLEN && -+ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nna.target) && -+ (!IN6_IS_ADDR_MULTICAST(&ip->ip_dst) || icmp->icmp6_nna.S == 0)) { -+ ndp_table_add(slirp, ip->ip_src, eth->h_source); -+ } -+ break; -+ -+ case ICMP6_NDP_REDIRECT: -+ DEBUG_CALL(" type = Redirect"); -+ slirp->cb->guest_error( -+ "Warning: guest sent NDP REDIRECT, but shouldn't", slirp->opaque); -+ break; -+ } -+} -+ -+/* -+ * Process a received ICMPv6 message. 
-+ */ -+void icmp6_input(struct mbuf *m) -+{ -+ struct icmp6 *icmp; -+ struct ip6 *ip = mtod(m, struct ip6 *); -+ Slirp *slirp = m->slirp; -+ int hlen = sizeof(struct ip6); -+ -+ DEBUG_CALL("icmp6_input"); -+ DEBUG_ARG("m = %p", m); -+ DEBUG_ARG("m_len = %d", m->m_len); -+ -+ if (ntohs(ip->ip_pl) < ICMP6_MINLEN) { -+ goto end; -+ } -+ -+ if (ip6_cksum(m)) { -+ goto end; -+ } -+ -+ m->m_len -= hlen; -+ m->m_data += hlen; -+ icmp = mtod(m, struct icmp6 *); -+ m->m_len += hlen; -+ m->m_data -= hlen; -+ -+ DEBUG_ARG("icmp6_type = %d", icmp->icmp6_type); -+ switch (icmp->icmp6_type) { -+ case ICMP6_ECHO_REQUEST: -+ if (in6_equal_host(&ip->ip_dst)) { -+ icmp6_send_echoreply(m, slirp, ip, icmp); -+ } else { -+ /* TODO */ -+ g_critical("external icmpv6 not supported yet"); -+ } -+ break; -+ -+ case ICMP6_NDP_RS: -+ case ICMP6_NDP_RA: -+ case ICMP6_NDP_NS: -+ case ICMP6_NDP_NA: -+ case ICMP6_NDP_REDIRECT: -+ ndp_input(m, slirp, ip, icmp); -+ break; -+ -+ case ICMP6_UNREACH: -+ case ICMP6_TOOBIG: -+ case ICMP6_TIMXCEED: -+ case ICMP6_PARAMPROB: -+ /* XXX? report error? close socket? */ -+ default: -+ break; -+ } -+ -+end: -+ m_free(m); -+} -diff --git a/slirp/src/ip6_icmp.h b/slirp/src/ip6_icmp.h -new file mode 100644 -index 0000000..c37e60f ---- /dev/null -+++ b/slirp/src/ip6_icmp.h -@@ -0,0 +1,219 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 2013 -+ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. -+ */ -+ -+#ifndef SLIRP_IP6_ICMP_H -+#define SLIRP_IP6_ICMP_H -+ -+/* -+ * Interface Control Message Protocol version 6 Definitions. -+ * Per RFC 4443, March 2006. -+ * -+ * Network Discover Protocol Definitions. -+ * Per RFC 4861, September 2007. 
-+ */ -+ -+struct icmp6_echo { /* Echo Messages */ -+ uint16_t id; -+ uint16_t seq_num; -+}; -+ -+union icmp6_error_body { -+ uint32_t unused; -+ uint32_t pointer; -+ uint32_t mtu; -+}; -+ -+/* -+ * NDP Messages -+ */ -+struct ndp_rs { /* Router Solicitation Message */ -+ uint32_t reserved; -+}; -+ -+struct ndp_ra { /* Router Advertisement Message */ -+ uint8_t chl; /* Cur Hop Limit */ -+#if G_BYTE_ORDER == G_BIG_ENDIAN -+ uint8_t M : 1, O : 1, reserved : 6; -+#else -+ uint8_t reserved : 6, O : 1, M : 1; -+#endif -+ uint16_t lifetime; /* Router Lifetime */ -+ uint32_t reach_time; /* Reachable Time */ -+ uint32_t retrans_time; /* Retrans Timer */ -+}; -+ -+G_STATIC_ASSERT(sizeof(struct ndp_ra) == 12); -+ -+struct ndp_ns { /* Neighbor Solicitation Message */ -+ uint32_t reserved; -+ struct in6_addr target; /* Target Address */ -+}; -+ -+G_STATIC_ASSERT(sizeof(struct ndp_ns) == 20); -+ -+struct ndp_na { /* Neighbor Advertisement Message */ -+#if G_BYTE_ORDER == G_BIG_ENDIAN -+ uint32_t R : 1, /* Router Flag */ -+ S : 1, /* Solicited Flag */ -+ O : 1, /* Override Flag */ -+ reserved_hi : 5, reserved_lo : 24; -+#else -+ uint32_t reserved_hi : 5, O : 1, S : 1, R : 1, reserved_lo : 24; -+#endif -+ struct in6_addr target; /* Target Address */ -+}; -+ -+G_STATIC_ASSERT(sizeof(struct ndp_na) == 20); -+ -+struct ndp_redirect { -+ uint32_t reserved; -+ struct in6_addr target; /* Target Address */ -+ struct in6_addr dest; /* Destination Address */ -+}; -+ -+G_STATIC_ASSERT(sizeof(struct ndp_redirect) == 36); -+ -+/* -+ * Structure of an icmpv6 header. 
-+ */ -+struct icmp6 { -+ uint8_t icmp6_type; /* type of message, see below */ -+ uint8_t icmp6_code; /* type sub code */ -+ uint16_t icmp6_cksum; /* ones complement cksum of struct */ -+ union { -+ union icmp6_error_body error_body; -+ struct icmp6_echo echo; -+ struct ndp_rs ndp_rs; -+ struct ndp_ra ndp_ra; -+ struct ndp_ns ndp_ns; -+ struct ndp_na ndp_na; -+ struct ndp_redirect ndp_redirect; -+ } icmp6_body; -+#define icmp6_err icmp6_body.error_body -+#define icmp6_echo icmp6_body.echo -+#define icmp6_nrs icmp6_body.ndp_rs -+#define icmp6_nra icmp6_body.ndp_ra -+#define icmp6_nns icmp6_body.ndp_ns -+#define icmp6_nna icmp6_body.ndp_na -+#define icmp6_redirect icmp6_body.ndp_redirect -+}; -+ -+G_STATIC_ASSERT(sizeof(struct icmp6) == 40); -+ -+#define ICMP6_MINLEN 4 -+#define ICMP6_ERROR_MINLEN 8 -+#define ICMP6_ECHO_MINLEN 8 -+#define ICMP6_NDP_RS_MINLEN 8 -+#define ICMP6_NDP_RA_MINLEN 16 -+#define ICMP6_NDP_NS_MINLEN 24 -+#define ICMP6_NDP_NA_MINLEN 24 -+#define ICMP6_NDP_REDIRECT_MINLEN 40 -+ -+/* -+ * NDP Options -+ */ -+struct ndpopt { -+ uint8_t ndpopt_type; /* Option type */ -+ uint8_t ndpopt_len; /* /!\ In units of 8 octets */ -+ union { -+ unsigned char linklayer_addr[6]; /* Source/Target Link-layer */ -+#define ndpopt_linklayer ndpopt_body.linklayer_addr -+ struct prefixinfo { /* Prefix Information */ -+ uint8_t prefix_length; -+#if G_BYTE_ORDER == G_BIG_ENDIAN -+ uint8_t L : 1, A : 1, reserved1 : 6; -+#else -+ uint8_t reserved1 : 6, A : 1, L : 1; -+#endif -+ uint32_t valid_lt; /* Valid Lifetime */ -+ uint32_t pref_lt; /* Preferred Lifetime */ -+ uint32_t reserved2; -+ struct in6_addr prefix; -+ } SLIRP_PACKED prefixinfo; -+#define ndpopt_prefixinfo ndpopt_body.prefixinfo -+ struct rdnss { -+ uint16_t reserved; -+ uint32_t lifetime; -+ struct in6_addr addr; -+ } SLIRP_PACKED rdnss; -+#define ndpopt_rdnss ndpopt_body.rdnss -+ } ndpopt_body; -+} SLIRP_PACKED; -+ -+/* NDP options type */ -+#define NDPOPT_LINKLAYER_SOURCE 1 /* Source Link-Layer Address */ 
-+#define NDPOPT_LINKLAYER_TARGET 2 /* Target Link-Layer Address */ -+#define NDPOPT_PREFIX_INFO 3 /* Prefix Information */ -+#define NDPOPT_RDNSS 25 /* Recursive DNS Server Address */ -+ -+/* NDP options size, in octets. */ -+#define NDPOPT_LINKLAYER_LEN 8 -+#define NDPOPT_PREFIXINFO_LEN 32 -+#define NDPOPT_RDNSS_LEN 24 -+ -+/* -+ * Definition of type and code field values. -+ * Per https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xml -+ * Last Updated 2012-11-12 -+ */ -+ -+/* Errors */ -+#define ICMP6_UNREACH 1 /* Destination Unreachable */ -+#define ICMP6_UNREACH_NO_ROUTE 0 /* no route to dest */ -+#define ICMP6_UNREACH_DEST_PROHIB 1 /* com with dest prohibited */ -+#define ICMP6_UNREACH_SCOPE 2 /* beyond scope of src addr */ -+#define ICMP6_UNREACH_ADDRESS 3 /* address unreachable */ -+#define ICMP6_UNREACH_PORT 4 /* port unreachable */ -+#define ICMP6_UNREACH_SRC_FAIL 5 /* src addr failed */ -+#define ICMP6_UNREACH_REJECT_ROUTE 6 /* reject route to dest */ -+#define ICMP6_UNREACH_SRC_HDR_ERROR 7 /* error in src routing header */ -+#define ICMP6_TOOBIG 2 /* Packet Too Big */ -+#define ICMP6_TIMXCEED 3 /* Time Exceeded */ -+#define ICMP6_TIMXCEED_INTRANS 0 /* hop limit exceeded in transit */ -+#define ICMP6_TIMXCEED_REASS 1 /* ttl=0 in reass */ -+#define ICMP6_PARAMPROB 4 /* Parameter Problem */ -+#define ICMP6_PARAMPROB_HDR_FIELD 0 /* err header field */ -+#define ICMP6_PARAMPROB_NXTHDR_TYPE 1 /* unrecognized Next Header type */ -+#define ICMP6_PARAMPROB_IPV6_OPT 2 /* unrecognized IPv6 option */ -+ -+/* Informational Messages */ -+#define ICMP6_ECHO_REQUEST 128 /* Echo Request */ -+#define ICMP6_ECHO_REPLY 129 /* Echo Reply */ -+#define ICMP6_NDP_RS 133 /* Router Solicitation (NDP) */ -+#define ICMP6_NDP_RA 134 /* Router Advertisement (NDP) */ -+#define ICMP6_NDP_NS 135 /* Neighbor Solicitation (NDP) */ -+#define ICMP6_NDP_NA 136 /* Neighbor Advertisement (NDP) */ -+#define ICMP6_NDP_REDIRECT 137 /* Redirect Message (NDP) */ -+ -+/* -+ * 
Router Configuration Variables (rfc4861#section-6) -+ */ -+#define NDP_IsRouter 1 -+#define NDP_AdvSendAdvertisements 1 -+#define NDP_MaxRtrAdvInterval 600000 -+#define NDP_MinRtrAdvInterval \ -+ ((NDP_MaxRtrAdvInterval >= 9) ? NDP_MaxRtrAdvInterval / 3 : \ -+ NDP_MaxRtrAdvInterval) -+#define NDP_AdvManagedFlag 0 -+#define NDP_AdvOtherConfigFlag 0 -+#define NDP_AdvLinkMTU 0 -+#define NDP_AdvReachableTime 0 -+#define NDP_AdvRetransTime 0 -+#define NDP_AdvCurHopLimit 64 -+#define NDP_AdvDefaultLifetime ((3 * NDP_MaxRtrAdvInterval) / 1000) -+#define NDP_AdvValidLifetime 86400 -+#define NDP_AdvOnLinkFlag 1 -+#define NDP_AdvPrefLifetime 14400 -+#define NDP_AdvAutonomousFlag 1 -+ -+void icmp6_init(Slirp *slirp); -+void icmp6_cleanup(Slirp *slirp); -+void icmp6_input(struct mbuf *); -+void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code); -+void ndp_send_ra(Slirp *slirp); -+void ndp_send_ns(Slirp *slirp, struct in6_addr addr); -+ -+#endif -diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c -new file mode 100644 -index 0000000..d9d2b7e ---- /dev/null -+++ b/slirp/src/ip6_input.c -@@ -0,0 +1,78 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 2013 -+ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. -+ */ -+ -+#include "slirp.h" -+#include "ip6_icmp.h" -+ -+/* -+ * IP initialization: fill in IP protocol switch table. -+ * All protocols not implemented in kernel go to raw IP protocol handler. 
-+ */ -+void ip6_init(Slirp *slirp) -+{ -+ icmp6_init(slirp); -+} -+ -+void ip6_cleanup(Slirp *slirp) -+{ -+ icmp6_cleanup(slirp); -+} -+ -+void ip6_input(struct mbuf *m) -+{ -+ struct ip6 *ip6; -+ Slirp *slirp = m->slirp; -+ -+ if (!slirp->in6_enabled) { -+ goto bad; -+ } -+ -+ DEBUG_CALL("ip6_input"); -+ DEBUG_ARG("m = %p", m); -+ DEBUG_ARG("m_len = %d", m->m_len); -+ -+ if (m->m_len < sizeof(struct ip6)) { -+ goto bad; -+ } -+ -+ ip6 = mtod(m, struct ip6 *); -+ -+ if (ip6->ip_v != IP6VERSION) { -+ goto bad; -+ } -+ -+ if (ntohs(ip6->ip_pl) > IF_MTU) { -+ icmp6_send_error(m, ICMP6_TOOBIG, 0); -+ goto bad; -+ } -+ -+ /* check ip_ttl for a correct ICMP reply */ -+ if (ip6->ip_hl == 0) { -+ icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); -+ goto bad; -+ } -+ -+ /* -+ * Switch out to protocol's input routine. -+ */ -+ switch (ip6->ip_nh) { -+ case IPPROTO_TCP: -+ NTOHS(ip6->ip_pl); -+ tcp_input(m, sizeof(struct ip6), (struct socket *)NULL, AF_INET6); -+ break; -+ case IPPROTO_UDP: -+ udp6_input(m); -+ break; -+ case IPPROTO_ICMPV6: -+ icmp6_input(m); -+ break; -+ default: -+ m_free(m); -+ } -+ return; -+bad: -+ m_free(m); -+} -diff --git a/slirp/src/ip6_output.c b/slirp/src/ip6_output.c -new file mode 100644 -index 0000000..b861106 ---- /dev/null -+++ b/slirp/src/ip6_output.c -@@ -0,0 +1,39 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 2013 -+ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. -+ */ -+ -+#include "slirp.h" -+ -+/* Number of packets queued before we start sending -+ * (to prevent allocing too many mbufs) */ -+#define IF6_THRESH 10 -+ -+/* -+ * IPv6 output. 
The packet in mbuf chain m contains a IP header -+ */ -+int ip6_output(struct socket *so, struct mbuf *m, int fast) -+{ -+ struct ip6 *ip = mtod(m, struct ip6 *); -+ -+ DEBUG_CALL("ip6_output"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("m = %p", m); -+ -+ /* Fill IPv6 header */ -+ ip->ip_v = IP6VERSION; -+ ip->ip_hl = IP6_HOP_LIMIT; -+ ip->ip_tc_hi = 0; -+ ip->ip_tc_lo = 0; -+ ip->ip_fl_hi = 0; -+ ip->ip_fl_lo = 0; -+ -+ if (fast) { -+ if_encap(m->slirp, m); -+ } else { -+ if_output(so, m); -+ } -+ -+ return 0; -+} -diff --git a/slirp/src/ip_icmp.c b/slirp/src/ip_icmp.c -new file mode 100644 -index 0000000..7590cff ---- /dev/null -+++ b/slirp/src/ip_icmp.c -@@ -0,0 +1,477 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 -+ * ip_icmp.c,v 1.7 1995/05/30 08:09:42 rgrimes Exp -+ */ -+ -+#include "slirp.h" -+#include "ip_icmp.h" -+ -+#ifndef WITH_ICMP_ERROR_MSG -+#define WITH_ICMP_ERROR_MSG 0 -+#endif -+ -+/* The message sent when emulating PING */ -+/* Be nice and tell them it's just a pseudo-ping packet */ -+static const char icmp_ping_msg[] = -+ "This is a pseudo-PING packet used by Slirp to emulate ICMP ECHO-REQUEST " -+ "packets.\n"; -+ -+/* list of actions for icmp_send_error() on RX of an icmp message */ -+static const int icmp_flush[19] = { -+ /* ECHO REPLY (0) */ 0, -+ 1, -+ 1, -+ /* DEST UNREACH (3) */ 1, -+ /* SOURCE QUENCH (4)*/ 1, -+ /* REDIRECT (5) */ 1, -+ 1, -+ 1, -+ /* ECHO (8) */ 0, -+ /* ROUTERADVERT (9) */ 1, -+ /* ROUTERSOLICIT (10) */ 1, -+ /* TIME EXCEEDED (11) */ 1, -+ /* PARAMETER PROBLEM (12) */ 1, -+ /* TIMESTAMP (13) */ 0, -+ /* TIMESTAMP REPLY (14) */ 0, -+ /* INFO (15) */ 0, -+ /* INFO REPLY (16) */ 0, -+ /* ADDR MASK (17) */ 0, -+ /* ADDR MASK REPLY (18) */ 0 -+}; -+ -+void icmp_init(Slirp *slirp) -+{ -+ slirp->icmp.so_next = slirp->icmp.so_prev = &slirp->icmp; -+ slirp->icmp_last_so = &slirp->icmp; -+} -+ -+void icmp_cleanup(Slirp *slirp) -+{ -+ while (slirp->icmp.so_next != &slirp->icmp) { -+ icmp_detach(slirp->icmp.so_next); -+ } -+} -+ -+static int icmp_send(struct socket *so, struct mbuf *m, int hlen) -+{ -+ struct ip *ip = mtod(m, struct ip *); -+ struct sockaddr_in addr; 
-+ -+ so->s = slirp_socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); -+ if (so->s == -1) { -+ return -1; -+ } -+ -+ so->so_m = m; -+ so->so_faddr = ip->ip_dst; -+ so->so_laddr = ip->ip_src; -+ so->so_iptos = ip->ip_tos; -+ so->so_type = IPPROTO_ICMP; -+ so->so_state = SS_ISFCONNECTED; -+ so->so_expire = curtime + SO_EXPIRE; -+ -+ addr.sin_family = AF_INET; -+ addr.sin_addr = so->so_faddr; -+ -+ insque(so, &so->slirp->icmp); -+ -+ if (sendto(so->s, m->m_data + hlen, m->m_len - hlen, 0, -+ (struct sockaddr *)&addr, sizeof(addr)) == -1) { -+ DEBUG_MISC("icmp_input icmp sendto tx errno = %d-%s", errno, -+ strerror(errno)); -+ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); -+ icmp_detach(so); -+ } -+ -+ return 0; -+} -+ -+void icmp_detach(struct socket *so) -+{ -+ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); -+ closesocket(so->s); -+ sofree(so); -+} -+ -+/* -+ * Process a received ICMP message. -+ */ -+void icmp_input(struct mbuf *m, int hlen) -+{ -+ register struct icmp *icp; -+ register struct ip *ip = mtod(m, struct ip *); -+ int icmplen = ip->ip_len; -+ Slirp *slirp = m->slirp; -+ -+ DEBUG_CALL("icmp_input"); -+ DEBUG_ARG("m = %p", m); -+ DEBUG_ARG("m_len = %d", m->m_len); -+ -+ /* -+ * Locate icmp structure in mbuf, and check -+ * that its not corrupted and of at least minimum length. 
-+ */ -+ if (icmplen < ICMP_MINLEN) { /* min 8 bytes payload */ -+ freeit: -+ m_free(m); -+ goto end_error; -+ } -+ -+ m->m_len -= hlen; -+ m->m_data += hlen; -+ icp = mtod(m, struct icmp *); -+ if (cksum(m, icmplen)) { -+ goto freeit; -+ } -+ m->m_len += hlen; -+ m->m_data -= hlen; -+ -+ DEBUG_ARG("icmp_type = %d", icp->icmp_type); -+ switch (icp->icmp_type) { -+ case ICMP_ECHO: -+ ip->ip_len += hlen; /* since ip_input subtracts this */ -+ if (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || -+ ip->ip_dst.s_addr == slirp->vnameserver_addr.s_addr) { -+ icmp_reflect(m); -+ } else if (slirp->restricted) { -+ goto freeit; -+ } else { -+ struct socket *so; -+ struct sockaddr_storage addr; -+ so = socreate(slirp); -+ if (icmp_send(so, m, hlen) == 0) { -+ return; -+ } -+ if (udp_attach(so, AF_INET) == -1) { -+ DEBUG_MISC("icmp_input udp_attach errno = %d-%s", errno, -+ strerror(errno)); -+ sofree(so); -+ m_free(m); -+ goto end_error; -+ } -+ so->so_m = m; -+ so->so_ffamily = AF_INET; -+ so->so_faddr = ip->ip_dst; -+ so->so_fport = htons(7); -+ so->so_lfamily = AF_INET; -+ so->so_laddr = ip->ip_src; -+ so->so_lport = htons(9); -+ so->so_iptos = ip->ip_tos; -+ so->so_type = IPPROTO_ICMP; -+ so->so_state = SS_ISFCONNECTED; -+ -+ /* Send the packet */ -+ addr = so->fhost.ss; -+ sotranslate_out(so, &addr); -+ -+ if (sendto(so->s, icmp_ping_msg, strlen(icmp_ping_msg), 0, -+ (struct sockaddr *)&addr, sockaddr_size(&addr)) == -1) { -+ DEBUG_MISC("icmp_input udp sendto tx errno = %d-%s", errno, -+ strerror(errno)); -+ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, -+ strerror(errno)); -+ udp_detach(so); -+ } -+ } /* if ip->ip_dst.s_addr == alias_addr.s_addr */ -+ break; -+ case ICMP_UNREACH: -+ /* XXX? report error? close socket? 
*/ -+ case ICMP_TIMXCEED: -+ case ICMP_PARAMPROB: -+ case ICMP_SOURCEQUENCH: -+ case ICMP_TSTAMP: -+ case ICMP_MASKREQ: -+ case ICMP_REDIRECT: -+ m_free(m); -+ break; -+ -+ default: -+ m_free(m); -+ } /* swith */ -+ -+end_error: -+ /* m is m_free()'d xor put in a socket xor or given to ip_send */ -+ return; -+} -+ -+ -+/* -+ * Send an ICMP message in response to a situation -+ * -+ * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. -+ *MAY send more (we do). MUST NOT change this header information. MUST NOT reply -+ *to a multicast/broadcast IP address. MUST NOT reply to a multicast/broadcast -+ *MAC address. MUST reply to only the first fragment. -+ */ -+/* -+ * Send ICMP_UNREACH back to the source regarding msrc. -+ * mbuf *msrc is used as a template, but is NOT m_free()'d. -+ * It is reported as the bad ip packet. The header should -+ * be fully correct and in host byte order. -+ * ICMP fragmentation is illegal. All machines must accept 576 bytes in one -+ * packet. 
The maximum payload is 576-20(ip hdr)-8(icmp hdr)=548 -+ */ -+ -+#define ICMP_MAXDATALEN (IP_MSS - 28) -+void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, -+ const char *message) -+{ -+ unsigned hlen, shlen, s_ip_len; -+ register struct ip *ip; -+ register struct icmp *icp; -+ register struct mbuf *m; -+ -+ DEBUG_CALL("icmp_send_error"); -+ DEBUG_ARG("msrc = %p", msrc); -+ DEBUG_ARG("msrc_len = %d", msrc->m_len); -+ -+ if (type != ICMP_UNREACH && type != ICMP_TIMXCEED) -+ goto end_error; -+ -+ /* check msrc */ -+ if (!msrc) -+ goto end_error; -+ ip = mtod(msrc, struct ip *); -+ if (slirp_debug & DBG_MISC) { -+ char bufa[20], bufb[20]; -+ strcpy(bufa, inet_ntoa(ip->ip_src)); -+ strcpy(bufb, inet_ntoa(ip->ip_dst)); -+ DEBUG_MISC(" %.16s to %.16s", bufa, bufb); -+ } -+ if (ip->ip_off & IP_OFFMASK) -+ goto end_error; /* Only reply to fragment 0 */ -+ -+ /* Do not reply to source-only IPs */ -+ if ((ip->ip_src.s_addr & htonl(~(0xf << 28))) == 0) { -+ goto end_error; -+ } -+ -+ shlen = ip->ip_hl << 2; -+ s_ip_len = ip->ip_len; -+ if (ip->ip_p == IPPROTO_ICMP) { -+ icp = (struct icmp *)((char *)ip + shlen); -+ /* -+ * Assume any unknown ICMP type is an error. This isn't -+ * specified by the RFC, but think about it.. 
-+ */ -+ if (icp->icmp_type > 18 || icmp_flush[icp->icmp_type]) -+ goto end_error; -+ } -+ -+ /* make a copy */ -+ m = m_get(msrc->slirp); -+ if (!m) { -+ goto end_error; -+ } -+ -+ { -+ int new_m_size; -+ new_m_size = -+ sizeof(struct ip) + ICMP_MINLEN + msrc->m_len + ICMP_MAXDATALEN; -+ if (new_m_size > m->m_size) -+ m_inc(m, new_m_size); -+ } -+ memcpy(m->m_data, msrc->m_data, msrc->m_len); -+ m->m_len = msrc->m_len; /* copy msrc to m */ -+ -+ /* make the header of the reply packet */ -+ ip = mtod(m, struct ip *); -+ hlen = sizeof(struct ip); /* no options in reply */ -+ -+ /* fill in icmp */ -+ m->m_data += hlen; -+ m->m_len -= hlen; -+ -+ icp = mtod(m, struct icmp *); -+ -+ if (minsize) -+ s_ip_len = shlen + ICMP_MINLEN; /* return header+8b only */ -+ else if (s_ip_len > ICMP_MAXDATALEN) /* maximum size */ -+ s_ip_len = ICMP_MAXDATALEN; -+ -+ m->m_len = ICMP_MINLEN + s_ip_len; /* 8 bytes ICMP header */ -+ -+ /* min. size = 8+sizeof(struct ip)+8 */ -+ -+ icp->icmp_type = type; -+ icp->icmp_code = code; -+ icp->icmp_id = 0; -+ icp->icmp_seq = 0; -+ -+ memcpy(&icp->icmp_ip, msrc->m_data, s_ip_len); /* report the ip packet */ -+ HTONS(icp->icmp_ip.ip_len); -+ HTONS(icp->icmp_ip.ip_id); -+ HTONS(icp->icmp_ip.ip_off); -+ -+ if (message && WITH_ICMP_ERROR_MSG) { /* append message to ICMP packet */ -+ int message_len; -+ char *cpnt; -+ message_len = strlen(message); -+ if (message_len > ICMP_MAXDATALEN) -+ message_len = ICMP_MAXDATALEN; -+ cpnt = (char *)m->m_data + m->m_len; -+ memcpy(cpnt, message, message_len); -+ m->m_len += message_len; -+ } -+ -+ icp->icmp_cksum = 0; -+ icp->icmp_cksum = cksum(m, m->m_len); -+ -+ m->m_data -= hlen; -+ m->m_len += hlen; -+ -+ /* fill in ip */ -+ ip->ip_hl = hlen >> 2; -+ ip->ip_len = m->m_len; -+ -+ ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */ -+ -+ ip->ip_ttl = MAXTTL; -+ ip->ip_p = IPPROTO_ICMP; -+ ip->ip_dst = ip->ip_src; /* ip addresses */ -+ ip->ip_src = m->slirp->vhost_addr; -+ -+ 
(void)ip_output((struct socket *)NULL, m); -+ -+end_error: -+ return; -+} -+#undef ICMP_MAXDATALEN -+ -+/* -+ * Reflect the ip packet back to the source -+ */ -+void icmp_reflect(struct mbuf *m) -+{ -+ register struct ip *ip = mtod(m, struct ip *); -+ int hlen = ip->ip_hl << 2; -+ int optlen = hlen - sizeof(struct ip); -+ register struct icmp *icp; -+ -+ /* -+ * Send an icmp packet back to the ip level, -+ * after supplying a checksum. -+ */ -+ m->m_data += hlen; -+ m->m_len -= hlen; -+ icp = mtod(m, struct icmp *); -+ -+ icp->icmp_type = ICMP_ECHOREPLY; -+ icp->icmp_cksum = 0; -+ icp->icmp_cksum = cksum(m, ip->ip_len - hlen); -+ -+ m->m_data -= hlen; -+ m->m_len += hlen; -+ -+ /* fill in ip */ -+ if (optlen > 0) { -+ /* -+ * Strip out original options by copying rest of first -+ * mbuf's data back, and adjust the IP length. -+ */ -+ memmove((char *)(ip + 1), (char *)ip + hlen, -+ (unsigned)(m->m_len - hlen)); -+ hlen -= optlen; -+ ip->ip_hl = hlen >> 2; -+ ip->ip_len -= optlen; -+ m->m_len -= optlen; -+ } -+ -+ ip->ip_ttl = MAXTTL; -+ { /* swap */ -+ struct in_addr icmp_dst; -+ icmp_dst = ip->ip_dst; -+ ip->ip_dst = ip->ip_src; -+ ip->ip_src = icmp_dst; -+ } -+ -+ (void)ip_output((struct socket *)NULL, m); -+} -+ -+void icmp_receive(struct socket *so) -+{ -+ struct mbuf *m = so->so_m; -+ struct ip *ip = mtod(m, struct ip *); -+ int hlen = ip->ip_hl << 2; -+ uint8_t error_code; -+ struct icmp *icp; -+ int id, len; -+ -+ m->m_data += hlen; -+ m->m_len -= hlen; -+ icp = mtod(m, struct icmp *); -+ -+ id = icp->icmp_id; -+ len = recv(so->s, icp, M_ROOM(m), 0); -+ /* -+ * The behavior of reading SOCK_DGRAM+IPPROTO_ICMP sockets is inconsistent -+ * between host OSes. On Linux, only the ICMP header and payload is -+ * included. On macOS/Darwin, the socket acts like a raw socket and -+ * includes the IP header as well. On other BSDs, SOCK_DGRAM+IPPROTO_ICMP -+ * sockets aren't supported at all, so we treat them like raw sockets. 
It -+ * isn't possible to detect this difference at runtime, so we must use an -+ * #ifdef to determine if we need to remove the IP header. -+ */ -+#ifdef CONFIG_BSD -+ if (len >= sizeof(struct ip)) { -+ struct ip *inner_ip = mtod(m, struct ip *); -+ int inner_hlen = inner_ip->ip_hl << 2; -+ if (inner_hlen > len) { -+ len = -1; -+ errno = -EINVAL; -+ } else { -+ len -= inner_hlen; -+ memmove(icp, (unsigned char *)icp + inner_hlen, len); -+ } -+ } else { -+ len = -1; -+ errno = -EINVAL; -+ } -+#endif -+ icp->icmp_id = id; -+ -+ m->m_data -= hlen; -+ m->m_len += hlen; -+ -+ if (len == -1 || len == 0) { -+ if (errno == ENETUNREACH) { -+ error_code = ICMP_UNREACH_NET; -+ } else { -+ error_code = ICMP_UNREACH_HOST; -+ } -+ DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno)); -+ icmp_send_error(so->so_m, ICMP_UNREACH, error_code, 0, strerror(errno)); -+ } else { -+ icmp_reflect(so->so_m); -+ so->so_m = NULL; /* Don't m_free() it again! */ -+ } -+ icmp_detach(so); -+} -diff --git a/slirp/src/ip_icmp.h b/slirp/src/ip_icmp.h -new file mode 100644 -index 0000000..84707db ---- /dev/null -+++ b/slirp/src/ip_icmp.h -@@ -0,0 +1,166 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. 
Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93 -+ * ip_icmp.h,v 1.4 1995/05/30 08:09:43 rgrimes Exp -+ */ -+ -+#ifndef NETINET_IP_ICMP_H -+#define NETINET_IP_ICMP_H -+ -+/* -+ * Interface Control Message Protocol Definitions. -+ * Per RFC 792, September 1981. -+ */ -+ -+typedef uint32_t n_time; -+ -+/* -+ * Structure of an icmp header. 
-+ */ -+struct icmp { -+ uint8_t icmp_type; /* type of message, see below */ -+ uint8_t icmp_code; /* type sub code */ -+ uint16_t icmp_cksum; /* ones complement cksum of struct */ -+ union { -+ uint8_t ih_pptr; /* ICMP_PARAMPROB */ -+ struct in_addr ih_gwaddr; /* ICMP_REDIRECT */ -+ struct ih_idseq { -+ uint16_t icd_id; -+ uint16_t icd_seq; -+ } ih_idseq; -+ int ih_void; -+ -+ /* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */ -+ struct ih_pmtu { -+ uint16_t ipm_void; -+ uint16_t ipm_nextmtu; -+ } ih_pmtu; -+ } icmp_hun; -+#define icmp_pptr icmp_hun.ih_pptr -+#define icmp_gwaddr icmp_hun.ih_gwaddr -+#define icmp_id icmp_hun.ih_idseq.icd_id -+#define icmp_seq icmp_hun.ih_idseq.icd_seq -+#define icmp_void icmp_hun.ih_void -+#define icmp_pmvoid icmp_hun.ih_pmtu.ipm_void -+#define icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu -+ union { -+ struct id_ts { -+ n_time its_otime; -+ n_time its_rtime; -+ n_time its_ttime; -+ } id_ts; -+ struct id_ip { -+ struct ip idi_ip; -+ /* options and then 64 bits of data */ -+ } id_ip; -+ uint32_t id_mask; -+ char id_data[1]; -+ } icmp_dun; -+#define icmp_otime icmp_dun.id_ts.its_otime -+#define icmp_rtime icmp_dun.id_ts.its_rtime -+#define icmp_ttime icmp_dun.id_ts.its_ttime -+#define icmp_ip icmp_dun.id_ip.idi_ip -+#define icmp_mask icmp_dun.id_mask -+#define icmp_data icmp_dun.id_data -+}; -+ -+/* -+ * Lower bounds on packet lengths for various types. -+ * For the error advice packets must first ensure that the -+ * packet is large enough to contain the returned ip header. -+ * Only then can we do the check to see if 64 bits of packet -+ * data have been returned, since we need to check the returned -+ * ip header length. 
-+ */ -+#define ICMP_MINLEN 8 /* abs minimum */ -+#define ICMP_TSLEN (8 + 3 * sizeof(n_time)) /* timestamp */ -+#define ICMP_MASKLEN 12 /* address mask */ -+#define ICMP_ADVLENMIN (8 + sizeof(struct ip) + 8) /* min */ -+#define ICMP_ADVLEN(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8) -+/* N.B.: must separately check that ip_hl >= 5 */ -+ -+/* -+ * Definition of type and code field values. -+ */ -+#define ICMP_ECHOREPLY 0 /* echo reply */ -+#define ICMP_UNREACH 3 /* dest unreachable, codes: */ -+#define ICMP_UNREACH_NET 0 /* bad net */ -+#define ICMP_UNREACH_HOST 1 /* bad host */ -+#define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */ -+#define ICMP_UNREACH_PORT 3 /* bad port */ -+#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */ -+#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */ -+#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */ -+#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */ -+#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */ -+#define ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */ -+#define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */ -+#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */ -+#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */ -+#define ICMP_SOURCEQUENCH 4 /* packet lost, slow down */ -+#define ICMP_REDIRECT 5 /* shorter route, codes: */ -+#define ICMP_REDIRECT_NET 0 /* for network */ -+#define ICMP_REDIRECT_HOST 1 /* for host */ -+#define ICMP_REDIRECT_TOSNET 2 /* for tos and net */ -+#define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */ -+#define ICMP_ECHO 8 /* echo service */ -+#define ICMP_ROUTERADVERT 9 /* router advertisement */ -+#define ICMP_ROUTERSOLICIT 10 /* router solicitation */ -+#define ICMP_TIMXCEED 11 /* time exceeded, code: */ -+#define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */ -+#define ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */ -+#define ICMP_PARAMPROB 12 /* ip header bad */ -+#define ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. 
absent */ -+#define ICMP_TSTAMP 13 /* timestamp request */ -+#define ICMP_TSTAMPREPLY 14 /* timestamp reply */ -+#define ICMP_IREQ 15 /* information request */ -+#define ICMP_IREQREPLY 16 /* information reply */ -+#define ICMP_MASKREQ 17 /* address mask request */ -+#define ICMP_MASKREPLY 18 /* address mask reply */ -+ -+#define ICMP_MAXTYPE 18 -+ -+#define ICMP_INFOTYPE(type) \ -+ ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \ -+ (type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \ -+ (type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \ -+ (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \ -+ (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY) -+ -+void icmp_init(Slirp *slirp); -+void icmp_cleanup(Slirp *slirp); -+void icmp_input(struct mbuf *, int); -+void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, -+ const char *message); -+void icmp_reflect(struct mbuf *); -+void icmp_receive(struct socket *so); -+void icmp_detach(struct socket *so); -+ -+#endif -diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c -new file mode 100644 -index 0000000..8c75d91 ---- /dev/null -+++ b/slirp/src/ip_input.c -@@ -0,0 +1,462 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. 
Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 -+ * ip_input.c,v 1.11 1994/11/16 10:17:08 jkh Exp -+ */ -+ -+/* -+ * Changes and additions relating to SLiRP are -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#include "slirp.h" -+#include "ip_icmp.h" -+ -+static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp); -+static void ip_freef(Slirp *slirp, struct ipq *fp); -+static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev); -+static void ip_deq(register struct ipasfrag *p); -+ -+/* -+ * IP initialization: fill in IP protocol switch table. -+ * All protocols not implemented in kernel go to raw IP protocol handler. -+ */ -+void ip_init(Slirp *slirp) -+{ -+ slirp->ipq.ip_link.next = slirp->ipq.ip_link.prev = &slirp->ipq.ip_link; -+ udp_init(slirp); -+ tcp_init(slirp); -+ icmp_init(slirp); -+} -+ -+void ip_cleanup(Slirp *slirp) -+{ -+ udp_cleanup(slirp); -+ tcp_cleanup(slirp); -+ icmp_cleanup(slirp); -+} -+ -+/* -+ * Ip input routine. 
Checksum and byte swap header. If fragmented -+ * try to reassemble. Process options. Pass to next level. -+ */ -+void ip_input(struct mbuf *m) -+{ -+ Slirp *slirp = m->slirp; -+ register struct ip *ip; -+ int hlen; -+ -+ if (!slirp->in_enabled) { -+ goto bad; -+ } -+ -+ DEBUG_CALL("ip_input"); -+ DEBUG_ARG("m = %p", m); -+ DEBUG_ARG("m_len = %d", m->m_len); -+ -+ if (m->m_len < sizeof(struct ip)) { -+ goto bad; -+ } -+ -+ ip = mtod(m, struct ip *); -+ -+ if (ip->ip_v != IPVERSION) { -+ goto bad; -+ } -+ -+ hlen = ip->ip_hl << 2; -+ if (hlen < sizeof(struct ip) || hlen > m->m_len) { /* min header length */ -+ goto bad; /* or packet too short */ -+ } -+ -+ /* keep ip header intact for ICMP reply -+ * ip->ip_sum = cksum(m, hlen); -+ * if (ip->ip_sum) { -+ */ -+ if (cksum(m, hlen)) { -+ goto bad; -+ } -+ -+ /* -+ * Convert fields to host representation. -+ */ -+ NTOHS(ip->ip_len); -+ if (ip->ip_len < hlen) { -+ goto bad; -+ } -+ NTOHS(ip->ip_id); -+ NTOHS(ip->ip_off); -+ -+ /* -+ * Check that the amount of data in the buffers -+ * is as at least much as the IP header would have us expect. -+ * Trim mbufs if longer than we expect. -+ * Drop packet if shorter than we expect. -+ */ -+ if (m->m_len < ip->ip_len) { -+ goto bad; -+ } -+ -+ /* Should drop packet if mbuf too long? hmmm... */ -+ if (m->m_len > ip->ip_len) -+ m_adj(m, ip->ip_len - m->m_len); -+ -+ /* check ip_ttl for a correct ICMP reply */ -+ if (ip->ip_ttl == 0) { -+ icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, "ttl"); -+ goto bad; -+ } -+ -+ /* -+ * If offset or IP_MF are set, must reassemble. -+ * Otherwise, nothing need be done. -+ * (We could look in the reassembly queue to see -+ * if the packet was previously fragmented, -+ * but it's not worth the time; just let them time out.) -+ * -+ * XXX This should fail, don't fragment yet -+ */ -+ if (ip->ip_off & ~IP_DF) { -+ register struct ipq *fp; -+ struct qlink *l; -+ /* -+ * Look for queue of fragments -+ * of this datagram. 
-+ */ -+ for (l = slirp->ipq.ip_link.next; l != &slirp->ipq.ip_link; -+ l = l->next) { -+ fp = container_of(l, struct ipq, ip_link); -+ if (ip->ip_id == fp->ipq_id && -+ ip->ip_src.s_addr == fp->ipq_src.s_addr && -+ ip->ip_dst.s_addr == fp->ipq_dst.s_addr && -+ ip->ip_p == fp->ipq_p) -+ goto found; -+ } -+ fp = NULL; -+ found: -+ -+ /* -+ * Adjust ip_len to not reflect header, -+ * set ip_mff if more fragments are expected, -+ * convert offset of this to bytes. -+ */ -+ ip->ip_len -= hlen; -+ if (ip->ip_off & IP_MF) -+ ip->ip_tos |= 1; -+ else -+ ip->ip_tos &= ~1; -+ -+ ip->ip_off <<= 3; -+ -+ /* -+ * If datagram marked as having more fragments -+ * or if this is not the first fragment, -+ * attempt reassembly; if it succeeds, proceed. -+ */ -+ if (ip->ip_tos & 1 || ip->ip_off) { -+ ip = ip_reass(slirp, ip, fp); -+ if (ip == NULL) -+ return; -+ m = dtom(slirp, ip); -+ } else if (fp) -+ ip_freef(slirp, fp); -+ -+ } else -+ ip->ip_len -= hlen; -+ -+ /* -+ * Switch out to protocol's input routine. -+ */ -+ switch (ip->ip_p) { -+ case IPPROTO_TCP: -+ tcp_input(m, hlen, (struct socket *)NULL, AF_INET); -+ break; -+ case IPPROTO_UDP: -+ udp_input(m, hlen); -+ break; -+ case IPPROTO_ICMP: -+ icmp_input(m, hlen); -+ break; -+ default: -+ m_free(m); -+ } -+ return; -+bad: -+ m_free(m); -+} -+ -+#define iptofrag(P) ((struct ipasfrag *)(((char *)(P)) - sizeof(struct qlink))) -+#define fragtoip(P) ((struct ip *)(((char *)(P)) + sizeof(struct qlink))) -+/* -+ * Take incoming datagram fragment and try to -+ * reassemble it into whole datagram. If a chain for -+ * reassembly of this datagram already exists, then it -+ * is given as fp; otherwise have to make a chain. 
-+ */ -+static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) -+{ -+ register struct mbuf *m = dtom(slirp, ip); -+ register struct ipasfrag *q; -+ int hlen = ip->ip_hl << 2; -+ int i, next; -+ -+ DEBUG_CALL("ip_reass"); -+ DEBUG_ARG("ip = %p", ip); -+ DEBUG_ARG("fp = %p", fp); -+ DEBUG_ARG("m = %p", m); -+ -+ /* -+ * Presence of header sizes in mbufs -+ * would confuse code below. -+ * Fragment m_data is concatenated. -+ */ -+ m->m_data += hlen; -+ m->m_len -= hlen; -+ -+ /* -+ * If first fragment to arrive, create a reassembly queue. -+ */ -+ if (fp == NULL) { -+ struct mbuf *t = m_get(slirp); -+ -+ if (t == NULL) { -+ goto dropfrag; -+ } -+ fp = mtod(t, struct ipq *); -+ insque(&fp->ip_link, &slirp->ipq.ip_link); -+ fp->ipq_ttl = IPFRAGTTL; -+ fp->ipq_p = ip->ip_p; -+ fp->ipq_id = ip->ip_id; -+ fp->frag_link.next = fp->frag_link.prev = &fp->frag_link; -+ fp->ipq_src = ip->ip_src; -+ fp->ipq_dst = ip->ip_dst; -+ q = (struct ipasfrag *)fp; -+ goto insert; -+ } -+ -+ /* -+ * Find a segment which begins after this one does. -+ */ -+ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; -+ q = q->ipf_next) -+ if (q->ipf_off > ip->ip_off) -+ break; -+ -+ /* -+ * If there is a preceding segment, it may provide some of -+ * our data already. If so, drop the data from the incoming -+ * segment. If it provides all of our data, drop us. -+ */ -+ if (q->ipf_prev != &fp->frag_link) { -+ struct ipasfrag *pq = q->ipf_prev; -+ i = pq->ipf_off + pq->ipf_len - ip->ip_off; -+ if (i > 0) { -+ if (i >= ip->ip_len) -+ goto dropfrag; -+ m_adj(dtom(slirp, ip), i); -+ ip->ip_off += i; -+ ip->ip_len -= i; -+ } -+ } -+ -+ /* -+ * While we overlap succeeding segments trim them or, -+ * if they are completely covered, dequeue them. 
-+ */ -+ while (q != (struct ipasfrag *)&fp->frag_link && -+ ip->ip_off + ip->ip_len > q->ipf_off) { -+ i = (ip->ip_off + ip->ip_len) - q->ipf_off; -+ if (i < q->ipf_len) { -+ q->ipf_len -= i; -+ q->ipf_off += i; -+ m_adj(dtom(slirp, q), i); -+ break; -+ } -+ q = q->ipf_next; -+ m_free(dtom(slirp, q->ipf_prev)); -+ ip_deq(q->ipf_prev); -+ } -+ -+insert: -+ /* -+ * Stick new segment in its place; -+ * check for complete reassembly. -+ */ -+ ip_enq(iptofrag(ip), q->ipf_prev); -+ next = 0; -+ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; -+ q = q->ipf_next) { -+ if (q->ipf_off != next) -+ return NULL; -+ next += q->ipf_len; -+ } -+ if (((struct ipasfrag *)(q->ipf_prev))->ipf_tos & 1) -+ return NULL; -+ -+ /* -+ * Reassembly is complete; concatenate fragments. -+ */ -+ q = fp->frag_link.next; -+ m = dtom(slirp, q); -+ -+ int was_ext = m->m_flags & M_EXT; -+ -+ q = (struct ipasfrag *)q->ipf_next; -+ while (q != (struct ipasfrag *)&fp->frag_link) { -+ struct mbuf *t = dtom(slirp, q); -+ q = (struct ipasfrag *)q->ipf_next; -+ m_cat(m, t); -+ } -+ -+ /* -+ * Create header for new ip packet by -+ * modifying header of first packet; -+ * dequeue and discard fragment reassembly header. -+ * Make header visible. -+ */ -+ q = fp->frag_link.next; -+ -+ /* -+ * If the fragments concatenated to an mbuf that's -+ * bigger than the total size of the fragment, then and -+ * m_ext buffer was alloced. But fp->ipq_next points to -+ * the old buffer (in the mbuf), so we must point ip -+ * into the new buffer. 
-+ */ -+ if (!was_ext && m->m_flags & M_EXT) { -+ int delta = (char *)q - m->m_dat; -+ q = (struct ipasfrag *)(m->m_ext + delta); -+ } -+ -+ ip = fragtoip(q); -+ ip->ip_len = next; -+ ip->ip_tos &= ~1; -+ ip->ip_src = fp->ipq_src; -+ ip->ip_dst = fp->ipq_dst; -+ remque(&fp->ip_link); -+ (void)m_free(dtom(slirp, fp)); -+ m->m_len += (ip->ip_hl << 2); -+ m->m_data -= (ip->ip_hl << 2); -+ -+ return ip; -+ -+dropfrag: -+ m_free(m); -+ return NULL; -+} -+ -+/* -+ * Free a fragment reassembly header and all -+ * associated datagrams. -+ */ -+static void ip_freef(Slirp *slirp, struct ipq *fp) -+{ -+ register struct ipasfrag *q, *p; -+ -+ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; -+ q = p) { -+ p = q->ipf_next; -+ ip_deq(q); -+ m_free(dtom(slirp, q)); -+ } -+ remque(&fp->ip_link); -+ (void)m_free(dtom(slirp, fp)); -+} -+ -+/* -+ * Put an ip fragment on a reassembly chain. -+ * Like insque, but pointers in middle of structure. -+ */ -+static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev) -+{ -+ DEBUG_CALL("ip_enq"); -+ DEBUG_ARG("prev = %p", prev); -+ p->ipf_prev = prev; -+ p->ipf_next = prev->ipf_next; -+ ((struct ipasfrag *)(prev->ipf_next))->ipf_prev = p; -+ prev->ipf_next = p; -+} -+ -+/* -+ * To ip_enq as remque is to insque. -+ */ -+static void ip_deq(register struct ipasfrag *p) -+{ -+ ((struct ipasfrag *)(p->ipf_prev))->ipf_next = p->ipf_next; -+ ((struct ipasfrag *)(p->ipf_next))->ipf_prev = p->ipf_prev; -+} -+ -+/* -+ * IP timer processing; -+ * if a timer expires on a reassembly -+ * queue, discard it. 
-+ */ -+void ip_slowtimo(Slirp *slirp) -+{ -+ struct qlink *l; -+ -+ DEBUG_CALL("ip_slowtimo"); -+ -+ l = slirp->ipq.ip_link.next; -+ -+ if (l == NULL) -+ return; -+ -+ while (l != &slirp->ipq.ip_link) { -+ struct ipq *fp = container_of(l, struct ipq, ip_link); -+ l = l->next; -+ if (--fp->ipq_ttl == 0) { -+ ip_freef(slirp, fp); -+ } -+ } -+} -+ -+/* -+ * Strip out IP options, at higher -+ * level protocol in the kernel. -+ * Second argument is buffer to which options -+ * will be moved, and return value is their length. -+ * (XXX) should be deleted; last arg currently ignored. -+ */ -+void ip_stripoptions(register struct mbuf *m, struct mbuf *mopt) -+{ -+ register int i; -+ struct ip *ip = mtod(m, struct ip *); -+ register char *opts; -+ int olen; -+ -+ olen = (ip->ip_hl << 2) - sizeof(struct ip); -+ opts = (char *)(ip + 1); -+ i = m->m_len - (sizeof(struct ip) + olen); -+ memcpy(opts, opts + olen, (unsigned)i); -+ m->m_len -= olen; -+ -+ ip->ip_hl = sizeof(struct ip) >> 2; -+} -diff --git a/slirp/src/ip_output.c b/slirp/src/ip_output.c -new file mode 100644 -index 0000000..f820359 ---- /dev/null -+++ b/slirp/src/ip_output.c -@@ -0,0 +1,169 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1990, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. 
Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 -+ * ip_output.c,v 1.9 1994/11/16 10:17:10 jkh Exp -+ */ -+ -+/* -+ * Changes and additions relating to SLiRP are -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#include "slirp.h" -+ -+/* Number of packets queued before we start sending -+ * (to prevent allocing too many mbufs) */ -+#define IF_THRESH 10 -+ -+/* -+ * IP output. The packet in mbuf chain m contains a skeletal IP -+ * header (with len, off, ttl, proto, tos, src, dst). -+ * The mbuf chain containing the packet will be freed. -+ * The mbuf opt, if present, will not be freed. -+ */ -+int ip_output(struct socket *so, struct mbuf *m0) -+{ -+ Slirp *slirp = m0->slirp; -+ register struct ip *ip; -+ register struct mbuf *m = m0; -+ register int hlen = sizeof(struct ip); -+ int len, off, error = 0; -+ -+ DEBUG_CALL("ip_output"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("m0 = %p", m0); -+ -+ ip = mtod(m, struct ip *); -+ /* -+ * Fill in IP header. 
-+ */ -+ ip->ip_v = IPVERSION; -+ ip->ip_off &= IP_DF; -+ ip->ip_id = htons(slirp->ip_id++); -+ ip->ip_hl = hlen >> 2; -+ -+ /* -+ * If small enough for interface, can just send directly. -+ */ -+ if ((uint16_t)ip->ip_len <= IF_MTU) { -+ ip->ip_len = htons((uint16_t)ip->ip_len); -+ ip->ip_off = htons((uint16_t)ip->ip_off); -+ ip->ip_sum = 0; -+ ip->ip_sum = cksum(m, hlen); -+ -+ if_output(so, m); -+ goto done; -+ } -+ -+ /* -+ * Too large for interface; fragment if possible. -+ * Must be able to put at least 8 bytes per fragment. -+ */ -+ if (ip->ip_off & IP_DF) { -+ error = -1; -+ goto bad; -+ } -+ -+ len = (IF_MTU - hlen) & ~7; /* ip databytes per packet */ -+ if (len < 8) { -+ error = -1; -+ goto bad; -+ } -+ -+ { -+ int mhlen, firstlen = len; -+ struct mbuf **mnext = &m->m_nextpkt; -+ -+ /* -+ * Loop through length of segment after first fragment, -+ * make new header and copy data of each part and link onto chain. -+ */ -+ m0 = m; -+ mhlen = sizeof(struct ip); -+ for (off = hlen + len; off < (uint16_t)ip->ip_len; off += len) { -+ register struct ip *mhip; -+ m = m_get(slirp); -+ if (m == NULL) { -+ error = -1; -+ goto sendorfree; -+ } -+ m->m_data += IF_MAXLINKHDR; -+ mhip = mtod(m, struct ip *); -+ *mhip = *ip; -+ -+ m->m_len = mhlen; -+ mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); -+ if (ip->ip_off & IP_MF) -+ mhip->ip_off |= IP_MF; -+ if (off + len >= (uint16_t)ip->ip_len) -+ len = (uint16_t)ip->ip_len - off; -+ else -+ mhip->ip_off |= IP_MF; -+ mhip->ip_len = htons((uint16_t)(len + mhlen)); -+ -+ if (m_copy(m, m0, off, len) < 0) { -+ error = -1; -+ goto sendorfree; -+ } -+ -+ mhip->ip_off = htons((uint16_t)mhip->ip_off); -+ mhip->ip_sum = 0; -+ mhip->ip_sum = cksum(m, mhlen); -+ *mnext = m; -+ mnext = &m->m_nextpkt; -+ } -+ /* -+ * Update first fragment by trimming what's been copied out -+ * and updating header, then send each fragment (in order). 
-+ */ -+ m = m0; -+ m_adj(m, hlen + firstlen - (uint16_t)ip->ip_len); -+ ip->ip_len = htons((uint16_t)m->m_len); -+ ip->ip_off = htons((uint16_t)(ip->ip_off | IP_MF)); -+ ip->ip_sum = 0; -+ ip->ip_sum = cksum(m, hlen); -+ sendorfree: -+ for (m = m0; m; m = m0) { -+ m0 = m->m_nextpkt; -+ m->m_nextpkt = NULL; -+ if (error == 0) -+ if_output(so, m); -+ else -+ m_free(m); -+ } -+ } -+ -+done: -+ return (error); -+ -+bad: -+ m_free(m0); -+ goto done; -+} -diff --git a/slirp/src/libslirp-version.h.in b/slirp/src/libslirp-version.h.in -new file mode 100644 -index 0000000..59f7a46 ---- /dev/null -+++ b/slirp/src/libslirp-version.h.in -@@ -0,0 +1,23 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+#ifndef LIBSLIRP_VERSION_H_ -+#define LIBSLIRP_VERSION_H_ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#define SLIRP_MAJOR_VERSION @SLIRP_MAJOR_VERSION@ -+#define SLIRP_MINOR_VERSION @SLIRP_MINOR_VERSION@ -+#define SLIRP_MICRO_VERSION @SLIRP_MICRO_VERSION@ -+ -+#define SLIRP_CHECK_VERSION(major,minor,micro) \ -+ (SLIRP_MAJOR_VERSION > (major) || \ -+ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION > (minor)) || \ -+ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION == (minor) && \ -+ SLIRP_MICRO_VERSION >= (micro))) -+ -+#ifdef __cplusplus -+} /* extern "C" */ -+#endif -+ -+#endif /* LIBSLIRP_VERSION_H_ */ -diff --git a/slirp/src/libslirp.h b/slirp/src/libslirp.h -new file mode 100644 -index 0000000..9b2f611 ---- /dev/null -+++ b/slirp/src/libslirp.h -@@ -0,0 +1,119 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+#ifndef LIBSLIRP_H -+#define LIBSLIRP_H -+ -+#include -+#include -+#include -+ -+#ifdef _WIN32 -+#include -+#include -+#else -+#include -+#include -+#endif -+ -+#include "libslirp-version.h" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+typedef struct Slirp Slirp; -+ -+enum { -+ SLIRP_POLL_IN = 1 << 0, -+ SLIRP_POLL_OUT = 1 << 1, -+ SLIRP_POLL_PRI = 1 << 2, -+ SLIRP_POLL_ERR = 1 << 3, -+ SLIRP_POLL_HUP = 1 << 4, -+}; -+ -+typedef ssize_t 
(*SlirpReadCb)(void *buf, size_t len, void *opaque); -+typedef ssize_t (*SlirpWriteCb)(const void *buf, size_t len, void *opaque); -+typedef void (*SlirpTimerCb)(void *opaque); -+typedef int (*SlirpAddPollCb)(int fd, int events, void *opaque); -+typedef int (*SlirpGetREventsCb)(int idx, void *opaque); -+ -+/* -+ * Callbacks from slirp -+ */ -+typedef struct SlirpCb { -+ /* -+ * Send an ethernet frame to the guest network. The opaque -+ * parameter is the one given to slirp_init(). The function -+ * doesn't need to send all the data and may return m_freelist.qh_link = slirp->m_freelist.qh_rlink = &slirp->m_freelist; -+ slirp->m_usedlist.qh_link = slirp->m_usedlist.qh_rlink = &slirp->m_usedlist; -+} -+ -+void m_cleanup(Slirp *slirp) -+{ -+ struct mbuf *m, *next; -+ -+ m = (struct mbuf *)slirp->m_usedlist.qh_link; -+ while ((struct quehead *)m != &slirp->m_usedlist) { -+ next = m->m_next; -+ if (m->m_flags & M_EXT) { -+ g_free(m->m_ext); -+ } -+ g_free(m); -+ m = next; -+ } -+ m = (struct mbuf *)slirp->m_freelist.qh_link; -+ while ((struct quehead *)m != &slirp->m_freelist) { -+ next = m->m_next; -+ g_free(m); -+ m = next; -+ } -+} -+ -+/* -+ * Get an mbuf from the free list, if there are none -+ * allocate one -+ * -+ * Because fragmentation can occur if we alloc new mbufs and -+ * free old mbufs, we mark all mbufs above mbuf_thresh as M_DOFREE, -+ * which tells m_free to actually g_free() it -+ */ -+struct mbuf *m_get(Slirp *slirp) -+{ -+ register struct mbuf *m; -+ int flags = 0; -+ -+ DEBUG_CALL("m_get"); -+ -+ if (slirp->m_freelist.qh_link == &slirp->m_freelist) { -+ m = g_malloc(SLIRP_MSIZE); -+ slirp->mbuf_alloced++; -+ if (slirp->mbuf_alloced > MBUF_THRESH) -+ flags = M_DOFREE; -+ m->slirp = slirp; -+ } else { -+ m = (struct mbuf *)slirp->m_freelist.qh_link; -+ remque(m); -+ } -+ -+ /* Insert it in the used list */ -+ insque(m, &slirp->m_usedlist); -+ m->m_flags = (flags | M_USEDLIST); -+ -+ /* Initialise it */ -+ m->m_size = SLIRP_MSIZE - offsetof(struct 
mbuf, m_dat); -+ m->m_data = m->m_dat; -+ m->m_len = 0; -+ m->m_nextpkt = NULL; -+ m->m_prevpkt = NULL; -+ m->resolution_requested = false; -+ m->expiration_date = (uint64_t)-1; -+ DEBUG_ARG("m = %p", m); -+ return m; -+} -+ -+void m_free(struct mbuf *m) -+{ -+ DEBUG_CALL("m_free"); -+ DEBUG_ARG("m = %p", m); -+ -+ if (m) { -+ /* Remove from m_usedlist */ -+ if (m->m_flags & M_USEDLIST) -+ remque(m); -+ -+ /* If it's M_EXT, free() it */ -+ if (m->m_flags & M_EXT) { -+ g_free(m->m_ext); -+ } -+ /* -+ * Either free() it or put it on the free list -+ */ -+ if (m->m_flags & M_DOFREE) { -+ m->slirp->mbuf_alloced--; -+ g_free(m); -+ } else if ((m->m_flags & M_FREELIST) == 0) { -+ insque(m, &m->slirp->m_freelist); -+ m->m_flags = M_FREELIST; /* Clobber other flags */ -+ } -+ } /* if(m) */ -+} -+ -+/* -+ * Copy data from one mbuf to the end of -+ * the other.. if result is too big for one mbuf, allocate -+ * an M_EXT data segment -+ */ -+void m_cat(struct mbuf *m, struct mbuf *n) -+{ -+ /* -+ * If there's no room, realloc -+ */ -+ if (M_FREEROOM(m) < n->m_len) -+ m_inc(m, m->m_len + n->m_len); -+ -+ memcpy(m->m_data + m->m_len, n->m_data, n->m_len); -+ m->m_len += n->m_len; -+ -+ m_free(n); -+} -+ -+ -+/* make m 'size' bytes large from m_data */ -+void m_inc(struct mbuf *m, int size) -+{ -+ int gapsize; -+ -+ /* some compilers throw up on gotos. This one we can fake. 
*/ -+ if (M_ROOM(m) > size) { -+ return; -+ } -+ -+ if (m->m_flags & M_EXT) { -+ gapsize = m->m_data - m->m_ext; -+ m->m_ext = g_realloc(m->m_ext, size + gapsize); -+ } else { -+ gapsize = m->m_data - m->m_dat; -+ m->m_ext = g_malloc(size + gapsize); -+ memcpy(m->m_ext, m->m_dat, m->m_size); -+ m->m_flags |= M_EXT; -+ } -+ -+ m->m_data = m->m_ext + gapsize; -+ m->m_size = size + gapsize; -+} -+ -+ -+void m_adj(struct mbuf *m, int len) -+{ -+ if (m == NULL) -+ return; -+ if (len >= 0) { -+ /* Trim from head */ -+ m->m_data += len; -+ m->m_len -= len; -+ } else { -+ /* Trim from tail */ -+ len = -len; -+ m->m_len -= len; -+ } -+} -+ -+ -+/* -+ * Copy len bytes from m, starting off bytes into n -+ */ -+int m_copy(struct mbuf *n, struct mbuf *m, int off, int len) -+{ -+ if (len > M_FREEROOM(n)) -+ return -1; -+ -+ memcpy((n->m_data + n->m_len), (m->m_data + off), len); -+ n->m_len += len; -+ return 0; -+} -+ -+ -+/* -+ * Given a pointer into an mbuf, return the mbuf -+ * XXX This is a kludge, I should eliminate the need for it -+ * Fortunately, it's not used often -+ */ -+struct mbuf *dtom(Slirp *slirp, void *dat) -+{ -+ struct mbuf *m; -+ -+ DEBUG_CALL("dtom"); -+ DEBUG_ARG("dat = %p", dat); -+ -+ /* bug corrected for M_EXT buffers */ -+ for (m = (struct mbuf *)slirp->m_usedlist.qh_link; -+ (struct quehead *)m != &slirp->m_usedlist; m = m->m_next) { -+ if (m->m_flags & M_EXT) { -+ if ((char *)dat >= m->m_ext && (char *)dat < (m->m_ext + m->m_size)) -+ return m; -+ } else { -+ if ((char *)dat >= m->m_dat && (char *)dat < (m->m_dat + m->m_size)) -+ return m; -+ } -+ } -+ -+ DEBUG_ERROR("dtom failed"); -+ -+ return (struct mbuf *)0; -+} -diff --git a/slirp/src/mbuf.h b/slirp/src/mbuf.h -new file mode 100644 -index 0000000..546e785 ---- /dev/null -+++ b/slirp/src/mbuf.h -@@ -0,0 +1,127 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1993 -+ * The Regents of the University of California. All rights reserved. 
-+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)mbuf.h 8.3 (Berkeley) 1/21/94 -+ * mbuf.h,v 1.9 1994/11/14 13:54:20 bde Exp -+ */ -+ -+#ifndef MBUF_H -+#define MBUF_H -+ -+/* -+ * Macros for type conversion -+ * mtod(m,t) - convert mbuf pointer to data pointer of correct type -+ */ -+#define mtod(m, t) ((t)(m)->m_data) -+ -+/* XXX About mbufs for slirp: -+ * Only one mbuf is ever used in a chain, for each "cell" of data. -+ * m_nextpkt points to the next packet, if fragmented. 
-+ * If the data is too large, the M_EXT is used, and a larger block -+ * is alloced. Therefore, m_free[m] must check for M_EXT and if set -+ * free the m_ext. This is inefficient memory-wise, but who cares. -+ */ -+ -+/* -+ * mbufs allow to have a gap between the start of the allocated buffer (m_ext if -+ * M_EXT is set, m_dat otherwise) and the in-use data: -+ * -+ * |--gapsize----->|---m_len-------> -+ * |----------m_size------------------------------> -+ * |----M_ROOM--------------------> -+ * |-M_FREEROOM--> -+ * -+ * ^ ^ ^ -+ * m_dat/m_ext m_data end of buffer -+ */ -+ -+/* -+ * How much room is in the mbuf, from m_data to the end of the mbuf -+ */ -+#define M_ROOM(m) \ -+ ((m->m_flags & M_EXT) ? (((m)->m_ext + (m)->m_size) - (m)->m_data) : \ -+ (((m)->m_dat + (m)->m_size) - (m)->m_data)) -+ -+/* -+ * How much free room there is -+ */ -+#define M_FREEROOM(m) (M_ROOM(m) - (m)->m_len) -+ -+struct mbuf { -+ /* XXX should union some of these! */ -+ /* header at beginning of each mbuf: */ -+ struct mbuf *m_next; /* Linked list of mbufs */ -+ struct mbuf *m_prev; -+ struct mbuf *m_nextpkt; /* Next packet in queue/record */ -+ struct mbuf *m_prevpkt; /* Flags aren't used in the output queue */ -+ int m_flags; /* Misc flags */ -+ -+ int m_size; /* Size of mbuf, from m_dat or m_ext */ -+ struct socket *m_so; -+ -+ char *m_data; /* Current location of data */ -+ int m_len; /* Amount of data in this mbuf, from m_data */ -+ -+ Slirp *slirp; -+ bool resolution_requested; -+ uint64_t expiration_date; -+ char *m_ext; -+ /* start of dynamic buffer area, must be last element */ -+ char m_dat[]; -+}; -+ -+#define ifq_prev m_prev -+#define ifq_next m_next -+#define ifs_prev m_prevpkt -+#define ifs_next m_nextpkt -+#define ifq_so m_so -+ -+#define M_EXT 0x01 /* m_ext points to more (malloced) data */ -+#define M_FREELIST 0x02 /* mbuf is on free list */ -+#define M_USEDLIST 0x04 /* XXX mbuf is on used list (for dtom()) */ -+#define M_DOFREE \ -+ 0x08 /* when m_free is called on 
the mbuf, free() \ -+ * it rather than putting it on the free list */ -+ -+void m_init(Slirp *); -+void m_cleanup(Slirp *slirp); -+struct mbuf *m_get(Slirp *); -+void m_free(struct mbuf *); -+void m_cat(register struct mbuf *, register struct mbuf *); -+void m_inc(struct mbuf *, int); -+void m_adj(struct mbuf *, int); -+int m_copy(struct mbuf *, struct mbuf *, int, int); -+struct mbuf *dtom(Slirp *, void *); -+ -+static inline void ifs_init(struct mbuf *ifm) -+{ -+ ifm->ifs_next = ifm->ifs_prev = ifm; -+} -+ -+#endif -diff --git a/slirp/src/misc.c b/slirp/src/misc.c -new file mode 100644 -index 0000000..6675acc ---- /dev/null -+++ b/slirp/src/misc.c -@@ -0,0 +1,298 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#include "slirp.h" -+ -+inline void insque(void *a, void *b) -+{ -+ register struct quehead *element = (struct quehead *)a; -+ register struct quehead *head = (struct quehead *)b; -+ element->qh_link = head->qh_link; -+ head->qh_link = (struct quehead *)element; -+ element->qh_rlink = (struct quehead *)head; -+ ((struct quehead *)(element->qh_link))->qh_rlink = -+ (struct quehead *)element; -+} -+ -+inline void remque(void *a) -+{ -+ register struct quehead *element = (struct quehead *)a; -+ ((struct quehead *)(element->qh_link))->qh_rlink = element->qh_rlink; -+ ((struct quehead *)(element->qh_rlink))->qh_link = element->qh_link; -+ element->qh_rlink = NULL; -+} -+ -+/* TODO: IPv6 */ -+struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, -+ void *opaque, struct in_addr addr, int port) -+{ -+ struct gfwd_list *f = g_new0(struct gfwd_list, 1); -+ -+ f->write_cb = write_cb; -+ f->opaque = opaque; -+ f->ex_fport = port; -+ f->ex_addr = addr; -+ f->ex_next = *ex_ptr; -+ *ex_ptr = f; -+ -+ return f; -+} -+ -+struct gfwd_list *add_exec(struct gfwd_list **ex_ptr, const char *cmdline, -+ struct in_addr addr, int port) -+{ -+ struct gfwd_list *f = add_guestfwd(ex_ptr, NULL, NULL, 
addr, port); -+ -+ f->ex_exec = g_strdup(cmdline); -+ -+ return f; -+} -+ -+static int slirp_socketpair_with_oob(int sv[2]) -+{ -+ struct sockaddr_in addr = { -+ .sin_family = AF_INET, -+ .sin_port = 0, -+ .sin_addr.s_addr = INADDR_ANY, -+ }; -+ socklen_t addrlen = sizeof(addr); -+ int ret, s; -+ -+ sv[1] = -1; -+ s = slirp_socket(AF_INET, SOCK_STREAM, 0); -+ if (s < 0 || bind(s, (struct sockaddr *)&addr, addrlen) < 0 || -+ listen(s, 1) < 0 || -+ getsockname(s, (struct sockaddr *)&addr, &addrlen) < 0) { -+ goto err; -+ } -+ -+ sv[1] = slirp_socket(AF_INET, SOCK_STREAM, 0); -+ if (sv[1] < 0) { -+ goto err; -+ } -+ /* -+ * This connect won't block because we've already listen()ed on -+ * the server end (even though we won't accept() the connection -+ * until later on). -+ */ -+ do { -+ ret = connect(sv[1], (struct sockaddr *)&addr, addrlen); -+ } while (ret < 0 && errno == EINTR); -+ if (ret < 0) { -+ goto err; -+ } -+ -+ do { -+ sv[0] = accept(s, (struct sockaddr *)&addr, &addrlen); -+ } while (sv[0] < 0 && errno == EINTR); -+ if (sv[0] < 0) { -+ goto err; -+ } -+ -+ closesocket(s); -+ return 0; -+ -+err: -+ g_critical("slirp_socketpair(): %s", strerror(errno)); -+ if (s >= 0) { -+ closesocket(s); -+ } -+ if (sv[1] >= 0) { -+ closesocket(sv[1]); -+ } -+ return -1; -+} -+ -+static void fork_exec_child_setup(gpointer data) -+{ -+#ifndef _WIN32 -+ setsid(); -+#endif -+} -+ -+#pragma GCC diagnostic push -+#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -+ -+#if !GLIB_CHECK_VERSION(2, 58, 0) -+typedef struct SlirpGSpawnFds { -+ GSpawnChildSetupFunc child_setup; -+ gpointer user_data; -+ gint stdin_fd; -+ gint stdout_fd; -+ gint stderr_fd; -+} SlirpGSpawnFds; -+ -+static inline void slirp_gspawn_fds_setup(gpointer user_data) -+{ -+ SlirpGSpawnFds *q = (SlirpGSpawnFds *)user_data; -+ -+ dup2(q->stdin_fd, 0); -+ dup2(q->stdout_fd, 1); -+ dup2(q->stderr_fd, 2); -+ q->child_setup(q->user_data); -+} -+#endif -+ -+static inline gboolean 
-+g_spawn_async_with_fds_slirp(const gchar *working_directory, gchar **argv, -+ gchar **envp, GSpawnFlags flags, -+ GSpawnChildSetupFunc child_setup, -+ gpointer user_data, GPid *child_pid, gint stdin_fd, -+ gint stdout_fd, gint stderr_fd, GError **error) -+{ -+#if GLIB_CHECK_VERSION(2, 58, 0) -+ return g_spawn_async_with_fds(working_directory, argv, envp, flags, -+ child_setup, user_data, child_pid, stdin_fd, -+ stdout_fd, stderr_fd, error); -+#else -+ SlirpGSpawnFds setup = { -+ .child_setup = child_setup, -+ .user_data = user_data, -+ .stdin_fd = stdin_fd, -+ .stdout_fd = stdout_fd, -+ .stderr_fd = stderr_fd, -+ }; -+ -+ return g_spawn_async(working_directory, argv, envp, flags, -+ slirp_gspawn_fds_setup, &setup, child_pid, error); -+#endif -+} -+ -+#define g_spawn_async_with_fds(wd, argv, env, f, c, d, p, ifd, ofd, efd, err) \ -+ g_spawn_async_with_fds_slirp(wd, argv, env, f, c, d, p, ifd, ofd, efd, err) -+ -+#pragma GCC diagnostic pop -+ -+int fork_exec(struct socket *so, const char *ex) -+{ -+ GError *err = NULL; -+ char **argv; -+ int opt, sp[2]; -+ -+ DEBUG_CALL("fork_exec"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("ex = %p", ex); -+ -+ if (slirp_socketpair_with_oob(sp) < 0) { -+ return 0; -+ } -+ -+ argv = g_strsplit(ex, " ", -1); -+ g_spawn_async_with_fds(NULL /* cwd */, argv, NULL /* env */, -+ G_SPAWN_SEARCH_PATH, fork_exec_child_setup, -+ NULL /* data */, NULL /* child_pid */, sp[1], sp[1], -+ sp[1], &err); -+ g_strfreev(argv); -+ -+ if (err) { -+ g_critical("fork_exec: %s", err->message); -+ g_error_free(err); -+ closesocket(sp[0]); -+ closesocket(sp[1]); -+ return 0; -+ } -+ -+ so->s = sp[0]; -+ closesocket(sp[1]); -+ slirp_socket_set_fast_reuse(so->s); -+ opt = 1; -+ setsockopt(so->s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); -+ slirp_set_nonblock(so->s); -+ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); -+ return 1; -+} -+ -+char *slirp_connection_info(Slirp *slirp) -+{ -+ GString *str = g_string_new(NULL); -+ const char *const 
tcpstates[] = { -+ [TCPS_CLOSED] = "CLOSED", [TCPS_LISTEN] = "LISTEN", -+ [TCPS_SYN_SENT] = "SYN_SENT", [TCPS_SYN_RECEIVED] = "SYN_RCVD", -+ [TCPS_ESTABLISHED] = "ESTABLISHED", [TCPS_CLOSE_WAIT] = "CLOSE_WAIT", -+ [TCPS_FIN_WAIT_1] = "FIN_WAIT_1", [TCPS_CLOSING] = "CLOSING", -+ [TCPS_LAST_ACK] = "LAST_ACK", [TCPS_FIN_WAIT_2] = "FIN_WAIT_2", -+ [TCPS_TIME_WAIT] = "TIME_WAIT", -+ }; -+ struct in_addr dst_addr; -+ struct sockaddr_in src; -+ socklen_t src_len; -+ uint16_t dst_port; -+ struct socket *so; -+ const char *state; -+ char buf[20]; -+ -+ g_string_append_printf(str, -+ " Protocol[State] FD Source Address Port " -+ "Dest. Address Port RecvQ SendQ\n"); -+ -+ /* TODO: IPv6 */ -+ -+ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { -+ if (so->so_state & SS_HOSTFWD) { -+ state = "HOST_FORWARD"; -+ } else if (so->so_tcpcb) { -+ state = tcpstates[so->so_tcpcb->t_state]; -+ } else { -+ state = "NONE"; -+ } -+ if (so->so_state & (SS_HOSTFWD | SS_INCOMING)) { -+ src_len = sizeof(src); -+ getsockname(so->s, (struct sockaddr *)&src, &src_len); -+ dst_addr = so->so_laddr; -+ dst_port = so->so_lport; -+ } else { -+ src.sin_addr = so->so_laddr; -+ src.sin_port = so->so_lport; -+ dst_addr = so->so_faddr; -+ dst_port = so->so_fport; -+ } -+ snprintf(buf, sizeof(buf), " TCP[%s]", state); -+ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, -+ src.sin_addr.s_addr ? 
inet_ntoa(src.sin_addr) : -+ "*", -+ ntohs(src.sin_port)); -+ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), -+ ntohs(dst_port), so->so_rcv.sb_cc, -+ so->so_snd.sb_cc); -+ } -+ -+ for (so = slirp->udb.so_next; so != &slirp->udb; so = so->so_next) { -+ if (so->so_state & SS_HOSTFWD) { -+ snprintf(buf, sizeof(buf), " UDP[HOST_FORWARD]"); -+ src_len = sizeof(src); -+ getsockname(so->s, (struct sockaddr *)&src, &src_len); -+ dst_addr = so->so_laddr; -+ dst_port = so->so_lport; -+ } else { -+ snprintf(buf, sizeof(buf), " UDP[%d sec]", -+ (so->so_expire - curtime) / 1000); -+ src.sin_addr = so->so_laddr; -+ src.sin_port = so->so_lport; -+ dst_addr = so->so_faddr; -+ dst_port = so->so_fport; -+ } -+ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, -+ src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : -+ "*", -+ ntohs(src.sin_port)); -+ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), -+ ntohs(dst_port), so->so_rcv.sb_cc, -+ so->so_snd.sb_cc); -+ } -+ -+ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so->so_next) { -+ snprintf(buf, sizeof(buf), " ICMP[%d sec]", -+ (so->so_expire - curtime) / 1000); -+ src.sin_addr = so->so_laddr; -+ dst_addr = so->so_faddr; -+ g_string_append_printf(str, "%-19s %3d %15s - ", buf, so->s, -+ src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : -+ "*"); -+ g_string_append_printf(str, "%15s - %5d %5d\n", inet_ntoa(dst_addr), -+ so->so_rcv.sb_cc, so->so_snd.sb_cc); -+ } -+ -+ return g_string_free(str, FALSE); -+} -diff --git a/slirp/src/misc.h b/slirp/src/misc.h -new file mode 100644 -index 0000000..ccf8cf0 ---- /dev/null -+++ b/slirp/src/misc.h -@@ -0,0 +1,63 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1995 Danny Gasparovski. 
-+ */ -+ -+#ifndef MISC_H -+#define MISC_H -+ -+#include "libslirp.h" -+ -+struct gfwd_list { -+ SlirpWriteCb write_cb; -+ void *opaque; -+ struct in_addr ex_addr; /* Server address */ -+ int ex_fport; /* Port to telnet to */ -+ char *ex_exec; /* Command line of what to exec */ -+ struct gfwd_list *ex_next; -+}; -+ -+#define EMU_NONE 0x0 -+ -+/* TCP emulations */ -+#define EMU_CTL 0x1 -+#define EMU_FTP 0x2 -+#define EMU_KSH 0x3 -+#define EMU_IRC 0x4 -+#define EMU_REALAUDIO 0x5 -+#define EMU_RLOGIN 0x6 -+#define EMU_IDENT 0x7 -+ -+#define EMU_NOCONNECT 0x10 /* Don't connect */ -+ -+struct tos_t { -+ uint16_t lport; -+ uint16_t fport; -+ uint8_t tos; -+ uint8_t emu; -+}; -+ -+struct emu_t { -+ uint16_t lport; -+ uint16_t fport; -+ uint8_t tos; -+ uint8_t emu; -+ struct emu_t *next; -+}; -+ -+struct slirp_quehead { -+ struct slirp_quehead *qh_link; -+ struct slirp_quehead *qh_rlink; -+}; -+ -+void slirp_insque(void *, void *); -+void slirp_remque(void *); -+int fork_exec(struct socket *so, const char *ex); -+ -+struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, -+ void *opaque, struct in_addr addr, int port); -+ -+struct gfwd_list *add_exec(struct gfwd_list **ex_ptr, const char *cmdline, -+ struct in_addr addr, int port); -+ -+#endif -diff --git a/slirp/src/ncsi-pkt.h b/slirp/src/ncsi-pkt.h -new file mode 100644 -index 0000000..7795ad8 ---- /dev/null -+++ b/slirp/src/ncsi-pkt.h -@@ -0,0 +1,445 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright Gavin Shan, IBM Corporation 2016. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * 1. Redistributions of source code must retain the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer. -+ * -+ * 2. 
Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer in the documentation and/or other materials provided -+ * with the distribution. -+ * -+ * 3. Neither the name of the copyright holder nor the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -+ * OF THE POSSIBILITY OF SUCH DAMAGE. 
-+ */ -+ -+#ifndef NCSI_PKT_H -+#define NCSI_PKT_H -+ -+/* from linux/net/ncsi/ncsi-pkt.h */ -+#define __be32 uint32_t -+#define __be16 uint16_t -+ -+struct ncsi_pkt_hdr { -+ unsigned char mc_id; /* Management controller ID */ -+ unsigned char revision; /* NCSI version - 0x01 */ -+ unsigned char reserved; /* Reserved */ -+ unsigned char id; /* Packet sequence number */ -+ unsigned char type; /* Packet type */ -+ unsigned char channel; /* Network controller ID */ -+ __be16 length; /* Payload length */ -+ __be32 reserved1[2]; /* Reserved */ -+}; -+ -+struct ncsi_cmd_pkt_hdr { -+ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ -+}; -+ -+struct ncsi_rsp_pkt_hdr { -+ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ -+ __be16 code; /* Response code */ -+ __be16 reason; /* Response reason */ -+}; -+ -+struct ncsi_aen_pkt_hdr { -+ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ -+ unsigned char reserved2[3]; /* Reserved */ -+ unsigned char type; /* AEN packet type */ -+}; -+ -+/* NCSI common command packet */ -+struct ncsi_cmd_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[26]; -+}; -+ -+struct ncsi_rsp_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; -+}; -+ -+/* Select Package */ -+struct ncsi_cmd_sp_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ unsigned char reserved[3]; /* Reserved */ -+ unsigned char hw_arbitration; /* HW arbitration */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; -+}; -+ -+/* Disable Channel */ -+struct ncsi_cmd_dc_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ unsigned char reserved[3]; /* Reserved */ -+ unsigned char ald; /* Allow link down */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; -+}; -+ -+/* Reset Channel */ -+struct ncsi_cmd_rc_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ __be32 reserved; 
/* Reserved */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; -+}; -+ -+/* AEN Enable */ -+struct ncsi_cmd_ae_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ unsigned char reserved[3]; /* Reserved */ -+ unsigned char mc_id; /* MC ID */ -+ __be32 mode; /* AEN working mode */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[18]; -+}; -+ -+/* Set Link */ -+struct ncsi_cmd_sl_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ __be32 mode; /* Link working mode */ -+ __be32 oem_mode; /* OEM link mode */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[18]; -+}; -+ -+/* Set VLAN Filter */ -+struct ncsi_cmd_svf_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ __be16 reserved; /* Reserved */ -+ __be16 vlan; /* VLAN ID */ -+ __be16 reserved1; /* Reserved */ -+ unsigned char index; /* VLAN table index */ -+ unsigned char enable; /* Enable or disable */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[14]; -+}; -+ -+/* Enable VLAN */ -+struct ncsi_cmd_ev_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ unsigned char reserved[3]; /* Reserved */ -+ unsigned char mode; /* VLAN filter mode */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; -+}; -+ -+/* Set MAC Address */ -+struct ncsi_cmd_sma_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ unsigned char mac[6]; /* MAC address */ -+ unsigned char index; /* MAC table index */ -+ unsigned char at_e; /* Addr type and operation */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[18]; -+}; -+ -+/* Enable Broadcast Filter */ -+struct ncsi_cmd_ebf_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ __be32 mode; /* Filter mode */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; -+}; -+ -+/* Enable Global Multicast Filter */ -+struct ncsi_cmd_egmf_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ __be32 mode; /* Global MC mode */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; 
-+}; -+ -+/* Set NCSI Flow Control */ -+struct ncsi_cmd_snfc_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ unsigned char reserved[3]; /* Reserved */ -+ unsigned char mode; /* Flow control mode */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; -+}; -+ -+/* Get Link Status */ -+struct ncsi_rsp_gls_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ __be32 status; /* Link status */ -+ __be32 other; /* Other indications */ -+ __be32 oem_status; /* OEM link status */ -+ __be32 checksum; -+ unsigned char pad[10]; -+}; -+ -+/* Get Version ID */ -+struct ncsi_rsp_gvi_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ __be32 ncsi_version; /* NCSI version */ -+ unsigned char reserved[3]; /* Reserved */ -+ unsigned char alpha2; /* NCSI version */ -+ unsigned char fw_name[12]; /* f/w name string */ -+ __be32 fw_version; /* f/w version */ -+ __be16 pci_ids[4]; /* PCI IDs */ -+ __be32 mf_id; /* Manufacture ID */ -+ __be32 checksum; -+}; -+ -+/* Get Capabilities */ -+struct ncsi_rsp_gc_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ __be32 cap; /* Capabilities */ -+ __be32 bc_cap; /* Broadcast cap */ -+ __be32 mc_cap; /* Multicast cap */ -+ __be32 buf_cap; /* Buffering cap */ -+ __be32 aen_cap; /* AEN cap */ -+ unsigned char vlan_cnt; /* VLAN filter count */ -+ unsigned char mixed_cnt; /* Mix filter count */ -+ unsigned char mc_cnt; /* MC filter count */ -+ unsigned char uc_cnt; /* UC filter count */ -+ unsigned char reserved[2]; /* Reserved */ -+ unsigned char vlan_mode; /* VLAN mode */ -+ unsigned char channel_cnt; /* Channel count */ -+ __be32 checksum; /* Checksum */ -+}; -+ -+/* Get Parameters */ -+struct ncsi_rsp_gp_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ unsigned char mac_cnt; /* Number of MAC addr */ -+ unsigned char reserved[2]; /* Reserved */ -+ unsigned char mac_enable; /* MAC addr enable flags */ -+ unsigned char vlan_cnt; /* VLAN tag count */ -+ unsigned char reserved1; /* 
Reserved */ -+ __be16 vlan_enable; /* VLAN tag enable flags */ -+ __be32 link_mode; /* Link setting */ -+ __be32 bc_mode; /* BC filter mode */ -+ __be32 valid_modes; /* Valid mode parameters */ -+ unsigned char vlan_mode; /* VLAN mode */ -+ unsigned char fc_mode; /* Flow control mode */ -+ unsigned char reserved2[2]; /* Reserved */ -+ __be32 aen_mode; /* AEN mode */ -+ unsigned char mac[6]; /* Supported MAC addr */ -+ __be16 vlan; /* Supported VLAN tags */ -+ __be32 checksum; /* Checksum */ -+}; -+ -+/* Get Controller Packet Statistics */ -+struct ncsi_rsp_gcps_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ __be32 cnt_hi; /* Counter cleared */ -+ __be32 cnt_lo; /* Counter cleared */ -+ __be32 rx_bytes; /* Rx bytes */ -+ __be32 tx_bytes; /* Tx bytes */ -+ __be32 rx_uc_pkts; /* Rx UC packets */ -+ __be32 rx_mc_pkts; /* Rx MC packets */ -+ __be32 rx_bc_pkts; /* Rx BC packets */ -+ __be32 tx_uc_pkts; /* Tx UC packets */ -+ __be32 tx_mc_pkts; /* Tx MC packets */ -+ __be32 tx_bc_pkts; /* Tx BC packets */ -+ __be32 fcs_err; /* FCS errors */ -+ __be32 align_err; /* Alignment errors */ -+ __be32 false_carrier; /* False carrier detection */ -+ __be32 runt_pkts; /* Rx runt packets */ -+ __be32 jabber_pkts; /* Rx jabber packets */ -+ __be32 rx_pause_xon; /* Rx pause XON frames */ -+ __be32 rx_pause_xoff; /* Rx XOFF frames */ -+ __be32 tx_pause_xon; /* Tx XON frames */ -+ __be32 tx_pause_xoff; /* Tx XOFF frames */ -+ __be32 tx_s_collision; /* Single collision frames */ -+ __be32 tx_m_collision; /* Multiple collision frames */ -+ __be32 l_collision; /* Late collision frames */ -+ __be32 e_collision; /* Excessive collision frames */ -+ __be32 rx_ctl_frames; /* Rx control frames */ -+ __be32 rx_64_frames; /* Rx 64-bytes frames */ -+ __be32 rx_127_frames; /* Rx 65-127 bytes frames */ -+ __be32 rx_255_frames; /* Rx 128-255 bytes frames */ -+ __be32 rx_511_frames; /* Rx 256-511 bytes frames */ -+ __be32 rx_1023_frames; /* Rx 512-1023 bytes frames */ -+ __be32 
rx_1522_frames; /* Rx 1024-1522 bytes frames */ -+ __be32 rx_9022_frames; /* Rx 1523-9022 bytes frames */ -+ __be32 tx_64_frames; /* Tx 64-bytes frames */ -+ __be32 tx_127_frames; /* Tx 65-127 bytes frames */ -+ __be32 tx_255_frames; /* Tx 128-255 bytes frames */ -+ __be32 tx_511_frames; /* Tx 256-511 bytes frames */ -+ __be32 tx_1023_frames; /* Tx 512-1023 bytes frames */ -+ __be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */ -+ __be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */ -+ __be32 rx_valid_bytes; /* Rx valid bytes */ -+ __be32 rx_runt_pkts; /* Rx error runt packets */ -+ __be32 rx_jabber_pkts; /* Rx error jabber packets */ -+ __be32 checksum; /* Checksum */ -+}; -+ -+/* Get NCSI Statistics */ -+struct ncsi_rsp_gns_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ __be32 rx_cmds; /* Rx NCSI commands */ -+ __be32 dropped_cmds; /* Dropped commands */ -+ __be32 cmd_type_errs; /* Command type errors */ -+ __be32 cmd_csum_errs; /* Command checksum errors */ -+ __be32 rx_pkts; /* Rx NCSI packets */ -+ __be32 tx_pkts; /* Tx NCSI packets */ -+ __be32 tx_aen_pkts; /* Tx AEN packets */ -+ __be32 checksum; /* Checksum */ -+}; -+ -+/* Get NCSI Pass-through Statistics */ -+struct ncsi_rsp_gnpts_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ __be32 tx_pkts; /* Tx packets */ -+ __be32 tx_dropped; /* Tx dropped packets */ -+ __be32 tx_channel_err; /* Tx channel errors */ -+ __be32 tx_us_err; /* Tx undersize errors */ -+ __be32 rx_pkts; /* Rx packets */ -+ __be32 rx_dropped; /* Rx dropped packets */ -+ __be32 rx_channel_err; /* Rx channel errors */ -+ __be32 rx_us_err; /* Rx undersize errors */ -+ __be32 rx_os_err; /* Rx oversize errors */ -+ __be32 checksum; /* Checksum */ -+}; -+ -+/* Get package status */ -+struct ncsi_rsp_gps_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ __be32 status; /* Hardware arbitration status */ -+ __be32 checksum; -+}; -+ -+/* Get package UUID */ -+struct ncsi_rsp_gpuuid_pkt { -+ struct 
ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ unsigned char uuid[16]; /* UUID */ -+ __be32 checksum; -+}; -+ -+/* AEN: Link State Change */ -+struct ncsi_aen_lsc_pkt { -+ struct ncsi_aen_pkt_hdr aen; /* AEN header */ -+ __be32 status; /* Link status */ -+ __be32 oem_status; /* OEM link status */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[14]; -+}; -+ -+/* AEN: Configuration Required */ -+struct ncsi_aen_cr_pkt { -+ struct ncsi_aen_pkt_hdr aen; /* AEN header */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; -+}; -+ -+/* AEN: Host Network Controller Driver Status Change */ -+struct ncsi_aen_hncdsc_pkt { -+ struct ncsi_aen_pkt_hdr aen; /* AEN header */ -+ __be32 status; /* Status */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[18]; -+}; -+ -+/* NCSI packet revision */ -+#define NCSI_PKT_REVISION 0x01 -+ -+/* NCSI packet commands */ -+#define NCSI_PKT_CMD_CIS 0x00 /* Clear Initial State */ -+#define NCSI_PKT_CMD_SP 0x01 /* Select Package */ -+#define NCSI_PKT_CMD_DP 0x02 /* Deselect Package */ -+#define NCSI_PKT_CMD_EC 0x03 /* Enable Channel */ -+#define NCSI_PKT_CMD_DC 0x04 /* Disable Channel */ -+#define NCSI_PKT_CMD_RC 0x05 /* Reset Channel */ -+#define NCSI_PKT_CMD_ECNT 0x06 /* Enable Channel Network Tx */ -+#define NCSI_PKT_CMD_DCNT 0x07 /* Disable Channel Network Tx */ -+#define NCSI_PKT_CMD_AE 0x08 /* AEN Enable */ -+#define NCSI_PKT_CMD_SL 0x09 /* Set Link */ -+#define NCSI_PKT_CMD_GLS 0x0a /* Get Link */ -+#define NCSI_PKT_CMD_SVF 0x0b /* Set VLAN Filter */ -+#define NCSI_PKT_CMD_EV 0x0c /* Enable VLAN */ -+#define NCSI_PKT_CMD_DV 0x0d /* Disable VLAN */ -+#define NCSI_PKT_CMD_SMA 0x0e /* Set MAC address */ -+#define NCSI_PKT_CMD_EBF 0x10 /* Enable Broadcast Filter */ -+#define NCSI_PKT_CMD_DBF 0x11 /* Disable Broadcast Filter */ -+#define NCSI_PKT_CMD_EGMF 0x12 /* Enable Global Multicast Filter */ -+#define NCSI_PKT_CMD_DGMF 0x13 /* Disable Global Multicast Filter */ -+#define NCSI_PKT_CMD_SNFC 0x14 /* Set NCSI Flow 
Control */ -+#define NCSI_PKT_CMD_GVI 0x15 /* Get Version ID */ -+#define NCSI_PKT_CMD_GC 0x16 /* Get Capabilities */ -+#define NCSI_PKT_CMD_GP 0x17 /* Get Parameters */ -+#define NCSI_PKT_CMD_GCPS 0x18 /* Get Controller Packet Statistics */ -+#define NCSI_PKT_CMD_GNS 0x19 /* Get NCSI Statistics */ -+#define NCSI_PKT_CMD_GNPTS 0x1a /* Get NCSI Pass-throu Statistics */ -+#define NCSI_PKT_CMD_GPS 0x1b /* Get package status */ -+#define NCSI_PKT_CMD_OEM 0x50 /* OEM */ -+#define NCSI_PKT_CMD_PLDM 0x51 /* PLDM request over NCSI over RBT */ -+#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */ -+ -+/* NCSI packet responses */ -+#define NCSI_PKT_RSP_CIS (NCSI_PKT_CMD_CIS + 0x80) -+#define NCSI_PKT_RSP_SP (NCSI_PKT_CMD_SP + 0x80) -+#define NCSI_PKT_RSP_DP (NCSI_PKT_CMD_DP + 0x80) -+#define NCSI_PKT_RSP_EC (NCSI_PKT_CMD_EC + 0x80) -+#define NCSI_PKT_RSP_DC (NCSI_PKT_CMD_DC + 0x80) -+#define NCSI_PKT_RSP_RC (NCSI_PKT_CMD_RC + 0x80) -+#define NCSI_PKT_RSP_ECNT (NCSI_PKT_CMD_ECNT + 0x80) -+#define NCSI_PKT_RSP_DCNT (NCSI_PKT_CMD_DCNT + 0x80) -+#define NCSI_PKT_RSP_AE (NCSI_PKT_CMD_AE + 0x80) -+#define NCSI_PKT_RSP_SL (NCSI_PKT_CMD_SL + 0x80) -+#define NCSI_PKT_RSP_GLS (NCSI_PKT_CMD_GLS + 0x80) -+#define NCSI_PKT_RSP_SVF (NCSI_PKT_CMD_SVF + 0x80) -+#define NCSI_PKT_RSP_EV (NCSI_PKT_CMD_EV + 0x80) -+#define NCSI_PKT_RSP_DV (NCSI_PKT_CMD_DV + 0x80) -+#define NCSI_PKT_RSP_SMA (NCSI_PKT_CMD_SMA + 0x80) -+#define NCSI_PKT_RSP_EBF (NCSI_PKT_CMD_EBF + 0x80) -+#define NCSI_PKT_RSP_DBF (NCSI_PKT_CMD_DBF + 0x80) -+#define NCSI_PKT_RSP_EGMF (NCSI_PKT_CMD_EGMF + 0x80) -+#define NCSI_PKT_RSP_DGMF (NCSI_PKT_CMD_DGMF + 0x80) -+#define NCSI_PKT_RSP_SNFC (NCSI_PKT_CMD_SNFC + 0x80) -+#define NCSI_PKT_RSP_GVI (NCSI_PKT_CMD_GVI + 0x80) -+#define NCSI_PKT_RSP_GC (NCSI_PKT_CMD_GC + 0x80) -+#define NCSI_PKT_RSP_GP (NCSI_PKT_CMD_GP + 0x80) -+#define NCSI_PKT_RSP_GCPS (NCSI_PKT_CMD_GCPS + 0x80) -+#define NCSI_PKT_RSP_GNS (NCSI_PKT_CMD_GNS + 0x80) -+#define NCSI_PKT_RSP_GNPTS (NCSI_PKT_CMD_GNPTS + 
0x80) -+#define NCSI_PKT_RSP_GPS (NCSI_PKT_CMD_GPS + 0x80) -+#define NCSI_PKT_RSP_OEM (NCSI_PKT_CMD_OEM + 0x80) -+#define NCSI_PKT_RSP_PLDM (NCSI_PKT_CMD_PLDM + 0x80) -+#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80) -+ -+/* NCSI response code/reason */ -+#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */ -+#define NCSI_PKT_RSP_C_FAILED 0x0001 /* Command Failed */ -+#define NCSI_PKT_RSP_C_UNAVAILABLE 0x0002 /* Command Unavailable */ -+#define NCSI_PKT_RSP_C_UNSUPPORTED 0x0003 /* Command Unsupported */ -+#define NCSI_PKT_RSP_R_NO_ERROR 0x0000 /* No Error */ -+#define NCSI_PKT_RSP_R_INTERFACE 0x0001 /* Interface not ready */ -+#define NCSI_PKT_RSP_R_PARAM 0x0002 /* Invalid Parameter */ -+#define NCSI_PKT_RSP_R_CHANNEL 0x0003 /* Channel not Ready */ -+#define NCSI_PKT_RSP_R_PACKAGE 0x0004 /* Package not Ready */ -+#define NCSI_PKT_RSP_R_LENGTH 0x0005 /* Invalid payload length */ -+#define NCSI_PKT_RSP_R_UNKNOWN 0x7fff /* Command type unsupported */ -+ -+/* NCSI AEN packet type */ -+#define NCSI_PKT_AEN 0xFF /* AEN Packet */ -+#define NCSI_PKT_AEN_LSC 0x00 /* Link status change */ -+#define NCSI_PKT_AEN_CR 0x01 /* Configuration required */ -+#define NCSI_PKT_AEN_HNCDSC 0x02 /* HNC driver status change */ -+ -+#endif /* NCSI_PKT_H */ -diff --git a/slirp/src/ncsi.c b/slirp/src/ncsi.c -new file mode 100644 -index 0000000..6864b73 ---- /dev/null -+++ b/slirp/src/ncsi.c -@@ -0,0 +1,192 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * NC-SI (Network Controller Sideband Interface) "echo" model -+ * -+ * Copyright (C) 2016-2018 IBM Corp. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * 1. Redistributions of source code must retain the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer. -+ * -+ * 2. 
Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer in the documentation and/or other materials provided -+ * with the distribution. -+ * -+ * 3. Neither the name of the copyright holder nor the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -+ * OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+#include "slirp.h" -+ -+#include "ncsi-pkt.h" -+ -+static uint32_t ncsi_calculate_checksum(uint16_t *data, int len) -+{ -+ uint32_t checksum = 0; -+ int i; -+ -+ /* -+ * 32-bit unsigned sum of the NC-SI packet header and NC-SI packet -+ * payload interpreted as a series of 16-bit unsigned integer values. 
-+ */ -+ for (i = 0; i < len; i++) { -+ checksum += htons(data[i]); -+ } -+ -+ checksum = (~checksum + 1); -+ return checksum; -+} -+ -+/* Get Capabilities */ -+static int ncsi_rsp_handler_gc(struct ncsi_rsp_pkt_hdr *rnh) -+{ -+ struct ncsi_rsp_gc_pkt *rsp = (struct ncsi_rsp_gc_pkt *)rnh; -+ -+ rsp->cap = htonl(~0); -+ rsp->bc_cap = htonl(~0); -+ rsp->mc_cap = htonl(~0); -+ rsp->buf_cap = htonl(~0); -+ rsp->aen_cap = htonl(~0); -+ rsp->vlan_mode = 0xff; -+ rsp->uc_cnt = 2; -+ return 0; -+} -+ -+/* Get Link status */ -+static int ncsi_rsp_handler_gls(struct ncsi_rsp_pkt_hdr *rnh) -+{ -+ struct ncsi_rsp_gls_pkt *rsp = (struct ncsi_rsp_gls_pkt *)rnh; -+ -+ rsp->status = htonl(0x1); -+ return 0; -+} -+ -+/* Get Parameters */ -+static int ncsi_rsp_handler_gp(struct ncsi_rsp_pkt_hdr *rnh) -+{ -+ struct ncsi_rsp_gp_pkt *rsp = (struct ncsi_rsp_gp_pkt *)rnh; -+ -+ /* no MAC address filters or VLAN filters on the channel */ -+ rsp->mac_cnt = 0; -+ rsp->mac_enable = 0; -+ rsp->vlan_cnt = 0; -+ rsp->vlan_enable = 0; -+ -+ return 0; -+} -+ -+static const struct ncsi_rsp_handler { -+ unsigned char type; -+ int payload; -+ int (*handler)(struct ncsi_rsp_pkt_hdr *rnh); -+} ncsi_rsp_handlers[] = { { NCSI_PKT_RSP_CIS, 4, NULL }, -+ { NCSI_PKT_RSP_SP, 4, NULL }, -+ { NCSI_PKT_RSP_DP, 4, NULL }, -+ { NCSI_PKT_RSP_EC, 4, NULL }, -+ { NCSI_PKT_RSP_DC, 4, NULL }, -+ { NCSI_PKT_RSP_RC, 4, NULL }, -+ { NCSI_PKT_RSP_ECNT, 4, NULL }, -+ { NCSI_PKT_RSP_DCNT, 4, NULL }, -+ { NCSI_PKT_RSP_AE, 4, NULL }, -+ { NCSI_PKT_RSP_SL, 4, NULL }, -+ { NCSI_PKT_RSP_GLS, 16, ncsi_rsp_handler_gls }, -+ { NCSI_PKT_RSP_SVF, 4, NULL }, -+ { NCSI_PKT_RSP_EV, 4, NULL }, -+ { NCSI_PKT_RSP_DV, 4, NULL }, -+ { NCSI_PKT_RSP_SMA, 4, NULL }, -+ { NCSI_PKT_RSP_EBF, 4, NULL }, -+ { NCSI_PKT_RSP_DBF, 4, NULL }, -+ { NCSI_PKT_RSP_EGMF, 4, NULL }, -+ { NCSI_PKT_RSP_DGMF, 4, NULL }, -+ { NCSI_PKT_RSP_SNFC, 4, NULL }, -+ { NCSI_PKT_RSP_GVI, 40, NULL }, -+ { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc }, -+ { NCSI_PKT_RSP_GP, 40, 
ncsi_rsp_handler_gp }, -+ { NCSI_PKT_RSP_GCPS, 172, NULL }, -+ { NCSI_PKT_RSP_GNS, 172, NULL }, -+ { NCSI_PKT_RSP_GNPTS, 172, NULL }, -+ { NCSI_PKT_RSP_GPS, 8, NULL }, -+ { NCSI_PKT_RSP_OEM, 0, NULL }, -+ { NCSI_PKT_RSP_PLDM, 0, NULL }, -+ { NCSI_PKT_RSP_GPUUID, 20, NULL } }; -+ -+/* -+ * packet format : ncsi header + payload + checksum -+ */ -+#define NCSI_MAX_PAYLOAD 172 -+#define NCSI_MAX_LEN (sizeof(struct ncsi_pkt_hdr) + NCSI_MAX_PAYLOAD + 4) -+ -+void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) -+{ -+ struct ncsi_pkt_hdr *nh = (struct ncsi_pkt_hdr *)(pkt + ETH_HLEN); -+ uint8_t ncsi_reply[ETH_HLEN + NCSI_MAX_LEN]; -+ struct ethhdr *reh = (struct ethhdr *)ncsi_reply; -+ struct ncsi_rsp_pkt_hdr *rnh = -+ (struct ncsi_rsp_pkt_hdr *)(ncsi_reply + ETH_HLEN); -+ const struct ncsi_rsp_handler *handler = NULL; -+ int i; -+ int ncsi_rsp_len = sizeof(*nh); -+ uint32_t checksum; -+ uint32_t *pchecksum; -+ -+ memset(ncsi_reply, 0, sizeof(ncsi_reply)); -+ -+ memset(reh->h_dest, 0xff, ETH_ALEN); -+ memset(reh->h_source, 0xff, ETH_ALEN); -+ reh->h_proto = htons(ETH_P_NCSI); -+ -+ for (i = 0; i < G_N_ELEMENTS(ncsi_rsp_handlers); i++) { -+ if (ncsi_rsp_handlers[i].type == nh->type + 0x80) { -+ handler = &ncsi_rsp_handlers[i]; -+ break; -+ } -+ } -+ -+ rnh->common.mc_id = nh->mc_id; -+ rnh->common.revision = NCSI_PKT_REVISION; -+ rnh->common.id = nh->id; -+ rnh->common.type = nh->type + 0x80; -+ rnh->common.channel = nh->channel; -+ -+ if (handler) { -+ rnh->common.length = htons(handler->payload); -+ rnh->code = htons(NCSI_PKT_RSP_C_COMPLETED); -+ rnh->reason = htons(NCSI_PKT_RSP_R_NO_ERROR); -+ -+ if (handler->handler) { -+ /* TODO: handle errors */ -+ handler->handler(rnh); -+ } -+ ncsi_rsp_len += handler->payload; -+ } else { -+ rnh->common.length = 0; -+ rnh->code = htons(NCSI_PKT_RSP_C_UNAVAILABLE); -+ rnh->reason = htons(NCSI_PKT_RSP_R_UNKNOWN); -+ } -+ -+ /* Add the optional checksum at the end of the frame. 
*/ -+ checksum = ncsi_calculate_checksum((uint16_t *)rnh, ncsi_rsp_len); -+ pchecksum = (uint32_t *)((void *)rnh + ncsi_rsp_len); -+ *pchecksum = htonl(checksum); -+ ncsi_rsp_len += 4; -+ -+ slirp_send_packet_all(slirp, ncsi_reply, ETH_HLEN + ncsi_rsp_len); -+} -diff --git a/slirp/src/ndp_table.c b/slirp/src/ndp_table.c -new file mode 100644 -index 0000000..110d6ea ---- /dev/null -+++ b/slirp/src/ndp_table.c -@@ -0,0 +1,87 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 2013 -+ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. -+ */ -+ -+#include "slirp.h" -+ -+void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, -+ uint8_t ethaddr[ETH_ALEN]) -+{ -+ char addrstr[INET6_ADDRSTRLEN]; -+ NdpTable *ndp_table = &slirp->ndp_table; -+ int i; -+ -+ inet_ntop(AF_INET6, &(ip_addr), addrstr, INET6_ADDRSTRLEN); -+ -+ DEBUG_CALL("ndp_table_add"); -+ DEBUG_ARG("ip = %s", addrstr); -+ DEBUG_ARG("hw addr = %02x:%02x:%02x:%02x:%02x:%02x", ethaddr[0], ethaddr[1], -+ ethaddr[2], ethaddr[3], ethaddr[4], ethaddr[5]); -+ -+ if (IN6_IS_ADDR_MULTICAST(&ip_addr) || in6_zero(&ip_addr)) { -+ /* Do not register multicast or unspecified addresses */ -+ DEBUG_CALL(" abort: do not register multicast or unspecified address"); -+ return; -+ } -+ -+ /* Search for an entry */ -+ for (i = 0; i < NDP_TABLE_SIZE; i++) { -+ if (in6_equal(&ndp_table->table[i].ip_addr, &ip_addr)) { -+ DEBUG_CALL(" already in table: update the entry"); -+ /* Update the entry */ -+ memcpy(ndp_table->table[i].eth_addr, ethaddr, ETH_ALEN); -+ return; -+ } -+ } -+ -+ /* No entry found, create a new one */ -+ DEBUG_CALL(" create new entry"); -+ ndp_table->table[ndp_table->next_victim].ip_addr = ip_addr; -+ memcpy(ndp_table->table[ndp_table->next_victim].eth_addr, ethaddr, -+ ETH_ALEN); -+ ndp_table->next_victim = (ndp_table->next_victim + 1) % NDP_TABLE_SIZE; -+} -+ -+bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, -+ uint8_t out_ethaddr[ETH_ALEN]) -+{ -+ char 
addrstr[INET6_ADDRSTRLEN]; -+ NdpTable *ndp_table = &slirp->ndp_table; -+ int i; -+ -+ inet_ntop(AF_INET6, &(ip_addr), addrstr, INET6_ADDRSTRLEN); -+ -+ DEBUG_CALL("ndp_table_search"); -+ DEBUG_ARG("ip = %s", addrstr); -+ -+ assert(!in6_zero(&ip_addr)); -+ -+ /* Multicast address: fec0::abcd:efgh/8 -> 33:33:ab:cd:ef:gh */ -+ if (IN6_IS_ADDR_MULTICAST(&ip_addr)) { -+ out_ethaddr[0] = 0x33; -+ out_ethaddr[1] = 0x33; -+ out_ethaddr[2] = ip_addr.s6_addr[12]; -+ out_ethaddr[3] = ip_addr.s6_addr[13]; -+ out_ethaddr[4] = ip_addr.s6_addr[14]; -+ out_ethaddr[5] = ip_addr.s6_addr[15]; -+ DEBUG_ARG("multicast addr = %02x:%02x:%02x:%02x:%02x:%02x", -+ out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], -+ out_ethaddr[3], out_ethaddr[4], out_ethaddr[5]); -+ return 1; -+ } -+ -+ for (i = 0; i < NDP_TABLE_SIZE; i++) { -+ if (in6_equal(&ndp_table->table[i].ip_addr, &ip_addr)) { -+ memcpy(out_ethaddr, ndp_table->table[i].eth_addr, ETH_ALEN); -+ DEBUG_ARG("found hw addr = %02x:%02x:%02x:%02x:%02x:%02x", -+ out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], -+ out_ethaddr[3], out_ethaddr[4], out_ethaddr[5]); -+ return 1; -+ } -+ } -+ -+ DEBUG_CALL(" ip not found in table"); -+ return 0; -+} -diff --git a/slirp/src/sbuf.c b/slirp/src/sbuf.c -new file mode 100644 -index 0000000..abced48 ---- /dev/null -+++ b/slirp/src/sbuf.c -@@ -0,0 +1,186 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1995 Danny Gasparovski. 
-+ */ -+ -+#include "slirp.h" -+ -+static void sbappendsb(struct sbuf *sb, struct mbuf *m); -+ -+void sbfree(struct sbuf *sb) -+{ -+ free(sb->sb_data); -+} -+ -+bool sbdrop(struct sbuf *sb, int num) -+{ -+ int limit = sb->sb_datalen / 2; -+ -+ /* -+ * We can only drop how much we have -+ * This should never succeed -+ */ -+ if (num > sb->sb_cc) -+ num = sb->sb_cc; -+ sb->sb_cc -= num; -+ sb->sb_rptr += num; -+ if (sb->sb_rptr >= sb->sb_data + sb->sb_datalen) -+ sb->sb_rptr -= sb->sb_datalen; -+ -+ if (sb->sb_cc < limit && sb->sb_cc + num >= limit) { -+ return true; -+ } -+ -+ return false; -+} -+ -+void sbreserve(struct sbuf *sb, int size) -+{ -+ if (sb->sb_data) { -+ /* Already alloced, realloc if necessary */ -+ if (sb->sb_datalen != size) { -+ sb->sb_wptr = sb->sb_rptr = sb->sb_data = -+ (char *)realloc(sb->sb_data, size); -+ sb->sb_cc = 0; -+ if (sb->sb_wptr) -+ sb->sb_datalen = size; -+ else -+ sb->sb_datalen = 0; -+ } -+ } else { -+ sb->sb_wptr = sb->sb_rptr = sb->sb_data = (char *)malloc(size); -+ sb->sb_cc = 0; -+ if (sb->sb_wptr) -+ sb->sb_datalen = size; -+ else -+ sb->sb_datalen = 0; -+ } -+} -+ -+/* -+ * Try and write() to the socket, whatever doesn't get written -+ * append to the buffer... for a host with a fast net connection, -+ * this prevents an unnecessary copy of the data -+ * (the socket is non-blocking, so we won't hang) -+ */ -+void sbappend(struct socket *so, struct mbuf *m) -+{ -+ int ret = 0; -+ -+ DEBUG_CALL("sbappend"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("m = %p", m); -+ DEBUG_ARG("m->m_len = %d", m->m_len); -+ -+ /* Shouldn't happen, but... e.g. 
foreign host closes connection */ -+ if (m->m_len <= 0) { -+ m_free(m); -+ return; -+ } -+ -+ /* -+ * If there is urgent data, call sosendoob -+ * if not all was sent, sowrite will take care of the rest -+ * (The rest of this function is just an optimisation) -+ */ -+ if (so->so_urgc) { -+ sbappendsb(&so->so_rcv, m); -+ m_free(m); -+ (void)sosendoob(so); -+ return; -+ } -+ -+ /* -+ * We only write if there's nothing in the buffer, -+ * ottherwise it'll arrive out of order, and hence corrupt -+ */ -+ if (!so->so_rcv.sb_cc) -+ ret = slirp_send(so, m->m_data, m->m_len, 0); -+ -+ if (ret <= 0) { -+ /* -+ * Nothing was written -+ * It's possible that the socket has closed, but -+ * we don't need to check because if it has closed, -+ * it will be detected in the normal way by soread() -+ */ -+ sbappendsb(&so->so_rcv, m); -+ } else if (ret != m->m_len) { -+ /* -+ * Something was written, but not everything.. -+ * sbappendsb the rest -+ */ -+ m->m_len -= ret; -+ m->m_data += ret; -+ sbappendsb(&so->so_rcv, m); -+ } /* else */ -+ /* Whatever happened, we free the mbuf */ -+ m_free(m); -+} -+ -+/* -+ * Copy the data from m into sb -+ * The caller is responsible to make sure there's enough room -+ */ -+static void sbappendsb(struct sbuf *sb, struct mbuf *m) -+{ -+ int len, n, nn; -+ -+ len = m->m_len; -+ -+ if (sb->sb_wptr < sb->sb_rptr) { -+ n = sb->sb_rptr - sb->sb_wptr; -+ if (n > len) -+ n = len; -+ memcpy(sb->sb_wptr, m->m_data, n); -+ } else { -+ /* Do the right edge first */ -+ n = sb->sb_data + sb->sb_datalen - sb->sb_wptr; -+ if (n > len) -+ n = len; -+ memcpy(sb->sb_wptr, m->m_data, n); -+ len -= n; -+ if (len) { -+ /* Now the left edge */ -+ nn = sb->sb_rptr - sb->sb_data; -+ if (nn > len) -+ nn = len; -+ memcpy(sb->sb_data, m->m_data + n, nn); -+ n += nn; -+ } -+ } -+ -+ sb->sb_cc += n; -+ sb->sb_wptr += n; -+ if (sb->sb_wptr >= sb->sb_data + sb->sb_datalen) -+ sb->sb_wptr -= sb->sb_datalen; -+} -+ -+/* -+ * Copy data from sbuf to a normal, straight buffer -+ * 
Don't update the sbuf rptr, this will be -+ * done in sbdrop when the data is acked -+ */ -+void sbcopy(struct sbuf *sb, int off, int len, char *to) -+{ -+ char *from; -+ -+ from = sb->sb_rptr + off; -+ if (from >= sb->sb_data + sb->sb_datalen) -+ from -= sb->sb_datalen; -+ -+ if (from < sb->sb_wptr) { -+ if (len > sb->sb_cc) -+ len = sb->sb_cc; -+ memcpy(to, from, len); -+ } else { -+ /* re-use off */ -+ off = (sb->sb_data + sb->sb_datalen) - from; -+ if (off > len) -+ off = len; -+ memcpy(to, from, off); -+ len -= off; -+ if (len) -+ memcpy(to + off, sb->sb_data, len); -+ } -+} -diff --git a/slirp/src/sbuf.h b/slirp/src/sbuf.h -new file mode 100644 -index 0000000..1eb9f9e ---- /dev/null -+++ b/slirp/src/sbuf.h -@@ -0,0 +1,27 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#ifndef SBUF_H -+#define SBUF_H -+ -+#define sbspace(sb) ((sb)->sb_datalen - (sb)->sb_cc) -+ -+struct sbuf { -+ uint32_t sb_cc; /* actual chars in buffer */ -+ uint32_t sb_datalen; /* Length of data */ -+ char *sb_wptr; /* write pointer. points to where the next -+ * bytes should be written in the sbuf */ -+ char *sb_rptr; /* read pointer. 
points to where the next -+ * byte should be read from the sbuf */ -+ char *sb_data; /* Actual data */ -+}; -+ -+void sbfree(struct sbuf *); -+bool sbdrop(struct sbuf *, int); -+void sbreserve(struct sbuf *, int); -+void sbappend(struct socket *, struct mbuf *); -+void sbcopy(struct sbuf *, int, int, char *); -+ -+#endif -diff --git a/slirp/src/slirp.c b/slirp/src/slirp.c -new file mode 100644 -index 0000000..b0194cb ---- /dev/null -+++ b/slirp/src/slirp.c -@@ -0,0 +1,1112 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * libslirp glue -+ * -+ * Copyright (c) 2004-2008 Fabrice Bellard -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. 
-+ */ -+#include "slirp.h" -+ -+ -+#ifndef _WIN32 -+#include -+#endif -+ -+int slirp_debug; -+ -+/* Define to 1 if you want KEEPALIVE timers */ -+bool slirp_do_keepalive; -+ -+/* host loopback address */ -+struct in_addr loopback_addr; -+/* host loopback network mask */ -+unsigned long loopback_mask; -+ -+/* emulated hosts use the MAC addr 52:55:IP:IP:IP:IP */ -+static const uint8_t special_ethaddr[ETH_ALEN] = { 0x52, 0x55, 0x00, -+ 0x00, 0x00, 0x00 }; -+ -+unsigned curtime; -+ -+static struct in_addr dns_addr; -+#ifndef _WIN32 -+static struct in6_addr dns6_addr; -+#endif -+static unsigned dns_addr_time; -+#ifndef _WIN32 -+static unsigned dns6_addr_time; -+#endif -+ -+#define TIMEOUT_FAST 2 /* milliseconds */ -+#define TIMEOUT_SLOW 499 /* milliseconds */ -+/* for the aging of certain requests like DNS */ -+#define TIMEOUT_DEFAULT 1000 /* milliseconds */ -+ -+#ifdef _WIN32 -+ -+int get_dns_addr(struct in_addr *pdns_addr) -+{ -+ FIXED_INFO *FixedInfo = NULL; -+ ULONG BufLen; -+ DWORD ret; -+ IP_ADDR_STRING *pIPAddr; -+ struct in_addr tmp_addr; -+ -+ if (dns_addr.s_addr != 0 && (curtime - dns_addr_time) < TIMEOUT_DEFAULT) { -+ *pdns_addr = dns_addr; -+ return 0; -+ } -+ -+ FixedInfo = (FIXED_INFO *)GlobalAlloc(GPTR, sizeof(FIXED_INFO)); -+ BufLen = sizeof(FIXED_INFO); -+ -+ if (ERROR_BUFFER_OVERFLOW == GetNetworkParams(FixedInfo, &BufLen)) { -+ if (FixedInfo) { -+ GlobalFree(FixedInfo); -+ FixedInfo = NULL; -+ } -+ FixedInfo = GlobalAlloc(GPTR, BufLen); -+ } -+ -+ if ((ret = GetNetworkParams(FixedInfo, &BufLen)) != ERROR_SUCCESS) { -+ printf("GetNetworkParams failed. 
ret = %08x\n", (unsigned)ret); -+ if (FixedInfo) { -+ GlobalFree(FixedInfo); -+ FixedInfo = NULL; -+ } -+ return -1; -+ } -+ -+ pIPAddr = &(FixedInfo->DnsServerList); -+ inet_aton(pIPAddr->IpAddress.String, &tmp_addr); -+ *pdns_addr = tmp_addr; -+ dns_addr = tmp_addr; -+ dns_addr_time = curtime; -+ if (FixedInfo) { -+ GlobalFree(FixedInfo); -+ FixedInfo = NULL; -+ } -+ return 0; -+} -+ -+int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) -+{ -+ return -1; -+} -+ -+static void winsock_cleanup(void) -+{ -+ WSACleanup(); -+} -+ -+#else -+ -+static int get_dns_addr_cached(void *pdns_addr, void *cached_addr, -+ socklen_t addrlen, struct stat *cached_stat, -+ unsigned *cached_time) -+{ -+ struct stat old_stat; -+ if (curtime - *cached_time < TIMEOUT_DEFAULT) { -+ memcpy(pdns_addr, cached_addr, addrlen); -+ return 0; -+ } -+ old_stat = *cached_stat; -+ if (stat("/etc/resolv.conf", cached_stat) != 0) { -+ return -1; -+ } -+ if (cached_stat->st_dev == old_stat.st_dev && -+ cached_stat->st_ino == old_stat.st_ino && -+ cached_stat->st_size == old_stat.st_size && -+ cached_stat->st_mtime == old_stat.st_mtime) { -+ memcpy(pdns_addr, cached_addr, addrlen); -+ return 0; -+ } -+ return 1; -+} -+ -+static int get_dns_addr_resolv_conf(int af, void *pdns_addr, void *cached_addr, -+ socklen_t addrlen, uint32_t *scope_id, -+ unsigned *cached_time) -+{ -+ char buff[512]; -+ char buff2[257]; -+ FILE *f; -+ int found = 0; -+ void *tmp_addr = alloca(addrlen); -+ unsigned if_index; -+ -+ f = fopen("/etc/resolv.conf", "r"); -+ if (!f) -+ return -1; -+ -+ DEBUG_MISC("IP address of your DNS(s):"); -+ while (fgets(buff, 512, f) != NULL) { -+ if (sscanf(buff, "nameserver%*[ \t]%256s", buff2) == 1) { -+ char *c = strchr(buff2, '%'); -+ if (c) { -+ if_index = if_nametoindex(c + 1); -+ *c = '\0'; -+ } else { -+ if_index = 0; -+ } -+ -+ if (!inet_pton(af, buff2, tmp_addr)) { -+ continue; -+ } -+ /* If it's the first one, set it to dns_addr */ -+ if (!found) { -+ memcpy(pdns_addr, 
tmp_addr, addrlen); -+ memcpy(cached_addr, tmp_addr, addrlen); -+ if (scope_id) { -+ *scope_id = if_index; -+ } -+ *cached_time = curtime; -+ } -+ -+ if (++found > 3) { -+ DEBUG_MISC(" (more)"); -+ break; -+ } else if (slirp_debug & DBG_MISC) { -+ char s[INET6_ADDRSTRLEN]; -+ const char *res = inet_ntop(af, tmp_addr, s, sizeof(s)); -+ if (!res) { -+ res = " (string conversion error)"; -+ } -+ DEBUG_MISC(" %s", res); -+ } -+ } -+ } -+ fclose(f); -+ if (!found) -+ return -1; -+ return 0; -+} -+ -+int get_dns_addr(struct in_addr *pdns_addr) -+{ -+ static struct stat dns_addr_stat; -+ -+ if (dns_addr.s_addr != 0) { -+ int ret; -+ ret = get_dns_addr_cached(pdns_addr, &dns_addr, sizeof(dns_addr), -+ &dns_addr_stat, &dns_addr_time); -+ if (ret <= 0) { -+ return ret; -+ } -+ } -+ return get_dns_addr_resolv_conf(AF_INET, pdns_addr, &dns_addr, -+ sizeof(dns_addr), NULL, &dns_addr_time); -+} -+ -+int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) -+{ -+ static struct stat dns6_addr_stat; -+ -+ if (!in6_zero(&dns6_addr)) { -+ int ret; -+ ret = get_dns_addr_cached(pdns6_addr, &dns6_addr, sizeof(dns6_addr), -+ &dns6_addr_stat, &dns6_addr_time); -+ if (ret <= 0) { -+ return ret; -+ } -+ } -+ return get_dns_addr_resolv_conf(AF_INET6, pdns6_addr, &dns6_addr, -+ sizeof(dns6_addr), scope_id, -+ &dns6_addr_time); -+} -+ -+#endif -+ -+static void slirp_init_once(void) -+{ -+ static int initialized; -+ const char *debug; -+#ifdef _WIN32 -+ WSADATA Data; -+#endif -+ -+ if (initialized) { -+ return; -+ } -+ initialized = 1; -+ -+#ifdef _WIN32 -+ WSAStartup(MAKEWORD(2, 0), &Data); -+ atexit(winsock_cleanup); -+#endif -+ -+ loopback_addr.s_addr = htonl(INADDR_LOOPBACK); -+ loopback_mask = htonl(IN_CLASSA_NET); -+ -+ debug = g_getenv("SLIRP_DEBUG"); -+ if (debug) { -+ const GDebugKey keys[] = { -+ { "call", DBG_CALL }, -+ { "misc", DBG_MISC }, -+ { "error", DBG_ERROR }, -+ { "tftp", DBG_TFTP }, -+ }; -+ slirp_debug = g_parse_debug_string(debug, keys, G_N_ELEMENTS(keys)); -+ } 
-+} -+ -+Slirp *slirp_init(int restricted, bool in_enabled, struct in_addr vnetwork, -+ struct in_addr vnetmask, struct in_addr vhost, -+ bool in6_enabled, struct in6_addr vprefix_addr6, -+ uint8_t vprefix_len, struct in6_addr vhost6, -+ const char *vhostname, const char *tftp_server_name, -+ const char *tftp_path, const char *bootfile, -+ struct in_addr vdhcp_start, struct in_addr vnameserver, -+ struct in6_addr vnameserver6, const char **vdnssearch, -+ const char *vdomainname, const SlirpCb *callbacks, -+ void *opaque) -+{ -+ Slirp *slirp = g_malloc0(sizeof(Slirp)); -+ -+ slirp_init_once(); -+ -+ slirp->opaque = opaque; -+ slirp->cb = callbacks; -+ slirp->grand = g_rand_new(); -+ slirp->restricted = restricted; -+ -+ slirp->in_enabled = in_enabled; -+ slirp->in6_enabled = in6_enabled; -+ -+ if_init(slirp); -+ ip_init(slirp); -+ ip6_init(slirp); -+ -+ /* Initialise mbufs *after* setting the MTU */ -+ m_init(slirp); -+ -+ slirp->vnetwork_addr = vnetwork; -+ slirp->vnetwork_mask = vnetmask; -+ slirp->vhost_addr = vhost; -+ slirp->vprefix_addr6 = vprefix_addr6; -+ slirp->vprefix_len = vprefix_len; -+ slirp->vhost_addr6 = vhost6; -+ if (vhostname) { -+ slirp_pstrcpy(slirp->client_hostname, sizeof(slirp->client_hostname), -+ vhostname); -+ } -+ slirp->tftp_prefix = g_strdup(tftp_path); -+ slirp->bootp_filename = g_strdup(bootfile); -+ slirp->vdomainname = g_strdup(vdomainname); -+ slirp->vdhcp_startaddr = vdhcp_start; -+ slirp->vnameserver_addr = vnameserver; -+ slirp->vnameserver_addr6 = vnameserver6; -+ slirp->tftp_server_name = g_strdup(tftp_server_name); -+ -+ if (vdnssearch) { -+ translate_dnssearch(slirp, vdnssearch); -+ } -+ -+ return slirp; -+} -+ -+void slirp_cleanup(Slirp *slirp) -+{ -+ struct gfwd_list *e, *next; -+ -+ for (e = slirp->guestfwd_list; e; e = next) { -+ next = e->ex_next; -+ g_free(e->ex_exec); -+ g_free(e); -+ } -+ -+ ip_cleanup(slirp); -+ ip6_cleanup(slirp); -+ m_cleanup(slirp); -+ -+ g_rand_free(slirp->grand); -+ -+ 
g_free(slirp->vdnssearch); -+ g_free(slirp->tftp_prefix); -+ g_free(slirp->bootp_filename); -+ g_free(slirp->vdomainname); -+ g_free(slirp); -+} -+ -+#define CONN_CANFSEND(so) \ -+ (((so)->so_state & (SS_FCANTSENDMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED) -+#define CONN_CANFRCV(so) \ -+ (((so)->so_state & (SS_FCANTRCVMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED) -+ -+static void slirp_update_timeout(Slirp *slirp, uint32_t *timeout) -+{ -+ uint32_t t; -+ -+ if (*timeout <= TIMEOUT_FAST) { -+ return; -+ } -+ -+ t = MIN(1000, *timeout); -+ -+ /* If we have tcp timeout with slirp, then we will fill @timeout with -+ * more precise value. -+ */ -+ if (slirp->time_fasttimo) { -+ *timeout = TIMEOUT_FAST; -+ return; -+ } -+ if (slirp->do_slowtimo) { -+ t = MIN(TIMEOUT_SLOW, t); -+ } -+ *timeout = t; -+} -+ -+void slirp_pollfds_fill(Slirp *slirp, uint32_t *timeout, -+ SlirpAddPollCb add_poll, void *opaque) -+{ -+ struct socket *so, *so_next; -+ -+ /* -+ * First, TCP sockets -+ */ -+ -+ /* -+ * *_slowtimo needs calling if there are IP fragments -+ * in the fragment queue, or there are TCP connections active -+ */ -+ slirp->do_slowtimo = ((slirp->tcb.so_next != &slirp->tcb) || -+ (&slirp->ipq.ip_link != slirp->ipq.ip_link.next)); -+ -+ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) { -+ int events = 0; -+ -+ so_next = so->so_next; -+ -+ so->pollfds_idx = -1; -+ -+ /* -+ * See if we need a tcp_fasttimo -+ */ -+ if (slirp->time_fasttimo == 0 && so->so_tcpcb->t_flags & TF_DELACK) { -+ slirp->time_fasttimo = curtime; /* Flag when want a fasttimo */ -+ } -+ -+ /* -+ * NOFDREF can include still connecting to local-host, -+ * newly socreated() sockets etc. Don't want to select these. 
-+ */ -+ if (so->so_state & SS_NOFDREF || so->s == -1) { -+ continue; -+ } -+ -+ /* -+ * Set for reading sockets which are accepting -+ */ -+ if (so->so_state & SS_FACCEPTCONN) { -+ so->pollfds_idx = add_poll( -+ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); -+ continue; -+ } -+ -+ /* -+ * Set for writing sockets which are connecting -+ */ -+ if (so->so_state & SS_ISFCONNECTING) { -+ so->pollfds_idx = -+ add_poll(so->s, SLIRP_POLL_OUT | SLIRP_POLL_ERR, opaque); -+ continue; -+ } -+ -+ /* -+ * Set for writing if we are connected, can send more, and -+ * we have something to send -+ */ -+ if (CONN_CANFSEND(so) && so->so_rcv.sb_cc) { -+ events |= SLIRP_POLL_OUT | SLIRP_POLL_ERR; -+ } -+ -+ /* -+ * Set for reading (and urgent data) if we are connected, can -+ * receive more, and we have room for it XXX /2 ? -+ */ -+ if (CONN_CANFRCV(so) && -+ (so->so_snd.sb_cc < (so->so_snd.sb_datalen / 2))) { -+ events |= SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR | -+ SLIRP_POLL_PRI; -+ } -+ -+ if (events) { -+ so->pollfds_idx = add_poll(so->s, events, opaque); -+ } -+ } -+ -+ /* -+ * UDP sockets -+ */ -+ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { -+ so_next = so->so_next; -+ -+ so->pollfds_idx = -1; -+ -+ /* -+ * See if it's timed out -+ */ -+ if (so->so_expire) { -+ if (so->so_expire <= curtime) { -+ udp_detach(so); -+ continue; -+ } else { -+ slirp->do_slowtimo = true; /* Let socket expire */ -+ } -+ } -+ -+ /* -+ * When UDP packets are received from over the -+ * link, they're sendto()'d straight away, so -+ * no need for setting for writing -+ * Limit the number of packets queued by this session -+ * to 4. Note that even though we try and limit this -+ * to 4 packets, the session could have more queued -+ * if the packets needed to be fragmented -+ * (XXX <= 4 ?) 
-+ */ -+ if ((so->so_state & SS_ISFCONNECTED) && so->so_queued <= 4) { -+ so->pollfds_idx = add_poll( -+ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); -+ } -+ } -+ -+ /* -+ * ICMP sockets -+ */ -+ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { -+ so_next = so->so_next; -+ -+ so->pollfds_idx = -1; -+ -+ /* -+ * See if it's timed out -+ */ -+ if (so->so_expire) { -+ if (so->so_expire <= curtime) { -+ icmp_detach(so); -+ continue; -+ } else { -+ slirp->do_slowtimo = true; /* Let socket expire */ -+ } -+ } -+ -+ if (so->so_state & SS_ISFCONNECTED) { -+ so->pollfds_idx = add_poll( -+ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); -+ } -+ } -+ -+ slirp_update_timeout(slirp, timeout); -+} -+ -+void slirp_pollfds_poll(Slirp *slirp, int select_error, -+ SlirpGetREventsCb get_revents, void *opaque) -+{ -+ struct socket *so, *so_next; -+ int ret; -+ -+ curtime = slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS; -+ -+ /* -+ * See if anything has timed out -+ */ -+ if (slirp->time_fasttimo && -+ ((curtime - slirp->time_fasttimo) >= TIMEOUT_FAST)) { -+ tcp_fasttimo(slirp); -+ slirp->time_fasttimo = 0; -+ } -+ if (slirp->do_slowtimo && -+ ((curtime - slirp->last_slowtimo) >= TIMEOUT_SLOW)) { -+ ip_slowtimo(slirp); -+ tcp_slowtimo(slirp); -+ slirp->last_slowtimo = curtime; -+ } -+ -+ /* -+ * Check sockets -+ */ -+ if (!select_error) { -+ /* -+ * Check TCP sockets -+ */ -+ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) { -+ int revents; -+ -+ so_next = so->so_next; -+ -+ revents = 0; -+ if (so->pollfds_idx != -1) { -+ revents = get_revents(so->pollfds_idx, opaque); -+ } -+ -+ if (so->so_state & SS_NOFDREF || so->s == -1) { -+ continue; -+ } -+ -+ /* -+ * Check for URG data -+ * This will soread as well, so no need to -+ * test for SLIRP_POLL_IN below if this succeeds -+ */ -+ if (revents & SLIRP_POLL_PRI) { -+ ret = sorecvoob(so); -+ if (ret < 0) { -+ /* Socket error might have resulted in the socket being -+ 
* removed, do not try to do anything more with it. */ -+ continue; -+ } -+ } -+ /* -+ * Check sockets for reading -+ */ -+ else if (revents & -+ (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR)) { -+ /* -+ * Check for incoming connections -+ */ -+ if (so->so_state & SS_FACCEPTCONN) { -+ tcp_connect(so); -+ continue; -+ } /* else */ -+ ret = soread(so); -+ -+ /* Output it if we read something */ -+ if (ret > 0) { -+ tcp_output(sototcpcb(so)); -+ } -+ if (ret < 0) { -+ /* Socket error might have resulted in the socket being -+ * removed, do not try to do anything more with it. */ -+ continue; -+ } -+ } -+ -+ /* -+ * Check sockets for writing -+ */ -+ if (!(so->so_state & SS_NOFDREF) && -+ (revents & (SLIRP_POLL_OUT | SLIRP_POLL_ERR))) { -+ /* -+ * Check for non-blocking, still-connecting sockets -+ */ -+ if (so->so_state & SS_ISFCONNECTING) { -+ /* Connected */ -+ so->so_state &= ~SS_ISFCONNECTING; -+ -+ ret = send(so->s, (const void *)&ret, 0, 0); -+ if (ret < 0) { -+ /* XXXXX Must fix, zero bytes is a NOP */ -+ if (errno == EAGAIN || errno == EWOULDBLOCK || -+ errno == EINPROGRESS || errno == ENOTCONN) { -+ continue; -+ } -+ -+ /* else failed */ -+ so->so_state &= SS_PERSISTENT_MASK; -+ so->so_state |= SS_NOFDREF; -+ } -+ /* else so->so_state &= ~SS_ISFCONNECTING; */ -+ -+ /* -+ * Continue tcp_input -+ */ -+ tcp_input((struct mbuf *)NULL, sizeof(struct ip), so, -+ so->so_ffamily); -+ /* continue; */ -+ } else { -+ ret = sowrite(so); -+ if (ret > 0) { -+ /* Call tcp_output in case we need to send a window -+ * update to the guest, otherwise it will be stuck -+ * until it sends a window probe. */ -+ tcp_output(sototcpcb(so)); -+ } -+ } -+ } -+ } -+ -+ /* -+ * Now UDP sockets. -+ * Incoming packets are sent straight away, they're not buffered. -+ * Incoming UDP data isn't buffered either. 
-+ */ -+ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { -+ int revents; -+ -+ so_next = so->so_next; -+ -+ revents = 0; -+ if (so->pollfds_idx != -1) { -+ revents = get_revents(so->pollfds_idx, opaque); -+ } -+ -+ if (so->s != -1 && -+ (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { -+ sorecvfrom(so); -+ } -+ } -+ -+ /* -+ * Check incoming ICMP relies. -+ */ -+ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { -+ int revents; -+ -+ so_next = so->so_next; -+ -+ revents = 0; -+ if (so->pollfds_idx != -1) { -+ revents = get_revents(so->pollfds_idx, opaque); -+ } -+ -+ if (so->s != -1 && -+ (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { -+ icmp_receive(so); -+ } -+ } -+ } -+ -+ if_start(slirp); -+} -+ -+static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) -+{ -+ struct slirp_arphdr *ah = (struct slirp_arphdr *)(pkt + ETH_HLEN); -+ uint8_t arp_reply[MAX(ETH_HLEN + sizeof(struct slirp_arphdr), 64)]; -+ struct ethhdr *reh = (struct ethhdr *)arp_reply; -+ struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_reply + ETH_HLEN); -+ int ar_op; -+ struct gfwd_list *ex_ptr; -+ -+ if (!slirp->in_enabled) { -+ return; -+ } -+ -+ ar_op = ntohs(ah->ar_op); -+ switch (ar_op) { -+ case ARPOP_REQUEST: -+ if (ah->ar_tip == ah->ar_sip) { -+ /* Gratuitous ARP */ -+ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); -+ return; -+ } -+ -+ if ((ah->ar_tip & slirp->vnetwork_mask.s_addr) == -+ slirp->vnetwork_addr.s_addr) { -+ if (ah->ar_tip == slirp->vnameserver_addr.s_addr || -+ ah->ar_tip == slirp->vhost_addr.s_addr) -+ goto arp_ok; -+ /* TODO: IPv6 */ -+ for (ex_ptr = slirp->guestfwd_list; ex_ptr; -+ ex_ptr = ex_ptr->ex_next) { -+ if (ex_ptr->ex_addr.s_addr == ah->ar_tip) -+ goto arp_ok; -+ } -+ return; -+ arp_ok: -+ memset(arp_reply, 0, sizeof(arp_reply)); -+ -+ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); -+ -+ /* ARP request for alias/dns mac address */ -+ memcpy(reh->h_dest, pkt + ETH_ALEN, ETH_ALEN); 
-+ memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); -+ memcpy(&reh->h_source[2], &ah->ar_tip, 4); -+ reh->h_proto = htons(ETH_P_ARP); -+ -+ rah->ar_hrd = htons(1); -+ rah->ar_pro = htons(ETH_P_IP); -+ rah->ar_hln = ETH_ALEN; -+ rah->ar_pln = 4; -+ rah->ar_op = htons(ARPOP_REPLY); -+ memcpy(rah->ar_sha, reh->h_source, ETH_ALEN); -+ rah->ar_sip = ah->ar_tip; -+ memcpy(rah->ar_tha, ah->ar_sha, ETH_ALEN); -+ rah->ar_tip = ah->ar_sip; -+ slirp_send_packet_all(slirp, arp_reply, sizeof(arp_reply)); -+ } -+ break; -+ case ARPOP_REPLY: -+ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); -+ break; -+ default: -+ break; -+ } -+} -+ -+void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) -+{ -+ struct mbuf *m; -+ int proto; -+ -+ if (pkt_len < ETH_HLEN) -+ return; -+ -+ proto = (((uint16_t)pkt[12]) << 8) + pkt[13]; -+ switch (proto) { -+ case ETH_P_ARP: -+ arp_input(slirp, pkt, pkt_len); -+ break; -+ case ETH_P_IP: -+ case ETH_P_IPV6: -+ m = m_get(slirp); -+ if (!m) -+ return; -+ /* Note: we add 2 to align the IP header on 4 bytes, -+ * and add the margin for the tcpiphdr overhead */ -+ if (M_FREEROOM(m) < pkt_len + TCPIPHDR_DELTA + 2) { -+ m_inc(m, pkt_len + TCPIPHDR_DELTA + 2); -+ } -+ m->m_len = pkt_len + TCPIPHDR_DELTA + 2; -+ memcpy(m->m_data + TCPIPHDR_DELTA + 2, pkt, pkt_len); -+ -+ m->m_data += TCPIPHDR_DELTA + 2 + ETH_HLEN; -+ m->m_len -= TCPIPHDR_DELTA + 2 + ETH_HLEN; -+ -+ if (proto == ETH_P_IP) { -+ ip_input(m); -+ } else if (proto == ETH_P_IPV6) { -+ ip6_input(m); -+ } -+ break; -+ -+ case ETH_P_NCSI: -+ ncsi_input(slirp, pkt, pkt_len); -+ break; -+ -+ default: -+ break; -+ } -+} -+ -+/* Prepare the IPv4 packet to be sent to the ethernet device. Returns 1 if no -+ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet -+ * is ready to go. 
-+ */ -+static int if_encap4(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, -+ uint8_t ethaddr[ETH_ALEN]) -+{ -+ const struct ip *iph = (const struct ip *)ifm->m_data; -+ -+ if (iph->ip_dst.s_addr == 0) { -+ /* 0.0.0.0 can not be a destination address, something went wrong, -+ * avoid making it worse */ -+ return 1; -+ } -+ if (!arp_table_search(slirp, iph->ip_dst.s_addr, ethaddr)) { -+ uint8_t arp_req[ETH_HLEN + sizeof(struct slirp_arphdr)]; -+ struct ethhdr *reh = (struct ethhdr *)arp_req; -+ struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_req + ETH_HLEN); -+ -+ if (!ifm->resolution_requested) { -+ /* If the client addr is not known, send an ARP request */ -+ memset(reh->h_dest, 0xff, ETH_ALEN); -+ memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); -+ memcpy(&reh->h_source[2], &slirp->vhost_addr, 4); -+ reh->h_proto = htons(ETH_P_ARP); -+ rah->ar_hrd = htons(1); -+ rah->ar_pro = htons(ETH_P_IP); -+ rah->ar_hln = ETH_ALEN; -+ rah->ar_pln = 4; -+ rah->ar_op = htons(ARPOP_REQUEST); -+ -+ /* source hw addr */ -+ memcpy(rah->ar_sha, special_ethaddr, ETH_ALEN - 4); -+ memcpy(&rah->ar_sha[2], &slirp->vhost_addr, 4); -+ -+ /* source IP */ -+ rah->ar_sip = slirp->vhost_addr.s_addr; -+ -+ /* target hw addr (none) */ -+ memset(rah->ar_tha, 0, ETH_ALEN); -+ -+ /* target IP */ -+ rah->ar_tip = iph->ip_dst.s_addr; -+ slirp->client_ipaddr = iph->ip_dst; -+ slirp_send_packet_all(slirp, arp_req, sizeof(arp_req)); -+ ifm->resolution_requested = true; -+ -+ /* Expire request and drop outgoing packet after 1 second */ -+ ifm->expiration_date = -+ slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; -+ } -+ return 0; -+ } else { -+ memcpy(eh->h_source, special_ethaddr, ETH_ALEN - 4); -+ /* XXX: not correct */ -+ memcpy(&eh->h_source[2], &slirp->vhost_addr, 4); -+ eh->h_proto = htons(ETH_P_IP); -+ -+ /* Send this */ -+ return 2; -+ } -+} -+ -+/* Prepare the IPv6 packet to be sent to the ethernet device. 
Returns 1 if no -+ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet -+ * is ready to go. -+ */ -+static int if_encap6(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, -+ uint8_t ethaddr[ETH_ALEN]) -+{ -+ const struct ip6 *ip6h = mtod(ifm, const struct ip6 *); -+ if (!ndp_table_search(slirp, ip6h->ip_dst, ethaddr)) { -+ if (!ifm->resolution_requested) { -+ ndp_send_ns(slirp, ip6h->ip_dst); -+ ifm->resolution_requested = true; -+ ifm->expiration_date = -+ slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; -+ } -+ return 0; -+ } else { -+ eh->h_proto = htons(ETH_P_IPV6); -+ in6_compute_ethaddr(ip6h->ip_src, eh->h_source); -+ -+ /* Send this */ -+ return 2; -+ } -+} -+ -+/* Output the IP packet to the ethernet device. Returns 0 if the packet must be -+ * re-queued. -+ */ -+int if_encap(Slirp *slirp, struct mbuf *ifm) -+{ -+ uint8_t buf[1600]; -+ struct ethhdr *eh = (struct ethhdr *)buf; -+ uint8_t ethaddr[ETH_ALEN]; -+ const struct ip *iph = (const struct ip *)ifm->m_data; -+ int ret; -+ -+ if (ifm->m_len + ETH_HLEN > sizeof(buf)) { -+ return 1; -+ } -+ -+ switch (iph->ip_v) { -+ case IPVERSION: -+ ret = if_encap4(slirp, ifm, eh, ethaddr); -+ if (ret < 2) { -+ return ret; -+ } -+ break; -+ -+ case IP6VERSION: -+ ret = if_encap6(slirp, ifm, eh, ethaddr); -+ if (ret < 2) { -+ return ret; -+ } -+ break; -+ -+ default: -+ g_assert_not_reached(); -+ break; -+ } -+ -+ memcpy(eh->h_dest, ethaddr, ETH_ALEN); -+ DEBUG_ARG("src = %02x:%02x:%02x:%02x:%02x:%02x", eh->h_source[0], -+ eh->h_source[1], eh->h_source[2], eh->h_source[3], -+ eh->h_source[4], eh->h_source[5]); -+ DEBUG_ARG("dst = %02x:%02x:%02x:%02x:%02x:%02x", eh->h_dest[0], -+ eh->h_dest[1], eh->h_dest[2], eh->h_dest[3], eh->h_dest[4], -+ eh->h_dest[5]); -+ memcpy(buf + sizeof(struct ethhdr), ifm->m_data, ifm->m_len); -+ slirp_send_packet_all(slirp, buf, ifm->m_len + ETH_HLEN); -+ return 1; -+} -+ -+/* Drop host forwarding rule, return 0 if found. 
*/ -+/* TODO: IPv6 */ -+int slirp_remove_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, -+ int host_port) -+{ -+ struct socket *so; -+ struct socket *head = (is_udp ? &slirp->udb : &slirp->tcb); -+ struct sockaddr_in addr; -+ int port = htons(host_port); -+ socklen_t addr_len; -+ -+ for (so = head->so_next; so != head; so = so->so_next) { -+ addr_len = sizeof(addr); -+ if ((so->so_state & SS_HOSTFWD) && -+ getsockname(so->s, (struct sockaddr *)&addr, &addr_len) == 0 && -+ addr.sin_addr.s_addr == host_addr.s_addr && addr.sin_port == port) { -+ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); -+ closesocket(so->s); -+ sofree(so); -+ return 0; -+ } -+ } -+ -+ return -1; -+} -+ -+/* TODO: IPv6 */ -+int slirp_add_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, -+ int host_port, struct in_addr guest_addr, int guest_port) -+{ -+ if (!guest_addr.s_addr) { -+ guest_addr = slirp->vdhcp_startaddr; -+ } -+ if (is_udp) { -+ if (!udp_listen(slirp, host_addr.s_addr, htons(host_port), -+ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) -+ return -1; -+ } else { -+ if (!tcp_listen(slirp, host_addr.s_addr, htons(host_port), -+ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) -+ return -1; -+ } -+ return 0; -+} -+ -+/* TODO: IPv6 */ -+static bool check_guestfwd(Slirp *slirp, struct in_addr *guest_addr, -+ int guest_port) -+{ -+ struct gfwd_list *tmp_ptr; -+ -+ if (!guest_addr->s_addr) { -+ guest_addr->s_addr = slirp->vnetwork_addr.s_addr | -+ (htonl(0x0204) & ~slirp->vnetwork_mask.s_addr); -+ } -+ if ((guest_addr->s_addr & slirp->vnetwork_mask.s_addr) != -+ slirp->vnetwork_addr.s_addr || -+ guest_addr->s_addr == slirp->vhost_addr.s_addr || -+ guest_addr->s_addr == slirp->vnameserver_addr.s_addr) { -+ return false; -+ } -+ -+ /* check if the port is "bound" */ -+ for (tmp_ptr = slirp->guestfwd_list; tmp_ptr; tmp_ptr = tmp_ptr->ex_next) { -+ if (guest_port == tmp_ptr->ex_fport && -+ guest_addr->s_addr == tmp_ptr->ex_addr.s_addr) -+ return false; 
-+ } -+ -+ return true; -+} -+ -+int slirp_add_exec(Slirp *slirp, const char *cmdline, -+ struct in_addr *guest_addr, int guest_port) -+{ -+ if (!check_guestfwd(slirp, guest_addr, guest_port)) { -+ return -1; -+ } -+ -+ add_exec(&slirp->guestfwd_list, cmdline, *guest_addr, htons(guest_port)); -+ return 0; -+} -+ -+int slirp_add_guestfwd(Slirp *slirp, SlirpWriteCb write_cb, void *opaque, -+ struct in_addr *guest_addr, int guest_port) -+{ -+ if (!check_guestfwd(slirp, guest_addr, guest_port)) { -+ return -1; -+ } -+ -+ add_guestfwd(&slirp->guestfwd_list, write_cb, opaque, *guest_addr, -+ htons(guest_port)); -+ return 0; -+} -+ -+ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags) -+{ -+ if (so->s == -1 && so->guestfwd) { -+ /* XXX this blocks entire thread. Rewrite to use -+ * qemu_chr_fe_write and background I/O callbacks */ -+ so->guestfwd->write_cb(buf, len, so->guestfwd->opaque); -+ return len; -+ } -+ -+ if (so->s == -1) { -+ /* -+ * This should in theory not happen but it is hard to be -+ * sure because some code paths will end up with so->s == -1 -+ * on a failure but don't dispose of the struct socket. -+ * Check specifically, so we don't pass -1 to send(). 
-+ */ -+ errno = EBADF; -+ return -1; -+ } -+ -+ return send(so->s, buf, len, flags); -+} -+ -+struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, -+ int guest_port) -+{ -+ struct socket *so; -+ -+ /* TODO: IPv6 */ -+ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { -+ if (so->so_faddr.s_addr == guest_addr.s_addr && -+ htons(so->so_fport) == guest_port) { -+ return so; -+ } -+ } -+ return NULL; -+} -+ -+size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, -+ int guest_port) -+{ -+ struct iovec iov[2]; -+ struct socket *so; -+ -+ so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); -+ -+ if (!so || so->so_state & SS_NOFDREF) { -+ return 0; -+ } -+ -+ if (!CONN_CANFRCV(so) || so->so_snd.sb_cc >= (so->so_snd.sb_datalen / 2)) { -+ return 0; -+ } -+ -+ return sopreprbuf(so, iov, NULL); -+} -+ -+void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port, -+ const uint8_t *buf, int size) -+{ -+ int ret; -+ struct socket *so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); -+ -+ if (!so) -+ return; -+ -+ ret = soreadbuf(so, (const char *)buf, size); -+ -+ if (ret > 0) -+ tcp_output(sototcpcb(so)); -+} -+ -+void slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len) -+{ -+ ssize_t ret = slirp->cb->send_packet(buf, len, slirp->opaque); -+ -+ if (ret < 0) { -+ g_critical("Failed to send packet, ret: %ld", (long)ret); -+ } else if (ret < len) { -+ DEBUG_ERROR("send_packet() didn't send all data: %ld < %lu", (long)ret, -+ (unsigned long)len); -+ } -+} -diff --git a/slirp/src/slirp.h b/slirp/src/slirp.h -new file mode 100644 -index 0000000..37ba6ed ---- /dev/null -+++ b/slirp/src/slirp.h -@@ -0,0 +1,273 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+#ifndef SLIRP_H -+#define SLIRP_H -+ -+#ifdef _WIN32 -+ -+/* as defined in sdkddkver.h */ -+#ifndef _WIN32_WINNT -+#define _WIN32_WINNT 0x0600 /* Vista */ -+#endif -+/* reduces the number of implicitly included headers */ 
-+#ifndef WIN32_LEAN_AND_MEAN -+#define WIN32_LEAN_AND_MEAN -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+ -+#else -+#if !defined(__HAIKU__) -+#define O_BINARY 0 -+#endif -+#endif -+ -+#ifndef _WIN32 -+#include -+#include -+#include -+#include -+#include -+#endif -+ -+#ifdef __APPLE__ -+#include -+#endif -+ -+/* Avoid conflicting with the libc insque() and remque(), which -+ have different prototypes. */ -+#define insque slirp_insque -+#define remque slirp_remque -+#define quehead slirp_quehead -+ -+#include "debug.h" -+#include "util.h" -+ -+#include "libslirp.h" -+#include "ip.h" -+#include "ip6.h" -+#include "tcp.h" -+#include "tcp_timer.h" -+#include "tcp_var.h" -+#include "tcpip.h" -+#include "udp.h" -+#include "ip_icmp.h" -+#include "ip6_icmp.h" -+#include "mbuf.h" -+#include "sbuf.h" -+#include "socket.h" -+#include "if.h" -+#include "main.h" -+#include "misc.h" -+ -+#include "bootp.h" -+#include "tftp.h" -+ -+#define ARPOP_REQUEST 1 /* ARP request */ -+#define ARPOP_REPLY 2 /* ARP reply */ -+ -+struct ethhdr { -+ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ -+ unsigned char h_source[ETH_ALEN]; /* source ether addr */ -+ unsigned short h_proto; /* packet type ID field */ -+}; -+ -+struct slirp_arphdr { -+ unsigned short ar_hrd; /* format of hardware address */ -+ unsigned short ar_pro; /* format of protocol address */ -+ unsigned char ar_hln; /* length of hardware address */ -+ unsigned char ar_pln; /* length of protocol address */ -+ unsigned short ar_op; /* ARP opcode (command) */ -+ -+ /* -+ * Ethernet looks like this : This bit is variable sized however... 
-+ */ -+ unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */ -+ uint32_t ar_sip; /* sender IP address */ -+ unsigned char ar_tha[ETH_ALEN]; /* target hardware address */ -+ uint32_t ar_tip; /* target IP address */ -+} SLIRP_PACKED; -+ -+#define ARP_TABLE_SIZE 16 -+ -+typedef struct ArpTable { -+ struct slirp_arphdr table[ARP_TABLE_SIZE]; -+ int next_victim; -+} ArpTable; -+ -+void arp_table_add(Slirp *slirp, uint32_t ip_addr, uint8_t ethaddr[ETH_ALEN]); -+ -+bool arp_table_search(Slirp *slirp, uint32_t ip_addr, -+ uint8_t out_ethaddr[ETH_ALEN]); -+ -+struct ndpentry { -+ unsigned char eth_addr[ETH_ALEN]; /* sender hardware address */ -+ struct in6_addr ip_addr; /* sender IP address */ -+}; -+ -+#define NDP_TABLE_SIZE 16 -+ -+typedef struct NdpTable { -+ struct ndpentry table[NDP_TABLE_SIZE]; -+ int next_victim; -+} NdpTable; -+ -+void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, -+ uint8_t ethaddr[ETH_ALEN]); -+bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, -+ uint8_t out_ethaddr[ETH_ALEN]); -+ -+struct Slirp { -+ unsigned time_fasttimo; -+ unsigned last_slowtimo; -+ bool do_slowtimo; -+ -+ bool in_enabled, in6_enabled; -+ -+ /* virtual network configuration */ -+ struct in_addr vnetwork_addr; -+ struct in_addr vnetwork_mask; -+ struct in_addr vhost_addr; -+ struct in6_addr vprefix_addr6; -+ uint8_t vprefix_len; -+ struct in6_addr vhost_addr6; -+ struct in_addr vdhcp_startaddr; -+ struct in_addr vnameserver_addr; -+ struct in6_addr vnameserver_addr6; -+ -+ struct in_addr client_ipaddr; -+ char client_hostname[33]; -+ -+ int restricted; -+ struct gfwd_list *guestfwd_list; -+ -+ /* mbuf states */ -+ struct quehead m_freelist; -+ struct quehead m_usedlist; -+ int mbuf_alloced; -+ -+ /* if states */ -+ struct quehead if_fastq; /* fast queue (for interactive data) */ -+ struct quehead if_batchq; /* queue for non-interactive data */ -+ bool if_start_busy; /* avoid if_start recursion */ -+ -+ /* ip states */ -+ struct ipq ipq; /* ip reass. 
queue */ -+ uint16_t ip_id; /* ip packet ctr, for ids */ -+ -+ /* bootp/dhcp states */ -+ BOOTPClient bootp_clients[NB_BOOTP_CLIENTS]; -+ char *bootp_filename; -+ size_t vdnssearch_len; -+ uint8_t *vdnssearch; -+ char *vdomainname; -+ -+ /* tcp states */ -+ struct socket tcb; -+ struct socket *tcp_last_so; -+ tcp_seq tcp_iss; /* tcp initial send seq # */ -+ uint32_t tcp_now; /* for RFC 1323 timestamps */ -+ -+ /* udp states */ -+ struct socket udb; -+ struct socket *udp_last_so; -+ -+ /* icmp states */ -+ struct socket icmp; -+ struct socket *icmp_last_so; -+ -+ /* tftp states */ -+ char *tftp_prefix; -+ struct tftp_session tftp_sessions[TFTP_SESSIONS_MAX]; -+ char *tftp_server_name; -+ -+ ArpTable arp_table; -+ NdpTable ndp_table; -+ -+ GRand *grand; -+ void *ra_timer; -+ -+ const SlirpCb *cb; -+ void *opaque; -+}; -+ -+void if_start(Slirp *); -+ -+int get_dns_addr(struct in_addr *pdns_addr); -+int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id); -+ -+/* ncsi.c */ -+void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); -+ -+#ifndef _WIN32 -+#include -+#endif -+ -+ -+extern bool slirp_do_keepalive; -+ -+#define TCP_MAXIDLE (TCPTV_KEEPCNT * TCPTV_KEEPINTVL) -+ -+/* dnssearch.c */ -+int translate_dnssearch(Slirp *s, const char **names); -+ -+/* cksum.c */ -+int cksum(struct mbuf *m, int len); -+int ip6_cksum(struct mbuf *m); -+ -+/* if.c */ -+void if_init(Slirp *); -+void if_output(struct socket *, struct mbuf *); -+ -+/* ip_input.c */ -+void ip_init(Slirp *); -+void ip_cleanup(Slirp *); -+void ip_input(struct mbuf *); -+void ip_slowtimo(Slirp *); -+void ip_stripoptions(register struct mbuf *, struct mbuf *); -+ -+/* ip_output.c */ -+int ip_output(struct socket *, struct mbuf *); -+ -+/* ip6_input.c */ -+void ip6_init(Slirp *); -+void ip6_cleanup(Slirp *); -+void ip6_input(struct mbuf *); -+ -+/* ip6_output */ -+int ip6_output(struct socket *, struct mbuf *, int fast); -+ -+/* tcp_input.c */ -+void tcp_input(register struct mbuf *, int, 
struct socket *, unsigned short af); -+int tcp_mss(register struct tcpcb *, unsigned); -+ -+/* tcp_output.c */ -+int tcp_output(register struct tcpcb *); -+void tcp_setpersist(register struct tcpcb *); -+ -+/* tcp_subr.c */ -+void tcp_init(Slirp *); -+void tcp_cleanup(Slirp *); -+void tcp_template(struct tcpcb *); -+void tcp_respond(struct tcpcb *, register struct tcpiphdr *, -+ register struct mbuf *, tcp_seq, tcp_seq, int, unsigned short); -+struct tcpcb *tcp_newtcpcb(struct socket *); -+struct tcpcb *tcp_close(register struct tcpcb *); -+void tcp_sockclosed(struct tcpcb *); -+int tcp_fconnect(struct socket *, unsigned short af); -+void tcp_connect(struct socket *); -+int tcp_attach(struct socket *); -+uint8_t tcp_tos(struct socket *); -+int tcp_emu(struct socket *, struct mbuf *); -+int tcp_ctl(struct socket *); -+struct tcpcb *tcp_drop(struct tcpcb *tp, int err); -+ -+struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, -+ int guest_port); -+ -+void slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len); -+ -+#endif -diff --git a/slirp/src/socket.c b/slirp/src/socket.c -new file mode 100644 -index 0000000..34daffc ---- /dev/null -+++ b/slirp/src/socket.c -@@ -0,0 +1,935 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1995 Danny Gasparovski. 
-+ */ -+ -+#include "slirp.h" -+#include "ip_icmp.h" -+#ifdef __sun__ -+#include -+#endif -+ -+static void sofcantrcvmore(struct socket *so); -+static void sofcantsendmore(struct socket *so); -+ -+struct socket *solookup(struct socket **last, struct socket *head, -+ struct sockaddr_storage *lhost, -+ struct sockaddr_storage *fhost) -+{ -+ struct socket *so = *last; -+ -+ /* Optimisation */ -+ if (so != head && sockaddr_equal(&(so->lhost.ss), lhost) && -+ (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { -+ return so; -+ } -+ -+ for (so = head->so_next; so != head; so = so->so_next) { -+ if (sockaddr_equal(&(so->lhost.ss), lhost) && -+ (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { -+ *last = so; -+ return so; -+ } -+ } -+ -+ return (struct socket *)NULL; -+} -+ -+/* -+ * Create a new socket, initialise the fields -+ * It is the responsibility of the caller to -+ * insque() it into the correct linked-list -+ */ -+struct socket *socreate(Slirp *slirp) -+{ -+ struct socket *so = g_new(struct socket, 1); -+ -+ memset(so, 0, sizeof(struct socket)); -+ so->so_state = SS_NOFDREF; -+ so->s = -1; -+ so->slirp = slirp; -+ so->pollfds_idx = -1; -+ -+ return so; -+} -+ -+/* -+ * Remove references to so from the given message queue. 
-+ */ -+static void soqfree(struct socket *so, struct quehead *qh) -+{ -+ struct mbuf *ifq; -+ -+ for (ifq = (struct mbuf *)qh->qh_link; (struct quehead *)ifq != qh; -+ ifq = ifq->ifq_next) { -+ if (ifq->ifq_so == so) { -+ struct mbuf *ifm; -+ ifq->ifq_so = NULL; -+ for (ifm = ifq->ifs_next; ifm != ifq; ifm = ifm->ifs_next) { -+ ifm->ifq_so = NULL; -+ } -+ } -+ } -+} -+ -+/* -+ * remque and free a socket, clobber cache -+ */ -+void sofree(struct socket *so) -+{ -+ Slirp *slirp = so->slirp; -+ -+ soqfree(so, &slirp->if_fastq); -+ soqfree(so, &slirp->if_batchq); -+ -+ if (so == slirp->tcp_last_so) { -+ slirp->tcp_last_so = &slirp->tcb; -+ } else if (so == slirp->udp_last_so) { -+ slirp->udp_last_so = &slirp->udb; -+ } else if (so == slirp->icmp_last_so) { -+ slirp->icmp_last_so = &slirp->icmp; -+ } -+ m_free(so->so_m); -+ -+ if (so->so_next && so->so_prev) -+ remque(so); /* crashes if so is not in a queue */ -+ -+ if (so->so_tcpcb) { -+ free(so->so_tcpcb); -+ } -+ g_free(so); -+} -+ -+size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np) -+{ -+ int n, lss, total; -+ struct sbuf *sb = &so->so_snd; -+ int len = sb->sb_datalen - sb->sb_cc; -+ int mss = so->so_tcpcb->t_maxseg; -+ -+ DEBUG_CALL("sopreprbuf"); -+ DEBUG_ARG("so = %p", so); -+ -+ if (len <= 0) -+ return 0; -+ -+ iov[0].iov_base = sb->sb_wptr; -+ iov[1].iov_base = NULL; -+ iov[1].iov_len = 0; -+ if (sb->sb_wptr < sb->sb_rptr) { -+ iov[0].iov_len = sb->sb_rptr - sb->sb_wptr; -+ /* Should never succeed, but... */ -+ if (iov[0].iov_len > len) -+ iov[0].iov_len = len; -+ if (iov[0].iov_len > mss) -+ iov[0].iov_len -= iov[0].iov_len % mss; -+ n = 1; -+ } else { -+ iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr; -+ /* Should never succeed, but... 
*/ -+ if (iov[0].iov_len > len) -+ iov[0].iov_len = len; -+ len -= iov[0].iov_len; -+ if (len) { -+ iov[1].iov_base = sb->sb_data; -+ iov[1].iov_len = sb->sb_rptr - sb->sb_data; -+ if (iov[1].iov_len > len) -+ iov[1].iov_len = len; -+ total = iov[0].iov_len + iov[1].iov_len; -+ if (total > mss) { -+ lss = total % mss; -+ if (iov[1].iov_len > lss) { -+ iov[1].iov_len -= lss; -+ n = 2; -+ } else { -+ lss -= iov[1].iov_len; -+ iov[0].iov_len -= lss; -+ n = 1; -+ } -+ } else -+ n = 2; -+ } else { -+ if (iov[0].iov_len > mss) -+ iov[0].iov_len -= iov[0].iov_len % mss; -+ n = 1; -+ } -+ } -+ if (np) -+ *np = n; -+ -+ return iov[0].iov_len + (n - 1) * iov[1].iov_len; -+} -+ -+/* -+ * Read from so's socket into sb_snd, updating all relevant sbuf fields -+ * NOTE: This will only be called if it is select()ed for reading, so -+ * a read() of 0 (or less) means it's disconnected -+ */ -+int soread(struct socket *so) -+{ -+ int n, nn; -+ size_t buf_len; -+ struct sbuf *sb = &so->so_snd; -+ struct iovec iov[2]; -+ -+ DEBUG_CALL("soread"); -+ DEBUG_ARG("so = %p", so); -+ -+ /* -+ * No need to check if there's enough room to read. 
-+ * soread wouldn't have been called if there weren't -+ */ -+ buf_len = sopreprbuf(so, iov, &n); -+ assert(buf_len != 0); -+ -+ nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, 0); -+ if (nn <= 0) { -+ if (nn < 0 && (errno == EINTR || errno == EAGAIN)) -+ return 0; -+ else { -+ int err; -+ socklen_t elen = sizeof err; -+ struct sockaddr_storage addr; -+ struct sockaddr *paddr = (struct sockaddr *)&addr; -+ socklen_t alen = sizeof addr; -+ -+ err = errno; -+ if (nn == 0) { -+ if (getpeername(so->s, paddr, &alen) < 0) { -+ err = errno; -+ } else { -+ getsockopt(so->s, SOL_SOCKET, SO_ERROR, &err, &elen); -+ } -+ } -+ -+ DEBUG_MISC(" --- soread() disconnected, nn = %d, errno = %d-%s", nn, -+ errno, strerror(errno)); -+ sofcantrcvmore(so); -+ -+ if (err == ECONNRESET || err == ECONNREFUSED || err == ENOTCONN || -+ err == EPIPE) { -+ tcp_drop(sototcpcb(so), err); -+ } else { -+ tcp_sockclosed(sototcpcb(so)); -+ } -+ return -1; -+ } -+ } -+ -+ /* -+ * If there was no error, try and read the second time round -+ * We read again if n = 2 (ie, there's another part of the buffer) -+ * and we read as much as we could in the first read -+ * We don't test for <= 0 this time, because there legitimately -+ * might not be any more data (since the socket is non-blocking), -+ * a close will be detected on next iteration. -+ * A return of -1 won't (shouldn't) happen, since it didn't happen above -+ */ -+ if (n == 2 && nn == iov[0].iov_len) { -+ int ret; -+ ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0); -+ if (ret > 0) -+ nn += ret; -+ } -+ -+ DEBUG_MISC(" ... 
read nn = %d bytes", nn); -+ -+ /* Update fields */ -+ sb->sb_cc += nn; -+ sb->sb_wptr += nn; -+ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) -+ sb->sb_wptr -= sb->sb_datalen; -+ return nn; -+} -+ -+int soreadbuf(struct socket *so, const char *buf, int size) -+{ -+ int n, nn, copy = size; -+ struct sbuf *sb = &so->so_snd; -+ struct iovec iov[2]; -+ -+ DEBUG_CALL("soreadbuf"); -+ DEBUG_ARG("so = %p", so); -+ -+ /* -+ * No need to check if there's enough room to read. -+ * soread wouldn't have been called if there weren't -+ */ -+ assert(size > 0); -+ if (sopreprbuf(so, iov, &n) < size) -+ goto err; -+ -+ nn = MIN(iov[0].iov_len, copy); -+ memcpy(iov[0].iov_base, buf, nn); -+ -+ copy -= nn; -+ buf += nn; -+ -+ if (copy == 0) -+ goto done; -+ -+ memcpy(iov[1].iov_base, buf, copy); -+ -+done: -+ /* Update fields */ -+ sb->sb_cc += size; -+ sb->sb_wptr += size; -+ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) -+ sb->sb_wptr -= sb->sb_datalen; -+ return size; -+err: -+ -+ sofcantrcvmore(so); -+ tcp_sockclosed(sototcpcb(so)); -+ g_critical("soreadbuf buffer too small"); -+ return -1; -+} -+ -+/* -+ * Get urgent data -+ * -+ * When the socket is created, we set it SO_OOBINLINE, -+ * so when OOB data arrives, we soread() it and everything -+ * in the send buffer is sent as urgent data -+ */ -+int sorecvoob(struct socket *so) -+{ -+ struct tcpcb *tp = sototcpcb(so); -+ int ret; -+ -+ DEBUG_CALL("sorecvoob"); -+ DEBUG_ARG("so = %p", so); -+ -+ /* -+ * We take a guess at how much urgent data has arrived. -+ * In most situations, when urgent data arrives, the next -+ * read() should get all the urgent data. This guess will -+ * be wrong however if more data arrives just after the -+ * urgent data, or the read() doesn't return all the -+ * urgent data. 
-+ */ -+ ret = soread(so); -+ if (ret > 0) { -+ tp->snd_up = tp->snd_una + so->so_snd.sb_cc; -+ tp->t_force = 1; -+ tcp_output(tp); -+ tp->t_force = 0; -+ } -+ -+ return ret; -+} -+ -+/* -+ * Send urgent data -+ * There's a lot duplicated code here, but... -+ */ -+int sosendoob(struct socket *so) -+{ -+ struct sbuf *sb = &so->so_rcv; -+ char buff[2048]; /* XXX Shouldn't be sending more oob data than this */ -+ -+ int n; -+ -+ DEBUG_CALL("sosendoob"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc); -+ -+ if (so->so_urgc > 2048) -+ so->so_urgc = 2048; /* XXXX */ -+ -+ if (sb->sb_rptr < sb->sb_wptr) { -+ /* We can send it directly */ -+ n = slirp_send(so, sb->sb_rptr, so->so_urgc, -+ (MSG_OOB)); /* |MSG_DONTWAIT)); */ -+ } else { -+ /* -+ * Since there's no sendv or sendtov like writev, -+ * we must copy all data to a linear buffer then -+ * send it all -+ */ -+ uint32_t urgc = so->so_urgc; -+ int len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; -+ if (len > urgc) { -+ len = urgc; -+ } -+ memcpy(buff, sb->sb_rptr, len); -+ urgc -= len; -+ if (urgc) { -+ n = sb->sb_wptr - sb->sb_data; -+ if (n > urgc) { -+ n = urgc; -+ } -+ memcpy((buff + len), sb->sb_data, n); -+ len += n; -+ } -+ n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */ -+#ifdef DEBUG -+ if (n != len) { -+ DEBUG_ERROR("Didn't send all data urgently XXXXX"); -+ } -+#endif -+ } -+ -+ if (n < 0) { -+ return n; -+ } -+ so->so_urgc -= n; -+ DEBUG_MISC(" ---2 sent %d bytes urgent data, %d urgent bytes left", n, -+ so->so_urgc); -+ -+ sb->sb_cc -= n; -+ sb->sb_rptr += n; -+ if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) -+ sb->sb_rptr -= sb->sb_datalen; -+ -+ return n; -+} -+ -+/* -+ * Write data from so_rcv to so's socket, -+ * updating all sbuf field as necessary -+ */ -+int sowrite(struct socket *so) -+{ -+ int n, nn; -+ struct sbuf *sb = &so->so_rcv; -+ int len = sb->sb_cc; -+ struct iovec iov[2]; -+ -+ DEBUG_CALL("sowrite"); -+ DEBUG_ARG("so = %p", so); -+ -+ if 
(so->so_urgc) { -+ uint32_t expected = so->so_urgc; -+ if (sosendoob(so) < expected) { -+ /* Treat a short write as a fatal error too, -+ * rather than continuing on and sending the urgent -+ * data as if it were non-urgent and leaving the -+ * so_urgc count wrong. -+ */ -+ goto err_disconnected; -+ } -+ if (sb->sb_cc == 0) -+ return 0; -+ } -+ -+ /* -+ * No need to check if there's something to write, -+ * sowrite wouldn't have been called otherwise -+ */ -+ -+ iov[0].iov_base = sb->sb_rptr; -+ iov[1].iov_base = NULL; -+ iov[1].iov_len = 0; -+ if (sb->sb_rptr < sb->sb_wptr) { -+ iov[0].iov_len = sb->sb_wptr - sb->sb_rptr; -+ /* Should never succeed, but... */ -+ if (iov[0].iov_len > len) -+ iov[0].iov_len = len; -+ n = 1; -+ } else { -+ iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; -+ if (iov[0].iov_len > len) -+ iov[0].iov_len = len; -+ len -= iov[0].iov_len; -+ if (len) { -+ iov[1].iov_base = sb->sb_data; -+ iov[1].iov_len = sb->sb_wptr - sb->sb_data; -+ if (iov[1].iov_len > len) -+ iov[1].iov_len = len; -+ n = 2; -+ } else -+ n = 1; -+ } -+ /* Check if there's urgent data to send, and if so, send it */ -+ -+ nn = slirp_send(so, iov[0].iov_base, iov[0].iov_len, 0); -+ /* This should never happen, but people tell me it does *shrug* */ -+ if (nn < 0 && (errno == EAGAIN || errno == EINTR)) -+ return 0; -+ -+ if (nn <= 0) { -+ goto err_disconnected; -+ } -+ -+ if (n == 2 && nn == iov[0].iov_len) { -+ int ret; -+ ret = slirp_send(so, iov[1].iov_base, iov[1].iov_len, 0); -+ if (ret > 0) -+ nn += ret; -+ } -+ DEBUG_MISC(" ... 
wrote nn = %d bytes", nn); -+ -+ /* Update sbuf */ -+ sb->sb_cc -= nn; -+ sb->sb_rptr += nn; -+ if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) -+ sb->sb_rptr -= sb->sb_datalen; -+ -+ /* -+ * If in DRAIN mode, and there's no more data, set -+ * it CANTSENDMORE -+ */ -+ if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0) -+ sofcantsendmore(so); -+ -+ return nn; -+ -+err_disconnected: -+ DEBUG_MISC(" --- sowrite disconnected, so->so_state = %x, errno = %d", -+ so->so_state, errno); -+ sofcantsendmore(so); -+ tcp_sockclosed(sototcpcb(so)); -+ return -1; -+} -+ -+/* -+ * recvfrom() a UDP socket -+ */ -+void sorecvfrom(struct socket *so) -+{ -+ struct sockaddr_storage addr; -+ struct sockaddr_storage saddr, daddr; -+ socklen_t addrlen = sizeof(struct sockaddr_storage); -+ -+ DEBUG_CALL("sorecvfrom"); -+ DEBUG_ARG("so = %p", so); -+ -+ if (so->so_type == IPPROTO_ICMP) { /* This is a "ping" reply */ -+ char buff[256]; -+ int len; -+ -+ len = recvfrom(so->s, buff, 256, 0, (struct sockaddr *)&addr, &addrlen); -+ /* XXX Check if reply is "correct"? */ -+ -+ if (len == -1 || len == 0) { -+ uint8_t code = ICMP_UNREACH_PORT; -+ -+ if (errno == EHOSTUNREACH) -+ code = ICMP_UNREACH_HOST; -+ else if (errno == ENETUNREACH) -+ code = ICMP_UNREACH_NET; -+ -+ DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno)); -+ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno)); -+ } else { -+ icmp_reflect(so->so_m); -+ so->so_m = NULL; /* Don't m_free() it again! 
*/ -+ } -+ /* No need for this socket anymore, udp_detach it */ -+ udp_detach(so); -+ } else { /* A "normal" UDP packet */ -+ struct mbuf *m; -+ int len; -+#ifdef _WIN32 -+ unsigned long n; -+#else -+ int n; -+#endif -+ -+ if (ioctlsocket(so->s, FIONREAD, &n) != 0) { -+ DEBUG_MISC(" ioctlsocket errno = %d-%s\n", errno, strerror(errno)); -+ return; -+ } -+ if (n == 0) { -+ return; -+ } -+ -+ m = m_get(so->slirp); -+ if (!m) { -+ return; -+ } -+ switch (so->so_ffamily) { -+ case AF_INET: -+ m->m_data += IF_MAXLINKHDR + sizeof(struct udpiphdr); -+ break; -+ case AF_INET6: -+ m->m_data += -+ IF_MAXLINKHDR + sizeof(struct ip6) + sizeof(struct udphdr); -+ break; -+ default: -+ g_assert_not_reached(); -+ break; -+ } -+ -+ /* -+ * XXX Shouldn't FIONREAD packets destined for port 53, -+ * but I don't know the max packet size for DNS lookups -+ */ -+ len = M_FREEROOM(m); -+ /* if (so->so_fport != htons(53)) { */ -+ -+ if (n > len) { -+ n = (m->m_data - m->m_dat) + m->m_len + n + 1; -+ m_inc(m, n); -+ len = M_FREEROOM(m); -+ } -+ /* } */ -+ -+ m->m_len = recvfrom(so->s, m->m_data, len, 0, (struct sockaddr *)&addr, -+ &addrlen); -+ DEBUG_MISC(" did recvfrom %d, errno = %d-%s", m->m_len, errno, -+ strerror(errno)); -+ if (m->m_len < 0) { -+ /* Report error as ICMP */ -+ switch (so->so_lfamily) { -+ uint8_t code; -+ case AF_INET: -+ code = ICMP_UNREACH_PORT; -+ -+ if (errno == EHOSTUNREACH) { -+ code = ICMP_UNREACH_HOST; -+ } else if (errno == ENETUNREACH) { -+ code = ICMP_UNREACH_NET; -+ } -+ -+ DEBUG_MISC(" rx error, tx icmp ICMP_UNREACH:%i", code); -+ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, -+ strerror(errno)); -+ break; -+ case AF_INET6: -+ code = ICMP6_UNREACH_PORT; -+ -+ if (errno == EHOSTUNREACH) { -+ code = ICMP6_UNREACH_ADDRESS; -+ } else if (errno == ENETUNREACH) { -+ code = ICMP6_UNREACH_NO_ROUTE; -+ } -+ -+ DEBUG_MISC(" rx error, tx icmp6 ICMP_UNREACH:%i", code); -+ icmp6_send_error(so->so_m, ICMP6_UNREACH, code); -+ break; -+ default: -+ 
g_assert_not_reached(); -+ break; -+ } -+ m_free(m); -+ } else { -+ /* -+ * Hack: domain name lookup will be used the most for UDP, -+ * and since they'll only be used once there's no need -+ * for the 4 minute (or whatever) timeout... So we time them -+ * out much quicker (10 seconds for now...) -+ */ -+ if (so->so_expire) { -+ if (so->so_fport == htons(53)) -+ so->so_expire = curtime + SO_EXPIREFAST; -+ else -+ so->so_expire = curtime + SO_EXPIRE; -+ } -+ -+ /* -+ * If this packet was destined for CTL_ADDR, -+ * make it look like that's where it came from -+ */ -+ saddr = addr; -+ sotranslate_in(so, &saddr); -+ daddr = so->lhost.ss; -+ -+ switch (so->so_ffamily) { -+ case AF_INET: -+ udp_output(so, m, (struct sockaddr_in *)&saddr, -+ (struct sockaddr_in *)&daddr, so->so_iptos); -+ break; -+ case AF_INET6: -+ udp6_output(so, m, (struct sockaddr_in6 *)&saddr, -+ (struct sockaddr_in6 *)&daddr); -+ break; -+ default: -+ g_assert_not_reached(); -+ break; -+ } -+ } /* rx error */ -+ } /* if ping packet */ -+} -+ -+/* -+ * sendto() a socket -+ */ -+int sosendto(struct socket *so, struct mbuf *m) -+{ -+ int ret; -+ struct sockaddr_storage addr; -+ -+ DEBUG_CALL("sosendto"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("m = %p", m); -+ -+ addr = so->fhost.ss; -+ DEBUG_CALL(" sendto()ing)"); -+ sotranslate_out(so, &addr); -+ -+ /* Don't care what port we get */ -+ ret = sendto(so->s, m->m_data, m->m_len, 0, (struct sockaddr *)&addr, -+ sockaddr_size(&addr)); -+ if (ret < 0) -+ return -1; -+ -+ /* -+ * Kill the socket if there's no reply in 4 minutes, -+ * but only if it's an expirable socket -+ */ -+ if (so->so_expire) -+ so->so_expire = curtime + SO_EXPIRE; -+ so->so_state &= SS_PERSISTENT_MASK; -+ so->so_state |= SS_ISFCONNECTED; /* So that it gets select()ed */ -+ return 0; -+} -+ -+/* -+ * Listen for incoming TCP connections -+ */ -+struct socket *tcp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, -+ uint32_t laddr, unsigned lport, int flags) -+{ -+ /* TODO: IPv6 */ 
-+ struct sockaddr_in addr; -+ struct socket *so; -+ int s, opt = 1; -+ socklen_t addrlen = sizeof(addr); -+ memset(&addr, 0, addrlen); -+ -+ DEBUG_CALL("tcp_listen"); -+ DEBUG_ARG("haddr = %s", inet_ntoa((struct in_addr){ .s_addr = haddr })); -+ DEBUG_ARG("hport = %d", ntohs(hport)); -+ DEBUG_ARG("laddr = %s", inet_ntoa((struct in_addr){ .s_addr = laddr })); -+ DEBUG_ARG("lport = %d", ntohs(lport)); -+ DEBUG_ARG("flags = %x", flags); -+ -+ so = socreate(slirp); -+ -+ /* Don't tcp_attach... we don't need so_snd nor so_rcv */ -+ if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL) { -+ g_free(so); -+ return NULL; -+ } -+ insque(so, &slirp->tcb); -+ -+ /* -+ * SS_FACCEPTONCE sockets must time out. -+ */ -+ if (flags & SS_FACCEPTONCE) -+ so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT * 2; -+ -+ so->so_state &= SS_PERSISTENT_MASK; -+ so->so_state |= (SS_FACCEPTCONN | flags); -+ so->so_lfamily = AF_INET; -+ so->so_lport = lport; /* Kept in network format */ -+ so->so_laddr.s_addr = laddr; /* Ditto */ -+ -+ addr.sin_family = AF_INET; -+ addr.sin_addr.s_addr = haddr; -+ addr.sin_port = hport; -+ -+ if (((s = slirp_socket(AF_INET, SOCK_STREAM, 0)) < 0) || -+ (slirp_socket_set_fast_reuse(s) < 0) || -+ (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0) || -+ (listen(s, 1) < 0)) { -+ int tmperrno = errno; /* Don't clobber the real reason we failed */ -+ -+ if (s >= 0) { -+ closesocket(s); -+ } -+ sofree(so); -+ /* Restore the real errno */ -+#ifdef _WIN32 -+ WSASetLastError(tmperrno); -+#else -+ errno = tmperrno; -+#endif -+ return NULL; -+ } -+ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); -+ opt = 1; -+ setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(int)); -+ -+ getsockname(s, (struct sockaddr *)&addr, &addrlen); -+ so->so_ffamily = AF_INET; -+ so->so_fport = addr.sin_port; -+ if (addr.sin_addr.s_addr == 0 || -+ addr.sin_addr.s_addr == loopback_addr.s_addr) -+ so->so_faddr = slirp->vhost_addr; -+ else -+ so->so_faddr = addr.sin_addr; -+ -+ so->s = s; -+ 
return so; -+} -+ -+/* -+ * Various session state calls -+ * XXX Should be #define's -+ * The socket state stuff needs work, these often get call 2 or 3 -+ * times each when only 1 was needed -+ */ -+void soisfconnecting(struct socket *so) -+{ -+ so->so_state &= ~(SS_NOFDREF | SS_ISFCONNECTED | SS_FCANTRCVMORE | -+ SS_FCANTSENDMORE | SS_FWDRAIN); -+ so->so_state |= SS_ISFCONNECTING; /* Clobber other states */ -+} -+ -+void soisfconnected(struct socket *so) -+{ -+ so->so_state &= ~(SS_ISFCONNECTING | SS_FWDRAIN | SS_NOFDREF); -+ so->so_state |= SS_ISFCONNECTED; /* Clobber other states */ -+} -+ -+static void sofcantrcvmore(struct socket *so) -+{ -+ if ((so->so_state & SS_NOFDREF) == 0) { -+ shutdown(so->s, 0); -+ } -+ so->so_state &= ~(SS_ISFCONNECTING); -+ if (so->so_state & SS_FCANTSENDMORE) { -+ so->so_state &= SS_PERSISTENT_MASK; -+ so->so_state |= SS_NOFDREF; /* Don't select it */ -+ } else { -+ so->so_state |= SS_FCANTRCVMORE; -+ } -+} -+ -+static void sofcantsendmore(struct socket *so) -+{ -+ if ((so->so_state & SS_NOFDREF) == 0) { -+ shutdown(so->s, 1); /* send FIN to fhost */ -+ } -+ so->so_state &= ~(SS_ISFCONNECTING); -+ if (so->so_state & SS_FCANTRCVMORE) { -+ so->so_state &= SS_PERSISTENT_MASK; -+ so->so_state |= SS_NOFDREF; /* as above */ -+ } else { -+ so->so_state |= SS_FCANTSENDMORE; -+ } -+} -+ -+/* -+ * Set write drain mode -+ * Set CANTSENDMORE once all data has been write()n -+ */ -+void sofwdrain(struct socket *so) -+{ -+ if (so->so_rcv.sb_cc) -+ so->so_state |= SS_FWDRAIN; -+ else -+ sofcantsendmore(so); -+} -+ -+/* -+ * Translate addr in host addr when it is a virtual address -+ */ -+void sotranslate_out(struct socket *so, struct sockaddr_storage *addr) -+{ -+ Slirp *slirp = so->slirp; -+ struct sockaddr_in *sin = (struct sockaddr_in *)addr; -+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; -+ -+ switch (addr->ss_family) { -+ case AF_INET: -+ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == -+ slirp->vnetwork_addr.s_addr) 
{ -+ /* It's an alias */ -+ if (so->so_faddr.s_addr == slirp->vnameserver_addr.s_addr) { -+ if (get_dns_addr(&sin->sin_addr) < 0) { -+ sin->sin_addr = loopback_addr; -+ } -+ } else { -+ sin->sin_addr = loopback_addr; -+ } -+ } -+ -+ DEBUG_MISC(" addr.sin_port=%d, addr.sin_addr.s_addr=%.16s", -+ ntohs(sin->sin_port), inet_ntoa(sin->sin_addr)); -+ break; -+ -+ case AF_INET6: -+ if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6, -+ slirp->vprefix_len)) { -+ if (in6_equal(&so->so_faddr6, &slirp->vnameserver_addr6)) { -+ uint32_t scope_id; -+ if (get_dns6_addr(&sin6->sin6_addr, &scope_id) >= 0) { -+ sin6->sin6_scope_id = scope_id; -+ } else { -+ sin6->sin6_addr = in6addr_loopback; -+ } -+ } else { -+ sin6->sin6_addr = in6addr_loopback; -+ } -+ } -+ break; -+ -+ default: -+ break; -+ } -+} -+ -+void sotranslate_in(struct socket *so, struct sockaddr_storage *addr) -+{ -+ Slirp *slirp = so->slirp; -+ struct sockaddr_in *sin = (struct sockaddr_in *)addr; -+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; -+ -+ switch (addr->ss_family) { -+ case AF_INET: -+ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == -+ slirp->vnetwork_addr.s_addr) { -+ uint32_t inv_mask = ~slirp->vnetwork_mask.s_addr; -+ -+ if ((so->so_faddr.s_addr & inv_mask) == inv_mask) { -+ sin->sin_addr = slirp->vhost_addr; -+ } else if (sin->sin_addr.s_addr == loopback_addr.s_addr || -+ so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { -+ sin->sin_addr = so->so_faddr; -+ } -+ } -+ break; -+ -+ case AF_INET6: -+ if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6, -+ slirp->vprefix_len)) { -+ if (in6_equal(&sin6->sin6_addr, &in6addr_loopback) || -+ !in6_equal(&so->so_faddr6, &slirp->vhost_addr6)) { -+ sin6->sin6_addr = so->so_faddr6; -+ } -+ } -+ break; -+ -+ default: -+ break; -+ } -+} -+ -+/* -+ * Translate connections from localhost to the real hostname -+ */ -+void sotranslate_accept(struct socket *so) -+{ -+ Slirp *slirp = so->slirp; -+ -+ switch (so->so_ffamily) { -+ case 
AF_INET: -+ if (so->so_faddr.s_addr == INADDR_ANY || -+ (so->so_faddr.s_addr & loopback_mask) == -+ (loopback_addr.s_addr & loopback_mask)) { -+ so->so_faddr = slirp->vhost_addr; -+ } -+ break; -+ -+ case AF_INET6: -+ if (in6_equal(&so->so_faddr6, &in6addr_any) || -+ in6_equal(&so->so_faddr6, &in6addr_loopback)) { -+ so->so_faddr6 = slirp->vhost_addr6; -+ } -+ break; -+ -+ default: -+ break; -+ } -+} -+ -+void sodrop(struct socket *s, int num) -+{ -+ if (sbdrop(&s->so_snd, num)) { -+ s->slirp->cb->notify(s->slirp->opaque); -+ } -+} -diff --git a/slirp/src/socket.h b/slirp/src/socket.h -new file mode 100644 -index 0000000..d07f56d ---- /dev/null -+++ b/slirp/src/socket.h -@@ -0,0 +1,164 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#ifndef SLIRP_SOCKET_H -+#define SLIRP_SOCKET_H -+ -+#include "misc.h" -+ -+#define SO_EXPIRE 240000 -+#define SO_EXPIREFAST 10000 -+ -+/* -+ * Our socket structure -+ */ -+ -+union slirp_sockaddr { -+ struct sockaddr_storage ss; -+ struct sockaddr_in sin; -+ struct sockaddr_in6 sin6; -+}; -+ -+struct socket { -+ struct socket *so_next, *so_prev; /* For a linked list of sockets */ -+ -+ int s; /* The actual socket */ -+ struct gfwd_list *guestfwd; -+ -+ int pollfds_idx; /* GPollFD GArray index */ -+ -+ Slirp *slirp; /* managing slirp instance */ -+ -+ /* XXX union these with not-yet-used sbuf params */ -+ struct mbuf *so_m; /* Pointer to the original SYN packet, -+ * for non-blocking connect()'s, and -+ * PING reply's */ -+ struct tcpiphdr *so_ti; /* Pointer to the original ti within -+ * so_mconn, for non-blocking connections */ -+ uint32_t so_urgc; -+ union slirp_sockaddr fhost; /* Foreign host */ -+#define so_faddr fhost.sin.sin_addr -+#define so_fport fhost.sin.sin_port -+#define so_faddr6 fhost.sin6.sin6_addr -+#define so_fport6 fhost.sin6.sin6_port -+#define so_ffamily fhost.ss.ss_family -+ -+ union slirp_sockaddr lhost; /* Local host */ -+#define so_laddr 
lhost.sin.sin_addr -+#define so_lport lhost.sin.sin_port -+#define so_laddr6 lhost.sin6.sin6_addr -+#define so_lport6 lhost.sin6.sin6_port -+#define so_lfamily lhost.ss.ss_family -+ -+ uint8_t so_iptos; /* Type of service */ -+ uint8_t so_emu; /* Is the socket emulated? */ -+ -+ uint8_t so_type; /* Type of socket, UDP or TCP */ -+ int32_t so_state; /* internal state flags SS_*, below */ -+ -+ struct tcpcb *so_tcpcb; /* pointer to TCP protocol control block */ -+ unsigned so_expire; /* When the socket will expire */ -+ -+ int so_queued; /* Number of packets queued from this socket */ -+ int so_nqueued; /* Number of packets queued in a row -+ * Used to determine when to "downgrade" a session -+ * from fastq to batchq */ -+ -+ struct sbuf so_rcv; /* Receive buffer */ -+ struct sbuf so_snd; /* Send buffer */ -+}; -+ -+ -+/* -+ * Socket state bits. (peer means the host on the Internet, -+ * local host means the host on the other end of the modem) -+ */ -+#define SS_NOFDREF 0x001 /* No fd reference */ -+ -+#define SS_ISFCONNECTING \ -+ 0x002 /* Socket is connecting to peer (non-blocking connect()'s) */ -+#define SS_ISFCONNECTED 0x004 /* Socket is connected to peer */ -+#define SS_FCANTRCVMORE \ -+ 0x008 /* Socket can't receive more from peer (for half-closes) */ -+#define SS_FCANTSENDMORE \ -+ 0x010 /* Socket can't send more to peer (for half-closes) */ -+#define SS_FWDRAIN \ -+ 0x040 /* We received a FIN, drain data and set SS_FCANTSENDMORE */ -+ -+#define SS_CTL 0x080 -+#define SS_FACCEPTCONN \ -+ 0x100 /* Socket is accepting connections from a host on the internet */ -+#define SS_FACCEPTONCE \ -+ 0x200 /* If set, the SS_FACCEPTCONN socket will die after one accept */ -+ -+#define SS_PERSISTENT_MASK 0xf000 /* Unremovable state bits */ -+#define SS_HOSTFWD 0x1000 /* Socket describes host->guest forwarding */ -+#define SS_INCOMING \ -+ 0x2000 /* Connection was initiated by a host on the internet */ -+ -+static inline int sockaddr_equal(struct sockaddr_storage *a, -+ 
struct sockaddr_storage *b) -+{ -+ if (a->ss_family != b->ss_family) { -+ return 0; -+ } -+ -+ switch (a->ss_family) { -+ case AF_INET: { -+ struct sockaddr_in *a4 = (struct sockaddr_in *)a; -+ struct sockaddr_in *b4 = (struct sockaddr_in *)b; -+ return a4->sin_addr.s_addr == b4->sin_addr.s_addr && -+ a4->sin_port == b4->sin_port; -+ } -+ case AF_INET6: { -+ struct sockaddr_in6 *a6 = (struct sockaddr_in6 *)a; -+ struct sockaddr_in6 *b6 = (struct sockaddr_in6 *)b; -+ return (in6_equal(&a6->sin6_addr, &b6->sin6_addr) && -+ a6->sin6_port == b6->sin6_port); -+ } -+ default: -+ g_assert_not_reached(); -+ } -+ -+ return 0; -+} -+ -+static inline socklen_t sockaddr_size(struct sockaddr_storage *a) -+{ -+ switch (a->ss_family) { -+ case AF_INET: -+ return sizeof(struct sockaddr_in); -+ case AF_INET6: -+ return sizeof(struct sockaddr_in6); -+ default: -+ g_assert_not_reached(); -+ } -+} -+ -+struct socket *solookup(struct socket **, struct socket *, -+ struct sockaddr_storage *, struct sockaddr_storage *); -+struct socket *socreate(Slirp *); -+void sofree(struct socket *); -+int soread(struct socket *); -+int sorecvoob(struct socket *); -+int sosendoob(struct socket *); -+int sowrite(struct socket *); -+void sorecvfrom(struct socket *); -+int sosendto(struct socket *, struct mbuf *); -+struct socket *tcp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); -+void soisfconnecting(register struct socket *); -+void soisfconnected(register struct socket *); -+void sofwdrain(struct socket *); -+struct iovec; /* For win32 */ -+size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np); -+int soreadbuf(struct socket *so, const char *buf, int size); -+ -+void sotranslate_out(struct socket *, struct sockaddr_storage *); -+void sotranslate_in(struct socket *, struct sockaddr_storage *); -+void sotranslate_accept(struct socket *); -+void sodrop(struct socket *, int num); -+ -+ -+#endif /* SLIRP_SOCKET_H */ -diff --git a/slirp/src/state.c b/slirp/src/state.c -new file 
mode 100644 -index 0000000..4a9824e ---- /dev/null -+++ b/slirp/src/state.c -@@ -0,0 +1,381 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * libslirp -+ * -+ * Copyright (c) 2004-2008 Fabrice Bellard -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. 
-+ */ -+#include "slirp.h" -+#include "vmstate.h" -+#include "stream.h" -+ -+static int slirp_tcp_post_load(void *opaque, int version) -+{ -+ tcp_template((struct tcpcb *)opaque); -+ -+ return 0; -+} -+ -+static const VMStateDescription vmstate_slirp_tcp = { -+ .name = "slirp-tcp", -+ .version_id = 0, -+ .post_load = slirp_tcp_post_load, -+ .fields = (VMStateField[]){ VMSTATE_INT16(t_state, struct tcpcb), -+ VMSTATE_INT16_ARRAY(t_timer, struct tcpcb, -+ TCPT_NTIMERS), -+ VMSTATE_INT16(t_rxtshift, struct tcpcb), -+ VMSTATE_INT16(t_rxtcur, struct tcpcb), -+ VMSTATE_INT16(t_dupacks, struct tcpcb), -+ VMSTATE_UINT16(t_maxseg, struct tcpcb), -+ VMSTATE_UINT8(t_force, struct tcpcb), -+ VMSTATE_UINT16(t_flags, struct tcpcb), -+ VMSTATE_UINT32(snd_una, struct tcpcb), -+ VMSTATE_UINT32(snd_nxt, struct tcpcb), -+ VMSTATE_UINT32(snd_up, struct tcpcb), -+ VMSTATE_UINT32(snd_wl1, struct tcpcb), -+ VMSTATE_UINT32(snd_wl2, struct tcpcb), -+ VMSTATE_UINT32(iss, struct tcpcb), -+ VMSTATE_UINT32(snd_wnd, struct tcpcb), -+ VMSTATE_UINT32(rcv_wnd, struct tcpcb), -+ VMSTATE_UINT32(rcv_nxt, struct tcpcb), -+ VMSTATE_UINT32(rcv_up, struct tcpcb), -+ VMSTATE_UINT32(irs, struct tcpcb), -+ VMSTATE_UINT32(rcv_adv, struct tcpcb), -+ VMSTATE_UINT32(snd_max, struct tcpcb), -+ VMSTATE_UINT32(snd_cwnd, struct tcpcb), -+ VMSTATE_UINT32(snd_ssthresh, struct tcpcb), -+ VMSTATE_INT16(t_idle, struct tcpcb), -+ VMSTATE_INT16(t_rtt, struct tcpcb), -+ VMSTATE_UINT32(t_rtseq, struct tcpcb), -+ VMSTATE_INT16(t_srtt, struct tcpcb), -+ VMSTATE_INT16(t_rttvar, struct tcpcb), -+ VMSTATE_UINT16(t_rttmin, struct tcpcb), -+ VMSTATE_UINT32(max_sndwnd, struct tcpcb), -+ VMSTATE_UINT8(t_oobflags, struct tcpcb), -+ VMSTATE_UINT8(t_iobc, struct tcpcb), -+ VMSTATE_INT16(t_softerror, struct tcpcb), -+ VMSTATE_UINT8(snd_scale, struct tcpcb), -+ VMSTATE_UINT8(rcv_scale, struct tcpcb), -+ VMSTATE_UINT8(request_r_scale, struct tcpcb), -+ VMSTATE_UINT8(requested_s_scale, struct tcpcb), -+ VMSTATE_UINT32(ts_recent, struct 
tcpcb), -+ VMSTATE_UINT32(ts_recent_age, struct tcpcb), -+ VMSTATE_UINT32(last_ack_sent, struct tcpcb), -+ VMSTATE_END_OF_LIST() } -+}; -+ -+/* The sbuf has a pair of pointers that are migrated as offsets; -+ * we calculate the offsets and restore the pointers using -+ * pre_save/post_load on a tmp structure. -+ */ -+struct sbuf_tmp { -+ struct sbuf *parent; -+ uint32_t roff, woff; -+}; -+ -+static int sbuf_tmp_pre_save(void *opaque) -+{ -+ struct sbuf_tmp *tmp = opaque; -+ tmp->woff = tmp->parent->sb_wptr - tmp->parent->sb_data; -+ tmp->roff = tmp->parent->sb_rptr - tmp->parent->sb_data; -+ -+ return 0; -+} -+ -+static int sbuf_tmp_post_load(void *opaque, int version) -+{ -+ struct sbuf_tmp *tmp = opaque; -+ uint32_t requested_len = tmp->parent->sb_datalen; -+ -+ /* Allocate the buffer space used by the field after the tmp */ -+ sbreserve(tmp->parent, tmp->parent->sb_datalen); -+ -+ if (tmp->parent->sb_datalen != requested_len) { -+ return -ENOMEM; -+ } -+ if (tmp->woff >= requested_len || tmp->roff >= requested_len) { -+ g_critical("invalid sbuf offsets r/w=%u/%u len=%u", tmp->roff, -+ tmp->woff, requested_len); -+ return -EINVAL; -+ } -+ -+ tmp->parent->sb_wptr = tmp->parent->sb_data + tmp->woff; -+ tmp->parent->sb_rptr = tmp->parent->sb_data + tmp->roff; -+ -+ return 0; -+} -+ -+ -+static const VMStateDescription vmstate_slirp_sbuf_tmp = { -+ .name = "slirp-sbuf-tmp", -+ .post_load = sbuf_tmp_post_load, -+ .pre_save = sbuf_tmp_pre_save, -+ .version_id = 0, -+ .fields = (VMStateField[]){ VMSTATE_UINT32(woff, struct sbuf_tmp), -+ VMSTATE_UINT32(roff, struct sbuf_tmp), -+ VMSTATE_END_OF_LIST() } -+}; -+ -+static const VMStateDescription vmstate_slirp_sbuf = { -+ .name = "slirp-sbuf", -+ .version_id = 0, -+ .fields = (VMStateField[]){ VMSTATE_UINT32(sb_cc, struct sbuf), -+ VMSTATE_UINT32(sb_datalen, struct sbuf), -+ VMSTATE_WITH_TMP(struct sbuf, struct sbuf_tmp, -+ vmstate_slirp_sbuf_tmp), -+ VMSTATE_VBUFFER_UINT32(sb_data, struct sbuf, 0, -+ NULL, sb_datalen), -+ 
VMSTATE_END_OF_LIST() } -+}; -+ -+static bool slirp_older_than_v4(void *opaque, int version_id) -+{ -+ return version_id < 4; -+} -+ -+static bool slirp_family_inet(void *opaque, int version_id) -+{ -+ union slirp_sockaddr *ssa = (union slirp_sockaddr *)opaque; -+ return ssa->ss.ss_family == AF_INET; -+} -+ -+static int slirp_socket_pre_load(void *opaque) -+{ -+ struct socket *so = opaque; -+ if (tcp_attach(so) < 0) { -+ return -ENOMEM; -+ } -+ /* Older versions don't load these fields */ -+ so->so_ffamily = AF_INET; -+ so->so_lfamily = AF_INET; -+ return 0; -+} -+ -+#ifndef _WIN32 -+#define VMSTATE_SIN4_ADDR(f, s, t) VMSTATE_UINT32_TEST(f, s, t) -+#else -+/* Win uses u_long rather than uint32_t - but it's still 32bits long */ -+#define VMSTATE_SIN4_ADDR(f, s, t) \ -+ VMSTATE_SINGLE_TEST(f, s, t, 0, slirp_vmstate_info_uint32, u_long) -+#endif -+ -+/* The OS provided ss_family field isn't that portable; it's size -+ * and type varies (16/8 bit, signed, unsigned) -+ * and the values it contains aren't fully portable. -+ */ -+typedef struct SS_FamilyTmpStruct { -+ union slirp_sockaddr *parent; -+ uint16_t portable_family; -+} SS_FamilyTmpStruct; -+ -+#define SS_FAMILY_MIG_IPV4 2 /* Linux, BSD, Win... 
*/ -+#define SS_FAMILY_MIG_IPV6 10 /* Linux */ -+#define SS_FAMILY_MIG_OTHER 0xffff -+ -+static int ss_family_pre_save(void *opaque) -+{ -+ SS_FamilyTmpStruct *tss = opaque; -+ -+ tss->portable_family = SS_FAMILY_MIG_OTHER; -+ -+ if (tss->parent->ss.ss_family == AF_INET) { -+ tss->portable_family = SS_FAMILY_MIG_IPV4; -+ } else if (tss->parent->ss.ss_family == AF_INET6) { -+ tss->portable_family = SS_FAMILY_MIG_IPV6; -+ } -+ -+ return 0; -+} -+ -+static int ss_family_post_load(void *opaque, int version_id) -+{ -+ SS_FamilyTmpStruct *tss = opaque; -+ -+ switch (tss->portable_family) { -+ case SS_FAMILY_MIG_IPV4: -+ tss->parent->ss.ss_family = AF_INET; -+ break; -+ case SS_FAMILY_MIG_IPV6: -+ case 23: /* compatibility: AF_INET6 from mingw */ -+ case 28: /* compatibility: AF_INET6 from FreeBSD sys/socket.h */ -+ tss->parent->ss.ss_family = AF_INET6; -+ break; -+ default: -+ g_critical("invalid ss_family type %x", tss->portable_family); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+static const VMStateDescription vmstate_slirp_ss_family = { -+ .name = "slirp-socket-addr/ss_family", -+ .pre_save = ss_family_pre_save, -+ .post_load = ss_family_post_load, -+ .fields = -+ (VMStateField[]){ VMSTATE_UINT16(portable_family, SS_FamilyTmpStruct), -+ VMSTATE_END_OF_LIST() } -+}; -+ -+static const VMStateDescription vmstate_slirp_socket_addr = { -+ .name = "slirp-socket-addr", -+ .version_id = 4, -+ .fields = -+ (VMStateField[]){ -+ VMSTATE_WITH_TMP(union slirp_sockaddr, SS_FamilyTmpStruct, -+ vmstate_slirp_ss_family), -+ VMSTATE_SIN4_ADDR(sin.sin_addr.s_addr, union slirp_sockaddr, -+ slirp_family_inet), -+ VMSTATE_UINT16_TEST(sin.sin_port, union slirp_sockaddr, -+ slirp_family_inet), -+ -+#if 0 -+ /* Untested: Needs checking by someone with IPv6 test */ -+ VMSTATE_BUFFER_TEST(sin6.sin6_addr, union slirp_sockaddr, -+ slirp_family_inet6), -+ VMSTATE_UINT16_TEST(sin6.sin6_port, union slirp_sockaddr, -+ slirp_family_inet6), -+ VMSTATE_UINT32_TEST(sin6.sin6_flowinfo, union 
slirp_sockaddr, -+ slirp_family_inet6), -+ VMSTATE_UINT32_TEST(sin6.sin6_scope_id, union slirp_sockaddr, -+ slirp_family_inet6), -+#endif -+ -+ VMSTATE_END_OF_LIST() } -+}; -+ -+static const VMStateDescription vmstate_slirp_socket = { -+ .name = "slirp-socket", -+ .version_id = 4, -+ .pre_load = slirp_socket_pre_load, -+ .fields = -+ (VMStateField[]){ -+ VMSTATE_UINT32(so_urgc, struct socket), -+ /* Pre-v4 versions */ -+ VMSTATE_SIN4_ADDR(so_faddr.s_addr, struct socket, -+ slirp_older_than_v4), -+ VMSTATE_SIN4_ADDR(so_laddr.s_addr, struct socket, -+ slirp_older_than_v4), -+ VMSTATE_UINT16_TEST(so_fport, struct socket, slirp_older_than_v4), -+ VMSTATE_UINT16_TEST(so_lport, struct socket, slirp_older_than_v4), -+ /* v4 and newer */ -+ VMSTATE_STRUCT(fhost, struct socket, 4, vmstate_slirp_socket_addr, -+ union slirp_sockaddr), -+ VMSTATE_STRUCT(lhost, struct socket, 4, vmstate_slirp_socket_addr, -+ union slirp_sockaddr), -+ -+ VMSTATE_UINT8(so_iptos, struct socket), -+ VMSTATE_UINT8(so_emu, struct socket), -+ VMSTATE_UINT8(so_type, struct socket), -+ VMSTATE_INT32(so_state, struct socket), -+ VMSTATE_STRUCT(so_rcv, struct socket, 0, vmstate_slirp_sbuf, -+ struct sbuf), -+ VMSTATE_STRUCT(so_snd, struct socket, 0, vmstate_slirp_sbuf, -+ struct sbuf), -+ VMSTATE_STRUCT_POINTER(so_tcpcb, struct socket, vmstate_slirp_tcp, -+ struct tcpcb), -+ VMSTATE_END_OF_LIST() } -+}; -+ -+static const VMStateDescription vmstate_slirp_bootp_client = { -+ .name = "slirp_bootpclient", -+ .fields = (VMStateField[]){ VMSTATE_UINT16(allocated, BOOTPClient), -+ VMSTATE_BUFFER(macaddr, BOOTPClient), -+ VMSTATE_END_OF_LIST() } -+}; -+ -+static const VMStateDescription vmstate_slirp = { -+ .name = "slirp", -+ .version_id = 4, -+ .fields = (VMStateField[]){ VMSTATE_UINT16_V(ip_id, Slirp, 2), -+ VMSTATE_STRUCT_ARRAY( -+ bootp_clients, Slirp, NB_BOOTP_CLIENTS, 3, -+ vmstate_slirp_bootp_client, BOOTPClient), -+ VMSTATE_END_OF_LIST() } -+}; -+ -+void slirp_state_save(Slirp *slirp, SlirpWriteCb 
write_cb, void *opaque) -+{ -+ struct gfwd_list *ex_ptr; -+ SlirpOStream f = { -+ .write_cb = write_cb, -+ .opaque = opaque, -+ }; -+ -+ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) -+ if (ex_ptr->write_cb) { -+ struct socket *so; -+ so = slirp_find_ctl_socket(slirp, ex_ptr->ex_addr, -+ ntohs(ex_ptr->ex_fport)); -+ if (!so) { -+ continue; -+ } -+ -+ slirp_ostream_write_u8(&f, 42); -+ slirp_vmstate_save_state(&f, &vmstate_slirp_socket, so); -+ } -+ slirp_ostream_write_u8(&f, 0); -+ -+ slirp_vmstate_save_state(&f, &vmstate_slirp, slirp); -+} -+ -+ -+int slirp_state_load(Slirp *slirp, int version_id, SlirpReadCb read_cb, -+ void *opaque) -+{ -+ struct gfwd_list *ex_ptr; -+ SlirpIStream f = { -+ .read_cb = read_cb, -+ .opaque = opaque, -+ }; -+ -+ while (slirp_istream_read_u8(&f)) { -+ int ret; -+ struct socket *so = socreate(slirp); -+ -+ ret = -+ slirp_vmstate_load_state(&f, &vmstate_slirp_socket, so, version_id); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) != -+ slirp->vnetwork_addr.s_addr) { -+ return -EINVAL; -+ } -+ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { -+ if (ex_ptr->write_cb && -+ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr && -+ so->so_fport == ex_ptr->ex_fport) { -+ break; -+ } -+ } -+ if (!ex_ptr) { -+ return -EINVAL; -+ } -+ } -+ -+ return slirp_vmstate_load_state(&f, &vmstate_slirp, slirp, version_id); -+} -+ -+int slirp_state_version(void) -+{ -+ return 4; -+} -diff --git a/slirp/src/stream.c b/slirp/src/stream.c -new file mode 100644 -index 0000000..6cf326f ---- /dev/null -+++ b/slirp/src/stream.c -@@ -0,0 +1,120 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * libslirp io streams -+ * -+ * Copyright (c) 2018 Red Hat, Inc. 
-+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. 
-+ */ -+#include "stream.h" -+#include -+ -+bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size) -+{ -+ return f->read_cb(buf, size, f->opaque) == size; -+} -+ -+bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size) -+{ -+ return f->write_cb(buf, size, f->opaque) == size; -+} -+ -+uint8_t slirp_istream_read_u8(SlirpIStream *f) -+{ -+ uint8_t b; -+ -+ if (slirp_istream_read(f, &b, sizeof(b))) { -+ return b; -+ } -+ -+ return 0; -+} -+ -+bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b) -+{ -+ return slirp_ostream_write(f, &b, sizeof(b)); -+} -+ -+uint16_t slirp_istream_read_u16(SlirpIStream *f) -+{ -+ uint16_t b; -+ -+ if (slirp_istream_read(f, &b, sizeof(b))) { -+ return GUINT16_FROM_BE(b); -+ } -+ -+ return 0; -+} -+ -+bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b) -+{ -+ b = GUINT16_TO_BE(b); -+ return slirp_ostream_write(f, &b, sizeof(b)); -+} -+ -+uint32_t slirp_istream_read_u32(SlirpIStream *f) -+{ -+ uint32_t b; -+ -+ if (slirp_istream_read(f, &b, sizeof(b))) { -+ return GUINT32_FROM_BE(b); -+ } -+ -+ return 0; -+} -+ -+bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b) -+{ -+ b = GUINT32_TO_BE(b); -+ return slirp_ostream_write(f, &b, sizeof(b)); -+} -+ -+int16_t slirp_istream_read_i16(SlirpIStream *f) -+{ -+ int16_t b; -+ -+ if (slirp_istream_read(f, &b, sizeof(b))) { -+ return GINT16_FROM_BE(b); -+ } -+ -+ return 0; -+} -+ -+bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b) -+{ -+ b = GINT16_TO_BE(b); -+ return slirp_ostream_write(f, &b, sizeof(b)); -+} -+ -+int32_t slirp_istream_read_i32(SlirpIStream *f) -+{ -+ int32_t b; -+ -+ if (slirp_istream_read(f, &b, sizeof(b))) { -+ return GINT32_FROM_BE(b); -+ } -+ -+ return 0; -+} -+ -+bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b) -+{ -+ b = GINT32_TO_BE(b); -+ return slirp_ostream_write(f, &b, sizeof(b)); -+} -diff --git a/slirp/src/stream.h b/slirp/src/stream.h -new file mode 100644 -index 0000000..08bb5b6 ---- /dev/null -+++ 
b/slirp/src/stream.h -@@ -0,0 +1,35 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+#ifndef STREAM_H_ -+#define STREAM_H_ -+ -+#include "libslirp.h" -+ -+typedef struct SlirpIStream { -+ SlirpReadCb read_cb; -+ void *opaque; -+} SlirpIStream; -+ -+typedef struct SlirpOStream { -+ SlirpWriteCb write_cb; -+ void *opaque; -+} SlirpOStream; -+ -+bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size); -+bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size); -+ -+uint8_t slirp_istream_read_u8(SlirpIStream *f); -+bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b); -+ -+uint16_t slirp_istream_read_u16(SlirpIStream *f); -+bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b); -+ -+uint32_t slirp_istream_read_u32(SlirpIStream *f); -+bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b); -+ -+int16_t slirp_istream_read_i16(SlirpIStream *f); -+bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b); -+ -+int32_t slirp_istream_read_i32(SlirpIStream *f); -+bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b); -+ -+#endif /* STREAM_H_ */ -diff --git a/slirp/src/tcp.h b/slirp/src/tcp.h -new file mode 100644 -index 0000000..22625e6 ---- /dev/null -+++ b/slirp/src/tcp.h -@@ -0,0 +1,181 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. 
Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)tcp.h 8.1 (Berkeley) 6/10/93 -+ * tcp.h,v 1.3 1994/08/21 05:27:34 paul Exp -+ */ -+ -+#ifndef TCP_H -+#define TCP_H -+ -+#include -+ -+typedef uint32_t tcp_seq; -+ -+#define PR_SLOWHZ 2 /* 2 slow timeouts per second (approx) */ -+#define PR_FASTHZ 5 /* 5 fast timeouts per second (not important) */ -+ -+#define TCP_SNDSPACE 8192 -+#define TCP_RCVSPACE 8192 -+ -+/* -+ * TCP header. -+ * Per RFC 793, September, 1981. 
-+ */ -+#define tcphdr slirp_tcphdr -+struct tcphdr { -+ uint16_t th_sport; /* source port */ -+ uint16_t th_dport; /* destination port */ -+ tcp_seq th_seq; /* sequence number */ -+ tcp_seq th_ack; /* acknowledgement number */ -+#if G_BYTE_ORDER == G_BIG_ENDIAN -+ uint8_t th_off : 4, /* data offset */ -+ th_x2 : 4; /* (unused) */ -+#else -+ uint8_t th_x2 : 4, /* (unused) */ -+ th_off : 4; /* data offset */ -+#endif -+ uint8_t th_flags; -+ uint16_t th_win; /* window */ -+ uint16_t th_sum; /* checksum */ -+ uint16_t th_urp; /* urgent pointer */ -+}; -+ -+#include "tcp_var.h" -+ -+#ifndef TH_FIN -+#define TH_FIN 0x01 -+#define TH_SYN 0x02 -+#define TH_RST 0x04 -+#define TH_PUSH 0x08 -+#define TH_ACK 0x10 -+#define TH_URG 0x20 -+#endif -+ -+#ifndef TCPOPT_EOL -+#define TCPOPT_EOL 0 -+#define TCPOPT_NOP 1 -+#define TCPOPT_MAXSEG 2 -+#define TCPOPT_WINDOW 3 -+#define TCPOPT_SACK_PERMITTED 4 /* Experimental */ -+#define TCPOPT_SACK 5 /* Experimental */ -+#define TCPOPT_TIMESTAMP 8 -+ -+#define TCPOPT_TSTAMP_HDR \ -+ (TCPOPT_NOP << 24 | TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 | \ -+ TCPOLEN_TIMESTAMP) -+#endif -+ -+#ifndef TCPOLEN_MAXSEG -+#define TCPOLEN_MAXSEG 4 -+#define TCPOLEN_WINDOW 3 -+#define TCPOLEN_SACK_PERMITTED 2 -+#define TCPOLEN_TIMESTAMP 10 -+#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP + 2) /* appendix A */ -+#endif -+ -+/* -+ * Default maximum segment size for TCP. -+ * With an IP MSS of 576, this is 536, -+ * but 512 is probably more convenient. -+ * This should be defined as MIN(512, IP_MSS - sizeof (struct tcpiphdr)). -+ * -+ * We make this 1460 because we only care about Ethernet in the qemu context. -+ */ -+#undef TCP_MSS -+#define TCP_MSS 1460 -+#undef TCP6_MSS -+#define TCP6_MSS 1440 -+ -+#undef TCP_MAXWIN -+#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */ -+ -+#undef TCP_MAX_WINSHIFT -+#define TCP_MAX_WINSHIFT 14 /* maximum window shift */ -+ -+/* -+ * User-settable options (used with setsockopt). 
-+ * -+ * We don't use the system headers on unix because we have conflicting -+ * local structures. We can't avoid the system definitions on Windows, -+ * so we undefine them. -+ */ -+#undef TCP_NODELAY -+#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ -+#undef TCP_MAXSEG -+ -+/* -+ * TCP FSM state definitions. -+ * Per RFC793, September, 1981. -+ */ -+ -+#define TCP_NSTATES 11 -+ -+#define TCPS_CLOSED 0 /* closed */ -+#define TCPS_LISTEN 1 /* listening for connection */ -+#define TCPS_SYN_SENT 2 /* active, have sent syn */ -+#define TCPS_SYN_RECEIVED 3 /* have send and received syn */ -+/* states < TCPS_ESTABLISHED are those where connections not established */ -+#define TCPS_ESTABLISHED 4 /* established */ -+#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */ -+/* states > TCPS_CLOSE_WAIT are those where user has closed */ -+#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */ -+#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */ -+#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */ -+/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */ -+#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */ -+#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */ -+ -+#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED) -+#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED) -+#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT) -+ -+/* -+ * TCP sequence numbers are 32 bit integers operated -+ * on with modular arithmetic. These macros can be -+ * used to compare such integers. -+ */ -+#define SEQ_LT(a, b) ((int)((a) - (b)) < 0) -+#define SEQ_LEQ(a, b) ((int)((a) - (b)) <= 0) -+#define SEQ_GT(a, b) ((int)((a) - (b)) > 0) -+#define SEQ_GEQ(a, b) ((int)((a) - (b)) >= 0) -+ -+/* -+ * Macros to initialize tcp sequence numbers for -+ * send and receive from initial send and receive -+ * sequence numbers. 
-+ */ -+#define tcp_rcvseqinit(tp) (tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1 -+ -+#define tcp_sendseqinit(tp) \ -+ (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = (tp)->iss -+ -+#define TCP_ISSINCR (125 * 1024) /* increment for tcp_iss each second */ -+ -+#endif -diff --git a/slirp/src/tcp_input.c b/slirp/src/tcp_input.c -new file mode 100644 -index 0000000..228c2aa ---- /dev/null -+++ b/slirp/src/tcp_input.c -@@ -0,0 +1,1540 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94 -+ * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp -+ */ -+ -+/* -+ * Changes and additions relating to SLiRP -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#include "slirp.h" -+#include "ip_icmp.h" -+ -+#define TCPREXMTTHRESH 3 -+ -+#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) -+ -+/* for modulo comparisons of timestamps */ -+#define TSTMP_LT(a, b) ((int)((a) - (b)) < 0) -+#define TSTMP_GEQ(a, b) ((int)((a) - (b)) >= 0) -+ -+/* -+ * Insert segment ti into reassembly queue of tcp with -+ * control block tp. Return TH_FIN if reassembly now includes -+ * a segment with FIN. The macro form does the common case inline -+ * (segment is the next to be received on an established connection, -+ * and the queue is empty), avoiding linkage into and removal -+ * from the queue and repetition of various conversions. -+ * Set DELACK for segments received in order, but ack immediately -+ * when segments are out of order (so fast retransmit can work). 
-+ */ -+#define TCP_REASS(tp, ti, m, so, flags) \ -+ { \ -+ if ((ti)->ti_seq == (tp)->rcv_nxt && tcpfrag_list_empty(tp) && \ -+ (tp)->t_state == TCPS_ESTABLISHED) { \ -+ tp->t_flags |= TF_DELACK; \ -+ (tp)->rcv_nxt += (ti)->ti_len; \ -+ flags = (ti)->ti_flags & TH_FIN; \ -+ if (so->so_emu) { \ -+ if (tcp_emu((so), (m))) \ -+ sbappend(so, (m)); \ -+ } else \ -+ sbappend((so), (m)); \ -+ } else { \ -+ (flags) = tcp_reass((tp), (ti), (m)); \ -+ tp->t_flags |= TF_ACKNOW; \ -+ } \ -+ } -+ -+static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, -+ struct tcpiphdr *ti); -+static void tcp_xmit_timer(register struct tcpcb *tp, int rtt); -+ -+static int tcp_reass(register struct tcpcb *tp, register struct tcpiphdr *ti, -+ struct mbuf *m) -+{ -+ register struct tcpiphdr *q; -+ struct socket *so = tp->t_socket; -+ int flags; -+ -+ /* -+ * Call with ti==NULL after become established to -+ * force pre-ESTABLISHED data up to user socket. -+ */ -+ if (ti == NULL) -+ goto present; -+ -+ /* -+ * Find a segment which begins after this one does. -+ */ -+ for (q = tcpfrag_list_first(tp); !tcpfrag_list_end(q, tp); -+ q = tcpiphdr_next(q)) -+ if (SEQ_GT(q->ti_seq, ti->ti_seq)) -+ break; -+ -+ /* -+ * If there is a preceding segment, it may provide some of -+ * our data already. If so, drop the data from the incoming -+ * segment. If it provides all of our data, drop us. -+ */ -+ if (!tcpfrag_list_end(tcpiphdr_prev(q), tp)) { -+ register int i; -+ q = tcpiphdr_prev(q); -+ /* conversion to int (in i) handles seq wraparound */ -+ i = q->ti_seq + q->ti_len - ti->ti_seq; -+ if (i > 0) { -+ if (i >= ti->ti_len) { -+ m_free(m); -+ /* -+ * Try to present any queued data -+ * at the left window edge to the user. -+ * This is needed after the 3-WHS -+ * completes. -+ */ -+ goto present; /* ??? 
*/ -+ } -+ m_adj(m, i); -+ ti->ti_len -= i; -+ ti->ti_seq += i; -+ } -+ q = tcpiphdr_next(q); -+ } -+ ti->ti_mbuf = m; -+ -+ /* -+ * While we overlap succeeding segments trim them or, -+ * if they are completely covered, dequeue them. -+ */ -+ while (!tcpfrag_list_end(q, tp)) { -+ register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; -+ if (i <= 0) -+ break; -+ if (i < q->ti_len) { -+ q->ti_seq += i; -+ q->ti_len -= i; -+ m_adj(q->ti_mbuf, i); -+ break; -+ } -+ q = tcpiphdr_next(q); -+ m = tcpiphdr_prev(q)->ti_mbuf; -+ remque(tcpiphdr2qlink(tcpiphdr_prev(q))); -+ m_free(m); -+ } -+ -+ /* -+ * Stick new segment in its place. -+ */ -+ insque(tcpiphdr2qlink(ti), tcpiphdr2qlink(tcpiphdr_prev(q))); -+ -+present: -+ /* -+ * Present data to user, advancing rcv_nxt through -+ * completed sequence space. -+ */ -+ if (!TCPS_HAVEESTABLISHED(tp->t_state)) -+ return (0); -+ ti = tcpfrag_list_first(tp); -+ if (tcpfrag_list_end(ti, tp) || ti->ti_seq != tp->rcv_nxt) -+ return (0); -+ if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) -+ return (0); -+ do { -+ tp->rcv_nxt += ti->ti_len; -+ flags = ti->ti_flags & TH_FIN; -+ remque(tcpiphdr2qlink(ti)); -+ m = ti->ti_mbuf; -+ ti = tcpiphdr_next(ti); -+ if (so->so_state & SS_FCANTSENDMORE) -+ m_free(m); -+ else { -+ if (so->so_emu) { -+ if (tcp_emu(so, m)) -+ sbappend(so, m); -+ } else -+ sbappend(so, m); -+ } -+ } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); -+ return (flags); -+} -+ -+/* -+ * TCP input routine, follows pages 65-76 of the -+ * protocol specification dated September, 1981 very closely. 
-+ */ -+void tcp_input(struct mbuf *m, int iphlen, struct socket *inso, -+ unsigned short af) -+{ -+ struct ip save_ip, *ip; -+ struct ip6 save_ip6, *ip6; -+ register struct tcpiphdr *ti; -+ char *optp = NULL; -+ int optlen = 0; -+ int len, tlen, off; -+ register struct tcpcb *tp = NULL; -+ register int tiflags; -+ struct socket *so = NULL; -+ int todrop, acked, ourfinisacked, needoutput = 0; -+ int iss = 0; -+ uint32_t tiwin; -+ int ret; -+ struct sockaddr_storage lhost, fhost; -+ struct sockaddr_in *lhost4, *fhost4; -+ struct sockaddr_in6 *lhost6, *fhost6; -+ struct gfwd_list *ex_ptr; -+ Slirp *slirp; -+ -+ DEBUG_CALL("tcp_input"); -+ DEBUG_ARG("m = %p iphlen = %2d inso = %p", m, iphlen, inso); -+ -+ /* -+ * If called with m == 0, then we're continuing the connect -+ */ -+ if (m == NULL) { -+ so = inso; -+ slirp = so->slirp; -+ -+ /* Re-set a few variables */ -+ tp = sototcpcb(so); -+ m = so->so_m; -+ so->so_m = NULL; -+ ti = so->so_ti; -+ tiwin = ti->ti_win; -+ tiflags = ti->ti_flags; -+ -+ goto cont_conn; -+ } -+ slirp = m->slirp; -+ -+ ip = mtod(m, struct ip *); -+ ip6 = mtod(m, struct ip6 *); -+ -+ switch (af) { -+ case AF_INET: -+ if (iphlen > sizeof(struct ip)) { -+ ip_stripoptions(m, (struct mbuf *)0); -+ iphlen = sizeof(struct ip); -+ } -+ /* XXX Check if too short */ -+ -+ -+ /* -+ * Save a copy of the IP header in case we want restore it -+ * for sending an ICMP error message in response. -+ */ -+ save_ip = *ip; -+ save_ip.ip_len += iphlen; -+ -+ /* -+ * Get IP and TCP header together in first mbuf. -+ * Note: IP leaves IP header in first mbuf. -+ */ -+ m->m_data -= -+ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); -+ m->m_len += -+ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); -+ ti = mtod(m, struct tcpiphdr *); -+ -+ /* -+ * Checksum extended TCP header and data. 
-+ */ -+ tlen = ip->ip_len; -+ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; -+ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); -+ memset(&ti->ti, 0, sizeof(ti->ti)); -+ ti->ti_x0 = 0; -+ ti->ti_src = save_ip.ip_src; -+ ti->ti_dst = save_ip.ip_dst; -+ ti->ti_pr = save_ip.ip_p; -+ ti->ti_len = htons((uint16_t)tlen); -+ break; -+ -+ case AF_INET6: -+ /* -+ * Save a copy of the IP header in case we want restore it -+ * for sending an ICMP error message in response. -+ */ -+ save_ip6 = *ip6; -+ /* -+ * Get IP and TCP header together in first mbuf. -+ * Note: IP leaves IP header in first mbuf. -+ */ -+ m->m_data -= sizeof(struct tcpiphdr) - -+ (sizeof(struct ip6) + sizeof(struct tcphdr)); -+ m->m_len += sizeof(struct tcpiphdr) - -+ (sizeof(struct ip6) + sizeof(struct tcphdr)); -+ ti = mtod(m, struct tcpiphdr *); -+ -+ tlen = ip6->ip_pl; -+ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; -+ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); -+ memset(&ti->ti, 0, sizeof(ti->ti)); -+ ti->ti_x0 = 0; -+ ti->ti_src6 = save_ip6.ip_src; -+ ti->ti_dst6 = save_ip6.ip_dst; -+ ti->ti_nh6 = save_ip6.ip_nh; -+ ti->ti_len = htons((uint16_t)tlen); -+ break; -+ -+ default: -+ g_assert_not_reached(); -+ } -+ -+ len = ((sizeof(struct tcpiphdr) - sizeof(struct tcphdr)) + tlen); -+ if (cksum(m, len)) { -+ goto drop; -+ } -+ -+ /* -+ * Check that TCP offset makes sense, -+ * pull out TCP options and adjust length. XXX -+ */ -+ off = ti->ti_off << 2; -+ if (off < sizeof(struct tcphdr) || off > tlen) { -+ goto drop; -+ } -+ tlen -= off; -+ ti->ti_len = tlen; -+ if (off > sizeof(struct tcphdr)) { -+ optlen = off - sizeof(struct tcphdr); -+ optp = mtod(m, char *) + sizeof(struct tcpiphdr); -+ } -+ tiflags = ti->ti_flags; -+ -+ /* -+ * Convert TCP protocol specific fields to host format. -+ */ -+ NTOHL(ti->ti_seq); -+ NTOHL(ti->ti_ack); -+ NTOHS(ti->ti_win); -+ NTOHS(ti->ti_urp); -+ -+ /* -+ * Drop TCP, IP headers and TCP options. 
-+ */ -+ m->m_data += sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); -+ m->m_len -= sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); -+ -+ /* -+ * Locate pcb for segment. -+ */ -+findso: -+ lhost.ss_family = af; -+ fhost.ss_family = af; -+ switch (af) { -+ case AF_INET: -+ lhost4 = (struct sockaddr_in *)&lhost; -+ lhost4->sin_addr = ti->ti_src; -+ lhost4->sin_port = ti->ti_sport; -+ fhost4 = (struct sockaddr_in *)&fhost; -+ fhost4->sin_addr = ti->ti_dst; -+ fhost4->sin_port = ti->ti_dport; -+ break; -+ case AF_INET6: -+ lhost6 = (struct sockaddr_in6 *)&lhost; -+ lhost6->sin6_addr = ti->ti_src6; -+ lhost6->sin6_port = ti->ti_sport; -+ fhost6 = (struct sockaddr_in6 *)&fhost; -+ fhost6->sin6_addr = ti->ti_dst6; -+ fhost6->sin6_port = ti->ti_dport; -+ break; -+ default: -+ g_assert_not_reached(); -+ } -+ -+ so = solookup(&slirp->tcp_last_so, &slirp->tcb, &lhost, &fhost); -+ -+ /* -+ * If the state is CLOSED (i.e., TCB does not exist) then -+ * all data in the incoming segment is discarded. -+ * If the TCB exists but is in CLOSED state, it is embryonic, -+ * but should either do a listen or a connect soon. -+ * -+ * state == CLOSED means we've done socreate() but haven't -+ * attached it to a protocol yet... -+ * -+ * XXX If a TCB does not exist, and the TH_SYN flag is -+ * the only flag set, then create a session, mark it -+ * as if it was LISTENING, and continue... -+ */ -+ if (so == NULL) { -+ /* TODO: IPv6 */ -+ if (slirp->restricted) { -+ /* Any hostfwds will have an existing socket, so we only get here -+ * for non-hostfwd connections. These should be dropped, unless it -+ * happens to be a guestfwd. 
-+ */ -+ for (ex_ptr = slirp->guestfwd_list; ex_ptr; -+ ex_ptr = ex_ptr->ex_next) { -+ if (ex_ptr->ex_fport == ti->ti_dport && -+ ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) { -+ break; -+ } -+ } -+ if (!ex_ptr) { -+ goto dropwithreset; -+ } -+ } -+ -+ if ((tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) != TH_SYN) -+ goto dropwithreset; -+ -+ so = socreate(slirp); -+ if (tcp_attach(so) < 0) { -+ g_free(so); /* Not sofree (if it failed, it's not insqued) */ -+ goto dropwithreset; -+ } -+ -+ sbreserve(&so->so_snd, TCP_SNDSPACE); -+ sbreserve(&so->so_rcv, TCP_RCVSPACE); -+ -+ so->lhost.ss = lhost; -+ so->fhost.ss = fhost; -+ -+ so->so_iptos = tcp_tos(so); -+ if (so->so_iptos == 0) { -+ switch (af) { -+ case AF_INET: -+ so->so_iptos = ((struct ip *)ti)->ip_tos; -+ break; -+ case AF_INET6: -+ break; -+ default: -+ g_assert_not_reached(); -+ } -+ } -+ -+ tp = sototcpcb(so); -+ tp->t_state = TCPS_LISTEN; -+ } -+ -+ /* -+ * If this is a still-connecting socket, this probably -+ * a retransmit of the SYN. Whether it's a retransmit SYN -+ * or something else, we nuke it. -+ */ -+ if (so->so_state & SS_ISFCONNECTING) -+ goto drop; -+ -+ tp = sototcpcb(so); -+ -+ /* XXX Should never fail */ -+ if (tp == NULL) -+ goto dropwithreset; -+ if (tp->t_state == TCPS_CLOSED) -+ goto drop; -+ -+ tiwin = ti->ti_win; -+ -+ /* -+ * Segment received on connection. -+ * Reset idle time and keep-alive timer. -+ */ -+ tp->t_idle = 0; -+ if (slirp_do_keepalive) -+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; -+ else -+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; -+ -+ /* -+ * Process options if not in LISTEN state, -+ * else do it below (after getting remote address). -+ */ -+ if (optp && tp->t_state != TCPS_LISTEN) -+ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); -+ -+ /* -+ * Header prediction: check for the two common cases -+ * of a uni-directional data xfer. 
If the packet has -+ * no control flags, is in-sequence, the window didn't -+ * change and we're not retransmitting, it's a -+ * candidate. If the length is zero and the ack moved -+ * forward, we're the sender side of the xfer. Just -+ * free the data acked & wake any higher level process -+ * that was blocked waiting for space. If the length -+ * is non-zero and the ack didn't move, we're the -+ * receiver side. If we're getting packets in-order -+ * (the reassembly queue is empty), add the data to -+ * the socket buffer and note that we need a delayed ack. -+ * -+ * XXX Some of these tests are not needed -+ * eg: the tiwin == tp->snd_wnd prevents many more -+ * predictions.. with no *real* advantage.. -+ */ -+ if (tp->t_state == TCPS_ESTABLISHED && -+ (tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) == TH_ACK && -+ ti->ti_seq == tp->rcv_nxt && tiwin && tiwin == tp->snd_wnd && -+ tp->snd_nxt == tp->snd_max) { -+ if (ti->ti_len == 0) { -+ if (SEQ_GT(ti->ti_ack, tp->snd_una) && -+ SEQ_LEQ(ti->ti_ack, tp->snd_max) && -+ tp->snd_cwnd >= tp->snd_wnd) { -+ /* -+ * this is a pure ack for outstanding data. -+ */ -+ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) -+ tcp_xmit_timer(tp, tp->t_rtt); -+ acked = ti->ti_ack - tp->snd_una; -+ sodrop(so, acked); -+ tp->snd_una = ti->ti_ack; -+ m_free(m); -+ -+ /* -+ * If all outstanding data are acked, stop -+ * retransmit timer, otherwise restart timer -+ * using current (possibly backed-off) value. -+ * If process is waiting for space, -+ * wakeup/selwakeup/signal. If data -+ * are ready to send, let tcp_output -+ * decide between more output or persist. -+ */ -+ if (tp->snd_una == tp->snd_max) -+ tp->t_timer[TCPT_REXMT] = 0; -+ else if (tp->t_timer[TCPT_PERSIST] == 0) -+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; -+ -+ /* -+ * This is called because sowwakeup might have -+ * put data into so_snd. Since we don't so sowwakeup, -+ * we don't need this.. XXX??? 
-+ */ -+ if (so->so_snd.sb_cc) -+ (void)tcp_output(tp); -+ -+ return; -+ } -+ } else if (ti->ti_ack == tp->snd_una && tcpfrag_list_empty(tp) && -+ ti->ti_len <= sbspace(&so->so_rcv)) { -+ /* -+ * this is a pure, in-sequence data packet -+ * with nothing on the reassembly queue and -+ * we have enough buffer space to take it. -+ */ -+ tp->rcv_nxt += ti->ti_len; -+ /* -+ * Add data to socket buffer. -+ */ -+ if (so->so_emu) { -+ if (tcp_emu(so, m)) -+ sbappend(so, m); -+ } else -+ sbappend(so, m); -+ -+ /* -+ * If this is a short packet, then ACK now - with Nagel -+ * congestion avoidance sender won't send more until -+ * he gets an ACK. -+ * -+ * It is better to not delay acks at all to maximize -+ * TCP throughput. See RFC 2581. -+ */ -+ tp->t_flags |= TF_ACKNOW; -+ tcp_output(tp); -+ return; -+ } -+ } /* header prediction */ -+ /* -+ * Calculate amount of space in receive window, -+ * and then do TCP input processing. -+ * Receive window is amount of space in rcv queue, -+ * but not less than advertised window. -+ */ -+ { -+ int win; -+ win = sbspace(&so->so_rcv); -+ if (win < 0) -+ win = 0; -+ tp->rcv_wnd = MAX(win, (int)(tp->rcv_adv - tp->rcv_nxt)); -+ } -+ -+ switch (tp->t_state) { -+ /* -+ * If the state is LISTEN then ignore segment if it contains an RST. -+ * If the segment contains an ACK then it is bad and send a RST. -+ * If it does not contain a SYN then it is not interesting; drop it. -+ * Don't bother responding if the destination was a broadcast. -+ * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial -+ * tp->iss, and send a segment: -+ * -+ * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. -+ * Fill in remote peer address fields if not previously specified. -+ * Enter SYN_RECEIVED state, and process any other fields of this -+ * segment in this state. 
-+ */ -+ case TCPS_LISTEN: { -+ if (tiflags & TH_RST) -+ goto drop; -+ if (tiflags & TH_ACK) -+ goto dropwithreset; -+ if ((tiflags & TH_SYN) == 0) -+ goto drop; -+ -+ /* -+ * This has way too many gotos... -+ * But a bit of spaghetti code never hurt anybody :) -+ */ -+ -+ /* -+ * If this is destined for the control address, then flag to -+ * tcp_ctl once connected, otherwise connect -+ */ -+ /* TODO: IPv6 */ -+ if (af == AF_INET && -+ (so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == -+ slirp->vnetwork_addr.s_addr) { -+ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr && -+ so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) { -+ /* May be an add exec */ -+ for (ex_ptr = slirp->guestfwd_list; ex_ptr; -+ ex_ptr = ex_ptr->ex_next) { -+ if (ex_ptr->ex_fport == so->so_fport && -+ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { -+ so->so_state |= SS_CTL; -+ break; -+ } -+ } -+ if (so->so_state & SS_CTL) { -+ goto cont_input; -+ } -+ } -+ /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */ -+ } -+ -+ if (so->so_emu & EMU_NOCONNECT) { -+ so->so_emu &= ~EMU_NOCONNECT; -+ goto cont_input; -+ } -+ -+ if ((tcp_fconnect(so, so->so_ffamily) == -1) && (errno != EAGAIN) && -+ (errno != EINPROGRESS) && (errno != EWOULDBLOCK)) { -+ uint8_t code; -+ DEBUG_MISC(" tcp fconnect errno = %d-%s", errno, strerror(errno)); -+ if (errno == ECONNREFUSED) { -+ /* ACK the SYN, send RST to refuse the connection */ -+ tcp_respond(tp, ti, m, ti->ti_seq + 1, (tcp_seq)0, -+ TH_RST | TH_ACK, af); -+ } else { -+ switch (af) { -+ case AF_INET: -+ code = ICMP_UNREACH_NET; -+ if (errno == EHOSTUNREACH) { -+ code = ICMP_UNREACH_HOST; -+ } -+ break; -+ case AF_INET6: -+ code = ICMP6_UNREACH_NO_ROUTE; -+ if (errno == EHOSTUNREACH) { -+ code = ICMP6_UNREACH_ADDRESS; -+ } -+ break; -+ default: -+ g_assert_not_reached(); -+ } -+ HTONL(ti->ti_seq); /* restore tcp header */ -+ HTONL(ti->ti_ack); -+ HTONS(ti->ti_win); -+ HTONS(ti->ti_urp); -+ m->m_data -= -+ sizeof(struct tcpiphdr) + off - 
sizeof(struct tcphdr); -+ m->m_len += -+ sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); -+ switch (af) { -+ case AF_INET: -+ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct ip) - -+ sizeof(struct tcphdr); -+ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct ip) - -+ sizeof(struct tcphdr); -+ *ip = save_ip; -+ icmp_send_error(m, ICMP_UNREACH, code, 0, strerror(errno)); -+ break; -+ case AF_INET6: -+ m->m_data += sizeof(struct tcpiphdr) - -+ (sizeof(struct ip6) + sizeof(struct tcphdr)); -+ m->m_len -= sizeof(struct tcpiphdr) - -+ (sizeof(struct ip6) + sizeof(struct tcphdr)); -+ *ip6 = save_ip6; -+ icmp6_send_error(m, ICMP6_UNREACH, code); -+ break; -+ default: -+ g_assert_not_reached(); -+ } -+ } -+ tcp_close(tp); -+ m_free(m); -+ } else { -+ /* -+ * Haven't connected yet, save the current mbuf -+ * and ti, and return -+ * XXX Some OS's don't tell us whether the connect() -+ * succeeded or not. So we must time it out. -+ */ -+ so->so_m = m; -+ so->so_ti = ti; -+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; -+ tp->t_state = TCPS_SYN_RECEIVED; -+ /* -+ * Initialize receive sequence numbers now so that we can send a -+ * valid RST if the remote end rejects our connection. -+ */ -+ tp->irs = ti->ti_seq; -+ tcp_rcvseqinit(tp); -+ tcp_template(tp); -+ } -+ return; -+ -+ cont_conn: -+ /* m==NULL -+ * Check if the connect succeeded -+ */ -+ if (so->so_state & SS_NOFDREF) { -+ tp = tcp_close(tp); -+ goto dropwithreset; -+ } -+ cont_input: -+ tcp_template(tp); -+ -+ if (optp) -+ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); -+ -+ if (iss) -+ tp->iss = iss; -+ else -+ tp->iss = slirp->tcp_iss; -+ slirp->tcp_iss += TCP_ISSINCR / 2; -+ tp->irs = ti->ti_seq; -+ tcp_sendseqinit(tp); -+ tcp_rcvseqinit(tp); -+ tp->t_flags |= TF_ACKNOW; -+ tp->t_state = TCPS_SYN_RECEIVED; -+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; -+ goto trimthenstep6; -+ } /* case TCPS_LISTEN */ -+ -+ /* -+ * If the state is SYN_SENT: -+ * if seg contains an ACK, but not for our SYN, drop the input. 
-+ * if seg contains a RST, then drop the connection. -+ * if seg does not contain SYN, then drop it. -+ * Otherwise this is an acceptable SYN segment -+ * initialize tp->rcv_nxt and tp->irs -+ * if seg contains ack then advance tp->snd_una -+ * if SYN has been acked change to ESTABLISHED else SYN_RCVD state -+ * arrange for segment to be acked (eventually) -+ * continue processing rest of data/controls, beginning with URG -+ */ -+ case TCPS_SYN_SENT: -+ if ((tiflags & TH_ACK) && -+ (SEQ_LEQ(ti->ti_ack, tp->iss) || SEQ_GT(ti->ti_ack, tp->snd_max))) -+ goto dropwithreset; -+ -+ if (tiflags & TH_RST) { -+ if (tiflags & TH_ACK) { -+ tcp_drop(tp, 0); /* XXX Check t_softerror! */ -+ } -+ goto drop; -+ } -+ -+ if ((tiflags & TH_SYN) == 0) -+ goto drop; -+ if (tiflags & TH_ACK) { -+ tp->snd_una = ti->ti_ack; -+ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) -+ tp->snd_nxt = tp->snd_una; -+ } -+ -+ tp->t_timer[TCPT_REXMT] = 0; -+ tp->irs = ti->ti_seq; -+ tcp_rcvseqinit(tp); -+ tp->t_flags |= TF_ACKNOW; -+ if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { -+ soisfconnected(so); -+ tp->t_state = TCPS_ESTABLISHED; -+ -+ (void)tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); -+ /* -+ * if we didn't have to retransmit the SYN, -+ * use its rtt as our initial srtt & rtt var. -+ */ -+ if (tp->t_rtt) -+ tcp_xmit_timer(tp, tp->t_rtt); -+ } else -+ tp->t_state = TCPS_SYN_RECEIVED; -+ -+ trimthenstep6: -+ /* -+ * Advance ti->ti_seq to correspond to first data byte. -+ * If data, trim to stay within window, -+ * dropping FIN if necessary. -+ */ -+ ti->ti_seq++; -+ if (ti->ti_len > tp->rcv_wnd) { -+ todrop = ti->ti_len - tp->rcv_wnd; -+ m_adj(m, -todrop); -+ ti->ti_len = tp->rcv_wnd; -+ tiflags &= ~TH_FIN; -+ } -+ tp->snd_wl1 = ti->ti_seq - 1; -+ tp->rcv_up = ti->ti_seq; -+ goto step6; -+ } /* switch tp->t_state */ -+ /* -+ * States other than LISTEN or SYN_SENT. -+ * Check that at least some bytes of segment are within -+ * receive window. 
If segment begins before rcv_nxt, -+ * drop leading data (and SYN); if nothing left, just ack. -+ */ -+ todrop = tp->rcv_nxt - ti->ti_seq; -+ if (todrop > 0) { -+ if (tiflags & TH_SYN) { -+ tiflags &= ~TH_SYN; -+ ti->ti_seq++; -+ if (ti->ti_urp > 1) -+ ti->ti_urp--; -+ else -+ tiflags &= ~TH_URG; -+ todrop--; -+ } -+ /* -+ * Following if statement from Stevens, vol. 2, p. 960. -+ */ -+ if (todrop > ti->ti_len || -+ (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) { -+ /* -+ * Any valid FIN must be to the left of the window. -+ * At this point the FIN must be a duplicate or out -+ * of sequence; drop it. -+ */ -+ tiflags &= ~TH_FIN; -+ -+ /* -+ * Send an ACK to resynchronize and drop any data. -+ * But keep on processing for RST or ACK. -+ */ -+ tp->t_flags |= TF_ACKNOW; -+ todrop = ti->ti_len; -+ } -+ m_adj(m, todrop); -+ ti->ti_seq += todrop; -+ ti->ti_len -= todrop; -+ if (ti->ti_urp > todrop) -+ ti->ti_urp -= todrop; -+ else { -+ tiflags &= ~TH_URG; -+ ti->ti_urp = 0; -+ } -+ } -+ /* -+ * If new data are received on a connection after the -+ * user processes are gone, then RST the other end. -+ */ -+ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && -+ ti->ti_len) { -+ tp = tcp_close(tp); -+ goto dropwithreset; -+ } -+ -+ /* -+ * If segment ends after window, drop trailing data -+ * (and PUSH and FIN); if nothing left, just ACK. -+ */ -+ todrop = (ti->ti_seq + ti->ti_len) - (tp->rcv_nxt + tp->rcv_wnd); -+ if (todrop > 0) { -+ if (todrop >= ti->ti_len) { -+ /* -+ * If a new connection request is received -+ * while in TIME_WAIT, drop the old connection -+ * and start over if the sequence numbers -+ * are above the previous ones. -+ */ -+ if (tiflags & TH_SYN && tp->t_state == TCPS_TIME_WAIT && -+ SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { -+ iss = tp->rcv_nxt + TCP_ISSINCR; -+ tp = tcp_close(tp); -+ goto findso; -+ } -+ /* -+ * If window is closed can only take segments at -+ * window edge, and have to drop data and PUSH from -+ * incoming segments. 
Continue processing, but -+ * remember to ack. Otherwise, drop segment -+ * and ack. -+ */ -+ if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { -+ tp->t_flags |= TF_ACKNOW; -+ } else { -+ goto dropafterack; -+ } -+ } -+ m_adj(m, -todrop); -+ ti->ti_len -= todrop; -+ tiflags &= ~(TH_PUSH | TH_FIN); -+ } -+ -+ /* -+ * If the RST bit is set examine the state: -+ * SYN_RECEIVED STATE: -+ * If passive open, return to LISTEN state. -+ * If active open, inform user that connection was refused. -+ * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: -+ * Inform user that connection was reset, and close tcb. -+ * CLOSING, LAST_ACK, TIME_WAIT STATES -+ * Close the tcb. -+ */ -+ if (tiflags & TH_RST) -+ switch (tp->t_state) { -+ case TCPS_SYN_RECEIVED: -+ case TCPS_ESTABLISHED: -+ case TCPS_FIN_WAIT_1: -+ case TCPS_FIN_WAIT_2: -+ case TCPS_CLOSE_WAIT: -+ tp->t_state = TCPS_CLOSED; -+ tcp_close(tp); -+ goto drop; -+ -+ case TCPS_CLOSING: -+ case TCPS_LAST_ACK: -+ case TCPS_TIME_WAIT: -+ tcp_close(tp); -+ goto drop; -+ } -+ -+ /* -+ * If a SYN is in the window, then this is an -+ * error and we send an RST and drop the connection. -+ */ -+ if (tiflags & TH_SYN) { -+ tp = tcp_drop(tp, 0); -+ goto dropwithreset; -+ } -+ -+ /* -+ * If the ACK bit is off we drop the segment and return. -+ */ -+ if ((tiflags & TH_ACK) == 0) -+ goto drop; -+ -+ /* -+ * Ack processing. -+ */ -+ switch (tp->t_state) { -+ /* -+ * In SYN_RECEIVED state if the ack ACKs our SYN then enter -+ * ESTABLISHED state and continue processing, otherwise -+ * send an RST. una<=ack<=max -+ */ -+ case TCPS_SYN_RECEIVED: -+ -+ if (SEQ_GT(tp->snd_una, ti->ti_ack) || SEQ_GT(ti->ti_ack, tp->snd_max)) -+ goto dropwithreset; -+ tp->t_state = TCPS_ESTABLISHED; -+ /* -+ * The sent SYN is ack'ed with our sequence number +1 -+ * The first data byte already in the buffer will get -+ * lost if no correction is made. This is only needed for -+ * SS_CTL since the buffer is empty otherwise. 
-+ * tp->snd_una++; or: -+ */ -+ tp->snd_una = ti->ti_ack; -+ if (so->so_state & SS_CTL) { -+ /* So tcp_ctl reports the right state */ -+ ret = tcp_ctl(so); -+ if (ret == 1) { -+ soisfconnected(so); -+ so->so_state &= ~SS_CTL; /* success XXX */ -+ } else if (ret == 2) { -+ so->so_state &= SS_PERSISTENT_MASK; -+ so->so_state |= SS_NOFDREF; /* CTL_CMD */ -+ } else { -+ needoutput = 1; -+ tp->t_state = TCPS_FIN_WAIT_1; -+ } -+ } else { -+ soisfconnected(so); -+ } -+ -+ (void)tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); -+ tp->snd_wl1 = ti->ti_seq - 1; -+ /* Avoid ack processing; snd_una==ti_ack => dup ack */ -+ goto synrx_to_est; -+ /* fall into ... */ -+ -+ /* -+ * In ESTABLISHED state: drop duplicate ACKs; ACK out of range -+ * ACKs. If the ack is in the range -+ * tp->snd_una < ti->ti_ack <= tp->snd_max -+ * then advance tp->snd_una to ti->ti_ack and drop -+ * data from the retransmission queue. If this ACK reflects -+ * more up to date window information we update our window information. -+ */ -+ case TCPS_ESTABLISHED: -+ case TCPS_FIN_WAIT_1: -+ case TCPS_FIN_WAIT_2: -+ case TCPS_CLOSE_WAIT: -+ case TCPS_CLOSING: -+ case TCPS_LAST_ACK: -+ case TCPS_TIME_WAIT: -+ -+ if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { -+ if (ti->ti_len == 0 && tiwin == tp->snd_wnd) { -+ DEBUG_MISC(" dup ack m = %p so = %p", m, so); -+ /* -+ * If we have outstanding data (other than -+ * a window probe), this is a completely -+ * duplicate ack (ie, window info didn't -+ * change), the ack is the biggest we've -+ * seen and we've seen exactly our rexmt -+ * threshold of them, assume a packet -+ * has been dropped and retransmit it. -+ * Kludge snd_nxt & the congestion -+ * window so we send only this one -+ * packet. -+ * -+ * We know we're losing at the current -+ * window size so do congestion avoidance -+ * (set ssthresh to half the current window -+ * and pull our congestion window back to -+ * the new ssthresh). 
-+ * -+ * Dup acks mean that packets have left the -+ * network (they're now cached at the receiver) -+ * so bump cwnd by the amount in the receiver -+ * to keep a constant cwnd packets in the -+ * network. -+ */ -+ if (tp->t_timer[TCPT_REXMT] == 0 || ti->ti_ack != tp->snd_una) -+ tp->t_dupacks = 0; -+ else if (++tp->t_dupacks == TCPREXMTTHRESH) { -+ tcp_seq onxt = tp->snd_nxt; -+ unsigned win = -+ MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; -+ -+ if (win < 2) -+ win = 2; -+ tp->snd_ssthresh = win * tp->t_maxseg; -+ tp->t_timer[TCPT_REXMT] = 0; -+ tp->t_rtt = 0; -+ tp->snd_nxt = ti->ti_ack; -+ tp->snd_cwnd = tp->t_maxseg; -+ (void)tcp_output(tp); -+ tp->snd_cwnd = -+ tp->snd_ssthresh + tp->t_maxseg * tp->t_dupacks; -+ if (SEQ_GT(onxt, tp->snd_nxt)) -+ tp->snd_nxt = onxt; -+ goto drop; -+ } else if (tp->t_dupacks > TCPREXMTTHRESH) { -+ tp->snd_cwnd += tp->t_maxseg; -+ (void)tcp_output(tp); -+ goto drop; -+ } -+ } else -+ tp->t_dupacks = 0; -+ break; -+ } -+ synrx_to_est: -+ /* -+ * If the congestion window was inflated to account -+ * for the other side's cached packets, retract it. -+ */ -+ if (tp->t_dupacks > TCPREXMTTHRESH && tp->snd_cwnd > tp->snd_ssthresh) -+ tp->snd_cwnd = tp->snd_ssthresh; -+ tp->t_dupacks = 0; -+ if (SEQ_GT(ti->ti_ack, tp->snd_max)) { -+ goto dropafterack; -+ } -+ acked = ti->ti_ack - tp->snd_una; -+ -+ /* -+ * If transmit timer is running and timed sequence -+ * number was acked, update smoothed round trip time. -+ * Since we now have an rtt measurement, cancel the -+ * timer backoff (cf., Phil Karn's retransmit alg.). -+ * Recompute the initial retransmit timer. -+ */ -+ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) -+ tcp_xmit_timer(tp, tp->t_rtt); -+ -+ /* -+ * If all outstanding data is acked, stop retransmit -+ * timer and remember to restart (more output or persist). -+ * If there is more data to be acked, restart retransmit -+ * timer, using current (possibly backed-off) value. 
-+ */ -+ if (ti->ti_ack == tp->snd_max) { -+ tp->t_timer[TCPT_REXMT] = 0; -+ needoutput = 1; -+ } else if (tp->t_timer[TCPT_PERSIST] == 0) -+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; -+ /* -+ * When new data is acked, open the congestion window. -+ * If the window gives us less than ssthresh packets -+ * in flight, open exponentially (maxseg per packet). -+ * Otherwise open linearly: maxseg per window -+ * (maxseg^2 / cwnd per packet). -+ */ -+ { -+ register unsigned cw = tp->snd_cwnd; -+ register unsigned incr = tp->t_maxseg; -+ -+ if (cw > tp->snd_ssthresh) -+ incr = incr * incr / cw; -+ tp->snd_cwnd = MIN(cw + incr, TCP_MAXWIN << tp->snd_scale); -+ } -+ if (acked > so->so_snd.sb_cc) { -+ tp->snd_wnd -= so->so_snd.sb_cc; -+ sodrop(so, (int)so->so_snd.sb_cc); -+ ourfinisacked = 1; -+ } else { -+ sodrop(so, acked); -+ tp->snd_wnd -= acked; -+ ourfinisacked = 0; -+ } -+ tp->snd_una = ti->ti_ack; -+ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) -+ tp->snd_nxt = tp->snd_una; -+ -+ switch (tp->t_state) { -+ /* -+ * In FIN_WAIT_1 STATE in addition to the processing -+ * for the ESTABLISHED state if our FIN is now acknowledged -+ * then enter FIN_WAIT_2. -+ */ -+ case TCPS_FIN_WAIT_1: -+ if (ourfinisacked) { -+ /* -+ * If we can't receive any more -+ * data, then closing user can proceed. -+ * Starting the timer is contrary to the -+ * specification, but if we don't get a FIN -+ * we'll hang forever. -+ */ -+ if (so->so_state & SS_FCANTRCVMORE) { -+ tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE; -+ } -+ tp->t_state = TCPS_FIN_WAIT_2; -+ } -+ break; -+ -+ /* -+ * In CLOSING STATE in addition to the processing for -+ * the ESTABLISHED state if the ACK acknowledges our FIN -+ * then enter the TIME-WAIT state, otherwise ignore -+ * the segment. 
-+ */ -+ case TCPS_CLOSING: -+ if (ourfinisacked) { -+ tp->t_state = TCPS_TIME_WAIT; -+ tcp_canceltimers(tp); -+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; -+ } -+ break; -+ -+ /* -+ * In LAST_ACK, we may still be waiting for data to drain -+ * and/or to be acked, as well as for the ack of our FIN. -+ * If our FIN is now acknowledged, delete the TCB, -+ * enter the closed state and return. -+ */ -+ case TCPS_LAST_ACK: -+ if (ourfinisacked) { -+ tcp_close(tp); -+ goto drop; -+ } -+ break; -+ -+ /* -+ * In TIME_WAIT state the only thing that should arrive -+ * is a retransmission of the remote FIN. Acknowledge -+ * it and restart the finack timer. -+ */ -+ case TCPS_TIME_WAIT: -+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; -+ goto dropafterack; -+ } -+ } /* switch(tp->t_state) */ -+ -+step6: -+ /* -+ * Update window information. -+ * Don't look at window if no ACK: TAC's send garbage on first SYN. -+ */ -+ if ((tiflags & TH_ACK) && -+ (SEQ_LT(tp->snd_wl1, ti->ti_seq) || -+ (tp->snd_wl1 == ti->ti_seq && -+ (SEQ_LT(tp->snd_wl2, ti->ti_ack) || -+ (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) { -+ tp->snd_wnd = tiwin; -+ tp->snd_wl1 = ti->ti_seq; -+ tp->snd_wl2 = ti->ti_ack; -+ if (tp->snd_wnd > tp->max_sndwnd) -+ tp->max_sndwnd = tp->snd_wnd; -+ needoutput = 1; -+ } -+ -+ /* -+ * Process segments with URG. -+ */ -+ if ((tiflags & TH_URG) && ti->ti_urp && -+ TCPS_HAVERCVDFIN(tp->t_state) == 0) { -+ /* -+ * This is a kludge, but if we receive and accept -+ * random urgent pointers, we'll crash in -+ * soreceive. It's hard to imagine someone -+ * actually wanting to send this much urgent data. -+ */ -+ if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) { -+ ti->ti_urp = 0; -+ tiflags &= ~TH_URG; -+ goto dodata; -+ } -+ /* -+ * If this segment advances the known urgent pointer, -+ * then mark the data stream. This should not happen -+ * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since -+ * a FIN has been received from the remote side. 
-+ * In these states we ignore the URG. -+ * -+ * According to RFC961 (Assigned Protocols), -+ * the urgent pointer points to the last octet -+ * of urgent data. We continue, however, -+ * to consider it to indicate the first octet -+ * of data past the urgent section as the original -+ * spec states (in one of two places). -+ */ -+ if (SEQ_GT(ti->ti_seq + ti->ti_urp, tp->rcv_up)) { -+ tp->rcv_up = ti->ti_seq + ti->ti_urp; -+ so->so_urgc = -+ so->so_rcv.sb_cc + (tp->rcv_up - tp->rcv_nxt); /* -1; */ -+ tp->rcv_up = ti->ti_seq + ti->ti_urp; -+ } -+ } else -+ /* -+ * If no out of band data is expected, -+ * pull receive urgent pointer along -+ * with the receive window. -+ */ -+ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) -+ tp->rcv_up = tp->rcv_nxt; -+dodata: -+ -+ /* -+ * If this is a small packet, then ACK now - with Nagel -+ * congestion avoidance sender won't send more until -+ * he gets an ACK. -+ */ -+ if (ti->ti_len && (unsigned)ti->ti_len <= 5 && -+ ((struct tcpiphdr_2 *)ti)->first_char == (char)27) { -+ tp->t_flags |= TF_ACKNOW; -+ } -+ -+ /* -+ * Process the segment text, merging it into the TCP sequencing queue, -+ * and arranging for acknowledgment of receipt if necessary. -+ * This process logically involves adjusting tp->rcv_wnd as data -+ * is presented to the user (this happens in tcp_usrreq.c, -+ * case PRU_RCVD). If a FIN has already been received on this -+ * connection then we just ignore the text. -+ */ -+ if ((ti->ti_len || (tiflags & TH_FIN)) && -+ TCPS_HAVERCVDFIN(tp->t_state) == 0) { -+ TCP_REASS(tp, ti, m, so, tiflags); -+ } else { -+ m_free(m); -+ tiflags &= ~TH_FIN; -+ } -+ -+ /* -+ * If FIN is received ACK the FIN and let the user know -+ * that the connection is closing. -+ */ -+ if (tiflags & TH_FIN) { -+ if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { -+ /* -+ * If we receive a FIN we can't send more data, -+ * set it SS_FDRAIN -+ * Shutdown the socket if there is no rx data in the -+ * buffer. 
-+ * soread() is called on completion of shutdown() and -+ * will got to TCPS_LAST_ACK, and use tcp_output() -+ * to send the FIN. -+ */ -+ sofwdrain(so); -+ -+ tp->t_flags |= TF_ACKNOW; -+ tp->rcv_nxt++; -+ } -+ switch (tp->t_state) { -+ /* -+ * In SYN_RECEIVED and ESTABLISHED STATES -+ * enter the CLOSE_WAIT state. -+ */ -+ case TCPS_SYN_RECEIVED: -+ case TCPS_ESTABLISHED: -+ if (so->so_emu == EMU_CTL) /* no shutdown on socket */ -+ tp->t_state = TCPS_LAST_ACK; -+ else -+ tp->t_state = TCPS_CLOSE_WAIT; -+ break; -+ -+ /* -+ * If still in FIN_WAIT_1 STATE FIN has not been acked so -+ * enter the CLOSING state. -+ */ -+ case TCPS_FIN_WAIT_1: -+ tp->t_state = TCPS_CLOSING; -+ break; -+ -+ /* -+ * In FIN_WAIT_2 state enter the TIME_WAIT state, -+ * starting the time-wait timer, turning off the other -+ * standard timers. -+ */ -+ case TCPS_FIN_WAIT_2: -+ tp->t_state = TCPS_TIME_WAIT; -+ tcp_canceltimers(tp); -+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; -+ break; -+ -+ /* -+ * In TIME_WAIT state restart the 2 MSL time_wait timer. -+ */ -+ case TCPS_TIME_WAIT: -+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; -+ break; -+ } -+ } -+ -+ /* -+ * Return any desired output. -+ */ -+ if (needoutput || (tp->t_flags & TF_ACKNOW)) { -+ (void)tcp_output(tp); -+ } -+ return; -+ -+dropafterack: -+ /* -+ * Generate an ACK dropping incoming segment if it occupies -+ * sequence space, where the ACK reflects our state. -+ */ -+ if (tiflags & TH_RST) -+ goto drop; -+ m_free(m); -+ tp->t_flags |= TF_ACKNOW; -+ (void)tcp_output(tp); -+ return; -+ -+dropwithreset: -+ /* reuses m if m!=NULL, m_free() unnecessary */ -+ if (tiflags & TH_ACK) -+ tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST, af); -+ else { -+ if (tiflags & TH_SYN) -+ ti->ti_len++; -+ tcp_respond(tp, ti, m, ti->ti_seq + ti->ti_len, (tcp_seq)0, -+ TH_RST | TH_ACK, af); -+ } -+ -+ return; -+ -+drop: -+ /* -+ * Drop space held by incoming segment and return. 
-+ */ -+ m_free(m); -+} -+ -+static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, -+ struct tcpiphdr *ti) -+{ -+ uint16_t mss; -+ int opt, optlen; -+ -+ DEBUG_CALL("tcp_dooptions"); -+ DEBUG_ARG("tp = %p cnt=%i", tp, cnt); -+ -+ for (; cnt > 0; cnt -= optlen, cp += optlen) { -+ opt = cp[0]; -+ if (opt == TCPOPT_EOL) -+ break; -+ if (opt == TCPOPT_NOP) -+ optlen = 1; -+ else { -+ optlen = cp[1]; -+ if (optlen <= 0) -+ break; -+ } -+ switch (opt) { -+ default: -+ continue; -+ -+ case TCPOPT_MAXSEG: -+ if (optlen != TCPOLEN_MAXSEG) -+ continue; -+ if (!(ti->ti_flags & TH_SYN)) -+ continue; -+ memcpy((char *)&mss, (char *)cp + 2, sizeof(mss)); -+ NTOHS(mss); -+ (void)tcp_mss(tp, mss); /* sets t_maxseg */ -+ break; -+ } -+ } -+} -+ -+/* -+ * Collect new round-trip time estimate -+ * and update averages and current timeout. -+ */ -+ -+static void tcp_xmit_timer(register struct tcpcb *tp, int rtt) -+{ -+ register short delta; -+ -+ DEBUG_CALL("tcp_xmit_timer"); -+ DEBUG_ARG("tp = %p", tp); -+ DEBUG_ARG("rtt = %d", rtt); -+ -+ if (tp->t_srtt != 0) { -+ /* -+ * srtt is stored as fixed point with 3 bits after the -+ * binary point (i.e., scaled by 8). The following magic -+ * is equivalent to the smoothing algorithm in rfc793 with -+ * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed -+ * point). Adjust rtt to origin 0. -+ */ -+ delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT); -+ if ((tp->t_srtt += delta) <= 0) -+ tp->t_srtt = 1; -+ /* -+ * We accumulate a smoothed rtt variance (actually, a -+ * smoothed mean difference), then set the retransmit -+ * timer to smoothed rtt + 4 times the smoothed variance. -+ * rttvar is stored as fixed point with 2 bits after the -+ * binary point (scaled by 4). The following is -+ * equivalent to rfc793 smoothing with an alpha of .75 -+ * (rttvar = rttvar*3/4 + |delta| / 4). This replaces -+ * rfc793's wired-in beta. 
-+ */ -+ if (delta < 0) -+ delta = -delta; -+ delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); -+ if ((tp->t_rttvar += delta) <= 0) -+ tp->t_rttvar = 1; -+ } else { -+ /* -+ * No rtt measurement yet - use the unsmoothed rtt. -+ * Set the variance to half the rtt (so our first -+ * retransmit happens at 3*rtt). -+ */ -+ tp->t_srtt = rtt << TCP_RTT_SHIFT; -+ tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); -+ } -+ tp->t_rtt = 0; -+ tp->t_rxtshift = 0; -+ -+ /* -+ * the retransmit should happen at rtt + 4 * rttvar. -+ * Because of the way we do the smoothing, srtt and rttvar -+ * will each average +1/2 tick of bias. When we compute -+ * the retransmit timer, we want 1/2 tick of rounding and -+ * 1 extra tick because of +-1/2 tick uncertainty in the -+ * firing of the timer. The bias will give us exactly the -+ * 1.5 tick we need. But, because the bias is -+ * statistical, we have to test that we don't drop below -+ * the minimum feasible timer (which is 2 ticks). -+ */ -+ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), (short)tp->t_rttmin, -+ TCPTV_REXMTMAX); /* XXX */ -+ -+ /* -+ * We received an ack for a packet that wasn't retransmitted; -+ * it is probably safe to discard any error indications we've -+ * received recently. This isn't quite right, but close enough -+ * for now (a route might have failed after we sent a segment, -+ * and the return path might not be symmetrical). -+ */ -+ tp->t_softerror = 0; -+} -+ -+/* -+ * Determine a reasonable value for maxseg size. -+ * If the route is known, check route for mtu. -+ * If none, use an mss that can be handled on the outgoing -+ * interface without forcing IP to fragment; if bigger than -+ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES -+ * to utilize large mbufs. 
If no route is found, route has no mtu, -+ * or the destination isn't local, use a default, hopefully conservative -+ * size (usually 512 or the default IP max size, but no more than the mtu -+ * of the interface), as we can't discover anything about intervening -+ * gateways or networks. We also initialize the congestion/slow start -+ * window to be a single segment if the destination isn't local. -+ * While looking at the routing entry, we also initialize other path-dependent -+ * parameters from pre-set or cached values in the routing entry. -+ */ -+ -+int tcp_mss(struct tcpcb *tp, unsigned offer) -+{ -+ struct socket *so = tp->t_socket; -+ int mss; -+ -+ DEBUG_CALL("tcp_mss"); -+ DEBUG_ARG("tp = %p", tp); -+ DEBUG_ARG("offer = %d", offer); -+ -+ switch (so->so_ffamily) { -+ case AF_INET: -+ mss = MIN(IF_MTU, IF_MRU) - sizeof(struct tcphdr) - sizeof(struct ip); -+ break; -+ case AF_INET6: -+ mss = MIN(IF_MTU, IF_MRU) - sizeof(struct tcphdr) - sizeof(struct ip6); -+ break; -+ default: -+ g_assert_not_reached(); -+ } -+ -+ if (offer) -+ mss = MIN(mss, offer); -+ mss = MAX(mss, 32); -+ if (mss < tp->t_maxseg || offer != 0) -+ tp->t_maxseg = mss; -+ -+ tp->snd_cwnd = mss; -+ -+ sbreserve(&so->so_snd, -+ TCP_SNDSPACE + -+ ((TCP_SNDSPACE % mss) ? (mss - (TCP_SNDSPACE % mss)) : 0)); -+ sbreserve(&so->so_rcv, -+ TCP_RCVSPACE + -+ ((TCP_RCVSPACE % mss) ? (mss - (TCP_RCVSPACE % mss)) : 0)); -+ -+ DEBUG_MISC(" returning mss = %d", mss); -+ -+ return mss; -+} -diff --git a/slirp/src/tcp_output.c b/slirp/src/tcp_output.c -new file mode 100644 -index 0000000..383fe31 ---- /dev/null -+++ b/slirp/src/tcp_output.c -@@ -0,0 +1,516 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1990, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. 
Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)tcp_output.c 8.3 (Berkeley) 12/30/93 -+ * tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp -+ */ -+ -+/* -+ * Changes and additions relating to SLiRP -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#include "slirp.h" -+ -+static const uint8_t tcp_outflags[TCP_NSTATES] = { -+ TH_RST | TH_ACK, 0, TH_SYN, TH_SYN | TH_ACK, -+ TH_ACK, TH_ACK, TH_FIN | TH_ACK, TH_FIN | TH_ACK, -+ TH_FIN | TH_ACK, TH_ACK, TH_ACK, -+}; -+ -+ -+#undef MAX_TCPOPTLEN -+#define MAX_TCPOPTLEN 32 /* max # bytes that go in options */ -+ -+/* -+ * Tcp output routine: figure out what should be sent and send it. 
-+ */ -+int tcp_output(struct tcpcb *tp) -+{ -+ register struct socket *so = tp->t_socket; -+ register long len, win; -+ int off, flags, error; -+ register struct mbuf *m; -+ register struct tcpiphdr *ti, tcpiph_save; -+ struct ip *ip; -+ struct ip6 *ip6; -+ uint8_t opt[MAX_TCPOPTLEN]; -+ unsigned optlen, hdrlen; -+ int idle, sendalot; -+ -+ DEBUG_CALL("tcp_output"); -+ DEBUG_ARG("tp = %p", tp); -+ -+ /* -+ * Determine length of data that should be transmitted, -+ * and flags that will be used. -+ * If there is some data or critical controls (SYN, RST) -+ * to send, then transmit; otherwise, investigate further. -+ */ -+ idle = (tp->snd_max == tp->snd_una); -+ if (idle && tp->t_idle >= tp->t_rxtcur) -+ /* -+ * We have been idle for "a while" and no acks are -+ * expected to clock out any data we send -- -+ * slow start to get ack "clock" running again. -+ */ -+ tp->snd_cwnd = tp->t_maxseg; -+again: -+ sendalot = 0; -+ off = tp->snd_nxt - tp->snd_una; -+ win = MIN(tp->snd_wnd, tp->snd_cwnd); -+ -+ flags = tcp_outflags[tp->t_state]; -+ -+ DEBUG_MISC(" --- tcp_output flags = 0x%x", flags); -+ -+ /* -+ * If in persist timeout with window of 0, send 1 byte. -+ * Otherwise, if window is small but nonzero -+ * and timer expired, we will send what we can -+ * and go to transmit state. -+ */ -+ if (tp->t_force) { -+ if (win == 0) { -+ /* -+ * If we still have some data to send, then -+ * clear the FIN bit. Usually this would -+ * happen below when it realizes that we -+ * aren't sending all the data. However, -+ * if we have exactly 1 byte of unset data, -+ * then it won't clear the FIN bit below, -+ * and if we are in persist state, we wind -+ * up sending the packet without recording -+ * that we sent the FIN bit. -+ * -+ * We can't just blindly clear the FIN bit, -+ * because if we don't have any more data -+ * to send then the probe will be the FIN -+ * itself. 
-+ */ -+ if (off < so->so_snd.sb_cc) -+ flags &= ~TH_FIN; -+ win = 1; -+ } else { -+ tp->t_timer[TCPT_PERSIST] = 0; -+ tp->t_rxtshift = 0; -+ } -+ } -+ -+ len = MIN(so->so_snd.sb_cc, win) - off; -+ -+ if (len < 0) { -+ /* -+ * If FIN has been sent but not acked, -+ * but we haven't been called to retransmit, -+ * len will be -1. Otherwise, window shrank -+ * after we sent into it. If window shrank to 0, -+ * cancel pending retransmit and pull snd_nxt -+ * back to (closed) window. We will enter persist -+ * state below. If the window didn't close completely, -+ * just wait for an ACK. -+ */ -+ len = 0; -+ if (win == 0) { -+ tp->t_timer[TCPT_REXMT] = 0; -+ tp->snd_nxt = tp->snd_una; -+ } -+ } -+ -+ if (len > tp->t_maxseg) { -+ len = tp->t_maxseg; -+ sendalot = 1; -+ } -+ if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) -+ flags &= ~TH_FIN; -+ -+ win = sbspace(&so->so_rcv); -+ -+ /* -+ * Sender silly window avoidance. If connection is idle -+ * and can send all data, a maximum segment, -+ * at least a maximum default-size segment do it, -+ * or are forced, do it; otherwise don't bother. -+ * If peer's buffer is tiny, then send -+ * when window is at least half open. -+ * If retransmitting (possibly after persist timer forced us -+ * to send into a small window), then must resend. -+ */ -+ if (len) { -+ if (len == tp->t_maxseg) -+ goto send; -+ if ((1 || idle || tp->t_flags & TF_NODELAY) && -+ len + off >= so->so_snd.sb_cc) -+ goto send; -+ if (tp->t_force) -+ goto send; -+ if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) -+ goto send; -+ if (SEQ_LT(tp->snd_nxt, tp->snd_max)) -+ goto send; -+ } -+ -+ /* -+ * Compare available window to amount of window -+ * known to peer (as advertised window less -+ * next expected input). If the difference is at least two -+ * max size segments, or at least 50% of the maximum possible -+ * window, then want to send a window update to peer. 
-+ */ -+ if (win > 0) { -+ /* -+ * "adv" is the amount we can increase the window, -+ * taking into account that we are limited by -+ * TCP_MAXWIN << tp->rcv_scale. -+ */ -+ long adv = MIN(win, (long)TCP_MAXWIN << tp->rcv_scale) - -+ (tp->rcv_adv - tp->rcv_nxt); -+ -+ if (adv >= (long)(2 * tp->t_maxseg)) -+ goto send; -+ if (2 * adv >= (long)so->so_rcv.sb_datalen) -+ goto send; -+ } -+ -+ /* -+ * Send if we owe peer an ACK. -+ */ -+ if (tp->t_flags & TF_ACKNOW) -+ goto send; -+ if (flags & (TH_SYN | TH_RST)) -+ goto send; -+ if (SEQ_GT(tp->snd_up, tp->snd_una)) -+ goto send; -+ /* -+ * If our state indicates that FIN should be sent -+ * and we have not yet done so, or we're retransmitting the FIN, -+ * then we need to send. -+ */ -+ if (flags & TH_FIN && -+ ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) -+ goto send; -+ -+ /* -+ * TCP window updates are not reliable, rather a polling protocol -+ * using ``persist'' packets is used to insure receipt of window -+ * updates. The three ``states'' for the output side are: -+ * idle not doing retransmits or persists -+ * persisting to move a small or zero window -+ * (re)transmitting and thereby not persisting -+ * -+ * tp->t_timer[TCPT_PERSIST] -+ * is set when we are in persist state. -+ * tp->t_force -+ * is set when we are called to send a persist packet. -+ * tp->t_timer[TCPT_REXMT] -+ * is set when we are retransmitting -+ * The output side is idle when both timers are zero. -+ * -+ * If send window is too small, there is data to transmit, and no -+ * retransmit or persist is pending, then go to persist state. -+ * If nothing happens soon, send when timer expires: -+ * if window is nonzero, transmit what we can, -+ * otherwise force out a byte. -+ */ -+ if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && -+ tp->t_timer[TCPT_PERSIST] == 0) { -+ tp->t_rxtshift = 0; -+ tcp_setpersist(tp); -+ } -+ -+ /* -+ * No reason to send a segment, just return. 
-+ */ -+ return (0); -+ -+send: -+ /* -+ * Before ESTABLISHED, force sending of initial options -+ * unless TCP set not to do any options. -+ * NOTE: we assume that the IP/TCP header plus TCP options -+ * always fit in a single mbuf, leaving room for a maximum -+ * link header, i.e. -+ * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN -+ */ -+ optlen = 0; -+ hdrlen = sizeof(struct tcpiphdr); -+ if (flags & TH_SYN) { -+ tp->snd_nxt = tp->iss; -+ if ((tp->t_flags & TF_NOOPT) == 0) { -+ uint16_t mss; -+ -+ opt[0] = TCPOPT_MAXSEG; -+ opt[1] = 4; -+ mss = htons((uint16_t)tcp_mss(tp, 0)); -+ memcpy((char *)(opt + 2), (char *)&mss, sizeof(mss)); -+ optlen = 4; -+ } -+ } -+ -+ hdrlen += optlen; -+ -+ /* -+ * Adjust data length if insertion of options will -+ * bump the packet length beyond the t_maxseg length. -+ */ -+ if (len > tp->t_maxseg - optlen) { -+ len = tp->t_maxseg - optlen; -+ sendalot = 1; -+ } -+ -+ /* -+ * Grab a header mbuf, attaching a copy of data to -+ * be transmitted, and initialize the header from -+ * the template for sends on this connection. -+ */ -+ if (len) { -+ m = m_get(so->slirp); -+ if (m == NULL) { -+ error = 1; -+ goto out; -+ } -+ m->m_data += IF_MAXLINKHDR; -+ m->m_len = hdrlen; -+ -+ sbcopy(&so->so_snd, off, (int)len, mtod(m, char *) + hdrlen); -+ m->m_len += len; -+ -+ /* -+ * If we're sending everything we've got, set PUSH. -+ * (This will keep happy those implementations which only -+ * give data to the user when a buffer fills or -+ * a PUSH comes in.) -+ */ -+ if (off + len == so->so_snd.sb_cc) -+ flags |= TH_PUSH; -+ } else { -+ m = m_get(so->slirp); -+ if (m == NULL) { -+ error = 1; -+ goto out; -+ } -+ m->m_data += IF_MAXLINKHDR; -+ m->m_len = hdrlen; -+ } -+ -+ ti = mtod(m, struct tcpiphdr *); -+ -+ memcpy((char *)ti, &tp->t_template, sizeof(struct tcpiphdr)); -+ -+ /* -+ * Fill in fields, remembering maximum advertised -+ * window for use in delaying messages about window sizes. 
-+ * If resending a FIN, be sure not to use a new sequence number. -+ */ -+ if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && -+ tp->snd_nxt == tp->snd_max) -+ tp->snd_nxt--; -+ /* -+ * If we are doing retransmissions, then snd_nxt will -+ * not reflect the first unsent octet. For ACK only -+ * packets, we do not want the sequence number of the -+ * retransmitted packet, we want the sequence number -+ * of the next unsent octet. So, if there is no data -+ * (and no SYN or FIN), use snd_max instead of snd_nxt -+ * when filling in ti_seq. But if we are in persist -+ * state, snd_max might reflect one byte beyond the -+ * right edge of the window, so use snd_nxt in that -+ * case, since we know we aren't doing a retransmission. -+ * (retransmit and persist are mutually exclusive...) -+ */ -+ if (len || (flags & (TH_SYN | TH_FIN)) || tp->t_timer[TCPT_PERSIST]) -+ ti->ti_seq = htonl(tp->snd_nxt); -+ else -+ ti->ti_seq = htonl(tp->snd_max); -+ ti->ti_ack = htonl(tp->rcv_nxt); -+ if (optlen) { -+ memcpy((char *)(ti + 1), (char *)opt, optlen); -+ ti->ti_off = (sizeof(struct tcphdr) + optlen) >> 2; -+ } -+ ti->ti_flags = flags; -+ /* -+ * Calculate receive window. Don't shrink window, -+ * but avoid silly window syndrome. -+ */ -+ if (win < (long)(so->so_rcv.sb_datalen / 4) && win < (long)tp->t_maxseg) -+ win = 0; -+ if (win > (long)TCP_MAXWIN << tp->rcv_scale) -+ win = (long)TCP_MAXWIN << tp->rcv_scale; -+ if (win < (long)(tp->rcv_adv - tp->rcv_nxt)) -+ win = (long)(tp->rcv_adv - tp->rcv_nxt); -+ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); -+ -+ if (SEQ_GT(tp->snd_up, tp->snd_una)) { -+ ti->ti_urp = htons((uint16_t)(tp->snd_up - ntohl(ti->ti_seq))); -+ ti->ti_flags |= TH_URG; -+ } else -+ /* -+ * If no urgent pointer to send, then we pull -+ * the urgent pointer to the left edge of the send window -+ * so that it doesn't drift into the send window on sequence -+ * number wraparound. 
-+ */ -+ tp->snd_up = tp->snd_una; /* drag it along */ -+ -+ /* -+ * Put TCP length in extended header, and then -+ * checksum extended header and data. -+ */ -+ if (len + optlen) -+ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + optlen + len)); -+ ti->ti_sum = cksum(m, (int)(hdrlen + len)); -+ -+ /* -+ * In transmit state, time the transmission and arrange for -+ * the retransmit. In persist state, just set snd_max. -+ */ -+ if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { -+ tcp_seq startseq = tp->snd_nxt; -+ -+ /* -+ * Advance snd_nxt over sequence space of this segment. -+ */ -+ if (flags & (TH_SYN | TH_FIN)) { -+ if (flags & TH_SYN) -+ tp->snd_nxt++; -+ if (flags & TH_FIN) { -+ tp->snd_nxt++; -+ tp->t_flags |= TF_SENTFIN; -+ } -+ } -+ tp->snd_nxt += len; -+ if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { -+ tp->snd_max = tp->snd_nxt; -+ /* -+ * Time this transmission if not a retransmission and -+ * not currently timing anything. -+ */ -+ if (tp->t_rtt == 0) { -+ tp->t_rtt = 1; -+ tp->t_rtseq = startseq; -+ } -+ } -+ -+ /* -+ * Set retransmit timer if not currently set, -+ * and not doing an ack or a keep-alive probe. -+ * Initial value for retransmit timer is smoothed -+ * round-trip time + 2 * round-trip time variance. -+ * Initialize shift counter which is used for backoff -+ * of retransmit time. -+ */ -+ if (tp->t_timer[TCPT_REXMT] == 0 && tp->snd_nxt != tp->snd_una) { -+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; -+ if (tp->t_timer[TCPT_PERSIST]) { -+ tp->t_timer[TCPT_PERSIST] = 0; -+ tp->t_rxtshift = 0; -+ } -+ } -+ } else if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) -+ tp->snd_max = tp->snd_nxt + len; -+ -+ /* -+ * Fill in IP length and desired time to live and -+ * send to IP level. There should be a better way -+ * to handle ttl and tos; we could keep them in -+ * the template, but need a way to checksum without them. -+ */ -+ m->m_len = hdrlen + len; /* XXX Needed? 
m_len should be correct */ -+ tcpiph_save = *mtod(m, struct tcpiphdr *); -+ -+ switch (so->so_ffamily) { -+ case AF_INET: -+ m->m_data += -+ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); -+ m->m_len -= -+ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); -+ ip = mtod(m, struct ip *); -+ -+ ip->ip_len = m->m_len; -+ ip->ip_dst = tcpiph_save.ti_dst; -+ ip->ip_src = tcpiph_save.ti_src; -+ ip->ip_p = tcpiph_save.ti_pr; -+ -+ ip->ip_ttl = IPDEFTTL; -+ ip->ip_tos = so->so_iptos; -+ error = ip_output(so, m); -+ break; -+ -+ case AF_INET6: -+ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - -+ sizeof(struct ip6); -+ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - -+ sizeof(struct ip6); -+ ip6 = mtod(m, struct ip6 *); -+ -+ ip6->ip_pl = tcpiph_save.ti_len; -+ ip6->ip_dst = tcpiph_save.ti_dst6; -+ ip6->ip_src = tcpiph_save.ti_src6; -+ ip6->ip_nh = tcpiph_save.ti_nh6; -+ -+ error = ip6_output(so, m, 0); -+ break; -+ -+ default: -+ g_assert_not_reached(); -+ } -+ -+ if (error) { -+ out: -+ return (error); -+ } -+ -+ /* -+ * Data sent (as far as we can tell). -+ * If this advertises a larger window than any other segment, -+ * then remember the size of the advertised window. -+ * Any pending ACK has now been sent. -+ */ -+ if (win > 0 && SEQ_GT(tp->rcv_nxt + win, tp->rcv_adv)) -+ tp->rcv_adv = tp->rcv_nxt + win; -+ tp->last_ack_sent = tp->rcv_nxt; -+ tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); -+ if (sendalot) -+ goto again; -+ -+ return (0); -+} -+ -+void tcp_setpersist(struct tcpcb *tp) -+{ -+ int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; -+ -+ /* -+ * Start/restart persistence timer. 
-+ */ -+ TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], t * tcp_backoff[tp->t_rxtshift], -+ TCPTV_PERSMIN, TCPTV_PERSMAX); -+ if (tp->t_rxtshift < TCP_MAXRXTSHIFT) -+ tp->t_rxtshift++; -+} -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -new file mode 100644 -index 0000000..d6dd133 ---- /dev/null -+++ b/slirp/src/tcp_subr.c -@@ -0,0 +1,975 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1990, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93 -+ * tcp_subr.c,v 1.5 1994/10/08 22:39:58 phk Exp -+ */ -+ -+/* -+ * Changes and additions relating to SLiRP -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#include "slirp.h" -+ -+/* patchable/settable parameters for tcp */ -+/* Don't do rfc1323 performance enhancements */ -+#define TCP_DO_RFC1323 0 -+ -+/* -+ * Tcp initialization -+ */ -+void tcp_init(Slirp *slirp) -+{ -+ slirp->tcp_iss = 1; /* wrong */ -+ slirp->tcb.so_next = slirp->tcb.so_prev = &slirp->tcb; -+ slirp->tcp_last_so = &slirp->tcb; -+} -+ -+void tcp_cleanup(Slirp *slirp) -+{ -+ while (slirp->tcb.so_next != &slirp->tcb) { -+ tcp_close(sototcpcb(slirp->tcb.so_next)); -+ } -+} -+ -+/* -+ * Create template to be used to send tcp packets on a connection. -+ * Call after host entry created, fills -+ * in a skeletal tcp/ip header, minimizing the amount of work -+ * necessary when the connection is used. 
-+ */ -+void tcp_template(struct tcpcb *tp) -+{ -+ struct socket *so = tp->t_socket; -+ register struct tcpiphdr *n = &tp->t_template; -+ -+ n->ti_mbuf = NULL; -+ memset(&n->ti, 0, sizeof(n->ti)); -+ n->ti_x0 = 0; -+ switch (so->so_ffamily) { -+ case AF_INET: -+ n->ti_pr = IPPROTO_TCP; -+ n->ti_len = htons(sizeof(struct tcphdr)); -+ n->ti_src = so->so_faddr; -+ n->ti_dst = so->so_laddr; -+ n->ti_sport = so->so_fport; -+ n->ti_dport = so->so_lport; -+ break; -+ -+ case AF_INET6: -+ n->ti_nh6 = IPPROTO_TCP; -+ n->ti_len = htons(sizeof(struct tcphdr)); -+ n->ti_src6 = so->so_faddr6; -+ n->ti_dst6 = so->so_laddr6; -+ n->ti_sport = so->so_fport6; -+ n->ti_dport = so->so_lport6; -+ break; -+ -+ default: -+ g_assert_not_reached(); -+ } -+ -+ n->ti_seq = 0; -+ n->ti_ack = 0; -+ n->ti_x2 = 0; -+ n->ti_off = 5; -+ n->ti_flags = 0; -+ n->ti_win = 0; -+ n->ti_sum = 0; -+ n->ti_urp = 0; -+} -+ -+/* -+ * Send a single message to the TCP at address specified by -+ * the given TCP/IP header. If m == 0, then we make a copy -+ * of the tcpiphdr at ti and send directly to the addressed host. -+ * This is used to force keep alive messages out using the TCP -+ * template for a connection tp->t_template. If flags are given -+ * then we send a message back to the TCP which originated the -+ * segment ti, and discard the mbuf containing it and any other -+ * attached mbufs. -+ * -+ * In any case the ack and sequence number of the transmitted -+ * segment are as specified by the parameters. 
-+ */ -+void tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, -+ tcp_seq ack, tcp_seq seq, int flags, unsigned short af) -+{ -+ register int tlen; -+ int win = 0; -+ -+ DEBUG_CALL("tcp_respond"); -+ DEBUG_ARG("tp = %p", tp); -+ DEBUG_ARG("ti = %p", ti); -+ DEBUG_ARG("m = %p", m); -+ DEBUG_ARG("ack = %u", ack); -+ DEBUG_ARG("seq = %u", seq); -+ DEBUG_ARG("flags = %x", flags); -+ -+ if (tp) -+ win = sbspace(&tp->t_socket->so_rcv); -+ if (m == NULL) { -+ if (!tp || (m = m_get(tp->t_socket->slirp)) == NULL) -+ return; -+ tlen = 0; -+ m->m_data += IF_MAXLINKHDR; -+ *mtod(m, struct tcpiphdr *) = *ti; -+ ti = mtod(m, struct tcpiphdr *); -+ switch (af) { -+ case AF_INET: -+ ti->ti.ti_i4.ih_x1 = 0; -+ break; -+ case AF_INET6: -+ ti->ti.ti_i6.ih_x1 = 0; -+ break; -+ default: -+ g_assert_not_reached(); -+ } -+ flags = TH_ACK; -+ } else { -+ /* -+ * ti points into m so the next line is just making -+ * the mbuf point to ti -+ */ -+ m->m_data = (char *)ti; -+ -+ m->m_len = sizeof(struct tcpiphdr); -+ tlen = 0; -+#define xchg(a, b, type) \ -+ { \ -+ type t; \ -+ t = a; \ -+ a = b; \ -+ b = t; \ -+ } -+ switch (af) { -+ case AF_INET: -+ xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, uint32_t); -+ xchg(ti->ti_dport, ti->ti_sport, uint16_t); -+ break; -+ case AF_INET6: -+ xchg(ti->ti_dst6, ti->ti_src6, struct in6_addr); -+ xchg(ti->ti_dport, ti->ti_sport, uint16_t); -+ break; -+ default: -+ g_assert_not_reached(); -+ } -+#undef xchg -+ } -+ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + tlen)); -+ tlen += sizeof(struct tcpiphdr); -+ m->m_len = tlen; -+ -+ ti->ti_mbuf = NULL; -+ ti->ti_x0 = 0; -+ ti->ti_seq = htonl(seq); -+ ti->ti_ack = htonl(ack); -+ ti->ti_x2 = 0; -+ ti->ti_off = sizeof(struct tcphdr) >> 2; -+ ti->ti_flags = flags; -+ if (tp) -+ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); -+ else -+ ti->ti_win = htons((uint16_t)win); -+ ti->ti_urp = 0; -+ ti->ti_sum = 0; -+ ti->ti_sum = cksum(m, tlen); -+ -+ struct tcpiphdr tcpiph_save = *(mtod(m, 
struct tcpiphdr *)); -+ struct ip *ip; -+ struct ip6 *ip6; -+ -+ switch (af) { -+ case AF_INET: -+ m->m_data += -+ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); -+ m->m_len -= -+ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); -+ ip = mtod(m, struct ip *); -+ ip->ip_len = m->m_len; -+ ip->ip_dst = tcpiph_save.ti_dst; -+ ip->ip_src = tcpiph_save.ti_src; -+ ip->ip_p = tcpiph_save.ti_pr; -+ -+ if (flags & TH_RST) { -+ ip->ip_ttl = MAXTTL; -+ } else { -+ ip->ip_ttl = IPDEFTTL; -+ } -+ -+ ip_output(NULL, m); -+ break; -+ -+ case AF_INET6: -+ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - -+ sizeof(struct ip6); -+ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - -+ sizeof(struct ip6); -+ ip6 = mtod(m, struct ip6 *); -+ ip6->ip_pl = tcpiph_save.ti_len; -+ ip6->ip_dst = tcpiph_save.ti_dst6; -+ ip6->ip_src = tcpiph_save.ti_src6; -+ ip6->ip_nh = tcpiph_save.ti_nh6; -+ -+ ip6_output(NULL, m, 0); -+ break; -+ -+ default: -+ g_assert_not_reached(); -+ } -+} -+ -+/* -+ * Create a new TCP control block, making an -+ * empty reassembly queue and hooking it to the argument -+ * protocol control block. -+ */ -+struct tcpcb *tcp_newtcpcb(struct socket *so) -+{ -+ register struct tcpcb *tp; -+ -+ tp = (struct tcpcb *)malloc(sizeof(*tp)); -+ if (tp == NULL) -+ return ((struct tcpcb *)0); -+ -+ memset((char *)tp, 0, sizeof(struct tcpcb)); -+ tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; -+ tp->t_maxseg = (so->so_ffamily == AF_INET) ? TCP_MSS : TCP6_MSS; -+ -+ tp->t_flags = TCP_DO_RFC1323 ? (TF_REQ_SCALE | TF_REQ_TSTMP) : 0; -+ tp->t_socket = so; -+ -+ /* -+ * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no -+ * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives -+ * reasonable initial retransmit time. 
-+ */ -+ tp->t_srtt = TCPTV_SRTTBASE; -+ tp->t_rttvar = TCPTV_SRTTDFLT << 2; -+ tp->t_rttmin = TCPTV_MIN; -+ -+ TCPT_RANGESET(tp->t_rxtcur, -+ ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1, -+ TCPTV_MIN, TCPTV_REXMTMAX); -+ -+ tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; -+ tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; -+ tp->t_state = TCPS_CLOSED; -+ -+ so->so_tcpcb = tp; -+ -+ return (tp); -+} -+ -+/* -+ * Drop a TCP connection, reporting -+ * the specified error. If connection is synchronized, -+ * then send a RST to peer. -+ */ -+struct tcpcb *tcp_drop(struct tcpcb *tp, int err) -+{ -+ DEBUG_CALL("tcp_drop"); -+ DEBUG_ARG("tp = %p", tp); -+ DEBUG_ARG("errno = %d", errno); -+ -+ if (TCPS_HAVERCVDSYN(tp->t_state)) { -+ tp->t_state = TCPS_CLOSED; -+ (void)tcp_output(tp); -+ } -+ return (tcp_close(tp)); -+} -+ -+/* -+ * Close a TCP control block: -+ * discard all space held by the tcp -+ * discard internet protocol block -+ * wake up any sleepers -+ */ -+struct tcpcb *tcp_close(struct tcpcb *tp) -+{ -+ register struct tcpiphdr *t; -+ struct socket *so = tp->t_socket; -+ Slirp *slirp = so->slirp; -+ register struct mbuf *m; -+ -+ DEBUG_CALL("tcp_close"); -+ DEBUG_ARG("tp = %p", tp); -+ -+ /* free the reassembly queue, if any */ -+ t = tcpfrag_list_first(tp); -+ while (!tcpfrag_list_end(t, tp)) { -+ t = tcpiphdr_next(t); -+ m = tcpiphdr_prev(t)->ti_mbuf; -+ remque(tcpiphdr2qlink(tcpiphdr_prev(t))); -+ m_free(m); -+ } -+ free(tp); -+ so->so_tcpcb = NULL; -+ /* clobber input socket cache if we're closing the cached connection */ -+ if (so == slirp->tcp_last_so) -+ slirp->tcp_last_so = &slirp->tcb; -+ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); -+ closesocket(so->s); -+ sbfree(&so->so_rcv); -+ sbfree(&so->so_snd); -+ sofree(so); -+ return ((struct tcpcb *)0); -+} -+ -+/* -+ * TCP protocol interface to socket abstraction. 
-+ */ -+ -+/* -+ * User issued close, and wish to trail through shutdown states: -+ * if never received SYN, just forget it. If got a SYN from peer, -+ * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. -+ * If already got a FIN from peer, then almost done; go to LAST_ACK -+ * state. In all other cases, have already sent FIN to peer (e.g. -+ * after PRU_SHUTDOWN), and just have to play tedious game waiting -+ * for peer to send FIN or not respond to keep-alives, etc. -+ * We can let the user exit from the close as soon as the FIN is acked. -+ */ -+void tcp_sockclosed(struct tcpcb *tp) -+{ -+ DEBUG_CALL("tcp_sockclosed"); -+ DEBUG_ARG("tp = %p", tp); -+ -+ if (!tp) { -+ return; -+ } -+ -+ switch (tp->t_state) { -+ case TCPS_CLOSED: -+ case TCPS_LISTEN: -+ case TCPS_SYN_SENT: -+ tp->t_state = TCPS_CLOSED; -+ tp = tcp_close(tp); -+ break; -+ -+ case TCPS_SYN_RECEIVED: -+ case TCPS_ESTABLISHED: -+ tp->t_state = TCPS_FIN_WAIT_1; -+ break; -+ -+ case TCPS_CLOSE_WAIT: -+ tp->t_state = TCPS_LAST_ACK; -+ break; -+ } -+ tcp_output(tp); -+} -+ -+/* -+ * Connect to a host on the Internet -+ * Called by tcp_input -+ * Only do a connect, the tcp fields will be set in tcp_input -+ * return 0 if there's a result of the connect, -+ * else return -1 means we're still connecting -+ * The return value is almost always -1 since the socket is -+ * nonblocking. Connect returns after the SYN is sent, and does -+ * not wait for ACK+SYN. 
-+ */ -+int tcp_fconnect(struct socket *so, unsigned short af) -+{ -+ int ret = 0; -+ -+ DEBUG_CALL("tcp_fconnect"); -+ DEBUG_ARG("so = %p", so); -+ -+ ret = so->s = slirp_socket(af, SOCK_STREAM, 0); -+ if (ret >= 0) { -+ int opt, s = so->s; -+ struct sockaddr_storage addr; -+ -+ slirp_set_nonblock(s); -+ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); -+ slirp_socket_set_fast_reuse(s); -+ opt = 1; -+ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(opt)); -+ opt = 1; -+ setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(opt)); -+ -+ addr = so->fhost.ss; -+ DEBUG_CALL(" connect()ing"); -+ sotranslate_out(so, &addr); -+ -+ /* We don't care what port we get */ -+ ret = connect(s, (struct sockaddr *)&addr, sockaddr_size(&addr)); -+ -+ /* -+ * If it's not in progress, it failed, so we just return 0, -+ * without clearing SS_NOFDREF -+ */ -+ soisfconnecting(so); -+ } -+ -+ return (ret); -+} -+ -+/* -+ * Accept the socket and connect to the local-host -+ * -+ * We have a problem. The correct thing to do would be -+ * to first connect to the local-host, and only if the -+ * connection is accepted, then do an accept() here. -+ * But, a) we need to know who's trying to connect -+ * to the socket to be able to SYN the local-host, and -+ * b) we are already connected to the foreign host by -+ * the time it gets to accept(), so... We simply accept -+ * here and SYN the local-host. -+ */ -+void tcp_connect(struct socket *inso) -+{ -+ Slirp *slirp = inso->slirp; -+ struct socket *so; -+ struct sockaddr_storage addr; -+ socklen_t addrlen = sizeof(struct sockaddr_storage); -+ struct tcpcb *tp; -+ int s, opt; -+ -+ DEBUG_CALL("tcp_connect"); -+ DEBUG_ARG("inso = %p", inso); -+ -+ /* -+ * If it's an SS_ACCEPTONCE socket, no need to socreate() -+ * another socket, just use the accept() socket. 
-+ */ -+ if (inso->so_state & SS_FACCEPTONCE) { -+ /* FACCEPTONCE already have a tcpcb */ -+ so = inso; -+ } else { -+ so = socreate(slirp); -+ if (tcp_attach(so) < 0) { -+ g_free(so); /* NOT sofree */ -+ return; -+ } -+ so->lhost = inso->lhost; -+ so->so_ffamily = inso->so_ffamily; -+ } -+ -+ tcp_mss(sototcpcb(so), 0); -+ -+ s = accept(inso->s, (struct sockaddr *)&addr, &addrlen); -+ if (s < 0) { -+ tcp_close(sototcpcb(so)); /* This will sofree() as well */ -+ return; -+ } -+ slirp_set_nonblock(s); -+ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); -+ slirp_socket_set_fast_reuse(s); -+ opt = 1; -+ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); -+ slirp_socket_set_nodelay(s); -+ -+ so->fhost.ss = addr; -+ sotranslate_accept(so); -+ -+ /* Close the accept() socket, set right state */ -+ if (inso->so_state & SS_FACCEPTONCE) { -+ /* If we only accept once, close the accept() socket */ -+ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); -+ closesocket(so->s); -+ -+ /* Don't select it yet, even though we have an FD */ -+ /* if it's not FACCEPTONCE, it's already NOFDREF */ -+ so->so_state = SS_NOFDREF; -+ } -+ so->s = s; -+ so->so_state |= SS_INCOMING; -+ -+ so->so_iptos = tcp_tos(so); -+ tp = sototcpcb(so); -+ -+ tcp_template(tp); -+ -+ tp->t_state = TCPS_SYN_SENT; -+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; -+ tp->iss = slirp->tcp_iss; -+ slirp->tcp_iss += TCP_ISSINCR / 2; -+ tcp_sendseqinit(tp); -+ tcp_output(tp); -+} -+ -+/* -+ * Attach a TCPCB to a socket. 
-+ */ -+int tcp_attach(struct socket *so) -+{ -+ if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL) -+ return -1; -+ -+ insque(so, &so->slirp->tcb); -+ -+ return 0; -+} -+ -+/* -+ * Set the socket's type of service field -+ */ -+static const struct tos_t tcptos[] = { -+ { 0, 20, IPTOS_THROUGHPUT, 0 }, /* ftp data */ -+ { 21, 21, IPTOS_LOWDELAY, EMU_FTP }, /* ftp control */ -+ { 0, 23, IPTOS_LOWDELAY, 0 }, /* telnet */ -+ { 0, 80, IPTOS_THROUGHPUT, 0 }, /* WWW */ -+ { 0, 513, IPTOS_LOWDELAY, EMU_RLOGIN | EMU_NOCONNECT }, /* rlogin */ -+ { 0, 544, IPTOS_LOWDELAY, EMU_KSH }, /* kshell */ -+ { 0, 543, IPTOS_LOWDELAY, 0 }, /* klogin */ -+ { 0, 6667, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC */ -+ { 0, 6668, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC undernet */ -+ { 0, 7070, IPTOS_LOWDELAY, EMU_REALAUDIO }, /* RealAudio control */ -+ { 0, 113, IPTOS_LOWDELAY, EMU_IDENT }, /* identd protocol */ -+ { 0, 0, 0, 0 } -+}; -+ -+static struct emu_t *tcpemu = NULL; -+ -+/* -+ * Return TOS according to the above table -+ */ -+uint8_t tcp_tos(struct socket *so) -+{ -+ int i = 0; -+ struct emu_t *emup; -+ -+ while (tcptos[i].tos) { -+ if ((tcptos[i].fport && (ntohs(so->so_fport) == tcptos[i].fport)) || -+ (tcptos[i].lport && (ntohs(so->so_lport) == tcptos[i].lport))) { -+ so->so_emu = tcptos[i].emu; -+ return tcptos[i].tos; -+ } -+ i++; -+ } -+ -+ /* Nope, lets see if there's a user-added one */ -+ for (emup = tcpemu; emup; emup = emup->next) { -+ if ((emup->fport && (ntohs(so->so_fport) == emup->fport)) || -+ (emup->lport && (ntohs(so->so_lport) == emup->lport))) { -+ so->so_emu = emup->emu; -+ return emup->tos; -+ } -+ } -+ -+ return 0; -+} -+ -+/* -+ * Emulate programs that try and connect to us -+ * This includes ftp (the data connection is -+ * initiated by the server) and IRC (DCC CHAT and -+ * DCC SEND) for now -+ * -+ * NOTE: It's possible to crash SLiRP by sending it -+ * unstandard strings to emulate... 
if this is a problem, -+ * more checks are needed here -+ * -+ * XXX Assumes the whole command came in one packet -+ * -+ * XXX Some ftp clients will have their TOS set to -+ * LOWDELAY and so Nagel will kick in. Because of this, -+ * we'll get the first letter, followed by the rest, so -+ * we simply scan for ORT instead of PORT... -+ * DCC doesn't have this problem because there's other stuff -+ * in the packet before the DCC command. -+ * -+ * Return 1 if the mbuf m is still valid and should be -+ * sbappend()ed -+ * -+ * NOTE: if you return 0 you MUST m_free() the mbuf! -+ */ -+int tcp_emu(struct socket *so, struct mbuf *m) -+{ -+ Slirp *slirp = so->slirp; -+ unsigned n1, n2, n3, n4, n5, n6; -+ char buff[257]; -+ uint32_t laddr; -+ unsigned lport; -+ char *bptr; -+ -+ DEBUG_CALL("tcp_emu"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("m = %p", m); -+ -+ switch (so->so_emu) { -+ int x, i; -+ -+ /* TODO: IPv6 */ -+ case EMU_IDENT: -+ /* -+ * Identification protocol as per rfc-1413 -+ */ -+ -+ { -+ struct socket *tmpso; -+ struct sockaddr_in addr; -+ socklen_t addrlen = sizeof(struct sockaddr_in); -+ char *eol = g_strstr_len(m->m_data, m->m_len, "\r\n"); -+ -+ if (!eol) { -+ return 1; -+ } -+ -+ *eol = '\0'; -+ if (sscanf(m->m_data, "%u%*[ ,]%u", &n1, &n2) == 2) { -+ HTONS(n1); -+ HTONS(n2); -+ /* n2 is the one on our host */ -+ for (tmpso = slirp->tcb.so_next; tmpso != &slirp->tcb; -+ tmpso = tmpso->so_next) { -+ if (tmpso->so_laddr.s_addr == so->so_laddr.s_addr && -+ tmpso->so_lport == n2 && -+ tmpso->so_faddr.s_addr == so->so_faddr.s_addr && -+ tmpso->so_fport == n1) { -+ if (getsockname(tmpso->s, (struct sockaddr *)&addr, -+ &addrlen) == 0) -+ n2 = addr.sin_port; -+ break; -+ } -+ } -+ NTOHS(n1); -+ NTOHS(n2); -+ m_inc(m, snprintf(NULL, 0, "%d,%d\r\n", n1, n2) + 1); -+ m->m_len = snprintf(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); -+ assert(m->m_len < M_ROOM(m)); -+ } else { -+ *eol = '\r'; -+ } -+ -+ return 1; -+ } -+ -+ case EMU_FTP: /* ftp */ -+ m_inc(m, 
m->m_len + 1); -+ *(m->m_data + m->m_len) = 0; /* NUL terminate for strstr */ -+ if ((bptr = (char *)strstr(m->m_data, "ORT")) != NULL) { -+ /* -+ * Need to emulate the PORT command -+ */ -+ x = sscanf(bptr, "ORT %u,%u,%u,%u,%u,%u\r\n%256[^\177]", &n1, &n2, -+ &n3, &n4, &n5, &n6, buff); -+ if (x < 6) -+ return 1; -+ -+ laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4)); -+ lport = htons((n5 << 8) | (n6)); -+ -+ if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport, -+ SS_FACCEPTONCE)) == NULL) { -+ return 1; -+ } -+ n6 = ntohs(so->so_fport); -+ -+ n5 = (n6 >> 8) & 0xff; -+ n6 &= 0xff; -+ -+ laddr = ntohl(so->so_faddr.s_addr); -+ -+ n1 = ((laddr >> 24) & 0xff); -+ n2 = ((laddr >> 16) & 0xff); -+ n3 = ((laddr >> 8) & 0xff); -+ n4 = (laddr & 0xff); -+ -+ m->m_len = bptr - m->m_data; /* Adjust length */ -+ m->m_len += snprintf(bptr, m->m_size - m->m_len, -+ "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, -+ n5, n6, x == 7 ? buff : ""); -+ return 1; -+ } else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) { -+ /* -+ * Need to emulate the PASV response -+ */ -+ x = sscanf( -+ bptr, -+ "27 Entering Passive Mode (%u,%u,%u,%u,%u,%u)\r\n%256[^\177]", -+ &n1, &n2, &n3, &n4, &n5, &n6, buff); -+ if (x < 6) -+ return 1; -+ -+ laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4)); -+ lport = htons((n5 << 8) | (n6)); -+ -+ if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport, -+ SS_FACCEPTONCE)) == NULL) { -+ return 1; -+ } -+ n6 = ntohs(so->so_fport); -+ -+ n5 = (n6 >> 8) & 0xff; -+ n6 &= 0xff; -+ -+ laddr = ntohl(so->so_faddr.s_addr); -+ -+ n1 = ((laddr >> 24) & 0xff); -+ n2 = ((laddr >> 16) & 0xff); -+ n3 = ((laddr >> 8) & 0xff); -+ n4 = (laddr & 0xff); -+ -+ m->m_len = bptr - m->m_data; /* Adjust length */ -+ m->m_len += -+ snprintf(bptr, m->m_size - m->m_len, -+ "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", -+ n1, n2, n3, n4, n5, n6, x == 7 ? 
buff : ""); -+ -+ return 1; -+ } -+ -+ return 1; -+ -+ case EMU_KSH: -+ /* -+ * The kshell (Kerberos rsh) and shell services both pass -+ * a local port port number to carry signals to the server -+ * and stderr to the client. It is passed at the beginning -+ * of the connection as a NUL-terminated decimal ASCII string. -+ */ -+ so->so_emu = 0; -+ for (lport = 0, i = 0; i < m->m_len - 1; ++i) { -+ if (m->m_data[i] < '0' || m->m_data[i] > '9') -+ return 1; /* invalid number */ -+ lport *= 10; -+ lport += m->m_data[i] - '0'; -+ } -+ if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && -+ (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, -+ htons(lport), SS_FACCEPTONCE)) != NULL) -+ m->m_len = -+ snprintf(m->m_data, m->m_size, "%d", ntohs(so->so_fport)) + 1; -+ return 1; -+ -+ case EMU_IRC: -+ /* -+ * Need to emulate DCC CHAT, DCC SEND and DCC MOVE -+ */ -+ m_inc(m, m->m_len + 1); -+ *(m->m_data + m->m_len) = 0; /* NULL terminate the string for strstr */ -+ if ((bptr = (char *)strstr(m->m_data, "DCC")) == NULL) -+ return 1; -+ -+ /* The %256s is for the broken mIRC */ -+ if (sscanf(bptr, "DCC CHAT %256s %u %u", buff, &laddr, &lport) == 3) { -+ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), -+ htons(lport), SS_FACCEPTONCE)) == NULL) { -+ return 1; -+ } -+ m->m_len = bptr - m->m_data; /* Adjust length */ -+ m->m_len += snprintf(bptr, m->m_size, "DCC CHAT chat %lu %u%c\n", -+ (unsigned long)ntohl(so->so_faddr.s_addr), -+ ntohs(so->so_fport), 1); -+ } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, -+ &n1) == 4) { -+ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), -+ htons(lport), SS_FACCEPTONCE)) == NULL) { -+ return 1; -+ } -+ m->m_len = bptr - m->m_data; /* Adjust length */ -+ m->m_len += -+ snprintf(bptr, m->m_size, "DCC SEND %s %lu %u %u%c\n", buff, -+ (unsigned long)ntohl(so->so_faddr.s_addr), -+ ntohs(so->so_fport), n1, 1); -+ } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, -+ &n1) == 4) { 
-+ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), -+ htons(lport), SS_FACCEPTONCE)) == NULL) { -+ return 1; -+ } -+ m->m_len = bptr - m->m_data; /* Adjust length */ -+ m->m_len += -+ snprintf(bptr, m->m_size, "DCC MOVE %s %lu %u %u%c\n", buff, -+ (unsigned long)ntohl(so->so_faddr.s_addr), -+ ntohs(so->so_fport), n1, 1); -+ } -+ return 1; -+ -+ case EMU_REALAUDIO: -+ /* -+ * RealAudio emulation - JP. We must try to parse the incoming -+ * data and try to find the two characters that contain the -+ * port number. Then we redirect an udp port and replace the -+ * number with the real port we got. -+ * -+ * The 1.0 beta versions of the player are not supported -+ * any more. -+ * -+ * A typical packet for player version 1.0 (release version): -+ * -+ * 0000:50 4E 41 00 05 -+ * 0000:00 01 00 02 1B D7 00 00 67 E6 6C DC 63 00 12 50 ........g.l.c..P -+ * 0010:4E 43 4C 49 45 4E 54 20 31 30 31 20 41 4C 50 48 NCLIENT 101 ALPH -+ * 0020:41 6C 00 00 52 00 17 72 61 66 69 6C 65 73 2F 76 Al..R..rafiles/v -+ * 0030:6F 61 2F 65 6E 67 6C 69 73 68 5F 2E 72 61 79 42 oa/english_.rayB -+ * -+ * Now the port number 0x1BD7 is found at offset 0x04 of the -+ * Now the port number 0x1BD7 is found at offset 0x04 of the -+ * second packet. This time we received five bytes first and -+ * then the rest. You never know how many bytes you get. -+ * -+ * A typical packet for player version 2.0 (beta): -+ * -+ * 0000:50 4E 41 00 06 00 02 00 00 00 01 00 02 1B C1 00 PNA............. -+ * 0010:00 67 75 78 F5 63 00 0A 57 69 6E 32 2E 30 2E 30 .gux.c..Win2.0.0 -+ * 0020:2E 35 6C 00 00 52 00 1C 72 61 66 69 6C 65 73 2F .5l..R..rafiles/ -+ * 0030:77 65 62 73 69 74 65 2F 32 30 72 65 6C 65 61 73 website/20releas -+ * 0040:65 2E 72 61 79 53 00 00 06 36 42 e.rayS...6B -+ * -+ * Port number 0x1BC1 is found at offset 0x0d. -+ * -+ * This is just a horrible switch statement. Variable ra tells -+ * us where we're going. 
-+ */ -+ -+ bptr = m->m_data; -+ while (bptr < m->m_data + m->m_len) { -+ uint16_t p; -+ static int ra = 0; -+ char ra_tbl[4]; -+ -+ ra_tbl[0] = 0x50; -+ ra_tbl[1] = 0x4e; -+ ra_tbl[2] = 0x41; -+ ra_tbl[3] = 0; -+ -+ switch (ra) { -+ case 0: -+ case 2: -+ case 3: -+ if (*bptr++ != ra_tbl[ra]) { -+ ra = 0; -+ continue; -+ } -+ break; -+ -+ case 1: -+ /* -+ * We may get 0x50 several times, ignore them -+ */ -+ if (*bptr == 0x50) { -+ ra = 1; -+ bptr++; -+ continue; -+ } else if (*bptr++ != ra_tbl[ra]) { -+ ra = 0; -+ continue; -+ } -+ break; -+ -+ case 4: -+ /* -+ * skip version number -+ */ -+ bptr++; -+ break; -+ -+ case 5: -+ /* -+ * The difference between versions 1.0 and -+ * 2.0 is here. For future versions of -+ * the player this may need to be modified. -+ */ -+ if (*(bptr + 1) == 0x02) -+ bptr += 8; -+ else -+ bptr += 4; -+ break; -+ -+ case 6: -+ /* This is the field containing the port -+ * number that RA-player is listening to. -+ */ -+ lport = (((uint8_t *)bptr)[0] << 8) + ((uint8_t *)bptr)[1]; -+ if (lport < 6970) -+ lport += 256; /* don't know why */ -+ if (lport < 6970 || lport > 7170) -+ return 1; /* failed */ -+ -+ /* try to get udp port between 6970 - 7170 */ -+ for (p = 6970; p < 7071; p++) { -+ if (udp_listen(slirp, INADDR_ANY, htons(p), -+ so->so_laddr.s_addr, htons(lport), -+ SS_FACCEPTONCE)) { -+ break; -+ } -+ } -+ if (p == 7071) -+ p = 0; -+ *(uint8_t *)bptr++ = (p >> 8) & 0xff; -+ *(uint8_t *)bptr = p & 0xff; -+ ra = 0; -+ return 1; /* port redirected, we're done */ -+ break; -+ -+ default: -+ ra = 0; -+ } -+ ra++; -+ } -+ return 1; -+ -+ default: -+ /* Ooops, not emulated, won't call tcp_emu again */ -+ so->so_emu = 0; -+ return 1; -+ } -+} -+ -+/* -+ * Do misc. config of SLiRP while its running. 
-+ * Return 0 if this connections is to be closed, 1 otherwise, -+ * return 2 if this is a command-line connection -+ */ -+int tcp_ctl(struct socket *so) -+{ -+ Slirp *slirp = so->slirp; -+ struct sbuf *sb = &so->so_snd; -+ struct gfwd_list *ex_ptr; -+ -+ DEBUG_CALL("tcp_ctl"); -+ DEBUG_ARG("so = %p", so); -+ -+ /* TODO: IPv6 */ -+ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { -+ /* Check if it's pty_exec */ -+ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { -+ if (ex_ptr->ex_fport == so->so_fport && -+ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { -+ if (ex_ptr->write_cb) { -+ so->s = -1; -+ so->guestfwd = ex_ptr; -+ return 1; -+ } -+ DEBUG_MISC(" executing %s", ex_ptr->ex_exec); -+ return fork_exec(so, ex_ptr->ex_exec); -+ } -+ } -+ } -+ sb->sb_cc = -+ snprintf(sb->sb_wptr, sb->sb_datalen - (sb->sb_wptr - sb->sb_data), -+ "Error: No application configured.\r\n"); -+ sb->sb_wptr += sb->sb_cc; -+ return 0; -+} -diff --git a/slirp/src/tcp_timer.c b/slirp/src/tcp_timer.c -new file mode 100644 -index 0000000..102023e ---- /dev/null -+++ b/slirp/src/tcp_timer.c -@@ -0,0 +1,286 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1990, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. 
Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93 -+ * tcp_timer.c,v 1.2 1994/08/02 07:49:10 davidg Exp -+ */ -+ -+#include "slirp.h" -+ -+static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer); -+ -+/* -+ * Fast timeout routine for processing delayed acks -+ */ -+void tcp_fasttimo(Slirp *slirp) -+{ -+ register struct socket *so; -+ register struct tcpcb *tp; -+ -+ DEBUG_CALL("tcp_fasttimo"); -+ -+ so = slirp->tcb.so_next; -+ if (so) -+ for (; so != &slirp->tcb; so = so->so_next) -+ if ((tp = (struct tcpcb *)so->so_tcpcb) && -+ (tp->t_flags & TF_DELACK)) { -+ tp->t_flags &= ~TF_DELACK; -+ tp->t_flags |= TF_ACKNOW; -+ (void)tcp_output(tp); -+ } -+} -+ -+/* -+ * Tcp protocol timeout routine called every 500 ms. -+ * Updates the timers in all active tcb's and -+ * causes finite state machine actions if timers expire. 
-+ */ -+void tcp_slowtimo(Slirp *slirp) -+{ -+ register struct socket *ip, *ipnxt; -+ register struct tcpcb *tp; -+ register int i; -+ -+ DEBUG_CALL("tcp_slowtimo"); -+ -+ /* -+ * Search through tcb's and update active timers. -+ */ -+ ip = slirp->tcb.so_next; -+ if (ip == NULL) { -+ return; -+ } -+ for (; ip != &slirp->tcb; ip = ipnxt) { -+ ipnxt = ip->so_next; -+ tp = sototcpcb(ip); -+ if (tp == NULL) { -+ continue; -+ } -+ for (i = 0; i < TCPT_NTIMERS; i++) { -+ if (tp->t_timer[i] && --tp->t_timer[i] == 0) { -+ tcp_timers(tp, i); -+ if (ipnxt->so_prev != ip) -+ goto tpgone; -+ } -+ } -+ tp->t_idle++; -+ if (tp->t_rtt) -+ tp->t_rtt++; -+ tpgone:; -+ } -+ slirp->tcp_iss += TCP_ISSINCR / PR_SLOWHZ; /* increment iss */ -+ slirp->tcp_now++; /* for timestamps */ -+} -+ -+/* -+ * Cancel all timers for TCP tp. -+ */ -+void tcp_canceltimers(struct tcpcb *tp) -+{ -+ register int i; -+ -+ for (i = 0; i < TCPT_NTIMERS; i++) -+ tp->t_timer[i] = 0; -+} -+ -+const int tcp_backoff[TCP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, -+ 64, 64, 64, 64, 64, 64 }; -+ -+/* -+ * TCP timer processing. -+ */ -+static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer) -+{ -+ register int rexmt; -+ -+ DEBUG_CALL("tcp_timers"); -+ -+ switch (timer) { -+ /* -+ * 2 MSL timeout in shutdown went off. If we're closed but -+ * still waiting for peer to close and connection has been idle -+ * too long, or if 2MSL time is up from TIME_WAIT, delete connection -+ * control block. Otherwise, check again in a bit. -+ */ -+ case TCPT_2MSL: -+ if (tp->t_state != TCPS_TIME_WAIT && tp->t_idle <= TCP_MAXIDLE) -+ tp->t_timer[TCPT_2MSL] = TCPTV_KEEPINTVL; -+ else -+ tp = tcp_close(tp); -+ break; -+ -+ /* -+ * Retransmission timer went off. Message has not -+ * been acked within retransmit interval. Back off -+ * to a longer retransmit interval and retransmit one segment. 
-+ */ -+ case TCPT_REXMT: -+ -+ /* -+ * XXXXX If a packet has timed out, then remove all the queued -+ * packets for that session. -+ */ -+ -+ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { -+ /* -+ * This is a hack to suit our terminal server here at the uni of -+ * canberra since they have trouble with zeroes... It usually lets -+ * them through unharmed, but under some conditions, it'll eat the -+ * zeros. If we keep retransmitting it, it'll keep eating the -+ * zeroes, so we keep retransmitting, and eventually the connection -+ * dies... (this only happens on incoming data) -+ * -+ * So, if we were gonna drop the connection from too many -+ * retransmits, don't... instead halve the t_maxseg, which might -+ * break up the NULLs and let them through -+ * -+ * *sigh* -+ */ -+ -+ tp->t_maxseg >>= 1; -+ if (tp->t_maxseg < 32) { -+ /* -+ * We tried our best, now the connection must die! -+ */ -+ tp->t_rxtshift = TCP_MAXRXTSHIFT; -+ tp = tcp_drop(tp, tp->t_softerror); -+ /* tp->t_softerror : ETIMEDOUT); */ /* XXX */ -+ return (tp); /* XXX */ -+ } -+ -+ /* -+ * Set rxtshift to 6, which is still at the maximum -+ * backoff time -+ */ -+ tp->t_rxtshift = 6; -+ } -+ rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; -+ TCPT_RANGESET(tp->t_rxtcur, rexmt, (short)tp->t_rttmin, -+ TCPTV_REXMTMAX); /* XXX */ -+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; -+ /* -+ * If losing, let the lower level know and try for -+ * a better route. Also, if we backed off this far, -+ * our srtt estimate is probably bogus. Clobber it -+ * so we'll take the next rtt measurement as our srtt; -+ * move the current srtt into rttvar to keep the current -+ * retransmit times until then. -+ */ -+ if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { -+ tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); -+ tp->t_srtt = 0; -+ } -+ tp->snd_nxt = tp->snd_una; -+ /* -+ * If timing a segment in this window, stop the timer. 
-+ */ -+ tp->t_rtt = 0; -+ /* -+ * Close the congestion window down to one segment -+ * (we'll open it by one segment for each ack we get). -+ * Since we probably have a window's worth of unacked -+ * data accumulated, this "slow start" keeps us from -+ * dumping all that data as back-to-back packets (which -+ * might overwhelm an intermediate gateway). -+ * -+ * There are two phases to the opening: Initially we -+ * open by one mss on each ack. This makes the window -+ * size increase exponentially with time. If the -+ * window is larger than the path can handle, this -+ * exponential growth results in dropped packet(s) -+ * almost immediately. To get more time between -+ * drops but still "push" the network to take advantage -+ * of improving conditions, we switch from exponential -+ * to linear window opening at some threshold size. -+ * For a threshold, we use half the current window -+ * size, truncated to a multiple of the mss. -+ * -+ * (the minimum cwnd that will give us exponential -+ * growth is 2 mss. We don't allow the threshold -+ * to go below this.) -+ */ -+ { -+ unsigned win = MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; -+ if (win < 2) -+ win = 2; -+ tp->snd_cwnd = tp->t_maxseg; -+ tp->snd_ssthresh = win * tp->t_maxseg; -+ tp->t_dupacks = 0; -+ } -+ (void)tcp_output(tp); -+ break; -+ -+ /* -+ * Persistence timer into zero window. -+ * Force a byte to be output, if possible. -+ */ -+ case TCPT_PERSIST: -+ tcp_setpersist(tp); -+ tp->t_force = 1; -+ (void)tcp_output(tp); -+ tp->t_force = 0; -+ break; -+ -+ /* -+ * Keep-alive timer went off; send something -+ * or drop connection if idle for too long. 
-+ */ -+ case TCPT_KEEP: -+ if (tp->t_state < TCPS_ESTABLISHED) -+ goto dropit; -+ -+ if (slirp_do_keepalive && tp->t_state <= TCPS_CLOSE_WAIT) { -+ if (tp->t_idle >= TCPTV_KEEP_IDLE + TCP_MAXIDLE) -+ goto dropit; -+ /* -+ * Send a packet designed to force a response -+ * if the peer is up and reachable: -+ * either an ACK if the connection is still alive, -+ * or an RST if the peer has closed the connection -+ * due to timeout or reboot. -+ * Using sequence number tp->snd_una-1 -+ * causes the transmitted zero-length segment -+ * to lie outside the receive window; -+ * by the protocol spec, this requires the -+ * correspondent TCP to respond. -+ */ -+ tcp_respond(tp, &tp->t_template, (struct mbuf *)NULL, tp->rcv_nxt, -+ tp->snd_una - 1, 0, tp->t_socket->so_ffamily); -+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; -+ } else -+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; -+ break; -+ -+ dropit: -+ tp = tcp_drop(tp, 0); -+ break; -+ } -+ -+ return (tp); -+} -diff --git a/slirp/src/tcp_timer.h b/slirp/src/tcp_timer.h -new file mode 100644 -index 0000000..584a559 ---- /dev/null -+++ b/slirp/src/tcp_timer.h -@@ -0,0 +1,130 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. 
Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93 -+ * tcp_timer.h,v 1.4 1994/08/21 05:27:38 paul Exp -+ */ -+ -+#ifndef TCP_TIMER_H -+#define TCP_TIMER_H -+ -+/* -+ * Definitions of the TCP timers. These timers are counted -+ * down PR_SLOWHZ times a second. -+ */ -+#define TCPT_NTIMERS 4 -+ -+#define TCPT_REXMT 0 /* retransmit */ -+#define TCPT_PERSIST 1 /* retransmit persistence */ -+#define TCPT_KEEP 2 /* keep alive */ -+#define TCPT_2MSL 3 /* 2*msl quiet time timer */ -+ -+/* -+ * The TCPT_REXMT timer is used to force retransmissions. -+ * The TCP has the TCPT_REXMT timer set whenever segments -+ * have been sent for which ACKs are expected but not yet -+ * received. If an ACK is received which advances tp->snd_una, -+ * then the retransmit timer is cleared (if there are no more -+ * outstanding segments) or reset to the base value (if there -+ * are more ACKs expected). 
Whenever the retransmit timer goes off, -+ * we retransmit one unacknowledged segment, and do a backoff -+ * on the retransmit timer. -+ * -+ * The TCPT_PERSIST timer is used to keep window size information -+ * flowing even if the window goes shut. If all previous transmissions -+ * have been acknowledged (so that there are no retransmissions in progress), -+ * and the window is too small to bother sending anything, then we start -+ * the TCPT_PERSIST timer. When it expires, if the window is nonzero, -+ * we go to transmit state. Otherwise, at intervals send a single byte -+ * into the peer's window to force him to update our window information. -+ * We do this at most as often as TCPT_PERSMIN time intervals, -+ * but no more frequently than the current estimate of round-trip -+ * packet time. The TCPT_PERSIST timer is cleared whenever we receive -+ * a window update from the peer. -+ * -+ * The TCPT_KEEP timer is used to keep connections alive. If an -+ * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time, -+ * but not yet established, then we drop the connection. Once the connection -+ * is established, if the connection is idle for TCPTV_KEEP_IDLE time -+ * (and keepalives have been enabled on the socket), we begin to probe -+ * the connection. We force the peer to send us a segment by sending: -+ * -+ * This segment is (deliberately) outside the window, and should elicit -+ * an ack segment in response from the peer. If, despite the TCPT_KEEP -+ * initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE -+ * amount of time probing, then we drop the connection. -+ */ -+ -+/* -+ * Time constants. -+ */ -+#define TCPTV_MSL (5 * PR_SLOWHZ) /* max seg lifetime (hah!) 
*/ -+ -+#define TCPTV_SRTTBASE \ -+ 0 /* base roundtrip time; \ -+ if 0, no idea yet */ -+#define TCPTV_SRTTDFLT (3 * PR_SLOWHZ) /* assumed RTT if no info */ -+ -+#define TCPTV_PERSMIN (5 * PR_SLOWHZ) /* retransmit persistence */ -+#define TCPTV_PERSMAX (60 * PR_SLOWHZ) /* maximum persist interval */ -+ -+#define TCPTV_KEEP_INIT (75 * PR_SLOWHZ) /* initial connect keep alive */ -+#define TCPTV_KEEP_IDLE (120 * 60 * PR_SLOWHZ) /* dflt time before probing */ -+#define TCPTV_KEEPINTVL (75 * PR_SLOWHZ) /* default probe interval */ -+#define TCPTV_KEEPCNT 8 /* max probes before drop */ -+ -+#define TCPTV_MIN (1 * PR_SLOWHZ) /* minimum allowable value */ -+#define TCPTV_REXMTMAX (12 * PR_SLOWHZ) /* max allowable REXMT value */ -+ -+#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ -+ -+#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ -+ -+ -+/* -+ * Force a time value to be in a certain range. -+ */ -+#define TCPT_RANGESET(tv, value, tvmin, tvmax) \ -+ { \ -+ (tv) = (value); \ -+ if ((tv) < (tvmin)) \ -+ (tv) = (tvmin); \ -+ else if ((tv) > (tvmax)) \ -+ (tv) = (tvmax); \ -+ } -+ -+extern const int tcp_backoff[]; -+ -+struct tcpcb; -+ -+void tcp_fasttimo(Slirp *); -+void tcp_slowtimo(Slirp *); -+void tcp_canceltimers(struct tcpcb *); -+ -+#endif -diff --git a/slirp/src/tcp_var.h b/slirp/src/tcp_var.h -new file mode 100644 -index 0000000..c8da8cb ---- /dev/null -+++ b/slirp/src/tcp_var.h -@@ -0,0 +1,161 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1993, 1994 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. 
Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)tcp_var.h 8.3 (Berkeley) 4/10/94 -+ * tcp_var.h,v 1.3 1994/08/21 05:27:39 paul Exp -+ */ -+ -+#ifndef TCP_VAR_H -+#define TCP_VAR_H -+ -+#include "tcpip.h" -+#include "tcp_timer.h" -+ -+/* -+ * Tcp control block, one per tcp; fields: -+ */ -+struct tcpcb { -+ struct tcpiphdr *seg_next; /* sequencing queue */ -+ struct tcpiphdr *seg_prev; -+ short t_state; /* state of this connection */ -+ short t_timer[TCPT_NTIMERS]; /* tcp timers */ -+ short t_rxtshift; /* log(2) of rexmt exp. 
backoff */ -+ short t_rxtcur; /* current retransmit value */ -+ short t_dupacks; /* consecutive dup acks recd */ -+ uint16_t t_maxseg; /* maximum segment size */ -+ uint8_t t_force; /* 1 if forcing out a byte */ -+ uint16_t t_flags; -+#define TF_ACKNOW 0x0001 /* ack peer immediately */ -+#define TF_DELACK 0x0002 /* ack, but try to delay it */ -+#define TF_NODELAY 0x0004 /* don't delay packets to coalesce */ -+#define TF_NOOPT 0x0008 /* don't use tcp options */ -+#define TF_SENTFIN 0x0010 /* have sent FIN */ -+#define TF_REQ_SCALE 0x0020 /* have/will request window scaling */ -+#define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */ -+#define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */ -+#define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */ -+#define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */ -+ -+ struct tcpiphdr t_template; /* static skeletal packet for xmit */ -+ -+ struct socket *t_socket; /* back pointer to socket */ -+ /* -+ * The following fields are used as in the protocol specification. -+ * See RFC783, Dec. 1981, page 21. -+ */ -+ /* send sequence variables */ -+ tcp_seq snd_una; /* send unacknowledged */ -+ tcp_seq snd_nxt; /* send next */ -+ tcp_seq snd_up; /* send urgent pointer */ -+ tcp_seq snd_wl1; /* window update seg seq number */ -+ tcp_seq snd_wl2; /* window update seg ack number */ -+ tcp_seq iss; /* initial send sequence number */ -+ uint32_t snd_wnd; /* send window */ -+ /* receive sequence variables */ -+ uint32_t rcv_wnd; /* receive window */ -+ tcp_seq rcv_nxt; /* receive next */ -+ tcp_seq rcv_up; /* receive urgent pointer */ -+ tcp_seq irs; /* initial receive sequence number */ -+ /* -+ * Additional variables for this implementation. 
-+ */ -+ /* receive variables */ -+ tcp_seq rcv_adv; /* advertised window */ -+ /* retransmit variables */ -+ tcp_seq snd_max; /* highest sequence number sent; -+ * used to recognize retransmits -+ */ -+ /* congestion control (for slow start, source quench, retransmit after loss) -+ */ -+ uint32_t snd_cwnd; /* congestion-controlled window */ -+ uint32_t snd_ssthresh; /* snd_cwnd size threshold for -+ * for slow start exponential to -+ * linear switch -+ */ -+ /* -+ * transmit timing stuff. See below for scale of srtt and rttvar. -+ * "Variance" is actually smoothed difference. -+ */ -+ short t_idle; /* inactivity time */ -+ short t_rtt; /* round trip time */ -+ tcp_seq t_rtseq; /* sequence number being timed */ -+ short t_srtt; /* smoothed round-trip time */ -+ short t_rttvar; /* variance in round-trip time */ -+ uint16_t t_rttmin; /* minimum rtt allowed */ -+ uint32_t max_sndwnd; /* largest window peer has offered */ -+ -+ /* out-of-band data */ -+ uint8_t t_oobflags; /* have some */ -+ uint8_t t_iobc; /* input character */ -+#define TCPOOB_HAVEDATA 0x01 -+#define TCPOOB_HADDATA 0x02 -+ short t_softerror; /* possible error not yet reported */ -+ -+ /* RFC 1323 variables */ -+ uint8_t snd_scale; /* window scaling for send window */ -+ uint8_t rcv_scale; /* window scaling for recv window */ -+ uint8_t request_r_scale; /* pending window scaling */ -+ uint8_t requested_s_scale; -+ uint32_t ts_recent; /* timestamp echo data */ -+ uint32_t ts_recent_age; /* when last updated */ -+ tcp_seq last_ack_sent; -+}; -+ -+#define sototcpcb(so) ((so)->so_tcpcb) -+ -+/* -+ * The smoothed round-trip time and estimated variance -+ * are stored as fixed point numbers scaled by the values below. -+ * For convenience, these scales are also used in smoothing the average -+ * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). -+ * With these scales, srtt has 3 bits to the right of the binary point, -+ * and thus an "ALPHA" of 0.875. 
rttvar has 2 bits to the right of the -+ * binary point, and is smoothed with an ALPHA of 0.75. -+ */ -+#define TCP_RTT_SCALE 8 /* multiplier for srtt; 3 bits frac. */ -+#define TCP_RTT_SHIFT 3 /* shift for srtt; 3 bits frac. */ -+#define TCP_RTTVAR_SCALE 4 /* multiplier for rttvar; 2 bits */ -+#define TCP_RTTVAR_SHIFT 2 /* multiplier for rttvar; 2 bits */ -+ -+/* -+ * The initial retransmission should happen at rtt + 4 * rttvar. -+ * Because of the way we do the smoothing, srtt and rttvar -+ * will each average +1/2 tick of bias. When we compute -+ * the retransmit timer, we want 1/2 tick of rounding and -+ * 1 extra tick because of +-1/2 tick uncertainty in the -+ * firing of the timer. The bias will give us exactly the -+ * 1.5 tick we need. But, because the bias is -+ * statistical, we have to test that we don't drop below -+ * the minimum feasible timer (which is 2 ticks). -+ * This macro assumes that the value of TCP_RTTVAR_SCALE -+ * is the same as the multiplier for rttvar. -+ */ -+#define TCP_REXMTVAL(tp) (((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar) -+ -+#endif -diff --git a/slirp/src/tcpip.h b/slirp/src/tcpip.h -new file mode 100644 -index 0000000..d3df021 ---- /dev/null -+++ b/slirp/src/tcpip.h -@@ -0,0 +1,104 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. 
Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)tcpip.h 8.1 (Berkeley) 6/10/93 -+ * tcpip.h,v 1.3 1994/08/21 05:27:40 paul Exp -+ */ -+ -+#ifndef TCPIP_H -+#define TCPIP_H -+ -+/* -+ * Tcp+ip header, after ip options removed. 
-+ */ -+struct tcpiphdr { -+ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ -+ union { -+ struct { -+ struct in_addr ih_src; /* source internet address */ -+ struct in_addr ih_dst; /* destination internet address */ -+ uint8_t ih_x1; /* (unused) */ -+ uint8_t ih_pr; /* protocol */ -+ } ti_i4; -+ struct { -+ struct in6_addr ih_src; -+ struct in6_addr ih_dst; -+ uint8_t ih_x1; -+ uint8_t ih_nh; -+ } ti_i6; -+ } ti; -+ uint16_t ti_x0; -+ uint16_t ti_len; /* protocol length */ -+ struct tcphdr ti_t; /* tcp header */ -+}; -+#define ti_mbuf ih_mbuf.mptr -+#define ti_pr ti.ti_i4.ih_pr -+#define ti_src ti.ti_i4.ih_src -+#define ti_dst ti.ti_i4.ih_dst -+#define ti_src6 ti.ti_i6.ih_src -+#define ti_dst6 ti.ti_i6.ih_dst -+#define ti_nh6 ti.ti_i6.ih_nh -+#define ti_sport ti_t.th_sport -+#define ti_dport ti_t.th_dport -+#define ti_seq ti_t.th_seq -+#define ti_ack ti_t.th_ack -+#define ti_x2 ti_t.th_x2 -+#define ti_off ti_t.th_off -+#define ti_flags ti_t.th_flags -+#define ti_win ti_t.th_win -+#define ti_sum ti_t.th_sum -+#define ti_urp ti_t.th_urp -+ -+#define tcpiphdr2qlink(T) \ -+ ((struct qlink *)(((char *)(T)) - sizeof(struct qlink))) -+#define qlink2tcpiphdr(Q) \ -+ ((struct tcpiphdr *)(((char *)(Q)) + sizeof(struct qlink))) -+#define tcpiphdr_next(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->next) -+#define tcpiphdr_prev(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->prev) -+#define tcpfrag_list_first(T) qlink2tcpiphdr((T)->seg_next) -+#define tcpfrag_list_end(F, T) (tcpiphdr2qlink(F) == (struct qlink *)(T)) -+#define tcpfrag_list_empty(T) ((T)->seg_next == (struct tcpiphdr *)(T)) -+ -+/* This is the difference between the size of a tcpiphdr structure, and the -+ * size of actual ip+tcp headers, rounded up since we need to align data. 
*/ -+#define TCPIPHDR_DELTA \ -+ (MAX(0, (sizeof(struct tcpiphdr) - sizeof(struct ip) - \ -+ sizeof(struct tcphdr) + 3) & \ -+ ~3)) -+ -+/* -+ * Just a clean way to get to the first byte -+ * of the packet -+ */ -+struct tcpiphdr_2 { -+ struct tcpiphdr dummy; -+ char first_char; -+}; -+ -+#endif -diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c -new file mode 100644 -index 0000000..093c2e0 ---- /dev/null -+++ b/slirp/src/tftp.c -@@ -0,0 +1,462 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * tftp.c - a simple, read-only tftp server for qemu -+ * -+ * Copyright (c) 2004 Magnus Damm -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. 
-+ */ -+ -+#include "slirp.h" -+ -+#include -+#include -+#include -+ -+static inline int tftp_session_in_use(struct tftp_session *spt) -+{ -+ return (spt->slirp != NULL); -+} -+ -+static inline void tftp_session_update(struct tftp_session *spt) -+{ -+ spt->timestamp = curtime; -+} -+ -+static void tftp_session_terminate(struct tftp_session *spt) -+{ -+ if (spt->fd >= 0) { -+ close(spt->fd); -+ spt->fd = -1; -+ } -+ g_free(spt->filename); -+ spt->slirp = NULL; -+} -+ -+static int tftp_session_allocate(Slirp *slirp, struct sockaddr_storage *srcsas, -+ struct tftp_t *tp) -+{ -+ struct tftp_session *spt; -+ int k; -+ -+ for (k = 0; k < TFTP_SESSIONS_MAX; k++) { -+ spt = &slirp->tftp_sessions[k]; -+ -+ if (!tftp_session_in_use(spt)) -+ goto found; -+ -+ /* sessions time out after 5 inactive seconds */ -+ if ((int)(curtime - spt->timestamp) > 5000) { -+ tftp_session_terminate(spt); -+ goto found; -+ } -+ } -+ -+ return -1; -+ -+found: -+ memset(spt, 0, sizeof(*spt)); -+ memcpy(&spt->client_addr, srcsas, sockaddr_size(srcsas)); -+ spt->fd = -1; -+ spt->block_size = 512; -+ spt->client_port = tp->udp.uh_sport; -+ spt->slirp = slirp; -+ -+ tftp_session_update(spt); -+ -+ return k; -+} -+ -+static int tftp_session_find(Slirp *slirp, struct sockaddr_storage *srcsas, -+ struct tftp_t *tp) -+{ -+ struct tftp_session *spt; -+ int k; -+ -+ for (k = 0; k < TFTP_SESSIONS_MAX; k++) { -+ spt = &slirp->tftp_sessions[k]; -+ -+ if (tftp_session_in_use(spt)) { -+ if (sockaddr_equal(&spt->client_addr, srcsas)) { -+ if (spt->client_port == tp->udp.uh_sport) { -+ return k; -+ } -+ } -+ } -+ } -+ -+ return -1; -+} -+ -+static int tftp_read_data(struct tftp_session *spt, uint32_t block_nr, -+ uint8_t *buf, int len) -+{ -+ int bytes_read = 0; -+ -+ if (spt->fd < 0) { -+ spt->fd = open(spt->filename, O_RDONLY | O_BINARY); -+ } -+ -+ if (spt->fd < 0) { -+ return -1; -+ } -+ -+ if (len) { -+ lseek(spt->fd, block_nr * spt->block_size, SEEK_SET); -+ -+ bytes_read = read(spt->fd, buf, len); -+ } -+ 
-+ return bytes_read; -+} -+ -+static struct tftp_t *tftp_prep_mbuf_data(struct tftp_session *spt, -+ struct mbuf *m) -+{ -+ struct tftp_t *tp; -+ -+ memset(m->m_data, 0, m->m_size); -+ -+ m->m_data += IF_MAXLINKHDR; -+ if (spt->client_addr.ss_family == AF_INET6) { -+ m->m_data += sizeof(struct ip6); -+ } else { -+ m->m_data += sizeof(struct ip); -+ } -+ tp = (void *)m->m_data; -+ m->m_data += sizeof(struct udphdr); -+ -+ return tp; -+} -+ -+static void tftp_udp_output(struct tftp_session *spt, struct mbuf *m, -+ struct tftp_t *recv_tp) -+{ -+ if (spt->client_addr.ss_family == AF_INET6) { -+ struct sockaddr_in6 sa6, da6; -+ -+ sa6.sin6_addr = spt->slirp->vhost_addr6; -+ sa6.sin6_port = recv_tp->udp.uh_dport; -+ da6.sin6_addr = ((struct sockaddr_in6 *)&spt->client_addr)->sin6_addr; -+ da6.sin6_port = spt->client_port; -+ -+ udp6_output(NULL, m, &sa6, &da6); -+ } else { -+ struct sockaddr_in sa4, da4; -+ -+ sa4.sin_addr = spt->slirp->vhost_addr; -+ sa4.sin_port = recv_tp->udp.uh_dport; -+ da4.sin_addr = ((struct sockaddr_in *)&spt->client_addr)->sin_addr; -+ da4.sin_port = spt->client_port; -+ -+ udp_output(NULL, m, &sa4, &da4, IPTOS_LOWDELAY); -+ } -+} -+ -+static int tftp_send_oack(struct tftp_session *spt, const char *keys[], -+ uint32_t values[], int nb, struct tftp_t *recv_tp) -+{ -+ struct mbuf *m; -+ struct tftp_t *tp; -+ int i, n = 0; -+ -+ m = m_get(spt->slirp); -+ -+ if (!m) -+ return -1; -+ -+ tp = tftp_prep_mbuf_data(spt, m); -+ -+ tp->tp_op = htons(TFTP_OACK); -+ for (i = 0; i < nb; i++) { -+ n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%s", -+ keys[i]) + -+ 1; -+ n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%u", -+ values[i]) + -+ 1; -+ } -+ -+ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX + 2) + n - -+ sizeof(struct udphdr); -+ tftp_udp_output(spt, m, recv_tp); -+ -+ return 0; -+} -+ -+static void tftp_send_error(struct tftp_session *spt, uint16_t errorcode, -+ const char *msg, struct tftp_t *recv_tp) -+{ -+ struct 
mbuf *m; -+ struct tftp_t *tp; -+ -+ DEBUG_TFTP("tftp error msg: %s", msg); -+ -+ m = m_get(spt->slirp); -+ -+ if (!m) { -+ goto out; -+ } -+ -+ tp = tftp_prep_mbuf_data(spt, m); -+ -+ tp->tp_op = htons(TFTP_ERROR); -+ tp->x.tp_error.tp_error_code = htons(errorcode); -+ slirp_pstrcpy((char *)tp->x.tp_error.tp_msg, sizeof(tp->x.tp_error.tp_msg), -+ msg); -+ -+ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX + 2) + 3 + -+ strlen(msg) - sizeof(struct udphdr); -+ tftp_udp_output(spt, m, recv_tp); -+ -+out: -+ tftp_session_terminate(spt); -+} -+ -+static void tftp_send_next_block(struct tftp_session *spt, -+ struct tftp_t *recv_tp) -+{ -+ struct mbuf *m; -+ struct tftp_t *tp; -+ int nobytes; -+ -+ m = m_get(spt->slirp); -+ -+ if (!m) { -+ return; -+ } -+ -+ tp = tftp_prep_mbuf_data(spt, m); -+ -+ tp->tp_op = htons(TFTP_DATA); -+ tp->x.tp_data.tp_block_nr = htons((spt->block_nr + 1) & 0xffff); -+ -+ nobytes = tftp_read_data(spt, spt->block_nr, tp->x.tp_data.tp_buf, -+ spt->block_size); -+ -+ if (nobytes < 0) { -+ m_free(m); -+ -+ /* send "file not found" error back */ -+ -+ tftp_send_error(spt, 1, "File not found", tp); -+ -+ return; -+ } -+ -+ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX - nobytes) - -+ sizeof(struct udphdr); -+ tftp_udp_output(spt, m, recv_tp); -+ -+ if (nobytes == spt->block_size) { -+ tftp_session_update(spt); -+ } else { -+ tftp_session_terminate(spt); -+ } -+ -+ spt->block_nr++; -+} -+ -+static void tftp_handle_rrq(Slirp *slirp, struct sockaddr_storage *srcsas, -+ struct tftp_t *tp, int pktlen) -+{ -+ struct tftp_session *spt; -+ int s, k; -+ size_t prefix_len; -+ char *req_fname; -+ const char *option_name[2]; -+ uint32_t option_value[2]; -+ int nb_options = 0; -+ -+ /* check if a session already exists and if so terminate it */ -+ s = tftp_session_find(slirp, srcsas, tp); -+ if (s >= 0) { -+ tftp_session_terminate(&slirp->tftp_sessions[s]); -+ } -+ -+ s = tftp_session_allocate(slirp, srcsas, tp); -+ -+ if (s < 0) { -+ return; -+ 
} -+ -+ spt = &slirp->tftp_sessions[s]; -+ -+ /* unspecified prefix means service disabled */ -+ if (!slirp->tftp_prefix) { -+ tftp_send_error(spt, 2, "Access violation", tp); -+ return; -+ } -+ -+ /* skip header fields */ -+ k = 0; -+ pktlen -= offsetof(struct tftp_t, x.tp_buf); -+ -+ /* prepend tftp_prefix */ -+ prefix_len = strlen(slirp->tftp_prefix); -+ spt->filename = g_malloc(prefix_len + TFTP_FILENAME_MAX + 2); -+ memcpy(spt->filename, slirp->tftp_prefix, prefix_len); -+ spt->filename[prefix_len] = '/'; -+ -+ /* get name */ -+ req_fname = spt->filename + prefix_len + 1; -+ -+ while (1) { -+ if (k >= TFTP_FILENAME_MAX || k >= pktlen) { -+ tftp_send_error(spt, 2, "Access violation", tp); -+ return; -+ } -+ req_fname[k] = tp->x.tp_buf[k]; -+ if (req_fname[k++] == '\0') { -+ break; -+ } -+ } -+ -+ DEBUG_TFTP("tftp rrq file: %s", req_fname); -+ -+ /* check mode */ -+ if ((pktlen - k) < 6) { -+ tftp_send_error(spt, 2, "Access violation", tp); -+ return; -+ } -+ -+ if (strcasecmp(&tp->x.tp_buf[k], "octet") != 0) { -+ tftp_send_error(spt, 4, "Unsupported transfer mode", tp); -+ return; -+ } -+ -+ k += 6; /* skipping octet */ -+ -+ /* do sanity checks on the filename */ -+ if (!strncmp(req_fname, "../", 3) || -+ req_fname[strlen(req_fname) - 1] == '/' || strstr(req_fname, "/../")) { -+ tftp_send_error(spt, 2, "Access violation", tp); -+ return; -+ } -+ -+ /* check if the file exists */ -+ if (tftp_read_data(spt, 0, NULL, 0) < 0) { -+ tftp_send_error(spt, 1, "File not found", tp); -+ return; -+ } -+ -+ if (tp->x.tp_buf[pktlen - 1] != 0) { -+ tftp_send_error(spt, 2, "Access violation", tp); -+ return; -+ } -+ -+ while (k < pktlen && nb_options < G_N_ELEMENTS(option_name)) { -+ const char *key, *value; -+ -+ key = &tp->x.tp_buf[k]; -+ k += strlen(key) + 1; -+ -+ if (k >= pktlen) { -+ tftp_send_error(spt, 2, "Access violation", tp); -+ return; -+ } -+ -+ value = &tp->x.tp_buf[k]; -+ k += strlen(value) + 1; -+ -+ if (strcasecmp(key, "tsize") == 0) { -+ int tsize = 
atoi(value); -+ struct stat stat_p; -+ -+ if (tsize == 0) { -+ if (stat(spt->filename, &stat_p) == 0) -+ tsize = stat_p.st_size; -+ else { -+ tftp_send_error(spt, 1, "File not found", tp); -+ return; -+ } -+ } -+ -+ option_name[nb_options] = "tsize"; -+ option_value[nb_options] = tsize; -+ nb_options++; -+ } else if (strcasecmp(key, "blksize") == 0) { -+ int blksize = atoi(value); -+ -+ /* Accept blksize up to our maximum size */ -+ if (blksize > 0) { -+ spt->block_size = MIN(blksize, TFTP_BLOCKSIZE_MAX); -+ option_name[nb_options] = "blksize"; -+ option_value[nb_options] = spt->block_size; -+ nb_options++; -+ } -+ } -+ } -+ -+ if (nb_options > 0) { -+ assert(nb_options <= G_N_ELEMENTS(option_name)); -+ tftp_send_oack(spt, option_name, option_value, nb_options, tp); -+ return; -+ } -+ -+ spt->block_nr = 0; -+ tftp_send_next_block(spt, tp); -+} -+ -+static void tftp_handle_ack(Slirp *slirp, struct sockaddr_storage *srcsas, -+ struct tftp_t *tp, int pktlen) -+{ -+ int s; -+ -+ s = tftp_session_find(slirp, srcsas, tp); -+ -+ if (s < 0) { -+ return; -+ } -+ -+ tftp_send_next_block(&slirp->tftp_sessions[s], tp); -+} -+ -+static void tftp_handle_error(Slirp *slirp, struct sockaddr_storage *srcsas, -+ struct tftp_t *tp, int pktlen) -+{ -+ int s; -+ -+ s = tftp_session_find(slirp, srcsas, tp); -+ -+ if (s < 0) { -+ return; -+ } -+ -+ tftp_session_terminate(&slirp->tftp_sessions[s]); -+} -+ -+void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m) -+{ -+ struct tftp_t *tp = (struct tftp_t *)m->m_data; -+ -+ switch (ntohs(tp->tp_op)) { -+ case TFTP_RRQ: -+ tftp_handle_rrq(m->slirp, srcsas, tp, m->m_len); -+ break; -+ -+ case TFTP_ACK: -+ tftp_handle_ack(m->slirp, srcsas, tp, m->m_len); -+ break; -+ -+ case TFTP_ERROR: -+ tftp_handle_error(m->slirp, srcsas, tp, m->m_len); -+ break; -+ } -+} -diff --git a/slirp/src/tftp.h b/slirp/src/tftp.h -new file mode 100644 -index 0000000..c47bb43 ---- /dev/null -+++ b/slirp/src/tftp.h -@@ -0,0 +1,52 @@ -+/* 
SPDX-License-Identifier: BSD-3-Clause */ -+/* tftp defines */ -+ -+#ifndef SLIRP_TFTP_H -+#define SLIRP_TFTP_H -+ -+#define TFTP_SESSIONS_MAX 20 -+ -+#define TFTP_SERVER 69 -+ -+#define TFTP_RRQ 1 -+#define TFTP_WRQ 2 -+#define TFTP_DATA 3 -+#define TFTP_ACK 4 -+#define TFTP_ERROR 5 -+#define TFTP_OACK 6 -+ -+#define TFTP_FILENAME_MAX 512 -+#define TFTP_BLOCKSIZE_MAX 1428 -+ -+struct tftp_t { -+ struct udphdr udp; -+ uint16_t tp_op; -+ union { -+ struct { -+ uint16_t tp_block_nr; -+ uint8_t tp_buf[TFTP_BLOCKSIZE_MAX]; -+ } tp_data; -+ struct { -+ uint16_t tp_error_code; -+ uint8_t tp_msg[TFTP_BLOCKSIZE_MAX]; -+ } tp_error; -+ char tp_buf[TFTP_BLOCKSIZE_MAX + 2]; -+ } x; -+} __attribute__((packed)); -+ -+struct tftp_session { -+ Slirp *slirp; -+ char *filename; -+ int fd; -+ uint16_t block_size; -+ -+ struct sockaddr_storage client_addr; -+ uint16_t client_port; -+ uint32_t block_nr; -+ -+ int timestamp; -+}; -+ -+void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m); -+ -+#endif -diff --git a/slirp/src/udp.c b/slirp/src/udp.c -new file mode 100644 -index 0000000..ae23ba4 ---- /dev/null -+++ b/slirp/src/udp.c -@@ -0,0 +1,354 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1990, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. 
Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)udp_usrreq.c 8.4 (Berkeley) 1/21/94 -+ * udp_usrreq.c,v 1.4 1994/10/02 17:48:45 phk Exp -+ */ -+ -+/* -+ * Changes and additions relating to SLiRP -+ * Copyright (c) 1995 Danny Gasparovski. -+ * -+ * Please read the file COPYRIGHT for the -+ * terms and conditions of the copyright. 
-+ */ -+ -+#include "slirp.h" -+#include "ip_icmp.h" -+ -+static uint8_t udp_tos(struct socket *so); -+ -+void udp_init(Slirp *slirp) -+{ -+ slirp->udb.so_next = slirp->udb.so_prev = &slirp->udb; -+ slirp->udp_last_so = &slirp->udb; -+} -+ -+void udp_cleanup(Slirp *slirp) -+{ -+ while (slirp->udb.so_next != &slirp->udb) { -+ udp_detach(slirp->udb.so_next); -+ } -+} -+ -+/* m->m_data points at ip packet header -+ * m->m_len length ip packet -+ * ip->ip_len length data (IPDU) -+ */ -+void udp_input(register struct mbuf *m, int iphlen) -+{ -+ Slirp *slirp = m->slirp; -+ register struct ip *ip; -+ register struct udphdr *uh; -+ int len; -+ struct ip save_ip; -+ struct socket *so; -+ struct sockaddr_storage lhost; -+ struct sockaddr_in *lhost4; -+ -+ DEBUG_CALL("udp_input"); -+ DEBUG_ARG("m = %p", m); -+ DEBUG_ARG("iphlen = %d", iphlen); -+ -+ /* -+ * Strip IP options, if any; should skip this, -+ * make available to user, and use on returned packets, -+ * but we don't yet have a way to check the checksum -+ * with options still present. -+ */ -+ if (iphlen > sizeof(struct ip)) { -+ ip_stripoptions(m, (struct mbuf *)0); -+ iphlen = sizeof(struct ip); -+ } -+ -+ /* -+ * Get IP and UDP header together in first mbuf. -+ */ -+ ip = mtod(m, struct ip *); -+ uh = (struct udphdr *)((char *)ip + iphlen); -+ -+ /* -+ * Make mbuf data length reflect UDP length. -+ * If not enough data to reflect UDP length, drop. -+ */ -+ len = ntohs((uint16_t)uh->uh_ulen); -+ -+ if (ip->ip_len != len) { -+ if (len > ip->ip_len) { -+ goto bad; -+ } -+ m_adj(m, len - ip->ip_len); -+ ip->ip_len = len; -+ } -+ -+ /* -+ * Save a copy of the IP header in case we want restore it -+ * for sending an ICMP error message in response. -+ */ -+ save_ip = *ip; -+ save_ip.ip_len += iphlen; /* tcp_input subtracts this */ -+ -+ /* -+ * Checksum extended UDP header and data. 
-+ */ -+ if (uh->uh_sum) { -+ memset(&((struct ipovly *)ip)->ih_mbuf, 0, sizeof(struct mbuf_ptr)); -+ ((struct ipovly *)ip)->ih_x1 = 0; -+ ((struct ipovly *)ip)->ih_len = uh->uh_ulen; -+ if (cksum(m, len + sizeof(struct ip))) { -+ goto bad; -+ } -+ } -+ -+ lhost.ss_family = AF_INET; -+ lhost4 = (struct sockaddr_in *)&lhost; -+ lhost4->sin_addr = ip->ip_src; -+ lhost4->sin_port = uh->uh_sport; -+ -+ /* -+ * handle DHCP/BOOTP -+ */ -+ if (ntohs(uh->uh_dport) == BOOTP_SERVER && -+ (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || -+ ip->ip_dst.s_addr == 0xffffffff)) { -+ bootp_input(m); -+ goto bad; -+ } -+ -+ /* -+ * handle TFTP -+ */ -+ if (ntohs(uh->uh_dport) == TFTP_SERVER && -+ ip->ip_dst.s_addr == slirp->vhost_addr.s_addr) { -+ m->m_data += iphlen; -+ m->m_len -= iphlen; -+ tftp_input(&lhost, m); -+ m->m_data -= iphlen; -+ m->m_len += iphlen; -+ goto bad; -+ } -+ -+ if (slirp->restricted) { -+ goto bad; -+ } -+ -+ /* -+ * Locate pcb for datagram. -+ */ -+ so = solookup(&slirp->udp_last_so, &slirp->udb, &lhost, NULL); -+ -+ if (so == NULL) { -+ /* -+ * If there's no socket for this packet, -+ * create one -+ */ -+ so = socreate(slirp); -+ if (udp_attach(so, AF_INET) == -1) { -+ DEBUG_MISC(" udp_attach errno = %d-%s", errno, strerror(errno)); -+ sofree(so); -+ goto bad; -+ } -+ -+ /* -+ * Setup fields -+ */ -+ so->so_lfamily = AF_INET; -+ so->so_laddr = ip->ip_src; -+ so->so_lport = uh->uh_sport; -+ -+ if ((so->so_iptos = udp_tos(so)) == 0) -+ so->so_iptos = ip->ip_tos; -+ -+ /* -+ * XXXXX Here, check if it's in udpexec_list, -+ * and if it is, do the fork_exec() etc. -+ */ -+ } -+ -+ so->so_ffamily = AF_INET; -+ so->so_faddr = ip->ip_dst; /* XXX */ -+ so->so_fport = uh->uh_dport; /* XXX */ -+ -+ iphlen += sizeof(struct udphdr); -+ m->m_len -= iphlen; -+ m->m_data += iphlen; -+ -+ /* -+ * Now we sendto() the packet. 
-+ */ -+ if (sosendto(so, m) == -1) { -+ m->m_len += iphlen; -+ m->m_data -= iphlen; -+ *ip = save_ip; -+ DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); -+ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); -+ goto bad; -+ } -+ -+ m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ -+ -+ /* restore the orig mbuf packet */ -+ m->m_len += iphlen; -+ m->m_data -= iphlen; -+ *ip = save_ip; -+ so->so_m = m; /* ICMP backup */ -+ -+ return; -+bad: -+ m_free(m); -+} -+ -+int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr, -+ struct sockaddr_in *daddr, int iptos) -+{ -+ register struct udpiphdr *ui; -+ int error = 0; -+ -+ DEBUG_CALL("udp_output"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("m = %p", m); -+ DEBUG_ARG("saddr = %s", inet_ntoa(saddr->sin_addr)); -+ DEBUG_ARG("daddr = %s", inet_ntoa(daddr->sin_addr)); -+ -+ /* -+ * Adjust for header -+ */ -+ m->m_data -= sizeof(struct udpiphdr); -+ m->m_len += sizeof(struct udpiphdr); -+ -+ /* -+ * Fill in mbuf with extended UDP header -+ * and addresses and length put into network format. -+ */ -+ ui = mtod(m, struct udpiphdr *); -+ memset(&ui->ui_i.ih_mbuf, 0, sizeof(struct mbuf_ptr)); -+ ui->ui_x1 = 0; -+ ui->ui_pr = IPPROTO_UDP; -+ ui->ui_len = htons(m->m_len - sizeof(struct ip)); -+ /* XXXXX Check for from-one-location sockets, or from-any-location sockets -+ */ -+ ui->ui_src = saddr->sin_addr; -+ ui->ui_dst = daddr->sin_addr; -+ ui->ui_sport = saddr->sin_port; -+ ui->ui_dport = daddr->sin_port; -+ ui->ui_ulen = ui->ui_len; -+ -+ /* -+ * Stuff checksum and output datagram. 
-+ */ -+ ui->ui_sum = 0; -+ if ((ui->ui_sum = cksum(m, m->m_len)) == 0) -+ ui->ui_sum = 0xffff; -+ ((struct ip *)ui)->ip_len = m->m_len; -+ -+ ((struct ip *)ui)->ip_ttl = IPDEFTTL; -+ ((struct ip *)ui)->ip_tos = iptos; -+ -+ error = ip_output(so, m); -+ -+ return (error); -+} -+ -+int udp_attach(struct socket *so, unsigned short af) -+{ -+ so->s = slirp_socket(af, SOCK_DGRAM, 0); -+ if (so->s != -1) { -+ so->so_expire = curtime + SO_EXPIRE; -+ insque(so, &so->slirp->udb); -+ } -+ return (so->s); -+} -+ -+void udp_detach(struct socket *so) -+{ -+ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); -+ closesocket(so->s); -+ sofree(so); -+} -+ -+static const struct tos_t udptos[] = { { 0, 53, IPTOS_LOWDELAY, 0 }, /* DNS */ -+ { 0, 0, 0, 0 } }; -+ -+static uint8_t udp_tos(struct socket *so) -+{ -+ int i = 0; -+ -+ while (udptos[i].tos) { -+ if ((udptos[i].fport && ntohs(so->so_fport) == udptos[i].fport) || -+ (udptos[i].lport && ntohs(so->so_lport) == udptos[i].lport)) { -+ so->so_emu = udptos[i].emu; -+ return udptos[i].tos; -+ } -+ i++; -+ } -+ -+ return 0; -+} -+ -+struct socket *udp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, -+ uint32_t laddr, unsigned lport, int flags) -+{ -+ /* TODO: IPv6 */ -+ struct sockaddr_in addr; -+ struct socket *so; -+ socklen_t addrlen = sizeof(struct sockaddr_in); -+ -+ so = socreate(slirp); -+ so->s = slirp_socket(AF_INET, SOCK_DGRAM, 0); -+ if (so->s < 0) { -+ sofree(so); -+ return NULL; -+ } -+ so->so_expire = curtime + SO_EXPIRE; -+ insque(so, &slirp->udb); -+ -+ addr.sin_family = AF_INET; -+ addr.sin_addr.s_addr = haddr; -+ addr.sin_port = hport; -+ -+ if (bind(so->s, (struct sockaddr *)&addr, addrlen) < 0) { -+ udp_detach(so); -+ return NULL; -+ } -+ slirp_socket_set_fast_reuse(so->s); -+ -+ getsockname(so->s, (struct sockaddr *)&addr, &addrlen); -+ so->fhost.sin = addr; -+ sotranslate_accept(so); -+ so->so_lfamily = AF_INET; -+ so->so_lport = lport; -+ so->so_laddr.s_addr = laddr; -+ if (flags != 
SS_FACCEPTONCE) -+ so->so_expire = 0; -+ -+ so->so_state &= SS_PERSISTENT_MASK; -+ so->so_state |= SS_ISFCONNECTED | flags; -+ -+ return so; -+} -diff --git a/slirp/src/udp.h b/slirp/src/udp.h -new file mode 100644 -index 0000000..c3b83fd ---- /dev/null -+++ b/slirp/src/udp.h -@@ -0,0 +1,90 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. 
-+ * -+ * @(#)udp.h 8.1 (Berkeley) 6/10/93 -+ * udp.h,v 1.3 1994/08/21 05:27:41 paul Exp -+ */ -+ -+#ifndef UDP_H -+#define UDP_H -+ -+#define UDP_TTL 0x60 -+#define UDP_UDPDATALEN 16192 -+ -+/* -+ * Udp protocol header. -+ * Per RFC 768, September, 1981. -+ */ -+struct udphdr { -+ uint16_t uh_sport; /* source port */ -+ uint16_t uh_dport; /* destination port */ -+ int16_t uh_ulen; /* udp length */ -+ uint16_t uh_sum; /* udp checksum */ -+}; -+ -+/* -+ * UDP kernel structures and variables. -+ */ -+struct udpiphdr { -+ struct ipovly ui_i; /* overlaid ip structure */ -+ struct udphdr ui_u; /* udp header */ -+}; -+#define ui_mbuf ui_i.ih_mbuf.mptr -+#define ui_x1 ui_i.ih_x1 -+#define ui_pr ui_i.ih_pr -+#define ui_len ui_i.ih_len -+#define ui_src ui_i.ih_src -+#define ui_dst ui_i.ih_dst -+#define ui_sport ui_u.uh_sport -+#define ui_dport ui_u.uh_dport -+#define ui_ulen ui_u.uh_ulen -+#define ui_sum ui_u.uh_sum -+ -+/* -+ * Names for UDP sysctl objects -+ */ -+#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */ -+#define UDPCTL_MAXID 2 -+ -+struct mbuf; -+ -+void udp_init(Slirp *); -+void udp_cleanup(Slirp *); -+void udp_input(register struct mbuf *, int); -+int udp_attach(struct socket *, unsigned short af); -+void udp_detach(struct socket *); -+struct socket *udp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); -+int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr, -+ struct sockaddr_in *daddr, int iptos); -+ -+void udp6_input(register struct mbuf *); -+int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, -+ struct sockaddr_in6 *daddr); -+ -+#endif -diff --git a/slirp/src/udp6.c b/slirp/src/udp6.c -new file mode 100644 -index 0000000..6f9486b ---- /dev/null -+++ b/slirp/src/udp6.c -@@ -0,0 +1,173 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 2013 -+ * Guillaume Subiron -+ */ -+ -+#include "slirp.h" -+#include "udp.h" -+#include "dhcpv6.h" -+ -+void udp6_input(struct mbuf *m) 
-+{ -+ Slirp *slirp = m->slirp; -+ struct ip6 *ip, save_ip; -+ struct udphdr *uh; -+ int iphlen = sizeof(struct ip6); -+ int len; -+ struct socket *so; -+ struct sockaddr_in6 lhost; -+ -+ DEBUG_CALL("udp6_input"); -+ DEBUG_ARG("m = %p", m); -+ -+ if (slirp->restricted) { -+ goto bad; -+ } -+ -+ ip = mtod(m, struct ip6 *); -+ m->m_len -= iphlen; -+ m->m_data += iphlen; -+ uh = mtod(m, struct udphdr *); -+ m->m_len += iphlen; -+ m->m_data -= iphlen; -+ -+ if (ip6_cksum(m)) { -+ goto bad; -+ } -+ -+ len = ntohs((uint16_t)uh->uh_ulen); -+ -+ /* -+ * Make mbuf data length reflect UDP length. -+ * If not enough data to reflect UDP length, drop. -+ */ -+ if (ntohs(ip->ip_pl) != len) { -+ if (len > ntohs(ip->ip_pl)) { -+ goto bad; -+ } -+ m_adj(m, len - ntohs(ip->ip_pl)); -+ ip->ip_pl = htons(len); -+ } -+ -+ /* -+ * Save a copy of the IP header in case we want restore it -+ * for sending an ICMP error message in response. -+ */ -+ save_ip = *ip; -+ -+ /* Locate pcb for datagram. */ -+ lhost.sin6_family = AF_INET6; -+ lhost.sin6_addr = ip->ip_src; -+ lhost.sin6_port = uh->uh_sport; -+ -+ /* handle DHCPv6 */ -+ if (ntohs(uh->uh_dport) == DHCPV6_SERVER_PORT && -+ (in6_equal(&ip->ip_dst, &slirp->vhost_addr6) || -+ in6_dhcp_multicast(&ip->ip_dst))) { -+ m->m_data += iphlen; -+ m->m_len -= iphlen; -+ dhcpv6_input(&lhost, m); -+ m->m_data -= iphlen; -+ m->m_len += iphlen; -+ goto bad; -+ } -+ -+ /* handle TFTP */ -+ if (ntohs(uh->uh_dport) == TFTP_SERVER && -+ !memcmp(ip->ip_dst.s6_addr, slirp->vhost_addr6.s6_addr, 16)) { -+ m->m_data += iphlen; -+ m->m_len -= iphlen; -+ tftp_input((struct sockaddr_storage *)&lhost, m); -+ m->m_data -= iphlen; -+ m->m_len += iphlen; -+ goto bad; -+ } -+ -+ so = solookup(&slirp->udp_last_so, &slirp->udb, -+ (struct sockaddr_storage *)&lhost, NULL); -+ -+ if (so == NULL) { -+ /* If there's no socket for this packet, create one. 
*/ -+ so = socreate(slirp); -+ if (udp_attach(so, AF_INET6) == -1) { -+ DEBUG_MISC(" udp6_attach errno = %d-%s", errno, strerror(errno)); -+ sofree(so); -+ goto bad; -+ } -+ -+ /* Setup fields */ -+ so->so_lfamily = AF_INET6; -+ so->so_laddr6 = ip->ip_src; -+ so->so_lport6 = uh->uh_sport; -+ } -+ -+ so->so_ffamily = AF_INET6; -+ so->so_faddr6 = ip->ip_dst; /* XXX */ -+ so->so_fport6 = uh->uh_dport; /* XXX */ -+ -+ iphlen += sizeof(struct udphdr); -+ m->m_len -= iphlen; -+ m->m_data += iphlen; -+ -+ /* -+ * Now we sendto() the packet. -+ */ -+ if (sosendto(so, m) == -1) { -+ m->m_len += iphlen; -+ m->m_data -= iphlen; -+ *ip = save_ip; -+ DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); -+ icmp6_send_error(m, ICMP6_UNREACH, ICMP6_UNREACH_NO_ROUTE); -+ goto bad; -+ } -+ -+ m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ -+ -+ /* restore the orig mbuf packet */ -+ m->m_len += iphlen; -+ m->m_data -= iphlen; -+ *ip = save_ip; -+ so->so_m = m; -+ -+ return; -+bad: -+ m_free(m); -+} -+ -+int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, -+ struct sockaddr_in6 *daddr) -+{ -+ struct ip6 *ip; -+ struct udphdr *uh; -+ -+ DEBUG_CALL("udp6_output"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("m = %p", m); -+ -+ /* adjust for header */ -+ m->m_data -= sizeof(struct udphdr); -+ m->m_len += sizeof(struct udphdr); -+ uh = mtod(m, struct udphdr *); -+ m->m_data -= sizeof(struct ip6); -+ m->m_len += sizeof(struct ip6); -+ ip = mtod(m, struct ip6 *); -+ -+ /* Build IP header */ -+ ip->ip_pl = htons(m->m_len - sizeof(struct ip6)); -+ ip->ip_nh = IPPROTO_UDP; -+ ip->ip_src = saddr->sin6_addr; -+ ip->ip_dst = daddr->sin6_addr; -+ -+ /* Build UDP header */ -+ uh->uh_sport = saddr->sin6_port; -+ uh->uh_dport = daddr->sin6_port; -+ uh->uh_ulen = ip->ip_pl; -+ uh->uh_sum = 0; -+ uh->uh_sum = ip6_cksum(m); -+ if (uh->uh_sum == 0) { -+ uh->uh_sum = 0xffff; -+ } -+ -+ return ip6_output(so, m, 0); -+} -diff --git a/slirp/src/util.c 
b/slirp/src/util.c -new file mode 100644 -index 0000000..e596087 ---- /dev/null -+++ b/slirp/src/util.c -@@ -0,0 +1,366 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * util.c (mostly based on QEMU os-win32.c) -+ * -+ * Copyright (c) 2003-2008 Fabrice Bellard -+ * Copyright (c) 2010-2016 Red Hat, Inc. -+ * -+ * QEMU library functions for win32 which are shared between QEMU and -+ * the QEMU tools. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. 
-+ */ -+#include "util.h" -+ -+#include -+#include -+#include -+ -+#if defined(_WIN32) -+int slirp_inet_aton(const char *cp, struct in_addr *ia) -+{ -+ uint32_t addr = inet_addr(cp); -+ if (addr == 0xffffffff) { -+ return 0; -+ } -+ ia->s_addr = addr; -+ return 1; -+} -+#endif -+ -+void slirp_set_nonblock(int fd) -+{ -+#ifndef _WIN32 -+ int f; -+ f = fcntl(fd, F_GETFL); -+ assert(f != -1); -+ f = fcntl(fd, F_SETFL, f | O_NONBLOCK); -+ assert(f != -1); -+#else -+ unsigned long opt = 1; -+ ioctlsocket(fd, FIONBIO, &opt); -+#endif -+} -+ -+static void slirp_set_cloexec(int fd) -+{ -+#ifndef _WIN32 -+ int f; -+ f = fcntl(fd, F_GETFD); -+ assert(f != -1); -+ f = fcntl(fd, F_SETFD, f | FD_CLOEXEC); -+ assert(f != -1); -+#endif -+} -+ -+/* -+ * Opens a socket with FD_CLOEXEC set -+ */ -+int slirp_socket(int domain, int type, int protocol) -+{ -+ int ret; -+ -+#ifdef SOCK_CLOEXEC -+ ret = socket(domain, type | SOCK_CLOEXEC, protocol); -+ if (ret != -1 || errno != EINVAL) { -+ return ret; -+ } -+#endif -+ ret = socket(domain, type, protocol); -+ if (ret >= 0) { -+ slirp_set_cloexec(ret); -+ } -+ -+ return ret; -+} -+ -+#ifdef _WIN32 -+static int socket_error(void) -+{ -+ switch (WSAGetLastError()) { -+ case 0: -+ return 0; -+ case WSAEINTR: -+ return EINTR; -+ case WSAEINVAL: -+ return EINVAL; -+ case WSA_INVALID_HANDLE: -+ return EBADF; -+ case WSA_NOT_ENOUGH_MEMORY: -+ return ENOMEM; -+ case WSA_INVALID_PARAMETER: -+ return EINVAL; -+ case WSAENAMETOOLONG: -+ return ENAMETOOLONG; -+ case WSAENOTEMPTY: -+ return ENOTEMPTY; -+ case WSAEWOULDBLOCK: -+ /* not using EWOULDBLOCK as we don't want code to have -+ * to check both EWOULDBLOCK and EAGAIN */ -+ return EAGAIN; -+ case WSAEINPROGRESS: -+ return EINPROGRESS; -+ case WSAEALREADY: -+ return EALREADY; -+ case WSAENOTSOCK: -+ return ENOTSOCK; -+ case WSAEDESTADDRREQ: -+ return EDESTADDRREQ; -+ case WSAEMSGSIZE: -+ return EMSGSIZE; -+ case WSAEPROTOTYPE: -+ return EPROTOTYPE; -+ case WSAENOPROTOOPT: -+ return ENOPROTOOPT; -+ 
case WSAEPROTONOSUPPORT: -+ return EPROTONOSUPPORT; -+ case WSAEOPNOTSUPP: -+ return EOPNOTSUPP; -+ case WSAEAFNOSUPPORT: -+ return EAFNOSUPPORT; -+ case WSAEADDRINUSE: -+ return EADDRINUSE; -+ case WSAEADDRNOTAVAIL: -+ return EADDRNOTAVAIL; -+ case WSAENETDOWN: -+ return ENETDOWN; -+ case WSAENETUNREACH: -+ return ENETUNREACH; -+ case WSAENETRESET: -+ return ENETRESET; -+ case WSAECONNABORTED: -+ return ECONNABORTED; -+ case WSAECONNRESET: -+ return ECONNRESET; -+ case WSAENOBUFS: -+ return ENOBUFS; -+ case WSAEISCONN: -+ return EISCONN; -+ case WSAENOTCONN: -+ return ENOTCONN; -+ case WSAETIMEDOUT: -+ return ETIMEDOUT; -+ case WSAECONNREFUSED: -+ return ECONNREFUSED; -+ case WSAELOOP: -+ return ELOOP; -+ case WSAEHOSTUNREACH: -+ return EHOSTUNREACH; -+ default: -+ return EIO; -+ } -+} -+ -+#undef ioctlsocket -+int slirp_ioctlsocket_wrap(int fd, int req, void *val) -+{ -+ int ret; -+ ret = ioctlsocket(fd, req, val); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef closesocket -+int slirp_closesocket_wrap(int fd) -+{ -+ int ret; -+ ret = closesocket(fd); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef connect -+int slirp_connect_wrap(int sockfd, const struct sockaddr *addr, int addrlen) -+{ -+ int ret; -+ ret = connect(sockfd, addr, addrlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef listen -+int slirp_listen_wrap(int sockfd, int backlog) -+{ -+ int ret; -+ ret = listen(sockfd, backlog); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef bind -+int slirp_bind_wrap(int sockfd, const struct sockaddr *addr, int addrlen) -+{ -+ int ret; -+ ret = bind(sockfd, addr, addrlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef socket -+int slirp_socket_wrap(int domain, int type, int protocol) -+{ -+ int ret; -+ ret = socket(domain, type, protocol); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return 
ret; -+} -+ -+#undef accept -+int slirp_accept_wrap(int sockfd, struct sockaddr *addr, int *addrlen) -+{ -+ int ret; -+ ret = accept(sockfd, addr, addrlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef shutdown -+int slirp_shutdown_wrap(int sockfd, int how) -+{ -+ int ret; -+ ret = shutdown(sockfd, how); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef getsockopt -+int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, -+ int *optlen) -+{ -+ int ret; -+ ret = getsockopt(sockfd, level, optname, optval, optlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef setsockopt -+int slirp_setsockopt_wrap(int sockfd, int level, int optname, -+ const void *optval, int optlen) -+{ -+ int ret; -+ ret = setsockopt(sockfd, level, optname, optval, optlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef getpeername -+int slirp_getpeername_wrap(int sockfd, struct sockaddr *addr, int *addrlen) -+{ -+ int ret; -+ ret = getpeername(sockfd, addr, addrlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef getsockname -+int slirp_getsockname_wrap(int sockfd, struct sockaddr *addr, int *addrlen) -+{ -+ int ret; -+ ret = getsockname(sockfd, addr, addrlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef send -+ssize_t slirp_send_wrap(int sockfd, const void *buf, size_t len, int flags) -+{ -+ int ret; -+ ret = send(sockfd, buf, len, flags); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef sendto -+ssize_t slirp_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, -+ const struct sockaddr *addr, int addrlen) -+{ -+ int ret; -+ ret = sendto(sockfd, buf, len, flags, addr, addrlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef recv -+ssize_t slirp_recv_wrap(int sockfd, void *buf, size_t len, int 
flags) -+{ -+ int ret; -+ ret = recv(sockfd, buf, len, flags); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef recvfrom -+ssize_t slirp_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, -+ struct sockaddr *addr, int *addrlen) -+{ -+ int ret; -+ ret = recvfrom(sockfd, buf, len, flags, addr, addrlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+#endif /* WIN32 */ -+ -+void slirp_pstrcpy(char *buf, int buf_size, const char *str) -+{ -+ int c; -+ char *q = buf; -+ -+ if (buf_size <= 0) -+ return; -+ -+ for (;;) { -+ c = *str++; -+ if (c == 0 || q >= buf + buf_size - 1) -+ break; -+ *q++ = c; -+ } -+ *q = '\0'; -+} -diff --git a/slirp/src/util.h b/slirp/src/util.h -new file mode 100644 -index 0000000..3c6223c ---- /dev/null -+++ b/slirp/src/util.h -@@ -0,0 +1,180 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * Copyright (c) 2003-2008 Fabrice Bellard -+ * Copyright (c) 2010-2019 Red Hat, Inc. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. -+ */ -+#ifndef UTIL_H_ -+#define UTIL_H_ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef _WIN32 -+#include -+#include -+#else -+#include -+#include -+#include -+#endif -+ -+#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) -+#define SLIRP_PACKED __attribute__((gcc_struct, packed)) -+#else -+#define SLIRP_PACKED __attribute__((packed)) -+#endif -+ -+#ifndef DIV_ROUND_UP -+#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) -+#endif -+ -+#ifndef container_of -+#define container_of(ptr, type, member) \ -+ __extension__({ \ -+ void *__mptr = (void *)(ptr); \ -+ ((type *)(__mptr - offsetof(type, member))); \ -+ }) -+#endif -+ -+#if defined(_WIN32) /* CONFIG_IOVEC */ -+#if !defined(IOV_MAX) /* XXX: to avoid duplicate with QEMU osdep.h */ -+struct iovec { -+ void *iov_base; -+ size_t iov_len; -+}; -+#endif -+#else -+#include -+#endif -+ -+#define stringify(s) tostring(s) -+#define tostring(s) #s -+ -+#define SCALE_MS 1000000 -+ -+#define ETH_ALEN 6 -+#define ETH_HLEN 14 -+#define ETH_P_IP (0x0800) /* Internet Protocol packet */ -+#define ETH_P_ARP (0x0806) /* Address Resolution packet */ -+#define ETH_P_IPV6 (0x86dd) -+#define ETH_P_VLAN (0x8100) -+#define ETH_P_DVLAN (0x88a8) -+#define ETH_P_NCSI (0x88f8) -+#define ETH_P_UNKNOWN (0xffff) -+ -+/* FIXME: remove me when made standalone */ -+#ifdef _WIN32 -+#undef accept -+#undef bind -+#undef closesocket -+#undef connect -+#undef getpeername -+#undef getsockname -+#undef getsockopt -+#undef ioctlsocket -+#undef listen -+#undef recv -+#undef recvfrom -+#undef send -+#undef sendto -+#undef setsockopt -+#undef shutdown -+#undef socket -+#endif -+ -+#ifdef _WIN32 -+#define 
connect slirp_connect_wrap -+int slirp_connect_wrap(int fd, const struct sockaddr *addr, int addrlen); -+#define listen slirp_listen_wrap -+int slirp_listen_wrap(int fd, int backlog); -+#define bind slirp_bind_wrap -+int slirp_bind_wrap(int fd, const struct sockaddr *addr, int addrlen); -+#define socket slirp_socket_wrap -+int slirp_socket_wrap(int domain, int type, int protocol); -+#define accept slirp_accept_wrap -+int slirp_accept_wrap(int fd, struct sockaddr *addr, int *addrlen); -+#define shutdown slirp_shutdown_wrap -+int slirp_shutdown_wrap(int fd, int how); -+#define getpeername slirp_getpeername_wrap -+int slirp_getpeername_wrap(int fd, struct sockaddr *addr, int *addrlen); -+#define getsockname slirp_getsockname_wrap -+int slirp_getsockname_wrap(int fd, struct sockaddr *addr, int *addrlen); -+#define send slirp_send_wrap -+ssize_t slirp_send_wrap(int fd, const void *buf, size_t len, int flags); -+#define sendto slirp_sendto_wrap -+ssize_t slirp_sendto_wrap(int fd, const void *buf, size_t len, int flags, -+ const struct sockaddr *dest_addr, int addrlen); -+#define recv slirp_recv_wrap -+ssize_t slirp_recv_wrap(int fd, void *buf, size_t len, int flags); -+#define recvfrom slirp_recvfrom_wrap -+ssize_t slirp_recvfrom_wrap(int fd, void *buf, size_t len, int flags, -+ struct sockaddr *src_addr, int *addrlen); -+#define closesocket slirp_closesocket_wrap -+int slirp_closesocket_wrap(int fd); -+#define ioctlsocket slirp_ioctlsocket_wrap -+int slirp_ioctlsocket_wrap(int fd, int req, void *val); -+#define getsockopt slirp_getsockopt_wrap -+int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, -+ int *optlen); -+#define setsockopt slirp_setsockopt_wrap -+int slirp_setsockopt_wrap(int sockfd, int level, int optname, -+ const void *optval, int optlen); -+#define inet_aton slirp_inet_aton -+int slirp_inet_aton(const char *cp, struct in_addr *ia); -+#else -+#define closesocket(s) close(s) -+#define ioctlsocket(s, r, v) ioctl(s, r, v) -+#endif -+ 
-+int slirp_socket(int domain, int type, int protocol); -+void slirp_set_nonblock(int fd); -+ -+static inline int slirp_socket_set_nodelay(int fd) -+{ -+ int v = 1; -+ return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v)); -+} -+ -+static inline int slirp_socket_set_fast_reuse(int fd) -+{ -+#ifndef _WIN32 -+ int v = 1; -+ return setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &v, sizeof(v)); -+#else -+ /* Enabling the reuse of an endpoint that was used by a socket still in -+ * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows -+ * fast reuse is the default and SO_REUSEADDR does strange things. So we -+ * don't have to do anything here. More info can be found at: -+ * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */ -+ return 0; -+#endif -+} -+ -+void slirp_pstrcpy(char *buf, int buf_size, const char *str); -+ -+#endif -diff --git a/slirp/src/version.c b/slirp/src/version.c -new file mode 100644 -index 0000000..a837323 ---- /dev/null -+++ b/slirp/src/version.c -@@ -0,0 +1,11 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+#include "libslirp.h" -+#include "util.h" -+ -+const char * -+slirp_version_string(void) -+{ -+ return stringify(SLIRP_MAJOR_VERSION) "." -+ stringify(SLIRP_MINOR_VERSION) "." -+ stringify(SLIRP_MICRO_VERSION); -+} -diff --git a/slirp/src/vmstate.c b/slirp/src/vmstate.c -new file mode 100644 -index 0000000..8c544eb ---- /dev/null -+++ b/slirp/src/vmstate.c -@@ -0,0 +1,445 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * VMState interpreter -+ * -+ * Copyright (c) 2009-2018 Red Hat Inc -+ * -+ * Authors: -+ * Juan Quintela -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * 1. Redistributions of source code must retain the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer. -+ * -+ * 2. 
Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer in the documentation and/or other materials provided -+ * with the distribution. -+ * -+ * 3. Neither the name of the copyright holder nor the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -+ * OF THE POSSIBILITY OF SUCH DAMAGE. 
-+ */ -+#include -+#include -+#include -+#include -+ -+#include "stream.h" -+#include "vmstate.h" -+ -+static int get_nullptr(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ if (slirp_istream_read_u8(f) == VMS_NULLPTR_MARKER) { -+ return 0; -+ } -+ g_warning("vmstate: get_nullptr expected VMS_NULLPTR_MARKER"); -+ return -EINVAL; -+} -+ -+static int put_nullptr(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field) -+ -+{ -+ if (pv == NULL) { -+ slirp_ostream_write_u8(f, VMS_NULLPTR_MARKER); -+ return 0; -+ } -+ g_warning("vmstate: put_nullptr must be called with pv == NULL"); -+ return -EINVAL; -+} -+ -+const VMStateInfo slirp_vmstate_info_nullptr = { -+ .name = "uint64", -+ .get = get_nullptr, -+ .put = put_nullptr, -+}; -+ -+/* 8 bit unsigned int */ -+ -+static int get_uint8(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ uint8_t *v = pv; -+ *v = slirp_istream_read_u8(f); -+ return 0; -+} -+ -+static int put_uint8(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ uint8_t *v = pv; -+ slirp_ostream_write_u8(f, *v); -+ return 0; -+} -+ -+const VMStateInfo slirp_vmstate_info_uint8 = { -+ .name = "uint8", -+ .get = get_uint8, -+ .put = put_uint8, -+}; -+ -+/* 16 bit unsigned int */ -+ -+static int get_uint16(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ uint16_t *v = pv; -+ *v = slirp_istream_read_u16(f); -+ return 0; -+} -+ -+static int put_uint16(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ uint16_t *v = pv; -+ slirp_ostream_write_u16(f, *v); -+ return 0; -+} -+ -+const VMStateInfo slirp_vmstate_info_uint16 = { -+ .name = "uint16", -+ .get = get_uint16, -+ .put = put_uint16, -+}; -+ -+/* 32 bit unsigned int */ -+ -+static int get_uint32(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ uint32_t *v = pv; -+ *v = slirp_istream_read_u32(f); -+ return 0; -+} -+ -+static int 
put_uint32(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ uint32_t *v = pv; -+ slirp_ostream_write_u32(f, *v); -+ return 0; -+} -+ -+const VMStateInfo slirp_vmstate_info_uint32 = { -+ .name = "uint32", -+ .get = get_uint32, -+ .put = put_uint32, -+}; -+ -+/* 16 bit int */ -+ -+static int get_int16(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ int16_t *v = pv; -+ *v = slirp_istream_read_i16(f); -+ return 0; -+} -+ -+static int put_int16(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ int16_t *v = pv; -+ slirp_ostream_write_i16(f, *v); -+ return 0; -+} -+ -+const VMStateInfo slirp_vmstate_info_int16 = { -+ .name = "int16", -+ .get = get_int16, -+ .put = put_int16, -+}; -+ -+/* 32 bit int */ -+ -+static int get_int32(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ int32_t *v = pv; -+ *v = slirp_istream_read_i32(f); -+ return 0; -+} -+ -+static int put_int32(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ int32_t *v = pv; -+ slirp_ostream_write_i32(f, *v); -+ return 0; -+} -+ -+const VMStateInfo slirp_vmstate_info_int32 = { -+ .name = "int32", -+ .get = get_int32, -+ .put = put_int32, -+}; -+ -+/* vmstate_info_tmp, see VMSTATE_WITH_TMP, the idea is that we allocate -+ * a temporary buffer and the pre_load/pre_save methods in the child vmsd -+ * copy stuff from the parent into the child and do calculations to fill -+ * in fields that don't really exist in the parent but need to be in the -+ * stream. 
-+ */ -+static int get_tmp(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ int ret; -+ const VMStateDescription *vmsd = field->vmsd; -+ int version_id = field->version_id; -+ void *tmp = g_malloc(size); -+ -+ /* Writes the parent field which is at the start of the tmp */ -+ *(void **)tmp = pv; -+ ret = slirp_vmstate_load_state(f, vmsd, tmp, version_id); -+ g_free(tmp); -+ return ret; -+} -+ -+static int put_tmp(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ const VMStateDescription *vmsd = field->vmsd; -+ void *tmp = g_malloc(size); -+ int ret; -+ -+ /* Writes the parent field which is at the start of the tmp */ -+ *(void **)tmp = pv; -+ ret = slirp_vmstate_save_state(f, vmsd, tmp); -+ g_free(tmp); -+ -+ return ret; -+} -+ -+const VMStateInfo slirp_vmstate_info_tmp = { -+ .name = "tmp", -+ .get = get_tmp, -+ .put = put_tmp, -+}; -+ -+/* uint8_t buffers */ -+ -+static int get_buffer(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ slirp_istream_read(f, pv, size); -+ return 0; -+} -+ -+static int put_buffer(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ slirp_ostream_write(f, pv, size); -+ return 0; -+} -+ -+const VMStateInfo slirp_vmstate_info_buffer = { -+ .name = "buffer", -+ .get = get_buffer, -+ .put = put_buffer, -+}; -+ -+static int vmstate_n_elems(void *opaque, const VMStateField *field) -+{ -+ int n_elems = 1; -+ -+ if (field->flags & VMS_ARRAY) { -+ n_elems = field->num; -+ } else if (field->flags & VMS_VARRAY_INT32) { -+ n_elems = *(int32_t *)(opaque + field->num_offset); -+ } else if (field->flags & VMS_VARRAY_UINT32) { -+ n_elems = *(uint32_t *)(opaque + field->num_offset); -+ } else if (field->flags & VMS_VARRAY_UINT16) { -+ n_elems = *(uint16_t *)(opaque + field->num_offset); -+ } else if (field->flags & VMS_VARRAY_UINT8) { -+ n_elems = *(uint8_t *)(opaque + field->num_offset); -+ } -+ -+ if (field->flags & VMS_MULTIPLY_ELEMENTS) { -+ 
n_elems *= field->num; -+ } -+ -+ return n_elems; -+} -+ -+static int vmstate_size(void *opaque, const VMStateField *field) -+{ -+ int size = field->size; -+ -+ if (field->flags & VMS_VBUFFER) { -+ size = *(int32_t *)(opaque + field->size_offset); -+ if (field->flags & VMS_MULTIPLY) { -+ size *= field->size; -+ } -+ } -+ -+ return size; -+} -+ -+static int vmstate_save_state_v(SlirpOStream *f, const VMStateDescription *vmsd, -+ void *opaque, int version_id) -+{ -+ int ret = 0; -+ const VMStateField *field = vmsd->fields; -+ -+ if (vmsd->pre_save) { -+ ret = vmsd->pre_save(opaque); -+ if (ret) { -+ g_warning("pre-save failed: %s", vmsd->name); -+ return ret; -+ } -+ } -+ -+ while (field->name) { -+ if ((field->field_exists && field->field_exists(opaque, version_id)) || -+ (!field->field_exists && field->version_id <= version_id)) { -+ void *first_elem = opaque + field->offset; -+ int i, n_elems = vmstate_n_elems(opaque, field); -+ int size = vmstate_size(opaque, field); -+ -+ if (field->flags & VMS_POINTER) { -+ first_elem = *(void **)first_elem; -+ assert(first_elem || !n_elems || !size); -+ } -+ for (i = 0; i < n_elems; i++) { -+ void *curr_elem = first_elem + size * i; -+ ret = 0; -+ -+ if (field->flags & VMS_ARRAY_OF_POINTER) { -+ assert(curr_elem); -+ curr_elem = *(void **)curr_elem; -+ } -+ if (!curr_elem && size) { -+ /* if null pointer write placeholder and do not follow */ -+ assert(field->flags & VMS_ARRAY_OF_POINTER); -+ ret = slirp_vmstate_info_nullptr.put(f, curr_elem, size, -+ NULL); -+ } else if (field->flags & VMS_STRUCT) { -+ ret = slirp_vmstate_save_state(f, field->vmsd, curr_elem); -+ } else if (field->flags & VMS_VSTRUCT) { -+ ret = vmstate_save_state_v(f, field->vmsd, curr_elem, -+ field->struct_version_id); -+ } else { -+ ret = field->info->put(f, curr_elem, size, field); -+ } -+ if (ret) { -+ g_warning("Save of field %s/%s failed", vmsd->name, -+ field->name); -+ return ret; -+ } -+ } -+ } else { -+ if (field->flags & VMS_MUST_EXIST) { -+ 
g_warning("Output state validation failed: %s/%s", vmsd->name, -+ field->name); -+ assert(!(field->flags & VMS_MUST_EXIST)); -+ } -+ } -+ field++; -+ } -+ -+ return 0; -+} -+ -+int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, -+ void *opaque) -+{ -+ return vmstate_save_state_v(f, vmsd, opaque, vmsd->version_id); -+} -+ -+static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque) -+{ -+ if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) { -+ size_t size = vmstate_size(opaque, field); -+ size *= vmstate_n_elems(opaque, field); -+ if (size) { -+ *(void **)ptr = g_malloc(size); -+ } -+ } -+} -+ -+int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, -+ void *opaque, int version_id) -+{ -+ VMStateField *field = vmsd->fields; -+ int ret = 0; -+ -+ if (version_id > vmsd->version_id) { -+ g_warning("%s: incoming version_id %d is too new " -+ "for local version_id %d", -+ vmsd->name, version_id, vmsd->version_id); -+ return -EINVAL; -+ } -+ if (vmsd->pre_load) { -+ int ret = vmsd->pre_load(opaque); -+ if (ret) { -+ return ret; -+ } -+ } -+ while (field->name) { -+ if ((field->field_exists && field->field_exists(opaque, version_id)) || -+ (!field->field_exists && field->version_id <= version_id)) { -+ void *first_elem = opaque + field->offset; -+ int i, n_elems = vmstate_n_elems(opaque, field); -+ int size = vmstate_size(opaque, field); -+ -+ vmstate_handle_alloc(first_elem, field, opaque); -+ if (field->flags & VMS_POINTER) { -+ first_elem = *(void **)first_elem; -+ assert(first_elem || !n_elems || !size); -+ } -+ for (i = 0; i < n_elems; i++) { -+ void *curr_elem = first_elem + size * i; -+ -+ if (field->flags & VMS_ARRAY_OF_POINTER) { -+ curr_elem = *(void **)curr_elem; -+ } -+ if (!curr_elem && size) { -+ /* if null pointer check placeholder and do not follow */ -+ assert(field->flags & VMS_ARRAY_OF_POINTER); -+ ret = slirp_vmstate_info_nullptr.get(f, curr_elem, size, -+ NULL); -+ } else if 
(field->flags & VMS_STRUCT) { -+ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, -+ field->vmsd->version_id); -+ } else if (field->flags & VMS_VSTRUCT) { -+ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, -+ field->struct_version_id); -+ } else { -+ ret = field->info->get(f, curr_elem, size, field); -+ } -+ if (ret < 0) { -+ g_warning("Failed to load %s:%s", vmsd->name, field->name); -+ return ret; -+ } -+ } -+ } else if (field->flags & VMS_MUST_EXIST) { -+ g_warning("Input validation failed: %s/%s", vmsd->name, -+ field->name); -+ return -1; -+ } -+ field++; -+ } -+ if (vmsd->post_load) { -+ ret = vmsd->post_load(opaque, version_id); -+ } -+ return ret; -+} -diff --git a/slirp/src/vmstate.h b/slirp/src/vmstate.h -new file mode 100644 -index 0000000..94c6a4b ---- /dev/null -+++ b/slirp/src/vmstate.h -@@ -0,0 +1,391 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * QEMU migration/snapshot declarations -+ * -+ * Copyright (c) 2009-2011 Red Hat, Inc. -+ * -+ * Original author: Juan Quintela -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * 1. Redistributions of source code must retain the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer. -+ * -+ * 2. Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer in the documentation and/or other materials provided -+ * with the distribution. -+ * -+ * 3. Neither the name of the copyright holder nor the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. 
-+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -+ * OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef VMSTATE_H_ -+#define VMSTATE_H_ -+ -+#include -+#include -+#include -+#include "slirp.h" -+#include "stream.h" -+ -+#define stringify(s) tostring(s) -+#define tostring(s) #s -+ -+typedef struct VMStateInfo VMStateInfo; -+typedef struct VMStateDescription VMStateDescription; -+typedef struct VMStateField VMStateField; -+ -+int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, -+ void *opaque); -+int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, -+ void *opaque, int version_id); -+ -+/* VMStateInfo allows customized migration of objects that don't fit in -+ * any category in VMStateFlags. Additional information is always passed -+ * into get and put in terms of field and vmdesc parameters. However -+ * these two parameters should only be used in cases when customized -+ * handling is needed, such as QTAILQ. For primitive data types such as -+ * integer, field and vmdesc parameters should be ignored inside get/put. 
-+ */ -+struct VMStateInfo { -+ const char *name; -+ int (*get)(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field); -+ int (*put)(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field); -+}; -+ -+enum VMStateFlags { -+ /* Ignored */ -+ VMS_SINGLE = 0x001, -+ -+ /* The struct member at opaque + VMStateField.offset is a pointer -+ * to the actual field (e.g. struct a { uint8_t *b; -+ * }). Dereference the pointer before using it as basis for -+ * further pointer arithmetic (see e.g. VMS_ARRAY). Does not -+ * affect the meaning of VMStateField.num_offset or -+ * VMStateField.size_offset; see VMS_VARRAY* and VMS_VBUFFER for -+ * those. */ -+ VMS_POINTER = 0x002, -+ -+ /* The field is an array of fixed size. VMStateField.num contains -+ * the number of entries in the array. The size of each entry is -+ * given by VMStateField.size and / or opaque + -+ * VMStateField.size_offset; see VMS_VBUFFER and -+ * VMS_MULTIPLY. Each array entry will be processed individually -+ * (VMStateField.info.get()/put() if VMS_STRUCT is not set, -+ * recursion into VMStateField.vmsd if VMS_STRUCT is set). May not -+ * be combined with VMS_VARRAY*. */ -+ VMS_ARRAY = 0x004, -+ -+ /* The field is itself a struct, containing one or more -+ * fields. Recurse into VMStateField.vmsd. Most useful in -+ * combination with VMS_ARRAY / VMS_VARRAY*, recursing into each -+ * array entry. */ -+ VMS_STRUCT = 0x008, -+ -+ /* The field is an array of variable size. The int32_t at opaque + -+ * VMStateField.num_offset contains the number of entries in the -+ * array. See the VMS_ARRAY description regarding array handling -+ * in general. May not be combined with VMS_ARRAY or any other -+ * VMS_VARRAY*. */ -+ VMS_VARRAY_INT32 = 0x010, -+ -+ /* Ignored */ -+ VMS_BUFFER = 0x020, -+ -+ /* The field is a (fixed-size or variable-size) array of pointers -+ * (e.g. struct a { uint8_t *b[]; }). Dereference each array entry -+ * before using it. 
Note: Does not imply any one of VMS_ARRAY / -+ * VMS_VARRAY*; these need to be set explicitly. */ -+ VMS_ARRAY_OF_POINTER = 0x040, -+ -+ /* The field is an array of variable size. The uint16_t at opaque -+ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) -+ * contains the number of entries in the array. See the VMS_ARRAY -+ * description regarding array handling in general. May not be -+ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ -+ VMS_VARRAY_UINT16 = 0x080, -+ -+ /* The size of the individual entries (a single array entry if -+ * VMS_ARRAY or any of VMS_VARRAY* are set, or the field itself if -+ * neither is set) is variable (i.e. not known at compile-time), -+ * but the same for all entries. Use the int32_t at opaque + -+ * VMStateField.size_offset (subject to VMS_MULTIPLY) to determine -+ * the size of each (and every) entry. */ -+ VMS_VBUFFER = 0x100, -+ -+ /* Multiply the entry size given by the int32_t at opaque + -+ * VMStateField.size_offset (see VMS_VBUFFER description) with -+ * VMStateField.size to determine the number of bytes to be -+ * allocated. Only valid in combination with VMS_VBUFFER. */ -+ VMS_MULTIPLY = 0x200, -+ -+ /* The field is an array of variable size. The uint8_t at opaque + -+ * VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) -+ * contains the number of entries in the array. See the VMS_ARRAY -+ * description regarding array handling in general. May not be -+ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ -+ VMS_VARRAY_UINT8 = 0x400, -+ -+ /* The field is an array of variable size. The uint32_t at opaque -+ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) -+ * contains the number of entries in the array. See the VMS_ARRAY -+ * description regarding array handling in general. May not be -+ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ -+ VMS_VARRAY_UINT32 = 0x800, -+ -+ /* Fail loading the serialised VM state if this field is missing -+ * from the input. 
*/ -+ VMS_MUST_EXIST = 0x1000, -+ -+ /* When loading serialised VM state, allocate memory for the -+ * (entire) field. Only valid in combination with -+ * VMS_POINTER. Note: Not all combinations with other flags are -+ * currently supported, e.g. VMS_ALLOC|VMS_ARRAY_OF_POINTER won't -+ * cause the individual entries to be allocated. */ -+ VMS_ALLOC = 0x2000, -+ -+ /* Multiply the number of entries given by the integer at opaque + -+ * VMStateField.num_offset (see VMS_VARRAY*) with VMStateField.num -+ * to determine the number of entries in the array. Only valid in -+ * combination with one of VMS_VARRAY*. */ -+ VMS_MULTIPLY_ELEMENTS = 0x4000, -+ -+ /* A structure field that is like VMS_STRUCT, but uses -+ * VMStateField.struct_version_id to tell which version of the -+ * structure we are referencing to use. */ -+ VMS_VSTRUCT = 0x8000, -+}; -+ -+struct VMStateField { -+ const char *name; -+ size_t offset; -+ size_t size; -+ size_t start; -+ int num; -+ size_t num_offset; -+ size_t size_offset; -+ const VMStateInfo *info; -+ enum VMStateFlags flags; -+ const VMStateDescription *vmsd; -+ int version_id; -+ int struct_version_id; -+ bool (*field_exists)(void *opaque, int version_id); -+}; -+ -+struct VMStateDescription { -+ const char *name; -+ int version_id; -+ int (*pre_load)(void *opaque); -+ int (*post_load)(void *opaque, int version_id); -+ int (*pre_save)(void *opaque); -+ VMStateField *fields; -+}; -+ -+ -+extern const VMStateInfo slirp_vmstate_info_int16; -+extern const VMStateInfo slirp_vmstate_info_int32; -+extern const VMStateInfo slirp_vmstate_info_uint8; -+extern const VMStateInfo slirp_vmstate_info_uint16; -+extern const VMStateInfo slirp_vmstate_info_uint32; -+ -+/** Put this in the stream when migrating a null pointer.*/ -+#define VMS_NULLPTR_MARKER (0x30U) /* '0' */ -+extern const VMStateInfo slirp_vmstate_info_nullptr; -+ -+extern const VMStateInfo slirp_vmstate_info_buffer; -+extern const VMStateInfo slirp_vmstate_info_tmp; -+ -+#define 
type_check_array(t1, t2, n) ((t1(*)[n])0 - (t2 *)0) -+#define type_check_pointer(t1, t2) ((t1 **)0 - (t2 *)0) -+#define typeof_field(type, field) typeof(((type *)0)->field) -+#define type_check(t1, t2) ((t1 *)0 - (t2 *)0) -+ -+#define vmstate_offset_value(_state, _field, _type) \ -+ (offsetof(_state, _field) + type_check(_type, typeof_field(_state, _field))) -+ -+#define vmstate_offset_pointer(_state, _field, _type) \ -+ (offsetof(_state, _field) + \ -+ type_check_pointer(_type, typeof_field(_state, _field))) -+ -+#define vmstate_offset_array(_state, _field, _type, _num) \ -+ (offsetof(_state, _field) + \ -+ type_check_array(_type, typeof_field(_state, _field), _num)) -+ -+#define vmstate_offset_buffer(_state, _field) \ -+ vmstate_offset_array(_state, _field, uint8_t, \ -+ sizeof(typeof_field(_state, _field))) -+ -+/* In the macros below, if there is a _version, that means the macro's -+ * field will be processed only if the version being received is >= -+ * the _version specified. In general, if you add a new field, you -+ * would increment the structure's version and put that version -+ * number into the new field so it would only be processed with the -+ * new version. -+ * -+ * In particular, for VMSTATE_STRUCT() and friends the _version does -+ * *NOT* pick the version of the sub-structure. It works just as -+ * specified above. The version of the top-level structure received -+ * is passed down to all sub-structures. This means that the -+ * sub-structures must have version that are compatible with all the -+ * structures that use them. -+ * -+ * If you want to specify the version of the sub-structure, use -+ * VMSTATE_VSTRUCT(), which allows the specific sub-structure version -+ * to be directly specified. 
-+ */ -+ -+#define VMSTATE_SINGLE_TEST(_field, _state, _test, _version, _info, _type) \ -+ { \ -+ .name = (stringify(_field)), .version_id = (_version), \ -+ .field_exists = (_test), .size = sizeof(_type), .info = &(_info), \ -+ .flags = VMS_SINGLE, \ -+ .offset = vmstate_offset_value(_state, _field, _type), \ -+ } -+ -+#define VMSTATE_ARRAY(_field, _state, _num, _version, _info, _type) \ -+ { \ -+ .name = (stringify(_field)), .version_id = (_version), .num = (_num), \ -+ .info = &(_info), .size = sizeof(_type), .flags = VMS_ARRAY, \ -+ .offset = vmstate_offset_array(_state, _field, _type, _num), \ -+ } -+ -+#define VMSTATE_STRUCT_TEST(_field, _state, _test, _version, _vmsd, _type) \ -+ { \ -+ .name = (stringify(_field)), .version_id = (_version), \ -+ .field_exists = (_test), .vmsd = &(_vmsd), .size = sizeof(_type), \ -+ .flags = VMS_STRUCT, \ -+ .offset = vmstate_offset_value(_state, _field, _type), \ -+ } -+ -+#define VMSTATE_STRUCT_POINTER_V(_field, _state, _version, _vmsd, _type) \ -+ { \ -+ .name = (stringify(_field)), .version_id = (_version), \ -+ .vmsd = &(_vmsd), .size = sizeof(_type *), \ -+ .flags = VMS_STRUCT | VMS_POINTER, \ -+ .offset = vmstate_offset_pointer(_state, _field, _type), \ -+ } -+ -+#define VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, _test, _version, \ -+ _vmsd, _type) \ -+ { \ -+ .name = (stringify(_field)), .num = (_num), .field_exists = (_test), \ -+ .version_id = (_version), .vmsd = &(_vmsd), .size = sizeof(_type), \ -+ .flags = VMS_STRUCT | VMS_ARRAY, \ -+ .offset = vmstate_offset_array(_state, _field, _type, _num), \ -+ } -+ -+#define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) \ -+ { \ -+ .name = (stringify(_field)), .version_id = (_version), \ -+ .field_exists = (_test), .size = (_size - _start), \ -+ .info = &slirp_vmstate_info_buffer, .flags = VMS_BUFFER, \ -+ .offset = vmstate_offset_buffer(_state, _field) + _start, \ -+ } -+ -+#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, 
_field_size) \ -+ { \ -+ .name = (stringify(_field)), .version_id = (_version), \ -+ .field_exists = (_test), \ -+ .size_offset = vmstate_offset_value(_state, _field_size, uint32_t), \ -+ .info = &slirp_vmstate_info_buffer, \ -+ .flags = VMS_VBUFFER | VMS_POINTER, \ -+ .offset = offsetof(_state, _field), \ -+ } -+ -+#define QEMU_BUILD_BUG_ON_STRUCT(x) \ -+ struct { \ -+ int : (x) ? -1 : 1; \ -+ } -+ -+#define QEMU_BUILD_BUG_ON_ZERO(x) \ -+ (sizeof(QEMU_BUILD_BUG_ON_STRUCT(x)) - sizeof(QEMU_BUILD_BUG_ON_STRUCT(x))) -+ -+/* Allocate a temporary of type 'tmp_type', set tmp->parent to _state -+ * and execute the vmsd on the temporary. Note that we're working with -+ * the whole of _state here, not a field within it. -+ * We compile time check that: -+ * That _tmp_type contains a 'parent' member that's a pointer to the -+ * '_state' type -+ * That the pointer is right at the start of _tmp_type. -+ */ -+#define VMSTATE_WITH_TMP(_state, _tmp_type, _vmsd) \ -+ { \ -+ .name = "tmp", \ -+ .size = sizeof(_tmp_type) + \ -+ QEMU_BUILD_BUG_ON_ZERO(offsetof(_tmp_type, parent) != 0) + \ -+ type_check_pointer(_state, typeof_field(_tmp_type, parent)), \ -+ .vmsd = &(_vmsd), .info = &slirp_vmstate_info_tmp, \ -+ } -+ -+#define VMSTATE_SINGLE(_field, _state, _version, _info, _type) \ -+ VMSTATE_SINGLE_TEST(_field, _state, NULL, _version, _info, _type) -+ -+#define VMSTATE_STRUCT(_field, _state, _version, _vmsd, _type) \ -+ VMSTATE_STRUCT_TEST(_field, _state, NULL, _version, _vmsd, _type) -+ -+#define VMSTATE_STRUCT_POINTER(_field, _state, _vmsd, _type) \ -+ VMSTATE_STRUCT_POINTER_V(_field, _state, 0, _vmsd, _type) -+ -+#define VMSTATE_STRUCT_ARRAY(_field, _state, _num, _version, _vmsd, _type) \ -+ VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, NULL, _version, _vmsd, \ -+ _type) -+ -+#define VMSTATE_INT16_V(_f, _s, _v) \ -+ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int16, int16_t) -+#define VMSTATE_INT32_V(_f, _s, _v) \ -+ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int32, 
int32_t) -+ -+#define VMSTATE_UINT8_V(_f, _s, _v) \ -+ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint8, uint8_t) -+#define VMSTATE_UINT16_V(_f, _s, _v) \ -+ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint16, uint16_t) -+#define VMSTATE_UINT32_V(_f, _s, _v) \ -+ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint32, uint32_t) -+ -+#define VMSTATE_INT16(_f, _s) VMSTATE_INT16_V(_f, _s, 0) -+#define VMSTATE_INT32(_f, _s) VMSTATE_INT32_V(_f, _s, 0) -+ -+#define VMSTATE_UINT8(_f, _s) VMSTATE_UINT8_V(_f, _s, 0) -+#define VMSTATE_UINT16(_f, _s) VMSTATE_UINT16_V(_f, _s, 0) -+#define VMSTATE_UINT32(_f, _s) VMSTATE_UINT32_V(_f, _s, 0) -+ -+#define VMSTATE_UINT16_TEST(_f, _s, _t) \ -+ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint16, uint16_t) -+ -+#define VMSTATE_UINT32_TEST(_f, _s, _t) \ -+ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint32, uint32_t) -+ -+#define VMSTATE_INT16_ARRAY_V(_f, _s, _n, _v) \ -+ VMSTATE_ARRAY(_f, _s, _n, _v, slirp_vmstate_info_int16, int16_t) -+ -+#define VMSTATE_INT16_ARRAY(_f, _s, _n) VMSTATE_INT16_ARRAY_V(_f, _s, _n, 0) -+ -+#define VMSTATE_BUFFER_V(_f, _s, _v) \ -+ VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, 0, sizeof(typeof_field(_s, _f))) -+ -+#define VMSTATE_BUFFER(_f, _s) VMSTATE_BUFFER_V(_f, _s, 0) -+ -+#define VMSTATE_END_OF_LIST() \ -+ { \ -+ } -+ -+#endif --- -1.8.3.1 - diff --git a/0005-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch index 705f594..cde66a1 100644 --- a/0005-Initial-redhat-build.patch +++ b/0005-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From 1285db562351e2233413d163bfef3ed002b10259 Mon Sep 17 00:00:00 2001 +From 4df157781801c50224373be57fa3c8c3741c0535 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build @@ -13,7 +13,7 @@ several issues are fixed in QEMU tree: - Use "/share/qemu-kvm" as SHARE_SUFFIX - We reconfigured our share to qemu-kvm to be consistent with used name -This rebase includes changes up to 
qemu-kvm-4.1.0-14.el8 +This rebase includes changes up to qemu-kvm-4.1.0-18.el8 Rebase notes (3.1.0): - added new configure options @@ -67,10 +67,8 @@ Merged patches (4.1.0): - 27b7c44 rh: set CONFIG_BOCHS_DISPLAY=y for x86 (partial) - e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) -Merged patches (weekly-190830): +Merged patches (4.2.0): - 69e1fb2 enable virgla - -Merged patches (weekly-190906): - d4f6115 enable virgl, for real this time ... Signed-off-by: Danilo C. L. de Paula @@ -79,21 +77,21 @@ Signed-off-by: Danilo C. L. de Paula Makefile | 3 +- configure | 1 + os-posix.c | 2 +- - redhat/Makefile | 82 ++ + redhat/Makefile | 82 + redhat/Makefile.common | 51 + redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 2369 +++++++++++++++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 2434 +++++++++++++++++++++++++++++ redhat/scripts/process-patches.sh | 7 +- tests/Makefile.include | 2 +- ui/vnc.c | 2 +- - 11 files changed, 2550 insertions(+), 9 deletions(-) + 11 files changed, 2615 insertions(+), 9 deletions(-) create mode 100644 redhat/Makefile create mode 100644 redhat/Makefile.common create mode 100644 redhat/README.tests create mode 100644 redhat/qemu-kvm.spec.template diff --git a/Makefile b/Makefile -index b437a34..086727d 100644 +index b437a346d7..086727dbb9 100644 --- a/Makefile +++ b/Makefile @@ -512,6 +512,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM @@ -114,7 +112,7 @@ index b437a34..086727d 100644 $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) docs/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" diff --git a/configure b/configure -index 6099be1..16564f8 100755 +index 6099be1d84..16564f8ccc 100755 --- a/configure +++ b/configure @@ -2424,6 +2424,7 @@ if test "$seccomp" != "no" ; then @@ -126,7 +124,7 @@ index 6099be1..16564f8 100755 # xen probe diff --git a/os-posix.c b/os-posix.c -index 86cffd2..1c9f867 100644 +index 86cffd2c7d..1c9f86768d 100644 --- a/os-posix.c +++ b/os-posix.c @@ -83,7 +83,7 
@@ void os_setup_signal_handling(void) @@ -139,7 +137,7 @@ index 86cffd2..1c9f867 100644 char *os_find_datadir(void) { diff --git a/tests/Makefile.include b/tests/Makefile.include -index 8566f5f..b483790 100644 +index 8566f5f119..b483790cf3 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -1194,7 +1194,7 @@ check-acceptance: check-venv $(TESTS_RESULTS_DIR) @@ -152,7 +150,7 @@ index 8566f5f..b483790 100644 rm -rf $(check-unit-y) tests/*.o $(QEMU_IOTESTS_HELPERS-y) rm -rf $(sort $(foreach target,$(SYSEMU_TARGET_LIST), $(check-qtest-$(target)-y)) $(check-qtest-generic-y)) diff --git a/ui/vnc.c b/ui/vnc.c -index 87b8045..ecf6276 100644 +index 87b8045afe..ecf6276f5b 100644 --- a/ui/vnc.c +++ b/ui/vnc.c @@ -3987,7 +3987,7 @@ void vnc_display_open(const char *id, Error **errp) @@ -165,5 +163,5 @@ index 87b8045..ecf6276 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -1.8.3.1 +2.21.0 diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0006-Enable-disable-devices-for-RHEL.patch index 465f9cd..b14bb1b 100644 --- a/0006-Enable-disable-devices-for-RHEL.patch +++ b/0006-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From e75b1c31b76463b4b8f30cb6dbd23ded014e9269 Mon Sep 17 00:00:00 2001 +From 67511676246cce57becbd2dcf5abccf08d9ef737 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 11 Jan 2016 11:53:33 +0100 Subject: Enable/disable devices for RHEL @@ -54,25 +54,25 @@ Merged patches (4.1.0): - 495a27d x86_64-rh-devices: add missing TPM passthrough - e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) -Merged patches (weekly-190823): +Merged patches (4.2.0): - f7587dd RHEL: disable hostmem-memfd Signed-off-by: Danilo C. L. 
de Paula --- Makefile.objs | 4 +- backends/Makefile.objs | 3 +- - default-configs/aarch64-rh-devices.mak | 20 +++++++ - default-configs/aarch64-softmmu.mak | 10 ++-- - default-configs/ppc64-rh-devices.mak | 32 +++++++++++ - default-configs/ppc64-softmmu.mak | 8 ++- - default-configs/rh-virtio.mak | 10 ++++ - default-configs/s390x-rh-devices.mak | 15 +++++ + default-configs/aarch64-rh-devices.mak | 20 +++++ + default-configs/aarch64-softmmu.mak | 10 ++- + default-configs/ppc64-rh-devices.mak | 32 ++++++++ + default-configs/ppc64-softmmu.mak | 8 +- + default-configs/rh-virtio.mak | 10 +++ + default-configs/s390x-rh-devices.mak | 15 ++++ default-configs/s390x-softmmu.mak | 4 +- - default-configs/x86_64-rh-devices.mak | 100 +++++++++++++++++++++++++++++++++ + default-configs/x86_64-rh-devices.mak | 100 +++++++++++++++++++++++++ default-configs/x86_64-softmmu.mak | 4 +- hw/acpi/ich9.c | 4 +- hw/arm/Makefile.objs | 2 +- - hw/block/fdc.c | 10 ++++ + hw/block/fdc.c | 10 +++ hw/bt/Makefile.objs | 4 +- hw/cpu/Makefile.objs | 5 +- hw/display/Makefile.objs | 5 +- @@ -80,20 +80,20 @@ Signed-off-by: Danilo C. L. de Paula hw/ide/piix.c | 5 +- hw/input/pckbd.c | 2 + hw/net/e1000.c | 2 + - hw/pci-host/i440fx.c | 4 ++ + hw/pci-host/i440fx.c | 4 + hw/ppc/spapr_cpu_core.c | 2 + hw/usb/Makefile.objs | 4 +- hw/vfio/pci-quirks.c | 9 +++ hw/vfio/pci.c | 5 ++ - qemu-options.hx | 7 +-- + qemu-options.hx | 7 +- redhat/qemu-kvm.spec.template | 5 +- target/arm/cpu.c | 4 +- - target/i386/cpu.c | 35 +++++++++--- - target/ppc/cpu-models.c | 10 ++++ + target/i386/cpu.c | 35 +++++++-- + target/ppc/cpu-models.c | 10 +++ target/s390x/cpu_models.c | 3 + - target/s390x/kvm.c | 8 +++ + target/s390x/kvm.c | 8 ++ util/memfd.c | 2 +- - vl.c | 8 ++- + vl.c | 8 +- 35 files changed, 317 insertions(+), 41 deletions(-) create mode 100644 default-configs/aarch64-rh-devices.mak create mode 100644 default-configs/ppc64-rh-devices.mak @@ -102,7 +102,7 @@ Signed-off-by: Danilo C. L. 
de Paula create mode 100644 default-configs/x86_64-rh-devices.mak diff --git a/Makefile.objs b/Makefile.objs -index 11ba1a3..fcf63e1 100644 +index 11ba1a36bd..fcf63e1096 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -65,8 +65,8 @@ common-obj-y += replay/ @@ -117,7 +117,7 @@ index 11ba1a3..fcf63e1 100644 common-obj-y += dma-helpers.o common-obj-y += vl.o diff --git a/backends/Makefile.objs b/backends/Makefile.objs -index f069111..f328d40 100644 +index f0691116e8..f328d404bf 100644 --- a/backends/Makefile.objs +++ b/backends/Makefile.objs @@ -16,4 +16,5 @@ endif @@ -129,7 +129,7 @@ index f069111..f328d40 100644 +# common-obj-$(CONFIG_LINUX) += hostmem-memfd.o diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak new file mode 100644 -index 0000000..a1ed641 +index 0000000000..a1ed641174 --- /dev/null +++ b/default-configs/aarch64-rh-devices.mak @@ -0,0 +1,20 @@ @@ -154,7 +154,7 @@ index 0000000..a1ed641 +CONFIG_VIRTIO_PCI=y +CONFIG_XIO3130=y diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak -index 958b1e0..8f6867d 100644 +index 958b1e08e4..8f6867d48a 100644 --- a/default-configs/aarch64-softmmu.mak +++ b/default-configs/aarch64-softmmu.mak @@ -1,8 +1,10 @@ @@ -174,7 +174,7 @@ index 958b1e0..8f6867d 100644 +include aarch64-rh-devices.mak diff --git a/default-configs/ppc64-rh-devices.mak b/default-configs/ppc64-rh-devices.mak new file mode 100644 -index 0000000..35f2106 +index 0000000000..35f2106d06 --- /dev/null +++ b/default-configs/ppc64-rh-devices.mak @@ -0,0 +1,32 @@ @@ -211,7 +211,7 @@ index 0000000..35f2106 +CONFIG_XIVE_SPAPR=y +CONFIG_XIVE_KVM=y diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak -index cca5266..fec354f 100644 +index cca52665d9..fec354f327 100644 --- a/default-configs/ppc64-softmmu.mak +++ b/default-configs/ppc64-softmmu.mak @@ -1,10 +1,12 @@ @@ -232,7 +232,7 @@ index cca5266..fec354f 100644 +include ppc64-rh-devices.mak diff --git 
a/default-configs/rh-virtio.mak b/default-configs/rh-virtio.mak new file mode 100644 -index 0000000..94ede1b +index 0000000000..94ede1b5f6 --- /dev/null +++ b/default-configs/rh-virtio.mak @@ -0,0 +1,10 @@ @@ -248,7 +248,7 @@ index 0000000..94ede1b +CONFIG_VIRTIO_SERIAL=y diff --git a/default-configs/s390x-rh-devices.mak b/default-configs/s390x-rh-devices.mak new file mode 100644 -index 0000000..c3c73fe +index 0000000000..c3c73fe752 --- /dev/null +++ b/default-configs/s390x-rh-devices.mak @@ -0,0 +1,15 @@ @@ -268,7 +268,7 @@ index 0000000..c3c73fe +CONFIG_VIRTIO_CCW=y +CONFIG_WDT_DIAG288=y diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak -index f2287a1..3e2e388 100644 +index f2287a133f..3e2e388e91 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak @@ -10,4 +10,6 @@ @@ -281,7 +281,7 @@ index f2287a1..3e2e388 100644 +include s390x-rh-devices.mak diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak new file mode 100644 -index 0000000..d59b6d9 +index 0000000000..d59b6d9bb5 --- /dev/null +++ b/default-configs/x86_64-rh-devices.mak @@ -0,0 +1,100 @@ @@ -386,7 +386,7 @@ index 0000000..d59b6d9 +CONFIG_TPM_EMULATOR=y +CONFIG_TPM_PASSTHROUGH=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak -index 64b2ee2..b5de7e5 100644 +index 64b2ee2960..b5de7e5279 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -1,3 +1,5 @@ @@ -397,7 +397,7 @@ index 64b2ee2..b5de7e5 100644 + +include x86_64-rh-devices.mak diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 2034dd7..ab203ad 100644 +index 2034dd749e..ab203ad448 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -449,8 +449,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) @@ -412,7 +412,7 @@ index 2034dd7..ab203ad 100644 object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, diff --git a/hw/arm/Makefile.objs 
b/hw/arm/Makefile.objs -index fe749f6..2aa1a9e 100644 +index fe749f65fd..2aa1a9efdd 100644 --- a/hw/arm/Makefile.objs +++ b/hw/arm/Makefile.objs @@ -27,7 +27,7 @@ obj-$(CONFIG_VEXPRESS) += vexpress.o @@ -425,7 +425,7 @@ index fe749f6..2aa1a9e 100644 obj-$(CONFIG_PXA2XX) += pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o obj-$(CONFIG_DIGIC) += digic.o diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index ac5d31e..e925bac 100644 +index ac5d31e8c1..e925bac002 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -46,6 +46,8 @@ @@ -453,7 +453,7 @@ index ac5d31e..e925bac 100644 error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); } diff --git a/hw/bt/Makefile.objs b/hw/bt/Makefile.objs -index 867a7d2..e678e9e 100644 +index 867a7d2e8a..e678e9ee3c 100644 --- a/hw/bt/Makefile.objs +++ b/hw/bt/Makefile.objs @@ -1,3 +1,3 @@ @@ -463,7 +463,7 @@ index 867a7d2..e678e9e 100644 +#common-obj-y += hci-csr.o diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs -index 8db9e8a..1601ea9 100644 +index 8db9e8a7b3..1601ea93c7 100644 --- a/hw/cpu/Makefile.objs +++ b/hw/cpu/Makefile.objs @@ -1,5 +1,6 @@ @@ -476,7 +476,7 @@ index 8db9e8a..1601ea9 100644 +common-obj-y += core.o +# cluster.o diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs -index f2182e3..3d0cda1 100644 +index f2182e3bef..3d0cda1b52 100644 --- a/hw/display/Makefile.objs +++ b/hw/display/Makefile.objs @@ -1,8 +1,9 @@ @@ -492,7 +492,7 @@ index f2182e3..3d0cda1 100644 common-obj-$(CONFIG_ADS7846) += ads7846.o common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index cd283e5..93afa26 100644 +index cd283e53b4..93afa26fda 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c @@ -2975,6 +2975,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) @@ -506,7 +506,7 @@ index cd283e5..93afa26 100644 Also accept 8 MB/16 MB for backward compatibility. 
*/ if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index db313dd..e14858c 100644 +index db313dd3b1..e14858ca64 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c @@ -251,7 +251,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) @@ -529,7 +529,7 @@ index db313dd..e14858c 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index f0acfd8..390eb65 100644 +index f0acfd86f7..390eb6579c 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c @@ -571,6 +571,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) @@ -542,7 +542,7 @@ index f0acfd8..390eb65 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index a73f8d4..fc73fdd 100644 +index a73f8d404e..fc73fdd6fa 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -1795,6 +1795,7 @@ static const E1000Info e1000_devices[] = { @@ -562,7 +562,7 @@ index a73f8d4..fc73fdd 100644 static void e1000_register_types(void) diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c -index f271311..17f10ef 100644 +index f27131102d..17f10efae2 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -386,6 +386,7 @@ static const TypeInfo i440fx_info = { @@ -592,7 +592,7 @@ index f271311..17f10ef 100644 } diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 8339c4c..301cd7b 100644 +index 8339c4c0f8..301cd7b4e4 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -403,10 +403,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { @@ -609,7 +609,7 @@ index 8339c4c..301cd7b 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs -index 303ac08..700a918 100644 +index 303ac084a0..700a91886e 100644 --- a/hw/usb/Makefile.objs +++ b/hw/usb/Makefile.objs @@ -30,7 +30,9 @@ common-obj-$(CONFIG_USB_BLUETOOTH) += dev-bluetooth.o @@ -624,7 +624,7 @@ 
index 303ac08..700a918 100644 smartcard.mo-libs := $(SMARTCARD_LIBS) endif diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index 136f3a9..4505ffe 100644 +index 136f3a9ad6..4505ffe48a 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -1166,6 +1166,7 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) @@ -689,7 +689,7 @@ index 136f3a9..4505ffe 100644 /* diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 2d40b39..c8534d3 100644 +index 2d40b396f2..c8534d3035 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3220,6 +3220,7 @@ static const TypeInfo vfio_pci_dev_info = { @@ -717,7 +717,7 @@ index 2d40b39..c8534d3 100644 type_init(register_vfio_pci_dev_type) diff --git a/qemu-options.hx b/qemu-options.hx -index 65c9473..fc17aca 100644 +index 65c9473b73..fc17aca631 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2111,11 +2111,6 @@ ETEXI @@ -749,7 +749,7 @@ index 65c9473..fc17aca 100644 #ifdef CONFIG_TPM DEFHEADING(TPM device options:) diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 7a4ac93..3788fc3 100644 +index 7a4ac9339b..3788fc3c4a 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -2744,7 +2744,9 @@ static void arm_cpu_register_types(void) @@ -764,7 +764,7 @@ index 7a4ac93..3788fc3 100644 } diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 69f518a..1b7880a 100644 +index 69f518a21a..1b7880ae3a 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1835,14 +1835,14 @@ static X86CPUDefinition builtin_x86_defs[] = { @@ -790,10 +790,11 @@ index 69f518a..1b7880a 100644 .features[FEAT_8000_0001_ECX] = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM, .xlevel = 0x8000000A, -@@ -2129,6 +2129,25 @@ static X86CPUDefinition builtin_x86_defs[] = { +@@ -2128,6 +2128,25 @@ static X86CPUDefinition builtin_x86_defs[] = { + .xlevel = 0x80000008, .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", }, - { ++ { + .name = "cpu64-rhel6", + .level = 4, + .vendor = CPUID_VENDOR_AMD, @@ -812,12 +813,11 @@ index 69f518a..1b7880a 
100644 + .xlevel = 0x8000000A, + .model_id = "QEMU Virtual CPU version (cpu64-rhel6)", + }, -+ { + { .name = "Conroe", .level = 10, - .vendor = CPUID_VENDOR_INTEL, diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 086548e..1bbf378 100644 +index 086548e9b9..1bbf378c18 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -887,7 +887,7 @@ index 086548e..1bbf378 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 7e92fb2..be71822 100644 +index 7e92fb2e15..be718220d7 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -404,6 +404,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, @@ -901,7 +901,7 @@ index 7e92fb2..be71822 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 0c9d14b..a02d569 100644 +index 0c9d14b4b1..a02d569537 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -2387,6 +2387,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) @@ -920,7 +920,7 @@ index 0c9d14b..a02d569 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ diff --git a/util/memfd.c b/util/memfd.c -index 4a3c07e..3303ec9 100644 +index 4a3c07e0be..3303ec9da4 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -193,7 +193,7 @@ bool qemu_memfd_alloc_check(void) @@ -933,7 +933,7 @@ index 4a3c07e..3303ec9 100644 if (mfd >= 0) { diff --git a/vl.c b/vl.c -index 6a65a64..668a345 100644 +index 6a65a64bfd..668a34577e 100644 --- a/vl.c +++ b/vl.c @@ -166,7 +166,7 @@ Chardev *parallel_hds[MAX_PARALLEL_PORTS]; @@ -990,5 +990,5 @@ index 6a65a64..668a345 100644 if (!xen_enabled()) { /* On 32-bit hosts, QEMU is limited by virtual address space */ -- -1.8.3.1 +2.21.0 diff --git a/0007-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch index acea7d3..4ae3966 100644 --- 
a/0007-Machine-type-related-general-changes.patch +++ b/0007-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From c47eea7192e103a6cc57cd7d07421b8e684d3db5 Mon Sep 17 00:00:00 2001 +From 113078b23a4747b07eb363719d7cbc0af403dd2a Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -28,34 +28,35 @@ Merged patches (4.1.0): - 479ad30 redhat: fix cut'n'paste garbage in hw_compat comments - f19738e compat: Generic hw_compat_rhel_8_0 -Merged patches (weekly-190823): +Merged patches (4.2.0): - 9f2bfaa machine types: Update hw_compat_rhel_8_0 from hw_compat_4_0 - ca4a5e8 virtio: Make disable-legacy/disable-modern compat properties optional +- compat: Generic hw_compat_rhel_8_1 (patch 93040/92956) Signed-off-by: Danilo C. L. de Paula --- - hw/acpi/ich9.c | 16 +++++ + hw/acpi/ich9.c | 16 ++++ hw/acpi/piix4.c | 5 +- - hw/char/serial.c | 16 +++++ - hw/core/machine.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++++ + hw/char/serial.c | 16 ++++ + hw/core/machine.c | 170 ++++++++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- - hw/net/e1000e.c | 21 +++++++ + hw/net/e1000e.c | 21 +++++ hw/net/rtl8139.c | 4 +- hw/rtc/mc146818rtc.c | 6 ++ hw/smbios/smbios.c | 1 + hw/timer/i8254_common.c | 2 +- hw/usb/hcd-uhci.c | 4 +- - hw/usb/hcd-xhci.c | 20 ++++++ + hw/usb/hcd-xhci.c | 20 +++++ hw/usb/hcd-xhci.h | 2 + include/hw/acpi/ich9.h | 3 + - include/hw/boards.h | 21 +++++++ - include/hw/usb.h | 4 ++ + include/hw/boards.h | 24 ++++++ + include/hw/usb.h | 4 + migration/migration.c | 2 + migration/migration.h | 5 ++ - 18 files changed, 289 insertions(+), 6 deletions(-) + 18 files changed, 301 insertions(+), 6 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index ab203ad..7ec2688 100644 +index ab203ad448..7ec26884e8 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -444,6 +444,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) @@ -89,7 +90,7 
@@ index ab203ad..7ec2688 100644 ich9_pm_get_disable_s3, ich9_pm_set_disable_s3, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 93aec2d..3a26193 100644 +index 93aec2dd2c..3a26193cbe 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -274,6 +274,7 @@ static const VMStateDescription vmstate_acpi = { @@ -112,7 +113,7 @@ index 93aec2d..3a26193 100644 DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_pci_hotplug, true), diff --git a/hw/char/serial.c b/hw/char/serial.c -index b4aa250..0012f0e 100644 +index b4aa250950..0012f0e44d 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -34,6 +34,7 @@ @@ -167,13 +168,22 @@ index b4aa250..0012f0e 100644 } diff --git a/hw/core/machine.c b/hw/core/machine.c -index 1689ad3..2b130bb 100644 +index 1689ad3bf8..e0e0eec8bf 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -27,6 +27,167 @@ +@@ -27,6 +27,176 @@ #include "hw/pci/pci.h" #include "hw/mem/nvdimm.h" ++/* ++ * The same as hw_compat_4_1 ++ */ ++GlobalProperty hw_compat_rhel_8_1[] = { ++ /* hw_compat_rhel_8_1 from hw_compat_4_1 */ ++ { "virtio-pci", "x-pcie-flr-init", "off" }, ++}; ++const size_t hw_compat_rhel_8_1_len = G_N_ELEMENTS(hw_compat_rhel_8_1); ++ +/* The same as hw_compat_3_1 + * format of array has been changed by: + * 6c36bddf5340 ("machine: Use shorter format for GlobalProperty arrays") @@ -339,7 +349,7 @@ index 1689ad3..2b130bb 100644 { "virtio-pci", "x-pcie-flr-init", "off" }, }; diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index 873e5e9..d1a2efe 100644 +index 873e5e9706..d1a2efe47e 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c @@ -82,7 +82,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) @@ -352,7 +362,7 @@ index 873e5e9..d1a2efe 100644 }; diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index b69fd7d..d8be50a 100644 +index b69fd7d8ad..d8be50a1ce 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -79,6 +79,11 @@ typedef struct E1000EState { @@ -426,7 +436,7 @@ 
index b69fd7d..d8be50a 100644 e1000e_prop_disable_vnet, bool), DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 88a97d7..21d80e9 100644 +index 88a97d756d..21d80e96cf 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3177,7 +3177,7 @@ static int rtl8139_pre_save(void *opaque) @@ -449,7 +459,7 @@ index 88a97d7..21d80e9 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c -index 74ae74b..7382051 100644 +index 74ae74bc5c..73820517df 100644 --- a/hw/rtc/mc146818rtc.c +++ b/hw/rtc/mc146818rtc.c @@ -42,6 +42,7 @@ @@ -473,7 +483,7 @@ index 74ae74b..7382051 100644 } diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 11d476c..e6e9355 100644 +index 11d476c4a2..e6e9355384 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -777,6 +777,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, @@ -485,7 +495,7 @@ index 11d476c..e6e9355 100644 SMBIOS_SET_DEFAULT(type2.product, product); SMBIOS_SET_DEFAULT(type2.version, version); diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index 050875b..32935da 100644 +index 050875b497..32935da46c 100644 --- a/hw/timer/i8254_common.c +++ b/hw/timer/i8254_common.c @@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = { @@ -498,7 +508,7 @@ index 050875b..32935da 100644 vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 23507ad..9fd87a7 100644 +index 23507ad3b5..9fd87a7ad9 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c @@ -1219,12 +1219,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) @@ -518,7 +528,7 @@ index 23507ad..9fd87a7 100644 if (s->masterbus) { USBPort *ports[NB_PORTS]; diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 80988bb..8fed2ee 100644 +index 80988bb305..8fed2eedd6 100644 --- 
a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -3590,9 +3590,27 @@ static const VMStateDescription vmstate_xhci_slot = { @@ -559,7 +569,7 @@ index 80988bb..8fed2ee 100644 } }; diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h -index 2fad4df..f554b67 100644 +index 2fad4df2a7..f554b671e3 100644 --- a/hw/usb/hcd-xhci.h +++ b/hw/usb/hcd-xhci.h @@ -157,6 +157,8 @@ typedef struct XHCIEvent { @@ -572,7 +582,7 @@ index 2fad4df..f554b67 100644 typedef struct XHCIInterrupter { diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h -index 41568d1..1a23ccc 100644 +index 41568d1837..1a23ccc412 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -61,6 +61,9 @@ typedef struct ICH9LPCPMRegs { @@ -586,13 +596,16 @@ index 41568d1..1a23ccc 100644 #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" diff --git a/include/hw/boards.h b/include/hw/boards.h -index de45087..cc11116 100644 +index de45087f34..6f85a0e032 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -377,4 +377,25 @@ extern const size_t hw_compat_2_2_len; +@@ -377,4 +377,28 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; ++extern GlobalProperty hw_compat_rhel_8_1[]; ++extern const size_t hw_compat_rhel_8_1_len; ++ +extern GlobalProperty hw_compat_rhel_8_0[]; +extern const size_t hw_compat_rhel_8_0_len; + @@ -616,7 +629,7 @@ index de45087..cc11116 100644 + #endif diff --git a/include/hw/usb.h b/include/hw/usb.h -index c24d968..b353438 100644 +index c24d968a19..b353438ea0 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h @@ -605,4 +605,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, @@ -629,7 +642,7 @@ index c24d968..b353438 100644 + #endif diff --git a/migration/migration.c b/migration/migration.c -index 354ad07..30c53c6 100644 +index 354ad072fa..30c53c623b 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -121,6 +121,8 @@ enum mig_rp_message_type { @@ -642,7 +655,7 @@ index 
354ad07..30c53c6 100644 migrations at once. For now we don't need to add dynamic creation of migration */ diff --git a/migration/migration.h b/migration/migration.h -index 79b3dda..0b1b0d4 100644 +index 79b3dda146..0b1b0d4df5 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -335,6 +335,11 @@ void init_dirty_bitmap_incoming_migration(void); @@ -658,5 +671,5 @@ index 79b3dda..0b1b0d4 100644 #define qemu_ram_foreach_block \ #warning "Use foreach_not_ignored_block in migration code" -- -1.8.3.1 +2.21.0 diff --git a/0008-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch index 5691082..5397c8b 100644 --- a/0008-Add-aarch64-machine-types.patch +++ b/0008-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From e9fbad8d9e530189a029533f738bac62559e4d52 Mon Sep 17 00:00:00 2001 +From 49164264d9928f73961acbbe4d56d8dfa23d8099 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -34,12 +34,12 @@ Merged patches (4.1.0): Signed-off-by: Danilo C. L. 
de Paula --- - hw/arm/virt.c | 161 +++++++++++++++++++++++++++++++++++++++++++++++++- - include/hw/arm/virt.h | 11 ++++ + hw/arm/virt.c | 161 +++++++++++++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 11 +++ 2 files changed, 171 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d4bedc2..e108391 100644 +index d4bedc2607..e10839100e 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -72,6 +72,7 @@ @@ -243,7 +243,7 @@ index d4bedc2..e108391 100644 +} +DEFINE_RHEL_MACHINE_AS_LATEST(8, 2, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 0b41083..53fdf16 100644 +index 0b41083e9d..53fdf16563 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -142,6 +142,7 @@ typedef struct { @@ -272,5 +272,5 @@ index 0b41083..53fdf16 100644 /* Return the number of used redistributor regions */ -- -1.8.3.1 +2.21.0 diff --git a/0009-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch index ba4e0fb..a3f1a54 100644 --- a/0009-Add-ppc64-machine-types.patch +++ b/0009-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 4a62a0bf849ff8f533d3fc5bd3faec6bd09cf562 Mon Sep 17 00:00:00 2001 +From 136eae41007e2e5b0d693cc656f3ec36cbabf16f Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -26,20 +26,22 @@ Merged patches (4.1.0): - 2511c63 redhat: sync pseries-rhel7.6.0 with rhel-av-8.0.1 - 89f01da redhat: define pseries-rhel8.1.0 machine type -Merged patches (weekly-190830): +Merged patches (4.2.0): - bcba728 redhat: update pseries-rhel8.1.0 machine type +- redhat: update pseries-rhel-7.6.0 machine type (patch 93039) +- redhat: define pseries-rhel8.2.0 machine type (patch 93041) Signed-off-by: Danilo C. L. 
de Paula --- - hw/ppc/spapr.c | 251 ++++++++++++++++++++++++++++++++++++++++++++++++ - hw/ppc/spapr_cpu_core.c | 13 +++ + hw/ppc/spapr.c | 278 ++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr_cpu_core.c | 13 ++ include/hw/ppc/spapr.h | 1 + - target/ppc/compat.c | 13 ++- + target/ppc/compat.c | 13 +- target/ppc/cpu.h | 1 + - 5 files changed, 278 insertions(+), 1 deletion(-) + 5 files changed, 305 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index e076f60..f3652ed 100644 +index e076f6023c..8749c72066 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -4447,6 +4447,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) @@ -90,22 +92,48 @@ index e076f60..f3652ed 100644 static void spapr_machine_2_7_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4863,6 +4869,251 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -4863,6 +4869,278 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); +#endif + +/* -+ * pseries-rhel8.1.0 ++ * pseries-rhel8.2.0 + */ + -+static void spapr_machine_rhel810_class_options(MachineClass *mc) ++static void spapr_machine_rhel820_class_options(MachineClass *mc) +{ + /* Defaults for the latest behaviour inherited from the base class */ +} + -+DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", true); ++DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", true); ++ ++/* ++ * pseries-rhel8.1.0 ++ * like pseries-4.1 ++ */ ++ ++static void spapr_machine_rhel810_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ static GlobalProperty compat[] = { ++ /* Only allow 4kiB and 64kiB IOMMU pagesizes */ ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pgsz", "0x11000" }, ++ }; ++ ++ spapr_machine_rhel820_class_options(mc); ++ ++ /* from pseries-4.1 */ ++ smc->linux_pci_probe = false; ++ smc->smp_threads_vsmt = false; ++ 
compat_props_add(mc->compat_props, hw_compat_rhel_8_1, ++ hw_compat_rhel_8_1_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); ++ ++} ++ ++DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false); + +/* + * pseries-rhel8.0.0 @@ -159,6 +187,7 @@ index e076f60..f3652ed 100644 + + /* from spapr_machine_3_0_class_options() */ + smc->legacy_irq_allocation = true; ++ smc->nr_xirqs = 0x400; + smc->irq = &spapr_irq_xics_legacy; + + /* from spapr_machine_2_12_class_options() */ @@ -343,7 +372,7 @@ index e076f60..f3652ed 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 301cd7b..ba5a8fb 100644 +index 301cd7b4e4..ba5a8fb82b 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -24,6 +24,7 @@ @@ -381,7 +410,7 @@ index 301cd7b..ba5a8fb 100644 goto error_intc_create; } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index d5ab5ea..aa89cc4 100644 +index d5ab5ea7b2..aa89cc4a95 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -125,6 +125,7 @@ struct SpaprMachineClass { @@ -393,7 +422,7 @@ index d5ab5ea..aa89cc4 100644 uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, diff --git a/target/ppc/compat.c b/target/ppc/compat.c -index 7de4bf3..3e2e353 100644 +index 7de4bf3122..3e2e35342d 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -105,8 +105,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) @@ -418,7 +447,7 @@ index 7de4bf3..3e2e353 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index e3e8232..5c53801 100644 +index e3e82327b7..5c53801cfd 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1367,6 +1367,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) @@ -430,5 +459,5 @@ index e3e8232..5c53801 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool 
ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, -- -1.8.3.1 +2.21.0 diff --git a/0010-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch index 9fb964a..d0f6669 100644 --- a/0010-Add-s390x-machine-types.patch +++ b/0010-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 88abdedad4c594c86eb2b92d490b676fa7494d6c Mon Sep 17 00:00:00 2001 +From 0842700b3a01891c316e9169fa651f26714cafa5 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -7,7 +7,7 @@ Adding changes to add RHEL machine types for s390x architecture. Signed-off-by: Miroslav Rezanina -Rebase changes (weekly-190111): +Rebase changes (weekly-4.1.0): - Use upstream compat handling Merged patches (3.1.0): @@ -16,17 +16,18 @@ Merged patches (3.1.0): Merged patches (4.1.0): - 6c200d665b hw/s390x/s390-virtio-ccw: Add machine types for RHEL8.0.0 -Merged patches (weekly-190830): +Merged patches (4.2.0): - fb192e5 redhat: s390x: Rename s390-ccw-virtio-rhel8.0.0 to s390-ccw-virtio-rhel8.1.0 - a9b22e8 redhat: s390x: Add proper compatibility options for the -rhel7.6.0 machine +- hw/s390x: Add the s390-ccw-virtio-rhel8.2.0 machine types (patch 92954) Signed-off-by: Danilo C. L. 
de Paula --- - hw/s390x/s390-virtio-ccw.c | 80 +++++++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 79 insertions(+), 1 deletion(-) + hw/s390x/s390-virtio-ccw.c | 70 +++++++++++++++++++++++++++++++++++++- + 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index d3edeef..7bca634 100644 +index d3edeef0ad..c2c83d2fce 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -615,7 +615,7 @@ bool css_migration_enabled(void) @@ -46,44 +47,26 @@ index d3edeef..7bca634 100644 static void ccw_machine_4_2_instance_options(MachineState *machine) { } -@@ -866,6 +867,83 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -866,6 +867,73 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); +#endif -+/* -+ * like CCW_COMPAT_2_12 + CCW_COMPAT_3_0 (which are empty), but includes -+ * HW_COMPAT_RHEL7_6 instead of HW_COMPAT_2_11 and HW_COMPAT_3_0 -+ */ + -+GlobalProperty ccw_compat_rhel_7_6[] = -+{ -+}; -+const size_t ccw_compat_rhel_7_6_len = G_N_ELEMENTS(ccw_compat_rhel_7_6); -+ -+GlobalProperty ccw_compat_rhel_7_5[] = { -+ { -+ .driver = TYPE_SCLP_EVENT_FACILITY, -+ .property = "allow_all_mask_sizes", -+ .value = "off", -+ }, -+}; -+const size_t ccw_compat_rhel_7_5_len = G_N_ELEMENTS(ccw_compat_rhel_7_5); -+ -+static void ccw_machine_rhel810_instance_options(MachineState *machine) ++static void ccw_machine_rhel820_instance_options(MachineState *machine) +{ +} + -+static void ccw_machine_rhel810_class_options(MachineClass *mc) ++static void ccw_machine_rhel820_class_options(MachineClass *mc) +{ +} -+DEFINE_CCW_MACHINE(rhel810, "rhel8.1.0", true); ++DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", true); + +static void ccw_machine_rhel760_instance_options(MachineState *machine) +{ + static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V3_1 }; + -+ 
ccw_machine_rhel810_instance_options(machine); ++ ccw_machine_rhel820_instance_options(machine); + + s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat); + @@ -97,11 +80,10 @@ index d3edeef..7bca634 100644 + +static void ccw_machine_rhel760_class_options(MachineClass *mc) +{ -+ ccw_machine_rhel810_class_options(mc); -+ /* We never published the s390x version of RHEL8.0 AV, so add this here */ ++ ccw_machine_rhel820_class_options(mc); ++ /* We never published the s390x version of RHEL-AV 8.0 and 8.1, so add this here */ + compat_props_add(mc->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); + compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); -+ compat_props_add(mc->compat_props, ccw_compat_rhel_7_6, ccw_compat_rhel_7_6_len); +} +DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", false); + @@ -119,6 +101,15 @@ index d3edeef..7bca634 100644 + s390_cpudef_featoff_greater(11, 1, S390_FEAT_BPB); +} + ++GlobalProperty ccw_compat_rhel_7_5[] = { ++ { ++ .driver = TYPE_SCLP_EVENT_FACILITY, ++ .property = "allow_all_mask_sizes", ++ .value = "off", ++ }, ++}; ++const size_t ccw_compat_rhel_7_5_len = G_N_ELEMENTS(ccw_compat_rhel_7_5); ++ +static void ccw_machine_rhel750_class_options(MachineClass *mc) +{ + ccw_machine_rhel760_class_options(mc); @@ -131,5 +122,5 @@ index d3edeef..7bca634 100644 static void ccw_machine_register_types(void) { -- -1.8.3.1 +2.21.0 diff --git a/0011-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch index d7af8fd..72a5159 100644 --- a/0011-Add-x86_64-machine-types.patch +++ b/0011-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From aef82bbd0ad99c43bc0b97932cf87fd16303bf5e Mon Sep 17 00:00:00 2001 +From 2ebaeca6e26950f401a8169d1324be2bafd11741 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -29,27 +29,28 @@ Merged patches (4.1.0): - 18cf0d7 target/i386: Disable MPX support on named CPU models (partialy) - 2660667 rhel: Set 
host-phys-bits-limit=48 on rhel machine-types -Merged patches (weekly-190823): +Merged patches (4.2.0): - 7d5c2ef pc: Don't make die-id mandatory unless necessary - e42808c x86 machine types: pc_rhel_8_0_compat - 9de83a8 x86 machine types: q35: Fixup units_per_default_bus - 6df1559 x86 machine types: Fixup dynamic sysbus entries - 0784125 x86 machine types: add pc-q35-rhel8.1.0 +- machines/x86: Add rhel 8.2 machine type (patch 92959) Signed-off-by: Danilo C. L. de Paula --- hw/i386/acpi-build.c | 3 + - hw/i386/pc.c | 259 ++++++++++++++++++++++++++++++++++++++++++++++++++- - hw/i386/pc_piix.c | 208 ++++++++++++++++++++++++++++++++++++++++- - hw/i386/pc_q35.c | 139 ++++++++++++++++++++++++++- + hw/i386/pc.c | 263 ++++++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 210 +++++++++++++++++++++++++++++++++- + hw/i386/pc_q35.c | 156 ++++++++++++++++++++++++- include/hw/boards.h | 2 + - include/hw/i386/pc.h | 30 ++++++ + include/hw/i386/pc.h | 33 ++++++ target/i386/cpu.c | 9 +- target/i386/kvm.c | 4 + - 8 files changed, 647 insertions(+), 7 deletions(-) + 8 files changed, 673 insertions(+), 7 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index 12ff55f..6400189 100644 +index 12ff55fcfb..64001893ab 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -204,6 +204,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) @@ -63,10 +64,10 @@ index 12ff55f..6400189 100644 } diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index ac08e63..28850ae 100644 +index ac08e63604..61e70e4811 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -344,6 +344,257 @@ GlobalProperty pc_compat_1_4[] = { +@@ -344,6 +344,261 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -82,6 +83,10 @@ index ac08e63..28850ae 100644 +}; +const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++/* pc_rhel_8_1_compat is empty since pc_4_1_compat is */ ++GlobalProperty pc_rhel_8_1_compat[] = { }; 
++const size_t pc_rhel_8_1_compat_len = G_N_ELEMENTS(pc_rhel_8_1_compat); ++ +GlobalProperty pc_rhel_8_0_compat[] = { + /* pc_rhel_8_0_compat from pc_compat_3_1 */ + { "intel-iommu", "dma-drain", "off" }, @@ -324,7 +329,7 @@ index ac08e63..28850ae 100644 void gsi_handler(void *opaque, int n, int level) { GSIState *s = opaque; -@@ -1225,7 +1476,8 @@ void pc_memory_init(PCMachineState *pcms, +@@ -1225,7 +1480,8 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr = g_malloc(sizeof(*option_rom_mr)); memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, &error_fatal); @@ -334,7 +339,7 @@ index ac08e63..28850ae 100644 memory_region_set_readonly(option_rom_mr, true); } memory_region_add_subregion_overlap(rom_memory, -@@ -2198,6 +2450,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -2198,6 +2454,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->linuxboot_dma_enabled = true; pcmc->pvh_enabled = true; assert(!mc->get_hotplug_handler); @@ -343,7 +348,7 @@ index ac08e63..28850ae 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -2209,7 +2463,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -2209,7 +2467,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->hot_add_cpu = pc_hot_add_cpu; mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; @@ -354,7 +359,7 @@ index ac08e63..28850ae 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 1bd70d1..7f2ee97 100644 +index 1bd70d1abb..bd7fdb99bb 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -53,6 +53,7 @@ @@ -384,7 +389,7 @@ index 1bd70d1..7f2ee97 100644 static void pc_compat_2_3_fn(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); -@@ -1026,3 +1028,205 @@ static void 
xenfv_machine_options(MachineClass *m) +@@ -1026,3 +1028,207 @@ static void xenfv_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init, xenfv_machine_options); #endif @@ -422,6 +427,8 @@ index 1bd70d1..7f2ee97 100644 + m->smbus_no_migration_support = true; + pcmc->pvh_enabled = false; + pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); + compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); @@ -591,7 +598,7 @@ index 1bd70d1..7f2ee97 100644 +DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 385e5cf..4598db2 100644 +index 385e5cffb1..7531d8ed76 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -197,8 +197,8 @@ static void pc_q35_init(MachineState *machine) @@ -613,7 +620,7 @@ index 385e5cf..4598db2 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -533,3 +534,137 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -533,3 +534,154 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -641,6 +648,20 @@ index 385e5cf..4598db2 100644 + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); +} + ++static void pc_q35_init_rhel820(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel820_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, ++ 
pc_q35_machine_rhel820_options); ++ +static void pc_q35_init_rhel810(MachineState *machine) +{ + pc_q35_init(machine); @@ -648,8 +669,11 @@ index 385e5cf..4598db2 100644 + +static void pc_q35_machine_rhel810_options(MachineClass *m) +{ -+ pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel820_options(m); + m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); +} + +DEFINE_PC_MACHINE(q35_rhel810, "pc-q35-rhel8.1.0", pc_q35_init_rhel810, @@ -752,7 +776,7 @@ index 385e5cf..4598db2 100644 +DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, + pc_q35_machine_rhel730_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index cc11116..3cc126f 100644 +index 6f85a0e032..2920bdef5b 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -222,6 +222,8 @@ struct MachineClass { @@ -765,7 +789,7 @@ index cc11116..3cc126f 100644 int nb_nodes, ram_addr_t size); bool ignore_boot_device_suffixes; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 1f86eba..dd680ae 100644 +index 1f86eba3f9..2e362c8faa 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -124,6 +124,9 @@ typedef struct PCMachineClass { @@ -778,13 +802,16 @@ index 1f86eba..dd680ae 100644 } PCMachineClass; #define TYPE_PC_MACHINE "generic-pc-machine" -@@ -300,6 +303,33 @@ extern const size_t pc_compat_1_5_len; +@@ -300,6 +303,36 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_8_1_compat[]; ++extern const size_t pc_rhel_8_1_compat_len; ++ +extern GlobalProperty pc_rhel_8_0_compat[]; +extern const size_t pc_rhel_8_0_compat_len; + @@ -813,7 +840,7 @@ index 1f86eba..dd680ae 100644 * depending on QEMU 
versions up to QEMU 2.4. */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 1b7880a..790db77 100644 +index 1b7880ae3a..790db778ab 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1829,11 +1829,17 @@ static CPUCaches epyc_cache_info = { @@ -844,10 +871,10 @@ index 1b7880a..790db77 100644 }; diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index bf16556..1b19797 100644 +index 1d10046a6c..86d9a1f364 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c -@@ -3071,6 +3071,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3079,6 +3079,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -855,7 +882,7 @@ index bf16556..1b19797 100644 kvm_msr_buf_reset(cpu); -@@ -3380,6 +3381,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3388,6 +3389,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -866,5 +893,5 @@ index bf16556..1b19797 100644 case MSR_KVM_PV_EOI_EN: env->pv_eoi_en_msr = msrs[i].data; -- -1.8.3.1 +2.21.0 diff --git a/0012-Enable-make-check.patch b/0012-Enable-make-check.patch index c56a715..09f7b4e 100644 --- a/0012-Enable-make-check.patch +++ b/0012-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 88d09fe5c46c80214f883bd097ca86a99ca1ca41 Mon Sep 17 00:00:00 2001 +From 154215041df085271a780a2989f4f481226e3e34 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:48:41 +0200 Subject: Enable make check @@ -44,7 +44,7 @@ Signed-off-by: Danilo C. L. 
de Paula 11 files changed, 37 insertions(+), 17 deletions(-) diff --git a/tests/Makefile.include b/tests/Makefile.include -index b483790..53bdbdf 100644 +index b483790cf3..53bdbdfee0 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -172,7 +172,7 @@ check-qtest-i386-y += tests/ide-test$(EXESUF) @@ -86,7 +86,7 @@ index b483790..53bdbdf 100644 check-qtest-s390x-y += tests/cpu-plug-test$(EXESUF) check-qtest-s390x-y += tests/migration-test$(EXESUF) diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c -index d3a54a0..33ce72b 100644 +index d3a54a0ba5..33ce72b89c 100644 --- a/tests/boot-serial-test.c +++ b/tests/boot-serial-test.c @@ -108,19 +108,23 @@ static testdef_t tests[] = { @@ -115,7 +115,7 @@ index d3a54a0..33ce72b 100644 { "sparc", "LX", "", "TMS390S10" }, { "sparc", "SS-4", "", "MB86904" }, diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c -index 30e514b..a04beae 100644 +index 30e514bbfb..a04beae1c6 100644 --- a/tests/cpu-plug-test.c +++ b/tests/cpu-plug-test.c @@ -185,8 +185,8 @@ static void add_pseries_test_case(const char *mname) @@ -130,7 +130,7 @@ index 30e514b..a04beae 100644 } data = g_new(PlugTestData, 1); diff --git a/tests/e1000-test.c b/tests/e1000-test.c -index c387984..c89112d 100644 +index c387984ef6..c89112d6f8 100644 --- a/tests/e1000-test.c +++ b/tests/e1000-test.c @@ -22,9 +22,11 @@ struct QE1000 { @@ -146,7 +146,7 @@ index c387984..c89112d 100644 static void *e1000_get_driver(void *obj, const char *interface) diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c -index 7e86c54..cc068ba 100644 +index 7e86c5416c..cc068bad87 100644 --- a/tests/hd-geo-test.c +++ b/tests/hd-geo-test.c @@ -732,6 +732,7 @@ static void test_override_ide(void) @@ -178,7 +178,7 @@ index 7e86c54..cc068ba 100644 qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); qtest_add_func("hd-geo/override/scsi_hot_unplug", diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c -index 61bc1d1..028d45c 100644 +index 
61bc1d1e7b..028d45c7d7 100644 --- a/tests/prom-env-test.c +++ b/tests/prom-env-test.c @@ -88,10 +88,14 @@ int main(int argc, char *argv[]) @@ -197,7 +197,7 @@ index 61bc1d1..028d45c 100644 add_tests(sparc_machines); } else if (!strcmp(arch, "sparc64")) { diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 -index 53bcdbc..b387e0c 100755 +index 53bcdbc911..b387e0c233 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -181,11 +181,11 @@ run_qemu -drive if=virtio @@ -231,7 +231,7 @@ index 53bcdbc..b387e0c 100755 *) ;; diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 6b10a6a..06cc734 100644 +index 6b10a6a762..06cc734b26 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -92,7 +92,7 @@ @@ -253,7 +253,7 @@ index 6b10a6a..06cc734 100644 101 rw quick 102 rw quick diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c -index 772287b..e7c075e 100644 +index 772287bdb4..e7c075ed98 100644 --- a/tests/test-x86-cpuid-compat.c +++ b/tests/test-x86-cpuid-compat.c @@ -300,6 +300,7 @@ int main(int argc, char **argv) @@ -273,7 +273,7 @@ index 772287b..e7c075e 100644 /* Test feature parsing */ add_feature_test("x86/cpuid/features/plus", diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c -index 10ef9d2..3855873 100644 +index 10ef9d2a91..3855873050 100644 --- a/tests/usb-hcd-xhci-test.c +++ b/tests/usb-hcd-xhci-test.c @@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) @@ -303,5 +303,5 @@ index 10ef9d2..3855873 100644 qtest_start("-device nec-usb-xhci,id=xhci" -- -1.8.3.1 +2.21.0 diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index ed8007a..db776c4 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 58e199b5cb9364f46b68d8cf618dff7564d427bb Mon Sep 17 00:00:00 2001 +From 
de433da59448eaad4ac1b902d07d57b57f922aff Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -42,7 +42,7 @@ Signed-off-by: Danilo C. L. de Paula 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index c8534d3..309535f 100644 +index c8534d3035..309535f306 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -47,6 +47,9 @@ @@ -98,7 +98,7 @@ index c8534d3..309535f 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 35626cd..0cd4803 100644 +index 35626cd63e..0cd4803aee 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -135,6 +135,7 @@ typedef struct VFIOPCIDevice { @@ -110,5 +110,5 @@ index 35626cd..0cd4803 100644 uint32_t device_id; uint32_t sub_vendor_id; -- -1.8.3.1 +2.21.0 diff --git a/0014-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch index d52f32e..cb77bfe 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 73edca2e4885b3f2b65edb08b6cdb53d68494c13 Mon Sep 17 00:00:00 2001 +From 2754dd8da8975757753fd491985d5e7b36966106 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -24,7 +24,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 9 insertions(+) diff --git a/vl.c b/vl.c -index 668a345..9f3e7e7 100644 +index 668a34577e..9f3e7e7733 100644 --- a/vl.c +++ b/vl.c @@ -1822,9 +1822,17 @@ static void version(void) @@ -54,5 +54,5 @@ index 668a345..9f3e7e7 100644 } -- -1.8.3.1 +2.21.0 diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch index 936b929..cec862d 100644 --- a/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From 9647d94ec17c7fa645336a881147a7e841cb78c2 Mon Sep 17 00:00:00 2001 +From c9c3cf721b0e9e359418f64c2a5121c3f8b5d27a Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -82,7 +82,7 @@ Signed-off-by: Danilo C. L. de Paula 2 files changed, 30 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index ca00daa..dc3ed7f 100644 +index ca00daa2f5..dc3ed7f04e 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -1943,6 +1943,18 @@ static int kvm_init(MachineState *ms) @@ -105,7 +105,7 @@ index ca00daa..dc3ed7f 100644 if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " diff --git a/vl.c b/vl.c -index 9f3e7e7..1550aa2 100644 +index 9f3e7e7733..1550aa2aaa 100644 --- a/vl.c +++ b/vl.c @@ -134,6 +134,8 @@ int main(int argc, char **argv) @@ -148,5 +148,5 @@ index 9f3e7e7..1550aa2 100644 configure_rtc(qemu_find_opts_singleton("rtc")); -- -1.8.3.1 +2.21.0 diff --git a/0016-Add-support-for-simpletrace.patch b/0016-Add-support-for-simpletrace.patch index fda1b79..9624855 100644 --- a/0016-Add-support-for-simpletrace.patch +++ b/0016-Add-support-for-simpletrace.patch @@ -1,4 +1,4 @@ -From d78f2713725382c792154ce482a1b03b749b909f Mon Sep 17 00:00:00 2001 +From 26128b3ede339e292a3c50a84e3248af46ecd0ec Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 8 Oct 2015 09:50:17 +0200 
Subject: Add support for simpletrace @@ -26,8 +26,8 @@ Signed-off-by: Danilo C. L. de Paula --- .gitignore | 2 ++ Makefile | 4 +++ - README.systemtap | 43 +++++++++++++++++++++++++++++++++ - redhat/qemu-kvm.spec.template | 26 +++++++++++++++++++- + README.systemtap | 43 +++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 26 ++++++++++++++- scripts/systemtap/conf.d/qemu_kvm.conf | 4 +++ scripts/systemtap/script.d/qemu_kvm.stp | 1 + 6 files changed, 79 insertions(+), 1 deletion(-) @@ -36,7 +36,7 @@ Signed-off-by: Danilo C. L. de Paula create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp diff --git a/Makefile b/Makefile -index 086727d..4254950 100644 +index 086727dbb9..4254950f7f 100644 --- a/Makefile +++ b/Makefile @@ -939,6 +939,10 @@ endif @@ -52,7 +52,7 @@ index 086727d..4254950 100644 ctags: diff --git a/README.systemtap b/README.systemtap new file mode 100644 -index 0000000..ad913fc +index 0000000000..ad913fc990 --- /dev/null +++ b/README.systemtap @@ -0,0 +1,43 @@ @@ -101,7 +101,7 @@ index 0000000..ad913fc + # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf new file mode 100644 -index 0000000..372d816 +index 0000000000..372d8160a4 --- /dev/null +++ b/scripts/systemtap/conf.d/qemu_kvm.conf @@ -0,0 +1,4 @@ @@ -111,11 +111,11 @@ index 0000000..372d816 +qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp new file mode 100644 -index 0000000..c04abf9 +index 0000000000..c04abf9449 --- /dev/null +++ b/scripts/systemtap/script.d/qemu_kvm.stp @@ -0,0 +1 @@ +probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} -- -1.8.3.1 +2.21.0 diff --git a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch 
b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 707515d..ef83445 100644 --- a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From 4dcf2dac71a39d2e5b44cf6a4a43abdd89a11c60 Mon Sep 17 00:00:00 2001 +From 97ed62562b883c384346bfef3e1c7e379f03ccab Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 30 Nov 2018 09:11:03 +0100 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -25,7 +25,7 @@ Signed-off-by: Danilo C. L. de Paula 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi -index 2c7ea49..5d0afb3 100644 +index 2c7ea49c32..5d0afb3dee 100644 --- a/docs/qemu-block-drivers.texi +++ b/docs/qemu-block-drivers.texi @@ -2,7 +2,7 @@ @@ -38,7 +38,7 @@ index 2c7ea49..5d0afb3 100644 @c man begin DESCRIPTION diff --git a/docs/qemu-cpu-models.texi b/docs/qemu-cpu-models.texi -index f88a1de..c82cf8f 100644 +index f88a1def0d..c82cf8fab7 100644 --- a/docs/qemu-cpu-models.texi +++ b/docs/qemu-cpu-models.texi @@ -2,7 +2,7 @@ @@ -51,7 +51,7 @@ index f88a1de..c82cf8f 100644 @c man begin DESCRIPTION diff --git a/qemu-doc.texi b/qemu-doc.texi -index 3ddf5c0..d460f8d 100644 +index 3ddf5c0a68..d460f8d2c0 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -11,8 +11,8 @@ @@ -75,7 +75,7 @@ index 3ddf5c0..d460f8d 100644 -prom-env 'boot-args=conf=hd:2,\yaboot.conf' @end example diff --git a/qemu-options.hx b/qemu-options.hx -index fc17aca..df1d27b 100644 +index fc17aca631..df1d27b6f2 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2737,11 +2737,11 @@ be created for multiqueue vhost-user. 
@@ -114,5 +114,5 @@ index fc17aca..df1d27b 100644 ETEXI -- -1.8.3.1 +2.21.0 diff --git a/0018-usb-xhci-Fix-PCI-capability-order.patch b/0018-usb-xhci-Fix-PCI-capability-order.patch index 4a49700..bc6146d 100644 --- a/0018-usb-xhci-Fix-PCI-capability-order.patch +++ b/0018-usb-xhci-Fix-PCI-capability-order.patch @@ -1,4 +1,4 @@ -From eba5ef4e161aeb71df26b1c43577945ae3093f2b Mon Sep 17 00:00:00 2001 +From b13a7d3527c5c91e7a50236de30a2244b8453911 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 5 May 2017 19:06:14 +0200 Subject: usb-xhci: Fix PCI capability order @@ -62,7 +62,7 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 8fed2ee..d2b9744 100644 +index 8fed2eedd6..d2b9744030 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -3403,6 +3403,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) @@ -92,5 +92,5 @@ index 8fed2ee..d2b9744 100644 /* TODO check for errors, and should fail when msix=on */ msix_init(dev, xhci->numintrs, -- -1.8.3.1 +2.21.0 diff --git a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index ffb678e..e167b2e 100644 --- a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 033166f31288a5104d4e55a828ce8d62c9ccca1c Mon Sep 17 00:00:00 2001 +From 3fab8f5e8a9e190c1ed6916ac13c7c4d65e874b7 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] @@ -45,7 +45,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index e8b2b64..54108c0 100644 +index e8b2b64d09..54108c0056 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -808,6 +808,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, @@ -65,5 +65,5 @@ index e8b2b64..54108c0 100644 if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { return; -- -1.8.3.1 +2.21.0 diff --git a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index 7940b26..b3350da 100644 --- a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From 6ce9df118313e3b2d21c70994d0b42b33b59d27c Mon Sep 17 00:00:00 2001 +From 148e9e80a3a430615b552075082fad22d007d851 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts @@ -32,7 +32,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index 481dfd2..805f385 100644 +index 481dfd2a27..805f38533e 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -351,12 +351,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, @@ -56,5 +56,5 @@ index 481dfd2..805f385 100644 spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); } -- -1.8.3.1 +2.21.0 diff --git a/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch b/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch index 5b8a57d..a2a800b 100644 --- a/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch +++ b/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch @@ -1,4 +1,4 @@ -From a0059f1ef0bfd6852f838491120adf1be20857cc Mon Sep 17 00:00:00 2001 +From ab9ebc29bb9bb142e73a160750a451d40bfe9746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 16 Sep 2019 17:07:00 +0100 Subject: Using ip_deq after m_free might read pointers from an allocation @@ -31,7 +31,7 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c -index 8c75d91..df1c846 100644 +index 8c75d91495..df1c846ade 100644 --- a/slirp/src/ip_input.c +++ b/slirp/src/ip_input.c @@ -292,6 +292,7 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) @@ -57,5 +57,5 @@ index 8c75d91..df1c846 100644 insert: -- -1.8.3.1 +2.21.0 diff --git a/0023-Temporarily-update-VERSION-to-8.2.0.patch b/0023-Temporarily-update-VERSION-to-8.2.0.patch deleted file mode 100644 index 284d497..0000000 --- a/0023-Temporarily-update-VERSION-to-8.2.0.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 2a87b3e90278f47395975a77fc056d2f964f4725 Mon Sep 17 00:00:00 2001 -From: "Danilo C. L. 
de Paula" -Date: Tue, 19 Nov 2019 08:41:33 -0300 -Subject: Temporarily update VERSION to 8.2.0 - -rhbz#1773397 - -This will change when the official release appears. -But, to make qemu-kvm --version happy, we need to fix this now. - -Conflicts: - VERSION - -Signed-off-by: Danilo C. L. de Paula ---- - VERSION | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/VERSION b/VERSION -index dcd8753..6aba2b2 100644 ---- a/VERSION -+++ b/VERSION -@@ -1 +1 @@ --4.1.94 -+4.2.0 --- -1.8.3.1 - diff --git a/qemu-kvm.spec b/qemu-kvm.spec index abdbd7f..6a554c2 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -1,6 +1,5 @@ %global SLOF_gittagdate 20191022 %global SLOF_gittagcommit 899d9883 -%global rcversion -rc4 %global have_usbredir 1 %global have_spice 1 @@ -68,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 2%{?dist} +Release: 4%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -77,7 +76,7 @@ URL: http://www.qemu.org/ ExclusiveArch: x86_64 %{power64} aarch64 s390x -Source0: http://wiki.qemu.org/download/qemu-4.2.0-rc4.tar.xz +Source0: http://wiki.qemu.org/download/qemu-4.2.0.tar.xz # KSM control scripts Source4: ksm.service @@ -105,7 +104,6 @@ Source35: udev-kvm-check.c Source36: README.tests -Patch0001: 0001-redhat-Adding-slirp-to-the-exploded-tree.patch Patch0005: 0005-Initial-redhat-build.patch Patch0006: 0006-Enable-disable-devices-for-RHEL.patch Patch0007: 0007-Machine-type-related-general-changes.patch @@ -123,7 +121,6 @@ Patch0018: 0018-usb-xhci-Fix-PCI-capability-order.patch Patch0019: 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0020: 0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch Patch0021: 0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch -Patch0023: 0023-Temporarily-update-VERSION-to-8.2.0.patch BuildRequires: wget BuildRequires: 
rpm-build @@ -409,10 +406,7 @@ the Secure Shell (SSH) protocol. %prep -%setup -n qemu-%{version}%{rcversion} -# Remove slirp content in scratchbuilds because it's being applyed as a patch -rm -fr slirp -mkdir slirp +%setup -n qemu-%{version} %autopatch -p1 %build @@ -1061,13 +1055,76 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Fri Dec 13 2019 Danilo Cesar Lemes de Paula - 4.2.0-4.el8 +- Rebase to qemu-4.2 +- Resolves: bz#1783250 + (rebase qemu-kvm to 4.2) -* Tue Nov 19 2019 Danilo Cesar Lemes de Paula - 4.2.0-2.el8 -- 0023-Temporarily-update-VERSION-to-8.2.0.patch [bz#1773397] -- Resolves: bz#1773397 - (QEMU emulator version is "4.1.91" for qemu-kvm-4.2.0-0.module+el8.2.0+471 -- Resoves: bz#1773392 - ([ppc] Need to rebase SLOF image for qemu-kvm-4.2) +* Tue Dec 10 2019 Danilo Cesar Lemes de Paula - 4.1.0-18.el8 +- kvm-LUKS-support-preallocation.patch [bz#1534951] +- kvm-nbd-add-empty-.bdrv_reopen_prepare.patch [bz#1718727] +- kvm-qdev-qbus-add-hidden-device-support.patch [bz#1757796] +- kvm-pci-add-option-for-net-failover.patch [bz#1757796] +- kvm-pci-mark-devices-partially-unplugged.patch [bz#1757796] +- kvm-pci-mark-device-having-guest-unplug-request-pending.patch [bz#1757796] +- kvm-qapi-add-unplug-primary-event.patch [bz#1757796] +- kvm-qapi-add-failover-negotiated-event.patch [bz#1757796] +- kvm-migration-allow-unplug-during-migration-for-failover.patch [bz#1757796] +- kvm-migration-add-new-migration-state-wait-unplug.patch [bz#1757796] +- kvm-libqos-tolerate-wait-unplug-migration-state.patch [bz#1757796] +- kvm-net-virtio-add-failover-support.patch [bz#1757796] +- kvm-vfio-unplug-failover-primary-device-before-migration.patch [bz#1757796] +- kvm-net-virtio-fix-dev_unplug_pending.patch [bz#1757796] +- kvm-net-virtio-return-early-when-failover-primary-alread.patch [bz#1757796] +- kvm-net-virtio-fix-re-plugging-of-primary-device.patch [bz#1757796] +- kvm-net-virtio-return-error-when-device_opts-arg-is-NULL.patch [bz#1757796] +- 
kvm-vfio-don-t-ignore-return-value-of-migrate_add_blocke.patch [bz#1757796] +- kvm-hw-vfio-pci-Fix-double-free-of-migration_blocker.patch [bz#1757796] +- Resolves: bz#1534951 + (RFE: Support preallocation mode for luks format) +- Resolves: bz#1718727 + (Committing changes to the backing file over NBD fails with reopening files not supported) +- Resolves: bz#1757796 + (RFE: support for net failover devices in qemu) + +* Mon Dec 02 2019 Danilo Cesar Lemes de Paula - 4.1.0-17.el8 +- kvm-qemu-pr-helper-fix-crash-in-mpath_reconstruct_sense.patch [bz#1772322] +- Resolves: bz#1772322 + (qemu-pr-helper: fix crash in mpath_reconstruct_sense) + +* Wed Nov 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-16.el8 +- kvm-curl-Keep-pointer-to-the-CURLState-in-CURLSocket.patch [bz#1745209] +- kvm-curl-Keep-socket-until-the-end-of-curl_sock_cb.patch [bz#1745209] +- kvm-curl-Check-completion-in-curl_multi_do.patch [bz#1745209] +- kvm-curl-Pass-CURLSocket-to-curl_multi_do.patch [bz#1745209] +- kvm-curl-Report-only-ready-sockets.patch [bz#1745209] +- kvm-curl-Handle-success-in-multi_check_completion.patch [bz#1745209] +- kvm-curl-Check-curl_multi_add_handle-s-return-code.patch [bz#1745209] +- kvm-vhost-user-save-features-if-the-char-dev-is-closed.patch [bz#1738768] +- kvm-block-snapshot-Restrict-set-of-snapshot-nodes.patch [bz#1658981] +- kvm-iotests-Test-internal-snapshots-with-blockdev.patch [bz#1658981] +- kvm-qapi-Add-feature-flags-to-commands-in-qapi-introspec.patch [bz#1658981] +- kvm-qapi-Allow-introspecting-fix-for-savevm-s-cooperatio.patch [bz#1658981] +- kvm-block-Remove-backing-null-from-bs-explicit_-options.patch [bz#1773925] +- kvm-iotests-Test-multiple-blockdev-snapshot-calls.patch [bz#1773925] +- Resolves: bz#1658981 + (qemu failed to create internal snapshot via 'savevm' when using blockdev) +- Resolves: bz#1738768 + (Guest fails to recover receiving packets after vhost-user reconnect) +- Resolves: bz#1745209 + (qemu-img gets stuck when stream-converting from http) +- 
Resolves: bz#1773925 + (Fail to do blockcommit with more than one snapshots) + +* Thu Nov 14 2019 Danilo Cesar Lemes de Paula - 4.1.0-15.el8 +- kvm-virtio-blk-Add-blk_drain-to-virtio_blk_device_unreal.patch [bz#1706759] +- kvm-Revert-qcow2-skip-writing-zero-buffers-to-empty-COW-.patch [bz#1772473] +- kvm-coroutine-Add-qemu_co_mutex_assert_locked.patch [bz#1772473] +- kvm-qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch [bz#1772473] +- Resolves: bz#1706759 + (qemu core dump when unplug a 16T GPT type disk from win2019 guest) +- Resolves: bz#1772473 + (Import fixes from 8.1.0 into 8.1.1 branch) * Tue Oct 29 2019 Danilo Cesar Lemes de Paula - 4.1.0-14.el8 - kvm-Revert-qcow2-skip-writing-zero-buffers-to-empty-COW-.patch [bz#1751934] diff --git a/sources b/sources index dc76917..46350e1 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-4.2.0-rc4.tar.xz) = 01d7fd8368b37ce9239b5884654962c947e2c597c0042b256bb9d1ebf2fd5159b1182e43094e966e54368c792b390f48ce12bce7ee878f5f3ab685fc118aa7db +SHA512 (qemu-4.2.0.tar.xz) = 2a79973c2b07c53e8c57a808ea8add7b6b2cbca96488ed5d4b669ead8c9318907dec2b6109f180fc8ca8f04c0f73a56e82b3a527b5626b799d7e849f2474ec56 From 1390e099eaf7fc0af1212c6a628515b1eea9d69b Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Tue, 7 Jan 2020 20:29:46 +0000 Subject: [PATCH 060/195] * Tue Jan 07 2020 Danilo Cesar Lemes de Paula - 4.2.0-5.el8 - kvm-i386-Remove-cpu64-rhel6-CPU-model.patch [bz#1741345] - kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch [bz#1772774] - Resolves: bz#1741345 (Remove the "cpu64-rhel6" CPU from qemu-kvm) - Resolves: bz#1772774 (qemu-kvm core dump during migration+reboot ( Assertion `mem->dirty_bmap' failed )) --- ...ate-dirty_bmap-when-we-change-a-slot.patch | 115 ++++++++++++++++++ kvm-i386-Remove-cpu64-rhel6-CPU-model.patch | 77 ++++++++++++ qemu-kvm.spec | 14 ++- 3 files changed, 205 insertions(+), 1 deletion(-) create mode 100644 kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch create mode 100644 kvm-i386-Remove-cpu64-rhel6-CPU-model.patch diff --git a/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch b/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch new file mode 100644 index 0000000..d717ae2 --- /dev/null +++ b/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch @@ -0,0 +1,115 @@ +From c477581ccc6962651d4d6c702a6c3e2fcc5e4205 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 2 Jan 2020 11:56:51 +0000 +Subject: [PATCH 2/2] kvm: Reallocate dirty_bmap when we change a slot + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200102115651.140177-1-dgilbert@redhat.com> +Patchwork-id: 93256 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] kvm: Reallocate dirty_bmap when we change a slot +Bugzilla: 1772774 +RH-Acked-by: Peter Xu +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Laszlo Ersek + +From: "Dr. David Alan Gilbert" + +bz: https://bugzilla.redhat.com/show_bug.cgi?id=1772774 +brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25575691 +branch: rhel-av-8.2.0 + +kvm_set_phys_mem can be called to reallocate a slot by something the +guest does (e.g. writing to PAM and other chipset registers). 
+This can happen in the middle of a migration, and if we're unlucky +it can now happen between the split 'sync' and 'clear'; the clear +asserts if there's no bmap to clear. Recreate the bmap whenever +we change the slot, keeping the clear path happy. + +Typically this is triggered by the guest rebooting during a migrate. + +Corresponds to: +https://bugzilla.redhat.com/show_bug.cgi?id=1772774 +https://bugzilla.redhat.com/show_bug.cgi?id=1771032 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Peter Xu +(cherry picked from commit 9b3a31c745b61758aaa5466a3a9fc0526d409188) +Signed-off-by: Danilo C. L. de Paula +--- + accel/kvm/kvm-all.c | 44 +++++++++++++++++++++++++++++--------------- + 1 file changed, 29 insertions(+), 15 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index dc3ed7f..5007bda 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -518,6 +518,27 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, + + #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) + ++/* Allocate the dirty bitmap for a slot */ ++static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) ++{ ++ /* ++ * XXX bad kernel interface alert ++ * For dirty bitmap, kernel allocates array of size aligned to ++ * bits-per-long. But for case when the kernel is 64bits and ++ * the userspace is 32bits, userspace can't align to the same ++ * bits-per-long, since sizeof(long) is different between kernel ++ * and user space. This way, userspace will provide buffer which ++ * may be 4 bytes less than the kernel will use, resulting in ++ * userspace memory corruption (which is not detectable by valgrind ++ * too, in most cases). ++ * So for now, let's align to 64 instead of HOST_LONG_BITS here, in ++ * a hope that sizeof(long) won't become >8 any time soon. 
++ */ ++ hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), ++ /*HOST_LONG_BITS*/ 64) / 8; ++ mem->dirty_bmap = g_malloc0(bitmap_size); ++} ++ + /** + * kvm_physical_sync_dirty_bitmap - Sync dirty bitmap from kernel space + * +@@ -550,23 +571,9 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + goto out; + } + +- /* XXX bad kernel interface alert +- * For dirty bitmap, kernel allocates array of size aligned to +- * bits-per-long. But for case when the kernel is 64bits and +- * the userspace is 32bits, userspace can't align to the same +- * bits-per-long, since sizeof(long) is different between kernel +- * and user space. This way, userspace will provide buffer which +- * may be 4 bytes less than the kernel will use, resulting in +- * userspace memory corruption (which is not detectable by valgrind +- * too, in most cases). +- * So for now, let's align to 64 instead of HOST_LONG_BITS here, in +- * a hope that sizeof(long) won't become >8 any time soon. +- */ + if (!mem->dirty_bmap) { +- hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), +- /*HOST_LONG_BITS*/ 64) / 8; + /* Allocate on the first log_sync, once and for all */ +- mem->dirty_bmap = g_malloc0(bitmap_size); ++ kvm_memslot_init_dirty_bitmap(mem); + } + + d.dirty_bitmap = mem->dirty_bmap; +@@ -1067,6 +1074,13 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + mem->ram = ram; + mem->flags = kvm_mem_flags(mr); + ++ if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { ++ /* ++ * Reallocate the bmap; it means it doesn't disappear in ++ * middle of a migrate. 
++ */ ++ kvm_memslot_init_dirty_bitmap(mem); ++ } + err = kvm_set_user_memory_region(kml, mem, true); + if (err) { + fprintf(stderr, "%s: error registering slot: %s\n", __func__, +-- +1.8.3.1 + diff --git a/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch b/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch new file mode 100644 index 0000000..5d62ace --- /dev/null +++ b/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch @@ -0,0 +1,77 @@ +From 4543a3c19816bd07f27eb900f20ae609df03703c Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Mon, 23 Dec 2019 21:10:31 +0000 +Subject: [PATCH 1/2] i386: Remove cpu64-rhel6 CPU model + +RH-Author: Eduardo Habkost +Message-id: <20191223211031.26503-1-ehabkost@redhat.com> +Patchwork-id: 93213 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] i386: Remove cpu64-rhel6 CPU model +Bugzilla: 1741345 +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Laszlo Ersek + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1741345 +BRANCH: rhel-av-8.2.0 +Upstream: not applicable +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25525975 + +We don't provide rhel6 machine types anymore, so we don't need to +provide compatibility with RHEl6. cpu64-rhel6 was documented as +deprecated and scheduled for removal in 8.2, so now it's time to +remove it. + +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 26 +------------------------- + 1 file changed, 1 insertion(+), 25 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 790db77..6dce6f2 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1829,12 +1829,7 @@ static CPUCaches epyc_cache_info = { + + static X86CPUDefinition builtin_x86_defs[] = { + { +- /* qemu64 is the default CPU model for all *-rhel7.* machine-types. +- * The default on RHEL-6 was cpu64-rhel6. 
+- * libvirt assumes that qemu64 is the default for _all_ machine-types, +- * so we should try to keep qemu64 and cpu64-rhel6 as similar as +- * possible. +- */ ++ /* qemu64 is the default CPU model for all machine-types */ + .name = "qemu64", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, +@@ -2135,25 +2130,6 @@ static X86CPUDefinition builtin_x86_defs[] = { + .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", + }, + { +- .name = "cpu64-rhel6", +- .level = 4, +- .vendor = CPUID_VENDOR_AMD, +- .family = 6, +- .model = 13, +- .stepping = 3, +- .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | +- CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | +- CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | +- CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | +- CPUID_PSE | CPUID_DE | CPUID_FP87, +- .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, +- .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, +- .features[FEAT_8000_0001_ECX] = CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | +- CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, +- .xlevel = 0x8000000A, +- .model_id = "QEMU Virtual CPU version (cpu64-rhel6)", +- }, +- { + .name = "Conroe", + .level = 10, + .vendor = CPUID_VENDOR_INTEL, +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 6a554c2..2f2643b 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 4%{?dist} +Release: 5%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -121,6 +121,10 @@ Patch0018: 0018-usb-xhci-Fix-PCI-capability-order.patch Patch0019: 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0020: 0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch Patch0021: 0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch +# For bz#1741345 - Remove the "cpu64-rhel6" CPU from qemu-kvm +Patch22: kvm-i386-Remove-cpu64-rhel6-CPU-model.patch +# For bz#1772774 - qemu-kvm core dump during migration+reboot ( Assertion `mem->dirty_bmap' failed ) +Patch23: kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch BuildRequires: wget BuildRequires: rpm-build @@ -1055,6 +1059,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Jan 07 2020 Danilo Cesar Lemes de Paula - 4.2.0-5.el8 +- kvm-i386-Remove-cpu64-rhel6-CPU-model.patch [bz#1741345] +- kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch [bz#1772774] +- Resolves: bz#1741345 + (Remove the "cpu64-rhel6" CPU from qemu-kvm) +- Resolves: bz#1772774 + (qemu-kvm core dump during migration+reboot ( Assertion `mem->dirty_bmap' failed )) + * Fri Dec 13 2019 Danilo Cesar Lemes de Paula - 4.2.0-4.el8 - Rebase to qemu-4.2 - Resolves: bz#1783250 From 686f1d075aa74f8d371d8a5da49a493c663d8fb4 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Wed, 15 Jan 2020 19:44:01 +0000 Subject: [PATCH 061/195] * Wed Jan 15 2020 Danilo Cesar Lemes de Paula - 4.2.0-6.el8 - kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch [bz#1733893] - kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch [bz#1782678] - kvm-virtio-don-t-enable-notifications-during-polling.patch [bz#1789301] - kvm-usbredir-Prevent-recursion-in-usbredir_write.patch [bz#1790844] - kvm-xhci-recheck-slot-status.patch [bz#1790844] - Resolves: bz#1733893 (Boot a guest with "-prom-env 'auto-boot?=false'", SLOF failed to enter the boot entry after input "boot" followed by "0 > " on VNC) - Resolves: bz#1782678 (qemu core dump after hot-unplugging the XXV710/XL710 PF) - Resolves: bz#1789301 (virtio-blk/scsi: fix notification suppression during AioContext polling) - Resolves: bz#1790844 (USB related fixes) --- ...ger-a-CAS-reboot-for-XICS-XIVE-mode-.patch | 113 +++++++++++++ ...-Prevent-recursion-in-usbredir_write.patch | 106 ++++++++++++ ...emove-irqchip-notifier-if-not-regist.patch | 58 +++++++ ...-enable-notifications-during-polling.patch | 158 ++++++++++++++++++ kvm-xhci-recheck-slot-status.patch | 77 +++++++++ qemu-kvm.spec | 27 ++- 6 files changed, 538 insertions(+), 1 deletion(-) create mode 100644 kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch create mode 100644 kvm-usbredir-Prevent-recursion-in-usbredir_write.patch create mode 100644 kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch create mode 100644 kvm-virtio-don-t-enable-notifications-during-polling.patch create mode 100644 kvm-xhci-recheck-slot-status.patch diff --git a/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch b/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch new file mode 100644 index 0000000..d934712 --- /dev/null +++ b/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch @@ -0,0 +1,113 @@ +From f2aeed761d2dad14920fa08c977dc45564886d9b Mon Sep 17 00:00:00 2001 +From: David Gibson 
+Date: Fri, 3 Jan 2020 01:15:12 +0000 +Subject: [PATCH 1/5] spapr: Don't trigger a CAS reboot for XICS/XIVE mode + changeover +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Gibson +Message-id: <20200103011512.49129-2-dgibson@redhat.com> +Patchwork-id: 93261 +O-Subject: [RHEL-AV-4.2 qemu-kvm PATCH 1/1] spapr: Don't trigger a CAS reboot for XICS/XIVE mode changeover +Bugzilla: 1733893 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé + +From: David Gibson + +PAPR allows the interrupt controller used on a POWER9 machine (XICS or +XIVE) to be selected by the guest operating system, by using the +ibm,client-architecture-support (CAS) feature negotiation call. + +Currently, if the guest selects an interrupt controller different from the +one selected at initial boot, this causes the system to be reset with the +new model and the boot starts again. This means we run through the SLOF +boot process twice, as well as any other bootloader (e.g. grub) in use +before the OS calls CAS. This can be confusing and/or inconvenient for +users. + +Thanks to two fairly recent changes, we no longer need this reboot. 1) we +now completely regenerate the device tree when CAS is called (meaning we +don't need special case updates for all the device tree changes caused by +the interrupt controller mode change), 2) we now have explicit code paths +to activate and deactivate the different interrupt controllers, rather than +just implicitly calling those at machine reset time. + +We can therefore eliminate the reboot for changing irq mode, simply by +putting a call to spapr_irq_update_active_intc() before we call +spapr_h_cas_compose_response() (which gives the updated device tree to +the guest firmware and OS). 
+ +Signed-off-by: David Gibson +Reviewed-by: Cedric Le Goater +Reviewed-by: Greg Kurz +(cherry picked from commit 8deb8019d696c75e6ecaee7545026b62aba2f1bb) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1733893 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr_hcall.c | 33 +++++++++++++-------------------- + 1 file changed, 13 insertions(+), 20 deletions(-) + +diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c +index 140f05c..05a7ca2 100644 +--- a/hw/ppc/spapr_hcall.c ++++ b/hw/ppc/spapr_hcall.c +@@ -1767,21 +1767,10 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + } + spapr->cas_pre_isa3_guest = !spapr_ovec_test(ov1_guest, OV1_PPC_3_00); + spapr_ovec_cleanup(ov1_guest); +- if (!spapr->cas_reboot) { +- /* If spapr_machine_reset() did not set up a HPT but one is necessary +- * (because the guest isn't going to use radix) then set it up here. */ +- if ((spapr->patb_entry & PATE1_GR) && !guest_radix) { +- /* legacy hash or new hash: */ +- spapr_setup_hpt_and_vrma(spapr); +- } +- spapr->cas_reboot = +- (spapr_h_cas_compose_response(spapr, args[1], args[2], +- ov5_updates) != 0); +- } + + /* +- * Ensure the guest asks for an interrupt mode we support; otherwise +- * terminate the boot. ++ * Ensure the guest asks for an interrupt mode we support; ++ * otherwise terminate the boot. + */ + if (guest_xive) { + if (!spapr->irq->xive) { +@@ -1797,14 +1786,18 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + } + } + +- /* +- * Generate a machine reset when we have an update of the +- * interrupt mode. Only required when the machine supports both +- * modes. 
+- */ ++ spapr_irq_update_active_intc(spapr); ++ + if (!spapr->cas_reboot) { +- spapr->cas_reboot = spapr_ovec_test(ov5_updates, OV5_XIVE_EXPLOIT) +- && spapr->irq->xics && spapr->irq->xive; ++ /* If spapr_machine_reset() did not set up a HPT but one is necessary ++ * (because the guest isn't going to use radix) then set it up here. */ ++ if ((spapr->patb_entry & PATE1_GR) && !guest_radix) { ++ /* legacy hash or new hash: */ ++ spapr_setup_hpt_and_vrma(spapr); ++ } ++ spapr->cas_reboot = ++ (spapr_h_cas_compose_response(spapr, args[1], args[2], ++ ov5_updates) != 0); + } + + spapr_ovec_cleanup(ov5_updates); +-- +1.8.3.1 + diff --git a/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch b/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch new file mode 100644 index 0000000..8f08256 --- /dev/null +++ b/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch @@ -0,0 +1,106 @@ +From 8f6311159977b8ee4b78172caa411d3cee4d2ae5 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 14 Jan 2020 20:23:30 +0000 +Subject: [PATCH 4/5] usbredir: Prevent recursion in usbredir_write +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200114202331.51831-2-dgilbert@redhat.com> +Patchwork-id: 93344 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] usbredir: Prevent recursion in usbredir_write +Bugzilla: 1790844 +RH-Acked-by: Peter Xu +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Gerd Hoffmann + +From: "Dr. David Alan Gilbert" + +I've got a case where usbredir_write manages to call back into itself +via spice; this patch causes the recursion to fail (0 bytes) the write; +this seems to avoid the deadlock I was previously seeing. 
+ +I can't say I fully understand the interaction of usbredir and spice; +but there are a few similar guards in spice and usbredir +to catch other cases especially ones also related to spice_server_char_device_wakeup + +This case seems to be triggered by repeated migration+repeated +reconnection of the viewer; but my debugging suggests the migration +finished before this hits. + +The backtrace of the hang looks like: + reds_handle_ticket + reds_handle_other_links + reds_channel_do_link + red_channel_connect + spicevmc_connect + usbredir_create_parser + usbredirparser_do_write + usbredir_write + qemu_chr_fe_write + qemu_chr_write + qemu_chr_write_buffer + spice_chr_write + spice_server_char_device_wakeup + red_char_device_wakeup + red_char_device_write_to_device + vmc_write + usbredirparser_do_write + usbredir_write + qemu_chr_fe_write + qemu_chr_write + qemu_chr_write_buffer + qemu_mutex_lock_impl + +and we fail as we land through qemu_chr_write_buffer's lock +twice. + +Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1752320 + +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20191218113012.13331-1-dgilbert@redhat.com> +Signed-off-by: Gerd Hoffmann +(cherry picked from commit 394642a8d3742c885e397d5bb5ee0ec40743cdc6) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/usb/redirect.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c +index e0f5ca6..97f2c3a 100644 +--- a/hw/usb/redirect.c ++++ b/hw/usb/redirect.c +@@ -113,6 +113,7 @@ struct USBRedirDevice { + /* Properties */ + CharBackend cs; + bool enable_streams; ++ bool in_write; + uint8_t debug; + int32_t bootindex; + char *filter_str; +@@ -290,6 +291,13 @@ static int usbredir_write(void *priv, uint8_t *data, int count) + return 0; + } + ++ /* Recursion check */ ++ if (dev->in_write) { ++ DPRINTF("usbredir_write recursion\n"); ++ return 0; ++ } ++ dev->in_write = true; ++ + r = qemu_chr_fe_write(&dev->cs, data, count); + if (r < count) { + if (!dev->watch) { +@@ -300,6 +308,7 @@ static int usbredir_write(void *priv, uint8_t *data, int count) + r = 0; + } + } ++ dev->in_write = false; + return r; + } + +-- +1.8.3.1 + diff --git a/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch b/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch new file mode 100644 index 0000000..d416e0f --- /dev/null +++ b/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch @@ -0,0 +1,58 @@ +From e4631c00d8e9ee3608ef3196cbe8bec4841ee988 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 8 Jan 2020 15:04:57 +0000 +Subject: [PATCH 2/5] vfio/pci: Don't remove irqchip notifier if not registered + +RH-Author: Peter Xu +Message-id: <20200108150457.12324-2-peterx@redhat.com> +Patchwork-id: 93291 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] vfio/pci: Don't remove irqchip notifier if not registered +Bugzilla: 1782678 +RH-Acked-by: Alex Williamson +RH-Acked-by: Cornelia Huck +RH-Acked-by: Auger Eric +RH-Acked-by: Jens Freimann + +The kvm irqchip notifier is only registered if the device supports +INTx, however it's unconditionally removed. If the assigned device +does not support INTx, this will cause QEMU to crash when unplugging +the device from the system. 
Change it to conditionally remove the +notifier only if the notify hook is setup. + +CC: Eduardo Habkost +CC: David Gibson +CC: Alex Williamson +Cc: qemu-stable@nongnu.org # v4.2 +Reported-by: yanghliu@redhat.com +Debugged-by: Eduardo Habkost +Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1782678 +Signed-off-by: Peter Xu +Reviewed-by: David Gibson +Reviewed-by: Greg Kurz +Signed-off-by: Alex Williamson +(cherry picked from commit 0446f8121723b134ca1d1ed0b73e96d4a0a8689d) +Signed-off-by: Peter Xu +Signed-off-by: Danilo C. L. de Paula +--- + hw/vfio/pci.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 309535f..d717520 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3100,7 +3100,9 @@ static void vfio_exitfn(PCIDevice *pdev) + vfio_unregister_req_notifier(vdev); + vfio_unregister_err_notifier(vdev); + pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); +- kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); ++ if (vdev->irqchip_change_notifier.notify) { ++ kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); ++ } + vfio_disable_interrupts(vdev); + if (vdev->intx.mmap_timer) { + timer_free(vdev->intx.mmap_timer); +-- +1.8.3.1 + diff --git a/kvm-virtio-don-t-enable-notifications-during-polling.patch b/kvm-virtio-don-t-enable-notifications-during-polling.patch new file mode 100644 index 0000000..2dffc01 --- /dev/null +++ b/kvm-virtio-don-t-enable-notifications-during-polling.patch @@ -0,0 +1,158 @@ +From 351dd07d7b5e69cdf47260c9ea848c0c93cd2c8a Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 9 Jan 2020 11:13:25 +0000 +Subject: [PATCH 3/5] virtio: don't enable notifications during polling + +RH-Author: Stefan Hajnoczi +Message-id: <20200109111325.559557-2-stefanha@redhat.com> +Patchwork-id: 93298 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] virtio: don't enable notifications 
during polling +Bugzilla: 1789301 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Michael S. Tsirkin + +Virtqueue notifications are not necessary during polling, so we disable +them. This allows the guest driver to avoid MMIO vmexits. +Unfortunately the virtio-blk and virtio-scsi handler functions re-enable +notifications, defeating this optimization. + +Fix virtio-blk and virtio-scsi emulation so they leave notifications +disabled. The key thing to remember for correctness is that polling +always checks one last time after ending its loop, therefore it's safe +to lose the race when re-enabling notifications at the end of polling. + +There is a measurable performance improvement of 5-10% with the null-co +block driver. Real-life storage configurations will see a smaller +improvement because the MMIO vmexit overhead contributes less to +latency. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20191209210957.65087-1-stefanha@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit d0435bc513e23a4961b6af20164d1c6c219eb4ea) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/block/virtio-blk.c | 9 +++++++-- + hw/scsi/virtio-scsi.c | 9 +++++++-- + hw/virtio/virtio.c | 12 ++++++------ + include/hw/virtio/virtio.h | 1 + + 4 files changed, 21 insertions(+), 10 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 4c357d2..c4e55fb 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -764,13 +764,16 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) + { + VirtIOBlockReq *req; + MultiReqBuffer mrb = {}; ++ bool suppress_notifications = virtio_queue_get_notification(vq); + bool progress = false; + + aio_context_acquire(blk_get_aio_context(s->blk)); + blk_io_plug(s->blk); + + do { +- virtio_queue_set_notification(vq, 0); ++ if (suppress_notifications) { ++ virtio_queue_set_notification(vq, 0); ++ } + + while ((req = virtio_blk_get_request(s, vq))) { + progress = true; +@@ -781,7 +784,9 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) + } + } + +- virtio_queue_set_notification(vq, 1); ++ if (suppress_notifications) { ++ virtio_queue_set_notification(vq, 1); ++ } + } while (!virtio_queue_empty(vq)); + + if (mrb.num_reqs) { +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 54108c0..e2cd1df 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -597,12 +597,15 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) + { + VirtIOSCSIReq *req, *next; + int ret = 0; ++ bool suppress_notifications = virtio_queue_get_notification(vq); + bool progress = false; + + QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); + + do { +- virtio_queue_set_notification(vq, 0); ++ if (suppress_notifications) { ++ virtio_queue_set_notification(vq, 0); ++ } + + while ((req = virtio_scsi_pop_req(s, vq))) { + progress = true; +@@ -622,7 +625,9 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) + } + } + +- virtio_queue_set_notification(vq, 1); ++ if (suppress_notifications) { ++ virtio_queue_set_notification(vq, 1); ++ } + } 
while (ret != -EINVAL && !virtio_queue_empty(vq)); + + QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 04716b5..3211135 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -432,6 +432,11 @@ static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable) + } + } + ++bool virtio_queue_get_notification(VirtQueue *vq) ++{ ++ return vq->notification; ++} ++ + void virtio_queue_set_notification(VirtQueue *vq, int enable) + { + vq->notification = enable; +@@ -3384,17 +3389,12 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque) + { + EventNotifier *n = opaque; + VirtQueue *vq = container_of(n, VirtQueue, host_notifier); +- bool progress; + + if (!vq->vring.desc || virtio_queue_empty(vq)) { + return false; + } + +- progress = virtio_queue_notify_aio_vq(vq); +- +- /* In case the handler function re-enabled notifications */ +- virtio_queue_set_notification(vq, 0); +- return progress; ++ return virtio_queue_notify_aio_vq(vq); + } + + static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index c32a815..6a20442 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -224,6 +224,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id); + + void virtio_notify_config(VirtIODevice *vdev); + ++bool virtio_queue_get_notification(VirtQueue *vq); + void virtio_queue_set_notification(VirtQueue *vq, int enable); + + int virtio_queue_ready(VirtQueue *vq); +-- +1.8.3.1 + diff --git a/kvm-xhci-recheck-slot-status.patch b/kvm-xhci-recheck-slot-status.patch new file mode 100644 index 0000000..8bcbc2c --- /dev/null +++ b/kvm-xhci-recheck-slot-status.patch @@ -0,0 +1,77 @@ +From ab87c0ed2a8f0a626099261a3028bc34cfac3929 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Tue, 14 Jan 2020 20:23:31 +0000 +Subject: [PATCH 5/5] xhci: recheck slot status +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200114202331.51831-3-dgilbert@redhat.com> +Patchwork-id: 93345 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] xhci: recheck slot status +Bugzilla: 1790844 +RH-Acked-by: Peter Xu +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Gerd Hoffmann + +From: Gerd Hoffmann + +Factor out slot status check into a helper function. Add an additional +check after completing transfers. This is needed in case a guest +queues multiple transfers in a row and a device unplug happens while +qemu processes them. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1786413 +Signed-off-by: Gerd Hoffmann +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 20200107083606.12393-1-kraxel@redhat.com +(cherry picked from commit 236846a019c4f7aa3111026fc9a1fe09684c8978) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/usb/hcd-xhci.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index d2b9744..646c78c 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c +@@ -1861,6 +1861,13 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, + xhci_kick_epctx(epctx, streamid); + } + ++static bool xhci_slot_ok(XHCIState *xhci, int slotid) ++{ ++ return (xhci->slots[slotid - 1].uport && ++ xhci->slots[slotid - 1].uport->dev && ++ xhci->slots[slotid - 1].uport->dev->attached); ++} ++ + static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) + { + XHCIState *xhci = epctx->xhci; +@@ -1878,9 +1885,7 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) + + /* If the device has been detached, but the guest has not noticed this + yet the 2 above checks will succeed, but we must NOT continue */ +- if (!xhci->slots[epctx->slotid - 1].uport || +- !xhci->slots[epctx->slotid - 1].uport->dev || +- !xhci->slots[epctx->slotid - 1].uport->dev->attached) { ++ if (!xhci_slot_ok(xhci, epctx->slotid)) { + return; + } + +@@ -1987,6 +1992,10 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) + } else { + xhci_fire_transfer(xhci, xfer, epctx); + } ++ if (!xhci_slot_ok(xhci, epctx->slotid)) { ++ /* surprise removal -> stop processing */ ++ break; ++ } + if (xfer->complete) { + /* update ring dequeue ptr */ + xhci_set_ep_state(xhci, epctx, stctx, epctx->state); +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 2f2643b..8de9b7b 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 5%{?dist} +Release: 6%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -125,6 +125,16 @@ Patch0021: 0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch Patch22: kvm-i386-Remove-cpu64-rhel6-CPU-model.patch # For bz#1772774 - qemu-kvm core dump during migration+reboot ( Assertion `mem->dirty_bmap' failed ) Patch23: kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch +# For bz#1733893 - Boot a guest with "-prom-env 'auto-boot?=false'", SLOF failed to enter the boot entry after input "boot" followed by "0 > " on VNC +Patch24: kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch +# For bz#1782678 - qemu core dump after hot-unplugging the XXV710/XL710 PF +Patch25: kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch +# For bz#1789301 - virtio-blk/scsi: fix notification suppression during AioContext polling +Patch26: kvm-virtio-don-t-enable-notifications-during-polling.patch +# For bz#1790844 - USB related fixes +Patch27: kvm-usbredir-Prevent-recursion-in-usbredir_write.patch +# For bz#1790844 - USB related fixes +Patch28: kvm-xhci-recheck-slot-status.patch BuildRequires: wget BuildRequires: rpm-build @@ -1059,6 +1069,21 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Wed Jan 15 2020 Danilo Cesar Lemes de Paula - 4.2.0-6.el8 +- kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch [bz#1733893] +- kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch [bz#1782678] +- kvm-virtio-don-t-enable-notifications-during-polling.patch [bz#1789301] +- kvm-usbredir-Prevent-recursion-in-usbredir_write.patch [bz#1790844] +- kvm-xhci-recheck-slot-status.patch [bz#1790844] +- Resolves: bz#1733893 + (Boot a guest with "-prom-env 'auto-boot?=false'", SLOF failed to enter the boot entry after input "boot" followed by "0 > " on VNC) +- Resolves: bz#1782678 + (qemu core dump after hot-unplugging the XXV710/XL710 PF) +- Resolves: bz#1789301 + (virtio-blk/scsi: fix notification suppression during AioContext 
polling) +- Resolves: bz#1790844 + (USB related fixes) + * Tue Jan 07 2020 Danilo Cesar Lemes de Paula - 4.2.0-5.el8 - kvm-i386-Remove-cpu64-rhel6-CPU-model.patch [bz#1741345] - kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch [bz#1772774] From 4508bb8a0ce91ed158d573e31e9363ce2ba94b59 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Tue, 21 Jan 2020 15:36:49 +0100 Subject: [PATCH 062/195] * Tue Jan 21 2020 Miroslav Rezanina - 4.2.0-7.el8 - kvm-tcp_emu-Fix-oob-access.patch [bz#1791568] - kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch [bz#1791568] - kvm-slirp-use-correct-size-while-emulating-commands.patch [bz#1791568] - kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch [bz#1559846] - Resolves: bz#1559846 (Nested KVM: limit VMX features according to CPU models - Fast Train) - Resolves: bz#1791568 (CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0]) --- .gitignore | 2 +- ...able-nested-PERF_GLOBAL_CTRL-MSR-sup.patch | 53 +++++++++++++ ...ct-size-while-emulating-IRC-commands.patch | 77 +++++++++++++++++++ ...orrect-size-while-emulating-commands.patch | 71 +++++++++++++++++ kvm-tcp_emu-Fix-oob-access.patch | 59 ++++++++++++++ qemu-kvm.spec | 20 ++++- 6 files changed, 280 insertions(+), 2 deletions(-) create mode 100644 kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch create mode 100644 kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch create mode 100644 kvm-slirp-use-correct-size-while-emulating-commands.patch create mode 100644 kvm-tcp_emu-Fix-oob-access.patch diff --git a/.gitignore b/.gitignore index a46bdee..713ad2e 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -/qemu-4.2.0.tar.xz +/qemu-*.tar.xz diff --git a/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch b/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch new file mode 100644 index 0000000..1435017 --- /dev/null +++ 
b/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch @@ -0,0 +1,53 @@ +From 481357ea8ae32b6894860c296cf6a2898260195f Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 17 Jan 2020 13:18:27 +0100 +Subject: [PATCH 4/4] RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR + support + +RH-Author: Paolo Bonzini +Message-id: <20200117131827.20361-1-pbonzini@redhat.com> +Patchwork-id: 93405 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v3] RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR support +Bugzilla: 1559846 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Miroslav Rezanina + +BZ: 1559846 +BRANCH: rhel-av-8.2.0 +BREW: 25775160 +UPSTREAM: RHEL only + +Nested PERF_GLOBAL_CTRL support is not present in the 8.2 kernel. Drop the +features via compat properties, they will be moved to 8.2 machine type compat +properties in the 8.3 timeframe. + +Signed-off-by: Paolo Bonzini +--- + No change, for v2 I mistakenly wrote "origin/rhel-av-8.2.0" as the + branch. :( + + hw/i386/pc.c | 2 ++ + 1 file changed, 2 insertions(+) + +Signed-off-by: Miroslav Rezanina +--- + hw/i386/pc.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 61e70e4..73a0f11 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -351,6 +351,8 @@ const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); + GlobalProperty pc_rhel_compat[] = { + { TYPE_X86_CPU, "host-phys-bits", "on" }, + { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, ++ { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, ++ { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, + /* bz 1508330 */ + { "vfio-pci", "x-no-geforce-quirks", "on" }, + }; +-- +1.8.3.1 + diff --git a/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch b/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch new file mode 100644 index 0000000..6d8dfe1 --- /dev/null +++ b/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch @@ -0,0 +1,77 @@ +From 
0f659af4870f151e25a7d2184b9a383bff58e3ba Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 17 Jan 2020 12:07:57 +0100 +Subject: [PATCH 2/4] slirp: use correct size while emulating IRC commands +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20200117120758.1076549-3-marcandre.lureau@redhat.com> +Patchwork-id: 93400 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 2/3] slirp: use correct size while emulating IRC commands +Bugzilla: 1791568 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +From: Prasad J Pandit + +While emulating IRC DCC commands, tcp_emu() uses 'mbuf' size +'m->m_size' to write DCC commands via snprintf(3). This may +lead to OOB write access, because 'bptr' points somewhere in +the middle of 'mbuf' buffer, not at the start. Use M_FREEROOM(m) +size to avoid OOB access. + +Reported-by: Vishnu Dev TJ +Signed-off-by: Prasad J Pandit +Reviewed-by: Samuel Thibault +Message-Id: <20200109094228.79764-2-ppandit@redhat.com> + +(cherry picked from libslirp commit ce131029d6d4a405cb7d3ac6716d03e58fb4a5d9) +Signed-off-by: Marc-André Lureau + +Signed-off-by: Miroslav Rezanina +--- + slirp/src/tcp_subr.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +index cbecd64..cedbfb2 100644 +--- a/slirp/src/tcp_subr.c ++++ b/slirp/src/tcp_subr.c +@@ -778,7 +778,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, m->m_size, "DCC CHAT chat %lu %u%c\n", ++ m->m_len += snprintf(bptr, M_FREEROOM(m), ++ "DCC CHAT chat %lu %u%c\n", + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), 1); + } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, +@@ -788,8 +789,8 @@ int tcp_emu(struct 
socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += +- snprintf(bptr, m->m_size, "DCC SEND %s %lu %u %u%c\n", buff, ++ m->m_len += snprintf(bptr, M_FREEROOM(m), ++ "DCC SEND %s %lu %u %u%c\n", buff, + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), n1, 1); + } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, +@@ -799,8 +800,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += +- snprintf(bptr, m->m_size, "DCC MOVE %s %lu %u %u%c\n", buff, ++ m->m_len += snprintf(bptr, M_FREEROOM(m), ++ "DCC MOVE %s %lu %u %u%c\n", buff, + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), n1, 1); + } +-- +1.8.3.1 + diff --git a/kvm-slirp-use-correct-size-while-emulating-commands.patch b/kvm-slirp-use-correct-size-while-emulating-commands.patch new file mode 100644 index 0000000..fe42f4f --- /dev/null +++ b/kvm-slirp-use-correct-size-while-emulating-commands.patch @@ -0,0 +1,71 @@ +From dfbfcf02738640ab83f7970e636b72b78f166675 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 17 Jan 2020 12:07:58 +0100 +Subject: [PATCH 3/4] slirp: use correct size while emulating commands +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20200117120758.1076549-4-marcandre.lureau@redhat.com> +Patchwork-id: 93401 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 3/3] slirp: use correct size while emulating commands +Bugzilla: 1791568 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +From: Prasad J Pandit + +While emulating services in tcp_emu(), it uses 'mbuf' size +'m->m_size' to write commands via snprintf(3). Use M_FREEROOM(m) +size to avoid possible OOB access. 
+ +Signed-off-by: Prasad J Pandit +Signed-off-by: Samuel Thibault +Message-Id: <20200109094228.79764-3-ppandit@redhat.com> + +(cherry picked from commit 82ebe9c370a0e2970fb5695aa19aa5214a6a1c80) +Signed-off-by: Marc-André Lureau +Signed-off-by: Miroslav Rezanina +--- + slirp/src/tcp_subr.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +index cedbfb2..954d1a6 100644 +--- a/slirp/src/tcp_subr.c ++++ b/slirp/src/tcp_subr.c +@@ -696,7 +696,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, m->m_size - m->m_len, ++ m->m_len += snprintf(bptr, M_FREEROOM(m), + "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, + n5, n6, x == 7 ? buff : ""); + return 1; +@@ -731,8 +731,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += +- snprintf(bptr, m->m_size - m->m_len, ++ m->m_len += snprintf(bptr, M_FREEROOM(m), + "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", + n1, n2, n3, n4, n5, n6, x == 7 ? 
buff : ""); + +@@ -758,8 +757,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && + (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, + htons(lport), SS_FACCEPTONCE)) != NULL) +- m->m_len = +- snprintf(m->m_data, m->m_size, "%d", ntohs(so->so_fport)) + 1; ++ m->m_len = snprintf(m->m_data, M_ROOM(m), ++ "%d", ntohs(so->so_fport)) + 1; + return 1; + + case EMU_IRC: +-- +1.8.3.1 + diff --git a/kvm-tcp_emu-Fix-oob-access.patch b/kvm-tcp_emu-Fix-oob-access.patch new file mode 100644 index 0000000..e532877 --- /dev/null +++ b/kvm-tcp_emu-Fix-oob-access.patch @@ -0,0 +1,59 @@ +From 5c2c5496083fa549e1dff903413bb6136fc19d8d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 17 Jan 2020 12:07:56 +0100 +Subject: [PATCH 1/4] tcp_emu: Fix oob access +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20200117120758.1076549-2-marcandre.lureau@redhat.com> +Patchwork-id: 93399 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 1/3] tcp_emu: Fix oob access +Bugzilla: 1791568 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +From: Samuel Thibault + +The main loop only checks for one available byte, while we sometimes +need two bytes. 
+ +[ MA - minor conflict, CHANGELOG.md absent ] +(cherry picked from libslirp commit 2655fffed7a9e765bcb4701dd876e9dab975f289) +Signed-off-by: Marc-André Lureau + +Signed-off-by: Miroslav Rezanina +--- + slirp/src/tcp_subr.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +index d6dd133..cbecd64 100644 +--- a/slirp/src/tcp_subr.c ++++ b/slirp/src/tcp_subr.c +@@ -886,6 +886,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) + break; + + case 5: ++ if (bptr == m->m_data + m->m_len - 1) ++ return 1; /* We need two bytes */ ++ + /* + * The difference between versions 1.0 and + * 2.0 is here. For future versions of +@@ -901,6 +904,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) + /* This is the field containing the port + * number that RA-player is listening to. + */ ++ ++ if (bptr == m->m_data + m->m_len - 1) ++ return 1; /* We need two bytes */ ++ + lport = (((uint8_t *)bptr)[0] << 8) + ((uint8_t *)bptr)[1]; + if (lport < 6970) + lport += 256; /* don't know why */ +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 8de9b7b..63526ad 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 6%{?dist} +Release: 7%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -135,6 +135,14 @@ Patch26: kvm-virtio-don-t-enable-notifications-during-polling.patch Patch27: kvm-usbredir-Prevent-recursion-in-usbredir_write.patch # For bz#1790844 - USB related fixes Patch28: kvm-xhci-recheck-slot-status.patch +# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0] +Patch29: kvm-tcp_emu-Fix-oob-access.patch +# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0] +Patch30: kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch +# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0] +Patch31: kvm-slirp-use-correct-size-while-emulating-commands.patch +# For bz#1559846 - Nested KVM: limit VMX features according to CPU models - Fast Train +Patch32: kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch BuildRequires: wget BuildRequires: rpm-build @@ -1069,6 +1077,16 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Jan 21 2020 Miroslav Rezanina - 4.2.0-7.el8 +- kvm-tcp_emu-Fix-oob-access.patch [bz#1791568] +- kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch [bz#1791568] +- kvm-slirp-use-correct-size-while-emulating-commands.patch [bz#1791568] +- kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch [bz#1559846] +- Resolves: bz#1559846 + (Nested KVM: limit VMX features according to CPU models - Fast Train) +- Resolves: bz#1791568 + (CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0]) + * Wed Jan 15 2020 Danilo Cesar Lemes de Paula - 4.2.0-6.el8 - kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch [bz#1733893] - kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch [bz#1782678] From 
a5bd08701ab659c40b14b43d10b03b329b4a11ba Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 31 Jan 2020 11:12:06 +0100 Subject: [PATCH 063/195] * Fri Jan 31 2020 Miroslav Rezanina - 4.2.0-8.el8 - kvm-target-arm-arch_dump-Add-SVE-notes.patch [bz#1725084] - kvm-vhost-Add-names-to-section-rounded-warning.patch [bz#1779041] - kvm-vhost-Only-align-sections-for-vhost-user.patch [bz#1779041] - kvm-vhost-coding-style-fix.patch [bz#1779041] - kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch [bz#1694164] - kvm-vhost-user-fs-remove-vhostfd-property.patch [bz#1694164] - kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch [bz#1694164] - kvm-virtiofsd-Pull-in-upstream-headers.patch [bz#1694164] - kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch [bz#1694164] - kvm-virtiofsd-Add-auxiliary-.c-s.patch [bz#1694164] - kvm-virtiofsd-Add-fuse_lowlevel.c.patch [bz#1694164] - kvm-virtiofsd-Add-passthrough_ll.patch [bz#1694164] - kvm-virtiofsd-Trim-down-imported-files.patch [bz#1694164] - kvm-virtiofsd-Format-imported-files-to-qemu-style.patch [bz#1694164] - kvm-virtiofsd-remove-mountpoint-dummy-argument.patch [bz#1694164] - kvm-virtiofsd-remove-unused-notify-reply-support.patch [bz#1694164] - kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch [bz#1694164] - kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch [bz#1694164] - kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch [bz#1694164] - kvm-virtiofsd-Trim-out-compatibility-code.patch [bz#1694164] - kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch [bz#1694164] - kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch [bz#1694164] - kvm-virtiofsd-Add-options-for-virtio.patch [bz#1694164] - kvm-virtiofsd-add-o-source-PATH-to-help-output.patch [bz#1694164] - kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch [bz#1694164] - kvm-virtiofsd-Start-wiring-up-vhost-user.patch [bz#1694164] - kvm-virtiofsd-Add-main-virtio-loop.patch [bz#1694164] - 
kvm-virtiofsd-get-set-features-callbacks.patch [bz#1694164] - kvm-virtiofsd-Start-queue-threads.patch [bz#1694164] - kvm-virtiofsd-Poll-kick_fd-for-queue.patch [bz#1694164] - kvm-virtiofsd-Start-reading-commands-from-queue.patch [bz#1694164] - kvm-virtiofsd-Send-replies-to-messages.patch [bz#1694164] - kvm-virtiofsd-Keep-track-of-replies.patch [bz#1694164] - kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch [bz#1694164] - kvm-virtiofsd-Fast-path-for-virtio-read.patch [bz#1694164] - kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch [bz#1694164] - kvm-virtiofsd-make-f-foreground-the-default.patch [bz#1694164] - kvm-virtiofsd-add-vhost-user.json-file.patch [bz#1694164] - kvm-virtiofsd-add-print-capabilities-option.patch [bz#1694164] - kvm-virtiofs-Add-maintainers-entry.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch [bz#1694164] - kvm-virtiofsd-validate-path-components.patch [bz#1694164] - kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch [bz#1694164] - kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch [bz#1694164] - kvm-virtiofsd-add-fuse_mbuf_iter-API.patch [bz#1694164] - kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch [bz#1694164] - kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch [bz#1694164] - kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch [bz#1694164] - kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch [bz#1694164] - kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch [bz#1694164] - kvm-virtiofsd-sandbox-mount-namespace.patch [bz#1694164] - 
kvm-virtiofsd-move-to-an-empty-network-namespace.patch [bz#1694164] - kvm-virtiofsd-move-to-a-new-pid-namespace.patch [bz#1694164] - kvm-virtiofsd-add-seccomp-whitelist.patch [bz#1694164] - kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch [bz#1694164] - kvm-virtiofsd-cap-ng-helpers.patch [bz#1694164] - kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch [bz#1694164] - kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch [bz#1694164] - kvm-virtiofsd-fix-libfuse-information-leaks.patch [bz#1694164] - kvm-virtiofsd-add-syslog-command-line-option.patch [bz#1694164] - kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch [bz#1694164] - kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch [bz#1694164] - kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch [bz#1694164] - kvm-virtiofsd-Handle-reinit.patch [bz#1694164] - kvm-virtiofsd-Handle-hard-reboot.patch [bz#1694164] - kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch [bz#1694164] - kvm-vhost-user-Print-unexpected-slave-message-types.patch [bz#1694164] - kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-control-readdirplus.patch [bz#1694164] - kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch [bz#1694164] - kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch [bz#1694164] - kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-use-hashtable.patch [bz#1694164] - kvm-virtiofsd-Clean-up-inodes-on-destroy.patch [bz#1694164] - kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch [bz#1694164] - kvm-virtiofsd-fix-error-handling-in-main.patch [bz#1694164] - kvm-virtiofsd-cleanup-allocated-resource-in-se.patch [bz#1694164] - 
kvm-virtiofsd-fix-memory-leak-on-lo.source.patch [bz#1694164] - kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch [bz#1694164] - kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch [bz#1694164] - kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch [bz#1694164] - kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch [bz#1694164] - kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch [bz#1694164] - kvm-virtiofsd-Support-remote-posix-locks.patch [bz#1694164] - kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch [bz#1694164] - kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch [bz#1694164] - kvm-virtiofsd-make-lo_release-atomic.patch [bz#1694164] - kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch [bz#1694164] - kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch [bz#1694164] - kvm-libvhost-user-Fix-some-memtable-remap-cases.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch [bz#1694164] - kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch [bz#1694164] - kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch [bz#1694164] - kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch [bz#1694164] - kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch [bz#1694164] - kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch [bz#1694164] - kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch [bz#1694164] - kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch [bz#1694164] - kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch [bz#1694164] - kvm-virtiofsd-process-requests-in-a-thread-pool.patch [bz#1694164] - kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch [bz#1694164] - kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch [bz#1694164] - kvm-virtiofsd-add-thread-pool-size-NUM-option.patch [bz#1694164] - kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch [bz#1694164] - 
kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch [bz#1694164] - kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch [bz#1694164] - kvm-virtiofsd-add-some-options-to-the-help-message.patch [bz#1694164] - kvm-redhat-ship-virtiofsd-vhost-user-device-backend.patch [bz#1694164] - Resolves: bz#1694164 (virtio-fs: host<->guest shared file system (qemu)) - Resolves: bz#1725084 (aarch64: support dumping SVE registers) - Resolves: bz#1779041 (netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic) --- ...sd-fix-memory-leak-on-fuse-queueinfo.patch | 63 + ...me-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch | 137 + ...ost-user-Protect-slave-fd-with-mutex.patch | 134 + ...t-user-Fix-some-memtable-remap-cases.patch | 117 + kvm-target-arm-arch_dump-Add-SVE-notes.patch | 298 + ...Add-names-to-section-rounded-warning.patch | 53 + ...t-Only-align-sections-for-vhost-user.patch | 97 + kvm-vhost-coding-style-fix.patch | 56 + ...Print-unexpected-slave-message-types.patch | 48 + ...host-user-fs-remove-vhostfd-property.patch | 59 + ...io-fs-fix-MSI-X-nvectors-calculation.patch | 60 + kvm-virtiofs-Add-maintainers-entry.patch | 52 + ...-to-the-log-with-FUSE_LOG_DEBUG-leve.patch | 86 + ...akefile-wiring-for-virtiofsd-contrib.patch | 106 + kvm-virtiofsd-Add-auxiliary-.c-s.patch | 1387 ++ kvm-virtiofsd-Add-fuse_lowlevel.c.patch | 3172 ++++ kvm-virtiofsd-Add-main-virtio-loop.patch | 105 + kvm-virtiofsd-Add-options-for-virtio.patch | 103 + kvm-virtiofsd-Add-passthrough_ll.patch | 1387 ++ ...mestamp-to-the-log-with-FUSE_LOG_DEB.patch | 73 + ...virtiofsd-Clean-up-inodes-on-destroy.patch | 85 + ...t-lo_destroy-to-take-the-lo-mutex-lo.patch | 112 + ...op-CAP_FSETID-if-client-asked-for-it.patch | 176 + kvm-virtiofsd-Fast-path-for-virtio-read.patch | 240 + ...mmon-header-and-define-for-QEMU-buil.patch | 164 + ...ta-corruption-with-O_APPEND-write-in.patch | 136 + ...fuse_daemonize-ignored-return-values.patch | 120 + ...-Format-imported-files-to-qemu-style.patch 
| 14743 ++++++++++++++++ kvm-virtiofsd-Handle-hard-reboot.patch | 65 + kvm-virtiofsd-Handle-reinit.patch | 53 + kvm-virtiofsd-Keep-track-of-replies.patch | 116 + ...Kill-threads-when-queues-are-stopped.patch | 143 + ...sync-work-even-if-only-inode-is-pass.patch | 96 + ...vhost-connection-instead-of-mounting.patch | 257 + ...ofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch | 76 + ...Pass-write-iov-s-all-the-way-through.patch | 140 + ...-fuse_bufvec-through-to-do_write_buf.patch | 168 + kvm-virtiofsd-Poll-kick_fd-for-queue.patch | 97 + ...t-multiply-running-with-same-vhost_u.patch | 144 + kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch | 945 + kvm-virtiofsd-Pull-in-upstream-headers.patch | 4911 +++++ ...move-unused-enum-fuse_buf_copy_flags.patch | 271 + ...Reset-O_DIRECT-flag-during-file-open.patch | 72 + kvm-virtiofsd-Send-replies-to-messages.patch | 199 + kvm-virtiofsd-Start-queue-threads.patch | 165 + ...sd-Start-reading-commands-from-queue.patch | 200 + ...virtiofsd-Start-wiring-up-vhost-user.patch | 247 + ...virtiofsd-Support-remote-posix-locks.patch | 355 + kvm-virtiofsd-Trim-down-imported-files.patch | 1582 ++ ...irtiofsd-Trim-out-compatibility-code.patch | 545 + ...sd-add-definition-of-fuse_buf_writev.patch | 93 + ...iofsd-add-fd-FDNUM-fd-passing-option.patch | 170 + kvm-virtiofsd-add-fuse_mbuf_iter-API.patch | 134 + ...iofsd-add-helper-for-lo_data-cleanup.patch | 88 + ...fsd-add-o-source-PATH-to-help-output.patch | 46 + ...tiofsd-add-print-capabilities-option.patch | 121 + kvm-virtiofsd-add-seccomp-whitelist.patch | 285 + ...add-some-options-to-the-help-message.patch | 74 + ...iofsd-add-syslog-command-line-option.patch | 239 + ...ofsd-add-thread-pool-size-NUM-option.patch | 106 + kvm-virtiofsd-add-vhost-user.json-file.patch | 73 + kvm-virtiofsd-cap-ng-helpers.patch | 175 + ...input-buffer-size-in-fuse_lowlevel.c.patch | 1111 ++ ...fsd-cleanup-allocated-resource-in-se.patch | 82 + ...t-more-fprintf-and-perror-to-use-fus.patch | 99 + 
...d-do-not-always-set-FUSE_FLOCK_LOCKS.patch | 57 + ...d-enable-PARALLEL_DIROPS-during-INIT.patch | 47 + ...ract-root-inode-init-into-setup_root.patch | 111 + ...hen-parent-inode-isn-t-known-in-lo_d.patch | 85 + ...virtiofsd-fix-error-handling-in-main.patch | 63 + ...correct-error-handling-in-lo_do_look.patch | 44 + ...tiofsd-fix-libfuse-information-leaks.patch | 322 + ...tiofsd-fix-lo_destroy-resource-leaks.patch | 94 + ...rtiofsd-fix-memory-leak-on-lo.source.patch | 66 + ...virtiofsd-get-set-features-callbacks.patch | 66 + ...uce-inode-refcount-to-prevent-use-af.patch | 589 + ...tiofsd-make-f-foreground-the-default.patch | 76 + kvm-virtiofsd-make-lo_release-atomic.patch | 62 + ...irtiofsd-move-to-a-new-pid-namespace.patch | 223 + ...d-move-to-an-empty-network-namespace.patch | 66 + ...rough_ll-Pass-errno-to-fuse_reply_er.patch | 54 + ...rough_ll-Use-cache_readdir-for-direc.patch | 48 + ...rough_ll-add-dirp_map-to-hide-lo_dir.patch | 238 + ...through_ll-add-fallback-for-racy-ops.patch | 303 + ...rough_ll-add-fd_map-to-hide-file-des.patch | 328 + ...rough_ll-add-ino_map-to-hide-lo_inod.patch | 395 + ...rough_ll-add-lo_map-for-ino-fh-indir.patch | 182 + ...passthrough_ll-add-renameat2-support.patch | 52 + ...rough_ll-clean-up-cache-related-opti.patch | 138 + ...d-passthrough_ll-control-readdirplus.patch | 79 + ...rough_ll-create-new-files-in-caller-.patch | 198 + ...rough_ll-disable-readdirplus-on-cach.patch | 50 + ...rough_ll-fix-refcounting-on-remove-r.patch | 143 + ...rtiofsd-passthrough_ll-use-hashtable.patch | 211 + ...fsd-prevent-.-escape-in-lo_do_lookup.patch | 54 + ...sd-prevent-.-escape-in-lo_do_readdir.patch | 108 + ...prevent-FUSE_INIT-FUSE_DESTROY-races.patch | 103 + ...t-fv_queue_thread-vs-virtio_loop-rac.patch | 149 + ...iofsd-prevent-races-with-lo_dirp_put.patch | 147 + ...log-only-when-priority-is-high-enoug.patch | 469 + ...sd-process-requests-in-a-thread-pool.patch | 533 + ...fsd-remove-mountpoint-dummy-argument.patch | 159 + 
...d-remove-unused-notify-reply-support.patch | 294 + ...name-inode-refcount-to-inode-nlookup.patch | 139 + ...-unref_inode-to-unref_inode_lolocked.patch | 94 + kvm-virtiofsd-sandbox-mount-namespace.patch | 166 + ...ofsd-set-maximum-RLIMIT_NOFILE-limit.patch | 93 + ...ll-queue-threads-on-exit-in-virtio_l.patch | 72 + ...t-nanosecond-resolution-for-file-tim.patch | 83 + ...se_buf_writev-to-replace-fuse_buf_wr.patch | 82 + ...se_lowlevel_is_virtio-in-fuse_sessio.patch | 56 + ...-proc-self-fd-O_PATH-file-descriptor.patch | 390 + ...te-input-buffer-sizes-in-do_write_bu.patch | 137 + kvm-virtiofsd-validate-path-components.patch | 164 + ...-passthrough_ll-fix-fallocate-ifdefs.patch | 56 + qemu-kvm.spec | 364 +- 116 files changed, 45410 insertions(+), 4 deletions(-) create mode 100644 kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch create mode 100644 kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch create mode 100644 kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch create mode 100644 kvm-libvhost-user-Fix-some-memtable-remap-cases.patch create mode 100644 kvm-target-arm-arch_dump-Add-SVE-notes.patch create mode 100644 kvm-vhost-Add-names-to-section-rounded-warning.patch create mode 100644 kvm-vhost-Only-align-sections-for-vhost-user.patch create mode 100644 kvm-vhost-coding-style-fix.patch create mode 100644 kvm-vhost-user-Print-unexpected-slave-message-types.patch create mode 100644 kvm-vhost-user-fs-remove-vhostfd-property.patch create mode 100644 kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch create mode 100644 kvm-virtiofs-Add-maintainers-entry.patch create mode 100644 kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch create mode 100644 kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch create mode 100644 kvm-virtiofsd-Add-auxiliary-.c-s.patch create mode 100644 kvm-virtiofsd-Add-fuse_lowlevel.c.patch create mode 100644 kvm-virtiofsd-Add-main-virtio-loop.patch create mode 100644 
kvm-virtiofsd-Add-options-for-virtio.patch create mode 100644 kvm-virtiofsd-Add-passthrough_ll.patch create mode 100644 kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch create mode 100644 kvm-virtiofsd-Clean-up-inodes-on-destroy.patch create mode 100644 kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch create mode 100644 kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch create mode 100644 kvm-virtiofsd-Fast-path-for-virtio-read.patch create mode 100644 kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch create mode 100644 kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch create mode 100644 kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch create mode 100644 kvm-virtiofsd-Format-imported-files-to-qemu-style.patch create mode 100644 kvm-virtiofsd-Handle-hard-reboot.patch create mode 100644 kvm-virtiofsd-Handle-reinit.patch create mode 100644 kvm-virtiofsd-Keep-track-of-replies.patch create mode 100644 kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch create mode 100644 kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch create mode 100644 kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch create mode 100644 kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch create mode 100644 kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch create mode 100644 kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch create mode 100644 kvm-virtiofsd-Poll-kick_fd-for-queue.patch create mode 100644 kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch create mode 100644 kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch create mode 100644 kvm-virtiofsd-Pull-in-upstream-headers.patch create mode 100644 kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch create mode 100644 kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch create mode 100644 kvm-virtiofsd-Send-replies-to-messages.patch create mode 100644 kvm-virtiofsd-Start-queue-threads.patch create mode 100644 
kvm-virtiofsd-Start-reading-commands-from-queue.patch create mode 100644 kvm-virtiofsd-Start-wiring-up-vhost-user.patch create mode 100644 kvm-virtiofsd-Support-remote-posix-locks.patch create mode 100644 kvm-virtiofsd-Trim-down-imported-files.patch create mode 100644 kvm-virtiofsd-Trim-out-compatibility-code.patch create mode 100644 kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch create mode 100644 kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch create mode 100644 kvm-virtiofsd-add-fuse_mbuf_iter-API.patch create mode 100644 kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch create mode 100644 kvm-virtiofsd-add-o-source-PATH-to-help-output.patch create mode 100644 kvm-virtiofsd-add-print-capabilities-option.patch create mode 100644 kvm-virtiofsd-add-seccomp-whitelist.patch create mode 100644 kvm-virtiofsd-add-some-options-to-the-help-message.patch create mode 100644 kvm-virtiofsd-add-syslog-command-line-option.patch create mode 100644 kvm-virtiofsd-add-thread-pool-size-NUM-option.patch create mode 100644 kvm-virtiofsd-add-vhost-user.json-file.patch create mode 100644 kvm-virtiofsd-cap-ng-helpers.patch create mode 100644 kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch create mode 100644 kvm-virtiofsd-cleanup-allocated-resource-in-se.patch create mode 100644 kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch create mode 100644 kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch create mode 100644 kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch create mode 100644 kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch create mode 100644 kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch create mode 100644 kvm-virtiofsd-fix-error-handling-in-main.patch create mode 100644 kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch create mode 100644 kvm-virtiofsd-fix-libfuse-information-leaks.patch create mode 100644 kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch create mode 100644 
kvm-virtiofsd-fix-memory-leak-on-lo.source.patch create mode 100644 kvm-virtiofsd-get-set-features-callbacks.patch create mode 100644 kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch create mode 100644 kvm-virtiofsd-make-f-foreground-the-default.patch create mode 100644 kvm-virtiofsd-make-lo_release-atomic.patch create mode 100644 kvm-virtiofsd-move-to-a-new-pid-namespace.patch create mode 100644 kvm-virtiofsd-move-to-an-empty-network-namespace.patch create mode 100644 kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch create mode 100644 kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch create mode 100644 kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch create mode 100644 kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch create mode 100644 kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch create mode 100644 kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch create mode 100644 kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch create mode 100644 kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch create mode 100644 kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch create mode 100644 kvm-virtiofsd-passthrough_ll-control-readdirplus.patch create mode 100644 kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch create mode 100644 kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch create mode 100644 kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch create mode 100644 kvm-virtiofsd-passthrough_ll-use-hashtable.patch create mode 100644 kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch create mode 100644 kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch create mode 100644 kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch create mode 100644 kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch create mode 100644 kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch create mode 100644 
kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch create mode 100644 kvm-virtiofsd-process-requests-in-a-thread-pool.patch create mode 100644 kvm-virtiofsd-remove-mountpoint-dummy-argument.patch create mode 100644 kvm-virtiofsd-remove-unused-notify-reply-support.patch create mode 100644 kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch create mode 100644 kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch create mode 100644 kvm-virtiofsd-sandbox-mount-namespace.patch create mode 100644 kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch create mode 100644 kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch create mode 100644 kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch create mode 100644 kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch create mode 100644 kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch create mode 100644 kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch create mode 100644 kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch create mode 100644 kvm-virtiofsd-validate-path-components.patch create mode 100644 kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch diff --git a/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch b/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch new file mode 100644 index 0000000..dc65c26 --- /dev/null +++ b/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch @@ -0,0 +1,63 @@ +From ceb6d97674b8bc9a072db1be4167411bc0ee48d7 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:02 +0100 +Subject: [PATCH 091/116] Virtiofsd: fix memory leak on fuse queueinfo +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-88-dgilbert@redhat.com> +Patchwork-id: 93542 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 087/112] Virtiofsd: fix memory leak on fuse queueinfo +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +For fuse's queueinfo, both queueinfo array and queueinfos are allocated in +fv_queue_set_started() but not cleaned up when the daemon process quits. + +This fixes the leak in proper places. + +Signed-off-by: Liu Bo +Signed-off-by: Eric Ren +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 740b0b700a6338a1cf60c26229651ac5f6724944) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index b7948de..fb8d6d1 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -625,6 +625,8 @@ static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) + } + close(ourqi->kill_fd); + ourqi->kick_fd = -1; ++ free(vud->qi[qidx]); ++ vud->qi[qidx] = NULL; + } + + /* Callback from libvhost-user on start or stop of a queue */ +@@ -884,6 +886,12 @@ int virtio_session_mount(struct fuse_session *se) + void virtio_session_close(struct fuse_session *se) + { + close(se->vu_socketfd); ++ ++ if (!se->virtio_dev) { ++ return; ++ } ++ ++ free(se->virtio_dev->qi); + free(se->virtio_dev); + se->virtio_dev = NULL; + } +-- +1.8.3.1 + diff --git a/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch b/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch new file mode 100644 index 0000000..5d21bf8 --- /dev/null +++ b/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch @@ -0,0 +1,137 @@ +From f756c1c4590a37c533ec0429644a7034ba35dada Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:38 +0100 +Subject: [PATCH 007/116] build: rename CONFIG_LIBCAP to CONFIG_LIBCAP_NG +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-4-dgilbert@redhat.com> +Patchwork-id: 93459 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 003/112] build: rename CONFIG_LIBCAP to CONFIG_LIBCAP_NG +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Paolo Bonzini + +Since we are actually testing for the newer capng library, rename the +symbol to match. + +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Paolo Bonzini +(cherry picked from commit a358bca24026a377e0804e137a4499e4e041918d) +Signed-off-by: Miroslav Rezanina +--- + configure | 2 +- + qemu-bridge-helper.c | 6 +++--- + scsi/qemu-pr-helper.c | 12 ++++++------ + 3 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/configure b/configure +index 16564f8..7831618 100755 +--- a/configure ++++ b/configure +@@ -6760,7 +6760,7 @@ if test "$l2tpv3" = "yes" ; then + echo "CONFIG_L2TPV3=y" >> $config_host_mak + fi + if test "$cap_ng" = "yes" ; then +- echo "CONFIG_LIBCAP=y" >> $config_host_mak ++ echo "CONFIG_LIBCAP_NG=y" >> $config_host_mak + fi + echo "CONFIG_AUDIO_DRIVERS=$audio_drv_list" >> $config_host_mak + for drv in $audio_drv_list; do +diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c +index 3d50ec0..88b2674 100644 +--- a/qemu-bridge-helper.c ++++ b/qemu-bridge-helper.c +@@ -43,7 +43,7 @@ + + #include "net/tap-linux.h" + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + #include + #endif + +@@ -207,7 +207,7 @@ static int send_fd(int c, int fd) + return sendmsg(c, &msg, 0); + } + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + static int drop_privileges(void) + { + /* clear all capabilities */ +@@ -246,7 +246,7 @@ int main(int argc, char **argv) + int access_allowed, 
access_denied; + int ret = EXIT_SUCCESS; + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + /* if we're run from an suid binary, immediately drop privileges preserving + * cap_net_admin */ + if (geteuid() == 0 && getuid() != geteuid()) { +diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c +index debb18f..0659cee 100644 +--- a/scsi/qemu-pr-helper.c ++++ b/scsi/qemu-pr-helper.c +@@ -24,7 +24,7 @@ + #include + #include + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + #include + #endif + #include +@@ -70,7 +70,7 @@ static int num_active_sockets = 1; + static int noisy; + static int verbose; + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + static int uid = -1; + static int gid = -1; + #endif +@@ -97,7 +97,7 @@ static void usage(const char *name) + " (default '%s')\n" + " -T, --trace [[enable=]][,events=][,file=]\n" + " specify tracing options\n" +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + " -u, --user=USER user to drop privileges to\n" + " -g, --group=GROUP group to drop privileges to\n" + #endif +@@ -827,7 +827,7 @@ static void close_server_socket(void) + num_active_sockets--; + } + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + static int drop_privileges(void) + { + /* clear all capabilities */ +@@ -920,7 +920,7 @@ int main(int argc, char **argv) + pidfile = g_strdup(optarg); + pidfile_specified = true; + break; +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + case 'u': { + unsigned long res; + struct passwd *userinfo = getpwnam(optarg); +@@ -1056,7 +1056,7 @@ int main(int argc, char **argv) + exit(EXIT_FAILURE); + } + +-#ifdef CONFIG_LIBCAP ++#ifdef CONFIG_LIBCAP_NG + if (drop_privileges() < 0) { + error_report("Failed to drop privileges: %s", strerror(errno)); + exit(EXIT_FAILURE); +-- +1.8.3.1 + diff --git a/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch b/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch new file mode 100644 index 0000000..4212f1c --- /dev/null +++ 
b/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch @@ -0,0 +1,134 @@ +From 548de8acbf0137b6e49a14b63682badfff037d23 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:44 +0100 +Subject: [PATCH 073/116] contrib/libvhost-user: Protect slave fd with mutex +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-70-dgilbert@redhat.com> +Patchwork-id: 93523 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 069/112] contrib/libvhost-user: Protect slave fd with mutex +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +In future patches we'll be performing commands on the slave-fd driven +by commands on queues, since those queues will be driven by individual +threads we need to make sure they don't attempt to use the slave-fd +for multiple commands in parallel. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit c25c02b9e6a196be87a818f459c426556b24770d) +Signed-off-by: Miroslav Rezanina +--- + contrib/libvhost-user/libvhost-user.c | 24 ++++++++++++++++++++---- + contrib/libvhost-user/libvhost-user.h | 3 +++ + 2 files changed, 23 insertions(+), 4 deletions(-) + +diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c +index ec27b78..63e4106 100644 +--- a/contrib/libvhost-user/libvhost-user.c ++++ b/contrib/libvhost-user/libvhost-user.c +@@ -392,26 +392,37 @@ vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) + return vu_message_write(dev, conn_fd, vmsg); + } + ++/* ++ * Processes a reply on the slave channel. ++ * Entered with slave_mutex held and releases it before exit. ++ * Returns true on success. 
++ */ + static bool + vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg) + { + VhostUserMsg msg_reply; ++ bool result = false; + + if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { +- return true; ++ result = true; ++ goto out; + } + + if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) { +- return false; ++ goto out; + } + + if (msg_reply.request != vmsg->request) { + DPRINT("Received unexpected msg type. Expected %d received %d", + vmsg->request, msg_reply.request); +- return false; ++ goto out; + } + +- return msg_reply.payload.u64 == 0; ++ result = msg_reply.payload.u64 == 0; ++ ++out: ++ pthread_mutex_unlock(&dev->slave_mutex); ++ return result; + } + + /* Kick the log_call_fd if required. */ +@@ -1105,10 +1116,13 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, + return false; + } + ++ pthread_mutex_lock(&dev->slave_mutex); + if (!vu_message_write(dev, dev->slave_fd, &vmsg)) { ++ pthread_mutex_unlock(&dev->slave_mutex); + return false; + } + ++ /* Also unlocks the slave_mutex */ + return vu_process_message_reply(dev, &vmsg); + } + +@@ -1628,6 +1642,7 @@ vu_deinit(VuDev *dev) + close(dev->slave_fd); + dev->slave_fd = -1; + } ++ pthread_mutex_destroy(&dev->slave_mutex); + + if (dev->sock != -1) { + close(dev->sock); +@@ -1663,6 +1678,7 @@ vu_init(VuDev *dev, + dev->remove_watch = remove_watch; + dev->iface = iface; + dev->log_call_fd = -1; ++ pthread_mutex_init(&dev->slave_mutex, NULL); + dev->slave_fd = -1; + dev->max_queues = max_queues; + +diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h +index 46b6007..1844b6f 100644 +--- a/contrib/libvhost-user/libvhost-user.h ++++ b/contrib/libvhost-user/libvhost-user.h +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include "standard-headers/linux/virtio_ring.h" + + /* Based on qemu/hw/virtio/vhost-user.c */ +@@ -355,6 +356,8 @@ struct VuDev { + VuVirtq *vq; + VuDevInflightInfo inflight_info; + int log_call_fd; ++ /* Must be 
held while using slave_fd */ ++ pthread_mutex_t slave_mutex; + int slave_fd; + uint64_t log_size; + uint8_t *log_table; +-- +1.8.3.1 + diff --git a/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch b/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch new file mode 100644 index 0000000..e362efe --- /dev/null +++ b/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch @@ -0,0 +1,117 @@ +From ee360b70f179cf540faebe7e55b34e323e2bb179 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:09 +0100 +Subject: [PATCH 098/116] libvhost-user: Fix some memtable remap cases +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-95-dgilbert@redhat.com> +Patchwork-id: 93548 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 094/112] libvhost-user: Fix some memtable remap cases +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +If a new setmemtable command comes in once the vhost threads are +running, it will remap the guests address space and the threads +will now be looking in the wrong place. + +Fortunately we're running this command under lock, so we can +update the queue mappings so that threads will look in the new-right +place. + +Note: This doesn't fix things that the threads might be doing +without a lock (e.g. a readv/writev!) That's for another time. + +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 49e9ec749d4db62ae51f76354143cee183912a1d) +Signed-off-by: Miroslav Rezanina +--- + contrib/libvhost-user/libvhost-user.c | 33 +++++++++++++++++++++++++-------- + contrib/libvhost-user/libvhost-user.h | 3 +++ + 2 files changed, 28 insertions(+), 8 deletions(-) + +diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c +index 63e4106..b89bf18 100644 +--- a/contrib/libvhost-user/libvhost-user.c ++++ b/contrib/libvhost-user/libvhost-user.c +@@ -565,6 +565,21 @@ vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg) + } + + static bool ++map_ring(VuDev *dev, VuVirtq *vq) ++{ ++ vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr); ++ vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr); ++ vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr); ++ ++ DPRINT("Setting virtq addresses:\n"); ++ DPRINT(" vring_desc at %p\n", vq->vring.desc); ++ DPRINT(" vring_used at %p\n", vq->vring.used); ++ DPRINT(" vring_avail at %p\n", vq->vring.avail); ++ ++ return !(vq->vring.desc && vq->vring.used && vq->vring.avail); ++} ++ ++static bool + vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) + { + int i; +@@ -767,6 +782,14 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) + close(vmsg->fds[i]); + } + ++ for (i = 0; i < dev->max_queues; i++) { ++ if (dev->vq[i].vring.desc) { ++ if (map_ring(dev, &dev->vq[i])) { ++ vu_panic(dev, "remaping queue %d during setmemtable", i); ++ } ++ } ++ } ++ + return false; + } + +@@ -853,18 +876,12 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg) + DPRINT(" avail_user_addr: 0x%016" PRIx64 "\n", vra->avail_user_addr); + DPRINT(" log_guest_addr: 0x%016" PRIx64 "\n", vra->log_guest_addr); + ++ vq->vra = *vra; + vq->vring.flags = vra->flags; +- vq->vring.desc = qva_to_va(dev, vra->desc_user_addr); +- vq->vring.used = qva_to_va(dev, vra->used_user_addr); +- vq->vring.avail = qva_to_va(dev, vra->avail_user_addr); + vq->vring.log_guest_addr = 
vra->log_guest_addr; + +- DPRINT("Setting virtq addresses:\n"); +- DPRINT(" vring_desc at %p\n", vq->vring.desc); +- DPRINT(" vring_used at %p\n", vq->vring.used); +- DPRINT(" vring_avail at %p\n", vq->vring.avail); + +- if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) { ++ if (map_ring(dev, vq)) { + vu_panic(dev, "Invalid vring_addr message"); + return false; + } +diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h +index 1844b6f..5cb7708 100644 +--- a/contrib/libvhost-user/libvhost-user.h ++++ b/contrib/libvhost-user/libvhost-user.h +@@ -327,6 +327,9 @@ typedef struct VuVirtq { + int err_fd; + unsigned int enable; + bool started; ++ ++ /* Guest addresses of our ring */ ++ struct vhost_vring_addr vra; + } VuVirtq; + + enum VuWatchCondtion { +-- +1.8.3.1 + diff --git a/kvm-target-arm-arch_dump-Add-SVE-notes.patch b/kvm-target-arm-arch_dump-Add-SVE-notes.patch new file mode 100644 index 0000000..febea10 --- /dev/null +++ b/kvm-target-arm-arch_dump-Add-SVE-notes.patch @@ -0,0 +1,298 @@ +From d8871ae2842531130c9b333e7c06a6a5d1561286 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 24 Jan 2020 09:14:34 +0100 +Subject: [PATCH 001/116] target/arm/arch_dump: Add SVE notes + +RH-Author: Andrew Jones +Message-id: <20200124091434.15021-2-drjones@redhat.com> +Patchwork-id: 93443 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] target/arm/arch_dump: Add SVE notes +Bugzilla: 1725084 +RH-Acked-by: Auger Eric +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1725084 + +Author: Andrew Jones +Date: Thu, 23 Jan 2020 15:22:40 +0000 + + target/arm/arch_dump: Add SVE notes + + When dumping a guest with dump-guest-memory also dump the SVE + registers if they are in use. 
+ + Signed-off-by: Andrew Jones + Reviewed-by: Richard Henderson + Message-id: 20200120101832.18781-1-drjones@redhat.com + [PMM: fixed checkpatch nits] + Signed-off-by: Peter Maydell + +(cherry picked from commit 538baab245ca881e6a6ff720b5133f3ad1fcaafc) +Signed-off-by: Miroslav Rezanina +--- + include/elf.h | 1 + + target/arm/arch_dump.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++- + target/arm/cpu.h | 25 ++++++++++ + target/arm/kvm64.c | 24 ---------- + 4 files changed, 148 insertions(+), 26 deletions(-) + +diff --git a/include/elf.h b/include/elf.h +index 3501e0c..8fbfe60 100644 +--- a/include/elf.h ++++ b/include/elf.h +@@ -1650,6 +1650,7 @@ typedef struct elf64_shdr { + #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ + #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ + #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ ++#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension regs */ + + /* + * Physical entry point into the kernel. 
+diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c +index 26a2c09..2345dec 100644 +--- a/target/arm/arch_dump.c ++++ b/target/arm/arch_dump.c +@@ -62,12 +62,23 @@ struct aarch64_user_vfp_state { + + QEMU_BUILD_BUG_ON(sizeof(struct aarch64_user_vfp_state) != 528); + ++/* struct user_sve_header from arch/arm64/include/uapi/asm/ptrace.h */ ++struct aarch64_user_sve_header { ++ uint32_t size; ++ uint32_t max_size; ++ uint16_t vl; ++ uint16_t max_vl; ++ uint16_t flags; ++ uint16_t reserved; ++} QEMU_PACKED; ++ + struct aarch64_note { + Elf64_Nhdr hdr; + char name[8]; /* align_up(sizeof("CORE"), 4) */ + union { + struct aarch64_elf_prstatus prstatus; + struct aarch64_user_vfp_state vfp; ++ struct aarch64_user_sve_header sve; + }; + } QEMU_PACKED; + +@@ -76,6 +87,8 @@ struct aarch64_note { + (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_elf_prstatus)) + #define AARCH64_PRFPREG_NOTE_SIZE \ + (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_user_vfp_state)) ++#define AARCH64_SVE_NOTE_SIZE(env) \ ++ (AARCH64_NOTE_HEADER_SIZE + sve_size(env)) + + static void aarch64_note_init(struct aarch64_note *note, DumpState *s, + const char *name, Elf64_Word namesz, +@@ -128,11 +141,102 @@ static int aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f, + return 0; + } + ++#ifdef TARGET_AARCH64 ++static off_t sve_zreg_offset(uint32_t vq, int n) ++{ ++ off_t off = sizeof(struct aarch64_user_sve_header); ++ return ROUND_UP(off, 16) + vq * 16 * n; ++} ++ ++static off_t sve_preg_offset(uint32_t vq, int n) ++{ ++ return sve_zreg_offset(vq, 32) + vq * 16 / 8 * n; ++} ++ ++static off_t sve_fpsr_offset(uint32_t vq) ++{ ++ off_t off = sve_preg_offset(vq, 17); ++ return ROUND_UP(off, 16); ++} ++ ++static off_t sve_fpcr_offset(uint32_t vq) ++{ ++ return sve_fpsr_offset(vq) + sizeof(uint32_t); ++} ++ ++static uint32_t sve_current_vq(CPUARMState *env) ++{ ++ return sve_zcr_len_for_el(env, arm_current_el(env)) + 1; ++} ++ ++static size_t sve_size_vq(uint32_t vq) ++{ ++ off_t off = 
sve_fpcr_offset(vq) + sizeof(uint32_t); ++ return ROUND_UP(off, 16); ++} ++ ++static size_t sve_size(CPUARMState *env) ++{ ++ return sve_size_vq(sve_current_vq(env)); ++} ++ ++static int aarch64_write_elf64_sve(WriteCoreDumpFunction f, ++ CPUARMState *env, int cpuid, ++ DumpState *s) ++{ ++ struct aarch64_note *note; ++ ARMCPU *cpu = env_archcpu(env); ++ uint32_t vq = sve_current_vq(env); ++ uint64_t tmp[ARM_MAX_VQ * 2], *r; ++ uint32_t fpr; ++ uint8_t *buf; ++ int ret, i; ++ ++ note = g_malloc0(AARCH64_SVE_NOTE_SIZE(env)); ++ buf = (uint8_t *)&note->sve; ++ ++ aarch64_note_init(note, s, "LINUX", 6, NT_ARM_SVE, sve_size_vq(vq)); ++ ++ note->sve.size = cpu_to_dump32(s, sve_size_vq(vq)); ++ note->sve.max_size = cpu_to_dump32(s, sve_size_vq(cpu->sve_max_vq)); ++ note->sve.vl = cpu_to_dump16(s, vq * 16); ++ note->sve.max_vl = cpu_to_dump16(s, cpu->sve_max_vq * 16); ++ note->sve.flags = cpu_to_dump16(s, 1); ++ ++ for (i = 0; i < 32; ++i) { ++ r = sve_bswap64(tmp, &env->vfp.zregs[i].d[0], vq * 2); ++ memcpy(&buf[sve_zreg_offset(vq, i)], r, vq * 16); ++ } ++ ++ for (i = 0; i < 17; ++i) { ++ r = sve_bswap64(tmp, r = &env->vfp.pregs[i].p[0], ++ DIV_ROUND_UP(vq * 2, 8)); ++ memcpy(&buf[sve_preg_offset(vq, i)], r, vq * 16 / 8); ++ } ++ ++ fpr = cpu_to_dump32(s, vfp_get_fpsr(env)); ++ memcpy(&buf[sve_fpsr_offset(vq)], &fpr, sizeof(uint32_t)); ++ ++ fpr = cpu_to_dump32(s, vfp_get_fpcr(env)); ++ memcpy(&buf[sve_fpcr_offset(vq)], &fpr, sizeof(uint32_t)); ++ ++ ret = f(note, AARCH64_SVE_NOTE_SIZE(env), s); ++ g_free(note); ++ ++ if (ret < 0) { ++ return -1; ++ } ++ ++ return 0; ++} ++#endif ++ + int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, + int cpuid, void *opaque) + { + struct aarch64_note note; +- CPUARMState *env = &ARM_CPU(cs)->env; ++ ARMCPU *cpu = ARM_CPU(cs); ++ CPUARMState *env = &cpu->env; + DumpState *s = opaque; + uint64_t pstate, sp; + int ret, i; +@@ -163,7 +267,18 @@ int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, + return -1; 
+ } + +- return aarch64_write_elf64_prfpreg(f, env, cpuid, s); ++ ret = aarch64_write_elf64_prfpreg(f, env, cpuid, s); ++ if (ret) { ++ return ret; ++ } ++ ++#ifdef TARGET_AARCH64 ++ if (cpu_isar_feature(aa64_sve, cpu)) { ++ ret = aarch64_write_elf64_sve(f, env, cpuid, s); ++ } ++#endif ++ ++ return ret; + } + + /* struct pt_regs from arch/arm/include/asm/ptrace.h */ +@@ -335,6 +450,11 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) + if (class == ELFCLASS64) { + note_size = AARCH64_PRSTATUS_NOTE_SIZE; + note_size += AARCH64_PRFPREG_NOTE_SIZE; ++#ifdef TARGET_AARCH64 ++ if (cpu_isar_feature(aa64_sve, cpu)) { ++ note_size += AARCH64_SVE_NOTE_SIZE(env); ++ } ++#endif + } else { + note_size = ARM_PRSTATUS_NOTE_SIZE; + if (arm_feature(env, ARM_FEATURE_VFP)) { +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 83a809d..82dd3cc 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -975,6 +975,31 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq); + void aarch64_sve_change_el(CPUARMState *env, int old_el, + int new_el, bool el0_a64); + void aarch64_add_sve_properties(Object *obj); ++ ++/* ++ * SVE registers are encoded in KVM's memory in an endianness-invariant format. ++ * The byte at offset i from the start of the in-memory representation contains ++ * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the ++ * lowest offsets are stored in the lowest memory addresses, then that nearly ++ * matches QEMU's representation, which is to use an array of host-endian ++ * uint64_t's, where the lower offsets are at the lower indices. To complete ++ * the translation we just need to byte swap the uint64_t's on big-endian hosts. 
++ */ ++static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) ++{ ++#ifdef HOST_WORDS_BIGENDIAN ++ int i; ++ ++ for (i = 0; i < nr; ++i) { ++ dst[i] = bswap64(src[i]); ++ } ++ ++ return dst; ++#else ++ return src; ++#endif ++} ++ + #else + static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { } + static inline void aarch64_sve_change_el(CPUARMState *env, int o, +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 876184b..e2da756 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -877,30 +877,6 @@ static int kvm_arch_put_fpsimd(CPUState *cs) + } + + /* +- * SVE registers are encoded in KVM's memory in an endianness-invariant format. +- * The byte at offset i from the start of the in-memory representation contains +- * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the +- * lowest offsets are stored in the lowest memory addresses, then that nearly +- * matches QEMU's representation, which is to use an array of host-endian +- * uint64_t's, where the lower offsets are at the lower indices. To complete +- * the translation we just need to byte swap the uint64_t's on big-endian hosts. +- */ +-static uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) +-{ +-#ifdef HOST_WORDS_BIGENDIAN +- int i; +- +- for (i = 0; i < nr; ++i) { +- dst[i] = bswap64(src[i]); +- } +- +- return dst; +-#else +- return src; +-#endif +-} +- +-/* + * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits + * and PREGS and the FFR have a slice size of 256 bits. 
However we simply hard + * code the slice index to zero for now as it's unlikely we'll need more than +-- +1.8.3.1 + diff --git a/kvm-vhost-Add-names-to-section-rounded-warning.patch b/kvm-vhost-Add-names-to-section-rounded-warning.patch new file mode 100644 index 0000000..c41a14c --- /dev/null +++ b/kvm-vhost-Add-names-to-section-rounded-warning.patch @@ -0,0 +1,53 @@ +From 0d545c5850caf76ad3e8dd9bb0fbc9f86b08e220 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 24 Jan 2020 19:46:11 +0100 +Subject: [PATCH 002/116] vhost: Add names to section rounded warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200124194613.41119-2-dgilbert@redhat.com> +Patchwork-id: 93450 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 1/3] vhost: Add names to section rounded warning +Bugzilla: 1779041 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Philippe Mathieu-Daudé + +From: "Dr. David Alan Gilbert" + +Add the memory region names to section rounding/alignment +warnings. + +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20200116202414.157959-2-dgilbert@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit ff4776147e960b128ee68f94c728659f662f4378) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 4da0d5a..774d87d 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -590,9 +590,10 @@ static void vhost_region_add_section(struct vhost_dev *dev, + * match up in the same RAMBlock if they do. 
+ */ + if (mrs_gpa < prev_gpa_start) { +- error_report("%s:Section rounded to %"PRIx64 +- " prior to previous %"PRIx64, +- __func__, mrs_gpa, prev_gpa_start); ++ error_report("%s:Section '%s' rounded to %"PRIx64 ++ " prior to previous '%s' %"PRIx64, ++ __func__, section->mr->name, mrs_gpa, ++ prev_sec->mr->name, prev_gpa_start); + /* A way to cleanly fail here would be better */ + return; + } +-- +1.8.3.1 + diff --git a/kvm-vhost-Only-align-sections-for-vhost-user.patch b/kvm-vhost-Only-align-sections-for-vhost-user.patch new file mode 100644 index 0000000..e082ce8 --- /dev/null +++ b/kvm-vhost-Only-align-sections-for-vhost-user.patch @@ -0,0 +1,97 @@ +From c35466c168e5219bf585aa65ac31fc9bdc7cbf36 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 24 Jan 2020 19:46:12 +0100 +Subject: [PATCH 003/116] vhost: Only align sections for vhost-user +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200124194613.41119-3-dgilbert@redhat.com> +Patchwork-id: 93452 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 2/3] vhost: Only align sections for vhost-user +Bugzilla: 1779041 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Philippe Mathieu-Daudé + +From: "Dr. David Alan Gilbert" + +I added hugepage alignment code in c1ece84e7c9 to deal with +vhost-user + postcopy which needs aligned pages when using userfault. +However, on x86 the lower 2MB of address space tends to be shotgun'd +with small fragments around the 512-640k range - e.g. video RAM, and +with HyperV synic pages tend to sit around there - again splitting +it up. The alignment code complains with a 'Section rounded to ...' +error and gives up. + +Since vhost-user already filters out devices without an fd +(see vhost-user.c vhost_user_mem_section_filter) it shouldn't be +affected by those overlaps. 
+ +Turn the alignment off on vhost-kernel so that it doesn't try +and align, and thus won't hit the rounding issues. + +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20200116202414.157959-3-dgilbert@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Paolo Bonzini +(cherry picked from commit 76525114736e8f669766e69b715fa59ce8648aae) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost.c | 34 ++++++++++++++++++---------------- + 1 file changed, 18 insertions(+), 16 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 774d87d..25fd469 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -547,26 +547,28 @@ static void vhost_region_add_section(struct vhost_dev *dev, + uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + + section->offset_within_region; + RAMBlock *mrs_rb = section->mr->ram_block; +- size_t mrs_page = qemu_ram_pagesize(mrs_rb); + + trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, + mrs_host); + +- /* Round the section to it's page size */ +- /* First align the start down to a page boundary */ +- uint64_t alignage = mrs_host & (mrs_page - 1); +- if (alignage) { +- mrs_host -= alignage; +- mrs_size += alignage; +- mrs_gpa -= alignage; +- } +- /* Now align the size up to a page boundary */ +- alignage = mrs_size & (mrs_page - 1); +- if (alignage) { +- mrs_size += mrs_page - alignage; +- } +- trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, +- mrs_host); ++ if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { ++ /* Round the section to it's page size */ ++ /* First align the start down to a page boundary */ ++ size_t mrs_page = qemu_ram_pagesize(mrs_rb); ++ uint64_t alignage = mrs_host & (mrs_page - 1); ++ if (alignage) { ++ mrs_host -= alignage; ++ mrs_size += alignage; ++ mrs_gpa -= alignage; ++ } ++ /* Now align the size up to a page boundary */ ++ alignage = mrs_size & (mrs_page - 1); ++ if 
(alignage) { ++ mrs_size += mrs_page - alignage; ++ } ++ trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, ++ mrs_host); ++ } + + if (dev->n_tmp_sections) { + /* Since we already have at least one section, lets see if +-- +1.8.3.1 + diff --git a/kvm-vhost-coding-style-fix.patch b/kvm-vhost-coding-style-fix.patch new file mode 100644 index 0000000..4546130 --- /dev/null +++ b/kvm-vhost-coding-style-fix.patch @@ -0,0 +1,56 @@ +From 624d96c456536e1471968a59fbeea206309cc33b Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 24 Jan 2020 19:46:13 +0100 +Subject: [PATCH 004/116] vhost: coding style fix +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200124194613.41119-4-dgilbert@redhat.com> +Patchwork-id: 93453 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 3/3] vhost: coding style fix +Bugzilla: 1779041 +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Philippe Mathieu-Daudé + +From: "Michael S. Tsirkin" + +Drop a trailing whitespace. Make line shorter. + +Fixes: 76525114736e8 ("vhost: Only align sections for vhost-user") +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 8347505640238d3b80f9bb7510fdc1bb574bad19) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 25fd469..9edfadc 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -551,7 +551,7 @@ static void vhost_region_add_section(struct vhost_dev *dev, + trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, + mrs_host); + +- if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { ++ if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { + /* Round the section to it's page size */ + /* First align the start down to a page boundary */ + size_t mrs_page = qemu_ram_pagesize(mrs_rb); +@@ -566,8 +566,8 @@ static void vhost_region_add_section(struct vhost_dev *dev, + if (alignage) { + mrs_size += mrs_page - alignage; + } +- trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, +- mrs_host); ++ trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, ++ mrs_size, mrs_host); + } + + if (dev->n_tmp_sections) { +-- +1.8.3.1 + diff --git a/kvm-vhost-user-Print-unexpected-slave-message-types.patch b/kvm-vhost-user-Print-unexpected-slave-message-types.patch new file mode 100644 index 0000000..e5776e7 --- /dev/null +++ b/kvm-vhost-user-Print-unexpected-slave-message-types.patch @@ -0,0 +1,48 @@ +From d6abbdaeb2c35efe6793a599c98116e250b1f179 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:43 +0100 +Subject: [PATCH 072/116] vhost-user: Print unexpected slave message types +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-69-dgilbert@redhat.com> +Patchwork-id: 93519 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 068/112] vhost-user: Print unexpected slave message types +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +When we receive an unexpected message type on the slave fd, print +the type. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 0fdc465d7d5aafeae127eba488f247ac6f58df4c) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost-user.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index 02a9b25..e4f46ec 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -1055,7 +1055,7 @@ static void slave_read(void *opaque) + fd[0]); + break; + default: +- error_report("Received unexpected msg type."); ++ error_report("Received unexpected msg type: %d.", hdr.request); + ret = -EINVAL; + } + +-- +1.8.3.1 + diff --git a/kvm-vhost-user-fs-remove-vhostfd-property.patch b/kvm-vhost-user-fs-remove-vhostfd-property.patch new file mode 100644 index 0000000..5904e82 --- /dev/null +++ b/kvm-vhost-user-fs-remove-vhostfd-property.patch @@ -0,0 +1,59 @@ +From 912af6f7c270e2939a91c9b3f62b6ba1202edc43 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:37 +0100 +Subject: [PATCH 006/116] vhost-user-fs: remove "vhostfd" property +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-3-dgilbert@redhat.com> +Patchwork-id: 93458 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 002/112] vhost-user-fs: remove "vhostfd" property +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Marc-André Lureau + +The property doesn't make much sense for a vhost-user device. + +Signed-off-by: Marc-André Lureau +Message-Id: <20191116112016.14872-1-marcandre.lureau@redhat.com> +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 703857348724319735d9be7b5b996e6445c6e6b9) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost-user-fs.c | 1 - + include/hw/virtio/vhost-user-fs.h | 1 - + 2 files changed, 2 deletions(-) + +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index f0df7f4..ca0b7fc 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -263,7 +263,6 @@ static Property vuf_properties[] = { + DEFINE_PROP_UINT16("num-request-queues", VHostUserFS, + conf.num_request_queues, 1), + DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128), +- DEFINE_PROP_STRING("vhostfd", VHostUserFS, conf.vhostfd), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h +index 539885b..9ff1bdb 100644 +--- a/include/hw/virtio/vhost-user-fs.h ++++ b/include/hw/virtio/vhost-user-fs.h +@@ -28,7 +28,6 @@ typedef struct { + char *tag; + uint16_t num_request_queues; + uint16_t queue_size; +- char *vhostfd; + } VHostUserFSConf; + + typedef struct { +-- +1.8.3.1 + diff --git a/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch b/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch new file mode 100644 index 0000000..9a69ed1 --- /dev/null +++ b/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch @@ -0,0 +1,60 @@ +From c0cf6d8a1d3b9bf3928f37fcfd5aa8ae6f1338ca Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:36 +0100 +Subject: [PATCH 005/116] virtio-fs: fix MSI-X nvectors calculation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-2-dgilbert@redhat.com> +Patchwork-id: 93455 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 001/112] virtio-fs: fix MSI-X nvectors calculation +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +The following MSI-X vectors are required: + * VIRTIO Configuration Change + * hiprio virtqueue + * requests virtqueues + +Fix the calculation to reserve enough MSI-X vectors. Otherwise guest +drivers fall back to a sub-optional configuration where all virtqueues +share a single vector. + +This change does not break live migration compatibility since +vhost-user-fs-pci devices are not migratable yet. + +Reported-by: Vivek Goyal +Signed-off-by: Stefan Hajnoczi +Message-Id: <20191209110759.35227-1-stefanha@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 366844f3d1329c6423dd752891a28ccb3ee8fddd) +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost-user-fs-pci.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c +index 933a3f2..e3a649d 100644 +--- a/hw/virtio/vhost-user-fs-pci.c ++++ b/hw/virtio/vhost-user-fs-pci.c +@@ -40,7 +40,8 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) + DeviceState *vdev = DEVICE(&dev->vdev); + + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { +- vpci_dev->nvectors = dev->vdev.conf.num_request_queues + 1; ++ /* Also reserve config change and hiprio queue vectors */ ++ vpci_dev->nvectors = dev->vdev.conf.num_request_queues + 2; + } + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); +-- +1.8.3.1 + diff --git a/kvm-virtiofs-Add-maintainers-entry.patch b/kvm-virtiofs-Add-maintainers-entry.patch new file mode 100644 index 0000000..fec9371 --- /dev/null +++ b/kvm-virtiofs-Add-maintainers-entry.patch @@ -0,0 +1,52 @@ +From f4144443eacceb04823ee72cb2d4f9f841f05495 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:11 +0100 +Subject: [PATCH 040/116] virtiofs: Add maintainers entry +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-37-dgilbert@redhat.com> +Patchwork-id: 93491 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 036/112] virtiofs: Add maintainers entry +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit bad7d2c3ad1af9344df035aedaf8e0967a543070) +Signed-off-by: Miroslav Rezanina +--- + MAINTAINERS | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/MAINTAINERS b/MAINTAINERS +index 5e5e3e5..d1b3e26 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -1575,6 +1575,14 @@ T: git https://github.com/cohuck/qemu.git s390-next + T: git https://github.com/borntraeger/qemu.git s390-next + L: qemu-s390x@nongnu.org + ++virtiofs ++M: Dr. David Alan Gilbert ++M: Stefan Hajnoczi ++S: Supported ++F: tools/virtiofsd/* ++F: hw/virtio/vhost-user-fs* ++F: include/hw/virtio/vhost-user-fs.h ++ + virtio-input + M: Gerd Hoffmann + S: Maintained +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch b/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch new file mode 100644 index 0000000..a2b91be --- /dev/null +++ b/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch @@ -0,0 +1,86 @@ +From 4d9106acfd7ed9e4d197ddf9f22b79ba6c8afdd8 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:38 +0100 +Subject: [PATCH 067/116] virtiofsd: Add ID to the log with FUSE_LOG_DEBUG + level +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-64-dgilbert@redhat.com> +Patchwork-id: 93514 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 063/112] virtiofsd: Add ID to the log with FUSE_LOG_DEBUG level +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Masayoshi Mizuma + +virtiofsd has some threads, so we see a lot of logs with debug option. +It would be useful for debugging if we can identify the specific thread +from the log. + +Add ID, which is got by gettid(), to the log with FUSE_LOG_DEBUG level +so that we can grep the specific thread. 
+ +The log is like as: + + ]# ./virtiofsd -d -o vhost_user_socket=/tmp/vhostqemu0 -o source=/tmp/share0 -o cache=auto + ... + [ID: 00000097] unique: 12696, success, outsize: 120 + [ID: 00000097] virtio_send_msg: elem 18: with 2 in desc of length 120 + [ID: 00000003] fv_queue_thread: Got queue event on Queue 1 + [ID: 00000003] fv_queue_thread: Queue 1 gave evalue: 1 available: in: 65552 out: 80 + [ID: 00000003] fv_queue_thread: Waiting for Queue 1 event + [ID: 00000071] fv_queue_worker: elem 33: with 2 out desc of length 80 bad_in_num=0 bad_out_num=0 + [ID: 00000071] unique: 12694, opcode: READ (15), nodeid: 2, insize: 80, pid: 2014 + [ID: 00000071] lo_read(ino=2, size=65536, off=131072) + +Signed-off-by: Masayoshi Mizuma + +Signed-off-by: Dr. David Alan Gilbert + added rework as suggested by Daniel P. Berrangé during review +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 36f3846902bd41413f6c0bf797dee509028c29f4) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index ff6910f..f08324f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -43,6 +43,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -2268,10 +2269,17 @@ static void setup_nofile_rlimit(void) + + static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) + { ++ g_autofree char *localfmt = NULL; ++ + if (current_log_level < level) { + return; + } + ++ if (current_log_level == FUSE_LOG_DEBUG) { ++ localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), fmt); ++ fmt = localfmt; ++ } ++ + if (use_syslog) { + int priority = LOG_ERR; + switch (level) { +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch 
b/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch new file mode 100644 index 0000000..b017bf4 --- /dev/null +++ b/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch @@ -0,0 +1,106 @@ +From 709408de33112d32b7c6675f8c9320b8bebccd58 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:05 +0100 +Subject: [PATCH 034/116] virtiofsd: Add Makefile wiring for virtiofsd contrib +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-31-dgilbert@redhat.com> +Patchwork-id: 93482 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 030/112] virtiofsd: Add Makefile wiring for virtiofsd contrib +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Wire up the building of the virtiofsd in tools. + +virtiofsd relies on Linux-specific system calls and seccomp. Anyone +wishing to port it to other host operating systems should do so +carefully and without reducing security. + +Only allow building on Linux hosts. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Liam Merwick +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 81bfc42dcf473bc8d3790622633410da72d8e622) +Signed-off-by: Miroslav Rezanina +--- + Makefile | 10 ++++++++++ + Makefile.objs | 1 + + tools/virtiofsd/Makefile.objs | 9 +++++++++ + 3 files changed, 20 insertions(+) + create mode 100644 tools/virtiofsd/Makefile.objs + +diff --git a/Makefile b/Makefile +index 4254950..1526775 100644 +--- a/Makefile ++++ b/Makefile +@@ -330,6 +330,10 @@ endif + endif + endif + ++ifdef CONFIG_LINUX ++HELPERS-y += virtiofsd$(EXESUF) ++endif ++ + # Sphinx does not allow building manuals into the same directory as + # the source files, so if we're doing an in-tree QEMU build we must + # build the manuals into a subdirectory (and then install them from +@@ -430,6 +434,7 @@ dummy := $(call unnest-vars,, \ + elf2dmp-obj-y \ + ivshmem-client-obj-y \ + ivshmem-server-obj-y \ ++ virtiofsd-obj-y \ + rdmacm-mux-obj-y \ + libvhost-user-obj-y \ + vhost-user-scsi-obj-y \ +@@ -675,6 +680,11 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad" + rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) + $(call LINK, $^) + ++ifdef CONFIG_LINUX # relies on Linux-specific syscalls ++virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) ++ $(call LINK, $^) ++endif ++ + vhost-user-gpu$(EXESUF): $(vhost-user-gpu-obj-y) $(libvhost-user-obj-y) libqemuutil.a libqemustub.a + $(call LINK, $^) + +diff --git a/Makefile.objs b/Makefile.objs +index fcf63e1..1a8f288 100644 +--- a/Makefile.objs ++++ b/Makefile.objs +@@ -125,6 +125,7 @@ vhost-user-blk-obj-y = contrib/vhost-user-blk/ + rdmacm-mux-obj-y = contrib/rdmacm-mux/ + vhost-user-input-obj-y = contrib/vhost-user-input/ + vhost-user-gpu-obj-y = contrib/vhost-user-gpu/ ++virtiofsd-obj-y = tools/virtiofsd/ + + ###################################################################### + trace-events-subdirs = +diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs +new file mode 100644 +index 0000000..45a8075 +--- /dev/null ++++ 
b/tools/virtiofsd/Makefile.objs +@@ -0,0 +1,9 @@ ++virtiofsd-obj-y = buffer.o \ ++ fuse_opt.o \ ++ fuse_log.o \ ++ fuse_lowlevel.o \ ++ fuse_signals.o \ ++ fuse_virtio.o \ ++ helper.o \ ++ passthrough_ll.o ++ +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Add-auxiliary-.c-s.patch b/kvm-virtiofsd-Add-auxiliary-.c-s.patch new file mode 100644 index 0000000..90150d9 --- /dev/null +++ b/kvm-virtiofsd-Add-auxiliary-.c-s.patch @@ -0,0 +1,1387 @@ +From 55b4059d6399c212109c758190e15b574accdd07 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:41 +0100 +Subject: [PATCH 010/116] virtiofsd: Add auxiliary .c's +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-7-dgilbert@redhat.com> +Patchwork-id: 93461 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 006/112] virtiofsd: Add auxiliary .c's +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Add most of the non-main .c files we need from upstream fuse-3.8.0 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit ffcf8d9f8649c6e56b1193bbbc9c9f7388920043) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 321 ++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_log.c | 40 ++++ + tools/virtiofsd/fuse_opt.c | 423 +++++++++++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_signals.c | 91 +++++++++ + tools/virtiofsd/helper.c | 440 +++++++++++++++++++++++++++++++++++++++++ + 5 files changed, 1315 insertions(+) + create mode 100644 tools/virtiofsd/buffer.c + create mode 100644 tools/virtiofsd/fuse_log.c + create mode 100644 tools/virtiofsd/fuse_opt.c + create mode 100644 tools/virtiofsd/fuse_signals.c + create mode 100644 tools/virtiofsd/helper.c + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +new file mode 100644 +index 0000000..5ab9b87 +--- /dev/null ++++ b/tools/virtiofsd/buffer.c +@@ -0,0 +1,321 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2010 Miklos Szeredi ++ ++ Functions for dealing with `struct fuse_buf` and `struct ++ fuse_bufvec`. ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB ++*/ ++ ++#define _GNU_SOURCE ++ ++#include "config.h" ++#include "fuse_i.h" ++#include "fuse_lowlevel.h" ++#include ++#include ++#include ++#include ++ ++size_t fuse_buf_size(const struct fuse_bufvec *bufv) ++{ ++ size_t i; ++ size_t size = 0; ++ ++ for (i = 0; i < bufv->count; i++) { ++ if (bufv->buf[i].size == SIZE_MAX) ++ size = SIZE_MAX; ++ else ++ size += bufv->buf[i].size; ++ } ++ ++ return size; ++} ++ ++static size_t min_size(size_t s1, size_t s2) ++{ ++ return s1 < s2 ? 
s1 : s2; ++} ++ ++static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) ++{ ++ ssize_t res = 0; ++ size_t copied = 0; ++ ++ while (len) { ++ if (dst->flags & FUSE_BUF_FD_SEEK) { ++ res = pwrite(dst->fd, (char *)src->mem + src_off, len, ++ dst->pos + dst_off); ++ } else { ++ res = write(dst->fd, (char *)src->mem + src_off, len); ++ } ++ if (res == -1) { ++ if (!copied) ++ return -errno; ++ break; ++ } ++ if (res == 0) ++ break; ++ ++ copied += res; ++ if (!(dst->flags & FUSE_BUF_FD_RETRY)) ++ break; ++ ++ src_off += res; ++ dst_off += res; ++ len -= res; ++ } ++ ++ return copied; ++} ++ ++static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) ++{ ++ ssize_t res = 0; ++ size_t copied = 0; ++ ++ while (len) { ++ if (src->flags & FUSE_BUF_FD_SEEK) { ++ res = pread(src->fd, (char *)dst->mem + dst_off, len, ++ src->pos + src_off); ++ } else { ++ res = read(src->fd, (char *)dst->mem + dst_off, len); ++ } ++ if (res == -1) { ++ if (!copied) ++ return -errno; ++ break; ++ } ++ if (res == 0) ++ break; ++ ++ copied += res; ++ if (!(src->flags & FUSE_BUF_FD_RETRY)) ++ break; ++ ++ dst_off += res; ++ src_off += res; ++ len -= res; ++ } ++ ++ return copied; ++} ++ ++static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) ++{ ++ char buf[4096]; ++ struct fuse_buf tmp = { ++ .size = sizeof(buf), ++ .flags = 0, ++ }; ++ ssize_t res; ++ size_t copied = 0; ++ ++ tmp.mem = buf; ++ ++ while (len) { ++ size_t this_len = min_size(tmp.size, len); ++ size_t read_len; ++ ++ res = fuse_buf_read(&tmp, 0, src, src_off, this_len); ++ if (res < 0) { ++ if (!copied) ++ return res; ++ break; ++ } ++ if (res == 0) ++ break; ++ ++ read_len = res; ++ res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); ++ if (res < 0) { ++ if (!copied) ++ return res; ++ 
break; ++ } ++ if (res == 0) ++ break; ++ ++ copied += res; ++ ++ if (res < this_len) ++ break; ++ ++ dst_off += res; ++ src_off += res; ++ len -= res; ++ } ++ ++ return copied; ++} ++ ++#ifdef HAVE_SPLICE ++static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len, enum fuse_buf_copy_flags flags) ++{ ++ int splice_flags = 0; ++ off_t *srcpos = NULL; ++ off_t *dstpos = NULL; ++ off_t srcpos_val; ++ off_t dstpos_val; ++ ssize_t res; ++ size_t copied = 0; ++ ++ if (flags & FUSE_BUF_SPLICE_MOVE) ++ splice_flags |= SPLICE_F_MOVE; ++ if (flags & FUSE_BUF_SPLICE_NONBLOCK) ++ splice_flags |= SPLICE_F_NONBLOCK; ++ ++ if (src->flags & FUSE_BUF_FD_SEEK) { ++ srcpos_val = src->pos + src_off; ++ srcpos = &srcpos_val; ++ } ++ if (dst->flags & FUSE_BUF_FD_SEEK) { ++ dstpos_val = dst->pos + dst_off; ++ dstpos = &dstpos_val; ++ } ++ ++ while (len) { ++ res = splice(src->fd, srcpos, dst->fd, dstpos, len, ++ splice_flags); ++ if (res == -1) { ++ if (copied) ++ break; ++ ++ if (errno != EINVAL || (flags & FUSE_BUF_FORCE_SPLICE)) ++ return -errno; ++ ++ /* Maybe splice is not supported for this combination */ ++ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, ++ len); ++ } ++ if (res == 0) ++ break; ++ ++ copied += res; ++ if (!(src->flags & FUSE_BUF_FD_RETRY) && ++ !(dst->flags & FUSE_BUF_FD_RETRY)) { ++ break; ++ } ++ ++ len -= res; ++ } ++ ++ return copied; ++} ++#else ++static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len, enum fuse_buf_copy_flags flags) ++{ ++ (void) flags; ++ ++ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); ++} ++#endif ++ ++ ++static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, ++ const struct fuse_buf *src, size_t src_off, ++ size_t len, enum fuse_buf_copy_flags flags) ++{ ++ int src_is_fd = src->flags & FUSE_BUF_IS_FD; ++ int dst_is_fd = dst->flags & 
FUSE_BUF_IS_FD; ++ ++ if (!src_is_fd && !dst_is_fd) { ++ char *dstmem = (char *)dst->mem + dst_off; ++ char *srcmem = (char *)src->mem + src_off; ++ ++ if (dstmem != srcmem) { ++ if (dstmem + len <= srcmem || srcmem + len <= dstmem) ++ memcpy(dstmem, srcmem, len); ++ else ++ memmove(dstmem, srcmem, len); ++ } ++ ++ return len; ++ } else if (!src_is_fd) { ++ return fuse_buf_write(dst, dst_off, src, src_off, len); ++ } else if (!dst_is_fd) { ++ return fuse_buf_read(dst, dst_off, src, src_off, len); ++ } else if (flags & FUSE_BUF_NO_SPLICE) { ++ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); ++ } else { ++ return fuse_buf_splice(dst, dst_off, src, src_off, len, flags); ++ } ++} ++ ++static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv) ++{ ++ if (bufv->idx < bufv->count) ++ return &bufv->buf[bufv->idx]; ++ else ++ return NULL; ++} ++ ++static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) ++{ ++ const struct fuse_buf *buf = fuse_bufvec_current(bufv); ++ ++ bufv->off += len; ++ assert(bufv->off <= buf->size); ++ if (bufv->off == buf->size) { ++ assert(bufv->idx < bufv->count); ++ bufv->idx++; ++ if (bufv->idx == bufv->count) ++ return 0; ++ bufv->off = 0; ++ } ++ return 1; ++} ++ ++ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, ++ enum fuse_buf_copy_flags flags) ++{ ++ size_t copied = 0; ++ ++ if (dstv == srcv) ++ return fuse_buf_size(dstv); ++ ++ for (;;) { ++ const struct fuse_buf *src = fuse_bufvec_current(srcv); ++ const struct fuse_buf *dst = fuse_bufvec_current(dstv); ++ size_t src_len; ++ size_t dst_len; ++ size_t len; ++ ssize_t res; ++ ++ if (src == NULL || dst == NULL) ++ break; ++ ++ src_len = src->size - srcv->off; ++ dst_len = dst->size - dstv->off; ++ len = min_size(src_len, dst_len); ++ ++ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); ++ if (res < 0) { ++ if (!copied) ++ return res; ++ break; ++ } ++ copied += res; ++ ++ if (!fuse_bufvec_advance(srcv, res) 
|| ++ !fuse_bufvec_advance(dstv, res)) ++ break; ++ ++ if (res < len) ++ break; ++ } ++ ++ return copied; ++} +diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c +new file mode 100644 +index 0000000..0d268ab +--- /dev/null ++++ b/tools/virtiofsd/fuse_log.c +@@ -0,0 +1,40 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2019 Red Hat, Inc. ++ ++ Logging API. ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB ++*/ ++ ++#include "fuse_log.h" ++ ++#include ++#include ++ ++static void default_log_func( ++ __attribute__(( unused )) enum fuse_log_level level, ++ const char *fmt, va_list ap) ++{ ++ vfprintf(stderr, fmt, ap); ++} ++ ++static fuse_log_func_t log_func = default_log_func; ++ ++void fuse_set_log_func(fuse_log_func_t func) ++{ ++ if (!func) ++ func = default_log_func; ++ ++ log_func = func; ++} ++ ++void fuse_log(enum fuse_log_level level, const char *fmt, ...) ++{ ++ va_list ap; ++ ++ va_start(ap, fmt); ++ log_func(level, fmt, ap); ++ va_end(ap); ++} +diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c +new file mode 100644 +index 0000000..93066b9 +--- /dev/null ++++ b/tools/virtiofsd/fuse_opt.c +@@ -0,0 +1,423 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ Implementation of option parsing routines (dealing with `struct ++ fuse_args`). ++ ++ This program can be distributed under the terms of the GNU LGPLv2. 
++ See the file COPYING.LIB ++*/ ++ ++#include "config.h" ++#include "fuse_i.h" ++#include "fuse_opt.h" ++#include "fuse_misc.h" ++ ++#include ++#include ++#include ++#include ++ ++struct fuse_opt_context { ++ void *data; ++ const struct fuse_opt *opt; ++ fuse_opt_proc_t proc; ++ int argctr; ++ int argc; ++ char **argv; ++ struct fuse_args outargs; ++ char *opts; ++ int nonopt; ++}; ++ ++void fuse_opt_free_args(struct fuse_args *args) ++{ ++ if (args) { ++ if (args->argv && args->allocated) { ++ int i; ++ for (i = 0; i < args->argc; i++) ++ free(args->argv[i]); ++ free(args->argv); ++ } ++ args->argc = 0; ++ args->argv = NULL; ++ args->allocated = 0; ++ } ++} ++ ++static int alloc_failed(void) ++{ ++ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); ++ return -1; ++} ++ ++int fuse_opt_add_arg(struct fuse_args *args, const char *arg) ++{ ++ char **newargv; ++ char *newarg; ++ ++ assert(!args->argv || args->allocated); ++ ++ newarg = strdup(arg); ++ if (!newarg) ++ return alloc_failed(); ++ ++ newargv = realloc(args->argv, (args->argc + 2) * sizeof(char *)); ++ if (!newargv) { ++ free(newarg); ++ return alloc_failed(); ++ } ++ ++ args->argv = newargv; ++ args->allocated = 1; ++ args->argv[args->argc++] = newarg; ++ args->argv[args->argc] = NULL; ++ return 0; ++} ++ ++static int fuse_opt_insert_arg_common(struct fuse_args *args, int pos, ++ const char *arg) ++{ ++ assert(pos <= args->argc); ++ if (fuse_opt_add_arg(args, arg) == -1) ++ return -1; ++ ++ if (pos != args->argc - 1) { ++ char *newarg = args->argv[args->argc - 1]; ++ memmove(&args->argv[pos + 1], &args->argv[pos], ++ sizeof(char *) * (args->argc - pos - 1)); ++ args->argv[pos] = newarg; ++ } ++ return 0; ++} ++ ++int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg) ++{ ++ return fuse_opt_insert_arg_common(args, pos, arg); ++} ++ ++static int next_arg(struct fuse_opt_context *ctx, const char *opt) ++{ ++ if (ctx->argctr + 1 >= ctx->argc) { ++ fuse_log(FUSE_LOG_ERR, "fuse: 
missing argument after `%s'\n", opt); ++ return -1; ++ } ++ ctx->argctr++; ++ return 0; ++} ++ ++static int add_arg(struct fuse_opt_context *ctx, const char *arg) ++{ ++ return fuse_opt_add_arg(&ctx->outargs, arg); ++} ++ ++static int add_opt_common(char **opts, const char *opt, int esc) ++{ ++ unsigned oldlen = *opts ? strlen(*opts) : 0; ++ char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); ++ ++ if (!d) ++ return alloc_failed(); ++ ++ *opts = d; ++ if (oldlen) { ++ d += oldlen; ++ *d++ = ','; ++ } ++ ++ for (; *opt; opt++) { ++ if (esc && (*opt == ',' || *opt == '\\')) ++ *d++ = '\\'; ++ *d++ = *opt; ++ } ++ *d = '\0'; ++ ++ return 0; ++} ++ ++int fuse_opt_add_opt(char **opts, const char *opt) ++{ ++ return add_opt_common(opts, opt, 0); ++} ++ ++int fuse_opt_add_opt_escaped(char **opts, const char *opt) ++{ ++ return add_opt_common(opts, opt, 1); ++} ++ ++static int add_opt(struct fuse_opt_context *ctx, const char *opt) ++{ ++ return add_opt_common(&ctx->opts, opt, 1); ++} ++ ++static int call_proc(struct fuse_opt_context *ctx, const char *arg, int key, ++ int iso) ++{ ++ if (key == FUSE_OPT_KEY_DISCARD) ++ return 0; ++ ++ if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { ++ int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); ++ if (res == -1 || !res) ++ return res; ++ } ++ if (iso) ++ return add_opt(ctx, arg); ++ else ++ return add_arg(ctx, arg); ++} ++ ++static int match_template(const char *t, const char *arg, unsigned *sepp) ++{ ++ int arglen = strlen(arg); ++ const char *sep = strchr(t, '='); ++ sep = sep ? 
sep : strchr(t, ' '); ++ if (sep && (!sep[1] || sep[1] == '%')) { ++ int tlen = sep - t; ++ if (sep[0] == '=') ++ tlen ++; ++ if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { ++ *sepp = sep - t; ++ return 1; ++ } ++ } ++ if (strcmp(t, arg) == 0) { ++ *sepp = 0; ++ return 1; ++ } ++ return 0; ++} ++ ++static const struct fuse_opt *find_opt(const struct fuse_opt *opt, ++ const char *arg, unsigned *sepp) ++{ ++ for (; opt && opt->templ; opt++) ++ if (match_template(opt->templ, arg, sepp)) ++ return opt; ++ return NULL; ++} ++ ++int fuse_opt_match(const struct fuse_opt *opts, const char *opt) ++{ ++ unsigned dummy; ++ return find_opt(opts, opt, &dummy) ? 1 : 0; ++} ++ ++static int process_opt_param(void *var, const char *format, const char *param, ++ const char *arg) ++{ ++ assert(format[0] == '%'); ++ if (format[1] == 's') { ++ char **s = var; ++ char *copy = strdup(param); ++ if (!copy) ++ return alloc_failed(); ++ ++ free(*s); ++ *s = copy; ++ } else { ++ if (sscanf(param, format, var) != 1) { ++ fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", arg); ++ return -1; ++ } ++ } ++ return 0; ++} ++ ++static int process_opt(struct fuse_opt_context *ctx, ++ const struct fuse_opt *opt, unsigned sep, ++ const char *arg, int iso) ++{ ++ if (opt->offset == -1U) { ++ if (call_proc(ctx, arg, opt->value, iso) == -1) ++ return -1; ++ } else { ++ void *var = (char *)ctx->data + opt->offset; ++ if (sep && opt->templ[sep + 1]) { ++ const char *param = arg + sep; ++ if (opt->templ[sep] == '=') ++ param ++; ++ if (process_opt_param(var, opt->templ + sep + 1, ++ param, arg) == -1) ++ return -1; ++ } else ++ *(int *)var = opt->value; ++ } ++ return 0; ++} ++ ++static int process_opt_sep_arg(struct fuse_opt_context *ctx, ++ const struct fuse_opt *opt, unsigned sep, ++ const char *arg, int iso) ++{ ++ int res; ++ char *newarg; ++ char *param; ++ ++ if (next_arg(ctx, arg) == -1) ++ return -1; ++ ++ param = ctx->argv[ctx->argctr]; ++ newarg = malloc(sep + strlen(param) + 
1); ++ if (!newarg) ++ return alloc_failed(); ++ ++ memcpy(newarg, arg, sep); ++ strcpy(newarg + sep, param); ++ res = process_opt(ctx, opt, sep, newarg, iso); ++ free(newarg); ++ ++ return res; ++} ++ ++static int process_gopt(struct fuse_opt_context *ctx, const char *arg, int iso) ++{ ++ unsigned sep; ++ const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); ++ if (opt) { ++ for (; opt; opt = find_opt(opt + 1, arg, &sep)) { ++ int res; ++ if (sep && opt->templ[sep] == ' ' && !arg[sep]) ++ res = process_opt_sep_arg(ctx, opt, sep, arg, ++ iso); ++ else ++ res = process_opt(ctx, opt, sep, arg, iso); ++ if (res == -1) ++ return -1; ++ } ++ return 0; ++ } else ++ return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); ++} ++ ++static int process_real_option_group(struct fuse_opt_context *ctx, char *opts) ++{ ++ char *s = opts; ++ char *d = s; ++ int end = 0; ++ ++ while (!end) { ++ if (*s == '\0') ++ end = 1; ++ if (*s == ',' || end) { ++ int res; ++ ++ *d = '\0'; ++ res = process_gopt(ctx, opts, 1); ++ if (res == -1) ++ return -1; ++ d = opts; ++ } else { ++ if (s[0] == '\\' && s[1] != '\0') { ++ s++; ++ if (s[0] >= '0' && s[0] <= '3' && ++ s[1] >= '0' && s[1] <= '7' && ++ s[2] >= '0' && s[2] <= '7') { ++ *d++ = (s[0] - '0') * 0100 + ++ (s[1] - '0') * 0010 + ++ (s[2] - '0'); ++ s += 2; ++ } else { ++ *d++ = *s; ++ } ++ } else { ++ *d++ = *s; ++ } ++ } ++ s++; ++ } ++ ++ return 0; ++} ++ ++static int process_option_group(struct fuse_opt_context *ctx, const char *opts) ++{ ++ int res; ++ char *copy = strdup(opts); ++ ++ if (!copy) { ++ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); ++ return -1; ++ } ++ res = process_real_option_group(ctx, copy); ++ free(copy); ++ return res; ++} ++ ++static int process_one(struct fuse_opt_context *ctx, const char *arg) ++{ ++ if (ctx->nonopt || arg[0] != '-') ++ return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); ++ else if (arg[1] == 'o') { ++ if (arg[2]) ++ return process_option_group(ctx, arg + 2); ++ else { ++ if 
(next_arg(ctx, arg) == -1) ++ return -1; ++ ++ return process_option_group(ctx, ++ ctx->argv[ctx->argctr]); ++ } ++ } else if (arg[1] == '-' && !arg[2]) { ++ if (add_arg(ctx, arg) == -1) ++ return -1; ++ ctx->nonopt = ctx->outargs.argc; ++ return 0; ++ } else ++ return process_gopt(ctx, arg, 0); ++} ++ ++static int opt_parse(struct fuse_opt_context *ctx) ++{ ++ if (ctx->argc) { ++ if (add_arg(ctx, ctx->argv[0]) == -1) ++ return -1; ++ } ++ ++ for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) ++ if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) ++ return -1; ++ ++ if (ctx->opts) { ++ if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || ++ fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) ++ return -1; ++ } ++ ++ /* If option separator ("--") is the last argument, remove it */ ++ if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && ++ strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { ++ free(ctx->outargs.argv[ctx->outargs.argc - 1]); ++ ctx->outargs.argv[--ctx->outargs.argc] = NULL; ++ } ++ ++ return 0; ++} ++ ++int fuse_opt_parse(struct fuse_args *args, void *data, ++ const struct fuse_opt opts[], fuse_opt_proc_t proc) ++{ ++ int res; ++ struct fuse_opt_context ctx = { ++ .data = data, ++ .opt = opts, ++ .proc = proc, ++ }; ++ ++ if (!args || !args->argv || !args->argc) ++ return 0; ++ ++ ctx.argc = args->argc; ++ ctx.argv = args->argv; ++ ++ res = opt_parse(&ctx); ++ if (res != -1) { ++ struct fuse_args tmp = *args; ++ *args = ctx.outargs; ++ ctx.outargs = tmp; ++ } ++ free(ctx.opts); ++ fuse_opt_free_args(&ctx.outargs); ++ return res; ++} +diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c +new file mode 100644 +index 0000000..4271947 +--- /dev/null ++++ b/tools/virtiofsd/fuse_signals.c +@@ -0,0 +1,91 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ Utility functions for setting signal handlers. 
++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB ++*/ ++ ++#include "config.h" ++#include "fuse_lowlevel.h" ++#include "fuse_i.h" ++ ++#include ++#include ++#include ++#include ++ ++static struct fuse_session *fuse_instance; ++ ++static void exit_handler(int sig) ++{ ++ if (fuse_instance) { ++ fuse_session_exit(fuse_instance); ++ if(sig <= 0) { ++ fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); ++ abort(); ++ } ++ fuse_instance->error = sig; ++ } ++} ++ ++static void do_nothing(int sig) ++{ ++ (void) sig; ++} ++ ++static int set_one_signal_handler(int sig, void (*handler)(int), int remove) ++{ ++ struct sigaction sa; ++ struct sigaction old_sa; ++ ++ memset(&sa, 0, sizeof(struct sigaction)); ++ sa.sa_handler = remove ? SIG_DFL : handler; ++ sigemptyset(&(sa.sa_mask)); ++ sa.sa_flags = 0; ++ ++ if (sigaction(sig, NULL, &old_sa) == -1) { ++ perror("fuse: cannot get old signal handler"); ++ return -1; ++ } ++ ++ if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && ++ sigaction(sig, &sa, NULL) == -1) { ++ perror("fuse: cannot set signal handler"); ++ return -1; ++ } ++ return 0; ++} ++ ++int fuse_set_signal_handlers(struct fuse_session *se) ++{ ++ /* If we used SIG_IGN instead of the do_nothing function, ++ then we would be unable to tell if we set SIG_IGN (and ++ thus should reset to SIG_DFL in fuse_remove_signal_handlers) ++ or if it was already set to SIG_IGN (and should be left ++ untouched. 
*/ ++ if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) ++ return -1; ++ ++ fuse_instance = se; ++ return 0; ++} ++ ++void fuse_remove_signal_handlers(struct fuse_session *se) ++{ ++ if (fuse_instance != se) ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: fuse_remove_signal_handlers: unknown session\n"); ++ else ++ fuse_instance = NULL; ++ ++ set_one_signal_handler(SIGHUP, exit_handler, 1); ++ set_one_signal_handler(SIGINT, exit_handler, 1); ++ set_one_signal_handler(SIGTERM, exit_handler, 1); ++ set_one_signal_handler(SIGPIPE, do_nothing, 1); ++} +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +new file mode 100644 +index 0000000..64ff7ad +--- /dev/null ++++ b/tools/virtiofsd/helper.c +@@ -0,0 +1,440 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ Helper functions to create (simple) standalone programs. With the ++ aid of these functions it should be possible to create full FUSE ++ file system by implementing nothing but the request handlers. ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. 
++*/ ++ ++#include "config.h" ++#include "fuse_i.h" ++#include "fuse_misc.h" ++#include "fuse_opt.h" ++#include "fuse_lowlevel.h" ++#include "mount_util.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define FUSE_HELPER_OPT(t, p) \ ++ { t, offsetof(struct fuse_cmdline_opts, p), 1 } ++ ++static const struct fuse_opt fuse_helper_opts[] = { ++ FUSE_HELPER_OPT("-h", show_help), ++ FUSE_HELPER_OPT("--help", show_help), ++ FUSE_HELPER_OPT("-V", show_version), ++ FUSE_HELPER_OPT("--version", show_version), ++ FUSE_HELPER_OPT("-d", debug), ++ FUSE_HELPER_OPT("debug", debug), ++ FUSE_HELPER_OPT("-d", foreground), ++ FUSE_HELPER_OPT("debug", foreground), ++ FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), ++ FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), ++ FUSE_HELPER_OPT("-f", foreground), ++ FUSE_HELPER_OPT("-s", singlethread), ++ FUSE_HELPER_OPT("fsname=", nodefault_subtype), ++ FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), ++#ifndef __FreeBSD__ ++ FUSE_HELPER_OPT("subtype=", nodefault_subtype), ++ FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), ++#endif ++ FUSE_HELPER_OPT("clone_fd", clone_fd), ++ FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), ++ FUSE_OPT_END ++}; ++ ++struct fuse_conn_info_opts { ++ int atomic_o_trunc; ++ int no_remote_posix_lock; ++ int no_remote_flock; ++ int splice_write; ++ int splice_move; ++ int splice_read; ++ int no_splice_write; ++ int no_splice_move; ++ int no_splice_read; ++ int auto_inval_data; ++ int no_auto_inval_data; ++ int no_readdirplus; ++ int no_readdirplus_auto; ++ int async_dio; ++ int no_async_dio; ++ int writeback_cache; ++ int no_writeback_cache; ++ int async_read; ++ int sync_read; ++ unsigned max_write; ++ unsigned max_readahead; ++ unsigned max_background; ++ unsigned congestion_threshold; ++ unsigned time_gran; ++ int set_max_write; ++ int set_max_readahead; ++ int set_max_background; ++ int set_congestion_threshold; ++ int set_time_gran; ++}; ++ ++#define CONN_OPTION(t, p, v) \ ++ 
{ t, offsetof(struct fuse_conn_info_opts, p), v } ++static const struct fuse_opt conn_info_opt_spec[] = { ++ CONN_OPTION("max_write=%u", max_write, 0), ++ CONN_OPTION("max_write=", set_max_write, 1), ++ CONN_OPTION("max_readahead=%u", max_readahead, 0), ++ CONN_OPTION("max_readahead=", set_max_readahead, 1), ++ CONN_OPTION("max_background=%u", max_background, 0), ++ CONN_OPTION("max_background=", set_max_background, 1), ++ CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), ++ CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), ++ CONN_OPTION("sync_read", sync_read, 1), ++ CONN_OPTION("async_read", async_read, 1), ++ CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), ++ CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), ++ CONN_OPTION("no_remote_lock", no_remote_flock, 1), ++ CONN_OPTION("no_remote_flock", no_remote_flock, 1), ++ CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), ++ CONN_OPTION("splice_write", splice_write, 1), ++ CONN_OPTION("no_splice_write", no_splice_write, 1), ++ CONN_OPTION("splice_move", splice_move, 1), ++ CONN_OPTION("no_splice_move", no_splice_move, 1), ++ CONN_OPTION("splice_read", splice_read, 1), ++ CONN_OPTION("no_splice_read", no_splice_read, 1), ++ CONN_OPTION("auto_inval_data", auto_inval_data, 1), ++ CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), ++ CONN_OPTION("readdirplus=no", no_readdirplus, 1), ++ CONN_OPTION("readdirplus=yes", no_readdirplus, 0), ++ CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), ++ CONN_OPTION("readdirplus=auto", no_readdirplus, 0), ++ CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), ++ CONN_OPTION("async_dio", async_dio, 1), ++ CONN_OPTION("no_async_dio", no_async_dio, 1), ++ CONN_OPTION("writeback_cache", writeback_cache, 1), ++ CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), ++ CONN_OPTION("time_gran=%u", time_gran, 0), ++ CONN_OPTION("time_gran=", set_time_gran, 1), ++ FUSE_OPT_END ++}; ++ ++ ++void fuse_cmdline_help(void) 
++{ ++ printf(" -h --help print help\n" ++ " -V --version print version\n" ++ " -d -o debug enable debug output (implies -f)\n" ++ " -f foreground operation\n" ++ " -s disable multi-threaded operation\n" ++ " -o clone_fd use separate fuse device fd for each thread\n" ++ " (may improve performance)\n" ++ " -o max_idle_threads the maximum number of idle worker threads\n" ++ " allowed (default: 10)\n"); ++} ++ ++static int fuse_helper_opt_proc(void *data, const char *arg, int key, ++ struct fuse_args *outargs) ++{ ++ (void) outargs; ++ struct fuse_cmdline_opts *opts = data; ++ ++ switch (key) { ++ case FUSE_OPT_KEY_NONOPT: ++ if (!opts->mountpoint) { ++ if (fuse_mnt_parse_fuse_fd(arg) != -1) { ++ return fuse_opt_add_opt(&opts->mountpoint, arg); ++ } ++ ++ char mountpoint[PATH_MAX] = ""; ++ if (realpath(arg, mountpoint) == NULL) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: bad mount point `%s': %s\n", ++ arg, strerror(errno)); ++ return -1; ++ } ++ return fuse_opt_add_opt(&opts->mountpoint, mountpoint); ++ } else { ++ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); ++ return -1; ++ } ++ ++ default: ++ /* Pass through unknown options */ ++ return 1; ++ } ++} ++ ++/* Under FreeBSD, there is no subtype option so this ++ function actually sets the fsname */ ++static int add_default_subtype(const char *progname, struct fuse_args *args) ++{ ++ int res; ++ char *subtype_opt; ++ ++ const char *basename = strrchr(progname, '/'); ++ if (basename == NULL) ++ basename = progname; ++ else if (basename[1] != '\0') ++ basename++; ++ ++ subtype_opt = (char *) malloc(strlen(basename) + 64); ++ if (subtype_opt == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); ++ return -1; ++ } ++#ifdef __FreeBSD__ ++ sprintf(subtype_opt, "-ofsname=%s", basename); ++#else ++ sprintf(subtype_opt, "-osubtype=%s", basename); ++#endif ++ res = fuse_opt_add_arg(args, subtype_opt); ++ free(subtype_opt); ++ return res; ++} ++ ++int fuse_parse_cmdline(struct fuse_args *args, ++ 
struct fuse_cmdline_opts *opts) ++{ ++ memset(opts, 0, sizeof(struct fuse_cmdline_opts)); ++ ++ opts->max_idle_threads = 10; ++ ++ if (fuse_opt_parse(args, opts, fuse_helper_opts, ++ fuse_helper_opt_proc) == -1) ++ return -1; ++ ++ /* *Linux*: if neither -o subtype nor -o fsname are specified, ++ set subtype to program's basename. ++ *FreeBSD*: if fsname is not specified, set to program's ++ basename. */ ++ if (!opts->nodefault_subtype) ++ if (add_default_subtype(args->argv[0], args) == -1) ++ return -1; ++ ++ return 0; ++} ++ ++ ++int fuse_daemonize(int foreground) ++{ ++ if (!foreground) { ++ int nullfd; ++ int waiter[2]; ++ char completed; ++ ++ if (pipe(waiter)) { ++ perror("fuse_daemonize: pipe"); ++ return -1; ++ } ++ ++ /* ++ * demonize current process by forking it and killing the ++ * parent. This makes current process as a child of 'init'. ++ */ ++ switch(fork()) { ++ case -1: ++ perror("fuse_daemonize: fork"); ++ return -1; ++ case 0: ++ break; ++ default: ++ (void) read(waiter[0], &completed, sizeof(completed)); ++ _exit(0); ++ } ++ ++ if (setsid() == -1) { ++ perror("fuse_daemonize: setsid"); ++ return -1; ++ } ++ ++ (void) chdir("/"); ++ ++ nullfd = open("/dev/null", O_RDWR, 0); ++ if (nullfd != -1) { ++ (void) dup2(nullfd, 0); ++ (void) dup2(nullfd, 1); ++ (void) dup2(nullfd, 2); ++ if (nullfd > 2) ++ close(nullfd); ++ } ++ ++ /* Propagate completion of daemon initialization */ ++ completed = 1; ++ (void) write(waiter[1], &completed, sizeof(completed)); ++ close(waiter[0]); ++ close(waiter[1]); ++ } else { ++ (void) chdir("/"); ++ } ++ return 0; ++} ++ ++int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, ++ size_t op_size, void *user_data) ++{ ++ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); ++ struct fuse *fuse; ++ struct fuse_cmdline_opts opts; ++ int res; ++ ++ if (fuse_parse_cmdline(&args, &opts) != 0) ++ return 1; ++ ++ if (opts.show_version) { ++ printf("FUSE library version %s\n", PACKAGE_VERSION); ++ 
fuse_lowlevel_version(); ++ res = 0; ++ goto out1; ++ } ++ ++ if (opts.show_help) { ++ if(args.argv[0][0] != '\0') ++ printf("usage: %s [options] \n\n", ++ args.argv[0]); ++ printf("FUSE options:\n"); ++ fuse_cmdline_help(); ++ fuse_lib_help(&args); ++ res = 0; ++ goto out1; ++ } ++ ++ if (!opts.show_help && ++ !opts.mountpoint) { ++ fuse_log(FUSE_LOG_ERR, "error: no mountpoint specified\n"); ++ res = 2; ++ goto out1; ++ } ++ ++ ++ fuse = fuse_new_31(&args, op, op_size, user_data); ++ if (fuse == NULL) { ++ res = 3; ++ goto out1; ++ } ++ ++ if (fuse_mount(fuse,opts.mountpoint) != 0) { ++ res = 4; ++ goto out2; ++ } ++ ++ if (fuse_daemonize(opts.foreground) != 0) { ++ res = 5; ++ goto out3; ++ } ++ ++ struct fuse_session *se = fuse_get_session(fuse); ++ if (fuse_set_signal_handlers(se) != 0) { ++ res = 6; ++ goto out3; ++ } ++ ++ if (opts.singlethread) ++ res = fuse_loop(fuse); ++ else { ++ struct fuse_loop_config loop_config; ++ loop_config.clone_fd = opts.clone_fd; ++ loop_config.max_idle_threads = opts.max_idle_threads; ++ res = fuse_loop_mt_32(fuse, &loop_config); ++ } ++ if (res) ++ res = 7; ++ ++ fuse_remove_signal_handlers(se); ++out3: ++ fuse_unmount(fuse); ++out2: ++ fuse_destroy(fuse); ++out1: ++ free(opts.mountpoint); ++ fuse_opt_free_args(&args); ++ return res; ++} ++ ++ ++void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, ++ struct fuse_conn_info *conn) ++{ ++ if(opts->set_max_write) ++ conn->max_write = opts->max_write; ++ if(opts->set_max_background) ++ conn->max_background = opts->max_background; ++ if(opts->set_congestion_threshold) ++ conn->congestion_threshold = opts->congestion_threshold; ++ if(opts->set_time_gran) ++ conn->time_gran = opts->time_gran; ++ if(opts->set_max_readahead) ++ conn->max_readahead = opts->max_readahead; ++ ++#define LL_ENABLE(cond,cap) \ ++ if (cond) conn->want |= (cap) ++#define LL_DISABLE(cond,cap) \ ++ if (cond) conn->want &= ~(cap) ++ ++ LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); ++ 
LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); ++ ++ LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); ++ LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); ++ ++ LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); ++ LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); ++ ++ LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); ++ LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); ++ ++ LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); ++ LL_DISABLE(opts->no_readdirplus_auto, FUSE_CAP_READDIRPLUS_AUTO); ++ ++ LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); ++ LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); ++ ++ LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); ++ LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); ++ ++ LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); ++ LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); ++ ++ LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); ++ LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); ++} ++ ++struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) ++{ ++ struct fuse_conn_info_opts *opts; ++ ++ opts = calloc(1, sizeof(struct fuse_conn_info_opts)); ++ if(opts == NULL) { ++ fuse_log(FUSE_LOG_ERR, "calloc failed\n"); ++ return NULL; ++ } ++ if(fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { ++ free(opts); ++ return NULL; ++ } ++ return opts; ++} ++ ++int fuse_open_channel(const char *mountpoint, const char* options) ++{ ++ struct mount_opts *opts = NULL; ++ int fd = -1; ++ const char *argv[] = { "", "-o", options }; ++ int argc = sizeof(argv) / sizeof(argv[0]); ++ struct fuse_args args = FUSE_ARGS_INIT(argc, (char**) argv); ++ ++ opts = parse_mount_opts(&args); ++ if (opts == NULL) ++ return -1; ++ ++ fd = fuse_kern_mount(mountpoint, opts); ++ destroy_mount_opts(opts); ++ ++ return fd; ++} +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Add-fuse_lowlevel.c.patch 
b/kvm-virtiofsd-Add-fuse_lowlevel.c.patch new file mode 100644 index 0000000..1318fef --- /dev/null +++ b/kvm-virtiofsd-Add-fuse_lowlevel.c.patch @@ -0,0 +1,3172 @@ +From f6c6830f772e8060255323d2a458cd0e774d9654 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:42 +0100 +Subject: [PATCH 011/116] virtiofsd: Add fuse_lowlevel.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-8-dgilbert@redhat.com> +Patchwork-id: 93456 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 007/112] virtiofsd: Add fuse_lowlevel.c +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +fuse_lowlevel is one of the largest files from the library +and does most of the work. Add it separately to keep the diff +sizes small. +Again this is from upstream fuse-3.8.0 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 2de121f01e37e2fe98a4362f4abf7c0848697f76) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 3129 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 3129 insertions(+) + create mode 100644 tools/virtiofsd/fuse_lowlevel.c + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +new file mode 100644 +index 0000000..f2d7038 +--- /dev/null ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -0,0 +1,3129 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ Implementation of (most of) the low-level FUSE API. The session loop ++ functions are implemented in separate files. ++ ++ This program can be distributed under the terms of the GNU LGPLv2. 
++ See the file COPYING.LIB ++*/ ++ ++#define _GNU_SOURCE ++ ++#include "config.h" ++#include "fuse_i.h" ++#include "fuse_kernel.h" ++#include "fuse_opt.h" ++#include "fuse_misc.h" ++#include "mount_util.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifndef F_LINUX_SPECIFIC_BASE ++#define F_LINUX_SPECIFIC_BASE 1024 ++#endif ++#ifndef F_SETPIPE_SZ ++#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) ++#endif ++ ++ ++#define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) ++#define OFFSET_MAX 0x7fffffffffffffffLL ++ ++#define container_of(ptr, type, member) ({ \ ++ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ ++ (type *)( (char *)__mptr - offsetof(type,member) );}) ++ ++struct fuse_pollhandle { ++ uint64_t kh; ++ struct fuse_session *se; ++}; ++ ++static size_t pagesize; ++ ++static __attribute__((constructor)) void fuse_ll_init_pagesize(void) ++{ ++ pagesize = getpagesize(); ++} ++ ++static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) ++{ ++ attr->ino = stbuf->st_ino; ++ attr->mode = stbuf->st_mode; ++ attr->nlink = stbuf->st_nlink; ++ attr->uid = stbuf->st_uid; ++ attr->gid = stbuf->st_gid; ++ attr->rdev = stbuf->st_rdev; ++ attr->size = stbuf->st_size; ++ attr->blksize = stbuf->st_blksize; ++ attr->blocks = stbuf->st_blocks; ++ attr->atime = stbuf->st_atime; ++ attr->mtime = stbuf->st_mtime; ++ attr->ctime = stbuf->st_ctime; ++ attr->atimensec = ST_ATIM_NSEC(stbuf); ++ attr->mtimensec = ST_MTIM_NSEC(stbuf); ++ attr->ctimensec = ST_CTIM_NSEC(stbuf); ++} ++ ++static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) ++{ ++ stbuf->st_mode = attr->mode; ++ stbuf->st_uid = attr->uid; ++ stbuf->st_gid = attr->gid; ++ stbuf->st_size = attr->size; ++ stbuf->st_atime = attr->atime; ++ stbuf->st_mtime = attr->mtime; ++ stbuf->st_ctime = attr->ctime; ++ ST_ATIM_NSEC_SET(stbuf, attr->atimensec); ++ ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); ++ 
ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); ++} ++ ++static size_t iov_length(const struct iovec *iov, size_t count) ++{ ++ size_t seg; ++ size_t ret = 0; ++ ++ for (seg = 0; seg < count; seg++) ++ ret += iov[seg].iov_len; ++ return ret; ++} ++ ++static void list_init_req(struct fuse_req *req) ++{ ++ req->next = req; ++ req->prev = req; ++} ++ ++static void list_del_req(struct fuse_req *req) ++{ ++ struct fuse_req *prev = req->prev; ++ struct fuse_req *next = req->next; ++ prev->next = next; ++ next->prev = prev; ++} ++ ++static void list_add_req(struct fuse_req *req, struct fuse_req *next) ++{ ++ struct fuse_req *prev = next->prev; ++ req->next = next; ++ req->prev = prev; ++ prev->next = req; ++ next->prev = req; ++} ++ ++static void destroy_req(fuse_req_t req) ++{ ++ pthread_mutex_destroy(&req->lock); ++ free(req); ++} ++ ++void fuse_free_req(fuse_req_t req) ++{ ++ int ctr; ++ struct fuse_session *se = req->se; ++ ++ pthread_mutex_lock(&se->lock); ++ req->u.ni.func = NULL; ++ req->u.ni.data = NULL; ++ list_del_req(req); ++ ctr = --req->ctr; ++ fuse_chan_put(req->ch); ++ req->ch = NULL; ++ pthread_mutex_unlock(&se->lock); ++ if (!ctr) ++ destroy_req(req); ++} ++ ++static struct fuse_req *fuse_ll_alloc_req(struct fuse_session *se) ++{ ++ struct fuse_req *req; ++ ++ req = (struct fuse_req *) calloc(1, sizeof(struct fuse_req)); ++ if (req == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); ++ } else { ++ req->se = se; ++ req->ctr = 1; ++ list_init_req(req); ++ fuse_mutex_init(&req->lock); ++ } ++ ++ return req; ++} ++ ++/* Send data. 
If *ch* is NULL, send via session master fd */ ++static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int count) ++{ ++ struct fuse_out_header *out = iov[0].iov_base; ++ ++ out->len = iov_length(iov, count); ++ if (se->debug) { ++ if (out->unique == 0) { ++ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", ++ out->error, out->len); ++ } else if (out->error) { ++ fuse_log(FUSE_LOG_DEBUG, ++ " unique: %llu, error: %i (%s), outsize: %i\n", ++ (unsigned long long) out->unique, out->error, ++ strerror(-out->error), out->len); ++ } else { ++ fuse_log(FUSE_LOG_DEBUG, ++ " unique: %llu, success, outsize: %i\n", ++ (unsigned long long) out->unique, out->len); ++ } ++ } ++ ++ ssize_t res = writev(ch ? ch->fd : se->fd, ++ iov, count); ++ int err = errno; ++ ++ if (res == -1) { ++ assert(se != NULL); ++ ++ /* ENOENT means the operation was interrupted */ ++ if (!fuse_session_exited(se) && err != ENOENT) ++ perror("fuse: writing device"); ++ return -err; ++ } ++ ++ return 0; ++} ++ ++ ++int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, ++ int count) ++{ ++ struct fuse_out_header out; ++ ++ if (error <= -1000 || error > 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); ++ error = -ERANGE; ++ } ++ ++ out.unique = req->unique; ++ out.error = error; ++ ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct fuse_out_header); ++ ++ return fuse_send_msg(req->se, req->ch, iov, count); ++} ++ ++static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov, ++ int count) ++{ ++ int res; ++ ++ res = fuse_send_reply_iov_nofree(req, error, iov, count); ++ fuse_free_req(req); ++ return res; ++} ++ ++static int send_reply(fuse_req_t req, int error, const void *arg, ++ size_t argsize) ++{ ++ struct iovec iov[2]; ++ int count = 1; ++ if (argsize) { ++ iov[1].iov_base = (void *) arg; ++ iov[1].iov_len = argsize; ++ count++; ++ } ++ return send_reply_iov(req, error, iov, count); ++} ++ 
++int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count) ++{ ++ int res; ++ struct iovec *padded_iov; ++ ++ padded_iov = malloc((count + 1) * sizeof(struct iovec)); ++ if (padded_iov == NULL) ++ return fuse_reply_err(req, ENOMEM); ++ ++ memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); ++ count++; ++ ++ res = send_reply_iov(req, 0, padded_iov, count); ++ free(padded_iov); ++ ++ return res; ++} ++ ++ ++/* `buf` is allowed to be empty so that the proper size may be ++ allocated by the caller */ ++size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, ++ const char *name, const struct stat *stbuf, off_t off) ++{ ++ (void)req; ++ size_t namelen; ++ size_t entlen; ++ size_t entlen_padded; ++ struct fuse_dirent *dirent; ++ ++ namelen = strlen(name); ++ entlen = FUSE_NAME_OFFSET + namelen; ++ entlen_padded = FUSE_DIRENT_ALIGN(entlen); ++ ++ if ((buf == NULL) || (entlen_padded > bufsize)) ++ return entlen_padded; ++ ++ dirent = (struct fuse_dirent*) buf; ++ dirent->ino = stbuf->st_ino; ++ dirent->off = off; ++ dirent->namelen = namelen; ++ dirent->type = (stbuf->st_mode & S_IFMT) >> 12; ++ memcpy(dirent->name, name, namelen); ++ memset(dirent->name + namelen, 0, entlen_padded - entlen); ++ ++ return entlen_padded; ++} ++ ++static void convert_statfs(const struct statvfs *stbuf, ++ struct fuse_kstatfs *kstatfs) ++{ ++ kstatfs->bsize = stbuf->f_bsize; ++ kstatfs->frsize = stbuf->f_frsize; ++ kstatfs->blocks = stbuf->f_blocks; ++ kstatfs->bfree = stbuf->f_bfree; ++ kstatfs->bavail = stbuf->f_bavail; ++ kstatfs->files = stbuf->f_files; ++ kstatfs->ffree = stbuf->f_ffree; ++ kstatfs->namelen = stbuf->f_namemax; ++} ++ ++static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) ++{ ++ return send_reply(req, 0, arg, argsize); ++} ++ ++int fuse_reply_err(fuse_req_t req, int err) ++{ ++ return send_reply(req, -err, NULL, 0); ++} ++ ++void fuse_reply_none(fuse_req_t req) ++{ ++ fuse_free_req(req); ++} ++ ++static unsigned long 
calc_timeout_sec(double t) ++{ ++ if (t > (double) ULONG_MAX) ++ return ULONG_MAX; ++ else if (t < 0.0) ++ return 0; ++ else ++ return (unsigned long) t; ++} ++ ++static unsigned int calc_timeout_nsec(double t) ++{ ++ double f = t - (double) calc_timeout_sec(t); ++ if (f < 0.0) ++ return 0; ++ else if (f >= 0.999999999) ++ return 999999999; ++ else ++ return (unsigned int) (f * 1.0e9); ++} ++ ++static void fill_entry(struct fuse_entry_out *arg, ++ const struct fuse_entry_param *e) ++{ ++ arg->nodeid = e->ino; ++ arg->generation = e->generation; ++ arg->entry_valid = calc_timeout_sec(e->entry_timeout); ++ arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); ++ arg->attr_valid = calc_timeout_sec(e->attr_timeout); ++ arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); ++ convert_stat(&e->attr, &arg->attr); ++} ++ ++/* `buf` is allowed to be empty so that the proper size may be ++ allocated by the caller */ ++size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, ++ const char *name, ++ const struct fuse_entry_param *e, off_t off) ++{ ++ (void)req; ++ size_t namelen; ++ size_t entlen; ++ size_t entlen_padded; ++ ++ namelen = strlen(name); ++ entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; ++ entlen_padded = FUSE_DIRENT_ALIGN(entlen); ++ if ((buf == NULL) || (entlen_padded > bufsize)) ++ return entlen_padded; ++ ++ struct fuse_direntplus *dp = (struct fuse_direntplus *) buf; ++ memset(&dp->entry_out, 0, sizeof(dp->entry_out)); ++ fill_entry(&dp->entry_out, e); ++ ++ struct fuse_dirent *dirent = &dp->dirent; ++ dirent->ino = e->attr.st_ino; ++ dirent->off = off; ++ dirent->namelen = namelen; ++ dirent->type = (e->attr.st_mode & S_IFMT) >> 12; ++ memcpy(dirent->name, name, namelen); ++ memset(dirent->name + namelen, 0, entlen_padded - entlen); ++ ++ return entlen_padded; ++} ++ ++static void fill_open(struct fuse_open_out *arg, ++ const struct fuse_file_info *f) ++{ ++ arg->fh = f->fh; ++ if (f->direct_io) ++ arg->open_flags |= 
FOPEN_DIRECT_IO; ++ if (f->keep_cache) ++ arg->open_flags |= FOPEN_KEEP_CACHE; ++ if (f->cache_readdir) ++ arg->open_flags |= FOPEN_CACHE_DIR; ++ if (f->nonseekable) ++ arg->open_flags |= FOPEN_NONSEEKABLE; ++} ++ ++int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) ++{ ++ struct fuse_entry_out arg; ++ size_t size = req->se->conn.proto_minor < 9 ? ++ FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(arg); ++ ++ /* before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant ++ negative entry */ ++ if (!e->ino && req->se->conn.proto_minor < 4) ++ return fuse_reply_err(req, ENOENT); ++ ++ memset(&arg, 0, sizeof(arg)); ++ fill_entry(&arg, e); ++ return send_reply_ok(req, &arg, size); ++} ++ ++int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, ++ const struct fuse_file_info *f) ++{ ++ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; ++ size_t entrysize = req->se->conn.proto_minor < 9 ? ++ FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(struct fuse_entry_out); ++ struct fuse_entry_out *earg = (struct fuse_entry_out *) buf; ++ struct fuse_open_out *oarg = (struct fuse_open_out *) (buf + entrysize); ++ ++ memset(buf, 0, sizeof(buf)); ++ fill_entry(earg, e); ++ fill_open(oarg, f); ++ return send_reply_ok(req, buf, ++ entrysize + sizeof(struct fuse_open_out)); ++} ++ ++int fuse_reply_attr(fuse_req_t req, const struct stat *attr, ++ double attr_timeout) ++{ ++ struct fuse_attr_out arg; ++ size_t size = req->se->conn.proto_minor < 9 ? 
++ FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.attr_valid = calc_timeout_sec(attr_timeout); ++ arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); ++ convert_stat(attr, &arg.attr); ++ ++ return send_reply_ok(req, &arg, size); ++} ++ ++int fuse_reply_readlink(fuse_req_t req, const char *linkname) ++{ ++ return send_reply_ok(req, linkname, strlen(linkname)); ++} ++ ++int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f) ++{ ++ struct fuse_open_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ fill_open(&arg, f); ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++int fuse_reply_write(fuse_req_t req, size_t count) ++{ ++ struct fuse_write_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.size = count; ++ ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size) ++{ ++ return send_reply_ok(req, buf, size); ++} ++ ++static int fuse_send_data_iov_fallback(struct fuse_session *se, ++ struct fuse_chan *ch, ++ struct iovec *iov, int iov_count, ++ struct fuse_bufvec *buf, ++ size_t len) ++{ ++ struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); ++ void *mbuf; ++ int res; ++ ++ /* Optimize common case */ ++ if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && ++ !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { ++ /* FIXME: also avoid memory copy if there are multiple buffers ++ but none of them contain an fd */ ++ ++ iov[iov_count].iov_base = buf->buf[0].mem; ++ iov[iov_count].iov_len = len; ++ iov_count++; ++ return fuse_send_msg(se, ch, iov, iov_count); ++ } ++ ++ res = posix_memalign(&mbuf, pagesize, len); ++ if (res != 0) ++ return res; ++ ++ mem_buf.buf[0].mem = mbuf; ++ res = fuse_buf_copy(&mem_buf, buf, 0); ++ if (res < 0) { ++ free(mbuf); ++ return -res; ++ } ++ len = res; ++ ++ iov[iov_count].iov_base = mbuf; ++ iov[iov_count].iov_len = len; ++ iov_count++; ++ res = fuse_send_msg(se, ch, iov, iov_count); ++ free(mbuf); ++ ++ return res; ++} ++ 
++struct fuse_ll_pipe { ++ size_t size; ++ int can_grow; ++ int pipe[2]; ++}; ++ ++static void fuse_ll_pipe_free(struct fuse_ll_pipe *llp) ++{ ++ close(llp->pipe[0]); ++ close(llp->pipe[1]); ++ free(llp); ++} ++ ++#ifdef HAVE_SPLICE ++#if !defined(HAVE_PIPE2) || !defined(O_CLOEXEC) ++static int fuse_pipe(int fds[2]) ++{ ++ int rv = pipe(fds); ++ ++ if (rv == -1) ++ return rv; ++ ++ if (fcntl(fds[0], F_SETFL, O_NONBLOCK) == -1 || ++ fcntl(fds[1], F_SETFL, O_NONBLOCK) == -1 || ++ fcntl(fds[0], F_SETFD, FD_CLOEXEC) == -1 || ++ fcntl(fds[1], F_SETFD, FD_CLOEXEC) == -1) { ++ close(fds[0]); ++ close(fds[1]); ++ rv = -1; ++ } ++ return rv; ++} ++#else ++static int fuse_pipe(int fds[2]) ++{ ++ return pipe2(fds, O_CLOEXEC | O_NONBLOCK); ++} ++#endif ++ ++static struct fuse_ll_pipe *fuse_ll_get_pipe(struct fuse_session *se) ++{ ++ struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); ++ if (llp == NULL) { ++ int res; ++ ++ llp = malloc(sizeof(struct fuse_ll_pipe)); ++ if (llp == NULL) ++ return NULL; ++ ++ res = fuse_pipe(llp->pipe); ++ if (res == -1) { ++ free(llp); ++ return NULL; ++ } ++ ++ /* ++ *the default size is 16 pages on linux ++ */ ++ llp->size = pagesize * 16; ++ llp->can_grow = 1; ++ ++ pthread_setspecific(se->pipe_key, llp); ++ } ++ ++ return llp; ++} ++#endif ++ ++static void fuse_ll_clear_pipe(struct fuse_session *se) ++{ ++ struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); ++ if (llp) { ++ pthread_setspecific(se->pipe_key, NULL); ++ fuse_ll_pipe_free(llp); ++ } ++} ++ ++#if defined(HAVE_SPLICE) && defined(HAVE_VMSPLICE) ++static int read_back(int fd, char *buf, size_t len) ++{ ++ int res; ++ ++ res = read(fd, buf, len); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "fuse: internal error: failed to read back from pipe: %s\n", strerror(errno)); ++ return -EIO; ++ } ++ if (res != len) { ++ fuse_log(FUSE_LOG_ERR, "fuse: internal error: short read back from pipe: %i from %zi\n", res, len); ++ return -EIO; ++ } ++ return 0; ++} ++ ++static int 
grow_pipe_to_max(int pipefd) ++{ ++ int max; ++ int res; ++ int maxfd; ++ char buf[32]; ++ ++ maxfd = open("/proc/sys/fs/pipe-max-size", O_RDONLY); ++ if (maxfd < 0) ++ return -errno; ++ ++ res = read(maxfd, buf, sizeof(buf) - 1); ++ if (res < 0) { ++ int saved_errno; ++ ++ saved_errno = errno; ++ close(maxfd); ++ return -saved_errno; ++ } ++ close(maxfd); ++ buf[res] = '\0'; ++ ++ max = atoi(buf); ++ res = fcntl(pipefd, F_SETPIPE_SZ, max); ++ if (res < 0) ++ return -errno; ++ return max; ++} ++ ++static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int iov_count, ++ struct fuse_bufvec *buf, unsigned int flags) ++{ ++ int res; ++ size_t len = fuse_buf_size(buf); ++ struct fuse_out_header *out = iov[0].iov_base; ++ struct fuse_ll_pipe *llp; ++ int splice_flags; ++ size_t pipesize; ++ size_t total_fd_size; ++ size_t idx; ++ size_t headerlen; ++ struct fuse_bufvec pipe_buf = FUSE_BUFVEC_INIT(len); ++ ++ if (se->broken_splice_nonblock) ++ goto fallback; ++ ++ if (flags & FUSE_BUF_NO_SPLICE) ++ goto fallback; ++ ++ total_fd_size = 0; ++ for (idx = buf->idx; idx < buf->count; idx++) { ++ if (buf->buf[idx].flags & FUSE_BUF_IS_FD) { ++ total_fd_size = buf->buf[idx].size; ++ if (idx == buf->idx) ++ total_fd_size -= buf->off; ++ } ++ } ++ if (total_fd_size < 2 * pagesize) ++ goto fallback; ++ ++ if (se->conn.proto_minor < 14 || ++ !(se->conn.want & FUSE_CAP_SPLICE_WRITE)) ++ goto fallback; ++ ++ llp = fuse_ll_get_pipe(se); ++ if (llp == NULL) ++ goto fallback; ++ ++ ++ headerlen = iov_length(iov, iov_count); ++ ++ out->len = headerlen + len; ++ ++ /* ++ * Heuristic for the required pipe size, does not work if the ++ * source contains less than page size fragments ++ */ ++ pipesize = pagesize * (iov_count + buf->count + 1) + out->len; ++ ++ if (llp->size < pipesize) { ++ if (llp->can_grow) { ++ res = fcntl(llp->pipe[0], F_SETPIPE_SZ, pipesize); ++ if (res == -1) { ++ res = grow_pipe_to_max(llp->pipe[0]); ++ if (res > 0) ++ 
llp->size = res; ++ llp->can_grow = 0; ++ goto fallback; ++ } ++ llp->size = res; ++ } ++ if (llp->size < pipesize) ++ goto fallback; ++ } ++ ++ ++ res = vmsplice(llp->pipe[1], iov, iov_count, SPLICE_F_NONBLOCK); ++ if (res == -1) ++ goto fallback; ++ ++ if (res != headerlen) { ++ res = -EIO; ++ fuse_log(FUSE_LOG_ERR, "fuse: short vmsplice to pipe: %u/%zu\n", res, ++ headerlen); ++ goto clear_pipe; ++ } ++ ++ pipe_buf.buf[0].flags = FUSE_BUF_IS_FD; ++ pipe_buf.buf[0].fd = llp->pipe[1]; ++ ++ res = fuse_buf_copy(&pipe_buf, buf, ++ FUSE_BUF_FORCE_SPLICE | FUSE_BUF_SPLICE_NONBLOCK); ++ if (res < 0) { ++ if (res == -EAGAIN || res == -EINVAL) { ++ /* ++ * Should only get EAGAIN on kernels with ++ * broken SPLICE_F_NONBLOCK support (<= ++ * 2.6.35) where this error or a short read is ++ * returned even if the pipe itself is not ++ * full ++ * ++ * EINVAL might mean that splice can't handle ++ * this combination of input and output. ++ */ ++ if (res == -EAGAIN) ++ se->broken_splice_nonblock = 1; ++ ++ pthread_setspecific(se->pipe_key, NULL); ++ fuse_ll_pipe_free(llp); ++ goto fallback; ++ } ++ res = -res; ++ goto clear_pipe; ++ } ++ ++ if (res != 0 && res < len) { ++ struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); ++ void *mbuf; ++ size_t now_len = res; ++ /* ++ * For regular files a short count is either ++ * 1) due to EOF, or ++ * 2) because of broken SPLICE_F_NONBLOCK (see above) ++ * ++ * For other inputs it's possible that we overflowed ++ * the pipe because of small buffer fragments. ++ */ ++ ++ res = posix_memalign(&mbuf, pagesize, len); ++ if (res != 0) ++ goto clear_pipe; ++ ++ mem_buf.buf[0].mem = mbuf; ++ mem_buf.off = now_len; ++ res = fuse_buf_copy(&mem_buf, buf, 0); ++ if (res > 0) { ++ char *tmpbuf; ++ size_t extra_len = res; ++ /* ++ * Trickiest case: got more data. Need to get ++ * back the data from the pipe and then fall ++ * back to regular write. 
++ */ ++ tmpbuf = malloc(headerlen); ++ if (tmpbuf == NULL) { ++ free(mbuf); ++ res = ENOMEM; ++ goto clear_pipe; ++ } ++ res = read_back(llp->pipe[0], tmpbuf, headerlen); ++ free(tmpbuf); ++ if (res != 0) { ++ free(mbuf); ++ goto clear_pipe; ++ } ++ res = read_back(llp->pipe[0], mbuf, now_len); ++ if (res != 0) { ++ free(mbuf); ++ goto clear_pipe; ++ } ++ len = now_len + extra_len; ++ iov[iov_count].iov_base = mbuf; ++ iov[iov_count].iov_len = len; ++ iov_count++; ++ res = fuse_send_msg(se, ch, iov, iov_count); ++ free(mbuf); ++ return res; ++ } ++ free(mbuf); ++ res = now_len; ++ } ++ len = res; ++ out->len = headerlen + len; ++ ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, ++ " unique: %llu, success, outsize: %i (splice)\n", ++ (unsigned long long) out->unique, out->len); ++ } ++ ++ splice_flags = 0; ++ if ((flags & FUSE_BUF_SPLICE_MOVE) && ++ (se->conn.want & FUSE_CAP_SPLICE_MOVE)) ++ splice_flags |= SPLICE_F_MOVE; ++ ++ res = splice(llp->pipe[0], NULL, ch ? ch->fd : se->fd, ++ NULL, out->len, splice_flags); ++ if (res == -1) { ++ res = -errno; ++ perror("fuse: splice from pipe"); ++ goto clear_pipe; ++ } ++ if (res != out->len) { ++ res = -EIO; ++ fuse_log(FUSE_LOG_ERR, "fuse: short splice from pipe: %u/%u\n", ++ res, out->len); ++ goto clear_pipe; ++ } ++ return 0; ++ ++clear_pipe: ++ fuse_ll_clear_pipe(se); ++ return res; ++ ++fallback: ++ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); ++} ++#else ++static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int iov_count, ++ struct fuse_bufvec *buf, unsigned int flags) ++{ ++ size_t len = fuse_buf_size(buf); ++ (void) flags; ++ ++ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); ++} ++#endif ++ ++int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags) ++{ ++ struct iovec iov[2]; ++ struct fuse_out_header out; ++ int res; ++ ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct 
fuse_out_header); ++ ++ out.unique = req->unique; ++ out.error = 0; ++ ++ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); ++ if (res <= 0) { ++ fuse_free_req(req); ++ return res; ++ } else { ++ return fuse_reply_err(req, res); ++ } ++} ++ ++int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) ++{ ++ struct fuse_statfs_out arg; ++ size_t size = req->se->conn.proto_minor < 4 ? ++ FUSE_COMPAT_STATFS_SIZE : sizeof(arg); ++ ++ memset(&arg, 0, sizeof(arg)); ++ convert_statfs(stbuf, &arg.st); ++ ++ return send_reply_ok(req, &arg, size); ++} ++ ++int fuse_reply_xattr(fuse_req_t req, size_t count) ++{ ++ struct fuse_getxattr_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.size = count; ++ ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++int fuse_reply_lock(fuse_req_t req, const struct flock *lock) ++{ ++ struct fuse_lk_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.lk.type = lock->l_type; ++ if (lock->l_type != F_UNLCK) { ++ arg.lk.start = lock->l_start; ++ if (lock->l_len == 0) ++ arg.lk.end = OFFSET_MAX; ++ else ++ arg.lk.end = lock->l_start + lock->l_len - 1; ++ } ++ arg.lk.pid = lock->l_pid; ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++int fuse_reply_bmap(fuse_req_t req, uint64_t idx) ++{ ++ struct fuse_bmap_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.block = idx; ++ ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++static struct fuse_ioctl_iovec *fuse_ioctl_iovec_copy(const struct iovec *iov, ++ size_t count) ++{ ++ struct fuse_ioctl_iovec *fiov; ++ size_t i; ++ ++ fiov = malloc(sizeof(fiov[0]) * count); ++ if (!fiov) ++ return NULL; ++ ++ for (i = 0; i < count; i++) { ++ fiov[i].base = (uintptr_t) iov[i].iov_base; ++ fiov[i].len = iov[i].iov_len; ++ } ++ ++ return fiov; ++} ++ ++int fuse_reply_ioctl_retry(fuse_req_t req, ++ const struct iovec *in_iov, size_t in_count, ++ const struct iovec *out_iov, size_t out_count) ++{ ++ struct fuse_ioctl_out arg; ++ struct fuse_ioctl_iovec *in_fiov = 
NULL; ++ struct fuse_ioctl_iovec *out_fiov = NULL; ++ struct iovec iov[4]; ++ size_t count = 1; ++ int res; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.flags |= FUSE_IOCTL_RETRY; ++ arg.in_iovs = in_count; ++ arg.out_iovs = out_count; ++ iov[count].iov_base = &arg; ++ iov[count].iov_len = sizeof(arg); ++ count++; ++ ++ if (req->se->conn.proto_minor < 16) { ++ if (in_count) { ++ iov[count].iov_base = (void *)in_iov; ++ iov[count].iov_len = sizeof(in_iov[0]) * in_count; ++ count++; ++ } ++ ++ if (out_count) { ++ iov[count].iov_base = (void *)out_iov; ++ iov[count].iov_len = sizeof(out_iov[0]) * out_count; ++ count++; ++ } ++ } else { ++ /* Can't handle non-compat 64bit ioctls on 32bit */ ++ if (sizeof(void *) == 4 && req->ioctl_64bit) { ++ res = fuse_reply_err(req, EINVAL); ++ goto out; ++ } ++ ++ if (in_count) { ++ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); ++ if (!in_fiov) ++ goto enomem; ++ ++ iov[count].iov_base = (void *)in_fiov; ++ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; ++ count++; ++ } ++ if (out_count) { ++ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); ++ if (!out_fiov) ++ goto enomem; ++ ++ iov[count].iov_base = (void *)out_fiov; ++ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; ++ count++; ++ } ++ } ++ ++ res = send_reply_iov(req, 0, iov, count); ++out: ++ free(in_fiov); ++ free(out_fiov); ++ ++ return res; ++ ++enomem: ++ res = fuse_reply_err(req, ENOMEM); ++ goto out; ++} ++ ++int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size) ++{ ++ struct fuse_ioctl_out arg; ++ struct iovec iov[3]; ++ size_t count = 1; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.result = result; ++ iov[count].iov_base = &arg; ++ iov[count].iov_len = sizeof(arg); ++ count++; ++ ++ if (size) { ++ iov[count].iov_base = (char *) buf; ++ iov[count].iov_len = size; ++ count++; ++ } ++ ++ return send_reply_iov(req, 0, iov, count); ++} ++ ++int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, ++ int count) 
++{ ++ struct iovec *padded_iov; ++ struct fuse_ioctl_out arg; ++ int res; ++ ++ padded_iov = malloc((count + 2) * sizeof(struct iovec)); ++ if (padded_iov == NULL) ++ return fuse_reply_err(req, ENOMEM); ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.result = result; ++ padded_iov[1].iov_base = &arg; ++ padded_iov[1].iov_len = sizeof(arg); ++ ++ memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); ++ ++ res = send_reply_iov(req, 0, padded_iov, count + 2); ++ free(padded_iov); ++ ++ return res; ++} ++ ++int fuse_reply_poll(fuse_req_t req, unsigned revents) ++{ ++ struct fuse_poll_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.revents = revents; ++ ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++int fuse_reply_lseek(fuse_req_t req, off_t off) ++{ ++ struct fuse_lseek_out arg; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.offset = off; ++ ++ return send_reply_ok(req, &arg, sizeof(arg)); ++} ++ ++static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ char *name = (char *) inarg; ++ ++ if (req->se->op.lookup) ++ req->se->op.lookup(req, nodeid, name); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_forget_in *arg = (struct fuse_forget_in *) inarg; ++ ++ if (req->se->op.forget) ++ req->se->op.forget(req, nodeid, arg->nlookup); ++ else ++ fuse_reply_none(req); ++} ++ ++static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, ++ const void *inarg) ++{ ++ struct fuse_batch_forget_in *arg = (void *) inarg; ++ struct fuse_forget_one *param = (void *) PARAM(arg); ++ unsigned int i; ++ ++ (void) nodeid; ++ ++ if (req->se->op.forget_multi) { ++ req->se->op.forget_multi(req, arg->count, ++ (struct fuse_forget_data *) param); ++ } else if (req->se->op.forget) { ++ for (i = 0; i < arg->count; i++) { ++ struct fuse_forget_one *forget = ¶m[i]; ++ struct fuse_req *dummy_req; ++ ++ dummy_req = fuse_ll_alloc_req(req->se); ++ if (dummy_req == 
NULL) ++ break; ++ ++ dummy_req->unique = req->unique; ++ dummy_req->ctx = req->ctx; ++ dummy_req->ch = NULL; ++ ++ req->se->op.forget(dummy_req, forget->nodeid, ++ forget->nlookup); ++ } ++ fuse_reply_none(req); ++ } else { ++ fuse_reply_none(req); ++ } ++} ++ ++static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_file_info *fip = NULL; ++ struct fuse_file_info fi; ++ ++ if (req->se->conn.proto_minor >= 9) { ++ struct fuse_getattr_in *arg = (struct fuse_getattr_in *) inarg; ++ ++ if (arg->getattr_flags & FUSE_GETATTR_FH) { ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fip = &fi; ++ } ++ } ++ ++ if (req->se->op.getattr) ++ req->se->op.getattr(req, nodeid, fip); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_setattr_in *arg = (struct fuse_setattr_in *) inarg; ++ ++ if (req->se->op.setattr) { ++ struct fuse_file_info *fi = NULL; ++ struct fuse_file_info fi_store; ++ struct stat stbuf; ++ memset(&stbuf, 0, sizeof(stbuf)); ++ convert_attr(arg, &stbuf); ++ if (arg->valid & FATTR_FH) { ++ arg->valid &= ~FATTR_FH; ++ memset(&fi_store, 0, sizeof(fi_store)); ++ fi = &fi_store; ++ fi->fh = arg->fh; ++ } ++ arg->valid &= ++ FUSE_SET_ATTR_MODE | ++ FUSE_SET_ATTR_UID | ++ FUSE_SET_ATTR_GID | ++ FUSE_SET_ATTR_SIZE | ++ FUSE_SET_ATTR_ATIME | ++ FUSE_SET_ATTR_MTIME | ++ FUSE_SET_ATTR_ATIME_NOW | ++ FUSE_SET_ATTR_MTIME_NOW | ++ FUSE_SET_ATTR_CTIME; ++ ++ req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); ++ } else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_access_in *arg = (struct fuse_access_in *) inarg; ++ ++ if (req->se->op.access) ++ req->se->op.access(req, nodeid, arg->mask); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ (void) inarg; ++ ++ if 
(req->se->op.readlink) ++ req->se->op.readlink(req, nodeid); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_mknod_in *arg = (struct fuse_mknod_in *) inarg; ++ char *name = PARAM(arg); ++ ++ if (req->se->conn.proto_minor >= 12) ++ req->ctx.umask = arg->umask; ++ else ++ name = (char *) inarg + FUSE_COMPAT_MKNOD_IN_SIZE; ++ ++ if (req->se->op.mknod) ++ req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *) inarg; ++ ++ if (req->se->conn.proto_minor >= 12) ++ req->ctx.umask = arg->umask; ++ ++ if (req->se->op.mkdir) ++ req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ char *name = (char *) inarg; ++ ++ if (req->se->op.unlink) ++ req->se->op.unlink(req, nodeid, name); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ char *name = (char *) inarg; ++ ++ if (req->se->op.rmdir) ++ req->se->op.rmdir(req, nodeid, name); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ char *name = (char *) inarg; ++ char *linkname = ((char *) inarg) + strlen((char *) inarg) + 1; ++ ++ if (req->se->op.symlink) ++ req->se->op.symlink(req, linkname, nodeid, name); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_rename_in *arg = (struct fuse_rename_in *) inarg; ++ char *oldname = PARAM(arg); ++ char *newname = oldname + strlen(oldname) + 1; ++ ++ if (req->se->op.rename) ++ req->se->op.rename(req, nodeid, oldname, arg->newdir, 
newname, ++ 0); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_rename2_in *arg = (struct fuse_rename2_in *) inarg; ++ char *oldname = PARAM(arg); ++ char *newname = oldname + strlen(oldname) + 1; ++ ++ if (req->se->op.rename) ++ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, ++ arg->flags); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_link_in *arg = (struct fuse_link_in *) inarg; ++ ++ if (req->se->op.link) ++ req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_create_in *arg = (struct fuse_create_in *) inarg; ++ ++ if (req->se->op.create) { ++ struct fuse_file_info fi; ++ char *name = PARAM(arg); ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ ++ if (req->se->conn.proto_minor >= 12) ++ req->ctx.umask = arg->umask; ++ else ++ name = (char *) inarg + sizeof(struct fuse_open_in); ++ ++ req->se->op.create(req, nodeid, name, arg->mode, &fi); ++ } else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_open_in *arg = (struct fuse_open_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ ++ if (req->se->op.open) ++ req->se->op.open(req, nodeid, &fi); ++ else ++ fuse_reply_open(req, &fi); ++} ++ ++static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; ++ ++ if (req->se->op.read) { ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ if (req->se->conn.proto_minor >= 9) { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ } ++ 
req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); ++ } else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_write_in *arg = (struct fuse_write_in *) inarg; ++ struct fuse_file_info fi; ++ char *param; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; ++ ++ if (req->se->conn.proto_minor < 9) { ++ param = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; ++ } else { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ param = PARAM(arg); ++ } ++ ++ if (req->se->op.write) ++ req->se->op.write(req, nodeid, param, arg->size, ++ arg->offset, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, ++ const struct fuse_buf *ibuf) ++{ ++ struct fuse_session *se = req->se; ++ struct fuse_bufvec bufv = { ++ .buf[0] = *ibuf, ++ .count = 1, ++ }; ++ struct fuse_write_in *arg = (struct fuse_write_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; ++ ++ if (se->conn.proto_minor < 9) { ++ bufv.buf[0].mem = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; ++ bufv.buf[0].size -= sizeof(struct fuse_in_header) + ++ FUSE_COMPAT_WRITE_IN_SIZE; ++ assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); ++ } else { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) ++ bufv.buf[0].mem = PARAM(arg); ++ ++ bufv.buf[0].size -= sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_write_in); ++ } ++ if (bufv.buf[0].size < arg->size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); ++ fuse_reply_err(req, EIO); ++ goto out; ++ } ++ bufv.buf[0].size = arg->size; ++ ++ se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); ++ ++out: ++ /* Need to reset the pipe if ->write_buf() didn't 
consume all data */ ++ if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) ++ fuse_ll_clear_pipe(se); ++} ++ ++static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_flush_in *arg = (struct fuse_flush_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.flush = 1; ++ if (req->se->conn.proto_minor >= 7) ++ fi.lock_owner = arg->lock_owner; ++ ++ if (req->se->op.flush) ++ req->se->op.flush(req, nodeid, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_release_in *arg = (struct fuse_release_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ fi.fh = arg->fh; ++ if (req->se->conn.proto_minor >= 8) { ++ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; ++ fi.lock_owner = arg->lock_owner; ++ } ++ if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { ++ fi.flock_release = 1; ++ fi.lock_owner = arg->lock_owner; ++ } ++ ++ if (req->se->op.release) ++ req->se->op.release(req, nodeid, &fi); ++ else ++ fuse_reply_err(req, 0); ++} ++ ++static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; ++ struct fuse_file_info fi; ++ int datasync = arg->fsync_flags & 1; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.fsync) ++ req->se->op.fsync(req, nodeid, datasync, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_open_in *arg = (struct fuse_open_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ ++ if (req->se->op.opendir) ++ req->se->op.opendir(req, nodeid, &fi); ++ else ++ fuse_reply_open(req, &fi); ++} ++ ++static void do_readdir(fuse_req_t req, fuse_ino_t 
nodeid, const void *inarg) ++{ ++ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.readdir) ++ req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.readdirplus) ++ req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_release_in *arg = (struct fuse_release_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ fi.fh = arg->fh; ++ ++ if (req->se->op.releasedir) ++ req->se->op.releasedir(req, nodeid, &fi); ++ else ++ fuse_reply_err(req, 0); ++} ++ ++static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; ++ struct fuse_file_info fi; ++ int datasync = arg->fsync_flags & 1; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.fsyncdir) ++ req->se->op.fsyncdir(req, nodeid, datasync, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ (void) nodeid; ++ (void) inarg; ++ ++ if (req->se->op.statfs) ++ req->se->op.statfs(req, nodeid); ++ else { ++ struct statvfs buf = { ++ .f_namemax = 255, ++ .f_bsize = 512, ++ }; ++ fuse_reply_statfs(req, &buf); ++ } ++} ++ ++static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *) inarg; ++ char *name = PARAM(arg); ++ char *value = name 
+ strlen(name) + 1; ++ ++ if (req->se->op.setxattr) ++ req->se->op.setxattr(req, nodeid, name, value, arg->size, ++ arg->flags); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; ++ ++ if (req->se->op.getxattr) ++ req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; ++ ++ if (req->se->op.listxattr) ++ req->se->op.listxattr(req, nodeid, arg->size); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ char *name = (char *) inarg; ++ ++ if (req->se->op.removexattr) ++ req->se->op.removexattr(req, nodeid, name); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void convert_fuse_file_lock(struct fuse_file_lock *fl, ++ struct flock *flock) ++{ ++ memset(flock, 0, sizeof(struct flock)); ++ flock->l_type = fl->type; ++ flock->l_whence = SEEK_SET; ++ flock->l_start = fl->start; ++ if (fl->end == OFFSET_MAX) ++ flock->l_len = 0; ++ else ++ flock->l_len = fl->end - fl->start + 1; ++ flock->l_pid = fl->pid; ++} ++ ++static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; ++ struct fuse_file_info fi; ++ struct flock flock; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.lock_owner = arg->owner; ++ ++ convert_fuse_file_lock(&arg->lk, &flock); ++ if (req->se->op.getlk) ++ req->se->op.getlk(req, nodeid, &fi, &flock); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, ++ const void *inarg, int sleep) ++{ ++ struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; ++ struct fuse_file_info fi; ++ 
struct flock flock; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.lock_owner = arg->owner; ++ ++ if (arg->lk_flags & FUSE_LK_FLOCK) { ++ int op = 0; ++ ++ switch (arg->lk.type) { ++ case F_RDLCK: ++ op = LOCK_SH; ++ break; ++ case F_WRLCK: ++ op = LOCK_EX; ++ break; ++ case F_UNLCK: ++ op = LOCK_UN; ++ break; ++ } ++ if (!sleep) ++ op |= LOCK_NB; ++ ++ if (req->se->op.flock) ++ req->se->op.flock(req, nodeid, &fi, op); ++ else ++ fuse_reply_err(req, ENOSYS); ++ } else { ++ convert_fuse_file_lock(&arg->lk, &flock); ++ if (req->se->op.setlk) ++ req->se->op.setlk(req, nodeid, &fi, &flock, sleep); ++ else ++ fuse_reply_err(req, ENOSYS); ++ } ++} ++ ++static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ do_setlk_common(req, nodeid, inarg, 0); ++} ++ ++static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ do_setlk_common(req, nodeid, inarg, 1); ++} ++ ++static int find_interrupted(struct fuse_session *se, struct fuse_req *req) ++{ ++ struct fuse_req *curr; ++ ++ for (curr = se->list.next; curr != &se->list; curr = curr->next) { ++ if (curr->unique == req->u.i.unique) { ++ fuse_interrupt_func_t func; ++ void *data; ++ ++ curr->ctr++; ++ pthread_mutex_unlock(&se->lock); ++ ++ /* Ugh, ugly locking */ ++ pthread_mutex_lock(&curr->lock); ++ pthread_mutex_lock(&se->lock); ++ curr->interrupted = 1; ++ func = curr->u.ni.func; ++ data = curr->u.ni.data; ++ pthread_mutex_unlock(&se->lock); ++ if (func) ++ func(curr, data); ++ pthread_mutex_unlock(&curr->lock); ++ ++ pthread_mutex_lock(&se->lock); ++ curr->ctr--; ++ if (!curr->ctr) ++ destroy_req(curr); ++ ++ return 1; ++ } ++ } ++ for (curr = se->interrupts.next; curr != &se->interrupts; ++ curr = curr->next) { ++ if (curr->u.i.unique == req->u.i.unique) ++ return 1; ++ } ++ return 0; ++} ++ ++static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *) inarg; ++ struct 
fuse_session *se = req->se; ++ ++ (void) nodeid; ++ if (se->debug) ++ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", ++ (unsigned long long) arg->unique); ++ ++ req->u.i.unique = arg->unique; ++ ++ pthread_mutex_lock(&se->lock); ++ if (find_interrupted(se, req)) ++ destroy_req(req); ++ else ++ list_add_req(req, &se->interrupts); ++ pthread_mutex_unlock(&se->lock); ++} ++ ++static struct fuse_req *check_interrupt(struct fuse_session *se, ++ struct fuse_req *req) ++{ ++ struct fuse_req *curr; ++ ++ for (curr = se->interrupts.next; curr != &se->interrupts; ++ curr = curr->next) { ++ if (curr->u.i.unique == req->unique) { ++ req->interrupted = 1; ++ list_del_req(curr); ++ free(curr); ++ return NULL; ++ } ++ } ++ curr = se->interrupts.next; ++ if (curr != &se->interrupts) { ++ list_del_req(curr); ++ list_init_req(curr); ++ return curr; ++ } else ++ return NULL; ++} ++ ++static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_bmap_in *arg = (struct fuse_bmap_in *) inarg; ++ ++ if (req->se->op.bmap) ++ req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *) inarg; ++ unsigned int flags = arg->flags; ++ void *in_buf = arg->in_size ? 
PARAM(arg) : NULL; ++ struct fuse_file_info fi; ++ ++ if (flags & FUSE_IOCTL_DIR && ++ !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { ++ fuse_reply_err(req, ENOTTY); ++ return; ++ } ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && ++ !(flags & FUSE_IOCTL_32BIT)) { ++ req->ioctl_64bit = 1; ++ } ++ ++ if (req->se->op.ioctl) ++ req->se->op.ioctl(req, nodeid, arg->cmd, ++ (void *)(uintptr_t)arg->arg, &fi, flags, ++ in_buf, arg->in_size, arg->out_size); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) ++{ ++ free(ph); ++} ++ ++static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_poll_in *arg = (struct fuse_poll_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.poll_events = arg->events; ++ ++ if (req->se->op.poll) { ++ struct fuse_pollhandle *ph = NULL; ++ ++ if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { ++ ph = malloc(sizeof(struct fuse_pollhandle)); ++ if (ph == NULL) { ++ fuse_reply_err(req, ENOMEM); ++ return; ++ } ++ ph->kh = arg->kh; ++ ph->se = req->se; ++ } ++ ++ req->se->op.poll(req, nodeid, &fi, ph); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } ++} ++ ++static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.fallocate) ++ req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, const void *inarg) ++{ ++ struct fuse_copy_file_range_in *arg = (struct fuse_copy_file_range_in *) inarg; ++ struct fuse_file_info fi_in, fi_out; ++ ++ memset(&fi_in, 0, sizeof(fi_in)); ++ fi_in.fh = arg->fh_in; ++ ++ 
memset(&fi_out, 0, sizeof(fi_out)); ++ fi_out.fh = arg->fh_out; ++ ++ ++ if (req->se->op.copy_file_range) ++ req->se->op.copy_file_range(req, nodeid_in, arg->off_in, ++ &fi_in, arg->nodeid_out, ++ arg->off_out, &fi_out, arg->len, ++ arg->flags); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_lseek_in *arg = (struct fuse_lseek_in *) inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ ++ if (req->se->op.lseek) ++ req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); ++ else ++ fuse_reply_err(req, ENOSYS); ++} ++ ++static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_init_in *arg = (struct fuse_init_in *) inarg; ++ struct fuse_init_out outarg; ++ struct fuse_session *se = req->se; ++ size_t bufsize = se->bufsize; ++ size_t outargsize = sizeof(outarg); ++ ++ (void) nodeid; ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); ++ if (arg->major == 7 && arg->minor >= 6) { ++ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); ++ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", ++ arg->max_readahead); ++ } ++ } ++ se->conn.proto_major = arg->major; ++ se->conn.proto_minor = arg->minor; ++ se->conn.capable = 0; ++ se->conn.want = 0; ++ ++ memset(&outarg, 0, sizeof(outarg)); ++ outarg.major = FUSE_KERNEL_VERSION; ++ outarg.minor = FUSE_KERNEL_MINOR_VERSION; ++ ++ if (arg->major < 7) { ++ fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", ++ arg->major, arg->minor); ++ fuse_reply_err(req, EPROTO); ++ return; ++ } ++ ++ if (arg->major > 7) { ++ /* Wait for a second INIT request with a 7.X version */ ++ send_reply_ok(req, &outarg, sizeof(outarg)); ++ return; ++ } ++ ++ if (arg->minor >= 6) { ++ if (arg->max_readahead < se->conn.max_readahead) ++ se->conn.max_readahead = arg->max_readahead; ++ if (arg->flags & FUSE_ASYNC_READ) ++ 
se->conn.capable |= FUSE_CAP_ASYNC_READ; ++ if (arg->flags & FUSE_POSIX_LOCKS) ++ se->conn.capable |= FUSE_CAP_POSIX_LOCKS; ++ if (arg->flags & FUSE_ATOMIC_O_TRUNC) ++ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; ++ if (arg->flags & FUSE_EXPORT_SUPPORT) ++ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; ++ if (arg->flags & FUSE_DONT_MASK) ++ se->conn.capable |= FUSE_CAP_DONT_MASK; ++ if (arg->flags & FUSE_FLOCK_LOCKS) ++ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; ++ if (arg->flags & FUSE_AUTO_INVAL_DATA) ++ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; ++ if (arg->flags & FUSE_DO_READDIRPLUS) ++ se->conn.capable |= FUSE_CAP_READDIRPLUS; ++ if (arg->flags & FUSE_READDIRPLUS_AUTO) ++ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; ++ if (arg->flags & FUSE_ASYNC_DIO) ++ se->conn.capable |= FUSE_CAP_ASYNC_DIO; ++ if (arg->flags & FUSE_WRITEBACK_CACHE) ++ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; ++ if (arg->flags & FUSE_NO_OPEN_SUPPORT) ++ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; ++ if (arg->flags & FUSE_PARALLEL_DIROPS) ++ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; ++ if (arg->flags & FUSE_POSIX_ACL) ++ se->conn.capable |= FUSE_CAP_POSIX_ACL; ++ if (arg->flags & FUSE_HANDLE_KILLPRIV) ++ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; ++ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) ++ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; ++ if (!(arg->flags & FUSE_MAX_PAGES)) { ++ size_t max_bufsize = ++ FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() ++ + FUSE_BUFFER_HEADER_SIZE; ++ if (bufsize > max_bufsize) { ++ bufsize = max_bufsize; ++ } ++ } ++ } else { ++ se->conn.max_readahead = 0; ++ } ++ ++ if (se->conn.proto_minor >= 14) { ++#ifdef HAVE_SPLICE ++#ifdef HAVE_VMSPLICE ++ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; ++#endif ++ se->conn.capable |= FUSE_CAP_SPLICE_READ; ++#endif ++ } ++ if (se->conn.proto_minor >= 18) ++ se->conn.capable |= FUSE_CAP_IOCTL_DIR; ++ ++ /* Default settings for modern filesystems. 
++ * ++ * Most of these capabilities were disabled by default in ++ * libfuse2 for backwards compatibility reasons. In libfuse3, ++ * we can finally enable them by default (as long as they're ++ * supported by the kernel). ++ */ ++#define LL_SET_DEFAULT(cond, cap) \ ++ if ((cond) && (se->conn.capable & (cap))) \ ++ se->conn.want |= (cap) ++ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); ++ LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); ++ LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); ++ LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); ++ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); ++ LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); ++ LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); ++ LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); ++ LL_SET_DEFAULT(se->op.getlk && se->op.setlk, ++ FUSE_CAP_POSIX_LOCKS); ++ LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); ++ LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); ++ LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, ++ FUSE_CAP_READDIRPLUS_AUTO); ++ se->conn.time_gran = 1; ++ ++ if (bufsize < FUSE_MIN_READ_BUFFER) { ++ fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", ++ bufsize); ++ bufsize = FUSE_MIN_READ_BUFFER; ++ } ++ se->bufsize = bufsize; ++ ++ if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) ++ se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; ++ ++ se->got_init = 1; ++ if (se->op.init) ++ se->op.init(se->userdata, &se->conn); ++ ++ if (se->conn.want & (~se->conn.capable)) { ++ fuse_log(FUSE_LOG_ERR, "fuse: error: filesystem requested capabilities " ++ "0x%x that are not supported by kernel, aborting.\n", ++ se->conn.want & (~se->conn.capable)); ++ fuse_reply_err(req, EPROTO); ++ se->error = -EPROTO; ++ fuse_session_exit(se); ++ return; ++ } ++ ++ unsigned max_read_mo = get_max_read(se->mo); ++ if (se->conn.max_read != max_read_mo) { ++ fuse_log(FUSE_LOG_ERR, "fuse: error: init() and fuse_session_new() " ++ "requested different maximum read size (%u vs %u)\n", ++ se->conn.max_read, 
max_read_mo); ++ fuse_reply_err(req, EPROTO); ++ se->error = -EPROTO; ++ fuse_session_exit(se); ++ return; ++ } ++ ++ if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { ++ se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; ++ } ++ if (arg->flags & FUSE_MAX_PAGES) { ++ outarg.flags |= FUSE_MAX_PAGES; ++ outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; ++ } ++ ++ /* Always enable big writes, this is superseded ++ by the max_write option */ ++ outarg.flags |= FUSE_BIG_WRITES; ++ ++ if (se->conn.want & FUSE_CAP_ASYNC_READ) ++ outarg.flags |= FUSE_ASYNC_READ; ++ if (se->conn.want & FUSE_CAP_POSIX_LOCKS) ++ outarg.flags |= FUSE_POSIX_LOCKS; ++ if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) ++ outarg.flags |= FUSE_ATOMIC_O_TRUNC; ++ if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) ++ outarg.flags |= FUSE_EXPORT_SUPPORT; ++ if (se->conn.want & FUSE_CAP_DONT_MASK) ++ outarg.flags |= FUSE_DONT_MASK; ++ if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) ++ outarg.flags |= FUSE_FLOCK_LOCKS; ++ if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) ++ outarg.flags |= FUSE_AUTO_INVAL_DATA; ++ if (se->conn.want & FUSE_CAP_READDIRPLUS) ++ outarg.flags |= FUSE_DO_READDIRPLUS; ++ if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) ++ outarg.flags |= FUSE_READDIRPLUS_AUTO; ++ if (se->conn.want & FUSE_CAP_ASYNC_DIO) ++ outarg.flags |= FUSE_ASYNC_DIO; ++ if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) ++ outarg.flags |= FUSE_WRITEBACK_CACHE; ++ if (se->conn.want & FUSE_CAP_POSIX_ACL) ++ outarg.flags |= FUSE_POSIX_ACL; ++ outarg.max_readahead = se->conn.max_readahead; ++ outarg.max_write = se->conn.max_write; ++ if (se->conn.proto_minor >= 13) { ++ if (se->conn.max_background >= (1 << 16)) ++ se->conn.max_background = (1 << 16) - 1; ++ if (se->conn.congestion_threshold > se->conn.max_background) ++ se->conn.congestion_threshold = se->conn.max_background; ++ if (!se->conn.congestion_threshold) { ++ se->conn.congestion_threshold = ++ se->conn.max_background * 3 / 4; ++ } ++ ++ 
outarg.max_background = se->conn.max_background; ++ outarg.congestion_threshold = se->conn.congestion_threshold; ++ } ++ if (se->conn.proto_minor >= 23) ++ outarg.time_gran = se->conn.time_gran; ++ ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); ++ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); ++ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", ++ outarg.max_readahead); ++ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); ++ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", ++ outarg.max_background); ++ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", ++ outarg.congestion_threshold); ++ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", ++ outarg.time_gran); ++ } ++ if (arg->minor < 5) ++ outargsize = FUSE_COMPAT_INIT_OUT_SIZE; ++ else if (arg->minor < 23) ++ outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; ++ ++ send_reply_ok(req, &outarg, outargsize); ++} ++ ++static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++{ ++ struct fuse_session *se = req->se; ++ ++ (void) nodeid; ++ (void) inarg; ++ ++ se->got_destroy = 1; ++ if (se->op.destroy) ++ se->op.destroy(se->userdata); ++ ++ send_reply_ok(req, NULL, 0); ++} ++ ++static void list_del_nreq(struct fuse_notify_req *nreq) ++{ ++ struct fuse_notify_req *prev = nreq->prev; ++ struct fuse_notify_req *next = nreq->next; ++ prev->next = next; ++ next->prev = prev; ++} ++ ++static void list_add_nreq(struct fuse_notify_req *nreq, ++ struct fuse_notify_req *next) ++{ ++ struct fuse_notify_req *prev = next->prev; ++ nreq->next = next; ++ nreq->prev = prev; ++ prev->next = nreq; ++ next->prev = nreq; ++} ++ ++static void list_init_nreq(struct fuse_notify_req *nreq) ++{ ++ nreq->next = nreq; ++ nreq->prev = nreq; ++} ++ ++static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, ++ const void *inarg, const struct fuse_buf *buf) ++{ ++ struct fuse_session *se = req->se; ++ struct fuse_notify_req *nreq; ++ struct fuse_notify_req 
*head; ++ ++ pthread_mutex_lock(&se->lock); ++ head = &se->notify_list; ++ for (nreq = head->next; nreq != head; nreq = nreq->next) { ++ if (nreq->unique == req->unique) { ++ list_del_nreq(nreq); ++ break; ++ } ++ } ++ pthread_mutex_unlock(&se->lock); ++ ++ if (nreq != head) ++ nreq->reply(nreq, req, nodeid, inarg, buf); ++} ++ ++static int send_notify_iov(struct fuse_session *se, int notify_code, ++ struct iovec *iov, int count) ++{ ++ struct fuse_out_header out; ++ ++ if (!se->got_init) ++ return -ENOTCONN; ++ ++ out.unique = 0; ++ out.error = notify_code; ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct fuse_out_header); ++ ++ return fuse_send_msg(se, NULL, iov, count); ++} ++ ++int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) ++{ ++ if (ph != NULL) { ++ struct fuse_notify_poll_wakeup_out outarg; ++ struct iovec iov[2]; ++ ++ outarg.kh = ph->kh; ++ ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ ++ return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); ++ } else { ++ return 0; ++ } ++} ++ ++int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, ++ off_t off, off_t len) ++{ ++ struct fuse_notify_inval_inode_out outarg; ++ struct iovec iov[2]; ++ ++ if (!se) ++ return -EINVAL; ++ ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) ++ return -ENOSYS; ++ ++ outarg.ino = ino; ++ outarg.off = off; ++ outarg.len = len; ++ ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ ++ return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); ++} ++ ++int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, ++ const char *name, size_t namelen) ++{ ++ struct fuse_notify_inval_entry_out outarg; ++ struct iovec iov[3]; ++ ++ if (!se) ++ return -EINVAL; ++ ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) ++ return -ENOSYS; ++ ++ outarg.parent = parent; ++ outarg.namelen = namelen; ++ outarg.padding = 0; ++ ++ iov[1].iov_base = &outarg; ++ 
iov[1].iov_len = sizeof(outarg); ++ iov[2].iov_base = (void *)name; ++ iov[2].iov_len = namelen + 1; ++ ++ return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); ++} ++ ++int fuse_lowlevel_notify_delete(struct fuse_session *se, ++ fuse_ino_t parent, fuse_ino_t child, ++ const char *name, size_t namelen) ++{ ++ struct fuse_notify_delete_out outarg; ++ struct iovec iov[3]; ++ ++ if (!se) ++ return -EINVAL; ++ ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) ++ return -ENOSYS; ++ ++ outarg.parent = parent; ++ outarg.child = child; ++ outarg.namelen = namelen; ++ outarg.padding = 0; ++ ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ iov[2].iov_base = (void *)name; ++ iov[2].iov_len = namelen + 1; ++ ++ return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); ++} ++ ++int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, ++ off_t offset, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags) ++{ ++ struct fuse_out_header out; ++ struct fuse_notify_store_out outarg; ++ struct iovec iov[3]; ++ size_t size = fuse_buf_size(bufv); ++ int res; ++ ++ if (!se) ++ return -EINVAL; ++ ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) ++ return -ENOSYS; ++ ++ out.unique = 0; ++ out.error = FUSE_NOTIFY_STORE; ++ ++ outarg.nodeid = ino; ++ outarg.offset = offset; ++ outarg.size = size; ++ outarg.padding = 0; ++ ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(out); ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ ++ res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); ++ if (res > 0) ++ res = -res; ++ ++ return res; ++} ++ ++struct fuse_retrieve_req { ++ struct fuse_notify_req nreq; ++ void *cookie; ++}; ++ ++static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, ++ fuse_req_t req, fuse_ino_t ino, ++ const void *inarg, ++ const struct fuse_buf *ibuf) ++{ ++ struct fuse_session *se = req->se; ++ struct fuse_retrieve_req *rreq = ++ container_of(nreq, struct 
fuse_retrieve_req, nreq); ++ const struct fuse_notify_retrieve_in *arg = inarg; ++ struct fuse_bufvec bufv = { ++ .buf[0] = *ibuf, ++ .count = 1, ++ }; ++ ++ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) ++ bufv.buf[0].mem = PARAM(arg); ++ ++ bufv.buf[0].size -= sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_notify_retrieve_in); ++ ++ if (bufv.buf[0].size < arg->size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); ++ fuse_reply_none(req); ++ goto out; ++ } ++ bufv.buf[0].size = arg->size; ++ ++ if (se->op.retrieve_reply) { ++ se->op.retrieve_reply(req, rreq->cookie, ino, ++ arg->offset, &bufv); ++ } else { ++ fuse_reply_none(req); ++ } ++out: ++ free(rreq); ++ if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) ++ fuse_ll_clear_pipe(se); ++} ++ ++int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, ++ size_t size, off_t offset, void *cookie) ++{ ++ struct fuse_notify_retrieve_out outarg; ++ struct iovec iov[2]; ++ struct fuse_retrieve_req *rreq; ++ int err; ++ ++ if (!se) ++ return -EINVAL; ++ ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) ++ return -ENOSYS; ++ ++ rreq = malloc(sizeof(*rreq)); ++ if (rreq == NULL) ++ return -ENOMEM; ++ ++ pthread_mutex_lock(&se->lock); ++ rreq->cookie = cookie; ++ rreq->nreq.unique = se->notify_ctr++; ++ rreq->nreq.reply = fuse_ll_retrieve_reply; ++ list_add_nreq(&rreq->nreq, &se->notify_list); ++ pthread_mutex_unlock(&se->lock); ++ ++ outarg.notify_unique = rreq->nreq.unique; ++ outarg.nodeid = ino; ++ outarg.offset = offset; ++ outarg.size = size; ++ outarg.padding = 0; ++ ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ ++ err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); ++ if (err) { ++ pthread_mutex_lock(&se->lock); ++ list_del_nreq(&rreq->nreq); ++ pthread_mutex_unlock(&se->lock); ++ free(rreq); ++ } ++ ++ return err; ++} ++ ++void *fuse_req_userdata(fuse_req_t req) ++{ ++ return req->se->userdata; ++} ++ ++const 
struct fuse_ctx *fuse_req_ctx(fuse_req_t req) ++{ ++ return &req->ctx; ++} ++ ++void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, ++ void *data) ++{ ++ pthread_mutex_lock(&req->lock); ++ pthread_mutex_lock(&req->se->lock); ++ req->u.ni.func = func; ++ req->u.ni.data = data; ++ pthread_mutex_unlock(&req->se->lock); ++ if (req->interrupted && func) ++ func(req, data); ++ pthread_mutex_unlock(&req->lock); ++} ++ ++int fuse_req_interrupted(fuse_req_t req) ++{ ++ int interrupted; ++ ++ pthread_mutex_lock(&req->se->lock); ++ interrupted = req->interrupted; ++ pthread_mutex_unlock(&req->se->lock); ++ ++ return interrupted; ++} ++ ++static struct { ++ void (*func)(fuse_req_t, fuse_ino_t, const void *); ++ const char *name; ++} fuse_ll_ops[] = { ++ [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, ++ [FUSE_FORGET] = { do_forget, "FORGET" }, ++ [FUSE_GETATTR] = { do_getattr, "GETATTR" }, ++ [FUSE_SETATTR] = { do_setattr, "SETATTR" }, ++ [FUSE_READLINK] = { do_readlink, "READLINK" }, ++ [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, ++ [FUSE_MKNOD] = { do_mknod, "MKNOD" }, ++ [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, ++ [FUSE_UNLINK] = { do_unlink, "UNLINK" }, ++ [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, ++ [FUSE_RENAME] = { do_rename, "RENAME" }, ++ [FUSE_LINK] = { do_link, "LINK" }, ++ [FUSE_OPEN] = { do_open, "OPEN" }, ++ [FUSE_READ] = { do_read, "READ" }, ++ [FUSE_WRITE] = { do_write, "WRITE" }, ++ [FUSE_STATFS] = { do_statfs, "STATFS" }, ++ [FUSE_RELEASE] = { do_release, "RELEASE" }, ++ [FUSE_FSYNC] = { do_fsync, "FSYNC" }, ++ [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, ++ [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, ++ [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, ++ [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, ++ [FUSE_FLUSH] = { do_flush, "FLUSH" }, ++ [FUSE_INIT] = { do_init, "INIT" }, ++ [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, ++ [FUSE_READDIR] = { do_readdir, "READDIR" }, ++ [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, ++ 
[FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, ++ [FUSE_GETLK] = { do_getlk, "GETLK" }, ++ [FUSE_SETLK] = { do_setlk, "SETLK" }, ++ [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, ++ [FUSE_ACCESS] = { do_access, "ACCESS" }, ++ [FUSE_CREATE] = { do_create, "CREATE" }, ++ [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, ++ [FUSE_BMAP] = { do_bmap, "BMAP" }, ++ [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, ++ [FUSE_POLL] = { do_poll, "POLL" }, ++ [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, ++ [FUSE_DESTROY] = { do_destroy, "DESTROY" }, ++ [FUSE_NOTIFY_REPLY] = { (void *) 1, "NOTIFY_REPLY" }, ++ [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, ++ [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS"}, ++ [FUSE_RENAME2] = { do_rename2, "RENAME2" }, ++ [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, ++ [FUSE_LSEEK] = { do_lseek, "LSEEK" }, ++ [CUSE_INIT] = { cuse_lowlevel_init, "CUSE_INIT" }, ++}; ++ ++#define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) ++ ++static const char *opname(enum fuse_opcode opcode) ++{ ++ if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) ++ return "???"; ++ else ++ return fuse_ll_ops[opcode].name; ++} ++ ++static int fuse_ll_copy_from_pipe(struct fuse_bufvec *dst, ++ struct fuse_bufvec *src) ++{ ++ ssize_t res = fuse_buf_copy(dst, src, 0); ++ if (res < 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", strerror(-res)); ++ return res; ++ } ++ if ((size_t)res < fuse_buf_size(dst)) { ++ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); ++ return -1; ++ } ++ return 0; ++} ++ ++void fuse_session_process_buf(struct fuse_session *se, ++ const struct fuse_buf *buf) ++{ ++ fuse_session_process_buf_int(se, buf, NULL); ++} ++ ++void fuse_session_process_buf_int(struct fuse_session *se, ++ const struct fuse_buf *buf, struct fuse_chan *ch) ++{ ++ const size_t write_header_size = sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_write_in); ++ struct fuse_bufvec bufv = { .buf[0] = *buf, .count 
= 1 }; ++ struct fuse_bufvec tmpbuf = FUSE_BUFVEC_INIT(write_header_size); ++ struct fuse_in_header *in; ++ const void *inarg; ++ struct fuse_req *req; ++ void *mbuf = NULL; ++ int err; ++ int res; ++ ++ if (buf->flags & FUSE_BUF_IS_FD) { ++ if (buf->size < tmpbuf.buf[0].size) ++ tmpbuf.buf[0].size = buf->size; ++ ++ mbuf = malloc(tmpbuf.buf[0].size); ++ if (mbuf == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate header\n"); ++ goto clear_pipe; ++ } ++ tmpbuf.buf[0].mem = mbuf; ++ ++ res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); ++ if (res < 0) ++ goto clear_pipe; ++ ++ in = mbuf; ++ } else { ++ in = buf->mem; ++ } ++ ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, ++ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", ++ (unsigned long long) in->unique, ++ opname((enum fuse_opcode) in->opcode), in->opcode, ++ (unsigned long long) in->nodeid, buf->size, in->pid); ++ } ++ ++ req = fuse_ll_alloc_req(se); ++ if (req == NULL) { ++ struct fuse_out_header out = { ++ .unique = in->unique, ++ .error = -ENOMEM, ++ }; ++ struct iovec iov = { ++ .iov_base = &out, ++ .iov_len = sizeof(struct fuse_out_header), ++ }; ++ ++ fuse_send_msg(se, ch, &iov, 1); ++ goto clear_pipe; ++ } ++ ++ req->unique = in->unique; ++ req->ctx.uid = in->uid; ++ req->ctx.gid = in->gid; ++ req->ctx.pid = in->pid; ++ req->ch = ch ? fuse_chan_get(ch) : NULL; ++ ++ err = EIO; ++ if (!se->got_init) { ++ enum fuse_opcode expected; ++ ++ expected = se->cuse_data ? 
CUSE_INIT : FUSE_INIT; ++ if (in->opcode != expected) ++ goto reply_err; ++ } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) ++ goto reply_err; ++ ++ err = EACCES; ++ /* Implement -o allow_root */ ++ if (se->deny_others && in->uid != se->owner && in->uid != 0 && ++ in->opcode != FUSE_INIT && in->opcode != FUSE_READ && ++ in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && ++ in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && ++ in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && ++ in->opcode != FUSE_NOTIFY_REPLY && ++ in->opcode != FUSE_READDIRPLUS) ++ goto reply_err; ++ ++ err = ENOSYS; ++ if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) ++ goto reply_err; ++ if (in->opcode != FUSE_INTERRUPT) { ++ struct fuse_req *intr; ++ pthread_mutex_lock(&se->lock); ++ intr = check_interrupt(se, req); ++ list_add_req(req, &se->list); ++ pthread_mutex_unlock(&se->lock); ++ if (intr) ++ fuse_reply_err(intr, EAGAIN); ++ } ++ ++ if ((buf->flags & FUSE_BUF_IS_FD) && write_header_size < buf->size && ++ (in->opcode != FUSE_WRITE || !se->op.write_buf) && ++ in->opcode != FUSE_NOTIFY_REPLY) { ++ void *newmbuf; ++ ++ err = ENOMEM; ++ newmbuf = realloc(mbuf, buf->size); ++ if (newmbuf == NULL) ++ goto reply_err; ++ mbuf = newmbuf; ++ ++ tmpbuf = FUSE_BUFVEC_INIT(buf->size - write_header_size); ++ tmpbuf.buf[0].mem = (char *)mbuf + write_header_size; ++ ++ res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); ++ err = -res; ++ if (res < 0) ++ goto reply_err; ++ ++ in = mbuf; ++ } ++ ++ inarg = (void *) &in[1]; ++ if (in->opcode == FUSE_WRITE && se->op.write_buf) ++ do_write_buf(req, in->nodeid, inarg, buf); ++ else if (in->opcode == FUSE_NOTIFY_REPLY) ++ do_notify_reply(req, in->nodeid, inarg, buf); ++ else ++ fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); ++ ++out_free: ++ free(mbuf); ++ return; ++ ++reply_err: ++ fuse_reply_err(req, err); ++clear_pipe: ++ if (buf->flags & FUSE_BUF_IS_FD) ++ fuse_ll_clear_pipe(se); ++ goto out_free; 
++} ++ ++#define LL_OPTION(n,o,v) \ ++ { n, offsetof(struct fuse_session, o), v } ++ ++static const struct fuse_opt fuse_ll_opts[] = { ++ LL_OPTION("debug", debug, 1), ++ LL_OPTION("-d", debug, 1), ++ LL_OPTION("--debug", debug, 1), ++ LL_OPTION("allow_root", deny_others, 1), ++ FUSE_OPT_END ++}; ++ ++void fuse_lowlevel_version(void) ++{ ++ printf("using FUSE kernel interface version %i.%i\n", ++ FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); ++ fuse_mount_version(); ++} ++ ++void fuse_lowlevel_help(void) ++{ ++ /* These are not all options, but the ones that are ++ potentially of interest to an end-user */ ++ printf( ++" -o allow_other allow access by all users\n" ++" -o allow_root allow access by root\n" ++" -o auto_unmount auto unmount on process termination\n"); ++} ++ ++void fuse_session_destroy(struct fuse_session *se) ++{ ++ struct fuse_ll_pipe *llp; ++ ++ if (se->got_init && !se->got_destroy) { ++ if (se->op.destroy) ++ se->op.destroy(se->userdata); ++ } ++ llp = pthread_getspecific(se->pipe_key); ++ if (llp != NULL) ++ fuse_ll_pipe_free(llp); ++ pthread_key_delete(se->pipe_key); ++ pthread_mutex_destroy(&se->lock); ++ free(se->cuse_data); ++ if (se->fd != -1) ++ close(se->fd); ++ destroy_mount_opts(se->mo); ++ free(se); ++} ++ ++ ++static void fuse_ll_pipe_destructor(void *data) ++{ ++ struct fuse_ll_pipe *llp = data; ++ fuse_ll_pipe_free(llp); ++} ++ ++int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf) ++{ ++ return fuse_session_receive_buf_int(se, buf, NULL); ++} ++ ++int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, ++ struct fuse_chan *ch) ++{ ++ int err; ++ ssize_t res; ++#ifdef HAVE_SPLICE ++ size_t bufsize = se->bufsize; ++ struct fuse_ll_pipe *llp; ++ struct fuse_buf tmpbuf; ++ ++ if (se->conn.proto_minor < 14 || !(se->conn.want & FUSE_CAP_SPLICE_READ)) ++ goto fallback; ++ ++ llp = fuse_ll_get_pipe(se); ++ if (llp == NULL) ++ goto fallback; ++ ++ if (llp->size < bufsize) { ++ if 
(llp->can_grow) { ++ res = fcntl(llp->pipe[0], F_SETPIPE_SZ, bufsize); ++ if (res == -1) { ++ llp->can_grow = 0; ++ res = grow_pipe_to_max(llp->pipe[0]); ++ if (res > 0) ++ llp->size = res; ++ goto fallback; ++ } ++ llp->size = res; ++ } ++ if (llp->size < bufsize) ++ goto fallback; ++ } ++ ++ res = splice(ch ? ch->fd : se->fd, ++ NULL, llp->pipe[1], NULL, bufsize, 0); ++ err = errno; ++ ++ if (fuse_session_exited(se)) ++ return 0; ++ ++ if (res == -1) { ++ if (err == ENODEV) { ++ /* Filesystem was unmounted, or connection was aborted ++ via /sys/fs/fuse/connections */ ++ fuse_session_exit(se); ++ return 0; ++ } ++ if (err != EINTR && err != EAGAIN) ++ perror("fuse: splice from device"); ++ return -err; ++ } ++ ++ if (res < sizeof(struct fuse_in_header)) { ++ fuse_log(FUSE_LOG_ERR, "short splice from fuse device\n"); ++ return -EIO; ++ } ++ ++ tmpbuf = (struct fuse_buf) { ++ .size = res, ++ .flags = FUSE_BUF_IS_FD, ++ .fd = llp->pipe[0], ++ }; ++ ++ /* ++ * Don't bother with zero copy for small requests. ++ * fuse_loop_mt() needs to check for FORGET so this more than ++ * just an optimization. 
++ */ ++ if (res < sizeof(struct fuse_in_header) + ++ sizeof(struct fuse_write_in) + pagesize) { ++ struct fuse_bufvec src = { .buf[0] = tmpbuf, .count = 1 }; ++ struct fuse_bufvec dst = { .count = 1 }; ++ ++ if (!buf->mem) { ++ buf->mem = malloc(se->bufsize); ++ if (!buf->mem) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: failed to allocate read buffer\n"); ++ return -ENOMEM; ++ } ++ } ++ buf->size = se->bufsize; ++ buf->flags = 0; ++ dst.buf[0] = *buf; ++ ++ res = fuse_buf_copy(&dst, &src, 0); ++ if (res < 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", ++ strerror(-res)); ++ fuse_ll_clear_pipe(se); ++ return res; ++ } ++ if (res < tmpbuf.size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); ++ fuse_ll_clear_pipe(se); ++ return -EIO; ++ } ++ assert(res == tmpbuf.size); ++ ++ } else { ++ /* Don't overwrite buf->mem, as that would cause a leak */ ++ buf->fd = tmpbuf.fd; ++ buf->flags = tmpbuf.flags; ++ } ++ buf->size = tmpbuf.size; ++ ++ return res; ++ ++fallback: ++#endif ++ if (!buf->mem) { ++ buf->mem = malloc(se->bufsize); ++ if (!buf->mem) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: failed to allocate read buffer\n"); ++ return -ENOMEM; ++ } ++ } ++ ++restart: ++ res = read(ch ? 
ch->fd : se->fd, buf->mem, se->bufsize); ++ err = errno; ++ ++ if (fuse_session_exited(se)) ++ return 0; ++ if (res == -1) { ++ /* ENOENT means the operation was interrupted, it's safe ++ to restart */ ++ if (err == ENOENT) ++ goto restart; ++ ++ if (err == ENODEV) { ++ /* Filesystem was unmounted, or connection was aborted ++ via /sys/fs/fuse/connections */ ++ fuse_session_exit(se); ++ return 0; ++ } ++ /* Errors occurring during normal operation: EINTR (read ++ interrupted), EAGAIN (nonblocking I/O), ENODEV (filesystem ++ umounted) */ ++ if (err != EINTR && err != EAGAIN) ++ perror("fuse: reading device"); ++ return -err; ++ } ++ if ((size_t) res < sizeof(struct fuse_in_header)) { ++ fuse_log(FUSE_LOG_ERR, "short read on fuse device\n"); ++ return -EIO; ++ } ++ ++ buf->size = res; ++ ++ return res; ++} ++ ++struct fuse_session *fuse_session_new(struct fuse_args *args, ++ const struct fuse_lowlevel_ops *op, ++ size_t op_size, void *userdata) ++{ ++ int err; ++ struct fuse_session *se; ++ struct mount_opts *mo; ++ ++ if (sizeof(struct fuse_lowlevel_ops) < op_size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); ++ op_size = sizeof(struct fuse_lowlevel_ops); ++ } ++ ++ if (args->argc == 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: empty argv passed to fuse_session_new().\n"); ++ return NULL; ++ } ++ ++ se = (struct fuse_session *) calloc(1, sizeof(struct fuse_session)); ++ if (se == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate fuse object\n"); ++ goto out1; ++ } ++ se->fd = -1; ++ se->conn.max_write = UINT_MAX; ++ se->conn.max_readahead = UINT_MAX; ++ ++ /* Parse options */ ++ if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) ++ goto out2; ++ if(se->deny_others) { ++ /* Allowing access only by root is done by instructing ++ * kernel to allow access by everyone, and then restricting ++ * access to root and mountpoint owner in libfuse. 
++ */ ++ // We may be adding the option a second time, but ++ // that doesn't hurt. ++ if(fuse_opt_add_arg(args, "-oallow_other") == -1) ++ goto out2; ++ } ++ mo = parse_mount_opts(args); ++ if (mo == NULL) ++ goto out3; ++ ++ if(args->argc == 1 && ++ args->argv[0][0] == '-') { ++ fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " ++ "will be ignored\n"); ++ } else if (args->argc != 1) { ++ int i; ++ fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); ++ for(i = 1; i < args->argc-1; i++) ++ fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); ++ fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); ++ goto out4; ++ } ++ ++ if (se->debug) ++ fuse_log(FUSE_LOG_DEBUG, "FUSE library version: %s\n", PACKAGE_VERSION); ++ ++ se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + ++ FUSE_BUFFER_HEADER_SIZE; ++ ++ list_init_req(&se->list); ++ list_init_req(&se->interrupts); ++ list_init_nreq(&se->notify_list); ++ se->notify_ctr = 1; ++ fuse_mutex_init(&se->lock); ++ ++ err = pthread_key_create(&se->pipe_key, fuse_ll_pipe_destructor); ++ if (err) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to create thread specific key: %s\n", ++ strerror(err)); ++ goto out5; ++ } ++ ++ memcpy(&se->op, op, op_size); ++ se->owner = getuid(); ++ se->userdata = userdata; ++ ++ se->mo = mo; ++ return se; ++ ++out5: ++ pthread_mutex_destroy(&se->lock); ++out4: ++ fuse_opt_free_args(args); ++out3: ++ free(mo); ++out2: ++ free(se); ++out1: ++ return NULL; ++} ++ ++int fuse_session_mount(struct fuse_session *se, const char *mountpoint) ++{ ++ int fd; ++ ++ /* ++ * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos ++ * would ensue. ++ */ ++ do { ++ fd = open("/dev/null", O_RDWR); ++ if (fd > 2) ++ close(fd); ++ } while (fd >= 0 && fd <= 2); ++ ++ /* ++ * To allow FUSE daemons to run without privileges, the caller may open ++ * /dev/fuse before launching the file system and pass on the file ++ * descriptor by specifying /dev/fd/N as the mount point. 
Note that the ++ * parent process takes care of performing the mount in this case. ++ */ ++ fd = fuse_mnt_parse_fuse_fd(mountpoint); ++ if (fd != -1) { ++ if (fcntl(fd, F_GETFD) == -1) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: Invalid file descriptor /dev/fd/%u\n", ++ fd); ++ return -1; ++ } ++ se->fd = fd; ++ return 0; ++ } ++ ++ /* Open channel */ ++ fd = fuse_kern_mount(mountpoint, se->mo); ++ if (fd == -1) ++ return -1; ++ se->fd = fd; ++ ++ /* Save mountpoint */ ++ se->mountpoint = strdup(mountpoint); ++ if (se->mountpoint == NULL) ++ goto error_out; ++ ++ return 0; ++ ++error_out: ++ fuse_kern_unmount(mountpoint, fd); ++ return -1; ++} ++ ++int fuse_session_fd(struct fuse_session *se) ++{ ++ return se->fd; ++} ++ ++void fuse_session_unmount(struct fuse_session *se) ++{ ++ if (se->mountpoint != NULL) { ++ fuse_kern_unmount(se->mountpoint, se->fd); ++ free(se->mountpoint); ++ se->mountpoint = NULL; ++ } ++} ++ ++#ifdef linux ++int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) ++{ ++ char *buf; ++ size_t bufsize = 1024; ++ char path[128]; ++ int ret; ++ int fd; ++ unsigned long pid = req->ctx.pid; ++ char *s; ++ ++ sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); ++ ++retry: ++ buf = malloc(bufsize); ++ if (buf == NULL) ++ return -ENOMEM; ++ ++ ret = -EIO; ++ fd = open(path, O_RDONLY); ++ if (fd == -1) ++ goto out_free; ++ ++ ret = read(fd, buf, bufsize); ++ close(fd); ++ if (ret < 0) { ++ ret = -EIO; ++ goto out_free; ++ } ++ ++ if ((size_t)ret == bufsize) { ++ free(buf); ++ bufsize *= 4; ++ goto retry; ++ } ++ ++ ret = -EIO; ++ s = strstr(buf, "\nGroups:"); ++ if (s == NULL) ++ goto out_free; ++ ++ s += 8; ++ ret = 0; ++ while (1) { ++ char *end; ++ unsigned long val = strtoul(s, &end, 0); ++ if (end == s) ++ break; ++ ++ s = end; ++ if (ret < size) ++ list[ret] = val; ++ ret++; ++ } ++ ++out_free: ++ free(buf); ++ return ret; ++} ++#else /* linux */ ++/* ++ * This is currently not implemented on other than Linux... 
++ */ ++int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) ++{ ++ (void) req; (void) size; (void) list; ++ return -ENOSYS; ++} ++#endif ++ ++void fuse_session_exit(struct fuse_session *se) ++{ ++ se->exited = 1; ++} ++ ++void fuse_session_reset(struct fuse_session *se) ++{ ++ se->exited = 0; ++ se->error = 0; ++} ++ ++int fuse_session_exited(struct fuse_session *se) ++{ ++ return se->exited; ++} +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Add-main-virtio-loop.patch b/kvm-virtiofsd-Add-main-virtio-loop.patch new file mode 100644 index 0000000..c0ba96a --- /dev/null +++ b/kvm-virtiofsd-Add-main-virtio-loop.patch @@ -0,0 +1,105 @@ +From 6f413d8b76ff38e5bc01f36515ca71d7fd6e6144 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:58 +0100 +Subject: [PATCH 027/116] virtiofsd: Add main virtio loop +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-24-dgilbert@redhat.com> +Patchwork-id: 93475 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 023/112] virtiofsd: Add main virtio loop +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Processes incoming requests on the vhost-user fd. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 204d8ae57b3c57098642c79b3c03d42495149c09) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 42 +++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 39 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 2ae3c76..1928a20 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -11,12 +11,14 @@ + * See the file COPYING.LIB + */ + ++#include "fuse_virtio.h" + #include "fuse_i.h" + #include "standard-headers/linux/fuse.h" + #include "fuse_misc.h" + #include "fuse_opt.h" +-#include "fuse_virtio.h" + ++#include ++#include + #include + #include + #include +@@ -80,15 +82,49 @@ static const VuDevIface fv_iface = { + .queue_is_processed_in_order = fv_queue_order, + }; + ++/* ++ * Main loop; this mostly deals with events on the vhost-user ++ * socket itself, and not actual fuse data. ++ */ + int virtio_loop(struct fuse_session *se) + { + fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__); + +- while (1) { +- /* TODO: Add stuffing */ ++ while (!fuse_session_exited(se)) { ++ struct pollfd pf[1]; ++ pf[0].fd = se->vu_socketfd; ++ pf[0].events = POLLIN; ++ pf[0].revents = 0; ++ ++ fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for VU event\n", __func__); ++ int poll_res = ppoll(pf, 1, NULL, NULL); ++ ++ if (poll_res == -1) { ++ if (errno == EINTR) { ++ fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n", ++ __func__); ++ continue; ++ } ++ fuse_log(FUSE_LOG_ERR, "virtio_loop ppoll: %m\n"); ++ break; ++ } ++ assert(poll_res == 1); ++ if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { ++ fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x\n", __func__, ++ pf[0].revents); ++ break; ++ } ++ assert(pf[0].revents & POLLIN); ++ fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__); ++ if (!vu_dispatch(&se->virtio_dev->dev)) { ++ fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__); ++ break; ++ } + } + + 
fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); ++ ++ return 0; + } + + int virtio_session_mount(struct fuse_session *se) +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Add-options-for-virtio.patch b/kvm-virtiofsd-Add-options-for-virtio.patch new file mode 100644 index 0000000..8ac7fa7 --- /dev/null +++ b/kvm-virtiofsd-Add-options-for-virtio.patch @@ -0,0 +1,103 @@ +From 9c1bbe327cf8f88ffc78eed0fce8cdd6f3f006ef Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:54 +0100 +Subject: [PATCH 023/116] virtiofsd: Add options for virtio +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-20-dgilbert@redhat.com> +Patchwork-id: 93473 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 019/112] virtiofsd: Add options for virtio +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Add options to specify parameters for virtio-fs paths, i.e. + + ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 205de006aab8dcbe546a7e3a51d295c2d05e654b) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 1 + + tools/virtiofsd/fuse_lowlevel.c | 11 ++++++++--- + tools/virtiofsd/helper.c | 14 +++++++------- + 3 files changed, 16 insertions(+), 10 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index bae0699..26b1a7d 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -63,6 +63,7 @@ struct fuse_session { + struct fuse_notify_req notify_list; + size_t bufsize; + int error; ++ char *vu_socket_path; + }; + + struct fuse_chan { +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 8552cfb..17e8718 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2115,8 +2115,11 @@ reply_err: + } + + static const struct fuse_opt fuse_ll_opts[] = { +- LL_OPTION("debug", debug, 1), LL_OPTION("-d", debug, 1), +- LL_OPTION("--debug", debug, 1), LL_OPTION("allow_root", deny_others, 1), ++ LL_OPTION("debug", debug, 1), ++ LL_OPTION("-d", debug, 1), ++ LL_OPTION("--debug", debug, 1), ++ LL_OPTION("allow_root", deny_others, 1), ++ LL_OPTION("--socket-path=%s", vu_socket_path, 0), + FUSE_OPT_END + }; + +@@ -2132,7 +2135,9 @@ void fuse_lowlevel_help(void) + * These are not all options, but the ones that are + * potentially of interest to an end-user + */ +- printf(" -o allow_root allow access by root\n"); ++ printf( ++ " -o allow_root allow access by root\n" ++ " --socket-path=PATH path for the vhost-user socket\n"); + } + + void fuse_session_destroy(struct fuse_session *se) +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 9333691..676032e 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -127,13 +127,13 @@ static const struct fuse_opt conn_info_opt_spec[] = { + + void fuse_cmdline_help(void) + { +- printf( +- " -h --help print help\n" +- " -V --version print version\n" 
+- " -d -o debug enable debug output (implies -f)\n" +- " -f foreground operation\n" +- " -o max_idle_threads the maximum number of idle worker threads\n" +- " allowed (default: 10)\n"); ++ printf(" -h --help print help\n" ++ " -V --version print version\n" ++ " -d -o debug enable debug output (implies -f)\n" ++ " -f foreground operation\n" ++ " -o max_idle_threads the maximum number of idle worker " ++ "threads\n" ++ " allowed (default: 10)\n"); + } + + static int fuse_helper_opt_proc(void *data, const char *arg, int key, +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Add-passthrough_ll.patch b/kvm-virtiofsd-Add-passthrough_ll.patch new file mode 100644 index 0000000..2510551 --- /dev/null +++ b/kvm-virtiofsd-Add-passthrough_ll.patch @@ -0,0 +1,1387 @@ +From 18ef831cac81a6bd2336c73dda357d9d69f8fd25 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:43 +0100 +Subject: [PATCH 012/116] virtiofsd: Add passthrough_ll +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-9-dgilbert@redhat.com> +Patchwork-id: 93462 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 008/112] virtiofsd: Add passthrough_ll +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +passthrough_ll is one of the examples in the upstream fuse project +and is the main part of our daemon here. It passes through requests +from fuse to the underlying filesystem, using syscalls as directly +as possible. + +>From libfuse fuse-3.8.0 + +Signed-off-by: Dr. David Alan Gilbert + Fixed up 'GPL' to 'GPLv2' as per Dan's comments and consistent + with the 'LICENSE' file in libfuse; patch sent to libfuse to fix + it upstream. +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 7c6b66027241f41720240fc6ee1021cdbd975b2e) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 1338 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 1338 insertions(+) + create mode 100644 tools/virtiofsd/passthrough_ll.c + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +new file mode 100644 +index 0000000..e1a6056 +--- /dev/null ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -0,0 +1,1338 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU GPLv2. ++ See the file COPYING. ++*/ ++ ++/** @file ++ * ++ * This file system mirrors the existing file system hierarchy of the ++ * system, starting at the root file system. This is implemented by ++ * just "passing through" all requests to the corresponding user-space ++ * libc functions. In contrast to passthrough.c and passthrough_fh.c, ++ * this implementation uses the low-level API. Its performance should ++ * be the least bad among the three, but many operations are not ++ * implemented. In particular, it is not possible to remove files (or ++ * directories) because the code necessary to defer actual removal ++ * until the file is not opened anymore would make the example much ++ * more complicated. ++ * ++ * When writeback caching is enabled (-o writeback mount option), it ++ * is only possible to write to files for which the mounting user has ++ * read permissions. This is because the writeback cache requires the ++ * kernel to be able to issue read requests for all files (which the ++ * passthrough filesystem cannot satisfy if it can't read the file in ++ * the underlying filesystem). 
++ * ++ * Compile with: ++ * ++ * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o passthrough_ll ++ * ++ * ## Source code ## ++ * \include passthrough_ll.c ++ */ ++ ++#define _GNU_SOURCE ++#define FUSE_USE_VERSION 31 ++ ++#include "config.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "passthrough_helpers.h" ++ ++/* We are re-using pointers to our `struct lo_inode` and `struct ++ lo_dirp` elements as inodes. This means that we must be able to ++ store uintptr_t values in a fuse_ino_t variable. The following ++ incantation checks this condition at compile time. */ ++#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus ++_Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), ++ "fuse_ino_t too small to hold uintptr_t values!"); ++#else ++struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct \ ++ { unsigned _uintptr_to_must_hold_fuse_ino_t: ++ ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 
1 : -1); }; ++#endif ++ ++struct lo_inode { ++ struct lo_inode *next; /* protected by lo->mutex */ ++ struct lo_inode *prev; /* protected by lo->mutex */ ++ int fd; ++ bool is_symlink; ++ ino_t ino; ++ dev_t dev; ++ uint64_t refcount; /* protected by lo->mutex */ ++}; ++ ++enum { ++ CACHE_NEVER, ++ CACHE_NORMAL, ++ CACHE_ALWAYS, ++}; ++ ++struct lo_data { ++ pthread_mutex_t mutex; ++ int debug; ++ int writeback; ++ int flock; ++ int xattr; ++ const char *source; ++ double timeout; ++ int cache; ++ int timeout_set; ++ struct lo_inode root; /* protected by lo->mutex */ ++}; ++ ++static const struct fuse_opt lo_opts[] = { ++ { "writeback", ++ offsetof(struct lo_data, writeback), 1 }, ++ { "no_writeback", ++ offsetof(struct lo_data, writeback), 0 }, ++ { "source=%s", ++ offsetof(struct lo_data, source), 0 }, ++ { "flock", ++ offsetof(struct lo_data, flock), 1 }, ++ { "no_flock", ++ offsetof(struct lo_data, flock), 0 }, ++ { "xattr", ++ offsetof(struct lo_data, xattr), 1 }, ++ { "no_xattr", ++ offsetof(struct lo_data, xattr), 0 }, ++ { "timeout=%lf", ++ offsetof(struct lo_data, timeout), 0 }, ++ { "timeout=", ++ offsetof(struct lo_data, timeout_set), 1 }, ++ { "cache=never", ++ offsetof(struct lo_data, cache), CACHE_NEVER }, ++ { "cache=auto", ++ offsetof(struct lo_data, cache), CACHE_NORMAL }, ++ { "cache=always", ++ offsetof(struct lo_data, cache), CACHE_ALWAYS }, ++ ++ FUSE_OPT_END ++}; ++ ++static struct lo_data *lo_data(fuse_req_t req) ++{ ++ return (struct lo_data *) fuse_req_userdata(req); ++} ++ ++static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) ++{ ++ if (ino == FUSE_ROOT_ID) ++ return &lo_data(req)->root; ++ else ++ return (struct lo_inode *) (uintptr_t) ino; ++} ++ ++static int lo_fd(fuse_req_t req, fuse_ino_t ino) ++{ ++ return lo_inode(req, ino)->fd; ++} ++ ++static bool lo_debug(fuse_req_t req) ++{ ++ return lo_data(req)->debug != 0; ++} ++ ++static void lo_init(void *userdata, ++ struct fuse_conn_info *conn) ++{ ++ struct lo_data *lo = 
(struct lo_data*) userdata; ++ ++ if(conn->capable & FUSE_CAP_EXPORT_SUPPORT) ++ conn->want |= FUSE_CAP_EXPORT_SUPPORT; ++ ++ if (lo->writeback && ++ conn->capable & FUSE_CAP_WRITEBACK_CACHE) { ++ if (lo->debug) ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); ++ conn->want |= FUSE_CAP_WRITEBACK_CACHE; ++ } ++ if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { ++ if (lo->debug) ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); ++ conn->want |= FUSE_CAP_FLOCK_LOCKS; ++ } ++} ++ ++static void lo_getattr(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi) ++{ ++ int res; ++ struct stat buf; ++ struct lo_data *lo = lo_data(req); ++ ++ (void) fi; ++ ++ res = fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) ++ return (void) fuse_reply_err(req, errno); ++ ++ fuse_reply_attr(req, &buf, lo->timeout); ++} ++ ++static int utimensat_empty_nofollow(struct lo_inode *inode, ++ const struct timespec *tv) ++{ ++ int res; ++ char procname[64]; ++ ++ if (inode->is_symlink) { ++ res = utimensat(inode->fd, "", tv, ++ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1 && errno == EINVAL) { ++ /* Sorry, no race free way to set times on symlink. */ ++ errno = EPERM; ++ } ++ return res; ++ } ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ return utimensat(AT_FDCWD, procname, tv, 0); ++} ++ ++static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, ++ int valid, struct fuse_file_info *fi) ++{ ++ int saverr; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ int ifd = inode->fd; ++ int res; ++ ++ if (valid & FUSE_SET_ATTR_MODE) { ++ if (fi) { ++ res = fchmod(fi->fh, attr->st_mode); ++ } else { ++ sprintf(procname, "/proc/self/fd/%i", ifd); ++ res = chmod(procname, attr->st_mode); ++ } ++ if (res == -1) ++ goto out_err; ++ } ++ if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { ++ uid_t uid = (valid & FUSE_SET_ATTR_UID) ? 
++ attr->st_uid : (uid_t) -1; ++ gid_t gid = (valid & FUSE_SET_ATTR_GID) ? ++ attr->st_gid : (gid_t) -1; ++ ++ res = fchownat(ifd, "", uid, gid, ++ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) ++ goto out_err; ++ } ++ if (valid & FUSE_SET_ATTR_SIZE) { ++ if (fi) { ++ res = ftruncate(fi->fh, attr->st_size); ++ } else { ++ sprintf(procname, "/proc/self/fd/%i", ifd); ++ res = truncate(procname, attr->st_size); ++ } ++ if (res == -1) ++ goto out_err; ++ } ++ if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { ++ struct timespec tv[2]; ++ ++ tv[0].tv_sec = 0; ++ tv[1].tv_sec = 0; ++ tv[0].tv_nsec = UTIME_OMIT; ++ tv[1].tv_nsec = UTIME_OMIT; ++ ++ if (valid & FUSE_SET_ATTR_ATIME_NOW) ++ tv[0].tv_nsec = UTIME_NOW; ++ else if (valid & FUSE_SET_ATTR_ATIME) ++ tv[0] = attr->st_atim; ++ ++ if (valid & FUSE_SET_ATTR_MTIME_NOW) ++ tv[1].tv_nsec = UTIME_NOW; ++ else if (valid & FUSE_SET_ATTR_MTIME) ++ tv[1] = attr->st_mtim; ++ ++ if (fi) ++ res = futimens(fi->fh, tv); ++ else ++ res = utimensat_empty_nofollow(inode, tv); ++ if (res == -1) ++ goto out_err; ++ } ++ ++ return lo_getattr(req, ino, fi); ++ ++out_err: ++ saverr = errno; ++ fuse_reply_err(req, saverr); ++} ++ ++static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) ++{ ++ struct lo_inode *p; ++ struct lo_inode *ret = NULL; ++ ++ pthread_mutex_lock(&lo->mutex); ++ for (p = lo->root.next; p != &lo->root; p = p->next) { ++ if (p->ino == st->st_ino && p->dev == st->st_dev) { ++ assert(p->refcount > 0); ++ ret = p; ++ ret->refcount++; ++ break; ++ } ++ } ++ pthread_mutex_unlock(&lo->mutex); ++ return ret; ++} ++ ++static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, ++ struct fuse_entry_param *e) ++{ ++ int newfd; ++ int res; ++ int saverr; ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode; ++ ++ memset(e, 0, sizeof(*e)); ++ e->attr_timeout = lo->timeout; ++ e->entry_timeout = lo->timeout; ++ ++ newfd = openat(lo_fd(req, parent), name, O_PATH | 
O_NOFOLLOW); ++ if (newfd == -1) ++ goto out_err; ++ ++ res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) ++ goto out_err; ++ ++ inode = lo_find(lo_data(req), &e->attr); ++ if (inode) { ++ close(newfd); ++ newfd = -1; ++ } else { ++ struct lo_inode *prev, *next; ++ ++ saverr = ENOMEM; ++ inode = calloc(1, sizeof(struct lo_inode)); ++ if (!inode) ++ goto out_err; ++ ++ inode->is_symlink = S_ISLNK(e->attr.st_mode); ++ inode->refcount = 1; ++ inode->fd = newfd; ++ inode->ino = e->attr.st_ino; ++ inode->dev = e->attr.st_dev; ++ ++ pthread_mutex_lock(&lo->mutex); ++ prev = &lo->root; ++ next = prev->next; ++ next->prev = inode; ++ inode->next = next; ++ inode->prev = prev; ++ prev->next = inode; ++ pthread_mutex_unlock(&lo->mutex); ++ } ++ e->ino = (uintptr_t) inode; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long) parent, name, (unsigned long long) e->ino); ++ ++ return 0; ++ ++out_err: ++ saverr = errno; ++ if (newfd != -1) ++ close(newfd); ++ return saverr; ++} ++ ++static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) ++{ ++ struct fuse_entry_param e; ++ int err; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", ++ parent, name); ++ ++ err = lo_do_lookup(req, parent, name, &e); ++ if (err) ++ fuse_reply_err(req, err); ++ else ++ fuse_reply_entry(req, &e); ++} ++ ++static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, ++ const char *name, mode_t mode, dev_t rdev, ++ const char *link) ++{ ++ int res; ++ int saverr; ++ struct lo_inode *dir = lo_inode(req, parent); ++ struct fuse_entry_param e; ++ ++ saverr = ENOMEM; ++ ++ res = mknod_wrapper(dir->fd, name, link, mode, rdev); ++ ++ saverr = errno; ++ if (res == -1) ++ goto out; ++ ++ saverr = lo_do_lookup(req, parent, name, &e); ++ if (saverr) ++ goto out; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long) 
parent, name, (unsigned long long) e.ino); ++ ++ fuse_reply_entry(req, &e); ++ return; ++ ++out: ++ fuse_reply_err(req, saverr); ++} ++ ++static void lo_mknod(fuse_req_t req, fuse_ino_t parent, ++ const char *name, mode_t mode, dev_t rdev) ++{ ++ lo_mknod_symlink(req, parent, name, mode, rdev, NULL); ++} ++ ++static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode) ++{ ++ lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); ++} ++ ++static void lo_symlink(fuse_req_t req, const char *link, ++ fuse_ino_t parent, const char *name) ++{ ++ lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); ++} ++ ++static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, ++ const char *name) ++{ ++ int res; ++ char procname[64]; ++ ++ if (inode->is_symlink) { ++ res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); ++ if (res == -1 && (errno == ENOENT || errno == EINVAL)) { ++ /* Sorry, no race free way to hard-link a symlink. */ ++ errno = EPERM; ++ } ++ return res; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); ++} ++ ++static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, ++ const char *name) ++{ ++ int res; ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode = lo_inode(req, ino); ++ struct fuse_entry_param e; ++ int saverr; ++ ++ memset(&e, 0, sizeof(struct fuse_entry_param)); ++ e.attr_timeout = lo->timeout; ++ e.entry_timeout = lo->timeout; ++ ++ res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); ++ if (res == -1) ++ goto out_err; ++ ++ res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) ++ goto out_err; ++ ++ pthread_mutex_lock(&lo->mutex); ++ inode->refcount++; ++ pthread_mutex_unlock(&lo->mutex); ++ e.ino = (uintptr_t) inode; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long) parent, name, ++ (unsigned long 
long) e.ino); ++ ++ fuse_reply_entry(req, &e); ++ return; ++ ++out_err: ++ saverr = errno; ++ fuse_reply_err(req, saverr); ++} ++ ++static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) ++{ ++ int res; ++ ++ res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); ++ ++ fuse_reply_err(req, res == -1 ? errno : 0); ++} ++ ++static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, ++ fuse_ino_t newparent, const char *newname, ++ unsigned int flags) ++{ ++ int res; ++ ++ if (flags) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ res = renameat(lo_fd(req, parent), name, ++ lo_fd(req, newparent), newname); ++ ++ fuse_reply_err(req, res == -1 ? errno : 0); ++} ++ ++static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) ++{ ++ int res; ++ ++ res = unlinkat(lo_fd(req, parent), name, 0); ++ ++ fuse_reply_err(req, res == -1 ? errno : 0); ++} ++ ++static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) ++{ ++ if (!inode) ++ return; ++ ++ pthread_mutex_lock(&lo->mutex); ++ assert(inode->refcount >= n); ++ inode->refcount -= n; ++ if (!inode->refcount) { ++ struct lo_inode *prev, *next; ++ ++ prev = inode->prev; ++ next = inode->next; ++ next->prev = prev; ++ prev->next = next; ++ ++ pthread_mutex_unlock(&lo->mutex); ++ close(inode->fd); ++ free(inode); ++ ++ } else { ++ pthread_mutex_unlock(&lo->mutex); ++ } ++} ++ ++static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) ++{ ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode = lo_inode(req, ino); ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", ++ (unsigned long long) ino, ++ (unsigned long long) inode->refcount, ++ (unsigned long long) nlookup); ++ } ++ ++ unref_inode(lo, inode, nlookup); ++} ++ ++static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) ++{ ++ lo_forget_one(req, ino, nlookup); ++ fuse_reply_none(req); ++} ++ ++static void 
lo_forget_multi(fuse_req_t req, size_t count, ++ struct fuse_forget_data *forgets) ++{ ++ int i; ++ ++ for (i = 0; i < count; i++) ++ lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); ++ fuse_reply_none(req); ++} ++ ++static void lo_readlink(fuse_req_t req, fuse_ino_t ino) ++{ ++ char buf[PATH_MAX + 1]; ++ int res; ++ ++ res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); ++ if (res == -1) ++ return (void) fuse_reply_err(req, errno); ++ ++ if (res == sizeof(buf)) ++ return (void) fuse_reply_err(req, ENAMETOOLONG); ++ ++ buf[res] = '\0'; ++ ++ fuse_reply_readlink(req, buf); ++} ++ ++struct lo_dirp { ++ DIR *dp; ++ struct dirent *entry; ++ off_t offset; ++}; ++ ++static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) ++{ ++ return (struct lo_dirp *) (uintptr_t) fi->fh; ++} ++ ++static void lo_opendir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++{ ++ int error = ENOMEM; ++ struct lo_data *lo = lo_data(req); ++ struct lo_dirp *d; ++ int fd; ++ ++ d = calloc(1, sizeof(struct lo_dirp)); ++ if (d == NULL) ++ goto out_err; ++ ++ fd = openat(lo_fd(req, ino), ".", O_RDONLY); ++ if (fd == -1) ++ goto out_errno; ++ ++ d->dp = fdopendir(fd); ++ if (d->dp == NULL) ++ goto out_errno; ++ ++ d->offset = 0; ++ d->entry = NULL; ++ ++ fi->fh = (uintptr_t) d; ++ if (lo->cache == CACHE_ALWAYS) ++ fi->keep_cache = 1; ++ fuse_reply_open(req, fi); ++ return; ++ ++out_errno: ++ error = errno; ++out_err: ++ if (d) { ++ if (fd != -1) ++ close(fd); ++ free(d); ++ } ++ fuse_reply_err(req, error); ++} ++ ++static int is_dot_or_dotdot(const char *name) ++{ ++ return name[0] == '.' && (name[1] == '\0' || ++ (name[1] == '.' 
&& name[2] == '\0')); ++} ++ ++static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, ++ off_t offset, struct fuse_file_info *fi, int plus) ++{ ++ struct lo_dirp *d = lo_dirp(fi); ++ char *buf; ++ char *p; ++ size_t rem = size; ++ int err; ++ ++ (void) ino; ++ ++ buf = calloc(1, size); ++ if (!buf) { ++ err = ENOMEM; ++ goto error; ++ } ++ p = buf; ++ ++ if (offset != d->offset) { ++ seekdir(d->dp, offset); ++ d->entry = NULL; ++ d->offset = offset; ++ } ++ while (1) { ++ size_t entsize; ++ off_t nextoff; ++ const char *name; ++ ++ if (!d->entry) { ++ errno = 0; ++ d->entry = readdir(d->dp); ++ if (!d->entry) { ++ if (errno) { // Error ++ err = errno; ++ goto error; ++ } else { // End of stream ++ break; ++ } ++ } ++ } ++ nextoff = d->entry->d_off; ++ name = d->entry->d_name; ++ fuse_ino_t entry_ino = 0; ++ if (plus) { ++ struct fuse_entry_param e; ++ if (is_dot_or_dotdot(name)) { ++ e = (struct fuse_entry_param) { ++ .attr.st_ino = d->entry->d_ino, ++ .attr.st_mode = d->entry->d_type << 12, ++ }; ++ } else { ++ err = lo_do_lookup(req, ino, name, &e); ++ if (err) ++ goto error; ++ entry_ino = e.ino; ++ } ++ ++ entsize = fuse_add_direntry_plus(req, p, rem, name, ++ &e, nextoff); ++ } else { ++ struct stat st = { ++ .st_ino = d->entry->d_ino, ++ .st_mode = d->entry->d_type << 12, ++ }; ++ entsize = fuse_add_direntry(req, p, rem, name, ++ &st, nextoff); ++ } ++ if (entsize > rem) { ++ if (entry_ino != 0) ++ lo_forget_one(req, entry_ino, 1); ++ break; ++ } ++ ++ p += entsize; ++ rem -= entsize; ++ ++ d->entry = NULL; ++ d->offset = nextoff; ++ } ++ ++ err = 0; ++error: ++ // If there's an error, we can only signal it if we haven't stored ++ // any entries yet - otherwise we'd end up with wrong lookup ++ // counts for the entries that are already in the buffer. So we ++ // return what we've collected until that point. 
++ if (err && rem == size) ++ fuse_reply_err(req, err); ++ else ++ fuse_reply_buf(req, buf, size - rem); ++ free(buf); ++} ++ ++static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, ++ off_t offset, struct fuse_file_info *fi) ++{ ++ lo_do_readdir(req, ino, size, offset, fi, 0); ++} ++ ++static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, ++ off_t offset, struct fuse_file_info *fi) ++{ ++ lo_do_readdir(req, ino, size, offset, fi, 1); ++} ++ ++static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++{ ++ struct lo_dirp *d = lo_dirp(fi); ++ (void) ino; ++ closedir(d->dp); ++ free(d); ++ fuse_reply_err(req, 0); ++} ++ ++static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, struct fuse_file_info *fi) ++{ ++ int fd; ++ struct lo_data *lo = lo_data(req); ++ struct fuse_entry_param e; ++ int err; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", ++ parent, name); ++ ++ fd = openat(lo_fd(req, parent), name, ++ (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); ++ if (fd == -1) ++ return (void) fuse_reply_err(req, errno); ++ ++ fi->fh = fd; ++ if (lo->cache == CACHE_NEVER) ++ fi->direct_io = 1; ++ else if (lo->cache == CACHE_ALWAYS) ++ fi->keep_cache = 1; ++ ++ err = lo_do_lookup(req, parent, name, &e); ++ if (err) ++ fuse_reply_err(req, err); ++ else ++ fuse_reply_create(req, &e, fi); ++} ++ ++static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi) ++{ ++ int res; ++ int fd = dirfd(lo_dirp(fi)->dp); ++ (void) ino; ++ if (datasync) ++ res = fdatasync(fd); ++ else ++ res = fsync(fd); ++ fuse_reply_err(req, res == -1 ? 
errno : 0); ++} ++ ++static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++{ ++ int fd; ++ char buf[64]; ++ struct lo_data *lo = lo_data(req); ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ++ ino, fi->flags); ++ ++ /* With writeback cache, kernel may send read requests even ++ when userspace opened write-only */ ++ if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { ++ fi->flags &= ~O_ACCMODE; ++ fi->flags |= O_RDWR; ++ } ++ ++ /* With writeback cache, O_APPEND is handled by the kernel. ++ This breaks atomicity (since the file may change in the ++ underlying filesystem, so that the kernel's idea of the ++ end of the file isn't accurate anymore). In this example, ++ we just accept that. A more rigorous filesystem may want ++ to return an error here */ ++ if (lo->writeback && (fi->flags & O_APPEND)) ++ fi->flags &= ~O_APPEND; ++ ++ sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); ++ fd = open(buf, fi->flags & ~O_NOFOLLOW); ++ if (fd == -1) ++ return (void) fuse_reply_err(req, errno); ++ ++ fi->fh = fd; ++ if (lo->cache == CACHE_NEVER) ++ fi->direct_io = 1; ++ else if (lo->cache == CACHE_ALWAYS) ++ fi->keep_cache = 1; ++ fuse_reply_open(req, fi); ++} ++ ++static void lo_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++{ ++ (void) ino; ++ ++ close(fi->fh); ++ fuse_reply_err(req, 0); ++} ++ ++static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++{ ++ int res; ++ (void) ino; ++ res = close(dup(fi->fh)); ++ fuse_reply_err(req, res == -1 ? errno : 0); ++} ++ ++static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi) ++{ ++ int res; ++ (void) ino; ++ if (datasync) ++ res = fdatasync(fi->fh); ++ else ++ res = fsync(fi->fh); ++ fuse_reply_err(req, res == -1 ? 
errno : 0); ++} ++ ++static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, ++ off_t offset, struct fuse_file_info *fi) ++{ ++ struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_read(ino=%" PRIu64 ", size=%zd, " ++ "off=%lu)\n", ino, size, (unsigned long) offset); ++ ++ buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; ++ buf.buf[0].fd = fi->fh; ++ buf.buf[0].pos = offset; ++ ++ fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); ++} ++ ++static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_bufvec *in_buf, off_t off, ++ struct fuse_file_info *fi) ++{ ++ (void) ino; ++ ssize_t res; ++ struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); ++ ++ out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; ++ out_buf.buf[0].fd = fi->fh; ++ out_buf.buf[0].pos = off; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ++ ino, out_buf.buf[0].size, (unsigned long) off); ++ ++ res = fuse_buf_copy(&out_buf, in_buf, 0); ++ if(res < 0) ++ fuse_reply_err(req, -res); ++ else ++ fuse_reply_write(req, (size_t) res); ++} ++ ++static void lo_statfs(fuse_req_t req, fuse_ino_t ino) ++{ ++ int res; ++ struct statvfs stbuf; ++ ++ res = fstatvfs(lo_fd(req, ino), &stbuf); ++ if (res == -1) ++ fuse_reply_err(req, errno); ++ else ++ fuse_reply_statfs(req, &stbuf); ++} ++ ++static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, ++ off_t offset, off_t length, struct fuse_file_info *fi) ++{ ++ int err = EOPNOTSUPP; ++ (void) ino; ++ ++#ifdef HAVE_FALLOCATE ++ err = fallocate(fi->fh, mode, offset, length); ++ if (err < 0) ++ err = errno; ++ ++#elif defined(HAVE_POSIX_FALLOCATE) ++ if (mode) { ++ fuse_reply_err(req, EOPNOTSUPP); ++ return; ++ } ++ ++ err = posix_fallocate(fi->fh, offset, length); ++#endif ++ ++ fuse_reply_err(req, err); ++} ++ ++static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ int op) 
++{ ++ int res; ++ (void) ino; ++ ++ res = flock(fi->fh, op); ++ ++ fuse_reply_err(req, res == -1 ? errno : 0); ++} ++ ++static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, ++ size_t size) ++{ ++ char *value = NULL; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) ++ goto out; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ++ ino, name, size); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to getxattr on symlink. */ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ if (size) { ++ value = malloc(size); ++ if (!value) ++ goto out_err; ++ ++ ret = getxattr(procname, name, value, size); ++ if (ret == -1) ++ goto out_err; ++ saverr = 0; ++ if (ret == 0) ++ goto out; ++ ++ fuse_reply_buf(req, value, ret); ++ } else { ++ ret = getxattr(procname, name, NULL, 0); ++ if (ret == -1) ++ goto out_err; ++ ++ fuse_reply_xattr(req, ret); ++ } ++out_free: ++ free(value); ++ return; ++ ++out_err: ++ saverr = errno; ++out: ++ fuse_reply_err(req, saverr); ++ goto out_free; ++} ++ ++static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) ++{ ++ char *value = NULL; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) ++ goto out; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ++ ino, size); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to listxattr on symlink. 
*/ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ if (size) { ++ value = malloc(size); ++ if (!value) ++ goto out_err; ++ ++ ret = listxattr(procname, value, size); ++ if (ret == -1) ++ goto out_err; ++ saverr = 0; ++ if (ret == 0) ++ goto out; ++ ++ fuse_reply_buf(req, value, ret); ++ } else { ++ ret = listxattr(procname, NULL, 0); ++ if (ret == -1) ++ goto out_err; ++ ++ fuse_reply_xattr(req, ret); ++ } ++out_free: ++ free(value); ++ return; ++ ++out_err: ++ saverr = errno; ++out: ++ fuse_reply_err(req, saverr); ++ goto out_free; ++} ++ ++static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, ++ const char *value, size_t size, int flags) ++{ ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) ++ goto out; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", ++ ino, name, value, size); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to setxattr on symlink. */ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ ret = setxattr(procname, name, value, size, flags); ++ saverr = ret == -1 ? errno : 0; ++ ++out: ++ fuse_reply_err(req, saverr); ++} ++ ++static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) ++{ ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) ++ goto out; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ++ ino, name); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to setxattr on symlink. */ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ ret = removexattr(procname, name); ++ saverr = ret == -1 ? 
errno : 0; ++ ++out: ++ fuse_reply_err(req, saverr); ++} ++ ++#ifdef HAVE_COPY_FILE_RANGE ++static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, ++ struct fuse_file_info *fi_in, ++ fuse_ino_t ino_out, off_t off_out, ++ struct fuse_file_info *fi_out, size_t len, ++ int flags) ++{ ++ ssize_t res; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " ++ "off=%lu, ino=%" PRIu64 "/fd=%lu, " ++ "off=%lu, size=%zd, flags=0x%x)\n", ++ ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, ++ len, flags); ++ ++ res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, ++ flags); ++ if (res < 0) ++ fuse_reply_err(req, -errno); ++ else ++ fuse_reply_write(req, res); ++} ++#endif ++ ++static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, ++ struct fuse_file_info *fi) ++{ ++ off_t res; ++ ++ (void)ino; ++ res = lseek(fi->fh, off, whence); ++ if (res != -1) ++ fuse_reply_lseek(req, res); ++ else ++ fuse_reply_err(req, errno); ++} ++ ++static struct fuse_lowlevel_ops lo_oper = { ++ .init = lo_init, ++ .lookup = lo_lookup, ++ .mkdir = lo_mkdir, ++ .mknod = lo_mknod, ++ .symlink = lo_symlink, ++ .link = lo_link, ++ .unlink = lo_unlink, ++ .rmdir = lo_rmdir, ++ .rename = lo_rename, ++ .forget = lo_forget, ++ .forget_multi = lo_forget_multi, ++ .getattr = lo_getattr, ++ .setattr = lo_setattr, ++ .readlink = lo_readlink, ++ .opendir = lo_opendir, ++ .readdir = lo_readdir, ++ .readdirplus = lo_readdirplus, ++ .releasedir = lo_releasedir, ++ .fsyncdir = lo_fsyncdir, ++ .create = lo_create, ++ .open = lo_open, ++ .release = lo_release, ++ .flush = lo_flush, ++ .fsync = lo_fsync, ++ .read = lo_read, ++ .write_buf = lo_write_buf, ++ .statfs = lo_statfs, ++ .fallocate = lo_fallocate, ++ .flock = lo_flock, ++ .getxattr = lo_getxattr, ++ .listxattr = lo_listxattr, ++ .setxattr = lo_setxattr, ++ .removexattr = lo_removexattr, ++#ifdef HAVE_COPY_FILE_RANGE ++ .copy_file_range = 
lo_copy_file_range, ++#endif ++ .lseek = lo_lseek, ++}; ++ ++int main(int argc, char *argv[]) ++{ ++ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); ++ struct fuse_session *se; ++ struct fuse_cmdline_opts opts; ++ struct lo_data lo = { .debug = 0, ++ .writeback = 0 }; ++ int ret = -1; ++ ++ /* Don't mask creation mode, kernel already did that */ ++ umask(0); ++ ++ pthread_mutex_init(&lo.mutex, NULL); ++ lo.root.next = lo.root.prev = &lo.root; ++ lo.root.fd = -1; ++ lo.cache = CACHE_NORMAL; ++ ++ if (fuse_parse_cmdline(&args, &opts) != 0) ++ return 1; ++ if (opts.show_help) { ++ printf("usage: %s [options] \n\n", argv[0]); ++ fuse_cmdline_help(); ++ fuse_lowlevel_help(); ++ ret = 0; ++ goto err_out1; ++ } else if (opts.show_version) { ++ printf("FUSE library version %s\n", fuse_pkgversion()); ++ fuse_lowlevel_version(); ++ ret = 0; ++ goto err_out1; ++ } ++ ++ if(opts.mountpoint == NULL) { ++ printf("usage: %s [options] \n", argv[0]); ++ printf(" %s --help\n", argv[0]); ++ ret = 1; ++ goto err_out1; ++ } ++ ++ if (fuse_opt_parse(&args, &lo, lo_opts, NULL)== -1) ++ return 1; ++ ++ lo.debug = opts.debug; ++ lo.root.refcount = 2; ++ if (lo.source) { ++ struct stat stat; ++ int res; ++ ++ res = lstat(lo.source, &stat); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", ++ lo.source); ++ exit(1); ++ } ++ if (!S_ISDIR(stat.st_mode)) { ++ fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); ++ exit(1); ++ } ++ ++ } else { ++ lo.source = "/"; ++ } ++ lo.root.is_symlink = false; ++ if (!lo.timeout_set) { ++ switch (lo.cache) { ++ case CACHE_NEVER: ++ lo.timeout = 0.0; ++ break; ++ ++ case CACHE_NORMAL: ++ lo.timeout = 1.0; ++ break; ++ ++ case CACHE_ALWAYS: ++ lo.timeout = 86400.0; ++ break; ++ } ++ } else if (lo.timeout < 0) { ++ fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", ++ lo.timeout); ++ exit(1); ++ } ++ ++ lo.root.fd = open(lo.source, O_PATH); ++ if (lo.root.fd == -1) { ++ fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): 
%m\n", ++ lo.source); ++ exit(1); ++ } ++ ++ se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); ++ if (se == NULL) ++ goto err_out1; ++ ++ if (fuse_set_signal_handlers(se) != 0) ++ goto err_out2; ++ ++ if (fuse_session_mount(se, opts.mountpoint) != 0) ++ goto err_out3; ++ ++ fuse_daemonize(opts.foreground); ++ ++ /* Block until ctrl+c or fusermount -u */ ++ if (opts.singlethread) ++ ret = fuse_session_loop(se); ++ else ++ ret = fuse_session_loop_mt(se, opts.clone_fd); ++ ++ fuse_session_unmount(se); ++err_out3: ++ fuse_remove_signal_handlers(se); ++err_out2: ++ fuse_session_destroy(se); ++err_out1: ++ free(opts.mountpoint); ++ fuse_opt_free_args(&args); ++ ++ if (lo.root.fd >= 0) ++ close(lo.root.fd); ++ ++ return ret ? 1 : 0; ++} +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch b/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch new file mode 100644 index 0000000..cef537a --- /dev/null +++ b/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch @@ -0,0 +1,73 @@ +From 52e93f2dc499ead339bf808dac3480b369dfadd1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:39 +0100 +Subject: [PATCH 068/116] virtiofsd: Add timestamp to the log with + FUSE_LOG_DEBUG level +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-65-dgilbert@redhat.com> +Patchwork-id: 93517 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 064/112] virtiofsd: Add timestamp to the log with FUSE_LOG_DEBUG level +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Masayoshi Mizuma + +virtiofsd has some threads, so we see a lot of logs with debug option. +It would be useful for debugging if we can see the timestamp. 
+ +Add nano second timestamp, which got by get_clock(), to the log with +FUSE_LOG_DEBUG level if the syslog option isn't set. + +The log is like as: + + # ./virtiofsd -d -o vhost_user_socket=/tmp/vhostqemu0 -o source=/tmp/share0 -o cache=auto + ... + [5365943125463727] [ID: 00000002] fv_queue_thread: Start for queue 0 kick_fd 9 + [5365943125568644] [ID: 00000002] fv_queue_thread: Waiting for Queue 0 event + [5365943125573561] [ID: 00000002] fv_queue_thread: Got queue event on Queue 0 + +Signed-off-by: Masayoshi Mizuma +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 50fb955aa0e6ede929422146936cf68bf1ca876f) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index f08324f..98114a3 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -36,6 +36,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/timer.h" + #include "fuse_virtio.h" + #include "fuse_log.h" + #include "fuse_lowlevel.h" +@@ -2276,7 +2277,13 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) + } + + if (current_log_level == FUSE_LOG_DEBUG) { +- localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), fmt); ++ if (!use_syslog) { ++ localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s", ++ get_clock(), syscall(__NR_gettid), fmt); ++ } else { ++ localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), ++ fmt); ++ } + fmt = localfmt; + } + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch b/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch new file mode 100644 index 0000000..4713a0d --- /dev/null +++ b/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch @@ -0,0 +1,85 @@ +From 2b921f7162b53204051955228bf99bbed55d2457 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:53 +0100 +Subject: [PATCH 082/116] virtiofsd: Clean up inodes on destroy +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-79-dgilbert@redhat.com> +Patchwork-id: 93532 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 078/112] virtiofsd: Clean up inodes on destroy +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Clear out our inodes and fd's on a 'destroy' - so we get rid +of them if we reboot the guest. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 771b01eb76ff480fee984bd1d21727147cc3e702) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 26 ++++++++++++++++++++++++++ + 1 file changed, 26 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index b176a31..9ed77a1 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1169,6 +1169,25 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + } + } + ++static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) ++{ ++ struct lo_inode *inode = value; ++ struct lo_data *lo = user_data; ++ ++ inode->refcount = 0; ++ lo_map_remove(&lo->ino_map, inode->fuse_ino); ++ close(inode->fd); ++ ++ return TRUE; ++} ++ ++static void unref_all_inodes(struct lo_data *lo) ++{ ++ pthread_mutex_lock(&lo->mutex); ++ g_hash_table_foreach_remove(lo->inodes, unref_all_inodes_cb, lo); ++ pthread_mutex_unlock(&lo->mutex); ++} ++ + static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + { + struct lo_data *lo = lo_data(req); +@@ -2035,6 +2054,12 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int 
whence, + } + } + ++static void lo_destroy(void *userdata) ++{ ++ struct lo_data *lo = (struct lo_data *)userdata; ++ unref_all_inodes(lo); ++} ++ + static struct fuse_lowlevel_ops lo_oper = { + .init = lo_init, + .lookup = lo_lookup, +@@ -2073,6 +2098,7 @@ static struct fuse_lowlevel_ops lo_oper = { + .copy_file_range = lo_copy_file_range, + #endif + .lseek = lo_lseek, ++ .destroy = lo_destroy, + }; + + /* Print vhost-user.json backend program capabilities */ +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch b/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch new file mode 100644 index 0000000..c421365 --- /dev/null +++ b/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch @@ -0,0 +1,112 @@ +From 24f91062f571ad2dd2ac22db3b7d456a2c8bd2cb Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:23 +0100 +Subject: [PATCH 112/116] virtiofsd: Convert lo_destroy to take the lo->mutex + lock itself +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-109-dgilbert@redhat.com> +Patchwork-id: 93563 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 108/112] virtiofsd: Convert lo_destroy to take the lo->mutex lock itself +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +lo_destroy was relying on some implicit knowledge of the locking; +we can avoid this if we create an unref_inode that doesn't take +the lock and then grab it for the whole of the lo_destroy. + +Suggested-by: Vivek Goyal +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit fe4c15798a48143dd6b1f58d2d3cad12206ce211) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 31 +++++++++++++++++-------------- + 1 file changed, 17 insertions(+), 14 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index eb001b9..fc15d61 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1344,14 +1344,13 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + lo_inode_put(lo, &inode); + } + +-static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, +- uint64_t n) ++/* To be called with lo->mutex held */ ++static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) + { + if (!inode) { + return; + } + +- pthread_mutex_lock(&lo->mutex); + assert(inode->nlookup >= n); + inode->nlookup -= n; + if (!inode->nlookup) { +@@ -1362,15 +1361,24 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + } + g_hash_table_destroy(inode->posix_locks); + pthread_mutex_destroy(&inode->plock_mutex); +- pthread_mutex_unlock(&lo->mutex); + + /* Drop our refcount from lo_do_lookup() */ + lo_inode_put(lo, &inode); +- } else { +- pthread_mutex_unlock(&lo->mutex); + } + } + ++static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, ++ uint64_t n) ++{ ++ if (!inode) { ++ return; ++ } ++ ++ pthread_mutex_lock(&lo->mutex); ++ unref_inode(lo, inode, n); ++ pthread_mutex_unlock(&lo->mutex); ++} ++ + static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + { + struct lo_data *lo = lo_data(req); +@@ -2458,13 +2466,7 @@ static void lo_destroy(void *userdata) + { + struct lo_data *lo = (struct lo_data *)userdata; + +- /* +- * Normally lo->mutex must be taken when traversing lo->inodes but +- * lo_destroy() is a serialized request so no races are possible here. 
+- * +- * In addition, we cannot acquire lo->mutex since unref_inode() takes it +- * too and this would result in a recursive lock. +- */ ++ pthread_mutex_lock(&lo->mutex); + while (true) { + GHashTableIter iter; + gpointer key, value; +@@ -2475,8 +2477,9 @@ static void lo_destroy(void *userdata) + } + + struct lo_inode *inode = value; +- unref_inode_lolocked(lo, inode, inode->nlookup); ++ unref_inode(lo, inode, inode->nlookup); + } ++ pthread_mutex_unlock(&lo->mutex); + } + + static struct fuse_lowlevel_ops lo_oper = { +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch b/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch new file mode 100644 index 0000000..9f198c2 --- /dev/null +++ b/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch @@ -0,0 +1,176 @@ +From e217ab392e0d4c770ec18dbfbe986771773cb557 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:33 +0100 +Subject: [PATCH 062/116] virtiofsd: Drop CAP_FSETID if client asked for it +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-59-dgilbert@redhat.com> +Patchwork-id: 93513 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 058/112] virtiofsd: Drop CAP_FSETID if client asked for it +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +If client requested killing setuid/setgid bits on file being written, drop +CAP_FSETID capability so that setuid/setgid bits are cleared upon write +automatically. + +pjdfstest chown/12.t needs this. + +Signed-off-by: Vivek Goyal + dgilbert: reworked for libcap-ng +Reviewed-by: Misono Tomohiro +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit ee88465224b3aed2596049caa28f86cbe0d5a3d0) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 105 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 105 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 97e7c75..d53cb1e 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -201,6 +201,91 @@ static int load_capng(void) + return 0; + } + ++/* ++ * Helpers for dropping and regaining effective capabilities. Returns 0 ++ * on success, error otherwise ++ */ ++static int drop_effective_cap(const char *cap_name, bool *cap_dropped) ++{ ++ int cap, ret; ++ ++ cap = capng_name_to_capability(cap_name); ++ if (cap < 0) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", ++ cap_name, strerror(errno)); ++ goto out; ++ } ++ ++ if (load_capng()) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); ++ goto out; ++ } ++ ++ /* We dont have this capability in effective set already. 
*/ ++ if (!capng_have_capability(CAPNG_EFFECTIVE, cap)) { ++ ret = 0; ++ goto out; ++ } ++ ++ if (capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, cap)) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "capng_update(DROP,) failed\n"); ++ goto out; ++ } ++ ++ if (capng_apply(CAPNG_SELECT_CAPS)) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "drop:capng_apply() failed\n"); ++ goto out; ++ } ++ ++ ret = 0; ++ if (cap_dropped) { ++ *cap_dropped = true; ++ } ++ ++out: ++ return ret; ++} ++ ++static int gain_effective_cap(const char *cap_name) ++{ ++ int cap; ++ int ret = 0; ++ ++ cap = capng_name_to_capability(cap_name); ++ if (cap < 0) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", ++ cap_name, strerror(errno)); ++ goto out; ++ } ++ ++ if (load_capng()) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); ++ goto out; ++ } ++ ++ if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap)) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "capng_update(ADD,) failed\n"); ++ goto out; ++ } ++ ++ if (capng_apply(CAPNG_SELECT_CAPS)) { ++ ret = errno; ++ fuse_log(FUSE_LOG_ERR, "gain:capng_apply() failed\n"); ++ goto out; ++ } ++ ret = 0; ++ ++out: ++ return ret; ++} ++ + static void lo_map_init(struct lo_map *map) + { + map->elems = NULL; +@@ -1577,6 +1662,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + (void)ino; + ssize_t res; + struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); ++ bool cap_fsetid_dropped = false; + + out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; + out_buf.buf[0].fd = lo_fi_fd(req, fi); +@@ -1588,12 +1674,31 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + out_buf.buf[0].size, (unsigned long)off); + } + ++ /* ++ * If kill_priv is set, drop CAP_FSETID which should lead to kernel ++ * clearing setuid/setgid on file. 
++ */ ++ if (fi->kill_priv) { ++ res = drop_effective_cap("FSETID", &cap_fsetid_dropped); ++ if (res != 0) { ++ fuse_reply_err(req, res); ++ return; ++ } ++ } ++ + res = fuse_buf_copy(&out_buf, in_buf); + if (res < 0) { + fuse_reply_err(req, -res); + } else { + fuse_reply_write(req, (size_t)res); + } ++ ++ if (cap_fsetid_dropped) { ++ res = gain_effective_cap("FSETID"); ++ if (res) { ++ fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n"); ++ } ++ } + } + + static void lo_statfs(fuse_req_t req, fuse_ino_t ino) +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Fast-path-for-virtio-read.patch b/kvm-virtiofsd-Fast-path-for-virtio-read.patch new file mode 100644 index 0000000..03874ce --- /dev/null +++ b/kvm-virtiofsd-Fast-path-for-virtio-read.patch @@ -0,0 +1,240 @@ +From 7d2efc3e4af15eff57b0c38cff7c81b371a98303 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:06 +0100 +Subject: [PATCH 035/116] virtiofsd: Fast path for virtio read +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-32-dgilbert@redhat.com> +Patchwork-id: 93480 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 031/112] virtiofsd: Fast path for virtio read +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Readv the data straight into the guests buffer. + +Signed-off-by: Dr. David Alan Gilbert +With fix by: +Signed-off-by: Eryu Guan +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit eb49d187ef5134483a34c970bbfece28aaa686a7) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 5 ++ + tools/virtiofsd/fuse_virtio.c | 162 ++++++++++++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_virtio.h | 4 + + 3 files changed, 171 insertions(+) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 380d93b..4f4684d 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -475,6 +475,11 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, + return fuse_send_msg(se, ch, iov, iov_count); + } + ++ if (fuse_lowlevel_is_virtio(se) && buf->count == 1 && ++ buf->buf[0].flags == (FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK)) { ++ return virtio_send_data_iov(se, ch, iov, iov_count, buf, len); ++ } ++ + abort(); /* Will have taken vhost path */ + return 0; + } +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index f1adeb6..7e2711b 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -230,6 +230,168 @@ err: + return ret; + } + ++/* ++ * Callback from fuse_send_data_iov_* when it's virtio and the buffer ++ * is a single FD with FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK ++ * We need send the iov and then the buffer. ++ * Return 0 on success ++ */ ++int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int count, struct fuse_bufvec *buf, ++ size_t len) ++{ ++ int ret = 0; ++ VuVirtqElement *elem; ++ VuVirtq *q; ++ ++ assert(count >= 1); ++ assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); ++ ++ struct fuse_out_header *out = iov[0].iov_base; ++ /* TODO: Endianness! 
*/ ++ ++ size_t iov_len = iov_size(iov, count); ++ size_t tosend_len = iov_len + len; ++ ++ out->len = tosend_len; ++ ++ fuse_log(FUSE_LOG_DEBUG, "%s: count=%d len=%zd iov_len=%zd\n", __func__, ++ count, len, iov_len); ++ ++ /* unique == 0 is notification which we don't support */ ++ assert(out->unique); ++ ++ /* For virtio we always have ch */ ++ assert(ch); ++ assert(!ch->qi->reply_sent); ++ elem = ch->qi->qe; ++ q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; ++ ++ /* The 'in' part of the elem is to qemu */ ++ unsigned int in_num = elem->in_num; ++ struct iovec *in_sg = elem->in_sg; ++ size_t in_len = iov_size(in_sg, in_num); ++ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n", ++ __func__, elem->index, in_num, in_len); ++ ++ /* ++ * The elem should have room for a 'fuse_out_header' (out from fuse) ++ * plus the data based on the len in the header. ++ */ ++ if (in_len < sizeof(struct fuse_out_header)) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", ++ __func__, elem->index); ++ ret = E2BIG; ++ goto err; ++ } ++ if (in_len < tosend_len) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", ++ __func__, elem->index, tosend_len); ++ ret = E2BIG; ++ goto err; ++ } ++ ++ /* TODO: Limit to 'len' */ ++ ++ /* First copy the header data from iov->in_sg */ ++ copy_iov(iov, count, in_sg, in_num, iov_len); ++ ++ /* ++ * Build a copy of the the in_sg iov so we can skip bits in it, ++ * including changing the offsets ++ */ ++ struct iovec *in_sg_cpy = calloc(sizeof(struct iovec), in_num); ++ assert(in_sg_cpy); ++ memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num); ++ /* These get updated as we skip */ ++ struct iovec *in_sg_ptr = in_sg_cpy; ++ int in_sg_cpy_count = in_num; ++ ++ /* skip over parts of in_sg that contained the header iov */ ++ size_t skip_size = iov_len; ++ ++ size_t in_sg_left = 0; ++ do { ++ while (skip_size != 0 && in_sg_cpy_count) { ++ if (skip_size >= in_sg_ptr[0].iov_len) { ++ skip_size -= 
in_sg_ptr[0].iov_len; ++ in_sg_ptr++; ++ in_sg_cpy_count--; ++ } else { ++ in_sg_ptr[0].iov_len -= skip_size; ++ in_sg_ptr[0].iov_base += skip_size; ++ break; ++ } ++ } ++ ++ int i; ++ for (i = 0, in_sg_left = 0; i < in_sg_cpy_count; i++) { ++ in_sg_left += in_sg_ptr[i].iov_len; ++ } ++ fuse_log(FUSE_LOG_DEBUG, ++ "%s: after skip skip_size=%zd in_sg_cpy_count=%d " ++ "in_sg_left=%zd\n", ++ __func__, skip_size, in_sg_cpy_count, in_sg_left); ++ ret = preadv(buf->buf[0].fd, in_sg_ptr, in_sg_cpy_count, ++ buf->buf[0].pos); ++ ++ if (ret == -1) { ++ ret = errno; ++ fuse_log(FUSE_LOG_DEBUG, "%s: preadv failed (%m) len=%zd\n", ++ __func__, len); ++ free(in_sg_cpy); ++ goto err; ++ } ++ fuse_log(FUSE_LOG_DEBUG, "%s: preadv ret=%d len=%zd\n", __func__, ++ ret, len); ++ if (ret < len && ret) { ++ fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__); ++ /* Skip over this much next time around */ ++ skip_size = ret; ++ buf->buf[0].pos += ret; ++ len -= ret; ++ ++ /* Lets do another read */ ++ continue; ++ } ++ if (!ret) { ++ /* EOF case? 
*/ ++ fuse_log(FUSE_LOG_DEBUG, "%s: !ret in_sg_left=%zd\n", __func__, ++ in_sg_left); ++ break; ++ } ++ if (ret != len) { ++ fuse_log(FUSE_LOG_DEBUG, "%s: ret!=len\n", __func__); ++ ret = EIO; ++ free(in_sg_cpy); ++ goto err; ++ } ++ in_sg_left -= ret; ++ len -= ret; ++ } while (in_sg_left); ++ free(in_sg_cpy); ++ ++ /* Need to fix out->len on EOF */ ++ if (len) { ++ struct fuse_out_header *out_sg = in_sg[0].iov_base; ++ ++ tosend_len -= len; ++ out_sg->len = tosend_len; ++ } ++ ++ ret = 0; ++ ++ vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); ++ vu_queue_notify(&se->virtio_dev->dev, q); ++ ++err: ++ if (ret == 0) { ++ ch->qi->reply_sent = true; ++ } ++ ++ return ret; ++} ++ + /* Thread function for individual queues, created when a queue is 'started' */ + static void *fv_queue_thread(void *opaque) + { +diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h +index 135a148..cc676b9 100644 +--- a/tools/virtiofsd/fuse_virtio.h ++++ b/tools/virtiofsd/fuse_virtio.h +@@ -26,4 +26,8 @@ int virtio_loop(struct fuse_session *se); + int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + struct iovec *iov, int count); + ++int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int count, ++ struct fuse_bufvec *buf, size_t len); ++ + #endif +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch b/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch new file mode 100644 index 0000000..12bb9a2 --- /dev/null +++ b/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch @@ -0,0 +1,164 @@ +From 6d41fc549198e140f38fddcb02975098df040ae1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:50 +0100 +Subject: [PATCH 019/116] virtiofsd: Fix common header and define for QEMU + builds +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-16-dgilbert@redhat.com> +Patchwork-id: 93470 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 015/112] virtiofsd: Fix common header and define for QEMU builds +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +All of the fuse files include config.h and define GNU_SOURCE +where we don't have either under our build - remove them. +Fixup path to the kernel's fuse.h in the QEMUs world. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 09863ebc7e32a107235b3c815ad54d26cc64f07a) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 4 +--- + tools/virtiofsd/fuse_i.h | 3 +++ + tools/virtiofsd/fuse_log.c | 1 + + tools/virtiofsd/fuse_lowlevel.c | 6 ++---- + tools/virtiofsd/fuse_opt.c | 2 +- + tools/virtiofsd/fuse_signals.c | 2 +- + tools/virtiofsd/helper.c | 1 + + tools/virtiofsd/passthrough_ll.c | 8 ++------ + 8 files changed, 12 insertions(+), 15 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 4d507f3..772efa9 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -9,9 +9,7 @@ + * See the file COPYING.LIB + */ + +-#define _GNU_SOURCE +- +-#include "config.h" ++#include "qemu/osdep.h" + #include "fuse_i.h" + #include "fuse_lowlevel.h" + #include +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index e63cb58..bae0699 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -6,6 +6,9 @@ + * See the file COPYING.LIB + */ + ++#define FUSE_USE_VERSION 31 ++ ++ + #include "fuse.h" + #include "fuse_lowlevel.h" + +diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c +index 11345f9..c301ff6 100644 +--- a/tools/virtiofsd/fuse_log.c ++++ b/tools/virtiofsd/fuse_log.c +@@ -8,6 +8,7 @@ + 
* See the file COPYING.LIB + */ + ++#include "qemu/osdep.h" + #include "fuse_log.h" + + #include +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 3da80de..07fb8a6 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -9,11 +9,9 @@ + * See the file COPYING.LIB + */ + +-#define _GNU_SOURCE +- +-#include "config.h" ++#include "qemu/osdep.h" + #include "fuse_i.h" +-#include "fuse_kernel.h" ++#include "standard-headers/linux/fuse.h" + #include "fuse_misc.h" + #include "fuse_opt.h" + +diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c +index edd36f4..2892236 100644 +--- a/tools/virtiofsd/fuse_opt.c ++++ b/tools/virtiofsd/fuse_opt.c +@@ -9,8 +9,8 @@ + * See the file COPYING.LIB + */ + ++#include "qemu/osdep.h" + #include "fuse_opt.h" +-#include "config.h" + #include "fuse_i.h" + #include "fuse_misc.h" + +diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c +index 19d6791..dc7c8ac 100644 +--- a/tools/virtiofsd/fuse_signals.c ++++ b/tools/virtiofsd/fuse_signals.c +@@ -8,7 +8,7 @@ + * See the file COPYING.LIB + */ + +-#include "config.h" ++#include "qemu/osdep.h" + #include "fuse_i.h" + #include "fuse_lowlevel.h" + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index d9227d7..9333691 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -10,6 +10,7 @@ + * See the file COPYING.LIB. 
+ */ + ++#include "qemu/osdep.h" + #include "fuse_i.h" + #include "fuse_lowlevel.h" + #include "fuse_misc.h" +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 126a56c..322a889 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -35,15 +35,11 @@ + * \include passthrough_ll.c + */ + +-#define _GNU_SOURCE +-#define FUSE_USE_VERSION 31 +- +-#include "config.h" +- ++#include "qemu/osdep.h" ++#include "fuse_lowlevel.h" + #include + #include + #include +-#include + #include + #include + #include +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch b/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch new file mode 100644 index 0000000..f929bab --- /dev/null +++ b/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch @@ -0,0 +1,136 @@ +From 9b5fbc95a287b2ce9448142194b161d8360d5e4e Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:15 +0100 +Subject: [PATCH 104/116] virtiofsd: Fix data corruption with O_APPEND write in + writeback mode +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-101-dgilbert@redhat.com> +Patchwork-id: 93556 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 100/112] virtiofsd: Fix data corruption with O_APPEND write in writeback mode +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Misono Tomohiro + +When writeback mode is enabled (-o writeback), O_APPEND handling is +done in kernel. Therefore virtiofsd clears O_APPEND flag when open. +Otherwise O_APPEND flag takes precedence over pwrite() and write +data may corrupt. + +Currently clearing O_APPEND flag is done in lo_open(), but we also +need the same operation in lo_create(). 
So, factor out the flag +update operation in lo_open() to update_open_flags() and call it +in both lo_open() and lo_create(). + +This fixes the failure of xfstest generic/069 in writeback mode +(which tests O_APPEND write data integrity). + +Signed-off-by: Misono Tomohiro +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 8e4e41e39eac5ee5f378d66f069a2f70a1734317) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 66 ++++++++++++++++++++-------------------- + 1 file changed, 33 insertions(+), 33 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 948cb19..4c61ac5 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1692,6 +1692,37 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, + fuse_reply_err(req, 0); + } + ++static void update_open_flags(int writeback, struct fuse_file_info *fi) ++{ ++ /* ++ * With writeback cache, kernel may send read requests even ++ * when userspace opened write-only ++ */ ++ if (writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { ++ fi->flags &= ~O_ACCMODE; ++ fi->flags |= O_RDWR; ++ } ++ ++ /* ++ * With writeback cache, O_APPEND is handled by the kernel. ++ * This breaks atomicity (since the file may change in the ++ * underlying filesystem, so that the kernel's idea of the ++ * end of the file isn't accurate anymore). In this example, ++ * we just accept that. A more rigorous filesystem may want ++ * to return an error here ++ */ ++ if (writeback && (fi->flags & O_APPEND)) { ++ fi->flags &= ~O_APPEND; ++ } ++ ++ /* ++ * O_DIRECT in guest should not necessarily mean bypassing page ++ * cache on host as well. If somebody needs that behavior, it ++ * probably should be a configuration knob in daemon. 
++ */ ++ fi->flags &= ~O_DIRECT; ++} ++ + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, struct fuse_file_info *fi) + { +@@ -1721,12 +1752,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + goto out; + } + +- /* +- * O_DIRECT in guest should not necessarily mean bypassing page +- * cache on host as well. If somebody needs that behavior, it +- * probably should be a configuration knob in daemon. +- */ +- fi->flags &= ~O_DIRECT; ++ update_open_flags(lo->writeback, fi); + + fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, + mode); +@@ -1936,33 +1962,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, + fi->flags); + +- /* +- * With writeback cache, kernel may send read requests even +- * when userspace opened write-only +- */ +- if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { +- fi->flags &= ~O_ACCMODE; +- fi->flags |= O_RDWR; +- } +- +- /* +- * With writeback cache, O_APPEND is handled by the kernel. +- * This breaks atomicity (since the file may change in the +- * underlying filesystem, so that the kernel's idea of the +- * end of the file isn't accurate anymore). In this example, +- * we just accept that. A more rigorous filesystem may want +- * to return an error here +- */ +- if (lo->writeback && (fi->flags & O_APPEND)) { +- fi->flags &= ~O_APPEND; +- } +- +- /* +- * O_DIRECT in guest should not necessarily mean bypassing page +- * cache on host as well. If somebody needs that behavior, it +- * probably should be a configuration knob in daemon. 
+- */ +- fi->flags &= ~O_DIRECT; ++ update_open_flags(lo->writeback, fi); + + sprintf(buf, "%i", lo_fd(req, ino)); + fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch b/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch new file mode 100644 index 0000000..306c183 --- /dev/null +++ b/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch @@ -0,0 +1,120 @@ +From 9f726593bc3acbc247876dcc4d79fbf046958003 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:49 +0100 +Subject: [PATCH 018/116] virtiofsd: Fix fuse_daemonize ignored return values +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-15-dgilbert@redhat.com> +Patchwork-id: 93469 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 014/112] virtiofsd: Fix fuse_daemonize ignored return values +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +QEMU's compiler enables warnings/errors for ignored values +and the (void) trick used in the fuse code isn't enough. +Turn all the return values into a return value on the function. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 30d8e49760712d65697ea517c53671bd1d214fc7) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 33 ++++++++++++++++++++++----------- + 1 file changed, 22 insertions(+), 11 deletions(-) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 5e6f205..d9227d7 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -10,12 +10,10 @@ + * See the file COPYING.LIB. 
+ */ + +-#include "config.h" + #include "fuse_i.h" + #include "fuse_lowlevel.h" + #include "fuse_misc.h" + #include "fuse_opt.h" +-#include "mount_util.h" + + #include + #include +@@ -171,6 +169,7 @@ int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) + + int fuse_daemonize(int foreground) + { ++ int ret = 0, rett; + if (!foreground) { + int nullfd; + int waiter[2]; +@@ -192,8 +191,8 @@ int fuse_daemonize(int foreground) + case 0: + break; + default: +- (void)read(waiter[0], &completed, sizeof(completed)); +- _exit(0); ++ _exit(read(waiter[0], &completed, ++ sizeof(completed) != sizeof(completed))); + } + + if (setsid() == -1) { +@@ -201,13 +200,22 @@ int fuse_daemonize(int foreground) + return -1; + } + +- (void)chdir("/"); ++ ret = chdir("/"); + + nullfd = open("/dev/null", O_RDWR, 0); + if (nullfd != -1) { +- (void)dup2(nullfd, 0); +- (void)dup2(nullfd, 1); +- (void)dup2(nullfd, 2); ++ rett = dup2(nullfd, 0); ++ if (!ret) { ++ ret = rett; ++ } ++ rett = dup2(nullfd, 1); ++ if (!ret) { ++ ret = rett; ++ } ++ rett = dup2(nullfd, 2); ++ if (!ret) { ++ ret = rett; ++ } + if (nullfd > 2) { + close(nullfd); + } +@@ -215,13 +223,16 @@ int fuse_daemonize(int foreground) + + /* Propagate completion of daemon initialization */ + completed = 1; +- (void)write(waiter[1], &completed, sizeof(completed)); ++ rett = write(waiter[1], &completed, sizeof(completed)); ++ if (!ret) { ++ ret = rett; ++ } + close(waiter[0]); + close(waiter[1]); + } else { +- (void)chdir("/"); ++ ret = chdir("/"); + } +- return 0; ++ return ret; + } + + void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch b/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch new file mode 100644 index 0000000..5593a33 --- /dev/null +++ b/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch @@ -0,0 +1,14743 @@ +From e313ab94af558bbc133e7a93b0a6dbff706dd1d8 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:45 +0100 +Subject: [PATCH 014/116] virtiofsd: Format imported files to qemu style +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-11-dgilbert@redhat.com> +Patchwork-id: 93464 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 010/112] virtiofsd: Format imported files to qemu style +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Mostly using a set like: + +indent -nut -i 4 -nlp -br -cs -ce --no-space-after-function-call-names file +clang-format -style=file -i -- file +clang-tidy -fix-errors -checks=readability-braces-around-statements file +clang-format -style=file -i -- file + +With manual cleanups. + +The .clang-format used is below. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed by: Aleksandar Markovic + +Language: Cpp +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false # although we like it, it creates churn +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: true +AlignOperands: true +AlignTrailingComments: false # churn +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterReturnType: None # AlwaysBreakAfterDefinitionReturnType is taken into account +AlwaysBreakBeforeMultilineStrings: false +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterControlStatement: false + AfterEnum: false + AfterFunction: true + AfterStruct: false + AfterUnion: false + BeforeElse: false + IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeTernaryOperators: false +BreakStringLiterals: true 
+ColumnLimit: 80 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: false +DerivePointerAlignment: false +DisableFormat: false +ForEachMacros: [ + 'CPU_FOREACH', + 'CPU_FOREACH_REVERSE', + 'CPU_FOREACH_SAFE', + 'IOMMU_NOTIFIER_FOREACH', + 'QLIST_FOREACH', + 'QLIST_FOREACH_ENTRY', + 'QLIST_FOREACH_RCU', + 'QLIST_FOREACH_SAFE', + 'QLIST_FOREACH_SAFE_RCU', + 'QSIMPLEQ_FOREACH', + 'QSIMPLEQ_FOREACH_SAFE', + 'QSLIST_FOREACH', + 'QSLIST_FOREACH_SAFE', + 'QTAILQ_FOREACH', + 'QTAILQ_FOREACH_REVERSE', + 'QTAILQ_FOREACH_SAFE', + 'QTAILQ_RAW_FOREACH', + 'RAMBLOCK_FOREACH' +] +IncludeCategories: + - Regex: '^"qemu/osdep.h' + Priority: -3 + - Regex: '^"(block|chardev|crypto|disas|exec|fpu|hw|io|libdecnumber|migration|monitor|net|qapi|qemu|qom|standard-headers|sysemu|ui)/' + Priority: -2 + - Regex: '^"(elf.h|qemu-common.h|glib-compat.h|qemu-io.h|trace-tcg.h)' + Priority: -1 + - Regex: '.*' + Priority: 1 +IncludeIsMainRegex: '$' +IndentCaseLabels: false +IndentWidth: 4 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? +MacroBlockEnd: '.*_END$' +MaxEmptyLinesToKeep: 2 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SpaceAfterCStyleCast: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInContainerLiterals: true +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Auto +UseTab: Never +... + +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 7387863d033e8028aa09a815736617a7c4490827) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 434 ++-- + tools/virtiofsd/fuse.h | 1572 +++++++------- + tools/virtiofsd/fuse_common.h | 730 +++---- + tools/virtiofsd/fuse_i.h | 121 +- + tools/virtiofsd/fuse_log.c | 38 +- + tools/virtiofsd/fuse_log.h | 32 +- + tools/virtiofsd/fuse_lowlevel.c | 3638 +++++++++++++++++---------------- + tools/virtiofsd/fuse_lowlevel.h | 2392 +++++++++++----------- + tools/virtiofsd/fuse_misc.h | 30 +- + tools/virtiofsd/fuse_opt.c | 659 +++--- + tools/virtiofsd/fuse_opt.h | 79 +- + tools/virtiofsd/fuse_signals.c | 118 +- + tools/virtiofsd/helper.c | 506 ++--- + tools/virtiofsd/passthrough_helpers.h | 33 +- + tools/virtiofsd/passthrough_ll.c | 2061 ++++++++++--------- + 15 files changed, 6382 insertions(+), 6061 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index aefb7db..5df946c 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -1,252 +1,272 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2010 Miklos Szeredi +- +- Functions for dealing with `struct fuse_buf` and `struct +- fuse_bufvec`. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2010 Miklos Szeredi ++ * ++ * Functions for dealing with `struct fuse_buf` and `struct ++ * fuse_bufvec`. ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ + + #define _GNU_SOURCE + + #include "config.h" + #include "fuse_i.h" + #include "fuse_lowlevel.h" ++#include ++#include + #include + #include +-#include +-#include + + size_t fuse_buf_size(const struct fuse_bufvec *bufv) + { +- size_t i; +- size_t size = 0; +- +- for (i = 0; i < bufv->count; i++) { +- if (bufv->buf[i].size == SIZE_MAX) +- size = SIZE_MAX; +- else +- size += bufv->buf[i].size; +- } +- +- return size; ++ size_t i; ++ size_t size = 0; ++ ++ for (i = 0; i < bufv->count; i++) { ++ if (bufv->buf[i].size == SIZE_MAX) { ++ size = SIZE_MAX; ++ } else { ++ size += bufv->buf[i].size; ++ } ++ } ++ ++ return size; + } + + static size_t min_size(size_t s1, size_t s2) + { +- return s1 < s2 ? s1 : s2; ++ return s1 < s2 ? s1 : s2; + } + + static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len) ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) + { +- ssize_t res = 0; +- size_t copied = 0; +- +- while (len) { +- if (dst->flags & FUSE_BUF_FD_SEEK) { +- res = pwrite(dst->fd, (char *)src->mem + src_off, len, +- dst->pos + dst_off); +- } else { +- res = write(dst->fd, (char *)src->mem + src_off, len); +- } +- if (res == -1) { +- if (!copied) +- return -errno; +- break; +- } +- if (res == 0) +- break; +- +- copied += res; +- if (!(dst->flags & FUSE_BUF_FD_RETRY)) +- break; +- +- src_off += res; +- dst_off += res; +- len -= res; +- } +- +- return copied; ++ ssize_t res = 0; ++ size_t copied = 0; ++ ++ while (len) { ++ if (dst->flags & FUSE_BUF_FD_SEEK) { ++ res = pwrite(dst->fd, (char *)src->mem + src_off, len, ++ dst->pos + dst_off); ++ } else { ++ res = write(dst->fd, (char *)src->mem + src_off, len); ++ } ++ if (res == -1) { ++ if (!copied) { ++ return -errno; ++ } ++ break; ++ } ++ if (res == 0) { ++ break; ++ } ++ ++ copied += res; ++ if (!(dst->flags & FUSE_BUF_FD_RETRY)) { ++ break; ++ } ++ ++ src_off += res; ++ dst_off += res; ++ len -= 
res; ++ } ++ ++ return copied; + } + + static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len) ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) + { +- ssize_t res = 0; +- size_t copied = 0; +- +- while (len) { +- if (src->flags & FUSE_BUF_FD_SEEK) { +- res = pread(src->fd, (char *)dst->mem + dst_off, len, +- src->pos + src_off); +- } else { +- res = read(src->fd, (char *)dst->mem + dst_off, len); +- } +- if (res == -1) { +- if (!copied) +- return -errno; +- break; +- } +- if (res == 0) +- break; +- +- copied += res; +- if (!(src->flags & FUSE_BUF_FD_RETRY)) +- break; +- +- dst_off += res; +- src_off += res; +- len -= res; +- } +- +- return copied; ++ ssize_t res = 0; ++ size_t copied = 0; ++ ++ while (len) { ++ if (src->flags & FUSE_BUF_FD_SEEK) { ++ res = pread(src->fd, (char *)dst->mem + dst_off, len, ++ src->pos + src_off); ++ } else { ++ res = read(src->fd, (char *)dst->mem + dst_off, len); ++ } ++ if (res == -1) { ++ if (!copied) { ++ return -errno; ++ } ++ break; ++ } ++ if (res == 0) { ++ break; ++ } ++ ++ copied += res; ++ if (!(src->flags & FUSE_BUF_FD_RETRY)) { ++ break; ++ } ++ ++ dst_off += res; ++ src_off += res; ++ len -= res; ++ } ++ ++ return copied; + } + + static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len) ++ const struct fuse_buf *src, size_t src_off, ++ size_t len) + { +- char buf[4096]; +- struct fuse_buf tmp = { +- .size = sizeof(buf), +- .flags = 0, +- }; +- ssize_t res; +- size_t copied = 0; +- +- tmp.mem = buf; +- +- while (len) { +- size_t this_len = min_size(tmp.size, len); +- size_t read_len; +- +- res = fuse_buf_read(&tmp, 0, src, src_off, this_len); +- if (res < 0) { +- if (!copied) +- return res; +- break; +- } +- if (res == 0) +- break; +- +- read_len = res; +- res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); +- if (res < 0) { +- if (!copied) +- 
return res; +- break; +- } +- if (res == 0) +- break; +- +- copied += res; +- +- if (res < this_len) +- break; +- +- dst_off += res; +- src_off += res; +- len -= res; +- } +- +- return copied; ++ char buf[4096]; ++ struct fuse_buf tmp = { ++ .size = sizeof(buf), ++ .flags = 0, ++ }; ++ ssize_t res; ++ size_t copied = 0; ++ ++ tmp.mem = buf; ++ ++ while (len) { ++ size_t this_len = min_size(tmp.size, len); ++ size_t read_len; ++ ++ res = fuse_buf_read(&tmp, 0, src, src_off, this_len); ++ if (res < 0) { ++ if (!copied) { ++ return res; ++ } ++ break; ++ } ++ if (res == 0) { ++ break; ++ } ++ ++ read_len = res; ++ res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); ++ if (res < 0) { ++ if (!copied) { ++ return res; ++ } ++ break; ++ } ++ if (res == 0) { ++ break; ++ } ++ ++ copied += res; ++ ++ if (res < this_len) { ++ break; ++ } ++ ++ dst_off += res; ++ src_off += res; ++ len -= res; ++ } ++ ++ return copied; + } + + static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len, enum fuse_buf_copy_flags flags) ++ const struct fuse_buf *src, size_t src_off, ++ size_t len, enum fuse_buf_copy_flags flags) + { +- int src_is_fd = src->flags & FUSE_BUF_IS_FD; +- int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; +- +- if (!src_is_fd && !dst_is_fd) { +- char *dstmem = (char *)dst->mem + dst_off; +- char *srcmem = (char *)src->mem + src_off; +- +- if (dstmem != srcmem) { +- if (dstmem + len <= srcmem || srcmem + len <= dstmem) +- memcpy(dstmem, srcmem, len); +- else +- memmove(dstmem, srcmem, len); +- } +- +- return len; +- } else if (!src_is_fd) { +- return fuse_buf_write(dst, dst_off, src, src_off, len); +- } else if (!dst_is_fd) { +- return fuse_buf_read(dst, dst_off, src, src_off, len); +- } else { +- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); +- } ++ int src_is_fd = src->flags & FUSE_BUF_IS_FD; ++ int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; ++ ++ if (!src_is_fd && !dst_is_fd) { ++ 
char *dstmem = (char *)dst->mem + dst_off; ++ char *srcmem = (char *)src->mem + src_off; ++ ++ if (dstmem != srcmem) { ++ if (dstmem + len <= srcmem || srcmem + len <= dstmem) { ++ memcpy(dstmem, srcmem, len); ++ } else { ++ memmove(dstmem, srcmem, len); ++ } ++ } ++ ++ return len; ++ } else if (!src_is_fd) { ++ return fuse_buf_write(dst, dst_off, src, src_off, len); ++ } else if (!dst_is_fd) { ++ return fuse_buf_read(dst, dst_off, src, src_off, len); ++ } else { ++ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); ++ } + } + + static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv) + { +- if (bufv->idx < bufv->count) +- return &bufv->buf[bufv->idx]; +- else +- return NULL; ++ if (bufv->idx < bufv->count) { ++ return &bufv->buf[bufv->idx]; ++ } else { ++ return NULL; ++ } + } + + static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) + { +- const struct fuse_buf *buf = fuse_bufvec_current(bufv); +- +- bufv->off += len; +- assert(bufv->off <= buf->size); +- if (bufv->off == buf->size) { +- assert(bufv->idx < bufv->count); +- bufv->idx++; +- if (bufv->idx == bufv->count) +- return 0; +- bufv->off = 0; +- } +- return 1; ++ const struct fuse_buf *buf = fuse_bufvec_current(bufv); ++ ++ bufv->off += len; ++ assert(bufv->off <= buf->size); ++ if (bufv->off == buf->size) { ++ assert(bufv->idx < bufv->count); ++ bufv->idx++; ++ if (bufv->idx == bufv->count) { ++ return 0; ++ } ++ bufv->off = 0; ++ } ++ return 1; + } + + ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, +- enum fuse_buf_copy_flags flags) ++ enum fuse_buf_copy_flags flags) + { +- size_t copied = 0; +- +- if (dstv == srcv) +- return fuse_buf_size(dstv); +- +- for (;;) { +- const struct fuse_buf *src = fuse_bufvec_current(srcv); +- const struct fuse_buf *dst = fuse_bufvec_current(dstv); +- size_t src_len; +- size_t dst_len; +- size_t len; +- ssize_t res; +- +- if (src == NULL || dst == NULL) +- break; +- +- src_len = src->size - srcv->off; +- 
dst_len = dst->size - dstv->off; +- len = min_size(src_len, dst_len); +- +- res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); +- if (res < 0) { +- if (!copied) +- return res; +- break; +- } +- copied += res; +- +- if (!fuse_bufvec_advance(srcv, res) || +- !fuse_bufvec_advance(dstv, res)) +- break; +- +- if (res < len) +- break; +- } +- +- return copied; ++ size_t copied = 0; ++ ++ if (dstv == srcv) { ++ return fuse_buf_size(dstv); ++ } ++ ++ for (;;) { ++ const struct fuse_buf *src = fuse_bufvec_current(srcv); ++ const struct fuse_buf *dst = fuse_bufvec_current(dstv); ++ size_t src_len; ++ size_t dst_len; ++ size_t len; ++ ssize_t res; ++ ++ if (src == NULL || dst == NULL) { ++ break; ++ } ++ ++ src_len = src->size - srcv->off; ++ dst_len = dst->size - dstv->off; ++ len = min_size(src_len, dst_len); ++ ++ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); ++ if (res < 0) { ++ if (!copied) { ++ return res; ++ } ++ break; ++ } ++ copied += res; ++ ++ if (!fuse_bufvec_advance(srcv, res) || ++ !fuse_bufvec_advance(dstv, res)) { ++ break; ++ } ++ ++ if (res < len) { ++ break; ++ } ++ } ++ ++ return copied; + } +diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h +index 3202fba..7a4c713 100644 +--- a/tools/virtiofsd/fuse.h ++++ b/tools/virtiofsd/fuse.h +@@ -1,15 +1,15 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. 
++ */ + + #ifndef FUSE_H_ + #define FUSE_H_ + +-/** @file ++/* + * + * This file defines the library interface of FUSE + * +@@ -19,15 +19,15 @@ + #include "fuse_common.h" + + #include +-#include +-#include + #include + #include ++#include + #include ++#include + +-/* ----------------------------------------------------------- * +- * Basic FUSE API * +- * ----------------------------------------------------------- */ ++/* ++ * Basic FUSE API ++ */ + + /** Handle for a FUSE filesystem */ + struct fuse; +@@ -36,38 +36,39 @@ struct fuse; + * Readdir flags, passed to ->readdir() + */ + enum fuse_readdir_flags { +- /** +- * "Plus" mode. +- * +- * The kernel wants to prefill the inode cache during readdir. The +- * filesystem may honour this by filling in the attributes and setting +- * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also +- * just ignore this flag completely. +- */ +- FUSE_READDIR_PLUS = (1 << 0), ++ /** ++ * "Plus" mode. ++ * ++ * The kernel wants to prefill the inode cache during readdir. The ++ * filesystem may honour this by filling in the attributes and setting ++ * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also ++ * just ignore this flag completely. ++ */ ++ FUSE_READDIR_PLUS = (1 << 0), + }; + + enum fuse_fill_dir_flags { +- /** +- * "Plus" mode: all file attributes are valid +- * +- * The attributes are used by the kernel to prefill the inode cache +- * during a readdir. +- * +- * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set +- * and vice versa. +- */ +- FUSE_FILL_DIR_PLUS = (1 << 1), ++ /** ++ * "Plus" mode: all file attributes are valid ++ * ++ * The attributes are used by the kernel to prefill the inode cache ++ * during a readdir. ++ * ++ * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set ++ * and vice versa. 
++ */ ++ FUSE_FILL_DIR_PLUS = (1 << 1), + }; + +-/** Function to add an entry in a readdir() operation ++/** ++ * Function to add an entry in a readdir() operation + * + * The *off* parameter can be any non-zero value that enables the + * filesystem to identify the current point in the directory + * stream. It does not need to be the actual physical position. A + * value of zero is reserved to indicate that seeking in directories + * is not supported. +- * ++ * + * @param buf the buffer passed to the readdir() operation + * @param name the file name of the directory entry + * @param stat file attributes, can be NULL +@@ -75,9 +76,9 @@ enum fuse_fill_dir_flags { + * @param flags fill flags + * @return 1 if buffer is full, zero otherwise + */ +-typedef int (*fuse_fill_dir_t) (void *buf, const char *name, +- const struct stat *stbuf, off_t off, +- enum fuse_fill_dir_flags flags); ++typedef int (*fuse_fill_dir_t)(void *buf, const char *name, ++ const struct stat *stbuf, off_t off, ++ enum fuse_fill_dir_flags flags); + /** + * Configuration of the high-level API + * +@@ -87,186 +88,186 @@ typedef int (*fuse_fill_dir_t) (void *buf, const char *name, + * file system implementation. + */ + struct fuse_config { +- /** +- * If `set_gid` is non-zero, the st_gid attribute of each file +- * is overwritten with the value of `gid`. +- */ +- int set_gid; +- unsigned int gid; +- +- /** +- * If `set_uid` is non-zero, the st_uid attribute of each file +- * is overwritten with the value of `uid`. +- */ +- int set_uid; +- unsigned int uid; +- +- /** +- * If `set_mode` is non-zero, the any permissions bits set in +- * `umask` are unset in the st_mode attribute of each file. +- */ +- int set_mode; +- unsigned int umask; +- +- /** +- * The timeout in seconds for which name lookups will be +- * cached. +- */ +- double entry_timeout; +- +- /** +- * The timeout in seconds for which a negative lookup will be +- * cached. 
This means, that if file did not exist (lookup +- * retuned ENOENT), the lookup will only be redone after the +- * timeout, and the file/directory will be assumed to not +- * exist until then. A value of zero means that negative +- * lookups are not cached. +- */ +- double negative_timeout; +- +- /** +- * The timeout in seconds for which file/directory attributes +- * (as returned by e.g. the `getattr` handler) are cached. +- */ +- double attr_timeout; +- +- /** +- * Allow requests to be interrupted +- */ +- int intr; +- +- /** +- * Specify which signal number to send to the filesystem when +- * a request is interrupted. The default is hardcoded to +- * USR1. +- */ +- int intr_signal; +- +- /** +- * Normally, FUSE assigns inodes to paths only for as long as +- * the kernel is aware of them. With this option inodes are +- * instead remembered for at least this many seconds. This +- * will require more memory, but may be necessary when using +- * applications that make use of inode numbers. +- * +- * A number of -1 means that inodes will be remembered for the +- * entire life-time of the file-system process. +- */ +- int remember; +- +- /** +- * The default behavior is that if an open file is deleted, +- * the file is renamed to a hidden file (.fuse_hiddenXXX), and +- * only removed when the file is finally released. This +- * relieves the filesystem implementation of having to deal +- * with this problem. This option disables the hiding +- * behavior, and files are removed immediately in an unlink +- * operation (or in a rename operation which overwrites an +- * existing file). +- * +- * It is recommended that you not use the hard_remove +- * option. 
When hard_remove is set, the following libc +- * functions fail on unlinked files (returning errno of +- * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), +- * ftruncate(2), fstat(2), fchmod(2), fchown(2) +- */ +- int hard_remove; +- +- /** +- * Honor the st_ino field in the functions getattr() and +- * fill_dir(). This value is used to fill in the st_ino field +- * in the stat(2), lstat(2), fstat(2) functions and the d_ino +- * field in the readdir(2) function. The filesystem does not +- * have to guarantee uniqueness, however some applications +- * rely on this value being unique for the whole filesystem. +- * +- * Note that this does *not* affect the inode that libfuse +- * and the kernel use internally (also called the "nodeid"). +- */ +- int use_ino; +- +- /** +- * If use_ino option is not given, still try to fill in the +- * d_ino field in readdir(2). If the name was previously +- * looked up, and is still in the cache, the inode number +- * found there will be used. Otherwise it will be set to -1. +- * If use_ino option is given, this option is ignored. +- */ +- int readdir_ino; +- +- /** +- * This option disables the use of page cache (file content cache) +- * in the kernel for this filesystem. This has several affects: +- * +- * 1. Each read(2) or write(2) system call will initiate one +- * or more read or write operations, data will not be +- * cached in the kernel. +- * +- * 2. The return value of the read() and write() system calls +- * will correspond to the return values of the read and +- * write operations. This is useful for example if the +- * file size is not known in advance (before reading it). +- * +- * Internally, enabling this option causes fuse to set the +- * `direct_io` field of `struct fuse_file_info` - overwriting +- * any value that was put there by the file system. +- */ +- int direct_io; +- +- /** +- * This option disables flushing the cache of the file +- * contents on every open(2). 
This should only be enabled on +- * filesystems where the file data is never changed +- * externally (not through the mounted FUSE filesystem). Thus +- * it is not suitable for network filesystems and other +- * intermediate filesystems. +- * +- * NOTE: if this option is not specified (and neither +- * direct_io) data is still cached after the open(2), so a +- * read(2) system call will not always initiate a read +- * operation. +- * +- * Internally, enabling this option causes fuse to set the +- * `keep_cache` field of `struct fuse_file_info` - overwriting +- * any value that was put there by the file system. +- */ +- int kernel_cache; +- +- /** +- * This option is an alternative to `kernel_cache`. Instead of +- * unconditionally keeping cached data, the cached data is +- * invalidated on open(2) if if the modification time or the +- * size of the file has changed since it was last opened. +- */ +- int auto_cache; +- +- /** +- * The timeout in seconds for which file attributes are cached +- * for the purpose of checking if auto_cache should flush the +- * file data on open. +- */ +- int ac_attr_timeout_set; +- double ac_attr_timeout; +- +- /** +- * If this option is given the file-system handlers for the +- * following operations will not receive path information: +- * read, write, flush, release, fsync, readdir, releasedir, +- * fsyncdir, lock, ioctl and poll. +- * +- * For the truncate, getattr, chmod, chown and utimens +- * operations the path will be provided only if the struct +- * fuse_file_info argument is NULL. +- */ +- int nullpath_ok; +- +- /** +- * The remaining options are used by libfuse internally and +- * should not be touched. +- */ +- int show_help; +- char *modules; +- int debug; ++ /** ++ * If `set_gid` is non-zero, the st_gid attribute of each file ++ * is overwritten with the value of `gid`. 
++ */ ++ int set_gid; ++ unsigned int gid; ++ ++ /** ++ * If `set_uid` is non-zero, the st_uid attribute of each file ++ * is overwritten with the value of `uid`. ++ */ ++ int set_uid; ++ unsigned int uid; ++ ++ /** ++ * If `set_mode` is non-zero, the any permissions bits set in ++ * `umask` are unset in the st_mode attribute of each file. ++ */ ++ int set_mode; ++ unsigned int umask; ++ ++ /** ++ * The timeout in seconds for which name lookups will be ++ * cached. ++ */ ++ double entry_timeout; ++ ++ /** ++ * The timeout in seconds for which a negative lookup will be ++ * cached. This means, that if file did not exist (lookup ++ * retuned ENOENT), the lookup will only be redone after the ++ * timeout, and the file/directory will be assumed to not ++ * exist until then. A value of zero means that negative ++ * lookups are not cached. ++ */ ++ double negative_timeout; ++ ++ /** ++ * The timeout in seconds for which file/directory attributes ++ * (as returned by e.g. the `getattr` handler) are cached. ++ */ ++ double attr_timeout; ++ ++ /** ++ * Allow requests to be interrupted ++ */ ++ int intr; ++ ++ /** ++ * Specify which signal number to send to the filesystem when ++ * a request is interrupted. The default is hardcoded to ++ * USR1. ++ */ ++ int intr_signal; ++ ++ /** ++ * Normally, FUSE assigns inodes to paths only for as long as ++ * the kernel is aware of them. With this option inodes are ++ * instead remembered for at least this many seconds. This ++ * will require more memory, but may be necessary when using ++ * applications that make use of inode numbers. ++ * ++ * A number of -1 means that inodes will be remembered for the ++ * entire life-time of the file-system process. ++ */ ++ int remember; ++ ++ /** ++ * The default behavior is that if an open file is deleted, ++ * the file is renamed to a hidden file (.fuse_hiddenXXX), and ++ * only removed when the file is finally released. 
This ++ * relieves the filesystem implementation of having to deal ++ * with this problem. This option disables the hiding ++ * behavior, and files are removed immediately in an unlink ++ * operation (or in a rename operation which overwrites an ++ * existing file). ++ * ++ * It is recommended that you not use the hard_remove ++ * option. When hard_remove is set, the following libc ++ * functions fail on unlinked files (returning errno of ++ * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), ++ * ftruncate(2), fstat(2), fchmod(2), fchown(2) ++ */ ++ int hard_remove; ++ ++ /** ++ * Honor the st_ino field in the functions getattr() and ++ * fill_dir(). This value is used to fill in the st_ino field ++ * in the stat(2), lstat(2), fstat(2) functions and the d_ino ++ * field in the readdir(2) function. The filesystem does not ++ * have to guarantee uniqueness, however some applications ++ * rely on this value being unique for the whole filesystem. ++ * ++ * Note that this does *not* affect the inode that libfuse ++ * and the kernel use internally (also called the "nodeid"). ++ */ ++ int use_ino; ++ ++ /** ++ * If use_ino option is not given, still try to fill in the ++ * d_ino field in readdir(2). If the name was previously ++ * looked up, and is still in the cache, the inode number ++ * found there will be used. Otherwise it will be set to -1. ++ * If use_ino option is given, this option is ignored. ++ */ ++ int readdir_ino; ++ ++ /** ++ * This option disables the use of page cache (file content cache) ++ * in the kernel for this filesystem. This has several affects: ++ * ++ * 1. Each read(2) or write(2) system call will initiate one ++ * or more read or write operations, data will not be ++ * cached in the kernel. ++ * ++ * 2. The return value of the read() and write() system calls ++ * will correspond to the return values of the read and ++ * write operations. This is useful for example if the ++ * file size is not known in advance (before reading it). 
++ * ++ * Internally, enabling this option causes fuse to set the ++ * `direct_io` field of `struct fuse_file_info` - overwriting ++ * any value that was put there by the file system. ++ */ ++ int direct_io; ++ ++ /** ++ * This option disables flushing the cache of the file ++ * contents on every open(2). This should only be enabled on ++ * filesystems where the file data is never changed ++ * externally (not through the mounted FUSE filesystem). Thus ++ * it is not suitable for network filesystems and other ++ * intermediate filesystems. ++ * ++ * NOTE: if this option is not specified (and neither ++ * direct_io) data is still cached after the open(2), so a ++ * read(2) system call will not always initiate a read ++ * operation. ++ * ++ * Internally, enabling this option causes fuse to set the ++ * `keep_cache` field of `struct fuse_file_info` - overwriting ++ * any value that was put there by the file system. ++ */ ++ int kernel_cache; ++ ++ /** ++ * This option is an alternative to `kernel_cache`. Instead of ++ * unconditionally keeping cached data, the cached data is ++ * invalidated on open(2) if if the modification time or the ++ * size of the file has changed since it was last opened. ++ */ ++ int auto_cache; ++ ++ /** ++ * The timeout in seconds for which file attributes are cached ++ * for the purpose of checking if auto_cache should flush the ++ * file data on open. ++ */ ++ int ac_attr_timeout_set; ++ double ac_attr_timeout; ++ ++ /** ++ * If this option is given the file-system handlers for the ++ * following operations will not receive path information: ++ * read, write, flush, release, fsync, readdir, releasedir, ++ * fsyncdir, lock, ioctl and poll. ++ * ++ * For the truncate, getattr, chmod, chown and utimens ++ * operations the path will be provided only if the struct ++ * fuse_file_info argument is NULL. ++ */ ++ int nullpath_ok; ++ ++ /** ++ * The remaining options are used by libfuse internally and ++ * should not be touched. 
++ */ ++ int show_help; ++ char *modules; ++ int debug; + }; + + +@@ -293,515 +294,535 @@ struct fuse_config { + * Almost all operations take a path which can be of any length. + */ + struct fuse_operations { +- /** Get file attributes. +- * +- * Similar to stat(). The 'st_dev' and 'st_blksize' fields are +- * ignored. The 'st_ino' field is ignored except if the 'use_ino' +- * mount option is given. In that case it is passed to userspace, +- * but libfuse and the kernel will still assign a different +- * inode for internal use (called the "nodeid"). +- * +- * `fi` will always be NULL if the file is not currently open, but +- * may also be NULL if the file is open. +- */ +- int (*getattr) (const char *, struct stat *, struct fuse_file_info *fi); +- +- /** Read the target of a symbolic link +- * +- * The buffer should be filled with a null terminated string. The +- * buffer size argument includes the space for the terminating +- * null character. If the linkname is too long to fit in the +- * buffer, it should be truncated. The return value should be 0 +- * for success. +- */ +- int (*readlink) (const char *, char *, size_t); +- +- /** Create a file node +- * +- * This is called for creation of all non-directory, non-symlink +- * nodes. If the filesystem defines a create() method, then for +- * regular files that will be called instead. +- */ +- int (*mknod) (const char *, mode_t, dev_t); +- +- /** Create a directory +- * +- * Note that the mode argument may not have the type specification +- * bits set, i.e. S_ISDIR(mode) can be false. To obtain the +- * correct directory type bits use mode|S_IFDIR +- * */ +- int (*mkdir) (const char *, mode_t); +- +- /** Remove a file */ +- int (*unlink) (const char *); +- +- /** Remove a directory */ +- int (*rmdir) (const char *); +- +- /** Create a symbolic link */ +- int (*symlink) (const char *, const char *); +- +- /** Rename a file +- * +- * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. 
If +- * RENAME_NOREPLACE is specified, the filesystem must not +- * overwrite *newname* if it exists and return an error +- * instead. If `RENAME_EXCHANGE` is specified, the filesystem +- * must atomically exchange the two files, i.e. both must +- * exist and neither may be deleted. +- */ +- int (*rename) (const char *, const char *, unsigned int flags); +- +- /** Create a hard link to a file */ +- int (*link) (const char *, const char *); +- +- /** Change the permission bits of a file +- * +- * `fi` will always be NULL if the file is not currenlty open, but +- * may also be NULL if the file is open. +- */ +- int (*chmod) (const char *, mode_t, struct fuse_file_info *fi); +- +- /** Change the owner and group of a file +- * +- * `fi` will always be NULL if the file is not currenlty open, but +- * may also be NULL if the file is open. +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. +- */ +- int (*chown) (const char *, uid_t, gid_t, struct fuse_file_info *fi); +- +- /** Change the size of a file +- * +- * `fi` will always be NULL if the file is not currenlty open, but +- * may also be NULL if the file is open. +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. +- */ +- int (*truncate) (const char *, off_t, struct fuse_file_info *fi); +- +- /** Open a file +- * +- * Open flags are available in fi->flags. The following rules +- * apply. +- * +- * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be +- * filtered out / handled by the kernel. +- * +- * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) +- * should be used by the filesystem to check if the operation is +- * permitted. If the ``-o default_permissions`` mount option is +- * given, this check is already done by the kernel before calling +- * open() and may thus be omitted by the filesystem. 
+- * +- * - When writeback caching is enabled, the kernel may send +- * read requests even for files opened with O_WRONLY. The +- * filesystem should be prepared to handle this. +- * +- * - When writeback caching is disabled, the filesystem is +- * expected to properly handle the O_APPEND flag and ensure +- * that each write is appending to the end of the file. +- * +- * - When writeback caching is enabled, the kernel will +- * handle O_APPEND. However, unless all changes to the file +- * come through the kernel this will not work reliably. The +- * filesystem should thus either ignore the O_APPEND flag +- * (and let the kernel handle it), or return an error +- * (indicating that reliably O_APPEND is not available). +- * +- * Filesystem may store an arbitrary file handle (pointer, +- * index, etc) in fi->fh, and use this in other all other file +- * operations (read, write, flush, release, fsync). +- * +- * Filesystem may also implement stateless file I/O and not store +- * anything in fi->fh. +- * +- * There are also some flags (direct_io, keep_cache) which the +- * filesystem may set in fi, to change the way the file is opened. +- * See fuse_file_info structure in for more details. +- * +- * If this request is answered with an error code of ENOSYS +- * and FUSE_CAP_NO_OPEN_SUPPORT is set in +- * `fuse_conn_info.capable`, this is treated as success and +- * future calls to open will also succeed without being send +- * to the filesystem process. +- * +- */ +- int (*open) (const char *, struct fuse_file_info *); +- +- /** Read data from an open file +- * +- * Read should return exactly the number of bytes requested except +- * on EOF or error, otherwise the rest of the data will be +- * substituted with zeroes. An exception to this is when the +- * 'direct_io' mount option is specified, in which case the return +- * value of the read system call will reflect the return value of +- * this operation. 
+- */ +- int (*read) (const char *, char *, size_t, off_t, +- struct fuse_file_info *); +- +- /** Write data to an open file +- * +- * Write should return exactly the number of bytes requested +- * except on error. An exception to this is when the 'direct_io' +- * mount option is specified (see read operation). +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. +- */ +- int (*write) (const char *, const char *, size_t, off_t, +- struct fuse_file_info *); +- +- /** Get file system statistics +- * +- * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored +- */ +- int (*statfs) (const char *, struct statvfs *); +- +- /** Possibly flush cached data +- * +- * BIG NOTE: This is not equivalent to fsync(). It's not a +- * request to sync dirty data. +- * +- * Flush is called on each close() of a file descriptor, as opposed to +- * release which is called on the close of the last file descriptor for +- * a file. Under Linux, errors returned by flush() will be passed to +- * userspace as errors from close(), so flush() is a good place to write +- * back any cached dirty data. However, many applications ignore errors +- * on close(), and on non-Linux systems, close() may succeed even if flush() +- * returns an error. For these reasons, filesystems should not assume +- * that errors returned by flush will ever be noticed or even +- * delivered. +- * +- * NOTE: The flush() method may be called more than once for each +- * open(). This happens if more than one file descriptor refers to an +- * open file handle, e.g. due to dup(), dup2() or fork() calls. It is +- * not possible to determine if a flush is final, so each flush should +- * be treated equally. Multiple write-flush sequences are relatively +- * rare, so this shouldn't be a problem. +- * +- * Filesystems shouldn't assume that flush will be called at any +- * particular point. It may be called more times than expected, or not +- * at all. 
+- * +- * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html +- */ +- int (*flush) (const char *, struct fuse_file_info *); +- +- /** Release an open file +- * +- * Release is called when there are no more references to an open +- * file: all file descriptors are closed and all memory mappings +- * are unmapped. +- * +- * For every open() call there will be exactly one release() call +- * with the same flags and file handle. It is possible to +- * have a file opened more than once, in which case only the last +- * release will mean, that no more reads/writes will happen on the +- * file. The return value of release is ignored. +- */ +- int (*release) (const char *, struct fuse_file_info *); +- +- /** Synchronize file contents +- * +- * If the datasync parameter is non-zero, then only the user data +- * should be flushed, not the meta data. +- */ +- int (*fsync) (const char *, int, struct fuse_file_info *); +- +- /** Set extended attributes */ +- int (*setxattr) (const char *, const char *, const char *, size_t, int); +- +- /** Get extended attributes */ +- int (*getxattr) (const char *, const char *, char *, size_t); +- +- /** List extended attributes */ +- int (*listxattr) (const char *, char *, size_t); +- +- /** Remove extended attributes */ +- int (*removexattr) (const char *, const char *); +- +- /** Open directory +- * +- * Unless the 'default_permissions' mount option is given, +- * this method should check if opendir is permitted for this +- * directory. Optionally opendir may also return an arbitrary +- * filehandle in the fuse_file_info structure, which will be +- * passed to readdir, releasedir and fsyncdir. +- */ +- int (*opendir) (const char *, struct fuse_file_info *); +- +- /** Read directory +- * +- * The filesystem may choose between two modes of operation: +- * +- * 1) The readdir implementation ignores the offset parameter, and +- * passes zero to the filler function's offset. 
The filler +- * function will not return '1' (unless an error happens), so the +- * whole directory is read in a single readdir operation. +- * +- * 2) The readdir implementation keeps track of the offsets of the +- * directory entries. It uses the offset parameter and always +- * passes non-zero offset to the filler function. When the buffer +- * is full (or an error happens) the filler function will return +- * '1'. +- */ +- int (*readdir) (const char *, void *, fuse_fill_dir_t, off_t, +- struct fuse_file_info *, enum fuse_readdir_flags); +- +- /** Release directory +- */ +- int (*releasedir) (const char *, struct fuse_file_info *); +- +- /** Synchronize directory contents +- * +- * If the datasync parameter is non-zero, then only the user data +- * should be flushed, not the meta data +- */ +- int (*fsyncdir) (const char *, int, struct fuse_file_info *); +- +- /** +- * Initialize filesystem +- * +- * The return value will passed in the `private_data` field of +- * `struct fuse_context` to all file operations, and as a +- * parameter to the destroy() method. It overrides the initial +- * value provided to fuse_main() / fuse_new(). +- */ +- void *(*init) (struct fuse_conn_info *conn, +- struct fuse_config *cfg); +- +- /** +- * Clean up filesystem +- * +- * Called on filesystem exit. +- */ +- void (*destroy) (void *private_data); +- +- /** +- * Check file access permissions +- * +- * This will be called for the access() system call. If the +- * 'default_permissions' mount option is given, this method is not +- * called. +- * +- * This method is not called under Linux kernel versions 2.4.x +- */ +- int (*access) (const char *, int); +- +- /** +- * Create and open a file +- * +- * If the file does not exist, first create it with the specified +- * mode, and then open it. +- * +- * If this method is not implemented or under Linux kernel +- * versions earlier than 2.6.15, the mknod() and open() methods +- * will be called instead. 
+- */ +- int (*create) (const char *, mode_t, struct fuse_file_info *); +- +- /** +- * Perform POSIX file locking operation +- * +- * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. +- * +- * For the meaning of fields in 'struct flock' see the man page +- * for fcntl(2). The l_whence field will always be set to +- * SEEK_SET. +- * +- * For checking lock ownership, the 'fuse_file_info->owner' +- * argument must be used. +- * +- * For F_GETLK operation, the library will first check currently +- * held locks, and if a conflicting lock is found it will return +- * information without calling this method. This ensures, that +- * for local locks the l_pid field is correctly filled in. The +- * results may not be accurate in case of race conditions and in +- * the presence of hard links, but it's unlikely that an +- * application would rely on accurate GETLK results in these +- * cases. If a conflicting lock is not found, this method will be +- * called, and the filesystem may fill out l_pid by a meaningful +- * value, or it may leave this field zero. +- * +- * For F_SETLK and F_SETLKW the l_pid field will be set to the pid +- * of the process performing the locking operation. +- * +- * Note: if this method is not implemented, the kernel will still +- * allow file locking to work locally. Hence it is only +- * interesting for network filesystems and similar. +- */ +- int (*lock) (const char *, struct fuse_file_info *, int cmd, +- struct flock *); +- +- /** +- * Change the access and modification times of a file with +- * nanosecond resolution +- * +- * This supersedes the old utime() interface. New applications +- * should use this. +- * +- * `fi` will always be NULL if the file is not currenlty open, but +- * may also be NULL if the file is open. +- * +- * See the utimensat(2) man page for details. 
+- */ +- int (*utimens) (const char *, const struct timespec tv[2], +- struct fuse_file_info *fi); +- +- /** +- * Map block index within file to block index within device +- * +- * Note: This makes sense only for block device backed filesystems +- * mounted with the 'blkdev' option +- */ +- int (*bmap) (const char *, size_t blocksize, uint64_t *idx); +- +- /** +- * Ioctl +- * +- * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in +- * 64bit environment. The size and direction of data is +- * determined by _IOC_*() decoding of cmd. For _IOC_NONE, +- * data will be NULL, for _IOC_WRITE data is out area, for +- * _IOC_READ in area and if both are set in/out area. In all +- * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. +- * +- * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a +- * directory file handle. +- * +- * Note : the unsigned long request submitted by the application +- * is truncated to 32 bits. +- */ +- int (*ioctl) (const char *, unsigned int cmd, void *arg, +- struct fuse_file_info *, unsigned int flags, void *data); +- +- /** +- * Poll for IO readiness events +- * +- * Note: If ph is non-NULL, the client should notify +- * when IO readiness events occur by calling +- * fuse_notify_poll() with the specified ph. +- * +- * Regardless of the number of times poll with a non-NULL ph +- * is received, single notification is enough to clear all. +- * Notifying more times incurs overhead but doesn't harm +- * correctness. +- * +- * The callee is responsible for destroying ph with +- * fuse_pollhandle_destroy() when no longer in use. +- */ +- int (*poll) (const char *, struct fuse_file_info *, +- struct fuse_pollhandle *ph, unsigned *reventsp); +- +- /** Write contents of buffer to an open file +- * +- * Similar to the write() method, but data is supplied in a +- * generic buffer. Use fuse_buf_copy() to transfer data to +- * the destination. 
+- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. +- */ +- int (*write_buf) (const char *, struct fuse_bufvec *buf, off_t off, +- struct fuse_file_info *); +- +- /** Store data from an open file in a buffer +- * +- * Similar to the read() method, but data is stored and +- * returned in a generic buffer. +- * +- * No actual copying of data has to take place, the source +- * file descriptor may simply be stored in the buffer for +- * later data transfer. +- * +- * The buffer must be allocated dynamically and stored at the +- * location pointed to by bufp. If the buffer contains memory +- * regions, they too must be allocated using malloc(). The +- * allocated memory will be freed by the caller. +- */ +- int (*read_buf) (const char *, struct fuse_bufvec **bufp, +- size_t size, off_t off, struct fuse_file_info *); +- /** +- * Perform BSD file locking operation +- * +- * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN +- * +- * Nonblocking requests will be indicated by ORing LOCK_NB to +- * the above operations +- * +- * For more information see the flock(2) manual page. +- * +- * Additionally fi->owner will be set to a value unique to +- * this open file. This same value will be supplied to +- * ->release() when the file is released. +- * +- * Note: if this method is not implemented, the kernel will still +- * allow file locking to work locally. Hence it is only +- * interesting for network filesystems and similar. +- */ +- int (*flock) (const char *, struct fuse_file_info *, int op); +- +- /** +- * Allocates space for an open file +- * +- * This function ensures that required space is allocated for specified +- * file. If this function returns success then any subsequent write +- * request to specified range is guaranteed not to fail because of lack +- * of space on the file system media. 
+- */ +- int (*fallocate) (const char *, int, off_t, off_t, +- struct fuse_file_info *); +- +- /** +- * Copy a range of data from one file to another +- * +- * Performs an optimized copy between two file descriptors without the +- * additional cost of transferring data through the FUSE kernel module +- * to user space (glibc) and then back into the FUSE filesystem again. +- * +- * In case this method is not implemented, glibc falls back to reading +- * data from the source and writing to the destination. Effectively +- * doing an inefficient copy of the data. +- */ +- ssize_t (*copy_file_range) (const char *path_in, +- struct fuse_file_info *fi_in, +- off_t offset_in, const char *path_out, +- struct fuse_file_info *fi_out, +- off_t offset_out, size_t size, int flags); +- +- /** +- * Find next data or hole after the specified offset +- */ +- off_t (*lseek) (const char *, off_t off, int whence, struct fuse_file_info *); ++ /** ++ * Get file attributes. ++ * ++ * Similar to stat(). The 'st_dev' and 'st_blksize' fields are ++ * ignored. The 'st_ino' field is ignored except if the 'use_ino' ++ * mount option is given. In that case it is passed to userspace, ++ * but libfuse and the kernel will still assign a different ++ * inode for internal use (called the "nodeid"). ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ */ ++ int (*getattr)(const char *, struct stat *, struct fuse_file_info *fi); ++ ++ /** ++ * Read the target of a symbolic link ++ * ++ * The buffer should be filled with a null terminated string. The ++ * buffer size argument includes the space for the terminating ++ * null character. If the linkname is too long to fit in the ++ * buffer, it should be truncated. The return value should be 0 ++ * for success. ++ */ ++ int (*readlink)(const char *, char *, size_t); ++ ++ /** ++ * Create a file node ++ * ++ * This is called for creation of all non-directory, non-symlink ++ * nodes. 
If the filesystem defines a create() method, then for ++ * regular files that will be called instead. ++ */ ++ int (*mknod)(const char *, mode_t, dev_t); ++ ++ /** ++ * Create a directory ++ * ++ * Note that the mode argument may not have the type specification ++ * bits set, i.e. S_ISDIR(mode) can be false. To obtain the ++ * correct directory type bits use mode|S_IFDIR ++ */ ++ int (*mkdir)(const char *, mode_t); ++ ++ /** Remove a file */ ++ int (*unlink)(const char *); ++ ++ /** Remove a directory */ ++ int (*rmdir)(const char *); ++ ++ /** Create a symbolic link */ ++ int (*symlink)(const char *, const char *); ++ ++ /** ++ * Rename a file ++ * ++ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If ++ * RENAME_NOREPLACE is specified, the filesystem must not ++ * overwrite *newname* if it exists and return an error ++ * instead. If `RENAME_EXCHANGE` is specified, the filesystem ++ * must atomically exchange the two files, i.e. both must ++ * exist and neither may be deleted. ++ */ ++ int (*rename)(const char *, const char *, unsigned int flags); ++ ++ /** Create a hard link to a file */ ++ int (*link)(const char *, const char *); ++ ++ /** ++ * Change the permission bits of a file ++ * ++ * `fi` will always be NULL if the file is not currenlty open, but ++ * may also be NULL if the file is open. ++ */ ++ int (*chmod)(const char *, mode_t, struct fuse_file_info *fi); ++ ++ /** ++ * Change the owner and group of a file ++ * ++ * `fi` will always be NULL if the file is not currenlty open, but ++ * may also be NULL if the file is open. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*chown)(const char *, uid_t, gid_t, struct fuse_file_info *fi); ++ ++ /** ++ * Change the size of a file ++ * ++ * `fi` will always be NULL if the file is not currenlty open, but ++ * may also be NULL if the file is open. 
++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*truncate)(const char *, off_t, struct fuse_file_info *fi); ++ ++ /** ++ * Open a file ++ * ++ * Open flags are available in fi->flags. The following rules ++ * apply. ++ * ++ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be ++ * filtered out / handled by the kernel. ++ * ++ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) ++ * should be used by the filesystem to check if the operation is ++ * permitted. If the ``-o default_permissions`` mount option is ++ * given, this check is already done by the kernel before calling ++ * open() and may thus be omitted by the filesystem. ++ * ++ * - When writeback caching is enabled, the kernel may send ++ * read requests even for files opened with O_WRONLY. The ++ * filesystem should be prepared to handle this. ++ * ++ * - When writeback caching is disabled, the filesystem is ++ * expected to properly handle the O_APPEND flag and ensure ++ * that each write is appending to the end of the file. ++ * ++ * - When writeback caching is enabled, the kernel will ++ * handle O_APPEND. However, unless all changes to the file ++ * come through the kernel this will not work reliably. The ++ * filesystem should thus either ignore the O_APPEND flag ++ * (and let the kernel handle it), or return an error ++ * (indicating that reliably O_APPEND is not available). ++ * ++ * Filesystem may store an arbitrary file handle (pointer, ++ * index, etc) in fi->fh, and use this in other all other file ++ * operations (read, write, flush, release, fsync). ++ * ++ * Filesystem may also implement stateless file I/O and not store ++ * anything in fi->fh. ++ * ++ * There are also some flags (direct_io, keep_cache) which the ++ * filesystem may set in fi, to change the way the file is opened. ++ * See fuse_file_info structure in for more details. 
++ * ++ * If this request is answered with an error code of ENOSYS ++ * and FUSE_CAP_NO_OPEN_SUPPORT is set in ++ * `fuse_conn_info.capable`, this is treated as success and ++ * future calls to open will also succeed without being send ++ * to the filesystem process. ++ * ++ */ ++ int (*open)(const char *, struct fuse_file_info *); ++ ++ /** ++ * Read data from an open file ++ * ++ * Read should return exactly the number of bytes requested except ++ * on EOF or error, otherwise the rest of the data will be ++ * substituted with zeroes. An exception to this is when the ++ * 'direct_io' mount option is specified, in which case the return ++ * value of the read system call will reflect the return value of ++ * this operation. ++ */ ++ int (*read)(const char *, char *, size_t, off_t, struct fuse_file_info *); ++ ++ /** ++ * Write data to an open file ++ * ++ * Write should return exactly the number of bytes requested ++ * except on error. An exception to this is when the 'direct_io' ++ * mount option is specified (see read operation). ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*write)(const char *, const char *, size_t, off_t, ++ struct fuse_file_info *); ++ ++ /** ++ * Get file system statistics ++ * ++ * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored ++ */ ++ int (*statfs)(const char *, struct statvfs *); ++ ++ /** ++ * Possibly flush cached data ++ * ++ * BIG NOTE: This is not equivalent to fsync(). It's not a ++ * request to sync dirty data. ++ * ++ * Flush is called on each close() of a file descriptor, as opposed to ++ * release which is called on the close of the last file descriptor for ++ * a file. Under Linux, errors returned by flush() will be passed to ++ * userspace as errors from close(), so flush() is a good place to write ++ * back any cached dirty data. 
However, many applications ignore errors ++ * on close(), and on non-Linux systems, close() may succeed even if flush() ++ * returns an error. For these reasons, filesystems should not assume ++ * that errors returned by flush will ever be noticed or even ++ * delivered. ++ * ++ * NOTE: The flush() method may be called more than once for each ++ * open(). This happens if more than one file descriptor refers to an ++ * open file handle, e.g. due to dup(), dup2() or fork() calls. It is ++ * not possible to determine if a flush is final, so each flush should ++ * be treated equally. Multiple write-flush sequences are relatively ++ * rare, so this shouldn't be a problem. ++ * ++ * Filesystems shouldn't assume that flush will be called at any ++ * particular point. It may be called more times than expected, or not ++ * at all. ++ * ++ * [close]: ++ * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html ++ */ ++ int (*flush)(const char *, struct fuse_file_info *); ++ ++ /** ++ * Release an open file ++ * ++ * Release is called when there are no more references to an open ++ * file: all file descriptors are closed and all memory mappings ++ * are unmapped. ++ * ++ * For every open() call there will be exactly one release() call ++ * with the same flags and file handle. It is possible to ++ * have a file opened more than once, in which case only the last ++ * release will mean, that no more reads/writes will happen on the ++ * file. The return value of release is ignored. ++ */ ++ int (*release)(const char *, struct fuse_file_info *); ++ ++ /* ++ * Synchronize file contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data. 
++ */ ++ int (*fsync)(const char *, int, struct fuse_file_info *); ++ ++ /** Set extended attributes */ ++ int (*setxattr)(const char *, const char *, const char *, size_t, int); ++ ++ /** Get extended attributes */ ++ int (*getxattr)(const char *, const char *, char *, size_t); ++ ++ /** List extended attributes */ ++ int (*listxattr)(const char *, char *, size_t); ++ ++ /** Remove extended attributes */ ++ int (*removexattr)(const char *, const char *); ++ ++ /* ++ * Open directory ++ * ++ * Unless the 'default_permissions' mount option is given, ++ * this method should check if opendir is permitted for this ++ * directory. Optionally opendir may also return an arbitrary ++ * filehandle in the fuse_file_info structure, which will be ++ * passed to readdir, releasedir and fsyncdir. ++ */ ++ int (*opendir)(const char *, struct fuse_file_info *); ++ ++ /* ++ * Read directory ++ * ++ * The filesystem may choose between two modes of operation: ++ * ++ * 1) The readdir implementation ignores the offset parameter, and ++ * passes zero to the filler function's offset. The filler ++ * function will not return '1' (unless an error happens), so the ++ * whole directory is read in a single readdir operation. ++ * ++ * 2) The readdir implementation keeps track of the offsets of the ++ * directory entries. It uses the offset parameter and always ++ * passes non-zero offset to the filler function. When the buffer ++ * is full (or an error happens) the filler function will return ++ * '1'. 
++ */ ++ int (*readdir)(const char *, void *, fuse_fill_dir_t, off_t, ++ struct fuse_file_info *, enum fuse_readdir_flags); ++ ++ /** ++ * Release directory ++ */ ++ int (*releasedir)(const char *, struct fuse_file_info *); ++ ++ /** ++ * Synchronize directory contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data ++ */ ++ int (*fsyncdir)(const char *, int, struct fuse_file_info *); ++ ++ /** ++ * Initialize filesystem ++ * ++ * The return value will passed in the `private_data` field of ++ * `struct fuse_context` to all file operations, and as a ++ * parameter to the destroy() method. It overrides the initial ++ * value provided to fuse_main() / fuse_new(). ++ */ ++ void *(*init)(struct fuse_conn_info *conn, struct fuse_config *cfg); ++ ++ /** ++ * Clean up filesystem ++ * ++ * Called on filesystem exit. ++ */ ++ void (*destroy)(void *private_data); ++ ++ /** ++ * Check file access permissions ++ * ++ * This will be called for the access() system call. If the ++ * 'default_permissions' mount option is given, this method is not ++ * called. ++ * ++ * This method is not called under Linux kernel versions 2.4.x ++ */ ++ int (*access)(const char *, int); ++ ++ /** ++ * Create and open a file ++ * ++ * If the file does not exist, first create it with the specified ++ * mode, and then open it. ++ * ++ * If this method is not implemented or under Linux kernel ++ * versions earlier than 2.6.15, the mknod() and open() methods ++ * will be called instead. ++ */ ++ int (*create)(const char *, mode_t, struct fuse_file_info *); ++ ++ /** ++ * Perform POSIX file locking operation ++ * ++ * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. ++ * ++ * For the meaning of fields in 'struct flock' see the man page ++ * for fcntl(2). The l_whence field will always be set to ++ * SEEK_SET. ++ * ++ * For checking lock ownership, the 'fuse_file_info->owner' ++ * argument must be used. 
++ * ++ * For F_GETLK operation, the library will first check currently ++ * held locks, and if a conflicting lock is found it will return ++ * information without calling this method. This ensures, that ++ * for local locks the l_pid field is correctly filled in. The ++ * results may not be accurate in case of race conditions and in ++ * the presence of hard links, but it's unlikely that an ++ * application would rely on accurate GETLK results in these ++ * cases. If a conflicting lock is not found, this method will be ++ * called, and the filesystem may fill out l_pid by a meaningful ++ * value, or it may leave this field zero. ++ * ++ * For F_SETLK and F_SETLKW the l_pid field will be set to the pid ++ * of the process performing the locking operation. ++ * ++ * Note: if this method is not implemented, the kernel will still ++ * allow file locking to work locally. Hence it is only ++ * interesting for network filesystems and similar. ++ */ ++ int (*lock)(const char *, struct fuse_file_info *, int cmd, struct flock *); ++ ++ /** ++ * Change the access and modification times of a file with ++ * nanosecond resolution ++ * ++ * This supersedes the old utime() interface. New applications ++ * should use this. ++ * ++ * `fi` will always be NULL if the file is not currenlty open, but ++ * may also be NULL if the file is open. ++ * ++ * See the utimensat(2) man page for details. ++ */ ++ int (*utimens)(const char *, const struct timespec tv[2], ++ struct fuse_file_info *fi); ++ ++ /** ++ * Map block index within file to block index within device ++ * ++ * Note: This makes sense only for block device backed filesystems ++ * mounted with the 'blkdev' option ++ */ ++ int (*bmap)(const char *, size_t blocksize, uint64_t *idx); ++ ++ /** ++ * Ioctl ++ * ++ * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in ++ * 64bit environment. The size and direction of data is ++ * determined by _IOC_*() decoding of cmd. 
For _IOC_NONE, ++ * data will be NULL, for _IOC_WRITE data is out area, for ++ * _IOC_READ in area and if both are set in/out area. In all ++ * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. ++ * ++ * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a ++ * directory file handle. ++ * ++ * Note : the unsigned long request submitted by the application ++ * is truncated to 32 bits. ++ */ ++ int (*ioctl)(const char *, unsigned int cmd, void *arg, ++ struct fuse_file_info *, unsigned int flags, void *data); ++ ++ /** ++ * Poll for IO readiness events ++ * ++ * Note: If ph is non-NULL, the client should notify ++ * when IO readiness events occur by calling ++ * fuse_notify_poll() with the specified ph. ++ * ++ * Regardless of the number of times poll with a non-NULL ph ++ * is received, single notification is enough to clear all. ++ * Notifying more times incurs overhead but doesn't harm ++ * correctness. ++ * ++ * The callee is responsible for destroying ph with ++ * fuse_pollhandle_destroy() when no longer in use. ++ */ ++ int (*poll)(const char *, struct fuse_file_info *, ++ struct fuse_pollhandle *ph, unsigned *reventsp); ++ ++ /* ++ * Write contents of buffer to an open file ++ * ++ * Similar to the write() method, but data is supplied in a ++ * generic buffer. Use fuse_buf_copy() to transfer data to ++ * the destination. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*write_buf)(const char *, struct fuse_bufvec *buf, off_t off, ++ struct fuse_file_info *); ++ ++ /* ++ * Store data from an open file in a buffer ++ * ++ * Similar to the read() method, but data is stored and ++ * returned in a generic buffer. ++ * ++ * No actual copying of data has to take place, the source ++ * file descriptor may simply be stored in the buffer for ++ * later data transfer. ++ * ++ * The buffer must be allocated dynamically and stored at the ++ * location pointed to by bufp. 
If the buffer contains memory ++ * regions, they too must be allocated using malloc(). The ++ * allocated memory will be freed by the caller. ++ */ ++ int (*read_buf)(const char *, struct fuse_bufvec **bufp, size_t size, ++ off_t off, struct fuse_file_info *); ++ /** ++ * Perform BSD file locking operation ++ * ++ * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN ++ * ++ * Nonblocking requests will be indicated by ORing LOCK_NB to ++ * the above operations ++ * ++ * For more information see the flock(2) manual page. ++ * ++ * Additionally fi->owner will be set to a value unique to ++ * this open file. This same value will be supplied to ++ * ->release() when the file is released. ++ * ++ * Note: if this method is not implemented, the kernel will still ++ * allow file locking to work locally. Hence it is only ++ * interesting for network filesystems and similar. ++ */ ++ int (*flock)(const char *, struct fuse_file_info *, int op); ++ ++ /** ++ * Allocates space for an open file ++ * ++ * This function ensures that required space is allocated for specified ++ * file. If this function returns success then any subsequent write ++ * request to specified range is guaranteed not to fail because of lack ++ * of space on the file system media. ++ */ ++ int (*fallocate)(const char *, int, off_t, off_t, struct fuse_file_info *); ++ ++ /** ++ * Copy a range of data from one file to another ++ * ++ * Performs an optimized copy between two file descriptors without the ++ * additional cost of transferring data through the FUSE kernel module ++ * to user space (glibc) and then back into the FUSE filesystem again. ++ * ++ * In case this method is not implemented, glibc falls back to reading ++ * data from the source and writing to the destination. Effectively ++ * doing an inefficient copy of the data. 
++ */ ++ ssize_t (*copy_file_range)(const char *path_in, ++ struct fuse_file_info *fi_in, off_t offset_in, ++ const char *path_out, ++ struct fuse_file_info *fi_out, off_t offset_out, ++ size_t size, int flags); ++ ++ /** ++ * Find next data or hole after the specified offset ++ */ ++ off_t (*lseek)(const char *, off_t off, int whence, ++ struct fuse_file_info *); + }; + +-/** Extra context that may be needed by some filesystems ++/* ++ * Extra context that may be needed by some filesystems + * + * The uid, gid and pid fields are not filled in case of a writepage + * operation. + */ + struct fuse_context { +- /** Pointer to the fuse object */ +- struct fuse *fuse; ++ /** Pointer to the fuse object */ ++ struct fuse *fuse; + +- /** User ID of the calling process */ +- uid_t uid; ++ /** User ID of the calling process */ ++ uid_t uid; + +- /** Group ID of the calling process */ +- gid_t gid; ++ /** Group ID of the calling process */ ++ gid_t gid; + +- /** Process ID of the calling thread */ +- pid_t pid; ++ /** Process ID of the calling thread */ ++ pid_t pid; + +- /** Private filesystem data */ +- void *private_data; ++ /** Private filesystem data */ ++ void *private_data; + +- /** Umask of the calling process */ +- mode_t umask; ++ /** Umask of the calling process */ ++ mode_t umask; + }; + + /** +@@ -859,15 +880,15 @@ struct fuse_context { + * Example usage, see hello.c + */ + /* +- int fuse_main(int argc, char *argv[], const struct fuse_operations *op, +- void *private_data); +-*/ +-#define fuse_main(argc, argv, op, private_data) \ +- fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) ++ * int fuse_main(int argc, char *argv[], const struct fuse_operations *op, ++ * void *private_data); ++ */ ++#define fuse_main(argc, argv, op, private_data) \ ++ fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) + +-/* ----------------------------------------------------------- * +- * More detailed API * +- * 
----------------------------------------------------------- */ ++/* ++ * More detailed API ++ */ + + /** + * Print available options (high- and low-level) to stdout. This is +@@ -910,12 +931,13 @@ void fuse_lib_help(struct fuse_args *args); + * @return the created FUSE handle + */ + #if FUSE_USE_VERSION == 30 +-struct fuse *fuse_new_30(struct fuse_args *args, const struct fuse_operations *op, +- size_t op_size, void *private_data); ++struct fuse *fuse_new_30(struct fuse_args *args, ++ const struct fuse_operations *op, size_t op_size, ++ void *private_data); + #define fuse_new(args, op, size, data) fuse_new_30(args, op, size, data) + #else + struct fuse *fuse_new(struct fuse_args *args, const struct fuse_operations *op, +- size_t op_size, void *private_data); ++ size_t op_size, void *private_data); + #endif + + /** +@@ -940,7 +962,7 @@ void fuse_unmount(struct fuse *f); + /** + * Destroy the FUSE handle. + * +- * NOTE: This function does not unmount the filesystem. If this is ++ * NOTE: This function does not unmount the filesystem. If this is + * needed, call fuse_unmount() before calling this function. + * + * @param f the FUSE handle +@@ -1030,7 +1052,7 @@ int fuse_invalidate_path(struct fuse *f, const char *path); + * Do not call this directly, use fuse_main() + */ + int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, +- size_t op_size, void *private_data); ++ size_t op_size, void *private_data); + + /** + * Start the cleanup thread when using option "remember". 
+@@ -1081,89 +1103,87 @@ struct fuse_fs; + */ + + int fuse_fs_getattr(struct fuse_fs *fs, const char *path, struct stat *buf, +- struct fuse_file_info *fi); +-int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, +- const char *newpath, unsigned int flags); ++ struct fuse_file_info *fi); ++int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, const char *newpath, ++ unsigned int flags); + int fuse_fs_unlink(struct fuse_fs *fs, const char *path); + int fuse_fs_rmdir(struct fuse_fs *fs, const char *path); +-int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, +- const char *path); ++int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, const char *path); + int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath); +-int fuse_fs_release(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi); ++int fuse_fs_release(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); + int fuse_fs_open(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size, +- off_t off, struct fuse_file_info *fi); ++ off_t off, struct fuse_file_info *fi); + int fuse_fs_read_buf(struct fuse_fs *fs, const char *path, +- struct fuse_bufvec **bufp, size_t size, off_t off, +- struct fuse_file_info *fi); ++ struct fuse_bufvec **bufp, size_t size, off_t off, ++ struct fuse_file_info *fi); + int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf, +- size_t size, off_t off, struct fuse_file_info *fi); ++ size_t size, off_t off, struct fuse_file_info *fi); + int fuse_fs_write_buf(struct fuse_fs *fs, const char *path, +- struct fuse_bufvec *buf, off_t off, +- struct fuse_file_info *fi); ++ struct fuse_bufvec *buf, off_t off, ++ struct fuse_file_info *fi); + int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int 
fuse_fs_flush(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_statfs(struct fuse_fs *fs, const char *path, struct statvfs *buf); + int fuse_fs_opendir(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_readdir(struct fuse_fs *fs, const char *path, void *buf, +- fuse_fill_dir_t filler, off_t off, +- struct fuse_file_info *fi, enum fuse_readdir_flags flags); ++ fuse_fill_dir_t filler, off_t off, ++ struct fuse_file_info *fi, enum fuse_readdir_flags flags); + int fuse_fs_fsyncdir(struct fuse_fs *fs, const char *path, int datasync, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_releasedir(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_create(struct fuse_fs *fs, const char *path, mode_t mode, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_lock(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi, int cmd, struct flock *lock); ++ struct fuse_file_info *fi, int cmd, struct flock *lock); + int fuse_fs_flock(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi, int op); ++ struct fuse_file_info *fi, int op); + int fuse_fs_chmod(struct fuse_fs *fs, const char *path, mode_t mode, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_truncate(struct fuse_fs *fs, const char *path, off_t size, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + int fuse_fs_utimens(struct fuse_fs *fs, const char *path, +- const struct timespec tv[2], struct fuse_file_info *fi); ++ const struct timespec tv[2], struct fuse_file_info *fi); + int fuse_fs_access(struct fuse_fs *fs, const char *path, int mask); + int fuse_fs_readlink(struct fuse_fs *fs, 
const char *path, char *buf, +- size_t len); ++ size_t len); + int fuse_fs_mknod(struct fuse_fs *fs, const char *path, mode_t mode, +- dev_t rdev); ++ dev_t rdev); + int fuse_fs_mkdir(struct fuse_fs *fs, const char *path, mode_t mode); + int fuse_fs_setxattr(struct fuse_fs *fs, const char *path, const char *name, +- const char *value, size_t size, int flags); ++ const char *value, size_t size, int flags); + int fuse_fs_getxattr(struct fuse_fs *fs, const char *path, const char *name, +- char *value, size_t size); ++ char *value, size_t size); + int fuse_fs_listxattr(struct fuse_fs *fs, const char *path, char *list, +- size_t size); +-int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, +- const char *name); ++ size_t size); ++int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, const char *name); + int fuse_fs_bmap(struct fuse_fs *fs, const char *path, size_t blocksize, +- uint64_t *idx); ++ uint64_t *idx); + int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, unsigned int cmd, +- void *arg, struct fuse_file_info *fi, unsigned int flags, +- void *data); ++ void *arg, struct fuse_file_info *fi, unsigned int flags, ++ void *data); + int fuse_fs_poll(struct fuse_fs *fs, const char *path, +- struct fuse_file_info *fi, struct fuse_pollhandle *ph, +- unsigned *reventsp); ++ struct fuse_file_info *fi, struct fuse_pollhandle *ph, ++ unsigned *reventsp); + int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, int mode, +- off_t offset, off_t length, struct fuse_file_info *fi); ++ off_t offset, off_t length, struct fuse_file_info *fi); + ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in, +- struct fuse_file_info *fi_in, off_t off_in, +- const char *path_out, +- struct fuse_file_info *fi_out, off_t off_out, +- size_t len, int flags); ++ struct fuse_file_info *fi_in, off_t off_in, ++ const char *path_out, ++ struct fuse_file_info *fi_out, off_t off_out, ++ size_t len, int flags); + off_t fuse_fs_lseek(struct fuse_fs *fs, const 
char *path, off_t off, int whence, +- struct fuse_file_info *fi); ++ struct fuse_file_info *fi); + void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn, +- struct fuse_config *cfg); ++ struct fuse_config *cfg); + void fuse_fs_destroy(struct fuse_fs *fs); + + int fuse_notify_poll(struct fuse_pollhandle *ph); +@@ -1182,7 +1202,7 @@ int fuse_notify_poll(struct fuse_pollhandle *ph); + * @return a new filesystem object + */ + struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, +- void *private_data); ++ void *private_data); + + /** + * Factory for creating filesystem objects +@@ -1199,7 +1219,7 @@ struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, + * @return the new filesystem object + */ + typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, +- struct fuse_fs *fs[]); ++ struct fuse_fs *fs[]); + /** + * Register filesystem module + * +@@ -1211,7 +1231,7 @@ typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, + * @param factory_ the factory function for this filesystem module + */ + #define FUSE_REGISTER_MODULE(name_, factory_) \ +- fuse_module_factory_t fuse_module_ ## name_ ## _factory = factory_ ++ fuse_module_factory_t fuse_module_##name_##_factory = factory_ + + /** Get session from fuse object */ + struct fuse_session *fuse_get_session(struct fuse *f); +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index bf8f8cc..bd9bf86 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -1,21 +1,23 @@ +-/* FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. +-*/ ++/* ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. 
++ */ + + /** @file */ + + #if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_) +-#error "Never include directly; use or instead." ++#error \ ++ "Never include directly; use or instead." + #endif + + #ifndef FUSE_COMMON_H_ + #define FUSE_COMMON_H_ + +-#include "fuse_opt.h" + #include "fuse_log.h" ++#include "fuse_opt.h" + #include + #include + +@@ -25,7 +27,7 @@ + /** Minor version of FUSE library interface */ + #define FUSE_MINOR_VERSION 2 + +-#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) ++#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) + #define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) + + /** +@@ -38,67 +40,83 @@ + * descriptors can share a single file handle. + */ + struct fuse_file_info { +- /** Open flags. Available in open() and release() */ +- int flags; +- +- /** In case of a write operation indicates if this was caused +- by a delayed write from the page cache. If so, then the +- context's pid, uid, and gid fields will not be valid, and +- the *fh* value may not match the *fh* value that would +- have been sent with the corresponding individual write +- requests if write caching had been disabled. */ +- unsigned int writepage : 1; +- +- /** Can be filled in by open, to use direct I/O on this file. */ +- unsigned int direct_io : 1; +- +- /** Can be filled in by open. It signals the kernel that any +- currently cached file data (ie., data that the filesystem +- provided the last time the file was open) need not be +- invalidated. Has no effect when set in other contexts (in +- particular it does nothing when set by opendir()). */ +- unsigned int keep_cache : 1; +- +- /** Indicates a flush operation. Set in flush operation, also +- maybe set in highlevel lock operation and lowlevel release +- operation. */ +- unsigned int flush : 1; +- +- /** Can be filled in by open, to indicate that the file is not +- seekable. 
*/ +- unsigned int nonseekable : 1; +- +- /* Indicates that flock locks for this file should be +- released. If set, lock_owner shall contain a valid value. +- May only be set in ->release(). */ +- unsigned int flock_release : 1; +- +- /** Can be filled in by opendir. It signals the kernel to +- enable caching of entries returned by readdir(). Has no +- effect when set in other contexts (in particular it does +- nothing when set by open()). */ +- unsigned int cache_readdir : 1; +- +- /** Padding. Reserved for future use*/ +- unsigned int padding : 25; +- unsigned int padding2 : 32; +- +- /** File handle id. May be filled in by filesystem in create, +- * open, and opendir(). Available in most other file operations on the +- * same file handle. */ +- uint64_t fh; +- +- /** Lock owner id. Available in locking operations and flush */ +- uint64_t lock_owner; +- +- /** Requested poll events. Available in ->poll. Only set on kernels +- which support it. If unsupported, this field is set to zero. */ +- uint32_t poll_events; ++ /** Open flags. Available in open() and release() */ ++ int flags; ++ ++ /* ++ * In case of a write operation indicates if this was caused ++ * by a delayed write from the page cache. If so, then the ++ * context's pid, uid, and gid fields will not be valid, and ++ * the *fh* value may not match the *fh* value that would ++ * have been sent with the corresponding individual write ++ * requests if write caching had been disabled. ++ */ ++ unsigned int writepage:1; ++ ++ /** Can be filled in by open, to use direct I/O on this file. */ ++ unsigned int direct_io:1; ++ ++ /* ++ * Can be filled in by open. It signals the kernel that any ++ * currently cached file data (ie., data that the filesystem ++ * provided the last time the file was open) need not be ++ * invalidated. Has no effect when set in other contexts (in ++ * particular it does nothing when set by opendir()). ++ */ ++ unsigned int keep_cache:1; ++ ++ /* ++ * Indicates a flush operation. 
Set in flush operation, also ++ * maybe set in highlevel lock operation and lowlevel release ++ * operation. ++ */ ++ unsigned int flush:1; ++ ++ /* ++ * Can be filled in by open, to indicate that the file is not ++ * seekable. ++ */ ++ unsigned int nonseekable:1; ++ ++ /* ++ * Indicates that flock locks for this file should be ++ * released. If set, lock_owner shall contain a valid value. ++ * May only be set in ->release(). ++ */ ++ unsigned int flock_release:1; ++ ++ /* ++ * Can be filled in by opendir. It signals the kernel to ++ * enable caching of entries returned by readdir(). Has no ++ * effect when set in other contexts (in particular it does ++ * nothing when set by open()). ++ */ ++ unsigned int cache_readdir:1; ++ ++ /** Padding. Reserved for future use*/ ++ unsigned int padding:25; ++ unsigned int padding2:32; ++ ++ /* ++ * File handle id. May be filled in by filesystem in create, ++ * open, and opendir(). Available in most other file operations on the ++ * same file handle. ++ */ ++ uint64_t fh; ++ ++ /** Lock owner id. Available in locking operations and flush */ ++ uint64_t lock_owner; ++ ++ /* ++ * Requested poll events. Available in ->poll. Only set on kernels ++ * which support it. If unsupported, this field is set to zero. ++ */ ++ uint32_t poll_events; + }; + +-/************************************************************************** +- * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * +- **************************************************************************/ ++/* ++ * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' ++ */ + + /** + * Indicates that the filesystem supports asynchronous read requests. +@@ -110,7 +128,7 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_ASYNC_READ (1 << 0) ++#define FUSE_CAP_ASYNC_READ (1 << 0) + + /** + * Indicates that the filesystem supports "remote" locking. 
+@@ -118,7 +136,7 @@ struct fuse_file_info { + * This feature is enabled by default when supported by the kernel, + * and if getlk() and setlk() handlers are implemented. + */ +-#define FUSE_CAP_POSIX_LOCKS (1 << 1) ++#define FUSE_CAP_POSIX_LOCKS (1 << 1) + + /** + * Indicates that the filesystem supports the O_TRUNC open flag. If +@@ -127,14 +145,14 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) ++#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) + + /** + * Indicates that the filesystem supports lookups of "." and "..". + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) ++#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) + + /** + * Indicates that the kernel should not apply the umask to the +@@ -142,7 +160,7 @@ struct fuse_file_info { + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_DONT_MASK (1 << 6) ++#define FUSE_CAP_DONT_MASK (1 << 6) + + /** + * Indicates that libfuse should try to use splice() when writing to +@@ -150,7 +168,7 @@ struct fuse_file_info { + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_SPLICE_WRITE (1 << 7) ++#define FUSE_CAP_SPLICE_WRITE (1 << 7) + + /** + * Indicates that libfuse should try to move pages instead of copying when +@@ -158,7 +176,7 @@ struct fuse_file_info { + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_SPLICE_MOVE (1 << 8) ++#define FUSE_CAP_SPLICE_MOVE (1 << 8) + + /** + * Indicates that libfuse should try to use splice() when reading from +@@ -167,7 +185,7 @@ struct fuse_file_info { + * This feature is enabled by default when supported by the kernel and + * if the filesystem implements a write_buf() handler. 
+ */ +-#define FUSE_CAP_SPLICE_READ (1 << 9) ++#define FUSE_CAP_SPLICE_READ (1 << 9) + + /** + * If set, the calls to flock(2) will be emulated using POSIX locks and must +@@ -180,14 +198,14 @@ struct fuse_file_info { + * This feature is enabled by default when supported by the kernel and + * if the filesystem implements a flock() handler. + */ +-#define FUSE_CAP_FLOCK_LOCKS (1 << 10) ++#define FUSE_CAP_FLOCK_LOCKS (1 << 10) + + /** + * Indicates that the filesystem supports ioctl's on directories. + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_IOCTL_DIR (1 << 11) ++#define FUSE_CAP_IOCTL_DIR (1 << 11) + + /** + * Traditionally, while a file is open the FUSE kernel module only +@@ -209,7 +227,7 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) ++#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) + + /** + * Indicates that the filesystem supports readdirplus. +@@ -217,7 +235,7 @@ struct fuse_file_info { + * This feature is enabled by default when supported by the kernel and if the + * filesystem implements a readdirplus() handler. + */ +-#define FUSE_CAP_READDIRPLUS (1 << 13) ++#define FUSE_CAP_READDIRPLUS (1 << 13) + + /** + * Indicates that the filesystem supports adaptive readdirplus. +@@ -245,7 +263,7 @@ struct fuse_file_info { + * if the filesystem implements both a readdirplus() and a readdir() + * handler. + */ +-#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) ++#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) + + /** + * Indicates that the filesystem supports asynchronous direct I/O submission. +@@ -256,7 +274,7 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_ASYNC_DIO (1 << 15) ++#define FUSE_CAP_ASYNC_DIO (1 << 15) + + /** + * Indicates that writeback caching should be enabled. 
This means that +@@ -265,7 +283,7 @@ struct fuse_file_info { + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) ++#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) + + /** + * Indicates support for zero-message opens. If this flag is set in +@@ -278,7 +296,7 @@ struct fuse_file_info { + * Setting (or unsetting) this flag in the `want` field has *no + * effect*. + */ +-#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) ++#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) + + /** + * Indicates support for parallel directory operations. If this flag +@@ -288,7 +306,7 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) ++#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) + + /** + * Indicates support for POSIX ACLs. +@@ -307,7 +325,7 @@ struct fuse_file_info { + * + * This feature is disabled by default. + */ +-#define FUSE_CAP_POSIX_ACL (1 << 19) ++#define FUSE_CAP_POSIX_ACL (1 << 19) + + /** + * Indicates that the filesystem is responsible for unsetting +@@ -316,7 +334,7 @@ struct fuse_file_info { + * + * This feature is enabled by default when supported by the kernel. + */ +-#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) ++#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) + + /** + * Indicates support for zero-message opendirs. If this flag is set in +@@ -328,7 +346,7 @@ struct fuse_file_info { + * + * Setting (or unsetting) this flag in the `want` field has *no effect*. 
+ */ +-#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) ++#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) + + /** + * Ioctl flags +@@ -340,12 +358,12 @@ struct fuse_file_info { + * + * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs + */ +-#define FUSE_IOCTL_COMPAT (1 << 0) +-#define FUSE_IOCTL_UNRESTRICTED (1 << 1) +-#define FUSE_IOCTL_RETRY (1 << 2) +-#define FUSE_IOCTL_DIR (1 << 4) ++#define FUSE_IOCTL_COMPAT (1 << 0) ++#define FUSE_IOCTL_UNRESTRICTED (1 << 1) ++#define FUSE_IOCTL_RETRY (1 << 2) ++#define FUSE_IOCTL_DIR (1 << 4) + +-#define FUSE_IOCTL_MAX_IOV 256 ++#define FUSE_IOCTL_MAX_IOV 256 + + /** + * Connection information, passed to the ->init() method +@@ -355,114 +373,114 @@ struct fuse_file_info { + * value must usually be smaller than the indicated value. + */ + struct fuse_conn_info { +- /** +- * Major version of the protocol (read-only) +- */ +- unsigned proto_major; +- +- /** +- * Minor version of the protocol (read-only) +- */ +- unsigned proto_minor; +- +- /** +- * Maximum size of the write buffer +- */ +- unsigned max_write; +- +- /** +- * Maximum size of read requests. A value of zero indicates no +- * limit. However, even if the filesystem does not specify a +- * limit, the maximum size of read requests will still be +- * limited by the kernel. +- * +- * NOTE: For the time being, the maximum size of read requests +- * must be set both here *and* passed to fuse_session_new() +- * using the ``-o max_read=`` mount option. At some point +- * in the future, specifying the mount option will no longer +- * be necessary. +- */ +- unsigned max_read; +- +- /** +- * Maximum readahead +- */ +- unsigned max_readahead; +- +- /** +- * Capability flags that the kernel supports (read-only) +- */ +- unsigned capable; +- +- /** +- * Capability flags that the filesystem wants to enable. +- * +- * libfuse attempts to initialize this field with +- * reasonable default values before calling the init() handler. 
+- */ +- unsigned want; +- +- /** +- * Maximum number of pending "background" requests. A +- * background request is any type of request for which the +- * total number is not limited by other means. As of kernel +- * 4.8, only two types of requests fall into this category: +- * +- * 1. Read-ahead requests +- * 2. Asynchronous direct I/O requests +- * +- * Read-ahead requests are generated (if max_readahead is +- * non-zero) by the kernel to preemptively fill its caches +- * when it anticipates that userspace will soon read more +- * data. +- * +- * Asynchronous direct I/O requests are generated if +- * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large +- * direct I/O request. In this case the kernel will internally +- * split it up into multiple smaller requests and submit them +- * to the filesystem concurrently. +- * +- * Note that the following requests are *not* background +- * requests: writeback requests (limited by the kernel's +- * flusher algorithm), regular (i.e., synchronous and +- * buffered) userspace read/write requests (limited to one per +- * thread), asynchronous read requests (Linux's io_submit(2) +- * call actually blocks, so these are also limited to one per +- * thread). +- */ +- unsigned max_background; +- +- /** +- * Kernel congestion threshold parameter. If the number of pending +- * background requests exceeds this number, the FUSE kernel module will +- * mark the filesystem as "congested". This instructs the kernel to +- * expect that queued requests will take some time to complete, and to +- * adjust its algorithms accordingly (e.g. by putting a waiting thread +- * to sleep instead of using a busy-loop). +- */ +- unsigned congestion_threshold; +- +- /** +- * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible +- * for updating mtime and ctime when write requests are received. The +- * updated values are passed to the filesystem with setattr() requests. 
+- * However, if the filesystem does not support the full resolution of +- * the kernel timestamps (nanoseconds), the mtime and ctime values used +- * by kernel and filesystem will differ (and result in an apparent +- * change of times after a cache flush). +- * +- * To prevent this problem, this variable can be used to inform the +- * kernel about the timestamp granularity supported by the file-system. +- * The value should be power of 10. The default is 1, i.e. full +- * nano-second resolution. Filesystems supporting only second resolution +- * should set this to 1000000000. +- */ +- unsigned time_gran; +- +- /** +- * For future use. +- */ +- unsigned reserved[22]; ++ /** ++ * Major version of the protocol (read-only) ++ */ ++ unsigned proto_major; ++ ++ /** ++ * Minor version of the protocol (read-only) ++ */ ++ unsigned proto_minor; ++ ++ /** ++ * Maximum size of the write buffer ++ */ ++ unsigned max_write; ++ ++ /** ++ * Maximum size of read requests. A value of zero indicates no ++ * limit. However, even if the filesystem does not specify a ++ * limit, the maximum size of read requests will still be ++ * limited by the kernel. ++ * ++ * NOTE: For the time being, the maximum size of read requests ++ * must be set both here *and* passed to fuse_session_new() ++ * using the ``-o max_read=`` mount option. At some point ++ * in the future, specifying the mount option will no longer ++ * be necessary. ++ */ ++ unsigned max_read; ++ ++ /** ++ * Maximum readahead ++ */ ++ unsigned max_readahead; ++ ++ /** ++ * Capability flags that the kernel supports (read-only) ++ */ ++ unsigned capable; ++ ++ /** ++ * Capability flags that the filesystem wants to enable. ++ * ++ * libfuse attempts to initialize this field with ++ * reasonable default values before calling the init() handler. ++ */ ++ unsigned want; ++ ++ /** ++ * Maximum number of pending "background" requests. 
A ++ * background request is any type of request for which the ++ * total number is not limited by other means. As of kernel ++ * 4.8, only two types of requests fall into this category: ++ * ++ * 1. Read-ahead requests ++ * 2. Asynchronous direct I/O requests ++ * ++ * Read-ahead requests are generated (if max_readahead is ++ * non-zero) by the kernel to preemptively fill its caches ++ * when it anticipates that userspace will soon read more ++ * data. ++ * ++ * Asynchronous direct I/O requests are generated if ++ * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large ++ * direct I/O request. In this case the kernel will internally ++ * split it up into multiple smaller requests and submit them ++ * to the filesystem concurrently. ++ * ++ * Note that the following requests are *not* background ++ * requests: writeback requests (limited by the kernel's ++ * flusher algorithm), regular (i.e., synchronous and ++ * buffered) userspace read/write requests (limited to one per ++ * thread), asynchronous read requests (Linux's io_submit(2) ++ * call actually blocks, so these are also limited to one per ++ * thread). ++ */ ++ unsigned max_background; ++ ++ /** ++ * Kernel congestion threshold parameter. If the number of pending ++ * background requests exceeds this number, the FUSE kernel module will ++ * mark the filesystem as "congested". This instructs the kernel to ++ * expect that queued requests will take some time to complete, and to ++ * adjust its algorithms accordingly (e.g. by putting a waiting thread ++ * to sleep instead of using a busy-loop). ++ */ ++ unsigned congestion_threshold; ++ ++ /** ++ * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible ++ * for updating mtime and ctime when write requests are received. The ++ * updated values are passed to the filesystem with setattr() requests. 
++ * However, if the filesystem does not support the full resolution of ++ * the kernel timestamps (nanoseconds), the mtime and ctime values used ++ * by kernel and filesystem will differ (and result in an apparent ++ * change of times after a cache flush). ++ * ++ * To prevent this problem, this variable can be used to inform the ++ * kernel about the timestamp granularity supported by the file-system. ++ * The value should be power of 10. The default is 1, i.e. full ++ * nano-second resolution. Filesystems supporting only second resolution ++ * should set this to 1000000000. ++ */ ++ unsigned time_gran; ++ ++ /** ++ * For future use. ++ */ ++ unsigned reserved[22]; + }; + + struct fuse_session; +@@ -489,21 +507,20 @@ struct fuse_conn_info_opts; + * -o async_read sets FUSE_CAP_ASYNC_READ in conn->want + * -o sync_read unsets FUSE_CAP_ASYNC_READ in conn->want + * -o atomic_o_trunc sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want +- * -o no_remote_lock Equivalent to -o no_remote_flock,no_remote_posix_lock +- * -o no_remote_flock Unsets FUSE_CAP_FLOCK_LOCKS in conn->want +- * -o no_remote_posix_lock Unsets FUSE_CAP_POSIX_LOCKS in conn->want +- * -o [no_]splice_write (un-)sets FUSE_CAP_SPLICE_WRITE in conn->want +- * -o [no_]splice_move (un-)sets FUSE_CAP_SPLICE_MOVE in conn->want +- * -o [no_]splice_read (un-)sets FUSE_CAP_SPLICE_READ in conn->want +- * -o [no_]auto_inval_data (un-)sets FUSE_CAP_AUTO_INVAL_DATA in conn->want +- * -o readdirplus=no unsets FUSE_CAP_READDIRPLUS in conn->want +- * -o readdirplus=yes sets FUSE_CAP_READDIRPLUS and unsets +- * FUSE_CAP_READDIRPLUS_AUTO in conn->want +- * -o readdirplus=auto sets FUSE_CAP_READDIRPLUS and +- * FUSE_CAP_READDIRPLUS_AUTO in conn->want +- * -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in conn->want +- * -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in conn->want +- * -o time_gran=N sets conn->time_gran ++ * -o no_remote_lock Equivalent to -o ++ *no_remote_flock,no_remote_posix_lock -o no_remote_flock 
Unsets ++ *FUSE_CAP_FLOCK_LOCKS in conn->want -o no_remote_posix_lock Unsets ++ *FUSE_CAP_POSIX_LOCKS in conn->want -o [no_]splice_write (un-)sets ++ *FUSE_CAP_SPLICE_WRITE in conn->want -o [no_]splice_move (un-)sets ++ *FUSE_CAP_SPLICE_MOVE in conn->want -o [no_]splice_read (un-)sets ++ *FUSE_CAP_SPLICE_READ in conn->want -o [no_]auto_inval_data (un-)sets ++ *FUSE_CAP_AUTO_INVAL_DATA in conn->want -o readdirplus=no unsets ++ *FUSE_CAP_READDIRPLUS in conn->want -o readdirplus=yes sets ++ *FUSE_CAP_READDIRPLUS and unsets FUSE_CAP_READDIRPLUS_AUTO in conn->want -o ++ *readdirplus=auto sets FUSE_CAP_READDIRPLUS and FUSE_CAP_READDIRPLUS_AUTO ++ *in conn->want -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in ++ *conn->want -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in ++ *conn->want -o time_gran=N sets conn->time_gran + * + * Known options will be removed from *args*, unknown options will be + * passed through unchanged. +@@ -511,7 +528,7 @@ struct fuse_conn_info_opts; + * @param args argument vector (input+output) + * @return parsed options + **/ +-struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); ++struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args); + + /** + * This function applies the (parsed) parameters in *opts* to the +@@ -521,7 +538,7 @@ struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); + * option has been explicitly set. 
+ */ + void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, +- struct fuse_conn_info *conn); ++ struct fuse_conn_info *conn); + + /** + * Go into the background +@@ -552,81 +569,81 @@ const char *fuse_pkgversion(void); + */ + void fuse_pollhandle_destroy(struct fuse_pollhandle *ph); + +-/* ----------------------------------------------------------- * +- * Data buffer * +- * ----------------------------------------------------------- */ ++/* ++ * Data buffer ++ */ + + /** + * Buffer flags + */ + enum fuse_buf_flags { +- /** +- * Buffer contains a file descriptor +- * +- * If this flag is set, the .fd field is valid, otherwise the +- * .mem fields is valid. +- */ +- FUSE_BUF_IS_FD = (1 << 1), +- +- /** +- * Seek on the file descriptor +- * +- * If this flag is set then the .pos field is valid and is +- * used to seek to the given offset before performing +- * operation on file descriptor. +- */ +- FUSE_BUF_FD_SEEK = (1 << 2), +- +- /** +- * Retry operation on file descriptor +- * +- * If this flag is set then retry operation on file descriptor +- * until .size bytes have been copied or an error or EOF is +- * detected. +- */ +- FUSE_BUF_FD_RETRY = (1 << 3), ++ /** ++ * Buffer contains a file descriptor ++ * ++ * If this flag is set, the .fd field is valid, otherwise the ++ * .mem fields is valid. ++ */ ++ FUSE_BUF_IS_FD = (1 << 1), ++ ++ /** ++ * Seek on the file descriptor ++ * ++ * If this flag is set then the .pos field is valid and is ++ * used to seek to the given offset before performing ++ * operation on file descriptor. ++ */ ++ FUSE_BUF_FD_SEEK = (1 << 2), ++ ++ /** ++ * Retry operation on file descriptor ++ * ++ * If this flag is set then retry operation on file descriptor ++ * until .size bytes have been copied or an error or EOF is ++ * detected. 
++ */ ++ FUSE_BUF_FD_RETRY = (1 << 3), + }; + + /** + * Buffer copy flags + */ + enum fuse_buf_copy_flags { +- /** +- * Don't use splice(2) +- * +- * Always fall back to using read and write instead of +- * splice(2) to copy data from one file descriptor to another. +- * +- * If this flag is not set, then only fall back if splice is +- * unavailable. +- */ +- FUSE_BUF_NO_SPLICE = (1 << 1), +- +- /** +- * Force splice +- * +- * Always use splice(2) to copy data from one file descriptor +- * to another. If splice is not available, return -EINVAL. +- */ +- FUSE_BUF_FORCE_SPLICE = (1 << 2), +- +- /** +- * Try to move data with splice. +- * +- * If splice is used, try to move pages from the source to the +- * destination instead of copying. See documentation of +- * SPLICE_F_MOVE in splice(2) man page. +- */ +- FUSE_BUF_SPLICE_MOVE = (1 << 3), +- +- /** +- * Don't block on the pipe when copying data with splice +- * +- * Makes the operations on the pipe non-blocking (if the pipe +- * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) +- * man page. +- */ +- FUSE_BUF_SPLICE_NONBLOCK= (1 << 4), ++ /** ++ * Don't use splice(2) ++ * ++ * Always fall back to using read and write instead of ++ * splice(2) to copy data from one file descriptor to another. ++ * ++ * If this flag is not set, then only fall back if splice is ++ * unavailable. ++ */ ++ FUSE_BUF_NO_SPLICE = (1 << 1), ++ ++ /** ++ * Force splice ++ * ++ * Always use splice(2) to copy data from one file descriptor ++ * to another. If splice is not available, return -EINVAL. ++ */ ++ FUSE_BUF_FORCE_SPLICE = (1 << 2), ++ ++ /** ++ * Try to move data with splice. ++ * ++ * If splice is used, try to move pages from the source to the ++ * destination instead of copying. See documentation of ++ * SPLICE_F_MOVE in splice(2) man page. 
++ */ ++ FUSE_BUF_SPLICE_MOVE = (1 << 3), ++ ++ /** ++ * Don't block on the pipe when copying data with splice ++ * ++ * Makes the operations on the pipe non-blocking (if the pipe ++ * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) ++ * man page. ++ */ ++ FUSE_BUF_SPLICE_NONBLOCK = (1 << 4), + }; + + /** +@@ -636,36 +653,36 @@ enum fuse_buf_copy_flags { + * be supplied as a memory pointer or as a file descriptor + */ + struct fuse_buf { +- /** +- * Size of data in bytes +- */ +- size_t size; +- +- /** +- * Buffer flags +- */ +- enum fuse_buf_flags flags; +- +- /** +- * Memory pointer +- * +- * Used unless FUSE_BUF_IS_FD flag is set. +- */ +- void *mem; +- +- /** +- * File descriptor +- * +- * Used if FUSE_BUF_IS_FD flag is set. +- */ +- int fd; +- +- /** +- * File position +- * +- * Used if FUSE_BUF_FD_SEEK flag is set. +- */ +- off_t pos; ++ /** ++ * Size of data in bytes ++ */ ++ size_t size; ++ ++ /** ++ * Buffer flags ++ */ ++ enum fuse_buf_flags flags; ++ ++ /** ++ * Memory pointer ++ * ++ * Used unless FUSE_BUF_IS_FD flag is set. ++ */ ++ void *mem; ++ ++ /** ++ * File descriptor ++ * ++ * Used if FUSE_BUF_IS_FD flag is set. ++ */ ++ int fd; ++ ++ /** ++ * File position ++ * ++ * Used if FUSE_BUF_FD_SEEK flag is set. ++ */ ++ off_t pos; + }; + + /** +@@ -677,41 +694,39 @@ struct fuse_buf { + * Allocate dynamically to add more than one buffer. 
+ */ + struct fuse_bufvec { +- /** +- * Number of buffers in the array +- */ +- size_t count; +- +- /** +- * Index of current buffer within the array +- */ +- size_t idx; +- +- /** +- * Current offset within the current buffer +- */ +- size_t off; +- +- /** +- * Array of buffers +- */ +- struct fuse_buf buf[1]; ++ /** ++ * Number of buffers in the array ++ */ ++ size_t count; ++ ++ /** ++ * Index of current buffer within the array ++ */ ++ size_t idx; ++ ++ /** ++ * Current offset within the current buffer ++ */ ++ size_t off; ++ ++ /** ++ * Array of buffers ++ */ ++ struct fuse_buf buf[1]; + }; + + /* Initialize bufvec with a single buffer of given size */ +-#define FUSE_BUFVEC_INIT(size__) \ +- ((struct fuse_bufvec) { \ +- /* .count= */ 1, \ +- /* .idx = */ 0, \ +- /* .off = */ 0, \ +- /* .buf = */ { /* [0] = */ { \ +- /* .size = */ (size__), \ +- /* .flags = */ (enum fuse_buf_flags) 0, \ +- /* .mem = */ NULL, \ +- /* .fd = */ -1, \ +- /* .pos = */ 0, \ +- } } \ +- } ) ++#define FUSE_BUFVEC_INIT(size__) \ ++ ((struct fuse_bufvec){ /* .count= */ 1, \ ++ /* .idx = */ 0, \ ++ /* .off = */ 0, /* .buf = */ \ ++ { /* [0] = */ { \ ++ /* .size = */ (size__), \ ++ /* .flags = */ (enum fuse_buf_flags)0, \ ++ /* .mem = */ NULL, \ ++ /* .fd = */ -1, \ ++ /* .pos = */ 0, \ ++ } } }) + + /** + * Get total size of data in a fuse buffer vector +@@ -730,16 +745,16 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); + * @return actual number of bytes copied or -errno on error + */ + ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, +- enum fuse_buf_copy_flags flags); ++ enum fuse_buf_copy_flags flags); + +-/* ----------------------------------------------------------- * +- * Signal handling * +- * ----------------------------------------------------------- */ ++/* ++ * Signal handling ++ */ + + /** + * Exit session on HUP, TERM and INT signals and ignore PIPE signal + * +- * Stores session in a global variable. 
May only be called once per ++ * Stores session in a global variable. May only be called once per + * process until fuse_remove_signal_handlers() is called. + * + * Once either of the POSIX signals arrives, the signal handler calls +@@ -766,12 +781,12 @@ int fuse_set_signal_handlers(struct fuse_session *se); + */ + void fuse_remove_signal_handlers(struct fuse_session *se); + +-/* ----------------------------------------------------------- * +- * Compatibility stuff * +- * ----------------------------------------------------------- */ ++/* ++ * Compatibility stuff ++ */ + + #if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30 +-# error only API version 30 or greater is supported ++#error only API version 30 or greater is supported + #endif + + +@@ -781,11 +796,14 @@ void fuse_remove_signal_handlers(struct fuse_session *se); + * On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags! + */ + +-#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus ++#if defined(__GNUC__) && \ ++ (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ ++ !defined __cplusplus + _Static_assert(sizeof(off_t) == 8, "fuse: off_t must be 64bit"); + #else +-struct _fuse_off_t_must_be_64bit_dummy_struct \ +- { unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); }; ++struct _fuse_off_t_must_be_64bit_dummy_struct { ++ unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); ++}; + #endif + + #endif /* FUSE_COMMON_H_ */ +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index b39522e..e63cb58 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -1,71 +1,71 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU LGPLv2. 
+- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB ++ */ + + #include "fuse.h" + #include "fuse_lowlevel.h" + + struct fuse_req { +- struct fuse_session *se; +- uint64_t unique; +- int ctr; +- pthread_mutex_t lock; +- struct fuse_ctx ctx; +- struct fuse_chan *ch; +- int interrupted; +- unsigned int ioctl_64bit : 1; +- union { +- struct { +- uint64_t unique; +- } i; +- struct { +- fuse_interrupt_func_t func; +- void *data; +- } ni; +- } u; +- struct fuse_req *next; +- struct fuse_req *prev; ++ struct fuse_session *se; ++ uint64_t unique; ++ int ctr; ++ pthread_mutex_t lock; ++ struct fuse_ctx ctx; ++ struct fuse_chan *ch; ++ int interrupted; ++ unsigned int ioctl_64bit:1; ++ union { ++ struct { ++ uint64_t unique; ++ } i; ++ struct { ++ fuse_interrupt_func_t func; ++ void *data; ++ } ni; ++ } u; ++ struct fuse_req *next; ++ struct fuse_req *prev; + }; + + struct fuse_notify_req { +- uint64_t unique; +- void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, +- const void *, const struct fuse_buf *); +- struct fuse_notify_req *next; +- struct fuse_notify_req *prev; ++ uint64_t unique; ++ void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, ++ const void *, const struct fuse_buf *); ++ struct fuse_notify_req *next; ++ struct fuse_notify_req *prev; + }; + + struct fuse_session { +- char *mountpoint; +- volatile int exited; +- int fd; +- int debug; +- int deny_others; +- struct fuse_lowlevel_ops op; +- int got_init; +- struct cuse_data *cuse_data; +- void *userdata; +- uid_t owner; +- struct fuse_conn_info conn; +- struct fuse_req list; +- struct fuse_req interrupts; +- pthread_mutex_t lock; +- int got_destroy; +- int broken_splice_nonblock; +- uint64_t notify_ctr; +- struct fuse_notify_req notify_list; +- size_t bufsize; +- int error; ++ char *mountpoint; ++ volatile int exited; ++ int 
fd; ++ int debug; ++ int deny_others; ++ struct fuse_lowlevel_ops op; ++ int got_init; ++ struct cuse_data *cuse_data; ++ void *userdata; ++ uid_t owner; ++ struct fuse_conn_info conn; ++ struct fuse_req list; ++ struct fuse_req interrupts; ++ pthread_mutex_t lock; ++ int got_destroy; ++ int broken_splice_nonblock; ++ uint64_t notify_ctr; ++ struct fuse_notify_req notify_list; ++ size_t bufsize; ++ int error; + }; + + struct fuse_chan { +- pthread_mutex_t lock; +- int ctr; +- int fd; ++ pthread_mutex_t lock; ++ int ctr; ++ int fd; + }; + + /** +@@ -76,19 +76,20 @@ struct fuse_chan { + * + */ + struct fuse_module { +- char *name; +- fuse_module_factory_t factory; +- struct fuse_module *next; +- struct fusemod_so *so; +- int ctr; ++ char *name; ++ fuse_module_factory_t factory; ++ struct fuse_module *next; ++ struct fusemod_so *so; ++ int ctr; + }; + + int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, +- int count); ++ int count); + void fuse_free_req(fuse_req_t req); + + void fuse_session_process_buf_int(struct fuse_session *se, +- const struct fuse_buf *buf, struct fuse_chan *ch); ++ const struct fuse_buf *buf, ++ struct fuse_chan *ch); + + + #define FUSE_MAX_MAX_PAGES 256 +diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c +index 0d268ab..11345f9 100644 +--- a/tools/virtiofsd/fuse_log.c ++++ b/tools/virtiofsd/fuse_log.c +@@ -1,40 +1,40 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2019 Red Hat, Inc. +- +- Logging API. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2019 Red Hat, Inc. ++ * ++ * Logging API. ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ + + #include "fuse_log.h" + + #include + #include + +-static void default_log_func( +- __attribute__(( unused )) enum fuse_log_level level, +- const char *fmt, va_list ap) ++static void default_log_func(__attribute__((unused)) enum fuse_log_level level, ++ const char *fmt, va_list ap) + { +- vfprintf(stderr, fmt, ap); ++ vfprintf(stderr, fmt, ap); + } + + static fuse_log_func_t log_func = default_log_func; + + void fuse_set_log_func(fuse_log_func_t func) + { +- if (!func) +- func = default_log_func; ++ if (!func) { ++ func = default_log_func; ++ } + +- log_func = func; ++ log_func = func; + } + + void fuse_log(enum fuse_log_level level, const char *fmt, ...) + { +- va_list ap; ++ va_list ap; + +- va_start(ap, fmt); +- log_func(level, fmt, ap); +- va_end(ap); ++ va_start(ap, fmt); ++ log_func(level, fmt, ap); ++ va_end(ap); + } +diff --git a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h +index 0af700d..bf6c11f 100644 +--- a/tools/virtiofsd/fuse_log.h ++++ b/tools/virtiofsd/fuse_log.h +@@ -1,10 +1,10 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2019 Red Hat, Inc. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2019 Red Hat, Inc. ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. ++ */ + + #ifndef FUSE_LOG_H_ + #define FUSE_LOG_H_ +@@ -22,14 +22,14 @@ + * These levels correspond to syslog(2) log levels since they are widely used. 
+ */ + enum fuse_log_level { +- FUSE_LOG_EMERG, +- FUSE_LOG_ALERT, +- FUSE_LOG_CRIT, +- FUSE_LOG_ERR, +- FUSE_LOG_WARNING, +- FUSE_LOG_NOTICE, +- FUSE_LOG_INFO, +- FUSE_LOG_DEBUG ++ FUSE_LOG_EMERG, ++ FUSE_LOG_ALERT, ++ FUSE_LOG_CRIT, ++ FUSE_LOG_ERR, ++ FUSE_LOG_WARNING, ++ FUSE_LOG_NOTICE, ++ FUSE_LOG_INFO, ++ FUSE_LOG_DEBUG + }; + + /** +@@ -45,8 +45,8 @@ enum fuse_log_level { + * @param fmt sprintf-style format string including newline + * @param ap format string arguments + */ +-typedef void (*fuse_log_func_t)(enum fuse_log_level level, +- const char *fmt, va_list ap); ++typedef void (*fuse_log_func_t)(enum fuse_log_level level, const char *fmt, ++ va_list ap); + + /** + * Install a custom log handler function. +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index e6fa247..5c9cb52 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1,2380 +1,2515 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- Implementation of (most of) the low-level FUSE API. The session loop +- functions are implemented in separate files. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * Implementation of (most of) the low-level FUSE API. The session loop ++ * functions are implemented in separate files. ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ + + #define _GNU_SOURCE + + #include "config.h" + #include "fuse_i.h" + #include "fuse_kernel.h" +-#include "fuse_opt.h" + #include "fuse_misc.h" ++#include "fuse_opt.h" + ++#include ++#include ++#include ++#include + #include + #include +-#include + #include +-#include +-#include +-#include +-#include + #include +- ++#include + + + #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) + #define OFFSET_MAX 0x7fffffffffffffffLL + +-#define container_of(ptr, type, member) ({ \ +- const typeof( ((type *)0)->member ) *__mptr = (ptr); \ +- (type *)( (char *)__mptr - offsetof(type,member) );}) ++#define container_of(ptr, type, member) \ ++ ({ \ ++ const typeof(((type *)0)->member) *__mptr = (ptr); \ ++ (type *)((char *)__mptr - offsetof(type, member)); \ ++ }) + + struct fuse_pollhandle { +- uint64_t kh; +- struct fuse_session *se; ++ uint64_t kh; ++ struct fuse_session *se; + }; + + static size_t pagesize; + + static __attribute__((constructor)) void fuse_ll_init_pagesize(void) + { +- pagesize = getpagesize(); ++ pagesize = getpagesize(); + } + + static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) + { +- attr->ino = stbuf->st_ino; +- attr->mode = stbuf->st_mode; +- attr->nlink = stbuf->st_nlink; +- attr->uid = stbuf->st_uid; +- attr->gid = stbuf->st_gid; +- attr->rdev = stbuf->st_rdev; +- attr->size = stbuf->st_size; +- attr->blksize = stbuf->st_blksize; +- attr->blocks = stbuf->st_blocks; +- attr->atime = stbuf->st_atime; +- attr->mtime = stbuf->st_mtime; +- attr->ctime = stbuf->st_ctime; +- attr->atimensec = ST_ATIM_NSEC(stbuf); +- attr->mtimensec = ST_MTIM_NSEC(stbuf); +- attr->ctimensec = ST_CTIM_NSEC(stbuf); ++ attr->ino = stbuf->st_ino; ++ attr->mode = stbuf->st_mode; ++ attr->nlink = stbuf->st_nlink; ++ attr->uid = stbuf->st_uid; ++ attr->gid = stbuf->st_gid; ++ attr->rdev = stbuf->st_rdev; ++ attr->size = stbuf->st_size; ++ attr->blksize = stbuf->st_blksize; ++ attr->blocks = stbuf->st_blocks; ++ 
attr->atime = stbuf->st_atime; ++ attr->mtime = stbuf->st_mtime; ++ attr->ctime = stbuf->st_ctime; ++ attr->atimensec = ST_ATIM_NSEC(stbuf); ++ attr->mtimensec = ST_MTIM_NSEC(stbuf); ++ attr->ctimensec = ST_CTIM_NSEC(stbuf); + } + + static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) + { +- stbuf->st_mode = attr->mode; +- stbuf->st_uid = attr->uid; +- stbuf->st_gid = attr->gid; +- stbuf->st_size = attr->size; +- stbuf->st_atime = attr->atime; +- stbuf->st_mtime = attr->mtime; +- stbuf->st_ctime = attr->ctime; +- ST_ATIM_NSEC_SET(stbuf, attr->atimensec); +- ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); +- ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); ++ stbuf->st_mode = attr->mode; ++ stbuf->st_uid = attr->uid; ++ stbuf->st_gid = attr->gid; ++ stbuf->st_size = attr->size; ++ stbuf->st_atime = attr->atime; ++ stbuf->st_mtime = attr->mtime; ++ stbuf->st_ctime = attr->ctime; ++ ST_ATIM_NSEC_SET(stbuf, attr->atimensec); ++ ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); ++ ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); + } + +-static size_t iov_length(const struct iovec *iov, size_t count) ++static size_t iov_length(const struct iovec *iov, size_t count) + { +- size_t seg; +- size_t ret = 0; ++ size_t seg; ++ size_t ret = 0; + +- for (seg = 0; seg < count; seg++) +- ret += iov[seg].iov_len; +- return ret; ++ for (seg = 0; seg < count; seg++) { ++ ret += iov[seg].iov_len; ++ } ++ return ret; + } + + static void list_init_req(struct fuse_req *req) + { +- req->next = req; +- req->prev = req; ++ req->next = req; ++ req->prev = req; + } + + static void list_del_req(struct fuse_req *req) + { +- struct fuse_req *prev = req->prev; +- struct fuse_req *next = req->next; +- prev->next = next; +- next->prev = prev; ++ struct fuse_req *prev = req->prev; ++ struct fuse_req *next = req->next; ++ prev->next = next; ++ next->prev = prev; + } + + static void list_add_req(struct fuse_req *req, struct fuse_req *next) + { +- struct fuse_req *prev = next->prev; +- req->next = next; 
+- req->prev = prev; +- prev->next = req; +- next->prev = req; ++ struct fuse_req *prev = next->prev; ++ req->next = next; ++ req->prev = prev; ++ prev->next = req; ++ next->prev = req; + } + + static void destroy_req(fuse_req_t req) + { +- pthread_mutex_destroy(&req->lock); +- free(req); ++ pthread_mutex_destroy(&req->lock); ++ free(req); + } + + void fuse_free_req(fuse_req_t req) + { +- int ctr; +- struct fuse_session *se = req->se; ++ int ctr; ++ struct fuse_session *se = req->se; + +- pthread_mutex_lock(&se->lock); +- req->u.ni.func = NULL; +- req->u.ni.data = NULL; +- list_del_req(req); +- ctr = --req->ctr; +- req->ch = NULL; +- pthread_mutex_unlock(&se->lock); +- if (!ctr) +- destroy_req(req); ++ pthread_mutex_lock(&se->lock); ++ req->u.ni.func = NULL; ++ req->u.ni.data = NULL; ++ list_del_req(req); ++ ctr = --req->ctr; ++ req->ch = NULL; ++ pthread_mutex_unlock(&se->lock); ++ if (!ctr) { ++ destroy_req(req); ++ } + } + + static struct fuse_req *fuse_ll_alloc_req(struct fuse_session *se) + { +- struct fuse_req *req; ++ struct fuse_req *req; + +- req = (struct fuse_req *) calloc(1, sizeof(struct fuse_req)); +- if (req == NULL) { +- fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); +- } else { +- req->se = se; +- req->ctr = 1; +- list_init_req(req); +- fuse_mutex_init(&req->lock); +- } ++ req = (struct fuse_req *)calloc(1, sizeof(struct fuse_req)); ++ if (req == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); ++ } else { ++ req->se = se; ++ req->ctr = 1; ++ list_init_req(req); ++ fuse_mutex_init(&req->lock); ++ } + +- return req; ++ return req; + } + + /* Send data. 
If *ch* is NULL, send via session master fd */ + static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, +- struct iovec *iov, int count) ++ struct iovec *iov, int count) + { +- struct fuse_out_header *out = iov[0].iov_base; ++ struct fuse_out_header *out = iov[0].iov_base; + +- out->len = iov_length(iov, count); +- if (se->debug) { +- if (out->unique == 0) { +- fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", +- out->error, out->len); +- } else if (out->error) { +- fuse_log(FUSE_LOG_DEBUG, +- " unique: %llu, error: %i (%s), outsize: %i\n", +- (unsigned long long) out->unique, out->error, +- strerror(-out->error), out->len); +- } else { +- fuse_log(FUSE_LOG_DEBUG, +- " unique: %llu, success, outsize: %i\n", +- (unsigned long long) out->unique, out->len); +- } +- } ++ out->len = iov_length(iov, count); ++ if (se->debug) { ++ if (out->unique == 0) { ++ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, ++ out->len); ++ } else if (out->error) { ++ fuse_log(FUSE_LOG_DEBUG, ++ " unique: %llu, error: %i (%s), outsize: %i\n", ++ (unsigned long long)out->unique, out->error, ++ strerror(-out->error), out->len); ++ } else { ++ fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", ++ (unsigned long long)out->unique, out->len); ++ } ++ } + +- abort(); /* virtio should have taken it before here */ +- return 0; ++ abort(); /* virtio should have taken it before here */ ++ return 0; + } + + + int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, +- int count) ++ int count) + { +- struct fuse_out_header out; ++ struct fuse_out_header out; + +- if (error <= -1000 || error > 0) { +- fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); +- error = -ERANGE; +- } ++ if (error <= -1000 || error > 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); ++ error = -ERANGE; ++ } + +- out.unique = req->unique; +- out.error = error; ++ out.unique = req->unique; ++ out.error = error; + +- 
iov[0].iov_base = &out; +- iov[0].iov_len = sizeof(struct fuse_out_header); ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct fuse_out_header); + +- return fuse_send_msg(req->se, req->ch, iov, count); ++ return fuse_send_msg(req->se, req->ch, iov, count); + } + + static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov, +- int count) ++ int count) + { +- int res; ++ int res; + +- res = fuse_send_reply_iov_nofree(req, error, iov, count); +- fuse_free_req(req); +- return res; ++ res = fuse_send_reply_iov_nofree(req, error, iov, count); ++ fuse_free_req(req); ++ return res; + } + + static int send_reply(fuse_req_t req, int error, const void *arg, +- size_t argsize) ++ size_t argsize) + { +- struct iovec iov[2]; +- int count = 1; +- if (argsize) { +- iov[1].iov_base = (void *) arg; +- iov[1].iov_len = argsize; +- count++; +- } +- return send_reply_iov(req, error, iov, count); ++ struct iovec iov[2]; ++ int count = 1; ++ if (argsize) { ++ iov[1].iov_base = (void *)arg; ++ iov[1].iov_len = argsize; ++ count++; ++ } ++ return send_reply_iov(req, error, iov, count); + } + + int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count) + { +- int res; +- struct iovec *padded_iov; ++ int res; ++ struct iovec *padded_iov; + +- padded_iov = malloc((count + 1) * sizeof(struct iovec)); +- if (padded_iov == NULL) +- return fuse_reply_err(req, ENOMEM); ++ padded_iov = malloc((count + 1) * sizeof(struct iovec)); ++ if (padded_iov == NULL) { ++ return fuse_reply_err(req, ENOMEM); ++ } + +- memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); +- count++; ++ memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); ++ count++; + +- res = send_reply_iov(req, 0, padded_iov, count); +- free(padded_iov); ++ res = send_reply_iov(req, 0, padded_iov, count); ++ free(padded_iov); + +- return res; ++ return res; + } + + +-/* `buf` is allowed to be empty so that the proper size may be +- allocated by the caller */ ++/* ++ * 'buf` is allowed to be empty 
so that the proper size may be ++ * allocated by the caller ++ */ + size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, +- const char *name, const struct stat *stbuf, off_t off) ++ const char *name, const struct stat *stbuf, off_t off) + { +- (void)req; +- size_t namelen; +- size_t entlen; +- size_t entlen_padded; +- struct fuse_dirent *dirent; ++ (void)req; ++ size_t namelen; ++ size_t entlen; ++ size_t entlen_padded; ++ struct fuse_dirent *dirent; + +- namelen = strlen(name); +- entlen = FUSE_NAME_OFFSET + namelen; +- entlen_padded = FUSE_DIRENT_ALIGN(entlen); ++ namelen = strlen(name); ++ entlen = FUSE_NAME_OFFSET + namelen; ++ entlen_padded = FUSE_DIRENT_ALIGN(entlen); + +- if ((buf == NULL) || (entlen_padded > bufsize)) +- return entlen_padded; ++ if ((buf == NULL) || (entlen_padded > bufsize)) { ++ return entlen_padded; ++ } + +- dirent = (struct fuse_dirent*) buf; +- dirent->ino = stbuf->st_ino; +- dirent->off = off; +- dirent->namelen = namelen; +- dirent->type = (stbuf->st_mode & S_IFMT) >> 12; +- memcpy(dirent->name, name, namelen); +- memset(dirent->name + namelen, 0, entlen_padded - entlen); ++ dirent = (struct fuse_dirent *)buf; ++ dirent->ino = stbuf->st_ino; ++ dirent->off = off; ++ dirent->namelen = namelen; ++ dirent->type = (stbuf->st_mode & S_IFMT) >> 12; ++ memcpy(dirent->name, name, namelen); ++ memset(dirent->name + namelen, 0, entlen_padded - entlen); + +- return entlen_padded; ++ return entlen_padded; + } + + static void convert_statfs(const struct statvfs *stbuf, +- struct fuse_kstatfs *kstatfs) ++ struct fuse_kstatfs *kstatfs) + { +- kstatfs->bsize = stbuf->f_bsize; +- kstatfs->frsize = stbuf->f_frsize; +- kstatfs->blocks = stbuf->f_blocks; +- kstatfs->bfree = stbuf->f_bfree; +- kstatfs->bavail = stbuf->f_bavail; +- kstatfs->files = stbuf->f_files; +- kstatfs->ffree = stbuf->f_ffree; +- kstatfs->namelen = stbuf->f_namemax; ++ kstatfs->bsize = stbuf->f_bsize; ++ kstatfs->frsize = stbuf->f_frsize; ++ kstatfs->blocks = 
stbuf->f_blocks; ++ kstatfs->bfree = stbuf->f_bfree; ++ kstatfs->bavail = stbuf->f_bavail; ++ kstatfs->files = stbuf->f_files; ++ kstatfs->ffree = stbuf->f_ffree; ++ kstatfs->namelen = stbuf->f_namemax; + } + + static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) + { +- return send_reply(req, 0, arg, argsize); ++ return send_reply(req, 0, arg, argsize); + } + + int fuse_reply_err(fuse_req_t req, int err) + { +- return send_reply(req, -err, NULL, 0); ++ return send_reply(req, -err, NULL, 0); + } + + void fuse_reply_none(fuse_req_t req) + { +- fuse_free_req(req); ++ fuse_free_req(req); + } + + static unsigned long calc_timeout_sec(double t) + { +- if (t > (double) ULONG_MAX) +- return ULONG_MAX; +- else if (t < 0.0) +- return 0; +- else +- return (unsigned long) t; ++ if (t > (double)ULONG_MAX) { ++ return ULONG_MAX; ++ } else if (t < 0.0) { ++ return 0; ++ } else { ++ return (unsigned long)t; ++ } + } + + static unsigned int calc_timeout_nsec(double t) + { +- double f = t - (double) calc_timeout_sec(t); +- if (f < 0.0) +- return 0; +- else if (f >= 0.999999999) +- return 999999999; +- else +- return (unsigned int) (f * 1.0e9); ++ double f = t - (double)calc_timeout_sec(t); ++ if (f < 0.0) { ++ return 0; ++ } else if (f >= 0.999999999) { ++ return 999999999; ++ } else { ++ return (unsigned int)(f * 1.0e9); ++ } + } + + static void fill_entry(struct fuse_entry_out *arg, +- const struct fuse_entry_param *e) ++ const struct fuse_entry_param *e) + { +- arg->nodeid = e->ino; +- arg->generation = e->generation; +- arg->entry_valid = calc_timeout_sec(e->entry_timeout); +- arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); +- arg->attr_valid = calc_timeout_sec(e->attr_timeout); +- arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); +- convert_stat(&e->attr, &arg->attr); ++ arg->nodeid = e->ino; ++ arg->generation = e->generation; ++ arg->entry_valid = calc_timeout_sec(e->entry_timeout); ++ arg->entry_valid_nsec = 
calc_timeout_nsec(e->entry_timeout); ++ arg->attr_valid = calc_timeout_sec(e->attr_timeout); ++ arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); ++ convert_stat(&e->attr, &arg->attr); + } + +-/* `buf` is allowed to be empty so that the proper size may be +- allocated by the caller */ ++/* ++ * `buf` is allowed to be empty so that the proper size may be ++ * allocated by the caller ++ */ + size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, +- const char *name, +- const struct fuse_entry_param *e, off_t off) +-{ +- (void)req; +- size_t namelen; +- size_t entlen; +- size_t entlen_padded; +- +- namelen = strlen(name); +- entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; +- entlen_padded = FUSE_DIRENT_ALIGN(entlen); +- if ((buf == NULL) || (entlen_padded > bufsize)) +- return entlen_padded; +- +- struct fuse_direntplus *dp = (struct fuse_direntplus *) buf; +- memset(&dp->entry_out, 0, sizeof(dp->entry_out)); +- fill_entry(&dp->entry_out, e); +- +- struct fuse_dirent *dirent = &dp->dirent; +- dirent->ino = e->attr.st_ino; +- dirent->off = off; +- dirent->namelen = namelen; +- dirent->type = (e->attr.st_mode & S_IFMT) >> 12; +- memcpy(dirent->name, name, namelen); +- memset(dirent->name + namelen, 0, entlen_padded - entlen); +- +- return entlen_padded; +-} +- +-static void fill_open(struct fuse_open_out *arg, +- const struct fuse_file_info *f) +-{ +- arg->fh = f->fh; +- if (f->direct_io) +- arg->open_flags |= FOPEN_DIRECT_IO; +- if (f->keep_cache) +- arg->open_flags |= FOPEN_KEEP_CACHE; +- if (f->cache_readdir) +- arg->open_flags |= FOPEN_CACHE_DIR; +- if (f->nonseekable) +- arg->open_flags |= FOPEN_NONSEEKABLE; ++ const char *name, ++ const struct fuse_entry_param *e, off_t off) ++{ ++ (void)req; ++ size_t namelen; ++ size_t entlen; ++ size_t entlen_padded; ++ ++ namelen = strlen(name); ++ entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; ++ entlen_padded = FUSE_DIRENT_ALIGN(entlen); ++ if ((buf == NULL) || (entlen_padded > bufsize)) { ++ 
return entlen_padded; ++ } ++ ++ struct fuse_direntplus *dp = (struct fuse_direntplus *)buf; ++ memset(&dp->entry_out, 0, sizeof(dp->entry_out)); ++ fill_entry(&dp->entry_out, e); ++ ++ struct fuse_dirent *dirent = &dp->dirent; ++ dirent->ino = e->attr.st_ino; ++ dirent->off = off; ++ dirent->namelen = namelen; ++ dirent->type = (e->attr.st_mode & S_IFMT) >> 12; ++ memcpy(dirent->name, name, namelen); ++ memset(dirent->name + namelen, 0, entlen_padded - entlen); ++ ++ return entlen_padded; ++} ++ ++static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f) ++{ ++ arg->fh = f->fh; ++ if (f->direct_io) { ++ arg->open_flags |= FOPEN_DIRECT_IO; ++ } ++ if (f->keep_cache) { ++ arg->open_flags |= FOPEN_KEEP_CACHE; ++ } ++ if (f->cache_readdir) { ++ arg->open_flags |= FOPEN_CACHE_DIR; ++ } ++ if (f->nonseekable) { ++ arg->open_flags |= FOPEN_NONSEEKABLE; ++ } + } + + int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) + { +- struct fuse_entry_out arg; +- size_t size = req->se->conn.proto_minor < 9 ? +- FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(arg); ++ struct fuse_entry_out arg; ++ size_t size = req->se->conn.proto_minor < 9 ? 
FUSE_COMPAT_ENTRY_OUT_SIZE : ++ sizeof(arg); + +- /* before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant +- negative entry */ +- if (!e->ino && req->se->conn.proto_minor < 4) +- return fuse_reply_err(req, ENOENT); ++ /* ++ * before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant ++ * negative entry ++ */ ++ if (!e->ino && req->se->conn.proto_minor < 4) { ++ return fuse_reply_err(req, ENOENT); ++ } + +- memset(&arg, 0, sizeof(arg)); +- fill_entry(&arg, e); +- return send_reply_ok(req, &arg, size); ++ memset(&arg, 0, sizeof(arg)); ++ fill_entry(&arg, e); ++ return send_reply_ok(req, &arg, size); + } + + int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, +- const struct fuse_file_info *f) ++ const struct fuse_file_info *f) + { +- char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; +- size_t entrysize = req->se->conn.proto_minor < 9 ? +- FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(struct fuse_entry_out); +- struct fuse_entry_out *earg = (struct fuse_entry_out *) buf; +- struct fuse_open_out *oarg = (struct fuse_open_out *) (buf + entrysize); ++ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; ++ size_t entrysize = req->se->conn.proto_minor < 9 ? ++ FUSE_COMPAT_ENTRY_OUT_SIZE : ++ sizeof(struct fuse_entry_out); ++ struct fuse_entry_out *earg = (struct fuse_entry_out *)buf; ++ struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize); + +- memset(buf, 0, sizeof(buf)); +- fill_entry(earg, e); +- fill_open(oarg, f); +- return send_reply_ok(req, buf, +- entrysize + sizeof(struct fuse_open_out)); ++ memset(buf, 0, sizeof(buf)); ++ fill_entry(earg, e); ++ fill_open(oarg, f); ++ return send_reply_ok(req, buf, entrysize + sizeof(struct fuse_open_out)); + } + + int fuse_reply_attr(fuse_req_t req, const struct stat *attr, +- double attr_timeout) ++ double attr_timeout) + { +- struct fuse_attr_out arg; +- size_t size = req->se->conn.proto_minor < 9 ? 
+- FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); ++ struct fuse_attr_out arg; ++ size_t size = ++ req->se->conn.proto_minor < 9 ? FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); + +- memset(&arg, 0, sizeof(arg)); +- arg.attr_valid = calc_timeout_sec(attr_timeout); +- arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); +- convert_stat(attr, &arg.attr); ++ memset(&arg, 0, sizeof(arg)); ++ arg.attr_valid = calc_timeout_sec(attr_timeout); ++ arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); ++ convert_stat(attr, &arg.attr); + +- return send_reply_ok(req, &arg, size); ++ return send_reply_ok(req, &arg, size); + } + + int fuse_reply_readlink(fuse_req_t req, const char *linkname) + { +- return send_reply_ok(req, linkname, strlen(linkname)); ++ return send_reply_ok(req, linkname, strlen(linkname)); + } + + int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f) + { +- struct fuse_open_out arg; ++ struct fuse_open_out arg; + +- memset(&arg, 0, sizeof(arg)); +- fill_open(&arg, f); +- return send_reply_ok(req, &arg, sizeof(arg)); ++ memset(&arg, 0, sizeof(arg)); ++ fill_open(&arg, f); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + int fuse_reply_write(fuse_req_t req, size_t count) + { +- struct fuse_write_out arg; ++ struct fuse_write_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.size = count; ++ memset(&arg, 0, sizeof(arg)); ++ arg.size = count; + +- return send_reply_ok(req, &arg, sizeof(arg)); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size) + { +- return send_reply_ok(req, buf, size); ++ return send_reply_ok(req, buf, size); + } + + static int fuse_send_data_iov_fallback(struct fuse_session *se, +- struct fuse_chan *ch, +- struct iovec *iov, int iov_count, +- struct fuse_bufvec *buf, +- size_t len) ++ struct fuse_chan *ch, struct iovec *iov, ++ int iov_count, struct fuse_bufvec *buf, ++ size_t len) + { +- /* Optimize common case */ +- if (buf->count == 1 && buf->idx == 0 && 
buf->off == 0 && +- !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { +- /* FIXME: also avoid memory copy if there are multiple buffers +- but none of them contain an fd */ ++ /* Optimize common case */ ++ if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && ++ !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { ++ /* ++ * FIXME: also avoid memory copy if there are multiple buffers ++ * but none of them contain an fd ++ */ + +- iov[iov_count].iov_base = buf->buf[0].mem; +- iov[iov_count].iov_len = len; +- iov_count++; +- return fuse_send_msg(se, ch, iov, iov_count); +- } ++ iov[iov_count].iov_base = buf->buf[0].mem; ++ iov[iov_count].iov_len = len; ++ iov_count++; ++ return fuse_send_msg(se, ch, iov, iov_count); ++ } + +- abort(); /* Will have taken vhost path */ +- return 0; ++ abort(); /* Will have taken vhost path */ ++ return 0; + } + + static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, +- struct iovec *iov, int iov_count, +- struct fuse_bufvec *buf, unsigned int flags) ++ struct iovec *iov, int iov_count, ++ struct fuse_bufvec *buf, unsigned int flags) + { +- size_t len = fuse_buf_size(buf); +- (void) flags; ++ size_t len = fuse_buf_size(buf); ++ (void)flags; + +- return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); ++ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); + } + + int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags) ++ enum fuse_buf_copy_flags flags) + { +- struct iovec iov[2]; +- struct fuse_out_header out; +- int res; ++ struct iovec iov[2]; ++ struct fuse_out_header out; ++ int res; + +- iov[0].iov_base = &out; +- iov[0].iov_len = sizeof(struct fuse_out_header); ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct fuse_out_header); + +- out.unique = req->unique; +- out.error = 0; ++ out.unique = req->unique; ++ out.error = 0; + +- res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); +- if (res <= 0) { +- fuse_free_req(req); +- return 
res; +- } else { +- return fuse_reply_err(req, res); +- } ++ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); ++ if (res <= 0) { ++ fuse_free_req(req); ++ return res; ++ } else { ++ return fuse_reply_err(req, res); ++ } + } + + int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) + { +- struct fuse_statfs_out arg; +- size_t size = req->se->conn.proto_minor < 4 ? +- FUSE_COMPAT_STATFS_SIZE : sizeof(arg); ++ struct fuse_statfs_out arg; ++ size_t size = ++ req->se->conn.proto_minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(arg); + +- memset(&arg, 0, sizeof(arg)); +- convert_statfs(stbuf, &arg.st); ++ memset(&arg, 0, sizeof(arg)); ++ convert_statfs(stbuf, &arg.st); + +- return send_reply_ok(req, &arg, size); ++ return send_reply_ok(req, &arg, size); + } + + int fuse_reply_xattr(fuse_req_t req, size_t count) + { +- struct fuse_getxattr_out arg; ++ struct fuse_getxattr_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.size = count; ++ memset(&arg, 0, sizeof(arg)); ++ arg.size = count; + +- return send_reply_ok(req, &arg, sizeof(arg)); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + int fuse_reply_lock(fuse_req_t req, const struct flock *lock) + { +- struct fuse_lk_out arg; ++ struct fuse_lk_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.lk.type = lock->l_type; +- if (lock->l_type != F_UNLCK) { +- arg.lk.start = lock->l_start; +- if (lock->l_len == 0) +- arg.lk.end = OFFSET_MAX; +- else +- arg.lk.end = lock->l_start + lock->l_len - 1; +- } +- arg.lk.pid = lock->l_pid; +- return send_reply_ok(req, &arg, sizeof(arg)); ++ memset(&arg, 0, sizeof(arg)); ++ arg.lk.type = lock->l_type; ++ if (lock->l_type != F_UNLCK) { ++ arg.lk.start = lock->l_start; ++ if (lock->l_len == 0) { ++ arg.lk.end = OFFSET_MAX; ++ } else { ++ arg.lk.end = lock->l_start + lock->l_len - 1; ++ } ++ } ++ arg.lk.pid = lock->l_pid; ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + int fuse_reply_bmap(fuse_req_t req, uint64_t idx) + { +- struct fuse_bmap_out 
arg; ++ struct fuse_bmap_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.block = idx; ++ memset(&arg, 0, sizeof(arg)); ++ arg.block = idx; + +- return send_reply_ok(req, &arg, sizeof(arg)); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + static struct fuse_ioctl_iovec *fuse_ioctl_iovec_copy(const struct iovec *iov, +- size_t count) +-{ +- struct fuse_ioctl_iovec *fiov; +- size_t i; +- +- fiov = malloc(sizeof(fiov[0]) * count); +- if (!fiov) +- return NULL; +- +- for (i = 0; i < count; i++) { +- fiov[i].base = (uintptr_t) iov[i].iov_base; +- fiov[i].len = iov[i].iov_len; +- } +- +- return fiov; +-} +- +-int fuse_reply_ioctl_retry(fuse_req_t req, +- const struct iovec *in_iov, size_t in_count, +- const struct iovec *out_iov, size_t out_count) +-{ +- struct fuse_ioctl_out arg; +- struct fuse_ioctl_iovec *in_fiov = NULL; +- struct fuse_ioctl_iovec *out_fiov = NULL; +- struct iovec iov[4]; +- size_t count = 1; +- int res; +- +- memset(&arg, 0, sizeof(arg)); +- arg.flags |= FUSE_IOCTL_RETRY; +- arg.in_iovs = in_count; +- arg.out_iovs = out_count; +- iov[count].iov_base = &arg; +- iov[count].iov_len = sizeof(arg); +- count++; +- +- if (req->se->conn.proto_minor < 16) { +- if (in_count) { +- iov[count].iov_base = (void *)in_iov; +- iov[count].iov_len = sizeof(in_iov[0]) * in_count; +- count++; +- } +- +- if (out_count) { +- iov[count].iov_base = (void *)out_iov; +- iov[count].iov_len = sizeof(out_iov[0]) * out_count; +- count++; +- } +- } else { +- /* Can't handle non-compat 64bit ioctls on 32bit */ +- if (sizeof(void *) == 4 && req->ioctl_64bit) { +- res = fuse_reply_err(req, EINVAL); +- goto out; +- } +- +- if (in_count) { +- in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); +- if (!in_fiov) +- goto enomem; +- +- iov[count].iov_base = (void *)in_fiov; +- iov[count].iov_len = sizeof(in_fiov[0]) * in_count; +- count++; +- } +- if (out_count) { +- out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); +- if (!out_fiov) +- goto enomem; +- +- iov[count].iov_base = 
(void *)out_fiov; +- iov[count].iov_len = sizeof(out_fiov[0]) * out_count; +- count++; +- } +- } +- +- res = send_reply_iov(req, 0, iov, count); ++ size_t count) ++{ ++ struct fuse_ioctl_iovec *fiov; ++ size_t i; ++ ++ fiov = malloc(sizeof(fiov[0]) * count); ++ if (!fiov) { ++ return NULL; ++ } ++ ++ for (i = 0; i < count; i++) { ++ fiov[i].base = (uintptr_t)iov[i].iov_base; ++ fiov[i].len = iov[i].iov_len; ++ } ++ ++ return fiov; ++} ++ ++int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, ++ size_t in_count, const struct iovec *out_iov, ++ size_t out_count) ++{ ++ struct fuse_ioctl_out arg; ++ struct fuse_ioctl_iovec *in_fiov = NULL; ++ struct fuse_ioctl_iovec *out_fiov = NULL; ++ struct iovec iov[4]; ++ size_t count = 1; ++ int res; ++ ++ memset(&arg, 0, sizeof(arg)); ++ arg.flags |= FUSE_IOCTL_RETRY; ++ arg.in_iovs = in_count; ++ arg.out_iovs = out_count; ++ iov[count].iov_base = &arg; ++ iov[count].iov_len = sizeof(arg); ++ count++; ++ ++ if (req->se->conn.proto_minor < 16) { ++ if (in_count) { ++ iov[count].iov_base = (void *)in_iov; ++ iov[count].iov_len = sizeof(in_iov[0]) * in_count; ++ count++; ++ } ++ ++ if (out_count) { ++ iov[count].iov_base = (void *)out_iov; ++ iov[count].iov_len = sizeof(out_iov[0]) * out_count; ++ count++; ++ } ++ } else { ++ /* Can't handle non-compat 64bit ioctls on 32bit */ ++ if (sizeof(void *) == 4 && req->ioctl_64bit) { ++ res = fuse_reply_err(req, EINVAL); ++ goto out; ++ } ++ ++ if (in_count) { ++ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); ++ if (!in_fiov) { ++ goto enomem; ++ } ++ ++ iov[count].iov_base = (void *)in_fiov; ++ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; ++ count++; ++ } ++ if (out_count) { ++ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); ++ if (!out_fiov) { ++ goto enomem; ++ } ++ ++ iov[count].iov_base = (void *)out_fiov; ++ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; ++ count++; ++ } ++ } ++ ++ res = send_reply_iov(req, 0, iov, count); + out: +- 
free(in_fiov); +- free(out_fiov); ++ free(in_fiov); ++ free(out_fiov); + +- return res; ++ return res; + + enomem: +- res = fuse_reply_err(req, ENOMEM); +- goto out; ++ res = fuse_reply_err(req, ENOMEM); ++ goto out; + } + + int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size) + { +- struct fuse_ioctl_out arg; +- struct iovec iov[3]; +- size_t count = 1; ++ struct fuse_ioctl_out arg; ++ struct iovec iov[3]; ++ size_t count = 1; + +- memset(&arg, 0, sizeof(arg)); +- arg.result = result; +- iov[count].iov_base = &arg; +- iov[count].iov_len = sizeof(arg); +- count++; ++ memset(&arg, 0, sizeof(arg)); ++ arg.result = result; ++ iov[count].iov_base = &arg; ++ iov[count].iov_len = sizeof(arg); ++ count++; + +- if (size) { +- iov[count].iov_base = (char *) buf; +- iov[count].iov_len = size; +- count++; +- } ++ if (size) { ++ iov[count].iov_base = (char *)buf; ++ iov[count].iov_len = size; ++ count++; ++ } + +- return send_reply_iov(req, 0, iov, count); ++ return send_reply_iov(req, 0, iov, count); + } + + int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, +- int count) ++ int count) + { +- struct iovec *padded_iov; +- struct fuse_ioctl_out arg; +- int res; ++ struct iovec *padded_iov; ++ struct fuse_ioctl_out arg; ++ int res; + +- padded_iov = malloc((count + 2) * sizeof(struct iovec)); +- if (padded_iov == NULL) +- return fuse_reply_err(req, ENOMEM); ++ padded_iov = malloc((count + 2) * sizeof(struct iovec)); ++ if (padded_iov == NULL) { ++ return fuse_reply_err(req, ENOMEM); ++ } + +- memset(&arg, 0, sizeof(arg)); +- arg.result = result; +- padded_iov[1].iov_base = &arg; +- padded_iov[1].iov_len = sizeof(arg); ++ memset(&arg, 0, sizeof(arg)); ++ arg.result = result; ++ padded_iov[1].iov_base = &arg; ++ padded_iov[1].iov_len = sizeof(arg); + +- memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); ++ memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); + +- res = send_reply_iov(req, 0, padded_iov, count + 2); 
+- free(padded_iov); ++ res = send_reply_iov(req, 0, padded_iov, count + 2); ++ free(padded_iov); + +- return res; ++ return res; + } + + int fuse_reply_poll(fuse_req_t req, unsigned revents) + { +- struct fuse_poll_out arg; ++ struct fuse_poll_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.revents = revents; ++ memset(&arg, 0, sizeof(arg)); ++ arg.revents = revents; + +- return send_reply_ok(req, &arg, sizeof(arg)); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + int fuse_reply_lseek(fuse_req_t req, off_t off) + { +- struct fuse_lseek_out arg; ++ struct fuse_lseek_out arg; + +- memset(&arg, 0, sizeof(arg)); +- arg.offset = off; ++ memset(&arg, 0, sizeof(arg)); ++ arg.offset = off; + +- return send_reply_ok(req, &arg, sizeof(arg)); ++ return send_reply_ok(req, &arg, sizeof(arg)); + } + + static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- char *name = (char *) inarg; ++ char *name = (char *)inarg; + +- if (req->se->op.lookup) +- req->se->op.lookup(req, nodeid, name); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.lookup) { ++ req->se->op.lookup(req, nodeid, name); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_forget_in *arg = (struct fuse_forget_in *) inarg; ++ struct fuse_forget_in *arg = (struct fuse_forget_in *)inarg; + +- if (req->se->op.forget) +- req->se->op.forget(req, nodeid, arg->nlookup); +- else +- fuse_reply_none(req); ++ if (req->se->op.forget) { ++ req->se->op.forget(req, nodeid, arg->nlookup); ++ } else { ++ fuse_reply_none(req); ++ } + } + + static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, +- const void *inarg) ++ const void *inarg) + { +- struct fuse_batch_forget_in *arg = (void *) inarg; +- struct fuse_forget_one *param = (void *) PARAM(arg); +- unsigned int i; ++ struct fuse_batch_forget_in *arg = (void *)inarg; ++ struct fuse_forget_one *param = (void *)PARAM(arg); ++ 
unsigned int i; + +- (void) nodeid; ++ (void)nodeid; + +- if (req->se->op.forget_multi) { +- req->se->op.forget_multi(req, arg->count, +- (struct fuse_forget_data *) param); +- } else if (req->se->op.forget) { +- for (i = 0; i < arg->count; i++) { +- struct fuse_forget_one *forget = ¶m[i]; +- struct fuse_req *dummy_req; ++ if (req->se->op.forget_multi) { ++ req->se->op.forget_multi(req, arg->count, ++ (struct fuse_forget_data *)param); ++ } else if (req->se->op.forget) { ++ for (i = 0; i < arg->count; i++) { ++ struct fuse_forget_one *forget = ¶m[i]; ++ struct fuse_req *dummy_req; + +- dummy_req = fuse_ll_alloc_req(req->se); +- if (dummy_req == NULL) +- break; ++ dummy_req = fuse_ll_alloc_req(req->se); ++ if (dummy_req == NULL) { ++ break; ++ } + +- dummy_req->unique = req->unique; +- dummy_req->ctx = req->ctx; +- dummy_req->ch = NULL; ++ dummy_req->unique = req->unique; ++ dummy_req->ctx = req->ctx; ++ dummy_req->ch = NULL; + +- req->se->op.forget(dummy_req, forget->nodeid, +- forget->nlookup); +- } +- fuse_reply_none(req); +- } else { +- fuse_reply_none(req); +- } ++ req->se->op.forget(dummy_req, forget->nodeid, forget->nlookup); ++ } ++ fuse_reply_none(req); ++ } else { ++ fuse_reply_none(req); ++ } + } + + static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_file_info *fip = NULL; +- struct fuse_file_info fi; ++ struct fuse_file_info *fip = NULL; ++ struct fuse_file_info fi; + +- if (req->se->conn.proto_minor >= 9) { +- struct fuse_getattr_in *arg = (struct fuse_getattr_in *) inarg; ++ if (req->se->conn.proto_minor >= 9) { ++ struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; + +- if (arg->getattr_flags & FUSE_GETATTR_FH) { +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fip = &fi; +- } +- } ++ if (arg->getattr_flags & FUSE_GETATTR_FH) { ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fip = &fi; ++ } ++ } + +- if (req->se->op.getattr) +- req->se->op.getattr(req, nodeid, fip); +- else +- 
fuse_reply_err(req, ENOSYS); ++ if (req->se->op.getattr) { ++ req->se->op.getattr(req, nodeid, fip); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_setattr_in *arg = (struct fuse_setattr_in *) inarg; +- +- if (req->se->op.setattr) { +- struct fuse_file_info *fi = NULL; +- struct fuse_file_info fi_store; +- struct stat stbuf; +- memset(&stbuf, 0, sizeof(stbuf)); +- convert_attr(arg, &stbuf); +- if (arg->valid & FATTR_FH) { +- arg->valid &= ~FATTR_FH; +- memset(&fi_store, 0, sizeof(fi_store)); +- fi = &fi_store; +- fi->fh = arg->fh; +- } +- arg->valid &= +- FUSE_SET_ATTR_MODE | +- FUSE_SET_ATTR_UID | +- FUSE_SET_ATTR_GID | +- FUSE_SET_ATTR_SIZE | +- FUSE_SET_ATTR_ATIME | +- FUSE_SET_ATTR_MTIME | +- FUSE_SET_ATTR_ATIME_NOW | +- FUSE_SET_ATTR_MTIME_NOW | +- FUSE_SET_ATTR_CTIME; +- +- req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); +- } else +- fuse_reply_err(req, ENOSYS); ++ struct fuse_setattr_in *arg = (struct fuse_setattr_in *)inarg; ++ ++ if (req->se->op.setattr) { ++ struct fuse_file_info *fi = NULL; ++ struct fuse_file_info fi_store; ++ struct stat stbuf; ++ memset(&stbuf, 0, sizeof(stbuf)); ++ convert_attr(arg, &stbuf); ++ if (arg->valid & FATTR_FH) { ++ arg->valid &= ~FATTR_FH; ++ memset(&fi_store, 0, sizeof(fi_store)); ++ fi = &fi_store; ++ fi->fh = arg->fh; ++ } ++ arg->valid &= FUSE_SET_ATTR_MODE | FUSE_SET_ATTR_UID | ++ FUSE_SET_ATTR_GID | FUSE_SET_ATTR_SIZE | ++ FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME | ++ FUSE_SET_ATTR_ATIME_NOW | FUSE_SET_ATTR_MTIME_NOW | ++ FUSE_SET_ATTR_CTIME; ++ ++ req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_access_in *arg = (struct fuse_access_in *) inarg; ++ struct fuse_access_in *arg = (struct fuse_access_in *)inarg; + +- if (req->se->op.access) +- 
req->se->op.access(req, nodeid, arg->mask); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.access) { ++ req->se->op.access(req, nodeid, arg->mask); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- (void) inarg; ++ (void)inarg; + +- if (req->se->op.readlink) +- req->se->op.readlink(req, nodeid); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.readlink) { ++ req->se->op.readlink(req, nodeid); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_mknod_in *arg = (struct fuse_mknod_in *) inarg; +- char *name = PARAM(arg); ++ struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; ++ char *name = PARAM(arg); + +- if (req->se->conn.proto_minor >= 12) +- req->ctx.umask = arg->umask; +- else +- name = (char *) inarg + FUSE_COMPAT_MKNOD_IN_SIZE; ++ if (req->se->conn.proto_minor >= 12) { ++ req->ctx.umask = arg->umask; ++ } else { ++ name = (char *)inarg + FUSE_COMPAT_MKNOD_IN_SIZE; ++ } + +- if (req->se->op.mknod) +- req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.mknod) { ++ req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *) inarg; ++ struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; + +- if (req->se->conn.proto_minor >= 12) +- req->ctx.umask = arg->umask; ++ if (req->se->conn.proto_minor >= 12) { ++ req->ctx.umask = arg->umask; ++ } + +- if (req->se->op.mkdir) +- req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.mkdir) { ++ req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); ++ } else { ++ fuse_reply_err(req, ENOSYS); 
++ } + } + + static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- char *name = (char *) inarg; ++ char *name = (char *)inarg; + +- if (req->se->op.unlink) +- req->se->op.unlink(req, nodeid, name); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.unlink) { ++ req->se->op.unlink(req, nodeid, name); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- char *name = (char *) inarg; ++ char *name = (char *)inarg; + +- if (req->se->op.rmdir) +- req->se->op.rmdir(req, nodeid, name); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.rmdir) { ++ req->se->op.rmdir(req, nodeid, name); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- char *name = (char *) inarg; +- char *linkname = ((char *) inarg) + strlen((char *) inarg) + 1; ++ char *name = (char *)inarg; ++ char *linkname = ((char *)inarg) + strlen((char *)inarg) + 1; + +- if (req->se->op.symlink) +- req->se->op.symlink(req, linkname, nodeid, name); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.symlink) { ++ req->se->op.symlink(req, linkname, nodeid, name); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_rename_in *arg = (struct fuse_rename_in *) inarg; +- char *oldname = PARAM(arg); +- char *newname = oldname + strlen(oldname) + 1; ++ struct fuse_rename_in *arg = (struct fuse_rename_in *)inarg; ++ char *oldname = PARAM(arg); ++ char *newname = oldname + strlen(oldname) + 1; + +- if (req->se->op.rename) +- req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, +- 0); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.rename) { ++ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, 0); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void 
do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_rename2_in *arg = (struct fuse_rename2_in *) inarg; +- char *oldname = PARAM(arg); +- char *newname = oldname + strlen(oldname) + 1; ++ struct fuse_rename2_in *arg = (struct fuse_rename2_in *)inarg; ++ char *oldname = PARAM(arg); ++ char *newname = oldname + strlen(oldname) + 1; + +- if (req->se->op.rename) +- req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, +- arg->flags); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.rename) { ++ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, ++ arg->flags); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_link_in *arg = (struct fuse_link_in *) inarg; ++ struct fuse_link_in *arg = (struct fuse_link_in *)inarg; + +- if (req->se->op.link) +- req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.link) { ++ req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_create_in *arg = (struct fuse_create_in *) inarg; ++ struct fuse_create_in *arg = (struct fuse_create_in *)inarg; + +- if (req->se->op.create) { +- struct fuse_file_info fi; +- char *name = PARAM(arg); ++ if (req->se->op.create) { ++ struct fuse_file_info fi; ++ char *name = PARAM(arg); + +- memset(&fi, 0, sizeof(fi)); +- fi.flags = arg->flags; ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; + +- if (req->se->conn.proto_minor >= 12) +- req->ctx.umask = arg->umask; +- else +- name = (char *) inarg + sizeof(struct fuse_open_in); ++ if (req->se->conn.proto_minor >= 12) { ++ req->ctx.umask = arg->umask; ++ } else { ++ name = (char *)inarg + sizeof(struct fuse_open_in); ++ } + +- req->se->op.create(req, nodeid, name, arg->mode, &fi); 
+- } else +- fuse_reply_err(req, ENOSYS); ++ req->se->op.create(req, nodeid, name, arg->mode, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_open_in *arg = (struct fuse_open_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_open_in *arg = (struct fuse_open_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.flags = arg->flags; ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; + +- if (req->se->op.open) +- req->se->op.open(req, nodeid, &fi); +- else +- fuse_reply_open(req, &fi); ++ if (req->se->op.open) { ++ req->se->op.open(req, nodeid, &fi); ++ } else { ++ fuse_reply_open(req, &fi); ++ } + } + + static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; ++ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; + +- if (req->se->op.read) { +- struct fuse_file_info fi; ++ if (req->se->op.read) { ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- if (req->se->conn.proto_minor >= 9) { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- } +- req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); +- } else +- fuse_reply_err(req, ENOSYS); ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ if (req->se->conn.proto_minor >= 9) { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ } ++ req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_write_in *arg = (struct fuse_write_in *) inarg; +- struct fuse_file_info fi; +- char *param; ++ struct fuse_write_in *arg = (struct fuse_write_in *)inarg; ++ struct fuse_file_info fi; ++ char *param; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.writepage = (arg->write_flags & 
FUSE_WRITE_CACHE) != 0; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; + +- if (req->se->conn.proto_minor < 9) { +- param = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; +- } else { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- param = PARAM(arg); +- } ++ if (req->se->conn.proto_minor < 9) { ++ param = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; ++ } else { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ param = PARAM(arg); ++ } + +- if (req->se->op.write) +- req->se->op.write(req, nodeid, param, arg->size, +- arg->offset, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.write) { ++ req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, +- const struct fuse_buf *ibuf) +-{ +- struct fuse_session *se = req->se; +- struct fuse_bufvec bufv = { +- .buf[0] = *ibuf, +- .count = 1, +- }; +- struct fuse_write_in *arg = (struct fuse_write_in *) inarg; +- struct fuse_file_info fi; +- +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; +- +- if (se->conn.proto_minor < 9) { +- bufv.buf[0].mem = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; +- bufv.buf[0].size -= sizeof(struct fuse_in_header) + +- FUSE_COMPAT_WRITE_IN_SIZE; +- assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); +- } else { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) +- bufv.buf[0].mem = PARAM(arg); +- +- bufv.buf[0].size -= sizeof(struct fuse_in_header) + +- sizeof(struct fuse_write_in); +- } +- if (bufv.buf[0].size < arg->size) { +- fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); +- fuse_reply_err(req, EIO); +- return; +- } +- bufv.buf[0].size = arg->size; +- +- se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); ++ 
const struct fuse_buf *ibuf) ++{ ++ struct fuse_session *se = req->se; ++ struct fuse_bufvec bufv = { ++ .buf[0] = *ibuf, ++ .count = 1, ++ }; ++ struct fuse_write_in *arg = (struct fuse_write_in *)inarg; ++ struct fuse_file_info fi; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; ++ ++ if (se->conn.proto_minor < 9) { ++ bufv.buf[0].mem = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; ++ bufv.buf[0].size -= ++ sizeof(struct fuse_in_header) + FUSE_COMPAT_WRITE_IN_SIZE; ++ assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); ++ } else { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { ++ bufv.buf[0].mem = PARAM(arg); ++ } ++ ++ bufv.buf[0].size -= ++ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); ++ } ++ if (bufv.buf[0].size < arg->size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); ++ fuse_reply_err(req, EIO); ++ return; ++ } ++ bufv.buf[0].size = arg->size; ++ ++ se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); + } + + static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_flush_in *arg = (struct fuse_flush_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_flush_in *arg = (struct fuse_flush_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.flush = 1; +- if (req->se->conn.proto_minor >= 7) +- fi.lock_owner = arg->lock_owner; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.flush = 1; ++ if (req->se->conn.proto_minor >= 7) { ++ fi.lock_owner = arg->lock_owner; ++ } + +- if (req->se->op.flush) +- req->se->op.flush(req, nodeid, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.flush) { ++ req->se->op.flush(req, nodeid, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_release_in *arg 
= (struct fuse_release_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_release_in *arg = (struct fuse_release_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.flags = arg->flags; +- fi.fh = arg->fh; +- if (req->se->conn.proto_minor >= 8) { +- fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; +- fi.lock_owner = arg->lock_owner; +- } +- if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { +- fi.flock_release = 1; +- fi.lock_owner = arg->lock_owner; +- } ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ fi.fh = arg->fh; ++ if (req->se->conn.proto_minor >= 8) { ++ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; ++ fi.lock_owner = arg->lock_owner; ++ } ++ if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { ++ fi.flock_release = 1; ++ fi.lock_owner = arg->lock_owner; ++ } + +- if (req->se->op.release) +- req->se->op.release(req, nodeid, &fi); +- else +- fuse_reply_err(req, 0); ++ if (req->se->op.release) { ++ req->se->op.release(req, nodeid, &fi); ++ } else { ++ fuse_reply_err(req, 0); ++ } + } + + static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; +- struct fuse_file_info fi; +- int datasync = arg->fsync_flags & 1; ++ struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; ++ struct fuse_file_info fi; ++ int datasync = arg->fsync_flags & 1; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.fsync) +- req->se->op.fsync(req, nodeid, datasync, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.fsync) { ++ req->se->op.fsync(req, nodeid, datasync, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_open_in *arg = (struct fuse_open_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_open_in *arg = (struct 
fuse_open_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.flags = arg->flags; ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; + +- if (req->se->op.opendir) +- req->se->op.opendir(req, nodeid, &fi); +- else +- fuse_reply_open(req, &fi); ++ if (req->se->op.opendir) { ++ req->se->op.opendir(req, nodeid, &fi); ++ } else { ++ fuse_reply_open(req, &fi); ++ } + } + + static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.readdir) +- req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.readdir) { ++ req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.readdirplus) +- req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.readdirplus) { ++ req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_release_in *arg = (struct fuse_release_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_release_in *arg = (struct fuse_release_in *)inarg; ++ struct 
fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.flags = arg->flags; +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.flags = arg->flags; ++ fi.fh = arg->fh; + +- if (req->se->op.releasedir) +- req->se->op.releasedir(req, nodeid, &fi); +- else +- fuse_reply_err(req, 0); ++ if (req->se->op.releasedir) { ++ req->se->op.releasedir(req, nodeid, &fi); ++ } else { ++ fuse_reply_err(req, 0); ++ } + } + + static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; +- struct fuse_file_info fi; +- int datasync = arg->fsync_flags & 1; ++ struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; ++ struct fuse_file_info fi; ++ int datasync = arg->fsync_flags & 1; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.fsyncdir) +- req->se->op.fsyncdir(req, nodeid, datasync, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.fsyncdir) { ++ req->se->op.fsyncdir(req, nodeid, datasync, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- (void) nodeid; +- (void) inarg; ++ (void)nodeid; ++ (void)inarg; + +- if (req->se->op.statfs) +- req->se->op.statfs(req, nodeid); +- else { +- struct statvfs buf = { +- .f_namemax = 255, +- .f_bsize = 512, +- }; +- fuse_reply_statfs(req, &buf); +- } ++ if (req->se->op.statfs) { ++ req->se->op.statfs(req, nodeid); ++ } else { ++ struct statvfs buf = { ++ .f_namemax = 255, ++ .f_bsize = 512, ++ }; ++ fuse_reply_statfs(req, &buf); ++ } + } + + static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *) inarg; +- char *name = PARAM(arg); +- char *value = name + strlen(name) + 1; ++ struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *)inarg; ++ char *name = PARAM(arg); ++ char *value = name 
+ strlen(name) + 1; + +- if (req->se->op.setxattr) +- req->se->op.setxattr(req, nodeid, name, value, arg->size, +- arg->flags); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.setxattr) { ++ req->se->op.setxattr(req, nodeid, name, value, arg->size, arg->flags); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; ++ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; + +- if (req->se->op.getxattr) +- req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.getxattr) { ++ req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; ++ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; + +- if (req->se->op.listxattr) +- req->se->op.listxattr(req, nodeid, arg->size); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.listxattr) { ++ req->se->op.listxattr(req, nodeid, arg->size); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- char *name = (char *) inarg; ++ char *name = (char *)inarg; + +- if (req->se->op.removexattr) +- req->se->op.removexattr(req, nodeid, name); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.removexattr) { ++ req->se->op.removexattr(req, nodeid, name); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void convert_fuse_file_lock(struct fuse_file_lock *fl, +- struct flock *flock) ++ struct flock *flock) + { +- memset(flock, 0, sizeof(struct flock)); +- flock->l_type = fl->type; +- flock->l_whence = SEEK_SET; +- flock->l_start = fl->start; +- if (fl->end == OFFSET_MAX) +- 
flock->l_len = 0; +- else +- flock->l_len = fl->end - fl->start + 1; +- flock->l_pid = fl->pid; ++ memset(flock, 0, sizeof(struct flock)); ++ flock->l_type = fl->type; ++ flock->l_whence = SEEK_SET; ++ flock->l_start = fl->start; ++ if (fl->end == OFFSET_MAX) { ++ flock->l_len = 0; ++ } else { ++ flock->l_len = fl->end - fl->start + 1; ++ } ++ flock->l_pid = fl->pid; + } + + static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; +- struct fuse_file_info fi; +- struct flock flock; ++ struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; ++ struct fuse_file_info fi; ++ struct flock flock; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.lock_owner = arg->owner; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.lock_owner = arg->owner; + +- convert_fuse_file_lock(&arg->lk, &flock); +- if (req->se->op.getlk) +- req->se->op.getlk(req, nodeid, &fi, &flock); +- else +- fuse_reply_err(req, ENOSYS); ++ convert_fuse_file_lock(&arg->lk, &flock); ++ if (req->se->op.getlk) { ++ req->se->op.getlk(req, nodeid, &fi, &flock); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, +- const void *inarg, int sleep) +-{ +- struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; +- struct fuse_file_info fi; +- struct flock flock; +- +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.lock_owner = arg->owner; +- +- if (arg->lk_flags & FUSE_LK_FLOCK) { +- int op = 0; +- +- switch (arg->lk.type) { +- case F_RDLCK: +- op = LOCK_SH; +- break; +- case F_WRLCK: +- op = LOCK_EX; +- break; +- case F_UNLCK: +- op = LOCK_UN; +- break; +- } +- if (!sleep) +- op |= LOCK_NB; +- +- if (req->se->op.flock) +- req->se->op.flock(req, nodeid, &fi, op); +- else +- fuse_reply_err(req, ENOSYS); +- } else { +- convert_fuse_file_lock(&arg->lk, &flock); +- if (req->se->op.setlk) +- req->se->op.setlk(req, nodeid, &fi, &flock, sleep); +- else +- 
fuse_reply_err(req, ENOSYS); +- } ++ const void *inarg, int sleep) ++{ ++ struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; ++ struct fuse_file_info fi; ++ struct flock flock; ++ ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.lock_owner = arg->owner; ++ ++ if (arg->lk_flags & FUSE_LK_FLOCK) { ++ int op = 0; ++ ++ switch (arg->lk.type) { ++ case F_RDLCK: ++ op = LOCK_SH; ++ break; ++ case F_WRLCK: ++ op = LOCK_EX; ++ break; ++ case F_UNLCK: ++ op = LOCK_UN; ++ break; ++ } ++ if (!sleep) { ++ op |= LOCK_NB; ++ } ++ ++ if (req->se->op.flock) { ++ req->se->op.flock(req, nodeid, &fi, op); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } ++ } else { ++ convert_fuse_file_lock(&arg->lk, &flock); ++ if (req->se->op.setlk) { ++ req->se->op.setlk(req, nodeid, &fi, &flock, sleep); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } ++ } + } + + static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- do_setlk_common(req, nodeid, inarg, 0); ++ do_setlk_common(req, nodeid, inarg, 0); + } + + static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- do_setlk_common(req, nodeid, inarg, 1); ++ do_setlk_common(req, nodeid, inarg, 1); + } + + static int find_interrupted(struct fuse_session *se, struct fuse_req *req) + { +- struct fuse_req *curr; +- +- for (curr = se->list.next; curr != &se->list; curr = curr->next) { +- if (curr->unique == req->u.i.unique) { +- fuse_interrupt_func_t func; +- void *data; +- +- curr->ctr++; +- pthread_mutex_unlock(&se->lock); +- +- /* Ugh, ugly locking */ +- pthread_mutex_lock(&curr->lock); +- pthread_mutex_lock(&se->lock); +- curr->interrupted = 1; +- func = curr->u.ni.func; +- data = curr->u.ni.data; +- pthread_mutex_unlock(&se->lock); +- if (func) +- func(curr, data); +- pthread_mutex_unlock(&curr->lock); +- +- pthread_mutex_lock(&se->lock); +- curr->ctr--; +- if (!curr->ctr) +- destroy_req(curr); +- +- return 1; +- } +- } +- for (curr = se->interrupts.next; curr != &se->interrupts; +- curr 
= curr->next) { +- if (curr->u.i.unique == req->u.i.unique) +- return 1; +- } +- return 0; ++ struct fuse_req *curr; ++ ++ for (curr = se->list.next; curr != &se->list; curr = curr->next) { ++ if (curr->unique == req->u.i.unique) { ++ fuse_interrupt_func_t func; ++ void *data; ++ ++ curr->ctr++; ++ pthread_mutex_unlock(&se->lock); ++ ++ /* Ugh, ugly locking */ ++ pthread_mutex_lock(&curr->lock); ++ pthread_mutex_lock(&se->lock); ++ curr->interrupted = 1; ++ func = curr->u.ni.func; ++ data = curr->u.ni.data; ++ pthread_mutex_unlock(&se->lock); ++ if (func) { ++ func(curr, data); ++ } ++ pthread_mutex_unlock(&curr->lock); ++ ++ pthread_mutex_lock(&se->lock); ++ curr->ctr--; ++ if (!curr->ctr) { ++ destroy_req(curr); ++ } ++ ++ return 1; ++ } ++ } ++ for (curr = se->interrupts.next; curr != &se->interrupts; ++ curr = curr->next) { ++ if (curr->u.i.unique == req->u.i.unique) { ++ return 1; ++ } ++ } ++ return 0; + } + + static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *) inarg; +- struct fuse_session *se = req->se; ++ struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *)inarg; ++ struct fuse_session *se = req->se; + +- (void) nodeid; +- if (se->debug) +- fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", +- (unsigned long long) arg->unique); ++ (void)nodeid; ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", ++ (unsigned long long)arg->unique); ++ } + +- req->u.i.unique = arg->unique; ++ req->u.i.unique = arg->unique; + +- pthread_mutex_lock(&se->lock); +- if (find_interrupted(se, req)) +- destroy_req(req); +- else +- list_add_req(req, &se->interrupts); +- pthread_mutex_unlock(&se->lock); ++ pthread_mutex_lock(&se->lock); ++ if (find_interrupted(se, req)) { ++ destroy_req(req); ++ } else { ++ list_add_req(req, &se->interrupts); ++ } ++ pthread_mutex_unlock(&se->lock); + } + + static struct fuse_req *check_interrupt(struct fuse_session *se, +- struct 
fuse_req *req) +-{ +- struct fuse_req *curr; +- +- for (curr = se->interrupts.next; curr != &se->interrupts; +- curr = curr->next) { +- if (curr->u.i.unique == req->unique) { +- req->interrupted = 1; +- list_del_req(curr); +- free(curr); +- return NULL; +- } +- } +- curr = se->interrupts.next; +- if (curr != &se->interrupts) { +- list_del_req(curr); +- list_init_req(curr); +- return curr; +- } else +- return NULL; ++ struct fuse_req *req) ++{ ++ struct fuse_req *curr; ++ ++ for (curr = se->interrupts.next; curr != &se->interrupts; ++ curr = curr->next) { ++ if (curr->u.i.unique == req->unique) { ++ req->interrupted = 1; ++ list_del_req(curr); ++ free(curr); ++ return NULL; ++ } ++ } ++ curr = se->interrupts.next; ++ if (curr != &se->interrupts) { ++ list_del_req(curr); ++ list_init_req(curr); ++ return curr; ++ } else { ++ return NULL; ++ } + } + + static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_bmap_in *arg = (struct fuse_bmap_in *) inarg; ++ struct fuse_bmap_in *arg = (struct fuse_bmap_in *)inarg; + +- if (req->se->op.bmap) +- req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.bmap) { ++ req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *) inarg; +- unsigned int flags = arg->flags; +- void *in_buf = arg->in_size ? PARAM(arg) : NULL; +- struct fuse_file_info fi; ++ struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *)inarg; ++ unsigned int flags = arg->flags; ++ void *in_buf = arg->in_size ? 
PARAM(arg) : NULL; ++ struct fuse_file_info fi; + +- if (flags & FUSE_IOCTL_DIR && +- !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { +- fuse_reply_err(req, ENOTTY); +- return; +- } ++ if (flags & FUSE_IOCTL_DIR && !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { ++ fuse_reply_err(req, ENOTTY); ++ return; ++ } + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && +- !(flags & FUSE_IOCTL_32BIT)) { +- req->ioctl_64bit = 1; +- } ++ if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && ++ !(flags & FUSE_IOCTL_32BIT)) { ++ req->ioctl_64bit = 1; ++ } + +- if (req->se->op.ioctl) +- req->se->op.ioctl(req, nodeid, arg->cmd, +- (void *)(uintptr_t)arg->arg, &fi, flags, +- in_buf, arg->in_size, arg->out_size); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.ioctl) { ++ req->se->op.ioctl(req, nodeid, arg->cmd, (void *)(uintptr_t)arg->arg, ++ &fi, flags, in_buf, arg->in_size, arg->out_size); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) + { +- free(ph); ++ free(ph); + } + + static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_poll_in *arg = (struct fuse_poll_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_poll_in *arg = (struct fuse_poll_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fi.poll_events = arg->events; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fi.poll_events = arg->events; + +- if (req->se->op.poll) { +- struct fuse_pollhandle *ph = NULL; ++ if (req->se->op.poll) { ++ struct fuse_pollhandle *ph = NULL; + +- if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { +- ph = malloc(sizeof(struct fuse_pollhandle)); +- if (ph == NULL) { +- fuse_reply_err(req, ENOMEM); +- return; +- } +- ph->kh = arg->kh; +- ph->se = req->se; +- } ++ if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { ++ 
ph = malloc(sizeof(struct fuse_pollhandle)); ++ if (ph == NULL) { ++ fuse_reply_err(req, ENOMEM); ++ return; ++ } ++ ph->kh = arg->kh; ++ ph->se = req->se; ++ } + +- req->se->op.poll(req, nodeid, &fi, ph); +- } else { +- fuse_reply_err(req, ENOSYS); +- } ++ req->se->op.poll(req, nodeid, &fi, ph); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.fallocate) +- req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.fallocate) { ++ req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, ++ &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + +-static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, const void *inarg) ++static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, ++ const void *inarg) + { +- struct fuse_copy_file_range_in *arg = (struct fuse_copy_file_range_in *) inarg; +- struct fuse_file_info fi_in, fi_out; ++ struct fuse_copy_file_range_in *arg = ++ (struct fuse_copy_file_range_in *)inarg; ++ struct fuse_file_info fi_in, fi_out; + +- memset(&fi_in, 0, sizeof(fi_in)); +- fi_in.fh = arg->fh_in; ++ memset(&fi_in, 0, sizeof(fi_in)); ++ fi_in.fh = arg->fh_in; + +- memset(&fi_out, 0, sizeof(fi_out)); +- fi_out.fh = arg->fh_out; ++ memset(&fi_out, 0, sizeof(fi_out)); ++ fi_out.fh = arg->fh_out; + + +- if (req->se->op.copy_file_range) +- req->se->op.copy_file_range(req, nodeid_in, arg->off_in, +- &fi_in, arg->nodeid_out, +- arg->off_out, &fi_out, arg->len, +- arg->flags); +- else +- fuse_reply_err(req, ENOSYS); ++ if 
(req->se->op.copy_file_range) { ++ req->se->op.copy_file_range(req, nodeid_in, arg->off_in, &fi_in, ++ arg->nodeid_out, arg->off_out, &fi_out, ++ arg->len, arg->flags); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_lseek_in *arg = (struct fuse_lseek_in *) inarg; +- struct fuse_file_info fi; ++ struct fuse_lseek_in *arg = (struct fuse_lseek_in *)inarg; ++ struct fuse_file_info fi; + +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; + +- if (req->se->op.lseek) +- req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); +- else +- fuse_reply_err(req, ENOSYS); ++ if (req->se->op.lseek) { ++ req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); ++ } else { ++ fuse_reply_err(req, ENOSYS); ++ } + } + + static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_init_in *arg = (struct fuse_init_in *) inarg; +- struct fuse_init_out outarg; +- struct fuse_session *se = req->se; +- size_t bufsize = se->bufsize; +- size_t outargsize = sizeof(outarg); +- +- (void) nodeid; +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); +- if (arg->major == 7 && arg->minor >= 6) { +- fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); +- fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", +- arg->max_readahead); +- } +- } +- se->conn.proto_major = arg->major; +- se->conn.proto_minor = arg->minor; +- se->conn.capable = 0; +- se->conn.want = 0; +- +- memset(&outarg, 0, sizeof(outarg)); +- outarg.major = FUSE_KERNEL_VERSION; +- outarg.minor = FUSE_KERNEL_MINOR_VERSION; +- +- if (arg->major < 7) { +- fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", +- arg->major, arg->minor); +- fuse_reply_err(req, EPROTO); +- return; +- } +- +- if (arg->major > 7) { +- /* Wait for a second INIT request with a 7.X version */ +- send_reply_ok(req, 
&outarg, sizeof(outarg)); +- return; +- } +- +- if (arg->minor >= 6) { +- if (arg->max_readahead < se->conn.max_readahead) +- se->conn.max_readahead = arg->max_readahead; +- if (arg->flags & FUSE_ASYNC_READ) +- se->conn.capable |= FUSE_CAP_ASYNC_READ; +- if (arg->flags & FUSE_POSIX_LOCKS) +- se->conn.capable |= FUSE_CAP_POSIX_LOCKS; +- if (arg->flags & FUSE_ATOMIC_O_TRUNC) +- se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; +- if (arg->flags & FUSE_EXPORT_SUPPORT) +- se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; +- if (arg->flags & FUSE_DONT_MASK) +- se->conn.capable |= FUSE_CAP_DONT_MASK; +- if (arg->flags & FUSE_FLOCK_LOCKS) +- se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; +- if (arg->flags & FUSE_AUTO_INVAL_DATA) +- se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; +- if (arg->flags & FUSE_DO_READDIRPLUS) +- se->conn.capable |= FUSE_CAP_READDIRPLUS; +- if (arg->flags & FUSE_READDIRPLUS_AUTO) +- se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; +- if (arg->flags & FUSE_ASYNC_DIO) +- se->conn.capable |= FUSE_CAP_ASYNC_DIO; +- if (arg->flags & FUSE_WRITEBACK_CACHE) +- se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; +- if (arg->flags & FUSE_NO_OPEN_SUPPORT) +- se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; +- if (arg->flags & FUSE_PARALLEL_DIROPS) +- se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; +- if (arg->flags & FUSE_POSIX_ACL) +- se->conn.capable |= FUSE_CAP_POSIX_ACL; +- if (arg->flags & FUSE_HANDLE_KILLPRIV) +- se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; +- if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) +- se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; +- if (!(arg->flags & FUSE_MAX_PAGES)) { +- size_t max_bufsize = +- FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() +- + FUSE_BUFFER_HEADER_SIZE; +- if (bufsize > max_bufsize) { +- bufsize = max_bufsize; +- } +- } +- } else { +- se->conn.max_readahead = 0; +- } +- +- if (se->conn.proto_minor >= 14) { ++ struct fuse_init_in *arg = (struct fuse_init_in *)inarg; ++ struct fuse_init_out outarg; ++ struct fuse_session *se = req->se; ++ 
size_t bufsize = se->bufsize; ++ size_t outargsize = sizeof(outarg); ++ ++ (void)nodeid; ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); ++ if (arg->major == 7 && arg->minor >= 6) { ++ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); ++ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", ++ arg->max_readahead); ++ } ++ } ++ se->conn.proto_major = arg->major; ++ se->conn.proto_minor = arg->minor; ++ se->conn.capable = 0; ++ se->conn.want = 0; ++ ++ memset(&outarg, 0, sizeof(outarg)); ++ outarg.major = FUSE_KERNEL_VERSION; ++ outarg.minor = FUSE_KERNEL_MINOR_VERSION; ++ ++ if (arg->major < 7) { ++ fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", ++ arg->major, arg->minor); ++ fuse_reply_err(req, EPROTO); ++ return; ++ } ++ ++ if (arg->major > 7) { ++ /* Wait for a second INIT request with a 7.X version */ ++ send_reply_ok(req, &outarg, sizeof(outarg)); ++ return; ++ } ++ ++ if (arg->minor >= 6) { ++ if (arg->max_readahead < se->conn.max_readahead) { ++ se->conn.max_readahead = arg->max_readahead; ++ } ++ if (arg->flags & FUSE_ASYNC_READ) { ++ se->conn.capable |= FUSE_CAP_ASYNC_READ; ++ } ++ if (arg->flags & FUSE_POSIX_LOCKS) { ++ se->conn.capable |= FUSE_CAP_POSIX_LOCKS; ++ } ++ if (arg->flags & FUSE_ATOMIC_O_TRUNC) { ++ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; ++ } ++ if (arg->flags & FUSE_EXPORT_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; ++ } ++ if (arg->flags & FUSE_DONT_MASK) { ++ se->conn.capable |= FUSE_CAP_DONT_MASK; ++ } ++ if (arg->flags & FUSE_FLOCK_LOCKS) { ++ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; ++ } ++ if (arg->flags & FUSE_AUTO_INVAL_DATA) { ++ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; ++ } ++ if (arg->flags & FUSE_DO_READDIRPLUS) { ++ se->conn.capable |= FUSE_CAP_READDIRPLUS; ++ } ++ if (arg->flags & FUSE_READDIRPLUS_AUTO) { ++ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; ++ } ++ if (arg->flags & FUSE_ASYNC_DIO) { ++ se->conn.capable |= 
FUSE_CAP_ASYNC_DIO; ++ } ++ if (arg->flags & FUSE_WRITEBACK_CACHE) { ++ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; ++ } ++ if (arg->flags & FUSE_NO_OPEN_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; ++ } ++ if (arg->flags & FUSE_PARALLEL_DIROPS) { ++ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; ++ } ++ if (arg->flags & FUSE_POSIX_ACL) { ++ se->conn.capable |= FUSE_CAP_POSIX_ACL; ++ } ++ if (arg->flags & FUSE_HANDLE_KILLPRIV) { ++ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; ++ } ++ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; ++ } ++ if (!(arg->flags & FUSE_MAX_PAGES)) { ++ size_t max_bufsize = ++ FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + ++ FUSE_BUFFER_HEADER_SIZE; ++ if (bufsize > max_bufsize) { ++ bufsize = max_bufsize; ++ } ++ } ++ } else { ++ se->conn.max_readahead = 0; ++ } ++ ++ if (se->conn.proto_minor >= 14) { + #ifdef HAVE_SPLICE + #ifdef HAVE_VMSPLICE +- se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; ++ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; + #endif +- se->conn.capable |= FUSE_CAP_SPLICE_READ; ++ se->conn.capable |= FUSE_CAP_SPLICE_READ; + #endif +- } +- if (se->conn.proto_minor >= 18) +- se->conn.capable |= FUSE_CAP_IOCTL_DIR; +- +- /* Default settings for modern filesystems. +- * +- * Most of these capabilities were disabled by default in +- * libfuse2 for backwards compatibility reasons. In libfuse3, +- * we can finally enable them by default (as long as they're +- * supported by the kernel). 
+- */ +-#define LL_SET_DEFAULT(cond, cap) \ +- if ((cond) && (se->conn.capable & (cap))) \ +- se->conn.want |= (cap) +- LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); +- LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); +- LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); +- LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); +- LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); +- LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); +- LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); +- LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); +- LL_SET_DEFAULT(se->op.getlk && se->op.setlk, +- FUSE_CAP_POSIX_LOCKS); +- LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); +- LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); +- LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, +- FUSE_CAP_READDIRPLUS_AUTO); +- se->conn.time_gran = 1; +- +- if (bufsize < FUSE_MIN_READ_BUFFER) { +- fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", +- bufsize); +- bufsize = FUSE_MIN_READ_BUFFER; +- } +- se->bufsize = bufsize; +- +- if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) +- se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; +- +- se->got_init = 1; +- if (se->op.init) +- se->op.init(se->userdata, &se->conn); +- +- if (se->conn.want & (~se->conn.capable)) { +- fuse_log(FUSE_LOG_ERR, "fuse: error: filesystem requested capabilities " +- "0x%x that are not supported by kernel, aborting.\n", +- se->conn.want & (~se->conn.capable)); +- fuse_reply_err(req, EPROTO); +- se->error = -EPROTO; +- fuse_session_exit(se); +- return; +- } +- +- if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { +- se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; +- } +- if (arg->flags & FUSE_MAX_PAGES) { +- outarg.flags |= FUSE_MAX_PAGES; +- outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; +- } +- +- /* Always enable big writes, this is superseded +- by the max_write option */ +- outarg.flags |= FUSE_BIG_WRITES; +- +- if (se->conn.want & FUSE_CAP_ASYNC_READ) +- outarg.flags |= 
FUSE_ASYNC_READ; +- if (se->conn.want & FUSE_CAP_POSIX_LOCKS) +- outarg.flags |= FUSE_POSIX_LOCKS; +- if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) +- outarg.flags |= FUSE_ATOMIC_O_TRUNC; +- if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) +- outarg.flags |= FUSE_EXPORT_SUPPORT; +- if (se->conn.want & FUSE_CAP_DONT_MASK) +- outarg.flags |= FUSE_DONT_MASK; +- if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) +- outarg.flags |= FUSE_FLOCK_LOCKS; +- if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) +- outarg.flags |= FUSE_AUTO_INVAL_DATA; +- if (se->conn.want & FUSE_CAP_READDIRPLUS) +- outarg.flags |= FUSE_DO_READDIRPLUS; +- if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) +- outarg.flags |= FUSE_READDIRPLUS_AUTO; +- if (se->conn.want & FUSE_CAP_ASYNC_DIO) +- outarg.flags |= FUSE_ASYNC_DIO; +- if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) +- outarg.flags |= FUSE_WRITEBACK_CACHE; +- if (se->conn.want & FUSE_CAP_POSIX_ACL) +- outarg.flags |= FUSE_POSIX_ACL; +- outarg.max_readahead = se->conn.max_readahead; +- outarg.max_write = se->conn.max_write; +- if (se->conn.proto_minor >= 13) { +- if (se->conn.max_background >= (1 << 16)) +- se->conn.max_background = (1 << 16) - 1; +- if (se->conn.congestion_threshold > se->conn.max_background) +- se->conn.congestion_threshold = se->conn.max_background; +- if (!se->conn.congestion_threshold) { +- se->conn.congestion_threshold = +- se->conn.max_background * 3 / 4; +- } +- +- outarg.max_background = se->conn.max_background; +- outarg.congestion_threshold = se->conn.congestion_threshold; +- } +- if (se->conn.proto_minor >= 23) +- outarg.time_gran = se->conn.time_gran; +- +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); +- fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); +- fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", +- outarg.max_readahead); +- fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); +- fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", +- outarg.max_background); 
+- fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", +- outarg.congestion_threshold); +- fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", +- outarg.time_gran); +- } +- if (arg->minor < 5) +- outargsize = FUSE_COMPAT_INIT_OUT_SIZE; +- else if (arg->minor < 23) +- outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; +- +- send_reply_ok(req, &outarg, outargsize); ++ } ++ if (se->conn.proto_minor >= 18) { ++ se->conn.capable |= FUSE_CAP_IOCTL_DIR; ++ } ++ ++ /* ++ * Default settings for modern filesystems. ++ * ++ * Most of these capabilities were disabled by default in ++ * libfuse2 for backwards compatibility reasons. In libfuse3, ++ * we can finally enable them by default (as long as they're ++ * supported by the kernel). ++ */ ++#define LL_SET_DEFAULT(cond, cap) \ ++ if ((cond) && (se->conn.capable & (cap))) \ ++ se->conn.want |= (cap) ++ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); ++ LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); ++ LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); ++ LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); ++ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); ++ LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); ++ LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); ++ LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); ++ LL_SET_DEFAULT(se->op.getlk && se->op.setlk, FUSE_CAP_POSIX_LOCKS); ++ LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); ++ LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); ++ LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, ++ FUSE_CAP_READDIRPLUS_AUTO); ++ se->conn.time_gran = 1; ++ ++ if (bufsize < FUSE_MIN_READ_BUFFER) { ++ fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", ++ bufsize); ++ bufsize = FUSE_MIN_READ_BUFFER; ++ } ++ se->bufsize = bufsize; ++ ++ if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) { ++ se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; ++ } ++ ++ se->got_init = 1; ++ if (se->op.init) { ++ se->op.init(se->userdata, &se->conn); ++ } ++ ++ if (se->conn.want & (~se->conn.capable)) { ++ 
fuse_log(FUSE_LOG_ERR, ++ "fuse: error: filesystem requested capabilities " ++ "0x%x that are not supported by kernel, aborting.\n", ++ se->conn.want & (~se->conn.capable)); ++ fuse_reply_err(req, EPROTO); ++ se->error = -EPROTO; ++ fuse_session_exit(se); ++ return; ++ } ++ ++ if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { ++ se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; ++ } ++ if (arg->flags & FUSE_MAX_PAGES) { ++ outarg.flags |= FUSE_MAX_PAGES; ++ outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; ++ } ++ ++ /* ++ * Always enable big writes, this is superseded ++ * by the max_write option ++ */ ++ outarg.flags |= FUSE_BIG_WRITES; ++ ++ if (se->conn.want & FUSE_CAP_ASYNC_READ) { ++ outarg.flags |= FUSE_ASYNC_READ; ++ } ++ if (se->conn.want & FUSE_CAP_POSIX_LOCKS) { ++ outarg.flags |= FUSE_POSIX_LOCKS; ++ } ++ if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) { ++ outarg.flags |= FUSE_ATOMIC_O_TRUNC; ++ } ++ if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) { ++ outarg.flags |= FUSE_EXPORT_SUPPORT; ++ } ++ if (se->conn.want & FUSE_CAP_DONT_MASK) { ++ outarg.flags |= FUSE_DONT_MASK; ++ } ++ if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) { ++ outarg.flags |= FUSE_FLOCK_LOCKS; ++ } ++ if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) { ++ outarg.flags |= FUSE_AUTO_INVAL_DATA; ++ } ++ if (se->conn.want & FUSE_CAP_READDIRPLUS) { ++ outarg.flags |= FUSE_DO_READDIRPLUS; ++ } ++ if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) { ++ outarg.flags |= FUSE_READDIRPLUS_AUTO; ++ } ++ if (se->conn.want & FUSE_CAP_ASYNC_DIO) { ++ outarg.flags |= FUSE_ASYNC_DIO; ++ } ++ if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) { ++ outarg.flags |= FUSE_WRITEBACK_CACHE; ++ } ++ if (se->conn.want & FUSE_CAP_POSIX_ACL) { ++ outarg.flags |= FUSE_POSIX_ACL; ++ } ++ outarg.max_readahead = se->conn.max_readahead; ++ outarg.max_write = se->conn.max_write; ++ if (se->conn.proto_minor >= 13) { ++ if (se->conn.max_background >= (1 << 16)) { ++ se->conn.max_background = (1 << 16) - 
1; ++ } ++ if (se->conn.congestion_threshold > se->conn.max_background) { ++ se->conn.congestion_threshold = se->conn.max_background; ++ } ++ if (!se->conn.congestion_threshold) { ++ se->conn.congestion_threshold = se->conn.max_background * 3 / 4; ++ } ++ ++ outarg.max_background = se->conn.max_background; ++ outarg.congestion_threshold = se->conn.congestion_threshold; ++ } ++ if (se->conn.proto_minor >= 23) { ++ outarg.time_gran = se->conn.time_gran; ++ } ++ ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, ++ outarg.minor); ++ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); ++ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", ++ outarg.max_readahead); ++ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); ++ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", ++ outarg.max_background); ++ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", ++ outarg.congestion_threshold); ++ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); ++ } ++ if (arg->minor < 5) { ++ outargsize = FUSE_COMPAT_INIT_OUT_SIZE; ++ } else if (arg->minor < 23) { ++ outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; ++ } ++ ++ send_reply_ok(req, &outarg, outargsize); + } + + static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { +- struct fuse_session *se = req->se; ++ struct fuse_session *se = req->se; + +- (void) nodeid; +- (void) inarg; ++ (void)nodeid; ++ (void)inarg; + +- se->got_destroy = 1; +- if (se->op.destroy) +- se->op.destroy(se->userdata); ++ se->got_destroy = 1; ++ if (se->op.destroy) { ++ se->op.destroy(se->userdata); ++ } + +- send_reply_ok(req, NULL, 0); ++ send_reply_ok(req, NULL, 0); + } + + static void list_del_nreq(struct fuse_notify_req *nreq) + { +- struct fuse_notify_req *prev = nreq->prev; +- struct fuse_notify_req *next = nreq->next; +- prev->next = next; +- next->prev = prev; ++ struct fuse_notify_req *prev = nreq->prev; ++ struct fuse_notify_req *next = nreq->next; ++ prev->next = 
next; ++ next->prev = prev; + } + + static void list_add_nreq(struct fuse_notify_req *nreq, +- struct fuse_notify_req *next) ++ struct fuse_notify_req *next) + { +- struct fuse_notify_req *prev = next->prev; +- nreq->next = next; +- nreq->prev = prev; +- prev->next = nreq; +- next->prev = nreq; ++ struct fuse_notify_req *prev = next->prev; ++ nreq->next = next; ++ nreq->prev = prev; ++ prev->next = nreq; ++ next->prev = nreq; + } + + static void list_init_nreq(struct fuse_notify_req *nreq) + { +- nreq->next = nreq; +- nreq->prev = nreq; ++ nreq->next = nreq; ++ nreq->prev = nreq; + } + + static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, +- const void *inarg, const struct fuse_buf *buf) ++ const void *inarg, const struct fuse_buf *buf) + { +- struct fuse_session *se = req->se; +- struct fuse_notify_req *nreq; +- struct fuse_notify_req *head; ++ struct fuse_session *se = req->se; ++ struct fuse_notify_req *nreq; ++ struct fuse_notify_req *head; + +- pthread_mutex_lock(&se->lock); +- head = &se->notify_list; +- for (nreq = head->next; nreq != head; nreq = nreq->next) { +- if (nreq->unique == req->unique) { +- list_del_nreq(nreq); +- break; +- } +- } +- pthread_mutex_unlock(&se->lock); ++ pthread_mutex_lock(&se->lock); ++ head = &se->notify_list; ++ for (nreq = head->next; nreq != head; nreq = nreq->next) { ++ if (nreq->unique == req->unique) { ++ list_del_nreq(nreq); ++ break; ++ } ++ } ++ pthread_mutex_unlock(&se->lock); + +- if (nreq != head) +- nreq->reply(nreq, req, nodeid, inarg, buf); ++ if (nreq != head) { ++ nreq->reply(nreq, req, nodeid, inarg, buf); ++ } + } + + static int send_notify_iov(struct fuse_session *se, int notify_code, +- struct iovec *iov, int count) ++ struct iovec *iov, int count) + { +- struct fuse_out_header out; ++ struct fuse_out_header out; + +- if (!se->got_init) +- return -ENOTCONN; ++ if (!se->got_init) { ++ return -ENOTCONN; ++ } + +- out.unique = 0; +- out.error = notify_code; +- iov[0].iov_base = &out; +- iov[0].iov_len 
= sizeof(struct fuse_out_header); ++ out.unique = 0; ++ out.error = notify_code; ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(struct fuse_out_header); + +- return fuse_send_msg(se, NULL, iov, count); ++ return fuse_send_msg(se, NULL, iov, count); + } + + int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) + { +- if (ph != NULL) { +- struct fuse_notify_poll_wakeup_out outarg; +- struct iovec iov[2]; ++ if (ph != NULL) { ++ struct fuse_notify_poll_wakeup_out outarg; ++ struct iovec iov[2]; + +- outarg.kh = ph->kh; ++ outarg.kh = ph->kh; + +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); + +- return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); +- } else { +- return 0; +- } ++ return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); ++ } else { ++ return 0; ++ } + } + + int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, +- off_t off, off_t len) ++ off_t off, off_t len) + { +- struct fuse_notify_inval_inode_out outarg; +- struct iovec iov[2]; ++ struct fuse_notify_inval_inode_out outarg; ++ struct iovec iov[2]; ++ ++ if (!se) { ++ return -EINVAL; ++ } + +- if (!se) +- return -EINVAL; ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { ++ return -ENOSYS; ++ } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) +- return -ENOSYS; +- +- outarg.ino = ino; +- outarg.off = off; +- outarg.len = len; ++ outarg.ino = ino; ++ outarg.off = off; ++ outarg.len = len; + +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); + +- return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); ++ return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); + } + + int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, +- const char *name, size_t namelen) ++ const char *name, size_t namelen) + { +- struct fuse_notify_inval_entry_out 
outarg; +- struct iovec iov[3]; ++ struct fuse_notify_inval_entry_out outarg; ++ struct iovec iov[3]; ++ ++ if (!se) { ++ return -EINVAL; ++ } + +- if (!se) +- return -EINVAL; +- +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) +- return -ENOSYS; ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { ++ return -ENOSYS; ++ } + +- outarg.parent = parent; +- outarg.namelen = namelen; +- outarg.padding = 0; ++ outarg.parent = parent; ++ outarg.namelen = namelen; ++ outarg.padding = 0; + +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); +- iov[2].iov_base = (void *)name; +- iov[2].iov_len = namelen + 1; ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ iov[2].iov_base = (void *)name; ++ iov[2].iov_len = namelen + 1; + +- return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); ++ return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); + } + +-int fuse_lowlevel_notify_delete(struct fuse_session *se, +- fuse_ino_t parent, fuse_ino_t child, +- const char *name, size_t namelen) ++int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, ++ fuse_ino_t child, const char *name, ++ size_t namelen) + { +- struct fuse_notify_delete_out outarg; +- struct iovec iov[3]; ++ struct fuse_notify_delete_out outarg; ++ struct iovec iov[3]; + +- if (!se) +- return -EINVAL; ++ if (!se) { ++ return -EINVAL; ++ } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) +- return -ENOSYS; ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) { ++ return -ENOSYS; ++ } + +- outarg.parent = parent; +- outarg.child = child; +- outarg.namelen = namelen; +- outarg.padding = 0; ++ outarg.parent = parent; ++ outarg.child = child; ++ outarg.namelen = namelen; ++ outarg.padding = 0; + +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); +- iov[2].iov_base = (void *)name; +- iov[2].iov_len = namelen + 1; ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); ++ iov[2].iov_base = 
(void *)name; ++ iov[2].iov_len = namelen + 1; + +- return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); ++ return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); + } + + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags) ++ off_t offset, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags) + { +- struct fuse_out_header out; +- struct fuse_notify_store_out outarg; +- struct iovec iov[3]; +- size_t size = fuse_buf_size(bufv); +- int res; ++ struct fuse_out_header out; ++ struct fuse_notify_store_out outarg; ++ struct iovec iov[3]; ++ size_t size = fuse_buf_size(bufv); ++ int res; + +- if (!se) +- return -EINVAL; ++ if (!se) { ++ return -EINVAL; ++ } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) +- return -ENOSYS; ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { ++ return -ENOSYS; ++ } + +- out.unique = 0; +- out.error = FUSE_NOTIFY_STORE; ++ out.unique = 0; ++ out.error = FUSE_NOTIFY_STORE; + +- outarg.nodeid = ino; +- outarg.offset = offset; +- outarg.size = size; +- outarg.padding = 0; ++ outarg.nodeid = ino; ++ outarg.offset = offset; ++ outarg.size = size; ++ outarg.padding = 0; + +- iov[0].iov_base = &out; +- iov[0].iov_len = sizeof(out); +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); ++ iov[0].iov_base = &out; ++ iov[0].iov_len = sizeof(out); ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); + +- res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); +- if (res > 0) +- res = -res; ++ res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); ++ if (res > 0) { ++ res = -res; ++ } + +- return res; ++ return res; + } + + struct fuse_retrieve_req { +- struct fuse_notify_req nreq; +- void *cookie; ++ struct fuse_notify_req nreq; ++ void *cookie; + }; + +-static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, +- fuse_req_t req, fuse_ino_t ino, +- const void *inarg, +- const struct 
fuse_buf *ibuf) +-{ +- struct fuse_session *se = req->se; +- struct fuse_retrieve_req *rreq = +- container_of(nreq, struct fuse_retrieve_req, nreq); +- const struct fuse_notify_retrieve_in *arg = inarg; +- struct fuse_bufvec bufv = { +- .buf[0] = *ibuf, +- .count = 1, +- }; +- +- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) +- bufv.buf[0].mem = PARAM(arg); +- +- bufv.buf[0].size -= sizeof(struct fuse_in_header) + +- sizeof(struct fuse_notify_retrieve_in); +- +- if (bufv.buf[0].size < arg->size) { +- fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); +- fuse_reply_none(req); +- goto out; +- } +- bufv.buf[0].size = arg->size; +- +- if (se->op.retrieve_reply) { +- se->op.retrieve_reply(req, rreq->cookie, ino, +- arg->offset, &bufv); +- } else { +- fuse_reply_none(req); +- } ++static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, fuse_req_t req, ++ fuse_ino_t ino, const void *inarg, ++ const struct fuse_buf *ibuf) ++{ ++ struct fuse_session *se = req->se; ++ struct fuse_retrieve_req *rreq = ++ container_of(nreq, struct fuse_retrieve_req, nreq); ++ const struct fuse_notify_retrieve_in *arg = inarg; ++ struct fuse_bufvec bufv = { ++ .buf[0] = *ibuf, ++ .count = 1, ++ }; ++ ++ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { ++ bufv.buf[0].mem = PARAM(arg); ++ } ++ ++ bufv.buf[0].size -= ++ sizeof(struct fuse_in_header) + sizeof(struct fuse_notify_retrieve_in); ++ ++ if (bufv.buf[0].size < arg->size) { ++ fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); ++ fuse_reply_none(req); ++ goto out; ++ } ++ bufv.buf[0].size = arg->size; ++ ++ if (se->op.retrieve_reply) { ++ se->op.retrieve_reply(req, rreq->cookie, ino, arg->offset, &bufv); ++ } else { ++ fuse_reply_none(req); ++ } + out: +- free(rreq); ++ free(rreq); + } + + int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, +- size_t size, off_t offset, void *cookie) ++ size_t size, off_t offset, void *cookie) + { +- struct fuse_notify_retrieve_out outarg; 
+- struct iovec iov[2]; +- struct fuse_retrieve_req *rreq; +- int err; ++ struct fuse_notify_retrieve_out outarg; ++ struct iovec iov[2]; ++ struct fuse_retrieve_req *rreq; ++ int err; + +- if (!se) +- return -EINVAL; ++ if (!se) { ++ return -EINVAL; ++ } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) +- return -ENOSYS; ++ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { ++ return -ENOSYS; ++ } + +- rreq = malloc(sizeof(*rreq)); +- if (rreq == NULL) +- return -ENOMEM; ++ rreq = malloc(sizeof(*rreq)); ++ if (rreq == NULL) { ++ return -ENOMEM; ++ } + +- pthread_mutex_lock(&se->lock); +- rreq->cookie = cookie; +- rreq->nreq.unique = se->notify_ctr++; +- rreq->nreq.reply = fuse_ll_retrieve_reply; +- list_add_nreq(&rreq->nreq, &se->notify_list); +- pthread_mutex_unlock(&se->lock); ++ pthread_mutex_lock(&se->lock); ++ rreq->cookie = cookie; ++ rreq->nreq.unique = se->notify_ctr++; ++ rreq->nreq.reply = fuse_ll_retrieve_reply; ++ list_add_nreq(&rreq->nreq, &se->notify_list); ++ pthread_mutex_unlock(&se->lock); + +- outarg.notify_unique = rreq->nreq.unique; +- outarg.nodeid = ino; +- outarg.offset = offset; +- outarg.size = size; +- outarg.padding = 0; ++ outarg.notify_unique = rreq->nreq.unique; ++ outarg.nodeid = ino; ++ outarg.offset = offset; ++ outarg.size = size; ++ outarg.padding = 0; + +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); ++ iov[1].iov_base = &outarg; ++ iov[1].iov_len = sizeof(outarg); + +- err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); +- if (err) { +- pthread_mutex_lock(&se->lock); +- list_del_nreq(&rreq->nreq); +- pthread_mutex_unlock(&se->lock); +- free(rreq); +- } ++ err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); ++ if (err) { ++ pthread_mutex_lock(&se->lock); ++ list_del_nreq(&rreq->nreq); ++ pthread_mutex_unlock(&se->lock); ++ free(rreq); ++ } + +- return err; ++ return err; + } + + void *fuse_req_userdata(fuse_req_t req) + { +- return req->se->userdata; ++ return 
req->se->userdata; + } + + const struct fuse_ctx *fuse_req_ctx(fuse_req_t req) + { +- return &req->ctx; ++ return &req->ctx; + } + + void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, +- void *data) ++ void *data) + { +- pthread_mutex_lock(&req->lock); +- pthread_mutex_lock(&req->se->lock); +- req->u.ni.func = func; +- req->u.ni.data = data; +- pthread_mutex_unlock(&req->se->lock); +- if (req->interrupted && func) +- func(req, data); +- pthread_mutex_unlock(&req->lock); ++ pthread_mutex_lock(&req->lock); ++ pthread_mutex_lock(&req->se->lock); ++ req->u.ni.func = func; ++ req->u.ni.data = data; ++ pthread_mutex_unlock(&req->se->lock); ++ if (req->interrupted && func) { ++ func(req, data); ++ } ++ pthread_mutex_unlock(&req->lock); + } + + int fuse_req_interrupted(fuse_req_t req) + { +- int interrupted; ++ int interrupted; + +- pthread_mutex_lock(&req->se->lock); +- interrupted = req->interrupted; +- pthread_mutex_unlock(&req->se->lock); ++ pthread_mutex_lock(&req->se->lock); ++ interrupted = req->interrupted; ++ pthread_mutex_unlock(&req->se->lock); + +- return interrupted; ++ return interrupted; + } + + static struct { +- void (*func)(fuse_req_t, fuse_ino_t, const void *); +- const char *name; ++ void (*func)(fuse_req_t, fuse_ino_t, const void *); ++ const char *name; + } fuse_ll_ops[] = { +- [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, +- [FUSE_FORGET] = { do_forget, "FORGET" }, +- [FUSE_GETATTR] = { do_getattr, "GETATTR" }, +- [FUSE_SETATTR] = { do_setattr, "SETATTR" }, +- [FUSE_READLINK] = { do_readlink, "READLINK" }, +- [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, +- [FUSE_MKNOD] = { do_mknod, "MKNOD" }, +- [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, +- [FUSE_UNLINK] = { do_unlink, "UNLINK" }, +- [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, +- [FUSE_RENAME] = { do_rename, "RENAME" }, +- [FUSE_LINK] = { do_link, "LINK" }, +- [FUSE_OPEN] = { do_open, "OPEN" }, +- [FUSE_READ] = { do_read, "READ" }, +- [FUSE_WRITE] = { do_write, "WRITE" }, +- [FUSE_STATFS] = { 
do_statfs, "STATFS" }, +- [FUSE_RELEASE] = { do_release, "RELEASE" }, +- [FUSE_FSYNC] = { do_fsync, "FSYNC" }, +- [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, +- [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, +- [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, +- [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, +- [FUSE_FLUSH] = { do_flush, "FLUSH" }, +- [FUSE_INIT] = { do_init, "INIT" }, +- [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, +- [FUSE_READDIR] = { do_readdir, "READDIR" }, +- [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, +- [FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, +- [FUSE_GETLK] = { do_getlk, "GETLK" }, +- [FUSE_SETLK] = { do_setlk, "SETLK" }, +- [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, +- [FUSE_ACCESS] = { do_access, "ACCESS" }, +- [FUSE_CREATE] = { do_create, "CREATE" }, +- [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, +- [FUSE_BMAP] = { do_bmap, "BMAP" }, +- [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, +- [FUSE_POLL] = { do_poll, "POLL" }, +- [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, +- [FUSE_DESTROY] = { do_destroy, "DESTROY" }, +- [FUSE_NOTIFY_REPLY] = { (void *) 1, "NOTIFY_REPLY" }, +- [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, +- [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS"}, +- [FUSE_RENAME2] = { do_rename2, "RENAME2" }, +- [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, +- [FUSE_LSEEK] = { do_lseek, "LSEEK" }, ++ [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, ++ [FUSE_FORGET] = { do_forget, "FORGET" }, ++ [FUSE_GETATTR] = { do_getattr, "GETATTR" }, ++ [FUSE_SETATTR] = { do_setattr, "SETATTR" }, ++ [FUSE_READLINK] = { do_readlink, "READLINK" }, ++ [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, ++ [FUSE_MKNOD] = { do_mknod, "MKNOD" }, ++ [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, ++ [FUSE_UNLINK] = { do_unlink, "UNLINK" }, ++ [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, ++ [FUSE_RENAME] = { do_rename, "RENAME" }, ++ [FUSE_LINK] = { do_link, "LINK" }, ++ [FUSE_OPEN] = { do_open, "OPEN" }, ++ 
[FUSE_READ] = { do_read, "READ" }, ++ [FUSE_WRITE] = { do_write, "WRITE" }, ++ [FUSE_STATFS] = { do_statfs, "STATFS" }, ++ [FUSE_RELEASE] = { do_release, "RELEASE" }, ++ [FUSE_FSYNC] = { do_fsync, "FSYNC" }, ++ [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, ++ [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, ++ [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, ++ [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, ++ [FUSE_FLUSH] = { do_flush, "FLUSH" }, ++ [FUSE_INIT] = { do_init, "INIT" }, ++ [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, ++ [FUSE_READDIR] = { do_readdir, "READDIR" }, ++ [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, ++ [FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, ++ [FUSE_GETLK] = { do_getlk, "GETLK" }, ++ [FUSE_SETLK] = { do_setlk, "SETLK" }, ++ [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, ++ [FUSE_ACCESS] = { do_access, "ACCESS" }, ++ [FUSE_CREATE] = { do_create, "CREATE" }, ++ [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, ++ [FUSE_BMAP] = { do_bmap, "BMAP" }, ++ [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, ++ [FUSE_POLL] = { do_poll, "POLL" }, ++ [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, ++ [FUSE_DESTROY] = { do_destroy, "DESTROY" }, ++ [FUSE_NOTIFY_REPLY] = { (void *)1, "NOTIFY_REPLY" }, ++ [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, ++ [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS" }, ++ [FUSE_RENAME2] = { do_rename2, "RENAME2" }, ++ [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, ++ [FUSE_LSEEK] = { do_lseek, "LSEEK" }, + }; + + #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) + + static const char *opname(enum fuse_opcode opcode) + { +- if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) +- return "???"; +- else +- return fuse_ll_ops[opcode].name; ++ if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) { ++ return "???"; ++ } else { ++ return fuse_ll_ops[opcode].name; ++ } + } + + void fuse_session_process_buf(struct fuse_session *se, +- const struct fuse_buf *buf) ++ 
const struct fuse_buf *buf) + { +- fuse_session_process_buf_int(se, buf, NULL); ++ fuse_session_process_buf_int(se, buf, NULL); + } + + void fuse_session_process_buf_int(struct fuse_session *se, +- const struct fuse_buf *buf, struct fuse_chan *ch) +-{ +- struct fuse_in_header *in; +- const void *inarg; +- struct fuse_req *req; +- int err; +- +- in = buf->mem; +- +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, +- "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", +- (unsigned long long) in->unique, +- opname((enum fuse_opcode) in->opcode), in->opcode, +- (unsigned long long) in->nodeid, buf->size, in->pid); +- } +- +- req = fuse_ll_alloc_req(se); +- if (req == NULL) { +- struct fuse_out_header out = { +- .unique = in->unique, +- .error = -ENOMEM, +- }; +- struct iovec iov = { +- .iov_base = &out, +- .iov_len = sizeof(struct fuse_out_header), +- }; +- +- fuse_send_msg(se, ch, &iov, 1); +- return; +- } +- +- req->unique = in->unique; +- req->ctx.uid = in->uid; +- req->ctx.gid = in->gid; +- req->ctx.pid = in->pid; +- req->ch = ch; +- +- err = EIO; +- if (!se->got_init) { +- enum fuse_opcode expected; +- +- expected = se->cuse_data ? 
CUSE_INIT : FUSE_INIT; +- if (in->opcode != expected) +- goto reply_err; +- } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) +- goto reply_err; +- +- err = EACCES; +- /* Implement -o allow_root */ +- if (se->deny_others && in->uid != se->owner && in->uid != 0 && +- in->opcode != FUSE_INIT && in->opcode != FUSE_READ && +- in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && +- in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && +- in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && +- in->opcode != FUSE_NOTIFY_REPLY && +- in->opcode != FUSE_READDIRPLUS) +- goto reply_err; +- +- err = ENOSYS; +- if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) +- goto reply_err; +- if (in->opcode != FUSE_INTERRUPT) { +- struct fuse_req *intr; +- pthread_mutex_lock(&se->lock); +- intr = check_interrupt(se, req); +- list_add_req(req, &se->list); +- pthread_mutex_unlock(&se->lock); +- if (intr) +- fuse_reply_err(intr, EAGAIN); +- } +- +- inarg = (void *) &in[1]; +- if (in->opcode == FUSE_WRITE && se->op.write_buf) +- do_write_buf(req, in->nodeid, inarg, buf); +- else if (in->opcode == FUSE_NOTIFY_REPLY) +- do_notify_reply(req, in->nodeid, inarg, buf); +- else +- fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); +- +- return; ++ const struct fuse_buf *buf, ++ struct fuse_chan *ch) ++{ ++ struct fuse_in_header *in; ++ const void *inarg; ++ struct fuse_req *req; ++ int err; ++ ++ in = buf->mem; ++ ++ if (se->debug) { ++ fuse_log(FUSE_LOG_DEBUG, ++ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, " ++ "pid: %u\n", ++ (unsigned long long)in->unique, ++ opname((enum fuse_opcode)in->opcode), in->opcode, ++ (unsigned long long)in->nodeid, buf->size, in->pid); ++ } ++ ++ req = fuse_ll_alloc_req(se); ++ if (req == NULL) { ++ struct fuse_out_header out = { ++ .unique = in->unique, ++ .error = -ENOMEM, ++ }; ++ struct iovec iov = { ++ .iov_base = &out, ++ .iov_len = sizeof(struct fuse_out_header), ++ }; ++ ++ fuse_send_msg(se, ch, 
&iov, 1); ++ return; ++ } ++ ++ req->unique = in->unique; ++ req->ctx.uid = in->uid; ++ req->ctx.gid = in->gid; ++ req->ctx.pid = in->pid; ++ req->ch = ch; ++ ++ err = EIO; ++ if (!se->got_init) { ++ enum fuse_opcode expected; ++ ++ expected = se->cuse_data ? CUSE_INIT : FUSE_INIT; ++ if (in->opcode != expected) { ++ goto reply_err; ++ } ++ } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) { ++ goto reply_err; ++ } ++ ++ err = EACCES; ++ /* Implement -o allow_root */ ++ if (se->deny_others && in->uid != se->owner && in->uid != 0 && ++ in->opcode != FUSE_INIT && in->opcode != FUSE_READ && ++ in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && ++ in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && ++ in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && ++ in->opcode != FUSE_NOTIFY_REPLY && in->opcode != FUSE_READDIRPLUS) { ++ goto reply_err; ++ } ++ ++ err = ENOSYS; ++ if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) { ++ goto reply_err; ++ } ++ if (in->opcode != FUSE_INTERRUPT) { ++ struct fuse_req *intr; ++ pthread_mutex_lock(&se->lock); ++ intr = check_interrupt(se, req); ++ list_add_req(req, &se->list); ++ pthread_mutex_unlock(&se->lock); ++ if (intr) { ++ fuse_reply_err(intr, EAGAIN); ++ } ++ } ++ ++ inarg = (void *)&in[1]; ++ if (in->opcode == FUSE_WRITE && se->op.write_buf) { ++ do_write_buf(req, in->nodeid, inarg, buf); ++ } else if (in->opcode == FUSE_NOTIFY_REPLY) { ++ do_notify_reply(req, in->nodeid, inarg, buf); ++ } else { ++ fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); ++ } ++ ++ return; + + reply_err: +- fuse_reply_err(req, err); ++ fuse_reply_err(req, err); + } + +-#define LL_OPTION(n,o,v) \ +- { n, offsetof(struct fuse_session, o), v } ++#define LL_OPTION(n, o, v) \ ++ { \ ++ n, offsetof(struct fuse_session, o), v \ ++ } + + static const struct fuse_opt fuse_ll_opts[] = { +- LL_OPTION("debug", debug, 1), +- LL_OPTION("-d", debug, 1), +- LL_OPTION("--debug", debug, 1), +- 
LL_OPTION("allow_root", deny_others, 1), +- FUSE_OPT_END ++ LL_OPTION("debug", debug, 1), LL_OPTION("-d", debug, 1), ++ LL_OPTION("--debug", debug, 1), LL_OPTION("allow_root", deny_others, 1), ++ FUSE_OPT_END + }; + + void fuse_lowlevel_version(void) + { +- printf("using FUSE kernel interface version %i.%i\n", +- FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); ++ printf("using FUSE kernel interface version %i.%i\n", FUSE_KERNEL_VERSION, ++ FUSE_KERNEL_MINOR_VERSION); + } + + void fuse_lowlevel_help(void) + { +- /* These are not all options, but the ones that are +- potentially of interest to an end-user */ +- printf( +-" -o allow_root allow access by root\n" +-); ++ /* ++ * These are not all options, but the ones that are ++ * potentially of interest to an end-user ++ */ ++ printf(" -o allow_root allow access by root\n"); + } + + void fuse_session_destroy(struct fuse_session *se) + { +- if (se->got_init && !se->got_destroy) { +- if (se->op.destroy) +- se->op.destroy(se->userdata); +- } +- pthread_mutex_destroy(&se->lock); +- free(se->cuse_data); +- if (se->fd != -1) +- close(se->fd); +- free(se); ++ if (se->got_init && !se->got_destroy) { ++ if (se->op.destroy) { ++ se->op.destroy(se->userdata); ++ } ++ } ++ pthread_mutex_destroy(&se->lock); ++ free(se->cuse_data); ++ if (se->fd != -1) { ++ close(se->fd); ++ } ++ free(se); + } + + + struct fuse_session *fuse_session_new(struct fuse_args *args, +- const struct fuse_lowlevel_ops *op, +- size_t op_size, void *userdata) +-{ +- struct fuse_session *se; +- +- if (sizeof(struct fuse_lowlevel_ops) < op_size) { +- fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); +- op_size = sizeof(struct fuse_lowlevel_ops); +- } +- +- if (args->argc == 0) { +- fuse_log(FUSE_LOG_ERR, "fuse: empty argv passed to fuse_session_new().\n"); +- return NULL; +- } +- +- se = (struct fuse_session *) calloc(1, sizeof(struct fuse_session)); +- if (se == NULL) { +- fuse_log(FUSE_LOG_ERR, "fuse: failed to 
allocate fuse object\n"); +- goto out1; +- } +- se->fd = -1; +- se->conn.max_write = UINT_MAX; +- se->conn.max_readahead = UINT_MAX; +- +- /* Parse options */ +- if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) +- goto out2; +- if(args->argc == 1 && +- args->argv[0][0] == '-') { +- fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " +- "will be ignored\n"); +- } else if (args->argc != 1) { +- int i; +- fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); +- for(i = 1; i < args->argc-1; i++) +- fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); +- fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); +- goto out4; +- } +- +- se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + +- FUSE_BUFFER_HEADER_SIZE; +- +- list_init_req(&se->list); +- list_init_req(&se->interrupts); +- list_init_nreq(&se->notify_list); +- se->notify_ctr = 1; +- fuse_mutex_init(&se->lock); +- +- memcpy(&se->op, op, op_size); +- se->owner = getuid(); +- se->userdata = userdata; +- +- return se; ++ const struct fuse_lowlevel_ops *op, ++ size_t op_size, void *userdata) ++{ ++ struct fuse_session *se; ++ ++ if (sizeof(struct fuse_lowlevel_ops) < op_size) { ++ fuse_log( ++ FUSE_LOG_ERR, ++ "fuse: warning: library too old, some operations may not work\n"); ++ op_size = sizeof(struct fuse_lowlevel_ops); ++ } ++ ++ if (args->argc == 0) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: empty argv passed to fuse_session_new().\n"); ++ return NULL; ++ } ++ ++ se = (struct fuse_session *)calloc(1, sizeof(struct fuse_session)); ++ if (se == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate fuse object\n"); ++ goto out1; ++ } ++ se->fd = -1; ++ se->conn.max_write = UINT_MAX; ++ se->conn.max_readahead = UINT_MAX; ++ ++ /* Parse options */ ++ if (fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) { ++ goto out2; ++ } ++ if (args->argc == 1 && args->argv[0][0] == '-') { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: warning: argv[0] looks like an option, but " ++ "will be ignored\n"); ++ } else if 
(args->argc != 1) { ++ int i; ++ fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); ++ for (i = 1; i < args->argc - 1; i++) { ++ fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); ++ } ++ fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); ++ goto out4; ++ } ++ ++ se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE; ++ ++ list_init_req(&se->list); ++ list_init_req(&se->interrupts); ++ list_init_nreq(&se->notify_list); ++ se->notify_ctr = 1; ++ fuse_mutex_init(&se->lock); ++ ++ memcpy(&se->op, op, op_size); ++ se->owner = getuid(); ++ se->userdata = userdata; ++ ++ return se; + + out4: +- fuse_opt_free_args(args); ++ fuse_opt_free_args(args); + out2: +- free(se); ++ free(se); + out1: +- return NULL; ++ return NULL; + } + + int fuse_session_mount(struct fuse_session *se, const char *mountpoint) + { +- int fd; +- +- /* +- * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos +- * would ensue. +- */ +- do { +- fd = open("/dev/null", O_RDWR); +- if (fd > 2) +- close(fd); +- } while (fd >= 0 && fd <= 2); +- +- /* +- * To allow FUSE daemons to run without privileges, the caller may open +- * /dev/fuse before launching the file system and pass on the file +- * descriptor by specifying /dev/fd/N as the mount point. Note that the +- * parent process takes care of performing the mount in this case. +- */ +- fd = fuse_mnt_parse_fuse_fd(mountpoint); +- if (fd != -1) { +- if (fcntl(fd, F_GETFD) == -1) { +- fuse_log(FUSE_LOG_ERR, +- "fuse: Invalid file descriptor /dev/fd/%u\n", +- fd); +- return -1; +- } +- se->fd = fd; +- return 0; +- } +- +- /* Open channel */ +- fd = fuse_kern_mount(mountpoint, se->mo); +- if (fd == -1) +- return -1; +- se->fd = fd; +- +- /* Save mountpoint */ +- se->mountpoint = strdup(mountpoint); +- if (se->mountpoint == NULL) +- goto error_out; +- +- return 0; ++ int fd; ++ ++ /* ++ * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos ++ * would ensue. 
++ */ ++ do { ++ fd = open("/dev/null", O_RDWR); ++ if (fd > 2) { ++ close(fd); ++ } ++ } while (fd >= 0 && fd <= 2); ++ ++ /* ++ * To allow FUSE daemons to run without privileges, the caller may open ++ * /dev/fuse before launching the file system and pass on the file ++ * descriptor by specifying /dev/fd/N as the mount point. Note that the ++ * parent process takes care of performing the mount in this case. ++ */ ++ fd = fuse_mnt_parse_fuse_fd(mountpoint); ++ if (fd != -1) { ++ if (fcntl(fd, F_GETFD) == -1) { ++ fuse_log(FUSE_LOG_ERR, "fuse: Invalid file descriptor /dev/fd/%u\n", ++ fd); ++ return -1; ++ } ++ se->fd = fd; ++ return 0; ++ } ++ ++ /* Open channel */ ++ fd = fuse_kern_mount(mountpoint, se->mo); ++ if (fd == -1) { ++ return -1; ++ } ++ se->fd = fd; ++ ++ /* Save mountpoint */ ++ se->mountpoint = strdup(mountpoint); ++ if (se->mountpoint == NULL) { ++ goto error_out; ++ } ++ ++ return 0; + + error_out: +- fuse_kern_unmount(mountpoint, fd); +- return -1; ++ fuse_kern_unmount(mountpoint, fd); ++ return -1; + } + + int fuse_session_fd(struct fuse_session *se) + { +- return se->fd; ++ return se->fd; + } + + void fuse_session_unmount(struct fuse_session *se) +@@ -2384,61 +2519,66 @@ void fuse_session_unmount(struct fuse_session *se) + #ifdef linux + int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) + { +- char *buf; +- size_t bufsize = 1024; +- char path[128]; +- int ret; +- int fd; +- unsigned long pid = req->ctx.pid; +- char *s; ++ char *buf; ++ size_t bufsize = 1024; ++ char path[128]; ++ int ret; ++ int fd; ++ unsigned long pid = req->ctx.pid; ++ char *s; + +- sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); ++ sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); + + retry: +- buf = malloc(bufsize); +- if (buf == NULL) +- return -ENOMEM; +- +- ret = -EIO; +- fd = open(path, O_RDONLY); +- if (fd == -1) +- goto out_free; +- +- ret = read(fd, buf, bufsize); +- close(fd); +- if (ret < 0) { +- ret = -EIO; +- goto out_free; +- } +- +- if 
((size_t)ret == bufsize) { +- free(buf); +- bufsize *= 4; +- goto retry; +- } +- +- ret = -EIO; +- s = strstr(buf, "\nGroups:"); +- if (s == NULL) +- goto out_free; +- +- s += 8; +- ret = 0; +- while (1) { +- char *end; +- unsigned long val = strtoul(s, &end, 0); +- if (end == s) +- break; +- +- s = end; +- if (ret < size) +- list[ret] = val; +- ret++; +- } ++ buf = malloc(bufsize); ++ if (buf == NULL) { ++ return -ENOMEM; ++ } ++ ++ ret = -EIO; ++ fd = open(path, O_RDONLY); ++ if (fd == -1) { ++ goto out_free; ++ } ++ ++ ret = read(fd, buf, bufsize); ++ close(fd); ++ if (ret < 0) { ++ ret = -EIO; ++ goto out_free; ++ } ++ ++ if ((size_t)ret == bufsize) { ++ free(buf); ++ bufsize *= 4; ++ goto retry; ++ } ++ ++ ret = -EIO; ++ s = strstr(buf, "\nGroups:"); ++ if (s == NULL) { ++ goto out_free; ++ } ++ ++ s += 8; ++ ret = 0; ++ while (1) { ++ char *end; ++ unsigned long val = strtoul(s, &end, 0); ++ if (end == s) { ++ break; ++ } ++ ++ s = end; ++ if (ret < size) { ++ list[ret] = val; ++ } ++ ret++; ++ } + + out_free: +- free(buf); +- return ret; ++ free(buf); ++ return ret; + } + #else /* linux */ + /* +@@ -2446,23 +2586,25 @@ out_free: + */ + int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) + { +- (void) req; (void) size; (void) list; +- return -ENOSYS; ++ (void)req; ++ (void)size; ++ (void)list; ++ return -ENOSYS; + } + #endif + + void fuse_session_exit(struct fuse_session *se) + { +- se->exited = 1; ++ se->exited = 1; + } + + void fuse_session_reset(struct fuse_session *se) + { +- se->exited = 0; +- se->error = 0; ++ se->exited = 0; ++ se->error = 0; + } + + int fuse_session_exited(struct fuse_session *se) + { +- return se->exited; ++ return se->exited; + } +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 6b1adfc..adb9054 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1,15 +1,16 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- 
This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. ++ */ + + #ifndef FUSE_LOWLEVEL_H_ + #define FUSE_LOWLEVEL_H_ + +-/** @file ++/** ++ * @file + * + * Low level API + * +@@ -24,16 +25,16 @@ + + #include "fuse_common.h" + +-#include + #include +-#include + #include + #include ++#include + #include ++#include + +-/* ----------------------------------------------------------- * +- * Miscellaneous definitions * +- * ----------------------------------------------------------- */ ++/* ++ * Miscellaneous definitions ++ */ + + /** The node ID of the root inode */ + #define FUSE_ROOT_ID 1 +@@ -53,47 +54,54 @@ struct fuse_session; + + /** Directory entry parameters supplied to fuse_reply_entry() */ + struct fuse_entry_param { +- /** Unique inode number +- * +- * In lookup, zero means negative entry (from version 2.5) +- * Returning ENOENT also means negative entry, but by setting zero +- * ino the kernel may cache negative entries for entry_timeout +- * seconds. +- */ +- fuse_ino_t ino; +- +- /** Generation number for this entry. +- * +- * If the file system will be exported over NFS, the +- * ino/generation pairs need to be unique over the file +- * system's lifetime (rather than just the mount time). So if +- * the file system reuses an inode after it has been deleted, +- * it must assign a new, previously unused generation number +- * to the inode at the same time. +- * +- */ +- uint64_t generation; +- +- /** Inode attributes. +- * +- * Even if attr_timeout == 0, attr must be correct. For example, +- * for open(), FUSE uses attr.st_size from lookup() to determine +- * how many bytes to request. If this value is not correct, +- * incorrect data will be returned. 
+- */ +- struct stat attr; +- +- /** Validity timeout (in seconds) for inode attributes. If +- attributes only change as a result of requests that come +- through the kernel, this should be set to a very large +- value. */ +- double attr_timeout; +- +- /** Validity timeout (in seconds) for the name. If directory +- entries are changed/deleted only as a result of requests +- that come through the kernel, this should be set to a very +- large value. */ +- double entry_timeout; ++ /** ++ * Unique inode number ++ * ++ * In lookup, zero means negative entry (from version 2.5) ++ * Returning ENOENT also means negative entry, but by setting zero ++ * ino the kernel may cache negative entries for entry_timeout ++ * seconds. ++ */ ++ fuse_ino_t ino; ++ ++ /** ++ * Generation number for this entry. ++ * ++ * If the file system will be exported over NFS, the ++ * ino/generation pairs need to be unique over the file ++ * system's lifetime (rather than just the mount time). So if ++ * the file system reuses an inode after it has been deleted, ++ * it must assign a new, previously unused generation number ++ * to the inode at the same time. ++ * ++ */ ++ uint64_t generation; ++ ++ /** ++ * Inode attributes. ++ * ++ * Even if attr_timeout == 0, attr must be correct. For example, ++ * for open(), FUSE uses attr.st_size from lookup() to determine ++ * how many bytes to request. If this value is not correct, ++ * incorrect data will be returned. ++ */ ++ struct stat attr; ++ ++ /** ++ * Validity timeout (in seconds) for inode attributes. If ++ * attributes only change as a result of requests that come ++ * through the kernel, this should be set to a very large ++ * value. ++ */ ++ double attr_timeout; ++ ++ /** ++ * Validity timeout (in seconds) for the name. If directory ++ * entries are changed/deleted only as a result of requests ++ * that come through the kernel, this should be set to a very ++ * large value. 
++ */ ++ double entry_timeout; + }; + + /** +@@ -105,38 +113,38 @@ struct fuse_entry_param { + * there is no valid uid/pid/gid that could be reported. + */ + struct fuse_ctx { +- /** User ID of the calling process */ +- uid_t uid; ++ /** User ID of the calling process */ ++ uid_t uid; + +- /** Group ID of the calling process */ +- gid_t gid; ++ /** Group ID of the calling process */ ++ gid_t gid; + +- /** Thread ID of the calling process */ +- pid_t pid; ++ /** Thread ID of the calling process */ ++ pid_t pid; + +- /** Umask of the calling process */ +- mode_t umask; ++ /** Umask of the calling process */ ++ mode_t umask; + }; + + struct fuse_forget_data { +- fuse_ino_t ino; +- uint64_t nlookup; ++ fuse_ino_t ino; ++ uint64_t nlookup; + }; + + /* 'to_set' flags in setattr */ +-#define FUSE_SET_ATTR_MODE (1 << 0) +-#define FUSE_SET_ATTR_UID (1 << 1) +-#define FUSE_SET_ATTR_GID (1 << 2) +-#define FUSE_SET_ATTR_SIZE (1 << 3) +-#define FUSE_SET_ATTR_ATIME (1 << 4) +-#define FUSE_SET_ATTR_MTIME (1 << 5) +-#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) +-#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) +-#define FUSE_SET_ATTR_CTIME (1 << 10) +- +-/* ----------------------------------------------------------- * +- * Request methods and replies * +- * ----------------------------------------------------------- */ ++#define FUSE_SET_ATTR_MODE (1 << 0) ++#define FUSE_SET_ATTR_UID (1 << 1) ++#define FUSE_SET_ATTR_GID (1 << 2) ++#define FUSE_SET_ATTR_SIZE (1 << 3) ++#define FUSE_SET_ATTR_ATIME (1 << 4) ++#define FUSE_SET_ATTR_MTIME (1 << 5) ++#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) ++#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) ++#define FUSE_SET_ATTR_CTIME (1 << 10) ++ ++/* ++ * Request methods and replies ++ */ + + /** + * Low level filesystem operations +@@ -166,1075 +174,1069 @@ struct fuse_forget_data { + * this file will not be called. 
+ */ + struct fuse_lowlevel_ops { +- /** +- * Initialize filesystem +- * +- * This function is called when libfuse establishes +- * communication with the FUSE kernel module. The file system +- * should use this module to inspect and/or modify the +- * connection parameters provided in the `conn` structure. +- * +- * Note that some parameters may be overwritten by options +- * passed to fuse_session_new() which take precedence over the +- * values set in this handler. +- * +- * There's no reply to this function +- * +- * @param userdata the user data passed to fuse_session_new() +- */ +- void (*init) (void *userdata, struct fuse_conn_info *conn); +- +- /** +- * Clean up filesystem. +- * +- * Called on filesystem exit. When this method is called, the +- * connection to the kernel may be gone already, so that eg. calls +- * to fuse_lowlevel_notify_* will fail. +- * +- * There's no reply to this function +- * +- * @param userdata the user data passed to fuse_session_new() +- */ +- void (*destroy) (void *userdata); +- +- /** +- * Look up a directory entry by name and get its attributes. +- * +- * Valid replies: +- * fuse_reply_entry +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name the name to look up +- */ +- void (*lookup) (fuse_req_t req, fuse_ino_t parent, const char *name); +- +- /** +- * Forget about an inode +- * +- * This function is called when the kernel removes an inode +- * from its internal caches. +- * +- * The inode's lookup count increases by one for every call to +- * fuse_reply_entry and fuse_reply_create. The nlookup parameter +- * indicates by how much the lookup count should be decreased. +- * +- * Inodes with a non-zero lookup count may receive request from +- * the kernel even after calls to unlink, rmdir or (when +- * overwriting an existing file) rename. 
Filesystems must handle +- * such requests properly and it is recommended to defer removal +- * of the inode until the lookup count reaches zero. Calls to +- * unlink, rmdir or rename will be followed closely by forget +- * unless the file or directory is open, in which case the +- * kernel issues forget only after the release or releasedir +- * calls. +- * +- * Note that if a file system will be exported over NFS the +- * inodes lifetime must extend even beyond forget. See the +- * generation field in struct fuse_entry_param above. +- * +- * On unmount the lookup count for all inodes implicitly drops +- * to zero. It is not guaranteed that the file system will +- * receive corresponding forget messages for the affected +- * inodes. +- * +- * Valid replies: +- * fuse_reply_none +- * +- * @param req request handle +- * @param ino the inode number +- * @param nlookup the number of lookups to forget +- */ +- void (*forget) (fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); +- +- /** +- * Get file attributes. +- * +- * If writeback caching is enabled, the kernel may have a +- * better idea of a file's length than the FUSE file system +- * (eg if there has been a write that extended the file size, +- * but that has not yet been passed to the filesystem.n +- * +- * In this case, the st_size value provided by the file system +- * will be ignored. +- * +- * Valid replies: +- * fuse_reply_attr +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi for future use, currently always NULL +- */ +- void (*getattr) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Set file attributes +- * +- * In the 'attr' argument only members indicated by the 'to_set' +- * bitmask contain valid values. Other members contain undefined +- * values. +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits if the file +- * size or owner is being changed. 
+- * +- * If the setattr was invoked from the ftruncate() system call +- * under Linux kernel versions 2.6.15 or later, the fi->fh will +- * contain the value set by the open method or will be undefined +- * if the open method didn't set any value. Otherwise (not +- * ftruncate call, or kernel version earlier than 2.6.15) the fi +- * parameter will be NULL. +- * +- * Valid replies: +- * fuse_reply_attr +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param attr the attributes +- * @param to_set bit mask of attributes which should be set +- * @param fi file information, or NULL +- */ +- void (*setattr) (fuse_req_t req, fuse_ino_t ino, struct stat *attr, +- int to_set, struct fuse_file_info *fi); +- +- /** +- * Read symbolic link +- * +- * Valid replies: +- * fuse_reply_readlink +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- */ +- void (*readlink) (fuse_req_t req, fuse_ino_t ino); +- +- /** +- * Create file node +- * +- * Create a regular file, character device, block device, fifo or +- * socket node. 
+- * +- * Valid replies: +- * fuse_reply_entry +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name to create +- * @param mode file type and mode with which to create the new file +- * @param rdev the device number (only valid if created file is a device) +- */ +- void (*mknod) (fuse_req_t req, fuse_ino_t parent, const char *name, +- mode_t mode, dev_t rdev); +- +- /** +- * Create a directory +- * +- * Valid replies: +- * fuse_reply_entry +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name to create +- * @param mode with which to create the new file +- */ +- void (*mkdir) (fuse_req_t req, fuse_ino_t parent, const char *name, +- mode_t mode); +- +- /** +- * Remove a file +- * +- * If the file's inode's lookup count is non-zero, the file +- * system is expected to postpone any removal of the inode +- * until the lookup count reaches zero (see description of the +- * forget function). +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name to remove +- */ +- void (*unlink) (fuse_req_t req, fuse_ino_t parent, const char *name); +- +- /** +- * Remove a directory +- * +- * If the directory's inode's lookup count is non-zero, the +- * file system is expected to postpone any removal of the +- * inode until the lookup count reaches zero (see description +- * of the forget function). 
+- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name to remove +- */ +- void (*rmdir) (fuse_req_t req, fuse_ino_t parent, const char *name); +- +- /** +- * Create a symbolic link +- * +- * Valid replies: +- * fuse_reply_entry +- * fuse_reply_err +- * +- * @param req request handle +- * @param link the contents of the symbolic link +- * @param parent inode number of the parent directory +- * @param name to create +- */ +- void (*symlink) (fuse_req_t req, const char *link, fuse_ino_t parent, +- const char *name); +- +- /** Rename a file +- * +- * If the target exists it should be atomically replaced. If +- * the target's inode's lookup count is non-zero, the file +- * system is expected to postpone any removal of the inode +- * until the lookup count reaches zero (see description of the +- * forget function). +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EINVAL, i.e. all +- * future bmap requests will fail with EINVAL without being +- * send to the filesystem process. +- * +- * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If +- * RENAME_NOREPLACE is specified, the filesystem must not +- * overwrite *newname* if it exists and return an error +- * instead. If `RENAME_EXCHANGE` is specified, the filesystem +- * must atomically exchange the two files, i.e. both must +- * exist and neither may be deleted. 
+- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the old parent directory +- * @param name old name +- * @param newparent inode number of the new parent directory +- * @param newname new name +- */ +- void (*rename) (fuse_req_t req, fuse_ino_t parent, const char *name, +- fuse_ino_t newparent, const char *newname, +- unsigned int flags); +- +- /** +- * Create a hard link +- * +- * Valid replies: +- * fuse_reply_entry +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the old inode number +- * @param newparent inode number of the new parent directory +- * @param newname new name to create +- */ +- void (*link) (fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, +- const char *newname); +- +- /** +- * Open a file +- * +- * Open flags are available in fi->flags. The following rules +- * apply. +- * +- * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be +- * filtered out / handled by the kernel. +- * +- * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used +- * by the filesystem to check if the operation is +- * permitted. If the ``-o default_permissions`` mount +- * option is given, this check is already done by the +- * kernel before calling open() and may thus be omitted by +- * the filesystem. +- * +- * - When writeback caching is enabled, the kernel may send +- * read requests even for files opened with O_WRONLY. The +- * filesystem should be prepared to handle this. +- * +- * - When writeback caching is disabled, the filesystem is +- * expected to properly handle the O_APPEND flag and ensure +- * that each write is appending to the end of the file. +- * +- * - When writeback caching is enabled, the kernel will +- * handle O_APPEND. However, unless all changes to the file +- * come through the kernel this will not work reliably. 
The +- * filesystem should thus either ignore the O_APPEND flag +- * (and let the kernel handle it), or return an error +- * (indicating that reliably O_APPEND is not available). +- * +- * Filesystem may store an arbitrary file handle (pointer, +- * index, etc) in fi->fh, and use this in other all other file +- * operations (read, write, flush, release, fsync). +- * +- * Filesystem may also implement stateless file I/O and not store +- * anything in fi->fh. +- * +- * There are also some flags (direct_io, keep_cache) which the +- * filesystem may set in fi, to change the way the file is opened. +- * See fuse_file_info structure in for more details. +- * +- * If this request is answered with an error code of ENOSYS +- * and FUSE_CAP_NO_OPEN_SUPPORT is set in +- * `fuse_conn_info.capable`, this is treated as success and +- * future calls to open and release will also succeed without being +- * sent to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_open +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- */ +- void (*open) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Read data +- * +- * Read should send exactly the number of bytes requested except +- * on EOF or error, otherwise the rest of the data will be +- * substituted with zeroes. An exception to this is when the file +- * has been opened in 'direct_io' mode, in which case the return +- * value of the read system call will reflect the return value of +- * this operation. +- * +- * fi->fh will contain the value set by the open method, or will +- * be undefined if the open method didn't set any value. 
+- * +- * Valid replies: +- * fuse_reply_buf +- * fuse_reply_iov +- * fuse_reply_data +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param size number of bytes to read +- * @param off offset to read from +- * @param fi file information +- */ +- void (*read) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, +- struct fuse_file_info *fi); +- +- /** +- * Write data +- * +- * Write should return exactly the number of bytes requested +- * except on error. An exception to this is when the file has +- * been opened in 'direct_io' mode, in which case the return value +- * of the write system call will reflect the return value of this +- * operation. +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. +- * +- * fi->fh will contain the value set by the open method, or will +- * be undefined if the open method didn't set any value. +- * +- * Valid replies: +- * fuse_reply_write +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param buf data to write +- * @param size number of bytes to write +- * @param off offset to write to +- * @param fi file information +- */ +- void (*write) (fuse_req_t req, fuse_ino_t ino, const char *buf, +- size_t size, off_t off, struct fuse_file_info *fi); +- +- /** +- * Flush method +- * +- * This is called on each close() of the opened file. +- * +- * Since file descriptors can be duplicated (dup, dup2, fork), for +- * one open call there may be many flush calls. +- * +- * Filesystems shouldn't assume that flush will always be called +- * after some writes, or that if will be called at all. +- * +- * fi->fh will contain the value set by the open method, or will +- * be undefined if the open method didn't set any value. +- * +- * NOTE: the name of the method is misleading, since (unlike +- * fsync) the filesystem is not forced to flush pending writes. 
+- * One reason to flush data is if the filesystem wants to return +- * write errors during close. However, such use is non-portable +- * because POSIX does not require [close] to wait for delayed I/O to +- * complete. +- * +- * If the filesystem supports file locking operations (setlk, +- * getlk) it should remove all locks belonging to 'fi->owner'. +- * +- * If this request is answered with an error code of ENOSYS, +- * this is treated as success and future calls to flush() will +- * succeed automatically without being send to the filesystem +- * process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- * +- * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html +- */ +- void (*flush) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Release an open file +- * +- * Release is called when there are no more references to an open +- * file: all file descriptors are closed and all memory mappings +- * are unmapped. +- * +- * For every open call there will be exactly one release call (unless +- * the filesystem is force-unmounted). +- * +- * The filesystem may reply with an error, but error values are +- * not returned to close() or munmap() which triggered the +- * release. +- * +- * fi->fh will contain the value set by the open method, or will +- * be undefined if the open method didn't set any value. +- * fi->flags will contain the same flags as for open. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- */ +- void (*release) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Synchronize file contents +- * +- * If the datasync parameter is non-zero, then only the user data +- * should be flushed, not the meta data. 
+- * +- * If this request is answered with an error code of ENOSYS, +- * this is treated as success and future calls to fsync() will +- * succeed automatically without being send to the filesystem +- * process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param datasync flag indicating if only data should be flushed +- * @param fi file information +- */ +- void (*fsync) (fuse_req_t req, fuse_ino_t ino, int datasync, +- struct fuse_file_info *fi); +- +- /** +- * Open a directory +- * +- * Filesystem may store an arbitrary file handle (pointer, index, +- * etc) in fi->fh, and use this in other all other directory +- * stream operations (readdir, releasedir, fsyncdir). +- * +- * If this request is answered with an error code of ENOSYS and +- * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, +- * this is treated as success and future calls to opendir and +- * releasedir will also succeed without being sent to the filesystem +- * process. In addition, the kernel will cache readdir results +- * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. +- * +- * Valid replies: +- * fuse_reply_open +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- */ +- void (*opendir) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Read directory +- * +- * Send a buffer filled using fuse_add_direntry(), with size not +- * exceeding the requested size. Send an empty buffer on end of +- * stream. +- * +- * fi->fh will contain the value set by the opendir method, or +- * will be undefined if the opendir method didn't set any value. +- * +- * Returning a directory entry from readdir() does not affect +- * its lookup count. +- * +- * If off_t is non-zero, then it will correspond to one of the off_t +- * values that was previously returned by readdir() for the same +- * directory handle. 
In this case, readdir() should skip over entries +- * coming before the position defined by the off_t value. If entries +- * are added or removed while the directory handle is open, they filesystem +- * may still include the entries that have been removed, and may not +- * report the entries that have been created. However, addition or +- * removal of entries must never cause readdir() to skip over unrelated +- * entries or to report them more than once. This means +- * that off_t can not be a simple index that enumerates the entries +- * that have been returned but must contain sufficient information to +- * uniquely determine the next directory entry to return even when the +- * set of entries is changing. +- * +- * The function does not have to report the '.' and '..' +- * entries, but is allowed to do so. Note that, if readdir does +- * not return '.' or '..', they will not be implicitly returned, +- * and this behavior is observable by the caller. +- * +- * Valid replies: +- * fuse_reply_buf +- * fuse_reply_data +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param size maximum number of bytes to send +- * @param off offset to continue reading the directory stream +- * @param fi file information +- */ +- void (*readdir) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, +- struct fuse_file_info *fi); +- +- /** +- * Release an open directory +- * +- * For every opendir call there will be exactly one releasedir +- * call (unless the filesystem is force-unmounted). +- * +- * fi->fh will contain the value set by the opendir method, or +- * will be undefined if the opendir method didn't set any value. 
+- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- */ +- void (*releasedir) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi); +- +- /** +- * Synchronize directory contents +- * +- * If the datasync parameter is non-zero, then only the directory +- * contents should be flushed, not the meta data. +- * +- * fi->fh will contain the value set by the opendir method, or +- * will be undefined if the opendir method didn't set any value. +- * +- * If this request is answered with an error code of ENOSYS, +- * this is treated as success and future calls to fsyncdir() will +- * succeed automatically without being send to the filesystem +- * process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param datasync flag indicating if only data should be flushed +- * @param fi file information +- */ +- void (*fsyncdir) (fuse_req_t req, fuse_ino_t ino, int datasync, +- struct fuse_file_info *fi); +- +- /** +- * Get file system statistics +- * +- * Valid replies: +- * fuse_reply_statfs +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number, zero means "undefined" +- */ +- void (*statfs) (fuse_req_t req, fuse_ino_t ino); +- +- /** +- * Set an extended attribute +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all +- * future setxattr() requests will fail with EOPNOTSUPP without being +- * send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_err +- */ +- void (*setxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, +- const char *value, size_t size, int flags); +- +- /** +- * Get an extended attribute +- * +- * If size is zero, the size of the value should be sent with +- * fuse_reply_xattr. 
+- * +- * If the size is non-zero, and the value fits in the buffer, the +- * value should be sent with fuse_reply_buf. +- * +- * If the size is too small for the value, the ERANGE error should +- * be sent. +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all +- * future getxattr() requests will fail with EOPNOTSUPP without being +- * send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_buf +- * fuse_reply_data +- * fuse_reply_xattr +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param name of the extended attribute +- * @param size maximum size of the value to send +- */ +- void (*getxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, +- size_t size); +- +- /** +- * List extended attribute names +- * +- * If size is zero, the total size of the attribute list should be +- * sent with fuse_reply_xattr. +- * +- * If the size is non-zero, and the null character separated +- * attribute list fits in the buffer, the list should be sent with +- * fuse_reply_buf. +- * +- * If the size is too small for the list, the ERANGE error should +- * be sent. +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all +- * future listxattr() requests will fail with EOPNOTSUPP without being +- * send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_buf +- * fuse_reply_data +- * fuse_reply_xattr +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param size maximum size of the list to send +- */ +- void (*listxattr) (fuse_req_t req, fuse_ino_t ino, size_t size); +- +- /** +- * Remove an extended attribute +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all +- * future removexattr() requests will fail with EOPNOTSUPP without being +- * send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param name of the extended attribute +- */ +- void (*removexattr) (fuse_req_t req, fuse_ino_t ino, const char *name); +- +- /** +- * Check file access permissions +- * +- * This will be called for the access() and chdir() system +- * calls. If the 'default_permissions' mount option is given, +- * this method is not called. +- * +- * This method is not called under Linux kernel versions 2.4.x +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent success, i.e. this and all future access() +- * requests will succeed without being send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param mask requested access mode +- */ +- void (*access) (fuse_req_t req, fuse_ino_t ino, int mask); +- +- /** +- * Create and open a file +- * +- * If the file does not exist, first create it with the specified +- * mode, and then open it. +- * +- * See the description of the open handler for more +- * information. +- * +- * If this method is not implemented or under Linux kernel +- * versions earlier than 2.6.15, the mknod() and open() methods +- * will be called instead. +- * +- * If this request is answered with an error code of ENOSYS, the handler +- * is treated as not implemented (i.e., for this and future requests the +- * mknod() and open() handlers will be called instead). 
+- * +- * Valid replies: +- * fuse_reply_create +- * fuse_reply_err +- * +- * @param req request handle +- * @param parent inode number of the parent directory +- * @param name to create +- * @param mode file type and mode with which to create the new file +- * @param fi file information +- */ +- void (*create) (fuse_req_t req, fuse_ino_t parent, const char *name, +- mode_t mode, struct fuse_file_info *fi); +- +- /** +- * Test for a POSIX file lock +- * +- * Valid replies: +- * fuse_reply_lock +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- * @param lock the region/type to test +- */ +- void (*getlk) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi, struct flock *lock); +- +- /** +- * Acquire, modify or release a POSIX file lock +- * +- * For POSIX threads (NPTL) there's a 1-1 relation between pid and +- * owner, but otherwise this is not always the case. For checking +- * lock ownership, 'fi->owner' must be used. The l_pid field in +- * 'struct flock' should only be used to fill in this field in +- * getlk(). +- * +- * Note: if the locking methods are not implemented, the kernel +- * will still allow file locking to work locally. Hence these are +- * only interesting for network filesystems and similar. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- * @param lock the region/type to set +- * @param sleep locking operation may sleep +- */ +- void (*setlk) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi, +- struct flock *lock, int sleep); +- +- /** +- * Map block index within file to block index within device +- * +- * Note: This makes sense only for block device backed filesystems +- * mounted with the 'blkdev' option +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure, i.e. 
all future bmap() requests will +- * fail with the same error code without being send to the filesystem +- * process. +- * +- * Valid replies: +- * fuse_reply_bmap +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param blocksize unit of block index +- * @param idx block index within file +- */ +- void (*bmap) (fuse_req_t req, fuse_ino_t ino, size_t blocksize, +- uint64_t idx); +- +- /** +- * Ioctl +- * +- * Note: For unrestricted ioctls (not allowed for FUSE +- * servers), data in and out areas can be discovered by giving +- * iovs and setting FUSE_IOCTL_RETRY in *flags*. For +- * restricted ioctls, kernel prepares in/out data area +- * according to the information encoded in cmd. +- * +- * Valid replies: +- * fuse_reply_ioctl_retry +- * fuse_reply_ioctl +- * fuse_reply_ioctl_iov +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param cmd ioctl command +- * @param arg ioctl argument +- * @param fi file information +- * @param flags for FUSE_IOCTL_* flags +- * @param in_buf data fetched from the caller +- * @param in_bufsz number of fetched bytes +- * @param out_bufsz maximum size of output data +- * +- * Note : the unsigned long request submitted by the application +- * is truncated to 32 bits. +- */ +- void (*ioctl) (fuse_req_t req, fuse_ino_t ino, unsigned int cmd, +- void *arg, struct fuse_file_info *fi, unsigned flags, +- const void *in_buf, size_t in_bufsz, size_t out_bufsz); +- +- /** +- * Poll for IO readiness +- * +- * Note: If ph is non-NULL, the client should notify +- * when IO readiness events occur by calling +- * fuse_lowlevel_notify_poll() with the specified ph. +- * +- * Regardless of the number of times poll with a non-NULL ph +- * is received, single notification is enough to clear all. +- * Notifying more times incurs overhead but doesn't harm +- * correctness. 
+- * +- * The callee is responsible for destroying ph with +- * fuse_pollhandle_destroy() when no longer in use. +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as success (with a kernel-defined default poll-mask) and +- * future calls to pull() will succeed the same way without being send +- * to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_poll +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- * @param ph poll handle to be used for notification +- */ +- void (*poll) (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, +- struct fuse_pollhandle *ph); +- +- /** +- * Write data made available in a buffer +- * +- * This is a more generic version of the ->write() method. If +- * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the +- * kernel supports splicing from the fuse device, then the +- * data will be made available in pipe for supporting zero +- * copy data transfer. +- * +- * buf->count is guaranteed to be one (and thus buf->idx is +- * always zero). The write_buf handler must ensure that +- * bufv->off is correctly updated (reflecting the number of +- * bytes read from bufv->buf[0]). +- * +- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is +- * expected to reset the setuid and setgid bits. 
+- * +- * Valid replies: +- * fuse_reply_write +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param bufv buffer containing the data +- * @param off offset to write to +- * @param fi file information +- */ +- void (*write_buf) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_bufvec *bufv, off_t off, +- struct fuse_file_info *fi); +- +- /** +- * Callback function for the retrieve request +- * +- * Valid replies: +- * fuse_reply_none +- * +- * @param req request handle +- * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() +- * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() +- * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() +- * @param bufv the buffer containing the returned data +- */ +- void (*retrieve_reply) (fuse_req_t req, void *cookie, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv); +- +- /** +- * Forget about multiple inodes +- * +- * See description of the forget function for more +- * information. +- * +- * Valid replies: +- * fuse_reply_none +- * +- * @param req request handle +- */ +- void (*forget_multi) (fuse_req_t req, size_t count, +- struct fuse_forget_data *forgets); +- +- /** +- * Acquire, modify or release a BSD file lock +- * +- * Note: if the locking methods are not implemented, the kernel +- * will still allow file locking to work locally. Hence these are +- * only interesting for network filesystems and similar. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param fi file information +- * @param op the locking operation, see flock(2) +- */ +- void (*flock) (fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi, int op); +- +- /** +- * Allocate requested space. If this function returns success then +- * subsequent writes to the specified range shall not fail due to the lack +- * of free space on the file system storage media. 
+- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all +- * future fallocate() requests will fail with EOPNOTSUPP without being +- * send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param offset starting point for allocated region +- * @param length size of allocated region +- * @param mode determines the operation to be performed on the given range, +- * see fallocate(2) +- */ +- void (*fallocate) (fuse_req_t req, fuse_ino_t ino, int mode, +- off_t offset, off_t length, struct fuse_file_info *fi); +- +- /** +- * Read directory with attributes +- * +- * Send a buffer filled using fuse_add_direntry_plus(), with size not +- * exceeding the requested size. Send an empty buffer on end of +- * stream. +- * +- * fi->fh will contain the value set by the opendir method, or +- * will be undefined if the opendir method didn't set any value. +- * +- * In contrast to readdir() (which does not affect the lookup counts), +- * the lookup count of every entry returned by readdirplus(), except "." +- * and "..", is incremented by one. +- * +- * Valid replies: +- * fuse_reply_buf +- * fuse_reply_data +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param size maximum number of bytes to send +- * @param off offset to continue reading the directory stream +- * @param fi file information +- */ +- void (*readdirplus) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, +- struct fuse_file_info *fi); +- +- /** +- * Copy a range of data from one file to another +- * +- * Performs an optimized copy between two file descriptors without the +- * additional cost of transferring data through the FUSE kernel module +- * to user space (glibc) and then back into the FUSE filesystem again. 
+- * +- * In case this method is not implemented, glibc falls back to reading +- * data from the source and writing to the destination. Effectively +- * doing an inefficient copy of the data. +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all +- * future copy_file_range() requests will fail with EOPNOTSUPP without +- * being send to the filesystem process. +- * +- * Valid replies: +- * fuse_reply_write +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino_in the inode number or the source file +- * @param off_in starting point from were the data should be read +- * @param fi_in file information of the source file +- * @param ino_out the inode number or the destination file +- * @param off_out starting point where the data should be written +- * @param fi_out file information of the destination file +- * @param len maximum size of the data to copy +- * @param flags passed along with the copy_file_range() syscall +- */ +- void (*copy_file_range) (fuse_req_t req, fuse_ino_t ino_in, +- off_t off_in, struct fuse_file_info *fi_in, +- fuse_ino_t ino_out, off_t off_out, +- struct fuse_file_info *fi_out, size_t len, +- int flags); +- +- /** +- * Find next data or hole after the specified offset +- * +- * If this request is answered with an error code of ENOSYS, this is +- * treated as a permanent failure, i.e. all future lseek() requests will +- * fail with the same error code without being send to the filesystem +- * process. 
+- * +- * Valid replies: +- * fuse_reply_lseek +- * fuse_reply_err +- * +- * @param req request handle +- * @param ino the inode number +- * @param off offset to start search from +- * @param whence either SEEK_DATA or SEEK_HOLE +- * @param fi file information +- */ +- void (*lseek) (fuse_req_t req, fuse_ino_t ino, off_t off, int whence, +- struct fuse_file_info *fi); ++ /** ++ * Initialize filesystem ++ * ++ * This function is called when libfuse establishes ++ * communication with the FUSE kernel module. The file system ++ * should use this module to inspect and/or modify the ++ * connection parameters provided in the `conn` structure. ++ * ++ * Note that some parameters may be overwritten by options ++ * passed to fuse_session_new() which take precedence over the ++ * values set in this handler. ++ * ++ * There's no reply to this function ++ * ++ * @param userdata the user data passed to fuse_session_new() ++ */ ++ void (*init)(void *userdata, struct fuse_conn_info *conn); ++ ++ /** ++ * Clean up filesystem. ++ * ++ * Called on filesystem exit. When this method is called, the ++ * connection to the kernel may be gone already, so that eg. calls ++ * to fuse_lowlevel_notify_* will fail. ++ * ++ * There's no reply to this function ++ * ++ * @param userdata the user data passed to fuse_session_new() ++ */ ++ void (*destroy)(void *userdata); ++ ++ /** ++ * Look up a directory entry by name and get its attributes. ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name the name to look up ++ */ ++ void (*lookup)(fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Forget about an inode ++ * ++ * This function is called when the kernel removes an inode ++ * from its internal caches. ++ * ++ * The inode's lookup count increases by one for every call to ++ * fuse_reply_entry and fuse_reply_create. 
The nlookup parameter ++ * indicates by how much the lookup count should be decreased. ++ * ++ * Inodes with a non-zero lookup count may receive request from ++ * the kernel even after calls to unlink, rmdir or (when ++ * overwriting an existing file) rename. Filesystems must handle ++ * such requests properly and it is recommended to defer removal ++ * of the inode until the lookup count reaches zero. Calls to ++ * unlink, rmdir or rename will be followed closely by forget ++ * unless the file or directory is open, in which case the ++ * kernel issues forget only after the release or releasedir ++ * calls. ++ * ++ * Note that if a file system will be exported over NFS the ++ * inodes lifetime must extend even beyond forget. See the ++ * generation field in struct fuse_entry_param above. ++ * ++ * On unmount the lookup count for all inodes implicitly drops ++ * to zero. It is not guaranteed that the file system will ++ * receive corresponding forget messages for the affected ++ * inodes. ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param nlookup the number of lookups to forget ++ */ ++ void (*forget)(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); ++ ++ /** ++ * Get file attributes. ++ * ++ * If writeback caching is enabled, the kernel may have a ++ * better idea of a file's length than the FUSE file system ++ * (eg if there has been a write that extended the file size, ++ * but that has not yet been passed to the filesystem.n ++ * ++ * In this case, the st_size value provided by the file system ++ * will be ignored. 
++ * ++ * Valid replies: ++ * fuse_reply_attr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi for future use, currently always NULL ++ */ ++ void (*getattr)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); ++ ++ /** ++ * Set file attributes ++ * ++ * In the 'attr' argument only members indicated by the 'to_set' ++ * bitmask contain valid values. Other members contain undefined ++ * values. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits if the file ++ * size or owner is being changed. ++ * ++ * If the setattr was invoked from the ftruncate() system call ++ * under Linux kernel versions 2.6.15 or later, the fi->fh will ++ * contain the value set by the open method or will be undefined ++ * if the open method didn't set any value. Otherwise (not ++ * ftruncate call, or kernel version earlier than 2.6.15) the fi ++ * parameter will be NULL. ++ * ++ * Valid replies: ++ * fuse_reply_attr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param attr the attributes ++ * @param to_set bit mask of attributes which should be set ++ * @param fi file information, or NULL ++ */ ++ void (*setattr)(fuse_req_t req, fuse_ino_t ino, struct stat *attr, ++ int to_set, struct fuse_file_info *fi); ++ ++ /** ++ * Read symbolic link ++ * ++ * Valid replies: ++ * fuse_reply_readlink ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ */ ++ void (*readlink)(fuse_req_t req, fuse_ino_t ino); ++ ++ /** ++ * Create file node ++ * ++ * Create a regular file, character device, block device, fifo or ++ * socket node. 
++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode file type and mode with which to create the new file ++ * @param rdev the device number (only valid if created file is a device) ++ */ ++ void (*mknod)(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, dev_t rdev); ++ ++ /** ++ * Create a directory ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode with which to create the new file ++ */ ++ void (*mkdir)(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode); ++ ++ /** ++ * Remove a file ++ * ++ * If the file's inode's lookup count is non-zero, the file ++ * system is expected to postpone any removal of the inode ++ * until the lookup count reaches zero (see description of the ++ * forget function). ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to remove ++ */ ++ void (*unlink)(fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Remove a directory ++ * ++ * If the directory's inode's lookup count is non-zero, the ++ * file system is expected to postpone any removal of the ++ * inode until the lookup count reaches zero (see description ++ * of the forget function). 
++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to remove ++ */ ++ void (*rmdir)(fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Create a symbolic link ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param link the contents of the symbolic link ++ * @param parent inode number of the parent directory ++ * @param name to create ++ */ ++ void (*symlink)(fuse_req_t req, const char *link, fuse_ino_t parent, ++ const char *name); ++ ++ /** ++ * Rename a file ++ * ++ * If the target exists it should be atomically replaced. If ++ * the target's inode's lookup count is non-zero, the file ++ * system is expected to postpone any removal of the inode ++ * until the lookup count reaches zero (see description of the ++ * forget function). ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EINVAL, i.e. all ++ * future bmap requests will fail with EINVAL without being ++ * send to the filesystem process. ++ * ++ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If ++ * RENAME_NOREPLACE is specified, the filesystem must not ++ * overwrite *newname* if it exists and return an error ++ * instead. If `RENAME_EXCHANGE` is specified, the filesystem ++ * must atomically exchange the two files, i.e. both must ++ * exist and neither may be deleted. 
++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the old parent directory ++ * @param name old name ++ * @param newparent inode number of the new parent directory ++ * @param newname new name ++ */ ++ void (*rename)(fuse_req_t req, fuse_ino_t parent, const char *name, ++ fuse_ino_t newparent, const char *newname, ++ unsigned int flags); ++ ++ /** ++ * Create a hard link ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the old inode number ++ * @param newparent inode number of the new parent directory ++ * @param newname new name to create ++ */ ++ void (*link)(fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, ++ const char *newname); ++ ++ /** ++ * Open a file ++ * ++ * Open flags are available in fi->flags. The following rules ++ * apply. ++ * ++ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be ++ * filtered out / handled by the kernel. ++ * ++ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used ++ * by the filesystem to check if the operation is ++ * permitted. If the ``-o default_permissions`` mount ++ * option is given, this check is already done by the ++ * kernel before calling open() and may thus be omitted by ++ * the filesystem. ++ * ++ * - When writeback caching is enabled, the kernel may send ++ * read requests even for files opened with O_WRONLY. The ++ * filesystem should be prepared to handle this. ++ * ++ * - When writeback caching is disabled, the filesystem is ++ * expected to properly handle the O_APPEND flag and ensure ++ * that each write is appending to the end of the file. ++ * ++ * - When writeback caching is enabled, the kernel will ++ * handle O_APPEND. However, unless all changes to the file ++ * come through the kernel this will not work reliably. 
The ++ * filesystem should thus either ignore the O_APPEND flag ++ * (and let the kernel handle it), or return an error ++ * (indicating that reliably O_APPEND is not available). ++ * ++ * Filesystem may store an arbitrary file handle (pointer, ++ * index, etc) in fi->fh, and use this in other all other file ++ * operations (read, write, flush, release, fsync). ++ * ++ * Filesystem may also implement stateless file I/O and not store ++ * anything in fi->fh. ++ * ++ * There are also some flags (direct_io, keep_cache) which the ++ * filesystem may set in fi, to change the way the file is opened. ++ * See fuse_file_info structure in for more details. ++ * ++ * If this request is answered with an error code of ENOSYS ++ * and FUSE_CAP_NO_OPEN_SUPPORT is set in ++ * `fuse_conn_info.capable`, this is treated as success and ++ * future calls to open and release will also succeed without being ++ * sent to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_open ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*open)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); ++ ++ /** ++ * Read data ++ * ++ * Read should send exactly the number of bytes requested except ++ * on EOF or error, otherwise the rest of the data will be ++ * substituted with zeroes. An exception to this is when the file ++ * has been opened in 'direct_io' mode, in which case the return ++ * value of the read system call will reflect the return value of ++ * this operation. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. 
++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_iov ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size number of bytes to read ++ * @param off offset to read from ++ * @param fi file information ++ */ ++ void (*read)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Write data ++ * ++ * Write should return exactly the number of bytes requested ++ * except on error. An exception to this is when the file has ++ * been opened in 'direct_io' mode, in which case the return value ++ * of the write system call will reflect the return value of this ++ * operation. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param buf data to write ++ * @param size number of bytes to write ++ * @param off offset to write to ++ * @param fi file information ++ */ ++ void (*write)(fuse_req_t req, fuse_ino_t ino, const char *buf, size_t size, ++ off_t off, struct fuse_file_info *fi); ++ ++ /** ++ * Flush method ++ * ++ * This is called on each close() of the opened file. ++ * ++ * Since file descriptors can be duplicated (dup, dup2, fork), for ++ * one open call there may be many flush calls. ++ * ++ * Filesystems shouldn't assume that flush will always be called ++ * after some writes, or that if will be called at all. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * ++ * NOTE: the name of the method is misleading, since (unlike ++ * fsync) the filesystem is not forced to flush pending writes. 
++ * One reason to flush data is if the filesystem wants to return ++ * write errors during close. However, such use is non-portable ++ * because POSIX does not require [close] to wait for delayed I/O to ++ * complete. ++ * ++ * If the filesystem supports file locking operations (setlk, ++ * getlk) it should remove all locks belonging to 'fi->owner'. ++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to flush() will ++ * succeed automatically without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * ++ * [close]: ++ * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html ++ */ ++ void (*flush)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); ++ ++ /** ++ * Release an open file ++ * ++ * Release is called when there are no more references to an open ++ * file: all file descriptors are closed and all memory mappings ++ * are unmapped. ++ * ++ * For every open call there will be exactly one release call (unless ++ * the filesystem is force-unmounted). ++ * ++ * The filesystem may reply with an error, but error values are ++ * not returned to close() or munmap() which triggered the ++ * release. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * fi->flags will contain the same flags as for open. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*release)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); ++ ++ /** ++ * Synchronize file contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data. 
++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to fsync() will ++ * succeed automatically without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param datasync flag indicating if only data should be flushed ++ * @param fi file information ++ */ ++ void (*fsync)(fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Open a directory ++ * ++ * Filesystem may store an arbitrary file handle (pointer, index, ++ * etc) in fi->fh, and use this in other all other directory ++ * stream operations (readdir, releasedir, fsyncdir). ++ * ++ * If this request is answered with an error code of ENOSYS and ++ * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, ++ * this is treated as success and future calls to opendir and ++ * releasedir will also succeed without being sent to the filesystem ++ * process. In addition, the kernel will cache readdir results ++ * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. ++ * ++ * Valid replies: ++ * fuse_reply_open ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*opendir)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); ++ ++ /** ++ * Read directory ++ * ++ * Send a buffer filled using fuse_add_direntry(), with size not ++ * exceeding the requested size. Send an empty buffer on end of ++ * stream. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * Returning a directory entry from readdir() does not affect ++ * its lookup count. ++ * ++ * If off_t is non-zero, then it will correspond to one of the off_t ++ * values that was previously returned by readdir() for the same ++ * directory handle. 
In this case, readdir() should skip over entries ++ * coming before the position defined by the off_t value. If entries ++ * are added or removed while the directory handle is open, they filesystem ++ * may still include the entries that have been removed, and may not ++ * report the entries that have been created. However, addition or ++ * removal of entries must never cause readdir() to skip over unrelated ++ * entries or to report them more than once. This means ++ * that off_t can not be a simple index that enumerates the entries ++ * that have been returned but must contain sufficient information to ++ * uniquely determine the next directory entry to return even when the ++ * set of entries is changing. ++ * ++ * The function does not have to report the '.' and '..' ++ * entries, but is allowed to do so. Note that, if readdir does ++ * not return '.' or '..', they will not be implicitly returned, ++ * and this behavior is observable by the caller. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum number of bytes to send ++ * @param off offset to continue reading the directory stream ++ * @param fi file information ++ */ ++ void (*readdir)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Release an open directory ++ * ++ * For every opendir call there will be exactly one releasedir ++ * call (unless the filesystem is force-unmounted). ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. 
++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*releasedir)(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Synchronize directory contents ++ * ++ * If the datasync parameter is non-zero, then only the directory ++ * contents should be flushed, not the meta data. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to fsyncdir() will ++ * succeed automatically without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param datasync flag indicating if only data should be flushed ++ * @param fi file information ++ */ ++ void (*fsyncdir)(fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Get file system statistics ++ * ++ * Valid replies: ++ * fuse_reply_statfs ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number, zero means "undefined" ++ */ ++ void (*statfs)(fuse_req_t req, fuse_ino_t ino); ++ ++ /** ++ * Set an extended attribute ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future setxattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ */ ++ void (*setxattr)(fuse_req_t req, fuse_ino_t ino, const char *name, ++ const char *value, size_t size, int flags); ++ ++ /** ++ * Get an extended attribute ++ * ++ * If size is zero, the size of the value should be sent with ++ * fuse_reply_xattr. 
++ * ++ * If the size is non-zero, and the value fits in the buffer, the ++ * value should be sent with fuse_reply_buf. ++ * ++ * If the size is too small for the value, the ERANGE error should ++ * be sent. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future getxattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_xattr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param name of the extended attribute ++ * @param size maximum size of the value to send ++ */ ++ void (*getxattr)(fuse_req_t req, fuse_ino_t ino, const char *name, ++ size_t size); ++ ++ /** ++ * List extended attribute names ++ * ++ * If size is zero, the total size of the attribute list should be ++ * sent with fuse_reply_xattr. ++ * ++ * If the size is non-zero, and the null character separated ++ * attribute list fits in the buffer, the list should be sent with ++ * fuse_reply_buf. ++ * ++ * If the size is too small for the list, the ERANGE error should ++ * be sent. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future listxattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_xattr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum size of the list to send ++ */ ++ void (*listxattr)(fuse_req_t req, fuse_ino_t ino, size_t size); ++ ++ /** ++ * Remove an extended attribute ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all ++ * future removexattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param name of the extended attribute ++ */ ++ void (*removexattr)(fuse_req_t req, fuse_ino_t ino, const char *name); ++ ++ /** ++ * Check file access permissions ++ * ++ * This will be called for the access() and chdir() system ++ * calls. If the 'default_permissions' mount option is given, ++ * this method is not called. ++ * ++ * This method is not called under Linux kernel versions 2.4.x ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent success, i.e. this and all future access() ++ * requests will succeed without being send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param mask requested access mode ++ */ ++ void (*access)(fuse_req_t req, fuse_ino_t ino, int mask); ++ ++ /** ++ * Create and open a file ++ * ++ * If the file does not exist, first create it with the specified ++ * mode, and then open it. ++ * ++ * See the description of the open handler for more ++ * information. ++ * ++ * If this method is not implemented or under Linux kernel ++ * versions earlier than 2.6.15, the mknod() and open() methods ++ * will be called instead. ++ * ++ * If this request is answered with an error code of ENOSYS, the handler ++ * is treated as not implemented (i.e., for this and future requests the ++ * mknod() and open() handlers will be called instead). 
++ * ++ * Valid replies: ++ * fuse_reply_create ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode file type and mode with which to create the new file ++ * @param fi file information ++ */ ++ void (*create)(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, struct fuse_file_info *fi); ++ ++ /** ++ * Test for a POSIX file lock ++ * ++ * Valid replies: ++ * fuse_reply_lock ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param lock the region/type to test ++ */ ++ void (*getlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct flock *lock); ++ ++ /** ++ * Acquire, modify or release a POSIX file lock ++ * ++ * For POSIX threads (NPTL) there's a 1-1 relation between pid and ++ * owner, but otherwise this is not always the case. For checking ++ * lock ownership, 'fi->owner' must be used. The l_pid field in ++ * 'struct flock' should only be used to fill in this field in ++ * getlk(). ++ * ++ * Note: if the locking methods are not implemented, the kernel ++ * will still allow file locking to work locally. Hence these are ++ * only interesting for network filesystems and similar. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param lock the region/type to set ++ * @param sleep locking operation may sleep ++ */ ++ void (*setlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct flock *lock, int sleep); ++ ++ /** ++ * Map block index within file to block index within device ++ * ++ * Note: This makes sense only for block device backed filesystems ++ * mounted with the 'blkdev' option ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure, i.e. 
all future bmap() requests will ++ * fail with the same error code without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_bmap ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param blocksize unit of block index ++ * @param idx block index within file ++ */ ++ void (*bmap)(fuse_req_t req, fuse_ino_t ino, size_t blocksize, ++ uint64_t idx); ++ ++ /** ++ * Ioctl ++ * ++ * Note: For unrestricted ioctls (not allowed for FUSE ++ * servers), data in and out areas can be discovered by giving ++ * iovs and setting FUSE_IOCTL_RETRY in *flags*. For ++ * restricted ioctls, kernel prepares in/out data area ++ * according to the information encoded in cmd. ++ * ++ * Valid replies: ++ * fuse_reply_ioctl_retry ++ * fuse_reply_ioctl ++ * fuse_reply_ioctl_iov ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param cmd ioctl command ++ * @param arg ioctl argument ++ * @param fi file information ++ * @param flags for FUSE_IOCTL_* flags ++ * @param in_buf data fetched from the caller ++ * @param in_bufsz number of fetched bytes ++ * @param out_bufsz maximum size of output data ++ * ++ * Note : the unsigned long request submitted by the application ++ * is truncated to 32 bits. ++ */ ++ void (*ioctl)(fuse_req_t req, fuse_ino_t ino, unsigned int cmd, void *arg, ++ struct fuse_file_info *fi, unsigned flags, const void *in_buf, ++ size_t in_bufsz, size_t out_bufsz); ++ ++ /** ++ * Poll for IO readiness ++ * ++ * Note: If ph is non-NULL, the client should notify ++ * when IO readiness events occur by calling ++ * fuse_lowlevel_notify_poll() with the specified ph. ++ * ++ * Regardless of the number of times poll with a non-NULL ph ++ * is received, single notification is enough to clear all. ++ * Notifying more times incurs overhead but doesn't harm ++ * correctness. 
++ * ++ * The callee is responsible for destroying ph with ++ * fuse_pollhandle_destroy() when no longer in use. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as success (with a kernel-defined default poll-mask) and ++ * future calls to pull() will succeed the same way without being send ++ * to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_poll ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param ph poll handle to be used for notification ++ */ ++ void (*poll)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct fuse_pollhandle *ph); ++ ++ /** ++ * Write data made available in a buffer ++ * ++ * This is a more generic version of the ->write() method. If ++ * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the ++ * kernel supports splicing from the fuse device, then the ++ * data will be made available in pipe for supporting zero ++ * copy data transfer. ++ * ++ * buf->count is guaranteed to be one (and thus buf->idx is ++ * always zero). The write_buf handler must ensure that ++ * bufv->off is correctly updated (reflecting the number of ++ * bytes read from bufv->buf[0]). ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. 
++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param bufv buffer containing the data ++ * @param off offset to write to ++ * @param fi file information ++ */ ++ void (*write_buf)(fuse_req_t req, fuse_ino_t ino, struct fuse_bufvec *bufv, ++ off_t off, struct fuse_file_info *fi); ++ ++ /** ++ * Callback function for the retrieve request ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() ++ * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() ++ * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() ++ * @param bufv the buffer containing the returned data ++ */ ++ void (*retrieve_reply)(fuse_req_t req, void *cookie, fuse_ino_t ino, ++ off_t offset, struct fuse_bufvec *bufv); ++ ++ /** ++ * Forget about multiple inodes ++ * ++ * See description of the forget function for more ++ * information. ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ */ ++ void (*forget_multi)(fuse_req_t req, size_t count, ++ struct fuse_forget_data *forgets); ++ ++ /** ++ * Acquire, modify or release a BSD file lock ++ * ++ * Note: if the locking methods are not implemented, the kernel ++ * will still allow file locking to work locally. Hence these are ++ * only interesting for network filesystems and similar. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param op the locking operation, see flock(2) ++ */ ++ void (*flock)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ int op); ++ ++ /** ++ * Allocate requested space. If this function returns success then ++ * subsequent writes to the specified range shall not fail due to the lack ++ * of free space on the file system storage media. 
++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future fallocate() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param offset starting point for allocated region ++ * @param length size of allocated region ++ * @param mode determines the operation to be performed on the given range, ++ * see fallocate(2) ++ */ ++ void (*fallocate)(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, ++ off_t length, struct fuse_file_info *fi); ++ ++ /** ++ * Read directory with attributes ++ * ++ * Send a buffer filled using fuse_add_direntry_plus(), with size not ++ * exceeding the requested size. Send an empty buffer on end of ++ * stream. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * In contrast to readdir() (which does not affect the lookup counts), ++ * the lookup count of every entry returned by readdirplus(), except "." ++ * and "..", is incremented by one. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum number of bytes to send ++ * @param off offset to continue reading the directory stream ++ * @param fi file information ++ */ ++ void (*readdirplus)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Copy a range of data from one file to another ++ * ++ * Performs an optimized copy between two file descriptors without the ++ * additional cost of transferring data through the FUSE kernel module ++ * to user space (glibc) and then back into the FUSE filesystem again. 
++ * ++ * In case this method is not implemented, glibc falls back to reading ++ * data from the source and writing to the destination. Effectively ++ * doing an inefficient copy of the data. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future copy_file_range() requests will fail with EOPNOTSUPP without ++ * being send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino_in the inode number or the source file ++ * @param off_in starting point from were the data should be read ++ * @param fi_in file information of the source file ++ * @param ino_out the inode number or the destination file ++ * @param off_out starting point where the data should be written ++ * @param fi_out file information of the destination file ++ * @param len maximum size of the data to copy ++ * @param flags passed along with the copy_file_range() syscall ++ */ ++ void (*copy_file_range)(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, ++ struct fuse_file_info *fi_in, fuse_ino_t ino_out, ++ off_t off_out, struct fuse_file_info *fi_out, ++ size_t len, int flags); ++ ++ /** ++ * Find next data or hole after the specified offset ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure, i.e. all future lseek() requests will ++ * fail with the same error code without being send to the filesystem ++ * process. 
++ * ++ * Valid replies: ++ * fuse_reply_lseek ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param off offset to start search from ++ * @param whence either SEEK_DATA or SEEK_HOLE ++ * @param fi file information ++ */ ++ void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, ++ struct fuse_file_info *fi); + }; + + /** +@@ -1305,7 +1307,7 @@ int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); + * @return zero for success, -errno for failure to send reply + */ + int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, +- const struct fuse_file_info *fi); ++ const struct fuse_file_info *fi); + + /** + * Reply with attributes +@@ -1315,11 +1317,11 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, + * + * @param req request handle + * @param attr the attributes +- * @param attr_timeout validity timeout (in seconds) for the attributes ++ * @param attr_timeout validity timeout (in seconds) for the attributes + * @return zero for success, -errno for failure to send reply + */ + int fuse_reply_attr(fuse_req_t req, const struct stat *attr, +- double attr_timeout); ++ double attr_timeout); + + /** + * Reply with the contents of a symbolic link +@@ -1417,7 +1419,7 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); + * @return zero for success, -errno for failure to send reply + */ + int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags); ++ enum fuse_buf_copy_flags flags); + + /** + * Reply with data vector +@@ -1480,9 +1482,9 @@ int fuse_reply_lock(fuse_req_t req, const struct flock *lock); + */ + int fuse_reply_bmap(fuse_req_t req, uint64_t idx); + +-/* ----------------------------------------------------------- * +- * Filling a buffer in readdir * +- * ----------------------------------------------------------- */ ++/* ++ * Filling a buffer in readdir ++ */ + + /** + * Add a directory entry to 
the buffer +@@ -1512,8 +1514,7 @@ int fuse_reply_bmap(fuse_req_t req, uint64_t idx); + * @return the space needed for the entry + */ + size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, +- const char *name, const struct stat *stbuf, +- off_t off); ++ const char *name, const struct stat *stbuf, off_t off); + + /** + * Add a directory entry to the buffer with the attributes +@@ -1529,8 +1530,8 @@ size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, + * @return the space needed for the entry + */ + size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, +- const char *name, +- const struct fuse_entry_param *e, off_t off); ++ const char *name, ++ const struct fuse_entry_param *e, off_t off); + + /** + * Reply to ask for data fetch and output buffer preparation. ioctl +@@ -1547,9 +1548,9 @@ size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, + * @param out_count number of entries in out_iov + * @return zero for success, -errno for failure to send reply + */ +-int fuse_reply_ioctl_retry(fuse_req_t req, +- const struct iovec *in_iov, size_t in_count, +- const struct iovec *out_iov, size_t out_count); ++int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, ++ size_t in_count, const struct iovec *out_iov, ++ size_t out_count); + + /** + * Reply to finish ioctl +@@ -1576,7 +1577,7 @@ int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size); + * @param count the size of vector + */ + int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, +- int count); ++ int count); + + /** + * Reply with poll result event mask +@@ -1598,9 +1599,9 @@ int fuse_reply_poll(fuse_req_t req, unsigned revents); + */ + int fuse_reply_lseek(fuse_req_t req, off_t off); + +-/* ----------------------------------------------------------- * +- * Notification * +- * ----------------------------------------------------------- */ ++/* ++ * Notification ++ */ + + /** + * Notify 
IO readiness event +@@ -1635,7 +1636,7 @@ int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph); + * @return zero for success, -errno for failure + */ + int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, +- off_t off, off_t len); ++ off_t off, off_t len); + + /** + * Notify to invalidate parent attributes and the dentry matching +@@ -1663,7 +1664,7 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, + * @return zero for success, -errno for failure + */ + int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, +- const char *name, size_t namelen); ++ const char *name, size_t namelen); + + /** + * This function behaves like fuse_lowlevel_notify_inval_entry() with +@@ -1693,9 +1694,9 @@ int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, + * @param namelen strlen() of file name + * @return zero for success, -errno for failure + */ +-int fuse_lowlevel_notify_delete(struct fuse_session *se, +- fuse_ino_t parent, fuse_ino_t child, +- const char *name, size_t namelen); ++int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, ++ fuse_ino_t child, const char *name, ++ size_t namelen); + + /** + * Store data to the kernel buffers +@@ -1723,8 +1724,8 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, + * @return zero for success, -errno for failure + */ + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags); ++ off_t offset, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags); + /** + * Retrieve data from the kernel buffers + * +@@ -1755,12 +1756,12 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + * @return zero for success, -errno for failure + */ + int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, +- size_t size, off_t offset, void *cookie); ++ size_t size, off_t offset, void 
*cookie); + + +-/* ----------------------------------------------------------- * +- * Utility functions * +- * ----------------------------------------------------------- */ ++/* ++ * Utility functions ++ */ + + /** + * Get the userdata from the request +@@ -1822,7 +1823,7 @@ typedef void (*fuse_interrupt_func_t)(fuse_req_t req, void *data); + * @param data user data passed to the callback function + */ + void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, +- void *data); ++ void *data); + + /** + * Check if a request has already been interrupted +@@ -1833,9 +1834,9 @@ void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, + int fuse_req_interrupted(fuse_req_t req); + + +-/* ----------------------------------------------------------- * +- * Inquiry functions * +- * ----------------------------------------------------------- */ ++/* ++ * Inquiry functions ++ */ + + /** + * Print low-level version information to stdout. +@@ -1854,18 +1855,18 @@ void fuse_lowlevel_help(void); + */ + void fuse_cmdline_help(void); + +-/* ----------------------------------------------------------- * +- * Filesystem setup & teardown * +- * ----------------------------------------------------------- */ ++/* ++ * Filesystem setup & teardown ++ */ + + struct fuse_cmdline_opts { +- int foreground; +- int debug; +- int nodefault_subtype; +- char *mountpoint; +- int show_version; +- int show_help; +- unsigned int max_idle_threads; ++ int foreground; ++ int debug; ++ int nodefault_subtype; ++ char *mountpoint; ++ int show_version; ++ int show_help; ++ unsigned int max_idle_threads; + }; + + /** +@@ -1886,8 +1887,7 @@ struct fuse_cmdline_opts { + * @param opts output argument for parsed options + * @return 0 on success, -1 on failure + */ +-int fuse_parse_cmdline(struct fuse_args *args, +- struct fuse_cmdline_opts *opts); ++int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts); + + /** + * Create a low level session. 
+@@ -1918,8 +1918,8 @@ int fuse_parse_cmdline(struct fuse_args *args, + * @return the fuse session on success, NULL on failure + **/ + struct fuse_session *fuse_session_new(struct fuse_args *args, +- const struct fuse_lowlevel_ops *op, +- size_t op_size, void *userdata); ++ const struct fuse_lowlevel_ops *op, ++ size_t op_size, void *userdata); + + /** + * Mount a FUSE file system. +@@ -2014,9 +2014,9 @@ void fuse_session_unmount(struct fuse_session *se); + */ + void fuse_session_destroy(struct fuse_session *se); + +-/* ----------------------------------------------------------- * +- * Custom event loop support * +- * ----------------------------------------------------------- */ ++/* ++ * Custom event loop support ++ */ + + /** + * Return file descriptor for communication with kernel. +@@ -2043,7 +2043,7 @@ int fuse_session_fd(struct fuse_session *se); + * @param buf the fuse_buf containing the request + */ + void fuse_session_process_buf(struct fuse_session *se, +- const struct fuse_buf *buf); ++ const struct fuse_buf *buf); + + /** + * Read a raw request from the kernel into the supplied buffer. +diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h +index 2f6663e..f252baa 100644 +--- a/tools/virtiofsd/fuse_misc.h ++++ b/tools/virtiofsd/fuse_misc.h +@@ -1,18 +1,18 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ + + #include + + /* +- Versioned symbols cannot be used in some cases because it +- - confuse the dynamic linker in uClibc +- - not supported on MacOSX (in MachO binary format) +-*/ ++ * Versioned symbols cannot be used in some cases because it ++ * - confuse the dynamic linker in uClibc ++ * - not supported on MacOSX (in MachO binary format) ++ */ + #if (!defined(__UCLIBC__) && !defined(__APPLE__)) + #define FUSE_SYMVER(x) __asm__(x) + #else +@@ -25,11 +25,11 @@ + /* Is this hack still needed? */ + static inline void fuse_mutex_init(pthread_mutex_t *mut) + { +- pthread_mutexattr_t attr; +- pthread_mutexattr_init(&attr); +- pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); +- pthread_mutex_init(mut, &attr); +- pthread_mutexattr_destroy(&attr); ++ pthread_mutexattr_t attr; ++ pthread_mutexattr_init(&attr); ++ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); ++ pthread_mutex_init(mut, &attr); ++ pthread_mutexattr_destroy(&attr); + } + #endif + +diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c +index 93066b9..edd36f4 100644 +--- a/tools/virtiofsd/fuse_opt.c ++++ b/tools/virtiofsd/fuse_opt.c +@@ -1,423 +1,450 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- Implementation of option parsing routines (dealing with `struct +- fuse_args`). +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * Implementation of option parsing routines (dealing with `struct ++ * fuse_args`). ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ + ++#include "fuse_opt.h" + #include "config.h" + #include "fuse_i.h" +-#include "fuse_opt.h" + #include "fuse_misc.h" + ++#include + #include + #include + #include +-#include + + struct fuse_opt_context { +- void *data; +- const struct fuse_opt *opt; +- fuse_opt_proc_t proc; +- int argctr; +- int argc; +- char **argv; +- struct fuse_args outargs; +- char *opts; +- int nonopt; ++ void *data; ++ const struct fuse_opt *opt; ++ fuse_opt_proc_t proc; ++ int argctr; ++ int argc; ++ char **argv; ++ struct fuse_args outargs; ++ char *opts; ++ int nonopt; + }; + + void fuse_opt_free_args(struct fuse_args *args) + { +- if (args) { +- if (args->argv && args->allocated) { +- int i; +- for (i = 0; i < args->argc; i++) +- free(args->argv[i]); +- free(args->argv); +- } +- args->argc = 0; +- args->argv = NULL; +- args->allocated = 0; +- } ++ if (args) { ++ if (args->argv && args->allocated) { ++ int i; ++ for (i = 0; i < args->argc; i++) { ++ free(args->argv[i]); ++ } ++ free(args->argv); ++ } ++ args->argc = 0; ++ args->argv = NULL; ++ args->allocated = 0; ++ } + } + + static int alloc_failed(void) + { +- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); +- return -1; ++ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); ++ return -1; + } + + int fuse_opt_add_arg(struct fuse_args *args, const char *arg) + { +- char **newargv; +- char *newarg; +- +- assert(!args->argv || args->allocated); +- +- newarg = strdup(arg); +- if (!newarg) +- return alloc_failed(); +- +- newargv = realloc(args->argv, (args->argc + 2) * sizeof(char *)); +- if (!newargv) { +- free(newarg); +- return alloc_failed(); +- } +- +- args->argv = newargv; +- args->allocated = 1; +- args->argv[args->argc++] = newarg; +- args->argv[args->argc] = NULL; +- return 0; ++ char **newargv; ++ char *newarg; ++ ++ assert(!args->argv || args->allocated); ++ ++ newarg = strdup(arg); ++ if (!newarg) { ++ return alloc_failed(); ++ } ++ ++ newargv = realloc(args->argv, 
(args->argc + 2) * sizeof(char *)); ++ if (!newargv) { ++ free(newarg); ++ return alloc_failed(); ++ } ++ ++ args->argv = newargv; ++ args->allocated = 1; ++ args->argv[args->argc++] = newarg; ++ args->argv[args->argc] = NULL; ++ return 0; + } + + static int fuse_opt_insert_arg_common(struct fuse_args *args, int pos, +- const char *arg) ++ const char *arg) + { +- assert(pos <= args->argc); +- if (fuse_opt_add_arg(args, arg) == -1) +- return -1; +- +- if (pos != args->argc - 1) { +- char *newarg = args->argv[args->argc - 1]; +- memmove(&args->argv[pos + 1], &args->argv[pos], +- sizeof(char *) * (args->argc - pos - 1)); +- args->argv[pos] = newarg; +- } +- return 0; ++ assert(pos <= args->argc); ++ if (fuse_opt_add_arg(args, arg) == -1) { ++ return -1; ++ } ++ ++ if (pos != args->argc - 1) { ++ char *newarg = args->argv[args->argc - 1]; ++ memmove(&args->argv[pos + 1], &args->argv[pos], ++ sizeof(char *) * (args->argc - pos - 1)); ++ args->argv[pos] = newarg; ++ } ++ return 0; + } + + int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg) + { +- return fuse_opt_insert_arg_common(args, pos, arg); ++ return fuse_opt_insert_arg_common(args, pos, arg); + } + + static int next_arg(struct fuse_opt_context *ctx, const char *opt) + { +- if (ctx->argctr + 1 >= ctx->argc) { +- fuse_log(FUSE_LOG_ERR, "fuse: missing argument after `%s'\n", opt); +- return -1; +- } +- ctx->argctr++; +- return 0; ++ if (ctx->argctr + 1 >= ctx->argc) { ++ fuse_log(FUSE_LOG_ERR, "fuse: missing argument after `%s'\n", opt); ++ return -1; ++ } ++ ctx->argctr++; ++ return 0; + } + + static int add_arg(struct fuse_opt_context *ctx, const char *arg) + { +- return fuse_opt_add_arg(&ctx->outargs, arg); ++ return fuse_opt_add_arg(&ctx->outargs, arg); + } + + static int add_opt_common(char **opts, const char *opt, int esc) + { +- unsigned oldlen = *opts ? 
strlen(*opts) : 0; +- char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); +- +- if (!d) +- return alloc_failed(); +- +- *opts = d; +- if (oldlen) { +- d += oldlen; +- *d++ = ','; +- } +- +- for (; *opt; opt++) { +- if (esc && (*opt == ',' || *opt == '\\')) +- *d++ = '\\'; +- *d++ = *opt; +- } +- *d = '\0'; +- +- return 0; ++ unsigned oldlen = *opts ? strlen(*opts) : 0; ++ char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); ++ ++ if (!d) { ++ return alloc_failed(); ++ } ++ ++ *opts = d; ++ if (oldlen) { ++ d += oldlen; ++ *d++ = ','; ++ } ++ ++ for (; *opt; opt++) { ++ if (esc && (*opt == ',' || *opt == '\\')) { ++ *d++ = '\\'; ++ } ++ *d++ = *opt; ++ } ++ *d = '\0'; ++ ++ return 0; + } + + int fuse_opt_add_opt(char **opts, const char *opt) + { +- return add_opt_common(opts, opt, 0); ++ return add_opt_common(opts, opt, 0); + } + + int fuse_opt_add_opt_escaped(char **opts, const char *opt) + { +- return add_opt_common(opts, opt, 1); ++ return add_opt_common(opts, opt, 1); + } + + static int add_opt(struct fuse_opt_context *ctx, const char *opt) + { +- return add_opt_common(&ctx->opts, opt, 1); ++ return add_opt_common(&ctx->opts, opt, 1); + } + + static int call_proc(struct fuse_opt_context *ctx, const char *arg, int key, +- int iso) ++ int iso) + { +- if (key == FUSE_OPT_KEY_DISCARD) +- return 0; +- +- if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { +- int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); +- if (res == -1 || !res) +- return res; +- } +- if (iso) +- return add_opt(ctx, arg); +- else +- return add_arg(ctx, arg); ++ if (key == FUSE_OPT_KEY_DISCARD) { ++ return 0; ++ } ++ ++ if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { ++ int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); ++ if (res == -1 || !res) { ++ return res; ++ } ++ } ++ if (iso) { ++ return add_opt(ctx, arg); ++ } else { ++ return add_arg(ctx, arg); ++ } + } + + static int match_template(const char *t, const char *arg, unsigned *sepp) + { +- int arglen = strlen(arg); +- 
const char *sep = strchr(t, '='); +- sep = sep ? sep : strchr(t, ' '); +- if (sep && (!sep[1] || sep[1] == '%')) { +- int tlen = sep - t; +- if (sep[0] == '=') +- tlen ++; +- if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { +- *sepp = sep - t; +- return 1; +- } +- } +- if (strcmp(t, arg) == 0) { +- *sepp = 0; +- return 1; +- } +- return 0; ++ int arglen = strlen(arg); ++ const char *sep = strchr(t, '='); ++ sep = sep ? sep : strchr(t, ' '); ++ if (sep && (!sep[1] || sep[1] == '%')) { ++ int tlen = sep - t; ++ if (sep[0] == '=') { ++ tlen++; ++ } ++ if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { ++ *sepp = sep - t; ++ return 1; ++ } ++ } ++ if (strcmp(t, arg) == 0) { ++ *sepp = 0; ++ return 1; ++ } ++ return 0; + } + + static const struct fuse_opt *find_opt(const struct fuse_opt *opt, +- const char *arg, unsigned *sepp) ++ const char *arg, unsigned *sepp) + { +- for (; opt && opt->templ; opt++) +- if (match_template(opt->templ, arg, sepp)) +- return opt; +- return NULL; ++ for (; opt && opt->templ; opt++) { ++ if (match_template(opt->templ, arg, sepp)) { ++ return opt; ++ } ++ } ++ return NULL; + } + + int fuse_opt_match(const struct fuse_opt *opts, const char *opt) + { +- unsigned dummy; +- return find_opt(opts, opt, &dummy) ? 1 : 0; ++ unsigned dummy; ++ return find_opt(opts, opt, &dummy) ? 
1 : 0; + } + + static int process_opt_param(void *var, const char *format, const char *param, +- const char *arg) ++ const char *arg) + { +- assert(format[0] == '%'); +- if (format[1] == 's') { +- char **s = var; +- char *copy = strdup(param); +- if (!copy) +- return alloc_failed(); +- +- free(*s); +- *s = copy; +- } else { +- if (sscanf(param, format, var) != 1) { +- fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", arg); +- return -1; +- } +- } +- return 0; ++ assert(format[0] == '%'); ++ if (format[1] == 's') { ++ char **s = var; ++ char *copy = strdup(param); ++ if (!copy) { ++ return alloc_failed(); ++ } ++ ++ free(*s); ++ *s = copy; ++ } else { ++ if (sscanf(param, format, var) != 1) { ++ fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", ++ arg); ++ return -1; ++ } ++ } ++ return 0; + } + +-static int process_opt(struct fuse_opt_context *ctx, +- const struct fuse_opt *opt, unsigned sep, +- const char *arg, int iso) ++static int process_opt(struct fuse_opt_context *ctx, const struct fuse_opt *opt, ++ unsigned sep, const char *arg, int iso) + { +- if (opt->offset == -1U) { +- if (call_proc(ctx, arg, opt->value, iso) == -1) +- return -1; +- } else { +- void *var = (char *)ctx->data + opt->offset; +- if (sep && opt->templ[sep + 1]) { +- const char *param = arg + sep; +- if (opt->templ[sep] == '=') +- param ++; +- if (process_opt_param(var, opt->templ + sep + 1, +- param, arg) == -1) +- return -1; +- } else +- *(int *)var = opt->value; +- } +- return 0; ++ if (opt->offset == -1U) { ++ if (call_proc(ctx, arg, opt->value, iso) == -1) { ++ return -1; ++ } ++ } else { ++ void *var = (char *)ctx->data + opt->offset; ++ if (sep && opt->templ[sep + 1]) { ++ const char *param = arg + sep; ++ if (opt->templ[sep] == '=') { ++ param++; ++ } ++ if (process_opt_param(var, opt->templ + sep + 1, param, arg) == ++ -1) { ++ return -1; ++ } ++ } else { ++ *(int *)var = opt->value; ++ } ++ } ++ return 0; + } + + static int process_opt_sep_arg(struct 
fuse_opt_context *ctx, +- const struct fuse_opt *opt, unsigned sep, +- const char *arg, int iso) ++ const struct fuse_opt *opt, unsigned sep, ++ const char *arg, int iso) + { +- int res; +- char *newarg; +- char *param; +- +- if (next_arg(ctx, arg) == -1) +- return -1; +- +- param = ctx->argv[ctx->argctr]; +- newarg = malloc(sep + strlen(param) + 1); +- if (!newarg) +- return alloc_failed(); +- +- memcpy(newarg, arg, sep); +- strcpy(newarg + sep, param); +- res = process_opt(ctx, opt, sep, newarg, iso); +- free(newarg); +- +- return res; ++ int res; ++ char *newarg; ++ char *param; ++ ++ if (next_arg(ctx, arg) == -1) { ++ return -1; ++ } ++ ++ param = ctx->argv[ctx->argctr]; ++ newarg = malloc(sep + strlen(param) + 1); ++ if (!newarg) { ++ return alloc_failed(); ++ } ++ ++ memcpy(newarg, arg, sep); ++ strcpy(newarg + sep, param); ++ res = process_opt(ctx, opt, sep, newarg, iso); ++ free(newarg); ++ ++ return res; + } + + static int process_gopt(struct fuse_opt_context *ctx, const char *arg, int iso) + { +- unsigned sep; +- const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); +- if (opt) { +- for (; opt; opt = find_opt(opt + 1, arg, &sep)) { +- int res; +- if (sep && opt->templ[sep] == ' ' && !arg[sep]) +- res = process_opt_sep_arg(ctx, opt, sep, arg, +- iso); +- else +- res = process_opt(ctx, opt, sep, arg, iso); +- if (res == -1) +- return -1; +- } +- return 0; +- } else +- return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); ++ unsigned sep; ++ const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); ++ if (opt) { ++ for (; opt; opt = find_opt(opt + 1, arg, &sep)) { ++ int res; ++ if (sep && opt->templ[sep] == ' ' && !arg[sep]) { ++ res = process_opt_sep_arg(ctx, opt, sep, arg, iso); ++ } else { ++ res = process_opt(ctx, opt, sep, arg, iso); ++ } ++ if (res == -1) { ++ return -1; ++ } ++ } ++ return 0; ++ } else { ++ return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); ++ } + } + + static int process_real_option_group(struct fuse_opt_context *ctx, char 
*opts) + { +- char *s = opts; +- char *d = s; +- int end = 0; +- +- while (!end) { +- if (*s == '\0') +- end = 1; +- if (*s == ',' || end) { +- int res; +- +- *d = '\0'; +- res = process_gopt(ctx, opts, 1); +- if (res == -1) +- return -1; +- d = opts; +- } else { +- if (s[0] == '\\' && s[1] != '\0') { +- s++; +- if (s[0] >= '0' && s[0] <= '3' && +- s[1] >= '0' && s[1] <= '7' && +- s[2] >= '0' && s[2] <= '7') { +- *d++ = (s[0] - '0') * 0100 + +- (s[1] - '0') * 0010 + +- (s[2] - '0'); +- s += 2; +- } else { +- *d++ = *s; +- } +- } else { +- *d++ = *s; +- } +- } +- s++; +- } +- +- return 0; ++ char *s = opts; ++ char *d = s; ++ int end = 0; ++ ++ while (!end) { ++ if (*s == '\0') { ++ end = 1; ++ } ++ if (*s == ',' || end) { ++ int res; ++ ++ *d = '\0'; ++ res = process_gopt(ctx, opts, 1); ++ if (res == -1) { ++ return -1; ++ } ++ d = opts; ++ } else { ++ if (s[0] == '\\' && s[1] != '\0') { ++ s++; ++ if (s[0] >= '0' && s[0] <= '3' && s[1] >= '0' && s[1] <= '7' && ++ s[2] >= '0' && s[2] <= '7') { ++ *d++ = (s[0] - '0') * 0100 + (s[1] - '0') * 0010 + ++ (s[2] - '0'); ++ s += 2; ++ } else { ++ *d++ = *s; ++ } ++ } else { ++ *d++ = *s; ++ } ++ } ++ s++; ++ } ++ ++ return 0; + } + + static int process_option_group(struct fuse_opt_context *ctx, const char *opts) + { +- int res; +- char *copy = strdup(opts); +- +- if (!copy) { +- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); +- return -1; +- } +- res = process_real_option_group(ctx, copy); +- free(copy); +- return res; ++ int res; ++ char *copy = strdup(opts); ++ ++ if (!copy) { ++ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); ++ return -1; ++ } ++ res = process_real_option_group(ctx, copy); ++ free(copy); ++ return res; + } + + static int process_one(struct fuse_opt_context *ctx, const char *arg) + { +- if (ctx->nonopt || arg[0] != '-') +- return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); +- else if (arg[1] == 'o') { +- if (arg[2]) +- return process_option_group(ctx, arg + 2); +- else { +- if 
(next_arg(ctx, arg) == -1) +- return -1; +- +- return process_option_group(ctx, +- ctx->argv[ctx->argctr]); +- } +- } else if (arg[1] == '-' && !arg[2]) { +- if (add_arg(ctx, arg) == -1) +- return -1; +- ctx->nonopt = ctx->outargs.argc; +- return 0; +- } else +- return process_gopt(ctx, arg, 0); ++ if (ctx->nonopt || arg[0] != '-') { ++ return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); ++ } else if (arg[1] == 'o') { ++ if (arg[2]) { ++ return process_option_group(ctx, arg + 2); ++ } else { ++ if (next_arg(ctx, arg) == -1) { ++ return -1; ++ } ++ ++ return process_option_group(ctx, ctx->argv[ctx->argctr]); ++ } ++ } else if (arg[1] == '-' && !arg[2]) { ++ if (add_arg(ctx, arg) == -1) { ++ return -1; ++ } ++ ctx->nonopt = ctx->outargs.argc; ++ return 0; ++ } else { ++ return process_gopt(ctx, arg, 0); ++ } + } + + static int opt_parse(struct fuse_opt_context *ctx) + { +- if (ctx->argc) { +- if (add_arg(ctx, ctx->argv[0]) == -1) +- return -1; +- } +- +- for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) +- if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) +- return -1; +- +- if (ctx->opts) { +- if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || +- fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) +- return -1; +- } +- +- /* If option separator ("--") is the last argument, remove it */ +- if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && +- strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { +- free(ctx->outargs.argv[ctx->outargs.argc - 1]); +- ctx->outargs.argv[--ctx->outargs.argc] = NULL; +- } +- +- return 0; ++ if (ctx->argc) { ++ if (add_arg(ctx, ctx->argv[0]) == -1) { ++ return -1; ++ } ++ } ++ ++ for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) { ++ if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) { ++ return -1; ++ } ++ } ++ ++ if (ctx->opts) { ++ if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || ++ fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) { ++ return -1; ++ } ++ } ++ ++ /* If 
option separator ("--") is the last argument, remove it */ ++ if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && ++ strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { ++ free(ctx->outargs.argv[ctx->outargs.argc - 1]); ++ ctx->outargs.argv[--ctx->outargs.argc] = NULL; ++ } ++ ++ return 0; + } + + int fuse_opt_parse(struct fuse_args *args, void *data, +- const struct fuse_opt opts[], fuse_opt_proc_t proc) ++ const struct fuse_opt opts[], fuse_opt_proc_t proc) + { +- int res; +- struct fuse_opt_context ctx = { +- .data = data, +- .opt = opts, +- .proc = proc, +- }; +- +- if (!args || !args->argv || !args->argc) +- return 0; +- +- ctx.argc = args->argc; +- ctx.argv = args->argv; +- +- res = opt_parse(&ctx); +- if (res != -1) { +- struct fuse_args tmp = *args; +- *args = ctx.outargs; +- ctx.outargs = tmp; +- } +- free(ctx.opts); +- fuse_opt_free_args(&ctx.outargs); +- return res; ++ int res; ++ struct fuse_opt_context ctx = { ++ .data = data, ++ .opt = opts, ++ .proc = proc, ++ }; ++ ++ if (!args || !args->argv || !args->argc) { ++ return 0; ++ } ++ ++ ctx.argc = args->argc; ++ ctx.argv = args->argv; ++ ++ res = opt_parse(&ctx); ++ if (res != -1) { ++ struct fuse_args tmp = *args; ++ *args = ctx.outargs; ++ ctx.outargs = tmp; ++ } ++ free(ctx.opts); ++ fuse_opt_free_args(&ctx.outargs); ++ return res; + } +diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h +index 6910255..8f59b4d 100644 +--- a/tools/virtiofsd/fuse_opt.h ++++ b/tools/virtiofsd/fuse_opt.h +@@ -1,10 +1,10 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. 
++ */ + + #ifndef FUSE_OPT_H_ + #define FUSE_OPT_H_ +@@ -37,7 +37,7 @@ + * + * - 'offsetof(struct foo, member)' actions i) and iii) + * +- * - -1 action ii) ++ * - -1 action ii) + * + * The 'offsetof()' macro is defined in the header. + * +@@ -48,7 +48,7 @@ + * + * The types of templates are: + * +- * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only ++ * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only + * themselves. Invalid values are "--" and anything beginning + * with "-o" + * +@@ -71,58 +71,67 @@ + * freed. + */ + struct fuse_opt { +- /** Matching template and optional parameter formatting */ +- const char *templ; ++ /** Matching template and optional parameter formatting */ ++ const char *templ; + +- /** +- * Offset of variable within 'data' parameter of fuse_opt_parse() +- * or -1 +- */ +- unsigned long offset; ++ /** ++ * Offset of variable within 'data' parameter of fuse_opt_parse() ++ * or -1 ++ */ ++ unsigned long offset; + +- /** +- * Value to set the variable to, or to be passed as 'key' to the +- * processing function. Ignored if template has a format +- */ +- int value; ++ /** ++ * Value to set the variable to, or to be passed as 'key' to the ++ * processing function. Ignored if template has a format ++ */ ++ int value; + }; + + /** +- * Key option. In case of a match, the processing function will be ++ * Key option. In case of a match, the processing function will be + * called with the specified key. + */ +-#define FUSE_OPT_KEY(templ, key) { templ, -1U, key } ++#define FUSE_OPT_KEY(templ, key) \ ++ { \ ++ templ, -1U, key \ ++ } + + /** +- * Last option. An array of 'struct fuse_opt' must end with a NULL ++ * Last option. 
An array of 'struct fuse_opt' must end with a NULL + * template value + */ +-#define FUSE_OPT_END { NULL, 0, 0 } ++#define FUSE_OPT_END \ ++ { \ ++ NULL, 0, 0 \ ++ } + + /** + * Argument list + */ + struct fuse_args { +- /** Argument count */ +- int argc; ++ /** Argument count */ ++ int argc; + +- /** Argument vector. NULL terminated */ +- char **argv; ++ /** Argument vector. NULL terminated */ ++ char **argv; + +- /** Is 'argv' allocated? */ +- int allocated; ++ /** Is 'argv' allocated? */ ++ int allocated; + }; + + /** + * Initializer for 'struct fuse_args' + */ +-#define FUSE_ARGS_INIT(argc, argv) { argc, argv, 0 } ++#define FUSE_ARGS_INIT(argc, argv) \ ++ { \ ++ argc, argv, 0 \ ++ } + + /** + * Key value passed to the processing function if an option did not + * match any template + */ +-#define FUSE_OPT_KEY_OPT -1 ++#define FUSE_OPT_KEY_OPT -1 + + /** + * Key value passed to the processing function for all non-options +@@ -130,7 +139,7 @@ struct fuse_args { + * Non-options are the arguments beginning with a character other than + * '-' or all arguments after the special '--' option + */ +-#define FUSE_OPT_KEY_NONOPT -2 ++#define FUSE_OPT_KEY_NONOPT -2 + + /** + * Special key value for options to keep +@@ -174,7 +183,7 @@ struct fuse_args { + * @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept + */ + typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, +- struct fuse_args *outargs); ++ struct fuse_args *outargs); + + /** + * Option parsing function +@@ -197,7 +206,7 @@ typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, + * @return -1 on error, 0 on success + */ + int fuse_opt_parse(struct fuse_args *args, void *data, +- const struct fuse_opt opts[], fuse_opt_proc_t proc); ++ const struct fuse_opt opts[], fuse_opt_proc_t proc); + + /** + * Add an option to a comma separated option list +diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c +index 4271947..19d6791 100644 +--- 
a/tools/virtiofsd/fuse_signals.c ++++ b/tools/virtiofsd/fuse_signals.c +@@ -1,91 +1,95 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- Utility functions for setting signal handlers. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * Utility functions for setting signal handlers. ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB ++ */ + + #include "config.h" +-#include "fuse_lowlevel.h" + #include "fuse_i.h" ++#include "fuse_lowlevel.h" + +-#include +-#include + #include ++#include + #include ++#include + + static struct fuse_session *fuse_instance; + + static void exit_handler(int sig) + { +- if (fuse_instance) { +- fuse_session_exit(fuse_instance); +- if(sig <= 0) { +- fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); +- abort(); +- } +- fuse_instance->error = sig; +- } ++ if (fuse_instance) { ++ fuse_session_exit(fuse_instance); ++ if (sig <= 0) { ++ fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); ++ abort(); ++ } ++ fuse_instance->error = sig; ++ } + } + + static void do_nothing(int sig) + { +- (void) sig; ++ (void)sig; + } + + static int set_one_signal_handler(int sig, void (*handler)(int), int remove) + { +- struct sigaction sa; +- struct sigaction old_sa; ++ struct sigaction sa; ++ struct sigaction old_sa; + +- memset(&sa, 0, sizeof(struct sigaction)); +- sa.sa_handler = remove ? SIG_DFL : handler; +- sigemptyset(&(sa.sa_mask)); +- sa.sa_flags = 0; ++ memset(&sa, 0, sizeof(struct sigaction)); ++ sa.sa_handler = remove ? 
SIG_DFL : handler; ++ sigemptyset(&(sa.sa_mask)); ++ sa.sa_flags = 0; + +- if (sigaction(sig, NULL, &old_sa) == -1) { +- perror("fuse: cannot get old signal handler"); +- return -1; +- } ++ if (sigaction(sig, NULL, &old_sa) == -1) { ++ perror("fuse: cannot get old signal handler"); ++ return -1; ++ } + +- if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && +- sigaction(sig, &sa, NULL) == -1) { +- perror("fuse: cannot set signal handler"); +- return -1; +- } +- return 0; ++ if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && ++ sigaction(sig, &sa, NULL) == -1) { ++ perror("fuse: cannot set signal handler"); ++ return -1; ++ } ++ return 0; + } + + int fuse_set_signal_handlers(struct fuse_session *se) + { +- /* If we used SIG_IGN instead of the do_nothing function, +- then we would be unable to tell if we set SIG_IGN (and +- thus should reset to SIG_DFL in fuse_remove_signal_handlers) +- or if it was already set to SIG_IGN (and should be left +- untouched. */ +- if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || +- set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || +- set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || +- set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) +- return -1; ++ /* ++ * If we used SIG_IGN instead of the do_nothing function, ++ * then we would be unable to tell if we set SIG_IGN (and ++ * thus should reset to SIG_DFL in fuse_remove_signal_handlers) ++ * or if it was already set to SIG_IGN (and should be left ++ * untouched. 
++ */ ++ if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || ++ set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) { ++ return -1; ++ } + +- fuse_instance = se; +- return 0; ++ fuse_instance = se; ++ return 0; + } + + void fuse_remove_signal_handlers(struct fuse_session *se) + { +- if (fuse_instance != se) +- fuse_log(FUSE_LOG_ERR, +- "fuse: fuse_remove_signal_handlers: unknown session\n"); +- else +- fuse_instance = NULL; ++ if (fuse_instance != se) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: fuse_remove_signal_handlers: unknown session\n"); ++ } else { ++ fuse_instance = NULL; ++ } + +- set_one_signal_handler(SIGHUP, exit_handler, 1); +- set_one_signal_handler(SIGINT, exit_handler, 1); +- set_one_signal_handler(SIGTERM, exit_handler, 1); +- set_one_signal_handler(SIGPIPE, do_nothing, 1); ++ set_one_signal_handler(SIGHUP, exit_handler, 1); ++ set_one_signal_handler(SIGINT, exit_handler, 1); ++ set_one_signal_handler(SIGTERM, exit_handler, 1); ++ set_one_signal_handler(SIGPIPE, do_nothing, 1); + } +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 5a2e64c..5711dd2 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -1,297 +1,309 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * Helper functions to create (simple) standalone programs. With the ++ * aid of these functions it should be possible to create full FUSE ++ * file system by implementing nothing but the request handlers. + +- Helper functions to create (simple) standalone programs. With the +- aid of these functions it should be possible to create full FUSE +- file system by implementing nothing but the request handlers. +- +- This program can be distributed under the terms of the GNU LGPLv2. +- See the file COPYING.LIB. 
+-*/ ++ * This program can be distributed under the terms of the GNU LGPLv2. ++ * See the file COPYING.LIB. ++ */ + + #include "config.h" + #include "fuse_i.h" ++#include "fuse_lowlevel.h" + #include "fuse_misc.h" + #include "fuse_opt.h" +-#include "fuse_lowlevel.h" + #include "mount_util.h" + ++#include ++#include ++#include + #include + #include +-#include +-#include + #include +-#include +-#include + #include ++#include + +-#define FUSE_HELPER_OPT(t, p) \ +- { t, offsetof(struct fuse_cmdline_opts, p), 1 } ++#define FUSE_HELPER_OPT(t, p) \ ++ { \ ++ t, offsetof(struct fuse_cmdline_opts, p), 1 \ ++ } + + static const struct fuse_opt fuse_helper_opts[] = { +- FUSE_HELPER_OPT("-h", show_help), +- FUSE_HELPER_OPT("--help", show_help), +- FUSE_HELPER_OPT("-V", show_version), +- FUSE_HELPER_OPT("--version", show_version), +- FUSE_HELPER_OPT("-d", debug), +- FUSE_HELPER_OPT("debug", debug), +- FUSE_HELPER_OPT("-d", foreground), +- FUSE_HELPER_OPT("debug", foreground), +- FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), +- FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), +- FUSE_HELPER_OPT("-f", foreground), +- FUSE_HELPER_OPT("fsname=", nodefault_subtype), +- FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), +- FUSE_HELPER_OPT("subtype=", nodefault_subtype), +- FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), +- FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), +- FUSE_OPT_END ++ FUSE_HELPER_OPT("-h", show_help), ++ FUSE_HELPER_OPT("--help", show_help), ++ FUSE_HELPER_OPT("-V", show_version), ++ FUSE_HELPER_OPT("--version", show_version), ++ FUSE_HELPER_OPT("-d", debug), ++ FUSE_HELPER_OPT("debug", debug), ++ FUSE_HELPER_OPT("-d", foreground), ++ FUSE_HELPER_OPT("debug", foreground), ++ FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), ++ FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), ++ FUSE_HELPER_OPT("-f", foreground), ++ FUSE_HELPER_OPT("fsname=", nodefault_subtype), ++ FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), ++ FUSE_HELPER_OPT("subtype=", nodefault_subtype), ++ FUSE_OPT_KEY("subtype=", 
FUSE_OPT_KEY_KEEP), ++ FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), ++ FUSE_OPT_END + }; + + struct fuse_conn_info_opts { +- int atomic_o_trunc; +- int no_remote_posix_lock; +- int no_remote_flock; +- int splice_write; +- int splice_move; +- int splice_read; +- int no_splice_write; +- int no_splice_move; +- int no_splice_read; +- int auto_inval_data; +- int no_auto_inval_data; +- int no_readdirplus; +- int no_readdirplus_auto; +- int async_dio; +- int no_async_dio; +- int writeback_cache; +- int no_writeback_cache; +- int async_read; +- int sync_read; +- unsigned max_write; +- unsigned max_readahead; +- unsigned max_background; +- unsigned congestion_threshold; +- unsigned time_gran; +- int set_max_write; +- int set_max_readahead; +- int set_max_background; +- int set_congestion_threshold; +- int set_time_gran; ++ int atomic_o_trunc; ++ int no_remote_posix_lock; ++ int no_remote_flock; ++ int splice_write; ++ int splice_move; ++ int splice_read; ++ int no_splice_write; ++ int no_splice_move; ++ int no_splice_read; ++ int auto_inval_data; ++ int no_auto_inval_data; ++ int no_readdirplus; ++ int no_readdirplus_auto; ++ int async_dio; ++ int no_async_dio; ++ int writeback_cache; ++ int no_writeback_cache; ++ int async_read; ++ int sync_read; ++ unsigned max_write; ++ unsigned max_readahead; ++ unsigned max_background; ++ unsigned congestion_threshold; ++ unsigned time_gran; ++ int set_max_write; ++ int set_max_readahead; ++ int set_max_background; ++ int set_congestion_threshold; ++ int set_time_gran; + }; + +-#define CONN_OPTION(t, p, v) \ +- { t, offsetof(struct fuse_conn_info_opts, p), v } ++#define CONN_OPTION(t, p, v) \ ++ { \ ++ t, offsetof(struct fuse_conn_info_opts, p), v \ ++ } + static const struct fuse_opt conn_info_opt_spec[] = { +- CONN_OPTION("max_write=%u", max_write, 0), +- CONN_OPTION("max_write=", set_max_write, 1), +- CONN_OPTION("max_readahead=%u", max_readahead, 0), +- CONN_OPTION("max_readahead=", set_max_readahead, 1), +- 
CONN_OPTION("max_background=%u", max_background, 0), +- CONN_OPTION("max_background=", set_max_background, 1), +- CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), +- CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), +- CONN_OPTION("sync_read", sync_read, 1), +- CONN_OPTION("async_read", async_read, 1), +- CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), +- CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), +- CONN_OPTION("no_remote_lock", no_remote_flock, 1), +- CONN_OPTION("no_remote_flock", no_remote_flock, 1), +- CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), +- CONN_OPTION("splice_write", splice_write, 1), +- CONN_OPTION("no_splice_write", no_splice_write, 1), +- CONN_OPTION("splice_move", splice_move, 1), +- CONN_OPTION("no_splice_move", no_splice_move, 1), +- CONN_OPTION("splice_read", splice_read, 1), +- CONN_OPTION("no_splice_read", no_splice_read, 1), +- CONN_OPTION("auto_inval_data", auto_inval_data, 1), +- CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), +- CONN_OPTION("readdirplus=no", no_readdirplus, 1), +- CONN_OPTION("readdirplus=yes", no_readdirplus, 0), +- CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), +- CONN_OPTION("readdirplus=auto", no_readdirplus, 0), +- CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), +- CONN_OPTION("async_dio", async_dio, 1), +- CONN_OPTION("no_async_dio", no_async_dio, 1), +- CONN_OPTION("writeback_cache", writeback_cache, 1), +- CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), +- CONN_OPTION("time_gran=%u", time_gran, 0), +- CONN_OPTION("time_gran=", set_time_gran, 1), +- FUSE_OPT_END ++ CONN_OPTION("max_write=%u", max_write, 0), ++ CONN_OPTION("max_write=", set_max_write, 1), ++ CONN_OPTION("max_readahead=%u", max_readahead, 0), ++ CONN_OPTION("max_readahead=", set_max_readahead, 1), ++ CONN_OPTION("max_background=%u", max_background, 0), ++ CONN_OPTION("max_background=", set_max_background, 1), ++ 
CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), ++ CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), ++ CONN_OPTION("sync_read", sync_read, 1), ++ CONN_OPTION("async_read", async_read, 1), ++ CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), ++ CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), ++ CONN_OPTION("no_remote_lock", no_remote_flock, 1), ++ CONN_OPTION("no_remote_flock", no_remote_flock, 1), ++ CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), ++ CONN_OPTION("splice_write", splice_write, 1), ++ CONN_OPTION("no_splice_write", no_splice_write, 1), ++ CONN_OPTION("splice_move", splice_move, 1), ++ CONN_OPTION("no_splice_move", no_splice_move, 1), ++ CONN_OPTION("splice_read", splice_read, 1), ++ CONN_OPTION("no_splice_read", no_splice_read, 1), ++ CONN_OPTION("auto_inval_data", auto_inval_data, 1), ++ CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), ++ CONN_OPTION("readdirplus=no", no_readdirplus, 1), ++ CONN_OPTION("readdirplus=yes", no_readdirplus, 0), ++ CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), ++ CONN_OPTION("readdirplus=auto", no_readdirplus, 0), ++ CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), ++ CONN_OPTION("async_dio", async_dio, 1), ++ CONN_OPTION("no_async_dio", no_async_dio, 1), ++ CONN_OPTION("writeback_cache", writeback_cache, 1), ++ CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), ++ CONN_OPTION("time_gran=%u", time_gran, 0), ++ CONN_OPTION("time_gran=", set_time_gran, 1), ++ FUSE_OPT_END + }; + + + void fuse_cmdline_help(void) + { +- printf(" -h --help print help\n" +- " -V --version print version\n" +- " -d -o debug enable debug output (implies -f)\n" +- " -f foreground operation\n" +- " -o max_idle_threads the maximum number of idle worker threads\n" +- " allowed (default: 10)\n"); ++ printf( ++ " -h --help print help\n" ++ " -V --version print version\n" ++ " -d -o debug enable debug output (implies -f)\n" ++ " -f foreground operation\n" ++ " -o 
max_idle_threads the maximum number of idle worker threads\n" ++ " allowed (default: 10)\n"); + } + + static int fuse_helper_opt_proc(void *data, const char *arg, int key, +- struct fuse_args *outargs) ++ struct fuse_args *outargs) + { +- (void) outargs; +- struct fuse_cmdline_opts *opts = data; +- +- switch (key) { +- case FUSE_OPT_KEY_NONOPT: +- if (!opts->mountpoint) { +- if (fuse_mnt_parse_fuse_fd(arg) != -1) { +- return fuse_opt_add_opt(&opts->mountpoint, arg); +- } +- +- char mountpoint[PATH_MAX] = ""; +- if (realpath(arg, mountpoint) == NULL) { +- fuse_log(FUSE_LOG_ERR, +- "fuse: bad mount point `%s': %s\n", +- arg, strerror(errno)); +- return -1; +- } +- return fuse_opt_add_opt(&opts->mountpoint, mountpoint); +- } else { +- fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); +- return -1; +- } +- +- default: +- /* Pass through unknown options */ +- return 1; +- } ++ (void)outargs; ++ struct fuse_cmdline_opts *opts = data; ++ ++ switch (key) { ++ case FUSE_OPT_KEY_NONOPT: ++ if (!opts->mountpoint) { ++ if (fuse_mnt_parse_fuse_fd(arg) != -1) { ++ return fuse_opt_add_opt(&opts->mountpoint, arg); ++ } ++ ++ char mountpoint[PATH_MAX] = ""; ++ if (realpath(arg, mountpoint) == NULL) { ++ fuse_log(FUSE_LOG_ERR, "fuse: bad mount point `%s': %s\n", arg, ++ strerror(errno)); ++ return -1; ++ } ++ return fuse_opt_add_opt(&opts->mountpoint, mountpoint); ++ } else { ++ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); ++ return -1; ++ } ++ ++ default: ++ /* Pass through unknown options */ ++ return 1; ++ } + } + +-int fuse_parse_cmdline(struct fuse_args *args, +- struct fuse_cmdline_opts *opts) ++int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) + { +- memset(opts, 0, sizeof(struct fuse_cmdline_opts)); ++ memset(opts, 0, sizeof(struct fuse_cmdline_opts)); + +- opts->max_idle_threads = 10; ++ opts->max_idle_threads = 10; + +- if (fuse_opt_parse(args, opts, fuse_helper_opts, +- fuse_helper_opt_proc) == -1) +- return -1; ++ 
if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) == ++ -1) { ++ return -1; ++ } + +- return 0; ++ return 0; + } + + + int fuse_daemonize(int foreground) + { +- if (!foreground) { +- int nullfd; +- int waiter[2]; +- char completed; +- +- if (pipe(waiter)) { +- perror("fuse_daemonize: pipe"); +- return -1; +- } +- +- /* +- * demonize current process by forking it and killing the +- * parent. This makes current process as a child of 'init'. +- */ +- switch(fork()) { +- case -1: +- perror("fuse_daemonize: fork"); +- return -1; +- case 0: +- break; +- default: +- (void) read(waiter[0], &completed, sizeof(completed)); +- _exit(0); +- } +- +- if (setsid() == -1) { +- perror("fuse_daemonize: setsid"); +- return -1; +- } +- +- (void) chdir("/"); +- +- nullfd = open("/dev/null", O_RDWR, 0); +- if (nullfd != -1) { +- (void) dup2(nullfd, 0); +- (void) dup2(nullfd, 1); +- (void) dup2(nullfd, 2); +- if (nullfd > 2) +- close(nullfd); +- } +- +- /* Propagate completion of daemon initialization */ +- completed = 1; +- (void) write(waiter[1], &completed, sizeof(completed)); +- close(waiter[0]); +- close(waiter[1]); +- } else { +- (void) chdir("/"); +- } +- return 0; ++ if (!foreground) { ++ int nullfd; ++ int waiter[2]; ++ char completed; ++ ++ if (pipe(waiter)) { ++ perror("fuse_daemonize: pipe"); ++ return -1; ++ } ++ ++ /* ++ * demonize current process by forking it and killing the ++ * parent. This makes current process as a child of 'init'. 
++ */ ++ switch (fork()) { ++ case -1: ++ perror("fuse_daemonize: fork"); ++ return -1; ++ case 0: ++ break; ++ default: ++ (void)read(waiter[0], &completed, sizeof(completed)); ++ _exit(0); ++ } ++ ++ if (setsid() == -1) { ++ perror("fuse_daemonize: setsid"); ++ return -1; ++ } ++ ++ (void)chdir("/"); ++ ++ nullfd = open("/dev/null", O_RDWR, 0); ++ if (nullfd != -1) { ++ (void)dup2(nullfd, 0); ++ (void)dup2(nullfd, 1); ++ (void)dup2(nullfd, 2); ++ if (nullfd > 2) { ++ close(nullfd); ++ } ++ } ++ ++ /* Propagate completion of daemon initialization */ ++ completed = 1; ++ (void)write(waiter[1], &completed, sizeof(completed)); ++ close(waiter[0]); ++ close(waiter[1]); ++ } else { ++ (void)chdir("/"); ++ } ++ return 0; + } + + void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, +- struct fuse_conn_info *conn) ++ struct fuse_conn_info *conn) + { +- if(opts->set_max_write) +- conn->max_write = opts->max_write; +- if(opts->set_max_background) +- conn->max_background = opts->max_background; +- if(opts->set_congestion_threshold) +- conn->congestion_threshold = opts->congestion_threshold; +- if(opts->set_time_gran) +- conn->time_gran = opts->time_gran; +- if(opts->set_max_readahead) +- conn->max_readahead = opts->max_readahead; +- +-#define LL_ENABLE(cond,cap) \ +- if (cond) conn->want |= (cap) +-#define LL_DISABLE(cond,cap) \ +- if (cond) conn->want &= ~(cap) +- +- LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); +- LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); +- +- LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); +- LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); +- +- LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); +- LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); +- +- LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); +- LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); +- +- LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); +- LL_DISABLE(opts->no_readdirplus_auto, 
FUSE_CAP_READDIRPLUS_AUTO); +- +- LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); +- LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); +- +- LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); +- LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); +- +- LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); +- LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); +- +- LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); +- LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); ++ if (opts->set_max_write) { ++ conn->max_write = opts->max_write; ++ } ++ if (opts->set_max_background) { ++ conn->max_background = opts->max_background; ++ } ++ if (opts->set_congestion_threshold) { ++ conn->congestion_threshold = opts->congestion_threshold; ++ } ++ if (opts->set_time_gran) { ++ conn->time_gran = opts->time_gran; ++ } ++ if (opts->set_max_readahead) { ++ conn->max_readahead = opts->max_readahead; ++ } ++ ++#define LL_ENABLE(cond, cap) \ ++ if (cond) \ ++ conn->want |= (cap) ++#define LL_DISABLE(cond, cap) \ ++ if (cond) \ ++ conn->want &= ~(cap) ++ ++ LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); ++ LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); ++ ++ LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); ++ LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); ++ ++ LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); ++ LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); ++ ++ LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); ++ LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); ++ ++ LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); ++ LL_DISABLE(opts->no_readdirplus_auto, FUSE_CAP_READDIRPLUS_AUTO); ++ ++ LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); ++ LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); ++ ++ LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); ++ LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); ++ ++ LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); ++ 
LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); ++ ++ LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); ++ LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); + } + +-struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) ++struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args) + { +- struct fuse_conn_info_opts *opts; +- +- opts = calloc(1, sizeof(struct fuse_conn_info_opts)); +- if(opts == NULL) { +- fuse_log(FUSE_LOG_ERR, "calloc failed\n"); +- return NULL; +- } +- if(fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { +- free(opts); +- return NULL; +- } +- return opts; ++ struct fuse_conn_info_opts *opts; ++ ++ opts = calloc(1, sizeof(struct fuse_conn_info_opts)); ++ if (opts == NULL) { ++ fuse_log(FUSE_LOG_ERR, "calloc failed\n"); ++ return NULL; ++ } ++ if (fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { ++ free(opts); ++ return NULL; ++ } ++ return opts; + } +diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h +index 7c5f561..0b98275 100644 +--- a/tools/virtiofsd/passthrough_helpers.h ++++ b/tools/virtiofsd/passthrough_helpers.h +@@ -28,23 +28,24 @@ + * operation + */ + static int mknod_wrapper(int dirfd, const char *path, const char *link, +- int mode, dev_t rdev) ++ int mode, dev_t rdev) + { +- int res; ++ int res; + +- if (S_ISREG(mode)) { +- res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); +- if (res >= 0) +- res = close(res); +- } else if (S_ISDIR(mode)) { +- res = mkdirat(dirfd, path, mode); +- } else if (S_ISLNK(mode) && link != NULL) { +- res = symlinkat(link, dirfd, path); +- } else if (S_ISFIFO(mode)) { +- res = mkfifoat(dirfd, path, mode); +- } else { +- res = mknodat(dirfd, path, mode, rdev); +- } ++ if (S_ISREG(mode)) { ++ res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); ++ if (res >= 0) { ++ res = close(res); ++ } ++ } else if (S_ISDIR(mode)) { ++ res = mkdirat(dirfd, path, mode); ++ } else if 
(S_ISLNK(mode) && link != NULL) { ++ res = symlinkat(link, dirfd, path); ++ } else if (S_ISFIFO(mode)) { ++ res = mkfifoat(dirfd, path, mode); ++ } else { ++ res = mknodat(dirfd, path, mode, rdev); ++ } + +- return res; ++ return res; + } +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e5f7115..c5850ef 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1,12 +1,12 @@ + /* +- FUSE: Filesystem in Userspace +- Copyright (C) 2001-2007 Miklos Szeredi +- +- This program can be distributed under the terms of the GNU GPLv2. +- See the file COPYING. +-*/ ++ * FUSE: Filesystem in Userspace ++ * Copyright (C) 2001-2007 Miklos Szeredi ++ * ++ * This program can be distributed under the terms of the GNU GPLv2. ++ * See the file COPYING. ++ */ + +-/** @file ++/* + * + * This file system mirrors the existing file system hierarchy of the + * system, starting at the root file system. This is implemented by +@@ -28,7 +28,8 @@ + * + * Compile with: + * +- * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o passthrough_ll ++ * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o ++ * passthrough_ll + * + * ## Source code ## + * \include passthrough_ll.c +@@ -39,1299 +40,1365 @@ + + #include "config.h" + +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include + #include ++#include + #include ++#include + #include ++#include + #include ++#include ++#include ++#include ++#include ++#include + #include + #include ++#include + + #include "passthrough_helpers.h" + +-/* We are re-using pointers to our `struct lo_inode` and `struct +- lo_dirp` elements as inodes. This means that we must be able to +- store uintptr_t values in a fuse_ino_t variable. The following +- incantation checks this condition at compile time. 
*/ +-#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus ++/* ++ * We are re-using pointers to our `struct lo_inode` and `struct ++ * lo_dirp` elements as inodes. This means that we must be able to ++ * store uintptr_t values in a fuse_ino_t variable. The following ++ * incantation checks this condition at compile time. ++ */ ++#if defined(__GNUC__) && \ ++ (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ ++ !defined __cplusplus + _Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), +- "fuse_ino_t too small to hold uintptr_t values!"); ++ "fuse_ino_t too small to hold uintptr_t values!"); + #else +-struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct \ +- { unsigned _uintptr_to_must_hold_fuse_ino_t: +- ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 1 : -1); }; ++struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { ++ unsigned _uintptr_to_must_hold_fuse_ino_t ++ : ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 1 : -1); ++}; + #endif + + struct lo_inode { +- struct lo_inode *next; /* protected by lo->mutex */ +- struct lo_inode *prev; /* protected by lo->mutex */ +- int fd; +- bool is_symlink; +- ino_t ino; +- dev_t dev; +- uint64_t refcount; /* protected by lo->mutex */ ++ struct lo_inode *next; /* protected by lo->mutex */ ++ struct lo_inode *prev; /* protected by lo->mutex */ ++ int fd; ++ bool is_symlink; ++ ino_t ino; ++ dev_t dev; ++ uint64_t refcount; /* protected by lo->mutex */ + }; + + enum { +- CACHE_NEVER, +- CACHE_NORMAL, +- CACHE_ALWAYS, ++ CACHE_NEVER, ++ CACHE_NORMAL, ++ CACHE_ALWAYS, + }; + + struct lo_data { +- pthread_mutex_t mutex; +- int debug; +- int writeback; +- int flock; +- int xattr; +- const char *source; +- double timeout; +- int cache; +- int timeout_set; +- struct lo_inode root; /* protected by lo->mutex */ ++ pthread_mutex_t mutex; ++ int debug; ++ int writeback; ++ int flock; ++ int xattr; ++ const char *source; ++ double timeout; ++ int cache; ++ int timeout_set; ++ 
struct lo_inode root; /* protected by lo->mutex */ + }; + + static const struct fuse_opt lo_opts[] = { +- { "writeback", +- offsetof(struct lo_data, writeback), 1 }, +- { "no_writeback", +- offsetof(struct lo_data, writeback), 0 }, +- { "source=%s", +- offsetof(struct lo_data, source), 0 }, +- { "flock", +- offsetof(struct lo_data, flock), 1 }, +- { "no_flock", +- offsetof(struct lo_data, flock), 0 }, +- { "xattr", +- offsetof(struct lo_data, xattr), 1 }, +- { "no_xattr", +- offsetof(struct lo_data, xattr), 0 }, +- { "timeout=%lf", +- offsetof(struct lo_data, timeout), 0 }, +- { "timeout=", +- offsetof(struct lo_data, timeout_set), 1 }, +- { "cache=never", +- offsetof(struct lo_data, cache), CACHE_NEVER }, +- { "cache=auto", +- offsetof(struct lo_data, cache), CACHE_NORMAL }, +- { "cache=always", +- offsetof(struct lo_data, cache), CACHE_ALWAYS }, +- +- FUSE_OPT_END ++ { "writeback", offsetof(struct lo_data, writeback), 1 }, ++ { "no_writeback", offsetof(struct lo_data, writeback), 0 }, ++ { "source=%s", offsetof(struct lo_data, source), 0 }, ++ { "flock", offsetof(struct lo_data, flock), 1 }, ++ { "no_flock", offsetof(struct lo_data, flock), 0 }, ++ { "xattr", offsetof(struct lo_data, xattr), 1 }, ++ { "no_xattr", offsetof(struct lo_data, xattr), 0 }, ++ { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, ++ { "timeout=", offsetof(struct lo_data, timeout_set), 1 }, ++ { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, ++ { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, ++ { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, ++ ++ FUSE_OPT_END + }; + + static struct lo_data *lo_data(fuse_req_t req) + { +- return (struct lo_data *) fuse_req_userdata(req); ++ return (struct lo_data *)fuse_req_userdata(req); + } + + static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + { +- if (ino == FUSE_ROOT_ID) +- return &lo_data(req)->root; +- else +- return (struct lo_inode *) (uintptr_t) ino; ++ if (ino == 
FUSE_ROOT_ID) { ++ return &lo_data(req)->root; ++ } else { ++ return (struct lo_inode *)(uintptr_t)ino; ++ } + } + + static int lo_fd(fuse_req_t req, fuse_ino_t ino) + { +- return lo_inode(req, ino)->fd; ++ return lo_inode(req, ino)->fd; + } + + static bool lo_debug(fuse_req_t req) + { +- return lo_data(req)->debug != 0; ++ return lo_data(req)->debug != 0; + } + +-static void lo_init(void *userdata, +- struct fuse_conn_info *conn) ++static void lo_init(void *userdata, struct fuse_conn_info *conn) + { +- struct lo_data *lo = (struct lo_data*) userdata; +- +- if(conn->capable & FUSE_CAP_EXPORT_SUPPORT) +- conn->want |= FUSE_CAP_EXPORT_SUPPORT; +- +- if (lo->writeback && +- conn->capable & FUSE_CAP_WRITEBACK_CACHE) { +- if (lo->debug) +- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); +- conn->want |= FUSE_CAP_WRITEBACK_CACHE; +- } +- if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { +- if (lo->debug) +- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); +- conn->want |= FUSE_CAP_FLOCK_LOCKS; +- } ++ struct lo_data *lo = (struct lo_data *)userdata; ++ ++ if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) { ++ conn->want |= FUSE_CAP_EXPORT_SUPPORT; ++ } ++ ++ if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) { ++ if (lo->debug) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); ++ } ++ conn->want |= FUSE_CAP_WRITEBACK_CACHE; ++ } ++ if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { ++ if (lo->debug) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); ++ } ++ conn->want |= FUSE_CAP_FLOCK_LOCKS; ++ } + } + + static void lo_getattr(fuse_req_t req, fuse_ino_t ino, +- struct fuse_file_info *fi) ++ struct fuse_file_info *fi) + { +- int res; +- struct stat buf; +- struct lo_data *lo = lo_data(req); ++ int res; ++ struct stat buf; ++ struct lo_data *lo = lo_data(req); + +- (void) fi; ++ (void)fi; + +- res = fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); +- if (res == -1) +- 
return (void) fuse_reply_err(req, errno); ++ res = ++ fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } + +- fuse_reply_attr(req, &buf, lo->timeout); ++ fuse_reply_attr(req, &buf, lo->timeout); + } + + static int utimensat_empty_nofollow(struct lo_inode *inode, +- const struct timespec *tv) ++ const struct timespec *tv) + { +- int res; +- char procname[64]; +- +- if (inode->is_symlink) { +- res = utimensat(inode->fd, "", tv, +- AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); +- if (res == -1 && errno == EINVAL) { +- /* Sorry, no race free way to set times on symlink. */ +- errno = EPERM; +- } +- return res; +- } +- sprintf(procname, "/proc/self/fd/%i", inode->fd); +- +- return utimensat(AT_FDCWD, procname, tv, 0); ++ int res; ++ char procname[64]; ++ ++ if (inode->is_symlink) { ++ res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1 && errno == EINVAL) { ++ /* Sorry, no race free way to set times on symlink. */ ++ errno = EPERM; ++ } ++ return res; ++ } ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ return utimensat(AT_FDCWD, procname, tv, 0); + } + + static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, +- int valid, struct fuse_file_info *fi) ++ int valid, struct fuse_file_info *fi) + { +- int saverr; +- char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- int ifd = inode->fd; +- int res; +- +- if (valid & FUSE_SET_ATTR_MODE) { +- if (fi) { +- res = fchmod(fi->fh, attr->st_mode); +- } else { +- sprintf(procname, "/proc/self/fd/%i", ifd); +- res = chmod(procname, attr->st_mode); +- } +- if (res == -1) +- goto out_err; +- } +- if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { +- uid_t uid = (valid & FUSE_SET_ATTR_UID) ? +- attr->st_uid : (uid_t) -1; +- gid_t gid = (valid & FUSE_SET_ATTR_GID) ? 
+- attr->st_gid : (gid_t) -1; +- +- res = fchownat(ifd, "", uid, gid, +- AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); +- if (res == -1) +- goto out_err; +- } +- if (valid & FUSE_SET_ATTR_SIZE) { +- if (fi) { +- res = ftruncate(fi->fh, attr->st_size); +- } else { +- sprintf(procname, "/proc/self/fd/%i", ifd); +- res = truncate(procname, attr->st_size); +- } +- if (res == -1) +- goto out_err; +- } +- if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { +- struct timespec tv[2]; +- +- tv[0].tv_sec = 0; +- tv[1].tv_sec = 0; +- tv[0].tv_nsec = UTIME_OMIT; +- tv[1].tv_nsec = UTIME_OMIT; +- +- if (valid & FUSE_SET_ATTR_ATIME_NOW) +- tv[0].tv_nsec = UTIME_NOW; +- else if (valid & FUSE_SET_ATTR_ATIME) +- tv[0] = attr->st_atim; +- +- if (valid & FUSE_SET_ATTR_MTIME_NOW) +- tv[1].tv_nsec = UTIME_NOW; +- else if (valid & FUSE_SET_ATTR_MTIME) +- tv[1] = attr->st_mtim; +- +- if (fi) +- res = futimens(fi->fh, tv); +- else +- res = utimensat_empty_nofollow(inode, tv); +- if (res == -1) +- goto out_err; +- } +- +- return lo_getattr(req, ino, fi); ++ int saverr; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ int ifd = inode->fd; ++ int res; ++ ++ if (valid & FUSE_SET_ATTR_MODE) { ++ if (fi) { ++ res = fchmod(fi->fh, attr->st_mode); ++ } else { ++ sprintf(procname, "/proc/self/fd/%i", ifd); ++ res = chmod(procname, attr->st_mode); ++ } ++ if (res == -1) { ++ goto out_err; ++ } ++ } ++ if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { ++ uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1; ++ gid_t gid = (valid & FUSE_SET_ATTR_GID) ? 
attr->st_gid : (gid_t)-1; ++ ++ res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ goto out_err; ++ } ++ } ++ if (valid & FUSE_SET_ATTR_SIZE) { ++ if (fi) { ++ res = ftruncate(fi->fh, attr->st_size); ++ } else { ++ sprintf(procname, "/proc/self/fd/%i", ifd); ++ res = truncate(procname, attr->st_size); ++ } ++ if (res == -1) { ++ goto out_err; ++ } ++ } ++ if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { ++ struct timespec tv[2]; ++ ++ tv[0].tv_sec = 0; ++ tv[1].tv_sec = 0; ++ tv[0].tv_nsec = UTIME_OMIT; ++ tv[1].tv_nsec = UTIME_OMIT; ++ ++ if (valid & FUSE_SET_ATTR_ATIME_NOW) { ++ tv[0].tv_nsec = UTIME_NOW; ++ } else if (valid & FUSE_SET_ATTR_ATIME) { ++ tv[0] = attr->st_atim; ++ } ++ ++ if (valid & FUSE_SET_ATTR_MTIME_NOW) { ++ tv[1].tv_nsec = UTIME_NOW; ++ } else if (valid & FUSE_SET_ATTR_MTIME) { ++ tv[1] = attr->st_mtim; ++ } ++ ++ if (fi) { ++ res = futimens(fi->fh, tv); ++ } else { ++ res = utimensat_empty_nofollow(inode, tv); ++ } ++ if (res == -1) { ++ goto out_err; ++ } ++ } ++ ++ return lo_getattr(req, ino, fi); + + out_err: +- saverr = errno; +- fuse_reply_err(req, saverr); ++ saverr = errno; ++ fuse_reply_err(req, saverr); + } + + static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) + { +- struct lo_inode *p; +- struct lo_inode *ret = NULL; +- +- pthread_mutex_lock(&lo->mutex); +- for (p = lo->root.next; p != &lo->root; p = p->next) { +- if (p->ino == st->st_ino && p->dev == st->st_dev) { +- assert(p->refcount > 0); +- ret = p; +- ret->refcount++; +- break; +- } +- } +- pthread_mutex_unlock(&lo->mutex); +- return ret; ++ struct lo_inode *p; ++ struct lo_inode *ret = NULL; ++ ++ pthread_mutex_lock(&lo->mutex); ++ for (p = lo->root.next; p != &lo->root; p = p->next) { ++ if (p->ino == st->st_ino && p->dev == st->st_dev) { ++ assert(p->refcount > 0); ++ ret = p; ++ ret->refcount++; ++ break; ++ } ++ } ++ pthread_mutex_unlock(&lo->mutex); ++ return ret; + } + + static int 
lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, +- struct fuse_entry_param *e) ++ struct fuse_entry_param *e) + { +- int newfd; +- int res; +- int saverr; +- struct lo_data *lo = lo_data(req); +- struct lo_inode *inode; +- +- memset(e, 0, sizeof(*e)); +- e->attr_timeout = lo->timeout; +- e->entry_timeout = lo->timeout; +- +- newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); +- if (newfd == -1) +- goto out_err; +- +- res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); +- if (res == -1) +- goto out_err; +- +- inode = lo_find(lo_data(req), &e->attr); +- if (inode) { +- close(newfd); +- newfd = -1; +- } else { +- struct lo_inode *prev, *next; +- +- saverr = ENOMEM; +- inode = calloc(1, sizeof(struct lo_inode)); +- if (!inode) +- goto out_err; +- +- inode->is_symlink = S_ISLNK(e->attr.st_mode); +- inode->refcount = 1; +- inode->fd = newfd; +- inode->ino = e->attr.st_ino; +- inode->dev = e->attr.st_dev; +- +- pthread_mutex_lock(&lo->mutex); +- prev = &lo->root; +- next = prev->next; +- next->prev = inode; +- inode->next = next; +- inode->prev = prev; +- prev->next = inode; +- pthread_mutex_unlock(&lo->mutex); +- } +- e->ino = (uintptr_t) inode; +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long) parent, name, (unsigned long long) e->ino); +- +- return 0; ++ int newfd; ++ int res; ++ int saverr; ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode; ++ ++ memset(e, 0, sizeof(*e)); ++ e->attr_timeout = lo->timeout; ++ e->entry_timeout = lo->timeout; ++ ++ newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); ++ if (newfd == -1) { ++ goto out_err; ++ } ++ ++ res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ goto out_err; ++ } ++ ++ inode = lo_find(lo_data(req), &e->attr); ++ if (inode) { ++ close(newfd); ++ newfd = -1; ++ } else { ++ struct lo_inode *prev, *next; ++ ++ saverr = ENOMEM; ++ inode = calloc(1, 
sizeof(struct lo_inode)); ++ if (!inode) { ++ goto out_err; ++ } ++ ++ inode->is_symlink = S_ISLNK(e->attr.st_mode); ++ inode->refcount = 1; ++ inode->fd = newfd; ++ inode->ino = e->attr.st_ino; ++ inode->dev = e->attr.st_dev; ++ ++ pthread_mutex_lock(&lo->mutex); ++ prev = &lo->root; ++ next = prev->next; ++ next->prev = inode; ++ inode->next = next; ++ inode->prev = prev; ++ prev->next = inode; ++ pthread_mutex_unlock(&lo->mutex); ++ } ++ e->ino = (uintptr_t)inode; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long)parent, name, (unsigned long long)e->ino); ++ } ++ ++ return 0; + + out_err: +- saverr = errno; +- if (newfd != -1) +- close(newfd); +- return saverr; ++ saverr = errno; ++ if (newfd != -1) { ++ close(newfd); ++ } ++ return saverr; + } + + static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + { +- struct fuse_entry_param e; +- int err; +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", +- parent, name); +- +- err = lo_do_lookup(req, parent, name, &e); +- if (err) +- fuse_reply_err(req, err); +- else +- fuse_reply_entry(req, &e); ++ struct fuse_entry_param e; ++ int err; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", ++ parent, name); ++ } ++ ++ err = lo_do_lookup(req, parent, name, &e); ++ if (err) { ++ fuse_reply_err(req, err); ++ } else { ++ fuse_reply_entry(req, &e); ++ } + } + + static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, +- const char *name, mode_t mode, dev_t rdev, +- const char *link) ++ const char *name, mode_t mode, dev_t rdev, ++ const char *link) + { +- int res; +- int saverr; +- struct lo_inode *dir = lo_inode(req, parent); +- struct fuse_entry_param e; ++ int res; ++ int saverr; ++ struct lo_inode *dir = lo_inode(req, parent); ++ struct fuse_entry_param e; + +- saverr = ENOMEM; ++ saverr = ENOMEM; + +- res = mknod_wrapper(dir->fd, name, link, mode, 
rdev); ++ res = mknod_wrapper(dir->fd, name, link, mode, rdev); + +- saverr = errno; +- if (res == -1) +- goto out; ++ saverr = errno; ++ if (res == -1) { ++ goto out; ++ } + +- saverr = lo_do_lookup(req, parent, name, &e); +- if (saverr) +- goto out; ++ saverr = lo_do_lookup(req, parent, name, &e); ++ if (saverr) { ++ goto out; ++ } + +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long) parent, name, (unsigned long long) e.ino); ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long)parent, name, (unsigned long long)e.ino); ++ } + +- fuse_reply_entry(req, &e); +- return; ++ fuse_reply_entry(req, &e); ++ return; + + out: +- fuse_reply_err(req, saverr); ++ fuse_reply_err(req, saverr); + } + +-static void lo_mknod(fuse_req_t req, fuse_ino_t parent, +- const char *name, mode_t mode, dev_t rdev) ++static void lo_mknod(fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, dev_t rdev) + { +- lo_mknod_symlink(req, parent, name, mode, rdev, NULL); ++ lo_mknod_symlink(req, parent, name, mode, rdev, NULL); + } + + static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, +- mode_t mode) ++ mode_t mode) + { +- lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); ++ lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); + } + +-static void lo_symlink(fuse_req_t req, const char *link, +- fuse_ino_t parent, const char *name) ++static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, ++ const char *name) + { +- lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); ++ lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); + } + + static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, +- const char *name) ++ const char *name) + { +- int res; +- char procname[64]; ++ int res; ++ char procname[64]; + +- if (inode->is_symlink) { +- res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); +- if (res == -1 && (errno == ENOENT || 
errno == EINVAL)) { +- /* Sorry, no race free way to hard-link a symlink. */ +- errno = EPERM; +- } +- return res; +- } ++ if (inode->is_symlink) { ++ res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); ++ if (res == -1 && (errno == ENOENT || errno == EINVAL)) { ++ /* Sorry, no race free way to hard-link a symlink. */ ++ errno = EPERM; ++ } ++ return res; ++ } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); + +- return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); ++ return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); + } + + static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, +- const char *name) ++ const char *name) + { +- int res; +- struct lo_data *lo = lo_data(req); +- struct lo_inode *inode = lo_inode(req, ino); +- struct fuse_entry_param e; +- int saverr; +- +- memset(&e, 0, sizeof(struct fuse_entry_param)); +- e.attr_timeout = lo->timeout; +- e.entry_timeout = lo->timeout; +- +- res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); +- if (res == -1) +- goto out_err; +- +- res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); +- if (res == -1) +- goto out_err; +- +- pthread_mutex_lock(&lo->mutex); +- inode->refcount++; +- pthread_mutex_unlock(&lo->mutex); +- e.ino = (uintptr_t) inode; +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long) parent, name, +- (unsigned long long) e.ino); +- +- fuse_reply_entry(req, &e); +- return; ++ int res; ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode = lo_inode(req, ino); ++ struct fuse_entry_param e; ++ int saverr; ++ ++ memset(&e, 0, sizeof(struct fuse_entry_param)); ++ e.attr_timeout = lo->timeout; ++ e.entry_timeout = lo->timeout; ++ ++ res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); ++ if (res == -1) { ++ goto out_err; ++ } ++ ++ res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if 
(res == -1) { ++ goto out_err; ++ } ++ ++ pthread_mutex_lock(&lo->mutex); ++ inode->refcount++; ++ pthread_mutex_unlock(&lo->mutex); ++ e.ino = (uintptr_t)inode; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", ++ (unsigned long long)parent, name, (unsigned long long)e.ino); ++ } ++ ++ fuse_reply_entry(req, &e); ++ return; + + out_err: +- saverr = errno; +- fuse_reply_err(req, saverr); ++ saverr = errno; ++ fuse_reply_err(req, saverr); + } + + static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) + { +- int res; ++ int res; + +- res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); ++ res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); + +- fuse_reply_err(req, res == -1 ? errno : 0); ++ fuse_reply_err(req, res == -1 ? errno : 0); + } + + static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, +- fuse_ino_t newparent, const char *newname, +- unsigned int flags) ++ fuse_ino_t newparent, const char *newname, ++ unsigned int flags) + { +- int res; ++ int res; + +- if (flags) { +- fuse_reply_err(req, EINVAL); +- return; +- } ++ if (flags) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + +- res = renameat(lo_fd(req, parent), name, +- lo_fd(req, newparent), newname); ++ res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); + +- fuse_reply_err(req, res == -1 ? errno : 0); ++ fuse_reply_err(req, res == -1 ? errno : 0); + } + + static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + { +- int res; ++ int res; + +- res = unlinkat(lo_fd(req, parent), name, 0); ++ res = unlinkat(lo_fd(req, parent), name, 0); + +- fuse_reply_err(req, res == -1 ? errno : 0); ++ fuse_reply_err(req, res == -1 ? 
errno : 0); + } + + static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) + { +- if (!inode) +- return; +- +- pthread_mutex_lock(&lo->mutex); +- assert(inode->refcount >= n); +- inode->refcount -= n; +- if (!inode->refcount) { +- struct lo_inode *prev, *next; +- +- prev = inode->prev; +- next = inode->next; +- next->prev = prev; +- prev->next = next; +- +- pthread_mutex_unlock(&lo->mutex); +- close(inode->fd); +- free(inode); +- +- } else { +- pthread_mutex_unlock(&lo->mutex); +- } ++ if (!inode) { ++ return; ++ } ++ ++ pthread_mutex_lock(&lo->mutex); ++ assert(inode->refcount >= n); ++ inode->refcount -= n; ++ if (!inode->refcount) { ++ struct lo_inode *prev, *next; ++ ++ prev = inode->prev; ++ next = inode->next; ++ next->prev = prev; ++ prev->next = next; ++ ++ pthread_mutex_unlock(&lo->mutex); ++ close(inode->fd); ++ free(inode); ++ ++ } else { ++ pthread_mutex_unlock(&lo->mutex); ++ } + } + + static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + { +- struct lo_data *lo = lo_data(req); +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode = lo_inode(req, ino); + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", +- (unsigned long long) ino, +- (unsigned long long) inode->refcount, +- (unsigned long long) nlookup); +- } ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", ++ (unsigned long long)ino, (unsigned long long)inode->refcount, ++ (unsigned long long)nlookup); ++ } + +- unref_inode(lo, inode, nlookup); ++ unref_inode(lo, inode, nlookup); + } + + static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + { +- lo_forget_one(req, ino, nlookup); +- fuse_reply_none(req); ++ lo_forget_one(req, ino, nlookup); ++ fuse_reply_none(req); + } + + static void lo_forget_multi(fuse_req_t req, size_t count, +- struct fuse_forget_data *forgets) ++ struct fuse_forget_data *forgets) + { +- int 
i; ++ int i; + +- for (i = 0; i < count; i++) +- lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); +- fuse_reply_none(req); ++ for (i = 0; i < count; i++) { ++ lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); ++ } ++ fuse_reply_none(req); + } + + static void lo_readlink(fuse_req_t req, fuse_ino_t ino) + { +- char buf[PATH_MAX + 1]; +- int res; ++ char buf[PATH_MAX + 1]; ++ int res; + +- res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); +- if (res == -1) +- return (void) fuse_reply_err(req, errno); ++ res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); ++ if (res == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } + +- if (res == sizeof(buf)) +- return (void) fuse_reply_err(req, ENAMETOOLONG); ++ if (res == sizeof(buf)) { ++ return (void)fuse_reply_err(req, ENAMETOOLONG); ++ } + +- buf[res] = '\0'; ++ buf[res] = '\0'; + +- fuse_reply_readlink(req, buf); ++ fuse_reply_readlink(req, buf); + } + + struct lo_dirp { +- DIR *dp; +- struct dirent *entry; +- off_t offset; ++ DIR *dp; ++ struct dirent *entry; ++ off_t offset; + }; + + static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) + { +- return (struct lo_dirp *) (uintptr_t) fi->fh; ++ return (struct lo_dirp *)(uintptr_t)fi->fh; + } + +-static void lo_opendir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++static void lo_opendir(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi) + { +- int error = ENOMEM; +- struct lo_data *lo = lo_data(req); +- struct lo_dirp *d; +- int fd; +- +- d = calloc(1, sizeof(struct lo_dirp)); +- if (d == NULL) +- goto out_err; +- +- fd = openat(lo_fd(req, ino), ".", O_RDONLY); +- if (fd == -1) +- goto out_errno; +- +- d->dp = fdopendir(fd); +- if (d->dp == NULL) +- goto out_errno; +- +- d->offset = 0; +- d->entry = NULL; +- +- fi->fh = (uintptr_t) d; +- if (lo->cache == CACHE_ALWAYS) +- fi->keep_cache = 1; +- fuse_reply_open(req, fi); +- return; ++ int error = ENOMEM; ++ struct lo_data *lo = lo_data(req); ++ struct lo_dirp *d; ++ int 
fd; ++ ++ d = calloc(1, sizeof(struct lo_dirp)); ++ if (d == NULL) { ++ goto out_err; ++ } ++ ++ fd = openat(lo_fd(req, ino), ".", O_RDONLY); ++ if (fd == -1) { ++ goto out_errno; ++ } ++ ++ d->dp = fdopendir(fd); ++ if (d->dp == NULL) { ++ goto out_errno; ++ } ++ ++ d->offset = 0; ++ d->entry = NULL; ++ ++ fi->fh = (uintptr_t)d; ++ if (lo->cache == CACHE_ALWAYS) { ++ fi->keep_cache = 1; ++ } ++ fuse_reply_open(req, fi); ++ return; + + out_errno: +- error = errno; ++ error = errno; + out_err: +- if (d) { +- if (fd != -1) +- close(fd); +- free(d); +- } +- fuse_reply_err(req, error); ++ if (d) { ++ if (fd != -1) { ++ close(fd); ++ } ++ free(d); ++ } ++ fuse_reply_err(req, error); + } + + static int is_dot_or_dotdot(const char *name) + { +- return name[0] == '.' && (name[1] == '\0' || +- (name[1] == '.' && name[2] == '\0')); ++ return name[0] == '.' && ++ (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); + } + + static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, +- off_t offset, struct fuse_file_info *fi, int plus) ++ off_t offset, struct fuse_file_info *fi, int plus) + { +- struct lo_dirp *d = lo_dirp(fi); +- char *buf; +- char *p; +- size_t rem = size; +- int err; +- +- (void) ino; +- +- buf = calloc(1, size); +- if (!buf) { +- err = ENOMEM; +- goto error; +- } +- p = buf; +- +- if (offset != d->offset) { +- seekdir(d->dp, offset); +- d->entry = NULL; +- d->offset = offset; +- } +- while (1) { +- size_t entsize; +- off_t nextoff; +- const char *name; +- +- if (!d->entry) { +- errno = 0; +- d->entry = readdir(d->dp); +- if (!d->entry) { +- if (errno) { // Error +- err = errno; +- goto error; +- } else { // End of stream +- break; +- } +- } +- } +- nextoff = d->entry->d_off; +- name = d->entry->d_name; +- fuse_ino_t entry_ino = 0; +- if (plus) { +- struct fuse_entry_param e; +- if (is_dot_or_dotdot(name)) { +- e = (struct fuse_entry_param) { +- .attr.st_ino = d->entry->d_ino, +- .attr.st_mode = d->entry->d_type << 12, +- }; +- } else { +- 
err = lo_do_lookup(req, ino, name, &e); +- if (err) +- goto error; +- entry_ino = e.ino; +- } +- +- entsize = fuse_add_direntry_plus(req, p, rem, name, +- &e, nextoff); +- } else { +- struct stat st = { +- .st_ino = d->entry->d_ino, +- .st_mode = d->entry->d_type << 12, +- }; +- entsize = fuse_add_direntry(req, p, rem, name, +- &st, nextoff); +- } +- if (entsize > rem) { +- if (entry_ino != 0) +- lo_forget_one(req, entry_ino, 1); +- break; +- } +- +- p += entsize; +- rem -= entsize; +- +- d->entry = NULL; +- d->offset = nextoff; +- } ++ struct lo_dirp *d = lo_dirp(fi); ++ char *buf; ++ char *p; ++ size_t rem = size; ++ int err; ++ ++ (void)ino; ++ ++ buf = calloc(1, size); ++ if (!buf) { ++ err = ENOMEM; ++ goto error; ++ } ++ p = buf; ++ ++ if (offset != d->offset) { ++ seekdir(d->dp, offset); ++ d->entry = NULL; ++ d->offset = offset; ++ } ++ while (1) { ++ size_t entsize; ++ off_t nextoff; ++ const char *name; ++ ++ if (!d->entry) { ++ errno = 0; ++ d->entry = readdir(d->dp); ++ if (!d->entry) { ++ if (errno) { /* Error */ ++ err = errno; ++ goto error; ++ } else { /* End of stream */ ++ break; ++ } ++ } ++ } ++ nextoff = d->entry->d_off; ++ name = d->entry->d_name; ++ fuse_ino_t entry_ino = 0; ++ if (plus) { ++ struct fuse_entry_param e; ++ if (is_dot_or_dotdot(name)) { ++ e = (struct fuse_entry_param){ ++ .attr.st_ino = d->entry->d_ino, ++ .attr.st_mode = d->entry->d_type << 12, ++ }; ++ } else { ++ err = lo_do_lookup(req, ino, name, &e); ++ if (err) { ++ goto error; ++ } ++ entry_ino = e.ino; ++ } ++ ++ entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff); ++ } else { ++ struct stat st = { ++ .st_ino = d->entry->d_ino, ++ .st_mode = d->entry->d_type << 12, ++ }; ++ entsize = fuse_add_direntry(req, p, rem, name, &st, nextoff); ++ } ++ if (entsize > rem) { ++ if (entry_ino != 0) { ++ lo_forget_one(req, entry_ino, 1); ++ } ++ break; ++ } ++ ++ p += entsize; ++ rem -= entsize; ++ ++ d->entry = NULL; ++ d->offset = nextoff; ++ } + + err = 0; + error: 
+- // If there's an error, we can only signal it if we haven't stored +- // any entries yet - otherwise we'd end up with wrong lookup +- // counts for the entries that are already in the buffer. So we +- // return what we've collected until that point. +- if (err && rem == size) +- fuse_reply_err(req, err); +- else +- fuse_reply_buf(req, buf, size - rem); ++ /* ++ * If there's an error, we can only signal it if we haven't stored ++ * any entries yet - otherwise we'd end up with wrong lookup ++ * counts for the entries that are already in the buffer. So we ++ * return what we've collected until that point. ++ */ ++ if (err && rem == size) { ++ fuse_reply_err(req, err); ++ } else { ++ fuse_reply_buf(req, buf, size - rem); ++ } + free(buf); + } + + static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, +- off_t offset, struct fuse_file_info *fi) ++ off_t offset, struct fuse_file_info *fi) + { +- lo_do_readdir(req, ino, size, offset, fi, 0); ++ lo_do_readdir(req, ino, size, offset, fi, 0); + } + + static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, +- off_t offset, struct fuse_file_info *fi) ++ off_t offset, struct fuse_file_info *fi) + { +- lo_do_readdir(req, ino, size, offset, fi, 1); ++ lo_do_readdir(req, ino, size, offset, fi, 1); + } + +-static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi) + { +- struct lo_dirp *d = lo_dirp(fi); +- (void) ino; +- closedir(d->dp); +- free(d); +- fuse_reply_err(req, 0); ++ struct lo_dirp *d = lo_dirp(fi); ++ (void)ino; ++ closedir(d->dp); ++ free(d); ++ fuse_reply_err(req, 0); + } + + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, +- mode_t mode, struct fuse_file_info *fi) ++ mode_t mode, struct fuse_file_info *fi) + { +- int fd; +- struct lo_data *lo = lo_data(req); +- struct fuse_entry_param e; +- int err; +- +- if (lo_debug(req)) +- 
fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", +- parent, name); +- +- fd = openat(lo_fd(req, parent), name, +- (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); +- if (fd == -1) +- return (void) fuse_reply_err(req, errno); +- +- fi->fh = fd; +- if (lo->cache == CACHE_NEVER) +- fi->direct_io = 1; +- else if (lo->cache == CACHE_ALWAYS) +- fi->keep_cache = 1; +- +- err = lo_do_lookup(req, parent, name, &e); +- if (err) +- fuse_reply_err(req, err); +- else +- fuse_reply_create(req, &e, fi); ++ int fd; ++ struct lo_data *lo = lo_data(req); ++ struct fuse_entry_param e; ++ int err; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", ++ parent, name); ++ } ++ ++ fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, ++ mode); ++ if (fd == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } ++ ++ fi->fh = fd; ++ if (lo->cache == CACHE_NEVER) { ++ fi->direct_io = 1; ++ } else if (lo->cache == CACHE_ALWAYS) { ++ fi->keep_cache = 1; ++ } ++ ++ err = lo_do_lookup(req, parent, name, &e); ++ if (err) { ++ fuse_reply_err(req, err); ++ } else { ++ fuse_reply_create(req, &e, fi); ++ } + } + + static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, +- struct fuse_file_info *fi) ++ struct fuse_file_info *fi) + { +- int res; +- int fd = dirfd(lo_dirp(fi)->dp); +- (void) ino; +- if (datasync) +- res = fdatasync(fd); +- else +- res = fsync(fd); +- fuse_reply_err(req, res == -1 ? errno : 0); ++ int res; ++ int fd = dirfd(lo_dirp(fi)->dp); ++ (void)ino; ++ if (datasync) { ++ res = fdatasync(fd); ++ } else { ++ res = fsync(fd); ++ } ++ fuse_reply_err(req, res == -1 ? 
errno : 0); + } + + static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { +- int fd; +- char buf[64]; +- struct lo_data *lo = lo_data(req); +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", +- ino, fi->flags); +- +- /* With writeback cache, kernel may send read requests even +- when userspace opened write-only */ +- if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { +- fi->flags &= ~O_ACCMODE; +- fi->flags |= O_RDWR; +- } +- +- /* With writeback cache, O_APPEND is handled by the kernel. +- This breaks atomicity (since the file may change in the +- underlying filesystem, so that the kernel's idea of the +- end of the file isn't accurate anymore). In this example, +- we just accept that. A more rigorous filesystem may want +- to return an error here */ +- if (lo->writeback && (fi->flags & O_APPEND)) +- fi->flags &= ~O_APPEND; +- +- sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); +- fd = open(buf, fi->flags & ~O_NOFOLLOW); +- if (fd == -1) +- return (void) fuse_reply_err(req, errno); +- +- fi->fh = fd; +- if (lo->cache == CACHE_NEVER) +- fi->direct_io = 1; +- else if (lo->cache == CACHE_ALWAYS) +- fi->keep_cache = 1; +- fuse_reply_open(req, fi); ++ int fd; ++ char buf[64]; ++ struct lo_data *lo = lo_data(req); ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, ++ fi->flags); ++ } ++ ++ /* ++ * With writeback cache, kernel may send read requests even ++ * when userspace opened write-only ++ */ ++ if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { ++ fi->flags &= ~O_ACCMODE; ++ fi->flags |= O_RDWR; ++ } ++ ++ /* ++ * With writeback cache, O_APPEND is handled by the kernel. ++ * This breaks atomicity (since the file may change in the ++ * underlying filesystem, so that the kernel's idea of the ++ * end of the file isn't accurate anymore). In this example, ++ * we just accept that. 
A more rigorous filesystem may want ++ * to return an error here ++ */ ++ if (lo->writeback && (fi->flags & O_APPEND)) { ++ fi->flags &= ~O_APPEND; ++ } ++ ++ sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); ++ fd = open(buf, fi->flags & ~O_NOFOLLOW); ++ if (fd == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } ++ ++ fi->fh = fd; ++ if (lo->cache == CACHE_NEVER) { ++ fi->direct_io = 1; ++ } else if (lo->cache == CACHE_ALWAYS) { ++ fi->keep_cache = 1; ++ } ++ fuse_reply_open(req, fi); + } + +-static void lo_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) ++static void lo_release(fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi) + { +- (void) ino; ++ (void)ino; + +- close(fi->fh); +- fuse_reply_err(req, 0); ++ close(fi->fh); ++ fuse_reply_err(req, 0); + } + + static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { +- int res; +- (void) ino; +- res = close(dup(fi->fh)); +- fuse_reply_err(req, res == -1 ? errno : 0); ++ int res; ++ (void)ino; ++ res = close(dup(fi->fh)); ++ fuse_reply_err(req, res == -1 ? errno : 0); + } + + static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, +- struct fuse_file_info *fi) ++ struct fuse_file_info *fi) + { +- int res; +- (void) ino; +- if (datasync) +- res = fdatasync(fi->fh); +- else +- res = fsync(fi->fh); +- fuse_reply_err(req, res == -1 ? errno : 0); ++ int res; ++ (void)ino; ++ if (datasync) { ++ res = fdatasync(fi->fh); ++ } else { ++ res = fsync(fi->fh); ++ } ++ fuse_reply_err(req, res == -1 ? 
errno : 0); + } + +-static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, +- off_t offset, struct fuse_file_info *fi) ++static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, ++ struct fuse_file_info *fi) + { +- struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); ++ struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); + +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, "lo_read(ino=%" PRIu64 ", size=%zd, " +- "off=%lu)\n", ino, size, (unsigned long) offset); ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_read(ino=%" PRIu64 ", size=%zd, " ++ "off=%lu)\n", ++ ino, size, (unsigned long)offset); ++ } + +- buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; +- buf.buf[0].fd = fi->fh; +- buf.buf[0].pos = offset; ++ buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; ++ buf.buf[0].fd = fi->fh; ++ buf.buf[0].pos = offset; + +- fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); ++ fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); + } + + static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, +- struct fuse_bufvec *in_buf, off_t off, +- struct fuse_file_info *fi) ++ struct fuse_bufvec *in_buf, off_t off, ++ struct fuse_file_info *fi) + { +- (void) ino; +- ssize_t res; +- struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); +- +- out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; +- out_buf.buf[0].fd = fi->fh; +- out_buf.buf[0].pos = off; +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", +- ino, out_buf.buf[0].size, (unsigned long) off); +- +- res = fuse_buf_copy(&out_buf, in_buf, 0); +- if(res < 0) +- fuse_reply_err(req, -res); +- else +- fuse_reply_write(req, (size_t) res); ++ (void)ino; ++ ssize_t res; ++ struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); ++ ++ out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; ++ out_buf.buf[0].fd = fi->fh; ++ out_buf.buf[0].pos = off; ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, ++ 
"lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, ++ out_buf.buf[0].size, (unsigned long)off); ++ } ++ ++ res = fuse_buf_copy(&out_buf, in_buf, 0); ++ if (res < 0) { ++ fuse_reply_err(req, -res); ++ } else { ++ fuse_reply_write(req, (size_t)res); ++ } + } + + static void lo_statfs(fuse_req_t req, fuse_ino_t ino) + { +- int res; +- struct statvfs stbuf; +- +- res = fstatvfs(lo_fd(req, ino), &stbuf); +- if (res == -1) +- fuse_reply_err(req, errno); +- else +- fuse_reply_statfs(req, &stbuf); ++ int res; ++ struct statvfs stbuf; ++ ++ res = fstatvfs(lo_fd(req, ino), &stbuf); ++ if (res == -1) { ++ fuse_reply_err(req, errno); ++ } else { ++ fuse_reply_statfs(req, &stbuf); ++ } + } + +-static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, +- off_t offset, off_t length, struct fuse_file_info *fi) ++static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, ++ off_t length, struct fuse_file_info *fi) + { +- int err = EOPNOTSUPP; +- (void) ino; ++ int err = EOPNOTSUPP; ++ (void)ino; + + #ifdef HAVE_FALLOCATE +- err = fallocate(fi->fh, mode, offset, length); +- if (err < 0) +- err = errno; ++ err = fallocate(fi->fh, mode, offset, length); ++ if (err < 0) { ++ err = errno; ++ } + + #elif defined(HAVE_POSIX_FALLOCATE) +- if (mode) { +- fuse_reply_err(req, EOPNOTSUPP); +- return; +- } ++ if (mode) { ++ fuse_reply_err(req, EOPNOTSUPP); ++ return; ++ } + +- err = posix_fallocate(fi->fh, offset, length); ++ err = posix_fallocate(fi->fh, offset, length); + #endif + +- fuse_reply_err(req, err); ++ fuse_reply_err(req, err); + } + + static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, +- int op) ++ int op) + { +- int res; +- (void) ino; ++ int res; ++ (void)ino; + +- res = flock(fi->fh, op); ++ res = flock(fi->fh, op); + +- fuse_reply_err(req, res == -1 ? errno : 0); ++ fuse_reply_err(req, res == -1 ? 
errno : 0); + } + + static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, +- size_t size) ++ size_t size) + { +- char *value = NULL; +- char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- ssize_t ret; +- int saverr; +- +- saverr = ENOSYS; +- if (!lo_data(req)->xattr) +- goto out; +- +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", +- ino, name, size); +- } +- +- if (inode->is_symlink) { +- /* Sorry, no race free way to getxattr on symlink. */ +- saverr = EPERM; +- goto out; +- } +- +- sprintf(procname, "/proc/self/fd/%i", inode->fd); +- +- if (size) { +- value = malloc(size); +- if (!value) +- goto out_err; +- +- ret = getxattr(procname, name, value, size); +- if (ret == -1) +- goto out_err; +- saverr = 0; +- if (ret == 0) +- goto out; +- +- fuse_reply_buf(req, value, ret); +- } else { +- ret = getxattr(procname, name, NULL, 0); +- if (ret == -1) +- goto out_err; +- +- fuse_reply_xattr(req, ret); +- } ++ char *value = NULL; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) { ++ goto out; ++ } ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ino, name, ++ size); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to getxattr on symlink. 
*/ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ if (size) { ++ value = malloc(size); ++ if (!value) { ++ goto out_err; ++ } ++ ++ ret = getxattr(procname, name, value, size); ++ if (ret == -1) { ++ goto out_err; ++ } ++ saverr = 0; ++ if (ret == 0) { ++ goto out; ++ } ++ ++ fuse_reply_buf(req, value, ret); ++ } else { ++ ret = getxattr(procname, name, NULL, 0); ++ if (ret == -1) { ++ goto out_err; ++ } ++ ++ fuse_reply_xattr(req, ret); ++ } + out_free: +- free(value); +- return; ++ free(value); ++ return; + + out_err: +- saverr = errno; ++ saverr = errno; + out: +- fuse_reply_err(req, saverr); +- goto out_free; ++ fuse_reply_err(req, saverr); ++ goto out_free; + } + + static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + { +- char *value = NULL; +- char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- ssize_t ret; +- int saverr; +- +- saverr = ENOSYS; +- if (!lo_data(req)->xattr) +- goto out; +- +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", +- ino, size); +- } +- +- if (inode->is_symlink) { +- /* Sorry, no race free way to listxattr on symlink. 
*/ +- saverr = EPERM; +- goto out; +- } +- +- sprintf(procname, "/proc/self/fd/%i", inode->fd); +- +- if (size) { +- value = malloc(size); +- if (!value) +- goto out_err; +- +- ret = listxattr(procname, value, size); +- if (ret == -1) +- goto out_err; +- saverr = 0; +- if (ret == 0) +- goto out; +- +- fuse_reply_buf(req, value, ret); +- } else { +- ret = listxattr(procname, NULL, 0); +- if (ret == -1) +- goto out_err; +- +- fuse_reply_xattr(req, ret); +- } ++ char *value = NULL; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; ++ ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) { ++ goto out; ++ } ++ ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ++ ino, size); ++ } ++ ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to listxattr on symlink. */ ++ saverr = EPERM; ++ goto out; ++ } ++ ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ if (size) { ++ value = malloc(size); ++ if (!value) { ++ goto out_err; ++ } ++ ++ ret = listxattr(procname, value, size); ++ if (ret == -1) { ++ goto out_err; ++ } ++ saverr = 0; ++ if (ret == 0) { ++ goto out; ++ } ++ ++ fuse_reply_buf(req, value, ret); ++ } else { ++ ret = listxattr(procname, NULL, 0); ++ if (ret == -1) { ++ goto out_err; ++ } ++ ++ fuse_reply_xattr(req, ret); ++ } + out_free: +- free(value); +- return; ++ free(value); ++ return; + + out_err: +- saverr = errno; ++ saverr = errno; + out: +- fuse_reply_err(req, saverr); +- goto out_free; ++ fuse_reply_err(req, saverr); ++ goto out_free; + } + + static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, +- const char *value, size_t size, int flags) ++ const char *value, size_t size, int flags) + { +- char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- ssize_t ret; +- int saverr; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; + +- saverr = ENOSYS; +- if 
(!lo_data(req)->xattr) +- goto out; ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) { ++ goto out; ++ } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", +- ino, name, value, size); +- } ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", ++ ino, name, value, size); ++ } + +- if (inode->is_symlink) { +- /* Sorry, no race free way to setxattr on symlink. */ +- saverr = EPERM; +- goto out; +- } ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to setxattr on symlink. */ ++ saverr = EPERM; ++ goto out; ++ } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); + +- ret = setxattr(procname, name, value, size, flags); +- saverr = ret == -1 ? errno : 0; ++ ret = setxattr(procname, name, value, size, flags); ++ saverr = ret == -1 ? errno : 0; + + out: +- fuse_reply_err(req, saverr); ++ fuse_reply_err(req, saverr); + } + + static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + { +- char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- ssize_t ret; +- int saverr; ++ char procname[64]; ++ struct lo_inode *inode = lo_inode(req, ino); ++ ssize_t ret; ++ int saverr; + +- saverr = ENOSYS; +- if (!lo_data(req)->xattr) +- goto out; ++ saverr = ENOSYS; ++ if (!lo_data(req)->xattr) { ++ goto out; ++ } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", +- ino, name); +- } ++ if (lo_debug(req)) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ++ ino, name); ++ } + +- if (inode->is_symlink) { +- /* Sorry, no race free way to setxattr on symlink. */ +- saverr = EPERM; +- goto out; +- } ++ if (inode->is_symlink) { ++ /* Sorry, no race free way to setxattr on symlink. 
*/ ++ saverr = EPERM; ++ goto out; ++ } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); + +- ret = removexattr(procname, name); +- saverr = ret == -1 ? errno : 0; ++ ret = removexattr(procname, name); ++ saverr = ret == -1 ? errno : 0; + + out: +- fuse_reply_err(req, saverr); ++ fuse_reply_err(req, saverr); + } + + #ifdef HAVE_COPY_FILE_RANGE + static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, +- struct fuse_file_info *fi_in, +- fuse_ino_t ino_out, off_t off_out, +- struct fuse_file_info *fi_out, size_t len, +- int flags) ++ struct fuse_file_info *fi_in, fuse_ino_t ino_out, ++ off_t off_out, struct fuse_file_info *fi_out, ++ size_t len, int flags) + { +- ssize_t res; +- +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " +- "off=%lu, ino=%" PRIu64 "/fd=%lu, " +- "off=%lu, size=%zd, flags=0x%x)\n", +- ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, +- len, flags); +- +- res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, +- flags); +- if (res < 0) +- fuse_reply_err(req, -errno); +- else +- fuse_reply_write(req, res); ++ ssize_t res; ++ ++ if (lo_debug(req)) ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " ++ "off=%lu, ino=%" PRIu64 "/fd=%lu, " ++ "off=%lu, size=%zd, flags=0x%x)\n", ++ ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, len, ++ flags); ++ ++ res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, flags); ++ if (res < 0) { ++ fuse_reply_err(req, -errno); ++ } else { ++ fuse_reply_write(req, res); ++ } + } + #endif + + static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, +- struct fuse_file_info *fi) ++ struct fuse_file_info *fi) + { +- off_t res; +- +- (void)ino; +- res = lseek(fi->fh, off, whence); +- if (res != -1) +- fuse_reply_lseek(req, res); +- else +- fuse_reply_err(req, errno); ++ off_t res; ++ ++ (void)ino; ++ res = 
lseek(fi->fh, off, whence); ++ if (res != -1) { ++ fuse_reply_lseek(req, res); ++ } else { ++ fuse_reply_err(req, errno); ++ } + } + + static struct fuse_lowlevel_ops lo_oper = { +- .init = lo_init, +- .lookup = lo_lookup, +- .mkdir = lo_mkdir, +- .mknod = lo_mknod, +- .symlink = lo_symlink, +- .link = lo_link, +- .unlink = lo_unlink, +- .rmdir = lo_rmdir, +- .rename = lo_rename, +- .forget = lo_forget, +- .forget_multi = lo_forget_multi, +- .getattr = lo_getattr, +- .setattr = lo_setattr, +- .readlink = lo_readlink, +- .opendir = lo_opendir, +- .readdir = lo_readdir, +- .readdirplus = lo_readdirplus, +- .releasedir = lo_releasedir, +- .fsyncdir = lo_fsyncdir, +- .create = lo_create, +- .open = lo_open, +- .release = lo_release, +- .flush = lo_flush, +- .fsync = lo_fsync, +- .read = lo_read, +- .write_buf = lo_write_buf, +- .statfs = lo_statfs, +- .fallocate = lo_fallocate, +- .flock = lo_flock, +- .getxattr = lo_getxattr, +- .listxattr = lo_listxattr, +- .setxattr = lo_setxattr, +- .removexattr = lo_removexattr, ++ .init = lo_init, ++ .lookup = lo_lookup, ++ .mkdir = lo_mkdir, ++ .mknod = lo_mknod, ++ .symlink = lo_symlink, ++ .link = lo_link, ++ .unlink = lo_unlink, ++ .rmdir = lo_rmdir, ++ .rename = lo_rename, ++ .forget = lo_forget, ++ .forget_multi = lo_forget_multi, ++ .getattr = lo_getattr, ++ .setattr = lo_setattr, ++ .readlink = lo_readlink, ++ .opendir = lo_opendir, ++ .readdir = lo_readdir, ++ .readdirplus = lo_readdirplus, ++ .releasedir = lo_releasedir, ++ .fsyncdir = lo_fsyncdir, ++ .create = lo_create, ++ .open = lo_open, ++ .release = lo_release, ++ .flush = lo_flush, ++ .fsync = lo_fsync, ++ .read = lo_read, ++ .write_buf = lo_write_buf, ++ .statfs = lo_statfs, ++ .fallocate = lo_fallocate, ++ .flock = lo_flock, ++ .getxattr = lo_getxattr, ++ .listxattr = lo_listxattr, ++ .setxattr = lo_setxattr, ++ .removexattr = lo_removexattr, + #ifdef HAVE_COPY_FILE_RANGE +- .copy_file_range = lo_copy_file_range, ++ .copy_file_range = lo_copy_file_range, + 
#endif +- .lseek = lo_lseek, ++ .lseek = lo_lseek, + }; + + int main(int argc, char *argv[]) + { +- struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +- struct fuse_session *se; +- struct fuse_cmdline_opts opts; +- struct lo_data lo = { .debug = 0, +- .writeback = 0 }; +- int ret = -1; +- +- /* Don't mask creation mode, kernel already did that */ +- umask(0); +- +- pthread_mutex_init(&lo.mutex, NULL); +- lo.root.next = lo.root.prev = &lo.root; +- lo.root.fd = -1; +- lo.cache = CACHE_NORMAL; +- +- if (fuse_parse_cmdline(&args, &opts) != 0) +- return 1; +- if (opts.show_help) { +- printf("usage: %s [options] \n\n", argv[0]); +- fuse_cmdline_help(); +- fuse_lowlevel_help(); +- ret = 0; +- goto err_out1; +- } else if (opts.show_version) { +- fuse_lowlevel_version(); +- ret = 0; +- goto err_out1; +- } +- +- if(opts.mountpoint == NULL) { +- printf("usage: %s [options] \n", argv[0]); +- printf(" %s --help\n", argv[0]); +- ret = 1; +- goto err_out1; +- } +- +- if (fuse_opt_parse(&args, &lo, lo_opts, NULL)== -1) +- return 1; +- +- lo.debug = opts.debug; +- lo.root.refcount = 2; +- if (lo.source) { +- struct stat stat; +- int res; +- +- res = lstat(lo.source, &stat); +- if (res == -1) { +- fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", +- lo.source); +- exit(1); +- } +- if (!S_ISDIR(stat.st_mode)) { +- fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); +- exit(1); +- } +- +- } else { +- lo.source = "/"; +- } +- lo.root.is_symlink = false; +- if (!lo.timeout_set) { +- switch (lo.cache) { +- case CACHE_NEVER: +- lo.timeout = 0.0; +- break; +- +- case CACHE_NORMAL: +- lo.timeout = 1.0; +- break; +- +- case CACHE_ALWAYS: +- lo.timeout = 86400.0; +- break; +- } +- } else if (lo.timeout < 0) { +- fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", +- lo.timeout); +- exit(1); +- } +- +- lo.root.fd = open(lo.source, O_PATH); +- if (lo.root.fd == -1) { +- fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", +- lo.source); +- exit(1); +- } +- +- se = 
fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); +- if (se == NULL) +- goto err_out1; +- +- if (fuse_set_signal_handlers(se) != 0) +- goto err_out2; +- +- if (fuse_session_mount(se, opts.mountpoint) != 0) +- goto err_out3; +- +- fuse_daemonize(opts.foreground); +- +- /* Block until ctrl+c or fusermount -u */ +- if (opts.singlethread) +- ret = fuse_session_loop(se); +- else +- ret = fuse_session_loop_mt(se, opts.clone_fd); +- +- fuse_session_unmount(se); ++ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); ++ struct fuse_session *se; ++ struct fuse_cmdline_opts opts; ++ struct lo_data lo = { .debug = 0, .writeback = 0 }; ++ int ret = -1; ++ ++ /* Don't mask creation mode, kernel already did that */ ++ umask(0); ++ ++ pthread_mutex_init(&lo.mutex, NULL); ++ lo.root.next = lo.root.prev = &lo.root; ++ lo.root.fd = -1; ++ lo.cache = CACHE_NORMAL; ++ ++ if (fuse_parse_cmdline(&args, &opts) != 0) { ++ return 1; ++ } ++ if (opts.show_help) { ++ printf("usage: %s [options] \n\n", argv[0]); ++ fuse_cmdline_help(); ++ fuse_lowlevel_help(); ++ ret = 0; ++ goto err_out1; ++ } else if (opts.show_version) { ++ fuse_lowlevel_version(); ++ ret = 0; ++ goto err_out1; ++ } ++ ++ if (opts.mountpoint == NULL) { ++ printf("usage: %s [options] \n", argv[0]); ++ printf(" %s --help\n", argv[0]); ++ ret = 1; ++ goto err_out1; ++ } ++ ++ if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { ++ return 1; ++ } ++ ++ lo.debug = opts.debug; ++ lo.root.refcount = 2; ++ if (lo.source) { ++ struct stat stat; ++ int res; ++ ++ res = lstat(lo.source, &stat); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", ++ lo.source); ++ exit(1); ++ } ++ if (!S_ISDIR(stat.st_mode)) { ++ fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); ++ exit(1); ++ } ++ ++ } else { ++ lo.source = "/"; ++ } ++ lo.root.is_symlink = false; ++ if (!lo.timeout_set) { ++ switch (lo.cache) { ++ case CACHE_NEVER: ++ lo.timeout = 0.0; ++ break; ++ ++ case CACHE_NORMAL: ++ lo.timeout = 
1.0; ++ break; ++ ++ case CACHE_ALWAYS: ++ lo.timeout = 86400.0; ++ break; ++ } ++ } else if (lo.timeout < 0) { ++ fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", lo.timeout); ++ exit(1); ++ } ++ ++ lo.root.fd = open(lo.source, O_PATH); ++ if (lo.root.fd == -1) { ++ fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); ++ exit(1); ++ } ++ ++ se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); ++ if (se == NULL) { ++ goto err_out1; ++ } ++ ++ if (fuse_set_signal_handlers(se) != 0) { ++ goto err_out2; ++ } ++ ++ if (fuse_session_mount(se, opts.mountpoint) != 0) { ++ goto err_out3; ++ } ++ ++ fuse_daemonize(opts.foreground); ++ ++ /* Block until ctrl+c or fusermount -u */ ++ if (opts.singlethread) { ++ ret = fuse_session_loop(se); ++ } else { ++ ret = fuse_session_loop_mt(se, opts.clone_fd); ++ } ++ ++ fuse_session_unmount(se); + err_out3: +- fuse_remove_signal_handlers(se); ++ fuse_remove_signal_handlers(se); + err_out2: +- fuse_session_destroy(se); ++ fuse_session_destroy(se); + err_out1: +- free(opts.mountpoint); +- fuse_opt_free_args(&args); ++ free(opts.mountpoint); ++ fuse_opt_free_args(&args); + +- if (lo.root.fd >= 0) +- close(lo.root.fd); ++ if (lo.root.fd >= 0) { ++ close(lo.root.fd); ++ } + +- return ret ? 1 : 0; ++ return ret ? 1 : 0; + } +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Handle-hard-reboot.patch b/kvm-virtiofsd-Handle-hard-reboot.patch new file mode 100644 index 0000000..8888030 --- /dev/null +++ b/kvm-virtiofsd-Handle-hard-reboot.patch @@ -0,0 +1,65 @@ +From 616407b06517361ce444dcc0960aeaf55b52da33 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:41 +0100 +Subject: [PATCH 070/116] virtiofsd: Handle hard reboot +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-67-dgilbert@redhat.com> +Patchwork-id: 93521 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 066/112] virtiofsd: Handle hard reboot +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Handle a + mount + hard reboot (without unmount) + mount + +we get another 'init' which FUSE doesn't normally expect. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e8556f49098b5d95634e592d79a97f761b76c96e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 16 +++++++++++++++- + 1 file changed, 15 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 7d742b5..65f91da 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2433,7 +2433,21 @@ void fuse_session_process_buf_int(struct fuse_session *se, + goto reply_err; + } + } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) { +- goto reply_err; ++ if (fuse_lowlevel_is_virtio(se)) { ++ /* ++ * TODO: This is after a hard reboot typically, we need to do ++ * a destroy, but we can't reply to this request yet so ++ * we can't use do_destroy ++ */ ++ fuse_log(FUSE_LOG_DEBUG, "%s: reinit\n", __func__); ++ se->got_destroy = 1; ++ se->got_init = 0; ++ if (se->op.destroy) { ++ se->op.destroy(se->userdata); ++ } ++ } else { ++ goto reply_err; ++ } + } + + err = EACCES; +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Handle-reinit.patch b/kvm-virtiofsd-Handle-reinit.patch new file mode 100644 index 0000000..3f9577b --- /dev/null +++ b/kvm-virtiofsd-Handle-reinit.patch @@ -0,0 +1,53 @@ +From 485adfa1aa1b3e2d1449edf5c42d6ec396cbfb5d Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:40 +0100 +Subject: [PATCH 069/116] virtiofsd: Handle reinit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-66-dgilbert@redhat.com> +Patchwork-id: 93520 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 065/112] virtiofsd: Handle reinit +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Allow init->destroy->init for mount->umount->mount + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit c806d6435fe95fd54b379920aca2f4e3ea1f3258) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index a7a1968..7d742b5 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2028,6 +2028,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + } + + se->got_init = 1; ++ se->got_destroy = 0; + if (se->op.init) { + se->op.init(se->userdata, &se->conn); + } +@@ -2130,6 +2131,7 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, + (void)iter; + + se->got_destroy = 1; ++ se->got_init = 0; + if (se->op.destroy) { + se->op.destroy(se->userdata); + } +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Keep-track-of-replies.patch b/kvm-virtiofsd-Keep-track-of-replies.patch new file mode 100644 index 0000000..18be3e0 --- /dev/null +++ b/kvm-virtiofsd-Keep-track-of-replies.patch @@ -0,0 +1,116 @@ +From c818a1cb603cad07aa5c49ce808aa09435667c7c Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:04 +0100 +Subject: [PATCH 033/116] virtiofsd: Keep track of replies +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-30-dgilbert@redhat.com> +Patchwork-id: 93481 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 029/112] virtiofsd: Keep track of replies +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Keep track of whether we sent a reply to a request; this is a bit +paranoid but it means: + a) We should always recycle an element even if there was an error + in the request + b) Never try and send two replies on one queue element + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 2f65e69a7f22da8d20c747f34f339ebb40a0634f) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 23 ++++++++++++++++++++--- + 1 file changed, 20 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 05d0e29..f1adeb6 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -44,6 +44,7 @@ struct fv_QueueInfo { + + /* The element for the command currently being processed */ + VuVirtqElement *qe; ++ bool reply_sent; + }; + + /* +@@ -178,6 +179,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + { + VuVirtqElement *elem; + VuVirtq *q; ++ int ret = 0; + + assert(count >= 1); + assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); +@@ -191,6 +193,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + assert(out->unique); + /* For virtio we always have ch */ + assert(ch); ++ assert(!ch->qi->reply_sent); + elem = ch->qi->qe; + q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; + +@@ -208,19 +211,23 @@ int 
virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + if (in_len < sizeof(struct fuse_out_header)) { + fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", + __func__, elem->index); +- return -E2BIG; ++ ret = -E2BIG; ++ goto err; + } + if (in_len < tosend_len) { + fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", + __func__, elem->index, tosend_len); +- return -E2BIG; ++ ret = -E2BIG; ++ goto err; + } + + copy_iov(iov, count, in_sg, in_num, tosend_len); + vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); + vu_queue_notify(&se->virtio_dev->dev, q); ++ ch->qi->reply_sent = true; + +- return 0; ++err: ++ return ret; + } + + /* Thread function for individual queues, created when a queue is 'started' */ +@@ -296,6 +303,9 @@ static void *fv_queue_thread(void *opaque) + break; + } + ++ qi->qe = elem; ++ qi->reply_sent = false; ++ + if (!fbuf.mem) { + fbuf.mem = malloc(se->bufsize); + assert(fbuf.mem); +@@ -331,6 +341,13 @@ static void *fv_queue_thread(void *opaque) + /* TODO: Add checks for fuse_session_exited */ + fuse_session_process_buf_int(se, &fbuf, &ch); + ++ if (!qi->reply_sent) { ++ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", ++ __func__, elem->index); ++ /* I think we've still got to recycle the element */ ++ vu_queue_push(dev, q, elem, 0); ++ vu_queue_notify(dev, q); ++ } + qi->qe = NULL; + free(elem); + elem = NULL; +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch b/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch new file mode 100644 index 0000000..5e054f3 --- /dev/null +++ b/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch @@ -0,0 +1,143 @@ +From b37344c38b866c7e7fb773b4a3172a39306bac7e Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:42 +0100 +Subject: [PATCH 071/116] virtiofsd: Kill threads when queues are stopped +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-68-dgilbert@redhat.com> +Patchwork-id: 93522 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 067/112] virtiofsd: Kill threads when queues are stopped +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Kill the threads we've started when the queues get stopped. + +Signed-off-by: Dr. David Alan Gilbert +With improvements by: +Signed-off-by: Eryu Guan +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 10477ac47fc57d00a84802ff97c15450cd8021c1) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 51 +++++++++++++++++++++++++++++++++++++------ + 1 file changed, 44 insertions(+), 7 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 872968f..7a8774a 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -41,6 +41,7 @@ struct fv_QueueInfo { + /* Our queue index, corresponds to array position */ + int qidx; + int kick_fd; ++ int kill_fd; /* For killing the thread */ + + /* The element for the command currently being processed */ + VuVirtqElement *qe; +@@ -412,14 +413,17 @@ static void *fv_queue_thread(void *opaque) + fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, + qi->qidx, qi->kick_fd); + while (1) { +- struct pollfd pf[1]; ++ struct pollfd pf[2]; + pf[0].fd = qi->kick_fd; + pf[0].events = POLLIN; + pf[0].revents = 0; ++ pf[1].fd = qi->kill_fd; ++ pf[1].events = POLLIN; ++ pf[1].revents = 0; + + fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__, + qi->qidx); +- int poll_res = ppoll(pf, 1, 
NULL, NULL); ++ int poll_res = ppoll(pf, 2, NULL, NULL); + + if (poll_res == -1) { + if (errno == EINTR) { +@@ -430,12 +434,23 @@ static void *fv_queue_thread(void *opaque) + fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n"); + break; + } +- assert(poll_res == 1); ++ assert(poll_res >= 1); + if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { + fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x Queue %d\n", + __func__, pf[0].revents, qi->qidx); + break; + } ++ if (pf[1].revents & (POLLERR | POLLHUP | POLLNVAL)) { ++ fuse_log(FUSE_LOG_ERR, ++ "%s: Unexpected poll revents %x Queue %d killfd\n", ++ __func__, pf[1].revents, qi->qidx); ++ break; ++ } ++ if (pf[1].revents) { ++ fuse_log(FUSE_LOG_INFO, "%s: kill event on queue %d - quitting\n", ++ __func__, qi->qidx); ++ break; ++ } + assert(pf[0].revents & POLLIN); + fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__, + qi->qidx); +@@ -589,6 +604,28 @@ out: + return NULL; + } + ++static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) ++{ ++ int ret; ++ struct fv_QueueInfo *ourqi; ++ ++ assert(qidx < vud->nqueues); ++ ourqi = vud->qi[qidx]; ++ ++ /* Kill the thread */ ++ if (eventfd_write(ourqi->kill_fd, 1)) { ++ fuse_log(FUSE_LOG_ERR, "Eventfd_write for queue %d: %s\n", ++ qidx, strerror(errno)); ++ } ++ ret = pthread_join(ourqi->thread, NULL); ++ if (ret) { ++ fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n", ++ __func__, qidx, ret); ++ } ++ close(ourqi->kill_fd); ++ ourqi->kick_fd = -1; ++} ++ + /* Callback from libvhost-user on start or stop of a queue */ + static void fv_queue_set_started(VuDev *dev, int qidx, bool started) + { +@@ -633,16 +670,16 @@ static void fv_queue_set_started(VuDev *dev, int qidx, bool started) + } + ourqi = vud->qi[qidx]; + ourqi->kick_fd = dev->vq[qidx].kick_fd; ++ ++ ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE); ++ assert(ourqi->kill_fd != -1); + if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { + 
fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", + __func__, qidx); + assert(0); + } + } else { +- /* TODO: Kill the thread */ +- assert(qidx < vud->nqueues); +- ourqi = vud->qi[qidx]; +- ourqi->kick_fd = -1; ++ fv_queue_cleanup_thread(vud, qidx); + } + } + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch b/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch new file mode 100644 index 0000000..98211cb --- /dev/null +++ b/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch @@ -0,0 +1,96 @@ +From f09f13f9a001a50ee3465c165f4bbaf870fcadb9 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:53 +0100 +Subject: [PATCH 022/116] virtiofsd: Make fsync work even if only inode is + passed in +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-19-dgilbert@redhat.com> +Patchwork-id: 93472 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 018/112] virtiofsd: Make fsync work even if only inode is passed in +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +If caller has not sent file handle in request, then using inode, retrieve +the fd opened using O_PATH and use that to open file again and issue +fsync. This will be needed when dax_flush() calls fsync. At that time +we only have inode information (and not file). + +Signed-off-by: Vivek Goyal +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 1b209805f8159c3f4d89ddb9390a5f64887cebff) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 6 +++++- + tools/virtiofsd/passthrough_ll.c | 28 ++++++++++++++++++++++++++-- + 2 files changed, 31 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 514d79c..8552cfb 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1075,7 +1075,11 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + fi.fh = arg->fh; + + if (req->se->op.fsync) { +- req->se->op.fsync(req, nodeid, datasync, &fi); ++ if (fi.fh == (uint64_t)-1) { ++ req->se->op.fsync(req, nodeid, datasync, NULL); ++ } else { ++ req->se->op.fsync(req, nodeid, datasync, &fi); ++ } + } else { + fuse_reply_err(req, ENOSYS); + } +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 6c4da18..26ac870 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -903,10 +903,34 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + { + int res; + (void)ino; ++ int fd; ++ char *buf; ++ ++ fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, ++ (void *)fi); ++ ++ if (!fi) { ++ res = asprintf(&buf, "/proc/self/fd/%i", lo_fd(req, ino)); ++ if (res == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } ++ ++ fd = open(buf, O_RDWR); ++ free(buf); ++ if (fd == -1) { ++ return (void)fuse_reply_err(req, errno); ++ } ++ } else { ++ fd = fi->fh; ++ } ++ + if (datasync) { +- res = fdatasync(fi->fh); ++ res = fdatasync(fd); + } else { +- res = fsync(fi->fh); ++ res = fsync(fd); ++ } ++ if (!fi) { ++ close(fd); + } + fuse_reply_err(req, res == -1 ? 
errno : 0); + } +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch b/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch new file mode 100644 index 0000000..2c9874d --- /dev/null +++ b/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch @@ -0,0 +1,257 @@ +From a96042f05eaf494fbe26a9cbd940f5f815f782f9 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:56 +0100 +Subject: [PATCH 025/116] virtiofsd: Open vhost connection instead of mounting +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-22-dgilbert@redhat.com> +Patchwork-id: 93476 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 021/112] virtiofsd: Open vhost connection instead of mounting +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +When run with vhost-user options we connect to the QEMU instead +via a socket. Start this off by creating the socket. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit d14bf584dd965821e80d14c16d9292a464b1ab85) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 7 ++-- + tools/virtiofsd/fuse_lowlevel.c | 55 ++++------------------------ + tools/virtiofsd/fuse_virtio.c | 79 +++++++++++++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_virtio.h | 23 ++++++++++++ + 4 files changed, 114 insertions(+), 50 deletions(-) + create mode 100644 tools/virtiofsd/fuse_virtio.c + create mode 100644 tools/virtiofsd/fuse_virtio.h + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index 26b1a7d..82d6ac7 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -6,9 +6,10 @@ + * See the file COPYING.LIB + */ + +-#define FUSE_USE_VERSION 31 +- ++#ifndef FUSE_I_H ++#define FUSE_I_H + ++#define FUSE_USE_VERSION 31 + #include "fuse.h" + #include "fuse_lowlevel.h" + +@@ -101,3 +102,5 @@ void fuse_session_process_buf_int(struct fuse_session *se, + + /* room needed in buffer to accommodate header */ + #define FUSE_BUFFER_HEADER_SIZE 0x1000 ++ ++#endif +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 17e8718..5df124e 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -14,6 +14,7 @@ + #include "standard-headers/linux/fuse.h" + #include "fuse_misc.h" + #include "fuse_opt.h" ++#include "fuse_virtio.h" + + #include + #include +@@ -2202,6 +2203,11 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + goto out4; + } + ++ if (!se->vu_socket_path) { ++ fprintf(stderr, "fuse: missing -o vhost_user_socket option\n"); ++ goto out4; ++ } ++ + se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE; + + list_init_req(&se->list); +@@ -2224,54 +2230,7 @@ out1: + + int fuse_session_mount(struct fuse_session *se) + { +- int fd; +- +- /* +- * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos +- * would ensue. 
+- */ +- do { +- fd = open("/dev/null", O_RDWR); +- if (fd > 2) { +- close(fd); +- } +- } while (fd >= 0 && fd <= 2); +- +- /* +- * To allow FUSE daemons to run without privileges, the caller may open +- * /dev/fuse before launching the file system and pass on the file +- * descriptor by specifying /dev/fd/N as the mount point. Note that the +- * parent process takes care of performing the mount in this case. +- */ +- fd = fuse_mnt_parse_fuse_fd(mountpoint); +- if (fd != -1) { +- if (fcntl(fd, F_GETFD) == -1) { +- fuse_log(FUSE_LOG_ERR, "fuse: Invalid file descriptor /dev/fd/%u\n", +- fd); +- return -1; +- } +- se->fd = fd; +- return 0; +- } +- +- /* Open channel */ +- fd = fuse_kern_mount(mountpoint, se->mo); +- if (fd == -1) { +- return -1; +- } +- se->fd = fd; +- +- /* Save mountpoint */ +- se->mountpoint = strdup(mountpoint); +- if (se->mountpoint == NULL) { +- goto error_out; +- } +- +- return 0; +- +-error_out: +- fuse_kern_unmount(mountpoint, fd); +- return -1; ++ return virtio_session_mount(se); + } + + int fuse_session_fd(struct fuse_session *se) +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +new file mode 100644 +index 0000000..cbef6ff +--- /dev/null ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -0,0 +1,79 @@ ++/* ++ * virtio-fs glue for FUSE ++ * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates ++ * ++ * Authors: ++ * Dave Gilbert ++ * ++ * Implements the glue between libfuse and libvhost-user ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ ++ ++#include "fuse_i.h" ++#include "standard-headers/linux/fuse.h" ++#include "fuse_misc.h" ++#include "fuse_opt.h" ++#include "fuse_virtio.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* From spec */ ++struct virtio_fs_config { ++ char tag[36]; ++ uint32_t num_queues; ++}; ++ ++int virtio_session_mount(struct fuse_session *se) ++{ ++ struct sockaddr_un un; ++ mode_t old_umask; ++ ++ if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) { ++ fuse_log(FUSE_LOG_ERR, "Socket path too long\n"); ++ return -1; ++ } ++ ++ se->fd = -1; ++ ++ /* ++ * Create the Unix socket to communicate with qemu ++ * based on QEMU's vhost-user-bridge ++ */ ++ unlink(se->vu_socket_path); ++ strcpy(un.sun_path, se->vu_socket_path); ++ size_t addr_len = sizeof(un); ++ ++ int listen_sock = socket(AF_UNIX, SOCK_STREAM, 0); ++ if (listen_sock == -1) { ++ fuse_log(FUSE_LOG_ERR, "vhost socket creation: %m\n"); ++ return -1; ++ } ++ un.sun_family = AF_UNIX; ++ ++ /* ++ * Unfortunately bind doesn't let you set the mask on the socket, ++ * so set umask to 077 and restore it later. ++ */ ++ old_umask = umask(0077); ++ if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) { ++ fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n"); ++ umask(old_umask); ++ return -1; ++ } ++ umask(old_umask); ++ ++ if (listen(listen_sock, 1) == -1) { ++ fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n"); ++ return -1; ++ } ++ ++ return -1; ++} +diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h +new file mode 100644 +index 0000000..8f2edb6 +--- /dev/null ++++ b/tools/virtiofsd/fuse_virtio.h +@@ -0,0 +1,23 @@ ++/* ++ * virtio-fs glue for FUSE ++ * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates ++ * ++ * Authors: ++ * Dave Gilbert ++ * ++ * Implements the glue between libfuse and libvhost-user ++ * ++ * This program can be distributed under the terms of the GNU LGPLv2. 
++ * See the file COPYING.LIB ++ */ ++ ++#ifndef FUSE_VIRTIO_H ++#define FUSE_VIRTIO_H ++ ++#include "fuse_i.h" ++ ++struct fuse_session; ++ ++int virtio_session_mount(struct fuse_session *se); ++ ++#endif +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch b/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch new file mode 100644 index 0000000..8d8de78 --- /dev/null +++ b/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch @@ -0,0 +1,76 @@ +From ade3dcad8a907d281549b341a8908851e36ba458 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:31 +0100 +Subject: [PATCH 060/116] virtiofsd: Parse flag FUSE_WRITE_KILL_PRIV +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-57-dgilbert@redhat.com> +Patchwork-id: 93505 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 056/112] virtiofsd: Parse flag FUSE_WRITE_KILL_PRIV +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +Caller can set FUSE_WRITE_KILL_PRIV in write_flags. Parse it and pass it +to the filesystem. + +Signed-off-by: Vivek Goyal +Reviewed-by: Misono Tomohiro +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit f779bc5265e7e7abb13a03d4bfbc74151afc15c2) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_common.h | 6 +++++- + tools/virtiofsd/fuse_lowlevel.c | 4 +++- + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index f8f6433..686c42c 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -93,8 +93,12 @@ struct fuse_file_info { + */ + unsigned int cache_readdir:1; + ++ /* Indicates that suid/sgid bits should be removed upon write */ ++ unsigned int kill_priv:1; ++ ++ + /** Padding. 
Reserved for future use*/ +- unsigned int padding:25; ++ unsigned int padding:24; + unsigned int padding2:32; + + /* +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 02e1d83..2d6dc5a 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1142,6 +1142,7 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; ++ fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV); + + fi.lock_owner = arg->lock_owner; + fi.flags = arg->flags; +@@ -1177,7 +1178,8 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, + fi.lock_owner = arg->lock_owner; + fi.flags = arg->flags; + fi.fh = arg->fh; +- fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; ++ fi.writepage = !!(arg->write_flags & FUSE_WRITE_CACHE); ++ fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV); + + if (ibufv->count == 1) { + assert(!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)); +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch b/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch new file mode 100644 index 0000000..7d095c9 --- /dev/null +++ b/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch @@ -0,0 +1,140 @@ +From d5986c804f05070a07dfe702f7c66357daaa1ab6 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:20 +0100 +Subject: [PATCH 049/116] virtiofsd: Pass write iov's all the way through +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-46-dgilbert@redhat.com> +Patchwork-id: 93497 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 045/112] virtiofsd: Pass write iov's all the way through +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. 
David Alan Gilbert" + +Pass the write iov pointing to guest RAM all the way through rather +than copying the data. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Xiao Yang +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e17f7a580e2c599330ad3a6946be615ca2fe97d9) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 79 +++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 73 insertions(+), 6 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index fd588a4..872968f 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -454,6 +454,10 @@ static void *fv_queue_thread(void *opaque) + __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); + + while (1) { ++ bool allocated_bufv = false; ++ struct fuse_bufvec bufv; ++ struct fuse_bufvec *pbufv; ++ + /* + * An element contains one request and the space to send our + * response They're spread over multiple descriptors in a +@@ -495,14 +499,76 @@ static void *fv_queue_thread(void *opaque) + __func__, elem->index); + assert(0); /* TODO */ + } +- copy_from_iov(&fbuf, out_num, out_sg); +- fbuf.size = out_len; ++ /* Copy just the first element and look at it */ ++ copy_from_iov(&fbuf, 1, out_sg); ++ ++ if (out_num > 2 && ++ out_sg[0].iov_len == sizeof(struct fuse_in_header) && ++ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && ++ out_sg[1].iov_len == sizeof(struct fuse_write_in)) { ++ /* ++ * For a write we don't actually need to copy the ++ * data, we can just do it straight out of guest memory ++ * but we must still copy the headers in case the guest ++ * was nasty and changed them while we were using them. 
++ */ ++ fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); ++ ++ /* copy the fuse_write_in header after the fuse_in_header */ ++ fbuf.mem += out_sg->iov_len; ++ copy_from_iov(&fbuf, 1, out_sg + 1); ++ fbuf.mem -= out_sg->iov_len; ++ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; ++ ++ /* Allocate the bufv, with space for the rest of the iov */ ++ allocated_bufv = true; ++ pbufv = malloc(sizeof(struct fuse_bufvec) + ++ sizeof(struct fuse_buf) * (out_num - 2)); ++ if (!pbufv) { ++ vu_queue_unpop(dev, q, elem, 0); ++ free(elem); ++ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", ++ __func__); ++ goto out; ++ } ++ ++ pbufv->count = 1; ++ pbufv->buf[0] = fbuf; ++ ++ size_t iovindex, pbufvindex; ++ iovindex = 2; /* 2 headers, separate iovs */ ++ pbufvindex = 1; /* 2 headers, 1 fusebuf */ ++ ++ for (; iovindex < out_num; iovindex++, pbufvindex++) { ++ pbufv->count++; ++ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ ++ pbufv->buf[pbufvindex].flags = 0; ++ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; ++ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; ++ } ++ } else { ++ /* Normal (non fast write) path */ ++ ++ /* Copy the rest of the buffer */ ++ fbuf.mem += out_sg->iov_len; ++ copy_from_iov(&fbuf, out_num - 1, out_sg + 1); ++ fbuf.mem -= out_sg->iov_len; ++ fbuf.size = out_len; + +- /* TODO! Endianness of header */ ++ /* TODO! 
Endianness of header */ + +- /* TODO: Add checks for fuse_session_exited */ +- struct fuse_bufvec bufv = { .buf[0] = fbuf, .count = 1 }; +- fuse_session_process_buf_int(se, &bufv, &ch); ++ /* TODO: Add checks for fuse_session_exited */ ++ bufv.buf[0] = fbuf; ++ bufv.count = 1; ++ pbufv = &bufv; ++ } ++ pbufv->idx = 0; ++ pbufv->off = 0; ++ fuse_session_process_buf_int(se, pbufv, &ch); ++ ++ if (allocated_bufv) { ++ free(pbufv); ++ } + + if (!qi->reply_sent) { + fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", +@@ -516,6 +582,7 @@ static void *fv_queue_thread(void *opaque) + elem = NULL; + } + } ++out: + pthread_mutex_destroy(&ch.lock); + free(fbuf.mem); + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch b/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch new file mode 100644 index 0000000..834ced1 --- /dev/null +++ b/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch @@ -0,0 +1,168 @@ +From 9e4320eec5204da851ac95fb7a7e6520c9ccee7d Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:19 +0100 +Subject: [PATCH 048/116] virtiofsd: Plumb fuse_bufvec through to do_write_buf +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-45-dgilbert@redhat.com> +Patchwork-id: 93499 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 044/112] virtiofsd: Plumb fuse_bufvec through to do_write_buf +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Let fuse_session_process_buf_int take a fuse_bufvec * instead of a +fuse_buf; and then through to do_write_buf - where in the best +case it can pass that straight through to op.write_buf without copying +(other than skipping a header). + +Signed-off-by: Dr. 
David Alan Gilbert +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 469f9d2fc405b0508e6cf1b4b5bbcadfc82064e5) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 2 +- + tools/virtiofsd/fuse_lowlevel.c | 61 +++++++++++++++++++++++++++-------------- + tools/virtiofsd/fuse_virtio.c | 3 +- + 3 files changed, 44 insertions(+), 22 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index 45995f3..a20854f 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -100,7 +100,7 @@ int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, + void fuse_free_req(fuse_req_t req); + + void fuse_session_process_buf_int(struct fuse_session *se, +- const struct fuse_buf *buf, ++ struct fuse_bufvec *bufv, + struct fuse_chan *ch); + + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 95f4db8..7e10995 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1004,11 +1004,12 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + + static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, +- const struct fuse_buf *ibuf) ++ struct fuse_bufvec *ibufv) + { + struct fuse_session *se = req->se; +- struct fuse_bufvec bufv = { +- .buf[0] = *ibuf, ++ struct fuse_bufvec *pbufv = ibufv; ++ struct fuse_bufvec tmpbufv = { ++ .buf[0] = ibufv->buf[0], + .count = 1, + }; + struct fuse_write_in *arg = (struct fuse_write_in *)inarg; +@@ -1018,22 +1019,31 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, + fi.fh = arg->fh; + fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; + +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { +- bufv.buf[0].mem = PARAM(arg); +- } +- +- bufv.buf[0].size -= +- sizeof(struct fuse_in_header) + sizeof(struct 
fuse_write_in); +- if (bufv.buf[0].size < arg->size) { +- fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); +- fuse_reply_err(req, EIO); +- return; ++ if (ibufv->count == 1) { ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ if (!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)) { ++ tmpbufv.buf[0].mem = PARAM(arg); ++ } ++ tmpbufv.buf[0].size -= ++ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); ++ if (tmpbufv.buf[0].size < arg->size) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: do_write_buf: buffer size too small\n"); ++ fuse_reply_err(req, EIO); ++ return; ++ } ++ tmpbufv.buf[0].size = arg->size; ++ pbufv = &tmpbufv; ++ } else { ++ /* ++ * Input bufv contains the headers in the first element ++ * and the data in the rest, we need to skip that first element ++ */ ++ ibufv->buf[0].size = 0; + } +- bufv.buf[0].size = arg->size; + +- se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); ++ se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); + } + + static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +@@ -2024,13 +2034,24 @@ static const char *opname(enum fuse_opcode opcode) + void fuse_session_process_buf(struct fuse_session *se, + const struct fuse_buf *buf) + { +- fuse_session_process_buf_int(se, buf, NULL); ++ struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 }; ++ fuse_session_process_buf_int(se, &bufv, NULL); + } + ++/* ++ * Restriction: ++ * bufv is normally a single entry buffer, except for a write ++ * where (if it's in memory) then the bufv may be multiple entries, ++ * where the first entry contains all headers and subsequent entries ++ * contain data ++ * bufv shall not use any offsets etc to make the data anything ++ * other than contiguous starting from 0. 
++ */ + void fuse_session_process_buf_int(struct fuse_session *se, +- const struct fuse_buf *buf, ++ struct fuse_bufvec *bufv, + struct fuse_chan *ch) + { ++ const struct fuse_buf *buf = bufv->buf; + struct fuse_in_header *in; + const void *inarg; + struct fuse_req *req; +@@ -2108,7 +2129,7 @@ void fuse_session_process_buf_int(struct fuse_session *se, + + inarg = (void *)&in[1]; + if (in->opcode == FUSE_WRITE && se->op.write_buf) { +- do_write_buf(req, in->nodeid, inarg, buf); ++ do_write_buf(req, in->nodeid, inarg, bufv); + } else { + fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); + } +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 635f877..fd588a4 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -501,7 +501,8 @@ static void *fv_queue_thread(void *opaque) + /* TODO! Endianness of header */ + + /* TODO: Add checks for fuse_session_exited */ +- fuse_session_process_buf_int(se, &fbuf, &ch); ++ struct fuse_bufvec bufv = { .buf[0] = fbuf, .count = 1 }; ++ fuse_session_process_buf_int(se, &bufv, &ch); + + if (!qi->reply_sent) { + fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Poll-kick_fd-for-queue.patch b/kvm-virtiofsd-Poll-kick_fd-for-queue.patch new file mode 100644 index 0000000..d7c6c0a --- /dev/null +++ b/kvm-virtiofsd-Poll-kick_fd-for-queue.patch @@ -0,0 +1,97 @@ +From 083b944fac29bc3115a19eb38e176f6b23f04938 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:01 +0100 +Subject: [PATCH 030/116] virtiofsd: Poll kick_fd for queue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-27-dgilbert@redhat.com> +Patchwork-id: 93483 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 026/112] virtiofsd: Poll kick_fd for queue +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +In the queue thread poll the kick_fd we're passed. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 5dcd1f56141378226d33dc3df68ec57913e0aa04) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 40 +++++++++++++++++++++++++++++++++++++++- + 1 file changed, 39 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 2a94bb3..05e7258 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -100,13 +101,50 @@ static void fv_panic(VuDev *dev, const char *err) + exit(EXIT_FAILURE); + } + ++/* Thread function for individual queues, created when a queue is 'started' */ + static void *fv_queue_thread(void *opaque) + { + struct fv_QueueInfo *qi = opaque; + fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, + qi->qidx, qi->kick_fd); + while (1) { +- /* TODO */ ++ struct pollfd pf[1]; ++ pf[0].fd = qi->kick_fd; ++ pf[0].events = POLLIN; ++ pf[0].revents = 0; ++ ++ fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__, ++ qi->qidx); ++ int poll_res = ppoll(pf, 1, NULL, NULL); ++ ++ if (poll_res == -1) { ++ if (errno == EINTR) { ++ fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n", ++ __func__); ++ continue; ++ } ++ fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n"); ++ break; ++ } ++ assert(poll_res == 1); ++ if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { ++ fuse_log(FUSE_LOG_ERR, "%s: 
Unexpected poll revents %x Queue %d\n", ++ __func__, pf[0].revents, qi->qidx); ++ break; ++ } ++ assert(pf[0].revents & POLLIN); ++ fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__, ++ qi->qidx); ++ ++ eventfd_t evalue; ++ if (eventfd_read(qi->kick_fd, &evalue)) { ++ fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); ++ break; ++ } ++ if (qi->virtio_dev->se->debug) { ++ fprintf(stderr, "%s: Queue %d gave evalue: %zx\n", __func__, ++ qi->qidx, (size_t)evalue); ++ } + } + + return NULL; +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch b/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch new file mode 100644 index 0000000..d4e1ea1 --- /dev/null +++ b/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch @@ -0,0 +1,144 @@ +From ab336e3aea97d76c1b2ac725d19b4518f47dd8f0 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:59 +0100 +Subject: [PATCH 088/116] virtiofsd: Prevent multiply running with same + vhost_user_socket +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-85-dgilbert@redhat.com> +Patchwork-id: 93541 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 084/112] virtiofsd: Prevent multiply running with same vhost_user_socket +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Masayoshi Mizuma + +virtiofsd can run multiply even if the vhost_user_socket is same path. + + ]# ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu -o source=/tmp/share & + [1] 244965 + virtio_session_mount: Waiting for vhost-user socket connection... + ]# ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu -o source=/tmp/share & + [2] 244966 + virtio_session_mount: Waiting for vhost-user socket connection... 
+ ]# + +The user will get confused about the situation and maybe the cause of the +unexpected problem. So it's better to prevent the multiple running. + +Create a regular file under localstatedir directory to exclude the +vhost_user_socket. To create and lock the file, use qemu_write_pidfile() +because the API has some sanity checks and file lock. + +Signed-off-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert + Applied fixes from Stefan's review and moved osdep include +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 96814800d2b49d18737c36e021c387697ec40c62) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 1 + + tools/virtiofsd/fuse_virtio.c | 49 ++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 49 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 440508a..aac282f 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -18,6 +18,7 @@ + + #include + #include ++#include + #include + #include + #include +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index e7bd772..b7948de 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -13,11 +13,12 @@ + + #include "qemu/osdep.h" + #include "qemu/iov.h" +-#include "fuse_virtio.h" ++#include "qapi/error.h" + #include "fuse_i.h" + #include "standard-headers/linux/fuse.h" + #include "fuse_misc.h" + #include "fuse_opt.h" ++#include "fuse_virtio.h" + + #include + #include +@@ -743,6 +744,42 @@ int virtio_loop(struct fuse_session *se) + return 0; + } + ++static void strreplace(char *s, char old, char new) ++{ ++ for (; *s; ++s) { ++ if (*s == old) { ++ *s = new; ++ } ++ } ++} ++ ++static bool fv_socket_lock(struct fuse_session *se) ++{ ++ g_autofree gchar *sk_name = NULL; ++ g_autofree gchar *pidfile = NULL; ++ g_autofree gchar *dir = NULL; ++ 
Error *local_err = NULL; ++ ++ dir = qemu_get_local_state_pathname("run/virtiofsd"); ++ ++ if (g_mkdir_with_parents(dir, S_IRWXU) < 0) { ++ fuse_log(FUSE_LOG_ERR, "%s: Failed to create directory %s: %s", ++ __func__, dir, strerror(errno)); ++ return false; ++ } ++ ++ sk_name = g_strdup(se->vu_socket_path); ++ strreplace(sk_name, '/', '.'); ++ pidfile = g_strdup_printf("%s/%s.pid", dir, sk_name); ++ ++ if (!qemu_write_pidfile(pidfile, &local_err)) { ++ error_report_err(local_err); ++ return false; ++ } ++ ++ return true; ++} ++ + static int fv_create_listen_socket(struct fuse_session *se) + { + struct sockaddr_un un; +@@ -758,6 +795,16 @@ static int fv_create_listen_socket(struct fuse_session *se) + return -1; + } + ++ if (!strlen(se->vu_socket_path)) { ++ fuse_log(FUSE_LOG_ERR, "Socket path is empty\n"); ++ return -1; ++ } ++ ++ /* Check the vu_socket_path is already used */ ++ if (!fv_socket_lock(se)) { ++ return -1; ++ } ++ + /* + * Create the Unix socket to communicate with qemu + * based on QEMU's vhost-user-bridge +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch b/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch new file mode 100644 index 0000000..f30f23a --- /dev/null +++ b/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch @@ -0,0 +1,945 @@ +From e7c1ad608117b21f80c762f5505a66b21c56e9d3 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:40 +0100 +Subject: [PATCH 009/116] virtiofsd: Pull in kernel's fuse.h +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-6-dgilbert@redhat.com> +Patchwork-id: 93460 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 005/112] virtiofsd: Pull in kernel's fuse.h +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. 
David Alan Gilbert" + +Update scripts/update-linux-headers.sh to add fuse.h and +use it to pull in fuse.h from the kernel; from v5.5-rc1 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit a62a9e192bc5f0aa0bc076b51db5a069add87c78) +Signed-off-by: Miroslav Rezanina +--- + include/standard-headers/linux/fuse.h | 891 ++++++++++++++++++++++++++++++++++ + scripts/update-linux-headers.sh | 1 + + 2 files changed, 892 insertions(+) + create mode 100644 include/standard-headers/linux/fuse.h + +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +new file mode 100644 +index 0000000..f4df0a4 +--- /dev/null ++++ b/include/standard-headers/linux/fuse.h +@@ -0,0 +1,891 @@ ++/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ ++/* ++ This file defines the kernel interface of FUSE ++ Copyright (C) 2001-2008 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU GPL. ++ See the file COPYING. ++ ++ This -- and only this -- header file may also be distributed under ++ the terms of the BSD Licence as follows: ++ ++ Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ 1. Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ 2. Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. 
++ ++ THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ++ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE ++ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ SUCH DAMAGE. ++*/ ++ ++/* ++ * This file defines the kernel interface of FUSE ++ * ++ * Protocol changelog: ++ * ++ * 7.1: ++ * - add the following messages: ++ * FUSE_SETATTR, FUSE_SYMLINK, FUSE_MKNOD, FUSE_MKDIR, FUSE_UNLINK, ++ * FUSE_RMDIR, FUSE_RENAME, FUSE_LINK, FUSE_OPEN, FUSE_READ, FUSE_WRITE, ++ * FUSE_RELEASE, FUSE_FSYNC, FUSE_FLUSH, FUSE_SETXATTR, FUSE_GETXATTR, ++ * FUSE_LISTXATTR, FUSE_REMOVEXATTR, FUSE_OPENDIR, FUSE_READDIR, ++ * FUSE_RELEASEDIR ++ * - add padding to messages to accommodate 32-bit servers on 64-bit kernels ++ * ++ * 7.2: ++ * - add FOPEN_DIRECT_IO and FOPEN_KEEP_CACHE flags ++ * - add FUSE_FSYNCDIR message ++ * ++ * 7.3: ++ * - add FUSE_ACCESS message ++ * - add FUSE_CREATE message ++ * - add filehandle to fuse_setattr_in ++ * ++ * 7.4: ++ * - add frsize to fuse_kstatfs ++ * - clean up request size limit checking ++ * ++ * 7.5: ++ * - add flags and max_write to fuse_init_out ++ * ++ * 7.6: ++ * - add max_readahead to fuse_init_in and fuse_init_out ++ * ++ * 7.7: ++ * - add FUSE_INTERRUPT message ++ * - add POSIX file lock support ++ * ++ * 7.8: ++ * - add lock_owner and flags fields to fuse_release_in ++ * - add FUSE_BMAP message ++ * - add FUSE_DESTROY message ++ * ++ * 7.9: ++ * - new 
fuse_getattr_in input argument of GETATTR ++ * - add lk_flags in fuse_lk_in ++ * - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in ++ * - add blksize field to fuse_attr ++ * - add file flags field to fuse_read_in and fuse_write_in ++ * - Add ATIME_NOW and MTIME_NOW flags to fuse_setattr_in ++ * ++ * 7.10 ++ * - add nonseekable open flag ++ * ++ * 7.11 ++ * - add IOCTL message ++ * - add unsolicited notification support ++ * - add POLL message and NOTIFY_POLL notification ++ * ++ * 7.12 ++ * - add umask flag to input argument of create, mknod and mkdir ++ * - add notification messages for invalidation of inodes and ++ * directory entries ++ * ++ * 7.13 ++ * - make max number of background requests and congestion threshold ++ * tunables ++ * ++ * 7.14 ++ * - add splice support to fuse device ++ * ++ * 7.15 ++ * - add store notify ++ * - add retrieve notify ++ * ++ * 7.16 ++ * - add BATCH_FORGET request ++ * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct ++ * fuse_ioctl_iovec' instead of ambiguous 'struct iovec' ++ * - add FUSE_IOCTL_32BIT flag ++ * ++ * 7.17 ++ * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK ++ * ++ * 7.18 ++ * - add FUSE_IOCTL_DIR flag ++ * - add FUSE_NOTIFY_DELETE ++ * ++ * 7.19 ++ * - add FUSE_FALLOCATE ++ * ++ * 7.20 ++ * - add FUSE_AUTO_INVAL_DATA ++ * ++ * 7.21 ++ * - add FUSE_READDIRPLUS ++ * - send the requested events in POLL request ++ * ++ * 7.22 ++ * - add FUSE_ASYNC_DIO ++ * ++ * 7.23 ++ * - add FUSE_WRITEBACK_CACHE ++ * - add time_gran to fuse_init_out ++ * - add reserved space to fuse_init_out ++ * - add FATTR_CTIME ++ * - add ctime and ctimensec to fuse_setattr_in ++ * - add FUSE_RENAME2 request ++ * - add FUSE_NO_OPEN_SUPPORT flag ++ * ++ * 7.24 ++ * - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support ++ * ++ * 7.25 ++ * - add FUSE_PARALLEL_DIROPS ++ * ++ * 7.26 ++ * - add FUSE_HANDLE_KILLPRIV ++ * - add FUSE_POSIX_ACL ++ * ++ * 7.27 ++ * - add FUSE_ABORT_ERROR ++ * ++ * 7.28 ++ * - 
add FUSE_COPY_FILE_RANGE ++ * - add FOPEN_CACHE_DIR ++ * - add FUSE_MAX_PAGES, add max_pages to init_out ++ * - add FUSE_CACHE_SYMLINKS ++ * ++ * 7.29 ++ * - add FUSE_NO_OPENDIR_SUPPORT flag ++ * ++ * 7.30 ++ * - add FUSE_EXPLICIT_INVAL_DATA ++ * - add FUSE_IOCTL_COMPAT_X32 ++ * ++ * 7.31 ++ * - add FUSE_WRITE_KILL_PRIV flag ++ * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING ++ * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag ++ */ ++ ++#ifndef _LINUX_FUSE_H ++#define _LINUX_FUSE_H ++ ++#include ++ ++/* ++ * Version negotiation: ++ * ++ * Both the kernel and userspace send the version they support in the ++ * INIT request and reply respectively. ++ * ++ * If the major versions match then both shall use the smallest ++ * of the two minor versions for communication. ++ * ++ * If the kernel supports a larger major version, then userspace shall ++ * reply with the major version it supports, ignore the rest of the ++ * INIT message and expect a new INIT message from the kernel with a ++ * matching major version. ++ * ++ * If the library supports a larger major version, then it shall fall ++ * back to the major protocol version sent by the kernel for ++ * communication and reply with that major version (and an arbitrary ++ * supported minor version). 
++ */ ++ ++/** Version number of this interface */ ++#define FUSE_KERNEL_VERSION 7 ++ ++/** Minor version number of this interface */ ++#define FUSE_KERNEL_MINOR_VERSION 31 ++ ++/** The node ID of the root inode */ ++#define FUSE_ROOT_ID 1 ++ ++/* Make sure all structures are padded to 64bit boundary, so 32bit ++ userspace works under 64bit kernels */ ++ ++struct fuse_attr { ++ uint64_t ino; ++ uint64_t size; ++ uint64_t blocks; ++ uint64_t atime; ++ uint64_t mtime; ++ uint64_t ctime; ++ uint32_t atimensec; ++ uint32_t mtimensec; ++ uint32_t ctimensec; ++ uint32_t mode; ++ uint32_t nlink; ++ uint32_t uid; ++ uint32_t gid; ++ uint32_t rdev; ++ uint32_t blksize; ++ uint32_t padding; ++}; ++ ++struct fuse_kstatfs { ++ uint64_t blocks; ++ uint64_t bfree; ++ uint64_t bavail; ++ uint64_t files; ++ uint64_t ffree; ++ uint32_t bsize; ++ uint32_t namelen; ++ uint32_t frsize; ++ uint32_t padding; ++ uint32_t spare[6]; ++}; ++ ++struct fuse_file_lock { ++ uint64_t start; ++ uint64_t end; ++ uint32_t type; ++ uint32_t pid; /* tgid */ ++}; ++ ++/** ++ * Bitmasks for fuse_setattr_in.valid ++ */ ++#define FATTR_MODE (1 << 0) ++#define FATTR_UID (1 << 1) ++#define FATTR_GID (1 << 2) ++#define FATTR_SIZE (1 << 3) ++#define FATTR_ATIME (1 << 4) ++#define FATTR_MTIME (1 << 5) ++#define FATTR_FH (1 << 6) ++#define FATTR_ATIME_NOW (1 << 7) ++#define FATTR_MTIME_NOW (1 << 8) ++#define FATTR_LOCKOWNER (1 << 9) ++#define FATTR_CTIME (1 << 10) ++ ++/** ++ * Flags returned by the OPEN request ++ * ++ * FOPEN_DIRECT_IO: bypass page cache for this open file ++ * FOPEN_KEEP_CACHE: don't invalidate the data cache on open ++ * FOPEN_NONSEEKABLE: the file is not seekable ++ * FOPEN_CACHE_DIR: allow caching this directory ++ * FOPEN_STREAM: the file is stream-like (no file position at all) ++ */ ++#define FOPEN_DIRECT_IO (1 << 0) ++#define FOPEN_KEEP_CACHE (1 << 1) ++#define FOPEN_NONSEEKABLE (1 << 2) ++#define FOPEN_CACHE_DIR (1 << 3) ++#define FOPEN_STREAM (1 << 4) ++ ++/** ++ * INIT 
request/reply flags ++ * ++ * FUSE_ASYNC_READ: asynchronous read requests ++ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks ++ * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported) ++ * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem ++ * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." ++ * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB ++ * FUSE_DONT_MASK: don't apply umask to file mode on create operations ++ * FUSE_SPLICE_WRITE: kernel supports splice write on the device ++ * FUSE_SPLICE_MOVE: kernel supports splice move on the device ++ * FUSE_SPLICE_READ: kernel supports splice read on the device ++ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks ++ * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories ++ * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages ++ * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one) ++ * FUSE_READDIRPLUS_AUTO: adaptive readdirplus ++ * FUSE_ASYNC_DIO: asynchronous direct I/O submission ++ * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes ++ * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens ++ * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir ++ * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc ++ * FUSE_POSIX_ACL: filesystem supports posix acls ++ * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED ++ * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages ++ * FUSE_CACHE_SYMLINKS: cache READLINK responses ++ * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir ++ * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request ++ * FUSE_MAP_ALIGNMENT: map_alignment field is valid ++ */ ++#define FUSE_ASYNC_READ (1 << 0) ++#define FUSE_POSIX_LOCKS (1 << 1) ++#define FUSE_FILE_OPS (1 << 2) ++#define FUSE_ATOMIC_O_TRUNC (1 << 3) ++#define FUSE_EXPORT_SUPPORT (1 << 4) ++#define 
FUSE_BIG_WRITES (1 << 5) ++#define FUSE_DONT_MASK (1 << 6) ++#define FUSE_SPLICE_WRITE (1 << 7) ++#define FUSE_SPLICE_MOVE (1 << 8) ++#define FUSE_SPLICE_READ (1 << 9) ++#define FUSE_FLOCK_LOCKS (1 << 10) ++#define FUSE_HAS_IOCTL_DIR (1 << 11) ++#define FUSE_AUTO_INVAL_DATA (1 << 12) ++#define FUSE_DO_READDIRPLUS (1 << 13) ++#define FUSE_READDIRPLUS_AUTO (1 << 14) ++#define FUSE_ASYNC_DIO (1 << 15) ++#define FUSE_WRITEBACK_CACHE (1 << 16) ++#define FUSE_NO_OPEN_SUPPORT (1 << 17) ++#define FUSE_PARALLEL_DIROPS (1 << 18) ++#define FUSE_HANDLE_KILLPRIV (1 << 19) ++#define FUSE_POSIX_ACL (1 << 20) ++#define FUSE_ABORT_ERROR (1 << 21) ++#define FUSE_MAX_PAGES (1 << 22) ++#define FUSE_CACHE_SYMLINKS (1 << 23) ++#define FUSE_NO_OPENDIR_SUPPORT (1 << 24) ++#define FUSE_EXPLICIT_INVAL_DATA (1 << 25) ++#define FUSE_MAP_ALIGNMENT (1 << 26) ++ ++/** ++ * CUSE INIT request/reply flags ++ * ++ * CUSE_UNRESTRICTED_IOCTL: use unrestricted ioctl ++ */ ++#define CUSE_UNRESTRICTED_IOCTL (1 << 0) ++ ++/** ++ * Release flags ++ */ ++#define FUSE_RELEASE_FLUSH (1 << 0) ++#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1) ++ ++/** ++ * Getattr flags ++ */ ++#define FUSE_GETATTR_FH (1 << 0) ++ ++/** ++ * Lock flags ++ */ ++#define FUSE_LK_FLOCK (1 << 0) ++ ++/** ++ * WRITE flags ++ * ++ * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed ++ * FUSE_WRITE_LOCKOWNER: lock_owner field is valid ++ * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits ++ */ ++#define FUSE_WRITE_CACHE (1 << 0) ++#define FUSE_WRITE_LOCKOWNER (1 << 1) ++#define FUSE_WRITE_KILL_PRIV (1 << 2) ++ ++/** ++ * Read flags ++ */ ++#define FUSE_READ_LOCKOWNER (1 << 1) ++ ++/** ++ * Ioctl flags ++ * ++ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine ++ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed ++ * FUSE_IOCTL_RETRY: retry with new iovecs ++ * FUSE_IOCTL_32BIT: 32bit ioctl ++ * FUSE_IOCTL_DIR: is a directory ++ * FUSE_IOCTL_COMPAT_X32: x32 compat ioctl on 64bit machine 
(64bit time_t) ++ * ++ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs ++ */ ++#define FUSE_IOCTL_COMPAT (1 << 0) ++#define FUSE_IOCTL_UNRESTRICTED (1 << 1) ++#define FUSE_IOCTL_RETRY (1 << 2) ++#define FUSE_IOCTL_32BIT (1 << 3) ++#define FUSE_IOCTL_DIR (1 << 4) ++#define FUSE_IOCTL_COMPAT_X32 (1 << 5) ++ ++#define FUSE_IOCTL_MAX_IOV 256 ++ ++/** ++ * Poll flags ++ * ++ * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify ++ */ ++#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0) ++ ++/** ++ * Fsync flags ++ * ++ * FUSE_FSYNC_FDATASYNC: Sync data only, not metadata ++ */ ++#define FUSE_FSYNC_FDATASYNC (1 << 0) ++ ++enum fuse_opcode { ++ FUSE_LOOKUP = 1, ++ FUSE_FORGET = 2, /* no reply */ ++ FUSE_GETATTR = 3, ++ FUSE_SETATTR = 4, ++ FUSE_READLINK = 5, ++ FUSE_SYMLINK = 6, ++ FUSE_MKNOD = 8, ++ FUSE_MKDIR = 9, ++ FUSE_UNLINK = 10, ++ FUSE_RMDIR = 11, ++ FUSE_RENAME = 12, ++ FUSE_LINK = 13, ++ FUSE_OPEN = 14, ++ FUSE_READ = 15, ++ FUSE_WRITE = 16, ++ FUSE_STATFS = 17, ++ FUSE_RELEASE = 18, ++ FUSE_FSYNC = 20, ++ FUSE_SETXATTR = 21, ++ FUSE_GETXATTR = 22, ++ FUSE_LISTXATTR = 23, ++ FUSE_REMOVEXATTR = 24, ++ FUSE_FLUSH = 25, ++ FUSE_INIT = 26, ++ FUSE_OPENDIR = 27, ++ FUSE_READDIR = 28, ++ FUSE_RELEASEDIR = 29, ++ FUSE_FSYNCDIR = 30, ++ FUSE_GETLK = 31, ++ FUSE_SETLK = 32, ++ FUSE_SETLKW = 33, ++ FUSE_ACCESS = 34, ++ FUSE_CREATE = 35, ++ FUSE_INTERRUPT = 36, ++ FUSE_BMAP = 37, ++ FUSE_DESTROY = 38, ++ FUSE_IOCTL = 39, ++ FUSE_POLL = 40, ++ FUSE_NOTIFY_REPLY = 41, ++ FUSE_BATCH_FORGET = 42, ++ FUSE_FALLOCATE = 43, ++ FUSE_READDIRPLUS = 44, ++ FUSE_RENAME2 = 45, ++ FUSE_LSEEK = 46, ++ FUSE_COPY_FILE_RANGE = 47, ++ FUSE_SETUPMAPPING = 48, ++ FUSE_REMOVEMAPPING = 49, ++ ++ /* CUSE specific operations */ ++ CUSE_INIT = 4096, ++ ++ /* Reserved opcodes: helpful to detect structure endian-ness */ ++ CUSE_INIT_BSWAP_RESERVED = 1048576, /* CUSE_INIT << 8 */ ++ FUSE_INIT_BSWAP_RESERVED = 436207616, /* FUSE_INIT << 24 */ ++}; ++ ++enum fuse_notify_code { ++ FUSE_NOTIFY_POLL = 1, ++ 
FUSE_NOTIFY_INVAL_INODE = 2, ++ FUSE_NOTIFY_INVAL_ENTRY = 3, ++ FUSE_NOTIFY_STORE = 4, ++ FUSE_NOTIFY_RETRIEVE = 5, ++ FUSE_NOTIFY_DELETE = 6, ++ FUSE_NOTIFY_CODE_MAX, ++}; ++ ++/* The read buffer is required to be at least 8k, but may be much larger */ ++#define FUSE_MIN_READ_BUFFER 8192 ++ ++#define FUSE_COMPAT_ENTRY_OUT_SIZE 120 ++ ++struct fuse_entry_out { ++ uint64_t nodeid; /* Inode ID */ ++ uint64_t generation; /* Inode generation: nodeid:gen must ++ be unique for the fs's lifetime */ ++ uint64_t entry_valid; /* Cache timeout for the name */ ++ uint64_t attr_valid; /* Cache timeout for the attributes */ ++ uint32_t entry_valid_nsec; ++ uint32_t attr_valid_nsec; ++ struct fuse_attr attr; ++}; ++ ++struct fuse_forget_in { ++ uint64_t nlookup; ++}; ++ ++struct fuse_forget_one { ++ uint64_t nodeid; ++ uint64_t nlookup; ++}; ++ ++struct fuse_batch_forget_in { ++ uint32_t count; ++ uint32_t dummy; ++}; ++ ++struct fuse_getattr_in { ++ uint32_t getattr_flags; ++ uint32_t dummy; ++ uint64_t fh; ++}; ++ ++#define FUSE_COMPAT_ATTR_OUT_SIZE 96 ++ ++struct fuse_attr_out { ++ uint64_t attr_valid; /* Cache timeout for the attributes */ ++ uint32_t attr_valid_nsec; ++ uint32_t dummy; ++ struct fuse_attr attr; ++}; ++ ++#define FUSE_COMPAT_MKNOD_IN_SIZE 8 ++ ++struct fuse_mknod_in { ++ uint32_t mode; ++ uint32_t rdev; ++ uint32_t umask; ++ uint32_t padding; ++}; ++ ++struct fuse_mkdir_in { ++ uint32_t mode; ++ uint32_t umask; ++}; ++ ++struct fuse_rename_in { ++ uint64_t newdir; ++}; ++ ++struct fuse_rename2_in { ++ uint64_t newdir; ++ uint32_t flags; ++ uint32_t padding; ++}; ++ ++struct fuse_link_in { ++ uint64_t oldnodeid; ++}; ++ ++struct fuse_setattr_in { ++ uint32_t valid; ++ uint32_t padding; ++ uint64_t fh; ++ uint64_t size; ++ uint64_t lock_owner; ++ uint64_t atime; ++ uint64_t mtime; ++ uint64_t ctime; ++ uint32_t atimensec; ++ uint32_t mtimensec; ++ uint32_t ctimensec; ++ uint32_t mode; ++ uint32_t unused4; ++ uint32_t uid; ++ uint32_t gid; ++ uint32_t unused5; 
++}; ++ ++struct fuse_open_in { ++ uint32_t flags; ++ uint32_t unused; ++}; ++ ++struct fuse_create_in { ++ uint32_t flags; ++ uint32_t mode; ++ uint32_t umask; ++ uint32_t padding; ++}; ++ ++struct fuse_open_out { ++ uint64_t fh; ++ uint32_t open_flags; ++ uint32_t padding; ++}; ++ ++struct fuse_release_in { ++ uint64_t fh; ++ uint32_t flags; ++ uint32_t release_flags; ++ uint64_t lock_owner; ++}; ++ ++struct fuse_flush_in { ++ uint64_t fh; ++ uint32_t unused; ++ uint32_t padding; ++ uint64_t lock_owner; ++}; ++ ++struct fuse_read_in { ++ uint64_t fh; ++ uint64_t offset; ++ uint32_t size; ++ uint32_t read_flags; ++ uint64_t lock_owner; ++ uint32_t flags; ++ uint32_t padding; ++}; ++ ++#define FUSE_COMPAT_WRITE_IN_SIZE 24 ++ ++struct fuse_write_in { ++ uint64_t fh; ++ uint64_t offset; ++ uint32_t size; ++ uint32_t write_flags; ++ uint64_t lock_owner; ++ uint32_t flags; ++ uint32_t padding; ++}; ++ ++struct fuse_write_out { ++ uint32_t size; ++ uint32_t padding; ++}; ++ ++#define FUSE_COMPAT_STATFS_SIZE 48 ++ ++struct fuse_statfs_out { ++ struct fuse_kstatfs st; ++}; ++ ++struct fuse_fsync_in { ++ uint64_t fh; ++ uint32_t fsync_flags; ++ uint32_t padding; ++}; ++ ++struct fuse_setxattr_in { ++ uint32_t size; ++ uint32_t flags; ++}; ++ ++struct fuse_getxattr_in { ++ uint32_t size; ++ uint32_t padding; ++}; ++ ++struct fuse_getxattr_out { ++ uint32_t size; ++ uint32_t padding; ++}; ++ ++struct fuse_lk_in { ++ uint64_t fh; ++ uint64_t owner; ++ struct fuse_file_lock lk; ++ uint32_t lk_flags; ++ uint32_t padding; ++}; ++ ++struct fuse_lk_out { ++ struct fuse_file_lock lk; ++}; ++ ++struct fuse_access_in { ++ uint32_t mask; ++ uint32_t padding; ++}; ++ ++struct fuse_init_in { ++ uint32_t major; ++ uint32_t minor; ++ uint32_t max_readahead; ++ uint32_t flags; ++}; ++ ++#define FUSE_COMPAT_INIT_OUT_SIZE 8 ++#define FUSE_COMPAT_22_INIT_OUT_SIZE 24 ++ ++struct fuse_init_out { ++ uint32_t major; ++ uint32_t minor; ++ uint32_t max_readahead; ++ uint32_t flags; ++ uint16_t 
max_background; ++ uint16_t congestion_threshold; ++ uint32_t max_write; ++ uint32_t time_gran; ++ uint16_t max_pages; ++ uint16_t map_alignment; ++ uint32_t unused[8]; ++}; ++ ++#define CUSE_INIT_INFO_MAX 4096 ++ ++struct cuse_init_in { ++ uint32_t major; ++ uint32_t minor; ++ uint32_t unused; ++ uint32_t flags; ++}; ++ ++struct cuse_init_out { ++ uint32_t major; ++ uint32_t minor; ++ uint32_t unused; ++ uint32_t flags; ++ uint32_t max_read; ++ uint32_t max_write; ++ uint32_t dev_major; /* chardev major */ ++ uint32_t dev_minor; /* chardev minor */ ++ uint32_t spare[10]; ++}; ++ ++struct fuse_interrupt_in { ++ uint64_t unique; ++}; ++ ++struct fuse_bmap_in { ++ uint64_t block; ++ uint32_t blocksize; ++ uint32_t padding; ++}; ++ ++struct fuse_bmap_out { ++ uint64_t block; ++}; ++ ++struct fuse_ioctl_in { ++ uint64_t fh; ++ uint32_t flags; ++ uint32_t cmd; ++ uint64_t arg; ++ uint32_t in_size; ++ uint32_t out_size; ++}; ++ ++struct fuse_ioctl_iovec { ++ uint64_t base; ++ uint64_t len; ++}; ++ ++struct fuse_ioctl_out { ++ int32_t result; ++ uint32_t flags; ++ uint32_t in_iovs; ++ uint32_t out_iovs; ++}; ++ ++struct fuse_poll_in { ++ uint64_t fh; ++ uint64_t kh; ++ uint32_t flags; ++ uint32_t events; ++}; ++ ++struct fuse_poll_out { ++ uint32_t revents; ++ uint32_t padding; ++}; ++ ++struct fuse_notify_poll_wakeup_out { ++ uint64_t kh; ++}; ++ ++struct fuse_fallocate_in { ++ uint64_t fh; ++ uint64_t offset; ++ uint64_t length; ++ uint32_t mode; ++ uint32_t padding; ++}; ++ ++struct fuse_in_header { ++ uint32_t len; ++ uint32_t opcode; ++ uint64_t unique; ++ uint64_t nodeid; ++ uint32_t uid; ++ uint32_t gid; ++ uint32_t pid; ++ uint32_t padding; ++}; ++ ++struct fuse_out_header { ++ uint32_t len; ++ int32_t error; ++ uint64_t unique; ++}; ++ ++struct fuse_dirent { ++ uint64_t ino; ++ uint64_t off; ++ uint32_t namelen; ++ uint32_t type; ++ char name[]; ++}; ++ ++#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) ++#define FUSE_DIRENT_ALIGN(x) \ ++ (((x) + 
sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) ++#define FUSE_DIRENT_SIZE(d) \ ++ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) ++ ++struct fuse_direntplus { ++ struct fuse_entry_out entry_out; ++ struct fuse_dirent dirent; ++}; ++ ++#define FUSE_NAME_OFFSET_DIRENTPLUS \ ++ offsetof(struct fuse_direntplus, dirent.name) ++#define FUSE_DIRENTPLUS_SIZE(d) \ ++ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen) ++ ++struct fuse_notify_inval_inode_out { ++ uint64_t ino; ++ int64_t off; ++ int64_t len; ++}; ++ ++struct fuse_notify_inval_entry_out { ++ uint64_t parent; ++ uint32_t namelen; ++ uint32_t padding; ++}; ++ ++struct fuse_notify_delete_out { ++ uint64_t parent; ++ uint64_t child; ++ uint32_t namelen; ++ uint32_t padding; ++}; ++ ++struct fuse_notify_store_out { ++ uint64_t nodeid; ++ uint64_t offset; ++ uint32_t size; ++ uint32_t padding; ++}; ++ ++struct fuse_notify_retrieve_out { ++ uint64_t notify_unique; ++ uint64_t nodeid; ++ uint64_t offset; ++ uint32_t size; ++ uint32_t padding; ++}; ++ ++/* Matches the size of fuse_write_in */ ++struct fuse_notify_retrieve_in { ++ uint64_t dummy1; ++ uint64_t offset; ++ uint32_t size; ++ uint32_t dummy2; ++ uint64_t dummy3; ++ uint64_t dummy4; ++}; ++ ++/* Device ioctls: */ ++#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) ++ ++struct fuse_lseek_in { ++ uint64_t fh; ++ uint64_t offset; ++ uint32_t whence; ++ uint32_t padding; ++}; ++ ++struct fuse_lseek_out { ++ uint64_t offset; ++}; ++ ++struct fuse_copy_file_range_in { ++ uint64_t fh_in; ++ uint64_t off_in; ++ uint64_t nodeid_out; ++ uint64_t fh_out; ++ uint64_t off_out; ++ uint64_t len; ++ uint64_t flags; ++}; ++ ++#endif /* _LINUX_FUSE_H */ +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index f76d773..29c27f4 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -186,6 +186,7 @@ rm -rf "$output/include/standard-headers/linux" + mkdir -p 
"$output/include/standard-headers/linux" + for i in "$tmpdir"/include/linux/*virtio*.h \ + "$tmpdir/include/linux/qemu_fw_cfg.h" \ ++ "$tmpdir/include/linux/fuse.h" \ + "$tmpdir/include/linux/input.h" \ + "$tmpdir/include/linux/input-event-codes.h" \ + "$tmpdir/include/linux/pci_regs.h" \ +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Pull-in-upstream-headers.patch b/kvm-virtiofsd-Pull-in-upstream-headers.patch new file mode 100644 index 0000000..78784fb --- /dev/null +++ b/kvm-virtiofsd-Pull-in-upstream-headers.patch @@ -0,0 +1,4911 @@ +From 434b51e5c2fce756906dec4803900397bc98ad72 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:39 +0100 +Subject: [PATCH 008/116] virtiofsd: Pull in upstream headers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-5-dgilbert@redhat.com> +Patchwork-id: 93457 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 004/112] virtiofsd: Pull in upstream headers +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Pull in headers fromlibfuse's upstream fuse-3.8.0 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit ee46c78901eb7fa78e328e04c0494ad6d207238b) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse.h | 1275 ++++++++++++++++++++ + tools/virtiofsd/fuse_common.h | 823 +++++++++++++ + tools/virtiofsd/fuse_i.h | 139 +++ + tools/virtiofsd/fuse_log.h | 82 ++ + tools/virtiofsd/fuse_lowlevel.h | 2089 +++++++++++++++++++++++++++++++++ + tools/virtiofsd/fuse_misc.h | 59 + + tools/virtiofsd/fuse_opt.h | 271 +++++ + tools/virtiofsd/passthrough_helpers.h | 76 ++ + 8 files changed, 4814 insertions(+) + create mode 100644 tools/virtiofsd/fuse.h + create mode 100644 tools/virtiofsd/fuse_common.h + create mode 100644 tools/virtiofsd/fuse_i.h + create mode 100644 tools/virtiofsd/fuse_log.h + create mode 100644 tools/virtiofsd/fuse_lowlevel.h + create mode 100644 tools/virtiofsd/fuse_misc.h + create mode 100644 tools/virtiofsd/fuse_opt.h + create mode 100644 tools/virtiofsd/passthrough_helpers.h + +diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h +new file mode 100644 +index 0000000..883f6e5 +--- /dev/null ++++ b/tools/virtiofsd/fuse.h +@@ -0,0 +1,1275 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. ++*/ ++ ++#ifndef FUSE_H_ ++#define FUSE_H_ ++ ++/** @file ++ * ++ * This file defines the library interface of FUSE ++ * ++ * IMPORTANT: you should define FUSE_USE_VERSION before including this header. ++ */ ++ ++#include "fuse_common.h" ++ ++#include <fcntl.h> ++#include <time.h> ++#include <sys/types.h> ++#include <sys/stat.h> ++#include <sys/statvfs.h> ++#include <sys/uio.h> ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ----------------------------------------------------------- * ++ * Basic FUSE API * ++ * ----------------------------------------------------------- */ ++ ++/** Handle for a FUSE filesystem */ ++struct fuse; ++ ++/** ++ * Readdir flags, passed to ->readdir() ++ */ ++enum fuse_readdir_flags { ++ /** ++ * "Plus" mode.
++ * ++ * The kernel wants to prefill the inode cache during readdir. The ++ * filesystem may honour this by filling in the attributes and setting ++ * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also ++ * just ignore this flag completely. ++ */ ++ FUSE_READDIR_PLUS = (1 << 0), ++}; ++ ++enum fuse_fill_dir_flags { ++ /** ++ * "Plus" mode: all file attributes are valid ++ * ++ * The attributes are used by the kernel to prefill the inode cache ++ * during a readdir. ++ * ++ * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set ++ * and vice versa. ++ */ ++ FUSE_FILL_DIR_PLUS = (1 << 1), ++}; ++ ++/** Function to add an entry in a readdir() operation ++ * ++ * The *off* parameter can be any non-zero value that enables the ++ * filesystem to identify the current point in the directory ++ * stream. It does not need to be the actual physical position. A ++ * value of zero is reserved to indicate that seeking in directories ++ * is not supported. ++ * ++ * @param buf the buffer passed to the readdir() operation ++ * @param name the file name of the directory entry ++ * @param stat file attributes, can be NULL ++ * @param off offset of the next entry or zero ++ * @param flags fill flags ++ * @return 1 if buffer is full, zero otherwise ++ */ ++typedef int (*fuse_fill_dir_t) (void *buf, const char *name, ++ const struct stat *stbuf, off_t off, ++ enum fuse_fill_dir_flags flags); ++/** ++ * Configuration of the high-level API ++ * ++ * This structure is initialized from the arguments passed to ++ * fuse_new(), and then passed to the file system's init() handler ++ * which should ensure that the configuration is compatible with the ++ * file system implementation. ++ */ ++struct fuse_config { ++ /** ++ * If `set_gid` is non-zero, the st_gid attribute of each file ++ * is overwritten with the value of `gid`. 
++ */ ++ int set_gid; ++ unsigned int gid; ++ ++ /** ++ * If `set_uid` is non-zero, the st_uid attribute of each file ++ * is overwritten with the value of `uid`. ++ */ ++ int set_uid; ++ unsigned int uid; ++ ++ /** ++ * If `set_mode` is non-zero, then any permission bits set in ++ * `umask` are unset in the st_mode attribute of each file. ++ */ ++ int set_mode; ++ unsigned int umask; ++ ++ /** ++ * The timeout in seconds for which name lookups will be ++ * cached. ++ */ ++ double entry_timeout; ++ ++ /** ++ * The timeout in seconds for which a negative lookup will be ++ * cached. This means that if a file did not exist (lookup ++ * returned ENOENT), the lookup will only be redone after the ++ * timeout, and the file/directory will be assumed to not ++ * exist until then. A value of zero means that negative ++ * lookups are not cached. ++ */ ++ double negative_timeout; ++ ++ /** ++ * The timeout in seconds for which file/directory attributes ++ * (as returned by e.g. the `getattr` handler) are cached. ++ */ ++ double attr_timeout; ++ ++ /** ++ * Allow requests to be interrupted ++ */ ++ int intr; ++ ++ /** ++ * Specify which signal number to send to the filesystem when ++ * a request is interrupted. The default is hardcoded to ++ * USR1. ++ */ ++ int intr_signal; ++ ++ /** ++ * Normally, FUSE assigns inodes to paths only for as long as ++ * the kernel is aware of them. With this option inodes are ++ * instead remembered for at least this many seconds. This ++ * will require more memory, but may be necessary when using ++ * applications that make use of inode numbers. ++ * ++ * A number of -1 means that inodes will be remembered for the ++ * entire life-time of the file-system process. ++ */ ++ int remember; ++ ++ /** ++ * The default behavior is that if an open file is deleted, ++ * the file is renamed to a hidden file (.fuse_hiddenXXX), and ++ * only removed when the file is finally released.
This ++ * relieves the filesystem implementation of having to deal ++ * with this problem. This option disables the hiding ++ * behavior, and files are removed immediately in an unlink ++ * operation (or in a rename operation which overwrites an ++ * existing file). ++ * ++ * It is recommended that you not use the hard_remove ++ * option. When hard_remove is set, the following libc ++ * functions fail on unlinked files (returning errno of ++ * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), ++ * ftruncate(2), fstat(2), fchmod(2), fchown(2) ++ */ ++ int hard_remove; ++ ++ /** ++ * Honor the st_ino field in the functions getattr() and ++ * fill_dir(). This value is used to fill in the st_ino field ++ * in the stat(2), lstat(2), fstat(2) functions and the d_ino ++ * field in the readdir(2) function. The filesystem does not ++ * have to guarantee uniqueness, however some applications ++ * rely on this value being unique for the whole filesystem. ++ * ++ * Note that this does *not* affect the inode that libfuse ++ * and the kernel use internally (also called the "nodeid"). ++ */ ++ int use_ino; ++ ++ /** ++ * If use_ino option is not given, still try to fill in the ++ * d_ino field in readdir(2). If the name was previously ++ * looked up, and is still in the cache, the inode number ++ * found there will be used. Otherwise it will be set to -1. ++ * If use_ino option is given, this option is ignored. ++ */ ++ int readdir_ino; ++ ++ /** ++ * This option disables the use of page cache (file content cache) ++ * in the kernel for this filesystem. This has several effects: ++ * ++ * 1. Each read(2) or write(2) system call will initiate one ++ * or more read or write operations, data will not be ++ * cached in the kernel. ++ * ++ * 2. The return value of the read() and write() system calls ++ * will correspond to the return values of the read and ++ * write operations. This is useful for example if the ++ * file size is not known in advance (before reading it).
++ * ++ * Internally, enabling this option causes fuse to set the ++ * `direct_io` field of `struct fuse_file_info` - overwriting ++ * any value that was put there by the file system. ++ */ ++ int direct_io; ++ ++ /** ++ * This option disables flushing the cache of the file ++ * contents on every open(2). This should only be enabled on ++ * filesystems where the file data is never changed ++ * externally (not through the mounted FUSE filesystem). Thus ++ * it is not suitable for network filesystems and other ++ * intermediate filesystems. ++ * ++ * NOTE: if this option is not specified (and neither ++ * direct_io) data is still cached after the open(2), so a ++ * read(2) system call will not always initiate a read ++ * operation. ++ * ++ * Internally, enabling this option causes fuse to set the ++ * `keep_cache` field of `struct fuse_file_info` - overwriting ++ * any value that was put there by the file system. ++ */ ++ int kernel_cache; ++ ++ /** ++ * This option is an alternative to `kernel_cache`. Instead of ++ * unconditionally keeping cached data, the cached data is ++ * invalidated on open(2) if the modification time or the ++ * size of the file has changed since it was last opened. ++ */ ++ int auto_cache; ++ ++ /** ++ * The timeout in seconds for which file attributes are cached ++ * for the purpose of checking if auto_cache should flush the ++ * file data on open. ++ */ ++ int ac_attr_timeout_set; ++ double ac_attr_timeout; ++ ++ /** ++ * If this option is given the file-system handlers for the ++ * following operations will not receive path information: ++ * read, write, flush, release, fsync, readdir, releasedir, ++ * fsyncdir, lock, ioctl and poll. ++ * ++ * For the truncate, getattr, chmod, chown and utimens ++ * operations the path will be provided only if the struct ++ * fuse_file_info argument is NULL. ++ */ ++ int nullpath_ok; ++ ++ /** ++ * The remaining options are used by libfuse internally and ++ * should not be touched.
++ */ ++ int show_help; ++ char *modules; ++ int debug; ++}; ++ ++ ++/** ++ * The file system operations: ++ * ++ * Most of these should work very similarly to the well known UNIX ++ * file system operations. A major exception is that instead of ++ * returning an error in 'errno', the operation should return the ++ * negated error value (-errno) directly. ++ * ++ * All methods are optional, but some are essential for a useful ++ * filesystem (e.g. getattr). Open, flush, release, fsync, opendir, ++ * releasedir, fsyncdir, access, create, truncate, lock, init and ++ * destroy are special purpose methods, without which a full featured ++ * filesystem can still be implemented. ++ * ++ * In general, all methods are expected to perform any necessary ++ * permission checking. However, a filesystem may delegate this task ++ * to the kernel by passing the `default_permissions` mount option to ++ * `fuse_new()`. In this case, methods will only be called if ++ * the kernel's permission check has succeeded. ++ * ++ * Almost all operations take a path which can be of any length. ++ */ ++struct fuse_operations { ++ /** Get file attributes. ++ * ++ * Similar to stat(). The 'st_dev' and 'st_blksize' fields are ++ * ignored. The 'st_ino' field is ignored except if the 'use_ino' ++ * mount option is given. In that case it is passed to userspace, ++ * but libfuse and the kernel will still assign a different ++ * inode for internal use (called the "nodeid"). ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ */ ++ int (*getattr) (const char *, struct stat *, struct fuse_file_info *fi); ++ ++ /** Read the target of a symbolic link ++ * ++ * The buffer should be filled with a null terminated string. The ++ * buffer size argument includes the space for the terminating ++ * null character. If the linkname is too long to fit in the ++ * buffer, it should be truncated. The return value should be 0 ++ * for success. 
++ */ ++ int (*readlink) (const char *, char *, size_t); ++ ++ /** Create a file node ++ * ++ * This is called for creation of all non-directory, non-symlink ++ * nodes. If the filesystem defines a create() method, then for ++ * regular files that will be called instead. ++ */ ++ int (*mknod) (const char *, mode_t, dev_t); ++ ++ /** Create a directory ++ * ++ * Note that the mode argument may not have the type specification ++ * bits set, i.e. S_ISDIR(mode) can be false. To obtain the ++ * correct directory type bits use mode|S_IFDIR ++ * */ ++ int (*mkdir) (const char *, mode_t); ++ ++ /** Remove a file */ ++ int (*unlink) (const char *); ++ ++ /** Remove a directory */ ++ int (*rmdir) (const char *); ++ ++ /** Create a symbolic link */ ++ int (*symlink) (const char *, const char *); ++ ++ /** Rename a file ++ * ++ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If ++ * RENAME_NOREPLACE is specified, the filesystem must not ++ * overwrite *newname* if it exists and return an error ++ * instead. If `RENAME_EXCHANGE` is specified, the filesystem ++ * must atomically exchange the two files, i.e. both must ++ * exist and neither may be deleted. ++ */ ++ int (*rename) (const char *, const char *, unsigned int flags); ++ ++ /** Create a hard link to a file */ ++ int (*link) (const char *, const char *); ++ ++ /** Change the permission bits of a file ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ */ ++ int (*chmod) (const char *, mode_t, struct fuse_file_info *fi); ++ ++ /** Change the owner and group of a file ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits.
++ */ ++ int (*chown) (const char *, uid_t, gid_t, struct fuse_file_info *fi); ++ ++ /** Change the size of a file ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*truncate) (const char *, off_t, struct fuse_file_info *fi); ++ ++ /** Open a file ++ * ++ * Open flags are available in fi->flags. The following rules ++ * apply. ++ * ++ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be ++ * filtered out / handled by the kernel. ++ * ++ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) ++ * should be used by the filesystem to check if the operation is ++ * permitted. If the ``-o default_permissions`` mount option is ++ * given, this check is already done by the kernel before calling ++ * open() and may thus be omitted by the filesystem. ++ * ++ * - When writeback caching is enabled, the kernel may send ++ * read requests even for files opened with O_WRONLY. The ++ * filesystem should be prepared to handle this. ++ * ++ * - When writeback caching is disabled, the filesystem is ++ * expected to properly handle the O_APPEND flag and ensure ++ * that each write is appending to the end of the file. ++ * ++ * - When writeback caching is enabled, the kernel will ++ * handle O_APPEND. However, unless all changes to the file ++ * come through the kernel this will not work reliably. The ++ * filesystem should thus either ignore the O_APPEND flag ++ * (and let the kernel handle it), or return an error ++ * (indicating that reliable O_APPEND is not available). ++ * ++ * Filesystem may store an arbitrary file handle (pointer, ++ * index, etc) in fi->fh, and use this in all other file ++ * operations (read, write, flush, release, fsync). ++ * ++ * Filesystem may also implement stateless file I/O and not store ++ * anything in fi->fh.
++ * ++ * There are also some flags (direct_io, keep_cache) which the ++ * filesystem may set in fi, to change the way the file is opened. ++ * See fuse_file_info structure in <fuse_common.h> for more details. ++ * ++ * If this request is answered with an error code of ENOSYS ++ * and FUSE_CAP_NO_OPEN_SUPPORT is set in ++ * `fuse_conn_info.capable`, this is treated as success and ++ * future calls to open will also succeed without being sent ++ * to the filesystem process. ++ * ++ */ ++ int (*open) (const char *, struct fuse_file_info *); ++ ++ /** Read data from an open file ++ * ++ * Read should return exactly the number of bytes requested except ++ * on EOF or error, otherwise the rest of the data will be ++ * substituted with zeroes. An exception to this is when the ++ * 'direct_io' mount option is specified, in which case the return ++ * value of the read system call will reflect the return value of ++ * this operation. ++ */ ++ int (*read) (const char *, char *, size_t, off_t, ++ struct fuse_file_info *); ++ ++ /** Write data to an open file ++ * ++ * Write should return exactly the number of bytes requested ++ * except on error. An exception to this is when the 'direct_io' ++ * mount option is specified (see read operation). ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*write) (const char *, const char *, size_t, off_t, ++ struct fuse_file_info *); ++ ++ /** Get file system statistics ++ * ++ * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored ++ */ ++ int (*statfs) (const char *, struct statvfs *); ++ ++ /** Possibly flush cached data ++ * ++ * BIG NOTE: This is not equivalent to fsync(). It's not a ++ * request to sync dirty data. ++ * ++ * Flush is called on each close() of a file descriptor, as opposed to ++ * release which is called on the close of the last file descriptor for ++ * a file.
Under Linux, errors returned by flush() will be passed to ++ * userspace as errors from close(), so flush() is a good place to write ++ * back any cached dirty data. However, many applications ignore errors ++ * on close(), and on non-Linux systems, close() may succeed even if flush() ++ * returns an error. For these reasons, filesystems should not assume ++ * that errors returned by flush will ever be noticed or even ++ * delivered. ++ * ++ * NOTE: The flush() method may be called more than once for each ++ * open(). This happens if more than one file descriptor refers to an ++ * open file handle, e.g. due to dup(), dup2() or fork() calls. It is ++ * not possible to determine if a flush is final, so each flush should ++ * be treated equally. Multiple write-flush sequences are relatively ++ * rare, so this shouldn't be a problem. ++ * ++ * Filesystems shouldn't assume that flush will be called at any ++ * particular point. It may be called more times than expected, or not ++ * at all. ++ * ++ * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html ++ */ ++ int (*flush) (const char *, struct fuse_file_info *); ++ ++ /** Release an open file ++ * ++ * Release is called when there are no more references to an open ++ * file: all file descriptors are closed and all memory mappings ++ * are unmapped. ++ * ++ * For every open() call there will be exactly one release() call ++ * with the same flags and file handle. It is possible to ++ * have a file opened more than once, in which case only the last ++ * release will mean, that no more reads/writes will happen on the ++ * file. The return value of release is ignored. ++ */ ++ int (*release) (const char *, struct fuse_file_info *); ++ ++ /** Synchronize file contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data. 
++ */ ++ int (*fsync) (const char *, int, struct fuse_file_info *); ++ ++ /** Set extended attributes */ ++ int (*setxattr) (const char *, const char *, const char *, size_t, int); ++ ++ /** Get extended attributes */ ++ int (*getxattr) (const char *, const char *, char *, size_t); ++ ++ /** List extended attributes */ ++ int (*listxattr) (const char *, char *, size_t); ++ ++ /** Remove extended attributes */ ++ int (*removexattr) (const char *, const char *); ++ ++ /** Open directory ++ * ++ * Unless the 'default_permissions' mount option is given, ++ * this method should check if opendir is permitted for this ++ * directory. Optionally opendir may also return an arbitrary ++ * filehandle in the fuse_file_info structure, which will be ++ * passed to readdir, releasedir and fsyncdir. ++ */ ++ int (*opendir) (const char *, struct fuse_file_info *); ++ ++ /** Read directory ++ * ++ * The filesystem may choose between two modes of operation: ++ * ++ * 1) The readdir implementation ignores the offset parameter, and ++ * passes zero to the filler function's offset. The filler ++ * function will not return '1' (unless an error happens), so the ++ * whole directory is read in a single readdir operation. ++ * ++ * 2) The readdir implementation keeps track of the offsets of the ++ * directory entries. It uses the offset parameter and always ++ * passes non-zero offset to the filler function. When the buffer ++ * is full (or an error happens) the filler function will return ++ * '1'. 
++ */ ++ int (*readdir) (const char *, void *, fuse_fill_dir_t, off_t, ++ struct fuse_file_info *, enum fuse_readdir_flags); ++ ++ /** Release directory ++ */ ++ int (*releasedir) (const char *, struct fuse_file_info *); ++ ++ /** Synchronize directory contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data ++ */ ++ int (*fsyncdir) (const char *, int, struct fuse_file_info *); ++ ++ /** ++ * Initialize filesystem ++ * ++ * The return value will be passed in the `private_data` field of ++ * `struct fuse_context` to all file operations, and as a ++ * parameter to the destroy() method. It overrides the initial ++ * value provided to fuse_main() / fuse_new(). ++ */ ++ void *(*init) (struct fuse_conn_info *conn, ++ struct fuse_config *cfg); ++ ++ /** ++ * Clean up filesystem ++ * ++ * Called on filesystem exit. ++ */ ++ void (*destroy) (void *private_data); ++ ++ /** ++ * Check file access permissions ++ * ++ * This will be called for the access() system call. If the ++ * 'default_permissions' mount option is given, this method is not ++ * called. ++ * ++ * This method is not called under Linux kernel versions 2.4.x ++ */ ++ int (*access) (const char *, int); ++ ++ /** ++ * Create and open a file ++ * ++ * If the file does not exist, first create it with the specified ++ * mode, and then open it. ++ * ++ * If this method is not implemented or under Linux kernel ++ * versions earlier than 2.6.15, the mknod() and open() methods ++ * will be called instead. ++ */ ++ int (*create) (const char *, mode_t, struct fuse_file_info *); ++ ++ /** ++ * Perform POSIX file locking operation ++ * ++ * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. ++ * ++ * For the meaning of fields in 'struct flock' see the man page ++ * for fcntl(2). The l_whence field will always be set to ++ * SEEK_SET. ++ * ++ * For checking lock ownership, the 'fuse_file_info->owner' ++ * argument must be used.
++ * ++ * For F_GETLK operation, the library will first check currently ++ * held locks, and if a conflicting lock is found it will return ++ * information without calling this method. This ensures, that ++ * for local locks the l_pid field is correctly filled in. The ++ * results may not be accurate in case of race conditions and in ++ * the presence of hard links, but it's unlikely that an ++ * application would rely on accurate GETLK results in these ++ * cases. If a conflicting lock is not found, this method will be ++ * called, and the filesystem may fill out l_pid by a meaningful ++ * value, or it may leave this field zero. ++ * ++ * For F_SETLK and F_SETLKW the l_pid field will be set to the pid ++ * of the process performing the locking operation. ++ * ++ * Note: if this method is not implemented, the kernel will still ++ * allow file locking to work locally. Hence it is only ++ * interesting for network filesystems and similar. ++ */ ++ int (*lock) (const char *, struct fuse_file_info *, int cmd, ++ struct flock *); ++ ++ /** ++ * Change the access and modification times of a file with ++ * nanosecond resolution ++ * ++ * This supersedes the old utime() interface. New applications ++ * should use this. ++ * ++ * `fi` will always be NULL if the file is not currently open, but ++ * may also be NULL if the file is open. ++ * ++ * See the utimensat(2) man page for details. ++ */ ++ int (*utimens) (const char *, const struct timespec tv[2], ++ struct fuse_file_info *fi); ++ ++ /** ++ * Map block index within file to block index within device ++ * ++ * Note: This makes sense only for block device backed filesystems ++ * mounted with the 'blkdev' option ++ */ ++ int (*bmap) (const char *, size_t blocksize, uint64_t *idx); ++ ++ /** ++ * Ioctl ++ * ++ * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in ++ * 64bit environment. The size and direction of data is ++ * determined by _IOC_*() decoding of cmd.
For _IOC_NONE, ++ * data will be NULL, for _IOC_WRITE data is out area, for ++ * _IOC_READ in area and if both are set in/out area. In all ++ * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. ++ * ++ * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a ++ * directory file handle. ++ * ++ * Note : the unsigned long request submitted by the application ++ * is truncated to 32 bits. ++ */ ++ int (*ioctl) (const char *, unsigned int cmd, void *arg, ++ struct fuse_file_info *, unsigned int flags, void *data); ++ ++ /** ++ * Poll for IO readiness events ++ * ++ * Note: If ph is non-NULL, the client should notify ++ * when IO readiness events occur by calling ++ * fuse_notify_poll() with the specified ph. ++ * ++ * Regardless of the number of times poll with a non-NULL ph ++ * is received, single notification is enough to clear all. ++ * Notifying more times incurs overhead but doesn't harm ++ * correctness. ++ * ++ * The callee is responsible for destroying ph with ++ * fuse_pollhandle_destroy() when no longer in use. ++ */ ++ int (*poll) (const char *, struct fuse_file_info *, ++ struct fuse_pollhandle *ph, unsigned *reventsp); ++ ++ /** Write contents of buffer to an open file ++ * ++ * Similar to the write() method, but data is supplied in a ++ * generic buffer. Use fuse_buf_copy() to transfer data to ++ * the destination. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ */ ++ int (*write_buf) (const char *, struct fuse_bufvec *buf, off_t off, ++ struct fuse_file_info *); ++ ++ /** Store data from an open file in a buffer ++ * ++ * Similar to the read() method, but data is stored and ++ * returned in a generic buffer. ++ * ++ * No actual copying of data has to take place, the source ++ * file descriptor may simply be stored in the buffer for ++ * later data transfer. ++ * ++ * The buffer must be allocated dynamically and stored at the ++ * location pointed to by bufp. 
If the buffer contains memory ++ * regions, they too must be allocated using malloc(). The ++ * allocated memory will be freed by the caller. ++ */ ++ int (*read_buf) (const char *, struct fuse_bufvec **bufp, ++ size_t size, off_t off, struct fuse_file_info *); ++ /** ++ * Perform BSD file locking operation ++ * ++ * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN ++ * ++ * Nonblocking requests will be indicated by ORing LOCK_NB to ++ * the above operations ++ * ++ * For more information see the flock(2) manual page. ++ * ++ * Additionally fi->owner will be set to a value unique to ++ * this open file. This same value will be supplied to ++ * ->release() when the file is released. ++ * ++ * Note: if this method is not implemented, the kernel will still ++ * allow file locking to work locally. Hence it is only ++ * interesting for network filesystems and similar. ++ */ ++ int (*flock) (const char *, struct fuse_file_info *, int op); ++ ++ /** ++ * Allocates space for an open file ++ * ++ * This function ensures that required space is allocated for specified ++ * file. If this function returns success then any subsequent write ++ * request to specified range is guaranteed not to fail because of lack ++ * of space on the file system media. ++ */ ++ int (*fallocate) (const char *, int, off_t, off_t, ++ struct fuse_file_info *); ++ ++ /** ++ * Copy a range of data from one file to another ++ * ++ * Performs an optimized copy between two file descriptors without the ++ * additional cost of transferring data through the FUSE kernel module ++ * to user space (glibc) and then back into the FUSE filesystem again. ++ * ++ * In case this method is not implemented, glibc falls back to reading ++ * data from the source and writing to the destination. Effectively ++ * doing an inefficient copy of the data. 
++ */ ++ ssize_t (*copy_file_range) (const char *path_in, ++ struct fuse_file_info *fi_in, ++ off_t offset_in, const char *path_out, ++ struct fuse_file_info *fi_out, ++ off_t offset_out, size_t size, int flags); ++ ++ /** ++ * Find next data or hole after the specified offset ++ */ ++ off_t (*lseek) (const char *, off_t off, int whence, struct fuse_file_info *); ++}; ++ ++/** Extra context that may be needed by some filesystems ++ * ++ * The uid, gid and pid fields are not filled in case of a writepage ++ * operation. ++ */ ++struct fuse_context { ++ /** Pointer to the fuse object */ ++ struct fuse *fuse; ++ ++ /** User ID of the calling process */ ++ uid_t uid; ++ ++ /** Group ID of the calling process */ ++ gid_t gid; ++ ++ /** Process ID of the calling thread */ ++ pid_t pid; ++ ++ /** Private filesystem data */ ++ void *private_data; ++ ++ /** Umask of the calling process */ ++ mode_t umask; ++}; ++ ++/** ++ * Main function of FUSE. ++ * ++ * This is for the lazy. This is all that has to be called from the ++ * main() function. ++ * ++ * This function does the following: ++ * - parses command line options, and handles --help and ++ * --version ++ * - installs signal handlers for INT, HUP, TERM and PIPE ++ * - registers an exit handler to unmount the filesystem on program exit ++ * - creates a fuse handle ++ * - registers the operations ++ * - calls either the single-threaded or the multi-threaded event loop ++ * ++ * Most file systems will have to parse some file-system specific ++ * arguments before calling this function. It is recommended to do ++ * this with fuse_opt_parse() and a processing function that passes ++ * through any unknown options (this can also be achieved by just ++ * passing NULL as the processing function). That way, the remaining ++ * options can be passed directly to fuse_main(). ++ * ++ * fuse_main() accepts all options that can be passed to ++ * fuse_parse_cmdline(), fuse_new(), or fuse_session_new(). 
++ * ++ * Option parsing skips argv[0], which is assumed to contain the ++ * program name. This element must always be present and is used to ++ * construct a basic ``usage: `` message for the --help ++ * output. argv[0] may also be set to the empty string. In this case ++ * the usage message is suppressed. This can be used by file systems ++ * to print their own usage line first. See hello.c for an example of ++ * how to do this. ++ * ++ * Note: this is currently implemented as a macro. ++ * ++ * The following error codes may be returned from fuse_main(): ++ * 1: Invalid option arguments ++ * 2: No mount point specified ++ * 3: FUSE setup failed ++ * 4: Mounting failed ++ * 5: Failed to daemonize (detach from session) ++ * 6: Failed to set up signal handlers ++ * 7: An error occurred during the life of the file system ++ * ++ * @param argc the argument counter passed to the main() function ++ * @param argv the argument vector passed to the main() function ++ * @param op the file system operation ++ * @param private_data Initial value for the `private_data` ++ * field of `struct fuse_context`. May be overridden by the ++ * `struct fuse_operations.init` handler. ++ * @return 0 on success, nonzero on failure ++ * ++ * Example usage, see hello.c ++ */ ++/* ++ int fuse_main(int argc, char *argv[], const struct fuse_operations *op, ++ void *private_data); ++*/ ++#define fuse_main(argc, argv, op, private_data) \ ++ fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) ++ ++/* ----------------------------------------------------------- * ++ * More detailed API * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Print available options (high- and low-level) to stdout. This is ++ * not an exhaustive list, but includes only those options that may be ++ * of interest to an end-user of a file system.
++ * ++ * The function looks at the argument vector only to determine if ++ * there are additional modules to be loaded (module=foo option), ++ * and attempts to call their help functions as well. ++ * ++ * @param args the argument vector. ++ */ ++void fuse_lib_help(struct fuse_args *args); ++ ++/** ++ * Create a new FUSE filesystem. ++ * ++ * This function accepts most file-system independent mount options ++ * (like context, nodev, ro - see mount(8)), as well as the ++ * FUSE-specific mount options from mount.fuse(8). ++ * ++ * If the --help option is specified, the function writes a help text ++ * to stdout and returns NULL. ++ * ++ * Option parsing skips argv[0], which is assumed to contain the ++ * program name. This element must always be present and is used to ++ * construct a basic ``usage: `` message for the --help output. If ++ * argv[0] is set to the empty string, no usage message is included in ++ * the --help output. ++ * ++ * If an unknown option is passed in, an error message is written to ++ * stderr and the function returns NULL. ++ * ++ * @param args argument vector ++ * @param op the filesystem operations ++ * @param op_size the size of the fuse_operations structure ++ * @param private_data Initial value for the `private_data` ++ * field of `struct fuse_context`. May be overridden by the ++ * `struct fuse_operations.init` handler. ++ * @return the created FUSE handle ++ */ ++#if FUSE_USE_VERSION == 30 ++struct fuse *fuse_new_30(struct fuse_args *args, const struct fuse_operations *op, ++ size_t op_size, void *private_data); ++#define fuse_new(args, op, size, data) fuse_new_30(args, op, size, data) ++#else ++struct fuse *fuse_new(struct fuse_args *args, const struct fuse_operations *op, ++ size_t op_size, void *private_data); ++#endif ++ ++/** ++ * Mount a FUSE file system. ++ * ++ * @param mountpoint the mount point path ++ * @param f the FUSE handle ++ * ++ * @return 0 on success, -1 on failure. 
++ **/ ++int fuse_mount(struct fuse *f, const char *mountpoint); ++ ++/** ++ * Unmount a FUSE file system. ++ * ++ * See fuse_session_unmount() for additional information. ++ * ++ * @param f the FUSE handle ++ **/ ++void fuse_unmount(struct fuse *f); ++ ++/** ++ * Destroy the FUSE handle. ++ * ++ * NOTE: This function does not unmount the filesystem. If this is ++ * needed, call fuse_unmount() before calling this function. ++ * ++ * @param f the FUSE handle ++ */ ++void fuse_destroy(struct fuse *f); ++ ++/** ++ * FUSE event loop. ++ * ++ * Requests from the kernel are processed, and the appropriate ++ * operations are called. ++ * ++ * For a description of the return value and the conditions when the ++ * event loop exits, refer to the documentation of ++ * fuse_session_loop(). ++ * ++ * @param f the FUSE handle ++ * @return see fuse_session_loop() ++ * ++ * See also: fuse_loop_mt() ++ */ ++int fuse_loop(struct fuse *f); ++ ++/** ++ * Flag session as terminated ++ * ++ * This function will cause any running event loops to exit on ++ * the next opportunity. ++ * ++ * @param f the FUSE handle ++ */ ++void fuse_exit(struct fuse *f); ++ ++/** ++ * FUSE event loop with multiple threads ++ * ++ * Requests from the kernel are processed, and the appropriate ++ * operations are called. Requests are processed in parallel by ++ * distributing them between multiple threads. ++ * ++ * For a description of the return value and the conditions when the ++ * event loop exits, refer to the documentation of ++ * fuse_session_loop(). ++ * ++ * Note: using fuse_loop() instead of fuse_loop_mt() means you are running in ++ * single-threaded mode, and that you will not have to worry about reentrancy, ++ * though you will have to worry about recursive lookups. In single-threaded ++ * mode, FUSE will wait for one callback to return before calling another.
++ * ++ * Enabling multiple threads, by using fuse_loop_mt(), will cause FUSE to make ++ * multiple simultaneous calls into the various callback functions given by your ++ * fuse_operations record. ++ * ++ * If you are using multiple threads, you can enjoy all the parallel execution ++ * and interactive response benefits of threads, and you get to enjoy all the ++ * benefits of race conditions and locking bugs, too. Ensure that any code used ++ * in the callback function of fuse_operations is also thread-safe. ++ * ++ * @param f the FUSE handle ++ * @param config loop configuration ++ * @return see fuse_session_loop() ++ * ++ * See also: fuse_loop() ++ */ ++#if FUSE_USE_VERSION < 32 ++int fuse_loop_mt_31(struct fuse *f, int clone_fd); ++#define fuse_loop_mt(f, clone_fd) fuse_loop_mt_31(f, clone_fd) ++#else ++int fuse_loop_mt(struct fuse *f, struct fuse_loop_config *config); ++#endif ++ ++/** ++ * Get the current context ++ * ++ * The context is only valid for the duration of a filesystem ++ * operation, and thus must not be stored and used later. ++ * ++ * @return the context ++ */ ++struct fuse_context *fuse_get_context(void); ++ ++/** ++ * Get the current supplementary group IDs for the current request ++ * ++ * Similar to the getgroups(2) system call, except the return value is ++ * always the total number of group IDs, even if it is larger than the ++ * specified size. ++ * ++ * The current fuse kernel module in linux (as of 2.6.30) doesn't pass ++ * the group list to userspace, hence this function needs to parse ++ * "/proc/$TID/task/$TID/status" to get the group IDs. ++ * ++ * This feature may not be supported on all operating systems. In ++ * such a case this function will return -ENOSYS. 
++ * ++ * @param size size of given array ++ * @param list array of group IDs to be filled in ++ * @return the total number of supplementary group IDs or -errno on failure ++ */ ++int fuse_getgroups(int size, gid_t list[]); ++ ++/** ++ * Check if the current request has already been interrupted ++ * ++ * @return 1 if the request has been interrupted, 0 otherwise ++ */ ++int fuse_interrupted(void); ++ ++/** ++ * Invalidates cache for the given path. ++ * ++ * This calls fuse_lowlevel_notify_inval_inode internally. ++ * ++ * @return 0 on successful invalidation, negative error value otherwise. ++ * This routine may return -ENOENT to indicate that there was ++ * no entry to be invalidated, e.g., because the path has not ++ * been seen before or has been forgotten; this should not be ++ * considered to be an error. ++ */ ++int fuse_invalidate_path(struct fuse *f, const char *path); ++ ++/** ++ * The real main function ++ * ++ * Do not call this directly, use fuse_main() ++ */ ++int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, ++ size_t op_size, void *private_data); ++ ++/** ++ * Start the cleanup thread when using option "remember". ++ * ++ * This is done automatically by fuse_loop_mt() ++ * @param fuse struct fuse pointer for fuse instance ++ * @return 0 on success and -1 on error ++ */ ++int fuse_start_cleanup_thread(struct fuse *fuse); ++ ++/** ++ * Stop the cleanup thread when using option "remember". 
++ * ++ * This is done automatically by fuse_loop_mt() ++ * @param fuse struct fuse pointer for fuse instance ++ */ ++void fuse_stop_cleanup_thread(struct fuse *fuse); ++ ++/** ++ * Iterate over cache removing stale entries ++ * use in conjunction with "-oremember" ++ * ++ * NOTE: This is already done for the standard sessions ++ * ++ * @param fuse struct fuse pointer for fuse instance ++ * @return the number of seconds until the next cleanup ++ */ ++int fuse_clean_cache(struct fuse *fuse); ++ ++/* ++ * Stacking API ++ */ ++ ++/** ++ * Fuse filesystem object ++ * ++ * This opaque object represents a filesystem layer ++ */ ++struct fuse_fs; ++ ++/* ++ * These functions call the relevant filesystem operation, and return ++ * the result. ++ * ++ * If the operation is not defined, they return -ENOSYS, with the ++ * exception of fuse_fs_open, fuse_fs_release, fuse_fs_opendir, ++ * fuse_fs_releasedir and fuse_fs_statfs, which return 0. ++ */ ++ ++int fuse_fs_getattr(struct fuse_fs *fs, const char *path, struct stat *buf, ++ struct fuse_file_info *fi); ++int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, ++ const char *newpath, unsigned int flags); ++int fuse_fs_unlink(struct fuse_fs *fs, const char *path); ++int fuse_fs_rmdir(struct fuse_fs *fs, const char *path); ++int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, ++ const char *path); ++int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath); ++int fuse_fs_release(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); ++int fuse_fs_open(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); ++int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size, ++ off_t off, struct fuse_file_info *fi); ++int fuse_fs_read_buf(struct fuse_fs *fs, const char *path, ++ struct fuse_bufvec **bufp, size_t size, off_t off, ++ struct fuse_file_info *fi); ++int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf, ++ size_t size,
off_t off, struct fuse_file_info *fi); ++int fuse_fs_write_buf(struct fuse_fs *fs, const char *path, ++ struct fuse_bufvec *buf, off_t off, ++ struct fuse_file_info *fi); ++int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync, ++ struct fuse_file_info *fi); ++int fuse_fs_flush(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); ++int fuse_fs_statfs(struct fuse_fs *fs, const char *path, struct statvfs *buf); ++int fuse_fs_opendir(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); ++int fuse_fs_readdir(struct fuse_fs *fs, const char *path, void *buf, ++ fuse_fill_dir_t filler, off_t off, ++ struct fuse_file_info *fi, enum fuse_readdir_flags flags); ++int fuse_fs_fsyncdir(struct fuse_fs *fs, const char *path, int datasync, ++ struct fuse_file_info *fi); ++int fuse_fs_releasedir(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi); ++int fuse_fs_create(struct fuse_fs *fs, const char *path, mode_t mode, ++ struct fuse_file_info *fi); ++int fuse_fs_lock(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi, int cmd, struct flock *lock); ++int fuse_fs_flock(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi, int op); ++int fuse_fs_chmod(struct fuse_fs *fs, const char *path, mode_t mode, ++ struct fuse_file_info *fi); ++int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid, ++ struct fuse_file_info *fi); ++int fuse_fs_truncate(struct fuse_fs *fs, const char *path, off_t size, ++ struct fuse_file_info *fi); ++int fuse_fs_utimens(struct fuse_fs *fs, const char *path, ++ const struct timespec tv[2], struct fuse_file_info *fi); ++int fuse_fs_access(struct fuse_fs *fs, const char *path, int mask); ++int fuse_fs_readlink(struct fuse_fs *fs, const char *path, char *buf, ++ size_t len); ++int fuse_fs_mknod(struct fuse_fs *fs, const char *path, mode_t mode, ++ dev_t rdev); ++int fuse_fs_mkdir(struct fuse_fs *fs, const char *path, mode_t mode); ++int 
fuse_fs_setxattr(struct fuse_fs *fs, const char *path, const char *name, ++ const char *value, size_t size, int flags); ++int fuse_fs_getxattr(struct fuse_fs *fs, const char *path, const char *name, ++ char *value, size_t size); ++int fuse_fs_listxattr(struct fuse_fs *fs, const char *path, char *list, ++ size_t size); ++int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, ++ const char *name); ++int fuse_fs_bmap(struct fuse_fs *fs, const char *path, size_t blocksize, ++ uint64_t *idx); ++int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, unsigned int cmd, ++ void *arg, struct fuse_file_info *fi, unsigned int flags, ++ void *data); ++int fuse_fs_poll(struct fuse_fs *fs, const char *path, ++ struct fuse_file_info *fi, struct fuse_pollhandle *ph, ++ unsigned *reventsp); ++int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, int mode, ++ off_t offset, off_t length, struct fuse_file_info *fi); ++ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in, ++ struct fuse_file_info *fi_in, off_t off_in, ++ const char *path_out, ++ struct fuse_file_info *fi_out, off_t off_out, ++ size_t len, int flags); ++off_t fuse_fs_lseek(struct fuse_fs *fs, const char *path, off_t off, int whence, ++ struct fuse_file_info *fi); ++void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn, ++ struct fuse_config *cfg); ++void fuse_fs_destroy(struct fuse_fs *fs); ++ ++int fuse_notify_poll(struct fuse_pollhandle *ph); ++ ++/** ++ * Create a new fuse filesystem object ++ * ++ * This is usually called from the factory of a fuse module to create ++ * a new instance of a filesystem. ++ * ++ * @param op the filesystem operations ++ * @param op_size the size of the fuse_operations structure ++ * @param private_data Initial value for the `private_data` ++ * field of `struct fuse_context`. May be overridden by the ++ * `struct fuse_operations.init` handler. 
++ * @return a new filesystem object ++ */ ++struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, ++ void *private_data); ++ ++/** ++ * Factory for creating filesystem objects ++ * ++ * The function may use and remove options from 'args' that belong ++ * to this module. ++ * ++ * For now the 'fs' vector always contains exactly one filesystem. ++ * This is the filesystem which will be below the newly created ++ * filesystem in the stack. ++ * ++ * @param args the command line arguments ++ * @param fs NULL terminated filesystem object vector ++ * @return the new filesystem object ++ */ ++typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, ++ struct fuse_fs *fs[]); ++/** ++ * Register filesystem module ++ * ++ * If the "-omodules=*name*_:..." option is present, filesystem ++ * objects are created and pushed onto the stack with the *factory_* ++ * function. ++ * ++ * @param name_ the name of this filesystem module ++ * @param factory_ the factory function for this filesystem module ++ */ ++#define FUSE_REGISTER_MODULE(name_, factory_) \ ++ fuse_module_factory_t fuse_module_ ## name_ ## _factory = factory_ ++ ++/** Get session from fuse object */ ++struct fuse_session *fuse_get_session(struct fuse *f); ++ ++/** ++ * Open a FUSE file descriptor and set up the mount for the given ++ * mountpoint and flags. 
++ * ++ * @param mountpoint reference to the mount in the file system ++ * @param options mount options ++ * @return the FUSE file descriptor or -1 upon error ++ */ ++int fuse_open_channel(const char *mountpoint, const char *options); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* FUSE_H_ */ +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +new file mode 100644 +index 0000000..2d686b2 +--- /dev/null ++++ b/tools/virtiofsd/fuse_common.h +@@ -0,0 +1,823 @@ ++/* FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. ++*/ ++ ++/** @file */ ++ ++#if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_) ++#error "Never include <fuse_common.h> directly; use <fuse.h> or <fuse_lowlevel.h> instead." ++#endif ++ ++#ifndef FUSE_COMMON_H_ ++#define FUSE_COMMON_H_ ++ ++#include "fuse_opt.h" ++#include "fuse_log.h" ++#include <stdint.h> ++#include <sys/types.h> ++ ++/** Major version of FUSE library interface */ ++#define FUSE_MAJOR_VERSION 3 ++ ++/** Minor version of FUSE library interface */ ++#define FUSE_MINOR_VERSION 2 ++ ++#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) ++#define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/** ++ * Information about an open file. ++ * ++ * File Handles are created by the open, opendir, and create methods and closed ++ * by the release and releasedir methods. Multiple file handles may be ++ * concurrently open for the same file. Generally, a client will create one ++ * file handle per file descriptor, though in some cases multiple file ++ * descriptors can share a single file handle. ++ */ ++struct fuse_file_info { ++ /** Open flags. Available in open() and release() */ ++ int flags; ++ ++ /** In case of a write operation indicates if this was caused ++ by a delayed write from the page cache.
If so, then the ++ context's pid, uid, and gid fields will not be valid, and ++ the *fh* value may not match the *fh* value that would ++ have been sent with the corresponding individual write ++ requests if write caching had been disabled. */ ++ unsigned int writepage : 1; ++ ++ /** Can be filled in by open, to use direct I/O on this file. */ ++ unsigned int direct_io : 1; ++ ++ /** Can be filled in by open. It signals the kernel that any ++ currently cached file data (ie., data that the filesystem ++ provided the last time the file was open) need not be ++ invalidated. Has no effect when set in other contexts (in ++ particular it does nothing when set by opendir()). */ ++ unsigned int keep_cache : 1; ++ ++ /** Indicates a flush operation. Set in flush operation, also ++ maybe set in highlevel lock operation and lowlevel release ++ operation. */ ++ unsigned int flush : 1; ++ ++ /** Can be filled in by open, to indicate that the file is not ++ seekable. */ ++ unsigned int nonseekable : 1; ++ ++ /* Indicates that flock locks for this file should be ++ released. If set, lock_owner shall contain a valid value. ++ May only be set in ->release(). */ ++ unsigned int flock_release : 1; ++ ++ /** Can be filled in by opendir. It signals the kernel to ++ enable caching of entries returned by readdir(). Has no ++ effect when set in other contexts (in particular it does ++ nothing when set by open()). */ ++ unsigned int cache_readdir : 1; ++ ++ /** Padding. Reserved for future use*/ ++ unsigned int padding : 25; ++ unsigned int padding2 : 32; ++ ++ /** File handle id. May be filled in by filesystem in create, ++ * open, and opendir(). Available in most other file operations on the ++ * same file handle. */ ++ uint64_t fh; ++ ++ /** Lock owner id. Available in locking operations and flush */ ++ uint64_t lock_owner; ++ ++ /** Requested poll events. Available in ->poll. Only set on kernels ++ which support it. If unsupported, this field is set to zero. 
*/ ++ uint32_t poll_events; ++}; ++ ++/** ++ * Configuration parameters passed to fuse_session_loop_mt() and ++ * fuse_loop_mt(). ++ */ ++struct fuse_loop_config { ++ /** ++ * whether to use separate device fds for each thread ++ * (may increase performance) ++ */ ++ int clone_fd; ++ ++ /** ++ * The maximum number of available worker threads before they ++ * start to get deleted when they become idle. If not ++ * specified, the default is 10. ++ * ++ * Adjusting this has performance implications; a very small number ++ * of threads in the pool will cause a lot of thread creation and ++ * deletion overhead and performance may suffer. When set to 0, a new ++ * thread will be created to service every operation. ++ */ ++ unsigned int max_idle_threads; ++}; ++ ++/************************************************************************** ++ * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * ++ **************************************************************************/ ++ ++/** ++ * Indicates that the filesystem supports asynchronous read requests. ++ * ++ * If this capability is not requested/available, the kernel will ++ * ensure that there is at most one pending read request per ++ * file-handle at any time, and will attempt to order read requests by ++ * increasing offset. ++ * ++ * This feature is enabled by default when supported by the kernel. ++ */ ++#define FUSE_CAP_ASYNC_READ (1 << 0) ++ ++/** ++ * Indicates that the filesystem supports "remote" locking. ++ * ++ * This feature is enabled by default when supported by the kernel, ++ * and if getlk() and setlk() handlers are implemented. ++ */ ++#define FUSE_CAP_POSIX_LOCKS (1 << 1) ++ ++/** ++ * Indicates that the filesystem supports the O_TRUNC open flag. If ++ * disabled, and an application specifies O_TRUNC, fuse first calls ++ * truncate() and then open() with O_TRUNC filtered out. ++ * ++ * This feature is enabled by default when supported by the kernel. 
++ */ ++#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) ++ ++/** ++ * Indicates that the filesystem supports lookups of "." and "..". ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) ++ ++/** ++ * Indicates that the kernel should not apply the umask to the ++ * file mode on create operations. ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_DONT_MASK (1 << 6) ++ ++/** ++ * Indicates that libfuse should try to use splice() when writing to ++ * the fuse device. This may improve performance. ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_SPLICE_WRITE (1 << 7) ++ ++/** ++ * Indicates that libfuse should try to move pages instead of copying when ++ * writing to / reading from the fuse device. This may improve performance. ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_SPLICE_MOVE (1 << 8) ++ ++/** ++ * Indicates that libfuse should try to use splice() when reading from ++ * the fuse device. This may improve performance. ++ * ++ * This feature is enabled by default when supported by the kernel and ++ * if the filesystem implements a write_buf() handler. ++ */ ++#define FUSE_CAP_SPLICE_READ (1 << 9) ++ ++/** ++ * If set, the calls to flock(2) will be emulated using POSIX locks and must ++ * then be handled by the filesystem's setlock() handler. ++ * ++ * If not set, flock(2) calls will be handled by the FUSE kernel module ++ * internally (so any access that does not go through the kernel cannot be taken ++ * into account). ++ * ++ * This feature is enabled by default when supported by the kernel and ++ * if the filesystem implements a flock() handler. ++ */ ++#define FUSE_CAP_FLOCK_LOCKS (1 << 10) ++ ++/** ++ * Indicates that the filesystem supports ioctl's on directories. ++ * ++ * This feature is enabled by default when supported by the kernel. 
++ */ ++#define FUSE_CAP_IOCTL_DIR (1 << 11) ++ ++/** ++ * Traditionally, while a file is open the FUSE kernel module only ++ * asks the filesystem for an update of the file's attributes when a ++ * client attempts to read beyond EOF. This is unsuitable for ++ * e.g. network filesystems, where the file contents may change ++ * without the kernel knowing about it. ++ * ++ * If this flag is set, FUSE will check the validity of the attributes ++ * on every read. If the attributes are no longer valid (i.e., if the ++ * *attr_timeout* passed to fuse_reply_attr() or set in `struct ++ * fuse_entry_param` has passed), it will first issue a `getattr` ++ * request. If the new mtime differs from the previous value, any ++ * cached file *contents* will be invalidated as well. ++ * ++ * This flag should always be set when available. If all file changes ++ * go through the kernel, *attr_timeout* should be set to a very large ++ * number to avoid unnecessary getattr() calls. ++ * ++ * This feature is enabled by default when supported by the kernel. ++ */ ++#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) ++ ++/** ++ * Indicates that the filesystem supports readdirplus. ++ * ++ * This feature is enabled by default when supported by the kernel and if the ++ * filesystem implements a readdirplus() handler. ++ */ ++#define FUSE_CAP_READDIRPLUS (1 << 13) ++ ++/** ++ * Indicates that the filesystem supports adaptive readdirplus. ++ * ++ * If FUSE_CAP_READDIRPLUS is not set, this flag has no effect. ++ * ++ * If FUSE_CAP_READDIRPLUS is set and this flag is not set, the kernel ++ * will always issue readdirplus() requests to retrieve directory ++ * contents. ++ * ++ * If FUSE_CAP_READDIRPLUS is set and this flag is set, the kernel ++ * will issue both readdir() and readdirplus() requests, depending on ++ * how much information is expected to be required. 
++ * ++ * As of Linux 4.20, the algorithm is as follows: when userspace ++ * starts to read directory entries, issue a READDIRPLUS request to ++ * the filesystem. If any entry attributes have been looked up by the ++ * time userspace requests the next batch of entries continue with ++ * READDIRPLUS, otherwise switch to plain READDIR. This will result ++ * in e.g. plain "ls" triggering READDIRPLUS first then READDIR after ++ * that because it doesn't do lookups. "ls -l" should result in all ++ * READDIRPLUS, except if dentries are already cached. ++ * ++ * This feature is enabled by default when supported by the kernel and ++ * if the filesystem implements both a readdirplus() and a readdir() ++ * handler. ++ */ ++#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) ++ ++/** ++ * Indicates that the filesystem supports asynchronous direct I/O submission. ++ * ++ * If this capability is not requested/available, the kernel will ensure that ++ * there is at most one pending read and one pending write request per direct ++ * I/O file-handle at any time. ++ * ++ * This feature is enabled by default when supported by the kernel. ++ */ ++#define FUSE_CAP_ASYNC_DIO (1 << 15) ++ ++/** ++ * Indicates that writeback caching should be enabled. This means that ++ * individual write requests may be buffered and merged in the kernel ++ * before they are sent to the filesystem. ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) ++ ++/** ++ * Indicates support for zero-message opens. If this flag is set in ++ * the `capable` field of the `fuse_conn_info` structure, then the ++ * filesystem may return `ENOSYS` from the open() handler to indicate ++ * success. Further attempts to open files will be handled in the ++ * kernel. (If this flag is not set, returning ENOSYS will be treated ++ * as an error and signaled to the caller). ++ * ++ * Setting (or unsetting) this flag in the `want` field has *no ++ * effect*.
++ */ ++#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) ++ ++/** ++ * Indicates support for parallel directory operations. If this flag ++ * is unset, the FUSE kernel module will ensure that lookup() and ++ * readdir() requests are never issued concurrently for the same ++ * directory. ++ * ++ * This feature is enabled by default when supported by the kernel. ++ */ ++#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) ++ ++/** ++ * Indicates support for POSIX ACLs. ++ * ++ * If this feature is enabled, the kernel will cache and have ++ * responsibility for enforcing ACLs. ACL will be stored as xattrs and ++ * passed to userspace, which is responsible for updating the ACLs in ++ * the filesystem, keeping the file mode in sync with the ACL, and ++ * ensuring inheritance of default ACLs when new filesystem nodes are ++ * created. Note that this requires that the file system is able to ++ * parse and interpret the xattr representation of ACLs. ++ * ++ * Enabling this feature implicitly turns on the ++ * ``default_permissions`` mount option (even if it was not passed to ++ * mount(2)). ++ * ++ * This feature is disabled by default. ++ */ ++#define FUSE_CAP_POSIX_ACL (1 << 19) ++ ++/** ++ * Indicates that the filesystem is responsible for unsetting ++ * setuid and setgid bits when a file is written, truncated, or ++ * its owner is changed. ++ * ++ * This feature is enabled by default when supported by the kernel. ++ */ ++#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) ++ ++/** ++ * Indicates support for zero-message opendirs. If this flag is set in ++ * the `capable` field of the `fuse_conn_info` structure, then the filesystem ++ * may return `ENOSYS` from the opendir() handler to indicate success. Further ++ * opendir and releasedir messages will be handled in the kernel. (If this ++ * flag is not set, returning ENOSYS will be treated as an error and signalled ++ * to the caller.) ++ * ++ * Setting (or unsetting) this flag in the `want` field has *no effect*. 
++ */ ++#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) ++ ++/** ++ * Ioctl flags ++ * ++ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine ++ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed ++ * FUSE_IOCTL_RETRY: retry with new iovecs ++ * FUSE_IOCTL_DIR: is a directory ++ * ++ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs ++ */ ++#define FUSE_IOCTL_COMPAT (1 << 0) ++#define FUSE_IOCTL_UNRESTRICTED (1 << 1) ++#define FUSE_IOCTL_RETRY (1 << 2) ++#define FUSE_IOCTL_DIR (1 << 4) ++ ++#define FUSE_IOCTL_MAX_IOV 256 ++ ++/** ++ * Connection information, passed to the ->init() method ++ * ++ * Some of the elements are read-write, these can be changed to ++ * indicate the value requested by the filesystem. The requested ++ * value must usually be smaller than the indicated value. ++ */ ++struct fuse_conn_info { ++ /** ++ * Major version of the protocol (read-only) ++ */ ++ unsigned proto_major; ++ ++ /** ++ * Minor version of the protocol (read-only) ++ */ ++ unsigned proto_minor; ++ ++ /** ++ * Maximum size of the write buffer ++ */ ++ unsigned max_write; ++ ++ /** ++ * Maximum size of read requests. A value of zero indicates no ++ * limit. However, even if the filesystem does not specify a ++ * limit, the maximum size of read requests will still be ++ * limited by the kernel. ++ * ++ * NOTE: For the time being, the maximum size of read requests ++ * must be set both here *and* passed to fuse_session_new() ++ * using the ``-o max_read=`` mount option. At some point ++ * in the future, specifying the mount option will no longer ++ * be necessary. ++ */ ++ unsigned max_read; ++ ++ /** ++ * Maximum readahead ++ */ ++ unsigned max_readahead; ++ ++ /** ++ * Capability flags that the kernel supports (read-only) ++ */ ++ unsigned capable; ++ ++ /** ++ * Capability flags that the filesystem wants to enable. ++ * ++ * libfuse attempts to initialize this field with ++ * reasonable default values before calling the init() handler. 
++ */ ++ unsigned want; ++ ++ /** ++ * Maximum number of pending "background" requests. A ++ * background request is any type of request for which the ++ * total number is not limited by other means. As of kernel ++ * 4.8, only two types of requests fall into this category: ++ * ++ * 1. Read-ahead requests ++ * 2. Asynchronous direct I/O requests ++ * ++ * Read-ahead requests are generated (if max_readahead is ++ * non-zero) by the kernel to preemptively fill its caches ++ * when it anticipates that userspace will soon read more ++ * data. ++ * ++ * Asynchronous direct I/O requests are generated if ++ * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large ++ * direct I/O request. In this case the kernel will internally ++ * split it up into multiple smaller requests and submit them ++ * to the filesystem concurrently. ++ * ++ * Note that the following requests are *not* background ++ * requests: writeback requests (limited by the kernel's ++ * flusher algorithm), regular (i.e., synchronous and ++ * buffered) userspace read/write requests (limited to one per ++ * thread), asynchronous read requests (Linux's io_submit(2) ++ * call actually blocks, so these are also limited to one per ++ * thread). ++ */ ++ unsigned max_background; ++ ++ /** ++ * Kernel congestion threshold parameter. If the number of pending ++ * background requests exceeds this number, the FUSE kernel module will ++ * mark the filesystem as "congested". This instructs the kernel to ++ * expect that queued requests will take some time to complete, and to ++ * adjust its algorithms accordingly (e.g. by putting a waiting thread ++ * to sleep instead of using a busy-loop). ++ */ ++ unsigned congestion_threshold; ++ ++ /** ++ * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible ++ * for updating mtime and ctime when write requests are received. The ++ * updated values are passed to the filesystem with setattr() requests. 
++ * However, if the filesystem does not support the full resolution of ++ * the kernel timestamps (nanoseconds), the mtime and ctime values used ++ * by kernel and filesystem will differ (and result in an apparent ++ * change of times after a cache flush). ++ * ++ * To prevent this problem, this variable can be used to inform the ++ * kernel about the timestamp granularity supported by the file-system. ++ * The value should be power of 10. The default is 1, i.e. full ++ * nano-second resolution. Filesystems supporting only second resolution ++ * should set this to 1000000000. ++ */ ++ unsigned time_gran; ++ ++ /** ++ * For future use. ++ */ ++ unsigned reserved[22]; ++}; ++ ++struct fuse_session; ++struct fuse_pollhandle; ++struct fuse_conn_info_opts; ++ ++/** ++ * This function parses several command-line options that can be used ++ * to override elements of struct fuse_conn_info. The pointer returned ++ * by this function should be passed to the ++ * fuse_apply_conn_info_opts() method by the file system's init() ++ * handler. ++ * ++ * Before using this function, think twice if you really want these ++ * parameters to be adjustable from the command line. In most cases, ++ * they should be determined by the file system internally. 
++ * ++ * The following options are recognized: ++ * ++ * -o max_write=N sets conn->max_write ++ * -o max_readahead=N sets conn->max_readahead ++ * -o max_background=N sets conn->max_background ++ * -o congestion_threshold=N sets conn->congestion_threshold ++ * -o async_read sets FUSE_CAP_ASYNC_READ in conn->want ++ * -o sync_read unsets FUSE_CAP_ASYNC_READ in conn->want ++ * -o atomic_o_trunc sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want ++ * -o no_remote_lock Equivalent to -o no_remote_flock,no_remote_posix_lock ++ * -o no_remote_flock Unsets FUSE_CAP_FLOCK_LOCKS in conn->want ++ * -o no_remote_posix_lock Unsets FUSE_CAP_POSIX_LOCKS in conn->want ++ * -o [no_]splice_write (un-)sets FUSE_CAP_SPLICE_WRITE in conn->want ++ * -o [no_]splice_move (un-)sets FUSE_CAP_SPLICE_MOVE in conn->want ++ * -o [no_]splice_read (un-)sets FUSE_CAP_SPLICE_READ in conn->want ++ * -o [no_]auto_inval_data (un-)sets FUSE_CAP_AUTO_INVAL_DATA in conn->want ++ * -o readdirplus=no unsets FUSE_CAP_READDIRPLUS in conn->want ++ * -o readdirplus=yes sets FUSE_CAP_READDIRPLUS and unsets ++ * FUSE_CAP_READDIRPLUS_AUTO in conn->want ++ * -o readdirplus=auto sets FUSE_CAP_READDIRPLUS and ++ * FUSE_CAP_READDIRPLUS_AUTO in conn->want ++ * -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in conn->want ++ * -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in conn->want ++ * -o time_gran=N sets conn->time_gran ++ * ++ * Known options will be removed from *args*, unknown options will be ++ * passed through unchanged. ++ * ++ * @param args argument vector (input+output) ++ * @return parsed options ++ **/ ++struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); ++ ++/** ++ * This function applies the (parsed) parameters in *opts* to the ++ * *conn* pointer. It may modify the following fields: wants, ++ * max_write, max_readahead, congestion_threshold, max_background, ++ * time_gran. A field is only set (or unset) if the corresponding ++ * option has been explicitly set. 
++ */ ++void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, ++ struct fuse_conn_info *conn); ++ ++/** ++ * Go into the background ++ * ++ * @param foreground if true, stay in the foreground ++ * @return 0 on success, -1 on failure ++ */ ++int fuse_daemonize(int foreground); ++ ++/** ++ * Get the version of the library ++ * ++ * @return the version ++ */ ++int fuse_version(void); ++ ++/** ++ * Get the full package version string of the library ++ * ++ * @return the package version ++ */ ++const char *fuse_pkgversion(void); ++ ++/** ++ * Destroy poll handle ++ * ++ * @param ph the poll handle ++ */ ++void fuse_pollhandle_destroy(struct fuse_pollhandle *ph); ++ ++/* ----------------------------------------------------------- * ++ * Data buffer * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Buffer flags ++ */ ++enum fuse_buf_flags { ++ /** ++ * Buffer contains a file descriptor ++ * ++ * If this flag is set, the .fd field is valid, otherwise the ++ * .mem fields is valid. ++ */ ++ FUSE_BUF_IS_FD = (1 << 1), ++ ++ /** ++ * Seek on the file descriptor ++ * ++ * If this flag is set then the .pos field is valid and is ++ * used to seek to the given offset before performing ++ * operation on file descriptor. ++ */ ++ FUSE_BUF_FD_SEEK = (1 << 2), ++ ++ /** ++ * Retry operation on file descriptor ++ * ++ * If this flag is set then retry operation on file descriptor ++ * until .size bytes have been copied or an error or EOF is ++ * detected. ++ */ ++ FUSE_BUF_FD_RETRY = (1 << 3), ++}; ++ ++/** ++ * Buffer copy flags ++ */ ++enum fuse_buf_copy_flags { ++ /** ++ * Don't use splice(2) ++ * ++ * Always fall back to using read and write instead of ++ * splice(2) to copy data from one file descriptor to another. ++ * ++ * If this flag is not set, then only fall back if splice is ++ * unavailable. 
++ */ ++ FUSE_BUF_NO_SPLICE = (1 << 1), ++ ++ /** ++ * Force splice ++ * ++ * Always use splice(2) to copy data from one file descriptor ++ * to another. If splice is not available, return -EINVAL. ++ */ ++ FUSE_BUF_FORCE_SPLICE = (1 << 2), ++ ++ /** ++ * Try to move data with splice. ++ * ++ * If splice is used, try to move pages from the source to the ++ * destination instead of copying. See documentation of ++ * SPLICE_F_MOVE in splice(2) man page. ++ */ ++ FUSE_BUF_SPLICE_MOVE = (1 << 3), ++ ++ /** ++ * Don't block on the pipe when copying data with splice ++ * ++ * Makes the operations on the pipe non-blocking (if the pipe ++ * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) ++ * man page. ++ */ ++ FUSE_BUF_SPLICE_NONBLOCK= (1 << 4), ++}; ++ ++/** ++ * Single data buffer ++ * ++ * Generic data buffer for I/O, extended attributes, etc... Data may ++ * be supplied as a memory pointer or as a file descriptor ++ */ ++struct fuse_buf { ++ /** ++ * Size of data in bytes ++ */ ++ size_t size; ++ ++ /** ++ * Buffer flags ++ */ ++ enum fuse_buf_flags flags; ++ ++ /** ++ * Memory pointer ++ * ++ * Used unless FUSE_BUF_IS_FD flag is set. ++ */ ++ void *mem; ++ ++ /** ++ * File descriptor ++ * ++ * Used if FUSE_BUF_IS_FD flag is set. ++ */ ++ int fd; ++ ++ /** ++ * File position ++ * ++ * Used if FUSE_BUF_FD_SEEK flag is set. ++ */ ++ off_t pos; ++}; ++ ++/** ++ * Data buffer vector ++ * ++ * An array of data buffers, each containing a memory pointer or a ++ * file descriptor. ++ * ++ * Allocate dynamically to add more than one buffer. 
++ */ ++struct fuse_bufvec { ++ /** ++ * Number of buffers in the array ++ */ ++ size_t count; ++ ++ /** ++ * Index of current buffer within the array ++ */ ++ size_t idx; ++ ++ /** ++ * Current offset within the current buffer ++ */ ++ size_t off; ++ ++ /** ++ * Array of buffers ++ */ ++ struct fuse_buf buf[1]; ++}; ++ ++/* Initialize bufvec with a single buffer of given size */ ++#define FUSE_BUFVEC_INIT(size__) \ ++ ((struct fuse_bufvec) { \ ++ /* .count= */ 1, \ ++ /* .idx = */ 0, \ ++ /* .off = */ 0, \ ++ /* .buf = */ { /* [0] = */ { \ ++ /* .size = */ (size__), \ ++ /* .flags = */ (enum fuse_buf_flags) 0, \ ++ /* .mem = */ NULL, \ ++ /* .fd = */ -1, \ ++ /* .pos = */ 0, \ ++ } } \ ++ } ) ++ ++/** ++ * Get total size of data in a fuse buffer vector ++ * ++ * @param bufv buffer vector ++ * @return size of data ++ */ ++size_t fuse_buf_size(const struct fuse_bufvec *bufv); ++ ++/** ++ * Copy data from one buffer vector to another ++ * ++ * @param dst destination buffer vector ++ * @param src source buffer vector ++ * @param flags flags controlling the copy ++ * @return actual number of bytes copied or -errno on error ++ */ ++ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, ++ enum fuse_buf_copy_flags flags); ++ ++/* ----------------------------------------------------------- * ++ * Signal handling * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Exit session on HUP, TERM and INT signals and ignore PIPE signal ++ * ++ * Stores session in a global variable. May only be called once per ++ * process until fuse_remove_signal_handlers() is called. ++ * ++ * Once either of the POSIX signals arrives, the signal handler calls ++ * fuse_session_exit(). 
++ * ++ * @param se the session to exit ++ * @return 0 on success, -1 on failure ++ * ++ * See also: ++ * fuse_remove_signal_handlers() ++ */ ++int fuse_set_signal_handlers(struct fuse_session *se); ++ ++/** ++ * Restore default signal handlers ++ * ++ * Resets global session. After this fuse_set_signal_handlers() may ++ * be called again. ++ * ++ * @param se the same session as given in fuse_set_signal_handlers() ++ * ++ * See also: ++ * fuse_set_signal_handlers() ++ */ ++void fuse_remove_signal_handlers(struct fuse_session *se); ++ ++/* ----------------------------------------------------------- * ++ * Compatibility stuff * ++ * ----------------------------------------------------------- */ ++ ++#if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30 ++# error only API version 30 or greater is supported ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++ ++ ++/* ++ * This interface uses 64 bit off_t. ++ * ++ * On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags! ++ */ ++ ++#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus ++_Static_assert(sizeof(off_t) == 8, "fuse: off_t must be 64bit"); ++#else ++struct _fuse_off_t_must_be_64bit_dummy_struct \ ++ { unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); }; ++#endif ++ ++#endif /* FUSE_COMMON_H_ */ +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +new file mode 100644 +index 0000000..d38b630 +--- /dev/null ++++ b/tools/virtiofsd/fuse_i.h +@@ -0,0 +1,139 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. 
++ See the file COPYING.LIB ++*/ ++ ++#include "fuse.h" ++#include "fuse_lowlevel.h" ++ ++struct mount_opts; ++ ++struct fuse_req { ++ struct fuse_session *se; ++ uint64_t unique; ++ int ctr; ++ pthread_mutex_t lock; ++ struct fuse_ctx ctx; ++ struct fuse_chan *ch; ++ int interrupted; ++ unsigned int ioctl_64bit : 1; ++ union { ++ struct { ++ uint64_t unique; ++ } i; ++ struct { ++ fuse_interrupt_func_t func; ++ void *data; ++ } ni; ++ } u; ++ struct fuse_req *next; ++ struct fuse_req *prev; ++}; ++ ++struct fuse_notify_req { ++ uint64_t unique; ++ void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, ++ const void *, const struct fuse_buf *); ++ struct fuse_notify_req *next; ++ struct fuse_notify_req *prev; ++}; ++ ++struct fuse_session { ++ char *mountpoint; ++ volatile int exited; ++ int fd; ++ struct mount_opts *mo; ++ int debug; ++ int deny_others; ++ struct fuse_lowlevel_ops op; ++ int got_init; ++ struct cuse_data *cuse_data; ++ void *userdata; ++ uid_t owner; ++ struct fuse_conn_info conn; ++ struct fuse_req list; ++ struct fuse_req interrupts; ++ pthread_mutex_t lock; ++ int got_destroy; ++ pthread_key_t pipe_key; ++ int broken_splice_nonblock; ++ uint64_t notify_ctr; ++ struct fuse_notify_req notify_list; ++ size_t bufsize; ++ int error; ++}; ++ ++struct fuse_chan { ++ pthread_mutex_t lock; ++ int ctr; ++ int fd; ++}; ++ ++/** ++ * Filesystem module ++ * ++ * Filesystem modules are registered with the FUSE_REGISTER_MODULE() ++ * macro. 
++ * ++ */ ++struct fuse_module { ++ char *name; ++ fuse_module_factory_t factory; ++ struct fuse_module *next; ++ struct fusemod_so *so; ++ int ctr; ++}; ++ ++/* ----------------------------------------------------------- * ++ * Channel interface (when using -o clone_fd) * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Obtain counted reference to the channel ++ * ++ * @param ch the channel ++ * @return the channel ++ */ ++struct fuse_chan *fuse_chan_get(struct fuse_chan *ch); ++ ++/** ++ * Drop counted reference to a channel ++ * ++ * @param ch the channel ++ */ ++void fuse_chan_put(struct fuse_chan *ch); ++ ++struct mount_opts *parse_mount_opts(struct fuse_args *args); ++void destroy_mount_opts(struct mount_opts *mo); ++void fuse_mount_version(void); ++unsigned get_max_read(struct mount_opts *o); ++void fuse_kern_unmount(const char *mountpoint, int fd); ++int fuse_kern_mount(const char *mountpoint, struct mount_opts *mo); ++ ++int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, ++ int count); ++void fuse_free_req(fuse_req_t req); ++ ++void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeide, const void *inarg); ++ ++int fuse_start_thread(pthread_t *thread_id, void *(*func)(void *), void *arg); ++ ++int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, ++ struct fuse_chan *ch); ++void fuse_session_process_buf_int(struct fuse_session *se, ++ const struct fuse_buf *buf, struct fuse_chan *ch); ++ ++struct fuse *fuse_new_31(struct fuse_args *args, const struct fuse_operations *op, ++ size_t op_size, void *private_data); ++int fuse_loop_mt_32(struct fuse *f, struct fuse_loop_config *config); ++int fuse_session_loop_mt_32(struct fuse_session *se, struct fuse_loop_config *config); ++ ++#define FUSE_MAX_MAX_PAGES 256 ++#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32 ++ ++/* room needed in buffer to accommodate header */ ++#define FUSE_BUFFER_HEADER_SIZE 0x1000 ++ +diff --git 
a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h +new file mode 100644 +index 0000000..5e112e0 +--- /dev/null ++++ b/tools/virtiofsd/fuse_log.h +@@ -0,0 +1,82 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2019 Red Hat, Inc. ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. ++*/ ++ ++#ifndef FUSE_LOG_H_ ++#define FUSE_LOG_H_ ++ ++/** @file ++ * ++ * This file defines the logging interface of FUSE ++ */ ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/** ++ * Log severity level ++ * ++ * These levels correspond to syslog(2) log levels since they are widely used. ++ */ ++enum fuse_log_level { ++ FUSE_LOG_EMERG, ++ FUSE_LOG_ALERT, ++ FUSE_LOG_CRIT, ++ FUSE_LOG_ERR, ++ FUSE_LOG_WARNING, ++ FUSE_LOG_NOTICE, ++ FUSE_LOG_INFO, ++ FUSE_LOG_DEBUG ++}; ++ ++/** ++ * Log message handler function. ++ * ++ * This function must be thread-safe. It may be called from any libfuse ++ * function, including fuse_parse_cmdline() and other functions invoked before ++ * a FUSE filesystem is created. ++ * ++ * Install a custom log message handler function using fuse_set_log_func(). ++ * ++ * @param level log severity level ++ * @param fmt sprintf-style format string including newline ++ * @param ap format string arguments ++ */ ++typedef void (*fuse_log_func_t)(enum fuse_log_level level, ++ const char *fmt, va_list ap); ++ ++/** ++ * Install a custom log handler function. ++ * ++ * Log messages are emitted by libfuse functions to report errors and debug ++ * information. Messages are printed to stderr by default but this can be ++ * overridden by installing a custom log message handler function. ++ * ++ * The log message handler function is global and affects all FUSE filesystems ++ * created within this process. 
++ * ++ * @param func a custom log message handler function or NULL to revert to ++ * the default ++ */ ++void fuse_set_log_func(fuse_log_func_t func); ++ ++/** ++ * Emit a log message ++ * ++ * @param level severity level (FUSE_LOG_ERR, FUSE_LOG_DEBUG, etc) ++ * @param fmt sprintf-style format string including newline ++ */ ++void fuse_log(enum fuse_log_level level, const char *fmt, ...); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* FUSE_LOG_H_ */ +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +new file mode 100644 +index 0000000..18c6363 +--- /dev/null ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -0,0 +1,2089 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. ++*/ ++ ++#ifndef FUSE_LOWLEVEL_H_ ++#define FUSE_LOWLEVEL_H_ ++ ++/** @file ++ * ++ * Low level API ++ * ++ * IMPORTANT: you should define FUSE_USE_VERSION before including this ++ * header. To use the newest API define it to 31 (recommended for any ++ * new application). 
++ */ ++ ++#ifndef FUSE_USE_VERSION ++#error FUSE_USE_VERSION not defined ++#endif ++ ++#include "fuse_common.h" ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ----------------------------------------------------------- * ++ * Miscellaneous definitions * ++ * ----------------------------------------------------------- */ ++ ++/** The node ID of the root inode */ ++#define FUSE_ROOT_ID 1 ++ ++/** Inode number type */ ++typedef uint64_t fuse_ino_t; ++ ++/** Request pointer type */ ++typedef struct fuse_req *fuse_req_t; ++ ++/** ++ * Session ++ * ++ * This provides hooks for processing requests, and exiting ++ */ ++struct fuse_session; ++ ++/** Directory entry parameters supplied to fuse_reply_entry() */ ++struct fuse_entry_param { ++ /** Unique inode number ++ * ++ * In lookup, zero means negative entry (from version 2.5) ++ * Returning ENOENT also means negative entry, but by setting zero ++ * ino the kernel may cache negative entries for entry_timeout ++ * seconds. ++ */ ++ fuse_ino_t ino; ++ ++ /** Generation number for this entry. ++ * ++ * If the file system will be exported over NFS, the ++ * ino/generation pairs need to be unique over the file ++ * system's lifetime (rather than just the mount time). So if ++ * the file system reuses an inode after it has been deleted, ++ * it must assign a new, previously unused generation number ++ * to the inode at the same time. ++ * ++ */ ++ uint64_t generation; ++ ++ /** Inode attributes. ++ * ++ * Even if attr_timeout == 0, attr must be correct. For example, ++ * for open(), FUSE uses attr.st_size from lookup() to determine ++ * how many bytes to request. If this value is not correct, ++ * incorrect data will be returned. ++ */ ++ struct stat attr; ++ ++ /** Validity timeout (in seconds) for inode attributes. If ++ attributes only change as a result of requests that come ++ through the kernel, this should be set to a very large ++ value. 
*/ ++ double attr_timeout; ++ ++ /** Validity timeout (in seconds) for the name. If directory ++ entries are changed/deleted only as a result of requests ++ that come through the kernel, this should be set to a very ++ large value. */ ++ double entry_timeout; ++}; ++ ++/** ++ * Additional context associated with requests. ++ * ++ * Note that the reported client uid, gid and pid may be zero in some ++ * situations. For example, if the FUSE file system is running in a ++ * PID or user namespace but then accessed from outside the namespace, ++ * there is no valid uid/pid/gid that could be reported. ++ */ ++struct fuse_ctx { ++ /** User ID of the calling process */ ++ uid_t uid; ++ ++ /** Group ID of the calling process */ ++ gid_t gid; ++ ++ /** Thread ID of the calling process */ ++ pid_t pid; ++ ++ /** Umask of the calling process */ ++ mode_t umask; ++}; ++ ++struct fuse_forget_data { ++ fuse_ino_t ino; ++ uint64_t nlookup; ++}; ++ ++/* 'to_set' flags in setattr */ ++#define FUSE_SET_ATTR_MODE (1 << 0) ++#define FUSE_SET_ATTR_UID (1 << 1) ++#define FUSE_SET_ATTR_GID (1 << 2) ++#define FUSE_SET_ATTR_SIZE (1 << 3) ++#define FUSE_SET_ATTR_ATIME (1 << 4) ++#define FUSE_SET_ATTR_MTIME (1 << 5) ++#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) ++#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) ++#define FUSE_SET_ATTR_CTIME (1 << 10) ++ ++/* ----------------------------------------------------------- * ++ * Request methods and replies * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Low level filesystem operations ++ * ++ * Most of the methods (with the exception of init and destroy) ++ * receive a request handle (fuse_req_t) as their first argument. ++ * This handle must be passed to one of the specified reply functions. ++ * ++ * This may be done inside the method invocation, or after the call ++ * has returned. The request handle is valid until one of the reply ++ * functions is called. 
++ * ++ * Other pointer arguments (name, fuse_file_info, etc) are not valid ++ * after the call has returned, so if they are needed later, their ++ * contents have to be copied. ++ * ++ * In general, all methods are expected to perform any necessary ++ * permission checking. However, a filesystem may delegate this task ++ * to the kernel by passing the `default_permissions` mount option to ++ * `fuse_session_new()`. In this case, methods will only be called if ++ * the kernel's permission check has succeeded. ++ * ++ * The filesystem sometimes needs to handle a return value of -ENOENT ++ * from the reply function, which means, that the request was ++ * interrupted, and the reply discarded. For example if ++ * fuse_reply_open() return -ENOENT means, that the release method for ++ * this file will not be called. ++ */ ++struct fuse_lowlevel_ops { ++ /** ++ * Initialize filesystem ++ * ++ * This function is called when libfuse establishes ++ * communication with the FUSE kernel module. The file system ++ * should use this module to inspect and/or modify the ++ * connection parameters provided in the `conn` structure. ++ * ++ * Note that some parameters may be overwritten by options ++ * passed to fuse_session_new() which take precedence over the ++ * values set in this handler. ++ * ++ * There's no reply to this function ++ * ++ * @param userdata the user data passed to fuse_session_new() ++ */ ++ void (*init) (void *userdata, struct fuse_conn_info *conn); ++ ++ /** ++ * Clean up filesystem. ++ * ++ * Called on filesystem exit. When this method is called, the ++ * connection to the kernel may be gone already, so that eg. calls ++ * to fuse_lowlevel_notify_* will fail. ++ * ++ * There's no reply to this function ++ * ++ * @param userdata the user data passed to fuse_session_new() ++ */ ++ void (*destroy) (void *userdata); ++ ++ /** ++ * Look up a directory entry by name and get its attributes. 
++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name the name to look up ++ */ ++ void (*lookup) (fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Forget about an inode ++ * ++ * This function is called when the kernel removes an inode ++ * from its internal caches. ++ * ++ * The inode's lookup count increases by one for every call to ++ * fuse_reply_entry and fuse_reply_create. The nlookup parameter ++ * indicates by how much the lookup count should be decreased. ++ * ++ * Inodes with a non-zero lookup count may receive request from ++ * the kernel even after calls to unlink, rmdir or (when ++ * overwriting an existing file) rename. Filesystems must handle ++ * such requests properly and it is recommended to defer removal ++ * of the inode until the lookup count reaches zero. Calls to ++ * unlink, rmdir or rename will be followed closely by forget ++ * unless the file or directory is open, in which case the ++ * kernel issues forget only after the release or releasedir ++ * calls. ++ * ++ * Note that if a file system will be exported over NFS the ++ * inodes lifetime must extend even beyond forget. See the ++ * generation field in struct fuse_entry_param above. ++ * ++ * On unmount the lookup count for all inodes implicitly drops ++ * to zero. It is not guaranteed that the file system will ++ * receive corresponding forget messages for the affected ++ * inodes. ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param nlookup the number of lookups to forget ++ */ ++ void (*forget) (fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); ++ ++ /** ++ * Get file attributes. 
++ * ++ * If writeback caching is enabled, the kernel may have a ++ * better idea of a file's length than the FUSE file system ++ * (eg if there has been a write that extended the file size, ++ * but that has not yet been passed to the filesystem). ++ * ++ * In this case, the st_size value provided by the file system ++ * will be ignored. ++ * ++ * Valid replies: ++ * fuse_reply_attr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi for future use, currently always NULL ++ */ ++ void (*getattr) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Set file attributes ++ * ++ * In the 'attr' argument only members indicated by the 'to_set' ++ * bitmask contain valid values. Other members contain undefined ++ * values. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits if the file ++ * size or owner is being changed. ++ * ++ * If the setattr was invoked from the ftruncate() system call ++ * under Linux kernel versions 2.6.15 or later, the fi->fh will ++ * contain the value set by the open method or will be undefined ++ * if the open method didn't set any value. Otherwise (not ++ * ftruncate call, or kernel version earlier than 2.6.15) the fi ++ * parameter will be NULL.
++ * ++ * Valid replies: ++ * fuse_reply_attr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param attr the attributes ++ * @param to_set bit mask of attributes which should be set ++ * @param fi file information, or NULL ++ */ ++ void (*setattr) (fuse_req_t req, fuse_ino_t ino, struct stat *attr, ++ int to_set, struct fuse_file_info *fi); ++ ++ /** ++ * Read symbolic link ++ * ++ * Valid replies: ++ * fuse_reply_readlink ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ */ ++ void (*readlink) (fuse_req_t req, fuse_ino_t ino); ++ ++ /** ++ * Create file node ++ * ++ * Create a regular file, character device, block device, fifo or ++ * socket node. ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode file type and mode with which to create the new file ++ * @param rdev the device number (only valid if created file is a device) ++ */ ++ void (*mknod) (fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, dev_t rdev); ++ ++ /** ++ * Create a directory ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode with which to create the new file ++ */ ++ void (*mkdir) (fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode); ++ ++ /** ++ * Remove a file ++ * ++ * If the file's inode's lookup count is non-zero, the file ++ * system is expected to postpone any removal of the inode ++ * until the lookup count reaches zero (see description of the ++ * forget function). 
++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to remove ++ */ ++ void (*unlink) (fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Remove a directory ++ * ++ * If the directory's inode's lookup count is non-zero, the ++ * file system is expected to postpone any removal of the ++ * inode until the lookup count reaches zero (see description ++ * of the forget function). ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to remove ++ */ ++ void (*rmdir) (fuse_req_t req, fuse_ino_t parent, const char *name); ++ ++ /** ++ * Create a symbolic link ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param link the contents of the symbolic link ++ * @param parent inode number of the parent directory ++ * @param name to create ++ */ ++ void (*symlink) (fuse_req_t req, const char *link, fuse_ino_t parent, ++ const char *name); ++ ++ /** Rename a file ++ * ++ * If the target exists it should be atomically replaced. If ++ * the target's inode's lookup count is non-zero, the file ++ * system is expected to postpone any removal of the inode ++ * until the lookup count reaches zero (see description of the ++ * forget function). ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EINVAL, i.e. all ++ * future rename requests will fail with EINVAL without being ++ * sent to the filesystem process. ++ * ++ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If ++ * RENAME_NOREPLACE is specified, the filesystem must not ++ * overwrite *newname* if it exists and return an error ++ * instead. If `RENAME_EXCHANGE` is specified, the filesystem ++ * must atomically exchange the two files, i.e.
both must ++ * exist and neither may be deleted. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the old parent directory ++ * @param name old name ++ * @param newparent inode number of the new parent directory ++ * @param newname new name ++ */ ++ void (*rename) (fuse_req_t req, fuse_ino_t parent, const char *name, ++ fuse_ino_t newparent, const char *newname, ++ unsigned int flags); ++ ++ /** ++ * Create a hard link ++ * ++ * Valid replies: ++ * fuse_reply_entry ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the old inode number ++ * @param newparent inode number of the new parent directory ++ * @param newname new name to create ++ */ ++ void (*link) (fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, ++ const char *newname); ++ ++ /** ++ * Open a file ++ * ++ * Open flags are available in fi->flags. The following rules ++ * apply. ++ * ++ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be ++ * filtered out / handled by the kernel. ++ * ++ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used ++ * by the filesystem to check if the operation is ++ * permitted. If the ``-o default_permissions`` mount ++ * option is given, this check is already done by the ++ * kernel before calling open() and may thus be omitted by ++ * the filesystem. ++ * ++ * - When writeback caching is enabled, the kernel may send ++ * read requests even for files opened with O_WRONLY. The ++ * filesystem should be prepared to handle this. ++ * ++ * - When writeback caching is disabled, the filesystem is ++ * expected to properly handle the O_APPEND flag and ensure ++ * that each write is appending to the end of the file. ++ * ++ * - When writeback caching is enabled, the kernel will ++ * handle O_APPEND. However, unless all changes to the file ++ * come through the kernel this will not work reliably. 
The ++ * filesystem should thus either ignore the O_APPEND flag ++ * (and let the kernel handle it), or return an error ++ * (indicating that reliably O_APPEND is not available). ++ * ++ * Filesystem may store an arbitrary file handle (pointer, ++ * index, etc) in fi->fh, and use this in all other file ++ * operations (read, write, flush, release, fsync). ++ * ++ * Filesystem may also implement stateless file I/O and not store ++ * anything in fi->fh. ++ * ++ * There are also some flags (direct_io, keep_cache) which the ++ * filesystem may set in fi, to change the way the file is opened. ++ * See fuse_file_info structure in fuse_common.h for more details. ++ * ++ * If this request is answered with an error code of ENOSYS ++ * and FUSE_CAP_NO_OPEN_SUPPORT is set in ++ * `fuse_conn_info.capable`, this is treated as success and ++ * future calls to open and release will also succeed without being ++ * sent to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_open ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*open) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Read data ++ * ++ * Read should send exactly the number of bytes requested except ++ * on EOF or error, otherwise the rest of the data will be ++ * substituted with zeroes. An exception to this is when the file ++ * has been opened in 'direct_io' mode, in which case the return ++ * value of the read system call will reflect the return value of ++ * this operation. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value.
++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_iov ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size number of bytes to read ++ * @param off offset to read from ++ * @param fi file information ++ */ ++ void (*read) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Write data ++ * ++ * Write should return exactly the number of bytes requested ++ * except on error. An exception to this is when the file has ++ * been opened in 'direct_io' mode, in which case the return value ++ * of the write system call will reflect the return value of this ++ * operation. ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param buf data to write ++ * @param size number of bytes to write ++ * @param off offset to write to ++ * @param fi file information ++ */ ++ void (*write) (fuse_req_t req, fuse_ino_t ino, const char *buf, ++ size_t size, off_t off, struct fuse_file_info *fi); ++ ++ /** ++ * Flush method ++ * ++ * This is called on each close() of the opened file. ++ * ++ * Since file descriptors can be duplicated (dup, dup2, fork), for ++ * one open call there may be many flush calls. ++ * ++ * Filesystems shouldn't assume that flush will always be called ++ * after some writes, or that it will be called at all. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * ++ * NOTE: the name of the method is misleading, since (unlike ++ * fsync) the filesystem is not forced to flush pending writes.
++ * One reason to flush data is if the filesystem wants to return ++ * write errors during close. However, such use is non-portable ++ * because POSIX does not require [close] to wait for delayed I/O to ++ * complete. ++ * ++ * If the filesystem supports file locking operations (setlk, ++ * getlk) it should remove all locks belonging to 'fi->owner'. ++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to flush() will ++ * succeed automatically without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * ++ * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html ++ */ ++ void (*flush) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Release an open file ++ * ++ * Release is called when there are no more references to an open ++ * file: all file descriptors are closed and all memory mappings ++ * are unmapped. ++ * ++ * For every open call there will be exactly one release call (unless ++ * the filesystem is force-unmounted). ++ * ++ * The filesystem may reply with an error, but error values are ++ * not returned to close() or munmap() which triggered the ++ * release. ++ * ++ * fi->fh will contain the value set by the open method, or will ++ * be undefined if the open method didn't set any value. ++ * fi->flags will contain the same flags as for open. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*release) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Synchronize file contents ++ * ++ * If the datasync parameter is non-zero, then only the user data ++ * should be flushed, not the meta data. 
++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to fsync() will ++ * succeed automatically without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param datasync flag indicating if only data should be flushed ++ * @param fi file information ++ */ ++ void (*fsync) (fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Open a directory ++ * ++ * Filesystem may store an arbitrary file handle (pointer, index, ++ * etc) in fi->fh, and use this in other all other directory ++ * stream operations (readdir, releasedir, fsyncdir). ++ * ++ * If this request is answered with an error code of ENOSYS and ++ * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, ++ * this is treated as success and future calls to opendir and ++ * releasedir will also succeed without being sent to the filesystem ++ * process. In addition, the kernel will cache readdir results ++ * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. ++ * ++ * Valid replies: ++ * fuse_reply_open ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*opendir) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Read directory ++ * ++ * Send a buffer filled using fuse_add_direntry(), with size not ++ * exceeding the requested size. Send an empty buffer on end of ++ * stream. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * Returning a directory entry from readdir() does not affect ++ * its lookup count. ++ * ++ * If off_t is non-zero, then it will correspond to one of the off_t ++ * values that was previously returned by readdir() for the same ++ * directory handle. 
In this case, readdir() should skip over entries ++ * coming before the position defined by the off_t value. If entries ++ * are added or removed while the directory handle is open, the filesystem ++ * may still include the entries that have been removed, and may not ++ * report the entries that have been created. However, addition or ++ * removal of entries must never cause readdir() to skip over unrelated ++ * entries or to report them more than once. This means ++ * that off_t can not be a simple index that enumerates the entries ++ * that have been returned but must contain sufficient information to ++ * uniquely determine the next directory entry to return even when the ++ * set of entries is changing. ++ * ++ * The function does not have to report the '.' and '..' ++ * entries, but is allowed to do so. Note that, if readdir does ++ * not return '.' or '..', they will not be implicitly returned, ++ * and this behavior is observable by the caller. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum number of bytes to send ++ * @param off offset to continue reading the directory stream ++ * @param fi file information ++ */ ++ void (*readdir) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Release an open directory ++ * ++ * For every opendir call there will be exactly one releasedir ++ * call (unless the filesystem is force-unmounted). ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value.
++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ */ ++ void (*releasedir) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Synchronize directory contents ++ * ++ * If the datasync parameter is non-zero, then only the directory ++ * contents should be flushed, not the meta data. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * If this request is answered with an error code of ENOSYS, ++ * this is treated as success and future calls to fsyncdir() will ++ * succeed automatically without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param datasync flag indicating if only data should be flushed ++ * @param fi file information ++ */ ++ void (*fsyncdir) (fuse_req_t req, fuse_ino_t ino, int datasync, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Get file system statistics ++ * ++ * Valid replies: ++ * fuse_reply_statfs ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number, zero means "undefined" ++ */ ++ void (*statfs) (fuse_req_t req, fuse_ino_t ino); ++ ++ /** ++ * Set an extended attribute ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future setxattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ */ ++ void (*setxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, ++ const char *value, size_t size, int flags); ++ ++ /** ++ * Get an extended attribute ++ * ++ * If size is zero, the size of the value should be sent with ++ * fuse_reply_xattr. 
++ * ++ * If the size is non-zero, and the value fits in the buffer, the ++ * value should be sent with fuse_reply_buf. ++ * ++ * If the size is too small for the value, the ERANGE error should ++ * be sent. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future getxattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_xattr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param name of the extended attribute ++ * @param size maximum size of the value to send ++ */ ++ void (*getxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, ++ size_t size); ++ ++ /** ++ * List extended attribute names ++ * ++ * If size is zero, the total size of the attribute list should be ++ * sent with fuse_reply_xattr. ++ * ++ * If the size is non-zero, and the null character separated ++ * attribute list fits in the buffer, the list should be sent with ++ * fuse_reply_buf. ++ * ++ * If the size is too small for the list, the ERANGE error should ++ * be sent. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future listxattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_xattr ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum size of the list to send ++ */ ++ void (*listxattr) (fuse_req_t req, fuse_ino_t ino, size_t size); ++ ++ /** ++ * Remove an extended attribute ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all ++ * future removexattr() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param name of the extended attribute ++ */ ++ void (*removexattr) (fuse_req_t req, fuse_ino_t ino, const char *name); ++ ++ /** ++ * Check file access permissions ++ * ++ * This will be called for the access() and chdir() system ++ * calls. If the 'default_permissions' mount option is given, ++ * this method is not called. ++ * ++ * This method is not called under Linux kernel versions 2.4.x ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent success, i.e. this and all future access() ++ * requests will succeed without being send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param mask requested access mode ++ */ ++ void (*access) (fuse_req_t req, fuse_ino_t ino, int mask); ++ ++ /** ++ * Create and open a file ++ * ++ * If the file does not exist, first create it with the specified ++ * mode, and then open it. ++ * ++ * See the description of the open handler for more ++ * information. ++ * ++ * If this method is not implemented or under Linux kernel ++ * versions earlier than 2.6.15, the mknod() and open() methods ++ * will be called instead. ++ * ++ * If this request is answered with an error code of ENOSYS, the handler ++ * is treated as not implemented (i.e., for this and future requests the ++ * mknod() and open() handlers will be called instead). 
++ * ++ * Valid replies: ++ * fuse_reply_create ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param parent inode number of the parent directory ++ * @param name to create ++ * @param mode file type and mode with which to create the new file ++ * @param fi file information ++ */ ++ void (*create) (fuse_req_t req, fuse_ino_t parent, const char *name, ++ mode_t mode, struct fuse_file_info *fi); ++ ++ /** ++ * Test for a POSIX file lock ++ * ++ * Valid replies: ++ * fuse_reply_lock ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param lock the region/type to test ++ */ ++ void (*getlk) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi, struct flock *lock); ++ ++ /** ++ * Acquire, modify or release a POSIX file lock ++ * ++ * For POSIX threads (NPTL) there's a 1-1 relation between pid and ++ * owner, but otherwise this is not always the case. For checking ++ * lock ownership, 'fi->owner' must be used. The l_pid field in ++ * 'struct flock' should only be used to fill in this field in ++ * getlk(). ++ * ++ * Note: if the locking methods are not implemented, the kernel ++ * will still allow file locking to work locally. Hence these are ++ * only interesting for network filesystems and similar. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param lock the region/type to set ++ * @param sleep locking operation may sleep ++ */ ++ void (*setlk) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi, ++ struct flock *lock, int sleep); ++ ++ /** ++ * Map block index within file to block index within device ++ * ++ * Note: This makes sense only for block device backed filesystems ++ * mounted with the 'blkdev' option ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure, i.e. 
all future bmap() requests will ++ * fail with the same error code without being send to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_bmap ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param blocksize unit of block index ++ * @param idx block index within file ++ */ ++ void (*bmap) (fuse_req_t req, fuse_ino_t ino, size_t blocksize, ++ uint64_t idx); ++ ++ /** ++ * Ioctl ++ * ++ * Note: For unrestricted ioctls (not allowed for FUSE ++ * servers), data in and out areas can be discovered by giving ++ * iovs and setting FUSE_IOCTL_RETRY in *flags*. For ++ * restricted ioctls, kernel prepares in/out data area ++ * according to the information encoded in cmd. ++ * ++ * Valid replies: ++ * fuse_reply_ioctl_retry ++ * fuse_reply_ioctl ++ * fuse_reply_ioctl_iov ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param cmd ioctl command ++ * @param arg ioctl argument ++ * @param fi file information ++ * @param flags for FUSE_IOCTL_* flags ++ * @param in_buf data fetched from the caller ++ * @param in_bufsz number of fetched bytes ++ * @param out_bufsz maximum size of output data ++ * ++ * Note : the unsigned long request submitted by the application ++ * is truncated to 32 bits. ++ */ ++ void (*ioctl) (fuse_req_t req, fuse_ino_t ino, unsigned int cmd, ++ void *arg, struct fuse_file_info *fi, unsigned flags, ++ const void *in_buf, size_t in_bufsz, size_t out_bufsz); ++ ++ /** ++ * Poll for IO readiness ++ * ++ * Note: If ph is non-NULL, the client should notify ++ * when IO readiness events occur by calling ++ * fuse_lowlevel_notify_poll() with the specified ph. ++ * ++ * Regardless of the number of times poll with a non-NULL ph ++ * is received, single notification is enough to clear all. ++ * Notifying more times incurs overhead but doesn't harm ++ * correctness. 
++ * ++ * The callee is responsible for destroying ph with ++ * fuse_pollhandle_destroy() when no longer in use. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as success (with a kernel-defined default poll-mask) and ++ * future calls to poll() will succeed the same way without being sent ++ * to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_poll ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param ph poll handle to be used for notification ++ */ ++ void (*poll) (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct fuse_pollhandle *ph); ++ ++ /** ++ * Write data made available in a buffer ++ * ++ * This is a more generic version of the ->write() method. If ++ * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the ++ * kernel supports splicing from the fuse device, then the ++ * data will be made available in pipe for supporting zero ++ * copy data transfer. ++ * ++ * buf->count is guaranteed to be one (and thus buf->idx is ++ * always zero). The write_buf handler must ensure that ++ * bufv->off is correctly updated (reflecting the number of ++ * bytes read from bufv->buf[0]). ++ * ++ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is ++ * expected to reset the setuid and setgid bits.
++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param bufv buffer containing the data ++ * @param off offset to write to ++ * @param fi file information ++ */ ++ void (*write_buf) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_bufvec *bufv, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Callback function for the retrieve request ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() ++ * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() ++ * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() ++ * @param bufv the buffer containing the returned data ++ */ ++ void (*retrieve_reply) (fuse_req_t req, void *cookie, fuse_ino_t ino, ++ off_t offset, struct fuse_bufvec *bufv); ++ ++ /** ++ * Forget about multiple inodes ++ * ++ * See description of the forget function for more ++ * information. ++ * ++ * Valid replies: ++ * fuse_reply_none ++ * ++ * @param req request handle ++ */ ++ void (*forget_multi) (fuse_req_t req, size_t count, ++ struct fuse_forget_data *forgets); ++ ++ /** ++ * Acquire, modify or release a BSD file lock ++ * ++ * Note: if the locking methods are not implemented, the kernel ++ * will still allow file locking to work locally. Hence these are ++ * only interesting for network filesystems and similar. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param fi file information ++ * @param op the locking operation, see flock(2) ++ */ ++ void (*flock) (fuse_req_t req, fuse_ino_t ino, ++ struct fuse_file_info *fi, int op); ++ ++ /** ++ * Allocate requested space. If this function returns success then ++ * subsequent writes to the specified range shall not fail due to the lack ++ * of free space on the file system storage media. 
++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future fallocate() requests will fail with EOPNOTSUPP without being ++ * send to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param offset starting point for allocated region ++ * @param length size of allocated region ++ * @param mode determines the operation to be performed on the given range, ++ * see fallocate(2) ++ */ ++ void (*fallocate) (fuse_req_t req, fuse_ino_t ino, int mode, ++ off_t offset, off_t length, struct fuse_file_info *fi); ++ ++ /** ++ * Read directory with attributes ++ * ++ * Send a buffer filled using fuse_add_direntry_plus(), with size not ++ * exceeding the requested size. Send an empty buffer on end of ++ * stream. ++ * ++ * fi->fh will contain the value set by the opendir method, or ++ * will be undefined if the opendir method didn't set any value. ++ * ++ * In contrast to readdir() (which does not affect the lookup counts), ++ * the lookup count of every entry returned by readdirplus(), except "." ++ * and "..", is incremented by one. ++ * ++ * Valid replies: ++ * fuse_reply_buf ++ * fuse_reply_data ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param size maximum number of bytes to send ++ * @param off offset to continue reading the directory stream ++ * @param fi file information ++ */ ++ void (*readdirplus) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, ++ struct fuse_file_info *fi); ++ ++ /** ++ * Copy a range of data from one file to another ++ * ++ * Performs an optimized copy between two file descriptors without the ++ * additional cost of transferring data through the FUSE kernel module ++ * to user space (glibc) and then back into the FUSE filesystem again. 
++ * ++ * In case this method is not implemented, glibc falls back to reading ++ * data from the source and writing to the destination. Effectively ++ * doing an inefficient copy of the data. ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all ++ * future copy_file_range() requests will fail with EOPNOTSUPP without ++ * being sent to the filesystem process. ++ * ++ * Valid replies: ++ * fuse_reply_write ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino_in the inode number of the source file ++ * @param off_in starting point from where the data should be read ++ * @param fi_in file information of the source file ++ * @param ino_out the inode number of the destination file ++ * @param off_out starting point where the data should be written ++ * @param fi_out file information of the destination file ++ * @param len maximum size of the data to copy ++ * @param flags passed along with the copy_file_range() syscall ++ */ ++ void (*copy_file_range) (fuse_req_t req, fuse_ino_t ino_in, ++ off_t off_in, struct fuse_file_info *fi_in, ++ fuse_ino_t ino_out, off_t off_out, ++ struct fuse_file_info *fi_out, size_t len, ++ int flags); ++ ++ /** ++ * Find next data or hole after the specified offset ++ * ++ * If this request is answered with an error code of ENOSYS, this is ++ * treated as a permanent failure, i.e. all future lseek() requests will ++ * fail with the same error code without being sent to the filesystem ++ * process. ++ * ++ * Valid replies: ++ * fuse_reply_lseek ++ * fuse_reply_err ++ * ++ * @param req request handle ++ * @param ino the inode number ++ * @param off offset to start search from ++ * @param whence either SEEK_DATA or SEEK_HOLE ++ * @param fi file information ++ */ ++ void (*lseek) (fuse_req_t req, fuse_ino_t ino, off_t off, int whence, ++ struct fuse_file_info *fi); ++}; ++ ++/** ++ * Reply with an error code or success.
++ * ++ * Possible requests: ++ * all except forget ++ * ++ * Wherever possible, error codes should be chosen from the list of ++ * documented error conditions in the corresponding system calls ++ * manpage. ++ * ++ * An error code of ENOSYS is sometimes treated specially. This is ++ * indicated in the documentation of the affected handler functions. ++ * ++ * The following requests may be answered with a zero error code: ++ * unlink, rmdir, rename, flush, release, fsync, fsyncdir, setxattr, ++ * removexattr, setlk. ++ * ++ * @param req request handle ++ * @param err the positive error value, or zero for success ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_err(fuse_req_t req, int err); ++ ++/** ++ * Don't send reply ++ * ++ * Possible requests: ++ * forget ++ * forget_multi ++ * retrieve_reply ++ * ++ * @param req request handle ++ */ ++void fuse_reply_none(fuse_req_t req); ++ ++/** ++ * Reply with a directory entry ++ * ++ * Possible requests: ++ * lookup, mknod, mkdir, symlink, link ++ * ++ * Side effects: ++ * increments the lookup count on success ++ * ++ * @param req request handle ++ * @param e the entry parameters ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); ++ ++/** ++ * Reply with a directory entry and open parameters ++ * ++ * currently the following members of 'fi' are used: ++ * fh, direct_io, keep_cache ++ * ++ * Possible requests: ++ * create ++ * ++ * Side effects: ++ * increments the lookup count on success ++ * ++ * @param req request handle ++ * @param e the entry parameters ++ * @param fi file information ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, ++ const struct fuse_file_info *fi); ++ ++/** ++ * Reply with attributes ++ * ++ * Possible requests: ++ * getattr, setattr ++ * ++ * @param req request handle ++
* @param attr the attributes ++ * @param attr_timeout validity timeout (in seconds) for the attributes ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_attr(fuse_req_t req, const struct stat *attr, ++ double attr_timeout); ++ ++/** ++ * Reply with the contents of a symbolic link ++ * ++ * Possible requests: ++ * readlink ++ * ++ * @param req request handle ++ * @param link symbolic link contents ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_readlink(fuse_req_t req, const char *link); ++ ++/** ++ * Reply with open parameters ++ * ++ * currently the following members of 'fi' are used: ++ * fh, direct_io, keep_cache ++ * ++ * Possible requests: ++ * open, opendir ++ * ++ * @param req request handle ++ * @param fi file information ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *fi); ++ ++/** ++ * Reply with number of bytes written ++ * ++ * Possible requests: ++ * write ++ * ++ * @param req request handle ++ * @param count the number of bytes written ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_write(fuse_req_t req, size_t count); ++ ++/** ++ * Reply with data ++ * ++ * Possible requests: ++ * read, readdir, getxattr, listxattr ++ * ++ * @param req request handle ++ * @param buf buffer containing data ++ * @param size the size of data in bytes ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); ++ ++/** ++ * Reply with data copied/moved from buffer(s) ++ * ++ * Zero copy data transfer ("splicing") will be used under ++ * the following circumstances: ++ * ++ * 1. FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.want, and ++ * 2. the kernel supports splicing from the fuse device ++ * (FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.capable), and ++ * 3. 
*flags* does not contain FUSE_BUF_NO_SPLICE ++ * 4. The amount of data that is provided in file-descriptor backed ++ * buffers (i.e., buffers for which bufv[n].flags == FUSE_BUF_FD) ++ * is at least twice the page size. ++ * ++ * In order for SPLICE_F_MOVE to be used, the following additional ++ * conditions have to be fulfilled: ++ * ++ * 1. FUSE_CAP_SPLICE_MOVE is set in fuse_conn_info.want, and ++ * 2. the kernel supports it (i.e, FUSE_CAP_SPLICE_MOVE is set in ++ fuse_conn_info.capable), and ++ * 3. *flags* contains FUSE_BUF_SPLICE_MOVE ++ * ++ * Note that, if splice is used, the data is actually spliced twice: ++ * once into a temporary pipe (to prepend header data), and then again ++ * into the kernel. If some of the provided buffers are memory-backed, ++ * the data in them is copied in step one and spliced in step two. ++ * ++ * The FUSE_BUF_SPLICE_FORCE_SPLICE and FUSE_BUF_SPLICE_NONBLOCK flags ++ * are silently ignored. ++ * ++ * Possible requests: ++ * read, readdir, getxattr, listxattr ++ * ++ * Side effects: ++ * when used to return data from a readdirplus() (but not readdir()) ++ * call, increments the lookup count of each returned entry by one ++ * on success. 
++ * ++ * @param req request handle ++ * @param bufv buffer vector ++ * @param flags flags controlling the copy ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags); ++ ++/** ++ * Reply with data vector ++ * ++ * Possible requests: ++ * read, readdir, getxattr, listxattr ++ * ++ * @param req request handle ++ * @param iov the vector containing the data ++ * @param count the size of vector ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count); ++ ++/** ++ * Reply with filesystem statistics ++ * ++ * Possible requests: ++ * statfs ++ * ++ * @param req request handle ++ * @param stbuf filesystem statistics ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf); ++ ++/** ++ * Reply with needed buffer size ++ * ++ * Possible requests: ++ * getxattr, listxattr ++ * ++ * @param req request handle ++ * @param count the buffer size needed in bytes ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_xattr(fuse_req_t req, size_t count); ++ ++/** ++ * Reply with file lock information ++ * ++ * Possible requests: ++ * getlk ++ * ++ * @param req request handle ++ * @param lock the lock information ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_lock(fuse_req_t req, const struct flock *lock); ++ ++/** ++ * Reply with block index ++ * ++ * Possible requests: ++ * bmap ++ * ++ * @param req request handle ++ * @param idx block index within device ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_bmap(fuse_req_t req, uint64_t idx); ++ ++/* ----------------------------------------------------------- * ++ * Filling a buffer in readdir * ++ * 
----------------------------------------------------------- */ ++ ++/** ++ * Add a directory entry to the buffer ++ * ++ * Buffer needs to be large enough to hold the entry. If it's not, ++ * then the entry is not filled in but the size of the entry is still ++ * returned. The caller can check this by comparing the bufsize ++ * parameter with the returned entry size. If the entry size is ++ * larger than the buffer size, the operation failed. ++ * ++ * From the 'stbuf' argument the st_ino field and bits 12-15 of the ++ * st_mode field are used. The other fields are ignored. ++ * ++ * *off* should be any non-zero value that the filesystem can use to ++ * identify the current point in the directory stream. It does not ++ * need to be the actual physical position. A value of zero is ++ * reserved to mean "from the beginning", and should therefore never ++ * be used (the first call to fuse_add_direntry should be passed the ++ * offset of the second directory entry). ++ * ++ * @param req request handle ++ * @param buf the point where the new entry will be added to the buffer ++ * @param bufsize remaining size of the buffer ++ * @param name the name of the entry ++ * @param stbuf the file attributes ++ * @param off the offset of the next entry ++ * @return the space needed for the entry ++ */ ++size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, ++ const char *name, const struct stat *stbuf, ++ off_t off); ++ ++/** ++ * Add a directory entry to the buffer with the attributes ++ * ++ * See documentation of `fuse_add_direntry()` for more details. 
++ * ++ * @param req request handle ++ * @param buf the point where the new entry will be added to the buffer ++ * @param bufsize remaining size of the buffer ++ * @param name the name of the entry ++ * @param e the directory entry ++ * @param off the offset of the next entry ++ * @return the space needed for the entry ++ */ ++size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, ++ const char *name, ++ const struct fuse_entry_param *e, off_t off); ++ ++/** ++ * Reply to ask for data fetch and output buffer preparation. ioctl ++ * will be retried with the specified input data fetched and output ++ * buffer prepared. ++ * ++ * Possible requests: ++ * ioctl ++ * ++ * @param req request handle ++ * @param in_iov iovec specifying data to fetch from the caller ++ * @param in_count number of entries in in_iov ++ * @param out_iov iovec specifying addresses to write output to ++ * @param out_count number of entries in out_iov ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_ioctl_retry(fuse_req_t req, ++ const struct iovec *in_iov, size_t in_count, ++ const struct iovec *out_iov, size_t out_count); ++ ++/** ++ * Reply to finish ioctl ++ * ++ * Possible requests: ++ * ioctl ++ * ++ * @param req request handle ++ * @param result result to be passed to the caller ++ * @param buf buffer containing output data ++ * @param size length of output data ++ */ ++int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size); ++ ++/** ++ * Reply to finish ioctl with iov buffer ++ * ++ * Possible requests: ++ * ioctl ++ * ++ * @param req request handle ++ * @param result result to be passed to the caller ++ * @param iov the vector containing the data ++ * @param count the size of vector ++ */ ++int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, ++ int count); ++ ++/** ++ * Reply with poll result event mask ++ * ++ * @param req request handle ++ * @param revents poll result event mask ++ 
*/ ++int fuse_reply_poll(fuse_req_t req, unsigned revents); ++ ++/** ++ * Reply with offset ++ * ++ * Possible requests: ++ * lseek ++ * ++ * @param req request handle ++ * @param off offset of next data or hole ++ * @return zero for success, -errno for failure to send reply ++ */ ++int fuse_reply_lseek(fuse_req_t req, off_t off); ++ ++/* ----------------------------------------------------------- * ++ * Notification * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Notify IO readiness event ++ * ++ * For more information, please read comment for poll operation. ++ * ++ * @param ph poll handle to notify IO readiness event for ++ */ ++int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph); ++ ++/** ++ * Notify to invalidate cache for an inode. ++ * ++ * Added in FUSE protocol version 7.12. If the kernel does not support ++ * this (or a newer) version, the function will return -ENOSYS and do ++ * nothing. ++ * ++ * If the filesystem has writeback caching enabled, invalidating an ++ * inode will first trigger a writeback of all dirty pages. The call ++ * will block until all writeback requests have completed and the ++ * inode has been invalidated. It will, however, not wait for ++ * completion of pending writeback requests that have been issued ++ * before. ++ * ++ * If there are no dirty pages, this function will never block. 
++ * ++ * @param se the session object ++ * @param ino the inode number ++ * @param off the offset in the inode where to start invalidating ++ * or negative to invalidate attributes only ++ * @param len the amount of cache to invalidate or 0 for all ++ * @return zero for success, -errno for failure ++ */ ++int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, ++ off_t off, off_t len); ++ ++/** ++ * Notify to invalidate parent attributes and the dentry matching ++ * parent/name ++ * ++ * To avoid a deadlock this function must not be called in the ++ * execution path of a related filesytem operation or within any code ++ * that could hold a lock that could be needed to execute such an ++ * operation. As of kernel 4.18, a "related operation" is a lookup(), ++ * symlink(), mknod(), mkdir(), unlink(), rename(), link() or create() ++ * request for the parent, and a setattr(), unlink(), rmdir(), ++ * rename(), setxattr(), removexattr(), readdir() or readdirplus() ++ * request for the inode itself. ++ * ++ * When called correctly, this function will never block. ++ * ++ * Added in FUSE protocol version 7.12. If the kernel does not support ++ * this (or a newer) version, the function will return -ENOSYS and do ++ * nothing. ++ * ++ * @param se the session object ++ * @param parent inode number ++ * @param name file name ++ * @param namelen strlen() of file name ++ * @return zero for success, -errno for failure ++ */ ++int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, ++ const char *name, size_t namelen); ++ ++/** ++ * This function behaves like fuse_lowlevel_notify_inval_entry() with ++ * the following additional effect (at least as of Linux kernel 4.8): ++ * ++ * If the provided *child* inode matches the inode that is currently ++ * associated with the cached dentry, and if there are any inotify ++ * watches registered for the dentry, then the watchers are informed ++ * that the dentry has been deleted. 
++ * ++ * To avoid a deadlock this function must not be called while ++ * executing a related filesytem operation or while holding a lock ++ * that could be needed to execute such an operation (see the ++ * description of fuse_lowlevel_notify_inval_entry() for more ++ * details). ++ * ++ * When called correctly, this function will never block. ++ * ++ * Added in FUSE protocol version 7.18. If the kernel does not support ++ * this (or a newer) version, the function will return -ENOSYS and do ++ * nothing. ++ * ++ * @param se the session object ++ * @param parent inode number ++ * @param child inode number ++ * @param name file name ++ * @param namelen strlen() of file name ++ * @return zero for success, -errno for failure ++ */ ++int fuse_lowlevel_notify_delete(struct fuse_session *se, ++ fuse_ino_t parent, fuse_ino_t child, ++ const char *name, size_t namelen); ++ ++/** ++ * Store data to the kernel buffers ++ * ++ * Synchronously store data in the kernel buffers belonging to the ++ * given inode. The stored data is marked up-to-date (no read will be ++ * performed against it, unless it's invalidated or evicted from the ++ * cache). ++ * ++ * If the stored data overflows the current file size, then the size ++ * is extended, similarly to a write(2) on the filesystem. ++ * ++ * If this function returns an error, then the store wasn't fully ++ * completed, but it may have been partially completed. ++ * ++ * Added in FUSE protocol version 7.15. If the kernel does not support ++ * this (or a newer) version, the function will return -ENOSYS and do ++ * nothing. 
++ * ++ * @param se the session object ++ * @param ino the inode number ++ * @param offset the starting offset into the file to store to ++ * @param bufv buffer vector ++ * @param flags flags controlling the copy ++ * @return zero for success, -errno for failure ++ */ ++int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, ++ off_t offset, struct fuse_bufvec *bufv, ++ enum fuse_buf_copy_flags flags); ++/** ++ * Retrieve data from the kernel buffers ++ * ++ * Retrieve data in the kernel buffers belonging to the given inode. ++ * If successful then the retrieve_reply() method will be called with ++ * the returned data. ++ * ++ * Only present pages are returned in the retrieve reply. Retrieving ++ * stops when it finds a non-present page and only data prior to that ++ * is returned. ++ * ++ * If this function returns an error, then the retrieve will not be ++ * completed and no reply will be sent. ++ * ++ * This function doesn't change the dirty state of pages in the kernel ++ * buffer. For dirty pages the write() method will be called ++ * regardless of having been retrieved previously. ++ * ++ * Added in FUSE protocol version 7.15. If the kernel does not support ++ * this (or a newer) version, the function will return -ENOSYS and do ++ * nothing. 
++ * ++ * @param se the session object ++ * @param ino the inode number ++ * @param size the number of bytes to retrieve ++ * @param offset the starting offset into the file to retrieve from ++ * @param cookie user data to supply to the reply callback ++ * @return zero for success, -errno for failure ++ */ ++int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, ++ size_t size, off_t offset, void *cookie); ++ ++ ++/* ----------------------------------------------------------- * ++ * Utility functions * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Get the userdata from the request ++ * ++ * @param req request handle ++ * @return the user data passed to fuse_session_new() ++ */ ++void *fuse_req_userdata(fuse_req_t req); ++ ++/** ++ * Get the context from the request ++ * ++ * The pointer returned by this function will only be valid for the ++ * request's lifetime ++ * ++ * @param req request handle ++ * @return the context structure ++ */ ++const struct fuse_ctx *fuse_req_ctx(fuse_req_t req); ++ ++/** ++ * Get the current supplementary group IDs for the specified request ++ * ++ * Similar to the getgroups(2) system call, except the return value is ++ * always the total number of group IDs, even if it is larger than the ++ * specified size. ++ * ++ * The current fuse kernel module in linux (as of 2.6.30) doesn't pass ++ * the group list to userspace, hence this function needs to parse ++ * "/proc/$TID/task/$TID/status" to get the group IDs. ++ * ++ * This feature may not be supported on all operating systems. In ++ * such a case this function will return -ENOSYS. 
++ * ++ * @param req request handle ++ * @param size size of given array ++ * @param list array of group IDs to be filled in ++ * @return the total number of supplementary group IDs or -errno on failure ++ */ ++int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]); ++ ++/** ++ * Callback function for an interrupt ++ * ++ * @param req interrupted request ++ * @param data user data ++ */ ++typedef void (*fuse_interrupt_func_t)(fuse_req_t req, void *data); ++ ++/** ++ * Register/unregister callback for an interrupt ++ * ++ * If an interrupt has already happened, then the callback function is ++ * called from within this function, hence it's not possible for ++ * interrupts to be lost. ++ * ++ * @param req request handle ++ * @param func the callback function or NULL for unregister ++ * @param data user data passed to the callback function ++ */ ++void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, ++ void *data); ++ ++/** ++ * Check if a request has already been interrupted ++ * ++ * @param req request handle ++ * @return 1 if the request has been interrupted, 0 otherwise ++ */ ++int fuse_req_interrupted(fuse_req_t req); ++ ++ ++/* ----------------------------------------------------------- * ++ * Inquiry functions * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Print low-level version information to stdout. ++ */ ++void fuse_lowlevel_version(void); ++ ++/** ++ * Print available low-level options to stdout. This is not an ++ * exhaustive list, but includes only those options that may be of ++ * interest to an end-user of a file system. ++ */ ++void fuse_lowlevel_help(void); ++ ++/** ++ * Print available options for `fuse_parse_cmdline()`. 
++ */ ++void fuse_cmdline_help(void); ++ ++/* ----------------------------------------------------------- * ++ * Filesystem setup & teardown * ++ * ----------------------------------------------------------- */ ++ ++struct fuse_cmdline_opts { ++ int singlethread; ++ int foreground; ++ int debug; ++ int nodefault_subtype; ++ char *mountpoint; ++ int show_version; ++ int show_help; ++ int clone_fd; ++ unsigned int max_idle_threads; ++}; ++ ++/** ++ * Utility function to parse common options for simple file systems ++ * using the low-level API. A help text that describes the available ++ * options can be printed with `fuse_cmdline_help`. A single ++ * non-option argument is treated as the mountpoint. Multiple ++ * non-option arguments will result in an error. ++ * ++ * If neither -o subtype= or -o fsname= options are given, a new ++ * subtype option will be added and set to the basename of the program ++ * (the fsname will remain unset, and then defaults to "fuse"). ++ * ++ * Known options will be removed from *args*, unknown options will ++ * remain. ++ * ++ * @param args argument vector (input+output) ++ * @param opts output argument for parsed options ++ * @return 0 on success, -1 on failure ++ */ ++int fuse_parse_cmdline(struct fuse_args *args, ++ struct fuse_cmdline_opts *opts); ++ ++/** ++ * Create a low level session. ++ * ++ * Returns a session structure suitable for passing to ++ * fuse_session_mount() and fuse_session_loop(). ++ * ++ * This function accepts most file-system independent mount options ++ * (like context, nodev, ro - see mount(8)), as well as the general ++ * fuse mount options listed in mount.fuse(8) (e.g. -o allow_root and ++ * -o default_permissions, but not ``-o use_ino``). Instead of `-o ++ * debug`, debugging may also enabled with `-d` or `--debug`. ++ * ++ * If not all options are known, an error message is written to stderr ++ * and the function returns NULL. 
++ * ++ * Option parsing skips argv[0], which is assumed to contain the ++ * program name. To prevent accidentally passing an option in ++ * argv[0], this element must always be present (even if no options ++ * are specified). It may be set to the empty string ('\0') if no ++ * reasonable value can be provided. ++ * ++ * @param args argument vector ++ * @param op the (low-level) filesystem operations ++ * @param op_size sizeof(struct fuse_lowlevel_ops) ++ * @param userdata user data ++ * ++ * @return the fuse session on success, NULL on failure ++ **/ ++struct fuse_session *fuse_session_new(struct fuse_args *args, ++ const struct fuse_lowlevel_ops *op, ++ size_t op_size, void *userdata); ++ ++/** ++ * Mount a FUSE file system. ++ * ++ * @param mountpoint the mount point path ++ * @param se session object ++ * ++ * @return 0 on success, -1 on failure. ++ **/ ++int fuse_session_mount(struct fuse_session *se, const char *mountpoint); ++ ++/** ++ * Enter a single threaded, blocking event loop. ++ * ++ * When the event loop terminates because the connection to the FUSE ++ * kernel module has been closed, this function returns zero. This ++ * happens when the filesystem is unmounted regularly (by the ++ * filesystem owner or root running the umount(8) or fusermount(1) ++ * command), or if connection is explicitly severed by writing ``1`` ++ * to the``abort`` file in ``/sys/fs/fuse/connections/NNN``. The only ++ * way to distinguish between these two conditions is to check if the ++ * filesystem is still mounted after the session loop returns. ++ * ++ * When some error occurs during request processing, the function ++ * returns a negated errno(3) value. ++ * ++ * If the loop has been terminated because of a signal handler ++ * installed by fuse_set_signal_handlers(), this function returns the ++ * (positive) signal value that triggered the exit. 
++ * ++ * @param se the session ++ * @return 0, -errno, or a signal value ++ */ ++int fuse_session_loop(struct fuse_session *se); ++ ++/** ++ * Enter a multi-threaded event loop. ++ * ++ * For a description of the return value and the conditions when the ++ * event loop exits, refer to the documentation of ++ * fuse_session_loop(). ++ * ++ * @param se the session ++ * @param config session loop configuration ++ * @return see fuse_session_loop() ++ */ ++#if FUSE_USE_VERSION < 32 ++int fuse_session_loop_mt_31(struct fuse_session *se, int clone_fd); ++#define fuse_session_loop_mt(se, clone_fd) fuse_session_loop_mt_31(se, clone_fd) ++#else ++int fuse_session_loop_mt(struct fuse_session *se, struct fuse_loop_config *config); ++#endif ++ ++/** ++ * Flag a session as terminated. ++ * ++ * This function is invoked by the POSIX signal handlers, when ++ * registered using fuse_set_signal_handlers(). It will cause any ++ * running event loops to terminate on the next opportunity. ++ * ++ * @param se the session ++ */ ++void fuse_session_exit(struct fuse_session *se); ++ ++/** ++ * Reset the terminated flag of a session ++ * ++ * @param se the session ++ */ ++void fuse_session_reset(struct fuse_session *se); ++ ++/** ++ * Query the terminated flag of a session ++ * ++ * @param se the session ++ * @return 1 if exited, 0 if not exited ++ */ ++int fuse_session_exited(struct fuse_session *se); ++ ++/** ++ * Ensure that file system is unmounted. ++ * ++ * In regular operation, the file system is typically unmounted by the ++ * user calling umount(8) or fusermount(1), which then terminates the ++ * FUSE session loop. However, the session loop may also terminate as ++ * a result of an explicit call to fuse_session_exit() (e.g. by a ++ * signal handler installed by fuse_set_signal_handler()). 
In this ++ * case the filesystem remains mounted, but any attempt to access it ++ * will block (while the filesystem process is still running) or give ++ * an ESHUTDOWN error (after the filesystem process has terminated). ++ * ++ * If the communication channel with the FUSE kernel module is still ++ * open (i.e., if the session loop was terminated by an explicit call ++ * to fuse_session_exit()), this function will close it and unmount ++ * the filesystem. If the communication channel has been closed by the ++ * kernel, this method will do (almost) nothing. ++ * ++ * NOTE: The above semantics mean that if the connection to the kernel ++ * is terminated via the ``/sys/fs/fuse/connections/NNN/abort`` file, ++ * this method will *not* unmount the filesystem. ++ * ++ * @param se the session ++ */ ++void fuse_session_unmount(struct fuse_session *se); ++ ++/** ++ * Destroy a session ++ * ++ * @param se the session ++ */ ++void fuse_session_destroy(struct fuse_session *se); ++ ++/* ----------------------------------------------------------- * ++ * Custom event loop support * ++ * ----------------------------------------------------------- */ ++ ++/** ++ * Return file descriptor for communication with kernel. ++ * ++ * The file selector can be used to integrate FUSE with a custom event ++ * loop. Whenever data is available for reading on the provided fd, ++ * the event loop should call `fuse_session_receive_buf` followed by ++ * `fuse_session_process_buf` to process the request. ++ * ++ * The returned file descriptor is valid until `fuse_session_unmount` ++ * is called. ++ * ++ * @param se the session ++ * @return a file descriptor ++ */ ++int fuse_session_fd(struct fuse_session *se); ++ ++/** ++ * Process a raw request supplied in a generic buffer ++ * ++ * The fuse_buf may contain a memory buffer or a pipe file descriptor. 
++ * ++ * @param se the session ++ * @param buf the fuse_buf containing the request ++ */ ++void fuse_session_process_buf(struct fuse_session *se, ++ const struct fuse_buf *buf); ++ ++/** ++ * Read a raw request from the kernel into the supplied buffer. ++ * ++ * Depending on file system options, system capabilities, and request ++ * size the request is either read into a memory buffer or spliced ++ * into a temporary pipe. ++ * ++ * @param se the session ++ * @param buf the fuse_buf to store the request in ++ * @return the actual size of the raw request, or -errno on error ++ */ ++int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* FUSE_LOWLEVEL_H_ */ +diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h +new file mode 100644 +index 0000000..2f6663e +--- /dev/null ++++ b/tools/virtiofsd/fuse_misc.h +@@ -0,0 +1,59 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB ++*/ ++ ++#include <pthread.h> ++ ++/* ++ Versioned symbols cannot be used in some cases because it ++ - confuse the dynamic linker in uClibc ++ - not supported on MacOSX (in MachO binary format) ++*/ ++#if (!defined(__UCLIBC__) && !defined(__APPLE__)) ++#define FUSE_SYMVER(x) __asm__(x) ++#else ++#define FUSE_SYMVER(x) ++#endif ++ ++#ifndef USE_UCLIBC ++#define fuse_mutex_init(mut) pthread_mutex_init(mut, NULL) ++#else ++/* Is this hack still needed?
*/ ++static inline void fuse_mutex_init(pthread_mutex_t *mut) ++{ ++ pthread_mutexattr_t attr; ++ pthread_mutexattr_init(&attr); ++ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); ++ pthread_mutex_init(mut, &attr); ++ pthread_mutexattr_destroy(&attr); ++} ++#endif ++ ++#ifdef HAVE_STRUCT_STAT_ST_ATIM ++/* Linux */ ++#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atim.tv_nsec) ++#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctim.tv_nsec) ++#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtim.tv_nsec) ++#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atim.tv_nsec = (val) ++#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctim.tv_nsec = (val) ++#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtim.tv_nsec = (val) ++#elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC) ++/* FreeBSD */ ++#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atimespec.tv_nsec) ++#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctimespec.tv_nsec) ++#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtimespec.tv_nsec) ++#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atimespec.tv_nsec = (val) ++#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctimespec.tv_nsec = (val) ++#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtimespec.tv_nsec = (val) ++#else ++#define ST_ATIM_NSEC(stbuf) 0 ++#define ST_CTIM_NSEC(stbuf) 0 ++#define ST_MTIM_NSEC(stbuf) 0 ++#define ST_ATIM_NSEC_SET(stbuf, val) do { } while (0) ++#define ST_CTIM_NSEC_SET(stbuf, val) do { } while (0) ++#define ST_MTIM_NSEC_SET(stbuf, val) do { } while (0) ++#endif +diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h +new file mode 100644 +index 0000000..d8573e7 +--- /dev/null ++++ b/tools/virtiofsd/fuse_opt.h +@@ -0,0 +1,271 @@ ++/* ++ FUSE: Filesystem in Userspace ++ Copyright (C) 2001-2007 Miklos Szeredi ++ ++ This program can be distributed under the terms of the GNU LGPLv2. ++ See the file COPYING.LIB. 
++*/ ++ ++#ifndef FUSE_OPT_H_ ++#define FUSE_OPT_H_ ++ ++/** @file ++ * ++ * This file defines the option parsing interface of FUSE ++ */ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/** ++ * Option description ++ * ++ * This structure describes a single option, and action associated ++ * with it, in case it matches. ++ * ++ * More than one such match may occur, in which case the action for ++ * each match is executed. ++ * ++ * There are three possible actions in case of a match: ++ * ++ * i) An integer (int or unsigned) variable determined by 'offset' is ++ * set to 'value' ++ * ++ * ii) The processing function is called, with 'value' as the key ++ * ++ * iii) An integer (any) or string (char *) variable determined by ++ * 'offset' is set to the value of an option parameter ++ * ++ * 'offset' should normally be either set to ++ * ++ * - 'offsetof(struct foo, member)' actions i) and iii) ++ * ++ * - -1 action ii) ++ * ++ * The 'offsetof()' macro is defined in the <stddef.h> header. ++ * ++ * The template determines which options match, and also have an ++ * effect on the action. Normally the action is either i) or ii), but ++ * if a format is present in the template, then action iii) is ++ * performed. ++ * ++ * The types of templates are: ++ * ++ * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only ++ * themselves. Invalid values are "--" and anything beginning ++ * with "-o" ++ * ++ * 2) "foo", "foo-bar", etc. These match "-ofoo", "-ofoo-bar" or ++ * the relevant option in a comma separated option list ++ * ++ * 3) "bar=", "--foo=", etc. These are variations of 1) and 2) ++ * which have a parameter ++ * ++ * 4) "bar=%s", "--foo=%lu", etc. Same matching as above but perform ++ * action iii). ++ * ++ * 5) "-x ", etc. Matches either "-xparam" or "-x param" as ++ * two separate arguments ++ * ++ * 6) "-x %s", etc. Combination of 4) and 5) ++ * ++ * If the format is "%s", memory is allocated for the string unlike with ++ * scanf().
The previous value (if non-NULL) stored at the this location is ++ * freed. ++ */ ++struct fuse_opt { ++ /** Matching template and optional parameter formatting */ ++ const char *templ; ++ ++ /** ++ * Offset of variable within 'data' parameter of fuse_opt_parse() ++ * or -1 ++ */ ++ unsigned long offset; ++ ++ /** ++ * Value to set the variable to, or to be passed as 'key' to the ++ * processing function. Ignored if template has a format ++ */ ++ int value; ++}; ++ ++/** ++ * Key option. In case of a match, the processing function will be ++ * called with the specified key. ++ */ ++#define FUSE_OPT_KEY(templ, key) { templ, -1U, key } ++ ++/** ++ * Last option. An array of 'struct fuse_opt' must end with a NULL ++ * template value ++ */ ++#define FUSE_OPT_END { NULL, 0, 0 } ++ ++/** ++ * Argument list ++ */ ++struct fuse_args { ++ /** Argument count */ ++ int argc; ++ ++ /** Argument vector. NULL terminated */ ++ char **argv; ++ ++ /** Is 'argv' allocated? */ ++ int allocated; ++}; ++ ++/** ++ * Initializer for 'struct fuse_args' ++ */ ++#define FUSE_ARGS_INIT(argc, argv) { argc, argv, 0 } ++ ++/** ++ * Key value passed to the processing function if an option did not ++ * match any template ++ */ ++#define FUSE_OPT_KEY_OPT -1 ++ ++/** ++ * Key value passed to the processing function for all non-options ++ * ++ * Non-options are the arguments beginning with a character other than ++ * '-' or all arguments after the special '--' option ++ */ ++#define FUSE_OPT_KEY_NONOPT -2 ++ ++/** ++ * Special key value for options to keep ++ * ++ * Argument is not passed to processing function, but behave as if the ++ * processing function returned 1 ++ */ ++#define FUSE_OPT_KEY_KEEP -3 ++ ++/** ++ * Special key value for options to discard ++ * ++ * Argument is not passed to processing function, but behave as if the ++ * processing function returned zero ++ */ ++#define FUSE_OPT_KEY_DISCARD -4 ++ ++/** ++ * Processing function ++ * ++ * This function is called if ++ * - option did 
not match any 'struct fuse_opt' ++ * - argument is a non-option ++ * - option did match and offset was set to -1 ++ * ++ * The 'arg' parameter will always contain the whole argument or ++ * option including the parameter if exists. A two-argument option ++ * ("-x foo") is always converted to single argument option of the ++ * form "-xfoo" before this function is called. ++ * ++ * Options of the form '-ofoo' are passed to this function without the ++ * '-o' prefix. ++ * ++ * The return value of this function determines whether this argument ++ * is to be inserted into the output argument vector, or discarded. ++ * ++ * @param data is the user data passed to the fuse_opt_parse() function ++ * @param arg is the whole argument or option ++ * @param key determines why the processing function was called ++ * @param outargs the current output argument list ++ * @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept ++ */ ++typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, ++ struct fuse_args *outargs); ++ ++/** ++ * Option parsing function ++ * ++ * If 'args' was returned from a previous call to fuse_opt_parse() or ++ * it was constructed from ++ * ++ * A NULL 'args' is equivalent to an empty argument vector ++ * ++ * A NULL 'opts' is equivalent to an 'opts' array containing a single ++ * end marker ++ * ++ * A NULL 'proc' is equivalent to a processing function always ++ * returning '1' ++ * ++ * @param args is the input and output argument list ++ * @param data is the user data ++ * @param opts is the option description array ++ * @param proc is the processing function ++ * @return -1 on error, 0 on success ++ */ ++int fuse_opt_parse(struct fuse_args *args, void *data, ++ const struct fuse_opt opts[], fuse_opt_proc_t proc); ++ ++/** ++ * Add an option to a comma separated option list ++ * ++ * @param opts is a pointer to an option list, may point to a NULL value ++ * @param opt is the option to add ++ * @return -1 on allocation 
error, 0 on success ++ */ ++int fuse_opt_add_opt(char **opts, const char *opt); ++ ++/** ++ * Add an option, escaping commas, to a comma separated option list ++ * ++ * @param opts is a pointer to an option list, may point to a NULL value ++ * @param opt is the option to add ++ * @return -1 on allocation error, 0 on success ++ */ ++int fuse_opt_add_opt_escaped(char **opts, const char *opt); ++ ++/** ++ * Add an argument to a NULL terminated argument vector ++ * ++ * @param args is the structure containing the current argument list ++ * @param arg is the new argument to add ++ * @return -1 on allocation error, 0 on success ++ */ ++int fuse_opt_add_arg(struct fuse_args *args, const char *arg); ++ ++/** ++ * Add an argument at the specified position in a NULL terminated ++ * argument vector ++ * ++ * Adds the argument to the N-th position. This is useful for adding ++ * options at the beginning of the array which must not come after the ++ * special '--' option. ++ * ++ * @param args is the structure containing the current argument list ++ * @param pos is the position at which to add the argument ++ * @param arg is the new argument to add ++ * @return -1 on allocation error, 0 on success ++ */ ++int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg); ++ ++/** ++ * Free the contents of argument list ++ * ++ * The structure itself is not freed ++ * ++ * @param args is the structure containing the argument list ++ */ ++void fuse_opt_free_args(struct fuse_args *args); ++ ++ ++/** ++ * Check if an option matches ++ * ++ * @param opts is the option description array ++ * @param opt is the option to match ++ * @return 1 if a match is found, 0 if not ++ */ ++int fuse_opt_match(const struct fuse_opt opts[], const char *opt); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* FUSE_OPT_H_ */ +diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h +new file mode 100644 +index 0000000..6b77c33 +--- /dev/null ++++ 
b/tools/virtiofsd/passthrough_helpers.h +@@ -0,0 +1,76 @@ ++/* ++ * FUSE: Filesystem in Userspace ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE ++ */ ++ ++/* ++ * Creates files on the underlying file system in response to a FUSE_MKNOD ++ * operation ++ */ ++static int mknod_wrapper(int dirfd, const char *path, const char *link, ++ int mode, dev_t rdev) ++{ ++ int res; ++ ++ if (S_ISREG(mode)) { ++ res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); ++ if (res >= 0) ++ res = close(res); ++ } else if (S_ISDIR(mode)) { ++ res = mkdirat(dirfd, path, mode); ++ } else if (S_ISLNK(mode) && link != NULL) { ++ res = symlinkat(link, dirfd, path); ++ } else if (S_ISFIFO(mode)) { ++ res = mkfifoat(dirfd, path, mode); 
++#ifdef __FreeBSD__ ++ } else if (S_ISSOCK(mode)) { ++ struct sockaddr_un su; ++ int fd; ++ ++ if (strlen(path) >= sizeof(su.sun_path)) { ++ errno = ENAMETOOLONG; ++ return -1; ++ } ++ fd = socket(AF_UNIX, SOCK_STREAM, 0); ++ if (fd >= 0) { ++ /* ++ * We must bind the socket to the underlying file ++ * system to create the socket file, even though ++ * we'll never listen on this socket. ++ */ ++ su.sun_family = AF_UNIX; ++ strncpy(su.sun_path, path, sizeof(su.sun_path)); ++ res = bindat(dirfd, fd, (struct sockaddr*)&su, ++ sizeof(su)); ++ if (res == 0) ++ close(fd); ++ } else { ++ res = -1; ++ } ++#endif ++ } else { ++ res = mknodat(dirfd, path, mode, rdev); ++ } ++ ++ return res; ++} +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch b/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch new file mode 100644 index 0000000..7f9c5bb --- /dev/null +++ b/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch @@ -0,0 +1,271 @@ +From 80237df2b22eca685037456e65d149fed4654165 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:48 +0100 +Subject: [PATCH 017/116] virtiofsd: Remove unused enum fuse_buf_copy_flags +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-14-dgilbert@redhat.com> +Patchwork-id: 93465 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 013/112] virtiofsd: Remove unused enum fuse_buf_copy_flags +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Xiao Yang + +Signed-off-by: Xiao Yang +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 8c3fe75e0308ba2f01d160ace534b7e386cea808) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 7 +++--- + tools/virtiofsd/fuse_common.h | 46 +--------------------------------------- + tools/virtiofsd/fuse_lowlevel.c | 13 +++++------- + tools/virtiofsd/fuse_lowlevel.h | 35 ++---------------------------- + tools/virtiofsd/passthrough_ll.c | 4 ++-- + 5 files changed, 13 insertions(+), 92 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 5df946c..4d507f3 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -171,7 +171,7 @@ static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, + + static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, + const struct fuse_buf *src, size_t src_off, +- size_t len, enum fuse_buf_copy_flags flags) ++ size_t len) + { + int src_is_fd = src->flags & FUSE_BUF_IS_FD; + int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; +@@ -224,8 +224,7 @@ static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) + return 1; + } + +-ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, +- enum fuse_buf_copy_flags flags) ++ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) + { + size_t copied = 0; + +@@ -249,7 +248,7 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, + dst_len = dst->size - dstv->off; + len = min_size(src_len, dst_len); + +- res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); ++ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len); + if (res < 0) { + if (!copied) { + return res; +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index bd9bf86..0cb33ac 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -605,48 +605,6 @@ enum fuse_buf_flags { + }; + + /** +- * Buffer copy flags +- */ +-enum fuse_buf_copy_flags { +- /** +- * Don't 
use splice(2) +- * +- * Always fall back to using read and write instead of +- * splice(2) to copy data from one file descriptor to another. +- * +- * If this flag is not set, then only fall back if splice is +- * unavailable. +- */ +- FUSE_BUF_NO_SPLICE = (1 << 1), +- +- /** +- * Force splice +- * +- * Always use splice(2) to copy data from one file descriptor +- * to another. If splice is not available, return -EINVAL. +- */ +- FUSE_BUF_FORCE_SPLICE = (1 << 2), +- +- /** +- * Try to move data with splice. +- * +- * If splice is used, try to move pages from the source to the +- * destination instead of copying. See documentation of +- * SPLICE_F_MOVE in splice(2) man page. +- */ +- FUSE_BUF_SPLICE_MOVE = (1 << 3), +- +- /** +- * Don't block on the pipe when copying data with splice +- * +- * Makes the operations on the pipe non-blocking (if the pipe +- * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) +- * man page. +- */ +- FUSE_BUF_SPLICE_NONBLOCK = (1 << 4), +-}; +- +-/** + * Single data buffer + * + * Generic data buffer for I/O, extended attributes, etc... 
Data may +@@ -741,11 +699,9 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); + * + * @param dst destination buffer vector + * @param src source buffer vector +- * @param flags flags controlling the copy + * @return actual number of bytes copied or -errno on error + */ +-ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, +- enum fuse_buf_copy_flags flags); ++ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src); + + /* + * Signal handling +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index eb0ec49..3da80de 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -490,16 +490,14 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, + + static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + struct iovec *iov, int iov_count, +- struct fuse_bufvec *buf, unsigned int flags) ++ struct fuse_bufvec *buf) + { + size_t len = fuse_buf_size(buf); +- (void)flags; + + return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); + } + +-int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags) ++int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) + { + struct iovec iov[2]; + struct fuse_out_header out; +@@ -511,7 +509,7 @@ int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, + out.unique = req->unique; + out.error = 0; + +- res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); ++ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv); + if (res <= 0) { + fuse_free_req(req); + return res; +@@ -1969,8 +1967,7 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + } + + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags) ++ off_t offset, struct fuse_bufvec *bufv) + { + struct fuse_out_header out; + struct fuse_notify_store_out outarg; 
+@@ -1999,7 +1996,7 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + +- res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); ++ res = fuse_send_data_iov(se, NULL, iov, 2, bufv); + if (res > 0) { + res = -res; + } +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 12a84b4..2fa225d 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1363,33 +1363,6 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); + /** + * Reply with data copied/moved from buffer(s) + * +- * Zero copy data transfer ("splicing") will be used under +- * the following circumstances: +- * +- * 1. FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.want, and +- * 2. the kernel supports splicing from the fuse device +- * (FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.capable), and +- * 3. *flags* does not contain FUSE_BUF_NO_SPLICE +- * 4. The amount of data that is provided in file-descriptor backed +- * buffers (i.e., buffers for which bufv[n].flags == FUSE_BUF_FD) +- * is at least twice the page size. +- * +- * In order for SPLICE_F_MOVE to be used, the following additional +- * conditions have to be fulfilled: +- * +- * 1. FUSE_CAP_SPLICE_MOVE is set in fuse_conn_info.want, and +- * 2. the kernel supports it (i.e, FUSE_CAP_SPLICE_MOVE is set in +- fuse_conn_info.capable), and +- * 3. *flags* contains FUSE_BUF_SPLICE_MOVE +- * +- * Note that, if splice is used, the data is actually spliced twice: +- * once into a temporary pipe (to prepend header data), and then again +- * into the kernel. If some of the provided buffers are memory-backed, +- * the data in them is copied in step one and spliced in step two. +- * +- * The FUSE_BUF_SPLICE_FORCE_SPLICE and FUSE_BUF_SPLICE_NONBLOCK flags +- * are silently ignored. 
+- * + * Possible requests: + * read, readdir, getxattr, listxattr + * +@@ -1400,11 +1373,9 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); + * + * @param req request handle + * @param bufv buffer vector +- * @param flags flags controlling the copy + * @return zero for success, -errno for failure to send reply + */ +-int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags); ++int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv); + + /** + * Reply with data vector +@@ -1705,12 +1676,10 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + * @param ino the inode number + * @param offset the starting offset into the file to store to + * @param bufv buffer vector +- * @param flags flags controlling the copy + * @return zero for success, -errno for failure + */ + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv, +- enum fuse_buf_copy_flags flags); ++ off_t offset, struct fuse_bufvec *bufv); + + /* + * Utility functions +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 9377718..126a56c 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -931,7 +931,7 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, + buf.buf[0].fd = fi->fh; + buf.buf[0].pos = offset; + +- fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); ++ fuse_reply_data(req, &buf); + } + + static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, +@@ -952,7 +952,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + out_buf.buf[0].size, (unsigned long)off); + } + +- res = fuse_buf_copy(&out_buf, in_buf, 0); ++ res = fuse_buf_copy(&out_buf, in_buf); + if (res < 0) { + fuse_reply_err(req, -res); + } else { +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch 
b/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch new file mode 100644 index 0000000..e1a3cd1 --- /dev/null +++ b/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch @@ -0,0 +1,72 @@ +From b8d62021f28114f054571b96ec0cd4dad4476923 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:14 +0100 +Subject: [PATCH 103/116] virtiofsd: Reset O_DIRECT flag during file open +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-100-dgilbert@redhat.com> +Patchwork-id: 93553 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 099/112] virtiofsd: Reset O_DIRECT flag during file open +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +If an application wants to do direct IO and opens a file with O_DIRECT +in guest, that does not necessarily mean that we need to bypass page +cache on host as well. So reset this flag on host. + +If somebody needs to bypass page cache on host as well (and it is safe to +do so), we can add a knob in daemon later to control this behavior. + +I check virtio-9p and they do reset O_DIRECT flag. + +Signed-off-by: Vivek Goyal +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 65da4539803373ec4eec97ffc49ee90083e56efd) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index ccbbec1..948cb19 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1721,6 +1721,13 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + goto out; + } + ++ /* ++ * O_DIRECT in guest should not necessarily mean bypassing page ++ * cache on host as well. 
If somebody needs that behavior, it ++ * probably should be a configuration knob in daemon. ++ */ ++ fi->flags &= ~O_DIRECT; ++ + fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, + mode); + err = fd == -1 ? errno : 0; +@@ -1950,6 +1957,13 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + fi->flags &= ~O_APPEND; + } + ++ /* ++ * O_DIRECT in guest should not necessarily mean bypassing page ++ * cache on host as well. If somebody needs that behavior, it ++ * probably should be a configuration knob in daemon. ++ */ ++ fi->flags &= ~O_DIRECT; ++ + sprintf(buf, "%i", lo_fd(req, ino)); + fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); + if (fd == -1) { +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Send-replies-to-messages.patch b/kvm-virtiofsd-Send-replies-to-messages.patch new file mode 100644 index 0000000..5453fda --- /dev/null +++ b/kvm-virtiofsd-Send-replies-to-messages.patch @@ -0,0 +1,199 @@ +From bb1f691dc410ce11ac9675ced70e78a3ce2511b0 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:03 +0100 +Subject: [PATCH 032/116] virtiofsd: Send replies to messages +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-29-dgilbert@redhat.com> +Patchwork-id: 93485 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 028/112] virtiofsd: Send replies to messages +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Route fuse out messages back through the same queue elements +that had the command that triggered the request. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit df57ba919ec3edef9cc208d35685095e6e92713e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 4 ++ + tools/virtiofsd/fuse_virtio.c | 107 ++++++++++++++++++++++++++++++++++++++-- + tools/virtiofsd/fuse_virtio.h | 4 ++ + 3 files changed, 111 insertions(+), 4 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index af09fa2..380d93b 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -171,6 +171,10 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, + } + } + ++ if (fuse_lowlevel_is_virtio(se)) { ++ return virtio_send_msg(se, ch, iov, count); ++ } ++ + abort(); /* virtio should have taken it before here */ + return 0; + } +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 3841b20..05d0e29 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -41,6 +41,9 @@ struct fv_QueueInfo { + /* Our queue index, corresponds to array position */ + int qidx; + int kick_fd; ++ ++ /* The element for the command currently being processed */ ++ VuVirtqElement *qe; + }; + + /* +@@ -121,6 +124,105 @@ static void copy_from_iov(struct fuse_buf *buf, size_t out_num, + } + } + ++/* ++ * Copy from one iov to another, the given number of bytes ++ * The caller must have checked sizes. ++ */ ++static void copy_iov(struct iovec *src_iov, int src_count, ++ struct iovec *dst_iov, int dst_count, size_t to_copy) ++{ ++ size_t dst_offset = 0; ++ /* Outer loop copies 'src' elements */ ++ while (to_copy) { ++ assert(src_count); ++ size_t src_len = src_iov[0].iov_len; ++ size_t src_offset = 0; ++ ++ if (src_len > to_copy) { ++ src_len = to_copy; ++ } ++ /* Inner loop copies contents of one 'src' to maybe multiple dst. 
*/ ++ while (src_len) { ++ assert(dst_count); ++ size_t dst_len = dst_iov[0].iov_len - dst_offset; ++ if (dst_len > src_len) { ++ dst_len = src_len; ++ } ++ ++ memcpy(dst_iov[0].iov_base + dst_offset, ++ src_iov[0].iov_base + src_offset, dst_len); ++ src_len -= dst_len; ++ to_copy -= dst_len; ++ src_offset += dst_len; ++ dst_offset += dst_len; ++ ++ assert(dst_offset <= dst_iov[0].iov_len); ++ if (dst_offset == dst_iov[0].iov_len) { ++ dst_offset = 0; ++ dst_iov++; ++ dst_count--; ++ } ++ } ++ src_iov++; ++ src_count--; ++ } ++} ++ ++/* ++ * Called back by ll whenever it wants to send a reply/message back ++ * The 1st element of the iov starts with the fuse_out_header ++ * 'unique'==0 means it's a notify message. ++ */ ++int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int count) ++{ ++ VuVirtqElement *elem; ++ VuVirtq *q; ++ ++ assert(count >= 1); ++ assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); ++ ++ struct fuse_out_header *out = iov[0].iov_base; ++ /* TODO: Endianness! */ ++ ++ size_t tosend_len = iov_size(iov, count); ++ ++ /* unique == 0 is notification, which we don't support */ ++ assert(out->unique); ++ /* For virtio we always have ch */ ++ assert(ch); ++ elem = ch->qi->qe; ++ q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; ++ ++ /* The 'in' part of the elem is to qemu */ ++ unsigned int in_num = elem->in_num; ++ struct iovec *in_sg = elem->in_sg; ++ size_t in_len = iov_size(in_sg, in_num); ++ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n", ++ __func__, elem->index, in_num, in_len); ++ ++ /* ++ * The elem should have room for a 'fuse_out_header' (out from fuse) ++ * plus the data based on the len in the header. 
++ */ ++ if (in_len < sizeof(struct fuse_out_header)) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", ++ __func__, elem->index); ++ return -E2BIG; ++ } ++ if (in_len < tosend_len) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", ++ __func__, elem->index, tosend_len); ++ return -E2BIG; ++ } ++ ++ copy_iov(iov, count, in_sg, in_num, tosend_len); ++ vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); ++ vu_queue_notify(&se->virtio_dev->dev, q); ++ ++ return 0; ++} ++ + /* Thread function for individual queues, created when a queue is 'started' */ + static void *fv_queue_thread(void *opaque) + { +@@ -226,13 +328,10 @@ static void *fv_queue_thread(void *opaque) + + /* TODO! Endianness of header */ + +- /* TODO: Fixup fuse_send_msg */ + /* TODO: Add checks for fuse_session_exited */ + fuse_session_process_buf_int(se, &fbuf, &ch); + +- /* TODO: vu_queue_push(dev, q, elem, qi->write_count); */ +- vu_queue_notify(dev, q); +- ++ qi->qe = NULL; + free(elem); + elem = NULL; + } +diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h +index 23026d6..135a148 100644 +--- a/tools/virtiofsd/fuse_virtio.h ++++ b/tools/virtiofsd/fuse_virtio.h +@@ -22,4 +22,8 @@ int virtio_session_mount(struct fuse_session *se); + + int virtio_loop(struct fuse_session *se); + ++ ++int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, ++ struct iovec *iov, int count); ++ + #endif +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Start-queue-threads.patch b/kvm-virtiofsd-Start-queue-threads.patch new file mode 100644 index 0000000..8b03cd6 --- /dev/null +++ b/kvm-virtiofsd-Start-queue-threads.patch @@ -0,0 +1,165 @@ +From 38282d996cde61261211160577b366b83cad8012 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:00 +0100 +Subject: [PATCH 029/116] virtiofsd: Start queue threads +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-26-dgilbert@redhat.com> +Patchwork-id: 93479 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 025/112] virtiofsd: Start queue threads +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Start a thread for each queue when we get notified it's been started. + +Signed-off-by: Dr. David Alan Gilbert +fix by: +Signed-off-by: Jun Piao +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e4c55a3c144493b436e40031e2eed61a84eca47b) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 89 +++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 89 insertions(+) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 4819e56..2a94bb3 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -11,6 +11,7 @@ + * See the file COPYING.LIB + */ + ++#include "qemu/osdep.h" + #include "fuse_virtio.h" + #include "fuse_i.h" + #include "standard-headers/linux/fuse.h" +@@ -30,6 +31,15 @@ + + #include "contrib/libvhost-user/libvhost-user.h" + ++struct fv_QueueInfo { ++ pthread_t thread; ++ struct fv_VuDev *virtio_dev; ++ ++ /* Our queue index, corresponds to array position */ ++ int qidx; ++ int kick_fd; ++}; ++ + /* + * We pass the dev element into libvhost-user + * and then use it to get back to the outer +@@ -38,6 +48,13 @@ + struct fv_VuDev { + VuDev dev; + struct fuse_session *se; ++ ++ /* ++ * The following pair of fields are only accessed in the main ++ * virtio_loop ++ */ ++ size_t nqueues; ++ struct fv_QueueInfo **qi; + }; + + /* From spec */ +@@ -83,6 +100,75 @@ static void fv_panic(VuDev *dev, const char *err) + exit(EXIT_FAILURE); + } + ++static void *fv_queue_thread(void *opaque) ++{ ++ struct fv_QueueInfo *qi = opaque; ++ fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d 
kick_fd %d\n", __func__, ++ qi->qidx, qi->kick_fd); ++ while (1) { ++ /* TODO */ ++ } ++ ++ return NULL; ++} ++ ++/* Callback from libvhost-user on start or stop of a queue */ ++static void fv_queue_set_started(VuDev *dev, int qidx, bool started) ++{ ++ struct fv_VuDev *vud = container_of(dev, struct fv_VuDev, dev); ++ struct fv_QueueInfo *ourqi; ++ ++ fuse_log(FUSE_LOG_INFO, "%s: qidx=%d started=%d\n", __func__, qidx, ++ started); ++ assert(qidx >= 0); ++ ++ /* ++ * Ignore additional request queues for now. passthrough_ll.c must be ++ * audited for thread-safety issues first. It was written with a ++ * well-behaved client in mind and may not protect against all types of ++ * races yet. ++ */ ++ if (qidx > 1) { ++ fuse_log(FUSE_LOG_ERR, ++ "%s: multiple request queues not yet implemented, please only " ++ "configure 1 request queue\n", ++ __func__); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (started) { ++ /* Fire up a thread to watch this queue */ ++ if (qidx >= vud->nqueues) { ++ vud->qi = realloc(vud->qi, (qidx + 1) * sizeof(vud->qi[0])); ++ assert(vud->qi); ++ memset(vud->qi + vud->nqueues, 0, ++ sizeof(vud->qi[0]) * (1 + (qidx - vud->nqueues))); ++ vud->nqueues = qidx + 1; ++ } ++ if (!vud->qi[qidx]) { ++ vud->qi[qidx] = calloc(sizeof(struct fv_QueueInfo), 1); ++ assert(vud->qi[qidx]); ++ vud->qi[qidx]->virtio_dev = vud; ++ vud->qi[qidx]->qidx = qidx; ++ } else { ++ /* Shouldn't have been started */ ++ assert(vud->qi[qidx]->kick_fd == -1); ++ } ++ ourqi = vud->qi[qidx]; ++ ourqi->kick_fd = dev->vq[qidx].kick_fd; ++ if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { ++ fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", ++ __func__, qidx); ++ assert(0); ++ } ++ } else { ++ /* TODO: Kill the thread */ ++ assert(qidx < vud->nqueues); ++ ourqi = vud->qi[qidx]; ++ ourqi->kick_fd = -1; ++ } ++} ++ + static bool fv_queue_order(VuDev *dev, int qidx) + { + return false; +@@ -92,6 +178,9 @@ static const VuDevIface fv_iface = { + .get_features 
= fv_get_features, + .set_features = fv_set_features, + ++ /* Don't need process message, we've not got any at vhost-user level */ ++ .queue_set_started = fv_queue_set_started, ++ + .queue_is_processed_in_order = fv_queue_order, + }; + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Start-reading-commands-from-queue.patch b/kvm-virtiofsd-Start-reading-commands-from-queue.patch new file mode 100644 index 0000000..2022480 --- /dev/null +++ b/kvm-virtiofsd-Start-reading-commands-from-queue.patch @@ -0,0 +1,200 @@ +From b4af2eff8ecadb4e2c9520602455f77fac2cb943 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:02 +0100 +Subject: [PATCH 031/116] virtiofsd: Start reading commands from queue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-28-dgilbert@redhat.com> +Patchwork-id: 93484 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 027/112] virtiofsd: Start reading commands from queue +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Pop queue elements off queues, copy the data from them and +pass that to fuse. + + Note: 'out' in a VuVirtqElement is from QEMU + 'in' in libfuse is into the daemon + + So we read from the out iov's to get a fuse_in_header + +When we get a kick we've got to read all the elements until the queue +is empty. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit b509e1228b3e5eb83c14819045988999fc2dbd1b) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 2 + + tools/virtiofsd/fuse_virtio.c | 99 +++++++++++++++++++++++++++++++++++++++++-- + 2 files changed, 98 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index ec04449..1126723 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -14,6 +14,7 @@ + #include "fuse_lowlevel.h" + + struct fv_VuDev; ++struct fv_QueueInfo; + + struct fuse_req { + struct fuse_session *se; +@@ -75,6 +76,7 @@ struct fuse_chan { + pthread_mutex_t lock; + int ctr; + int fd; ++ struct fv_QueueInfo *qi; + }; + + /** +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 05e7258..3841b20 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -12,6 +12,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/iov.h" + #include "fuse_virtio.h" + #include "fuse_i.h" + #include "standard-headers/linux/fuse.h" +@@ -32,6 +33,7 @@ + + #include "contrib/libvhost-user/libvhost-user.h" + ++struct fv_VuDev; + struct fv_QueueInfo { + pthread_t thread; + struct fv_VuDev *virtio_dev; +@@ -101,10 +103,41 @@ static void fv_panic(VuDev *dev, const char *err) + exit(EXIT_FAILURE); + } + ++/* ++ * Copy from an iovec into a fuse_buf (memory only) ++ * Caller must ensure there is space ++ */ ++static void copy_from_iov(struct fuse_buf *buf, size_t out_num, ++ const struct iovec *out_sg) ++{ ++ void *dest = buf->mem; ++ ++ while (out_num) { ++ size_t onelen = out_sg->iov_len; ++ memcpy(dest, out_sg->iov_base, onelen); ++ dest += onelen; ++ out_sg++; ++ out_num--; ++ } ++} ++ + /* Thread function for individual queues, created when a queue is 'started' */ + static void *fv_queue_thread(void *opaque) + { + struct fv_QueueInfo *qi = opaque; ++ struct VuDev *dev = &qi->virtio_dev->dev; ++ struct VuVirtq *q = vu_get_queue(dev, qi->qidx); ++ 
struct fuse_session *se = qi->virtio_dev->se; ++ struct fuse_chan ch; ++ struct fuse_buf fbuf; ++ ++ fbuf.mem = NULL; ++ fbuf.flags = 0; ++ ++ fuse_mutex_init(&ch.lock); ++ ch.fd = (int)0xdaff0d111; ++ ch.qi = qi; ++ + fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, + qi->qidx, qi->kick_fd); + while (1) { +@@ -141,11 +174,71 @@ static void *fv_queue_thread(void *opaque) + fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); + break; + } +- if (qi->virtio_dev->se->debug) { +- fprintf(stderr, "%s: Queue %d gave evalue: %zx\n", __func__, +- qi->qidx, (size_t)evalue); ++ /* out is from guest, in is too guest */ ++ unsigned int in_bytes, out_bytes; ++ vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); ++ ++ fuse_log(FUSE_LOG_DEBUG, ++ "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", ++ __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); ++ ++ while (1) { ++ /* ++ * An element contains one request and the space to send our ++ * response They're spread over multiple descriptors in a ++ * scatter/gather set and we can't trust the guest to keep them ++ * still; so copy in/out. ++ */ ++ VuVirtqElement *elem = vu_queue_pop(dev, q, sizeof(VuVirtqElement)); ++ if (!elem) { ++ break; ++ } ++ ++ if (!fbuf.mem) { ++ fbuf.mem = malloc(se->bufsize); ++ assert(fbuf.mem); ++ assert(se->bufsize > sizeof(struct fuse_in_header)); ++ } ++ /* The 'out' part of the elem is from qemu */ ++ unsigned int out_num = elem->out_num; ++ struct iovec *out_sg = elem->out_sg; ++ size_t out_len = iov_size(out_sg, out_num); ++ fuse_log(FUSE_LOG_DEBUG, ++ "%s: elem %d: with %d out desc of length %zd\n", __func__, ++ elem->index, out_num, out_len); ++ ++ /* ++ * The elem should contain a 'fuse_in_header' (in to fuse) ++ * plus the data based on the len in the header. 
++ */ ++ if (out_len < sizeof(struct fuse_in_header)) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", ++ __func__, elem->index); ++ assert(0); /* TODO */ ++ } ++ if (out_len > se->bufsize) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", ++ __func__, elem->index); ++ assert(0); /* TODO */ ++ } ++ copy_from_iov(&fbuf, out_num, out_sg); ++ fbuf.size = out_len; ++ ++ /* TODO! Endianness of header */ ++ ++ /* TODO: Fixup fuse_send_msg */ ++ /* TODO: Add checks for fuse_session_exited */ ++ fuse_session_process_buf_int(se, &fbuf, &ch); ++ ++ /* TODO: vu_queue_push(dev, q, elem, qi->write_count); */ ++ vu_queue_notify(dev, q); ++ ++ free(elem); ++ elem = NULL; + } + } ++ pthread_mutex_destroy(&ch.lock); ++ free(fbuf.mem); + + return NULL; + } +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Start-wiring-up-vhost-user.patch b/kvm-virtiofsd-Start-wiring-up-vhost-user.patch new file mode 100644 index 0000000..7b50118 --- /dev/null +++ b/kvm-virtiofsd-Start-wiring-up-vhost-user.patch @@ -0,0 +1,247 @@ +From 020f593031b0b54e4c35faffea489b700aed6a72 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:57 +0100 +Subject: [PATCH 026/116] virtiofsd: Start wiring up vhost-user +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-23-dgilbert@redhat.com> +Patchwork-id: 93477 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 022/112] virtiofsd: Start wiring up vhost-user +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Listen on our unix socket for the connection from QEMU, when we get it +initialise vhost-user and dive into our own loop variant (currently +dummy). + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit f6f3573c6f271af5ded63ce28589a113f7205c72) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 4 ++ + tools/virtiofsd/fuse_lowlevel.c | 5 +++ + tools/virtiofsd/fuse_lowlevel.h | 7 ++++ + tools/virtiofsd/fuse_virtio.c | 87 +++++++++++++++++++++++++++++++++++++++- + tools/virtiofsd/fuse_virtio.h | 2 + + tools/virtiofsd/passthrough_ll.c | 7 +--- + 6 files changed, 106 insertions(+), 6 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index 82d6ac7..ec04449 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -13,6 +13,8 @@ + #include "fuse.h" + #include "fuse_lowlevel.h" + ++struct fv_VuDev; ++ + struct fuse_req { + struct fuse_session *se; + uint64_t unique; +@@ -65,6 +67,8 @@ struct fuse_session { + size_t bufsize; + int error; + char *vu_socket_path; ++ int vu_socketfd; ++ struct fv_VuDev *virtio_dev; + }; + + struct fuse_chan { +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 5df124e..af09fa2 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2242,6 +2242,11 @@ void fuse_session_unmount(struct fuse_session *se) + { + } + ++int fuse_lowlevel_is_virtio(struct fuse_session *se) ++{ ++ return se->vu_socket_path != NULL; ++} ++ + #ifdef linux + int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) + { +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 2fa225d..f6b3470 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1755,6 +1755,13 @@ void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, + */ + int fuse_req_interrupted(fuse_req_t req); + ++/** ++ * Check if the session is connected via virtio ++ * ++ * @param se session object ++ * @return 1 if the session is a virtio session ++ */ ++int fuse_lowlevel_is_virtio(struct fuse_session *se); + + /* + * Inquiry functions +diff --git 
a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index cbef6ff..2ae3c76 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -19,18 +19,78 @@ + + #include + #include ++#include + #include + #include + #include + #include + #include + ++#include "contrib/libvhost-user/libvhost-user.h" ++ ++/* ++ * We pass the dev element into libvhost-user ++ * and then use it to get back to the outer ++ * container for other data. ++ */ ++struct fv_VuDev { ++ VuDev dev; ++ struct fuse_session *se; ++}; ++ + /* From spec */ + struct virtio_fs_config { + char tag[36]; + uint32_t num_queues; + }; + ++/* ++ * Callback from libvhost-user if there's a new fd we're supposed to listen ++ * to, typically a queue kick? ++ */ ++static void fv_set_watch(VuDev *dev, int fd, int condition, vu_watch_cb cb, ++ void *data) ++{ ++ fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd); ++} ++ ++/* ++ * Callback from libvhost-user if we're no longer supposed to listen on an fd ++ */ ++static void fv_remove_watch(VuDev *dev, int fd) ++{ ++ fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd); ++} ++ ++/* Callback from libvhost-user to panic */ ++static void fv_panic(VuDev *dev, const char *err) ++{ ++ fuse_log(FUSE_LOG_ERR, "%s: libvhost-user: %s\n", __func__, err); ++ /* TODO: Allow reconnects?? 
*/ ++ exit(EXIT_FAILURE); ++} ++ ++static bool fv_queue_order(VuDev *dev, int qidx) ++{ ++ return false; ++} ++ ++static const VuDevIface fv_iface = { ++ /* TODO: Add other callbacks */ ++ .queue_is_processed_in_order = fv_queue_order, ++}; ++ ++int virtio_loop(struct fuse_session *se) ++{ ++ fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__); ++ ++ while (1) { ++ /* TODO: Add stuffing */ ++ } ++ ++ fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); ++} ++ + int virtio_session_mount(struct fuse_session *se) + { + struct sockaddr_un un; +@@ -75,5 +135,30 @@ int virtio_session_mount(struct fuse_session *se) + return -1; + } + +- return -1; ++ fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n", ++ __func__); ++ int data_sock = accept(listen_sock, NULL, NULL); ++ if (data_sock == -1) { ++ fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n"); ++ close(listen_sock); ++ return -1; ++ } ++ close(listen_sock); ++ fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket connection\n", ++ __func__); ++ ++ /* TODO: Some cleanup/deallocation! 
*/ ++ se->virtio_dev = calloc(sizeof(struct fv_VuDev), 1); ++ if (!se->virtio_dev) { ++ fuse_log(FUSE_LOG_ERR, "%s: virtio_dev calloc failed\n", __func__); ++ close(data_sock); ++ return -1; ++ } ++ ++ se->vu_socketfd = data_sock; ++ se->virtio_dev->se = se; ++ vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch, ++ fv_remove_watch, &fv_iface); ++ ++ return 0; + } +diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h +index 8f2edb6..23026d6 100644 +--- a/tools/virtiofsd/fuse_virtio.h ++++ b/tools/virtiofsd/fuse_virtio.h +@@ -20,4 +20,6 @@ struct fuse_session; + + int virtio_session_mount(struct fuse_session *se); + ++int virtio_loop(struct fuse_session *se); ++ + #endif +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index fc9b264..037c5d7 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -36,6 +36,7 @@ + */ + + #include "qemu/osdep.h" ++#include "fuse_virtio.h" + #include "fuse_lowlevel.h" + #include + #include +@@ -1395,11 +1396,7 @@ int main(int argc, char *argv[]) + fuse_daemonize(opts.foreground); + + /* Block until ctrl+c or fusermount -u */ +- if (opts.singlethread) { +- ret = fuse_session_loop(se); +- } else { +- ret = fuse_session_loop_mt(se, opts.clone_fd); +- } ++ ret = virtio_loop(se); + + fuse_session_unmount(se); + err_out3: +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Support-remote-posix-locks.patch b/kvm-virtiofsd-Support-remote-posix-locks.patch new file mode 100644 index 0000000..e60364a --- /dev/null +++ b/kvm-virtiofsd-Support-remote-posix-locks.patch @@ -0,0 +1,355 @@ +From 8e46d0862c4c204f92c08ce2ae961921f270efb5 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:03 +0100 +Subject: [PATCH 092/116] virtiofsd: Support remote posix locks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-89-dgilbert@redhat.com> +Patchwork-id: 93537 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 088/112] virtiofsd: Support remote posix locks +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +Doing posix locks with-in guest kernel are not sufficient if a file/dir +is being shared by multiple guests. So we need the notion of daemon doing +the locks which are visible to rest of the guests. + +Given posix locks are per process, one can not call posix lock API on host, +otherwise bunch of basic posix locks properties are broken. For example, +If two processes (A and B) in guest open the file and take locks on different +sections of file, if one of the processes closes the fd, it will close +fd on virtiofsd and all posix locks on file will go away. This means if +process A closes the fd, then locks of process B will go away too. + +Similar other problems exist too. + +This patch set tries to emulate posix locks while using open file +description locks provided on Linux. + +Daemon provides two options (-o posix_lock, -o no_posix_lock) to enable +or disable posix locking in daemon. By default it is enabled. + +There are few issues though. + +- GETLK() returns pid of process holding lock. As we are emulating locks + using OFD, and these locks are not per process and don't return pid + of process, so GETLK() in guest does not return process pid. + +- As of now only F_SETLK is supported and not F_SETLKW. We can't block + the thread in virtiofsd for arbitrary long duration as there is only + one thread serving the queue. That means unlock request will not make + it to daemon and F_SETLKW will block infinitely and bring virtio-fs + to a halt. This is a solvable problem though and will require significant + changes in virtiofsd and kernel. Left as a TODO item for now.
+ +Signed-off-by: Vivek Goyal +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 0e81414c54161296212f6bc8a1c70526c4a9755a) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 3 + + tools/virtiofsd/passthrough_ll.c | 189 +++++++++++++++++++++++++++++++++++++++ + 2 files changed, 192 insertions(+) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 5672024..33749bf 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -156,6 +156,9 @@ void fuse_cmdline_help(void) + " allowed (default: 10)\n" + " -o norace disable racy fallback\n" + " default: false\n" ++ " -o posix_lock|no_posix_lock\n" ++ " enable/disable remote posix lock\n" ++ " default: posix_lock\n" + " -o readdirplus|no_readdirplus\n" + " enable/disable readirplus\n" + " default: readdirplus except with " +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 05b5f89..9414935 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -67,6 +67,12 @@ + #include "passthrough_helpers.h" + #include "seccomp.h" + ++/* Keep track of inode posix locks for each owner. 
*/ ++struct lo_inode_plock { ++ uint64_t lock_owner; ++ int fd; /* fd for OFD locks */ ++}; ++ + struct lo_map_elem { + union { + struct lo_inode *inode; +@@ -95,6 +101,8 @@ struct lo_inode { + struct lo_key key; + uint64_t refcount; /* protected by lo->mutex */ + fuse_ino_t fuse_ino; ++ pthread_mutex_t plock_mutex; ++ GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ + }; + + struct lo_cred { +@@ -114,6 +122,7 @@ struct lo_data { + int norace; + int writeback; + int flock; ++ int posix_lock; + int xattr; + char *source; + double timeout; +@@ -137,6 +146,8 @@ static const struct fuse_opt lo_opts[] = { + { "source=%s", offsetof(struct lo_data, source), 0 }, + { "flock", offsetof(struct lo_data, flock), 1 }, + { "no_flock", offsetof(struct lo_data, flock), 0 }, ++ { "posix_lock", offsetof(struct lo_data, posix_lock), 1 }, ++ { "no_posix_lock", offsetof(struct lo_data, posix_lock), 0 }, + { "xattr", offsetof(struct lo_data, xattr), 1 }, + { "no_xattr", offsetof(struct lo_data, xattr), 0 }, + { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, +@@ -485,6 +496,17 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); + conn->want |= FUSE_CAP_FLOCK_LOCKS; + } ++ ++ if (conn->capable & FUSE_CAP_POSIX_LOCKS) { ++ if (lo->posix_lock) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating posix locks\n"); ++ conn->want |= FUSE_CAP_POSIX_LOCKS; ++ } else { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix locks\n"); ++ conn->want &= ~FUSE_CAP_POSIX_LOCKS; ++ } ++ } ++ + if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) || + lo->readdirplus_clear) { + fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); +@@ -772,6 +794,19 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) + return p; + } + ++/* value_destroy_func for posix_locks GHashTable */ ++static void posix_locks_value_destroy(gpointer data) ++{ ++ struct lo_inode_plock *plock = data; ++ ++ 
/* ++ * We had used open() for locks and had only one fd. So ++ * closing this fd should release all OFD locks. ++ */ ++ close(plock->fd); ++ free(plock); ++} ++ + static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + struct fuse_entry_param *e) + { +@@ -825,6 +860,9 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + newfd = -1; + inode->key.ino = e->attr.st_ino; + inode->key.dev = e->attr.st_dev; ++ pthread_mutex_init(&inode->plock_mutex, NULL); ++ inode->posix_locks = g_hash_table_new_full( ++ g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); + + pthread_mutex_lock(&lo->mutex); + inode->fuse_ino = lo_add_inode_mapping(req, inode); +@@ -1160,6 +1198,11 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + if (!inode->refcount) { + lo_map_remove(&lo->ino_map, inode->fuse_ino); + g_hash_table_remove(lo->inodes, &inode->key); ++ if (g_hash_table_size(inode->posix_locks)) { ++ fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n"); ++ } ++ g_hash_table_destroy(inode->posix_locks); ++ pthread_mutex_destroy(&inode->plock_mutex); + pthread_mutex_unlock(&lo->mutex); + close(inode->fd); + free(inode); +@@ -1516,6 +1559,136 @@ out: + } + } + ++/* Should be called with inode->plock_mutex held */ ++static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, ++ struct lo_inode *inode, ++ uint64_t lock_owner, ++ pid_t pid, int *err) ++{ ++ struct lo_inode_plock *plock; ++ char procname[64]; ++ int fd; ++ ++ plock = ++ g_hash_table_lookup(inode->posix_locks, GUINT_TO_POINTER(lock_owner)); ++ ++ if (plock) { ++ return plock; ++ } ++ ++ plock = malloc(sizeof(struct lo_inode_plock)); ++ if (!plock) { ++ *err = ENOMEM; ++ return NULL; ++ } ++ ++ /* Open another instance of file which can be used for ofd locks. */ ++ sprintf(procname, "%i", inode->fd); ++ ++ /* TODO: What if file is not writable? 
*/ ++ fd = openat(lo->proc_self_fd, procname, O_RDWR); ++ if (fd == -1) { ++ *err = errno; ++ free(plock); ++ return NULL; ++ } ++ ++ plock->lock_owner = lock_owner; ++ plock->fd = fd; ++ g_hash_table_insert(inode->posix_locks, GUINT_TO_POINTER(plock->lock_owner), ++ plock); ++ return plock; ++} ++ ++static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct flock *lock) ++{ ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode; ++ struct lo_inode_plock *plock; ++ int ret, saverr = 0; ++ ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_getlk(ino=%" PRIu64 ", flags=%d)" ++ " owner=0x%lx, l_type=%d l_start=0x%lx" ++ " l_len=0x%lx\n", ++ ino, fi->flags, fi->lock_owner, lock->l_type, lock->l_start, ++ lock->l_len); ++ ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ ++ pthread_mutex_lock(&inode->plock_mutex); ++ plock = ++ lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); ++ if (!plock) { ++ pthread_mutex_unlock(&inode->plock_mutex); ++ fuse_reply_err(req, ret); ++ return; ++ } ++ ++ ret = fcntl(plock->fd, F_OFD_GETLK, lock); ++ if (ret == -1) { ++ saverr = errno; ++ } ++ pthread_mutex_unlock(&inode->plock_mutex); ++ ++ if (saverr) { ++ fuse_reply_err(req, saverr); ++ } else { ++ fuse_reply_lock(req, lock); ++ } ++} ++ ++static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, ++ struct flock *lock, int sleep) ++{ ++ struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode; ++ struct lo_inode_plock *plock; ++ int ret, saverr = 0; ++ ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_setlk(ino=%" PRIu64 ", flags=%d)" ++ " cmd=%d pid=%d owner=0x%lx sleep=%d l_whence=%d" ++ " l_start=0x%lx l_len=0x%lx\n", ++ ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, sleep, ++ lock->l_whence, lock->l_start, lock->l_len); ++ ++ if (sleep) { ++ fuse_reply_err(req, EOPNOTSUPP); ++ return; ++ } ++ ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, 
EBADF); ++ return; ++ } ++ ++ pthread_mutex_lock(&inode->plock_mutex); ++ plock = ++ lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); ++ ++ if (!plock) { ++ pthread_mutex_unlock(&inode->plock_mutex); ++ fuse_reply_err(req, ret); ++ return; ++ } ++ ++ /* TODO: Is it alright to modify flock? */ ++ lock->l_pid = 0; ++ ret = fcntl(plock->fd, F_OFD_SETLK, lock); ++ if (ret == -1) { ++ saverr = errno; ++ } ++ pthread_mutex_unlock(&inode->plock_mutex); ++ fuse_reply_err(req, saverr); ++} ++ + static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi) + { +@@ -1617,6 +1790,19 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { + int res; + (void)ino; ++ struct lo_inode *inode; ++ ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ ++ /* An fd is going away. Cleanup associated posix locks */ ++ pthread_mutex_lock(&inode->plock_mutex); ++ g_hash_table_remove(inode->posix_locks, GUINT_TO_POINTER(fi->lock_owner)); ++ pthread_mutex_unlock(&inode->plock_mutex); ++ + res = close(dup(lo_fi_fd(req, fi))); + fuse_reply_err(req, res == -1 ? errno : 0); + } +@@ -2080,6 +2266,8 @@ static struct fuse_lowlevel_ops lo_oper = { + .releasedir = lo_releasedir, + .fsyncdir = lo_fsyncdir, + .create = lo_create, ++ .getlk = lo_getlk, ++ .setlk = lo_setlk, + .open = lo_open, + .release = lo_release, + .flush = lo_flush, +@@ -2434,6 +2622,7 @@ int main(int argc, char *argv[]) + struct lo_data lo = { + .debug = 0, + .writeback = 0, ++ .posix_lock = 1, + .proc_self_fd = -1, + }; + struct lo_map_elem *root_elem; +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Trim-down-imported-files.patch b/kvm-virtiofsd-Trim-down-imported-files.patch new file mode 100644 index 0000000..f3f1e85 --- /dev/null +++ b/kvm-virtiofsd-Trim-down-imported-files.patch @@ -0,0 +1,1582 @@ +From 9d3788b1c2fa5cb4f14e292232a05c6a5217802d Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:44 +0100 +Subject: [PATCH 013/116] virtiofsd: Trim down imported files +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-10-dgilbert@redhat.com> +Patchwork-id: 93463 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 009/112] virtiofsd: Trim down imported files +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +There's a lot of the original fuse code we don't need; trim them down. + +Signed-off-by: Dr. David Alan Gilbert +with additional trimming by: +Signed-off-by: Misono Tomohiro +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Xiao Yang +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit a3e23f325439a290c504d6bbc48c2e742149ecab) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 71 +--- + tools/virtiofsd/fuse.h | 46 --- + tools/virtiofsd/fuse_common.h | 32 -- + tools/virtiofsd/fuse_i.h | 41 --- + tools/virtiofsd/fuse_log.h | 8 - + tools/virtiofsd/fuse_lowlevel.c | 675 +--------------------------------- + tools/virtiofsd/fuse_lowlevel.h | 28 -- + tools/virtiofsd/fuse_opt.h | 8 - + tools/virtiofsd/helper.c | 143 ------- + tools/virtiofsd/passthrough_helpers.h | 26 -- + tools/virtiofsd/passthrough_ll.c | 1 - + 11 files changed, 8 insertions(+), 1071 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 5ab9b87..aefb7db 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -157,73 +157,6 @@ static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, + return copied; + } + +-#ifdef HAVE_SPLICE +-static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len, enum fuse_buf_copy_flags flags) +-{ +- int splice_flags = 0; +- off_t *srcpos = 
NULL; +- off_t *dstpos = NULL; +- off_t srcpos_val; +- off_t dstpos_val; +- ssize_t res; +- size_t copied = 0; +- +- if (flags & FUSE_BUF_SPLICE_MOVE) +- splice_flags |= SPLICE_F_MOVE; +- if (flags & FUSE_BUF_SPLICE_NONBLOCK) +- splice_flags |= SPLICE_F_NONBLOCK; +- +- if (src->flags & FUSE_BUF_FD_SEEK) { +- srcpos_val = src->pos + src_off; +- srcpos = &srcpos_val; +- } +- if (dst->flags & FUSE_BUF_FD_SEEK) { +- dstpos_val = dst->pos + dst_off; +- dstpos = &dstpos_val; +- } +- +- while (len) { +- res = splice(src->fd, srcpos, dst->fd, dstpos, len, +- splice_flags); +- if (res == -1) { +- if (copied) +- break; +- +- if (errno != EINVAL || (flags & FUSE_BUF_FORCE_SPLICE)) +- return -errno; +- +- /* Maybe splice is not supported for this combination */ +- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, +- len); +- } +- if (res == 0) +- break; +- +- copied += res; +- if (!(src->flags & FUSE_BUF_FD_RETRY) && +- !(dst->flags & FUSE_BUF_FD_RETRY)) { +- break; +- } +- +- len -= res; +- } +- +- return copied; +-} +-#else +-static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, +- const struct fuse_buf *src, size_t src_off, +- size_t len, enum fuse_buf_copy_flags flags) +-{ +- (void) flags; +- +- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); +-} +-#endif +- +- + static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, + const struct fuse_buf *src, size_t src_off, + size_t len, enum fuse_buf_copy_flags flags) +@@ -247,10 +180,8 @@ static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, + return fuse_buf_write(dst, dst_off, src, src_off, len); + } else if (!dst_is_fd) { + return fuse_buf_read(dst, dst_off, src, src_off, len); +- } else if (flags & FUSE_BUF_NO_SPLICE) { +- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); + } else { +- return fuse_buf_splice(dst, dst_off, src, src_off, len, flags); ++ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); + } + } + +diff --git 
a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h +index 883f6e5..3202fba 100644 +--- a/tools/virtiofsd/fuse.h ++++ b/tools/virtiofsd/fuse.h +@@ -25,10 +25,6 @@ + #include + #include + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /* ----------------------------------------------------------- * + * Basic FUSE API * + * ----------------------------------------------------------- */ +@@ -979,44 +975,6 @@ int fuse_loop(struct fuse *f); + void fuse_exit(struct fuse *f); + + /** +- * FUSE event loop with multiple threads +- * +- * Requests from the kernel are processed, and the appropriate +- * operations are called. Request are processed in parallel by +- * distributing them between multiple threads. +- * +- * For a description of the return value and the conditions when the +- * event loop exits, refer to the documentation of +- * fuse_session_loop(). +- * +- * Note: using fuse_loop() instead of fuse_loop_mt() means you are running in +- * single-threaded mode, and that you will not have to worry about reentrancy, +- * though you will have to worry about recursive lookups. In single-threaded +- * mode, FUSE will wait for one callback to return before calling another. +- * +- * Enabling multiple threads, by using fuse_loop_mt(), will cause FUSE to make +- * multiple simultaneous calls into the various callback functions given by your +- * fuse_operations record. +- * +- * If you are using multiple threads, you can enjoy all the parallel execution +- * and interactive response benefits of threads, and you get to enjoy all the +- * benefits of race conditions and locking bugs, too. Ensure that any code used +- * in the callback function of fuse_operations is also thread-safe. 
+- * +- * @param f the FUSE handle +- * @param config loop configuration +- * @return see fuse_session_loop() +- * +- * See also: fuse_loop() +- */ +-#if FUSE_USE_VERSION < 32 +-int fuse_loop_mt_31(struct fuse *f, int clone_fd); +-#define fuse_loop_mt(f, clone_fd) fuse_loop_mt_31(f, clone_fd) +-#else +-int fuse_loop_mt(struct fuse *f, struct fuse_loop_config *config); +-#endif +- +-/** + * Get the current context + * + * The context is only valid for the duration of a filesystem +@@ -1268,8 +1226,4 @@ struct fuse_session *fuse_get_session(struct fuse *f); + */ + int fuse_open_channel(const char *mountpoint, const char *options); + +-#ifdef __cplusplus +-} +-#endif +- + #endif /* FUSE_H_ */ +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index 2d686b2..bf8f8cc 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -28,10 +28,6 @@ + #define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) + #define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /** + * Information about an open file. + * +@@ -100,30 +96,6 @@ struct fuse_file_info { + uint32_t poll_events; + }; + +-/** +- * Configuration parameters passed to fuse_session_loop_mt() and +- * fuse_loop_mt(). +- */ +-struct fuse_loop_config { +- /** +- * whether to use separate device fds for each thread +- * (may increase performance) +- */ +- int clone_fd; +- +- /** +- * The maximum number of available worker threads before they +- * start to get deleted when they become idle. If not +- * specified, the default is 10. +- * +- * Adjusting this has performance implications; a very small number +- * of threads in the pool will cause a lot of thread creation and +- * deletion overhead and performance may suffer. When set to 0, a new +- * thread will be created to service every operation. 
+- */ +- unsigned int max_idle_threads; +-}; +- + /************************************************************************** + * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * + **************************************************************************/ +@@ -802,10 +774,6 @@ void fuse_remove_signal_handlers(struct fuse_session *se); + # error only API version 30 or greater is supported + #endif + +-#ifdef __cplusplus +-} +-#endif +- + + /* + * This interface uses 64 bit off_t. +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index d38b630..b39522e 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -9,8 +9,6 @@ + #include "fuse.h" + #include "fuse_lowlevel.h" + +-struct mount_opts; +- + struct fuse_req { + struct fuse_session *se; + uint64_t unique; +@@ -45,7 +43,6 @@ struct fuse_session { + char *mountpoint; + volatile int exited; + int fd; +- struct mount_opts *mo; + int debug; + int deny_others; + struct fuse_lowlevel_ops op; +@@ -58,7 +55,6 @@ struct fuse_session { + struct fuse_req interrupts; + pthread_mutex_t lock; + int got_destroy; +- pthread_key_t pipe_key; + int broken_splice_nonblock; + uint64_t notify_ctr; + struct fuse_notify_req notify_list; +@@ -87,53 +83,16 @@ struct fuse_module { + int ctr; + }; + +-/* ----------------------------------------------------------- * +- * Channel interface (when using -o clone_fd) * +- * ----------------------------------------------------------- */ +- +-/** +- * Obtain counted reference to the channel +- * +- * @param ch the channel +- * @return the channel +- */ +-struct fuse_chan *fuse_chan_get(struct fuse_chan *ch); +- +-/** +- * Drop counted reference to a channel +- * +- * @param ch the channel +- */ +-void fuse_chan_put(struct fuse_chan *ch); +- +-struct mount_opts *parse_mount_opts(struct fuse_args *args); +-void destroy_mount_opts(struct mount_opts *mo); +-void fuse_mount_version(void); +-unsigned get_max_read(struct mount_opts *o); +-void 
fuse_kern_unmount(const char *mountpoint, int fd); +-int fuse_kern_mount(const char *mountpoint, struct mount_opts *mo); +- + int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, + int count); + void fuse_free_req(fuse_req_t req); + +-void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeide, const void *inarg); +- +-int fuse_start_thread(pthread_t *thread_id, void *(*func)(void *), void *arg); +- +-int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, +- struct fuse_chan *ch); + void fuse_session_process_buf_int(struct fuse_session *se, + const struct fuse_buf *buf, struct fuse_chan *ch); + +-struct fuse *fuse_new_31(struct fuse_args *args, const struct fuse_operations *op, +- size_t op_size, void *private_data); +-int fuse_loop_mt_32(struct fuse *f, struct fuse_loop_config *config); +-int fuse_session_loop_mt_32(struct fuse_session *se, struct fuse_loop_config *config); + + #define FUSE_MAX_MAX_PAGES 256 + #define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32 + + /* room needed in buffer to accommodate header */ + #define FUSE_BUFFER_HEADER_SIZE 0x1000 +- +diff --git a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h +index 5e112e0..0af700d 100644 +--- a/tools/virtiofsd/fuse_log.h ++++ b/tools/virtiofsd/fuse_log.h +@@ -16,10 +16,6 @@ + + #include + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /** + * Log severity level + * +@@ -75,8 +71,4 @@ void fuse_set_log_func(fuse_log_func_t func); + */ + void fuse_log(enum fuse_log_level level, const char *fmt, ...); + +-#ifdef __cplusplus +-} +-#endif +- + #endif /* FUSE_LOG_H_ */ +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index f2d7038..e6fa247 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -16,7 +16,6 @@ + #include "fuse_kernel.h" + #include "fuse_opt.h" + #include "fuse_misc.h" +-#include "mount_util.h" + + #include + #include +@@ -28,12 +27,6 @@ + #include + #include + +-#ifndef 
F_LINUX_SPECIFIC_BASE +-#define F_LINUX_SPECIFIC_BASE 1024 +-#endif +-#ifndef F_SETPIPE_SZ +-#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) +-#endif + + + #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) +@@ -137,7 +130,6 @@ void fuse_free_req(fuse_req_t req) + req->u.ni.data = NULL; + list_del_req(req); + ctr = --req->ctr; +- fuse_chan_put(req->ch); + req->ch = NULL; + pthread_mutex_unlock(&se->lock); + if (!ctr) +@@ -184,19 +176,7 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, + } + } + +- ssize_t res = writev(ch ? ch->fd : se->fd, +- iov, count); +- int err = errno; +- +- if (res == -1) { +- assert(se != NULL); +- +- /* ENOENT means the operation was interrupted */ +- if (!fuse_session_exited(se) && err != ENOENT) +- perror("fuse: writing device"); +- return -err; +- } +- ++ abort(); /* virtio should have taken it before here */ + return 0; + } + +@@ -480,10 +460,6 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, + struct fuse_bufvec *buf, + size_t len) + { +- struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); +- void *mbuf; +- int res; +- + /* Optimize common case */ + if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && + !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { +@@ -496,350 +472,10 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, + return fuse_send_msg(se, ch, iov, iov_count); + } + +- res = posix_memalign(&mbuf, pagesize, len); +- if (res != 0) +- return res; +- +- mem_buf.buf[0].mem = mbuf; +- res = fuse_buf_copy(&mem_buf, buf, 0); +- if (res < 0) { +- free(mbuf); +- return -res; +- } +- len = res; +- +- iov[iov_count].iov_base = mbuf; +- iov[iov_count].iov_len = len; +- iov_count++; +- res = fuse_send_msg(se, ch, iov, iov_count); +- free(mbuf); +- +- return res; +-} +- +-struct fuse_ll_pipe { +- size_t size; +- int can_grow; +- int pipe[2]; +-}; +- +-static void fuse_ll_pipe_free(struct fuse_ll_pipe *llp) +-{ +- close(llp->pipe[0]); +- close(llp->pipe[1]); +- free(llp); +-} 
+- +-#ifdef HAVE_SPLICE +-#if !defined(HAVE_PIPE2) || !defined(O_CLOEXEC) +-static int fuse_pipe(int fds[2]) +-{ +- int rv = pipe(fds); +- +- if (rv == -1) +- return rv; +- +- if (fcntl(fds[0], F_SETFL, O_NONBLOCK) == -1 || +- fcntl(fds[1], F_SETFL, O_NONBLOCK) == -1 || +- fcntl(fds[0], F_SETFD, FD_CLOEXEC) == -1 || +- fcntl(fds[1], F_SETFD, FD_CLOEXEC) == -1) { +- close(fds[0]); +- close(fds[1]); +- rv = -1; +- } +- return rv; +-} +-#else +-static int fuse_pipe(int fds[2]) +-{ +- return pipe2(fds, O_CLOEXEC | O_NONBLOCK); +-} +-#endif +- +-static struct fuse_ll_pipe *fuse_ll_get_pipe(struct fuse_session *se) +-{ +- struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); +- if (llp == NULL) { +- int res; +- +- llp = malloc(sizeof(struct fuse_ll_pipe)); +- if (llp == NULL) +- return NULL; +- +- res = fuse_pipe(llp->pipe); +- if (res == -1) { +- free(llp); +- return NULL; +- } +- +- /* +- *the default size is 16 pages on linux +- */ +- llp->size = pagesize * 16; +- llp->can_grow = 1; +- +- pthread_setspecific(se->pipe_key, llp); +- } +- +- return llp; +-} +-#endif +- +-static void fuse_ll_clear_pipe(struct fuse_session *se) +-{ +- struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); +- if (llp) { +- pthread_setspecific(se->pipe_key, NULL); +- fuse_ll_pipe_free(llp); +- } +-} +- +-#if defined(HAVE_SPLICE) && defined(HAVE_VMSPLICE) +-static int read_back(int fd, char *buf, size_t len) +-{ +- int res; +- +- res = read(fd, buf, len); +- if (res == -1) { +- fuse_log(FUSE_LOG_ERR, "fuse: internal error: failed to read back from pipe: %s\n", strerror(errno)); +- return -EIO; +- } +- if (res != len) { +- fuse_log(FUSE_LOG_ERR, "fuse: internal error: short read back from pipe: %i from %zi\n", res, len); +- return -EIO; +- } ++ abort(); /* Will have taken vhost path */ + return 0; + } + +-static int grow_pipe_to_max(int pipefd) +-{ +- int max; +- int res; +- int maxfd; +- char buf[32]; +- +- maxfd = open("/proc/sys/fs/pipe-max-size", O_RDONLY); +- if (maxfd < 0) 
+- return -errno; +- +- res = read(maxfd, buf, sizeof(buf) - 1); +- if (res < 0) { +- int saved_errno; +- +- saved_errno = errno; +- close(maxfd); +- return -saved_errno; +- } +- close(maxfd); +- buf[res] = '\0'; +- +- max = atoi(buf); +- res = fcntl(pipefd, F_SETPIPE_SZ, max); +- if (res < 0) +- return -errno; +- return max; +-} +- +-static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, +- struct iovec *iov, int iov_count, +- struct fuse_bufvec *buf, unsigned int flags) +-{ +- int res; +- size_t len = fuse_buf_size(buf); +- struct fuse_out_header *out = iov[0].iov_base; +- struct fuse_ll_pipe *llp; +- int splice_flags; +- size_t pipesize; +- size_t total_fd_size; +- size_t idx; +- size_t headerlen; +- struct fuse_bufvec pipe_buf = FUSE_BUFVEC_INIT(len); +- +- if (se->broken_splice_nonblock) +- goto fallback; +- +- if (flags & FUSE_BUF_NO_SPLICE) +- goto fallback; +- +- total_fd_size = 0; +- for (idx = buf->idx; idx < buf->count; idx++) { +- if (buf->buf[idx].flags & FUSE_BUF_IS_FD) { +- total_fd_size = buf->buf[idx].size; +- if (idx == buf->idx) +- total_fd_size -= buf->off; +- } +- } +- if (total_fd_size < 2 * pagesize) +- goto fallback; +- +- if (se->conn.proto_minor < 14 || +- !(se->conn.want & FUSE_CAP_SPLICE_WRITE)) +- goto fallback; +- +- llp = fuse_ll_get_pipe(se); +- if (llp == NULL) +- goto fallback; +- +- +- headerlen = iov_length(iov, iov_count); +- +- out->len = headerlen + len; +- +- /* +- * Heuristic for the required pipe size, does not work if the +- * source contains less than page size fragments +- */ +- pipesize = pagesize * (iov_count + buf->count + 1) + out->len; +- +- if (llp->size < pipesize) { +- if (llp->can_grow) { +- res = fcntl(llp->pipe[0], F_SETPIPE_SZ, pipesize); +- if (res == -1) { +- res = grow_pipe_to_max(llp->pipe[0]); +- if (res > 0) +- llp->size = res; +- llp->can_grow = 0; +- goto fallback; +- } +- llp->size = res; +- } +- if (llp->size < pipesize) +- goto fallback; +- } +- +- +- res = 
vmsplice(llp->pipe[1], iov, iov_count, SPLICE_F_NONBLOCK); +- if (res == -1) +- goto fallback; +- +- if (res != headerlen) { +- res = -EIO; +- fuse_log(FUSE_LOG_ERR, "fuse: short vmsplice to pipe: %u/%zu\n", res, +- headerlen); +- goto clear_pipe; +- } +- +- pipe_buf.buf[0].flags = FUSE_BUF_IS_FD; +- pipe_buf.buf[0].fd = llp->pipe[1]; +- +- res = fuse_buf_copy(&pipe_buf, buf, +- FUSE_BUF_FORCE_SPLICE | FUSE_BUF_SPLICE_NONBLOCK); +- if (res < 0) { +- if (res == -EAGAIN || res == -EINVAL) { +- /* +- * Should only get EAGAIN on kernels with +- * broken SPLICE_F_NONBLOCK support (<= +- * 2.6.35) where this error or a short read is +- * returned even if the pipe itself is not +- * full +- * +- * EINVAL might mean that splice can't handle +- * this combination of input and output. +- */ +- if (res == -EAGAIN) +- se->broken_splice_nonblock = 1; +- +- pthread_setspecific(se->pipe_key, NULL); +- fuse_ll_pipe_free(llp); +- goto fallback; +- } +- res = -res; +- goto clear_pipe; +- } +- +- if (res != 0 && res < len) { +- struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); +- void *mbuf; +- size_t now_len = res; +- /* +- * For regular files a short count is either +- * 1) due to EOF, or +- * 2) because of broken SPLICE_F_NONBLOCK (see above) +- * +- * For other inputs it's possible that we overflowed +- * the pipe because of small buffer fragments. +- */ +- +- res = posix_memalign(&mbuf, pagesize, len); +- if (res != 0) +- goto clear_pipe; +- +- mem_buf.buf[0].mem = mbuf; +- mem_buf.off = now_len; +- res = fuse_buf_copy(&mem_buf, buf, 0); +- if (res > 0) { +- char *tmpbuf; +- size_t extra_len = res; +- /* +- * Trickiest case: got more data. Need to get +- * back the data from the pipe and then fall +- * back to regular write. 
+- */ +- tmpbuf = malloc(headerlen); +- if (tmpbuf == NULL) { +- free(mbuf); +- res = ENOMEM; +- goto clear_pipe; +- } +- res = read_back(llp->pipe[0], tmpbuf, headerlen); +- free(tmpbuf); +- if (res != 0) { +- free(mbuf); +- goto clear_pipe; +- } +- res = read_back(llp->pipe[0], mbuf, now_len); +- if (res != 0) { +- free(mbuf); +- goto clear_pipe; +- } +- len = now_len + extra_len; +- iov[iov_count].iov_base = mbuf; +- iov[iov_count].iov_len = len; +- iov_count++; +- res = fuse_send_msg(se, ch, iov, iov_count); +- free(mbuf); +- return res; +- } +- free(mbuf); +- res = now_len; +- } +- len = res; +- out->len = headerlen + len; +- +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, +- " unique: %llu, success, outsize: %i (splice)\n", +- (unsigned long long) out->unique, out->len); +- } +- +- splice_flags = 0; +- if ((flags & FUSE_BUF_SPLICE_MOVE) && +- (se->conn.want & FUSE_CAP_SPLICE_MOVE)) +- splice_flags |= SPLICE_F_MOVE; +- +- res = splice(llp->pipe[0], NULL, ch ? ch->fd : se->fd, +- NULL, out->len, splice_flags); +- if (res == -1) { +- res = -errno; +- perror("fuse: splice from pipe"); +- goto clear_pipe; +- } +- if (res != out->len) { +- res = -EIO; +- fuse_log(FUSE_LOG_ERR, "fuse: short splice from pipe: %u/%u\n", +- res, out->len); +- goto clear_pipe; +- } +- return 0; +- +-clear_pipe: +- fuse_ll_clear_pipe(se); +- return res; +- +-fallback: +- return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); +-} +-#else + static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + struct iovec *iov, int iov_count, + struct fuse_bufvec *buf, unsigned int flags) +@@ -849,7 +485,6 @@ static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + + return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); + } +-#endif + + int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, + enum fuse_buf_copy_flags flags) +@@ -1408,16 +1043,11 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void 
*inarg, + if (bufv.buf[0].size < arg->size) { + fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); + fuse_reply_err(req, EIO); +- goto out; ++ return; + } + bufv.buf[0].size = arg->size; + + se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); +- +-out: +- /* Need to reset the pipe if ->write_buf() didn't consume all data */ +- if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) +- fuse_ll_clear_pipe(se); + } + + static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) +@@ -2038,17 +1668,6 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + return; + } + +- unsigned max_read_mo = get_max_read(se->mo); +- if (se->conn.max_read != max_read_mo) { +- fuse_log(FUSE_LOG_ERR, "fuse: error: init() and fuse_session_new() " +- "requested different maximum read size (%u vs %u)\n", +- se->conn.max_read, max_read_mo); +- fuse_reply_err(req, EPROTO); +- se->error = -EPROTO; +- fuse_session_exit(se); +- return; +- } +- + if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { + se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; + } +@@ -2364,8 +1983,6 @@ static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, + } + out: + free(rreq); +- if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) +- fuse_ll_clear_pipe(se); + } + + int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, +@@ -2496,7 +2113,6 @@ static struct { + [FUSE_RENAME2] = { do_rename2, "RENAME2" }, + [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, + [FUSE_LSEEK] = { do_lseek, "LSEEK" }, +- [CUSE_INIT] = { cuse_lowlevel_init, "CUSE_INIT" }, + }; + + #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) +@@ -2509,21 +2125,6 @@ static const char *opname(enum fuse_opcode opcode) + return fuse_ll_ops[opcode].name; + } + +-static int fuse_ll_copy_from_pipe(struct fuse_bufvec *dst, +- struct fuse_bufvec *src) +-{ +- ssize_t res = fuse_buf_copy(dst, src, 0); +- if 
(res < 0) { +- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", strerror(-res)); +- return res; +- } +- if ((size_t)res < fuse_buf_size(dst)) { +- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); +- return -1; +- } +- return 0; +-} +- + void fuse_session_process_buf(struct fuse_session *se, + const struct fuse_buf *buf) + { +@@ -2533,36 +2134,12 @@ void fuse_session_process_buf(struct fuse_session *se, + void fuse_session_process_buf_int(struct fuse_session *se, + const struct fuse_buf *buf, struct fuse_chan *ch) + { +- const size_t write_header_size = sizeof(struct fuse_in_header) + +- sizeof(struct fuse_write_in); +- struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 }; +- struct fuse_bufvec tmpbuf = FUSE_BUFVEC_INIT(write_header_size); + struct fuse_in_header *in; + const void *inarg; + struct fuse_req *req; +- void *mbuf = NULL; + int err; +- int res; +- +- if (buf->flags & FUSE_BUF_IS_FD) { +- if (buf->size < tmpbuf.buf[0].size) +- tmpbuf.buf[0].size = buf->size; + +- mbuf = malloc(tmpbuf.buf[0].size); +- if (mbuf == NULL) { +- fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate header\n"); +- goto clear_pipe; +- } +- tmpbuf.buf[0].mem = mbuf; +- +- res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); +- if (res < 0) +- goto clear_pipe; +- +- in = mbuf; +- } else { +- in = buf->mem; +- } ++ in = buf->mem; + + if (se->debug) { + fuse_log(FUSE_LOG_DEBUG, +@@ -2584,14 +2161,14 @@ void fuse_session_process_buf_int(struct fuse_session *se, + }; + + fuse_send_msg(se, ch, &iov, 1); +- goto clear_pipe; ++ return; + } + + req->unique = in->unique; + req->ctx.uid = in->uid; + req->ctx.gid = in->gid; + req->ctx.pid = in->pid; +- req->ch = ch ? 
fuse_chan_get(ch) : NULL; ++ req->ch = ch; + + err = EIO; + if (!se->got_init) { +@@ -2627,28 +2204,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, + fuse_reply_err(intr, EAGAIN); + } + +- if ((buf->flags & FUSE_BUF_IS_FD) && write_header_size < buf->size && +- (in->opcode != FUSE_WRITE || !se->op.write_buf) && +- in->opcode != FUSE_NOTIFY_REPLY) { +- void *newmbuf; +- +- err = ENOMEM; +- newmbuf = realloc(mbuf, buf->size); +- if (newmbuf == NULL) +- goto reply_err; +- mbuf = newmbuf; +- +- tmpbuf = FUSE_BUFVEC_INIT(buf->size - write_header_size); +- tmpbuf.buf[0].mem = (char *)mbuf + write_header_size; +- +- res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); +- err = -res; +- if (res < 0) +- goto reply_err; +- +- in = mbuf; +- } +- + inarg = (void *) &in[1]; + if (in->opcode == FUSE_WRITE && se->op.write_buf) + do_write_buf(req, in->nodeid, inarg, buf); +@@ -2657,16 +2212,10 @@ void fuse_session_process_buf_int(struct fuse_session *se, + else + fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); + +-out_free: +- free(mbuf); + return; + + reply_err: + fuse_reply_err(req, err); +-clear_pipe: +- if (buf->flags & FUSE_BUF_IS_FD) +- fuse_ll_clear_pipe(se); +- goto out_free; + } + + #define LL_OPTION(n,o,v) \ +@@ -2684,7 +2233,6 @@ void fuse_lowlevel_version(void) + { + printf("using FUSE kernel interface version %i.%i\n", + FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); +- fuse_mount_version(); + } + + void fuse_lowlevel_help(void) +@@ -2692,204 +2240,29 @@ void fuse_lowlevel_help(void) + /* These are not all options, but the ones that are + potentially of interest to an end-user */ + printf( +-" -o allow_other allow access by all users\n" + " -o allow_root allow access by root\n" +-" -o auto_unmount auto unmount on process termination\n"); ++); + } + + void fuse_session_destroy(struct fuse_session *se) + { +- struct fuse_ll_pipe *llp; +- + if (se->got_init && !se->got_destroy) { + if (se->op.destroy) + se->op.destroy(se->userdata); + } +- llp = 
pthread_getspecific(se->pipe_key); +- if (llp != NULL) +- fuse_ll_pipe_free(llp); +- pthread_key_delete(se->pipe_key); + pthread_mutex_destroy(&se->lock); + free(se->cuse_data); + if (se->fd != -1) + close(se->fd); +- destroy_mount_opts(se->mo); + free(se); + } + + +-static void fuse_ll_pipe_destructor(void *data) +-{ +- struct fuse_ll_pipe *llp = data; +- fuse_ll_pipe_free(llp); +-} +- +-int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf) +-{ +- return fuse_session_receive_buf_int(se, buf, NULL); +-} +- +-int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, +- struct fuse_chan *ch) +-{ +- int err; +- ssize_t res; +-#ifdef HAVE_SPLICE +- size_t bufsize = se->bufsize; +- struct fuse_ll_pipe *llp; +- struct fuse_buf tmpbuf; +- +- if (se->conn.proto_minor < 14 || !(se->conn.want & FUSE_CAP_SPLICE_READ)) +- goto fallback; +- +- llp = fuse_ll_get_pipe(se); +- if (llp == NULL) +- goto fallback; +- +- if (llp->size < bufsize) { +- if (llp->can_grow) { +- res = fcntl(llp->pipe[0], F_SETPIPE_SZ, bufsize); +- if (res == -1) { +- llp->can_grow = 0; +- res = grow_pipe_to_max(llp->pipe[0]); +- if (res > 0) +- llp->size = res; +- goto fallback; +- } +- llp->size = res; +- } +- if (llp->size < bufsize) +- goto fallback; +- } +- +- res = splice(ch ? 
ch->fd : se->fd, +- NULL, llp->pipe[1], NULL, bufsize, 0); +- err = errno; +- +- if (fuse_session_exited(se)) +- return 0; +- +- if (res == -1) { +- if (err == ENODEV) { +- /* Filesystem was unmounted, or connection was aborted +- via /sys/fs/fuse/connections */ +- fuse_session_exit(se); +- return 0; +- } +- if (err != EINTR && err != EAGAIN) +- perror("fuse: splice from device"); +- return -err; +- } +- +- if (res < sizeof(struct fuse_in_header)) { +- fuse_log(FUSE_LOG_ERR, "short splice from fuse device\n"); +- return -EIO; +- } +- +- tmpbuf = (struct fuse_buf) { +- .size = res, +- .flags = FUSE_BUF_IS_FD, +- .fd = llp->pipe[0], +- }; +- +- /* +- * Don't bother with zero copy for small requests. +- * fuse_loop_mt() needs to check for FORGET so this more than +- * just an optimization. +- */ +- if (res < sizeof(struct fuse_in_header) + +- sizeof(struct fuse_write_in) + pagesize) { +- struct fuse_bufvec src = { .buf[0] = tmpbuf, .count = 1 }; +- struct fuse_bufvec dst = { .count = 1 }; +- +- if (!buf->mem) { +- buf->mem = malloc(se->bufsize); +- if (!buf->mem) { +- fuse_log(FUSE_LOG_ERR, +- "fuse: failed to allocate read buffer\n"); +- return -ENOMEM; +- } +- } +- buf->size = se->bufsize; +- buf->flags = 0; +- dst.buf[0] = *buf; +- +- res = fuse_buf_copy(&dst, &src, 0); +- if (res < 0) { +- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", +- strerror(-res)); +- fuse_ll_clear_pipe(se); +- return res; +- } +- if (res < tmpbuf.size) { +- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); +- fuse_ll_clear_pipe(se); +- return -EIO; +- } +- assert(res == tmpbuf.size); +- +- } else { +- /* Don't overwrite buf->mem, as that would cause a leak */ +- buf->fd = tmpbuf.fd; +- buf->flags = tmpbuf.flags; +- } +- buf->size = tmpbuf.size; +- +- return res; +- +-fallback: +-#endif +- if (!buf->mem) { +- buf->mem = malloc(se->bufsize); +- if (!buf->mem) { +- fuse_log(FUSE_LOG_ERR, +- "fuse: failed to allocate read buffer\n"); +- return -ENOMEM; +- } +- } +- +-restart: 
+- res = read(ch ? ch->fd : se->fd, buf->mem, se->bufsize); +- err = errno; +- +- if (fuse_session_exited(se)) +- return 0; +- if (res == -1) { +- /* ENOENT means the operation was interrupted, it's safe +- to restart */ +- if (err == ENOENT) +- goto restart; +- +- if (err == ENODEV) { +- /* Filesystem was unmounted, or connection was aborted +- via /sys/fs/fuse/connections */ +- fuse_session_exit(se); +- return 0; +- } +- /* Errors occurring during normal operation: EINTR (read +- interrupted), EAGAIN (nonblocking I/O), ENODEV (filesystem +- umounted) */ +- if (err != EINTR && err != EAGAIN) +- perror("fuse: reading device"); +- return -err; +- } +- if ((size_t) res < sizeof(struct fuse_in_header)) { +- fuse_log(FUSE_LOG_ERR, "short read on fuse device\n"); +- return -EIO; +- } +- +- buf->size = res; +- +- return res; +-} +- + struct fuse_session *fuse_session_new(struct fuse_args *args, + const struct fuse_lowlevel_ops *op, + size_t op_size, void *userdata) + { +- int err; + struct fuse_session *se; +- struct mount_opts *mo; + + if (sizeof(struct fuse_lowlevel_ops) < op_size) { + fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); +@@ -2913,20 +2286,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + /* Parse options */ + if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) + goto out2; +- if(se->deny_others) { +- /* Allowing access only by root is done by instructing +- * kernel to allow access by everyone, and then restricting +- * access to root and mountpoint owner in libfuse. +- */ +- // We may be adding the option a second time, but +- // that doesn't hurt. 
+- if(fuse_opt_add_arg(args, "-oallow_other") == -1) +- goto out2; +- } +- mo = parse_mount_opts(args); +- if (mo == NULL) +- goto out3; +- + if(args->argc == 1 && + args->argv[0][0] == '-') { + fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " +@@ -2940,9 +2299,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + goto out4; + } + +- if (se->debug) +- fuse_log(FUSE_LOG_DEBUG, "FUSE library version: %s\n", PACKAGE_VERSION); +- + se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + + FUSE_BUFFER_HEADER_SIZE; + +@@ -2952,26 +2308,14 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + se->notify_ctr = 1; + fuse_mutex_init(&se->lock); + +- err = pthread_key_create(&se->pipe_key, fuse_ll_pipe_destructor); +- if (err) { +- fuse_log(FUSE_LOG_ERR, "fuse: failed to create thread specific key: %s\n", +- strerror(err)); +- goto out5; +- } +- + memcpy(&se->op, op, op_size); + se->owner = getuid(); + se->userdata = userdata; + +- se->mo = mo; + return se; + +-out5: +- pthread_mutex_destroy(&se->lock); + out4: + fuse_opt_free_args(args); +-out3: +- free(mo); + out2: + free(se); + out1: +@@ -3035,11 +2379,6 @@ int fuse_session_fd(struct fuse_session *se) + + void fuse_session_unmount(struct fuse_session *se) + { +- if (se->mountpoint != NULL) { +- fuse_kern_unmount(se->mountpoint, se->fd); +- free(se->mountpoint); +- se->mountpoint = NULL; +- } + } + + #ifdef linux +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 18c6363..6b1adfc 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -31,10 +31,6 @@ + #include + #include + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /* ----------------------------------------------------------- * + * Miscellaneous definitions * + * ----------------------------------------------------------- */ +@@ -1863,14 +1859,12 @@ void fuse_cmdline_help(void); + * ----------------------------------------------------------- */ + + 
struct fuse_cmdline_opts { +- int singlethread; + int foreground; + int debug; + int nodefault_subtype; + char *mountpoint; + int show_version; + int show_help; +- int clone_fd; + unsigned int max_idle_threads; + }; + +@@ -1962,24 +1956,6 @@ int fuse_session_mount(struct fuse_session *se, const char *mountpoint); + int fuse_session_loop(struct fuse_session *se); + + /** +- * Enter a multi-threaded event loop. +- * +- * For a description of the return value and the conditions when the +- * event loop exits, refer to the documentation of +- * fuse_session_loop(). +- * +- * @param se the session +- * @param config session loop configuration +- * @return see fuse_session_loop() +- */ +-#if FUSE_USE_VERSION < 32 +-int fuse_session_loop_mt_31(struct fuse_session *se, int clone_fd); +-#define fuse_session_loop_mt(se, clone_fd) fuse_session_loop_mt_31(se, clone_fd) +-#else +-int fuse_session_loop_mt(struct fuse_session *se, struct fuse_loop_config *config); +-#endif +- +-/** + * Flag a session as terminated. 
+ * + * This function is invoked by the POSIX signal handlers, when +@@ -2082,8 +2058,4 @@ void fuse_session_process_buf(struct fuse_session *se, + */ + int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); + +-#ifdef __cplusplus +-} +-#endif +- + #endif /* FUSE_LOWLEVEL_H_ */ +diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h +index d8573e7..6910255 100644 +--- a/tools/virtiofsd/fuse_opt.h ++++ b/tools/virtiofsd/fuse_opt.h +@@ -14,10 +14,6 @@ + * This file defines the option parsing interface of FUSE + */ + +-#ifdef __cplusplus +-extern "C" { +-#endif +- + /** + * Option description + * +@@ -264,8 +260,4 @@ void fuse_opt_free_args(struct fuse_args *args); + */ + int fuse_opt_match(const struct fuse_opt opts[], const char *opt); + +-#ifdef __cplusplus +-} +-#endif +- + #endif /* FUSE_OPT_H_ */ +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 64ff7ad..5a2e64c 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -41,14 +41,10 @@ static const struct fuse_opt fuse_helper_opts[] = { + FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), + FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), + FUSE_HELPER_OPT("-f", foreground), +- FUSE_HELPER_OPT("-s", singlethread), + FUSE_HELPER_OPT("fsname=", nodefault_subtype), + FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), +-#ifndef __FreeBSD__ + FUSE_HELPER_OPT("subtype=", nodefault_subtype), + FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), +-#endif +- FUSE_HELPER_OPT("clone_fd", clone_fd), + FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), + FUSE_OPT_END + }; +@@ -132,9 +128,6 @@ void fuse_cmdline_help(void) + " -V --version print version\n" + " -d -o debug enable debug output (implies -f)\n" + " -f foreground operation\n" +- " -s disable multi-threaded operation\n" +- " -o clone_fd use separate fuse device fd for each thread\n" +- " (may improve performance)\n" + " -o max_idle_threads the maximum number of idle worker threads\n" + " allowed (default: 10)\n"); + 
} +@@ -171,34 +164,6 @@ static int fuse_helper_opt_proc(void *data, const char *arg, int key, + } + } + +-/* Under FreeBSD, there is no subtype option so this +- function actually sets the fsname */ +-static int add_default_subtype(const char *progname, struct fuse_args *args) +-{ +- int res; +- char *subtype_opt; +- +- const char *basename = strrchr(progname, '/'); +- if (basename == NULL) +- basename = progname; +- else if (basename[1] != '\0') +- basename++; +- +- subtype_opt = (char *) malloc(strlen(basename) + 64); +- if (subtype_opt == NULL) { +- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); +- return -1; +- } +-#ifdef __FreeBSD__ +- sprintf(subtype_opt, "-ofsname=%s", basename); +-#else +- sprintf(subtype_opt, "-osubtype=%s", basename); +-#endif +- res = fuse_opt_add_arg(args, subtype_opt); +- free(subtype_opt); +- return res; +-} +- + int fuse_parse_cmdline(struct fuse_args *args, + struct fuse_cmdline_opts *opts) + { +@@ -210,14 +175,6 @@ int fuse_parse_cmdline(struct fuse_args *args, + fuse_helper_opt_proc) == -1) + return -1; + +- /* *Linux*: if neither -o subtype nor -o fsname are specified, +- set subtype to program's basename. +- *FreeBSD*: if fsname is not specified, set to program's +- basename. 
*/ +- if (!opts->nodefault_subtype) +- if (add_default_subtype(args->argv[0], args) == -1) +- return -1; +- + return 0; + } + +@@ -276,88 +233,6 @@ int fuse_daemonize(int foreground) + return 0; + } + +-int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, +- size_t op_size, void *user_data) +-{ +- struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +- struct fuse *fuse; +- struct fuse_cmdline_opts opts; +- int res; +- +- if (fuse_parse_cmdline(&args, &opts) != 0) +- return 1; +- +- if (opts.show_version) { +- printf("FUSE library version %s\n", PACKAGE_VERSION); +- fuse_lowlevel_version(); +- res = 0; +- goto out1; +- } +- +- if (opts.show_help) { +- if(args.argv[0][0] != '\0') +- printf("usage: %s [options] \n\n", +- args.argv[0]); +- printf("FUSE options:\n"); +- fuse_cmdline_help(); +- fuse_lib_help(&args); +- res = 0; +- goto out1; +- } +- +- if (!opts.show_help && +- !opts.mountpoint) { +- fuse_log(FUSE_LOG_ERR, "error: no mountpoint specified\n"); +- res = 2; +- goto out1; +- } +- +- +- fuse = fuse_new_31(&args, op, op_size, user_data); +- if (fuse == NULL) { +- res = 3; +- goto out1; +- } +- +- if (fuse_mount(fuse,opts.mountpoint) != 0) { +- res = 4; +- goto out2; +- } +- +- if (fuse_daemonize(opts.foreground) != 0) { +- res = 5; +- goto out3; +- } +- +- struct fuse_session *se = fuse_get_session(fuse); +- if (fuse_set_signal_handlers(se) != 0) { +- res = 6; +- goto out3; +- } +- +- if (opts.singlethread) +- res = fuse_loop(fuse); +- else { +- struct fuse_loop_config loop_config; +- loop_config.clone_fd = opts.clone_fd; +- loop_config.max_idle_threads = opts.max_idle_threads; +- res = fuse_loop_mt_32(fuse, &loop_config); +- } +- if (res) +- res = 7; +- +- fuse_remove_signal_handlers(se); +-out3: +- fuse_unmount(fuse); +-out2: +- fuse_destroy(fuse); +-out1: +- free(opts.mountpoint); +- fuse_opt_free_args(&args); +- return res; +-} +- +- + void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, + struct fuse_conn_info *conn) + 
{ +@@ -420,21 +295,3 @@ struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) + } + return opts; + } +- +-int fuse_open_channel(const char *mountpoint, const char* options) +-{ +- struct mount_opts *opts = NULL; +- int fd = -1; +- const char *argv[] = { "", "-o", options }; +- int argc = sizeof(argv) / sizeof(argv[0]); +- struct fuse_args args = FUSE_ARGS_INIT(argc, (char**) argv); +- +- opts = parse_mount_opts(&args); +- if (opts == NULL) +- return -1; +- +- fd = fuse_kern_mount(mountpoint, opts); +- destroy_mount_opts(opts); +- +- return fd; +-} +diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h +index 6b77c33..7c5f561 100644 +--- a/tools/virtiofsd/passthrough_helpers.h ++++ b/tools/virtiofsd/passthrough_helpers.h +@@ -42,32 +42,6 @@ static int mknod_wrapper(int dirfd, const char *path, const char *link, + res = symlinkat(link, dirfd, path); + } else if (S_ISFIFO(mode)) { + res = mkfifoat(dirfd, path, mode); +-#ifdef __FreeBSD__ +- } else if (S_ISSOCK(mode)) { +- struct sockaddr_un su; +- int fd; +- +- if (strlen(path) >= sizeof(su.sun_path)) { +- errno = ENAMETOOLONG; +- return -1; +- } +- fd = socket(AF_UNIX, SOCK_STREAM, 0); +- if (fd >= 0) { +- /* +- * We must bind the socket to the underlying file +- * system to create the socket file, even though +- * we'll never listen on this socket. 
+- */ +- su.sun_family = AF_UNIX; +- strncpy(su.sun_path, path, sizeof(su.sun_path)); +- res = bindat(dirfd, fd, (struct sockaddr*)&su, +- sizeof(su)); +- if (res == 0) +- close(fd); +- } else { +- res = -1; +- } +-#endif + } else { + res = mknodat(dirfd, path, mode, rdev); + } +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e1a6056..e5f7115 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1240,7 +1240,6 @@ int main(int argc, char *argv[]) + ret = 0; + goto err_out1; + } else if (opts.show_version) { +- printf("FUSE library version %s\n", fuse_pkgversion()); + fuse_lowlevel_version(); + ret = 0; + goto err_out1; +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Trim-out-compatibility-code.patch b/kvm-virtiofsd-Trim-out-compatibility-code.patch new file mode 100644 index 0000000..411af77 --- /dev/null +++ b/kvm-virtiofsd-Trim-out-compatibility-code.patch @@ -0,0 +1,545 @@ +From ff16b837e402de773581f77ca188f8806c0b500f Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:51 +0100 +Subject: [PATCH 020/116] virtiofsd: Trim out compatibility code +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-17-dgilbert@redhat.com> +Patchwork-id: 93468 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 016/112] virtiofsd: Trim out compatibility code +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +virtiofsd only supports major=7, minor>=31; trim out a lot of +old compatibility code. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 72c42e2d65510e073cf78fdc924d121c77fa0080) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 330 +++++++++++++++------------------------- + 1 file changed, 119 insertions(+), 211 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 07fb8a6..514d79c 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -387,16 +387,7 @@ static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f) + int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) + { + struct fuse_entry_out arg; +- size_t size = req->se->conn.proto_minor < 9 ? FUSE_COMPAT_ENTRY_OUT_SIZE : +- sizeof(arg); +- +- /* +- * before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant +- * negative entry +- */ +- if (!e->ino && req->se->conn.proto_minor < 4) { +- return fuse_reply_err(req, ENOENT); +- } ++ size_t size = sizeof(arg); + + memset(&arg, 0, sizeof(arg)); + fill_entry(&arg, e); +@@ -407,9 +398,7 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, + const struct fuse_file_info *f) + { + char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; +- size_t entrysize = req->se->conn.proto_minor < 9 ? +- FUSE_COMPAT_ENTRY_OUT_SIZE : +- sizeof(struct fuse_entry_out); ++ size_t entrysize = sizeof(struct fuse_entry_out); + struct fuse_entry_out *earg = (struct fuse_entry_out *)buf; + struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize); + +@@ -423,8 +412,7 @@ int fuse_reply_attr(fuse_req_t req, const struct stat *attr, + double attr_timeout) + { + struct fuse_attr_out arg; +- size_t size = +- req->se->conn.proto_minor < 9 ? 
FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); ++ size_t size = sizeof(arg); + + memset(&arg, 0, sizeof(arg)); + arg.attr_valid = calc_timeout_sec(attr_timeout); +@@ -519,8 +507,7 @@ int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) + int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) + { + struct fuse_statfs_out arg; +- size_t size = +- req->se->conn.proto_minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(arg); ++ size_t size = sizeof(arg); + + memset(&arg, 0, sizeof(arg)); + convert_statfs(stbuf, &arg.st); +@@ -604,45 +591,31 @@ int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, + iov[count].iov_len = sizeof(arg); + count++; + +- if (req->se->conn.proto_minor < 16) { +- if (in_count) { +- iov[count].iov_base = (void *)in_iov; +- iov[count].iov_len = sizeof(in_iov[0]) * in_count; +- count++; +- } ++ /* Can't handle non-compat 64bit ioctls on 32bit */ ++ if (sizeof(void *) == 4 && req->ioctl_64bit) { ++ res = fuse_reply_err(req, EINVAL); ++ goto out; ++ } + +- if (out_count) { +- iov[count].iov_base = (void *)out_iov; +- iov[count].iov_len = sizeof(out_iov[0]) * out_count; +- count++; ++ if (in_count) { ++ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); ++ if (!in_fiov) { ++ goto enomem; + } +- } else { +- /* Can't handle non-compat 64bit ioctls on 32bit */ +- if (sizeof(void *) == 4 && req->ioctl_64bit) { +- res = fuse_reply_err(req, EINVAL); +- goto out; +- } +- +- if (in_count) { +- in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); +- if (!in_fiov) { +- goto enomem; +- } + +- iov[count].iov_base = (void *)in_fiov; +- iov[count].iov_len = sizeof(in_fiov[0]) * in_count; +- count++; ++ iov[count].iov_base = (void *)in_fiov; ++ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; ++ count++; ++ } ++ if (out_count) { ++ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); ++ if (!out_fiov) { ++ goto enomem; + } +- if (out_count) { +- out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); +- if (!out_fiov) { +- goto enomem; +- } 
+ +- iov[count].iov_base = (void *)out_fiov; +- iov[count].iov_len = sizeof(out_fiov[0]) * out_count; +- count++; +- } ++ iov[count].iov_base = (void *)out_fiov; ++ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; ++ count++; + } + + res = send_reply_iov(req, 0, iov, count); +@@ -784,14 +757,12 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + struct fuse_file_info *fip = NULL; + struct fuse_file_info fi; + +- if (req->se->conn.proto_minor >= 9) { +- struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; ++ struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; + +- if (arg->getattr_flags & FUSE_GETATTR_FH) { +- memset(&fi, 0, sizeof(fi)); +- fi.fh = arg->fh; +- fip = &fi; +- } ++ if (arg->getattr_flags & FUSE_GETATTR_FH) { ++ memset(&fi, 0, sizeof(fi)); ++ fi.fh = arg->fh; ++ fip = &fi; + } + + if (req->se->op.getattr) { +@@ -856,11 +827,7 @@ static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; + char *name = PARAM(arg); + +- if (req->se->conn.proto_minor >= 12) { +- req->ctx.umask = arg->umask; +- } else { +- name = (char *)inarg + FUSE_COMPAT_MKNOD_IN_SIZE; +- } ++ req->ctx.umask = arg->umask; + + if (req->se->op.mknod) { + req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); +@@ -873,9 +840,7 @@ static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + { + struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; + +- if (req->se->conn.proto_minor >= 12) { +- req->ctx.umask = arg->umask; +- } ++ req->ctx.umask = arg->umask; + + if (req->se->op.mkdir) { + req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); +@@ -967,11 +932,7 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + +- if (req->se->conn.proto_minor >= 12) { +- req->ctx.umask = arg->umask; +- } else { +- name = (char *)inarg + sizeof(struct fuse_open_in); +- 
} ++ req->ctx.umask = arg->umask; + + req->se->op.create(req, nodeid, name, arg->mode, &fi); + } else { +@@ -1003,10 +964,8 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; +- if (req->se->conn.proto_minor >= 9) { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- } ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; + req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); + } else { + fuse_reply_err(req, ENOSYS); +@@ -1023,13 +982,9 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + fi.fh = arg->fh; + fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; + +- if (req->se->conn.proto_minor < 9) { +- param = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; +- } else { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- param = PARAM(arg); +- } ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ param = PARAM(arg); + + if (req->se->op.write) { + req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); +@@ -1053,21 +1008,14 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, + fi.fh = arg->fh; + fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; + +- if (se->conn.proto_minor < 9) { +- bufv.buf[0].mem = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; +- bufv.buf[0].size -= +- sizeof(struct fuse_in_header) + FUSE_COMPAT_WRITE_IN_SIZE; +- assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); +- } else { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { +- bufv.buf[0].mem = PARAM(arg); +- } +- +- bufv.buf[0].size -= +- sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); ++ fi.lock_owner = arg->lock_owner; ++ fi.flags = arg->flags; ++ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { ++ bufv.buf[0].mem = PARAM(arg); + } ++ ++ bufv.buf[0].size -= ++ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); + if 
(bufv.buf[0].size < arg->size) { + fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); + fuse_reply_err(req, EIO); +@@ -1086,9 +1034,7 @@ static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.flush = 1; +- if (req->se->conn.proto_minor >= 7) { +- fi.lock_owner = arg->lock_owner; +- } ++ fi.lock_owner = arg->lock_owner; + + if (req->se->op.flush) { + req->se->op.flush(req, nodeid, &fi); +@@ -1105,10 +1051,8 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + fi.fh = arg->fh; +- if (req->se->conn.proto_minor >= 8) { +- fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; +- fi.lock_owner = arg->lock_owner; +- } ++ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; ++ fi.lock_owner = arg->lock_owner; + if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { + fi.flock_release = 1; + fi.lock_owner = arg->lock_owner; +@@ -1477,8 +1421,7 @@ static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + +- if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && +- !(flags & FUSE_IOCTL_32BIT)) { ++ if (sizeof(void *) == 4 && !(flags & FUSE_IOCTL_32BIT)) { + req->ioctl_64bit = 1; + } + +@@ -1603,7 +1546,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + outarg.major = FUSE_KERNEL_VERSION; + outarg.minor = FUSE_KERNEL_MINOR_VERSION; + +- if (arg->major < 7) { ++ if (arg->major < 7 || (arg->major == 7 && arg->minor < 31)) { + fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", + arg->major, arg->minor); + fuse_reply_err(req, EPROTO); +@@ -1616,81 +1559,71 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + return; + } + +- if (arg->minor >= 6) { +- if (arg->max_readahead < se->conn.max_readahead) { +- se->conn.max_readahead = 
arg->max_readahead; +- } +- if (arg->flags & FUSE_ASYNC_READ) { +- se->conn.capable |= FUSE_CAP_ASYNC_READ; +- } +- if (arg->flags & FUSE_POSIX_LOCKS) { +- se->conn.capable |= FUSE_CAP_POSIX_LOCKS; +- } +- if (arg->flags & FUSE_ATOMIC_O_TRUNC) { +- se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; +- } +- if (arg->flags & FUSE_EXPORT_SUPPORT) { +- se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; +- } +- if (arg->flags & FUSE_DONT_MASK) { +- se->conn.capable |= FUSE_CAP_DONT_MASK; +- } +- if (arg->flags & FUSE_FLOCK_LOCKS) { +- se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; +- } +- if (arg->flags & FUSE_AUTO_INVAL_DATA) { +- se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; +- } +- if (arg->flags & FUSE_DO_READDIRPLUS) { +- se->conn.capable |= FUSE_CAP_READDIRPLUS; +- } +- if (arg->flags & FUSE_READDIRPLUS_AUTO) { +- se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; +- } +- if (arg->flags & FUSE_ASYNC_DIO) { +- se->conn.capable |= FUSE_CAP_ASYNC_DIO; +- } +- if (arg->flags & FUSE_WRITEBACK_CACHE) { +- se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; +- } +- if (arg->flags & FUSE_NO_OPEN_SUPPORT) { +- se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; +- } +- if (arg->flags & FUSE_PARALLEL_DIROPS) { +- se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; +- } +- if (arg->flags & FUSE_POSIX_ACL) { +- se->conn.capable |= FUSE_CAP_POSIX_ACL; +- } +- if (arg->flags & FUSE_HANDLE_KILLPRIV) { +- se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; +- } +- if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { +- se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; +- } +- if (!(arg->flags & FUSE_MAX_PAGES)) { +- size_t max_bufsize = +- FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + +- FUSE_BUFFER_HEADER_SIZE; +- if (bufsize > max_bufsize) { +- bufsize = max_bufsize; +- } ++ if (arg->max_readahead < se->conn.max_readahead) { ++ se->conn.max_readahead = arg->max_readahead; ++ } ++ if (arg->flags & FUSE_ASYNC_READ) { ++ se->conn.capable |= FUSE_CAP_ASYNC_READ; ++ } ++ if (arg->flags & FUSE_POSIX_LOCKS) { ++ se->conn.capable |= 
FUSE_CAP_POSIX_LOCKS; ++ } ++ if (arg->flags & FUSE_ATOMIC_O_TRUNC) { ++ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; ++ } ++ if (arg->flags & FUSE_EXPORT_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; ++ } ++ if (arg->flags & FUSE_DONT_MASK) { ++ se->conn.capable |= FUSE_CAP_DONT_MASK; ++ } ++ if (arg->flags & FUSE_FLOCK_LOCKS) { ++ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; ++ } ++ if (arg->flags & FUSE_AUTO_INVAL_DATA) { ++ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; ++ } ++ if (arg->flags & FUSE_DO_READDIRPLUS) { ++ se->conn.capable |= FUSE_CAP_READDIRPLUS; ++ } ++ if (arg->flags & FUSE_READDIRPLUS_AUTO) { ++ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; ++ } ++ if (arg->flags & FUSE_ASYNC_DIO) { ++ se->conn.capable |= FUSE_CAP_ASYNC_DIO; ++ } ++ if (arg->flags & FUSE_WRITEBACK_CACHE) { ++ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; ++ } ++ if (arg->flags & FUSE_NO_OPEN_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; ++ } ++ if (arg->flags & FUSE_PARALLEL_DIROPS) { ++ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; ++ } ++ if (arg->flags & FUSE_POSIX_ACL) { ++ se->conn.capable |= FUSE_CAP_POSIX_ACL; ++ } ++ if (arg->flags & FUSE_HANDLE_KILLPRIV) { ++ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; ++ } ++ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { ++ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; ++ } ++ if (!(arg->flags & FUSE_MAX_PAGES)) { ++ size_t max_bufsize = FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + ++ FUSE_BUFFER_HEADER_SIZE; ++ if (bufsize > max_bufsize) { ++ bufsize = max_bufsize; + } +- } else { +- se->conn.max_readahead = 0; + } +- +- if (se->conn.proto_minor >= 14) { + #ifdef HAVE_SPLICE + #ifdef HAVE_VMSPLICE +- se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; ++ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; + #endif +- se->conn.capable |= FUSE_CAP_SPLICE_READ; ++ se->conn.capable |= FUSE_CAP_SPLICE_READ; + #endif +- } +- if (se->conn.proto_minor >= 18) { +- se->conn.capable |= 
FUSE_CAP_IOCTL_DIR; +- } ++ se->conn.capable |= FUSE_CAP_IOCTL_DIR; + + /* + * Default settings for modern filesystems. +@@ -1797,24 +1730,20 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + outarg.max_readahead = se->conn.max_readahead; + outarg.max_write = se->conn.max_write; +- if (se->conn.proto_minor >= 13) { +- if (se->conn.max_background >= (1 << 16)) { +- se->conn.max_background = (1 << 16) - 1; +- } +- if (se->conn.congestion_threshold > se->conn.max_background) { +- se->conn.congestion_threshold = se->conn.max_background; +- } +- if (!se->conn.congestion_threshold) { +- se->conn.congestion_threshold = se->conn.max_background * 3 / 4; +- } +- +- outarg.max_background = se->conn.max_background; +- outarg.congestion_threshold = se->conn.congestion_threshold; ++ if (se->conn.max_background >= (1 << 16)) { ++ se->conn.max_background = (1 << 16) - 1; ++ } ++ if (se->conn.congestion_threshold > se->conn.max_background) { ++ se->conn.congestion_threshold = se->conn.max_background; + } +- if (se->conn.proto_minor >= 23) { +- outarg.time_gran = se->conn.time_gran; ++ if (!se->conn.congestion_threshold) { ++ se->conn.congestion_threshold = se->conn.max_background * 3 / 4; + } + ++ outarg.max_background = se->conn.max_background; ++ outarg.congestion_threshold = se->conn.congestion_threshold; ++ outarg.time_gran = se->conn.time_gran; ++ + if (se->debug) { + fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, + outarg.minor); +@@ -1828,11 +1757,6 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + outarg.congestion_threshold); + fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); + } +- if (arg->minor < 5) { +- outargsize = FUSE_COMPAT_INIT_OUT_SIZE; +- } else if (arg->minor < 23) { +- outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; +- } + + send_reply_ok(req, &outarg, outargsize); + } +@@ -1896,10 +1820,6 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, + return 
-EINVAL; + } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { +- return -ENOSYS; +- } +- + outarg.ino = ino; + outarg.off = off; + outarg.len = len; +@@ -1920,10 +1840,6 @@ int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, + return -EINVAL; + } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { +- return -ENOSYS; +- } +- + outarg.parent = parent; + outarg.namelen = namelen; + outarg.padding = 0; +@@ -1947,10 +1863,6 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + return -EINVAL; + } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) { +- return -ENOSYS; +- } +- + outarg.parent = parent; + outarg.child = child; + outarg.namelen = namelen; +@@ -1977,10 +1889,6 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + return -EINVAL; + } + +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { +- return -ENOSYS; +- } +- + out.unique = 0; + out.error = FUSE_NOTIFY_STORE; + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch b/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch new file mode 100644 index 0000000..a0882d5 --- /dev/null +++ b/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch @@ -0,0 +1,93 @@ +From e4c8fd1060fb69a093064851ebf66dd82533ec0e Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:17 +0100 +Subject: [PATCH 106/116] virtiofsd: add definition of fuse_buf_writev() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-103-dgilbert@redhat.com> +Patchwork-id: 93557 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 102/112] virtiofsd: add definition of fuse_buf_writev() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: piaojun + +Define fuse_buf_writev() which use pwritev and writev to improve io +bandwidth. Especially, the src bufs with 0 size should be skipped as +their mems are not *block_size* aligned which will cause writev failed +in direct io mode. + +Signed-off-by: Jun Piao +Suggested-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 9ceaaa15cf21073c2b23058c374f61c30cd39c31) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 38 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 38 insertions(+) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 42a608f..37befeb 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -14,6 +14,7 @@ + #include "fuse_lowlevel.h" + #include + #include ++#include + #include + #include + +@@ -33,6 +34,43 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv) + return size; + } + ++__attribute__((unused)) ++static ssize_t fuse_buf_writev(struct fuse_buf *out_buf, ++ struct fuse_bufvec *in_buf) ++{ ++ ssize_t res, i, j; ++ size_t iovcnt = in_buf->count; ++ struct iovec *iov; ++ int fd = out_buf->fd; ++ ++ iov = calloc(iovcnt, sizeof(struct iovec)); ++ if (!iov) { ++ return -ENOMEM; ++ } ++ ++ for (i = 0, j = 0; i < iovcnt; i++) { ++ /* Skip the buf with 0 size */ ++ if (in_buf->buf[i].size) { ++ iov[j].iov_base = in_buf->buf[i].mem; ++ iov[j].iov_len = in_buf->buf[i].size; ++ j++; ++ } ++ } ++ ++ if (out_buf->flags & FUSE_BUF_FD_SEEK) { ++ res = pwritev(fd, iov, iovcnt, out_buf->pos); ++ } else { ++ res = writev(fd, iov, iovcnt); ++ } ++ ++ if (res == -1) { ++ res = -errno; ++ } ++ ++ 
free(iov); ++ return res; ++} ++ + static size_t min_size(size_t s1, size_t s2) + { + return s1 < s2 ? s1 : s2; +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch b/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch new file mode 100644 index 0000000..451f12b --- /dev/null +++ b/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch @@ -0,0 +1,170 @@ +From f91a9bdc171142174110e9ff1716b611f6fb0039 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:07 +0100 +Subject: [PATCH 036/116] virtiofsd: add --fd=FDNUM fd passing option +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-33-dgilbert@redhat.com> +Patchwork-id: 93487 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 032/112] virtiofsd: add --fd=FDNUM fd passing option +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Although --socket-path=PATH is useful for manual invocations, management +tools typically create the UNIX domain socket themselves and pass it to +the vhost-user device backend. This way QEMU can be launched +immediately with a valid socket. No waiting for the vhost-user device +backend is required when fd passing is used. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit cee8e35d4386e34bf79c3ca2aab7f7b1bb48cf8d) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 1 + + tools/virtiofsd/fuse_lowlevel.c | 16 ++++++++++++---- + tools/virtiofsd/fuse_virtio.c | 31 +++++++++++++++++++++++++------ + 3 files changed, 38 insertions(+), 10 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index 1126723..45995f3 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -68,6 +68,7 @@ struct fuse_session { + size_t bufsize; + int error; + char *vu_socket_path; ++ int vu_listen_fd; + int vu_socketfd; + struct fv_VuDev *virtio_dev; + }; +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 4f4684d..95f4db8 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2130,6 +2130,7 @@ static const struct fuse_opt fuse_ll_opts[] = { + LL_OPTION("--debug", debug, 1), + LL_OPTION("allow_root", deny_others, 1), + LL_OPTION("--socket-path=%s", vu_socket_path, 0), ++ LL_OPTION("--fd=%d", vu_listen_fd, 0), + FUSE_OPT_END + }; + +@@ -2147,7 +2148,8 @@ void fuse_lowlevel_help(void) + */ + printf( + " -o allow_root allow access by root\n" +- " --socket-path=PATH path for the vhost-user socket\n"); ++ " --socket-path=PATH path for the vhost-user socket\n" ++ " --fd=FDNUM fd number of vhost-user socket\n"); + } + + void fuse_session_destroy(struct fuse_session *se) +@@ -2191,6 +2193,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + goto out1; + } + se->fd = -1; ++ se->vu_listen_fd = -1; + se->conn.max_write = UINT_MAX; + se->conn.max_readahead = UINT_MAX; + +@@ -2212,8 +2215,13 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + goto out4; + } + +- if (!se->vu_socket_path) { +- fprintf(stderr, "fuse: missing -o vhost_user_socket option\n"); ++ if (!se->vu_socket_path && se->vu_listen_fd < 0) { ++ fuse_log(FUSE_LOG_ERR, "fuse: missing --socket-path or --fd 
option\n"); ++ goto out4; ++ } ++ if (se->vu_socket_path && se->vu_listen_fd >= 0) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: --socket-path and --fd cannot be given together\n"); + goto out4; + } + +@@ -2253,7 +2261,7 @@ void fuse_session_unmount(struct fuse_session *se) + + int fuse_lowlevel_is_virtio(struct fuse_session *se) + { +- return se->vu_socket_path != NULL; ++ return !!se->virtio_dev; + } + + #ifdef linux +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 7e2711b..635f877 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -638,18 +638,21 @@ int virtio_loop(struct fuse_session *se) + return 0; + } + +-int virtio_session_mount(struct fuse_session *se) ++static int fv_create_listen_socket(struct fuse_session *se) + { + struct sockaddr_un un; + mode_t old_umask; + ++ /* Nothing to do if fd is already initialized */ ++ if (se->vu_listen_fd >= 0) { ++ return 0; ++ } ++ + if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) { + fuse_log(FUSE_LOG_ERR, "Socket path too long\n"); + return -1; + } + +- se->fd = -1; +- + /* + * Create the Unix socket to communicate with qemu + * based on QEMU's vhost-user-bridge +@@ -682,15 +685,31 @@ int virtio_session_mount(struct fuse_session *se) + return -1; + } + ++ se->vu_listen_fd = listen_sock; ++ return 0; ++} ++ ++int virtio_session_mount(struct fuse_session *se) ++{ ++ int ret; ++ ++ ret = fv_create_listen_socket(se); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ se->fd = -1; ++ + fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n", + __func__); +- int data_sock = accept(listen_sock, NULL, NULL); ++ int data_sock = accept(se->vu_listen_fd, NULL, NULL); + if (data_sock == -1) { + fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n"); +- close(listen_sock); ++ close(se->vu_listen_fd); + return -1; + } +- close(listen_sock); ++ close(se->vu_listen_fd); ++ se->vu_listen_fd = -1; + fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket 
connection\n", + __func__); + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch b/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch new file mode 100644 index 0000000..b874dc9 --- /dev/null +++ b/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch @@ -0,0 +1,134 @@ +From 1b0edd3d0a2ee5c097bcf3501c1dfa937f02e473 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:21 +0100 +Subject: [PATCH 050/116] virtiofsd: add fuse_mbuf_iter API +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-47-dgilbert@redhat.com> +Patchwork-id: 93502 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 046/112] virtiofsd: add fuse_mbuf_iter API +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Introduce an API for consuming bytes from a buffer with size checks. +All FUSE operations will be converted to use this safe API instead of +void *inarg. + +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit dad157e880416ab3a0e45beaa0e81977516568bc) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 28 +++++++++++++++++++++++++ + tools/virtiofsd/fuse_common.h | 49 ++++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 76 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 772efa9..42a608f 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -267,3 +267,31 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) + + return copied; + } ++ ++void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len) ++{ ++ void *ptr; ++ ++ if (len > iter->size - iter->pos) { ++ return NULL; ++ } ++ ++ ptr = iter->mem + iter->pos; ++ iter->pos += len; ++ return ptr; ++} ++ ++const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter) ++{ ++ const char *str = iter->mem + iter->pos; ++ size_t remaining = iter->size - iter->pos; ++ size_t i; ++ ++ for (i = 0; i < remaining; i++) { ++ if (str[i] == '\0') { ++ iter->pos += i + 1; ++ return str; ++ } ++ } ++ return NULL; ++} +diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h +index 0cb33ac..f8f6433 100644 +--- a/tools/virtiofsd/fuse_common.h ++++ b/tools/virtiofsd/fuse_common.h +@@ -703,10 +703,57 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); + */ + ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src); + ++/** ++ * Memory buffer iterator ++ * ++ */ ++struct fuse_mbuf_iter { ++ /** ++ * Data pointer ++ */ ++ void *mem; ++ ++ /** ++ * Total length, in bytes ++ */ ++ size_t size; ++ ++ /** ++ * Offset from start of buffer ++ */ ++ size_t pos; ++}; ++ ++/* Initialize memory buffer iterator from a fuse_buf */ ++#define FUSE_MBUF_ITER_INIT(fbuf) \ ++ ((struct fuse_mbuf_iter){ \ ++ .mem = fbuf->mem, \ ++ .size = fbuf->size, \ ++ .pos = 0, \ ++ }) ++ ++/** ++ * Consume bytes from a memory buffer iterator ++ * ++ * 
@param iter memory buffer iterator ++ * @param len number of bytes to consume ++ * @return pointer to start of consumed bytes or ++ * NULL if advancing beyond end of buffer ++ */ ++void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len); ++ ++/** ++ * Consume a NUL-terminated string from a memory buffer iterator ++ * ++ * @param iter memory buffer iterator ++ * @return pointer to the string or ++ * NULL if advancing beyond end of buffer or there is no NUL-terminator ++ */ ++const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter); ++ + /* + * Signal handling + */ +- + /** + * Exit session on HUP, TERM and INT signals and ignore PIPE signal + * +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch b/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch new file mode 100644 index 0000000..bdef115 --- /dev/null +++ b/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch @@ -0,0 +1,88 @@ +From 7a3c94e10b087c06635ef72aadb1550184dd5c58 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:58 +0100 +Subject: [PATCH 087/116] virtiofsd: add helper for lo_data cleanup +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-84-dgilbert@redhat.com> +Patchwork-id: 93538 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 083/112] virtiofsd: add helper for lo_data cleanup +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +This offers an helper function for lo_data's cleanup. + +Signed-off-by: Liu Bo +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 18a69cbbb6a4caa7c2040c6db4a33b044a32be7e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 37 +++++++++++++++++++++---------------- + 1 file changed, 21 insertions(+), 16 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 056ebe8..e8dc5c7 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2407,6 +2407,26 @@ static gboolean lo_key_equal(gconstpointer a, gconstpointer b) + return la->ino == lb->ino && la->dev == lb->dev; + } + ++static void fuse_lo_data_cleanup(struct lo_data *lo) ++{ ++ if (lo->inodes) { ++ g_hash_table_destroy(lo->inodes); ++ } ++ lo_map_destroy(&lo->fd_map); ++ lo_map_destroy(&lo->dirp_map); ++ lo_map_destroy(&lo->ino_map); ++ ++ if (lo->proc_self_fd >= 0) { ++ close(lo->proc_self_fd); ++ } ++ ++ if (lo->root.fd >= 0) { ++ close(lo->root.fd); ++ } ++ ++ free(lo->source); ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -2554,22 +2574,7 @@ err_out2: + err_out1: + fuse_opt_free_args(&args); + +- if (lo.inodes) { +- g_hash_table_destroy(lo.inodes); +- } +- lo_map_destroy(&lo.fd_map); +- lo_map_destroy(&lo.dirp_map); +- lo_map_destroy(&lo.ino_map); +- +- if (lo.proc_self_fd >= 0) { +- close(lo.proc_self_fd); +- } +- +- if (lo.root.fd >= 0) { +- close(lo.root.fd); +- } +- +- free(lo.source); ++ fuse_lo_data_cleanup(&lo); + + return ret ? 1 : 0; + } +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch b/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch new file mode 100644 index 0000000..5e81663 --- /dev/null +++ b/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch @@ -0,0 +1,46 @@ +From c55995c25f60168e3cb6b5bae1bf9a47813383d0 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:55 +0100 +Subject: [PATCH 024/116] virtiofsd: add -o source=PATH to help output +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-21-dgilbert@redhat.com> +Patchwork-id: 93474 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 020/112] virtiofsd: add -o source=PATH to help output +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +The -o source=PATH option will be used by most command-line invocations. +Let's document it! + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 4ff075f72be2f489c8998ae492ec5cdbbbd73e07) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 26ac870..fc9b264 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1319,6 +1319,7 @@ int main(int argc, char *argv[]) + if (opts.show_help) { + printf("usage: %s [options]\n\n", argv[0]); + fuse_cmdline_help(); ++ printf(" -o source=PATH shared directory tree\n"); + fuse_lowlevel_help(); + ret = 0; + goto err_out1; +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-add-print-capabilities-option.patch b/kvm-virtiofsd-add-print-capabilities-option.patch new file mode 100644 index 0000000..b57e408 --- /dev/null +++ b/kvm-virtiofsd-add-print-capabilities-option.patch @@ -0,0 +1,121 @@ +From 23d81ee7564084f29e32fedaed5196ae1a5a3240 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:10 +0100 +Subject: [PATCH 039/116] virtiofsd: add --print-capabilities option +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-36-dgilbert@redhat.com> +Patchwork-id: 93486 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 035/112] virtiofsd: add --print-capabilities option +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Add the --print-capabilities option as per vhost-user.rst "Backend +programs conventions". Currently there are no advertised features. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 45018fbb0a73ce66fd3dd87ecd2872b45658add4) +Signed-off-by: Miroslav Rezanina +--- + docs/interop/vhost-user.json | 4 +++- + tools/virtiofsd/fuse_lowlevel.h | 1 + + tools/virtiofsd/helper.c | 2 ++ + tools/virtiofsd/passthrough_ll.c | 12 ++++++++++++ + 4 files changed, 18 insertions(+), 1 deletion(-) + +diff --git a/docs/interop/vhost-user.json b/docs/interop/vhost-user.json +index da6aaf5..d4ea1f7 100644 +--- a/docs/interop/vhost-user.json ++++ b/docs/interop/vhost-user.json +@@ -31,6 +31,7 @@ + # @rproc-serial: virtio remoteproc serial link + # @scsi: virtio scsi + # @vsock: virtio vsock transport ++# @fs: virtio fs (since 4.2) + # + # Since: 4.0 + ## +@@ -50,7 +51,8 @@ + 'rpmsg', + 'rproc-serial', + 'scsi', +- 'vsock' ++ 'vsock', ++ 'fs' + ] + } + +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index f6b3470..0d61df8 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1794,6 +1794,7 @@ struct fuse_cmdline_opts { + int nodefault_subtype; + int show_version; + int show_help; ++ int print_capabilities; + unsigned int max_idle_threads; + }; + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index a3645fc..b8ec5ac 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -40,6 +40,7 @@ static const struct fuse_opt fuse_helper_opts[] = { + FUSE_HELPER_OPT("--help", 
show_help), + FUSE_HELPER_OPT("-V", show_version), + FUSE_HELPER_OPT("--version", show_version), ++ FUSE_HELPER_OPT("--print-capabilities", print_capabilities), + FUSE_HELPER_OPT("-d", debug), + FUSE_HELPER_OPT("debug", debug), + FUSE_HELPER_OPT("-d", foreground), +@@ -135,6 +136,7 @@ void fuse_cmdline_help(void) + { + printf(" -h --help print help\n" + " -V --version print version\n" ++ " --print-capabilities print vhost-user.json\n" + " -d -o debug enable debug output (implies -f)\n" + " -f foreground operation\n" + " --daemonize run in background\n" +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 037c5d7..cd27c09 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1298,6 +1298,14 @@ static struct fuse_lowlevel_ops lo_oper = { + .lseek = lo_lseek, + }; + ++/* Print vhost-user.json backend program capabilities */ ++static void print_capabilities(void) ++{ ++ printf("{\n"); ++ printf(" \"type\": \"fs\"\n"); ++ printf("}\n"); ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -1328,6 +1336,10 @@ int main(int argc, char *argv[]) + fuse_lowlevel_version(); + ret = 0; + goto err_out1; ++ } else if (opts.print_capabilities) { ++ print_capabilities(); ++ ret = 0; ++ goto err_out1; + } + + if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-add-seccomp-whitelist.patch b/kvm-virtiofsd-add-seccomp-whitelist.patch new file mode 100644 index 0000000..b34108e --- /dev/null +++ b/kvm-virtiofsd-add-seccomp-whitelist.patch @@ -0,0 +1,285 @@ +From 58c4e9473b364fb62aac797b0d69fd8ddb02c8c7 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:30 +0100 +Subject: [PATCH 059/116] virtiofsd: add seccomp whitelist +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-56-dgilbert@redhat.com> +Patchwork-id: 93511 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 055/112] virtiofsd: add seccomp whitelist +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Only allow system calls that are needed by virtiofsd. All other system +calls cause SIGSYS to be directed at the thread and the process will +coredump. + +Restricting system calls reduces the kernel attack surface and limits +what the process can do when compromised. + +Signed-off-by: Stefan Hajnoczi +with additional entries by: +Signed-off-by: Ganesh Maharaj Mahalingam +Signed-off-by: Masayoshi Mizuma +Signed-off-by: Misono Tomohiro +Signed-off-by: piaojun +Signed-off-by: Vivek Goyal +Signed-off-by: Eric Ren +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 4f8bde99c175ffd86b5125098a4707d43f5e80c6) + +Signed-off-by: Miroslav Rezanina +--- + Makefile | 5 +- + tools/virtiofsd/Makefile.objs | 5 +- + tools/virtiofsd/passthrough_ll.c | 2 + + tools/virtiofsd/seccomp.c | 151 +++++++++++++++++++++++++++++++++++++++ + tools/virtiofsd/seccomp.h | 14 ++++ + 5 files changed, 174 insertions(+), 3 deletions(-) + create mode 100644 tools/virtiofsd/seccomp.c + create mode 100644 tools/virtiofsd/seccomp.h + +diff --git a/Makefile b/Makefile +index 0e9755d..6879a06 100644 +--- a/Makefile ++++ b/Makefile +@@ -330,7 +330,7 @@ endif + endif + endif + +-ifdef CONFIG_LINUX ++ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) + HELPERS-y += virtiofsd$(EXESUF) + vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json + endif +@@ -681,7 +681,8 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad" + rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) + $(call LINK, $^) + +-ifdef CONFIG_LINUX # relies on Linux-specific syscalls ++# relies on Linux-specific syscalls ++ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) + virtiofsd$(EXESUF): $(virtiofsd-obj-y) 
libvhost-user.a $(COMMON_LDADDS) + $(call LINK, $^) + endif +diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs +index 45a8075..076f667 100644 +--- a/tools/virtiofsd/Makefile.objs ++++ b/tools/virtiofsd/Makefile.objs +@@ -5,5 +5,8 @@ virtiofsd-obj-y = buffer.o \ + fuse_signals.o \ + fuse_virtio.o \ + helper.o \ +- passthrough_ll.o ++ passthrough_ll.o \ ++ seccomp.o + ++seccomp.o-cflags := $(SECCOMP_CFLAGS) ++seccomp.o-libs := $(SECCOMP_LIBS) +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 0947d14..bd8925b 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -59,6 +59,7 @@ + #include + + #include "passthrough_helpers.h" ++#include "seccomp.h" + + struct lo_map_elem { + union { +@@ -2091,6 +2092,7 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) + { + setup_namespaces(lo, se); + setup_mounts(lo->source); ++ setup_seccomp(); + } + + int main(int argc, char *argv[]) +diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c +new file mode 100644 +index 0000000..691fb63 +--- /dev/null ++++ b/tools/virtiofsd/seccomp.c +@@ -0,0 +1,151 @@ ++/* ++ * Seccomp sandboxing for virtiofsd ++ * ++ * Copyright (C) 2019 Red Hat, Inc. 
++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++#include "seccomp.h" ++#include "fuse_i.h" ++#include "fuse_log.h" ++#include ++#include ++#include ++#include ++ ++/* Bodge for libseccomp 2.4.2 which broke ppoll */ ++#if !defined(__SNR_ppoll) && defined(__SNR_brk) ++#ifdef __NR_ppoll ++#define __SNR_ppoll __NR_ppoll ++#else ++#define __SNR_ppoll __PNR_ppoll ++#endif ++#endif ++ ++static const int syscall_whitelist[] = { ++ /* TODO ireg sem*() syscalls */ ++ SCMP_SYS(brk), ++ SCMP_SYS(capget), /* For CAP_FSETID */ ++ SCMP_SYS(capset), ++ SCMP_SYS(clock_gettime), ++ SCMP_SYS(clone), ++#ifdef __NR_clone3 ++ SCMP_SYS(clone3), ++#endif ++ SCMP_SYS(close), ++ SCMP_SYS(copy_file_range), ++ SCMP_SYS(dup), ++ SCMP_SYS(eventfd2), ++ SCMP_SYS(exit), ++ SCMP_SYS(exit_group), ++ SCMP_SYS(fallocate), ++ SCMP_SYS(fchmodat), ++ SCMP_SYS(fchownat), ++ SCMP_SYS(fcntl), ++ SCMP_SYS(fdatasync), ++ SCMP_SYS(fgetxattr), ++ SCMP_SYS(flistxattr), ++ SCMP_SYS(flock), ++ SCMP_SYS(fremovexattr), ++ SCMP_SYS(fsetxattr), ++ SCMP_SYS(fstat), ++ SCMP_SYS(fstatfs), ++ SCMP_SYS(fsync), ++ SCMP_SYS(ftruncate), ++ SCMP_SYS(futex), ++ SCMP_SYS(getdents), ++ SCMP_SYS(getdents64), ++ SCMP_SYS(getegid), ++ SCMP_SYS(geteuid), ++ SCMP_SYS(getpid), ++ SCMP_SYS(gettid), ++ SCMP_SYS(gettimeofday), ++ SCMP_SYS(linkat), ++ SCMP_SYS(lseek), ++ SCMP_SYS(madvise), ++ SCMP_SYS(mkdirat), ++ SCMP_SYS(mknodat), ++ SCMP_SYS(mmap), ++ SCMP_SYS(mprotect), ++ SCMP_SYS(mremap), ++ SCMP_SYS(munmap), ++ SCMP_SYS(newfstatat), ++ SCMP_SYS(open), ++ SCMP_SYS(openat), ++ SCMP_SYS(ppoll), ++ SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? 
*/ ++ SCMP_SYS(preadv), ++ SCMP_SYS(pread64), ++ SCMP_SYS(pwritev), ++ SCMP_SYS(pwrite64), ++ SCMP_SYS(read), ++ SCMP_SYS(readlinkat), ++ SCMP_SYS(recvmsg), ++ SCMP_SYS(renameat), ++ SCMP_SYS(renameat2), ++ SCMP_SYS(rt_sigaction), ++ SCMP_SYS(rt_sigprocmask), ++ SCMP_SYS(rt_sigreturn), ++ SCMP_SYS(sendmsg), ++ SCMP_SYS(setresgid), ++ SCMP_SYS(setresuid), ++#ifdef __NR_setresgid32 ++ SCMP_SYS(setresgid32), ++#endif ++#ifdef __NR_setresuid32 ++ SCMP_SYS(setresuid32), ++#endif ++ SCMP_SYS(set_robust_list), ++ SCMP_SYS(symlinkat), ++ SCMP_SYS(time), /* Rarely needed, except on static builds */ ++ SCMP_SYS(tgkill), ++ SCMP_SYS(unlinkat), ++ SCMP_SYS(utimensat), ++ SCMP_SYS(write), ++ SCMP_SYS(writev), ++}; ++ ++void setup_seccomp(void) ++{ ++ scmp_filter_ctx ctx; ++ size_t i; ++ ++#ifdef SCMP_ACT_KILL_PROCESS ++ ctx = seccomp_init(SCMP_ACT_KILL_PROCESS); ++ /* Handle a newer libseccomp but an older kernel */ ++ if (!ctx && errno == EOPNOTSUPP) { ++ ctx = seccomp_init(SCMP_ACT_TRAP); ++ } ++#else ++ ctx = seccomp_init(SCMP_ACT_TRAP); ++#endif ++ if (!ctx) { ++ fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n"); ++ exit(1); ++ } ++ ++ for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) { ++ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, ++ syscall_whitelist[i], 0) != 0) { ++ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d", ++ syscall_whitelist[i]); ++ exit(1); ++ } ++ } ++ ++ /* libvhost-user calls this for post-copy migration, we don't need it */ ++ if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS), ++ SCMP_SYS(userfaultfd), 0) != 0) { ++ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n"); ++ exit(1); ++ } ++ ++ if (seccomp_load(ctx) < 0) { ++ fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n"); ++ exit(1); ++ } ++ ++ seccomp_release(ctx); ++} +diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h +new file mode 100644 +index 0000000..86bce72 +--- /dev/null ++++ b/tools/virtiofsd/seccomp.h +@@ -0,0 +1,14 @@ ++/* ++ * Seccomp sandboxing for 
virtiofsd ++ * ++ * Copyright (C) 2019 Red Hat, Inc. ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#ifndef VIRTIOFSD_SECCOMP_H ++#define VIRTIOFSD_SECCOMP_H ++ ++void setup_seccomp(void); ++ ++#endif /* VIRTIOFSD_SECCOMP_H */ +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-add-some-options-to-the-help-message.patch b/kvm-virtiofsd-add-some-options-to-the-help-message.patch new file mode 100644 index 0000000..ac6dc54 --- /dev/null +++ b/kvm-virtiofsd-add-some-options-to-the-help-message.patch @@ -0,0 +1,74 @@ +From 6d62abb99b6b918f05f099b01a99f4326a69d650 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:26 +0100 +Subject: [PATCH 115/116] virtiofsd: add some options to the help message +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-112-dgilbert@redhat.com> +Patchwork-id: 93565 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 111/112] virtiofsd: add some options to the help message +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Masayoshi Mizuma + +Add following options to the help message: +- cache +- flock|no_flock +- norace +- posix_lock|no_posix_lock +- readdirplus|no_readdirplus +- timeout +- writeback|no_writeback +- xattr|no_xattr + +Signed-off-by: Masayoshi Mizuma + +dgilbert: Split cache, norace, posix_lock, readdirplus off + into our own earlier patches that added the options + +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 1d59b1b210d7c3b0bdf4b10ebe0bb1fccfcb8b95) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index f98d8f2..0801cf7 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -148,6 +148,8 @@ void fuse_cmdline_help(void) + " -o cache= cache mode. could be one of \"auto, " + "always, none\"\n" + " default: auto\n" ++ " -o flock|no_flock enable/disable flock\n" ++ " default: no_flock\n" + " -o log_level= log level, default to \"info\"\n" + " level could be one of \"debug, " + "info, warn, err\"\n" +@@ -163,7 +165,13 @@ void fuse_cmdline_help(void) + " enable/disable readirplus\n" + " default: readdirplus except with " + "cache=none\n" +- ); ++ " -o timeout= I/O timeout (second)\n" ++ " default: depends on cache= option.\n" ++ " -o writeback|no_writeback enable/disable writeback cache\n" ++ " default: no_writeback\n" ++ " -o xattr|no_xattr enable/disable xattr\n" ++ " default: no_xattr\n" ++ ); + } + + static int fuse_helper_opt_proc(void *data, const char *arg, int key, +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-add-syslog-command-line-option.patch b/kvm-virtiofsd-add-syslog-command-line-option.patch new file mode 100644 index 0000000..5b55342 --- /dev/null +++ b/kvm-virtiofsd-add-syslog-command-line-option.patch @@ -0,0 +1,239 @@ +From 6f5cf644bebc189bdb16f1caf3d7c47835d7c287 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:36 +0100 +Subject: [PATCH 065/116] virtiofsd: add --syslog command-line option +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-62-dgilbert@redhat.com> +Patchwork-id: 93509 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 061/112] virtiofsd: add --syslog command-line option +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Sometimes collecting output from stderr is inconvenient or does not fit +within the overall logging architecture. Add syslog(3) support for +cases where stderr cannot be used. + +Signed-off-by: Stefan Hajnoczi +dgilbert: Reworked as a logging function +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit f185621d41f03a23b55795b89e6584253fa23505) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.h | 1 + + tools/virtiofsd/helper.c | 2 ++ + tools/virtiofsd/passthrough_ll.c | 50 +++++++++++++++++++++++++++++++++++++--- + tools/virtiofsd/seccomp.c | 32 +++++++++++++++++-------- + tools/virtiofsd/seccomp.h | 4 +++- + 5 files changed, 76 insertions(+), 13 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 0d61df8..f2750bc 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1795,6 +1795,7 @@ struct fuse_cmdline_opts { + int show_version; + int show_help; + int print_capabilities; ++ int syslog; + unsigned int max_idle_threads; + }; + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 5531425..9692ef9 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -54,6 +54,7 @@ static const struct fuse_opt fuse_helper_opts[] = { + FUSE_HELPER_OPT("subtype=", nodefault_subtype), + FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), + FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), ++ FUSE_HELPER_OPT("--syslog", syslog), + FUSE_OPT_END + }; + +@@ -138,6 +139,7 @@ void fuse_cmdline_help(void) + " -V --version print 
version\n" + " --print-capabilities print vhost-user.json\n" + " -d -o debug enable debug output (implies -f)\n" ++ " --syslog log to syslog (default stderr)\n" + " -f foreground operation\n" + " --daemonize run in background\n" + " -o max_idle_threads the maximum number of idle worker " +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index c281d81..0372aca 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -58,6 +58,7 @@ + #include + #include + #include ++#include + #include + + #include "passthrough_helpers.h" +@@ -138,6 +139,7 @@ static const struct fuse_opt lo_opts[] = { + { "norace", offsetof(struct lo_data, norace), 1 }, + FUSE_OPT_END + }; ++static bool use_syslog = false; + + static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); + +@@ -2262,11 +2264,12 @@ static void setup_mounts(const char *source) + * Lock down this process to prevent access to other processes or files outside + * source directory. This reduces the impact of arbitrary code execution bugs. 
+ */ +-static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) ++static void setup_sandbox(struct lo_data *lo, struct fuse_session *se, ++ bool enable_syslog) + { + setup_namespaces(lo, se); + setup_mounts(lo->source); +- setup_seccomp(); ++ setup_seccomp(enable_syslog); + } + + /* Raise the maximum number of open file descriptors */ +@@ -2298,6 +2301,42 @@ static void setup_nofile_rlimit(void) + } + } + ++static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) ++{ ++ if (use_syslog) { ++ int priority = LOG_ERR; ++ switch (level) { ++ case FUSE_LOG_EMERG: ++ priority = LOG_EMERG; ++ break; ++ case FUSE_LOG_ALERT: ++ priority = LOG_ALERT; ++ break; ++ case FUSE_LOG_CRIT: ++ priority = LOG_CRIT; ++ break; ++ case FUSE_LOG_ERR: ++ priority = LOG_ERR; ++ break; ++ case FUSE_LOG_WARNING: ++ priority = LOG_WARNING; ++ break; ++ case FUSE_LOG_NOTICE: ++ priority = LOG_NOTICE; ++ break; ++ case FUSE_LOG_INFO: ++ priority = LOG_INFO; ++ break; ++ case FUSE_LOG_DEBUG: ++ priority = LOG_DEBUG; ++ break; ++ } ++ vsyslog(priority, fmt, ap); ++ } else { ++ vfprintf(stderr, fmt, ap); ++ } ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -2336,6 +2375,11 @@ int main(int argc, char *argv[]) + if (fuse_parse_cmdline(&args, &opts) != 0) { + return 1; + } ++ fuse_set_log_func(log_func); ++ use_syslog = opts.syslog; ++ if (use_syslog) { ++ openlog("virtiofsd", LOG_PID, LOG_DAEMON); ++ } + if (opts.show_help) { + printf("usage: %s [options]\n\n", argv[0]); + fuse_cmdline_help(); +@@ -2424,7 +2468,7 @@ int main(int argc, char *argv[]) + /* Must be before sandbox since it wants /proc */ + setup_capng(); + +- setup_sandbox(&lo, se); ++ setup_sandbox(&lo, se, opts.syslog); + + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); +diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c +index 691fb63..2d9d4a7 100644 +--- a/tools/virtiofsd/seccomp.c ++++ 
b/tools/virtiofsd/seccomp.c +@@ -107,11 +107,28 @@ static const int syscall_whitelist[] = { + SCMP_SYS(writev), + }; + +-void setup_seccomp(void) ++/* Syscalls used when --syslog is enabled */ ++static const int syscall_whitelist_syslog[] = { ++ SCMP_SYS(sendto), ++}; ++ ++static void add_whitelist(scmp_filter_ctx ctx, const int syscalls[], size_t len) + { +- scmp_filter_ctx ctx; + size_t i; + ++ for (i = 0; i < len; i++) { ++ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscalls[i], 0) != 0) { ++ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d failed\n", ++ syscalls[i]); ++ exit(1); ++ } ++ } ++} ++ ++void setup_seccomp(bool enable_syslog) ++{ ++ scmp_filter_ctx ctx; ++ + #ifdef SCMP_ACT_KILL_PROCESS + ctx = seccomp_init(SCMP_ACT_KILL_PROCESS); + /* Handle a newer libseccomp but an older kernel */ +@@ -126,13 +143,10 @@ void setup_seccomp(void) + exit(1); + } + +- for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) { +- if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, +- syscall_whitelist[i], 0) != 0) { +- fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d", +- syscall_whitelist[i]); +- exit(1); +- } ++ add_whitelist(ctx, syscall_whitelist, G_N_ELEMENTS(syscall_whitelist)); ++ if (enable_syslog) { ++ add_whitelist(ctx, syscall_whitelist_syslog, ++ G_N_ELEMENTS(syscall_whitelist_syslog)); + } + + /* libvhost-user calls this for post-copy migration, we don't need it */ +diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h +index 86bce72..d47c8ea 100644 +--- a/tools/virtiofsd/seccomp.h ++++ b/tools/virtiofsd/seccomp.h +@@ -9,6 +9,8 @@ + #ifndef VIRTIOFSD_SECCOMP_H + #define VIRTIOFSD_SECCOMP_H + +-void setup_seccomp(void); ++#include ++ ++void setup_seccomp(bool enable_syslog); + + #endif /* VIRTIOFSD_SECCOMP_H */ +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch b/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch new file mode 100644 index 0000000..0241a9d --- /dev/null +++ 
b/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch @@ -0,0 +1,106 @@ +From 3dbfb932288eb5a55dfdc0eebca7e4c7f0cf6f33 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:22 +0100 +Subject: [PATCH 111/116] virtiofsd: add --thread-pool-size=NUM option +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-108-dgilbert@redhat.com> +Patchwork-id: 93561 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 107/112] virtiofsd: add --thread-pool-size=NUM option +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Add an option to control the size of the thread pool. Requests are now +processed in parallel by default. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 951b3120dbc971f08681e1d860360e4a1e638902) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 1 + + tools/virtiofsd/fuse_lowlevel.c | 7 ++++++- + tools/virtiofsd/fuse_virtio.c | 5 +++-- + 3 files changed, 10 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index 1447d86..4e47e58 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -72,6 +72,7 @@ struct fuse_session { + int vu_listen_fd; + int vu_socketfd; + struct fv_VuDev *virtio_dev; ++ int thread_pool_size; + }; + + struct fuse_chan { +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 79a4031..de2e2e0 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -28,6 +28,7 @@ + #include + #include + ++#define THREAD_POOL_SIZE 64 + + #define OFFSET_MAX 0x7fffffffffffffffLL + +@@ -2519,6 +2520,7 @@ static const struct fuse_opt fuse_ll_opts[] = { + 
LL_OPTION("allow_root", deny_others, 1), + LL_OPTION("--socket-path=%s", vu_socket_path, 0), + LL_OPTION("--fd=%d", vu_listen_fd, 0), ++ LL_OPTION("--thread-pool-size=%d", thread_pool_size, 0), + FUSE_OPT_END + }; + +@@ -2537,7 +2539,9 @@ void fuse_lowlevel_help(void) + printf( + " -o allow_root allow access by root\n" + " --socket-path=PATH path for the vhost-user socket\n" +- " --fd=FDNUM fd number of vhost-user socket\n"); ++ " --fd=FDNUM fd number of vhost-user socket\n" ++ " --thread-pool-size=NUM thread pool size limit (default %d)\n", ++ THREAD_POOL_SIZE); + } + + void fuse_session_destroy(struct fuse_session *se) +@@ -2591,6 +2595,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + } + se->fd = -1; + se->vu_listen_fd = -1; ++ se->thread_pool_size = THREAD_POOL_SIZE; + se->conn.max_write = UINT_MAX; + se->conn.max_readahead = UINT_MAX; + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 0dcf2ef..9f65823 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -572,10 +572,11 @@ static void *fv_queue_thread(void *opaque) + struct fv_QueueInfo *qi = opaque; + struct VuDev *dev = &qi->virtio_dev->dev; + struct VuVirtq *q = vu_get_queue(dev, qi->qidx); ++ struct fuse_session *se = qi->virtio_dev->se; + GThreadPool *pool; + +- pool = g_thread_pool_new(fv_queue_worker, qi, 1 /* TODO max_threads */, +- TRUE, NULL); ++ pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, TRUE, ++ NULL); + if (!pool) { + fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); + return NULL; +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-add-vhost-user.json-file.patch b/kvm-virtiofsd-add-vhost-user.json-file.patch new file mode 100644 index 0000000..a24b24f --- /dev/null +++ b/kvm-virtiofsd-add-vhost-user.json-file.patch @@ -0,0 +1,73 @@ +From 77eb3258e76a1ac240503572d4f41d45cb832ba2 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:09 +0100 +Subject: [PATCH 038/116] virtiofsd: add vhost-user.json file +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-35-dgilbert@redhat.com> +Patchwork-id: 93490 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 034/112] virtiofsd: add vhost-user.json file +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Install a vhost-user.json file describing virtiofsd. This allows +libvirt and other management tools to enumerate vhost-user backend +programs. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 315616ed50ba15a5d7236ade8a402a93898202de) +Signed-off-by: Miroslav Rezanina +--- + .gitignore | 1 + + Makefile | 1 + + tools/virtiofsd/50-qemu-virtiofsd.json.in | 5 +++++ + 3 files changed, 7 insertions(+) + create mode 100644 tools/virtiofsd/50-qemu-virtiofsd.json.in + +diff --git a/.gitignore b/.gitignore +index aefad32..d7a4f99 100644 +--- a/.gitignore ++++ b/.gitignore +@@ -6,6 +6,7 @@ + /config-target.* + /config.status + /config-temp ++/tools/virtiofsd/50-qemu-virtiofsd.json + /elf2dmp + /trace-events-all + /trace/generated-events.h +diff --git a/Makefile b/Makefile +index 1526775..0e9755d 100644 +--- a/Makefile ++++ b/Makefile +@@ -332,6 +332,7 @@ endif + + ifdef CONFIG_LINUX + HELPERS-y += virtiofsd$(EXESUF) ++vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json + endif + + # Sphinx does not allow building manuals into the same directory as +diff --git a/tools/virtiofsd/50-qemu-virtiofsd.json.in b/tools/virtiofsd/50-qemu-virtiofsd.json.in +new file mode 100644 +index 0000000..9bcd86f +--- /dev/null ++++ b/tools/virtiofsd/50-qemu-virtiofsd.json.in +@@ -0,0 +1,5 @@ ++{ ++ "description": "QEMU virtiofsd vhost-user-fs", ++ 
"type": "fs", ++ "binary": "@libexecdir@/virtiofsd" ++} +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-cap-ng-helpers.patch b/kvm-virtiofsd-cap-ng-helpers.patch new file mode 100644 index 0000000..305745d --- /dev/null +++ b/kvm-virtiofsd-cap-ng-helpers.patch @@ -0,0 +1,175 @@ +From f62613d8058bcb60b26727d980a37537103b0033 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:32 +0100 +Subject: [PATCH 061/116] virtiofsd: cap-ng helpers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-58-dgilbert@redhat.com> +Patchwork-id: 93512 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 057/112] virtiofsd: cap-ng helpers +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +libcap-ng reads /proc during capng_get_caps_process, and virtiofsd's +sandboxing doesn't have /proc mounted; thus we have to do the +caps read before we sandbox it and save/restore the state. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 2405f3c0d19eb4d516a88aa4e5c54e5f9c6bbea3) +Signed-off-by: Miroslav Rezanina +--- + Makefile | 4 +-- + tools/virtiofsd/passthrough_ll.c | 72 ++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 74 insertions(+), 2 deletions(-) + +diff --git a/Makefile b/Makefile +index 6879a06..ff05c30 100644 +--- a/Makefile ++++ b/Makefile +@@ -330,7 +330,7 @@ endif + endif + endif + +-ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) ++ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) + HELPERS-y += virtiofsd$(EXESUF) + vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json + endif +@@ -682,7 +682,7 @@ rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) + $(call LINK, $^) + + # relies on Linux-specific syscalls +-ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) ++ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) + virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) + $(call LINK, $^) + endif +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index bd8925b..97e7c75 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -39,6 +39,7 @@ + #include "fuse_virtio.h" + #include "fuse_lowlevel.h" + #include ++#include + #include + #include + #include +@@ -139,6 +140,13 @@ static const struct fuse_opt lo_opts[] = { + + static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); + ++static struct { ++ pthread_mutex_t mutex; ++ void *saved; ++} cap; ++/* That we loaded cap-ng in the current thread from the saved */ ++static __thread bool cap_loaded = 0; ++ + static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); + + static int is_dot_or_dotdot(const char *name) +@@ -162,6 +170,37 @@ static struct lo_data *lo_data(fuse_req_t req) + return (struct lo_data *)fuse_req_userdata(req); + } + ++/* ++ * Load capng's state from our saved state if the current thread ++ * hadn't previously been loaded. 
++ * returns 0 on success ++ */ ++static int load_capng(void) ++{ ++ if (!cap_loaded) { ++ pthread_mutex_lock(&cap.mutex); ++ capng_restore_state(&cap.saved); ++ /* ++ * restore_state free's the saved copy ++ * so make another. ++ */ ++ cap.saved = capng_save_state(); ++ if (!cap.saved) { ++ fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n"); ++ return -EINVAL; ++ } ++ pthread_mutex_unlock(&cap.mutex); ++ ++ /* ++ * We want to use the loaded state for our pid, ++ * not the original ++ */ ++ capng_setpid(syscall(SYS_gettid)); ++ cap_loaded = true; ++ } ++ return 0; ++} ++ + static void lo_map_init(struct lo_map *map) + { + map->elems = NULL; +@@ -2024,6 +2063,35 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) + } + + /* ++ * Capture the capability state, we'll need to restore this for individual ++ * threads later; see load_capng. ++ */ ++static void setup_capng(void) ++{ ++ /* Note this accesses /proc so has to happen before the sandbox */ ++ if (capng_get_caps_process()) { ++ fuse_log(FUSE_LOG_ERR, "capng_get_caps_process\n"); ++ exit(1); ++ } ++ pthread_mutex_init(&cap.mutex, NULL); ++ pthread_mutex_lock(&cap.mutex); ++ cap.saved = capng_save_state(); ++ if (!cap.saved) { ++ fuse_log(FUSE_LOG_ERR, "capng_save_state\n"); ++ exit(1); ++ } ++ pthread_mutex_unlock(&cap.mutex); ++} ++ ++static void cleanup_capng(void) ++{ ++ free(cap.saved); ++ cap.saved = NULL; ++ pthread_mutex_destroy(&cap.mutex); ++} ++ ++ ++/* + * Make the source directory our root so symlinks cannot escape and no other + * files are accessible. Assumes unshare(CLONE_NEWNS) was already called. 
+ */ +@@ -2216,12 +2284,16 @@ int main(int argc, char *argv[]) + + fuse_daemonize(opts.foreground); + ++ /* Must be before sandbox since it wants /proc */ ++ setup_capng(); ++ + setup_sandbox(&lo, se); + + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); + + fuse_session_unmount(se); ++ cleanup_capng(); + err_out3: + fuse_remove_signal_handlers(se); + err_out2: +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch b/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch new file mode 100644 index 0000000..caa4560 --- /dev/null +++ b/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch @@ -0,0 +1,1111 @@ +From d6a0067e6c08523a8f605f775be980eaf0a23690 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:23 +0100 +Subject: [PATCH 052/116] virtiofsd: check input buffer size in fuse_lowlevel.c + ops +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-49-dgilbert@redhat.com> +Patchwork-id: 93503 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 048/112] virtiofsd: check input buffer size in fuse_lowlevel.c ops +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Each FUSE operation involves parsing the input buffer. Currently the +code assumes the input buffer is large enough for the expected +arguments. This patch uses fuse_mbuf_iter to check the size. + +Most operations are simple to convert. Some are more complicated due to +variable-length inputs or different sizes depending on the protocol +version. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 70995754416eb4491c31607fe380a83cfd25a087) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 581 +++++++++++++++++++++++++++++++--------- + 1 file changed, 456 insertions(+), 125 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 611e8b0..02e1d83 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -27,7 +28,6 @@ + #include + + +-#define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) + #define OFFSET_MAX 0x7fffffffffffffffLL + + struct fuse_pollhandle { +@@ -706,9 +706,14 @@ int fuse_reply_lseek(fuse_req_t req, off_t off) + return send_reply_ok(req, &arg, sizeof(arg)); + } + +-static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- char *name = (char *)inarg; ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ if (!name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.lookup) { + req->se->op.lookup(req, nodeid, name); +@@ -717,9 +722,16 @@ static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_forget(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_forget_in *arg = (struct fuse_forget_in *)inarg; ++ struct fuse_forget_in *arg; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.forget) { + req->se->op.forget(req, nodeid, arg->nlookup); +@@ -729,20 +741,48 @@ static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + + static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, +- const void 
*inarg) ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_batch_forget_in *arg = (void *)inarg; +- struct fuse_forget_one *param = (void *)PARAM(arg); +- unsigned int i; ++ struct fuse_batch_forget_in *arg; ++ struct fuse_forget_data *forgets; ++ size_t scount; + + (void)nodeid; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_none(req); ++ return; ++ } ++ ++ /* ++ * Prevent integer overflow. The compiler emits the following warning ++ * unless we use the scount local variable: ++ * ++ * error: comparison is always false due to limited range of data type ++ * [-Werror=type-limits] ++ * ++ * This may be true on 64-bit hosts but we need this check for 32-bit ++ * hosts. ++ */ ++ scount = arg->count; ++ if (scount > SIZE_MAX / sizeof(forgets[0])) { ++ fuse_reply_none(req); ++ return; ++ } ++ ++ forgets = fuse_mbuf_iter_advance(iter, arg->count * sizeof(forgets[0])); ++ if (!forgets) { ++ fuse_reply_none(req); ++ return; ++ } ++ + if (req->se->op.forget_multi) { +- req->se->op.forget_multi(req, arg->count, +- (struct fuse_forget_data *)param); ++ req->se->op.forget_multi(req, arg->count, forgets); + } else if (req->se->op.forget) { ++ unsigned int i; ++ + for (i = 0; i < arg->count; i++) { +- struct fuse_forget_one *forget = ¶m[i]; + struct fuse_req *dummy_req; + + dummy_req = fuse_ll_alloc_req(req->se); +@@ -754,7 +794,7 @@ static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, + dummy_req->ctx = req->ctx; + dummy_req->ch = NULL; + +- req->se->op.forget(dummy_req, forget->nodeid, forget->nlookup); ++ req->se->op.forget(dummy_req, forgets[i].ino, forgets[i].nlookup); + } + fuse_reply_none(req); + } else { +@@ -762,12 +802,19 @@ static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, + } + } + +-static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { + struct fuse_file_info *fip = NULL; + struct 
fuse_file_info fi; + +- struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; ++ struct fuse_getattr_in *arg; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (arg->getattr_flags & FUSE_GETATTR_FH) { + memset(&fi, 0, sizeof(fi)); +@@ -782,14 +829,21 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_setattr_in *arg = (struct fuse_setattr_in *)inarg; +- + if (req->se->op.setattr) { ++ struct fuse_setattr_in *arg; + struct fuse_file_info *fi = NULL; + struct fuse_file_info fi_store; + struct stat stbuf; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&stbuf, 0, sizeof(stbuf)); + convert_attr(arg, &stbuf); + if (arg->valid & FATTR_FH) { +@@ -810,9 +864,16 @@ static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_access(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_access_in *arg = (struct fuse_access_in *)inarg; ++ struct fuse_access_in *arg; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.access) { + req->se->op.access(req, nodeid, arg->mask); +@@ -821,9 +882,10 @@ static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- (void)inarg; ++ (void)iter; + + if (req->se->op.readlink) { + req->se->op.readlink(req, nodeid); 
+@@ -832,10 +894,18 @@ static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; +- char *name = PARAM(arg); ++ struct fuse_mknod_in *arg; ++ const char *name; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ name = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + req->ctx.umask = arg->umask; + +@@ -846,22 +916,37 @@ static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; ++ struct fuse_mkdir_in *arg; ++ const char *name; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ name = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + req->ctx.umask = arg->umask; + + if (req->se->op.mkdir) { +- req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); ++ req->se->op.mkdir(req, nodeid, name, arg->mode); + } else { + fuse_reply_err(req, ENOSYS); + } + } + +-static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- char *name = (char *)inarg; ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ ++ if (!name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.unlink) { + req->se->op.unlink(req, nodeid, name); +@@ -870,9 +955,15 @@ static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void 
*inarg) ++static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- char *name = (char *)inarg; ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ ++ if (!name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.rmdir) { + req->se->op.rmdir(req, nodeid, name); +@@ -881,10 +972,16 @@ static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- char *name = (char *)inarg; +- char *linkname = ((char *)inarg) + strlen((char *)inarg) + 1; ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ const char *linkname = fuse_mbuf_iter_advance_str(iter); ++ ++ if (!name || !linkname) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.symlink) { + req->se->op.symlink(req, linkname, nodeid, name); +@@ -893,11 +990,20 @@ static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_rename(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_rename_in *arg = (struct fuse_rename_in *)inarg; +- char *oldname = PARAM(arg); +- char *newname = oldname + strlen(oldname) + 1; ++ struct fuse_rename_in *arg; ++ const char *oldname; ++ const char *newname; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ oldname = fuse_mbuf_iter_advance_str(iter); ++ newname = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !oldname || !newname) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.rename) { + req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, 0); +@@ -906,11 +1012,20 @@ static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, 
const void *inarg) ++static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_rename2_in *arg = (struct fuse_rename2_in *)inarg; +- char *oldname = PARAM(arg); +- char *newname = oldname + strlen(oldname) + 1; ++ struct fuse_rename2_in *arg; ++ const char *oldname; ++ const char *newname; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ oldname = fuse_mbuf_iter_advance_str(iter); ++ newname = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !oldname || !newname) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.rename) { + req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, +@@ -920,24 +1035,38 @@ static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_link(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_link_in *arg = (struct fuse_link_in *)inarg; ++ struct fuse_link_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.link) { +- req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); ++ req->se->op.link(req, arg->oldnodeid, nodeid, name); + } else { + fuse_reply_err(req, ENOSYS); + } + } + +-static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_create(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_create_in *arg = (struct fuse_create_in *)inarg; +- + if (req->se->op.create) { ++ struct fuse_create_in *arg; + struct fuse_file_info fi; +- char *name = PARAM(arg); ++ const char *name; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ name = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + memset(&fi, 0, sizeof(fi)); + 
fi.flags = arg->flags; +@@ -950,11 +1079,18 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_open(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_open_in *arg = (struct fuse_open_in *)inarg; ++ struct fuse_open_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + +@@ -965,13 +1101,15 @@ static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_read(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; +- + if (req->se->op.read) { ++ struct fuse_read_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.lock_owner = arg->lock_owner; +@@ -982,11 +1120,24 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_write(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_write_in *arg = (struct fuse_write_in *)inarg; ++ struct fuse_write_in *arg; + struct fuse_file_info fi; +- char *param; ++ const char *param; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ param = fuse_mbuf_iter_advance(iter, arg->size); ++ if (!param) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; +@@ -994,7 +1145,6 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + + 
fi.lock_owner = arg->lock_owner; + fi.flags = arg->flags; +- param = PARAM(arg); + + if (req->se->op.write) { + req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); +@@ -1052,11 +1202,18 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, + se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); + } + +-static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_flush(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_flush_in *arg = (struct fuse_flush_in *)inarg; ++ struct fuse_flush_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.flush = 1; +@@ -1069,19 +1226,26 @@ static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_release(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_release_in *arg = (struct fuse_release_in *)inarg; ++ struct fuse_release_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + fi.fh = arg->fh; + fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 
1 : 0; + fi.lock_owner = arg->lock_owner; ++ + if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { + fi.flock_release = 1; +- fi.lock_owner = arg->lock_owner; + } + + if (req->se->op.release) { +@@ -1091,11 +1255,19 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; ++ struct fuse_fsync_in *arg; + struct fuse_file_info fi; +- int datasync = arg->fsync_flags & 1; ++ int datasync; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ datasync = arg->fsync_flags & 1; + + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; +@@ -1111,11 +1283,18 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_open_in *arg = (struct fuse_open_in *)inarg; ++ struct fuse_open_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + +@@ -1126,11 +1305,18 @@ static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; ++ struct fuse_read_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + 
fi.fh = arg->fh; + +@@ -1141,11 +1327,18 @@ static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; ++ struct fuse_read_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + +@@ -1156,11 +1349,18 @@ static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_release_in *arg = (struct fuse_release_in *)inarg; ++ struct fuse_release_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.flags = arg->flags; + fi.fh = arg->fh; +@@ -1172,11 +1372,19 @@ static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; ++ struct fuse_fsync_in *arg; + struct fuse_file_info fi; +- int datasync = arg->fsync_flags & 1; ++ int datasync; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ datasync = arg->fsync_flags & 1; + + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; +@@ -1188,10 +1396,11 @@ static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) 
+ } + } + +-static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { + (void)nodeid; +- (void)inarg; ++ (void)iter; + + if (req->se->op.statfs) { + req->se->op.statfs(req, nodeid); +@@ -1204,11 +1413,25 @@ static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *)inarg; +- char *name = PARAM(arg); +- char *value = name + strlen(name) + 1; ++ struct fuse_setxattr_in *arg; ++ const char *name; ++ const char *value; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ name = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ value = fuse_mbuf_iter_advance(iter, arg->size); ++ if (!value) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.setxattr) { + req->se->op.setxattr(req, nodeid, name, value, arg->size, arg->flags); +@@ -1217,20 +1440,36 @@ static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; ++ struct fuse_getxattr_in *arg; ++ const char *name; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ name = fuse_mbuf_iter_advance_str(iter); ++ if (!arg || !name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.getxattr) { +- req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); ++ req->se->op.getxattr(req, nodeid, name, arg->size); + } else { + fuse_reply_err(req, ENOSYS); + } + } + +-static void 
do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; ++ struct fuse_getxattr_in *arg; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.listxattr) { + req->se->op.listxattr(req, nodeid, arg->size); +@@ -1239,9 +1478,15 @@ static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- char *name = (char *)inarg; ++ const char *name = fuse_mbuf_iter_advance_str(iter); ++ ++ if (!name) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.removexattr) { + req->se->op.removexattr(req, nodeid, name); +@@ -1265,12 +1510,19 @@ static void convert_fuse_file_lock(struct fuse_file_lock *fl, + flock->l_pid = fl->pid; + } + +-static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; ++ struct fuse_lk_in *arg; + struct fuse_file_info fi; + struct flock flock; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.lock_owner = arg->owner; +@@ -1284,12 +1536,18 @@ static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + + static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, +- const void *inarg, int sleep) ++ struct fuse_mbuf_iter *iter, int sleep) + { +- struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; ++ struct fuse_lk_in *arg; + struct fuse_file_info fi; + struct flock 
flock; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.lock_owner = arg->owner; +@@ -1327,14 +1585,16 @@ static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, + } + } + +-static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- do_setlk_common(req, nodeid, inarg, 0); ++ do_setlk_common(req, nodeid, iter, 0); + } + +-static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- do_setlk_common(req, nodeid, inarg, 1); ++ do_setlk_common(req, nodeid, iter, 1); + } + + static int find_interrupted(struct fuse_session *se, struct fuse_req *req) +@@ -1379,12 +1639,20 @@ static int find_interrupted(struct fuse_session *se, struct fuse_req *req) + return 0; + } + +-static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *)inarg; ++ struct fuse_interrupt_in *arg; + struct fuse_session *se = req->se; + + (void)nodeid; ++ ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + if (se->debug) { + fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", + (unsigned long long)arg->unique); +@@ -1425,9 +1693,15 @@ static struct fuse_req *check_interrupt(struct fuse_session *se, + } + } + +-static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_bmap_in *arg = (struct fuse_bmap_in *)inarg; ++ struct fuse_bmap_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ ++ if 
(!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + if (req->se->op.bmap) { + req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); +@@ -1436,18 +1710,34 @@ static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *)inarg; +- unsigned int flags = arg->flags; +- void *in_buf = arg->in_size ? PARAM(arg) : NULL; ++ struct fuse_ioctl_in *arg; ++ unsigned int flags; ++ void *in_buf = NULL; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ flags = arg->flags; + if (flags & FUSE_IOCTL_DIR && !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { + fuse_reply_err(req, ENOTTY); + return; + } + ++ if (arg->in_size) { ++ in_buf = fuse_mbuf_iter_advance(iter, arg->in_size); ++ if (!in_buf) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + +@@ -1468,11 +1758,18 @@ void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) + free(ph); + } + +-static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_poll(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_poll_in *arg = (struct fuse_poll_in *)inarg; ++ struct fuse_poll_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + fi.poll_events = arg->events; +@@ -1496,11 +1793,18 @@ static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, ++ 
struct fuse_mbuf_iter *iter) + { +- struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *)inarg; ++ struct fuse_fallocate_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + +@@ -1513,12 +1817,17 @@ static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + + static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, +- const void *inarg) ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_copy_file_range_in *arg = +- (struct fuse_copy_file_range_in *)inarg; ++ struct fuse_copy_file_range_in *arg; + struct fuse_file_info fi_in, fi_out; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + memset(&fi_in, 0, sizeof(fi_in)); + fi_in.fh = arg->fh_in; + +@@ -1535,11 +1844,17 @@ static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, + } + } + +-static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_lseek_in *arg = (struct fuse_lseek_in *)inarg; ++ struct fuse_lseek_in *arg; + struct fuse_file_info fi; + ++ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; + +@@ -1550,15 +1865,33 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_init(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { +- struct fuse_init_in *arg = (struct fuse_init_in *)inarg; ++ size_t compat_size = offsetof(struct fuse_init_in, max_readahead); ++ struct fuse_init_in *arg; + struct fuse_init_out outarg; + struct fuse_session *se = req->se; + 
size_t bufsize = se->bufsize; + size_t outargsize = sizeof(outarg); + + (void)nodeid; ++ ++ /* First consume the old fields... */ ++ arg = fuse_mbuf_iter_advance(iter, compat_size); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ /* ...and now consume the new fields. */ ++ if (arg->major == 7 && arg->minor >= 6) { ++ if (!fuse_mbuf_iter_advance(iter, sizeof(*arg) - compat_size)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ } ++ + if (se->debug) { + fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); + if (arg->major == 7 && arg->minor >= 6) { +@@ -1791,12 +2124,13 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + send_reply_ok(req, &outarg, outargsize); + } + +-static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) ++static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter) + { + struct fuse_session *se = req->se; + + (void)nodeid; +- (void)inarg; ++ (void)iter; + + se->got_destroy = 1; + if (se->op.destroy) { +@@ -1976,7 +2310,7 @@ int fuse_req_interrupted(fuse_req_t req) + } + + static struct { +- void (*func)(fuse_req_t, fuse_ino_t, const void *); ++ void (*func)(fuse_req_t, fuse_ino_t, struct fuse_mbuf_iter *); + const char *name; + } fuse_ll_ops[] = { + [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, +@@ -2060,7 +2394,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, + const struct fuse_buf *buf = bufv->buf; + struct fuse_mbuf_iter iter = FUSE_MBUF_ITER_INIT(buf); + struct fuse_in_header *in; +- const void *inarg; + struct fuse_req *req; + int err; + +@@ -2138,13 +2471,11 @@ void fuse_session_process_buf_int(struct fuse_session *se, + } + } + +- inarg = (void *)&in[1]; + if (in->opcode == FUSE_WRITE && se->op.write_buf) { + do_write_buf(req, in->nodeid, &iter, bufv); + } else { +- fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); ++ fuse_ll_ops[in->opcode].func(req, in->nodeid, &iter); + } +- + return; + + reply_err: +-- 
+1.8.3.1 + diff --git a/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch b/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch new file mode 100644 index 0000000..b6de0a9 --- /dev/null +++ b/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch @@ -0,0 +1,82 @@ +From 99ff67682ef7c5659bdc9836008541861ae313d5 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:56 +0100 +Subject: [PATCH 085/116] virtiofsd: cleanup allocated resource in se +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-82-dgilbert@redhat.com> +Patchwork-id: 93533 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 081/112] virtiofsd: cleanup allocated resource in se +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +This cleans up unfreed resources in se on quiting, including +se->virtio_dev, se->vu_socket_path, se->vu_socketfd. + +Signed-off-by: Liu Bo +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 61cfc44982e566c33b9d5df17858e4d5ae373873) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 7 +++++++ + tools/virtiofsd/fuse_virtio.c | 7 +++++++ + tools/virtiofsd/fuse_virtio.h | 2 +- + 3 files changed, 15 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 65f91da..440508a 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2532,6 +2532,13 @@ void fuse_session_destroy(struct fuse_session *se) + if (se->fd != -1) { + close(se->fd); + } ++ ++ if (se->vu_socket_path) { ++ virtio_session_close(se); ++ free(se->vu_socket_path); ++ se->vu_socket_path = NULL; ++ } ++ + free(se); + } + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 7a8774a..e7bd772 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -833,3 +833,10 @@ int virtio_session_mount(struct fuse_session *se) + + return 0; + } ++ ++void virtio_session_close(struct fuse_session *se) ++{ ++ close(se->vu_socketfd); ++ free(se->virtio_dev); ++ se->virtio_dev = NULL; ++} +diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h +index cc676b9..1116840 100644 +--- a/tools/virtiofsd/fuse_virtio.h ++++ b/tools/virtiofsd/fuse_virtio.h +@@ -19,7 +19,7 @@ + struct fuse_session; + + int virtio_session_mount(struct fuse_session *se); +- ++void virtio_session_close(struct fuse_session *se); + int virtio_loop(struct fuse_session *se); + + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch b/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch new file mode 100644 index 0000000..d01b000 --- /dev/null +++ b/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch @@ -0,0 +1,99 @@ +From e00543b0384fba61a9c7274c73e11a25e7ab2946 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:13 +0100 +Subject: [PATCH 102/116] virtiofsd: convert more fprintf and perror to use + fuse log infra +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-99-dgilbert@redhat.com> +Patchwork-id: 93552 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 098/112] virtiofsd: convert more fprintf and perror to use fuse log infra +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Eryu Guan + +Signed-off-by: Eryu Guan +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Misono Tomohiro +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit fc1aed0bf96259d0b46b1cfea7497b7762c4ee3d) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_signals.c | 7 +++++-- + tools/virtiofsd/helper.c | 9 ++++++--- + 2 files changed, 11 insertions(+), 5 deletions(-) + +diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c +index dc7c8ac..f18625b 100644 +--- a/tools/virtiofsd/fuse_signals.c ++++ b/tools/virtiofsd/fuse_signals.c +@@ -12,6 +12,7 @@ + #include "fuse_i.h" + #include "fuse_lowlevel.h" + ++#include + #include + #include + #include +@@ -47,13 +48,15 @@ static int set_one_signal_handler(int sig, void (*handler)(int), int remove) + sa.sa_flags = 0; + + if (sigaction(sig, NULL, &old_sa) == -1) { +- perror("fuse: cannot get old signal handler"); ++ fuse_log(FUSE_LOG_ERR, "fuse: cannot get old signal handler: %s\n", ++ strerror(errno)); + return -1; + } + + if (old_sa.sa_handler == (remove ? 
handler : SIG_DFL) && + sigaction(sig, &sa, NULL) == -1) { +- perror("fuse: cannot set signal handler"); ++ fuse_log(FUSE_LOG_ERR, "fuse: cannot set signal handler: %s\n", ++ strerror(errno)); + return -1; + } + return 0; +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 33749bf..f98d8f2 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -208,7 +208,8 @@ int fuse_daemonize(int foreground) + char completed; + + if (pipe(waiter)) { +- perror("fuse_daemonize: pipe"); ++ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: pipe: %s\n", ++ strerror(errno)); + return -1; + } + +@@ -218,7 +219,8 @@ int fuse_daemonize(int foreground) + */ + switch (fork()) { + case -1: +- perror("fuse_daemonize: fork"); ++ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: fork: %s\n", ++ strerror(errno)); + return -1; + case 0: + break; +@@ -228,7 +230,8 @@ int fuse_daemonize(int foreground) + } + + if (setsid() == -1) { +- perror("fuse_daemonize: setsid"); ++ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: setsid: %s\n", ++ strerror(errno)); + return -1; + } + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch b/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch new file mode 100644 index 0000000..8c1022a --- /dev/null +++ b/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch @@ -0,0 +1,57 @@ +From 8e6473e906dfc7d2a62abaf1ec80ff461e4d201d Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:12 +0100 +Subject: [PATCH 101/116] virtiofsd: do not always set FUSE_FLOCK_LOCKS +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-98-dgilbert@redhat.com> +Patchwork-id: 93551 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 097/112] virtiofsd: do not always set FUSE_FLOCK_LOCKS +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Peng Tao + +Right now we always enable it regardless of given commandlines. +Fix it by setting the flag relying on the lo->flock bit. + +Signed-off-by: Peng Tao +Reviewed-by: Misono Tomohiro +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e468d4af5f5192ab33283464a9f6933044ce47f7) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index ab16135..ccbbec1 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -546,9 +546,14 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); + conn->want |= FUSE_CAP_WRITEBACK_CACHE; + } +- if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { +- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); +- conn->want |= FUSE_CAP_FLOCK_LOCKS; ++ if (conn->capable & FUSE_CAP_FLOCK_LOCKS) { ++ if (lo->flock) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); ++ conn->want |= FUSE_CAP_FLOCK_LOCKS; ++ } else { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling flock locks\n"); ++ conn->want &= ~FUSE_CAP_FLOCK_LOCKS; ++ } + } + + if (conn->capable & FUSE_CAP_POSIX_LOCKS) { +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch b/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch new file mode 100644 index 0000000..3279a5e --- /dev/null +++ b/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch @@ -0,0 +1,47 @@ +From 
bc127914b29f2e4163bc7ca786e04ed955d96016 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:00 +0100 +Subject: [PATCH 089/116] virtiofsd: enable PARALLEL_DIROPS during INIT +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-86-dgilbert@redhat.com> +Patchwork-id: 93539 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 085/112] virtiofsd: enable PARALLEL_DIROPS during INIT +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +lookup is a RO operations, PARALLEL_DIROPS can be enabled. + +Signed-off-by: Liu Bo +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit b7ed733a3841c4d489d3bd6ca7ed23c84db119c2) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index aac282f..70568d2 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2062,6 +2062,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + if (se->conn.want & FUSE_CAP_ASYNC_READ) { + outarg.flags |= FUSE_ASYNC_READ; + } ++ if (se->conn.want & FUSE_CAP_PARALLEL_DIROPS) { ++ outarg.flags |= FUSE_PARALLEL_DIROPS; ++ } + if (se->conn.want & FUSE_CAP_POSIX_LOCKS) { + outarg.flags |= FUSE_POSIX_LOCKS; + } +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch b/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch new file mode 100644 index 0000000..96f91a1 --- /dev/null +++ b/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch @@ -0,0 +1,111 @@ +From 983b383bc4a92a9f7ecff0332cadefed2f58f502 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:50 +0100 +Subject: [PATCH 079/116] virtiofsd: extract root inode init into setup_root() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-76-dgilbert@redhat.com> +Patchwork-id: 93527 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 075/112] virtiofsd: extract root inode init into setup_root() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Inititialize the root inode in a single place. + +Signed-off-by: Miklos Szeredi +Signed-off-by: Stefan Hajnoczi +dgilbert: +with fix suggested by Misono Tomohiro +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 3ca8a2b1c83eb185c232a4e87abbb65495263756) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 35 +++++++++++++++++++++++++---------- + 1 file changed, 25 insertions(+), 10 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 33bfb4d..9e7191e 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2351,6 +2351,30 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) + } + } + ++static void setup_root(struct lo_data *lo, struct lo_inode *root) ++{ ++ int fd, res; ++ struct stat stat; ++ ++ fd = open("/", O_PATH); ++ if (fd == -1) { ++ fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", lo->source); ++ exit(1); ++ } ++ ++ res = fstatat(fd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "fstatat(%s): %m\n", lo->source); ++ exit(1); ++ } ++ ++ root->is_symlink = false; ++ root->fd = fd; ++ root->ino = stat.st_ino; ++ root->dev = stat.st_dev; ++ root->refcount = 2; ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = 
FUSE_ARGS_INIT(argc, argv); +@@ -2426,8 +2450,6 @@ int main(int argc, char *argv[]) + if (lo.debug) { + current_log_level = FUSE_LOG_DEBUG; + } +- lo.root.refcount = 2; +- + if (lo.source) { + struct stat stat; + int res; +@@ -2446,7 +2468,6 @@ int main(int argc, char *argv[]) + } else { + lo.source = "/"; + } +- lo.root.is_symlink = false; + if (!lo.timeout_set) { + switch (lo.cache) { + case CACHE_NEVER: +@@ -2466,13 +2487,6 @@ int main(int argc, char *argv[]) + exit(1); + } + +- lo.root.fd = open(lo.source, O_PATH); +- +- if (lo.root.fd == -1) { +- fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); +- exit(1); +- } +- + se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); + if (se == NULL) { + goto err_out1; +@@ -2495,6 +2509,7 @@ int main(int argc, char *argv[]) + + setup_sandbox(&lo, se, opts.syslog); + ++ setup_root(&lo, &lo.root); + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch b/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch new file mode 100644 index 0000000..4860bec --- /dev/null +++ b/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch @@ -0,0 +1,85 @@ +From b3cd18ab58e331d3610cf00f857d6a945f11a030 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:49 +0100 +Subject: [PATCH 078/116] virtiofsd: fail when parent inode isn't known in + lo_do_lookup() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-75-dgilbert@redhat.com> +Patchwork-id: 93529 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 074/112] virtiofsd: fail when parent inode isn't known in lo_do_lookup() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +The Linux file handle APIs (struct export_operations) can access inodes +that are not attached to parents because path name traversal is not +performed. Refuse if there is no parent in lo_do_lookup(). + +Also clean up lo_do_lookup() while we're here. + +Signed-off-by: Miklos Szeredi +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 9de4fab5995d115f8ebfb41d8d94a866d80a1708) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index de12e75..33bfb4d 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -777,6 +777,15 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + struct lo_data *lo = lo_data(req); + struct lo_inode *inode, *dir = lo_inode(req, parent); + ++ /* ++ * name_to_handle_at() and open_by_handle_at() can reach here with fuse ++ * mount point in guest, but we don't have its inode info in the ++ * ino_map. 
++ */ ++ if (!dir) { ++ return ENOENT; ++ } ++ + memset(e, 0, sizeof(*e)); + e->attr_timeout = lo->timeout; + e->entry_timeout = lo->timeout; +@@ -786,7 +795,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + name = "."; + } + +- newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); ++ newfd = openat(dir->fd, name, O_PATH | O_NOFOLLOW); + if (newfd == -1) { + goto out_err; + } +@@ -796,7 +805,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + goto out_err; + } + +- inode = lo_find(lo_data(req), &e->attr); ++ inode = lo_find(lo, &e->attr); + if (inode) { + close(newfd); + newfd = -1; +@@ -812,6 +821,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + inode->is_symlink = S_ISLNK(e->attr.st_mode); + inode->refcount = 1; + inode->fd = newfd; ++ newfd = -1; + inode->ino = e->attr.st_ino; + inode->dev = e->attr.st_dev; + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-fix-error-handling-in-main.patch b/kvm-virtiofsd-fix-error-handling-in-main.patch new file mode 100644 index 0000000..a831992 --- /dev/null +++ b/kvm-virtiofsd-fix-error-handling-in-main.patch @@ -0,0 +1,63 @@ +From 0ea1c7375d6509367399c706eb9d1e8cf79a5830 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:55 +0100 +Subject: [PATCH 084/116] virtiofsd: fix error handling in main() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-81-dgilbert@redhat.com> +Patchwork-id: 93534 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 080/112] virtiofsd: fix error handling in main() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +Neither fuse_parse_cmdline() nor fuse_opt_parse() goes to the right place +to do cleanup. + +Signed-off-by: Liu Bo +Reviewed-by: Daniel P. 
Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit c6de804670f2255ce776263124c37f3370dc5ac1) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 9ed77a1..af050c6 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2443,13 +2443,14 @@ int main(int argc, char *argv[]) + lo_map_init(&lo.fd_map); + + if (fuse_parse_cmdline(&args, &opts) != 0) { +- return 1; ++ goto err_out1; + } + fuse_set_log_func(log_func); + use_syslog = opts.syslog; + if (use_syslog) { + openlog("virtiofsd", LOG_PID, LOG_DAEMON); + } ++ + if (opts.show_help) { + printf("usage: %s [options]\n\n", argv[0]); + fuse_cmdline_help(); +@@ -2468,7 +2469,7 @@ int main(int argc, char *argv[]) + } + + if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { +- return 1; ++ goto err_out1; + } + + /* +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch b/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch new file mode 100644 index 0000000..420a8a6 --- /dev/null +++ b/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch @@ -0,0 +1,44 @@ +From 9c291ca8624318613ede6e4174d08cf45aae8384 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:01 +0100 +Subject: [PATCH 090/116] virtiofsd: fix incorrect error handling in + lo_do_lookup +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-87-dgilbert@redhat.com> +Patchwork-id: 93543 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 086/112] virtiofsd: fix incorrect error handling in lo_do_lookup +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Eric Ren + +Signed-off-by: Eric Ren +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit fc3f0041b43b6c64aa97b3558a6abe1a10028354) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e8dc5c7..05b5f89 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -814,7 +814,6 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + close(newfd); + newfd = -1; + } else { +- saverr = ENOMEM; + inode = calloc(1, sizeof(struct lo_inode)); + if (!inode) { + goto out_err; +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-fix-libfuse-information-leaks.patch b/kvm-virtiofsd-fix-libfuse-information-leaks.patch new file mode 100644 index 0000000..90debb0 --- /dev/null +++ b/kvm-virtiofsd-fix-libfuse-information-leaks.patch @@ -0,0 +1,322 @@ +From e0d64e481e5a9fab5ff90d2a8f84afcd3311d13b Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:35 +0100 +Subject: [PATCH 064/116] virtiofsd: fix libfuse information leaks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-61-dgilbert@redhat.com> +Patchwork-id: 93515 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 060/112] virtiofsd: fix libfuse information leaks +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Some FUSE message replies contain padding fields that are not +initialized by libfuse. This is fine in traditional FUSE applications +because the kernel is trusted. virtiofsd does not trust the guest and +must not expose uninitialized memory. + +Use C struct initializers to automatically zero out memory. Not all of +these code changes are strictly necessary but they will prevent future +information leaks if the structs are extended. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 3db2876a0153ac7103c077c53090e020faffb3ea) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 150 ++++++++++++++++++++-------------------- + 1 file changed, 76 insertions(+), 74 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 2d6dc5a..6ceb33d 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -44,21 +44,23 @@ static __attribute__((constructor)) void fuse_ll_init_pagesize(void) + + static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) + { +- attr->ino = stbuf->st_ino; +- attr->mode = stbuf->st_mode; +- attr->nlink = stbuf->st_nlink; +- attr->uid = stbuf->st_uid; +- attr->gid = stbuf->st_gid; +- attr->rdev = stbuf->st_rdev; +- attr->size = stbuf->st_size; +- attr->blksize = stbuf->st_blksize; +- attr->blocks = stbuf->st_blocks; +- attr->atime = stbuf->st_atime; +- attr->mtime = stbuf->st_mtime; +- attr->ctime = stbuf->st_ctime; +- attr->atimensec = ST_ATIM_NSEC(stbuf); +- attr->mtimensec = 
ST_MTIM_NSEC(stbuf); +- attr->ctimensec = ST_CTIM_NSEC(stbuf); ++ *attr = (struct fuse_attr){ ++ .ino = stbuf->st_ino, ++ .mode = stbuf->st_mode, ++ .nlink = stbuf->st_nlink, ++ .uid = stbuf->st_uid, ++ .gid = stbuf->st_gid, ++ .rdev = stbuf->st_rdev, ++ .size = stbuf->st_size, ++ .blksize = stbuf->st_blksize, ++ .blocks = stbuf->st_blocks, ++ .atime = stbuf->st_atime, ++ .mtime = stbuf->st_mtime, ++ .ctime = stbuf->st_ctime, ++ .atimensec = ST_ATIM_NSEC(stbuf), ++ .mtimensec = ST_MTIM_NSEC(stbuf), ++ .ctimensec = ST_CTIM_NSEC(stbuf), ++ }; + } + + static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) +@@ -183,16 +185,16 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, + int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, + int count) + { +- struct fuse_out_header out; ++ struct fuse_out_header out = { ++ .unique = req->unique, ++ .error = error, ++ }; + + if (error <= -1000 || error > 0) { + fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); + error = -ERANGE; + } + +- out.unique = req->unique; +- out.error = error; +- + iov[0].iov_base = &out; + iov[0].iov_len = sizeof(struct fuse_out_header); + +@@ -277,14 +279,16 @@ size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, + static void convert_statfs(const struct statvfs *stbuf, + struct fuse_kstatfs *kstatfs) + { +- kstatfs->bsize = stbuf->f_bsize; +- kstatfs->frsize = stbuf->f_frsize; +- kstatfs->blocks = stbuf->f_blocks; +- kstatfs->bfree = stbuf->f_bfree; +- kstatfs->bavail = stbuf->f_bavail; +- kstatfs->files = stbuf->f_files; +- kstatfs->ffree = stbuf->f_ffree; +- kstatfs->namelen = stbuf->f_namemax; ++ *kstatfs = (struct fuse_kstatfs){ ++ .bsize = stbuf->f_bsize, ++ .frsize = stbuf->f_frsize, ++ .blocks = stbuf->f_blocks, ++ .bfree = stbuf->f_bfree, ++ .bavail = stbuf->f_bavail, ++ .files = stbuf->f_files, ++ .ffree = stbuf->f_ffree, ++ .namelen = stbuf->f_namemax, ++ }; + } + + static int 
send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) +@@ -328,12 +332,14 @@ static unsigned int calc_timeout_nsec(double t) + static void fill_entry(struct fuse_entry_out *arg, + const struct fuse_entry_param *e) + { +- arg->nodeid = e->ino; +- arg->generation = e->generation; +- arg->entry_valid = calc_timeout_sec(e->entry_timeout); +- arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); +- arg->attr_valid = calc_timeout_sec(e->attr_timeout); +- arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); ++ *arg = (struct fuse_entry_out){ ++ .nodeid = e->ino, ++ .generation = e->generation, ++ .entry_valid = calc_timeout_sec(e->entry_timeout), ++ .entry_valid_nsec = calc_timeout_nsec(e->entry_timeout), ++ .attr_valid = calc_timeout_sec(e->attr_timeout), ++ .attr_valid_nsec = calc_timeout_nsec(e->attr_timeout), ++ }; + convert_stat(&e->attr, &arg->attr); + } + +@@ -362,10 +368,12 @@ size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, + fill_entry(&dp->entry_out, e); + + struct fuse_dirent *dirent = &dp->dirent; +- dirent->ino = e->attr.st_ino; +- dirent->off = off; +- dirent->namelen = namelen; +- dirent->type = (e->attr.st_mode & S_IFMT) >> 12; ++ *dirent = (struct fuse_dirent){ ++ .ino = e->attr.st_ino, ++ .off = off, ++ .namelen = namelen, ++ .type = (e->attr.st_mode & S_IFMT) >> 12, ++ }; + memcpy(dirent->name, name, namelen); + memset(dirent->name + namelen, 0, entlen_padded - entlen); + +@@ -496,15 +504,14 @@ static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) + { + struct iovec iov[2]; +- struct fuse_out_header out; ++ struct fuse_out_header out = { ++ .unique = req->unique, ++ }; + int res; + + iov[0].iov_base = &out; + iov[0].iov_len = sizeof(struct fuse_out_header); + +- out.unique = req->unique; +- out.error = 0; +- + res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv); + if (res <= 0) { + fuse_free_req(req); +@@ -2145,14 
+2152,14 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, + static int send_notify_iov(struct fuse_session *se, int notify_code, + struct iovec *iov, int count) + { +- struct fuse_out_header out; ++ struct fuse_out_header out = { ++ .error = notify_code, ++ }; + + if (!se->got_init) { + return -ENOTCONN; + } + +- out.unique = 0; +- out.error = notify_code; + iov[0].iov_base = &out; + iov[0].iov_len = sizeof(struct fuse_out_header); + +@@ -2162,11 +2169,11 @@ static int send_notify_iov(struct fuse_session *se, int notify_code, + int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) + { + if (ph != NULL) { +- struct fuse_notify_poll_wakeup_out outarg; ++ struct fuse_notify_poll_wakeup_out outarg = { ++ .kh = ph->kh, ++ }; + struct iovec iov[2]; + +- outarg.kh = ph->kh; +- + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + +@@ -2179,17 +2186,17 @@ int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) + int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, + off_t off, off_t len) + { +- struct fuse_notify_inval_inode_out outarg; ++ struct fuse_notify_inval_inode_out outarg = { ++ .ino = ino, ++ .off = off, ++ .len = len, ++ }; + struct iovec iov[2]; + + if (!se) { + return -EINVAL; + } + +- outarg.ino = ino; +- outarg.off = off; +- outarg.len = len; +- + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + +@@ -2199,17 +2206,16 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, + int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, + const char *name, size_t namelen) + { +- struct fuse_notify_inval_entry_out outarg; ++ struct fuse_notify_inval_entry_out outarg = { ++ .parent = parent, ++ .namelen = namelen, ++ }; + struct iovec iov[3]; + + if (!se) { + return -EINVAL; + } + +- outarg.parent = parent; +- outarg.namelen = namelen; +- outarg.padding = 0; +- + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + iov[2].iov_base = (void 
*)name; +@@ -2222,18 +2228,17 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + fuse_ino_t child, const char *name, + size_t namelen) + { +- struct fuse_notify_delete_out outarg; ++ struct fuse_notify_delete_out outarg = { ++ .parent = parent, ++ .child = child, ++ .namelen = namelen, ++ }; + struct iovec iov[3]; + + if (!se) { + return -EINVAL; + } + +- outarg.parent = parent; +- outarg.child = child; +- outarg.namelen = namelen; +- outarg.padding = 0; +- + iov[1].iov_base = &outarg; + iov[1].iov_len = sizeof(outarg); + iov[2].iov_base = (void *)name; +@@ -2245,24 +2250,21 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + off_t offset, struct fuse_bufvec *bufv) + { +- struct fuse_out_header out; +- struct fuse_notify_store_out outarg; ++ struct fuse_out_header out = { ++ .error = FUSE_NOTIFY_STORE, ++ }; ++ struct fuse_notify_store_out outarg = { ++ .nodeid = ino, ++ .offset = offset, ++ .size = fuse_buf_size(bufv), ++ }; + struct iovec iov[3]; +- size_t size = fuse_buf_size(bufv); + int res; + + if (!se) { + return -EINVAL; + } + +- out.unique = 0; +- out.error = FUSE_NOTIFY_STORE; +- +- outarg.nodeid = ino; +- outarg.offset = offset; +- outarg.size = size; +- outarg.padding = 0; +- + iov[0].iov_base = &out; + iov[0].iov_len = sizeof(out); + iov[1].iov_base = &outarg; +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch b/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch new file mode 100644 index 0000000..6243037 --- /dev/null +++ b/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch @@ -0,0 +1,94 @@ +From 9a44d78f5019280b006bb5b3de7164336289d639 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:21 +0100 +Subject: [PATCH 110/116] virtiofsd: fix lo_destroy() resource leaks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-107-dgilbert@redhat.com> +Patchwork-id: 93560 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 106/112] virtiofsd: fix lo_destroy() resource leaks +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Now that lo_destroy() is serialized we can call unref_inode() so that +all inode resources are freed. + +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 28f7a3b026f231bfe8de5fed6a18a8d27b1dfcee) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 41 ++++++++++++++++++++-------------------- + 1 file changed, 20 insertions(+), 21 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 79b8b71..eb001b9 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1371,26 +1371,6 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + } + } + +-static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) +-{ +- struct lo_inode *inode = value; +- struct lo_data *lo = user_data; +- +- inode->nlookup = 0; +- lo_map_remove(&lo->ino_map, inode->fuse_ino); +- close(inode->fd); +- lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */ +- +- return TRUE; +-} +- +-static void unref_all_inodes(struct lo_data *lo) +-{ +- pthread_mutex_lock(&lo->mutex); +- g_hash_table_foreach_remove(lo->inodes, unref_all_inodes_cb, lo); +- pthread_mutex_unlock(&lo->mutex); +-} +- + static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, 
uint64_t nlookup) + { + struct lo_data *lo = lo_data(req); +@@ -2477,7 +2457,26 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, + static void lo_destroy(void *userdata) + { + struct lo_data *lo = (struct lo_data *)userdata; +- unref_all_inodes(lo); ++ ++ /* ++ * Normally lo->mutex must be taken when traversing lo->inodes but ++ * lo_destroy() is a serialized request so no races are possible here. ++ * ++ * In addition, we cannot acquire lo->mutex since unref_inode() takes it ++ * too and this would result in a recursive lock. ++ */ ++ while (true) { ++ GHashTableIter iter; ++ gpointer key, value; ++ ++ g_hash_table_iter_init(&iter, lo->inodes); ++ if (!g_hash_table_iter_next(&iter, &key, &value)) { ++ break; ++ } ++ ++ struct lo_inode *inode = value; ++ unref_inode_lolocked(lo, inode, inode->nlookup); ++ } + } + + static struct fuse_lowlevel_ops lo_oper = { +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch b/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch new file mode 100644 index 0000000..4d7d6dc --- /dev/null +++ b/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch @@ -0,0 +1,66 @@ +From 9e0f5b64f30c2f841f297e25c2f3a6d82c8a16b8 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:57 +0100 +Subject: [PATCH 086/116] virtiofsd: fix memory leak on lo.source +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-83-dgilbert@redhat.com> +Patchwork-id: 93536 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 082/112] virtiofsd: fix memory leak on lo.source +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Liu Bo + +valgrind reported that lo.source is leaked on quitting, but it was defined +as (const char*) as it may point to a const string "/". 
+ +Signed-off-by: Liu Bo +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit eb68a33b5fc5dde87bd9b99b94e7c33a5d8ea82e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index af050c6..056ebe8 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -115,7 +115,7 @@ struct lo_data { + int writeback; + int flock; + int xattr; +- const char *source; ++ char *source; + double timeout; + int cache; + int timeout_set; +@@ -2497,9 +2497,8 @@ int main(int argc, char *argv[]) + fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); + exit(1); + } +- + } else { +- lo.source = "/"; ++ lo.source = strdup("/"); + } + if (!lo.timeout_set) { + switch (lo.cache) { +@@ -2570,5 +2569,7 @@ err_out1: + close(lo.root.fd); + } + ++ free(lo.source); ++ + return ret ? 1 : 0; + } +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-get-set-features-callbacks.patch b/kvm-virtiofsd-get-set-features-callbacks.patch new file mode 100644 index 0000000..fcb5ca2 --- /dev/null +++ b/kvm-virtiofsd-get-set-features-callbacks.patch @@ -0,0 +1,66 @@ +From 59bfe3ad924d00dc9c7a4363fcd3db36ea247988 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:59 +0100 +Subject: [PATCH 028/116] virtiofsd: get/set features callbacks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-25-dgilbert@redhat.com> +Patchwork-id: 93478 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 024/112] virtiofsd: get/set features callbacks +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: "Dr. David Alan Gilbert" + +Add the get/set features callbacks. + +Signed-off-by: Dr. 
David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit f2cef5fb9ae20136ca18d16328787b69b3abfa18) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 1928a20..4819e56 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -46,6 +46,17 @@ struct virtio_fs_config { + uint32_t num_queues; + }; + ++/* Callback from libvhost-user */ ++static uint64_t fv_get_features(VuDev *dev) ++{ ++ return 1ULL << VIRTIO_F_VERSION_1; ++} ++ ++/* Callback from libvhost-user */ ++static void fv_set_features(VuDev *dev, uint64_t features) ++{ ++} ++ + /* + * Callback from libvhost-user if there's a new fd we're supposed to listen + * to, typically a queue kick? +@@ -78,7 +89,9 @@ static bool fv_queue_order(VuDev *dev, int qidx) + } + + static const VuDevIface fv_iface = { +- /* TODO: Add other callbacks */ ++ .get_features = fv_get_features, ++ .set_features = fv_set_features, ++ + .queue_is_processed_in_order = fv_queue_order, + }; + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch b/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch new file mode 100644 index 0000000..68d20e7 --- /dev/null +++ b/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch @@ -0,0 +1,589 @@ +From da6ee5c24397d2ca93dfaf275fdd9dafc922da15 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:11 +0100 +Subject: [PATCH 100/116] virtiofsd: introduce inode refcount to prevent + use-after-free +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-97-dgilbert@redhat.com> +Patchwork-id: 93550 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 096/112] virtiofsd: introduce inode refcount to prevent use-after-free +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +If thread A is using an inode it must not be deleted by thread B when +processing a FUSE_FORGET request. + +The FUSE protocol itself already has a counter called nlookup that is +used in FUSE_FORGET messages. We cannot trust this counter since the +untrusted client can manipulate it via FUSE_FORGET messages. + +Introduce a new refcount to keep inodes alive for the required lifespan. +lo_inode_put() must be called to release a reference. FUSE's nlookup +counter holds exactly one reference so that the inode stays alive as +long as the client still wants to remember it. + +Note that the lo_inode->is_symlink field is moved to avoid creating a +hole in the struct due to struct field alignment. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Misono Tomohiro +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit c241aa9457d88c6a0d027f48fadfed131646bce3) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 169 +++++++++++++++++++++++++++++++++------ + 1 file changed, 146 insertions(+), 23 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e3a6d6b..ab16135 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -97,7 +97,13 @@ struct lo_key { + + struct lo_inode { + int fd; +- bool is_symlink; ++ ++ /* ++ * Atomic reference count for this object. The nlookup field holds a ++ * reference and release it when nlookup reaches 0. 
++ */ ++ gint refcount; ++ + struct lo_key key; + + /* +@@ -116,6 +122,8 @@ struct lo_inode { + fuse_ino_t fuse_ino; + pthread_mutex_t plock_mutex; + GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ ++ ++ bool is_symlink; + }; + + struct lo_cred { +@@ -471,6 +479,23 @@ static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) + return elem - lo_data(req)->ino_map.elems; + } + ++static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep) ++{ ++ struct lo_inode *inode = *inodep; ++ ++ if (!inode) { ++ return; ++ } ++ ++ *inodep = NULL; ++ ++ if (g_atomic_int_dec_and_test(&inode->refcount)) { ++ close(inode->fd); ++ free(inode); ++ } ++} ++ ++/* Caller must release refcount using lo_inode_put() */ + static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + { + struct lo_data *lo = lo_data(req); +@@ -478,6 +503,9 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + + pthread_mutex_lock(&lo->mutex); + elem = lo_map_get(&lo->ino_map, ino); ++ if (elem) { ++ g_atomic_int_inc(&elem->inode->refcount); ++ } + pthread_mutex_unlock(&lo->mutex); + + if (!elem) { +@@ -487,10 +515,23 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + return elem->inode; + } + ++/* ++ * TODO Remove this helper and force callers to hold an inode refcount until ++ * they are done with the fd. This will be done in a later patch to make ++ * review easier. ++ */ + static int lo_fd(fuse_req_t req, fuse_ino_t ino) + { + struct lo_inode *inode = lo_inode(req, ino); +- return inode ? 
inode->fd : -1; ++ int fd; ++ ++ if (!inode) { ++ return -1; ++ } ++ ++ fd = inode->fd; ++ lo_inode_put(lo_data(req), &inode); ++ return fd; + } + + static void lo_init(void *userdata, struct fuse_conn_info *conn) +@@ -545,6 +586,10 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino, + fuse_reply_attr(req, &buf, lo->timeout); + } + ++/* ++ * Increments parent->nlookup and caller must release refcount using ++ * lo_inode_put(&parent). ++ */ + static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, + char path[PATH_MAX], struct lo_inode **parent) + { +@@ -582,6 +627,7 @@ retry: + p = &lo->root; + pthread_mutex_lock(&lo->mutex); + p->nlookup++; ++ g_atomic_int_inc(&p->refcount); + pthread_mutex_unlock(&lo->mutex); + } else { + *last = '\0'; +@@ -625,6 +671,7 @@ retry: + + fail_unref: + unref_inode_lolocked(lo, p, 1); ++ lo_inode_put(lo, &p); + fail: + if (retries) { + retries--; +@@ -663,6 +710,7 @@ fallback: + if (res != -1) { + res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); + unref_inode_lolocked(lo, parent, 1); ++ lo_inode_put(lo, &parent); + } + + return res; +@@ -780,11 +828,13 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + goto out_err; + } + } ++ lo_inode_put(lo, &inode); + + return lo_getattr(req, ino, fi); + + out_err: + saverr = errno; ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + } + +@@ -801,6 +851,7 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) + if (p) { + assert(p->nlookup > 0); + p->nlookup++; ++ g_atomic_int_inc(&p->refcount); + } + pthread_mutex_unlock(&lo->mutex); + +@@ -820,6 +871,10 @@ static void posix_locks_value_destroy(gpointer data) + free(plock); + } + ++/* ++ * Increments nlookup and caller must release refcount using ++ * lo_inode_put(&parent). 
++ */ + static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + struct fuse_entry_param *e) + { +@@ -827,7 +882,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + int res; + int saverr; + struct lo_data *lo = lo_data(req); +- struct lo_inode *inode, *dir = lo_inode(req, parent); ++ struct lo_inode *inode = NULL; ++ struct lo_inode *dir = lo_inode(req, parent); + + /* + * name_to_handle_at() and open_by_handle_at() can reach here with fuse +@@ -868,6 +924,13 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + } + + inode->is_symlink = S_ISLNK(e->attr.st_mode); ++ ++ /* ++ * One for the caller and one for nlookup (released in ++ * unref_inode_lolocked()) ++ */ ++ g_atomic_int_set(&inode->refcount, 2); ++ + inode->nlookup = 1; + inode->fd = newfd; + newfd = -1; +@@ -883,6 +946,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + pthread_mutex_unlock(&lo->mutex); + } + e->ino = inode->fuse_ino; ++ lo_inode_put(lo, &inode); ++ lo_inode_put(lo, &dir); + + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, + name, (unsigned long long)e->ino); +@@ -894,6 +959,8 @@ out_err: + if (newfd != -1) { + close(newfd); + } ++ lo_inode_put(lo, &inode); ++ lo_inode_put(lo, &dir); + return saverr; + } + +@@ -991,6 +1058,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + { + int res; + int saverr; ++ struct lo_data *lo = lo_data(req); + struct lo_inode *dir; + struct fuse_entry_param e; + struct lo_cred old = {}; +@@ -1032,9 +1100,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + name, (unsigned long long)e.ino); + + fuse_reply_entry(req, &e); ++ lo_inode_put(lo, &dir); + return; + + out: ++ lo_inode_put(lo, &dir); + fuse_reply_err(req, saverr); + } + +@@ -1085,6 +1155,7 @@ fallback: + if (res != -1) { + res = linkat(parent->fd, path, dfd, name, 0); + unref_inode_lolocked(lo, parent, 1); ++ lo_inode_put(lo, 
&parent); + } + + return res; +@@ -1095,6 +1166,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + { + int res; + struct lo_data *lo = lo_data(req); ++ struct lo_inode *parent_inode; + struct lo_inode *inode; + struct fuse_entry_param e; + int saverr; +@@ -1104,17 +1176,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + return; + } + ++ parent_inode = lo_inode(req, parent); + inode = lo_inode(req, ino); +- if (!inode) { +- fuse_reply_err(req, EBADF); +- return; ++ if (!parent_inode || !inode) { ++ errno = EBADF; ++ goto out_err; + } + + memset(&e, 0, sizeof(struct fuse_entry_param)); + e.attr_timeout = lo->timeout; + e.entry_timeout = lo->timeout; + +- res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name); ++ res = linkat_empty_nofollow(lo, inode, parent_inode->fd, name); + if (res == -1) { + goto out_err; + } +@@ -1133,13 +1206,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + name, (unsigned long long)e.ino); + + fuse_reply_entry(req, &e); ++ lo_inode_put(lo, &parent_inode); ++ lo_inode_put(lo, &inode); + return; + + out_err: + saverr = errno; ++ lo_inode_put(lo, &parent_inode); ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + } + ++/* Increments nlookup and caller must release refcount using lo_inode_put() */ + static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, + const char *name) + { +@@ -1176,6 +1254,7 @@ static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) + + fuse_reply_err(req, res == -1 ? 
errno : 0); + unref_inode_lolocked(lo, inode, 1); ++ lo_inode_put(lo, &inode); + } + + static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, +@@ -1183,8 +1262,10 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + unsigned int flags) + { + int res; +- struct lo_inode *oldinode; +- struct lo_inode *newinode; ++ struct lo_inode *parent_inode; ++ struct lo_inode *newparent_inode; ++ struct lo_inode *oldinode = NULL; ++ struct lo_inode *newinode = NULL; + struct lo_data *lo = lo_data(req); + + if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { +@@ -1192,6 +1273,13 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + return; + } + ++ parent_inode = lo_inode(req, parent); ++ newparent_inode = lo_inode(req, newparent); ++ if (!parent_inode || !newparent_inode) { ++ fuse_reply_err(req, EBADF); ++ goto out; ++ } ++ + oldinode = lookup_name(req, parent, name); + newinode = lookup_name(req, newparent, newname); + +@@ -1204,8 +1292,8 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + #ifndef SYS_renameat2 + fuse_reply_err(req, EINVAL); + #else +- res = syscall(SYS_renameat2, lo_fd(req, parent), name, +- lo_fd(req, newparent), newname, flags); ++ res = syscall(SYS_renameat2, parent_inode->fd, name, ++ newparent_inode->fd, newname, flags); + if (res == -1 && errno == ENOSYS) { + fuse_reply_err(req, EINVAL); + } else { +@@ -1215,12 +1303,16 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + goto out; + } + +- res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); ++ res = renameat(parent_inode->fd, name, newparent_inode->fd, newname); + + fuse_reply_err(req, res == -1 ? 
errno : 0); + out: + unref_inode_lolocked(lo, oldinode, 1); + unref_inode_lolocked(lo, newinode, 1); ++ lo_inode_put(lo, &oldinode); ++ lo_inode_put(lo, &newinode); ++ lo_inode_put(lo, &parent_inode); ++ lo_inode_put(lo, &newparent_inode); + } + + static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) +@@ -1244,6 +1336,7 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + + fuse_reply_err(req, res == -1 ? errno : 0); + unref_inode_lolocked(lo, inode, 1); ++ lo_inode_put(lo, &inode); + } + + static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, +@@ -1265,8 +1358,9 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + g_hash_table_destroy(inode->posix_locks); + pthread_mutex_destroy(&inode->plock_mutex); + pthread_mutex_unlock(&lo->mutex); +- close(inode->fd); +- free(inode); ++ ++ /* Drop our refcount from lo_do_lookup() */ ++ lo_inode_put(lo, &inode); + } else { + pthread_mutex_unlock(&lo->mutex); + } +@@ -1280,6 +1374,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) + inode->nlookup = 0; + lo_map_remove(&lo->ino_map, inode->fuse_ino); + close(inode->fd); ++ lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */ + + return TRUE; + } +@@ -1306,6 +1401,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + (unsigned long long)nlookup); + + unref_inode_lolocked(lo, inode, nlookup); ++ lo_inode_put(lo, &inode); + } + + static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) +@@ -1537,6 +1633,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + err = 0; + error: + lo_dirp_put(&d); ++ lo_inode_put(lo, &dinode); + + /* + * If there's an error, we can only signal it if we haven't stored +@@ -1595,6 +1692,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + { + int fd; + struct lo_data *lo = lo_data(req); ++ struct lo_inode 
*parent_inode; + struct fuse_entry_param e; + int err; + struct lo_cred old = {}; +@@ -1607,12 +1705,18 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + return; + } + ++ parent_inode = lo_inode(req, parent); ++ if (!parent_inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + err = lo_change_cred(req, &old); + if (err) { + goto out; + } + +- fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, ++ fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, + mode); + err = fd == -1 ? errno : 0; + lo_restore_cred(&old); +@@ -1625,8 +1729,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + pthread_mutex_unlock(&lo->mutex); + if (fh == -1) { + close(fd); +- fuse_reply_err(req, ENOMEM); +- return; ++ err = ENOMEM; ++ goto out; + } + + fi->fh = fh; +@@ -1639,6 +1743,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + } + + out: ++ lo_inode_put(lo, &parent_inode); ++ + if (err) { + fuse_reply_err(req, err); + } else { +@@ -1712,16 +1818,18 @@ static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, + plock = + lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); + if (!plock) { +- pthread_mutex_unlock(&inode->plock_mutex); +- fuse_reply_err(req, ret); +- return; ++ saverr = ret; ++ goto out; + } + + ret = fcntl(plock->fd, F_OFD_GETLK, lock); + if (ret == -1) { + saverr = errno; + } ++ ++out: + pthread_mutex_unlock(&inode->plock_mutex); ++ lo_inode_put(lo, &inode); + + if (saverr) { + fuse_reply_err(req, saverr); +@@ -1761,9 +1869,8 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, + lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); + + if (!plock) { +- pthread_mutex_unlock(&inode->plock_mutex); +- fuse_reply_err(req, ret); +- return; ++ saverr = ret; ++ goto out; + } + + /* TODO: Is it alright to modify flock? 
*/ +@@ -1772,7 +1879,11 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, + if (ret == -1) { + saverr = errno; + } ++ ++out: + pthread_mutex_unlock(&inode->plock_mutex); ++ lo_inode_put(lo, &inode); ++ + fuse_reply_err(req, saverr); + } + +@@ -1898,6 +2009,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + pthread_mutex_unlock(&inode->plock_mutex); + + res = close(dup(lo_fi_fd(req, fi))); ++ lo_inode_put(lo_data(req), &inode); + fuse_reply_err(req, res == -1 ? errno : 0); + } + +@@ -2115,11 +2227,14 @@ out_free: + if (fd >= 0) { + close(fd); + } ++ ++ lo_inode_put(lo, &inode); + return; + + out_err: + saverr = errno; + out: ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + goto out_free; + } +@@ -2190,11 +2305,14 @@ out_free: + if (fd >= 0) { + close(fd); + } ++ ++ lo_inode_put(lo, &inode); + return; + + out_err: + saverr = errno; + out: ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + goto out_free; + } +@@ -2243,6 +2361,8 @@ out: + if (fd >= 0) { + close(fd); + } ++ ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + } + +@@ -2289,6 +2409,8 @@ out: + if (fd >= 0) { + close(fd); + } ++ ++ lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + } + +@@ -2671,6 +2793,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) + root->key.ino = stat.st_ino; + root->key.dev = stat.st_dev; + root->nlookup = 2; ++ g_atomic_int_set(&root->refcount, 2); + } + + static guint lo_key_hash(gconstpointer key) +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-make-f-foreground-the-default.patch b/kvm-virtiofsd-make-f-foreground-the-default.patch new file mode 100644 index 0000000..d6cb0e3 --- /dev/null +++ b/kvm-virtiofsd-make-f-foreground-the-default.patch @@ -0,0 +1,76 @@ +From 7f2e1f79a3addb242c3018c7a80e2e57589119f0 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:08 +0100 +Subject: [PATCH 037/116] virtiofsd: make -f (foreground) the default +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-34-dgilbert@redhat.com> +Patchwork-id: 93489 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 033/112] virtiofsd: make -f (foreground) the default +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +According to vhost-user.rst "Backend program conventions", backend +programs should run in the foreground by default. Follow the +conventions so libvirt and other management tools can control virtiofsd +in a standard way. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 0bbd31753714ac2899efda0f0de31e353e965789) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 676032e..a3645fc 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -29,6 +29,11 @@ + { \ + t, offsetof(struct fuse_cmdline_opts, p), 1 \ + } ++#define FUSE_HELPER_OPT_VALUE(t, p, v) \ ++ { \ ++ t, offsetof(struct fuse_cmdline_opts, p), v \ ++ } ++ + + static const struct fuse_opt fuse_helper_opts[] = { + FUSE_HELPER_OPT("-h", show_help), +@@ -42,6 +47,7 @@ static const struct fuse_opt fuse_helper_opts[] = { + FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), + FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), + FUSE_HELPER_OPT("-f", foreground), ++ FUSE_HELPER_OPT_VALUE("--daemonize", foreground, 0), + FUSE_HELPER_OPT("fsname=", nodefault_subtype), + FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), + FUSE_HELPER_OPT("subtype=", nodefault_subtype), +@@ -131,6 +137,7 @@ void fuse_cmdline_help(void) + " -V --version print 
version\n" + " -d -o debug enable debug output (implies -f)\n" + " -f foreground operation\n" ++ " --daemonize run in background\n" + " -o max_idle_threads the maximum number of idle worker " + "threads\n" + " allowed (default: 10)\n"); +@@ -158,6 +165,7 @@ int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) + memset(opts, 0, sizeof(struct fuse_cmdline_opts)); + + opts->max_idle_threads = 10; ++ opts->foreground = 1; + + if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) == + -1) { +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-make-lo_release-atomic.patch b/kvm-virtiofsd-make-lo_release-atomic.patch new file mode 100644 index 0000000..6d88549 --- /dev/null +++ b/kvm-virtiofsd-make-lo_release-atomic.patch @@ -0,0 +1,62 @@ +From 4ebabb66f4132186152edf8e1907fce436bf5c69 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:06 +0100 +Subject: [PATCH 095/116] virtiofsd: make lo_release() atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-92-dgilbert@redhat.com> +Patchwork-id: 93545 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 091/112] virtiofsd: make lo_release() atomic +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Hold the lock across both lo_map_get() and lo_map_remove() to prevent +races between two FUSE_RELEASE requests. In this case I don't see a +serious bug but it's safer to do things atomically. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit baed65c060c0e524530bc243eec427fb408bd477) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 9414935..690edbc 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1772,14 +1772,18 @@ static void lo_release(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) + { + struct lo_data *lo = lo_data(req); +- int fd; ++ struct lo_map_elem *elem; ++ int fd = -1; + + (void)ino; + +- fd = lo_fi_fd(req, fi); +- + pthread_mutex_lock(&lo->mutex); +- lo_map_remove(&lo->fd_map, fi->fh); ++ elem = lo_map_get(&lo->fd_map, fi->fh); ++ if (elem) { ++ fd = elem->fd; ++ elem = NULL; ++ lo_map_remove(&lo->fd_map, fi->fh); ++ } + pthread_mutex_unlock(&lo->mutex); + + close(fd); +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-move-to-a-new-pid-namespace.patch b/kvm-virtiofsd-move-to-a-new-pid-namespace.patch new file mode 100644 index 0000000..9a33d1b --- /dev/null +++ b/kvm-virtiofsd-move-to-a-new-pid-namespace.patch @@ -0,0 +1,223 @@ +From a7a87a751a9893830d031a957a751b7622b71fb2 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:29 +0100 +Subject: [PATCH 058/116] virtiofsd: move to a new pid namespace +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-55-dgilbert@redhat.com> +Patchwork-id: 93510 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 054/112] virtiofsd: move to a new pid namespace +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +virtiofsd needs access to /proc/self/fd. 
Let's move to a new pid +namespace so that a compromised process cannot see any other +processes running on the system. + +One wrinkle in this approach: unshare(CLONE_NEWPID) affects *child* +processes and not the current process. Therefore we need to fork the +pid 1 process that will actually run virtiofsd and leave a parent in +waitpid(2). This is not the same thing as daemonization and parent +processes should not notice a difference. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 8e1d4ef231d8327be219f7aea7aa15d181375bbc) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 134 +++++++++++++++++++++++++-------------- + 1 file changed, 86 insertions(+), 48 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 27ab328..0947d14 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -51,7 +51,10 @@ + #include + #include + #include ++#include + #include ++#include ++#include + #include + #include + +@@ -1945,24 +1948,95 @@ static void print_capabilities(void) + } + + /* +- * Called after our UNIX domain sockets have been created, now we can move to +- * an empty network namespace to prevent TCP/IP and other network activity in +- * case this process is compromised. ++ * Move to a new mount, net, and pid namespaces to isolate this process. + */ +-static void setup_net_namespace(void) ++static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) + { +- if (unshare(CLONE_NEWNET) != 0) { +- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNET): %m\n"); ++ pid_t child; ++ ++ /* ++ * Create a new pid namespace for *child* processes. We'll have to ++ * fork in order to enter the new pid namespace. A new mount namespace ++ * is also needed so that we can remount /proc for the new pid ++ * namespace. ++ * ++ * Our UNIX domain sockets have been created. 
Now we can move to ++ * an empty network namespace to prevent TCP/IP and other network ++ * activity in case this process is compromised. ++ */ ++ if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) { ++ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): %m\n"); ++ exit(1); ++ } ++ ++ child = fork(); ++ if (child < 0) { ++ fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n"); ++ exit(1); ++ } ++ if (child > 0) { ++ pid_t waited; ++ int wstatus; ++ ++ /* The parent waits for the child */ ++ do { ++ waited = waitpid(child, &wstatus, 0); ++ } while (waited < 0 && errno == EINTR && !se->exited); ++ ++ /* We were terminated by a signal, see fuse_signals.c */ ++ if (se->exited) { ++ exit(0); ++ } ++ ++ if (WIFEXITED(wstatus)) { ++ exit(WEXITSTATUS(wstatus)); ++ } ++ ++ exit(1); ++ } ++ ++ /* Send us SIGTERM when the parent thread terminates, see prctl(2) */ ++ prctl(PR_SET_PDEATHSIG, SIGTERM); ++ ++ /* ++ * If the mounts have shared propagation then we want to opt out so our ++ * mount changes don't affect the parent mount namespace. ++ */ ++ if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n"); ++ exit(1); ++ } ++ ++ /* The child must remount /proc to use the new pid namespace */ ++ if (mount("proc", "/proc", "proc", ++ MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n"); ++ exit(1); ++ } ++ ++ /* Now we can get our /proc/self/fd directory file descriptor */ ++ lo->proc_self_fd = open("/proc/self/fd", O_PATH); ++ if (lo->proc_self_fd == -1) { ++ fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); + exit(1); + } + } + +-/* This magic is based on lxc's lxc_pivot_root() */ +-static void setup_pivot_root(const char *source) ++/* ++ * Make the source directory our root so symlinks cannot escape and no other ++ * files are accessible. Assumes unshare(CLONE_NEWNS) was already called. 
++ */ ++static void setup_mounts(const char *source) + { + int oldroot; + int newroot; + ++ if (mount(source, source, NULL, MS_BIND, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); ++ exit(1); ++ } ++ ++ /* This magic is based on lxc's lxc_pivot_root() */ + oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC); + if (oldroot < 0) { + fuse_log(FUSE_LOG_ERR, "open(/): %m\n"); +@@ -2009,47 +2083,14 @@ static void setup_pivot_root(const char *source) + close(oldroot); + } + +-static void setup_proc_self_fd(struct lo_data *lo) +-{ +- lo->proc_self_fd = open("/proc/self/fd", O_PATH); +- if (lo->proc_self_fd == -1) { +- fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); +- exit(1); +- } +-} +- +-/* +- * Make the source directory our root so symlinks cannot escape and no other +- * files are accessible. +- */ +-static void setup_mount_namespace(const char *source) +-{ +- if (unshare(CLONE_NEWNS) != 0) { +- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNS): %m\n"); +- exit(1); +- } +- +- if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { +- fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_PRIVATE): %m\n"); +- exit(1); +- } +- +- if (mount(source, source, NULL, MS_BIND, NULL) < 0) { +- fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); +- exit(1); +- } +- +- setup_pivot_root(source); +-} +- + /* + * Lock down this process to prevent access to other processes or files outside + * source directory. This reduces the impact of arbitrary code execution bugs. 
+ */ +-static void setup_sandbox(struct lo_data *lo) ++static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) + { +- setup_net_namespace(); +- setup_mount_namespace(lo->source); ++ setup_namespaces(lo, se); ++ setup_mounts(lo->source); + } + + int main(int argc, char *argv[]) +@@ -2173,10 +2214,7 @@ int main(int argc, char *argv[]) + + fuse_daemonize(opts.foreground); + +- /* Must be after daemonize to get the right /proc/self/fd */ +- setup_proc_self_fd(&lo); +- +- setup_sandbox(&lo); ++ setup_sandbox(&lo, se); + + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-move-to-an-empty-network-namespace.patch b/kvm-virtiofsd-move-to-an-empty-network-namespace.patch new file mode 100644 index 0000000..69a7c20 --- /dev/null +++ b/kvm-virtiofsd-move-to-an-empty-network-namespace.patch @@ -0,0 +1,66 @@ +From 19a16f26bdeb6302159736e182a18b06160a3f42 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:28 +0100 +Subject: [PATCH 057/116] virtiofsd: move to an empty network namespace +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-54-dgilbert@redhat.com> +Patchwork-id: 93508 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 053/112] virtiofsd: move to an empty network namespace +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +If the process is compromised there should be no network access. Use an +empty network namespace to sandbox networking. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit d74830d12ae233186ff74ddf64c552d26bb39e50) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 0570453..27ab328 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1944,6 +1944,19 @@ static void print_capabilities(void) + printf("}\n"); + } + ++/* ++ * Called after our UNIX domain sockets have been created, now we can move to ++ * an empty network namespace to prevent TCP/IP and other network activity in ++ * case this process is compromised. ++ */ ++static void setup_net_namespace(void) ++{ ++ if (unshare(CLONE_NEWNET) != 0) { ++ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNET): %m\n"); ++ exit(1); ++ } ++} ++ + /* This magic is based on lxc's lxc_pivot_root() */ + static void setup_pivot_root(const char *source) + { +@@ -2035,6 +2048,7 @@ static void setup_mount_namespace(const char *source) + */ + static void setup_sandbox(struct lo_data *lo) + { ++ setup_net_namespace(); + setup_mount_namespace(lo->source); + } + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch b/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch new file mode 100644 index 0000000..e3d5773 --- /dev/null +++ b/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch @@ -0,0 +1,54 @@ +From fe031dbbf5e287f64de9fcc9aec361e8ab492109 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:24 +0100 +Subject: [PATCH 113/116] virtiofsd/passthrough_ll: Pass errno to + fuse_reply_err() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-110-dgilbert@redhat.com> +Patchwork-id: 93559 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 109/112] virtiofsd/passthrough_ll: Pass errno to fuse_reply_err() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Xiao Yang + +lo_copy_file_range() passes -errno to fuse_reply_err() and then fuse_reply_err() +changes it to errno again, so that subsequent fuse_send_reply_iov_nofree() catches +the wrong errno.(i.e. reports "fuse: bad error value: ..."). + +Make fuse_send_reply_iov_nofree() accept the correct -errno by passing errno +directly in lo_copy_file_range(). + +Signed-off-by: Xiao Yang +Reviewed-by: Eryu Guan + +dgilbert: Sent upstream and now Merged as aa1185e153f774f1df65 +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit a931b6861e59c78d861017e9c6a9c161ff49a163) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index fc15d61..e6f2399 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2441,7 +2441,7 @@ static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, + + res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags); + if (res < 0) { +- fuse_reply_err(req, -errno); ++ fuse_reply_err(req, errno); + } else { + fuse_reply_write(req, res); + } +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch b/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch new file mode 100644 index 0000000..ddacdbe --- /dev/null +++ b/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch @@ -0,0 +1,48 @@ +From 83b03fc4a3ecf6086394363488bbebc8d55428c0 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:16 +0100 +Subject: [PATCH 105/116] virtiofsd: passthrough_ll: Use cache_readdir for + directory open +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-102-dgilbert@redhat.com> +Patchwork-id: 93555 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 101/112] virtiofsd: passthrough_ll: Use cache_readdir for directory open +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Misono Tomohiro + +Since keep_cache(FOPEN_KEEP_CACHE) has no effect for directory as +described in fuse_common.h, use cache_readdir(FOPEN_CACHE_DIR) for +directory open in cache=always mode. + +Signed-off-by: Misono Tomohiro +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 9b610b09b49b1aada256097b338d49da805da6ae) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 4c61ac5..79b8b71 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1523,7 +1523,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, + + fi->fh = fh; + if (lo->cache == CACHE_ALWAYS) { +- fi->keep_cache = 1; ++ fi->cache_readdir = 1; + } + fuse_reply_open(req, fi); + return; +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch b/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch new file mode 100644 index 0000000..0506574 --- /dev/null +++ b/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch @@ -0,0 +1,238 @@ +From 474d0adafed4d73720d6413b2903d6c4b529e5e6 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:15 +0100 +Subject: [PATCH 044/116] virtiofsd: passthrough_ll: add dirp_map to hide + lo_dirp pointers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-41-dgilbert@redhat.com> +Patchwork-id: 93495 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 040/112] virtiofsd: passthrough_ll: add dirp_map to hide lo_dirp pointers +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Do not expose lo_dirp pointers to clients. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit b39bce121bfad8757eec0ee41f14607b883935d3) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 103 +++++++++++++++++++++++++++++---------- + 1 file changed, 76 insertions(+), 27 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index a3ebf74..5f5a72f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -56,27 +56,10 @@ + + #include "passthrough_helpers.h" + +-/* +- * We are re-using pointers to our `struct lo_inode` +- * elements as inodes. This means that we must be able to +- * store uintptr_t values in a fuse_ino_t variable. The following +- * incantation checks this condition at compile time. +- */ +-#if defined(__GNUC__) && \ +- (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ +- !defined __cplusplus +-_Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), +- "fuse_ino_t too small to hold uintptr_t values!"); +-#else +-struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { +- unsigned _uintptr_to_must_hold_fuse_ino_t +- : ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 
1 : -1); +-}; +-#endif +- + struct lo_map_elem { + union { + struct lo_inode *inode; ++ struct lo_dirp *dirp; + ssize_t freelist; + }; + bool in_use; +@@ -123,6 +106,7 @@ struct lo_data { + int timeout_set; + struct lo_inode root; /* protected by lo->mutex */ + struct lo_map ino_map; /* protected by lo->mutex */ ++ struct lo_map dirp_map; /* protected by lo->mutex */ + }; + + static const struct fuse_opt lo_opts[] = { +@@ -253,6 +237,20 @@ static void lo_map_remove(struct lo_map *map, size_t key) + } + + /* Assumes lo->mutex is held */ ++static ssize_t lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp) ++{ ++ struct lo_map_elem *elem; ++ ++ elem = lo_map_alloc_elem(&lo_data(req)->dirp_map); ++ if (!elem) { ++ return -1; ++ } ++ ++ elem->dirp = dirp; ++ return elem - lo_data(req)->dirp_map.elems; ++} ++ ++/* Assumes lo->mutex is held */ + static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) + { + struct lo_map_elem *elem; +@@ -861,9 +859,19 @@ struct lo_dirp { + off_t offset; + }; + +-static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) ++static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) + { +- return (struct lo_dirp *)(uintptr_t)fi->fh; ++ struct lo_data *lo = lo_data(req); ++ struct lo_map_elem *elem; ++ ++ pthread_mutex_lock(&lo->mutex); ++ elem = lo_map_get(&lo->dirp_map, fi->fh); ++ pthread_mutex_unlock(&lo->mutex); ++ if (!elem) { ++ return NULL; ++ } ++ ++ return elem->dirp; + } + + static void lo_opendir(fuse_req_t req, fuse_ino_t ino, +@@ -873,6 +881,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, + struct lo_data *lo = lo_data(req); + struct lo_dirp *d; + int fd; ++ ssize_t fh; + + d = calloc(1, sizeof(struct lo_dirp)); + if (d == NULL) { +@@ -892,7 +901,14 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, + d->offset = 0; + d->entry = NULL; + +- fi->fh = (uintptr_t)d; ++ pthread_mutex_lock(&lo->mutex); ++ fh = lo_add_dirp_mapping(req, d); ++ pthread_mutex_unlock(&lo->mutex); ++ 
if (fh == -1) { ++ goto out_err; ++ } ++ ++ fi->fh = fh; + if (lo->cache == CACHE_ALWAYS) { + fi->keep_cache = 1; + } +@@ -903,6 +919,9 @@ out_errno: + error = errno; + out_err: + if (d) { ++ if (d->dp) { ++ closedir(d->dp); ++ } + if (fd != -1) { + close(fd); + } +@@ -920,17 +939,21 @@ static int is_dot_or_dotdot(const char *name) + static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + off_t offset, struct fuse_file_info *fi, int plus) + { +- struct lo_dirp *d = lo_dirp(fi); +- char *buf; ++ struct lo_dirp *d; ++ char *buf = NULL; + char *p; + size_t rem = size; +- int err; ++ int err = ENOMEM; + + (void)ino; + ++ d = lo_dirp(req, fi); ++ if (!d) { ++ goto error; ++ } ++ + buf = calloc(1, size); + if (!buf) { +- err = ENOMEM; + goto error; + } + p = buf; +@@ -1028,8 +1051,21 @@ static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, + static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) + { +- struct lo_dirp *d = lo_dirp(fi); ++ struct lo_data *lo = lo_data(req); ++ struct lo_dirp *d; ++ + (void)ino; ++ ++ d = lo_dirp(req, fi); ++ if (!d) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ ++ pthread_mutex_lock(&lo->mutex); ++ lo_map_remove(&lo->dirp_map, fi->fh); ++ pthread_mutex_unlock(&lo->mutex); ++ + closedir(d->dp); + free(d); + fuse_reply_err(req, 0); +@@ -1081,8 +1117,18 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi) + { + int res; +- int fd = dirfd(lo_dirp(fi)->dp); ++ struct lo_dirp *d; ++ int fd; ++ + (void)ino; ++ ++ d = lo_dirp(req, fi); ++ if (!d) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ ++ fd = dirfd(d->dp); + if (datasync) { + res = fdatasync(fd); + } else { +@@ -1614,6 +1660,8 @@ int main(int argc, char *argv[]) + root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino); + root_elem->inode = &lo.root; + ++ lo_map_init(&lo.dirp_map); ++ + if (fuse_parse_cmdline(&args, &opts) != 0) { + return 1; + } +@@ -1710,6 
+1758,7 @@ err_out2: + err_out1: + fuse_opt_free_args(&args); + ++ lo_map_destroy(&lo.dirp_map); + lo_map_destroy(&lo.ino_map); + + if (lo.root.fd >= 0) { +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch b/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch new file mode 100644 index 0000000..b8de3d8 --- /dev/null +++ b/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch @@ -0,0 +1,303 @@ +From 03effbc021064bb77d231ae5ca02d1a579c71ee1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:17 +0100 +Subject: [PATCH 046/116] virtiofsd: passthrough_ll: add fallback for racy ops +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-43-dgilbert@redhat.com> +Patchwork-id: 93496 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 042/112] virtiofsd: passthrough_ll: add fallback for racy ops +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +We have two operations that cannot be done race-free on a symlink in +certain cases: utimes and link. + +Add racy fallback for these if the race-free method doesn't work. We do +our best to avoid races even in this case: + + - get absolute path by reading /proc/self/fd/NN symlink + + - lookup parent directory: after this we are safe against renames in + ancestors + + - lookup name in parent directory, and verify that we got to the original + inode, if not retry the whole thing + +Both utimes(2) and link(2) hold i_lock on the inode across the operation, +so a racing rename/delete by this fuse instance is not possible, only from +other entities changing the filesystem. + +If the "norace" option is given, then disable the racy fallbacks. + +Signed-off-by: Miklos Szeredi +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 5fe319a7b19c9c328e6e061bffcf1ff6cc8b89ce) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 5 +- + tools/virtiofsd/passthrough_ll.c | 157 +++++++++++++++++++++++++++++++++++---- + 2 files changed, 145 insertions(+), 17 deletions(-) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index b8ec5ac..5531425 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -142,7 +142,10 @@ void fuse_cmdline_help(void) + " --daemonize run in background\n" + " -o max_idle_threads the maximum number of idle worker " + "threads\n" +- " allowed (default: 10)\n"); ++ " allowed (default: 10)\n" ++ " -o norace disable racy fallback\n" ++ " default: false\n" ++ ); + } + + static int fuse_helper_opt_proc(void *data, const char *arg, int key, +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 9815bfa..ac380ef 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -98,6 +98,7 @@ enum { + struct lo_data { + pthread_mutex_t mutex; + int debug; ++ int norace; + int writeback; + int flock; + int xattr; +@@ -124,10 +125,15 @@ static const struct fuse_opt lo_opts[] = { + { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, + { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, + { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, +- ++ { "norace", offsetof(struct lo_data, norace), 1 }, + FUSE_OPT_END + }; + ++static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); ++ ++static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); ++ ++ + static struct lo_data *lo_data(fuse_req_t req) + { + return (struct lo_data *)fuse_req_userdata(req); +@@ -347,23 +353,127 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino, + fuse_reply_attr(req, &buf, lo->timeout); + } + +-static int utimensat_empty_nofollow(struct lo_inode *inode, +- const struct timespec 
*tv) ++static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, ++ char path[PATH_MAX], struct lo_inode **parent) + { +- int res; + char procname[64]; ++ char *last; ++ struct stat stat; ++ struct lo_inode *p; ++ int retries = 2; ++ int res; ++ ++retry: ++ sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ ++ res = readlink(procname, path, PATH_MAX); ++ if (res < 0) { ++ fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__); ++ goto fail_noretry; ++ } ++ ++ if (res >= PATH_MAX) { ++ fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__); ++ goto fail_noretry; ++ } ++ path[res] = '\0'; ++ ++ last = strrchr(path, '/'); ++ if (last == NULL) { ++ /* Shouldn't happen */ ++ fuse_log( ++ FUSE_LOG_WARNING, ++ "%s: INTERNAL ERROR: bad path read from proc\n", __func__); ++ goto fail_noretry; ++ } ++ if (last == path) { ++ p = &lo->root; ++ pthread_mutex_lock(&lo->mutex); ++ p->refcount++; ++ pthread_mutex_unlock(&lo->mutex); ++ } else { ++ *last = '\0'; ++ res = fstatat(AT_FDCWD, last == path ? 
"/" : path, &stat, 0); ++ if (res == -1) { ++ if (!retries) { ++ fuse_log(FUSE_LOG_WARNING, ++ "%s: failed to stat parent: %m\n", __func__); ++ } ++ goto fail; ++ } ++ p = lo_find(lo, &stat); ++ if (p == NULL) { ++ if (!retries) { ++ fuse_log(FUSE_LOG_WARNING, ++ "%s: failed to find parent\n", __func__); ++ } ++ goto fail; ++ } ++ } ++ last++; ++ res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ if (!retries) { ++ fuse_log(FUSE_LOG_WARNING, ++ "%s: failed to stat last\n", __func__); ++ } ++ goto fail_unref; ++ } ++ if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) { ++ if (!retries) { ++ fuse_log(FUSE_LOG_WARNING, ++ "%s: failed to match last\n", __func__); ++ } ++ goto fail_unref; ++ } ++ *parent = p; ++ memmove(path, last, strlen(last) + 1); ++ ++ return 0; ++ ++fail_unref: ++ unref_inode(lo, p, 1); ++fail: ++ if (retries) { ++ retries--; ++ goto retry; ++ } ++fail_noretry: ++ errno = EIO; ++ return -1; ++} ++ ++static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, ++ const struct timespec *tv) ++{ ++ int res; ++ struct lo_inode *parent; ++ char path[PATH_MAX]; + + if (inode->is_symlink) { +- res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH); + if (res == -1 && errno == EINVAL) { + /* Sorry, no race free way to set times on symlink. 
*/ +- errno = EPERM; ++ if (lo->norace) { ++ errno = EPERM; ++ } else { ++ goto fallback; ++ } + } + return res; + } +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(path, "/proc/self/fd/%i", inode->fd); + +- return utimensat(AT_FDCWD, procname, tv, 0); ++ return utimensat(AT_FDCWD, path, tv, 0); ++ ++fallback: ++ res = lo_parent_and_name(lo, inode, path, &parent); ++ if (res != -1) { ++ res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); ++ unref_inode(lo, parent, 1); ++ } ++ ++ return res; + } + + static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) +@@ -387,6 +497,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + { + int saverr; + char procname[64]; ++ struct lo_data *lo = lo_data(req); + struct lo_inode *inode; + int ifd; + int res; +@@ -459,7 +570,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + if (fi) { + res = futimens(fd, tv); + } else { +- res = utimensat_empty_nofollow(inode, tv); ++ res = utimensat_empty(lo, inode, tv); + } + if (res == -1) { + goto out_err; +@@ -709,24 +820,38 @@ static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, + lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); + } + +-static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, +- const char *name) ++static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, ++ int dfd, const char *name) + { + int res; +- char procname[64]; ++ struct lo_inode *parent; ++ char path[PATH_MAX]; + + if (inode->is_symlink) { + res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); + if (res == -1 && (errno == ENOENT || errno == EINVAL)) { + /* Sorry, no race free way to hard-link a symlink. 
*/ +- errno = EPERM; ++ if (lo->norace) { ++ errno = EPERM; ++ } else { ++ goto fallback; ++ } + } + return res; + } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(path, "/proc/self/fd/%i", inode->fd); ++ ++ return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW); ++ ++fallback: ++ res = lo_parent_and_name(lo, inode, path, &parent); ++ if (res != -1) { ++ res = linkat(parent->fd, path, dfd, name, 0); ++ unref_inode(lo, parent, 1); ++ } + +- return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); ++ return res; + } + + static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, +@@ -748,7 +873,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + e.attr_timeout = lo->timeout; + e.entry_timeout = lo->timeout; + +- res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); ++ res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name); + if (res == -1) { + goto out_err; + } +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch b/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch new file mode 100644 index 0000000..24b2a6e --- /dev/null +++ b/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch @@ -0,0 +1,328 @@ +From 35337e604e9149d6d8fcf74b8b82ac33a8611ebb Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:16 +0100 +Subject: [PATCH 045/116] virtiofsd: passthrough_ll: add fd_map to hide file + descriptors +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-42-dgilbert@redhat.com> +Patchwork-id: 93494 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 041/112] virtiofsd: passthrough_ll: add fd_map to hide file descriptors +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Do not expose file descriptor numbers to clients. This prevents the +abuse of internal file descriptors (like stdin/stdout). + +Signed-off-by: Stefan Hajnoczi +Fix from: +Signed-off-by: Xiao Yang +dgilbert: + Added lseek +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 73b4d19dfc4248a74c1f3e511cfa934681d9c602) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 116 +++++++++++++++++++++++++++++++-------- + 1 file changed, 94 insertions(+), 22 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 5f5a72f..9815bfa 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -60,6 +60,7 @@ struct lo_map_elem { + union { + struct lo_inode *inode; + struct lo_dirp *dirp; ++ int fd; + ssize_t freelist; + }; + bool in_use; +@@ -107,6 +108,7 @@ struct lo_data { + struct lo_inode root; /* protected by lo->mutex */ + struct lo_map ino_map; /* protected by lo->mutex */ + struct lo_map dirp_map; /* protected by lo->mutex */ ++ struct lo_map fd_map; /* protected by lo->mutex */ + }; + + static const struct fuse_opt lo_opts[] = { +@@ -237,6 +239,20 @@ static void lo_map_remove(struct lo_map *map, size_t key) + } + + /* Assumes lo->mutex is held */ ++static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) ++{ ++ struct lo_map_elem *elem; ++ ++ elem = lo_map_alloc_elem(&lo_data(req)->fd_map); ++ if (!elem) { ++ return -1; ++ } ++ ++ elem->fd = fd; ++ return elem - lo_data(req)->fd_map.elems; ++} ++ ++/* Assumes lo->mutex is held */ + static ssize_t 
lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp) + { + struct lo_map_elem *elem; +@@ -350,6 +366,22 @@ static int utimensat_empty_nofollow(struct lo_inode *inode, + return utimensat(AT_FDCWD, procname, tv, 0); + } + ++static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) ++{ ++ struct lo_data *lo = lo_data(req); ++ struct lo_map_elem *elem; ++ ++ pthread_mutex_lock(&lo->mutex); ++ elem = lo_map_get(&lo->fd_map, fi->fh); ++ pthread_mutex_unlock(&lo->mutex); ++ ++ if (!elem) { ++ return -1; ++ } ++ ++ return elem->fd; ++} ++ + static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + int valid, struct fuse_file_info *fi) + { +@@ -358,6 +390,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + struct lo_inode *inode; + int ifd; + int res; ++ int fd; + + inode = lo_inode(req, ino); + if (!inode) { +@@ -367,9 +400,14 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + + ifd = inode->fd; + ++ /* If fi->fh is invalid we'll report EBADF later */ ++ if (fi) { ++ fd = lo_fi_fd(req, fi); ++ } ++ + if (valid & FUSE_SET_ATTR_MODE) { + if (fi) { +- res = fchmod(fi->fh, attr->st_mode); ++ res = fchmod(fd, attr->st_mode); + } else { + sprintf(procname, "/proc/self/fd/%i", ifd); + res = chmod(procname, attr->st_mode); +@@ -389,7 +427,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + } + if (valid & FUSE_SET_ATTR_SIZE) { + if (fi) { +- res = ftruncate(fi->fh, attr->st_size); ++ res = ftruncate(fd, attr->st_size); + } else { + sprintf(procname, "/proc/self/fd/%i", ifd); + res = truncate(procname, attr->st_size); +@@ -419,7 +457,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + } + + if (fi) { +- res = futimens(fi->fh, tv); ++ res = futimens(fd, tv); + } else { + res = utimensat_empty_nofollow(inode, tv); + } +@@ -1096,7 +1134,18 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + lo_restore_cred(&old); + + 
if (!err) { +- fi->fh = fd; ++ ssize_t fh; ++ ++ pthread_mutex_lock(&lo->mutex); ++ fh = lo_add_fd_mapping(req, fd); ++ pthread_mutex_unlock(&lo->mutex); ++ if (fh == -1) { ++ close(fd); ++ fuse_reply_err(req, ENOMEM); ++ return; ++ } ++ ++ fi->fh = fh; + err = lo_do_lookup(req, parent, name, &e); + } + if (lo->cache == CACHE_NEVER) { +@@ -1140,6 +1189,7 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { + int fd; ++ ssize_t fh; + char buf[64]; + struct lo_data *lo = lo_data(req); + +@@ -1175,7 +1225,16 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + return (void)fuse_reply_err(req, errno); + } + +- fi->fh = fd; ++ pthread_mutex_lock(&lo->mutex); ++ fh = lo_add_fd_mapping(req, fd); ++ pthread_mutex_unlock(&lo->mutex); ++ if (fh == -1) { ++ close(fd); ++ fuse_reply_err(req, ENOMEM); ++ return; ++ } ++ ++ fi->fh = fh; + if (lo->cache == CACHE_NEVER) { + fi->direct_io = 1; + } else if (lo->cache == CACHE_ALWAYS) { +@@ -1187,9 +1246,18 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + static void lo_release(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) + { ++ struct lo_data *lo = lo_data(req); ++ int fd; ++ + (void)ino; + +- close(fi->fh); ++ fd = lo_fi_fd(req, fi); ++ ++ pthread_mutex_lock(&lo->mutex); ++ lo_map_remove(&lo->fd_map, fi->fh); ++ pthread_mutex_unlock(&lo->mutex); ++ ++ close(fd); + fuse_reply_err(req, 0); + } + +@@ -1197,7 +1265,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { + int res; + (void)ino; +- res = close(dup(fi->fh)); ++ res = close(dup(lo_fi_fd(req, fi))); + fuse_reply_err(req, res == -1 ? 
errno : 0); + } + +@@ -1224,7 +1292,7 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + return (void)fuse_reply_err(req, errno); + } + } else { +- fd = fi->fh; ++ fd = lo_fi_fd(req, fi); + } + + if (datasync) { +@@ -1251,7 +1319,7 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, + } + + buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; +- buf.buf[0].fd = fi->fh; ++ buf.buf[0].fd = lo_fi_fd(req, fi); + buf.buf[0].pos = offset; + + fuse_reply_data(req, &buf); +@@ -1266,7 +1334,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); + + out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; +- out_buf.buf[0].fd = fi->fh; ++ out_buf.buf[0].fd = lo_fi_fd(req, fi); + out_buf.buf[0].pos = off; + + if (lo_debug(req)) { +@@ -1303,7 +1371,7 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, + (void)ino; + + #ifdef CONFIG_FALLOCATE +- err = fallocate(fi->fh, mode, offset, length); ++ err = fallocate(lo_fi_fd(req, fi), mode, offset, length); + if (err < 0) { + err = errno; + } +@@ -1314,7 +1382,7 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, + return; + } + +- err = posix_fallocate(fi->fh, offset, length); ++ err = posix_fallocate(lo_fi_fd(req, fi), offset, length); + #endif + + fuse_reply_err(req, err); +@@ -1326,7 +1394,7 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, + int res; + (void)ino; + +- res = flock(fi->fh, op); ++ res = flock(lo_fi_fd(req, fi), op); + + fuse_reply_err(req, res == -1 ? 
errno : 0); + } +@@ -1551,17 +1619,19 @@ static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, + off_t off_out, struct fuse_file_info *fi_out, + size_t len, int flags) + { ++ int in_fd, out_fd; + ssize_t res; + +- if (lo_debug(req)) +- fuse_log(FUSE_LOG_DEBUG, +- "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " +- "off=%lu, ino=%" PRIu64 "/fd=%lu, " +- "off=%lu, size=%zd, flags=0x%x)\n", +- ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, len, +- flags); ++ in_fd = lo_fi_fd(req, fi_in); ++ out_fd = lo_fi_fd(req, fi_out); ++ ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_copy_file_range(ino=%" PRIu64 "/fd=%d, " ++ "off=%lu, ino=%" PRIu64 "/fd=%d, " ++ "off=%lu, size=%zd, flags=0x%x)\n", ++ ino_in, in_fd, off_in, ino_out, out_fd, off_out, len, flags); + +- res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, flags); ++ res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags); + if (res < 0) { + fuse_reply_err(req, -errno); + } else { +@@ -1576,7 +1646,7 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, + off_t res; + + (void)ino; +- res = lseek(fi->fh, off, whence); ++ res = lseek(lo_fi_fd(req, fi), off, whence); + if (res != -1) { + fuse_reply_lseek(req, res); + } else { +@@ -1661,6 +1731,7 @@ int main(int argc, char *argv[]) + root_elem->inode = &lo.root; + + lo_map_init(&lo.dirp_map); ++ lo_map_init(&lo.fd_map); + + if (fuse_parse_cmdline(&args, &opts) != 0) { + return 1; +@@ -1758,6 +1829,7 @@ err_out2: + err_out1: + fuse_opt_free_args(&args); + ++ lo_map_destroy(&lo.fd_map); + lo_map_destroy(&lo.dirp_map); + lo_map_destroy(&lo.ino_map); + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch b/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch new file mode 100644 index 0000000..ba8b730 --- /dev/null +++ b/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch @@ -0,0 +1,395 @@ +From d81396cc3d9815730903b0755c9d2e67d6954d54 Mon Sep 
17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:14 +0100 +Subject: [PATCH 043/116] virtiofsd: passthrough_ll: add ino_map to hide + lo_inode pointers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-40-dgilbert@redhat.com> +Patchwork-id: 93493 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 039/112] virtiofsd: passthrough_ll: add ino_map to hide lo_inode pointers +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Do not expose lo_inode pointers to clients. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 92fb57b83cdbfc4bf53c0c46a3d0bcbc36e64126) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 144 +++++++++++++++++++++++++++++++-------- + 1 file changed, 114 insertions(+), 30 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e83a976..a3ebf74 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -57,8 +57,8 @@ + #include "passthrough_helpers.h" + + /* +- * We are re-using pointers to our `struct lo_inode` and `struct +- * lo_dirp` elements as inodes. This means that we must be able to ++ * We are re-using pointers to our `struct lo_inode` ++ * elements as inodes. This means that we must be able to + * store uintptr_t values in a fuse_ino_t variable. The following + * incantation checks this condition at compile time. + */ +@@ -76,7 +76,7 @@ struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { + + struct lo_map_elem { + union { +- /* Element values will go here... 
*/ ++ struct lo_inode *inode; + ssize_t freelist; + }; + bool in_use; +@@ -97,6 +97,7 @@ struct lo_inode { + ino_t ino; + dev_t dev; + uint64_t refcount; /* protected by lo->mutex */ ++ fuse_ino_t fuse_ino; + }; + + struct lo_cred { +@@ -121,6 +122,7 @@ struct lo_data { + int cache; + int timeout_set; + struct lo_inode root; /* protected by lo->mutex */ ++ struct lo_map ino_map; /* protected by lo->mutex */ + }; + + static const struct fuse_opt lo_opts[] = { +@@ -145,14 +147,14 @@ static struct lo_data *lo_data(fuse_req_t req) + return (struct lo_data *)fuse_req_userdata(req); + } + +-__attribute__((unused)) static void lo_map_init(struct lo_map *map) ++static void lo_map_init(struct lo_map *map) + { + map->elems = NULL; + map->nelems = 0; + map->freelist = -1; + } + +-__attribute__((unused)) static void lo_map_destroy(struct lo_map *map) ++static void lo_map_destroy(struct lo_map *map) + { + free(map->elems); + } +@@ -183,8 +185,7 @@ static int lo_map_grow(struct lo_map *map, size_t new_nelems) + return 1; + } + +-__attribute__((unused)) static struct lo_map_elem * +-lo_map_alloc_elem(struct lo_map *map) ++static struct lo_map_elem *lo_map_alloc_elem(struct lo_map *map) + { + struct lo_map_elem *elem; + +@@ -200,8 +201,7 @@ lo_map_alloc_elem(struct lo_map *map) + return elem; + } + +-__attribute__((unused)) static struct lo_map_elem * +-lo_map_reserve(struct lo_map *map, size_t key) ++static struct lo_map_elem *lo_map_reserve(struct lo_map *map, size_t key) + { + ssize_t *prev; + +@@ -222,8 +222,7 @@ lo_map_reserve(struct lo_map *map, size_t key) + return NULL; + } + +-__attribute__((unused)) static struct lo_map_elem * +-lo_map_get(struct lo_map *map, size_t key) ++static struct lo_map_elem *lo_map_get(struct lo_map *map, size_t key) + { + if (key >= map->nelems) { + return NULL; +@@ -234,8 +233,7 @@ lo_map_get(struct lo_map *map, size_t key) + return &map->elems[key]; + } + +-__attribute__((unused)) static void lo_map_remove(struct lo_map *map, +- size_t key) 
++static void lo_map_remove(struct lo_map *map, size_t key) + { + struct lo_map_elem *elem; + +@@ -254,18 +252,40 @@ __attribute__((unused)) static void lo_map_remove(struct lo_map *map, + map->freelist = key; + } + ++/* Assumes lo->mutex is held */ ++static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) ++{ ++ struct lo_map_elem *elem; ++ ++ elem = lo_map_alloc_elem(&lo_data(req)->ino_map); ++ if (!elem) { ++ return -1; ++ } ++ ++ elem->inode = inode; ++ return elem - lo_data(req)->ino_map.elems; ++} ++ + static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + { +- if (ino == FUSE_ROOT_ID) { +- return &lo_data(req)->root; +- } else { +- return (struct lo_inode *)(uintptr_t)ino; ++ struct lo_data *lo = lo_data(req); ++ struct lo_map_elem *elem; ++ ++ pthread_mutex_lock(&lo->mutex); ++ elem = lo_map_get(&lo->ino_map, ino); ++ pthread_mutex_unlock(&lo->mutex); ++ ++ if (!elem) { ++ return NULL; + } ++ ++ return elem->inode; + } + + static int lo_fd(fuse_req_t req, fuse_ino_t ino) + { +- return lo_inode(req, ino)->fd; ++ struct lo_inode *inode = lo_inode(req, ino); ++ return inode ? 
inode->fd : -1; + } + + static bool lo_debug(fuse_req_t req) +@@ -337,10 +357,18 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + { + int saverr; + char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); +- int ifd = inode->fd; ++ struct lo_inode *inode; ++ int ifd; + int res; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ ++ ifd = inode->fd; ++ + if (valid & FUSE_SET_ATTR_MODE) { + if (fi) { + res = fchmod(fi->fh, attr->st_mode); +@@ -470,6 +498,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + inode->dev = e->attr.st_dev; + + pthread_mutex_lock(&lo->mutex); ++ inode->fuse_ino = lo_add_inode_mapping(req, inode); + prev = &lo->root; + next = prev->next; + next->prev = inode; +@@ -478,7 +507,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + prev->next = inode; + pthread_mutex_unlock(&lo->mutex); + } +- e->ino = (uintptr_t)inode; ++ e->ino = inode->fuse_ino; + + if (lo_debug(req)) { + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +@@ -582,10 +611,16 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + { + int res; + int saverr; +- struct lo_inode *dir = lo_inode(req, parent); ++ struct lo_inode *dir; + struct fuse_entry_param e; + struct lo_cred old = {}; + ++ dir = lo_inode(req, parent); ++ if (!dir) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + saverr = ENOMEM; + + saverr = lo_change_cred(req, &old); +@@ -663,10 +698,16 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + { + int res; + struct lo_data *lo = lo_data(req); +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; + struct fuse_entry_param e; + int saverr; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + memset(&e, 0, sizeof(struct fuse_entry_param)); + e.attr_timeout = lo->timeout; + e.entry_timeout = lo->timeout; +@@ -684,7 
+725,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + pthread_mutex_lock(&lo->mutex); + inode->refcount++; + pthread_mutex_unlock(&lo->mutex); +- e.ino = (uintptr_t)inode; ++ e.ino = inode->fuse_ino; + + if (lo_debug(req)) { + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +@@ -750,10 +791,10 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) + next->prev = prev; + prev->next = next; + ++ lo_map_remove(&lo->ino_map, inode->fuse_ino); + pthread_mutex_unlock(&lo->mutex); + close(inode->fd); + free(inode); +- + } else { + pthread_mutex_unlock(&lo->mutex); + } +@@ -762,7 +803,12 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) + static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + { + struct lo_data *lo = lo_data(req); +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; ++ ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ return; ++ } + + if (lo_debug(req)) { + fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", +@@ -1244,10 +1290,16 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + { + char *value = NULL; + char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; + ssize_t ret; + int saverr; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + saverr = ENOSYS; + if (!lo_data(req)->xattr) { + goto out; +@@ -1306,10 +1358,16 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + { + char *value = NULL; + char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; + ssize_t ret; + int saverr; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + saverr = ENOSYS; + if (!lo_data(req)->xattr) { + goto out; +@@ -1367,10 +1425,16 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + const char 
*value, size_t size, int flags) + { + char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; + ssize_t ret; + int saverr; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + saverr = ENOSYS; + if (!lo_data(req)->xattr) { + goto out; +@@ -1400,10 +1464,16 @@ out: + static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + { + char procname[64]; +- struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_inode *inode; + ssize_t ret; + int saverr; + ++ inode = lo_inode(req, ino); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } ++ + saverr = ENOSYS; + if (!lo_data(req)->xattr) { + goto out; +@@ -1522,6 +1592,7 @@ int main(int argc, char *argv[]) + struct fuse_session *se; + struct fuse_cmdline_opts opts; + struct lo_data lo = { .debug = 0, .writeback = 0 }; ++ struct lo_map_elem *root_elem; + int ret = -1; + + /* Don't mask creation mode, kernel already did that */ +@@ -1530,8 +1601,19 @@ int main(int argc, char *argv[]) + pthread_mutex_init(&lo.mutex, NULL); + lo.root.next = lo.root.prev = &lo.root; + lo.root.fd = -1; ++ lo.root.fuse_ino = FUSE_ROOT_ID; + lo.cache = CACHE_NORMAL; + ++ /* ++ * Set up the ino map like this: ++ * [0] Reserved (will not be used) ++ * [1] Root inode ++ */ ++ lo_map_init(&lo.ino_map); ++ lo_map_reserve(&lo.ino_map, 0)->in_use = false; ++ root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino); ++ root_elem->inode = &lo.root; ++ + if (fuse_parse_cmdline(&args, &opts) != 0) { + return 1; + } +@@ -1628,6 +1710,8 @@ err_out2: + err_out1: + fuse_opt_free_args(&args); + ++ lo_map_destroy(&lo.ino_map); ++ + if (lo.root.fd >= 0) { + close(lo.root.fd); + } +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch b/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch new file mode 100644 index 0000000..4751f95 --- /dev/null +++ 
b/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch @@ -0,0 +1,182 @@ +From d56651e227bae83ee0cceb12bd91e3e9f6045ab3 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:13 +0100 +Subject: [PATCH 042/116] virtiofsd: passthrough_ll: add lo_map for ino/fh + indirection +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-39-dgilbert@redhat.com> +Patchwork-id: 93492 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 038/112] virtiofsd: passthrough_ll: add lo_map for ino/fh indirection +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +A layer of indirection is needed because passthrough_ll cannot expose +pointers or file descriptor numbers to untrusted clients. Malicious +clients could send invalid pointers or file descriptors in order to +crash or exploit the file system daemon. + +lo_map provides an integer key->value mapping. This will be used for +ino and fh fields in the patches that follow. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 25c135727b08dca90f00094e522a69170b13dfac) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 124 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 124 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 5e06179..e83a976 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -74,6 +74,21 @@ struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { + }; + #endif + ++struct lo_map_elem { ++ union { ++ /* Element values will go here... 
*/ ++ ssize_t freelist; ++ }; ++ bool in_use; ++}; ++ ++/* Maps FUSE fh or ino values to internal objects */ ++struct lo_map { ++ struct lo_map_elem *elems; ++ size_t nelems; ++ ssize_t freelist; ++}; ++ + struct lo_inode { + struct lo_inode *next; /* protected by lo->mutex */ + struct lo_inode *prev; /* protected by lo->mutex */ +@@ -130,6 +145,115 @@ static struct lo_data *lo_data(fuse_req_t req) + return (struct lo_data *)fuse_req_userdata(req); + } + ++__attribute__((unused)) static void lo_map_init(struct lo_map *map) ++{ ++ map->elems = NULL; ++ map->nelems = 0; ++ map->freelist = -1; ++} ++ ++__attribute__((unused)) static void lo_map_destroy(struct lo_map *map) ++{ ++ free(map->elems); ++} ++ ++static int lo_map_grow(struct lo_map *map, size_t new_nelems) ++{ ++ struct lo_map_elem *new_elems; ++ size_t i; ++ ++ if (new_nelems <= map->nelems) { ++ return 1; ++ } ++ ++ new_elems = realloc(map->elems, sizeof(map->elems[0]) * new_nelems); ++ if (!new_elems) { ++ return 0; ++ } ++ ++ for (i = map->nelems; i < new_nelems; i++) { ++ new_elems[i].freelist = i + 1; ++ new_elems[i].in_use = false; ++ } ++ new_elems[new_nelems - 1].freelist = -1; ++ ++ map->elems = new_elems; ++ map->freelist = map->nelems; ++ map->nelems = new_nelems; ++ return 1; ++} ++ ++__attribute__((unused)) static struct lo_map_elem * ++lo_map_alloc_elem(struct lo_map *map) ++{ ++ struct lo_map_elem *elem; ++ ++ if (map->freelist == -1 && !lo_map_grow(map, map->nelems + 256)) { ++ return NULL; ++ } ++ ++ elem = &map->elems[map->freelist]; ++ map->freelist = elem->freelist; ++ ++ elem->in_use = true; ++ ++ return elem; ++} ++ ++__attribute__((unused)) static struct lo_map_elem * ++lo_map_reserve(struct lo_map *map, size_t key) ++{ ++ ssize_t *prev; ++ ++ if (!lo_map_grow(map, key + 1)) { ++ return NULL; ++ } ++ ++ for (prev = &map->freelist; *prev != -1; ++ prev = &map->elems[*prev].freelist) { ++ if (*prev == key) { ++ struct lo_map_elem *elem = &map->elems[key]; ++ ++ *prev = elem->freelist; 
++ elem->in_use = true; ++ return elem; ++ } ++ } ++ return NULL; ++} ++ ++__attribute__((unused)) static struct lo_map_elem * ++lo_map_get(struct lo_map *map, size_t key) ++{ ++ if (key >= map->nelems) { ++ return NULL; ++ } ++ if (!map->elems[key].in_use) { ++ return NULL; ++ } ++ return &map->elems[key]; ++} ++ ++__attribute__((unused)) static void lo_map_remove(struct lo_map *map, ++ size_t key) ++{ ++ struct lo_map_elem *elem; ++ ++ if (key >= map->nelems) { ++ return; ++ } ++ ++ elem = &map->elems[key]; ++ if (!elem->in_use) { ++ return; ++ } ++ ++ elem->in_use = false; ++ ++ elem->freelist = map->freelist; ++ map->freelist = key; ++} ++ + static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) + { + if (ino == FUSE_ROOT_ID) { +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch b/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch new file mode 100644 index 0000000..a3f7970 --- /dev/null +++ b/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch @@ -0,0 +1,52 @@ +From 86b4f2865f2ebd7e6b3d85beb66a9390eb46eb96 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:45 +0100 +Subject: [PATCH 074/116] virtiofsd: passthrough_ll: add renameat2 support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-71-dgilbert@redhat.com> +Patchwork-id: 93531 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 070/112] virtiofsd: passthrough_ll: add renameat2 support +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Signed-off-by: Miklos Szeredi +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit f0ab7d6f78a7d3c1c19fd81a91c9b1199f56c4f6) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 98114a3..18d69ab 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1099,7 +1099,17 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + } + + if (flags) { ++#ifndef SYS_renameat2 + fuse_reply_err(req, EINVAL); ++#else ++ res = syscall(SYS_renameat2, lo_fd(req, parent), name, ++ lo_fd(req, newparent), newname, flags); ++ if (res == -1 && errno == ENOSYS) { ++ fuse_reply_err(req, EINVAL); ++ } else { ++ fuse_reply_err(req, res == -1 ? errno : 0); ++ } ++#endif + return; + } + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch b/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch new file mode 100644 index 0000000..dc87ef2 --- /dev/null +++ b/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch @@ -0,0 +1,138 @@ +From 079199c53f483f0051f994b195ebb595aec76a39 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:51 +0100 +Subject: [PATCH 080/116] virtiofsd: passthrough_ll: clean up cache related + options +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-77-dgilbert@redhat.com> +Patchwork-id: 93530 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 076/112] virtiofsd: passthrough_ll: clean up cache related options +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + + - Rename "cache=never" to "cache=none" to match 9p's similar option. 
+ + - Rename CACHE_NORMAL constant to CACHE_AUTO to match the "cache=auto" + option. + +Signed-off-by: Miklos Szeredi +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 230e777b5e250759ee0480fcc0e9ccfa2b082fba) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 5 ++++- + tools/virtiofsd/passthrough_ll.c | 20 ++++++++++---------- + 2 files changed, 14 insertions(+), 11 deletions(-) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 14f5d70..5672024 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -145,6 +145,9 @@ void fuse_cmdline_help(void) + " --syslog log to syslog (default stderr)\n" + " -f foreground operation\n" + " --daemonize run in background\n" ++ " -o cache= cache mode. could be one of \"auto, " ++ "always, none\"\n" ++ " default: auto\n" + " -o log_level= log level, default to \"info\"\n" + " level could be one of \"debug, " + "info, warn, err\"\n" +@@ -156,7 +159,7 @@ void fuse_cmdline_help(void) + " -o readdirplus|no_readdirplus\n" + " enable/disable readirplus\n" + " default: readdirplus except with " +- "cache=never\n" ++ "cache=none\n" + ); + } + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 9e7191e..b40f287 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -101,8 +101,8 @@ struct lo_cred { + }; + + enum { +- CACHE_NEVER, +- CACHE_NORMAL, ++ CACHE_NONE, ++ CACHE_AUTO, + CACHE_ALWAYS, + }; + +@@ -138,8 +138,8 @@ static const struct fuse_opt lo_opts[] = { + { "no_xattr", offsetof(struct lo_data, xattr), 0 }, + { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, + { "timeout=", offsetof(struct lo_data, timeout_set), 1 }, +- { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, +- { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, ++ { "cache=none", offsetof(struct lo_data, cache), 
CACHE_NONE }, ++ { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO }, + { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, + { "norace", offsetof(struct lo_data, norace), 1 }, + { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, +@@ -482,7 +482,7 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); + conn->want |= FUSE_CAP_FLOCK_LOCKS; + } +- if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) || ++ if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) || + lo->readdirplus_clear) { + fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); + conn->want &= ~FUSE_CAP_READDIRPLUS; +@@ -1493,7 +1493,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + fi->fh = fh; + err = lo_do_lookup(req, parent, name, &e); + } +- if (lo->cache == CACHE_NEVER) { ++ if (lo->cache == CACHE_NONE) { + fi->direct_io = 1; + } else if (lo->cache == CACHE_ALWAYS) { + fi->keep_cache = 1; +@@ -1578,7 +1578,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + } + + fi->fh = fh; +- if (lo->cache == CACHE_NEVER) { ++ if (lo->cache == CACHE_NONE) { + fi->direct_io = 1; + } else if (lo->cache == CACHE_ALWAYS) { + fi->keep_cache = 1; +@@ -2395,7 +2395,7 @@ int main(int argc, char *argv[]) + lo.root.next = lo.root.prev = &lo.root; + lo.root.fd = -1; + lo.root.fuse_ino = FUSE_ROOT_ID; +- lo.cache = CACHE_NORMAL; ++ lo.cache = CACHE_AUTO; + + /* + * Set up the ino map like this: +@@ -2470,11 +2470,11 @@ int main(int argc, char *argv[]) + } + if (!lo.timeout_set) { + switch (lo.cache) { +- case CACHE_NEVER: ++ case CACHE_NONE: + lo.timeout = 0.0; + break; + +- case CACHE_NORMAL: ++ case CACHE_AUTO: + lo.timeout = 1.0; + break; + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch b/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch new file mode 100644 index 0000000..98d00fc --- 
/dev/null +++ b/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch @@ -0,0 +1,79 @@ +From 0f1d456fad4ba6a696eff8976b9fe8a0f251e1b5 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:47 +0100 +Subject: [PATCH 076/116] virtiofsd: passthrough_ll: control readdirplus +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-73-dgilbert@redhat.com> +Patchwork-id: 93524 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 072/112] virtiofsd: passthrough_ll: control readdirplus +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Signed-off-by: Miklos Szeredi +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 59aef494be2d8d91055ff3f3a8eb13d9f32873d8) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/helper.c | 4 ++++ + tools/virtiofsd/passthrough_ll.c | 7 ++++++- + 2 files changed, 10 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 6d50a46..14f5d70 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -153,6 +153,10 @@ void fuse_cmdline_help(void) + " allowed (default: 10)\n" + " -o norace disable racy fallback\n" + " default: false\n" ++ " -o readdirplus|no_readdirplus\n" ++ " enable/disable readirplus\n" ++ " default: readdirplus except with " ++ "cache=never\n" + ); + } + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 6480c51..8b1784f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -117,6 +117,8 @@ struct lo_data { + double timeout; + int cache; + int timeout_set; ++ int readdirplus_set; ++ int readdirplus_clear; + struct lo_inode root; /* protected by lo->mutex */ + struct lo_map ino_map; /* protected by lo->mutex */ + struct lo_map 
dirp_map; /* protected by lo->mutex */ +@@ -140,6 +142,8 @@ static const struct fuse_opt lo_opts[] = { + { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, + { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, + { "norace", offsetof(struct lo_data, norace), 1 }, ++ { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, ++ { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, + FUSE_OPT_END + }; + static bool use_syslog = false; +@@ -478,7 +482,8 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); + conn->want |= FUSE_CAP_FLOCK_LOCKS; + } +- if (lo->cache == CACHE_NEVER) { ++ if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) || ++ lo->readdirplus_clear) { + fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); + conn->want &= ~FUSE_CAP_READDIRPLUS; + } +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch b/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch new file mode 100644 index 0000000..4b02779 --- /dev/null +++ b/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch @@ -0,0 +1,198 @@ +From af14ef1dba9356e566c9c7531b8fd23361c2b16d Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:12 +0100 +Subject: [PATCH 041/116] virtiofsd: passthrough_ll: create new files in + caller's context +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-38-dgilbert@redhat.com> +Patchwork-id: 93488 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 037/112] virtiofsd: passthrough_ll: create new files in caller's context +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Vivek Goyal + +We need to create files in the caller's context. 
Otherwise after +creating a file, the caller might not be able to do file operations on +that file. + +Changed effective uid/gid to caller's uid/gid, create file and then +switch back to uid/gid 0. + +Use syscall(setresuid, ...) otherwise glibc does some magic to change EUID +in all threads, which is not what we want. + +Signed-off-by: Vivek Goyal +Signed-off-by: Miklos Szeredi +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 929cfb7a9a1b101cdfc9ac19807ecab4c81a13e4) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 96 +++++++++++++++++++++++++++++++++++++--- + 1 file changed, 91 insertions(+), 5 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index cd27c09..5e06179 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -50,6 +50,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -83,6 +84,11 @@ struct lo_inode { + uint64_t refcount; /* protected by lo->mutex */ + }; + ++struct lo_cred { ++ uid_t euid; ++ gid_t egid; ++}; ++ + enum { + CACHE_NEVER, + CACHE_NORMAL, +@@ -383,6 +389,69 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + } + } + ++/* ++ * On some archs, setres*id is limited to 2^16 but they ++ * provide setres*id32 variants that allow 2^32. ++ * Others just let setres*id do 2^32 anyway. ++ */ ++#ifdef SYS_setresgid32 ++#define OURSYS_setresgid SYS_setresgid32 ++#else ++#define OURSYS_setresgid SYS_setresgid ++#endif ++ ++#ifdef SYS_setresuid32 ++#define OURSYS_setresuid SYS_setresuid32 ++#else ++#define OURSYS_setresuid SYS_setresuid ++#endif ++ ++/* ++ * Change to uid/gid of caller so that file is created with ++ * ownership of caller. ++ * TODO: What about selinux context? 
++ */ ++static int lo_change_cred(fuse_req_t req, struct lo_cred *old) ++{ ++ int res; ++ ++ old->euid = geteuid(); ++ old->egid = getegid(); ++ ++ res = syscall(OURSYS_setresgid, -1, fuse_req_ctx(req)->gid, -1); ++ if (res == -1) { ++ return errno; ++ } ++ ++ res = syscall(OURSYS_setresuid, -1, fuse_req_ctx(req)->uid, -1); ++ if (res == -1) { ++ int errno_save = errno; ++ ++ syscall(OURSYS_setresgid, -1, old->egid, -1); ++ return errno_save; ++ } ++ ++ return 0; ++} ++ ++/* Regain Privileges */ ++static void lo_restore_cred(struct lo_cred *old) ++{ ++ int res; ++ ++ res = syscall(OURSYS_setresuid, -1, old->euid, -1); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "seteuid(%u): %m\n", old->euid); ++ exit(1); ++ } ++ ++ res = syscall(OURSYS_setresgid, -1, old->egid, -1); ++ if (res == -1) { ++ fuse_log(FUSE_LOG_ERR, "setegid(%u): %m\n", old->egid); ++ exit(1); ++ } ++} ++ + static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + const char *name, mode_t mode, dev_t rdev, + const char *link) +@@ -391,12 +460,21 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + int saverr; + struct lo_inode *dir = lo_inode(req, parent); + struct fuse_entry_param e; ++ struct lo_cred old = {}; + + saverr = ENOMEM; + ++ saverr = lo_change_cred(req, &old); ++ if (saverr) { ++ goto out; ++ } ++ + res = mknod_wrapper(dir->fd, name, link, mode, rdev); + + saverr = errno; ++ ++ lo_restore_cred(&old); ++ + if (res == -1) { + goto out; + } +@@ -794,26 +872,34 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + struct lo_data *lo = lo_data(req); + struct fuse_entry_param e; + int err; ++ struct lo_cred old = {}; + + if (lo_debug(req)) { + fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", + parent, name); + } + ++ err = lo_change_cred(req, &old); ++ if (err) { ++ goto out; ++ } ++ + fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, + mode); +- if (fd == -1) { +- return (void)fuse_reply_err(req, 
errno); +- } ++ err = fd == -1 ? errno : 0; ++ lo_restore_cred(&old); + +- fi->fh = fd; ++ if (!err) { ++ fi->fh = fd; ++ err = lo_do_lookup(req, parent, name, &e); ++ } + if (lo->cache == CACHE_NEVER) { + fi->direct_io = 1; + } else if (lo->cache == CACHE_ALWAYS) { + fi->keep_cache = 1; + } + +- err = lo_do_lookup(req, parent, name, &e); ++out: + if (err) { + fuse_reply_err(req, err); + } else { +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch b/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch new file mode 100644 index 0000000..4a531a3 --- /dev/null +++ b/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch @@ -0,0 +1,50 @@ +From bbf92338e5e5eed796d511d2bd3c3686b7d1e5fd Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:46 +0100 +Subject: [PATCH 075/116] virtiofsd: passthrough_ll: disable readdirplus on + cache=never +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-72-dgilbert@redhat.com> +Patchwork-id: 93525 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 071/112] virtiofsd: passthrough_ll: disable readdirplus on cache=never +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +...because the attributes sent in the READDIRPLUS reply would be discarded +anyway. + +Signed-off-by: Miklos Szeredi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit ddcbabcb0ea177be3ec3500726b699c7c26ffd93) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 18d69ab..6480c51 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -478,6 +478,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); + conn->want |= FUSE_CAP_FLOCK_LOCKS; + } ++ if (lo->cache == CACHE_NEVER) { ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); ++ conn->want &= ~FUSE_CAP_READDIRPLUS; ++ } + } + + static void lo_getattr(fuse_req_t req, fuse_ino_t ino, +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch b/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch new file mode 100644 index 0000000..00e11b4 --- /dev/null +++ b/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch @@ -0,0 +1,143 @@ +From 5e33269d5fbc4ba4614bab4a6b9e0ef759bebcb7 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:10 +0100 +Subject: [PATCH 099/116] virtiofsd: passthrough_ll: fix refcounting on + remove/rename +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-96-dgilbert@redhat.com> +Patchwork-id: 93549 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 095/112] virtiofsd: passthrough_ll: fix refcounting on remove/rename +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Signed-off-by: Miklos Szeredi +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 9257e514d861afa759c36704e1904d43ca3fec88) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 50 +++++++++++++++++++++++++++++++++++++++- + 1 file changed, 49 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index c819b5f..e3a6d6b 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1140,17 +1140,42 @@ out_err: + fuse_reply_err(req, saverr); + } + ++static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, ++ const char *name) ++{ ++ int res; ++ struct stat attr; ++ ++ res = fstatat(lo_fd(req, parent), name, &attr, ++ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); ++ if (res == -1) { ++ return NULL; ++ } ++ ++ return lo_find(lo_data(req), &attr); ++} ++ + static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) + { + int res; ++ struct lo_inode *inode; ++ struct lo_data *lo = lo_data(req); ++ + if (!is_safe_path_component(name)) { + fuse_reply_err(req, EINVAL); + return; + } + ++ inode = lookup_name(req, parent, name); ++ if (!inode) { ++ fuse_reply_err(req, EIO); ++ return; ++ } ++ + res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); + + fuse_reply_err(req, res == -1 ? 
errno : 0); ++ unref_inode_lolocked(lo, inode, 1); + } + + static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, +@@ -1158,12 +1183,23 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + unsigned int flags) + { + int res; ++ struct lo_inode *oldinode; ++ struct lo_inode *newinode; ++ struct lo_data *lo = lo_data(req); + + if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { + fuse_reply_err(req, EINVAL); + return; + } + ++ oldinode = lookup_name(req, parent, name); ++ newinode = lookup_name(req, newparent, newname); ++ ++ if (!oldinode) { ++ fuse_reply_err(req, EIO); ++ goto out; ++ } ++ + if (flags) { + #ifndef SYS_renameat2 + fuse_reply_err(req, EINVAL); +@@ -1176,26 +1212,38 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + fuse_reply_err(req, res == -1 ? errno : 0); + } + #endif +- return; ++ goto out; + } + + res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); + + fuse_reply_err(req, res == -1 ? errno : 0); ++out: ++ unref_inode_lolocked(lo, oldinode, 1); ++ unref_inode_lolocked(lo, newinode, 1); + } + + static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + { + int res; ++ struct lo_inode *inode; ++ struct lo_data *lo = lo_data(req); + + if (!is_safe_path_component(name)) { + fuse_reply_err(req, EINVAL); + return; + } + ++ inode = lookup_name(req, parent, name); ++ if (!inode) { ++ fuse_reply_err(req, EIO); ++ return; ++ } ++ + res = unlinkat(lo_fd(req, parent), name, 0); + + fuse_reply_err(req, res == -1 ? 
errno : 0); ++ unref_inode_lolocked(lo, inode, 1); + } + + static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-passthrough_ll-use-hashtable.patch b/kvm-virtiofsd-passthrough_ll-use-hashtable.patch new file mode 100644 index 0000000..b0be1f9 --- /dev/null +++ b/kvm-virtiofsd-passthrough_ll-use-hashtable.patch @@ -0,0 +1,211 @@ +From 44f4434b1305f6ff47b4f63fafcf39bcea9e4ceb Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:52 +0100 +Subject: [PATCH 081/116] virtiofsd: passthrough_ll: use hashtable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-78-dgilbert@redhat.com> +Patchwork-id: 93528 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 077/112] virtiofsd: passthrough_ll: use hashtable +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Improve performance of inode lookup by using a hash table. + +Signed-off-by: Miklos Szeredi +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Liu Bo +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit bfc50a6e06b10b2f9dbaf6c1a89dd523322e016f) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 81 ++++++++++++++++++++++------------------ + 1 file changed, 45 insertions(+), 36 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index b40f287..b176a31 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -84,13 +84,15 @@ struct lo_map { + ssize_t freelist; + }; + ++struct lo_key { ++ ino_t ino; ++ dev_t dev; ++}; ++ + struct lo_inode { +- struct lo_inode *next; /* protected by lo->mutex */ +- struct lo_inode *prev; /* protected by lo->mutex */ + int fd; + bool is_symlink; +- ino_t ino; +- dev_t dev; ++ struct lo_key key; + uint64_t refcount; /* protected by lo->mutex */ + fuse_ino_t fuse_ino; + }; +@@ -119,7 +121,8 @@ struct lo_data { + int timeout_set; + int readdirplus_set; + int readdirplus_clear; +- struct lo_inode root; /* protected by lo->mutex */ ++ struct lo_inode root; ++ GHashTable *inodes; /* protected by lo->mutex */ + struct lo_map ino_map; /* protected by lo->mutex */ + struct lo_map dirp_map; /* protected by lo->mutex */ + struct lo_map fd_map; /* protected by lo->mutex */ +@@ -573,7 +576,7 @@ retry: + } + goto fail_unref; + } +- if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) { ++ if (stat.st_dev != inode->key.dev || stat.st_ino != inode->key.ino) { + if (!retries) { + fuse_log(FUSE_LOG_WARNING, + "%s: failed to match last\n", __func__); +@@ -753,19 +756,20 @@ out_err: + static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) + { + struct lo_inode *p; +- struct lo_inode *ret = NULL; ++ struct lo_key key = { ++ .ino = st->st_ino, ++ .dev = st->st_dev, ++ }; + + pthread_mutex_lock(&lo->mutex); +- for (p = lo->root.next; p != &lo->root; p = p->next) { +- if (p->ino == st->st_ino && p->dev == st->st_dev) { +- assert(p->refcount > 0); +- ret = p; +- ret->refcount++; +- break; +- 
} ++ p = g_hash_table_lookup(lo->inodes, &key); ++ if (p) { ++ assert(p->refcount > 0); ++ p->refcount++; + } + pthread_mutex_unlock(&lo->mutex); +- return ret; ++ ++ return p; + } + + static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, +@@ -810,8 +814,6 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + close(newfd); + newfd = -1; + } else { +- struct lo_inode *prev, *next; +- + saverr = ENOMEM; + inode = calloc(1, sizeof(struct lo_inode)); + if (!inode) { +@@ -822,17 +824,12 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + inode->refcount = 1; + inode->fd = newfd; + newfd = -1; +- inode->ino = e->attr.st_ino; +- inode->dev = e->attr.st_dev; ++ inode->key.ino = e->attr.st_ino; ++ inode->key.dev = e->attr.st_dev; + + pthread_mutex_lock(&lo->mutex); + inode->fuse_ino = lo_add_inode_mapping(req, inode); +- prev = &lo->root; +- next = prev->next; +- next->prev = inode; +- inode->next = next; +- inode->prev = prev; +- prev->next = inode; ++ g_hash_table_insert(lo->inodes, &inode->key, inode); + pthread_mutex_unlock(&lo->mutex); + } + e->ino = inode->fuse_ino; +@@ -1162,14 +1159,8 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + assert(inode->refcount >= n); + inode->refcount -= n; + if (!inode->refcount) { +- struct lo_inode *prev, *next; +- +- prev = inode->prev; +- next = inode->next; +- next->prev = prev; +- prev->next = next; +- + lo_map_remove(&lo->ino_map, inode->fuse_ino); ++ g_hash_table_remove(lo->inodes, &inode->key); + pthread_mutex_unlock(&lo->mutex); + close(inode->fd); + free(inode); +@@ -1369,7 +1360,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + + /* Hide root's parent directory */ + if (dinode == &lo->root && strcmp(name, "..") == 0) { +- e.attr.st_ino = lo->root.ino; ++ e.attr.st_ino = lo->root.key.ino; + e.attr.st_mode = DT_DIR << 12; + } + +@@ -2370,11 +2361,26 @@ static void setup_root(struct 
lo_data *lo, struct lo_inode *root) + + root->is_symlink = false; + root->fd = fd; +- root->ino = stat.st_ino; +- root->dev = stat.st_dev; ++ root->key.ino = stat.st_ino; ++ root->key.dev = stat.st_dev; + root->refcount = 2; + } + ++static guint lo_key_hash(gconstpointer key) ++{ ++ const struct lo_key *lkey = key; ++ ++ return (guint)lkey->ino + (guint)lkey->dev; ++} ++ ++static gboolean lo_key_equal(gconstpointer a, gconstpointer b) ++{ ++ const struct lo_key *la = a; ++ const struct lo_key *lb = b; ++ ++ return la->ino == lb->ino && la->dev == lb->dev; ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -2392,7 +2398,7 @@ int main(int argc, char *argv[]) + umask(0); + + pthread_mutex_init(&lo.mutex, NULL); +- lo.root.next = lo.root.prev = &lo.root; ++ lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal); + lo.root.fd = -1; + lo.root.fuse_ino = FUSE_ROOT_ID; + lo.cache = CACHE_AUTO; +@@ -2522,6 +2528,9 @@ err_out2: + err_out1: + fuse_opt_free_args(&args); + ++ if (lo.inodes) { ++ g_hash_table_destroy(lo.inodes); ++ } + lo_map_destroy(&lo.fd_map); + lo_map_destroy(&lo.dirp_map); + lo_map_destroy(&lo.ino_map); +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch b/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch new file mode 100644 index 0000000..68eb03e --- /dev/null +++ b/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch @@ -0,0 +1,54 @@ +From feb005dfeb15dd5ac5156c994f323ab4c573b1fc Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:24 +0100 +Subject: [PATCH 053/116] virtiofsd: prevent ".." escape in lo_do_lookup() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-50-dgilbert@redhat.com> +Patchwork-id: 93500 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 049/112] virtiofsd: prevent ".." 
escape in lo_do_lookup() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 854684bc0b3d63eb90b3abdfe471c2e4271ef176) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e375406..79d5966 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -624,12 +624,17 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + int res; + int saverr; + struct lo_data *lo = lo_data(req); +- struct lo_inode *inode; ++ struct lo_inode *inode, *dir = lo_inode(req, parent); + + memset(e, 0, sizeof(*e)); + e->attr_timeout = lo->timeout; + e->entry_timeout = lo->timeout; + ++ /* Do not allow escaping root directory */ ++ if (dir == &lo->root && strcmp(name, "..") == 0) { ++ name = "."; ++ } ++ + newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); + if (newfd == -1) { + goto out_err; +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch b/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch new file mode 100644 index 0000000..5f97cbf --- /dev/null +++ b/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch @@ -0,0 +1,108 @@ +From 97e232e75bbc0032f4a309d248f383384612eafe Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:25 +0100 +Subject: [PATCH 054/116] virtiofsd: prevent ".." escape in lo_do_readdir() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-51-dgilbert@redhat.com> +Patchwork-id: 93507 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 050/112] virtiofsd: prevent ".." escape in lo_do_readdir() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Construct a fake dirent for the root directory's ".." entry. This hides +the parent directory from the FUSE client. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 752272da2b68a2312f0e11fc5303015a6c3ee1ac) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 36 ++++++++++++++++++++++-------------- + 1 file changed, 22 insertions(+), 14 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 79d5966..e3d65c3 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1149,19 +1149,25 @@ out_err: + static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + off_t offset, struct fuse_file_info *fi, int plus) + { ++ struct lo_data *lo = lo_data(req); + struct lo_dirp *d; ++ struct lo_inode *dinode; + char *buf = NULL; + char *p; + size_t rem = size; +- int err = ENOMEM; ++ int err = EBADF; + +- (void)ino; ++ dinode = lo_inode(req, ino); ++ if (!dinode) { ++ goto error; ++ } + + d = lo_dirp(req, fi); + if (!d) { + goto error; + } + ++ err = ENOMEM; + buf = calloc(1, size); + if (!buf) { + goto error; +@@ -1192,15 +1198,21 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + } + nextoff = d->entry->d_off; + name = d->entry->d_name; ++ + fuse_ino_t entry_ino = 0; ++ struct fuse_entry_param e = (struct fuse_entry_param){ ++ .attr.st_ino = d->entry->d_ino, ++ .attr.st_mode = d->entry->d_type << 12, ++ }; ++ ++ /* Hide root's parent directory */ ++ if (dinode == &lo->root && strcmp(name, "..") == 0) { ++ e.attr.st_ino = 
lo->root.ino; ++ e.attr.st_mode = DT_DIR << 12; ++ } ++ + if (plus) { +- struct fuse_entry_param e; +- if (is_dot_or_dotdot(name)) { +- e = (struct fuse_entry_param){ +- .attr.st_ino = d->entry->d_ino, +- .attr.st_mode = d->entry->d_type << 12, +- }; +- } else { ++ if (!is_dot_or_dotdot(name)) { + err = lo_do_lookup(req, ino, name, &e); + if (err) { + goto error; +@@ -1210,11 +1222,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + + entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff); + } else { +- struct stat st = { +- .st_ino = d->entry->d_ino, +- .st_mode = d->entry->d_type << 12, +- }; +- entsize = fuse_add_direntry(req, p, rem, name, &st, nextoff); ++ entsize = fuse_add_direntry(req, p, rem, name, &e.attr, nextoff); + } + if (entsize > rem) { + if (entry_ino != 0) { +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch b/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch new file mode 100644 index 0000000..be7c120 --- /dev/null +++ b/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch @@ -0,0 +1,103 @@ +From 249c02ae54739dc5894ee1b2905bbe8f1e79e909 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:20 +0100 +Subject: [PATCH 109/116] virtiofsd: prevent FUSE_INIT/FUSE_DESTROY races +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-106-dgilbert@redhat.com> +Patchwork-id: 93562 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 105/112] virtiofsd: prevent FUSE_INIT/FUSE_DESTROY races +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +When running with multiple threads it can be tricky to handle +FUSE_INIT/FUSE_DESTROY in parallel with other request types or in +parallel with themselves. 
Serialize FUSE_INIT and FUSE_DESTROY so that +malicious clients cannot trigger race conditions. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Masayoshi Mizuma +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit cdc497c6925be745bc895355bd4674a17a4b2a8b) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_i.h | 1 + + tools/virtiofsd/fuse_lowlevel.c | 18 ++++++++++++++++++ + 2 files changed, 19 insertions(+) + +diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h +index a20854f..1447d86 100644 +--- a/tools/virtiofsd/fuse_i.h ++++ b/tools/virtiofsd/fuse_i.h +@@ -61,6 +61,7 @@ struct fuse_session { + struct fuse_req list; + struct fuse_req interrupts; + pthread_mutex_t lock; ++ pthread_rwlock_t init_rwlock; + int got_destroy; + int broken_splice_nonblock; + uint64_t notify_ctr; +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index dab6a31..79a4031 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2428,6 +2428,19 @@ void fuse_session_process_buf_int(struct fuse_session *se, + req->ctx.pid = in->pid; + req->ch = ch; + ++ /* ++ * INIT and DESTROY requests are serialized, all other request types ++ * run in parallel. This prevents races between FUSE_INIT and ordinary ++ * requests, FUSE_INIT and FUSE_INIT, FUSE_INIT and FUSE_DESTROY, and ++ * FUSE_DESTROY and FUSE_DESTROY. 
++ */ ++ if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT || ++ in->opcode == FUSE_DESTROY) { ++ pthread_rwlock_wrlock(&se->init_rwlock); ++ } else { ++ pthread_rwlock_rdlock(&se->init_rwlock); ++ } ++ + err = EIO; + if (!se->got_init) { + enum fuse_opcode expected; +@@ -2485,10 +2498,13 @@ void fuse_session_process_buf_int(struct fuse_session *se, + } else { + fuse_ll_ops[in->opcode].func(req, in->nodeid, &iter); + } ++ ++ pthread_rwlock_unlock(&se->init_rwlock); + return; + + reply_err: + fuse_reply_err(req, err); ++ pthread_rwlock_unlock(&se->init_rwlock); + } + + #define LL_OPTION(n, o, v) \ +@@ -2531,6 +2547,7 @@ void fuse_session_destroy(struct fuse_session *se) + se->op.destroy(se->userdata); + } + } ++ pthread_rwlock_destroy(&se->init_rwlock); + pthread_mutex_destroy(&se->lock); + free(se->cuse_data); + if (se->fd != -1) { +@@ -2610,6 +2627,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + list_init_req(&se->list); + list_init_req(&se->interrupts); + fuse_mutex_init(&se->lock); ++ pthread_rwlock_init(&se->init_rwlock, NULL); + + memcpy(&se->op, op, op_size); + se->owner = getuid(); +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch b/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch new file mode 100644 index 0000000..8eabede --- /dev/null +++ b/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch @@ -0,0 +1,149 @@ +From 69c6a829f8136a8c95ccdf480f2fd0173d64b6ec Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:05 +0100 +Subject: [PATCH 094/116] virtiofsd: prevent fv_queue_thread() vs virtio_loop() + races +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-91-dgilbert@redhat.com> +Patchwork-id: 93544 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 090/112] virtiofsd: prevent fv_queue_thread() vs virtio_loop() races +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +We call into libvhost-user from the virtqueue handler thread and the +vhost-user message processing thread without a lock. There is nothing +protecting the virtqueue handler thread if the vhost-user message +processing thread changes the virtqueue or memory table while it is +running. + +This patch introduces a read-write lock. Virtqueue handler threads are +readers. The vhost-user message processing thread is a writer. This +will allow concurrency for multiqueue in the future while protecting +against fv_queue_thread() vs virtio_loop() races. + +Note that the critical sections could be made smaller but it would be +more invasive and require libvhost-user changes. Let's start simple and +improve performance later, if necessary. Another option would be an +RCU-style approach with lighter-weight primitives. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e7b337326d594b71b07cd6dbb332c49c122c80a4) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 34 +++++++++++++++++++++++++++++++++- + 1 file changed, 33 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index fb8d6d1..f6242f9 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -59,6 +59,18 @@ struct fv_VuDev { + struct fuse_session *se; + + /* ++ * Either handle virtqueues or vhost-user protocol messages. Don't do ++ * both at the same time since that could lead to race conditions if ++ * virtqueues or memory tables change while another thread is accessing ++ * them. 
++ * ++ * The assumptions are: ++ * 1. fv_queue_thread() reads/writes to virtqueues and only reads VuDev. ++ * 2. virtio_loop() reads/writes virtqueues and VuDev. ++ */ ++ pthread_rwlock_t vu_dispatch_rwlock; ++ ++ /* + * The following pair of fields are only accessed in the main + * virtio_loop + */ +@@ -415,6 +427,8 @@ static void *fv_queue_thread(void *opaque) + qi->qidx, qi->kick_fd); + while (1) { + struct pollfd pf[2]; ++ int ret; ++ + pf[0].fd = qi->kick_fd; + pf[0].events = POLLIN; + pf[0].revents = 0; +@@ -461,6 +475,9 @@ static void *fv_queue_thread(void *opaque) + fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); + break; + } ++ /* Mutual exclusion with virtio_loop() */ ++ ret = pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ assert(ret == 0); /* there is no possible error case */ + /* out is from guest, in is too guest */ + unsigned int in_bytes, out_bytes; + vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); +@@ -469,6 +486,7 @@ static void *fv_queue_thread(void *opaque) + "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", + __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); + ++ + while (1) { + bool allocated_bufv = false; + struct fuse_bufvec bufv; +@@ -597,6 +615,8 @@ static void *fv_queue_thread(void *opaque) + free(elem); + elem = NULL; + } ++ ++ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); + } + out: + pthread_mutex_destroy(&ch.lock); +@@ -711,6 +731,8 @@ int virtio_loop(struct fuse_session *se) + + while (!fuse_session_exited(se)) { + struct pollfd pf[1]; ++ bool ok; ++ int ret; + pf[0].fd = se->vu_socketfd; + pf[0].events = POLLIN; + pf[0].revents = 0; +@@ -735,7 +757,15 @@ int virtio_loop(struct fuse_session *se) + } + assert(pf[0].revents & POLLIN); + fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__); +- if (!vu_dispatch(&se->virtio_dev->dev)) { ++ /* Mutual exclusion with fv_queue_thread() */ ++ ret = pthread_rwlock_wrlock(&se->virtio_dev->vu_dispatch_rwlock); ++ 
assert(ret == 0); /* there is no possible error case */ ++ ++ ok = vu_dispatch(&se->virtio_dev->dev); ++ ++ pthread_rwlock_unlock(&se->virtio_dev->vu_dispatch_rwlock); ++ ++ if (!ok) { + fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__); + break; + } +@@ -877,6 +907,7 @@ int virtio_session_mount(struct fuse_session *se) + + se->vu_socketfd = data_sock; + se->virtio_dev->se = se; ++ pthread_rwlock_init(&se->virtio_dev->vu_dispatch_rwlock, NULL); + vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch, + fv_remove_watch, &fv_iface); + +@@ -892,6 +923,7 @@ void virtio_session_close(struct fuse_session *se) + } + + free(se->virtio_dev->qi); ++ pthread_rwlock_destroy(&se->virtio_dev->vu_dispatch_rwlock); + free(se->virtio_dev); + se->virtio_dev = NULL; + } +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch b/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch new file mode 100644 index 0000000..acafa41 --- /dev/null +++ b/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch @@ -0,0 +1,147 @@ +From 2e58ff6978f8433fc8672d2e357c6f0f5f36d24f Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:07 +0100 +Subject: [PATCH 096/116] virtiofsd: prevent races with lo_dirp_put() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-93-dgilbert@redhat.com> +Patchwork-id: 93546 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 092/112] virtiofsd: prevent races with lo_dirp_put() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Introduce lo_dirp_put() so that FUSE_RELEASEDIR does not cause +use-after-free races with other threads that are accessing lo_dirp. + +Also make lo_releasedir() atomic to prevent FUSE_RELEASEDIR racing with +itself. This prevents double-frees. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit acefdde73b403576a241ebd8dbe8431ddc0d9442) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 41 ++++++++++++++++++++++++++++++++++------ + 1 file changed, 35 insertions(+), 6 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 690edbc..2d703b5 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1284,11 +1284,28 @@ static void lo_readlink(fuse_req_t req, fuse_ino_t ino) + } + + struct lo_dirp { ++ gint refcount; + DIR *dp; + struct dirent *entry; + off_t offset; + }; + ++static void lo_dirp_put(struct lo_dirp **dp) ++{ ++ struct lo_dirp *d = *dp; ++ ++ if (!d) { ++ return; ++ } ++ *dp = NULL; ++ ++ if (g_atomic_int_dec_and_test(&d->refcount)) { ++ closedir(d->dp); ++ free(d); ++ } ++} ++ ++/* Call lo_dirp_put() on the return value when no longer needed */ + static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) + { + struct lo_data *lo = lo_data(req); +@@ -1296,6 +1313,9 @@ static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) + + pthread_mutex_lock(&lo->mutex); + elem = lo_map_get(&lo->dirp_map, fi->fh); ++ if (elem) { ++ g_atomic_int_inc(&elem->dirp->refcount); ++ } + pthread_mutex_unlock(&lo->mutex); + if (!elem) { + return NULL; +@@ -1331,6 +1351,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, + d->offset = 0; + d->entry = NULL; + ++ g_atomic_int_set(&d->refcount, 1); /* paired with lo_releasedir() */ + pthread_mutex_lock(&lo->mutex); + fh = lo_add_dirp_mapping(req, d); + pthread_mutex_unlock(&lo->mutex); +@@ -1364,7 +1385,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + off_t offset, struct fuse_file_info *fi, int plus) + { + struct lo_data *lo = lo_data(req); +- struct lo_dirp *d; ++ struct lo_dirp *d = NULL; + struct lo_inode 
*dinode; + char *buf = NULL; + char *p; +@@ -1454,6 +1475,8 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + + err = 0; + error: ++ lo_dirp_put(&d); ++ + /* + * If there's an error, we can only signal it if we haven't stored + * any entries yet - otherwise we'd end up with wrong lookup +@@ -1484,22 +1507,25 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) + { + struct lo_data *lo = lo_data(req); ++ struct lo_map_elem *elem; + struct lo_dirp *d; + + (void)ino; + +- d = lo_dirp(req, fi); +- if (!d) { ++ pthread_mutex_lock(&lo->mutex); ++ elem = lo_map_get(&lo->dirp_map, fi->fh); ++ if (!elem) { ++ pthread_mutex_unlock(&lo->mutex); + fuse_reply_err(req, EBADF); + return; + } + +- pthread_mutex_lock(&lo->mutex); ++ d = elem->dirp; + lo_map_remove(&lo->dirp_map, fi->fh); + pthread_mutex_unlock(&lo->mutex); + +- closedir(d->dp); +- free(d); ++ lo_dirp_put(&d); /* paired with lo_opendir() */ ++ + fuse_reply_err(req, 0); + } + +@@ -1710,6 +1736,9 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + } else { + res = fsync(fd); + } ++ ++ lo_dirp_put(&d); ++ + fuse_reply_err(req, res == -1 ? errno : 0); + } + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch b/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch new file mode 100644 index 0000000..056559d --- /dev/null +++ b/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch @@ -0,0 +1,469 @@ +From 5c9bbd00e8f8c944d9e8e22e7d1cf08cb8fddd6b Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:37 +0100 +Subject: [PATCH 066/116] virtiofsd: print log only when priority is high + enough +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-63-dgilbert@redhat.com> +Patchwork-id: 93518 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 062/112] virtiofsd: print log only when priority is high enough +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Eryu Guan + +Introduce "-o log_level=" command line option to specify current log +level (priority), valid values are "debug info warn err", e.g. + + ./virtiofsd -o log_level=debug ... + +So only log priority higher than "debug" will be printed to +stderr/syslog. And the default level is info. + +The "-o debug"/"-d" options are kept, and imply debug log level. + +Signed-off-by: Eryu Guan +dgilbert: Reworked for libfuse's log_func +Signed-off-by: Dr. David Alan Gilbert +with fix by: +Signed-off-by: Xiao Yang +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit d240314a1a18a1d914af1b5763fe8c9a572e6409) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 75 ++++++++++--------------- + tools/virtiofsd/fuse_lowlevel.h | 1 + + tools/virtiofsd/helper.c | 8 ++- + tools/virtiofsd/passthrough_ll.c | 118 ++++++++++++++++----------------------- + 4 files changed, 87 insertions(+), 115 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 6ceb33d..a7a1968 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -158,19 +158,17 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, + struct fuse_out_header *out = iov[0].iov_base; + + out->len = iov_length(iov, count); +- if (se->debug) { +- if (out->unique == 0) { +- fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, +- out->len); +- } else if (out->error) { +- fuse_log(FUSE_LOG_DEBUG, +- " unique: %llu, error: %i (%s), outsize: %i\n", +- (unsigned long long)out->unique, out->error, +- strerror(-out->error), 
out->len); +- } else { +- fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", +- (unsigned long long)out->unique, out->len); +- } ++ if (out->unique == 0) { ++ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, ++ out->len); ++ } else if (out->error) { ++ fuse_log(FUSE_LOG_DEBUG, ++ " unique: %llu, error: %i (%s), outsize: %i\n", ++ (unsigned long long)out->unique, out->error, ++ strerror(-out->error), out->len); ++ } else { ++ fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", ++ (unsigned long long)out->unique, out->len); + } + + if (fuse_lowlevel_is_virtio(se)) { +@@ -1662,10 +1660,8 @@ static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, + return; + } + +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", +- (unsigned long long)arg->unique); +- } ++ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", ++ (unsigned long long)arg->unique); + + req->u.i.unique = arg->unique; + +@@ -1901,13 +1897,10 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + } + } + +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); +- if (arg->major == 7 && arg->minor >= 6) { +- fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); +- fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", +- arg->max_readahead); +- } ++ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); ++ if (arg->major == 7 && arg->minor >= 6) { ++ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); ++ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", arg->max_readahead); + } + se->conn.proto_major = arg->major; + se->conn.proto_minor = arg->minor; +@@ -2116,19 +2109,14 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + outarg.congestion_threshold = se->conn.congestion_threshold; + outarg.time_gran = se->conn.time_gran; + +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, +- outarg.minor); +- fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); +- 
fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", +- outarg.max_readahead); +- fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); +- fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", +- outarg.max_background); +- fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", +- outarg.congestion_threshold); +- fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); +- } ++ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); ++ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); ++ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", outarg.max_readahead); ++ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); ++ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", outarg.max_background); ++ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", ++ outarg.congestion_threshold); ++ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); + + send_reply_ok(req, &outarg, outargsize); + } +@@ -2407,14 +2395,11 @@ void fuse_session_process_buf_int(struct fuse_session *se, + in = fuse_mbuf_iter_advance(&iter, sizeof(*in)); + assert(in); /* caller guarantees the input buffer is large enough */ + +- if (se->debug) { +- fuse_log(FUSE_LOG_DEBUG, +- "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, " +- "pid: %u\n", +- (unsigned long long)in->unique, +- opname((enum fuse_opcode)in->opcode), in->opcode, +- (unsigned long long)in->nodeid, buf->size, in->pid); +- } ++ fuse_log( ++ FUSE_LOG_DEBUG, ++ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", ++ (unsigned long long)in->unique, opname((enum fuse_opcode)in->opcode), ++ in->opcode, (unsigned long long)in->nodeid, buf->size, in->pid); + + req = fuse_ll_alloc_req(se); + if (req == NULL) { +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index f2750bc..138041e 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1796,6 +1796,7 @@ struct fuse_cmdline_opts { + int show_help; + 
int print_capabilities; + int syslog; ++ int log_level; + unsigned int max_idle_threads; + }; + +diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 9692ef9..6d50a46 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -34,7 +34,6 @@ + t, offsetof(struct fuse_cmdline_opts, p), v \ + } + +- + static const struct fuse_opt fuse_helper_opts[] = { + FUSE_HELPER_OPT("-h", show_help), + FUSE_HELPER_OPT("--help", show_help), +@@ -55,6 +54,10 @@ static const struct fuse_opt fuse_helper_opts[] = { + FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), + FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), + FUSE_HELPER_OPT("--syslog", syslog), ++ FUSE_HELPER_OPT_VALUE("log_level=debug", log_level, FUSE_LOG_DEBUG), ++ FUSE_HELPER_OPT_VALUE("log_level=info", log_level, FUSE_LOG_INFO), ++ FUSE_HELPER_OPT_VALUE("log_level=warn", log_level, FUSE_LOG_WARNING), ++ FUSE_HELPER_OPT_VALUE("log_level=err", log_level, FUSE_LOG_ERR), + FUSE_OPT_END + }; + +@@ -142,6 +145,9 @@ void fuse_cmdline_help(void) + " --syslog log to syslog (default stderr)\n" + " -f foreground operation\n" + " --daemonize run in background\n" ++ " -o log_level= log level, default to \"info\"\n" ++ " level could be one of \"debug, " ++ "info, warn, err\"\n" + " -o max_idle_threads the maximum number of idle worker " + "threads\n" + " allowed (default: 10)\n" +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 0372aca..ff6910f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -37,6 +37,7 @@ + + #include "qemu/osdep.h" + #include "fuse_virtio.h" ++#include "fuse_log.h" + #include "fuse_lowlevel.h" + #include + #include +@@ -140,6 +141,7 @@ static const struct fuse_opt lo_opts[] = { + FUSE_OPT_END + }; + static bool use_syslog = false; ++static int current_log_level; + + static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); + +@@ -458,11 +460,6 @@ static int lo_fd(fuse_req_t 
req, fuse_ino_t ino) + return inode ? inode->fd : -1; + } + +-static bool lo_debug(fuse_req_t req) +-{ +- return lo_data(req)->debug != 0; +-} +- + static void lo_init(void *userdata, struct fuse_conn_info *conn) + { + struct lo_data *lo = (struct lo_data *)userdata; +@@ -472,15 +469,11 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) + } + + if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) { +- if (lo->debug) { +- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); + conn->want |= FUSE_CAP_WRITEBACK_CACHE; + } + if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { +- if (lo->debug) { +- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); + conn->want |= FUSE_CAP_FLOCK_LOCKS; + } + } +@@ -823,10 +816,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + } + e->ino = inode->fuse_ino; + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long)parent, name, (unsigned long long)e->ino); +- } ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, ++ name, (unsigned long long)e->ino); + + return 0; + +@@ -843,10 +834,8 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + struct fuse_entry_param e; + int err; + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", +- parent, name); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", parent, ++ name); + + /* + * Don't use is_safe_path_component(), allow "." and ".." 
for NFS export +@@ -971,10 +960,8 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + goto out; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long)parent, name, (unsigned long long)e.ino); +- } ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, ++ name, (unsigned long long)e.ino); + + fuse_reply_entry(req, &e); + return; +@@ -1074,10 +1061,8 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + pthread_mutex_unlock(&lo->mutex); + e.ino = inode->fuse_ino; + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", +- (unsigned long long)parent, name, (unsigned long long)e.ino); +- } ++ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, ++ name, (unsigned long long)e.ino); + + fuse_reply_entry(req, &e); + return; +@@ -1171,11 +1156,9 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + return; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", +- (unsigned long long)ino, (unsigned long long)inode->refcount, +- (unsigned long long)nlookup); +- } ++ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", ++ (unsigned long long)ino, (unsigned long long)inode->refcount, ++ (unsigned long long)nlookup); + + unref_inode(lo, inode, nlookup); + } +@@ -1445,10 +1428,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + int err; + struct lo_cred old = {}; + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", +- parent, name); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", parent, ++ name); + + if (!is_safe_path_component(name)) { + fuse_reply_err(req, EINVAL); +@@ -1525,10 +1506,8 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + char buf[64]; + struct lo_data *lo = lo_data(req); + +- if (lo_debug(req)) { +- 
fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, +- fi->flags); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, ++ fi->flags); + + /* + * With writeback cache, kernel may send read requests even +@@ -1644,12 +1623,10 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, + { + struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, +- "lo_read(ino=%" PRIu64 ", size=%zd, " +- "off=%lu)\n", +- ino, size, (unsigned long)offset); +- } ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_read(ino=%" PRIu64 ", size=%zd, " ++ "off=%lu)\n", ++ ino, size, (unsigned long)offset); + + buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; + buf.buf[0].fd = lo_fi_fd(req, fi); +@@ -1671,11 +1648,9 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + out_buf.buf[0].fd = lo_fi_fd(req, fi); + out_buf.buf[0].pos = off; + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, +- "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, +- out_buf.buf[0].size, (unsigned long)off); +- } ++ fuse_log(FUSE_LOG_DEBUG, ++ "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, ++ out_buf.buf[0].size, (unsigned long)off); + + /* + * If kill_priv is set, drop CAP_FSETID which should lead to kernel +@@ -1774,11 +1749,8 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, +- "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ino, name, +- size); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ++ ino, name, size); + + if (inode->is_symlink) { + /* Sorry, no race free way to getxattr on symlink. 
*/ +@@ -1852,10 +1824,8 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + goto out; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", +- ino, size); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino, ++ size); + + if (inode->is_symlink) { + /* Sorry, no race free way to listxattr on symlink. */ +@@ -1929,11 +1899,8 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, +- "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", +- ino, name, value, size); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 ++ ", name=%s value=%s size=%zd)\n", ino, name, value, size); + + if (inode->is_symlink) { + /* Sorry, no race free way to setxattr on symlink. */ +@@ -1978,10 +1945,8 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + goto out; + } + +- if (lo_debug(req)) { +- fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", +- ino, name); +- } ++ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino, ++ name); + + if (inode->is_symlink) { + /* Sorry, no race free way to setxattr on symlink. */ +@@ -2303,6 +2268,10 @@ static void setup_nofile_rlimit(void) + + static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) + { ++ if (current_log_level < level) { ++ return; ++ } ++ + if (use_syslog) { + int priority = LOG_ERR; + switch (level) { +@@ -2401,8 +2370,19 @@ int main(int argc, char *argv[]) + return 1; + } + ++ /* ++ * log_level is 0 if not configured via cmd options (0 is LOG_EMERG, ++ * and we don't use this log level). 
++ */ ++ if (opts.log_level != 0) { ++ current_log_level = opts.log_level; ++ } + lo.debug = opts.debug; ++ if (lo.debug) { ++ current_log_level = FUSE_LOG_DEBUG; ++ } + lo.root.refcount = 2; ++ + if (lo.source) { + struct stat stat; + int res; +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-process-requests-in-a-thread-pool.patch b/kvm-virtiofsd-process-requests-in-a-thread-pool.patch new file mode 100644 index 0000000..87fff99 --- /dev/null +++ b/kvm-virtiofsd-process-requests-in-a-thread-pool.patch @@ -0,0 +1,533 @@ +From b0db5e666aaa43eadff3e60a1ada704f33b03074 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:19 +0100 +Subject: [PATCH 108/116] virtiofsd: process requests in a thread pool +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-105-dgilbert@redhat.com> +Patchwork-id: 93554 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 104/112] virtiofsd: process requests in a thread pool +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Introduce a thread pool so that fv_queue_thread() just pops +VuVirtqElements and hands them to the thread pool. For the time being +only one worker thread is allowed since passthrough_ll.c is not +thread-safe yet. Future patches will lift this restriction so that +multiple FUSE requests can be processed in parallel. + +The main new concept is struct FVRequest, which contains both +VuVirtqElement and struct fuse_chan. We now have fv_VuDev for a device, +fv_QueueInfo for a virtqueue, and FVRequest for a request. Some of +fv_QueueInfo's fields are moved into FVRequest because they are +per-request. The name FVRequest conforms to QEMU coding style and I +expect the struct fv_* types will be renamed in a future refactoring. + +This patch series is not optimal. 
fbuf reuse is dropped so each request +does malloc(se->bufsize), but there is no clean and cheap way to keep +this with a thread pool. The vq_lock mutex is held for longer than +necessary, especially during the eventfd_write() syscall. Performance +can be improved in the future. + +prctl(2) had to be added to the seccomp whitelist because glib invokes +it. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Misono Tomohiro +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit a3d756c5aecccc4c0e51060a7e2f1c87bf8f1180) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 359 +++++++++++++++++++++++------------------- + 1 file changed, 201 insertions(+), 158 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index f6242f9..0dcf2ef 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -22,6 +22,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -37,17 +38,28 @@ + struct fv_VuDev; + struct fv_QueueInfo { + pthread_t thread; ++ /* ++ * This lock protects the VuVirtq preventing races between ++ * fv_queue_thread() and fv_queue_worker(). 
++ */ ++ pthread_mutex_t vq_lock; ++ + struct fv_VuDev *virtio_dev; + + /* Our queue index, corresponds to array position */ + int qidx; + int kick_fd; + int kill_fd; /* For killing the thread */ ++}; + +- /* The element for the command currently being processed */ +- VuVirtqElement *qe; ++/* A FUSE request */ ++typedef struct { ++ VuVirtqElement elem; ++ struct fuse_chan ch; ++ ++ /* Used to complete requests that involve no reply */ + bool reply_sent; +-}; ++} FVRequest; + + /* + * We pass the dev element into libvhost-user +@@ -191,8 +203,11 @@ static void copy_iov(struct iovec *src_iov, int src_count, + int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + struct iovec *iov, int count) + { +- VuVirtqElement *elem; +- VuVirtq *q; ++ FVRequest *req = container_of(ch, FVRequest, ch); ++ struct fv_QueueInfo *qi = ch->qi; ++ VuDev *dev = &se->virtio_dev->dev; ++ VuVirtq *q = vu_get_queue(dev, qi->qidx); ++ VuVirtqElement *elem = &req->elem; + int ret = 0; + + assert(count >= 1); +@@ -205,11 +220,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + + /* unique == 0 is notification, which we don't support */ + assert(out->unique); +- /* For virtio we always have ch */ +- assert(ch); +- assert(!ch->qi->reply_sent); +- elem = ch->qi->qe; +- q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; ++ assert(!req->reply_sent); + + /* The 'in' part of the elem is to qemu */ + unsigned int in_num = elem->in_num; +@@ -236,9 +247,15 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, + } + + copy_iov(iov, count, in_sg, in_num, tosend_len); +- vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); +- vu_queue_notify(&se->virtio_dev->dev, q); +- ch->qi->reply_sent = true; ++ ++ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ pthread_mutex_lock(&qi->vq_lock); ++ vu_queue_push(dev, q, elem, tosend_len); ++ vu_queue_notify(dev, q); ++ pthread_mutex_unlock(&qi->vq_lock); ++ 
pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ ++ req->reply_sent = true; + + err: + return ret; +@@ -254,9 +271,12 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + struct iovec *iov, int count, struct fuse_bufvec *buf, + size_t len) + { ++ FVRequest *req = container_of(ch, FVRequest, ch); ++ struct fv_QueueInfo *qi = ch->qi; ++ VuDev *dev = &se->virtio_dev->dev; ++ VuVirtq *q = vu_get_queue(dev, qi->qidx); ++ VuVirtqElement *elem = &req->elem; + int ret = 0; +- VuVirtqElement *elem; +- VuVirtq *q; + + assert(count >= 1); + assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); +@@ -275,11 +295,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + /* unique == 0 is notification which we don't support */ + assert(out->unique); + +- /* For virtio we always have ch */ +- assert(ch); +- assert(!ch->qi->reply_sent); +- elem = ch->qi->qe; +- q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; ++ assert(!req->reply_sent); + + /* The 'in' part of the elem is to qemu */ + unsigned int in_num = elem->in_num; +@@ -395,33 +411,175 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, + + ret = 0; + +- vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); +- vu_queue_notify(&se->virtio_dev->dev, q); ++ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ pthread_mutex_lock(&qi->vq_lock); ++ vu_queue_push(dev, q, elem, tosend_len); ++ vu_queue_notify(dev, q); ++ pthread_mutex_unlock(&qi->vq_lock); ++ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); + + err: + if (ret == 0) { +- ch->qi->reply_sent = true; ++ req->reply_sent = true; + } + + return ret; + } + ++/* Process one FVRequest in a thread pool */ ++static void fv_queue_worker(gpointer data, gpointer user_data) ++{ ++ struct fv_QueueInfo *qi = user_data; ++ struct fuse_session *se = qi->virtio_dev->se; ++ struct VuDev *dev = &qi->virtio_dev->dev; ++ FVRequest *req = data; ++ VuVirtqElement *elem = &req->elem; ++ 
struct fuse_buf fbuf = {}; ++ bool allocated_bufv = false; ++ struct fuse_bufvec bufv; ++ struct fuse_bufvec *pbufv; ++ ++ assert(se->bufsize > sizeof(struct fuse_in_header)); ++ ++ /* ++ * An element contains one request and the space to send our response ++ * They're spread over multiple descriptors in a scatter/gather set ++ * and we can't trust the guest to keep them still; so copy in/out. ++ */ ++ fbuf.mem = malloc(se->bufsize); ++ assert(fbuf.mem); ++ ++ fuse_mutex_init(&req->ch.lock); ++ req->ch.fd = -1; ++ req->ch.qi = qi; ++ ++ /* The 'out' part of the elem is from qemu */ ++ unsigned int out_num = elem->out_num; ++ struct iovec *out_sg = elem->out_sg; ++ size_t out_len = iov_size(out_sg, out_num); ++ fuse_log(FUSE_LOG_DEBUG, ++ "%s: elem %d: with %d out desc of length %zd\n", ++ __func__, elem->index, out_num, out_len); ++ ++ /* ++ * The elem should contain a 'fuse_in_header' (in to fuse) ++ * plus the data based on the len in the header. ++ */ ++ if (out_len < sizeof(struct fuse_in_header)) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", ++ __func__, elem->index); ++ assert(0); /* TODO */ ++ } ++ if (out_len > se->bufsize) { ++ fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", __func__, ++ elem->index); ++ assert(0); /* TODO */ ++ } ++ /* Copy just the first element and look at it */ ++ copy_from_iov(&fbuf, 1, out_sg); ++ ++ pbufv = NULL; /* Compiler thinks an unitialised path */ ++ if (out_num > 2 && ++ out_sg[0].iov_len == sizeof(struct fuse_in_header) && ++ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && ++ out_sg[1].iov_len == sizeof(struct fuse_write_in)) { ++ /* ++ * For a write we don't actually need to copy the ++ * data, we can just do it straight out of guest memory ++ * but we must still copy the headers in case the guest ++ * was nasty and changed them while we were using them. 
++ */ ++ fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); ++ ++ /* copy the fuse_write_in header afte rthe fuse_in_header */ ++ fbuf.mem += out_sg->iov_len; ++ copy_from_iov(&fbuf, 1, out_sg + 1); ++ fbuf.mem -= out_sg->iov_len; ++ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; ++ ++ /* Allocate the bufv, with space for the rest of the iov */ ++ pbufv = malloc(sizeof(struct fuse_bufvec) + ++ sizeof(struct fuse_buf) * (out_num - 2)); ++ if (!pbufv) { ++ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", ++ __func__); ++ goto out; ++ } ++ ++ allocated_bufv = true; ++ pbufv->count = 1; ++ pbufv->buf[0] = fbuf; ++ ++ size_t iovindex, pbufvindex; ++ iovindex = 2; /* 2 headers, separate iovs */ ++ pbufvindex = 1; /* 2 headers, 1 fusebuf */ ++ ++ for (; iovindex < out_num; iovindex++, pbufvindex++) { ++ pbufv->count++; ++ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ ++ pbufv->buf[pbufvindex].flags = 0; ++ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; ++ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; ++ } ++ } else { ++ /* Normal (non fast write) path */ ++ ++ /* Copy the rest of the buffer */ ++ fbuf.mem += out_sg->iov_len; ++ copy_from_iov(&fbuf, out_num - 1, out_sg + 1); ++ fbuf.mem -= out_sg->iov_len; ++ fbuf.size = out_len; ++ ++ /* TODO! 
Endianness of header */ ++ ++ /* TODO: Add checks for fuse_session_exited */ ++ bufv.buf[0] = fbuf; ++ bufv.count = 1; ++ pbufv = &bufv; ++ } ++ pbufv->idx = 0; ++ pbufv->off = 0; ++ fuse_session_process_buf_int(se, pbufv, &req->ch); ++ ++out: ++ if (allocated_bufv) { ++ free(pbufv); ++ } ++ ++ /* If the request has no reply, still recycle the virtqueue element */ ++ if (!req->reply_sent) { ++ struct VuVirtq *q = vu_get_queue(dev, qi->qidx); ++ ++ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", __func__, ++ elem->index); ++ ++ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ pthread_mutex_lock(&qi->vq_lock); ++ vu_queue_push(dev, q, elem, 0); ++ vu_queue_notify(dev, q); ++ pthread_mutex_unlock(&qi->vq_lock); ++ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); ++ } ++ ++ pthread_mutex_destroy(&req->ch.lock); ++ free(fbuf.mem); ++ free(req); ++} ++ + /* Thread function for individual queues, created when a queue is 'started' */ + static void *fv_queue_thread(void *opaque) + { + struct fv_QueueInfo *qi = opaque; + struct VuDev *dev = &qi->virtio_dev->dev; + struct VuVirtq *q = vu_get_queue(dev, qi->qidx); +- struct fuse_session *se = qi->virtio_dev->se; +- struct fuse_chan ch; +- struct fuse_buf fbuf; ++ GThreadPool *pool; + +- fbuf.mem = NULL; +- fbuf.flags = 0; +- +- fuse_mutex_init(&ch.lock); +- ch.fd = (int)0xdaff0d111; +- ch.qi = qi; ++ pool = g_thread_pool_new(fv_queue_worker, qi, 1 /* TODO max_threads */, ++ TRUE, NULL); ++ if (!pool) { ++ fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); ++ return NULL; ++ } + + fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, + qi->qidx, qi->kick_fd); +@@ -478,6 +636,7 @@ static void *fv_queue_thread(void *opaque) + /* Mutual exclusion with virtio_loop() */ + ret = pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); + assert(ret == 0); /* there is no possible error case */ ++ pthread_mutex_lock(&qi->vq_lock); + /* out is from guest, in is too 
guest */ + unsigned int in_bytes, out_bytes; + vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); +@@ -486,141 +645,22 @@ static void *fv_queue_thread(void *opaque) + "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", + __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); + +- + while (1) { +- bool allocated_bufv = false; +- struct fuse_bufvec bufv; +- struct fuse_bufvec *pbufv; +- +- /* +- * An element contains one request and the space to send our +- * response They're spread over multiple descriptors in a +- * scatter/gather set and we can't trust the guest to keep them +- * still; so copy in/out. +- */ +- VuVirtqElement *elem = vu_queue_pop(dev, q, sizeof(VuVirtqElement)); +- if (!elem) { ++ FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest)); ++ if (!req) { + break; + } + +- qi->qe = elem; +- qi->reply_sent = false; ++ req->reply_sent = false; + +- if (!fbuf.mem) { +- fbuf.mem = malloc(se->bufsize); +- assert(fbuf.mem); +- assert(se->bufsize > sizeof(struct fuse_in_header)); +- } +- /* The 'out' part of the elem is from qemu */ +- unsigned int out_num = elem->out_num; +- struct iovec *out_sg = elem->out_sg; +- size_t out_len = iov_size(out_sg, out_num); +- fuse_log(FUSE_LOG_DEBUG, +- "%s: elem %d: with %d out desc of length %zd\n", __func__, +- elem->index, out_num, out_len); +- +- /* +- * The elem should contain a 'fuse_in_header' (in to fuse) +- * plus the data based on the len in the header. 
+- */ +- if (out_len < sizeof(struct fuse_in_header)) { +- fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", +- __func__, elem->index); +- assert(0); /* TODO */ +- } +- if (out_len > se->bufsize) { +- fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", +- __func__, elem->index); +- assert(0); /* TODO */ +- } +- /* Copy just the first element and look at it */ +- copy_from_iov(&fbuf, 1, out_sg); +- +- if (out_num > 2 && +- out_sg[0].iov_len == sizeof(struct fuse_in_header) && +- ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && +- out_sg[1].iov_len == sizeof(struct fuse_write_in)) { +- /* +- * For a write we don't actually need to copy the +- * data, we can just do it straight out of guest memory +- * but we must still copy the headers in case the guest +- * was nasty and changed them while we were using them. +- */ +- fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); +- +- /* copy the fuse_write_in header after the fuse_in_header */ +- fbuf.mem += out_sg->iov_len; +- copy_from_iov(&fbuf, 1, out_sg + 1); +- fbuf.mem -= out_sg->iov_len; +- fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; +- +- /* Allocate the bufv, with space for the rest of the iov */ +- allocated_bufv = true; +- pbufv = malloc(sizeof(struct fuse_bufvec) + +- sizeof(struct fuse_buf) * (out_num - 2)); +- if (!pbufv) { +- vu_queue_unpop(dev, q, elem, 0); +- free(elem); +- fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", +- __func__); +- goto out; +- } +- +- pbufv->count = 1; +- pbufv->buf[0] = fbuf; +- +- size_t iovindex, pbufvindex; +- iovindex = 2; /* 2 headers, separate iovs */ +- pbufvindex = 1; /* 2 headers, 1 fusebuf */ +- +- for (; iovindex < out_num; iovindex++, pbufvindex++) { +- pbufv->count++; +- pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ +- pbufv->buf[pbufvindex].flags = 0; +- pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; +- pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; +- } +- } else { +- /* Normal (non fast write) 
path */ +- +- /* Copy the rest of the buffer */ +- fbuf.mem += out_sg->iov_len; +- copy_from_iov(&fbuf, out_num - 1, out_sg + 1); +- fbuf.mem -= out_sg->iov_len; +- fbuf.size = out_len; +- +- /* TODO! Endianness of header */ +- +- /* TODO: Add checks for fuse_session_exited */ +- bufv.buf[0] = fbuf; +- bufv.count = 1; +- pbufv = &bufv; +- } +- pbufv->idx = 0; +- pbufv->off = 0; +- fuse_session_process_buf_int(se, pbufv, &ch); +- +- if (allocated_bufv) { +- free(pbufv); +- } +- +- if (!qi->reply_sent) { +- fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", +- __func__, elem->index); +- /* I think we've still got to recycle the element */ +- vu_queue_push(dev, q, elem, 0); +- vu_queue_notify(dev, q); +- } +- qi->qe = NULL; +- free(elem); +- elem = NULL; ++ g_thread_pool_push(pool, req, NULL); + } + ++ pthread_mutex_unlock(&qi->vq_lock); + pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); + } +-out: +- pthread_mutex_destroy(&ch.lock); +- free(fbuf.mem); ++ ++ g_thread_pool_free(pool, FALSE, TRUE); + + return NULL; + } +@@ -643,6 +683,7 @@ static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) + fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n", + __func__, qidx, ret); + } ++ pthread_mutex_destroy(&ourqi->vq_lock); + close(ourqi->kill_fd); + ourqi->kick_fd = -1; + free(vud->qi[qidx]); +@@ -696,6 +737,8 @@ static void fv_queue_set_started(VuDev *dev, int qidx, bool started) + + ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE); + assert(ourqi->kill_fd != -1); ++ pthread_mutex_init(&ourqi->vq_lock, NULL); ++ + if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { + fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", + __func__, qidx); +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch b/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch new file mode 100644 index 0000000..181e32d --- /dev/null +++ b/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch @@ 
-0,0 +1,159 @@ +From a8a1835a82510be7d2d6edcc28a60e506a2cedad Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:46 +0100 +Subject: [PATCH 015/116] virtiofsd: remove mountpoint dummy argument +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-12-dgilbert@redhat.com> +Patchwork-id: 93466 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 011/112] virtiofsd: remove mountpoint dummy argument +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Classic FUSE file system daemons take a mountpoint argument but +virtiofsd exposes a vhost-user UNIX domain socket instead. The +mountpoint argument is not used by virtiofsd but the user is still +required to pass a dummy argument on the command-line. + +Remove the mountpoint argument to clean up the command-line. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 67aab02272f6cb47c56420f60b370c184961b5ca) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 2 +- + tools/virtiofsd/fuse_lowlevel.h | 4 +--- + tools/virtiofsd/helper.c | 20 +++----------------- + tools/virtiofsd/passthrough_ll.c | 12 ++---------- + 4 files changed, 7 insertions(+), 31 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 5c9cb52..2f32c68 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2455,7 +2455,7 @@ out1: + return NULL; + } + +-int fuse_session_mount(struct fuse_session *se, const char *mountpoint) ++int fuse_session_mount(struct fuse_session *se) + { + int fd; + +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index adb9054..8d8909b 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1863,7 +1863,6 @@ struct fuse_cmdline_opts { + int foreground; + int debug; + int nodefault_subtype; +- char *mountpoint; + int show_version; + int show_help; + unsigned int max_idle_threads; +@@ -1924,12 +1923,11 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + /** + * Mount a FUSE file system. + * +- * @param mountpoint the mount point path + * @param se session object + * + * @return 0 on success, -1 on failure. + **/ +-int fuse_session_mount(struct fuse_session *se, const char *mountpoint); ++int fuse_session_mount(struct fuse_session *se); + + /** + * Enter a single threaded, blocking event loop. 
+diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c +index 5711dd2..5e6f205 100644 +--- a/tools/virtiofsd/helper.c ++++ b/tools/virtiofsd/helper.c +@@ -140,27 +140,13 @@ void fuse_cmdline_help(void) + static int fuse_helper_opt_proc(void *data, const char *arg, int key, + struct fuse_args *outargs) + { ++ (void)data; + (void)outargs; +- struct fuse_cmdline_opts *opts = data; + + switch (key) { + case FUSE_OPT_KEY_NONOPT: +- if (!opts->mountpoint) { +- if (fuse_mnt_parse_fuse_fd(arg) != -1) { +- return fuse_opt_add_opt(&opts->mountpoint, arg); +- } +- +- char mountpoint[PATH_MAX] = ""; +- if (realpath(arg, mountpoint) == NULL) { +- fuse_log(FUSE_LOG_ERR, "fuse: bad mount point `%s': %s\n", arg, +- strerror(errno)); +- return -1; +- } +- return fuse_opt_add_opt(&opts->mountpoint, mountpoint); +- } else { +- fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); +- return -1; +- } ++ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); ++ return -1; + + default: + /* Pass through unknown options */ +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index c5850ef..9377718 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -1297,7 +1297,7 @@ int main(int argc, char *argv[]) + return 1; + } + if (opts.show_help) { +- printf("usage: %s [options] \n\n", argv[0]); ++ printf("usage: %s [options]\n\n", argv[0]); + fuse_cmdline_help(); + fuse_lowlevel_help(); + ret = 0; +@@ -1308,13 +1308,6 @@ int main(int argc, char *argv[]) + goto err_out1; + } + +- if (opts.mountpoint == NULL) { +- printf("usage: %s [options] \n", argv[0]); +- printf(" %s --help\n", argv[0]); +- ret = 1; +- goto err_out1; +- } +- + if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { + return 1; + } +@@ -1374,7 +1367,7 @@ int main(int argc, char *argv[]) + goto err_out2; + } + +- if (fuse_session_mount(se, opts.mountpoint) != 0) { ++ if (fuse_session_mount(se) != 0) { + goto err_out3; + } + +@@ -1393,7 
+1386,6 @@ err_out3: + err_out2: + fuse_session_destroy(se); + err_out1: +- free(opts.mountpoint); + fuse_opt_free_args(&args); + + if (lo.root.fd >= 0) { +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-remove-unused-notify-reply-support.patch b/kvm-virtiofsd-remove-unused-notify-reply-support.patch new file mode 100644 index 0000000..98fb968 --- /dev/null +++ b/kvm-virtiofsd-remove-unused-notify-reply-support.patch @@ -0,0 +1,294 @@ +From e5534c0d4b866f61dbafa8d2422a24ab956189c1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:47 +0100 +Subject: [PATCH 016/116] virtiofsd: remove unused notify reply support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-13-dgilbert@redhat.com> +Patchwork-id: 93467 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 012/112] virtiofsd: remove unused notify reply support +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Notify reply support is unused by virtiofsd. The code would need to be +updated to validate input buffer sizes. Remove this unused code since +changes to it are untestable. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 64c6f408a29ef03e9b8da9f5a5d8fd511b0d801e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 147 +--------------------------------------- + tools/virtiofsd/fuse_lowlevel.h | 47 ------------- + 2 files changed, 1 insertion(+), 193 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 2f32c68..eb0ec49 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -31,12 +31,6 @@ + #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) + #define OFFSET_MAX 0x7fffffffffffffffLL + +-#define container_of(ptr, type, member) \ +- ({ \ +- const typeof(((type *)0)->member) *__mptr = (ptr); \ +- (type *)((char *)__mptr - offsetof(type, member)); \ +- }) +- + struct fuse_pollhandle { + uint64_t kh; + struct fuse_session *se; +@@ -1862,52 +1856,6 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + send_reply_ok(req, NULL, 0); + } + +-static void list_del_nreq(struct fuse_notify_req *nreq) +-{ +- struct fuse_notify_req *prev = nreq->prev; +- struct fuse_notify_req *next = nreq->next; +- prev->next = next; +- next->prev = prev; +-} +- +-static void list_add_nreq(struct fuse_notify_req *nreq, +- struct fuse_notify_req *next) +-{ +- struct fuse_notify_req *prev = next->prev; +- nreq->next = next; +- nreq->prev = prev; +- prev->next = nreq; +- next->prev = nreq; +-} +- +-static void list_init_nreq(struct fuse_notify_req *nreq) +-{ +- nreq->next = nreq; +- nreq->prev = nreq; +-} +- +-static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, +- const void *inarg, const struct fuse_buf *buf) +-{ +- struct fuse_session *se = req->se; +- struct fuse_notify_req *nreq; +- struct fuse_notify_req *head; +- +- pthread_mutex_lock(&se->lock); +- head = &se->notify_list; +- for (nreq = head->next; nreq != head; nreq = nreq->next) { +- if (nreq->unique == req->unique) { +- list_del_nreq(nreq); +- break; +- } +- } 
+- pthread_mutex_unlock(&se->lock); +- +- if (nreq != head) { +- nreq->reply(nreq, req, nodeid, inarg, buf); +- } +-} +- + static int send_notify_iov(struct fuse_session *se, int notify_code, + struct iovec *iov, int count) + { +@@ -2059,95 +2007,6 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + return res; + } + +-struct fuse_retrieve_req { +- struct fuse_notify_req nreq; +- void *cookie; +-}; +- +-static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, fuse_req_t req, +- fuse_ino_t ino, const void *inarg, +- const struct fuse_buf *ibuf) +-{ +- struct fuse_session *se = req->se; +- struct fuse_retrieve_req *rreq = +- container_of(nreq, struct fuse_retrieve_req, nreq); +- const struct fuse_notify_retrieve_in *arg = inarg; +- struct fuse_bufvec bufv = { +- .buf[0] = *ibuf, +- .count = 1, +- }; +- +- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { +- bufv.buf[0].mem = PARAM(arg); +- } +- +- bufv.buf[0].size -= +- sizeof(struct fuse_in_header) + sizeof(struct fuse_notify_retrieve_in); +- +- if (bufv.buf[0].size < arg->size) { +- fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); +- fuse_reply_none(req); +- goto out; +- } +- bufv.buf[0].size = arg->size; +- +- if (se->op.retrieve_reply) { +- se->op.retrieve_reply(req, rreq->cookie, ino, arg->offset, &bufv); +- } else { +- fuse_reply_none(req); +- } +-out: +- free(rreq); +-} +- +-int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, +- size_t size, off_t offset, void *cookie) +-{ +- struct fuse_notify_retrieve_out outarg; +- struct iovec iov[2]; +- struct fuse_retrieve_req *rreq; +- int err; +- +- if (!se) { +- return -EINVAL; +- } +- +- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { +- return -ENOSYS; +- } +- +- rreq = malloc(sizeof(*rreq)); +- if (rreq == NULL) { +- return -ENOMEM; +- } +- +- pthread_mutex_lock(&se->lock); +- rreq->cookie = cookie; +- rreq->nreq.unique = se->notify_ctr++; +- rreq->nreq.reply = 
fuse_ll_retrieve_reply; +- list_add_nreq(&rreq->nreq, &se->notify_list); +- pthread_mutex_unlock(&se->lock); +- +- outarg.notify_unique = rreq->nreq.unique; +- outarg.nodeid = ino; +- outarg.offset = offset; +- outarg.size = size; +- outarg.padding = 0; +- +- iov[1].iov_base = &outarg; +- iov[1].iov_len = sizeof(outarg); +- +- err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); +- if (err) { +- pthread_mutex_lock(&se->lock); +- list_del_nreq(&rreq->nreq); +- pthread_mutex_unlock(&se->lock); +- free(rreq); +- } +- +- return err; +-} +- + void *fuse_req_userdata(fuse_req_t req) + { + return req->se->userdata; +@@ -2226,7 +2085,7 @@ static struct { + [FUSE_POLL] = { do_poll, "POLL" }, + [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, + [FUSE_DESTROY] = { do_destroy, "DESTROY" }, +- [FUSE_NOTIFY_REPLY] = { (void *)1, "NOTIFY_REPLY" }, ++ [FUSE_NOTIFY_REPLY] = { NULL, "NOTIFY_REPLY" }, + [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, + [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS" }, + [FUSE_RENAME2] = { do_rename2, "RENAME2" }, +@@ -2333,8 +2192,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, + inarg = (void *)&in[1]; + if (in->opcode == FUSE_WRITE && se->op.write_buf) { + do_write_buf(req, in->nodeid, inarg, buf); +- } else if (in->opcode == FUSE_NOTIFY_REPLY) { +- do_notify_reply(req, in->nodeid, inarg, buf); + } else { + fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); + } +@@ -2437,8 +2294,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, + + list_init_req(&se->list); + list_init_req(&se->interrupts); +- list_init_nreq(&se->notify_list); +- se->notify_ctr = 1; + fuse_mutex_init(&se->lock); + + memcpy(&se->op, op, op_size); +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 8d8909b..12a84b4 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1085,21 +1085,6 @@ struct fuse_lowlevel_ops { + off_t off, struct fuse_file_info *fi); + + 
/** +- * Callback function for the retrieve request +- * +- * Valid replies: +- * fuse_reply_none +- * +- * @param req request handle +- * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() +- * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() +- * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() +- * @param bufv the buffer containing the returned data +- */ +- void (*retrieve_reply)(fuse_req_t req, void *cookie, fuse_ino_t ino, +- off_t offset, struct fuse_bufvec *bufv); +- +- /** + * Forget about multiple inodes + * + * See description of the forget function for more +@@ -1726,38 +1711,6 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, + int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, + off_t offset, struct fuse_bufvec *bufv, + enum fuse_buf_copy_flags flags); +-/** +- * Retrieve data from the kernel buffers +- * +- * Retrieve data in the kernel buffers belonging to the given inode. +- * If successful then the retrieve_reply() method will be called with +- * the returned data. +- * +- * Only present pages are returned in the retrieve reply. Retrieving +- * stops when it finds a non-present page and only data prior to that +- * is returned. +- * +- * If this function returns an error, then the retrieve will not be +- * completed and no reply will be sent. +- * +- * This function doesn't change the dirty state of pages in the kernel +- * buffer. For dirty pages the write() method will be called +- * regardless of having been retrieved previously. +- * +- * Added in FUSE protocol version 7.15. If the kernel does not support +- * this (or a newer) version, the function will return -ENOSYS and do +- * nothing. 
+- * +- * @param se the session object +- * @param ino the inode number +- * @param size the number of bytes to retrieve +- * @param offset the starting offset into the file to retrieve from +- * @param cookie user data to supply to the reply callback +- * @return zero for success, -errno for failure +- */ +-int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, +- size_t size, off_t offset, void *cookie); +- + + /* + * Utility functions +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch b/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch new file mode 100644 index 0000000..97a0db3 --- /dev/null +++ b/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch @@ -0,0 +1,139 @@ +From e01a6e68d799ed2af0ca3b04d75818ba62b18682 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:08 +0100 +Subject: [PATCH 097/116] virtiofsd: rename inode->refcount to inode->nlookup +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-94-dgilbert@redhat.com> +Patchwork-id: 93547 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 093/112] virtiofsd: rename inode->refcount to inode->nlookup +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +This reference counter plays a specific role in the FUSE protocol. It's +not a generic object reference counter and the FUSE kernel code calls it +"nlookup". + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 1222f015558fc34cea02aa3a5a92de608c82cec8) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 37 +++++++++++++++++++++++++------------ + 1 file changed, 25 insertions(+), 12 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 2d703b5..c819b5f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -99,7 +99,20 @@ struct lo_inode { + int fd; + bool is_symlink; + struct lo_key key; +- uint64_t refcount; /* protected by lo->mutex */ ++ ++ /* ++ * This counter keeps the inode alive during the FUSE session. ++ * Incremented when the FUSE inode number is sent in a reply ++ * (FUSE_LOOKUP, FUSE_READDIRPLUS, etc). Decremented when an inode is ++ * released by requests like FUSE_FORGET, FUSE_RMDIR, FUSE_RENAME, etc. ++ * ++ * Note that this value is untrusted because the client can manipulate ++ * it arbitrarily using FUSE_FORGET requests. ++ * ++ * Protected by lo->mutex. 
++ */ ++ uint64_t nlookup; ++ + fuse_ino_t fuse_ino; + pthread_mutex_t plock_mutex; + GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ +@@ -568,7 +581,7 @@ retry: + if (last == path) { + p = &lo->root; + pthread_mutex_lock(&lo->mutex); +- p->refcount++; ++ p->nlookup++; + pthread_mutex_unlock(&lo->mutex); + } else { + *last = '\0'; +@@ -786,8 +799,8 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) + pthread_mutex_lock(&lo->mutex); + p = g_hash_table_lookup(lo->inodes, &key); + if (p) { +- assert(p->refcount > 0); +- p->refcount++; ++ assert(p->nlookup > 0); ++ p->nlookup++; + } + pthread_mutex_unlock(&lo->mutex); + +@@ -855,7 +868,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + } + + inode->is_symlink = S_ISLNK(e->attr.st_mode); +- inode->refcount = 1; ++ inode->nlookup = 1; + inode->fd = newfd; + newfd = -1; + inode->key.ino = e->attr.st_ino; +@@ -1112,7 +1125,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + } + + pthread_mutex_lock(&lo->mutex); +- inode->refcount++; ++ inode->nlookup++; + pthread_mutex_unlock(&lo->mutex); + e.ino = inode->fuse_ino; + +@@ -1193,9 +1206,9 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, + } + + pthread_mutex_lock(&lo->mutex); +- assert(inode->refcount >= n); +- inode->refcount -= n; +- if (!inode->refcount) { ++ assert(inode->nlookup >= n); ++ inode->nlookup -= n; ++ if (!inode->nlookup) { + lo_map_remove(&lo->ino_map, inode->fuse_ino); + g_hash_table_remove(lo->inodes, &inode->key); + if (g_hash_table_size(inode->posix_locks)) { +@@ -1216,7 +1229,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) + struct lo_inode *inode = value; + struct lo_data *lo = user_data; + +- inode->refcount = 0; ++ inode->nlookup = 0; + lo_map_remove(&lo->ino_map, inode->fuse_ino); + close(inode->fd); + +@@ -1241,7 +1254,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, 
uint64_t nlookup) + } + + fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", +- (unsigned long long)ino, (unsigned long long)inode->refcount, ++ (unsigned long long)ino, (unsigned long long)inode->nlookup, + (unsigned long long)nlookup); + + unref_inode_lolocked(lo, inode, nlookup); +@@ -2609,7 +2622,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) + root->fd = fd; + root->key.ino = stat.st_ino; + root->key.dev = stat.st_dev; +- root->refcount = 2; ++ root->nlookup = 2; + } + + static guint lo_key_hash(gconstpointer key) +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch b/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch new file mode 100644 index 0000000..95858f8 --- /dev/null +++ b/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch @@ -0,0 +1,94 @@ +From cfa4550f926e7a07757853f94273f2d1589cb9d3 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:48 +0100 +Subject: [PATCH 077/116] virtiofsd: rename unref_inode() to + unref_inode_lolocked() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-74-dgilbert@redhat.com> +Patchwork-id: 93526 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 073/112] virtiofsd: rename unref_inode() to unref_inode_lolocked() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Miklos Szeredi + +Signed-off-by: Miklos Szeredi +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 95d2715791c60b5dc2d22e4eb7b83217273296fa) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 8b1784f..de12e75 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -148,8 +148,8 @@ static const struct fuse_opt lo_opts[] = { + }; + static bool use_syslog = false; + static int current_log_level; +- +-static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); ++static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, ++ uint64_t n); + + static struct { + pthread_mutex_t mutex; +@@ -586,7 +586,7 @@ retry: + return 0; + + fail_unref: +- unref_inode(lo, p, 1); ++ unref_inode_lolocked(lo, p, 1); + fail: + if (retries) { + retries--; +@@ -624,7 +624,7 @@ fallback: + res = lo_parent_and_name(lo, inode, path, &parent); + if (res != -1) { + res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); +- unref_inode(lo, parent, 1); ++ unref_inode_lolocked(lo, parent, 1); + } + + return res; +@@ -1027,7 +1027,7 @@ fallback: + res = lo_parent_and_name(lo, inode, path, &parent); + if (res != -1) { + res = linkat(parent->fd, path, dfd, name, 0); +- unref_inode(lo, parent, 1); ++ unref_inode_lolocked(lo, parent, 1); + } + + return res; +@@ -1141,7 +1141,8 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + fuse_reply_err(req, res == -1 ? 
errno : 0); + } + +-static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) ++static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, ++ uint64_t n) + { + if (!inode) { + return; +@@ -1181,7 +1182,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) + (unsigned long long)ino, (unsigned long long)inode->refcount, + (unsigned long long)nlookup); + +- unref_inode(lo, inode, nlookup); ++ unref_inode_lolocked(lo, inode, nlookup); + } + + static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-sandbox-mount-namespace.patch b/kvm-virtiofsd-sandbox-mount-namespace.patch new file mode 100644 index 0000000..ab6f751 --- /dev/null +++ b/kvm-virtiofsd-sandbox-mount-namespace.patch @@ -0,0 +1,166 @@ +From c7ae38df696e4be432fd418c670dcea892b910a7 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:27 +0100 +Subject: [PATCH 056/116] virtiofsd: sandbox mount namespace +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-53-dgilbert@redhat.com> +Patchwork-id: 93504 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 052/112] virtiofsd: sandbox mount namespace +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Use a mount namespace with the shared directory tree mounted at "/" and +no other mounts. + +This prevents symlink escape attacks because symlink targets are +resolved only against the shared directory and cannot go outside it. + +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Peng Tao +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 5baa3b8e95064c2434bd9e2f312edd5e9ae275dc) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 89 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 89 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e2e2211..0570453 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -50,6 +50,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1943,6 +1944,58 @@ static void print_capabilities(void) + printf("}\n"); + } + ++/* This magic is based on lxc's lxc_pivot_root() */ ++static void setup_pivot_root(const char *source) ++{ ++ int oldroot; ++ int newroot; ++ ++ oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC); ++ if (oldroot < 0) { ++ fuse_log(FUSE_LOG_ERR, "open(/): %m\n"); ++ exit(1); ++ } ++ ++ newroot = open(source, O_DIRECTORY | O_RDONLY | O_CLOEXEC); ++ if (newroot < 0) { ++ fuse_log(FUSE_LOG_ERR, "open(%s): %m\n", source); ++ exit(1); ++ } ++ ++ if (fchdir(newroot) < 0) { ++ fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); ++ exit(1); ++ } ++ ++ if (syscall(__NR_pivot_root, ".", ".") < 0) { ++ fuse_log(FUSE_LOG_ERR, "pivot_root(., .): %m\n"); ++ exit(1); ++ } ++ ++ if (fchdir(oldroot) < 0) { ++ fuse_log(FUSE_LOG_ERR, "fchdir(oldroot): %m\n"); ++ exit(1); ++ } ++ ++ if (mount("", ".", "", MS_SLAVE | MS_REC, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(., MS_SLAVE | MS_REC): %m\n"); ++ exit(1); ++ } ++ ++ if (umount2(".", MNT_DETACH) < 0) { ++ fuse_log(FUSE_LOG_ERR, "umount2(., MNT_DETACH): %m\n"); ++ exit(1); ++ } ++ ++ if (fchdir(newroot) < 0) { ++ fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); ++ exit(1); ++ } ++ ++ close(newroot); ++ close(oldroot); ++} ++ + static void setup_proc_self_fd(struct lo_data *lo) + { + lo->proc_self_fd = open("/proc/self/fd", O_PATH); +@@ -1952,6 +2005,39 @@ static void setup_proc_self_fd(struct lo_data *lo) + } + } + ++/* 
++ * Make the source directory our root so symlinks cannot escape and no other ++ * files are accessible. ++ */ ++static void setup_mount_namespace(const char *source) ++{ ++ if (unshare(CLONE_NEWNS) != 0) { ++ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNS): %m\n"); ++ exit(1); ++ } ++ ++ if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_PRIVATE): %m\n"); ++ exit(1); ++ } ++ ++ if (mount(source, source, NULL, MS_BIND, NULL) < 0) { ++ fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); ++ exit(1); ++ } ++ ++ setup_pivot_root(source); ++} ++ ++/* ++ * Lock down this process to prevent access to other processes or files outside ++ * source directory. This reduces the impact of arbitrary code execution bugs. ++ */ ++static void setup_sandbox(struct lo_data *lo) ++{ ++ setup_mount_namespace(lo->source); ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -2052,6 +2138,7 @@ int main(int argc, char *argv[]) + } + + lo.root.fd = open(lo.source, O_PATH); ++ + if (lo.root.fd == -1) { + fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); + exit(1); +@@ -2075,6 +2162,8 @@ int main(int argc, char *argv[]) + /* Must be after daemonize to get the right /proc/self/fd */ + setup_proc_self_fd(&lo); + ++ setup_sandbox(&lo); ++ + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch b/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch new file mode 100644 index 0000000..e54248c --- /dev/null +++ b/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch @@ -0,0 +1,93 @@ +From 4cc435b3a8a9a419cc85ee883d5184f810f91e52 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:34 +0100 +Subject: [PATCH 063/116] virtiofsd: set maximum RLIMIT_NOFILE limit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-60-dgilbert@redhat.com> +Patchwork-id: 93516 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 059/112] virtiofsd: set maximum RLIMIT_NOFILE limit +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +virtiofsd can exceed the default open file descriptor limit easily on +most systems. Take advantage of the fact that it runs as root to raise +the limit. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 01a6dc95ec7f71eeff9963fe3cb03d85225fba3e) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 32 ++++++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index d53cb1e..c281d81 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -53,6 +53,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -2268,6 +2269,35 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) + setup_seccomp(); + } + ++/* Raise the maximum number of open file descriptors */ ++static void setup_nofile_rlimit(void) ++{ ++ const rlim_t max_fds = 1000000; ++ struct rlimit rlim; ++ ++ if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { ++ fuse_log(FUSE_LOG_ERR, "getrlimit(RLIMIT_NOFILE): %m\n"); ++ exit(1); ++ } ++ ++ if (rlim.rlim_cur >= max_fds) { ++ return; /* nothing to do */ ++ } ++ ++ rlim.rlim_cur = max_fds; ++ rlim.rlim_max = max_fds; ++ ++ if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) { ++ /* Ignore SELinux 
denials */ ++ if (errno == EPERM) { ++ return; ++ } ++ ++ fuse_log(FUSE_LOG_ERR, "setrlimit(RLIMIT_NOFILE): %m\n"); ++ exit(1); ++ } ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); +@@ -2389,6 +2419,8 @@ int main(int argc, char *argv[]) + + fuse_daemonize(opts.foreground); + ++ setup_nofile_rlimit(); ++ + /* Must be before sandbox since it wants /proc */ + setup_capng(); + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch b/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch new file mode 100644 index 0000000..be6b244 --- /dev/null +++ b/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch @@ -0,0 +1,72 @@ +From 06a24b54c94345b436d888a48b92fafa967c3d58 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:25 +0100 +Subject: [PATCH 114/116] virtiofsd: stop all queue threads on exit in + virtio_loop() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-111-dgilbert@redhat.com> +Patchwork-id: 93564 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 110/112] virtiofsd: stop all queue threads on exit in virtio_loop() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Eryu Guan + +On guest graceful shutdown, virtiofsd receives VHOST_USER_GET_VRING_BASE +request from VMM and shuts down virtqueues by calling fv_set_started(), +which joins fv_queue_thread() threads. So when virtio_loop() returns, +there should be no thread is still accessing data in fuse session and/or +virtio dev. + +But on abnormal exit, e.g. guest got killed for whatever reason, +vhost-user socket is closed and virtio_loop() breaks out the main loop +and returns to main(). 
But it's possible fv_queue_worker()s are still +working and accessing fuse session and virtio dev, which results in +crash or use-after-free. + +Fix it by stopping fv_queue_thread()s before virtio_loop() returns, +to make sure there's no-one could access fuse session and virtio dev. + +Reported-by: Qingming Su +Signed-off-by: Eryu Guan +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 9883df8ccae6d744a0c8d9cbf9d62b1797d70ebd) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_virtio.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 9f65823..80a6e92 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -815,6 +815,19 @@ int virtio_loop(struct fuse_session *se) + } + } + ++ /* ++ * Make sure all fv_queue_thread()s quit on exit, as we're about to ++ * free virtio dev and fuse session, no one should access them anymore. ++ */ ++ for (int i = 0; i < se->virtio_dev->nqueues; i++) { ++ if (!se->virtio_dev->qi[i]) { ++ continue; ++ } ++ ++ fuse_log(FUSE_LOG_INFO, "%s: Stopping queue %d thread\n", __func__, i); ++ fv_queue_cleanup_thread(se->virtio_dev, i); ++ } ++ + fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); + + return 0; +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch b/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch new file mode 100644 index 0000000..f595ffa --- /dev/null +++ b/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch @@ -0,0 +1,83 @@ +From 1744329bcba4a3e1a82cec3b1a34b3fbf0a9d7cf Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:54 +0100 +Subject: [PATCH 083/116] virtiofsd: support nanosecond resolution for file + timestamp +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-80-dgilbert@redhat.com> +Patchwork-id: 93535 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 079/112] virtiofsd: support nanosecond resolution for file timestamp +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Jiufei Xue + +Define HAVE_STRUCT_STAT_ST_ATIM to 1 if `st_atim' is member of `struct +stat' which means support nanosecond resolution for the file timestamp +fields. + +Signed-off-by: Jiufei Xue +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 8a792b034d4b315251fd842bb4c73a133aa1368f) +Signed-off-by: Miroslav Rezanina +--- + configure | 16 ++++++++++++++++ + tools/virtiofsd/fuse_misc.h | 1 + + 2 files changed, 17 insertions(+) + +diff --git a/configure b/configure +index 7831618..5120c14 100755 +--- a/configure ++++ b/configure +@@ -5218,6 +5218,19 @@ if compile_prog "" "" ; then + strchrnul=yes + fi + ++######################################### ++# check if we have st_atim ++ ++st_atim=no ++cat > $TMPC << EOF ++#include ++#include ++int main(void) { return offsetof(struct stat, st_atim); } ++EOF ++if compile_prog "" "" ; then ++ st_atim=yes ++fi ++ + ########################################## + # check if trace backend exists + +@@ -6919,6 +6932,9 @@ fi + if test "$strchrnul" = "yes" ; then + echo "HAVE_STRCHRNUL=y" >> $config_host_mak + fi ++if test "$st_atim" = "yes" ; then ++ echo "HAVE_STRUCT_STAT_ST_ATIM=y" >> $config_host_mak ++fi + if test "$byteswap_h" = "yes" ; then + echo "CONFIG_BYTESWAP_H=y" >> $config_host_mak + fi +diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h +index f252baa..5c618ce 100644 +--- a/tools/virtiofsd/fuse_misc.h ++++ b/tools/virtiofsd/fuse_misc.h +@@ -7,6 +7,7 @@ + */ + + #include ++#include "config-host.h" + + /* + * Versioned symbols cannot be used in some cases because it +-- +1.8.3.1 + diff --git 
a/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch b/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch new file mode 100644 index 0000000..1bae1bf --- /dev/null +++ b/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch @@ -0,0 +1,82 @@ +From 7bc27a767bc8c78b1bca46bbe5e1d53dcd7173b4 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:18 +0100 +Subject: [PATCH 107/116] virtiofsd: use fuse_buf_writev to replace + fuse_buf_write for better performance +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-104-dgilbert@redhat.com> +Patchwork-id: 93558 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 103/112] virtiofsd: use fuse_buf_writev to replace fuse_buf_write for better performance +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: piaojun + +fuse_buf_writev() only handles the normal write in which src is buffer +and dest is fd. Specially if src buffer represents guest physical +address that can't be mapped by the daemon process, IO must be bounced +back to the VMM to do it by fuse_buf_copy(). + +Signed-off-by: Jun Piao +Suggested-by: Dr. David Alan Gilbert +Suggested-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit c465bba2c90a810f6e71e4f2646b1b4ee4b478de) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/buffer.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c +index 37befeb..27c1377 100644 +--- a/tools/virtiofsd/buffer.c ++++ b/tools/virtiofsd/buffer.c +@@ -34,7 +34,6 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv) + return size; + } + +-__attribute__((unused)) + static ssize_t fuse_buf_writev(struct fuse_buf *out_buf, + struct fuse_bufvec *in_buf) + { +@@ -262,12 +261,29 @@ static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) + + ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) + { +- size_t copied = 0; ++ size_t copied = 0, i; + + if (dstv == srcv) { + return fuse_buf_size(dstv); + } + ++ /* ++ * use writev to improve bandwidth when all the ++ * src buffers already mapped by the daemon ++ * process ++ */ ++ for (i = 0; i < srcv->count; i++) { ++ if (srcv->buf[i].flags & FUSE_BUF_IS_FD) { ++ break; ++ } ++ } ++ if ((i == srcv->count) && (dstv->count == 1) && ++ (dstv->idx == 0) && ++ (dstv->buf[0].flags & FUSE_BUF_IS_FD)) { ++ dstv->buf[0].pos += dstv->off; ++ return fuse_buf_writev(&dstv->buf[0], srcv); ++ } ++ + for (;;) { + const struct fuse_buf *src = fuse_bufvec_current(srcv); + const struct fuse_buf *dst = fuse_bufvec_current(dstv); +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch b/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch new file mode 100644 index 0000000..feffb5e --- /dev/null +++ b/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch @@ -0,0 +1,56 @@ +From 1724f54070d33d8070ba2d22c8fac87ea65814c1 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:02:04 +0100 +Subject: [PATCH 093/116] virtiofsd: use fuse_lowlevel_is_virtio() in + fuse_session_destroy() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-90-dgilbert@redhat.com> +Patchwork-id: 93540 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 089/112] virtiofsd: use fuse_lowlevel_is_virtio() in fuse_session_destroy() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +vu_socket_path is NULL when --fd=FDNUM was used. Use +fuse_lowlevel_is_virtio() instead. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 620e9d8d9cee6df7fe71168dea950dba0cc21a4a) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 70568d2..dab6a31 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2537,12 +2537,13 @@ void fuse_session_destroy(struct fuse_session *se) + close(se->fd); + } + +- if (se->vu_socket_path) { ++ if (fuse_lowlevel_is_virtio(se)) { + virtio_session_close(se); +- free(se->vu_socket_path); +- se->vu_socket_path = NULL; + } + ++ free(se->vu_socket_path); ++ se->vu_socket_path = NULL; ++ + free(se); + } + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch b/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch new file mode 100644 index 0000000..f250ed7 --- /dev/null +++ b/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch @@ -0,0 +1,390 @@ +From bce5070d1aada88154b811a08eec1586ab24fce5 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:26 +0100 +Subject: [PATCH 055/116] virtiofsd: use /proc/self/fd/ O_PATH file descriptor +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-52-dgilbert@redhat.com> +Patchwork-id: 93506 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 051/112] virtiofsd: use /proc/self/fd/ O_PATH file descriptor +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Sandboxing will remove /proc from the mount namespace so we can no +longer build string paths into "/proc/self/fd/...". + +Keep an O_PATH file descriptor so we can still re-open fds via +/proc/self/fd. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 9f59d175e2ca96f0b87f534dba69ea547dd35945) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 130 +++++++++++++++++++++++++++++++-------- + 1 file changed, 103 insertions(+), 27 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e3d65c3..e2e2211 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -110,6 +110,9 @@ struct lo_data { + struct lo_map ino_map; /* protected by lo->mutex */ + struct lo_map dirp_map; /* protected by lo->mutex */ + struct lo_map fd_map; /* protected by lo->mutex */ ++ ++ /* An O_PATH file descriptor to /proc/self/fd/ */ ++ int proc_self_fd; + }; + + static const struct fuse_opt lo_opts[] = { +@@ -379,9 +382,9 @@ static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, + int res; + + retry: +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "%i", inode->fd); + +- res = readlink(procname, path, PATH_MAX); ++ res = readlinkat(lo->proc_self_fd, procname, path, PATH_MAX); 
+ if (res < 0) { + fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__); + goto fail_noretry; +@@ -477,9 +480,9 @@ static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, + } + return res; + } +- sprintf(path, "/proc/self/fd/%i", inode->fd); ++ sprintf(path, "%i", inode->fd); + +- return utimensat(AT_FDCWD, path, tv, 0); ++ return utimensat(lo->proc_self_fd, path, tv, 0); + + fallback: + res = lo_parent_and_name(lo, inode, path, &parent); +@@ -535,8 +538,8 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + if (fi) { + res = fchmod(fd, attr->st_mode); + } else { +- sprintf(procname, "/proc/self/fd/%i", ifd); +- res = chmod(procname, attr->st_mode); ++ sprintf(procname, "%i", ifd); ++ res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0); + } + if (res == -1) { + goto out_err; +@@ -552,11 +555,23 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + } + } + if (valid & FUSE_SET_ATTR_SIZE) { ++ int truncfd; ++ + if (fi) { +- res = ftruncate(fd, attr->st_size); ++ truncfd = fd; + } else { +- sprintf(procname, "/proc/self/fd/%i", ifd); +- res = truncate(procname, attr->st_size); ++ sprintf(procname, "%i", ifd); ++ truncfd = openat(lo->proc_self_fd, procname, O_RDWR); ++ if (truncfd < 0) { ++ goto out_err; ++ } ++ } ++ ++ res = ftruncate(truncfd, attr->st_size); ++ if (!fi) { ++ saverr = errno; ++ close(truncfd); ++ errno = saverr; + } + if (res == -1) { + goto out_err; +@@ -874,9 +889,9 @@ static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, + return res; + } + +- sprintf(path, "/proc/self/fd/%i", inode->fd); ++ sprintf(path, "%i", inode->fd); + +- return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW); ++ return linkat(lo->proc_self_fd, path, dfd, name, AT_SYMLINK_FOLLOW); + + fallback: + res = lo_parent_and_name(lo, inode, path, &parent); +@@ -1404,8 +1419,8 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + fi->flags &= 
~O_APPEND; + } + +- sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); +- fd = open(buf, fi->flags & ~O_NOFOLLOW); ++ sprintf(buf, "%i", lo_fd(req, ino)); ++ fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); + if (fd == -1) { + return (void)fuse_reply_err(req, errno); + } +@@ -1458,7 +1473,6 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi) + { + int res; +- (void)ino; + int fd; + char *buf; + +@@ -1466,12 +1480,14 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + (void *)fi); + + if (!fi) { +- res = asprintf(&buf, "/proc/self/fd/%i", lo_fd(req, ino)); ++ struct lo_data *lo = lo_data(req); ++ ++ res = asprintf(&buf, "%i", lo_fd(req, ino)); + if (res == -1) { + return (void)fuse_reply_err(req, errno); + } + +- fd = open(buf, O_RDWR); ++ fd = openat(lo->proc_self_fd, buf, O_RDWR); + free(buf); + if (fd == -1) { + return (void)fuse_reply_err(req, errno); +@@ -1587,11 +1603,13 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, + static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + size_t size) + { ++ struct lo_data *lo = lo_data(req); + char *value = NULL; + char procname[64]; + struct lo_inode *inode; + ssize_t ret; + int saverr; ++ int fd = -1; + + inode = lo_inode(req, ino); + if (!inode) { +@@ -1616,7 +1634,11 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out; + } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "%i", inode->fd); ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ goto out_err; ++ } + + if (size) { + value = malloc(size); +@@ -1624,7 +1646,7 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out_err; + } + +- ret = getxattr(procname, name, value, size); ++ ret = fgetxattr(fd, name, value, size); + if (ret == -1) { + goto out_err; + } +@@ -1635,7 +1657,7 @@ static void lo_getxattr(fuse_req_t req, 
fuse_ino_t ino, const char *name, + + fuse_reply_buf(req, value, ret); + } else { +- ret = getxattr(procname, name, NULL, 0); ++ ret = fgetxattr(fd, name, NULL, 0); + if (ret == -1) { + goto out_err; + } +@@ -1644,6 +1666,10 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + } + out_free: + free(value); ++ ++ if (fd >= 0) { ++ close(fd); ++ } + return; + + out_err: +@@ -1655,11 +1681,13 @@ out: + + static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + { ++ struct lo_data *lo = lo_data(req); + char *value = NULL; + char procname[64]; + struct lo_inode *inode; + ssize_t ret; + int saverr; ++ int fd = -1; + + inode = lo_inode(req, ino); + if (!inode) { +@@ -1683,7 +1711,11 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + goto out; + } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "%i", inode->fd); ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ goto out_err; ++ } + + if (size) { + value = malloc(size); +@@ -1691,7 +1723,7 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + goto out_err; + } + +- ret = listxattr(procname, value, size); ++ ret = flistxattr(fd, value, size); + if (ret == -1) { + goto out_err; + } +@@ -1702,7 +1734,7 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + + fuse_reply_buf(req, value, ret); + } else { +- ret = listxattr(procname, NULL, 0); ++ ret = flistxattr(fd, NULL, 0); + if (ret == -1) { + goto out_err; + } +@@ -1711,6 +1743,10 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + } + out_free: + free(value); ++ ++ if (fd >= 0) { ++ close(fd); ++ } + return; + + out_err: +@@ -1724,9 +1760,11 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + const char *value, size_t size, int flags) + { + char procname[64]; ++ struct lo_data *lo = lo_data(req); + struct lo_inode *inode; + ssize_t ret; + int saverr; ++ int fd = -1; + + 
inode = lo_inode(req, ino); + if (!inode) { +@@ -1751,21 +1789,31 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out; + } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "%i", inode->fd); ++ fd = openat(lo->proc_self_fd, procname, O_RDWR); ++ if (fd < 0) { ++ saverr = errno; ++ goto out; ++ } + +- ret = setxattr(procname, name, value, size, flags); ++ ret = fsetxattr(fd, name, value, size, flags); + saverr = ret == -1 ? errno : 0; + + out: ++ if (fd >= 0) { ++ close(fd); ++ } + fuse_reply_err(req, saverr); + } + + static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + { + char procname[64]; ++ struct lo_data *lo = lo_data(req); + struct lo_inode *inode; + ssize_t ret; + int saverr; ++ int fd = -1; + + inode = lo_inode(req, ino); + if (!inode) { +@@ -1789,12 +1837,20 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + goto out; + } + +- sprintf(procname, "/proc/self/fd/%i", inode->fd); ++ sprintf(procname, "%i", inode->fd); ++ fd = openat(lo->proc_self_fd, procname, O_RDWR); ++ if (fd < 0) { ++ saverr = errno; ++ goto out; ++ } + +- ret = removexattr(procname, name); ++ ret = fremovexattr(fd, name); + saverr = ret == -1 ? 
errno : 0; + + out: ++ if (fd >= 0) { ++ close(fd); ++ } + fuse_reply_err(req, saverr); + } + +@@ -1887,12 +1943,25 @@ static void print_capabilities(void) + printf("}\n"); + } + ++static void setup_proc_self_fd(struct lo_data *lo) ++{ ++ lo->proc_self_fd = open("/proc/self/fd", O_PATH); ++ if (lo->proc_self_fd == -1) { ++ fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); ++ exit(1); ++ } ++} ++ + int main(int argc, char *argv[]) + { + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); + struct fuse_session *se; + struct fuse_cmdline_opts opts; +- struct lo_data lo = { .debug = 0, .writeback = 0 }; ++ struct lo_data lo = { ++ .debug = 0, ++ .writeback = 0, ++ .proc_self_fd = -1, ++ }; + struct lo_map_elem *root_elem; + int ret = -1; + +@@ -2003,6 +2072,9 @@ int main(int argc, char *argv[]) + + fuse_daemonize(opts.foreground); + ++ /* Must be after daemonize to get the right /proc/self/fd */ ++ setup_proc_self_fd(&lo); ++ + /* Block until ctrl+c or fusermount -u */ + ret = virtio_loop(se); + +@@ -2018,6 +2090,10 @@ err_out1: + lo_map_destroy(&lo.dirp_map); + lo_map_destroy(&lo.ino_map); + ++ if (lo.proc_self_fd >= 0) { ++ close(lo.proc_self_fd); ++ } ++ + if (lo.root.fd >= 0) { + close(lo.root.fd); + } +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch b/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch new file mode 100644 index 0000000..d60a902 --- /dev/null +++ b/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch @@ -0,0 +1,137 @@ +From 6877a6c456178d6c1ca9a0ffaabaa7e51105b2ac Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:22 +0100 +Subject: [PATCH 051/116] virtiofsd: validate input buffer sizes in + do_write_buf() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200127190227.40942-48-dgilbert@redhat.com> +Patchwork-id: 93501 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 047/112] virtiofsd: validate input buffer sizes in do_write_buf() +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +There is a small change in behavior: if fuse_write_in->size doesn't +match the input buffer size then the request is failed. Previously +write requests with 1 fuse_buf element would truncate to +fuse_write_in->size. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Sergio Lopez +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 0ba8c3c6fce8fe949d59c1fd84d98d220ef9e759) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/fuse_lowlevel.c | 49 +++++++++++++++++++++++++---------------- + 1 file changed, 30 insertions(+), 19 deletions(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 7e10995..611e8b0 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1003,8 +1003,8 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) + } + } + +-static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, +- struct fuse_bufvec *ibufv) ++static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, ++ struct fuse_mbuf_iter *iter, struct fuse_bufvec *ibufv) + { + struct fuse_session *se = req->se; + struct fuse_bufvec *pbufv = ibufv; +@@ -1012,28 +1012,27 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, + .buf[0] = ibufv->buf[0], + .count = 1, + }; +- struct fuse_write_in *arg = (struct fuse_write_in *)inarg; ++ struct fuse_write_in *arg; ++ size_t arg_size = sizeof(*arg); + struct fuse_file_info fi; + + memset(&fi, 0, sizeof(fi)); ++ ++ arg = fuse_mbuf_iter_advance(iter, arg_size); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ ++ fi.lock_owner = 
arg->lock_owner; ++ fi.flags = arg->flags; + fi.fh = arg->fh; + fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; + + if (ibufv->count == 1) { +- fi.lock_owner = arg->lock_owner; +- fi.flags = arg->flags; +- if (!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)) { +- tmpbufv.buf[0].mem = PARAM(arg); +- } +- tmpbufv.buf[0].size -= +- sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); +- if (tmpbufv.buf[0].size < arg->size) { +- fuse_log(FUSE_LOG_ERR, +- "fuse: do_write_buf: buffer size too small\n"); +- fuse_reply_err(req, EIO); +- return; +- } +- tmpbufv.buf[0].size = arg->size; ++ assert(!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)); ++ tmpbufv.buf[0].mem = ((char *)arg) + arg_size; ++ tmpbufv.buf[0].size -= sizeof(struct fuse_in_header) + arg_size; + pbufv = &tmpbufv; + } else { + /* +@@ -1043,6 +1042,13 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, + ibufv->buf[0].size = 0; + } + ++ if (fuse_buf_size(pbufv) != arg->size) { ++ fuse_log(FUSE_LOG_ERR, ++ "fuse: do_write_buf: buffer size doesn't match arg->size\n"); ++ fuse_reply_err(req, EIO); ++ return; ++ } ++ + se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); + } + +@@ -2052,12 +2058,17 @@ void fuse_session_process_buf_int(struct fuse_session *se, + struct fuse_chan *ch) + { + const struct fuse_buf *buf = bufv->buf; ++ struct fuse_mbuf_iter iter = FUSE_MBUF_ITER_INIT(buf); + struct fuse_in_header *in; + const void *inarg; + struct fuse_req *req; + int err; + +- in = buf->mem; ++ /* The first buffer must be a memory buffer */ ++ assert(!(buf->flags & FUSE_BUF_IS_FD)); ++ ++ in = fuse_mbuf_iter_advance(&iter, sizeof(*in)); ++ assert(in); /* caller guarantees the input buffer is large enough */ + + if (se->debug) { + fuse_log(FUSE_LOG_DEBUG, +@@ -2129,7 +2140,7 @@ void fuse_session_process_buf_int(struct fuse_session *se, + + inarg = (void *)&in[1]; + if (in->opcode == FUSE_WRITE && se->op.write_buf) { +- do_write_buf(req, in->nodeid, inarg, bufv); ++ do_write_buf(req, 
in->nodeid, &iter, bufv); + } else { + fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); + } +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-validate-path-components.patch b/kvm-virtiofsd-validate-path-components.patch new file mode 100644 index 0000000..b35aed7 --- /dev/null +++ b/kvm-virtiofsd-validate-path-components.patch @@ -0,0 +1,164 @@ +From 69ac47502848c37ca3ede00f432c0675d9eef42c Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:01:18 +0100 +Subject: [PATCH 047/116] virtiofsd: validate path components +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-44-dgilbert@redhat.com> +Patchwork-id: 93498 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 043/112] virtiofsd: validate path components +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Stefan Hajnoczi + +Several FUSE requests contain single path components. A correct FUSE +client sends well-formed path components but there is currently no input +validation in case something went wrong or the client is malicious. + +Refuse ".", "..", and paths containing '/' when we expect a path +component. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 25dae28c58d7e706b5d5db99042c9db3cef2e657) +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 59 ++++++++++++++++++++++++++++++++++++---- + 1 file changed, 53 insertions(+), 6 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index ac380ef..e375406 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -133,6 +133,21 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); + + static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); + ++static int is_dot_or_dotdot(const char *name) ++{ ++ return name[0] == '.' && ++ (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); ++} ++ ++/* Is `path` a single path component that is not "." or ".."? */ ++static int is_safe_path_component(const char *path) ++{ ++ if (strchr(path, '/')) { ++ return 0; ++ } ++ ++ return !is_dot_or_dotdot(path); ++} + + static struct lo_data *lo_data(fuse_req_t req) + { +@@ -681,6 +696,15 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + parent, name); + } + ++ /* ++ * Don't use is_safe_path_component(), allow "." and ".." for NFS export ++ * support. 
++ */ ++ if (strchr(name, '/')) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + err = lo_do_lookup(req, parent, name, &e); + if (err) { + fuse_reply_err(req, err); +@@ -762,6 +786,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + struct fuse_entry_param e; + struct lo_cred old = {}; + ++ if (!is_safe_path_component(name)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + dir = lo_inode(req, parent); + if (!dir) { + fuse_reply_err(req, EBADF); +@@ -863,6 +892,11 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + struct fuse_entry_param e; + int saverr; + ++ if (!is_safe_path_component(name)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + inode = lo_inode(req, ino); + if (!inode) { + fuse_reply_err(req, EBADF); +@@ -904,6 +938,10 @@ out_err: + static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) + { + int res; ++ if (!is_safe_path_component(name)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); + +@@ -916,6 +954,11 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + { + int res; + ++ if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + if (flags) { + fuse_reply_err(req, EINVAL); + return; +@@ -930,6 +973,11 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) + { + int res; + ++ if (!is_safe_path_component(name)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + res = unlinkat(lo_fd(req, parent), name, 0); + + fuse_reply_err(req, res == -1 ? errno : 0); +@@ -1093,12 +1141,6 @@ out_err: + fuse_reply_err(req, error); + } + +-static int is_dot_or_dotdot(const char *name) +-{ +- return name[0] == '.' && +- (name[1] == '\0' || (name[1] == '.' 
&& name[2] == '\0')); +-} +- + static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + off_t offset, struct fuse_file_info *fi, int plus) + { +@@ -1248,6 +1290,11 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + parent, name); + } + ++ if (!is_safe_path_component(name)) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } ++ + err = lo_change_cred(req, &old); + if (err) { + goto out; +-- +1.8.3.1 + diff --git a/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch b/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch new file mode 100644 index 0000000..20add81 --- /dev/null +++ b/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch @@ -0,0 +1,56 @@ +From 247987aa987b7332eb501e00c440079b9e8e1fe7 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 27 Jan 2020 19:00:52 +0100 +Subject: [PATCH 021/116] vitriofsd/passthrough_ll: fix fallocate() ifdefs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200127190227.40942-18-dgilbert@redhat.com> +Patchwork-id: 93471 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 017/112] vitriofsd/passthrough_ll: fix fallocate() ifdefs +Bugzilla: 1694164 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual + +From: Xiao Yang + +1) Use correct CONFIG_FALLOCATE macro to check if fallocate() is supported.(i.e configure + script sets CONFIG_FALLOCATE intead of HAVE_FALLOCATE if fallocate() is supported) +2) Replace HAVE_POSIX_FALLOCATE with CONFIG_POSIX_FALLOCATE. + +Signed-off-by: Xiao Yang +Signed-off-by: Dr. 
David Alan Gilbert + Merged from two of Xiao Yang's patches +(cherry picked from commit 9776457ca6f05d5900e27decb1dba2ffddf95a22) + +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_ll.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 322a889..6c4da18 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -975,13 +975,13 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, + int err = EOPNOTSUPP; + (void)ino; + +-#ifdef HAVE_FALLOCATE ++#ifdef CONFIG_FALLOCATE + err = fallocate(fi->fh, mode, offset, length); + if (err < 0) { + err = errno; + } + +-#elif defined(HAVE_POSIX_FALLOCATE) ++#elif defined(CONFIG_POSIX_FALLOCATE) + if (mode) { + fuse_reply_err(req, EOPNOTSUPP); + return; +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 63526ad..7ecca51 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 7%{?dist} +Release: 8%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -143,6 +143,236 @@ Patch30: kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch Patch31: kvm-slirp-use-correct-size-while-emulating-commands.patch # For bz#1559846 - Nested KVM: limit VMX features according to CPU models - Fast Train Patch32: kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch +# For bz#1725084 - aarch64: support dumping SVE registers +Patch33: kvm-target-arm-arch_dump-Add-SVE-notes.patch +# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic +Patch34: kvm-vhost-Add-names-to-section-rounded-warning.patch +# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic +Patch35: kvm-vhost-Only-align-sections-for-vhost-user.patch +# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic +Patch36: kvm-vhost-coding-style-fix.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch37: kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch38: kvm-vhost-user-fs-remove-vhostfd-property.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch39: kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch40: kvm-virtiofsd-Pull-in-upstream-headers.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch41: kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch42: kvm-virtiofsd-Add-auxiliary-.c-s.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch43: kvm-virtiofsd-Add-fuse_lowlevel.c.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch44: kvm-virtiofsd-Add-passthrough_ll.patch +# For bz#1694164 - 
virtio-fs: host<->guest shared file system (qemu) +Patch45: kvm-virtiofsd-Trim-down-imported-files.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch46: kvm-virtiofsd-Format-imported-files-to-qemu-style.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch47: kvm-virtiofsd-remove-mountpoint-dummy-argument.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch48: kvm-virtiofsd-remove-unused-notify-reply-support.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch49: kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch50: kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch51: kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch52: kvm-virtiofsd-Trim-out-compatibility-code.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch53: kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch54: kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch55: kvm-virtiofsd-Add-options-for-virtio.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch56: kvm-virtiofsd-add-o-source-PATH-to-help-output.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch57: kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch58: kvm-virtiofsd-Start-wiring-up-vhost-user.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch59: kvm-virtiofsd-Add-main-virtio-loop.patch +# For 
bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch60: kvm-virtiofsd-get-set-features-callbacks.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch61: kvm-virtiofsd-Start-queue-threads.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch62: kvm-virtiofsd-Poll-kick_fd-for-queue.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch63: kvm-virtiofsd-Start-reading-commands-from-queue.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch64: kvm-virtiofsd-Send-replies-to-messages.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch65: kvm-virtiofsd-Keep-track-of-replies.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch66: kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch67: kvm-virtiofsd-Fast-path-for-virtio-read.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch68: kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch69: kvm-virtiofsd-make-f-foreground-the-default.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch70: kvm-virtiofsd-add-vhost-user.json-file.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch71: kvm-virtiofsd-add-print-capabilities-option.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch72: kvm-virtiofs-Add-maintainers-entry.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch73: kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch74: kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system 
(qemu) +Patch75: kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch76: kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch77: kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch78: kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch79: kvm-virtiofsd-validate-path-components.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch80: kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch81: kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch82: kvm-virtiofsd-add-fuse_mbuf_iter-API.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch83: kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch84: kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch85: kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch86: kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch87: kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch88: kvm-virtiofsd-sandbox-mount-namespace.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch89: kvm-virtiofsd-move-to-an-empty-network-namespace.patch +# For 
bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch90: kvm-virtiofsd-move-to-a-new-pid-namespace.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch91: kvm-virtiofsd-add-seccomp-whitelist.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch92: kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch93: kvm-virtiofsd-cap-ng-helpers.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch94: kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch95: kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch96: kvm-virtiofsd-fix-libfuse-information-leaks.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch97: kvm-virtiofsd-add-syslog-command-line-option.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch98: kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch99: kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch100: kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch101: kvm-virtiofsd-Handle-reinit.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch102: kvm-virtiofsd-Handle-hard-reboot.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch103: kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch104: kvm-vhost-user-Print-unexpected-slave-message-types.patch +# For bz#1694164 - virtio-fs: 
host<->guest shared file system (qemu) +Patch105: kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch106: kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch107: kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch108: kvm-virtiofsd-passthrough_ll-control-readdirplus.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch109: kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch110: kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch111: kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch112: kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch113: kvm-virtiofsd-passthrough_ll-use-hashtable.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch114: kvm-virtiofsd-Clean-up-inodes-on-destroy.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch115: kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch116: kvm-virtiofsd-fix-error-handling-in-main.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch117: kvm-virtiofsd-cleanup-allocated-resource-in-se.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch118: kvm-virtiofsd-fix-memory-leak-on-lo.source.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch119: 
kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch120: kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch121: kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch122: kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch123: kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch124: kvm-virtiofsd-Support-remote-posix-locks.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch125: kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch126: kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch127: kvm-virtiofsd-make-lo_release-atomic.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch128: kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch129: kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch130: kvm-libvhost-user-Fix-some-memtable-remap-cases.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch131: kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch132: kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch133: kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch +# For bz#1694164 
- virtio-fs: host<->guest shared file system (qemu) +Patch134: kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch135: kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch136: kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch137: kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch138: kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch139: kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch140: kvm-virtiofsd-process-requests-in-a-thread-pool.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch141: kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch142: kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch143: kvm-virtiofsd-add-thread-pool-size-NUM-option.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch144: kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch145: kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch146: kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch +# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) +Patch147: kvm-virtiofsd-add-some-options-to-the-help-message.patch BuildRequires: wget BuildRequires: rpm-build @@ -197,8 +427,8 
@@ BuildRequires: bluez-libs-devel BuildRequires: brlapi-devel # For test suite BuildRequires: check-devel -# For virtfs -BuildRequires: libcap-devel +# For virtiofs +BuildRequires: libcap-ng-devel # Hard requirement for version >= 1.3 BuildRequires: pixman-devel # Documentation requirement @@ -520,7 +750,7 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --tls-priority=NORMAL \ --disable-bluez \ --disable-brlapi \ - --disable-cap-ng \ + --enable-cap-ng \ --enable-coroutine-pool \ --enable-curl \ --disable-curses \ @@ -1034,6 +1264,8 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_libexecdir}/vhost-user-gpu %{_datadir}/%{name}/vhost-user/50-qemu-gpu.json %endif +%{_libexecdir}/virtiofsd +%{_datadir}/%{name}/vhost-user/50-qemu-virtiofsd.json %files -n qemu-img %defattr(-,root,root) @@ -1077,6 +1309,130 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Fri Jan 31 2020 Miroslav Rezanina - 4.2.0-8.el8 +- kvm-target-arm-arch_dump-Add-SVE-notes.patch [bz#1725084] +- kvm-vhost-Add-names-to-section-rounded-warning.patch [bz#1779041] +- kvm-vhost-Only-align-sections-for-vhost-user.patch [bz#1779041] +- kvm-vhost-coding-style-fix.patch [bz#1779041] +- kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch [bz#1694164] +- kvm-vhost-user-fs-remove-vhostfd-property.patch [bz#1694164] +- kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch [bz#1694164] +- kvm-virtiofsd-Pull-in-upstream-headers.patch [bz#1694164] +- kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch [bz#1694164] +- kvm-virtiofsd-Add-auxiliary-.c-s.patch [bz#1694164] +- kvm-virtiofsd-Add-fuse_lowlevel.c.patch [bz#1694164] +- kvm-virtiofsd-Add-passthrough_ll.patch [bz#1694164] +- kvm-virtiofsd-Trim-down-imported-files.patch [bz#1694164] +- kvm-virtiofsd-Format-imported-files-to-qemu-style.patch [bz#1694164] +- kvm-virtiofsd-remove-mountpoint-dummy-argument.patch [bz#1694164] +- kvm-virtiofsd-remove-unused-notify-reply-support.patch [bz#1694164] +- 
kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch [bz#1694164] +- kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch [bz#1694164] +- kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch [bz#1694164] +- kvm-virtiofsd-Trim-out-compatibility-code.patch [bz#1694164] +- kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch [bz#1694164] +- kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch [bz#1694164] +- kvm-virtiofsd-Add-options-for-virtio.patch [bz#1694164] +- kvm-virtiofsd-add-o-source-PATH-to-help-output.patch [bz#1694164] +- kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch [bz#1694164] +- kvm-virtiofsd-Start-wiring-up-vhost-user.patch [bz#1694164] +- kvm-virtiofsd-Add-main-virtio-loop.patch [bz#1694164] +- kvm-virtiofsd-get-set-features-callbacks.patch [bz#1694164] +- kvm-virtiofsd-Start-queue-threads.patch [bz#1694164] +- kvm-virtiofsd-Poll-kick_fd-for-queue.patch [bz#1694164] +- kvm-virtiofsd-Start-reading-commands-from-queue.patch [bz#1694164] +- kvm-virtiofsd-Send-replies-to-messages.patch [bz#1694164] +- kvm-virtiofsd-Keep-track-of-replies.patch [bz#1694164] +- kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch [bz#1694164] +- kvm-virtiofsd-Fast-path-for-virtio-read.patch [bz#1694164] +- kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch [bz#1694164] +- kvm-virtiofsd-make-f-foreground-the-default.patch [bz#1694164] +- kvm-virtiofsd-add-vhost-user.json-file.patch [bz#1694164] +- kvm-virtiofsd-add-print-capabilities-option.patch [bz#1694164] +- kvm-virtiofs-Add-maintainers-entry.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch [bz#1694164] +- 
kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch [bz#1694164] +- kvm-virtiofsd-validate-path-components.patch [bz#1694164] +- kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch [bz#1694164] +- kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch [bz#1694164] +- kvm-virtiofsd-add-fuse_mbuf_iter-API.patch [bz#1694164] +- kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch [bz#1694164] +- kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch [bz#1694164] +- kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch [bz#1694164] +- kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch [bz#1694164] +- kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch [bz#1694164] +- kvm-virtiofsd-sandbox-mount-namespace.patch [bz#1694164] +- kvm-virtiofsd-move-to-an-empty-network-namespace.patch [bz#1694164] +- kvm-virtiofsd-move-to-a-new-pid-namespace.patch [bz#1694164] +- kvm-virtiofsd-add-seccomp-whitelist.patch [bz#1694164] +- kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch [bz#1694164] +- kvm-virtiofsd-cap-ng-helpers.patch [bz#1694164] +- kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch [bz#1694164] +- kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch [bz#1694164] +- kvm-virtiofsd-fix-libfuse-information-leaks.patch [bz#1694164] +- kvm-virtiofsd-add-syslog-command-line-option.patch [bz#1694164] +- kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch [bz#1694164] +- kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch [bz#1694164] +- kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch [bz#1694164] +- kvm-virtiofsd-Handle-reinit.patch [bz#1694164] +- kvm-virtiofsd-Handle-hard-reboot.patch [bz#1694164] +- kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch [bz#1694164] +- kvm-vhost-user-Print-unexpected-slave-message-types.patch [bz#1694164] +- kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch 
[bz#1694164] +- kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-control-readdirplus.patch [bz#1694164] +- kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch [bz#1694164] +- kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch [bz#1694164] +- kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-use-hashtable.patch [bz#1694164] +- kvm-virtiofsd-Clean-up-inodes-on-destroy.patch [bz#1694164] +- kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch [bz#1694164] +- kvm-virtiofsd-fix-error-handling-in-main.patch [bz#1694164] +- kvm-virtiofsd-cleanup-allocated-resource-in-se.patch [bz#1694164] +- kvm-virtiofsd-fix-memory-leak-on-lo.source.patch [bz#1694164] +- kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch [bz#1694164] +- kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch [bz#1694164] +- kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch [bz#1694164] +- kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch [bz#1694164] +- kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch [bz#1694164] +- kvm-virtiofsd-Support-remote-posix-locks.patch [bz#1694164] +- kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch [bz#1694164] +- kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch [bz#1694164] +- kvm-virtiofsd-make-lo_release-atomic.patch [bz#1694164] +- kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch [bz#1694164] +- kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch [bz#1694164] +- kvm-libvhost-user-Fix-some-memtable-remap-cases.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch [bz#1694164] +- kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch [bz#1694164] +- kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch [bz#1694164] +- 
kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch [bz#1694164] +- kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch [bz#1694164] +- kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch [bz#1694164] +- kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch [bz#1694164] +- kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch [bz#1694164] +- kvm-virtiofsd-process-requests-in-a-thread-pool.patch [bz#1694164] +- kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch [bz#1694164] +- kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch [bz#1694164] +- kvm-virtiofsd-add-thread-pool-size-NUM-option.patch [bz#1694164] +- kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch [bz#1694164] +- kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch [bz#1694164] +- kvm-virtiofsd-add-some-options-to-the-help-message.patch [bz#1694164] +- kvm-redhat-ship-virtiofsd-vhost-user-device-backend.patch [bz#1694164] +- Resolves: bz#1694164 + (virtio-fs: host<->guest shared file system (qemu)) +- Resolves: bz#1725084 + (aarch64: support dumping SVE registers) +- Resolves: bz#1779041 + (netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic) + * Tue Jan 21 2020 Miroslav Rezanina - 4.2.0-7.el8 - kvm-tcp_emu-Fix-oob-access.patch [bz#1791568] - kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch [bz#1791568] From 6ca2f341c22c11f81f5c90763fc580a382bac91e Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Mon, 10 Feb 2020 22:47:04 +0000 Subject: [PATCH 064/195] * Mon Feb 10 2020 Danilo Cesar Lemes de Paula - 4.2.0-9.el8 - kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch [bz#1776638] - kvm-xics-Don-t-deassert-outputs.patch [bz#1776638] - kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch [bz#1776638] - kvm-trace-update-qemu-trace-stap-to-Python-3.patch [bz#1787395] - kvm-redhat-Remove-redundant-fix-for-qemu-trace-stap.patch [bz#1787395] - kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch [bz#1794503] - kvm-tpm-ppi-page-align-PPI-RAM.patch [bz#1787444] - kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch [bz#1647366] - kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch [bz#1647366] - kvm-tests-arm-cpu-features-Check-feature-default-values.patch [bz#1647366] - kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch [bz#1647366] - kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch [bz#1647366] - kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch [bz#1529231] - kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch [bz#1529231] - kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch [bz#1529231] - Resolves: bz#1529231 ([q35] VM hangs after migration with 200 vCPUs) - Resolves: bz#1647366 (aarch64: Add support for the kvm-no-adjvtime ARM CPU feature) - Resolves: bz#1776638 (Guest failed to boot up after system_reset 20 times) - Resolves: bz#1787395 (qemu-trace-stap list : TypeError: startswith first arg must be bytes or a tuple of bytes, not str) - Resolves: bz#1787444 (Broken postcopy migration with vTPM device) - Resolves: bz#1794503 (CVE-2020-1711 qemu-kvm: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-av-8.2.0]) --- ...it-APIC-ID-for-migration-instance-ID.patch | 62 ++++ ...count-from-GET-LBA-STATUS-CVE-2020-1.patch | 79 +++++ ...-SaveStateEntry.instance_id-into-uin.patch | 179 ++++++++++ 
...ation-Define-VMSTATE_INSTANCE_ID_ANY.patch | 257 ++++++++++++++ ...-external-interrupt-pin-in-KVM-on-re.patch | 107 ++++++ ...UPPCState-irq_input_state-with-moder.patch | 112 ++++++ ...Add-the-kvm-no-adjvtime-CPU-property.patch | 281 +++++++++++++++ ...vm-Implement-virtual-time-adjustment.patch | 330 ++++++++++++++++++ ...rivial-Clean-up-header-documentation.patch | 197 +++++++++++ ...vm64-kvm64-cpus-have-timer-registers.patch | 60 ++++ ...eatures-Check-feature-default-values.patch | 106 ++++++ kvm-tpm-ppi-page-align-PPI-RAM.patch | 58 +++ ...e-update-qemu-trace-stap-to-Python-3.patch | 82 +++++ kvm-xics-Don-t-deassert-outputs.patch | 52 +++ qemu-kvm.spec | 62 +++- 15 files changed, 2020 insertions(+), 4 deletions(-) create mode 100644 kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch create mode 100644 kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch create mode 100644 kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch create mode 100644 kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch create mode 100644 kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch create mode 100644 kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch create mode 100644 kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch create mode 100644 kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch create mode 100644 kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch create mode 100644 kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch create mode 100644 kvm-tests-arm-cpu-features-Check-feature-default-values.patch create mode 100644 kvm-tpm-ppi-page-align-PPI-RAM.patch create mode 100644 kvm-trace-update-qemu-trace-stap-to-Python-3.patch create mode 100644 kvm-xics-Don-t-deassert-outputs.patch diff --git a/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch b/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch new file mode 100644 index 0000000..becba21 --- /dev/null +++ 
b/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch @@ -0,0 +1,62 @@ +From 0d5a09173eb75b7e56122c2aefb2646a2be58400 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 31 Jan 2020 17:12:57 +0000 +Subject: [PATCH 15/15] apic: Use 32bit APIC ID for migration instance ID + +RH-Author: Peter Xu +Message-id: <20200131171257.1066593-4-peterx@redhat.com> +Patchwork-id: 93628 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/3] apic: Use 32bit APIC ID for migration instance ID +Bugzilla: 1529231 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert + +Migration is silently broken now with x2apic config like this: + + -smp 200,maxcpus=288,sockets=2,cores=72,threads=2 \ + -device intel-iommu,intremap=on,eim=on + +After migration, the guest kernel could hang at anything, due to +x2apic bit not migrated correctly in IA32_APIC_BASE on some vcpus, so +any operations related to x2apic could be broken then (e.g., RDMSR on +x2apic MSRs could fail because KVM would think that the vcpu hasn't +enabled x2apic at all). + +The issue is that the x2apic bit was never applied correctly for vcpus +whose ID > 255 when migrate completes, and that's because when we +migrate APIC we use the APICCommonState.id as instance ID of the +migration stream, while that's too short for x2apic. + +Let's use the newly introduced initial_apic_id for that. + +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Reviewed-by: Eduardo Habkost +Signed-off-by: Juan Quintela +(cherry picked from commit 0ab994867c365db21e15f9503922c79234d8e40e) +Signed-off-by: Peter Xu +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/intc/apic_common.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c +index 54b8731..b5dbeb6 100644 +--- a/hw/intc/apic_common.c ++++ b/hw/intc/apic_common.c +@@ -268,7 +268,10 @@ static void apic_common_realize(DeviceState *dev, Error **errp) + APICCommonState *s = APIC_COMMON(dev); + APICCommonClass *info; + static DeviceState *vapic; +- uint32_t instance_id = s->id; ++ uint32_t instance_id = s->initial_apic_id; ++ ++ /* Normally initial APIC ID should be no more than hundreds */ ++ assert(instance_id != VMSTATE_INSTANCE_ID_ANY); + + info = APIC_COMMON_GET_CLASS(s); + info->realize(dev, errp); +-- +1.8.3.1 + diff --git a/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch b/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch new file mode 100644 index 0000000..2ee9dcd --- /dev/null +++ b/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch @@ -0,0 +1,79 @@ +From 1c508d56d154caf5fbf53e7dabafd707236cb16b Mon Sep 17 00:00:00 2001 +From: jmaloy +Date: Wed, 29 Jan 2020 13:45:18 +0000 +Subject: [PATCH 06/15] iscsi: Cap block count from GET LBA STATUS + (CVE-2020-1711) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: jmaloy +Message-id: <20200129134518.1293-2-jmaloy@redhat.com> +Patchwork-id: 93571 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] iscsi: Cap block count from GET LBA STATUS (CVE-2020-1711) +Bugzilla: 1794503 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf +RH-Acked-by: Philippe Mathieu-Daudé + +From: Felipe Franciosi + +When querying an iSCSI server for the provisioning status of blocks (via +GET LBA STATUS), Qemu only validates that the response descriptor zero's +LBA matches the one requested. 
Given the SCSI spec allows servers to +respond with the status of blocks beyond the end of the LUN, Qemu may +have its heap corrupted by clearing/setting too many bits at the end of +its allocmap for the LUN. + +A malicious guest in control of the iSCSI server could carefully program +Qemu's heap (by selectively setting the bitmap) and then smash it. + +This limits the number of bits that iscsi_co_block_status() will try to +update in the allocmap so it can't overflow the bitmap. + +Fixes: CVE-2020-1711 +Cc: qemu-stable@nongnu.org +Signed-off-by: Felipe Franciosi +Signed-off-by: Peter Turschmid +Signed-off-by: Raphael Norwitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 693fd2acdf14dd86c0bf852610f1c2cca80a74dc) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. de Paula +--- + block/iscsi.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/block/iscsi.c b/block/iscsi.c +index 2aea7e3..cbd5729 100644 +--- a/block/iscsi.c ++++ b/block/iscsi.c +@@ -701,7 +701,7 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, + struct scsi_get_lba_status *lbas = NULL; + struct scsi_lba_status_descriptor *lbasd = NULL; + struct IscsiTask iTask; +- uint64_t lba; ++ uint64_t lba, max_bytes; + int ret; + + iscsi_co_init_iscsitask(iscsilun, &iTask); +@@ -721,6 +721,7 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, + } + + lba = offset / iscsilun->block_size; ++ max_bytes = (iscsilun->num_blocks - lba) * iscsilun->block_size; + + qemu_mutex_lock(&iscsilun->mutex); + retry: +@@ -764,7 +765,7 @@ retry: + goto out_unlock; + } + +- *pnum = (int64_t) lbasd->num_blocks * iscsilun->block_size; ++ *pnum = MIN((int64_t) lbasd->num_blocks * iscsilun->block_size, max_bytes); + + if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED || + lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) { +-- +1.8.3.1 + diff --git a/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch 
b/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch new file mode 100644 index 0000000..3477af5 --- /dev/null +++ b/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch @@ -0,0 +1,179 @@ +From 38a032829b6b8d523b4cee05f732031e66fc2e41 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 31 Jan 2020 17:12:56 +0000 +Subject: [PATCH 14/15] migration: Change SaveStateEntry.instance_id into + uint32_t + +RH-Author: Peter Xu +Message-id: <20200131171257.1066593-3-peterx@redhat.com> +Patchwork-id: 93629 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/3] migration: Change SaveStateEntry.instance_id into uint32_t +Bugzilla: 1529231 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert + +It was always used as 32bit, so define it as used to be clear. +Instead of using -1 as the auto-gen magic value, we switch to +UINT32_MAX. We also make sure that we don't auto-gen this value to +avoid overflowed instance IDs without being noticed. + +Suggested-by: Juan Quintela +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 93062e23619e057743757ee53bf7f8e07f7a3710) +Signed-off-by: Peter Xu +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + include/migration/vmstate.h + migration/savevm.c + stubs/vmstate.c + Due to missing 3cad405bab ("vmstate: replace DeviceState with + VMStateIf", 2020-01-06) + +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/intc/apic_common.c | 2 +- + include/migration/register.h | 2 +- + include/migration/vmstate.h | 2 +- + migration/savevm.c | 18 ++++++++++-------- + stubs/vmstate.c | 2 +- + 5 files changed, 14 insertions(+), 12 deletions(-) + +diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c +index f2c3a7f..54b8731 100644 +--- a/hw/intc/apic_common.c ++++ b/hw/intc/apic_common.c +@@ -268,7 +268,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) + APICCommonState *s = APIC_COMMON(dev); + APICCommonClass *info; + static DeviceState *vapic; +- int instance_id = s->id; ++ uint32_t instance_id = s->id; + + info = APIC_COMMON_GET_CLASS(s); + info->realize(dev, errp); +diff --git a/include/migration/register.h b/include/migration/register.h +index a13359a..f3ba10b 100644 +--- a/include/migration/register.h ++++ b/include/migration/register.h +@@ -69,7 +69,7 @@ typedef struct SaveVMHandlers { + } SaveVMHandlers; + + int register_savevm_live(const char *idstr, +- int instance_id, ++ uint32_t instance_id, + int version_id, + const SaveVMHandlers *ops, + void *opaque); +diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h +index 883f1cf..296609c 100644 +--- a/include/migration/vmstate.h ++++ b/include/migration/vmstate.h +@@ -1158,7 +1158,7 @@ bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); + #define VMSTATE_INSTANCE_ID_ANY -1 + + /* Returns: 0 on success, -1 on failure */ +-int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, ++int vmstate_register_with_alias_id(DeviceState *dev, uint32_t instance_id, + const VMStateDescription *vmsd, + void *base, int alias_id, + int required_for_version, +diff --git a/migration/savevm.c b/migration/savevm.c +index e2e8e0a..a80bb52 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -233,7 +233,7 @@ typedef struct CompatEntry { + typedef struct SaveStateEntry { + QTAILQ_ENTRY(SaveStateEntry) entry; + char idstr[256]; +- int instance_id; ++ 
uint32_t instance_id; + int alias_id; + int version_id; + /* version id read from the stream */ +@@ -665,10 +665,10 @@ void dump_vmstate_json_to_file(FILE *out_file) + fclose(out_file); + } + +-static int calculate_new_instance_id(const char *idstr) ++static uint32_t calculate_new_instance_id(const char *idstr) + { + SaveStateEntry *se; +- int instance_id = 0; ++ uint32_t instance_id = 0; + + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { + if (strcmp(idstr, se->idstr) == 0 +@@ -676,6 +676,8 @@ static int calculate_new_instance_id(const char *idstr) + instance_id = se->instance_id + 1; + } + } ++ /* Make sure we never loop over without being noticed */ ++ assert(instance_id != VMSTATE_INSTANCE_ID_ANY); + return instance_id; + } + +@@ -730,7 +732,7 @@ static void savevm_state_handler_insert(SaveStateEntry *nse) + Meanwhile pass -1 as instance_id if you do not already have a clearly + distinguishing id for all instances of your device class. */ + int register_savevm_live(const char *idstr, +- int instance_id, ++ uint32_t instance_id, + int version_id, + const SaveVMHandlers *ops, + void *opaque) +@@ -784,7 +786,7 @@ void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque) + } + } + +-int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, ++int vmstate_register_with_alias_id(DeviceState *dev, uint32_t instance_id, + const VMStateDescription *vmsd, + void *opaque, int alias_id, + int required_for_version, +@@ -1600,7 +1602,7 @@ int qemu_save_device_state(QEMUFile *f) + return qemu_file_get_error(f); + } + +-static SaveStateEntry *find_se(const char *idstr, int instance_id) ++static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id) + { + SaveStateEntry *se; + +@@ -2267,7 +2269,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) + /* Find savevm section */ + se = find_se(idstr, instance_id); + if (se == NULL) { +- error_report("Unknown savevm section or instance '%s' %d. 
" ++ error_report("Unknown savevm section or instance '%s' %"PRIu32". " + "Make sure that your current VM setup matches your " + "saved VM setup, including any hotplugged devices", + idstr, instance_id); +@@ -2291,7 +2293,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) + + ret = vmstate_load(f, se); + if (ret < 0) { +- error_report("error while loading state for instance 0x%x of" ++ error_report("error while loading state for instance 0x%"PRIx32" of" + " device '%s'", instance_id, idstr); + return ret; + } +diff --git a/stubs/vmstate.c b/stubs/vmstate.c +index e1e89b8..4ed5cc6 100644 +--- a/stubs/vmstate.c ++++ b/stubs/vmstate.c +@@ -4,7 +4,7 @@ + const VMStateDescription vmstate_dummy = {}; + + int vmstate_register_with_alias_id(DeviceState *dev, +- int instance_id, ++ uint32_t instance_id, + const VMStateDescription *vmsd, + void *base, int alias_id, + int required_for_version, +-- +1.8.3.1 + diff --git a/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch b/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch new file mode 100644 index 0000000..c2ead53 --- /dev/null +++ b/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch @@ -0,0 +1,257 @@ +From 2659af9267586fb626f543773bf3f844727e473b Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 31 Jan 2020 17:12:55 +0000 +Subject: [PATCH 13/15] migration: Define VMSTATE_INSTANCE_ID_ANY + +RH-Author: Peter Xu +Message-id: <20200131171257.1066593-2-peterx@redhat.com> +Patchwork-id: 93630 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/3] migration: Define VMSTATE_INSTANCE_ID_ANY +Bugzilla: 1529231 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert + +Define the new macro VMSTATE_INSTANCE_ID_ANY for callers who wants to +auto-generate the vmstate instance ID. Previously it was hard coded +as -1 instead of this macro. It helps to change this default value in +the follow up patches. No functional change. 
+ +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 1df2c9a26fcb2fa32d099f8e9adcdae4207872e3) +Signed-off-by: Peter Xu +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + backends/dbus-vmstate.c + File deleted + hw/core/qdev.c + hw/misc/max111x.c + hw/net/eepro100.c + Due to missing commit 3cad405bab ("vmstate: replace + DeviceState with VMStateIf", 2020-01-06) + +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/stellaris.c | 2 +- + hw/core/qdev.c | 3 ++- + hw/display/ads7846.c | 2 +- + hw/i2c/core.c | 2 +- + hw/input/stellaris_input.c | 3 ++- + hw/intc/apic_common.c | 2 +- + hw/misc/max111x.c | 2 +- + hw/net/eepro100.c | 2 +- + hw/pci/pci.c | 2 +- + hw/ppc/spapr.c | 2 +- + hw/timer/arm_timer.c | 2 +- + hw/tpm/tpm_emulator.c | 3 ++- + include/migration/vmstate.h | 2 ++ + migration/savevm.c | 8 ++++---- + 14 files changed, 21 insertions(+), 16 deletions(-) + +diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c +index b198066..bb025e0 100644 +--- a/hw/arm/stellaris.c ++++ b/hw/arm/stellaris.c +@@ -708,7 +708,7 @@ static int stellaris_sys_init(uint32_t base, qemu_irq irq, + memory_region_init_io(&s->iomem, NULL, &ssys_ops, s, "ssys", 0x00001000); + memory_region_add_subregion(get_system_memory(), base, &s->iomem); + ssys_reset(s); +- vmstate_register(NULL, -1, &vmstate_stellaris_sys, s); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_stellaris_sys, s); + return 0; + } + +diff --git a/hw/core/qdev.c b/hw/core/qdev.c +index cf1ba28..40f6b2b 100644 +--- a/hw/core/qdev.c ++++ b/hw/core/qdev.c +@@ -890,7 +890,8 @@ static void device_set_realized(Object *obj, bool value, Error **errp) + dev->canonical_path = object_get_canonical_path(OBJECT(dev)); + + if (qdev_get_vmsd(dev)) { +- if (vmstate_register_with_alias_id(dev, -1, qdev_get_vmsd(dev), dev, ++ if (vmstate_register_with_alias_id(dev, VMSTATE_INSTANCE_ID_ANY, ++ qdev_get_vmsd(dev), dev, + dev->instance_id_alias, + 
dev->alias_required_for_version, + &local_err) < 0) { +diff --git a/hw/display/ads7846.c b/hw/display/ads7846.c +index c12272a..9228b40 100644 +--- a/hw/display/ads7846.c ++++ b/hw/display/ads7846.c +@@ -154,7 +154,7 @@ static void ads7846_realize(SSISlave *d, Error **errp) + + ads7846_int_update(s); + +- vmstate_register(NULL, -1, &vmstate_ads7846, s); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_ads7846, s); + } + + static void ads7846_class_init(ObjectClass *klass, void *data) +diff --git a/hw/i2c/core.c b/hw/i2c/core.c +index 92cd489..d770035 100644 +--- a/hw/i2c/core.c ++++ b/hw/i2c/core.c +@@ -61,7 +61,7 @@ I2CBus *i2c_init_bus(DeviceState *parent, const char *name) + + bus = I2C_BUS(qbus_create(TYPE_I2C_BUS, parent, name)); + QLIST_INIT(&bus->current_devs); +- vmstate_register(NULL, -1, &vmstate_i2c_bus, bus); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_i2c_bus, bus); + return bus; + } + +diff --git a/hw/input/stellaris_input.c b/hw/input/stellaris_input.c +index 59892b0..e6ee5e1 100644 +--- a/hw/input/stellaris_input.c ++++ b/hw/input/stellaris_input.c +@@ -88,5 +88,6 @@ void stellaris_gamepad_init(int n, qemu_irq *irq, const int *keycode) + } + s->num_buttons = n; + qemu_add_kbd_event_handler(stellaris_gamepad_put_key, s); +- vmstate_register(NULL, -1, &vmstate_stellaris_gamepad, s); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, ++ &vmstate_stellaris_gamepad, s); + } +diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c +index 375cb6a..f2c3a7f 100644 +--- a/hw/intc/apic_common.c ++++ b/hw/intc/apic_common.c +@@ -284,7 +284,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) + } + + if (s->legacy_instance_id) { +- instance_id = -1; ++ instance_id = VMSTATE_INSTANCE_ID_ANY; + } + vmstate_register_with_alias_id(NULL, instance_id, &vmstate_apic_common, + s, -1, 0, NULL); +diff --git a/hw/misc/max111x.c b/hw/misc/max111x.c +index a713149..81ee73e 100644 +--- a/hw/misc/max111x.c ++++ b/hw/misc/max111x.c 
+@@ -146,7 +146,7 @@ static int max111x_init(SSISlave *d, int inputs) + s->input[7] = 0x80; + s->com = 0; + +- vmstate_register(dev, -1, &vmstate_max111x, s); ++ vmstate_register(dev, VMSTATE_INSTANCE_ID_ANY, &vmstate_max111x, s); + return 0; + } + +diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c +index cc2dd8b..39920c6 100644 +--- a/hw/net/eepro100.c ++++ b/hw/net/eepro100.c +@@ -1874,7 +1874,7 @@ static void e100_nic_realize(PCIDevice *pci_dev, Error **errp) + + s->vmstate = g_memdup(&vmstate_eepro100, sizeof(vmstate_eepro100)); + s->vmstate->name = qemu_get_queue(s->nic)->model; +- vmstate_register(&pci_dev->qdev, -1, s->vmstate, s); ++ vmstate_register(&pci_dev->qdev, VMSTATE_INSTANCE_ID_ANY, s->vmstate, s); + } + + static void eepro100_instance_init(Object *obj) +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index cbc7a32..fed019d 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -124,7 +124,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp) + bus->machine_done.notify = pcibus_machine_done; + qemu_add_machine_init_done_notifier(&bus->machine_done); + +- vmstate_register(NULL, -1, &vmstate_pcibus, bus); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_pcibus, bus); + } + + static void pcie_bus_realize(BusState *qbus, Error **errp) +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 8749c72..c12862d 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -3028,7 +3028,7 @@ static void spapr_machine_init(MachineState *machine) + * interface, this is a legacy from the sPAPREnvironment structure + * which predated MachineState but had a similar function */ + vmstate_register(NULL, 0, &vmstate_spapr, spapr); +- register_savevm_live("spapr/htab", -1, 1, ++ register_savevm_live("spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, + &savevm_htab_handlers, spapr); + + qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine), +diff --git a/hw/timer/arm_timer.c b/hw/timer/arm_timer.c +index af524fa..beaa285 100644 +--- a/hw/timer/arm_timer.c ++++ 
b/hw/timer/arm_timer.c +@@ -180,7 +180,7 @@ static arm_timer_state *arm_timer_init(uint32_t freq) + s->control = TIMER_CTRL_IE; + + s->timer = ptimer_init(arm_timer_tick, s, PTIMER_POLICY_DEFAULT); +- vmstate_register(NULL, -1, &vmstate_arm_timer, s); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_arm_timer, s); + return s; + } + +diff --git a/hw/tpm/tpm_emulator.c b/hw/tpm/tpm_emulator.c +index 22f9113..da7b490 100644 +--- a/hw/tpm/tpm_emulator.c ++++ b/hw/tpm/tpm_emulator.c +@@ -914,7 +914,8 @@ static void tpm_emulator_inst_init(Object *obj) + tpm_emu->cur_locty_number = ~0; + qemu_mutex_init(&tpm_emu->mutex); + +- vmstate_register(NULL, -1, &vmstate_tpm_emulator, obj); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, ++ &vmstate_tpm_emulator, obj); + } + + /* +diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h +index ac4f46a..883f1cf 100644 +--- a/include/migration/vmstate.h ++++ b/include/migration/vmstate.h +@@ -1155,6 +1155,8 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd, + + bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); + ++#define VMSTATE_INSTANCE_ID_ANY -1 ++ + /* Returns: 0 on success, -1 on failure */ + int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, + const VMStateDescription *vmsd, +diff --git a/migration/savevm.c b/migration/savevm.c +index a71b930..e2e8e0a 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -750,7 +750,7 @@ int register_savevm_live(const char *idstr, + + pstrcat(se->idstr, sizeof(se->idstr), idstr); + +- if (instance_id == -1) { ++ if (instance_id == VMSTATE_INSTANCE_ID_ANY) { + se->instance_id = calculate_new_instance_id(se->idstr); + } else { + se->instance_id = instance_id; +@@ -817,14 +817,14 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, + + se->compat = g_new0(CompatEntry, 1); + pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name); +- se->compat->instance_id = instance_id 
== -1 ? ++ se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ? + calculate_compat_instance_id(vmsd->name) : instance_id; +- instance_id = -1; ++ instance_id = VMSTATE_INSTANCE_ID_ANY; + } + } + pstrcat(se->idstr, sizeof(se->idstr), vmsd->name); + +- if (instance_id == -1) { ++ if (instance_id == VMSTATE_INSTANCE_ID_ANY) { + se->instance_id = calculate_new_instance_id(se->idstr); + } else { + se->instance_id = instance_id; +-- +1.8.3.1 + diff --git a/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch b/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch new file mode 100644 index 0000000..2dbdb16 --- /dev/null +++ b/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch @@ -0,0 +1,107 @@ +From 22fc9bd7e7ae0b72c6f6e483eb66cf996f519766 Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Tue, 21 Jan 2020 05:16:11 +0000 +Subject: [PATCH 01/15] ppc: Deassert the external interrupt pin in KVM on + reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Gibson +Message-id: <20200121051613.388295-2-dgibson@redhat.com> +Patchwork-id: 93429 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 1/3] ppc: Deassert the external interrupt pin in KVM on reset +Bugzilla: 1776638 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth + +From: Greg Kurz + +When a CPU is reset, QEMU makes sure no interrupt is pending by clearing +CPUPPCstate::pending_interrupts in ppc_cpu_reset(). In the case of a +complete machine emulation, eg. a sPAPR machine, an external interrupt +request could still be pending in KVM though, eg. an IPI. It will be +eventually presented to the guest, which is supposed to acknowledge it at +the interrupt controller. If the interrupt controller is emulated in QEMU, +either XICS or XIVE, ppc_set_irq() won't deassert the external interrupt +pin in KVM since it isn't pending anymore for QEMU. 
When the vCPU re-enters +the guest, the interrupt request is still pending and the vCPU will try +again to acknowledge it. This causes an infinite loop and eventually hangs +the guest. + +The code has been broken since the beginning. The issue wasn't hit before +because accel=kvm,kernel-irqchip=off is an awkward setup that never got +used until recently with the LC92x IBM systems (aka, Boston). + +Add a ppc_irq_reset() function to do the necessary cleanup, ie. deassert +the IRQ pins of the CPU in QEMU and most importantly the external interrupt +pin for this vCPU in KVM. + +Reported-by: Satheesh Rajendran +Signed-off-by: Greg Kurz +Message-Id: <157548861740.3650476.16879693165328764758.stgit@bahia.lan> +Signed-off-by: David Gibson +(cherry picked from commit 401774387aeb37f2ada9bb18f7c7e307b21a3e93) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/ppc.c | 8 ++++++++ + include/hw/ppc/ppc.h | 2 ++ + target/ppc/translate_init.inc.c | 1 + + 3 files changed, 11 insertions(+) + +diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c +index 52a18eb..d554b64 100644 +--- a/hw/ppc/ppc.c ++++ b/hw/ppc/ppc.c +@@ -1510,3 +1510,11 @@ PowerPCCPU *ppc_get_vcpu_by_pir(int pir) + + return NULL; + } ++ ++void ppc_irq_reset(PowerPCCPU *cpu) ++{ ++ CPUPPCState *env = &cpu->env; ++ ++ env->irq_input_state = 0; ++ kvmppc_set_interrupt(cpu, PPC_INTERRUPT_EXT, 0); ++} +diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h +index 4bdcb8b..5dd7531 100644 +--- a/include/hw/ppc/ppc.h ++++ b/include/hw/ppc/ppc.h +@@ -76,6 +76,7 @@ static inline void ppc970_irq_init(PowerPCCPU *cpu) {} + static inline void ppcPOWER7_irq_init(PowerPCCPU *cpu) {} + static inline void ppcPOWER9_irq_init(PowerPCCPU *cpu) {} + static inline void ppce500_irq_init(PowerPCCPU *cpu) {} ++static inline void ppc_irq_reset(PowerPCCPU *cpu) {} + #else + void ppc40x_irq_init(PowerPCCPU *cpu); + void ppce500_irq_init(PowerPCCPU *cpu); +@@ 
-83,6 +84,7 @@ void ppc6xx_irq_init(PowerPCCPU *cpu); + void ppc970_irq_init(PowerPCCPU *cpu); + void ppcPOWER7_irq_init(PowerPCCPU *cpu); + void ppcPOWER9_irq_init(PowerPCCPU *cpu); ++void ppc_irq_reset(PowerPCCPU *cpu); + #endif + + /* PPC machines for OpenBIOS */ +diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c +index ba726de..64a8380 100644 +--- a/target/ppc/translate_init.inc.c ++++ b/target/ppc/translate_init.inc.c +@@ -10461,6 +10461,7 @@ static void ppc_cpu_reset(CPUState *s) + env->pending_interrupts = 0; + s->exception_index = POWERPC_EXCP_NONE; + env->error_code = 0; ++ ppc_irq_reset(cpu); + + /* tininess for underflow is detected before rounding */ + set_float_detect_tininess(float_tininess_before_rounding, +-- +1.8.3.1 + diff --git a/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch b/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch new file mode 100644 index 0000000..457d149 --- /dev/null +++ b/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch @@ -0,0 +1,112 @@ +From f2f57c1ed926384e074d2048cdbdc30ee2f426eb Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Tue, 21 Jan 2020 05:16:13 +0000 +Subject: [PATCH 03/15] ppc: Don't use CPUPPCState::irq_input_state with modern + Book3s CPU models +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Gibson +Message-id: <20200121051613.388295-4-dgibson@redhat.com> +Patchwork-id: 93431 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 3/3] ppc: Don't use CPUPPCState::irq_input_state with modern Book3s CPU models +Bugzilla: 1776638 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth + +From: Greg Kurz + +The power7_set_irq() and power9_set_irq() functions set this but it is +never used actually. Modern Book3s compatible CPUs are only supported +by the pnv and spapr machines. 
They have an interrupt controller, XICS +for POWER7/8 and XIVE for POWER9, whose models don't require to track +IRQ input states at the CPU level. + +Drop these lines to avoid confusion. + +Signed-off-by: Greg Kurz +Message-Id: <157548862861.3650476.16622818876928044450.stgit@bahia.lan> +Signed-off-by: David Gibson +(cherry picked from commit c1ad0b892ce20cf2b5e619c79e8a0c4c66b235dc) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/ppc.c | 16 ++-------------- + target/ppc/cpu.h | 4 +++- + 2 files changed, 5 insertions(+), 15 deletions(-) + +diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c +index d554b64..730a41f 100644 +--- a/hw/ppc/ppc.c ++++ b/hw/ppc/ppc.c +@@ -275,10 +275,9 @@ void ppc970_irq_init(PowerPCCPU *cpu) + static void power7_set_irq(void *opaque, int pin, int level) + { + PowerPCCPU *cpu = opaque; +- CPUPPCState *env = &cpu->env; + + LOG_IRQ("%s: env %p pin %d level %d\n", __func__, +- env, pin, level); ++ &cpu->env, pin, level); + + switch (pin) { + case POWER7_INPUT_INT: +@@ -292,11 +291,6 @@ static void power7_set_irq(void *opaque, int pin, int level) + LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); + return; + } +- if (level) { +- env->irq_input_state |= 1 << pin; +- } else { +- env->irq_input_state &= ~(1 << pin); +- } + } + + void ppcPOWER7_irq_init(PowerPCCPU *cpu) +@@ -311,10 +305,9 @@ void ppcPOWER7_irq_init(PowerPCCPU *cpu) + static void power9_set_irq(void *opaque, int pin, int level) + { + PowerPCCPU *cpu = opaque; +- CPUPPCState *env = &cpu->env; + + LOG_IRQ("%s: env %p pin %d level %d\n", __func__, +- env, pin, level); ++ &cpu->env, pin, level); + + switch (pin) { + case POWER9_INPUT_INT: +@@ -334,11 +327,6 @@ static void power9_set_irq(void *opaque, int pin, int level) + LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); + return; + } +- if (level) { +- env->irq_input_state |= 1 << pin; +- } else { +- env->irq_input_state &= ~(1 << pin); +- } 
+ } + + void ppcPOWER9_irq_init(PowerPCCPU *cpu) +diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h +index 5c53801..8887f76 100644 +--- a/target/ppc/cpu.h ++++ b/target/ppc/cpu.h +@@ -1090,7 +1090,9 @@ struct CPUPPCState { + #if !defined(CONFIG_USER_ONLY) + /* + * This is the IRQ controller, which is implementation dependent +- * and only relevant when emulating a complete machine. ++ * and only relevant when emulating a complete machine. Note that ++ * this isn't used by recent Book3s compatible CPUs (POWER7 and ++ * newer). + */ + uint32_t irq_input_state; + void **irq_inputs; +-- +1.8.3.1 + diff --git a/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch b/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch new file mode 100644 index 0000000..601b8c4 --- /dev/null +++ b/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch @@ -0,0 +1,281 @@ +From 730f72105b478553c4f22555c29b0f64224ff914 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 31 Jan 2020 14:23:14 +0000 +Subject: [PATCH 12/15] target/arm/cpu: Add the kvm-no-adjvtime CPU property +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200131142314.13175-6-drjones@redhat.com> +Patchwork-id: 93623 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/5] target/arm/cpu: Add the kvm-no-adjvtime CPU property +Bugzilla: 1647366 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 + +Author: Andrew Jones +Date: Thu, 30 Jan 2020 16:02:06 +0000 + + target/arm/cpu: Add the kvm-no-adjvtime CPU property + + kvm-no-adjvtime is a KVM specific CPU property and a first of its + kind. To accommodate it we also add kvm_arm_add_vcpu_properties() + and a KVM specific CPU properties description to the CPU features + document. 
+ + Signed-off-by: Andrew Jones + Message-id: 20200120101023.16030-7-drjones@redhat.com + Reviewed-by: Peter Maydell + Signed-off-by: Peter Maydell + +(cherry picked from commit dea101a1ae9968c9fec6ab0291489dad7c49f36f) +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + Dropped the second hunk of the hw/arm/virt.c changes + as they would patch dead code. + +Signed-off-by: Danilo C. L. de Paula +--- + docs/arm-cpu-features.rst | 37 ++++++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 5 +++++ + include/hw/arm/virt.h | 1 + + target/arm/cpu.c | 2 ++ + target/arm/cpu64.c | 1 + + target/arm/kvm.c | 28 ++++++++++++++++++++++++++++ + target/arm/kvm_arm.h | 11 +++++++++++ + target/arm/monitor.c | 1 + + tests/arm-cpu-features.c | 4 ++++ + 9 files changed, 89 insertions(+), 1 deletion(-) + +diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst +index 1b367e2..45d1eb6 100644 +--- a/docs/arm-cpu-features.rst ++++ b/docs/arm-cpu-features.rst +@@ -31,7 +31,9 @@ supporting the feature or only supporting the feature under certain + configurations. For example, the `aarch64` CPU feature, which, when + disabled, enables the optional AArch32 CPU feature, is only supported + when using the KVM accelerator and when running on a host CPU type that +-supports the feature. ++supports the feature. While `aarch64` currently only works with KVM, ++it could work with TCG. CPU features that are specific to KVM are ++prefixed with "kvm-" and are described in "KVM VCPU Features". + + CPU Feature Probing + =================== +@@ -171,6 +173,39 @@ disabling many SVE vector lengths would be quite verbose, the `sve` CPU + properties have special semantics (see "SVE CPU Property Parsing + Semantics"). + ++KVM VCPU Features ++================= ++ ++KVM VCPU features are CPU features that are specific to KVM, such as ++paravirt features or features that enable CPU virtualization extensions. 
++The features' CPU properties are only available when KVM is enabled and ++are named with the prefix "kvm-". KVM VCPU features may be probed, ++enabled, and disabled in the same way as other CPU features. Below is ++the list of KVM VCPU features and their descriptions. ++ ++ kvm-no-adjvtime By default kvm-no-adjvtime is disabled. This ++ means that by default the virtual time ++ adjustment is enabled (vtime is *not not* ++ adjusted). ++ ++ When virtual time adjustment is enabled each ++ time the VM transitions back to running state ++ the VCPU's virtual counter is updated to ensure ++ stopped time is not counted. This avoids time ++ jumps surprising guest OSes and applications, ++ as long as they use the virtual counter for ++ timekeeping. However it has the side effect of ++ the virtual and physical counters diverging. ++ All timekeeping based on the virtual counter ++ will appear to lag behind any timekeeping that ++ does not subtract VM stopped time. The guest ++ may resynchronize its virtual counter with ++ other time sources as needed. ++ ++ Enable kvm-no-adjvtime to disable virtual time ++ adjustment, also restoring the legacy (pre-5.0) ++ behavior. 
++ + SVE CPU Properties + ================== + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e108391..d30d38c 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1707,6 +1707,11 @@ static void machvirt_init(MachineState *machine) + } + } + ++ if (vmc->kvm_no_adjvtime && ++ object_property_find(cpuobj, "kvm-no-adjvtime", NULL)) { ++ object_property_set_bool(cpuobj, true, "kvm-no-adjvtime", NULL); ++ } ++ + if (vmc->no_pmu && object_property_find(cpuobj, "pmu", NULL)) { + object_property_set_bool(cpuobj, false, "pmu", NULL); + } +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 53fdf16..77828ce 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -109,6 +109,7 @@ typedef struct { + bool smbios_old_sys_ver; + bool no_highmem_ecam; + bool no_ged; /* Machines < 4.2 has no support for ACPI GED device */ ++ bool kvm_no_adjvtime; + } VirtMachineClass; + + typedef struct { +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 3788fc3..e46efe9 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2482,6 +2482,7 @@ static void arm_max_initfn(Object *obj) + + if (kvm_enabled()) { + kvm_arm_set_cpu_features_from_host(cpu); ++ kvm_arm_add_vcpu_properties(obj); + } else { + cortex_a15_initfn(obj); + +@@ -2673,6 +2674,7 @@ static void arm_host_initfn(Object *obj) + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + aarch64_add_sve_properties(obj); + } ++ kvm_arm_add_vcpu_properties(obj); + arm_cpu_post_init(obj); + } + +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index a39d6fc..3cd416d 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -605,6 +605,7 @@ static void aarch64_max_initfn(Object *obj) + + if (kvm_enabled()) { + kvm_arm_set_cpu_features_from_host(cpu); ++ kvm_arm_add_vcpu_properties(obj); + } else { + uint64_t t; + uint32_t u; +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 26d7f8b..4be9497 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -17,6 +17,8 @@ + #include "qemu/timer.h" + 
#include "qemu/error-report.h" + #include "qemu/main-loop.h" ++#include "qom/object.h" ++#include "qapi/error.h" + #include "sysemu/sysemu.h" + #include "sysemu/kvm.h" + #include "sysemu/kvm_int.h" +@@ -179,6 +181,32 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) + env->features = arm_host_cpu_features.features; + } + ++static bool kvm_no_adjvtime_get(Object *obj, Error **errp) ++{ ++ return !ARM_CPU(obj)->kvm_adjvtime; ++} ++ ++static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp) ++{ ++ ARM_CPU(obj)->kvm_adjvtime = !value; ++} ++ ++/* KVM VCPU properties should be prefixed with "kvm-". */ ++void kvm_arm_add_vcpu_properties(Object *obj) ++{ ++ if (!kvm_enabled()) { ++ return; ++ } ++ ++ ARM_CPU(obj)->kvm_adjvtime = true; ++ object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get, ++ kvm_no_adjvtime_set, &error_abort); ++ object_property_set_description(obj, "kvm-no-adjvtime", ++ "Set on to disable the adjustment of " ++ "the virtual counter. VM stopped time " ++ "will be counted.", &error_abort); ++} ++ + bool kvm_arm_pmu_supported(CPUState *cpu) + { + KVMState *s = KVM_STATE(current_machine->accelerator); +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 01a9a18..ae9e075 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -256,6 +256,15 @@ void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map); + void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); + + /** ++ * kvm_arm_add_vcpu_properties: ++ * @obj: The CPU object to add the properties to ++ * ++ * Add all KVM specific CPU properties to the CPU object. These ++ * are the CPU properties with "kvm-" prefixed names. 
++ */ ++void kvm_arm_add_vcpu_properties(Object *obj); ++ ++/** + * kvm_arm_aarch32_supported: + * @cs: CPUState + * +@@ -345,6 +354,8 @@ static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) + cpu->host_cpu_probe_failed = true; + } + ++static inline void kvm_arm_add_vcpu_properties(Object *obj) {} ++ + static inline bool kvm_arm_aarch32_supported(CPUState *cs) + { + return false; +diff --git a/target/arm/monitor.c b/target/arm/monitor.c +index fa054f8..9725dff 100644 +--- a/target/arm/monitor.c ++++ b/target/arm/monitor.c +@@ -103,6 +103,7 @@ static const char *cpu_model_advertised_features[] = { + "sve128", "sve256", "sve384", "sve512", + "sve640", "sve768", "sve896", "sve1024", "sve1152", "sve1280", + "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048", ++ "kvm-no-adjvtime", + NULL + }; + +diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c +index 89285ca..ba1a6fe 100644 +--- a/tests/arm-cpu-features.c ++++ b/tests/arm-cpu-features.c +@@ -428,6 +428,8 @@ static void test_query_cpu_model_expansion(const void *data) + assert_has_feature_enabled(qts, "cortex-a15", "pmu"); + assert_has_not_feature(qts, "cortex-a15", "aarch64"); + ++ assert_has_not_feature(qts, "max", "kvm-no-adjvtime"); ++ + if (g_str_equal(qtest_get_arch(), "aarch64")) { + assert_has_feature_enabled(qts, "max", "aarch64"); + assert_has_feature_enabled(qts, "max", "sve"); +@@ -462,6 +464,8 @@ static void test_query_cpu_model_expansion_kvm(const void *data) + return; + } + ++ assert_has_feature_disabled(qts, "host", "kvm-no-adjvtime"); ++ + if (g_str_equal(qtest_get_arch(), "aarch64")) { + bool kvm_supports_sve; + char max_name[8], name[8]; +-- +1.8.3.1 + diff --git a/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch b/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch new file mode 100644 index 0000000..3396a32 --- /dev/null +++ b/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch @@ -0,0 +1,330 @@ +From 
5388ea3fc0737d1a659256ff3663057bef484c19 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 31 Jan 2020 14:23:13 +0000 +Subject: [PATCH 11/15] target/arm/kvm: Implement virtual time adjustment +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200131142314.13175-5-drjones@redhat.com> +Patchwork-id: 93622 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/5] target/arm/kvm: Implement virtual time adjustment +Bugzilla: 1647366 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 + +Author: Andrew Jones +Date: Thu, 30 Jan 2020 16:02:06 +0000 + + target/arm/kvm: Implement virtual time adjustment + + When a VM is stopped (such as when it's paused) guest virtual time + should stop counting. Otherwise, when the VM is resumed it will + experience time jumps and its kernel may report soft lockups. Not + counting virtual time while the VM is stopped has the side effect + of making the guest's time appear to lag when compared with real + time, and even with time derived from the physical counter. For + this reason, this change, which is enabled by default, comes with + a KVM CPU feature allowing it to be disabled, restoring legacy + behavior. + + This patch only provides the implementation of the virtual time + adjustment. A subsequent patch will provide the CPU property + allowing the change to be enabled and disabled. + + Reported-by: Bijan Mottahedeh + Signed-off-by: Andrew Jones + Message-id: 20200120101023.16030-6-drjones@redhat.com + Reviewed-by: Peter Maydell + Signed-off-by: Peter Maydell + +(cherry picked from commit e5ac4200b4cddf44df9adbef677af0d1f1c579c6) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/arm/cpu.h | 7 ++++ + target/arm/kvm.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++ + target/arm/kvm32.c | 3 ++ + target/arm/kvm64.c | 3 ++ + target/arm/kvm_arm.h | 38 ++++++++++++++++++++++ + target/arm/machine.c | 7 ++++ + 6 files changed, 150 insertions(+) + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 82dd3cc..fbd8ea0 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -821,6 +821,13 @@ struct ARMCPU { + /* KVM init features for this CPU */ + uint32_t kvm_init_features[7]; + ++ /* KVM CPU state */ ++ ++ /* KVM virtual time adjustment */ ++ bool kvm_adjvtime; ++ bool kvm_vtime_dirty; ++ uint64_t kvm_vtime; ++ + /* Uniprocessor system with MP extensions */ + bool mp_is_up; + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 5b82cef..26d7f8b 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -359,6 +359,22 @@ static int compare_u64(const void *a, const void *b) + return 0; + } + ++/* ++ * cpreg_values are sorted in ascending order by KVM register ID ++ * (see kvm_arm_init_cpreg_list). This allows us to cheaply find ++ * the storage for a KVM register by ID with a binary search. ++ */ ++static uint64_t *kvm_arm_get_cpreg_ptr(ARMCPU *cpu, uint64_t regidx) ++{ ++ uint64_t *res; ++ ++ res = bsearch(&regidx, cpu->cpreg_indexes, cpu->cpreg_array_len, ++ sizeof(uint64_t), compare_u64); ++ assert(res); ++ ++ return &cpu->cpreg_values[res - cpu->cpreg_indexes]; ++} ++ + /* Initialize the ARMCPU cpreg list according to the kernel's + * definition of what CPU registers it knows about (and throw away + * the previous TCG-created cpreg list).
+@@ -512,6 +528,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) + return ok; + } + ++void kvm_arm_cpu_pre_save(ARMCPU *cpu) ++{ ++ /* KVM virtual time adjustment */ ++ if (cpu->kvm_vtime_dirty) { ++ *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT) = cpu->kvm_vtime; ++ } ++} ++ ++void kvm_arm_cpu_post_load(ARMCPU *cpu) ++{ ++ /* KVM virtual time adjustment */ ++ if (cpu->kvm_adjvtime) { ++ cpu->kvm_vtime = *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT); ++ cpu->kvm_vtime_dirty = true; ++ } ++} ++ + void kvm_arm_reset_vcpu(ARMCPU *cpu) + { + int ret; +@@ -579,6 +612,50 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) + return 0; + } + ++void kvm_arm_get_virtual_time(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ struct kvm_one_reg reg = { ++ .id = KVM_REG_ARM_TIMER_CNT, ++ .addr = (uintptr_t)&cpu->kvm_vtime, ++ }; ++ int ret; ++ ++ if (cpu->kvm_vtime_dirty) { ++ return; ++ } ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg); ++ if (ret) { ++ error_report("Failed to get KVM_REG_ARM_TIMER_CNT"); ++ abort(); ++ } ++ ++ cpu->kvm_vtime_dirty = true; ++} ++ ++void kvm_arm_put_virtual_time(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ struct kvm_one_reg reg = { ++ .id = KVM_REG_ARM_TIMER_CNT, ++ .addr = (uintptr_t)&cpu->kvm_vtime, ++ }; ++ int ret; ++ ++ if (!cpu->kvm_vtime_dirty) { ++ return; ++ } ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg); ++ if (ret) { ++ error_report("Failed to set KVM_REG_ARM_TIMER_CNT"); ++ abort(); ++ } ++ ++ cpu->kvm_vtime_dirty = false; ++} ++ + int kvm_put_vcpu_events(ARMCPU *cpu) + { + CPUARMState *env = &cpu->env; +@@ -690,6 +767,21 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) + return MEMTXATTRS_UNSPECIFIED; + } + ++void kvm_arm_vm_state_change(void *opaque, int running, RunState state) ++{ ++ CPUState *cs = opaque; ++ ARMCPU *cpu = ARM_CPU(cs); ++ ++ if (running) { ++ if (cpu->kvm_adjvtime) { ++ kvm_arm_put_virtual_time(cs); ++ } ++ } else { ++ if (cpu->kvm_adjvtime) { ++
kvm_arm_get_virtual_time(cs); ++ } ++ } ++} + + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + { +diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c +index 32bf8d6..3a8b437 100644 +--- a/target/arm/kvm32.c ++++ b/target/arm/kvm32.c +@@ -16,6 +16,7 @@ + #include "qemu-common.h" + #include "cpu.h" + #include "qemu/timer.h" ++#include "sysemu/runstate.h" + #include "sysemu/kvm.h" + #include "kvm_arm.h" + #include "internals.h" +@@ -198,6 +199,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + return -EINVAL; + } + ++ qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); ++ + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); + if (cpu->start_powered_off) { +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 666a81a..d368189 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -23,6 +23,7 @@ + #include "qemu/host-utils.h" + #include "qemu/main-loop.h" + #include "exec/gdbstub.h" ++#include "sysemu/runstate.h" + #include "sysemu/kvm.h" + #include "sysemu/kvm_int.h" + #include "kvm_arm.h" +@@ -735,6 +736,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + return -EINVAL; + } + ++ qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); ++ + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); + if (cpu->start_powered_off) { +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index b48a9c9..01a9a18 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -128,6 +128,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level); + bool write_kvmstate_to_list(ARMCPU *cpu); + + /** ++ * kvm_arm_cpu_pre_save: ++ * @cpu: ARMCPU ++ * ++ * Called after write_kvmstate_to_list() from cpu_pre_save() to update ++ * the cpreg list with KVM CPU state. 
++ */ ++void kvm_arm_cpu_pre_save(ARMCPU *cpu); ++ ++/** ++ * kvm_arm_cpu_post_load: ++ * @cpu: ARMCPU ++ * ++ * Called from cpu_post_load() to update KVM CPU state from the cpreg list. ++ */ ++void kvm_arm_cpu_post_load(ARMCPU *cpu); ++ ++/** + * kvm_arm_reset_vcpu: + * @cpu: ARMCPU + * +@@ -292,6 +309,24 @@ int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); + */ + int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); + ++/** ++ * kvm_arm_get_virtual_time: ++ * @cs: CPUState ++ * ++ * Gets the VCPU's virtual counter and stores it in the KVM CPU state. ++ */ ++void kvm_arm_get_virtual_time(CPUState *cs); ++ ++/** ++ * kvm_arm_put_virtual_time: ++ * @cs: CPUState ++ * ++ * Sets the VCPU's virtual counter to the value stored in the KVM CPU state. ++ */ ++void kvm_arm_put_virtual_time(CPUState *cs); ++ ++void kvm_arm_vm_state_change(void *opaque, int running, RunState state); ++ + int kvm_arm_vgic_probe(void); + + void kvm_arm_pmu_set_irq(CPUState *cs, int irq); +@@ -339,6 +374,9 @@ static inline void kvm_arm_pmu_set_irq(CPUState *cs, int irq) {} + static inline void kvm_arm_pmu_init(CPUState *cs) {} + + static inline void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map) {} ++ ++static inline void kvm_arm_get_virtual_time(CPUState *cs) {} ++static inline void kvm_arm_put_virtual_time(CPUState *cs) {} + #endif + + static inline const char *gic_class_name(void) +diff --git a/target/arm/machine.c b/target/arm/machine.c +index eb28b23..241890a 100644 +--- a/target/arm/machine.c ++++ b/target/arm/machine.c +@@ -642,6 +642,12 @@ static int cpu_pre_save(void *opaque) + /* This should never fail */ + abort(); + } ++ ++ /* ++ * kvm_arm_cpu_pre_save() must be called after ++ * write_kvmstate_to_list() ++ */ ++ kvm_arm_cpu_pre_save(cpu); + } else { + if (!write_cpustate_to_list(cpu, false)) { + /* This should never fail. */ +@@ -744,6 +750,7 @@ static int cpu_post_load(void *opaque, int version_id) + * we're using it. 
+ */ + write_list_to_cpustate(cpu); ++ kvm_arm_cpu_post_load(cpu); + } else { + if (!write_list_to_cpustate(cpu)) { + return -1; +-- +1.8.3.1 + diff --git a/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch b/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch new file mode 100644 index 0000000..8cdc867 --- /dev/null +++ b/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch @@ -0,0 +1,197 @@ +From 11cb9cb7b1b56d5c9723e9c50bc2903281893bcc Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 31 Jan 2020 14:23:10 +0000 +Subject: [PATCH 08/15] target/arm/kvm: trivial: Clean up header documentation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200131142314.13175-2-drjones@redhat.com> +Patchwork-id: 93625 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/5] target/arm/kvm: trivial: Clean up header documentation +Bugzilla: 1647366 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 + +Author: Andrew Jones +Date: Thu, 30 Jan 2020 16:02:05 +0000 + + target/arm/kvm: trivial: Clean up header documentation + + Signed-off-by: Andrew Jones + Message-id: 20200120101023.16030-2-drjones@redhat.com + Reviewed-by: Peter Maydell + Signed-off-by: Peter Maydell + +(cherry picked from commit d1ebbc9d16297b54b153ee33abe05eb4f1df0c66) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/arm/kvm_arm.h | 46 +++++++++++++++++++++++++++------------------- + 1 file changed, 27 insertions(+), 19 deletions(-) + +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 8e14d40..b48a9c9 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -28,9 +28,9 @@ + int kvm_arm_vcpu_init(CPUState *cs); + + /** +- * kvm_arm_vcpu_finalize ++ * kvm_arm_vcpu_finalize: + * @cs: CPUState +- * @feature: int ++ * @feature: feature to finalize + * + * Finalizes the configuration of the specified VCPU feature by + * invoking the KVM_ARM_VCPU_FINALIZE ioctl. Features requiring +@@ -75,8 +75,8 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, + int kvm_arm_init_cpreg_list(ARMCPU *cpu); + + /** +- * kvm_arm_reg_syncs_via_cpreg_list +- * regidx: KVM register index ++ * kvm_arm_reg_syncs_via_cpreg_list: ++ * @regidx: KVM register index + * + * Return true if this KVM register should be synchronized via the + * cpreg list of arbitrary system registers, false if it is synchronized +@@ -85,8 +85,8 @@ int kvm_arm_init_cpreg_list(ARMCPU *cpu); + bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx); + + /** +- * kvm_arm_cpreg_level +- * regidx: KVM register index ++ * kvm_arm_cpreg_level: ++ * @regidx: KVM register index + * + * Return the level of this coprocessor/system register. Return value is + * either KVM_PUT_RUNTIME_STATE, KVM_PUT_RESET_STATE, or KVM_PUT_FULL_STATE. +@@ -148,6 +148,8 @@ void kvm_arm_init_serror_injection(CPUState *cs); + * @cpu: ARMCPU + * + * Get VCPU related state from kvm. ++ * ++ * Returns: 0 if success else < 0 error code + */ + int kvm_get_vcpu_events(ARMCPU *cpu); + +@@ -156,6 +158,8 @@ int kvm_get_vcpu_events(ARMCPU *cpu); + * @cpu: ARMCPU + * + * Put VCPU related state to kvm. 
++ * ++ * Returns: 0 if success else < 0 error code + */ + int kvm_put_vcpu_events(ARMCPU *cpu); + +@@ -205,10 +209,12 @@ typedef struct ARMHostCPUFeatures { + + /** + * kvm_arm_get_host_cpu_features: +- * @ahcc: ARMHostCPUClass to fill in ++ * @ahcf: ARMHostCPUClass to fill in + * + * Probe the capabilities of the host kernel's preferred CPU and fill + * in the ARMHostCPUClass struct accordingly. ++ * ++ * Returns true on success and false otherwise. + */ + bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf); + +@@ -242,7 +248,7 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); + bool kvm_arm_aarch32_supported(CPUState *cs); + + /** +- * bool kvm_arm_pmu_supported: ++ * kvm_arm_pmu_supported: + * @cs: CPUState + * + * Returns: true if the KVM VCPU can enable its PMU +@@ -251,7 +257,7 @@ bool kvm_arm_aarch32_supported(CPUState *cs); + bool kvm_arm_pmu_supported(CPUState *cs); + + /** +- * bool kvm_arm_sve_supported: ++ * kvm_arm_sve_supported: + * @cs: CPUState + * + * Returns true if the KVM VCPU can enable SVE and false otherwise. +@@ -259,26 +265,30 @@ bool kvm_arm_pmu_supported(CPUState *cs); + bool kvm_arm_sve_supported(CPUState *cs); + + /** +- * kvm_arm_get_max_vm_ipa_size - Returns the number of bits in the +- * IPA address space supported by KVM +- * ++ * kvm_arm_get_max_vm_ipa_size: + * @ms: Machine state handle ++ * ++ * Returns the number of bits in the IPA address space supported by KVM + */ + int kvm_arm_get_max_vm_ipa_size(MachineState *ms); + + /** +- * kvm_arm_sync_mpstate_to_kvm ++ * kvm_arm_sync_mpstate_to_kvm: + * @cpu: ARMCPU + * + * If supported set the KVM MP_STATE based on QEMU's model. ++ * ++ * Returns 0 on success and -1 on failure. + */ + int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); + + /** +- * kvm_arm_sync_mpstate_to_qemu ++ * kvm_arm_sync_mpstate_to_qemu: + * @cpu: ARMCPU + * + * If supported get the MP_STATE from KVM and store in QEMU's model. ++ * ++ * Returns 0 on success and aborts on failure. 
+ */ + int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); + +@@ -292,7 +302,8 @@ int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); + + static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) + { +- /* This should never actually be called in the "not KVM" case, ++ /* ++ * This should never actually be called in the "not KVM" case, + * but set up the fields to indicate an error anyway. + */ + cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE; +@@ -377,23 +388,20 @@ bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit); + * + * Return: TRUE if any hardware breakpoints in use. + */ +- + bool kvm_arm_hw_debug_active(CPUState *cs); + + /** + * kvm_arm_copy_hw_debug_data: +- * + * @ptr: kvm_guest_debug_arch structure + * + * Copy the architecture specific debug registers into the + * kvm_guest_debug ioctl structure. + */ + struct kvm_guest_debug_arch; +- + void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr); + + /** +- * its_class_name ++ * its_class_name: + * + * Return the ITS class name to use depending on whether KVM acceleration + * and KVM CAP_SIGNAL_MSI are supported +-- +1.8.3.1 + diff --git a/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch b/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch new file mode 100644 index 0000000..36c0f1a --- /dev/null +++ b/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch @@ -0,0 +1,60 @@ +From 2740a84fe798ade5c1ce725d65cdaffb255da47c Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 31 Jan 2020 14:23:11 +0000 +Subject: [PATCH 09/15] target/arm/kvm64: kvm64 cpus have timer registers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200131142314.13175-3-drjones@redhat.com> +Patchwork-id: 93621 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/5] target/arm/kvm64: kvm64 cpus have timer registers +Bugzilla: 1647366 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: 
Auger Eric +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 + +Author: Andrew Jones +Date: Thu, 30 Jan 2020 16:02:06 +0000 + + target/arm/kvm64: kvm64 cpus have timer registers + + Add the missing GENERIC_TIMER feature to kvm64 cpus. + + We don't currently use these registers when KVM is enabled, but it's + probably best we add the feature flag for consistency and potential + future use. There's also precedent, as we add the PMU feature flag to + KVM enabled guests, even though we don't use those registers either. + + This change was originally posted as a hunk of a different, never + merged patch from Bijan Mottahedeh. + + Signed-off-by: Andrew Jones + Reviewed-by: Richard Henderson + Message-id: 20200120101023.16030-4-drjones@redhat.com + Signed-off-by: Peter Maydell + +(cherry picked from commit 65caa415487f4a6e265105446c6ef8f56bb0aa70) +Signed-off-by: Danilo C. L. de Paula +--- + target/arm/kvm64.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index e2da756..666a81a 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -605,6 +605,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + set_feature(&features, ARM_FEATURE_NEON); + set_feature(&features, ARM_FEATURE_AARCH64); + set_feature(&features, ARM_FEATURE_PMU); ++ set_feature(&features, ARM_FEATURE_GENERIC_TIMER); + + ahcf->features = features; + +-- +1.8.3.1 + diff --git a/kvm-tests-arm-cpu-features-Check-feature-default-values.patch b/kvm-tests-arm-cpu-features-Check-feature-default-values.patch new file mode 100644 index 0000000..e8a48bf --- /dev/null +++ b/kvm-tests-arm-cpu-features-Check-feature-default-values.patch @@ -0,0 +1,106 @@ +From 323889aa2182bf39df10f1caf43f22daea2d7d37 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Fri, 31 Jan 2020 14:23:12 +0000 +Subject: [PATCH 10/15] tests/arm-cpu-features: Check feature default values +MIME-Version: 1.0 +Content-Type: text/plain; 
charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200131142314.13175-4-drjones@redhat.com> +Patchwork-id: 93626 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/5] tests/arm-cpu-features: Check feature default values +Bugzilla: 1647366 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 + +Author: Andrew Jones +Date: Thu, 30 Jan 2020 16:02:06 +0000 + + tests/arm-cpu-features: Check feature default values + + If we know what the default value should be then we can test for + that as well as the feature existence. + + Signed-off-by: Andrew Jones + Reviewed-by: Richard Henderson + Message-id: 20200120101023.16030-5-drjones@redhat.com + Signed-off-by: Peter Maydell + +(cherry picked from commit 789a35efb583464f9fcd5d871a7fd6164318bb91) +Signed-off-by: Danilo C. L. de Paula +--- + tests/arm-cpu-features.c | 37 ++++++++++++++++++++++++++++--------- + 1 file changed, 28 insertions(+), 9 deletions(-) + +diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c +index 6e99aa9..89285ca 100644 +--- a/tests/arm-cpu-features.c ++++ b/tests/arm-cpu-features.c +@@ -159,6 +159,25 @@ static bool resp_get_feature(QDict *resp, const char *feature) + qobject_unref(_resp); \ + }) + ++#define assert_feature(qts, cpu_type, feature, expected_value) \ ++({ \ ++ QDict *_resp, *_props; \ ++ \ ++ _resp = do_query_no_props(qts, cpu_type); \ ++ g_assert(_resp); \ ++ g_assert(resp_has_props(_resp)); \ ++ _props = resp_get_props(_resp); \ ++ g_assert(qdict_get(_props, feature)); \ ++ g_assert(qdict_get_bool(_props, feature) == (expected_value)); \ ++ qobject_unref(_resp); \ ++}) ++ ++#define assert_has_feature_enabled(qts, cpu_type, feature) \ ++ assert_feature(qts, cpu_type, feature, true) ++ ++#define assert_has_feature_disabled(qts, cpu_type, feature) \ ++ assert_feature(qts, cpu_type, feature, false) ++ + static void assert_type_full(QTestState *qts) + 
{ + const char *error; +@@ -405,16 +424,16 @@ static void test_query_cpu_model_expansion(const void *data) + assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); + + /* Test expected feature presence/absence for some cpu types */ +- assert_has_feature(qts, "max", "pmu"); +- assert_has_feature(qts, "cortex-a15", "pmu"); ++ assert_has_feature_enabled(qts, "max", "pmu"); ++ assert_has_feature_enabled(qts, "cortex-a15", "pmu"); + assert_has_not_feature(qts, "cortex-a15", "aarch64"); + + if (g_str_equal(qtest_get_arch(), "aarch64")) { +- assert_has_feature(qts, "max", "aarch64"); +- assert_has_feature(qts, "max", "sve"); +- assert_has_feature(qts, "max", "sve128"); +- assert_has_feature(qts, "cortex-a57", "pmu"); +- assert_has_feature(qts, "cortex-a57", "aarch64"); ++ assert_has_feature_enabled(qts, "max", "aarch64"); ++ assert_has_feature_enabled(qts, "max", "sve"); ++ assert_has_feature_enabled(qts, "max", "sve128"); ++ assert_has_feature_enabled(qts, "cortex-a57", "pmu"); ++ assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); + + sve_tests_default(qts, "max"); + +@@ -451,8 +470,8 @@ static void test_query_cpu_model_expansion_kvm(const void *data) + QDict *resp; + char *error; + +- assert_has_feature(qts, "host", "aarch64"); +- assert_has_feature(qts, "host", "pmu"); ++ assert_has_feature_enabled(qts, "host", "aarch64"); ++ assert_has_feature_enabled(qts, "host", "pmu"); + + assert_error(qts, "cortex-a15", + "We cannot guarantee the CPU type 'cortex-a15' works " +-- +1.8.3.1 + diff --git a/kvm-tpm-ppi-page-align-PPI-RAM.patch b/kvm-tpm-ppi-page-align-PPI-RAM.patch new file mode 100644 index 0000000..32c971d --- /dev/null +++ b/kvm-tpm-ppi-page-align-PPI-RAM.patch @@ -0,0 +1,58 @@ +From 7cb1c5e1416de9a09180f0930d2a216c77e8cdbd Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Thu, 30 Jan 2020 16:01:10 +0000 +Subject: [PATCH 07/15] tpm-ppi: page-align PPI RAM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20200130160110.126086-1-marcandre.lureau@redhat.com> +Patchwork-id: 93600 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] tpm-ppi: page-align PPI RAM +Bugzilla: 1787444 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Philippe Mathieu-Daudé + +post-copy migration fails on destination with error such as: +2019-12-26T10:22:44.714644Z qemu-kvm: ram_block_discard_range: +Unaligned start address: 0x559d2afae9a0 + +Use qemu_memalign() to constrain the PPI RAM memory alignment. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Marc-André Lureau +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Stefan Berger +Signed-off-by: Stefan Berger +Message-id: 20200103074000.1006389-3-marcandre.lureau@redhat.com + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1787444 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=26122940 + +(cherry picked from commit 71e415c8a75c130875f14d6b2136825789feb297) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/tpm/tpm_ppi.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c +index ff31459..6d9c1a3 100644 +--- a/hw/tpm/tpm_ppi.c ++++ b/hw/tpm/tpm_ppi.c +@@ -43,7 +43,8 @@ void tpm_ppi_reset(TPMPPI *tpmppi) + void tpm_ppi_init(TPMPPI *tpmppi, struct MemoryRegion *m, + hwaddr addr, Object *obj) + { +- tpmppi->buf = g_malloc0(HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); ++ tpmppi->buf = qemu_memalign(qemu_real_host_page_size, ++ HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); + memory_region_init_ram_device_ptr(&tpmppi->ram, obj, "tpm-ppi", + TPM_PPI_ADDR_SIZE, tpmppi->buf); + vmstate_register_ram(&tpmppi->ram, DEVICE(obj)); +-- +1.8.3.1 + diff --git a/kvm-trace-update-qemu-trace-stap-to-Python-3.patch b/kvm-trace-update-qemu-trace-stap-to-Python-3.patch new file mode 100644 index 0000000..c49aecd --- /dev/null +++ b/kvm-trace-update-qemu-trace-stap-to-Python-3.patch @@ -0,0 +1,82 @@ +From e7cdcd1e39c4c030a32c9e8ef79316eae8555bc8 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 16 Jan 2020 17:52:48 +0000 +Subject: [PATCH 04/15] trace: update qemu-trace-stap to Python 3 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +Message-id: <20200116175248.286556-2-stefanha@redhat.com> +Patchwork-id: 93365 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] trace: update qemu-trace-stap to Python 3 +Bugzilla: 1787395 +RH-Acked-by: John Snow +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Dr. 
David Alan Gilbert + +qemu-trace-stap does not support Python 3 yet: + + $ scripts/qemu-trace-stap list path/to/qemu-system-x86_64 + Traceback (most recent call last): + File "scripts/qemu-trace-stap", line 175, in + main() + File "scripts/qemu-trace-stap", line 171, in main + args.func(args) + File "scripts/qemu-trace-stap", line 118, in cmd_list + print_probes(args.verbose, "*") + File "scripts/qemu-trace-stap", line 114, in print_probes + if line.startswith(prefix): + TypeError: startswith first arg must be bytes or a tuple of bytes, not str + +Now that QEMU requires Python 3.5 or later we can switch to pure Python +3. Use Popen()'s universal_newlines=True argument to treat stdout as +text instead of binary. + +Fixes: 62dd1048c0bd ("trace: add ability to do simple printf logging via systemtap") +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1787395 +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Message-id: 20200107112438.383958-1-stefanha@redhat.com +Message-Id: <20200107112438.383958-1-stefanha@redhat.com> +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 3f0097169bb60268cc5dda0c5ea47c31ab57b22f) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. de Paula +--- + scripts/qemu-trace-stap | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/scripts/qemu-trace-stap b/scripts/qemu-trace-stap +index 91d1051..90527eb 100755 +--- a/scripts/qemu-trace-stap ++++ b/scripts/qemu-trace-stap +@@ -1,4 +1,4 @@ +-#!/usr/bin/python ++#!/usr/bin/env python3 + # -*- python -*- + # + # Copyright (C) 2019 Red Hat, Inc +@@ -18,8 +18,6 @@ + # You should have received a copy of the GNU General Public License + # along with this program; if not, see . 
+ +-from __future__ import print_function +- + import argparse + import copy + import os.path +@@ -104,7 +102,9 @@ def cmd_list(args): + if verbose: + print("Listing probes with name '%s'" % script) + proc = subprocess.Popen(["stap", "-l", script], +- stdout=subprocess.PIPE, env=tapset_env(tapsets)) ++ stdout=subprocess.PIPE, ++ universal_newlines=True, ++ env=tapset_env(tapsets)) + out, err = proc.communicate() + if proc.returncode != 0: + print("No probes found, are the tapsets installed in %s" % tapset_dir(args.binary)) +-- +1.8.3.1 + diff --git a/kvm-xics-Don-t-deassert-outputs.patch b/kvm-xics-Don-t-deassert-outputs.patch new file mode 100644 index 0000000..08ed724 --- /dev/null +++ b/kvm-xics-Don-t-deassert-outputs.patch @@ -0,0 +1,52 @@ +From 99b6ee4b7f63ea49e5b73f61bbf68f67252f27da Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Tue, 21 Jan 2020 05:16:12 +0000 +Subject: [PATCH 02/15] xics: Don't deassert outputs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Gibson +Message-id: <20200121051613.388295-3-dgibson@redhat.com> +Patchwork-id: 93430 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 2/3] xics: Don't deassert outputs +Bugzilla: 1776638 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth + +From: Greg Kurz + +The correct way to do this is to deassert the input pins on the CPU side. +This is the case since a previous change. + +Signed-off-by: Greg Kurz +Message-Id: <157548862298.3650476.1228720391270249433.stgit@bahia.lan> +Signed-off-by: David Gibson +(cherry picked from commit 4febcdd88f08422a66a1aa0dc55e1472abed3c4b) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/intc/xics.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/hw/intc/xics.c b/hw/intc/xics.c +index e7ac9ba..72c5dca 100644 +--- a/hw/intc/xics.c ++++ b/hw/intc/xics.c +@@ -289,9 +289,6 @@ void icp_reset(ICPState *icp) + icp->pending_priority = 0xff; + icp->mfrr = 0xff; + +- /* Make all outputs are deasserted */ +- qemu_set_irq(icp->output, 0); +- + if (kvm_irqchip_in_kernel()) { + Error *local_err = NULL; + +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 7ecca51..388d2d5 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 8%{?dist} +Release: 9%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -373,6 +373,34 @@ Patch145: kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch Patch146: kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch # For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) Patch147: kvm-virtiofsd-add-some-options-to-the-help-message.patch +# For bz#1776638 - Guest failed to boot up after system_reset 20 times +Patch148: kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch +# For bz#1776638 - Guest failed to boot up after system_reset 20 times +Patch149: kvm-xics-Don-t-deassert-outputs.patch +# For bz#1776638 - Guest failed to boot up after system_reset 20 times +Patch150: kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch +# For bz#1787395 - qemu-trace-stap list : TypeError: startswith first arg must be bytes or a tuple of bytes, not str +Patch151: kvm-trace-update-qemu-trace-stap-to-Python-3.patch +# For bz#1794503 - CVE-2020-1711 qemu-kvm: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-av-8.2.0] +Patch153: kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch +# For bz#1787444 - Broken postcopy 
migration with vTPM device +Patch154: kvm-tpm-ppi-page-align-PPI-RAM.patch +# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature +Patch155: kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch +# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature +Patch156: kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch +# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature +Patch157: kvm-tests-arm-cpu-features-Check-feature-default-values.patch +# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature +Patch158: kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch +# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature +Patch159: kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch +# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs +Patch160: kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch +# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs +Patch161: kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch +# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs +Patch162: kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch BuildRequires: wget BuildRequires: rpm-build @@ -962,9 +990,6 @@ rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-simplet rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-log.stp rm $RPM_BUILD_ROOT%{_bindir}/elf2dmp -# Mangle qemu-kvm-stap -sed -i -e '1 s/python/python3/' $RPM_BUILD_ROOT%{_bindir}/qemu-trace-stap - # Install simpletrace install -m 0755 scripts/simpletrace.py $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py # Avoid ambiguous 'python' interpreter name @@ -1309,6 +1334,35 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Feb 10 2020 Danilo Cesar Lemes de Paula - 4.2.0-9.el8 +- kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch [bz#1776638] +- 
kvm-xics-Don-t-deassert-outputs.patch [bz#1776638] +- kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch [bz#1776638] +- kvm-trace-update-qemu-trace-stap-to-Python-3.patch [bz#1787395] +- kvm-redhat-Remove-redundant-fix-for-qemu-trace-stap.patch [bz#1787395] +- kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch [bz#1794503] +- kvm-tpm-ppi-page-align-PPI-RAM.patch [bz#1787444] +- kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch [bz#1647366] +- kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch [bz#1647366] +- kvm-tests-arm-cpu-features-Check-feature-default-values.patch [bz#1647366] +- kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch [bz#1647366] +- kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch [bz#1647366] +- kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch [bz#1529231] +- kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch [bz#1529231] +- kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch [bz#1529231] +- Resolves: bz#1529231 + ([q35] VM hangs after migration with 200 vCPUs) +- Resolves: bz#1647366 + (aarch64: Add support for the kvm-no-adjvtime ARM CPU feature) +- Resolves: bz#1776638 + (Guest failed to boot up after system_reset 20 times) +- Resolves: bz#1787395 + (qemu-trace-stap list : TypeError: startswith first arg must be bytes or a tuple of bytes, not str) +- Resolves: bz#1787444 + (Broken postcopy migration with vTPM device) +- Resolves: bz#1794503 + (CVE-2020-1711 qemu-kvm: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-av-8.2.0]) + * Fri Jan 31 2020 Miroslav Rezanina - 4.2.0-8.el8 - kvm-target-arm-arch_dump-Add-SVE-notes.patch [bz#1725084] - kvm-vhost-Add-names-to-section-rounded-warning.patch [bz#1779041] From fda7fbcd8dff8e3743df2bb53a7fd01cba7137c5 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Fri, 14 Feb 2020 03:03:11 +0000 Subject: [PATCH 065/195] * Fri Feb 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-10.el8 - kvm-i386-Resolve-CPU-models-to-v1-by-default.patch [bz#1779078 bz#1787291 bz#1779078 bz#1779078] - kvm-iotests-Support-job-complete-in-run_job.patch [bz#1781637] - kvm-iotests-Create-VM.blockdev_create.patch [bz#1781637] - kvm-block-Activate-recursively-even-for-already-active-n.patch [bz#1781637] - kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch [bz#1781637] - kvm-iotests-Test-external-snapshot-with-VM-state.patch [bz#1781637] - kvm-iotests.py-Let-wait_migration-wait-even-more.patch [bz#1781637] - kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] - kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] - kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] - kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] - kvm-backup-top-Begin-drain-earlier.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] - kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] - kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] - kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] - kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] - kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch [bz#1801320] - 
kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch [bz#1801320] - Resolves: bz#1745606 (Qemu hang when do incremental live backup in transaction mode without bitmap) - Resolves: bz#1746217 (Src qemu hang when do storage vm migration during guest installation) - Resolves: bz#1773517 (Src qemu hang when do storage vm migration with dataplane enable) - Resolves: bz#1779036 (Qemu coredump when do snapshot in transaction mode with one snapshot path not exist) - Resolves: bz#1779078 (RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm)) - Resolves: bz#1781637 (qemu crashed when do mem and disk snapshot) - Resolves: bz#1782111 (Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable)) - Resolves: bz#1782175 (Qemu core dump when add persistent bitmap(data plane enable)) - Resolves: bz#1783965 (Qemu core dump when do backup with sync: bitmap and no bitmap provided) - Resolves: bz#1787291 (RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) [rhel-8.1.0.z]) - Resolves: bz#1801320 (aarch64: backport query-cpu-model-expansion and adjvtime document fixes) --- kvm-backup-top-Begin-drain-earlier.patch | 56 +++ ...ecursively-even-for-already-active-n.patch | 116 +++++ ...-Don-t-acquire-context-while-droppin.patch | 130 ++++++ ...-AioContext-on-dirty-bitmap-function.patch | 176 ++++++++ ...bs-to-the-proper-context-on-snapshot.patch | 107 +++++ ...ing-style-issues-in-drive_backup_pre.patch | 62 +++ ...drv_try_set_aio_context-context-requ.patch | 204 +++++++++ ...mp_blockdev_backup-and-blockdev-back.patch | 144 ++++++ ...mp_drive_backup-and-drive-backup-tra.patch | 419 ++++++++++++++++++ ...tures-Make-kvm-no-adjvtime-comment-c.patch | 56 +++ ...ow-using-qdev-ID-for-qemu-io-command.patch | 100 +++++ 
...-Resolve-CPU-models-to-v1-by-default.patch | 95 ++++ kvm-iotests-Create-VM.blockdev_create.patch | 59 +++ ...ests-Support-job-complete-in-run_job.patch | 46 ++ ...Test-external-snapshot-with-VM-state.patch | 189 ++++++++ ...dling-of-AioContexts-with-some-block.patch | 322 ++++++++++++++ ...py-Let-wait_migration-wait-even-more.patch | 123 +++++ ...or-query-cpu-model-expansion-crashed.patch | 81 ++++ qemu-kvm.spec | 137 +++++- 19 files changed, 2621 insertions(+), 1 deletion(-) create mode 100644 kvm-backup-top-Begin-drain-earlier.patch create mode 100644 kvm-block-Activate-recursively-even-for-already-active-n.patch create mode 100644 kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch create mode 100644 kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch create mode 100644 kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch create mode 100644 kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch create mode 100644 kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch create mode 100644 kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch create mode 100644 kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch create mode 100644 kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch create mode 100644 kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch create mode 100644 kvm-i386-Resolve-CPU-models-to-v1-by-default.patch create mode 100644 kvm-iotests-Create-VM.blockdev_create.patch create mode 100644 kvm-iotests-Support-job-complete-in-run_job.patch create mode 100644 kvm-iotests-Test-external-snapshot-with-VM-state.patch create mode 100644 kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch create mode 100644 kvm-iotests.py-Let-wait_migration-wait-even-more.patch create mode 100644 kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch diff --git a/kvm-backup-top-Begin-drain-earlier.patch b/kvm-backup-top-Begin-drain-earlier.patch new file mode 100644 index 
0000000..ef289b7 --- /dev/null +++ b/kvm-backup-top-Begin-drain-earlier.patch @@ -0,0 +1,56 @@ +From bc78ee07bf400cbff0021367e05d308870471710 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:45 +0000 +Subject: [PATCH 12/18] backup-top: Begin drain earlier + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-6-slp@redhat.com> +Patchwork-id: 93757 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/9] backup-top: Begin drain earlier +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +From: Max Reitz + +When dropping backup-top, we need to drain the node before freeing the +BlockCopyState. Otherwise, requests may still be in flight and then the +assertion in shres_destroy() will fail. + +(This becomes visible in intermittent failure of 056.) + +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Message-id: 20191219182638.104621-1-mreitz@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit 503ca1262bab2c11c533a4816d1ff4297d4f58a6) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. 
de Paula +--- + block/backup-top.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block/backup-top.c b/block/backup-top.c +index 7cdb1f8..818d3f2 100644 +--- a/block/backup-top.c ++++ b/block/backup-top.c +@@ -257,12 +257,12 @@ void bdrv_backup_top_drop(BlockDriverState *bs) + BDRVBackupTopState *s = bs->opaque; + AioContext *aio_context = bdrv_get_aio_context(bs); + +- block_copy_state_free(s->bcs); +- + aio_context_acquire(aio_context); + + bdrv_drained_begin(bs); + ++ block_copy_state_free(s->bcs); ++ + s->active = false; + bdrv_child_refresh_perms(bs, bs->backing, &error_abort); + bdrv_replace_node(bs, backing_bs(bs), &error_abort); +-- +1.8.3.1 + diff --git a/kvm-block-Activate-recursively-even-for-already-active-n.patch b/kvm-block-Activate-recursively-even-for-already-active-n.patch new file mode 100644 index 0000000..d6cad06 --- /dev/null +++ b/kvm-block-Activate-recursively-even-for-already-active-n.patch @@ -0,0 +1,116 @@ +From 0ef6691ce8964bb2bbd677756c4e594793ca3ad8 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:24:01 +0000 +Subject: [PATCH 04/18] block: Activate recursively even for already active + nodes + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-4-kwolf@redhat.com> +Patchwork-id: 93749 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/6] block: Activate recursively even for already active nodes +Bugzilla: 1781637 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +bdrv_invalidate_cache_all() assumes that all nodes in a given subtree +are either active or inactive when it starts. Therefore, as soon as it +arrives at an already active node, it stops. + +However, this assumption is wrong. For example, it's possible to take a +snapshot of an inactive node, which results in an active overlay over an +inactive backing file. The active overlay is probably also the root node +of an inactive BlockBackend (blk->disable_perm == true). 
+ +In this case, bdrv_invalidate_cache_all() does not need to do anything +to activate the overlay node, but it still needs to recurse into the +children and the parents to make sure that after returning success, +really everything is activated. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Max Reitz +(cherry picked from commit 7bb4941ace471fc7dd6ded4749b95b9622baa6ed) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block.c | 50 ++++++++++++++++++++++++-------------------------- + 1 file changed, 24 insertions(+), 26 deletions(-) + +diff --git a/block.c b/block.c +index 473eb6e..2e5e8b6 100644 +--- a/block.c ++++ b/block.c +@@ -5335,10 +5335,6 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, + return; + } + +- if (!(bs->open_flags & BDRV_O_INACTIVE)) { +- return; +- } +- + QLIST_FOREACH(child, &bs->children, next) { + bdrv_co_invalidate_cache(child->bs, &local_err); + if (local_err) { +@@ -5360,34 +5356,36 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, + * just keep the extended permissions for the next time that an activation + * of the image is tried. 
+ */ +- bs->open_flags &= ~BDRV_O_INACTIVE; +- bdrv_get_cumulative_perm(bs, &perm, &shared_perm); +- ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, &local_err); +- if (ret < 0) { +- bs->open_flags |= BDRV_O_INACTIVE; +- error_propagate(errp, local_err); +- return; +- } +- bdrv_set_perm(bs, perm, shared_perm); +- +- if (bs->drv->bdrv_co_invalidate_cache) { +- bs->drv->bdrv_co_invalidate_cache(bs, &local_err); +- if (local_err) { ++ if (bs->open_flags & BDRV_O_INACTIVE) { ++ bs->open_flags &= ~BDRV_O_INACTIVE; ++ bdrv_get_cumulative_perm(bs, &perm, &shared_perm); ++ ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, &local_err); ++ if (ret < 0) { + bs->open_flags |= BDRV_O_INACTIVE; + error_propagate(errp, local_err); + return; + } +- } ++ bdrv_set_perm(bs, perm, shared_perm); + +- FOR_EACH_DIRTY_BITMAP(bs, bm) { +- bdrv_dirty_bitmap_skip_store(bm, false); +- } ++ if (bs->drv->bdrv_co_invalidate_cache) { ++ bs->drv->bdrv_co_invalidate_cache(bs, &local_err); ++ if (local_err) { ++ bs->open_flags |= BDRV_O_INACTIVE; ++ error_propagate(errp, local_err); ++ return; ++ } ++ } + +- ret = refresh_total_sectors(bs, bs->total_sectors); +- if (ret < 0) { +- bs->open_flags |= BDRV_O_INACTIVE; +- error_setg_errno(errp, -ret, "Could not refresh total sector count"); +- return; ++ FOR_EACH_DIRTY_BITMAP(bs, bm) { ++ bdrv_dirty_bitmap_skip_store(bm, false); ++ } ++ ++ ret = refresh_total_sectors(bs, bs->total_sectors); ++ if (ret < 0) { ++ bs->open_flags |= BDRV_O_INACTIVE; ++ error_setg_errno(errp, -ret, "Could not refresh total sector count"); ++ return; ++ } + } + + QLIST_FOREACH(parent, &bs->parents, next_parent) { +-- +1.8.3.1 + diff --git a/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch b/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch new file mode 100644 index 0000000..45f506c --- /dev/null +++ b/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch @@ -0,0 +1,130 @@ +From 
aefff389c4d11bd69180db7177135c4645a9b1bd Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:46 +0000 +Subject: [PATCH 13/18] block/backup-top: Don't acquire context while dropping + top + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-7-slp@redhat.com> +Patchwork-id: 93759 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/9] block/backup-top: Don't acquire context while dropping top +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +All paths that lead to bdrv_backup_top_drop(), except for the call +from backup_clean(), imply that the BDS AioContext has already been +acquired, so doing it there too can potentially lead to QEMU hanging +on AIO_WAIT_WHILE(). + +An easy way to trigger this situation is by issuing a two actions +transaction, with a proper and a bogus blockdev-backup, so the second +one will trigger a rollback. 
This will trigger a hang with an stack +trace like this one: + + #0 0x00007fb680c75016 in __GI_ppoll (fds=0x55e74580f7c0, nfds=1, timeout=, + timeout@entry=0x0, sigmask=sigmask@entry=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:39 + #1 0x000055e743386e09 in ppoll (__ss=0x0, __timeout=0x0, __nfds=, __fds=) + at /usr/include/bits/poll2.h:77 + #2 0x000055e743386e09 in qemu_poll_ns + (fds=, nfds=, timeout=) at util/qemu-timer.c:336 + #3 0x000055e743388dc4 in aio_poll (ctx=0x55e7458925d0, blocking=blocking@entry=true) + at util/aio-posix.c:669 + #4 0x000055e743305dea in bdrv_flush (bs=bs@entry=0x55e74593c0d0) at block/io.c:2878 + #5 0x000055e7432be58e in bdrv_close (bs=0x55e74593c0d0) at block.c:4017 + #6 0x000055e7432be58e in bdrv_delete (bs=) at block.c:4262 + #7 0x000055e7432be58e in bdrv_unref (bs=bs@entry=0x55e74593c0d0) at block.c:5644 + #8 0x000055e743316b9b in bdrv_backup_top_drop (bs=bs@entry=0x55e74593c0d0) at block/backup-top.c:273 + #9 0x000055e74331461f in backup_job_create + (job_id=0x0, bs=bs@entry=0x55e7458d5820, target=target@entry=0x55e74589f640, speed=0, sync_mode=MIRROR_SYNC_MODE_FULL, sync_bitmap=sync_bitmap@entry=0x0, bitmap_mode=BITMAP_SYNC_MODE_ON_SUCCESS, compress=false, filter_node_name=0x0, on_source_error=BLOCKDEV_ON_ERROR_REPORT, on_target_error=BLOCKDEV_ON_ERROR_REPORT, creation_flags=0, cb=0x0, opaque=0x0, txn=0x0, errp=0x7ffddfd1efb0) at block/backup.c:478 + #10 0x000055e74315bc52 in do_backup_common + (backup=backup@entry=0x55e746c066d0, bs=bs@entry=0x55e7458d5820, target_bs=target_bs@entry=0x55e74589f640, aio_context=aio_context@entry=0x55e7458a91e0, txn=txn@entry=0x0, errp=errp@entry=0x7ffddfd1efb0) + at blockdev.c:3580 + #11 0x000055e74315c37c in do_blockdev_backup + (backup=backup@entry=0x55e746c066d0, txn=0x0, errp=errp@entry=0x7ffddfd1efb0) + at /usr/src/debug/qemu-kvm-4.2.0-2.module+el8.2.0+5135+ed3b2489.x86_64/./qapi/qapi-types-block-core.h:1492 + #12 0x000055e74315c449 in blockdev_backup_prepare (common=0x55e746a8de90, 
errp=0x7ffddfd1f018) + at blockdev.c:1885 + #13 0x000055e743160152 in qmp_transaction + (dev_list=, has_props=, props=0x55e7467fe2c0, errp=errp@entry=0x7ffddfd1f088) at blockdev.c:2340 + #14 0x000055e743287ff5 in qmp_marshal_transaction + (args=, ret=, errp=0x7ffddfd1f0f8) + at qapi/qapi-commands-transaction.c:44 + #15 0x000055e74333de6c in do_qmp_dispatch + (errp=0x7ffddfd1f0f0, allow_oob=, request=, cmds=0x55e743c28d60 ) at qapi/qmp-dispatch.c:132 + #16 0x000055e74333de6c in qmp_dispatch + (cmds=0x55e743c28d60 , request=, allow_oob=) + at qapi/qmp-dispatch.c:175 + #17 0x000055e74325c061 in monitor_qmp_dispatch (mon=0x55e745908030, req=) + at monitor/qmp.c:145 + #18 0x000055e74325c6fa in monitor_qmp_bh_dispatcher (data=) at monitor/qmp.c:234 + #19 0x000055e743385866 in aio_bh_call (bh=0x55e745807ae0) at util/async.c:117 + #20 0x000055e743385866 in aio_bh_poll (ctx=ctx@entry=0x55e7458067a0) at util/async.c:117 + #21 0x000055e743388c54 in aio_dispatch (ctx=0x55e7458067a0) at util/aio-posix.c:459 + #22 0x000055e743385742 in aio_ctx_dispatch + (source=, callback=, user_data=) at util/async.c:260 + #23 0x00007fb68543e67d in g_main_dispatch (context=0x55e745893a40) at gmain.c:3176 + #24 0x00007fb68543e67d in g_main_context_dispatch (context=context@entry=0x55e745893a40) at gmain.c:3829 + #25 0x000055e743387d08 in glib_pollfds_poll () at util/main-loop.c:219 + #26 0x000055e743387d08 in os_host_main_loop_wait (timeout=) at util/main-loop.c:242 + #27 0x000055e743387d08 in main_loop_wait (nonblocking=) at util/main-loop.c:518 + #28 0x000055e74316a3c1 in main_loop () at vl.c:1828 + #29 0x000055e743016a72 in main (argc=, argv=, envp=) + at vl.c:4504 + +Fix this by not acquiring the AioContext there, and ensuring all paths +leading to it have it already acquired (backup_clean()). 
+ +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1782111 +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +(cherry picked from commit 0abf2581717a19d9749d5c2ff8acd0ac203452c2) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + block/backup-top.c | 5 ----- + block/backup.c | 3 +++ + 2 files changed, 3 insertions(+), 5 deletions(-) + +diff --git a/block/backup-top.c b/block/backup-top.c +index 818d3f2..b8d863f 100644 +--- a/block/backup-top.c ++++ b/block/backup-top.c +@@ -255,9 +255,6 @@ append_failed: + void bdrv_backup_top_drop(BlockDriverState *bs) + { + BDRVBackupTopState *s = bs->opaque; +- AioContext *aio_context = bdrv_get_aio_context(bs); +- +- aio_context_acquire(aio_context); + + bdrv_drained_begin(bs); + +@@ -271,6 +268,4 @@ void bdrv_backup_top_drop(BlockDriverState *bs) + bdrv_drained_end(bs); + + bdrv_unref(bs); +- +- aio_context_release(aio_context); + } +diff --git a/block/backup.c b/block/backup.c +index cf62b1a..1383e21 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -135,8 +135,11 @@ static void backup_abort(Job *job) + static void backup_clean(Job *job) + { + BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); ++ AioContext *aio_context = bdrv_get_aio_context(s->backup_top); + ++ aio_context_acquire(aio_context); + bdrv_backup_top_drop(s->backup_top); ++ aio_context_release(aio_context); + } + + void backup_do_checkpoint(BlockJob *job, Error **errp) +-- +1.8.3.1 + diff --git a/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch b/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch new file mode 100644 index 0000000..9a69130 --- /dev/null +++ b/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch @@ -0,0 +1,176 @@ +From dc2654f2319ad6c379e0ba10be143726c6f0e9e0 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:47 +0000 +Subject: [PATCH 14/18] blockdev: Acquire AioContext on dirty bitmap functions + +RH-Author: Sergio Lopez Pascual 
+Message-id: <20200207112749.25073-8-slp@redhat.com> +Patchwork-id: 93760 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 7/9] blockdev: Acquire AioContext on dirty bitmap functions +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Dirty map addition and removal functions are not acquiring to BDS +AioContext, while they may call to code that expects it to be +acquired. + +This may trigger a crash with a stack trace like this one: + + #0 0x00007f0ef146370f in __GI_raise (sig=sig@entry=6) + at ../sysdeps/unix/sysv/linux/raise.c:50 + #1 0x00007f0ef144db25 in __GI_abort () at abort.c:79 + #2 0x0000565022294dce in error_exit + (err=, msg=msg@entry=0x56502243a730 <__func__.16350> "qemu_mutex_unlock_impl") at util/qemu-thread-posix.c:36 + #3 0x00005650222950ba in qemu_mutex_unlock_impl + (mutex=mutex@entry=0x5650244b0240, file=file@entry=0x565022439adf "util/async.c", line=line@entry=526) at util/qemu-thread-posix.c:108 + #4 0x0000565022290029 in aio_context_release + (ctx=ctx@entry=0x5650244b01e0) at util/async.c:526 + #5 0x000056502221cd08 in bdrv_can_store_new_dirty_bitmap + (bs=bs@entry=0x5650244dc820, name=name@entry=0x56502481d360 "bitmap1", granularity=granularity@entry=65536, errp=errp@entry=0x7fff22831718) + at block/dirty-bitmap.c:542 + #6 0x000056502206ae53 in qmp_block_dirty_bitmap_add + (errp=0x7fff22831718, disabled=false, has_disabled=, persistent=, has_persistent=true, granularity=65536, has_granularity=, name=0x56502481d360 "bitmap1", node=) at blockdev.c:2894 + #7 0x000056502206ae53 in qmp_block_dirty_bitmap_add + (node=, name=0x56502481d360 "bitmap1", has_granularity=, granularity=, has_persistent=true, persistent=, has_disabled=false, disabled=false, errp=0x7fff22831718) at blockdev.c:2856 + #8 0x00005650221847a3 in qmp_marshal_block_dirty_bitmap_add + (args=, ret=, errp=0x7fff22831798) + at 
qapi/qapi-commands-block-core.c:651 + #9 0x0000565022247e6c in do_qmp_dispatch + (errp=0x7fff22831790, allow_oob=, request=, cmds=0x565022b32d60 ) at qapi/qmp-dispatch.c:132 + #10 0x0000565022247e6c in qmp_dispatch + (cmds=0x565022b32d60 , request=, allow_oob=) at qapi/qmp-dispatch.c:175 + #11 0x0000565022166061 in monitor_qmp_dispatch + (mon=0x56502450faa0, req=) at monitor/qmp.c:145 + #12 0x00005650221666fa in monitor_qmp_bh_dispatcher + (data=) at monitor/qmp.c:234 + #13 0x000056502228f866 in aio_bh_call (bh=0x56502440eae0) + at util/async.c:117 + #14 0x000056502228f866 in aio_bh_poll (ctx=ctx@entry=0x56502440d7a0) + at util/async.c:117 + #15 0x0000565022292c54 in aio_dispatch (ctx=0x56502440d7a0) + at util/aio-posix.c:459 + #16 0x000056502228f742 in aio_ctx_dispatch + (source=, callback=, user_data=) at util/async.c:260 + #17 0x00007f0ef5ce667d in g_main_dispatch (context=0x56502449aa40) + at gmain.c:3176 + #18 0x00007f0ef5ce667d in g_main_context_dispatch + (context=context@entry=0x56502449aa40) at gmain.c:3829 + #19 0x0000565022291d08 in glib_pollfds_poll () at util/main-loop.c:219 + #20 0x0000565022291d08 in os_host_main_loop_wait + (timeout=) at util/main-loop.c:242 + #21 0x0000565022291d08 in main_loop_wait (nonblocking=) + at util/main-loop.c:518 + #22 0x00005650220743c1 in main_loop () at vl.c:1828 + #23 0x0000565021f20a72 in main + (argc=, argv=, envp=) + at vl.c:4504 + +Fix this by acquiring the AioContext at qmp_block_dirty_bitmap_add() +and qmp_block_dirty_bitmap_add(). + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1782175 +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +(cherry picked from commit 91005a495e228ebd7e5e173cd18f952450eef82d) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. 
de Paula +--- + blockdev.c | 22 ++++++++++++++++++---- + 1 file changed, 18 insertions(+), 4 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 1dacbc2..d4ef6cd 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -2984,6 +2984,7 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + { + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; ++ AioContext *aio_context; + + if (!name || name[0] == '\0') { + error_setg(errp, "Bitmap name cannot be empty"); +@@ -2995,11 +2996,14 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + return; + } + ++ aio_context = bdrv_get_aio_context(bs); ++ aio_context_acquire(aio_context); ++ + if (has_granularity) { + if (granularity < 512 || !is_power_of_2(granularity)) { + error_setg(errp, "Granularity must be power of 2 " + "and at least 512"); +- return; ++ goto out; + } + } else { + /* Default to cluster size, if available: */ +@@ -3017,12 +3021,12 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + if (persistent && + !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp)) + { +- return; ++ goto out; + } + + bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp); + if (bitmap == NULL) { +- return; ++ goto out; + } + + if (disabled) { +@@ -3030,6 +3034,9 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + } + + bdrv_dirty_bitmap_set_persistence(bitmap, persistent); ++ ++out: ++ aio_context_release(aio_context); + } + + static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( +@@ -3038,21 +3045,27 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( + { + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; ++ AioContext *aio_context; + + bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); + if (!bitmap || !bs) { + return NULL; + } + ++ aio_context = bdrv_get_aio_context(bs); ++ aio_context_acquire(aio_context); ++ + if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO, + errp)) { ++ aio_context_release(aio_context); + 
return NULL; + } + + if (bdrv_dirty_bitmap_get_persistence(bitmap) && + bdrv_remove_persistent_dirty_bitmap(bs, name, errp) < 0) + { +- return NULL; ++ aio_context_release(aio_context); ++ return NULL; + } + + if (release) { +@@ -3063,6 +3076,7 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( + *bitmap_bs = bs; + } + ++ aio_context_release(aio_context); + return release ? NULL : bitmap; + } + +-- +1.8.3.1 + diff --git a/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch b/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch new file mode 100644 index 0000000..b2dd453 --- /dev/null +++ b/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch @@ -0,0 +1,107 @@ +From 24e5eca4218b294bd013e2d85a38345045506bec Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:48 +0000 +Subject: [PATCH 15/18] blockdev: Return bs to the proper context on snapshot + abort + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-9-slp@redhat.com> +Patchwork-id: 93761 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 8/9] blockdev: Return bs to the proper context on snapshot abort +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +external_snapshot_abort() calls to bdrv_set_backing_hd(), which +returns state->old_bs to the main AioContext, as it's intended to be +used then the BDS is going to be released. As that's not the case when +aborting an external snapshot, return it to the AioContext it was +before the call. + +This issue can be triggered by issuing a transaction with two actions, +a proper blockdev-snapshot-sync and a bogus one, so the second will +trigger a transaction abort. 
This results in a crash with an stack +trace like this one: + + #0 0x00007fa1048b28df in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 + #1 0x00007fa10489ccf5 in __GI_abort () at abort.c:79 + #2 0x00007fa10489cbc9 in __assert_fail_base + (fmt=0x7fa104a03300 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5572240b44d8 "bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)", file=0x557224014d30 "block.c", line=2240, function=) at assert.c:92 + #3 0x00007fa1048aae96 in __GI___assert_fail + (assertion=assertion@entry=0x5572240b44d8 "bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)", file=file@entry=0x557224014d30 "block.c", line=line@entry=2240, function=function@entry=0x5572240b5d60 <__PRETTY_FUNCTION__.31620> "bdrv_replace_child_noperm") at assert.c:101 + #4 0x0000557223e631f8 in bdrv_replace_child_noperm (child=0x557225b9c980, new_bs=new_bs@entry=0x557225c42e40) at block.c:2240 + #5 0x0000557223e68be7 in bdrv_replace_node (from=0x557226951a60, to=0x557225c42e40, errp=0x5572247d6138 ) at block.c:4196 + #6 0x0000557223d069c4 in external_snapshot_abort (common=0x557225d7e170) at blockdev.c:1731 + #7 0x0000557223d069c4 in external_snapshot_abort (common=0x557225d7e170) at blockdev.c:1717 + #8 0x0000557223d09013 in qmp_transaction (dev_list=, has_props=, props=0x557225cc7d70, errp=errp@entry=0x7ffe704c0c98) at blockdev.c:2360 + #9 0x0000557223e32085 in qmp_marshal_transaction (args=, ret=, errp=0x7ffe704c0d08) at qapi/qapi-commands-transaction.c:44 + #10 0x0000557223ee798c in do_qmp_dispatch (errp=0x7ffe704c0d00, allow_oob=, request=, cmds=0x5572247d3cc0 ) at qapi/qmp-dispatch.c:132 + #11 0x0000557223ee798c in qmp_dispatch (cmds=0x5572247d3cc0 , request=, allow_oob=) at qapi/qmp-dispatch.c:175 + #12 0x0000557223e06141 in monitor_qmp_dispatch (mon=0x557225c69ff0, req=) at monitor/qmp.c:120 + #13 0x0000557223e0678a in monitor_qmp_bh_dispatcher (data=) at monitor/qmp.c:209 + #14 0x0000557223f2f366 in aio_bh_call 
(bh=0x557225b9dc60) at util/async.c:117 + #15 0x0000557223f2f366 in aio_bh_poll (ctx=ctx@entry=0x557225b9c840) at util/async.c:117 + #16 0x0000557223f32754 in aio_dispatch (ctx=0x557225b9c840) at util/aio-posix.c:459 + #17 0x0000557223f2f242 in aio_ctx_dispatch (source=, callback=, user_data=) at util/async.c:260 + #18 0x00007fa10913467d in g_main_dispatch (context=0x557225c28e80) at gmain.c:3176 + #19 0x00007fa10913467d in g_main_context_dispatch (context=context@entry=0x557225c28e80) at gmain.c:3829 + #20 0x0000557223f31808 in glib_pollfds_poll () at util/main-loop.c:219 + #21 0x0000557223f31808 in os_host_main_loop_wait (timeout=) at util/main-loop.c:242 + #22 0x0000557223f31808 in main_loop_wait (nonblocking=) at util/main-loop.c:518 + #23 0x0000557223d13201 in main_loop () at vl.c:1828 + #24 0x0000557223bbfb82 in main (argc=, argv=, envp=) at vl.c:4504 + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1779036 +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +(cherry picked from commit 377410f6fb4f6b0d26d4a028c20766fae05de17e) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + blockdev.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/blockdev.c b/blockdev.c +index d4ef6cd..4cd9a58 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1731,6 +1731,8 @@ static void external_snapshot_abort(BlkActionState *common) + if (state->new_bs) { + if (state->overlay_appended) { + AioContext *aio_context; ++ AioContext *tmp_context; ++ int ret; + + aio_context = bdrv_get_aio_context(state->old_bs); + aio_context_acquire(aio_context); +@@ -1738,6 +1740,25 @@ static void external_snapshot_abort(BlkActionState *common) + bdrv_ref(state->old_bs); /* we can't let bdrv_set_backind_hd() + close state->old_bs; we need it */ + bdrv_set_backing_hd(state->new_bs, NULL, &error_abort); ++ ++ /* ++ * The call to bdrv_set_backing_hd() above returns state->old_bs to ++ * the main AioContext. 
As we're still going to be using it, return ++ * it to the AioContext it was before. ++ */ ++ tmp_context = bdrv_get_aio_context(state->old_bs); ++ if (aio_context != tmp_context) { ++ aio_context_release(aio_context); ++ aio_context_acquire(tmp_context); ++ ++ ret = bdrv_try_set_aio_context(state->old_bs, ++ aio_context, NULL); ++ assert(ret == 0); ++ ++ aio_context_release(tmp_context); ++ aio_context_acquire(aio_context); ++ } ++ + bdrv_replace_node(state->new_bs, state->old_bs, &error_abort); + bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */ + +-- +1.8.3.1 + diff --git a/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch b/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch new file mode 100644 index 0000000..399a06a --- /dev/null +++ b/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch @@ -0,0 +1,62 @@ +From d56b53cd75c4146eae7a06d1cc30ab823a9bde93 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:41 +0000 +Subject: [PATCH 08/18] blockdev: fix coding style issues in + drive_backup_prepare +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-2-slp@redhat.com> +Patchwork-id: 93754 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/9] blockdev: fix coding style issues in drive_backup_prepare +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Fix a couple of minor coding style issues in drive_backup_prepare. + +Signed-off-by: Sergio Lopez +Reviewed-by: Max Reitz +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 471ded690e19689018535e3f48480507ed073e22) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. 
de Paula +--- + blockdev.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 8e029e9..553e315 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3620,7 +3620,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, + + if (!backup->has_format) { + backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? +- NULL : (char*) bs->drv->format_name; ++ NULL : (char *) bs->drv->format_name; + } + + /* Early check to avoid creating target */ +@@ -3630,8 +3630,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, + + flags = bs->open_flags | BDRV_O_RDWR; + +- /* See if we have a backing HD we can use to create our new image +- * on top of. */ ++ /* ++ * See if we have a backing HD we can use to create our new image ++ * on top of. ++ */ + if (backup->sync == MIRROR_SYNC_MODE_TOP) { + source = backing_bs(bs); + if (!source) { +-- +1.8.3.1 + diff --git a/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch b/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch new file mode 100644 index 0000000..a94ee75 --- /dev/null +++ b/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch @@ -0,0 +1,204 @@ +From da4ee4c0d56200042cb86f8ccd2777009bd82df3 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:44 +0000 +Subject: [PATCH 11/18] blockdev: honor bdrv_try_set_aio_context() context + requirements + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-5-slp@redhat.com> +Patchwork-id: 93758 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 4/9] blockdev: honor bdrv_try_set_aio_context() context requirements +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +bdrv_try_set_aio_context() requires that the old context is held, and +the new context is not held. 
Fix all the occurrences where it's not +done this way. + +Suggested-by: Max Reitz +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +(cherry picked from commit 3ea67e08832775a28d0bd2795f01bc77e7ea1512) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + blockdev.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 60 insertions(+), 8 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 152a0f7..1dacbc2 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1535,6 +1535,7 @@ static void external_snapshot_prepare(BlkActionState *common, + DO_UPCAST(ExternalSnapshotState, common, common); + TransactionAction *action = common->action; + AioContext *aio_context; ++ AioContext *old_context; + int ret; + + /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar +@@ -1675,7 +1676,16 @@ static void external_snapshot_prepare(BlkActionState *common, + goto out; + } + ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ ++ old_context = bdrv_get_aio_context(state->new_bs); ++ aio_context_release(aio_context); ++ aio_context_acquire(old_context); ++ + ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); ++ ++ aio_context_release(old_context); ++ aio_context_acquire(aio_context); ++ + if (ret < 0) { + goto out; + } +@@ -1775,11 +1785,13 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) + BlockDriverState *target_bs; + BlockDriverState *source = NULL; + AioContext *aio_context; ++ AioContext *old_context; + QDict *options; + Error *local_err = NULL; + int flags; + int64_t size; + bool set_backing_hd = false; ++ int ret; + + assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); + backup = common->action->u.drive_backup.data; +@@ -1868,6 +1880,21 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) + goto out; + } + ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. 
*/ ++ old_context = bdrv_get_aio_context(target_bs); ++ aio_context_release(aio_context); ++ aio_context_acquire(old_context); ++ ++ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); ++ if (ret < 0) { ++ bdrv_unref(target_bs); ++ aio_context_release(old_context); ++ return; ++ } ++ ++ aio_context_release(old_context); ++ aio_context_acquire(aio_context); ++ + if (set_backing_hd) { + bdrv_set_backing_hd(target_bs, source, &local_err); + if (local_err) { +@@ -1947,6 +1974,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + BlockDriverState *bs; + BlockDriverState *target_bs; + AioContext *aio_context; ++ AioContext *old_context; ++ int ret; + + assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); + backup = common->action->u.blockdev_backup.data; +@@ -1961,7 +1990,18 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + return; + } + ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. 
*/ + aio_context = bdrv_get_aio_context(bs); ++ old_context = bdrv_get_aio_context(target_bs); ++ aio_context_acquire(old_context); ++ ++ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); ++ if (ret < 0) { ++ aio_context_release(old_context); ++ return; ++ } ++ ++ aio_context_release(old_context); + aio_context_acquire(aio_context); + state->bs = bs; + +@@ -3562,7 +3602,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, + BlockJob *job = NULL; + BdrvDirtyBitmap *bmap = NULL; + int job_flags = JOB_DEFAULT; +- int ret; + + if (!backup->has_speed) { + backup->speed = 0; +@@ -3586,11 +3625,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, + backup->compress = false; + } + +- ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); +- if (ret < 0) { +- return NULL; +- } +- + if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) || + (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL)) { + /* done before desugaring 'incremental' to print the right message */ +@@ -3825,6 +3859,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + BlockDriverState *bs; + BlockDriverState *source, *target_bs; + AioContext *aio_context; ++ AioContext *old_context; + BlockMirrorBackingMode backing_mode; + Error *local_err = NULL; + QDict *options = NULL; +@@ -3937,12 +3972,22 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + (arg->mode == NEW_IMAGE_MODE_EXISTING || + !bdrv_has_zero_init(target_bs))); + ++ ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ ++ old_context = bdrv_get_aio_context(target_bs); ++ aio_context_release(aio_context); ++ aio_context_acquire(old_context); ++ + ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); + if (ret < 0) { + bdrv_unref(target_bs); +- goto out; ++ aio_context_release(old_context); ++ return; + } + ++ aio_context_release(old_context); ++ aio_context_acquire(aio_context); ++ + blockdev_mirror_common(arg->has_job_id ? 
arg->job_id : NULL, bs, target_bs, + arg->has_replaces, arg->replaces, arg->sync, + backing_mode, zero_target, +@@ -3984,6 +4029,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, + BlockDriverState *bs; + BlockDriverState *target_bs; + AioContext *aio_context; ++ AioContext *old_context; + BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; + Error *local_err = NULL; + bool zero_target; +@@ -4001,10 +4047,16 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, + + zero_target = (sync == MIRROR_SYNC_MODE_FULL); + ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ ++ old_context = bdrv_get_aio_context(target_bs); + aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); ++ aio_context_acquire(old_context); + + ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); ++ ++ aio_context_release(old_context); ++ aio_context_acquire(aio_context); ++ + if (ret < 0) { + goto out; + } +-- +1.8.3.1 + diff --git a/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch b/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch new file mode 100644 index 0000000..c426384 --- /dev/null +++ b/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch @@ -0,0 +1,144 @@ +From 959955217f745f1ee6cbea97314efe69f2d7dc08 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:43 +0000 +Subject: [PATCH 10/18] blockdev: unify qmp_blockdev_backup and blockdev-backup + transaction paths + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-4-slp@redhat.com> +Patchwork-id: 93756 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/9] blockdev: unify qmp_blockdev_backup and blockdev-backup transaction paths +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Issuing a blockdev-backup from qmp_blockdev_backup takes a 
slightly +different path than when it's issued from a transaction. In the code, +this is manifested as some redundancy between do_blockdev_backup() and +blockdev_backup_prepare(). + +This change unifies both paths, merging do_blockdev_backup() and +blockdev_backup_prepare(), and changing qmp_blockdev_backup() to +create a transaction instead of calling do_backup_common() direcly. + +As a side-effect, now qmp_blockdev_backup() is executed inside a +drained section, as it happens when creating a blockdev-backup +transaction. This change is visible from the user's perspective, as +the job gets paused and immediately resumed before starting the actual +work. + +Signed-off-by: Sergio Lopez +Reviewed-by: Max Reitz +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 5b7bfe515ecbd584b40ff6e41d2fd8b37c7d5139) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + blockdev.c | 60 +++++++++++++----------------------------------------------- + 1 file changed, 13 insertions(+), 47 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 5e85fc0..152a0f7 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1940,16 +1940,13 @@ typedef struct BlockdevBackupState { + BlockJob *job; + } BlockdevBackupState; + +-static BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, +- Error **errp); +- + static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + { + BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + BlockdevBackup *backup; +- BlockDriverState *bs, *target; ++ BlockDriverState *bs; ++ BlockDriverState *target_bs; + AioContext *aio_context; +- Error *local_err = NULL; + + assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); + backup = common->action->u.blockdev_backup.data; +@@ -1959,8 +1956,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + return; + } + +- target = bdrv_lookup_bs(backup->target, backup->target, errp); +- if 
(!target) { ++ target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); ++ if (!target_bs) { + return; + } + +@@ -1971,13 +1968,10 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + /* Paired with .clean() */ + bdrv_drained_begin(state->bs); + +- state->job = do_blockdev_backup(backup, common->block_job_txn, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- goto out; +- } ++ state->job = do_backup_common(qapi_BlockdevBackup_base(backup), ++ bs, target_bs, aio_context, ++ common->block_job_txn, errp); + +-out: + aio_context_release(aio_context); + } + +@@ -3695,41 +3689,13 @@ XDbgBlockGraph *qmp_x_debug_query_block_graph(Error **errp) + return bdrv_get_xdbg_block_graph(errp); + } + +-BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, +- Error **errp) ++void qmp_blockdev_backup(BlockdevBackup *backup, Error **errp) + { +- BlockDriverState *bs; +- BlockDriverState *target_bs; +- AioContext *aio_context; +- BlockJob *job; +- +- bs = bdrv_lookup_bs(backup->device, backup->device, errp); +- if (!bs) { +- return NULL; +- } +- +- target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); +- if (!target_bs) { +- return NULL; +- } +- +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- +- job = do_backup_common(qapi_BlockdevBackup_base(backup), +- bs, target_bs, aio_context, txn, errp); +- +- aio_context_release(aio_context); +- return job; +-} +- +-void qmp_blockdev_backup(BlockdevBackup *arg, Error **errp) +-{ +- BlockJob *job; +- job = do_blockdev_backup(arg, NULL, errp); +- if (job) { +- job_start(&job->job); +- } ++ TransactionAction action = { ++ .type = TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP, ++ .u.blockdev_backup.data = backup, ++ }; ++ blockdev_do_action(&action, errp); + } + + /* Parameter check and block job starting for drive mirroring. 
+-- +1.8.3.1 + diff --git a/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch b/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch new file mode 100644 index 0000000..9ec1975 --- /dev/null +++ b/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch @@ -0,0 +1,419 @@ +From 4a03ab2a6cc4974d8d43240d1297b09160818af3 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 11:27:42 +0000 +Subject: [PATCH 09/18] blockdev: unify qmp_drive_backup and drive-backup + transaction paths + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-3-slp@redhat.com> +Patchwork-id: 93755 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/9] blockdev: unify qmp_drive_backup and drive-backup transaction paths +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Issuing a drive-backup from qmp_drive_backup takes a slightly +different path than when it's issued from a transaction. In the code, +this is manifested as some redundancy between do_drive_backup() and +drive_backup_prepare(). + +This change unifies both paths, merging do_drive_backup() and +drive_backup_prepare(), and changing qmp_drive_backup() to create a +transaction instead of calling do_backup_common() directly. + +As a side-effect, now qmp_drive_backup() is executed inside a drained +section, as it happens when creating a drive-backup transaction. This +change is visible from the user's perspective, as the job gets paused +and immediately resumed before starting the actual work. + +Also fix tests 141, 185 and 219 to cope with the extra +JOB_STATUS_CHANGE lines. + +Signed-off-by: Sergio Lopez +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 2288ccfac96281c316db942d10e3f921c1373064) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. 
de Paula +--- + blockdev.c | 224 ++++++++++++++++++++------------------------- + tests/qemu-iotests/141.out | 2 + + tests/qemu-iotests/185.out | 2 + + tests/qemu-iotests/219 | 7 +- + tests/qemu-iotests/219.out | 8 ++ + 5 files changed, 117 insertions(+), 126 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 553e315..5e85fc0 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1761,39 +1761,128 @@ typedef struct DriveBackupState { + BlockJob *job; + } DriveBackupState; + +-static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, +- Error **errp); ++static BlockJob *do_backup_common(BackupCommon *backup, ++ BlockDriverState *bs, ++ BlockDriverState *target_bs, ++ AioContext *aio_context, ++ JobTxn *txn, Error **errp); + + static void drive_backup_prepare(BlkActionState *common, Error **errp) + { + DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); +- BlockDriverState *bs; + DriveBackup *backup; ++ BlockDriverState *bs; ++ BlockDriverState *target_bs; ++ BlockDriverState *source = NULL; + AioContext *aio_context; ++ QDict *options; + Error *local_err = NULL; ++ int flags; ++ int64_t size; ++ bool set_backing_hd = false; + + assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); + backup = common->action->u.drive_backup.data; + ++ if (!backup->has_mode) { ++ backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; ++ } ++ + bs = bdrv_lookup_bs(backup->device, backup->device, errp); + if (!bs) { + return; + } + ++ if (!bs->drv) { ++ error_setg(errp, "Device has no medium"); ++ return; ++ } ++ + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + /* Paired with .clean() */ + bdrv_drained_begin(bs); + +- state->bs = bs; ++ if (!backup->has_format) { ++ backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? 
++ NULL : (char *) bs->drv->format_name; ++ } ++ ++ /* Early check to avoid creating target */ ++ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { ++ goto out; ++ } ++ ++ flags = bs->open_flags | BDRV_O_RDWR; ++ ++ /* ++ * See if we have a backing HD we can use to create our new image ++ * on top of. ++ */ ++ if (backup->sync == MIRROR_SYNC_MODE_TOP) { ++ source = backing_bs(bs); ++ if (!source) { ++ backup->sync = MIRROR_SYNC_MODE_FULL; ++ } ++ } ++ if (backup->sync == MIRROR_SYNC_MODE_NONE) { ++ source = bs; ++ flags |= BDRV_O_NO_BACKING; ++ set_backing_hd = true; ++ } ++ ++ size = bdrv_getlength(bs); ++ if (size < 0) { ++ error_setg_errno(errp, -size, "bdrv_getlength failed"); ++ goto out; ++ } ++ ++ if (backup->mode != NEW_IMAGE_MODE_EXISTING) { ++ assert(backup->format); ++ if (source) { ++ bdrv_refresh_filename(source); ++ bdrv_img_create(backup->target, backup->format, source->filename, ++ source->drv->format_name, NULL, ++ size, flags, false, &local_err); ++ } else { ++ bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, ++ size, flags, false, &local_err); ++ } ++ } + +- state->job = do_drive_backup(backup, common->block_job_txn, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } + ++ options = qdict_new(); ++ qdict_put_str(options, "discard", "unmap"); ++ qdict_put_str(options, "detect-zeroes", "unmap"); ++ if (backup->format) { ++ qdict_put_str(options, "driver", backup->format); ++ } ++ ++ target_bs = bdrv_open(backup->target, NULL, options, flags, errp); ++ if (!target_bs) { ++ goto out; ++ } ++ ++ if (set_backing_hd) { ++ bdrv_set_backing_hd(target_bs, source, &local_err); ++ if (local_err) { ++ goto unref; ++ } ++ } ++ ++ state->bs = bs; ++ ++ state->job = do_backup_common(qapi_DriveBackup_base(backup), ++ bs, target_bs, aio_context, ++ common->block_job_txn, errp); ++ ++unref: ++ bdrv_unref(target_bs); + out: + aio_context_release(aio_context); + } +@@ -3587,126 +3676,13 @@ static 
BlockJob *do_backup_common(BackupCommon *backup, + return job; + } + +-static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, +- Error **errp) +-{ +- BlockDriverState *bs; +- BlockDriverState *target_bs; +- BlockDriverState *source = NULL; +- BlockJob *job = NULL; +- AioContext *aio_context; +- QDict *options; +- Error *local_err = NULL; +- int flags; +- int64_t size; +- bool set_backing_hd = false; +- +- if (!backup->has_mode) { +- backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; +- } +- +- bs = bdrv_lookup_bs(backup->device, backup->device, errp); +- if (!bs) { +- return NULL; +- } +- +- if (!bs->drv) { +- error_setg(errp, "Device has no medium"); +- return NULL; +- } +- +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- +- if (!backup->has_format) { +- backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? +- NULL : (char *) bs->drv->format_name; +- } +- +- /* Early check to avoid creating target */ +- if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { +- goto out; +- } +- +- flags = bs->open_flags | BDRV_O_RDWR; +- +- /* +- * See if we have a backing HD we can use to create our new image +- * on top of. 
+- */ +- if (backup->sync == MIRROR_SYNC_MODE_TOP) { +- source = backing_bs(bs); +- if (!source) { +- backup->sync = MIRROR_SYNC_MODE_FULL; +- } +- } +- if (backup->sync == MIRROR_SYNC_MODE_NONE) { +- source = bs; +- flags |= BDRV_O_NO_BACKING; +- set_backing_hd = true; +- } +- +- size = bdrv_getlength(bs); +- if (size < 0) { +- error_setg_errno(errp, -size, "bdrv_getlength failed"); +- goto out; +- } +- +- if (backup->mode != NEW_IMAGE_MODE_EXISTING) { +- assert(backup->format); +- if (source) { +- bdrv_refresh_filename(source); +- bdrv_img_create(backup->target, backup->format, source->filename, +- source->drv->format_name, NULL, +- size, flags, false, &local_err); +- } else { +- bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, +- size, flags, false, &local_err); +- } +- } +- +- if (local_err) { +- error_propagate(errp, local_err); +- goto out; +- } +- +- options = qdict_new(); +- qdict_put_str(options, "discard", "unmap"); +- qdict_put_str(options, "detect-zeroes", "unmap"); +- if (backup->format) { +- qdict_put_str(options, "driver", backup->format); +- } +- +- target_bs = bdrv_open(backup->target, NULL, options, flags, errp); +- if (!target_bs) { +- goto out; +- } +- +- if (set_backing_hd) { +- bdrv_set_backing_hd(target_bs, source, &local_err); +- if (local_err) { +- goto unref; +- } +- } +- +- job = do_backup_common(qapi_DriveBackup_base(backup), +- bs, target_bs, aio_context, txn, errp); +- +-unref: +- bdrv_unref(target_bs); +-out: +- aio_context_release(aio_context); +- return job; +-} +- +-void qmp_drive_backup(DriveBackup *arg, Error **errp) ++void qmp_drive_backup(DriveBackup *backup, Error **errp) + { +- +- BlockJob *job; +- job = do_drive_backup(arg, NULL, errp); +- if (job) { +- job_start(&job->job); +- } ++ TransactionAction action = { ++ .type = TRANSACTION_ACTION_KIND_DRIVE_BACKUP, ++ .u.drive_backup.data = backup, ++ }; ++ blockdev_do_action(&action, errp); + } + + BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) 
+diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out +index 3645675..263b680 100644 +--- a/tests/qemu-iotests/141.out ++++ b/tests/qemu-iotests/141.out +@@ -13,6 +13,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/m. + Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "job0"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} + {'execute': 'blockdev-del', 'arguments': {'node-name': 'drv0'}} + {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} + {'execute': 'block-job-cancel', 'arguments': {'device': 'job0'}} +diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out +index 8379ac5..9a3b657 100644 +--- a/tests/qemu-iotests/185.out ++++ b/tests/qemu-iotests/185.out +@@ -65,6 +65,8 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 cluster_size=65536 l + Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "disk"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": 
"disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} + {"return": {}} + { 'execute': 'quit' } + {"return": {}} +diff --git a/tests/qemu-iotests/219 b/tests/qemu-iotests/219 +index e0c5166..655f54d 100755 +--- a/tests/qemu-iotests/219 ++++ b/tests/qemu-iotests/219 +@@ -63,7 +63,7 @@ def test_pause_resume(vm): + # logged immediately + iotests.log(vm.qmp('query-jobs')) + +-def test_job_lifecycle(vm, job, job_args, has_ready=False): ++def test_job_lifecycle(vm, job, job_args, has_ready=False, is_mirror=False): + global img_size + + iotests.log('') +@@ -135,6 +135,9 @@ def test_job_lifecycle(vm, job, job_args, has_ready=False): + iotests.log('Waiting for PENDING state...') + iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) + iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) ++ if is_mirror: ++ iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) ++ iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) + + if not job_args.get('auto-finalize', True): + # PENDING state: +@@ -218,7 +221,7 @@ with iotests.FilePath('disk.img') as disk_path, \ + + for auto_finalize in [True, False]: + for auto_dismiss in [True, False]: +- test_job_lifecycle(vm, 'drive-backup', job_args={ ++ test_job_lifecycle(vm, 'drive-backup', is_mirror=True, job_args={ + 'device': 'drive0-node', + 'target': copy_path, + 'sync': 'full', +diff --git a/tests/qemu-iotests/219.out b/tests/qemu-iotests/219.out +index 8ebd3fe..0ea5d0b 100644 +--- a/tests/qemu-iotests/219.out ++++ b/tests/qemu-iotests/219.out +@@ -135,6 +135,8 @@ Pause/resume in RUNNING + {"return": {}} + + Waiting for PENDING state... 
++{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "concluded"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +@@ -186,6 +188,8 @@ Pause/resume in RUNNING + {"return": {}} + + Waiting for PENDING state... ++{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "concluded"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +@@ -245,6 +249,8 @@ Pause/resume in RUNNING + {"return": {}} + + Waiting for PENDING state... 
++{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"return": [{"current-progress": 4194304, "id": "job0", "status": "pending", "total-progress": 4194304, "type": "backup"}]} +@@ -304,6 +310,8 @@ Pause/resume in RUNNING + {"return": {}} + + Waiting for PENDING state... ++{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"return": [{"current-progress": 4194304, "id": "job0", "status": "pending", "total-progress": 4194304, "type": "backup"}]} +-- +1.8.3.1 + diff --git a/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch b/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch new file mode 100644 index 0000000..a6177c6 --- /dev/null +++ b/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch @@ -0,0 +1,56 @@ +From f01178897c8f5ff98692a22059dd65e35677eaa3 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Mon, 10 Feb 2020 17:33:58 +0000 +Subject: [PATCH 18/18] docs/arm-cpu-features: Make kvm-no-adjvtime comment + clearer +MIME-Version: 1.0 +Content-Type: 
text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200210173358.16896-3-drjones@redhat.com> +Patchwork-id: 93772 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] docs/arm-cpu-features: Make kvm-no-adjvtime comment clearer +Bugzilla: 1801320 +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan +RH-Acked-by: Philippe Mathieu-Daudé + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1801320 + +Author: Philippe Mathieu-Daudé +Date: Fri, 07 Feb 2020 14:04:28 +0000 + + docs/arm-cpu-features: Make kvm-no-adjvtime comment clearer + + The bold text sounds like 'knock knock'. Only bolding the + second 'not' makes it easier to read. + + Fixes: dea101a1ae + Signed-off-by: Philippe Mathieu-Daudé + Reviewed-by: Andrew Jones + Message-id: 20200206225148.23923-1-philmd@redhat.com + Signed-off-by: Peter Maydell + +(cherry picked from commit fa3236a970b6ea5be3fa3ad258f1a75920ca1ebb) +Signed-off-by: Danilo C. L. de Paula +--- + docs/arm-cpu-features.rst | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst +index 45d1eb6..48d5054 100644 +--- a/docs/arm-cpu-features.rst ++++ b/docs/arm-cpu-features.rst +@@ -185,7 +185,7 @@ the list of KVM VCPU features and their descriptions. + + kvm-no-adjvtime By default kvm-no-adjvtime is disabled. This + means that by default the virtual time +- adjustment is enabled (vtime is *not not* ++ adjustment is enabled (vtime is not *not* + adjusted). 
+ + When virtual time adjustment is enabled each +-- +1.8.3.1 + diff --git a/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch b/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch new file mode 100644 index 0000000..f01dec2 --- /dev/null +++ b/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch @@ -0,0 +1,100 @@ +From cebc614e5ddd1f770c4d6dc26c066791f36e56df Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:24:02 +0000 +Subject: [PATCH 05/18] hmp: Allow using qdev ID for qemu-io command + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-5-kwolf@redhat.com> +Patchwork-id: 93750 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 4/6] hmp: Allow using qdev ID for qemu-io command +Bugzilla: 1781637 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +In order to issue requests on an existing BlockBackend with the +'qemu-io' HMP command, allow specifying the BlockBackend not only with a +BlockBackend name, but also with a qdev ID/QOM path for a device that +owns the (possibly anonymous) BlockBackend. + +Because qdev names could be conflicting with BlockBackend and node +names, introduce a -d option to explicitly address a device. If the +option is not given, a BlockBackend or a node is addressed. + +Signed-off-by: Kevin Wolf +(cherry picked from commit 89b6fc45614bb45dcd58f1590415afe5c2791abd) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + hmp-commands.hx | 8 +++++--- + monitor/hmp-cmds.c | 28 ++++++++++++++++++---------- + 2 files changed, 23 insertions(+), 13 deletions(-) + +diff --git a/hmp-commands.hx b/hmp-commands.hx +index cfcc044..dc23185 100644 +--- a/hmp-commands.hx ++++ b/hmp-commands.hx +@@ -1875,9 +1875,11 @@ ETEXI + + { + .name = "qemu-io", +- .args_type = "device:B,command:s", +- .params = "[device] \"[command]\"", +- .help = "run a qemu-io command on a block device", ++ .args_type = "qdev:-d,device:B,command:s", ++ .params = "[-d] [device] \"[command]\"", ++ .help = "run a qemu-io command on a block device\n\t\t\t" ++ "-d: [device] is a device ID rather than a " ++ "drive ID or node name", + .cmd = hmp_qemu_io, + }, + +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index b2551c1..5f8941d 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -2468,23 +2468,31 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) + { + BlockBackend *blk; + BlockBackend *local_blk = NULL; ++ bool qdev = qdict_get_try_bool(qdict, "qdev", false); + const char* device = qdict_get_str(qdict, "device"); + const char* command = qdict_get_str(qdict, "command"); + Error *err = NULL; + int ret; + +- blk = blk_by_name(device); +- if (!blk) { +- BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); +- if (bs) { +- blk = local_blk = blk_new(bdrv_get_aio_context(bs), +- 0, BLK_PERM_ALL); +- ret = blk_insert_bs(blk, bs, &err); +- if (ret < 0) { ++ if (qdev) { ++ blk = blk_by_qdev_id(device, &err); ++ if (!blk) { ++ goto fail; ++ } ++ } else { ++ blk = blk_by_name(device); ++ if (!blk) { ++ BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); ++ if (bs) { ++ blk = local_blk = blk_new(bdrv_get_aio_context(bs), ++ 0, BLK_PERM_ALL); ++ ret = blk_insert_bs(blk, bs, &err); ++ if (ret < 0) { ++ goto fail; ++ } ++ } else { + goto fail; + } +- } else { +- goto fail; + } + } + +-- +1.8.3.1 + diff --git a/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch 
b/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch new file mode 100644 index 0000000..1027341 --- /dev/null +++ b/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch @@ -0,0 +1,95 @@ +From ccda4494b0ea4b81b6b0c3e539a0bcf7e673c68c Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Thu, 5 Dec 2019 21:56:50 +0000 +Subject: [PATCH 01/18] i386: Resolve CPU models to v1 by default + +RH-Author: Eduardo Habkost +Message-id: <20191205225650.772600-2-ehabkost@redhat.com> +Patchwork-id: 92907 +O-Subject: [RHEL-AV-8.1.1 qemu-kvm PATCH 1/1] i386: Resolve CPU models to v1 by default +Bugzilla: 1787291 1779078 1779078 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1779078 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25187823 +Upstream: submitted, Message-Id: <20191205223339.764534-1-ehabkost@redhat.com> + +When using `query-cpu-definitions` using `-machine none`, +QEMU is resolving all CPU models to their latest versions. The +actual CPU model version being used by another machine type (e.g. +`pc-q35-4.0`) might be different. + +In theory, this was OK because the correct CPU model +version is returned when using the correct `-machine` argument. + +Except that in practice, this breaks libvirt expectations: +libvirt always use `-machine none` when checking if a CPU model +is runnable, because runnability is not expected to be affected +when the machine type is changed. + +For example, when running on a Haswell host without TSX, +Haswell-v4 is runnable, but Haswell-v1 is not. On those hosts, +`query-cpu-definitions` says Haswell is runnable if using +`-machine none`, but Haswell is actually not runnable using any +of the `pc-*` machine types (because they resolve Haswell to +Haswell-v1). In other words, we're breaking the "runnability +guarantee" we promised to not break for a few releases (see +qemu-deprecated.texi). 
+ +To address this issue, change the default CPU model version to v1 +on all machine types, so we make `query-cpu-definitions` output +when using `-machine none` match the results when using `pc-*`. +This will change in the future (the plan is to always return the +latest CPU model version if using `-machine none`), but only +after giving libvirt the opportunity to adapt. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1779078 +Signed-off-by: Eduardo Habkost +Signed-off-by: Danilo C. L. de Paula +--- + qemu-deprecated.texi | 7 +++++++ + target/i386/cpu.c | 8 +++++++- + 2 files changed, 14 insertions(+), 1 deletion(-) + +diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi +index 4b4b742..534ebe9 100644 +--- a/qemu-deprecated.texi ++++ b/qemu-deprecated.texi +@@ -374,6 +374,13 @@ guarantees must resolve the CPU model aliases using te + ``alias-of'' field returned by the ``query-cpu-definitions'' QMP + command. + ++While those guarantees are kept, the return value of ++``query-cpu-definitions'' will have existing CPU model aliases ++point to a version that doesn't break runnability guarantees ++(specifically, version 1 of those CPU models). In future QEMU ++versions, aliases will point to newer CPU model versions ++depending on the machine type, so management software must ++resolve CPU model aliases before starting a virtual machine. + + @node Recently removed features + @appendix Recently removed features +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 6dce6f2..863192c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3926,7 +3926,13 @@ static PropValue tcg_default_props[] = { + }; + + +-X86CPUVersion default_cpu_version = CPU_VERSION_LATEST; ++/* ++ * We resolve CPU model aliases using -v1 when using "-machine ++ * none", but this is just for compatibility while libvirt isn't ++ * adapted to resolve CPU model versions before creating VMs. ++ * See "Runnability guarantee of CPU models" at * qemu-deprecated.texi. 
++X86CPUVersion default_cpu_version = 1; + + void x86_cpu_set_default_version(X86CPUVersion version) + { +-- +1.8.3.1 + diff --git a/kvm-iotests-Create-VM.blockdev_create.patch b/kvm-iotests-Create-VM.blockdev_create.patch new file mode 100644 index 0000000..805b31a --- /dev/null +++ b/kvm-iotests-Create-VM.blockdev_create.patch @@ -0,0 +1,59 @@ +From 05fedde1374abb180cd2b51457385d8128aa7fe4 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:24:00 +0000 +Subject: [PATCH 03/18] iotests: Create VM.blockdev_create() + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-3-kwolf@redhat.com> +Patchwork-id: 93748 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/6] iotests: Create VM.blockdev_create() +Bugzilla: 1781637 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +We have several almost identical copies of a blockdev_create() function +in different test cases. Time to create one unified function in +iotests.py. + +To keep the diff manageable, this patch only creates the function and +follow-up patches will convert the individual test cases. + +Signed-off-by: Kevin Wolf +(cherry picked from commit e9dbd1cae86f7cb6f8e470e1485aeb0c6e23ae64) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/iotests.py | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 3cff671..5741efb 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -638,6 +638,22 @@ class VM(qtest.QEMUQtestMachine): + elif status == 'null': + return error + ++ # Returns None on success, and an error string on failure ++ def blockdev_create(self, options, job_id='job0', filters=None): ++ if filters is None: ++ filters = [filter_qmp_testfiles] ++ result = self.qmp_log('blockdev-create', filters=filters, ++ job_id=job_id, options=options) ++ ++ if 'return' in result: ++ assert result['return'] == {} ++ job_result = self.run_job(job_id) ++ else: ++ job_result = result['error'] ++ ++ log("") ++ return job_result ++ + def enable_migration_events(self, name): + log('Enabling migration QMP events on %s...' % name) + log(self.qmp('migrate-set-capabilities', capabilities=[ +-- +1.8.3.1 + diff --git a/kvm-iotests-Support-job-complete-in-run_job.patch b/kvm-iotests-Support-job-complete-in-run_job.patch new file mode 100644 index 0000000..08971a0 --- /dev/null +++ b/kvm-iotests-Support-job-complete-in-run_job.patch @@ -0,0 +1,46 @@ +From a3778aef0be61dead835af39073a62bbf72c8e20 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:23:59 +0000 +Subject: [PATCH 02/18] iotests: Support job-complete in run_job() + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-2-kwolf@redhat.com> +Patchwork-id: 93746 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/6] iotests: Support job-complete in run_job() +Bugzilla: 1781637 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Automatically complete jobs that have a 'ready' state and need an +explicit job-complete. Without this, run_job() would hang for such +jobs. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Alberto Garcia +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit 4688c4e32ec76004676470f11734478799673d6d) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/iotests.py | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index df07089..3cff671 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -617,6 +617,8 @@ class VM(qtest.QEMUQtestMachine): + error = j['error'] + if use_log: + log('Job failed: %s' % (j['error'])) ++ elif status == 'ready': ++ self.qmp_log('job-complete', id=job) + elif status == 'pending' and not auto_finalize: + if pre_finalize: + pre_finalize() +-- +1.8.3.1 + diff --git a/kvm-iotests-Test-external-snapshot-with-VM-state.patch b/kvm-iotests-Test-external-snapshot-with-VM-state.patch new file mode 100644 index 0000000..6fcb2f6 --- /dev/null +++ b/kvm-iotests-Test-external-snapshot-with-VM-state.patch @@ -0,0 +1,189 @@ +From 38b0cff9703fc740c30f5874973ac1be88f94d9f Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:24:03 +0000 +Subject: [PATCH 06/18] iotests: Test external snapshot with VM state + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-6-kwolf@redhat.com> +Patchwork-id: 93752 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/6] iotests: Test external snapshot with VM state +Bugzilla: 1781637 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +This tests creating an external snapshot with VM state (which results in +an active overlay over an inactive backing file, which is also the root +node of an inactive BlockBackend), re-activating the images and +performing some operations to test that the re-activation worked as +intended. 
+ +Signed-off-by: Kevin Wolf +(cherry picked from commit f62f08ab7a9d902da70078992248ec5c98f652ad) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/280 | 83 ++++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/280.out | 50 ++++++++++++++++++++++++++++ + tests/qemu-iotests/group | 1 + + 3 files changed, 134 insertions(+) + create mode 100755 tests/qemu-iotests/280 + create mode 100644 tests/qemu-iotests/280.out + +diff --git a/tests/qemu-iotests/280 b/tests/qemu-iotests/280 +new file mode 100755 +index 0000000..0b1fa8e +--- /dev/null ++++ b/tests/qemu-iotests/280 +@@ -0,0 +1,83 @@ ++#!/usr/bin/env python ++# ++# Copyright (C) 2019 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see <http://www.gnu.org/licenses/>. ++# ++# Creator/Owner: Kevin Wolf ++# ++# Test migration to file for taking an external snapshot with VM state. 
++ ++import iotests ++import os ++ ++iotests.verify_image_format(supported_fmts=['qcow2']) ++iotests.verify_protocol(supported=['file']) ++iotests.verify_platform(['linux']) ++ ++with iotests.FilePath('base') as base_path , \ ++ iotests.FilePath('top') as top_path, \ ++ iotests.VM() as vm: ++ ++ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base_path, '64M') ++ ++ iotests.log('=== Launch VM ===') ++ vm.add_object('iothread,id=iothread0') ++ vm.add_blockdev('file,filename=%s,node-name=base-file' % (base_path)) ++ vm.add_blockdev('%s,file=base-file,node-name=base-fmt' % (iotests.imgfmt)) ++ vm.add_device('virtio-blk,drive=base-fmt,iothread=iothread0,id=vda') ++ vm.launch() ++ ++ vm.enable_migration_events('VM') ++ ++ iotests.log('\n=== Migrate to file ===') ++ vm.qmp_log('migrate', uri='exec:cat > /dev/null') ++ ++ with iotests.Timeout(3, 'Migration does not complete'): ++ vm.wait_migration() ++ ++ iotests.log('\nVM is now stopped:') ++ iotests.log(vm.qmp('query-migrate')['return']['status']) ++ vm.qmp_log('query-status') ++ ++ iotests.log('\n=== Create a snapshot of the disk image ===') ++ vm.blockdev_create({ ++ 'driver': 'file', ++ 'filename': top_path, ++ 'size': 0, ++ }) ++ vm.qmp_log('blockdev-add', node_name='top-file', ++ driver='file', filename=top_path, ++ filters=[iotests.filter_qmp_testfiles]) ++ ++ vm.blockdev_create({ ++ 'driver': iotests.imgfmt, ++ 'file': 'top-file', ++ 'size': 1024 * 1024, ++ }) ++ vm.qmp_log('blockdev-add', node_name='top-fmt', ++ driver=iotests.imgfmt, file='top-file') ++ ++ vm.qmp_log('blockdev-snapshot', node='base-fmt', overlay='top-fmt') ++ ++ iotests.log('\n=== Resume the VM and simulate a write request ===') ++ vm.qmp_log('cont') ++ iotests.log(vm.hmp_qemu_io('-d vda/virtio-backend', 'write 4k 4k')) ++ ++ iotests.log('\n=== Commit it to the backing file ===') ++ result = vm.qmp_log('block-commit', job_id='job0', auto_dismiss=False, ++ device='top-fmt', top_node='top-fmt', ++ filters=[iotests.filter_qmp_testfiles]) ++ 
if 'return' in result: ++ vm.run_job('job0') +diff --git a/tests/qemu-iotests/280.out b/tests/qemu-iotests/280.out +new file mode 100644 +index 0000000..5d382fa +--- /dev/null ++++ b/tests/qemu-iotests/280.out +@@ -0,0 +1,50 @@ ++Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=67108864 cluster_size=65536 lazy_refcounts=off refcount_bits=16 ++ ++=== Launch VM === ++Enabling migration QMP events on VM... ++{"return": {}} ++ ++=== Migrate to file === ++{"execute": "migrate", "arguments": {"uri": "exec:cat > /dev/null"}} ++{"return": {}} ++{"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++ ++VM is now stopped: ++completed ++{"execute": "query-status", "arguments": {}} ++{"return": {"running": false, "singlestep": false, "status": "postmigrate"}} ++ ++=== Create a snapshot of the disk image === ++{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "file", "filename": "TEST_DIR/PID-top", "size": 0}}} ++{"return": {}} ++{"execute": "job-dismiss", "arguments": {"id": "job0"}} ++{"return": {}} ++ ++{"execute": "blockdev-add", "arguments": {"driver": "file", "filename": "TEST_DIR/PID-top", "node-name": "top-file"}} ++{"return": {}} ++{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "qcow2", "file": "top-file", "size": 1048576}}} ++{"return": {}} ++{"execute": "job-dismiss", "arguments": {"id": "job0"}} ++{"return": {}} ++ ++{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": "top-file", "node-name": "top-fmt"}} ++{"return": {}} ++{"execute": "blockdev-snapshot", "arguments": {"node": "base-fmt", "overlay": "top-fmt"}} ++{"return": {}} ++ ++=== Resume the VM and simulate a write request === ++{"execute": 
"cont", "arguments": {}} ++{"return": {}} ++{"return": ""} ++ ++=== Commit it to the backing file === ++{"execute": "block-commit", "arguments": {"auto-dismiss": false, "device": "top-fmt", "job-id": "job0", "top-node": "top-fmt"}} ++{"return": {}} ++{"execute": "job-complete", "arguments": {"id": "job0"}} ++{"return": {}} ++{"data": {"device": "job0", "len": 65536, "offset": 65536, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"device": "job0", "len": 65536, "offset": 65536, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "job-dismiss", "arguments": {"id": "job0"}} ++{"return": {}} +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index 06cc734..01301cd 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -286,3 +286,4 @@ + 272 rw + 273 backing quick + 277 rw quick ++280 rw migration quick +-- +1.8.3.1 + diff --git a/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch b/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch new file mode 100644 index 0000000..b09439b --- /dev/null +++ b/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch @@ -0,0 +1,322 @@ +From 6b9a6ba9ed753ad7aa714b35de938ebeeb4fa6cb Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Fri, 7 Feb 2020 10:27:49 +0000 +Subject: [PATCH 16/18] iotests: Test handling of AioContexts with some + blockdev actions + +RH-Author: Sergio Lopez Pascual +Message-id: <20200207112749.25073-10-slp@redhat.com> +Patchwork-id: 93762 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 9/9] iotests: Test handling of AioContexts with some blockdev actions +Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Includes the following tests: + + - Adding a 
dirty bitmap. + * RHBZ: 1782175 + + - Starting a drive-mirror to an NBD-backed target. + * RHBZ: 1746217, 1773517 + + - Aborting an external snapshot transaction. + * RHBZ: 1779036 + + - Aborting a blockdev backup transaction. + * RHBZ: 1782111 + +For each one of them, a VM with a number of disks running in an +IOThread AioContext is used. + +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +(cherry picked from commit 9b8c59e7610b9c5315ef093d801843dbe8debfac) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/281 | 247 +++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/281.out | 5 + + tests/qemu-iotests/group | 1 + + 3 files changed, 253 insertions(+) + create mode 100755 tests/qemu-iotests/281 + create mode 100644 tests/qemu-iotests/281.out + +diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 +new file mode 100755 +index 0000000..269d583 +--- /dev/null ++++ b/tests/qemu-iotests/281 +@@ -0,0 +1,247 @@ ++#!/usr/bin/env python ++# ++# Test cases for blockdev + IOThread interactions ++# ++# Copyright (C) 2019 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see <http://www.gnu.org/licenses/>. 
++# ++ ++import os ++import iotests ++from iotests import qemu_img ++ ++image_len = 64 * 1024 * 1024 ++ ++# Test for RHBZ#1782175 ++class TestDirtyBitmapIOThread(iotests.QMPTestCase): ++ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') ++ images = { 'drive0': drive0_img } ++ ++ def setUp(self): ++ for name in self.images: ++ qemu_img('create', '-f', iotests.imgfmt, ++ self.images[name], str(image_len)) ++ ++ self.vm = iotests.VM() ++ self.vm.add_object('iothread,id=iothread0') ++ ++ for name in self.images: ++ self.vm.add_blockdev('driver=file,filename=%s,node-name=file_%s' ++ % (self.images[name], name)) ++ self.vm.add_blockdev('driver=qcow2,file=file_%s,node-name=%s' ++ % (name, name)) ++ ++ self.vm.launch() ++ self.vm.qmp('x-blockdev-set-iothread', ++ node_name='drive0', iothread='iothread0', ++ force=True) ++ ++ def tearDown(self): ++ self.vm.shutdown() ++ for name in self.images: ++ os.remove(self.images[name]) ++ ++ def test_add_dirty_bitmap(self): ++ result = self.vm.qmp( ++ 'block-dirty-bitmap-add', ++ node='drive0', ++ name='bitmap1', ++ persistent=True, ++ ) ++ ++ self.assert_qmp(result, 'return', {}) ++ ++ ++# Test for RHBZ#1746217 & RHBZ#1773517 ++class TestNBDMirrorIOThread(iotests.QMPTestCase): ++ nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') ++ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') ++ mirror_img = os.path.join(iotests.test_dir, 'mirror.img') ++ images = { 'drive0': drive0_img, 'mirror': mirror_img } ++ ++ def setUp(self): ++ for name in self.images: ++ qemu_img('create', '-f', iotests.imgfmt, ++ self.images[name], str(image_len)) ++ ++ self.vm_src = iotests.VM(path_suffix='src') ++ self.vm_src.add_object('iothread,id=iothread0') ++ self.vm_src.add_blockdev('driver=file,filename=%s,node-name=file0' ++ % (self.drive0_img)) ++ self.vm_src.add_blockdev('driver=qcow2,file=file0,node-name=drive0') ++ self.vm_src.launch() ++ self.vm_src.qmp('x-blockdev-set-iothread', ++ node_name='drive0', iothread='iothread0', ++ 
force=True) ++ ++ self.vm_tgt = iotests.VM(path_suffix='tgt') ++ self.vm_tgt.add_object('iothread,id=iothread0') ++ self.vm_tgt.add_blockdev('driver=file,filename=%s,node-name=file0' ++ % (self.mirror_img)) ++ self.vm_tgt.add_blockdev('driver=qcow2,file=file0,node-name=drive0') ++ self.vm_tgt.launch() ++ self.vm_tgt.qmp('x-blockdev-set-iothread', ++ node_name='drive0', iothread='iothread0', ++ force=True) ++ ++ def tearDown(self): ++ self.vm_src.shutdown() ++ self.vm_tgt.shutdown() ++ for name in self.images: ++ os.remove(self.images[name]) ++ ++ def test_nbd_mirror(self): ++ result = self.vm_tgt.qmp( ++ 'nbd-server-start', ++ addr={ ++ 'type': 'unix', ++ 'data': { 'path': self.nbd_sock } ++ } ++ ) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.vm_tgt.qmp( ++ 'nbd-server-add', ++ device='drive0', ++ writable=True ++ ) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.vm_src.qmp( ++ 'drive-mirror', ++ device='drive0', ++ target='nbd+unix:///drive0?socket=' + self.nbd_sock, ++ sync='full', ++ mode='existing', ++ speed=64*1024*1024, ++ job_id='j1' ++ ) ++ self.assert_qmp(result, 'return', {}) ++ ++ self.vm_src.event_wait(name="BLOCK_JOB_READY") ++ ++ ++# Test for RHBZ#1779036 ++class TestExternalSnapshotAbort(iotests.QMPTestCase): ++ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') ++ snapshot_img = os.path.join(iotests.test_dir, 'snapshot.img') ++ images = { 'drive0': drive0_img, 'snapshot': snapshot_img } ++ ++ def setUp(self): ++ for name in self.images: ++ qemu_img('create', '-f', iotests.imgfmt, ++ self.images[name], str(image_len)) ++ ++ self.vm = iotests.VM() ++ self.vm.add_object('iothread,id=iothread0') ++ self.vm.add_blockdev('driver=file,filename=%s,node-name=file0' ++ % (self.drive0_img)) ++ self.vm.add_blockdev('driver=qcow2,file=file0,node-name=drive0') ++ self.vm.launch() ++ self.vm.qmp('x-blockdev-set-iothread', ++ node_name='drive0', iothread='iothread0', ++ force=True) ++ ++ def tearDown(self): ++ self.vm.shutdown() 
++ for name in self.images: ++ os.remove(self.images[name]) ++ ++ def test_external_snapshot_abort(self): ++ # Use a two actions transaction with a bogus values on the second ++ # one to trigger an abort of the transaction. ++ result = self.vm.qmp('transaction', actions=[ ++ { ++ 'type': 'blockdev-snapshot-sync', ++ 'data': { 'node-name': 'drive0', ++ 'snapshot-file': self.snapshot_img, ++ 'snapshot-node-name': 'snap1', ++ 'mode': 'absolute-paths', ++ 'format': 'qcow2' } ++ }, ++ { ++ 'type': 'blockdev-snapshot-sync', ++ 'data': { 'node-name': 'drive0', ++ 'snapshot-file': '/fakesnapshot', ++ 'snapshot-node-name': 'snap2', ++ 'mode': 'absolute-paths', ++ 'format': 'qcow2' } ++ }, ++ ]) ++ ++ # Crashes on failure, we expect this error. ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ ++ ++# Test for RHBZ#1782111 ++class TestBlockdevBackupAbort(iotests.QMPTestCase): ++ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') ++ drive1_img = os.path.join(iotests.test_dir, 'drive1.img') ++ snap0_img = os.path.join(iotests.test_dir, 'snap0.img') ++ snap1_img = os.path.join(iotests.test_dir, 'snap1.img') ++ images = { 'drive0': drive0_img, ++ 'drive1': drive1_img, ++ 'snap0': snap0_img, ++ 'snap1': snap1_img } ++ ++ def setUp(self): ++ for name in self.images: ++ qemu_img('create', '-f', iotests.imgfmt, ++ self.images[name], str(image_len)) ++ ++ self.vm = iotests.VM() ++ self.vm.add_object('iothread,id=iothread0') ++ self.vm.add_device('virtio-scsi,iothread=iothread0') ++ ++ for name in self.images: ++ self.vm.add_blockdev('driver=file,filename=%s,node-name=file_%s' ++ % (self.images[name], name)) ++ self.vm.add_blockdev('driver=qcow2,file=file_%s,node-name=%s' ++ % (name, name)) ++ ++ self.vm.add_device('scsi-hd,drive=drive0') ++ self.vm.add_device('scsi-hd,drive=drive1') ++ self.vm.launch() ++ ++ def tearDown(self): ++ self.vm.shutdown() ++ for name in self.images: ++ os.remove(self.images[name]) ++ ++ def test_blockdev_backup_abort(self): ++ # Use a two 
actions transaction with a bogus values on the second ++ # one to trigger an abort of the transaction. ++ result = self.vm.qmp('transaction', actions=[ ++ { ++ 'type': 'blockdev-backup', ++ 'data': { 'device': 'drive0', ++ 'target': 'snap0', ++ 'sync': 'full', ++ 'job-id': 'j1' } ++ }, ++ { ++ 'type': 'blockdev-backup', ++ 'data': { 'device': 'drive1', ++ 'target': 'snap1', ++ 'sync': 'full' } ++ }, ++ ]) ++ ++ # Hangs on failure, we expect this error. ++ self.assert_qmp(result, 'error/class', 'GenericError') ++ ++if __name__ == '__main__': ++ iotests.main(supported_fmts=['qcow2'], ++ supported_protocols=['file']) +diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out +new file mode 100644 +index 0000000..89968f3 +--- /dev/null ++++ b/tests/qemu-iotests/281.out +@@ -0,0 +1,5 @@ ++.... ++---------------------------------------------------------------------- ++Ran 4 tests ++ ++OK +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index 01301cd..c0e8197 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -287,3 +287,4 @@ + 273 backing quick + 277 rw quick + 280 rw migration quick ++281 rw quick +-- +1.8.3.1 + diff --git a/kvm-iotests.py-Let-wait_migration-wait-even-more.patch b/kvm-iotests.py-Let-wait_migration-wait-even-more.patch new file mode 100644 index 0000000..cda8037 --- /dev/null +++ b/kvm-iotests.py-Let-wait_migration-wait-even-more.patch @@ -0,0 +1,123 @@ +From d6df1426ae65b3a0d50bdbb1f8a7246386dd6ebf Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 7 Feb 2020 11:24:04 +0000 +Subject: [PATCH 07/18] iotests.py: Let wait_migration wait even more + +RH-Author: Kevin Wolf +Message-id: <20200207112404.25198-7-kwolf@redhat.com> +Patchwork-id: 93751 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/6] iotests.py: Let wait_migration wait even more +Bugzilla: 1781637 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +From: Max Reitz + +The "migration completed" event 
may be sent (on the source, to be +specific) before the migration is actually completed, so the VM runstate +will still be "finish-migrate" instead of "postmigrate". So ask the +users of VM.wait_migration() to specify the final runstate they desire +and then poll the VM until it has reached that state. (This should be +over very quickly, so busy polling is fine.) + +Without this patch, I see intermittent failures in the new iotest 280 +under high system load. I have not yet seen such failures with other +iotests that use VM.wait_migration() and query-status afterwards, but +maybe they just occur even more rarely, or it is because they also wait +on the destination VM to be running. + +Signed-off-by: Max Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 8da7969bd7014f6de037d8ae132b40721944b186) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/234 | 8 ++++---- + tests/qemu-iotests/262 | 4 ++-- + tests/qemu-iotests/280 | 2 +- + tests/qemu-iotests/iotests.py | 6 +++++- + 4 files changed, 12 insertions(+), 8 deletions(-) + +diff --git a/tests/qemu-iotests/234 b/tests/qemu-iotests/234 +index 34c818c..59a7f94 100755 +--- a/tests/qemu-iotests/234 ++++ b/tests/qemu-iotests/234 +@@ -69,9 +69,9 @@ with iotests.FilePath('img') as img_path, \ + iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo_a))) + with iotests.Timeout(3, 'Migration does not complete'): + # Wait for the source first (which includes setup=setup) +- vm_a.wait_migration() ++ vm_a.wait_migration('postmigrate') + # Wait for the destination second (which does not) +- vm_b.wait_migration() ++ vm_b.wait_migration('running') + + iotests.log(vm_a.qmp('query-migrate')['return']['status']) + iotests.log(vm_b.qmp('query-migrate')['return']['status']) +@@ -98,9 +98,9 @@ with iotests.FilePath('img') as img_path, \ + iotests.log(vm_b.qmp('migrate', uri='exec:cat >%s' % (fifo_b))) + with iotests.Timeout(3, 'Migration does not complete'): + # Wait for the source 
first (which includes setup=setup) +- vm_b.wait_migration() ++ vm_b.wait_migration('postmigrate') + # Wait for the destination second (which does not) +- vm_a.wait_migration() ++ vm_a.wait_migration('running') + + iotests.log(vm_a.qmp('query-migrate')['return']['status']) + iotests.log(vm_b.qmp('query-migrate')['return']['status']) +diff --git a/tests/qemu-iotests/262 b/tests/qemu-iotests/262 +index 0963daa..bbcb526 100755 +--- a/tests/qemu-iotests/262 ++++ b/tests/qemu-iotests/262 +@@ -71,9 +71,9 @@ with iotests.FilePath('img') as img_path, \ + iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo))) + with iotests.Timeout(3, 'Migration does not complete'): + # Wait for the source first (which includes setup=setup) +- vm_a.wait_migration() ++ vm_a.wait_migration('postmigrate') + # Wait for the destination second (which does not) +- vm_b.wait_migration() ++ vm_b.wait_migration('running') + + iotests.log(vm_a.qmp('query-migrate')['return']['status']) + iotests.log(vm_b.qmp('query-migrate')['return']['status']) +diff --git a/tests/qemu-iotests/280 b/tests/qemu-iotests/280 +index 0b1fa8e..85e9114 100755 +--- a/tests/qemu-iotests/280 ++++ b/tests/qemu-iotests/280 +@@ -45,7 +45,7 @@ with iotests.FilePath('base') as base_path , \ + vm.qmp_log('migrate', uri='exec:cat > /dev/null') + + with iotests.Timeout(3, 'Migration does not complete'): +- vm.wait_migration() ++ vm.wait_migration('postmigrate') + + iotests.log('\nVM is now stopped:') + iotests.log(vm.qmp('query-migrate')['return']['status']) +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 5741efb..0c55f7b 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -663,12 +663,16 @@ class VM(qtest.QEMUQtestMachine): + } + ])) + +- def wait_migration(self): ++ def wait_migration(self, expect_runstate): + while True: + event = self.event_wait('MIGRATION') + log(event, filters=[filter_qmp_event]) + if event['data']['status'] == 'completed': + break ++ # The 
event may occur in finish-migrate, so wait for the expected ++ # post-migration runstate ++ while self.qmp('query-status')['return']['status'] != expect_runstate: ++ pass + + def node_info(self, node_name): + nodes = self.qmp('query-named-block-nodes') +-- +1.8.3.1 + diff --git a/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch b/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch new file mode 100644 index 0000000..55f328d --- /dev/null +++ b/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch @@ -0,0 +1,81 @@ +From c82cf5c08617c947b34eb490d1714729103e3379 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Mon, 10 Feb 2020 17:33:57 +0000 +Subject: [PATCH 17/18] target/arm/monitor: query-cpu-model-expansion crashed + qemu when using machine type none +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +Message-id: <20200210173358.16896-2-drjones@redhat.com> +Patchwork-id: 93773 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] target/arm/monitor: query-cpu-model-expansion crashed qemu when using machine type none +Bugzilla: 1801320 +RH-Acked-by: Auger Eric +RH-Acked-by: Gavin Shan +RH-Acked-by: Philippe Mathieu-Daudé + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1801320 + +Author: Liang Yan +Date: Fri, 07 Feb 2020 14:04:21 +0000 + + target/arm/monitor: query-cpu-model-expansion crashed qemu when using machine type none + + Commit e19afd566781 mentioned that target-arm only supports queryable + cpu models 'max', 'host', and the current type when KVM is in use. + The logic works well until using machine type none. + + For machine type none, cpu_type will be null if cpu option is not + set by command line, strlen(cpu_type) will terminate process. + So We add a check above it. + + This won't affect i386 and s390x since they do not use current_cpu. 
+ + Signed-off-by: Liang Yan + Message-id: 20200203134251.12986-1-lyan@suse.com + Reviewed-by: Andrew Jones + Tested-by: Andrew Jones + Signed-off-by: Peter Maydell + +(cherry picked from commit 0999a4ba8718aa96105b978d3567fc7e90244c7e) +Signed-off-by: Danilo C. L. de Paula +--- + target/arm/monitor.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +diff --git a/target/arm/monitor.c b/target/arm/monitor.c +index 9725dff..c2dc790 100644 +--- a/target/arm/monitor.c ++++ b/target/arm/monitor.c +@@ -137,17 +137,20 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, + } + + if (kvm_enabled()) { +- const char *cpu_type = current_machine->cpu_type; +- int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); + bool supported = false; + + if (!strcmp(model->name, "host") || !strcmp(model->name, "max")) { + /* These are kvmarm's recommended cpu types */ + supported = true; +- } else if (strlen(model->name) == len && +- !strncmp(model->name, cpu_type, len)) { +- /* KVM is enabled and we're using this type, so it works. */ +- supported = true; ++ } else if (current_machine->cpu_type) { ++ const char *cpu_type = current_machine->cpu_type; ++ int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); ++ ++ if (strlen(model->name) == len && ++ !strncmp(model->name, cpu_type, len)) { ++ /* KVM is enabled and we're using this type, so it works. */ ++ supported = true; ++ } + } + if (!supported) { + error_setg(errp, "We cannot guarantee the CPU type '%s' works " +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 388d2d5..d5f53d8 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 9%{?dist} +Release: 10%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -401,6 +401,99 @@ Patch160: kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch Patch161: kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch # For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs Patch162: kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch +# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) +# For bz#1787291 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) [rhel-8.1.0.z] +# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) +# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) +Patch163: kvm-i386-Resolve-CPU-models-to-v1-by-default.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch164: kvm-iotests-Support-job-complete-in-run_job.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch165: kvm-iotests-Create-VM.blockdev_create.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch166: kvm-block-Activate-recursively-even-for-already-active-n.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch167: kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch168: kvm-iotests-Test-external-snapshot-with-VM-state.patch +# For bz#1781637 - qemu crashed when do mem and disk snapshot +Patch169: kvm-iotests.py-Let-wait_migration-wait-even-more.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without 
bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch170: kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch171: kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data 
plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch172: kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch173: kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch174: kvm-backup-top-Begin-drain-earlier.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in 
transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch175: kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch176: kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch +# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch177: kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch +# For bz#1745606 - Qemu hang when do incremental live 
backup in transaction mode without bitmap +# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation +# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable +# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist +# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) +# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) +# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided +Patch178: kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch +# For bz#1801320 - aarch64: backport query-cpu-model-expansion and adjvtime document fixes +Patch179: kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch +# For bz#1801320 - aarch64: backport query-cpu-model-expansion and adjvtime document fixes +Patch180: kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch BuildRequires: wget BuildRequires: rpm-build @@ -1334,6 +1427,48 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Fri Feb 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-10.el8 +- kvm-i386-Resolve-CPU-models-to-v1-by-default.patch [bz#1779078 bz#1787291 bz#1779078 bz#1779078] +- kvm-iotests-Support-job-complete-in-run_job.patch [bz#1781637] +- kvm-iotests-Create-VM.blockdev_create.patch [bz#1781637] +- kvm-block-Activate-recursively-even-for-already-active-n.patch [bz#1781637] +- kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch [bz#1781637] +- kvm-iotests-Test-external-snapshot-with-VM-state.patch [bz#1781637] +- kvm-iotests.py-Let-wait_migration-wait-even-more.patch [bz#1781637] +- kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch [bz#1745606 bz#1746217 bz#1773517 
bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-backup-top-Begin-drain-earlier.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch [bz#1801320] +- kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch [bz#1801320] +- Resolves: bz#1745606 + (Qemu hang when do incremental live backup in transaction mode without bitmap) +- Resolves: bz#1746217 + (Src qemu hang when do storage vm migration during guest installation) +- Resolves: bz#1773517 + (Src qemu hang when do storage vm migration with dataplane enable) +- Resolves: bz#1779036 + (Qemu coredump when do snapshot in transaction mode with one snapshot path not exist) +- Resolves: bz#1779078 + (RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm)) +- Resolves: bz#1781637 + (qemu crashed when do mem and disk snapshot) +- Resolves: bz#1782111 + (Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable)) 
+- Resolves: bz#1782175 + (Qemu core dump when add persistent bitmap(data plane enable)) +- Resolves: bz#1783965 + (Qemu core dump when do backup with sync: bitmap and no bitmap provided) +- Resolves: bz#1787291 + (RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) [rhel-8.1.0.z]) +- Resolves: bz#1801320 + (aarch64: backport query-cpu-model-expansion and adjvtime document fixes) + * Mon Feb 10 2020 Danilo Cesar Lemes de Paula - 4.2.0-9.el8 - kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch [bz#1776638] - kvm-xics-Don-t-deassert-outputs.patch [bz#1776638] From 77ffa9e8e96ebbc37adf3436f09c6c8a20fff5b6 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Sun, 23 Feb 2020 05:41:47 +0000 Subject: [PATCH 066/195] * Sun Feb 23 2020 Danilo Cesar Lemes de Paula - 4.2.0-11.el8 - kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch [bz#1796240] - kvm-util-add-slirp_fmt-helpers.patch [bz#1798994] - kvm-tcp_emu-fix-unsafe-snprintf-usages.patch [bz#1798994] - kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch [bz#1791590] - kvm-virtio-make-virtio_delete_queue-idempotent.patch [bz#1791590] - kvm-virtio-reset-region-cache-when-on-queue-deletion.patch [bz#1791590] - kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch [bz#1791590] - Resolves: bz#1791590 ([Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device) - Resolves: bz#1796240 (Enable hw accelerated cache-count-flush by default for POWER9 DD2.3 cpus) - Resolves: bz#1798994 (CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0]) --- ....3-accelerated-count-cache-flush-in-.patch | 135 ++++++++++++++++ kvm-tcp_emu-fix-unsafe-snprintf-usages.patch | 149 ++++++++++++++++++ kvm-util-add-slirp_fmt-helpers.patch | 140 ++++++++++++++++ ...ility-to-delete-vq-through-a-pointer.patch | 80 ++++++++++ 
...-make-virtio_delete_queue-idempotent.patch | 42 +++++ ...e-also-control-queue-when-TX-RX-dele.patch | 49 ++++++ ...-region-cache-when-on-queue-deletion.patch | 46 ++++++ qemu-kvm.spec | 31 +++- 8 files changed, 671 insertions(+), 1 deletion(-) create mode 100644 kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch create mode 100644 kvm-tcp_emu-fix-unsafe-snprintf-usages.patch create mode 100644 kvm-util-add-slirp_fmt-helpers.patch create mode 100644 kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch create mode 100644 kvm-virtio-make-virtio_delete_queue-idempotent.patch create mode 100644 kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch create mode 100644 kvm-virtio-reset-region-cache-when-on-queue-deletion.patch diff --git a/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch b/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch new file mode 100644 index 0000000..0aa782b --- /dev/null +++ b/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch @@ -0,0 +1,135 @@ +From eb121ffa97c1c25d7853d51b4c8209c0bb521deb Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Fri, 7 Feb 2020 00:57:04 +0000 +Subject: [PATCH 1/7] spapr: Enable DD2.3 accelerated count cache flush in + pseries-5.0 machine + +RH-Author: David Gibson +Message-id: <20200207005704.194428-1-dgibson@redhat.com> +Patchwork-id: 93737 +O-Subject: [RHEL-AV-8.2 qemu-kvm PATCHv2] spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine +Bugzilla: 1796240 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth + +From: David Gibson + +For POWER9 DD2.2 cpus, the best current Spectre v2 indirect branch +mitigation is "count cache disabled", which is configured with: + -machine cap-ibs=fixed-ccd +However, this option isn't available on DD2.3 CPUs with KVM, because they +don't have the count cache disabled. 
+ +For POWER9 DD2.3 cpus, it is "count cache flush with assist", configured +with: + -machine cap-ibs=workaround,cap-ccf-assist=on +However this option isn't available on DD2.2 CPUs with KVM, because they +don't have the special CCF assist instruction this relies on. + +On current machine types, we default to "count cache flush w/o assist", +that is: + -machine cap-ibs=workaround,cap-ccf-assist=off +This runs, with mitigation on both DD2.2 and DD2.3 host cpus, but has a +fairly significant performance impact. + +It turns out we can do better. The special instruction that CCF assist +uses to trigger a count cache flush is a no-op on earlier CPUs, rather than +trapping or causing other badness. It doesn't, of itself, implement the +mitigation, but *if* we have count-cache-disabled, then the count cache +flush is unnecessary, and so using the count cache flush mitigation is +harmless. + +Therefore for the new pseries-5.0 machine type, enable cap-ccf-assist by +default. Along with that, suppress throwing an error if cap-ccf-assist +is selected but KVM doesn't support it, as long as KVM *is* giving us +count-cache-disabled. To allow TCG to work out of the box, even though it +doesn't implement the ccf flush assist, downgrade the error in that case to +a warning. This matches several Spectre mitigations where we allow TCG +to operate for debugging, since we don't really make guarantees about TCG +security properties anyway. + +While we're there, make the TCG warning for this case match that for other +mitigations. + +Signed-off-by: David Gibson +Tested-by: Michael Ellerman +(cherry picked from commit 37965dfe4dffa3ac49438337417608e7f346b58a) +Signed-off-by: Danilo C. L. de Paula + +Conflicts: + hw/ppc/spapr.c + +Adjusted machine version compatibility code to the RHEL machine types +rather than the upstream machine types. 
+ +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1796240 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=26285002 +Branch: rhel-av-8.2.0 +Upstream: Merged for qemu-5.0 + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr.c | 4 +++- + hw/ppc/spapr_caps.c | 21 +++++++++++++++++---- + 2 files changed, 20 insertions(+), 5 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index c12862d..a330f03 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4440,7 +4440,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) + smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */ + smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF; + smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON; +- smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; ++ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON; + spapr_caps_add_properties(smc, &error_abort); + smc->irq = &spapr_irq_dual; + smc->dr_phb_enabled = true; +@@ -4904,6 +4904,8 @@ static void spapr_machine_rhel810_class_options(MachineClass *mc) + hw_compat_rhel_8_1_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; + } + + DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false); +diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c +index 805f385..6e6fb28 100644 +--- a/hw/ppc/spapr_caps.c ++++ b/hw/ppc/spapr_caps.c +@@ -492,11 +492,24 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val, + uint8_t kvm_val = kvmppc_get_cap_count_cache_flush_assist(); + + if (tcg_enabled() && val) { +- /* TODO - for now only allow broken for TCG */ +- error_setg(errp, +-"Requested count cache flush assist capability level not supported by tcg," +- " try appending -machine cap-ccf-assist=off"); ++ /* TCG doesn't implement anything here, but allow with a warning */ ++ 
warn_report("TCG doesn't support requested feature, cap-ccf-assist=on"); + } else if (kvm_enabled() && (val > kvm_val)) { ++ uint8_t kvm_ibs = kvmppc_get_cap_safe_indirect_branch(); ++ ++ if (kvm_ibs == SPAPR_CAP_FIXED_CCD) { ++ /* ++ * If we don't have CCF assist on the host, the assist ++ * instruction is a harmless no-op. It won't correctly ++ * implement the cache count flush *but* if we have ++ * count-cache-disabled in the host, that flush is ++ * unnnecessary. So, specifically allow this case. This ++ * allows us to have better performance on POWER9 DD2.3, ++ * while still working on POWER9 DD2.2 and POWER8 host ++ * cpus. ++ */ ++ return; ++ } + error_setg(errp, + "Requested count cache flush assist capability level not supported by kvm," + " try appending -machine cap-ccf-assist=off"); +-- +1.8.3.1 + diff --git a/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch b/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch new file mode 100644 index 0000000..846da73 --- /dev/null +++ b/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch @@ -0,0 +1,149 @@ +From 9a7810c257711ce02627916d886fc1029f7a8190 Mon Sep 17 00:00:00 2001 +From: jmaloy +Date: Thu, 13 Feb 2020 15:50:49 +0000 +Subject: [PATCH 3/7] tcp_emu: fix unsafe snprintf() usages +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: jmaloy +Message-id: <20200213155049.3936-3-jmaloy@redhat.com> +Patchwork-id: 93826 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] tcp_emu: fix unsafe snprintf() usages +Bugzilla: 1798994 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi + +From: Marc-André Lureau + +Various calls to snprintf() assume that snprintf() returns "only" the +number of bytes written (excluding terminating NUL). 
+ +https://pubs.opengroup.org/onlinepubs/9699919799/functions/snprintf.html#tag_16_159_04 + +"Upon successful completion, the snprintf() function shall return the +number of bytes that would be written to s had n been sufficiently +large excluding the terminating null byte." + +Before patch ce131029, if there isn't enough room in "m_data" for the +"DCC ..." message, we overflow "m_data". + +After the patch, if there isn't enough room for the same, we don't +overflow "m_data", but we set "m_len" out-of-bounds. The next time an +access is bounded by "m_len", we'll have a buffer overflow then. + +Use slirp_fmt*() to fix potential OOB memory access. + +Reported-by: Laszlo Ersek +Signed-off-by: Marc-André Lureau +Reviewed-by: Samuel Thibault +Message-Id: <20200127092414.169796-7-marcandre.lureau@redhat.com> +(cherry picked from libslirp commit 68ccb8021a838066f0951d4b2817eb6b6f10a843) +Signed-off-by: Jon Maloy + +Signed-off-by: Danilo C. L. de Paula +--- + slirp/src/tcp_subr.c | 44 +++++++++++++++++++++----------------------- + 1 file changed, 21 insertions(+), 23 deletions(-) + +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +index 954d1a6..26d4ead 100644 +--- a/slirp/src/tcp_subr.c ++++ b/slirp/src/tcp_subr.c +@@ -655,8 +655,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + NTOHS(n1); + NTOHS(n2); + m_inc(m, snprintf(NULL, 0, "%d,%d\r\n", n1, n2) + 1); +- m->m_len = snprintf(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); +- assert(m->m_len < M_ROOM(m)); ++ m->m_len = slirp_fmt(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); + } else { + *eol = '\r'; + } +@@ -696,9 +695,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, M_FREEROOM(m), +- "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, +- n5, n6, x == 7 ? buff : ""); ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "ORT %d,%d,%d,%d,%d,%d\r\n%s", ++ n1, n2, n3, n4, n5, n6, x == 7 ? 
buff : ""); + return 1; + } else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) { + /* +@@ -731,10 +730,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, M_FREEROOM(m), +- "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", +- n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); +- ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", ++ n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); + return 1; + } + +@@ -757,8 +755,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && + (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, + htons(lport), SS_FACCEPTONCE)) != NULL) +- m->m_len = snprintf(m->m_data, M_ROOM(m), +- "%d", ntohs(so->so_fport)) + 1; ++ m->m_len = slirp_fmt0(m->m_data, M_ROOM(m), ++ "%d", ntohs(so->so_fport)); + return 1; + + case EMU_IRC: +@@ -777,10 +775,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, M_FREEROOM(m), +- "DCC CHAT chat %lu %u%c\n", +- (unsigned long)ntohl(so->so_faddr.s_addr), +- ntohs(so->so_fport), 1); ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC CHAT chat %lu %u%c\n", ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), 1); + } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, + &n1) == 4) { + if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), +@@ -788,10 +786,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, M_FREEROOM(m), +- "DCC SEND %s %lu %u %u%c\n", buff, +- (unsigned long)ntohl(so->so_faddr.s_addr), +- ntohs(so->so_fport), n1, 1); ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC SEND %s %lu %u %u%c\n", buff, ++ (unsigned long)ntohl(so->so_faddr.s_addr), 
++ ntohs(so->so_fport), n1, 1); + } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, + &n1) == 4) { + if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), +@@ -799,10 +797,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, M_FREEROOM(m), +- "DCC MOVE %s %lu %u %u%c\n", buff, +- (unsigned long)ntohl(so->so_faddr.s_addr), +- ntohs(so->so_fport), n1, 1); ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC MOVE %s %lu %u %u%c\n", buff, ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), n1, 1); + } + return 1; + +-- +1.8.3.1 + diff --git a/kvm-util-add-slirp_fmt-helpers.patch b/kvm-util-add-slirp_fmt-helpers.patch new file mode 100644 index 0000000..31af599 --- /dev/null +++ b/kvm-util-add-slirp_fmt-helpers.patch @@ -0,0 +1,140 @@ +From 5dc50c6bca059a9cda6677b1fd0187df1de78ed7 Mon Sep 17 00:00:00 2001 +From: jmaloy +Date: Thu, 13 Feb 2020 15:50:48 +0000 +Subject: [PATCH 2/7] util: add slirp_fmt() helpers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: jmaloy +Message-id: <20200213155049.3936-2-jmaloy@redhat.com> +Patchwork-id: 93824 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] util: add slirp_fmt() helpers +Bugzilla: 1798994 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefan Hajnoczi + +From: Marc-André Lureau + +Various calls to snprintf() in libslirp assume that snprintf() returns +"only" the number of bytes written (excluding terminating NUL). + +https://pubs.opengroup.org/onlinepubs/9699919799/functions/snprintf.html#tag_16_159_04 + +"Upon successful completion, the snprintf() function shall return the +number of bytes that would be written to s had n been sufficiently +large excluding the terminating null byte." 
+ +Introduce slirp_fmt() that handles several pathological cases the +way libslirp usually expect: + +- treat error as fatal (instead of silently returning -1) + +- fmt0() will always \0 end + +- return the number of bytes actually written (instead of what would +have been written, which would usually result in OOB later), including +the ending \0 for fmt0() + +- warn if truncation happened (instead of ignoring) + +Other less common cases can still be handled with strcpy/snprintf() etc. + +Signed-off-by: Marc-André Lureau +Reviewed-by: Samuel Thibault +Message-Id: <20200127092414.169796-2-marcandre.lureau@redhat.com> +(cherry picked from libslirp commit 30648c03b27fb8d9611b723184216cd3174b6775) +Signed-off-by: Jon Maloy + +Signed-off-by: Danilo C. L. de Paula +--- + slirp/src/util.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + slirp/src/util.h | 3 +++ + 2 files changed, 65 insertions(+) + +diff --git a/slirp/src/util.c b/slirp/src/util.c +index e596087..e3b6257 100644 +--- a/slirp/src/util.c ++++ b/slirp/src/util.c +@@ -364,3 +364,65 @@ void slirp_pstrcpy(char *buf, int buf_size, const char *str) + } + *q = '\0'; + } ++ ++static int slirp_vsnprintf(char *str, size_t size, ++ const char *format, va_list args) ++{ ++ int rv = vsnprintf(str, size, format, args); ++ ++ if (rv < 0) { ++ g_error("vsnprintf() failed: %s", g_strerror(errno)); ++ } ++ ++ return rv; ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - returns the number of bytes written (excluding optional \0-ending) ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt(char *str, size_t size, const char *format, ...) 
++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv > size) { ++ g_critical("vsnprintf() truncation"); ++ } ++ ++ return MIN(rv, size); ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - always \0-end (unless size == 0) ++ * - returns the number of bytes actually written, including \0 ending ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt0(char *str, size_t size, const char *format, ...) ++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv >= size) { ++ g_critical("vsnprintf() truncation"); ++ if (size > 0) ++ str[size - 1] = '\0'; ++ rv = size; ++ } else { ++ rv += 1; /* include \0 */ ++ } ++ ++ return rv; ++} +diff --git a/slirp/src/util.h b/slirp/src/util.h +index 3c6223c..0558dfc 100644 +--- a/slirp/src/util.h ++++ b/slirp/src/util.h +@@ -177,4 +177,7 @@ static inline int slirp_socket_set_fast_reuse(int fd) + + void slirp_pstrcpy(char *buf, int buf_size, const char *str); + ++int slirp_fmt(char *str, size_t size, const char *format, ...); ++int slirp_fmt0(char *str, size_t size, const char *format, ...); ++ + #endif +-- +1.8.3.1 + diff --git a/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch b/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch new file mode 100644 index 0000000..ed10701 --- /dev/null +++ b/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch @@ -0,0 +1,80 @@ +From b395ad369278d0923a590975fabbb99ec7716c6b Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Wed, 19 Feb 2020 21:34:28 +0000 +Subject: [PATCH 4/7] virtio: add ability to delete vq through a pointer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Julia Suvorova +Message-id: <20200219213431.11913-2-jusual@redhat.com> +Patchwork-id: 93980 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/4] virtio: add 
ability to delete vq through a pointer +Bugzilla: 1791590 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Michael S. Tsirkin + +From: "Michael S. Tsirkin" + +Devices tend to maintain vq pointers, allow deleting them trough a vq pointer. + +Signed-off-by: Michael S. Tsirkin +Reviewed-by: David Hildenbrand +Reviewed-by: David Hildenbrand +(cherry picked from commit 722f8c51d8af223751dfb1d02de40043e8ba067e) +Signed-off-by: Danilo C. L. de Paula +--- + hw/virtio/virtio.c | 15 ++++++++++----- + include/hw/virtio/virtio.h | 2 ++ + 2 files changed, 12 insertions(+), 5 deletions(-) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 3211135..d63a369 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2335,17 +2335,22 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + return &vdev->vq[i]; + } + ++void virtio_delete_queue(VirtQueue *vq) ++{ ++ vq->vring.num = 0; ++ vq->vring.num_default = 0; ++ vq->handle_output = NULL; ++ vq->handle_aio_output = NULL; ++ g_free(vq->used_elems); ++} ++ + void virtio_del_queue(VirtIODevice *vdev, int n) + { + if (n < 0 || n >= VIRTIO_QUEUE_MAX) { + abort(); + } + +- vdev->vq[n].vring.num = 0; +- vdev->vq[n].vring.num_default = 0; +- vdev->vq[n].handle_output = NULL; +- vdev->vq[n].handle_aio_output = NULL; +- g_free(vdev->vq[n].used_elems); ++ virtio_delete_queue(&vdev->vq[n]); + } + + static void virtio_set_isr(VirtIODevice *vdev, int value) +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index 6a20442..91167f6 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -183,6 +183,8 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + + void virtio_del_queue(VirtIODevice *vdev, int n); + ++void virtio_delete_queue(VirtQueue *vq); ++ + void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len); + void virtqueue_flush(VirtQueue *vq, unsigned int count); 
+-- +1.8.3.1 + diff --git a/kvm-virtio-make-virtio_delete_queue-idempotent.patch b/kvm-virtio-make-virtio_delete_queue-idempotent.patch new file mode 100644 index 0000000..16eb1da --- /dev/null +++ b/kvm-virtio-make-virtio_delete_queue-idempotent.patch @@ -0,0 +1,42 @@ +From 901e65fa6ccbadeacd6c585cf49a0a7cdafb4737 Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Wed, 19 Feb 2020 21:34:29 +0000 +Subject: [PATCH 5/7] virtio: make virtio_delete_queue idempotent + +RH-Author: Julia Suvorova +Message-id: <20200219213431.11913-3-jusual@redhat.com> +Patchwork-id: 93981 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/4] virtio: make virtio_delete_queue idempotent +Bugzilla: 1791590 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Michael S. Tsirkin + +From: "Michael S. Tsirkin" + +Let's make sure calling this twice is harmless - +no known instances, but seems safer. + +Suggested-by: Pan Nengyuan +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 8cd353ea0fbf0e334e015d833f612799be642296) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/virtio/virtio.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index d63a369..e6a9ba4 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2342,6 +2342,7 @@ void virtio_delete_queue(VirtQueue *vq) + vq->handle_output = NULL; + vq->handle_aio_output = NULL; + g_free(vq->used_elems); ++ vq->used_elems = NULL; + } + + void virtio_del_queue(VirtIODevice *vdev, int n) +-- +1.8.3.1 + diff --git a/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch b/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch new file mode 100644 index 0000000..c21c699 --- /dev/null +++ b/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch @@ -0,0 +1,49 @@ +From 2f494c41715193522c52eafc6af2a5e33f88ceb9 Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Wed, 19 Feb 2020 21:34:31 +0000 +Subject: [PATCH 7/7] virtio-net: delete also control queue when TX/RX deleted + +RH-Author: Julia Suvorova +Message-id: <20200219213431.11913-5-jusual@redhat.com> +Patchwork-id: 93983 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/4] virtio-net: delete also control queue when TX/RX deleted +Bugzilla: 1791590 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Michael S. Tsirkin + +From: Yuri Benditovich + +https://bugzilla.redhat.com/show_bug.cgi?id=1708480 +If the control queue is not deleted together with TX/RX, it +later will be ignored in freeing cache resources and hot +unplug will not be completed. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Yuri Benditovich +Message-Id: <20191226043649.14481-3-yuri.benditovich@daynix.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit d945d9f1731244ef341f74ede93120fc9de35913) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/net/virtio-net.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index db3d7c3..f325440 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3101,7 +3101,8 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) + for (i = 0; i < max_queues; i++) { + virtio_net_del_queue(n, i); + } +- ++ /* delete also control vq */ ++ virtio_del_queue(vdev, max_queues * 2); + qemu_announce_timer_del(&n->announce_timer, false); + g_free(n->vqs); + qemu_del_nic(n->nic); +-- +1.8.3.1 + diff --git a/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch b/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch new file mode 100644 index 0000000..c9f1086 --- /dev/null +++ b/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch @@ -0,0 +1,46 @@ +From 8bf4f561262d9282cebdb3418cdb9a69c92216a0 Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Wed, 19 Feb 2020 21:34:30 +0000 +Subject: [PATCH 6/7] virtio: reset region cache when on queue deletion + +RH-Author: Julia Suvorova +Message-id: <20200219213431.11913-4-jusual@redhat.com> +Patchwork-id: 93982 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/4] virtio: reset region cache when on queue deletion +Bugzilla: 1791590 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Michael S. Tsirkin + +From: Yuri Benditovich + +https://bugzilla.redhat.com/show_bug.cgi?id=1708480 +Fix leak of region reference that prevents complete +device deletion on hot unplug. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Yuri Benditovich +Message-Id: <20191226043649.14481-2-yuri.benditovich@daynix.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 421afd2fe8dd4603216cbf36081877c391f5a2a4) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/virtio/virtio.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index e6a9ba4..f644d9a 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2343,6 +2343,7 @@ void virtio_delete_queue(VirtQueue *vq) + vq->handle_aio_output = NULL; + g_free(vq->used_elems); + vq->used_elems = NULL; ++ virtio_virtqueue_reset_region_cache(vq); + } + + void virtio_del_queue(VirtIODevice *vdev, int n) +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index d5f53d8..00b9eac 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 10%{?dist} +Release: 11%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -494,6 +494,20 @@ Patch178: kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch Patch179: kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch # For bz#1801320 - aarch64: backport query-cpu-model-expansion and adjvtime document fixes Patch180: kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch +# For bz#1796240 - Enable hw accelerated cache-count-flush by default for POWER9 DD2.3 cpus +Patch181: kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch +# For bz#1798994 - CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0] +Patch182: kvm-util-add-slirp_fmt-helpers.patch +# For bz#1798994 - CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0] +Patch183: kvm-tcp_emu-fix-unsafe-snprintf-usages.patch +# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device +Patch184: kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch +# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device +Patch185: 
kvm-virtio-make-virtio_delete_queue-idempotent.patch +# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device +Patch186: kvm-virtio-reset-region-cache-when-on-queue-deletion.patch +# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device +Patch187: kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch BuildRequires: wget BuildRequires: rpm-build @@ -1427,6 +1441,21 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Sun Feb 23 2020 Danilo Cesar Lemes de Paula - 4.2.0-11.el8 +- kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch [bz#1796240] +- kvm-util-add-slirp_fmt-helpers.patch [bz#1798994] +- kvm-tcp_emu-fix-unsafe-snprintf-usages.patch [bz#1798994] +- kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch [bz#1791590] +- kvm-virtio-make-virtio_delete_queue-idempotent.patch [bz#1791590] +- kvm-virtio-reset-region-cache-when-on-queue-deletion.patch [bz#1791590] +- kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch [bz#1791590] +- Resolves: bz#1791590 + ([Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device) +- Resolves: bz#1796240 + (Enable hw accelerated cache-count-flush by default for POWER9 DD2.3 cpus) +- Resolves: bz#1798994 + (CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0]) + * Fri Feb 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-10.el8 - kvm-i386-Resolve-CPU-models-to-v1-by-default.patch [bz#1779078 bz#1787291 bz#1779078 bz#1779078] - kvm-iotests-Support-job-complete-in-run_job.patch [bz#1781637] From 6d18d0286e424f7065449492706ff061a91b1aa1 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Mon, 24 Feb 2020 20:02:01 +0000 Subject: [PATCH 067/195] * Mon Feb 24 2020 Danilo Cesar Lemes de Paula - 4.2.0-12.el8 - kvm-vhost-user-gpu-Drop-trailing-json-comma.patch [bz#1805334] - Resolves: bz#1805334 (vhost-user/50-qemu-gpu.json is not valid JSON) --- ...st-user-gpu-Drop-trailing-json-comma.patch | 52 +++++++++++++++++++ qemu-kvm.spec | 9 +++- 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 kvm-vhost-user-gpu-Drop-trailing-json-comma.patch diff --git a/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch b/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch new file mode 100644 index 0000000..3a50632 --- /dev/null +++ b/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch @@ -0,0 +1,52 @@ +From 044feb40e3041759ee77d08136f334cf3ad67c1e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?J=C3=A1n=20Tomko?= +Date: Fri, 21 Feb 2020 09:49:23 +0000 +Subject: [PATCH] vhost-user-gpu: Drop trailing json comma +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Ján Tomko +Message-id: <07fed9a38495938a7180819e27f590d80cd6668d.1582278173.git.jtomko@redhat.com> +Patchwork-id: 94019 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] vhost-user-gpu: Drop trailing json comma +Bugzilla: 1805334 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Stefan Hajnoczi + +From: Cole Robinson + +Trailing comma is not valid json: + +$ cat contrib/vhost-user-gpu/50-qemu-gpu.json.in | jq +parse error: Expected another key-value pair at line 5, column 1 + +Signed-off-by: Cole Robinson +Reviewed-by: Marc-André Lureau +Reviewed-by: Li Qiang +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 7f5dd2ac9f3504e2699f23e69bc3d8051b729832.1568925097.git.crobinso@redhat.com +Signed-off-by: Gerd Hoffmann +(cherry picked from commit ca26b032e5a0e8a190c763ce828a8740d24b9b65) +Signed-off-by: Ján Tomko +Signed-off-by: Danilo C. L. 
de Paula +--- + contrib/vhost-user-gpu/50-qemu-gpu.json.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/contrib/vhost-user-gpu/50-qemu-gpu.json.in b/contrib/vhost-user-gpu/50-qemu-gpu.json.in +index 658b545..f5edd09 100644 +--- a/contrib/vhost-user-gpu/50-qemu-gpu.json.in ++++ b/contrib/vhost-user-gpu/50-qemu-gpu.json.in +@@ -1,5 +1,5 @@ + { + "description": "QEMU vhost-user-gpu", + "type": "gpu", +- "binary": "@libexecdir@/vhost-user-gpu", ++ "binary": "@libexecdir@/vhost-user-gpu" + } +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 00b9eac..598edf6 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 11%{?dist} +Release: 12%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -508,6 +508,8 @@ Patch185: kvm-virtio-make-virtio_delete_queue-idempotent.patch Patch186: kvm-virtio-reset-region-cache-when-on-queue-deletion.patch # For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device Patch187: kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch +# For bz#1805334 - vhost-user/50-qemu-gpu.json is not valid JSON +Patch188: kvm-vhost-user-gpu-Drop-trailing-json-comma.patch BuildRequires: wget BuildRequires: rpm-build @@ -1441,6 +1443,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Feb 24 2020 Danilo Cesar Lemes de Paula - 4.2.0-12.el8 +- kvm-vhost-user-gpu-Drop-trailing-json-comma.patch [bz#1805334] +- Resolves: bz#1805334 + (vhost-user/50-qemu-gpu.json is not valid JSON) + * Sun Feb 23 2020 Danilo Cesar Lemes de Paula - 4.2.0-11.el8 - kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch [bz#1796240] - kvm-util-add-slirp_fmt-helpers.patch [bz#1798994] From 2084aa024620f0814c0ac3d9b3848b39f2b7ff00 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Sat, 29 Feb 2020 03:50:42 +0000 Subject: [PATCH 068/195] * Sat Feb 29 2020 Danilo Cesar Lemes de Paula - 4.2.0-13.el8 - kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch [bz#1791648] - kvm-target-i386-add-a-ucode-rev-property.patch [bz#1791648] - kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch [bz#1791648] - kvm-target-i386-fix-TCG-UCODE_REV-access.patch [bz#1791648] - kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch [bz#1791648] - kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch [bz#1791648] - kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch [bz#1703907] - kvm-mirror-Store-MirrorOp.co-for-debuggability.patch [bz#1794692] - kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch [bz#1794692] - Resolves: bz#1703907 ([upstream]QEMU coredump when converting to qcow2: external data file images on block devices with copy_offloading) - Resolves: bz#1791648 ([RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough) - Resolves: bz#1794692 (Mirror block job stops making progress) --- ...n-t-let-an-operation-wait-for-itself.patch | 123 ++++++++++++ ...-Store-MirrorOp.co-for-debuggability.patch | 51 +++++ ...alloc_cluster_abort-for-external-dat.patch | 52 +++++ ...target-i386-add-a-ucode-rev-property.patch | 125 ++++++++++++ ...k-for-availability-of-MSR_IA32_UCODE.patch | 72 +++++++ ...le-monitor-and-ucode-revision-with-c.patch | 49 +++++ ...target-i386-fix-TCG-UCODE_REV-access.patch | 73 +++++++ ...m-initialize-feature-MSRs-very-early.patch | 178 ++++++++++++++++++ ...initialize-microcode-revision-from-K.patch | 64 +++++++ qemu-kvm.spec | 37 +++- 10 files changed, 823 insertions(+), 1 deletion(-) create mode 100644 kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch create mode 100644 kvm-mirror-Store-MirrorOp.co-for-debuggability.patch create mode 100644 kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch create mode 100644 
kvm-target-i386-add-a-ucode-rev-property.patch create mode 100644 kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch create mode 100644 kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch create mode 100644 kvm-target-i386-fix-TCG-UCODE_REV-access.patch create mode 100644 kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch create mode 100644 kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch diff --git a/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch b/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch new file mode 100644 index 0000000..c20cb6c --- /dev/null +++ b/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch @@ -0,0 +1,123 @@ +From 261ee33e0e6711fadd3049e4640bb731ee3d44ff Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 24 Feb 2020 16:57:10 +0000 +Subject: [PATCH 9/9] mirror: Don't let an operation wait for itself + +RH-Author: Kevin Wolf +Message-id: <20200224165710.4830-3-kwolf@redhat.com> +Patchwork-id: 94045 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] mirror: Don't let an operation wait for itself +Bugzilla: 1794692 +RH-Acked-by: John Snow +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +mirror_wait_for_free_in_flight_slot() just picks a random operation to +wait for. However, when mirror_co_read() waits for free slots, its +MirrorOp is already in s->ops_in_flight, so if not enough slots are +immediately available, an operation can end up waiting for itself to +complete, which results in a hang. + +Fix this by passing the current MirrorOp and skipping this operation +when picking an operation to wait for. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1794692 +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +(cherry picked from commit 7e6c4ff792734e196c8ca82564c56b5e7c6288ca) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/mirror.c | 21 ++++++++++++--------- + 1 file changed, 12 insertions(+), 9 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 8959e42..cacbc70 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -283,11 +283,14 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, + } + + static inline void coroutine_fn +-mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) ++mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) + { + MirrorOp *op; + + QTAILQ_FOREACH(op, &s->ops_in_flight, next) { ++ if (self == op) { ++ continue; ++ } + /* Do not wait on pseudo ops, because it may in turn wait on + * some other operation to start, which may in fact be the + * caller of this function. Since there is only one pseudo op +@@ -302,10 +305,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) + } + + static inline void coroutine_fn +-mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) ++mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self) + { + /* Only non-active operations use up in-flight slots */ +- mirror_wait_for_any_operation(s, false); ++ mirror_wait_for_any_operation(s, self, false); + } + + /* Perform a mirror copy operation. 
+@@ -348,7 +351,7 @@ static void coroutine_fn mirror_co_read(void *opaque) + + while (s->buf_free_count < nb_chunks) { + trace_mirror_yield_in_flight(s, op->offset, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s); ++ mirror_wait_for_free_in_flight_slot(s, op); + } + + /* Now make a QEMUIOVector taking enough granularity-sized chunks +@@ -555,7 +558,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) + + while (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield_in_flight(s, offset, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s); ++ mirror_wait_for_free_in_flight_slot(s, pseudo_op); + } + + if (s->ret < 0) { +@@ -609,7 +612,7 @@ static void mirror_free_init(MirrorBlockJob *s) + static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s) + { + while (s->in_flight > 0) { +- mirror_wait_for_free_in_flight_slot(s); ++ mirror_wait_for_free_in_flight_slot(s, NULL); + } + } + +@@ -794,7 +797,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) + if (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, + s->in_flight); +- mirror_wait_for_free_in_flight_slot(s); ++ mirror_wait_for_free_in_flight_slot(s, NULL); + continue; + } + +@@ -947,7 +950,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + /* Do not start passive operations while there are active + * writes in progress */ + while (s->in_active_write_counter) { +- mirror_wait_for_any_operation(s, true); ++ mirror_wait_for_any_operation(s, NULL, true); + } + + if (s->ret < 0) { +@@ -973,7 +976,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || + (cnt == 0 && s->in_flight > 0)) { + trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s); ++ mirror_wait_for_free_in_flight_slot(s, NULL); + continue; + } else if (cnt != 0) { + delay_ns = mirror_iteration(s); +-- +1.8.3.1 + diff --git 
a/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch b/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch new file mode 100644 index 0000000..67f3e54 --- /dev/null +++ b/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch @@ -0,0 +1,51 @@ +From 27fe3b8d42a2c99de01ce20e4b0727079c12da65 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 24 Feb 2020 16:57:09 +0000 +Subject: [PATCH 8/9] mirror: Store MirrorOp.co for debuggability + +RH-Author: Kevin Wolf +Message-id: <20200224165710.4830-2-kwolf@redhat.com> +Patchwork-id: 94044 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] mirror: Store MirrorOp.co for debuggability +Bugzilla: 1794692 +RH-Acked-by: John Snow +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +If a coroutine is launched, but the coroutine pointer isn't stored +anywhere, debugging any problems inside the coroutine is quite hard. +Let's store the coroutine pointer of a mirror operation in MirrorOp to +have it available in the debugger. + +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +(cherry picked from commit eed325b92c3e68417121ea23f96e33af6a4654ed) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/mirror.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/block/mirror.c b/block/mirror.c +index f0f2d9d..8959e42 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -103,6 +103,7 @@ struct MirrorOp { + bool is_pseudo_op; + bool is_active_write; + CoQueue waiting_requests; ++ Coroutine *co; + + QTAILQ_ENTRY(MirrorOp) next; + }; +@@ -429,6 +430,7 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, + default: + abort(); + } ++ op->co = co; + + QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); + qemu_coroutine_enter(co); +-- +1.8.3.1 + diff --git a/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch b/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch new file mode 100644 index 0000000..1a7ace5 --- /dev/null +++ b/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch @@ -0,0 +1,52 @@ +From ecc4fb6e1941035e1d9def1f69b779fbea216caf Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 24 Feb 2020 16:13:07 +0000 +Subject: [PATCH 7/9] qcow2: Fix qcow2_alloc_cluster_abort() for external data + file + +RH-Author: Kevin Wolf +Message-id: <20200224161307.29783-2-kwolf@redhat.com> +Patchwork-id: 94042 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] qcow2: Fix qcow2_alloc_cluster_abort() for external data file +Bugzilla: 1703907 +RH-Acked-by: John Snow +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +For external data file, cluster allocations return an offset in the data +file and are not refcounted. In this case, there is nothing to do for +qcow2_alloc_cluster_abort(). Freeing the same offset in the qcow2 file +is wrong and causes crashes in the better case or image corruption in +the worse case. + +Signed-off-by: Kevin Wolf +Message-Id: <20200211094900.17315-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit c3b6658c1a5a3fb24d6c27b2594cf86146f75b22) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/qcow2-cluster.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c +index 8982b7b..dc3c270 100644 +--- a/block/qcow2-cluster.c ++++ b/block/qcow2-cluster.c +@@ -1015,8 +1015,11 @@ err: + void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) + { + BDRVQcow2State *s = bs->opaque; +- qcow2_free_clusters(bs, m->alloc_offset, m->nb_clusters << s->cluster_bits, +- QCOW2_DISCARD_NEVER); ++ if (!has_data_file(bs)) { ++ qcow2_free_clusters(bs, m->alloc_offset, ++ m->nb_clusters << s->cluster_bits, ++ QCOW2_DISCARD_NEVER); ++ } + } + + /* +-- +1.8.3.1 + diff --git a/kvm-target-i386-add-a-ucode-rev-property.patch b/kvm-target-i386-add-a-ucode-rev-property.patch new file mode 100644 index 0000000..5c3c770 --- /dev/null +++ b/kvm-target-i386-add-a-ucode-rev-property.patch @@ -0,0 +1,125 @@ +From 4009f0bcc8004ce481015d088fe335a16b8d7ce1 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:12 +0000 +Subject: [PATCH 2/9] target/i386: add a ucode-rev property + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-3-pbonzini@redhat.com> +Patchwork-id: 93909 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/6] target/i386: add a ucode-rev property +Bugzilla: 1791648 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. David Alan Gilbert + +Add the property and plumb it in TCG and HVF (the latter of which +tried to support returning a constant value but used the wrong MSR). + +Signed-off-by: Paolo Bonzini +Message-Id: <1579544504-3616-3-git-send-email-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4e45aff398cd1542c2a384a2a3b8600f23337d86) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 10 ++++++++++ + target/i386/cpu.h | 3 +++ + target/i386/hvf/x86_emu.c | 4 +--- + target/i386/misc_helper.c | 4 ++++ + 4 files changed, 18 insertions(+), 3 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 863192c..e505d3e 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6325,6 +6325,15 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + } + } + ++ if (cpu->ucode_rev == 0) { ++ /* The default is the same as KVM's. */ ++ if (IS_AMD_CPU(env)) { ++ cpu->ucode_rev = 0x01000065; ++ } else { ++ cpu->ucode_rev = 0x100000000ULL; ++ } ++ } ++ + /* mwait extended info: needed for Core compatibility */ + /* We always wake on interrupt even if host does not have the capability */ + cpu->mwait.ecx |= CPUID_MWAIT_EMX | CPUID_MWAIT_IBE; +@@ -7008,6 +7017,7 @@ static Property x86_cpu_properties[] = { + DEFINE_PROP_UINT32("min-level", X86CPU, env.cpuid_min_level, 0), + DEFINE_PROP_UINT32("min-xlevel", X86CPU, env.cpuid_min_xlevel, 0), + DEFINE_PROP_UINT32("min-xlevel2", X86CPU, env.cpuid_min_xlevel2, 0), ++ DEFINE_PROP_UINT64("ucode-rev", X86CPU, ucode_rev, 0), + DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true), + DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor_id), + DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index cde2a16..4441061 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -348,6 +348,7 @@ typedef enum X86Seg { + #define MSR_IA32_SPEC_CTRL 0x48 + #define MSR_VIRT_SSBD 0xc001011f + #define MSR_IA32_PRED_CMD 0x49 ++#define MSR_IA32_UCODE_REV 0x8b + #define MSR_IA32_CORE_CAPABILITY 0xcf + + #define MSR_IA32_ARCH_CAPABILITIES 0x10a +@@ -1621,6 +1622,8 @@ struct X86CPU { + CPUNegativeOffsetState neg; + CPUX86State env; + ++ uint64_t ucode_rev; ++ + uint32_t hyperv_spinlock_attempts; + char *hyperv_vendor_id; + bool hyperv_synic_kvm_only; +diff --git 
a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c +index 3df7672..92ab815 100644 +--- a/target/i386/hvf/x86_emu.c ++++ b/target/i386/hvf/x86_emu.c +@@ -664,8 +664,6 @@ static void exec_lods(struct CPUX86State *env, struct x86_decode *decode) + RIP(env) += decode->len; + } + +-#define MSR_IA32_UCODE_REV 0x00000017 +- + void simulate_rdmsr(struct CPUState *cpu) + { + X86CPU *x86_cpu = X86_CPU(cpu); +@@ -681,7 +679,7 @@ void simulate_rdmsr(struct CPUState *cpu) + val = cpu_get_apic_base(X86_CPU(cpu)->apic_state); + break; + case MSR_IA32_UCODE_REV: +- val = (0x100000000ULL << 32) | 0x100000000ULL; ++ val = x86_cpu->ucode_rev; + break; + case MSR_EFER: + val = rvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER); +diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c +index 3eff688..aed16fe 100644 +--- a/target/i386/misc_helper.c ++++ b/target/i386/misc_helper.c +@@ -229,6 +229,7 @@ void helper_rdmsr(CPUX86State *env) + #else + void helper_wrmsr(CPUX86State *env) + { ++ X86CPU *x86_cpu = env_archcpu(env); + uint64_t val; + + cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 1, GETPC()); +@@ -371,6 +372,9 @@ void helper_wrmsr(CPUX86State *env) + env->msr_bndcfgs = val; + cpu_sync_bndcs_hflags(env); + break; ++ case MSR_IA32_UCODE_REV: ++ val = x86_cpu->ucode_rev; ++ break; + default: + if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL + && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + +-- +1.8.3.1 + diff --git a/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch b/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch new file mode 100644 index 0000000..a80c9d3 --- /dev/null +++ b/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch @@ -0,0 +1,72 @@ +From 27d7b085f2f568050d638b694ed2f51495db718c Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:15 +0000 +Subject: [PATCH 5/9] target/i386: check for availability of MSR_IA32_UCODE_REV + as an emulated MSR +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-6-pbonzini@redhat.com> +Patchwork-id: 93898 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/6] target/i386: check for availability of MSR_IA32_UCODE_REV as an emulated MSR +Bugzilla: 1791648 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. David Alan Gilbert + +Even though MSR_IA32_UCODE_REV has been available long before Linux 5.6, +which added it to the emulated MSR list, a bug caused the microcode +version to revert to 0x100000000 on INIT. As a result, processors other +than the bootstrap processor would not see the host microcode revision; +some Windows version complain loudly about this and crash with a +fairly explicit MICROCODE REVISION MISMATCH error. + +[If running 5.6 prereleases, the kernel fix "KVM: x86: do not reset + microcode version on INIT or RESET" should also be applied.] + +Reported-by: Alex Williamson +Message-id: <20200211175516.10716-1-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 6702514814c7e7b4cbf179624539b5f38c72740b) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/kvm.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 6c61aef..99840ca 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -105,6 +105,7 @@ static bool has_msr_smi_count; + static bool has_msr_arch_capabs; + static bool has_msr_core_capabs; + static bool has_msr_vmx_vmfunc; ++static bool has_msr_ucode_rev; + + static uint32_t has_architectural_pmu_version; + static uint32_t num_architectural_pmu_gp_counters; +@@ -2056,6 +2057,9 @@ static int kvm_get_supported_msrs(KVMState *s) + case MSR_IA32_VMX_VMFUNC: + has_msr_vmx_vmfunc = true; + break; ++ case MSR_IA32_UCODE_REV: ++ has_msr_ucode_rev = true; ++ break; + } + } + } +@@ -2696,8 +2700,7 @@ static void kvm_init_msrs(X86CPU *cpu) + env->features[FEAT_CORE_CAPABILITY]); + } + +- if (kvm_arch_get_supported_msr_feature(kvm_state, +- MSR_IA32_UCODE_REV)) { ++ if (has_msr_ucode_rev) { + kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); + } + +-- +1.8.3.1 + diff --git a/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch b/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch new file mode 100644 index 0000000..47438a3 --- /dev/null +++ b/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch @@ -0,0 +1,49 @@ +From 7b71a7011437ebfa3bc7df9297e892b82293ec98 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:16 +0000 +Subject: [PATCH 6/9] target/i386: enable monitor and ucode revision with -cpu + max +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-7-pbonzini@redhat.com> +Patchwork-id: 93910 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/6] target/i386: enable monitor and ucode revision with -cpu max +Bugzilla: 1791648 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. 
David Alan Gilbert + +These two features were incorrectly tied to host_cpuid_required rather than +cpu->max_features. As a result, -cpu max was not enabling either MONITOR +features or ucode revision. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit be02cda3afde60d219786e23c3f8edb53aec8e17) + +[RHEL7: context, upstream uses g_autofree] + +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 5ac843d..1685a8c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6317,7 +6317,9 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + g_free(name); + goto out; + } ++ } + ++ if (cpu->max_features && accel_uses_host_cpuid()) { + if (enable_cpu_pm) { + host_cpuid(5, 0, &cpu->mwait.eax, &cpu->mwait.ebx, + &cpu->mwait.ecx, &cpu->mwait.edx); +-- +1.8.3.1 + diff --git a/kvm-target-i386-fix-TCG-UCODE_REV-access.patch b/kvm-target-i386-fix-TCG-UCODE_REV-access.patch new file mode 100644 index 0000000..c7ced8a --- /dev/null +++ b/kvm-target-i386-fix-TCG-UCODE_REV-access.patch @@ -0,0 +1,73 @@ +From 3d16f05359e6277da1f970f71aa9f76337d655dc Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:14 +0000 +Subject: [PATCH 4/9] target/i386: fix TCG UCODE_REV access +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-5-pbonzini@redhat.com> +Patchwork-id: 93904 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/6] target/i386: fix TCG UCODE_REV access +Bugzilla: 1791648 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. David Alan Gilbert + +This was a very interesting semantic conflict that caused git to move +the MSR_IA32_UCODE_REV read to helper_wrmsr. Not a big deal, but +still should be fixed... 
+ +Fixes: 4e45aff398 ("target/i386: add a ucode-rev property", 2020-01-24) +Message-id: <20200206171022.9289-1-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9028c75c9d08be303ccc425bfe3d3b23d8f4cac7) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/misc_helper.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c +index aed16fe..7d61221 100644 +--- a/target/i386/misc_helper.c ++++ b/target/i386/misc_helper.c +@@ -229,7 +229,6 @@ void helper_rdmsr(CPUX86State *env) + #else + void helper_wrmsr(CPUX86State *env) + { +- X86CPU *x86_cpu = env_archcpu(env); + uint64_t val; + + cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 1, GETPC()); +@@ -372,9 +371,6 @@ void helper_wrmsr(CPUX86State *env) + env->msr_bndcfgs = val; + cpu_sync_bndcs_hflags(env); + break; +- case MSR_IA32_UCODE_REV: +- val = x86_cpu->ucode_rev; +- break; + default: + if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL + && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + +@@ -393,6 +389,7 @@ void helper_wrmsr(CPUX86State *env) + + void helper_rdmsr(CPUX86State *env) + { ++ X86CPU *x86_cpu = env_archcpu(env); + uint64_t val; + + cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 0, GETPC()); +@@ -526,6 +523,9 @@ void helper_rdmsr(CPUX86State *env) + case MSR_IA32_BNDCFGS: + val = env->msr_bndcfgs; + break; ++ case MSR_IA32_UCODE_REV: ++ val = x86_cpu->ucode_rev; ++ break; + default: + if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL + && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + +-- +1.8.3.1 + diff --git a/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch b/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch new file mode 100644 index 0000000..5118aed --- /dev/null +++ b/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch @@ -0,0 +1,178 @@ +From eb0fc0ae2750a0462698d6d21ebb56a4249539f9 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:11 +0000 
+Subject: [PATCH 1/9] target/i386: kvm: initialize feature MSRs very early +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-2-pbonzini@redhat.com> +Patchwork-id: 93899 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/6] target/i386: kvm: initialize feature MSRs very early +Bugzilla: 1791648 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. David Alan Gilbert + +Some read-only MSRs affect the behavior of ioctls such as +KVM_SET_NESTED_STATE. We can initialize them once and for all +right after the CPU is realized, since they will never be modified +by the guest. + +Reported-by: Qingua Cheng +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +Message-Id: <1579544504-3616-2-git-send-email-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 420ae1fc51c99abfd03b1c590f55617edd2a2bed) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/kvm.c | 81 ++++++++++++++++++++++++++++++-------------------- + target/i386/kvm_i386.h | 1 + + 2 files changed, 49 insertions(+), 33 deletions(-) + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 86d9a1f..f41605b 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -67,6 +67,8 @@ + * 255 kvm_msr_entry structs */ + #define MSR_BUF_SIZE 4096 + ++static void kvm_init_msrs(X86CPU *cpu); ++ + const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + KVM_CAP_INFO(SET_TSS_ADDR), + KVM_CAP_INFO(EXT_CPUID), +@@ -1842,6 +1844,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + has_msr_tsc_aux = false; + } + ++ kvm_init_msrs(cpu); ++ + r = hyperv_init_vcpu(cpu); + if (r) { + goto fail; +@@ -2660,11 +2664,53 @@ static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f) + VMCS12_MAX_FIELD_INDEX << 1); + } + ++static int kvm_buf_set_msrs(X86CPU *cpu) ++{ ++ int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); ++ if (ret < 0) { ++ return ret; ++ 
} ++ ++ if (ret < cpu->kvm_msr_buf->nmsrs) { ++ struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; ++ error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, ++ (uint32_t)e->index, (uint64_t)e->data); ++ } ++ ++ assert(ret == cpu->kvm_msr_buf->nmsrs); ++ return 0; ++} ++ ++static void kvm_init_msrs(X86CPU *cpu) ++{ ++ CPUX86State *env = &cpu->env; ++ ++ kvm_msr_buf_reset(cpu); ++ if (has_msr_arch_capabs) { ++ kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, ++ env->features[FEAT_ARCH_CAPABILITIES]); ++ } ++ ++ if (has_msr_core_capabs) { ++ kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, ++ env->features[FEAT_CORE_CAPABILITY]); ++ } ++ ++ /* ++ * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but ++ * all kernels with MSR features should have them. ++ */ ++ if (kvm_feature_msrs && cpu_has_vmx(env)) { ++ kvm_msr_entry_add_vmx(cpu, env->features); ++ } ++ ++ assert(kvm_buf_set_msrs(cpu) == 0); ++} ++ + static int kvm_put_msrs(X86CPU *cpu, int level) + { + CPUX86State *env = &cpu->env; + int i; +- int ret; + + kvm_msr_buf_reset(cpu); + +@@ -2722,17 +2768,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + } + #endif + +- /* If host supports feature MSR, write down. */ +- if (has_msr_arch_capabs) { +- kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, +- env->features[FEAT_ARCH_CAPABILITIES]); +- } +- +- if (has_msr_core_capabs) { +- kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, +- env->features[FEAT_CORE_CAPABILITY]); +- } +- + /* + * The following MSRs have side effects on the guest or are too heavy + * for normal writeback. Limit them to reset or full state updates. +@@ -2910,14 +2945,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + + /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see + * kvm_put_msr_feature_control. */ +- +- /* +- * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but +- * all kernels with MSR features should have them. 
+- */ +- if (kvm_feature_msrs && cpu_has_vmx(env)) { +- kvm_msr_entry_add_vmx(cpu, env->features); +- } + } + + if (env->mcg_cap) { +@@ -2933,19 +2960,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + } + } + +- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); +- if (ret < 0) { +- return ret; +- } +- +- if (ret < cpu->kvm_msr_buf->nmsrs) { +- struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; +- error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, +- (uint32_t)e->index, (uint64_t)e->data); +- } +- +- assert(ret == cpu->kvm_msr_buf->nmsrs); +- return 0; ++ return kvm_buf_set_msrs(cpu); + } + + +diff --git a/target/i386/kvm_i386.h b/target/i386/kvm_i386.h +index 06fe06b..d98c6f6 100644 +--- a/target/i386/kvm_i386.h ++++ b/target/i386/kvm_i386.h +@@ -66,4 +66,5 @@ bool kvm_enable_x2apic(void); + bool kvm_has_x2apic_api(void); + + bool kvm_hv_vpindex_settable(void); ++ + #endif +-- +1.8.3.1 + diff --git a/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch b/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch new file mode 100644 index 0000000..99b18fc --- /dev/null +++ b/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch @@ -0,0 +1,64 @@ +From 8f39b0c9523630efeb451e2298cf64b88cd2ac81 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:13 +0000 +Subject: [PATCH 3/9] target/i386: kvm: initialize microcode revision from KVM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-4-pbonzini@redhat.com> +Patchwork-id: 93897 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/6] target/i386: kvm: initialize microcode revision from KVM +Bugzilla: 1791648 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. David Alan Gilbert + +KVM can return the host microcode revision as a feature MSR. +Use it as the default value for -cpu host. 
+ +Signed-off-by: Paolo Bonzini +Message-Id: <1579544504-3616-4-git-send-email-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 32c87d70ff55b96741f08c35108935cac6f40fe4) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 4 ++++ + target/i386/kvm.c | 5 +++++ + 2 files changed, 9 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index e505d3e..5ac843d 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6323,6 +6323,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + &cpu->mwait.ecx, &cpu->mwait.edx); + env->features[FEAT_1_ECX] |= CPUID_EXT_MONITOR; + } ++ if (kvm_enabled() && cpu->ucode_rev == 0) { ++ cpu->ucode_rev = kvm_arch_get_supported_msr_feature(kvm_state, ++ MSR_IA32_UCODE_REV); ++ } + } + + if (cpu->ucode_rev == 0) { +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index f41605b..6c61aef 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -2696,6 +2696,11 @@ static void kvm_init_msrs(X86CPU *cpu) + env->features[FEAT_CORE_CAPABILITY]); + } + ++ if (kvm_arch_get_supported_msr_feature(kvm_state, ++ MSR_IA32_UCODE_REV)) { ++ kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); ++ } ++ + /* + * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but + * all kernels with MSR features should have them. +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 598edf6..3fa1135 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 12%{?dist} +Release: 13%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -510,6 +510,24 @@ Patch186: kvm-virtio-reset-region-cache-when-on-queue-deletion.patch Patch187: kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch # For bz#1805334 - vhost-user/50-qemu-gpu.json is not valid JSON Patch188: kvm-vhost-user-gpu-Drop-trailing-json-comma.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch189: kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch190: kvm-target-i386-add-a-ucode-rev-property.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch191: kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch192: kvm-target-i386-fix-TCG-UCODE_REV-access.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch193: kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch +# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough +Patch194: kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch +# For bz#1703907 - [upstream]QEMU coredump when converting to qcow2: external data file images on block devices with copy_offloading +Patch195: kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch +# For bz#1794692 - Mirror block job stops making progress +Patch196: kvm-mirror-Store-MirrorOp.co-for-debuggability.patch +# For bz#1794692 - Mirror block job stops making progress +Patch197: kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch BuildRequires: wget BuildRequires: rpm-build @@ -1443,6 +1461,23 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Sat Feb 
29 2020 Danilo Cesar Lemes de Paula - 4.2.0-13.el8 +- kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch [bz#1791648] +- kvm-target-i386-add-a-ucode-rev-property.patch [bz#1791648] +- kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch [bz#1791648] +- kvm-target-i386-fix-TCG-UCODE_REV-access.patch [bz#1791648] +- kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch [bz#1791648] +- kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch [bz#1791648] +- kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch [bz#1703907] +- kvm-mirror-Store-MirrorOp.co-for-debuggability.patch [bz#1794692] +- kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch [bz#1794692] +- Resolves: bz#1703907 + ([upstream]QEMU coredump when converting to qcow2: external data file images on block devices with copy_offloading) +- Resolves: bz#1791648 + ([RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough) +- Resolves: bz#1794692 + (Mirror block job stops making progress) + * Mon Feb 24 2020 Danilo Cesar Lemes de Paula - 4.2.0-12.el8 - kvm-vhost-user-gpu-Drop-trailing-json-comma.patch [bz#1805334] - Resolves: bz#1805334 From cb4ea43665f72f2bbcb3f11e88c01f4df515dd67 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Wed, 11 Mar 2020 20:25:54 +0000 Subject: [PATCH 069/195] * Wed Mar 11 2020 Danilo Cesar Lemes de Paula - 4.2.0-14.el8 - kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch [bz#1782529] - kvm-migration-multifd-clean-pages-after-filling-packet.patch [bz#1738451] - kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch [bz#1738451] - kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch [bz#1738451] - kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch [bz#1738451] - kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch [bz#1738451] - kvm-qemu-file-Don-t-do-IO-after-shutdown.patch [bz#1738451] - kvm-migration-Don-t-send-data-if-we-have-stopped.patch [bz#1738451] - kvm-migration-Create-migration_is_running.patch [bz#1738451] - kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch [bz#1738451] - kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch [bz#1738451] - kvm-virtiofsd-Remove-fuse_req_getgroups.patch [bz#1797064] - kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch [bz#1797064] - kvm-virtiofsd-load_capng-missing-unlock.patch [bz#1797064] - kvm-virtiofsd-do_read-missing-NULL-check.patch [bz#1797064] - kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch [bz#1797064] - kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch [bz#1797064] - kvm-virtiofsd-Fix-xattr-operations.patch [bz#1797064] - Resolves: bz#1738451 (qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel)) - Resolves: bz#1782529 (Windows Update Enablement with default smbios strings in qemu) - Resolves: bz#1797064 (virtiofsd: Fixes) --- ...w-default-SMBIOS-fields-for-Windows-.patch | 262 ++++++++++++++ ...igration-Create-migration_is_running.patch | 119 +++++++ ...n-Don-t-send-data-if-we-have-stopped.patch | 42 +++ ...ure-that-we-don-t-call-write-in-case.patch | 94 +++++ ...VM-is-paused-when-migration-is-cance.patch | 70 ++++ 
...ifd-clean-pages-after-filling-packet.patch | 65 ++++ ...d-fix-destroyed-mutex-access-in-term.patch | 77 +++++ ...d-fix-nullptr-access-in-multifd_send.patch | 75 ++++ ...d-fix-nullptr-access-in-terminating-.patch | 68 ++++ ...e-that-we-don-t-do-any-IO-after-an-e.patch | 74 ++++ ...qemu-file-Don-t-do-IO-after-shutdown.patch | 92 +++++ ...fuse_lowlevel-Fix-fuse_out_header-er.patch | 55 +++ kvm-virtiofsd-Fix-xattr-operations.patch | 327 ++++++++++++++++++ kvm-virtiofsd-Remove-fuse_req_getgroups.patch | 193 +++++++++++ ...virtiofsd-do_read-missing-NULL-check.patch | 49 +++ ...ate_listen_socket-error-path-socket-.patch | 56 +++ kvm-virtiofsd-load_capng-missing-unlock.patch | 46 +++ ...hrough_ll-cleanup-getxattr-listxattr.patch | 154 +++++++++ qemu-kvm.spec | 64 +++- 19 files changed, 1981 insertions(+), 1 deletion(-) create mode 100644 kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch create mode 100644 kvm-migration-Create-migration_is_running.patch create mode 100644 kvm-migration-Don-t-send-data-if-we-have-stopped.patch create mode 100644 kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch create mode 100644 kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch create mode 100644 kvm-migration-multifd-clean-pages-after-filling-packet.patch create mode 100644 kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch create mode 100644 kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch create mode 100644 kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch create mode 100644 kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch create mode 100644 kvm-qemu-file-Don-t-do-IO-after-shutdown.patch create mode 100644 kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch create mode 100644 kvm-virtiofsd-Fix-xattr-operations.patch create mode 100644 kvm-virtiofsd-Remove-fuse_req_getgroups.patch create mode 100644 kvm-virtiofsd-do_read-missing-NULL-check.patch create mode 100644 
kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch create mode 100644 kvm-virtiofsd-load_capng-missing-unlock.patch create mode 100644 kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch diff --git a/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch b/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch new file mode 100644 index 0000000..0f0f126 --- /dev/null +++ b/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch @@ -0,0 +1,262 @@ +From e6c3fbfc82863180007569cf2a9132c28a47bf1f Mon Sep 17 00:00:00 2001 +From: "Daniel P. Berrange" +Date: Mon, 20 Jan 2020 16:13:08 +0000 +Subject: [PATCH 01/18] hw/smbios: set new default SMBIOS fields for Windows + driver support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrange +Message-id: <20200120161308.584989-2-berrange@redhat.com> +Patchwork-id: 93422 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] hw/smbios: set new default SMBIOS fields for Windows driver support +Bugzilla: 1782529 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Igor Mammedov +RH-Acked-by: Laszlo Ersek + +For Windows driver support, we have to follow this doc in order to +enable Windows to automatically determine the right drivers to install +for a given guest / host combination: + + https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer + +Out of the choices available, it was decided that the Windows drivers +will be written to expect use of the scheme documented as "HardwareID-6" +against Windows 10. This uses SMBIOS System (Type 1) and Base Board +(Type 2) tables and will match on + + System Manufacturer = Red Hat + System SKU Number = 8.2.0 + Baseboard Manufacturer = Red Hat + Baseboard Product = RHEL-AV + +The new SMBIOS fields will be tied to machine type and only reported for +pc-q35-8.2.0 machine and later. 
+ +The old SMBIOS fields, previously reported by all machines were: + + System Manufacturer: Red Hat + System Product Name: KVM + System Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) + System Family: Red Hat Enterprise Linux + Baseboard Manufacturer: Red Hat + Baseboard Product Name: KVM + Baseboard Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) + Chassis Manufacturer: Red Hat + Chassis Product Name: KVM + Chassis Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) + Processor Manufacturer: Red Hat + Processor Product Name: KVM + Processor Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) + +This information will continue to be reported for all machines, except +where it conflicts with the requirement of the new SMBIOS data. IOW, +the "Baseboard Product Name" will change to "RHEL-AV" for pc-q35-8.2.0 +machine types and later. + +Management applications MUST NEVER override the 4 new SMBIOS fields that +are used for Windows driver matching, with differing values. Aside from +this, they are free to override any other field, including those from +the old SMBIOS field data. + +In particular if a management application wants to report its own +product name and version, it is recommended to use "System product" +and "System version" as identifying fields, as these avoid a clash with +the new SMBIOS fields used for Windows drivers. + +Note that until now the Baseboard (type 2) table has only been generated +by QEMU if explicitly asked for on the CLI. This patch makes it always +present for new machine types. + +Signed-off-by: Daniel P. Berrangé +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/arm/virt.c | 2 +- + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 8 ++++++++ + hw/smbios/smbios.c | 45 +++++++++++++++++++++++++++++++++++++++++--- + include/hw/firmware/smbios.h | 5 ++++- + include/hw/i386/pc.h | 3 +++ + 6 files changed, 60 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index d30d38c..2dcf6e7 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1423,7 +1423,7 @@ static void virt_build_smbios(VirtMachineState *vms) + + smbios_set_defaults("QEMU", product, + vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, +- true, SMBIOS_ENTRY_POINT_30); ++ true, NULL, NULL, SMBIOS_ENTRY_POINT_30); + + smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, + &smbios_anchor, &smbios_anchor_len); +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index bd7fdb9..2ac94d5 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, + smbios_set_defaults("Red Hat", "KVM", + mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 7531d8e..e975643 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -200,6 +200,8 @@ static void pc_q35_init(MachineState *machine) + smbios_set_defaults("Red Hat", "KVM", + mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + +@@ -565,8 +567,11 @@ static void pc_q35_init_rhel820(MachineState *machine) + + static void pc_q35_machine_rhel820_options(MachineClass *m) + { ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel_options(m); + m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.2.0"; + } + + DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", 
pc_q35_init_rhel820, +@@ -579,9 +584,12 @@ static void pc_q35_init_rhel810(MachineState *machine) + + static void pc_q35_machine_rhel810_options(MachineClass *m) + { ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel820_options(m); + m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; + m->alias = NULL; ++ pcmc->smbios_stream_product = NULL; ++ pcmc->smbios_stream_version = NULL; + compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); + compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); + } +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index e6e9355..d65c149 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -57,6 +57,9 @@ static bool smbios_legacy = true; + static bool smbios_uuid_encoded = true; + /* end: legacy structures & constants for <= 2.0 machines */ + ++/* Set to true for modern Windows 10 HardwareID-6 compat */ ++static bool smbios_type2_required; ++ + + uint8_t *smbios_tables; + size_t smbios_tables_len; +@@ -532,7 +535,7 @@ static void smbios_build_type_1_table(void) + + static void smbios_build_type_2_table(void) + { +- SMBIOS_BUILD_TABLE_PRE(2, 0x200, false); /* optional */ ++ SMBIOS_BUILD_TABLE_PRE(2, 0x200, smbios_type2_required); + + SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); + SMBIOS_TABLE_SET_STR(2, product_str, type2.product); +@@ -753,7 +756,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) + + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type) ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type) + { + smbios_have_defaults = true; + smbios_legacy = legacy_mode; +@@ -774,12 +780,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + g_free(smbios_entries); + } + ++ /* ++ * If @stream_product & @stream_version are non-NULL, 
then ++ * we're following rules for new Windows driver support. ++ * The data we have to report is defined in this doc: ++ * ++ * https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer ++ * ++ * The Windows drivers are written to expect use of the ++ * scheme documented as "HardwareID-6" against Windows 10, ++ * which uses SMBIOS System (Type 1) and Base Board (Type 2) ++ * tables and will match on ++ * ++ * System Manufacturer = Red Hat (@manufacturer) ++ * System SKU Number = 8.2.0 (@stream_version) ++ * Baseboard Manufacturer = Red Hat (@manufacturer) ++ * Baseboard Product = RHEL-AV (@stream_product) ++ * ++ * NB, SKU must be changed with each RHEL-AV release ++ * ++ * Other fields can be freely used by applications using ++ * QEMU. For example apps can use the "System product" ++ * and "System version" to identify themselves. ++ * ++ * We get 'System Manufacturer' and 'Baseboard Manufacturer' ++ */ + SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type1.product, product); + SMBIOS_SET_DEFAULT(type1.version, version); + SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); ++ if (stream_version != NULL) { ++ SMBIOS_SET_DEFAULT(type1.sku, stream_version); ++ } + SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); +- SMBIOS_SET_DEFAULT(type2.product, product); ++ if (stream_product != NULL) { ++ SMBIOS_SET_DEFAULT(type2.product, stream_product); ++ smbios_type2_required = true; ++ } else { ++ SMBIOS_SET_DEFAULT(type2.product, product); ++ } + SMBIOS_SET_DEFAULT(type2.version, version); + SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type3.version, version); +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 02a0ced..67e38a1 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -267,7 +267,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, 
uint32_t features); + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type); ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type); + uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); + void smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 2e362c8..b9f29ba 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -109,6 +109,9 @@ typedef struct PCMachineClass { + bool smbios_defaults; + bool smbios_legacy_mode; + bool smbios_uuid_encoded; ++ /* New fields needed for Windows HardwareID-6 matching */ ++ const char *smbios_stream_product; ++ const char *smbios_stream_version; + + /* RAM / address space compat: */ + bool gigabyte_align; +-- +1.8.3.1 + diff --git a/kvm-migration-Create-migration_is_running.patch b/kvm-migration-Create-migration_is_running.patch new file mode 100644 index 0000000..c9593de --- /dev/null +++ b/kvm-migration-Create-migration_is_running.patch @@ -0,0 +1,119 @@ +From c9e3d13d70a24bf606ce351886b27bdca25ef4dc Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:41 +0000 +Subject: [PATCH 09/18] migration: Create migration_is_running() + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-9-quintela@redhat.com> +Patchwork-id: 94115 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 08/10] migration: Create migration_is_running() +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +This function returns true if we are in the middle of a migration. +It is like migration_is_setup_or_active() with CANCELLING and COLO. +Adapt all callers that are needed. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert +(cherry picked from commit 392d87e21325fdb01210176faa07472b4985ccf0) +Signed-off-by: Danilo C. L. de Paula +--- + migration/migration.c | 29 ++++++++++++++++++++++++----- + migration/migration.h | 1 + + migration/savevm.c | 4 +--- + 3 files changed, 26 insertions(+), 8 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 30c53c6..eb50d77 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -831,6 +831,27 @@ bool migration_is_setup_or_active(int state) + } + } + ++bool migration_is_running(int state) ++{ ++ switch (state) { ++ case MIGRATION_STATUS_ACTIVE: ++ case MIGRATION_STATUS_POSTCOPY_ACTIVE: ++ case MIGRATION_STATUS_POSTCOPY_PAUSED: ++ case MIGRATION_STATUS_POSTCOPY_RECOVER: ++ case MIGRATION_STATUS_SETUP: ++ case MIGRATION_STATUS_PRE_SWITCHOVER: ++ case MIGRATION_STATUS_DEVICE: ++ case MIGRATION_STATUS_WAIT_UNPLUG: ++ case MIGRATION_STATUS_CANCELLING: ++ case MIGRATION_STATUS_COLO: ++ return true; ++ ++ default: ++ return false; ++ ++ } ++} ++ + static void populate_time_info(MigrationInfo *info, MigrationState *s) + { + info->has_status = true; +@@ -1090,7 +1111,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + MigrationCapabilityStatusList *cap; + bool cap_list[MIGRATION_CAPABILITY__MAX]; + +- if (migration_is_setup_or_active(s->state)) { ++ if (migration_is_running(s->state)) { + error_setg(errp, QERR_MIGRATION_ACTIVE); + return; + } +@@ -1603,7 +1624,7 @@ static void migrate_fd_cancel(MigrationState *s) + + do { + old_state = s->state; +- if (!migration_is_setup_or_active(old_state)) { ++ if (!migration_is_running(old_state)) { + break; + } + /* If the migration is paused, kick it out of the pause */ +@@ -1900,9 +1921,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + return true; + } + +- if (migration_is_setup_or_active(s->state) || +- s->state == MIGRATION_STATUS_CANCELLING || +- s->state == MIGRATION_STATUS_COLO) { ++ if 
(migration_is_running(s->state)) { + error_setg(errp, QERR_MIGRATION_ACTIVE); + return false; + } +diff --git a/migration/migration.h b/migration/migration.h +index 0b1b0d4..a2b2336 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -279,6 +279,7 @@ void migrate_fd_error(MigrationState *s, const Error *error); + void migrate_fd_connect(MigrationState *s, Error *error_in); + + bool migration_is_setup_or_active(int state); ++bool migration_is_running(int state); + + void migrate_init(MigrationState *s); + bool migration_is_blocked(Error **errp); +diff --git a/migration/savevm.c b/migration/savevm.c +index a80bb52..144ecf0 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1506,9 +1506,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + MigrationState *ms = migrate_get_current(); + MigrationStatus status; + +- if (migration_is_setup_or_active(ms->state) || +- ms->state == MIGRATION_STATUS_CANCELLING || +- ms->state == MIGRATION_STATUS_COLO) { ++ if (migration_is_running(ms->state)) { + error_setg(errp, QERR_MIGRATION_ACTIVE); + return -EINVAL; + } +-- +1.8.3.1 + diff --git a/kvm-migration-Don-t-send-data-if-we-have-stopped.patch b/kvm-migration-Don-t-send-data-if-we-have-stopped.patch new file mode 100644 index 0000000..9a36714 --- /dev/null +++ b/kvm-migration-Don-t-send-data-if-we-have-stopped.patch @@ -0,0 +1,42 @@ +From ab07e0b41c50a85940d798a9a65a58698fd2edfb Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:40 +0000 +Subject: [PATCH 08/18] migration: Don't send data if we have stopped + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-8-quintela@redhat.com> +Patchwork-id: 94114 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 07/10] migration: Don't send data if we have stopped +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. 
David Alan Gilbert + +If we do a cancel, we got out without one error, but we can't do the +rest of the output as in a normal situation. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit b69a0227a803256ad270283872d40ff768f4d56d) +Signed-off-by: Danilo C. L. de Paula +--- + migration/ram.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index a0257ee..902c56c 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -3511,7 +3511,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_ROUND); + + out: +- if (ret >= 0) { ++ if (ret >= 0 ++ && migration_is_setup_or_active(migrate_get_current()->state)) { + multifd_send_sync_main(rs); + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); +-- +1.8.3.1 + diff --git a/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch b/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch new file mode 100644 index 0000000..01cb0f1 --- /dev/null +++ b/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch @@ -0,0 +1,94 @@ +From 71b05ab5782aa1e38c016be6264a14f5650d2a87 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:35 +0000 +Subject: [PATCH 03/18] migration: Make sure that we don't call write() in case + of error + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-3-quintela@redhat.com> +Patchwork-id: 94113 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 02/10] migration: Make sure that we don't call write() in case of error +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +If we are exiting due to an error/finish/.... Just don't try to even +touch the channel with one IO operation. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert +Signed-off-by: Juan Quintela +(cherry picked from commit 4d65a6216bfc44891ac298b74a6921d479805131) +Signed-off-by: Danilo C. L. de Paula +--- + migration/ram.c | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 65580e3..8c783b3 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -899,6 +899,12 @@ struct { + uint64_t packet_num; + /* send channels ready */ + QemuSemaphore channels_ready; ++ /* ++ * Have we already run terminate threads. There is a race when it ++ * happens that we got one error while we are exiting. ++ * We will use atomic operations. Only valid values are 0 and 1. ++ */ ++ int exiting; + } *multifd_send_state; + + /* +@@ -927,6 +933,10 @@ static int multifd_send_pages(RAMState *rs) + MultiFDPages_t *pages = multifd_send_state->pages; + uint64_t transferred; + ++ if (atomic_read(&multifd_send_state->exiting)) { ++ return -1; ++ } ++ + qemu_sem_wait(&multifd_send_state->channels_ready); + for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) { + p = &multifd_send_state->params[i]; +@@ -1008,6 +1018,16 @@ static void multifd_send_terminate_threads(Error *err) + } + } + ++ /* ++ * We don't want to exit each threads twice. Depending on where ++ * we get the error, or if there are two independent errors in two ++ * threads at the same time, we can end calling this function ++ * twice. 
++ */ ++ if (atomic_xchg(&multifd_send_state->exiting, 1)) { ++ return; ++ } ++ + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +@@ -1117,6 +1137,10 @@ static void *multifd_send_thread(void *opaque) + + while (true) { + qemu_sem_wait(&p->sem); ++ ++ if (atomic_read(&multifd_send_state->exiting)) { ++ break; ++ } + qemu_mutex_lock(&p->mutex); + + if (p->pending_job) { +@@ -1225,6 +1249,7 @@ int multifd_save_setup(void) + multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); + multifd_send_state->pages = multifd_pages_init(page_count); + qemu_sem_init(&multifd_send_state->channels_ready, 0); ++ atomic_set(&multifd_send_state->exiting, 0); + + for (i = 0; i < thread_count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +-- +1.8.3.1 + diff --git a/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch b/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch new file mode 100644 index 0000000..4a7fb28 --- /dev/null +++ b/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch @@ -0,0 +1,70 @@ +From 3c4f6f0c2bf5562f2aa26f964848ae53e6ac4790 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:43 +0000 +Subject: [PATCH 11/18] migration: Maybe VM is paused when migration is + cancelled + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-11-quintela@redhat.com> +Patchwork-id: 94120 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 10/10] migration: Maybe VM is paused when migration is cancelled +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +From: Zhimin Feng + +If the migration is cancelled when it is in the completion phase, +the migration state is set to MIGRATION_STATUS_CANCELLING. +The VM maybe wait for the 'pause_sem' semaphore in migration_maybe_pause +function, so that VM always is paused. 
+ +Reported-by: Euler Robot +Signed-off-by: Zhimin Feng +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 8958338b10abcb346b54a8038a491fda2db1c853) +Signed-off-by: Danilo C. L. de Paula +--- + migration/migration.c | 24 ++++++++++++++++-------- + 1 file changed, 16 insertions(+), 8 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index eb50d77..ed18c59 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2786,14 +2786,22 @@ static int migration_maybe_pause(MigrationState *s, + /* This block intentionally left blank */ + } + +- qemu_mutex_unlock_iothread(); +- migrate_set_state(&s->state, *current_active_state, +- MIGRATION_STATUS_PRE_SWITCHOVER); +- qemu_sem_wait(&s->pause_sem); +- migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, +- new_state); +- *current_active_state = new_state; +- qemu_mutex_lock_iothread(); ++ /* ++ * If the migration is cancelled when it is in the completion phase, ++ * the migration state is set to MIGRATION_STATUS_CANCELLING. ++ * So we don't need to wait a semaphore, otherwise we would always ++ * wait for the 'pause_sem' semaphore. ++ */ ++ if (s->state != MIGRATION_STATUS_CANCELLING) { ++ qemu_mutex_unlock_iothread(); ++ migrate_set_state(&s->state, *current_active_state, ++ MIGRATION_STATUS_PRE_SWITCHOVER); ++ qemu_sem_wait(&s->pause_sem); ++ migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, ++ new_state); ++ *current_active_state = new_state; ++ qemu_mutex_lock_iothread(); ++ } + + return s->state == new_state ? 
0 : -EINVAL; + } +-- +1.8.3.1 + diff --git a/kvm-migration-multifd-clean-pages-after-filling-packet.patch b/kvm-migration-multifd-clean-pages-after-filling-packet.patch new file mode 100644 index 0000000..5fa7fde --- /dev/null +++ b/kvm-migration-multifd-clean-pages-after-filling-packet.patch @@ -0,0 +1,65 @@ +From 32ee75b7f4a31d6080e5659e2a0285a046ef1036 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:34 +0000 +Subject: [PATCH 02/18] migration/multifd: clean pages after filling packet + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-2-quintela@redhat.com> +Patchwork-id: 94112 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 01/10] migration/multifd: clean pages after filling packet +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +From: Wei Yang + +This is a preparation for the next patch: + + not use multifd during postcopy. + +Without enabling postcopy, everything looks good. While after enabling +postcopy, migration may fail even not use multifd during postcopy. The +reason is the pages is not properly cleared and *old* target page will +continue to be transferred. + +After clean pages, migration succeeds. + +Signed-off-by: Wei Yang +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit eab54aa78ffd9fb7895b20fc2761ee998479489b) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/ram.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 5078f94..65580e3 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -944,10 +944,10 @@ static int multifd_send_pages(RAMState *rs) + } + qemu_mutex_unlock(&p->mutex); + } +- p->pages->used = 0; ++ assert(!p->pages->used); ++ assert(!p->pages->block); + + p->packet_num = multifd_send_state->packet_num++; +- p->pages->block = NULL; + multifd_send_state->pages = p->pages; + p->pages = pages; + transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len; +@@ -1129,6 +1129,8 @@ static void *multifd_send_thread(void *opaque) + p->flags = 0; + p->num_packets++; + p->num_pages += used; ++ p->pages->used = 0; ++ p->pages->block = NULL; + qemu_mutex_unlock(&p->mutex); + + trace_multifd_send(p->id, packet_num, used, flags, +-- +1.8.3.1 + diff --git a/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch b/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch new file mode 100644 index 0000000..0c5fe80 --- /dev/null +++ b/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch @@ -0,0 +1,77 @@ +From 2c14a6831954a59256cc8d1980da0ad705a3a3fa Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:37 +0000 +Subject: [PATCH 05/18] migration/multifd: fix destroyed mutex access in + terminating multifd threads + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-5-quintela@redhat.com> +Patchwork-id: 94119 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 04/10] migration/multifd: fix destroyed mutex access in terminating multifd threads +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +From: Jiahui Cen + +One multifd will lock all the other multifds' IOChannel mutex to inform them +to quit by setting p->quit or shutting down p->c. 
In this scenario, if some +multifds had already been terminated and multifd_load_cleanup/multifd_save_cleanup +had destroyed their mutex, it could cause destroyed mutex access when trying +lock their mutex. + +Here is the coredump stack: + #0 0x00007f81a2794437 in raise () from /usr/lib64/libc.so.6 + #1 0x00007f81a2795b28 in abort () from /usr/lib64/libc.so.6 + #2 0x00007f81a278d1b6 in __assert_fail_base () from /usr/lib64/libc.so.6 + #3 0x00007f81a278d262 in __assert_fail () from /usr/lib64/libc.so.6 + #4 0x000055eb1bfadbd3 in qemu_mutex_lock_impl (mutex=0x55eb1e2d1988, file=, line=) at util/qemu-thread-posix.c:64 + #5 0x000055eb1bb4564a in multifd_send_terminate_threads (err=) at migration/ram.c:1015 + #6 0x000055eb1bb4bb7f in multifd_send_thread (opaque=0x55eb1e2d19f8) at migration/ram.c:1171 + #7 0x000055eb1bfad628 in qemu_thread_start (args=0x55eb1e170450) at util/qemu-thread-posix.c:502 + #8 0x00007f81a2b36df5 in start_thread () from /usr/lib64/libpthread.so.0 + #9 0x00007f81a286048d in clone () from /usr/lib64/libc.so.6 + +To fix it up, let's destroy the mutex after all the other multifd threads had +been terminated. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 9560a48ecc0c20d87bc458a6db77fba651605819) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/ram.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 860f781..6c55c5d 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1052,6 +1052,10 @@ void multifd_save_cleanup(void) + if (p->running) { + qemu_thread_join(&p->thread); + } ++ } ++ for (i = 0; i < migrate_multifd_channels(); i++) { ++ MultiFDSendParams *p = &multifd_send_state->params[i]; ++ + socket_send_channel_destroy(p->c); + p->c = NULL; + qemu_mutex_destroy(&p->mutex); +@@ -1335,6 +1339,10 @@ int multifd_load_cleanup(Error **errp) + qemu_sem_post(&p->sem_sync); + qemu_thread_join(&p->thread); + } ++ } ++ for (i = 0; i < migrate_multifd_channels(); i++) { ++ MultiFDRecvParams *p = &multifd_recv_state->params[i]; ++ + object_unref(OBJECT(p->c)); + p->c = NULL; + qemu_mutex_destroy(&p->mutex); +-- +1.8.3.1 + diff --git a/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch b/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch new file mode 100644 index 0000000..9e9683c --- /dev/null +++ b/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch @@ -0,0 +1,75 @@ +From 517a99c5fba163bf684978fe3d9476b619481391 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:42 +0000 +Subject: [PATCH 10/18] migration/multifd: fix nullptr access in + multifd_send_terminate_threads + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-10-quintela@redhat.com> +Patchwork-id: 94117 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 09/10] migration/multifd: fix nullptr access in multifd_send_terminate_threads +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +From: Zhimin Feng + +If the multifd_send_threads is not created when migration is failed, +multifd_save_cleanup would be called twice. In this scenario, the +multifd_send_state is accessed after it has been released, the result +is that the source VM is crashing down. 
+ +Here is the coredump stack: + Program received signal SIGSEGV, Segmentation fault. + 0x00005629333a78ef in multifd_send_terminate_threads (err=err@entry=0x0) at migration/ram.c:1012 + 1012 MultiFDSendParams *p = &multifd_send_state->params[i]; + #0 0x00005629333a78ef in multifd_send_terminate_threads (err=err@entry=0x0) at migration/ram.c:1012 + #1 0x00005629333ab8a9 in multifd_save_cleanup () at migration/ram.c:1028 + #2 0x00005629333abaea in multifd_new_send_channel_async (task=0x562935450e70, opaque=) at migration/ram.c:1202 + #3 0x000056293373a562 in qio_task_complete (task=task@entry=0x562935450e70) at io/task.c:196 + #4 0x000056293373a6e0 in qio_task_thread_result (opaque=0x562935450e70) at io/task.c:111 + #5 0x00007f475d4d75a7 in g_idle_dispatch () from /usr/lib64/libglib-2.0.so.0 + #6 0x00007f475d4da9a9 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 + #7 0x0000562933785b33 in glib_pollfds_poll () at util/main-loop.c:219 + #8 os_host_main_loop_wait (timeout=) at util/main-loop.c:242 + #9 main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:518 + #10 0x00005629334c5acf in main_loop () at vl.c:1810 + #11 0x000056293334d7bb in main (argc=, argv=, envp=) at vl.c:4471 + +If the multifd_send_threads is not created when migration is failed. +In this scenario, we don't call multifd_save_cleanup in multifd_new_send_channel_async. + +Signed-off-by: Zhimin Feng +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 9c4d333c092e9c26d38f740ff3616deb42f21681) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/ram.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 902c56c..3891eff 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1229,7 +1229,15 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + trace_multifd_new_send_channel_async(p->id); + if (qio_task_propagate_error(task, &local_err)) { + migrate_set_error(migrate_get_current(), local_err); +- multifd_save_cleanup(); ++ /* Error happen, we need to tell who pay attention to me */ ++ qemu_sem_post(&multifd_send_state->channels_ready); ++ qemu_sem_post(&p->sem_sync); ++ /* ++ * Although multifd_send_thread is not created, but main migration ++ * thread neet to judge whether it is running, so we need to mark ++ * its status. ++ */ ++ p->quit = true; + } else { + p->c = QIO_CHANNEL(sioc); + qio_channel_set_delay(p->c, false); +-- +1.8.3.1 + diff --git a/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch b/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch new file mode 100644 index 0000000..e780698 --- /dev/null +++ b/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch @@ -0,0 +1,68 @@ +From 7f664fe26ff67f8131faa7a81a388b8a5b51403f Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:36 +0000 +Subject: [PATCH 04/18] migration/multifd: fix nullptr access in terminating + multifd threads + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-4-quintela@redhat.com> +Patchwork-id: 94110 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 03/10] migration/multifd: fix nullptr access in terminating multifd threads +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +From: Jiahui Cen + +One multifd channel will shutdown all the other multifd's IOChannel when it +fails to receive an IOChannel. 
In this scenario, if some multifds had not +received its IOChannel yet, it would try to shutdown its IOChannel which could +cause nullptr access at qio_channel_shutdown. + +Here is the coredump stack: + #0 object_get_class (obj=obj@entry=0x0) at qom/object.c:908 + #1 0x00005563fdbb8f4a in qio_channel_shutdown (ioc=0x0, how=QIO_CHANNEL_SHUTDOWN_BOTH, errp=0x0) at io/channel.c:355 + #2 0x00005563fd7b4c5f in multifd_recv_terminate_threads (err=) at migration/ram.c:1280 + #3 0x00005563fd7bc019 in multifd_recv_new_channel (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce00) at migration/ram.c:1478 + #4 0x00005563fda82177 in migration_ioc_process_incoming (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce30) at migration/migration.c:605 + #5 0x00005563fda8567d in migration_channel_process_incoming (ioc=0x556400255610) at migration/channel.c:44 + #6 0x00005563fda83ee0 in socket_accept_incoming_migration (listener=0x5563fff6b920, cioc=0x556400255610, opaque=) at migration/socket.c:166 + #7 0x00005563fdbc25cd in qio_net_listener_channel_func (ioc=, condition=, opaque=) at io/net-listener.c:54 + #8 0x00007f895b6fe9a9 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 + #9 0x00005563fdc18136 in glib_pollfds_poll () at util/main-loop.c:218 + #10 0x00005563fdc181b5 in os_host_main_loop_wait (timeout=1000000000) at util/main-loop.c:241 + #11 0x00005563fdc183a2 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:517 + #12 0x00005563fd8edb37 in main_loop () at vl.c:1791 + #13 0x00005563fd74fd45 in main (argc=, argv=, envp=) at vl.c:4473 + +To fix it up, let's check p->c before calling qio_channel_shutdown. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit f76e32eb05041ab001184ab16afb56524adccd0c) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/ram.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 8c783b3..860f781 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1307,7 +1307,9 @@ static void multifd_recv_terminate_threads(Error *err) + - normal quit, i.e. everything went fine, just finished + - error quit: We close the channels so the channel threads + finish the qio_channel_read_all_eof() */ +- qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); ++ if (p->c) { ++ qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); ++ } + qemu_mutex_unlock(&p->mutex); + } + } +-- +1.8.3.1 + diff --git a/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch b/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch new file mode 100644 index 0000000..bca0b4c --- /dev/null +++ b/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch @@ -0,0 +1,74 @@ +From 78c7fb5afcb298631df47f6b71cf764f921c15f4 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:38 +0000 +Subject: [PATCH 06/18] multifd: Make sure that we don't do any IO after an + error + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-6-quintela@redhat.com> +Patchwork-id: 94118 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 05/10] multifd: Make sure that we don't do any IO after an error +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 3d4095b222d97393b1c2c6e514951ec7798f1c43) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/ram.c | 22 +++++++++++++--------- + 1 file changed, 13 insertions(+), 9 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 6c55c5d..a0257ee 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -3440,7 +3440,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + { + RAMState **temp = opaque; + RAMState *rs = *temp; +- int ret; ++ int ret = 0; + int i; + int64_t t0; + int done = 0; +@@ -3511,12 +3511,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_ROUND); + + out: +- multifd_send_sync_main(rs); +- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); +- qemu_fflush(f); +- ram_counters.transferred += 8; ++ if (ret >= 0) { ++ multifd_send_sync_main(rs); ++ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); ++ qemu_fflush(f); ++ ram_counters.transferred += 8; + +- ret = qemu_file_get_error(f); ++ ret = qemu_file_get_error(f); ++ } + if (ret < 0) { + return ret; + } +@@ -3568,9 +3570,11 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_FINISH); + } + +- multifd_send_sync_main(rs); +- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); +- qemu_fflush(f); ++ if (ret >= 0) { ++ multifd_send_sync_main(rs); ++ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); ++ qemu_fflush(f); ++ } + + return ret; + } +-- +1.8.3.1 + diff --git a/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch b/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch new file mode 100644 index 0000000..88a6e31 --- /dev/null +++ b/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch @@ -0,0 +1,92 @@ +From d84814e298e3b05fb5bc61cc8e641a5e104d32d5 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:39 +0000 +Subject: [PATCH 07/18] qemu-file: Don't do IO after shutdown + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-7-quintela@redhat.com> +Patchwork-id: 94116 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 06/10] qemu-file: Don't do IO after shutdown +Bugzilla: 1738451 +RH-Acked-by: Laurent 
Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +Be sure that we are not doing neither read/write after shutdown of the +QEMUFile. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit a555b8092abc6f1bbe4b64c516679cbd68fcfbd8) +Signed-off-by: Danilo C. L. de Paula +--- + migration/qemu-file.c | 22 +++++++++++++++++++++- + 1 file changed, 21 insertions(+), 1 deletion(-) + +diff --git a/migration/qemu-file.c b/migration/qemu-file.c +index 26fb25d..bbb2b63 100644 +--- a/migration/qemu-file.c ++++ b/migration/qemu-file.c +@@ -53,6 +53,8 @@ struct QEMUFile { + + int last_error; + Error *last_error_obj; ++ /* has the file has been shutdown */ ++ bool shutdown; + }; + + /* +@@ -61,10 +63,18 @@ struct QEMUFile { + */ + int qemu_file_shutdown(QEMUFile *f) + { ++ int ret; ++ ++ f->shutdown = true; + if (!f->ops->shut_down) { + return -ENOSYS; + } +- return f->ops->shut_down(f->opaque, true, true, NULL); ++ ret = f->ops->shut_down(f->opaque, true, true, NULL); ++ ++ if (!f->last_error) { ++ qemu_file_set_error(f, -EIO); ++ } ++ return ret; + } + + /* +@@ -214,6 +224,9 @@ void qemu_fflush(QEMUFile *f) + return; + } + ++ if (f->shutdown) { ++ return; ++ } + if (f->iovcnt > 0) { + expect = iov_size(f->iov, f->iovcnt); + ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos, +@@ -328,6 +341,10 @@ static ssize_t qemu_fill_buffer(QEMUFile *f) + f->buf_index = 0; + f->buf_size = pending; + ++ if (f->shutdown) { ++ return 0; ++ } ++ + len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos, + IO_BUF_SIZE - pending, &local_error); + if (len > 0) { +@@ -642,6 +659,9 @@ int64_t qemu_ftell(QEMUFile *f) + + int qemu_file_rate_limit(QEMUFile *f) + { ++ if (f->shutdown) { ++ return 1; ++ } + if (qemu_file_get_error(f)) { + return 1; + } +-- +1.8.3.1 + diff --git a/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch b/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch new file mode 100644 
index 0000000..3efef47 --- /dev/null +++ b/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch @@ -0,0 +1,55 @@ +From e483eea891139ee38138381ba6715b3a2be050cc Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:12 +0000 +Subject: [PATCH 16/18] tools/virtiofsd/fuse_lowlevel: Fix + fuse_out_header::error value +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-6-dgilbert@redhat.com> +Patchwork-id: 94128 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/7] tools/virtiofsd/fuse_lowlevel: Fix fuse_out_header::error value +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: Philippe Mathieu-Daudé + +Fix warning reported by Clang static code analyzer: + + CC tools/virtiofsd/fuse_lowlevel.o + tools/virtiofsd/fuse_lowlevel.c:195:9: warning: Value stored to 'error' is never read + error = -ERANGE; + ^ ~~~~~~~ + +Fixes: 3db2876 +Reported-by: Clang Static Analyzer +Reviewed-by: Ján Tomko +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 09c086b2a144324199f99a7d4de78c3276a486c1) +Signed-off-by: Danilo C. L. 
de Paula +--- + tools/virtiofsd/fuse_lowlevel.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 704c036..2dd36ec 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -192,7 +192,7 @@ int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, + + if (error <= -1000 || error > 0) { + fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); +- error = -ERANGE; ++ out.error = -ERANGE; + } + + iov[0].iov_base = &out; +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Fix-xattr-operations.patch b/kvm-virtiofsd-Fix-xattr-operations.patch new file mode 100644 index 0000000..532948f --- /dev/null +++ b/kvm-virtiofsd-Fix-xattr-operations.patch @@ -0,0 +1,327 @@ +From 8721796f22a8a61d82974088e542377ee6db209e Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:14 +0000 +Subject: [PATCH 18/18] virtiofsd: Fix xattr operations +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-8-dgilbert@redhat.com> +Patchwork-id: 94123 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 7/7] virtiofsd: Fix xattr operations +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: Misono Tomohiro + +Current virtiofsd has problems about xattr operations and +they does not work properly for directory/symlink/special file. + +The fundamental cause is that virtiofsd uses openat() + f...xattr() +systemcalls for xattr operation but we should not open symlink/special +file in the daemon. Therefore the function is restricted. + +Fix this problem by: + 1. during setup of each thread, call unshare(CLONE_FS) + 2. in xattr operations (i.e. 
lo_getxattr), if inode is not a regular + file or directory, use fchdir(proc_loot_fd) + ...xattr() + + fchdir(root.fd) instead of openat() + f...xattr() + + (Note: for a regular file/directory openat() + f...xattr() + is still used for performance reason) + +With this patch, xfstests generic/062 passes on virtiofs. + +This fix is suggested by Miklos Szeredi and Stefan Hajnoczi. +The original discussion can be found here: + https://www.redhat.com/archives/virtio-fs/2019-October/msg00046.html + +Signed-off-by: Misono Tomohiro +Message-Id: <20200227055927.24566-3-misono.tomohiro@jp.fujitsu.com> +Acked-by: Vivek Goyal +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit bdfd66788349acc43cd3f1298718ad491663cfcc) +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/fuse_virtio.c | 13 +++++ + tools/virtiofsd/passthrough_ll.c | 105 +++++++++++++++++++++------------------ + tools/virtiofsd/seccomp.c | 6 +++ + 3 files changed, 77 insertions(+), 47 deletions(-) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index dd1c605..3b6d16a 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -426,6 +426,8 @@ err: + return ret; + } + ++static __thread bool clone_fs_called; ++ + /* Process one FVRequest in a thread pool */ + static void fv_queue_worker(gpointer data, gpointer user_data) + { +@@ -441,6 +443,17 @@ static void fv_queue_worker(gpointer data, gpointer user_data) + + assert(se->bufsize > sizeof(struct fuse_in_header)); + ++ if (!clone_fs_called) { ++ int ret; ++ ++ /* unshare FS for xattr operation */ ++ ret = unshare(CLONE_FS); ++ /* should not fail */ ++ assert(ret == 0); ++ ++ clone_fs_called = true; ++ } ++ + /* + * An element contains one request and the space to send our response + * They're spread over multiple descriptors in a scatter/gather set +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 
50c7273..9cba3f1 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -123,7 +123,7 @@ struct lo_inode { + pthread_mutex_t plock_mutex; + GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ + +- bool is_symlink; ++ mode_t filetype; + }; + + struct lo_cred { +@@ -695,7 +695,7 @@ static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, + struct lo_inode *parent; + char path[PATH_MAX]; + +- if (inode->is_symlink) { ++ if (S_ISLNK(inode->filetype)) { + res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH); + if (res == -1 && errno == EINVAL) { + /* Sorry, no race free way to set times on symlink. */ +@@ -929,7 +929,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + goto out_err; + } + +- inode->is_symlink = S_ISLNK(e->attr.st_mode); ++ /* cache only filetype */ ++ inode->filetype = (e->attr.st_mode & S_IFMT); + + /* + * One for the caller and one for nlookup (released in +@@ -1139,7 +1140,7 @@ static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, + struct lo_inode *parent; + char path[PATH_MAX]; + +- if (inode->is_symlink) { ++ if (S_ISLNK(inode->filetype)) { + res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); + if (res == -1 && (errno == ENOENT || errno == EINVAL)) { + /* Sorry, no race free way to hard-link a symlink. */ +@@ -2193,12 +2194,6 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", + ino, name, size); + +- if (inode->is_symlink) { +- /* Sorry, no race free way to getxattr on symlink. 
*/ +- saverr = EPERM; +- goto out; +- } +- + if (size) { + value = malloc(size); + if (!value) { +@@ -2207,12 +2202,25 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + } + + sprintf(procname, "%i", inode->fd); +- fd = openat(lo->proc_self_fd, procname, O_RDONLY); +- if (fd < 0) { +- goto out_err; ++ /* ++ * It is not safe to open() non-regular/non-dir files in file server ++ * unless O_PATH is used, so use that method for regular files/dir ++ * only (as it seems giving less performance overhead). ++ * Otherwise, call fchdir() to avoid open(). ++ */ ++ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ goto out_err; ++ } ++ ret = fgetxattr(fd, name, value, size); ++ } else { ++ /* fchdir should not fail here */ ++ assert(fchdir(lo->proc_self_fd) == 0); ++ ret = getxattr(procname, name, value, size); ++ assert(fchdir(lo->root.fd) == 0); + } + +- ret = fgetxattr(fd, name, value, size); + if (ret == -1) { + goto out_err; + } +@@ -2266,12 +2274,6 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino, + size); + +- if (inode->is_symlink) { +- /* Sorry, no race free way to listxattr on symlink. 
*/ +- saverr = EPERM; +- goto out; +- } +- + if (size) { + value = malloc(size); + if (!value) { +@@ -2280,12 +2282,19 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + } + + sprintf(procname, "%i", inode->fd); +- fd = openat(lo->proc_self_fd, procname, O_RDONLY); +- if (fd < 0) { +- goto out_err; ++ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ goto out_err; ++ } ++ ret = flistxattr(fd, value, size); ++ } else { ++ /* fchdir should not fail here */ ++ assert(fchdir(lo->proc_self_fd) == 0); ++ ret = listxattr(procname, value, size); ++ assert(fchdir(lo->root.fd) == 0); + } + +- ret = flistxattr(fd, value, size); + if (ret == -1) { + goto out_err; + } +@@ -2339,20 +2348,21 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 + ", name=%s value=%s size=%zd)\n", ino, name, value, size); + +- if (inode->is_symlink) { +- /* Sorry, no race free way to setxattr on symlink. */ +- saverr = EPERM; +- goto out; +- } +- + sprintf(procname, "%i", inode->fd); +- fd = openat(lo->proc_self_fd, procname, O_RDWR); +- if (fd < 0) { +- saverr = errno; +- goto out; ++ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ saverr = errno; ++ goto out; ++ } ++ ret = fsetxattr(fd, name, value, size, flags); ++ } else { ++ /* fchdir should not fail here */ ++ assert(fchdir(lo->proc_self_fd) == 0); ++ ret = setxattr(procname, name, value, size, flags); ++ assert(fchdir(lo->root.fd) == 0); + } + +- ret = fsetxattr(fd, name, value, size, flags); + saverr = ret == -1 ? 
errno : 0; + + out: +@@ -2387,20 +2397,21 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) + fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino, + name); + +- if (inode->is_symlink) { +- /* Sorry, no race free way to setxattr on symlink. */ +- saverr = EPERM; +- goto out; +- } +- + sprintf(procname, "%i", inode->fd); +- fd = openat(lo->proc_self_fd, procname, O_RDWR); +- if (fd < 0) { +- saverr = errno; +- goto out; ++ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { ++ fd = openat(lo->proc_self_fd, procname, O_RDONLY); ++ if (fd < 0) { ++ saverr = errno; ++ goto out; ++ } ++ ret = fremovexattr(fd, name); ++ } else { ++ /* fchdir should not fail here */ ++ assert(fchdir(lo->proc_self_fd) == 0); ++ ret = removexattr(procname, name); ++ assert(fchdir(lo->root.fd) == 0); + } + +- ret = fremovexattr(fd, name); + saverr = ret == -1 ? errno : 0; + + out: +@@ -2800,7 +2811,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) + exit(1); + } + +- root->is_symlink = false; ++ root->filetype = S_IFDIR; + root->fd = fd; + root->key.ino = stat.st_ino; + root->key.dev = stat.st_dev; +diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c +index 2d9d4a7..bd9e7b0 100644 +--- a/tools/virtiofsd/seccomp.c ++++ b/tools/virtiofsd/seccomp.c +@@ -41,6 +41,7 @@ static const int syscall_whitelist[] = { + SCMP_SYS(exit), + SCMP_SYS(exit_group), + SCMP_SYS(fallocate), ++ SCMP_SYS(fchdir), + SCMP_SYS(fchmodat), + SCMP_SYS(fchownat), + SCMP_SYS(fcntl), +@@ -62,7 +63,9 @@ static const int syscall_whitelist[] = { + SCMP_SYS(getpid), + SCMP_SYS(gettid), + SCMP_SYS(gettimeofday), ++ SCMP_SYS(getxattr), + SCMP_SYS(linkat), ++ SCMP_SYS(listxattr), + SCMP_SYS(lseek), + SCMP_SYS(madvise), + SCMP_SYS(mkdirat), +@@ -85,6 +88,7 @@ static const int syscall_whitelist[] = { + SCMP_SYS(recvmsg), + SCMP_SYS(renameat), + SCMP_SYS(renameat2), ++ SCMP_SYS(removexattr), + SCMP_SYS(rt_sigaction), + 
SCMP_SYS(rt_sigprocmask), + SCMP_SYS(rt_sigreturn), +@@ -98,10 +102,12 @@ static const int syscall_whitelist[] = { + SCMP_SYS(setresuid32), + #endif + SCMP_SYS(set_robust_list), ++ SCMP_SYS(setxattr), + SCMP_SYS(symlinkat), + SCMP_SYS(time), /* Rarely needed, except on static builds */ + SCMP_SYS(tgkill), + SCMP_SYS(unlinkat), ++ SCMP_SYS(unshare), + SCMP_SYS(utimensat), + SCMP_SYS(write), + SCMP_SYS(writev), +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-Remove-fuse_req_getgroups.patch b/kvm-virtiofsd-Remove-fuse_req_getgroups.patch new file mode 100644 index 0000000..27e71f2 --- /dev/null +++ b/kvm-virtiofsd-Remove-fuse_req_getgroups.patch @@ -0,0 +1,193 @@ +From 7a1860c83ff042f3e796c449e780ee0528107213 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:08 +0000 +Subject: [PATCH 12/18] virtiofsd: Remove fuse_req_getgroups +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-2-dgilbert@redhat.com> +Patchwork-id: 94122 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/7] virtiofsd: Remove fuse_req_getgroups +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: "Dr. David Alan Gilbert" + +Remove fuse_req_getgroups that's unused in virtiofsd; it came in +from libfuse but we don't actually use it. It was called from +fuse_getgroups which we previously removed (but had left it's header +in). + +Coverity had complained about null termination in it, but removing +it is the easiest answer. + +Fixes: Coverity CID: 1413117 (String not null terminated) +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit 988717b46b6424907618cb845ace9d69062703af) +Signed-off-by: Danilo C. L. 
de Paula +--- + tools/virtiofsd/fuse.h | 20 ----------- + tools/virtiofsd/fuse_lowlevel.c | 77 ----------------------------------------- + tools/virtiofsd/fuse_lowlevel.h | 21 ----------- + 3 files changed, 118 deletions(-) + +diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h +index 7a4c713..aba13fe 100644 +--- a/tools/virtiofsd/fuse.h ++++ b/tools/virtiofsd/fuse.h +@@ -1007,26 +1007,6 @@ void fuse_exit(struct fuse *f); + struct fuse_context *fuse_get_context(void); + + /** +- * Get the current supplementary group IDs for the current request +- * +- * Similar to the getgroups(2) system call, except the return value is +- * always the total number of group IDs, even if it is larger than the +- * specified size. +- * +- * The current fuse kernel module in linux (as of 2.6.30) doesn't pass +- * the group list to userspace, hence this function needs to parse +- * "/proc/$TID/task/$TID/status" to get the group IDs. +- * +- * This feature may not be supported on all operating systems. In +- * such a case this function will return -ENOSYS. 
+- * +- * @param size size of given array +- * @param list array of group IDs to be filled in +- * @return the total number of supplementary group IDs or -errno on failure +- */ +-int fuse_getgroups(int size, gid_t list[]); +- +-/** + * Check if the current request has already been interrupted + * + * @return 1 if the request has been interrupted, 0 otherwise +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index de2e2e0..01c418a 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -2667,83 +2667,6 @@ int fuse_lowlevel_is_virtio(struct fuse_session *se) + return !!se->virtio_dev; + } + +-#ifdef linux +-int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) +-{ +- char *buf; +- size_t bufsize = 1024; +- char path[128]; +- int ret; +- int fd; +- unsigned long pid = req->ctx.pid; +- char *s; +- +- sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); +- +-retry: +- buf = malloc(bufsize); +- if (buf == NULL) { +- return -ENOMEM; +- } +- +- ret = -EIO; +- fd = open(path, O_RDONLY); +- if (fd == -1) { +- goto out_free; +- } +- +- ret = read(fd, buf, bufsize); +- close(fd); +- if (ret < 0) { +- ret = -EIO; +- goto out_free; +- } +- +- if ((size_t)ret == bufsize) { +- free(buf); +- bufsize *= 4; +- goto retry; +- } +- +- ret = -EIO; +- s = strstr(buf, "\nGroups:"); +- if (s == NULL) { +- goto out_free; +- } +- +- s += 8; +- ret = 0; +- while (1) { +- char *end; +- unsigned long val = strtoul(s, &end, 0); +- if (end == s) { +- break; +- } +- +- s = end; +- if (ret < size) { +- list[ret] = val; +- } +- ret++; +- } +- +-out_free: +- free(buf); +- return ret; +-} +-#else /* linux */ +-/* +- * This is currently not implemented on other than Linux... 
+- */ +-int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) +-{ +- (void)req; +- (void)size; +- (void)list; +- return -ENOSYS; +-} +-#endif +- + void fuse_session_exit(struct fuse_session *se) + { + se->exited = 1; +diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h +index 138041e..8f6d705 100644 +--- a/tools/virtiofsd/fuse_lowlevel.h ++++ b/tools/virtiofsd/fuse_lowlevel.h +@@ -1705,27 +1705,6 @@ void *fuse_req_userdata(fuse_req_t req); + const struct fuse_ctx *fuse_req_ctx(fuse_req_t req); + + /** +- * Get the current supplementary group IDs for the specified request +- * +- * Similar to the getgroups(2) system call, except the return value is +- * always the total number of group IDs, even if it is larger than the +- * specified size. +- * +- * The current fuse kernel module in linux (as of 2.6.30) doesn't pass +- * the group list to userspace, hence this function needs to parse +- * "/proc/$TID/task/$TID/status" to get the group IDs. +- * +- * This feature may not be supported on all operating systems. In +- * such a case this function will return -ENOSYS. +- * +- * @param req request handle +- * @param size size of given array +- * @param list array of group IDs to be filled in +- * @return the total number of supplementary group IDs or -errno on failure +- */ +-int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]); +- +-/** + * Callback function for an interrupt + * + * @param req interrupted request +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-do_read-missing-NULL-check.patch b/kvm-virtiofsd-do_read-missing-NULL-check.patch new file mode 100644 index 0000000..4f8e5ef --- /dev/null +++ b/kvm-virtiofsd-do_read-missing-NULL-check.patch @@ -0,0 +1,49 @@ +From 901c005299b0316bbca7bc190de56f6c7a2a9880 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:11 +0000 +Subject: [PATCH 15/18] virtiofsd: do_read missing NULL check +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-5-dgilbert@redhat.com> +Patchwork-id: 94127 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/7] virtiofsd: do_read missing NULL check +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: "Dr. David Alan Gilbert" + +Missing a NULL check if the argument fetch fails. + +Fixes: Coverity CID 1413119 +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit 99ce9a7e60fd12b213b985343ff8fcc172de59fd) +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/fuse_lowlevel.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index 01c418a..704c036 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1116,6 +1116,10 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, + struct fuse_file_info fi; + + arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); ++ if (!arg) { ++ fuse_reply_err(req, EINVAL); ++ return; ++ } + + memset(&fi, 0, sizeof(fi)); + fi.fh = arg->fh; +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch b/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch new file mode 100644 index 0000000..b17d93c --- /dev/null +++ b/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch @@ -0,0 +1,56 @@ +From 3b6461ee08654b2cbb6d4e0cc15c02f89a6610d5 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:09 +0000 +Subject: [PATCH 13/18] virtiofsd: fv_create_listen_socket error path socket + leak +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-3-dgilbert@redhat.com> +Patchwork-id: 94124 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/7] virtiofsd: fv_create_listen_socket error path socket leak +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: "Dr. David Alan Gilbert" + +If we fail when bringing up the socket we can leak the listen_fd; +in practice the daemon will exit so it's not really a problem. + +Fixes: Coverity CID 1413121 +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit 6fa249027f97e3080f3d9c0fab3f94f8f80828fe) +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/fuse_virtio.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c +index 80a6e92..dd1c605 100644 +--- a/tools/virtiofsd/fuse_virtio.c ++++ b/tools/virtiofsd/fuse_virtio.c +@@ -916,6 +916,7 @@ static int fv_create_listen_socket(struct fuse_session *se) + old_umask = umask(0077); + if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) { + fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n"); ++ close(listen_sock); + umask(old_umask); + return -1; + } +@@ -923,6 +924,7 @@ static int fv_create_listen_socket(struct fuse_session *se) + + if (listen(listen_sock, 1) == -1) { + fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n"); ++ close(listen_sock); + return -1; + } + +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-load_capng-missing-unlock.patch b/kvm-virtiofsd-load_capng-missing-unlock.patch new file mode 100644 index 0000000..bc04f6b --- /dev/null +++ b/kvm-virtiofsd-load_capng-missing-unlock.patch @@ -0,0 +1,46 @@ +From 
ece7649025fbdbde48ff0b954e8ec2e42c4a8b3d Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:10 +0000 +Subject: [PATCH 14/18] virtiofsd: load_capng missing unlock +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-4-dgilbert@redhat.com> +Patchwork-id: 94126 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/7] virtiofsd: load_capng missing unlock +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: "Dr. David Alan Gilbert" + +Missing unlock in error path. + +Fixes: Coverity CID 1413123 +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Stefan Hajnoczi +(cherry picked from commit 686391112fd42c615bcc4233472887a66a9b5a4a) +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/passthrough_ll.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index e6f2399..c635fc8 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -232,6 +232,7 @@ static int load_capng(void) + */ + cap.saved = capng_save_state(); + if (!cap.saved) { ++ pthread_mutex_unlock(&cap.mutex); + fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n"); + return -EINVAL; + } +-- +1.8.3.1 + diff --git a/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch b/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch new file mode 100644 index 0000000..c55eead --- /dev/null +++ b/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch @@ -0,0 +1,154 @@ +From f93ea308351cbe2630d7ecf637c3b69894d84a11 Mon Sep 17 00:00:00 2001 +From: "Dr.
David Alan Gilbert" +Date: Tue, 3 Mar 2020 18:43:13 +0000 +Subject: [PATCH 17/18] virtiofsd: passthrough_ll: cleanup getxattr/listxattr +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200303184314.155564-7-dgilbert@redhat.com> +Patchwork-id: 94125 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/7] virtiofsd: passthrough_ll: cleanup getxattr/listxattr +Bugzilla: 1797064 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Ján Tomko + +From: Misono Tomohiro + +This is a cleanup patch to simplify the following xattr fix and +there is no functional changes. + +- Move memory allocation to head of the function +- Unify fgetxattr/flistxattr call for both size == 0 and + size != 0 case +- Remove redundant lo_inode_put call in error path + (Note: second call is ignored now since @inode is already NULL) + +Signed-off-by: Misono Tomohiro +Message-Id: <20200227055927.24566-2-misono.tomohiro@jp.fujitsu.com> +Acked-by: Vivek Goyal +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 16e15a73089102c3d8846792d514e769300fcc3c) +Signed-off-by: Danilo C. L. 
de Paula +--- + tools/virtiofsd/passthrough_ll.c | 54 ++++++++++++++++------------------------ + 1 file changed, 22 insertions(+), 32 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index c635fc8..50c7273 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -2199,34 +2199,30 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + goto out; + } + ++ if (size) { ++ value = malloc(size); ++ if (!value) { ++ goto out_err; ++ } ++ } ++ + sprintf(procname, "%i", inode->fd); + fd = openat(lo->proc_self_fd, procname, O_RDONLY); + if (fd < 0) { + goto out_err; + } + ++ ret = fgetxattr(fd, name, value, size); ++ if (ret == -1) { ++ goto out_err; ++ } + if (size) { +- value = malloc(size); +- if (!value) { +- goto out_err; +- } +- +- ret = fgetxattr(fd, name, value, size); +- if (ret == -1) { +- goto out_err; +- } + saverr = 0; + if (ret == 0) { + goto out; + } +- + fuse_reply_buf(req, value, ret); + } else { +- ret = fgetxattr(fd, name, NULL, 0); +- if (ret == -1) { +- goto out_err; +- } +- + fuse_reply_xattr(req, ret); + } + out_free: +@@ -2242,7 +2238,6 @@ out_free: + out_err: + saverr = errno; + out: +- lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + goto out_free; + } +@@ -2277,34 +2272,30 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) + goto out; + } + ++ if (size) { ++ value = malloc(size); ++ if (!value) { ++ goto out_err; ++ } ++ } ++ + sprintf(procname, "%i", inode->fd); + fd = openat(lo->proc_self_fd, procname, O_RDONLY); + if (fd < 0) { + goto out_err; + } + ++ ret = flistxattr(fd, value, size); ++ if (ret == -1) { ++ goto out_err; ++ } + if (size) { +- value = malloc(size); +- if (!value) { +- goto out_err; +- } +- +- ret = flistxattr(fd, value, size); +- if (ret == -1) { +- goto out_err; +- } + saverr = 0; + if (ret == 0) { + goto out; + } +- + fuse_reply_buf(req, value, ret); + } else { +- ret = flistxattr(fd, NULL, 
0); +- if (ret == -1) { +- goto out_err; +- } +- + fuse_reply_xattr(req, ret); + } + out_free: +@@ -2320,7 +2311,6 @@ out_free: + out_err: + saverr = errno; + out: +- lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + goto out_free; + } +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 3fa1135..92fba75 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 13%{?dist} +Release: 14%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -528,6 +528,42 @@ Patch195: kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch Patch196: kvm-mirror-Store-MirrorOp.co-for-debuggability.patch # For bz#1794692 - Mirror block job stops making progress Patch197: kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch +# For bz#1782529 - Windows Update Enablement with default smbios strings in qemu +Patch198: kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch199: kvm-migration-multifd-clean-pages-after-filling-packet.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch200: kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch201: kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch202: kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch +# For bz#1738451 - qemu on src host core dump after set 
multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch203: kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch204: kvm-qemu-file-Don-t-do-IO-after-shutdown.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch205: kvm-migration-Don-t-send-data-if-we-have-stopped.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch206: kvm-migration-Create-migration_is_running.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch207: kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch +# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) +Patch208: kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch +# For bz#1797064 - virtiofsd: Fixes +Patch209: kvm-virtiofsd-Remove-fuse_req_getgroups.patch +# For bz#1797064 - virtiofsd: Fixes +Patch210: kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch +# For bz#1797064 - virtiofsd: Fixes +Patch211: kvm-virtiofsd-load_capng-missing-unlock.patch +# For bz#1797064 - virtiofsd: Fixes +Patch212: kvm-virtiofsd-do_read-missing-NULL-check.patch +# For bz#1797064 - virtiofsd: Fixes +Patch213: kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch +# For bz#1797064 - virtiofsd: Fixes +Patch214: kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch +# For bz#1797064 - virtiofsd: Fixes +Patch215: kvm-virtiofsd-Fix-xattr-operations.patch BuildRequires: wget BuildRequires: rpm-build @@ -1461,6 +1497,32 @@ useradd -r -u 107 -g qemu -G kvm -d / -s 
/sbin/nologin \ %changelog +* Wed Mar 11 2020 Danilo Cesar Lemes de Paula - 4.2.0-14.el8 +- kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch [bz#1782529] +- kvm-migration-multifd-clean-pages-after-filling-packet.patch [bz#1738451] +- kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch [bz#1738451] +- kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch [bz#1738451] +- kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch [bz#1738451] +- kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch [bz#1738451] +- kvm-qemu-file-Don-t-do-IO-after-shutdown.patch [bz#1738451] +- kvm-migration-Don-t-send-data-if-we-have-stopped.patch [bz#1738451] +- kvm-migration-Create-migration_is_running.patch [bz#1738451] +- kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch [bz#1738451] +- kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch [bz#1738451] +- kvm-virtiofsd-Remove-fuse_req_getgroups.patch [bz#1797064] +- kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch [bz#1797064] +- kvm-virtiofsd-load_capng-missing-unlock.patch [bz#1797064] +- kvm-virtiofsd-do_read-missing-NULL-check.patch [bz#1797064] +- kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch [bz#1797064] +- kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch [bz#1797064] +- kvm-virtiofsd-Fix-xattr-operations.patch [bz#1797064] +- Resolves: bz#1738451 + (qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel)) +- Resolves: bz#1782529 + (Windows Update Enablement with default smbios strings in qemu) +- Resolves: bz#1797064 + (virtiofsd: Fixes) + * Sat Feb 29 2020 Danilo Cesar Lemes de Paula - 4.2.0-13.el8 - kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch [bz#1791648] - kvm-target-i386-add-a-ucode-rev-property.patch [bz#1791648] From dd7aef2877c7b8a9d9fdd40a1e87f9a57266a044 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Tue, 17 Mar 2020 00:52:27 +0000 Subject: [PATCH 070/195] * Tue Mar 17 2020 Danilo Cesar Lemes de Paula - 4.2.0-15.el8 - kvm-block-nbd-Fix-hang-in-.bdrv_close.patch [bz#1640894] - kvm-block-Generic-file-creation-fallback.patch [bz#1640894] - kvm-file-posix-Drop-hdev_co_create_opts.patch [bz#1640894] - kvm-iscsi-Drop-iscsi_co_create_opts.patch [bz#1640894] - kvm-iotests-Add-test-for-image-creation-fallback.patch [bz#1640894] - kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch [bz#1640894] - kvm-iotests-Use-complete_and_wait-in-155.patch [bz#1790482 bz#1805143] - kvm-block-Introduce-bdrv_reopen_commit_post-step.patch [bz#1790482 bz#1805143] - kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch [bz#1790482 bz#1805143] - kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch [bz#1790482 bz#1805143] - kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch [bz#1790482 bz#1805143] - kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch [bz#1790482 bz#1805143] - kvm-block-Make-bdrv_get_cumulative_perm-public.patch [bz#1790482 bz#1805143] - kvm-block-Relax-restrictions-for-blockdev-snapshot.patch [bz#1790482 bz#1805143] - kvm-iotests-Fix-run_job-with-use_log-False.patch [bz#1790482 bz#1805143] - kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch [bz#1790482 bz#1805143] - kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch [bz#1790482 bz#1805143] - kvm-iotests-Add-iothread-cases-to-155.patch [bz#1790482 bz#1805143] - kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch [bz#1790482 bz#1805143] - kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch [bz#1809380] - Resolves: bz#1640894 (Fix generic file creation fallback for qemu-img nvme:// image creation support) - Resolves: bz#1790482 (bitmaps in backing images can't be modified) - Resolves: bz#1805143 (allow late/lazy opening of backing chain for shallow blockdev-mirror) - Resolves: bz#1809380 (guest hang during reboot process after 
migration from RHEl7.8 to RHEL8.2.0.) --- ...x-cross-AioContext-blockdev-snapshot.patch | 91 +++++++ ...ix-leak-in-bdrv_create_file_fallback.patch | 60 +++++ ...block-Generic-file-creation-fallback.patch | 227 ++++++++++++++++++ ...troduce-bdrv_reopen_commit_post-step.patch | 65 +++++ ...Make-bdrv_get_cumulative_perm-public.patch | 67 ++++++ ...x-restrictions-for-blockdev-snapshot.patch | 117 +++++++++ ...x-blockdev-reopen-API-with-feature-f.patch | 57 +++++ ...n-with-backing-file-in-different-Aio.patch | 114 +++++++++ kvm-block-nbd-Fix-hang-in-.bdrv_close.patch | 78 ++++++ ...-bitmap-reopen-into-bdrv_reopen_comm.patch | 78 ++++++ ...-Free-rom-data-during-inmigrate-skip.patch | 85 +++++++ kvm-file-posix-Drop-hdev_co_create_opts.patch | 131 ++++++++++ kvm-iotests-Add-iothread-cases-to-155.patch | 147 ++++++++++++ ...Add-test-for-image-creation-fallback.patch | 138 +++++++++++ ...tests-Fix-run_job-with-use_log-False.patch | 47 ++++ ...r-blockdev-reopen-test-for-iothreads.patch | 122 ++++++++++ ...ror-with-temporarily-disabled-target.patch | 162 +++++++++++++ ...iotests-Use-complete_and_wait-in-155.patch | 50 ++++ kvm-iscsi-Drop-iscsi_co_create_opts.patch | 113 +++++++++ ...rite-only-overlay-feature-for-blockd.patch | 64 +++++ qemu-kvm.spec | 85 ++++++- 21 files changed, 2097 insertions(+), 1 deletion(-) create mode 100644 kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch create mode 100644 kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch create mode 100644 kvm-block-Generic-file-creation-fallback.patch create mode 100644 kvm-block-Introduce-bdrv_reopen_commit_post-step.patch create mode 100644 kvm-block-Make-bdrv_get_cumulative_perm-public.patch create mode 100644 kvm-block-Relax-restrictions-for-blockdev-snapshot.patch create mode 100644 kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch create mode 100644 kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch create mode 100644 kvm-block-nbd-Fix-hang-in-.bdrv_close.patch create mode 
100644 kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch create mode 100644 kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch create mode 100644 kvm-file-posix-Drop-hdev_co_create_opts.patch create mode 100644 kvm-iotests-Add-iothread-cases-to-155.patch create mode 100644 kvm-iotests-Add-test-for-image-creation-fallback.patch create mode 100644 kvm-iotests-Fix-run_job-with-use_log-False.patch create mode 100644 kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch create mode 100644 kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch create mode 100644 kvm-iotests-Use-complete_and_wait-in-155.patch create mode 100644 kvm-iscsi-Drop-iscsi_co_create_opts.patch create mode 100644 kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch diff --git a/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch b/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch new file mode 100644 index 0000000..0bad890 --- /dev/null +++ b/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch @@ -0,0 +1,91 @@ +From 5774af5a3c713d0c93010c30453812eae6a749cd Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:37 +0000 +Subject: [PATCH 17/20] block: Fix cross-AioContext blockdev-snapshot + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-12-kwolf@redhat.com> +Patchwork-id: 94286 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 11/13] block: Fix cross-AioContext blockdev-snapshot +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +external_snapshot_prepare() tries to move the overlay to the AioContext +of the backing file (the snapshotted node). However, it's possible that +this doesn't work, but the backing file can instead be moved to the +overlay's AioContext (e.g. opening the backing chain for a mirror +target). 
+ +bdrv_append() already indirectly uses bdrv_attach_node(), which takes +care to move nodes to make sure they use the same AioContext and which +tries both directions. + +So the problem has a simple fix: Just delete the unnecessary extra +bdrv_try_set_aio_context() call in external_snapshot_prepare() and +instead assert in bdrv_append() that both nodes were indeed moved to the +same AioContext. + +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-6-kwolf@redhat.com> +Tested-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit 30dd65f307b647eef8156c4a33bd007823ef85cb) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block.c | 1 + + blockdev.c | 16 ---------------- + 2 files changed, 1 insertion(+), 16 deletions(-) + +diff --git a/block.c b/block.c +index 354d388..ec29b1e 100644 +--- a/block.c ++++ b/block.c +@@ -4327,6 +4327,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, + bdrv_ref(from); + + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); ++ assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); + bdrv_drained_begin(from); + + /* Put all parents into @list and calculate their cumulative permissions */ +diff --git a/blockdev.c b/blockdev.c +index 7918533..c8d4b51 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1535,9 +1535,7 @@ static void external_snapshot_prepare(BlkActionState *common, + DO_UPCAST(ExternalSnapshotState, common, common); + TransactionAction *action = common->action; + AioContext *aio_context; +- AioContext *old_context; + uint64_t perm, shared; +- int ret; + + /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar + * purpose but a different set of parameters */ +@@ -1678,20 +1676,6 @@ static void external_snapshot_prepare(BlkActionState *common, + goto out; + } + +- /* Honor bdrv_try_set_aio_context() context acquisition requirements. 
*/ +- old_context = bdrv_get_aio_context(state->new_bs); +- aio_context_release(aio_context); +- aio_context_acquire(old_context); +- +- ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); +- +- aio_context_release(old_context); +- aio_context_acquire(aio_context); +- +- if (ret < 0) { +- goto out; +- } +- + /* This removes our old bs and adds the new bs. This is an operation that + * can fail, so we need to do it in .prepare; undoing it for abort is + * always possible. */ +-- +1.8.3.1 + diff --git a/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch b/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch new file mode 100644 index 0000000..1735dc0 --- /dev/null +++ b/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch @@ -0,0 +1,60 @@ +From 05452efd7e0fb0522099ae09a396f8f97e66014a Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:47 +0000 +Subject: [PATCH 06/20] block: Fix leak in bdrv_create_file_fallback() + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-7-mlevitsk@redhat.com> +Patchwork-id: 94229 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/6] block: Fix leak in bdrv_create_file_fallback() +Bugzilla: 1640894 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +@options is leaked by the first two return statements in this function. + +Note that blk_new_open() takes the reference to @options even on +failure, so all we need to do to fix the leak is to move the QDict +allocation down to where we actually need it. + +Reported-by: Coverity (CID 1419884) +Fixes: fd17146cd93d1704cd96d7c2757b325fc7aac6fd + ("block: Generic file creation fallback") +Signed-off-by: Max Reitz +Message-Id: <20200225155618.133412-1-mreitz@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit eeea1faa099f82328f5831cf252f8ce0a59a9287) +Signed-off-by: Maxim Levitsky + +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/block.c b/block.c +index 3beec7f..e1a4e38 100644 +--- a/block.c ++++ b/block.c +@@ -600,7 +600,7 @@ static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, + QemuOpts *opts, Error **errp) + { + BlockBackend *blk; +- QDict *options = qdict_new(); ++ QDict *options; + int64_t size = 0; + char *buf = NULL; + PreallocMode prealloc; +@@ -623,6 +623,7 @@ static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, + return -ENOTSUP; + } + ++ options = qdict_new(); + qdict_put_str(options, "driver", drv->format_name); + + blk = blk_new_open(filename, NULL, options, +-- +1.8.3.1 + diff --git a/kvm-block-Generic-file-creation-fallback.patch b/kvm-block-Generic-file-creation-fallback.patch new file mode 100644 index 0000000..a5dd1d7 --- /dev/null +++ b/kvm-block-Generic-file-creation-fallback.patch @@ -0,0 +1,227 @@ +From 882d09226b7f45b72c5b7763c4c4aba182e0f8a1 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:43 +0000 +Subject: [PATCH 02/20] block: Generic file creation fallback + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-3-mlevitsk@redhat.com> +Patchwork-id: 94227 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/6] block: Generic file creation fallback +Bugzilla: 1640894 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +If a protocol driver does not support image creation, we can see whether +maybe the file exists already. If so, just truncating it will be +sufficient. + +Signed-off-by: Max Reitz +Message-Id: <20200122164532.178040-3-mreitz@redhat.com> +Signed-off-by: Max Reitz +(cherry picked from commit fd17146cd93d1704cd96d7c2757b325fc7aac6fd) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 147 insertions(+), 12 deletions(-) + +diff --git a/block.c b/block.c +index 2e5e8b6..3beec7f 100644 +--- a/block.c ++++ b/block.c +@@ -532,20 +532,139 @@ out: + return ret; + } + +-int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) ++/** ++ * Helper function for bdrv_create_file_fallback(): Resize @blk to at ++ * least the given @minimum_size. ++ * ++ * On success, return @blk's actual length. ++ * Otherwise, return -errno. ++ */ ++static int64_t create_file_fallback_truncate(BlockBackend *blk, ++ int64_t minimum_size, Error **errp) + { +- BlockDriver *drv; ++ Error *local_err = NULL; ++ int64_t size; ++ int ret; ++ ++ ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, &local_err); ++ if (ret < 0 && ret != -ENOTSUP) { ++ error_propagate(errp, local_err); ++ return ret; ++ } ++ ++ size = blk_getlength(blk); ++ if (size < 0) { ++ error_free(local_err); ++ error_setg_errno(errp, -size, ++ "Failed to inquire the new image file's length"); ++ return size; ++ } ++ ++ if (size < minimum_size) { ++ /* Need to grow the image, but we failed to do that */ ++ error_propagate(errp, local_err); ++ return -ENOTSUP; ++ } ++ ++ error_free(local_err); ++ local_err = NULL; ++ ++ return size; ++} ++ ++/** ++ * Helper function for bdrv_create_file_fallback(): Zero the first ++ * sector to remove any potentially pre-existing image header. 
++ */ ++static int create_file_fallback_zero_first_sector(BlockBackend *blk, ++ int64_t current_size, ++ Error **errp) ++{ ++ int64_t bytes_to_clear; ++ int ret; ++ ++ bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE); ++ if (bytes_to_clear) { ++ ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, ++ "Failed to clear the new image's first sector"); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, ++ QemuOpts *opts, Error **errp) ++{ ++ BlockBackend *blk; ++ QDict *options = qdict_new(); ++ int64_t size = 0; ++ char *buf = NULL; ++ PreallocMode prealloc; + Error *local_err = NULL; + int ret; + ++ size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); ++ buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); ++ prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, ++ PREALLOC_MODE_OFF, &local_err); ++ g_free(buf); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return -EINVAL; ++ } ++ ++ if (prealloc != PREALLOC_MODE_OFF) { ++ error_setg(errp, "Unsupported preallocation mode '%s'", ++ PreallocMode_str(prealloc)); ++ return -ENOTSUP; ++ } ++ ++ qdict_put_str(options, "driver", drv->format_name); ++ ++ blk = blk_new_open(filename, NULL, options, ++ BDRV_O_RDWR | BDRV_O_RESIZE, errp); ++ if (!blk) { ++ error_prepend(errp, "Protocol driver '%s' does not support image " ++ "creation, and opening the image failed: ", ++ drv->format_name); ++ return -EINVAL; ++ } ++ ++ size = create_file_fallback_truncate(blk, size, errp); ++ if (size < 0) { ++ ret = size; ++ goto out; ++ } ++ ++ ret = create_file_fallback_zero_first_sector(blk, size, errp); ++ if (ret < 0) { ++ goto out; ++ } ++ ++ ret = 0; ++out: ++ blk_unref(blk); ++ return ret; ++} ++ ++int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) ++{ ++ BlockDriver *drv; ++ + drv = bdrv_find_protocol(filename, true, errp); + if (drv == NULL) { + return 
-ENOENT; + } + +- ret = bdrv_create(drv, filename, opts, &local_err); +- error_propagate(errp, local_err); +- return ret; ++ if (drv->bdrv_co_create_opts) { ++ return bdrv_create(drv, filename, opts, errp); ++ } else { ++ return bdrv_create_file_fallback(filename, drv, opts, errp); ++ } + } + + /** +@@ -1422,6 +1541,24 @@ QemuOptsList bdrv_runtime_opts = { + }, + }; + ++static QemuOptsList fallback_create_opts = { ++ .name = "fallback-create-opts", ++ .head = QTAILQ_HEAD_INITIALIZER(fallback_create_opts.head), ++ .desc = { ++ { ++ .name = BLOCK_OPT_SIZE, ++ .type = QEMU_OPT_SIZE, ++ .help = "Virtual disk size" ++ }, ++ { ++ .name = BLOCK_OPT_PREALLOC, ++ .type = QEMU_OPT_STRING, ++ .help = "Preallocation mode (allowed values: off)" ++ }, ++ { /* end of list */ } ++ } ++}; ++ + /* + * Common part for opening disk images and files + * +@@ -5743,14 +5880,12 @@ void bdrv_img_create(const char *filename, const char *fmt, + return; + } + +- if (!proto_drv->create_opts) { +- error_setg(errp, "Protocol driver '%s' does not support image creation", +- proto_drv->format_name); +- return; +- } +- + create_opts = qemu_opts_append(create_opts, drv->create_opts); +- create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); ++ if (proto_drv->create_opts) { ++ create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); ++ } else { ++ create_opts = qemu_opts_append(create_opts, &fallback_create_opts); ++ } + + /* Create parameter list with default values */ + opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); +-- +1.8.3.1 + diff --git a/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch b/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch new file mode 100644 index 0000000..72c8986 --- /dev/null +++ b/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch @@ -0,0 +1,65 @@ +From f7dd953c2d0380cef3c351afb03d68c6fcda1dca Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:28 +0000 +Subject: [PATCH 08/20] block: Introduce 
'bdrv_reopen_commit_post' step + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-3-kwolf@redhat.com> +Patchwork-id: 94278 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 02/13] block: Introduce 'bdrv_reopen_commit_post' step +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +From: Peter Krempa + +Add another step in the reopen process where driver can execute code +after permission changes are comitted. + +Signed-off-by: Peter Krempa +Message-Id: +Signed-off-by: Kevin Wolf +(cherry picked from commit 17e1e2be5f9e84e0298e28e70675655b43e225ea) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block.c | 9 +++++++++ + include/block/block_int.h | 1 + + 2 files changed, 10 insertions(+) + +diff --git a/block.c b/block.c +index e1a4e38..a744bb5 100644 +--- a/block.c ++++ b/block.c +@@ -3657,6 +3657,15 @@ cleanup_perm: + } + } + } ++ ++ if (ret == 0) { ++ QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { ++ BlockDriverState *bs = bs_entry->state.bs; ++ ++ if (bs->drv->bdrv_reopen_commit_post) ++ bs->drv->bdrv_reopen_commit_post(&bs_entry->state); ++ } ++ } + cleanup: + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { + if (ret) { +diff --git a/include/block/block_int.h b/include/block/block_int.h +index dd033d0..c168690 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -123,6 +123,7 @@ struct BlockDriver { + int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp); + void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); ++ void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state); + void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); + void (*bdrv_join_options)(QDict *options, QDict *old_options); + +-- +1.8.3.1 + diff --git a/kvm-block-Make-bdrv_get_cumulative_perm-public.patch b/kvm-block-Make-bdrv_get_cumulative_perm-public.patch new file mode 100644 index 0000000..2f0f999 --- /dev/null 
+++ b/kvm-block-Make-bdrv_get_cumulative_perm-public.patch @@ -0,0 +1,67 @@ +From 294ab4c4963295556d12ac15150b48c8536175a7 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:33 +0000 +Subject: [PATCH 13/20] block: Make bdrv_get_cumulative_perm() public + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-8-kwolf@redhat.com> +Patchwork-id: 94287 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 07/13] block: Make bdrv_get_cumulative_perm() public +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-2-kwolf@redhat.com> +Reviewed-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit c7a0f2be8f95b220cdadbba9a9236eaf115951dc) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block.c | 6 ++---- + include/block/block_int.h | 3 +++ + 2 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/block.c b/block.c +index 39e4647..354d388 100644 +--- a/block.c ++++ b/block.c +@@ -1850,8 +1850,6 @@ static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, + bool *tighten_restrictions, Error **errp); + static void bdrv_child_abort_perm_update(BdrvChild *c); + static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); +-static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, +- uint64_t *shared_perm); + + typedef struct BlockReopenQueueEntry { + bool prepared; +@@ -2075,8 +2073,8 @@ static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, + } + } + +-static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, +- uint64_t *shared_perm) ++void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, ++ uint64_t *shared_perm) + { + BdrvChild *c; + uint64_t cumulative_perms = 0; +diff --git a/include/block/block_int.h b/include/block/block_int.h +index c168690..96e327b 100644 +--- a/include/block/block_int.h 
++++ b/include/block/block_int.h +@@ -1228,6 +1228,9 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, + void *opaque, Error **errp); + void bdrv_root_unref_child(BdrvChild *child); + ++void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, ++ uint64_t *shared_perm); ++ + /** + * Sets a BdrvChild's permissions. Avoid if the parent is a BDS; use + * bdrv_child_refresh_perms() instead and make the parent's +-- +1.8.3.1 + diff --git a/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch b/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch new file mode 100644 index 0000000..de85205 --- /dev/null +++ b/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch @@ -0,0 +1,117 @@ +From 9ba321e18a357c1a3a238ceee301bbb174f96eee Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:34 +0000 +Subject: [PATCH 14/20] block: Relax restrictions for blockdev-snapshot + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-9-kwolf@redhat.com> +Patchwork-id: 94285 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 08/13] block: Relax restrictions for blockdev-snapshot +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +blockdev-snapshot returned an error if the overlay was already in use, +which it defined as having any BlockBackend parent. This is in fact both +too strict (some parents can tolerate the change of visible data caused +by attaching a backing file) and too loose (some non-BlockBackend +parents may not be happy with it). + +One important use case that is prevented by the too strict check is live +storage migration with blockdev-mirror. Here, the target node is +usually opened without a backing file so that the active layer is +mirrored while its backing chain can be copied in the background. 
+ +The backing chain should be attached to the mirror target node when +finalising the job, just before switching the users of the source node +to the new copy (at which point the mirror job still has a reference to +the node). drive-mirror did this automatically, but with blockdev-mirror +this is the job of the QMP client, so it needs a way to do this. + +blockdev-snapshot is the obvious way, so this patch makes it work in +this scenario. The new condition is that no parent uses CONSISTENT_READ +permissions. This will ensure that the operation will still be blocked +when the node is attached to the guest device, so blockdev-snapshot +remains safe. + +(For the sake of completeness, x-blockdev-reopen can be used to achieve +the same, however it is a big hammer, performs the graph change +completely unchecked and is still experimental. So even with the option +of using x-blockdev-reopen, there are reasons why blockdev-snapshot +should be able to perform this operation.) + +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-3-kwolf@redhat.com> +Reviewed-by: Peter Krempa +Tested-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit d29d3d1f80b3947fb26e7139645c83de66d146a9) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + blockdev.c | 14 ++++++++------ + tests/qemu-iotests/085.out | 4 ++-- + 2 files changed, 10 insertions(+), 8 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 4cd9a58..7918533 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1536,6 +1536,7 @@ static void external_snapshot_prepare(BlkActionState *common, + TransactionAction *action = common->action; + AioContext *aio_context; + AioContext *old_context; ++ uint64_t perm, shared; + int ret; + + /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar +@@ -1656,16 +1657,17 @@ static void external_snapshot_prepare(BlkActionState *common, + goto out; + } + +- if (bdrv_has_blk(state->new_bs)) { ++ /* ++ * Allow attaching a backing file to an overlay that's already in use only ++ * if the parents don't assume that they are already seeing a valid image. ++ * (Specifically, allow it as a mirror target, which is write-only access.) ++ */ ++ bdrv_get_cumulative_perm(state->new_bs, &perm, &shared); ++ if (perm & BLK_PERM_CONSISTENT_READ) { + error_setg(errp, "The overlay is already in use"); + goto out; + } + +- if (bdrv_op_is_blocked(state->new_bs, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, +- errp)) { +- goto out; +- } +- + if (state->new_bs->backing != NULL) { + error_setg(errp, "The overlay already has a backing image"); + goto out; +diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out +index bb50227..487d920 100644 +--- a/tests/qemu-iotests/085.out ++++ b/tests/qemu-iotests/085.out +@@ -82,7 +82,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ + === Invalid command - cannot create a snapshot using a file BDS === + + { 'execute': 'blockdev-snapshot', 'arguments': { 'node':'virtio0', 'overlay':'file_12' } } +-{"error": {"class": "GenericError", "desc": "The overlay does not support backing images"}} ++{"error": {"class": "GenericError", "desc": "The overlay is already in use"}} + + === Invalid command - snapshot node used as active layer === + +@@ 
-96,7 +96,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ + === Invalid command - snapshot node used as backing hd === + + { 'execute': 'blockdev-snapshot', 'arguments': { 'node': 'virtio0', 'overlay':'snap_11' } } +-{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'snap_12'"}} ++{"error": {"class": "GenericError", "desc": "The overlay is already in use"}} + + === Invalid command - snapshot node has a backing image === + +-- +1.8.3.1 + diff --git a/kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch b/kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch new file mode 100644 index 0000000..ea796d5 --- /dev/null +++ b/kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch @@ -0,0 +1,57 @@ +From 371d312300251c0dc24522607b06b7e47e760b53 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:32 +0000 +Subject: [PATCH 12/20] block: Versioned x-blockdev-reopen API with feature + flag + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-7-kwolf@redhat.com> +Patchwork-id: 94283 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 06/13] block: Versioned x-blockdev-reopen API with feature flag +Bugzilla: 1790482 1805143 +RH-Acked-by: Eric Blake +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +x-blockdev-reopen is still considered unstable upstream. libvirt needs +(a small subset of) it for incremental backups, though. + +Add a downstream-only feature flag that effectively makes this a +versioned interface. As long as the feature is present, we promise that +we won't change the interface incompatibly. Incompatible changes to the +command will require us to drop the feature flag (and possibly introduce +a new one if the new version is still not stable upstream). + +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + qapi/block-core.json | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 0cf68fe..a1e85b0 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -4202,10 +4202,17 @@ + # image does not have a default backing file name as part of its + # metadata. + # ++# Features: ++# @__com.redhat_rhel-av-8_2_0-api: Versioning the downstream interface while ++# it's still unstable upstream. As long as ++# this flag is present, this command will not ++# change incompatibly. ++# + # Since: 4.0 + ## + { 'command': 'x-blockdev-reopen', +- 'data': 'BlockdevOptions', 'boxed': true } ++ 'data': 'BlockdevOptions', 'boxed': true, ++ 'features': [ '__com.redhat_rhel-av-8_2_0-api' ] } + + ## + # @blockdev-del: +-- +1.8.3.1 + diff --git a/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch b/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch new file mode 100644 index 0000000..745be9f --- /dev/null +++ b/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch @@ -0,0 +1,114 @@ +From 1e0582ad34e77a060e2067a35992979c9eae82c9 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:31 +0000 +Subject: [PATCH 11/20] block: bdrv_reopen() with backing file in different + AioContext + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-6-kwolf@redhat.com> +Patchwork-id: 94282 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 05/13] block: bdrv_reopen() with backing file in different AioContext +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +This patch allows bdrv_reopen() (and therefore the x-blockdev-reopen QMP +command) to attach a node as the new backing file even if the node is in +a different AioContext than the parent if one of both nodes can be moved +to the AioContext of the other node. 
+ +Signed-off-by: Kevin Wolf +Tested-by: Peter Krempa +Message-Id: <20200306141413.30705-3-kwolf@redhat.com> +Reviewed-by: Alberto Garcia +Signed-off-by: Kevin Wolf +(cherry picked from commit 1de6b45fb5c1489b450df7d1a4c692bba9678ce6) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block.c | 32 ++++++++++++++++++++++++++------ + tests/qemu-iotests/245 | 8 +++----- + 2 files changed, 29 insertions(+), 11 deletions(-) + +diff --git a/block.c b/block.c +index a744bb5..39e4647 100644 +--- a/block.c ++++ b/block.c +@@ -3749,6 +3749,29 @@ static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs, + *shared = cumulative_shared_perms; + } + ++static bool bdrv_reopen_can_attach(BlockDriverState *parent, ++ BdrvChild *child, ++ BlockDriverState *new_child, ++ Error **errp) ++{ ++ AioContext *parent_ctx = bdrv_get_aio_context(parent); ++ AioContext *child_ctx = bdrv_get_aio_context(new_child); ++ GSList *ignore; ++ bool ret; ++ ++ ignore = g_slist_prepend(NULL, child); ++ ret = bdrv_can_set_aio_context(new_child, parent_ctx, &ignore, NULL); ++ g_slist_free(ignore); ++ if (ret) { ++ return ret; ++ } ++ ++ ignore = g_slist_prepend(NULL, child); ++ ret = bdrv_can_set_aio_context(parent, child_ctx, &ignore, errp); ++ g_slist_free(ignore); ++ return ret; ++} ++ + /* + * Take a BDRVReopenState and check if the value of 'backing' in the + * reopen_state->options QDict is valid or not. +@@ -3800,14 +3823,11 @@ static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state, + } + + /* +- * TODO: before removing the x- prefix from x-blockdev-reopen we +- * should move the new backing file into the right AioContext +- * instead of returning an error. ++ * Check AioContext compatibility so that the bdrv_set_backing_hd() call in ++ * bdrv_reopen_commit() won't fail. 
+ */ + if (new_backing_bs) { +- if (bdrv_get_aio_context(new_backing_bs) != bdrv_get_aio_context(bs)) { +- error_setg(errp, "Cannot use a new backing file " +- "with a different AioContext"); ++ if (!bdrv_reopen_can_attach(bs, bs->backing, new_backing_bs, errp)) { + return -EINVAL; + } + } +diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 +index f69c2fa..919131d 100644 +--- a/tests/qemu-iotests/245 ++++ b/tests/qemu-iotests/245 +@@ -1013,18 +1013,16 @@ class TestBlockdevReopen(iotests.QMPTestCase): + # neither of them can switch to the other AioContext + def test_iothreads_error(self): + self.run_test_iothreads('iothread0', 'iothread1', +- "Cannot use a new backing file with a different AioContext") ++ "Cannot change iothread of active block backend") + + def test_iothreads_compatible_users(self): + self.run_test_iothreads('iothread0', 'iothread0') + + def test_iothreads_switch_backing(self): +- self.run_test_iothreads('iothread0', None, +- "Cannot use a new backing file with a different AioContext") ++ self.run_test_iothreads('iothread0', None) + + def test_iothreads_switch_overlay(self): +- self.run_test_iothreads(None, 'iothread0', +- "Cannot use a new backing file with a different AioContext") ++ self.run_test_iothreads(None, 'iothread0') + + if __name__ == '__main__': + iotests.main(supported_fmts=["qcow2"], +-- +1.8.3.1 + diff --git a/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch b/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch new file mode 100644 index 0000000..378ae1a --- /dev/null +++ b/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch @@ -0,0 +1,78 @@ +From 4ef2c464a54b0b618d933641ac0a7012e629fed9 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:42 +0000 +Subject: [PATCH 01/20] block/nbd: Fix hang in .bdrv_close() + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-2-mlevitsk@redhat.com> +Patchwork-id: 94224 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/6] block/nbd: Fix hang in .bdrv_close() +Bugzilla: 1640894 
+RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +When nbd_close() is called from a coroutine, the connection_co never +gets to run, and thus nbd_teardown_connection() hangs. + +This is because aio_co_enter() only puts the connection_co into the main +coroutine's wake-up queue, so this main coroutine needs to yield and +wait for connection_co to terminate. + +Suggested-by: Kevin Wolf +Signed-off-by: Max Reitz +Message-Id: <20200122164532.178040-2-mreitz@redhat.com> +Reviewed-by: Eric Blake +Reviewed-by: Maxim Levitsky +Signed-off-by: Max Reitz +(cherry picked from commit 78c81a3f108870d325b0a39d88711366afe6f703) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. de Paula +--- + block/nbd.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/block/nbd.c b/block/nbd.c +index 5f18f78..a73f0d9 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -70,6 +70,7 @@ typedef struct BDRVNBDState { + CoMutex send_mutex; + CoQueue free_sema; + Coroutine *connection_co; ++ Coroutine *teardown_co; + QemuCoSleepState *connection_co_sleep_ns_state; + bool drained; + bool wait_drained_end; +@@ -203,7 +204,15 @@ static void nbd_teardown_connection(BlockDriverState *bs) + qemu_co_sleep_wake(s->connection_co_sleep_ns_state); + } + } +- BDRV_POLL_WHILE(bs, s->connection_co); ++ if (qemu_in_coroutine()) { ++ s->teardown_co = qemu_coroutine_self(); ++ /* connection_co resumes us when it terminates */ ++ qemu_coroutine_yield(); ++ s->teardown_co = NULL; ++ } else { ++ BDRV_POLL_WHILE(bs, s->connection_co); ++ } ++ assert(!s->connection_co); + } + + static bool nbd_client_connecting(BDRVNBDState *s) +@@ -395,6 +404,9 @@ static coroutine_fn void nbd_connection_entry(void *opaque) + s->ioc = NULL; + } + ++ if (s->teardown_co) { ++ aio_co_wake(s->teardown_co); ++ } + aio_wait_kick(); + } + +-- +1.8.3.1 + diff --git a/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch 
b/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch new file mode 100644 index 0000000..2c27fd2 --- /dev/null +++ b/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch @@ -0,0 +1,78 @@ +From ec5408763c49cd0b63ee324bdc38a429ed1adeee Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:29 +0000 +Subject: [PATCH 09/20] block/qcow2: Move bitmap reopen into + bdrv_reopen_commit_post + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-4-kwolf@redhat.com> +Patchwork-id: 94280 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 03/13] block/qcow2: Move bitmap reopen into bdrv_reopen_commit_post +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +From: Peter Krempa + +The bitmap code requires writing the 'file' child when the qcow2 driver +is reopened in read-write mode. + +If the 'file' child is being reopened due to a permissions change, the +modification is commited yet when qcow2_reopen_commit is called. This +means that any attempt to write the 'file' child will end with EBADFD +as the original fd was already closed. + +Moving bitmap reopening to the new callback which is called after +permission modifications are commited fixes this as the file descriptor +will be replaced with the correct one. + +The above problem manifests itself when reopening 'qcow2' format layer +which uses a 'file-posix' file child which was opened with the +'auto-read-only' property set. + +Signed-off-by: Peter Krempa +Message-Id: +Signed-off-by: Kevin Wolf +(cherry picked from commit 65eb7c85a3e62529e2bad782e94d5a7b11dd5a92) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/qcow2.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 7c18721..83b1fc0 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -1881,6 +1881,11 @@ fail: + static void qcow2_reopen_commit(BDRVReopenState *state) + { + qcow2_update_options_commit(state->bs, state->opaque); ++ g_free(state->opaque); ++} ++ ++static void qcow2_reopen_commit_post(BDRVReopenState *state) ++{ + if (state->flags & BDRV_O_RDWR) { + Error *local_err = NULL; + +@@ -1895,7 +1900,6 @@ static void qcow2_reopen_commit(BDRVReopenState *state) + bdrv_get_node_name(state->bs)); + } + } +- g_free(state->opaque); + } + + static void qcow2_reopen_abort(BDRVReopenState *state) +@@ -5492,6 +5496,7 @@ BlockDriver bdrv_qcow2 = { + .bdrv_close = qcow2_close, + .bdrv_reopen_prepare = qcow2_reopen_prepare, + .bdrv_reopen_commit = qcow2_reopen_commit, ++ .bdrv_reopen_commit_post = qcow2_reopen_commit_post, + .bdrv_reopen_abort = qcow2_reopen_abort, + .bdrv_join_options = qcow2_join_options, + .bdrv_child_perm = bdrv_format_default_perms, +-- +1.8.3.1 + diff --git a/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch b/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch new file mode 100644 index 0000000..5d44708 --- /dev/null +++ b/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch @@ -0,0 +1,85 @@ +From 5770fe43fe1e15e6f53cfd3705605e8645b95a98 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 13 Mar 2020 17:17:08 +0000 +Subject: [PATCH 20/20] exec/rom_reset: Free rom data during inmigrate skip +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200313171708.242774-1-dgilbert@redhat.com> +Patchwork-id: 94292 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] exec/rom_reset: Free rom data during inmigrate skip +Bugzilla: 1809380 +RH-Acked-by: Daniel P. 
Berrange +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Paolo Bonzini + +From: "Dr. David Alan Gilbert" + +bz: https://bugzilla.redhat.com/show_bug.cgi?id=1809380 +brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27249921 +branch: rhel-av-8.2.0 +upstream: Posted and with review-by, not merged yet + +Commit 355477f8c73e9 skips rom reset when we're an incoming migration +so as not to overwrite shared ram in the ignore-shared migration +optimisation. +However, it's got an unexpected side effect that because it skips +freeing the ROM data, when rom_reset gets called later on, after +migration (e.g. during a reboot), the ROM does get reset to the original +file contents. Because of seabios/x86's weird reboot process +this confuses a reboot into hanging after a migration. + +Fixes: 355477f8c73e9 ("migration: do not rom_reset() during incoming migration") +https://bugzilla.redhat.com/show_bug.cgi?id=1809380 + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. de Paula +--- + hw/core/loader.c | 25 ++++++++++++++++--------- + 1 file changed, 16 insertions(+), 9 deletions(-) + +diff --git a/hw/core/loader.c b/hw/core/loader.c +index 5099f27..375b29b 100644 +--- a/hw/core/loader.c ++++ b/hw/core/loader.c +@@ -1118,19 +1118,26 @@ static void rom_reset(void *unused) + { + Rom *rom; + +- /* +- * We don't need to fill in the RAM with ROM data because we'll fill +- * the data in during the next incoming migration in all cases. Note +- * that some of those RAMs can actually be modified by the guest on ARM +- * so this is probably the only right thing to do here. +- */ +- if (runstate_check(RUN_STATE_INMIGRATE)) +- return; +- + QTAILQ_FOREACH(rom, &roms, next) { + if (rom->fw_file) { + continue; + } ++ /* ++ * We don't need to fill in the RAM with ROM data because we'll fill ++ * the data in during the next incoming migration in all cases. Note ++ * that some of those RAMs can actually be modified by the guest. 
++ */ ++ if (runstate_check(RUN_STATE_INMIGRATE)) { ++ if (rom->data && rom->isrom) { ++ /* ++ * Free it so that a rom_reset after migration doesn't ++ * overwrite a potentially modified 'rom'. ++ */ ++ rom_free_data(rom); ++ } ++ continue; ++ } ++ + if (rom->data == NULL) { + continue; + } +-- +1.8.3.1 + diff --git a/kvm-file-posix-Drop-hdev_co_create_opts.patch b/kvm-file-posix-Drop-hdev_co_create_opts.patch new file mode 100644 index 0000000..ea2edbd --- /dev/null +++ b/kvm-file-posix-Drop-hdev_co_create_opts.patch @@ -0,0 +1,131 @@ +From 3d3509c010129bd15eb1f5ec1a7b9eedcdbf23f6 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:44 +0000 +Subject: [PATCH 03/20] file-posix: Drop hdev_co_create_opts() + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-4-mlevitsk@redhat.com> +Patchwork-id: 94225 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/6] file-posix: Drop hdev_co_create_opts() +Bugzilla: 1640894 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +The generic fallback implementation effectively does the same. + +Reviewed-by: Maxim Levitsky +Signed-off-by: Max Reitz +Message-Id: <20200122164532.178040-4-mreitz@redhat.com> +Signed-off-by: Max Reitz +(cherry picked from commit 87ca3b8fa615b278b33cabf9ed22b3f44b5214ba) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. 
de Paula +--- + block/file-posix.c | 67 ------------------------------------------------------ + 1 file changed, 67 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 1b805bd..fd29372 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -3418,67 +3418,6 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, + return raw_do_pwrite_zeroes(bs, offset, bytes, flags, true); + } + +-static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts, +- Error **errp) +-{ +- int fd; +- int ret = 0; +- struct stat stat_buf; +- int64_t total_size = 0; +- bool has_prefix; +- +- /* This function is used by both protocol block drivers and therefore either +- * of these prefixes may be given. +- * The return value has to be stored somewhere, otherwise this is an error +- * due to -Werror=unused-value. */ +- has_prefix = +- strstart(filename, "host_device:", &filename) || +- strstart(filename, "host_cdrom:" , &filename); +- +- (void)has_prefix; +- +- ret = raw_normalize_devicepath(&filename, errp); +- if (ret < 0) { +- return ret; +- } +- +- /* Read out options */ +- total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), +- BDRV_SECTOR_SIZE); +- +- fd = qemu_open(filename, O_WRONLY | O_BINARY); +- if (fd < 0) { +- ret = -errno; +- error_setg_errno(errp, -ret, "Could not open device"); +- return ret; +- } +- +- if (fstat(fd, &stat_buf) < 0) { +- ret = -errno; +- error_setg_errno(errp, -ret, "Could not stat device"); +- } else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode)) { +- error_setg(errp, +- "The given file is neither a block nor a character device"); +- ret = -ENODEV; +- } else if (lseek(fd, 0, SEEK_END) < total_size) { +- error_setg(errp, "Device is too small"); +- ret = -ENOSPC; +- } +- +- if (!ret && total_size) { +- uint8_t buf[BDRV_SECTOR_SIZE] = { 0 }; +- int64_t zero_size = MIN(BDRV_SECTOR_SIZE, total_size); +- if (lseek(fd, 0, SEEK_SET) == -1) { +- ret = -errno; +- } else { 
+- ret = qemu_write_full(fd, buf, zero_size); +- ret = ret == zero_size ? 0 : -errno; +- } +- } +- qemu_close(fd); +- return ret; +-} +- + static BlockDriver bdrv_host_device = { + .format_name = "host_device", + .protocol_name = "host_device", +@@ -3491,8 +3430,6 @@ static BlockDriver bdrv_host_device = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, +- .bdrv_co_create_opts = hdev_co_create_opts, +- .create_opts = &raw_create_opts, + .mutable_opts = mutable_opts, + .bdrv_co_invalidate_cache = raw_co_invalidate_cache, + .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, +@@ -3619,8 +3556,6 @@ static BlockDriver bdrv_host_cdrom = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, +- .bdrv_co_create_opts = hdev_co_create_opts, +- .create_opts = &raw_create_opts, + .mutable_opts = mutable_opts, + .bdrv_co_invalidate_cache = raw_co_invalidate_cache, + +@@ -3753,8 +3688,6 @@ static BlockDriver bdrv_host_cdrom = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, +- .bdrv_co_create_opts = hdev_co_create_opts, +- .create_opts = &raw_create_opts, + .mutable_opts = mutable_opts, + + .bdrv_co_preadv = raw_co_preadv, +-- +1.8.3.1 + diff --git a/kvm-iotests-Add-iothread-cases-to-155.patch b/kvm-iotests-Add-iothread-cases-to-155.patch new file mode 100644 index 0000000..24ac90c --- /dev/null +++ b/kvm-iotests-Add-iothread-cases-to-155.patch @@ -0,0 +1,147 @@ +From 2366cd9066e79d6c93a3a28710aea987b2c8f454 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:38 +0000 +Subject: [PATCH 18/20] iotests: Add iothread cases to 155 + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-13-kwolf@redhat.com> +Patchwork-id: 94289 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 12/13] iotests: Add iothread cases to 155 +Bugzilla: 
1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +This patch adds test cases for attaching the backing chain to a mirror +job target right before finalising the job, where the image is in a +non-mainloop AioContext (i.e. the backing chain needs to be moved to the +AioContext of the mirror target). + +This requires switching the test case from virtio-blk to virtio-scsi +because virtio-blk only actually starts using the iothreads when the +guest driver initialises the device (which never happens in a test case +without a guest OS). virtio-scsi always keeps its block nodes in the +AioContext of the the requested iothread without guest interaction. + +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-7-kwolf@redhat.com> +Reviewed-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit 6a5f6403a11307794ec79d277a065c137cfc12b2) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/155 | 32 +++++++++++++++++++++++--------- + tests/qemu-iotests/155.out | 4 ++-- + 2 files changed, 25 insertions(+), 11 deletions(-) + +diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 +index 3053e50..b552d1f 100755 +--- a/tests/qemu-iotests/155 ++++ b/tests/qemu-iotests/155 +@@ -49,11 +49,14 @@ target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt) + # chain opened right away. If False, blockdev-add + # opens it without a backing file and job completion + # is supposed to open the backing chain. 
++# use_iothread: If True, an iothread is configured for the virtio-blk device ++# that uses the image being mirrored + + class BaseClass(iotests.QMPTestCase): + target_blockdev_backing = None + target_real_backing = None + target_open_with_backing = True ++ use_iothread = False + + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, back0_img, '1440K') +@@ -69,7 +72,16 @@ class BaseClass(iotests.QMPTestCase): + 'file': {'driver': 'file', + 'filename': source_img}} + self.vm.add_blockdev(self.vm.qmp_to_opts(blockdev)) +- self.vm.add_device('virtio-blk,id=qdev0,drive=source') ++ ++ if self.use_iothread: ++ self.vm.add_object('iothread,id=iothread0') ++ iothread = ",iothread=iothread0" ++ else: ++ iothread = "" ++ ++ self.vm.add_device('virtio-scsi%s' % iothread) ++ self.vm.add_device('scsi-hd,id=qdev0,drive=source') ++ + self.vm.launch() + + self.assertIntactSourceBackingChain() +@@ -182,24 +194,21 @@ class MirrorBaseClass(BaseClass): + def testFull(self): + self.runMirror('full') + +- node = self.findBlockNode('target', +- '/machine/peripheral/qdev0/virtio-backend') ++ node = self.findBlockNode('target', 'qdev0') + self.assertCorrectBackingImage(node, None) + self.assertIntactSourceBackingChain() + + def testTop(self): + self.runMirror('top') + +- node = self.findBlockNode('target', +- '/machine/peripheral/qdev0/virtio-backend') ++ node = self.findBlockNode('target', 'qdev0') + self.assertCorrectBackingImage(node, back2_img) + self.assertIntactSourceBackingChain() + + def testNone(self): + self.runMirror('none') + +- node = self.findBlockNode('target', +- '/machine/peripheral/qdev0/virtio-backend') ++ node = self.findBlockNode('target', 'qdev0') + self.assertCorrectBackingImage(node, source_img) + self.assertIntactSourceBackingChain() + +@@ -252,6 +261,9 @@ class TestBlockdevMirrorReopen(MirrorBaseClass): + backing="backing") + self.assert_qmp(result, 'return', {}) + ++class TestBlockdevMirrorReopenIothread(TestBlockdevMirrorReopen): ++ use_iothread = True 
++ + # Attach the backing chain only during completion, with blockdev-snapshot + class TestBlockdevMirrorSnapshot(MirrorBaseClass): + cmd = 'blockdev-mirror' +@@ -268,6 +280,9 @@ class TestBlockdevMirrorSnapshot(MirrorBaseClass): + overlay="target") + self.assert_qmp(result, 'return', {}) + ++class TestBlockdevMirrorSnapshotIothread(TestBlockdevMirrorSnapshot): ++ use_iothread = True ++ + class TestCommit(BaseClass): + existing = False + +@@ -283,8 +298,7 @@ class TestCommit(BaseClass): + + self.vm.event_wait('BLOCK_JOB_COMPLETED') + +- node = self.findBlockNode(None, +- '/machine/peripheral/qdev0/virtio-backend') ++ node = self.findBlockNode(None, 'qdev0') + self.assert_qmp(node, 'image' + '/backing-image' * 0 + '/filename', + back1_img) + self.assert_qmp(node, 'image' + '/backing-image' * 1 + '/filename', +diff --git a/tests/qemu-iotests/155.out b/tests/qemu-iotests/155.out +index 4fd1c2d..ed714d5 100644 +--- a/tests/qemu-iotests/155.out ++++ b/tests/qemu-iotests/155.out +@@ -1,5 +1,5 @@ +-......................... ++............................... 
+ ---------------------------------------------------------------------- +-Ran 25 tests ++Ran 31 tests + + OK +-- +1.8.3.1 + diff --git a/kvm-iotests-Add-test-for-image-creation-fallback.patch b/kvm-iotests-Add-test-for-image-creation-fallback.patch new file mode 100644 index 0000000..a8ea8f7 --- /dev/null +++ b/kvm-iotests-Add-test-for-image-creation-fallback.patch @@ -0,0 +1,138 @@ +From 55f3a02574da226299d99bd74d12dd91b0f228dc Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:46 +0000 +Subject: [PATCH 05/20] iotests: Add test for image creation fallback + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-6-mlevitsk@redhat.com> +Patchwork-id: 94228 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/6] iotests: Add test for image creation fallback +Bugzilla: 1640894 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +Signed-off-by: Max Reitz +Message-Id: <20200122164532.178040-6-mreitz@redhat.com> +Reviewed-by: Eric Blake +Reviewed-by: Maxim Levitsky +[mreitz: Added a note that NBD does not support resizing, which is why + the second case is expected to fail] +Signed-off-by: Max Reitz +(cherry picked from commit 4dddeac115c5a2c5f74731fda0afd031a0b45490) +Signed-off-by: Maxim Levitsky + +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/259 | 62 ++++++++++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/259.out | 14 +++++++++++ + tests/qemu-iotests/group | 1 + + 3 files changed, 77 insertions(+) + create mode 100755 tests/qemu-iotests/259 + create mode 100644 tests/qemu-iotests/259.out + +diff --git a/tests/qemu-iotests/259 b/tests/qemu-iotests/259 +new file mode 100755 +index 0000000..62e29af +--- /dev/null ++++ b/tests/qemu-iotests/259 +@@ -0,0 +1,62 @@ ++#!/usr/bin/env bash ++# ++# Test generic image creation fallback (by using NBD) ++# ++# Copyright (C) 2019 Red Hat, Inc. 
++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see <http://www.gnu.org/licenses/>. ++# ++ ++# creator ++owner=mreitz@redhat.com ++ ++seq=$(basename $0) ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_test_img ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++. ./common.rc ++. ./common.filter ++ ++_supported_fmt raw ++_supported_proto nbd ++_supported_os Linux ++ ++ ++_make_test_img 64M ++ ++echo ++echo '--- Testing creation ---' ++ ++$QEMU_IMG create -f qcow2 "$TEST_IMG" 64M | _filter_img_create ++$QEMU_IMG info "$TEST_IMG" | _filter_img_info ++ ++echo ++echo '--- Testing creation for which the node would need to grow ---' ++ ++# NBD does not support resizing, so this will fail ++$QEMU_IMG create -f qcow2 -o preallocation=metadata "$TEST_IMG" 64M 2>&1 \ ++ | _filter_img_create ++ ++# success, all done ++echo "*** done" ++rm -f $seq.full ++status=0 +diff --git a/tests/qemu-iotests/259.out b/tests/qemu-iotests/259.out +new file mode 100644 +index 0000000..ffed19c +--- /dev/null ++++ b/tests/qemu-iotests/259.out +@@ -0,0 +1,14 @@ ++QA output created by 259 ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 ++ ++--- Testing creation --- ++Formatting 'TEST_DIR/t.IMGFMT', fmt=qcow2 size=67108864 ++image: TEST_DIR/t.IMGFMT ++file format: qcow2 ++virtual size: 64 MiB (67108864 bytes) ++disk size: unavailable ++ ++---
Testing creation for which the node would need to grow --- ++qemu-img: TEST_DIR/t.IMGFMT: Could not resize image: Image format driver does not support resize ++Formatting 'TEST_DIR/t.IMGFMT', fmt=qcow2 size=67108864 preallocation=metadata ++*** done +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index c0e8197..e47cbfc 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -273,6 +273,7 @@ + 256 rw quick + 257 rw + 258 rw quick ++259 rw auto quick + 260 rw quick + 261 rw + 262 rw quick migration +-- +1.8.3.1 + diff --git a/kvm-iotests-Fix-run_job-with-use_log-False.patch b/kvm-iotests-Fix-run_job-with-use_log-False.patch new file mode 100644 index 0000000..b105fc2 --- /dev/null +++ b/kvm-iotests-Fix-run_job-with-use_log-False.patch @@ -0,0 +1,47 @@ +From bb7b968a02c97564596b73d8d080cd745d96ed6b Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:35 +0000 +Subject: [PATCH 15/20] iotests: Fix run_job() with use_log=False + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-10-kwolf@redhat.com> +Patchwork-id: 94284 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 09/13] iotests: Fix run_job() with use_log=False +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +The 'job-complete' QMP command should be run with qmp() rather than +qmp_log() if use_log=False is passed. + +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-4-kwolf@redhat.com> +Reviewed-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit b31b532122ec6f68d17168449c034d2197bf96ec) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/iotests.py | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 0c55f7b..46f880c 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -618,7 +618,10 @@ class VM(qtest.QEMUQtestMachine): + if use_log: + log('Job failed: %s' % (j['error'])) + elif status == 'ready': +- self.qmp_log('job-complete', id=job) ++ if use_log: ++ self.qmp_log('job-complete', id=job) ++ else: ++ self.qmp('job-complete', id=job) + elif status == 'pending' and not auto_finalize: + if pre_finalize: + pre_finalize() +-- +1.8.3.1 + diff --git a/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch b/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch new file mode 100644 index 0000000..17e4a41 --- /dev/null +++ b/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch @@ -0,0 +1,122 @@ +From 7e23b64dc20b64ca6fa887cd06cc5e52374f6268 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:30 +0000 +Subject: [PATCH 10/20] iotests: Refactor blockdev-reopen test for iothreads + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-5-kwolf@redhat.com> +Patchwork-id: 94281 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 04/13] iotests: Refactor blockdev-reopen test for iothreads +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +We'll want to test more than one successful case in the future, so +prepare the test for that by a refactoring that runs each scenario in a +separate VM. + +test_iothreads_switch_{backing,overlay} currently produce errors, but +these are cases that should actually work, by switching either the +backing file node or the overlay node to the AioContext of the other +node. 
+ +Signed-off-by: Kevin Wolf +Tested-by: Peter Krempa +Message-Id: <20200306141413.30705-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 97518e11c3d902a32386d33797044f6b79bccc6f) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/245 | 47 ++++++++++++++++++++++++++++++++++++---------- + tests/qemu-iotests/245.out | 4 ++-- + 2 files changed, 39 insertions(+), 12 deletions(-) + +diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 +index e66a23c..f69c2fa 100644 +--- a/tests/qemu-iotests/245 ++++ b/tests/qemu-iotests/245 +@@ -968,8 +968,7 @@ class TestBlockdevReopen(iotests.QMPTestCase): + self.assertEqual(self.get_node('hd1'), None) + self.assert_qmp(self.get_node('hd2'), 'ro', True) + +- # We don't allow setting a backing file that uses a different AioContext +- def test_iothreads(self): ++ def run_test_iothreads(self, iothread_a, iothread_b, errmsg = None): + opts = hd_opts(0) + result = self.vm.qmp('blockdev-add', conv_keys = False, **opts) + self.assert_qmp(result, 'return', {}) +@@ -984,20 +983,48 @@ class TestBlockdevReopen(iotests.QMPTestCase): + result = self.vm.qmp('object-add', qom_type='iothread', id='iothread1') + self.assert_qmp(result, 'return', {}) + +- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd0', iothread='iothread0') ++ result = self.vm.qmp('device_add', driver='virtio-scsi', id='scsi0', ++ iothread=iothread_a) + self.assert_qmp(result, 'return', {}) + +- self.reopen(opts, {'backing': 'hd2'}, "Cannot use a new backing file with a different AioContext") +- +- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd2', iothread='iothread1') ++ result = self.vm.qmp('device_add', driver='virtio-scsi', id='scsi1', ++ iothread=iothread_b) + self.assert_qmp(result, 'return', {}) + +- self.reopen(opts, {'backing': 'hd2'}, "Cannot use a new backing file with a different AioContext") ++ if iothread_a: ++ result = self.vm.qmp('device_add', driver='scsi-hd', 
drive='hd0', ++ share_rw=True, bus="scsi0.0") ++ self.assert_qmp(result, 'return', {}) + +- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd2', iothread='iothread0') +- self.assert_qmp(result, 'return', {}) ++ if iothread_b: ++ result = self.vm.qmp('device_add', driver='scsi-hd', drive='hd2', ++ share_rw=True, bus="scsi1.0") ++ self.assert_qmp(result, 'return', {}) + +- self.reopen(opts, {'backing': 'hd2'}) ++ # Attaching the backing file may or may not work ++ self.reopen(opts, {'backing': 'hd2'}, errmsg) ++ ++ # But removing the backing file should always work ++ self.reopen(opts, {'backing': None}) ++ ++ self.vm.shutdown() ++ ++ # We don't allow setting a backing file that uses a different AioContext if ++ # neither of them can switch to the other AioContext ++ def test_iothreads_error(self): ++ self.run_test_iothreads('iothread0', 'iothread1', ++ "Cannot use a new backing file with a different AioContext") ++ ++ def test_iothreads_compatible_users(self): ++ self.run_test_iothreads('iothread0', 'iothread0') ++ ++ def test_iothreads_switch_backing(self): ++ self.run_test_iothreads('iothread0', None, ++ "Cannot use a new backing file with a different AioContext") ++ ++ def test_iothreads_switch_overlay(self): ++ self.run_test_iothreads(None, 'iothread0', ++ "Cannot use a new backing file with a different AioContext") + + if __name__ == '__main__': + iotests.main(supported_fmts=["qcow2"], +diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out +index a19de52..682b933 100644 +--- a/tests/qemu-iotests/245.out ++++ b/tests/qemu-iotests/245.out +@@ -1,6 +1,6 @@ +-.................. ++..................... 
+ ---------------------------------------------------------------------- +-Ran 18 tests ++Ran 21 tests + + OK + {"execute": "job-finalize", "arguments": {"id": "commit0"}} +-- +1.8.3.1 + diff --git a/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch b/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch new file mode 100644 index 0000000..58ef198 --- /dev/null +++ b/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch @@ -0,0 +1,162 @@ +From 239f7bdeef48a3c0b07098617371b9955dc55348 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:36 +0000 +Subject: [PATCH 16/20] iotests: Test mirror with temporarily disabled target + backing file + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-11-kwolf@redhat.com> +Patchwork-id: 94288 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 10/13] iotests: Test mirror with temporarily disabled target backing file +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +The newly tested scenario is a common live storage migration scenario: +The target node is opened without a backing file so that the active +layer is mirrored while its backing chain can be copied in the +background. + +The backing chain should be attached to the mirror target node when +finalising the job, just before switching the users of the source node +to the new copy (at which point the mirror job still has a reference to +the node). drive-mirror did this automatically, but with blockdev-mirror +this is the job of the QMP client. + +This patch adds test cases for two ways to achieve the desired result, +using either x-blockdev-reopen or blockdev-snapshot. + +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-5-kwolf@redhat.com> +Reviewed-by: Peter Krempa +Signed-off-by: Kevin Wolf +(cherry picked from commit 8bdee9f10eac2aefdcc5095feef756354c87bdec) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/155 | 56 +++++++++++++++++++++++++++++++++++++++++----- + tests/qemu-iotests/155.out | 4 ++-- + 2 files changed, 53 insertions(+), 7 deletions(-) + +diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 +index d7ef257..3053e50 100755 +--- a/tests/qemu-iotests/155 ++++ b/tests/qemu-iotests/155 +@@ -45,10 +45,15 @@ target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt) + # image during runtime, only makes sense if + # target_blockdev_backing is not None + # (None: same as target_backing) ++# target_open_with_backing: If True, the target image is added with its backing ++# chain opened right away. If False, blockdev-add ++# opens it without a backing file and job completion ++# is supposed to open the backing chain. + + class BaseClass(iotests.QMPTestCase): + target_blockdev_backing = None + target_real_backing = None ++ target_open_with_backing = True + + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, back0_img, '1440K') +@@ -80,9 +85,13 @@ class BaseClass(iotests.QMPTestCase): + options = { 'node-name': 'target', + 'driver': iotests.imgfmt, + 'file': { 'driver': 'file', ++ 'node-name': 'target-file', + 'filename': target_img } } +- if self.target_blockdev_backing: +- options['backing'] = self.target_blockdev_backing ++ ++ if not self.target_open_with_backing: ++ options['backing'] = None ++ elif self.target_blockdev_backing: ++ options['backing'] = self.target_blockdev_backing + + result = self.vm.qmp('blockdev-add', **options) + self.assert_qmp(result, 'return', {}) +@@ -147,10 +156,14 @@ class BaseClass(iotests.QMPTestCase): + # cmd: Mirroring command to execute, either drive-mirror or blockdev-mirror + + class MirrorBaseClass(BaseClass): ++ def openBacking(self): ++ pass ++ + def runMirror(self, sync): + if self.cmd == 'blockdev-mirror': + result = self.vm.qmp(self.cmd, job_id='mirror-job', device='source', +- sync=sync, target='target') ++ sync=sync, target='target', ++ auto_finalize=False) 
+ else: + if self.existing: + mode = 'existing' +@@ -159,11 +172,12 @@ class MirrorBaseClass(BaseClass): + result = self.vm.qmp(self.cmd, job_id='mirror-job', device='source', + sync=sync, target=target_img, + format=iotests.imgfmt, mode=mode, +- node_name='target') ++ node_name='target', auto_finalize=False) + + self.assert_qmp(result, 'return', {}) + +- self.complete_and_wait('mirror-job') ++ self.vm.run_job('mirror-job', use_log=False, auto_finalize=False, ++ pre_finalize=self.openBacking, auto_dismiss=True) + + def testFull(self): + self.runMirror('full') +@@ -221,6 +235,38 @@ class TestBlockdevMirrorForcedBacking(MirrorBaseClass): + target_blockdev_backing = { 'driver': 'null-co' } + target_real_backing = 'null-co://' + ++# Attach the backing chain only during completion, with blockdev-reopen ++class TestBlockdevMirrorReopen(MirrorBaseClass): ++ cmd = 'blockdev-mirror' ++ existing = True ++ target_backing = 'null-co://' ++ target_open_with_backing = False ++ ++ def openBacking(self): ++ if not self.target_open_with_backing: ++ result = self.vm.qmp('blockdev-add', node_name="backing", ++ driver="null-co") ++ self.assert_qmp(result, 'return', {}) ++ result = self.vm.qmp('x-blockdev-reopen', node_name="target", ++ driver=iotests.imgfmt, file="target-file", ++ backing="backing") ++ self.assert_qmp(result, 'return', {}) ++ ++# Attach the backing chain only during completion, with blockdev-snapshot ++class TestBlockdevMirrorSnapshot(MirrorBaseClass): ++ cmd = 'blockdev-mirror' ++ existing = True ++ target_backing = 'null-co://' ++ target_open_with_backing = False ++ ++ def openBacking(self): ++ if not self.target_open_with_backing: ++ result = self.vm.qmp('blockdev-add', node_name="backing", ++ driver="null-co") ++ self.assert_qmp(result, 'return', {}) ++ result = self.vm.qmp('blockdev-snapshot', node="backing", ++ overlay="target") ++ self.assert_qmp(result, 'return', {}) + + class TestCommit(BaseClass): + existing = False +diff --git a/tests/qemu-iotests/155.out 
b/tests/qemu-iotests/155.out +index 4176bb9..4fd1c2d 100644 +--- a/tests/qemu-iotests/155.out ++++ b/tests/qemu-iotests/155.out +@@ -1,5 +1,5 @@ +-................... ++......................... + ---------------------------------------------------------------------- +-Ran 19 tests ++Ran 25 tests + + OK +-- +1.8.3.1 + diff --git a/kvm-iotests-Use-complete_and_wait-in-155.patch b/kvm-iotests-Use-complete_and_wait-in-155.patch new file mode 100644 index 0000000..38b41be --- /dev/null +++ b/kvm-iotests-Use-complete_and_wait-in-155.patch @@ -0,0 +1,50 @@ +From 872fbd32d06bda4aba3a7e67a95f76f62e475dbe Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:27 +0000 +Subject: [PATCH 07/20] iotests: Use complete_and_wait() in 155 + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-2-kwolf@redhat.com> +Patchwork-id: 94279 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 01/13] iotests: Use complete_and_wait() in 155 +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +From: Max Reitz + +This way, we get to see errors during the completion phase. + +Signed-off-by: Max Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200218103454.296704-14-mreitz@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 6644d0e6192b36cdf2902c9774e1afb8ab2e7223) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/155 | 7 +------ + 1 file changed, 1 insertion(+), 6 deletions(-) + +diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 +index e194859..d7ef257 100755 +--- a/tests/qemu-iotests/155 ++++ b/tests/qemu-iotests/155 +@@ -163,12 +163,7 @@ class MirrorBaseClass(BaseClass): + + self.assert_qmp(result, 'return', {}) + +- self.vm.event_wait('BLOCK_JOB_READY') +- +- result = self.vm.qmp('block-job-complete', device='mirror-job') +- self.assert_qmp(result, 'return', {}) +- +- self.vm.event_wait('BLOCK_JOB_COMPLETED') ++ self.complete_and_wait('mirror-job') + + def testFull(self): + self.runMirror('full') +-- +1.8.3.1 + diff --git a/kvm-iscsi-Drop-iscsi_co_create_opts.patch b/kvm-iscsi-Drop-iscsi_co_create_opts.patch new file mode 100644 index 0000000..a6d0baf --- /dev/null +++ b/kvm-iscsi-Drop-iscsi_co_create_opts.patch @@ -0,0 +1,113 @@ +From 58b7d33e1bc17b89103ceaa39f5722a69b35d810 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 11 Mar 2020 10:51:45 +0000 +Subject: [PATCH 04/20] iscsi: Drop iscsi_co_create_opts() + +RH-Author: Maxim Levitsky +Message-id: <20200311105147.13208-5-mlevitsk@redhat.com> +Patchwork-id: 94226 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 4/6] iscsi: Drop iscsi_co_create_opts() +Bugzilla: 1640894 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: John Snow +RH-Acked-by: Max Reitz + +From: Max Reitz + +The generic fallback implementation effectively does the same. + +Reviewed-by: Maxim Levitsky +Signed-off-by: Max Reitz +Message-Id: <20200122164532.178040-5-mreitz@redhat.com> +Signed-off-by: Max Reitz +(cherry picked from commit 80f0900905b555f00d644894c786b6d66ac2e00e) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. 
de Paula +--- + block/iscsi.c | 56 -------------------------------------------------------- + 1 file changed, 56 deletions(-) + +diff --git a/block/iscsi.c b/block/iscsi.c +index cbd5729..b45da65 100644 +--- a/block/iscsi.c ++++ b/block/iscsi.c +@@ -2164,58 +2164,6 @@ static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset, + return 0; + } + +-static int coroutine_fn iscsi_co_create_opts(const char *filename, QemuOpts *opts, +- Error **errp) +-{ +- int ret = 0; +- int64_t total_size = 0; +- BlockDriverState *bs; +- IscsiLun *iscsilun = NULL; +- QDict *bs_options; +- Error *local_err = NULL; +- +- bs = bdrv_new(); +- +- /* Read out options */ +- total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), +- BDRV_SECTOR_SIZE); +- bs->opaque = g_new0(struct IscsiLun, 1); +- iscsilun = bs->opaque; +- +- bs_options = qdict_new(); +- iscsi_parse_filename(filename, bs_options, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- ret = -EINVAL; +- } else { +- ret = iscsi_open(bs, bs_options, 0, NULL); +- } +- qobject_unref(bs_options); +- +- if (ret != 0) { +- goto out; +- } +- iscsi_detach_aio_context(bs); +- if (iscsilun->type != TYPE_DISK) { +- ret = -ENODEV; +- goto out; +- } +- if (bs->total_sectors < total_size) { +- ret = -ENOSPC; +- goto out; +- } +- +- ret = 0; +-out: +- if (iscsilun->iscsi != NULL) { +- iscsi_destroy_context(iscsilun->iscsi); +- } +- g_free(bs->opaque); +- bs->opaque = NULL; +- bdrv_unref(bs); +- return ret; +-} +- + static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) + { + IscsiLun *iscsilun = bs->opaque; +@@ -2486,8 +2434,6 @@ static BlockDriver bdrv_iscsi = { + .bdrv_parse_filename = iscsi_parse_filename, + .bdrv_file_open = iscsi_open, + .bdrv_close = iscsi_close, +- .bdrv_co_create_opts = iscsi_co_create_opts, +- .create_opts = &iscsi_create_opts, + .bdrv_reopen_prepare = iscsi_reopen_prepare, + .bdrv_reopen_commit = iscsi_reopen_commit, + .bdrv_co_invalidate_cache = 
iscsi_co_invalidate_cache, +@@ -2525,8 +2471,6 @@ static BlockDriver bdrv_iser = { + .bdrv_parse_filename = iscsi_parse_filename, + .bdrv_file_open = iscsi_open, + .bdrv_close = iscsi_close, +- .bdrv_co_create_opts = iscsi_co_create_opts, +- .create_opts = &iscsi_create_opts, + .bdrv_reopen_prepare = iscsi_reopen_prepare, + .bdrv_reopen_commit = iscsi_reopen_commit, + .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, +-- +1.8.3.1 + diff --git a/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch b/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch new file mode 100644 index 0000000..9c25b76 --- /dev/null +++ b/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch @@ -0,0 +1,64 @@ +From 428eb7260718b69b1f3f421d03bce10b8785fc49 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 13 Mar 2020 12:34:39 +0000 +Subject: [PATCH 19/20] qapi: Add '@allow-write-only-overlay' feature for + 'blockdev-snapshot' + +RH-Author: Kevin Wolf +Message-id: <20200313123439.10548-14-kwolf@redhat.com> +Patchwork-id: 94290 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 13/13] qapi: Add '@allow-write-only-overlay' feature for 'blockdev-snapshot' +Bugzilla: 1790482 1805143 +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Peter Krempa + +From: Peter Krempa + +Announce that 'blockdev-snapshot' command's permissions allow changing +of the backing file if the 'consistent_read' permission is not required. + +This is useful for libvirt to allow late opening of the backing chain +during a blockdev-mirror. + +Signed-off-by: Peter Krempa +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-8-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit c6bdc312f30d5c7326aa2fdca3e0f98c15eb541a) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L.
de Paula +--- + qapi/block-core.json | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/qapi/block-core.json b/qapi/block-core.json +index a1e85b0..a64ad81 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -1541,6 +1541,12 @@ + # + # For the arguments, see the documentation of BlockdevSnapshot. + # ++# Features: ++# @allow-write-only-overlay: If present, the check whether this operation is safe ++# was relaxed so that it can be used to change ++# backing file of a destination of a blockdev-mirror. ++# (since 5.0) ++# + # Since: 2.5 + # + # Example: +@@ -1561,7 +1567,8 @@ + # + ## + { 'command': 'blockdev-snapshot', +- 'data': 'BlockdevSnapshot' } ++ 'data': 'BlockdevSnapshot', ++ 'features': [ 'allow-write-only-overlay' ] } + + ## + # @change-backing-file: +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 92fba75..fb07343 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 14%{?dist} +Release: 15%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -564,6 +564,59 @@ Patch213: kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch Patch214: kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch # For bz#1797064 - virtiofsd: Fixes Patch215: kvm-virtiofsd-Fix-xattr-operations.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch216: kvm-block-nbd-Fix-hang-in-.bdrv_close.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch217: kvm-block-Generic-file-creation-fallback.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch218: kvm-file-posix-Drop-hdev_co_create_opts.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch219: kvm-iscsi-Drop-iscsi_co_create_opts.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch220: kvm-iotests-Add-test-for-image-creation-fallback.patch +# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support +Patch221: kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch222: kvm-iotests-Use-complete_and_wait-in-155.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch223: kvm-block-Introduce-bdrv_reopen_commit_post-step.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch224: kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch +# For bz#1790482 - bitmaps in backing images can't be modified 
+# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch225: kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch226: kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch227: kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch228: kvm-block-Make-bdrv_get_cumulative_perm-public.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch229: kvm-block-Relax-restrictions-for-blockdev-snapshot.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch230: kvm-iotests-Fix-run_job-with-use_log-False.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch231: kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch232: kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch233: kvm-iotests-Add-iothread-cases-to-155.patch +# For bz#1790482 - bitmaps in backing images can't be modified +# For 
bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror +Patch234: kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch +# For bz#1809380 - guest hang during reboot process after migration from RHEl7.8 to RHEL8.2.0. +Patch235: kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch BuildRequires: wget BuildRequires: rpm-build @@ -1497,6 +1550,36 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Mar 17 2020 Danilo Cesar Lemes de Paula - 4.2.0-15.el8 +- kvm-block-nbd-Fix-hang-in-.bdrv_close.patch [bz#1640894] +- kvm-block-Generic-file-creation-fallback.patch [bz#1640894] +- kvm-file-posix-Drop-hdev_co_create_opts.patch [bz#1640894] +- kvm-iscsi-Drop-iscsi_co_create_opts.patch [bz#1640894] +- kvm-iotests-Add-test-for-image-creation-fallback.patch [bz#1640894] +- kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch [bz#1640894] +- kvm-iotests-Use-complete_and_wait-in-155.patch [bz#1790482 bz#1805143] +- kvm-block-Introduce-bdrv_reopen_commit_post-step.patch [bz#1790482 bz#1805143] +- kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch [bz#1790482 bz#1805143] +- kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch [bz#1790482 bz#1805143] +- kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch [bz#1790482 bz#1805143] +- kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch [bz#1790482 bz#1805143] +- kvm-block-Make-bdrv_get_cumulative_perm-public.patch [bz#1790482 bz#1805143] +- kvm-block-Relax-restrictions-for-blockdev-snapshot.patch [bz#1790482 bz#1805143] +- kvm-iotests-Fix-run_job-with-use_log-False.patch [bz#1790482 bz#1805143] +- kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch [bz#1790482 bz#1805143] +- kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch [bz#1790482 bz#1805143] +- kvm-iotests-Add-iothread-cases-to-155.patch [bz#1790482 bz#1805143] +- kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch [bz#1790482 bz#1805143] +- 
kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch [bz#1809380] +- Resolves: bz#1640894 + (Fix generic file creation fallback for qemu-img nvme:// image creation support) +- Resolves: bz#1790482 + (bitmaps in backing images can't be modified) +- Resolves: bz#1805143 + (allow late/lazy opening of backing chain for shallow blockdev-mirror) +- Resolves: bz#1809380 + (guest hang during reboot process after migration from RHEl7.8 to RHEL8.2.0.) + * Wed Mar 11 2020 Danilo Cesar Lemes de Paula - 4.2.0-14.el8 - kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch [bz#1782529] - kvm-migration-multifd-clean-pages-after-filling-packet.patch [bz#1738451] From 828ffba1b76957367f90b78b93a83b2be0a8d1e9 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Tue, 24 Mar 2020 01:04:55 +0000 Subject: [PATCH 071/195] * Tue Mar 24 2020 Danilo Cesar Lemes de Paula - 4.2.0-16.el8 - kvm-migration-Rate-limit-inside-host-pages.patch [bz#1814336] - kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch [bz#1811670] - Resolves: bz#1811670 (Unneeded qemu-guest-agent dependency on pixman) - Resolves: bz#1814336 ([POWER9] QEMU migration-test triggers a kernel warning) --- ...do-not-make-qemu-ga-link-with-pixman.patch | 2463 +++++++++++++++++ ...gration-Rate-limit-inside-host-pages.patch | 172 ++ qemu-kvm.spec | 14 +- 3 files changed, 2648 insertions(+), 1 deletion(-) create mode 100644 kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch create mode 100644 kvm-migration-Rate-limit-inside-host-pages.patch diff --git a/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch b/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch new file mode 100644 index 0000000..5b1b170 --- /dev/null +++ b/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch @@ -0,0 +1,2463 @@ +From fc2d0dfe60b14992a9b67e7a18394ba6365dc5ed Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 18 Mar 2020 18:10:40 +0000 +Subject: [PATCH 2/2] build-sys: do not make 
qemu-ga link with pixman +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20200318181040.256425-1-marcandre.lureau@redhat.com> +Patchwork-id: 94381 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] build-sys: do not make qemu-ga link with pixman +Bugzilla: 1811670 +RH-Acked-by: Markus Armbruster +RH-Acked-by: John Snow +RH-Acked-by: Daniel P. Berrange + +Since commit d52c454aadcdae74506f315ebf8b58bb79a05573 ("contrib: add +vhost-user-gpu"), qemu-ga is linking with pixman. + +This is because the Make-based build-system use a global namespace for +variables, and we rely on "main.o-libs" for different linking targets. + +Note: this kind of variable clashing is hard to fix or prevent +currently. meson should help, as declarations have a linear +dependency and doesn't rely so much on variables and clever tricks. + +Note2: we have a lot of main.c (or other duplicated names!) in +tree. Imho, it would be annoying and a bad workaroud to rename all +those to avoid conflicts like I did here. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1811670 + +Signed-off-by: Marc-André Lureau +Message-Id: <20200311160923.882474-1-marcandre.lureau@redhat.com> +Signed-off-by: Paolo Bonzini + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1811670 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=27330493 + +(cherry picked from commit 5b42bc5ce9ab4a3171819feea5042931817211fd) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. 
de Paula +--- + contrib/vhost-user-gpu/Makefile.objs | 6 +- + contrib/vhost-user-gpu/main.c | 1191 ------------------------------- + contrib/vhost-user-gpu/vhost-user-gpu.c | 1191 +++++++++++++++++++++++++++++++ + 3 files changed, 1194 insertions(+), 1194 deletions(-) + delete mode 100644 contrib/vhost-user-gpu/main.c + create mode 100644 contrib/vhost-user-gpu/vhost-user-gpu.c + +diff --git a/contrib/vhost-user-gpu/Makefile.objs b/contrib/vhost-user-gpu/Makefile.objs +index 6170c91..0929609 100644 +--- a/contrib/vhost-user-gpu/Makefile.objs ++++ b/contrib/vhost-user-gpu/Makefile.objs +@@ -1,7 +1,7 @@ +-vhost-user-gpu-obj-y = main.o virgl.o vugbm.o ++vhost-user-gpu-obj-y = vhost-user-gpu.o virgl.o vugbm.o + +-main.o-cflags := $(PIXMAN_CFLAGS) $(GBM_CFLAGS) +-main.o-libs := $(PIXMAN_LIBS) ++vhost-user-gpu.o-cflags := $(PIXMAN_CFLAGS) $(GBM_CFLAGS) ++vhost-user-gpu.o-libs := $(PIXMAN_LIBS) + + virgl.o-cflags := $(VIRGL_CFLAGS) $(GBM_CFLAGS) + virgl.o-libs := $(VIRGL_LIBS) +diff --git a/contrib/vhost-user-gpu/main.c b/contrib/vhost-user-gpu/main.c +deleted file mode 100644 +index b45d201..0000000 +--- a/contrib/vhost-user-gpu/main.c ++++ /dev/null +@@ -1,1191 +0,0 @@ +-/* +- * Virtio vhost-user GPU Device +- * +- * Copyright Red Hat, Inc. 2013-2018 +- * +- * Authors: +- * Dave Airlie +- * Gerd Hoffmann +- * Marc-André Lureau +- * +- * This work is licensed under the terms of the GNU GPL, version 2 or later. +- * See the COPYING file in the top-level directory. 
+- */ +-#include "qemu/osdep.h" +-#include "qemu/drm.h" +-#include "qapi/error.h" +-#include "qemu/sockets.h" +- +-#include +-#include +- +-#include "vugpu.h" +-#include "hw/virtio/virtio-gpu-bswap.h" +-#include "hw/virtio/virtio-gpu-pixman.h" +-#include "virgl.h" +-#include "vugbm.h" +- +-enum { +- VHOST_USER_GPU_MAX_QUEUES = 2, +-}; +- +-struct virtio_gpu_simple_resource { +- uint32_t resource_id; +- uint32_t width; +- uint32_t height; +- uint32_t format; +- struct iovec *iov; +- unsigned int iov_cnt; +- uint32_t scanout_bitmask; +- pixman_image_t *image; +- struct vugbm_buffer buffer; +- QTAILQ_ENTRY(virtio_gpu_simple_resource) next; +-}; +- +-static gboolean opt_print_caps; +-static int opt_fdnum = -1; +-static char *opt_socket_path; +-static char *opt_render_node; +-static gboolean opt_virgl; +- +-static void vg_handle_ctrl(VuDev *dev, int qidx); +- +-static const char * +-vg_cmd_to_string(int cmd) +-{ +-#define CMD(cmd) [cmd] = #cmd +- static const char *vg_cmd_str[] = { +- CMD(VIRTIO_GPU_UNDEFINED), +- +- /* 2d commands */ +- CMD(VIRTIO_GPU_CMD_GET_DISPLAY_INFO), +- CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D), +- CMD(VIRTIO_GPU_CMD_RESOURCE_UNREF), +- CMD(VIRTIO_GPU_CMD_SET_SCANOUT), +- CMD(VIRTIO_GPU_CMD_RESOURCE_FLUSH), +- CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D), +- CMD(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING), +- CMD(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING), +- CMD(VIRTIO_GPU_CMD_GET_CAPSET_INFO), +- CMD(VIRTIO_GPU_CMD_GET_CAPSET), +- +- /* 3d commands */ +- CMD(VIRTIO_GPU_CMD_CTX_CREATE), +- CMD(VIRTIO_GPU_CMD_CTX_DESTROY), +- CMD(VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE), +- CMD(VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE), +- CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D), +- CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D), +- CMD(VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D), +- CMD(VIRTIO_GPU_CMD_SUBMIT_3D), +- +- /* cursor commands */ +- CMD(VIRTIO_GPU_CMD_UPDATE_CURSOR), +- CMD(VIRTIO_GPU_CMD_MOVE_CURSOR), +- }; +-#undef REQ +- +- if (cmd >= 0 && cmd < G_N_ELEMENTS(vg_cmd_str)) { +- return 
vg_cmd_str[cmd]; +- } else { +- return "unknown"; +- } +-} +- +-static int +-vg_sock_fd_read(int sock, void *buf, ssize_t buflen) +-{ +- int ret; +- +- do { +- ret = read(sock, buf, buflen); +- } while (ret < 0 && (errno == EINTR || errno == EAGAIN)); +- +- g_warn_if_fail(ret == buflen); +- return ret; +-} +- +-static void +-vg_sock_fd_close(VuGpu *g) +-{ +- if (g->sock_fd >= 0) { +- close(g->sock_fd); +- g->sock_fd = -1; +- } +-} +- +-static gboolean +-source_wait_cb(gint fd, GIOCondition condition, gpointer user_data) +-{ +- VuGpu *g = user_data; +- +- if (!vg_recv_msg(g, VHOST_USER_GPU_DMABUF_UPDATE, 0, NULL)) { +- return G_SOURCE_CONTINUE; +- } +- +- /* resume */ +- g->wait_ok = 0; +- vg_handle_ctrl(&g->dev.parent, 0); +- +- return G_SOURCE_REMOVE; +-} +- +-void +-vg_wait_ok(VuGpu *g) +-{ +- assert(g->wait_ok == 0); +- g->wait_ok = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP, +- source_wait_cb, g); +-} +- +-static int +-vg_sock_fd_write(int sock, const void *buf, ssize_t buflen, int fd) +-{ +- ssize_t ret; +- struct iovec iov = { +- .iov_base = (void *)buf, +- .iov_len = buflen, +- }; +- struct msghdr msg = { +- .msg_iov = &iov, +- .msg_iovlen = 1, +- }; +- union { +- struct cmsghdr cmsghdr; +- char control[CMSG_SPACE(sizeof(int))]; +- } cmsgu; +- struct cmsghdr *cmsg; +- +- if (fd != -1) { +- msg.msg_control = cmsgu.control; +- msg.msg_controllen = sizeof(cmsgu.control); +- +- cmsg = CMSG_FIRSTHDR(&msg); +- cmsg->cmsg_len = CMSG_LEN(sizeof(int)); +- cmsg->cmsg_level = SOL_SOCKET; +- cmsg->cmsg_type = SCM_RIGHTS; +- +- *((int *)CMSG_DATA(cmsg)) = fd; +- } +- +- do { +- ret = sendmsg(sock, &msg, 0); +- } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); +- +- g_warn_if_fail(ret == buflen); +- return ret; +-} +- +-void +-vg_send_msg(VuGpu *vg, const VhostUserGpuMsg *msg, int fd) +-{ +- if (vg_sock_fd_write(vg->sock_fd, msg, +- VHOST_USER_GPU_HDR_SIZE + msg->size, fd) < 0) { +- vg_sock_fd_close(vg); +- } +-} +- +-bool +-vg_recv_msg(VuGpu *g, uint32_t 
expect_req, uint32_t expect_size, +- gpointer payload) +-{ +- uint32_t req, flags, size; +- +- if (vg_sock_fd_read(g->sock_fd, &req, sizeof(req)) < 0 || +- vg_sock_fd_read(g->sock_fd, &flags, sizeof(flags)) < 0 || +- vg_sock_fd_read(g->sock_fd, &size, sizeof(size)) < 0) { +- goto err; +- } +- +- g_return_val_if_fail(req == expect_req, false); +- g_return_val_if_fail(flags & VHOST_USER_GPU_MSG_FLAG_REPLY, false); +- g_return_val_if_fail(size == expect_size, false); +- +- if (size && vg_sock_fd_read(g->sock_fd, payload, size) != size) { +- goto err; +- } +- +- return true; +- +-err: +- vg_sock_fd_close(g); +- return false; +-} +- +-static struct virtio_gpu_simple_resource * +-virtio_gpu_find_resource(VuGpu *g, uint32_t resource_id) +-{ +- struct virtio_gpu_simple_resource *res; +- +- QTAILQ_FOREACH(res, &g->reslist, next) { +- if (res->resource_id == resource_id) { +- return res; +- } +- } +- return NULL; +-} +- +-void +-vg_ctrl_response(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd, +- struct virtio_gpu_ctrl_hdr *resp, +- size_t resp_len) +-{ +- size_t s; +- +- if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE) { +- resp->flags |= VIRTIO_GPU_FLAG_FENCE; +- resp->fence_id = cmd->cmd_hdr.fence_id; +- resp->ctx_id = cmd->cmd_hdr.ctx_id; +- } +- virtio_gpu_ctrl_hdr_bswap(resp); +- s = iov_from_buf(cmd->elem.in_sg, cmd->elem.in_num, 0, resp, resp_len); +- if (s != resp_len) { +- g_critical("%s: response size incorrect %zu vs %zu", +- __func__, s, resp_len); +- } +- vu_queue_push(&g->dev.parent, cmd->vq, &cmd->elem, s); +- vu_queue_notify(&g->dev.parent, cmd->vq); +- cmd->finished = true; +-} +- +-void +-vg_ctrl_response_nodata(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd, +- enum virtio_gpu_ctrl_type type) +-{ +- struct virtio_gpu_ctrl_hdr resp = { +- .type = type, +- }; +- +- vg_ctrl_response(g, cmd, &resp, sizeof(resp)); +-} +- +-void +-vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_resp_display_info dpy_info = { {} }; 
+- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_GET_DISPLAY_INFO, +- .size = 0, +- }; +- +- assert(vg->wait_ok == 0); +- +- vg_send_msg(vg, &msg, -1); +- if (!vg_recv_msg(vg, msg.request, sizeof(dpy_info), &dpy_info)) { +- return; +- } +- +- vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info)); +-} +- +-static void +-vg_resource_create_2d(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- pixman_format_code_t pformat; +- struct virtio_gpu_simple_resource *res; +- struct virtio_gpu_resource_create_2d c2d; +- +- VUGPU_FILL_CMD(c2d); +- virtio_gpu_bswap_32(&c2d, sizeof(c2d)); +- +- if (c2d.resource_id == 0) { +- g_critical("%s: resource id 0 is not allowed", __func__); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- res = virtio_gpu_find_resource(g, c2d.resource_id); +- if (res) { +- g_critical("%s: resource already exists %d", __func__, c2d.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- res = g_new0(struct virtio_gpu_simple_resource, 1); +- res->width = c2d.width; +- res->height = c2d.height; +- res->format = c2d.format; +- res->resource_id = c2d.resource_id; +- +- pformat = virtio_gpu_get_pixman_format(c2d.format); +- if (!pformat) { +- g_critical("%s: host couldn't handle guest format %d", +- __func__, c2d.format); +- g_free(res); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; +- return; +- } +- vugbm_buffer_create(&res->buffer, &g->gdev, c2d.width, c2d.height); +- res->image = pixman_image_create_bits(pformat, +- c2d.width, +- c2d.height, +- (uint32_t *)res->buffer.mmap, +- res->buffer.stride); +- if (!res->image) { +- g_critical("%s: resource creation failed %d %d %d", +- __func__, c2d.resource_id, c2d.width, c2d.height); +- g_free(res); +- cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; +- return; +- } +- +- QTAILQ_INSERT_HEAD(&g->reslist, res, next); +-} +- +-static void +-vg_disable_scanout(VuGpu *g, int scanout_id) +-{ +- struct virtio_gpu_scanout *scanout 
= &g->scanout[scanout_id]; +- struct virtio_gpu_simple_resource *res; +- +- if (scanout->resource_id == 0) { +- return; +- } +- +- res = virtio_gpu_find_resource(g, scanout->resource_id); +- if (res) { +- res->scanout_bitmask &= ~(1 << scanout_id); +- } +- +- scanout->width = 0; +- scanout->height = 0; +- +- if (g->sock_fd >= 0) { +- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_SCANOUT, +- .size = sizeof(VhostUserGpuScanout), +- .payload.scanout.scanout_id = scanout_id, +- }; +- vg_send_msg(g, &msg, -1); +- } +-} +- +-static void +-vg_resource_destroy(VuGpu *g, +- struct virtio_gpu_simple_resource *res) +-{ +- int i; +- +- if (res->scanout_bitmask) { +- for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { +- if (res->scanout_bitmask & (1 << i)) { +- vg_disable_scanout(g, i); +- } +- } +- } +- +- vugbm_buffer_destroy(&res->buffer); +- pixman_image_unref(res->image); +- QTAILQ_REMOVE(&g->reslist, res, next); +- g_free(res); +-} +- +-static void +-vg_resource_unref(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_simple_resource *res; +- struct virtio_gpu_resource_unref unref; +- +- VUGPU_FILL_CMD(unref); +- virtio_gpu_bswap_32(&unref, sizeof(unref)); +- +- res = virtio_gpu_find_resource(g, unref.resource_id); +- if (!res) { +- g_critical("%s: illegal resource specified %d", +- __func__, unref.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- vg_resource_destroy(g, res); +-} +- +-int +-vg_create_mapping_iov(VuGpu *g, +- struct virtio_gpu_resource_attach_backing *ab, +- struct virtio_gpu_ctrl_command *cmd, +- struct iovec **iov) +-{ +- struct virtio_gpu_mem_entry *ents; +- size_t esize, s; +- int i; +- +- if (ab->nr_entries > 16384) { +- g_critical("%s: nr_entries is too big (%d > 16384)", +- __func__, ab->nr_entries); +- return -1; +- } +- +- esize = sizeof(*ents) * ab->nr_entries; +- ents = g_malloc(esize); +- s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, +- sizeof(*ab), ents, esize); +- if (s 
!= esize) { +- g_critical("%s: command data size incorrect %zu vs %zu", +- __func__, s, esize); +- g_free(ents); +- return -1; +- } +- +- *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries); +- for (i = 0; i < ab->nr_entries; i++) { +- uint64_t len = ents[i].length; +- (*iov)[i].iov_len = ents[i].length; +- (*iov)[i].iov_base = vu_gpa_to_va(&g->dev.parent, &len, ents[i].addr); +- if (!(*iov)[i].iov_base || len != ents[i].length) { +- g_critical("%s: resource %d element %d", +- __func__, ab->resource_id, i); +- g_free(*iov); +- g_free(ents); +- *iov = NULL; +- return -1; +- } +- } +- g_free(ents); +- return 0; +-} +- +-static void +-vg_resource_attach_backing(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_simple_resource *res; +- struct virtio_gpu_resource_attach_backing ab; +- int ret; +- +- VUGPU_FILL_CMD(ab); +- virtio_gpu_bswap_32(&ab, sizeof(ab)); +- +- res = virtio_gpu_find_resource(g, ab.resource_id); +- if (!res) { +- g_critical("%s: illegal resource specified %d", +- __func__, ab.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov); +- if (ret != 0) { +- cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; +- return; +- } +- +- res->iov_cnt = ab.nr_entries; +-} +- +-static void +-vg_resource_detach_backing(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_simple_resource *res; +- struct virtio_gpu_resource_detach_backing detach; +- +- VUGPU_FILL_CMD(detach); +- virtio_gpu_bswap_32(&detach, sizeof(detach)); +- +- res = virtio_gpu_find_resource(g, detach.resource_id); +- if (!res || !res->iov) { +- g_critical("%s: illegal resource specified %d", +- __func__, detach.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- g_free(res->iov); +- res->iov = NULL; +- res->iov_cnt = 0; +-} +- +-static void +-vg_transfer_to_host_2d(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct 
virtio_gpu_simple_resource *res; +- int h; +- uint32_t src_offset, dst_offset, stride; +- int bpp; +- pixman_format_code_t format; +- struct virtio_gpu_transfer_to_host_2d t2d; +- +- VUGPU_FILL_CMD(t2d); +- virtio_gpu_t2d_bswap(&t2d); +- +- res = virtio_gpu_find_resource(g, t2d.resource_id); +- if (!res || !res->iov) { +- g_critical("%s: illegal resource specified %d", +- __func__, t2d.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- if (t2d.r.x > res->width || +- t2d.r.y > res->height || +- t2d.r.width > res->width || +- t2d.r.height > res->height || +- t2d.r.x + t2d.r.width > res->width || +- t2d.r.y + t2d.r.height > res->height) { +- g_critical("%s: transfer bounds outside resource" +- " bounds for resource %d: %d %d %d %d vs %d %d", +- __func__, t2d.resource_id, t2d.r.x, t2d.r.y, +- t2d.r.width, t2d.r.height, res->width, res->height); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; +- return; +- } +- +- format = pixman_image_get_format(res->image); +- bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8; +- stride = pixman_image_get_stride(res->image); +- +- if (t2d.offset || t2d.r.x || t2d.r.y || +- t2d.r.width != pixman_image_get_width(res->image)) { +- void *img_data = pixman_image_get_data(res->image); +- for (h = 0; h < t2d.r.height; h++) { +- src_offset = t2d.offset + stride * h; +- dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp); +- +- iov_to_buf(res->iov, res->iov_cnt, src_offset, +- img_data +- + dst_offset, t2d.r.width * bpp); +- } +- } else { +- iov_to_buf(res->iov, res->iov_cnt, 0, +- pixman_image_get_data(res->image), +- pixman_image_get_stride(res->image) +- * pixman_image_get_height(res->image)); +- } +-} +- +-static void +-vg_set_scanout(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_simple_resource *res, *ores; +- struct virtio_gpu_scanout *scanout; +- struct virtio_gpu_set_scanout ss; +- int fd; +- +- VUGPU_FILL_CMD(ss); +- virtio_gpu_bswap_32(&ss, sizeof(ss)); +- +- if 
(ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) { +- g_critical("%s: illegal scanout id specified %d", +- __func__, ss.scanout_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; +- return; +- } +- +- if (ss.resource_id == 0) { +- vg_disable_scanout(g, ss.scanout_id); +- return; +- } +- +- /* create a surface for this scanout */ +- res = virtio_gpu_find_resource(g, ss.resource_id); +- if (!res) { +- g_critical("%s: illegal resource specified %d", +- __func__, ss.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- if (ss.r.x > res->width || +- ss.r.y > res->height || +- ss.r.width > res->width || +- ss.r.height > res->height || +- ss.r.x + ss.r.width > res->width || +- ss.r.y + ss.r.height > res->height) { +- g_critical("%s: illegal scanout %d bounds for" +- " resource %d, (%d,%d)+%d,%d vs %d %d", +- __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y, +- ss.r.width, ss.r.height, res->width, res->height); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; +- return; +- } +- +- scanout = &g->scanout[ss.scanout_id]; +- +- ores = virtio_gpu_find_resource(g, scanout->resource_id); +- if (ores) { +- ores->scanout_bitmask &= ~(1 << ss.scanout_id); +- } +- +- res->scanout_bitmask |= (1 << ss.scanout_id); +- scanout->resource_id = ss.resource_id; +- scanout->x = ss.r.x; +- scanout->y = ss.r.y; +- scanout->width = ss.r.width; +- scanout->height = ss.r.height; +- +- struct vugbm_buffer *buffer = &res->buffer; +- +- if (vugbm_buffer_can_get_dmabuf_fd(buffer)) { +- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_DMABUF_SCANOUT, +- .size = sizeof(VhostUserGpuDMABUFScanout), +- .payload.dmabuf_scanout = (VhostUserGpuDMABUFScanout) { +- .scanout_id = ss.scanout_id, +- .x = ss.r.x, +- .y = ss.r.y, +- .width = ss.r.width, +- .height = ss.r.height, +- .fd_width = buffer->width, +- .fd_height = buffer->height, +- .fd_stride = buffer->stride, +- .fd_drm_fourcc = buffer->format +- } +- }; +- +- if 
(vugbm_buffer_get_dmabuf_fd(buffer, &fd)) { +- vg_send_msg(g, &msg, fd); +- close(fd); +- } +- } else { +- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_SCANOUT, +- .size = sizeof(VhostUserGpuScanout), +- .payload.scanout = (VhostUserGpuScanout) { +- .scanout_id = ss.scanout_id, +- .width = scanout->width, +- .height = scanout->height +- } +- }; +- vg_send_msg(g, &msg, -1); +- } +-} +- +-static void +-vg_resource_flush(VuGpu *g, +- struct virtio_gpu_ctrl_command *cmd) +-{ +- struct virtio_gpu_simple_resource *res; +- struct virtio_gpu_resource_flush rf; +- pixman_region16_t flush_region; +- int i; +- +- VUGPU_FILL_CMD(rf); +- virtio_gpu_bswap_32(&rf, sizeof(rf)); +- +- res = virtio_gpu_find_resource(g, rf.resource_id); +- if (!res) { +- g_critical("%s: illegal resource specified %d\n", +- __func__, rf.resource_id); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +- return; +- } +- +- if (rf.r.x > res->width || +- rf.r.y > res->height || +- rf.r.width > res->width || +- rf.r.height > res->height || +- rf.r.x + rf.r.width > res->width || +- rf.r.y + rf.r.height > res->height) { +- g_critical("%s: flush bounds outside resource" +- " bounds for resource %d: %d %d %d %d vs %d %d\n", +- __func__, rf.resource_id, rf.r.x, rf.r.y, +- rf.r.width, rf.r.height, res->width, res->height); +- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; +- return; +- } +- +- pixman_region_init_rect(&flush_region, +- rf.r.x, rf.r.y, rf.r.width, rf.r.height); +- for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { +- struct virtio_gpu_scanout *scanout; +- pixman_region16_t region, finalregion; +- pixman_box16_t *extents; +- +- if (!(res->scanout_bitmask & (1 << i))) { +- continue; +- } +- scanout = &g->scanout[i]; +- +- pixman_region_init(&finalregion); +- pixman_region_init_rect(®ion, scanout->x, scanout->y, +- scanout->width, scanout->height); +- +- pixman_region_intersect(&finalregion, &flush_region, ®ion); +- +- extents = pixman_region_extents(&finalregion); +- size_t width = 
extents->x2 - extents->x1; +- size_t height = extents->y2 - extents->y1; +- +- if (vugbm_buffer_can_get_dmabuf_fd(&res->buffer)) { +- VhostUserGpuMsg vmsg = { +- .request = VHOST_USER_GPU_DMABUF_UPDATE, +- .size = sizeof(VhostUserGpuUpdate), +- .payload.update = (VhostUserGpuUpdate) { +- .scanout_id = i, +- .x = extents->x1, +- .y = extents->y1, +- .width = width, +- .height = height, +- } +- }; +- vg_send_msg(g, &vmsg, -1); +- vg_wait_ok(g); +- } else { +- size_t bpp = +- PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) / 8; +- size_t size = width * height * bpp; +- +- void *p = g_malloc(VHOST_USER_GPU_HDR_SIZE + +- sizeof(VhostUserGpuUpdate) + size); +- VhostUserGpuMsg *msg = p; +- msg->request = VHOST_USER_GPU_UPDATE; +- msg->size = sizeof(VhostUserGpuUpdate) + size; +- msg->payload.update = (VhostUserGpuUpdate) { +- .scanout_id = i, +- .x = extents->x1, +- .y = extents->y1, +- .width = width, +- .height = height, +- }; +- pixman_image_t *i = +- pixman_image_create_bits(pixman_image_get_format(res->image), +- msg->payload.update.width, +- msg->payload.update.height, +- p + offsetof(VhostUserGpuMsg, +- payload.update.data), +- width * bpp); +- pixman_image_composite(PIXMAN_OP_SRC, +- res->image, NULL, i, +- extents->x1, extents->y1, +- 0, 0, 0, 0, +- width, height); +- pixman_image_unref(i); +- vg_send_msg(g, msg, -1); +- g_free(msg); +- } +- pixman_region_fini(®ion); +- pixman_region_fini(&finalregion); +- } +- pixman_region_fini(&flush_region); +-} +- +-static void +-vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) +-{ +- switch (cmd->cmd_hdr.type) { +- case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: +- vg_get_display_info(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: +- vg_resource_create_2d(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_RESOURCE_UNREF: +- vg_resource_unref(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_RESOURCE_FLUSH: +- vg_resource_flush(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: +- 
vg_transfer_to_host_2d(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_SET_SCANOUT: +- vg_set_scanout(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: +- vg_resource_attach_backing(vg, cmd); +- break; +- case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: +- vg_resource_detach_backing(vg, cmd); +- break; +- /* case VIRTIO_GPU_CMD_GET_EDID: */ +- /* break */ +- default: +- g_warning("TODO handle ctrl %x\n", cmd->cmd_hdr.type); +- cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; +- break; +- } +- if (!cmd->finished) { +- vg_ctrl_response_nodata(vg, cmd, cmd->error ? cmd->error : +- VIRTIO_GPU_RESP_OK_NODATA); +- } +-} +- +-static void +-vg_handle_ctrl(VuDev *dev, int qidx) +-{ +- VuGpu *vg = container_of(dev, VuGpu, dev.parent); +- VuVirtq *vq = vu_get_queue(dev, qidx); +- struct virtio_gpu_ctrl_command *cmd = NULL; +- size_t len; +- +- for (;;) { +- if (vg->wait_ok != 0) { +- return; +- } +- +- cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command)); +- if (!cmd) { +- break; +- } +- cmd->vq = vq; +- cmd->error = 0; +- cmd->finished = false; +- +- len = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, +- 0, &cmd->cmd_hdr, sizeof(cmd->cmd_hdr)); +- if (len != sizeof(cmd->cmd_hdr)) { +- g_warning("%s: command size incorrect %zu vs %zu\n", +- __func__, len, sizeof(cmd->cmd_hdr)); +- } +- +- virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr); +- g_debug("%d %s\n", cmd->cmd_hdr.type, +- vg_cmd_to_string(cmd->cmd_hdr.type)); +- +- if (vg->virgl) { +- vg_virgl_process_cmd(vg, cmd); +- } else { +- vg_process_cmd(vg, cmd); +- } +- +- if (!cmd->finished) { +- QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next); +- vg->inflight++; +- } else { +- g_free(cmd); +- } +- } +-} +- +-static void +-update_cursor_data_simple(VuGpu *g, uint32_t resource_id, gpointer data) +-{ +- struct virtio_gpu_simple_resource *res; +- +- res = virtio_gpu_find_resource(g, resource_id); +- g_return_if_fail(res != NULL); +- g_return_if_fail(pixman_image_get_width(res->image) == 64); +- 
g_return_if_fail(pixman_image_get_height(res->image) == 64); +- g_return_if_fail( +- PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) == 32); +- +- memcpy(data, pixman_image_get_data(res->image), 64 * 64 * sizeof(uint32_t)); +-} +- +-static void +-vg_process_cursor_cmd(VuGpu *g, struct virtio_gpu_update_cursor *cursor) +-{ +- bool move = cursor->hdr.type != VIRTIO_GPU_CMD_MOVE_CURSOR; +- +- g_debug("%s move:%d\n", G_STRFUNC, move); +- +- if (move) { +- VhostUserGpuMsg msg = { +- .request = cursor->resource_id ? +- VHOST_USER_GPU_CURSOR_POS : VHOST_USER_GPU_CURSOR_POS_HIDE, +- .size = sizeof(VhostUserGpuCursorPos), +- .payload.cursor_pos = { +- .scanout_id = cursor->pos.scanout_id, +- .x = cursor->pos.x, +- .y = cursor->pos.y, +- } +- }; +- vg_send_msg(g, &msg, -1); +- } else { +- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_CURSOR_UPDATE, +- .size = sizeof(VhostUserGpuCursorUpdate), +- .payload.cursor_update = { +- .pos = { +- .scanout_id = cursor->pos.scanout_id, +- .x = cursor->pos.x, +- .y = cursor->pos.y, +- }, +- .hot_x = cursor->hot_x, +- .hot_y = cursor->hot_y, +- } +- }; +- if (g->virgl) { +- vg_virgl_update_cursor_data(g, cursor->resource_id, +- msg.payload.cursor_update.data); +- } else { +- update_cursor_data_simple(g, cursor->resource_id, +- msg.payload.cursor_update.data); +- } +- vg_send_msg(g, &msg, -1); +- } +-} +- +-static void +-vg_handle_cursor(VuDev *dev, int qidx) +-{ +- VuGpu *g = container_of(dev, VuGpu, dev.parent); +- VuVirtq *vq = vu_get_queue(dev, qidx); +- VuVirtqElement *elem; +- size_t len; +- struct virtio_gpu_update_cursor cursor; +- +- for (;;) { +- elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); +- if (!elem) { +- break; +- } +- g_debug("cursor out:%d in:%d\n", elem->out_num, elem->in_num); +- +- len = iov_to_buf(elem->out_sg, elem->out_num, +- 0, &cursor, sizeof(cursor)); +- if (len != sizeof(cursor)) { +- g_warning("%s: cursor size incorrect %zu vs %zu\n", +- __func__, len, sizeof(cursor)); +- } else { +- 
virtio_gpu_bswap_32(&cursor, sizeof(cursor)); +- vg_process_cursor_cmd(g, &cursor); +- } +- vu_queue_push(dev, vq, elem, 0); +- vu_queue_notify(dev, vq); +- g_free(elem); +- } +-} +- +-static void +-vg_panic(VuDev *dev, const char *msg) +-{ +- g_critical("%s\n", msg); +- exit(1); +-} +- +-static void +-vg_queue_set_started(VuDev *dev, int qidx, bool started) +-{ +- VuVirtq *vq = vu_get_queue(dev, qidx); +- +- g_debug("queue started %d:%d\n", qidx, started); +- +- switch (qidx) { +- case 0: +- vu_set_queue_handler(dev, vq, started ? vg_handle_ctrl : NULL); +- break; +- case 1: +- vu_set_queue_handler(dev, vq, started ? vg_handle_cursor : NULL); +- break; +- default: +- break; +- } +-} +- +-static void +-set_gpu_protocol_features(VuGpu *g) +-{ +- uint64_t u64; +- VhostUserGpuMsg msg = { +- .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES +- }; +- +- assert(g->wait_ok == 0); +- vg_send_msg(g, &msg, -1); +- if (!vg_recv_msg(g, msg.request, sizeof(u64), &u64)) { +- return; +- } +- +- msg = (VhostUserGpuMsg) { +- .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES, +- .size = sizeof(uint64_t), +- .payload.u64 = 0 +- }; +- vg_send_msg(g, &msg, -1); +-} +- +-static int +-vg_process_msg(VuDev *dev, VhostUserMsg *msg, int *do_reply) +-{ +- VuGpu *g = container_of(dev, VuGpu, dev.parent); +- +- switch (msg->request) { +- case VHOST_USER_GPU_SET_SOCKET: { +- g_return_val_if_fail(msg->fd_num == 1, 1); +- g_return_val_if_fail(g->sock_fd == -1, 1); +- g->sock_fd = msg->fds[0]; +- set_gpu_protocol_features(g); +- return 1; +- } +- default: +- return 0; +- } +- +- return 0; +-} +- +-static uint64_t +-vg_get_features(VuDev *dev) +-{ +- uint64_t features = 0; +- +- if (opt_virgl) { +- features |= 1 << VIRTIO_GPU_F_VIRGL; +- } +- +- return features; +-} +- +-static void +-vg_set_features(VuDev *dev, uint64_t features) +-{ +- VuGpu *g = container_of(dev, VuGpu, dev.parent); +- bool virgl = features & (1 << VIRTIO_GPU_F_VIRGL); +- +- if (virgl && !g->virgl_inited) { +- if (!vg_virgl_init(g)) 
{ +- vg_panic(dev, "Failed to initialize virgl"); +- } +- g->virgl_inited = true; +- } +- +- g->virgl = virgl; +-} +- +-static int +-vg_get_config(VuDev *dev, uint8_t *config, uint32_t len) +-{ +- VuGpu *g = container_of(dev, VuGpu, dev.parent); +- +- g_return_val_if_fail(len <= sizeof(struct virtio_gpu_config), -1); +- +- if (opt_virgl) { +- g->virtio_config.num_capsets = vg_virgl_get_num_capsets(); +- } +- +- memcpy(config, &g->virtio_config, len); +- +- return 0; +-} +- +-static int +-vg_set_config(VuDev *dev, const uint8_t *data, +- uint32_t offset, uint32_t size, +- uint32_t flags) +-{ +- VuGpu *g = container_of(dev, VuGpu, dev.parent); +- struct virtio_gpu_config *config = (struct virtio_gpu_config *)data; +- +- if (config->events_clear) { +- g->virtio_config.events_read &= ~config->events_clear; +- } +- +- return 0; +-} +- +-static const VuDevIface vuiface = { +- .set_features = vg_set_features, +- .get_features = vg_get_features, +- .queue_set_started = vg_queue_set_started, +- .process_msg = vg_process_msg, +- .get_config = vg_get_config, +- .set_config = vg_set_config, +-}; +- +-static void +-vg_destroy(VuGpu *g) +-{ +- struct virtio_gpu_simple_resource *res, *tmp; +- +- vug_deinit(&g->dev); +- +- vg_sock_fd_close(g); +- +- QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) { +- vg_resource_destroy(g, res); +- } +- +- vugbm_device_destroy(&g->gdev); +-} +- +-static GOptionEntry entries[] = { +- { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, +- "Print capabilities", NULL }, +- { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, +- "Use inherited fd socket", "FDNUM" }, +- { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, +- "Use UNIX socket path", "PATH" }, +- { "render-node", 'r', 0, G_OPTION_ARG_FILENAME, &opt_render_node, +- "Specify DRM render node", "PATH" }, +- { "virgl", 'v', 0, G_OPTION_ARG_NONE, &opt_virgl, +- "Turn virgl rendering on", NULL }, +- { NULL, } +-}; +- +-int +-main(int argc, char *argv[]) +-{ +- 
GOptionContext *context; +- GError *error = NULL; +- GMainLoop *loop = NULL; +- int fd; +- VuGpu g = { .sock_fd = -1, .drm_rnode_fd = -1 }; +- +- QTAILQ_INIT(&g.reslist); +- QTAILQ_INIT(&g.fenceq); +- +- context = g_option_context_new("QEMU vhost-user-gpu"); +- g_option_context_add_main_entries(context, entries, NULL); +- if (!g_option_context_parse(context, &argc, &argv, &error)) { +- g_printerr("Option parsing failed: %s\n", error->message); +- exit(EXIT_FAILURE); +- } +- g_option_context_free(context); +- +- if (opt_print_caps) { +- g_print("{\n"); +- g_print(" \"type\": \"gpu\",\n"); +- g_print(" \"features\": [\n"); +- g_print(" \"render-node\",\n"); +- g_print(" \"virgl\"\n"); +- g_print(" ]\n"); +- g_print("}\n"); +- exit(EXIT_SUCCESS); +- } +- +- g.drm_rnode_fd = qemu_drm_rendernode_open(opt_render_node); +- if (opt_render_node && g.drm_rnode_fd == -1) { +- g_printerr("Failed to open DRM rendernode.\n"); +- exit(EXIT_FAILURE); +- } +- +- if (g.drm_rnode_fd >= 0) { +- if (!vugbm_device_init(&g.gdev, g.drm_rnode_fd)) { +- g_warning("Failed to init DRM device, using fallback path"); +- } +- } +- +- if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) { +- g_printerr("Please specify either --fd or --socket-path\n"); +- exit(EXIT_FAILURE); +- } +- +- if (opt_socket_path) { +- int lsock = unix_listen(opt_socket_path, &error_fatal); +- if (lsock < 0) { +- g_printerr("Failed to listen on %s.\n", opt_socket_path); +- exit(EXIT_FAILURE); +- } +- fd = accept(lsock, NULL, NULL); +- close(lsock); +- } else { +- fd = opt_fdnum; +- } +- if (fd == -1) { +- g_printerr("Invalid vhost-user socket.\n"); +- exit(EXIT_FAILURE); +- } +- +- if (!vug_init(&g.dev, VHOST_USER_GPU_MAX_QUEUES, fd, vg_panic, &vuiface)) { +- g_printerr("Failed to initialize libvhost-user-glib.\n"); +- exit(EXIT_FAILURE); +- } +- +- loop = g_main_loop_new(NULL, FALSE); +- g_main_loop_run(loop); +- g_main_loop_unref(loop); +- +- vg_destroy(&g); +- if (g.drm_rnode_fd >= 0) { +- close(g.drm_rnode_fd); +- } +- 
+- return 0; +-} +diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c b/contrib/vhost-user-gpu/vhost-user-gpu.c +new file mode 100644 +index 0000000..b45d201 +--- /dev/null ++++ b/contrib/vhost-user-gpu/vhost-user-gpu.c +@@ -0,0 +1,1191 @@ ++/* ++ * Virtio vhost-user GPU Device ++ * ++ * Copyright Red Hat, Inc. 2013-2018 ++ * ++ * Authors: ++ * Dave Airlie ++ * Gerd Hoffmann ++ * Marc-André Lureau ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++#include "qemu/osdep.h" ++#include "qemu/drm.h" ++#include "qapi/error.h" ++#include "qemu/sockets.h" ++ ++#include ++#include ++ ++#include "vugpu.h" ++#include "hw/virtio/virtio-gpu-bswap.h" ++#include "hw/virtio/virtio-gpu-pixman.h" ++#include "virgl.h" ++#include "vugbm.h" ++ ++enum { ++ VHOST_USER_GPU_MAX_QUEUES = 2, ++}; ++ ++struct virtio_gpu_simple_resource { ++ uint32_t resource_id; ++ uint32_t width; ++ uint32_t height; ++ uint32_t format; ++ struct iovec *iov; ++ unsigned int iov_cnt; ++ uint32_t scanout_bitmask; ++ pixman_image_t *image; ++ struct vugbm_buffer buffer; ++ QTAILQ_ENTRY(virtio_gpu_simple_resource) next; ++}; ++ ++static gboolean opt_print_caps; ++static int opt_fdnum = -1; ++static char *opt_socket_path; ++static char *opt_render_node; ++static gboolean opt_virgl; ++ ++static void vg_handle_ctrl(VuDev *dev, int qidx); ++ ++static const char * ++vg_cmd_to_string(int cmd) ++{ ++#define CMD(cmd) [cmd] = #cmd ++ static const char *vg_cmd_str[] = { ++ CMD(VIRTIO_GPU_UNDEFINED), ++ ++ /* 2d commands */ ++ CMD(VIRTIO_GPU_CMD_GET_DISPLAY_INFO), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_UNREF), ++ CMD(VIRTIO_GPU_CMD_SET_SCANOUT), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_FLUSH), ++ CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING), ++ CMD(VIRTIO_GPU_CMD_GET_CAPSET_INFO), ++ CMD(VIRTIO_GPU_CMD_GET_CAPSET), 
++ ++ /* 3d commands */ ++ CMD(VIRTIO_GPU_CMD_CTX_CREATE), ++ CMD(VIRTIO_GPU_CMD_CTX_DESTROY), ++ CMD(VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE), ++ CMD(VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE), ++ CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D), ++ CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D), ++ CMD(VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D), ++ CMD(VIRTIO_GPU_CMD_SUBMIT_3D), ++ ++ /* cursor commands */ ++ CMD(VIRTIO_GPU_CMD_UPDATE_CURSOR), ++ CMD(VIRTIO_GPU_CMD_MOVE_CURSOR), ++ }; ++#undef REQ ++ ++ if (cmd >= 0 && cmd < G_N_ELEMENTS(vg_cmd_str)) { ++ return vg_cmd_str[cmd]; ++ } else { ++ return "unknown"; ++ } ++} ++ ++static int ++vg_sock_fd_read(int sock, void *buf, ssize_t buflen) ++{ ++ int ret; ++ ++ do { ++ ret = read(sock, buf, buflen); ++ } while (ret < 0 && (errno == EINTR || errno == EAGAIN)); ++ ++ g_warn_if_fail(ret == buflen); ++ return ret; ++} ++ ++static void ++vg_sock_fd_close(VuGpu *g) ++{ ++ if (g->sock_fd >= 0) { ++ close(g->sock_fd); ++ g->sock_fd = -1; ++ } ++} ++ ++static gboolean ++source_wait_cb(gint fd, GIOCondition condition, gpointer user_data) ++{ ++ VuGpu *g = user_data; ++ ++ if (!vg_recv_msg(g, VHOST_USER_GPU_DMABUF_UPDATE, 0, NULL)) { ++ return G_SOURCE_CONTINUE; ++ } ++ ++ /* resume */ ++ g->wait_ok = 0; ++ vg_handle_ctrl(&g->dev.parent, 0); ++ ++ return G_SOURCE_REMOVE; ++} ++ ++void ++vg_wait_ok(VuGpu *g) ++{ ++ assert(g->wait_ok == 0); ++ g->wait_ok = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP, ++ source_wait_cb, g); ++} ++ ++static int ++vg_sock_fd_write(int sock, const void *buf, ssize_t buflen, int fd) ++{ ++ ssize_t ret; ++ struct iovec iov = { ++ .iov_base = (void *)buf, ++ .iov_len = buflen, ++ }; ++ struct msghdr msg = { ++ .msg_iov = &iov, ++ .msg_iovlen = 1, ++ }; ++ union { ++ struct cmsghdr cmsghdr; ++ char control[CMSG_SPACE(sizeof(int))]; ++ } cmsgu; ++ struct cmsghdr *cmsg; ++ ++ if (fd != -1) { ++ msg.msg_control = cmsgu.control; ++ msg.msg_controllen = sizeof(cmsgu.control); ++ ++ cmsg = CMSG_FIRSTHDR(&msg); ++ cmsg->cmsg_len = 
CMSG_LEN(sizeof(int)); ++ cmsg->cmsg_level = SOL_SOCKET; ++ cmsg->cmsg_type = SCM_RIGHTS; ++ ++ *((int *)CMSG_DATA(cmsg)) = fd; ++ } ++ ++ do { ++ ret = sendmsg(sock, &msg, 0); ++ } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); ++ ++ g_warn_if_fail(ret == buflen); ++ return ret; ++} ++ ++void ++vg_send_msg(VuGpu *vg, const VhostUserGpuMsg *msg, int fd) ++{ ++ if (vg_sock_fd_write(vg->sock_fd, msg, ++ VHOST_USER_GPU_HDR_SIZE + msg->size, fd) < 0) { ++ vg_sock_fd_close(vg); ++ } ++} ++ ++bool ++vg_recv_msg(VuGpu *g, uint32_t expect_req, uint32_t expect_size, ++ gpointer payload) ++{ ++ uint32_t req, flags, size; ++ ++ if (vg_sock_fd_read(g->sock_fd, &req, sizeof(req)) < 0 || ++ vg_sock_fd_read(g->sock_fd, &flags, sizeof(flags)) < 0 || ++ vg_sock_fd_read(g->sock_fd, &size, sizeof(size)) < 0) { ++ goto err; ++ } ++ ++ g_return_val_if_fail(req == expect_req, false); ++ g_return_val_if_fail(flags & VHOST_USER_GPU_MSG_FLAG_REPLY, false); ++ g_return_val_if_fail(size == expect_size, false); ++ ++ if (size && vg_sock_fd_read(g->sock_fd, payload, size) != size) { ++ goto err; ++ } ++ ++ return true; ++ ++err: ++ vg_sock_fd_close(g); ++ return false; ++} ++ ++static struct virtio_gpu_simple_resource * ++virtio_gpu_find_resource(VuGpu *g, uint32_t resource_id) ++{ ++ struct virtio_gpu_simple_resource *res; ++ ++ QTAILQ_FOREACH(res, &g->reslist, next) { ++ if (res->resource_id == resource_id) { ++ return res; ++ } ++ } ++ return NULL; ++} ++ ++void ++vg_ctrl_response(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd, ++ struct virtio_gpu_ctrl_hdr *resp, ++ size_t resp_len) ++{ ++ size_t s; ++ ++ if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE) { ++ resp->flags |= VIRTIO_GPU_FLAG_FENCE; ++ resp->fence_id = cmd->cmd_hdr.fence_id; ++ resp->ctx_id = cmd->cmd_hdr.ctx_id; ++ } ++ virtio_gpu_ctrl_hdr_bswap(resp); ++ s = iov_from_buf(cmd->elem.in_sg, cmd->elem.in_num, 0, resp, resp_len); ++ if (s != resp_len) { ++ g_critical("%s: response size incorrect %zu vs %zu", ++ 
__func__, s, resp_len); ++ } ++ vu_queue_push(&g->dev.parent, cmd->vq, &cmd->elem, s); ++ vu_queue_notify(&g->dev.parent, cmd->vq); ++ cmd->finished = true; ++} ++ ++void ++vg_ctrl_response_nodata(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd, ++ enum virtio_gpu_ctrl_type type) ++{ ++ struct virtio_gpu_ctrl_hdr resp = { ++ .type = type, ++ }; ++ ++ vg_ctrl_response(g, cmd, &resp, sizeof(resp)); ++} ++ ++void ++vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_resp_display_info dpy_info = { {} }; ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_GET_DISPLAY_INFO, ++ .size = 0, ++ }; ++ ++ assert(vg->wait_ok == 0); ++ ++ vg_send_msg(vg, &msg, -1); ++ if (!vg_recv_msg(vg, msg.request, sizeof(dpy_info), &dpy_info)) { ++ return; ++ } ++ ++ vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info)); ++} ++ ++static void ++vg_resource_create_2d(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ pixman_format_code_t pformat; ++ struct virtio_gpu_simple_resource *res; ++ struct virtio_gpu_resource_create_2d c2d; ++ ++ VUGPU_FILL_CMD(c2d); ++ virtio_gpu_bswap_32(&c2d, sizeof(c2d)); ++ ++ if (c2d.resource_id == 0) { ++ g_critical("%s: resource id 0 is not allowed", __func__); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ res = virtio_gpu_find_resource(g, c2d.resource_id); ++ if (res) { ++ g_critical("%s: resource already exists %d", __func__, c2d.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ res = g_new0(struct virtio_gpu_simple_resource, 1); ++ res->width = c2d.width; ++ res->height = c2d.height; ++ res->format = c2d.format; ++ res->resource_id = c2d.resource_id; ++ ++ pformat = virtio_gpu_get_pixman_format(c2d.format); ++ if (!pformat) { ++ g_critical("%s: host couldn't handle guest format %d", ++ __func__, c2d.format); ++ g_free(res); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; ++ return; ++ } ++ 
vugbm_buffer_create(&res->buffer, &g->gdev, c2d.width, c2d.height); ++ res->image = pixman_image_create_bits(pformat, ++ c2d.width, ++ c2d.height, ++ (uint32_t *)res->buffer.mmap, ++ res->buffer.stride); ++ if (!res->image) { ++ g_critical("%s: resource creation failed %d %d %d", ++ __func__, c2d.resource_id, c2d.width, c2d.height); ++ g_free(res); ++ cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; ++ return; ++ } ++ ++ QTAILQ_INSERT_HEAD(&g->reslist, res, next); ++} ++ ++static void ++vg_disable_scanout(VuGpu *g, int scanout_id) ++{ ++ struct virtio_gpu_scanout *scanout = &g->scanout[scanout_id]; ++ struct virtio_gpu_simple_resource *res; ++ ++ if (scanout->resource_id == 0) { ++ return; ++ } ++ ++ res = virtio_gpu_find_resource(g, scanout->resource_id); ++ if (res) { ++ res->scanout_bitmask &= ~(1 << scanout_id); ++ } ++ ++ scanout->width = 0; ++ scanout->height = 0; ++ ++ if (g->sock_fd >= 0) { ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_SCANOUT, ++ .size = sizeof(VhostUserGpuScanout), ++ .payload.scanout.scanout_id = scanout_id, ++ }; ++ vg_send_msg(g, &msg, -1); ++ } ++} ++ ++static void ++vg_resource_destroy(VuGpu *g, ++ struct virtio_gpu_simple_resource *res) ++{ ++ int i; ++ ++ if (res->scanout_bitmask) { ++ for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { ++ if (res->scanout_bitmask & (1 << i)) { ++ vg_disable_scanout(g, i); ++ } ++ } ++ } ++ ++ vugbm_buffer_destroy(&res->buffer); ++ pixman_image_unref(res->image); ++ QTAILQ_REMOVE(&g->reslist, res, next); ++ g_free(res); ++} ++ ++static void ++vg_resource_unref(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_simple_resource *res; ++ struct virtio_gpu_resource_unref unref; ++ ++ VUGPU_FILL_CMD(unref); ++ virtio_gpu_bswap_32(&unref, sizeof(unref)); ++ ++ res = virtio_gpu_find_resource(g, unref.resource_id); ++ if (!res) { ++ g_critical("%s: illegal resource specified %d", ++ __func__, unref.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ 
return; ++ } ++ vg_resource_destroy(g, res); ++} ++ ++int ++vg_create_mapping_iov(VuGpu *g, ++ struct virtio_gpu_resource_attach_backing *ab, ++ struct virtio_gpu_ctrl_command *cmd, ++ struct iovec **iov) ++{ ++ struct virtio_gpu_mem_entry *ents; ++ size_t esize, s; ++ int i; ++ ++ if (ab->nr_entries > 16384) { ++ g_critical("%s: nr_entries is too big (%d > 16384)", ++ __func__, ab->nr_entries); ++ return -1; ++ } ++ ++ esize = sizeof(*ents) * ab->nr_entries; ++ ents = g_malloc(esize); ++ s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, ++ sizeof(*ab), ents, esize); ++ if (s != esize) { ++ g_critical("%s: command data size incorrect %zu vs %zu", ++ __func__, s, esize); ++ g_free(ents); ++ return -1; ++ } ++ ++ *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries); ++ for (i = 0; i < ab->nr_entries; i++) { ++ uint64_t len = ents[i].length; ++ (*iov)[i].iov_len = ents[i].length; ++ (*iov)[i].iov_base = vu_gpa_to_va(&g->dev.parent, &len, ents[i].addr); ++ if (!(*iov)[i].iov_base || len != ents[i].length) { ++ g_critical("%s: resource %d element %d", ++ __func__, ab->resource_id, i); ++ g_free(*iov); ++ g_free(ents); ++ *iov = NULL; ++ return -1; ++ } ++ } ++ g_free(ents); ++ return 0; ++} ++ ++static void ++vg_resource_attach_backing(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_simple_resource *res; ++ struct virtio_gpu_resource_attach_backing ab; ++ int ret; ++ ++ VUGPU_FILL_CMD(ab); ++ virtio_gpu_bswap_32(&ab, sizeof(ab)); ++ ++ res = virtio_gpu_find_resource(g, ab.resource_id); ++ if (!res) { ++ g_critical("%s: illegal resource specified %d", ++ __func__, ab.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov); ++ if (ret != 0) { ++ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; ++ return; ++ } ++ ++ res->iov_cnt = ab.nr_entries; ++} ++ ++static void ++vg_resource_detach_backing(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct 
virtio_gpu_simple_resource *res; ++ struct virtio_gpu_resource_detach_backing detach; ++ ++ VUGPU_FILL_CMD(detach); ++ virtio_gpu_bswap_32(&detach, sizeof(detach)); ++ ++ res = virtio_gpu_find_resource(g, detach.resource_id); ++ if (!res || !res->iov) { ++ g_critical("%s: illegal resource specified %d", ++ __func__, detach.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ g_free(res->iov); ++ res->iov = NULL; ++ res->iov_cnt = 0; ++} ++ ++static void ++vg_transfer_to_host_2d(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_simple_resource *res; ++ int h; ++ uint32_t src_offset, dst_offset, stride; ++ int bpp; ++ pixman_format_code_t format; ++ struct virtio_gpu_transfer_to_host_2d t2d; ++ ++ VUGPU_FILL_CMD(t2d); ++ virtio_gpu_t2d_bswap(&t2d); ++ ++ res = virtio_gpu_find_resource(g, t2d.resource_id); ++ if (!res || !res->iov) { ++ g_critical("%s: illegal resource specified %d", ++ __func__, t2d.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ if (t2d.r.x > res->width || ++ t2d.r.y > res->height || ++ t2d.r.width > res->width || ++ t2d.r.height > res->height || ++ t2d.r.x + t2d.r.width > res->width || ++ t2d.r.y + t2d.r.height > res->height) { ++ g_critical("%s: transfer bounds outside resource" ++ " bounds for resource %d: %d %d %d %d vs %d %d", ++ __func__, t2d.resource_id, t2d.r.x, t2d.r.y, ++ t2d.r.width, t2d.r.height, res->width, res->height); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; ++ return; ++ } ++ ++ format = pixman_image_get_format(res->image); ++ bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8; ++ stride = pixman_image_get_stride(res->image); ++ ++ if (t2d.offset || t2d.r.x || t2d.r.y || ++ t2d.r.width != pixman_image_get_width(res->image)) { ++ void *img_data = pixman_image_get_data(res->image); ++ for (h = 0; h < t2d.r.height; h++) { ++ src_offset = t2d.offset + stride * h; ++ dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp); ++ 
++ iov_to_buf(res->iov, res->iov_cnt, src_offset, ++ img_data ++ + dst_offset, t2d.r.width * bpp); ++ } ++ } else { ++ iov_to_buf(res->iov, res->iov_cnt, 0, ++ pixman_image_get_data(res->image), ++ pixman_image_get_stride(res->image) ++ * pixman_image_get_height(res->image)); ++ } ++} ++ ++static void ++vg_set_scanout(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_simple_resource *res, *ores; ++ struct virtio_gpu_scanout *scanout; ++ struct virtio_gpu_set_scanout ss; ++ int fd; ++ ++ VUGPU_FILL_CMD(ss); ++ virtio_gpu_bswap_32(&ss, sizeof(ss)); ++ ++ if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) { ++ g_critical("%s: illegal scanout id specified %d", ++ __func__, ss.scanout_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; ++ return; ++ } ++ ++ if (ss.resource_id == 0) { ++ vg_disable_scanout(g, ss.scanout_id); ++ return; ++ } ++ ++ /* create a surface for this scanout */ ++ res = virtio_gpu_find_resource(g, ss.resource_id); ++ if (!res) { ++ g_critical("%s: illegal resource specified %d", ++ __func__, ss.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ if (ss.r.x > res->width || ++ ss.r.y > res->height || ++ ss.r.width > res->width || ++ ss.r.height > res->height || ++ ss.r.x + ss.r.width > res->width || ++ ss.r.y + ss.r.height > res->height) { ++ g_critical("%s: illegal scanout %d bounds for" ++ " resource %d, (%d,%d)+%d,%d vs %d %d", ++ __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y, ++ ss.r.width, ss.r.height, res->width, res->height); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; ++ return; ++ } ++ ++ scanout = &g->scanout[ss.scanout_id]; ++ ++ ores = virtio_gpu_find_resource(g, scanout->resource_id); ++ if (ores) { ++ ores->scanout_bitmask &= ~(1 << ss.scanout_id); ++ } ++ ++ res->scanout_bitmask |= (1 << ss.scanout_id); ++ scanout->resource_id = ss.resource_id; ++ scanout->x = ss.r.x; ++ scanout->y = ss.r.y; ++ scanout->width = ss.r.width; ++ scanout->height = 
ss.r.height; ++ ++ struct vugbm_buffer *buffer = &res->buffer; ++ ++ if (vugbm_buffer_can_get_dmabuf_fd(buffer)) { ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_DMABUF_SCANOUT, ++ .size = sizeof(VhostUserGpuDMABUFScanout), ++ .payload.dmabuf_scanout = (VhostUserGpuDMABUFScanout) { ++ .scanout_id = ss.scanout_id, ++ .x = ss.r.x, ++ .y = ss.r.y, ++ .width = ss.r.width, ++ .height = ss.r.height, ++ .fd_width = buffer->width, ++ .fd_height = buffer->height, ++ .fd_stride = buffer->stride, ++ .fd_drm_fourcc = buffer->format ++ } ++ }; ++ ++ if (vugbm_buffer_get_dmabuf_fd(buffer, &fd)) { ++ vg_send_msg(g, &msg, fd); ++ close(fd); ++ } ++ } else { ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_SCANOUT, ++ .size = sizeof(VhostUserGpuScanout), ++ .payload.scanout = (VhostUserGpuScanout) { ++ .scanout_id = ss.scanout_id, ++ .width = scanout->width, ++ .height = scanout->height ++ } ++ }; ++ vg_send_msg(g, &msg, -1); ++ } ++} ++ ++static void ++vg_resource_flush(VuGpu *g, ++ struct virtio_gpu_ctrl_command *cmd) ++{ ++ struct virtio_gpu_simple_resource *res; ++ struct virtio_gpu_resource_flush rf; ++ pixman_region16_t flush_region; ++ int i; ++ ++ VUGPU_FILL_CMD(rf); ++ virtio_gpu_bswap_32(&rf, sizeof(rf)); ++ ++ res = virtio_gpu_find_resource(g, rf.resource_id); ++ if (!res) { ++ g_critical("%s: illegal resource specified %d\n", ++ __func__, rf.resource_id); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; ++ return; ++ } ++ ++ if (rf.r.x > res->width || ++ rf.r.y > res->height || ++ rf.r.width > res->width || ++ rf.r.height > res->height || ++ rf.r.x + rf.r.width > res->width || ++ rf.r.y + rf.r.height > res->height) { ++ g_critical("%s: flush bounds outside resource" ++ " bounds for resource %d: %d %d %d %d vs %d %d\n", ++ __func__, rf.resource_id, rf.r.x, rf.r.y, ++ rf.r.width, rf.r.height, res->width, res->height); ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; ++ return; ++ } ++ ++ pixman_region_init_rect(&flush_region, ++ rf.r.x, 
rf.r.y, rf.r.width, rf.r.height); ++ for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { ++ struct virtio_gpu_scanout *scanout; ++ pixman_region16_t region, finalregion; ++ pixman_box16_t *extents; ++ ++ if (!(res->scanout_bitmask & (1 << i))) { ++ continue; ++ } ++ scanout = &g->scanout[i]; ++ ++ pixman_region_init(&finalregion); ++ pixman_region_init_rect(®ion, scanout->x, scanout->y, ++ scanout->width, scanout->height); ++ ++ pixman_region_intersect(&finalregion, &flush_region, ®ion); ++ ++ extents = pixman_region_extents(&finalregion); ++ size_t width = extents->x2 - extents->x1; ++ size_t height = extents->y2 - extents->y1; ++ ++ if (vugbm_buffer_can_get_dmabuf_fd(&res->buffer)) { ++ VhostUserGpuMsg vmsg = { ++ .request = VHOST_USER_GPU_DMABUF_UPDATE, ++ .size = sizeof(VhostUserGpuUpdate), ++ .payload.update = (VhostUserGpuUpdate) { ++ .scanout_id = i, ++ .x = extents->x1, ++ .y = extents->y1, ++ .width = width, ++ .height = height, ++ } ++ }; ++ vg_send_msg(g, &vmsg, -1); ++ vg_wait_ok(g); ++ } else { ++ size_t bpp = ++ PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) / 8; ++ size_t size = width * height * bpp; ++ ++ void *p = g_malloc(VHOST_USER_GPU_HDR_SIZE + ++ sizeof(VhostUserGpuUpdate) + size); ++ VhostUserGpuMsg *msg = p; ++ msg->request = VHOST_USER_GPU_UPDATE; ++ msg->size = sizeof(VhostUserGpuUpdate) + size; ++ msg->payload.update = (VhostUserGpuUpdate) { ++ .scanout_id = i, ++ .x = extents->x1, ++ .y = extents->y1, ++ .width = width, ++ .height = height, ++ }; ++ pixman_image_t *i = ++ pixman_image_create_bits(pixman_image_get_format(res->image), ++ msg->payload.update.width, ++ msg->payload.update.height, ++ p + offsetof(VhostUserGpuMsg, ++ payload.update.data), ++ width * bpp); ++ pixman_image_composite(PIXMAN_OP_SRC, ++ res->image, NULL, i, ++ extents->x1, extents->y1, ++ 0, 0, 0, 0, ++ width, height); ++ pixman_image_unref(i); ++ vg_send_msg(g, msg, -1); ++ g_free(msg); ++ } ++ pixman_region_fini(®ion); ++ pixman_region_fini(&finalregion); ++ 
} ++ pixman_region_fini(&flush_region); ++} ++ ++static void ++vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) ++{ ++ switch (cmd->cmd_hdr.type) { ++ case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: ++ vg_get_display_info(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: ++ vg_resource_create_2d(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_RESOURCE_UNREF: ++ vg_resource_unref(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_RESOURCE_FLUSH: ++ vg_resource_flush(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: ++ vg_transfer_to_host_2d(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_SET_SCANOUT: ++ vg_set_scanout(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: ++ vg_resource_attach_backing(vg, cmd); ++ break; ++ case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: ++ vg_resource_detach_backing(vg, cmd); ++ break; ++ /* case VIRTIO_GPU_CMD_GET_EDID: */ ++ /* break */ ++ default: ++ g_warning("TODO handle ctrl %x\n", cmd->cmd_hdr.type); ++ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; ++ break; ++ } ++ if (!cmd->finished) { ++ vg_ctrl_response_nodata(vg, cmd, cmd->error ? 
cmd->error : ++ VIRTIO_GPU_RESP_OK_NODATA); ++ } ++} ++ ++static void ++vg_handle_ctrl(VuDev *dev, int qidx) ++{ ++ VuGpu *vg = container_of(dev, VuGpu, dev.parent); ++ VuVirtq *vq = vu_get_queue(dev, qidx); ++ struct virtio_gpu_ctrl_command *cmd = NULL; ++ size_t len; ++ ++ for (;;) { ++ if (vg->wait_ok != 0) { ++ return; ++ } ++ ++ cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command)); ++ if (!cmd) { ++ break; ++ } ++ cmd->vq = vq; ++ cmd->error = 0; ++ cmd->finished = false; ++ ++ len = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, ++ 0, &cmd->cmd_hdr, sizeof(cmd->cmd_hdr)); ++ if (len != sizeof(cmd->cmd_hdr)) { ++ g_warning("%s: command size incorrect %zu vs %zu\n", ++ __func__, len, sizeof(cmd->cmd_hdr)); ++ } ++ ++ virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr); ++ g_debug("%d %s\n", cmd->cmd_hdr.type, ++ vg_cmd_to_string(cmd->cmd_hdr.type)); ++ ++ if (vg->virgl) { ++ vg_virgl_process_cmd(vg, cmd); ++ } else { ++ vg_process_cmd(vg, cmd); ++ } ++ ++ if (!cmd->finished) { ++ QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next); ++ vg->inflight++; ++ } else { ++ g_free(cmd); ++ } ++ } ++} ++ ++static void ++update_cursor_data_simple(VuGpu *g, uint32_t resource_id, gpointer data) ++{ ++ struct virtio_gpu_simple_resource *res; ++ ++ res = virtio_gpu_find_resource(g, resource_id); ++ g_return_if_fail(res != NULL); ++ g_return_if_fail(pixman_image_get_width(res->image) == 64); ++ g_return_if_fail(pixman_image_get_height(res->image) == 64); ++ g_return_if_fail( ++ PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) == 32); ++ ++ memcpy(data, pixman_image_get_data(res->image), 64 * 64 * sizeof(uint32_t)); ++} ++ ++static void ++vg_process_cursor_cmd(VuGpu *g, struct virtio_gpu_update_cursor *cursor) ++{ ++ bool move = cursor->hdr.type != VIRTIO_GPU_CMD_MOVE_CURSOR; ++ ++ g_debug("%s move:%d\n", G_STRFUNC, move); ++ ++ if (move) { ++ VhostUserGpuMsg msg = { ++ .request = cursor->resource_id ? 
++ VHOST_USER_GPU_CURSOR_POS : VHOST_USER_GPU_CURSOR_POS_HIDE, ++ .size = sizeof(VhostUserGpuCursorPos), ++ .payload.cursor_pos = { ++ .scanout_id = cursor->pos.scanout_id, ++ .x = cursor->pos.x, ++ .y = cursor->pos.y, ++ } ++ }; ++ vg_send_msg(g, &msg, -1); ++ } else { ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_CURSOR_UPDATE, ++ .size = sizeof(VhostUserGpuCursorUpdate), ++ .payload.cursor_update = { ++ .pos = { ++ .scanout_id = cursor->pos.scanout_id, ++ .x = cursor->pos.x, ++ .y = cursor->pos.y, ++ }, ++ .hot_x = cursor->hot_x, ++ .hot_y = cursor->hot_y, ++ } ++ }; ++ if (g->virgl) { ++ vg_virgl_update_cursor_data(g, cursor->resource_id, ++ msg.payload.cursor_update.data); ++ } else { ++ update_cursor_data_simple(g, cursor->resource_id, ++ msg.payload.cursor_update.data); ++ } ++ vg_send_msg(g, &msg, -1); ++ } ++} ++ ++static void ++vg_handle_cursor(VuDev *dev, int qidx) ++{ ++ VuGpu *g = container_of(dev, VuGpu, dev.parent); ++ VuVirtq *vq = vu_get_queue(dev, qidx); ++ VuVirtqElement *elem; ++ size_t len; ++ struct virtio_gpu_update_cursor cursor; ++ ++ for (;;) { ++ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); ++ if (!elem) { ++ break; ++ } ++ g_debug("cursor out:%d in:%d\n", elem->out_num, elem->in_num); ++ ++ len = iov_to_buf(elem->out_sg, elem->out_num, ++ 0, &cursor, sizeof(cursor)); ++ if (len != sizeof(cursor)) { ++ g_warning("%s: cursor size incorrect %zu vs %zu\n", ++ __func__, len, sizeof(cursor)); ++ } else { ++ virtio_gpu_bswap_32(&cursor, sizeof(cursor)); ++ vg_process_cursor_cmd(g, &cursor); ++ } ++ vu_queue_push(dev, vq, elem, 0); ++ vu_queue_notify(dev, vq); ++ g_free(elem); ++ } ++} ++ ++static void ++vg_panic(VuDev *dev, const char *msg) ++{ ++ g_critical("%s\n", msg); ++ exit(1); ++} ++ ++static void ++vg_queue_set_started(VuDev *dev, int qidx, bool started) ++{ ++ VuVirtq *vq = vu_get_queue(dev, qidx); ++ ++ g_debug("queue started %d:%d\n", qidx, started); ++ ++ switch (qidx) { ++ case 0: ++ vu_set_queue_handler(dev, vq, 
started ? vg_handle_ctrl : NULL); ++ break; ++ case 1: ++ vu_set_queue_handler(dev, vq, started ? vg_handle_cursor : NULL); ++ break; ++ default: ++ break; ++ } ++} ++ ++static void ++set_gpu_protocol_features(VuGpu *g) ++{ ++ uint64_t u64; ++ VhostUserGpuMsg msg = { ++ .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES ++ }; ++ ++ assert(g->wait_ok == 0); ++ vg_send_msg(g, &msg, -1); ++ if (!vg_recv_msg(g, msg.request, sizeof(u64), &u64)) { ++ return; ++ } ++ ++ msg = (VhostUserGpuMsg) { ++ .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES, ++ .size = sizeof(uint64_t), ++ .payload.u64 = 0 ++ }; ++ vg_send_msg(g, &msg, -1); ++} ++ ++static int ++vg_process_msg(VuDev *dev, VhostUserMsg *msg, int *do_reply) ++{ ++ VuGpu *g = container_of(dev, VuGpu, dev.parent); ++ ++ switch (msg->request) { ++ case VHOST_USER_GPU_SET_SOCKET: { ++ g_return_val_if_fail(msg->fd_num == 1, 1); ++ g_return_val_if_fail(g->sock_fd == -1, 1); ++ g->sock_fd = msg->fds[0]; ++ set_gpu_protocol_features(g); ++ return 1; ++ } ++ default: ++ return 0; ++ } ++ ++ return 0; ++} ++ ++static uint64_t ++vg_get_features(VuDev *dev) ++{ ++ uint64_t features = 0; ++ ++ if (opt_virgl) { ++ features |= 1 << VIRTIO_GPU_F_VIRGL; ++ } ++ ++ return features; ++} ++ ++static void ++vg_set_features(VuDev *dev, uint64_t features) ++{ ++ VuGpu *g = container_of(dev, VuGpu, dev.parent); ++ bool virgl = features & (1 << VIRTIO_GPU_F_VIRGL); ++ ++ if (virgl && !g->virgl_inited) { ++ if (!vg_virgl_init(g)) { ++ vg_panic(dev, "Failed to initialize virgl"); ++ } ++ g->virgl_inited = true; ++ } ++ ++ g->virgl = virgl; ++} ++ ++static int ++vg_get_config(VuDev *dev, uint8_t *config, uint32_t len) ++{ ++ VuGpu *g = container_of(dev, VuGpu, dev.parent); ++ ++ g_return_val_if_fail(len <= sizeof(struct virtio_gpu_config), -1); ++ ++ if (opt_virgl) { ++ g->virtio_config.num_capsets = vg_virgl_get_num_capsets(); ++ } ++ ++ memcpy(config, &g->virtio_config, len); ++ ++ return 0; ++} ++ ++static int ++vg_set_config(VuDev *dev, const 
uint8_t *data, ++ uint32_t offset, uint32_t size, ++ uint32_t flags) ++{ ++ VuGpu *g = container_of(dev, VuGpu, dev.parent); ++ struct virtio_gpu_config *config = (struct virtio_gpu_config *)data; ++ ++ if (config->events_clear) { ++ g->virtio_config.events_read &= ~config->events_clear; ++ } ++ ++ return 0; ++} ++ ++static const VuDevIface vuiface = { ++ .set_features = vg_set_features, ++ .get_features = vg_get_features, ++ .queue_set_started = vg_queue_set_started, ++ .process_msg = vg_process_msg, ++ .get_config = vg_get_config, ++ .set_config = vg_set_config, ++}; ++ ++static void ++vg_destroy(VuGpu *g) ++{ ++ struct virtio_gpu_simple_resource *res, *tmp; ++ ++ vug_deinit(&g->dev); ++ ++ vg_sock_fd_close(g); ++ ++ QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) { ++ vg_resource_destroy(g, res); ++ } ++ ++ vugbm_device_destroy(&g->gdev); ++} ++ ++static GOptionEntry entries[] = { ++ { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, ++ "Print capabilities", NULL }, ++ { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, ++ "Use inherited fd socket", "FDNUM" }, ++ { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, ++ "Use UNIX socket path", "PATH" }, ++ { "render-node", 'r', 0, G_OPTION_ARG_FILENAME, &opt_render_node, ++ "Specify DRM render node", "PATH" }, ++ { "virgl", 'v', 0, G_OPTION_ARG_NONE, &opt_virgl, ++ "Turn virgl rendering on", NULL }, ++ { NULL, } ++}; ++ ++int ++main(int argc, char *argv[]) ++{ ++ GOptionContext *context; ++ GError *error = NULL; ++ GMainLoop *loop = NULL; ++ int fd; ++ VuGpu g = { .sock_fd = -1, .drm_rnode_fd = -1 }; ++ ++ QTAILQ_INIT(&g.reslist); ++ QTAILQ_INIT(&g.fenceq); ++ ++ context = g_option_context_new("QEMU vhost-user-gpu"); ++ g_option_context_add_main_entries(context, entries, NULL); ++ if (!g_option_context_parse(context, &argc, &argv, &error)) { ++ g_printerr("Option parsing failed: %s\n", error->message); ++ exit(EXIT_FAILURE); ++ } ++ g_option_context_free(context); ++ ++ if (opt_print_caps) 
{ ++ g_print("{\n"); ++ g_print(" \"type\": \"gpu\",\n"); ++ g_print(" \"features\": [\n"); ++ g_print(" \"render-node\",\n"); ++ g_print(" \"virgl\"\n"); ++ g_print(" ]\n"); ++ g_print("}\n"); ++ exit(EXIT_SUCCESS); ++ } ++ ++ g.drm_rnode_fd = qemu_drm_rendernode_open(opt_render_node); ++ if (opt_render_node && g.drm_rnode_fd == -1) { ++ g_printerr("Failed to open DRM rendernode.\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (g.drm_rnode_fd >= 0) { ++ if (!vugbm_device_init(&g.gdev, g.drm_rnode_fd)) { ++ g_warning("Failed to init DRM device, using fallback path"); ++ } ++ } ++ ++ if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) { ++ g_printerr("Please specify either --fd or --socket-path\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (opt_socket_path) { ++ int lsock = unix_listen(opt_socket_path, &error_fatal); ++ if (lsock < 0) { ++ g_printerr("Failed to listen on %s.\n", opt_socket_path); ++ exit(EXIT_FAILURE); ++ } ++ fd = accept(lsock, NULL, NULL); ++ close(lsock); ++ } else { ++ fd = opt_fdnum; ++ } ++ if (fd == -1) { ++ g_printerr("Invalid vhost-user socket.\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (!vug_init(&g.dev, VHOST_USER_GPU_MAX_QUEUES, fd, vg_panic, &vuiface)) { ++ g_printerr("Failed to initialize libvhost-user-glib.\n"); ++ exit(EXIT_FAILURE); ++ } ++ ++ loop = g_main_loop_new(NULL, FALSE); ++ g_main_loop_run(loop); ++ g_main_loop_unref(loop); ++ ++ vg_destroy(&g); ++ if (g.drm_rnode_fd >= 0) { ++ close(g.drm_rnode_fd); ++ } ++ ++ return 0; ++} +-- +1.8.3.1 + diff --git a/kvm-migration-Rate-limit-inside-host-pages.patch b/kvm-migration-Rate-limit-inside-host-pages.patch new file mode 100644 index 0000000..2d3d519 --- /dev/null +++ b/kvm-migration-Rate-limit-inside-host-pages.patch @@ -0,0 +1,172 @@ +From 8e8f421cce99543081f225acf46541312cfbc371 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Tue, 17 Mar 2020 17:05:18 +0000 +Subject: [PATCH 1/2] migration: Rate limit inside host pages + +RH-Author: Laurent Vivier +Message-id: 
<20200317170518.9303-1-lvivier@redhat.com> +Patchwork-id: 94374 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] migration: Rate limit inside host pages +Bugzilla: 1814336 +RH-Acked-by: Peter Xu +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert + +From: "Dr. David Alan Gilbert" + +When using hugepages, rate limiting is necessary within each huge +page, since a 1G huge page can take a significant time to send, so +you end up with bursty behaviour. + +Fixes: 4c011c37ecb3 ("postcopy: Send whole huge pages") +Reported-by: Lin Ma +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Signed-off-by: Juan Quintela +(cherry picked from commit 97e1e06780e70f6e98a0d2df881e0c0927d3aeb6) +Signed-off-by: Laurent Vivier + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1814336 +BRANCH: rhel-av-8.2.0 +UPSTREAM: Merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27283241 +TESTED: Tested that the migration abort doesn't trigger an error message in + the kernel logs on P9 + +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/migration.c | 57 ++++++++++++++++++++++++++++---------------------- + migration/migration.h | 1 + + migration/ram.c | 2 ++ + migration/trace-events | 4 ++-- + 4 files changed, 37 insertions(+), 27 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index ed18c59..e31d0f5 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3253,6 +3253,37 @@ void migration_consume_urgent_request(void) + qemu_sem_wait(&migrate_get_current()->rate_limit_sem); + } + ++/* Returns true if the rate limiting was broken by an urgent request */ ++bool migration_rate_limit(void) ++{ ++ int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); ++ MigrationState *s = migrate_get_current(); ++ ++ bool urgent = false; ++ migration_update_counters(s, now); ++ if (qemu_file_rate_limit(s->to_dst_file)) { ++ /* ++ * Wait for a delay to do rate limiting OR ++ * something urgent to post the semaphore. ++ */ ++ int ms = s->iteration_start_time + BUFFER_DELAY - now; ++ trace_migration_rate_limit_pre(ms); ++ if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { ++ /* ++ * We were woken by one or more urgent things but ++ * the timedwait will have consumed one of them. ++ * The service routine for the urgent wake will dec ++ * the semaphore itself for each item it consumes, ++ * so add this one we just eat back. ++ */ ++ qemu_sem_post(&s->rate_limit_sem); ++ urgent = true; ++ } ++ trace_migration_rate_limit_post(urgent); ++ } ++ return urgent; ++} ++ + /* + * Master migration thread on the source VM. + * It drives the migration and pumps the data down the outgoing channel. 
+@@ -3319,8 +3350,6 @@ static void *migration_thread(void *opaque) + trace_migration_thread_setup_complete(); + + while (migration_is_active(s)) { +- int64_t current_time; +- + if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { + MigIterateState iter_state = migration_iteration_run(s); + if (iter_state == MIG_ITERATE_SKIP) { +@@ -3347,29 +3376,7 @@ static void *migration_thread(void *opaque) + update_iteration_initial_status(s); + } + +- current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +- +- migration_update_counters(s, current_time); +- +- urgent = false; +- if (qemu_file_rate_limit(s->to_dst_file)) { +- /* Wait for a delay to do rate limiting OR +- * something urgent to post the semaphore. +- */ +- int ms = s->iteration_start_time + BUFFER_DELAY - current_time; +- trace_migration_thread_ratelimit_pre(ms); +- if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { +- /* We were worken by one or more urgent things but +- * the timedwait will have consumed one of them. +- * The service routine for the urgent wake will dec +- * the semaphore itself for each item it consumes, +- * so add this one we just eat back. 
+- */ +- qemu_sem_post(&s->rate_limit_sem); +- urgent = true; +- } +- trace_migration_thread_ratelimit_post(urgent); +- } ++ urgent = migration_rate_limit(); + } + + trace_migration_thread_after_loop(); +diff --git a/migration/migration.h b/migration/migration.h +index a2b2336..a15e8d8 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -347,5 +347,6 @@ extern bool migrate_pre_2_2; + + void migration_make_urgent_request(void); + void migration_consume_urgent_request(void); ++bool migration_rate_limit(void); + + #endif +diff --git a/migration/ram.c b/migration/ram.c +index 3891eff..5344c7d 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2661,6 +2661,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, + + pages += tmppages; + pss->page++; ++ /* Allow rate limiting to happen in the middle of huge pages */ ++ migration_rate_limit(); + } while ((pss->page & (pagesize_bits - 1)) && + offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); + +diff --git a/migration/trace-events b/migration/trace-events +index 6dee7b5..2f9129e 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -138,12 +138,12 @@ migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi6 + migration_completion_file_err(void) "" + migration_completion_postcopy_end(void) "" + migration_completion_postcopy_end_after_complete(void) "" ++migration_rate_limit_pre(int ms) "%d ms" ++migration_rate_limit_post(int urgent) "urgent: %d" + migration_return_path_end_before(void) "" + migration_return_path_end_after(int rp_error) "%d" + migration_thread_after_loop(void) "" + migration_thread_file_err(void) "" +-migration_thread_ratelimit_pre(int ms) "%d ms" +-migration_thread_ratelimit_post(int urgent) "urgent: %d" + migration_thread_setup_complete(void) "" + open_return_path_on_source(void) "" + open_return_path_on_source_continue(void) "" +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index fb07343..4045d6d 100644 
--- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 15%{?dist} +Release: 16%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -617,6 +617,10 @@ Patch233: kvm-iotests-Add-iothread-cases-to-155.patch Patch234: kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch # For bz#1809380 - guest hang during reboot process after migration from RHEl7.8 to RHEL8.2.0. Patch235: kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch +# For bz#1814336 - [POWER9] QEMU migration-test triggers a kernel warning +Patch236: kvm-migration-Rate-limit-inside-host-pages.patch +# For bz#1811670 - Unneeded qemu-guest-agent dependency on pixman +Patch237: kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch BuildRequires: wget BuildRequires: rpm-build @@ -1550,6 +1554,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Mar 24 2020 Danilo Cesar Lemes de Paula - 4.2.0-16.el8 +- kvm-migration-Rate-limit-inside-host-pages.patch [bz#1814336] +- kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch [bz#1811670] +- Resolves: bz#1811670 + (Unneeded qemu-guest-agent dependency on pixman) +- Resolves: bz#1814336 + ([POWER9] QEMU migration-test triggers a kernel warning) + * Tue Mar 17 2020 Danilo Cesar Lemes de Paula - 4.2.0-15.el8 - kvm-block-nbd-Fix-hang-in-.bdrv_close.patch [bz#1640894] - kvm-block-Generic-file-creation-fallback.patch [bz#1640894] From 739c885ee5c9607de9224374b580f2a678da778b Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Mon, 30 Mar 2020 19:34:47 +0100 Subject: [PATCH 072/195] * Mon Mar 30 2020 Danilo Cesar Lemes de Paula - 4.2.0-17.el8 - kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch [bz#1816007] - kvm-block-trickle-down-the-fallback-image-creation-funct.patch [bz#1816007] - kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch [bz#1794692] - kvm-mirror-Wait-only-for-in-flight-operations.patch [bz#1794692] - Resolves: bz#1794692 (Mirror block job stops making progress) - Resolves: bz#1816007 (qemu-img convert failed to convert with block device as target) --- ...n-t-let-an-operation-wait-for-itself.patch | 121 +++++++ ...Driver-reference-to-the-.bdrv_co_cre.patch | 328 ++++++++++++++++++ ...wn-the-fallback-image-creation-funct.patch | 296 ++++++++++++++++ ...r-Wait-only-for-in-flight-operations.patch | 95 +++++ qemu-kvm.spec | 20 +- 5 files changed, 859 insertions(+), 1 deletion(-) create mode 100644 kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch create mode 100644 kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch create mode 100644 kvm-block-trickle-down-the-fallback-image-creation-funct.patch create mode 100644 kvm-mirror-Wait-only-for-in-flight-operations.patch diff --git a/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch b/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch new file mode 100644 index 0000000..0c1c37f --- /dev/null +++ b/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch @@ -0,0 +1,121 @@ +From 71b5267ed33f9e60bc98acbabcbed62f01a96ff4 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 30 Mar 2020 11:19:23 +0100 +Subject: [PATCH 3/4] Revert "mirror: Don't let an operation wait for itself" + +RH-Author: Kevin Wolf +Message-id: <20200330111924.22938-2-kwolf@redhat.com> +Patchwork-id: 94464 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] Revert "mirror: Don't let an operation wait for itself" +Bugzilla: 1794692 +RH-Acked-by: Maxim Levitsky +RH-Acked-by: 
Danilo de Paula +RH-Acked-by: Max Reitz + +This reverts commit 7e6c4ff792734e196c8ca82564c56b5e7c6288ca. + +The fix was incomplete as it only protected against requests waiting for +themselves, but not against requests waiting for each other. We need a +different solution. + +Signed-off-by: Kevin Wolf +Message-Id: <20200326153628.4869-2-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 9178f4fe5f083064f5c91f04d98c815ce5a5af1c) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/mirror.c | 21 +++++++++------------ + 1 file changed, 9 insertions(+), 12 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index cacbc70..8959e42 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -283,14 +283,11 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, + } + + static inline void coroutine_fn +-mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) ++mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) + { + MirrorOp *op; + + QTAILQ_FOREACH(op, &s->ops_in_flight, next) { +- if (self == op) { +- continue; +- } + /* Do not wait on pseudo ops, because it may in turn wait on + * some other operation to start, which may in fact be the + * caller of this function. Since there is only one pseudo op +@@ -305,10 +302,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) + } + + static inline void coroutine_fn +-mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self) ++mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) + { + /* Only non-active operations use up in-flight slots */ +- mirror_wait_for_any_operation(s, self, false); ++ mirror_wait_for_any_operation(s, false); + } + + /* Perform a mirror copy operation. 
+@@ -351,7 +348,7 @@ static void coroutine_fn mirror_co_read(void *opaque) + + while (s->buf_free_count < nb_chunks) { + trace_mirror_yield_in_flight(s, op->offset, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s, op); ++ mirror_wait_for_free_in_flight_slot(s); + } + + /* Now make a QEMUIOVector taking enough granularity-sized chunks +@@ -558,7 +555,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) + + while (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield_in_flight(s, offset, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s, pseudo_op); ++ mirror_wait_for_free_in_flight_slot(s); + } + + if (s->ret < 0) { +@@ -612,7 +609,7 @@ static void mirror_free_init(MirrorBlockJob *s) + static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s) + { + while (s->in_flight > 0) { +- mirror_wait_for_free_in_flight_slot(s, NULL); ++ mirror_wait_for_free_in_flight_slot(s); + } + } + +@@ -797,7 +794,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) + if (s->in_flight >= MAX_IN_FLIGHT) { + trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, + s->in_flight); +- mirror_wait_for_free_in_flight_slot(s, NULL); ++ mirror_wait_for_free_in_flight_slot(s); + continue; + } + +@@ -950,7 +947,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + /* Do not start passive operations while there are active + * writes in progress */ + while (s->in_active_write_counter) { +- mirror_wait_for_any_operation(s, NULL, true); ++ mirror_wait_for_any_operation(s, true); + } + + if (s->ret < 0) { +@@ -976,7 +973,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || + (cnt == 0 && s->in_flight > 0)) { + trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); +- mirror_wait_for_free_in_flight_slot(s, NULL); ++ mirror_wait_for_free_in_flight_slot(s); + continue; + } else if (cnt != 0) { + delay_ns = mirror_iteration(s); +-- +1.8.3.1 + diff --git 
a/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch b/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch new file mode 100644 index 0000000..43f9ffc --- /dev/null +++ b/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch @@ -0,0 +1,328 @@ +From 25c528b30f8774f33e957d14060805398da524d9 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Thu, 26 Mar 2020 20:23:06 +0000 +Subject: [PATCH 1/4] block: pass BlockDriver reference to the .bdrv_co_create + +RH-Author: Maxim Levitsky +Message-id: <20200326202307.9264-2-mlevitsk@redhat.com> +Patchwork-id: 94447 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] block: pass BlockDriver reference to the .bdrv_co_create +Bugzilla: 1816007 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Kevin Wolf +RH-Acked-by: Max Reitz + +This will allow the reuse of a single generic .bdrv_co_create +implementation for several drivers. +No functional changes. + +Signed-off-by: Maxim Levitsky +Message-Id: <20200326011218.29230-2-mlevitsk@redhat.com> +Reviewed-by: Denis V. Lunev +Signed-off-by: Max Reitz +(cherry picked from commit b92902dfeaafbceaf744ab7473f2d070284f6172) +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 3 ++- + block/crypto.c | 3 ++- + block/file-posix.c | 4 +++- + block/file-win32.c | 4 +++- + block/gluster.c | 3 ++- + block/nfs.c | 4 +++- + block/parallels.c | 3 ++- + block/qcow.c | 3 ++- + block/qcow2.c | 4 +++- + block/qed.c | 3 ++- + block/raw-format.c | 4 +++- + block/rbd.c | 3 ++- + block/sheepdog.c | 4 +++- + block/ssh.c | 4 +++- + block/vdi.c | 4 +++- + block/vhdx.c | 3 ++- + block/vmdk.c | 4 +++- + block/vpc.c | 6 ++++-- + include/block/block_int.h | 3 ++- + 19 files changed, 49 insertions(+), 20 deletions(-) + +diff --git a/block.c b/block.c +index ec29b1e..f9a1c5b 100644 +--- a/block.c ++++ b/block.c +@@ -482,7 +482,8 @@ static void coroutine_fn bdrv_create_co_entry(void *opaque) + CreateCo *cco = opaque; + assert(cco->drv); + +- ret = cco->drv->bdrv_co_create_opts(cco->filename, cco->opts, &local_err); ++ ret = cco->drv->bdrv_co_create_opts(cco->drv, ++ cco->filename, cco->opts, &local_err); + error_propagate(&cco->err, local_err); + cco->ret = ret; + } +diff --git a/block/crypto.c b/block/crypto.c +index 2482383..970d463 100644 +--- a/block/crypto.c ++++ b/block/crypto.c +@@ -539,7 +539,8 @@ fail: + return ret; + } + +-static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename, ++static int coroutine_fn block_crypto_co_create_opts_luks(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/file-posix.c b/block/file-posix.c +index fd29372..a2e0a74 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -2346,7 +2346,9 @@ out: + return result; + } + +-static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn raw_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions options; +diff --git a/block/file-win32.c b/block/file-win32.c +index 77e8ff7..1585983 100644 +--- a/block/file-win32.c ++++ b/block/file-win32.c +@@ -588,7 +588,9 @@ static int 
raw_co_create(BlockdevCreateOptions *options, Error **errp) + return 0; + } + +-static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn raw_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions options; +diff --git a/block/gluster.c b/block/gluster.c +index 4fa4a77..0aa1f2c 100644 +--- a/block/gluster.c ++++ b/block/gluster.c +@@ -1130,7 +1130,8 @@ out: + return ret; + } + +-static int coroutine_fn qemu_gluster_co_create_opts(const char *filename, ++static int coroutine_fn qemu_gluster_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/nfs.c b/block/nfs.c +index 9a6311e..cc2413d 100644 +--- a/block/nfs.c ++++ b/block/nfs.c +@@ -662,7 +662,9 @@ out: + return ret; + } + +-static int coroutine_fn nfs_file_co_create_opts(const char *url, QemuOpts *opts, ++static int coroutine_fn nfs_file_co_create_opts(BlockDriver *drv, ++ const char *url, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions *create_options; +diff --git a/block/parallels.c b/block/parallels.c +index 7a01997..6d4ed77 100644 +--- a/block/parallels.c ++++ b/block/parallels.c +@@ -609,7 +609,8 @@ exit: + goto out; + } + +-static int coroutine_fn parallels_co_create_opts(const char *filename, ++static int coroutine_fn parallels_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/qcow.c b/block/qcow.c +index fce8989..8973e4e 100644 +--- a/block/qcow.c ++++ b/block/qcow.c +@@ -934,7 +934,8 @@ exit: + return ret; + } + +-static int coroutine_fn qcow_co_create_opts(const char *filename, ++static int coroutine_fn qcow_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, Error **errp) + { + BlockdevCreateOptions *create_options = NULL; +diff --git a/block/qcow2.c b/block/qcow2.c +index 83b1fc0..71067c6 100644 +--- a/block/qcow2.c ++++ 
b/block/qcow2.c +@@ -3558,7 +3558,9 @@ out: + return ret; + } + +-static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions *create_options = NULL; +diff --git a/block/qed.c b/block/qed.c +index d8c4e5f..1af9b3c 100644 +--- a/block/qed.c ++++ b/block/qed.c +@@ -720,7 +720,8 @@ out: + return ret; + } + +-static int coroutine_fn bdrv_qed_co_create_opts(const char *filename, ++static int coroutine_fn bdrv_qed_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/raw-format.c b/block/raw-format.c +index 3a76ec7..93b25e1 100644 +--- a/block/raw-format.c ++++ b/block/raw-format.c +@@ -419,7 +419,9 @@ static int raw_has_zero_init_truncate(BlockDriverState *bs) + return bdrv_has_zero_init_truncate(bs->file->bs); + } + +-static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn raw_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + return bdrv_create_file(filename, opts, errp); +diff --git a/block/rbd.c b/block/rbd.c +index 027cbcc..8847259 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -425,7 +425,8 @@ static int qemu_rbd_co_create(BlockdevCreateOptions *options, Error **errp) + return qemu_rbd_do_create(options, NULL, NULL, errp); + } + +-static int coroutine_fn qemu_rbd_co_create_opts(const char *filename, ++static int coroutine_fn qemu_rbd_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/sheepdog.c b/block/sheepdog.c +index cfa8433..a8a7e32 100644 +--- a/block/sheepdog.c ++++ b/block/sheepdog.c +@@ -2157,7 +2157,9 @@ out: + return ret; + } + +-static int coroutine_fn sd_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn 
sd_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions *create_options = NULL; +diff --git a/block/ssh.c b/block/ssh.c +index b4375cf..84e9282 100644 +--- a/block/ssh.c ++++ b/block/ssh.c +@@ -963,7 +963,9 @@ fail: + return ret; + } + +-static int coroutine_fn ssh_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn ssh_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + BlockdevCreateOptions *create_options; +diff --git a/block/vdi.c b/block/vdi.c +index 0142da7..e1a11f2 100644 +--- a/block/vdi.c ++++ b/block/vdi.c +@@ -896,7 +896,9 @@ static int coroutine_fn vdi_co_create(BlockdevCreateOptions *create_options, + return vdi_co_do_create(create_options, DEFAULT_CLUSTER_SIZE, errp); + } + +-static int coroutine_fn vdi_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn vdi_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + QDict *qdict = NULL; +diff --git a/block/vhdx.c b/block/vhdx.c +index f02d261..33e57cd 100644 +--- a/block/vhdx.c ++++ b/block/vhdx.c +@@ -2046,7 +2046,8 @@ delete_and_exit: + return ret; + } + +-static int coroutine_fn vhdx_co_create_opts(const char *filename, ++static int coroutine_fn vhdx_co_create_opts(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp) + { +diff --git a/block/vmdk.c b/block/vmdk.c +index 20e909d..eb726f2 100644 +--- a/block/vmdk.c ++++ b/block/vmdk.c +@@ -2588,7 +2588,9 @@ exit: + return blk; + } + +-static int coroutine_fn vmdk_co_create_opts(const char *filename, QemuOpts *opts, ++static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, + Error **errp) + { + Error *local_err = NULL; +diff --git a/block/vpc.c b/block/vpc.c +index a655502..6df75e2 100644 +--- a/block/vpc.c ++++ b/block/vpc.c +@@ -1089,8 +1089,10 @@ out: + return ret; + } + 
+-static int coroutine_fn vpc_co_create_opts(const char *filename, +- QemuOpts *opts, Error **errp) ++static int coroutine_fn vpc_co_create_opts(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, ++ Error **errp) + { + BlockdevCreateOptions *create_options = NULL; + QDict *qdict; +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 96e327b..7ff81be 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -136,7 +136,8 @@ struct BlockDriver { + void (*bdrv_close)(BlockDriverState *bs); + int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts, + Error **errp); +- int coroutine_fn (*bdrv_co_create_opts)(const char *filename, ++ int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv, ++ const char *filename, + QemuOpts *opts, + Error **errp); + int (*bdrv_make_empty)(BlockDriverState *bs); +-- +1.8.3.1 + diff --git a/kvm-block-trickle-down-the-fallback-image-creation-funct.patch b/kvm-block-trickle-down-the-fallback-image-creation-funct.patch new file mode 100644 index 0000000..5ba1521 --- /dev/null +++ b/kvm-block-trickle-down-the-fallback-image-creation-funct.patch @@ -0,0 +1,296 @@ +From a1f7b929ae1fe6fa424c520c3a5eb497333b0fd9 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Thu, 26 Mar 2020 20:23:07 +0000 +Subject: [PATCH 2/4] block: trickle down the fallback image creation function + use to the block drivers + +RH-Author: Maxim Levitsky +Message-id: <20200326202307.9264-3-mlevitsk@redhat.com> +Patchwork-id: 94446 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] block: trickle down the fallback image creation function use to the block drivers +Bugzilla: 1816007 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Kevin Wolf +RH-Acked-by: Max Reitz + +Instead of checking the .bdrv_co_create_opts to see if we need the +fallback, just implement the .bdrv_co_create_opts in the drivers that +need it. 
+ +This way we don't break various places that need to know if the +underlying protocol/format really supports image creation, and this way +we still allow some drivers to not support image creation. + +Fixes: fd17146cd93d1704cd96d7c2757b325fc7aac6fd +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1816007 + +Note that technically this driver reverts the image creation fallback +for the vxhs driver since I don't have a means to test it, and IMHO it +is better to leave it not supported as it was prior to generic image +creation patches. + +Also drop iscsi_create_opts which was left accidentally. + +Signed-off-by: Maxim Levitsky +Message-Id: <20200326011218.29230-3-mlevitsk@redhat.com> +Reviewed-by: Denis V. Lunev +[mreitz: Fixed alignment, and moved bdrv_co_create_opts_simple() and + bdrv_create_opts_simple from block.h into block_int.h] +Signed-off-by: Max Reitz +(cherry picked from commit 5a5e7f8cd86b7ced0732b1b6e28c82baa65b09c9) + +Contextual conflicts in block.c and include/block/block_int.h + +(conflict in block.c by default shows as functional but +with --diff-algorithm=patience it becomes a contextual conflict) + +... +001/2:[----] [--] 'block: pass BlockDriver reference to the .bdrv_co_create' +002/2:[0014] [FC] 'block: trickle down the fallback image creation function use to the block drivers' +... +002/2: 'meld <(git show 5a5e7f8^\!) <(git show 6d3bca5^\!)' + +So now running: +meld <(git show 5a5e7f8^\! --diff-algorithm=patience) <(git show 6d3bca5^\! --diff-algorithm=patience) + +shows no contextual conflicts +It is mostly due to missing commit f6dc1c31d3801dcbdf0c56574f9ff4f05180810c +Thanks to Max Reitz for helping me with this. + +Signed-off-by: Maxim Levitsky +Signed-off-by: Danilo C. L. 
de Paula +--- + block.c | 35 ++++++++++++++++++++--------------- + block/file-posix.c | 7 ++++++- + block/iscsi.c | 16 ++++------------ + block/nbd.c | 6 ++++++ + block/nvme.c | 3 +++ + include/block/block.h | 1 + + include/block/block_int.h | 11 +++++++++++ + 7 files changed, 51 insertions(+), 28 deletions(-) + +diff --git a/block.c b/block.c +index f9a1c5b..ba3b40d7 100644 +--- a/block.c ++++ b/block.c +@@ -597,8 +597,15 @@ static int create_file_fallback_zero_first_sector(BlockBackend *blk, + return 0; + } + +-static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, +- QemuOpts *opts, Error **errp) ++/** ++ * Simple implementation of bdrv_co_create_opts for protocol drivers ++ * which only support creation via opening a file ++ * (usually existing raw storage device) ++ */ ++int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, ++ Error **errp) + { + BlockBackend *blk; + QDict *options; +@@ -662,11 +669,7 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) + return -ENOENT; + } + +- if (drv->bdrv_co_create_opts) { +- return bdrv_create(drv, filename, opts, errp); +- } else { +- return bdrv_create_file_fallback(filename, drv, opts, errp); +- } ++ return bdrv_create(drv, filename, opts, errp); + } + + /** +@@ -1543,9 +1546,9 @@ QemuOptsList bdrv_runtime_opts = { + }, + }; + +-static QemuOptsList fallback_create_opts = { +- .name = "fallback-create-opts", +- .head = QTAILQ_HEAD_INITIALIZER(fallback_create_opts.head), ++QemuOptsList bdrv_create_opts_simple = { ++ .name = "simple-create-opts", ++ .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, +@@ -5910,13 +5913,15 @@ void bdrv_img_create(const char *filename, const char *fmt, + return; + } + +- create_opts = qemu_opts_append(create_opts, drv->create_opts); +- if (proto_drv->create_opts) { +- create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); +- } 
else { +- create_opts = qemu_opts_append(create_opts, &fallback_create_opts); ++ if (!proto_drv->create_opts) { ++ error_setg(errp, "Protocol driver '%s' does not support image creation", ++ proto_drv->format_name); ++ return; + } + ++ create_opts = qemu_opts_append(create_opts, drv->create_opts); ++ create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); ++ + /* Create parameter list with default values */ + opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); + qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); +diff --git a/block/file-posix.c b/block/file-posix.c +index a2e0a74..dd18d40 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -3432,6 +3432,8 @@ static BlockDriver bdrv_host_device = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .mutable_opts = mutable_opts, + .bdrv_co_invalidate_cache = raw_co_invalidate_cache, + .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, +@@ -3558,10 +3560,11 @@ static BlockDriver bdrv_host_cdrom = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .mutable_opts = mutable_opts, + .bdrv_co_invalidate_cache = raw_co_invalidate_cache, + +- + .bdrv_co_preadv = raw_co_preadv, + .bdrv_co_pwritev = raw_co_pwritev, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, +@@ -3690,6 +3693,8 @@ static BlockDriver bdrv_host_cdrom = { + .bdrv_reopen_prepare = raw_reopen_prepare, + .bdrv_reopen_commit = raw_reopen_commit, + .bdrv_reopen_abort = raw_reopen_abort, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .mutable_opts = mutable_opts, + + .bdrv_co_preadv = raw_co_preadv, +diff --git 
a/block/iscsi.c b/block/iscsi.c +index b45da65..16b0716 100644 +--- a/block/iscsi.c ++++ b/block/iscsi.c +@@ -2399,18 +2399,6 @@ out_unlock: + return r; + } + +-static QemuOptsList iscsi_create_opts = { +- .name = "iscsi-create-opts", +- .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head), +- .desc = { +- { +- .name = BLOCK_OPT_SIZE, +- .type = QEMU_OPT_SIZE, +- .help = "Virtual disk size" +- }, +- { /* end of list */ } +- } +-}; + + static const char *const iscsi_strong_runtime_opts[] = { + "transport", +@@ -2434,6 +2422,8 @@ static BlockDriver bdrv_iscsi = { + .bdrv_parse_filename = iscsi_parse_filename, + .bdrv_file_open = iscsi_open, + .bdrv_close = iscsi_close, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .bdrv_reopen_prepare = iscsi_reopen_prepare, + .bdrv_reopen_commit = iscsi_reopen_commit, + .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, +@@ -2471,6 +2461,8 @@ static BlockDriver bdrv_iser = { + .bdrv_parse_filename = iscsi_parse_filename, + .bdrv_file_open = iscsi_open, + .bdrv_close = iscsi_close, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .bdrv_reopen_prepare = iscsi_reopen_prepare, + .bdrv_reopen_commit = iscsi_reopen_commit, + .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, +diff --git a/block/nbd.c b/block/nbd.c +index a73f0d9..927915d 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -2030,6 +2030,8 @@ static BlockDriver bdrv_nbd = { + .protocol_name = "nbd", + .instance_size = sizeof(BDRVNBDState), + .bdrv_parse_filename = nbd_parse_filename, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .bdrv_file_open = nbd_open, + .bdrv_reopen_prepare = nbd_client_reopen_prepare, + .bdrv_co_preadv = nbd_client_co_preadv, +@@ -2055,6 +2057,8 @@ static BlockDriver bdrv_nbd_tcp = { + .protocol_name = "nbd+tcp", + .instance_size = sizeof(BDRVNBDState), + .bdrv_parse_filename = 
nbd_parse_filename, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .bdrv_file_open = nbd_open, + .bdrv_reopen_prepare = nbd_client_reopen_prepare, + .bdrv_co_preadv = nbd_client_co_preadv, +@@ -2080,6 +2084,8 @@ static BlockDriver bdrv_nbd_unix = { + .protocol_name = "nbd+unix", + .instance_size = sizeof(BDRVNBDState), + .bdrv_parse_filename = nbd_parse_filename, ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, + .bdrv_file_open = nbd_open, + .bdrv_reopen_prepare = nbd_client_reopen_prepare, + .bdrv_co_preadv = nbd_client_co_preadv, +diff --git a/block/nvme.c b/block/nvme.c +index d41c4bd..7b7c0cc 100644 +--- a/block/nvme.c ++++ b/block/nvme.c +@@ -1333,6 +1333,9 @@ static BlockDriver bdrv_nvme = { + .protocol_name = "nvme", + .instance_size = sizeof(BDRVNVMeState), + ++ .bdrv_co_create_opts = bdrv_co_create_opts_simple, ++ .create_opts = &bdrv_create_opts_simple, ++ + .bdrv_parse_filename = nvme_parse_filename, + .bdrv_file_open = nvme_file_open, + .bdrv_close = nvme_close, +diff --git a/include/block/block.h b/include/block/block.h +index 1df9848..92685d2 100644 +--- a/include/block/block.h ++++ b/include/block/block.h +@@ -293,6 +293,7 @@ BlockDriver *bdrv_find_format(const char *format_name); + int bdrv_create(BlockDriver *drv, const char* filename, + QemuOpts *opts, Error **errp); + int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); ++ + BlockDriverState *bdrv_new(void); + void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp); +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 7ff81be..529f153 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -1325,4 +1325,15 @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset, + + int refresh_total_sectors(BlockDriverState *bs, int64_t hint); + ++/** ++ * Simple implementation of 
bdrv_co_create_opts for protocol drivers ++ * which only support creation via opening a file ++ * (usually existing raw storage device) ++ */ ++int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, ++ const char *filename, ++ QemuOpts *opts, ++ Error **errp); ++extern QemuOptsList bdrv_create_opts_simple; ++ + #endif /* BLOCK_INT_H */ +-- +1.8.3.1 + diff --git a/kvm-mirror-Wait-only-for-in-flight-operations.patch b/kvm-mirror-Wait-only-for-in-flight-operations.patch new file mode 100644 index 0000000..a06d30e --- /dev/null +++ b/kvm-mirror-Wait-only-for-in-flight-operations.patch @@ -0,0 +1,95 @@ +From bddf389330e11fb0ce17413c1bfa2264a281ded2 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 30 Mar 2020 11:19:24 +0100 +Subject: [PATCH 4/4] mirror: Wait only for in-flight operations + +RH-Author: Kevin Wolf +Message-id: <20200330111924.22938-3-kwolf@redhat.com> +Patchwork-id: 94463 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] mirror: Wait only for in-flight operations +Bugzilla: 1794692 +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +mirror_wait_for_free_in_flight_slot() just picks a random operation to +wait for. However, a MirrorOp is already in s->ops_in_flight when +mirror_co_read() waits for free slots, so if not enough slots are +immediately available, an operation can end up waiting for itself, or +two or more operations can wait for each other to complete, which +results in a hang. + +Fix this by adding a flag to MirrorOp that tells us if the request is +already in flight (and therefore occupies slots that it will later +free), and picking only such operations for waiting. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1794692 +Signed-off-by: Kevin Wolf +Message-Id: <20200326153628.4869-3-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit ce8cabbd17cf738ddfc68384440c38e5dd2fdf97) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/mirror.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 8959e42..5e5a521 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -102,6 +102,7 @@ struct MirrorOp { + + bool is_pseudo_op; + bool is_active_write; ++ bool is_in_flight; + CoQueue waiting_requests; + Coroutine *co; + +@@ -293,7 +294,9 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) + * caller of this function. Since there is only one pseudo op + * at any given time, we will always find some real operation + * to wait on. */ +- if (!op->is_pseudo_op && op->is_active_write == active) { ++ if (!op->is_pseudo_op && op->is_in_flight && ++ op->is_active_write == active) ++ { + qemu_co_queue_wait(&op->waiting_requests, NULL); + return; + } +@@ -367,6 +370,7 @@ static void coroutine_fn mirror_co_read(void *opaque) + /* Copy the dirty cluster. */ + s->in_flight++; + s->bytes_in_flight += op->bytes; ++ op->is_in_flight = true; + trace_mirror_one_iteration(s, op->offset, op->bytes); + + ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes, +@@ -382,6 +386,7 @@ static void coroutine_fn mirror_co_zero(void *opaque) + op->s->in_flight++; + op->s->bytes_in_flight += op->bytes; + *op->bytes_handled = op->bytes; ++ op->is_in_flight = true; + + ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, + op->s->unmap ? 
BDRV_REQ_MAY_UNMAP : 0); +@@ -396,6 +401,7 @@ static void coroutine_fn mirror_co_discard(void *opaque) + op->s->in_flight++; + op->s->bytes_in_flight += op->bytes; + *op->bytes_handled = op->bytes; ++ op->is_in_flight = true; + + ret = blk_co_pdiscard(op->s->target, op->offset, op->bytes); + mirror_write_complete(op, ret); +@@ -1306,6 +1312,7 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s, + .offset = offset, + .bytes = bytes, + .is_active_write = true, ++ .is_in_flight = true, + }; + qemu_co_queue_init(&op->waiting_requests); + QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 4045d6d..1d159a1 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 16%{?dist} +Release: 17%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -621,6 +621,14 @@ Patch235: kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch Patch236: kvm-migration-Rate-limit-inside-host-pages.patch # For bz#1811670 - Unneeded qemu-guest-agent dependency on pixman Patch237: kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch +# For bz#1816007 - qemu-img convert failed to convert with block device as target +Patch238: kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch +# For bz#1816007 - qemu-img convert failed to convert with block device as target +Patch239: kvm-block-trickle-down-the-fallback-image-creation-funct.patch +# For bz#1794692 - Mirror block job stops making progress +Patch240: kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch +# For bz#1794692 - Mirror block job stops making progress +Patch241: kvm-mirror-Wait-only-for-in-flight-operations.patch BuildRequires: wget BuildRequires: rpm-build @@ -1554,6 +1562,16 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ 
%changelog +* Mon Mar 30 2020 Danilo Cesar Lemes de Paula - 4.2.0-17.el8 +- kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch [bz#1816007] +- kvm-block-trickle-down-the-fallback-image-creation-funct.patch [bz#1816007] +- kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch [bz#1794692] +- kvm-mirror-Wait-only-for-in-flight-operations.patch [bz#1794692] +- Resolves: bz#1794692 + (Mirror block job stops making progress) +- Resolves: bz#1816007 + (qemu-img convert failed to convert with block device as target) + * Tue Mar 24 2020 Danilo Cesar Lemes de Paula - 4.2.0-16.el8 - kvm-migration-Rate-limit-inside-host-pages.patch [bz#1814336] - kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch [bz#1811670] From c210bc1676ea5febacf4c076c5b9b8fabc676dd9 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Thu, 9 Apr 2020 18:22:19 +0100 Subject: [PATCH 074/195] * Thu Apr 09 2020 Danilo Cesar Lemes de Paula - 4.2.0-18.el8_2 - kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch [bz#1817621] - kvm-replication-assert-we-own-context-before-job_cancel_.patch [bz#1817621] - kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch [bz#1817621] - kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch [bz#1817621] - kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch [bz#1817621] - kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch [bz#1817621] - Resolves: bz#1817621 (Crash and deadlock with block jobs when using io-threads) --- ...-acquire-aio_context-in-backup_clean.patch | 57 ++++ ..._flight-during-blk_wait_while_draine.patch | 84 +++++ ...B.in_flight-for-coroutine-and-sync-i.patch | 295 ++++++++++++++++++ ...order-flush-pdiscard-function-defini.patch | 158 ++++++++++ ...b-s-lock-individually-in-job_txn_app.patch | 213 +++++++++++++ ...rt-we-own-context-before-job_cancel_.patch | 57 ++++ qemu-kvm.spec | 24 +- 7 files changed, 887 insertions(+), 1 deletion(-) create mode 100644 
kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch create mode 100644 kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch create mode 100644 kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch create mode 100644 kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch create mode 100644 kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch create mode 100644 kvm-replication-assert-we-own-context-before-job_cancel_.patch diff --git a/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch b/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch new file mode 100644 index 0000000..7fb76c1 --- /dev/null +++ b/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch @@ -0,0 +1,57 @@ +From 619b3aac9790a7ca7c01846144395a318a9ab250 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:14 +0100 +Subject: [PATCH 3/6] backup: don't acquire aio_context in backup_clean + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-4-kwolf@redhat.com> +Patchwork-id: 94596 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/6] backup: don't acquire aio_context in backup_clean +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +From: Stefan Reiter + +All code-paths leading to backup_clean (via job_clean) have the job's +context already acquired. The job's context is guaranteed to be the same +as the one used by backup_top via backup_job_create. + +Since the previous logic effectively acquired the lock twice, this +broke cleanup of backups for disks using IO threads, since the BDRV_POLL_WHILE +in bdrv_backup_top_drop -> bdrv_do_drained_begin would only release the lock +once, thus deadlocking with the IO thread. + +This is a partial revert of 0abf2581717a19. 
+ +Signed-off-by: Stefan Reiter +Reviewed-by: Max Reitz +Message-Id: <20200407115651.69472-4-s.reiter@proxmox.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit eca0f3524a4eb57d03a56b0cbcef5527a0981ce4) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/backup.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/block/backup.c b/block/backup.c +index 1383e21..ec50946 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -135,11 +135,7 @@ static void backup_abort(Job *job) + static void backup_clean(Job *job) + { + BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); +- AioContext *aio_context = bdrv_get_aio_context(s->backup_top); +- +- aio_context_acquire(aio_context); + bdrv_backup_top_drop(s->backup_top); +- aio_context_release(aio_context); + } + + void backup_do_checkpoint(BlockJob *job, Error **errp) +-- +1.8.3.1 + diff --git a/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch b/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch new file mode 100644 index 0000000..b16c0b7 --- /dev/null +++ b/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch @@ -0,0 +1,84 @@ +From f17b37b58a57d849d2ff5fa04f149d9415803a39 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:17 +0100 +Subject: [PATCH 6/6] block: Fix blk->in_flight during blk_wait_while_drained() + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-7-kwolf@redhat.com> +Patchwork-id: 94599 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/6] block: Fix blk->in_flight during blk_wait_while_drained() +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +Waiting in blk_wait_while_drained() while blk->in_flight is increased +for the current request is wrong because it will cause the drain +operation to deadlock. 
+ +This patch makes sure that blk_wait_while_drained() is called with +blk->in_flight increased exactly once for the current request, and that +it temporarily decreases the counter while it waits. + +Fixes: cf3129323f900ef5ddbccbe86e4fa801e88c566e +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Max Reitz +Message-Id: <20200407121259.21350-4-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 7f16476fab14fc32388e0ebae793f64673848efa) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/block-backend.c | 17 +++++------------ + 1 file changed, 5 insertions(+), 12 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 610dbfa..38ae413 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1140,10 +1140,15 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, + return 0; + } + ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ + static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) + { ++ assert(blk->in_flight > 0); ++ + if (blk->quiesce_counter && !blk->disable_request_queuing) { ++ blk_dec_in_flight(blk); + qemu_co_queue_wait(&blk->queued_requests, NULL); ++ blk_inc_in_flight(blk); + } + } + +@@ -1418,12 +1423,6 @@ static void blk_aio_read_entry(void *opaque) + BlkRwCo *rwco = &acb->rwco; + QEMUIOVector *qiov = rwco->iobuf; + +- if (rwco->blk->quiesce_counter) { +- blk_dec_in_flight(rwco->blk); +- blk_wait_while_drained(rwco->blk); +- blk_inc_in_flight(rwco->blk); +- } +- + assert(qiov->size == acb->bytes); + rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, + qiov, rwco->flags); +@@ -1436,12 +1435,6 @@ static void blk_aio_write_entry(void *opaque) + BlkRwCo *rwco = &acb->rwco; + QEMUIOVector *qiov = rwco->iobuf; + +- if (rwco->blk->quiesce_counter) { +- blk_dec_in_flight(rwco->blk); +- blk_wait_while_drained(rwco->blk); +- blk_inc_in_flight(rwco->blk); +- } +- + assert(!qiov || 
qiov->size == acb->bytes); + rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, + qiov, 0, rwco->flags); +-- +1.8.3.1 + diff --git a/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch b/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch new file mode 100644 index 0000000..463501a --- /dev/null +++ b/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch @@ -0,0 +1,295 @@ +From 52cc1d1cd2f695c5761d65baec961d14552a79ed Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:16 +0100 +Subject: [PATCH 5/6] block: Increase BB.in_flight for coroutine and sync + interfaces + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-6-kwolf@redhat.com> +Patchwork-id: 94600 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/6] block: Increase BB.in_flight for coroutine and sync interfaces +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +External callers of blk_co_*() and of the synchronous blk_*() functions +don't currently increase the BlockBackend.in_flight counter, but calls +from blk_aio_*() do, so there is an inconsistency whether the counter +has been increased or not. + +This patch moves the actual operations to static functions that can +later know they will always be called with in_flight increased exactly +once, even for external callers using the blk_co_*() coroutine +interfaces. + +If the public blk_co_*() interface is unused, remove it. + +Signed-off-by: Kevin Wolf +Message-Id: <20200407121259.21350-3-kwolf@redhat.com> +Reviewed-by: Max Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit fbb92b6798894d3bf62fe3578d99fa62c720b242) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/block-backend.c | 103 ++++++++++++++++++++++++++++++++--------- + include/sysemu/block-backend.h | 1 - + 2 files changed, 80 insertions(+), 24 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 17b2e87..610dbfa 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1147,9 +1147,10 @@ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) + } + } + +-int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, +- unsigned int bytes, QEMUIOVector *qiov, +- BdrvRequestFlags flags) ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ ++static int coroutine_fn ++blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, ++ QEMUIOVector *qiov, BdrvRequestFlags flags) + { + int ret; + BlockDriverState *bs; +@@ -1178,10 +1179,24 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, + return ret; + } + +-int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, +- unsigned int bytes, +- QEMUIOVector *qiov, size_t qiov_offset, +- BdrvRequestFlags flags) ++int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, ++ unsigned int bytes, QEMUIOVector *qiov, ++ BdrvRequestFlags flags) ++{ ++ int ret; ++ ++ blk_inc_in_flight(blk); ++ ret = blk_do_preadv(blk, offset, bytes, qiov, flags); ++ blk_dec_in_flight(blk); ++ ++ return ret; ++} ++ ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ ++static int coroutine_fn ++blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes, ++ QEMUIOVector *qiov, size_t qiov_offset, ++ BdrvRequestFlags flags) + { + int ret; + BlockDriverState *bs; +@@ -1214,6 +1229,20 @@ int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, + return ret; + } + ++int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, ++ unsigned int bytes, ++ QEMUIOVector *qiov, size_t qiov_offset, ++ BdrvRequestFlags flags) ++{ ++ int ret; ++ ++ 
blk_inc_in_flight(blk); ++ ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); ++ blk_dec_in_flight(blk); ++ ++ return ret; ++} ++ + int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, + unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) +@@ -1234,7 +1263,7 @@ static void blk_read_entry(void *opaque) + BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + +- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size, ++ rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size, + qiov, rwco->flags); + aio_wait_kick(); + } +@@ -1244,8 +1273,8 @@ static void blk_write_entry(void *opaque) + BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + +- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size, +- qiov, rwco->flags); ++ rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size, ++ qiov, 0, rwco->flags); + aio_wait_kick(); + } + +@@ -1262,6 +1291,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, + .ret = NOT_DONE, + }; + ++ blk_inc_in_flight(blk); + if (qemu_in_coroutine()) { + /* Fast-path if already in coroutine context */ + co_entry(&rwco); +@@ -1270,6 +1300,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, + bdrv_coroutine_enter(blk_bs(blk), co); + BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE); + } ++ blk_dec_in_flight(blk); + + return rwco.ret; + } +@@ -1394,7 +1425,7 @@ static void blk_aio_read_entry(void *opaque) + } + + assert(qiov->size == acb->bytes); +- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, ++ rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, + qiov, rwco->flags); + blk_aio_complete(acb); + } +@@ -1412,8 +1443,8 @@ static void blk_aio_write_entry(void *opaque) + } + + assert(!qiov || qiov->size == acb->bytes); +- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, +- qiov, rwco->flags); ++ rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, ++ 
qiov, 0, rwco->flags); + blk_aio_complete(acb); + } + +@@ -1498,7 +1529,9 @@ void blk_aio_cancel_async(BlockAIOCB *acb) + bdrv_aio_cancel_async(acb); + } + +-int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ ++static int coroutine_fn ++blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) + { + blk_wait_while_drained(blk); + +@@ -1514,8 +1547,7 @@ static void blk_ioctl_entry(void *opaque) + BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + +- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, +- qiov->iov[0].iov_base); ++ rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base); + aio_wait_kick(); + } + +@@ -1529,7 +1561,7 @@ static void blk_aio_ioctl_entry(void *opaque) + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + +- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf); ++ rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf); + + blk_aio_complete(acb); + } +@@ -1540,7 +1572,9 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, + return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); + } + +-int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ ++static int coroutine_fn ++blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes) + { + int ret; + +@@ -1559,7 +1593,7 @@ static void blk_aio_pdiscard_entry(void *opaque) + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + +- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); ++ rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes); + blk_aio_complete(acb); + } + +@@ -1571,12 +1605,23 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, + cb, opaque); + } + ++int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) ++{ ++ int ret; ++ ++ blk_inc_in_flight(blk); 
++ ret = blk_do_pdiscard(blk, offset, bytes); ++ blk_dec_in_flight(blk); ++ ++ return ret; ++} ++ + static void blk_pdiscard_entry(void *opaque) + { + BlkRwCo *rwco = opaque; + QEMUIOVector *qiov = rwco->iobuf; + +- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); ++ rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size); + aio_wait_kick(); + } + +@@ -1585,7 +1630,8 @@ int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) + return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); + } + +-int blk_co_flush(BlockBackend *blk) ++/* To be called between exactly one pair of blk_inc/dec_in_flight() */ ++static int coroutine_fn blk_do_flush(BlockBackend *blk) + { + blk_wait_while_drained(blk); + +@@ -1601,7 +1647,7 @@ static void blk_aio_flush_entry(void *opaque) + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + +- rwco->ret = blk_co_flush(rwco->blk); ++ rwco->ret = blk_do_flush(rwco->blk); + blk_aio_complete(acb); + } + +@@ -1611,10 +1657,21 @@ BlockAIOCB *blk_aio_flush(BlockBackend *blk, + return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); + } + ++int coroutine_fn blk_co_flush(BlockBackend *blk) ++{ ++ int ret; ++ ++ blk_inc_in_flight(blk); ++ ret = blk_do_flush(blk); ++ blk_dec_in_flight(blk); ++ ++ return ret; ++} ++ + static void blk_flush_entry(void *opaque) + { + BlkRwCo *rwco = opaque; +- rwco->ret = blk_co_flush(rwco->blk); ++ rwco->ret = blk_do_flush(rwco->blk); + aio_wait_kick(); + } + +diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h +index b198dec..9bbdbd6 100644 +--- a/include/sysemu/block-backend.h ++++ b/include/sysemu/block-backend.h +@@ -171,7 +171,6 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int bytes, + BlockCompletionFunc *cb, void *opaque); + void blk_aio_cancel(BlockAIOCB *acb); + void blk_aio_cancel_async(BlockAIOCB *acb); +-int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf); + int 
blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf); + BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, + BlockCompletionFunc *cb, void *opaque); +-- +1.8.3.1 + diff --git a/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch b/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch new file mode 100644 index 0000000..9d49cfa --- /dev/null +++ b/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch @@ -0,0 +1,158 @@ +From 6cc456c4c1e6557fdc7e138e8ef8171b71609222 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:15 +0100 +Subject: [PATCH 4/6] block-backend: Reorder flush/pdiscard function + definitions + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-5-kwolf@redhat.com> +Patchwork-id: 94598 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/6] block-backend: Reorder flush/pdiscard function definitions +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +Move all variants of the flush/pdiscard functions to a single place and +put the blk_co_*() version first because it is called by all other +variants (and will become static in the next patch). + +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Max Reitz +Message-Id: <20200407121259.21350-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 564806c529d4e0acad209b1e5b864a8886092f1f) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/block-backend.c | 92 +++++++++++++++++++++++++-------------------------- + 1 file changed, 46 insertions(+), 46 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 8b8f2a8..17b2e87 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1488,38 +1488,6 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, + blk_aio_write_entry, flags, cb, opaque); + } + +-static void blk_aio_flush_entry(void *opaque) +-{ +- BlkAioEmAIOCB *acb = opaque; +- BlkRwCo *rwco = &acb->rwco; +- +- rwco->ret = blk_co_flush(rwco->blk); +- blk_aio_complete(acb); +-} +- +-BlockAIOCB *blk_aio_flush(BlockBackend *blk, +- BlockCompletionFunc *cb, void *opaque) +-{ +- return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); +-} +- +-static void blk_aio_pdiscard_entry(void *opaque) +-{ +- BlkAioEmAIOCB *acb = opaque; +- BlkRwCo *rwco = &acb->rwco; +- +- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); +- blk_aio_complete(acb); +-} +- +-BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, +- int64_t offset, int bytes, +- BlockCompletionFunc *cb, void *opaque) +-{ +- return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, +- cb, opaque); +-} +- + void blk_aio_cancel(BlockAIOCB *acb) + { + bdrv_aio_cancel(acb); +@@ -1586,6 +1554,37 @@ int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) + return bdrv_co_pdiscard(blk->root, offset, bytes); + } + ++static void blk_aio_pdiscard_entry(void *opaque) ++{ ++ BlkAioEmAIOCB *acb = opaque; ++ BlkRwCo *rwco = &acb->rwco; ++ ++ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); ++ blk_aio_complete(acb); ++} ++ ++BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, ++ int64_t offset, int bytes, ++ BlockCompletionFunc *cb, void *opaque) ++{ ++ return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, ++ cb, opaque); ++} ++ ++static void blk_pdiscard_entry(void *opaque) ++{ ++ BlkRwCo *rwco = opaque; ++ 
QEMUIOVector *qiov = rwco->iobuf; ++ ++ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); ++ aio_wait_kick(); ++} ++ ++int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) ++{ ++ return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); ++} ++ + int blk_co_flush(BlockBackend *blk) + { + blk_wait_while_drained(blk); +@@ -1597,6 +1596,21 @@ int blk_co_flush(BlockBackend *blk) + return bdrv_co_flush(blk_bs(blk)); + } + ++static void blk_aio_flush_entry(void *opaque) ++{ ++ BlkAioEmAIOCB *acb = opaque; ++ BlkRwCo *rwco = &acb->rwco; ++ ++ rwco->ret = blk_co_flush(rwco->blk); ++ blk_aio_complete(acb); ++} ++ ++BlockAIOCB *blk_aio_flush(BlockBackend *blk, ++ BlockCompletionFunc *cb, void *opaque) ++{ ++ return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); ++} ++ + static void blk_flush_entry(void *opaque) + { + BlkRwCo *rwco = opaque; +@@ -2083,20 +2097,6 @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, + return bdrv_truncate(blk->root, offset, exact, prealloc, errp); + } + +-static void blk_pdiscard_entry(void *opaque) +-{ +- BlkRwCo *rwco = opaque; +- QEMUIOVector *qiov = rwco->iobuf; +- +- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); +- aio_wait_kick(); +-} +- +-int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) +-{ +- return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); +-} +- + int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, + int64_t pos, int size) + { +-- +1.8.3.1 + diff --git a/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch b/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch new file mode 100644 index 0000000..e38428b --- /dev/null +++ b/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch @@ -0,0 +1,213 @@ +From 3f16b8a33bd7503cbe857fbeb45fff7301b6bb5f Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:12 +0100 +Subject: [PATCH 1/6] job: take each job's lock individually in 
job_txn_apply + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-2-kwolf@redhat.com> +Patchwork-id: 94597 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/6] job: take each job's lock individually in job_txn_apply +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +From: Stefan Reiter + +All callers of job_txn_apply hold a single job's lock, but different +jobs within a transaction can have different contexts, thus we need to +lock each one individually before applying the callback function. + +Similar to job_completed_txn_abort this also requires releasing the +caller's context before and reacquiring it after to avoid recursive +locks which might break AIO_WAIT_WHILE in the callback. This is safe, since +existing code would already have to take this into account, lest +job_completed_txn_abort might have broken. + +This also brings to light a different issue: When a callback function in +job_txn_apply moves its job to a different AIO context, callers will +try to release the wrong lock (now that we re-acquire the lock +correctly, previously it would just continue with the old lock, leaving +the job unlocked for the rest of the return path). Fix this by not caching +the job's context. + +This is only necessary for qmp_block_job_finalize, qmp_job_finalize and +job_exit, since everyone else calls through job_exit. + +One test needed adapting, since it calls job_finalize directly, so it +manually needs to acquire the correct context. + +Signed-off-by: Stefan Reiter +Message-Id: <20200407115651.69472-2-s.reiter@proxmox.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit b660a84bbb0eb1a76b505648d31d5e82594fb75e) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + blockdev.c | 9 +++++++++ + job-qmp.c | 9 +++++++++ + job.c | 50 ++++++++++++++++++++++++++++++++++++++++---------- + tests/test-blockjob.c | 2 ++ + 4 files changed, 60 insertions(+), 10 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index c8d4b51..86eb115 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -4215,7 +4215,16 @@ void qmp_block_job_finalize(const char *id, Error **errp) + } + + trace_qmp_block_job_finalize(job); ++ job_ref(&job->job); + job_finalize(&job->job, errp); ++ ++ /* ++ * Job's context might have changed via job_finalize (and job_txn_apply ++ * automatically acquires the new one), so make sure we release the correct ++ * one. ++ */ ++ aio_context = blk_get_aio_context(job->blk); ++ job_unref(&job->job); + aio_context_release(aio_context); + } + +diff --git a/job-qmp.c b/job-qmp.c +index fbfed25..a201220 100644 +--- a/job-qmp.c ++++ b/job-qmp.c +@@ -114,7 +114,16 @@ void qmp_job_finalize(const char *id, Error **errp) + } + + trace_qmp_job_finalize(job); ++ job_ref(job); + job_finalize(job, errp); ++ ++ /* ++ * Job's context might have changed via job_finalize (and job_txn_apply ++ * automatically acquires the new one), so make sure we release the correct ++ * one. 
++ */ ++ aio_context = job->aio_context; ++ job_unref(job); + aio_context_release(aio_context); + } + +diff --git a/job.c b/job.c +index 04409b4..48fc4ad 100644 +--- a/job.c ++++ b/job.c +@@ -136,17 +136,38 @@ static void job_txn_del_job(Job *job) + } + } + +-static int job_txn_apply(JobTxn *txn, int fn(Job *)) ++static int job_txn_apply(Job *job, int fn(Job *)) + { +- Job *job, *next; ++ AioContext *inner_ctx; ++ Job *other_job, *next; ++ JobTxn *txn = job->txn; + int rc = 0; + +- QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) { +- rc = fn(job); ++ /* ++ * Similar to job_completed_txn_abort, we take each job's lock before ++ * applying fn, but since we assume that outer_ctx is held by the caller, ++ * we need to release it here to avoid holding the lock twice - which would ++ * break AIO_WAIT_WHILE from within fn. ++ */ ++ job_ref(job); ++ aio_context_release(job->aio_context); ++ ++ QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { ++ inner_ctx = other_job->aio_context; ++ aio_context_acquire(inner_ctx); ++ rc = fn(other_job); ++ aio_context_release(inner_ctx); + if (rc) { + break; + } + } ++ ++ /* ++ * Note that job->aio_context might have been changed by calling fn, so we ++ * can't use a local variable to cache it. 
++ */ ++ aio_context_acquire(job->aio_context); ++ job_unref(job); + return rc; + } + +@@ -774,11 +795,11 @@ static void job_do_finalize(Job *job) + assert(job && job->txn); + + /* prepare the transaction to complete */ +- rc = job_txn_apply(job->txn, job_prepare); ++ rc = job_txn_apply(job, job_prepare); + if (rc) { + job_completed_txn_abort(job); + } else { +- job_txn_apply(job->txn, job_finalize_single); ++ job_txn_apply(job, job_finalize_single); + } + } + +@@ -824,10 +845,10 @@ static void job_completed_txn_success(Job *job) + assert(other_job->ret == 0); + } + +- job_txn_apply(txn, job_transition_to_pending); ++ job_txn_apply(job, job_transition_to_pending); + + /* If no jobs need manual finalization, automatically do so */ +- if (job_txn_apply(txn, job_needs_finalize) == 0) { ++ if (job_txn_apply(job, job_needs_finalize) == 0) { + job_do_finalize(job); + } + } +@@ -849,9 +870,10 @@ static void job_completed(Job *job) + static void job_exit(void *opaque) + { + Job *job = (Job *)opaque; +- AioContext *ctx = job->aio_context; ++ AioContext *ctx; + +- aio_context_acquire(ctx); ++ job_ref(job); ++ aio_context_acquire(job->aio_context); + + /* This is a lie, we're not quiescent, but still doing the completion + * callbacks. However, completion callbacks tend to involve operations that +@@ -862,6 +884,14 @@ static void job_exit(void *opaque) + + job_completed(job); + ++ /* ++ * Note that calling job_completed can move the job to a different ++ * aio_context, so we cannot cache from above. job_txn_apply takes care of ++ * acquiring the new lock, and we ref/unref to avoid job_completed freeing ++ * the job underneath us. 
++ */ ++ ctx = job->aio_context; ++ job_unref(job); + aio_context_release(ctx); + } + +diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c +index 7844c9f..6d857fd 100644 +--- a/tests/test-blockjob.c ++++ b/tests/test-blockjob.c +@@ -368,7 +368,9 @@ static void test_cancel_concluded(void) + aio_poll(qemu_get_aio_context(), true); + assert(job->status == JOB_STATUS_PENDING); + ++ aio_context_acquire(job->aio_context); + job_finalize(job, &error_abort); ++ aio_context_release(job->aio_context); + assert(job->status == JOB_STATUS_CONCLUDED); + + cancel_common(s); +-- +1.8.3.1 + diff --git a/kvm-replication-assert-we-own-context-before-job_cancel_.patch b/kvm-replication-assert-we-own-context-before-job_cancel_.patch new file mode 100644 index 0000000..09ef4de --- /dev/null +++ b/kvm-replication-assert-we-own-context-before-job_cancel_.patch @@ -0,0 +1,57 @@ +From 46887feac666d0d7633ff3f5af5721fe2a80a8ab Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 8 Apr 2020 17:29:13 +0100 +Subject: [PATCH 2/6] replication: assert we own context before job_cancel_sync + +RH-Author: Kevin Wolf +Message-id: <20200408172917.18712-3-kwolf@redhat.com> +Patchwork-id: 94595 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/6] replication: assert we own context before job_cancel_sync +Bugzilla: 1817621 +RH-Acked-by: Eric Blake +RH-Acked-by: Danilo de Paula +RH-Acked-by: Max Reitz + +From: Stefan Reiter + +job_cancel_sync requires the job's lock to be held, all other callers +already do this (replication_stop, drive_backup_abort, +blockdev_backup_abort, job_cancel_sync_all, cancel_common). + +In this case we're in a BlockDriver handler, so we already have a lock, +just assert that it is the same as the one used for the commit_job. + +Signed-off-by: Stefan Reiter +Message-Id: <20200407115651.69472-3-s.reiter@proxmox.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 08558e33257ec796594bd411261028a93414a70c) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + block/replication.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/block/replication.c b/block/replication.c +index 99532ce..0ce27ee 100644 +--- a/block/replication.c ++++ b/block/replication.c +@@ -144,12 +144,15 @@ fail: + static void replication_close(BlockDriverState *bs) + { + BDRVReplicationState *s = bs->opaque; ++ Job *commit_job; + + if (s->stage == BLOCK_REPLICATION_RUNNING) { + replication_stop(s->rs, false, NULL); + } + if (s->stage == BLOCK_REPLICATION_FAILOVER) { +- job_cancel_sync(&s->commit_job->job); ++ commit_job = &s->commit_job->job; ++ assert(commit_job->aio_context == qemu_get_current_aio_context()); ++ job_cancel_sync(commit_job); + } + + if (s->mode == REPLICATION_MODE_SECONDARY) { +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 1d159a1..9246507 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 17%{?dist} +Release: 18%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -629,6 +629,18 @@ Patch239: kvm-block-trickle-down-the-fallback-image-creation-funct.patch Patch240: kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch # For bz#1794692 - Mirror block job stops making progress Patch241: kvm-mirror-Wait-only-for-in-flight-operations.patch +# For bz#1817621 - Crash and deadlock with block jobs when using io-threads +Patch242: kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch +# For bz#1817621 - Crash and deadlock with block jobs when using io-threads +Patch243: kvm-replication-assert-we-own-context-before-job_cancel_.patch +# For bz#1817621 - Crash and deadlock with block jobs when using io-threads +Patch244: kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch +# For bz#1817621 - Crash and deadlock with block jobs when using io-threads +Patch245: kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch +# For bz#1817621 - Crash and deadlock with block jobs when using io-threads +Patch246: kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch +# For bz#1817621 - Crash and deadlock with block jobs when using io-threads +Patch247: kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch BuildRequires: wget BuildRequires: rpm-build @@ -1562,6 +1574,16 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Thu Apr 09 2020 Danilo Cesar Lemes de Paula - 4.2.0-18.el8_2 +- kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch [bz#1817621] +- kvm-replication-assert-we-own-context-before-job_cancel_.patch [bz#1817621] +- kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch [bz#1817621] +- kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch [bz#1817621] +- kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch [bz#1817621] +- kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch [bz#1817621] +- Resolves: bz#1817621 + (Crash and deadlock with block jobs 
when using io-threads) + * Mon Mar 30 2020 Danilo Cesar Lemes de Paula - 4.2.0-17.el8 - kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch [bz#1816007] - kvm-block-trickle-down-the-fallback-image-creation-funct.patch [bz#1816007] From b73c686441f58546489af585204c461cb79c2615 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Tue, 14 Apr 2020 22:40:52 +0100 Subject: [PATCH 075/195] * Tue Apr 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-19.el8_2 - kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch [bz#1822682] - Resolves: bz#1822682 (QEMU-4.2 fails to start a VM on Azure) --- ...ot-set-unsupported-VMX-secondary-exe.patch | 112 ++++++++++++++++++ qemu-kvm.spec | 9 +- 2 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch diff --git a/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch b/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch new file mode 100644 index 0000000..4c2362d --- /dev/null +++ b/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch @@ -0,0 +1,112 @@ +From 77cdcccc49ba988e3b5bcb66decdee2e99fdcd72 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Tue, 14 Apr 2020 15:00:36 +0100 +Subject: [PATCH] target/i386: do not set unsupported VMX secondary execution + controls + +RH-Author: Vitaly Kuznetsov +Message-id: <20200414150036.625732-2-vkuznets@redhat.com> +Patchwork-id: 94674 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] target/i386: do not set unsupported VMX secondary execution controls +Bugzilla: 1822682 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Paolo Bonzini + +Commit 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for +secondary execution controls") added a workaround for KVM pre-dating +commit 6defc591846d ("KVM: nVMX: include conditional controls in /dev/kvm +KVM_GET_MSRS") which wasn't setting certain available controls. 
The +workaround uses generic CPUID feature bits to set missing VMX controls. + +It was found that in some cases it is possible to observe hosts which +have certain CPUID features but lack the corresponding VMX control. + +In particular, it was reported that Azure VMs have RDSEED but lack +VMX_SECONDARY_EXEC_RDSEED_EXITING; attempts to enable this feature +bit result in QEMU abort. + +Resolve the issue by not applying the workaround when we don't have +to. As there is no good way to find out if KVM has the fix itself, use +95c5c7c77c ("KVM: nVMX: list VMX MSRs in KVM_GET_MSR_INDEX_LIST") instead +as these [are supposed to] come together. + +Fixes: 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for secondary execution controls") +Suggested-by: Paolo Bonzini +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20200331162752.1209928-1-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4a910e1f6ab4155ec8b24c49b2585cc486916985) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/kvm.c | 41 ++++++++++++++++++++++++++--------------- + 1 file changed, 26 insertions(+), 15 deletions(-) + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 99840ca..fcc8f7d 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -106,6 +106,7 @@ static bool has_msr_arch_capabs; + static bool has_msr_core_capabs; + static bool has_msr_vmx_vmfunc; + static bool has_msr_ucode_rev; ++static bool has_msr_vmx_procbased_ctls2; + + static uint32_t has_architectural_pmu_version; + static uint32_t num_architectural_pmu_gp_counters; +@@ -490,21 +491,28 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) + value = msr_data.entries[0].data; + switch (index) { + case MSR_IA32_VMX_PROCBASED_CTLS2: +- /* KVM forgot to add these bits for some time, do this ourselves.
*/ +- if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & CPUID_XSAVE_XSAVES) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; +- } +- if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & CPUID_EXT_RDRAND) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; +- } +- if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_INVPCID) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; +- } +- if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_RDSEED) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; +- } +- if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; ++ if (!has_msr_vmx_procbased_ctls2) { ++ /* KVM forgot to add these bits for some time, do this ourselves. */ ++ if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & ++ CPUID_XSAVE_XSAVES) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & ++ CPUID_EXT_RDRAND) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & ++ CPUID_7_0_EBX_INVPCID) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & ++ CPUID_7_0_EBX_RDSEED) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & ++ CPUID_EXT2_RDTSCP) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; ++ } + } + /* fall through */ + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: +@@ -2060,6 +2068,9 @@ static int kvm_get_supported_msrs(KVMState *s) + case MSR_IA32_UCODE_REV: + has_msr_ucode_rev = true; + break; ++ case MSR_IA32_VMX_PROCBASED_CTLS2: ++ has_msr_vmx_procbased_ctls2 = true; ++ break; + } + } + } +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 9246507..4f9fc85 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,7 +67,7 
@@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 4.2.0 -Release: 18%{?dist} +Release: 19%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -641,6 +641,8 @@ Patch245: kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch Patch246: kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch # For bz#1817621 - Crash and deadlock with block jobs when using io-threads Patch247: kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch +# For bz#1822682 - QEMU-4.2 fails to start a VM on Azure +Patch248: kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch BuildRequires: wget BuildRequires: rpm-build @@ -1574,6 +1576,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Apr 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-19.el8_2 +- kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch [bz#1822682] +- Resolves: bz#1822682 + (QEMU-4.2 fails to start a VM on Azure) + * Thu Apr 09 2020 Danilo Cesar Lemes de Paula - 4.2.0-18.el8_2 - kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch [bz#1817621] - kvm-replication-assert-we-own-context-before-job_cancel_.patch [bz#1817621] From d0bd08660b2bc86044c6a1e8dc38d2b6233ecb75 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Tue, 12 May 2020 21:03:43 -0400 Subject: [PATCH 077/195] temporary rebase for 8.3.0 --- .gitignore | 14 +- ...at-Adding-slirp-to-the-exploded-tree.patch | 16975 ++++++++++++++++ 0005-Initial-redhat-build.patch | 136 +- 0006-Enable-disable-devices-for-RHEL.patch | 428 +- ...Machine-type-related-general-changes.patch | 286 +- 0008-Add-aarch64-machine-types.patch | 58 +- 0009-Add-ppc64-machine-types.patch | 57 +- 0010-Add-s390x-machine-types.patch | 23 +- 0011-Add-x86_64-machine-types.patch | 110 +- 0012-Enable-make-check.patch | 375 +- ...mber-of-devices-that-can-be-assigned.patch | 11 +- ...Add-support-statement-to-help-output.patch | 19 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 28 +- 0016-Add-support-for-simpletrace.patch | 25 +- ...documentation-instead-of-qemu-system.patch | 3769 +++- 0018-usb-xhci-Fix-PCI-capability-order.patch | 11 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 8 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 6 +- ...er-m_free-might-read-pointers-from-a.patch | 61 - ...x-blockdev-reopen-API-with-feature-f.patch | 9 +- ...able-nested-PERF_GLOBAL_CTRL-MSR-sup.patch | 53 - ...ate-dirty_bmap-when-we-change-a-slot.patch | 115 - ...n-t-let-an-operation-wait-for-itself.patch | 121 - ...sd-fix-memory-leak-on-fuse-queueinfo.patch | 63 - ...it-APIC-ID-for-migration-instance-ID.patch | 62 - ...-acquire-aio_context-in-backup_clean.patch | 57 - kvm-backup-top-Begin-drain-earlier.patch | 56 - ...ecursively-even-for-already-active-n.patch | 116 - ..._flight-during-blk_wait_while_draine.patch | 84 - ...x-cross-AioContext-blockdev-snapshot.patch | 91 - ...ix-leak-in-bdrv_create_file_fallback.patch | 60 - ...block-Generic-file-creation-fallback.patch | 227 - ...B.in_flight-for-coroutine-and-sync-i.patch | 295 - ...troduce-bdrv_reopen_commit_post-step.patch | 65 - ...Make-bdrv_get_cumulative_perm-public.patch | 67 - ...x-restrictions-for-blockdev-snapshot.patch | 117 - ...order-flush-pdiscard-function-defini.patch | 158 - 
...-Don-t-acquire-context-while-droppin.patch | 130 - ...n-with-backing-file-in-different-Aio.patch | 114 - kvm-block-nbd-Fix-hang-in-.bdrv_close.patch | 78 - ...Driver-reference-to-the-.bdrv_co_cre.patch | 328 - ...-bitmap-reopen-into-bdrv_reopen_comm.patch | 78 - ...wn-the-fallback-image-creation-funct.patch | 296 - ...-AioContext-on-dirty-bitmap-function.patch | 176 - ...bs-to-the-proper-context-on-snapshot.patch | 107 - ...ing-style-issues-in-drive_backup_pre.patch | 62 - ...drv_try_set_aio_context-context-requ.patch | 204 - ...mp_blockdev_backup-and-blockdev-back.patch | 144 - ...mp_drive_backup-and-drive-backup-tra.patch | 419 - ...me-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch | 137 - ...do-not-make-qemu-ga-link-with-pixman.patch | 2463 --- ...ost-user-Protect-slave-fd-with-mutex.patch | 134 - ...tures-Make-kvm-no-adjvtime-comment-c.patch | 56 - ...-Free-rom-data-during-inmigrate-skip.patch | 85 - kvm-file-posix-Drop-hdev_co_create_opts.patch | 131 - ...ow-using-qdev-ID-for-qemu-io-command.patch | 100 - ...w-default-SMBIOS-fields-for-Windows-.patch | 262 - kvm-i386-Remove-cpu64-rhel6-CPU-model.patch | 77 - ...-Resolve-CPU-models-to-v1-by-default.patch | 95 - kvm-iotests-Add-iothread-cases-to-155.patch | 147 - ...Add-test-for-image-creation-fallback.patch | 138 - kvm-iotests-Create-VM.blockdev_create.patch | 59 - ...tests-Fix-run_job-with-use_log-False.patch | 47 - ...r-blockdev-reopen-test-for-iothreads.patch | 122 - ...ests-Support-job-complete-in-run_job.patch | 46 - ...Test-external-snapshot-with-VM-state.patch | 189 - ...dling-of-AioContexts-with-some-block.patch | 322 - ...ror-with-temporarily-disabled-target.patch | 162 - ...iotests-Use-complete_and_wait-in-155.patch | 50 - ...py-Let-wait_migration-wait-even-more.patch | 123 - ...count-from-GET-LBA-STATUS-CVE-2020-1.patch | 79 - kvm-iscsi-Drop-iscsi_co_create_opts.patch | 113 - ...b-s-lock-individually-in-job_txn_app.patch | 213 - ...t-user-Fix-some-memtable-remap-cases.patch | 117 - 
...-SaveStateEntry.instance_id-into-uin.patch | 179 - ...igration-Create-migration_is_running.patch | 119 - ...ation-Define-VMSTATE_INSTANCE_ID_ANY.patch | 257 - ...n-Don-t-send-data-if-we-have-stopped.patch | 42 - ...ure-that-we-don-t-call-write-in-case.patch | 94 - ...VM-is-paused-when-migration-is-cance.patch | 70 - ...gration-Rate-limit-inside-host-pages.patch | 172 - ...ifd-clean-pages-after-filling-packet.patch | 65 - ...d-fix-destroyed-mutex-access-in-term.patch | 77 - ...d-fix-nullptr-access-in-multifd_send.patch | 75 - ...d-fix-nullptr-access-in-terminating-.patch | 68 - ...n-t-let-an-operation-wait-for-itself.patch | 123 - ...-Store-MirrorOp.co-for-debuggability.patch | 51 - ...r-Wait-only-for-in-flight-operations.patch | 95 - ...e-that-we-don-t-do-any-IO-after-an-e.patch | 74 - ...-external-interrupt-pin-in-KVM-on-re.patch | 107 - ...UPPCState-irq_input_state-with-moder.patch | 112 - ...rite-only-overlay-feature-for-blockd.patch | 64 - ...alloc_cluster_abort-for-external-dat.patch | 52 - ...qemu-file-Don-t-do-IO-after-shutdown.patch | 92 - ...rt-we-own-context-before-job_cancel_.patch | 57 - ...ct-size-while-emulating-IRC-commands.patch | 77 - ...orrect-size-while-emulating-commands.patch | 71 - ...ger-a-CAS-reboot-for-XICS-XIVE-mode-.patch | 113 - ....3-accelerated-count-cache-flush-in-.patch | 135 - kvm-target-arm-arch_dump-Add-SVE-notes.patch | 298 - ...Add-the-kvm-no-adjvtime-CPU-property.patch | 281 - ...vm-Implement-virtual-time-adjustment.patch | 330 - ...rivial-Clean-up-header-documentation.patch | 197 - ...vm64-kvm64-cpus-have-timer-registers.patch | 60 - ...or-query-cpu-model-expansion-crashed.patch | 81 - ...target-i386-add-a-ucode-rev-property.patch | 125 - ...k-for-availability-of-MSR_IA32_UCODE.patch | 72 - ...ot-set-unsupported-VMX-secondary-exe.patch | 112 - ...le-monitor-and-ucode-revision-with-c.patch | 49 - ...target-i386-fix-TCG-UCODE_REV-access.patch | 73 - ...m-initialize-feature-MSRs-very-early.patch | 178 - 
...initialize-microcode-revision-from-K.patch | 64 - kvm-tcp_emu-Fix-oob-access.patch | 59 - kvm-tcp_emu-fix-unsafe-snprintf-usages.patch | 149 - ...eatures-Check-feature-default-values.patch | 106 - ...fuse_lowlevel-Fix-fuse_out_header-er.patch | 55 - kvm-tpm-ppi-page-align-PPI-RAM.patch | 58 - ...e-update-qemu-trace-stap-to-Python-3.patch | 82 - ...-Prevent-recursion-in-usbredir_write.patch | 106 - kvm-util-add-slirp_fmt-helpers.patch | 140 - ...emove-irqchip-notifier-if-not-regist.patch | 58 - ...Add-names-to-section-rounded-warning.patch | 53 - ...t-Only-align-sections-for-vhost-user.patch | 97 - kvm-vhost-coding-style-fix.patch | 56 - ...Print-unexpected-slave-message-types.patch | 48 - ...host-user-fs-remove-vhostfd-property.patch | 59 - ...st-user-gpu-Drop-trailing-json-comma.patch | 52 - ...ility-to-delete-vq-through-a-pointer.patch | 80 - ...-enable-notifications-during-polling.patch | 158 - ...io-fs-fix-MSI-X-nvectors-calculation.patch | 60 - ...-make-virtio_delete_queue-idempotent.patch | 42 - ...e-also-control-queue-when-TX-RX-dele.patch | 49 - ...-region-cache-when-on-queue-deletion.patch | 46 - kvm-virtiofs-Add-maintainers-entry.patch | 52 - ...-to-the-log-with-FUSE_LOG_DEBUG-leve.patch | 86 - ...akefile-wiring-for-virtiofsd-contrib.patch | 106 - kvm-virtiofsd-Add-auxiliary-.c-s.patch | 1387 -- kvm-virtiofsd-Add-fuse_lowlevel.c.patch | 3172 --- kvm-virtiofsd-Add-main-virtio-loop.patch | 105 - kvm-virtiofsd-Add-options-for-virtio.patch | 103 - kvm-virtiofsd-Add-passthrough_ll.patch | 1387 -- ...mestamp-to-the-log-with-FUSE_LOG_DEB.patch | 73 - ...virtiofsd-Clean-up-inodes-on-destroy.patch | 85 - ...t-lo_destroy-to-take-the-lo-mutex-lo.patch | 112 - ...op-CAP_FSETID-if-client-asked-for-it.patch | 176 - kvm-virtiofsd-Fast-path-for-virtio-read.patch | 240 - ...mmon-header-and-define-for-QEMU-buil.patch | 164 - ...ta-corruption-with-O_APPEND-write-in.patch | 136 - ...fuse_daemonize-ignored-return-values.patch | 120 - 
kvm-virtiofsd-Fix-xattr-operations.patch | 327 - ...-Format-imported-files-to-qemu-style.patch | 14743 -------------- kvm-virtiofsd-Handle-hard-reboot.patch | 65 - kvm-virtiofsd-Handle-reinit.patch | 53 - kvm-virtiofsd-Keep-track-of-replies.patch | 116 - ...Kill-threads-when-queues-are-stopped.patch | 143 - ...sync-work-even-if-only-inode-is-pass.patch | 96 - ...vhost-connection-instead-of-mounting.patch | 257 - ...ofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch | 76 - ...Pass-write-iov-s-all-the-way-through.patch | 140 - ...-fuse_bufvec-through-to-do_write_buf.patch | 168 - kvm-virtiofsd-Poll-kick_fd-for-queue.patch | 97 - ...t-multiply-running-with-same-vhost_u.patch | 144 - kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch | 945 - kvm-virtiofsd-Pull-in-upstream-headers.patch | 4911 ----- kvm-virtiofsd-Remove-fuse_req_getgroups.patch | 193 - ...move-unused-enum-fuse_buf_copy_flags.patch | 271 - ...Reset-O_DIRECT-flag-during-file-open.patch | 72 - kvm-virtiofsd-Send-replies-to-messages.patch | 199 - kvm-virtiofsd-Start-queue-threads.patch | 165 - ...sd-Start-reading-commands-from-queue.patch | 200 - ...virtiofsd-Start-wiring-up-vhost-user.patch | 247 - ...virtiofsd-Support-remote-posix-locks.patch | 355 - kvm-virtiofsd-Trim-down-imported-files.patch | 1582 -- ...irtiofsd-Trim-out-compatibility-code.patch | 545 - ...sd-add-definition-of-fuse_buf_writev.patch | 93 - ...iofsd-add-fd-FDNUM-fd-passing-option.patch | 170 - kvm-virtiofsd-add-fuse_mbuf_iter-API.patch | 134 - ...iofsd-add-helper-for-lo_data-cleanup.patch | 88 - ...fsd-add-o-source-PATH-to-help-output.patch | 46 - ...tiofsd-add-print-capabilities-option.patch | 121 - kvm-virtiofsd-add-seccomp-whitelist.patch | 285 - ...add-some-options-to-the-help-message.patch | 74 - ...iofsd-add-syslog-command-line-option.patch | 239 - ...ofsd-add-thread-pool-size-NUM-option.patch | 106 - kvm-virtiofsd-add-vhost-user.json-file.patch | 73 - kvm-virtiofsd-cap-ng-helpers.patch | 175 - ...input-buffer-size-in-fuse_lowlevel.c.patch | 
1111 - ...fsd-cleanup-allocated-resource-in-se.patch | 82 - ...t-more-fprintf-and-perror-to-use-fus.patch | 99 - ...d-do-not-always-set-FUSE_FLOCK_LOCKS.patch | 57 - ...virtiofsd-do_read-missing-NULL-check.patch | 49 - ...d-enable-PARALLEL_DIROPS-during-INIT.patch | 47 - ...ract-root-inode-init-into-setup_root.patch | 111 - ...hen-parent-inode-isn-t-known-in-lo_d.patch | 85 - ...virtiofsd-fix-error-handling-in-main.patch | 63 - ...correct-error-handling-in-lo_do_look.patch | 44 - ...tiofsd-fix-libfuse-information-leaks.patch | 322 - ...tiofsd-fix-lo_destroy-resource-leaks.patch | 94 - ...rtiofsd-fix-memory-leak-on-lo.source.patch | 66 - ...ate_listen_socket-error-path-socket-.patch | 56 - ...virtiofsd-get-set-features-callbacks.patch | 66 - ...uce-inode-refcount-to-prevent-use-af.patch | 589 - kvm-virtiofsd-load_capng-missing-unlock.patch | 46 - ...tiofsd-make-f-foreground-the-default.patch | 76 - kvm-virtiofsd-make-lo_release-atomic.patch | 62 - ...irtiofsd-move-to-a-new-pid-namespace.patch | 223 - ...d-move-to-an-empty-network-namespace.patch | 66 - ...rough_ll-Pass-errno-to-fuse_reply_er.patch | 54 - ...rough_ll-Use-cache_readdir-for-direc.patch | 48 - ...rough_ll-add-dirp_map-to-hide-lo_dir.patch | 238 - ...through_ll-add-fallback-for-racy-ops.patch | 303 - ...rough_ll-add-fd_map-to-hide-file-des.patch | 328 - ...rough_ll-add-ino_map-to-hide-lo_inod.patch | 395 - ...rough_ll-add-lo_map-for-ino-fh-indir.patch | 182 - ...passthrough_ll-add-renameat2-support.patch | 52 - ...rough_ll-clean-up-cache-related-opti.patch | 138 - ...hrough_ll-cleanup-getxattr-listxattr.patch | 154 - ...d-passthrough_ll-control-readdirplus.patch | 79 - ...rough_ll-create-new-files-in-caller-.patch | 198 - ...rough_ll-disable-readdirplus-on-cach.patch | 50 - ...rough_ll-fix-refcounting-on-remove-r.patch | 143 - ...rtiofsd-passthrough_ll-use-hashtable.patch | 211 - ...fsd-prevent-.-escape-in-lo_do_lookup.patch | 54 - ...sd-prevent-.-escape-in-lo_do_readdir.patch | 108 - 
...prevent-FUSE_INIT-FUSE_DESTROY-races.patch | 103 - ...t-fv_queue_thread-vs-virtio_loop-rac.patch | 149 - ...iofsd-prevent-races-with-lo_dirp_put.patch | 147 - ...log-only-when-priority-is-high-enoug.patch | 469 - ...sd-process-requests-in-a-thread-pool.patch | 533 - ...fsd-remove-mountpoint-dummy-argument.patch | 159 - ...d-remove-unused-notify-reply-support.patch | 294 - ...name-inode-refcount-to-inode-nlookup.patch | 139 - ...-unref_inode-to-unref_inode_lolocked.patch | 94 - kvm-virtiofsd-sandbox-mount-namespace.patch | 166 - ...ofsd-set-maximum-RLIMIT_NOFILE-limit.patch | 93 - ...ll-queue-threads-on-exit-in-virtio_l.patch | 72 - ...t-nanosecond-resolution-for-file-tim.patch | 83 - ...se_buf_writev-to-replace-fuse_buf_wr.patch | 82 - ...se_lowlevel_is_virtio-in-fuse_sessio.patch | 56 - ...-proc-self-fd-O_PATH-file-descriptor.patch | 390 - ...te-input-buffer-sizes-in-do_write_bu.patch | 137 - kvm-virtiofsd-validate-path-components.patch | 164 - ...-passthrough_ll-fix-fallocate-ifdefs.patch | 56 - kvm-xhci-recheck-slot-status.patch | 77 - kvm-xics-Don-t-deassert-outputs.patch | 52 - kvm.modules | 18 - qemu-kvm.spec | 836 +- sources | 2 +- 248 files changed, 21648 insertions(+), 61883 deletions(-) create mode 100644 0001-redhat-Adding-slirp-to-the-exploded-tree.patch delete mode 100644 0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch rename kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch => 0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch (90%) delete mode 100644 kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch delete mode 100644 kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch delete mode 100644 kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch delete mode 100644 kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch delete mode 100644 kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch delete mode 100644 kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch delete mode 
100644 kvm-backup-top-Begin-drain-earlier.patch delete mode 100644 kvm-block-Activate-recursively-even-for-already-active-n.patch delete mode 100644 kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch delete mode 100644 kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch delete mode 100644 kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch delete mode 100644 kvm-block-Generic-file-creation-fallback.patch delete mode 100644 kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch delete mode 100644 kvm-block-Introduce-bdrv_reopen_commit_post-step.patch delete mode 100644 kvm-block-Make-bdrv_get_cumulative_perm-public.patch delete mode 100644 kvm-block-Relax-restrictions-for-blockdev-snapshot.patch delete mode 100644 kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch delete mode 100644 kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch delete mode 100644 kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch delete mode 100644 kvm-block-nbd-Fix-hang-in-.bdrv_close.patch delete mode 100644 kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch delete mode 100644 kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch delete mode 100644 kvm-block-trickle-down-the-fallback-image-creation-funct.patch delete mode 100644 kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch delete mode 100644 kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch delete mode 100644 kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch delete mode 100644 kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch delete mode 100644 kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch delete mode 100644 kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch delete mode 100644 kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch delete mode 100644 kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch delete mode 100644 
kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch delete mode 100644 kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch delete mode 100644 kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch delete mode 100644 kvm-file-posix-Drop-hdev_co_create_opts.patch delete mode 100644 kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch delete mode 100644 kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch delete mode 100644 kvm-i386-Remove-cpu64-rhel6-CPU-model.patch delete mode 100644 kvm-i386-Resolve-CPU-models-to-v1-by-default.patch delete mode 100644 kvm-iotests-Add-iothread-cases-to-155.patch delete mode 100644 kvm-iotests-Add-test-for-image-creation-fallback.patch delete mode 100644 kvm-iotests-Create-VM.blockdev_create.patch delete mode 100644 kvm-iotests-Fix-run_job-with-use_log-False.patch delete mode 100644 kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch delete mode 100644 kvm-iotests-Support-job-complete-in-run_job.patch delete mode 100644 kvm-iotests-Test-external-snapshot-with-VM-state.patch delete mode 100644 kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch delete mode 100644 kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch delete mode 100644 kvm-iotests-Use-complete_and_wait-in-155.patch delete mode 100644 kvm-iotests.py-Let-wait_migration-wait-even-more.patch delete mode 100644 kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch delete mode 100644 kvm-iscsi-Drop-iscsi_co_create_opts.patch delete mode 100644 kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch delete mode 100644 kvm-libvhost-user-Fix-some-memtable-remap-cases.patch delete mode 100644 kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch delete mode 100644 kvm-migration-Create-migration_is_running.patch delete mode 100644 kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch delete mode 100644 kvm-migration-Don-t-send-data-if-we-have-stopped.patch delete mode 100644 
kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch delete mode 100644 kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch delete mode 100644 kvm-migration-Rate-limit-inside-host-pages.patch delete mode 100644 kvm-migration-multifd-clean-pages-after-filling-packet.patch delete mode 100644 kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch delete mode 100644 kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch delete mode 100644 kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch delete mode 100644 kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch delete mode 100644 kvm-mirror-Store-MirrorOp.co-for-debuggability.patch delete mode 100644 kvm-mirror-Wait-only-for-in-flight-operations.patch delete mode 100644 kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch delete mode 100644 kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch delete mode 100644 kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch delete mode 100644 kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch delete mode 100644 kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch delete mode 100644 kvm-qemu-file-Don-t-do-IO-after-shutdown.patch delete mode 100644 kvm-replication-assert-we-own-context-before-job_cancel_.patch delete mode 100644 kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch delete mode 100644 kvm-slirp-use-correct-size-while-emulating-commands.patch delete mode 100644 kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch delete mode 100644 kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch delete mode 100644 kvm-target-arm-arch_dump-Add-SVE-notes.patch delete mode 100644 kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch delete mode 100644 kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch delete mode 100644 kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch delete mode 100644 
kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch delete mode 100644 kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch delete mode 100644 kvm-target-i386-add-a-ucode-rev-property.patch delete mode 100644 kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch delete mode 100644 kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch delete mode 100644 kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch delete mode 100644 kvm-target-i386-fix-TCG-UCODE_REV-access.patch delete mode 100644 kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch delete mode 100644 kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch delete mode 100644 kvm-tcp_emu-Fix-oob-access.patch delete mode 100644 kvm-tcp_emu-fix-unsafe-snprintf-usages.patch delete mode 100644 kvm-tests-arm-cpu-features-Check-feature-default-values.patch delete mode 100644 kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch delete mode 100644 kvm-tpm-ppi-page-align-PPI-RAM.patch delete mode 100644 kvm-trace-update-qemu-trace-stap-to-Python-3.patch delete mode 100644 kvm-usbredir-Prevent-recursion-in-usbredir_write.patch delete mode 100644 kvm-util-add-slirp_fmt-helpers.patch delete mode 100644 kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch delete mode 100644 kvm-vhost-Add-names-to-section-rounded-warning.patch delete mode 100644 kvm-vhost-Only-align-sections-for-vhost-user.patch delete mode 100644 kvm-vhost-coding-style-fix.patch delete mode 100644 kvm-vhost-user-Print-unexpected-slave-message-types.patch delete mode 100644 kvm-vhost-user-fs-remove-vhostfd-property.patch delete mode 100644 kvm-vhost-user-gpu-Drop-trailing-json-comma.patch delete mode 100644 kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch delete mode 100644 kvm-virtio-don-t-enable-notifications-during-polling.patch delete mode 100644 kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch delete mode 100644 
kvm-virtio-make-virtio_delete_queue-idempotent.patch delete mode 100644 kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch delete mode 100644 kvm-virtio-reset-region-cache-when-on-queue-deletion.patch delete mode 100644 kvm-virtiofs-Add-maintainers-entry.patch delete mode 100644 kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch delete mode 100644 kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch delete mode 100644 kvm-virtiofsd-Add-auxiliary-.c-s.patch delete mode 100644 kvm-virtiofsd-Add-fuse_lowlevel.c.patch delete mode 100644 kvm-virtiofsd-Add-main-virtio-loop.patch delete mode 100644 kvm-virtiofsd-Add-options-for-virtio.patch delete mode 100644 kvm-virtiofsd-Add-passthrough_ll.patch delete mode 100644 kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch delete mode 100644 kvm-virtiofsd-Clean-up-inodes-on-destroy.patch delete mode 100644 kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch delete mode 100644 kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch delete mode 100644 kvm-virtiofsd-Fast-path-for-virtio-read.patch delete mode 100644 kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch delete mode 100644 kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch delete mode 100644 kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch delete mode 100644 kvm-virtiofsd-Fix-xattr-operations.patch delete mode 100644 kvm-virtiofsd-Format-imported-files-to-qemu-style.patch delete mode 100644 kvm-virtiofsd-Handle-hard-reboot.patch delete mode 100644 kvm-virtiofsd-Handle-reinit.patch delete mode 100644 kvm-virtiofsd-Keep-track-of-replies.patch delete mode 100644 kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch delete mode 100644 kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch delete mode 100644 kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch delete mode 100644 kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch delete mode 100644 
kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch delete mode 100644 kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch delete mode 100644 kvm-virtiofsd-Poll-kick_fd-for-queue.patch delete mode 100644 kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch delete mode 100644 kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch delete mode 100644 kvm-virtiofsd-Pull-in-upstream-headers.patch delete mode 100644 kvm-virtiofsd-Remove-fuse_req_getgroups.patch delete mode 100644 kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch delete mode 100644 kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch delete mode 100644 kvm-virtiofsd-Send-replies-to-messages.patch delete mode 100644 kvm-virtiofsd-Start-queue-threads.patch delete mode 100644 kvm-virtiofsd-Start-reading-commands-from-queue.patch delete mode 100644 kvm-virtiofsd-Start-wiring-up-vhost-user.patch delete mode 100644 kvm-virtiofsd-Support-remote-posix-locks.patch delete mode 100644 kvm-virtiofsd-Trim-down-imported-files.patch delete mode 100644 kvm-virtiofsd-Trim-out-compatibility-code.patch delete mode 100644 kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch delete mode 100644 kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch delete mode 100644 kvm-virtiofsd-add-fuse_mbuf_iter-API.patch delete mode 100644 kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch delete mode 100644 kvm-virtiofsd-add-o-source-PATH-to-help-output.patch delete mode 100644 kvm-virtiofsd-add-print-capabilities-option.patch delete mode 100644 kvm-virtiofsd-add-seccomp-whitelist.patch delete mode 100644 kvm-virtiofsd-add-some-options-to-the-help-message.patch delete mode 100644 kvm-virtiofsd-add-syslog-command-line-option.patch delete mode 100644 kvm-virtiofsd-add-thread-pool-size-NUM-option.patch delete mode 100644 kvm-virtiofsd-add-vhost-user.json-file.patch delete mode 100644 kvm-virtiofsd-cap-ng-helpers.patch delete mode 100644 kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch delete mode 100644 
kvm-virtiofsd-cleanup-allocated-resource-in-se.patch delete mode 100644 kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch delete mode 100644 kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch delete mode 100644 kvm-virtiofsd-do_read-missing-NULL-check.patch delete mode 100644 kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch delete mode 100644 kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch delete mode 100644 kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch delete mode 100644 kvm-virtiofsd-fix-error-handling-in-main.patch delete mode 100644 kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch delete mode 100644 kvm-virtiofsd-fix-libfuse-information-leaks.patch delete mode 100644 kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch delete mode 100644 kvm-virtiofsd-fix-memory-leak-on-lo.source.patch delete mode 100644 kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch delete mode 100644 kvm-virtiofsd-get-set-features-callbacks.patch delete mode 100644 kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch delete mode 100644 kvm-virtiofsd-load_capng-missing-unlock.patch delete mode 100644 kvm-virtiofsd-make-f-foreground-the-default.patch delete mode 100644 kvm-virtiofsd-make-lo_release-atomic.patch delete mode 100644 kvm-virtiofsd-move-to-a-new-pid-namespace.patch delete mode 100644 kvm-virtiofsd-move-to-an-empty-network-namespace.patch delete mode 100644 kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch delete mode 100644 kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch delete mode 100644 kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch delete mode 100644 kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch delete mode 100644 kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch delete mode 100644 kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch delete mode 100644 
kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch delete mode 100644 kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch delete mode 100644 kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch delete mode 100644 kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch delete mode 100644 kvm-virtiofsd-passthrough_ll-control-readdirplus.patch delete mode 100644 kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch delete mode 100644 kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch delete mode 100644 kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch delete mode 100644 kvm-virtiofsd-passthrough_ll-use-hashtable.patch delete mode 100644 kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch delete mode 100644 kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch delete mode 100644 kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch delete mode 100644 kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch delete mode 100644 kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch delete mode 100644 kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch delete mode 100644 kvm-virtiofsd-process-requests-in-a-thread-pool.patch delete mode 100644 kvm-virtiofsd-remove-mountpoint-dummy-argument.patch delete mode 100644 kvm-virtiofsd-remove-unused-notify-reply-support.patch delete mode 100644 kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch delete mode 100644 kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch delete mode 100644 kvm-virtiofsd-sandbox-mount-namespace.patch delete mode 100644 kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch delete mode 100644 kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch delete mode 100644 kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch delete mode 100644 kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch delete mode 100644 kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch delete mode 
100644 kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch delete mode 100644 kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch delete mode 100644 kvm-virtiofsd-validate-path-components.patch delete mode 100644 kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch delete mode 100644 kvm-xhci-recheck-slot-status.patch delete mode 100644 kvm-xics-Don-t-deassert-outputs.patch delete mode 100644 kvm.modules diff --git a/.gitignore b/.gitignore index 713ad2e..ba7d4aa 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,13 @@ -/qemu-*.tar.xz +/qemu-3.1.0.tar.xz +/qemu-4.0.0.tar.xz +/qemu-4.1.0-rc4.tar.xz +/qemu-4.1.0.tar.xz +/qemu-4.2.0-rc1.tar.xz +/qemu-4.2.0-rc4.tar.xz +/qemu-4.2.0.tar.xz +/qemu-5.0.0-rc0.tar.xz +/qemu-5.0.0-rc1.tar.xz +/qemu-5.0.0-rc2.tar.xz +/qemu-5.0.0-rc3.tar.xz +/qemu-5.0.0-rc4.tar.xz +/qemu-5.0.0.tar.xz diff --git a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch new file mode 100644 index 0000000..04e73be --- /dev/null +++ b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch @@ -0,0 +1,16975 @@ +From e4d185c8c4efbf15a9380c1433bc66b49a09e79d Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 23 Apr 2020 05:26:54 +0200 +Subject: redhat: Adding slirp to the exploded tree + +RH-Author: Danilo de Paula +Message-id: <20190907020756.8619-1-ddepaula@redhat.com> +Patchwork-id: 90309 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] redhat: Adding slirp to the exploded tree +Bugzilla: +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Wainer dos Santos Moschetta + +Until qemu-kvm-3.1 slirp used to live as a regular folder in qemu-kvm. +After that it got moved into its own submodule. Which means it's not +part of the qemu-kvm git tree anymore. + +This passed unoticed for RHEL-AV-8.0.1 and 8.1.0 because qemu still ships +the code in the tarball. That's why scratch builds still works (it's based in +the tarball content). 
+ +As we're receiving some CVE's against slirp, we need a way to patch +slirp in RHEL-8.1.0 without handling as a separate package (as we do for +firmwares). + +The simplest solution is to copy the slirp folder from the tarball into the +exploded tree. + +To be able to do that, I had to make some changes: + +slirp needs to be removed from .gitmodules, otherwise git complains +about files on it. + +Since "make -C redhat rh-brew" uses the tarball and apply all the +patches on top of it, we need to remove the folder from the tarball before applying +the patch (because we are actually re-applying them). + +We also need to use --ignore-submodule while generating the patches for +scratch-build, otherwise it will include some weird definition of the +slirp folder in the patch, something that /usr/bin/patch gets mad with. + +After that I compared the patch list, after and before this change, and +saw no major differences. + +This is an exploded-tree-only change and shouldn't be applied to dist-git. + +Signed-off-by: Danilo C. L. 
de Paula + +Rebase notes (5.0.0-rc4): + - Update slirp directory to commit 2faae0f778 (used upstream) +--- + .gitmodules | 3 - + slirp/.clang-format | 58 ++ + slirp/.gitignore | 10 + + slirp/.gitlab-ci.yml | 27 + + slirp/.gitpublish | 3 + + slirp/CHANGELOG.md | 88 +++ + slirp/COPYRIGHT | 62 ++ + slirp/Makefile | 62 ++ + slirp/README.md | 60 ++ + slirp/build-aux/git-version-gen | 158 ++++ + slirp/build-aux/meson-dist | 16 + + slirp/meson.build | 134 ++++ + slirp/src/arp_table.c | 91 +++ + slirp/src/bootp.c | 369 ++++++++++ + slirp/src/bootp.h | 129 ++++ + slirp/src/cksum.c | 179 +++++ + slirp/src/debug.h | 51 ++ + slirp/src/dhcpv6.c | 224 ++++++ + slirp/src/dhcpv6.h | 68 ++ + slirp/src/dnssearch.c | 306 ++++++++ + slirp/src/if.c | 213 ++++++ + slirp/src/if.h | 25 + + slirp/src/ip.h | 242 ++++++ + slirp/src/ip6.h | 214 ++++++ + slirp/src/ip6_icmp.c | 434 +++++++++++ + slirp/src/ip6_icmp.h | 219 ++++++ + slirp/src/ip6_input.c | 78 ++ + slirp/src/ip6_output.c | 39 + + slirp/src/ip_icmp.c | 489 +++++++++++++ + slirp/src/ip_icmp.h | 166 +++++ + slirp/src/ip_input.c | 461 ++++++++++++ + slirp/src/ip_output.c | 169 +++++ + slirp/src/libslirp-version.h.in | 24 + + slirp/src/libslirp.h | 171 +++++ + slirp/src/libslirp.map | 30 + + slirp/src/main.h | 16 + + slirp/src/mbuf.c | 224 ++++++ + slirp/src/mbuf.h | 127 ++++ + slirp/src/misc.c | 390 ++++++++++ + slirp/src/misc.h | 72 ++ + slirp/src/ncsi-pkt.h | 445 +++++++++++ + slirp/src/ncsi.c | 192 +++++ + slirp/src/ndp_table.c | 87 +++ + slirp/src/sbuf.c | 168 +++++ + slirp/src/sbuf.h | 27 + + slirp/src/slirp.c | 1185 ++++++++++++++++++++++++++++++ + slirp/src/slirp.h | 283 +++++++ + slirp/src/socket.c | 957 ++++++++++++++++++++++++ + slirp/src/socket.h | 164 +++++ + slirp/src/state.c | 379 ++++++++++ + slirp/src/stream.c | 120 +++ + slirp/src/stream.h | 35 + + slirp/src/tcp.h | 169 +++++ + slirp/src/tcp_input.c | 1539 +++++++++++++++++++++++++++++++++++++++ + slirp/src/tcp_output.c | 516 +++++++++++++ + slirp/src/tcp_subr.c | 980 
+++++++++++++++++++++++++ + slirp/src/tcp_timer.c | 286 ++++++++ + slirp/src/tcp_timer.h | 130 ++++ + slirp/src/tcp_var.h | 161 ++++ + slirp/src/tcpip.h | 104 +++ + slirp/src/tftp.c | 462 ++++++++++++ + slirp/src/tftp.h | 52 ++ + slirp/src/udp.c | 361 +++++++++ + slirp/src/udp.h | 90 +++ + slirp/src/udp6.c | 173 +++++ + slirp/src/util.c | 428 +++++++++++ + slirp/src/util.h | 189 +++++ + slirp/src/version.c | 8 + + slirp/src/vmstate.c | 444 +++++++++++ + slirp/src/vmstate.h | 391 ++++++++++ + 70 files changed, 16423 insertions(+), 3 deletions(-) + create mode 100644 slirp/.clang-format + create mode 100644 slirp/.gitignore + create mode 100644 slirp/.gitlab-ci.yml + create mode 100644 slirp/.gitpublish + create mode 100644 slirp/CHANGELOG.md + create mode 100644 slirp/COPYRIGHT + create mode 100644 slirp/Makefile + create mode 100644 slirp/README.md + create mode 100755 slirp/build-aux/git-version-gen + create mode 100755 slirp/build-aux/meson-dist + create mode 100644 slirp/meson.build + create mode 100644 slirp/src/arp_table.c + create mode 100644 slirp/src/bootp.c + create mode 100644 slirp/src/bootp.h + create mode 100644 slirp/src/cksum.c + create mode 100644 slirp/src/debug.h + create mode 100644 slirp/src/dhcpv6.c + create mode 100644 slirp/src/dhcpv6.h + create mode 100644 slirp/src/dnssearch.c + create mode 100644 slirp/src/if.c + create mode 100644 slirp/src/if.h + create mode 100644 slirp/src/ip.h + create mode 100644 slirp/src/ip6.h + create mode 100644 slirp/src/ip6_icmp.c + create mode 100644 slirp/src/ip6_icmp.h + create mode 100644 slirp/src/ip6_input.c + create mode 100644 slirp/src/ip6_output.c + create mode 100644 slirp/src/ip_icmp.c + create mode 100644 slirp/src/ip_icmp.h + create mode 100644 slirp/src/ip_input.c + create mode 100644 slirp/src/ip_output.c + create mode 100644 slirp/src/libslirp-version.h.in + create mode 100644 slirp/src/libslirp.h + create mode 100644 slirp/src/libslirp.map + create mode 100644 slirp/src/main.h + create mode 
100644 slirp/src/mbuf.c + create mode 100644 slirp/src/mbuf.h + create mode 100644 slirp/src/misc.c + create mode 100644 slirp/src/misc.h + create mode 100644 slirp/src/ncsi-pkt.h + create mode 100644 slirp/src/ncsi.c + create mode 100644 slirp/src/ndp_table.c + create mode 100644 slirp/src/sbuf.c + create mode 100644 slirp/src/sbuf.h + create mode 100644 slirp/src/slirp.c + create mode 100644 slirp/src/slirp.h + create mode 100644 slirp/src/socket.c + create mode 100644 slirp/src/socket.h + create mode 100644 slirp/src/state.c + create mode 100644 slirp/src/stream.c + create mode 100644 slirp/src/stream.h + create mode 100644 slirp/src/tcp.h + create mode 100644 slirp/src/tcp_input.c + create mode 100644 slirp/src/tcp_output.c + create mode 100644 slirp/src/tcp_subr.c + create mode 100644 slirp/src/tcp_timer.c + create mode 100644 slirp/src/tcp_timer.h + create mode 100644 slirp/src/tcp_var.h + create mode 100644 slirp/src/tcpip.h + create mode 100644 slirp/src/tftp.c + create mode 100644 slirp/src/tftp.h + create mode 100644 slirp/src/udp.c + create mode 100644 slirp/src/udp.h + create mode 100644 slirp/src/udp6.c + create mode 100644 slirp/src/util.c + create mode 100644 slirp/src/util.h + create mode 100644 slirp/src/version.c + create mode 100644 slirp/src/vmstate.c + create mode 100644 slirp/src/vmstate.h + +diff --git a/slirp/.clang-format b/slirp/.clang-format +new file mode 100644 +index 0000000..17fb49f +--- /dev/null ++++ b/slirp/.clang-format +@@ -0,0 +1,58 @@ ++# https://clang.llvm.org/docs/ClangFormat.html ++# https://clang.llvm.org/docs/ClangFormatStyleOptions.html ++--- ++Language: Cpp ++AlignAfterOpenBracket: Align ++AlignConsecutiveAssignments: false # although we like it, it creates churn ++AlignConsecutiveDeclarations: false ++AlignEscapedNewlinesLeft: true ++AlignOperands: true ++AlignTrailingComments: false # churn ++AllowAllParametersOfDeclarationOnNextLine: true ++AllowShortBlocksOnASingleLine: false ++AllowShortCaseLabelsOnASingleLine: 
false ++AllowShortFunctionsOnASingleLine: None ++AllowShortIfStatementsOnASingleLine: false ++AllowShortLoopsOnASingleLine: false ++AlwaysBreakAfterReturnType: None # AlwaysBreakAfterDefinitionReturnType is taken into account ++AlwaysBreakBeforeMultilineStrings: false ++BinPackArguments: true ++BinPackParameters: true ++BraceWrapping: ++ AfterControlStatement: false ++ AfterEnum: false ++ AfterFunction: true ++ AfterStruct: false ++ AfterUnion: false ++ BeforeElse: false ++ IndentBraces: false ++BreakBeforeBinaryOperators: None ++BreakBeforeBraces: Custom ++BreakBeforeTernaryOperators: false ++BreakStringLiterals: true ++ColumnLimit: 80 ++ContinuationIndentWidth: 4 ++Cpp11BracedListStyle: false ++DerivePointerAlignment: false ++DisableFormat: false ++IndentCaseLabels: false ++IndentWidth: 4 ++IndentWrappedFunctionNames: false ++KeepEmptyLinesAtTheStartOfBlocks: false ++MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? ++MacroBlockEnd: '.*_END$' ++MaxEmptyLinesToKeep: 2 ++PointerAlignment: Right ++ReflowComments: true ++SortIncludes: false ++SpaceAfterCStyleCast: false ++SpaceBeforeAssignmentOperators: true ++SpaceBeforeParens: ControlStatements ++SpaceInEmptyParentheses: false ++SpacesBeforeTrailingComments: 1 ++SpacesInContainerLiterals: true ++SpacesInParentheses: false ++SpacesInSquareBrackets: false ++Standard: Auto ++UseTab: Never ++... +diff --git a/slirp/CHANGELOG.md b/slirp/CHANGELOG.md +new file mode 100644 +index 0000000..67b0a74 +--- /dev/null ++++ b/slirp/CHANGELOG.md +@@ -0,0 +1,88 @@ ++# Changelog ++ ++All notable changes to this project will be documented in this file. ++ ++The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ++and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ++ ++## [Unreleased] ++ ++### Added ++ ++### Changed ++ ++### Deprecated ++ ++### Fixed ++ ++## [4.2.0] - 2020-03-17 ++ ++### Added ++ ++ - New API function `slirp_add_unix`: add a forward rule to a Unix socket. 
++ - New API function `slirp_remove_guestfwd`: remove a forward rule previously ++ added by `slirp_add_exec`, `slirp_add_unix` or `slirp_add_guestfwd` ++ - New SlirpConfig.outbound_addr{,6} fields to bind output socket to a ++ specific address ++ ++### Changed ++ ++ - socket: do not fallback on host loopback if get_dns_addr() failed ++ or the address is in slirp network ++ ++### Fixed ++ ++ - ncsi: fix checksum OOB memory access ++ - `tcp_emu()`: fix OOB accesses ++ - tftp: restrict relative path access ++ - state: fix loading of guestfwd state ++ ++## [4.1.0] - 2019-12-02 ++ ++### Added ++ ++ - The `slirp_new()` API, simpler and more extensible than `slirp_init()`. ++ - Allow custom MTU configuration. ++ - Option to disable host loopback connections. ++ - CI now runs scan-build too. ++ ++### Changed ++ ++ - Disable `tcp_emu()` by default. `tcp_emu()` is known to have caused ++ several CVEs, and not useful today in most cases. The feature can ++ be still enabled by setting `SlirpConfig.enable_emu` to true. ++ - meson build system is now `subproject()` friendly. ++ - Replace remaining `malloc()`/`free()` with glib (which aborts on OOM) ++ - Various code cleanups. ++ ++### Deprecated ++ ++ - The `slirp_init()` API. ++ ++### Fixed ++ ++ - `getpeername()` error after `shutdown(SHUT_WR)`. ++ - Exec forward: correctly parse command lines that contain spaces. ++ - Allow 0.0.0.0 destination address. ++ - Make host receive broadcast packets. ++ - Various memory related fixes (heap overflow, leaks, NULL ++ dereference). ++ - Compilation warnings, dead code. ++ ++## [4.0.0] - 2019-05-24 ++ ++### Added ++ ++ - Installable as a shared library. ++ - meson build system ++ (& make build system for in-tree QEMU integration) ++ ++### Changed ++ ++ - Standalone project, removing any QEMU dependency. ++ - License clarifications. 
++ ++[unreleased]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.2.0...master ++[4.2.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.1.0...v4.2.0 ++[4.1.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.0.0...v4.1.0 ++[4.0.0]: https://gitlab.freedesktop.org/slirp/libslirp/commits/v4.0.0 +diff --git a/slirp/COPYRIGHT b/slirp/COPYRIGHT +new file mode 100644 +index 0000000..ed49512 +--- /dev/null ++++ b/slirp/COPYRIGHT +@@ -0,0 +1,62 @@ ++Slirp was written by Danny Gasparovski. ++Copyright (c), 1995,1996 All Rights Reserved. ++ ++Slirp is free software; "free" as in you don't have to pay for it, and you ++are free to do whatever you want with it. I do not accept any donations, ++monetary or otherwise, for Slirp. Instead, I would ask you to pass this ++potential donation to your favorite charity. In fact, I encourage ++*everyone* who finds Slirp useful to make a small donation to their ++favorite charity (for example, GreenPeace). This is not a requirement, but ++a suggestion from someone who highly values the service they provide. ++ ++The copyright terms and conditions: ++ ++---BEGIN--- ++ ++ Copyright (c) 1995,1996 Danny Gasparovski. All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ 1. Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ 2. Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ 3. Neither the name of the copyright holder nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. 
++ ++ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, ++ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY ++ AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ++ DANNY GASPAROVSKI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, ++ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ++ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++---END--- ++ ++This basically means you can do anything you want with the software, except ++1) call it your own, and 2) claim warranty on it. There is no warranty for ++this software. None. Nada. If you lose a million dollars while using ++Slirp, that's your loss not mine. So, ***USE AT YOUR OWN RISK!***. ++ ++If these conditions cannot be met due to legal restrictions (E.g. where it ++is against the law to give out Software without warranty), you must cease ++using the software and delete all copies you have. ++ ++Slirp uses code that is copyrighted by the following people/organizations: ++ ++Juha Pirkola. ++Gregory M. Christy. ++The Regents of the University of California. ++Carnegie Mellon University. ++The Australian National University. ++RSA Data Security, Inc. ++ ++Please read the top of each source file for the details on the various ++copyrights. +diff --git a/slirp/Makefile b/slirp/Makefile +new file mode 100644 +index 0000000..8857b41 +--- /dev/null ++++ b/slirp/Makefile +@@ -0,0 +1,62 @@ ++ROOT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) ++BUILD_DIR ?= . 
++ ++LIBSLIRP = $(BUILD_DIR)/libslirp.a ++SLIRP_MAJOR_VERSION = 4 ++SLIRP_MINOR_VERSION = 2 ++SLIRP_MICRO_VERSION = 0 ++SLIRP_VERSION_STRING = "$(SLIRP_MAJOR_VERSION).$(SLIRP_MINOR_VERSION).$(SLIRP_MICRO_VERSION)-git" ++ ++all: $(LIBSLIRP) ++ ++SRCS := $(wildcard src/*.c) ++OBJS := $(SRCS:%.c=$(BUILD_DIR)/%.o) ++DEPS := $(OBJS:%.o=%.d) ++ ++INC_DIRS := $(BUILD_DIR)/src ++INC_FLAGS := $(addprefix -I,$(INC_DIRS)) ++ ++override CFLAGS += \ ++ -DG_LOG_DOMAIN='"Slirp"' \ ++ $(shell $(PKG_CONFIG) --cflags glib-2.0) \ ++ $(INC_FLAGS) \ ++ -MMD -MP ++override LDFLAGS += $(shell $(PKG_CONFIG) --libs glib-2.0) ++ ++$(BUILD_DIR)/src/libslirp-version.h: Makefile ++ @$(MKDIR_P) $(dir $@) ++ $(call quiet-command,cat $(ROOT_DIR)/src/libslirp-version.h.in | \ ++ sed 's/@SLIRP_MAJOR_VERSION@/$(SLIRP_MAJOR_VERSION)/' | \ ++ sed 's/@SLIRP_MINOR_VERSION@/$(SLIRP_MINOR_VERSION)/' | \ ++ sed 's/@SLIRP_MICRO_VERSION@/$(SLIRP_MICRO_VERSION)/' | \ ++ sed 's/@SLIRP_VERSION_STRING@/$(SLIRP_VERSION_STRING)/' \ ++ > $@,"GEN","$@") ++ ++$(OBJS): $(BUILD_DIR)/src/libslirp-version.h ++ ++$(LIBSLIRP): $(OBJS) ++ ++.PHONY: clean ++ ++clean: ++ rm -r $(OBJS) $(DEPS) $(LIBSLIRP) $(BUILD_DIR)/src/libslirp-version.h ++ ++$(BUILD_DIR)/src/%.o: $(ROOT_DIR)/src/%.c ++ @$(MKDIR_P) $(dir $@) ++ $(call quiet-command,$(CC) $(CFLAGS) -c -o $@ $<,"CC","$@") ++ ++%.a: ++ $(call quiet-command,rm -f $@ && $(AR) rcs $@ $^,"AR","$@") ++ ++PKG_CONFIG ?= pkg-config ++MKDIR_P ?= mkdir -p ++quiet-command-run = $(if $(V),,$(if $2,printf " %-7s %s\n" $2 $3 && ))$1 ++quiet-@ = $(if $(V),,@) ++quiet-command = $(quiet-@)$(call quiet-command-run,$1,$2,$3) ++ ++print-%: ++ @echo '$*=$($*)' ++ ++.SUFFIXES: ++ ++-include $(DEPS) +diff --git a/slirp/README.md b/slirp/README.md +new file mode 100644 +index 0000000..dc11e5f +--- /dev/null ++++ b/slirp/README.md +@@ -0,0 +1,60 @@ ++# libslirp ++ ++libslirp is a user-mode networking library used by virtual machines, ++containers or various tools. 
++ ++## Getting Started ++ ++### Prerequisites ++ ++A C compiler, make/meson and glib2 development libraries. ++ ++(see also [.gitlab-ci.yml](.gitlab-ci.yml) DEPS variable for the list ++of dependencies on Fedora) ++ ++### Building ++ ++You may build and install the shared library with meson: ++ ++``` sh ++meson build ++ninja -C build install ++``` ++And configure QEMU with --enable-slirp=system to link against it. ++ ++(QEMU may build with the submodule static library using --enable-slirp=git) ++ ++### Testing ++ ++Unfortunately, there are no automated tests available. ++ ++You may run QEMU ``-net user`` linked with your development version. ++ ++## Contributing ++ ++Feel free to open issues on the [project ++issues](https://gitlab.freedesktop.org/slirp/libslirp/issues) page. ++ ++You may clone the [gitlab ++project](https://gitlab.freedesktop.org/slirp/libslirp) and create a ++merge request. ++ ++Contributing with gitlab allows gitlab workflow, tracking issues, ++running CI etc. ++ ++Alternatively, you may send patches to slirp@lists.freedesktop.org ++mailing list. ++ ++## Versioning ++ ++We intend to use [libtool's ++versioning](https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html) ++for the shared libraries and use [SemVer](http://semver.org/) for ++project versions. ++ ++For the versions available, see the [tags on this ++repository](https://gitlab.freedesktop.org/slirp/libslirp/releases). ++ ++## License ++ ++See the [COPYRIGHT](COPYRIGHT) file for details. +diff --git a/slirp/build-aux/git-version-gen b/slirp/build-aux/git-version-gen +new file mode 100755 +index 0000000..5617eb8 +--- /dev/null ++++ b/slirp/build-aux/git-version-gen +@@ -0,0 +1,158 @@ ++#!/bin/sh ++# Print a version string. ++scriptversion=2010-06-14.19; # UTC ++ ++# Copyright (C) 2007-2010 Free Software Foundation, Inc. 
++# ++# This program is free software: you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see <http://www.gnu.org/licenses/>. ++ ++# This script is derived from GIT-VERSION-GEN from GIT: http://git.or.cz/. ++# It may be run two ways: ++# - from a git repository in which the "git describe" command below ++# produces useful output (thus requiring at least one signed tag) ++# - from a non-git-repo directory containing a .tarball-version file, which ++# presumes this script is invoked like "./git-version-gen .tarball-version". ++ ++# In order to use intra-version strings in your project, you will need two ++# separate generated version string files: ++# ++# .tarball-version - present only in a distribution tarball, and not in ++# a checked-out repository. Created with contents that were learned at ++# the last time autoconf was run, and used by git-version-gen. Must not ++# be present in either $(srcdir) or $(builddir) for git-version-gen to ++# give accurate answers during normal development with a checked out tree, ++# but must be present in a tarball when there is no version control system. ++# Therefore, it cannot be used in any dependencies. GNUmakefile has ++# hooks to force a reconfigure at distribution time to get the value ++# correct, without penalizing normal development with extra reconfigures. ++# ++# .version - present in a checked-out repository and in a distribution ++# tarball.
Usable in dependencies, particularly for files that don't ++# want to depend on config.h but do want to track version changes. ++# Delete this file prior to any autoconf run where you want to rebuild ++# files to pick up a version string change; and leave it stale to ++# minimize rebuild time after unrelated changes to configure sources. ++# ++# It is probably wise to add these two files to .gitignore, so that you ++# don't accidentally commit either generated file. ++# ++# Use the following line in your configure.ac, so that $(VERSION) will ++# automatically be up-to-date each time configure is run (and note that ++# since configure.ac no longer includes a version string, Makefile rules ++# should not depend on configure.ac for version updates). ++# ++# AC_INIT([GNU project], ++# m4_esyscmd([build-aux/git-version-gen .tarball-version]), ++# [bug-project@example]) ++# ++# Then use the following lines in your Makefile.am, so that .version ++# will be present for dependencies, and so that .tarball-version will ++# exist in distribution tarballs. ++# ++# BUILT_SOURCES = $(top_srcdir)/.version ++# $(top_srcdir)/.version: ++# echo $(VERSION) > $@-t && mv $@-t $@ ++# dist-hook: ++# echo $(VERSION) > $(distdir)/.tarball-version ++ ++case $# in ++ 1|2) ;; ++ *) echo 1>&2 "Usage: $0 \$srcdir/.tarball-version" \ ++ '[TAG-NORMALIZATION-SED-SCRIPT]' ++ exit 1;; ++esac ++ ++tarball_version_file=$1 ++tag_sed_script="${2:-s/x/x/}" ++nl=' ++' ++ ++# Avoid meddling by environment variable of the same name. ++v= ++ ++# First see if there is a tarball-only version file. ++# then try "git describe", then default. 
++if test -f $tarball_version_file ++then ++ v=`cat $tarball_version_file` || exit 1 ++ case $v in ++ *$nl*) v= ;; # reject multi-line output ++ [0-9]*) ;; ++ *) v= ;; ++ esac ++ test -z "$v" \ ++ && echo "$0: WARNING: $tarball_version_file seems to be damaged" 1>&2 ++fi ++ ++if test -n "$v" ++then ++ : # use $v ++elif test -d .git \ ++ && v=`git describe --abbrev=4 --match='v*' HEAD 2>/dev/null \ ++ || git describe --abbrev=4 HEAD 2>/dev/null` \ ++ && v=`printf '%s\n' "$v" | sed "$tag_sed_script"` \ ++ && case $v in ++ v[0-9]*) ;; ++ *) (exit 1) ;; ++ esac ++then ++ # Is this a new git that lists number of commits since the last ++ # tag or the previous older version that did not? ++ # Newer: v6.10-77-g0f8faeb ++ # Older: v6.10-g0f8faeb ++ case $v in ++ *-*-*) : git describe is okay three part flavor ;; ++ *-*) ++ : git describe is older two part flavor ++ # Recreate the number of commits and rewrite such that the ++ # result is the same as if we were using the newer version ++ # of git describe. ++ vtag=`echo "$v" | sed 's/-.*//'` ++ numcommits=`git rev-list "$vtag"..HEAD | wc -l` ++ v=`echo "$v" | sed "s/\(.*\)-\(.*\)/\1-$numcommits-\2/"`; ++ ;; ++ esac ++ ++ # Change the first '-' to a '.', so version-comparing tools work properly. ++ # Remove the "g" in git describe's output string, to save a byte. ++ v=`echo "$v" | sed 's/-/./;s/\(.*\)-g/\1-/'`; ++else ++ v=UNKNOWN ++fi ++ ++v=`echo "$v" |sed 's/^v//'` ++ ++# Don't declare a version "dirty" merely because a time stamp has changed. ++git update-index --refresh > /dev/null 2>&1 ++ ++dirty=`sh -c 'git diff-index --name-only HEAD' 2>/dev/null` || dirty= ++case "$dirty" in ++ '') ;; ++ *) # Append the suffix only if there isn't one already. ++ case $v in ++ *-dirty) ;; ++ *) v="$v-dirty" ;; ++ esac ;; ++esac ++ ++# Omit the trailing newline, so that m4_esyscmd can use the result directly. 
++echo "$v" | tr -d "$nl" ++ ++# Local variables: ++# eval: (add-hook 'write-file-hooks 'time-stamp) ++# time-stamp-start: "scriptversion=" ++# time-stamp-format: "%:y-%02m-%02d.%02H" ++# time-stamp-time-zone: "UTC" ++# time-stamp-end: "; # UTC" ++# End: +diff --git a/slirp/build-aux/meson-dist b/slirp/build-aux/meson-dist +new file mode 100755 +index 0000000..80d534f +--- /dev/null ++++ b/slirp/build-aux/meson-dist +@@ -0,0 +1,16 @@ ++#!/bin/bash ++ ++set -e ++set -o pipefail ++ ++if test "$1" = ""; then ++ echo "Version not provided" >&2 ++ exit 1 ++fi ++if ! test -d "$2"; then ++ echo "Source directory not provided" >&2 ++ exit 1 ++fi ++ ++# generate tarball version ++echo "$1" > "$MESON_DIST_ROOT/.tarball-version" +diff --git a/slirp/meson.build b/slirp/meson.build +new file mode 100644 +index 0000000..3a27149 +--- /dev/null ++++ b/slirp/meson.build +@@ -0,0 +1,134 @@ ++project('libslirp', 'c', ++ version : run_command('build-aux/git-version-gen', '@0@/.tarball-version'.format(meson.source_root()), check : true).stdout().strip(), ++ license : 'BSD-3-Clause', ++ default_options : ['warning_level=1', 'c_std=gnu99'], ++ meson_version : '>= 0.49', ++) ++ ++meson.add_dist_script('build-aux/meson-dist', meson.project_version(), meson.source_root()) ++ ++version = meson.project_version() ++varr = version.split('.') ++major_version = varr[0] ++minor_version = varr[1] ++micro_version = varr[2] ++ ++conf = configuration_data() ++conf.set('SLIRP_MAJOR_VERSION', major_version) ++conf.set('SLIRP_MINOR_VERSION', minor_version) ++conf.set('SLIRP_MICRO_VERSION', micro_version) ++conf.set_quoted('SLIRP_VERSION_STRING', version) ++ ++# libtool versioning - this applies to libslirp ++# ++# See http://sources.redhat.com/autobook/autobook/autobook_91.html#SEC91 for details ++# ++# - If interfaces have been changed or added, but binary compatibility ++# has been preserved, change: ++# CURRENT += 1 ++# REVISION = 0 ++# AGE += 1 ++# - If binary compatibility has been broken (eg 
removed or changed ++# interfaces), change: ++# CURRENT += 1 ++# REVISION = 0 ++# AGE = 0 ++# - If the interface is the same as the previous version, but bugs are ++# fixed, change: ++# REVISION += 1 ++lt_current = 2 ++lt_revision = 0 ++lt_age = 2 ++lt_version = '@0@.@1@.@2@'.format(lt_current - lt_age, lt_age, lt_revision) ++ ++host_system = host_machine.system() ++ ++glib_dep = dependency('glib-2.0') ++ ++cc = meson.get_compiler('c') ++ ++platform_deps = [] ++ ++if host_system == 'windows' ++ platform_deps += [ ++ cc.find_library('ws2_32'), ++ cc.find_library('iphlpapi') ++ ] ++endif ++ ++cargs = [ ++ '-DG_LOG_DOMAIN="Slirp"', ++] ++ ++sources = [ ++ 'src/arp_table.c', ++ 'src/bootp.c', ++ 'src/cksum.c', ++ 'src/dhcpv6.c', ++ 'src/dnssearch.c', ++ 'src/if.c', ++ 'src/ip6_icmp.c', ++ 'src/ip6_input.c', ++ 'src/ip6_output.c', ++ 'src/ip_icmp.c', ++ 'src/ip_input.c', ++ 'src/ip_output.c', ++ 'src/mbuf.c', ++ 'src/misc.c', ++ 'src/ncsi.c', ++ 'src/ndp_table.c', ++ 'src/sbuf.c', ++ 'src/slirp.c', ++ 'src/socket.c', ++ 'src/state.c', ++ 'src/stream.c', ++ 'src/tcp_input.c', ++ 'src/tcp_output.c', ++ 'src/tcp_subr.c', ++ 'src/tcp_timer.c', ++ 'src/tftp.c', ++ 'src/udp.c', ++ 'src/udp6.c', ++ 'src/util.c', ++ 'src/version.c', ++ 'src/vmstate.c', ++] ++ ++mapfile = 'src/libslirp.map' ++vflag = '-Wl,--version-script,@0@/@1@'.format(meson.current_source_dir(), mapfile) ++ ++configure_file( ++ input : 'src/libslirp-version.h.in', ++ output : 'libslirp-version.h', ++ install_dir : join_paths(get_option('includedir'), 'slirp'), ++ configuration : conf ++) ++ ++lib = library('slirp', sources, ++ version : lt_version, ++ c_args : cargs, ++ link_args : vflag, ++ link_depends : mapfile, ++ dependencies : [glib_dep, platform_deps], ++ install : true ++) ++ ++libslirp_dep = declare_dependency( ++ include_directories: include_directories('.', 'src'), ++ link_with: lib) ++ ++install_headers(['src/libslirp.h'], subdir : 'slirp') ++ ++pkg = import('pkgconfig') ++ ++pkg.generate( ++ 
version : version, ++ libraries : lib, ++ requires : [ ++ 'glib-2.0', ++ ], ++ name : 'slirp', ++ description : 'User-space network stack', ++ filebase : 'slirp', ++ subdirs : 'slirp', ++) +diff --git a/slirp/src/arp_table.c b/slirp/src/arp_table.c +new file mode 100644 +index 0000000..054fbf5 +--- /dev/null ++++ b/slirp/src/arp_table.c +@@ -0,0 +1,91 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * ARP table ++ * ++ * Copyright (c) 2011 AdaCore ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++ ++#include "slirp.h" ++ ++#include <string.h> ++ ++void arp_table_add(Slirp *slirp, uint32_t ip_addr, uint8_t ethaddr[ETH_ALEN]) ++{ ++ const uint32_t broadcast_addr = ++ ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; ++ ArpTable *arptbl = &slirp->arp_table; ++ int i; ++ ++ DEBUG_CALL("arp_table_add"); ++ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); ++ DEBUG_ARG("hw addr = %02x:%02x:%02x:%02x:%02x:%02x", ethaddr[0], ethaddr[1], ++ ethaddr[2], ethaddr[3], ethaddr[4], ethaddr[5]); ++ ++ if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { ++ /* Do not register broadcast addresses */ ++ return; ++ } ++ ++ /* Search for an entry */ ++ for (i = 0; i < ARP_TABLE_SIZE; i++) { ++ if (arptbl->table[i].ar_sip == ip_addr) { ++ /* Update the entry */ ++ memcpy(arptbl->table[i].ar_sha, ethaddr, ETH_ALEN); ++ return; ++ } ++ } ++ ++ /* No entry found, create a new one */ ++ arptbl->table[arptbl->next_victim].ar_sip = ip_addr; ++ memcpy(arptbl->table[arptbl->next_victim].ar_sha, ethaddr, ETH_ALEN); ++ arptbl->next_victim = (arptbl->next_victim + 1) % ARP_TABLE_SIZE; ++} ++ ++bool arp_table_search(Slirp *slirp, uint32_t ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]) ++{ ++ const uint32_t broadcast_addr = ++ ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; ++ ArpTable *arptbl = &slirp->arp_table; ++ int i; ++ ++ DEBUG_CALL("arp_table_search"); ++ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); ++ ++ /* If broadcast address */ ++ if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { ++ /* return Ethernet broadcast address */ ++ memset(out_ethaddr, 0xff, ETH_ALEN); ++ return 1; ++ } ++ ++ for (i = 0; i < ARP_TABLE_SIZE; i++) { ++ if (arptbl->table[i].ar_sip == ip_addr) { ++ memcpy(out_ethaddr, arptbl->table[i].ar_sha, ETH_ALEN); ++ DEBUG_ARG("found hw addr = %02x:%02x:%02x:%02x:%02x:%02x", ++ out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], ++ out_ethaddr[3], out_ethaddr[4],
out_ethaddr[5]); ++ return 1; ++ } ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c +new file mode 100644 +index 0000000..46e9681 +--- /dev/null ++++ b/slirp/src/bootp.c +@@ -0,0 +1,369 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * QEMU BOOTP/DHCP server ++ * ++ * Copyright (c) 2004 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++#include "slirp.h" ++ ++#if defined(_WIN32) ++/* Windows ntohl() returns an u_long value. ++ * Add a type cast to match the format strings. */ ++#define ntohl(n) ((uint32_t)ntohl(n)) ++#endif ++ ++/* XXX: only DHCP is supported */ ++ ++#define LEASE_TIME (24 * 3600) ++ ++static const uint8_t rfc1533_cookie[] = { RFC1533_COOKIE }; ++ ++#define DPRINTF(fmt, ...) 
DEBUG_CALL(fmt, ##__VA_ARGS__) ++ ++static BOOTPClient *get_new_addr(Slirp *slirp, struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ BOOTPClient *bc; ++ int i; ++ ++ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { ++ bc = &slirp->bootp_clients[i]; ++ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) ++ goto found; ++ } ++ return NULL; ++found: ++ bc = &slirp->bootp_clients[i]; ++ bc->allocated = 1; ++ paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); ++ return bc; ++} ++ ++static BOOTPClient *request_addr(Slirp *slirp, const struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ uint32_t req_addr = ntohl(paddr->s_addr); ++ uint32_t dhcp_addr = ntohl(slirp->vdhcp_startaddr.s_addr); ++ BOOTPClient *bc; ++ ++ if (req_addr >= dhcp_addr && req_addr < (dhcp_addr + NB_BOOTP_CLIENTS)) { ++ bc = &slirp->bootp_clients[req_addr - dhcp_addr]; ++ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) { ++ bc->allocated = 1; ++ return bc; ++ } ++ } ++ return NULL; ++} ++ ++static BOOTPClient *find_addr(Slirp *slirp, struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ BOOTPClient *bc; ++ int i; ++ ++ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { ++ if (!memcmp(macaddr, slirp->bootp_clients[i].macaddr, 6)) ++ goto found; ++ } ++ return NULL; ++found: ++ bc = &slirp->bootp_clients[i]; ++ bc->allocated = 1; ++ paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); ++ return bc; ++} ++ ++static void dhcp_decode(const struct bootp_t *bp, int *pmsg_type, ++ struct in_addr *preq_addr) ++{ ++ const uint8_t *p, *p_end; ++ int len, tag; ++ ++ *pmsg_type = 0; ++ preq_addr->s_addr = htonl(0L); ++ ++ p = bp->bp_vend; ++ p_end = p + DHCP_OPT_LEN; ++ if (memcmp(p, rfc1533_cookie, 4) != 0) ++ return; ++ p += 4; ++ while (p < p_end) { ++ tag = p[0]; ++ if (tag == RFC1533_PAD) { ++ p++; ++ } else if (tag == RFC1533_END) { ++ break; ++ } else { ++ p++; ++ if (p >= p_end) ++ break; ++ len = *p++; ++ if (p + len > p_end) { ++ break; ++ } ++ DPRINTF("dhcp: tag=%d len=%d\n", tag, 
len); ++ ++ switch (tag) { ++ case RFC2132_MSG_TYPE: ++ if (len >= 1) ++ *pmsg_type = p[0]; ++ break; ++ case RFC2132_REQ_ADDR: ++ if (len >= 4) { ++ memcpy(&(preq_addr->s_addr), p, 4); ++ } ++ break; ++ default: ++ break; ++ } ++ p += len; ++ } ++ } ++ if (*pmsg_type == DHCPREQUEST && preq_addr->s_addr == htonl(0L) && ++ bp->bp_ciaddr.s_addr) { ++ memcpy(&(preq_addr->s_addr), &bp->bp_ciaddr, 4); ++ } ++} ++ ++static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) ++{ ++ BOOTPClient *bc = NULL; ++ struct mbuf *m; ++ struct bootp_t *rbp; ++ struct sockaddr_in saddr, daddr; ++ struct in_addr preq_addr; ++ int dhcp_msg_type, val; ++ uint8_t *q; ++ uint8_t *end; ++ uint8_t client_ethaddr[ETH_ALEN]; ++ ++ /* extract exact DHCP msg type */ ++ dhcp_decode(bp, &dhcp_msg_type, &preq_addr); ++ DPRINTF("bootp packet op=%d msgtype=%d", bp->bp_op, dhcp_msg_type); ++ if (preq_addr.s_addr != htonl(0L)) ++ DPRINTF(" req_addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); ++ else { ++ DPRINTF("\n"); ++ } ++ ++ if (dhcp_msg_type == 0) ++ dhcp_msg_type = DHCPREQUEST; /* Force reply for old BOOTP clients */ ++ ++ if (dhcp_msg_type != DHCPDISCOVER && dhcp_msg_type != DHCPREQUEST) ++ return; ++ ++ /* Get client's hardware address from bootp request */ ++ memcpy(client_ethaddr, bp->bp_hwaddr, ETH_ALEN); ++ ++ m = m_get(slirp); ++ if (!m) { ++ return; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ rbp = (struct bootp_t *)m->m_data; ++ m->m_data += sizeof(struct udpiphdr); ++ memset(rbp, 0, sizeof(struct bootp_t)); ++ ++ if (dhcp_msg_type == DHCPDISCOVER) { ++ if (preq_addr.s_addr != htonl(0L)) { ++ bc = request_addr(slirp, &preq_addr, client_ethaddr); ++ if (bc) { ++ daddr.sin_addr = preq_addr; ++ } ++ } ++ if (!bc) { ++ new_addr: ++ bc = get_new_addr(slirp, &daddr.sin_addr, client_ethaddr); ++ if (!bc) { ++ DPRINTF("no address left\n"); ++ return; ++ } ++ } ++ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); ++ } else if (preq_addr.s_addr != htonl(0L)) { ++ bc = request_addr(slirp, 
&preq_addr, client_ethaddr); ++ if (bc) { ++ daddr.sin_addr = preq_addr; ++ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); ++ } else { ++ /* DHCPNAKs should be sent to broadcast */ ++ daddr.sin_addr.s_addr = 0xffffffff; ++ } ++ } else { ++ bc = find_addr(slirp, &daddr.sin_addr, bp->bp_hwaddr); ++ if (!bc) { ++ /* if never assigned, behaves as if it was already ++ assigned (windows fix because it remembers its address) */ ++ goto new_addr; ++ } ++ } ++ ++ /* Update ARP table for this IP address */ ++ arp_table_add(slirp, daddr.sin_addr.s_addr, client_ethaddr); ++ ++ saddr.sin_addr = slirp->vhost_addr; ++ saddr.sin_port = htons(BOOTP_SERVER); ++ ++ daddr.sin_port = htons(BOOTP_CLIENT); ++ ++ rbp->bp_op = BOOTP_REPLY; ++ rbp->bp_xid = bp->bp_xid; ++ rbp->bp_htype = 1; ++ rbp->bp_hlen = 6; ++ memcpy(rbp->bp_hwaddr, bp->bp_hwaddr, ETH_ALEN); ++ ++ rbp->bp_yiaddr = daddr.sin_addr; /* Client IP address */ ++ rbp->bp_siaddr = saddr.sin_addr; /* Server IP address */ ++ ++ q = rbp->bp_vend; ++ end = (uint8_t *)&rbp[1]; ++ memcpy(q, rfc1533_cookie, 4); ++ q += 4; ++ ++ if (bc) { ++ DPRINTF("%s addr=%08" PRIx32 "\n", ++ (dhcp_msg_type == DHCPDISCOVER) ? 
"offered" : "ack'ed", ++ ntohl(daddr.sin_addr.s_addr)); ++ ++ if (dhcp_msg_type == DHCPDISCOVER) { ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPOFFER; ++ } else /* DHCPREQUEST */ { ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPACK; ++ } ++ ++ if (slirp->bootp_filename) { ++ g_assert(strlen(slirp->bootp_filename) < sizeof(rbp->bp_file)); ++ strcpy(rbp->bp_file, slirp->bootp_filename); ++ } ++ ++ *q++ = RFC2132_SRV_ID; ++ *q++ = 4; ++ memcpy(q, &saddr.sin_addr, 4); ++ q += 4; ++ ++ *q++ = RFC1533_NETMASK; ++ *q++ = 4; ++ memcpy(q, &slirp->vnetwork_mask, 4); ++ q += 4; ++ ++ if (!slirp->restricted) { ++ *q++ = RFC1533_GATEWAY; ++ *q++ = 4; ++ memcpy(q, &saddr.sin_addr, 4); ++ q += 4; ++ ++ *q++ = RFC1533_DNS; ++ *q++ = 4; ++ memcpy(q, &slirp->vnameserver_addr, 4); ++ q += 4; ++ } ++ ++ *q++ = RFC2132_LEASE_TIME; ++ *q++ = 4; ++ val = htonl(LEASE_TIME); ++ memcpy(q, &val, 4); ++ q += 4; ++ ++ if (*slirp->client_hostname) { ++ val = strlen(slirp->client_hostname); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting host name option."); ++ } else { ++ *q++ = RFC1533_HOSTNAME; ++ *q++ = val; ++ memcpy(q, slirp->client_hostname, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->vdomainname) { ++ val = strlen(slirp->vdomainname); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting domain name option."); ++ } else { ++ *q++ = RFC1533_DOMAINNAME; ++ *q++ = val; ++ memcpy(q, slirp->vdomainname, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->tftp_server_name) { ++ val = strlen(slirp->tftp_server_name); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting tftp-server-name option."); ++ } else { ++ *q++ = RFC2132_TFTP_SERVER_NAME; ++ *q++ = val; ++ memcpy(q, slirp->tftp_server_name, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->vdnssearch) { ++ val = slirp->vdnssearch_len; ++ if (q + val >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting domain-search 
option."); ++ } else { ++ memcpy(q, slirp->vdnssearch, val); ++ q += val; ++ } ++ } ++ } else { ++ static const char nak_msg[] = "requested address not available"; ++ ++ DPRINTF("nak'ed addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); ++ ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPNAK; ++ ++ *q++ = RFC2132_MESSAGE; ++ *q++ = sizeof(nak_msg) - 1; ++ memcpy(q, nak_msg, sizeof(nak_msg) - 1); ++ q += sizeof(nak_msg) - 1; ++ } ++ assert(q < end); ++ *q = RFC1533_END; ++ ++ daddr.sin_addr.s_addr = 0xffffffffu; ++ ++ m->m_len = sizeof(struct bootp_t) - sizeof(struct ip) - sizeof(struct udphdr); ++ udp_output(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); ++} ++ ++void bootp_input(struct mbuf *m) ++{ ++ struct bootp_t *bp = mtod(m, struct bootp_t *); ++ ++ if (bp->bp_op == BOOTP_REQUEST) { ++ bootp_reply(m->slirp, bp); ++ } ++} +diff --git a/slirp/src/bootp.h b/slirp/src/bootp.h +new file mode 100644 +index 0000000..a57fa51 +--- /dev/null ++++ b/slirp/src/bootp.h +@@ -0,0 +1,129 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* bootp/dhcp defines */ ++ ++#ifndef SLIRP_BOOTP_H ++#define SLIRP_BOOTP_H ++ ++#define BOOTP_SERVER 67 ++#define BOOTP_CLIENT 68 ++ ++#define BOOTP_REQUEST 1 ++#define BOOTP_REPLY 2 ++ ++#define RFC1533_COOKIE 99, 130, 83, 99 ++#define RFC1533_PAD 0 ++#define RFC1533_NETMASK 1 ++#define RFC1533_TIMEOFFSET 2 ++#define RFC1533_GATEWAY 3 ++#define RFC1533_TIMESERVER 4 ++#define RFC1533_IEN116NS 5 ++#define RFC1533_DNS 6 ++#define RFC1533_LOGSERVER 7 ++#define RFC1533_COOKIESERVER 8 ++#define RFC1533_LPRSERVER 9 ++#define RFC1533_IMPRESSSERVER 10 ++#define RFC1533_RESOURCESERVER 11 ++#define RFC1533_HOSTNAME 12 ++#define RFC1533_BOOTFILESIZE 13 ++#define RFC1533_MERITDUMPFILE 14 ++#define RFC1533_DOMAINNAME 15 ++#define RFC1533_SWAPSERVER 16 ++#define RFC1533_ROOTPATH 17 ++#define RFC1533_EXTENSIONPATH 18 ++#define RFC1533_IPFORWARDING 19 ++#define RFC1533_IPSOURCEROUTING 20 ++#define RFC1533_IPPOLICYFILTER 21 ++#define RFC1533_IPMAXREASSEMBLY 
22 ++#define RFC1533_IPTTL 23 ++#define RFC1533_IPMTU 24 ++#define RFC1533_IPMTUPLATEAU 25 ++#define RFC1533_INTMTU 26 ++#define RFC1533_INTLOCALSUBNETS 27 ++#define RFC1533_INTBROADCAST 28 ++#define RFC1533_INTICMPDISCOVER 29 ++#define RFC1533_INTICMPRESPOND 30 ++#define RFC1533_INTROUTEDISCOVER 31 ++#define RFC1533_INTROUTESOLICIT 32 ++#define RFC1533_INTSTATICROUTES 33 ++#define RFC1533_LLTRAILERENCAP 34 ++#define RFC1533_LLARPCACHETMO 35 ++#define RFC1533_LLETHERNETENCAP 36 ++#define RFC1533_TCPTTL 37 ++#define RFC1533_TCPKEEPALIVETMO 38 ++#define RFC1533_TCPKEEPALIVEGB 39 ++#define RFC1533_NISDOMAIN 40 ++#define RFC1533_NISSERVER 41 ++#define RFC1533_NTPSERVER 42 ++#define RFC1533_VENDOR 43 ++#define RFC1533_NBNS 44 ++#define RFC1533_NBDD 45 ++#define RFC1533_NBNT 46 ++#define RFC1533_NBSCOPE 47 ++#define RFC1533_XFS 48 ++#define RFC1533_XDM 49 ++ ++#define RFC2132_REQ_ADDR 50 ++#define RFC2132_LEASE_TIME 51 ++#define RFC2132_MSG_TYPE 53 ++#define RFC2132_SRV_ID 54 ++#define RFC2132_PARAM_LIST 55 ++#define RFC2132_MESSAGE 56 ++#define RFC2132_MAX_SIZE 57 ++#define RFC2132_RENEWAL_TIME 58 ++#define RFC2132_REBIND_TIME 59 ++#define RFC2132_TFTP_SERVER_NAME 66 ++ ++#define DHCPDISCOVER 1 ++#define DHCPOFFER 2 ++#define DHCPREQUEST 3 ++#define DHCPACK 5 ++#define DHCPNAK 6 ++ ++#define RFC1533_VENDOR_MAJOR 0 ++#define RFC1533_VENDOR_MINOR 0 ++ ++#define RFC1533_VENDOR_MAGIC 128 ++#define RFC1533_VENDOR_ADDPARM 129 ++#define RFC1533_VENDOR_ETHDEV 130 ++#define RFC1533_VENDOR_HOWTO 132 ++#define RFC1533_VENDOR_MNUOPTS 160 ++#define RFC1533_VENDOR_SELECTION 176 ++#define RFC1533_VENDOR_MOTD 184 ++#define RFC1533_VENDOR_NUMOFMOTD 8 ++#define RFC1533_VENDOR_IMG 192 ++#define RFC1533_VENDOR_NUMOFIMG 16 ++ ++#define RFC1533_END 255 ++#define BOOTP_VENDOR_LEN 64 ++#define DHCP_OPT_LEN 312 ++ ++struct bootp_t { ++ struct ip ip; ++ struct udphdr udp; ++ uint8_t bp_op; ++ uint8_t bp_htype; ++ uint8_t bp_hlen; ++ uint8_t bp_hops; ++ uint32_t bp_xid; ++ uint16_t bp_secs; ++ 
uint16_t unused; ++ struct in_addr bp_ciaddr; ++ struct in_addr bp_yiaddr; ++ struct in_addr bp_siaddr; ++ struct in_addr bp_giaddr; ++ uint8_t bp_hwaddr[16]; ++ uint8_t bp_sname[64]; ++ char bp_file[128]; ++ uint8_t bp_vend[DHCP_OPT_LEN]; ++}; ++ ++typedef struct { ++ uint16_t allocated; ++ uint8_t macaddr[6]; ++} BOOTPClient; ++ ++#define NB_BOOTP_CLIENTS 16 ++ ++void bootp_input(struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/cksum.c b/slirp/src/cksum.c +new file mode 100644 +index 0000000..4d08380 +--- /dev/null ++++ b/slirp/src/cksum.c +@@ -0,0 +1,179 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1988, 1992, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 ++ * in_cksum.c,v 1.2 1994/08/02 07:48:16 davidg Exp ++ */ ++ ++#include "slirp.h" ++ ++/* ++ * Checksum routine for Internet Protocol family headers (Portable Version). ++ * ++ * This routine is very heavily used in the network ++ * code and should be modified for each CPU to be as fast as possible. ++ * ++ * XXX Since we will never span more than 1 mbuf, we can optimise this ++ */ ++ ++#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) ++#define REDUCE \ ++ { \ ++ l_util.l = sum; \ ++ sum = l_util.s[0] + l_util.s[1]; \ ++ (void)ADDCARRY(sum); \ ++ } ++ ++int cksum(struct mbuf *m, int len) ++{ ++ register uint16_t *w; ++ register int sum = 0; ++ register int mlen = 0; ++ int byte_swapped = 0; ++ ++ union { ++ uint8_t c[2]; ++ uint16_t s; ++ } s_util; ++ union { ++ uint16_t s[2]; ++ uint32_t l; ++ } l_util; ++ ++ if (m->m_len == 0) ++ goto cont; ++ w = mtod(m, uint16_t *); ++ ++ mlen = m->m_len; ++ ++ if (len < mlen) ++ mlen = len; ++ len -= mlen; ++ /* ++ * Force to even boundary. ++ */ ++ if ((1 & (uintptr_t)w) && (mlen > 0)) { ++ REDUCE; ++ sum <<= 8; ++ s_util.c[0] = *(uint8_t *)w; ++ w = (uint16_t *)((int8_t *)w + 1); ++ mlen--; ++ byte_swapped = 1; ++ } ++ /* ++ * Unroll the loop to make overhead from ++ * branches &c small. 
++ */ ++ while ((mlen -= 32) >= 0) { ++ sum += w[0]; ++ sum += w[1]; ++ sum += w[2]; ++ sum += w[3]; ++ sum += w[4]; ++ sum += w[5]; ++ sum += w[6]; ++ sum += w[7]; ++ sum += w[8]; ++ sum += w[9]; ++ sum += w[10]; ++ sum += w[11]; ++ sum += w[12]; ++ sum += w[13]; ++ sum += w[14]; ++ sum += w[15]; ++ w += 16; ++ } ++ mlen += 32; ++ while ((mlen -= 8) >= 0) { ++ sum += w[0]; ++ sum += w[1]; ++ sum += w[2]; ++ sum += w[3]; ++ w += 4; ++ } ++ mlen += 8; ++ if (mlen == 0 && byte_swapped == 0) ++ goto cont; ++ REDUCE; ++ while ((mlen -= 2) >= 0) { ++ sum += *w++; ++ } ++ ++ if (byte_swapped) { ++ REDUCE; ++ sum <<= 8; ++ if (mlen == -1) { ++ s_util.c[1] = *(uint8_t *)w; ++ sum += s_util.s; ++ mlen = 0; ++ } else ++ ++ mlen = -1; ++ } else if (mlen == -1) ++ s_util.c[0] = *(uint8_t *)w; ++ ++cont: ++ if (len) { ++ DEBUG_ERROR("cksum: out of data"); ++ DEBUG_ERROR(" len = %d", len); ++ } ++ if (mlen == -1) { ++ /* The last mbuf has odd # of bytes. Follow the ++ standard (the odd byte may be shifted left by 8 bits ++ or not as determined by endian-ness of the machine) */ ++ s_util.c[1] = 0; ++ sum += s_util.s; ++ } ++ REDUCE; ++ return (~sum & 0xffff); ++} ++ ++int ip6_cksum(struct mbuf *m) ++{ ++ /* TODO: Optimize this by being able to pass the ip6_pseudohdr to cksum ++ * separately from the mbuf */ ++ struct ip6 save_ip, *ip = mtod(m, struct ip6 *); ++ struct ip6_pseudohdr *ih = mtod(m, struct ip6_pseudohdr *); ++ int sum; ++ ++ save_ip = *ip; ++ ++ ih->ih_src = save_ip.ip_src; ++ ih->ih_dst = save_ip.ip_dst; ++ ih->ih_pl = htonl((uint32_t)ntohs(save_ip.ip_pl)); ++ ih->ih_zero_hi = 0; ++ ih->ih_zero_lo = 0; ++ ih->ih_nh = save_ip.ip_nh; ++ ++ sum = cksum(m, ((int)sizeof(struct ip6_pseudohdr)) + ntohl(ih->ih_pl)); ++ ++ *ip = save_ip; ++ ++ return sum; ++} +diff --git a/slirp/src/debug.h b/slirp/src/debug.h +new file mode 100644 +index 0000000..47712bd +--- /dev/null ++++ b/slirp/src/debug.h +@@ -0,0 +1,51 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * 
Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef DEBUG_H_ ++#define DEBUG_H_ ++ ++#define DBG_CALL (1 << 0) ++#define DBG_MISC (1 << 1) ++#define DBG_ERROR (1 << 2) ++#define DBG_TFTP (1 << 3) ++ ++extern int slirp_debug; ++ ++#define DEBUG_CALL(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ ++ g_debug(fmt "...", ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_ARG(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ ++ g_debug(" " fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_MISC(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_MISC)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_ERROR(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_ERROR)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_TFTP(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_TFTP)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#endif /* DEBUG_H_ */ +diff --git a/slirp/src/dhcpv6.c b/slirp/src/dhcpv6.c +new file mode 100644 +index 0000000..77b451b +--- /dev/null ++++ b/slirp/src/dhcpv6.c +@@ -0,0 +1,224 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * SLIRP stateless DHCPv6 ++ * ++ * We only support stateless DHCPv6, e.g. for network booting. ++ * See RFC 3315, RFC 3736, RFC 3646 and RFC 5970 for details. ++ * ++ * Copyright 2016 Thomas Huth, Red Hat Inc. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. 
Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#include "slirp.h" ++#include "dhcpv6.h" ++ ++/* DHCPv6 message types */ ++#define MSGTYPE_REPLY 7 ++#define MSGTYPE_INFO_REQUEST 11 ++ ++/* DHCPv6 option types */ ++#define OPTION_CLIENTID 1 ++#define OPTION_IAADDR 5 ++#define OPTION_ORO 6 ++#define OPTION_DNS_SERVERS 23 ++#define OPTION_BOOTFILE_URL 59 ++ ++struct requested_infos { ++ uint8_t *client_id; ++ int client_id_len; ++ bool want_dns; ++ bool want_boot_url; ++}; ++ ++/** ++ * Analyze the info request message sent by the client to see what data it ++ * provided and what it wants to have. The information is gathered in the ++ * "requested_infos" struct. Note that client_id (if provided) points into ++ * the odata region, thus the caller must keep odata valid as long as it ++ * needs to access the requested_infos struct. 
++ */ ++static int dhcpv6_parse_info_request(Slirp *slirp, uint8_t *odata, int olen, ++ struct requested_infos *ri) ++{ ++ int i, req_opt; ++ ++ while (olen > 4) { ++ /* Parse one option */ ++ int option = odata[0] << 8 | odata[1]; ++ int len = odata[2] << 8 | odata[3]; ++ ++ if (len + 4 > olen) { ++ slirp->cb->guest_error("Guest sent bad DHCPv6 packet!", ++ slirp->opaque); ++ return -E2BIG; ++ } ++ ++ switch (option) { ++ case OPTION_IAADDR: ++ /* According to RFC3315, we must discard requests with IA option */ ++ return -EINVAL; ++ case OPTION_CLIENTID: ++ if (len > 256) { ++ /* Avoid very long IDs which could cause problems later */ ++ return -E2BIG; ++ } ++ ri->client_id = odata + 4; ++ ri->client_id_len = len; ++ break; ++ case OPTION_ORO: /* Option request option */ ++ if (len & 1) { ++ return -EINVAL; ++ } ++ /* Check which options the client wants to have */ ++ for (i = 0; i < len; i += 2) { ++ req_opt = odata[4 + i] << 8 | odata[4 + i + 1]; ++ switch (req_opt) { ++ case OPTION_DNS_SERVERS: ++ ri->want_dns = true; ++ break; ++ case OPTION_BOOTFILE_URL: ++ ri->want_boot_url = true; ++ break; ++ default: ++ DEBUG_MISC("dhcpv6: Unsupported option request %d", ++ req_opt); ++ } ++ } ++ break; ++ default: ++ DEBUG_MISC("dhcpv6 info req: Unsupported option %d, len=%d", option, ++ len); ++ } ++ ++ odata += len + 4; ++ olen -= len + 4; ++ } ++ ++ return 0; ++} ++ ++ ++/** ++ * Handle information request messages ++ */ ++static void dhcpv6_info_request(Slirp *slirp, struct sockaddr_in6 *srcsas, ++ uint32_t xid, uint8_t *odata, int olen) ++{ ++ struct requested_infos ri = { NULL }; ++ struct sockaddr_in6 sa6, da6; ++ struct mbuf *m; ++ uint8_t *resp; ++ ++ if (dhcpv6_parse_info_request(slirp, odata, olen, &ri) < 0) { ++ return; ++ } ++ ++ m = m_get(slirp); ++ if (!m) { ++ return; ++ } ++ memset(m->m_data, 0, m->m_size); ++ m->m_data += IF_MAXLINKHDR; ++ resp = (uint8_t *)m->m_data + sizeof(struct ip6) + sizeof(struct udphdr); ++ ++ /* Fill in response */ ++ *resp++ = 
MSGTYPE_REPLY; ++ *resp++ = (uint8_t)(xid >> 16); ++ *resp++ = (uint8_t)(xid >> 8); ++ *resp++ = (uint8_t)xid; ++ ++ if (ri.client_id) { ++ *resp++ = OPTION_CLIENTID >> 8; /* option-code high byte */ ++ *resp++ = OPTION_CLIENTID; /* option-code low byte */ ++ *resp++ = ri.client_id_len >> 8; /* option-len high byte */ ++ *resp++ = ri.client_id_len; /* option-len low byte */ ++ memcpy(resp, ri.client_id, ri.client_id_len); ++ resp += ri.client_id_len; ++ } ++ if (ri.want_dns) { ++ *resp++ = OPTION_DNS_SERVERS >> 8; /* option-code high byte */ ++ *resp++ = OPTION_DNS_SERVERS; /* option-code low byte */ ++ *resp++ = 0; /* option-len high byte */ ++ *resp++ = 16; /* option-len low byte */ ++ memcpy(resp, &slirp->vnameserver_addr6, 16); ++ resp += 16; ++ } ++ if (ri.want_boot_url) { ++ uint8_t *sa = slirp->vhost_addr6.s6_addr; ++ int slen, smaxlen; ++ ++ *resp++ = OPTION_BOOTFILE_URL >> 8; /* option-code high byte */ ++ *resp++ = OPTION_BOOTFILE_URL; /* option-code low byte */ ++ smaxlen = (uint8_t *)m->m_data + slirp->if_mtu - (resp + 2); ++ slen = slirp_fmt((char *)resp + 2, smaxlen, ++ "tftp://[%02x%02x:%02x%02x:%02x%02x:%02x%02x:" ++ "%02x%02x:%02x%02x:%02x%02x:%02x%02x]/%s", ++ sa[0], sa[1], sa[2], sa[3], sa[4], sa[5], sa[6], sa[7], ++ sa[8], sa[9], sa[10], sa[11], sa[12], sa[13], sa[14], ++ sa[15], slirp->bootp_filename); ++ *resp++ = slen >> 8; /* option-len high byte */ ++ *resp++ = slen; /* option-len low byte */ ++ resp += slen; ++ } ++ ++ sa6.sin6_addr = slirp->vhost_addr6; ++ sa6.sin6_port = DHCPV6_SERVER_PORT; ++ da6.sin6_addr = srcsas->sin6_addr; ++ da6.sin6_port = srcsas->sin6_port; ++ m->m_data += sizeof(struct ip6) + sizeof(struct udphdr); ++ m->m_len = resp - (uint8_t *)m->m_data; ++ udp6_output(NULL, m, &sa6, &da6); ++} ++ ++/** ++ * Handle DHCPv6 messages sent by the client ++ */ ++void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m) ++{ ++ uint8_t *data = (uint8_t *)m->m_data + sizeof(struct udphdr); ++ int data_len = m->m_len - 
sizeof(struct udphdr); ++ uint32_t xid; ++ ++ if (data_len < 4) { ++ return; ++ } ++ ++ xid = ntohl(*(uint32_t *)data) & 0xffffff; ++ ++ switch (data[0]) { ++ case MSGTYPE_INFO_REQUEST: ++ dhcpv6_info_request(m->slirp, srcsas, xid, &data[4], data_len - 4); ++ break; ++ default: ++ DEBUG_MISC("dhcpv6_input: Unsupported message type 0x%x", data[0]); ++ } ++} +diff --git a/slirp/src/dhcpv6.h b/slirp/src/dhcpv6.h +new file mode 100644 +index 0000000..d12c49b +--- /dev/null ++++ b/slirp/src/dhcpv6.h +@@ -0,0 +1,68 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Definitions and prototypes for SLIRP stateless DHCPv6 ++ * ++ * Copyright 2016 Thomas Huth, Red Hat Inc. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#ifndef SLIRP_DHCPV6_H ++#define SLIRP_DHCPV6_H ++ ++#define DHCPV6_SERVER_PORT 547 ++ ++#define ALLDHCP_MULTICAST \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01, \ ++ 0x00, \ ++ 0x02 \ ++ } \ ++ } ++ ++#define in6_dhcp_multicast(a) in6_equal(a, &(struct in6_addr)ALLDHCP_MULTICAST) ++ ++void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/dnssearch.c b/slirp/src/dnssearch.c +new file mode 100644 +index 0000000..e8f14e3 +--- /dev/null ++++ b/slirp/src/dnssearch.c +@@ -0,0 +1,306 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * Domain search option for DHCP (RFC 3397) ++ * ++ * Copyright (c) 2012 Klaus Stengel ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++#include "slirp.h" ++ ++static const uint8_t RFC3397_OPT_DOMAIN_SEARCH = 119; ++static const uint8_t MAX_OPT_LEN = 255; ++static const uint8_t OPT_HEADER_LEN = 2; ++static const uint8_t REFERENCE_LEN = 2; ++ ++struct compact_domain; ++ ++typedef struct compact_domain { ++ struct compact_domain *self; ++ struct compact_domain *refdom; ++ uint8_t *labels; ++ size_t len; ++ size_t common_octets; ++} CompactDomain; ++ ++static size_t domain_suffix_diffoff(const CompactDomain *a, ++ const CompactDomain *b) ++{ ++ size_t la = a->len, lb = b->len; ++ uint8_t *da = a->labels + la, *db = b->labels + lb; ++ size_t i, lm = (la < lb) ? 
la : lb; ++ ++ for (i = 0; i < lm; i++) { ++ da--; ++ db--; ++ if (*da != *db) { ++ break; ++ } ++ } ++ return i; ++} ++ ++static int domain_suffix_ord(const void *cva, const void *cvb) ++{ ++ const CompactDomain *a = cva, *b = cvb; ++ size_t la = a->len, lb = b->len; ++ size_t doff = domain_suffix_diffoff(a, b); ++ uint8_t ca = a->labels[la - doff]; ++ uint8_t cb = b->labels[lb - doff]; ++ ++ if (ca < cb) { ++ return -1; ++ } ++ if (ca > cb) { ++ return 1; ++ } ++ if (la < lb) { ++ return -1; ++ } ++ if (la > lb) { ++ return 1; ++ } ++ return 0; ++} ++ ++static size_t domain_common_label(CompactDomain *a, CompactDomain *b) ++{ ++ size_t res, doff = domain_suffix_diffoff(a, b); ++ uint8_t *first_eq_pos = a->labels + (a->len - doff); ++ uint8_t *label = a->labels; ++ ++ while (*label && label < first_eq_pos) { ++ label += *label + 1; ++ } ++ res = a->len - (label - a->labels); ++ /* only report if it can help to reduce the packet size */ ++ return (res > REFERENCE_LEN) ? res : 0; ++} ++ ++static void domain_fixup_order(CompactDomain *cd, size_t n) ++{ ++ size_t i; ++ ++ for (i = 0; i < n; i++) { ++ CompactDomain *cur = cd + i, *next = cd[i].self; ++ ++ while (!cur->common_octets) { ++ CompactDomain *tmp = next->self; /* backup target value */ ++ ++ next->self = cur; ++ cur->common_octets++; ++ ++ cur = next; ++ next = tmp; ++ } ++ } ++} ++ ++static void domain_mklabels(CompactDomain *cd, const char *input) ++{ ++ uint8_t *len_marker = cd->labels; ++ uint8_t *output = len_marker; /* pre-incremented */ ++ const char *in = input; ++ char cur_chr; ++ size_t len = 0; ++ ++ if (cd->len == 0) { ++ goto fail; ++ } ++ cd->len++; ++ ++ do { ++ cur_chr = *in++; ++ if (cur_chr == '.' 
|| cur_chr == '\0') { ++ len = output - len_marker; ++ if ((len == 0 && cur_chr == '.') || len >= 64) { ++ goto fail; ++ } ++ *len_marker = len; ++ ++ output++; ++ len_marker = output; ++ } else { ++ output++; ++ *output = cur_chr; ++ } ++ } while (cur_chr != '\0'); ++ ++ /* ensure proper zero-termination */ ++ if (len != 0) { ++ *len_marker = 0; ++ cd->len++; ++ } ++ return; ++ ++fail: ++ g_warning("failed to parse domain name '%s'\n", input); ++ cd->len = 0; ++} ++ ++static void domain_mkxrefs(CompactDomain *doms, CompactDomain *last, ++ size_t depth) ++{ ++ CompactDomain *i = doms, *target = doms; ++ ++ do { ++ if (i->labels < target->labels) { ++ target = i; ++ } ++ } while (i++ != last); ++ ++ for (i = doms; i != last; i++) { ++ CompactDomain *group_last; ++ size_t next_depth; ++ ++ if (i->common_octets == depth) { ++ continue; ++ } ++ ++ next_depth = -1; ++ for (group_last = i; group_last != last; group_last++) { ++ size_t co = group_last->common_octets; ++ if (co <= depth) { ++ break; ++ } ++ if (co < next_depth) { ++ next_depth = co; ++ } ++ } ++ domain_mkxrefs(i, group_last, next_depth); ++ ++ i = group_last; ++ if (i == last) { ++ break; ++ } ++ } ++ ++ if (depth == 0) { ++ return; ++ } ++ ++ i = doms; ++ do { ++ if (i != target && i->refdom == NULL) { ++ i->refdom = target; ++ i->common_octets = depth; ++ } ++ } while (i++ != last); ++} ++ ++static size_t domain_compactify(CompactDomain *domains, size_t n) ++{ ++ uint8_t *start = domains->self->labels, *outptr = start; ++ size_t i; ++ ++ for (i = 0; i < n; i++) { ++ CompactDomain *cd = domains[i].self; ++ CompactDomain *rd = cd->refdom; ++ ++ if (rd != NULL) { ++ size_t moff = (rd->labels - start) + (rd->len - cd->common_octets); ++ if (moff < 0x3FFFu) { ++ cd->len -= cd->common_octets - 2; ++ cd->labels[cd->len - 1] = moff & 0xFFu; ++ cd->labels[cd->len - 2] = 0xC0u | (moff >> 8); ++ } ++ } ++ ++ if (cd->labels != outptr) { ++ memmove(outptr, cd->labels, cd->len); ++ cd->labels = outptr; ++ } ++ outptr 
+= cd->len; ++ } ++ return outptr - start; ++} ++ ++int translate_dnssearch(Slirp *s, const char **names) ++{ ++ size_t blocks, bsrc_start, bsrc_end, bdst_start; ++ size_t i, num_domains, memreq = 0; ++ uint8_t *result = NULL, *outptr; ++ CompactDomain *domains = NULL; ++ ++ num_domains = g_strv_length((GStrv)names); ++ if (num_domains == 0) { ++ return -2; ++ } ++ ++ domains = g_malloc(num_domains * sizeof(*domains)); ++ ++ for (i = 0; i < num_domains; i++) { ++ size_t nlen = strlen(names[i]); ++ memreq += nlen + 2; /* 1 zero octet + 1 label length octet */ ++ domains[i].self = domains + i; ++ domains[i].len = nlen; ++ domains[i].common_octets = 0; ++ domains[i].refdom = NULL; ++ } ++ ++ /* reserve extra 2 header bytes for each 255 bytes of output */ ++ memreq += DIV_ROUND_UP(memreq, MAX_OPT_LEN) * OPT_HEADER_LEN; ++ result = g_malloc(memreq * sizeof(*result)); ++ ++ outptr = result; ++ for (i = 0; i < num_domains; i++) { ++ domains[i].labels = outptr; ++ domain_mklabels(domains + i, names[i]); ++ outptr += domains[i].len; ++ } ++ ++ if (outptr == result) { ++ g_free(domains); ++ g_free(result); ++ return -1; ++ } ++ ++ qsort(domains, num_domains, sizeof(*domains), domain_suffix_ord); ++ domain_fixup_order(domains, num_domains); ++ ++ for (i = 1; i < num_domains; i++) { ++ size_t cl = domain_common_label(domains + i - 1, domains + i); ++ domains[i - 1].common_octets = cl; ++ } ++ ++ domain_mkxrefs(domains, domains + num_domains - 1, 0); ++ memreq = domain_compactify(domains, num_domains); ++ ++ blocks = DIV_ROUND_UP(memreq, MAX_OPT_LEN); ++ bsrc_end = memreq; ++ bsrc_start = (blocks - 1) * MAX_OPT_LEN; ++ bdst_start = bsrc_start + blocks * OPT_HEADER_LEN; ++ memreq += blocks * OPT_HEADER_LEN; ++ ++ while (blocks--) { ++ size_t len = bsrc_end - bsrc_start; ++ memmove(result + bdst_start, result + bsrc_start, len); ++ result[bdst_start - 2] = RFC3397_OPT_DOMAIN_SEARCH; ++ result[bdst_start - 1] = len; ++ bsrc_end = bsrc_start; ++ bsrc_start -= MAX_OPT_LEN; ++ 
bdst_start -= MAX_OPT_LEN + OPT_HEADER_LEN; ++ } ++ ++ g_free(domains); ++ s->vdnssearch = result; ++ s->vdnssearch_len = memreq; ++ return 0; ++} +diff --git a/slirp/src/if.c b/slirp/src/if.c +new file mode 100644 +index 0000000..23190b5 +--- /dev/null ++++ b/slirp/src/if.c +@@ -0,0 +1,213 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++static void ifs_insque(struct mbuf *ifm, struct mbuf *ifmhead) ++{ ++ ifm->ifs_next = ifmhead->ifs_next; ++ ifmhead->ifs_next = ifm; ++ ifm->ifs_prev = ifmhead; ++ ifm->ifs_next->ifs_prev = ifm; ++} ++ ++static void ifs_remque(struct mbuf *ifm) ++{ ++ ifm->ifs_prev->ifs_next = ifm->ifs_next; ++ ifm->ifs_next->ifs_prev = ifm->ifs_prev; ++} ++ ++void if_init(Slirp *slirp) ++{ ++ slirp->if_fastq.qh_link = slirp->if_fastq.qh_rlink = &slirp->if_fastq; ++ slirp->if_batchq.qh_link = slirp->if_batchq.qh_rlink = &slirp->if_batchq; ++} ++ ++/* ++ * if_output: Queue packet into an output queue. ++ * There are 2 output queue's, if_fastq and if_batchq. ++ * Each output queue is a doubly linked list of double linked lists ++ * of mbufs, each list belonging to one "session" (socket). This ++ * way, we can output packets fairly by sending one packet from each ++ * session, instead of all the packets from one session, then all packets ++ * from the next session, etc. Packets on the if_fastq get absolute ++ * priority, but if one session hogs the link, it gets "downgraded" ++ * to the batchq until it runs out of packets, then it'll return ++ * to the fastq (eg. 
if the user does an ls -alR in a telnet session, ++ * it'll temporarily get downgraded to the batchq) ++ */ ++void if_output(struct socket *so, struct mbuf *ifm) ++{ ++ Slirp *slirp = ifm->slirp; ++ struct mbuf *ifq; ++ int on_fastq = 1; ++ ++ DEBUG_CALL("if_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("ifm = %p", ifm); ++ ++ /* ++ * First remove the mbuf from m_usedlist, ++ * since we're gonna use m_next and m_prev ourselves ++ * XXX Shouldn't need this, gotta change dtom() etc. ++ */ ++ if (ifm->m_flags & M_USEDLIST) { ++ remque(ifm); ++ ifm->m_flags &= ~M_USEDLIST; ++ } ++ ++ /* ++ * See if there's already a batchq list for this session. ++ * This can include an interactive session, which should go on fastq, ++ * but gets too greedy... hence it'll be downgraded from fastq to batchq. ++ * We mustn't put this packet back on the fastq (or we'll send it out of ++ * order) ++ * XXX add cache here? ++ */ ++ if (so) { ++ for (ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; ++ (struct quehead *)ifq != &slirp->if_batchq; ifq = ifq->ifq_prev) { ++ if (so == ifq->ifq_so) { ++ /* A match! */ ++ ifm->ifq_so = so; ++ ifs_insque(ifm, ifq->ifs_prev); ++ goto diddit; ++ } ++ } ++ } ++ ++ /* No match, check which queue to put it on */ ++ if (so && (so->so_iptos & IPTOS_LOWDELAY)) { ++ ifq = (struct mbuf *)slirp->if_fastq.qh_rlink; ++ on_fastq = 1; ++ /* ++ * Check if this packet is a part of the last ++ * packet's session ++ */ ++ if (ifq->ifq_so == so) { ++ ifm->ifq_so = so; ++ ifs_insque(ifm, ifq->ifs_prev); ++ goto diddit; ++ } ++ } else { ++ ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; ++ } ++ ++ /* Create a new doubly linked list for this session */ ++ ifm->ifq_so = so; ++ ifs_init(ifm); ++ insque(ifm, ifq); ++ ++diddit: ++ if (so) { ++ /* Update *_queued */ ++ so->so_queued++; ++ so->so_nqueued++; ++ /* ++ * Check if the interactive session should be downgraded to ++ * the batchq. 
A session is downgraded if it has queued 6 ++ * packets without pausing, and at least 3 of those packets ++ * have been sent over the link ++ * (XXX These are arbitrary numbers, probably not optimal..) ++ */ ++ if (on_fastq && ++ ((so->so_nqueued >= 6) && (so->so_nqueued - so->so_queued) >= 3)) { ++ /* Remove from current queue... */ ++ remque(ifm->ifs_next); ++ ++ /* ...And insert in the new. That'll teach ya! */ ++ insque(ifm->ifs_next, &slirp->if_batchq); ++ } ++ } ++ ++ /* ++ * This prevents us from malloc()ing too many mbufs ++ */ ++ if_start(ifm->slirp); ++} ++ ++/* ++ * Send one packet from each session. ++ * If there are packets on the fastq, they are sent FIFO, before ++ * everything else. Then we choose the first packet from each ++ * batchq session (socket) and send it. ++ * For example, if there are 3 ftp sessions fighting for bandwidth, ++ * one packet will be sent from the first session, then one packet ++ * from the second session, then one packet from the third. ++ */ ++void if_start(Slirp *slirp) ++{ ++ uint64_t now = slirp->cb->clock_get_ns(slirp->opaque); ++ bool from_batchq = false; ++ struct mbuf *ifm, *ifm_next, *ifqt; ++ ++ DEBUG_CALL("if_start"); ++ ++ if (slirp->if_start_busy) { ++ return; ++ } ++ slirp->if_start_busy = true; ++ ++ struct mbuf *batch_head = NULL; ++ if (slirp->if_batchq.qh_link != &slirp->if_batchq) { ++ batch_head = (struct mbuf *)slirp->if_batchq.qh_link; ++ } ++ ++ if (slirp->if_fastq.qh_link != &slirp->if_fastq) { ++ ifm_next = (struct mbuf *)slirp->if_fastq.qh_link; ++ } else if (batch_head) { ++ /* Nothing on fastq, pick up from batchq */ ++ ifm_next = batch_head; ++ from_batchq = true; ++ } else { ++ ifm_next = NULL; ++ } ++ ++ while (ifm_next) { ++ ifm = ifm_next; ++ ++ ifm_next = ifm->ifq_next; ++ if ((struct quehead *)ifm_next == &slirp->if_fastq) { ++ /* No more packets in fastq, switch to batchq */ ++ ifm_next = batch_head; ++ from_batchq = true; ++ } ++ if ((struct quehead *)ifm_next == &slirp->if_batchq) { ++ 
/* end of batchq */ ++ ifm_next = NULL; ++ } ++ ++ /* Try to send packet unless it already expired */ ++ if (ifm->expiration_date >= now && !if_encap(slirp, ifm)) { ++ /* Packet is delayed due to pending ARP or NDP resolution */ ++ continue; ++ } ++ ++ /* Remove it from the queue */ ++ ifqt = ifm->ifq_prev; ++ remque(ifm); ++ ++ /* If there are more packets for this session, re-queue them */ ++ if (ifm->ifs_next != ifm) { ++ struct mbuf *next = ifm->ifs_next; ++ ++ insque(next, ifqt); ++ ifs_remque(ifm); ++ if (!from_batchq) { ++ ifm_next = next; ++ } ++ } ++ ++ /* Update so_queued */ ++ if (ifm->ifq_so && --ifm->ifq_so->so_queued == 0) { ++ /* If there's no more queued, reset nqueued */ ++ ifm->ifq_so->so_nqueued = 0; ++ } ++ ++ m_free(ifm); ++ } ++ ++ slirp->if_start_busy = false; ++} +diff --git a/slirp/src/if.h b/slirp/src/if.h +new file mode 100644 +index 0000000..7cf9d27 +--- /dev/null ++++ b/slirp/src/if.h +@@ -0,0 +1,25 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef IF_H ++#define IF_H ++ ++#define IF_COMPRESS 0x01 /* We want compression */ ++#define IF_NOCOMPRESS 0x02 /* Do not do compression */ ++#define IF_AUTOCOMP 0x04 /* Autodetect (default) */ ++#define IF_NOCIDCOMP 0x08 /* CID compression */ ++ ++#define IF_MTU_DEFAULT 1500 ++#define IF_MTU_MIN 68 ++#define IF_MTU_MAX 65521 ++#define IF_MRU_DEFAULT 1500 ++#define IF_MRU_MIN 68 ++#define IF_MRU_MAX 65521 ++#define IF_COMP IF_AUTOCOMP /* Flags for compression */ ++ ++/* 2 for alignment, 14 for ethernet */ ++#define IF_MAXLINKHDR (2 + ETH_HLEN) ++ ++#endif +diff --git a/slirp/src/ip.h b/slirp/src/ip.h +new file mode 100644 +index 0000000..e5d4aa8 +--- /dev/null ++++ b/slirp/src/ip.h +@@ -0,0 +1,242 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. 
++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. 
++ * ++ * @(#)ip.h 8.1 (Berkeley) 6/10/93 ++ * ip.h,v 1.3 1994/08/21 05:27:30 paul Exp ++ */ ++ ++#ifndef IP_H ++#define IP_H ++ ++#include <glib.h> ++ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++#undef NTOHL ++#undef NTOHS ++#undef HTONL ++#undef HTONS ++#define NTOHL(d) ++#define NTOHS(d) ++#define HTONL(d) ++#define HTONS(d) ++#else ++#ifndef NTOHL ++#define NTOHL(d) ((d) = ntohl((d))) ++#endif ++#ifndef NTOHS ++#define NTOHS(d) ((d) = ntohs((uint16_t)(d))) ++#endif ++#ifndef HTONL ++#define HTONL(d) ((d) = htonl((d))) ++#endif ++#ifndef HTONS ++#define HTONS(d) ((d) = htons((uint16_t)(d))) ++#endif ++#endif ++ ++typedef uint32_t n_long; /* long as received from the net */ ++ ++/* ++ * Definitions for internet protocol version 4. ++ * Per RFC 791, September 1981. ++ */ ++#define IPVERSION 4 ++ ++/* ++ * Structure of an internet header, naked of options. ++ */ ++struct ip { ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t ip_v : 4, /* version */ ++ ip_hl : 4; /* header length */ ++#else ++ uint8_t ip_hl : 4, /* header length */ ++ ip_v : 4; /* version */ ++#endif ++ uint8_t ip_tos; /* type of service */ ++ uint16_t ip_len; /* total length */ ++ uint16_t ip_id; /* identification */ ++ uint16_t ip_off; /* fragment offset field */ ++#define IP_DF 0x4000 /* don't fragment flag */ ++#define IP_MF 0x2000 /* more fragments flag */ ++#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ ++ uint8_t ip_ttl; /* time to live */ ++ uint8_t ip_p; /* protocol */ ++ uint16_t ip_sum; /* checksum */ ++ struct in_addr ip_src, ip_dst; /* source and dest address */ ++} SLIRP_PACKED; ++ ++#define IP_MAXPACKET 65535 /* maximum packet size */ ++ ++/* ++ * Definitions for IP type of service (ip_tos) ++ */ ++#define IPTOS_LOWDELAY 0x10 ++#define IPTOS_THROUGHPUT 0x08 ++#define IPTOS_RELIABILITY 0x04 ++ ++/* ++ * Definitions for options.
++ */ ++#define IPOPT_COPIED(o) ((o)&0x80) ++#define IPOPT_CLASS(o) ((o)&0x60) ++#define IPOPT_NUMBER(o) ((o)&0x1f) ++ ++#define IPOPT_CONTROL 0x00 ++#define IPOPT_RESERVED1 0x20 ++#define IPOPT_DEBMEAS 0x40 ++#define IPOPT_RESERVED2 0x60 ++ ++#define IPOPT_EOL 0 /* end of option list */ ++#define IPOPT_NOP 1 /* no operation */ ++ ++#define IPOPT_RR 7 /* record packet route */ ++#define IPOPT_TS 68 /* timestamp */ ++#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */ ++#define IPOPT_LSRR 131 /* loose source route */ ++#define IPOPT_SATID 136 /* satnet id */ ++#define IPOPT_SSRR 137 /* strict source route */ ++ ++/* ++ * Offsets to fields in options other than EOL and NOP. ++ */ ++#define IPOPT_OPTVAL 0 /* option ID */ ++#define IPOPT_OLEN 1 /* option length */ ++#define IPOPT_OFFSET 2 /* offset within option */ ++#define IPOPT_MINOFF 4 /* min value of above */ ++ ++/* ++ * Time stamp option structure. ++ */ ++struct ip_timestamp { ++ uint8_t ipt_code; /* IPOPT_TS */ ++ uint8_t ipt_len; /* size of structure (variable) */ ++ uint8_t ipt_ptr; /* index of current entry */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t ipt_oflw : 4, /* overflow counter */ ++ ipt_flg : 4; /* flags, see below */ ++#else ++ uint8_t ipt_flg : 4, /* flags, see below */ ++ ipt_oflw : 4; /* overflow counter */ ++#endif ++ union ipt_timestamp { ++ n_long ipt_time[1]; ++ struct ipt_ta { ++ struct in_addr ipt_addr; ++ n_long ipt_time; ++ } ipt_ta[1]; ++ } ipt_timestamp; ++} SLIRP_PACKED; ++ ++/* flag bits for ipt_flg */ ++#define IPOPT_TS_TSONLY 0 /* timestamps only */ ++#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ ++#define IPOPT_TS_PRESPEC 3 /* specified modules only */ ++ ++/* bits for security (not byte swapped) */ ++#define IPOPT_SECUR_UNCLASS 0x0000 ++#define IPOPT_SECUR_CONFID 0xf135 ++#define IPOPT_SECUR_EFTO 0x789a ++#define IPOPT_SECUR_MMMM 0xbc4d ++#define IPOPT_SECUR_RESTR 0xaf13 ++#define IPOPT_SECUR_SECRET 0xd788 ++#define IPOPT_SECUR_TOPSECRET 0x6bc5 ++ ++/* ++ * 
Internet implementation parameters. ++ */ ++#define MAXTTL 255 /* maximum time to live (seconds) */ ++#define IPDEFTTL 64 /* default ttl, from RFC 1340 */ ++#define IPFRAGTTL 60 /* time to live for frags, slowhz */ ++#define IPTTLDEC 1 /* subtracted when forwarding */ ++ ++#define IP_MSS 576 /* default maximum segment size */ ++ ++#if GLIB_SIZEOF_VOID_P == 4 ++struct mbuf_ptr { ++ struct mbuf *mptr; ++ uint32_t dummy; ++} SLIRP_PACKED; ++#else ++struct mbuf_ptr { ++ struct mbuf *mptr; ++} SLIRP_PACKED; ++#endif ++struct qlink { ++ void *next, *prev; ++}; ++ ++/* ++ * Overlay for ip header used by other protocols (tcp, udp). ++ */ ++struct ipovly { ++ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ ++ uint8_t ih_x1; /* (unused) */ ++ uint8_t ih_pr; /* protocol */ ++ uint16_t ih_len; /* protocol length */ ++ struct in_addr ih_src; /* source internet address */ ++ struct in_addr ih_dst; /* destination internet address */ ++} SLIRP_PACKED; ++ ++/* ++ * Ip reassembly queue structure. Each fragment ++ * being reassembled is attached to one of these structures. ++ * They are timed out after ipq_ttl drops to 0, and may also ++ * be reclaimed if memory becomes tight. ++ * size 28 bytes ++ */ ++struct ipq { ++ struct qlink frag_link; /* to ip headers of fragments */ ++ struct qlink ip_link; /* to other reass headers */ ++ uint8_t ipq_ttl; /* time for reass q to live */ ++ uint8_t ipq_p; /* protocol of this fragment */ ++ uint16_t ipq_id; /* sequence id for reassembly */ ++ struct in_addr ipq_src, ipq_dst; ++}; ++ ++/* ++ * Ip header, when holding a fragment. 
++ * ++ * Note: ipf_link must be at same offset as frag_link above ++ */ ++struct ipasfrag { ++ struct qlink ipf_link; ++ struct ip ipf_ip; ++}; ++ ++G_STATIC_ASSERT(offsetof(struct ipq, frag_link) == ++ offsetof(struct ipasfrag, ipf_link)); ++ ++#define ipf_off ipf_ip.ip_off ++#define ipf_tos ipf_ip.ip_tos ++#define ipf_len ipf_ip.ip_len ++#define ipf_next ipf_link.next ++#define ipf_prev ipf_link.prev ++ ++#endif +diff --git a/slirp/src/ip6.h b/slirp/src/ip6.h +new file mode 100644 +index 0000000..0630309 +--- /dev/null ++++ b/slirp/src/ip6.h +@@ -0,0 +1,214 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#ifndef SLIRP_IP6_H ++#define SLIRP_IP6_H ++ ++#include <glib.h> ++#include <string.h> ++ ++#define ALLNODES_MULTICAST \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01 \ ++ } \ ++ } ++ ++#define SOLICITED_NODE_PREFIX \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01, \ ++ 0xff, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00 \ ++ } \ ++ } ++ ++#define LINKLOCAL_ADDR \ ++ { \ ++ .s6_addr = { \ ++ 0xfe, \ ++ 0x80, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x02 \ ++ } \ ++ } ++ ++#define ZERO_ADDR \ ++ { \ ++ .s6_addr = { \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00 \ ++ } \ ++ } ++ ++static inline bool in6_equal(const struct in6_addr *a, const struct in6_addr *b) ++{ ++ return memcmp(a, b, sizeof(*a)) == 0; ++} ++ ++static inline bool in6_equal_net(const struct in6_addr *a, ++ const struct in6_addr *b, int
prefix_len) ++{ ++ if (memcmp(a, b, prefix_len / 8) != 0) { ++ return 0; ++ } ++ ++ if (prefix_len % 8 == 0) { ++ return 1; ++ } ++ ++ return a->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)) == ++ b->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)); ++} ++ ++static inline bool in6_equal_mach(const struct in6_addr *a, ++ const struct in6_addr *b, int prefix_len) ++{ ++ if (memcmp(&(a->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), ++ &(b->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), ++ 16 - DIV_ROUND_UP(prefix_len, 8)) != 0) { ++ return 0; ++ } ++ ++ if (prefix_len % 8 == 0) { ++ return 1; ++ } ++ ++ return (a->s6_addr[prefix_len / 8] & ++ ((1U << (8 - (prefix_len % 8))) - 1)) == ++ (b->s6_addr[prefix_len / 8] & ((1U << (8 - (prefix_len % 8))) - 1)); ++} ++ ++ ++#define in6_equal_router(a) \ ++ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ ++ in6_equal_mach(a, &slirp->vhost_addr6, slirp->vprefix_len)) || \ ++ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ ++ in6_equal_mach(a, &slirp->vhost_addr6, 64))) ++ ++#define in6_equal_dns(a) \ ++ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ ++ in6_equal_mach(a, &slirp->vnameserver_addr6, slirp->vprefix_len)) || \ ++ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ ++ in6_equal_mach(a, &slirp->vnameserver_addr6, 64))) ++ ++#define in6_equal_host(a) (in6_equal_router(a) || in6_equal_dns(a)) ++ ++#define in6_solicitednode_multicast(a) \ ++ (in6_equal_net(a, &(struct in6_addr)SOLICITED_NODE_PREFIX, 104)) ++ ++#define in6_zero(a) (in6_equal(a, &(struct in6_addr)ZERO_ADDR)) ++ ++/* Compute emulated host MAC address from its ipv6 address */ ++static inline void in6_compute_ethaddr(struct in6_addr ip, ++ uint8_t eth[ETH_ALEN]) ++{ ++ eth[0] = 0x52; ++ eth[1] = 0x56; ++ memcpy(ð[2], &ip.s6_addr[16 - (ETH_ALEN - 2)], ETH_ALEN - 2); ++} ++ ++/* ++ * Definitions for internet protocol version 6. ++ * Per RFC 2460, December 1998. 
++ */ ++#define IP6VERSION 6 ++#define IP6_HOP_LIMIT 255 ++ ++/* ++ * Structure of an internet header, naked of options. ++ */ ++struct ip6 { ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint32_t ip_v : 4, /* version */ ++ ip_tc_hi : 4, /* traffic class */ ++ ip_tc_lo : 4, ip_fl_hi : 4, /* flow label */ ++ ip_fl_lo : 16; ++#else ++ uint32_t ip_tc_hi : 4, ip_v : 4, ip_fl_hi : 4, ip_tc_lo : 4, ip_fl_lo : 16; ++#endif ++ uint16_t ip_pl; /* payload length */ ++ uint8_t ip_nh; /* next header */ ++ uint8_t ip_hl; /* hop limit */ ++ struct in6_addr ip_src, ip_dst; /* source and dest address */ ++}; ++ ++/* ++ * IPv6 pseudo-header used by upper-layer protocols ++ */ ++struct ip6_pseudohdr { ++ struct in6_addr ih_src; /* source internet address */ ++ struct in6_addr ih_dst; /* destination internet address */ ++ uint32_t ih_pl; /* upper-layer packet length */ ++ uint16_t ih_zero_hi; /* zero */ ++ uint8_t ih_zero_lo; /* zero */ ++ uint8_t ih_nh; /* next header */ ++}; ++ ++/* ++ * We don't want to mark these ip6 structs as packed as they are naturally ++ * correctly aligned; instead assert that there is no stray padding. ++ * If we marked the struct as packed then we would be unable to take ++ * the address of any of the fields in it. ++ */ ++G_STATIC_ASSERT(sizeof(struct ip6) == 40); ++G_STATIC_ASSERT(sizeof(struct ip6_pseudohdr) == 40); ++ ++#endif +diff --git a/slirp/src/ip6_icmp.c b/slirp/src/ip6_icmp.c +new file mode 100644 +index 0000000..28ec2be +--- /dev/null ++++ b/slirp/src/ip6_icmp.c +@@ -0,0 +1,434 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. 
++ */ ++ ++#include "slirp.h" ++#include "ip6_icmp.h" ++ ++#define NDP_Interval \ ++ g_rand_int_range(slirp->grand, NDP_MinRtrAdvInterval, NDP_MaxRtrAdvInterval) ++ ++static void ra_timer_handler(void *opaque) ++{ ++ Slirp *slirp = opaque; ++ ++ slirp->cb->timer_mod(slirp->ra_timer, ++ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + ++ NDP_Interval, ++ slirp->opaque); ++ ndp_send_ra(slirp); ++} ++ ++void icmp6_init(Slirp *slirp) ++{ ++ if (!slirp->in6_enabled) { ++ return; ++ } ++ ++ slirp->ra_timer = ++ slirp->cb->timer_new(ra_timer_handler, slirp, slirp->opaque); ++ slirp->cb->timer_mod(slirp->ra_timer, ++ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + ++ NDP_Interval, ++ slirp->opaque); ++} ++ ++void icmp6_cleanup(Slirp *slirp) ++{ ++ if (!slirp->in6_enabled) { ++ return; ++ } ++ ++ slirp->cb->timer_free(slirp->ra_timer, slirp->opaque); ++} ++ ++static void icmp6_send_echoreply(struct mbuf *m, Slirp *slirp, struct ip6 *ip, ++ struct icmp6 *icmp) ++{ ++ struct mbuf *t = m_get(slirp); ++ t->m_len = sizeof(struct ip6) + ntohs(ip->ip_pl); ++ memcpy(t->m_data, m->m_data, t->m_len); ++ ++ /* IPv6 Packet */ ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_dst = ip->ip_src; ++ rip->ip_src = ip->ip_dst; ++ ++ /* ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_ECHO_REPLY; ++ ricmp->icmp6_cksum = 0; ++ ++ /* Checksum */ ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code) ++{ ++ Slirp *slirp = m->slirp; ++ struct mbuf *t; ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ char addrstr[INET6_ADDRSTRLEN]; ++ ++ DEBUG_CALL("icmp6_send_error"); ++ DEBUG_ARG("type = %d, code = %d", type, code); ++ ++ if (IN6_IS_ADDR_MULTICAST(&ip->ip_src) || in6_zero(&ip->ip_src)) { ++ /* TODO icmp error? 
*/ ++ return; ++ } ++ ++ t = m_get(slirp); ++ ++ /* IPv6 packet */ ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR; ++ rip->ip_dst = ip->ip_src; ++ inet_ntop(AF_INET6, &rip->ip_dst, addrstr, INET6_ADDRSTRLEN); ++ DEBUG_ARG("target = %s", addrstr); ++ ++ rip->ip_nh = IPPROTO_ICMPV6; ++ const int error_data_len = MIN( ++ m->m_len, slirp->if_mtu - (sizeof(struct ip6) + ICMP6_ERROR_MINLEN)); ++ rip->ip_pl = htons(ICMP6_ERROR_MINLEN + error_data_len); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = type; ++ ricmp->icmp6_code = code; ++ ricmp->icmp6_cksum = 0; ++ ++ switch (type) { ++ case ICMP6_UNREACH: ++ case ICMP6_TIMXCEED: ++ ricmp->icmp6_err.unused = 0; ++ break; ++ case ICMP6_TOOBIG: ++ ricmp->icmp6_err.mtu = htonl(slirp->if_mtu); ++ break; ++ case ICMP6_PARAMPROB: ++ /* TODO: Handle this case */ ++ break; ++ default: ++ g_assert_not_reached(); ++ break; ++ } ++ t->m_data += ICMP6_ERROR_MINLEN; ++ memcpy(t->m_data, m->m_data, error_data_len); ++ ++ /* Checksum */ ++ t->m_data -= ICMP6_ERROR_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++/* ++ * Send NDP Router Advertisement ++ */ ++void ndp_send_ra(Slirp *slirp) ++{ ++ DEBUG_CALL("ndp_send_ra"); ++ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ size_t pl_size = 0; ++ struct in6_addr addr; ++ uint32_t scope_id; ++ ++ rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR; ++ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; ++ rip->ip_nh = IPPROTO_ICMPV6; ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_RA; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nra.chl = 
NDP_AdvCurHopLimit; ++ ricmp->icmp6_nra.M = NDP_AdvManagedFlag; ++ ricmp->icmp6_nra.O = NDP_AdvOtherConfigFlag; ++ ricmp->icmp6_nra.reserved = 0; ++ ricmp->icmp6_nra.lifetime = htons(NDP_AdvDefaultLifetime); ++ ricmp->icmp6_nra.reach_time = htonl(NDP_AdvReachableTime); ++ ricmp->icmp6_nra.retrans_time = htonl(NDP_AdvRetransTime); ++ t->m_data += ICMP6_NDP_RA_MINLEN; ++ pl_size += ICMP6_NDP_RA_MINLEN; ++ ++ /* Source link-layer address (NDP option) */ ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(rip->ip_src, opt->ndpopt_linklayer); ++ t->m_data += NDPOPT_LINKLAYER_LEN; ++ pl_size += NDPOPT_LINKLAYER_LEN; ++ ++ /* Prefix information (NDP option) */ ++ struct ndpopt *opt2 = mtod(t, struct ndpopt *); ++ opt2->ndpopt_type = NDPOPT_PREFIX_INFO; ++ opt2->ndpopt_len = NDPOPT_PREFIXINFO_LEN / 8; ++ opt2->ndpopt_prefixinfo.prefix_length = slirp->vprefix_len; ++ opt2->ndpopt_prefixinfo.L = 1; ++ opt2->ndpopt_prefixinfo.A = 1; ++ opt2->ndpopt_prefixinfo.reserved1 = 0; ++ opt2->ndpopt_prefixinfo.valid_lt = htonl(NDP_AdvValidLifetime); ++ opt2->ndpopt_prefixinfo.pref_lt = htonl(NDP_AdvPrefLifetime); ++ opt2->ndpopt_prefixinfo.reserved2 = 0; ++ opt2->ndpopt_prefixinfo.prefix = slirp->vprefix_addr6; ++ t->m_data += NDPOPT_PREFIXINFO_LEN; ++ pl_size += NDPOPT_PREFIXINFO_LEN; ++ ++ /* Prefix information (NDP option) */ ++ if (get_dns6_addr(&addr, &scope_id) >= 0) { ++ /* Host system does have an IPv6 DNS server, announce our proxy. 
*/ ++ struct ndpopt *opt3 = mtod(t, struct ndpopt *); ++ opt3->ndpopt_type = NDPOPT_RDNSS; ++ opt3->ndpopt_len = NDPOPT_RDNSS_LEN / 8; ++ opt3->ndpopt_rdnss.reserved = 0; ++ opt3->ndpopt_rdnss.lifetime = htonl(2 * NDP_MaxRtrAdvInterval); ++ opt3->ndpopt_rdnss.addr = slirp->vnameserver_addr6; ++ t->m_data += NDPOPT_RDNSS_LEN; ++ pl_size += NDPOPT_RDNSS_LEN; ++ } ++ ++ rip->ip_pl = htons(pl_size); ++ t->m_data -= sizeof(struct ip6) + pl_size; ++ t->m_len = sizeof(struct ip6) + pl_size; ++ ++ /* ICMPv6 Checksum */ ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++/* ++ * Send NDP Neighbor Solitication ++ */ ++void ndp_send_ns(Slirp *slirp, struct in6_addr addr) ++{ ++ char addrstr[INET6_ADDRSTRLEN]; ++ ++ inet_ntop(AF_INET6, &addr, addrstr, INET6_ADDRSTRLEN); ++ ++ DEBUG_CALL("ndp_send_ns"); ++ DEBUG_ARG("target = %s", addrstr); ++ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = slirp->vhost_addr6; ++ rip->ip_dst = (struct in6_addr)SOLICITED_NODE_PREFIX; ++ memcpy(&rip->ip_dst.s6_addr[13], &addr.s6_addr[13], 3); ++ rip->ip_nh = IPPROTO_ICMPV6; ++ rip->ip_pl = htons(ICMP6_NDP_NS_MINLEN + NDPOPT_LINKLAYER_LEN); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_NS; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nns.reserved = 0; ++ ricmp->icmp6_nns.target = addr; ++ ++ /* Build NDP option */ ++ t->m_data += ICMP6_NDP_NS_MINLEN; ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(slirp->vhost_addr6, opt->ndpopt_linklayer); ++ ++ /* ICMPv6 Checksum */ ++ t->m_data -= ICMP6_NDP_NA_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ 
ip6_output(NULL, t, 1); ++} ++ ++/* ++ * Send NDP Neighbor Advertisement ++ */ ++static void ndp_send_na(Slirp *slirp, struct ip6 *ip, struct icmp6 *icmp) ++{ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = icmp->icmp6_nns.target; ++ if (in6_zero(&ip->ip_src)) { ++ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; ++ } else { ++ rip->ip_dst = ip->ip_src; ++ } ++ rip->ip_nh = IPPROTO_ICMPV6; ++ rip->ip_pl = htons(ICMP6_NDP_NA_MINLEN + NDPOPT_LINKLAYER_LEN); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_NA; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nna.R = NDP_IsRouter; ++ ricmp->icmp6_nna.S = !IN6_IS_ADDR_MULTICAST(&rip->ip_dst); ++ ricmp->icmp6_nna.O = 1; ++ ricmp->icmp6_nna.reserved_hi = 0; ++ ricmp->icmp6_nna.reserved_lo = 0; ++ ricmp->icmp6_nna.target = icmp->icmp6_nns.target; ++ ++ /* Build NDP option */ ++ t->m_data += ICMP6_NDP_NA_MINLEN; ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_TARGET; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(ricmp->icmp6_nna.target, opt->ndpopt_linklayer); ++ ++ /* ICMPv6 Checksum */ ++ t->m_data -= ICMP6_NDP_NA_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++/* ++ * Process a NDP message ++ */ ++static void ndp_input(struct mbuf *m, Slirp *slirp, struct ip6 *ip, ++ struct icmp6 *icmp) ++{ ++ m->m_len += ETH_HLEN; ++ m->m_data -= ETH_HLEN; ++ struct ethhdr *eth = mtod(m, struct ethhdr *); ++ m->m_len -= ETH_HLEN; ++ m->m_data += ETH_HLEN; ++ ++ switch (icmp->icmp6_type) { ++ case ICMP6_NDP_RS: ++ DEBUG_CALL(" type = Router Solicitation"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ ntohs(ip->ip_pl) >= 
ICMP6_NDP_RS_MINLEN) { ++ /* Gratuitous NDP */ ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ ++ ndp_send_ra(slirp); ++ } ++ break; ++ ++ case ICMP6_NDP_RA: ++ DEBUG_CALL(" type = Router Advertisement"); ++ slirp->cb->guest_error("Warning: guest sent NDP RA, but shouldn't", ++ slirp->opaque); ++ break; ++ ++ case ICMP6_NDP_NS: ++ DEBUG_CALL(" type = Neighbor Solicitation"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nns.target) && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_NS_MINLEN && ++ (!in6_zero(&ip->ip_src) || ++ in6_solicitednode_multicast(&ip->ip_dst))) { ++ if (in6_equal_host(&icmp->icmp6_nns.target)) { ++ /* Gratuitous NDP */ ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ ndp_send_na(slirp, ip, icmp); ++ } ++ } ++ break; ++ ++ case ICMP6_NDP_NA: ++ DEBUG_CALL(" type = Neighbor Advertisement"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_NA_MINLEN && ++ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nna.target) && ++ (!IN6_IS_ADDR_MULTICAST(&ip->ip_dst) || icmp->icmp6_nna.S == 0)) { ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ } ++ break; ++ ++ case ICMP6_NDP_REDIRECT: ++ DEBUG_CALL(" type = Redirect"); ++ slirp->cb->guest_error( ++ "Warning: guest sent NDP REDIRECT, but shouldn't", slirp->opaque); ++ break; ++ } ++} ++ ++/* ++ * Process a received ICMPv6 message. 
++ */ ++void icmp6_input(struct mbuf *m) ++{ ++ struct icmp6 *icmp; ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ Slirp *slirp = m->slirp; ++ int hlen = sizeof(struct ip6); ++ ++ DEBUG_CALL("icmp6_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (ntohs(ip->ip_pl) < ICMP6_MINLEN) { ++ goto end; ++ } ++ ++ if (ip6_cksum(m)) { ++ goto end; ++ } ++ ++ m->m_len -= hlen; ++ m->m_data += hlen; ++ icmp = mtod(m, struct icmp6 *); ++ m->m_len += hlen; ++ m->m_data -= hlen; ++ ++ DEBUG_ARG("icmp6_type = %d", icmp->icmp6_type); ++ switch (icmp->icmp6_type) { ++ case ICMP6_ECHO_REQUEST: ++ if (in6_equal_host(&ip->ip_dst)) { ++ icmp6_send_echoreply(m, slirp, ip, icmp); ++ } else { ++ /* TODO */ ++ g_critical("external icmpv6 not supported yet"); ++ } ++ break; ++ ++ case ICMP6_NDP_RS: ++ case ICMP6_NDP_RA: ++ case ICMP6_NDP_NS: ++ case ICMP6_NDP_NA: ++ case ICMP6_NDP_REDIRECT: ++ ndp_input(m, slirp, ip, icmp); ++ break; ++ ++ case ICMP6_UNREACH: ++ case ICMP6_TOOBIG: ++ case ICMP6_TIMXCEED: ++ case ICMP6_PARAMPROB: ++ /* XXX? report error? close socket? */ ++ default: ++ break; ++ } ++ ++end: ++ m_free(m); ++} +diff --git a/slirp/src/ip6_icmp.h b/slirp/src/ip6_icmp.h +new file mode 100644 +index 0000000..c37e60f +--- /dev/null ++++ b/slirp/src/ip6_icmp.h +@@ -0,0 +1,219 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#ifndef SLIRP_IP6_ICMP_H ++#define SLIRP_IP6_ICMP_H ++ ++/* ++ * Interface Control Message Protocol version 6 Definitions. ++ * Per RFC 4443, March 2006. ++ * ++ * Network Discover Protocol Definitions. ++ * Per RFC 4861, September 2007. 
++ */ ++ ++struct icmp6_echo { /* Echo Messages */ ++ uint16_t id; ++ uint16_t seq_num; ++}; ++ ++union icmp6_error_body { ++ uint32_t unused; ++ uint32_t pointer; ++ uint32_t mtu; ++}; ++ ++/* ++ * NDP Messages ++ */ ++struct ndp_rs { /* Router Solicitation Message */ ++ uint32_t reserved; ++}; ++ ++struct ndp_ra { /* Router Advertisement Message */ ++ uint8_t chl; /* Cur Hop Limit */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t M : 1, O : 1, reserved : 6; ++#else ++ uint8_t reserved : 6, O : 1, M : 1; ++#endif ++ uint16_t lifetime; /* Router Lifetime */ ++ uint32_t reach_time; /* Reachable Time */ ++ uint32_t retrans_time; /* Retrans Timer */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_ra) == 12); ++ ++struct ndp_ns { /* Neighbor Solicitation Message */ ++ uint32_t reserved; ++ struct in6_addr target; /* Target Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_ns) == 20); ++ ++struct ndp_na { /* Neighbor Advertisement Message */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint32_t R : 1, /* Router Flag */ ++ S : 1, /* Solicited Flag */ ++ O : 1, /* Override Flag */ ++ reserved_hi : 5, reserved_lo : 24; ++#else ++ uint32_t reserved_hi : 5, O : 1, S : 1, R : 1, reserved_lo : 24; ++#endif ++ struct in6_addr target; /* Target Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_na) == 20); ++ ++struct ndp_redirect { ++ uint32_t reserved; ++ struct in6_addr target; /* Target Address */ ++ struct in6_addr dest; /* Destination Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_redirect) == 36); ++ ++/* ++ * Structure of an icmpv6 header. 
++ */ ++struct icmp6 { ++ uint8_t icmp6_type; /* type of message, see below */ ++ uint8_t icmp6_code; /* type sub code */ ++ uint16_t icmp6_cksum; /* ones complement cksum of struct */ ++ union { ++ union icmp6_error_body error_body; ++ struct icmp6_echo echo; ++ struct ndp_rs ndp_rs; ++ struct ndp_ra ndp_ra; ++ struct ndp_ns ndp_ns; ++ struct ndp_na ndp_na; ++ struct ndp_redirect ndp_redirect; ++ } icmp6_body; ++#define icmp6_err icmp6_body.error_body ++#define icmp6_echo icmp6_body.echo ++#define icmp6_nrs icmp6_body.ndp_rs ++#define icmp6_nra icmp6_body.ndp_ra ++#define icmp6_nns icmp6_body.ndp_ns ++#define icmp6_nna icmp6_body.ndp_na ++#define icmp6_redirect icmp6_body.ndp_redirect ++}; ++ ++G_STATIC_ASSERT(sizeof(struct icmp6) == 40); ++ ++#define ICMP6_MINLEN 4 ++#define ICMP6_ERROR_MINLEN 8 ++#define ICMP6_ECHO_MINLEN 8 ++#define ICMP6_NDP_RS_MINLEN 8 ++#define ICMP6_NDP_RA_MINLEN 16 ++#define ICMP6_NDP_NS_MINLEN 24 ++#define ICMP6_NDP_NA_MINLEN 24 ++#define ICMP6_NDP_REDIRECT_MINLEN 40 ++ ++/* ++ * NDP Options ++ */ ++struct ndpopt { ++ uint8_t ndpopt_type; /* Option type */ ++ uint8_t ndpopt_len; /* /!\ In units of 8 octets */ ++ union { ++ unsigned char linklayer_addr[6]; /* Source/Target Link-layer */ ++#define ndpopt_linklayer ndpopt_body.linklayer_addr ++ struct prefixinfo { /* Prefix Information */ ++ uint8_t prefix_length; ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t L : 1, A : 1, reserved1 : 6; ++#else ++ uint8_t reserved1 : 6, A : 1, L : 1; ++#endif ++ uint32_t valid_lt; /* Valid Lifetime */ ++ uint32_t pref_lt; /* Preferred Lifetime */ ++ uint32_t reserved2; ++ struct in6_addr prefix; ++ } SLIRP_PACKED prefixinfo; ++#define ndpopt_prefixinfo ndpopt_body.prefixinfo ++ struct rdnss { ++ uint16_t reserved; ++ uint32_t lifetime; ++ struct in6_addr addr; ++ } SLIRP_PACKED rdnss; ++#define ndpopt_rdnss ndpopt_body.rdnss ++ } ndpopt_body; ++} SLIRP_PACKED; ++ ++/* NDP options type */ ++#define NDPOPT_LINKLAYER_SOURCE 1 /* Source Link-Layer Address */ 
++#define NDPOPT_LINKLAYER_TARGET 2 /* Target Link-Layer Address */ ++#define NDPOPT_PREFIX_INFO 3 /* Prefix Information */ ++#define NDPOPT_RDNSS 25 /* Recursive DNS Server Address */ ++ ++/* NDP options size, in octets. */ ++#define NDPOPT_LINKLAYER_LEN 8 ++#define NDPOPT_PREFIXINFO_LEN 32 ++#define NDPOPT_RDNSS_LEN 24 ++ ++/* ++ * Definition of type and code field values. ++ * Per https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xml ++ * Last Updated 2012-11-12 ++ */ ++ ++/* Errors */ ++#define ICMP6_UNREACH 1 /* Destination Unreachable */ ++#define ICMP6_UNREACH_NO_ROUTE 0 /* no route to dest */ ++#define ICMP6_UNREACH_DEST_PROHIB 1 /* com with dest prohibited */ ++#define ICMP6_UNREACH_SCOPE 2 /* beyond scope of src addr */ ++#define ICMP6_UNREACH_ADDRESS 3 /* address unreachable */ ++#define ICMP6_UNREACH_PORT 4 /* port unreachable */ ++#define ICMP6_UNREACH_SRC_FAIL 5 /* src addr failed */ ++#define ICMP6_UNREACH_REJECT_ROUTE 6 /* reject route to dest */ ++#define ICMP6_UNREACH_SRC_HDR_ERROR 7 /* error in src routing header */ ++#define ICMP6_TOOBIG 2 /* Packet Too Big */ ++#define ICMP6_TIMXCEED 3 /* Time Exceeded */ ++#define ICMP6_TIMXCEED_INTRANS 0 /* hop limit exceeded in transit */ ++#define ICMP6_TIMXCEED_REASS 1 /* ttl=0 in reass */ ++#define ICMP6_PARAMPROB 4 /* Parameter Problem */ ++#define ICMP6_PARAMPROB_HDR_FIELD 0 /* err header field */ ++#define ICMP6_PARAMPROB_NXTHDR_TYPE 1 /* unrecognized Next Header type */ ++#define ICMP6_PARAMPROB_IPV6_OPT 2 /* unrecognized IPv6 option */ ++ ++/* Informational Messages */ ++#define ICMP6_ECHO_REQUEST 128 /* Echo Request */ ++#define ICMP6_ECHO_REPLY 129 /* Echo Reply */ ++#define ICMP6_NDP_RS 133 /* Router Solicitation (NDP) */ ++#define ICMP6_NDP_RA 134 /* Router Advertisement (NDP) */ ++#define ICMP6_NDP_NS 135 /* Neighbor Solicitation (NDP) */ ++#define ICMP6_NDP_NA 136 /* Neighbor Advertisement (NDP) */ ++#define ICMP6_NDP_REDIRECT 137 /* Redirect Message (NDP) */ ++ ++/* ++ * 
Router Configuration Variables (rfc4861#section-6) ++ */ ++#define NDP_IsRouter 1 ++#define NDP_AdvSendAdvertisements 1 ++#define NDP_MaxRtrAdvInterval 600000 ++#define NDP_MinRtrAdvInterval \ ++ ((NDP_MaxRtrAdvInterval >= 9) ? NDP_MaxRtrAdvInterval / 3 : \ ++ NDP_MaxRtrAdvInterval) ++#define NDP_AdvManagedFlag 0 ++#define NDP_AdvOtherConfigFlag 0 ++#define NDP_AdvLinkMTU 0 ++#define NDP_AdvReachableTime 0 ++#define NDP_AdvRetransTime 0 ++#define NDP_AdvCurHopLimit 64 ++#define NDP_AdvDefaultLifetime ((3 * NDP_MaxRtrAdvInterval) / 1000) ++#define NDP_AdvValidLifetime 86400 ++#define NDP_AdvOnLinkFlag 1 ++#define NDP_AdvPrefLifetime 14400 ++#define NDP_AdvAutonomousFlag 1 ++ ++void icmp6_init(Slirp *slirp); ++void icmp6_cleanup(Slirp *slirp); ++void icmp6_input(struct mbuf *); ++void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code); ++void ndp_send_ra(Slirp *slirp); ++void ndp_send_ns(Slirp *slirp, struct in6_addr addr); ++ ++#endif +diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c +new file mode 100644 +index 0000000..dfcbfd6 +--- /dev/null ++++ b/slirp/src/ip6_input.c +@@ -0,0 +1,78 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#include "slirp.h" ++#include "ip6_icmp.h" ++ ++/* ++ * IP initialization: fill in IP protocol switch table. ++ * All protocols not implemented in kernel go to raw IP protocol handler. 
++ */ ++void ip6_init(Slirp *slirp) ++{ ++ icmp6_init(slirp); ++} ++ ++void ip6_cleanup(Slirp *slirp) ++{ ++ icmp6_cleanup(slirp); ++} ++ ++void ip6_input(struct mbuf *m) ++{ ++ struct ip6 *ip6; ++ Slirp *slirp = m->slirp; ++ ++ if (!slirp->in6_enabled) { ++ goto bad; ++ } ++ ++ DEBUG_CALL("ip6_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (m->m_len < sizeof(struct ip6)) { ++ goto bad; ++ } ++ ++ ip6 = mtod(m, struct ip6 *); ++ ++ if (ip6->ip_v != IP6VERSION) { ++ goto bad; ++ } ++ ++ if (ntohs(ip6->ip_pl) > slirp->if_mtu) { ++ icmp6_send_error(m, ICMP6_TOOBIG, 0); ++ goto bad; ++ } ++ ++ /* check ip_ttl for a correct ICMP reply */ ++ if (ip6->ip_hl == 0) { ++ icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); ++ goto bad; ++ } ++ ++ /* ++ * Switch out to protocol's input routine. ++ */ ++ switch (ip6->ip_nh) { ++ case IPPROTO_TCP: ++ NTOHS(ip6->ip_pl); ++ tcp_input(m, sizeof(struct ip6), (struct socket *)NULL, AF_INET6); ++ break; ++ case IPPROTO_UDP: ++ udp6_input(m); ++ break; ++ case IPPROTO_ICMPV6: ++ icmp6_input(m); ++ break; ++ default: ++ m_free(m); ++ } ++ return; ++bad: ++ m_free(m); ++} +diff --git a/slirp/src/ip6_output.c b/slirp/src/ip6_output.c +new file mode 100644 +index 0000000..b861106 +--- /dev/null ++++ b/slirp/src/ip6_output.c +@@ -0,0 +1,39 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#include "slirp.h" ++ ++/* Number of packets queued before we start sending ++ * (to prevent allocing too many mbufs) */ ++#define IF6_THRESH 10 ++ ++/* ++ * IPv6 output. 
The packet in mbuf chain m contains a IP header ++ */ ++int ip6_output(struct socket *so, struct mbuf *m, int fast) ++{ ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ ++ DEBUG_CALL("ip6_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ /* Fill IPv6 header */ ++ ip->ip_v = IP6VERSION; ++ ip->ip_hl = IP6_HOP_LIMIT; ++ ip->ip_tc_hi = 0; ++ ip->ip_tc_lo = 0; ++ ip->ip_fl_hi = 0; ++ ip->ip_fl_lo = 0; ++ ++ if (fast) { ++ if_encap(m->slirp, m); ++ } else { ++ if_output(so, m); ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/ip_icmp.c b/slirp/src/ip_icmp.c +new file mode 100644 +index 0000000..fe0add4 +--- /dev/null ++++ b/slirp/src/ip_icmp.c +@@ -0,0 +1,489 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 ++ * ip_icmp.c,v 1.7 1995/05/30 08:09:42 rgrimes Exp ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++#ifndef WITH_ICMP_ERROR_MSG ++#define WITH_ICMP_ERROR_MSG 0 ++#endif ++ ++/* The message sent when emulating PING */ ++/* Be nice and tell them it's just a pseudo-ping packet */ ++static const char icmp_ping_msg[] = ++ "This is a pseudo-PING packet used by Slirp to emulate ICMP ECHO-REQUEST " ++ "packets.\n"; ++ ++/* list of actions for icmp_send_error() on RX of an icmp message */ ++static const int icmp_flush[19] = { ++ /* ECHO REPLY (0) */ 0, ++ 1, ++ 1, ++ /* DEST UNREACH (3) */ 1, ++ /* SOURCE QUENCH (4)*/ 1, ++ /* REDIRECT (5) */ 1, ++ 1, ++ 1, ++ /* ECHO (8) */ 0, ++ /* ROUTERADVERT (9) */ 1, ++ /* ROUTERSOLICIT (10) */ 1, ++ /* TIME EXCEEDED (11) */ 1, ++ /* PARAMETER PROBLEM (12) */ 1, ++ /* TIMESTAMP (13) */ 0, ++ /* TIMESTAMP REPLY (14) */ 0, ++ /* INFO (15) */ 0, ++ /* INFO REPLY (16) */ 0, ++ /* ADDR MASK (17) */ 0, ++ /* ADDR MASK REPLY (18) */ 0 ++}; ++ ++void icmp_init(Slirp *slirp) ++{ ++ slirp->icmp.so_next = slirp->icmp.so_prev = &slirp->icmp; ++ slirp->icmp_last_so = &slirp->icmp; ++} ++ ++void icmp_cleanup(Slirp *slirp) ++{ ++ while (slirp->icmp.so_next != &slirp->icmp) { ++ icmp_detach(slirp->icmp.so_next); ++ } ++} ++ ++static int icmp_send(struct socket *so, struct mbuf *m, int hlen) ++{ ++ struct ip *ip = mtod(m, struct ip *); ++ struct sockaddr_in addr; 
++ ++ so->s = slirp_socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); ++ if (so->s == -1) { ++ return -1; ++ } ++ ++ if (slirp_bind_outbound(so, AF_INET) != 0) { ++ // bind failed - close socket ++ closesocket(so->s); ++ so->s = -1; ++ return -1; ++ } ++ ++ so->so_m = m; ++ so->so_faddr = ip->ip_dst; ++ so->so_laddr = ip->ip_src; ++ so->so_iptos = ip->ip_tos; ++ so->so_type = IPPROTO_ICMP; ++ so->so_state = SS_ISFCONNECTED; ++ so->so_expire = curtime + SO_EXPIRE; ++ ++ addr.sin_family = AF_INET; ++ addr.sin_addr = so->so_faddr; ++ ++ insque(so, &so->slirp->icmp); ++ ++ if (sendto(so->s, m->m_data + hlen, m->m_len - hlen, 0, ++ (struct sockaddr *)&addr, sizeof(addr)) == -1) { ++ DEBUG_MISC("icmp_input icmp sendto tx errno = %d-%s", errno, ++ strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); ++ icmp_detach(so); ++ } ++ ++ return 0; ++} ++ ++void icmp_detach(struct socket *so) ++{ ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++} ++ ++/* ++ * Process a received ICMP message. ++ */ ++void icmp_input(struct mbuf *m, int hlen) ++{ ++ register struct icmp *icp; ++ register struct ip *ip = mtod(m, struct ip *); ++ int icmplen = ip->ip_len; ++ Slirp *slirp = m->slirp; ++ ++ DEBUG_CALL("icmp_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ /* ++ * Locate icmp structure in mbuf, and check ++ * that its not corrupted and of at least minimum length. 
++ */ ++ if (icmplen < ICMP_MINLEN) { /* min 8 bytes payload */ ++ freeit: ++ m_free(m); ++ goto end_error; ++ } ++ ++ m->m_len -= hlen; ++ m->m_data += hlen; ++ icp = mtod(m, struct icmp *); ++ if (cksum(m, icmplen)) { ++ goto freeit; ++ } ++ m->m_len += hlen; ++ m->m_data -= hlen; ++ ++ DEBUG_ARG("icmp_type = %d", icp->icmp_type); ++ switch (icp->icmp_type) { ++ case ICMP_ECHO: ++ ip->ip_len += hlen; /* since ip_input subtracts this */ ++ if (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || ++ ip->ip_dst.s_addr == slirp->vnameserver_addr.s_addr) { ++ icmp_reflect(m); ++ } else if (slirp->restricted) { ++ goto freeit; ++ } else { ++ struct socket *so; ++ struct sockaddr_storage addr; ++ so = socreate(slirp); ++ if (icmp_send(so, m, hlen) == 0) { ++ return; ++ } ++ if (udp_attach(so, AF_INET) == -1) { ++ DEBUG_MISC("icmp_input udp_attach errno = %d-%s", errno, ++ strerror(errno)); ++ sofree(so); ++ m_free(m); ++ goto end_error; ++ } ++ so->so_m = m; ++ so->so_ffamily = AF_INET; ++ so->so_faddr = ip->ip_dst; ++ so->so_fport = htons(7); ++ so->so_lfamily = AF_INET; ++ so->so_laddr = ip->ip_src; ++ so->so_lport = htons(9); ++ so->so_iptos = ip->ip_tos; ++ so->so_type = IPPROTO_ICMP; ++ so->so_state = SS_ISFCONNECTED; ++ ++ /* Send the packet */ ++ addr = so->fhost.ss; ++ if (sotranslate_out(so, &addr) < 0) { ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, ++ strerror(errno)); ++ udp_detach(so); ++ return; ++ } ++ ++ if (sendto(so->s, icmp_ping_msg, strlen(icmp_ping_msg), 0, ++ (struct sockaddr *)&addr, sockaddr_size(&addr)) == -1) { ++ DEBUG_MISC("icmp_input udp sendto tx errno = %d-%s", errno, ++ strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, ++ strerror(errno)); ++ udp_detach(so); ++ } ++ } /* if ip->ip_dst.s_addr == alias_addr.s_addr */ ++ break; ++ case ICMP_UNREACH: ++ /* XXX? report error? close socket? 
*/ ++ case ICMP_TIMXCEED: ++ case ICMP_PARAMPROB: ++ case ICMP_SOURCEQUENCH: ++ case ICMP_TSTAMP: ++ case ICMP_MASKREQ: ++ case ICMP_REDIRECT: ++ m_free(m); ++ break; ++ ++ default: ++ m_free(m); ++ } /* swith */ ++ ++end_error: ++ /* m is m_free()'d xor put in a socket xor or given to ip_send */ ++ return; ++} ++ ++ ++/* ++ * Send an ICMP message in response to a situation ++ * ++ * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. ++ *MAY send more (we do). MUST NOT change this header information. MUST NOT reply ++ *to a multicast/broadcast IP address. MUST NOT reply to a multicast/broadcast ++ *MAC address. MUST reply to only the first fragment. ++ */ ++/* ++ * Send ICMP_UNREACH back to the source regarding msrc. ++ * mbuf *msrc is used as a template, but is NOT m_free()'d. ++ * It is reported as the bad ip packet. The header should ++ * be fully correct and in host byte order. ++ * ICMP fragmentation is illegal. All machines must accept 576 bytes in one ++ * packet. 
The maximum payload is 576-20(ip hdr)-8(icmp hdr)=548 ++ */ ++ ++#define ICMP_MAXDATALEN (IP_MSS - 28) ++void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message) ++{ ++ unsigned hlen, shlen, s_ip_len; ++ register struct ip *ip; ++ register struct icmp *icp; ++ register struct mbuf *m; ++ ++ DEBUG_CALL("icmp_send_error"); ++ DEBUG_ARG("msrc = %p", msrc); ++ DEBUG_ARG("msrc_len = %d", msrc->m_len); ++ ++ if (type != ICMP_UNREACH && type != ICMP_TIMXCEED) ++ goto end_error; ++ ++ /* check msrc */ ++ if (!msrc) ++ goto end_error; ++ ip = mtod(msrc, struct ip *); ++ if (slirp_debug & DBG_MISC) { ++ char bufa[20], bufb[20]; ++ strcpy(bufa, inet_ntoa(ip->ip_src)); ++ strcpy(bufb, inet_ntoa(ip->ip_dst)); ++ DEBUG_MISC(" %.16s to %.16s", bufa, bufb); ++ } ++ if (ip->ip_off & IP_OFFMASK) ++ goto end_error; /* Only reply to fragment 0 */ ++ ++ /* Do not reply to source-only IPs */ ++ if ((ip->ip_src.s_addr & htonl(~(0xf << 28))) == 0) { ++ goto end_error; ++ } ++ ++ shlen = ip->ip_hl << 2; ++ s_ip_len = ip->ip_len; ++ if (ip->ip_p == IPPROTO_ICMP) { ++ icp = (struct icmp *)((char *)ip + shlen); ++ /* ++ * Assume any unknown ICMP type is an error. This isn't ++ * specified by the RFC, but think about it.. 
++ */ ++ if (icp->icmp_type > 18 || icmp_flush[icp->icmp_type]) ++ goto end_error; ++ } ++ ++ /* make a copy */ ++ m = m_get(msrc->slirp); ++ if (!m) { ++ goto end_error; ++ } ++ ++ { ++ int new_m_size; ++ new_m_size = ++ sizeof(struct ip) + ICMP_MINLEN + msrc->m_len + ICMP_MAXDATALEN; ++ if (new_m_size > m->m_size) ++ m_inc(m, new_m_size); ++ } ++ memcpy(m->m_data, msrc->m_data, msrc->m_len); ++ m->m_len = msrc->m_len; /* copy msrc to m */ ++ ++ /* make the header of the reply packet */ ++ ip = mtod(m, struct ip *); ++ hlen = sizeof(struct ip); /* no options in reply */ ++ ++ /* fill in icmp */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ ++ icp = mtod(m, struct icmp *); ++ ++ if (minsize) ++ s_ip_len = shlen + ICMP_MINLEN; /* return header+8b only */ ++ else if (s_ip_len > ICMP_MAXDATALEN) /* maximum size */ ++ s_ip_len = ICMP_MAXDATALEN; ++ ++ m->m_len = ICMP_MINLEN + s_ip_len; /* 8 bytes ICMP header */ ++ ++ /* min. size = 8+sizeof(struct ip)+8 */ ++ ++ icp->icmp_type = type; ++ icp->icmp_code = code; ++ icp->icmp_id = 0; ++ icp->icmp_seq = 0; ++ ++ memcpy(&icp->icmp_ip, msrc->m_data, s_ip_len); /* report the ip packet */ ++ HTONS(icp->icmp_ip.ip_len); ++ HTONS(icp->icmp_ip.ip_id); ++ HTONS(icp->icmp_ip.ip_off); ++ ++ if (message && WITH_ICMP_ERROR_MSG) { /* append message to ICMP packet */ ++ int message_len; ++ char *cpnt; ++ message_len = strlen(message); ++ if (message_len > ICMP_MAXDATALEN) ++ message_len = ICMP_MAXDATALEN; ++ cpnt = (char *)m->m_data + m->m_len; ++ memcpy(cpnt, message, message_len); ++ m->m_len += message_len; ++ } ++ ++ icp->icmp_cksum = 0; ++ icp->icmp_cksum = cksum(m, m->m_len); ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ /* fill in ip */ ++ ip->ip_hl = hlen >> 2; ++ ip->ip_len = m->m_len; ++ ++ ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */ ++ ++ ip->ip_ttl = MAXTTL; ++ ip->ip_p = IPPROTO_ICMP; ++ ip->ip_dst = ip->ip_src; /* ip addresses */ ++ ip->ip_src = m->slirp->vhost_addr; ++ ++ 
(void)ip_output((struct socket *)NULL, m); ++ ++end_error: ++ return; ++} ++#undef ICMP_MAXDATALEN ++ ++/* ++ * Reflect the ip packet back to the source ++ */ ++void icmp_reflect(struct mbuf *m) ++{ ++ register struct ip *ip = mtod(m, struct ip *); ++ int hlen = ip->ip_hl << 2; ++ int optlen = hlen - sizeof(struct ip); ++ register struct icmp *icp; ++ ++ /* ++ * Send an icmp packet back to the ip level, ++ * after supplying a checksum. ++ */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ icp = mtod(m, struct icmp *); ++ ++ icp->icmp_type = ICMP_ECHOREPLY; ++ icp->icmp_cksum = 0; ++ icp->icmp_cksum = cksum(m, ip->ip_len - hlen); ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ /* fill in ip */ ++ if (optlen > 0) { ++ /* ++ * Strip out original options by copying rest of first ++ * mbuf's data back, and adjust the IP length. ++ */ ++ memmove((char *)(ip + 1), (char *)ip + hlen, ++ (unsigned)(m->m_len - hlen)); ++ hlen -= optlen; ++ ip->ip_hl = hlen >> 2; ++ ip->ip_len -= optlen; ++ m->m_len -= optlen; ++ } ++ ++ ip->ip_ttl = MAXTTL; ++ { /* swap */ ++ struct in_addr icmp_dst; ++ icmp_dst = ip->ip_dst; ++ ip->ip_dst = ip->ip_src; ++ ip->ip_src = icmp_dst; ++ } ++ ++ (void)ip_output((struct socket *)NULL, m); ++} ++ ++void icmp_receive(struct socket *so) ++{ ++ struct mbuf *m = so->so_m; ++ struct ip *ip = mtod(m, struct ip *); ++ int hlen = ip->ip_hl << 2; ++ uint8_t error_code; ++ struct icmp *icp; ++ int id, len; ++ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ icp = mtod(m, struct icmp *); ++ ++ id = icp->icmp_id; ++ len = recv(so->s, icp, M_ROOM(m), 0); ++ /* ++ * The behavior of reading SOCK_DGRAM+IPPROTO_ICMP sockets is inconsistent ++ * between host OSes. On Linux, only the ICMP header and payload is ++ * included. On macOS/Darwin, the socket acts like a raw socket and ++ * includes the IP header as well. On other BSDs, SOCK_DGRAM+IPPROTO_ICMP ++ * sockets aren't supported at all, so we treat them like raw sockets. 
It ++ * isn't possible to detect this difference at runtime, so we must use an ++ * #ifdef to determine if we need to remove the IP header. ++ */ ++#ifdef CONFIG_BSD ++ if (len >= sizeof(struct ip)) { ++ struct ip *inner_ip = mtod(m, struct ip *); ++ int inner_hlen = inner_ip->ip_hl << 2; ++ if (inner_hlen > len) { ++ len = -1; ++ errno = -EINVAL; ++ } else { ++ len -= inner_hlen; ++ memmove(icp, (unsigned char *)icp + inner_hlen, len); ++ } ++ } else { ++ len = -1; ++ errno = -EINVAL; ++ } ++#endif ++ icp->icmp_id = id; ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ if (len == -1 || len == 0) { ++ if (errno == ENETUNREACH) { ++ error_code = ICMP_UNREACH_NET; ++ } else { ++ error_code = ICMP_UNREACH_HOST; ++ } ++ DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(so->so_m, ICMP_UNREACH, error_code, 0, strerror(errno)); ++ } else { ++ icmp_reflect(so->so_m); ++ so->so_m = NULL; /* Don't m_free() it again! */ ++ } ++ icmp_detach(so); ++} +diff --git a/slirp/src/ip_icmp.h b/slirp/src/ip_icmp.h +new file mode 100644 +index 0000000..84707db +--- /dev/null ++++ b/slirp/src/ip_icmp.h +@@ -0,0 +1,166 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93 ++ * ip_icmp.h,v 1.4 1995/05/30 08:09:43 rgrimes Exp ++ */ ++ ++#ifndef NETINET_IP_ICMP_H ++#define NETINET_IP_ICMP_H ++ ++/* ++ * Interface Control Message Protocol Definitions. ++ * Per RFC 792, September 1981. ++ */ ++ ++typedef uint32_t n_time; ++ ++/* ++ * Structure of an icmp header. 
++ */ ++struct icmp { ++ uint8_t icmp_type; /* type of message, see below */ ++ uint8_t icmp_code; /* type sub code */ ++ uint16_t icmp_cksum; /* ones complement cksum of struct */ ++ union { ++ uint8_t ih_pptr; /* ICMP_PARAMPROB */ ++ struct in_addr ih_gwaddr; /* ICMP_REDIRECT */ ++ struct ih_idseq { ++ uint16_t icd_id; ++ uint16_t icd_seq; ++ } ih_idseq; ++ int ih_void; ++ ++ /* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */ ++ struct ih_pmtu { ++ uint16_t ipm_void; ++ uint16_t ipm_nextmtu; ++ } ih_pmtu; ++ } icmp_hun; ++#define icmp_pptr icmp_hun.ih_pptr ++#define icmp_gwaddr icmp_hun.ih_gwaddr ++#define icmp_id icmp_hun.ih_idseq.icd_id ++#define icmp_seq icmp_hun.ih_idseq.icd_seq ++#define icmp_void icmp_hun.ih_void ++#define icmp_pmvoid icmp_hun.ih_pmtu.ipm_void ++#define icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu ++ union { ++ struct id_ts { ++ n_time its_otime; ++ n_time its_rtime; ++ n_time its_ttime; ++ } id_ts; ++ struct id_ip { ++ struct ip idi_ip; ++ /* options and then 64 bits of data */ ++ } id_ip; ++ uint32_t id_mask; ++ char id_data[1]; ++ } icmp_dun; ++#define icmp_otime icmp_dun.id_ts.its_otime ++#define icmp_rtime icmp_dun.id_ts.its_rtime ++#define icmp_ttime icmp_dun.id_ts.its_ttime ++#define icmp_ip icmp_dun.id_ip.idi_ip ++#define icmp_mask icmp_dun.id_mask ++#define icmp_data icmp_dun.id_data ++}; ++ ++/* ++ * Lower bounds on packet lengths for various types. ++ * For the error advice packets must first ensure that the ++ * packet is large enough to contain the returned ip header. ++ * Only then can we do the check to see if 64 bits of packet ++ * data have been returned, since we need to check the returned ++ * ip header length. 
++ */ ++#define ICMP_MINLEN 8 /* abs minimum */ ++#define ICMP_TSLEN (8 + 3 * sizeof(n_time)) /* timestamp */ ++#define ICMP_MASKLEN 12 /* address mask */ ++#define ICMP_ADVLENMIN (8 + sizeof(struct ip) + 8) /* min */ ++#define ICMP_ADVLEN(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8) ++/* N.B.: must separately check that ip_hl >= 5 */ ++ ++/* ++ * Definition of type and code field values. ++ */ ++#define ICMP_ECHOREPLY 0 /* echo reply */ ++#define ICMP_UNREACH 3 /* dest unreachable, codes: */ ++#define ICMP_UNREACH_NET 0 /* bad net */ ++#define ICMP_UNREACH_HOST 1 /* bad host */ ++#define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */ ++#define ICMP_UNREACH_PORT 3 /* bad port */ ++#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */ ++#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */ ++#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */ ++#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */ ++#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */ ++#define ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */ ++#define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */ ++#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */ ++#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */ ++#define ICMP_SOURCEQUENCH 4 /* packet lost, slow down */ ++#define ICMP_REDIRECT 5 /* shorter route, codes: */ ++#define ICMP_REDIRECT_NET 0 /* for network */ ++#define ICMP_REDIRECT_HOST 1 /* for host */ ++#define ICMP_REDIRECT_TOSNET 2 /* for tos and net */ ++#define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */ ++#define ICMP_ECHO 8 /* echo service */ ++#define ICMP_ROUTERADVERT 9 /* router advertisement */ ++#define ICMP_ROUTERSOLICIT 10 /* router solicitation */ ++#define ICMP_TIMXCEED 11 /* time exceeded, code: */ ++#define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */ ++#define ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */ ++#define ICMP_PARAMPROB 12 /* ip header bad */ ++#define ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. 
absent */ ++#define ICMP_TSTAMP 13 /* timestamp request */ ++#define ICMP_TSTAMPREPLY 14 /* timestamp reply */ ++#define ICMP_IREQ 15 /* information request */ ++#define ICMP_IREQREPLY 16 /* information reply */ ++#define ICMP_MASKREQ 17 /* address mask request */ ++#define ICMP_MASKREPLY 18 /* address mask reply */ ++ ++#define ICMP_MAXTYPE 18 ++ ++#define ICMP_INFOTYPE(type) \ ++ ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \ ++ (type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \ ++ (type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \ ++ (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \ ++ (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY) ++ ++void icmp_init(Slirp *slirp); ++void icmp_cleanup(Slirp *slirp); ++void icmp_input(struct mbuf *, int); ++void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message); ++void icmp_reflect(struct mbuf *); ++void icmp_receive(struct socket *so); ++void icmp_detach(struct socket *so); ++ ++#endif +diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c +new file mode 100644 +index 0000000..89a01d4 +--- /dev/null ++++ b/slirp/src/ip_input.c +@@ -0,0 +1,461 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 ++ * ip_input.c,v 1.11 1994/11/16 10:17:08 jkh Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP are ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp); ++static void ip_freef(Slirp *slirp, struct ipq *fp); ++static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev); ++static void ip_deq(register struct ipasfrag *p); ++ ++/* ++ * IP initialization: fill in IP protocol switch table. ++ * All protocols not implemented in kernel go to raw IP protocol handler. ++ */ ++void ip_init(Slirp *slirp) ++{ ++ slirp->ipq.ip_link.next = slirp->ipq.ip_link.prev = &slirp->ipq.ip_link; ++ udp_init(slirp); ++ tcp_init(slirp); ++ icmp_init(slirp); ++} ++ ++void ip_cleanup(Slirp *slirp) ++{ ++ udp_cleanup(slirp); ++ tcp_cleanup(slirp); ++ icmp_cleanup(slirp); ++} ++ ++/* ++ * Ip input routine. 
Checksum and byte swap header. If fragmented ++ * try to reassemble. Process options. Pass to next level. ++ */ ++void ip_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ register struct ip *ip; ++ int hlen; ++ ++ if (!slirp->in_enabled) { ++ goto bad; ++ } ++ ++ DEBUG_CALL("ip_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (m->m_len < sizeof(struct ip)) { ++ goto bad; ++ } ++ ++ ip = mtod(m, struct ip *); ++ ++ if (ip->ip_v != IPVERSION) { ++ goto bad; ++ } ++ ++ hlen = ip->ip_hl << 2; ++ if (hlen < sizeof(struct ip) || hlen > m->m_len) { /* min header length */ ++ goto bad; /* or packet too short */ ++ } ++ ++ /* keep ip header intact for ICMP reply ++ * ip->ip_sum = cksum(m, hlen); ++ * if (ip->ip_sum) { ++ */ ++ if (cksum(m, hlen)) { ++ goto bad; ++ } ++ ++ /* ++ * Convert fields to host representation. ++ */ ++ NTOHS(ip->ip_len); ++ if (ip->ip_len < hlen) { ++ goto bad; ++ } ++ NTOHS(ip->ip_id); ++ NTOHS(ip->ip_off); ++ ++ /* ++ * Check that the amount of data in the buffers ++ * is as at least much as the IP header would have us expect. ++ * Trim mbufs if longer than we expect. ++ * Drop packet if shorter than we expect. ++ */ ++ if (m->m_len < ip->ip_len) { ++ goto bad; ++ } ++ ++ /* Should drop packet if mbuf too long? hmmm... */ ++ if (m->m_len > ip->ip_len) ++ m_adj(m, ip->ip_len - m->m_len); ++ ++ /* check ip_ttl for a correct ICMP reply */ ++ if (ip->ip_ttl == 0) { ++ icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, "ttl"); ++ goto bad; ++ } ++ ++ /* ++ * If offset or IP_MF are set, must reassemble. ++ * Otherwise, nothing need be done. ++ * (We could look in the reassembly queue to see ++ * if the packet was previously fragmented, ++ * but it's not worth the time; just let them time out.) ++ * ++ * XXX This should fail, don't fragment yet ++ */ ++ if (ip->ip_off & ~IP_DF) { ++ register struct ipq *fp; ++ struct qlink *l; ++ /* ++ * Look for queue of fragments ++ * of this datagram. 
++ */ ++ for (l = slirp->ipq.ip_link.next; l != &slirp->ipq.ip_link; ++ l = l->next) { ++ fp = container_of(l, struct ipq, ip_link); ++ if (ip->ip_id == fp->ipq_id && ++ ip->ip_src.s_addr == fp->ipq_src.s_addr && ++ ip->ip_dst.s_addr == fp->ipq_dst.s_addr && ++ ip->ip_p == fp->ipq_p) ++ goto found; ++ } ++ fp = NULL; ++ found: ++ ++ /* ++ * Adjust ip_len to not reflect header, ++ * set ip_mff if more fragments are expected, ++ * convert offset of this to bytes. ++ */ ++ ip->ip_len -= hlen; ++ if (ip->ip_off & IP_MF) ++ ip->ip_tos |= 1; ++ else ++ ip->ip_tos &= ~1; ++ ++ ip->ip_off <<= 3; ++ ++ /* ++ * If datagram marked as having more fragments ++ * or if this is not the first fragment, ++ * attempt reassembly; if it succeeds, proceed. ++ */ ++ if (ip->ip_tos & 1 || ip->ip_off) { ++ ip = ip_reass(slirp, ip, fp); ++ if (ip == NULL) ++ return; ++ m = dtom(slirp, ip); ++ } else if (fp) ++ ip_freef(slirp, fp); ++ ++ } else ++ ip->ip_len -= hlen; ++ ++ /* ++ * Switch out to protocol's input routine. ++ */ ++ switch (ip->ip_p) { ++ case IPPROTO_TCP: ++ tcp_input(m, hlen, (struct socket *)NULL, AF_INET); ++ break; ++ case IPPROTO_UDP: ++ udp_input(m, hlen); ++ break; ++ case IPPROTO_ICMP: ++ icmp_input(m, hlen); ++ break; ++ default: ++ m_free(m); ++ } ++ return; ++bad: ++ m_free(m); ++} ++ ++#define iptofrag(P) ((struct ipasfrag *)(((char *)(P)) - sizeof(struct qlink))) ++#define fragtoip(P) ((struct ip *)(((char *)(P)) + sizeof(struct qlink))) ++/* ++ * Take incoming datagram fragment and try to ++ * reassemble it into whole datagram. If a chain for ++ * reassembly of this datagram already exists, then it ++ * is given as fp; otherwise have to make a chain. 
++ */ ++static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) ++{ ++ register struct mbuf *m = dtom(slirp, ip); ++ register struct ipasfrag *q; ++ int hlen = ip->ip_hl << 2; ++ int i, next; ++ ++ DEBUG_CALL("ip_reass"); ++ DEBUG_ARG("ip = %p", ip); ++ DEBUG_ARG("fp = %p", fp); ++ DEBUG_ARG("m = %p", m); ++ ++ /* ++ * Presence of header sizes in mbufs ++ * would confuse code below. ++ * Fragment m_data is concatenated. ++ */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ ++ /* ++ * If first fragment to arrive, create a reassembly queue. ++ */ ++ if (fp == NULL) { ++ struct mbuf *t = m_get(slirp); ++ ++ if (t == NULL) { ++ goto dropfrag; ++ } ++ fp = mtod(t, struct ipq *); ++ insque(&fp->ip_link, &slirp->ipq.ip_link); ++ fp->ipq_ttl = IPFRAGTTL; ++ fp->ipq_p = ip->ip_p; ++ fp->ipq_id = ip->ip_id; ++ fp->frag_link.next = fp->frag_link.prev = &fp->frag_link; ++ fp->ipq_src = ip->ip_src; ++ fp->ipq_dst = ip->ip_dst; ++ q = (struct ipasfrag *)fp; ++ goto insert; ++ } ++ ++ /* ++ * Find a segment which begins after this one does. ++ */ ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = q->ipf_next) ++ if (q->ipf_off > ip->ip_off) ++ break; ++ ++ /* ++ * If there is a preceding segment, it may provide some of ++ * our data already. If so, drop the data from the incoming ++ * segment. If it provides all of our data, drop us. ++ */ ++ if (q->ipf_prev != &fp->frag_link) { ++ struct ipasfrag *pq = q->ipf_prev; ++ i = pq->ipf_off + pq->ipf_len - ip->ip_off; ++ if (i > 0) { ++ if (i >= ip->ip_len) ++ goto dropfrag; ++ m_adj(dtom(slirp, ip), i); ++ ip->ip_off += i; ++ ip->ip_len -= i; ++ } ++ } ++ ++ /* ++ * While we overlap succeeding segments trim them or, ++ * if they are completely covered, dequeue them. 
++ */ ++ while (q != (struct ipasfrag *)&fp->frag_link && ++ ip->ip_off + ip->ip_len > q->ipf_off) { ++ struct ipasfrag *prev; ++ i = (ip->ip_off + ip->ip_len) - q->ipf_off; ++ if (i < q->ipf_len) { ++ q->ipf_len -= i; ++ q->ipf_off += i; ++ m_adj(dtom(slirp, q), i); ++ break; ++ } ++ prev = q; ++ q = q->ipf_next; ++ ip_deq(prev); ++ m_free(dtom(slirp, prev)); ++ } ++ ++insert: ++ /* ++ * Stick new segment in its place; ++ * check for complete reassembly. ++ */ ++ ip_enq(iptofrag(ip), q->ipf_prev); ++ next = 0; ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = q->ipf_next) { ++ if (q->ipf_off != next) ++ return NULL; ++ next += q->ipf_len; ++ } ++ if (((struct ipasfrag *)(q->ipf_prev))->ipf_tos & 1) ++ return NULL; ++ ++ /* ++ * Reassembly is complete; concatenate fragments. ++ */ ++ q = fp->frag_link.next; ++ m = dtom(slirp, q); ++ int delta = (char *)q - (m->m_flags & M_EXT ? m->m_ext : m->m_dat); ++ ++ q = (struct ipasfrag *)q->ipf_next; ++ while (q != (struct ipasfrag *)&fp->frag_link) { ++ struct mbuf *t = dtom(slirp, q); ++ q = (struct ipasfrag *)q->ipf_next; ++ m_cat(m, t); ++ } ++ ++ /* ++ * Create header for new ip packet by ++ * modifying header of first packet; ++ * dequeue and discard fragment reassembly header. ++ * Make header visible. ++ */ ++ q = fp->frag_link.next; ++ ++ /* ++ * If the fragments concatenated to an mbuf that's bigger than the total ++ * size of the fragment and the mbuf was not already using an m_ext buffer, ++ * then an m_ext buffer was alloced. But fp->ipq_next points to the old ++ * buffer (in the mbuf), so we must point ip into the new buffer. 
++ */ ++ if (m->m_flags & M_EXT) { ++ q = (struct ipasfrag *)(m->m_ext + delta); ++ } ++ ++ ip = fragtoip(q); ++ ip->ip_len = next; ++ ip->ip_tos &= ~1; ++ ip->ip_src = fp->ipq_src; ++ ip->ip_dst = fp->ipq_dst; ++ remque(&fp->ip_link); ++ (void)m_free(dtom(slirp, fp)); ++ m->m_len += (ip->ip_hl << 2); ++ m->m_data -= (ip->ip_hl << 2); ++ ++ return ip; ++ ++dropfrag: ++ m_free(m); ++ return NULL; ++} ++ ++/* ++ * Free a fragment reassembly header and all ++ * associated datagrams. ++ */ ++static void ip_freef(Slirp *slirp, struct ipq *fp) ++{ ++ register struct ipasfrag *q, *p; ++ ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = p) { ++ p = q->ipf_next; ++ ip_deq(q); ++ m_free(dtom(slirp, q)); ++ } ++ remque(&fp->ip_link); ++ (void)m_free(dtom(slirp, fp)); ++} ++ ++/* ++ * Put an ip fragment on a reassembly chain. ++ * Like insque, but pointers in middle of structure. ++ */ ++static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev) ++{ ++ DEBUG_CALL("ip_enq"); ++ DEBUG_ARG("prev = %p", prev); ++ p->ipf_prev = prev; ++ p->ipf_next = prev->ipf_next; ++ ((struct ipasfrag *)(prev->ipf_next))->ipf_prev = p; ++ prev->ipf_next = p; ++} ++ ++/* ++ * To ip_enq as remque is to insque. ++ */ ++static void ip_deq(register struct ipasfrag *p) ++{ ++ ((struct ipasfrag *)(p->ipf_prev))->ipf_next = p->ipf_next; ++ ((struct ipasfrag *)(p->ipf_next))->ipf_prev = p->ipf_prev; ++} ++ ++/* ++ * IP timer processing; ++ * if a timer expires on a reassembly ++ * queue, discard it. ++ */ ++void ip_slowtimo(Slirp *slirp) ++{ ++ struct qlink *l; ++ ++ DEBUG_CALL("ip_slowtimo"); ++ ++ l = slirp->ipq.ip_link.next; ++ ++ if (l == NULL) ++ return; ++ ++ while (l != &slirp->ipq.ip_link) { ++ struct ipq *fp = container_of(l, struct ipq, ip_link); ++ l = l->next; ++ if (--fp->ipq_ttl == 0) { ++ ip_freef(slirp, fp); ++ } ++ } ++} ++ ++/* ++ * Strip out IP options, at higher ++ * level protocol in the kernel. 
++ * Second argument is buffer to which options ++ * will be moved, and return value is their length. ++ * (XXX) should be deleted; last arg currently ignored. ++ */ ++void ip_stripoptions(register struct mbuf *m, struct mbuf *mopt) ++{ ++ register int i; ++ struct ip *ip = mtod(m, struct ip *); ++ register char *opts; ++ int olen; ++ ++ olen = (ip->ip_hl << 2) - sizeof(struct ip); ++ opts = (char *)(ip + 1); ++ i = m->m_len - (sizeof(struct ip) + olen); ++ memcpy(opts, opts + olen, (unsigned)i); ++ m->m_len -= olen; ++ ++ ip->ip_hl = sizeof(struct ip) >> 2; ++} +diff --git a/slirp/src/ip_output.c b/slirp/src/ip_output.c +new file mode 100644 +index 0000000..22916a3 +--- /dev/null ++++ b/slirp/src/ip_output.c +@@ -0,0 +1,169 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 ++ * ip_output.c,v 1.9 1994/11/16 10:17:10 jkh Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP are ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++/* Number of packets queued before we start sending ++ * (to prevent allocing too many mbufs) */ ++#define IF_THRESH 10 ++ ++/* ++ * IP output. The packet in mbuf chain m contains a skeletal IP ++ * header (with len, off, ttl, proto, tos, src, dst). ++ * The mbuf chain containing the packet will be freed. ++ * The mbuf opt, if present, will not be freed. ++ */ ++int ip_output(struct socket *so, struct mbuf *m0) ++{ ++ Slirp *slirp = m0->slirp; ++ register struct ip *ip; ++ register struct mbuf *m = m0; ++ register int hlen = sizeof(struct ip); ++ int len, off, error = 0; ++ ++ DEBUG_CALL("ip_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m0 = %p", m0); ++ ++ ip = mtod(m, struct ip *); ++ /* ++ * Fill in IP header. ++ */ ++ ip->ip_v = IPVERSION; ++ ip->ip_off &= IP_DF; ++ ip->ip_id = htons(slirp->ip_id++); ++ ip->ip_hl = hlen >> 2; ++ ++ /* ++ * If small enough for interface, can just send directly. ++ */ ++ if ((uint16_t)ip->ip_len <= slirp->if_mtu) { ++ ip->ip_len = htons((uint16_t)ip->ip_len); ++ ip->ip_off = htons((uint16_t)ip->ip_off); ++ ip->ip_sum = 0; ++ ip->ip_sum = cksum(m, hlen); ++ ++ if_output(so, m); ++ goto done; ++ } ++ ++ /* ++ * Too large for interface; fragment if possible. 
++ * Must be able to put at least 8 bytes per fragment. ++ */ ++ if (ip->ip_off & IP_DF) { ++ error = -1; ++ goto bad; ++ } ++ ++ len = (slirp->if_mtu - hlen) & ~7; /* ip databytes per packet */ ++ if (len < 8) { ++ error = -1; ++ goto bad; ++ } ++ ++ { ++ int mhlen, firstlen = len; ++ struct mbuf **mnext = &m->m_nextpkt; ++ ++ /* ++ * Loop through length of segment after first fragment, ++ * make new header and copy data of each part and link onto chain. ++ */ ++ m0 = m; ++ mhlen = sizeof(struct ip); ++ for (off = hlen + len; off < (uint16_t)ip->ip_len; off += len) { ++ register struct ip *mhip; ++ m = m_get(slirp); ++ if (m == NULL) { ++ error = -1; ++ goto sendorfree; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ mhip = mtod(m, struct ip *); ++ *mhip = *ip; ++ ++ m->m_len = mhlen; ++ mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); ++ if (ip->ip_off & IP_MF) ++ mhip->ip_off |= IP_MF; ++ if (off + len >= (uint16_t)ip->ip_len) ++ len = (uint16_t)ip->ip_len - off; ++ else ++ mhip->ip_off |= IP_MF; ++ mhip->ip_len = htons((uint16_t)(len + mhlen)); ++ ++ if (m_copy(m, m0, off, len) < 0) { ++ error = -1; ++ goto sendorfree; ++ } ++ ++ mhip->ip_off = htons((uint16_t)mhip->ip_off); ++ mhip->ip_sum = 0; ++ mhip->ip_sum = cksum(m, mhlen); ++ *mnext = m; ++ mnext = &m->m_nextpkt; ++ } ++ /* ++ * Update first fragment by trimming what's been copied out ++ * and updating header, then send each fragment (in order). 
++ */ ++ m = m0; ++ m_adj(m, hlen + firstlen - (uint16_t)ip->ip_len); ++ ip->ip_len = htons((uint16_t)m->m_len); ++ ip->ip_off = htons((uint16_t)(ip->ip_off | IP_MF)); ++ ip->ip_sum = 0; ++ ip->ip_sum = cksum(m, hlen); ++ sendorfree: ++ for (m = m0; m; m = m0) { ++ m0 = m->m_nextpkt; ++ m->m_nextpkt = NULL; ++ if (error == 0) ++ if_output(so, m); ++ else ++ m_free(m); ++ } ++ } ++ ++done: ++ return (error); ++ ++bad: ++ m_free(m0); ++ goto done; ++} +diff --git a/slirp/src/libslirp-version.h.in b/slirp/src/libslirp-version.h.in +new file mode 100644 +index 0000000..faa6c85 +--- /dev/null ++++ b/slirp/src/libslirp-version.h.in +@@ -0,0 +1,24 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef LIBSLIRP_VERSION_H_ ++#define LIBSLIRP_VERSION_H_ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define SLIRP_MAJOR_VERSION @SLIRP_MAJOR_VERSION@ ++#define SLIRP_MINOR_VERSION @SLIRP_MINOR_VERSION@ ++#define SLIRP_MICRO_VERSION @SLIRP_MICRO_VERSION@ ++#define SLIRP_VERSION_STRING @SLIRP_VERSION_STRING@ ++ ++#define SLIRP_CHECK_VERSION(major,minor,micro) \ ++ (SLIRP_MAJOR_VERSION > (major) || \ ++ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION > (minor)) || \ ++ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION == (minor) && \ ++ SLIRP_MICRO_VERSION >= (micro))) ++ ++#ifdef __cplusplus ++} /* extern "C" */ ++#endif ++ ++#endif /* LIBSLIRP_VERSION_H_ */ +diff --git a/slirp/src/libslirp.h b/slirp/src/libslirp.h +new file mode 100644 +index 0000000..fb4c7e8 +--- /dev/null ++++ b/slirp/src/libslirp.h +@@ -0,0 +1,171 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef LIBSLIRP_H ++#define LIBSLIRP_H ++ ++#include ++#include ++#include ++ ++#ifdef _WIN32 ++#include ++#include ++#else ++#include ++#include ++#endif ++ ++#include "libslirp-version.h" ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++typedef struct Slirp Slirp; ++ ++enum { ++ SLIRP_POLL_IN = 1 << 0, ++ SLIRP_POLL_OUT = 1 << 1, ++ SLIRP_POLL_PRI = 1 << 2, ++ SLIRP_POLL_ERR = 1 << 3, ++ 
SLIRP_POLL_HUP = 1 << 4, ++}; ++ ++typedef ssize_t (*SlirpReadCb)(void *buf, size_t len, void *opaque); ++typedef ssize_t (*SlirpWriteCb)(const void *buf, size_t len, void *opaque); ++typedef void (*SlirpTimerCb)(void *opaque); ++typedef int (*SlirpAddPollCb)(int fd, int events, void *opaque); ++typedef int (*SlirpGetREventsCb)(int idx, void *opaque); ++ ++/* ++ * Callbacks from slirp ++ */ ++typedef struct SlirpCb { ++ /* ++ * Send an ethernet frame to the guest network. The opaque ++ * parameter is the one given to slirp_init(). The function ++ * doesn't need to send all the data and may return m_freelist.qh_link = slirp->m_freelist.qh_rlink = &slirp->m_freelist; ++ slirp->m_usedlist.qh_link = slirp->m_usedlist.qh_rlink = &slirp->m_usedlist; ++} ++ ++void m_cleanup(Slirp *slirp) ++{ ++ struct mbuf *m, *next; ++ ++ m = (struct mbuf *)slirp->m_usedlist.qh_link; ++ while ((struct quehead *)m != &slirp->m_usedlist) { ++ next = m->m_next; ++ if (m->m_flags & M_EXT) { ++ g_free(m->m_ext); ++ } ++ g_free(m); ++ m = next; ++ } ++ m = (struct mbuf *)slirp->m_freelist.qh_link; ++ while ((struct quehead *)m != &slirp->m_freelist) { ++ next = m->m_next; ++ g_free(m); ++ m = next; ++ } ++} ++ ++/* ++ * Get an mbuf from the free list, if there are none ++ * allocate one ++ * ++ * Because fragmentation can occur if we alloc new mbufs and ++ * free old mbufs, we mark all mbufs above mbuf_thresh as M_DOFREE, ++ * which tells m_free to actually g_free() it ++ */ ++struct mbuf *m_get(Slirp *slirp) ++{ ++ register struct mbuf *m; ++ int flags = 0; ++ ++ DEBUG_CALL("m_get"); ++ ++ if (slirp->m_freelist.qh_link == &slirp->m_freelist) { ++ m = g_malloc(SLIRP_MSIZE(slirp->if_mtu)); ++ slirp->mbuf_alloced++; ++ if (slirp->mbuf_alloced > MBUF_THRESH) ++ flags = M_DOFREE; ++ m->slirp = slirp; ++ } else { ++ m = (struct mbuf *)slirp->m_freelist.qh_link; ++ remque(m); ++ } ++ ++ /* Insert it in the used list */ ++ insque(m, &slirp->m_usedlist); ++ m->m_flags = (flags | M_USEDLIST); ++ ++ /* 
Initialise it */ ++ m->m_size = SLIRP_MSIZE(slirp->if_mtu) - offsetof(struct mbuf, m_dat); ++ m->m_data = m->m_dat; ++ m->m_len = 0; ++ m->m_nextpkt = NULL; ++ m->m_prevpkt = NULL; ++ m->resolution_requested = false; ++ m->expiration_date = (uint64_t)-1; ++ DEBUG_ARG("m = %p", m); ++ return m; ++} ++ ++void m_free(struct mbuf *m) ++{ ++ DEBUG_CALL("m_free"); ++ DEBUG_ARG("m = %p", m); ++ ++ if (m) { ++ /* Remove from m_usedlist */ ++ if (m->m_flags & M_USEDLIST) ++ remque(m); ++ ++ /* If it's M_EXT, free() it */ ++ if (m->m_flags & M_EXT) { ++ g_free(m->m_ext); ++ } ++ /* ++ * Either free() it or put it on the free list ++ */ ++ if (m->m_flags & M_DOFREE) { ++ m->slirp->mbuf_alloced--; ++ g_free(m); ++ } else if ((m->m_flags & M_FREELIST) == 0) { ++ insque(m, &m->slirp->m_freelist); ++ m->m_flags = M_FREELIST; /* Clobber other flags */ ++ } ++ } /* if(m) */ ++} ++ ++/* ++ * Copy data from one mbuf to the end of ++ * the other.. if result is too big for one mbuf, allocate ++ * an M_EXT data segment ++ */ ++void m_cat(struct mbuf *m, struct mbuf *n) ++{ ++ /* ++ * If there's no room, realloc ++ */ ++ if (M_FREEROOM(m) < n->m_len) ++ m_inc(m, m->m_len + n->m_len); ++ ++ memcpy(m->m_data + m->m_len, n->m_data, n->m_len); ++ m->m_len += n->m_len; ++ ++ m_free(n); ++} ++ ++ ++/* make m 'size' bytes large from m_data */ ++void m_inc(struct mbuf *m, int size) ++{ ++ int gapsize; ++ ++ /* some compilers throw up on gotos. This one we can fake. 
*/ ++ if (M_ROOM(m) > size) { ++ return; ++ } ++ ++ if (m->m_flags & M_EXT) { ++ gapsize = m->m_data - m->m_ext; ++ m->m_ext = g_realloc(m->m_ext, size + gapsize); ++ } else { ++ gapsize = m->m_data - m->m_dat; ++ m->m_ext = g_malloc(size + gapsize); ++ memcpy(m->m_ext, m->m_dat, m->m_size); ++ m->m_flags |= M_EXT; ++ } ++ ++ m->m_data = m->m_ext + gapsize; ++ m->m_size = size + gapsize; ++} ++ ++ ++void m_adj(struct mbuf *m, int len) ++{ ++ if (m == NULL) ++ return; ++ if (len >= 0) { ++ /* Trim from head */ ++ m->m_data += len; ++ m->m_len -= len; ++ } else { ++ /* Trim from tail */ ++ len = -len; ++ m->m_len -= len; ++ } ++} ++ ++ ++/* ++ * Copy len bytes from m, starting off bytes into n ++ */ ++int m_copy(struct mbuf *n, struct mbuf *m, int off, int len) ++{ ++ if (len > M_FREEROOM(n)) ++ return -1; ++ ++ memcpy((n->m_data + n->m_len), (m->m_data + off), len); ++ n->m_len += len; ++ return 0; ++} ++ ++ ++/* ++ * Given a pointer into an mbuf, return the mbuf ++ * XXX This is a kludge, I should eliminate the need for it ++ * Fortunately, it's not used often ++ */ ++struct mbuf *dtom(Slirp *slirp, void *dat) ++{ ++ struct mbuf *m; ++ ++ DEBUG_CALL("dtom"); ++ DEBUG_ARG("dat = %p", dat); ++ ++ /* bug corrected for M_EXT buffers */ ++ for (m = (struct mbuf *)slirp->m_usedlist.qh_link; ++ (struct quehead *)m != &slirp->m_usedlist; m = m->m_next) { ++ if (m->m_flags & M_EXT) { ++ if ((char *)dat >= m->m_ext && (char *)dat < (m->m_ext + m->m_size)) ++ return m; ++ } else { ++ if ((char *)dat >= m->m_dat && (char *)dat < (m->m_dat + m->m_size)) ++ return m; ++ } ++ } ++ ++ DEBUG_ERROR("dtom failed"); ++ ++ return (struct mbuf *)0; ++} +diff --git a/slirp/src/mbuf.h b/slirp/src/mbuf.h +new file mode 100644 +index 0000000..546e785 +--- /dev/null ++++ b/slirp/src/mbuf.h +@@ -0,0 +1,127 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. 
++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)mbuf.h 8.3 (Berkeley) 1/21/94 ++ * mbuf.h,v 1.9 1994/11/14 13:54:20 bde Exp ++ */ ++ ++#ifndef MBUF_H ++#define MBUF_H ++ ++/* ++ * Macros for type conversion ++ * mtod(m,t) - convert mbuf pointer to data pointer of correct type ++ */ ++#define mtod(m, t) ((t)(m)->m_data) ++ ++/* XXX About mbufs for slirp: ++ * Only one mbuf is ever used in a chain, for each "cell" of data. ++ * m_nextpkt points to the next packet, if fragmented. 
++ * If the data is too large, the M_EXT is used, and a larger block ++ * is alloced. Therefore, m_free[m] must check for M_EXT and if set ++ * free the m_ext. This is inefficient memory-wise, but who cares. ++ */ ++ ++/* ++ * mbufs allow to have a gap between the start of the allocated buffer (m_ext if ++ * M_EXT is set, m_dat otherwise) and the in-use data: ++ * ++ * |--gapsize----->|---m_len-------> ++ * |----------m_size------------------------------> ++ * |----M_ROOM--------------------> ++ * |-M_FREEROOM--> ++ * ++ * ^ ^ ^ ++ * m_dat/m_ext m_data end of buffer ++ */ ++ ++/* ++ * How much room is in the mbuf, from m_data to the end of the mbuf ++ */ ++#define M_ROOM(m) \ ++ ((m->m_flags & M_EXT) ? (((m)->m_ext + (m)->m_size) - (m)->m_data) : \ ++ (((m)->m_dat + (m)->m_size) - (m)->m_data)) ++ ++/* ++ * How much free room there is ++ */ ++#define M_FREEROOM(m) (M_ROOM(m) - (m)->m_len) ++ ++struct mbuf { ++ /* XXX should union some of these! */ ++ /* header at beginning of each mbuf: */ ++ struct mbuf *m_next; /* Linked list of mbufs */ ++ struct mbuf *m_prev; ++ struct mbuf *m_nextpkt; /* Next packet in queue/record */ ++ struct mbuf *m_prevpkt; /* Flags aren't used in the output queue */ ++ int m_flags; /* Misc flags */ ++ ++ int m_size; /* Size of mbuf, from m_dat or m_ext */ ++ struct socket *m_so; ++ ++ char *m_data; /* Current location of data */ ++ int m_len; /* Amount of data in this mbuf, from m_data */ ++ ++ Slirp *slirp; ++ bool resolution_requested; ++ uint64_t expiration_date; ++ char *m_ext; ++ /* start of dynamic buffer area, must be last element */ ++ char m_dat[]; ++}; ++ ++#define ifq_prev m_prev ++#define ifq_next m_next ++#define ifs_prev m_prevpkt ++#define ifs_next m_nextpkt ++#define ifq_so m_so ++ ++#define M_EXT 0x01 /* m_ext points to more (malloced) data */ ++#define M_FREELIST 0x02 /* mbuf is on free list */ ++#define M_USEDLIST 0x04 /* XXX mbuf is on used list (for dtom()) */ ++#define M_DOFREE \ ++ 0x08 /* when m_free is called on 
the mbuf, free() \ ++ * it rather than putting it on the free list */ ++ ++void m_init(Slirp *); ++void m_cleanup(Slirp *slirp); ++struct mbuf *m_get(Slirp *); ++void m_free(struct mbuf *); ++void m_cat(register struct mbuf *, register struct mbuf *); ++void m_inc(struct mbuf *, int); ++void m_adj(struct mbuf *, int); ++int m_copy(struct mbuf *, struct mbuf *, int, int); ++struct mbuf *dtom(Slirp *, void *); ++ ++static inline void ifs_init(struct mbuf *ifm) ++{ ++ ifm->ifs_next = ifm->ifs_prev = ifm; ++} ++ ++#endif +diff --git a/slirp/src/misc.c b/slirp/src/misc.c +new file mode 100644 +index 0000000..e6bc0a2 +--- /dev/null ++++ b/slirp/src/misc.c +@@ -0,0 +1,390 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++#ifdef G_OS_UNIX ++#include ++#endif ++ ++inline void insque(void *a, void *b) ++{ ++ register struct quehead *element = (struct quehead *)a; ++ register struct quehead *head = (struct quehead *)b; ++ element->qh_link = head->qh_link; ++ head->qh_link = (struct quehead *)element; ++ element->qh_rlink = (struct quehead *)head; ++ ((struct quehead *)(element->qh_link))->qh_rlink = ++ (struct quehead *)element; ++} ++ ++inline void remque(void *a) ++{ ++ register struct quehead *element = (struct quehead *)a; ++ ((struct quehead *)(element->qh_link))->qh_rlink = element->qh_rlink; ++ ((struct quehead *)(element->qh_rlink))->qh_link = element->qh_link; ++ element->qh_rlink = NULL; ++} ++ ++/* TODO: IPv6 */ ++struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, ++ void *opaque, struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = g_new0(struct gfwd_list, 1); ++ ++ f->write_cb = write_cb; ++ f->opaque = opaque; ++ f->ex_fport = port; ++ f->ex_addr = addr; ++ f->ex_next = *ex_ptr; ++ *ex_ptr = f; ++ ++ return f; ++} ++ ++struct gfwd_list *add_exec(struct gfwd_list **ex_ptr, const char *cmdline, ++ struct in_addr addr, int port) ++{ ++ struct 
gfwd_list *f = add_guestfwd(ex_ptr, NULL, NULL, addr, port); ++ ++ f->ex_exec = g_strdup(cmdline); ++ ++ return f; ++} ++ ++struct gfwd_list *add_unix(struct gfwd_list **ex_ptr, const char *unixsock, ++ struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = add_guestfwd(ex_ptr, NULL, NULL, addr, port); ++ ++ f->ex_unix = g_strdup(unixsock); ++ ++ return f; ++} ++ ++int remove_guestfwd(struct gfwd_list **ex_ptr, struct in_addr addr, int port) ++{ ++ for (; *ex_ptr != NULL; ex_ptr = &((*ex_ptr)->ex_next)) { ++ struct gfwd_list *f = *ex_ptr; ++ if (f->ex_addr.s_addr == addr.s_addr && f->ex_fport == port) { ++ *ex_ptr = f->ex_next; ++ g_free(f->ex_exec); ++ g_free(f); ++ return 0; ++ } ++ } ++ return -1; ++} ++ ++static int slirp_socketpair_with_oob(int sv[2]) ++{ ++ struct sockaddr_in addr = { ++ .sin_family = AF_INET, ++ .sin_port = 0, ++ .sin_addr.s_addr = INADDR_ANY, ++ }; ++ socklen_t addrlen = sizeof(addr); ++ int ret, s; ++ ++ sv[1] = -1; ++ s = slirp_socket(AF_INET, SOCK_STREAM, 0); ++ if (s < 0 || bind(s, (struct sockaddr *)&addr, addrlen) < 0 || ++ listen(s, 1) < 0 || ++ getsockname(s, (struct sockaddr *)&addr, &addrlen) < 0) { ++ goto err; ++ } ++ ++ sv[1] = slirp_socket(AF_INET, SOCK_STREAM, 0); ++ if (sv[1] < 0) { ++ goto err; ++ } ++ /* ++ * This connect won't block because we've already listen()ed on ++ * the server end (even though we won't accept() the connection ++ * until later on). 
++ */ ++ do { ++ ret = connect(sv[1], (struct sockaddr *)&addr, addrlen); ++ } while (ret < 0 && errno == EINTR); ++ if (ret < 0) { ++ goto err; ++ } ++ ++ do { ++ sv[0] = accept(s, (struct sockaddr *)&addr, &addrlen); ++ } while (sv[0] < 0 && errno == EINTR); ++ if (sv[0] < 0) { ++ goto err; ++ } ++ ++ closesocket(s); ++ return 0; ++ ++err: ++ g_critical("slirp_socketpair(): %s", strerror(errno)); ++ if (s >= 0) { ++ closesocket(s); ++ } ++ if (sv[1] >= 0) { ++ closesocket(sv[1]); ++ } ++ return -1; ++} ++ ++static void fork_exec_child_setup(gpointer data) ++{ ++#ifndef _WIN32 ++ setsid(); ++#endif ++} ++ ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ ++#if !GLIB_CHECK_VERSION(2, 58, 0) ++typedef struct SlirpGSpawnFds { ++ GSpawnChildSetupFunc child_setup; ++ gpointer user_data; ++ gint stdin_fd; ++ gint stdout_fd; ++ gint stderr_fd; ++} SlirpGSpawnFds; ++ ++static inline void slirp_gspawn_fds_setup(gpointer user_data) ++{ ++ SlirpGSpawnFds *q = (SlirpGSpawnFds *)user_data; ++ ++ dup2(q->stdin_fd, 0); ++ dup2(q->stdout_fd, 1); ++ dup2(q->stderr_fd, 2); ++ q->child_setup(q->user_data); ++} ++#endif ++ ++static inline gboolean ++g_spawn_async_with_fds_slirp(const gchar *working_directory, gchar **argv, ++ gchar **envp, GSpawnFlags flags, ++ GSpawnChildSetupFunc child_setup, ++ gpointer user_data, GPid *child_pid, gint stdin_fd, ++ gint stdout_fd, gint stderr_fd, GError **error) ++{ ++#if GLIB_CHECK_VERSION(2, 58, 0) ++ return g_spawn_async_with_fds(working_directory, argv, envp, flags, ++ child_setup, user_data, child_pid, stdin_fd, ++ stdout_fd, stderr_fd, error); ++#else ++ SlirpGSpawnFds setup = { ++ .child_setup = child_setup, ++ .user_data = user_data, ++ .stdin_fd = stdin_fd, ++ .stdout_fd = stdout_fd, ++ .stderr_fd = stderr_fd, ++ }; ++ ++ return g_spawn_async(working_directory, argv, envp, flags, ++ slirp_gspawn_fds_setup, &setup, child_pid, error); ++#endif ++} ++ ++#define g_spawn_async_with_fds(wd, argv, env, 
f, c, d, p, ifd, ofd, efd, err) \ ++ g_spawn_async_with_fds_slirp(wd, argv, env, f, c, d, p, ifd, ofd, efd, err) ++ ++#pragma GCC diagnostic pop ++ ++int fork_exec(struct socket *so, const char *ex) ++{ ++ GError *err = NULL; ++ gint argc = 0; ++ gchar **argv = NULL; ++ int opt, sp[2]; ++ ++ DEBUG_CALL("fork_exec"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("ex = %p", ex); ++ ++ if (slirp_socketpair_with_oob(sp) < 0) { ++ return 0; ++ } ++ ++ if (!g_shell_parse_argv(ex, &argc, &argv, &err)) { ++ g_critical("fork_exec invalid command: %s\nerror: %s", ex, err->message); ++ g_error_free(err); ++ return 0; ++ } ++ ++ g_spawn_async_with_fds(NULL /* cwd */, argv, NULL /* env */, ++ G_SPAWN_SEARCH_PATH, fork_exec_child_setup, ++ NULL /* data */, NULL /* child_pid */, sp[1], sp[1], ++ sp[1], &err); ++ g_strfreev(argv); ++ ++ if (err) { ++ g_critical("fork_exec: %s", err->message); ++ g_error_free(err); ++ closesocket(sp[0]); ++ closesocket(sp[1]); ++ return 0; ++ } ++ ++ so->s = sp[0]; ++ closesocket(sp[1]); ++ slirp_socket_set_fast_reuse(so->s); ++ opt = 1; ++ setsockopt(so->s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ slirp_set_nonblock(so->s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ return 1; ++} ++ ++int open_unix(struct socket *so, const char *unixpath) ++{ ++#ifdef G_OS_UNIX ++ struct sockaddr_un sa; ++ int s; ++ ++ DEBUG_CALL("open_unix"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("unixpath = %s", unixpath); ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.sun_family = AF_UNIX; ++ if (g_strlcpy(sa.sun_path, unixpath, sizeof(sa.sun_path)) >= sizeof(sa.sun_path)) { ++ g_critical("Bad unix path: %s", unixpath); ++ return 0; ++ } ++ ++ s = slirp_socket(PF_UNIX, SOCK_STREAM, 0); ++ if (s < 0) { ++ g_critical("open_unix(): %s", strerror(errno)); ++ return 0; ++ } ++ ++ if (connect(s, (struct sockaddr *)&sa, sizeof(sa)) < 0) { ++ g_critical("open_unix(): %s", strerror(errno)); ++ closesocket(s); ++ return 0; ++ } ++ ++ so->s = s; ++ 
slirp_set_nonblock(so->s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ ++ return 1; ++#else ++ g_assert_not_reached(); ++#endif ++} ++ ++char *slirp_connection_info(Slirp *slirp) ++{ ++ GString *str = g_string_new(NULL); ++ const char *const tcpstates[] = { ++ [TCPS_CLOSED] = "CLOSED", [TCPS_LISTEN] = "LISTEN", ++ [TCPS_SYN_SENT] = "SYN_SENT", [TCPS_SYN_RECEIVED] = "SYN_RCVD", ++ [TCPS_ESTABLISHED] = "ESTABLISHED", [TCPS_CLOSE_WAIT] = "CLOSE_WAIT", ++ [TCPS_FIN_WAIT_1] = "FIN_WAIT_1", [TCPS_CLOSING] = "CLOSING", ++ [TCPS_LAST_ACK] = "LAST_ACK", [TCPS_FIN_WAIT_2] = "FIN_WAIT_2", ++ [TCPS_TIME_WAIT] = "TIME_WAIT", ++ }; ++ struct in_addr dst_addr; ++ struct sockaddr_in src; ++ socklen_t src_len; ++ uint16_t dst_port; ++ struct socket *so; ++ const char *state; ++ char buf[20]; ++ ++ g_string_append_printf(str, ++ " Protocol[State] FD Source Address Port " ++ "Dest. Address Port RecvQ SendQ\n"); ++ ++ /* TODO: IPv6 */ ++ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { ++ if (so->so_state & SS_HOSTFWD) { ++ state = "HOST_FORWARD"; ++ } else if (so->so_tcpcb) { ++ state = tcpstates[so->so_tcpcb->t_state]; ++ } else { ++ state = "NONE"; ++ } ++ if (so->so_state & (SS_HOSTFWD | SS_INCOMING)) { ++ src_len = sizeof(src); ++ getsockname(so->s, (struct sockaddr *)&src, &src_len); ++ dst_addr = so->so_laddr; ++ dst_port = so->so_lport; ++ } else { ++ src.sin_addr = so->so_laddr; ++ src.sin_port = so->so_lport; ++ dst_addr = so->so_faddr; ++ dst_port = so->so_fport; ++ } ++ slirp_fmt0(buf, sizeof(buf), " TCP[%s]", state); ++ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, ++ src.sin_addr.s_addr ? 
inet_ntoa(src.sin_addr) : ++ "*", ++ ntohs(src.sin_port)); ++ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), ++ ntohs(dst_port), so->so_rcv.sb_cc, ++ so->so_snd.sb_cc); ++ } ++ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so->so_next) { ++ if (so->so_state & SS_HOSTFWD) { ++ slirp_fmt0(buf, sizeof(buf), " UDP[HOST_FORWARD]"); ++ src_len = sizeof(src); ++ getsockname(so->s, (struct sockaddr *)&src, &src_len); ++ dst_addr = so->so_laddr; ++ dst_port = so->so_lport; ++ } else { ++ slirp_fmt0(buf, sizeof(buf), " UDP[%d sec]", ++ (so->so_expire - curtime) / 1000); ++ src.sin_addr = so->so_laddr; ++ src.sin_port = so->so_lport; ++ dst_addr = so->so_faddr; ++ dst_port = so->so_fport; ++ } ++ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, ++ src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : ++ "*", ++ ntohs(src.sin_port)); ++ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), ++ ntohs(dst_port), so->so_rcv.sb_cc, ++ so->so_snd.sb_cc); ++ } ++ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so->so_next) { ++ slirp_fmt0(buf, sizeof(buf), " ICMP[%d sec]", ++ (so->so_expire - curtime) / 1000); ++ src.sin_addr = so->so_laddr; ++ dst_addr = so->so_faddr; ++ g_string_append_printf(str, "%-19s %3d %15s - ", buf, so->s, ++ src.sin_addr.s_addr ? 
inet_ntoa(src.sin_addr) : ++ "*"); ++ g_string_append_printf(str, "%15s - %5d %5d\n", inet_ntoa(dst_addr), ++ so->so_rcv.sb_cc, so->so_snd.sb_cc); ++ } ++ ++ return g_string_free(str, FALSE); ++} ++ ++int slirp_bind_outbound(struct socket *so, unsigned short af) ++{ ++ int ret = 0; ++ struct sockaddr *addr = NULL; ++ int addr_size = 0; ++ ++ if (af == AF_INET && so->slirp->outbound_addr != NULL) { ++ addr = (struct sockaddr *)so->slirp->outbound_addr; ++ addr_size = sizeof(struct sockaddr_in); ++ } else if (af == AF_INET6 && so->slirp->outbound_addr6 != NULL) { ++ addr = (struct sockaddr *)so->slirp->outbound_addr6; ++ addr_size = sizeof(struct sockaddr_in6); ++ } ++ ++ if (addr != NULL) { ++ ret = bind(so->s, addr, addr_size); ++ } ++ return ret; ++} +\ No newline at end of file +diff --git a/slirp/src/misc.h b/slirp/src/misc.h +new file mode 100644 +index 0000000..81b370c +--- /dev/null ++++ b/slirp/src/misc.h +@@ -0,0 +1,72 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#ifndef MISC_H ++#define MISC_H ++ ++#include "libslirp.h" ++ ++struct gfwd_list { ++ SlirpWriteCb write_cb; ++ void *opaque; ++ struct in_addr ex_addr; /* Server address */ ++ int ex_fport; /* Port to telnet to */ ++ char *ex_exec; /* Command line of what to exec */ ++ char *ex_unix; /* unix socket */ ++ struct gfwd_list *ex_next; ++}; ++ ++#define EMU_NONE 0x0 ++ ++/* TCP emulations */ ++#define EMU_CTL 0x1 ++#define EMU_FTP 0x2 ++#define EMU_KSH 0x3 ++#define EMU_IRC 0x4 ++#define EMU_REALAUDIO 0x5 ++#define EMU_RLOGIN 0x6 ++#define EMU_IDENT 0x7 ++ ++#define EMU_NOCONNECT 0x10 /* Don't connect */ ++ ++struct tos_t { ++ uint16_t lport; ++ uint16_t fport; ++ uint8_t tos; ++ uint8_t emu; ++}; ++ ++struct emu_t { ++ uint16_t lport; ++ uint16_t fport; ++ uint8_t tos; ++ uint8_t emu; ++ struct emu_t *next; ++}; ++ ++struct slirp_quehead { ++ struct slirp_quehead *qh_link; ++ struct slirp_quehead *qh_rlink; ++}; ++ ++void slirp_insque(void *, void *); ++void slirp_remque(void *); ++int fork_exec(struct socket *so, const char *ex); ++int open_unix(struct socket *so, const char *unixsock); ++ ++struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, ++ void *opaque, struct in_addr addr, int port); ++ ++struct gfwd_list *add_exec(struct gfwd_list **ex_ptr, const char *cmdline, ++ struct in_addr addr, int port); ++ ++struct gfwd_list *add_unix(struct gfwd_list **ex_ptr, const char *unixsock, ++ struct in_addr addr, int port); ++ ++int remove_guestfwd(struct gfwd_list **ex_ptr, struct in_addr addr, int port); ++ ++int slirp_bind_outbound(struct socket *so, unsigned short af); ++ ++#endif +diff --git a/slirp/src/ncsi-pkt.h b/slirp/src/ncsi-pkt.h +new file mode 100644 +index 0000000..7795ad8 +--- /dev/null ++++ b/slirp/src/ncsi-pkt.h +@@ -0,0 +1,445 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright Gavin Shan, IBM Corporation 2016. 
++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. 
++ */ ++ ++#ifndef NCSI_PKT_H ++#define NCSI_PKT_H ++ ++/* from linux/net/ncsi/ncsi-pkt.h */ ++#define __be32 uint32_t ++#define __be16 uint16_t ++ ++struct ncsi_pkt_hdr { ++ unsigned char mc_id; /* Management controller ID */ ++ unsigned char revision; /* NCSI version - 0x01 */ ++ unsigned char reserved; /* Reserved */ ++ unsigned char id; /* Packet sequence number */ ++ unsigned char type; /* Packet type */ ++ unsigned char channel; /* Network controller ID */ ++ __be16 length; /* Payload length */ ++ __be32 reserved1[2]; /* Reserved */ ++}; ++ ++struct ncsi_cmd_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++}; ++ ++struct ncsi_rsp_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++ __be16 code; /* Response code */ ++ __be16 reason; /* Response reason */ ++}; ++ ++struct ncsi_aen_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++ unsigned char reserved2[3]; /* Reserved */ ++ unsigned char type; /* AEN packet type */ ++}; ++ ++/* NCSI common command packet */ ++struct ncsi_cmd_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[26]; ++}; ++ ++struct ncsi_rsp_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Select Package */ ++struct ncsi_cmd_sp_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char hw_arbitration; /* HW arbitration */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Disable Channel */ ++struct ncsi_cmd_dc_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char ald; /* Allow link down */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Reset Channel */ ++struct ncsi_cmd_rc_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 reserved; 
/* Reserved */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* AEN Enable */ ++struct ncsi_cmd_ae_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char mc_id; /* MC ID */ ++ __be32 mode; /* AEN working mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++}; ++ ++/* Set Link */ ++struct ncsi_cmd_sl_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 mode; /* Link working mode */ ++ __be32 oem_mode; /* OEM link mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++}; ++ ++/* Set VLAN Filter */ ++struct ncsi_cmd_svf_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be16 reserved; /* Reserved */ ++ __be16 vlan; /* VLAN ID */ ++ __be16 reserved1; /* Reserved */ ++ unsigned char index; /* VLAN table index */ ++ unsigned char enable; /* Enable or disable */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[14]; ++}; ++ ++/* Enable VLAN */ ++struct ncsi_cmd_ev_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char mode; /* VLAN filter mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Set MAC Address */ ++struct ncsi_cmd_sma_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char mac[6]; /* MAC address */ ++ unsigned char index; /* MAC table index */ ++ unsigned char at_e; /* Addr type and operation */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++}; ++ ++/* Enable Broadcast Filter */ ++struct ncsi_cmd_ebf_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 mode; /* Filter mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Enable Global Multicast Filter */ ++struct ncsi_cmd_egmf_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 mode; /* Global MC mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; 
++}; ++ ++/* Set NCSI Flow Control */ ++struct ncsi_cmd_snfc_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char mode; /* Flow control mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* Get Link Status */ ++struct ncsi_rsp_gls_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 status; /* Link status */ ++ __be32 other; /* Other indications */ ++ __be32 oem_status; /* OEM link status */ ++ __be32 checksum; ++ unsigned char pad[10]; ++}; ++ ++/* Get Version ID */ ++struct ncsi_rsp_gvi_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 ncsi_version; /* NCSI version */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char alpha2; /* NCSI version */ ++ unsigned char fw_name[12]; /* f/w name string */ ++ __be32 fw_version; /* f/w version */ ++ __be16 pci_ids[4]; /* PCI IDs */ ++ __be32 mf_id; /* Manufacture ID */ ++ __be32 checksum; ++}; ++ ++/* Get Capabilities */ ++struct ncsi_rsp_gc_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 cap; /* Capabilities */ ++ __be32 bc_cap; /* Broadcast cap */ ++ __be32 mc_cap; /* Multicast cap */ ++ __be32 buf_cap; /* Buffering cap */ ++ __be32 aen_cap; /* AEN cap */ ++ unsigned char vlan_cnt; /* VLAN filter count */ ++ unsigned char mixed_cnt; /* Mix filter count */ ++ unsigned char mc_cnt; /* MC filter count */ ++ unsigned char uc_cnt; /* UC filter count */ ++ unsigned char reserved[2]; /* Reserved */ ++ unsigned char vlan_mode; /* VLAN mode */ ++ unsigned char channel_cnt; /* Channel count */ ++ __be32 checksum; /* Checksum */ ++}; ++ ++/* Get Parameters */ ++struct ncsi_rsp_gp_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ unsigned char mac_cnt; /* Number of MAC addr */ ++ unsigned char reserved[2]; /* Reserved */ ++ unsigned char mac_enable; /* MAC addr enable flags */ ++ unsigned char vlan_cnt; /* VLAN tag count */ ++ unsigned char reserved1; /* 
Reserved */ ++ __be16 vlan_enable; /* VLAN tag enable flags */ ++ __be32 link_mode; /* Link setting */ ++ __be32 bc_mode; /* BC filter mode */ ++ __be32 valid_modes; /* Valid mode parameters */ ++ unsigned char vlan_mode; /* VLAN mode */ ++ unsigned char fc_mode; /* Flow control mode */ ++ unsigned char reserved2[2]; /* Reserved */ ++ __be32 aen_mode; /* AEN mode */ ++ unsigned char mac[6]; /* Supported MAC addr */ ++ __be16 vlan; /* Supported VLAN tags */ ++ __be32 checksum; /* Checksum */ ++}; ++ ++/* Get Controller Packet Statistics */ ++struct ncsi_rsp_gcps_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 cnt_hi; /* Counter cleared */ ++ __be32 cnt_lo; /* Counter cleared */ ++ __be32 rx_bytes; /* Rx bytes */ ++ __be32 tx_bytes; /* Tx bytes */ ++ __be32 rx_uc_pkts; /* Rx UC packets */ ++ __be32 rx_mc_pkts; /* Rx MC packets */ ++ __be32 rx_bc_pkts; /* Rx BC packets */ ++ __be32 tx_uc_pkts; /* Tx UC packets */ ++ __be32 tx_mc_pkts; /* Tx MC packets */ ++ __be32 tx_bc_pkts; /* Tx BC packets */ ++ __be32 fcs_err; /* FCS errors */ ++ __be32 align_err; /* Alignment errors */ ++ __be32 false_carrier; /* False carrier detection */ ++ __be32 runt_pkts; /* Rx runt packets */ ++ __be32 jabber_pkts; /* Rx jabber packets */ ++ __be32 rx_pause_xon; /* Rx pause XON frames */ ++ __be32 rx_pause_xoff; /* Rx XOFF frames */ ++ __be32 tx_pause_xon; /* Tx XON frames */ ++ __be32 tx_pause_xoff; /* Tx XOFF frames */ ++ __be32 tx_s_collision; /* Single collision frames */ ++ __be32 tx_m_collision; /* Multiple collision frames */ ++ __be32 l_collision; /* Late collision frames */ ++ __be32 e_collision; /* Excessive collision frames */ ++ __be32 rx_ctl_frames; /* Rx control frames */ ++ __be32 rx_64_frames; /* Rx 64-bytes frames */ ++ __be32 rx_127_frames; /* Rx 65-127 bytes frames */ ++ __be32 rx_255_frames; /* Rx 128-255 bytes frames */ ++ __be32 rx_511_frames; /* Rx 256-511 bytes frames */ ++ __be32 rx_1023_frames; /* Rx 512-1023 bytes frames */ ++ __be32 
rx_1522_frames; /* Rx 1024-1522 bytes frames */ ++ __be32 rx_9022_frames; /* Rx 1523-9022 bytes frames */ ++ __be32 tx_64_frames; /* Tx 64-bytes frames */ ++ __be32 tx_127_frames; /* Tx 65-127 bytes frames */ ++ __be32 tx_255_frames; /* Tx 128-255 bytes frames */ ++ __be32 tx_511_frames; /* Tx 256-511 bytes frames */ ++ __be32 tx_1023_frames; /* Tx 512-1023 bytes frames */ ++ __be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */ ++ __be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */ ++ __be32 rx_valid_bytes; /* Rx valid bytes */ ++ __be32 rx_runt_pkts; /* Rx error runt packets */ ++ __be32 rx_jabber_pkts; /* Rx error jabber packets */ ++ __be32 checksum; /* Checksum */ ++}; ++ ++/* Get NCSI Statistics */ ++struct ncsi_rsp_gns_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 rx_cmds; /* Rx NCSI commands */ ++ __be32 dropped_cmds; /* Dropped commands */ ++ __be32 cmd_type_errs; /* Command type errors */ ++ __be32 cmd_csum_errs; /* Command checksum errors */ ++ __be32 rx_pkts; /* Rx NCSI packets */ ++ __be32 tx_pkts; /* Tx NCSI packets */ ++ __be32 tx_aen_pkts; /* Tx AEN packets */ ++ __be32 checksum; /* Checksum */ ++}; ++ ++/* Get NCSI Pass-through Statistics */ ++struct ncsi_rsp_gnpts_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 tx_pkts; /* Tx packets */ ++ __be32 tx_dropped; /* Tx dropped packets */ ++ __be32 tx_channel_err; /* Tx channel errors */ ++ __be32 tx_us_err; /* Tx undersize errors */ ++ __be32 rx_pkts; /* Rx packets */ ++ __be32 rx_dropped; /* Rx dropped packets */ ++ __be32 rx_channel_err; /* Rx channel errors */ ++ __be32 rx_us_err; /* Rx undersize errors */ ++ __be32 rx_os_err; /* Rx oversize errors */ ++ __be32 checksum; /* Checksum */ ++}; ++ ++/* Get package status */ ++struct ncsi_rsp_gps_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 status; /* Hardware arbitration status */ ++ __be32 checksum; ++}; ++ ++/* Get package UUID */ ++struct ncsi_rsp_gpuuid_pkt { ++ struct 
ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ unsigned char uuid[16]; /* UUID */ ++ __be32 checksum; ++}; ++ ++/* AEN: Link State Change */ ++struct ncsi_aen_lsc_pkt { ++ struct ncsi_aen_pkt_hdr aen; /* AEN header */ ++ __be32 status; /* Link status */ ++ __be32 oem_status; /* OEM link status */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[14]; ++}; ++ ++/* AEN: Configuration Required */ ++struct ncsi_aen_cr_pkt { ++ struct ncsi_aen_pkt_hdr aen; /* AEN header */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++}; ++ ++/* AEN: Host Network Controller Driver Status Change */ ++struct ncsi_aen_hncdsc_pkt { ++ struct ncsi_aen_pkt_hdr aen; /* AEN header */ ++ __be32 status; /* Status */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++}; ++ ++/* NCSI packet revision */ ++#define NCSI_PKT_REVISION 0x01 ++ ++/* NCSI packet commands */ ++#define NCSI_PKT_CMD_CIS 0x00 /* Clear Initial State */ ++#define NCSI_PKT_CMD_SP 0x01 /* Select Package */ ++#define NCSI_PKT_CMD_DP 0x02 /* Deselect Package */ ++#define NCSI_PKT_CMD_EC 0x03 /* Enable Channel */ ++#define NCSI_PKT_CMD_DC 0x04 /* Disable Channel */ ++#define NCSI_PKT_CMD_RC 0x05 /* Reset Channel */ ++#define NCSI_PKT_CMD_ECNT 0x06 /* Enable Channel Network Tx */ ++#define NCSI_PKT_CMD_DCNT 0x07 /* Disable Channel Network Tx */ ++#define NCSI_PKT_CMD_AE 0x08 /* AEN Enable */ ++#define NCSI_PKT_CMD_SL 0x09 /* Set Link */ ++#define NCSI_PKT_CMD_GLS 0x0a /* Get Link */ ++#define NCSI_PKT_CMD_SVF 0x0b /* Set VLAN Filter */ ++#define NCSI_PKT_CMD_EV 0x0c /* Enable VLAN */ ++#define NCSI_PKT_CMD_DV 0x0d /* Disable VLAN */ ++#define NCSI_PKT_CMD_SMA 0x0e /* Set MAC address */ ++#define NCSI_PKT_CMD_EBF 0x10 /* Enable Broadcast Filter */ ++#define NCSI_PKT_CMD_DBF 0x11 /* Disable Broadcast Filter */ ++#define NCSI_PKT_CMD_EGMF 0x12 /* Enable Global Multicast Filter */ ++#define NCSI_PKT_CMD_DGMF 0x13 /* Disable Global Multicast Filter */ ++#define NCSI_PKT_CMD_SNFC 0x14 /* Set NCSI Flow 
Control */ ++#define NCSI_PKT_CMD_GVI 0x15 /* Get Version ID */ ++#define NCSI_PKT_CMD_GC 0x16 /* Get Capabilities */ ++#define NCSI_PKT_CMD_GP 0x17 /* Get Parameters */ ++#define NCSI_PKT_CMD_GCPS 0x18 /* Get Controller Packet Statistics */ ++#define NCSI_PKT_CMD_GNS 0x19 /* Get NCSI Statistics */ ++#define NCSI_PKT_CMD_GNPTS 0x1a /* Get NCSI Pass-throu Statistics */ ++#define NCSI_PKT_CMD_GPS 0x1b /* Get package status */ ++#define NCSI_PKT_CMD_OEM 0x50 /* OEM */ ++#define NCSI_PKT_CMD_PLDM 0x51 /* PLDM request over NCSI over RBT */ ++#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */ ++ ++/* NCSI packet responses */ ++#define NCSI_PKT_RSP_CIS (NCSI_PKT_CMD_CIS + 0x80) ++#define NCSI_PKT_RSP_SP (NCSI_PKT_CMD_SP + 0x80) ++#define NCSI_PKT_RSP_DP (NCSI_PKT_CMD_DP + 0x80) ++#define NCSI_PKT_RSP_EC (NCSI_PKT_CMD_EC + 0x80) ++#define NCSI_PKT_RSP_DC (NCSI_PKT_CMD_DC + 0x80) ++#define NCSI_PKT_RSP_RC (NCSI_PKT_CMD_RC + 0x80) ++#define NCSI_PKT_RSP_ECNT (NCSI_PKT_CMD_ECNT + 0x80) ++#define NCSI_PKT_RSP_DCNT (NCSI_PKT_CMD_DCNT + 0x80) ++#define NCSI_PKT_RSP_AE (NCSI_PKT_CMD_AE + 0x80) ++#define NCSI_PKT_RSP_SL (NCSI_PKT_CMD_SL + 0x80) ++#define NCSI_PKT_RSP_GLS (NCSI_PKT_CMD_GLS + 0x80) ++#define NCSI_PKT_RSP_SVF (NCSI_PKT_CMD_SVF + 0x80) ++#define NCSI_PKT_RSP_EV (NCSI_PKT_CMD_EV + 0x80) ++#define NCSI_PKT_RSP_DV (NCSI_PKT_CMD_DV + 0x80) ++#define NCSI_PKT_RSP_SMA (NCSI_PKT_CMD_SMA + 0x80) ++#define NCSI_PKT_RSP_EBF (NCSI_PKT_CMD_EBF + 0x80) ++#define NCSI_PKT_RSP_DBF (NCSI_PKT_CMD_DBF + 0x80) ++#define NCSI_PKT_RSP_EGMF (NCSI_PKT_CMD_EGMF + 0x80) ++#define NCSI_PKT_RSP_DGMF (NCSI_PKT_CMD_DGMF + 0x80) ++#define NCSI_PKT_RSP_SNFC (NCSI_PKT_CMD_SNFC + 0x80) ++#define NCSI_PKT_RSP_GVI (NCSI_PKT_CMD_GVI + 0x80) ++#define NCSI_PKT_RSP_GC (NCSI_PKT_CMD_GC + 0x80) ++#define NCSI_PKT_RSP_GP (NCSI_PKT_CMD_GP + 0x80) ++#define NCSI_PKT_RSP_GCPS (NCSI_PKT_CMD_GCPS + 0x80) ++#define NCSI_PKT_RSP_GNS (NCSI_PKT_CMD_GNS + 0x80) ++#define NCSI_PKT_RSP_GNPTS (NCSI_PKT_CMD_GNPTS + 
0x80) ++#define NCSI_PKT_RSP_GPS (NCSI_PKT_CMD_GPS + 0x80) ++#define NCSI_PKT_RSP_OEM (NCSI_PKT_CMD_OEM + 0x80) ++#define NCSI_PKT_RSP_PLDM (NCSI_PKT_CMD_PLDM + 0x80) ++#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80) ++ ++/* NCSI response code/reason */ ++#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */ ++#define NCSI_PKT_RSP_C_FAILED 0x0001 /* Command Failed */ ++#define NCSI_PKT_RSP_C_UNAVAILABLE 0x0002 /* Command Unavailable */ ++#define NCSI_PKT_RSP_C_UNSUPPORTED 0x0003 /* Command Unsupported */ ++#define NCSI_PKT_RSP_R_NO_ERROR 0x0000 /* No Error */ ++#define NCSI_PKT_RSP_R_INTERFACE 0x0001 /* Interface not ready */ ++#define NCSI_PKT_RSP_R_PARAM 0x0002 /* Invalid Parameter */ ++#define NCSI_PKT_RSP_R_CHANNEL 0x0003 /* Channel not Ready */ ++#define NCSI_PKT_RSP_R_PACKAGE 0x0004 /* Package not Ready */ ++#define NCSI_PKT_RSP_R_LENGTH 0x0005 /* Invalid payload length */ ++#define NCSI_PKT_RSP_R_UNKNOWN 0x7fff /* Command type unsupported */ ++ ++/* NCSI AEN packet type */ ++#define NCSI_PKT_AEN 0xFF /* AEN Packet */ ++#define NCSI_PKT_AEN_LSC 0x00 /* Link status change */ ++#define NCSI_PKT_AEN_CR 0x01 /* Configuration required */ ++#define NCSI_PKT_AEN_HNCDSC 0x02 /* HNC driver status change */ ++ ++#endif /* NCSI_PKT_H */ +diff --git a/slirp/src/ncsi.c b/slirp/src/ncsi.c +new file mode 100644 +index 0000000..ddd980d +--- /dev/null ++++ b/slirp/src/ncsi.c +@@ -0,0 +1,192 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * NC-SI (Network Controller Sideband Interface) "echo" model ++ * ++ * Copyright (C) 2016-2018 IBM Corp. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. 
Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#include "slirp.h" ++ ++#include "ncsi-pkt.h" ++ ++static uint32_t ncsi_calculate_checksum(uint16_t *data, int len) ++{ ++ uint32_t checksum = 0; ++ int i; ++ ++ /* ++ * 32-bit unsigned sum of the NC-SI packet header and NC-SI packet ++ * payload interpreted as a series of 16-bit unsigned integer values. 
++ */ ++ for (i = 0; i < len / 2; i++) { ++ checksum += htons(data[i]); ++ } ++ ++ checksum = (~checksum + 1); ++ return checksum; ++} ++ ++/* Get Capabilities */ ++static int ncsi_rsp_handler_gc(struct ncsi_rsp_pkt_hdr *rnh) ++{ ++ struct ncsi_rsp_gc_pkt *rsp = (struct ncsi_rsp_gc_pkt *)rnh; ++ ++ rsp->cap = htonl(~0); ++ rsp->bc_cap = htonl(~0); ++ rsp->mc_cap = htonl(~0); ++ rsp->buf_cap = htonl(~0); ++ rsp->aen_cap = htonl(~0); ++ rsp->vlan_mode = 0xff; ++ rsp->uc_cnt = 2; ++ return 0; ++} ++ ++/* Get Link status */ ++static int ncsi_rsp_handler_gls(struct ncsi_rsp_pkt_hdr *rnh) ++{ ++ struct ncsi_rsp_gls_pkt *rsp = (struct ncsi_rsp_gls_pkt *)rnh; ++ ++ rsp->status = htonl(0x1); ++ return 0; ++} ++ ++/* Get Parameters */ ++static int ncsi_rsp_handler_gp(struct ncsi_rsp_pkt_hdr *rnh) ++{ ++ struct ncsi_rsp_gp_pkt *rsp = (struct ncsi_rsp_gp_pkt *)rnh; ++ ++ /* no MAC address filters or VLAN filters on the channel */ ++ rsp->mac_cnt = 0; ++ rsp->mac_enable = 0; ++ rsp->vlan_cnt = 0; ++ rsp->vlan_enable = 0; ++ ++ return 0; ++} ++ ++static const struct ncsi_rsp_handler { ++ unsigned char type; ++ int payload; ++ int (*handler)(struct ncsi_rsp_pkt_hdr *rnh); ++} ncsi_rsp_handlers[] = { { NCSI_PKT_RSP_CIS, 4, NULL }, ++ { NCSI_PKT_RSP_SP, 4, NULL }, ++ { NCSI_PKT_RSP_DP, 4, NULL }, ++ { NCSI_PKT_RSP_EC, 4, NULL }, ++ { NCSI_PKT_RSP_DC, 4, NULL }, ++ { NCSI_PKT_RSP_RC, 4, NULL }, ++ { NCSI_PKT_RSP_ECNT, 4, NULL }, ++ { NCSI_PKT_RSP_DCNT, 4, NULL }, ++ { NCSI_PKT_RSP_AE, 4, NULL }, ++ { NCSI_PKT_RSP_SL, 4, NULL }, ++ { NCSI_PKT_RSP_GLS, 16, ncsi_rsp_handler_gls }, ++ { NCSI_PKT_RSP_SVF, 4, NULL }, ++ { NCSI_PKT_RSP_EV, 4, NULL }, ++ { NCSI_PKT_RSP_DV, 4, NULL }, ++ { NCSI_PKT_RSP_SMA, 4, NULL }, ++ { NCSI_PKT_RSP_EBF, 4, NULL }, ++ { NCSI_PKT_RSP_DBF, 4, NULL }, ++ { NCSI_PKT_RSP_EGMF, 4, NULL }, ++ { NCSI_PKT_RSP_DGMF, 4, NULL }, ++ { NCSI_PKT_RSP_SNFC, 4, NULL }, ++ { NCSI_PKT_RSP_GVI, 40, NULL }, ++ { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc }, ++ { NCSI_PKT_RSP_GP, 
40, ncsi_rsp_handler_gp }, ++ { NCSI_PKT_RSP_GCPS, 172, NULL }, ++ { NCSI_PKT_RSP_GNS, 172, NULL }, ++ { NCSI_PKT_RSP_GNPTS, 172, NULL }, ++ { NCSI_PKT_RSP_GPS, 8, NULL }, ++ { NCSI_PKT_RSP_OEM, 0, NULL }, ++ { NCSI_PKT_RSP_PLDM, 0, NULL }, ++ { NCSI_PKT_RSP_GPUUID, 20, NULL } }; ++ ++/* ++ * packet format : ncsi header + payload + checksum ++ */ ++#define NCSI_MAX_PAYLOAD 172 ++#define NCSI_MAX_LEN (sizeof(struct ncsi_pkt_hdr) + NCSI_MAX_PAYLOAD + 4) ++ ++void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) ++{ ++ struct ncsi_pkt_hdr *nh = (struct ncsi_pkt_hdr *)(pkt + ETH_HLEN); ++ uint8_t ncsi_reply[ETH_HLEN + NCSI_MAX_LEN]; ++ struct ethhdr *reh = (struct ethhdr *)ncsi_reply; ++ struct ncsi_rsp_pkt_hdr *rnh = ++ (struct ncsi_rsp_pkt_hdr *)(ncsi_reply + ETH_HLEN); ++ const struct ncsi_rsp_handler *handler = NULL; ++ int i; ++ int ncsi_rsp_len = sizeof(*nh); ++ uint32_t checksum; ++ uint32_t *pchecksum; ++ ++ memset(ncsi_reply, 0, sizeof(ncsi_reply)); ++ ++ memset(reh->h_dest, 0xff, ETH_ALEN); ++ memset(reh->h_source, 0xff, ETH_ALEN); ++ reh->h_proto = htons(ETH_P_NCSI); ++ ++ for (i = 0; i < G_N_ELEMENTS(ncsi_rsp_handlers); i++) { ++ if (ncsi_rsp_handlers[i].type == nh->type + 0x80) { ++ handler = &ncsi_rsp_handlers[i]; ++ break; ++ } ++ } ++ ++ rnh->common.mc_id = nh->mc_id; ++ rnh->common.revision = NCSI_PKT_REVISION; ++ rnh->common.id = nh->id; ++ rnh->common.type = nh->type + 0x80; ++ rnh->common.channel = nh->channel; ++ ++ if (handler) { ++ rnh->common.length = htons(handler->payload); ++ rnh->code = htons(NCSI_PKT_RSP_C_COMPLETED); ++ rnh->reason = htons(NCSI_PKT_RSP_R_NO_ERROR); ++ ++ if (handler->handler) { ++ /* TODO: handle errors */ ++ handler->handler(rnh); ++ } ++ ncsi_rsp_len += handler->payload; ++ } else { ++ rnh->common.length = 0; ++ rnh->code = htons(NCSI_PKT_RSP_C_UNAVAILABLE); ++ rnh->reason = htons(NCSI_PKT_RSP_R_UNKNOWN); ++ } ++ ++ /* Add the optional checksum at the end of the frame. 
*/ ++ checksum = ncsi_calculate_checksum((uint16_t *)rnh, ncsi_rsp_len); ++ pchecksum = (uint32_t *)((void *)rnh + ncsi_rsp_len); ++ *pchecksum = htonl(checksum); ++ ncsi_rsp_len += 4; ++ ++ slirp_send_packet_all(slirp, ncsi_reply, ETH_HLEN + ncsi_rsp_len); ++} +diff --git a/slirp/src/ndp_table.c b/slirp/src/ndp_table.c +new file mode 100644 +index 0000000..110d6ea +--- /dev/null ++++ b/slirp/src/ndp_table.c +@@ -0,0 +1,87 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#include "slirp.h" ++ ++void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t ethaddr[ETH_ALEN]) ++{ ++ char addrstr[INET6_ADDRSTRLEN]; ++ NdpTable *ndp_table = &slirp->ndp_table; ++ int i; ++ ++ inet_ntop(AF_INET6, &(ip_addr), addrstr, INET6_ADDRSTRLEN); ++ ++ DEBUG_CALL("ndp_table_add"); ++ DEBUG_ARG("ip = %s", addrstr); ++ DEBUG_ARG("hw addr = %02x:%02x:%02x:%02x:%02x:%02x", ethaddr[0], ethaddr[1], ++ ethaddr[2], ethaddr[3], ethaddr[4], ethaddr[5]); ++ ++ if (IN6_IS_ADDR_MULTICAST(&ip_addr) || in6_zero(&ip_addr)) { ++ /* Do not register multicast or unspecified addresses */ ++ DEBUG_CALL(" abort: do not register multicast or unspecified address"); ++ return; ++ } ++ ++ /* Search for an entry */ ++ for (i = 0; i < NDP_TABLE_SIZE; i++) { ++ if (in6_equal(&ndp_table->table[i].ip_addr, &ip_addr)) { ++ DEBUG_CALL(" already in table: update the entry"); ++ /* Update the entry */ ++ memcpy(ndp_table->table[i].eth_addr, ethaddr, ETH_ALEN); ++ return; ++ } ++ } ++ ++ /* No entry found, create a new one */ ++ DEBUG_CALL(" create new entry"); ++ ndp_table->table[ndp_table->next_victim].ip_addr = ip_addr; ++ memcpy(ndp_table->table[ndp_table->next_victim].eth_addr, ethaddr, ++ ETH_ALEN); ++ ndp_table->next_victim = (ndp_table->next_victim + 1) % NDP_TABLE_SIZE; ++} ++ ++bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]) ++{ ++ char 
addrstr[INET6_ADDRSTRLEN]; ++ NdpTable *ndp_table = &slirp->ndp_table; ++ int i; ++ ++ inet_ntop(AF_INET6, &(ip_addr), addrstr, INET6_ADDRSTRLEN); ++ ++ DEBUG_CALL("ndp_table_search"); ++ DEBUG_ARG("ip = %s", addrstr); ++ ++ assert(!in6_zero(&ip_addr)); ++ ++ /* Multicast address: fec0::abcd:efgh/8 -> 33:33:ab:cd:ef:gh */ ++ if (IN6_IS_ADDR_MULTICAST(&ip_addr)) { ++ out_ethaddr[0] = 0x33; ++ out_ethaddr[1] = 0x33; ++ out_ethaddr[2] = ip_addr.s6_addr[12]; ++ out_ethaddr[3] = ip_addr.s6_addr[13]; ++ out_ethaddr[4] = ip_addr.s6_addr[14]; ++ out_ethaddr[5] = ip_addr.s6_addr[15]; ++ DEBUG_ARG("multicast addr = %02x:%02x:%02x:%02x:%02x:%02x", ++ out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], ++ out_ethaddr[3], out_ethaddr[4], out_ethaddr[5]); ++ return 1; ++ } ++ ++ for (i = 0; i < NDP_TABLE_SIZE; i++) { ++ if (in6_equal(&ndp_table->table[i].ip_addr, &ip_addr)) { ++ memcpy(out_ethaddr, ndp_table->table[i].eth_addr, ETH_ALEN); ++ DEBUG_ARG("found hw addr = %02x:%02x:%02x:%02x:%02x:%02x", ++ out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], ++ out_ethaddr[3], out_ethaddr[4], out_ethaddr[5]); ++ return 1; ++ } ++ } ++ ++ DEBUG_CALL(" ip not found in table"); ++ return 0; ++} +diff --git a/slirp/src/sbuf.c b/slirp/src/sbuf.c +new file mode 100644 +index 0000000..2fb9176 +--- /dev/null ++++ b/slirp/src/sbuf.c +@@ -0,0 +1,168 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#include "slirp.h" ++ ++static void sbappendsb(struct sbuf *sb, struct mbuf *m); ++ ++void sbfree(struct sbuf *sb) ++{ ++ g_free(sb->sb_data); ++} ++ ++bool sbdrop(struct sbuf *sb, size_t num) ++{ ++ int limit = sb->sb_datalen / 2; ++ ++ g_warn_if_fail(num <= sb->sb_cc); ++ if (num > sb->sb_cc) ++ num = sb->sb_cc; ++ ++ sb->sb_cc -= num; ++ sb->sb_rptr += num; ++ if (sb->sb_rptr >= sb->sb_data + sb->sb_datalen) ++ sb->sb_rptr -= sb->sb_datalen; ++ ++ if (sb->sb_cc < limit && sb->sb_cc + num >= limit) { ++ return true; ++ } ++ ++ return false; ++} ++ ++void sbreserve(struct sbuf *sb, size_t size) ++{ ++ sb->sb_wptr = sb->sb_rptr = sb->sb_data = g_realloc(sb->sb_data, size); ++ sb->sb_cc = 0; ++ sb->sb_datalen = size; ++} ++ ++/* ++ * Try and write() to the socket, whatever doesn't get written ++ * append to the buffer... for a host with a fast net connection, ++ * this prevents an unnecessary copy of the data ++ * (the socket is non-blocking, so we won't hang) ++ */ ++void sbappend(struct socket *so, struct mbuf *m) ++{ ++ int ret = 0; ++ ++ DEBUG_CALL("sbappend"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m->m_len = %d", m->m_len); ++ ++ /* Shouldn't happen, but... e.g. 
foreign host closes connection */ ++ if (m->m_len <= 0) { ++ m_free(m); ++ return; ++ } ++ ++ /* ++ * If there is urgent data, call sosendoob ++ * if not all was sent, sowrite will take care of the rest ++ * (The rest of this function is just an optimisation) ++ */ ++ if (so->so_urgc) { ++ sbappendsb(&so->so_rcv, m); ++ m_free(m); ++ (void)sosendoob(so); ++ return; ++ } ++ ++ /* ++ * We only write if there's nothing in the buffer, ++ * ottherwise it'll arrive out of order, and hence corrupt ++ */ ++ if (!so->so_rcv.sb_cc) ++ ret = slirp_send(so, m->m_data, m->m_len, 0); ++ ++ if (ret <= 0) { ++ /* ++ * Nothing was written ++ * It's possible that the socket has closed, but ++ * we don't need to check because if it has closed, ++ * it will be detected in the normal way by soread() ++ */ ++ sbappendsb(&so->so_rcv, m); ++ } else if (ret != m->m_len) { ++ /* ++ * Something was written, but not everything.. ++ * sbappendsb the rest ++ */ ++ m->m_len -= ret; ++ m->m_data += ret; ++ sbappendsb(&so->so_rcv, m); ++ } /* else */ ++ /* Whatever happened, we free the mbuf */ ++ m_free(m); ++} ++ ++/* ++ * Copy the data from m into sb ++ * The caller is responsible to make sure there's enough room ++ */ ++static void sbappendsb(struct sbuf *sb, struct mbuf *m) ++{ ++ int len, n, nn; ++ ++ len = m->m_len; ++ ++ if (sb->sb_wptr < sb->sb_rptr) { ++ n = sb->sb_rptr - sb->sb_wptr; ++ if (n > len) ++ n = len; ++ memcpy(sb->sb_wptr, m->m_data, n); ++ } else { ++ /* Do the right edge first */ ++ n = sb->sb_data + sb->sb_datalen - sb->sb_wptr; ++ if (n > len) ++ n = len; ++ memcpy(sb->sb_wptr, m->m_data, n); ++ len -= n; ++ if (len) { ++ /* Now the left edge */ ++ nn = sb->sb_rptr - sb->sb_data; ++ if (nn > len) ++ nn = len; ++ memcpy(sb->sb_data, m->m_data + n, nn); ++ n += nn; ++ } ++ } ++ ++ sb->sb_cc += n; ++ sb->sb_wptr += n; ++ if (sb->sb_wptr >= sb->sb_data + sb->sb_datalen) ++ sb->sb_wptr -= sb->sb_datalen; ++} ++ ++/* ++ * Copy data from sbuf to a normal, straight buffer ++ * 
Don't update the sbuf rptr, this will be ++ * done in sbdrop when the data is acked ++ */ ++void sbcopy(struct sbuf *sb, size_t off, size_t len, char *to) ++{ ++ char *from; ++ ++ g_assert(len + off <= sb->sb_cc); ++ ++ from = sb->sb_rptr + off; ++ if (from >= sb->sb_data + sb->sb_datalen) ++ from -= sb->sb_datalen; ++ ++ if (from < sb->sb_wptr) { ++ memcpy(to, from, len); ++ } else { ++ /* re-use off */ ++ off = (sb->sb_data + sb->sb_datalen) - from; ++ if (off > len) ++ off = len; ++ memcpy(to, from, off); ++ len -= off; ++ if (len) ++ memcpy(to + off, sb->sb_data, len); ++ } ++} +diff --git a/slirp/src/sbuf.h b/slirp/src/sbuf.h +new file mode 100644 +index 0000000..01886fb +--- /dev/null ++++ b/slirp/src/sbuf.h +@@ -0,0 +1,27 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef SBUF_H ++#define SBUF_H ++ ++#define sbspace(sb) ((sb)->sb_datalen - (sb)->sb_cc) ++ ++struct sbuf { ++ uint32_t sb_cc; /* actual chars in buffer */ ++ uint32_t sb_datalen; /* Length of data */ ++ char *sb_wptr; /* write pointer. points to where the next ++ * bytes should be written in the sbuf */ ++ char *sb_rptr; /* read pointer. 
points to where the next ++ * byte should be read from the sbuf */ ++ char *sb_data; /* Actual data */ ++}; ++ ++void sbfree(struct sbuf *sb); ++bool sbdrop(struct sbuf *sb, size_t len); ++void sbreserve(struct sbuf *sb, size_t size); ++void sbappend(struct socket *sb, struct mbuf *mb); ++void sbcopy(struct sbuf *sb, size_t off, size_t len, char *p); ++ ++#endif +diff --git a/slirp/src/slirp.c b/slirp/src/slirp.c +new file mode 100644 +index 0000000..14458e8 +--- /dev/null ++++ b/slirp/src/slirp.c +@@ -0,0 +1,1185 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * libslirp glue ++ * ++ * Copyright (c) 2004-2008 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "slirp.h" ++ ++ ++#ifndef _WIN32 ++#include ++#endif ++ ++/* https://gitlab.freedesktop.org/slirp/libslirp/issues/18 */ ++#if defined(__NetBSD__) && defined(if_mtu) ++#undef if_mtu ++#endif ++ ++int slirp_debug; ++ ++/* Define to 1 if you want KEEPALIVE timers */ ++bool slirp_do_keepalive; ++ ++/* host loopback address */ ++struct in_addr loopback_addr; ++/* host loopback network mask */ ++unsigned long loopback_mask; ++ ++/* emulated hosts use the MAC addr 52:55:IP:IP:IP:IP */ ++static const uint8_t special_ethaddr[ETH_ALEN] = { 0x52, 0x55, 0x00, ++ 0x00, 0x00, 0x00 }; ++ ++unsigned curtime; ++ ++static struct in_addr dns_addr; ++#ifndef _WIN32 ++static struct in6_addr dns6_addr; ++#endif ++static unsigned dns_addr_time; ++#ifndef _WIN32 ++static unsigned dns6_addr_time; ++#endif ++ ++#define TIMEOUT_FAST 2 /* milliseconds */ ++#define TIMEOUT_SLOW 499 /* milliseconds */ ++/* for the aging of certain requests like DNS */ ++#define TIMEOUT_DEFAULT 1000 /* milliseconds */ ++ ++#ifdef _WIN32 ++ ++int get_dns_addr(struct in_addr *pdns_addr) ++{ ++ FIXED_INFO *FixedInfo = NULL; ++ ULONG BufLen; ++ DWORD ret; ++ IP_ADDR_STRING *pIPAddr; ++ struct in_addr tmp_addr; ++ ++ if (dns_addr.s_addr != 0 && (curtime - dns_addr_time) < TIMEOUT_DEFAULT) { ++ *pdns_addr = dns_addr; ++ return 0; ++ } ++ ++ FixedInfo = (FIXED_INFO *)GlobalAlloc(GPTR, sizeof(FIXED_INFO)); ++ BufLen = sizeof(FIXED_INFO); ++ ++ if (ERROR_BUFFER_OVERFLOW == GetNetworkParams(FixedInfo, &BufLen)) { ++ if (FixedInfo) { ++ GlobalFree(FixedInfo); ++ FixedInfo = NULL; ++ } ++ FixedInfo = GlobalAlloc(GPTR, BufLen); ++ } ++ ++ if ((ret = GetNetworkParams(FixedInfo, &BufLen)) != ERROR_SUCCESS) { ++ printf("GetNetworkParams failed. 
ret = %08x\n", (unsigned)ret); ++ if (FixedInfo) { ++ GlobalFree(FixedInfo); ++ FixedInfo = NULL; ++ } ++ return -1; ++ } ++ ++ pIPAddr = &(FixedInfo->DnsServerList); ++ inet_aton(pIPAddr->IpAddress.String, &tmp_addr); ++ *pdns_addr = tmp_addr; ++ dns_addr = tmp_addr; ++ dns_addr_time = curtime; ++ if (FixedInfo) { ++ GlobalFree(FixedInfo); ++ FixedInfo = NULL; ++ } ++ return 0; ++} ++ ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) ++{ ++ return -1; ++} ++ ++static void winsock_cleanup(void) ++{ ++ WSACleanup(); ++} ++ ++#else ++ ++static int get_dns_addr_cached(void *pdns_addr, void *cached_addr, ++ socklen_t addrlen, struct stat *cached_stat, ++ unsigned *cached_time) ++{ ++ struct stat old_stat; ++ if (curtime - *cached_time < TIMEOUT_DEFAULT) { ++ memcpy(pdns_addr, cached_addr, addrlen); ++ return 0; ++ } ++ old_stat = *cached_stat; ++ if (stat("/etc/resolv.conf", cached_stat) != 0) { ++ return -1; ++ } ++ if (cached_stat->st_dev == old_stat.st_dev && ++ cached_stat->st_ino == old_stat.st_ino && ++ cached_stat->st_size == old_stat.st_size && ++ cached_stat->st_mtime == old_stat.st_mtime) { ++ memcpy(pdns_addr, cached_addr, addrlen); ++ return 0; ++ } ++ return 1; ++} ++ ++static int get_dns_addr_resolv_conf(int af, void *pdns_addr, void *cached_addr, ++ socklen_t addrlen, uint32_t *scope_id, ++ unsigned *cached_time) ++{ ++ char buff[512]; ++ char buff2[257]; ++ FILE *f; ++ int found = 0; ++ void *tmp_addr = alloca(addrlen); ++ unsigned if_index; ++ ++ f = fopen("/etc/resolv.conf", "r"); ++ if (!f) ++ return -1; ++ ++ DEBUG_MISC("IP address of your DNS(s):"); ++ while (fgets(buff, 512, f) != NULL) { ++ if (sscanf(buff, "nameserver%*[ \t]%256s", buff2) == 1) { ++ char *c = strchr(buff2, '%'); ++ if (c) { ++ if_index = if_nametoindex(c + 1); ++ *c = '\0'; ++ } else { ++ if_index = 0; ++ } ++ ++ if (!inet_pton(af, buff2, tmp_addr)) { ++ continue; ++ } ++ /* If it's the first one, set it to dns_addr */ ++ if (!found) { ++ memcpy(pdns_addr, 
tmp_addr, addrlen); ++ memcpy(cached_addr, tmp_addr, addrlen); ++ if (scope_id) { ++ *scope_id = if_index; ++ } ++ *cached_time = curtime; ++ } ++ ++ if (++found > 3) { ++ DEBUG_MISC(" (more)"); ++ break; ++ } else if (slirp_debug & DBG_MISC) { ++ char s[INET6_ADDRSTRLEN]; ++ const char *res = inet_ntop(af, tmp_addr, s, sizeof(s)); ++ if (!res) { ++ res = " (string conversion error)"; ++ } ++ DEBUG_MISC(" %s", res); ++ } ++ } ++ } ++ fclose(f); ++ if (!found) ++ return -1; ++ return 0; ++} ++ ++int get_dns_addr(struct in_addr *pdns_addr) ++{ ++ static struct stat dns_addr_stat; ++ ++ if (dns_addr.s_addr != 0) { ++ int ret; ++ ret = get_dns_addr_cached(pdns_addr, &dns_addr, sizeof(dns_addr), ++ &dns_addr_stat, &dns_addr_time); ++ if (ret <= 0) { ++ return ret; ++ } ++ } ++ return get_dns_addr_resolv_conf(AF_INET, pdns_addr, &dns_addr, ++ sizeof(dns_addr), NULL, &dns_addr_time); ++} ++ ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) ++{ ++ static struct stat dns6_addr_stat; ++ ++ if (!in6_zero(&dns6_addr)) { ++ int ret; ++ ret = get_dns_addr_cached(pdns6_addr, &dns6_addr, sizeof(dns6_addr), ++ &dns6_addr_stat, &dns6_addr_time); ++ if (ret <= 0) { ++ return ret; ++ } ++ } ++ return get_dns_addr_resolv_conf(AF_INET6, pdns6_addr, &dns6_addr, ++ sizeof(dns6_addr), scope_id, ++ &dns6_addr_time); ++} ++ ++#endif ++ ++static void slirp_init_once(void) ++{ ++ static int initialized; ++ const char *debug; ++#ifdef _WIN32 ++ WSADATA Data; ++#endif ++ ++ if (initialized) { ++ return; ++ } ++ initialized = 1; ++ ++#ifdef _WIN32 ++ WSAStartup(MAKEWORD(2, 0), &Data); ++ atexit(winsock_cleanup); ++#endif ++ ++ loopback_addr.s_addr = htonl(INADDR_LOOPBACK); ++ loopback_mask = htonl(IN_CLASSA_NET); ++ ++ debug = g_getenv("SLIRP_DEBUG"); ++ if (debug) { ++ const GDebugKey keys[] = { ++ { "call", DBG_CALL }, ++ { "misc", DBG_MISC }, ++ { "error", DBG_ERROR }, ++ { "tftp", DBG_TFTP }, ++ }; ++ slirp_debug = g_parse_debug_string(debug, keys, G_N_ELEMENTS(keys)); ++ } 
++} ++ ++Slirp *slirp_new(const SlirpConfig *cfg, const SlirpCb *callbacks, void *opaque) ++{ ++ Slirp *slirp; ++ ++ g_return_val_if_fail(cfg != NULL, NULL); ++ g_return_val_if_fail(cfg->version >= SLIRP_CONFIG_VERSION_MIN, NULL); ++ g_return_val_if_fail(cfg->version <= SLIRP_CONFIG_VERSION_MAX, NULL); ++ g_return_val_if_fail(cfg->if_mtu >= IF_MTU_MIN || cfg->if_mtu == 0, NULL); ++ g_return_val_if_fail(cfg->if_mtu <= IF_MTU_MAX, NULL); ++ g_return_val_if_fail(cfg->if_mru >= IF_MRU_MIN || cfg->if_mru == 0, NULL); ++ g_return_val_if_fail(cfg->if_mru <= IF_MRU_MAX, NULL); ++ g_return_val_if_fail(!cfg->bootfile || ++ (strlen(cfg->bootfile) < ++ G_SIZEOF_MEMBER(struct bootp_t, bp_file)), NULL); ++ ++ slirp = g_malloc0(sizeof(Slirp)); ++ ++ slirp_init_once(); ++ ++ slirp->opaque = opaque; ++ slirp->cb = callbacks; ++ slirp->grand = g_rand_new(); ++ slirp->restricted = cfg->restricted; ++ ++ slirp->in_enabled = cfg->in_enabled; ++ slirp->in6_enabled = cfg->in6_enabled; ++ ++ if_init(slirp); ++ ip_init(slirp); ++ ip6_init(slirp); ++ ++ m_init(slirp); ++ ++ slirp->vnetwork_addr = cfg->vnetwork; ++ slirp->vnetwork_mask = cfg->vnetmask; ++ slirp->vhost_addr = cfg->vhost; ++ slirp->vprefix_addr6 = cfg->vprefix_addr6; ++ slirp->vprefix_len = cfg->vprefix_len; ++ slirp->vhost_addr6 = cfg->vhost6; ++ if (cfg->vhostname) { ++ slirp_pstrcpy(slirp->client_hostname, sizeof(slirp->client_hostname), ++ cfg->vhostname); ++ } ++ slirp->tftp_prefix = g_strdup(cfg->tftp_path); ++ slirp->bootp_filename = g_strdup(cfg->bootfile); ++ slirp->vdomainname = g_strdup(cfg->vdomainname); ++ slirp->vdhcp_startaddr = cfg->vdhcp_start; ++ slirp->vnameserver_addr = cfg->vnameserver; ++ slirp->vnameserver_addr6 = cfg->vnameserver6; ++ slirp->tftp_server_name = g_strdup(cfg->tftp_server_name); ++ ++ if (cfg->vdnssearch) { ++ translate_dnssearch(slirp, cfg->vdnssearch); ++ } ++ slirp->if_mtu = cfg->if_mtu == 0 ? IF_MTU_DEFAULT : cfg->if_mtu; ++ slirp->if_mru = cfg->if_mru == 0 ? 
IF_MRU_DEFAULT : cfg->if_mru; ++ slirp->disable_host_loopback = cfg->disable_host_loopback; ++ slirp->enable_emu = cfg->enable_emu; ++ ++ if (cfg->version >= 2) { ++ slirp->outbound_addr = cfg->outbound_addr; ++ slirp->outbound_addr6 = cfg->outbound_addr6; ++ } else { ++ slirp->outbound_addr = NULL; ++ slirp->outbound_addr6 = NULL; ++ } ++ return slirp; ++} ++ ++Slirp *slirp_init(int restricted, bool in_enabled, struct in_addr vnetwork, ++ struct in_addr vnetmask, struct in_addr vhost, ++ bool in6_enabled, struct in6_addr vprefix_addr6, ++ uint8_t vprefix_len, struct in6_addr vhost6, ++ const char *vhostname, const char *tftp_server_name, ++ const char *tftp_path, const char *bootfile, ++ struct in_addr vdhcp_start, struct in_addr vnameserver, ++ struct in6_addr vnameserver6, const char **vdnssearch, ++ const char *vdomainname, const SlirpCb *callbacks, ++ void *opaque) ++{ ++ SlirpConfig cfg; ++ memset(&cfg, 0, sizeof(cfg)); ++ cfg.version = 1; ++ cfg.restricted = restricted; ++ cfg.in_enabled = in_enabled; ++ cfg.vnetwork = vnetwork; ++ cfg.vnetmask = vnetmask; ++ cfg.vhost = vhost; ++ cfg.in6_enabled = in6_enabled; ++ cfg.vprefix_addr6 = vprefix_addr6; ++ cfg.vprefix_len = vprefix_len; ++ cfg.vhost6 = vhost6; ++ cfg.vhostname = vhostname; ++ cfg.tftp_server_name = tftp_server_name; ++ cfg.tftp_path = tftp_path; ++ cfg.bootfile = bootfile; ++ cfg.vdhcp_start = vdhcp_start; ++ cfg.vnameserver = vnameserver; ++ cfg.vnameserver6 = vnameserver6; ++ cfg.vdnssearch = vdnssearch; ++ cfg.vdomainname = vdomainname; ++ return slirp_new(&cfg, callbacks, opaque); ++} ++ ++void slirp_cleanup(Slirp *slirp) ++{ ++ struct gfwd_list *e, *next; ++ ++ for (e = slirp->guestfwd_list; e; e = next) { ++ next = e->ex_next; ++ g_free(e->ex_exec); ++ g_free(e->ex_unix); ++ g_free(e); ++ } ++ ++ ip_cleanup(slirp); ++ ip6_cleanup(slirp); ++ m_cleanup(slirp); ++ ++ g_rand_free(slirp->grand); ++ ++ g_free(slirp->vdnssearch); ++ g_free(slirp->tftp_prefix); ++ g_free(slirp->bootp_filename); ++ 
g_free(slirp->vdomainname); ++ g_free(slirp); ++} ++ ++#define CONN_CANFSEND(so) \ ++ (((so)->so_state & (SS_FCANTSENDMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED) ++#define CONN_CANFRCV(so) \ ++ (((so)->so_state & (SS_FCANTRCVMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED) ++ ++static void slirp_update_timeout(Slirp *slirp, uint32_t *timeout) ++{ ++ uint32_t t; ++ ++ if (*timeout <= TIMEOUT_FAST) { ++ return; ++ } ++ ++ t = MIN(1000, *timeout); ++ ++ /* If we have tcp timeout with slirp, then we will fill @timeout with ++ * more precise value. ++ */ ++ if (slirp->time_fasttimo) { ++ *timeout = TIMEOUT_FAST; ++ return; ++ } ++ if (slirp->do_slowtimo) { ++ t = MIN(TIMEOUT_SLOW, t); ++ } ++ *timeout = t; ++} ++ ++void slirp_pollfds_fill(Slirp *slirp, uint32_t *timeout, ++ SlirpAddPollCb add_poll, void *opaque) ++{ ++ struct socket *so, *so_next; ++ ++ /* ++ * First, TCP sockets ++ */ ++ ++ /* ++ * *_slowtimo needs calling if there are IP fragments ++ * in the fragment queue, or there are TCP connections active ++ */ ++ slirp->do_slowtimo = ((slirp->tcb.so_next != &slirp->tcb) || ++ (&slirp->ipq.ip_link != slirp->ipq.ip_link.next)); ++ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) { ++ int events = 0; ++ ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if we need a tcp_fasttimo ++ */ ++ if (slirp->time_fasttimo == 0 && so->so_tcpcb->t_flags & TF_DELACK) { ++ slirp->time_fasttimo = curtime; /* Flag when want a fasttimo */ ++ } ++ ++ /* ++ * NOFDREF can include still connecting to local-host, ++ * newly socreated() sockets etc. Don't want to select these. 
++ */ ++ if (so->so_state & SS_NOFDREF || so->s == -1) { ++ continue; ++ } ++ ++ /* ++ * Set for reading sockets which are accepting ++ */ ++ if (so->so_state & SS_FACCEPTCONN) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ continue; ++ } ++ ++ /* ++ * Set for writing sockets which are connecting ++ */ ++ if (so->so_state & SS_ISFCONNECTING) { ++ so->pollfds_idx = ++ add_poll(so->s, SLIRP_POLL_OUT | SLIRP_POLL_ERR, opaque); ++ continue; ++ } ++ ++ /* ++ * Set for writing if we are connected, can send more, and ++ * we have something to send ++ */ ++ if (CONN_CANFSEND(so) && so->so_rcv.sb_cc) { ++ events |= SLIRP_POLL_OUT | SLIRP_POLL_ERR; ++ } ++ ++ /* ++ * Set for reading (and urgent data) if we are connected, can ++ * receive more, and we have room for it XXX /2 ? ++ */ ++ if (CONN_CANFRCV(so) && ++ (so->so_snd.sb_cc < (so->so_snd.sb_datalen / 2))) { ++ events |= SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR | ++ SLIRP_POLL_PRI; ++ } ++ ++ if (events) { ++ so->pollfds_idx = add_poll(so->s, events, opaque); ++ } ++ } ++ ++ /* ++ * UDP sockets ++ */ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if it's timed out ++ */ ++ if (so->so_expire) { ++ if (so->so_expire <= curtime) { ++ udp_detach(so); ++ continue; ++ } else { ++ slirp->do_slowtimo = true; /* Let socket expire */ ++ } ++ } ++ ++ /* ++ * When UDP packets are received from over the ++ * link, they're sendto()'d straight away, so ++ * no need for setting for writing ++ * Limit the number of packets queued by this session ++ * to 4. Note that even though we try and limit this ++ * to 4 packets, the session could have more queued ++ * if the packets needed to be fragmented ++ * (XXX <= 4 ?) 
++ */ ++ if ((so->so_state & SS_ISFCONNECTED) && so->so_queued <= 4) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ } ++ } ++ ++ /* ++ * ICMP sockets ++ */ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if it's timed out ++ */ ++ if (so->so_expire) { ++ if (so->so_expire <= curtime) { ++ icmp_detach(so); ++ continue; ++ } else { ++ slirp->do_slowtimo = true; /* Let socket expire */ ++ } ++ } ++ ++ if (so->so_state & SS_ISFCONNECTED) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ } ++ } ++ ++ slirp_update_timeout(slirp, timeout); ++} ++ ++void slirp_pollfds_poll(Slirp *slirp, int select_error, ++ SlirpGetREventsCb get_revents, void *opaque) ++{ ++ struct socket *so, *so_next; ++ int ret; ++ ++ curtime = slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS; ++ ++ /* ++ * See if anything has timed out ++ */ ++ if (slirp->time_fasttimo && ++ ((curtime - slirp->time_fasttimo) >= TIMEOUT_FAST)) { ++ tcp_fasttimo(slirp); ++ slirp->time_fasttimo = 0; ++ } ++ if (slirp->do_slowtimo && ++ ((curtime - slirp->last_slowtimo) >= TIMEOUT_SLOW)) { ++ ip_slowtimo(slirp); ++ tcp_slowtimo(slirp); ++ slirp->last_slowtimo = curtime; ++ } ++ ++ /* ++ * Check sockets ++ */ ++ if (!select_error) { ++ /* ++ * Check TCP sockets ++ */ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->so_state & SS_NOFDREF || so->s == -1) { ++ continue; ++ } ++ ++ /* ++ * Check for URG data ++ * This will soread as well, so no need to ++ * test for SLIRP_POLL_IN below if this succeeds ++ */ ++ if (revents & SLIRP_POLL_PRI) { ++ ret = sorecvoob(so); ++ if (ret < 0) { ++ /* Socket error might have resulted in the socket being ++ 
* removed, do not try to do anything more with it. */ ++ continue; ++ } ++ } ++ /* ++ * Check sockets for reading ++ */ ++ else if (revents & ++ (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR)) { ++ /* ++ * Check for incoming connections ++ */ ++ if (so->so_state & SS_FACCEPTCONN) { ++ tcp_connect(so); ++ continue; ++ } /* else */ ++ ret = soread(so); ++ ++ /* Output it if we read something */ ++ if (ret > 0) { ++ tcp_output(sototcpcb(so)); ++ } ++ if (ret < 0) { ++ /* Socket error might have resulted in the socket being ++ * removed, do not try to do anything more with it. */ ++ continue; ++ } ++ } ++ ++ /* ++ * Check sockets for writing ++ */ ++ if (!(so->so_state & SS_NOFDREF) && ++ (revents & (SLIRP_POLL_OUT | SLIRP_POLL_ERR))) { ++ /* ++ * Check for non-blocking, still-connecting sockets ++ */ ++ if (so->so_state & SS_ISFCONNECTING) { ++ /* Connected */ ++ so->so_state &= ~SS_ISFCONNECTING; ++ ++ ret = send(so->s, (const void *)&ret, 0, 0); ++ if (ret < 0) { ++ /* XXXXX Must fix, zero bytes is a NOP */ ++ if (errno == EAGAIN || errno == EWOULDBLOCK || ++ errno == EINPROGRESS || errno == ENOTCONN) { ++ continue; ++ } ++ ++ /* else failed */ ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; ++ } ++ /* else so->so_state &= ~SS_ISFCONNECTING; */ ++ ++ /* ++ * Continue tcp_input ++ */ ++ tcp_input((struct mbuf *)NULL, sizeof(struct ip), so, ++ so->so_ffamily); ++ /* continue; */ ++ } else { ++ ret = sowrite(so); ++ if (ret > 0) { ++ /* Call tcp_output in case we need to send a window ++ * update to the guest, otherwise it will be stuck ++ * until it sends a window probe. */ ++ tcp_output(sototcpcb(so)); ++ } ++ } ++ } ++ } ++ ++ /* ++ * Now UDP sockets. ++ * Incoming packets are sent straight away, they're not buffered. ++ * Incoming UDP data isn't buffered either. 
++ */ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->s != -1 && ++ (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { ++ sorecvfrom(so); ++ } ++ } ++ ++ /* ++ * Check incoming ICMP relies. ++ */ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->s != -1 && ++ (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { ++ icmp_receive(so); ++ } ++ } ++ } ++ ++ if_start(slirp); ++} ++ ++static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) ++{ ++ struct slirp_arphdr *ah = (struct slirp_arphdr *)(pkt + ETH_HLEN); ++ uint8_t arp_reply[MAX(ETH_HLEN + sizeof(struct slirp_arphdr), 64)]; ++ struct ethhdr *reh = (struct ethhdr *)arp_reply; ++ struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_reply + ETH_HLEN); ++ int ar_op; ++ struct gfwd_list *ex_ptr; ++ ++ if (!slirp->in_enabled) { ++ return; ++ } ++ ++ ar_op = ntohs(ah->ar_op); ++ switch (ar_op) { ++ case ARPOP_REQUEST: ++ if (ah->ar_tip == ah->ar_sip) { ++ /* Gratuitous ARP */ ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ return; ++ } ++ ++ if ((ah->ar_tip & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ if (ah->ar_tip == slirp->vnameserver_addr.s_addr || ++ ah->ar_tip == slirp->vhost_addr.s_addr) ++ goto arp_ok; ++ /* TODO: IPv6 */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_addr.s_addr == ah->ar_tip) ++ goto arp_ok; ++ } ++ return; ++ arp_ok: ++ memset(arp_reply, 0, sizeof(arp_reply)); ++ ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ ++ /* ARP request for alias/dns mac address */ ++ memcpy(reh->h_dest, pkt + ETH_ALEN, ETH_ALEN); 
++ memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&reh->h_source[2], &ah->ar_tip, 4); ++ reh->h_proto = htons(ETH_P_ARP); ++ ++ rah->ar_hrd = htons(1); ++ rah->ar_pro = htons(ETH_P_IP); ++ rah->ar_hln = ETH_ALEN; ++ rah->ar_pln = 4; ++ rah->ar_op = htons(ARPOP_REPLY); ++ memcpy(rah->ar_sha, reh->h_source, ETH_ALEN); ++ rah->ar_sip = ah->ar_tip; ++ memcpy(rah->ar_tha, ah->ar_sha, ETH_ALEN); ++ rah->ar_tip = ah->ar_sip; ++ slirp_send_packet_all(slirp, arp_reply, sizeof(arp_reply)); ++ } ++ break; ++ case ARPOP_REPLY: ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ break; ++ default: ++ break; ++ } ++} ++ ++void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) ++{ ++ struct mbuf *m; ++ int proto; ++ ++ if (pkt_len < ETH_HLEN) ++ return; ++ ++ proto = (((uint16_t)pkt[12]) << 8) + pkt[13]; ++ switch (proto) { ++ case ETH_P_ARP: ++ arp_input(slirp, pkt, pkt_len); ++ break; ++ case ETH_P_IP: ++ case ETH_P_IPV6: ++ m = m_get(slirp); ++ if (!m) ++ return; ++ /* Note: we add 2 to align the IP header on 4 bytes, ++ * and add the margin for the tcpiphdr overhead */ ++ if (M_FREEROOM(m) < pkt_len + TCPIPHDR_DELTA + 2) { ++ m_inc(m, pkt_len + TCPIPHDR_DELTA + 2); ++ } ++ m->m_len = pkt_len + TCPIPHDR_DELTA + 2; ++ memcpy(m->m_data + TCPIPHDR_DELTA + 2, pkt, pkt_len); ++ ++ m->m_data += TCPIPHDR_DELTA + 2 + ETH_HLEN; ++ m->m_len -= TCPIPHDR_DELTA + 2 + ETH_HLEN; ++ ++ if (proto == ETH_P_IP) { ++ ip_input(m); ++ } else if (proto == ETH_P_IPV6) { ++ ip6_input(m); ++ } ++ break; ++ ++ case ETH_P_NCSI: ++ ncsi_input(slirp, pkt, pkt_len); ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++/* Prepare the IPv4 packet to be sent to the ethernet device. Returns 1 if no ++ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet ++ * is ready to go. 
++ */ ++static int if_encap4(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, ++ uint8_t ethaddr[ETH_ALEN]) ++{ ++ const struct ip *iph = (const struct ip *)ifm->m_data; ++ ++ if (!arp_table_search(slirp, iph->ip_dst.s_addr, ethaddr)) { ++ uint8_t arp_req[ETH_HLEN + sizeof(struct slirp_arphdr)]; ++ struct ethhdr *reh = (struct ethhdr *)arp_req; ++ struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_req + ETH_HLEN); ++ ++ if (!ifm->resolution_requested) { ++ /* If the client addr is not known, send an ARP request */ ++ memset(reh->h_dest, 0xff, ETH_ALEN); ++ memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&reh->h_source[2], &slirp->vhost_addr, 4); ++ reh->h_proto = htons(ETH_P_ARP); ++ rah->ar_hrd = htons(1); ++ rah->ar_pro = htons(ETH_P_IP); ++ rah->ar_hln = ETH_ALEN; ++ rah->ar_pln = 4; ++ rah->ar_op = htons(ARPOP_REQUEST); ++ ++ /* source hw addr */ ++ memcpy(rah->ar_sha, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&rah->ar_sha[2], &slirp->vhost_addr, 4); ++ ++ /* source IP */ ++ rah->ar_sip = slirp->vhost_addr.s_addr; ++ ++ /* target hw addr (none) */ ++ memset(rah->ar_tha, 0, ETH_ALEN); ++ ++ /* target IP */ ++ rah->ar_tip = iph->ip_dst.s_addr; ++ slirp->client_ipaddr = iph->ip_dst; ++ slirp_send_packet_all(slirp, arp_req, sizeof(arp_req)); ++ ifm->resolution_requested = true; ++ ++ /* Expire request and drop outgoing packet after 1 second */ ++ ifm->expiration_date = ++ slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; ++ } ++ return 0; ++ } else { ++ memcpy(eh->h_source, special_ethaddr, ETH_ALEN - 4); ++ /* XXX: not correct */ ++ memcpy(&eh->h_source[2], &slirp->vhost_addr, 4); ++ eh->h_proto = htons(ETH_P_IP); ++ ++ /* Send this */ ++ return 2; ++ } ++} ++ ++/* Prepare the IPv6 packet to be sent to the ethernet device. Returns 1 if no ++ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet ++ * is ready to go. 
++ */ ++static int if_encap6(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, ++ uint8_t ethaddr[ETH_ALEN]) ++{ ++ const struct ip6 *ip6h = mtod(ifm, const struct ip6 *); ++ if (!ndp_table_search(slirp, ip6h->ip_dst, ethaddr)) { ++ if (!ifm->resolution_requested) { ++ ndp_send_ns(slirp, ip6h->ip_dst); ++ ifm->resolution_requested = true; ++ ifm->expiration_date = ++ slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; ++ } ++ return 0; ++ } else { ++ eh->h_proto = htons(ETH_P_IPV6); ++ in6_compute_ethaddr(ip6h->ip_src, eh->h_source); ++ ++ /* Send this */ ++ return 2; ++ } ++} ++ ++/* Output the IP packet to the ethernet device. Returns 0 if the packet must be ++ * re-queued. ++ */ ++int if_encap(Slirp *slirp, struct mbuf *ifm) ++{ ++ uint8_t buf[IF_MTU_MAX + 100]; ++ struct ethhdr *eh = (struct ethhdr *)buf; ++ uint8_t ethaddr[ETH_ALEN]; ++ const struct ip *iph = (const struct ip *)ifm->m_data; ++ int ret; ++ ++ if (ifm->m_len + ETH_HLEN > sizeof(buf)) { ++ return 1; ++ } ++ ++ switch (iph->ip_v) { ++ case IPVERSION: ++ ret = if_encap4(slirp, ifm, eh, ethaddr); ++ if (ret < 2) { ++ return ret; ++ } ++ break; ++ ++ case IP6VERSION: ++ ret = if_encap6(slirp, ifm, eh, ethaddr); ++ if (ret < 2) { ++ return ret; ++ } ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ break; ++ } ++ ++ memcpy(eh->h_dest, ethaddr, ETH_ALEN); ++ DEBUG_ARG("src = %02x:%02x:%02x:%02x:%02x:%02x", eh->h_source[0], ++ eh->h_source[1], eh->h_source[2], eh->h_source[3], ++ eh->h_source[4], eh->h_source[5]); ++ DEBUG_ARG("dst = %02x:%02x:%02x:%02x:%02x:%02x", eh->h_dest[0], ++ eh->h_dest[1], eh->h_dest[2], eh->h_dest[3], eh->h_dest[4], ++ eh->h_dest[5]); ++ memcpy(buf + sizeof(struct ethhdr), ifm->m_data, ifm->m_len); ++ slirp_send_packet_all(slirp, buf, ifm->m_len + ETH_HLEN); ++ return 1; ++} ++ ++/* Drop host forwarding rule, return 0 if found. 
*/ ++/* TODO: IPv6 */ ++int slirp_remove_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port) ++{ ++ struct socket *so; ++ struct socket *head = (is_udp ? &slirp->udb : &slirp->tcb); ++ struct sockaddr_in addr; ++ int port = htons(host_port); ++ socklen_t addr_len; ++ ++ for (so = head->so_next; so != head; so = so->so_next) { ++ addr_len = sizeof(addr); ++ if ((so->so_state & SS_HOSTFWD) && ++ getsockname(so->s, (struct sockaddr *)&addr, &addr_len) == 0 && ++ addr.sin_addr.s_addr == host_addr.s_addr && addr.sin_port == port) { ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++ return 0; ++ } ++ } ++ ++ return -1; ++} ++ ++/* TODO: IPv6 */ ++int slirp_add_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port, struct in_addr guest_addr, int guest_port) ++{ ++ if (!guest_addr.s_addr) { ++ guest_addr = slirp->vdhcp_startaddr; ++ } ++ if (is_udp) { ++ if (!udp_listen(slirp, host_addr.s_addr, htons(host_port), ++ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) ++ return -1; ++ } else { ++ if (!tcp_listen(slirp, host_addr.s_addr, htons(host_port), ++ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) ++ return -1; ++ } ++ return 0; ++} ++ ++/* TODO: IPv6 */ ++static bool check_guestfwd(Slirp *slirp, struct in_addr *guest_addr, ++ int guest_port) ++{ ++ struct gfwd_list *tmp_ptr; ++ ++ if (!guest_addr->s_addr) { ++ guest_addr->s_addr = slirp->vnetwork_addr.s_addr | ++ (htonl(0x0204) & ~slirp->vnetwork_mask.s_addr); ++ } ++ if ((guest_addr->s_addr & slirp->vnetwork_mask.s_addr) != ++ slirp->vnetwork_addr.s_addr || ++ guest_addr->s_addr == slirp->vhost_addr.s_addr || ++ guest_addr->s_addr == slirp->vnameserver_addr.s_addr) { ++ return false; ++ } ++ ++ /* check if the port is "bound" */ ++ for (tmp_ptr = slirp->guestfwd_list; tmp_ptr; tmp_ptr = tmp_ptr->ex_next) { ++ if (guest_port == tmp_ptr->ex_fport && ++ guest_addr->s_addr == tmp_ptr->ex_addr.s_addr) ++ return false; 
++ } ++ ++ return true; ++} ++ ++int slirp_add_exec(Slirp *slirp, const char *cmdline, ++ struct in_addr *guest_addr, int guest_port) ++{ ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_exec(&slirp->guestfwd_list, cmdline, *guest_addr, htons(guest_port)); ++ return 0; ++} ++ ++int slirp_add_unix(Slirp *slirp, const char *unixsock, ++ struct in_addr *guest_addr, int guest_port) ++{ ++#ifdef G_OS_UNIX ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_unix(&slirp->guestfwd_list, unixsock, *guest_addr, htons(guest_port)); ++ return 0; ++#else ++ g_warn_if_reached(); ++ return -1; ++#endif ++} ++ ++int slirp_add_guestfwd(Slirp *slirp, SlirpWriteCb write_cb, void *opaque, ++ struct in_addr *guest_addr, int guest_port) ++{ ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_guestfwd(&slirp->guestfwd_list, write_cb, opaque, *guest_addr, ++ htons(guest_port)); ++ return 0; ++} ++ ++int slirp_remove_guestfwd(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ return remove_guestfwd(&slirp->guestfwd_list, guest_addr, ++ htons(guest_port)); ++} ++ ++ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags) ++{ ++ if (so->s == -1 && so->guestfwd) { ++ /* XXX this blocks entire thread. Rewrite to use ++ * qemu_chr_fe_write and background I/O callbacks */ ++ so->guestfwd->write_cb(buf, len, so->guestfwd->opaque); ++ return len; ++ } ++ ++ if (so->s == -1) { ++ /* ++ * This should in theory not happen but it is hard to be ++ * sure because some code paths will end up with so->s == -1 ++ * on a failure but don't dispose of the struct socket. ++ * Check specifically, so we don't pass -1 to send(). 
++ */ ++ errno = EBADF; ++ return -1; ++ } ++ ++ return send(so->s, buf, len, flags); ++} ++ ++struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ struct socket *so; ++ ++ /* TODO: IPv6 */ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { ++ if (so->so_faddr.s_addr == guest_addr.s_addr && ++ htons(so->so_fport) == guest_port) { ++ return so; ++ } ++ } ++ return NULL; ++} ++ ++size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ struct iovec iov[2]; ++ struct socket *so; ++ ++ so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); ++ ++ if (!so || so->so_state & SS_NOFDREF) { ++ return 0; ++ } ++ ++ if (!CONN_CANFRCV(so) || so->so_snd.sb_cc >= (so->so_snd.sb_datalen / 2)) { ++ return 0; ++ } ++ ++ return sopreprbuf(so, iov, NULL); ++} ++ ++void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port, ++ const uint8_t *buf, int size) ++{ ++ int ret; ++ struct socket *so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); ++ ++ if (!so) ++ return; ++ ++ ret = soreadbuf(so, (const char *)buf, size); ++ ++ if (ret > 0) ++ tcp_output(sototcpcb(so)); ++} ++ ++void slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len) ++{ ++ ssize_t ret = slirp->cb->send_packet(buf, len, slirp->opaque); ++ ++ if (ret < 0) { ++ g_critical("Failed to send packet, ret: %ld", (long)ret); ++ } else if (ret < len) { ++ DEBUG_ERROR("send_packet() didn't send all data: %ld < %lu", (long)ret, ++ (unsigned long)len); ++ } ++} +diff --git a/slirp/src/slirp.h b/slirp/src/slirp.h +new file mode 100644 +index 0000000..32634bc +--- /dev/null ++++ b/slirp/src/slirp.h +@@ -0,0 +1,283 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef SLIRP_H ++#define SLIRP_H ++ ++#ifdef _WIN32 ++ ++/* as defined in sdkddkver.h */ ++#ifndef _WIN32_WINNT ++#define _WIN32_WINNT 0x0600 /* Vista */ ++#endif ++/* reduces the number of implicitly included headers */ 
++#ifndef WIN32_LEAN_AND_MEAN ++#define WIN32_LEAN_AND_MEAN ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++ ++#else ++#if !defined(__HAIKU__) ++#define O_BINARY 0 ++#endif ++#endif ++ ++#ifndef _WIN32 ++#include ++#include ++#include ++#include ++#include ++#endif ++ ++#ifdef __APPLE__ ++#include ++#endif ++ ++/* Avoid conflicting with the libc insque() and remque(), which ++ have different prototypes. */ ++#define insque slirp_insque ++#define remque slirp_remque ++#define quehead slirp_quehead ++ ++#include "debug.h" ++#include "util.h" ++ ++#include "libslirp.h" ++#include "ip.h" ++#include "ip6.h" ++#include "tcp.h" ++#include "tcp_timer.h" ++#include "tcp_var.h" ++#include "tcpip.h" ++#include "udp.h" ++#include "ip_icmp.h" ++#include "ip6_icmp.h" ++#include "mbuf.h" ++#include "sbuf.h" ++#include "socket.h" ++#include "if.h" ++#include "main.h" ++#include "misc.h" ++ ++#include "bootp.h" ++#include "tftp.h" ++ ++#define ARPOP_REQUEST 1 /* ARP request */ ++#define ARPOP_REPLY 2 /* ARP reply */ ++ ++struct ethhdr { ++ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ ++ unsigned char h_source[ETH_ALEN]; /* source ether addr */ ++ unsigned short h_proto; /* packet type ID field */ ++}; ++ ++struct slirp_arphdr { ++ unsigned short ar_hrd; /* format of hardware address */ ++ unsigned short ar_pro; /* format of protocol address */ ++ unsigned char ar_hln; /* length of hardware address */ ++ unsigned char ar_pln; /* length of protocol address */ ++ unsigned short ar_op; /* ARP opcode (command) */ ++ ++ /* ++ * Ethernet looks like this : This bit is variable sized however... 
++ */ ++ unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */ ++ uint32_t ar_sip; /* sender IP address */ ++ unsigned char ar_tha[ETH_ALEN]; /* target hardware address */ ++ uint32_t ar_tip; /* target IP address */ ++} SLIRP_PACKED; ++ ++#define ARP_TABLE_SIZE 16 ++ ++typedef struct ArpTable { ++ struct slirp_arphdr table[ARP_TABLE_SIZE]; ++ int next_victim; ++} ArpTable; ++ ++void arp_table_add(Slirp *slirp, uint32_t ip_addr, uint8_t ethaddr[ETH_ALEN]); ++ ++bool arp_table_search(Slirp *slirp, uint32_t ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]); ++ ++struct ndpentry { ++ unsigned char eth_addr[ETH_ALEN]; /* sender hardware address */ ++ struct in6_addr ip_addr; /* sender IP address */ ++}; ++ ++#define NDP_TABLE_SIZE 16 ++ ++typedef struct NdpTable { ++ struct ndpentry table[NDP_TABLE_SIZE]; ++ int next_victim; ++} NdpTable; ++ ++void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t ethaddr[ETH_ALEN]); ++bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]); ++ ++struct Slirp { ++ unsigned time_fasttimo; ++ unsigned last_slowtimo; ++ bool do_slowtimo; ++ ++ bool in_enabled, in6_enabled; ++ ++ /* virtual network configuration */ ++ struct in_addr vnetwork_addr; ++ struct in_addr vnetwork_mask; ++ struct in_addr vhost_addr; ++ struct in6_addr vprefix_addr6; ++ uint8_t vprefix_len; ++ struct in6_addr vhost_addr6; ++ struct in_addr vdhcp_startaddr; ++ struct in_addr vnameserver_addr; ++ struct in6_addr vnameserver_addr6; ++ ++ struct in_addr client_ipaddr; ++ char client_hostname[33]; ++ ++ int restricted; ++ struct gfwd_list *guestfwd_list; ++ ++ int if_mtu; ++ int if_mru; ++ ++ bool disable_host_loopback; ++ ++ /* mbuf states */ ++ struct quehead m_freelist; ++ struct quehead m_usedlist; ++ int mbuf_alloced; ++ ++ /* if states */ ++ struct quehead if_fastq; /* fast queue (for interactive data) */ ++ struct quehead if_batchq; /* queue for non-interactive data */ ++ bool if_start_busy; /* avoid if_start 
recursion */ ++ ++ /* ip states */ ++ struct ipq ipq; /* ip reass. queue */ ++ uint16_t ip_id; /* ip packet ctr, for ids */ ++ ++ /* bootp/dhcp states */ ++ BOOTPClient bootp_clients[NB_BOOTP_CLIENTS]; ++ char *bootp_filename; ++ size_t vdnssearch_len; ++ uint8_t *vdnssearch; ++ char *vdomainname; ++ ++ /* tcp states */ ++ struct socket tcb; ++ struct socket *tcp_last_so; ++ tcp_seq tcp_iss; /* tcp initial send seq # */ ++ uint32_t tcp_now; /* for RFC 1323 timestamps */ ++ ++ /* udp states */ ++ struct socket udb; ++ struct socket *udp_last_so; ++ ++ /* icmp states */ ++ struct socket icmp; ++ struct socket *icmp_last_so; ++ ++ /* tftp states */ ++ char *tftp_prefix; ++ struct tftp_session tftp_sessions[TFTP_SESSIONS_MAX]; ++ char *tftp_server_name; ++ ++ ArpTable arp_table; ++ NdpTable ndp_table; ++ ++ GRand *grand; ++ void *ra_timer; ++ ++ bool enable_emu; ++ ++ const SlirpCb *cb; ++ void *opaque; ++ ++ struct sockaddr_in *outbound_addr; ++ struct sockaddr_in6 *outbound_addr6; ++}; ++ ++void if_start(Slirp *); ++ ++int get_dns_addr(struct in_addr *pdns_addr); ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id); ++ ++/* ncsi.c */ ++void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); ++ ++#ifndef _WIN32 ++#include ++#endif ++ ++ ++extern bool slirp_do_keepalive; ++ ++#define TCP_MAXIDLE (TCPTV_KEEPCNT * TCPTV_KEEPINTVL) ++ ++/* dnssearch.c */ ++int translate_dnssearch(Slirp *s, const char **names); ++ ++/* cksum.c */ ++int cksum(struct mbuf *m, int len); ++int ip6_cksum(struct mbuf *m); ++ ++/* if.c */ ++void if_init(Slirp *); ++void if_output(struct socket *, struct mbuf *); ++ ++/* ip_input.c */ ++void ip_init(Slirp *); ++void ip_cleanup(Slirp *); ++void ip_input(struct mbuf *); ++void ip_slowtimo(Slirp *); ++void ip_stripoptions(register struct mbuf *, struct mbuf *); ++ ++/* ip_output.c */ ++int ip_output(struct socket *, struct mbuf *); ++ ++/* ip6_input.c */ ++void ip6_init(Slirp *); ++void ip6_cleanup(Slirp *); ++void 
ip6_input(struct mbuf *); ++ ++/* ip6_output */ ++int ip6_output(struct socket *, struct mbuf *, int fast); ++ ++/* tcp_input.c */ ++void tcp_input(register struct mbuf *, int, struct socket *, unsigned short af); ++int tcp_mss(register struct tcpcb *, unsigned); ++ ++/* tcp_output.c */ ++int tcp_output(register struct tcpcb *); ++void tcp_setpersist(register struct tcpcb *); ++ ++/* tcp_subr.c */ ++void tcp_init(Slirp *); ++void tcp_cleanup(Slirp *); ++void tcp_template(struct tcpcb *); ++void tcp_respond(struct tcpcb *, register struct tcpiphdr *, ++ register struct mbuf *, tcp_seq, tcp_seq, int, unsigned short); ++struct tcpcb *tcp_newtcpcb(struct socket *); ++struct tcpcb *tcp_close(register struct tcpcb *); ++void tcp_sockclosed(struct tcpcb *); ++int tcp_fconnect(struct socket *, unsigned short af); ++void tcp_connect(struct socket *); ++void tcp_attach(struct socket *); ++uint8_t tcp_tos(struct socket *); ++int tcp_emu(struct socket *, struct mbuf *); ++int tcp_ctl(struct socket *); ++struct tcpcb *tcp_drop(struct tcpcb *tp, int err); ++ ++struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port); ++ ++void slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len); ++ ++#endif +diff --git a/slirp/src/socket.c b/slirp/src/socket.c +new file mode 100644 +index 0000000..4cd9a64 +--- /dev/null ++++ b/slirp/src/socket.c +@@ -0,0 +1,957 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++#ifdef __sun__ ++#include ++#endif ++ ++static void sofcantrcvmore(struct socket *so); ++static void sofcantsendmore(struct socket *so); ++ ++struct socket *solookup(struct socket **last, struct socket *head, ++ struct sockaddr_storage *lhost, ++ struct sockaddr_storage *fhost) ++{ ++ struct socket *so = *last; ++ ++ /* Optimisation */ ++ if (so != head && sockaddr_equal(&(so->lhost.ss), lhost) && ++ (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { ++ return so; ++ } ++ ++ for (so = head->so_next; so != head; so = so->so_next) { ++ if (sockaddr_equal(&(so->lhost.ss), lhost) && ++ (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { ++ *last = so; ++ return so; ++ } ++ } ++ ++ return (struct socket *)NULL; ++} ++ ++/* ++ * Create a new socket, initialise the fields ++ * It is the responsibility of the caller to ++ * insque() it into the correct linked-list ++ */ ++struct socket *socreate(Slirp *slirp) ++{ ++ struct socket *so = g_new(struct socket, 1); ++ ++ memset(so, 0, sizeof(struct socket)); ++ so->so_state = SS_NOFDREF; ++ so->s = -1; ++ so->slirp = slirp; ++ so->pollfds_idx = -1; ++ ++ return so; ++} ++ ++/* ++ * Remove references to so from the given message queue. 
++ */ ++static void soqfree(struct socket *so, struct quehead *qh) ++{ ++ struct mbuf *ifq; ++ ++ for (ifq = (struct mbuf *)qh->qh_link; (struct quehead *)ifq != qh; ++ ifq = ifq->ifq_next) { ++ if (ifq->ifq_so == so) { ++ struct mbuf *ifm; ++ ifq->ifq_so = NULL; ++ for (ifm = ifq->ifs_next; ifm != ifq; ifm = ifm->ifs_next) { ++ ifm->ifq_so = NULL; ++ } ++ } ++ } ++} ++ ++/* ++ * remque and free a socket, clobber cache ++ */ ++void sofree(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ ++ soqfree(so, &slirp->if_fastq); ++ soqfree(so, &slirp->if_batchq); ++ ++ if (so == slirp->tcp_last_so) { ++ slirp->tcp_last_so = &slirp->tcb; ++ } else if (so == slirp->udp_last_so) { ++ slirp->udp_last_so = &slirp->udb; ++ } else if (so == slirp->icmp_last_so) { ++ slirp->icmp_last_so = &slirp->icmp; ++ } ++ m_free(so->so_m); ++ ++ if (so->so_next && so->so_prev) ++ remque(so); /* crashes if so is not in a queue */ ++ ++ if (so->so_tcpcb) { ++ g_free(so->so_tcpcb); ++ } ++ g_free(so); ++} ++ ++size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np) ++{ ++ int n, lss, total; ++ struct sbuf *sb = &so->so_snd; ++ int len = sb->sb_datalen - sb->sb_cc; ++ int mss = so->so_tcpcb->t_maxseg; ++ ++ DEBUG_CALL("sopreprbuf"); ++ DEBUG_ARG("so = %p", so); ++ ++ if (len <= 0) ++ return 0; ++ ++ iov[0].iov_base = sb->sb_wptr; ++ iov[1].iov_base = NULL; ++ iov[1].iov_len = 0; ++ if (sb->sb_wptr < sb->sb_rptr) { ++ iov[0].iov_len = sb->sb_rptr - sb->sb_wptr; ++ /* Should never succeed, but... */ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ if (iov[0].iov_len > mss) ++ iov[0].iov_len -= iov[0].iov_len % mss; ++ n = 1; ++ } else { ++ iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr; ++ /* Should never succeed, but... 
*/ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ len -= iov[0].iov_len; ++ if (len) { ++ iov[1].iov_base = sb->sb_data; ++ iov[1].iov_len = sb->sb_rptr - sb->sb_data; ++ if (iov[1].iov_len > len) ++ iov[1].iov_len = len; ++ total = iov[0].iov_len + iov[1].iov_len; ++ if (total > mss) { ++ lss = total % mss; ++ if (iov[1].iov_len > lss) { ++ iov[1].iov_len -= lss; ++ n = 2; ++ } else { ++ lss -= iov[1].iov_len; ++ iov[0].iov_len -= lss; ++ n = 1; ++ } ++ } else ++ n = 2; ++ } else { ++ if (iov[0].iov_len > mss) ++ iov[0].iov_len -= iov[0].iov_len % mss; ++ n = 1; ++ } ++ } ++ if (np) ++ *np = n; ++ ++ return iov[0].iov_len + (n - 1) * iov[1].iov_len; ++} ++ ++/* ++ * Read from so's socket into sb_snd, updating all relevant sbuf fields ++ * NOTE: This will only be called if it is select()ed for reading, so ++ * a read() of 0 (or less) means it's disconnected ++ */ ++int soread(struct socket *so) ++{ ++ int n, nn; ++ size_t buf_len; ++ struct sbuf *sb = &so->so_snd; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("soread"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * No need to check if there's enough room to read. 
++ * soread wouldn't have been called if there weren't ++ */ ++ buf_len = sopreprbuf(so, iov, &n); ++ assert(buf_len != 0); ++ ++ nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, 0); ++ if (nn <= 0) { ++ if (nn < 0 && (errno == EINTR || errno == EAGAIN)) ++ return 0; ++ else { ++ int err; ++ socklen_t elen = sizeof err; ++ struct sockaddr_storage addr; ++ struct sockaddr *paddr = (struct sockaddr *)&addr; ++ socklen_t alen = sizeof addr; ++ ++ err = errno; ++ if (nn == 0) { ++ int shutdown_wr = so->so_state & SS_FCANTSENDMORE; ++ ++ if (!shutdown_wr && getpeername(so->s, paddr, &alen) < 0) { ++ err = errno; ++ } else { ++ getsockopt(so->s, SOL_SOCKET, SO_ERROR, &err, &elen); ++ } ++ } ++ ++ DEBUG_MISC(" --- soread() disconnected, nn = %d, errno = %d-%s", nn, ++ errno, strerror(errno)); ++ sofcantrcvmore(so); ++ ++ if (err == ECONNRESET || err == ECONNREFUSED || err == ENOTCONN || ++ err == EPIPE) { ++ tcp_drop(sototcpcb(so), err); ++ } else { ++ tcp_sockclosed(sototcpcb(so)); ++ } ++ return -1; ++ } ++ } ++ ++ /* ++ * If there was no error, try and read the second time round ++ * We read again if n = 2 (ie, there's another part of the buffer) ++ * and we read as much as we could in the first read ++ * We don't test for <= 0 this time, because there legitimately ++ * might not be any more data (since the socket is non-blocking), ++ * a close will be detected on next iteration. ++ * A return of -1 won't (shouldn't) happen, since it didn't happen above ++ */ ++ if (n == 2 && nn == iov[0].iov_len) { ++ int ret; ++ ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0); ++ if (ret > 0) ++ nn += ret; ++ } ++ ++ DEBUG_MISC(" ... 
read nn = %d bytes", nn); ++ ++ /* Update fields */ ++ sb->sb_cc += nn; ++ sb->sb_wptr += nn; ++ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_wptr -= sb->sb_datalen; ++ return nn; ++} ++ ++int soreadbuf(struct socket *so, const char *buf, int size) ++{ ++ int n, nn, copy = size; ++ struct sbuf *sb = &so->so_snd; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("soreadbuf"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * No need to check if there's enough room to read. ++ * soread wouldn't have been called if there weren't ++ */ ++ assert(size > 0); ++ if (sopreprbuf(so, iov, &n) < size) ++ goto err; ++ ++ nn = MIN(iov[0].iov_len, copy); ++ memcpy(iov[0].iov_base, buf, nn); ++ ++ copy -= nn; ++ buf += nn; ++ ++ if (copy == 0) ++ goto done; ++ ++ memcpy(iov[1].iov_base, buf, copy); ++ ++done: ++ /* Update fields */ ++ sb->sb_cc += size; ++ sb->sb_wptr += size; ++ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_wptr -= sb->sb_datalen; ++ return size; ++err: ++ ++ sofcantrcvmore(so); ++ tcp_sockclosed(sototcpcb(so)); ++ g_critical("soreadbuf buffer too small"); ++ return -1; ++} ++ ++/* ++ * Get urgent data ++ * ++ * When the socket is created, we set it SO_OOBINLINE, ++ * so when OOB data arrives, we soread() it and everything ++ * in the send buffer is sent as urgent data ++ */ ++int sorecvoob(struct socket *so) ++{ ++ struct tcpcb *tp = sototcpcb(so); ++ int ret; ++ ++ DEBUG_CALL("sorecvoob"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * We take a guess at how much urgent data has arrived. ++ * In most situations, when urgent data arrives, the next ++ * read() should get all the urgent data. This guess will ++ * be wrong however if more data arrives just after the ++ * urgent data, or the read() doesn't return all the ++ * urgent data. 
++ */ ++ ret = soread(so); ++ if (ret > 0) { ++ tp->snd_up = tp->snd_una + so->so_snd.sb_cc; ++ tp->t_force = 1; ++ tcp_output(tp); ++ tp->t_force = 0; ++ } ++ ++ return ret; ++} ++ ++/* ++ * Send urgent data ++ * There's a lot duplicated code here, but... ++ */ ++int sosendoob(struct socket *so) ++{ ++ struct sbuf *sb = &so->so_rcv; ++ char buff[2048]; /* XXX Shouldn't be sending more oob data than this */ ++ ++ int n; ++ ++ DEBUG_CALL("sosendoob"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc); ++ ++ if (so->so_urgc > 2048) ++ so->so_urgc = 2048; /* XXXX */ ++ ++ if (sb->sb_rptr < sb->sb_wptr) { ++ /* We can send it directly */ ++ n = slirp_send(so, sb->sb_rptr, so->so_urgc, ++ (MSG_OOB)); /* |MSG_DONTWAIT)); */ ++ } else { ++ /* ++ * Since there's no sendv or sendtov like writev, ++ * we must copy all data to a linear buffer then ++ * send it all ++ */ ++ uint32_t urgc = so->so_urgc; ++ int len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; ++ if (len > urgc) { ++ len = urgc; ++ } ++ memcpy(buff, sb->sb_rptr, len); ++ urgc -= len; ++ if (urgc) { ++ n = sb->sb_wptr - sb->sb_data; ++ if (n > urgc) { ++ n = urgc; ++ } ++ memcpy((buff + len), sb->sb_data, n); ++ len += n; ++ } ++ n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */ ++#ifdef DEBUG ++ if (n != len) { ++ DEBUG_ERROR("Didn't send all data urgently XXXXX"); ++ } ++#endif ++ } ++ ++ if (n < 0) { ++ return n; ++ } ++ so->so_urgc -= n; ++ DEBUG_MISC(" ---2 sent %d bytes urgent data, %d urgent bytes left", n, ++ so->so_urgc); ++ ++ sb->sb_cc -= n; ++ sb->sb_rptr += n; ++ if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_rptr -= sb->sb_datalen; ++ ++ return n; ++} ++ ++/* ++ * Write data from so_rcv to so's socket, ++ * updating all sbuf field as necessary ++ */ ++int sowrite(struct socket *so) ++{ ++ int n, nn; ++ struct sbuf *sb = &so->so_rcv; ++ int len = sb->sb_cc; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("sowrite"); ++ DEBUG_ARG("so = %p", so); ++ ++ if 
(so->so_urgc) { ++ uint32_t expected = so->so_urgc; ++ if (sosendoob(so) < expected) { ++ /* Treat a short write as a fatal error too, ++ * rather than continuing on and sending the urgent ++ * data as if it were non-urgent and leaving the ++ * so_urgc count wrong. ++ */ ++ goto err_disconnected; ++ } ++ if (sb->sb_cc == 0) ++ return 0; ++ } ++ ++ /* ++ * No need to check if there's something to write, ++ * sowrite wouldn't have been called otherwise ++ */ ++ ++ iov[0].iov_base = sb->sb_rptr; ++ iov[1].iov_base = NULL; ++ iov[1].iov_len = 0; ++ if (sb->sb_rptr < sb->sb_wptr) { ++ iov[0].iov_len = sb->sb_wptr - sb->sb_rptr; ++ /* Should never succeed, but... */ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ n = 1; ++ } else { ++ iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ len -= iov[0].iov_len; ++ if (len) { ++ iov[1].iov_base = sb->sb_data; ++ iov[1].iov_len = sb->sb_wptr - sb->sb_data; ++ if (iov[1].iov_len > len) ++ iov[1].iov_len = len; ++ n = 2; ++ } else ++ n = 1; ++ } ++ /* Check if there's urgent data to send, and if so, send it */ ++ ++ nn = slirp_send(so, iov[0].iov_base, iov[0].iov_len, 0); ++ /* This should never happen, but people tell me it does *shrug* */ ++ if (nn < 0 && (errno == EAGAIN || errno == EINTR)) ++ return 0; ++ ++ if (nn <= 0) { ++ goto err_disconnected; ++ } ++ ++ if (n == 2 && nn == iov[0].iov_len) { ++ int ret; ++ ret = slirp_send(so, iov[1].iov_base, iov[1].iov_len, 0); ++ if (ret > 0) ++ nn += ret; ++ } ++ DEBUG_MISC(" ... 
wrote nn = %d bytes", nn); ++ ++ /* Update sbuf */ ++ sb->sb_cc -= nn; ++ sb->sb_rptr += nn; ++ if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_rptr -= sb->sb_datalen; ++ ++ /* ++ * If in DRAIN mode, and there's no more data, set ++ * it CANTSENDMORE ++ */ ++ if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0) ++ sofcantsendmore(so); ++ ++ return nn; ++ ++err_disconnected: ++ DEBUG_MISC(" --- sowrite disconnected, so->so_state = %x, errno = %d", ++ so->so_state, errno); ++ sofcantsendmore(so); ++ tcp_sockclosed(sototcpcb(so)); ++ return -1; ++} ++ ++/* ++ * recvfrom() a UDP socket ++ */ ++void sorecvfrom(struct socket *so) ++{ ++ struct sockaddr_storage addr; ++ struct sockaddr_storage saddr, daddr; ++ socklen_t addrlen = sizeof(struct sockaddr_storage); ++ ++ DEBUG_CALL("sorecvfrom"); ++ DEBUG_ARG("so = %p", so); ++ ++ if (so->so_type == IPPROTO_ICMP) { /* This is a "ping" reply */ ++ char buff[256]; ++ int len; ++ ++ len = recvfrom(so->s, buff, 256, 0, (struct sockaddr *)&addr, &addrlen); ++ /* XXX Check if reply is "correct"? */ ++ ++ if (len == -1 || len == 0) { ++ uint8_t code = ICMP_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) ++ code = ICMP_UNREACH_HOST; ++ else if (errno == ENETUNREACH) ++ code = ICMP_UNREACH_NET; ++ ++ DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno)); ++ } else { ++ icmp_reflect(so->so_m); ++ so->so_m = NULL; /* Don't m_free() it again! 
*/ ++ } ++ /* No need for this socket anymore, udp_detach it */ ++ udp_detach(so); ++ } else { /* A "normal" UDP packet */ ++ struct mbuf *m; ++ int len; ++#ifdef _WIN32 ++ unsigned long n; ++#else ++ int n; ++#endif ++ ++ if (ioctlsocket(so->s, FIONREAD, &n) != 0) { ++ DEBUG_MISC(" ioctlsocket errno = %d-%s\n", errno, strerror(errno)); ++ return; ++ } ++ if (n == 0) { ++ return; ++ } ++ ++ m = m_get(so->slirp); ++ if (!m) { ++ return; ++ } ++ switch (so->so_ffamily) { ++ case AF_INET: ++ m->m_data += IF_MAXLINKHDR + sizeof(struct udpiphdr); ++ break; ++ case AF_INET6: ++ m->m_data += ++ IF_MAXLINKHDR + sizeof(struct ip6) + sizeof(struct udphdr); ++ break; ++ default: ++ g_assert_not_reached(); ++ break; ++ } ++ ++ /* ++ * XXX Shouldn't FIONREAD packets destined for port 53, ++ * but I don't know the max packet size for DNS lookups ++ */ ++ len = M_FREEROOM(m); ++ /* if (so->so_fport != htons(53)) { */ ++ ++ if (n > len) { ++ n = (m->m_data - m->m_dat) + m->m_len + n + 1; ++ m_inc(m, n); ++ len = M_FREEROOM(m); ++ } ++ /* } */ ++ ++ m->m_len = recvfrom(so->s, m->m_data, len, 0, (struct sockaddr *)&addr, ++ &addrlen); ++ DEBUG_MISC(" did recvfrom %d, errno = %d-%s", m->m_len, errno, ++ strerror(errno)); ++ if (m->m_len < 0) { ++ /* Report error as ICMP */ ++ switch (so->so_lfamily) { ++ uint8_t code; ++ case AF_INET: ++ code = ICMP_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) { ++ code = ICMP_UNREACH_HOST; ++ } else if (errno == ENETUNREACH) { ++ code = ICMP_UNREACH_NET; ++ } ++ ++ DEBUG_MISC(" rx error, tx icmp ICMP_UNREACH:%i", code); ++ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, ++ strerror(errno)); ++ break; ++ case AF_INET6: ++ code = ICMP6_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) { ++ code = ICMP6_UNREACH_ADDRESS; ++ } else if (errno == ENETUNREACH) { ++ code = ICMP6_UNREACH_NO_ROUTE; ++ } ++ ++ DEBUG_MISC(" rx error, tx icmp6 ICMP_UNREACH:%i", code); ++ icmp6_send_error(so->so_m, ICMP6_UNREACH, code); ++ break; ++ default: ++ 
g_assert_not_reached(); ++ break; ++ } ++ m_free(m); ++ } else { ++ /* ++ * Hack: domain name lookup will be used the most for UDP, ++ * and since they'll only be used once there's no need ++ * for the 4 minute (or whatever) timeout... So we time them ++ * out much quicker (10 seconds for now...) ++ */ ++ if (so->so_expire) { ++ if (so->so_fport == htons(53)) ++ so->so_expire = curtime + SO_EXPIREFAST; ++ else ++ so->so_expire = curtime + SO_EXPIRE; ++ } ++ ++ /* ++ * If this packet was destined for CTL_ADDR, ++ * make it look like that's where it came from ++ */ ++ saddr = addr; ++ sotranslate_in(so, &saddr); ++ daddr = so->lhost.ss; ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ udp_output(so, m, (struct sockaddr_in *)&saddr, ++ (struct sockaddr_in *)&daddr, so->so_iptos); ++ break; ++ case AF_INET6: ++ udp6_output(so, m, (struct sockaddr_in6 *)&saddr, ++ (struct sockaddr_in6 *)&daddr); ++ break; ++ default: ++ g_assert_not_reached(); ++ break; ++ } ++ } /* rx error */ ++ } /* if ping packet */ ++} ++ ++/* ++ * sendto() a socket ++ */ ++int sosendto(struct socket *so, struct mbuf *m) ++{ ++ int ret; ++ struct sockaddr_storage addr; ++ ++ DEBUG_CALL("sosendto"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ addr = so->fhost.ss; ++ DEBUG_CALL(" sendto()ing)"); ++ if (sotranslate_out(so, &addr) < 0) { ++ return -1; ++ } ++ ++ /* Don't care what port we get */ ++ ret = sendto(so->s, m->m_data, m->m_len, 0, (struct sockaddr *)&addr, ++ sockaddr_size(&addr)); ++ if (ret < 0) ++ return -1; ++ ++ /* ++ * Kill the socket if there's no reply in 4 minutes, ++ * but only if it's an expirable socket ++ */ ++ if (so->so_expire) ++ so->so_expire = curtime + SO_EXPIRE; ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_ISFCONNECTED; /* So that it gets select()ed */ ++ return 0; ++} ++ ++/* ++ * Listen for incoming TCP connections ++ */ ++struct socket *tcp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, ++ uint32_t laddr, unsigned lport, int 
flags) ++{ ++ /* TODO: IPv6 */ ++ struct sockaddr_in addr; ++ struct socket *so; ++ int s, opt = 1; ++ socklen_t addrlen = sizeof(addr); ++ memset(&addr, 0, addrlen); ++ ++ DEBUG_CALL("tcp_listen"); ++ DEBUG_ARG("haddr = %s", inet_ntoa((struct in_addr){ .s_addr = haddr })); ++ DEBUG_ARG("hport = %d", ntohs(hport)); ++ DEBUG_ARG("laddr = %s", inet_ntoa((struct in_addr){ .s_addr = laddr })); ++ DEBUG_ARG("lport = %d", ntohs(lport)); ++ DEBUG_ARG("flags = %x", flags); ++ ++ so = socreate(slirp); ++ ++ /* Don't tcp_attach... we don't need so_snd nor so_rcv */ ++ if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL) { ++ g_free(so); ++ return NULL; ++ } ++ insque(so, &slirp->tcb); ++ ++ /* ++ * SS_FACCEPTONCE sockets must time out. ++ */ ++ if (flags & SS_FACCEPTONCE) ++ so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT * 2; ++ ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= (SS_FACCEPTCONN | flags); ++ so->so_lfamily = AF_INET; ++ so->so_lport = lport; /* Kept in network format */ ++ so->so_laddr.s_addr = laddr; /* Ditto */ ++ ++ addr.sin_family = AF_INET; ++ addr.sin_addr.s_addr = haddr; ++ addr.sin_port = hport; ++ ++ if (((s = slirp_socket(AF_INET, SOCK_STREAM, 0)) < 0) || ++ (slirp_socket_set_fast_reuse(s) < 0) || ++ (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0) || ++ (listen(s, 1) < 0)) { ++ int tmperrno = errno; /* Don't clobber the real reason we failed */ ++ ++ if (s >= 0) { ++ closesocket(s); ++ } ++ sofree(so); ++ /* Restore the real errno */ ++#ifdef _WIN32 ++ WSASetLastError(tmperrno); ++#else ++ errno = tmperrno; ++#endif ++ return NULL; ++ } ++ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ opt = 1; ++ setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(int)); ++ ++ getsockname(s, (struct sockaddr *)&addr, &addrlen); ++ so->so_ffamily = AF_INET; ++ so->so_fport = addr.sin_port; ++ if (addr.sin_addr.s_addr == 0 || ++ addr.sin_addr.s_addr == loopback_addr.s_addr) ++ so->so_faddr = slirp->vhost_addr; ++ else ++ so->so_faddr = 
addr.sin_addr; ++ ++ so->s = s; ++ return so; ++} ++ ++/* ++ * Various session state calls ++ * XXX Should be #define's ++ * The socket state stuff needs work, these often get call 2 or 3 ++ * times each when only 1 was needed ++ */ ++void soisfconnecting(struct socket *so) ++{ ++ so->so_state &= ~(SS_NOFDREF | SS_ISFCONNECTED | SS_FCANTRCVMORE | ++ SS_FCANTSENDMORE | SS_FWDRAIN); ++ so->so_state |= SS_ISFCONNECTING; /* Clobber other states */ ++} ++ ++void soisfconnected(struct socket *so) ++{ ++ so->so_state &= ~(SS_ISFCONNECTING | SS_FWDRAIN | SS_NOFDREF); ++ so->so_state |= SS_ISFCONNECTED; /* Clobber other states */ ++} ++ ++static void sofcantrcvmore(struct socket *so) ++{ ++ if ((so->so_state & SS_NOFDREF) == 0) { ++ shutdown(so->s, 0); ++ } ++ so->so_state &= ~(SS_ISFCONNECTING); ++ if (so->so_state & SS_FCANTSENDMORE) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* Don't select it */ ++ } else { ++ so->so_state |= SS_FCANTRCVMORE; ++ } ++} ++ ++static void sofcantsendmore(struct socket *so) ++{ ++ if ((so->so_state & SS_NOFDREF) == 0) { ++ shutdown(so->s, 1); /* send FIN to fhost */ ++ } ++ so->so_state &= ~(SS_ISFCONNECTING); ++ if (so->so_state & SS_FCANTRCVMORE) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* as above */ ++ } else { ++ so->so_state |= SS_FCANTSENDMORE; ++ } ++} ++ ++/* ++ * Set write drain mode ++ * Set CANTSENDMORE once all data has been write()n ++ */ ++void sofwdrain(struct socket *so) ++{ ++ if (so->so_rcv.sb_cc) ++ so->so_state |= SS_FWDRAIN; ++ else ++ sofcantsendmore(so); ++} ++ ++static bool sotranslate_out4(Slirp *s, struct socket *so, struct sockaddr_in *sin) ++{ ++ if (so->so_faddr.s_addr == s->vnameserver_addr.s_addr) { ++ return get_dns_addr(&sin->sin_addr) >= 0; ++ } ++ ++ if (so->so_faddr.s_addr == s->vhost_addr.s_addr || ++ so->so_faddr.s_addr == 0xffffffff) { ++ if (s->disable_host_loopback) { ++ return false; ++ } ++ ++ sin->sin_addr = loopback_addr; ++ } ++ ++ 
return true; ++} ++ ++static bool sotranslate_out6(Slirp *s, struct socket *so, struct sockaddr_in6 *sin) ++{ ++ if (in6_equal(&so->so_faddr6, &s->vnameserver_addr6)) { ++ uint32_t scope_id; ++ if (get_dns6_addr(&sin->sin6_addr, &scope_id) >= 0) { ++ sin->sin6_scope_id = scope_id; ++ return true; ++ } ++ return false; ++ } ++ ++ if (in6_equal_net(&so->so_faddr6, &s->vprefix_addr6, s->vprefix_len) || ++ in6_equal(&so->so_faddr6, &(struct in6_addr)ALLNODES_MULTICAST)) { ++ if (s->disable_host_loopback) { ++ return false; ++ } ++ ++ sin->sin6_addr = in6addr_loopback; ++ } ++ ++ return true; ++} ++ ++ ++/* ++ * Translate addr in host addr when it is a virtual address ++ */ ++int sotranslate_out(struct socket *so, struct sockaddr_storage *addr) ++{ ++ bool ok = true; ++ ++ switch (addr->ss_family) { ++ case AF_INET: ++ ok = sotranslate_out4(so->slirp, so, (struct sockaddr_in *)addr); ++ break; ++ case AF_INET6: ++ ok = sotranslate_out6(so->slirp, so, (struct sockaddr_in6 *)addr); ++ break; ++ } ++ ++ if (!ok) { ++ errno = EPERM; ++ return -1; ++ } ++ ++ return 0; ++} ++ ++void sotranslate_in(struct socket *so, struct sockaddr_storage *addr) ++{ ++ Slirp *slirp = so->slirp; ++ struct sockaddr_in *sin = (struct sockaddr_in *)addr; ++ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; ++ ++ switch (addr->ss_family) { ++ case AF_INET: ++ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ uint32_t inv_mask = ~slirp->vnetwork_mask.s_addr; ++ ++ if ((so->so_faddr.s_addr & inv_mask) == inv_mask) { ++ sin->sin_addr = slirp->vhost_addr; ++ } else if (sin->sin_addr.s_addr == loopback_addr.s_addr || ++ so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { ++ sin->sin_addr = so->so_faddr; ++ } ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6, ++ slirp->vprefix_len)) { ++ if (in6_equal(&sin6->sin6_addr, &in6addr_loopback) || ++ !in6_equal(&so->so_faddr6, &slirp->vhost_addr6)) { ++ 
sin6->sin6_addr = so->so_faddr6; ++ } ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++/* ++ * Translate connections from localhost to the real hostname ++ */ ++void sotranslate_accept(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ if (so->so_faddr.s_addr == INADDR_ANY || ++ (so->so_faddr.s_addr & loopback_mask) == ++ (loopback_addr.s_addr & loopback_mask)) { ++ so->so_faddr = slirp->vhost_addr; ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_equal(&so->so_faddr6, &in6addr_any) || ++ in6_equal(&so->so_faddr6, &in6addr_loopback)) { ++ so->so_faddr6 = slirp->vhost_addr6; ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++void sodrop(struct socket *s, int num) ++{ ++ if (sbdrop(&s->so_snd, num)) { ++ s->slirp->cb->notify(s->slirp->opaque); ++ } ++} +diff --git a/slirp/src/socket.h b/slirp/src/socket.h +new file mode 100644 +index 0000000..a6a1e5e +--- /dev/null ++++ b/slirp/src/socket.h +@@ -0,0 +1,164 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#ifndef SLIRP_SOCKET_H ++#define SLIRP_SOCKET_H ++ ++#include "misc.h" ++ ++#define SO_EXPIRE 240000 ++#define SO_EXPIREFAST 10000 ++ ++/* ++ * Our socket structure ++ */ ++ ++union slirp_sockaddr { ++ struct sockaddr_storage ss; ++ struct sockaddr_in sin; ++ struct sockaddr_in6 sin6; ++}; ++ ++struct socket { ++ struct socket *so_next, *so_prev; /* For a linked list of sockets */ ++ ++ int s; /* The actual socket */ ++ struct gfwd_list *guestfwd; ++ ++ int pollfds_idx; /* GPollFD GArray index */ ++ ++ Slirp *slirp; /* managing slirp instance */ ++ ++ /* XXX union these with not-yet-used sbuf params */ ++ struct mbuf *so_m; /* Pointer to the original SYN packet, ++ * for non-blocking connect()'s, and ++ * PING reply's */ ++ struct tcpiphdr *so_ti; /* Pointer to the original ti within ++ * so_mconn, for non-blocking connections */ ++ uint32_t so_urgc; ++ union slirp_sockaddr fhost; /* Foreign host */ ++#define so_faddr fhost.sin.sin_addr ++#define so_fport fhost.sin.sin_port ++#define so_faddr6 fhost.sin6.sin6_addr ++#define so_fport6 fhost.sin6.sin6_port ++#define so_ffamily fhost.ss.ss_family ++ ++ union slirp_sockaddr lhost; /* Local host */ ++#define so_laddr lhost.sin.sin_addr ++#define so_lport lhost.sin.sin_port ++#define so_laddr6 lhost.sin6.sin6_addr ++#define so_lport6 lhost.sin6.sin6_port ++#define so_lfamily lhost.ss.ss_family ++ ++ uint8_t so_iptos; /* Type of service */ ++ uint8_t so_emu; /* Is the socket emulated? 
*/ ++ ++ uint8_t so_type; /* Type of socket, UDP or TCP */ ++ int32_t so_state; /* internal state flags SS_*, below */ ++ ++ struct tcpcb *so_tcpcb; /* pointer to TCP protocol control block */ ++ unsigned so_expire; /* When the socket will expire */ ++ ++ int so_queued; /* Number of packets queued from this socket */ ++ int so_nqueued; /* Number of packets queued in a row ++ * Used to determine when to "downgrade" a session ++ * from fastq to batchq */ ++ ++ struct sbuf so_rcv; /* Receive buffer */ ++ struct sbuf so_snd; /* Send buffer */ ++}; ++ ++ ++/* ++ * Socket state bits. (peer means the host on the Internet, ++ * local host means the host on the other end of the modem) ++ */ ++#define SS_NOFDREF 0x001 /* No fd reference */ ++ ++#define SS_ISFCONNECTING \ ++ 0x002 /* Socket is connecting to peer (non-blocking connect()'s) */ ++#define SS_ISFCONNECTED 0x004 /* Socket is connected to peer */ ++#define SS_FCANTRCVMORE \ ++ 0x008 /* Socket can't receive more from peer (for half-closes) */ ++#define SS_FCANTSENDMORE \ ++ 0x010 /* Socket can't send more to peer (for half-closes) */ ++#define SS_FWDRAIN \ ++ 0x040 /* We received a FIN, drain data and set SS_FCANTSENDMORE */ ++ ++#define SS_CTL 0x080 ++#define SS_FACCEPTCONN \ ++ 0x100 /* Socket is accepting connections from a host on the internet */ ++#define SS_FACCEPTONCE \ ++ 0x200 /* If set, the SS_FACCEPTCONN socket will die after one accept */ ++ ++#define SS_PERSISTENT_MASK 0xf000 /* Unremovable state bits */ ++#define SS_HOSTFWD 0x1000 /* Socket describes host->guest forwarding */ ++#define SS_INCOMING \ ++ 0x2000 /* Connection was initiated by a host on the internet */ ++ ++static inline int sockaddr_equal(struct sockaddr_storage *a, ++ struct sockaddr_storage *b) ++{ ++ if (a->ss_family != b->ss_family) { ++ return 0; ++ } ++ ++ switch (a->ss_family) { ++ case AF_INET: { ++ struct sockaddr_in *a4 = (struct sockaddr_in *)a; ++ struct sockaddr_in *b4 = (struct sockaddr_in *)b; ++ return a4->sin_addr.s_addr 
== b4->sin_addr.s_addr && ++ a4->sin_port == b4->sin_port; ++ } ++ case AF_INET6: { ++ struct sockaddr_in6 *a6 = (struct sockaddr_in6 *)a; ++ struct sockaddr_in6 *b6 = (struct sockaddr_in6 *)b; ++ return (in6_equal(&a6->sin6_addr, &b6->sin6_addr) && ++ a6->sin6_port == b6->sin6_port); ++ } ++ default: ++ g_assert_not_reached(); ++ } ++ ++ return 0; ++} ++ ++static inline socklen_t sockaddr_size(struct sockaddr_storage *a) ++{ ++ switch (a->ss_family) { ++ case AF_INET: ++ return sizeof(struct sockaddr_in); ++ case AF_INET6: ++ return sizeof(struct sockaddr_in6); ++ default: ++ g_assert_not_reached(); ++ } ++} ++ ++struct socket *solookup(struct socket **, struct socket *, ++ struct sockaddr_storage *, struct sockaddr_storage *); ++struct socket *socreate(Slirp *); ++void sofree(struct socket *); ++int soread(struct socket *); ++int sorecvoob(struct socket *); ++int sosendoob(struct socket *); ++int sowrite(struct socket *); ++void sorecvfrom(struct socket *); ++int sosendto(struct socket *, struct mbuf *); ++struct socket *tcp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); ++void soisfconnecting(register struct socket *); ++void soisfconnected(register struct socket *); ++void sofwdrain(struct socket *); ++struct iovec; /* For win32 */ ++size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np); ++int soreadbuf(struct socket *so, const char *buf, int size); ++ ++int sotranslate_out(struct socket *, struct sockaddr_storage *); ++void sotranslate_in(struct socket *, struct sockaddr_storage *); ++void sotranslate_accept(struct socket *); ++void sodrop(struct socket *, int num); ++ ++ ++#endif /* SLIRP_SOCKET_H */ +diff --git a/slirp/src/state.c b/slirp/src/state.c +new file mode 100644 +index 0000000..22af77b +--- /dev/null ++++ b/slirp/src/state.c +@@ -0,0 +1,379 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * libslirp ++ * ++ * Copyright (c) 2004-2008 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person 
obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "slirp.h" ++#include "vmstate.h" ++#include "stream.h" ++ ++static int slirp_tcp_post_load(void *opaque, int version) ++{ ++ tcp_template((struct tcpcb *)opaque); ++ ++ return 0; ++} ++ ++static const VMStateDescription vmstate_slirp_tcp = { ++ .name = "slirp-tcp", ++ .version_id = 0, ++ .post_load = slirp_tcp_post_load, ++ .fields = (VMStateField[]){ VMSTATE_INT16(t_state, struct tcpcb), ++ VMSTATE_INT16_ARRAY(t_timer, struct tcpcb, ++ TCPT_NTIMERS), ++ VMSTATE_INT16(t_rxtshift, struct tcpcb), ++ VMSTATE_INT16(t_rxtcur, struct tcpcb), ++ VMSTATE_INT16(t_dupacks, struct tcpcb), ++ VMSTATE_UINT16(t_maxseg, struct tcpcb), ++ VMSTATE_UINT8(t_force, struct tcpcb), ++ VMSTATE_UINT16(t_flags, struct tcpcb), ++ VMSTATE_UINT32(snd_una, struct tcpcb), ++ VMSTATE_UINT32(snd_nxt, struct tcpcb), ++ VMSTATE_UINT32(snd_up, struct tcpcb), ++ VMSTATE_UINT32(snd_wl1, struct tcpcb), ++ VMSTATE_UINT32(snd_wl2, struct tcpcb), ++ VMSTATE_UINT32(iss, struct tcpcb), ++ VMSTATE_UINT32(snd_wnd, struct tcpcb), ++ VMSTATE_UINT32(rcv_wnd, struct tcpcb), ++ VMSTATE_UINT32(rcv_nxt, struct tcpcb), ++ VMSTATE_UINT32(rcv_up, struct tcpcb), ++ VMSTATE_UINT32(irs, struct tcpcb), ++ VMSTATE_UINT32(rcv_adv, struct tcpcb), ++ VMSTATE_UINT32(snd_max, struct tcpcb), ++ VMSTATE_UINT32(snd_cwnd, struct tcpcb), ++ VMSTATE_UINT32(snd_ssthresh, struct tcpcb), ++ VMSTATE_INT16(t_idle, struct tcpcb), ++ VMSTATE_INT16(t_rtt, struct tcpcb), ++ VMSTATE_UINT32(t_rtseq, struct tcpcb), ++ VMSTATE_INT16(t_srtt, struct tcpcb), ++ VMSTATE_INT16(t_rttvar, struct tcpcb), ++ VMSTATE_UINT16(t_rttmin, struct tcpcb), ++ VMSTATE_UINT32(max_sndwnd, struct tcpcb), ++ VMSTATE_UINT8(t_oobflags, struct tcpcb), ++ VMSTATE_UINT8(t_iobc, struct tcpcb), ++ VMSTATE_INT16(t_softerror, struct tcpcb), ++ VMSTATE_UINT8(snd_scale, struct tcpcb), ++ VMSTATE_UINT8(rcv_scale, struct tcpcb), ++ VMSTATE_UINT8(request_r_scale, struct tcpcb), ++ VMSTATE_UINT8(requested_s_scale, struct tcpcb), ++ VMSTATE_UINT32(ts_recent, struct 
tcpcb), ++ VMSTATE_UINT32(ts_recent_age, struct tcpcb), ++ VMSTATE_UINT32(last_ack_sent, struct tcpcb), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++/* The sbuf has a pair of pointers that are migrated as offsets; ++ * we calculate the offsets and restore the pointers using ++ * pre_save/post_load on a tmp structure. ++ */ ++struct sbuf_tmp { ++ struct sbuf *parent; ++ uint32_t roff, woff; ++}; ++ ++static int sbuf_tmp_pre_save(void *opaque) ++{ ++ struct sbuf_tmp *tmp = opaque; ++ tmp->woff = tmp->parent->sb_wptr - tmp->parent->sb_data; ++ tmp->roff = tmp->parent->sb_rptr - tmp->parent->sb_data; ++ ++ return 0; ++} ++ ++static int sbuf_tmp_post_load(void *opaque, int version) ++{ ++ struct sbuf_tmp *tmp = opaque; ++ uint32_t requested_len = tmp->parent->sb_datalen; ++ ++ /* Allocate the buffer space used by the field after the tmp */ ++ sbreserve(tmp->parent, tmp->parent->sb_datalen); ++ ++ if (tmp->woff >= requested_len || tmp->roff >= requested_len) { ++ g_critical("invalid sbuf offsets r/w=%u/%u len=%u", tmp->roff, ++ tmp->woff, requested_len); ++ return -EINVAL; ++ } ++ ++ tmp->parent->sb_wptr = tmp->parent->sb_data + tmp->woff; ++ tmp->parent->sb_rptr = tmp->parent->sb_data + tmp->roff; ++ ++ return 0; ++} ++ ++ ++static const VMStateDescription vmstate_slirp_sbuf_tmp = { ++ .name = "slirp-sbuf-tmp", ++ .post_load = sbuf_tmp_post_load, ++ .pre_save = sbuf_tmp_pre_save, ++ .version_id = 0, ++ .fields = (VMStateField[]){ VMSTATE_UINT32(woff, struct sbuf_tmp), ++ VMSTATE_UINT32(roff, struct sbuf_tmp), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp_sbuf = { ++ .name = "slirp-sbuf", ++ .version_id = 0, ++ .fields = (VMStateField[]){ VMSTATE_UINT32(sb_cc, struct sbuf), ++ VMSTATE_UINT32(sb_datalen, struct sbuf), ++ VMSTATE_WITH_TMP(struct sbuf, struct sbuf_tmp, ++ vmstate_slirp_sbuf_tmp), ++ VMSTATE_VBUFFER_UINT32(sb_data, struct sbuf, 0, ++ NULL, sb_datalen), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static bool slirp_older_than_v4(void 
*opaque, int version_id) ++{ ++ return version_id < 4; ++} ++ ++static bool slirp_family_inet(void *opaque, int version_id) ++{ ++ union slirp_sockaddr *ssa = (union slirp_sockaddr *)opaque; ++ return ssa->ss.ss_family == AF_INET; ++} ++ ++static int slirp_socket_pre_load(void *opaque) ++{ ++ struct socket *so = opaque; ++ ++ tcp_attach(so); ++ /* Older versions don't load these fields */ ++ so->so_ffamily = AF_INET; ++ so->so_lfamily = AF_INET; ++ return 0; ++} ++ ++#ifndef _WIN32 ++#define VMSTATE_SIN4_ADDR(f, s, t) VMSTATE_UINT32_TEST(f, s, t) ++#else ++/* Win uses u_long rather than uint32_t - but it's still 32bits long */ ++#define VMSTATE_SIN4_ADDR(f, s, t) \ ++ VMSTATE_SINGLE_TEST(f, s, t, 0, slirp_vmstate_info_uint32, u_long) ++#endif ++ ++/* The OS provided ss_family field isn't that portable; it's size ++ * and type varies (16/8 bit, signed, unsigned) ++ * and the values it contains aren't fully portable. ++ */ ++typedef struct SS_FamilyTmpStruct { ++ union slirp_sockaddr *parent; ++ uint16_t portable_family; ++} SS_FamilyTmpStruct; ++ ++#define SS_FAMILY_MIG_IPV4 2 /* Linux, BSD, Win... 
*/
++#define SS_FAMILY_MIG_IPV6 10 /* Linux */
++#define SS_FAMILY_MIG_OTHER 0xffff
++
++/*
++ * .pre_save hook of vmstate_slirp_ss_family.
++ *
++ * Translates the host's ss_family value of the parent address into the
++ * fixed SS_FAMILY_MIG_* value stored in the temporary struct: AF_INET and
++ * AF_INET6 map to their portable constants, anything else is recorded as
++ * SS_FAMILY_MIG_OTHER.  Always returns 0.
++ */
++static int ss_family_pre_save(void *opaque)
++{
++    SS_FamilyTmpStruct *tss = opaque;
++
++    tss->portable_family = SS_FAMILY_MIG_OTHER;
++
++    if (tss->parent->ss.ss_family == AF_INET) {
++        tss->portable_family = SS_FAMILY_MIG_IPV4;
++    } else if (tss->parent->ss.ss_family == AF_INET6) {
++        tss->portable_family = SS_FAMILY_MIG_IPV6;
++    }
++
++    return 0;
++}
++
++/*
++ * .post_load hook of vmstate_slirp_ss_family.
++ *
++ * Converts the loaded portable family value back into the host's AF_*
++ * constant and stores it in the parent address.  Besides the portable IPv6
++ * value, the raw values 23 and 28 are accepted as IPv6 for compatibility.
++ * Any other value (including SS_FAMILY_MIG_OTHER) is logged and rejected
++ * with -EINVAL.  version_id is not used.
++ */
++static int ss_family_post_load(void *opaque, int version_id)
++{
++    SS_FamilyTmpStruct *tss = opaque;
++
++    switch (tss->portable_family) {
++    case SS_FAMILY_MIG_IPV4:
++        tss->parent->ss.ss_family = AF_INET;
++        break;
++    case SS_FAMILY_MIG_IPV6:
++    case 23: /* compatibility: AF_INET6 from mingw */
++    case 28: /* compatibility: AF_INET6 from FreeBSD sys/socket.h */
++        tss->parent->ss.ss_family = AF_INET6;
++        break;
++    default:
++        g_critical("invalid ss_family type %x", tss->portable_family);
++        return -EINVAL;
++    }
++
++    return 0;
++}
++
++/*
++ * Description of the temporary struct used to carry the address family as
++ * a 16-bit portable value; the two hooks above do the translation.
++ */
++static const VMStateDescription vmstate_slirp_ss_family = {
++    .name = "slirp-socket-addr/ss_family",
++    .pre_save = ss_family_pre_save,
++    .post_load = ss_family_post_load,
++    .fields =
++        (VMStateField[]){ VMSTATE_UINT16(portable_family, SS_FamilyTmpStruct),
++                          VMSTATE_END_OF_LIST() }
++};
++
++/*
++ * Description of a union slirp_sockaddr: the portable family first, then
++ * the IPv4 address and port, which are guarded by slirp_family_inet.
++ * The IPv6 fields are compiled out (#if 0), so they are not listed here.
++ */
++static const VMStateDescription vmstate_slirp_socket_addr = {
++    .name = "slirp-socket-addr",
++    .version_id = 4,
++    .fields =
++        (VMStateField[]){
++            VMSTATE_WITH_TMP(union slirp_sockaddr, SS_FamilyTmpStruct,
++                             vmstate_slirp_ss_family),
++            VMSTATE_SIN4_ADDR(sin.sin_addr.s_addr, union slirp_sockaddr,
++                              slirp_family_inet),
++            VMSTATE_UINT16_TEST(sin.sin_port, union slirp_sockaddr,
++                                slirp_family_inet),
++
++#if 0
++            /* Untested: Needs checking by someone with IPv6 test */
++            VMSTATE_BUFFER_TEST(sin6.sin6_addr, union slirp_sockaddr,
++                                slirp_family_inet6),
++            VMSTATE_UINT16_TEST(sin6.sin6_port, union slirp_sockaddr,
++                                slirp_family_inet6),
++            VMSTATE_UINT32_TEST(sin6.sin6_flowinfo, union slirp_sockaddr,
++                                slirp_family_inet6),
++            VMSTATE_UINT32_TEST(sin6.sin6_scope_id, union slirp_sockaddr,
++                                slirp_family_inet6),
++#endif
++
++            VMSTATE_END_OF_LIST() }
++};
++
++/*
++ * Description of a struct socket.  The address/port fields exist in two
++ * layouts: the so_faddr/so_laddr/so_fport/so_lport entries are guarded by
++ * slirp_older_than_v4, while fhost/lhost are versioned at 4 and use
++ * vmstate_slirp_socket_addr.  The order of the entries is the order of the
++ * field list and must not be changed.
++ */
++static const VMStateDescription vmstate_slirp_socket = {
++    .name = "slirp-socket",
++    .version_id = 4,
++    .pre_load = slirp_socket_pre_load,
++    .fields =
++        (VMStateField[]){
++            VMSTATE_UINT32(so_urgc, struct socket),
++            /* Pre-v4 versions */
++            VMSTATE_SIN4_ADDR(so_faddr.s_addr, struct socket,
++                              slirp_older_than_v4),
++            VMSTATE_SIN4_ADDR(so_laddr.s_addr, struct socket,
++                              slirp_older_than_v4),
++            VMSTATE_UINT16_TEST(so_fport, struct socket, slirp_older_than_v4),
++            VMSTATE_UINT16_TEST(so_lport, struct socket, slirp_older_than_v4),
++            /* v4 and newer */
++            VMSTATE_STRUCT(fhost, struct socket, 4, vmstate_slirp_socket_addr,
++                           union slirp_sockaddr),
++            VMSTATE_STRUCT(lhost, struct socket, 4, vmstate_slirp_socket_addr,
++                           union slirp_sockaddr),
++
++            VMSTATE_UINT8(so_iptos, struct socket),
++            VMSTATE_UINT8(so_emu, struct socket),
++            VMSTATE_UINT8(so_type, struct socket),
++            VMSTATE_INT32(so_state, struct socket),
++            VMSTATE_STRUCT(so_rcv, struct socket, 0, vmstate_slirp_sbuf,
++                           struct sbuf),
++            VMSTATE_STRUCT(so_snd, struct socket, 0, vmstate_slirp_sbuf,
++                           struct sbuf),
++            VMSTATE_STRUCT_POINTER(so_tcpcb, struct socket, vmstate_slirp_tcp,
++                                   struct tcpcb),
++            VMSTATE_END_OF_LIST() }
++};
++
++/* Description of one BOOTPClient entry: allocation field and MAC address. */
++static const VMStateDescription vmstate_slirp_bootp_client = {
++    .name = "slirp_bootpclient",
++    .fields = (VMStateField[]){ VMSTATE_UINT16(allocated, BOOTPClient),
++                                VMSTATE_BUFFER(macaddr, BOOTPClient),
++                                VMSTATE_END_OF_LIST() }
++};
++
++/*
++ * Description of the per-instance Slirp state: ip_id (versioned at 2) and
++ * the bootp_clients array (versioned at 3).
++ */
++static const VMStateDescription vmstate_slirp = {
++    .name = "slirp",
++    .version_id = 4,
++    .fields = (VMStateField[]){ VMSTATE_UINT16_V(ip_id, Slirp, 2),
++                                VMSTATE_STRUCT_ARRAY(
++                                    bootp_clients, Slirp, NB_BOOTP_CLIENTS, 3,
++                                    vmstate_slirp_bootp_client, BOOTPClient),
++                                VMSTATE_END_OF_LIST() }
++};
++
++/*
++ * Serialise the state of @slirp through @write_cb (called with @opaque).
++ *
++ * Stream layout produced here:
++ *   - for each guestfwd entry that has a write_cb and for which
++ *     slirp_find_ctl_socket() finds a socket: one marker byte (42)
++ *     followed by that socket's vmstate_slirp_socket state;
++ *   - one terminating 0 byte;
++ *   - the vmstate_slirp state of the instance.
++ *
++ * NOTE(review): the results of the write helpers are not checked here, so
++ * a failing write callback goes unnoticed by this function.
++ */
++void slirp_state_save(Slirp *slirp, SlirpWriteCb write_cb, void *opaque)
++{
++    struct gfwd_list *ex_ptr;
++    SlirpOStream f = {
++        .write_cb = write_cb,
++        .opaque = opaque,
++    };
++
++    for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next)
++        if (ex_ptr->write_cb) {
++            struct socket *so;
++            so = slirp_find_ctl_socket(slirp, ex_ptr->ex_addr,
++                                       ntohs(ex_ptr->ex_fport));
++            if (!so) {
++                continue;
++            }
++
++            /* non-zero marker: "a socket record follows" */
++            slirp_ostream_write_u8(&f, 42);
++            slirp_vmstate_save_state(&f, &vmstate_slirp_socket, so);
++        }
++    /* zero marker: end of the socket list */
++    slirp_ostream_write_u8(&f, 0);
++
++    slirp_vmstate_save_state(&f, &vmstate_slirp, slirp);
++}
++
++
++/*
++ * Restore state written by slirp_state_save() by reading through @read_cb
++ * (called with @opaque); @version_id is passed on to the vmstate loader.
++ *
++ * Socket records are read for as long as the marker byte is non-zero.
++ * Every loaded socket must have a foreign address inside the virtual
++ * network and must match a guestfwd entry (with a write_cb) on address and
++ * port; otherwise -EINVAL is returned.  A negative result from the vmstate
++ * loader is returned unchanged.  On success the result of loading the
++ * vmstate_slirp section is returned.
++ *
++ * NOTE(review): the early error returns leave the socket obtained from
++ * socreate() untouched; nothing in this function releases it - confirm
++ * who owns it on failure.
++ */
++int slirp_state_load(Slirp *slirp, int version_id, SlirpReadCb read_cb,
++                     void *opaque)
++{
++    struct gfwd_list *ex_ptr;
++    SlirpIStream f = {
++        .read_cb = read_cb,
++        .opaque = opaque,
++    };
++
++    while (slirp_istream_read_u8(&f)) {
++        int ret;
++        struct socket *so = socreate(slirp);
++
++        ret =
++            slirp_vmstate_load_state(&f, &vmstate_slirp_socket, so, version_id);
++        if (ret < 0) {
++            return ret;
++        }
++
++        /* the foreign address has to lie inside the virtual network */
++        if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) !=
++            slirp->vnetwork_addr.s_addr) {
++            return -EINVAL;
++        }
++        /* find the guestfwd entry this socket belongs to */
++        for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) {
++            if (ex_ptr->write_cb &&
++                so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr &&
++                so->so_fport == ex_ptr->ex_fport) {
++                break;
++            }
++        }
++        if (!ex_ptr) {
++            return -EINVAL;
++        }
++
++        so->guestfwd = ex_ptr;
++    }
++
++    return slirp_vmstate_load_state(&f, &vmstate_slirp, slirp, version_id);
++}
++
++/*
++ * Version number of the saved state format; matches the .version_id of the
++ * vmstate descriptions above.
++ */
++int slirp_state_version(void)
++{
++    return 4;
++}
+diff --git a/slirp/src/stream.c b/slirp/src/stream.c
+new file mode 100644
+index 0000000..6cf326f
+--- /dev/null
++++ b/slirp/src/stream.c
+@@ -0,0 +1,120 @@
++/* SPDX-License-Identifier: MIT */
++/*
++ * libslirp io streams
++ *
++ * Copyright (c) 2018 Red Hat, Inc.
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "stream.h" ++#include ++ ++bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size) ++{ ++ return f->read_cb(buf, size, f->opaque) == size; ++} ++ ++bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size) ++{ ++ return f->write_cb(buf, size, f->opaque) == size; ++} ++ ++uint8_t slirp_istream_read_u8(SlirpIStream *f) ++{ ++ uint8_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return b; ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b) ++{ ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++uint16_t slirp_istream_read_u16(SlirpIStream *f) ++{ ++ uint16_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GUINT16_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b) ++{ ++ b = GUINT16_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++uint32_t slirp_istream_read_u32(SlirpIStream *f) ++{ ++ uint32_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GUINT32_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b) ++{ ++ b = GUINT32_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++int16_t slirp_istream_read_i16(SlirpIStream *f) ++{ ++ int16_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GINT16_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b) ++{ ++ b = GINT16_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++int32_t slirp_istream_read_i32(SlirpIStream *f) ++{ ++ int32_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GINT32_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b) ++{ ++ b = GINT32_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} +diff --git a/slirp/src/stream.h b/slirp/src/stream.h +new file mode 100644 +index 0000000..08bb5b6 +--- /dev/null ++++ 
b/slirp/src/stream.h
+@@ -0,0 +1,35 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++#ifndef STREAM_H_
++#define STREAM_H_
++
++#include "libslirp.h"
++
++/* Input stream: a read callback plus the opaque pointer handed to it. */
++typedef struct SlirpIStream {
++    SlirpReadCb read_cb;
++    void *opaque;
++} SlirpIStream;
++
++/* Output stream: a write callback plus the opaque pointer handed to it. */
++typedef struct SlirpOStream {
++    SlirpWriteCb write_cb;
++    void *opaque;
++} SlirpOStream;
++
++/* Raw transfer of size bytes; the bool result reports success. */
++bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size);
++bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size);
++
++/*
++ * Fixed-width helpers, one read/write pair per integer type.  The read
++ * functions return the value directly and therefore have no separate
++ * error channel; the write functions return a bool status.
++ */
++uint8_t slirp_istream_read_u8(SlirpIStream *f);
++bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b);
++
++uint16_t slirp_istream_read_u16(SlirpIStream *f);
++bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b);
++
++uint32_t slirp_istream_read_u32(SlirpIStream *f);
++bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b);
++
++int16_t slirp_istream_read_i16(SlirpIStream *f);
++bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b);
++
++int32_t slirp_istream_read_i32(SlirpIStream *f);
++bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b);
++
++#endif /* STREAM_H_ */
+diff --git a/slirp/src/tcp.h b/slirp/src/tcp.h
+new file mode 100644
+index 0000000..70a9760
+--- /dev/null
++++ b/slirp/src/tcp.h
+@@ -0,0 +1,169 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ * Copyright (c) 1982, 1986, 1993
++ * The Regents of the University of California. All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ * 1. Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ * 2. Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in the
++ * documentation and/or other materials provided with the distribution.
++ * 3.
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp.h 8.1 (Berkeley) 6/10/93 ++ * tcp.h,v 1.3 1994/08/21 05:27:34 paul Exp ++ */ ++ ++#ifndef TCP_H ++#define TCP_H ++ ++#include ++ ++typedef uint32_t tcp_seq; ++ ++#define PR_SLOWHZ 2 /* 2 slow timeouts per second (approx) */ ++#define PR_FASTHZ 5 /* 5 fast timeouts per second (not important) */ ++ ++#define TCP_SNDSPACE 1024 * 128 ++#define TCP_RCVSPACE 1024 * 128 ++#define TCP_MAXSEG_MAX 32768 ++ ++/* ++ * TCP header. ++ * Per RFC 793, September, 1981. 
++ */
++/* Rename the struct so it cannot clash with a system-provided tcphdr. */
++#define tcphdr slirp_tcphdr
++struct tcphdr {
++    uint16_t th_sport; /* source port */
++    uint16_t th_dport; /* destination port */
++    tcp_seq th_seq; /* sequence number */
++    tcp_seq th_ack; /* acknowledgement number */
++/* the order of the two 4-bit fields depends on the host byte order */
++#if G_BYTE_ORDER == G_BIG_ENDIAN
++    uint8_t th_off : 4, /* data offset */
++        th_x2 : 4; /* (unused) */
++#else
++    uint8_t th_x2 : 4, /* (unused) */
++        th_off : 4; /* data offset */
++#endif
++    uint8_t th_flags; /* presumably a mask of the TH_* bits below - confirm */
++    uint16_t th_win; /* window */
++    uint16_t th_sum; /* checksum */
++    uint16_t th_urp; /* urgent pointer */
++};
++
++#include "tcp_var.h"
++
++/* TCP flag bits; only defined here when TH_FIN is not already defined. */
++#ifndef TH_FIN
++#define TH_FIN 0x01
++#define TH_SYN 0x02
++#define TH_RST 0x04
++#define TH_PUSH 0x08
++#define TH_ACK 0x10
++#define TH_URG 0x20
++#endif
++
++/* TCP option kinds; only defined here when TCPOPT_EOL is not already defined. */
++#ifndef TCPOPT_EOL
++#define TCPOPT_EOL 0
++#define TCPOPT_NOP 1
++#define TCPOPT_MAXSEG 2
++#define TCPOPT_WINDOW 3
++#define TCPOPT_SACK_PERMITTED 4 /* Experimental */
++#define TCPOPT_SACK 5 /* Experimental */
++#define TCPOPT_TIMESTAMP 8
++
++/* 32-bit word: NOP, NOP, TIMESTAMP kind, TIMESTAMP length */
++#define TCPOPT_TSTAMP_HDR \
++    (TCPOPT_NOP << 24 | TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 | \
++     TCPOLEN_TIMESTAMP)
++#endif
++
++/* TCP option lengths; only defined here when TCPOLEN_MAXSEG is not already defined. */
++#ifndef TCPOLEN_MAXSEG
++#define TCPOLEN_MAXSEG 4
++#define TCPOLEN_WINDOW 3
++#define TCPOLEN_SACK_PERMITTED 2
++#define TCPOLEN_TIMESTAMP 10
++#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP + 2) /* appendix A */
++#endif
++
++#undef TCP_MAXWIN
++#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */
++
++#undef TCP_MAX_WINSHIFT
++#define TCP_MAX_WINSHIFT 14 /* maximum window shift */
++
++/*
++ * User-settable options (used with setsockopt).
++ *
++ * We don't use the system headers on unix because we have conflicting
++ * local structures. We can't avoid the system definitions on Windows,
++ * so we undefine them.
++ */
++#undef TCP_NODELAY
++#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */
++#undef TCP_MAXSEG
++
++/*
++ * TCP FSM state definitions.
++ * Per RFC793, September, 1981.
++ */
++
++#define TCP_NSTATES 11 /* number of states below (0 through 10) */
++
++#define TCPS_CLOSED 0 /* closed */
++#define TCPS_LISTEN 1 /* listening for connection */
++#define TCPS_SYN_SENT 2 /* active, have sent syn */
++#define TCPS_SYN_RECEIVED 3 /* have sent and received syn */
++/* states < TCPS_ESTABLISHED are those where connections not established */
++#define TCPS_ESTABLISHED 4 /* established */
++#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */
++/* states > TCPS_CLOSE_WAIT are those where user has closed */
++#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */
++#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */
++#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */
++/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */
++#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */
++#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */
++
++/*
++ * State predicates.  They rely on the numeric ordering of the TCPS_*
++ * values above, so the states must not be renumbered.
++ */
++#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED)
++#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED)
++#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT)
++
++/*
++ * TCP sequence numbers are 32 bit integers operated
++ * on with modular arithmetic. These macros can be
++ * used to compare such integers.
++ *
++ * The difference is computed first and then cast to int, so the sign of
++ * the result gives the ordering across wraparound.
++ * NOTE(review): this assumes int is 32 bits wide - confirm for all targets.
++ */
++#define SEQ_LT(a, b) ((int)((a) - (b)) < 0)
++#define SEQ_LEQ(a, b) ((int)((a) - (b)) <= 0)
++#define SEQ_GT(a, b) ((int)((a) - (b)) > 0)
++#define SEQ_GEQ(a, b) ((int)((a) - (b)) >= 0)
++
++/*
++ * Macros to initialize tcp sequence numbers for
++ * send and receive from initial send and receive
++ * sequence numbers.
++ */ ++#define tcp_rcvseqinit(tp) (tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1 ++ ++#define tcp_sendseqinit(tp) \ ++ (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = (tp)->iss ++ ++#define TCP_ISSINCR (125 * 1024) /* increment for tcp_iss each second */ ++ ++#endif +diff --git a/slirp/src/tcp_input.c b/slirp/src/tcp_input.c +new file mode 100644 +index 0000000..d55b0c8 +--- /dev/null ++++ b/slirp/src/tcp_input.c +@@ -0,0 +1,1539 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ *
++ * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94
++ * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp
++ */
++
++/*
++ * Changes and additions relating to SLiRP
++ * Copyright (c) 1995 Danny Gasparovski.
++ */
++
++#include "slirp.h"
++#include "ip_icmp.h"
++
++/* presumably the duplicate-ACK threshold for fast retransmit - confirm at the use site */
++#define TCPREXMTTHRESH 3
++
++/* 24 days' worth of seconds (24 * 24 * 60 * 60) multiplied by PR_SLOWHZ */
++#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ)
++
++/* for modulo comparisons of timestamps */
++#define TSTMP_LT(a, b) ((int)((a) - (b)) < 0)
++#define TSTMP_GEQ(a, b) ((int)((a) - (b)) >= 0)
++
++/*
++ * Insert segment ti into reassembly queue of tcp with
++ * control block tp. Return TH_FIN if reassembly now includes
++ * a segment with FIN. The macro form does the common case inline
++ * (segment is the next to be received on an established connection,
++ * and the queue is empty), avoiding linkage into and removal
++ * from the queue and repetition of various conversions.
++ * Set DELACK for segments received in order, but ack immediately
++ * when segments are out of order (so fast retransmit can work).
++ */
++/*
++ * NOTE(review): the macro body is a bare { } block and several uses of the
++ * tp, so and flags arguments are not parenthesised, so it should only be
++ * invoked with plain identifiers and as a complete statement.
++ */
++#define TCP_REASS(tp, ti, m, so, flags)                                \
++    {                                                                  \
++        if ((ti)->ti_seq == (tp)->rcv_nxt && tcpfrag_list_empty(tp) && \
++            (tp)->t_state == TCPS_ESTABLISHED) {                       \
++            tp->t_flags |= TF_DELACK;                                  \
++            (tp)->rcv_nxt += (ti)->ti_len;                             \
++            flags = (ti)->ti_flags & TH_FIN;                           \
++            if (so->so_emu) {                                          \
++                if (tcp_emu((so), (m)))                                \
++                    sbappend(so, (m));                                 \
++            } else                                                     \
++                sbappend((so), (m));                                   \
++        } else {                                                       \
++            (flags) = tcp_reass((tp), (ti), (m));                      \
++            tp->t_flags |= TF_ACKNOW;                                  \
++        }                                                              \
++    }
++
++static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt,
++                          struct tcpiphdr *ti);
++static void tcp_xmit_timer(register struct tcpcb *tp, int rtt);
++
++/*
++ * Queue segment ti (data in mbuf m) on tp's reassembly list, trimming or
++ * dropping whatever overlaps already-queued segments, then hand every
++ * in-sequence segment at rcv_nxt to the socket.
++ *
++ * With ti == NULL nothing is inserted; only the "present" step runs.
++ *
++ * Returns 0 when nothing was delivered.  Otherwise returns the TH_FIN bit
++ * of the last delivered segment, i.e. TH_FIN or 0.
++ */
++static int tcp_reass(register struct tcpcb *tp, register struct tcpiphdr *ti,
++                     struct mbuf *m)
++{
++    register struct tcpiphdr *q;
++    struct socket *so = tp->t_socket;
++    int flags;
++
++    /*
++     * Call with ti==NULL after become established to
++     * force pre-ESTABLISHED data up to user socket.
++     */
++    if (ti == NULL)
++        goto present;
++
++    /*
++     * Find a segment which begins after this one does.
++     */
++    for (q = tcpfrag_list_first(tp); !tcpfrag_list_end(q, tp);
++         q = tcpiphdr_next(q))
++        if (SEQ_GT(q->ti_seq, ti->ti_seq))
++            break;
++
++    /*
++     * If there is a preceding segment, it may provide some of
++     * our data already. If so, drop the data from the incoming
++     * segment. If it provides all of our data, drop us.
++     */
++    if (!tcpfrag_list_end(tcpiphdr_prev(q), tp)) {
++        register int i;
++        q = tcpiphdr_prev(q);
++        /* conversion to int (in i) handles seq wraparound */
++        i = q->ti_seq + q->ti_len - ti->ti_seq;
++        if (i > 0) {
++            if (i >= ti->ti_len) {
++                m_free(m);
++                /*
++                 * Try to present any queued data
++                 * at the left window edge to the user.
++                 * This is needed after the 3-WHS
++                 * completes.
++                 */
++                goto present; /* ??? */
++            }
++            /* drop the i leading bytes that the predecessor already covers */
++            m_adj(m, i);
++            ti->ti_len -= i;
++            ti->ti_seq += i;
++        }
++        /* step back to the first segment that starts after ti */
++        q = tcpiphdr_next(q);
++    }
++    ti->ti_mbuf = m;
++
++    /*
++     * While we overlap succeeding segments trim them or,
++     * if they are completely covered, dequeue them.
++     */
++    while (!tcpfrag_list_end(q, tp)) {
++        register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
++        if (i <= 0)
++            break;
++        if (i < q->ti_len) {
++            q->ti_seq += i;
++            q->ti_len -= i;
++            m_adj(q->ti_mbuf, i);
++            break;
++        }
++        /* advance q first, then unlink and free the fully covered segment */
++        q = tcpiphdr_next(q);
++        m = tcpiphdr_prev(q)->ti_mbuf;
++        remque(tcpiphdr2qlink(tcpiphdr_prev(q)));
++        m_free(m);
++    }
++
++    /*
++     * Stick new segment in its place.
++     */
++    insque(tcpiphdr2qlink(ti), tcpiphdr2qlink(tcpiphdr_prev(q)));
++
++present:
++    /*
++     * Present data to user, advancing rcv_nxt through
++     * completed sequence space.
++     */
++    if (!TCPS_HAVEESTABLISHED(tp->t_state))
++        return (0);
++    ti = tcpfrag_list_first(tp);
++    if (tcpfrag_list_end(ti, tp) || ti->ti_seq != tp->rcv_nxt)
++        return (0);
++    /*
++     * NOTE(review): if TCPS_HAVEESTABLISHED() is the ">= TCPS_ESTABLISHED"
++     * test from tcp.h, the state cannot be TCPS_SYN_RECEIVED at this point
++     * and the check below never fires - confirm.
++     */
++    if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)
++        return (0);
++    do {
++        tp->rcv_nxt += ti->ti_len;
++        flags = ti->ti_flags & TH_FIN;
++        remque(tcpiphdr2qlink(ti));
++        m = ti->ti_mbuf;
++        ti = tcpiphdr_next(ti);
++        if (so->so_state & SS_FCANTSENDMORE)
++            m_free(m);
++        else {
++            if (so->so_emu) {
++                if (tcp_emu(so, m))
++                    sbappend(so, m);
++            } else
++                sbappend(so, m);
++        }
++    } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);
++    return (flags);
++}
++
++/*
++ * TCP input routine, follows pages 65-76 of the
++ * protocol specification dated September, 1981 very closely.
++ */ ++void tcp_input(struct mbuf *m, int iphlen, struct socket *inso, ++ unsigned short af) ++{ ++ struct ip save_ip, *ip; ++ struct ip6 save_ip6, *ip6; ++ register struct tcpiphdr *ti; ++ char *optp = NULL; ++ int optlen = 0; ++ int len, tlen, off; ++ register struct tcpcb *tp = NULL; ++ register int tiflags; ++ struct socket *so = NULL; ++ int todrop, acked, ourfinisacked, needoutput = 0; ++ int iss = 0; ++ uint32_t tiwin; ++ int ret; ++ struct sockaddr_storage lhost, fhost; ++ struct sockaddr_in *lhost4, *fhost4; ++ struct sockaddr_in6 *lhost6, *fhost6; ++ struct gfwd_list *ex_ptr; ++ Slirp *slirp; ++ ++ DEBUG_CALL("tcp_input"); ++ DEBUG_ARG("m = %p iphlen = %2d inso = %p", m, iphlen, inso); ++ ++ /* ++ * If called with m == 0, then we're continuing the connect ++ */ ++ if (m == NULL) { ++ so = inso; ++ slirp = so->slirp; ++ ++ /* Re-set a few variables */ ++ tp = sototcpcb(so); ++ m = so->so_m; ++ so->so_m = NULL; ++ ti = so->so_ti; ++ tiwin = ti->ti_win; ++ tiflags = ti->ti_flags; ++ ++ goto cont_conn; ++ } ++ slirp = m->slirp; ++ ++ ip = mtod(m, struct ip *); ++ ip6 = mtod(m, struct ip6 *); ++ ++ switch (af) { ++ case AF_INET: ++ if (iphlen > sizeof(struct ip)) { ++ ip_stripoptions(m, (struct mbuf *)0); ++ iphlen = sizeof(struct ip); ++ } ++ /* XXX Check if too short */ ++ ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip = *ip; ++ save_ip.ip_len += iphlen; ++ ++ /* ++ * Get IP and TCP header together in first mbuf. ++ * Note: IP leaves IP header in first mbuf. ++ */ ++ m->m_data -= ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); ++ m->m_len += ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); ++ ti = mtod(m, struct tcpiphdr *); ++ ++ /* ++ * Checksum extended TCP header and data. 
++ */ ++ tlen = ip->ip_len; ++ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; ++ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ memset(&ti->ti, 0, sizeof(ti->ti)); ++ ti->ti_x0 = 0; ++ ti->ti_src = save_ip.ip_src; ++ ti->ti_dst = save_ip.ip_dst; ++ ti->ti_pr = save_ip.ip_p; ++ ti->ti_len = htons((uint16_t)tlen); ++ break; ++ ++ case AF_INET6: ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip6 = *ip6; ++ /* ++ * Get IP and TCP header together in first mbuf. ++ * Note: IP leaves IP header in first mbuf. ++ */ ++ m->m_data -= sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ m->m_len += sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ ti = mtod(m, struct tcpiphdr *); ++ ++ tlen = ip6->ip_pl; ++ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; ++ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ memset(&ti->ti, 0, sizeof(ti->ti)); ++ ti->ti_x0 = 0; ++ ti->ti_src6 = save_ip6.ip_src; ++ ti->ti_dst6 = save_ip6.ip_dst; ++ ti->ti_nh6 = save_ip6.ip_nh; ++ ti->ti_len = htons((uint16_t)tlen); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ len = ((sizeof(struct tcpiphdr) - sizeof(struct tcphdr)) + tlen); ++ if (cksum(m, len)) { ++ goto drop; ++ } ++ ++ /* ++ * Check that TCP offset makes sense, ++ * pull out TCP options and adjust length. XXX ++ */ ++ off = ti->ti_off << 2; ++ if (off < sizeof(struct tcphdr) || off > tlen) { ++ goto drop; ++ } ++ tlen -= off; ++ ti->ti_len = tlen; ++ if (off > sizeof(struct tcphdr)) { ++ optlen = off - sizeof(struct tcphdr); ++ optp = mtod(m, char *) + sizeof(struct tcpiphdr); ++ } ++ tiflags = ti->ti_flags; ++ ++ /* ++ * Convert TCP protocol specific fields to host format. ++ */ ++ NTOHL(ti->ti_seq); ++ NTOHL(ti->ti_ack); ++ NTOHS(ti->ti_win); ++ NTOHS(ti->ti_urp); ++ ++ /* ++ * Drop TCP, IP headers and TCP options. 
++ */ ++ m->m_data += sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ m->m_len -= sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ ++ /* ++ * Locate pcb for segment. ++ */ ++findso: ++ lhost.ss_family = af; ++ fhost.ss_family = af; ++ switch (af) { ++ case AF_INET: ++ lhost4 = (struct sockaddr_in *)&lhost; ++ lhost4->sin_addr = ti->ti_src; ++ lhost4->sin_port = ti->ti_sport; ++ fhost4 = (struct sockaddr_in *)&fhost; ++ fhost4->sin_addr = ti->ti_dst; ++ fhost4->sin_port = ti->ti_dport; ++ break; ++ case AF_INET6: ++ lhost6 = (struct sockaddr_in6 *)&lhost; ++ lhost6->sin6_addr = ti->ti_src6; ++ lhost6->sin6_port = ti->ti_sport; ++ fhost6 = (struct sockaddr_in6 *)&fhost; ++ fhost6->sin6_addr = ti->ti_dst6; ++ fhost6->sin6_port = ti->ti_dport; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ so = solookup(&slirp->tcp_last_so, &slirp->tcb, &lhost, &fhost); ++ ++ /* ++ * If the state is CLOSED (i.e., TCB does not exist) then ++ * all data in the incoming segment is discarded. ++ * If the TCB exists but is in CLOSED state, it is embryonic, ++ * but should either do a listen or a connect soon. ++ * ++ * state == CLOSED means we've done socreate() but haven't ++ * attached it to a protocol yet... ++ * ++ * XXX If a TCB does not exist, and the TH_SYN flag is ++ * the only flag set, then create a session, mark it ++ * as if it was LISTENING, and continue... ++ */ ++ if (so == NULL) { ++ /* TODO: IPv6 */ ++ if (slirp->restricted) { ++ /* Any hostfwds will have an existing socket, so we only get here ++ * for non-hostfwd connections. These should be dropped, unless it ++ * happens to be a guestfwd. 
++ */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == ti->ti_dport && ++ ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) { ++ break; ++ } ++ } ++ if (!ex_ptr) { ++ goto dropwithreset; ++ } ++ } ++ ++ if ((tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) != TH_SYN) ++ goto dropwithreset; ++ ++ so = socreate(slirp); ++ tcp_attach(so); ++ ++ sbreserve(&so->so_snd, TCP_SNDSPACE); ++ sbreserve(&so->so_rcv, TCP_RCVSPACE); ++ ++ so->lhost.ss = lhost; ++ so->fhost.ss = fhost; ++ ++ so->so_iptos = tcp_tos(so); ++ if (so->so_iptos == 0) { ++ switch (af) { ++ case AF_INET: ++ so->so_iptos = ((struct ip *)ti)->ip_tos; ++ break; ++ case AF_INET6: ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++ ++ tp = sototcpcb(so); ++ tp->t_state = TCPS_LISTEN; ++ } ++ ++ /* ++ * If this is a still-connecting socket, this probably ++ * a retransmit of the SYN. Whether it's a retransmit SYN ++ * or something else, we nuke it. ++ */ ++ if (so->so_state & SS_ISFCONNECTING) ++ goto drop; ++ ++ tp = sototcpcb(so); ++ ++ /* XXX Should never fail */ ++ if (tp == NULL) ++ goto dropwithreset; ++ if (tp->t_state == TCPS_CLOSED) ++ goto drop; ++ ++ tiwin = ti->ti_win; ++ ++ /* ++ * Segment received on connection. ++ * Reset idle time and keep-alive timer. ++ */ ++ tp->t_idle = 0; ++ if (slirp_do_keepalive) ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; ++ else ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; ++ ++ /* ++ * Process options if not in LISTEN state, ++ * else do it below (after getting remote address). ++ */ ++ if (optp && tp->t_state != TCPS_LISTEN) ++ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); ++ ++ /* ++ * Header prediction: check for the two common cases ++ * of a uni-directional data xfer. If the packet has ++ * no control flags, is in-sequence, the window didn't ++ * change and we're not retransmitting, it's a ++ * candidate. If the length is zero and the ack moved ++ * forward, we're the sender side of the xfer. 
Just ++ * free the data acked & wake any higher level process ++ * that was blocked waiting for space. If the length ++ * is non-zero and the ack didn't move, we're the ++ * receiver side. If we're getting packets in-order ++ * (the reassembly queue is empty), add the data to ++ * the socket buffer and note that we need a delayed ack. ++ * ++ * XXX Some of these tests are not needed ++ * eg: the tiwin == tp->snd_wnd prevents many more ++ * predictions.. with no *real* advantage.. ++ */ ++ if (tp->t_state == TCPS_ESTABLISHED && ++ (tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) == TH_ACK && ++ ti->ti_seq == tp->rcv_nxt && tiwin && tiwin == tp->snd_wnd && ++ tp->snd_nxt == tp->snd_max) { ++ if (ti->ti_len == 0) { ++ if (SEQ_GT(ti->ti_ack, tp->snd_una) && ++ SEQ_LEQ(ti->ti_ack, tp->snd_max) && ++ tp->snd_cwnd >= tp->snd_wnd) { ++ /* ++ * this is a pure ack for outstanding data. ++ */ ++ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ acked = ti->ti_ack - tp->snd_una; ++ sodrop(so, acked); ++ tp->snd_una = ti->ti_ack; ++ m_free(m); ++ ++ /* ++ * If all outstanding data are acked, stop ++ * retransmit timer, otherwise restart timer ++ * using current (possibly backed-off) value. ++ * If process is waiting for space, ++ * wakeup/selwakeup/signal. If data ++ * are ready to send, let tcp_output ++ * decide between more output or persist. ++ */ ++ if (tp->snd_una == tp->snd_max) ++ tp->t_timer[TCPT_REXMT] = 0; ++ else if (tp->t_timer[TCPT_PERSIST] == 0) ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ ++ /* ++ * This is called because sowwakeup might have ++ * put data into so_snd. Since we don't so sowwakeup, ++ * we don't need this.. XXX??? 
++ */ ++ if (so->so_snd.sb_cc) ++ (void)tcp_output(tp); ++ ++ return; ++ } ++ } else if (ti->ti_ack == tp->snd_una && tcpfrag_list_empty(tp) && ++ ti->ti_len <= sbspace(&so->so_rcv)) { ++ /* ++ * this is a pure, in-sequence data packet ++ * with nothing on the reassembly queue and ++ * we have enough buffer space to take it. ++ */ ++ tp->rcv_nxt += ti->ti_len; ++ /* ++ * Add data to socket buffer. ++ */ ++ if (so->so_emu) { ++ if (tcp_emu(so, m)) ++ sbappend(so, m); ++ } else ++ sbappend(so, m); ++ ++ /* ++ * If this is a short packet, then ACK now - with Nagel ++ * congestion avoidance sender won't send more until ++ * he gets an ACK. ++ * ++ * It is better to not delay acks at all to maximize ++ * TCP throughput. See RFC 2581. ++ */ ++ tp->t_flags |= TF_ACKNOW; ++ tcp_output(tp); ++ return; ++ } ++ } /* header prediction */ ++ /* ++ * Calculate amount of space in receive window, ++ * and then do TCP input processing. ++ * Receive window is amount of space in rcv queue, ++ * but not less than advertised window. ++ */ ++ { ++ int win; ++ win = sbspace(&so->so_rcv); ++ if (win < 0) ++ win = 0; ++ tp->rcv_wnd = MAX(win, (int)(tp->rcv_adv - tp->rcv_nxt)); ++ } ++ ++ switch (tp->t_state) { ++ /* ++ * If the state is LISTEN then ignore segment if it contains an RST. ++ * If the segment contains an ACK then it is bad and send a RST. ++ * If it does not contain a SYN then it is not interesting; drop it. ++ * Don't bother responding if the destination was a broadcast. ++ * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial ++ * tp->iss, and send a segment: ++ * ++ * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. ++ * Fill in remote peer address fields if not previously specified. ++ * Enter SYN_RECEIVED state, and process any other fields of this ++ * segment in this state. 
++ */ ++ case TCPS_LISTEN: { ++ if (tiflags & TH_RST) ++ goto drop; ++ if (tiflags & TH_ACK) ++ goto dropwithreset; ++ if ((tiflags & TH_SYN) == 0) ++ goto drop; ++ ++ /* ++ * This has way too many gotos... ++ * But a bit of spaghetti code never hurt anybody :) ++ */ ++ ++ /* ++ * If this is destined for the control address, then flag to ++ * tcp_ctl once connected, otherwise connect ++ */ ++ /* TODO: IPv6 */ ++ if (af == AF_INET && ++ (so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr && ++ so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) { ++ /* May be an add exec */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == so->so_fport && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { ++ so->so_state |= SS_CTL; ++ break; ++ } ++ } ++ if (so->so_state & SS_CTL) { ++ goto cont_input; ++ } ++ } ++ /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */ ++ } ++ ++ if (so->so_emu & EMU_NOCONNECT) { ++ so->so_emu &= ~EMU_NOCONNECT; ++ goto cont_input; ++ } ++ ++ if ((tcp_fconnect(so, so->so_ffamily) == -1) && (errno != EAGAIN) && ++ (errno != EINPROGRESS) && (errno != EWOULDBLOCK)) { ++ uint8_t code; ++ DEBUG_MISC(" tcp fconnect errno = %d-%s", errno, strerror(errno)); ++ if (errno == ECONNREFUSED) { ++ /* ACK the SYN, send RST to refuse the connection */ ++ tcp_respond(tp, ti, m, ti->ti_seq + 1, (tcp_seq)0, ++ TH_RST | TH_ACK, af); ++ } else { ++ switch (af) { ++ case AF_INET: ++ code = ICMP_UNREACH_NET; ++ if (errno == EHOSTUNREACH) { ++ code = ICMP_UNREACH_HOST; ++ } ++ break; ++ case AF_INET6: ++ code = ICMP6_UNREACH_NO_ROUTE; ++ if (errno == EHOSTUNREACH) { ++ code = ICMP6_UNREACH_ADDRESS; ++ } ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ HTONL(ti->ti_seq); /* restore tcp header */ ++ HTONL(ti->ti_ack); ++ HTONS(ti->ti_win); ++ HTONS(ti->ti_urp); ++ m->m_data -= ++ sizeof(struct tcpiphdr) + off - 
sizeof(struct tcphdr); ++ m->m_len += ++ sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ switch (af) { ++ case AF_INET: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct ip) - ++ sizeof(struct tcphdr); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct ip) - ++ sizeof(struct tcphdr); ++ *ip = save_ip; ++ icmp_send_error(m, ICMP_UNREACH, code, 0, strerror(errno)); ++ break; ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ m->m_len -= sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ *ip6 = save_ip6; ++ icmp6_send_error(m, ICMP6_UNREACH, code); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++ tcp_close(tp); ++ m_free(m); ++ } else { ++ /* ++ * Haven't connected yet, save the current mbuf ++ * and ti, and return ++ * XXX Some OS's don't tell us whether the connect() ++ * succeeded or not. So we must time it out. ++ */ ++ so->so_m = m; ++ so->so_ti = ti; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ tp->t_state = TCPS_SYN_RECEIVED; ++ /* ++ * Initialize receive sequence numbers now so that we can send a ++ * valid RST if the remote end rejects our connection. ++ */ ++ tp->irs = ti->ti_seq; ++ tcp_rcvseqinit(tp); ++ tcp_template(tp); ++ } ++ return; ++ ++ cont_conn: ++ /* m==NULL ++ * Check if the connect succeeded ++ */ ++ if (so->so_state & SS_NOFDREF) { ++ tp = tcp_close(tp); ++ goto dropwithreset; ++ } ++ cont_input: ++ tcp_template(tp); ++ ++ if (optp) ++ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); ++ ++ if (iss) ++ tp->iss = iss; ++ else ++ tp->iss = slirp->tcp_iss; ++ slirp->tcp_iss += TCP_ISSINCR / 2; ++ tp->irs = ti->ti_seq; ++ tcp_sendseqinit(tp); ++ tcp_rcvseqinit(tp); ++ tp->t_flags |= TF_ACKNOW; ++ tp->t_state = TCPS_SYN_RECEIVED; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ goto trimthenstep6; ++ } /* case TCPS_LISTEN */ ++ ++ /* ++ * If the state is SYN_SENT: ++ * if seg contains an ACK, but not for our SYN, drop the input. 
++ * if seg contains a RST, then drop the connection. ++ * if seg does not contain SYN, then drop it. ++ * Otherwise this is an acceptable SYN segment ++ * initialize tp->rcv_nxt and tp->irs ++ * if seg contains ack then advance tp->snd_una ++ * if SYN has been acked change to ESTABLISHED else SYN_RCVD state ++ * arrange for segment to be acked (eventually) ++ * continue processing rest of data/controls, beginning with URG ++ */ ++ case TCPS_SYN_SENT: ++ if ((tiflags & TH_ACK) && ++ (SEQ_LEQ(ti->ti_ack, tp->iss) || SEQ_GT(ti->ti_ack, tp->snd_max))) ++ goto dropwithreset; ++ ++ if (tiflags & TH_RST) { ++ if (tiflags & TH_ACK) { ++ tcp_drop(tp, 0); /* XXX Check t_softerror! */ ++ } ++ goto drop; ++ } ++ ++ if ((tiflags & TH_SYN) == 0) ++ goto drop; ++ if (tiflags & TH_ACK) { ++ tp->snd_una = ti->ti_ack; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) ++ tp->snd_nxt = tp->snd_una; ++ } ++ ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->irs = ti->ti_seq; ++ tcp_rcvseqinit(tp); ++ tp->t_flags |= TF_ACKNOW; ++ if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { ++ soisfconnected(so); ++ tp->t_state = TCPS_ESTABLISHED; ++ ++ (void)tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); ++ /* ++ * if we didn't have to retransmit the SYN, ++ * use its rtt as our initial srtt & rtt var. ++ */ ++ if (tp->t_rtt) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ } else ++ tp->t_state = TCPS_SYN_RECEIVED; ++ ++ trimthenstep6: ++ /* ++ * Advance ti->ti_seq to correspond to first data byte. ++ * If data, trim to stay within window, ++ * dropping FIN if necessary. ++ */ ++ ti->ti_seq++; ++ if (ti->ti_len > tp->rcv_wnd) { ++ todrop = ti->ti_len - tp->rcv_wnd; ++ m_adj(m, -todrop); ++ ti->ti_len = tp->rcv_wnd; ++ tiflags &= ~TH_FIN; ++ } ++ tp->snd_wl1 = ti->ti_seq - 1; ++ tp->rcv_up = ti->ti_seq; ++ goto step6; ++ } /* switch tp->t_state */ ++ /* ++ * States other than LISTEN or SYN_SENT. ++ * Check that at least some bytes of segment are within ++ * receive window. 
If segment begins before rcv_nxt, ++ * drop leading data (and SYN); if nothing left, just ack. ++ */ ++ todrop = tp->rcv_nxt - ti->ti_seq; ++ if (todrop > 0) { ++ if (tiflags & TH_SYN) { ++ tiflags &= ~TH_SYN; ++ ti->ti_seq++; ++ if (ti->ti_urp > 1) ++ ti->ti_urp--; ++ else ++ tiflags &= ~TH_URG; ++ todrop--; ++ } ++ /* ++ * Following if statement from Stevens, vol. 2, p. 960. ++ */ ++ if (todrop > ti->ti_len || ++ (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) { ++ /* ++ * Any valid FIN must be to the left of the window. ++ * At this point the FIN must be a duplicate or out ++ * of sequence; drop it. ++ */ ++ tiflags &= ~TH_FIN; ++ ++ /* ++ * Send an ACK to resynchronize and drop any data. ++ * But keep on processing for RST or ACK. ++ */ ++ tp->t_flags |= TF_ACKNOW; ++ todrop = ti->ti_len; ++ } ++ m_adj(m, todrop); ++ ti->ti_seq += todrop; ++ ti->ti_len -= todrop; ++ if (ti->ti_urp > todrop) ++ ti->ti_urp -= todrop; ++ else { ++ tiflags &= ~TH_URG; ++ ti->ti_urp = 0; ++ } ++ } ++ /* ++ * If new data are received on a connection after the ++ * user processes are gone, then RST the other end. ++ */ ++ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && ++ ti->ti_len) { ++ tp = tcp_close(tp); ++ goto dropwithreset; ++ } ++ ++ /* ++ * If segment ends after window, drop trailing data ++ * (and PUSH and FIN); if nothing left, just ACK. ++ */ ++ todrop = (ti->ti_seq + ti->ti_len) - (tp->rcv_nxt + tp->rcv_wnd); ++ if (todrop > 0) { ++ if (todrop >= ti->ti_len) { ++ /* ++ * If a new connection request is received ++ * while in TIME_WAIT, drop the old connection ++ * and start over if the sequence numbers ++ * are above the previous ones. ++ */ ++ if (tiflags & TH_SYN && tp->t_state == TCPS_TIME_WAIT && ++ SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { ++ iss = tp->rcv_nxt + TCP_ISSINCR; ++ tp = tcp_close(tp); ++ goto findso; ++ } ++ /* ++ * If window is closed can only take segments at ++ * window edge, and have to drop data and PUSH from ++ * incoming segments. 
Continue processing, but ++ * remember to ack. Otherwise, drop segment ++ * and ack. ++ */ ++ if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { ++ tp->t_flags |= TF_ACKNOW; ++ } else { ++ goto dropafterack; ++ } ++ } ++ m_adj(m, -todrop); ++ ti->ti_len -= todrop; ++ tiflags &= ~(TH_PUSH | TH_FIN); ++ } ++ ++ /* ++ * If the RST bit is set examine the state: ++ * SYN_RECEIVED STATE: ++ * If passive open, return to LISTEN state. ++ * If active open, inform user that connection was refused. ++ * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: ++ * Inform user that connection was reset, and close tcb. ++ * CLOSING, LAST_ACK, TIME_WAIT STATES ++ * Close the tcb. ++ */ ++ if (tiflags & TH_RST) ++ switch (tp->t_state) { ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ case TCPS_FIN_WAIT_1: ++ case TCPS_FIN_WAIT_2: ++ case TCPS_CLOSE_WAIT: ++ tp->t_state = TCPS_CLOSED; ++ tcp_close(tp); ++ goto drop; ++ ++ case TCPS_CLOSING: ++ case TCPS_LAST_ACK: ++ case TCPS_TIME_WAIT: ++ tcp_close(tp); ++ goto drop; ++ } ++ ++ /* ++ * If a SYN is in the window, then this is an ++ * error and we send an RST and drop the connection. ++ */ ++ if (tiflags & TH_SYN) { ++ tp = tcp_drop(tp, 0); ++ goto dropwithreset; ++ } ++ ++ /* ++ * If the ACK bit is off we drop the segment and return. ++ */ ++ if ((tiflags & TH_ACK) == 0) ++ goto drop; ++ ++ /* ++ * Ack processing. ++ */ ++ switch (tp->t_state) { ++ /* ++ * In SYN_RECEIVED state if the ack ACKs our SYN then enter ++ * ESTABLISHED state and continue processing, otherwise ++ * send an RST. una<=ack<=max ++ */ ++ case TCPS_SYN_RECEIVED: ++ ++ if (SEQ_GT(tp->snd_una, ti->ti_ack) || SEQ_GT(ti->ti_ack, tp->snd_max)) ++ goto dropwithreset; ++ tp->t_state = TCPS_ESTABLISHED; ++ /* ++ * The sent SYN is ack'ed with our sequence number +1 ++ * The first data byte already in the buffer will get ++ * lost if no correction is made. This is only needed for ++ * SS_CTL since the buffer is empty otherwise. 
++ * tp->snd_una++; or: ++ */ ++ tp->snd_una = ti->ti_ack; ++ if (so->so_state & SS_CTL) { ++ /* So tcp_ctl reports the right state */ ++ ret = tcp_ctl(so); ++ if (ret == 1) { ++ soisfconnected(so); ++ so->so_state &= ~SS_CTL; /* success XXX */ ++ } else if (ret == 2) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* CTL_CMD */ ++ } else { ++ needoutput = 1; ++ tp->t_state = TCPS_FIN_WAIT_1; ++ } ++ } else { ++ soisfconnected(so); ++ } ++ ++ (void)tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); ++ tp->snd_wl1 = ti->ti_seq - 1; ++ /* Avoid ack processing; snd_una==ti_ack => dup ack */ ++ goto synrx_to_est; ++ /* fall into ... */ ++ ++ /* ++ * In ESTABLISHED state: drop duplicate ACKs; ACK out of range ++ * ACKs. If the ack is in the range ++ * tp->snd_una < ti->ti_ack <= tp->snd_max ++ * then advance tp->snd_una to ti->ti_ack and drop ++ * data from the retransmission queue. If this ACK reflects ++ * more up to date window information we update our window information. ++ */ ++ case TCPS_ESTABLISHED: ++ case TCPS_FIN_WAIT_1: ++ case TCPS_FIN_WAIT_2: ++ case TCPS_CLOSE_WAIT: ++ case TCPS_CLOSING: ++ case TCPS_LAST_ACK: ++ case TCPS_TIME_WAIT: ++ ++ if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { ++ if (ti->ti_len == 0 && tiwin == tp->snd_wnd) { ++ DEBUG_MISC(" dup ack m = %p so = %p", m, so); ++ /* ++ * If we have outstanding data (other than ++ * a window probe), this is a completely ++ * duplicate ack (ie, window info didn't ++ * change), the ack is the biggest we've ++ * seen and we've seen exactly our rexmt ++ * threshold of them, assume a packet ++ * has been dropped and retransmit it. ++ * Kludge snd_nxt & the congestion ++ * window so we send only this one ++ * packet. ++ * ++ * We know we're losing at the current ++ * window size so do congestion avoidance ++ * (set ssthresh to half the current window ++ * and pull our congestion window back to ++ * the new ssthresh). 
++ * ++ * Dup acks mean that packets have left the ++ * network (they're now cached at the receiver) ++ * so bump cwnd by the amount in the receiver ++ * to keep a constant cwnd packets in the ++ * network. ++ */ ++ if (tp->t_timer[TCPT_REXMT] == 0 || ti->ti_ack != tp->snd_una) ++ tp->t_dupacks = 0; ++ else if (++tp->t_dupacks == TCPREXMTTHRESH) { ++ tcp_seq onxt = tp->snd_nxt; ++ unsigned win = ++ MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; ++ ++ if (win < 2) ++ win = 2; ++ tp->snd_ssthresh = win * tp->t_maxseg; ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->t_rtt = 0; ++ tp->snd_nxt = ti->ti_ack; ++ tp->snd_cwnd = tp->t_maxseg; ++ (void)tcp_output(tp); ++ tp->snd_cwnd = ++ tp->snd_ssthresh + tp->t_maxseg * tp->t_dupacks; ++ if (SEQ_GT(onxt, tp->snd_nxt)) ++ tp->snd_nxt = onxt; ++ goto drop; ++ } else if (tp->t_dupacks > TCPREXMTTHRESH) { ++ tp->snd_cwnd += tp->t_maxseg; ++ (void)tcp_output(tp); ++ goto drop; ++ } ++ } else ++ tp->t_dupacks = 0; ++ break; ++ } ++ synrx_to_est: ++ /* ++ * If the congestion window was inflated to account ++ * for the other side's cached packets, retract it. ++ */ ++ if (tp->t_dupacks > TCPREXMTTHRESH && tp->snd_cwnd > tp->snd_ssthresh) ++ tp->snd_cwnd = tp->snd_ssthresh; ++ tp->t_dupacks = 0; ++ if (SEQ_GT(ti->ti_ack, tp->snd_max)) { ++ goto dropafterack; ++ } ++ acked = ti->ti_ack - tp->snd_una; ++ ++ /* ++ * If transmit timer is running and timed sequence ++ * number was acked, update smoothed round trip time. ++ * Since we now have an rtt measurement, cancel the ++ * timer backoff (cf., Phil Karn's retransmit alg.). ++ * Recompute the initial retransmit timer. ++ */ ++ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ ++ /* ++ * If all outstanding data is acked, stop retransmit ++ * timer and remember to restart (more output or persist). ++ * If there is more data to be acked, restart retransmit ++ * timer, using current (possibly backed-off) value. 
++ */ ++ if (ti->ti_ack == tp->snd_max) { ++ tp->t_timer[TCPT_REXMT] = 0; ++ needoutput = 1; ++ } else if (tp->t_timer[TCPT_PERSIST] == 0) ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ /* ++ * When new data is acked, open the congestion window. ++ * If the window gives us less than ssthresh packets ++ * in flight, open exponentially (maxseg per packet). ++ * Otherwise open linearly: maxseg per window ++ * (maxseg^2 / cwnd per packet). ++ */ ++ { ++ register unsigned cw = tp->snd_cwnd; ++ register unsigned incr = tp->t_maxseg; ++ ++ if (cw > tp->snd_ssthresh) ++ incr = incr * incr / cw; ++ tp->snd_cwnd = MIN(cw + incr, TCP_MAXWIN << tp->snd_scale); ++ } ++ if (acked > so->so_snd.sb_cc) { ++ tp->snd_wnd -= so->so_snd.sb_cc; ++ sodrop(so, (int)so->so_snd.sb_cc); ++ ourfinisacked = 1; ++ } else { ++ sodrop(so, acked); ++ tp->snd_wnd -= acked; ++ ourfinisacked = 0; ++ } ++ tp->snd_una = ti->ti_ack; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) ++ tp->snd_nxt = tp->snd_una; ++ ++ switch (tp->t_state) { ++ /* ++ * In FIN_WAIT_1 STATE in addition to the processing ++ * for the ESTABLISHED state if our FIN is now acknowledged ++ * then enter FIN_WAIT_2. ++ */ ++ case TCPS_FIN_WAIT_1: ++ if (ourfinisacked) { ++ /* ++ * If we can't receive any more ++ * data, then closing user can proceed. ++ * Starting the timer is contrary to the ++ * specification, but if we don't get a FIN ++ * we'll hang forever. ++ */ ++ if (so->so_state & SS_FCANTRCVMORE) { ++ tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE; ++ } ++ tp->t_state = TCPS_FIN_WAIT_2; ++ } ++ break; ++ ++ /* ++ * In CLOSING STATE in addition to the processing for ++ * the ESTABLISHED state if the ACK acknowledges our FIN ++ * then enter the TIME-WAIT state, otherwise ignore ++ * the segment. 
++ */ ++ case TCPS_CLOSING: ++ if (ourfinisacked) { ++ tp->t_state = TCPS_TIME_WAIT; ++ tcp_canceltimers(tp); ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ } ++ break; ++ ++ /* ++ * In LAST_ACK, we may still be waiting for data to drain ++ * and/or to be acked, as well as for the ack of our FIN. ++ * If our FIN is now acknowledged, delete the TCB, ++ * enter the closed state and return. ++ */ ++ case TCPS_LAST_ACK: ++ if (ourfinisacked) { ++ tcp_close(tp); ++ goto drop; ++ } ++ break; ++ ++ /* ++ * In TIME_WAIT state the only thing that should arrive ++ * is a retransmission of the remote FIN. Acknowledge ++ * it and restart the finack timer. ++ */ ++ case TCPS_TIME_WAIT: ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ goto dropafterack; ++ } ++ } /* switch(tp->t_state) */ ++ ++step6: ++ /* ++ * Update window information. ++ * Don't look at window if no ACK: TAC's send garbage on first SYN. ++ */ ++ if ((tiflags & TH_ACK) && ++ (SEQ_LT(tp->snd_wl1, ti->ti_seq) || ++ (tp->snd_wl1 == ti->ti_seq && ++ (SEQ_LT(tp->snd_wl2, ti->ti_ack) || ++ (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) { ++ tp->snd_wnd = tiwin; ++ tp->snd_wl1 = ti->ti_seq; ++ tp->snd_wl2 = ti->ti_ack; ++ if (tp->snd_wnd > tp->max_sndwnd) ++ tp->max_sndwnd = tp->snd_wnd; ++ needoutput = 1; ++ } ++ ++ /* ++ * Process segments with URG. ++ */ ++ if ((tiflags & TH_URG) && ti->ti_urp && ++ TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ /* ++ * This is a kludge, but if we receive and accept ++ * random urgent pointers, we'll crash in ++ * soreceive. It's hard to imagine someone ++ * actually wanting to send this much urgent data. ++ */ ++ if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) { ++ ti->ti_urp = 0; ++ tiflags &= ~TH_URG; ++ goto dodata; ++ } ++ /* ++ * If this segment advances the known urgent pointer, ++ * then mark the data stream. This should not happen ++ * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since ++ * a FIN has been received from the remote side. 
++ * In these states we ignore the URG. ++ * ++ * According to RFC961 (Assigned Protocols), ++ * the urgent pointer points to the last octet ++ * of urgent data. We continue, however, ++ * to consider it to indicate the first octet ++ * of data past the urgent section as the original ++ * spec states (in one of two places). ++ */ ++ if (SEQ_GT(ti->ti_seq + ti->ti_urp, tp->rcv_up)) { ++ tp->rcv_up = ti->ti_seq + ti->ti_urp; ++ so->so_urgc = ++ so->so_rcv.sb_cc + (tp->rcv_up - tp->rcv_nxt); /* -1; */ ++ tp->rcv_up = ti->ti_seq + ti->ti_urp; ++ } ++ } else ++ /* ++ * If no out of band data is expected, ++ * pull receive urgent pointer along ++ * with the receive window. ++ */ ++ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) ++ tp->rcv_up = tp->rcv_nxt; ++dodata: ++ ++ /* ++ * If this is a small packet, then ACK now - with Nagle ++ * congestion avoidance sender won't send more until ++ * he gets an ACK. ++ */ ++ if (ti->ti_len && (unsigned)ti->ti_len <= 5 && ++ ((struct tcpiphdr_2 *)ti)->first_char == (char)27) { ++ tp->t_flags |= TF_ACKNOW; ++ } ++ ++ /* ++ * Process the segment text, merging it into the TCP sequencing queue, ++ * and arranging for acknowledgment of receipt if necessary. ++ * This process logically involves adjusting tp->rcv_wnd as data ++ * is presented to the user (this happens in tcp_usrreq.c, ++ * case PRU_RCVD). If a FIN has already been received on this ++ * connection then we just ignore the text. ++ */ ++ if ((ti->ti_len || (tiflags & TH_FIN)) && ++ TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ TCP_REASS(tp, ti, m, so, tiflags); ++ } else { ++ m_free(m); ++ tiflags &= ~TH_FIN; ++ } ++ ++ /* ++ * If FIN is received ACK the FIN and let the user know ++ * that the connection is closing. ++ */ ++ if (tiflags & TH_FIN) { ++ if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ /* ++ * If we receive a FIN we can't send more data, ++ * set it SS_FDRAIN ++ * Shutdown the socket if there is no rx data in the ++ * buffer.
++ * soread() is called on completion of shutdown() and ++ * will go to TCPS_LAST_ACK, and use tcp_output() ++ * to send the FIN. ++ */ ++ sofwdrain(so); ++ ++ tp->t_flags |= TF_ACKNOW; ++ tp->rcv_nxt++; ++ } ++ switch (tp->t_state) { ++ /* ++ * In SYN_RECEIVED and ESTABLISHED STATES ++ * enter the CLOSE_WAIT state. ++ */ ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ if (so->so_emu == EMU_CTL) /* no shutdown on socket */ ++ tp->t_state = TCPS_LAST_ACK; ++ else ++ tp->t_state = TCPS_CLOSE_WAIT; ++ break; ++ ++ /* ++ * If still in FIN_WAIT_1 STATE FIN has not been acked so ++ * enter the CLOSING state. ++ */ ++ case TCPS_FIN_WAIT_1: ++ tp->t_state = TCPS_CLOSING; ++ break; ++ ++ /* ++ * In FIN_WAIT_2 state enter the TIME_WAIT state, ++ * starting the time-wait timer, turning off the other ++ * standard timers. ++ */ ++ case TCPS_FIN_WAIT_2: ++ tp->t_state = TCPS_TIME_WAIT; ++ tcp_canceltimers(tp); ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ break; ++ ++ /* ++ * In TIME_WAIT state restart the 2 MSL time_wait timer. ++ */ ++ case TCPS_TIME_WAIT: ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ break; ++ } ++ } ++ ++ /* ++ * Return any desired output. ++ */ ++ if (needoutput || (tp->t_flags & TF_ACKNOW)) { ++ (void)tcp_output(tp); ++ } ++ return; ++ ++dropafterack: ++ /* ++ * Generate an ACK dropping incoming segment if it occupies ++ * sequence space, where the ACK reflects our state. ++ */ ++ if (tiflags & TH_RST) ++ goto drop; ++ m_free(m); ++ tp->t_flags |= TF_ACKNOW; ++ (void)tcp_output(tp); ++ return; ++ ++dropwithreset: ++ /* reuses m if m!=NULL, m_free() unnecessary */ ++ if (tiflags & TH_ACK) ++ tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST, af); ++ else { ++ if (tiflags & TH_SYN) ++ ti->ti_len++; ++ tcp_respond(tp, ti, m, ti->ti_seq + ti->ti_len, (tcp_seq)0, ++ TH_RST | TH_ACK, af); ++ } ++ ++ return; ++ ++drop: ++ /* ++ * Drop space held by incoming segment and return.
++ */ ++ m_free(m); ++} ++/* Parse the cnt bytes of TCP options at cp. Only an MSS option on a SYN segment is acted on (handed to tcp_mss()); a malformed or truncated option ends the walk. */ ++static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, ++ struct tcpiphdr *ti) ++{ ++ uint16_t mss; ++ int opt, optlen; ++ ++ DEBUG_CALL("tcp_dooptions"); ++ DEBUG_ARG("tp = %p cnt=%i", tp, cnt); ++ ++ for (; cnt > 0; cnt -= optlen, cp += optlen) { ++ opt = cp[0]; ++ if (opt == TCPOPT_EOL) ++ break; ++ if (opt == TCPOPT_NOP) ++ optlen = 1; ++ else { ++ optlen = (cnt < 2) ? 0 : cp[1]; /* no length octet left to read */ ++ if (optlen < 2 || optlen > cnt) /* must cover kind+length and fit */ ++ break; ++ } ++ switch (opt) { ++ default: ++ continue; ++ ++ case TCPOPT_MAXSEG: ++ if (optlen != TCPOLEN_MAXSEG) ++ continue; ++ if (!(ti->ti_flags & TH_SYN)) ++ continue; ++ memcpy((char *)&mss, (char *)cp + 2, sizeof(mss)); ++ NTOHS(mss); ++ (void)tcp_mss(tp, mss); /* sets t_maxseg */ ++ break; ++ } ++ } ++} ++ ++/* ++ * Collect new round-trip time estimate ++ * and update averages and current timeout: refreshes t_srtt, t_rttvar and t_rxtcur from rtt, and clears t_rtt, t_rxtshift and t_softerror. ++ */ ++ ++static void tcp_xmit_timer(register struct tcpcb *tp, int rtt) ++{ ++ register short delta; ++ ++ DEBUG_CALL("tcp_xmit_timer"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("rtt = %d", rtt); ++ ++ if (tp->t_srtt != 0) { ++ /* ++ * srtt is stored as fixed point with 3 bits after the ++ * binary point (i.e., scaled by 8). The following magic ++ * is equivalent to the smoothing algorithm in rfc793 with ++ * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed ++ * point). Adjust rtt to origin 0. ++ */ ++ delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT); ++ if ((tp->t_srtt += delta) <= 0) ++ tp->t_srtt = 1; ++ /* ++ * We accumulate a smoothed rtt variance (actually, a ++ * smoothed mean difference), then set the retransmit ++ * timer to smoothed rtt + 4 times the smoothed variance. ++ * rttvar is stored as fixed point with 2 bits after the ++ * binary point (scaled by 4). The following is ++ * equivalent to rfc793 smoothing with an alpha of .75 ++ * (rttvar = rttvar*3/4 + |delta| / 4). This replaces ++ * rfc793's wired-in beta.
++ */ ++ if (delta < 0) ++ delta = -delta; ++ delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); ++ if ((tp->t_rttvar += delta) <= 0) ++ tp->t_rttvar = 1; ++ } else { ++ /* ++ * No rtt measurement yet - use the unsmoothed rtt. ++ * Set the variance to half the rtt (so our first ++ * retransmit happens at 3*rtt). ++ */ ++ tp->t_srtt = rtt << TCP_RTT_SHIFT; ++ tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); ++ } ++ tp->t_rtt = 0; ++ tp->t_rxtshift = 0; ++ ++ /* ++ * the retransmit should happen at rtt + 4 * rttvar. ++ * Because of the way we do the smoothing, srtt and rttvar ++ * will each average +1/2 tick of bias. When we compute ++ * the retransmit timer, we want 1/2 tick of rounding and ++ * 1 extra tick because of +-1/2 tick uncertainty in the ++ * firing of the timer. The bias will give us exactly the ++ * 1.5 tick we need. But, because the bias is ++ * statistical, we have to test that we don't drop below ++ * the minimum feasible timer (which is 2 ticks). ++ */ ++ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), (short)tp->t_rttmin, ++ TCPTV_REXMTMAX); /* XXX */ ++ ++ /* ++ * We received an ack for a packet that wasn't retransmitted; ++ * it is probably safe to discard any error indications we've ++ * received recently. This isn't quite right, but close enough ++ * for now (a route might have failed after we sent a segment, ++ * and the return path might not be symmetrical). ++ */ ++ tp->t_softerror = 0; ++} ++ ++/* ++ * Determine a reasonable value for maxseg size. ++ * If the route is known, check route for mtu. ++ * If none, use an mss that can be handled on the outgoing ++ * interface without forcing IP to fragment; if bigger than ++ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES ++ * to utilize large mbufs.
If no route is found, route has no mtu, ++ * or the destination isn't local, use a default, hopefully conservative ++ * size (usually 512 or the default IP max size, but no more than the mtu ++ * of the interface), as we can't discover anything about intervening ++ * gateways or networks. We also initialize the congestion/slow start ++ * window to be a single segment if the destination isn't local. ++ * While looking at the routing entry, we also initialize other path-dependent ++ * parameters from pre-set or cached values in the routing entry. As implemented below (no routing entry is consulted): the base is MIN(if_mtu, if_mru) less the TCP and IP/IPv6 header sizes, capped by a non-zero offer and floored at 32; t_maxseg is updated when the result is smaller or offer is non-zero (capped at TCP_MAXSEG_MAX); snd_cwnd and the so_snd/so_rcv reservations are sized from the result, which is returned. ++ */ ++ ++int tcp_mss(struct tcpcb *tp, unsigned offer) ++{ ++ struct socket *so = tp->t_socket; ++ int mss; ++ ++ DEBUG_CALL("tcp_mss"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("offer = %d", offer); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ mss = MIN(so->slirp->if_mtu, so->slirp->if_mru) - ++ sizeof(struct tcphdr) - sizeof(struct ip); ++ break; ++ case AF_INET6: ++ mss = MIN(so->slirp->if_mtu, so->slirp->if_mru) - ++ sizeof(struct tcphdr) - sizeof(struct ip6); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ if (offer) ++ mss = MIN(mss, offer); ++ mss = MAX(mss, 32); ++ if (mss < tp->t_maxseg || offer != 0) ++ tp->t_maxseg = MIN(mss, TCP_MAXSEG_MAX); ++ ++ tp->snd_cwnd = mss; ++ ++ sbreserve(&so->so_snd, ++ TCP_SNDSPACE + ++ ((TCP_SNDSPACE % mss) ? (mss - (TCP_SNDSPACE % mss)) : 0)); ++ sbreserve(&so->so_rcv, ++ TCP_RCVSPACE + ++ ((TCP_RCVSPACE % mss) ? (mss - (TCP_RCVSPACE % mss)) : 0)); ++ ++ DEBUG_MISC(" returning mss = %d", mss); ++ ++ return mss; ++} +diff --git a/slirp/src/tcp_output.c b/slirp/src/tcp_output.c +new file mode 100644 +index 0000000..383fe31 +--- /dev/null ++++ b/slirp/src/tcp_output.c +@@ -0,0 +1,516 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved.
++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_output.c 8.3 (Berkeley) 12/30/93 ++ * tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#include "slirp.h" ++ ++static const uint8_t tcp_outflags[TCP_NSTATES] = { ++ TH_RST | TH_ACK, 0, TH_SYN, TH_SYN | TH_ACK, ++ TH_ACK, TH_ACK, TH_FIN | TH_ACK, TH_FIN | TH_ACK, ++ TH_FIN | TH_ACK, TH_ACK, TH_ACK, ++}; ++ ++ ++#undef MAX_TCPOPTLEN ++#define MAX_TCPOPTLEN 32 /* max # bytes that go in options */ ++ ++/* ++ * Tcp output routine: figure out what should be sent and send it. ++ */ ++int tcp_output(struct tcpcb *tp) ++{ ++ register struct socket *so = tp->t_socket; ++ register long len, win; ++ int off, flags, error; ++ register struct mbuf *m; ++ register struct tcpiphdr *ti, tcpiph_save; ++ struct ip *ip; ++ struct ip6 *ip6; ++ uint8_t opt[MAX_TCPOPTLEN]; ++ unsigned optlen, hdrlen; ++ int idle, sendalot; ++ ++ DEBUG_CALL("tcp_output"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ /* ++ * Determine length of data that should be transmitted, ++ * and flags that will be used. ++ * If there is some data or critical controls (SYN, RST) ++ * to send, then transmit; otherwise, investigate further. ++ */ ++ idle = (tp->snd_max == tp->snd_una); ++ if (idle && tp->t_idle >= tp->t_rxtcur) ++ /* ++ * We have been idle for "a while" and no acks are ++ * expected to clock out any data we send -- ++ * slow start to get ack "clock" running again. ++ */ ++ tp->snd_cwnd = tp->t_maxseg; ++again: ++ sendalot = 0; ++ off = tp->snd_nxt - tp->snd_una; ++ win = MIN(tp->snd_wnd, tp->snd_cwnd); ++ ++ flags = tcp_outflags[tp->t_state]; ++ ++ DEBUG_MISC(" --- tcp_output flags = 0x%x", flags); ++ ++ /* ++ * If in persist timeout with window of 0, send 1 byte. ++ * Otherwise, if window is small but nonzero ++ * and timer expired, we will send what we can ++ * and go to transmit state. ++ */ ++ if (tp->t_force) { ++ if (win == 0) { ++ /* ++ * If we still have some data to send, then ++ * clear the FIN bit. Usually this would ++ * happen below when it realizes that we ++ * aren't sending all the data. 
However, ++ * if we have exactly 1 byte of unsent data, ++ * then it won't clear the FIN bit below, ++ * and if we are in persist state, we wind ++ * up sending the packet without recording ++ * that we sent the FIN bit. ++ * ++ * We can't just blindly clear the FIN bit, ++ * because if we don't have any more data ++ * to send then the probe will be the FIN ++ * itself. ++ */ ++ if (off < so->so_snd.sb_cc) ++ flags &= ~TH_FIN; ++ win = 1; ++ } else { ++ tp->t_timer[TCPT_PERSIST] = 0; ++ tp->t_rxtshift = 0; ++ } ++ } ++ ++ len = MIN(so->so_snd.sb_cc, win) - off; ++ ++ if (len < 0) { ++ /* ++ * If FIN has been sent but not acked, ++ * but we haven't been called to retransmit, ++ * len will be -1. Otherwise, window shrank ++ * after we sent into it. If window shrank to 0, ++ * cancel pending retransmit and pull snd_nxt ++ * back to (closed) window. We will enter persist ++ * state below. If the window didn't close completely, ++ * just wait for an ACK. ++ */ ++ len = 0; ++ if (win == 0) { ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->snd_nxt = tp->snd_una; ++ } ++ } ++ ++ if (len > tp->t_maxseg) { ++ len = tp->t_maxseg; ++ sendalot = 1; ++ } ++ if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) ++ flags &= ~TH_FIN; ++ ++ win = sbspace(&so->so_rcv); ++ ++ /* ++ * Sender silly window avoidance. If connection is idle ++ * and can send all data, a maximum segment, ++ * at least a maximum default-size segment do it, ++ * or are forced, do it; otherwise don't bother. ++ * If peer's buffer is tiny, then send ++ * when window is at least half open. ++ * If retransmitting (possibly after persist timer forced us ++ * to send into a small window), then must resend.
++ */ ++ if (len) { ++ if (len == tp->t_maxseg) ++ goto send; ++ if ((1 || idle || tp->t_flags & TF_NODELAY) && ++ len + off >= so->so_snd.sb_cc) ++ goto send; ++ if (tp->t_force) ++ goto send; ++ if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) ++ goto send; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_max)) ++ goto send; ++ } ++ ++ /* ++ * Compare available window to amount of window ++ * known to peer (as advertised window less ++ * next expected input). If the difference is at least two ++ * max size segments, or at least 50% of the maximum possible ++ * window, then want to send a window update to peer. ++ */ ++ if (win > 0) { ++ /* ++ * "adv" is the amount we can increase the window, ++ * taking into account that we are limited by ++ * TCP_MAXWIN << tp->rcv_scale. ++ */ ++ long adv = MIN(win, (long)TCP_MAXWIN << tp->rcv_scale) - ++ (tp->rcv_adv - tp->rcv_nxt); ++ ++ if (adv >= (long)(2 * tp->t_maxseg)) ++ goto send; ++ if (2 * adv >= (long)so->so_rcv.sb_datalen) ++ goto send; ++ } ++ ++ /* ++ * Send if we owe peer an ACK. ++ */ ++ if (tp->t_flags & TF_ACKNOW) ++ goto send; ++ if (flags & (TH_SYN | TH_RST)) ++ goto send; ++ if (SEQ_GT(tp->snd_up, tp->snd_una)) ++ goto send; ++ /* ++ * If our state indicates that FIN should be sent ++ * and we have not yet done so, or we're retransmitting the FIN, ++ * then we need to send. ++ */ ++ if (flags & TH_FIN && ++ ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) ++ goto send; ++ ++ /* ++ * TCP window updates are not reliable, rather a polling protocol ++ * using ``persist'' packets is used to insure receipt of window ++ * updates. The three ``states'' for the output side are: ++ * idle not doing retransmits or persists ++ * persisting to move a small or zero window ++ * (re)transmitting and thereby not persisting ++ * ++ * tp->t_timer[TCPT_PERSIST] ++ * is set when we are in persist state. ++ * tp->t_force ++ * is set when we are called to send a persist packet. 
++ * tp->t_timer[TCPT_REXMT] ++ * is set when we are retransmitting ++ * The output side is idle when both timers are zero. ++ * ++ * If send window is too small, there is data to transmit, and no ++ * retransmit or persist is pending, then go to persist state. ++ * If nothing happens soon, send when timer expires: ++ * if window is nonzero, transmit what we can, ++ * otherwise force out a byte. ++ */ ++ if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && ++ tp->t_timer[TCPT_PERSIST] == 0) { ++ tp->t_rxtshift = 0; ++ tcp_setpersist(tp); ++ } ++ ++ /* ++ * No reason to send a segment, just return. ++ */ ++ return (0); ++ ++send: ++ /* ++ * Before ESTABLISHED, force sending of initial options ++ * unless TCP set not to do any options. ++ * NOTE: we assume that the IP/TCP header plus TCP options ++ * always fit in a single mbuf, leaving room for a maximum ++ * link header, i.e. ++ * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN ++ */ ++ optlen = 0; ++ hdrlen = sizeof(struct tcpiphdr); ++ if (flags & TH_SYN) { ++ tp->snd_nxt = tp->iss; ++ if ((tp->t_flags & TF_NOOPT) == 0) { ++ uint16_t mss; ++ ++ opt[0] = TCPOPT_MAXSEG; ++ opt[1] = 4; ++ mss = htons((uint16_t)tcp_mss(tp, 0)); ++ memcpy((char *)(opt + 2), (char *)&mss, sizeof(mss)); ++ optlen = 4; ++ } ++ } ++ ++ hdrlen += optlen; ++ ++ /* ++ * Adjust data length if insertion of options will ++ * bump the packet length beyond the t_maxseg length. ++ */ ++ if (len > tp->t_maxseg - optlen) { ++ len = tp->t_maxseg - optlen; ++ sendalot = 1; ++ } ++ ++ /* ++ * Grab a header mbuf, attaching a copy of data to ++ * be transmitted, and initialize the header from ++ * the template for sends on this connection. ++ */ ++ if (len) { ++ m = m_get(so->slirp); ++ if (m == NULL) { ++ error = 1; ++ goto out; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m->m_len = hdrlen; ++ ++ sbcopy(&so->so_snd, off, (int)len, mtod(m, char *) + hdrlen); ++ m->m_len += len; ++ ++ /* ++ * If we're sending everything we've got, set PUSH. 
++ * (This will keep happy those implementations which only ++ * give data to the user when a buffer fills or ++ * a PUSH comes in.) ++ */ ++ if (off + len == so->so_snd.sb_cc) ++ flags |= TH_PUSH; ++ } else { ++ m = m_get(so->slirp); ++ if (m == NULL) { ++ error = 1; ++ goto out; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m->m_len = hdrlen; ++ } ++ ++ ti = mtod(m, struct tcpiphdr *); ++ ++ memcpy((char *)ti, &tp->t_template, sizeof(struct tcpiphdr)); ++ ++ /* ++ * Fill in fields, remembering maximum advertised ++ * window for use in delaying messages about window sizes. ++ * If resending a FIN, be sure not to use a new sequence number. ++ */ ++ if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && ++ tp->snd_nxt == tp->snd_max) ++ tp->snd_nxt--; ++ /* ++ * If we are doing retransmissions, then snd_nxt will ++ * not reflect the first unsent octet. For ACK only ++ * packets, we do not want the sequence number of the ++ * retransmitted packet, we want the sequence number ++ * of the next unsent octet. So, if there is no data ++ * (and no SYN or FIN), use snd_max instead of snd_nxt ++ * when filling in ti_seq. But if we are in persist ++ * state, snd_max might reflect one byte beyond the ++ * right edge of the window, so use snd_nxt in that ++ * case, since we know we aren't doing a retransmission. ++ * (retransmit and persist are mutually exclusive...) ++ */ ++ if (len || (flags & (TH_SYN | TH_FIN)) || tp->t_timer[TCPT_PERSIST]) ++ ti->ti_seq = htonl(tp->snd_nxt); ++ else ++ ti->ti_seq = htonl(tp->snd_max); ++ ti->ti_ack = htonl(tp->rcv_nxt); ++ if (optlen) { ++ memcpy((char *)(ti + 1), (char *)opt, optlen); ++ ti->ti_off = (sizeof(struct tcphdr) + optlen) >> 2; ++ } ++ ti->ti_flags = flags; ++ /* ++ * Calculate receive window. Don't shrink window, ++ * but avoid silly window syndrome. 
++ */ ++ if (win < (long)(so->so_rcv.sb_datalen / 4) && win < (long)tp->t_maxseg) ++ win = 0; ++ if (win > (long)TCP_MAXWIN << tp->rcv_scale) ++ win = (long)TCP_MAXWIN << tp->rcv_scale; ++ if (win < (long)(tp->rcv_adv - tp->rcv_nxt)) ++ win = (long)(tp->rcv_adv - tp->rcv_nxt); ++ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); ++ ++ if (SEQ_GT(tp->snd_up, tp->snd_una)) { ++ ti->ti_urp = htons((uint16_t)(tp->snd_up - ntohl(ti->ti_seq))); ++ ti->ti_flags |= TH_URG; ++ } else ++ /* ++ * If no urgent pointer to send, then we pull ++ * the urgent pointer to the left edge of the send window ++ * so that it doesn't drift into the send window on sequence ++ * number wraparound. ++ */ ++ tp->snd_up = tp->snd_una; /* drag it along */ ++ ++ /* ++ * Put TCP length in extended header, and then ++ * checksum extended header and data. ++ */ ++ if (len + optlen) ++ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + optlen + len)); ++ ti->ti_sum = cksum(m, (int)(hdrlen + len)); ++ ++ /* ++ * In transmit state, time the transmission and arrange for ++ * the retransmit. In persist state, just set snd_max. ++ */ ++ if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { ++ tcp_seq startseq = tp->snd_nxt; ++ ++ /* ++ * Advance snd_nxt over sequence space of this segment. ++ */ ++ if (flags & (TH_SYN | TH_FIN)) { ++ if (flags & TH_SYN) ++ tp->snd_nxt++; ++ if (flags & TH_FIN) { ++ tp->snd_nxt++; ++ tp->t_flags |= TF_SENTFIN; ++ } ++ } ++ tp->snd_nxt += len; ++ if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { ++ tp->snd_max = tp->snd_nxt; ++ /* ++ * Time this transmission if not a retransmission and ++ * not currently timing anything. ++ */ ++ if (tp->t_rtt == 0) { ++ tp->t_rtt = 1; ++ tp->t_rtseq = startseq; ++ } ++ } ++ ++ /* ++ * Set retransmit timer if not currently set, ++ * and not doing an ack or a keep-alive probe. ++ * Initial value for retransmit timer is smoothed ++ * round-trip time + 2 * round-trip time variance. 
++ * Initialize shift counter which is used for backoff ++ * of retransmit time. ++ */ ++ if (tp->t_timer[TCPT_REXMT] == 0 && tp->snd_nxt != tp->snd_una) { ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ if (tp->t_timer[TCPT_PERSIST]) { ++ tp->t_timer[TCPT_PERSIST] = 0; ++ tp->t_rxtshift = 0; ++ } ++ } ++ } else if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) ++ tp->snd_max = tp->snd_nxt + len; ++ ++ /* ++ * Fill in IP length and desired time to live and ++ * send to IP level. There should be a better way ++ * to handle ttl and tos; we could keep them in ++ * the template, but need a way to checksum without them. ++ */ ++ m->m_len = hdrlen + len; /* XXX Needed? m_len should be correct */ ++ tcpiph_save = *mtod(m, struct tcpiphdr *); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ m->m_data += ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ m->m_len -= ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ ip = mtod(m, struct ip *); ++ ++ ip->ip_len = m->m_len; ++ ip->ip_dst = tcpiph_save.ti_dst; ++ ip->ip_src = tcpiph_save.ti_src; ++ ip->ip_p = tcpiph_save.ti_pr; ++ ++ ip->ip_ttl = IPDEFTTL; ++ ip->ip_tos = so->so_iptos; ++ error = ip_output(so, m); ++ break; ++ ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ ip6 = mtod(m, struct ip6 *); ++ ++ ip6->ip_pl = tcpiph_save.ti_len; ++ ip6->ip_dst = tcpiph_save.ti_dst6; ++ ip6->ip_src = tcpiph_save.ti_src6; ++ ip6->ip_nh = tcpiph_save.ti_nh6; ++ ++ error = ip6_output(so, m, 0); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ if (error) { ++ out: ++ return (error); ++ } ++ ++ /* ++ * Data sent (as far as we can tell). ++ * If this advertises a larger window than any other segment, ++ * then remember the size of the advertised window. ++ * Any pending ACK has now been sent. 
++     */
++    if (win > 0 && SEQ_GT(tp->rcv_nxt + win, tp->rcv_adv))
++        tp->rcv_adv = tp->rcv_nxt + win;
++    tp->last_ack_sent = tp->rcv_nxt;
++    tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
++    if (sendalot)
++        goto again;
++
++    return (0);
++}
++
++/*
++ * Arm (or re-arm) the persist timer of connection tp.
++ *
++ * The base interval is computed from the smoothed round-trip estimate
++ * (t_srtt) and its variance (t_rttvar), multiplied by the tcp_backoff[]
++ * entry selected by the current shift count t_rxtshift, and stored in
++ * t_timer[TCPT_PERSIST] through TCPT_RANGESET together with the bounds
++ * TCPTV_PERSMIN and TCPTV_PERSMAX.
++ * NOTE(review): TCPT_RANGESET is defined elsewhere; presumably it clamps
++ * the value into [TCPTV_PERSMIN, TCPTV_PERSMAX] -- confirm.
++ *
++ * Each call also advances t_rxtshift by one until it reaches
++ * TCP_MAXRXTSHIFT, so successive calls select later backoff entries.
++ */
++void tcp_setpersist(struct tcpcb *tp)
++{
++    int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
++
++    /*
++     * Start/restart persistence timer.
++     */
++    TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], t * tcp_backoff[tp->t_rxtshift],
++                  TCPTV_PERSMIN, TCPTV_PERSMAX);
++    /* Stop advancing once the shift count reaches its maximum. */
++    if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
++        tp->t_rxtshift++;
++}
+diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c
+new file mode 100644
+index 0000000..a1016d9
+--- /dev/null
++++ b/slirp/src/tcp_subr.c
+@@ -0,0 +1,980 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ * Copyright (c) 1982, 1986, 1988, 1990, 1993
++ * The Regents of the University of California. All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ * 1. Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ * 2. Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in the
++ * documentation and/or other materials provided with the distribution.
++ * 3. Neither the name of the University nor the names of its contributors
++ * may be used to endorse or promote products derived from this software
++ * without specific prior written permission.
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
++ * SUCH DAMAGE.
++ *
++ * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93
++ * tcp_subr.c,v 1.5 1994/10/08 22:39:58 phk Exp
++ */
++
++/*
++ * Changes and additions relating to SLiRP
++ * Copyright (c) 1995 Danny Gasparovski.
++ */
++
++#include "slirp.h"
++
++/* patchable/settable parameters for tcp */
++/* Don't do rfc1323 performance enhancements */
++#define TCP_DO_RFC1323 0
++
++/*
++ * Tcp initialization
++ *
++ * Seeds the per-instance initial send sequence counter (tcp_iss), makes the
++ * tcb list head point at itself in both directions (an empty circular
++ * list), and points the tcp_last_so cache at that list head.
++ */
++void tcp_init(Slirp *slirp)
++{
++    slirp->tcp_iss = 1; /* wrong */
++    slirp->tcb.so_next = slirp->tcb.so_prev = &slirp->tcb;
++    slirp->tcp_last_so = &slirp->tcb;
++}
++
++/*
++ * Close every TCP connection still linked on slirp->tcb.
++ *
++ * Repeatedly closes the first socket on the list until the list head
++ * points back at itself.
++ * NOTE(review): this terminates only if tcp_close() (via sofree()) unlinks
++ * the socket from the tcb list -- that happens outside this file; confirm.
++ */
++void tcp_cleanup(Slirp *slirp)
++{
++    while (slirp->tcb.so_next != &slirp->tcb) {
++        tcp_close(sototcpcb(slirp->tcb.so_next));
++    }
++}
++
++/*
++ * Create template to be used to send tcp packets on a connection.
++ * Call after host entry created, fills
++ * in a skeletal tcp/ip header, minimizing the amount of work
++ * necessary when the connection is used.
++ */
++void tcp_template(struct tcpcb *tp)
++{
++    register struct tcpiphdr *tmpl = &tp->t_template;
++    struct socket *so = tp->t_socket;
++
++    /* Blank pseudo-header: no mbuf back-pointer, zeroed IP overlay. */
++    tmpl->ti_mbuf = NULL;
++    memset(&tmpl->ti, 0, sizeof(tmpl->ti));
++    tmpl->ti_x0 = 0;
++
++    /*
++     * Family-specific part. The template describes packets sent *to the
++     * guest*, so the socket's foreign endpoint becomes the source and its
++     * local endpoint becomes the destination.
++     */
++    if (so->so_ffamily == AF_INET) {
++        tmpl->ti_pr = IPPROTO_TCP;
++        tmpl->ti_len = htons(sizeof(struct tcphdr));
++        tmpl->ti_src = so->so_faddr;
++        tmpl->ti_dst = so->so_laddr;
++        tmpl->ti_sport = so->so_fport;
++        tmpl->ti_dport = so->so_lport;
++    } else if (so->so_ffamily == AF_INET6) {
++        tmpl->ti_nh6 = IPPROTO_TCP;
++        tmpl->ti_len = htons(sizeof(struct tcphdr));
++        tmpl->ti_src6 = so->so_faddr6;
++        tmpl->ti_dst6 = so->so_laddr6;
++        tmpl->ti_sport = so->so_fport6;
++        tmpl->ti_dport = so->so_lport6;
++    } else {
++        g_assert_not_reached();
++    }
++
++    /* TCP header proper: everything zero except the data offset (5 words). */
++    tmpl->ti_seq = 0;
++    tmpl->ti_ack = 0;
++    tmpl->ti_x2 = 0;
++    tmpl->ti_off = 5;
++    tmpl->ti_flags = 0;
++    tmpl->ti_win = 0;
++    tmpl->ti_sum = 0;
++    tmpl->ti_urp = 0;
++}
++
++/*
++ * Send a single message to the TCP at address specified by
++ * the given TCP/IP header. If m == 0, then we make a copy
++ * of the tcpiphdr at ti and send directly to the addressed host.
++ * This is used to force keep alive messages out using the TCP
++ * template for a connection tp->t_template. If flags are given
++ * then we send a message back to the TCP which originated the
++ * segment ti, and discard the mbuf containing it and any other
++ * attached mbufs.
++ *
++ * In any case the ack and sequence number of the transmitted
++ * segment are as specified by the parameters.
++ */ ++void tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, ++ tcp_seq ack, tcp_seq seq, int flags, unsigned short af) ++{ ++ register int tlen; ++ int win = 0; ++ ++ DEBUG_CALL("tcp_respond"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("ti = %p", ti); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("ack = %u", ack); ++ DEBUG_ARG("seq = %u", seq); ++ DEBUG_ARG("flags = %x", flags); ++ ++ if (tp) ++ win = sbspace(&tp->t_socket->so_rcv); ++ if (m == NULL) { ++ if (!tp || (m = m_get(tp->t_socket->slirp)) == NULL) ++ return; ++ tlen = 0; ++ m->m_data += IF_MAXLINKHDR; ++ *mtod(m, struct tcpiphdr *) = *ti; ++ ti = mtod(m, struct tcpiphdr *); ++ switch (af) { ++ case AF_INET: ++ ti->ti.ti_i4.ih_x1 = 0; ++ break; ++ case AF_INET6: ++ ti->ti.ti_i6.ih_x1 = 0; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ flags = TH_ACK; ++ } else { ++ /* ++ * ti points into m so the next line is just making ++ * the mbuf point to ti ++ */ ++ m->m_data = (char *)ti; ++ ++ m->m_len = sizeof(struct tcpiphdr); ++ tlen = 0; ++#define xchg(a, b, type) \ ++ { \ ++ type t; \ ++ t = a; \ ++ a = b; \ ++ b = t; \ ++ } ++ switch (af) { ++ case AF_INET: ++ xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, uint32_t); ++ xchg(ti->ti_dport, ti->ti_sport, uint16_t); ++ break; ++ case AF_INET6: ++ xchg(ti->ti_dst6, ti->ti_src6, struct in6_addr); ++ xchg(ti->ti_dport, ti->ti_sport, uint16_t); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++#undef xchg ++ } ++ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + tlen)); ++ tlen += sizeof(struct tcpiphdr); ++ m->m_len = tlen; ++ ++ ti->ti_mbuf = NULL; ++ ti->ti_x0 = 0; ++ ti->ti_seq = htonl(seq); ++ ti->ti_ack = htonl(ack); ++ ti->ti_x2 = 0; ++ ti->ti_off = sizeof(struct tcphdr) >> 2; ++ ti->ti_flags = flags; ++ if (tp) ++ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); ++ else ++ ti->ti_win = htons((uint16_t)win); ++ ti->ti_urp = 0; ++ ti->ti_sum = 0; ++ ti->ti_sum = cksum(m, tlen); ++ ++ struct tcpiphdr tcpiph_save = *(mtod(m, 
struct tcpiphdr *)); ++ struct ip *ip; ++ struct ip6 *ip6; ++ ++ switch (af) { ++ case AF_INET: ++ m->m_data += ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ m->m_len -= ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ ip = mtod(m, struct ip *); ++ ip->ip_len = m->m_len; ++ ip->ip_dst = tcpiph_save.ti_dst; ++ ip->ip_src = tcpiph_save.ti_src; ++ ip->ip_p = tcpiph_save.ti_pr; ++ ++ if (flags & TH_RST) { ++ ip->ip_ttl = MAXTTL; ++ } else { ++ ip->ip_ttl = IPDEFTTL; ++ } ++ ++ ip_output(NULL, m); ++ break; ++ ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ ip6 = mtod(m, struct ip6 *); ++ ip6->ip_pl = tcpiph_save.ti_len; ++ ip6->ip_dst = tcpiph_save.ti_dst6; ++ ip6->ip_src = tcpiph_save.ti_src6; ++ ip6->ip_nh = tcpiph_save.ti_nh6; ++ ++ ip6_output(NULL, m, 0); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++} ++ ++/* ++ * Create a new TCP control block, making an ++ * empty reassembly queue and hooking it to the argument ++ * protocol control block. ++ */ ++struct tcpcb *tcp_newtcpcb(struct socket *so) ++{ ++ register struct tcpcb *tp; ++ ++ tp = g_new0(struct tcpcb, 1); ++ tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; ++ /* ++ * 40: length of IPv4 header (20) + TCP header (20) ++ * 60: length of IPv6 header (40) + TCP header (20) ++ */ ++ tp->t_maxseg = ++ MIN(so->slirp->if_mtu - ((so->so_ffamily == AF_INET) ? 40 : 60), ++ TCP_MAXSEG_MAX); ++ ++ tp->t_flags = TCP_DO_RFC1323 ? (TF_REQ_SCALE | TF_REQ_TSTMP) : 0; ++ tp->t_socket = so; ++ ++ /* ++ * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no ++ * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives ++ * reasonable initial retransmit time. 
++ */ ++ tp->t_srtt = TCPTV_SRTTBASE; ++ tp->t_rttvar = TCPTV_SRTTDFLT << 2; ++ tp->t_rttmin = TCPTV_MIN; ++ ++ TCPT_RANGESET(tp->t_rxtcur, ++ ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1, ++ TCPTV_MIN, TCPTV_REXMTMAX); ++ ++ tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; ++ tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; ++ tp->t_state = TCPS_CLOSED; ++ ++ so->so_tcpcb = tp; ++ ++ return (tp); ++} ++ ++/* ++ * Drop a TCP connection, reporting ++ * the specified error. If connection is synchronized, ++ * then send a RST to peer. ++ */ ++struct tcpcb *tcp_drop(struct tcpcb *tp, int err) ++{ ++ DEBUG_CALL("tcp_drop"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("errno = %d", errno); ++ ++ if (TCPS_HAVERCVDSYN(tp->t_state)) { ++ tp->t_state = TCPS_CLOSED; ++ (void)tcp_output(tp); ++ } ++ return (tcp_close(tp)); ++} ++ ++/* ++ * Close a TCP control block: ++ * discard all space held by the tcp ++ * discard internet protocol block ++ * wake up any sleepers ++ */ ++struct tcpcb *tcp_close(struct tcpcb *tp) ++{ ++ register struct tcpiphdr *t; ++ struct socket *so = tp->t_socket; ++ Slirp *slirp = so->slirp; ++ register struct mbuf *m; ++ ++ DEBUG_CALL("tcp_close"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ /* free the reassembly queue, if any */ ++ t = tcpfrag_list_first(tp); ++ while (!tcpfrag_list_end(t, tp)) { ++ t = tcpiphdr_next(t); ++ m = tcpiphdr_prev(t)->ti_mbuf; ++ remque(tcpiphdr2qlink(tcpiphdr_prev(t))); ++ m_free(m); ++ } ++ g_free(tp); ++ so->so_tcpcb = NULL; ++ /* clobber input socket cache if we're closing the cached connection */ ++ if (so == slirp->tcp_last_so) ++ slirp->tcp_last_so = &slirp->tcb; ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sbfree(&so->so_rcv); ++ sbfree(&so->so_snd); ++ sofree(so); ++ return ((struct tcpcb *)0); ++} ++ ++/* ++ * TCP protocol interface to socket abstraction. 
++ */
++
++/*
++ * The user side of the connection has been closed; walk the connection
++ * through the TCP shutdown states:
++ *  - CLOSED / LISTEN / SYN_SENT: nothing was ever received from the peer,
++ *    so the control block is simply closed and freed.
++ *  - SYN_RECEIVED / ESTABLISHED: we got a SYN but have not sent a FIN yet;
++ *    move to FIN_WAIT_1 so that a FIN is sent.
++ *  - CLOSE_WAIT: the peer's FIN already arrived; move to LAST_ACK.
++ *  - any other state: our FIN has already been sent (e.g. after
++ *    PRU_SHUTDOWN); the state is left alone and we keep waiting for the
++ *    peer's FIN or for keep-alives to go unanswered.
++ * The user can be let out of the close as soon as the FIN is acked.
++ * A NULL tp is accepted and ignored.
++ */
++void tcp_sockclosed(struct tcpcb *tp)
++{
++    DEBUG_CALL("tcp_sockclosed");
++    DEBUG_ARG("tp = %p", tp);
++
++    if (tp == NULL)
++        return;
++
++    /* Never synchronized: tear the connection down right away. */
++    if (tp->t_state == TCPS_CLOSED || tp->t_state == TCPS_LISTEN ||
++        tp->t_state == TCPS_SYN_SENT) {
++        tp->t_state = TCPS_CLOSED;
++        tcp_close(tp);
++        return;
++    }
++
++    if (tp->t_state == TCPS_SYN_RECEIVED ||
++        tp->t_state == TCPS_ESTABLISHED) {
++        tp->t_state = TCPS_FIN_WAIT_1;
++    } else if (tp->t_state == TCPS_CLOSE_WAIT) {
++        tp->t_state = TCPS_LAST_ACK;
++    }
++
++    /* Let the output routine emit whatever the new state calls for. */
++    tcp_output(tp);
++}
++
++/*
++ * Connect to a host on the Internet
++ * Called by tcp_input
++ * Only do a connect, the tcp fields will be set in tcp_input
++ * return 0 if there's a result of the connect,
++ * else return -1 means we're still connecting
++ * The return value is almost always -1 since the socket is
++ * nonblocking. Connect returns after the SYN is sent, and does
++ * not wait for ACK+SYN.
++ */ ++int tcp_fconnect(struct socket *so, unsigned short af) ++{ ++ int ret = 0; ++ ++ DEBUG_CALL("tcp_fconnect"); ++ DEBUG_ARG("so = %p", so); ++ ++ ret = so->s = slirp_socket(af, SOCK_STREAM, 0); ++ if (ret >= 0) { ++ ret = slirp_bind_outbound(so, af); ++ if (ret < 0) { ++ // bind failed - close socket ++ closesocket(so->s); ++ so->s = -1; ++ return (ret); ++ } ++ } ++ ++ if (ret >= 0) { ++ int opt, s = so->s; ++ struct sockaddr_storage addr; ++ ++ slirp_set_nonblock(s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ slirp_socket_set_fast_reuse(s); ++ opt = 1; ++ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(opt)); ++ opt = 1; ++ setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(opt)); ++ ++ addr = so->fhost.ss; ++ DEBUG_CALL(" connect()ing"); ++ if (sotranslate_out(so, &addr) < 0) { ++ return -1; ++ } ++ ++ /* We don't care what port we get */ ++ ret = connect(s, (struct sockaddr *)&addr, sockaddr_size(&addr)); ++ ++ /* ++ * If it's not in progress, it failed, so we just return 0, ++ * without clearing SS_NOFDREF ++ */ ++ soisfconnecting(so); ++ } ++ ++ return (ret); ++} ++ ++/* ++ * Accept the socket and connect to the local-host ++ * ++ * We have a problem. The correct thing to do would be ++ * to first connect to the local-host, and only if the ++ * connection is accepted, then do an accept() here. ++ * But, a) we need to know who's trying to connect ++ * to the socket to be able to SYN the local-host, and ++ * b) we are already connected to the foreign host by ++ * the time it gets to accept(), so... We simply accept ++ * here and SYN the local-host. 
++ */
++void tcp_connect(struct socket *inso)
++{
++    Slirp *slirp = inso->slirp;
++    struct socket *so;
++    struct sockaddr_storage addr;
++    socklen_t addrlen = sizeof(struct sockaddr_storage);
++    struct tcpcb *tp;
++    int s, opt;
++
++    DEBUG_CALL("tcp_connect");
++    DEBUG_ARG("inso = %p", inso);
++
++    /*
++     * If it's an SS_FACCEPTONCE socket, no need to socreate()
++     * another socket, just use the accept() socket.
++     */
++    if (inso->so_state & SS_FACCEPTONCE) {
++        /* FACCEPTONCE already have a tcpcb */
++        so = inso;
++    } else {
++        so = socreate(slirp);
++        tcp_attach(so);
++        so->lhost = inso->lhost;
++        so->so_ffamily = inso->so_ffamily;
++    }
++
++    tcp_mss(sototcpcb(so), 0);
++
++    s = accept(inso->s, (struct sockaddr *)&addr, &addrlen);
++    if (s < 0) {
++        tcp_close(sototcpcb(so)); /* This will sofree() as well */
++        return;
++    }
++    slirp_set_nonblock(s);
++    /*
++     * Register the descriptor accept() just returned. so->s is not it yet:
++     * it is either unset (fresh socket) or still the listening descriptor
++     * (FACCEPTONCE), and it is only switched over to s further down.
++     */
++    so->slirp->cb->register_poll_fd(s, so->slirp->opaque);
++    slirp_socket_set_fast_reuse(s);
++    opt = 1;
++    setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int));
++    slirp_socket_set_nodelay(s);
++
++    so->fhost.ss = addr;
++    sotranslate_accept(so);
++
++    /* Close the accept() socket, set right state */
++    if (inso->so_state & SS_FACCEPTONCE) {
++        /* If we only accept once, close the accept() socket */
++        so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque);
++        closesocket(so->s);
++
++        /* Don't select it yet, even though we have an FD */
++        /* if it's not FACCEPTONCE, it's already NOFDREF */
++        so->so_state = SS_NOFDREF;
++    }
++    so->s = s;
++    so->so_state |= SS_INCOMING;
++
++    so->so_iptos = tcp_tos(so);
++    tp = sototcpcb(so);
++
++    tcp_template(tp);
++
++    /* Act as the active opener towards the guest: send it a SYN. */
++    tp->t_state = TCPS_SYN_SENT;
++    tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
++    tp->iss = slirp->tcp_iss;
++    slirp->tcp_iss += TCP_ISSINCR / 2;
++    tcp_sendseqinit(tp);
++    tcp_output(tp);
++}
++
++/*
++ * Attach a TCPCB to a socket.
++ */ ++void tcp_attach(struct socket *so) ++{ ++ so->so_tcpcb = tcp_newtcpcb(so); ++ insque(so, &so->slirp->tcb); ++} ++ ++/* ++ * Set the socket's type of service field ++ */ ++static const struct tos_t tcptos[] = { ++ { 0, 20, IPTOS_THROUGHPUT, 0 }, /* ftp data */ ++ { 21, 21, IPTOS_LOWDELAY, EMU_FTP }, /* ftp control */ ++ { 0, 23, IPTOS_LOWDELAY, 0 }, /* telnet */ ++ { 0, 80, IPTOS_THROUGHPUT, 0 }, /* WWW */ ++ { 0, 513, IPTOS_LOWDELAY, EMU_RLOGIN | EMU_NOCONNECT }, /* rlogin */ ++ { 0, 544, IPTOS_LOWDELAY, EMU_KSH }, /* kshell */ ++ { 0, 543, IPTOS_LOWDELAY, 0 }, /* klogin */ ++ { 0, 6667, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC */ ++ { 0, 6668, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC undernet */ ++ { 0, 7070, IPTOS_LOWDELAY, EMU_REALAUDIO }, /* RealAudio control */ ++ { 0, 113, IPTOS_LOWDELAY, EMU_IDENT }, /* identd protocol */ ++ { 0, 0, 0, 0 } ++}; ++ ++/* ++ * Return TOS according to the above table ++ */ ++uint8_t tcp_tos(struct socket *so) ++{ ++ int i = 0; ++ ++ while (tcptos[i].tos) { ++ if ((tcptos[i].fport && (ntohs(so->so_fport) == tcptos[i].fport)) || ++ (tcptos[i].lport && (ntohs(so->so_lport) == tcptos[i].lport))) { ++ if (so->slirp->enable_emu) ++ so->so_emu = tcptos[i].emu; ++ return tcptos[i].tos; ++ } ++ i++; ++ } ++ return 0; ++} ++ ++/* ++ * Emulate programs that try and connect to us ++ * This includes ftp (the data connection is ++ * initiated by the server) and IRC (DCC CHAT and ++ * DCC SEND) for now ++ * ++ * NOTE: It's possible to crash SLiRP by sending it ++ * unstandard strings to emulate... if this is a problem, ++ * more checks are needed here ++ * ++ * XXX Assumes the whole command came in one packet ++ * XXX If there is more than one command in the packet, the others may ++ * be truncated. ++ * XXX If the command is too long, it may be truncated. ++ * ++ * XXX Some ftp clients will have their TOS set to ++ * LOWDELAY and so Nagel will kick in. 
Because of this,
++ * we'll get the first letter, followed by the rest, so
++ * we simply scan for ORT instead of PORT...
++ * DCC doesn't have this problem because there's other stuff
++ * in the packet before the DCC command.
++ *
++ * Return 1 if the mbuf m is still valid and should be
++ * sbappend()ed
++ *
++ * NOTE: if you return 0 you MUST m_free() the mbuf!
++ * (Every return path in the current implementation returns 1.)
++ */
++int tcp_emu(struct socket *so, struct mbuf *m)
++{
++    Slirp *slirp = so->slirp;
++    unsigned n1, n2, n3, n4, n5, n6;
++    char buff[257];
++    uint32_t laddr;
++    unsigned lport;
++    char *bptr;
++
++    DEBUG_CALL("tcp_emu");
++    DEBUG_ARG("so = %p", so);
++    DEBUG_ARG("m = %p", m);
++
++    switch (so->so_emu) {
++        int x, i;
++
++    /* TODO: IPv6 */
++    case EMU_IDENT:
++        /*
++         * Identification protocol as per rfc-1413
++         */
++
++        {
++            struct socket *tmpso;
++            struct sockaddr_in addr;
++            socklen_t addrlen = sizeof(struct sockaddr_in);
++            char *eol = g_strstr_len(m->m_data, m->m_len, "\r\n");
++
++            /* No complete line yet: pass the data through unchanged. */
++            if (!eol) {
++                return 1;
++            }
++
++            /* Temporarily terminate the line so sscanf() stops at it. */
++            *eol = '\0';
++            if (sscanf(m->m_data, "%u%*[ ,]%u", &n1, &n2) == 2) {
++                HTONS(n1);
++                HTONS(n2);
++                /* n2 is the one on our host */
++                for (tmpso = slirp->tcb.so_next; tmpso != &slirp->tcb;
++                     tmpso = tmpso->so_next) {
++                    if (tmpso->so_laddr.s_addr == so->so_laddr.s_addr &&
++                        tmpso->so_lport == n2 &&
++                        tmpso->so_faddr.s_addr == so->so_faddr.s_addr &&
++                        tmpso->so_fport == n1) {
++                        /* Substitute the port the host socket really uses. */
++                        if (getsockname(tmpso->s, (struct sockaddr *)&addr,
++                                        &addrlen) == 0)
++                            n2 = addr.sin_port;
++                        break;
++                    }
++                }
++                NTOHS(n1);
++                NTOHS(n2);
++                /* Grow the mbuf to fit the rewritten query, then format it. */
++                m_inc(m, g_snprintf(NULL, 0, "%d,%d\r\n", n1, n2) + 1);
++                m->m_len = slirp_fmt(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2);
++            } else {
++                /* Not a port pair: restore the byte we overwrote. */
++                *eol = '\r';
++            }
++
++            return 1;
++        }
++
++    case EMU_FTP: /* ftp */
++        m_inc(m, m->m_len + 1);
++        *(m->m_data + m->m_len) = 0; /* NUL terminate for strstr */
++        if ((bptr = (char *)strstr(m->m_data, "ORT")) != NULL) {
++            /*
++             * Need to emulate the PORT command
++             */
++            /* x == 7 means trailing data after the command was captured. */
++            x = sscanf(bptr, "ORT %u,%u,%u,%u,%u,%u\r\n%256[^\177]", &n1, &n2,
++                       &n3, &n4, &n5, &n6, buff);
++            if (x < 6)
++                return 1;
++
++            laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4));
++            lport = htons((n5 << 8) | (n6));
++
++            /* From here on 'so' refers to the new listening socket. */
++            if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport,
++                                 SS_FACCEPTONCE)) == NULL) {
++                return 1;
++            }
++            n6 = ntohs(so->so_fport);
++
++            n5 = (n6 >> 8) & 0xff;
++            n6 &= 0xff;
++
++            laddr = ntohl(so->so_faddr.s_addr);
++
++            n1 = ((laddr >> 24) & 0xff);
++            n2 = ((laddr >> 16) & 0xff);
++            n3 = ((laddr >> 8) & 0xff);
++            n4 = (laddr & 0xff);
++
++            m->m_len = bptr - m->m_data; /* Adjust length */
++            m->m_len += slirp_fmt(bptr, M_FREEROOM(m),
++                                  "ORT %d,%d,%d,%d,%d,%d\r\n%s",
++                                  n1, n2, n3, n4, n5, n6, x == 7 ? buff : "");
++            return 1;
++        } else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) {
++            /*
++             * Need to emulate the PASV response
++             */
++            x = sscanf(
++                bptr,
++                "27 Entering Passive Mode (%u,%u,%u,%u,%u,%u)\r\n%256[^\177]",
++                &n1, &n2, &n3, &n4, &n5, &n6, buff);
++            if (x < 6)
++                return 1;
++
++            laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4));
++            lport = htons((n5 << 8) | (n6));
++
++            /* From here on 'so' refers to the new listening socket. */
++            if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport,
++                                 SS_FACCEPTONCE)) == NULL) {
++                return 1;
++            }
++            n6 = ntohs(so->so_fport);
++
++            n5 = (n6 >> 8) & 0xff;
++            n6 &= 0xff;
++
++            laddr = ntohl(so->so_faddr.s_addr);
++
++            n1 = ((laddr >> 24) & 0xff);
++            n2 = ((laddr >> 16) & 0xff);
++            n3 = ((laddr >> 8) & 0xff);
++            n4 = (laddr & 0xff);
++
++            m->m_len = bptr - m->m_data; /* Adjust length */
++            m->m_len += slirp_fmt(bptr, M_FREEROOM(m),
++                                  "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s",
++                                  n1, n2, n3, n4, n5, n6, x == 7 ? buff : "");
++            return 1;
++        }
++
++        return 1;
++
++    case EMU_KSH:
++        /*
++         * The kshell (Kerberos rsh) and shell services both pass
++         * a local port number to carry signals to the server
++         * and stderr to the client. It is passed at the beginning
++         * of the connection as a NUL-terminated decimal ASCII string.
++         */
++        /* One-shot: emulation is switched off for the rest of the stream. */
++        so->so_emu = 0;
++        for (lport = 0, i = 0; i < m->m_len - 1; ++i) {
++            if (m->m_data[i] < '0' || m->m_data[i] > '9')
++                return 1; /* invalid number */
++            lport *= 10;
++            lport += m->m_data[i] - '0';
++        }
++        if (m->m_data[m->m_len - 1] == '\0' && lport != 0 &&
++            (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr,
++                             htons(lport), SS_FACCEPTONCE)) != NULL)
++            m->m_len = slirp_fmt0(m->m_data, M_ROOM(m),
++                                  "%d", ntohs(so->so_fport));
++        return 1;
++
++    case EMU_IRC:
++        /*
++         * Need to emulate DCC CHAT, DCC SEND and DCC MOVE
++         */
++        m_inc(m, m->m_len + 1);
++        *(m->m_data + m->m_len) = 0; /* NUL terminate the string for strstr */
++        if ((bptr = (char *)strstr(m->m_data, "DCC")) == NULL)
++            return 1;
++
++        /* The %256s is for the broken mIRC */
++        if (sscanf(bptr, "DCC CHAT %256s %u %u", buff, &laddr, &lport) == 3) {
++            if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr),
++                                 htons(lport), SS_FACCEPTONCE)) == NULL) {
++                return 1;
++            }
++            m->m_len = bptr - m->m_data; /* Adjust length */
++            m->m_len += slirp_fmt(bptr, M_FREEROOM(m),
++                                  "DCC CHAT chat %lu %u%c\n",
++                                  (unsigned long)ntohl(so->so_faddr.s_addr),
++                                  ntohs(so->so_fport), 1);
++        } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport,
++                          &n1) == 4) {
++            if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr),
++                                 htons(lport), SS_FACCEPTONCE)) == NULL) {
++                return 1;
++            }
++            m->m_len = bptr - m->m_data; /* Adjust length */
++            m->m_len += slirp_fmt(bptr, M_FREEROOM(m),
++                                  "DCC SEND %s %lu %u %u%c\n", buff,
++                                  (unsigned long)ntohl(so->so_faddr.s_addr),
++                                  ntohs(so->so_fport), n1, 1);
++        } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport,
++                          &n1) == 4) {
++            if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr),
++                                 htons(lport), SS_FACCEPTONCE)) == NULL) {
++                return 1;
++            }
++            m->m_len = bptr - m->m_data; /* Adjust length */
++            m->m_len += slirp_fmt(bptr, M_FREEROOM(m),
++                                  "DCC MOVE %s %lu %u %u%c\n", buff,
++                                  (unsigned long)ntohl(so->so_faddr.s_addr),
++                                  ntohs(so->so_fport), n1, 1);
++        }
++        return 1;
++
++    case EMU_REALAUDIO:
++        /*
++         * RealAudio emulation - JP. We must try to parse the incoming
++         * data and try to find the two characters that contain the
++         * port number. Then we redirect an udp port and replace the
++         * number with the real port we got.
++         *
++         * The 1.0 beta versions of the player are not supported
++         * any more.
++         *
++         * A typical packet for player version 1.0 (release version):
++         *
++         * 0000:50 4E 41 00 05
++         * 0000:00 01 00 02 1B D7 00 00 67 E6 6C DC 63 00 12 50 ........g.l.c..P
++         * 0010:4E 43 4C 49 45 4E 54 20 31 30 31 20 41 4C 50 48 NCLIENT 101 ALPH
++         * 0020:41 6C 00 00 52 00 17 72 61 66 69 6C 65 73 2F 76 Al..R..rafiles/v
++         * 0030:6F 61 2F 65 6E 67 6C 69 73 68 5F 2E 72 61 79 42 oa/english_.rayB
++         *
++         * Now the port number 0x1BD7 is found at offset 0x04 of the
++         * second packet. This time we received five bytes first and
++         * then the rest. You never know how many bytes you get.
++         *
++         * A typical packet for player version 2.0 (beta):
++         *
++         * 0000:50 4E 41 00 06 00 02 00 00 00 01 00 02 1B C1 00 PNA.............
++         * 0010:00 67 75 78 F5 63 00 0A 57 69 6E 32 2E 30 2E 30 .gux.c..Win2.0.0
++         * 0020:2E 35 6C 00 00 52 00 1C 72 61 66 69 6C 65 73 2F .5l..R..rafiles/
++         * 0030:77 65 62 73 69 74 65 2F 32 30 72 65 6C 65 61 73 website/20releas
++         * 0040:65 2E 72 61 79 53 00 00 06 36 42 e.rayS...6B
++         *
++         * Port number 0x1BC1 is found at offset 0x0d.
++         *
++         * This is just a horrible switch statement. Variable ra tells
++         * us where we're going.
++         */
++
++        bptr = m->m_data;
++        while (bptr < m->m_data + m->m_len) {
++            uint16_t p;
++            /*
++             * Parser position. It is static, so it survives between calls
++             * (data may arrive split over several packets) and is shared by
++             * every socket that goes through this emulation.
++             */
++            static int ra = 0;
++            char ra_tbl[4];
++
++            /* Signature bytes expected in states 0..3: 'P' 'N' 'A' NUL. */
++            ra_tbl[0] = 0x50;
++            ra_tbl[1] = 0x4e;
++            ra_tbl[2] = 0x41;
++            ra_tbl[3] = 0;
++
++            switch (ra) {
++            case 0:
++            case 2:
++            case 3:
++                if (*bptr++ != ra_tbl[ra]) {
++                    ra = 0;
++                    continue;
++                }
++                break;
++
++            case 1:
++                /*
++                 * We may get 0x50 several times, ignore them
++                 */
++                if (*bptr == 0x50) {
++                    ra = 1;
++                    bptr++;
++                    continue;
++                } else if (*bptr++ != ra_tbl[ra]) {
++                    ra = 0;
++                    continue;
++                }
++                break;
++
++            case 4:
++                /*
++                 * skip version number
++                 */
++                bptr++;
++                break;
++
++            case 5:
++                if (bptr == m->m_data + m->m_len - 1)
++                    return 1; /* We need two bytes */
++
++                /*
++                 * The difference between versions 1.0 and
++                 * 2.0 is here. For future versions of
++                 * the player this may need to be modified.
++                 */
++                if (*(bptr + 1) == 0x02)
++                    bptr += 8;
++                else
++                    bptr += 4;
++                break;
++
++            case 6:
++                /* This is the field containing the port
++                 * number that RA-player is listening to.
++                 */
++
++                if (bptr == m->m_data + m->m_len - 1)
++                    return 1; /* We need two bytes */
++
++                /* Port is stored big-endian in two consecutive bytes. */
++                lport = (((uint8_t *)bptr)[0] << 8) + ((uint8_t *)bptr)[1];
++                if (lport < 6970)
++                    lport += 256; /* don't know why */
++                if (lport < 6970 || lport > 7170)
++                    return 1; /* failed */
++
++                /* try to get udp port between 6970 - 7170 */
++                /* (the loop below actually probes 6970 through 7070) */
++                for (p = 6970; p < 7071; p++) {
++                    if (udp_listen(slirp, INADDR_ANY, htons(p),
++                                   so->so_laddr.s_addr, htons(lport),
++                                   SS_FACCEPTONCE)) {
++                        break;
++                    }
++                }
++                /* No port could be bound: write port 0 into the packet. */
++                if (p == 7071)
++                    p = 0;
++                *(uint8_t *)bptr++ = (p >> 8) & 0xff;
++                *(uint8_t *)bptr = p & 0xff;
++                ra = 0;
++                return 1; /* port redirected, we're done */
++                break;
++
++            default:
++                ra = 0;
++            }
++            /* Advance to the next parser state. */
++            ra++;
++        }
++        return 1;
++
++    default:
++        /* Ooops, not emulated, won't call tcp_emu again */
++        so->so_emu = 0;
++        return 1;
++    }
++}
++
++/*
++ * Do misc. config of SLiRP while it's running.
++ * Return 0 if this connections is to be closed, 1 otherwise, ++ * return 2 if this is a command-line connection ++ */ ++int tcp_ctl(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ struct sbuf *sb = &so->so_snd; ++ struct gfwd_list *ex_ptr; ++ ++ DEBUG_CALL("tcp_ctl"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* TODO: IPv6 */ ++ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { ++ /* Check if it's pty_exec */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == so->so_fport && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { ++ if (ex_ptr->write_cb) { ++ so->s = -1; ++ so->guestfwd = ex_ptr; ++ return 1; ++ } ++ DEBUG_MISC(" executing %s", ex_ptr->ex_exec); ++ if (ex_ptr->ex_unix) ++ return open_unix(so, ex_ptr->ex_unix); ++ else ++ return fork_exec(so, ex_ptr->ex_exec); ++ } ++ } ++ } ++ sb->sb_cc = slirp_fmt(sb->sb_wptr, sb->sb_datalen - (sb->sb_wptr - sb->sb_data), ++ "Error: No application configured.\r\n"); ++ sb->sb_wptr += sb->sb_cc; ++ return 0; ++} +diff --git a/slirp/src/tcp_timer.c b/slirp/src/tcp_timer.c +new file mode 100644 +index 0000000..102023e +--- /dev/null ++++ b/slirp/src/tcp_timer.c +@@ -0,0 +1,286 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93 ++ * tcp_timer.c,v 1.2 1994/08/02 07:49:10 davidg Exp ++ */ ++ ++#include "slirp.h" ++ ++static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer); ++ ++/* ++ * Fast timeout routine for processing delayed acks ++ */ ++void tcp_fasttimo(Slirp *slirp) ++{ ++ register struct socket *so; ++ register struct tcpcb *tp; ++ ++ DEBUG_CALL("tcp_fasttimo"); ++ ++ so = slirp->tcb.so_next; ++ if (so) ++ for (; so != &slirp->tcb; so = so->so_next) ++ if ((tp = (struct tcpcb *)so->so_tcpcb) && ++ (tp->t_flags & TF_DELACK)) { ++ tp->t_flags &= ~TF_DELACK; ++ tp->t_flags |= TF_ACKNOW; ++ (void)tcp_output(tp); ++ } ++} ++ ++/* ++ * Tcp protocol timeout routine called every 500 ms. ++ * Updates the timers in all active tcb's and ++ * causes finite state machine actions if timers expire. 
++ */ ++void tcp_slowtimo(Slirp *slirp) ++{ ++ register struct socket *ip, *ipnxt; ++ register struct tcpcb *tp; ++ register int i; ++ ++ DEBUG_CALL("tcp_slowtimo"); ++ ++ /* ++ * Search through tcb's and update active timers. ++ */ ++ ip = slirp->tcb.so_next; ++ if (ip == NULL) { ++ return; ++ } ++ for (; ip != &slirp->tcb; ip = ipnxt) { ++ ipnxt = ip->so_next; ++ tp = sototcpcb(ip); ++ if (tp == NULL) { ++ continue; ++ } ++ for (i = 0; i < TCPT_NTIMERS; i++) { ++ if (tp->t_timer[i] && --tp->t_timer[i] == 0) { ++ tcp_timers(tp, i); ++ if (ipnxt->so_prev != ip) ++ goto tpgone; ++ } ++ } ++ tp->t_idle++; ++ if (tp->t_rtt) ++ tp->t_rtt++; ++ tpgone:; ++ } ++ slirp->tcp_iss += TCP_ISSINCR / PR_SLOWHZ; /* increment iss */ ++ slirp->tcp_now++; /* for timestamps */ ++} ++ ++/* ++ * Cancel all timers for TCP tp. ++ */ ++void tcp_canceltimers(struct tcpcb *tp) ++{ ++ register int i; ++ ++ for (i = 0; i < TCPT_NTIMERS; i++) ++ tp->t_timer[i] = 0; ++} ++ ++const int tcp_backoff[TCP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, ++ 64, 64, 64, 64, 64, 64 }; ++ ++/* ++ * TCP timer processing. ++ */ ++static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer) ++{ ++ register int rexmt; ++ ++ DEBUG_CALL("tcp_timers"); ++ ++ switch (timer) { ++ /* ++ * 2 MSL timeout in shutdown went off. If we're closed but ++ * still waiting for peer to close and connection has been idle ++ * too long, or if 2MSL time is up from TIME_WAIT, delete connection ++ * control block. Otherwise, check again in a bit. ++ */ ++ case TCPT_2MSL: ++ if (tp->t_state != TCPS_TIME_WAIT && tp->t_idle <= TCP_MAXIDLE) ++ tp->t_timer[TCPT_2MSL] = TCPTV_KEEPINTVL; ++ else ++ tp = tcp_close(tp); ++ break; ++ ++ /* ++ * Retransmission timer went off. Message has not ++ * been acked within retransmit interval. Back off ++ * to a longer retransmit interval and retransmit one segment. 
++ */ ++ case TCPT_REXMT: ++ ++ /* ++ * XXXXX If a packet has timed out, then remove all the queued ++ * packets for that session. ++ */ ++ ++ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { ++ /* ++ * This is a hack to suit our terminal server here at the uni of ++ * canberra since they have trouble with zeroes... It usually lets ++ * them through unharmed, but under some conditions, it'll eat the ++ * zeros. If we keep retransmitting it, it'll keep eating the ++ * zeroes, so we keep retransmitting, and eventually the connection ++ * dies... (this only happens on incoming data) ++ * ++ * So, if we were gonna drop the connection from too many ++ * retransmits, don't... instead halve the t_maxseg, which might ++ * break up the NULLs and let them through ++ * ++ * *sigh* ++ */ ++ ++ tp->t_maxseg >>= 1; ++ if (tp->t_maxseg < 32) { ++ /* ++ * We tried our best, now the connection must die! ++ */ ++ tp->t_rxtshift = TCP_MAXRXTSHIFT; ++ tp = tcp_drop(tp, tp->t_softerror); ++ /* tp->t_softerror : ETIMEDOUT); */ /* XXX */ ++ return (tp); /* XXX */ ++ } ++ ++ /* ++ * Set rxtshift to 6, which is still at the maximum ++ * backoff time ++ */ ++ tp->t_rxtshift = 6; ++ } ++ rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; ++ TCPT_RANGESET(tp->t_rxtcur, rexmt, (short)tp->t_rttmin, ++ TCPTV_REXMTMAX); /* XXX */ ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ /* ++ * If losing, let the lower level know and try for ++ * a better route. Also, if we backed off this far, ++ * our srtt estimate is probably bogus. Clobber it ++ * so we'll take the next rtt measurement as our srtt; ++ * move the current srtt into rttvar to keep the current ++ * retransmit times until then. ++ */ ++ if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { ++ tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); ++ tp->t_srtt = 0; ++ } ++ tp->snd_nxt = tp->snd_una; ++ /* ++ * If timing a segment in this window, stop the timer. 
++ */ ++ tp->t_rtt = 0; ++ /* ++ * Close the congestion window down to one segment ++ * (we'll open it by one segment for each ack we get). ++ * Since we probably have a window's worth of unacked ++ * data accumulated, this "slow start" keeps us from ++ * dumping all that data as back-to-back packets (which ++ * might overwhelm an intermediate gateway). ++ * ++ * There are two phases to the opening: Initially we ++ * open by one mss on each ack. This makes the window ++ * size increase exponentially with time. If the ++ * window is larger than the path can handle, this ++ * exponential growth results in dropped packet(s) ++ * almost immediately. To get more time between ++ * drops but still "push" the network to take advantage ++ * of improving conditions, we switch from exponential ++ * to linear window opening at some threshold size. ++ * For a threshold, we use half the current window ++ * size, truncated to a multiple of the mss. ++ * ++ * (the minimum cwnd that will give us exponential ++ * growth is 2 mss. We don't allow the threshold ++ * to go below this.) ++ */ ++ { ++ unsigned win = MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; ++ if (win < 2) ++ win = 2; ++ tp->snd_cwnd = tp->t_maxseg; ++ tp->snd_ssthresh = win * tp->t_maxseg; ++ tp->t_dupacks = 0; ++ } ++ (void)tcp_output(tp); ++ break; ++ ++ /* ++ * Persistence timer into zero window. ++ * Force a byte to be output, if possible. ++ */ ++ case TCPT_PERSIST: ++ tcp_setpersist(tp); ++ tp->t_force = 1; ++ (void)tcp_output(tp); ++ tp->t_force = 0; ++ break; ++ ++ /* ++ * Keep-alive timer went off; send something ++ * or drop connection if idle for too long. 
++ */ ++ case TCPT_KEEP: ++ if (tp->t_state < TCPS_ESTABLISHED) ++ goto dropit; ++ ++ if (slirp_do_keepalive && tp->t_state <= TCPS_CLOSE_WAIT) { ++ if (tp->t_idle >= TCPTV_KEEP_IDLE + TCP_MAXIDLE) ++ goto dropit; ++ /* ++ * Send a packet designed to force a response ++ * if the peer is up and reachable: ++ * either an ACK if the connection is still alive, ++ * or an RST if the peer has closed the connection ++ * due to timeout or reboot. ++ * Using sequence number tp->snd_una-1 ++ * causes the transmitted zero-length segment ++ * to lie outside the receive window; ++ * by the protocol spec, this requires the ++ * correspondent TCP to respond. ++ */ ++ tcp_respond(tp, &tp->t_template, (struct mbuf *)NULL, tp->rcv_nxt, ++ tp->snd_una - 1, 0, tp->t_socket->so_ffamily); ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; ++ } else ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; ++ break; ++ ++ dropit: ++ tp = tcp_drop(tp, 0); ++ break; ++ } ++ ++ return (tp); ++} +diff --git a/slirp/src/tcp_timer.h b/slirp/src/tcp_timer.h +new file mode 100644 +index 0000000..584a559 +--- /dev/null ++++ b/slirp/src/tcp_timer.h +@@ -0,0 +1,130 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93 ++ * tcp_timer.h,v 1.4 1994/08/21 05:27:38 paul Exp ++ */ ++ ++#ifndef TCP_TIMER_H ++#define TCP_TIMER_H ++ ++/* ++ * Definitions of the TCP timers. These timers are counted ++ * down PR_SLOWHZ times a second. ++ */ ++#define TCPT_NTIMERS 4 ++ ++#define TCPT_REXMT 0 /* retransmit */ ++#define TCPT_PERSIST 1 /* retransmit persistence */ ++#define TCPT_KEEP 2 /* keep alive */ ++#define TCPT_2MSL 3 /* 2*msl quiet time timer */ ++ ++/* ++ * The TCPT_REXMT timer is used to force retransmissions. ++ * The TCP has the TCPT_REXMT timer set whenever segments ++ * have been sent for which ACKs are expected but not yet ++ * received. If an ACK is received which advances tp->snd_una, ++ * then the retransmit timer is cleared (if there are no more ++ * outstanding segments) or reset to the base value (if there ++ * are more ACKs expected). 
Whenever the retransmit timer goes off, ++ * we retransmit one unacknowledged segment, and do a backoff ++ * on the retransmit timer. ++ * ++ * The TCPT_PERSIST timer is used to keep window size information ++ * flowing even if the window goes shut. If all previous transmissions ++ * have been acknowledged (so that there are no retransmissions in progress), ++ * and the window is too small to bother sending anything, then we start ++ * the TCPT_PERSIST timer. When it expires, if the window is nonzero, ++ * we go to transmit state. Otherwise, at intervals send a single byte ++ * into the peer's window to force him to update our window information. ++ * We do this at most as often as TCPTV_PERSMIN time intervals, ++ * but no more frequently than the current estimate of round-trip ++ * packet time. The TCPT_PERSIST timer is cleared whenever we receive ++ * a window update from the peer. ++ * ++ * The TCPT_KEEP timer is used to keep connections alive. If a ++ * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time, ++ * but not yet established, then we drop the connection. Once the connection ++ * is established, if the connection is idle for TCPTV_KEEP_IDLE time ++ * (and keepalives have been enabled on the socket), we begin to probe ++ * the connection. We force the peer to send us a segment by sending: ++ * <SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK> ++ * This segment is (deliberately) outside the window, and should elicit ++ * an ack segment in response from the peer. If, despite the TCPT_KEEP ++ * initiated segments we cannot elicit a response from a peer in TCP_MAXIDLE ++ * amount of time probing, then we drop the connection. ++ */ ++ ++/* ++ * Time constants. ++ */ ++#define TCPTV_MSL (5 * PR_SLOWHZ) /* max seg lifetime (hah!)
*/ ++ ++#define TCPTV_SRTTBASE \ ++ 0 /* base roundtrip time; \ ++ if 0, no idea yet */ ++#define TCPTV_SRTTDFLT (3 * PR_SLOWHZ) /* assumed RTT if no info */ ++ ++#define TCPTV_PERSMIN (5 * PR_SLOWHZ) /* retransmit persistence */ ++#define TCPTV_PERSMAX (60 * PR_SLOWHZ) /* maximum persist interval */ ++ ++#define TCPTV_KEEP_INIT (75 * PR_SLOWHZ) /* initial connect keep alive */ ++#define TCPTV_KEEP_IDLE (120 * 60 * PR_SLOWHZ) /* dflt time before probing */ ++#define TCPTV_KEEPINTVL (75 * PR_SLOWHZ) /* default probe interval */ ++#define TCPTV_KEEPCNT 8 /* max probes before drop */ ++ ++#define TCPTV_MIN (1 * PR_SLOWHZ) /* minimum allowable value */ ++#define TCPTV_REXMTMAX (12 * PR_SLOWHZ) /* max allowable REXMT value */ ++ ++#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ ++ ++#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ ++ ++ ++/* ++ * Force a time value to be in a certain range. ++ */ ++#define TCPT_RANGESET(tv, value, tvmin, tvmax) \ ++ { \ ++ (tv) = (value); \ ++ if ((tv) < (tvmin)) \ ++ (tv) = (tvmin); \ ++ else if ((tv) > (tvmax)) \ ++ (tv) = (tvmax); \ ++ } ++ ++extern const int tcp_backoff[]; ++ ++struct tcpcb; ++ ++void tcp_fasttimo(Slirp *); ++void tcp_slowtimo(Slirp *); ++void tcp_canceltimers(struct tcpcb *); ++ ++#endif +diff --git a/slirp/src/tcp_var.h b/slirp/src/tcp_var.h +new file mode 100644 +index 0000000..c8da8cb +--- /dev/null ++++ b/slirp/src/tcp_var.h +@@ -0,0 +1,161 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993, 1994 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. 
Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_var.h 8.3 (Berkeley) 4/10/94 ++ * tcp_var.h,v 1.3 1994/08/21 05:27:39 paul Exp ++ */ ++ ++#ifndef TCP_VAR_H ++#define TCP_VAR_H ++ ++#include "tcpip.h" ++#include "tcp_timer.h" ++ ++/* ++ * Tcp control block, one per tcp; fields: ++ */ ++struct tcpcb { ++ struct tcpiphdr *seg_next; /* sequencing queue */ ++ struct tcpiphdr *seg_prev; ++ short t_state; /* state of this connection */ ++ short t_timer[TCPT_NTIMERS]; /* tcp timers */ ++ short t_rxtshift; /* log(2) of rexmt exp. 
backoff */ ++ short t_rxtcur; /* current retransmit value */ ++ short t_dupacks; /* consecutive dup acks recd */ ++ uint16_t t_maxseg; /* maximum segment size */ ++ uint8_t t_force; /* 1 if forcing out a byte */ ++ uint16_t t_flags; ++#define TF_ACKNOW 0x0001 /* ack peer immediately */ ++#define TF_DELACK 0x0002 /* ack, but try to delay it */ ++#define TF_NODELAY 0x0004 /* don't delay packets to coalesce */ ++#define TF_NOOPT 0x0008 /* don't use tcp options */ ++#define TF_SENTFIN 0x0010 /* have sent FIN */ ++#define TF_REQ_SCALE 0x0020 /* have/will request window scaling */ ++#define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */ ++#define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */ ++#define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */ ++#define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */ ++ ++ struct tcpiphdr t_template; /* static skeletal packet for xmit */ ++ ++ struct socket *t_socket; /* back pointer to socket */ ++ /* ++ * The following fields are used as in the protocol specification. ++ * See RFC783, Dec. 1981, page 21. ++ */ ++ /* send sequence variables */ ++ tcp_seq snd_una; /* send unacknowledged */ ++ tcp_seq snd_nxt; /* send next */ ++ tcp_seq snd_up; /* send urgent pointer */ ++ tcp_seq snd_wl1; /* window update seg seq number */ ++ tcp_seq snd_wl2; /* window update seg ack number */ ++ tcp_seq iss; /* initial send sequence number */ ++ uint32_t snd_wnd; /* send window */ ++ /* receive sequence variables */ ++ uint32_t rcv_wnd; /* receive window */ ++ tcp_seq rcv_nxt; /* receive next */ ++ tcp_seq rcv_up; /* receive urgent pointer */ ++ tcp_seq irs; /* initial receive sequence number */ ++ /* ++ * Additional variables for this implementation. 
++ */ ++ /* receive variables */ ++ tcp_seq rcv_adv; /* advertised window */ ++ /* retransmit variables */ ++ tcp_seq snd_max; /* highest sequence number sent; ++ * used to recognize retransmits ++ */ ++ /* congestion control (for slow start, source quench, retransmit after loss) ++ */ ++ uint32_t snd_cwnd; /* congestion-controlled window */ ++ uint32_t snd_ssthresh; /* snd_cwnd size threshold for ++ * for slow start exponential to ++ * linear switch ++ */ ++ /* ++ * transmit timing stuff. See below for scale of srtt and rttvar. ++ * "Variance" is actually smoothed difference. ++ */ ++ short t_idle; /* inactivity time */ ++ short t_rtt; /* round trip time */ ++ tcp_seq t_rtseq; /* sequence number being timed */ ++ short t_srtt; /* smoothed round-trip time */ ++ short t_rttvar; /* variance in round-trip time */ ++ uint16_t t_rttmin; /* minimum rtt allowed */ ++ uint32_t max_sndwnd; /* largest window peer has offered */ ++ ++ /* out-of-band data */ ++ uint8_t t_oobflags; /* have some */ ++ uint8_t t_iobc; /* input character */ ++#define TCPOOB_HAVEDATA 0x01 ++#define TCPOOB_HADDATA 0x02 ++ short t_softerror; /* possible error not yet reported */ ++ ++ /* RFC 1323 variables */ ++ uint8_t snd_scale; /* window scaling for send window */ ++ uint8_t rcv_scale; /* window scaling for recv window */ ++ uint8_t request_r_scale; /* pending window scaling */ ++ uint8_t requested_s_scale; ++ uint32_t ts_recent; /* timestamp echo data */ ++ uint32_t ts_recent_age; /* when last updated */ ++ tcp_seq last_ack_sent; ++}; ++ ++#define sototcpcb(so) ((so)->so_tcpcb) ++ ++/* ++ * The smoothed round-trip time and estimated variance ++ * are stored as fixed point numbers scaled by the values below. ++ * For convenience, these scales are also used in smoothing the average ++ * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). ++ * With these scales, srtt has 3 bits to the right of the binary point, ++ * and thus an "ALPHA" of 0.875. 
rttvar has 2 bits to the right of the ++ * binary point, and is smoothed with an ALPHA of 0.75. ++ */ ++#define TCP_RTT_SCALE 8 /* multiplier for srtt; 3 bits frac. */ ++#define TCP_RTT_SHIFT 3 /* shift for srtt; 3 bits frac. */ ++#define TCP_RTTVAR_SCALE 4 /* multiplier for rttvar; 2 bits */ ++#define TCP_RTTVAR_SHIFT 2 /* shift for rttvar; 2 bits */ ++ ++/* ++ * The initial retransmission should happen at rtt + 4 * rttvar. ++ * Because of the way we do the smoothing, srtt and rttvar ++ * will each average +1/2 tick of bias. When we compute ++ * the retransmit timer, we want 1/2 tick of rounding and ++ * 1 extra tick because of +-1/2 tick uncertainty in the ++ * firing of the timer. The bias will give us exactly the ++ * 1.5 tick we need. But, because the bias is ++ * statistical, we have to test that we don't drop below ++ * the minimum feasible timer (which is 2 ticks). ++ * This macro assumes that the value of TCP_RTTVAR_SCALE ++ * is the same as the multiplier for rttvar. ++ */ ++#define TCP_REXMTVAL(tp) (((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar) ++ ++#endif +diff --git a/slirp/src/tcpip.h b/slirp/src/tcpip.h +new file mode 100644 +index 0000000..d3df021 +--- /dev/null ++++ b/slirp/src/tcpip.h +@@ -0,0 +1,104 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3.
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcpip.h 8.1 (Berkeley) 6/10/93 ++ * tcpip.h,v 1.3 1994/08/21 05:27:40 paul Exp ++ */ ++ ++#ifndef TCPIP_H ++#define TCPIP_H ++ ++/* ++ * Tcp+ip header, after ip options removed. 
++ */ ++struct tcpiphdr { ++ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ ++ union { ++ struct { ++ struct in_addr ih_src; /* source internet address */ ++ struct in_addr ih_dst; /* destination internet address */ ++ uint8_t ih_x1; /* (unused) */ ++ uint8_t ih_pr; /* protocol */ ++ } ti_i4; ++ struct { ++ struct in6_addr ih_src; ++ struct in6_addr ih_dst; ++ uint8_t ih_x1; ++ uint8_t ih_nh; ++ } ti_i6; ++ } ti; ++ uint16_t ti_x0; ++ uint16_t ti_len; /* protocol length */ ++ struct tcphdr ti_t; /* tcp header */ ++}; ++#define ti_mbuf ih_mbuf.mptr ++#define ti_pr ti.ti_i4.ih_pr ++#define ti_src ti.ti_i4.ih_src ++#define ti_dst ti.ti_i4.ih_dst ++#define ti_src6 ti.ti_i6.ih_src ++#define ti_dst6 ti.ti_i6.ih_dst ++#define ti_nh6 ti.ti_i6.ih_nh ++#define ti_sport ti_t.th_sport ++#define ti_dport ti_t.th_dport ++#define ti_seq ti_t.th_seq ++#define ti_ack ti_t.th_ack ++#define ti_x2 ti_t.th_x2 ++#define ti_off ti_t.th_off ++#define ti_flags ti_t.th_flags ++#define ti_win ti_t.th_win ++#define ti_sum ti_t.th_sum ++#define ti_urp ti_t.th_urp ++ ++#define tcpiphdr2qlink(T) \ ++ ((struct qlink *)(((char *)(T)) - sizeof(struct qlink))) ++#define qlink2tcpiphdr(Q) \ ++ ((struct tcpiphdr *)(((char *)(Q)) + sizeof(struct qlink))) ++#define tcpiphdr_next(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->next) ++#define tcpiphdr_prev(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->prev) ++#define tcpfrag_list_first(T) qlink2tcpiphdr((T)->seg_next) ++#define tcpfrag_list_end(F, T) (tcpiphdr2qlink(F) == (struct qlink *)(T)) ++#define tcpfrag_list_empty(T) ((T)->seg_next == (struct tcpiphdr *)(T)) ++ ++/* This is the difference between the size of a tcpiphdr structure, and the ++ * size of actual ip+tcp headers, rounded up since we need to align data. 
*/ ++#define TCPIPHDR_DELTA \ ++ (MAX(0, (sizeof(struct tcpiphdr) - sizeof(struct ip) - \ ++ sizeof(struct tcphdr) + 3) & \ ++ ~3)) ++ ++/* ++ * Just a clean way to get to the first byte ++ * of the packet ++ */ ++struct tcpiphdr_2 { ++ struct tcpiphdr dummy; ++ char first_char; ++}; ++ ++#endif +diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c +new file mode 100644 +index 0000000..c209145 +--- /dev/null ++++ b/slirp/src/tftp.c +@@ -0,0 +1,462 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * tftp.c - a simple, read-only tftp server for qemu ++ * ++ * Copyright (c) 2004 Magnus Damm ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++ ++#include "slirp.h" ++ ++#include ++#include ++#include ++ ++static inline int tftp_session_in_use(struct tftp_session *spt) ++{ ++ return (spt->slirp != NULL); ++} ++ ++static inline void tftp_session_update(struct tftp_session *spt) ++{ ++ spt->timestamp = curtime; ++} ++ ++static void tftp_session_terminate(struct tftp_session *spt) ++{ ++ if (spt->fd >= 0) { ++ close(spt->fd); ++ spt->fd = -1; ++ } ++ g_free(spt->filename); ++ spt->slirp = NULL; ++} ++ ++static int tftp_session_allocate(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftp_t *tp) ++{ ++ struct tftp_session *spt; ++ int k; ++ ++ for (k = 0; k < TFTP_SESSIONS_MAX; k++) { ++ spt = &slirp->tftp_sessions[k]; ++ ++ if (!tftp_session_in_use(spt)) ++ goto found; ++ ++ /* sessions time out after 5 inactive seconds */ ++ if ((int)(curtime - spt->timestamp) > 5000) { ++ tftp_session_terminate(spt); ++ goto found; ++ } ++ } ++ ++ return -1; ++ ++found: ++ memset(spt, 0, sizeof(*spt)); ++ memcpy(&spt->client_addr, srcsas, sockaddr_size(srcsas)); ++ spt->fd = -1; ++ spt->block_size = 512; ++ spt->client_port = tp->udp.uh_sport; ++ spt->slirp = slirp; ++ ++ tftp_session_update(spt); ++ ++ return k; ++} ++ ++static int tftp_session_find(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftp_t *tp) ++{ ++ struct tftp_session *spt; ++ int k; ++ ++ for (k = 0; k < TFTP_SESSIONS_MAX; k++) { ++ spt = &slirp->tftp_sessions[k]; ++ ++ if (tftp_session_in_use(spt)) { ++ if (sockaddr_equal(&spt->client_addr, srcsas)) { ++ if (spt->client_port == tp->udp.uh_sport) { ++ return k; ++ } ++ } ++ } ++ } ++ ++ return -1; ++} ++ ++static int tftp_read_data(struct tftp_session *spt, uint32_t block_nr, ++ uint8_t *buf, int len) ++{ ++ int bytes_read = 0; ++ ++ if (spt->fd < 0) { ++ spt->fd = open(spt->filename, O_RDONLY | O_BINARY); ++ } ++ ++ if (spt->fd < 0) { ++ return -1; ++ } ++ ++ if (len) { ++ lseek(spt->fd, block_nr * spt->block_size, SEEK_SET); ++ ++ bytes_read = read(spt->fd, buf, len); ++ } ++ 
++ return bytes_read; ++} ++ ++static struct tftp_t *tftp_prep_mbuf_data(struct tftp_session *spt, ++ struct mbuf *m) ++{ ++ struct tftp_t *tp; ++ ++ memset(m->m_data, 0, m->m_size); ++ ++ m->m_data += IF_MAXLINKHDR; ++ if (spt->client_addr.ss_family == AF_INET6) { ++ m->m_data += sizeof(struct ip6); ++ } else { ++ m->m_data += sizeof(struct ip); ++ } ++ tp = (void *)m->m_data; ++ m->m_data += sizeof(struct udphdr); ++ ++ return tp; ++} ++ ++static void tftp_udp_output(struct tftp_session *spt, struct mbuf *m, ++ struct tftp_t *recv_tp) ++{ ++ if (spt->client_addr.ss_family == AF_INET6) { ++ struct sockaddr_in6 sa6, da6; ++ ++ sa6.sin6_addr = spt->slirp->vhost_addr6; ++ sa6.sin6_port = recv_tp->udp.uh_dport; ++ da6.sin6_addr = ((struct sockaddr_in6 *)&spt->client_addr)->sin6_addr; ++ da6.sin6_port = spt->client_port; ++ ++ udp6_output(NULL, m, &sa6, &da6); ++ } else { ++ struct sockaddr_in sa4, da4; ++ ++ sa4.sin_addr = spt->slirp->vhost_addr; ++ sa4.sin_port = recv_tp->udp.uh_dport; ++ da4.sin_addr = ((struct sockaddr_in *)&spt->client_addr)->sin_addr; ++ da4.sin_port = spt->client_port; ++ ++ udp_output(NULL, m, &sa4, &da4, IPTOS_LOWDELAY); ++ } ++} ++ ++static int tftp_send_oack(struct tftp_session *spt, const char *keys[], ++ uint32_t values[], int nb, struct tftp_t *recv_tp) ++{ ++ struct mbuf *m; ++ struct tftp_t *tp; ++ int i, n = 0; ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) ++ return -1; ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->tp_op = htons(TFTP_OACK); ++ for (i = 0; i < nb; i++) { ++ n += slirp_fmt0(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%s", keys[i]); ++ n += slirp_fmt0(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%u", values[i]); ++ } ++ ++ m->m_len = G_SIZEOF_MEMBER(struct tftp_t, tp_op) + n; ++ tftp_udp_output(spt, m, recv_tp); ++ ++ return 0; ++} ++ ++static void tftp_send_error(struct tftp_session *spt, uint16_t errorcode, ++ const char *msg, struct tftp_t *recv_tp) ++{ ++ struct mbuf *m; ++ struct tftp_t *tp; ++ ++ DEBUG_TFTP("tftp 
error msg: %s", msg); ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) { ++ goto out; ++ } ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->tp_op = htons(TFTP_ERROR); ++ tp->x.tp_error.tp_error_code = htons(errorcode); ++ slirp_pstrcpy((char *)tp->x.tp_error.tp_msg, sizeof(tp->x.tp_error.tp_msg), ++ msg); ++ ++ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX + 2) + 3 + ++ strlen(msg) - sizeof(struct udphdr); ++ tftp_udp_output(spt, m, recv_tp); ++ ++out: ++ tftp_session_terminate(spt); ++} ++ ++static void tftp_send_next_block(struct tftp_session *spt, ++ struct tftp_t *recv_tp) ++{ ++ struct mbuf *m; ++ struct tftp_t *tp; ++ int nobytes; ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) { ++ return; ++ } ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->tp_op = htons(TFTP_DATA); ++ tp->x.tp_data.tp_block_nr = htons((spt->block_nr + 1) & 0xffff); ++ ++ nobytes = tftp_read_data(spt, spt->block_nr, tp->x.tp_data.tp_buf, ++ spt->block_size); ++ ++ if (nobytes < 0) { ++ m_free(m); ++ ++ /* send "file not found" error back */ ++ ++ tftp_send_error(spt, 1, "File not found", tp); ++ ++ return; ++ } ++ ++ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX - nobytes) - ++ sizeof(struct udphdr); ++ tftp_udp_output(spt, m, recv_tp); ++ ++ if (nobytes == spt->block_size) { ++ tftp_session_update(spt); ++ } else { ++ tftp_session_terminate(spt); ++ } ++ ++ spt->block_nr++; ++} ++ ++static void tftp_handle_rrq(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftp_t *tp, int pktlen) ++{ ++ struct tftp_session *spt; ++ int s, k; ++ size_t prefix_len; ++ char *req_fname; ++ const char *option_name[2]; ++ uint32_t option_value[2]; ++ int nb_options = 0; ++ ++ /* check if a session already exists and if so terminate it */ ++ s = tftp_session_find(slirp, srcsas, tp); ++ if (s >= 0) { ++ tftp_session_terminate(&slirp->tftp_sessions[s]); ++ } ++ ++ s = tftp_session_allocate(slirp, srcsas, tp); ++ ++ if (s < 0) { ++ return; ++ } ++ ++ spt = &slirp->tftp_sessions[s]; ++ ++ /* 
unspecified prefix means service disabled */ ++ if (!slirp->tftp_prefix) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ /* skip header fields */ ++ k = 0; ++ pktlen -= offsetof(struct tftp_t, x.tp_buf); ++ ++ /* prepend tftp_prefix */ ++ prefix_len = strlen(slirp->tftp_prefix); ++ spt->filename = g_malloc(prefix_len + TFTP_FILENAME_MAX + 2); ++ memcpy(spt->filename, slirp->tftp_prefix, prefix_len); ++ spt->filename[prefix_len] = '/'; ++ ++ /* get name */ ++ req_fname = spt->filename + prefix_len + 1; ++ ++ while (1) { ++ if (k >= TFTP_FILENAME_MAX || k >= pktlen) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ req_fname[k] = tp->x.tp_buf[k]; ++ if (req_fname[k++] == '\0') { ++ break; ++ } ++ } ++ ++ DEBUG_TFTP("tftp rrq file: %s", req_fname); ++ ++ /* check mode */ ++ if ((pktlen - k) < 6) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ if (strcasecmp(&tp->x.tp_buf[k], "octet") != 0) { ++ tftp_send_error(spt, 4, "Unsupported transfer mode", tp); ++ return; ++ } ++ ++ k += 6; /* skipping octet */ ++ ++ /* do sanity checks on the filename */ ++ if ( ++#ifdef G_OS_WIN32 ++ strstr(req_fname, "..\\") || ++ req_fname[strlen(req_fname) - 1] == '\\' || ++#endif ++ strstr(req_fname, "../") || ++ req_fname[strlen(req_fname) - 1] == '/') { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ /* check if the file exists */ ++ if (tftp_read_data(spt, 0, NULL, 0) < 0) { ++ tftp_send_error(spt, 1, "File not found", tp); ++ return; ++ } ++ ++ if (tp->x.tp_buf[pktlen - 1] != 0) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ while (k < pktlen && nb_options < G_N_ELEMENTS(option_name)) { ++ const char *key, *value; ++ ++ key = &tp->x.tp_buf[k]; ++ k += strlen(key) + 1; ++ ++ if (k >= pktlen) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ value = &tp->x.tp_buf[k]; ++ k += strlen(value) + 1; ++ ++ if (strcasecmp(key, "tsize") 
== 0) { ++ int tsize = atoi(value); ++ struct stat stat_p; ++ ++ if (tsize == 0) { ++ if (stat(spt->filename, &stat_p) == 0) ++ tsize = stat_p.st_size; ++ else { ++ tftp_send_error(spt, 1, "File not found", tp); ++ return; ++ } ++ } ++ ++ option_name[nb_options] = "tsize"; ++ option_value[nb_options] = tsize; ++ nb_options++; ++ } else if (strcasecmp(key, "blksize") == 0) { ++ int blksize = atoi(value); ++ ++ /* Accept blksize up to our maximum size */ ++ if (blksize > 0) { ++ spt->block_size = MIN(blksize, TFTP_BLOCKSIZE_MAX); ++ option_name[nb_options] = "blksize"; ++ option_value[nb_options] = spt->block_size; ++ nb_options++; ++ } ++ } ++ } ++ ++ if (nb_options > 0) { ++ assert(nb_options <= G_N_ELEMENTS(option_name)); ++ tftp_send_oack(spt, option_name, option_value, nb_options, tp); ++ return; ++ } ++ ++ spt->block_nr = 0; ++ tftp_send_next_block(spt, tp); ++} ++ ++static void tftp_handle_ack(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftp_t *tp, int pktlen) ++{ ++ int s; ++ ++ s = tftp_session_find(slirp, srcsas, tp); ++ ++ if (s < 0) { ++ return; ++ } ++ ++ tftp_send_next_block(&slirp->tftp_sessions[s], tp); ++} ++ ++static void tftp_handle_error(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftp_t *tp, int pktlen) ++{ ++ int s; ++ ++ s = tftp_session_find(slirp, srcsas, tp); ++ ++ if (s < 0) { ++ return; ++ } ++ ++ tftp_session_terminate(&slirp->tftp_sessions[s]); ++} ++ ++void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m) ++{ ++ struct tftp_t *tp = (struct tftp_t *)m->m_data; ++ ++ switch (ntohs(tp->tp_op)) { ++ case TFTP_RRQ: ++ tftp_handle_rrq(m->slirp, srcsas, tp, m->m_len); ++ break; ++ ++ case TFTP_ACK: ++ tftp_handle_ack(m->slirp, srcsas, tp, m->m_len); ++ break; ++ ++ case TFTP_ERROR: ++ tftp_handle_error(m->slirp, srcsas, tp, m->m_len); ++ break; ++ } ++} +diff --git a/slirp/src/tftp.h b/slirp/src/tftp.h +new file mode 100644 +index 0000000..c47bb43 +--- /dev/null ++++ b/slirp/src/tftp.h +@@ -0,0 +1,52 @@ 
++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* tftp defines */ ++ ++#ifndef SLIRP_TFTP_H ++#define SLIRP_TFTP_H ++ ++#define TFTP_SESSIONS_MAX 20 ++ ++#define TFTP_SERVER 69 ++ ++#define TFTP_RRQ 1 ++#define TFTP_WRQ 2 ++#define TFTP_DATA 3 ++#define TFTP_ACK 4 ++#define TFTP_ERROR 5 ++#define TFTP_OACK 6 ++ ++#define TFTP_FILENAME_MAX 512 ++#define TFTP_BLOCKSIZE_MAX 1428 ++ ++struct tftp_t { ++ struct udphdr udp; ++ uint16_t tp_op; ++ union { ++ struct { ++ uint16_t tp_block_nr; ++ uint8_t tp_buf[TFTP_BLOCKSIZE_MAX]; ++ } tp_data; ++ struct { ++ uint16_t tp_error_code; ++ uint8_t tp_msg[TFTP_BLOCKSIZE_MAX]; ++ } tp_error; ++ char tp_buf[TFTP_BLOCKSIZE_MAX + 2]; ++ } x; ++} __attribute__((packed)); ++ ++struct tftp_session { ++ Slirp *slirp; ++ char *filename; ++ int fd; ++ uint16_t block_size; ++ ++ struct sockaddr_storage client_addr; ++ uint16_t client_port; ++ uint32_t block_nr; ++ ++ int timestamp; ++}; ++ ++void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/udp.c b/slirp/src/udp.c +new file mode 100644 +index 0000000..6bde20f +--- /dev/null ++++ b/slirp/src/udp.c +@@ -0,0 +1,361 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)udp_usrreq.c 8.4 (Berkeley) 1/21/94 ++ * udp_usrreq.c,v 1.4 1994/10/02 17:48:45 phk Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ * ++ * Please read the file COPYRIGHT for the ++ * terms and conditions of the copyright. 
++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++static uint8_t udp_tos(struct socket *so); ++ ++void udp_init(Slirp *slirp) ++{ ++ slirp->udb.so_next = slirp->udb.so_prev = &slirp->udb; ++ slirp->udp_last_so = &slirp->udb; ++} ++ ++void udp_cleanup(Slirp *slirp) ++{ ++ while (slirp->udb.so_next != &slirp->udb) { ++ udp_detach(slirp->udb.so_next); ++ } ++} ++ ++/* m->m_data points at ip packet header ++ * m->m_len length ip packet ++ * ip->ip_len length data (IPDU) ++ */ ++void udp_input(register struct mbuf *m, int iphlen) ++{ ++ Slirp *slirp = m->slirp; ++ register struct ip *ip; ++ register struct udphdr *uh; ++ int len; ++ struct ip save_ip; ++ struct socket *so; ++ struct sockaddr_storage lhost; ++ struct sockaddr_in *lhost4; ++ ++ DEBUG_CALL("udp_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("iphlen = %d", iphlen); ++ ++ /* ++ * Strip IP options, if any; should skip this, ++ * make available to user, and use on returned packets, ++ * but we don't yet have a way to check the checksum ++ * with options still present. ++ */ ++ if (iphlen > sizeof(struct ip)) { ++ ip_stripoptions(m, (struct mbuf *)0); ++ iphlen = sizeof(struct ip); ++ } ++ ++ /* ++ * Get IP and UDP header together in first mbuf. ++ */ ++ ip = mtod(m, struct ip *); ++ uh = (struct udphdr *)((char *)ip + iphlen); ++ ++ /* ++ * Make mbuf data length reflect UDP length. ++ * If not enough data to reflect UDP length, drop. ++ */ ++ len = ntohs((uint16_t)uh->uh_ulen); ++ ++ if (ip->ip_len != len) { ++ if (len > ip->ip_len) { ++ goto bad; ++ } ++ m_adj(m, len - ip->ip_len); ++ ip->ip_len = len; ++ } ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip = *ip; ++ save_ip.ip_len += iphlen; /* tcp_input subtracts this */ ++ ++ /* ++ * Checksum extended UDP header and data. 
++ */ ++ if (uh->uh_sum) { ++ memset(&((struct ipovly *)ip)->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ ((struct ipovly *)ip)->ih_x1 = 0; ++ ((struct ipovly *)ip)->ih_len = uh->uh_ulen; ++ if (cksum(m, len + sizeof(struct ip))) { ++ goto bad; ++ } ++ } ++ ++ lhost.ss_family = AF_INET; ++ lhost4 = (struct sockaddr_in *)&lhost; ++ lhost4->sin_addr = ip->ip_src; ++ lhost4->sin_port = uh->uh_sport; ++ ++ /* ++ * handle DHCP/BOOTP ++ */ ++ if (ntohs(uh->uh_dport) == BOOTP_SERVER && ++ (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || ++ ip->ip_dst.s_addr == 0xffffffff)) { ++ bootp_input(m); ++ goto bad; ++ } ++ ++ /* ++ * handle TFTP ++ */ ++ if (ntohs(uh->uh_dport) == TFTP_SERVER && ++ ip->ip_dst.s_addr == slirp->vhost_addr.s_addr) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ tftp_input(&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ if (slirp->restricted) { ++ goto bad; ++ } ++ ++ /* ++ * Locate pcb for datagram. ++ */ ++ so = solookup(&slirp->udp_last_so, &slirp->udb, &lhost, NULL); ++ ++ if (so == NULL) { ++ /* ++ * If there's no socket for this packet, ++ * create one ++ */ ++ so = socreate(slirp); ++ if (udp_attach(so, AF_INET) == -1) { ++ DEBUG_MISC(" udp_attach errno = %d-%s", errno, strerror(errno)); ++ sofree(so); ++ goto bad; ++ } ++ ++ /* ++ * Setup fields ++ */ ++ so->so_lfamily = AF_INET; ++ so->so_laddr = ip->ip_src; ++ so->so_lport = uh->uh_sport; ++ ++ if ((so->so_iptos = udp_tos(so)) == 0) ++ so->so_iptos = ip->ip_tos; ++ ++ /* ++ * XXXXX Here, check if it's in udpexec_list, ++ * and if it is, do the fork_exec() etc. ++ */ ++ } ++ ++ so->so_ffamily = AF_INET; ++ so->so_faddr = ip->ip_dst; /* XXX */ ++ so->so_fport = uh->uh_dport; /* XXX */ ++ ++ iphlen += sizeof(struct udphdr); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ ++ /* ++ * Now we sendto() the packet. 
++ */ ++ if (sosendto(so, m) == -1) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); ++ goto bad; ++ } ++ ++ m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ ++ ++ /* restore the orig mbuf packet */ ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ so->so_m = m; /* ICMP backup */ ++ ++ return; ++bad: ++ m_free(m); ++} ++ ++int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr, ++ struct sockaddr_in *daddr, int iptos) ++{ ++ register struct udpiphdr *ui; ++ int error = 0; ++ ++ DEBUG_CALL("udp_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("saddr = %s", inet_ntoa(saddr->sin_addr)); ++ DEBUG_ARG("daddr = %s", inet_ntoa(daddr->sin_addr)); ++ ++ /* ++ * Adjust for header ++ */ ++ m->m_data -= sizeof(struct udpiphdr); ++ m->m_len += sizeof(struct udpiphdr); ++ ++ /* ++ * Fill in mbuf with extended UDP header ++ * and addresses and length put into network format. ++ */ ++ ui = mtod(m, struct udpiphdr *); ++ memset(&ui->ui_i.ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ ui->ui_x1 = 0; ++ ui->ui_pr = IPPROTO_UDP; ++ ui->ui_len = htons(m->m_len - sizeof(struct ip)); ++ /* XXXXX Check for from-one-location sockets, or from-any-location sockets ++ */ ++ ui->ui_src = saddr->sin_addr; ++ ui->ui_dst = daddr->sin_addr; ++ ui->ui_sport = saddr->sin_port; ++ ui->ui_dport = daddr->sin_port; ++ ui->ui_ulen = ui->ui_len; ++ ++ /* ++ * Stuff checksum and output datagram. 
++ */ ++ ui->ui_sum = 0; ++ if ((ui->ui_sum = cksum(m, m->m_len)) == 0) ++ ui->ui_sum = 0xffff; ++ ((struct ip *)ui)->ip_len = m->m_len; ++ ++ ((struct ip *)ui)->ip_ttl = IPDEFTTL; ++ ((struct ip *)ui)->ip_tos = iptos; ++ ++ error = ip_output(so, m); ++ ++ return (error); ++} ++ ++int udp_attach(struct socket *so, unsigned short af) ++{ ++ so->s = slirp_socket(af, SOCK_DGRAM, 0); ++ if (so->s != -1) { ++ if (slirp_bind_outbound(so, af) != 0) { ++ // bind failed - close socket ++ closesocket(so->s); ++ so->s = -1; ++ return -1; ++ } ++ so->so_expire = curtime + SO_EXPIRE; ++ insque(so, &so->slirp->udb); ++ } ++ return (so->s); ++} ++ ++void udp_detach(struct socket *so) ++{ ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++} ++ ++static const struct tos_t udptos[] = { { 0, 53, IPTOS_LOWDELAY, 0 }, /* DNS */ ++ { 0, 0, 0, 0 } }; ++ ++static uint8_t udp_tos(struct socket *so) ++{ ++ int i = 0; ++ ++ while (udptos[i].tos) { ++ if ((udptos[i].fport && ntohs(so->so_fport) == udptos[i].fport) || ++ (udptos[i].lport && ntohs(so->so_lport) == udptos[i].lport)) { ++ if (so->slirp->enable_emu) ++ so->so_emu = udptos[i].emu; ++ return udptos[i].tos; ++ } ++ i++; ++ } ++ ++ return 0; ++} ++ ++struct socket *udp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, ++ uint32_t laddr, unsigned lport, int flags) ++{ ++ /* TODO: IPv6 */ ++ struct sockaddr_in addr; ++ struct socket *so; ++ socklen_t addrlen = sizeof(struct sockaddr_in); ++ ++ so = socreate(slirp); ++ so->s = slirp_socket(AF_INET, SOCK_DGRAM, 0); ++ if (so->s < 0) { ++ sofree(so); ++ return NULL; ++ } ++ so->so_expire = curtime + SO_EXPIRE; ++ insque(so, &slirp->udb); ++ ++ addr.sin_family = AF_INET; ++ addr.sin_addr.s_addr = haddr; ++ addr.sin_port = hport; ++ ++ if (bind(so->s, (struct sockaddr *)&addr, addrlen) < 0) { ++ udp_detach(so); ++ return NULL; ++ } ++ slirp_socket_set_fast_reuse(so->s); ++ ++ getsockname(so->s, (struct sockaddr *)&addr, &addrlen); ++ 
so->fhost.sin = addr; ++ sotranslate_accept(so); ++ so->so_lfamily = AF_INET; ++ so->so_lport = lport; ++ so->so_laddr.s_addr = laddr; ++ if (flags != SS_FACCEPTONCE) ++ so->so_expire = 0; ++ ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_ISFCONNECTED | flags; ++ ++ return so; ++} +diff --git a/slirp/src/udp.h b/slirp/src/udp.h +new file mode 100644 +index 0000000..c3b83fd +--- /dev/null ++++ b/slirp/src/udp.h +@@ -0,0 +1,90 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)udp.h 8.1 (Berkeley) 6/10/93 ++ * udp.h,v 1.3 1994/08/21 05:27:41 paul Exp ++ */ ++ ++#ifndef UDP_H ++#define UDP_H ++ ++#define UDP_TTL 0x60 ++#define UDP_UDPDATALEN 16192 ++ ++/* ++ * Udp protocol header. ++ * Per RFC 768, September, 1981. ++ */ ++struct udphdr { ++ uint16_t uh_sport; /* source port */ ++ uint16_t uh_dport; /* destination port */ ++ int16_t uh_ulen; /* udp length */ ++ uint16_t uh_sum; /* udp checksum */ ++}; ++ ++/* ++ * UDP kernel structures and variables. 
++ */ ++struct udpiphdr { ++ struct ipovly ui_i; /* overlaid ip structure */ ++ struct udphdr ui_u; /* udp header */ ++}; ++#define ui_mbuf ui_i.ih_mbuf.mptr ++#define ui_x1 ui_i.ih_x1 ++#define ui_pr ui_i.ih_pr ++#define ui_len ui_i.ih_len ++#define ui_src ui_i.ih_src ++#define ui_dst ui_i.ih_dst ++#define ui_sport ui_u.uh_sport ++#define ui_dport ui_u.uh_dport ++#define ui_ulen ui_u.uh_ulen ++#define ui_sum ui_u.uh_sum ++ ++/* ++ * Names for UDP sysctl objects ++ */ ++#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */ ++#define UDPCTL_MAXID 2 ++ ++struct mbuf; ++ ++void udp_init(Slirp *); ++void udp_cleanup(Slirp *); ++void udp_input(register struct mbuf *, int); ++int udp_attach(struct socket *, unsigned short af); ++void udp_detach(struct socket *); ++struct socket *udp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); ++int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr, ++ struct sockaddr_in *daddr, int iptos); ++ ++void udp6_input(register struct mbuf *); ++int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, ++ struct sockaddr_in6 *daddr); ++ ++#endif +diff --git a/slirp/src/udp6.c b/slirp/src/udp6.c +new file mode 100644 +index 0000000..6f9486b +--- /dev/null ++++ b/slirp/src/udp6.c +@@ -0,0 +1,173 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron ++ */ ++ ++#include "slirp.h" ++#include "udp.h" ++#include "dhcpv6.h" ++ ++void udp6_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ struct ip6 *ip, save_ip; ++ struct udphdr *uh; ++ int iphlen = sizeof(struct ip6); ++ int len; ++ struct socket *so; ++ struct sockaddr_in6 lhost; ++ ++ DEBUG_CALL("udp6_input"); ++ DEBUG_ARG("m = %p", m); ++ ++ if (slirp->restricted) { ++ goto bad; ++ } ++ ++ ip = mtod(m, struct ip6 *); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ uh = mtod(m, struct udphdr *); ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ ++ if (ip6_cksum(m)) { ++ goto bad; ++ } ++ 
++ len = ntohs((uint16_t)uh->uh_ulen); ++ ++ /* ++ * Make mbuf data length reflect UDP length. ++ * If not enough data to reflect UDP length, drop. ++ */ ++ if (ntohs(ip->ip_pl) != len) { ++ if (len > ntohs(ip->ip_pl)) { ++ goto bad; ++ } ++ m_adj(m, len - ntohs(ip->ip_pl)); ++ ip->ip_pl = htons(len); ++ } ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip = *ip; ++ ++ /* Locate pcb for datagram. */ ++ lhost.sin6_family = AF_INET6; ++ lhost.sin6_addr = ip->ip_src; ++ lhost.sin6_port = uh->uh_sport; ++ ++ /* handle DHCPv6 */ ++ if (ntohs(uh->uh_dport) == DHCPV6_SERVER_PORT && ++ (in6_equal(&ip->ip_dst, &slirp->vhost_addr6) || ++ in6_dhcp_multicast(&ip->ip_dst))) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ dhcpv6_input(&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ /* handle TFTP */ ++ if (ntohs(uh->uh_dport) == TFTP_SERVER && ++ !memcmp(ip->ip_dst.s6_addr, slirp->vhost_addr6.s6_addr, 16)) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ tftp_input((struct sockaddr_storage *)&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ so = solookup(&slirp->udp_last_so, &slirp->udb, ++ (struct sockaddr_storage *)&lhost, NULL); ++ ++ if (so == NULL) { ++ /* If there's no socket for this packet, create one. */ ++ so = socreate(slirp); ++ if (udp_attach(so, AF_INET6) == -1) { ++ DEBUG_MISC(" udp6_attach errno = %d-%s", errno, strerror(errno)); ++ sofree(so); ++ goto bad; ++ } ++ ++ /* Setup fields */ ++ so->so_lfamily = AF_INET6; ++ so->so_laddr6 = ip->ip_src; ++ so->so_lport6 = uh->uh_sport; ++ } ++ ++ so->so_ffamily = AF_INET6; ++ so->so_faddr6 = ip->ip_dst; /* XXX */ ++ so->so_fport6 = uh->uh_dport; /* XXX */ ++ ++ iphlen += sizeof(struct udphdr); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ ++ /* ++ * Now we sendto() the packet. 
++ */ ++ if (sosendto(so, m) == -1) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); ++ icmp6_send_error(m, ICMP6_UNREACH, ICMP6_UNREACH_NO_ROUTE); ++ goto bad; ++ } ++ ++ m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ ++ ++ /* restore the orig mbuf packet */ ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ so->so_m = m; ++ ++ return; ++bad: ++ m_free(m); ++} ++ ++int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, ++ struct sockaddr_in6 *daddr) ++{ ++ struct ip6 *ip; ++ struct udphdr *uh; ++ ++ DEBUG_CALL("udp6_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ /* adjust for header */ ++ m->m_data -= sizeof(struct udphdr); ++ m->m_len += sizeof(struct udphdr); ++ uh = mtod(m, struct udphdr *); ++ m->m_data -= sizeof(struct ip6); ++ m->m_len += sizeof(struct ip6); ++ ip = mtod(m, struct ip6 *); ++ ++ /* Build IP header */ ++ ip->ip_pl = htons(m->m_len - sizeof(struct ip6)); ++ ip->ip_nh = IPPROTO_UDP; ++ ip->ip_src = saddr->sin6_addr; ++ ip->ip_dst = daddr->sin6_addr; ++ ++ /* Build UDP header */ ++ uh->uh_sport = saddr->sin6_port; ++ uh->uh_dport = daddr->sin6_port; ++ uh->uh_ulen = ip->ip_pl; ++ uh->uh_sum = 0; ++ uh->uh_sum = ip6_cksum(m); ++ if (uh->uh_sum == 0) { ++ uh->uh_sum = 0xffff; ++ } ++ ++ return ip6_output(so, m, 0); ++} +diff --git a/slirp/src/util.c b/slirp/src/util.c +new file mode 100644 +index 0000000..570c53f +--- /dev/null ++++ b/slirp/src/util.c +@@ -0,0 +1,428 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * util.c (mostly based on QEMU os-win32.c) ++ * ++ * Copyright (c) 2003-2008 Fabrice Bellard ++ * Copyright (c) 2010-2016 Red Hat, Inc. ++ * ++ * QEMU library functions for win32 which are shared between QEMU and ++ * the QEMU tools. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "util.h" ++ ++#include ++#include ++#include ++ ++#if defined(_WIN32) ++int slirp_inet_aton(const char *cp, struct in_addr *ia) ++{ ++ uint32_t addr = inet_addr(cp); ++ if (addr == 0xffffffff) { ++ return 0; ++ } ++ ia->s_addr = addr; ++ return 1; ++} ++#endif ++ ++void slirp_set_nonblock(int fd) ++{ ++#ifndef _WIN32 ++ int f; ++ f = fcntl(fd, F_GETFL); ++ assert(f != -1); ++ f = fcntl(fd, F_SETFL, f | O_NONBLOCK); ++ assert(f != -1); ++#else ++ unsigned long opt = 1; ++ ioctlsocket(fd, FIONBIO, &opt); ++#endif ++} ++ ++static void slirp_set_cloexec(int fd) ++{ ++#ifndef _WIN32 ++ int f; ++ f = fcntl(fd, F_GETFD); ++ assert(f != -1); ++ f = fcntl(fd, F_SETFD, f | FD_CLOEXEC); ++ assert(f != -1); ++#endif ++} ++ ++/* ++ * Opens a socket with FD_CLOEXEC set ++ */ ++int slirp_socket(int domain, int type, int protocol) ++{ ++ int ret; ++ ++#ifdef SOCK_CLOEXEC ++ ret = socket(domain, type | SOCK_CLOEXEC, protocol); ++ if (ret != -1 || errno != EINVAL) { ++ return ret; ++ } ++#endif ++ ret = socket(domain, type, protocol); ++ if (ret >= 0) { ++ slirp_set_cloexec(ret); ++ } ++ ++ return ret; ++} ++ ++#ifdef _WIN32 ++static int socket_error(void) ++{ ++ switch (WSAGetLastError()) { ++ case 0: ++ return 0; ++ case WSAEINTR: ++ return EINTR; ++ case WSAEINVAL: ++ return EINVAL; ++ case WSA_INVALID_HANDLE: ++ return EBADF; ++ case WSA_NOT_ENOUGH_MEMORY: ++ return ENOMEM; ++ case WSA_INVALID_PARAMETER: ++ return EINVAL; ++ case WSAENAMETOOLONG: ++ return ENAMETOOLONG; ++ case WSAENOTEMPTY: ++ return ENOTEMPTY; ++ case WSAEWOULDBLOCK: ++ /* not using EWOULDBLOCK as we don't want code to have ++ * to check both EWOULDBLOCK and EAGAIN */ ++ return EAGAIN; ++ case WSAEINPROGRESS: ++ return EINPROGRESS; ++ case WSAEALREADY: ++ return EALREADY; ++ case WSAENOTSOCK: ++ return ENOTSOCK; ++ case WSAEDESTADDRREQ: ++ return EDESTADDRREQ; ++ case WSAEMSGSIZE: ++ return EMSGSIZE; ++ case WSAEPROTOTYPE: ++ return EPROTOTYPE; ++ case WSAENOPROTOOPT: ++ return ENOPROTOOPT; ++ 
case WSAEPROTONOSUPPORT: ++ return EPROTONOSUPPORT; ++ case WSAEOPNOTSUPP: ++ return EOPNOTSUPP; ++ case WSAEAFNOSUPPORT: ++ return EAFNOSUPPORT; ++ case WSAEADDRINUSE: ++ return EADDRINUSE; ++ case WSAEADDRNOTAVAIL: ++ return EADDRNOTAVAIL; ++ case WSAENETDOWN: ++ return ENETDOWN; ++ case WSAENETUNREACH: ++ return ENETUNREACH; ++ case WSAENETRESET: ++ return ENETRESET; ++ case WSAECONNABORTED: ++ return ECONNABORTED; ++ case WSAECONNRESET: ++ return ECONNRESET; ++ case WSAENOBUFS: ++ return ENOBUFS; ++ case WSAEISCONN: ++ return EISCONN; ++ case WSAENOTCONN: ++ return ENOTCONN; ++ case WSAETIMEDOUT: ++ return ETIMEDOUT; ++ case WSAECONNREFUSED: ++ return ECONNREFUSED; ++ case WSAELOOP: ++ return ELOOP; ++ case WSAEHOSTUNREACH: ++ return EHOSTUNREACH; ++ default: ++ return EIO; ++ } ++} ++ ++#undef ioctlsocket ++int slirp_ioctlsocket_wrap(int fd, int req, void *val) ++{ ++ int ret; ++ ret = ioctlsocket(fd, req, val); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef closesocket ++int slirp_closesocket_wrap(int fd) ++{ ++ int ret; ++ ret = closesocket(fd); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef connect ++int slirp_connect_wrap(int sockfd, const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = connect(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef listen ++int slirp_listen_wrap(int sockfd, int backlog) ++{ ++ int ret; ++ ret = listen(sockfd, backlog); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef bind ++int slirp_bind_wrap(int sockfd, const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = bind(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef socket ++int slirp_socket_wrap(int domain, int type, int protocol) ++{ ++ int ret; ++ ret = socket(domain, type, protocol); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return 
ret; ++} ++ ++#undef accept ++int slirp_accept_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = accept(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef shutdown ++int slirp_shutdown_wrap(int sockfd, int how) ++{ ++ int ret; ++ ret = shutdown(sockfd, how); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getsockopt ++int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, ++ int *optlen) ++{ ++ int ret; ++ ret = getsockopt(sockfd, level, optname, optval, optlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef setsockopt ++int slirp_setsockopt_wrap(int sockfd, int level, int optname, ++ const void *optval, int optlen) ++{ ++ int ret; ++ ret = setsockopt(sockfd, level, optname, optval, optlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getpeername ++int slirp_getpeername_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = getpeername(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getsockname ++int slirp_getsockname_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = getsockname(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef send ++ssize_t slirp_send_wrap(int sockfd, const void *buf, size_t len, int flags) ++{ ++ int ret; ++ ret = send(sockfd, buf, len, flags); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef sendto ++ssize_t slirp_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, ++ const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = sendto(sockfd, buf, len, flags, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef recv ++ssize_t slirp_recv_wrap(int sockfd, void *buf, size_t len, int 
flags) ++{ ++ int ret; ++ ret = recv(sockfd, buf, len, flags); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef recvfrom ++ssize_t slirp_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, ++ struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = recvfrom(sockfd, buf, len, flags, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++#endif /* WIN32 */ ++ ++void slirp_pstrcpy(char *buf, int buf_size, const char *str) ++{ ++ int c; ++ char *q = buf; ++ ++ if (buf_size <= 0) ++ return; ++ ++ for (;;) { ++ c = *str++; ++ if (c == 0 || q >= buf + buf_size - 1) ++ break; ++ *q++ = c; ++ } ++ *q = '\0'; ++} ++ ++static int slirp_vsnprintf(char *str, size_t size, ++ const char *format, va_list args) ++{ ++ int rv = g_vsnprintf(str, size, format, args); ++ ++ if (rv < 0) { ++ g_error("g_vsnprintf() failed: %s", g_strerror(errno)); ++ } ++ ++ return rv; ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - returns the number of bytes written (excluding optional \0-ending) ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt(char *str, size_t size, const char *format, ...) ++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv > size) { ++ g_critical("slirp_fmt() truncation"); ++ } ++ ++ return MIN(rv, size); ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - always \0-end (unless size == 0) ++ * - returns the number of bytes actually written, including \0 ending ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt0(char *str, size_t size, const char *format, ...) 
++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv >= size) { ++ g_critical("slirp_fmt0() truncation"); ++ if (size > 0) ++ str[size - 1] = '\0'; ++ rv = size; ++ } else { ++ rv += 1; /* include \0 */ ++ } ++ ++ return rv; ++} +diff --git a/slirp/src/util.h b/slirp/src/util.h +new file mode 100644 +index 0000000..d67b3d0 +--- /dev/null ++++ b/slirp/src/util.h +@@ -0,0 +1,189 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * Copyright (c) 2003-2008 Fabrice Bellard ++ * Copyright (c) 2010-2019 Red Hat, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#ifndef UTIL_H_ ++#define UTIL_H_ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef _WIN32 ++#include ++#include ++#else ++#include ++#include ++#include ++#endif ++ ++#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) ++#define SLIRP_PACKED __attribute__((gcc_struct, packed)) ++#else ++#define SLIRP_PACKED __attribute__((packed)) ++#endif ++ ++#ifndef DIV_ROUND_UP ++#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) ++#endif ++ ++#ifndef container_of ++#define container_of(ptr, type, member) \ ++ __extension__({ \ ++ void *__mptr = (void *)(ptr); \ ++ ((type *)(__mptr - offsetof(type, member))); \ ++ }) ++#endif ++ ++#ifndef G_SIZEOF_MEMBER ++#define G_SIZEOF_MEMBER(type, member) sizeof(((type *)0)->member) ++#endif ++ ++#if defined(_WIN32) /* CONFIG_IOVEC */ ++#if !defined(IOV_MAX) /* XXX: to avoid duplicate with QEMU osdep.h */ ++struct iovec { ++ void *iov_base; ++ size_t iov_len; ++}; ++#endif ++#else ++#include ++#endif ++ ++#define stringify(s) tostring(s) ++#define tostring(s) #s ++ ++#define SCALE_MS 1000000 ++ ++#define ETH_ALEN 6 ++#define ETH_HLEN 14 ++#define ETH_P_IP (0x0800) /* Internet Protocol packet */ ++#define ETH_P_ARP (0x0806) /* Address Resolution packet */ ++#define ETH_P_IPV6 (0x86dd) ++#define ETH_P_VLAN (0x8100) ++#define ETH_P_DVLAN (0x88a8) ++#define ETH_P_NCSI (0x88f8) ++#define ETH_P_UNKNOWN (0xffff) ++ ++/* FIXME: remove me when made standalone */ ++#ifdef _WIN32 ++#undef accept ++#undef bind ++#undef closesocket ++#undef connect ++#undef getpeername ++#undef getsockname ++#undef getsockopt ++#undef ioctlsocket ++#undef listen ++#undef recv ++#undef recvfrom ++#undef send ++#undef sendto ++#undef setsockopt ++#undef shutdown ++#undef socket ++#endif ++ ++#ifdef _WIN32 ++#define connect slirp_connect_wrap ++int slirp_connect_wrap(int fd, const struct sockaddr *addr, int addrlen); ++#define listen slirp_listen_wrap ++int 
slirp_listen_wrap(int fd, int backlog); ++#define bind slirp_bind_wrap ++int slirp_bind_wrap(int fd, const struct sockaddr *addr, int addrlen); ++#define socket slirp_socket_wrap ++int slirp_socket_wrap(int domain, int type, int protocol); ++#define accept slirp_accept_wrap ++int slirp_accept_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define shutdown slirp_shutdown_wrap ++int slirp_shutdown_wrap(int fd, int how); ++#define getpeername slirp_getpeername_wrap ++int slirp_getpeername_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define getsockname slirp_getsockname_wrap ++int slirp_getsockname_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define send slirp_send_wrap ++ssize_t slirp_send_wrap(int fd, const void *buf, size_t len, int flags); ++#define sendto slirp_sendto_wrap ++ssize_t slirp_sendto_wrap(int fd, const void *buf, size_t len, int flags, ++ const struct sockaddr *dest_addr, int addrlen); ++#define recv slirp_recv_wrap ++ssize_t slirp_recv_wrap(int fd, void *buf, size_t len, int flags); ++#define recvfrom slirp_recvfrom_wrap ++ssize_t slirp_recvfrom_wrap(int fd, void *buf, size_t len, int flags, ++ struct sockaddr *src_addr, int *addrlen); ++#define closesocket slirp_closesocket_wrap ++int slirp_closesocket_wrap(int fd); ++#define ioctlsocket slirp_ioctlsocket_wrap ++int slirp_ioctlsocket_wrap(int fd, int req, void *val); ++#define getsockopt slirp_getsockopt_wrap ++int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, ++ int *optlen); ++#define setsockopt slirp_setsockopt_wrap ++int slirp_setsockopt_wrap(int sockfd, int level, int optname, ++ const void *optval, int optlen); ++#define inet_aton slirp_inet_aton ++int slirp_inet_aton(const char *cp, struct in_addr *ia); ++#else ++#define closesocket(s) close(s) ++#define ioctlsocket(s, r, v) ioctl(s, r, v) ++#endif ++ ++int slirp_socket(int domain, int type, int protocol); ++void slirp_set_nonblock(int fd); ++ ++static inline int slirp_socket_set_nodelay(int 
fd) ++{ ++ int v = 1; ++ return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v)); ++} ++ ++static inline int slirp_socket_set_fast_reuse(int fd) ++{ ++#ifndef _WIN32 ++ int v = 1; ++ return setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &v, sizeof(v)); ++#else ++ /* Enabling the reuse of an endpoint that was used by a socket still in ++ * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows ++ * fast reuse is the default and SO_REUSEADDR does strange things. So we ++ * don't have to do anything here. More info can be found at: ++ * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */ ++ return 0; ++#endif ++} ++ ++void slirp_pstrcpy(char *buf, int buf_size, const char *str); ++ ++int slirp_fmt(char *str, size_t size, const char *format, ...) G_GNUC_PRINTF(3, 4); ++int slirp_fmt0(char *str, size_t size, const char *format, ...) G_GNUC_PRINTF(3, 4); ++ ++#endif +diff --git a/slirp/src/version.c b/slirp/src/version.c +new file mode 100644 +index 0000000..93e0be9 +--- /dev/null ++++ b/slirp/src/version.c +@@ -0,0 +1,8 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#include "libslirp.h" ++ ++const char * ++slirp_version_string(void) ++{ ++ return SLIRP_VERSION_STRING; ++} +diff --git a/slirp/src/vmstate.c b/slirp/src/vmstate.c +new file mode 100644 +index 0000000..68cc172 +--- /dev/null ++++ b/slirp/src/vmstate.c +@@ -0,0 +1,444 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * VMState interpreter ++ * ++ * Copyright (c) 2009-2018 Red Hat Inc ++ * ++ * Authors: ++ * Juan Quintela ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. 
Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. 
++ */ ++#include ++#include ++#include ++#include ++ ++#include "stream.h" ++#include "vmstate.h" ++ ++static int get_nullptr(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ if (slirp_istream_read_u8(f) == VMS_NULLPTR_MARKER) { ++ return 0; ++ } ++ g_warning("vmstate: get_nullptr expected VMS_NULLPTR_MARKER"); ++ return -EINVAL; ++} ++ ++static int put_nullptr(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++ ++{ ++ if (pv == NULL) { ++ slirp_ostream_write_u8(f, VMS_NULLPTR_MARKER); ++ return 0; ++ } ++ g_warning("vmstate: put_nullptr must be called with pv == NULL"); ++ return -EINVAL; ++} ++ ++const VMStateInfo slirp_vmstate_info_nullptr = { ++ .name = "uint64", ++ .get = get_nullptr, ++ .put = put_nullptr, ++}; ++ ++/* 8 bit unsigned int */ ++ ++static int get_uint8(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint8_t *v = pv; ++ *v = slirp_istream_read_u8(f); ++ return 0; ++} ++ ++static int put_uint8(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint8_t *v = pv; ++ slirp_ostream_write_u8(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint8 = { ++ .name = "uint8", ++ .get = get_uint8, ++ .put = put_uint8, ++}; ++ ++/* 16 bit unsigned int */ ++ ++static int get_uint16(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint16_t *v = pv; ++ *v = slirp_istream_read_u16(f); ++ return 0; ++} ++ ++static int put_uint16(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint16_t *v = pv; ++ slirp_ostream_write_u16(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint16 = { ++ .name = "uint16", ++ .get = get_uint16, ++ .put = put_uint16, ++}; ++ ++/* 32 bit unsigned int */ ++ ++static int get_uint32(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint32_t *v = pv; ++ *v = slirp_istream_read_u32(f); ++ return 0; ++} ++ ++static int 
put_uint32(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint32_t *v = pv; ++ slirp_ostream_write_u32(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint32 = { ++ .name = "uint32", ++ .get = get_uint32, ++ .put = put_uint32, ++}; ++ ++/* 16 bit int */ ++ ++static int get_int16(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int16_t *v = pv; ++ *v = slirp_istream_read_i16(f); ++ return 0; ++} ++ ++static int put_int16(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int16_t *v = pv; ++ slirp_ostream_write_i16(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_int16 = { ++ .name = "int16", ++ .get = get_int16, ++ .put = put_int16, ++}; ++ ++/* 32 bit int */ ++ ++static int get_int32(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int32_t *v = pv; ++ *v = slirp_istream_read_i32(f); ++ return 0; ++} ++ ++static int put_int32(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int32_t *v = pv; ++ slirp_ostream_write_i32(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_int32 = { ++ .name = "int32", ++ .get = get_int32, ++ .put = put_int32, ++}; ++ ++/* vmstate_info_tmp, see VMSTATE_WITH_TMP, the idea is that we allocate ++ * a temporary buffer and the pre_load/pre_save methods in the child vmsd ++ * copy stuff from the parent into the child and do calculations to fill ++ * in fields that don't really exist in the parent but need to be in the ++ * stream. 
++ */ ++static int get_tmp(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int ret; ++ const VMStateDescription *vmsd = field->vmsd; ++ int version_id = field->version_id; ++ void *tmp = g_malloc(size); ++ ++ /* Writes the parent field which is at the start of the tmp */ ++ *(void **)tmp = pv; ++ ret = slirp_vmstate_load_state(f, vmsd, tmp, version_id); ++ g_free(tmp); ++ return ret; ++} ++ ++static int put_tmp(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ const VMStateDescription *vmsd = field->vmsd; ++ void *tmp = g_malloc(size); ++ int ret; ++ ++ /* Writes the parent field which is at the start of the tmp */ ++ *(void **)tmp = pv; ++ ret = slirp_vmstate_save_state(f, vmsd, tmp); ++ g_free(tmp); ++ ++ return ret; ++} ++ ++const VMStateInfo slirp_vmstate_info_tmp = { ++ .name = "tmp", ++ .get = get_tmp, ++ .put = put_tmp, ++}; ++ ++/* uint8_t buffers */ ++ ++static int get_buffer(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ slirp_istream_read(f, pv, size); ++ return 0; ++} ++ ++static int put_buffer(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ slirp_ostream_write(f, pv, size); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_buffer = { ++ .name = "buffer", ++ .get = get_buffer, ++ .put = put_buffer, ++}; ++ ++static int vmstate_n_elems(void *opaque, const VMStateField *field) ++{ ++ int n_elems = 1; ++ ++ if (field->flags & VMS_ARRAY) { ++ n_elems = field->num; ++ } else if (field->flags & VMS_VARRAY_INT32) { ++ n_elems = *(int32_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT32) { ++ n_elems = *(uint32_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT16) { ++ n_elems = *(uint16_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT8) { ++ n_elems = *(uint8_t *)(opaque + field->num_offset); ++ } ++ ++ if (field->flags & VMS_MULTIPLY_ELEMENTS) { ++ 
n_elems *= field->num; ++ } ++ ++ return n_elems; ++} ++ ++static int vmstate_size(void *opaque, const VMStateField *field) ++{ ++ int size = field->size; ++ ++ if (field->flags & VMS_VBUFFER) { ++ size = *(int32_t *)(opaque + field->size_offset); ++ if (field->flags & VMS_MULTIPLY) { ++ size *= field->size; ++ } ++ } ++ ++ return size; ++} ++ ++static int vmstate_save_state_v(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id) ++{ ++ int ret = 0; ++ const VMStateField *field = vmsd->fields; ++ ++ if (vmsd->pre_save) { ++ ret = vmsd->pre_save(opaque); ++ if (ret) { ++ g_warning("pre-save failed: %s", vmsd->name); ++ return ret; ++ } ++ } ++ ++ while (field->name) { ++ if ((field->field_exists && field->field_exists(opaque, version_id)) || ++ (!field->field_exists && field->version_id <= version_id)) { ++ void *first_elem = opaque + field->offset; ++ int i, n_elems = vmstate_n_elems(opaque, field); ++ int size = vmstate_size(opaque, field); ++ ++ if (field->flags & VMS_POINTER) { ++ first_elem = *(void **)first_elem; ++ assert(first_elem || !n_elems || !size); ++ } ++ for (i = 0; i < n_elems; i++) { ++ void *curr_elem = first_elem + size * i; ++ ++ if (field->flags & VMS_ARRAY_OF_POINTER) { ++ assert(curr_elem); ++ curr_elem = *(void **)curr_elem; ++ } ++ if (!curr_elem && size) { ++ /* if null pointer write placeholder and do not follow */ ++ assert(field->flags & VMS_ARRAY_OF_POINTER); ++ ret = slirp_vmstate_info_nullptr.put(f, curr_elem, size, ++ NULL); ++ } else if (field->flags & VMS_STRUCT) { ++ ret = slirp_vmstate_save_state(f, field->vmsd, curr_elem); ++ } else if (field->flags & VMS_VSTRUCT) { ++ ret = vmstate_save_state_v(f, field->vmsd, curr_elem, ++ field->struct_version_id); ++ } else { ++ ret = field->info->put(f, curr_elem, size, field); ++ } ++ if (ret) { ++ g_warning("Save of field %s/%s failed", vmsd->name, ++ field->name); ++ return ret; ++ } ++ } ++ } else { ++ if (field->flags & VMS_MUST_EXIST) { ++ 
g_warning("Output state validation failed: %s/%s", vmsd->name, ++ field->name); ++ assert(!(field->flags & VMS_MUST_EXIST)); ++ } ++ } ++ field++; ++ } ++ ++ return 0; ++} ++ ++int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque) ++{ ++ return vmstate_save_state_v(f, vmsd, opaque, vmsd->version_id); ++} ++ ++static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque) ++{ ++ if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) { ++ size_t size = vmstate_size(opaque, field); ++ size *= vmstate_n_elems(opaque, field); ++ if (size) { ++ *(void **)ptr = g_malloc(size); ++ } ++ } ++} ++ ++int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id) ++{ ++ VMStateField *field = vmsd->fields; ++ int ret = 0; ++ ++ if (version_id > vmsd->version_id) { ++ g_warning("%s: incoming version_id %d is too new " ++ "for local version_id %d", ++ vmsd->name, version_id, vmsd->version_id); ++ return -EINVAL; ++ } ++ if (vmsd->pre_load) { ++ int ret = vmsd->pre_load(opaque); ++ if (ret) { ++ return ret; ++ } ++ } ++ while (field->name) { ++ if ((field->field_exists && field->field_exists(opaque, version_id)) || ++ (!field->field_exists && field->version_id <= version_id)) { ++ void *first_elem = opaque + field->offset; ++ int i, n_elems = vmstate_n_elems(opaque, field); ++ int size = vmstate_size(opaque, field); ++ ++ vmstate_handle_alloc(first_elem, field, opaque); ++ if (field->flags & VMS_POINTER) { ++ first_elem = *(void **)first_elem; ++ assert(first_elem || !n_elems || !size); ++ } ++ for (i = 0; i < n_elems; i++) { ++ void *curr_elem = first_elem + size * i; ++ ++ if (field->flags & VMS_ARRAY_OF_POINTER) { ++ curr_elem = *(void **)curr_elem; ++ } ++ if (!curr_elem && size) { ++ /* if null pointer check placeholder and do not follow */ ++ assert(field->flags & VMS_ARRAY_OF_POINTER); ++ ret = slirp_vmstate_info_nullptr.get(f, curr_elem, size, ++ NULL); ++ } else if 
(field->flags & VMS_STRUCT) { ++ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, ++ field->vmsd->version_id); ++ } else if (field->flags & VMS_VSTRUCT) { ++ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, ++ field->struct_version_id); ++ } else { ++ ret = field->info->get(f, curr_elem, size, field); ++ } ++ if (ret < 0) { ++ g_warning("Failed to load %s:%s", vmsd->name, field->name); ++ return ret; ++ } ++ } ++ } else if (field->flags & VMS_MUST_EXIST) { ++ g_warning("Input validation failed: %s/%s", vmsd->name, ++ field->name); ++ return -1; ++ } ++ field++; ++ } ++ if (vmsd->post_load) { ++ ret = vmsd->post_load(opaque, version_id); ++ } ++ return ret; ++} +diff --git a/slirp/src/vmstate.h b/slirp/src/vmstate.h +new file mode 100644 +index 0000000..94c6a4b +--- /dev/null ++++ b/slirp/src/vmstate.h +@@ -0,0 +1,391 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * QEMU migration/snapshot declarations ++ * ++ * Copyright (c) 2009-2011 Red Hat, Inc. ++ * ++ * Original author: Juan Quintela ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. 
++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#ifndef VMSTATE_H_ ++#define VMSTATE_H_ ++ ++#include ++#include ++#include ++#include "slirp.h" ++#include "stream.h" ++ ++#define stringify(s) tostring(s) ++#define tostring(s) #s ++ ++typedef struct VMStateInfo VMStateInfo; ++typedef struct VMStateDescription VMStateDescription; ++typedef struct VMStateField VMStateField; ++ ++int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque); ++int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id); ++ ++/* VMStateInfo allows customized migration of objects that don't fit in ++ * any category in VMStateFlags. Additional information is always passed ++ * into get and put in terms of field and vmdesc parameters. However ++ * these two parameters should only be used in cases when customized ++ * handling is needed, such as QTAILQ. For primitive data types such as ++ * integer, field and vmdesc parameters should be ignored inside get/put. 
++ */ ++struct VMStateInfo { ++ const char *name; ++ int (*get)(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field); ++ int (*put)(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field); ++}; ++ ++enum VMStateFlags { ++ /* Ignored */ ++ VMS_SINGLE = 0x001, ++ ++ /* The struct member at opaque + VMStateField.offset is a pointer ++ * to the actual field (e.g. struct a { uint8_t *b; ++ * }). Dereference the pointer before using it as basis for ++ * further pointer arithmetic (see e.g. VMS_ARRAY). Does not ++ * affect the meaning of VMStateField.num_offset or ++ * VMStateField.size_offset; see VMS_VARRAY* and VMS_VBUFFER for ++ * those. */ ++ VMS_POINTER = 0x002, ++ ++ /* The field is an array of fixed size. VMStateField.num contains ++ * the number of entries in the array. The size of each entry is ++ * given by VMStateField.size and / or opaque + ++ * VMStateField.size_offset; see VMS_VBUFFER and ++ * VMS_MULTIPLY. Each array entry will be processed individually ++ * (VMStateField.info.get()/put() if VMS_STRUCT is not set, ++ * recursion into VMStateField.vmsd if VMS_STRUCT is set). May not ++ * be combined with VMS_VARRAY*. */ ++ VMS_ARRAY = 0x004, ++ ++ /* The field is itself a struct, containing one or more ++ * fields. Recurse into VMStateField.vmsd. Most useful in ++ * combination with VMS_ARRAY / VMS_VARRAY*, recursing into each ++ * array entry. */ ++ VMS_STRUCT = 0x008, ++ ++ /* The field is an array of variable size. The int32_t at opaque + ++ * VMStateField.num_offset contains the number of entries in the ++ * array. See the VMS_ARRAY description regarding array handling ++ * in general. May not be combined with VMS_ARRAY or any other ++ * VMS_VARRAY*. */ ++ VMS_VARRAY_INT32 = 0x010, ++ ++ /* Ignored */ ++ VMS_BUFFER = 0x020, ++ ++ /* The field is a (fixed-size or variable-size) array of pointers ++ * (e.g. struct a { uint8_t *b[]; }). Dereference each array entry ++ * before using it. 
Note: Does not imply any one of VMS_ARRAY / ++ * VMS_VARRAY*; these need to be set explicitly. */ ++ VMS_ARRAY_OF_POINTER = 0x040, ++ ++ /* The field is an array of variable size. The uint16_t at opaque ++ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT16 = 0x080, ++ ++ /* The size of the individual entries (a single array entry if ++ * VMS_ARRAY or any of VMS_VARRAY* are set, or the field itself if ++ * neither is set) is variable (i.e. not known at compile-time), ++ * but the same for all entries. Use the int32_t at opaque + ++ * VMStateField.size_offset (subject to VMS_MULTIPLY) to determine ++ * the size of each (and every) entry. */ ++ VMS_VBUFFER = 0x100, ++ ++ /* Multiply the entry size given by the int32_t at opaque + ++ * VMStateField.size_offset (see VMS_VBUFFER description) with ++ * VMStateField.size to determine the number of bytes to be ++ * allocated. Only valid in combination with VMS_VBUFFER. */ ++ VMS_MULTIPLY = 0x200, ++ ++ /* The field is an array of variable size. The uint8_t at opaque + ++ * VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT8 = 0x400, ++ ++ /* The field is an array of variable size. The uint32_t at opaque ++ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT32 = 0x800, ++ ++ /* Fail loading the serialised VM state if this field is missing ++ * from the input. 
*/ ++ VMS_MUST_EXIST = 0x1000, ++ ++ /* When loading serialised VM state, allocate memory for the ++ * (entire) field. Only valid in combination with ++ * VMS_POINTER. Note: Not all combinations with other flags are ++ * currently supported, e.g. VMS_ALLOC|VMS_ARRAY_OF_POINTER won't ++ * cause the individual entries to be allocated. */ ++ VMS_ALLOC = 0x2000, ++ ++ /* Multiply the number of entries given by the integer at opaque + ++ * VMStateField.num_offset (see VMS_VARRAY*) with VMStateField.num ++ * to determine the number of entries in the array. Only valid in ++ * combination with one of VMS_VARRAY*. */ ++ VMS_MULTIPLY_ELEMENTS = 0x4000, ++ ++ /* A structure field that is like VMS_STRUCT, but uses ++ * VMStateField.struct_version_id to tell which version of the ++ * structure we are referencing to use. */ ++ VMS_VSTRUCT = 0x8000, ++}; ++ ++struct VMStateField { ++ const char *name; ++ size_t offset; ++ size_t size; ++ size_t start; ++ int num; ++ size_t num_offset; ++ size_t size_offset; ++ const VMStateInfo *info; ++ enum VMStateFlags flags; ++ const VMStateDescription *vmsd; ++ int version_id; ++ int struct_version_id; ++ bool (*field_exists)(void *opaque, int version_id); ++}; ++ ++struct VMStateDescription { ++ const char *name; ++ int version_id; ++ int (*pre_load)(void *opaque); ++ int (*post_load)(void *opaque, int version_id); ++ int (*pre_save)(void *opaque); ++ VMStateField *fields; ++}; ++ ++ ++extern const VMStateInfo slirp_vmstate_info_int16; ++extern const VMStateInfo slirp_vmstate_info_int32; ++extern const VMStateInfo slirp_vmstate_info_uint8; ++extern const VMStateInfo slirp_vmstate_info_uint16; ++extern const VMStateInfo slirp_vmstate_info_uint32; ++ ++/** Put this in the stream when migrating a null pointer.*/ ++#define VMS_NULLPTR_MARKER (0x30U) /* '0' */ ++extern const VMStateInfo slirp_vmstate_info_nullptr; ++ ++extern const VMStateInfo slirp_vmstate_info_buffer; ++extern const VMStateInfo slirp_vmstate_info_tmp; ++ ++#define 
type_check_array(t1, t2, n) ((t1(*)[n])0 - (t2 *)0) ++#define type_check_pointer(t1, t2) ((t1 **)0 - (t2 *)0) ++#define typeof_field(type, field) typeof(((type *)0)->field) ++#define type_check(t1, t2) ((t1 *)0 - (t2 *)0) ++ ++#define vmstate_offset_value(_state, _field, _type) \ ++ (offsetof(_state, _field) + type_check(_type, typeof_field(_state, _field))) ++ ++#define vmstate_offset_pointer(_state, _field, _type) \ ++ (offsetof(_state, _field) + \ ++ type_check_pointer(_type, typeof_field(_state, _field))) ++ ++#define vmstate_offset_array(_state, _field, _type, _num) \ ++ (offsetof(_state, _field) + \ ++ type_check_array(_type, typeof_field(_state, _field), _num)) ++ ++#define vmstate_offset_buffer(_state, _field) \ ++ vmstate_offset_array(_state, _field, uint8_t, \ ++ sizeof(typeof_field(_state, _field))) ++ ++/* In the macros below, if there is a _version, that means the macro's ++ * field will be processed only if the version being received is >= ++ * the _version specified. In general, if you add a new field, you ++ * would increment the structure's version and put that version ++ * number into the new field so it would only be processed with the ++ * new version. ++ * ++ * In particular, for VMSTATE_STRUCT() and friends the _version does ++ * *NOT* pick the version of the sub-structure. It works just as ++ * specified above. The version of the top-level structure received ++ * is passed down to all sub-structures. This means that the ++ * sub-structures must have version that are compatible with all the ++ * structures that use them. ++ * ++ * If you want to specify the version of the sub-structure, use ++ * VMSTATE_VSTRUCT(), which allows the specific sub-structure version ++ * to be directly specified. 
++ */ ++ ++#define VMSTATE_SINGLE_TEST(_field, _state, _test, _version, _info, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .size = sizeof(_type), .info = &(_info), \ ++ .flags = VMS_SINGLE, \ ++ .offset = vmstate_offset_value(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_ARRAY(_field, _state, _num, _version, _info, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), .num = (_num), \ ++ .info = &(_info), .size = sizeof(_type), .flags = VMS_ARRAY, \ ++ .offset = vmstate_offset_array(_state, _field, _type, _num), \ ++ } ++ ++#define VMSTATE_STRUCT_TEST(_field, _state, _test, _version, _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .vmsd = &(_vmsd), .size = sizeof(_type), \ ++ .flags = VMS_STRUCT, \ ++ .offset = vmstate_offset_value(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_STRUCT_POINTER_V(_field, _state, _version, _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .vmsd = &(_vmsd), .size = sizeof(_type *), \ ++ .flags = VMS_STRUCT | VMS_POINTER, \ ++ .offset = vmstate_offset_pointer(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, _test, _version, \ ++ _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .num = (_num), .field_exists = (_test), \ ++ .version_id = (_version), .vmsd = &(_vmsd), .size = sizeof(_type), \ ++ .flags = VMS_STRUCT | VMS_ARRAY, \ ++ .offset = vmstate_offset_array(_state, _field, _type, _num), \ ++ } ++ ++#define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .size = (_size - _start), \ ++ .info = &slirp_vmstate_info_buffer, .flags = VMS_BUFFER, \ ++ .offset = vmstate_offset_buffer(_state, _field) + _start, \ ++ } ++ ++#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, 
_field_size) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), \ ++ .size_offset = vmstate_offset_value(_state, _field_size, uint32_t), \ ++ .info = &slirp_vmstate_info_buffer, \ ++ .flags = VMS_VBUFFER | VMS_POINTER, \ ++ .offset = offsetof(_state, _field), \ ++ } ++ ++#define QEMU_BUILD_BUG_ON_STRUCT(x) \ ++ struct { \ ++ int : (x) ? -1 : 1; \ ++ } ++ ++#define QEMU_BUILD_BUG_ON_ZERO(x) \ ++ (sizeof(QEMU_BUILD_BUG_ON_STRUCT(x)) - sizeof(QEMU_BUILD_BUG_ON_STRUCT(x))) ++ ++/* Allocate a temporary of type 'tmp_type', set tmp->parent to _state ++ * and execute the vmsd on the temporary. Note that we're working with ++ * the whole of _state here, not a field within it. ++ * We compile time check that: ++ * That _tmp_type contains a 'parent' member that's a pointer to the ++ * '_state' type ++ * That the pointer is right at the start of _tmp_type. ++ */ ++#define VMSTATE_WITH_TMP(_state, _tmp_type, _vmsd) \ ++ { \ ++ .name = "tmp", \ ++ .size = sizeof(_tmp_type) + \ ++ QEMU_BUILD_BUG_ON_ZERO(offsetof(_tmp_type, parent) != 0) + \ ++ type_check_pointer(_state, typeof_field(_tmp_type, parent)), \ ++ .vmsd = &(_vmsd), .info = &slirp_vmstate_info_tmp, \ ++ } ++ ++#define VMSTATE_SINGLE(_field, _state, _version, _info, _type) \ ++ VMSTATE_SINGLE_TEST(_field, _state, NULL, _version, _info, _type) ++ ++#define VMSTATE_STRUCT(_field, _state, _version, _vmsd, _type) \ ++ VMSTATE_STRUCT_TEST(_field, _state, NULL, _version, _vmsd, _type) ++ ++#define VMSTATE_STRUCT_POINTER(_field, _state, _vmsd, _type) \ ++ VMSTATE_STRUCT_POINTER_V(_field, _state, 0, _vmsd, _type) ++ ++#define VMSTATE_STRUCT_ARRAY(_field, _state, _num, _version, _vmsd, _type) \ ++ VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, NULL, _version, _vmsd, \ ++ _type) ++ ++#define VMSTATE_INT16_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int16, int16_t) ++#define VMSTATE_INT32_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int32, 
int32_t) ++ ++#define VMSTATE_UINT8_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint8, uint8_t) ++#define VMSTATE_UINT16_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint16, uint16_t) ++#define VMSTATE_UINT32_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint32, uint32_t) ++ ++#define VMSTATE_INT16(_f, _s) VMSTATE_INT16_V(_f, _s, 0) ++#define VMSTATE_INT32(_f, _s) VMSTATE_INT32_V(_f, _s, 0) ++ ++#define VMSTATE_UINT8(_f, _s) VMSTATE_UINT8_V(_f, _s, 0) ++#define VMSTATE_UINT16(_f, _s) VMSTATE_UINT16_V(_f, _s, 0) ++#define VMSTATE_UINT32(_f, _s) VMSTATE_UINT32_V(_f, _s, 0) ++ ++#define VMSTATE_UINT16_TEST(_f, _s, _t) \ ++ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint16, uint16_t) ++ ++#define VMSTATE_UINT32_TEST(_f, _s, _t) \ ++ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint32, uint32_t) ++ ++#define VMSTATE_INT16_ARRAY_V(_f, _s, _n, _v) \ ++ VMSTATE_ARRAY(_f, _s, _n, _v, slirp_vmstate_info_int16, int16_t) ++ ++#define VMSTATE_INT16_ARRAY(_f, _s, _n) VMSTATE_INT16_ARRAY_V(_f, _s, _n, 0) ++ ++#define VMSTATE_BUFFER_V(_f, _s, _v) \ ++ VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, 0, sizeof(typeof_field(_s, _f))) ++ ++#define VMSTATE_BUFFER(_f, _s) VMSTATE_BUFFER_V(_f, _s, 0) ++ ++#define VMSTATE_END_OF_LIST() \ ++ { \ ++ } ++ ++#endif +-- +1.8.3.1 + diff --git a/0005-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch index cde66a1..174bcc9 100644 --- a/0005-Initial-redhat-build.patch +++ b/0005-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From 4df157781801c50224373be57fa3c8c3741c0535 Mon Sep 17 00:00:00 2001 +From 4b7f57db366243eeafc0528d3cff4fa6967e7522 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build @@ -10,10 +10,8 @@ several issues are fixed in QEMU tree: - As we use qemu-kvm as name in all places, this is updated to be consistent - Man page renamed from qemu to qemu-kvm - man page is installed using make 
install so we have to fix it in qemu tree - - Use "/share/qemu-kvm" as SHARE_SUFFIX - - We reconfigured our share to qemu-kvm to be consistent with used name -This rebase includes changes up to qemu-kvm-4.1.0-18.el8 +This rebase includes changes up to qemu-kvm-4.2.0-20.el8 Rebase notes (3.1.0): - added new configure options @@ -49,6 +47,44 @@ Rebase notes (4.2.0): - Removed spapr-rtas.bin (upstream) - Require newer SLOF (20191022) +Rebase notes (weekly-200115): +- Added index.html (upstream) + +Rebase notes (weekly-200122): +- Use python3 for virtio_seg_max_adjust.py test +- Removed qemu-trace-stap shebang from spec file + +Rebase notes (weekly-200129): +- Ship docs/qemu-kvm/system help files (added upstream) + +Rebase notes (weekly-200212): +- Added virtiofsd.1 (upstream) + +Rebase notes (weekly-200219): +- Use out-of-tree build + +Rebase notes (weekly-200226): +- added tools documentation (upstream) + +Rebase notes (weekly-200304): +- Update local build + +Rebase notes (weekly-200311): +- Add docs/qemu-kvm/user help files (added upstream) +- Removing installed qemu-storage-daemon (added upstream) + +Rebase notes (weekly-200318): +- Removing opensbi-riscv32-sifive_u-fw_jump.bin (added upstream) + +Rebase notes (weekly-200325): +- Disable iotests (moved from Enable make check commit) + +Rebase notes (5.0.0 rc2): +- Added missing configure options + +Rebase notes (5.0.0 rc3): +- Reorder configure options + Merged patches (3.1.0): - 01f0c9f RHEL8: Add disable configure options to qemu spec file - Spec file cleanups @@ -71,30 +107,35 @@ Merged patches (4.2.0): - 69e1fb2 enable virgla - d4f6115 enable virgl, for real this time ... -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (weekly-200205): +- 5edf6bd Add support for rh-brew-module +- f77d52d redhat: ship virtiofsd vhost-user device backend + +Conflicts: + gdbstub.c --- - .gitignore | 1 + - Makefile | 3 +- - configure | 1 + - os-posix.c | 2 +- - redhat/Makefile | 82 + - redhat/Makefile.common | 51 + - redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 2434 +++++++++++++++++++++++++++++ - redhat/scripts/process-patches.sh | 7 +- - tests/Makefile.include | 2 +- - ui/vnc.c | 2 +- - 11 files changed, 2615 insertions(+), 9 deletions(-) + .gitignore | 1 + + Makefile | 3 +- + configure | 1 + + redhat/Makefile | 88 ++ + redhat/Makefile.common | 51 + + redhat/README.tests | 39 + + redhat/qemu-kvm.spec.template | 2820 +++++++++++++++++++++++++++++++++++ + redhat/scripts/extract_build_cmd.py | 2 +- + redhat/scripts/process-patches.sh | 7 +- + tests/check-block.sh | 2 + + ui/vnc.c | 2 +- + 11 files changed, 3008 insertions(+), 8 deletions(-) create mode 100644 redhat/Makefile create mode 100644 redhat/Makefile.common create mode 100644 redhat/README.tests create mode 100644 redhat/qemu-kvm.spec.template diff --git a/Makefile b/Makefile -index b437a346d7..086727dbb9 100644 +index 34275f5..aee2e8e 100644 --- a/Makefile +++ b/Makefile -@@ -512,6 +512,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM +@@ -548,6 +548,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM CAP_CFLAGS += -DCAPSTONE_HAS_ARM64 CAP_CFLAGS += -DCAPSTONE_HAS_POWERPC CAP_CFLAGS += -DCAPSTONE_HAS_X86 @@ -102,20 +143,20 @@ index b437a346d7..086727dbb9 100644 .PHONY: capstone/all capstone/all: .git-submodule-status -@@ -826,7 +827,7 @@ install-doc: $(DOCS) install-sphinxdocs +@@ -883,7 +884,7 @@ install-doc: $(DOCS) install-sphinxdocs $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)" ifdef CONFIG_POSIX $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" -- $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1" -+ $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1/qemu-kvm.1" +- $(INSTALL_DATA) 
$(MANUAL_BUILDDIR)/system/qemu.1 "$(DESTDIR)$(mandir)/man1" ++ $(INSTALL_DATA) $(MANUAL_BUILDDIR)/system/qemu.1 "$(DESTDIR)$(mandir)/man1/qemu-kvm.1" $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" - $(INSTALL_DATA) docs/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" + $(INSTALL_DATA) $(MANUAL_BUILDDIR)/system/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" diff --git a/configure b/configure -index 6099be1d84..16564f8ccc 100755 +index 23b5e93..1b40d7e 100755 --- a/configure +++ b/configure -@@ -2424,6 +2424,7 @@ if test "$seccomp" != "no" ; then +@@ -2538,6 +2538,7 @@ if test "$seccomp" != "no" ; then seccomp="no" fi fi @@ -123,37 +164,24 @@ index 6099be1d84..16564f8ccc 100755 ########################################## # xen probe -diff --git a/os-posix.c b/os-posix.c -index 86cffd2c7d..1c9f86768d 100644 ---- a/os-posix.c -+++ b/os-posix.c -@@ -83,7 +83,7 @@ void os_setup_signal_handling(void) - /* Find a likely location for support files using the location of the binary. - For installed binaries this will be "$bindir/../share/qemu". When - running from the build tree this will be "$bindir/../pc-bios". 
*/ --#define SHARE_SUFFIX "/share/qemu" -+#define SHARE_SUFFIX "/share/qemu-kvm" - #define BUILD_SUFFIX "/pc-bios" - char *os_find_datadir(void) - { -diff --git a/tests/Makefile.include b/tests/Makefile.include -index 8566f5f119..b483790cf3 100644 ---- a/tests/Makefile.include -+++ b/tests/Makefile.include -@@ -1194,7 +1194,7 @@ check-acceptance: check-venv $(TESTS_RESULTS_DIR) - check-qapi-schema: check-tests/qapi-schema/frontend check-tests/qapi-schema/doc-good.texi - check-qtest: $(patsubst %,check-qtest-%, $(QTEST_TARGETS)) - check-block: $(patsubst %,check-%, $(check-block-y)) --check: check-block check-qapi-schema check-unit check-softfloat check-qtest check-decodetree -+check: check-qapi-schema check-unit check-softfloat check-qtest check-decodetree - check-clean: - rm -rf $(check-unit-y) tests/*.o $(QEMU_IOTESTS_HELPERS-y) - rm -rf $(sort $(foreach target,$(SYSEMU_TARGET_LIST), $(check-qtest-$(target)-y)) $(check-qtest-generic-y)) +diff --git a/tests/check-block.sh b/tests/check-block.sh +index ad320c2..1f26083 100755 +--- a/tests/check-block.sh ++++ b/tests/check-block.sh +@@ -43,6 +43,8 @@ if ! 
(sed --version | grep 'GNU sed') > /dev/null 2>&1 ; then + fi + fi + ++exit 0 ++ + cd tests/qemu-iotests + + ret=0 diff --git a/ui/vnc.c b/ui/vnc.c -index 87b8045afe..ecf6276f5b 100644 +index 1d7138a..1fc55b7 100644 --- a/ui/vnc.c +++ b/ui/vnc.c -@@ -3987,7 +3987,7 @@ void vnc_display_open(const char *id, Error **errp) +@@ -3972,7 +3972,7 @@ void vnc_display_open(const char *id, Error **errp) #ifdef CONFIG_VNC_SASL if (sasl) { @@ -163,5 +191,5 @@ index 87b8045afe..ecf6276f5b 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -2.21.0 +1.8.3.1 diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0006-Enable-disable-devices-for-RHEL.patch index b14bb1b..ff90dbb 100644 --- a/0006-Enable-disable-devices-for-RHEL.patch +++ b/0006-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 67511676246cce57becbd2dcf5abccf08d9ef737 Mon Sep 17 00:00:00 2001 +From db33b6f22ff44edfb3ca54ef7bf051ccfc53e479 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 11 Jan 2016 11:53:33 +0100 Subject: Enable/disable devices for RHEL @@ -39,6 +39,12 @@ Rebase notes (4.2.0-rc3): - Disabled ccid-card-emulated (patch 92566) - Disabled vfio-pci-igd-lpc-bridge (patch 92565) +Rebase notes (weekly-200205): +- added CONFIG_PCI_EXPRESS on ppc64 (due to upstream dependency) + +Rebase notes (weekly-200226): +- Added CONFIG_NVDIMM + Merged patches (qemu 3.1.0): - d51e082 Re-enable CONFIG_HYPERV_TESTDEV - 4b889f3 Declare cirrus-vga as deprecated @@ -57,82 +63,72 @@ Merged patches (4.1.0): Merged patches (4.2.0): - f7587dd RHEL: disable hostmem-memfd -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (weekly-200108): +- 4543a3c i386: Remove cpu64-rhel6 CPU model + +Conflicts: + target/arm/cpu.c --- - Makefile.objs | 4 +- backends/Makefile.objs | 3 +- - default-configs/aarch64-rh-devices.mak | 20 +++++ - default-configs/aarch64-softmmu.mak | 10 ++- - default-configs/ppc64-rh-devices.mak | 32 ++++++++ - default-configs/ppc64-softmmu.mak | 8 +- - default-configs/rh-virtio.mak | 10 +++ - default-configs/s390x-rh-devices.mak | 15 ++++ + default-configs/aarch64-rh-devices.mak | 21 +++++++ + default-configs/aarch64-softmmu.mak | 10 ++-- + default-configs/ppc64-rh-devices.mak | 34 +++++++++++ + default-configs/ppc64-softmmu.mak | 10 ++-- + default-configs/rh-virtio.mak | 10 ++++ + default-configs/s390x-rh-devices.mak | 15 +++++ default-configs/s390x-softmmu.mak | 4 +- - default-configs/x86_64-rh-devices.mak | 100 +++++++++++++++++++++++++ + default-configs/x86_64-rh-devices.mak | 100 +++++++++++++++++++++++++++++++++ default-configs/x86_64-softmmu.mak | 4 +- hw/acpi/ich9.c | 4 +- hw/arm/Makefile.objs | 2 +- - hw/block/fdc.c | 10 +++ - hw/bt/Makefile.objs | 4 +- + hw/block/fdc.c | 10 ++++ + hw/bt/Makefile.objs | 3 + hw/cpu/Makefile.objs | 5 +- hw/display/Makefile.objs | 5 +- hw/display/cirrus_vga.c | 3 + hw/ide/piix.c | 5 +- hw/input/pckbd.c | 2 + hw/net/e1000.c | 2 + - hw/pci-host/i440fx.c | 4 + hw/ppc/spapr_cpu_core.c | 2 + hw/usb/Makefile.objs | 4 +- - hw/vfio/pci-quirks.c | 9 +++ hw/vfio/pci.c | 5 ++ - qemu-options.hx | 7 +- + qemu-options.hx | 4 -- redhat/qemu-kvm.spec.template | 5 +- - target/arm/cpu.c | 4 +- - target/i386/cpu.c | 35 +++++++-- - target/ppc/cpu-models.c | 10 +++ + softmmu/vl.c | 2 +- + target/arm/cpu.c | 3 + + target/i386/cpu.c | 17 +++--- + target/ppc/cpu-models.c | 10 ++++ target/s390x/cpu_models.c | 3 + - target/s390x/kvm.c | 8 ++ + target/s390x/kvm.c | 8 +++ util/memfd.c | 2 +- - vl.c | 8 +- - 35 files changed, 317 insertions(+), 41 deletions(-) + 32 files changed, 281 insertions(+), 36 deletions(-) create mode 100644 
default-configs/aarch64-rh-devices.mak create mode 100644 default-configs/ppc64-rh-devices.mak create mode 100644 default-configs/rh-virtio.mak create mode 100644 default-configs/s390x-rh-devices.mak create mode 100644 default-configs/x86_64-rh-devices.mak + create mode 100644 hw/bt/Makefile.objs -diff --git a/Makefile.objs b/Makefile.objs -index 11ba1a36bd..fcf63e1096 100644 ---- a/Makefile.objs -+++ b/Makefile.objs -@@ -65,8 +65,8 @@ common-obj-y += replay/ - - common-obj-y += ui/ - common-obj-m += ui/ --common-obj-y += bt-host.o bt-vhci.o --bt-host.o-cflags := $(BLUEZ_CFLAGS) -+#common-obj-y += bt-host.o bt-vhci.o -+#bt-host.o-cflags := $(BLUEZ_CFLAGS) - - common-obj-y += dma-helpers.o - common-obj-y += vl.o diff --git a/backends/Makefile.objs b/backends/Makefile.objs -index f0691116e8..f328d404bf 100644 +index 28a847c..0eda216 100644 --- a/backends/Makefile.objs +++ b/backends/Makefile.objs -@@ -16,4 +16,5 @@ endif +@@ -16,7 +16,8 @@ endif common-obj-$(call land,$(CONFIG_VHOST_USER),$(CONFIG_VIRTIO)) += vhost-user.o -common-obj-$(CONFIG_LINUX) += hostmem-memfd.o +# RHEL: disable memfd +# common-obj-$(CONFIG_LINUX) += hostmem-memfd.o + + common-obj-$(CONFIG_GIO) += dbus-vmstate.o + dbus-vmstate.o-cflags = $(GIO_CFLAGS) diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak new file mode 100644 -index 0000000000..a1ed641174 +index 0000000..0d756a2 --- /dev/null +++ b/default-configs/aarch64-rh-devices.mak -@@ -0,0 +1,20 @@ +@@ -0,0 +1,21 @@ +include rh-virtio.mak + +CONFIG_ARM_GIC_KVM=y @@ -153,8 +149,9 @@ index 0000000000..a1ed641174 +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_PCI=y +CONFIG_XIO3130=y ++CONFIG_NVDIMM=y diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak -index 958b1e08e4..8f6867d48a 100644 +index 958b1e0..8f6867d 100644 --- a/default-configs/aarch64-softmmu.mak +++ b/default-configs/aarch64-softmmu.mak @@ -1,8 +1,10 @@ @@ -174,17 +171,19 @@ index 958b1e08e4..8f6867d48a 100644 
+include aarch64-rh-devices.mak diff --git a/default-configs/ppc64-rh-devices.mak b/default-configs/ppc64-rh-devices.mak new file mode 100644 -index 0000000000..35f2106d06 +index 0000000..ecbe53f --- /dev/null +++ b/default-configs/ppc64-rh-devices.mak -@@ -0,0 +1,32 @@ +@@ -0,0 +1,34 @@ +include rh-virtio.mak + +CONFIG_DIMM=y +CONFIG_MEM_DEVICE=y ++CONFIG_NVDIMM=y +CONFIG_PCI=y +CONFIG_PCI_DEVICES=y +CONFIG_PCI_TESTDEV=y ++CONFIG_PCI_EXPRESS=y +CONFIG_PSERIES=y +CONFIG_SCSI=y +CONFIG_SPAPR_VSCSI=y @@ -211,10 +210,10 @@ index 0000000000..35f2106d06 +CONFIG_XIVE_SPAPR=y +CONFIG_XIVE_KVM=y diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak -index cca52665d9..fec354f327 100644 +index ae0841f..040e557 100644 --- a/default-configs/ppc64-softmmu.mak +++ b/default-configs/ppc64-softmmu.mak -@@ -1,10 +1,12 @@ +@@ -1,11 +1,13 @@ # Default configuration for ppc64-softmmu # Include all 32-bit boards @@ -227,12 +226,14 @@ index cca52665d9..fec354f327 100644 # For pSeries -CONFIG_PSERIES=y +-CONFIG_NVDIMM=y +#CONFIG_PSERIES=y ++#CONFIG_NVDIMM=y + +include ppc64-rh-devices.mak diff --git a/default-configs/rh-virtio.mak b/default-configs/rh-virtio.mak new file mode 100644 -index 0000000000..94ede1b5f6 +index 0000000..94ede1b --- /dev/null +++ b/default-configs/rh-virtio.mak @@ -0,0 +1,10 @@ @@ -248,7 +249,7 @@ index 0000000000..94ede1b5f6 +CONFIG_VIRTIO_SERIAL=y diff --git a/default-configs/s390x-rh-devices.mak b/default-configs/s390x-rh-devices.mak new file mode 100644 -index 0000000000..c3c73fe752 +index 0000000..c3c73fe --- /dev/null +++ b/default-configs/s390x-rh-devices.mak @@ -0,0 +1,15 @@ @@ -268,7 +269,7 @@ index 0000000000..c3c73fe752 +CONFIG_VIRTIO_CCW=y +CONFIG_WDT_DIAG288=y diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak -index f2287a133f..3e2e388e91 100644 +index f2287a1..3e2e388 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak @@ -10,4 +10,6 @@ @@ -281,7 +282,7 
@@ index f2287a133f..3e2e388e91 100644 +include s390x-rh-devices.mak diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..d59b6d9bb5 +index 0000000..d59b6d9 --- /dev/null +++ b/default-configs/x86_64-rh-devices.mak @@ -0,0 +1,100 @@ @@ -386,7 +387,7 @@ index 0000000000..d59b6d9bb5 +CONFIG_TPM_EMULATOR=y +CONFIG_TPM_PASSTHROUGH=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak -index 64b2ee2960..b5de7e5279 100644 +index 64b2ee2..b5de7e5 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -1,3 +1,5 @@ @@ -397,10 +398,10 @@ index 64b2ee2960..b5de7e5279 100644 + +include x86_64-rh-devices.mak diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 2034dd749e..ab203ad448 100644 +index 336cace..ae86900 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c -@@ -449,8 +449,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) +@@ -374,8 +374,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; pm->acpi_memory_hotplug.is_enabled = true; pm->cpu_hotplug_legacy = true; @@ -412,10 +413,10 @@ index 2034dd749e..ab203ad448 100644 object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs -index fe749f65fd..2aa1a9efdd 100644 +index 534a6a1..bd62442 100644 --- a/hw/arm/Makefile.objs +++ b/hw/arm/Makefile.objs -@@ -27,7 +27,7 @@ obj-$(CONFIG_VEXPRESS) += vexpress.o +@@ -28,7 +28,7 @@ obj-$(CONFIG_VEXPRESS) += vexpress.o obj-$(CONFIG_ZYNQ) += xilinx_zynq.o obj-$(CONFIG_SABRELITE) += sabrelite.o @@ -425,7 +426,7 @@ index fe749f65fd..2aa1a9efdd 100644 obj-$(CONFIG_PXA2XX) += pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o obj-$(CONFIG_DIGIC) += digic.o diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index ac5d31e8c1..e925bac002 100644 +index 9628cc1..37989fe 100644 --- a/hw/block/fdc.c +++ 
b/hw/block/fdc.c @@ -46,6 +46,8 @@ @@ -437,7 +438,7 @@ index ac5d31e8c1..e925bac002 100644 /********************************************************/ /* debug Floppy devices */ -@@ -2638,6 +2640,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, +@@ -2613,6 +2615,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, int i, j; static int command_tables_inited = 0; @@ -451,19 +452,18 @@ index ac5d31e8c1..e925bac002 100644 + if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); - } + return; diff --git a/hw/bt/Makefile.objs b/hw/bt/Makefile.objs -index 867a7d2e8a..e678e9ee3c 100644 ---- a/hw/bt/Makefile.objs +new file mode 100644 +index 0000000..e678e9e +--- /dev/null +++ b/hw/bt/Makefile.objs -@@ -1,3 +1,3 @@ --common-obj-y += core.o l2cap.o sdp.o hci.o hid.o --common-obj-y += hci-csr.o +@@ -0,0 +1,3 @@ +#common-obj-y += core.o l2cap.o sdp.o hci.o hid.o +#common-obj-y += hci-csr.o - ++ diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs -index 8db9e8a7b3..1601ea93c7 100644 +index 8db9e8a..1601ea9 100644 --- a/hw/cpu/Makefile.objs +++ b/hw/cpu/Makefile.objs @@ -1,5 +1,6 @@ @@ -476,7 +476,7 @@ index 8db9e8a7b3..1601ea93c7 100644 +common-obj-y += core.o +# cluster.o diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs -index f2182e3bef..3d0cda1b52 100644 +index 77a7d62..68c793e 100644 --- a/hw/display/Makefile.objs +++ b/hw/display/Makefile.objs @@ -1,8 +1,9 @@ @@ -492,10 +492,10 @@ index f2182e3bef..3d0cda1b52 100644 common-obj-$(CONFIG_ADS7846) += ads7846.o common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index cd283e53b4..93afa26fda 100644 +index 1f29731..cac9e40 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c -@@ -2975,6 +2975,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) +@@ -2973,6 +2973,9 @@ static void pci_cirrus_vga_realize(PCIDevice 
*dev, Error **errp) PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); int16_t device_id = pc->device_id; @@ -506,10 +506,10 @@ index cd283e53b4..93afa26fda 100644 Also accept 8 MB/16 MB for backward compatibility. */ if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index db313dd3b1..e14858ca64 100644 +index 3b2de4c..980c35e 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -251,7 +251,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -221,7 +221,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -519,7 +519,7 @@ index db313dd3b1..e14858ca64 100644 } static const TypeInfo piix3_ide_info = { -@@ -279,6 +280,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -250,6 +251,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -529,10 +529,10 @@ index db313dd3b1..e14858ca64 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index f0acfd86f7..390eb6579c 100644 +index 60a4130..b2f1f91 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c -@@ -571,6 +571,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) +@@ -568,6 +568,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) dc->realize = i8042_realizefn; dc->vmsd = &vmstate_kbd_isa; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); @@ -542,10 +542,10 @@ index f0acfd86f7..390eb6579c 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index a73f8d404e..fc73fdd6fa 100644 +index 2a69eee..af3ec17 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1795,6 +1795,7 @@ static const E1000Info e1000_devices[] = { +@@ -1797,6 +1797,7 @@ static 
const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -553,7 +553,7 @@ index a73f8d404e..fc73fdd6fa 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1807,6 +1808,7 @@ static const E1000Info e1000_devices[] = { +@@ -1809,6 +1810,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -561,41 +561,11 @@ index a73f8d404e..fc73fdd6fa 100644 }; static void e1000_register_types(void) -diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c -index f27131102d..17f10efae2 100644 ---- a/hw/pci-host/i440fx.c -+++ b/hw/pci-host/i440fx.c -@@ -386,6 +386,7 @@ static const TypeInfo i440fx_info = { - }, - }; - -+#if 0 /* Disabled in Red Hat Enterprise Linux */ - /* IGD Passthrough Host Bridge. */ - typedef struct { - uint8_t offset; -@@ -469,6 +470,7 @@ static const TypeInfo igd_passthrough_i440fx_info = { - .instance_size = sizeof(PCII440FXState), - .class_init = igd_passthrough_i440fx_class_init, - }; -+#endif - - static const char *i440fx_pcihost_root_bus_path(PCIHostState *host_bridge, - PCIBus *rootbus) -@@ -514,7 +516,9 @@ static const TypeInfo i440fx_pcihost_info = { - static void i440fx_register_types(void) - { - type_register_static(&i440fx_info); -+#if 0 /* Disabled in Red Hat Enterprise Linux */ - type_register_static(&igd_passthrough_i440fx_info); -+#endif - type_register_static(&i440fx_pcihost_info); - } - diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 8339c4c0f8..301cd7b4e4 100644 +index ac1c109..542c19e 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -403,10 +403,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { +@@ -399,10 +399,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { .instance_size = sizeof(SpaprCpuCore), .class_size = sizeof(SpaprCpuCoreClass), }, @@ -609,10 +579,10 @@ index 8339c4c0f8..301cd7b4e4 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), 
DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs -index 303ac084a0..700a91886e 100644 +index 66835e5..1b03645 100644 --- a/hw/usb/Makefile.objs +++ b/hw/usb/Makefile.objs -@@ -30,7 +30,9 @@ common-obj-$(CONFIG_USB_BLUETOOTH) += dev-bluetooth.o +@@ -29,7 +29,9 @@ common-obj-$(CONFIG_USB_NETWORK) += dev-network.o ifeq ($(CONFIG_USB_SMARTCARD),y) common-obj-y += dev-smartcard-reader.o common-obj-$(CONFIG_SMARTCARD) += smartcard.mo @@ -623,76 +593,11 @@ index 303ac084a0..700a91886e 100644 smartcard.mo-cflags := $(SMARTCARD_CFLAGS) smartcard.mo-libs := $(SMARTCARD_LIBS) endif -diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index 136f3a9ad6..4505ffe48a 100644 ---- a/hw/vfio/pci-quirks.c -+++ b/hw/vfio/pci-quirks.c -@@ -1166,6 +1166,7 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) - trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - /* - * Intel IGD support - * -@@ -1239,6 +1240,7 @@ static int igd_gen(VFIOPCIDevice *vdev) - - return 8; /* Assume newer is compatible */ - } -+#endif - - typedef struct VFIOIGDQuirk { - struct VFIOPCIDevice *vdev; -@@ -1311,6 +1313,7 @@ typedef struct { - uint8_t len; - } IGDHostInfo; - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static const IGDHostInfo igd_host_bridge_infos[] = { - {PCI_REVISION_ID, 2}, - {PCI_SUBSYSTEM_VENDOR_ID, 2}, -@@ -1559,9 +1562,11 @@ static const MemoryRegionOps vfio_igd_index_quirk = { - .write = vfio_igd_quirk_index_write, - .endianness = DEVICE_LITTLE_ENDIAN, - }; -+#endif - - static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) - { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - struct vfio_region_info *rom = NULL, *opregion = NULL, - *host = NULL, *lpc = NULL; - VFIOQuirk *quirk; -@@ -1572,6 +1577,7 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) - uint32_t gmch; - uint16_t cmd_orig, cmd; - Error *err = NULL; 
-+#endif - - /* - * This must be an Intel VGA device at address 00:02.0 for us to even -@@ -1585,6 +1591,8 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) - return; - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ -+ - /* - * We need to create an LPC/ISA bridge at PCI bus address 00:1f.0 that we - * can stuff host values into, so if there's already one there and it's not -@@ -1809,6 +1817,7 @@ out: - g_free(opregion); - g_free(host); - g_free(lpc); -+#endif - } - - /* diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 2d40b396f2..c8534d3035 100644 +index 5e75a95..e265d77 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -3220,6 +3220,7 @@ static const TypeInfo vfio_pci_dev_info = { +@@ -3222,6 +3222,7 @@ static const TypeInfo vfio_pci_dev_info = { }, }; @@ -700,7 +605,7 @@ index 2d40b396f2..c8534d3035 100644 static Property vfio_pci_dev_nohotplug_properties[] = { DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false), DEFINE_PROP_END_OF_LIST(), -@@ -3239,11 +3240,15 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { +@@ -3241,11 +3242,15 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { .instance_size = sizeof(VFIOPCIDevice), .class_init = vfio_pci_nohotplug_dev_class_init, }; @@ -717,57 +622,59 @@ index 2d40b396f2..c8534d3035 100644 type_init(register_vfio_pci_dev_type) diff --git a/qemu-options.hx b/qemu-options.hx -index 65c9473b73..fc17aca631 100644 +index 292d4e7..1df25ae 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -2111,11 +2111,6 @@ ETEXI +@@ -2239,10 +2239,6 @@ ERST DEF("no-hpet", 0, QEMU_OPTION_no_hpet, "-no-hpet disable HPET\n", QEMU_ARCH_I386) --STEXI --@item -no-hpet --@findex -no-hpet --Disable HPET support. --ETEXI +-SRST +-``-no-hpet`` +- Disable HPET support. 
+-ERST DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" -@@ -3125,6 +3120,7 @@ STEXI - ETEXI - DEFHEADING() - -+#if 0 - DEFHEADING(Bluetooth(R) options:) - STEXI - @table @option -@@ -3203,6 +3199,7 @@ STEXI - @end table - ETEXI - DEFHEADING() -+#endif - - #ifdef CONFIG_TPM - DEFHEADING(TPM device options:) +diff --git a/softmmu/vl.c b/softmmu/vl.c +index afd2615..00f7604 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -144,7 +144,7 @@ static Chardev **serial_hds; + Chardev *parallel_hds[MAX_PARALLEL_PORTS]; + int win2k_install_hack = 0; + int singlestep = 0; +-int no_hpet = 0; ++int no_hpet = 1; /* Always disabled for Red Hat Enterprise Linux */ + int fd_bootchk = 1; + static int no_reboot; + int no_shutdown = 0; diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 7a4ac9339b..3788fc3c4a 100644 +index 5d64adf..f1d18b8 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c -@@ -2744,7 +2744,9 @@ static void arm_cpu_register_types(void) - type_register_static(&idau_interface_type_info); +@@ -2904,6 +2904,9 @@ static void arm_cpu_register_types(void) while (info->name) { -- cpu_register(info); + arm_cpu_register(info); + /* RHEL specific: Filter out unsupported cpu models */ -+ if (!strcmp(info->name, "cortex-a15")) -+ cpu_register(info); ++ if (!strcmp(info->name, "cortex-a15-arm-cpu")) ++ arm_cpu_register(info); info++; } diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 69f518a21a..1b7880ae3a 100644 +index 9c256ab..26a8584 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1835,14 +1835,14 @@ static X86CPUDefinition builtin_x86_defs[] = { +@@ -1801,20 +1801,21 @@ static CPUCaches epyc_rome_cache_info = { + + static X86CPUDefinition builtin_x86_defs[] = { + { ++ /* qemu64 is the default CPU model for all machine-types */ + .name = "qemu64", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, .family = 
6, .model = 6, .stepping = 3, @@ -790,34 +697,8 @@ index 69f518a21a..1b7880ae3a 100644 .features[FEAT_8000_0001_ECX] = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM, .xlevel = 0x8000000A, -@@ -2128,6 +2128,25 @@ static X86CPUDefinition builtin_x86_defs[] = { - .xlevel = 0x80000008, - .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", - }, -+ { -+ .name = "cpu64-rhel6", -+ .level = 4, -+ .vendor = CPUID_VENDOR_AMD, -+ .family = 6, -+ .model = 13, -+ .stepping = 3, -+ .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | -+ CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -+ CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | -+ CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | -+ CPUID_PSE | CPUID_DE | CPUID_FP87, -+ .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, -+ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, -+ .features[FEAT_8000_0001_ECX] = CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | -+ CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, -+ .xlevel = 0x8000000A, -+ .model_id = "QEMU Virtual CPU version (cpu64-rhel6)", -+ }, - { - .name = "Conroe", - .level = 10, diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 086548e9b9..1bbf378c18 100644 +index 4ad1686..16b2185 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -847,7 +728,7 @@ index 086548e9b9..1bbf378c18 100644 POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, "POWER7 v2.3") POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, -@@ -780,6 +784,7 @@ +@@ -782,6 +786,7 @@ /* PowerPC CPU aliases */ PowerPCCPUAlias ppc_cpu_aliases[] = { @@ -855,7 +736,7 @@ index 086548e9b9..1bbf378c18 100644 { "403", "403gc" }, { "405", "405d4" }, { "405cr", "405crc" }, -@@ -938,12 +943,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -940,12 +945,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "7447a", "7447a_v1.2" }, { "7457a", "7457a_v1.2" }, { "apollo7pm", "7457a_v1.0" }, @@ -871,15 +752,15 @@ 
index 086548e9b9..1bbf378c18 100644 { "power7", "power7_v2.3" }, { "power7+", "power7+_v2.1" }, { "power8e", "power8e_v2.1" }, -@@ -952,6 +960,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { - { "power9", "power9_v2.0" }, +@@ -955,6 +963,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "power10", "power10_v1.0" }, #endif +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* Generic PowerPCs */ #if defined(TARGET_PPC64) { "ppc64", "970fx_v3.1" }, -@@ -959,5 +968,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -962,5 +971,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "ppc32", "604" }, { "ppc", "604" }, { "default", "604" }, @@ -887,7 +768,7 @@ index 086548e9b9..1bbf378c18 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 7e92fb2e15..be718220d7 100644 +index 7c32180..88bf4a9 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -404,6 +404,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, @@ -901,10 +782,10 @@ index 7e92fb2e15..be718220d7 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 0c9d14b4b1..a02d569537 100644 +index 69881a0..9802878 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c -@@ -2387,6 +2387,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2494,6 +2494,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } @@ -920,7 +801,7 @@ index 0c9d14b4b1..a02d569537 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ diff --git a/util/memfd.c b/util/memfd.c -index 4a3c07e0be..3303ec9da4 100644 +index 4a3c07e..3303ec9 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -193,7 +193,7 @@ bool qemu_memfd_alloc_check(void) @@ -932,63 +813,6 @@ index 4a3c07e0be..3303ec9da4 100644 int mfd = memfd_create("test", flags | MFD_CLOEXEC); if (mfd >= 
0) { -diff --git a/vl.c b/vl.c -index 6a65a64bfd..668a34577e 100644 ---- a/vl.c -+++ b/vl.c -@@ -166,7 +166,7 @@ Chardev *parallel_hds[MAX_PARALLEL_PORTS]; - int win2k_install_hack = 0; - int singlestep = 0; - int acpi_enabled = 1; --int no_hpet = 0; -+int no_hpet = 1; /* Always disabled for Red Hat Enterprise Linux */ - int fd_bootchk = 1; - static int no_reboot; - int no_shutdown = 0; -@@ -914,6 +914,7 @@ static void configure_rtc(QemuOpts *opts) - } - } - -+#if 0 // Disabled for Red Hat Enterprise Linux - /***********************************************************/ - /* Bluetooth support */ - static int nb_hcis; -@@ -1035,6 +1036,7 @@ static int bt_parse(const char *opt) - error_report("bad bluetooth parameter '%s'", opt); - return 1; - } -+#endif - - static int parse_name(void *opaque, QemuOpts *opts, Error **errp) - { -@@ -3128,6 +3130,7 @@ int main(int argc, char **argv, char **envp) - } - break; - #endif -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - case QEMU_OPTION_bt: - warn_report("The bluetooth subsystem is deprecated and will " - "be removed soon. 
If the bluetooth subsystem is " -@@ -3135,6 +3138,7 @@ int main(int argc, char **argv, char **envp) - "qemu-devel@nongnu.org with your usecase."); - add_device_config(DEV_BT, optarg); - break; -+#endif - case QEMU_OPTION_audio_help: - audio_legacy_help(); - exit (0); -@@ -4282,9 +4286,11 @@ int main(int argc, char **argv, char **envp) - - tpm_init(); - -+#if 0 // Disabled for Red Hat Enterprise Linux - /* init the bluetooth world */ - if (foreach_device_config(DEV_BT, bt_parse)) - exit(1); -+#endif - - if (!xen_enabled()) { - /* On 32-bit hosts, QEMU is limited by virtual address space */ -- -2.21.0 +1.8.3.1 diff --git a/0007-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch index 4ae3966..a302d2c 100644 --- a/0007-Machine-type-related-general-changes.patch +++ b/0007-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From 113078b23a4747b07eb363719d7cbc0af403dd2a Mon Sep 17 00:00:00 2001 +From 799c934a1ec957ae2e163f367f5f7550949178da Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -33,33 +33,42 @@ Merged patches (4.2.0): - ca4a5e8 virtio: Make disable-legacy/disable-modern compat properties optional - compat: Generic hw_compat_rhel_8_1 (patch 93040/92956) -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (weekly-200318): +- e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) + +Merged patches (weekly-200506): +- 8f9f4d8 compat: disable 'edid' for virtio-gpu-ccw --- - hw/acpi/ich9.c | 16 ++++ - hw/acpi/piix4.c | 5 +- - hw/char/serial.c | 16 ++++ - hw/core/machine.c | 170 ++++++++++++++++++++++++++++++++++++++++ - hw/display/vga-isa.c | 2 +- - hw/net/e1000e.c | 21 +++++ - hw/net/rtl8139.c | 4 +- - hw/rtc/mc146818rtc.c | 6 ++ - hw/smbios/smbios.c | 1 + - hw/timer/i8254_common.c | 2 +- - hw/usb/hcd-uhci.c | 4 +- - hw/usb/hcd-xhci.c | 20 +++++ - hw/usb/hcd-xhci.h | 2 + - include/hw/acpi/ich9.h | 3 + - include/hw/boards.h | 24 ++++++ - include/hw/usb.h | 4 + - migration/migration.c | 2 + - migration/migration.h | 5 ++ - 18 files changed, 301 insertions(+), 6 deletions(-) + hw/acpi/ich9.c | 16 ++++ + hw/acpi/piix4.c | 5 +- + hw/arm/virt.c | 2 +- + hw/char/serial.c | 16 ++++ + hw/core/machine.c | 170 +++++++++++++++++++++++++++++++++++++++++++ + hw/display/vga-isa.c | 2 +- + hw/i386/pc_piix.c | 2 + + hw/i386/pc_q35.c | 2 + + hw/net/e1000e.c | 21 ++++++ + hw/net/rtl8139.c | 4 +- + hw/rtc/mc146818rtc.c | 6 ++ + hw/smbios/smbios.c | 46 +++++++++++- + hw/timer/i8254_common.c | 2 +- + hw/usb/hcd-uhci.c | 4 +- + hw/usb/hcd-xhci.c | 20 +++++ + hw/usb/hcd-xhci.h | 2 + + include/hw/acpi/ich9.h | 3 + + include/hw/boards.h | 24 ++++++ + include/hw/firmware/smbios.h | 5 +- + include/hw/i386/pc.h | 3 + + include/hw/usb.h | 4 + + migration/migration.c | 2 + + migration/migration.h | 5 ++ + 23 files changed, 355 insertions(+), 11 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index ab203ad448..7ec26884e8 100644 +index ae86900..9a8a627 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c -@@ -444,6 +444,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) +@@ -369,6 +369,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) s->pm.enable_tco = value; } @@ -78,7 +87,7 @@ 
index ab203ad448..7ec26884e8 100644 void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) { static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; -@@ -468,6 +480,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) +@@ -393,6 +405,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) ich9_pm_get_cpu_hotplug_legacy, ich9_pm_set_cpu_hotplug_legacy, NULL); @@ -86,14 +95,14 @@ index ab203ad448..7ec26884e8 100644 + ich9_pm_get_force_rev1_fadt, + ich9_pm_set_force_rev1_fadt, + NULL); - object_property_add(obj, ACPI_PM_PROP_S3_DISABLED, "uint8", - ich9_pm_get_disable_s3, - ich9_pm_set_disable_s3, + object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S3_DISABLED, + &pm->disable_s3, OBJ_PROP_FLAG_READWRITE, + NULL); diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 93aec2dd2c..3a26193cbe 100644 +index 964d6f5..b8458ba 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -274,6 +274,7 @@ static const VMStateDescription vmstate_acpi = { +@@ -275,6 +275,7 @@ static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, .minimum_version_id = 3, @@ -101,7 +110,7 @@ index 93aec2dd2c..3a26193cbe 100644 .post_load = vmstate_acpi_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), -@@ -627,8 +628,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) +@@ -628,8 +629,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) static Property piix4_pm_properties[] = { DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), @@ -112,19 +121,32 @@ index 93aec2dd2c..3a26193cbe 100644 DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_pci_hotplug, true), +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 171e690..25e6839 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1436,7 +1436,7 @@ static void 
virt_build_smbios(VirtMachineState *vms) + + smbios_set_defaults("QEMU", product, + vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, +- true, SMBIOS_ENTRY_POINT_30); ++ true, NULL, NULL, SMBIOS_ENTRY_POINT_30); + + smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, + &smbios_anchor, &smbios_anchor_len); diff --git a/hw/char/serial.c b/hw/char/serial.c -index b4aa250950..0012f0e44d 100644 +index c822a9a..0c7f1ff 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c -@@ -34,6 +34,7 @@ - #include "sysemu/runstate.h" +@@ -35,6 +35,7 @@ #include "qemu/error-report.h" #include "trace.h" + #include "hw/qdev-properties.h" +#include "migration/migration.h" //#define DEBUG_SERIAL -@@ -703,6 +704,9 @@ static int serial_post_load(void *opaque, int version_id) +@@ -704,6 +705,9 @@ static int serial_post_load(void *opaque, int version_id) static bool serial_thr_ipending_needed(void *opaque) { SerialState *s = opaque; @@ -134,7 +156,7 @@ index b4aa250950..0012f0e44d 100644 if (s->ier & UART_IER_THRI) { bool expected_value = ((s->iir & UART_IIR_ID) == UART_IIR_THRI); -@@ -784,6 +788,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { +@@ -785,6 +789,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { static bool serial_fifo_timeout_timer_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -145,7 +167,7 @@ index b4aa250950..0012f0e44d 100644 return timer_pending(s->fifo_timeout_timer); } -@@ -801,6 +809,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { +@@ -802,6 +810,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { static bool serial_timeout_ipending_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -156,7 +178,7 @@ index b4aa250950..0012f0e44d 100644 return s->timeout_ipending != 0; } -@@ -818,6 +830,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { +@@ -819,6 +831,10 @@ static const VMStateDescription 
vmstate_serial_timeout_ipending = { static bool serial_poll_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -168,12 +190,12 @@ index b4aa250950..0012f0e44d 100644 } diff --git a/hw/core/machine.c b/hw/core/machine.c -index 1689ad3bf8..e0e0eec8bf 100644 +index c1a444c..af407cc 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -27,6 +27,176 @@ - #include "hw/pci/pci.h" +@@ -28,6 +28,176 @@ #include "hw/mem/nvdimm.h" + #include "migration/vmstate.h" +/* + * The same as hw_compat_4_1 @@ -220,7 +242,7 @@ index 1689ad3bf8..e0e0eec8bf 100644 + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ + { "virtio-vga", "edid", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ -+ { "virtio-gpu-pci", "edid", "false" }, ++ { "virtio-gpu-device", "edid", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ + { "virtio-device", "use-started", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */ @@ -345,14 +367,14 @@ index 1689ad3bf8..e0e0eec8bf 100644 +}; +const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); + - GlobalProperty hw_compat_4_1[] = { - { "virtio-pci", "x-pcie-flr-init", "off" }, - }; + GlobalProperty hw_compat_4_2[] = { + { "virtio-blk-device", "queue-size", "128"}, + { "virtio-scsi-device", "virtqueue_size", "128"}, diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index 873e5e9706..d1a2efe47e 100644 +index 0633ed3..b703e9e 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c -@@ -82,7 +82,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) +@@ -84,7 +84,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) } static Property vga_isa_properties[] = { @@ -361,8 +383,34 @@ index 873e5e9706..d1a2efe47e 100644 DEFINE_PROP_END_OF_LIST(), }; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index b75087d..b255d56 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, + 
smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", + mc->name, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index d2806c1..461e1cd 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -202,6 +202,8 @@ static void pc_q35_init(MachineState *machine) + smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", + mc->name, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index b69fd7d8ad..d8be50a1ce 100644 +index 79ba158..311dbe0 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -79,6 +79,11 @@ typedef struct E1000EState { @@ -436,10 +484,10 @@ index b69fd7d8ad..d8be50a1ce 100644 e1000e_prop_disable_vnet, bool), DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 88a97d756d..21d80e96cf 100644 +index 70aca7e..0950cee 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c -@@ -3177,7 +3177,7 @@ static int rtl8139_pre_save(void *opaque) +@@ -3179,7 +3179,7 @@ static int rtl8139_pre_save(void *opaque) static const VMStateDescription vmstate_rtl8139 = { .name = "rtl8139", @@ -448,7 +496,7 @@ index 88a97d756d..21d80e96cf 100644 .minimum_version_id = 3, .post_load = rtl8139_post_load, .pre_save = rtl8139_pre_save, -@@ -3258,7 +3258,9 @@ static const VMStateDescription vmstate_rtl8139 = { +@@ -3260,7 +3260,9 @@ static const VMStateDescription vmstate_rtl8139 = { VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), @@ -459,7 +507,7 @@ index 88a97d756d..21d80e96cf 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c -index 
74ae74bc5c..73820517df 100644 +index d18c099..8a3bd68 100644 --- a/hw/rtc/mc146818rtc.c +++ b/hw/rtc/mc146818rtc.c @@ -42,6 +42,7 @@ @@ -483,19 +531,89 @@ index 74ae74bc5c..73820517df 100644 } diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 11d476c4a2..e6e9355384 100644 +index ffd9872..7818b90 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c -@@ -777,6 +777,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, +@@ -56,6 +56,9 @@ static bool smbios_legacy = true; + static bool smbios_uuid_encoded = true; + /* end: legacy structures & constants for <= 2.0 machines */ + ++/* Set to true for modern Windows 10 HardwareID-6 compat */ ++static bool smbios_type2_required; ++ + + uint8_t *smbios_tables; + size_t smbios_tables_len; +@@ -531,7 +534,7 @@ static void smbios_build_type_1_table(void) + + static void smbios_build_type_2_table(void) + { +- SMBIOS_BUILD_TABLE_PRE(2, 0x200, false); /* optional */ ++ SMBIOS_BUILD_TABLE_PRE(2, 0x200, smbios_type2_required); + + SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); + SMBIOS_TABLE_SET_STR(2, product_str, type2.product); +@@ -752,7 +755,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) + + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type) ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type) + { + smbios_have_defaults = true; + smbios_legacy = legacy_mode; +@@ -773,11 +779,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + g_free(smbios_entries); + } + ++ /* ++ * If @stream_product & @stream_version are non-NULL, then ++ * we're following rules for new Windows driver support. 
++ * The data we have to report is defined in this doc: ++ * ++ * https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer ++ * ++ * The Windows drivers are written to expect use of the ++ * scheme documented as "HardwareID-6" against Windows 10, ++ * which uses SMBIOS System (Type 1) and Base Board (Type 2) ++ * tables and will match on ++ * ++ * System Manufacturer = Red Hat (@manufacturer) ++ * System SKU Number = 8.2.0 (@stream_version) ++ * Baseboard Manufacturer = Red Hat (@manufacturer) ++ * Baseboard Product = RHEL-AV (@stream_product) ++ * ++ * NB, SKU must be changed with each RHEL-AV release ++ * ++ * Other fields can be freely used by applications using ++ * QEMU. For example apps can use the "System product" ++ * and "System version" to identify themselves. ++ * ++ * We get 'System Manufacturer' and 'Baseboard Manufacturer' ++ */ SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); SMBIOS_SET_DEFAULT(type1.product, product); SMBIOS_SET_DEFAULT(type1.version, version); + SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); ++ if (stream_version != NULL) { ++ SMBIOS_SET_DEFAULT(type1.sku, stream_version); ++ } SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type2.product, product); +- SMBIOS_SET_DEFAULT(type2.product, product); ++ if (stream_product != NULL) { ++ SMBIOS_SET_DEFAULT(type2.product, stream_product); ++ smbios_type2_required = true; ++ } else { ++ SMBIOS_SET_DEFAULT(type2.product, product); ++ } SMBIOS_SET_DEFAULT(type2.version, version); + SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type3.version, version); diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index 050875b497..32935da46c 100644 +index 050875b..32935da 100644 --- a/hw/timer/i8254_common.c +++ b/hw/timer/i8254_common.c @@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = { @@ -508,7 +626,7 @@ index 050875b497..32935da46c 100644 
vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 23507ad3b5..9fd87a7ad9 100644 +index 37f7beb..2741edc 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c @@ -1219,12 +1219,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) @@ -528,10 +646,10 @@ index 23507ad3b5..9fd87a7ad9 100644 if (s->masterbus) { USBPort *ports[NB_PORTS]; diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 80988bb305..8fed2eedd6 100644 +index b330e36..b25cce8 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c -@@ -3590,9 +3590,27 @@ static const VMStateDescription vmstate_xhci_slot = { +@@ -3600,9 +3600,27 @@ static const VMStateDescription vmstate_xhci_slot = { } }; @@ -559,7 +677,7 @@ index 80988bb305..8fed2eedd6 100644 .fields = (VMStateField[]) { VMSTATE_UINT32(type, XHCIEvent), VMSTATE_UINT32(ccode, XHCIEvent), -@@ -3601,6 +3619,8 @@ static const VMStateDescription vmstate_xhci_event = { +@@ -3611,6 +3629,8 @@ static const VMStateDescription vmstate_xhci_event = { VMSTATE_UINT32(flags, XHCIEvent), VMSTATE_UINT8(slotid, XHCIEvent), VMSTATE_UINT8(epid, XHCIEvent), @@ -569,7 +687,7 @@ index 80988bb305..8fed2eedd6 100644 } }; diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h -index 2fad4df2a7..f554b671e3 100644 +index 2fad4df..f554b67 100644 --- a/hw/usb/hcd-xhci.h +++ b/hw/usb/hcd-xhci.h @@ -157,6 +157,8 @@ typedef struct XHCIEvent { @@ -582,7 +700,7 @@ index 2fad4df2a7..f554b671e3 100644 typedef struct XHCIInterrupter { diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h -index 41568d1837..1a23ccc412 100644 +index 41568d1..1a23ccc 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -61,6 +61,9 @@ typedef struct ICH9LPCPMRegs { @@ -596,10 +714,10 @@ index 41568d1837..1a23ccc412 100644 #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" diff --git a/include/hw/boards.h b/include/hw/boards.h -index de45087f34..6f85a0e032 100644 +index 
fd4d62b..0046ab5 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -377,4 +377,28 @@ extern const size_t hw_compat_2_2_len; +@@ -369,4 +369,28 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; @@ -628,8 +746,38 @@ index de45087f34..6f85a0e032 100644 +extern const size_t hw_compat_rhel_7_1_len; + #endif +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 02a0ced..67e38a1 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -267,7 +267,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type); ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type); + uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); + void smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 05e1945..811c3d5 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -100,6 +100,9 @@ typedef struct PCMachineClass { + bool smbios_defaults; + bool smbios_legacy_mode; + bool smbios_uuid_encoded; ++ /* New fields needed for Windows HardwareID-6 matching */ ++ const char *smbios_stream_product; ++ const char *smbios_stream_version; + + /* RAM / address space compat: */ + bool gigabyte_align; diff --git a/include/hw/usb.h b/include/hw/usb.h -index c24d968a19..b353438ea0 100644 +index c24d968..b353438 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h @@ -605,4 +605,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, @@ -642,10 +790,10 @@ index c24d968a19..b353438ea0 100644 + #endif diff --git a/migration/migration.c b/migration/migration.c -index 
354ad072fa..30c53c623b 100644 +index 177cce9..2864560 100644 --- a/migration/migration.c +++ b/migration/migration.c -@@ -121,6 +121,8 @@ enum mig_rp_message_type { +@@ -128,6 +128,8 @@ enum mig_rp_message_type { MIG_RP_MSG_MAX }; @@ -655,10 +803,10 @@ index 354ad072fa..30c53c623b 100644 migrations at once. For now we don't need to add dynamic creation of migration */ diff --git a/migration/migration.h b/migration/migration.h -index 79b3dda146..0b1b0d4df5 100644 +index 507284e..0baa337 100644 --- a/migration/migration.h +++ b/migration/migration.h -@@ -335,6 +335,11 @@ void init_dirty_bitmap_incoming_migration(void); +@@ -339,6 +339,11 @@ void init_dirty_bitmap_incoming_migration(void); void migrate_add_address(SocketAddress *address); int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); @@ -671,5 +819,5 @@ index 79b3dda146..0b1b0d4df5 100644 #define qemu_ram_foreach_block \ #warning "Use foreach_not_ignored_block in migration code" -- -2.21.0 +1.8.3.1 diff --git a/0008-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch index 5397c8b..320b19f 100644 --- a/0008-Add-aarch64-machine-types.patch +++ b/0008-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 49164264d9928f73961acbbe4d56d8dfa23d8099 Mon Sep 17 00:00:00 2001 +From a373b0198f9268478a5211efb6a545d9c598b364 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -7,14 +7,14 @@ Adding changes to add RHEL machine types for aarch64 architecture. 
Signed-off-by: Miroslav Rezanina -Rebase changes (4.0.0): +Rebase notes (4.0.0): - Use upstream compat handling -Rebase changes (4.1.0-rc0): +Rebase notes (4.1.0-rc0): - Removed a15memmap (upstream) - Use virt_flash_create in rhel800_virt_instance_init -Rebase changes (4.2.0-rc0): +Rebase notes (4.2.0-rc0): - Set numa_mem_supported Rebase notes (4.2.0-rc3): @@ -23,6 +23,12 @@ Rebase notes (4.2.0-rc3): - aarch64: virt: Allow PCDIMM instantiation (patch 92247) - aarch64: virt: Enhance the comment related to gic-version (patch 92248) +Rebase notes (weekly-200226): +- Set default_ram_id in rhel_machine_class_init + +Rebase notes (5.0.0-rc1): +- Added setting acpi properties + Merged patches (4.0.0): - 7bfdb4c aarch64: Add virt-rhel8.0.0 machine type for ARM - 3433e69 aarch64: Set virt-rhel8.0.0 max_cpus to 512 @@ -31,26 +37,24 @@ Merged patches (4.0.0): Merged patches (4.1.0): - c3e39ef aarch64: Add virt-rhel8.1.0 machine type for ARM - 59a46d1 aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine - -Signed-off-by: Danilo C. L. 
de Paula --- - hw/arm/virt.c | 161 +++++++++++++++++++++++++++++++++++++++++- - include/hw/arm/virt.h | 11 +++ - 2 files changed, 171 insertions(+), 1 deletion(-) + hw/arm/virt.c | 169 +++++++++++++++++++++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 11 ++++ + 2 files changed, 179 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d4bedc2607..e10839100e 100644 +index 25e6839..1387ff6 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -72,6 +72,7 @@ - #include "hw/mem/nvdimm.h" - #include "hw/acpi/generic_event_device.h" +@@ -79,6 +79,7 @@ + #include "hw/char/pl011.h" + #include "qemu/guest-random.h" +#if 0 /* Disabled for Red Hat Enterprise Linux */ #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -98,7 +99,49 @@ +@@ -105,7 +106,49 @@ DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) @@ -101,7 +105,7 @@ index d4bedc2607..e10839100e 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -1763,6 +1806,7 @@ static void machvirt_init(MachineState *machine) +@@ -1914,6 +1957,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -109,7 +113,7 @@ index d4bedc2607..e10839100e 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1791,6 +1835,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -1942,6 +1986,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -117,7 +121,7 @@ index d4bedc2607..e10839100e 100644 static bool virt_get_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2022,6 +2067,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2218,6 +2263,7 @@ static int 
virt_kvm_type(MachineState *ms, const char *type_str) return requested_pa_size > 40 ? requested_pa_size : 0; } @@ -125,7 +129,7 @@ index d4bedc2607..e10839100e 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -2258,3 +2304,116 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -2478,3 +2524,124 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -159,7 +163,15 @@ index d4bedc2607..e10839100e 100644 + hc->plug = virt_machine_device_plug_cb; + hc->unplug_request = virt_machine_device_unplug_request_cb; + mc->numa_mem_supported = true; ++ mc->nvdimm_supported = true; + mc->auto_enable_numa_with_memhp = true; ++ mc->default_ram_id = "mach-virt.ram"; ++ ++ object_class_property_add(oc, "acpi", "OnOffAuto", ++ virt_get_acpi, virt_set_acpi, ++ NULL, NULL, &error_abort); ++ object_class_property_set_description(oc, "acpi", ++ "Enable ACPI", &error_abort); +} + +static const TypeInfo rhel_machine_info = { @@ -243,10 +255,10 @@ index d4bedc2607..e10839100e 100644 +} +DEFINE_RHEL_MACHINE_AS_LATEST(8, 2, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 0b41083e9d..53fdf16563 100644 +index 6d67ace..e44e25c 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -142,6 +142,7 @@ typedef struct { +@@ -156,6 +156,7 @@ typedef struct { #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -254,7 +266,7 @@ index 0b41083e9d..53fdf16563 100644 #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") #define VIRT_MACHINE(obj) \ OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) -@@ -150,6 +151,16 @@ typedef struct { +@@ -164,6 +165,16 @@ typedef struct { #define VIRT_MACHINE_CLASS(klass) \ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) @@ -269,8 +281,8 @@ index 0b41083e9d..53fdf16563 100644 +#endif + void virt_acpi_setup(VirtMachineState *vms); + bool virt_is_acpi_enabled(VirtMachineState *vms); - /* Return the number of used redistributor regions */ -- -2.21.0 +1.8.3.1 diff --git a/0009-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch index a3f1a54..b14fcee 100644 --- a/0009-Add-ppc64-machine-types.patch +++ b/0009-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 136eae41007e2e5b0d693cc656f3ec36cbabf16f Mon Sep 17 00:00:00 2001 +From c50a71e2a577b532a904e70d23f7533aca0b3a6f Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -31,36 +31,37 @@ Merged patches (4.2.0): - redhat: update pseries-rhel-7.6.0 machine type (patch 93039) - redhat: define pseries-rhel8.2.0 machine type (patch 93041) -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (weekly-200226): +- eb121ff spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine (partial) --- - hw/ppc/spapr.c | 278 ++++++++++++++++++++++++++++++++++++++++ - hw/ppc/spapr_cpu_core.c | 13 ++ + hw/ppc/spapr.c | 280 ++++++++++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr_cpu_core.c | 13 +++ include/hw/ppc/spapr.h | 1 + - target/ppc/compat.c | 13 +- + target/ppc/compat.c | 13 ++- target/ppc/cpu.h | 1 + - 5 files changed, 305 insertions(+), 1 deletion(-) + 5 files changed, 307 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index e076f6023c..8749c72066 100644 +index 9a2bd50..20b3437 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -4447,6 +4447,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) - smc->linux_pci_probe = true; +@@ -4549,6 +4549,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->smp_threads_vsmt = true; smc->nr_xirqs = SPAPR_NR_XIRQS; + xfc->match_nvt = spapr_match_nvt; + smc->has_power9_support = true; } static const TypeInfo spapr_machine_info = { -@@ -4491,6 +4492,7 @@ static const TypeInfo spapr_machine_info = { +@@ -4599,6 +4600,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-4.2 + * pseries-5.0 */ -@@ -4520,6 +4522,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4645,6 +4647,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -68,7 +69,7 @@ index e076f6023c..8749c72066 100644 /* * pseries-4.0 -@@ -4536,6 +4539,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -4661,6 +4664,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; } @@ -76,7 +77,7 @@ index e076f6023c..8749c72066 100644 static void spapr_machine_4_0_class_options(MachineClass 
*mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4695,6 +4699,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); +@@ -4820,6 +4824,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); /* * pseries-2.7 */ @@ -84,7 +85,7 @@ index e076f6023c..8749c72066 100644 static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, -@@ -4749,6 +4754,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, +@@ -4874,6 +4879,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; } @@ -92,7 +93,7 @@ index e076f6023c..8749c72066 100644 static void spapr_machine_2_7_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4863,6 +4869,278 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -4988,6 +4994,280 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -131,6 +132,8 @@ index e076f6023c..8749c72066 100644 + hw_compat_rhel_8_1_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; +} + +DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false); @@ -372,7 +375,7 @@ index e076f6023c..8749c72066 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 301cd7b4e4..ba5a8fb82b 100644 +index 542c19e..916ab0e 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -24,6 +24,7 @@ @@ -383,7 +386,7 @@ index 301cd7b4e4..ba5a8fb82b 100644 static void spapr_reset_vcpu(PowerPCCPU *cpu) { -@@ -242,6 +243,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -238,6 +239,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); Error 
*local_err = NULL; @@ -391,7 +394,7 @@ index 301cd7b4e4..ba5a8fb82b 100644 object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); if (local_err) { -@@ -254,6 +256,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -250,6 +252,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); kvmppc_set_papr(cpu); @@ -410,22 +413,22 @@ index 301cd7b4e4..ba5a8fb82b 100644 goto error_intc_create; } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index d5ab5ea7b2..aa89cc4a95 100644 +index 42d64a0..c03611f 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -125,6 +125,7 @@ struct SpaprMachineClass { - bool linux_pci_probe; +@@ -128,6 +128,7 @@ struct SpaprMachineClass { bool smp_threads_vsmt; /* set VSMT to smp_threads by default */ + hwaddr rma_limit; /* clamp the RMA to this size */ + bool has_power9_support; void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, diff --git a/target/ppc/compat.c b/target/ppc/compat.c -index 7de4bf3122..3e2e35342d 100644 +index f48df25..34e3d0f 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c -@@ -105,8 +105,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) +@@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) return NULL; } @@ -447,10 +450,10 @@ index 7de4bf3122..3e2e35342d 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index e3e82327b7..5c53801cfd 100644 +index 88d9449..0d7f5f5 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1367,6 +1367,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1323,6 +1323,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -459,5 +462,5 
@@ index e3e82327b7..5c53801cfd 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, -- -2.21.0 +1.8.3.1 diff --git a/0010-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch index d0f6669..bed50e9 100644 --- a/0010-Add-s390x-machine-types.patch +++ b/0010-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 0842700b3a01891c316e9169fa651f26714cafa5 Mon Sep 17 00:00:00 2001 +From 349c332a69933b977b40f4a2198236611d002818 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -20,17 +20,15 @@ Merged patches (4.2.0): - fb192e5 redhat: s390x: Rename s390-ccw-virtio-rhel8.0.0 to s390-ccw-virtio-rhel8.1.0 - a9b22e8 redhat: s390x: Add proper compatibility options for the -rhel7.6.0 machine - hw/s390x: Add the s390-ccw-virtio-rhel8.2.0 machine types (patch 92954) - -Signed-off-by: Danilo C. L. de Paula --- - hw/s390x/s390-virtio-ccw.c | 70 +++++++++++++++++++++++++++++++++++++- - 1 file changed, 69 insertions(+), 1 deletion(-) + hw/s390x/s390-virtio-ccw.c | 71 +++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index d3edeef0ad..c2c83d2fce 100644 +index 45292fb..1b3a04c 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -615,7 +615,7 @@ bool css_migration_enabled(void) +@@ -777,7 +777,7 @@ bool css_migration_enabled(void) { \ MachineClass *mc = MACHINE_CLASS(oc); \ ccw_machine_##suffix##_class_options(mc); \ @@ -38,16 +36,16 @@ index d3edeef0ad..c2c83d2fce 100644 + mc->desc = "VirtIO-ccw based S390 machine " verstr; \ if (latest) { \ mc->alias = "s390-ccw-virtio"; \ - mc->is_default = 1; \ -@@ -639,6 +639,7 @@ bool css_migration_enabled(void) + mc->is_default = true; \ +@@ -801,6 +801,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* 
Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_4_2_instance_options(MachineState *machine) + static void ccw_machine_5_0_instance_options(MachineState *machine) { } -@@ -866,6 +867,73 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1041,6 +1042,74 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); @@ -59,6 +57,7 @@ index d3edeef0ad..c2c83d2fce 100644 + +static void ccw_machine_rhel820_class_options(MachineClass *mc) +{ ++ mc->fixup_ram_size = s390_fixup_ram_size; +} +DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", true); + @@ -122,5 +121,5 @@ index d3edeef0ad..c2c83d2fce 100644 static void ccw_machine_register_types(void) { -- -2.21.0 +1.8.3.1 diff --git a/0011-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch index 72a5159..fba69c6 100644 --- a/0011-Add-x86_64-machine-types.patch +++ b/0011-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 2ebaeca6e26950f401a8169d1324be2bafd11741 Mon Sep 17 00:00:00 2001 +From 9da7d3c4b5a90c155ea4227c412b0ebd4d2a9b87 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -37,23 +37,27 @@ Merged patches (4.2.0): - 0784125 x86 machine types: add pc-q35-rhel8.1.0 - machines/x86: Add rhel 8.2 machine type (patch 92959) -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (weekly-200122): +- 481357e RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR support + +Merged patches (weekly-200318): +- e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) --- hw/i386/acpi-build.c | 3 + - hw/i386/pc.c | 263 ++++++++++++++++++++++++++++++++++++++++++- - hw/i386/pc_piix.c | 210 +++++++++++++++++++++++++++++++++- - hw/i386/pc_q35.c | 156 ++++++++++++++++++++++++- + hw/i386/pc.c | 265 ++++++++++++++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 210 +++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_q35.c | 162 ++++++++++++++++++++++++++++++- include/hw/boards.h | 2 + - include/hw/i386/pc.h | 33 ++++++ - target/i386/cpu.c | 9 +- + include/hw/i386/pc.h | 33 +++++++ + target/i386/cpu.c | 3 +- target/i386/kvm.c | 4 + - 8 files changed, 673 insertions(+), 7 deletions(-) + 8 files changed, 675 insertions(+), 7 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index 12ff55fcfb..64001893ab 100644 +index 2e15f68..8dbf49b 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c -@@ -204,6 +204,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) +@@ -213,6 +213,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) pm->fadt.reset_reg = r; pm->fadt.reset_val = 0xf; pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; @@ -64,10 +68,10 @@ index 12ff55fcfb..64001893ab 100644 } diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index ac08e63604..61e70e4811 100644 +index f6b8431..2ed002f 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -344,6 +344,261 @@ GlobalProperty pc_compat_1_4[] = { +@@ -341,6 +341,263 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -78,6 +82,8 @@ index ac08e63604..61e70e4811 100644 +GlobalProperty pc_rhel_compat[] = { + { TYPE_X86_CPU, "host-phys-bits", "on" }, + { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, ++ { TYPE_X86_CPU, 
"vmx-entry-load-perf-global-ctrl", "off" }, ++ { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, + /* bz 1508330 */ + { "vfio-pci", "x-no-geforce-quirks", "on" }, +}; @@ -326,10 +332,10 @@ index ac08e63604..61e70e4811 100644 +}; +const size_t pc_rhel_7_0_compat_len = G_N_ELEMENTS(pc_rhel_7_0_compat); + - void gsi_handler(void *opaque, int n, int level) + GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { - GSIState *s = opaque; -@@ -1225,7 +1480,8 @@ void pc_memory_init(PCMachineState *pcms, + GSIState *s; +@@ -1031,7 +1288,8 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr = g_malloc(sizeof(*option_rom_mr)); memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, &error_fatal); @@ -339,7 +345,7 @@ index ac08e63604..61e70e4811 100644 memory_region_set_readonly(option_rom_mr, true); } memory_region_add_subregion_overlap(rom_memory, -@@ -2198,6 +2454,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1937,6 +2195,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->linuxboot_dma_enabled = true; pcmc->pvh_enabled = true; assert(!mc->get_hotplug_handler); @@ -348,7 +354,7 @@ index ac08e63604..61e70e4811 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -2209,7 +2467,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1948,7 +2208,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->hot_add_cpu = pc_hot_add_cpu; mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; @@ -359,7 +365,7 @@ index ac08e63604..61e70e4811 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 1bd70d1abb..bd7fdb99bb 100644 +index b255d56..8eb64d1 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -53,6 +53,7 @@ @@ -370,7 +376,7 @@ index 1bd70d1abb..bd7fdb99bb 
100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -173,8 +174,8 @@ static void pc_init1(MachineState *machine, +@@ -174,8 +175,8 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ @@ -379,19 +385,19 @@ index 1bd70d1abb..bd7fdb99bb 100644 + smbios_set_defaults("Red Hat", "KVM", + mc->desc, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, - SMBIOS_ENTRY_POINT_21); - } -@@ -307,6 +308,7 @@ else { + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, +@@ -312,6 +313,7 @@ static void pc_init1(MachineState *machine, * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). */ +#if 0 /* Disabled for Red Hat Enterprise Linux */ static void pc_compat_2_3_fn(MachineState *machine) { - PCMachineState *pcms = PC_MACHINE(machine); -@@ -1026,3 +1028,207 @@ static void xenfv_machine_options(MachineClass *m) - DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init, - xenfv_machine_options); + X86MachineState *x86ms = X86_MACHINE(machine); +@@ -975,3 +977,207 @@ static void xenfv_3_1_machine_options(MachineClass *m) + DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, + xenfv_3_1_machine_options); #endif +#endif /* Disabled for Red Hat Enterprise Linux */ + @@ -598,10 +604,10 @@ index 1bd70d1abb..bd7fdb99bb 100644 +DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 385e5cffb1..7531d8ed76 100644 +index 461e1cd..f5ae759 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -197,8 +197,8 @@ static void pc_q35_init(MachineState *machine) +@@ -199,8 +199,8 @@ static void pc_q35_init(MachineState *machine) if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ @@ -610,9 +616,9 @@ index 385e5cffb1..7531d8ed76 100644 + smbios_set_defaults("Red Hat", "KVM", + mc->desc, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, - 
SMBIOS_ENTRY_POINT_21); - } -@@ -330,6 +330,7 @@ static void pc_q35_init(MachineState *machine) + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, +@@ -335,6 +335,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -620,7 +626,7 @@ index 385e5cffb1..7531d8ed76 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -533,3 +534,154 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -549,3 +550,160 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -655,8 +661,11 @@ index 385e5cffb1..7531d8ed76 100644 + +static void pc_q35_machine_rhel820_options(MachineClass *m) +{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel_options(m); + m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.2.0"; +} + +DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, @@ -669,9 +678,12 @@ index 385e5cffb1..7531d8ed76 100644 + +static void pc_q35_machine_rhel810_options(MachineClass *m) +{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel820_options(m); + m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; + m->alias = NULL; ++ pcmc->smbios_stream_product = NULL; ++ pcmc->smbios_stream_version = NULL; + compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); + compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); +} @@ -776,10 +788,10 @@ index 385e5cffb1..7531d8ed76 100644 +DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, + pc_q35_machine_rhel730_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index 6f85a0e032..2920bdef5b 100644 +index 0046ab5..d81225b 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -222,6 +222,8 @@ struct MachineClass { +@@ -207,6 
+207,8 @@ struct MachineClass { const char **valid_cpu_types; strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; @@ -789,10 +801,10 @@ index 6f85a0e032..2920bdef5b 100644 int nb_nodes, ram_addr_t size); bool ignore_boot_device_suffixes; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 1f86eba3f9..2e362c8faa 100644 +index 811c3d5..5b90f8b 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -124,6 +124,9 @@ typedef struct PCMachineClass { +@@ -118,6 +118,9 @@ typedef struct PCMachineClass { /* use PVH to load kernels that support this feature */ bool pvh_enabled; @@ -802,7 +814,7 @@ index 1f86eba3f9..2e362c8faa 100644 } PCMachineClass; #define TYPE_PC_MACHINE "generic-pc-machine" -@@ -300,6 +303,36 @@ extern const size_t pc_compat_1_5_len; +@@ -260,6 +263,36 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; @@ -840,20 +852,10 @@ index 1f86eba3f9..2e362c8faa 100644 * depending on QEMU versions up to QEMU 2.4. */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 1b7880ae3a..790db778ab 100644 +index 26a8584..dc4d1c9 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1829,11 +1829,17 @@ static CPUCaches epyc_cache_info = { - - static X86CPUDefinition builtin_x86_defs[] = { - { -+ /* qemu64 is the default CPU model for all *-rhel7.* machine-types. -+ * The default on RHEL-6 was cpu64-rhel6. -+ * libvirt assumes that qemu64 is the default for _all_ machine-types, -+ * so we should try to keep qemu64 and cpu64-rhel6 as similar as -+ * possible. 
-+ */ - .name = "qemu64", +@@ -1806,7 +1806,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .level = 0xd, .vendor = CPUID_VENDOR_AMD, .family = 6, @@ -862,7 +864,7 @@ index 1b7880ae3a..790db778ab 100644 .stepping = 3, .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -@@ -3932,6 +3938,7 @@ static PropValue kvm_default_props[] = { +@@ -4085,6 +4085,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -871,10 +873,10 @@ index 1b7880ae3a..790db778ab 100644 }; diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 1d10046a6c..86d9a1f364 100644 +index 4901c6d..e41cff2 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c -@@ -3079,6 +3079,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3113,6 +3113,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -882,7 +884,7 @@ index 1d10046a6c..86d9a1f364 100644 kvm_msr_buf_reset(cpu); -@@ -3388,6 +3389,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3422,6 +3423,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -893,5 +895,5 @@ index 1d10046a6c..86d9a1f364 100644 case MSR_KVM_PV_EOI_EN: env->pv_eoi_en_msr = msrs[i].data; -- -2.21.0 +1.8.3.1 diff --git a/0012-Enable-make-check.patch b/0012-Enable-make-check.patch index 09f7b4e..cab4413 100644 --- a/0012-Enable-make-check.patch +++ b/0012-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 154215041df085271a780a2989f4f481226e3e34 Mon Sep 17 00:00:00 2001 +From 0114b7010c87be70014b170ffdf66e1317f6becc Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:48:41 +0200 Subject: Enable make check @@ -22,185 +22,36 @@ Rebase changes (4.1.0-rc1): Rebase changes (4.2.0-rc0): - partially disable hd-geo-test (requires lsi53c895a) +Rebase changes (weekly-200129): +- Disable qtest/q35-test 
(uses upstream machine types) + +Rebased changes (weekly-200212): +- Do not run iotests on make check + Merged patches (4.0.0): - f7ffd13 Remove 7 qcow2 and luks iotests that are taking > 25 sec to run during the fast train build proce Merged patches (4.1.0-rc0): - 41288ff redhat: Remove raw iotest 205 - -Signed-off-by: Danilo C. L. de Paula --- - redhat/qemu-kvm.spec.template | 2 +- - tests/Makefile.include | 10 +++++----- - tests/boot-serial-test.c | 6 +++++- - tests/cpu-plug-test.c | 4 ++-- - tests/e1000-test.c | 2 ++ - tests/hd-geo-test.c | 4 ++++ - tests/prom-env-test.c | 4 ++++ - tests/qemu-iotests/051 | 12 ++++++------ - tests/qemu-iotests/group | 4 ++-- - tests/test-x86-cpuid-compat.c | 2 ++ - tests/usb-hcd-xhci-test.c | 4 ++++ - 11 files changed, 37 insertions(+), 17 deletions(-) + redhat/qemu-kvm.spec.template | 2 +- + tests/qemu-iotests/051 | 12 ++++++------ + tests/qemu-iotests/group | 4 ++-- + tests/qtest/Makefile.include | 12 ++++++------ + tests/qtest/boot-serial-test.c | 6 +++++- + tests/qtest/cpu-plug-test.c | 4 ++-- + tests/qtest/e1000-test.c | 2 ++ + tests/qtest/hd-geo-test.c | 4 ++++ + tests/qtest/prom-env-test.c | 4 ++++ + tests/qtest/test-x86-cpuid-compat.c | 2 ++ + tests/qtest/usb-hcd-xhci-test.c | 4 ++++ + 11 files changed, 38 insertions(+), 18 deletions(-) -diff --git a/tests/Makefile.include b/tests/Makefile.include -index b483790cf3..53bdbdfee0 100644 ---- a/tests/Makefile.include -+++ b/tests/Makefile.include -@@ -172,7 +172,7 @@ check-qtest-i386-y += tests/ide-test$(EXESUF) - check-qtest-i386-y += tests/ahci-test$(EXESUF) - check-qtest-i386-y += tests/hd-geo-test$(EXESUF) - check-qtest-i386-y += tests/boot-order-test$(EXESUF) --check-qtest-i386-y += tests/bios-tables-test$(EXESUF) -+#check-qtest-i386-y += tests/bios-tables-test$(EXESUF) - check-qtest-i386-$(CONFIG_SGA) += tests/boot-serial-test$(EXESUF) - check-qtest-i386-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) - check-qtest-i386-y += tests/rtc-test$(EXESUF) -@@ -230,7 +230,7 @@ 
check-qtest-mips64el-$(CONFIG_VGA) += tests/display-vga-test$(EXESUF) - check-qtest-moxie-y += tests/boot-serial-test$(EXESUF) - - check-qtest-ppc-$(CONFIG_ISA_TESTDEV) = tests/endianness-test$(EXESUF) --check-qtest-ppc-y += tests/boot-order-test$(EXESUF) -+#check-qtest-ppc-y += tests/boot-order-test$(EXESUF) - check-qtest-ppc-y += tests/prom-env-test$(EXESUF) - check-qtest-ppc-y += tests/drive_del-test$(EXESUF) - check-qtest-ppc-y += tests/boot-serial-test$(EXESUF) -@@ -244,8 +244,8 @@ check-qtest-ppc64-$(CONFIG_PSERIES) += tests/rtas-test$(EXESUF) - check-qtest-ppc64-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) - check-qtest-ppc64-$(CONFIG_USB_UHCI) += tests/usb-hcd-uhci-test$(EXESUF) - check-qtest-ppc64-$(CONFIG_USB_XHCI_NEC) += tests/usb-hcd-xhci-test$(EXESUF) --check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) --check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) -+#check-qtest-ppc64-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) -+#check-qtest-ppc64-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) - check-qtest-ppc64-$(CONFIG_RTL8139_PCI) += tests/test-filter-redirector$(EXESUF) - check-qtest-ppc64-$(CONFIG_VGA) += tests/display-vga-test$(EXESUF) - check-qtest-ppc64-y += tests/numa-test$(EXESUF) -@@ -291,7 +291,7 @@ check-qtest-s390x-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) - check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) - check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF) - check-qtest-s390x-y += tests/drive_del-test$(EXESUF) --check-qtest-s390x-y += tests/device-plug-test$(EXESUF) -+#check-qtest-s390x-y += tests/device-plug-test$(EXESUF) - check-qtest-s390x-y += tests/virtio-ccw-test$(EXESUF) - check-qtest-s390x-y += tests/cpu-plug-test$(EXESUF) - check-qtest-s390x-y += tests/migration-test$(EXESUF) -diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c -index d3a54a0ba5..33ce72b89c 100644 ---- a/tests/boot-serial-test.c -+++ 
b/tests/boot-serial-test.c -@@ -108,19 +108,23 @@ static testdef_t tests[] = { - { "ppc", "g3beige", "", "PowerPC,750" }, - { "ppc", "mac99", "", "PowerPC,G4" }, - { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { "ppc64", "ppce500", "", "U-Boot" }, - { "ppc64", "40p", "-m 192", "Memory: 192M" }, - { "ppc64", "mac99", "", "PowerPC,970FX" }, -+#endif - { "ppc64", "pseries", - "-machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken", - "Open Firmware" }, -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { "ppc64", "powernv8", "", "OPAL" }, - { "ppc64", "powernv9", "", "OPAL" }, - { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, -+#endif - { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, - { "i386", "pc", "-device sga", "SGABIOS" }, - { "i386", "q35", "-device sga", "SGABIOS" }, -- { "x86_64", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, -+ { "x86_64", "pc", "-cpu qemu32 -device sga", "SGABIOS" }, - { "x86_64", "q35", "-device sga", "SGABIOS" }, - { "sparc", "LX", "", "TMS390S10" }, - { "sparc", "SS-4", "", "MB86904" }, -diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c -index 30e514bbfb..a04beae1c6 100644 ---- a/tests/cpu-plug-test.c -+++ b/tests/cpu-plug-test.c -@@ -185,8 +185,8 @@ static void add_pseries_test_case(const char *mname) - char *path; - PlugTestData *data; - -- if (!g_str_has_prefix(mname, "pseries-") || -- (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7)) { -+ if (!g_str_has_prefix(mname, "pseries-rhel") || -+ (g_str_has_prefix(mname, "pseries-rhel7.") && atoi(&mname[14]) < 4)) { - return; - } - data = g_new(PlugTestData, 1); -diff --git a/tests/e1000-test.c b/tests/e1000-test.c -index c387984ef6..c89112d6f8 100644 ---- a/tests/e1000-test.c -+++ b/tests/e1000-test.c -@@ -22,9 +22,11 @@ struct QE1000 { - - static const char *models[] = { - "e1000", -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - "e1000-82540em", - "e1000-82544gc", - 
"e1000-82545em", -+#endif - }; - - static void *e1000_get_driver(void *obj, const char *interface) -diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c -index 7e86c5416c..cc068bad87 100644 ---- a/tests/hd-geo-test.c -+++ b/tests/hd-geo-test.c -@@ -732,6 +732,7 @@ static void test_override_ide(void) - test_override(args, expected); - } - -+#if 0 /* Require lsi53c895a - not supported on RHEL */ - static void test_override_scsi(void) - { - TestArgs *args = create_args(); -@@ -776,6 +777,7 @@ static void test_override_scsi_2_controllers(void) - add_scsi_disk(args, 3, 1, 0, 1, 2, 0, 1, 0); - test_override(args, expected); - } -+#endif - - static void test_override_virtio_blk(void) - { -@@ -951,9 +953,11 @@ int main(int argc, char **argv) - qtest_add_func("hd-geo/ide/device/user/chst", test_ide_device_user_chst); - if (have_qemu_img()) { - qtest_add_func("hd-geo/override/ide", test_override_ide); -+#if 0 /* Require lsi53c895a - not supported on RHEL */ - qtest_add_func("hd-geo/override/scsi", test_override_scsi); - qtest_add_func("hd-geo/override/scsi_2_controllers", - test_override_scsi_2_controllers); -+#endif - qtest_add_func("hd-geo/override/virtio_blk", test_override_virtio_blk); - qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); - qtest_add_func("hd-geo/override/scsi_hot_unplug", -diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c -index 61bc1d1e7b..028d45c7d7 100644 ---- a/tests/prom-env-test.c -+++ b/tests/prom-env-test.c -@@ -88,10 +88,14 @@ int main(int argc, char *argv[]) - if (!strcmp(arch, "ppc")) { - add_tests(ppc_machines); - } else if (!strcmp(arch, "ppc64")) { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - add_tests(ppc_machines); - if (g_test_slow()) { -+#endif - qtest_add_data_func("prom-env/pseries", "pseries", test_machine); -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - } -+#endif - } else if (!strcmp(arch, "sparc")) { - add_tests(sparc_machines); - } else if (!strcmp(arch, "sparc64")) { diff --git 
a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 -index 53bcdbc911..b387e0c233 100755 +index 034d3a3..aadc413 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 -@@ -181,11 +181,11 @@ run_qemu -drive if=virtio +@@ -183,11 +183,11 @@ run_qemu -drive if=virtio case "$QEMU_DEFAULT_MACHINE" in pc) run_qemu -drive if=none,id=disk -device ide-cd,drive=disk @@ -215,7 +66,7 @@ index 53bcdbc911..b387e0c233 100755 ;; *) ;; -@@ -234,11 +234,11 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on +@@ -236,11 +236,11 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on case "$QEMU_DEFAULT_MACHINE" in pc) run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-cd,drive=disk @@ -231,7 +82,7 @@ index 53bcdbc911..b387e0c233 100755 *) ;; diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 6b10a6a762..06cc734b26 100644 +index 1710470..0711b66 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -92,7 +92,7 @@ @@ -252,10 +103,172 @@ index 6b10a6a762..06cc734b26 100644 # 100 was removed, do not reuse 101 rw quick 102 rw quick -diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c -index 772287bdb4..e7c075ed98 100644 ---- a/tests/test-x86-cpuid-compat.c -+++ b/tests/test-x86-cpuid-compat.c +diff --git a/tests/qtest/Makefile.include b/tests/qtest/Makefile.include +index 9e5a51d..0732f59 100644 +--- a/tests/qtest/Makefile.include ++++ b/tests/qtest/Makefile.include +@@ -29,7 +29,7 @@ check-qtest-i386-y += ide-test + check-qtest-i386-$(CONFIG_TOOLS) += ahci-test + check-qtest-i386-y += hd-geo-test + check-qtest-i386-y += boot-order-test +-check-qtest-i386-y += bios-tables-test ++#check-qtest-i386-y += bios-tables-test + check-qtest-i386-$(CONFIG_SGA) += boot-serial-test + check-qtest-i386-$(CONFIG_SLIRP) += pxe-test + check-qtest-i386-y += rtc-test +@@ -51,7 +51,7 @@ check-qtest-i386-$(CONFIG_USB_UHCI) += usb-hcd-uhci-test + check-qtest-i386-$(call 
land,$(CONFIG_USB_EHCI),$(CONFIG_USB_UHCI)) += usb-hcd-ehci-test + check-qtest-i386-$(CONFIG_USB_XHCI_NEC) += usb-hcd-xhci-test + check-qtest-i386-y += cpu-plug-test +-check-qtest-i386-y += q35-test ++#check-qtest-i386-y += q35-test + check-qtest-i386-y += vmgenid-test + check-qtest-i386-$(CONFIG_TPM_CRB) += tpm-crb-swtpm-test + check-qtest-i386-$(CONFIG_TPM_CRB) += tpm-crb-test +@@ -88,7 +88,7 @@ check-qtest-mips64el-$(CONFIG_VGA) += display-vga-test + check-qtest-moxie-y += boot-serial-test + + check-qtest-ppc-$(CONFIG_ISA_TESTDEV) = endianness-test +-check-qtest-ppc-y += boot-order-test ++#check-qtest-ppc-y += boot-order-test + check-qtest-ppc-y += prom-env-test + check-qtest-ppc-y += drive_del-test + check-qtest-ppc-y += boot-serial-test +@@ -102,8 +102,8 @@ check-qtest-ppc64-$(CONFIG_PSERIES) += rtas-test + check-qtest-ppc64-$(CONFIG_SLIRP) += pxe-test + check-qtest-ppc64-$(CONFIG_USB_UHCI) += usb-hcd-uhci-test + check-qtest-ppc64-$(CONFIG_USB_XHCI_NEC) += usb-hcd-xhci-test +-check-qtest-ppc64-$(CONFIG_SLIRP) += test-netfilter +-check-qtest-ppc64-$(CONFIG_POSIX) += test-filter-mirror ++#check-qtest-ppc64-$(CONFIG_SLIRP) += test-netfilter ++#check-qtest-ppc64-$(CONFIG_POSIX) += test-filter-mirror + check-qtest-ppc64-$(CONFIG_RTL8139_PCI) += test-filter-redirector + check-qtest-ppc64-$(CONFIG_VGA) += display-vga-test + check-qtest-ppc64-y += numa-test +@@ -152,7 +152,7 @@ check-qtest-s390x-$(CONFIG_SLIRP) += test-netfilter + check-qtest-s390x-$(CONFIG_POSIX) += test-filter-mirror + check-qtest-s390x-$(CONFIG_POSIX) += test-filter-redirector + check-qtest-s390x-y += drive_del-test +-check-qtest-s390x-y += device-plug-test ++#check-qtest-s390x-y += device-plug-test + check-qtest-s390x-y += virtio-ccw-test + check-qtest-s390x-y += cpu-plug-test + check-qtest-s390x-y += migration-test +diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c +index 85a3614..1c18441 100644 +--- a/tests/qtest/boot-serial-test.c ++++ b/tests/qtest/boot-serial-test.c 
+@@ -109,19 +109,23 @@ static testdef_t tests[] = { + { "ppc", "g3beige", "", "PowerPC,750" }, + { "ppc", "mac99", "", "PowerPC,G4" }, + { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "ppce500", "", "U-Boot" }, + { "ppc64", "40p", "-m 192", "Memory: 192M" }, + { "ppc64", "mac99", "", "PowerPC,970FX" }, ++#endif + { "ppc64", "pseries", + "-machine " PSERIES_DEFAULT_CAPABILITIES, + "Open Firmware" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "powernv8", "", "OPAL" }, + { "ppc64", "powernv9", "", "OPAL" }, + { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, ++#endif + { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "i386", "pc", "-device sga", "SGABIOS" }, + { "i386", "q35", "-device sga", "SGABIOS" }, +- { "x86_64", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, ++ { "x86_64", "pc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "x86_64", "q35", "-device sga", "SGABIOS" }, + { "sparc", "LX", "", "TMS390S10" }, + { "sparc", "SS-4", "", "MB86904" }, +diff --git a/tests/qtest/cpu-plug-test.c b/tests/qtest/cpu-plug-test.c +index e8ffbbc..fda7269 100644 +--- a/tests/qtest/cpu-plug-test.c ++++ b/tests/qtest/cpu-plug-test.c +@@ -181,8 +181,8 @@ static void add_pseries_test_case(const char *mname) + char *path; + PlugTestData *data; + +- if (!g_str_has_prefix(mname, "pseries-") || +- (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7)) { ++ if (!g_str_has_prefix(mname, "pseries-rhel") || ++ (g_str_has_prefix(mname, "pseries-rhel7.") && atoi(&mname[14]) < 4)) { + return; + } + data = g_new(PlugTestData, 1); +diff --git a/tests/qtest/e1000-test.c b/tests/qtest/e1000-test.c +index c387984..c89112d 100644 +--- a/tests/qtest/e1000-test.c ++++ b/tests/qtest/e1000-test.c +@@ -22,9 +22,11 @@ struct QE1000 { + + static const char *models[] = { + "e1000", ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + "e1000-82540em", + "e1000-82544gc", + "e1000-82545em", 
++#endif + }; + + static void *e1000_get_driver(void *obj, const char *interface) +diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c +index 48e8e02..6496196 100644 +--- a/tests/qtest/hd-geo-test.c ++++ b/tests/qtest/hd-geo-test.c +@@ -737,6 +737,7 @@ static void test_override_ide(void) + test_override(args, expected); + } + ++#if 0 /* Require lsi53c895a - not supported on RHEL */ + static void test_override_scsi(void) + { + TestArgs *args = create_args(); +@@ -781,6 +782,7 @@ static void test_override_scsi_2_controllers(void) + add_scsi_disk(args, 3, 1, 0, 1, 2, 0, 1, 0); + test_override(args, expected); + } ++#endif + + static void test_override_virtio_blk(void) + { +@@ -960,9 +962,11 @@ int main(int argc, char **argv) + qtest_add_func("hd-geo/ide/device/user/chst", test_ide_device_user_chst); + if (have_qemu_img()) { + qtest_add_func("hd-geo/override/ide", test_override_ide); ++#if 0 /* Require lsi53c895a - not supported on RHEL */ + qtest_add_func("hd-geo/override/scsi", test_override_scsi); + qtest_add_func("hd-geo/override/scsi_2_controllers", + test_override_scsi_2_controllers); ++#endif + qtest_add_func("hd-geo/override/virtio_blk", test_override_virtio_blk); + qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); + qtest_add_func("hd-geo/override/scsi_hot_unplug", +diff --git a/tests/qtest/prom-env-test.c b/tests/qtest/prom-env-test.c +index 60e6ec3..f9d6adc 100644 +--- a/tests/qtest/prom-env-test.c ++++ b/tests/qtest/prom-env-test.c +@@ -89,10 +89,14 @@ int main(int argc, char *argv[]) + if (!strcmp(arch, "ppc")) { + add_tests(ppc_machines); + } else if (!strcmp(arch, "ppc64")) { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + add_tests(ppc_machines); + if (g_test_slow()) { ++#endif + qtest_add_data_func("prom-env/pseries", "pseries", test_machine); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + } ++#endif + } else if (!strcmp(arch, "sparc")) { + add_tests(sparc_machines); + } else if (!strcmp(arch, "sparc64")) { 
+diff --git a/tests/qtest/test-x86-cpuid-compat.c b/tests/qtest/test-x86-cpuid-compat.c +index 772287b..e7c075e 100644 +--- a/tests/qtest/test-x86-cpuid-compat.c ++++ b/tests/qtest/test-x86-cpuid-compat.c @@ -300,6 +300,7 @@ int main(int argc, char **argv) "-cpu 486,xlevel2=0xC0000002,+xstore", "xlevel2", 0xC0000002); @@ -272,10 +285,10 @@ index 772287bdb4..e7c075ed98 100644 /* Test feature parsing */ add_feature_test("x86/cpuid/features/plus", -diff --git a/tests/usb-hcd-xhci-test.c b/tests/usb-hcd-xhci-test.c -index 10ef9d2a91..3855873050 100644 ---- a/tests/usb-hcd-xhci-test.c -+++ b/tests/usb-hcd-xhci-test.c +diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c +index 10ef9d2..3855873 100644 +--- a/tests/qtest/usb-hcd-xhci-test.c ++++ b/tests/qtest/usb-hcd-xhci-test.c @@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) usb_test_hotplug(global_qtest, "xhci", "1", NULL); } @@ -303,5 +316,5 @@ index 10ef9d2a91..3855873050 100644 qtest_start("-device nec-usb-xhci,id=xhci" -- -2.21.0 +1.8.3.1 diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index db776c4..77ec099 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From de433da59448eaad4ac1b902d07d57b57f922aff Mon Sep 17 00:00:00 2001 +From e77808a25ee638b717e1507a1e55cbf8350afbfd Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -35,14 +35,13 @@ Merged patches (2.9.0): (cherry picked from commit 9fa3c9fc6dfcde76d80db1aa601b2d577f72ceec) (cherry picked from commit 3cb35556dc7d994f203d732fe952f95fcdb03c0a) -Signed-off-by: Danilo C. L. 
de Paula --- hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- hw/vfio/pci.h | 1 + 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index c8534d3035..309535f306 100644 +index e265d77..41d00a3 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -47,6 +47,9 @@ @@ -87,7 +86,7 @@ index c8534d3035..309535f306 100644 if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3167,6 +3191,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3169,6 +3193,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -98,7 +97,7 @@ index c8534d3035..309535f306 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 35626cd63e..0cd4803aee 100644 +index 0da7a20..5d2b0d2 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -135,6 +135,7 @@ typedef struct VFIOPCIDevice { @@ -110,5 +109,5 @@ index 35626cd63e..0cd4803aee 100644 uint32_t device_id; uint32_t sub_vendor_id; -- -2.21.0 +1.8.3.1 diff --git a/0014-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch index cb77bfe..d87713a 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 2754dd8da8975757753fd491985d5e7b36966106 Mon Sep 17 00:00:00 2001 +From 8b189d52c8b8e2c251d76c7b00dc4a2a0a570bf8 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -18,16 +18,15 @@ as unsupported by Red Hat, and advising users to use libvirt instead. 
Signed-off-by: Eduardo Habkost (cherry picked from commit 2a07700936e39856cc9f149c6a6517f0715536a6) (cherry picked from commit 5dd2f4706e2fef945771949e59a8fcc1b5452de9) -Signed-off-by: Danilo C. L. de Paula --- - vl.c | 9 +++++++++ + softmmu/vl.c | 9 +++++++++ 1 file changed, 9 insertions(+) -diff --git a/vl.c b/vl.c -index 668a34577e..9f3e7e7733 100644 ---- a/vl.c -+++ b/vl.c -@@ -1822,9 +1822,17 @@ static void version(void) +diff --git a/softmmu/vl.c b/softmmu/vl.c +index 00f7604..5ba8c19 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -1674,9 +1674,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -45,7 +44,7 @@ index 668a34577e..9f3e7e7733 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", error_get_progname()); -@@ -1841,6 +1849,7 @@ static void help(int exitcode) +@@ -1693,6 +1701,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); @@ -54,5 +53,5 @@ index 668a34577e..9f3e7e7733 100644 } -- -2.21.0 +1.8.3.1 diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch index cec862d..89e14e7 100644 --- a/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From c9c3cf721b0e9e359418f64c2a5121c3f8b5d27a Mon Sep 17 00:00:00 2001 +From e0aee69fcafe1c3656db2676b8a0d379a48c299c Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -74,18 +74,16 @@ The recommended vcpu max limit (KVM_CAP_NR_VCPUS) should be used instead of the actual max vcpu limit (KVM_CAP_MAX_VCPUS) to give an error. This commit matches the limit to current KVM_CAP_NR_VCPUS value. - -Signed-off-by: Danilo C. L. 
de Paula --- accel/kvm/kvm-all.c | 12 ++++++++++++ - vl.c | 18 ++++++++++++++++++ + softmmu/vl.c | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index ca00daa2f5..dc3ed7f04e 100644 +index 439a4ef..6f804b8 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c -@@ -1943,6 +1943,18 @@ static int kvm_init(MachineState *ms) +@@ -1975,6 +1975,18 @@ static int kvm_init(MachineState *ms) soft_vcpus_limit = kvm_recommended_vcpus(s); hard_vcpus_limit = kvm_max_vcpus(s); @@ -104,11 +102,11 @@ index ca00daa2f5..dc3ed7f04e 100644 while (nc->name) { if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " -diff --git a/vl.c b/vl.c -index 9f3e7e7733..1550aa2aaa 100644 ---- a/vl.c -+++ b/vl.c -@@ -134,6 +134,8 @@ int main(int argc, char **argv) +diff --git a/softmmu/vl.c b/softmmu/vl.c +index 5ba8c19..e98ab6b 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -115,6 +115,8 @@ #define MAX_VIRTIO_CONSOLES 1 @@ -117,8 +115,8 @@ index 9f3e7e7733..1550aa2aaa 100644 static const char *data_dir[16]; static int data_dir_idx; const char *bios_name = NULL; -@@ -1339,6 +1341,20 @@ static MachineClass *find_default_machine(GSList *machines) - return NULL; +@@ -1177,6 +1179,20 @@ static MachineClass *find_default_machine(GSList *machines) + return default_machineclass; } +/* Maximum number of CPUs limited for Red Hat Enterprise Linux */ @@ -138,7 +136,7 @@ index 9f3e7e7733..1550aa2aaa 100644 static int machine_help_func(QemuOpts *opts, MachineState *machine) { ObjectProperty *prop; -@@ -3857,6 +3873,8 @@ int main(int argc, char **argv, char **envp) +@@ -3829,6 +3845,8 @@ void qemu_init(int argc, char **argv, char **envp) "mutually exclusive"); exit(EXIT_FAILURE); } @@ -148,5 +146,5 @@ index 9f3e7e7733..1550aa2aaa 100644 configure_rtc(qemu_find_opts_singleton("rtc")); -- -2.21.0 +1.8.3.1 diff --git a/0016-Add-support-for-simpletrace.patch b/0016-Add-support-for-simpletrace.patch index 
9624855..d64625b 100644 --- a/0016-Add-support-for-simpletrace.patch +++ b/0016-Add-support-for-simpletrace.patch @@ -1,4 +1,4 @@ -From 26128b3ede339e292a3c50a84e3248af46ecd0ec Mon Sep 17 00:00:00 2001 +From 565cee8e4965ece9e0c271cad813263b606b3e65 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 8 Oct 2015 09:50:17 +0200 Subject: Add support for simpletrace @@ -16,30 +16,31 @@ Rebase notes (2.9.0): Rebase notes (2.8.0): - Changed tracetool.py parameters +Rebase notes (weekly-200219): +- Removed python shenigan (done upstream) + Merged patches (2.3.0): - db959d6 redhat/qemu-kvm.spec.template: Install qemu-kvm-simpletrace.stp - 5292fc3 trace: add SystemTap init scripts for simpletrace bridge - eda9e5e simpletrace: install simpletrace.py - 85c4c8f trace: add systemtap-initscript README file to RPM - -Signed-off-by: Danilo C. L. de Paula --- .gitignore | 2 ++ Makefile | 4 +++ - README.systemtap | 43 +++++++++++++++++++++++++ - redhat/qemu-kvm.spec.template | 26 ++++++++++++++- + README.systemtap | 43 +++++++++++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 25 ++++++++++++++++++- scripts/systemtap/conf.d/qemu_kvm.conf | 4 +++ scripts/systemtap/script.d/qemu_kvm.stp | 1 + - 6 files changed, 79 insertions(+), 1 deletion(-) + 6 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 README.systemtap create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp diff --git a/Makefile b/Makefile -index 086727dbb9..4254950f7f 100644 +index aee2e8e..ded56e5 100644 --- a/Makefile +++ b/Makefile -@@ -939,6 +939,10 @@ endif +@@ -999,6 +999,10 @@ endif $(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \ done $(INSTALL_DATA) $(BUILD_DIR)/trace-events-all "$(DESTDIR)$(qemu_datadir)/trace-events-all" @@ -52,7 +53,7 @@ index 086727dbb9..4254950f7f 100644 ctags: diff --git a/README.systemtap b/README.systemtap new file mode 100644 -index 0000000000..ad913fc990 
+index 0000000..ad913fc --- /dev/null +++ b/README.systemtap @@ -0,0 +1,43 @@ @@ -101,7 +102,7 @@ index 0000000000..ad913fc990 + # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf new file mode 100644 -index 0000000000..372d8160a4 +index 0000000..372d816 --- /dev/null +++ b/scripts/systemtap/conf.d/qemu_kvm.conf @@ -0,0 +1,4 @@ @@ -111,11 +112,11 @@ index 0000000000..372d8160a4 +qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp new file mode 100644 -index 0000000000..c04abf9449 +index 0000000..c04abf9 --- /dev/null +++ b/scripts/systemtap/script.d/qemu_kvm.stp @@ -0,0 +1 @@ +probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} -- -2.21.0 +1.8.3.1 diff --git a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index ef83445..7065d7d 100644 --- a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,6 +1,6 @@ -From 97ed62562b883c384346bfef3e1c7e379f03ccab Mon Sep 17 00:00:00 2001 +From 4375e8b568866c7ddbde19de1bb999cf3ebfe6fe Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Fri, 30 Nov 2018 09:11:03 +0100 +Date: Wed, 29 Jan 2020 09:30:03 +0100 Subject: Use qemu-kvm in documentation instead of qemu-system- Patchwork-id: 62380 @@ -16,103 +16,3702 @@ We change the name and location of qemu-kvm binaries. Update documentation to reflect this change. Only architectures available in RHEL are updated. Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. 
de Paula ---- - docs/qemu-block-drivers.texi | 2 +- - docs/qemu-cpu-models.texi | 2 +- - qemu-doc.texi | 6 +++--- - qemu-options.hx | 16 ++++++++-------- - 4 files changed, 13 insertions(+), 13 deletions(-) -diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi -index 2c7ea49c32..5d0afb3dee 100644 ---- a/docs/qemu-block-drivers.texi -+++ b/docs/qemu-block-drivers.texi -@@ -2,7 +2,7 @@ - QEMU block driver reference manual - @c man end - --@set qemu_system qemu-system-x86_64 -+@set qemu_system qemu-kvm - - @c man begin DESCRIPTION - +--- + +Rebase notes (weekly-200129): +- qemu-block-drivers.texi converted to qemu-block-drivers.rst (upstream) + +Conflicts: + docs/qemu-cpu-models.texi + docs/system/qemu-block-drivers.rst + qemu-doc.texi + qemu-options.hx +--- + docs/qemu-cpu-models.texi | 677 +++++++++++ + qemu-doc.texi | 2967 +++++++++++++++++++++++++++++++++++++++++++++ + qemu-options.hx | 10 +- + 3 files changed, 3649 insertions(+), 5 deletions(-) + create mode 100644 docs/qemu-cpu-models.texi + create mode 100644 qemu-doc.texi + diff --git a/docs/qemu-cpu-models.texi b/docs/qemu-cpu-models.texi -index f88a1def0d..c82cf8fab7 100644 ---- a/docs/qemu-cpu-models.texi +new file mode 100644 +index 0000000..c82cf8f +--- /dev/null +++ b/docs/qemu-cpu-models.texi -@@ -2,7 +2,7 @@ - QEMU / KVM CPU model configuration - @c man end - --@set qemu_system_x86 qemu-system-x86_64 +@@ -0,0 +1,677 @@ ++@c man begin SYNOPSIS ++QEMU / KVM CPU model configuration ++@c man end ++ +@set qemu_system_x86 qemu-kvm - - @c man begin DESCRIPTION - ++ ++@c man begin DESCRIPTION ++ ++@menu ++* recommendations_cpu_models_x86:: Recommendations for KVM CPU model configuration on x86 hosts ++* recommendations_cpu_models_MIPS:: Supported CPU model configurations on MIPS hosts ++* cpu_model_syntax_apps:: Syntax for configuring CPU models ++@end menu ++ ++QEMU / KVM virtualization supports two ways to configure CPU models ++ ++@table @option ++ ++@item Host passthrough ++ ++This 
passes the host CPU model features, model, stepping, exactly to the ++guest. Note that KVM may filter out some host CPU model features if they ++cannot be supported with virtualization. Live migration is unsafe when ++this mode is used as libvirt / QEMU cannot guarantee a stable CPU is ++exposed to the guest across hosts. This is the recommended CPU to use, ++provided live migration is not required. ++ ++@item Named model ++ ++QEMU comes with a number of predefined named CPU models, that typically ++refer to specific generations of hardware released by Intel and AMD. ++These allow the guest VMs to have a degree of isolation from the host CPU, ++allowing greater flexibility in live migrating between hosts with differing ++hardware. ++@end table ++ ++In both cases, it is possible to optionally add or remove individual CPU ++features, to alter what is presented to the guest by default. ++ ++Libvirt supports a third way to configure CPU models known as "Host model". ++This uses the QEMU "Named model" feature, automatically picking a CPU model ++that is similar to the host CPU, and then adding extra features to approximate ++the host model as closely as possible. This does not guarantee the CPU family, ++stepping, etc will precisely match the host CPU, as they would with "Host ++passthrough", but gives much of the benefit of passthrough, while making ++live migration safe. ++ ++@node recommendations_cpu_models_x86 ++@subsection Recommendations for KVM CPU model configuration on x86 hosts ++ ++The information that follows provides recommendations for configuring ++CPU models on x86 hosts. The goals are to maximise performance, while ++protecting guest OS against various CPU hardware flaws, and optionally ++enabling live migration between hosts with heterogeneous CPU models.
++ ++@menu ++* preferred_cpu_models_intel_x86:: Preferred CPU models for Intel x86 hosts ++* important_cpu_features_intel_x86:: Important CPU features for Intel x86 hosts ++* preferred_cpu_models_amd_x86:: Preferred CPU models for AMD x86 hosts ++* important_cpu_features_amd_x86:: Important CPU features for AMD x86 hosts ++* default_cpu_models_x86:: Default x86 CPU models ++* other_non_recommended_cpu_models_x86:: Other non-recommended x86 CPUs ++@end menu ++ ++@node preferred_cpu_models_intel_x86 ++@subsubsection Preferred CPU models for Intel x86 hosts ++ ++The following CPU models are preferred for use on Intel hosts. Administrators / ++applications are recommended to use the CPU model that matches the generation ++of the host CPUs in use. In a deployment with a mixture of host CPU models ++between machines, if live migration compatibility is required, use the newest ++CPU model that is compatible across all desired hosts. ++ ++@table @option ++@item @code{Skylake-Server} ++@item @code{Skylake-Server-IBRS} ++ ++Intel Xeon Processor (Skylake, 2016) ++ ++ ++@item @code{Skylake-Client} ++@item @code{Skylake-Client-IBRS} ++ ++Intel Core Processor (Skylake, 2015) ++ ++ ++@item @code{Broadwell} ++@item @code{Broadwell-IBRS} ++@item @code{Broadwell-noTSX} ++@item @code{Broadwell-noTSX-IBRS} ++ ++Intel Core Processor (Broadwell, 2014) ++ ++ ++@item @code{Haswell} ++@item @code{Haswell-IBRS} ++@item @code{Haswell-noTSX} ++@item @code{Haswell-noTSX-IBRS} ++ ++Intel Core Processor (Haswell, 2013) ++ ++ ++@item @code{IvyBridge} ++@item @code{IvyBridge-IBRS} ++ ++Intel Xeon E3-12xx v2 (Ivy Bridge, 2012) ++ ++ ++@item @code{SandyBridge} ++@item @code{SandyBridge-IBRS} ++ ++Intel Xeon E312xx (Sandy Bridge, 2011) ++ ++ ++@item @code{Westmere} ++@item @code{Westmere-IBRS} ++ ++Westmere E56xx/L56xx/X56xx (Nehalem-C, 2010) ++ ++ ++@item @code{Nehalem} ++@item @code{Nehalem-IBRS} ++ ++Intel Core i7 9xx (Nehalem Class Core i7, 2008) ++ ++ ++@item @code{Penryn} ++ ++Intel Core 2 Duo 
P9xxx (Penryn Class Core 2, 2007) ++ ++ ++@item @code{Conroe} ++ ++Intel Celeron_4x0 (Conroe/Merom Class Core 2, 2006) ++ ++@end table ++ ++@node important_cpu_features_intel_x86 ++@subsubsection Important CPU features for Intel x86 hosts ++ ++The following are important CPU features that should be used on Intel x86 ++hosts, when available in the host CPU. Some of them require explicit ++configuration to enable, as they are not included by default in some, or all, ++of the named CPU models listed above. In general all of these features are ++included if using "Host passthrough" or "Host model". ++ ++ ++@table @option ++ ++@item @code{pcid} ++ ++Recommended to mitigate the cost of the Meltdown (CVE-2017-5754) fix ++ ++Included by default in Haswell, Broadwell & Skylake Intel CPU models. ++ ++Should be explicitly turned on for Westmere, SandyBridge, and IvyBridge ++Intel CPU models. Note that some desktop/mobile Westmere CPUs cannot ++support this feature. ++ ++ ++@item @code{spec-ctrl} ++ ++Required to enable the Spectre v2 (CVE-2017-5715) fix. ++ ++Included by default in Intel CPU models with -IBRS suffix. ++ ++Must be explicitly turned on for Intel CPU models without -IBRS suffix. ++ ++Requires the host CPU microcode to support this feature before it ++can be used for guest CPUs. ++ ++ ++@item @code{stibp} ++ ++Required to enable stronger Spectre v2 (CVE-2017-5715) fixes in some ++operating systems. ++ ++Must be explicitly turned on for all Intel CPU models. ++ ++Requires the host CPU microcode to support this feature before it ++can be used for guest CPUs. ++ ++ ++@item @code{ssbd} ++ ++Required to enable the CVE-2018-3639 fix ++ ++Not included by default in any Intel CPU model. ++ ++Must be explicitly turned on for all Intel CPU models. ++ ++Requires the host CPU microcode to support this feature before it ++can be used for guest CPUs. 
++ ++ ++@item @code{pdpe1gb} ++ ++Recommended to allow guest OS to use 1GB size pages ++ ++Not included by default in any Intel CPU model. ++ ++Should be explicitly turned on for all Intel CPU models. ++ ++Note that not all CPU hardware will support this feature. ++ ++@item @code{md-clear} ++ ++Required to confirm the MDS (CVE-2018-12126, CVE-2018-12127, CVE-2018-12130, ++CVE-2019-11091) fixes. ++ ++Not included by default in any Intel CPU model. ++ ++Must be explicitly turned on for all Intel CPU models. ++ ++Requires the host CPU microcode to support this feature before it ++can be used for guest CPUs. ++@end table ++ ++ ++@node preferred_cpu_models_amd_x86 ++@subsubsection Preferred CPU models for AMD x86 hosts ++ ++The following CPU models are preferred for use on AMD hosts. Administrators / ++applications are recommended to use the CPU model that matches the generation ++of the host CPUs in use. In a deployment with a mixture of host CPU models ++between machines, if live migration compatibility is required, use the newest ++CPU model that is compatible across all desired hosts. ++ ++@table @option ++ ++@item @code{EPYC} ++@item @code{EPYC-IBPB} ++ ++AMD EPYC Processor (2017) ++ ++ ++@item @code{Opteron_G5} ++ ++AMD Opteron 63xx class CPU (2012) ++ ++ ++@item @code{Opteron_G4} ++ ++AMD Opteron 62xx class CPU (2011) ++ ++ ++@item @code{Opteron_G3} ++ ++AMD Opteron 23xx (Gen 3 Class Opteron, 2009) ++ ++ ++@item @code{Opteron_G2} ++ ++AMD Opteron 22xx (Gen 2 Class Opteron, 2006) ++ ++ ++@item @code{Opteron_G1} ++ ++AMD Opteron 240 (Gen 1 Class Opteron, 2004) ++@end table ++ ++@node important_cpu_features_amd_x86 ++@subsubsection Important CPU features for AMD x86 hosts ++ ++The following are important CPU features that should be used on AMD x86 ++hosts, when available in the host CPU. Some of them require explicit ++configuration to enable, as they are not included by default in some, or all, ++of the named CPU models listed above.
In general all of these features are ++included if using "Host passthrough" or "Host model". ++ ++ ++@table @option ++ ++@item @code{ibpb} ++ ++Required to enable the Spectre v2 (CVE-2017-5715) fix. ++ ++Included by default in AMD CPU models with -IBPB suffix. ++ ++Must be explicitly turned on for AMD CPU models without -IBPB suffix. ++ ++Requires the host CPU microcode to support this feature before it ++can be used for guest CPUs. ++ ++ ++@item @code{stibp} ++ ++Required to enable stronger Spectre v2 (CVE-2017-5715) fixes in some ++operating systems. ++ ++Must be explicitly turned on for all AMD CPU models. ++ ++Requires the host CPU microcode to support this feature before it ++can be used for guest CPUs. ++ ++ ++@item @code{virt-ssbd} ++ ++Required to enable the CVE-2018-3639 fix ++ ++Not included by default in any AMD CPU model. ++ ++Must be explicitly turned on for all AMD CPU models. ++ ++This should be provided to guests, even if amd-ssbd is also ++provided, for maximum guest compatibility. ++ ++Note for some QEMU / libvirt versions, this must be force enabled ++when using "Host model", because this is a virtual feature ++that doesn't exist in the physical host CPUs. ++ ++ ++@item @code{amd-ssbd} ++ ++Required to enable the CVE-2018-3639 fix ++ ++Not included by default in any AMD CPU model. ++ ++Must be explicitly turned on for all AMD CPU models. ++ ++This provides higher performance than virt-ssbd so should be ++exposed to guests whenever available in the host. virt-ssbd ++should none the less also be exposed for maximum guest ++compatibility as some kernels only know about virt-ssbd. ++ ++ ++@item @code{amd-no-ssb} ++ ++Recommended to indicate the host is not vulnerable to CVE-2018-3639 ++ ++Not included by default in any AMD CPU model. ++ ++Future hardware generations of CPU will not be vulnerable to ++CVE-2018-3639, and thus the guest should be told not to enable ++its mitigations, by exposing amd-no-ssb.
This is mutually ++exclusive with virt-ssbd and amd-ssbd. ++ ++ ++@item @code{pdpe1gb} ++ ++Recommended to allow guest OS to use 1GB size pages ++ ++Not included by default in any AMD CPU model. ++ ++Should be explicitly turned on for all AMD CPU models. ++ ++Note that not all CPU hardware will support this feature. ++@end table ++ ++ ++@node default_cpu_models_x86 ++@subsubsection Default x86 CPU models ++ ++The default QEMU CPU models are designed such that they can run on all hosts. ++If an application does not wish to perform any host compatibility checks ++before launching guests, the default is guaranteed to work. ++ ++The default CPU models will, however, leave the guest OS vulnerable to various ++CPU hardware flaws, so their use is strongly discouraged. Applications should ++follow the earlier guidance to set up a better CPU configuration, with host ++passthrough recommended if live migration is not needed. ++ ++@table @option ++@item @code{qemu32} ++@item @code{qemu64} ++ ++QEMU Virtual CPU version 2.5+ (32 & 64 bit variants) ++ ++qemu64 is used for x86_64 guests and qemu32 is used for i686 guests, when no ++-cpu argument is given to QEMU, or no @code{cpu} element is provided in libvirt XML. ++@end table ++ ++ ++@node other_non_recommended_cpu_models_x86 ++@subsubsection Other non-recommended x86 CPUs ++ ++The following CPU models are compatible with most AMD and Intel x86 hosts, but ++their usage is discouraged, as they expose a very limited featureset, which ++prevents guests having optimal performance. ++ ++@table @option ++ ++@item @code{kvm32} ++@item @code{kvm64} ++ ++Common KVM processor (32 & 64 bit variants) ++ ++Legacy models just for historical compatibility with ancient QEMU versions.
++ ++ ++@item @code{486} ++@item @code{athlon} ++@item @code{phenom} ++@item @code{coreduo} ++@item @code{core2duo} ++@item @code{n270} ++@item @code{pentium} ++@item @code{pentium2} ++@item @code{pentium3} ++ ++Various very old x86 CPU models, mostly predating the introduction of ++hardware assisted virtualization, that should thus not be required for ++running virtual machines. ++@end table ++ ++@node recommendations_cpu_models_MIPS ++@subsection Supported CPU model configurations on MIPS hosts ++ ++QEMU supports a variety of MIPS CPU models: ++ ++@menu ++* cpu_models_MIPS32:: Supported CPU models for MIPS32 hosts ++* cpu_models_MIPS64:: Supported CPU models for MIPS64 hosts ++* cpu_models_nanoMIPS:: Supported CPU models for nanoMIPS hosts ++* preferred_cpu_models_MIPS:: Preferred CPU models for MIPS hosts ++@end menu ++ ++@node cpu_models_MIPS32 ++@subsubsection Supported CPU models for MIPS32 hosts ++ ++The following CPU models are supported for use on MIPS32 hosts. Administrators / ++applications are recommended to use the CPU model that matches the generation ++of the host CPUs in use. In a deployment with a mixture of host CPU models ++between machines, if live migration compatibility is required, use the newest ++CPU model that is compatible across all desired hosts.
++ ++@table @option ++@item @code{mips32r6-generic} ++ ++MIPS32 Processor (Release 6, 2015) ++ ++ ++@item @code{P5600} ++ ++MIPS32 Processor (P5600, 2014) ++ ++ ++@item @code{M14K} ++@item @code{M14Kc} ++ ++MIPS32 Processor (M14K, 2009) ++ ++ ++@item @code{74Kf} ++ ++MIPS32 Processor (74K, 2007) ++ ++ ++@item @code{34Kf} ++ ++MIPS32 Processor (34K, 2006) ++ ++ ++@item @code{24Kc} ++@item @code{24KEc} ++@item @code{24Kf} ++ ++MIPS32 Processor (24K, 2003) ++ ++ ++@item @code{4Kc} ++@item @code{4Km} ++@item @code{4KEcR1} ++@item @code{4KEmR1} ++@item @code{4KEc} ++@item @code{4KEm} ++ ++MIPS32 Processor (4K, 1999) ++@end table ++ ++@node cpu_models_MIPS64 ++@subsubsection Supported CPU models for MIPS64 hosts ++ ++The following CPU models are supported for use on MIPS64 hosts. Administrators / ++applications are recommended to use the CPU model that matches the generation ++of the host CPUs in use. In a deployment with a mixture of host CPU models ++between machines, if live migration compatibility is required, use the newest ++CPU model that is compatible across all desired hosts. ++ ++@table @option ++@item @code{I6400} ++ ++MIPS64 Processor (Release 6, 2014) ++ ++ ++@item @code{Loongson-2F} ++ ++MIPS64 Processor (Loongson 2, 2008) ++ ++ ++@item @code{Loongson-2E} ++ ++MIPS64 Processor (Loongson 2, 2006) ++ ++ ++@item @code{mips64dspr2} ++ ++MIPS64 Processor (Release 2, 2006) ++ ++ ++@item @code{MIPS64R2-generic} ++@item @code{5KEc} ++@item @code{5KEf} ++ ++MIPS64 Processor (Release 2, 2002) ++ ++ ++@item @code{20Kc} ++ ++MIPS64 Processor (20K, 2000) ++ ++ ++@item @code{5Kc} ++@item @code{5Kf} ++ ++MIPS64 Processor (5K, 1999) ++ ++ ++@item @code{VR5432} ++ ++MIPS64 Processor (VR, 1998) ++ ++ ++@item @code{R4000} ++ ++MIPS64 Processor (MIPS III, 1991) ++@end table ++ ++@node cpu_models_nanoMIPS ++@subsubsection Supported CPU models for nanoMIPS hosts ++ ++The following CPU models are supported for use on nanoMIPS hosts. 
Administrators / ++applications are recommended to use the CPU model that matches the generation ++of the host CPUs in use. In a deployment with a mixture of host CPU models ++between machines, if live migration compatibility is required, use the newest ++CPU model that is compatible across all desired hosts. ++ ++@table @option ++@item @code{I7200} ++ ++MIPS I7200 (nanoMIPS, 2018) ++ ++@end table ++ ++@node preferred_cpu_models_MIPS ++@subsubsection Preferred CPU models for MIPS hosts ++ ++The following CPU models are preferred for use on different MIPS hosts: ++ ++@table @option ++@item @code{MIPS III} ++R4000 ++ ++@item @code{MIPS32R2} ++34Kf ++ ++@item @code{MIPS64R6} ++I6400 ++ ++@item @code{nanoMIPS} ++I7200 ++@end table ++ ++@node cpu_model_syntax_apps ++@subsection Syntax for configuring CPU models ++ ++The examples below illustrate the approach to configuring the various ++CPU models / features in QEMU and libvirt ++ ++@menu ++* cpu_model_syntax_qemu:: QEMU command line ++* cpu_model_syntax_libvirt:: Libvirt guest XML ++@end menu ++ ++@node cpu_model_syntax_qemu ++@subsubsection QEMU command line ++ ++@table @option ++ ++@item Host passthrough ++ ++@example ++ $ @value{qemu_system_x86} -cpu host ++@end example ++ ++With feature customization: ++ ++@example ++ $ @value{qemu_system_x86} -cpu host,-vmx,... ++@end example ++ ++@item Named CPU models ++ ++@example ++ $ @value{qemu_system_x86} -cpu Westmere ++@end example ++ ++With feature customization: ++ ++@example ++ $ @value{qemu_system_x86} -cpu Westmere,+pcid,... ++@end example ++ ++@end table ++ ++@node cpu_model_syntax_libvirt ++@subsubsection Libvirt guest XML ++ ++@table @option ++ ++@item Host passthrough ++ ++@example ++ ++@end example ++ ++With feature customization: ++ ++@example ++ ++ ++ ... ++ ++@end example ++ ++@item Host model ++ ++@example ++ ++@end example ++ ++With feature customization: ++ ++@example ++ ++ ++ ...
++ ++@end example ++ ++@item Named model ++ ++@example ++ ++ ++ ++@end example ++ ++With feature customization: ++ ++@example ++ ++ ++ ++ ... ++ ++@end example ++ ++@end table ++ ++@c man end ++ ++@ignore ++ ++@setfilename qemu-cpu-models ++@settitle QEMU / KVM CPU model configuration ++ ++@c man begin SEEALSO ++The HTML documentation of QEMU for more precise information and Linux ++user mode emulator invocation. ++@c man end ++ ++@c man begin AUTHOR ++Daniel P. Berrange ++@c man end ++ ++@end ignore diff --git a/qemu-doc.texi b/qemu-doc.texi -index 3ddf5c0a68..d460f8d2c0 100644 ---- a/qemu-doc.texi +new file mode 100644 +index 0000000..10cd1de +--- /dev/null +++ b/qemu-doc.texi -@@ -11,8 +11,8 @@ - @paragraphindent 0 - @c %**end of header - --@set qemu_system qemu-system-x86_64 --@set qemu_system_x86 qemu-system-x86_64 +@@ -0,0 +1,2967 @@ ++\input texinfo @c -*- texinfo -*- ++@c %**start of header ++@setfilename qemu-doc.info ++@include version.texi ++ ++@documentlanguage en ++@documentencoding UTF-8 ++ ++@settitle QEMU version @value{VERSION} User Documentation ++@exampleindent 0 ++@paragraphindent 0 ++@c %**end of header ++ +@set qemu_system qemu-kvm +@set qemu_system_x86 qemu-kvm - - @ifinfo - @direntry -@@ -1827,7 +1827,7 @@ Set the initial VGA graphic mode. The default is 800x600x32. - Set OpenBIOS variables in NVRAM, for example: - - @example --qemu-system-ppc -prom-env 'auto-boot?=false' \ ++ ++@ifinfo ++@direntry ++* QEMU: (qemu-doc). The QEMU Emulator User Documentation. 
++@end direntry ++@end ifinfo ++ ++@iftex ++@titlepage ++@sp 7 ++@center @titlefont{QEMU version @value{VERSION}} ++@sp 1 ++@center @titlefont{User Documentation} ++@sp 3 ++@end titlepage ++@end iftex ++ ++@ifnottex ++@node Top ++@top ++ ++@menu ++* Introduction:: ++* QEMU PC System emulator:: ++* QEMU System emulator for non PC targets:: ++* QEMU User space emulator:: ++* System requirements:: ++* Security:: ++* Implementation notes:: ++* Deprecated features:: ++* Recently removed features:: ++* Supported build platforms:: ++* License:: ++* Index:: ++@end menu ++@end ifnottex ++ ++@contents ++ ++@node Introduction ++@chapter Introduction ++ ++@menu ++* intro_features:: Features ++@end menu ++ ++@node intro_features ++@section Features ++ ++QEMU is a FAST! processor emulator using dynamic translation to ++achieve good emulation speed. ++ ++@cindex operating modes ++QEMU has two operating modes: ++ ++@itemize ++@cindex system emulation ++@item Full system emulation. In this mode, QEMU emulates a full system (for ++example a PC), including one or several processors and various ++peripherals. It can be used to launch different Operating Systems ++without rebooting the PC or to debug system code. ++ ++@cindex user mode emulation ++@item User mode emulation. In this mode, QEMU can launch ++processes compiled for one CPU on another CPU. It can be used to ++launch the Wine Windows API emulator (@url{https://www.winehq.org}) or ++to ease cross-compilation and cross-debugging. ++ ++@end itemize ++ ++QEMU has the following features: ++ ++@itemize ++@item QEMU can run without a host kernel driver and yet gives acceptable ++performance. It uses dynamic translation to native code for reasonable speed, ++with support for self-modifying code and precise exceptions. ++ ++@item It is portable to several operating systems (GNU/Linux, *BSD, Mac OS X, ++Windows) and architectures. ++ ++@item It performs accurate software emulation of the FPU. 
++@end itemize ++ ++QEMU user mode emulation has the following features: ++@itemize ++@item Generic Linux system call converter, including most ioctls. ++ ++@item clone() emulation using native CPU clone() to use Linux scheduler for threads. ++ ++@item Accurate signal handling by remapping host signals to target signals. ++@end itemize ++ ++QEMU full system emulation has the following features: ++@itemize ++@item ++QEMU uses a full software MMU for maximum portability. ++ ++@item ++QEMU can optionally use an in-kernel accelerator, like kvm. The accelerators ++execute most of the guest code natively, while ++continuing to emulate the rest of the machine. ++ ++@item ++Various hardware devices can be emulated and in some cases, host ++devices (e.g. serial and parallel ports, USB, drives) can be used ++transparently by the guest Operating System. Host device passthrough ++can be used for talking to external physical peripherals (e.g. a ++webcam, modem or tape drive). ++ ++@item ++Symmetric multiprocessing (SMP) support. Currently, an in-kernel ++accelerator is required to use more than one host CPU for emulation. 
++ ++@end itemize ++ ++ ++@node QEMU PC System emulator ++@chapter QEMU PC System emulator ++@cindex system emulation (PC) ++ ++@menu ++* pcsys_introduction:: Introduction ++* pcsys_quickstart:: Quick Start ++* sec_invocation:: Invocation ++* pcsys_keys:: Keys in the graphical frontends ++* mux_keys:: Keys in the character backend multiplexer ++* pcsys_monitor:: QEMU Monitor ++* cpu_models:: CPU models ++* disk_images:: Disk Images ++* pcsys_network:: Network emulation ++* pcsys_other_devs:: Other Devices ++* direct_linux_boot:: Direct Linux Boot ++* pcsys_usb:: USB emulation ++* vnc_security:: VNC security ++* network_tls:: TLS setup for network services ++* gdb_usage:: GDB usage ++* pcsys_os_specific:: Target OS specific information ++@end menu ++ ++@node pcsys_introduction ++@section Introduction ++ ++@c man begin DESCRIPTION ++ ++The QEMU PC System emulator simulates the ++following peripherals: ++ ++@itemize @minus ++@item ++i440FX host PCI bridge and PIIX3 PCI to ISA bridge ++@item ++Cirrus CLGD 5446 PCI VGA card or dummy VGA card with Bochs VESA ++extensions (hardware level, including all non standard modes). ++@item ++PS/2 mouse and keyboard ++@item ++2 PCI IDE interfaces with hard disk and CD-ROM support ++@item ++Floppy disk ++@item ++PCI and ISA network adapters ++@item ++Serial ports ++@item ++IPMI BMC, either an internal or external one ++@item ++Creative SoundBlaster 16 sound card ++@item ++ENSONIQ AudioPCI ES1370 sound card ++@item ++Intel 82801AA AC97 Audio compatible sound card ++@item ++Intel HD Audio Controller and HDA codec ++@item ++Adlib (OPL2) - Yamaha YM3812 compatible chip ++@item ++Gravis Ultrasound GF1 sound card ++@item ++CS4231A compatible sound card ++@item ++PCI UHCI, OHCI, EHCI or XHCI USB controller and a virtual USB-1.1 hub. ++@end itemize ++ ++SMP is supported with up to 255 CPUs. ++ ++QEMU uses the PC BIOS from the Seabios project and the Plex86/Bochs LGPL ++VGA BIOS. ++ ++QEMU uses YM3812 emulation by Tatsuyuki Satoh.
++ ++QEMU uses GUS emulation (GUSEMU32 @url{http://www.deinmeister.de/gusemu/}) ++by Tibor "TS" Schütz. ++ ++Note that, by default, GUS shares IRQ(7) with parallel ports and so ++QEMU must be told to not have parallel ports to have working GUS. ++ ++@example ++@value{qemu_system_x86} dos.img -soundhw gus -parallel none ++@end example ++ ++Alternatively: ++@example ++@value{qemu_system_x86} dos.img -device gus,irq=5 ++@end example ++ ++Or some other unclaimed IRQ. ++ ++CS4231A is the chip used in Windows Sound System and GUSMAX products. ++ ++@c man end ++ ++@node pcsys_quickstart ++@section Quick Start ++@cindex quick start ++ ++Download and uncompress a hard disk image with Linux installed (e.g. ++@file{linux.img}) and type: ++ ++@example ++@value{qemu_system} linux.img ++@end example ++ ++Linux should boot and give you a prompt. ++ ++@node sec_invocation ++@section Invocation ++ ++@example ++@c man begin SYNOPSIS ++@command{@value{qemu_system}} [@var{options}] [@var{disk_image}] ++@c man end ++@end example ++ ++@c man begin OPTIONS ++@var{disk_image} is a raw hard disk image for IDE hard disk 0. Some ++targets do not need a disk image. ++ ++@include qemu-options.texi ++ ++@c man end ++ ++@subsection Device URL Syntax ++@c TODO merge this with section Disk Images ++ ++@c man begin NOTES ++ ++In addition to using normal file images for the emulated storage devices, ++QEMU can also use networked resources such as iSCSI devices. These are ++specified using a special URL syntax. ++ ++@table @option ++@item iSCSI ++iSCSI support allows QEMU to access iSCSI resources directly and use as ++images for the guest storage. Both disk and cdrom images are supported. ++ ++Syntax for specifying iSCSI LUNs is ++``iscsi://<target-ip>[:<port>]/<target-iqn>/<lun>'' ++ ++By default qemu will use the iSCSI initiator-name ++'iqn.2008-11.org.linux-kvm[:<name>]' but this can also be set from the command ++line or a configuration file.
++ ++Since version QEMU 2.4 it is possible to specify an iSCSI request timeout to detect ++stalled requests and force a reestablishment of the session. The timeout ++is specified in seconds. The default is 0 which means no timeout. Libiscsi ++1.15.0 or greater is required for this feature. ++ ++Example (without authentication): ++@example ++@value{qemu_system} -iscsi initiator-name=iqn.2001-04.com.example:my-initiator \ ++ -cdrom iscsi://192.0.2.1/iqn.2001-04.com.example/2 \ ++ -drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1 ++@end example ++ ++Example (CHAP username/password via URL): ++@example ++@value{qemu_system} -drive file=iscsi://user%password@@192.0.2.1/iqn.2001-04.com.example/1 ++@end example ++ ++Example (CHAP username/password via environment variables): ++@example ++LIBISCSI_CHAP_USERNAME="user" \ ++LIBISCSI_CHAP_PASSWORD="password" \ ++@value{qemu_system} -drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1 ++@end example ++ ++@item NBD ++QEMU supports NBD (Network Block Devices) both using TCP protocol as well ++as Unix Domain Sockets. With TCP, the default port is 10809. ++ ++Syntax for specifying a NBD device using TCP, in preferred URI form: ++``nbd://<server-ip>[:<port>]/[<export>]'' ++ ++Syntax for specifying a NBD device using Unix Domain Sockets; remember ++that '?' is a shell glob character and may need quoting: ++``nbd+unix:///[<export>]?socket=<domain-socket>'' ++ ++Older syntax that is also recognized: ++``nbd:<server-ip>:<port>[:exportname=<export>]'' ++ ++Syntax for specifying a NBD device using Unix Domain Sockets ++``nbd:unix:<domain-socket>[:exportname=<export>]'' ++ ++Example for TCP ++@example ++@value{qemu_system} --drive file=nbd:192.0.2.1:30000 ++@end example ++ ++Example for Unix Domain Sockets ++@example ++@value{qemu_system} --drive file=nbd:unix:/tmp/nbd-socket ++@end example ++ ++@item SSH ++QEMU supports SSH (Secure Shell) access to remote disks.
++ ++Examples: ++@example ++@value{qemu_system} -drive file=ssh://user@@host/path/to/disk.img ++@value{qemu_system} -drive file.driver=ssh,file.user=user,file.host=host,file.port=22,file.path=/path/to/disk.img ++@end example ++ ++Currently authentication must be done using ssh-agent. Other ++authentication methods may be supported in future. ++ ++@item Sheepdog ++Sheepdog is a distributed storage system for QEMU. ++QEMU supports using either local sheepdog devices or remote networked ++devices. ++ ++Syntax for specifying a sheepdog device ++@example ++sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag] ++@end example ++ ++Example ++@example ++@value{qemu_system} --drive file=sheepdog://192.0.2.1:30000/MyVirtualMachine ++@end example ++ ++See also @url{https://sheepdog.github.io/sheepdog/}. ++ ++@item GlusterFS ++GlusterFS is a user space distributed file system. ++QEMU supports the use of GlusterFS volumes for hosting VM disk images using ++TCP, Unix Domain Sockets and RDMA transport protocols. ++ ++Syntax for specifying a VM disk image on GlusterFS volume is ++@example ++ ++URI: ++gluster[+type]://[host[:port]]/volume/path[?socket=...][,debug=N][,logfile=...] 
++ ++JSON: ++'json:@{"driver":"qcow2","file":@{"driver":"gluster","volume":"testvol","path":"a.img","debug":N,"logfile":"...", ++@ "server":[@{"type":"tcp","host":"...","port":"..."@}, ++@ @{"type":"unix","socket":"..."@}]@}@}' ++@end example ++ ++ ++Example ++@example ++URI: ++@value{qemu_system} --drive file=gluster://192.0.2.1/testvol/a.img, ++@ file.debug=9,file.logfile=/var/log/qemu-gluster.log ++ ++JSON: ++@value{qemu_system} 'json:@{"driver":"qcow2", ++@ "file":@{"driver":"gluster", ++@ "volume":"testvol","path":"a.img", ++@ "debug":9,"logfile":"/var/log/qemu-gluster.log", ++@ "server":[@{"type":"tcp","host":"1.2.3.4","port":24007@}, ++@ @{"type":"unix","socket":"/var/run/glusterd.socket"@}]@}@}' ++@value{qemu_system} -drive driver=qcow2,file.driver=gluster,file.volume=testvol,file.path=/path/a.img, ++@ file.debug=9,file.logfile=/var/log/qemu-gluster.log, ++@ file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007, ++@ file.server.1.type=unix,file.server.1.socket=/var/run/glusterd.socket ++@end example ++ ++See also @url{http://www.gluster.org}. ++ ++@item HTTP/HTTPS/FTP/FTPS ++QEMU supports read-only access to files accessed over http(s) and ftp(s). ++ ++Syntax using a single filename: ++@example ++<protocol>://[<username>[:<password>]@@]<host>/<path> ++@end example ++ ++where: ++@table @option ++@item protocol ++'http', 'https', 'ftp', or 'ftps'. ++ ++@item username ++Optional username for authentication to the remote server. ++ ++@item password ++Optional password for authentication to the remote server. ++ ++@item host ++Address of the remote server. ++ ++@item path ++Path on the remote server, including any query string. ++@end table ++ ++The following options are also supported: ++@table @option ++@item url ++The full URL when passing options to the driver explicitly. ++ ++@item readahead ++The amount of data to read ahead with each range request to the remote server. ++This value may optionally have the suffix 'T', 'G', 'M', 'K', 'k' or 'b'.
If it ++does not have a suffix, it will be assumed to be in bytes. The value must be a ++multiple of 512 bytes. It defaults to 256k. ++ ++@item sslverify ++Whether to verify the remote server's certificate when connecting over SSL. It ++can have the value 'on' or 'off'. It defaults to 'on'. ++ ++@item cookie ++Send this cookie (it can also be a list of cookies separated by ';') with ++each outgoing request. Only supported when using protocols such as HTTP ++which support cookies, otherwise ignored. ++ ++@item timeout ++Set the timeout in seconds of the CURL connection. This timeout is the time ++that CURL waits for a response from the remote server to get the size of the ++image to be downloaded. If not set, the default timeout of 5 seconds is used. ++@end table ++ ++Note that when passing options to qemu explicitly, @option{driver} is the value ++of <protocol>. ++ ++Example: boot from a remote Fedora 20 live ISO image ++@example ++@value{qemu_system_x86} --drive media=cdrom,file=https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/20/Live/x86_64/Fedora-Live-Desktop-x86_64-20-1.iso,readonly ++ ++@value{qemu_system_x86} --drive media=cdrom,file.driver=http,file.url=http://archives.fedoraproject.org/pub/fedora/linux/releases/20/Live/x86_64/Fedora-Live-Desktop-x86_64-20-1.iso,readonly ++@end example ++ ++Example: boot from a remote Fedora 20 cloud image using a local overlay for ++writes, copy-on-read, and a readahead of 64k ++@example ++qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"http",, "file.url":"http://archives.fedoraproject.org/pub/archive/fedora/linux/releases/20/Images/x86_64/Fedora-x86_64-20-20131211.1-sda.qcow2",, "file.readahead":"64k"@}' /tmp/Fedora-x86_64-20-20131211.1-sda.qcow2 ++ ++@value{qemu_system_x86} -drive file=/tmp/Fedora-x86_64-20-20131211.1-sda.qcow2,copy-on-read=on ++@end example ++ ++Example: boot from an image stored on a VMware vSphere server with a self-signed ++certificate using a local overlay for writes, a
readahead of 64k and a timeout ++of 10 seconds. ++@example ++qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"https",, "file.url":"https://user:password@@vsphere.example.com/folder/test/test-flat.vmdk?dcPath=Datacenter&dsName=datastore1",, "file.sslverify":"off",, "file.readahead":"64k",, "file.timeout":10@}' /tmp/test.qcow2 ++ ++@value{qemu_system_x86} -drive file=/tmp/test.qcow2 ++@end example ++ ++@end table ++ ++@c man end ++ ++@node pcsys_keys ++@section Keys in the graphical frontends ++ ++@c man begin OPTIONS ++ ++During the graphical emulation, you can use special key combinations to change ++modes. The default key mappings are shown below, but if you use @code{-alt-grab} ++then the modifier is Ctrl-Alt-Shift (instead of Ctrl-Alt) and if you use ++@code{-ctrl-grab} then the modifier is the right Ctrl key (instead of Ctrl-Alt): ++ ++@table @key ++@item Ctrl-Alt-f ++@kindex Ctrl-Alt-f ++Toggle full screen ++ ++@item Ctrl-Alt-+ ++@kindex Ctrl-Alt-+ ++Enlarge the screen ++ ++@item Ctrl-Alt-- ++@kindex Ctrl-Alt-- ++Shrink the screen ++ ++@item Ctrl-Alt-u ++@kindex Ctrl-Alt-u ++Restore the screen's un-scaled dimensions ++ ++@item Ctrl-Alt-n ++@kindex Ctrl-Alt-n ++Switch to virtual console 'n'. Standard console mappings are: ++@table @emph ++@item 1 ++Target system display ++@item 2 ++Monitor ++@item 3 ++Serial port ++@end table ++ ++@item Ctrl-Alt ++@kindex Ctrl-Alt ++Toggle mouse and keyboard grab. ++@end table ++ ++@kindex Ctrl-Up ++@kindex Ctrl-Down ++@kindex Ctrl-PageUp ++@kindex Ctrl-PageDown ++In the virtual consoles, you can use @key{Ctrl-Up}, @key{Ctrl-Down}, ++@key{Ctrl-PageUp} and @key{Ctrl-PageDown} to move in the back log. ++ ++@c man end ++ ++@node mux_keys ++@section Keys in the character backend multiplexer ++ ++@c man begin OPTIONS ++ ++During emulation, if you are using a character backend multiplexer ++(which is the default if you are using @option{-nographic}) then ++several commands are available via an escape sequence. 
These ++key sequences all start with an escape character, which is @key{Ctrl-a} ++by default, but can be changed with @option{-echr}. The list below assumes ++you're using the default. ++ ++@table @key ++@item Ctrl-a h ++@kindex Ctrl-a h ++Print this help ++@item Ctrl-a x ++@kindex Ctrl-a x ++Exit emulator ++@item Ctrl-a s ++@kindex Ctrl-a s ++Save disk data back to file (if -snapshot) ++@item Ctrl-a t ++@kindex Ctrl-a t ++Toggle console timestamps ++@item Ctrl-a b ++@kindex Ctrl-a b ++Send break (magic sysrq in Linux) ++@item Ctrl-a c ++@kindex Ctrl-a c ++Rotate between the frontends connected to the multiplexer (usually ++this switches between the monitor and the console) ++@item Ctrl-a Ctrl-a ++@kindex Ctrl-a Ctrl-a ++Send the escape character to the frontend ++@end table ++@c man end ++ ++@ignore ++ ++@c man begin SEEALSO ++The HTML documentation of QEMU for more precise information and Linux ++user mode emulator invocation. ++@c man end ++ ++@c man begin AUTHOR ++Fabrice Bellard ++@c man end ++ ++@end ignore ++ ++@node pcsys_monitor ++@section QEMU Monitor ++@cindex QEMU monitor ++ ++The QEMU monitor is used to give complex commands to the QEMU ++emulator. You can use it to: ++ ++@itemize @minus ++ ++@item ++Remove or insert removable media images ++(such as CD-ROM or floppies). ++ ++@item ++Freeze/unfreeze the Virtual Machine (VM) and save or restore its state ++from a disk file. ++ ++@item Inspect the VM state without an external debugger. ++ ++@end itemize ++ ++@subsection Commands ++ ++The following commands are available: ++ ++@include qemu-monitor.texi ++ ++@include qemu-monitor-info.texi ++ ++@subsection Integer expressions ++ ++The monitor understands integer expressions for every integer ++argument. You can use register names to get the value of specific ++CPU registers by prefixing them with @emph{$}.
++ ++@node cpu_models ++@section CPU models ++ ++@include docs/qemu-cpu-models.texi ++ ++@node disk_images ++@section Disk Images ++ ++QEMU supports many disk image formats, including growable disk images ++(their size increases as non empty sectors are written), compressed and ++encrypted disk images. ++ ++@menu ++* disk_images_quickstart:: Quick start for disk image creation ++* disk_images_snapshot_mode:: Snapshot mode ++* vm_snapshots:: VM snapshots ++@end menu ++ ++@node disk_images_quickstart ++@subsection Quick start for disk image creation ++ ++You can create a disk image with the command: ++@example ++qemu-img create myimage.img mysize ++@end example ++where @var{myimage.img} is the disk image filename and @var{mysize} is its ++size in kilobytes. You can add an @code{M} suffix to give the size in ++megabytes and a @code{G} suffix for gigabytes. ++ ++@c When this document is converted to rst we should make this into ++@c a proper linked reference to the qemu-img documentation again: ++See the qemu-img invocation documentation for more information. ++ ++@node disk_images_snapshot_mode ++@subsection Snapshot mode ++ ++If you use the option @option{-snapshot}, all disk images are ++considered as read only. When sectors are written, they are written in ++a temporary file created in @file{/tmp}. You can however force the ++write back to the raw disk images by using the @code{commit} monitor ++command (or @key{C-a s} in the serial console). ++ ++@node vm_snapshots ++@subsection VM snapshots ++ ++VM snapshots are snapshots of the complete virtual machine including ++CPU state, RAM, device state and the content of all the writable ++disks. In order to use VM snapshots, you must have at least one non ++removable and writable block device using the @code{qcow2} disk image ++format. Normally this device is the first virtual hard drive. ++ ++Use the monitor command @code{savevm} to create a new VM snapshot or ++replace an existing one.
A human readable name can be assigned to each ++snapshot in addition to its numerical ID. ++ ++Use @code{loadvm} to restore a VM snapshot and @code{delvm} to remove ++a VM snapshot. @code{info snapshots} lists the available snapshots ++with their associated information: ++ ++@example ++(qemu) info snapshots ++Snapshot devices: hda ++Snapshot list (from hda): ++ID TAG VM SIZE DATE VM CLOCK ++1 start 41M 2006-08-06 12:38:02 00:00:14.954 ++2 40M 2006-08-06 12:43:29 00:00:18.633 ++3 msys 40M 2006-08-06 12:44:04 00:00:23.514 ++@end example ++ ++A VM snapshot is made of a VM state info (its size is shown in ++@code{info snapshots}) and a snapshot of every writable disk image. ++The VM state info is stored in the first @code{qcow2} non removable ++and writable block device. The disk image snapshots are stored in ++every disk image. The size of a snapshot in a disk image is difficult ++to evaluate and is not shown by @code{info snapshots} because the ++associated disk sectors are shared among all the snapshots to save ++disk space (otherwise each snapshot would need a full copy of all the ++disk images). ++ ++When using the (unrelated) @code{-snapshot} option ++(@ref{disk_images_snapshot_mode}), you can always make VM snapshots, ++but they are deleted as soon as you exit QEMU. ++ ++VM snapshots currently have the following known limitations: ++@itemize ++@item ++They cannot cope with removable devices if they are removed or ++inserted after a snapshot is done. ++@item ++A few device drivers still have incomplete snapshot support so their ++state is not saved or restored properly (in particular USB). ++@end itemize ++ ++@node pcsys_network ++@section Network emulation ++ ++QEMU can simulate several network cards (e.g. PCI or ISA cards on the PC ++target) and can connect them to a network backend on the host or an emulated ++hub. The various host network backends can either be used to connect the NIC of ++the guest to a real network (e.g. 
by using a TAP devices or the non-privileged ++user mode network stack), or to other guest instances running in another QEMU ++process (e.g. by using the socket host network backend). ++ ++@subsection Using TAP network interfaces ++ ++This is the standard way to connect QEMU to a real network. QEMU adds ++a virtual network device on your host (called @code{tapN}), and you ++can then configure it as if it was a real ethernet card. ++ ++@subsubsection Linux host ++ ++As an example, you can download the @file{linux-test-xxx.tar.gz} ++archive and copy the script @file{qemu-ifup} in @file{/etc} and ++configure properly @code{sudo} so that the command @code{ifconfig} ++contained in @file{qemu-ifup} can be executed as root. You must verify ++that your host kernel supports the TAP network interfaces: the ++device @file{/dev/net/tun} must be present. ++ ++See @ref{sec_invocation} to have examples of command lines using the ++TAP network interfaces. ++ ++@subsubsection Windows host ++ ++There is a virtual ethernet driver for Windows 2000/XP systems, called ++TAP-Win32. But it is not included in standard QEMU for Windows, ++so you will need to get it separately. It is part of OpenVPN package, ++so download OpenVPN from : @url{https://openvpn.net/}. ++ ++@subsection Using the user mode network stack ++ ++By using the option @option{-net user} (default configuration if no ++@option{-net} option is specified), QEMU uses a completely user mode ++network stack (you don't need root privilege to use the virtual ++network). The virtual network configuration is the following: ++ ++@example ++ ++ guest (10.0.2.15) <------> Firewall/DHCP server <-----> Internet ++ | (10.0.2.2) ++ | ++ ----> DNS server (10.0.2.3) ++ | ++ ----> SMB server (10.0.2.4) ++@end example ++ ++The QEMU VM behaves as if it was behind a firewall which blocks all ++incoming connections. You can use a DHCP client to automatically ++configure the network in the QEMU VM. 
The DHCP server assigns addresses ++to the hosts starting from 10.0.2.15. ++ ++In order to check that the user mode network is working, you can ping ++the address 10.0.2.2 and verify that you got an address in the range ++10.0.2.x from the QEMU virtual DHCP server. ++ ++Note that ICMP traffic in general does not work with user mode networking. ++@code{ping}, aka. ICMP echo, to the local router (10.0.2.2) shall work, ++however. If you're using QEMU on Linux >= 3.0, it can use unprivileged ICMP ++ping sockets to allow @code{ping} to the Internet. The host admin has to set ++the ping_group_range in order to grant access to those sockets. To allow ping ++for GID 100 (usually users group): ++ ++@example ++echo 100 100 > /proc/sys/net/ipv4/ping_group_range ++@end example ++ ++When using the built-in TFTP server, the router is also the TFTP ++server. ++ ++When using the @option{'-netdev user,hostfwd=...'} option, TCP or UDP ++connections can be redirected from the host to the guest. It allows for ++example to redirect X11, telnet or SSH connections. ++ ++@subsection Hubs ++ ++QEMU can simulate several hubs. A hub can be thought of as a virtual connection ++between several network devices. These devices can be for example QEMU virtual ++ethernet cards or virtual Host ethernet devices (TAP devices). You can connect ++guest NICs or host network backends to such a hub using the @option{-netdev ++hubport} or @option{-nic hubport} options. The legacy @option{-net} option ++also connects the given device to the emulated hub with ID 0 (i.e. the default ++hub) unless you specify a netdev with @option{-net nic,netdev=xxx} here. ++ ++@subsection Connecting emulated networks between QEMU instances ++ ++Using the @option{-netdev socket} (or @option{-nic socket} or ++@option{-net socket}) option, it is possible to create emulated ++networks that span several QEMU instances.
++See the description of the @option{-netdev socket} option in the ++@ref{sec_invocation,,Invocation chapter} to have a basic example. ++ ++@node pcsys_other_devs ++@section Other Devices ++ ++@subsection Inter-VM Shared Memory device ++ ++On Linux hosts, a shared memory device is available. The basic syntax ++is: ++ ++@example ++@value{qemu_system_x86} -device ivshmem-plain,memdev=@var{hostmem} ++@end example ++ ++where @var{hostmem} names a host memory backend. For a POSIX shared ++memory backend, use something like ++ ++@example ++-object memory-backend-file,size=1M,share,mem-path=/dev/shm/ivshmem,id=@var{hostmem} ++@end example ++ ++If desired, interrupts can be sent between guest VMs accessing the same shared ++memory region. Interrupt support requires using a shared memory server and ++using a chardev socket to connect to it. The code for the shared memory server ++is qemu.git/contrib/ivshmem-server. An example syntax when using the shared ++memory server is: ++ ++@example ++# First start the ivshmem server once and for all ++ivshmem-server -p @var{pidfile} -S @var{path} -m @var{shm-name} -l @var{shm-size} -n @var{vectors} ++ ++# Then start your qemu instances with matching arguments ++@value{qemu_system_x86} -device ivshmem-doorbell,vectors=@var{vectors},chardev=@var{id} ++ -chardev socket,path=@var{path},id=@var{id} ++@end example ++ ++When using the server, the guest will be assigned a VM ID (>=0) that allows guests ++using the same server to communicate via interrupts. Guests can read their ++VM ID from a device register (see ivshmem-spec.txt). ++ ++@subsubsection Migration with ivshmem ++ ++With device property @option{master=on}, the guest will copy the shared ++memory on migration to the destination host. With @option{master=off}, ++the guest will not be able to migrate with the device attached. In the ++latter case, the device should be detached and then reattached after ++migration using the PCI hotplug support. 
++ ++At most one of the devices sharing the same memory can be master. The ++master must complete migration before you plug back the other devices. ++ ++@subsubsection ivshmem and hugepages ++ ++Instead of specifying the <shm size> using POSIX shm, you may specify ++a memory backend that has hugepage support: ++ ++@example ++@value{qemu_system_x86} -object memory-backend-file,size=1G,mem-path=/dev/hugepages/my-shmem-file,share,id=mb1 ++ -device ivshmem-plain,memdev=mb1 ++@end example ++ ++ivshmem-server also supports hugepages mount points with the ++@option{-m} memory path argument. ++ ++@node direct_linux_boot ++@section Direct Linux Boot ++ ++This section explains how to launch a Linux kernel inside QEMU without ++having to make a full bootable image. It is very useful for fast Linux ++kernel testing. ++ ++The syntax is: ++@example ++@value{qemu_system} -kernel bzImage -hda rootdisk.img -append "root=/dev/hda" ++@end example ++ ++Use @option{-kernel} to provide the Linux kernel image and ++@option{-append} to give the kernel command line arguments. The ++@option{-initrd} option can be used to provide an INITRD image. ++ ++If you do not need graphical output, you can disable it and redirect ++the virtual serial port and the QEMU monitor to the console with the ++@option{-nographic} option. The typical command line is: ++@example ++@value{qemu_system} -kernel bzImage -hda rootdisk.img \ ++ -append "root=/dev/hda console=ttyS0" -nographic ++@end example ++ ++Use @key{Ctrl-a c} to switch between the serial console and the ++monitor (@pxref{pcsys_keys}). ++ ++@node pcsys_usb ++@section USB emulation ++ ++QEMU can emulate a PCI UHCI, OHCI, EHCI or XHCI USB controller. You can ++plug virtual USB devices or real host USB devices (only works with certain ++host operating systems). QEMU will automatically create and connect virtual ++USB hubs as necessary to connect multiple USB devices.
++ ++@menu ++* usb_devices:: ++* host_usb_devices:: ++@end menu ++@node usb_devices ++@subsection Connecting USB devices ++ ++USB devices can be connected with the @option{-device usb-...} command line ++option or the @code{device_add} monitor command. Available devices are: ++ ++@table @code ++@item usb-mouse ++Virtual Mouse. This will override the PS/2 mouse emulation when activated. ++@item usb-tablet ++Pointer device that uses absolute coordinates (like a touchscreen). ++This means QEMU is able to report the mouse position without having ++to grab the mouse. Also overrides the PS/2 mouse emulation when activated. ++@item usb-storage,drive=@var{drive_id} ++Mass storage device backed by @var{drive_id} (@pxref{disk_images}) ++@item usb-uas ++USB attached SCSI device, see ++@url{https://git.qemu.org/?p=qemu.git;a=blob_plain;f=docs/usb-storage.txt,usb-storage.txt} ++for details ++@item usb-bot ++Bulk-only transport storage device, see ++@url{https://git.qemu.org/?p=qemu.git;a=blob_plain;f=docs/usb-storage.txt,usb-storage.txt} ++for details here, too ++@item usb-mtp,rootdir=@var{dir} ++Media transfer protocol device, using @var{dir} as root of the file tree ++that is presented to the guest. ++@item usb-host,hostbus=@var{bus},hostaddr=@var{addr} ++Pass through the host device identified by @var{bus} and @var{addr} ++@item usb-host,vendorid=@var{vendor},productid=@var{product} ++Pass through the host device identified by @var{vendor} and @var{product} ID ++@item usb-wacom-tablet ++Virtual Wacom PenPartner tablet. This device is similar to the @code{tablet} ++above but it can be used with the tslib library because in addition to touch ++coordinates it reports touch pressure. ++@item usb-kbd ++Standard USB keyboard. Will override the PS/2 keyboard (if present). ++@item usb-serial,chardev=@var{id} ++Serial converter. This emulates an FTDI FT232BM chip connected to host character ++device @var{id}. ++@item usb-braille,chardev=@var{id} ++Braille device. 
This will use BrlAPI to display the braille output on a real ++or fake device referenced by @var{id}. ++@item usb-net[,netdev=@var{id}] ++Network adapter that supports CDC ethernet and RNDIS protocols. @var{id} ++specifies a netdev defined with @code{-netdev @dots{},id=@var{id}}. ++For instance, user-mode networking can be used with ++@example ++@value{qemu_system} [...] -netdev user,id=net0 -device usb-net,netdev=net0 ++@end example ++@item usb-ccid ++Smartcard reader device ++@item usb-audio ++USB audio device ++@end table ++ ++@node host_usb_devices ++@subsection Using host USB devices on a Linux host ++ ++WARNING: this is an experimental feature. QEMU will slow down when ++using it. USB devices requiring real time streaming (i.e. USB Video ++Cameras) are not supported yet. ++ ++@enumerate ++@item If you use an early Linux 2.4 kernel, verify that no Linux driver ++is actually using the USB device. A simple way to do that is simply to ++disable the corresponding kernel module by renaming it from @file{mydriver.o} ++to @file{mydriver.o.disabled}. ++ ++@item Verify that @file{/proc/bus/usb} is working (most Linux distributions should enable it by default). You should see something like that: ++@example ++ls /proc/bus/usb ++001 devices drivers ++@end example ++ ++@item Since only root can access the USB devices directly, you can either launch QEMU as root or change the permissions of the USB devices you want to use. For testing, the following suffices: ++@example ++chown -R myuid /proc/bus/usb ++@end example ++ ++@item Launch QEMU and do in the monitor: ++@example ++info usbhost ++ Device 1.2, speed 480 Mb/s ++ Class 00: USB device 1234:5678, USB DISK ++@end example ++You should see the list of the devices you can use (Never try to use ++hubs, it won't work). ++ ++@item Add the device in QEMU by using: ++@example ++device_add usb-host,vendorid=0x1234,productid=0x5678 ++@end example ++ ++Normally the guest OS should report that a new USB device is plugged.
++You can use the option @option{-device usb-host,...} to do the same. ++ ++@item Now you can try to use the host USB device in QEMU. ++ ++@end enumerate ++ ++When relaunching QEMU, you may have to unplug and plug again the USB ++device to make it work again (this is a bug). ++ ++@node vnc_security ++@section VNC security ++ ++The VNC server capability provides access to the graphical console ++of the guest VM across the network. This has a number of security ++considerations depending on the deployment scenarios. ++ ++@menu ++* vnc_sec_none:: ++* vnc_sec_password:: ++* vnc_sec_certificate:: ++* vnc_sec_certificate_verify:: ++* vnc_sec_certificate_pw:: ++* vnc_sec_sasl:: ++* vnc_sec_certificate_sasl:: ++* vnc_setup_sasl:: ++@end menu ++@node vnc_sec_none ++@subsection Without passwords ++ ++The simplest VNC server setup does not include any form of authentication. ++For this setup it is recommended to restrict it to listen on a UNIX domain ++socket only. For example ++ ++@example ++@value{qemu_system} [...OPTIONS...] -vnc unix:/home/joebloggs/.qemu-myvm-vnc ++@end example ++ ++This ensures that only users on local box with read/write access to that ++path can access the VNC server. To securely access the VNC server from a ++remote machine, a combination of netcat+ssh can be used to provide a secure ++tunnel. ++ ++@node vnc_sec_password ++@subsection With passwords ++ ++The VNC protocol has limited support for password based authentication. Since ++the protocol limits passwords to 8 characters it should not be considered ++to provide high security. The password can be fairly easily brute-forced by ++a client making repeat connections. For this reason, a VNC server using password ++authentication should be restricted to only listen on the loopback interface ++or UNIX domain sockets. Password authentication is not supported when operating ++in FIPS 140-2 compliance mode as it requires the use of the DES cipher. 
Password ++authentication is requested with the @code{password} option, and then once QEMU ++is running the password is set with the monitor. Until the monitor is used to ++set the password all clients will be rejected. ++ ++@example ++@value{qemu_system} [...OPTIONS...] -vnc :1,password -monitor stdio ++(qemu) change vnc password ++Password: ******** ++(qemu) ++@end example ++ ++@node vnc_sec_certificate ++@subsection With x509 certificates ++ ++The QEMU VNC server also implements the VeNCrypt extension allowing use of ++TLS for encryption of the session, and x509 certificates for authentication. ++The use of x509 certificates is strongly recommended, because TLS on its ++own is susceptible to man-in-the-middle attacks. Basic x509 certificate ++support provides a secure session, but no authentication. This allows any ++client to connect, and provides an encrypted session. ++ ++@example ++@value{qemu_system} [...OPTIONS...] \ ++ -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=no \ ++ -vnc :1,tls-creds=tls0 -monitor stdio ++@end example ++ ++In the above example @code{/etc/pki/qemu} should contain at least three files, ++@code{ca-cert.pem}, @code{server-cert.pem} and @code{server-key.pem}. Unprivileged ++users will want to use a private directory, for example @code{$HOME/.pki/qemu}. ++NB the @code{server-key.pem} file should be protected with file mode 0600 to ++only be readable by the user owning it. ++ ++@node vnc_sec_certificate_verify ++@subsection With x509 certificates and client verification ++ ++Certificates can also provide a means to authenticate the client connecting. ++The server will request that the client provide a certificate, which it will ++then validate against the CA certificate. This is a good choice if deploying ++in an environment with a private internal certificate authority. It uses the ++same syntax as previously, but with @code{verify-peer} set to @code{yes} ++instead. 
++ ++@example ++@value{qemu_system} [...OPTIONS...] \ ++ -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ ++ -vnc :1,tls-creds=tls0 -monitor stdio ++@end example ++ ++ ++@node vnc_sec_certificate_pw ++@subsection With x509 certificates, client verification and passwords ++ ++Finally, the previous method can be combined with VNC password authentication ++to provide two layers of authentication for clients. ++ ++@example ++@value{qemu_system} [...OPTIONS...] \ ++ -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ ++ -vnc :1,tls-creds=tls0,password -monitor stdio ++(qemu) change vnc password ++Password: ******** ++(qemu) ++@end example ++ ++ ++@node vnc_sec_sasl ++@subsection With SASL authentication ++ ++The SASL authentication method is a VNC extension, that provides an ++easily extendable, pluggable authentication method. This allows for ++integration with a wide range of authentication mechanisms, such as ++PAM, GSSAPI/Kerberos, LDAP, SQL databases, one-time keys and more. ++The strength of the authentication depends on the exact mechanism ++configured. If the chosen mechanism also provides a SSF layer, then ++it will encrypt the datastream as well. ++ ++Refer to the later docs on how to choose the exact SASL mechanism ++used for authentication, but assuming use of one supporting SSF, ++then QEMU can be launched with: ++ ++@example ++@value{qemu_system} [...OPTIONS...] -vnc :1,sasl -monitor stdio ++@end example ++ ++@node vnc_sec_certificate_sasl ++@subsection With x509 certificates and SASL authentication ++ ++If the desired SASL authentication mechanism does not support ++SSF layers, then it is strongly advised to run it in combination ++with TLS and x509 certificates. This provides a securely encrypted ++data stream, avoiding the risk of compromising the security ++credentials.
This can be enabled, by combining the 'sasl' option ++with the aforementioned TLS + x509 options: ++ ++@example ++@value{qemu_system} [...OPTIONS...] \ ++ -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ ++ -vnc :1,tls-creds=tls0,sasl -monitor stdio ++@end example ++ ++@node vnc_setup_sasl ++ ++@subsection Configuring SASL mechanisms ++ ++The following documentation assumes use of the Cyrus SASL implementation on a ++Linux host, but the principles should apply to any other SASL implementation ++or host. When SASL is enabled, the mechanism configuration will be loaded from ++system default SASL service config /etc/sasl2/qemu.conf. If running QEMU as an ++unprivileged user, an environment variable SASL_CONF_PATH can be used to make ++it search alternate locations for the service config file. ++ ++If the TLS option is enabled for VNC, then it will provide session encryption, ++otherwise the SASL mechanism will have to provide encryption. In the latter ++case the list of possible plugins that can be used is drastically reduced. In ++fact only the GSSAPI SASL mechanism provides an acceptable level of security ++by modern standards. Previous versions of QEMU referred to the DIGEST-MD5 ++mechanism, however, it has multiple serious flaws described in detail in ++RFC 6331 and thus should never be used any more. The SCRAM-SHA-1 mechanism ++provides a simple username/password auth facility similar to DIGEST-MD5, but ++does not support session encryption, so can only be used in combination with ++TLS. ++ ++When not using TLS the recommended configuration is ++ ++@example ++mech_list: gssapi ++keytab: /etc/qemu/krb5.tab ++@end example ++ ++This says to use the 'GSSAPI' mechanism with the Kerberos v5 protocol, with ++the server principal stored in /etc/qemu/krb5.tab. 
For this to work the ++administrator of your KDC must generate a Kerberos principal for the server, ++with a name of 'qemu/somehost.example.com@@EXAMPLE.COM' replacing ++'somehost.example.com' with the fully qualified host name of the machine ++running QEMU, and 'EXAMPLE.COM' with the Kerberos Realm. ++ ++When using TLS, if username+password authentication is desired, then a ++reasonable configuration is ++ ++@example ++mech_list: scram-sha-1 ++sasldb_path: /etc/qemu/passwd.db ++@end example ++ ++The @code{saslpasswd2} program can be used to populate the @code{passwd.db} ++file with accounts. ++ ++Other SASL configurations will be left as an exercise for the reader. Note that ++all mechanisms, except GSSAPI, should be combined with use of TLS to ensure a ++secure data channel. ++ ++ ++@node network_tls ++@section TLS setup for network services ++ ++Almost all network services in QEMU have the ability to use TLS for ++session data encryption, along with x509 certificates for simple ++client authentication. What follows is a description of how to ++generate certificates suitable for usage with QEMU, and applies to ++the VNC server, character devices with the TCP backend, NBD server ++and client, and migration server and client. ++ ++At a high level, QEMU requires certificates and private keys to be ++provided in PEM format. Aside from the core fields, the certificates ++should include various extension data sets, including v3 basic ++constraints data, key purpose, key usage and subject alt name. ++ ++The GnuTLS package includes a command called @code{certtool} which can ++be used to easily generate certificates and keys in the required format ++with expected data present. Alternatively a certificate management ++service may be used. ++ ++At a minimum it is necessary to setup a certificate authority, and ++issue certificates to each server. If using x509 certificates for ++authentication, then each client will also need to be issued a ++certificate. 
++ ++Assuming that the QEMU network services will only ever be exposed to ++clients on a private intranet, there is no need to use a commercial ++certificate authority to create certificates. A self-signed CA is ++sufficient, and in fact likely to be more secure since it removes ++the ability of malicious 3rd parties to trick the CA into mis-issuing ++certs for impersonating your services. The only likely exception ++where a commercial CA might be desirable is if enabling the VNC ++websockets server and exposing it directly to remote browser clients. ++In such a case it might be useful to use a commercial CA to avoid ++needing to install custom CA certs in the web browsers. ++ ++The recommendation is for the server to keep its certificates in either ++@code{/etc/pki/qemu} or for unprivileged users in @code{$HOME/.pki/qemu}. ++ ++@menu ++* tls_generate_ca:: ++* tls_generate_server:: ++* tls_generate_client:: ++* tls_creds_setup:: ++* tls_psk:: ++@end menu ++@node tls_generate_ca ++@subsection Setup the Certificate Authority ++ ++This step only needs to be performed once per organization / organizational ++unit. First the CA needs a private key. This key must be kept VERY secret ++and secure. If this key is compromised the entire trust chain of the certificates ++issued with it is lost. ++ ++@example ++# certtool --generate-privkey > ca-key.pem ++@end example ++ ++To generate a self-signed certificate requires one core piece of information, ++the name of the organization. 
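A template file @code{ca.info} should be ++populated with the desired data to avoid having to deal with interactive ++prompts from certtool: ++@example ++# cat > ca.info <<EOF ++cn = Name of your organization ++ca ++cert_signing_key ++EOF ++# certtool --generate-self-signed \ ++ --load-privkey ca-key.pem \ ++ --template ca.info \ ++ --outfile ca-cert.pem ++@end example ++ ++The @code{ca} keyword in the template sets the v3 basic constraints extension ++to indicate this certificate is for a CA, while @code{cert_signing_key} sets ++the key usage extension to indicate this will be used for signing other keys. ++The generated @code{ca-cert.pem} file should be copied to all servers and ++clients wishing to utilize TLS support in the VNC server. The @code{ca-key.pem} ++must not be disclosed/copied anywhere except the host responsible for issuing ++certificates. ++ ++@node tls_generate_server ++@subsection Issuing server certificates ++ ++Each server (or host) needs to be issued with a key and certificate. When connecting ++the certificate is sent to the client which validates it against the CA certificate. ++The core pieces of information for a server certificate are the hostnames and/or IP ++addresses that will be used by clients when connecting. The hostname / IP address ++that the client specifies when connecting will be validated against the hostname(s) ++and IP address(es) recorded in the server certificate, and if no match is found ++the client will close the connection. ++ ++Thus it is recommended that the server certificate include both the fully qualified ++and unqualified hostnames. If the server will have permanently assigned IP address(es), ++and clients are likely to use them when connecting, they may also be included in the ++certificate. Both IPv4 and IPv6 addresses are supported. Historically certificates ++only included 1 hostname in the @code{CN} field, however, usage of this field for ++validation is now deprecated. Instead modern TLS clients will validate against the ++Subject Alt Name extension data, which allows for multiple entries. In the future ++usage of the @code{CN} field may be discontinued entirely, so providing SAN ++extension data is strongly recommended. ++ ++On the host holding the CA, create template files containing the information ++for each server, and use it to issue server certificates. ++ ++@example ++# cat > server-hostNNN.info <<EOF ++organization = Name of your organization ++cn = hostNNN.foo.example.com ++dns_name = hostNNN ++dns_name = hostNNN.foo.example.com ++ip_address = 10.0.1.87 ++ip_address = 192.8.0.92 ++ip_address = 2620:0:cafe::87 ++ip_address = 2001:24::92 ++tls_www_server ++encryption_key ++signing_key ++EOF ++# certtool --generate-privkey > server-hostNNN-key.pem ++# certtool --generate-certificate \ ++ --load-ca-certificate ca-cert.pem \ ++ --load-ca-privkey ca-key.pem \ ++ --load-privkey server-hostNNN-key.pem \ ++ --template server-hostNNN.info \ ++ --outfile server-hostNNN-cert.pem ++@end example ++ ++The @code{dns_name} and @code{ip_address} fields in the template are setting ++the subject alt name extension data. The @code{tls_www_server} keyword is the ++key purpose extension to indicate this certificate is intended for usage in ++a web server. Although QEMU network services are not in fact HTTP servers ++(except for VNC websockets), setting this key purpose is still recommended. ++The @code{encryption_key} and @code{signing_key} keyword is the key usage ++extension to indicate this certificate is intended for usage in the data ++session. ++ ++The @code{server-hostNNN-key.pem} and @code{server-hostNNN-cert.pem} files ++should now be securely copied to the server for which they were generated, ++and renamed to @code{server-key.pem} and @code{server-cert.pem} when added ++to the @code{/etc/pki/qemu} directory on the target host. The @code{server-key.pem} ++file is security sensitive and should be kept protected with file mode 0600 ++to prevent disclosure. ++ ++@node tls_generate_client ++@subsection Issuing client certificates ++ ++The QEMU x509 TLS credential setup defaults to enabling client verification ++using certificates, providing a simple authentication mechanism. If this ++default is used, each client also needs to be issued a certificate. The client ++certificate contains enough metadata to uniquely identify the client within the ++scope of the certificate authority. The client certificate would typically ++include fields for organization, state, city, building, etc.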
++ ++Once again on the host holding the CA, create template files containing the ++information for each client, and use it to issue client certificates. ++ ++ ++@example ++# cat > client-hostNNN.info <<EOF ++country = GB ++state = London ++locality = City Of London ++organization = Name of your organization ++cn = hostNNN.foo.example.com ++tls_www_client ++encryption_key ++signing_key ++EOF ++# certtool --generate-privkey > client-hostNNN-key.pem ++# certtool --generate-certificate \ ++ --load-ca-certificate ca-cert.pem \ ++ --load-ca-privkey ca-key.pem \ ++ --load-privkey client-hostNNN-key.pem \ ++ --template client-hostNNN.info \ ++ --outfile client-hostNNN-cert.pem ++@end example ++ ++The subject alt name extension data is not required for clients, so the ++@code{dns_name} and @code{ip_address} fields are not included. ++The @code{tls_www_client} keyword is the key purpose extension to indicate ++this certificate is intended for usage in a web client. Although QEMU ++network clients are not in fact HTTP clients, setting this key purpose is ++still recommended. The @code{encryption_key} and @code{signing_key} keyword ++is the key usage extension to indicate this certificate is intended for ++usage in the data session. ++ ++The @code{client-hostNNN-key.pem} and @code{client-hostNNN-cert.pem} files ++should now be securely copied to the client for which they were generated, ++and renamed to @code{client-key.pem} and @code{client-cert.pem} when added ++to the @code{/etc/pki/qemu} directory on the target host. The @code{client-key.pem} ++file is security sensitive and should be kept protected with file mode 0600 ++to prevent disclosure. ++ ++If a single host is going to be using TLS in both a client and server ++role, it is possible to create a single certificate to cover both roles. ++This would be quite common for the migration and NBD services, where a ++QEMU process will be started by accepting a TLS protected incoming migration, ++and later itself be migrated out to another host. To generate a single ++certificate, simply include the template data from both the client and server ++instructions in one.
++ ++@example ++# cat > both-hostNNN.info <<EOF ++country = GB ++state = London ++locality = City Of London ++organization = Name of your organization ++cn = hostNNN.foo.example.com ++dns_name = hostNNN ++dns_name = hostNNN.foo.example.com ++ip_address = 10.0.1.87 ++ip_address = 192.8.0.92 ++ip_address = 2620:0:cafe::87 ++ip_address = 2001:24::92 ++tls_www_server ++tls_www_client ++encryption_key ++signing_key ++EOF ++# certtool --generate-privkey > both-hostNNN-key.pem ++# certtool --generate-certificate \ ++ --load-ca-certificate ca-cert.pem \ ++ --load-ca-privkey ca-key.pem \ ++ --load-privkey both-hostNNN-key.pem \ ++ --template both-hostNNN.info \ ++ --outfile both-hostNNN-cert.pem ++@end example ++ ++When copying the PEM files to the target host, save them twice, ++once as @code{server-cert.pem} and @code{server-key.pem}, and ++again as @code{client-cert.pem} and @code{client-key.pem}. ++ ++@node tls_creds_setup ++@subsection TLS x509 credential configuration ++ ++QEMU has a standard mechanism for loading x509 credentials that will be ++used for network services and clients. It requires specifying the ++@code{tls-creds-x509} class name to the @code{--object} command line ++argument for the system emulators. Each set of credentials loaded should ++be given a unique string identifier via the @code{id} parameter. A single ++set of TLS credentials can be used for multiple network backends, so VNC, ++migration, NBD, character devices can all share the same credentials. Note, ++however, that credentials for use in a client endpoint must be loaded ++separately from those used in a server endpoint. ++ ++When specifying the object, the @code{dir} parameter specifies which ++directory contains the credential files. This directory is expected to ++contain files with the names mentioned previously, @code{ca-cert.pem}, ++@code{server-key.pem}, @code{server-cert.pem}, @code{client-key.pem} ++and @code{client-cert.pem} as appropriate. It is also possible to ++include a set of pre-generated Diffie-Hellman (DH) parameters in a file ++@code{dh-params.pem}, which can be created using the ++@code{certtool --generate-dh-params} command. If omitted, QEMU will ++dynamically generate DH parameters when loading the credentials.
++ ++The @code{endpoint} parameter indicates whether the credentials will ++be used for a network client or server, and determines which PEM ++files are loaded. ++ ++The @code{verify-peer} parameter determines whether x509 certificate ++validation should be performed. This defaults to enabled, meaning ++clients will always validate the server hostname against the ++certificate subject alt name fields and/or CN field. It also ++means that servers will request that clients provide a certificate ++and validate it. Verification should never be turned off for ++client endpoints; however, it may be turned off for server endpoints ++if an alternative mechanism is used to authenticate clients. For ++example, the VNC server can use SASL to authenticate clients ++instead. ++ ++To load server credentials with client certificate validation ++enabled ++ ++@example ++@value{qemu_system} -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server ++@end example ++ ++while to load client credentials use ++ ++@example ++@value{qemu_system} -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=client ++@end example ++ ++Network services which support TLS will all have a @code{tls-creds} ++parameter which expects the ID of the TLS credentials object. For ++example with VNC: ++ ++@example ++@value{qemu_system} -vnc 0.0.0.0:0,tls-creds=tls0 ++@end example ++ ++@node tls_psk ++@subsection TLS Pre-Shared Keys (PSK) ++ ++Instead of using certificates, you may also use TLS Pre-Shared Keys ++(TLS-PSK). This can be simpler to set up than certificates but is ++less scalable.
++ ++Use the GnuTLS @code{psktool} program to generate a @code{keys.psk} ++file containing one or more usernames and random keys: ++ ++@example ++mkdir -m 0700 /tmp/keys ++psktool -u rich -p /tmp/keys/keys.psk ++@end example ++ ++TLS-enabled servers such as qemu-nbd can use this directory like so: ++ ++@example ++qemu-nbd \ ++ -t -x / \ ++ --object tls-creds-psk,id=tls0,endpoint=server,dir=/tmp/keys \ ++ --tls-creds tls0 \ ++ image.qcow2 ++@end example ++ ++When connecting from a qemu-based client you must specify the ++directory containing @code{keys.psk} and an optional @var{username} ++(defaults to ``qemu''): ++ ++@example ++qemu-img info \ ++ --object tls-creds-psk,id=tls0,dir=/tmp/keys,username=rich,endpoint=client \ ++ --image-opts \ ++ file.driver=nbd,file.host=localhost,file.port=10809,file.tls-creds=tls0,file.export=/ ++@end example ++ ++@node gdb_usage ++@section GDB usage ++ ++QEMU has a primitive support to work with gdb, so that you can do ++'Ctrl-C' while the virtual machine is running and inspect its state. ++ ++In order to use gdb, launch QEMU with the '-s' option. It will wait for a ++gdb connection: ++@example ++@value{qemu_system} -s -kernel bzImage -hda rootdisk.img -append "root=/dev/hda" ++Connected to host network interface: tun0 ++Waiting gdb connection on port 1234 ++@end example ++ ++Then launch gdb on the 'vmlinux' executable: ++@example ++> gdb vmlinux ++@end example ++ ++In gdb, connect to QEMU: ++@example ++(gdb) target remote localhost:1234 ++@end example ++ ++Then you can use gdb normally. For example, type 'c' to launch the kernel: ++@example ++(gdb) c ++@end example ++ ++Here are some useful tips in order to use gdb on system code: ++ ++@enumerate ++@item ++Use @code{info reg} to display all the CPU registers. ++@item ++Use @code{x/10i $eip} to display the code at the PC position. ++@item ++Use @code{set architecture i8086} to dump 16 bit code. Then use ++@code{x/10i $cs*16+$eip} to dump the code at the PC position. 
++@end enumerate ++ ++Advanced debugging options: ++ ++The default single stepping behavior is to step with the IRQs and timer service routines off. It is set this way because when gdb executes a single step it expects to advance beyond the current instruction. With the IRQs and timer service routines on, a single step might jump into one of the interrupt or exception vectors instead of executing the current instruction. This means you may hit the same breakpoint a number of times before executing the instruction gdb wants to have executed. Because there are rare circumstances where you want to single step into an interrupt vector the behavior can be controlled from GDB. There are three commands you can use to query and set the single step behavior: ++@table @code ++@item maintenance packet qqemu.sstepbits ++ ++This will display the MASK bits used to control the single stepping IE: ++@example ++(gdb) maintenance packet qqemu.sstepbits ++sending: "qqemu.sstepbits" ++received: "ENABLE=1,NOIRQ=2,NOTIMER=4" ++@end example ++@item maintenance packet qqemu.sstep ++ ++This will display the current value of the mask used when single stepping IE: ++@example ++(gdb) maintenance packet qqemu.sstep ++sending: "qqemu.sstep" ++received: "0x7" ++@end example ++@item maintenance packet Qqemu.sstep=HEX_VALUE ++ ++This will change the single step mask, so if you wanted to enable IRQs on the single step, but not timers, you would use: ++@example ++(gdb) maintenance packet Qqemu.sstep=0x5 ++sending: "Qqemu.sstep=0x5" ++received: "OK" ++@end example ++@end table ++ ++@node pcsys_os_specific ++@section Target OS specific information ++ ++@subsection Linux ++ ++To have access to SVGA graphic modes under X11, use the @code{vesa} or ++the @code{cirrus} X11 driver. For optimal performance, use 16 bit ++color depth in the guest and the host OS.
++ ++When using a 2.6 guest Linux kernel, you should add the option ++@code{clock=pit} on the kernel command line because the 2.6 Linux ++kernels make very strict real time clock checks by default that QEMU ++cannot simulate exactly. ++ ++When using a 2.6 guest Linux kernel, verify that the 4G/4G patch is ++not activated because QEMU is slower with this patch. The QEMU ++Accelerator Module is also much slower in this case. Earlier Fedora ++Core 3 Linux kernel (< 2.6.9-1.724_FC3) were known to incorporate this ++patch by default. Newer kernels don't have it. ++ ++@subsection Windows ++ ++If you have a slow host, using Windows 95 is better as it gives the ++best speed. Windows 2000 is also a good choice. ++ ++@subsubsection SVGA graphic modes support ++ ++QEMU emulates a Cirrus Logic GD5446 Video ++card. All Windows versions starting from Windows 95 should recognize ++and use this graphic card. For optimal performances, use 16 bit color ++depth in the guest and the host OS. ++ ++If you are using Windows XP as guest OS and if you want to use high ++resolution modes which the Cirrus Logic BIOS does not support (i.e. >= ++1280x1024x16), then you should use the VESA VBE virtual graphic card ++(option @option{-std-vga}). ++ ++@subsubsection CPU usage reduction ++ ++Windows 9x does not correctly use the CPU HLT ++instruction. The result is that it takes host CPU cycles even when ++idle. You can install the utility from ++@url{https://web.archive.org/web/20060212132151/http://www.user.cityline.ru/~maxamn/amnhltm.zip} ++to solve this problem. Note that no such tool is needed for NT, 2000 or XP. ++ ++@subsubsection Windows 2000 disk full problem ++ ++Windows 2000 has a bug which gives a disk full problem during its ++installation. When installing it, use the @option{-win2k-hack} QEMU ++option to enable a specific workaround. After Windows 2000 is ++installed, you no longer need this option (this option slows down the ++IDE transfers). 
++ ++@subsubsection Windows 2000 shutdown ++ ++Windows 2000 cannot automatically shut down in QEMU although Windows 98 ++can. It comes from the fact that Windows 2000 does not automatically ++use the APM driver provided by the BIOS. ++ ++In order to correct that, do the following (thanks to Struan ++Bartlett): go to the Control Panel => Add/Remove Hardware & Next => ++Add/Troubleshoot a device => Add a new device & Next => No, select the ++hardware from a list & Next => NT Apm/Legacy Support & Next => Next ++(again) a few times. Now the driver is installed and Windows 2000 now ++correctly instructs QEMU to shut down at the appropriate moment. ++ ++@subsubsection Share a directory between Unix and Windows ++ ++See @ref{sec_invocation} for help on the option ++@option{'-netdev user,smb=...'}. ++ ++@subsubsection Windows XP security problem ++ ++Some releases of Windows XP install correctly but give a security ++error when booting: ++@example ++A problem is preventing Windows from accurately checking the ++license for this computer. Error code: 0x800703e6. ++@end example ++ ++The workaround is to install a service pack for XP after a boot in safe ++mode. Then reboot, and the problem should go away. Since there is no ++network while in safe mode, it's recommended to download the full ++installation of SP1 or SP2 and transfer that via an ISO or using the ++vvfat block device ("-hdb fat:directory_which_holds_the_SP"). ++ ++@subsection MS-DOS and FreeDOS ++ ++@subsubsection CPU usage reduction ++ ++DOS does not correctly use the CPU HLT instruction. The result is that ++it takes host CPU cycles even when idle. You can install the utility from ++@url{https://web.archive.org/web/20051222085335/http://www.vmware.com/software/dosidle210.zip} ++to solve this problem. ++ ++@node QEMU System emulator for non PC targets ++@chapter QEMU System emulator for non PC targets ++ ++QEMU is a generic emulator and it emulates many non PC ++machines.
Most of the options are similar to the PC emulator. The ++differences are mentioned in the following sections. ++ ++@menu ++* PowerPC System emulator:: ++* Sparc32 System emulator:: ++* Sparc64 System emulator:: ++* MIPS System emulator:: ++* ARM System emulator:: ++* ColdFire System emulator:: ++* Cris System emulator:: ++* Microblaze System emulator:: ++* SH4 System emulator:: ++* Xtensa System emulator:: ++@end menu ++ ++@node PowerPC System emulator ++@section PowerPC System emulator ++@cindex system emulation (PowerPC) ++ ++Use the executable @file{qemu-system-ppc} to simulate a complete 40P (PREP) ++or PowerMac PowerPC system. ++ ++QEMU emulates the following PowerMac peripherals: ++ ++@itemize @minus ++@item ++UniNorth or Grackle PCI Bridge ++@item ++PCI VGA compatible card with VESA Bochs Extensions ++@item ++2 PMAC IDE interfaces with hard disk and CD-ROM support ++@item ++NE2000 PCI adapters ++@item ++Non Volatile RAM ++@item ++VIA-CUDA with ADB keyboard and mouse. ++@end itemize ++ ++QEMU emulates the following 40P (PREP) peripherals: ++ ++@itemize @minus ++@item ++PCI Bridge ++@item ++PCI VGA compatible card with VESA Bochs Extensions ++@item ++2 IDE interfaces with hard disk and CD-ROM support ++@item ++Floppy disk ++@item ++PCnet network adapters ++@item ++Serial port ++@item ++PREP Non Volatile RAM ++@item ++PC compatible keyboard and mouse. ++@end itemize ++ ++Since version 0.9.1, QEMU uses OpenBIOS @url{https://www.openbios.org/} ++for the g3beige and mac99 PowerMac and the 40p machines. OpenBIOS is a free ++(GPL v2) portable firmware implementation. The goal is to implement a 100% ++IEEE 1275-1994 (referred to as Open Firmware) compliant firmware. ++ ++@c man begin OPTIONS ++ ++The following options are specific to the PowerPC emulation: ++ ++@table @option ++ ++@item -g @var{W}x@var{H}[x@var{DEPTH}] ++ ++Set the initial VGA graphic mode. The default is 800x600x32. 
++ ++@item -prom-env @var{string} ++ ++Set OpenBIOS variables in NVRAM, for example: ++ ++@example +qemu-kvm -prom-env 'auto-boot?=false' \ - -prom-env 'boot-device=hd:2,\yaboot' \ - -prom-env 'boot-args=conf=hd:2,\yaboot.conf' - @end example ++ -prom-env 'boot-device=hd:2,\yaboot' \ ++ -prom-env 'boot-args=conf=hd:2,\yaboot.conf' ++@end example ++ ++@end table ++ ++@c man end ++ ++ ++More information is available at ++@url{http://perso.magic.fr/l_indien/qemu-ppc/}. ++ ++@node Sparc32 System emulator ++@section Sparc32 System emulator ++@cindex system emulation (Sparc32) ++ ++Use the executable @file{qemu-system-sparc} to simulate the following ++Sun4m architecture machines: ++@itemize @minus ++@item ++SPARCstation 4 ++@item ++SPARCstation 5 ++@item ++SPARCstation 10 ++@item ++SPARCstation 20 ++@item ++SPARCserver 600MP ++@item ++SPARCstation LX ++@item ++SPARCstation Voyager ++@item ++SPARCclassic ++@item ++SPARCbook ++@end itemize ++ ++The emulation is somewhat complete. SMP up to 16 CPUs is supported, ++but Linux limits the number of usable CPUs to 4. ++ ++QEMU emulates the following sun4m peripherals: ++ ++@itemize @minus ++@item ++IOMMU ++@item ++TCX or cgthree Frame buffer ++@item ++Lance (Am7990) Ethernet ++@item ++Non Volatile RAM M48T02/M48T08 ++@item ++Slave I/O: timers, interrupt controllers, Zilog serial ports, keyboard ++and power/reset logic ++@item ++ESP SCSI controller with hard disk and CD-ROM support ++@item ++Floppy drive (not on SS-600MP) ++@item ++CS4231 sound device (only on SS-5, not working yet) ++@end itemize ++ ++The number of peripherals is fixed in the architecture. Maximum ++memory size depends on the machine type, for SS-5 it is 256MB and for ++others 2047MB. ++ ++Since version 0.8.2, QEMU uses OpenBIOS ++@url{https://www.openbios.org/}. OpenBIOS is a free (GPL v2) portable ++firmware implementation. The goal is to implement a 100% IEEE ++1275-1994 (referred to as Open Firmware) compliant firmware. 
++ ++A sample Linux 2.6 series kernel and ram disk image are available on ++the QEMU web site. There are still issues with NetBSD and OpenBSD, but ++most kernel versions work. Please note that currently older Solaris kernels ++don't work, probably due to interface issues between OpenBIOS and ++Solaris. ++ ++@c man begin OPTIONS ++ ++The following options are specific to the Sparc32 emulation: ++ ++@table @option ++ ++@item -g @var{W}x@var{H}[x@var{DEPTH}] ++ ++Set the initial graphics mode. For TCX, the default is 1024x768x8 with the ++option of 1024x768x24. For cgthree, the default is 1024x768x8 with the option ++of 1152x900x8 for people who wish to use OBP. ++ ++@item -prom-env @var{string} ++ ++Set OpenBIOS variables in NVRAM, for example: ++ ++@example ++qemu-system-sparc -prom-env 'auto-boot?=false' \ ++ -prom-env 'boot-device=sd(0,2,0):d' -prom-env 'boot-args=linux single' ++@end example ++ ++@item -M [SS-4|SS-5|SS-10|SS-20|SS-600MP|LX|Voyager|SPARCClassic|SPARCbook] ++ ++Set the emulated machine type. Default is SS-5. ++ ++@end table ++ ++@c man end ++ ++@node Sparc64 System emulator ++@section Sparc64 System emulator ++@cindex system emulation (Sparc64) ++ ++Use the executable @file{qemu-system-sparc64} to simulate a Sun4u ++(UltraSPARC PC-like machine), Sun4v (T1 PC-like machine), or generic ++Niagara (T1) machine. The Sun4u emulator is mostly complete, being ++able to run Linux, NetBSD and OpenBSD in headless (-nographic) mode. The ++Sun4v emulator is still a work in progress. ++ ++The Niagara T1 emulator makes use of firmware and OS binaries supplied in the S10image/ directory ++of the OpenSPARC T1 project @url{http://download.oracle.com/technetwork/systems/opensparc/OpenSPARCT1_Arch.1.5.tar.bz2} ++and is able to boot the disk.s10hw2 Solaris image.
++@example ++qemu-system-sparc64 -M niagara -L /path-to/S10image/ \ ++ -nographic -m 256 \ ++ -drive if=pflash,readonly=on,file=/S10image/disk.s10hw2 ++@end example ++ ++ ++QEMU emulates the following peripherals: ++ ++@itemize @minus ++@item ++UltraSparc IIi APB PCI Bridge ++@item ++PCI VGA compatible card with VESA Bochs Extensions ++@item ++PS/2 mouse and keyboard ++@item ++Non Volatile RAM M48T59 ++@item ++PC-compatible serial ports ++@item ++2 PCI IDE interfaces with hard disk and CD-ROM support ++@item ++Floppy disk ++@end itemize ++ ++@c man begin OPTIONS ++ ++The following options are specific to the Sparc64 emulation: ++ ++@table @option ++ ++@item -prom-env @var{string} ++ ++Set OpenBIOS variables in NVRAM, for example: ++ ++@example ++qemu-system-sparc64 -prom-env 'auto-boot?=false' ++@end example ++ ++@item -M [sun4u|sun4v|niagara] ++ ++Set the emulated machine type. The default is sun4u. ++ ++@end table ++ ++@c man end ++ ++@node MIPS System emulator ++@section MIPS System emulator ++@cindex system emulation (MIPS) ++ ++@menu ++* nanoMIPS System emulator :: ++@end menu ++ ++Four executables cover simulation of 32 and 64-bit MIPS systems in ++both endian options, @file{qemu-system-mips}, @file{qemu-system-mipsel} ++@file{qemu-system-mips64} and @file{qemu-system-mips64el}. ++Five different machine types are emulated: ++ ++@itemize @minus ++@item ++A generic ISA PC-like machine "mips" ++@item ++The MIPS Malta prototype board "malta" ++@item ++An ACER Pica "pica61". This machine needs the 64-bit emulator. ++@item ++MIPS emulator pseudo board "mipssim" ++@item ++A MIPS Magnum R4000 machine "magnum". This machine needs the 64-bit emulator. ++@end itemize ++ ++The generic emulation is supported by Debian 'Etch' and is able to ++install Debian into a virtual disk image. 
The following devices are ++emulated: ++ ++@itemize @minus ++@item ++A range of MIPS CPUs, default is the 24Kf ++@item ++PC style serial port ++@item ++PC style IDE disk ++@item ++NE2000 network card ++@end itemize ++ ++The Malta emulation supports the following devices: ++ ++@itemize @minus ++@item ++Core board with MIPS 24Kf CPU and Galileo system controller ++@item ++PIIX4 PCI/USB/SMbus controller ++@item ++The Multi-I/O chip's serial device ++@item ++PCI network cards (PCnet32 and others) ++@item ++Malta FPGA serial device ++@item ++Cirrus (default) or any other PCI VGA graphics card ++@end itemize ++ ++The Boston board emulation supports the following devices: ++ ++@itemize @minus ++@item ++Xilinx FPGA, which includes a PCIe root port and an UART ++@item ++Intel EG20T PCH connects the I/O peripherals, but only the SATA bus is emulated ++@end itemize ++ ++The ACER Pica emulation supports: ++ ++@itemize @minus ++@item ++MIPS R4000 CPU ++@item ++PC-style IRQ and DMA controllers ++@item ++PC Keyboard ++@item ++IDE controller ++@end itemize ++ ++The MIPS Magnum R4000 emulation supports: ++ ++@itemize @minus ++@item ++MIPS R4000 CPU ++@item ++PC-style IRQ controller ++@item ++PC Keyboard ++@item ++SCSI controller ++@item ++G364 framebuffer ++@end itemize ++ ++The Fulong 2E emulation supports: ++ ++@itemize @minus ++@item ++Loongson 2E CPU ++@item ++Bonito64 system controller as North Bridge ++@item ++VT82C686 chipset as South Bridge ++@item ++RTL8139D as a network card chipset ++@end itemize ++ ++The mipssim pseudo board emulation provides an environment similar ++to what the proprietary MIPS emulator uses for running Linux. 
++It supports: ++ ++@itemize @minus ++@item ++A range of MIPS CPUs, default is the 24Kf ++@item ++PC style serial port ++@item ++MIPSnet network emulation ++@end itemize ++ ++@node nanoMIPS System emulator ++@subsection nanoMIPS System emulator ++@cindex system emulation (nanoMIPS) ++ ++Executable @file{qemu-system-mipsel} also covers simulation of ++32-bit nanoMIPS system in little endian mode: ++ ++@itemize @minus ++@item ++nanoMIPS I7200 CPU ++@end itemize ++ ++Example of @file{qemu-system-mipsel} usage for nanoMIPS is shown below: ++ ++Download @code{<disk_image_file>} from @url{https://mipsdistros.mips.com/LinuxDistro/nanomips/buildroot/index.html}. ++ ++Download @code{<kernel_image_file>} from @url{https://mipsdistros.mips.com/LinuxDistro/nanomips/kernels/v4.15.18-432-gb2eb9a8b07a1-20180627102142/index.html}. ++ ++Start system emulation of Malta board with nanoMIPS I7200 CPU: ++@example ++qemu-system-mipsel -cpu I7200 -kernel @code{<kernel_image_file>} \ ++ -M malta -serial stdio -m @code{<memory_size>} -hda @code{<disk_image_file>} \ ++ -append "mem=256m@@0x0 rw console=ttyS0 vga=cirrus vesa=0x111 root=/dev/sda" ++@end example ++ ++ ++@node ARM System emulator ++@section ARM System emulator ++@cindex system emulation (ARM) ++ ++Use the executable @file{qemu-system-arm} to simulate an ARM ++machine. The ARM Integrator/CP board is emulated with the following ++devices: ++ ++@itemize @minus ++@item ++ARM926E, ARM1026E, ARM946E, ARM1136 or Cortex-A8 CPU ++@item ++Two PL011 UARTs ++@item ++SMC 91c111 Ethernet adapter ++@item ++PL110 LCD controller ++@item ++PL050 KMI with PS/2 keyboard and mouse. ++@item ++PL181 MultiMedia Card Interface with SD card. ++@end itemize ++ ++The ARM Versatile baseboard is emulated with the following devices: ++ ++@itemize @minus ++@item ++ARM926E, ARM1136 or Cortex-A8 CPU ++@item ++PL190 Vectored Interrupt Controller ++@item ++Four PL011 UARTs ++@item ++SMC 91c111 Ethernet adapter ++@item ++PL110 LCD controller ++@item ++PL050 KMI with PS/2 keyboard and mouse. ++@item ++PCI host bridge.
Note the emulated PCI bridge only provides access to ++PCI memory space. It does not provide access to PCI IO space. ++This means some devices (e.g. ne2k_pci NIC) are not usable, and others ++(e.g. rtl8139 NIC) are only usable when the guest drivers use the memory ++mapped control registers. ++@item ++PCI OHCI USB controller. ++@item ++LSI53C895A PCI SCSI Host Bus Adapter with hard disk and CD-ROM devices. ++@item ++PL181 MultiMedia Card Interface with SD card. ++@end itemize ++ ++Several variants of the ARM RealView baseboard are emulated, ++including the EB, PB-A8 and PBX-A9. Due to interactions with the ++bootloader, only certain Linux kernel configurations work out ++of the box on these boards. ++ ++Kernels for the PB-A8 board should have CONFIG_REALVIEW_HIGH_PHYS_OFFSET ++enabled in the kernel, and expect 512M RAM. Kernels for the PBX-A9 board ++should have CONFIG_SPARSEMEM enabled, CONFIG_REALVIEW_HIGH_PHYS_OFFSET ++disabled and expect 1024M RAM. ++ ++The following devices are emulated: ++ ++@itemize @minus ++@item ++ARM926E, ARM1136, ARM11MPCore, Cortex-A8 or Cortex-A9 MPCore CPU ++@item ++ARM AMBA Generic/Distributed Interrupt Controller ++@item ++Four PL011 UARTs ++@item ++SMC 91c111 or SMSC LAN9118 Ethernet adapter ++@item ++PL110 LCD controller ++@item ++PL050 KMI with PS/2 keyboard and mouse ++@item ++PCI host bridge ++@item ++PCI OHCI USB controller ++@item ++LSI53C895A PCI SCSI Host Bus Adapter with hard disk and CD-ROM devices ++@item ++PL181 MultiMedia Card Interface with SD card.
++@end itemize ++ ++The XScale-based clamshell PDA models ("Spitz", "Akita", "Borzoi" ++and "Terrier") emulation includes the following peripherals: ++ ++@itemize @minus ++@item ++Intel PXA270 System-on-chip (ARM V5TE core) ++@item ++NAND Flash memory ++@item ++IBM/Hitachi DSCM microdrive in a PXA PCMCIA slot - not in "Akita" ++@item ++On-chip OHCI USB controller ++@item ++On-chip LCD controller ++@item ++On-chip Real Time Clock ++@item ++TI ADS7846 touchscreen controller on SSP bus ++@item ++Maxim MAX1111 analog-digital converter on I@math{^2}C bus ++@item ++GPIO-connected keyboard controller and LEDs ++@item ++Secure Digital card connected to PXA MMC/SD host ++@item ++Three on-chip UARTs ++@item ++WM8750 audio CODEC on I@math{^2}C and I@math{^2}S busses ++@end itemize ++ ++The Palm Tungsten|E PDA (codename "Cheetah") emulation includes the ++following elements: ++ ++@itemize @minus ++@item ++Texas Instruments OMAP310 System-on-chip (ARM 925T core) ++@item ++ROM and RAM memories (ROM firmware image can be loaded with -option-rom) ++@item ++On-chip LCD controller ++@item ++On-chip Real Time Clock ++@item ++TI TSC2102i touchscreen controller / analog-digital converter / Audio ++CODEC, connected through MicroWire and I@math{^2}S busses ++@item ++GPIO-connected matrix keypad ++@item ++Secure Digital card connected to OMAP MMC/SD host ++@item ++Three on-chip UARTs ++@end itemize ++ ++Nokia N800 and N810 internet tablets (known also as RX-34 and RX-44 / 48) ++emulation supports the following elements: ++ ++@itemize @minus ++@item ++Texas Instruments OMAP2420 System-on-chip (ARM 1136 core) ++@item ++RAM and non-volatile OneNAND Flash memories ++@item ++Display connected to EPSON remote framebuffer chip and OMAP on-chip ++display controller and a LS041y3 MIPI DBI-C controller ++@item ++TI TSC2301 (in N800) and TI TSC2005 (in N810) touchscreen controllers ++driven through SPI bus ++@item ++National Semiconductor LM8323-controlled qwerty keyboard driven ++through 
I@math{^2}C bus ++@item ++Secure Digital card connected to OMAP MMC/SD host ++@item ++Three OMAP on-chip UARTs and on-chip STI debugging console ++@item ++Mentor Graphics "Inventra" dual-role USB controller embedded in a TI ++TUSB6010 chip - only USB host mode is supported ++@item ++TI TMP105 temperature sensor driven through I@math{^2}C bus ++@item ++TI TWL92230C power management companion with an RTC on I@math{^2}C bus ++@item ++Nokia RETU and TAHVO multi-purpose chips with an RTC, connected ++through CBUS ++@end itemize ++ ++The Luminary Micro Stellaris LM3S811EVB emulation includes the following ++devices: ++ ++@itemize @minus ++@item ++Cortex-M3 CPU core. ++@item ++64k Flash and 8k SRAM. ++@item ++Timers, UARTs, ADC and I@math{^2}C interface. ++@item ++OSRAM Pictiva 96x16 OLED with SSD0303 controller on I@math{^2}C bus. ++@end itemize ++ ++The Luminary Micro Stellaris LM3S6965EVB emulation includes the following ++devices: ++ ++@itemize @minus ++@item ++Cortex-M3 CPU core. ++@item ++256k Flash and 64k SRAM. ++@item ++Timers, UARTs, ADC, I@math{^2}C and SSI interfaces. ++@item ++OSRAM Pictiva 128x64 OLED with SSD0323 controller connected via SSI. ++@end itemize ++ ++The Freecom MusicPal internet radio emulation includes the following ++elements: ++ ++@itemize @minus ++@item ++Marvell MV88W8618 ARM core. ++@item ++32 MB RAM, 256 KB SRAM, 8 MB flash. ++@item ++Up to 2 16550 UARTs ++@item ++MV88W8xx8 Ethernet controller ++@item ++MV88W8618 audio controller, WM8750 CODEC and mixer ++@item ++128×64 display with brightness control ++@item ++2 buttons, 2 navigation wheels with button function ++@end itemize ++ ++The Siemens SX1 models v1 and v2 (default) basic emulation. 
++The emulation includes the following elements: ++ ++@itemize @minus ++@item ++Texas Instruments OMAP310 System-on-chip (ARM 925T core) ++@item ++ROM and RAM memories (ROM firmware image can be loaded with -pflash) ++V1 ++1 Flash of 16MB and 1 Flash of 8MB ++V2 ++1 Flash of 32MB ++@item ++On-chip LCD controller ++@item ++On-chip Real Time Clock ++@item ++Secure Digital card connected to OMAP MMC/SD host ++@item ++Three on-chip UARTs ++@end itemize ++ ++A Linux 2.6 test image is available on the QEMU web site. More ++information is available in the QEMU mailing-list archive. ++ ++@c man begin OPTIONS ++ ++The following options are specific to the ARM emulation: ++ ++@table @option ++ ++@item -semihosting ++Enable semihosting syscall emulation. ++ ++On ARM this implements the "Angel" interface. ++ ++Note that this allows guest direct access to the host filesystem, ++so should only be used with trusted guest OS. ++ ++@end table ++ ++@c man end ++ ++@node ColdFire System emulator ++@section ColdFire System emulator ++@cindex system emulation (ColdFire) ++@cindex system emulation (M68K) ++ ++Use the executable @file{qemu-system-m68k} to simulate a ColdFire machine. ++The emulator is able to boot a uClinux kernel. ++ ++The M5208EVB emulation includes the following devices: ++ ++@itemize @minus ++@item ++MCF5208 ColdFire V2 Microprocessor (ISA A+ with EMAC). ++@item ++Three on-chip UARTs. ++@item ++Fast Ethernet Controller (FEC) ++@end itemize ++ ++The AN5206 emulation includes the following devices: ++ ++@itemize @minus ++@item ++MCF5206 ColdFire V2 Microprocessor. ++@item ++Two on-chip UARTs. ++@end itemize ++ ++@c man begin OPTIONS ++ ++The following options are specific to the ColdFire emulation: ++ ++@table @option ++ ++@item -semihosting ++Enable semihosting syscall emulation. ++ ++On M68K this implements the "ColdFire GDB" interface used by libgloss.
++ ++Note that this allows guest direct access to the host filesystem, ++so should only be used with trusted guest OS. ++ ++@end table ++ ++@c man end ++ ++@node Cris System emulator ++@section Cris System emulator ++@cindex system emulation (Cris) ++ ++TODO ++ ++@node Microblaze System emulator ++@section Microblaze System emulator ++@cindex system emulation (Microblaze) ++ ++TODO ++ ++@node SH4 System emulator ++@section SH4 System emulator ++@cindex system emulation (SH4) ++ ++TODO ++ ++@node Xtensa System emulator ++@section Xtensa System emulator ++@cindex system emulation (Xtensa) ++ ++Two executables cover simulation of both Xtensa endian options, ++@file{qemu-system-xtensa} and @file{qemu-system-xtensaeb}. ++Two different machine types are emulated: ++ ++@itemize @minus ++@item ++Xtensa emulator pseudo board "sim" ++@item ++Avnet LX60/LX110/LX200 board ++@end itemize ++ ++The sim pseudo board emulation provides an environment similar ++to one provided by the proprietary Tensilica ISS. ++It supports: ++ ++@itemize @minus ++@item ++A range of Xtensa CPUs, default is the DC232B ++@item ++Console and filesystem access via semihosting calls ++@end itemize ++ ++The Avnet LX60/LX110/LX200 emulation supports: ++ ++@itemize @minus ++@item ++A range of Xtensa CPUs, default is the DC232B ++@item ++16550 UART ++@item ++OpenCores 10/100 Mbps Ethernet MAC ++@end itemize ++ ++@c man begin OPTIONS ++ ++The following options are specific to the Xtensa emulation: ++ ++@table @option ++ ++@item -semihosting ++Enable semihosting syscall emulation. ++ ++Xtensa semihosting provides basic file IO calls, such as open/read/write/seek/select. ++Tensilica baremetal libc for ISS and linux platform "sim" use this interface. ++ ++Note that this allows guest direct access to the host filesystem, ++so should only be used with trusted guest OS. 
++ ++@end table ++ ++@c man end ++ ++@node QEMU User space emulator ++@chapter QEMU User space emulator ++ ++@menu ++* Supported Operating Systems :: ++* Features:: ++* Linux User space emulator:: ++* BSD User space emulator :: ++@end menu ++ ++@node Supported Operating Systems ++@section Supported Operating Systems ++ ++The following OS are supported in user space emulation: ++ ++@itemize @minus ++@item ++Linux (referred as qemu-linux-user) ++@item ++BSD (referred as qemu-bsd-user) ++@end itemize ++ ++@node Features ++@section Features ++ ++QEMU user space emulation has the following notable features: ++ ++@table @strong ++@item System call translation: ++QEMU includes a generic system call translator. This means that ++the parameters of the system calls can be converted to fix ++endianness and 32/64-bit mismatches between hosts and targets. ++IOCTLs can be converted too. ++ ++@item POSIX signal handling: ++QEMU can redirect to the running program all signals coming from ++the host (such as @code{SIGALRM}), as well as synthesize signals from ++virtual CPU exceptions (for example @code{SIGFPE} when the program ++executes a division by zero). ++ ++QEMU relies on the host kernel to emulate most signal system ++calls, for example to emulate the signal mask. On Linux, QEMU ++supports both normal and real-time signals. ++ ++@item Threading: ++On Linux, QEMU can emulate the @code{clone} syscall and create a real ++host thread (with a separate virtual CPU) for each emulated thread. ++Note that not all targets currently emulate atomic operations correctly. ++x86 and ARM use a global lock in order to preserve their semantics. ++@end table ++ ++QEMU was conceived so that ultimately it can emulate itself. Although ++it is not very useful, it is an important test to show the power of the ++emulator. 
++ ++@node Linux User space emulator ++@section Linux User space emulator ++ ++@menu ++* Quick Start:: ++* Wine launch:: ++* Command line options:: ++* Other binaries:: ++@end menu ++ ++@node Quick Start ++@subsection Quick Start ++ ++In order to launch a Linux process, QEMU needs the process executable ++itself and all the target (x86) dynamic libraries used by it. ++ ++@itemize ++ ++@item On x86, you can just try to launch any process by using the native ++libraries: ++ ++@example ++qemu-i386 -L / /bin/ls ++@end example ++ ++@code{-L /} tells that the x86 dynamic linker must be searched with a ++@file{/} prefix. ++ ++@item Since QEMU is also a linux process, you can launch QEMU with ++QEMU (NOTE: you can only do that if you compiled QEMU from the sources): ++ ++@example ++qemu-i386 -L / qemu-i386 -L / /bin/ls ++@end example ++ ++@item On non x86 CPUs, you need first to download at least an x86 glibc ++(@file{qemu-runtime-i386-XXX-.tar.gz} on the QEMU web page). Ensure that ++@code{LD_LIBRARY_PATH} is not set: ++ ++@example ++unset LD_LIBRARY_PATH ++@end example ++ ++Then you can launch the precompiled @file{ls} x86 executable: ++ ++@example ++qemu-i386 tests/i386/ls ++@end example ++You can look at @file{scripts/qemu-binfmt-conf.sh} so that ++QEMU is automatically launched by the Linux kernel when you try to ++launch x86 executables. It requires the @code{binfmt_misc} module in the ++Linux kernel. ++ ++@item The x86 version of QEMU is also included. You can try weird things such as: ++@example ++qemu-i386 /usr/local/qemu-i386/bin/qemu-i386 \ ++ /usr/local/qemu-i386/bin/ls-i386 ++@end example ++ ++@end itemize ++ ++@node Wine launch ++@subsection Wine launch ++ ++@itemize ++ ++@item Ensure that you have a working QEMU with the x86 glibc ++distribution (see previous section). 
In order to verify it, you must be ++able to do: ++ ++@example ++qemu-i386 /usr/local/qemu-i386/bin/ls-i386 ++@end example ++ ++@item Download the binary x86 Wine install ++(@file{qemu-XXX-i386-wine.tar.gz} on the QEMU web page). ++ ++@item Configure Wine on your account. Look at the provided script ++@file{/usr/local/qemu-i386/@/bin/wine-conf.sh}. Your previous ++@code{$@{HOME@}/.wine} directory is saved to @code{$@{HOME@}/.wine.org}. ++ ++@item Then you can try the example @file{putty.exe}: ++ ++@example ++qemu-i386 /usr/local/qemu-i386/wine/bin/wine \ ++ /usr/local/qemu-i386/wine/c/Program\ Files/putty.exe ++@end example ++ ++@end itemize ++ ++@node Command line options ++@subsection Command line options ++ ++@example ++@command{qemu-i386} [@option{-h}] [@option{-d}] [@option{-L} @var{path}] [@option{-s} @var{size}] [@option{-cpu} @var{model}] [@option{-g} @var{port}] [@option{-B} @var{offset}] [@option{-R} @var{size}] @var{program} [@var{arguments}...] ++@end example ++ ++@table @option ++@item -h ++Print the help ++@item -L path ++Set the x86 elf interpreter prefix (default=/usr/local/qemu-i386) ++@item -s size ++Set the x86 stack size in bytes (default=524288) ++@item -cpu model ++Select CPU model (-cpu help for list and additional feature selection) ++@item -E @var{var}=@var{value} ++Set environment @var{var} to @var{value}. ++@item -U @var{var} ++Remove @var{var} from the environment. ++@item -B offset ++Offset guest address by the specified number of bytes. This is useful when ++the address region required by guest applications is reserved on the host. ++This option is currently only supported on some hosts. ++@item -R size ++Pre-allocate a guest virtual address space of the given size (in bytes). ++"G", "M", and "k" suffixes may be used when specifying the size. ++@end table ++ ++Debug options: ++ ++@table @option ++@item -d item1,...
++Activate logging of the specified items (use '-d help' for a list of log items) ++@item -p pagesize ++Act as if the host page size was 'pagesize' bytes ++@item -g port ++Wait gdb connection to port ++@item -singlestep ++Run the emulation in single step mode. ++@end table ++ ++Environment variables: ++ ++@table @env ++@item QEMU_STRACE ++Print system calls and arguments similar to the 'strace' program ++(NOTE: the actual 'strace' program will not work because the user ++space emulator hasn't implemented ptrace). At the moment this is ++incomplete. All system calls that don't have a specific argument ++format are printed with information for six arguments. Many ++flag-style arguments don't have decoders and will show up as numbers. ++@end table ++ ++@node Other binaries ++@subsection Other binaries ++ ++@cindex user mode (Alpha) ++@command{qemu-alpha} TODO. ++ ++@cindex user mode (ARM) ++@command{qemu-armeb} TODO. ++ ++@cindex user mode (ARM) ++@command{qemu-arm} is also capable of running ARM "Angel" semihosted ELF ++binaries (as implemented by the arm-elf and arm-eabi Newlib/GDB ++configurations), and arm-uclinux bFLT format binaries. ++ ++@cindex user mode (ColdFire) ++@cindex user mode (M68K) ++@command{qemu-m68k} is capable of running semihosted binaries using the BDM ++(m5xxx-ram-hosted.ld) or m68k-sim (sim.ld) syscall interfaces, and ++coldfire uClinux bFLT format binaries. ++ ++The binary format is detected automatically. ++ ++@cindex user mode (Cris) ++@command{qemu-cris} TODO. ++ ++@cindex user mode (i386) ++@command{qemu-i386} TODO. ++@command{qemu-x86_64} TODO. ++ ++@cindex user mode (Microblaze) ++@command{qemu-microblaze} TODO. ++ ++@cindex user mode (MIPS) ++@command{qemu-mips} executes 32-bit big endian MIPS binaries (MIPS O32 ABI). ++ ++@command{qemu-mipsel} executes 32-bit little endian MIPS binaries (MIPS O32 ABI). ++ ++@command{qemu-mips64} executes 64-bit big endian MIPS binaries (MIPS N64 ABI). 
++ ++@command{qemu-mips64el} executes 64-bit little endian MIPS binaries (MIPS N64 ABI). ++ ++@command{qemu-mipsn32} executes 32-bit big endian MIPS binaries (MIPS N32 ABI). ++ ++@command{qemu-mipsn32el} executes 32-bit little endian MIPS binaries (MIPS N32 ABI). ++ ++@cindex user mode (NiosII) ++@command{qemu-nios2} TODO. ++ ++@cindex user mode (PowerPC) ++@command{qemu-ppc64abi32} TODO. ++@command{qemu-ppc64} TODO. ++@command{qemu-ppc} TODO. ++ ++@cindex user mode (SH4) ++@command{qemu-sh4eb} TODO. ++@command{qemu-sh4} TODO. ++ ++@cindex user mode (SPARC) ++@command{qemu-sparc} can execute Sparc32 binaries (Sparc32 CPU, 32 bit ABI). ++ ++@command{qemu-sparc32plus} can execute Sparc32 and SPARC32PLUS binaries ++(Sparc64 CPU, 32 bit ABI). ++ ++@command{qemu-sparc64} can execute some Sparc64 (Sparc64 CPU, 64 bit ABI) and ++SPARC32PLUS binaries (Sparc64 CPU, 32 bit ABI). ++ ++@node BSD User space emulator ++@section BSD User space emulator ++ ++@menu ++* BSD Status:: ++* BSD Quick Start:: ++* BSD Command line options:: ++@end menu ++ ++@node BSD Status ++@subsection BSD Status ++ ++@itemize @minus ++@item ++target Sparc64 on Sparc64: Some trivial programs work. ++@end itemize ++ ++@node BSD Quick Start ++@subsection Quick Start ++ ++In order to launch a BSD process, QEMU needs the process executable ++itself and all the target dynamic libraries used by it. ++ ++@itemize ++ ++@item On Sparc64, you can just try to launch any process by using the native ++libraries: ++ ++@example ++qemu-sparc64 /bin/ls ++@end example ++ ++@end itemize ++ ++@node BSD Command line options ++@subsection Command line options ++ ++@example ++@command{qemu-sparc64} [@option{-h}] [@option{-d}] [@option{-L} @var{path}] [@option{-s} @var{size}] [@option{-bsd} @var{type}] @var{program} [@var{arguments}...]
++@end example ++ ++@table @option ++@item -h ++Print the help ++@item -L path ++Set the library root path (default=/) ++@item -s size ++Set the stack size in bytes (default=524288) ++@item -ignore-environment ++Start with an empty environment. Without this option, ++the initial environment is a copy of the caller's environment. ++@item -E @var{var}=@var{value} ++Set environment @var{var} to @var{value}. ++@item -U @var{var} ++Remove @var{var} from the environment. ++@item -bsd type ++Set the type of the emulated BSD Operating system. Valid values are ++FreeBSD, NetBSD and OpenBSD (default). ++@end table ++ ++Debug options: ++ ++@table @option ++@item -d item1,... ++Activate logging of the specified items (use '-d help' for a list of log items) ++@item -p pagesize ++Act as if the host page size was 'pagesize' bytes ++@item -singlestep ++Run the emulation in single step mode. ++@end table ++ ++@node System requirements ++@chapter System requirements ++ ++@section KVM kernel module ++ ++On x86_64 hosts, the default set of CPU features enabled by the KVM accelerator ++requires the host to be running Linux v4.5 or newer. ++ ++The OpteronG[345] CPU models require KVM support for RDTSCP, which was ++added with Linux 4.5 which is supported by the major distros. And even ++if RHEL7 has kernel 3.10, KVM there has the required functionality ++to make it close to a 4.5 or newer kernel. ++ ++@include docs/security.texi ++ ++@include qemu-tech.texi ++ ++@include qemu-deprecated.texi ++ ++@node Supported build platforms ++@appendix Supported build platforms ++ ++QEMU aims to support building and executing on multiple host OS platforms. ++This appendix outlines which platforms are the major build targets. These ++platforms are used as the basis for deciding upon the minimum required ++versions of 3rd party software QEMU depends on.
The supported platforms ++are the targets for automated testing performed by the project when patches ++are submitted for review, and tested before and after merge. ++ ++If a platform is not listed here, it does not imply that QEMU won't work. ++If an unlisted platform has comparable software versions to a listed platform, ++there is every expectation that it will work. Bug reports are welcome for ++problems encountered on unlisted platforms unless they are clearly older ++vintage than what is described here. ++ ++Note that when considering software versions shipped in distros as support ++targets, QEMU considers only the version number, and assumes the features in ++that distro match the upstream release with the same version. In other words, ++if a distro backports extra features to the software in their distro, QEMU ++upstream code will not add explicit support for those backports, unless the ++feature is auto-detectable in a manner that works for the upstream releases ++too. ++ ++The Repology site @url{https://repology.org} is a useful resource to identify ++currently shipped versions of software in various operating systems, though ++it does not cover all distros listed below. ++ ++@section Linux OS ++ ++For distributions with frequent, short-lifetime releases, the project will ++aim to support all versions that are not end of life by their respective ++vendors. For the purposes of identifying supported software versions, the ++project will look at Fedora, Ubuntu, and openSUSE distros. Other short- ++lifetime distros will be assumed to ship similar software versions. ++ ++For distributions with long-lifetime releases, the project will aim to support ++the most recent major version at all times. Support for the previous major ++version will be dropped 2 years after the new major version is released, ++or when it reaches ``end of life''. 
For the purposes of identifying ++supported software versions, the project will look at RHEL, Debian, ++Ubuntu LTS, and SLES distros. Other long-lifetime distros will be ++assumed to ship similar software versions. ++ ++@section Windows ++ ++The project supports building with current versions of the MinGW toolchain, ++hosted on Linux. ++ ++@section macOS ++ ++The project supports building with the two most recent versions of macOS, with ++the current homebrew package set available. ++ ++@section FreeBSD ++ ++The project aims to support all the versions which are not end of life. ++ ++@section NetBSD ++ ++The project aims to support the most recent major version at all times. Support ++for the previous major version will be dropped 2 years after the new major ++version is released. ++ ++@section OpenBSD ++ ++The project aims to support all the versions which are not end of life. ++ ++@node License ++@appendix License ++ ++QEMU is a trademark of Fabrice Bellard. ++ ++QEMU is released under the ++@url{https://www.gnu.org/licenses/gpl-2.0.txt,GNU General Public License}, ++version 2. Parts of QEMU have specific licenses, see file ++@url{https://git.qemu.org/?p=qemu.git;a=blob_plain;f=LICENSE,LICENSE}. ++ ++@node Index ++@appendix Index ++@menu ++* Concept Index:: ++* Function Index:: ++* Keystroke Index:: ++* Program Index:: ++* Data Type Index:: ++* Variable Index:: ++@end menu ++ ++@node Concept Index ++@section Concept Index ++This is the main index. Should we combine all keywords in one index? TODO ++@printindex cp ++ ++@node Function Index ++@section Function Index ++This index could be used for command line options and monitor functions. ++@printindex fn ++ ++@node Keystroke Index ++@section Keystroke Index ++ ++This is a list of all keystrokes which have a special function ++in system emulation.
++ ++@printindex ky ++ ++@node Program Index ++@section Program Index ++@printindex pg ++ ++@node Data Type Index ++@section Data Type Index ++ ++This index could be used for qdev device names and options. ++ ++@printindex tp ++ ++@node Variable Index ++@section Variable Index ++@printindex vr ++ ++@bye diff --git a/qemu-options.hx b/qemu-options.hx -index fc17aca631..df1d27b6f2 100644 +index 1df25ae..8c48b40 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -2737,11 +2737,11 @@ be created for multiqueue vhost-user. +@@ -2878,11 +2878,11 @@ SRST - Example: - @example --qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ -- -numa node,memdev=mem \ -- -chardev socket,id=chr0,path=/path/to/socket \ -- -netdev type=vhost-user,id=net0,chardev=chr0 \ -- -device virtio-net-pci,netdev=net0 -+qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ -+ -numa node,memdev=mem \ -+ -chardev socket,id=chr0,path=/path/to/socket \ -+ -netdev type=vhost-user,id=net0,chardev=chr0 \ -+ -device virtio-net-pci,netdev=net0 - @end example + :: - @item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}] -@@ -3631,14 +3631,14 @@ ETEXI - - DEF("realtime", HAS_ARG, QEMU_OPTION_realtime, - "-realtime [mlock=on|off]\n" -- " run qemu with realtime features\n" -+ " run qemu-kvm with realtime features\n" - " mlock=on|off controls mlock support (default: on)\n", - QEMU_ARCH_ALL) - STEXI - @item -realtime mlock=on|off - @findex -realtime --Run qemu with realtime features. --mlocking qemu and guest memory can be enabled via @option{mlock=on} -+Run qemu-kvm with realtime features. -+mlocking qemu-kvm and guest memory can be enabled via @option{mlock=on} - (enabled by default). 
- ETEXI +- qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ +- -numa node,memdev=mem \ +- -chardev socket,id=chr0,path=/path/to/socket \ +- -netdev type=vhost-user,id=net0,chardev=chr0 \ +- -device virtio-net-pci,netdev=net0 ++ qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ ++ -numa node,memdev=mem \ ++ -chardev socket,id=chr0,path=/path/to/socket \ ++ -netdev type=vhost-user,id=net0,chardev=chr0 \ ++ -device virtio-net-pci,netdev=net0 + ``-netdev hubport,id=id,hubid=hubid[,netdev=nd]`` + Create a hub port on the emulated hub with ID hubid. -- -2.21.0 +1.8.3.1 diff --git a/0018-usb-xhci-Fix-PCI-capability-order.patch b/0018-usb-xhci-Fix-PCI-capability-order.patch index bc6146d..8137171 100644 --- a/0018-usb-xhci-Fix-PCI-capability-order.patch +++ b/0018-usb-xhci-Fix-PCI-capability-order.patch @@ -1,4 +1,4 @@ -From b13a7d3527c5c91e7a50236de30a2244b8453911 Mon Sep 17 00:00:00 2001 +From 50d4f1973a86696cb7487173cbdbc68453445c54 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 5 May 2017 19:06:14 +0200 Subject: usb-xhci: Fix PCI capability order @@ -56,16 +56,15 @@ Conflicts: (cherry picked from commit a42f86dc906cc7d2c16d02bf125ed76847b469cb) (cherry picked from commit 992ab2e4f6e15d3e51bc716763aa8d6f45c6d29d) -Signed-off-by: Danilo C. L. 
de Paula --- hw/usb/hcd-xhci.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 8fed2eedd6..d2b9744030 100644 +index b25cce8..9582d81 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c -@@ -3403,6 +3403,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) +@@ -3413,6 +3413,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) xhci->max_pstreams_mask = 0; } @@ -78,7 +77,7 @@ index 8fed2eedd6..d2b9744030 100644 if (xhci->msi != ON_OFF_AUTO_OFF) { ret = msi_init(dev, 0x70, xhci->numintrs, true, false, &err); /* Any error other than -ENOTSUP(board's MSI support is broken) -@@ -3451,12 +3457,6 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) +@@ -3461,12 +3467,6 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64, &xhci->mem); @@ -92,5 +91,5 @@ index 8fed2eedd6..d2b9744030 100644 /* TODO check for errors, and should fail when msix=on */ msix_init(dev, xhci->numintrs, -- -2.21.0 +1.8.3.1 diff --git a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index e167b2e..c087f5e 100644 --- a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 3fab8f5e8a9e190c1ed6916ac13c7c4d65e874b7 Mon Sep 17 00:00:00 2001 +From 5d9529f40e7cc092a57f9203aad22f3644a2b6d6 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] @@ -45,10 +45,10 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index e8b2b64d09..54108c0056 100644 +index 472bbd2..ba2dac8 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c -@@ -808,6 +808,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, +@@ -814,6 +814,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, SCSIDevice *sd = SCSI_DEVICE(dev); int ret; @@ -65,5 +65,5 @@ index e8b2b64d09..54108c0056 100644 if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { return; -- -2.21.0 +1.8.3.1 diff --git a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index b3350da..41c655c 100644 --- a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From 148e9e80a3a430615b552075082fad22d007d851 Mon Sep 17 00:00:00 2001 +From 3ea4a35afce28805241b3be3c11de605600ecda1 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts @@ -32,7 +32,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index 481dfd2a27..805f38533e 100644 +index eb54f94..ecefb08 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -351,12 +351,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, @@ -56,5 +56,5 @@ index 481dfd2a27..805f38533e 100644 spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); } -- -2.21.0 +1.8.3.1 diff --git a/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch b/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch deleted file mode 100644 index a2a800b..0000000 --- a/0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch +++ /dev/null @@ -1,61 +0,0 @@ -From ab9ebc29bb9bb142e73a160750a451d40bfe9746 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Mon, 16 Sep 2019 17:07:00 +0100 -Subject: Using ip_deq after m_free might read pointers from an allocation - reuse. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Philippe Mathieu-Daudé -Message-id: <20190916170700.647-2-philmd@redhat.com> -Patchwork-id: 90470 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] Using ip_deq after m_free might read pointers from an allocation reuse. -Bugzilla: 1749737 -RH-Acked-by: Danilo de Paula -RH-Acked-by: John Snow - -From: Samuel Thibault - -This would be difficult to exploit, but that is still related with -CVE-2019-14378 which generates fragmented IP packets that would trigger this -issue and at least produce a DoS. - -Signed-off-by: Samuel Thibault -(cherry picked from libslirp commit c59279437eda91841b9d26079c70b8a540d41204) -Signed-off-by: Philippe Mathieu-Daudé - -Signed-off-by: Danilo C. L. 
de Paula ---- - slirp/src/ip_input.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c -index 8c75d91495..df1c846ade 100644 ---- a/slirp/src/ip_input.c -+++ b/slirp/src/ip_input.c -@@ -292,6 +292,7 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) - */ - while (q != (struct ipasfrag *)&fp->frag_link && - ip->ip_off + ip->ip_len > q->ipf_off) { -+ struct ipasfrag *prev; - i = (ip->ip_off + ip->ip_len) - q->ipf_off; - if (i < q->ipf_len) { - q->ipf_len -= i; -@@ -299,9 +300,11 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) - m_adj(dtom(slirp, q), i); - break; - } -+ prev = q; - q = q->ipf_next; -- m_free(dtom(slirp, q->ipf_prev)); -- ip_deq(q->ipf_prev); -+ ip_deq(prev); -+ m_free(dtom(slirp, prev)); -+ - } - - insert: --- -2.21.0 - diff --git a/kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch b/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch similarity index 90% rename from kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch rename to 0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch index ea796d5..be042ba 100644 --- a/kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +++ b/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch @@ -1,8 +1,7 @@ -From 371d312300251c0dc24522607b06b7e47e760b53 Mon Sep 17 00:00:00 2001 +From f07c3ee209b3897efebb4cf008c88a390205a5dd Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 13 Mar 2020 12:34:32 +0000 -Subject: [PATCH 12/20] block: Versioned x-blockdev-reopen API with feature - flag +Subject: block: Versioned x-blockdev-reopen API with feature flag RH-Author: Kevin Wolf Message-id: <20200313123439.10548-7-kwolf@redhat.com> @@ -30,10 +29,10 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/qapi/block-core.json b/qapi/block-core.json -index 0cf68fe..a1e85b0 100644 +index 943df19..50b99fb 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json -@@ -4202,10 +4202,17 @@ +@@ -4126,10 +4126,17 @@ # image does not have a default backing file name as part of its # metadata. # diff --git a/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch b/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch deleted file mode 100644 index 1435017..0000000 --- a/kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 481357ea8ae32b6894860c296cf6a2898260195f Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 17 Jan 2020 13:18:27 +0100 -Subject: [PATCH 4/4] RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR - support - -RH-Author: Paolo Bonzini -Message-id: <20200117131827.20361-1-pbonzini@redhat.com> -Patchwork-id: 93405 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v3] RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR support -Bugzilla: 1559846 -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Miroslav Rezanina - -BZ: 1559846 -BRANCH: rhel-av-8.2.0 -BREW: 25775160 -UPSTREAM: RHEL only - -Nested PERF_GLOBAL_CTRL support is not present in the 8.2 kernel. Drop the -features via compat properties, they will be moved to 8.2 machine type compat -properties in the 8.3 timeframe. - -Signed-off-by: Paolo Bonzini ---- - No change, for v2 I mistakenly wrote "origin/rhel-av-8.2.0" as the - branch. 
:( - - hw/i386/pc.c | 2 ++ - 1 file changed, 2 insertions(+) - -Signed-off-by: Miroslav Rezanina ---- - hw/i386/pc.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 61e70e4..73a0f11 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -351,6 +351,8 @@ const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); - GlobalProperty pc_rhel_compat[] = { - { TYPE_X86_CPU, "host-phys-bits", "on" }, - { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, -+ { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, -+ { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, - /* bz 1508330 */ - { "vfio-pci", "x-no-geforce-quirks", "on" }, - }; --- -1.8.3.1 - diff --git a/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch b/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch deleted file mode 100644 index d717ae2..0000000 --- a/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch +++ /dev/null @@ -1,115 +0,0 @@ -From c477581ccc6962651d4d6c702a6c3e2fcc5e4205 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 2 Jan 2020 11:56:51 +0000 -Subject: [PATCH 2/2] kvm: Reallocate dirty_bmap when we change a slot - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200102115651.140177-1-dgilbert@redhat.com> -Patchwork-id: 93256 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] kvm: Reallocate dirty_bmap when we change a slot -Bugzilla: 1772774 -RH-Acked-by: Peter Xu -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Laszlo Ersek - -From: "Dr. David Alan Gilbert" - -bz: https://bugzilla.redhat.com/show_bug.cgi?id=1772774 -brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25575691 -branch: rhel-av-8.2.0 - -kvm_set_phys_mem can be called to reallocate a slot by something the -guest does (e.g. writing to PAM and other chipset registers). -This can happen in the middle of a migration, and if we're unlucky -it can now happen between the split 'sync' and 'clear'; the clear -asserts if there's no bmap to clear. 
Recreate the bmap whenever -we change the slot, keeping the clear path happy. - -Typically this is triggered by the guest rebooting during a migrate. - -Corresponds to: -https://bugzilla.redhat.com/show_bug.cgi?id=1772774 -https://bugzilla.redhat.com/show_bug.cgi?id=1771032 - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Peter Xu -(cherry picked from commit 9b3a31c745b61758aaa5466a3a9fc0526d409188) -Signed-off-by: Danilo C. L. de Paula ---- - accel/kvm/kvm-all.c | 44 +++++++++++++++++++++++++++++--------------- - 1 file changed, 29 insertions(+), 15 deletions(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index dc3ed7f..5007bda 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -518,6 +518,27 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, - - #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) - -+/* Allocate the dirty bitmap for a slot */ -+static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) -+{ -+ /* -+ * XXX bad kernel interface alert -+ * For dirty bitmap, kernel allocates array of size aligned to -+ * bits-per-long. But for case when the kernel is 64bits and -+ * the userspace is 32bits, userspace can't align to the same -+ * bits-per-long, since sizeof(long) is different between kernel -+ * and user space. This way, userspace will provide buffer which -+ * may be 4 bytes less than the kernel will use, resulting in -+ * userspace memory corruption (which is not detectable by valgrind -+ * too, in most cases). -+ * So for now, let's align to 64 instead of HOST_LONG_BITS here, in -+ * a hope that sizeof(long) won't become >8 any time soon. 
-+ */ -+ hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), -+ /*HOST_LONG_BITS*/ 64) / 8; -+ mem->dirty_bmap = g_malloc0(bitmap_size); -+} -+ - /** - * kvm_physical_sync_dirty_bitmap - Sync dirty bitmap from kernel space - * -@@ -550,23 +571,9 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, - goto out; - } - -- /* XXX bad kernel interface alert -- * For dirty bitmap, kernel allocates array of size aligned to -- * bits-per-long. But for case when the kernel is 64bits and -- * the userspace is 32bits, userspace can't align to the same -- * bits-per-long, since sizeof(long) is different between kernel -- * and user space. This way, userspace will provide buffer which -- * may be 4 bytes less than the kernel will use, resulting in -- * userspace memory corruption (which is not detectable by valgrind -- * too, in most cases). -- * So for now, let's align to 64 instead of HOST_LONG_BITS here, in -- * a hope that sizeof(long) won't become >8 any time soon. -- */ - if (!mem->dirty_bmap) { -- hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), -- /*HOST_LONG_BITS*/ 64) / 8; - /* Allocate on the first log_sync, once and for all */ -- mem->dirty_bmap = g_malloc0(bitmap_size); -+ kvm_memslot_init_dirty_bitmap(mem); - } - - d.dirty_bitmap = mem->dirty_bmap; -@@ -1067,6 +1074,13 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, - mem->ram = ram; - mem->flags = kvm_mem_flags(mr); - -+ if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { -+ /* -+ * Reallocate the bmap; it means it doesn't disappear in -+ * middle of a migrate. 
-+ */ -+ kvm_memslot_init_dirty_bitmap(mem); -+ } - err = kvm_set_user_memory_region(kml, mem, true); - if (err) { - fprintf(stderr, "%s: error registering slot: %s\n", __func__, --- -1.8.3.1 - diff --git a/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch b/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch deleted file mode 100644 index 0c1c37f..0000000 --- a/kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 71b5267ed33f9e60bc98acbabcbed62f01a96ff4 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 30 Mar 2020 11:19:23 +0100 -Subject: [PATCH 3/4] Revert "mirror: Don't let an operation wait for itself" - -RH-Author: Kevin Wolf -Message-id: <20200330111924.22938-2-kwolf@redhat.com> -Patchwork-id: 94464 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] Revert "mirror: Don't let an operation wait for itself" -Bugzilla: 1794692 -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -This reverts commit 7e6c4ff792734e196c8ca82564c56b5e7c6288ca. - -The fix was incomplete as it only protected against requests waiting for -themselves, but not against requests waiting for each other. We need a -different solution. - -Signed-off-by: Kevin Wolf -Message-Id: <20200326153628.4869-2-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit 9178f4fe5f083064f5c91f04d98c815ce5a5af1c) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/mirror.c | 21 +++++++++------------ - 1 file changed, 9 insertions(+), 12 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index cacbc70..8959e42 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -283,14 +283,11 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, - } - - static inline void coroutine_fn --mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) -+mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) - { - MirrorOp *op; - - QTAILQ_FOREACH(op, &s->ops_in_flight, next) { -- if (self == op) { -- continue; -- } - /* Do not wait on pseudo ops, because it may in turn wait on - * some other operation to start, which may in fact be the - * caller of this function. Since there is only one pseudo op -@@ -305,10 +302,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) - } - - static inline void coroutine_fn --mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self) -+mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) - { - /* Only non-active operations use up in-flight slots */ -- mirror_wait_for_any_operation(s, self, false); -+ mirror_wait_for_any_operation(s, false); - } - - /* Perform a mirror copy operation. 
-@@ -351,7 +348,7 @@ static void coroutine_fn mirror_co_read(void *opaque) - - while (s->buf_free_count < nb_chunks) { - trace_mirror_yield_in_flight(s, op->offset, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s, op); -+ mirror_wait_for_free_in_flight_slot(s); - } - - /* Now make a QEMUIOVector taking enough granularity-sized chunks -@@ -558,7 +555,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) - - while (s->in_flight >= MAX_IN_FLIGHT) { - trace_mirror_yield_in_flight(s, offset, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s, pseudo_op); -+ mirror_wait_for_free_in_flight_slot(s); - } - - if (s->ret < 0) { -@@ -612,7 +609,7 @@ static void mirror_free_init(MirrorBlockJob *s) - static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s) - { - while (s->in_flight > 0) { -- mirror_wait_for_free_in_flight_slot(s, NULL); -+ mirror_wait_for_free_in_flight_slot(s); - } - } - -@@ -797,7 +794,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) - if (s->in_flight >= MAX_IN_FLIGHT) { - trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, - s->in_flight); -- mirror_wait_for_free_in_flight_slot(s, NULL); -+ mirror_wait_for_free_in_flight_slot(s); - continue; - } - -@@ -950,7 +947,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - /* Do not start passive operations while there are active - * writes in progress */ - while (s->in_active_write_counter) { -- mirror_wait_for_any_operation(s, NULL, true); -+ mirror_wait_for_any_operation(s, true); - } - - if (s->ret < 0) { -@@ -976,7 +973,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || - (cnt == 0 && s->in_flight > 0)) { - trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s, NULL); -+ mirror_wait_for_free_in_flight_slot(s); - continue; - } else if (cnt != 0) { - delay_ns = mirror_iteration(s); --- -1.8.3.1 - diff --git 
a/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch b/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch deleted file mode 100644 index dc65c26..0000000 --- a/kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch +++ /dev/null @@ -1,63 +0,0 @@ -From ceb6d97674b8bc9a072db1be4167411bc0ee48d7 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:02 +0100 -Subject: [PATCH 091/116] Virtiofsd: fix memory leak on fuse queueinfo -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-88-dgilbert@redhat.com> -Patchwork-id: 93542 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 087/112] Virtiofsd: fix memory leak on fuse queueinfo -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -For fuse's queueinfo, both queueinfo array and queueinfos are allocated in -fv_queue_set_started() but not cleaned up when the daemon process quits. - -This fixes the leak in proper places. - -Signed-off-by: Liu Bo -Signed-off-by: Eric Ren -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 740b0b700a6338a1cf60c26229651ac5f6724944) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index b7948de..fb8d6d1 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -625,6 +625,8 @@ static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) - } - close(ourqi->kill_fd); - ourqi->kick_fd = -1; -+ free(vud->qi[qidx]); -+ vud->qi[qidx] = NULL; - } - - /* Callback from libvhost-user on start or stop of a queue */ -@@ -884,6 +886,12 @@ int virtio_session_mount(struct fuse_session *se) - void virtio_session_close(struct fuse_session *se) - { - close(se->vu_socketfd); -+ -+ if (!se->virtio_dev) { -+ return; -+ } -+ -+ free(se->virtio_dev->qi); - free(se->virtio_dev); - se->virtio_dev = NULL; - } --- -1.8.3.1 - diff --git a/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch b/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch deleted file mode 100644 index becba21..0000000 --- a/kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 0d5a09173eb75b7e56122c2aefb2646a2be58400 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Fri, 31 Jan 2020 17:12:57 +0000 -Subject: [PATCH 15/15] apic: Use 32bit APIC ID for migration instance ID - -RH-Author: Peter Xu -Message-id: <20200131171257.1066593-4-peterx@redhat.com> -Patchwork-id: 93628 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/3] apic: Use 32bit APIC ID for migration instance ID -Bugzilla: 1529231 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. 
David Alan Gilbert - -Migration is silently broken now with x2apic config like this: - - -smp 200,maxcpus=288,sockets=2,cores=72,threads=2 \ - -device intel-iommu,intremap=on,eim=on - -After migration, the guest kernel could hang at anything, due to -x2apic bit not migrated correctly in IA32_APIC_BASE on some vcpus, so -any operations related to x2apic could be broken then (e.g., RDMSR on -x2apic MSRs could fail because KVM would think that the vcpu hasn't -enabled x2apic at all). - -The issue is that the x2apic bit was never applied correctly for vcpus -whose ID > 255 when migrate completes, and that's because when we -migrate APIC we use the APICCommonState.id as instance ID of the -migration stream, while that's too short for x2apic. - -Let's use the newly introduced initial_apic_id for that. - -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Reviewed-by: Eduardo Habkost -Signed-off-by: Juan Quintela -(cherry picked from commit 0ab994867c365db21e15f9503922c79234d8e40e) -Signed-off-by: Peter Xu -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/intc/apic_common.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c -index 54b8731..b5dbeb6 100644 ---- a/hw/intc/apic_common.c -+++ b/hw/intc/apic_common.c -@@ -268,7 +268,10 @@ static void apic_common_realize(DeviceState *dev, Error **errp) - APICCommonState *s = APIC_COMMON(dev); - APICCommonClass *info; - static DeviceState *vapic; -- uint32_t instance_id = s->id; -+ uint32_t instance_id = s->initial_apic_id; -+ -+ /* Normally initial APIC ID should be no more than hundreds */ -+ assert(instance_id != VMSTATE_INSTANCE_ID_ANY); - - info = APIC_COMMON_GET_CLASS(s); - info->realize(dev, errp); --- -1.8.3.1 - diff --git a/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch b/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch deleted file mode 100644 index 7fb76c1..0000000 --- a/kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 619b3aac9790a7ca7c01846144395a318a9ab250 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 8 Apr 2020 17:29:14 +0100 -Subject: [PATCH 3/6] backup: don't acquire aio_context in backup_clean - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-4-kwolf@redhat.com> -Patchwork-id: 94596 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/6] backup: don't acquire aio_context in backup_clean -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -From: Stefan Reiter - -All code-paths leading to backup_clean (via job_clean) have the job's -context already acquired. The job's context is guaranteed to be the same -as the one used by backup_top via backup_job_create. - -Since the previous logic effectively acquired the lock twice, this -broke cleanup of backups for disks using IO threads, since the BDRV_POLL_WHILE -in bdrv_backup_top_drop -> bdrv_do_drained_begin would only release the lock -once, thus deadlocking with the IO thread. 
- -This is a partial revert of 0abf2581717a19. - -Signed-off-by: Stefan Reiter -Reviewed-by: Max Reitz -Message-Id: <20200407115651.69472-4-s.reiter@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit eca0f3524a4eb57d03a56b0cbcef5527a0981ce4) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/backup.c | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/block/backup.c b/block/backup.c -index 1383e21..ec50946 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -135,11 +135,7 @@ static void backup_abort(Job *job) - static void backup_clean(Job *job) - { - BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); -- AioContext *aio_context = bdrv_get_aio_context(s->backup_top); -- -- aio_context_acquire(aio_context); - bdrv_backup_top_drop(s->backup_top); -- aio_context_release(aio_context); - } - - void backup_do_checkpoint(BlockJob *job, Error **errp) --- -1.8.3.1 - diff --git a/kvm-backup-top-Begin-drain-earlier.patch b/kvm-backup-top-Begin-drain-earlier.patch deleted file mode 100644 index ef289b7..0000000 --- a/kvm-backup-top-Begin-drain-earlier.patch +++ /dev/null @@ -1,56 +0,0 @@ -From bc78ee07bf400cbff0021367e05d308870471710 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:45 +0000 -Subject: [PATCH 12/18] backup-top: Begin drain earlier - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-6-slp@redhat.com> -Patchwork-id: 93757 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/9] backup-top: Begin drain earlier -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -From: Max Reitz - -When dropping backup-top, we need to drain the node before freeing the -BlockCopyState. Otherwise, requests may still be in flight and then the -assertion in shres_destroy() will fail. - -(This becomes visible in intermittent failure of 056.) 
- -Cc: qemu-stable@nongnu.org -Signed-off-by: Max Reitz -Message-id: 20191219182638.104621-1-mreitz@redhat.com -Signed-off-by: Max Reitz -(cherry picked from commit 503ca1262bab2c11c533a4816d1ff4297d4f58a6) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula ---- - block/backup-top.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/block/backup-top.c b/block/backup-top.c -index 7cdb1f8..818d3f2 100644 ---- a/block/backup-top.c -+++ b/block/backup-top.c -@@ -257,12 +257,12 @@ void bdrv_backup_top_drop(BlockDriverState *bs) - BDRVBackupTopState *s = bs->opaque; - AioContext *aio_context = bdrv_get_aio_context(bs); - -- block_copy_state_free(s->bcs); -- - aio_context_acquire(aio_context); - - bdrv_drained_begin(bs); - -+ block_copy_state_free(s->bcs); -+ - s->active = false; - bdrv_child_refresh_perms(bs, bs->backing, &error_abort); - bdrv_replace_node(bs, backing_bs(bs), &error_abort); --- -1.8.3.1 - diff --git a/kvm-block-Activate-recursively-even-for-already-active-n.patch b/kvm-block-Activate-recursively-even-for-already-active-n.patch deleted file mode 100644 index d6cad06..0000000 --- a/kvm-block-Activate-recursively-even-for-already-active-n.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 0ef6691ce8964bb2bbd677756c4e594793ca3ad8 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:24:01 +0000 -Subject: [PATCH 04/18] block: Activate recursively even for already active - nodes - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-4-kwolf@redhat.com> -Patchwork-id: 93749 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/6] block: Activate recursively even for already active nodes -Bugzilla: 1781637 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -bdrv_invalidate_cache_all() assumes that all nodes in a given subtree -are either active or inactive when it starts. Therefore, as soon as it -arrives at an already active node, it stops. 
- -However, this assumption is wrong. For example, it's possible to take a -snapshot of an inactive node, which results in an active overlay over an -inactive backing file. The active overlay is probably also the root node -of an inactive BlockBackend (blk->disable_perm == true). - -In this case, bdrv_invalidate_cache_all() does not need to do anything -to activate the overlay node, but it still needs to recurse into the -children and the parents to make sure that after returning success, -really everything is activated. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Reviewed-by: Max Reitz -(cherry picked from commit 7bb4941ace471fc7dd6ded4749b95b9622baa6ed) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block.c | 50 ++++++++++++++++++++++++-------------------------- - 1 file changed, 24 insertions(+), 26 deletions(-) - -diff --git a/block.c b/block.c -index 473eb6e..2e5e8b6 100644 ---- a/block.c -+++ b/block.c -@@ -5335,10 +5335,6 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, - return; - } - -- if (!(bs->open_flags & BDRV_O_INACTIVE)) { -- return; -- } -- - QLIST_FOREACH(child, &bs->children, next) { - bdrv_co_invalidate_cache(child->bs, &local_err); - if (local_err) { -@@ -5360,34 +5356,36 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, - * just keep the extended permissions for the next time that an activation - * of the image is tried. 
- */ -- bs->open_flags &= ~BDRV_O_INACTIVE; -- bdrv_get_cumulative_perm(bs, &perm, &shared_perm); -- ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, &local_err); -- if (ret < 0) { -- bs->open_flags |= BDRV_O_INACTIVE; -- error_propagate(errp, local_err); -- return; -- } -- bdrv_set_perm(bs, perm, shared_perm); -- -- if (bs->drv->bdrv_co_invalidate_cache) { -- bs->drv->bdrv_co_invalidate_cache(bs, &local_err); -- if (local_err) { -+ if (bs->open_flags & BDRV_O_INACTIVE) { -+ bs->open_flags &= ~BDRV_O_INACTIVE; -+ bdrv_get_cumulative_perm(bs, &perm, &shared_perm); -+ ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, &local_err); -+ if (ret < 0) { - bs->open_flags |= BDRV_O_INACTIVE; - error_propagate(errp, local_err); - return; - } -- } -+ bdrv_set_perm(bs, perm, shared_perm); - -- FOR_EACH_DIRTY_BITMAP(bs, bm) { -- bdrv_dirty_bitmap_skip_store(bm, false); -- } -+ if (bs->drv->bdrv_co_invalidate_cache) { -+ bs->drv->bdrv_co_invalidate_cache(bs, &local_err); -+ if (local_err) { -+ bs->open_flags |= BDRV_O_INACTIVE; -+ error_propagate(errp, local_err); -+ return; -+ } -+ } - -- ret = refresh_total_sectors(bs, bs->total_sectors); -- if (ret < 0) { -- bs->open_flags |= BDRV_O_INACTIVE; -- error_setg_errno(errp, -ret, "Could not refresh total sector count"); -- return; -+ FOR_EACH_DIRTY_BITMAP(bs, bm) { -+ bdrv_dirty_bitmap_skip_store(bm, false); -+ } -+ -+ ret = refresh_total_sectors(bs, bs->total_sectors); -+ if (ret < 0) { -+ bs->open_flags |= BDRV_O_INACTIVE; -+ error_setg_errno(errp, -ret, "Could not refresh total sector count"); -+ return; -+ } - } - - QLIST_FOREACH(parent, &bs->parents, next_parent) { --- -1.8.3.1 - diff --git a/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch b/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch deleted file mode 100644 index b16c0b7..0000000 --- a/kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 
f17b37b58a57d849d2ff5fa04f149d9415803a39 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 8 Apr 2020 17:29:17 +0100 -Subject: [PATCH 6/6] block: Fix blk->in_flight during blk_wait_while_drained() - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-7-kwolf@redhat.com> -Patchwork-id: 94599 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/6] block: Fix blk->in_flight during blk_wait_while_drained() -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -Waiting in blk_wait_while_drained() while blk->in_flight is increased -for the current request is wrong because it will cause the drain -operation to deadlock. - -This patch makes sure that blk_wait_while_drained() is called with -blk->in_flight increased exactly once for the current request, and that -it temporarily decreases the counter while it waits. - -Fixes: cf3129323f900ef5ddbccbe86e4fa801e88c566e -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Max Reitz -Message-Id: <20200407121259.21350-4-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 7f16476fab14fc32388e0ebae793f64673848efa) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/block-backend.c | 17 +++++------------ - 1 file changed, 5 insertions(+), 12 deletions(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 610dbfa..38ae413 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -1140,10 +1140,15 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, - return 0; - } - -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ - static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) - { -+ assert(blk->in_flight > 0); -+ - if (blk->quiesce_counter && !blk->disable_request_queuing) { -+ blk_dec_in_flight(blk); - qemu_co_queue_wait(&blk->queued_requests, NULL); -+ blk_inc_in_flight(blk); - } - } - -@@ -1418,12 +1423,6 @@ static void blk_aio_read_entry(void *opaque) - BlkRwCo *rwco = &acb->rwco; - QEMUIOVector *qiov = rwco->iobuf; - -- if (rwco->blk->quiesce_counter) { -- blk_dec_in_flight(rwco->blk); -- blk_wait_while_drained(rwco->blk); -- blk_inc_in_flight(rwco->blk); -- } -- - assert(qiov->size == acb->bytes); - rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, - qiov, rwco->flags); -@@ -1436,12 +1435,6 @@ static void blk_aio_write_entry(void *opaque) - BlkRwCo *rwco = &acb->rwco; - QEMUIOVector *qiov = rwco->iobuf; - -- if (rwco->blk->quiesce_counter) { -- blk_dec_in_flight(rwco->blk); -- blk_wait_while_drained(rwco->blk); -- blk_inc_in_flight(rwco->blk); -- } -- - assert(!qiov || qiov->size == acb->bytes); - rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, - qiov, 0, rwco->flags); --- -1.8.3.1 - diff --git a/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch b/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch deleted file mode 100644 index 0bad890..0000000 --- a/kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch +++ /dev/null @@ -1,91 +0,0 @@ -From 5774af5a3c713d0c93010c30453812eae6a749cd Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:37 +0000 -Subject: [PATCH 
17/20] block: Fix cross-AioContext blockdev-snapshot - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-12-kwolf@redhat.com> -Patchwork-id: 94286 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 11/13] block: Fix cross-AioContext blockdev-snapshot -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -external_snapshot_prepare() tries to move the overlay to the AioContext -of the backing file (the snapshotted node). However, it's possible that -this doesn't work, but the backing file can instead be moved to the -overlay's AioContext (e.g. opening the backing chain for a mirror -target). - -bdrv_append() already indirectly uses bdrv_attach_node(), which takes -care to move nodes to make sure they use the same AioContext and which -tries both directions. - -So the problem has a simple fix: Just delete the unnecessary extra -bdrv_try_set_aio_context() call in external_snapshot_prepare() and -instead assert in bdrv_append() that both nodes were indeed moved to the -same AioContext. - -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-6-kwolf@redhat.com> -Tested-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit 30dd65f307b647eef8156c4a33bd007823ef85cb) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 1 + - blockdev.c | 16 ---------------- - 2 files changed, 1 insertion(+), 16 deletions(-) - -diff --git a/block.c b/block.c -index 354d388..ec29b1e 100644 ---- a/block.c -+++ b/block.c -@@ -4327,6 +4327,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, - bdrv_ref(from); - - assert(qemu_get_current_aio_context() == qemu_get_aio_context()); -+ assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); - bdrv_drained_begin(from); - - /* Put all parents into @list and calculate their cumulative permissions */ -diff --git a/blockdev.c b/blockdev.c -index 7918533..c8d4b51 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1535,9 +1535,7 @@ static void external_snapshot_prepare(BlkActionState *common, - DO_UPCAST(ExternalSnapshotState, common, common); - TransactionAction *action = common->action; - AioContext *aio_context; -- AioContext *old_context; - uint64_t perm, shared; -- int ret; - - /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar - * purpose but a different set of parameters */ -@@ -1678,20 +1676,6 @@ static void external_snapshot_prepare(BlkActionState *common, - goto out; - } - -- /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ -- old_context = bdrv_get_aio_context(state->new_bs); -- aio_context_release(aio_context); -- aio_context_acquire(old_context); -- -- ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); -- -- aio_context_release(old_context); -- aio_context_acquire(aio_context); -- -- if (ret < 0) { -- goto out; -- } -- - /* This removes our old bs and adds the new bs. This is an operation that - * can fail, so we need to do it in .prepare; undoing it for abort is - * always possible. 
*/ --- -1.8.3.1 - diff --git a/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch b/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch deleted file mode 100644 index 1735dc0..0000000 --- a/kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 05452efd7e0fb0522099ae09a396f8f97e66014a Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:47 +0000 -Subject: [PATCH 06/20] block: Fix leak in bdrv_create_file_fallback() - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-7-mlevitsk@redhat.com> -Patchwork-id: 94229 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/6] block: Fix leak in bdrv_create_file_fallback() -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -@options is leaked by the first two return statements in this function. - -Note that blk_new_open() takes the reference to @options even on -failure, so all we need to do to fix the leak is to move the QDict -allocation down to where we actually need it. - -Reported-by: Coverity (CID 1419884) -Fixes: fd17146cd93d1704cd96d7c2757b325fc7aac6fd - ("block: Generic file creation fallback") -Signed-off-by: Max Reitz -Message-Id: <20200225155618.133412-1-mreitz@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit eeea1faa099f82328f5831cf252f8ce0a59a9287) -Signed-off-by: Maxim Levitsky - -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/block.c b/block.c -index 3beec7f..e1a4e38 100644 ---- a/block.c -+++ b/block.c -@@ -600,7 +600,7 @@ static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, - QemuOpts *opts, Error **errp) - { - BlockBackend *blk; -- QDict *options = qdict_new(); -+ QDict *options; - int64_t size = 0; - char *buf = NULL; - PreallocMode prealloc; -@@ -623,6 +623,7 @@ static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, - return -ENOTSUP; - } - -+ options = qdict_new(); - qdict_put_str(options, "driver", drv->format_name); - - blk = blk_new_open(filename, NULL, options, --- -1.8.3.1 - diff --git a/kvm-block-Generic-file-creation-fallback.patch b/kvm-block-Generic-file-creation-fallback.patch deleted file mode 100644 index a5dd1d7..0000000 --- a/kvm-block-Generic-file-creation-fallback.patch +++ /dev/null @@ -1,227 +0,0 @@ -From 882d09226b7f45b72c5b7763c4c4aba182e0f8a1 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:43 +0000 -Subject: [PATCH 02/20] block: Generic file creation fallback - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-3-mlevitsk@redhat.com> -Patchwork-id: 94227 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/6] block: Generic file creation fallback -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -If a protocol driver does not support image creation, we can see whether -maybe the file exists already. If so, just truncating it will be -sufficient. - -Signed-off-by: Max Reitz -Message-Id: <20200122164532.178040-3-mreitz@redhat.com> -Signed-off-by: Max Reitz -(cherry picked from commit fd17146cd93d1704cd96d7c2757b325fc7aac6fd) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 147 insertions(+), 12 deletions(-) - -diff --git a/block.c b/block.c -index 2e5e8b6..3beec7f 100644 ---- a/block.c -+++ b/block.c -@@ -532,20 +532,139 @@ out: - return ret; - } - --int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) -+/** -+ * Helper function for bdrv_create_file_fallback(): Resize @blk to at -+ * least the given @minimum_size. -+ * -+ * On success, return @blk's actual length. -+ * Otherwise, return -errno. -+ */ -+static int64_t create_file_fallback_truncate(BlockBackend *blk, -+ int64_t minimum_size, Error **errp) - { -- BlockDriver *drv; -+ Error *local_err = NULL; -+ int64_t size; -+ int ret; -+ -+ ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, &local_err); -+ if (ret < 0 && ret != -ENOTSUP) { -+ error_propagate(errp, local_err); -+ return ret; -+ } -+ -+ size = blk_getlength(blk); -+ if (size < 0) { -+ error_free(local_err); -+ error_setg_errno(errp, -size, -+ "Failed to inquire the new image file's length"); -+ return size; -+ } -+ -+ if (size < minimum_size) { -+ /* Need to grow the image, but we failed to do that */ -+ error_propagate(errp, local_err); -+ return -ENOTSUP; -+ } -+ -+ error_free(local_err); -+ local_err = NULL; -+ -+ return size; -+} -+ -+/** -+ * Helper function for bdrv_create_file_fallback(): Zero the first -+ * sector to remove any potentially pre-existing image header. 
-+ */ -+static int create_file_fallback_zero_first_sector(BlockBackend *blk, -+ int64_t current_size, -+ Error **errp) -+{ -+ int64_t bytes_to_clear; -+ int ret; -+ -+ bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE); -+ if (bytes_to_clear) { -+ ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, -+ "Failed to clear the new image's first sector"); -+ return ret; -+ } -+ } -+ -+ return 0; -+} -+ -+static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, -+ QemuOpts *opts, Error **errp) -+{ -+ BlockBackend *blk; -+ QDict *options = qdict_new(); -+ int64_t size = 0; -+ char *buf = NULL; -+ PreallocMode prealloc; - Error *local_err = NULL; - int ret; - -+ size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0); -+ buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); -+ prealloc = qapi_enum_parse(&PreallocMode_lookup, buf, -+ PREALLOC_MODE_OFF, &local_err); -+ g_free(buf); -+ if (local_err) { -+ error_propagate(errp, local_err); -+ return -EINVAL; -+ } -+ -+ if (prealloc != PREALLOC_MODE_OFF) { -+ error_setg(errp, "Unsupported preallocation mode '%s'", -+ PreallocMode_str(prealloc)); -+ return -ENOTSUP; -+ } -+ -+ qdict_put_str(options, "driver", drv->format_name); -+ -+ blk = blk_new_open(filename, NULL, options, -+ BDRV_O_RDWR | BDRV_O_RESIZE, errp); -+ if (!blk) { -+ error_prepend(errp, "Protocol driver '%s' does not support image " -+ "creation, and opening the image failed: ", -+ drv->format_name); -+ return -EINVAL; -+ } -+ -+ size = create_file_fallback_truncate(blk, size, errp); -+ if (size < 0) { -+ ret = size; -+ goto out; -+ } -+ -+ ret = create_file_fallback_zero_first_sector(blk, size, errp); -+ if (ret < 0) { -+ goto out; -+ } -+ -+ ret = 0; -+out: -+ blk_unref(blk); -+ return ret; -+} -+ -+int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) -+{ -+ BlockDriver *drv; -+ - drv = bdrv_find_protocol(filename, true, errp); - if (drv == NULL) { - return 
-ENOENT; - } - -- ret = bdrv_create(drv, filename, opts, &local_err); -- error_propagate(errp, local_err); -- return ret; -+ if (drv->bdrv_co_create_opts) { -+ return bdrv_create(drv, filename, opts, errp); -+ } else { -+ return bdrv_create_file_fallback(filename, drv, opts, errp); -+ } - } - - /** -@@ -1422,6 +1541,24 @@ QemuOptsList bdrv_runtime_opts = { - }, - }; - -+static QemuOptsList fallback_create_opts = { -+ .name = "fallback-create-opts", -+ .head = QTAILQ_HEAD_INITIALIZER(fallback_create_opts.head), -+ .desc = { -+ { -+ .name = BLOCK_OPT_SIZE, -+ .type = QEMU_OPT_SIZE, -+ .help = "Virtual disk size" -+ }, -+ { -+ .name = BLOCK_OPT_PREALLOC, -+ .type = QEMU_OPT_STRING, -+ .help = "Preallocation mode (allowed values: off)" -+ }, -+ { /* end of list */ } -+ } -+}; -+ - /* - * Common part for opening disk images and files - * -@@ -5743,14 +5880,12 @@ void bdrv_img_create(const char *filename, const char *fmt, - return; - } - -- if (!proto_drv->create_opts) { -- error_setg(errp, "Protocol driver '%s' does not support image creation", -- proto_drv->format_name); -- return; -- } -- - create_opts = qemu_opts_append(create_opts, drv->create_opts); -- create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); -+ if (proto_drv->create_opts) { -+ create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); -+ } else { -+ create_opts = qemu_opts_append(create_opts, &fallback_create_opts); -+ } - - /* Create parameter list with default values */ - opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); --- -1.8.3.1 - diff --git a/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch b/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch deleted file mode 100644 index 463501a..0000000 --- a/kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch +++ /dev/null @@ -1,295 +0,0 @@ -From 52cc1d1cd2f695c5761d65baec961d14552a79ed Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 8 Apr 2020 17:29:16 +0100 -Subject: [PATCH 
5/6] block: Increase BB.in_flight for coroutine and sync - interfaces - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-6-kwolf@redhat.com> -Patchwork-id: 94600 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/6] block: Increase BB.in_flight for coroutine and sync interfaces -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -External callers of blk_co_*() and of the synchronous blk_*() functions -don't currently increase the BlockBackend.in_flight counter, but calls -from blk_aio_*() do, so there is an inconsistency whether the counter -has been increased or not. - -This patch moves the actual operations to static functions that can -later know they will always be called with in_flight increased exactly -once, even for external callers using the blk_co_*() coroutine -interfaces. - -If the public blk_co_*() interface is unused, remove it. - -Signed-off-by: Kevin Wolf -Message-Id: <20200407121259.21350-3-kwolf@redhat.com> -Reviewed-by: Max Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit fbb92b6798894d3bf62fe3578d99fa62c720b242) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/block-backend.c | 103 ++++++++++++++++++++++++++++++++--------- - include/sysemu/block-backend.h | 1 - - 2 files changed, 80 insertions(+), 24 deletions(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 17b2e87..610dbfa 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -1147,9 +1147,10 @@ static void coroutine_fn blk_wait_while_drained(BlockBackend *blk) - } - } - --int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, -- unsigned int bytes, QEMUIOVector *qiov, -- BdrvRequestFlags flags) -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ -+static int coroutine_fn -+blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, -+ QEMUIOVector *qiov, BdrvRequestFlags flags) - { - int ret; - BlockDriverState *bs; -@@ -1178,10 +1179,24 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, - return ret; - } - --int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, -- unsigned int bytes, -- QEMUIOVector *qiov, size_t qiov_offset, -- BdrvRequestFlags flags) -+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, -+ unsigned int bytes, QEMUIOVector *qiov, -+ BdrvRequestFlags flags) -+{ -+ int ret; -+ -+ blk_inc_in_flight(blk); -+ ret = blk_do_preadv(blk, offset, bytes, qiov, flags); -+ blk_dec_in_flight(blk); -+ -+ return ret; -+} -+ -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ -+static int coroutine_fn -+blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes, -+ QEMUIOVector *qiov, size_t qiov_offset, -+ BdrvRequestFlags flags) - { - int ret; - BlockDriverState *bs; -@@ -1214,6 +1229,20 @@ int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, - return ret; - } - -+int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, -+ unsigned int bytes, -+ QEMUIOVector *qiov, size_t qiov_offset, -+ BdrvRequestFlags flags) -+{ -+ int ret; -+ -+ 
blk_inc_in_flight(blk); -+ ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags); -+ blk_dec_in_flight(blk); -+ -+ return ret; -+} -+ - int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, - unsigned int bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) -@@ -1234,7 +1263,7 @@ static void blk_read_entry(void *opaque) - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; - -- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size, -+ rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size, - qiov, rwco->flags); - aio_wait_kick(); - } -@@ -1244,8 +1273,8 @@ static void blk_write_entry(void *opaque) - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; - -- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size, -- qiov, rwco->flags); -+ rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size, -+ qiov, 0, rwco->flags); - aio_wait_kick(); - } - -@@ -1262,6 +1291,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, - .ret = NOT_DONE, - }; - -+ blk_inc_in_flight(blk); - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - co_entry(&rwco); -@@ -1270,6 +1300,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, - bdrv_coroutine_enter(blk_bs(blk), co); - BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE); - } -+ blk_dec_in_flight(blk); - - return rwco.ret; - } -@@ -1394,7 +1425,7 @@ static void blk_aio_read_entry(void *opaque) - } - - assert(qiov->size == acb->bytes); -- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, -+ rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes, - qiov, rwco->flags); - blk_aio_complete(acb); - } -@@ -1412,8 +1443,8 @@ static void blk_aio_write_entry(void *opaque) - } - - assert(!qiov || qiov->size == acb->bytes); -- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, -- qiov, rwco->flags); -+ rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes, -+ 
qiov, 0, rwco->flags); - blk_aio_complete(acb); - } - -@@ -1498,7 +1529,9 @@ void blk_aio_cancel_async(BlockAIOCB *acb) - bdrv_aio_cancel_async(acb); - } - --int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ -+static int coroutine_fn -+blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf) - { - blk_wait_while_drained(blk); - -@@ -1514,8 +1547,7 @@ static void blk_ioctl_entry(void *opaque) - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; - -- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, -- qiov->iov[0].iov_base); -+ rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base); - aio_wait_kick(); - } - -@@ -1529,7 +1561,7 @@ static void blk_aio_ioctl_entry(void *opaque) - BlkAioEmAIOCB *acb = opaque; - BlkRwCo *rwco = &acb->rwco; - -- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf); -+ rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf); - - blk_aio_complete(acb); - } -@@ -1540,7 +1572,9 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, - return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque); - } - --int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ -+static int coroutine_fn -+blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes) - { - int ret; - -@@ -1559,7 +1593,7 @@ static void blk_aio_pdiscard_entry(void *opaque) - BlkAioEmAIOCB *acb = opaque; - BlkRwCo *rwco = &acb->rwco; - -- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); -+ rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes); - blk_aio_complete(acb); - } - -@@ -1571,12 +1605,23 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, - cb, opaque); - } - -+int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) -+{ -+ int ret; -+ -+ blk_inc_in_flight(blk); 
-+ ret = blk_do_pdiscard(blk, offset, bytes); -+ blk_dec_in_flight(blk); -+ -+ return ret; -+} -+ - static void blk_pdiscard_entry(void *opaque) - { - BlkRwCo *rwco = opaque; - QEMUIOVector *qiov = rwco->iobuf; - -- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); -+ rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size); - aio_wait_kick(); - } - -@@ -1585,7 +1630,8 @@ int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) - return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); - } - --int blk_co_flush(BlockBackend *blk) -+/* To be called between exactly one pair of blk_inc/dec_in_flight() */ -+static int coroutine_fn blk_do_flush(BlockBackend *blk) - { - blk_wait_while_drained(blk); - -@@ -1601,7 +1647,7 @@ static void blk_aio_flush_entry(void *opaque) - BlkAioEmAIOCB *acb = opaque; - BlkRwCo *rwco = &acb->rwco; - -- rwco->ret = blk_co_flush(rwco->blk); -+ rwco->ret = blk_do_flush(rwco->blk); - blk_aio_complete(acb); - } - -@@ -1611,10 +1657,21 @@ BlockAIOCB *blk_aio_flush(BlockBackend *blk, - return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); - } - -+int coroutine_fn blk_co_flush(BlockBackend *blk) -+{ -+ int ret; -+ -+ blk_inc_in_flight(blk); -+ ret = blk_do_flush(blk); -+ blk_dec_in_flight(blk); -+ -+ return ret; -+} -+ - static void blk_flush_entry(void *opaque) - { - BlkRwCo *rwco = opaque; -- rwco->ret = blk_co_flush(rwco->blk); -+ rwco->ret = blk_do_flush(rwco->blk); - aio_wait_kick(); - } - -diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h -index b198dec..9bbdbd6 100644 ---- a/include/sysemu/block-backend.h -+++ b/include/sysemu/block-backend.h -@@ -171,7 +171,6 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int bytes, - BlockCompletionFunc *cb, void *opaque); - void blk_aio_cancel(BlockAIOCB *acb); - void blk_aio_cancel_async(BlockAIOCB *acb); --int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf); - int 
blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf); - BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, - BlockCompletionFunc *cb, void *opaque); --- -1.8.3.1 - diff --git a/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch b/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch deleted file mode 100644 index 72c8986..0000000 --- a/kvm-block-Introduce-bdrv_reopen_commit_post-step.patch +++ /dev/null @@ -1,65 +0,0 @@ -From f7dd953c2d0380cef3c351afb03d68c6fcda1dca Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:28 +0000 -Subject: [PATCH 08/20] block: Introduce 'bdrv_reopen_commit_post' step - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-3-kwolf@redhat.com> -Patchwork-id: 94278 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 02/13] block: Introduce 'bdrv_reopen_commit_post' step -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -From: Peter Krempa - -Add another step in the reopen process where driver can execute code -after permission changes are comitted. - -Signed-off-by: Peter Krempa -Message-Id: -Signed-off-by: Kevin Wolf -(cherry picked from commit 17e1e2be5f9e84e0298e28e70675655b43e225ea) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 9 +++++++++ - include/block/block_int.h | 1 + - 2 files changed, 10 insertions(+) - -diff --git a/block.c b/block.c -index e1a4e38..a744bb5 100644 ---- a/block.c -+++ b/block.c -@@ -3657,6 +3657,15 @@ cleanup_perm: - } - } - } -+ -+ if (ret == 0) { -+ QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { -+ BlockDriverState *bs = bs_entry->state.bs; -+ -+ if (bs->drv->bdrv_reopen_commit_post) -+ bs->drv->bdrv_reopen_commit_post(&bs_entry->state); -+ } -+ } - cleanup: - QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { - if (ret) { -diff --git a/include/block/block_int.h b/include/block/block_int.h -index dd033d0..c168690 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -123,6 +123,7 @@ struct BlockDriver { - int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, - BlockReopenQueue *queue, Error **errp); - void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); -+ void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state); - void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); - void (*bdrv_join_options)(QDict *options, QDict *old_options); - --- -1.8.3.1 - diff --git a/kvm-block-Make-bdrv_get_cumulative_perm-public.patch b/kvm-block-Make-bdrv_get_cumulative_perm-public.patch deleted file mode 100644 index 2f0f999..0000000 --- a/kvm-block-Make-bdrv_get_cumulative_perm-public.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 294ab4c4963295556d12ac15150b48c8536175a7 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:33 +0000 -Subject: [PATCH 13/20] block: Make bdrv_get_cumulative_perm() public - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-8-kwolf@redhat.com> -Patchwork-id: 94287 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 07/13] block: Make bdrv_get_cumulative_perm() public -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. 
Berrange -RH-Acked-by: Peter Krempa - -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-2-kwolf@redhat.com> -Reviewed-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit c7a0f2be8f95b220cdadbba9a9236eaf115951dc) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block.c | 6 ++---- - include/block/block_int.h | 3 +++ - 2 files changed, 5 insertions(+), 4 deletions(-) - -diff --git a/block.c b/block.c -index 39e4647..354d388 100644 ---- a/block.c -+++ b/block.c -@@ -1850,8 +1850,6 @@ static int bdrv_child_check_perm(BdrvChild *c, BlockReopenQueue *q, - bool *tighten_restrictions, Error **errp); - static void bdrv_child_abort_perm_update(BdrvChild *c); - static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); --static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, -- uint64_t *shared_perm); - - typedef struct BlockReopenQueueEntry { - bool prepared; -@@ -2075,8 +2073,8 @@ static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, - } - } - --static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, -- uint64_t *shared_perm) -+void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, -+ uint64_t *shared_perm) - { - BdrvChild *c; - uint64_t cumulative_perms = 0; -diff --git a/include/block/block_int.h b/include/block/block_int.h -index c168690..96e327b 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -1228,6 +1228,9 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, - void *opaque, Error **errp); - void bdrv_root_unref_child(BdrvChild *child); - -+void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, -+ uint64_t *shared_perm); -+ - /** - * Sets a BdrvChild's permissions. 
Avoid if the parent is a BDS; use - * bdrv_child_refresh_perms() instead and make the parent's --- -1.8.3.1 - diff --git a/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch b/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch deleted file mode 100644 index de85205..0000000 --- a/kvm-block-Relax-restrictions-for-blockdev-snapshot.patch +++ /dev/null @@ -1,117 +0,0 @@ -From 9ba321e18a357c1a3a238ceee301bbb174f96eee Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:34 +0000 -Subject: [PATCH 14/20] block: Relax restrictions for blockdev-snapshot - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-9-kwolf@redhat.com> -Patchwork-id: 94285 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 08/13] block: Relax restrictions for blockdev-snapshot -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -blockdev-snapshot returned an error if the overlay was already in use, -which it defined as having any BlockBackend parent. This is in fact both -too strict (some parents can tolerate the change of visible data caused -by attaching a backing file) and too loose (some non-BlockBackend -parents may not be happy with it). - -One important use case that is prevented by the too strict check is live -storage migration with blockdev-mirror. Here, the target node is -usually opened without a backing file so that the active layer is -mirrored while its backing chain can be copied in the background. - -The backing chain should be attached to the mirror target node when -finalising the job, just before switching the users of the source node -to the new copy (at which point the mirror job still has a reference to -the node). drive-mirror did this automatically, but with blockdev-mirror -this is the job of the QMP client, so it needs a way to do this. - -blockdev-snapshot is the obvious way, so this patch makes it work in -this scenario. 
The new condition is that no parent uses CONSISTENT_READ -permissions. This will ensure that the operation will still be blocked -when the node is attached to the guest device, so blockdev-snapshot -remains safe. - -(For the sake of completeness, x-blockdev-reopen can be used to achieve -the same, however it is a big hammer, performs the graph change -completely unchecked and is still experimental. So even with the option -of using x-blockdev-reopen, there are reasons why blockdev-snapshot -should be able to perform this operation.) - -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-3-kwolf@redhat.com> -Reviewed-by: Peter Krempa -Tested-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit d29d3d1f80b3947fb26e7139645c83de66d146a9) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - blockdev.c | 14 ++++++++------ - tests/qemu-iotests/085.out | 4 ++-- - 2 files changed, 10 insertions(+), 8 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 4cd9a58..7918533 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1536,6 +1536,7 @@ static void external_snapshot_prepare(BlkActionState *common, - TransactionAction *action = common->action; - AioContext *aio_context; - AioContext *old_context; -+ uint64_t perm, shared; - int ret; - - /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar -@@ -1656,16 +1657,17 @@ static void external_snapshot_prepare(BlkActionState *common, - goto out; - } - -- if (bdrv_has_blk(state->new_bs)) { -+ /* -+ * Allow attaching a backing file to an overlay that's already in use only -+ * if the parents don't assume that they are already seeing a valid image. -+ * (Specifically, allow it as a mirror target, which is write-only access.) 
-+ */ -+ bdrv_get_cumulative_perm(state->new_bs, &perm, &shared); -+ if (perm & BLK_PERM_CONSISTENT_READ) { - error_setg(errp, "The overlay is already in use"); - goto out; - } - -- if (bdrv_op_is_blocked(state->new_bs, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, -- errp)) { -- goto out; -- } -- - if (state->new_bs->backing != NULL) { - error_setg(errp, "The overlay already has a backing image"); - goto out; -diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out -index bb50227..487d920 100644 ---- a/tests/qemu-iotests/085.out -+++ b/tests/qemu-iotests/085.out -@@ -82,7 +82,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ - === Invalid command - cannot create a snapshot using a file BDS === - - { 'execute': 'blockdev-snapshot', 'arguments': { 'node':'virtio0', 'overlay':'file_12' } } --{"error": {"class": "GenericError", "desc": "The overlay does not support backing images"}} -+{"error": {"class": "GenericError", "desc": "The overlay is already in use"}} - - === Invalid command - snapshot node used as active layer === - -@@ -96,7 +96,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ - === Invalid command - snapshot node used as backing hd === - - { 'execute': 'blockdev-snapshot', 'arguments': { 'node': 'virtio0', 'overlay':'snap_11' } } --{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'snap_12'"}} -+{"error": {"class": "GenericError", "desc": "The overlay is already in use"}} - - === Invalid command - snapshot node has a backing image === - --- -1.8.3.1 - diff --git a/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch b/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch deleted file mode 100644 index 9d49cfa..0000000 --- a/kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch +++ /dev/null @@ -1,158 +0,0 @@ -From 6cc456c4c1e6557fdc7e138e8ef8171b71609222 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 
8 Apr 2020 17:29:15 +0100 -Subject: [PATCH 4/6] block-backend: Reorder flush/pdiscard function - definitions - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-5-kwolf@redhat.com> -Patchwork-id: 94598 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/6] block-backend: Reorder flush/pdiscard function definitions -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -Move all variants of the flush/pdiscard functions to a single place and -put the blk_co_*() version first because it is called by all other -variants (and will become static in the next patch). - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Max Reitz -Message-Id: <20200407121259.21350-2-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 564806c529d4e0acad209b1e5b864a8886092f1f) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/block-backend.c | 92 +++++++++++++++++++++++++-------------------------- - 1 file changed, 46 insertions(+), 46 deletions(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 8b8f2a8..17b2e87 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -1488,38 +1488,6 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, - blk_aio_write_entry, flags, cb, opaque); - } - --static void blk_aio_flush_entry(void *opaque) --{ -- BlkAioEmAIOCB *acb = opaque; -- BlkRwCo *rwco = &acb->rwco; -- -- rwco->ret = blk_co_flush(rwco->blk); -- blk_aio_complete(acb); --} -- --BlockAIOCB *blk_aio_flush(BlockBackend *blk, -- BlockCompletionFunc *cb, void *opaque) --{ -- return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); --} -- --static void blk_aio_pdiscard_entry(void *opaque) --{ -- BlkAioEmAIOCB *acb = opaque; -- BlkRwCo *rwco = &acb->rwco; -- -- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); -- blk_aio_complete(acb); --} -- --BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, -- 
int64_t offset, int bytes, -- BlockCompletionFunc *cb, void *opaque) --{ -- return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, -- cb, opaque); --} -- - void blk_aio_cancel(BlockAIOCB *acb) - { - bdrv_aio_cancel(acb); -@@ -1586,6 +1554,37 @@ int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes) - return bdrv_co_pdiscard(blk->root, offset, bytes); - } - -+static void blk_aio_pdiscard_entry(void *opaque) -+{ -+ BlkAioEmAIOCB *acb = opaque; -+ BlkRwCo *rwco = &acb->rwco; -+ -+ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); -+ blk_aio_complete(acb); -+} -+ -+BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, -+ int64_t offset, int bytes, -+ BlockCompletionFunc *cb, void *opaque) -+{ -+ return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0, -+ cb, opaque); -+} -+ -+static void blk_pdiscard_entry(void *opaque) -+{ -+ BlkRwCo *rwco = opaque; -+ QEMUIOVector *qiov = rwco->iobuf; -+ -+ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); -+ aio_wait_kick(); -+} -+ -+int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) -+{ -+ return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); -+} -+ - int blk_co_flush(BlockBackend *blk) - { - blk_wait_while_drained(blk); -@@ -1597,6 +1596,21 @@ int blk_co_flush(BlockBackend *blk) - return bdrv_co_flush(blk_bs(blk)); - } - -+static void blk_aio_flush_entry(void *opaque) -+{ -+ BlkAioEmAIOCB *acb = opaque; -+ BlkRwCo *rwco = &acb->rwco; -+ -+ rwco->ret = blk_co_flush(rwco->blk); -+ blk_aio_complete(acb); -+} -+ -+BlockAIOCB *blk_aio_flush(BlockBackend *blk, -+ BlockCompletionFunc *cb, void *opaque) -+{ -+ return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); -+} -+ - static void blk_flush_entry(void *opaque) - { - BlkRwCo *rwco = opaque; -@@ -2083,20 +2097,6 @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact, - return bdrv_truncate(blk->root, offset, exact, prealloc, errp); - } - --static void 
blk_pdiscard_entry(void *opaque) --{ -- BlkRwCo *rwco = opaque; -- QEMUIOVector *qiov = rwco->iobuf; -- -- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size); -- aio_wait_kick(); --} -- --int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes) --{ -- return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0); --} -- - int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, - int64_t pos, int size) - { --- -1.8.3.1 - diff --git a/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch b/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch deleted file mode 100644 index 45f506c..0000000 --- a/kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch +++ /dev/null @@ -1,130 +0,0 @@ -From aefff389c4d11bd69180db7177135c4645a9b1bd Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:46 +0000 -Subject: [PATCH 13/18] block/backup-top: Don't acquire context while dropping - top - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-7-slp@redhat.com> -Patchwork-id: 93759 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/9] block/backup-top: Don't acquire context while dropping top -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -All paths that lead to bdrv_backup_top_drop(), except for the call -from backup_clean(), imply that the BDS AioContext has already been -acquired, so doing it there too can potentially lead to QEMU hanging -on AIO_WAIT_WHILE(). - -An easy way to trigger this situation is by issuing a two actions -transaction, with a proper and a bogus blockdev-backup, so the second -one will trigger a rollback. 
This will trigger a hang with an stack -trace like this one: - - #0 0x00007fb680c75016 in __GI_ppoll (fds=0x55e74580f7c0, nfds=1, timeout=, - timeout@entry=0x0, sigmask=sigmask@entry=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:39 - #1 0x000055e743386e09 in ppoll (__ss=0x0, __timeout=0x0, __nfds=, __fds=) - at /usr/include/bits/poll2.h:77 - #2 0x000055e743386e09 in qemu_poll_ns - (fds=, nfds=, timeout=) at util/qemu-timer.c:336 - #3 0x000055e743388dc4 in aio_poll (ctx=0x55e7458925d0, blocking=blocking@entry=true) - at util/aio-posix.c:669 - #4 0x000055e743305dea in bdrv_flush (bs=bs@entry=0x55e74593c0d0) at block/io.c:2878 - #5 0x000055e7432be58e in bdrv_close (bs=0x55e74593c0d0) at block.c:4017 - #6 0x000055e7432be58e in bdrv_delete (bs=) at block.c:4262 - #7 0x000055e7432be58e in bdrv_unref (bs=bs@entry=0x55e74593c0d0) at block.c:5644 - #8 0x000055e743316b9b in bdrv_backup_top_drop (bs=bs@entry=0x55e74593c0d0) at block/backup-top.c:273 - #9 0x000055e74331461f in backup_job_create - (job_id=0x0, bs=bs@entry=0x55e7458d5820, target=target@entry=0x55e74589f640, speed=0, sync_mode=MIRROR_SYNC_MODE_FULL, sync_bitmap=sync_bitmap@entry=0x0, bitmap_mode=BITMAP_SYNC_MODE_ON_SUCCESS, compress=false, filter_node_name=0x0, on_source_error=BLOCKDEV_ON_ERROR_REPORT, on_target_error=BLOCKDEV_ON_ERROR_REPORT, creation_flags=0, cb=0x0, opaque=0x0, txn=0x0, errp=0x7ffddfd1efb0) at block/backup.c:478 - #10 0x000055e74315bc52 in do_backup_common - (backup=backup@entry=0x55e746c066d0, bs=bs@entry=0x55e7458d5820, target_bs=target_bs@entry=0x55e74589f640, aio_context=aio_context@entry=0x55e7458a91e0, txn=txn@entry=0x0, errp=errp@entry=0x7ffddfd1efb0) - at blockdev.c:3580 - #11 0x000055e74315c37c in do_blockdev_backup - (backup=backup@entry=0x55e746c066d0, txn=0x0, errp=errp@entry=0x7ffddfd1efb0) - at /usr/src/debug/qemu-kvm-4.2.0-2.module+el8.2.0+5135+ed3b2489.x86_64/./qapi/qapi-types-block-core.h:1492 - #12 0x000055e74315c449 in blockdev_backup_prepare (common=0x55e746a8de90, 
errp=0x7ffddfd1f018) - at blockdev.c:1885 - #13 0x000055e743160152 in qmp_transaction - (dev_list=, has_props=, props=0x55e7467fe2c0, errp=errp@entry=0x7ffddfd1f088) at blockdev.c:2340 - #14 0x000055e743287ff5 in qmp_marshal_transaction - (args=, ret=, errp=0x7ffddfd1f0f8) - at qapi/qapi-commands-transaction.c:44 - #15 0x000055e74333de6c in do_qmp_dispatch - (errp=0x7ffddfd1f0f0, allow_oob=, request=, cmds=0x55e743c28d60 ) at qapi/qmp-dispatch.c:132 - #16 0x000055e74333de6c in qmp_dispatch - (cmds=0x55e743c28d60 , request=, allow_oob=) - at qapi/qmp-dispatch.c:175 - #17 0x000055e74325c061 in monitor_qmp_dispatch (mon=0x55e745908030, req=) - at monitor/qmp.c:145 - #18 0x000055e74325c6fa in monitor_qmp_bh_dispatcher (data=) at monitor/qmp.c:234 - #19 0x000055e743385866 in aio_bh_call (bh=0x55e745807ae0) at util/async.c:117 - #20 0x000055e743385866 in aio_bh_poll (ctx=ctx@entry=0x55e7458067a0) at util/async.c:117 - #21 0x000055e743388c54 in aio_dispatch (ctx=0x55e7458067a0) at util/aio-posix.c:459 - #22 0x000055e743385742 in aio_ctx_dispatch - (source=, callback=, user_data=) at util/async.c:260 - #23 0x00007fb68543e67d in g_main_dispatch (context=0x55e745893a40) at gmain.c:3176 - #24 0x00007fb68543e67d in g_main_context_dispatch (context=context@entry=0x55e745893a40) at gmain.c:3829 - #25 0x000055e743387d08 in glib_pollfds_poll () at util/main-loop.c:219 - #26 0x000055e743387d08 in os_host_main_loop_wait (timeout=) at util/main-loop.c:242 - #27 0x000055e743387d08 in main_loop_wait (nonblocking=) at util/main-loop.c:518 - #28 0x000055e74316a3c1 in main_loop () at vl.c:1828 - #29 0x000055e743016a72 in main (argc=, argv=, envp=) - at vl.c:4504 - -Fix this by not acquiring the AioContext there, and ensuring all paths -leading to it have it already acquired (backup_clean()). 
- -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1782111 -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf -(cherry picked from commit 0abf2581717a19d9749d5c2ff8acd0ac203452c2) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula ---- - block/backup-top.c | 5 ----- - block/backup.c | 3 +++ - 2 files changed, 3 insertions(+), 5 deletions(-) - -diff --git a/block/backup-top.c b/block/backup-top.c -index 818d3f2..b8d863f 100644 ---- a/block/backup-top.c -+++ b/block/backup-top.c -@@ -255,9 +255,6 @@ append_failed: - void bdrv_backup_top_drop(BlockDriverState *bs) - { - BDRVBackupTopState *s = bs->opaque; -- AioContext *aio_context = bdrv_get_aio_context(bs); -- -- aio_context_acquire(aio_context); - - bdrv_drained_begin(bs); - -@@ -271,6 +268,4 @@ void bdrv_backup_top_drop(BlockDriverState *bs) - bdrv_drained_end(bs); - - bdrv_unref(bs); -- -- aio_context_release(aio_context); - } -diff --git a/block/backup.c b/block/backup.c -index cf62b1a..1383e21 100644 ---- a/block/backup.c -+++ b/block/backup.c -@@ -135,8 +135,11 @@ static void backup_abort(Job *job) - static void backup_clean(Job *job) - { - BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); -+ AioContext *aio_context = bdrv_get_aio_context(s->backup_top); - -+ aio_context_acquire(aio_context); - bdrv_backup_top_drop(s->backup_top); -+ aio_context_release(aio_context); - } - - void backup_do_checkpoint(BlockJob *job, Error **errp) --- -1.8.3.1 - diff --git a/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch b/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch deleted file mode 100644 index 745be9f..0000000 --- a/kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch +++ /dev/null @@ -1,114 +0,0 @@ -From 1e0582ad34e77a060e2067a35992979c9eae82c9 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:31 +0000 -Subject: [PATCH 11/20] block: bdrv_reopen() with backing file in different - AioContext - -RH-Author: Kevin Wolf 
-Message-id: <20200313123439.10548-6-kwolf@redhat.com> -Patchwork-id: 94282 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 05/13] block: bdrv_reopen() with backing file in different AioContext -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -This patch allows bdrv_reopen() (and therefore the x-blockdev-reopen QMP -command) to attach a node as the new backing file even if the node is in -a different AioContext than the parent if one of both nodes can be moved -to the AioContext of the other node. - -Signed-off-by: Kevin Wolf -Tested-by: Peter Krempa -Message-Id: <20200306141413.30705-3-kwolf@redhat.com> -Reviewed-by: Alberto Garcia -Signed-off-by: Kevin Wolf -(cherry picked from commit 1de6b45fb5c1489b450df7d1a4c692bba9678ce6) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block.c | 32 ++++++++++++++++++++++++++------ - tests/qemu-iotests/245 | 8 +++----- - 2 files changed, 29 insertions(+), 11 deletions(-) - -diff --git a/block.c b/block.c -index a744bb5..39e4647 100644 ---- a/block.c -+++ b/block.c -@@ -3749,6 +3749,29 @@ static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs, - *shared = cumulative_shared_perms; - } - -+static bool bdrv_reopen_can_attach(BlockDriverState *parent, -+ BdrvChild *child, -+ BlockDriverState *new_child, -+ Error **errp) -+{ -+ AioContext *parent_ctx = bdrv_get_aio_context(parent); -+ AioContext *child_ctx = bdrv_get_aio_context(new_child); -+ GSList *ignore; -+ bool ret; -+ -+ ignore = g_slist_prepend(NULL, child); -+ ret = bdrv_can_set_aio_context(new_child, parent_ctx, &ignore, NULL); -+ g_slist_free(ignore); -+ if (ret) { -+ return ret; -+ } -+ -+ ignore = g_slist_prepend(NULL, child); -+ ret = bdrv_can_set_aio_context(parent, child_ctx, &ignore, errp); -+ g_slist_free(ignore); -+ return ret; -+} -+ - /* - * Take a BDRVReopenState and check if the value of 'backing' in the - * reopen_state->options QDict is valid or not. 
-@@ -3800,14 +3823,11 @@ static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state, - } - - /* -- * TODO: before removing the x- prefix from x-blockdev-reopen we -- * should move the new backing file into the right AioContext -- * instead of returning an error. -+ * Check AioContext compatibility so that the bdrv_set_backing_hd() call in -+ * bdrv_reopen_commit() won't fail. - */ - if (new_backing_bs) { -- if (bdrv_get_aio_context(new_backing_bs) != bdrv_get_aio_context(bs)) { -- error_setg(errp, "Cannot use a new backing file " -- "with a different AioContext"); -+ if (!bdrv_reopen_can_attach(bs, bs->backing, new_backing_bs, errp)) { - return -EINVAL; - } - } -diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 -index f69c2fa..919131d 100644 ---- a/tests/qemu-iotests/245 -+++ b/tests/qemu-iotests/245 -@@ -1013,18 +1013,16 @@ class TestBlockdevReopen(iotests.QMPTestCase): - # neither of them can switch to the other AioContext - def test_iothreads_error(self): - self.run_test_iothreads('iothread0', 'iothread1', -- "Cannot use a new backing file with a different AioContext") -+ "Cannot change iothread of active block backend") - - def test_iothreads_compatible_users(self): - self.run_test_iothreads('iothread0', 'iothread0') - - def test_iothreads_switch_backing(self): -- self.run_test_iothreads('iothread0', None, -- "Cannot use a new backing file with a different AioContext") -+ self.run_test_iothreads('iothread0', None) - - def test_iothreads_switch_overlay(self): -- self.run_test_iothreads(None, 'iothread0', -- "Cannot use a new backing file with a different AioContext") -+ self.run_test_iothreads(None, 'iothread0') - - if __name__ == '__main__': - iotests.main(supported_fmts=["qcow2"], --- -1.8.3.1 - diff --git a/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch b/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch deleted file mode 100644 index 378ae1a..0000000 --- a/kvm-block-nbd-Fix-hang-in-.bdrv_close.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 
4ef2c464a54b0b618d933641ac0a7012e629fed9 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:42 +0000 -Subject: [PATCH 01/20] block/nbd: Fix hang in .bdrv_close() - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-2-mlevitsk@redhat.com> -Patchwork-id: 94224 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/6] block/nbd: Fix hang in .bdrv_close() -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -When nbd_close() is called from a coroutine, the connection_co never -gets to run, and thus nbd_teardown_connection() hangs. - -This is because aio_co_enter() only puts the connection_co into the main -coroutine's wake-up queue, so this main coroutine needs to yield and -wait for connection_co to terminate. - -Suggested-by: Kevin Wolf -Signed-off-by: Max Reitz -Message-Id: <20200122164532.178040-2-mreitz@redhat.com> -Reviewed-by: Eric Blake -Reviewed-by: Maxim Levitsky -Signed-off-by: Max Reitz -(cherry picked from commit 78c81a3f108870d325b0a39d88711366afe6f703) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. 
de Paula ---- - block/nbd.c | 14 +++++++++++++- - 1 file changed, 13 insertions(+), 1 deletion(-) - -diff --git a/block/nbd.c b/block/nbd.c -index 5f18f78..a73f0d9 100644 ---- a/block/nbd.c -+++ b/block/nbd.c -@@ -70,6 +70,7 @@ typedef struct BDRVNBDState { - CoMutex send_mutex; - CoQueue free_sema; - Coroutine *connection_co; -+ Coroutine *teardown_co; - QemuCoSleepState *connection_co_sleep_ns_state; - bool drained; - bool wait_drained_end; -@@ -203,7 +204,15 @@ static void nbd_teardown_connection(BlockDriverState *bs) - qemu_co_sleep_wake(s->connection_co_sleep_ns_state); - } - } -- BDRV_POLL_WHILE(bs, s->connection_co); -+ if (qemu_in_coroutine()) { -+ s->teardown_co = qemu_coroutine_self(); -+ /* connection_co resumes us when it terminates */ -+ qemu_coroutine_yield(); -+ s->teardown_co = NULL; -+ } else { -+ BDRV_POLL_WHILE(bs, s->connection_co); -+ } -+ assert(!s->connection_co); - } - - static bool nbd_client_connecting(BDRVNBDState *s) -@@ -395,6 +404,9 @@ static coroutine_fn void nbd_connection_entry(void *opaque) - s->ioc = NULL; - } - -+ if (s->teardown_co) { -+ aio_co_wake(s->teardown_co); -+ } - aio_wait_kick(); - } - --- -1.8.3.1 - diff --git a/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch b/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch deleted file mode 100644 index 43f9ffc..0000000 --- a/kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch +++ /dev/null @@ -1,328 +0,0 @@ -From 25c528b30f8774f33e957d14060805398da524d9 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Thu, 26 Mar 2020 20:23:06 +0000 -Subject: [PATCH 1/4] block: pass BlockDriver reference to the .bdrv_co_create - -RH-Author: Maxim Levitsky -Message-id: <20200326202307.9264-2-mlevitsk@redhat.com> -Patchwork-id: 94447 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] block: pass BlockDriver reference to the .bdrv_co_create -Bugzilla: 1816007 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Kevin Wolf -RH-Acked-by: Max Reitz - -This will allow the 
reuse of a single generic .bdrv_co_create -implementation for several drivers. -No functional changes. - -Signed-off-by: Maxim Levitsky -Message-Id: <20200326011218.29230-2-mlevitsk@redhat.com> -Reviewed-by: Denis V. Lunev -Signed-off-by: Max Reitz -(cherry picked from commit b92902dfeaafbceaf744ab7473f2d070284f6172) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. de Paula ---- - block.c | 3 ++- - block/crypto.c | 3 ++- - block/file-posix.c | 4 +++- - block/file-win32.c | 4 +++- - block/gluster.c | 3 ++- - block/nfs.c | 4 +++- - block/parallels.c | 3 ++- - block/qcow.c | 3 ++- - block/qcow2.c | 4 +++- - block/qed.c | 3 ++- - block/raw-format.c | 4 +++- - block/rbd.c | 3 ++- - block/sheepdog.c | 4 +++- - block/ssh.c | 4 +++- - block/vdi.c | 4 +++- - block/vhdx.c | 3 ++- - block/vmdk.c | 4 +++- - block/vpc.c | 6 ++++-- - include/block/block_int.h | 3 ++- - 19 files changed, 49 insertions(+), 20 deletions(-) - -diff --git a/block.c b/block.c -index ec29b1e..f9a1c5b 100644 ---- a/block.c -+++ b/block.c -@@ -482,7 +482,8 @@ static void coroutine_fn bdrv_create_co_entry(void *opaque) - CreateCo *cco = opaque; - assert(cco->drv); - -- ret = cco->drv->bdrv_co_create_opts(cco->filename, cco->opts, &local_err); -+ ret = cco->drv->bdrv_co_create_opts(cco->drv, -+ cco->filename, cco->opts, &local_err); - error_propagate(&cco->err, local_err); - cco->ret = ret; - } -diff --git a/block/crypto.c b/block/crypto.c -index 2482383..970d463 100644 ---- a/block/crypto.c -+++ b/block/crypto.c -@@ -539,7 +539,8 @@ fail: - return ret; - } - --static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename, -+static int coroutine_fn block_crypto_co_create_opts_luks(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/file-posix.c b/block/file-posix.c -index fd29372..a2e0a74 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -2346,7 +2346,9 @@ out: - return result; - } - --static int coroutine_fn 
raw_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn raw_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions options; -diff --git a/block/file-win32.c b/block/file-win32.c -index 77e8ff7..1585983 100644 ---- a/block/file-win32.c -+++ b/block/file-win32.c -@@ -588,7 +588,9 @@ static int raw_co_create(BlockdevCreateOptions *options, Error **errp) - return 0; - } - --static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn raw_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions options; -diff --git a/block/gluster.c b/block/gluster.c -index 4fa4a77..0aa1f2c 100644 ---- a/block/gluster.c -+++ b/block/gluster.c -@@ -1130,7 +1130,8 @@ out: - return ret; - } - --static int coroutine_fn qemu_gluster_co_create_opts(const char *filename, -+static int coroutine_fn qemu_gluster_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/nfs.c b/block/nfs.c -index 9a6311e..cc2413d 100644 ---- a/block/nfs.c -+++ b/block/nfs.c -@@ -662,7 +662,9 @@ out: - return ret; - } - --static int coroutine_fn nfs_file_co_create_opts(const char *url, QemuOpts *opts, -+static int coroutine_fn nfs_file_co_create_opts(BlockDriver *drv, -+ const char *url, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions *create_options; -diff --git a/block/parallels.c b/block/parallels.c -index 7a01997..6d4ed77 100644 ---- a/block/parallels.c -+++ b/block/parallels.c -@@ -609,7 +609,8 @@ exit: - goto out; - } - --static int coroutine_fn parallels_co_create_opts(const char *filename, -+static int coroutine_fn parallels_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/qcow.c b/block/qcow.c -index fce8989..8973e4e 100644 ---- a/block/qcow.c -+++ b/block/qcow.c -@@ 
-934,7 +934,8 @@ exit: - return ret; - } - --static int coroutine_fn qcow_co_create_opts(const char *filename, -+static int coroutine_fn qcow_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, Error **errp) - { - BlockdevCreateOptions *create_options = NULL; -diff --git a/block/qcow2.c b/block/qcow2.c -index 83b1fc0..71067c6 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -3558,7 +3558,9 @@ out: - return ret; - } - --static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions *create_options = NULL; -diff --git a/block/qed.c b/block/qed.c -index d8c4e5f..1af9b3c 100644 ---- a/block/qed.c -+++ b/block/qed.c -@@ -720,7 +720,8 @@ out: - return ret; - } - --static int coroutine_fn bdrv_qed_co_create_opts(const char *filename, -+static int coroutine_fn bdrv_qed_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/raw-format.c b/block/raw-format.c -index 3a76ec7..93b25e1 100644 ---- a/block/raw-format.c -+++ b/block/raw-format.c -@@ -419,7 +419,9 @@ static int raw_has_zero_init_truncate(BlockDriverState *bs) - return bdrv_has_zero_init_truncate(bs->file->bs); - } - --static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn raw_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - return bdrv_create_file(filename, opts, errp); -diff --git a/block/rbd.c b/block/rbd.c -index 027cbcc..8847259 100644 ---- a/block/rbd.c -+++ b/block/rbd.c -@@ -425,7 +425,8 @@ static int qemu_rbd_co_create(BlockdevCreateOptions *options, Error **errp) - return qemu_rbd_do_create(options, NULL, NULL, errp); - } - --static int coroutine_fn qemu_rbd_co_create_opts(const char *filename, -+static int coroutine_fn 
qemu_rbd_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/sheepdog.c b/block/sheepdog.c -index cfa8433..a8a7e32 100644 ---- a/block/sheepdog.c -+++ b/block/sheepdog.c -@@ -2157,7 +2157,9 @@ out: - return ret; - } - --static int coroutine_fn sd_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn sd_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions *create_options = NULL; -diff --git a/block/ssh.c b/block/ssh.c -index b4375cf..84e9282 100644 ---- a/block/ssh.c -+++ b/block/ssh.c -@@ -963,7 +963,9 @@ fail: - return ret; - } - --static int coroutine_fn ssh_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn ssh_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - BlockdevCreateOptions *create_options; -diff --git a/block/vdi.c b/block/vdi.c -index 0142da7..e1a11f2 100644 ---- a/block/vdi.c -+++ b/block/vdi.c -@@ -896,7 +896,9 @@ static int coroutine_fn vdi_co_create(BlockdevCreateOptions *create_options, - return vdi_co_do_create(create_options, DEFAULT_CLUSTER_SIZE, errp); - } - --static int coroutine_fn vdi_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn vdi_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - QDict *qdict = NULL; -diff --git a/block/vhdx.c b/block/vhdx.c -index f02d261..33e57cd 100644 ---- a/block/vhdx.c -+++ b/block/vhdx.c -@@ -2046,7 +2046,8 @@ delete_and_exit: - return ret; - } - --static int coroutine_fn vhdx_co_create_opts(const char *filename, -+static int coroutine_fn vhdx_co_create_opts(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp) - { -diff --git a/block/vmdk.c b/block/vmdk.c -index 20e909d..eb726f2 100644 ---- a/block/vmdk.c -+++ b/block/vmdk.c -@@ -2588,7 +2588,9 @@ exit: - return blk; - } - 
--static int coroutine_fn vmdk_co_create_opts(const char *filename, QemuOpts *opts, -+static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, - Error **errp) - { - Error *local_err = NULL; -diff --git a/block/vpc.c b/block/vpc.c -index a655502..6df75e2 100644 ---- a/block/vpc.c -+++ b/block/vpc.c -@@ -1089,8 +1089,10 @@ out: - return ret; - } - --static int coroutine_fn vpc_co_create_opts(const char *filename, -- QemuOpts *opts, Error **errp) -+static int coroutine_fn vpc_co_create_opts(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, -+ Error **errp) - { - BlockdevCreateOptions *create_options = NULL; - QDict *qdict; -diff --git a/include/block/block_int.h b/include/block/block_int.h -index 96e327b..7ff81be 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -136,7 +136,8 @@ struct BlockDriver { - void (*bdrv_close)(BlockDriverState *bs); - int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts, - Error **errp); -- int coroutine_fn (*bdrv_co_create_opts)(const char *filename, -+ int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv, -+ const char *filename, - QemuOpts *opts, - Error **errp); - int (*bdrv_make_empty)(BlockDriverState *bs); --- -1.8.3.1 - diff --git a/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch b/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch deleted file mode 100644 index 2c27fd2..0000000 --- a/kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch +++ /dev/null @@ -1,78 +0,0 @@ -From ec5408763c49cd0b63ee324bdc38a429ed1adeee Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:29 +0000 -Subject: [PATCH 09/20] block/qcow2: Move bitmap reopen into - bdrv_reopen_commit_post - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-4-kwolf@redhat.com> -Patchwork-id: 94280 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 03/13] block/qcow2: Move bitmap reopen into bdrv_reopen_commit_post 
-Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -From: Peter Krempa - -The bitmap code requires writing the 'file' child when the qcow2 driver -is reopened in read-write mode. - -If the 'file' child is being reopened due to a permissions change, the -modification is commited yet when qcow2_reopen_commit is called. This -means that any attempt to write the 'file' child will end with EBADFD -as the original fd was already closed. - -Moving bitmap reopening to the new callback which is called after -permission modifications are commited fixes this as the file descriptor -will be replaced with the correct one. - -The above problem manifests itself when reopening 'qcow2' format layer -which uses a 'file-posix' file child which was opened with the -'auto-read-only' property set. - -Signed-off-by: Peter Krempa -Message-Id: -Signed-off-by: Kevin Wolf -(cherry picked from commit 65eb7c85a3e62529e2bad782e94d5a7b11dd5a92) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/qcow2.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/block/qcow2.c b/block/qcow2.c -index 7c18721..83b1fc0 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -1881,6 +1881,11 @@ fail: - static void qcow2_reopen_commit(BDRVReopenState *state) - { - qcow2_update_options_commit(state->bs, state->opaque); -+ g_free(state->opaque); -+} -+ -+static void qcow2_reopen_commit_post(BDRVReopenState *state) -+{ - if (state->flags & BDRV_O_RDWR) { - Error *local_err = NULL; - -@@ -1895,7 +1900,6 @@ static void qcow2_reopen_commit(BDRVReopenState *state) - bdrv_get_node_name(state->bs)); - } - } -- g_free(state->opaque); - } - - static void qcow2_reopen_abort(BDRVReopenState *state) -@@ -5492,6 +5496,7 @@ BlockDriver bdrv_qcow2 = { - .bdrv_close = qcow2_close, - .bdrv_reopen_prepare = qcow2_reopen_prepare, - .bdrv_reopen_commit = qcow2_reopen_commit, -+ .bdrv_reopen_commit_post = qcow2_reopen_commit_post, - .bdrv_reopen_abort = qcow2_reopen_abort, - .bdrv_join_options = qcow2_join_options, - .bdrv_child_perm = bdrv_format_default_perms, --- -1.8.3.1 - diff --git a/kvm-block-trickle-down-the-fallback-image-creation-funct.patch b/kvm-block-trickle-down-the-fallback-image-creation-funct.patch deleted file mode 100644 index 5ba1521..0000000 --- a/kvm-block-trickle-down-the-fallback-image-creation-funct.patch +++ /dev/null @@ -1,296 +0,0 @@ -From a1f7b929ae1fe6fa424c520c3a5eb497333b0fd9 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Thu, 26 Mar 2020 20:23:07 +0000 -Subject: [PATCH 2/4] block: trickle down the fallback image creation function - use to the block drivers - -RH-Author: Maxim Levitsky -Message-id: <20200326202307.9264-3-mlevitsk@redhat.com> -Patchwork-id: 94446 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] block: trickle down the fallback image creation function use to the block drivers -Bugzilla: 1816007 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Kevin Wolf -RH-Acked-by: Max Reitz - -Instead of checking the 
.bdrv_co_create_opts to see if we need the -fallback, just implement the .bdrv_co_create_opts in the drivers that -need it. - -This way we don't break various places that need to know if the -underlying protocol/format really supports image creation, and this way -we still allow some drivers to not support image creation. - -Fixes: fd17146cd93d1704cd96d7c2757b325fc7aac6fd -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1816007 - -Note that technically this driver reverts the image creation fallback -for the vxhs driver since I don't have a means to test it, and IMHO it -is better to leave it not supported as it was prior to generic image -creation patches. - -Also drop iscsi_create_opts which was left accidentally. - -Signed-off-by: Maxim Levitsky -Message-Id: <20200326011218.29230-3-mlevitsk@redhat.com> -Reviewed-by: Denis V. Lunev -[mreitz: Fixed alignment, and moved bdrv_co_create_opts_simple() and - bdrv_create_opts_simple from block.h into block_int.h] -Signed-off-by: Max Reitz -(cherry picked from commit 5a5e7f8cd86b7ced0732b1b6e28c82baa65b09c9) - -Contextual conflicts in block.c and include/block/block_int.h - -(conflict in block.c by default shows as functional but -with --diff-algorithm=patience it becomes a contextual conflict) - -... -001/2:[----] [--] 'block: pass BlockDriver reference to the .bdrv_co_create' -002/2:[0014] [FC] 'block: trickle down the fallback image creation function use to the block drivers' -... -002/2: 'meld <(git show 5a5e7f8^\!) <(git show 6d3bca5^\!)' - -So now running: -meld <(git show 5a5e7f8^\! --diff-algorithm=patience) <(git show 6d3bca5^\! --diff-algorithm=patience) - -shows no contextual conflicts -It is mostly due to missing commit f6dc1c31d3801dcbdf0c56574f9ff4f05180810c -Thanks to Max Reitz for helping me with this. - -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. 
de Paula ---- - block.c | 35 ++++++++++++++++++++--------------- - block/file-posix.c | 7 ++++++- - block/iscsi.c | 16 ++++------------ - block/nbd.c | 6 ++++++ - block/nvme.c | 3 +++ - include/block/block.h | 1 + - include/block/block_int.h | 11 +++++++++++ - 7 files changed, 51 insertions(+), 28 deletions(-) - -diff --git a/block.c b/block.c -index f9a1c5b..ba3b40d7 100644 ---- a/block.c -+++ b/block.c -@@ -597,8 +597,15 @@ static int create_file_fallback_zero_first_sector(BlockBackend *blk, - return 0; - } - --static int bdrv_create_file_fallback(const char *filename, BlockDriver *drv, -- QemuOpts *opts, Error **errp) -+/** -+ * Simple implementation of bdrv_co_create_opts for protocol drivers -+ * which only support creation via opening a file -+ * (usually existing raw storage device) -+ */ -+int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, -+ Error **errp) - { - BlockBackend *blk; - QDict *options; -@@ -662,11 +669,7 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) - return -ENOENT; - } - -- if (drv->bdrv_co_create_opts) { -- return bdrv_create(drv, filename, opts, errp); -- } else { -- return bdrv_create_file_fallback(filename, drv, opts, errp); -- } -+ return bdrv_create(drv, filename, opts, errp); - } - - /** -@@ -1543,9 +1546,9 @@ QemuOptsList bdrv_runtime_opts = { - }, - }; - --static QemuOptsList fallback_create_opts = { -- .name = "fallback-create-opts", -- .head = QTAILQ_HEAD_INITIALIZER(fallback_create_opts.head), -+QemuOptsList bdrv_create_opts_simple = { -+ .name = "simple-create-opts", -+ .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head), - .desc = { - { - .name = BLOCK_OPT_SIZE, -@@ -5910,13 +5913,15 @@ void bdrv_img_create(const char *filename, const char *fmt, - return; - } - -- create_opts = qemu_opts_append(create_opts, drv->create_opts); -- if (proto_drv->create_opts) { -- create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); -- } 
else { -- create_opts = qemu_opts_append(create_opts, &fallback_create_opts); -+ if (!proto_drv->create_opts) { -+ error_setg(errp, "Protocol driver '%s' does not support image creation", -+ proto_drv->format_name); -+ return; - } - -+ create_opts = qemu_opts_append(create_opts, drv->create_opts); -+ create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); -+ - /* Create parameter list with default values */ - opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); - qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort); -diff --git a/block/file-posix.c b/block/file-posix.c -index a2e0a74..dd18d40 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -3432,6 +3432,8 @@ static BlockDriver bdrv_host_device = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .mutable_opts = mutable_opts, - .bdrv_co_invalidate_cache = raw_co_invalidate_cache, - .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, -@@ -3558,10 +3560,11 @@ static BlockDriver bdrv_host_cdrom = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .mutable_opts = mutable_opts, - .bdrv_co_invalidate_cache = raw_co_invalidate_cache, - -- - .bdrv_co_preadv = raw_co_preadv, - .bdrv_co_pwritev = raw_co_pwritev, - .bdrv_co_flush_to_disk = raw_co_flush_to_disk, -@@ -3690,6 +3693,8 @@ static BlockDriver bdrv_host_cdrom = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .mutable_opts = mutable_opts, - - .bdrv_co_preadv = raw_co_preadv, -diff --git 
a/block/iscsi.c b/block/iscsi.c -index b45da65..16b0716 100644 ---- a/block/iscsi.c -+++ b/block/iscsi.c -@@ -2399,18 +2399,6 @@ out_unlock: - return r; - } - --static QemuOptsList iscsi_create_opts = { -- .name = "iscsi-create-opts", -- .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head), -- .desc = { -- { -- .name = BLOCK_OPT_SIZE, -- .type = QEMU_OPT_SIZE, -- .help = "Virtual disk size" -- }, -- { /* end of list */ } -- } --}; - - static const char *const iscsi_strong_runtime_opts[] = { - "transport", -@@ -2434,6 +2422,8 @@ static BlockDriver bdrv_iscsi = { - .bdrv_parse_filename = iscsi_parse_filename, - .bdrv_file_open = iscsi_open, - .bdrv_close = iscsi_close, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .bdrv_reopen_prepare = iscsi_reopen_prepare, - .bdrv_reopen_commit = iscsi_reopen_commit, - .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, -@@ -2471,6 +2461,8 @@ static BlockDriver bdrv_iser = { - .bdrv_parse_filename = iscsi_parse_filename, - .bdrv_file_open = iscsi_open, - .bdrv_close = iscsi_close, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .bdrv_reopen_prepare = iscsi_reopen_prepare, - .bdrv_reopen_commit = iscsi_reopen_commit, - .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, -diff --git a/block/nbd.c b/block/nbd.c -index a73f0d9..927915d 100644 ---- a/block/nbd.c -+++ b/block/nbd.c -@@ -2030,6 +2030,8 @@ static BlockDriver bdrv_nbd = { - .protocol_name = "nbd", - .instance_size = sizeof(BDRVNBDState), - .bdrv_parse_filename = nbd_parse_filename, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .bdrv_file_open = nbd_open, - .bdrv_reopen_prepare = nbd_client_reopen_prepare, - .bdrv_co_preadv = nbd_client_co_preadv, -@@ -2055,6 +2057,8 @@ static BlockDriver bdrv_nbd_tcp = { - .protocol_name = "nbd+tcp", - .instance_size = sizeof(BDRVNBDState), - .bdrv_parse_filename = 
nbd_parse_filename, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .bdrv_file_open = nbd_open, - .bdrv_reopen_prepare = nbd_client_reopen_prepare, - .bdrv_co_preadv = nbd_client_co_preadv, -@@ -2080,6 +2084,8 @@ static BlockDriver bdrv_nbd_unix = { - .protocol_name = "nbd+unix", - .instance_size = sizeof(BDRVNBDState), - .bdrv_parse_filename = nbd_parse_filename, -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, - .bdrv_file_open = nbd_open, - .bdrv_reopen_prepare = nbd_client_reopen_prepare, - .bdrv_co_preadv = nbd_client_co_preadv, -diff --git a/block/nvme.c b/block/nvme.c -index d41c4bd..7b7c0cc 100644 ---- a/block/nvme.c -+++ b/block/nvme.c -@@ -1333,6 +1333,9 @@ static BlockDriver bdrv_nvme = { - .protocol_name = "nvme", - .instance_size = sizeof(BDRVNVMeState), - -+ .bdrv_co_create_opts = bdrv_co_create_opts_simple, -+ .create_opts = &bdrv_create_opts_simple, -+ - .bdrv_parse_filename = nvme_parse_filename, - .bdrv_file_open = nvme_file_open, - .bdrv_close = nvme_close, -diff --git a/include/block/block.h b/include/block/block.h -index 1df9848..92685d2 100644 ---- a/include/block/block.h -+++ b/include/block/block.h -@@ -293,6 +293,7 @@ BlockDriver *bdrv_find_format(const char *format_name); - int bdrv_create(BlockDriver *drv, const char* filename, - QemuOpts *opts, Error **errp); - int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); -+ - BlockDriverState *bdrv_new(void); - void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - Error **errp); -diff --git a/include/block/block_int.h b/include/block/block_int.h -index 7ff81be..529f153 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -1325,4 +1325,15 @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset, - - int refresh_total_sectors(BlockDriverState *bs, int64_t hint); - -+/** -+ * Simple implementation of 
bdrv_co_create_opts for protocol drivers -+ * which only support creation via opening a file -+ * (usually existing raw storage device) -+ */ -+int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, -+ const char *filename, -+ QemuOpts *opts, -+ Error **errp); -+extern QemuOptsList bdrv_create_opts_simple; -+ - #endif /* BLOCK_INT_H */ --- -1.8.3.1 - diff --git a/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch b/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch deleted file mode 100644 index 9a69130..0000000 --- a/kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch +++ /dev/null @@ -1,176 +0,0 @@ -From dc2654f2319ad6c379e0ba10be143726c6f0e9e0 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:47 +0000 -Subject: [PATCH 14/18] blockdev: Acquire AioContext on dirty bitmap functions - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-8-slp@redhat.com> -Patchwork-id: 93760 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 7/9] blockdev: Acquire AioContext on dirty bitmap functions -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Dirty map addition and removal functions are not acquiring to BDS -AioContext, while they may call to code that expects it to be -acquired. 
- -This may trigger a crash with a stack trace like this one: - - #0 0x00007f0ef146370f in __GI_raise (sig=sig@entry=6) - at ../sysdeps/unix/sysv/linux/raise.c:50 - #1 0x00007f0ef144db25 in __GI_abort () at abort.c:79 - #2 0x0000565022294dce in error_exit - (err=, msg=msg@entry=0x56502243a730 <__func__.16350> "qemu_mutex_unlock_impl") at util/qemu-thread-posix.c:36 - #3 0x00005650222950ba in qemu_mutex_unlock_impl - (mutex=mutex@entry=0x5650244b0240, file=file@entry=0x565022439adf "util/async.c", line=line@entry=526) at util/qemu-thread-posix.c:108 - #4 0x0000565022290029 in aio_context_release - (ctx=ctx@entry=0x5650244b01e0) at util/async.c:526 - #5 0x000056502221cd08 in bdrv_can_store_new_dirty_bitmap - (bs=bs@entry=0x5650244dc820, name=name@entry=0x56502481d360 "bitmap1", granularity=granularity@entry=65536, errp=errp@entry=0x7fff22831718) - at block/dirty-bitmap.c:542 - #6 0x000056502206ae53 in qmp_block_dirty_bitmap_add - (errp=0x7fff22831718, disabled=false, has_disabled=, persistent=, has_persistent=true, granularity=65536, has_granularity=, name=0x56502481d360 "bitmap1", node=) at blockdev.c:2894 - #7 0x000056502206ae53 in qmp_block_dirty_bitmap_add - (node=, name=0x56502481d360 "bitmap1", has_granularity=, granularity=, has_persistent=true, persistent=, has_disabled=false, disabled=false, errp=0x7fff22831718) at blockdev.c:2856 - #8 0x00005650221847a3 in qmp_marshal_block_dirty_bitmap_add - (args=, ret=, errp=0x7fff22831798) - at qapi/qapi-commands-block-core.c:651 - #9 0x0000565022247e6c in do_qmp_dispatch - (errp=0x7fff22831790, allow_oob=, request=, cmds=0x565022b32d60 ) at qapi/qmp-dispatch.c:132 - #10 0x0000565022247e6c in qmp_dispatch - (cmds=0x565022b32d60 , request=, allow_oob=) at qapi/qmp-dispatch.c:175 - #11 0x0000565022166061 in monitor_qmp_dispatch - (mon=0x56502450faa0, req=) at monitor/qmp.c:145 - #12 0x00005650221666fa in monitor_qmp_bh_dispatcher - (data=) at monitor/qmp.c:234 - #13 0x000056502228f866 in aio_bh_call (bh=0x56502440eae0) - 
at util/async.c:117 - #14 0x000056502228f866 in aio_bh_poll (ctx=ctx@entry=0x56502440d7a0) - at util/async.c:117 - #15 0x0000565022292c54 in aio_dispatch (ctx=0x56502440d7a0) - at util/aio-posix.c:459 - #16 0x000056502228f742 in aio_ctx_dispatch - (source=, callback=, user_data=) at util/async.c:260 - #17 0x00007f0ef5ce667d in g_main_dispatch (context=0x56502449aa40) - at gmain.c:3176 - #18 0x00007f0ef5ce667d in g_main_context_dispatch - (context=context@entry=0x56502449aa40) at gmain.c:3829 - #19 0x0000565022291d08 in glib_pollfds_poll () at util/main-loop.c:219 - #20 0x0000565022291d08 in os_host_main_loop_wait - (timeout=) at util/main-loop.c:242 - #21 0x0000565022291d08 in main_loop_wait (nonblocking=) - at util/main-loop.c:518 - #22 0x00005650220743c1 in main_loop () at vl.c:1828 - #23 0x0000565021f20a72 in main - (argc=, argv=, envp=) - at vl.c:4504 - -Fix this by acquiring the AioContext at qmp_block_dirty_bitmap_add() -and qmp_block_dirty_bitmap_add(). - -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1782175 -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf -(cherry picked from commit 91005a495e228ebd7e5e173cd18f952450eef82d) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. 
de Paula ---- - blockdev.c | 22 ++++++++++++++++++---- - 1 file changed, 18 insertions(+), 4 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 1dacbc2..d4ef6cd 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -2984,6 +2984,7 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - { - BlockDriverState *bs; - BdrvDirtyBitmap *bitmap; -+ AioContext *aio_context; - - if (!name || name[0] == '\0') { - error_setg(errp, "Bitmap name cannot be empty"); -@@ -2995,11 +2996,14 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - return; - } - -+ aio_context = bdrv_get_aio_context(bs); -+ aio_context_acquire(aio_context); -+ - if (has_granularity) { - if (granularity < 512 || !is_power_of_2(granularity)) { - error_setg(errp, "Granularity must be power of 2 " - "and at least 512"); -- return; -+ goto out; - } - } else { - /* Default to cluster size, if available: */ -@@ -3017,12 +3021,12 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - if (persistent && - !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp)) - { -- return; -+ goto out; - } - - bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp); - if (bitmap == NULL) { -- return; -+ goto out; - } - - if (disabled) { -@@ -3030,6 +3034,9 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, - } - - bdrv_dirty_bitmap_set_persistence(bitmap, persistent); -+ -+out: -+ aio_context_release(aio_context); - } - - static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( -@@ -3038,21 +3045,27 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( - { - BlockDriverState *bs; - BdrvDirtyBitmap *bitmap; -+ AioContext *aio_context; - - bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp); - if (!bitmap || !bs) { - return NULL; - } - -+ aio_context = bdrv_get_aio_context(bs); -+ aio_context_acquire(aio_context); -+ - if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO, - errp)) { -+ aio_context_release(aio_context); - 
return NULL; - } - - if (bdrv_dirty_bitmap_get_persistence(bitmap) && - bdrv_remove_persistent_dirty_bitmap(bs, name, errp) < 0) - { -- return NULL; -+ aio_context_release(aio_context); -+ return NULL; - } - - if (release) { -@@ -3063,6 +3076,7 @@ static BdrvDirtyBitmap *do_block_dirty_bitmap_remove( - *bitmap_bs = bs; - } - -+ aio_context_release(aio_context); - return release ? NULL : bitmap; - } - --- -1.8.3.1 - diff --git a/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch b/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch deleted file mode 100644 index b2dd453..0000000 --- a/kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 24e5eca4218b294bd013e2d85a38345045506bec Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:48 +0000 -Subject: [PATCH 15/18] blockdev: Return bs to the proper context on snapshot - abort - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-9-slp@redhat.com> -Patchwork-id: 93761 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 8/9] blockdev: Return bs to the proper context on snapshot abort -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -external_snapshot_abort() calls to bdrv_set_backing_hd(), which -returns state->old_bs to the main AioContext, as it's intended to be -used then the BDS is going to be released. As that's not the case when -aborting an external snapshot, return it to the AioContext it was -before the call. - -This issue can be triggered by issuing a transaction with two actions, -a proper blockdev-snapshot-sync and a bogus one, so the second will -trigger a transaction abort. 
This results in a crash with an stack -trace like this one: - - #0 0x00007fa1048b28df in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 - #1 0x00007fa10489ccf5 in __GI_abort () at abort.c:79 - #2 0x00007fa10489cbc9 in __assert_fail_base - (fmt=0x7fa104a03300 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5572240b44d8 "bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)", file=0x557224014d30 "block.c", line=2240, function=) at assert.c:92 - #3 0x00007fa1048aae96 in __GI___assert_fail - (assertion=assertion@entry=0x5572240b44d8 "bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)", file=file@entry=0x557224014d30 "block.c", line=line@entry=2240, function=function@entry=0x5572240b5d60 <__PRETTY_FUNCTION__.31620> "bdrv_replace_child_noperm") at assert.c:101 - #4 0x0000557223e631f8 in bdrv_replace_child_noperm (child=0x557225b9c980, new_bs=new_bs@entry=0x557225c42e40) at block.c:2240 - #5 0x0000557223e68be7 in bdrv_replace_node (from=0x557226951a60, to=0x557225c42e40, errp=0x5572247d6138 ) at block.c:4196 - #6 0x0000557223d069c4 in external_snapshot_abort (common=0x557225d7e170) at blockdev.c:1731 - #7 0x0000557223d069c4 in external_snapshot_abort (common=0x557225d7e170) at blockdev.c:1717 - #8 0x0000557223d09013 in qmp_transaction (dev_list=, has_props=, props=0x557225cc7d70, errp=errp@entry=0x7ffe704c0c98) at blockdev.c:2360 - #9 0x0000557223e32085 in qmp_marshal_transaction (args=, ret=, errp=0x7ffe704c0d08) at qapi/qapi-commands-transaction.c:44 - #10 0x0000557223ee798c in do_qmp_dispatch (errp=0x7ffe704c0d00, allow_oob=, request=, cmds=0x5572247d3cc0 ) at qapi/qmp-dispatch.c:132 - #11 0x0000557223ee798c in qmp_dispatch (cmds=0x5572247d3cc0 , request=, allow_oob=) at qapi/qmp-dispatch.c:175 - #12 0x0000557223e06141 in monitor_qmp_dispatch (mon=0x557225c69ff0, req=) at monitor/qmp.c:120 - #13 0x0000557223e0678a in monitor_qmp_bh_dispatcher (data=) at monitor/qmp.c:209 - #14 0x0000557223f2f366 in aio_bh_call 
(bh=0x557225b9dc60) at util/async.c:117 - #15 0x0000557223f2f366 in aio_bh_poll (ctx=ctx@entry=0x557225b9c840) at util/async.c:117 - #16 0x0000557223f32754 in aio_dispatch (ctx=0x557225b9c840) at util/aio-posix.c:459 - #17 0x0000557223f2f242 in aio_ctx_dispatch (source=, callback=, user_data=) at util/async.c:260 - #18 0x00007fa10913467d in g_main_dispatch (context=0x557225c28e80) at gmain.c:3176 - #19 0x00007fa10913467d in g_main_context_dispatch (context=context@entry=0x557225c28e80) at gmain.c:3829 - #20 0x0000557223f31808 in glib_pollfds_poll () at util/main-loop.c:219 - #21 0x0000557223f31808 in os_host_main_loop_wait (timeout=) at util/main-loop.c:242 - #22 0x0000557223f31808 in main_loop_wait (nonblocking=) at util/main-loop.c:518 - #23 0x0000557223d13201 in main_loop () at vl.c:1828 - #24 0x0000557223bbfb82 in main (argc=, argv=, envp=) at vl.c:4504 - -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1779036 -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf -(cherry picked from commit 377410f6fb4f6b0d26d4a028c20766fae05de17e) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula ---- - blockdev.c | 21 +++++++++++++++++++++ - 1 file changed, 21 insertions(+) - -diff --git a/blockdev.c b/blockdev.c -index d4ef6cd..4cd9a58 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1731,6 +1731,8 @@ static void external_snapshot_abort(BlkActionState *common) - if (state->new_bs) { - if (state->overlay_appended) { - AioContext *aio_context; -+ AioContext *tmp_context; -+ int ret; - - aio_context = bdrv_get_aio_context(state->old_bs); - aio_context_acquire(aio_context); -@@ -1738,6 +1740,25 @@ static void external_snapshot_abort(BlkActionState *common) - bdrv_ref(state->old_bs); /* we can't let bdrv_set_backind_hd() - close state->old_bs; we need it */ - bdrv_set_backing_hd(state->new_bs, NULL, &error_abort); -+ -+ /* -+ * The call to bdrv_set_backing_hd() above returns state->old_bs to -+ * the main AioContext. 
As we're still going to be using it, return -+ * it to the AioContext it was before. -+ */ -+ tmp_context = bdrv_get_aio_context(state->old_bs); -+ if (aio_context != tmp_context) { -+ aio_context_release(aio_context); -+ aio_context_acquire(tmp_context); -+ -+ ret = bdrv_try_set_aio_context(state->old_bs, -+ aio_context, NULL); -+ assert(ret == 0); -+ -+ aio_context_release(tmp_context); -+ aio_context_acquire(aio_context); -+ } -+ - bdrv_replace_node(state->new_bs, state->old_bs, &error_abort); - bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */ - --- -1.8.3.1 - diff --git a/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch b/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch deleted file mode 100644 index 399a06a..0000000 --- a/kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch +++ /dev/null @@ -1,62 +0,0 @@ -From d56b53cd75c4146eae7a06d1cc30ab823a9bde93 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:41 +0000 -Subject: [PATCH 08/18] blockdev: fix coding style issues in - drive_backup_prepare -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-2-slp@redhat.com> -Patchwork-id: 93754 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/9] blockdev: fix coding style issues in drive_backup_prepare -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Fix a couple of minor coding style issues in drive_backup_prepare. - -Signed-off-by: Sergio Lopez -Reviewed-by: Max Reitz -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 471ded690e19689018535e3f48480507ed073e22) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. 
de Paula ---- - blockdev.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 8e029e9..553e315 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3620,7 +3620,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, - - if (!backup->has_format) { - backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? -- NULL : (char*) bs->drv->format_name; -+ NULL : (char *) bs->drv->format_name; - } - - /* Early check to avoid creating target */ -@@ -3630,8 +3630,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, - - flags = bs->open_flags | BDRV_O_RDWR; - -- /* See if we have a backing HD we can use to create our new image -- * on top of. */ -+ /* -+ * See if we have a backing HD we can use to create our new image -+ * on top of. -+ */ - if (backup->sync == MIRROR_SYNC_MODE_TOP) { - source = backing_bs(bs); - if (!source) { --- -1.8.3.1 - diff --git a/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch b/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch deleted file mode 100644 index a94ee75..0000000 --- a/kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch +++ /dev/null @@ -1,204 +0,0 @@ -From da4ee4c0d56200042cb86f8ccd2777009bd82df3 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:44 +0000 -Subject: [PATCH 11/18] blockdev: honor bdrv_try_set_aio_context() context - requirements - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-5-slp@redhat.com> -Patchwork-id: 93758 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 4/9] blockdev: honor bdrv_try_set_aio_context() context requirements -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -bdrv_try_set_aio_context() requires that the old context is held, and -the new context is not held. 
Fix all the occurrences where it's not -done this way. - -Suggested-by: Max Reitz -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf -(cherry picked from commit 3ea67e08832775a28d0bd2795f01bc77e7ea1512) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula ---- - blockdev.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- - 1 file changed, 60 insertions(+), 8 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 152a0f7..1dacbc2 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1535,6 +1535,7 @@ static void external_snapshot_prepare(BlkActionState *common, - DO_UPCAST(ExternalSnapshotState, common, common); - TransactionAction *action = common->action; - AioContext *aio_context; -+ AioContext *old_context; - int ret; - - /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar -@@ -1675,7 +1676,16 @@ static void external_snapshot_prepare(BlkActionState *common, - goto out; - } - -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ -+ old_context = bdrv_get_aio_context(state->new_bs); -+ aio_context_release(aio_context); -+ aio_context_acquire(old_context); -+ - ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); -+ -+ aio_context_release(old_context); -+ aio_context_acquire(aio_context); -+ - if (ret < 0) { - goto out; - } -@@ -1775,11 +1785,13 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) - BlockDriverState *target_bs; - BlockDriverState *source = NULL; - AioContext *aio_context; -+ AioContext *old_context; - QDict *options; - Error *local_err = NULL; - int flags; - int64_t size; - bool set_backing_hd = false; -+ int ret; - - assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); - backup = common->action->u.drive_backup.data; -@@ -1868,6 +1880,21 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) - goto out; - } - -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. 
*/ -+ old_context = bdrv_get_aio_context(target_bs); -+ aio_context_release(aio_context); -+ aio_context_acquire(old_context); -+ -+ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -+ if (ret < 0) { -+ bdrv_unref(target_bs); -+ aio_context_release(old_context); -+ return; -+ } -+ -+ aio_context_release(old_context); -+ aio_context_acquire(aio_context); -+ - if (set_backing_hd) { - bdrv_set_backing_hd(target_bs, source, &local_err); - if (local_err) { -@@ -1947,6 +1974,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - BlockDriverState *bs; - BlockDriverState *target_bs; - AioContext *aio_context; -+ AioContext *old_context; -+ int ret; - - assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); - backup = common->action->u.blockdev_backup.data; -@@ -1961,7 +1990,18 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - return; - } - -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. 
*/ - aio_context = bdrv_get_aio_context(bs); -+ old_context = bdrv_get_aio_context(target_bs); -+ aio_context_acquire(old_context); -+ -+ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -+ if (ret < 0) { -+ aio_context_release(old_context); -+ return; -+ } -+ -+ aio_context_release(old_context); - aio_context_acquire(aio_context); - state->bs = bs; - -@@ -3562,7 +3602,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, - BlockJob *job = NULL; - BdrvDirtyBitmap *bmap = NULL; - int job_flags = JOB_DEFAULT; -- int ret; - - if (!backup->has_speed) { - backup->speed = 0; -@@ -3586,11 +3625,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, - backup->compress = false; - } - -- ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -- if (ret < 0) { -- return NULL; -- } -- - if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) || - (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL)) { - /* done before desugaring 'incremental' to print the right message */ -@@ -3825,6 +3859,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - BlockDriverState *bs; - BlockDriverState *source, *target_bs; - AioContext *aio_context; -+ AioContext *old_context; - BlockMirrorBackingMode backing_mode; - Error *local_err = NULL; - QDict *options = NULL; -@@ -3937,12 +3972,22 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) - (arg->mode == NEW_IMAGE_MODE_EXISTING || - !bdrv_has_zero_init(target_bs))); - -+ -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ -+ old_context = bdrv_get_aio_context(target_bs); -+ aio_context_release(aio_context); -+ aio_context_acquire(old_context); -+ - ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); - if (ret < 0) { - bdrv_unref(target_bs); -- goto out; -+ aio_context_release(old_context); -+ return; - } - -+ aio_context_release(old_context); -+ aio_context_acquire(aio_context); -+ - blockdev_mirror_common(arg->has_job_id ? 
arg->job_id : NULL, bs, target_bs, - arg->has_replaces, arg->replaces, arg->sync, - backing_mode, zero_target, -@@ -3984,6 +4029,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, - BlockDriverState *bs; - BlockDriverState *target_bs; - AioContext *aio_context; -+ AioContext *old_context; - BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; - Error *local_err = NULL; - bool zero_target; -@@ -4001,10 +4047,16 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, - - zero_target = (sync == MIRROR_SYNC_MODE_FULL); - -+ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ -+ old_context = bdrv_get_aio_context(target_bs); - aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -+ aio_context_acquire(old_context); - - ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); -+ -+ aio_context_release(old_context); -+ aio_context_acquire(aio_context); -+ - if (ret < 0) { - goto out; - } --- -1.8.3.1 - diff --git a/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch b/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch deleted file mode 100644 index c426384..0000000 --- a/kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch +++ /dev/null @@ -1,144 +0,0 @@ -From 959955217f745f1ee6cbea97314efe69f2d7dc08 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:43 +0000 -Subject: [PATCH 10/18] blockdev: unify qmp_blockdev_backup and blockdev-backup - transaction paths - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-4-slp@redhat.com> -Patchwork-id: 93756 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/9] blockdev: unify qmp_blockdev_backup and blockdev-backup transaction paths -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Issuing a blockdev-backup from qmp_blockdev_backup takes 
a slightly -different path than when it's issued from a transaction. In the code, -this is manifested as some redundancy between do_blockdev_backup() and -blockdev_backup_prepare(). - -This change unifies both paths, merging do_blockdev_backup() and -blockdev_backup_prepare(), and changing qmp_blockdev_backup() to -create a transaction instead of calling do_backup_common() direcly. - -As a side-effect, now qmp_blockdev_backup() is executed inside a -drained section, as it happens when creating a blockdev-backup -transaction. This change is visible from the user's perspective, as -the job gets paused and immediately resumed before starting the actual -work. - -Signed-off-by: Sergio Lopez -Reviewed-by: Max Reitz -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 5b7bfe515ecbd584b40ff6e41d2fd8b37c7d5139) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula ---- - blockdev.c | 60 +++++++++++++----------------------------------------------- - 1 file changed, 13 insertions(+), 47 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 5e85fc0..152a0f7 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1940,16 +1940,13 @@ typedef struct BlockdevBackupState { - BlockJob *job; - } BlockdevBackupState; - --static BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, -- Error **errp); -- - static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - { - BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); - BlockdevBackup *backup; -- BlockDriverState *bs, *target; -+ BlockDriverState *bs; -+ BlockDriverState *target_bs; - AioContext *aio_context; -- Error *local_err = NULL; - - assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); - backup = common->action->u.blockdev_backup.data; -@@ -1959,8 +1956,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - return; - } - -- target = bdrv_lookup_bs(backup->target, backup->target, errp); -- if 
(!target) { -+ target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); -+ if (!target_bs) { - return; - } - -@@ -1971,13 +1968,10 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) - /* Paired with .clean() */ - bdrv_drained_begin(state->bs); - -- state->job = do_blockdev_backup(backup, common->block_job_txn, &local_err); -- if (local_err) { -- error_propagate(errp, local_err); -- goto out; -- } -+ state->job = do_backup_common(qapi_BlockdevBackup_base(backup), -+ bs, target_bs, aio_context, -+ common->block_job_txn, errp); - --out: - aio_context_release(aio_context); - } - -@@ -3695,41 +3689,13 @@ XDbgBlockGraph *qmp_x_debug_query_block_graph(Error **errp) - return bdrv_get_xdbg_block_graph(errp); - } - --BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, -- Error **errp) -+void qmp_blockdev_backup(BlockdevBackup *backup, Error **errp) - { -- BlockDriverState *bs; -- BlockDriverState *target_bs; -- AioContext *aio_context; -- BlockJob *job; -- -- bs = bdrv_lookup_bs(backup->device, backup->device, errp); -- if (!bs) { -- return NULL; -- } -- -- target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); -- if (!target_bs) { -- return NULL; -- } -- -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- -- job = do_backup_common(qapi_BlockdevBackup_base(backup), -- bs, target_bs, aio_context, txn, errp); -- -- aio_context_release(aio_context); -- return job; --} -- --void qmp_blockdev_backup(BlockdevBackup *arg, Error **errp) --{ -- BlockJob *job; -- job = do_blockdev_backup(arg, NULL, errp); -- if (job) { -- job_start(&job->job); -- } -+ TransactionAction action = { -+ .type = TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP, -+ .u.blockdev_backup.data = backup, -+ }; -+ blockdev_do_action(&action, errp); - } - - /* Parameter check and block job starting for drive mirroring. 
--- -1.8.3.1 - diff --git a/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch b/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch deleted file mode 100644 index 9ec1975..0000000 --- a/kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch +++ /dev/null @@ -1,419 +0,0 @@ -From 4a03ab2a6cc4974d8d43240d1297b09160818af3 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 11:27:42 +0000 -Subject: [PATCH 09/18] blockdev: unify qmp_drive_backup and drive-backup - transaction paths - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-3-slp@redhat.com> -Patchwork-id: 93755 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/9] blockdev: unify qmp_drive_backup and drive-backup transaction paths -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Issuing a drive-backup from qmp_drive_backup takes a slightly -different path than when it's issued from a transaction. In the code, -this is manifested as some redundancy between do_drive_backup() and -drive_backup_prepare(). - -This change unifies both paths, merging do_drive_backup() and -drive_backup_prepare(), and changing qmp_drive_backup() to create a -transaction instead of calling do_backup_common() direcly. - -As a side-effect, now qmp_drive_backup() is executed inside a drained -section, as it happens when creating a drive-backup transaction. This -change is visible from the user's perspective, as the job gets paused -and immediately resumed before starting the actual work. - -Also fix tests 141, 185 and 219 to cope with the extra -JOB_STATUS_CHANGE lines. - -Signed-off-by: Sergio Lopez -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 2288ccfac96281c316db942d10e3f921c1373064) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. 
de Paula ---- - blockdev.c | 224 ++++++++++++++++++++------------------------- - tests/qemu-iotests/141.out | 2 + - tests/qemu-iotests/185.out | 2 + - tests/qemu-iotests/219 | 7 +- - tests/qemu-iotests/219.out | 8 ++ - 5 files changed, 117 insertions(+), 126 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index 553e315..5e85fc0 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -1761,39 +1761,128 @@ typedef struct DriveBackupState { - BlockJob *job; - } DriveBackupState; - --static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, -- Error **errp); -+static BlockJob *do_backup_common(BackupCommon *backup, -+ BlockDriverState *bs, -+ BlockDriverState *target_bs, -+ AioContext *aio_context, -+ JobTxn *txn, Error **errp); - - static void drive_backup_prepare(BlkActionState *common, Error **errp) - { - DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); -- BlockDriverState *bs; - DriveBackup *backup; -+ BlockDriverState *bs; -+ BlockDriverState *target_bs; -+ BlockDriverState *source = NULL; - AioContext *aio_context; -+ QDict *options; - Error *local_err = NULL; -+ int flags; -+ int64_t size; -+ bool set_backing_hd = false; - - assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); - backup = common->action->u.drive_backup.data; - -+ if (!backup->has_mode) { -+ backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; -+ } -+ - bs = bdrv_lookup_bs(backup->device, backup->device, errp); - if (!bs) { - return; - } - -+ if (!bs->drv) { -+ error_setg(errp, "Device has no medium"); -+ return; -+ } -+ - aio_context = bdrv_get_aio_context(bs); - aio_context_acquire(aio_context); - - /* Paired with .clean() */ - bdrv_drained_begin(bs); - -- state->bs = bs; -+ if (!backup->has_format) { -+ backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? 
-+ NULL : (char *) bs->drv->format_name; -+ } -+ -+ /* Early check to avoid creating target */ -+ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { -+ goto out; -+ } -+ -+ flags = bs->open_flags | BDRV_O_RDWR; -+ -+ /* -+ * See if we have a backing HD we can use to create our new image -+ * on top of. -+ */ -+ if (backup->sync == MIRROR_SYNC_MODE_TOP) { -+ source = backing_bs(bs); -+ if (!source) { -+ backup->sync = MIRROR_SYNC_MODE_FULL; -+ } -+ } -+ if (backup->sync == MIRROR_SYNC_MODE_NONE) { -+ source = bs; -+ flags |= BDRV_O_NO_BACKING; -+ set_backing_hd = true; -+ } -+ -+ size = bdrv_getlength(bs); -+ if (size < 0) { -+ error_setg_errno(errp, -size, "bdrv_getlength failed"); -+ goto out; -+ } -+ -+ if (backup->mode != NEW_IMAGE_MODE_EXISTING) { -+ assert(backup->format); -+ if (source) { -+ bdrv_refresh_filename(source); -+ bdrv_img_create(backup->target, backup->format, source->filename, -+ source->drv->format_name, NULL, -+ size, flags, false, &local_err); -+ } else { -+ bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, -+ size, flags, false, &local_err); -+ } -+ } - -- state->job = do_drive_backup(backup, common->block_job_txn, &local_err); - if (local_err) { - error_propagate(errp, local_err); - goto out; - } - -+ options = qdict_new(); -+ qdict_put_str(options, "discard", "unmap"); -+ qdict_put_str(options, "detect-zeroes", "unmap"); -+ if (backup->format) { -+ qdict_put_str(options, "driver", backup->format); -+ } -+ -+ target_bs = bdrv_open(backup->target, NULL, options, flags, errp); -+ if (!target_bs) { -+ goto out; -+ } -+ -+ if (set_backing_hd) { -+ bdrv_set_backing_hd(target_bs, source, &local_err); -+ if (local_err) { -+ goto unref; -+ } -+ } -+ -+ state->bs = bs; -+ -+ state->job = do_backup_common(qapi_DriveBackup_base(backup), -+ bs, target_bs, aio_context, -+ common->block_job_txn, errp); -+ -+unref: -+ bdrv_unref(target_bs); - out: - aio_context_release(aio_context); - } -@@ -3587,126 +3676,13 @@ static 
BlockJob *do_backup_common(BackupCommon *backup, - return job; - } - --static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, -- Error **errp) --{ -- BlockDriverState *bs; -- BlockDriverState *target_bs; -- BlockDriverState *source = NULL; -- BlockJob *job = NULL; -- AioContext *aio_context; -- QDict *options; -- Error *local_err = NULL; -- int flags; -- int64_t size; -- bool set_backing_hd = false; -- -- if (!backup->has_mode) { -- backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; -- } -- -- bs = bdrv_lookup_bs(backup->device, backup->device, errp); -- if (!bs) { -- return NULL; -- } -- -- if (!bs->drv) { -- error_setg(errp, "Device has no medium"); -- return NULL; -- } -- -- aio_context = bdrv_get_aio_context(bs); -- aio_context_acquire(aio_context); -- -- if (!backup->has_format) { -- backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? -- NULL : (char *) bs->drv->format_name; -- } -- -- /* Early check to avoid creating target */ -- if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { -- goto out; -- } -- -- flags = bs->open_flags | BDRV_O_RDWR; -- -- /* -- * See if we have a backing HD we can use to create our new image -- * on top of. 
-- */ -- if (backup->sync == MIRROR_SYNC_MODE_TOP) { -- source = backing_bs(bs); -- if (!source) { -- backup->sync = MIRROR_SYNC_MODE_FULL; -- } -- } -- if (backup->sync == MIRROR_SYNC_MODE_NONE) { -- source = bs; -- flags |= BDRV_O_NO_BACKING; -- set_backing_hd = true; -- } -- -- size = bdrv_getlength(bs); -- if (size < 0) { -- error_setg_errno(errp, -size, "bdrv_getlength failed"); -- goto out; -- } -- -- if (backup->mode != NEW_IMAGE_MODE_EXISTING) { -- assert(backup->format); -- if (source) { -- bdrv_refresh_filename(source); -- bdrv_img_create(backup->target, backup->format, source->filename, -- source->drv->format_name, NULL, -- size, flags, false, &local_err); -- } else { -- bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, -- size, flags, false, &local_err); -- } -- } -- -- if (local_err) { -- error_propagate(errp, local_err); -- goto out; -- } -- -- options = qdict_new(); -- qdict_put_str(options, "discard", "unmap"); -- qdict_put_str(options, "detect-zeroes", "unmap"); -- if (backup->format) { -- qdict_put_str(options, "driver", backup->format); -- } -- -- target_bs = bdrv_open(backup->target, NULL, options, flags, errp); -- if (!target_bs) { -- goto out; -- } -- -- if (set_backing_hd) { -- bdrv_set_backing_hd(target_bs, source, &local_err); -- if (local_err) { -- goto unref; -- } -- } -- -- job = do_backup_common(qapi_DriveBackup_base(backup), -- bs, target_bs, aio_context, txn, errp); -- --unref: -- bdrv_unref(target_bs); --out: -- aio_context_release(aio_context); -- return job; --} -- --void qmp_drive_backup(DriveBackup *arg, Error **errp) -+void qmp_drive_backup(DriveBackup *backup, Error **errp) - { -- -- BlockJob *job; -- job = do_drive_backup(arg, NULL, errp); -- if (job) { -- job_start(&job->job); -- } -+ TransactionAction action = { -+ .type = TRANSACTION_ACTION_KIND_DRIVE_BACKUP, -+ .u.drive_backup.data = backup, -+ }; -+ blockdev_do_action(&action, errp); - } - - BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) 
-diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out -index 3645675..263b680 100644 ---- a/tests/qemu-iotests/141.out -+++ b/tests/qemu-iotests/141.out -@@ -13,6 +13,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/m. - Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "job0"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} - {'execute': 'blockdev-del', 'arguments': {'node-name': 'drv0'}} - {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} - {'execute': 'block-job-cancel', 'arguments': {'device': 'job0'}} -diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out -index 8379ac5..9a3b657 100644 ---- a/tests/qemu-iotests/185.out -+++ b/tests/qemu-iotests/185.out -@@ -65,6 +65,8 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 cluster_size=65536 l - Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 cluster_size=65536 lazy_refcounts=off refcount_bits=16 - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "disk"}} - {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": 
"disk"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} - {"return": {}} - { 'execute': 'quit' } - {"return": {}} -diff --git a/tests/qemu-iotests/219 b/tests/qemu-iotests/219 -index e0c5166..655f54d 100755 ---- a/tests/qemu-iotests/219 -+++ b/tests/qemu-iotests/219 -@@ -63,7 +63,7 @@ def test_pause_resume(vm): - # logged immediately - iotests.log(vm.qmp('query-jobs')) - --def test_job_lifecycle(vm, job, job_args, has_ready=False): -+def test_job_lifecycle(vm, job, job_args, has_ready=False, is_mirror=False): - global img_size - - iotests.log('') -@@ -135,6 +135,9 @@ def test_job_lifecycle(vm, job, job_args, has_ready=False): - iotests.log('Waiting for PENDING state...') - iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) - iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) -+ if is_mirror: -+ iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) -+ iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) - - if not job_args.get('auto-finalize', True): - # PENDING state: -@@ -218,7 +221,7 @@ with iotests.FilePath('disk.img') as disk_path, \ - - for auto_finalize in [True, False]: - for auto_dismiss in [True, False]: -- test_job_lifecycle(vm, 'drive-backup', job_args={ -+ test_job_lifecycle(vm, 'drive-backup', is_mirror=True, job_args={ - 'device': 'drive0-node', - 'target': copy_path, - 'sync': 'full', -diff --git a/tests/qemu-iotests/219.out b/tests/qemu-iotests/219.out -index 8ebd3fe..0ea5d0b 100644 ---- a/tests/qemu-iotests/219.out -+++ b/tests/qemu-iotests/219.out -@@ -135,6 +135,8 @@ Pause/resume in RUNNING - {"return": {}} - - Waiting for PENDING state... 
-+{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "concluded"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -@@ -186,6 +188,8 @@ Pause/resume in RUNNING - {"return": {}} - - Waiting for PENDING state... -+{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "concluded"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -@@ -245,6 +249,8 @@ Pause/resume in RUNNING - {"return": {}} - - Waiting for PENDING state... 
-+{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"return": [{"current-progress": 4194304, "id": "job0", "status": "pending", "total-progress": 4194304, "type": "backup"}]} -@@ -304,6 +310,8 @@ Pause/resume in RUNNING - {"return": {}} - - Waiting for PENDING state... -+{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} - {"return": [{"current-progress": 4194304, "id": "job0", "status": "pending", "total-progress": 4194304, "type": "backup"}]} --- -1.8.3.1 - diff --git a/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch b/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch deleted file mode 100644 index 5d21bf8..0000000 --- a/kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch +++ /dev/null @@ -1,137 +0,0 @@ -From f756c1c4590a37c533ec0429644a7034ba35dada Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:38 +0100 -Subject: [PATCH 007/116] build: rename CONFIG_LIBCAP to CONFIG_LIBCAP_NG -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-4-dgilbert@redhat.com> -Patchwork-id: 93459 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 003/112] build: rename CONFIG_LIBCAP to CONFIG_LIBCAP_NG -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Paolo Bonzini - -Since we are actually testing for the newer capng library, rename the -symbol to match. - -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Paolo Bonzini -(cherry picked from commit a358bca24026a377e0804e137a4499e4e041918d) -Signed-off-by: Miroslav Rezanina ---- - configure | 2 +- - qemu-bridge-helper.c | 6 +++--- - scsi/qemu-pr-helper.c | 12 ++++++------ - 3 files changed, 10 insertions(+), 10 deletions(-) - -diff --git a/configure b/configure -index 16564f8..7831618 100755 ---- a/configure -+++ b/configure -@@ -6760,7 +6760,7 @@ if test "$l2tpv3" = "yes" ; then - echo "CONFIG_L2TPV3=y" >> $config_host_mak - fi - if test "$cap_ng" = "yes" ; then -- echo "CONFIG_LIBCAP=y" >> $config_host_mak -+ echo "CONFIG_LIBCAP_NG=y" >> $config_host_mak - fi - echo "CONFIG_AUDIO_DRIVERS=$audio_drv_list" >> $config_host_mak - for drv in $audio_drv_list; do -diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c -index 3d50ec0..88b2674 100644 ---- a/qemu-bridge-helper.c -+++ b/qemu-bridge-helper.c -@@ -43,7 +43,7 @@ - - #include "net/tap-linux.h" - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - #include - #endif - -@@ -207,7 +207,7 @@ static int send_fd(int c, int fd) - return sendmsg(c, &msg, 0); - } - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - static int drop_privileges(void) - { - /* clear all capabilities */ -@@ -246,7 +246,7 @@ int main(int argc, char **argv) - int access_allowed, 
access_denied; - int ret = EXIT_SUCCESS; - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - /* if we're run from an suid binary, immediately drop privileges preserving - * cap_net_admin */ - if (geteuid() == 0 && getuid() != geteuid()) { -diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c -index debb18f..0659cee 100644 ---- a/scsi/qemu-pr-helper.c -+++ b/scsi/qemu-pr-helper.c -@@ -24,7 +24,7 @@ - #include - #include - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - #include - #endif - #include -@@ -70,7 +70,7 @@ static int num_active_sockets = 1; - static int noisy; - static int verbose; - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - static int uid = -1; - static int gid = -1; - #endif -@@ -97,7 +97,7 @@ static void usage(const char *name) - " (default '%s')\n" - " -T, --trace [[enable=]][,events=][,file=]\n" - " specify tracing options\n" --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - " -u, --user=USER user to drop privileges to\n" - " -g, --group=GROUP group to drop privileges to\n" - #endif -@@ -827,7 +827,7 @@ static void close_server_socket(void) - num_active_sockets--; - } - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - static int drop_privileges(void) - { - /* clear all capabilities */ -@@ -920,7 +920,7 @@ int main(int argc, char **argv) - pidfile = g_strdup(optarg); - pidfile_specified = true; - break; --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - case 'u': { - unsigned long res; - struct passwd *userinfo = getpwnam(optarg); -@@ -1056,7 +1056,7 @@ int main(int argc, char **argv) - exit(EXIT_FAILURE); - } - --#ifdef CONFIG_LIBCAP -+#ifdef CONFIG_LIBCAP_NG - if (drop_privileges() < 0) { - error_report("Failed to drop privileges: %s", strerror(errno)); - exit(EXIT_FAILURE); --- -1.8.3.1 - diff --git a/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch b/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch deleted file mode 100644 index 5b1b170..0000000 --- a/kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch +++ 
/dev/null @@ -1,2463 +0,0 @@ -From fc2d0dfe60b14992a9b67e7a18394ba6365dc5ed Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 18 Mar 2020 18:10:40 +0000 -Subject: [PATCH 2/2] build-sys: do not make qemu-ga link with pixman -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20200318181040.256425-1-marcandre.lureau@redhat.com> -Patchwork-id: 94381 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] build-sys: do not make qemu-ga link with pixman -Bugzilla: 1811670 -RH-Acked-by: Markus Armbruster -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange - -Since commit d52c454aadcdae74506f315ebf8b58bb79a05573 ("contrib: add -vhost-user-gpu"), qemu-ga is linking with pixman. - -This is because the Make-based build-system use a global namespace for -variables, and we rely on "main.o-libs" for different linking targets. - -Note: this kind of variable clashing is hard to fix or prevent -currently. meson should help, as declarations have a linear -dependency and doesn't rely so much on variables and clever tricks. - -Note2: we have a lot of main.c (or other duplicated names!) in -tree. Imho, it would be annoying and a bad workaroud to rename all -those to avoid conflicts like I did here. - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1811670 - -Signed-off-by: Marc-André Lureau -Message-Id: <20200311160923.882474-1-marcandre.lureau@redhat.com> -Signed-off-by: Paolo Bonzini - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1811670 -Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=27330493 - -(cherry picked from commit 5b42bc5ce9ab4a3171819feea5042931817211fd) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. 
de Paula ---- - contrib/vhost-user-gpu/Makefile.objs | 6 +- - contrib/vhost-user-gpu/main.c | 1191 ------------------------------- - contrib/vhost-user-gpu/vhost-user-gpu.c | 1191 +++++++++++++++++++++++++++++++ - 3 files changed, 1194 insertions(+), 1194 deletions(-) - delete mode 100644 contrib/vhost-user-gpu/main.c - create mode 100644 contrib/vhost-user-gpu/vhost-user-gpu.c - -diff --git a/contrib/vhost-user-gpu/Makefile.objs b/contrib/vhost-user-gpu/Makefile.objs -index 6170c91..0929609 100644 ---- a/contrib/vhost-user-gpu/Makefile.objs -+++ b/contrib/vhost-user-gpu/Makefile.objs -@@ -1,7 +1,7 @@ --vhost-user-gpu-obj-y = main.o virgl.o vugbm.o -+vhost-user-gpu-obj-y = vhost-user-gpu.o virgl.o vugbm.o - --main.o-cflags := $(PIXMAN_CFLAGS) $(GBM_CFLAGS) --main.o-libs := $(PIXMAN_LIBS) -+vhost-user-gpu.o-cflags := $(PIXMAN_CFLAGS) $(GBM_CFLAGS) -+vhost-user-gpu.o-libs := $(PIXMAN_LIBS) - - virgl.o-cflags := $(VIRGL_CFLAGS) $(GBM_CFLAGS) - virgl.o-libs := $(VIRGL_LIBS) -diff --git a/contrib/vhost-user-gpu/main.c b/contrib/vhost-user-gpu/main.c -deleted file mode 100644 -index b45d201..0000000 ---- a/contrib/vhost-user-gpu/main.c -+++ /dev/null -@@ -1,1191 +0,0 @@ --/* -- * Virtio vhost-user GPU Device -- * -- * Copyright Red Hat, Inc. 2013-2018 -- * -- * Authors: -- * Dave Airlie -- * Gerd Hoffmann -- * Marc-André Lureau -- * -- * This work is licensed under the terms of the GNU GPL, version 2 or later. -- * See the COPYING file in the top-level directory. 
-- */ --#include "qemu/osdep.h" --#include "qemu/drm.h" --#include "qapi/error.h" --#include "qemu/sockets.h" -- --#include --#include -- --#include "vugpu.h" --#include "hw/virtio/virtio-gpu-bswap.h" --#include "hw/virtio/virtio-gpu-pixman.h" --#include "virgl.h" --#include "vugbm.h" -- --enum { -- VHOST_USER_GPU_MAX_QUEUES = 2, --}; -- --struct virtio_gpu_simple_resource { -- uint32_t resource_id; -- uint32_t width; -- uint32_t height; -- uint32_t format; -- struct iovec *iov; -- unsigned int iov_cnt; -- uint32_t scanout_bitmask; -- pixman_image_t *image; -- struct vugbm_buffer buffer; -- QTAILQ_ENTRY(virtio_gpu_simple_resource) next; --}; -- --static gboolean opt_print_caps; --static int opt_fdnum = -1; --static char *opt_socket_path; --static char *opt_render_node; --static gboolean opt_virgl; -- --static void vg_handle_ctrl(VuDev *dev, int qidx); -- --static const char * --vg_cmd_to_string(int cmd) --{ --#define CMD(cmd) [cmd] = #cmd -- static const char *vg_cmd_str[] = { -- CMD(VIRTIO_GPU_UNDEFINED), -- -- /* 2d commands */ -- CMD(VIRTIO_GPU_CMD_GET_DISPLAY_INFO), -- CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D), -- CMD(VIRTIO_GPU_CMD_RESOURCE_UNREF), -- CMD(VIRTIO_GPU_CMD_SET_SCANOUT), -- CMD(VIRTIO_GPU_CMD_RESOURCE_FLUSH), -- CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D), -- CMD(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING), -- CMD(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING), -- CMD(VIRTIO_GPU_CMD_GET_CAPSET_INFO), -- CMD(VIRTIO_GPU_CMD_GET_CAPSET), -- -- /* 3d commands */ -- CMD(VIRTIO_GPU_CMD_CTX_CREATE), -- CMD(VIRTIO_GPU_CMD_CTX_DESTROY), -- CMD(VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE), -- CMD(VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE), -- CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D), -- CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D), -- CMD(VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D), -- CMD(VIRTIO_GPU_CMD_SUBMIT_3D), -- -- /* cursor commands */ -- CMD(VIRTIO_GPU_CMD_UPDATE_CURSOR), -- CMD(VIRTIO_GPU_CMD_MOVE_CURSOR), -- }; --#undef REQ -- -- if (cmd >= 0 && cmd < G_N_ELEMENTS(vg_cmd_str)) { -- return 
vg_cmd_str[cmd]; -- } else { -- return "unknown"; -- } --} -- --static int --vg_sock_fd_read(int sock, void *buf, ssize_t buflen) --{ -- int ret; -- -- do { -- ret = read(sock, buf, buflen); -- } while (ret < 0 && (errno == EINTR || errno == EAGAIN)); -- -- g_warn_if_fail(ret == buflen); -- return ret; --} -- --static void --vg_sock_fd_close(VuGpu *g) --{ -- if (g->sock_fd >= 0) { -- close(g->sock_fd); -- g->sock_fd = -1; -- } --} -- --static gboolean --source_wait_cb(gint fd, GIOCondition condition, gpointer user_data) --{ -- VuGpu *g = user_data; -- -- if (!vg_recv_msg(g, VHOST_USER_GPU_DMABUF_UPDATE, 0, NULL)) { -- return G_SOURCE_CONTINUE; -- } -- -- /* resume */ -- g->wait_ok = 0; -- vg_handle_ctrl(&g->dev.parent, 0); -- -- return G_SOURCE_REMOVE; --} -- --void --vg_wait_ok(VuGpu *g) --{ -- assert(g->wait_ok == 0); -- g->wait_ok = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP, -- source_wait_cb, g); --} -- --static int --vg_sock_fd_write(int sock, const void *buf, ssize_t buflen, int fd) --{ -- ssize_t ret; -- struct iovec iov = { -- .iov_base = (void *)buf, -- .iov_len = buflen, -- }; -- struct msghdr msg = { -- .msg_iov = &iov, -- .msg_iovlen = 1, -- }; -- union { -- struct cmsghdr cmsghdr; -- char control[CMSG_SPACE(sizeof(int))]; -- } cmsgu; -- struct cmsghdr *cmsg; -- -- if (fd != -1) { -- msg.msg_control = cmsgu.control; -- msg.msg_controllen = sizeof(cmsgu.control); -- -- cmsg = CMSG_FIRSTHDR(&msg); -- cmsg->cmsg_len = CMSG_LEN(sizeof(int)); -- cmsg->cmsg_level = SOL_SOCKET; -- cmsg->cmsg_type = SCM_RIGHTS; -- -- *((int *)CMSG_DATA(cmsg)) = fd; -- } -- -- do { -- ret = sendmsg(sock, &msg, 0); -- } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); -- -- g_warn_if_fail(ret == buflen); -- return ret; --} -- --void --vg_send_msg(VuGpu *vg, const VhostUserGpuMsg *msg, int fd) --{ -- if (vg_sock_fd_write(vg->sock_fd, msg, -- VHOST_USER_GPU_HDR_SIZE + msg->size, fd) < 0) { -- vg_sock_fd_close(vg); -- } --} -- --bool --vg_recv_msg(VuGpu *g, uint32_t 
expect_req, uint32_t expect_size, -- gpointer payload) --{ -- uint32_t req, flags, size; -- -- if (vg_sock_fd_read(g->sock_fd, &req, sizeof(req)) < 0 || -- vg_sock_fd_read(g->sock_fd, &flags, sizeof(flags)) < 0 || -- vg_sock_fd_read(g->sock_fd, &size, sizeof(size)) < 0) { -- goto err; -- } -- -- g_return_val_if_fail(req == expect_req, false); -- g_return_val_if_fail(flags & VHOST_USER_GPU_MSG_FLAG_REPLY, false); -- g_return_val_if_fail(size == expect_size, false); -- -- if (size && vg_sock_fd_read(g->sock_fd, payload, size) != size) { -- goto err; -- } -- -- return true; -- --err: -- vg_sock_fd_close(g); -- return false; --} -- --static struct virtio_gpu_simple_resource * --virtio_gpu_find_resource(VuGpu *g, uint32_t resource_id) --{ -- struct virtio_gpu_simple_resource *res; -- -- QTAILQ_FOREACH(res, &g->reslist, next) { -- if (res->resource_id == resource_id) { -- return res; -- } -- } -- return NULL; --} -- --void --vg_ctrl_response(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd, -- struct virtio_gpu_ctrl_hdr *resp, -- size_t resp_len) --{ -- size_t s; -- -- if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE) { -- resp->flags |= VIRTIO_GPU_FLAG_FENCE; -- resp->fence_id = cmd->cmd_hdr.fence_id; -- resp->ctx_id = cmd->cmd_hdr.ctx_id; -- } -- virtio_gpu_ctrl_hdr_bswap(resp); -- s = iov_from_buf(cmd->elem.in_sg, cmd->elem.in_num, 0, resp, resp_len); -- if (s != resp_len) { -- g_critical("%s: response size incorrect %zu vs %zu", -- __func__, s, resp_len); -- } -- vu_queue_push(&g->dev.parent, cmd->vq, &cmd->elem, s); -- vu_queue_notify(&g->dev.parent, cmd->vq); -- cmd->finished = true; --} -- --void --vg_ctrl_response_nodata(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd, -- enum virtio_gpu_ctrl_type type) --{ -- struct virtio_gpu_ctrl_hdr resp = { -- .type = type, -- }; -- -- vg_ctrl_response(g, cmd, &resp, sizeof(resp)); --} -- --void --vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_resp_display_info dpy_info = { {} }; 
-- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_GET_DISPLAY_INFO, -- .size = 0, -- }; -- -- assert(vg->wait_ok == 0); -- -- vg_send_msg(vg, &msg, -1); -- if (!vg_recv_msg(vg, msg.request, sizeof(dpy_info), &dpy_info)) { -- return; -- } -- -- vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info)); --} -- --static void --vg_resource_create_2d(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- pixman_format_code_t pformat; -- struct virtio_gpu_simple_resource *res; -- struct virtio_gpu_resource_create_2d c2d; -- -- VUGPU_FILL_CMD(c2d); -- virtio_gpu_bswap_32(&c2d, sizeof(c2d)); -- -- if (c2d.resource_id == 0) { -- g_critical("%s: resource id 0 is not allowed", __func__); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- res = virtio_gpu_find_resource(g, c2d.resource_id); -- if (res) { -- g_critical("%s: resource already exists %d", __func__, c2d.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- res = g_new0(struct virtio_gpu_simple_resource, 1); -- res->width = c2d.width; -- res->height = c2d.height; -- res->format = c2d.format; -- res->resource_id = c2d.resource_id; -- -- pformat = virtio_gpu_get_pixman_format(c2d.format); -- if (!pformat) { -- g_critical("%s: host couldn't handle guest format %d", -- __func__, c2d.format); -- g_free(res); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -- return; -- } -- vugbm_buffer_create(&res->buffer, &g->gdev, c2d.width, c2d.height); -- res->image = pixman_image_create_bits(pformat, -- c2d.width, -- c2d.height, -- (uint32_t *)res->buffer.mmap, -- res->buffer.stride); -- if (!res->image) { -- g_critical("%s: resource creation failed %d %d %d", -- __func__, c2d.resource_id, c2d.width, c2d.height); -- g_free(res); -- cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; -- return; -- } -- -- QTAILQ_INSERT_HEAD(&g->reslist, res, next); --} -- --static void --vg_disable_scanout(VuGpu *g, int scanout_id) --{ -- struct virtio_gpu_scanout *scanout 
= &g->scanout[scanout_id]; -- struct virtio_gpu_simple_resource *res; -- -- if (scanout->resource_id == 0) { -- return; -- } -- -- res = virtio_gpu_find_resource(g, scanout->resource_id); -- if (res) { -- res->scanout_bitmask &= ~(1 << scanout_id); -- } -- -- scanout->width = 0; -- scanout->height = 0; -- -- if (g->sock_fd >= 0) { -- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_SCANOUT, -- .size = sizeof(VhostUserGpuScanout), -- .payload.scanout.scanout_id = scanout_id, -- }; -- vg_send_msg(g, &msg, -1); -- } --} -- --static void --vg_resource_destroy(VuGpu *g, -- struct virtio_gpu_simple_resource *res) --{ -- int i; -- -- if (res->scanout_bitmask) { -- for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { -- if (res->scanout_bitmask & (1 << i)) { -- vg_disable_scanout(g, i); -- } -- } -- } -- -- vugbm_buffer_destroy(&res->buffer); -- pixman_image_unref(res->image); -- QTAILQ_REMOVE(&g->reslist, res, next); -- g_free(res); --} -- --static void --vg_resource_unref(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_simple_resource *res; -- struct virtio_gpu_resource_unref unref; -- -- VUGPU_FILL_CMD(unref); -- virtio_gpu_bswap_32(&unref, sizeof(unref)); -- -- res = virtio_gpu_find_resource(g, unref.resource_id); -- if (!res) { -- g_critical("%s: illegal resource specified %d", -- __func__, unref.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- vg_resource_destroy(g, res); --} -- --int --vg_create_mapping_iov(VuGpu *g, -- struct virtio_gpu_resource_attach_backing *ab, -- struct virtio_gpu_ctrl_command *cmd, -- struct iovec **iov) --{ -- struct virtio_gpu_mem_entry *ents; -- size_t esize, s; -- int i; -- -- if (ab->nr_entries > 16384) { -- g_critical("%s: nr_entries is too big (%d > 16384)", -- __func__, ab->nr_entries); -- return -1; -- } -- -- esize = sizeof(*ents) * ab->nr_entries; -- ents = g_malloc(esize); -- s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, -- sizeof(*ab), ents, esize); -- if (s 
!= esize) { -- g_critical("%s: command data size incorrect %zu vs %zu", -- __func__, s, esize); -- g_free(ents); -- return -1; -- } -- -- *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries); -- for (i = 0; i < ab->nr_entries; i++) { -- uint64_t len = ents[i].length; -- (*iov)[i].iov_len = ents[i].length; -- (*iov)[i].iov_base = vu_gpa_to_va(&g->dev.parent, &len, ents[i].addr); -- if (!(*iov)[i].iov_base || len != ents[i].length) { -- g_critical("%s: resource %d element %d", -- __func__, ab->resource_id, i); -- g_free(*iov); -- g_free(ents); -- *iov = NULL; -- return -1; -- } -- } -- g_free(ents); -- return 0; --} -- --static void --vg_resource_attach_backing(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_simple_resource *res; -- struct virtio_gpu_resource_attach_backing ab; -- int ret; -- -- VUGPU_FILL_CMD(ab); -- virtio_gpu_bswap_32(&ab, sizeof(ab)); -- -- res = virtio_gpu_find_resource(g, ab.resource_id); -- if (!res) { -- g_critical("%s: illegal resource specified %d", -- __func__, ab.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov); -- if (ret != 0) { -- cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; -- return; -- } -- -- res->iov_cnt = ab.nr_entries; --} -- --static void --vg_resource_detach_backing(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_simple_resource *res; -- struct virtio_gpu_resource_detach_backing detach; -- -- VUGPU_FILL_CMD(detach); -- virtio_gpu_bswap_32(&detach, sizeof(detach)); -- -- res = virtio_gpu_find_resource(g, detach.resource_id); -- if (!res || !res->iov) { -- g_critical("%s: illegal resource specified %d", -- __func__, detach.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- g_free(res->iov); -- res->iov = NULL; -- res->iov_cnt = 0; --} -- --static void --vg_transfer_to_host_2d(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct 
virtio_gpu_simple_resource *res; -- int h; -- uint32_t src_offset, dst_offset, stride; -- int bpp; -- pixman_format_code_t format; -- struct virtio_gpu_transfer_to_host_2d t2d; -- -- VUGPU_FILL_CMD(t2d); -- virtio_gpu_t2d_bswap(&t2d); -- -- res = virtio_gpu_find_resource(g, t2d.resource_id); -- if (!res || !res->iov) { -- g_critical("%s: illegal resource specified %d", -- __func__, t2d.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- if (t2d.r.x > res->width || -- t2d.r.y > res->height || -- t2d.r.width > res->width || -- t2d.r.height > res->height || -- t2d.r.x + t2d.r.width > res->width || -- t2d.r.y + t2d.r.height > res->height) { -- g_critical("%s: transfer bounds outside resource" -- " bounds for resource %d: %d %d %d %d vs %d %d", -- __func__, t2d.resource_id, t2d.r.x, t2d.r.y, -- t2d.r.width, t2d.r.height, res->width, res->height); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -- return; -- } -- -- format = pixman_image_get_format(res->image); -- bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8; -- stride = pixman_image_get_stride(res->image); -- -- if (t2d.offset || t2d.r.x || t2d.r.y || -- t2d.r.width != pixman_image_get_width(res->image)) { -- void *img_data = pixman_image_get_data(res->image); -- for (h = 0; h < t2d.r.height; h++) { -- src_offset = t2d.offset + stride * h; -- dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp); -- -- iov_to_buf(res->iov, res->iov_cnt, src_offset, -- img_data -- + dst_offset, t2d.r.width * bpp); -- } -- } else { -- iov_to_buf(res->iov, res->iov_cnt, 0, -- pixman_image_get_data(res->image), -- pixman_image_get_stride(res->image) -- * pixman_image_get_height(res->image)); -- } --} -- --static void --vg_set_scanout(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_simple_resource *res, *ores; -- struct virtio_gpu_scanout *scanout; -- struct virtio_gpu_set_scanout ss; -- int fd; -- -- VUGPU_FILL_CMD(ss); -- virtio_gpu_bswap_32(&ss, sizeof(ss)); -- -- if 
(ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) { -- g_critical("%s: illegal scanout id specified %d", -- __func__, ss.scanout_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; -- return; -- } -- -- if (ss.resource_id == 0) { -- vg_disable_scanout(g, ss.scanout_id); -- return; -- } -- -- /* create a surface for this scanout */ -- res = virtio_gpu_find_resource(g, ss.resource_id); -- if (!res) { -- g_critical("%s: illegal resource specified %d", -- __func__, ss.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- if (ss.r.x > res->width || -- ss.r.y > res->height || -- ss.r.width > res->width || -- ss.r.height > res->height || -- ss.r.x + ss.r.width > res->width || -- ss.r.y + ss.r.height > res->height) { -- g_critical("%s: illegal scanout %d bounds for" -- " resource %d, (%d,%d)+%d,%d vs %d %d", -- __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y, -- ss.r.width, ss.r.height, res->width, res->height); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -- return; -- } -- -- scanout = &g->scanout[ss.scanout_id]; -- -- ores = virtio_gpu_find_resource(g, scanout->resource_id); -- if (ores) { -- ores->scanout_bitmask &= ~(1 << ss.scanout_id); -- } -- -- res->scanout_bitmask |= (1 << ss.scanout_id); -- scanout->resource_id = ss.resource_id; -- scanout->x = ss.r.x; -- scanout->y = ss.r.y; -- scanout->width = ss.r.width; -- scanout->height = ss.r.height; -- -- struct vugbm_buffer *buffer = &res->buffer; -- -- if (vugbm_buffer_can_get_dmabuf_fd(buffer)) { -- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_DMABUF_SCANOUT, -- .size = sizeof(VhostUserGpuDMABUFScanout), -- .payload.dmabuf_scanout = (VhostUserGpuDMABUFScanout) { -- .scanout_id = ss.scanout_id, -- .x = ss.r.x, -- .y = ss.r.y, -- .width = ss.r.width, -- .height = ss.r.height, -- .fd_width = buffer->width, -- .fd_height = buffer->height, -- .fd_stride = buffer->stride, -- .fd_drm_fourcc = buffer->format -- } -- }; -- -- if 
(vugbm_buffer_get_dmabuf_fd(buffer, &fd)) { -- vg_send_msg(g, &msg, fd); -- close(fd); -- } -- } else { -- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_SCANOUT, -- .size = sizeof(VhostUserGpuScanout), -- .payload.scanout = (VhostUserGpuScanout) { -- .scanout_id = ss.scanout_id, -- .width = scanout->width, -- .height = scanout->height -- } -- }; -- vg_send_msg(g, &msg, -1); -- } --} -- --static void --vg_resource_flush(VuGpu *g, -- struct virtio_gpu_ctrl_command *cmd) --{ -- struct virtio_gpu_simple_resource *res; -- struct virtio_gpu_resource_flush rf; -- pixman_region16_t flush_region; -- int i; -- -- VUGPU_FILL_CMD(rf); -- virtio_gpu_bswap_32(&rf, sizeof(rf)); -- -- res = virtio_gpu_find_resource(g, rf.resource_id); -- if (!res) { -- g_critical("%s: illegal resource specified %d\n", -- __func__, rf.resource_id); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -- return; -- } -- -- if (rf.r.x > res->width || -- rf.r.y > res->height || -- rf.r.width > res->width || -- rf.r.height > res->height || -- rf.r.x + rf.r.width > res->width || -- rf.r.y + rf.r.height > res->height) { -- g_critical("%s: flush bounds outside resource" -- " bounds for resource %d: %d %d %d %d vs %d %d\n", -- __func__, rf.resource_id, rf.r.x, rf.r.y, -- rf.r.width, rf.r.height, res->width, res->height); -- cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -- return; -- } -- -- pixman_region_init_rect(&flush_region, -- rf.r.x, rf.r.y, rf.r.width, rf.r.height); -- for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { -- struct virtio_gpu_scanout *scanout; -- pixman_region16_t region, finalregion; -- pixman_box16_t *extents; -- -- if (!(res->scanout_bitmask & (1 << i))) { -- continue; -- } -- scanout = &g->scanout[i]; -- -- pixman_region_init(&finalregion); -- pixman_region_init_rect(®ion, scanout->x, scanout->y, -- scanout->width, scanout->height); -- -- pixman_region_intersect(&finalregion, &flush_region, ®ion); -- -- extents = pixman_region_extents(&finalregion); -- size_t width = 
extents->x2 - extents->x1; -- size_t height = extents->y2 - extents->y1; -- -- if (vugbm_buffer_can_get_dmabuf_fd(&res->buffer)) { -- VhostUserGpuMsg vmsg = { -- .request = VHOST_USER_GPU_DMABUF_UPDATE, -- .size = sizeof(VhostUserGpuUpdate), -- .payload.update = (VhostUserGpuUpdate) { -- .scanout_id = i, -- .x = extents->x1, -- .y = extents->y1, -- .width = width, -- .height = height, -- } -- }; -- vg_send_msg(g, &vmsg, -1); -- vg_wait_ok(g); -- } else { -- size_t bpp = -- PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) / 8; -- size_t size = width * height * bpp; -- -- void *p = g_malloc(VHOST_USER_GPU_HDR_SIZE + -- sizeof(VhostUserGpuUpdate) + size); -- VhostUserGpuMsg *msg = p; -- msg->request = VHOST_USER_GPU_UPDATE; -- msg->size = sizeof(VhostUserGpuUpdate) + size; -- msg->payload.update = (VhostUserGpuUpdate) { -- .scanout_id = i, -- .x = extents->x1, -- .y = extents->y1, -- .width = width, -- .height = height, -- }; -- pixman_image_t *i = -- pixman_image_create_bits(pixman_image_get_format(res->image), -- msg->payload.update.width, -- msg->payload.update.height, -- p + offsetof(VhostUserGpuMsg, -- payload.update.data), -- width * bpp); -- pixman_image_composite(PIXMAN_OP_SRC, -- res->image, NULL, i, -- extents->x1, extents->y1, -- 0, 0, 0, 0, -- width, height); -- pixman_image_unref(i); -- vg_send_msg(g, msg, -1); -- g_free(msg); -- } -- pixman_region_fini(®ion); -- pixman_region_fini(&finalregion); -- } -- pixman_region_fini(&flush_region); --} -- --static void --vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) --{ -- switch (cmd->cmd_hdr.type) { -- case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: -- vg_get_display_info(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: -- vg_resource_create_2d(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_RESOURCE_UNREF: -- vg_resource_unref(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_RESOURCE_FLUSH: -- vg_resource_flush(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: -- 
vg_transfer_to_host_2d(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_SET_SCANOUT: -- vg_set_scanout(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: -- vg_resource_attach_backing(vg, cmd); -- break; -- case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: -- vg_resource_detach_backing(vg, cmd); -- break; -- /* case VIRTIO_GPU_CMD_GET_EDID: */ -- /* break */ -- default: -- g_warning("TODO handle ctrl %x\n", cmd->cmd_hdr.type); -- cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; -- break; -- } -- if (!cmd->finished) { -- vg_ctrl_response_nodata(vg, cmd, cmd->error ? cmd->error : -- VIRTIO_GPU_RESP_OK_NODATA); -- } --} -- --static void --vg_handle_ctrl(VuDev *dev, int qidx) --{ -- VuGpu *vg = container_of(dev, VuGpu, dev.parent); -- VuVirtq *vq = vu_get_queue(dev, qidx); -- struct virtio_gpu_ctrl_command *cmd = NULL; -- size_t len; -- -- for (;;) { -- if (vg->wait_ok != 0) { -- return; -- } -- -- cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command)); -- if (!cmd) { -- break; -- } -- cmd->vq = vq; -- cmd->error = 0; -- cmd->finished = false; -- -- len = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, -- 0, &cmd->cmd_hdr, sizeof(cmd->cmd_hdr)); -- if (len != sizeof(cmd->cmd_hdr)) { -- g_warning("%s: command size incorrect %zu vs %zu\n", -- __func__, len, sizeof(cmd->cmd_hdr)); -- } -- -- virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr); -- g_debug("%d %s\n", cmd->cmd_hdr.type, -- vg_cmd_to_string(cmd->cmd_hdr.type)); -- -- if (vg->virgl) { -- vg_virgl_process_cmd(vg, cmd); -- } else { -- vg_process_cmd(vg, cmd); -- } -- -- if (!cmd->finished) { -- QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next); -- vg->inflight++; -- } else { -- g_free(cmd); -- } -- } --} -- --static void --update_cursor_data_simple(VuGpu *g, uint32_t resource_id, gpointer data) --{ -- struct virtio_gpu_simple_resource *res; -- -- res = virtio_gpu_find_resource(g, resource_id); -- g_return_if_fail(res != NULL); -- g_return_if_fail(pixman_image_get_width(res->image) == 64); -- 
g_return_if_fail(pixman_image_get_height(res->image) == 64); -- g_return_if_fail( -- PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) == 32); -- -- memcpy(data, pixman_image_get_data(res->image), 64 * 64 * sizeof(uint32_t)); --} -- --static void --vg_process_cursor_cmd(VuGpu *g, struct virtio_gpu_update_cursor *cursor) --{ -- bool move = cursor->hdr.type != VIRTIO_GPU_CMD_MOVE_CURSOR; -- -- g_debug("%s move:%d\n", G_STRFUNC, move); -- -- if (move) { -- VhostUserGpuMsg msg = { -- .request = cursor->resource_id ? -- VHOST_USER_GPU_CURSOR_POS : VHOST_USER_GPU_CURSOR_POS_HIDE, -- .size = sizeof(VhostUserGpuCursorPos), -- .payload.cursor_pos = { -- .scanout_id = cursor->pos.scanout_id, -- .x = cursor->pos.x, -- .y = cursor->pos.y, -- } -- }; -- vg_send_msg(g, &msg, -1); -- } else { -- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_CURSOR_UPDATE, -- .size = sizeof(VhostUserGpuCursorUpdate), -- .payload.cursor_update = { -- .pos = { -- .scanout_id = cursor->pos.scanout_id, -- .x = cursor->pos.x, -- .y = cursor->pos.y, -- }, -- .hot_x = cursor->hot_x, -- .hot_y = cursor->hot_y, -- } -- }; -- if (g->virgl) { -- vg_virgl_update_cursor_data(g, cursor->resource_id, -- msg.payload.cursor_update.data); -- } else { -- update_cursor_data_simple(g, cursor->resource_id, -- msg.payload.cursor_update.data); -- } -- vg_send_msg(g, &msg, -1); -- } --} -- --static void --vg_handle_cursor(VuDev *dev, int qidx) --{ -- VuGpu *g = container_of(dev, VuGpu, dev.parent); -- VuVirtq *vq = vu_get_queue(dev, qidx); -- VuVirtqElement *elem; -- size_t len; -- struct virtio_gpu_update_cursor cursor; -- -- for (;;) { -- elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); -- if (!elem) { -- break; -- } -- g_debug("cursor out:%d in:%d\n", elem->out_num, elem->in_num); -- -- len = iov_to_buf(elem->out_sg, elem->out_num, -- 0, &cursor, sizeof(cursor)); -- if (len != sizeof(cursor)) { -- g_warning("%s: cursor size incorrect %zu vs %zu\n", -- __func__, len, sizeof(cursor)); -- } else { -- 
virtio_gpu_bswap_32(&cursor, sizeof(cursor)); -- vg_process_cursor_cmd(g, &cursor); -- } -- vu_queue_push(dev, vq, elem, 0); -- vu_queue_notify(dev, vq); -- g_free(elem); -- } --} -- --static void --vg_panic(VuDev *dev, const char *msg) --{ -- g_critical("%s\n", msg); -- exit(1); --} -- --static void --vg_queue_set_started(VuDev *dev, int qidx, bool started) --{ -- VuVirtq *vq = vu_get_queue(dev, qidx); -- -- g_debug("queue started %d:%d\n", qidx, started); -- -- switch (qidx) { -- case 0: -- vu_set_queue_handler(dev, vq, started ? vg_handle_ctrl : NULL); -- break; -- case 1: -- vu_set_queue_handler(dev, vq, started ? vg_handle_cursor : NULL); -- break; -- default: -- break; -- } --} -- --static void --set_gpu_protocol_features(VuGpu *g) --{ -- uint64_t u64; -- VhostUserGpuMsg msg = { -- .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES -- }; -- -- assert(g->wait_ok == 0); -- vg_send_msg(g, &msg, -1); -- if (!vg_recv_msg(g, msg.request, sizeof(u64), &u64)) { -- return; -- } -- -- msg = (VhostUserGpuMsg) { -- .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES, -- .size = sizeof(uint64_t), -- .payload.u64 = 0 -- }; -- vg_send_msg(g, &msg, -1); --} -- --static int --vg_process_msg(VuDev *dev, VhostUserMsg *msg, int *do_reply) --{ -- VuGpu *g = container_of(dev, VuGpu, dev.parent); -- -- switch (msg->request) { -- case VHOST_USER_GPU_SET_SOCKET: { -- g_return_val_if_fail(msg->fd_num == 1, 1); -- g_return_val_if_fail(g->sock_fd == -1, 1); -- g->sock_fd = msg->fds[0]; -- set_gpu_protocol_features(g); -- return 1; -- } -- default: -- return 0; -- } -- -- return 0; --} -- --static uint64_t --vg_get_features(VuDev *dev) --{ -- uint64_t features = 0; -- -- if (opt_virgl) { -- features |= 1 << VIRTIO_GPU_F_VIRGL; -- } -- -- return features; --} -- --static void --vg_set_features(VuDev *dev, uint64_t features) --{ -- VuGpu *g = container_of(dev, VuGpu, dev.parent); -- bool virgl = features & (1 << VIRTIO_GPU_F_VIRGL); -- -- if (virgl && !g->virgl_inited) { -- if (!vg_virgl_init(g)) 
{ -- vg_panic(dev, "Failed to initialize virgl"); -- } -- g->virgl_inited = true; -- } -- -- g->virgl = virgl; --} -- --static int --vg_get_config(VuDev *dev, uint8_t *config, uint32_t len) --{ -- VuGpu *g = container_of(dev, VuGpu, dev.parent); -- -- g_return_val_if_fail(len <= sizeof(struct virtio_gpu_config), -1); -- -- if (opt_virgl) { -- g->virtio_config.num_capsets = vg_virgl_get_num_capsets(); -- } -- -- memcpy(config, &g->virtio_config, len); -- -- return 0; --} -- --static int --vg_set_config(VuDev *dev, const uint8_t *data, -- uint32_t offset, uint32_t size, -- uint32_t flags) --{ -- VuGpu *g = container_of(dev, VuGpu, dev.parent); -- struct virtio_gpu_config *config = (struct virtio_gpu_config *)data; -- -- if (config->events_clear) { -- g->virtio_config.events_read &= ~config->events_clear; -- } -- -- return 0; --} -- --static const VuDevIface vuiface = { -- .set_features = vg_set_features, -- .get_features = vg_get_features, -- .queue_set_started = vg_queue_set_started, -- .process_msg = vg_process_msg, -- .get_config = vg_get_config, -- .set_config = vg_set_config, --}; -- --static void --vg_destroy(VuGpu *g) --{ -- struct virtio_gpu_simple_resource *res, *tmp; -- -- vug_deinit(&g->dev); -- -- vg_sock_fd_close(g); -- -- QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) { -- vg_resource_destroy(g, res); -- } -- -- vugbm_device_destroy(&g->gdev); --} -- --static GOptionEntry entries[] = { -- { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, -- "Print capabilities", NULL }, -- { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, -- "Use inherited fd socket", "FDNUM" }, -- { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, -- "Use UNIX socket path", "PATH" }, -- { "render-node", 'r', 0, G_OPTION_ARG_FILENAME, &opt_render_node, -- "Specify DRM render node", "PATH" }, -- { "virgl", 'v', 0, G_OPTION_ARG_NONE, &opt_virgl, -- "Turn virgl rendering on", NULL }, -- { NULL, } --}; -- --int --main(int argc, char *argv[]) --{ -- 
GOptionContext *context; -- GError *error = NULL; -- GMainLoop *loop = NULL; -- int fd; -- VuGpu g = { .sock_fd = -1, .drm_rnode_fd = -1 }; -- -- QTAILQ_INIT(&g.reslist); -- QTAILQ_INIT(&g.fenceq); -- -- context = g_option_context_new("QEMU vhost-user-gpu"); -- g_option_context_add_main_entries(context, entries, NULL); -- if (!g_option_context_parse(context, &argc, &argv, &error)) { -- g_printerr("Option parsing failed: %s\n", error->message); -- exit(EXIT_FAILURE); -- } -- g_option_context_free(context); -- -- if (opt_print_caps) { -- g_print("{\n"); -- g_print(" \"type\": \"gpu\",\n"); -- g_print(" \"features\": [\n"); -- g_print(" \"render-node\",\n"); -- g_print(" \"virgl\"\n"); -- g_print(" ]\n"); -- g_print("}\n"); -- exit(EXIT_SUCCESS); -- } -- -- g.drm_rnode_fd = qemu_drm_rendernode_open(opt_render_node); -- if (opt_render_node && g.drm_rnode_fd == -1) { -- g_printerr("Failed to open DRM rendernode.\n"); -- exit(EXIT_FAILURE); -- } -- -- if (g.drm_rnode_fd >= 0) { -- if (!vugbm_device_init(&g.gdev, g.drm_rnode_fd)) { -- g_warning("Failed to init DRM device, using fallback path"); -- } -- } -- -- if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) { -- g_printerr("Please specify either --fd or --socket-path\n"); -- exit(EXIT_FAILURE); -- } -- -- if (opt_socket_path) { -- int lsock = unix_listen(opt_socket_path, &error_fatal); -- if (lsock < 0) { -- g_printerr("Failed to listen on %s.\n", opt_socket_path); -- exit(EXIT_FAILURE); -- } -- fd = accept(lsock, NULL, NULL); -- close(lsock); -- } else { -- fd = opt_fdnum; -- } -- if (fd == -1) { -- g_printerr("Invalid vhost-user socket.\n"); -- exit(EXIT_FAILURE); -- } -- -- if (!vug_init(&g.dev, VHOST_USER_GPU_MAX_QUEUES, fd, vg_panic, &vuiface)) { -- g_printerr("Failed to initialize libvhost-user-glib.\n"); -- exit(EXIT_FAILURE); -- } -- -- loop = g_main_loop_new(NULL, FALSE); -- g_main_loop_run(loop); -- g_main_loop_unref(loop); -- -- vg_destroy(&g); -- if (g.drm_rnode_fd >= 0) { -- close(g.drm_rnode_fd); -- } -- 
-- return 0; --} -diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c b/contrib/vhost-user-gpu/vhost-user-gpu.c -new file mode 100644 -index 0000000..b45d201 ---- /dev/null -+++ b/contrib/vhost-user-gpu/vhost-user-gpu.c -@@ -0,0 +1,1191 @@ -+/* -+ * Virtio vhost-user GPU Device -+ * -+ * Copyright Red Hat, Inc. 2013-2018 -+ * -+ * Authors: -+ * Dave Airlie -+ * Gerd Hoffmann -+ * Marc-André Lureau -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+#include "qemu/osdep.h" -+#include "qemu/drm.h" -+#include "qapi/error.h" -+#include "qemu/sockets.h" -+ -+#include -+#include -+ -+#include "vugpu.h" -+#include "hw/virtio/virtio-gpu-bswap.h" -+#include "hw/virtio/virtio-gpu-pixman.h" -+#include "virgl.h" -+#include "vugbm.h" -+ -+enum { -+ VHOST_USER_GPU_MAX_QUEUES = 2, -+}; -+ -+struct virtio_gpu_simple_resource { -+ uint32_t resource_id; -+ uint32_t width; -+ uint32_t height; -+ uint32_t format; -+ struct iovec *iov; -+ unsigned int iov_cnt; -+ uint32_t scanout_bitmask; -+ pixman_image_t *image; -+ struct vugbm_buffer buffer; -+ QTAILQ_ENTRY(virtio_gpu_simple_resource) next; -+}; -+ -+static gboolean opt_print_caps; -+static int opt_fdnum = -1; -+static char *opt_socket_path; -+static char *opt_render_node; -+static gboolean opt_virgl; -+ -+static void vg_handle_ctrl(VuDev *dev, int qidx); -+ -+static const char * -+vg_cmd_to_string(int cmd) -+{ -+#define CMD(cmd) [cmd] = #cmd -+ static const char *vg_cmd_str[] = { -+ CMD(VIRTIO_GPU_UNDEFINED), -+ -+ /* 2d commands */ -+ CMD(VIRTIO_GPU_CMD_GET_DISPLAY_INFO), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_UNREF), -+ CMD(VIRTIO_GPU_CMD_SET_SCANOUT), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_FLUSH), -+ CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING), -+ CMD(VIRTIO_GPU_CMD_GET_CAPSET_INFO), -+ CMD(VIRTIO_GPU_CMD_GET_CAPSET), 
-+ -+ /* 3d commands */ -+ CMD(VIRTIO_GPU_CMD_CTX_CREATE), -+ CMD(VIRTIO_GPU_CMD_CTX_DESTROY), -+ CMD(VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE), -+ CMD(VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE), -+ CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D), -+ CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D), -+ CMD(VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D), -+ CMD(VIRTIO_GPU_CMD_SUBMIT_3D), -+ -+ /* cursor commands */ -+ CMD(VIRTIO_GPU_CMD_UPDATE_CURSOR), -+ CMD(VIRTIO_GPU_CMD_MOVE_CURSOR), -+ }; -+#undef REQ -+ -+ if (cmd >= 0 && cmd < G_N_ELEMENTS(vg_cmd_str)) { -+ return vg_cmd_str[cmd]; -+ } else { -+ return "unknown"; -+ } -+} -+ -+static int -+vg_sock_fd_read(int sock, void *buf, ssize_t buflen) -+{ -+ int ret; -+ -+ do { -+ ret = read(sock, buf, buflen); -+ } while (ret < 0 && (errno == EINTR || errno == EAGAIN)); -+ -+ g_warn_if_fail(ret == buflen); -+ return ret; -+} -+ -+static void -+vg_sock_fd_close(VuGpu *g) -+{ -+ if (g->sock_fd >= 0) { -+ close(g->sock_fd); -+ g->sock_fd = -1; -+ } -+} -+ -+static gboolean -+source_wait_cb(gint fd, GIOCondition condition, gpointer user_data) -+{ -+ VuGpu *g = user_data; -+ -+ if (!vg_recv_msg(g, VHOST_USER_GPU_DMABUF_UPDATE, 0, NULL)) { -+ return G_SOURCE_CONTINUE; -+ } -+ -+ /* resume */ -+ g->wait_ok = 0; -+ vg_handle_ctrl(&g->dev.parent, 0); -+ -+ return G_SOURCE_REMOVE; -+} -+ -+void -+vg_wait_ok(VuGpu *g) -+{ -+ assert(g->wait_ok == 0); -+ g->wait_ok = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP, -+ source_wait_cb, g); -+} -+ -+static int -+vg_sock_fd_write(int sock, const void *buf, ssize_t buflen, int fd) -+{ -+ ssize_t ret; -+ struct iovec iov = { -+ .iov_base = (void *)buf, -+ .iov_len = buflen, -+ }; -+ struct msghdr msg = { -+ .msg_iov = &iov, -+ .msg_iovlen = 1, -+ }; -+ union { -+ struct cmsghdr cmsghdr; -+ char control[CMSG_SPACE(sizeof(int))]; -+ } cmsgu; -+ struct cmsghdr *cmsg; -+ -+ if (fd != -1) { -+ msg.msg_control = cmsgu.control; -+ msg.msg_controllen = sizeof(cmsgu.control); -+ -+ cmsg = CMSG_FIRSTHDR(&msg); -+ cmsg->cmsg_len = 
CMSG_LEN(sizeof(int)); -+ cmsg->cmsg_level = SOL_SOCKET; -+ cmsg->cmsg_type = SCM_RIGHTS; -+ -+ *((int *)CMSG_DATA(cmsg)) = fd; -+ } -+ -+ do { -+ ret = sendmsg(sock, &msg, 0); -+ } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); -+ -+ g_warn_if_fail(ret == buflen); -+ return ret; -+} -+ -+void -+vg_send_msg(VuGpu *vg, const VhostUserGpuMsg *msg, int fd) -+{ -+ if (vg_sock_fd_write(vg->sock_fd, msg, -+ VHOST_USER_GPU_HDR_SIZE + msg->size, fd) < 0) { -+ vg_sock_fd_close(vg); -+ } -+} -+ -+bool -+vg_recv_msg(VuGpu *g, uint32_t expect_req, uint32_t expect_size, -+ gpointer payload) -+{ -+ uint32_t req, flags, size; -+ -+ if (vg_sock_fd_read(g->sock_fd, &req, sizeof(req)) < 0 || -+ vg_sock_fd_read(g->sock_fd, &flags, sizeof(flags)) < 0 || -+ vg_sock_fd_read(g->sock_fd, &size, sizeof(size)) < 0) { -+ goto err; -+ } -+ -+ g_return_val_if_fail(req == expect_req, false); -+ g_return_val_if_fail(flags & VHOST_USER_GPU_MSG_FLAG_REPLY, false); -+ g_return_val_if_fail(size == expect_size, false); -+ -+ if (size && vg_sock_fd_read(g->sock_fd, payload, size) != size) { -+ goto err; -+ } -+ -+ return true; -+ -+err: -+ vg_sock_fd_close(g); -+ return false; -+} -+ -+static struct virtio_gpu_simple_resource * -+virtio_gpu_find_resource(VuGpu *g, uint32_t resource_id) -+{ -+ struct virtio_gpu_simple_resource *res; -+ -+ QTAILQ_FOREACH(res, &g->reslist, next) { -+ if (res->resource_id == resource_id) { -+ return res; -+ } -+ } -+ return NULL; -+} -+ -+void -+vg_ctrl_response(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd, -+ struct virtio_gpu_ctrl_hdr *resp, -+ size_t resp_len) -+{ -+ size_t s; -+ -+ if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE) { -+ resp->flags |= VIRTIO_GPU_FLAG_FENCE; -+ resp->fence_id = cmd->cmd_hdr.fence_id; -+ resp->ctx_id = cmd->cmd_hdr.ctx_id; -+ } -+ virtio_gpu_ctrl_hdr_bswap(resp); -+ s = iov_from_buf(cmd->elem.in_sg, cmd->elem.in_num, 0, resp, resp_len); -+ if (s != resp_len) { -+ g_critical("%s: response size incorrect %zu vs %zu", -+ 
__func__, s, resp_len); -+ } -+ vu_queue_push(&g->dev.parent, cmd->vq, &cmd->elem, s); -+ vu_queue_notify(&g->dev.parent, cmd->vq); -+ cmd->finished = true; -+} -+ -+void -+vg_ctrl_response_nodata(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd, -+ enum virtio_gpu_ctrl_type type) -+{ -+ struct virtio_gpu_ctrl_hdr resp = { -+ .type = type, -+ }; -+ -+ vg_ctrl_response(g, cmd, &resp, sizeof(resp)); -+} -+ -+void -+vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_resp_display_info dpy_info = { {} }; -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_GET_DISPLAY_INFO, -+ .size = 0, -+ }; -+ -+ assert(vg->wait_ok == 0); -+ -+ vg_send_msg(vg, &msg, -1); -+ if (!vg_recv_msg(vg, msg.request, sizeof(dpy_info), &dpy_info)) { -+ return; -+ } -+ -+ vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info)); -+} -+ -+static void -+vg_resource_create_2d(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ pixman_format_code_t pformat; -+ struct virtio_gpu_simple_resource *res; -+ struct virtio_gpu_resource_create_2d c2d; -+ -+ VUGPU_FILL_CMD(c2d); -+ virtio_gpu_bswap_32(&c2d, sizeof(c2d)); -+ -+ if (c2d.resource_id == 0) { -+ g_critical("%s: resource id 0 is not allowed", __func__); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ res = virtio_gpu_find_resource(g, c2d.resource_id); -+ if (res) { -+ g_critical("%s: resource already exists %d", __func__, c2d.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ res = g_new0(struct virtio_gpu_simple_resource, 1); -+ res->width = c2d.width; -+ res->height = c2d.height; -+ res->format = c2d.format; -+ res->resource_id = c2d.resource_id; -+ -+ pformat = virtio_gpu_get_pixman_format(c2d.format); -+ if (!pformat) { -+ g_critical("%s: host couldn't handle guest format %d", -+ __func__, c2d.format); -+ g_free(res); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -+ return; -+ } -+ 
vugbm_buffer_create(&res->buffer, &g->gdev, c2d.width, c2d.height); -+ res->image = pixman_image_create_bits(pformat, -+ c2d.width, -+ c2d.height, -+ (uint32_t *)res->buffer.mmap, -+ res->buffer.stride); -+ if (!res->image) { -+ g_critical("%s: resource creation failed %d %d %d", -+ __func__, c2d.resource_id, c2d.width, c2d.height); -+ g_free(res); -+ cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; -+ return; -+ } -+ -+ QTAILQ_INSERT_HEAD(&g->reslist, res, next); -+} -+ -+static void -+vg_disable_scanout(VuGpu *g, int scanout_id) -+{ -+ struct virtio_gpu_scanout *scanout = &g->scanout[scanout_id]; -+ struct virtio_gpu_simple_resource *res; -+ -+ if (scanout->resource_id == 0) { -+ return; -+ } -+ -+ res = virtio_gpu_find_resource(g, scanout->resource_id); -+ if (res) { -+ res->scanout_bitmask &= ~(1 << scanout_id); -+ } -+ -+ scanout->width = 0; -+ scanout->height = 0; -+ -+ if (g->sock_fd >= 0) { -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_SCANOUT, -+ .size = sizeof(VhostUserGpuScanout), -+ .payload.scanout.scanout_id = scanout_id, -+ }; -+ vg_send_msg(g, &msg, -1); -+ } -+} -+ -+static void -+vg_resource_destroy(VuGpu *g, -+ struct virtio_gpu_simple_resource *res) -+{ -+ int i; -+ -+ if (res->scanout_bitmask) { -+ for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { -+ if (res->scanout_bitmask & (1 << i)) { -+ vg_disable_scanout(g, i); -+ } -+ } -+ } -+ -+ vugbm_buffer_destroy(&res->buffer); -+ pixman_image_unref(res->image); -+ QTAILQ_REMOVE(&g->reslist, res, next); -+ g_free(res); -+} -+ -+static void -+vg_resource_unref(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_simple_resource *res; -+ struct virtio_gpu_resource_unref unref; -+ -+ VUGPU_FILL_CMD(unref); -+ virtio_gpu_bswap_32(&unref, sizeof(unref)); -+ -+ res = virtio_gpu_find_resource(g, unref.resource_id); -+ if (!res) { -+ g_critical("%s: illegal resource specified %d", -+ __func__, unref.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ 
return; -+ } -+ vg_resource_destroy(g, res); -+} -+ -+int -+vg_create_mapping_iov(VuGpu *g, -+ struct virtio_gpu_resource_attach_backing *ab, -+ struct virtio_gpu_ctrl_command *cmd, -+ struct iovec **iov) -+{ -+ struct virtio_gpu_mem_entry *ents; -+ size_t esize, s; -+ int i; -+ -+ if (ab->nr_entries > 16384) { -+ g_critical("%s: nr_entries is too big (%d > 16384)", -+ __func__, ab->nr_entries); -+ return -1; -+ } -+ -+ esize = sizeof(*ents) * ab->nr_entries; -+ ents = g_malloc(esize); -+ s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, -+ sizeof(*ab), ents, esize); -+ if (s != esize) { -+ g_critical("%s: command data size incorrect %zu vs %zu", -+ __func__, s, esize); -+ g_free(ents); -+ return -1; -+ } -+ -+ *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries); -+ for (i = 0; i < ab->nr_entries; i++) { -+ uint64_t len = ents[i].length; -+ (*iov)[i].iov_len = ents[i].length; -+ (*iov)[i].iov_base = vu_gpa_to_va(&g->dev.parent, &len, ents[i].addr); -+ if (!(*iov)[i].iov_base || len != ents[i].length) { -+ g_critical("%s: resource %d element %d", -+ __func__, ab->resource_id, i); -+ g_free(*iov); -+ g_free(ents); -+ *iov = NULL; -+ return -1; -+ } -+ } -+ g_free(ents); -+ return 0; -+} -+ -+static void -+vg_resource_attach_backing(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_simple_resource *res; -+ struct virtio_gpu_resource_attach_backing ab; -+ int ret; -+ -+ VUGPU_FILL_CMD(ab); -+ virtio_gpu_bswap_32(&ab, sizeof(ab)); -+ -+ res = virtio_gpu_find_resource(g, ab.resource_id); -+ if (!res) { -+ g_critical("%s: illegal resource specified %d", -+ __func__, ab.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov); -+ if (ret != 0) { -+ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; -+ return; -+ } -+ -+ res->iov_cnt = ab.nr_entries; -+} -+ -+static void -+vg_resource_detach_backing(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct 
virtio_gpu_simple_resource *res; -+ struct virtio_gpu_resource_detach_backing detach; -+ -+ VUGPU_FILL_CMD(detach); -+ virtio_gpu_bswap_32(&detach, sizeof(detach)); -+ -+ res = virtio_gpu_find_resource(g, detach.resource_id); -+ if (!res || !res->iov) { -+ g_critical("%s: illegal resource specified %d", -+ __func__, detach.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ g_free(res->iov); -+ res->iov = NULL; -+ res->iov_cnt = 0; -+} -+ -+static void -+vg_transfer_to_host_2d(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_simple_resource *res; -+ int h; -+ uint32_t src_offset, dst_offset, stride; -+ int bpp; -+ pixman_format_code_t format; -+ struct virtio_gpu_transfer_to_host_2d t2d; -+ -+ VUGPU_FILL_CMD(t2d); -+ virtio_gpu_t2d_bswap(&t2d); -+ -+ res = virtio_gpu_find_resource(g, t2d.resource_id); -+ if (!res || !res->iov) { -+ g_critical("%s: illegal resource specified %d", -+ __func__, t2d.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ if (t2d.r.x > res->width || -+ t2d.r.y > res->height || -+ t2d.r.width > res->width || -+ t2d.r.height > res->height || -+ t2d.r.x + t2d.r.width > res->width || -+ t2d.r.y + t2d.r.height > res->height) { -+ g_critical("%s: transfer bounds outside resource" -+ " bounds for resource %d: %d %d %d %d vs %d %d", -+ __func__, t2d.resource_id, t2d.r.x, t2d.r.y, -+ t2d.r.width, t2d.r.height, res->width, res->height); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -+ return; -+ } -+ -+ format = pixman_image_get_format(res->image); -+ bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8; -+ stride = pixman_image_get_stride(res->image); -+ -+ if (t2d.offset || t2d.r.x || t2d.r.y || -+ t2d.r.width != pixman_image_get_width(res->image)) { -+ void *img_data = pixman_image_get_data(res->image); -+ for (h = 0; h < t2d.r.height; h++) { -+ src_offset = t2d.offset + stride * h; -+ dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp); -+ 
-+ iov_to_buf(res->iov, res->iov_cnt, src_offset, -+ img_data -+ + dst_offset, t2d.r.width * bpp); -+ } -+ } else { -+ iov_to_buf(res->iov, res->iov_cnt, 0, -+ pixman_image_get_data(res->image), -+ pixman_image_get_stride(res->image) -+ * pixman_image_get_height(res->image)); -+ } -+} -+ -+static void -+vg_set_scanout(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_simple_resource *res, *ores; -+ struct virtio_gpu_scanout *scanout; -+ struct virtio_gpu_set_scanout ss; -+ int fd; -+ -+ VUGPU_FILL_CMD(ss); -+ virtio_gpu_bswap_32(&ss, sizeof(ss)); -+ -+ if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) { -+ g_critical("%s: illegal scanout id specified %d", -+ __func__, ss.scanout_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID; -+ return; -+ } -+ -+ if (ss.resource_id == 0) { -+ vg_disable_scanout(g, ss.scanout_id); -+ return; -+ } -+ -+ /* create a surface for this scanout */ -+ res = virtio_gpu_find_resource(g, ss.resource_id); -+ if (!res) { -+ g_critical("%s: illegal resource specified %d", -+ __func__, ss.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ if (ss.r.x > res->width || -+ ss.r.y > res->height || -+ ss.r.width > res->width || -+ ss.r.height > res->height || -+ ss.r.x + ss.r.width > res->width || -+ ss.r.y + ss.r.height > res->height) { -+ g_critical("%s: illegal scanout %d bounds for" -+ " resource %d, (%d,%d)+%d,%d vs %d %d", -+ __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y, -+ ss.r.width, ss.r.height, res->width, res->height); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -+ return; -+ } -+ -+ scanout = &g->scanout[ss.scanout_id]; -+ -+ ores = virtio_gpu_find_resource(g, scanout->resource_id); -+ if (ores) { -+ ores->scanout_bitmask &= ~(1 << ss.scanout_id); -+ } -+ -+ res->scanout_bitmask |= (1 << ss.scanout_id); -+ scanout->resource_id = ss.resource_id; -+ scanout->x = ss.r.x; -+ scanout->y = ss.r.y; -+ scanout->width = ss.r.width; -+ scanout->height = 
ss.r.height; -+ -+ struct vugbm_buffer *buffer = &res->buffer; -+ -+ if (vugbm_buffer_can_get_dmabuf_fd(buffer)) { -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_DMABUF_SCANOUT, -+ .size = sizeof(VhostUserGpuDMABUFScanout), -+ .payload.dmabuf_scanout = (VhostUserGpuDMABUFScanout) { -+ .scanout_id = ss.scanout_id, -+ .x = ss.r.x, -+ .y = ss.r.y, -+ .width = ss.r.width, -+ .height = ss.r.height, -+ .fd_width = buffer->width, -+ .fd_height = buffer->height, -+ .fd_stride = buffer->stride, -+ .fd_drm_fourcc = buffer->format -+ } -+ }; -+ -+ if (vugbm_buffer_get_dmabuf_fd(buffer, &fd)) { -+ vg_send_msg(g, &msg, fd); -+ close(fd); -+ } -+ } else { -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_SCANOUT, -+ .size = sizeof(VhostUserGpuScanout), -+ .payload.scanout = (VhostUserGpuScanout) { -+ .scanout_id = ss.scanout_id, -+ .width = scanout->width, -+ .height = scanout->height -+ } -+ }; -+ vg_send_msg(g, &msg, -1); -+ } -+} -+ -+static void -+vg_resource_flush(VuGpu *g, -+ struct virtio_gpu_ctrl_command *cmd) -+{ -+ struct virtio_gpu_simple_resource *res; -+ struct virtio_gpu_resource_flush rf; -+ pixman_region16_t flush_region; -+ int i; -+ -+ VUGPU_FILL_CMD(rf); -+ virtio_gpu_bswap_32(&rf, sizeof(rf)); -+ -+ res = virtio_gpu_find_resource(g, rf.resource_id); -+ if (!res) { -+ g_critical("%s: illegal resource specified %d\n", -+ __func__, rf.resource_id); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; -+ return; -+ } -+ -+ if (rf.r.x > res->width || -+ rf.r.y > res->height || -+ rf.r.width > res->width || -+ rf.r.height > res->height || -+ rf.r.x + rf.r.width > res->width || -+ rf.r.y + rf.r.height > res->height) { -+ g_critical("%s: flush bounds outside resource" -+ " bounds for resource %d: %d %d %d %d vs %d %d\n", -+ __func__, rf.resource_id, rf.r.x, rf.r.y, -+ rf.r.width, rf.r.height, res->width, res->height); -+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; -+ return; -+ } -+ -+ pixman_region_init_rect(&flush_region, -+ rf.r.x, 
rf.r.y, rf.r.width, rf.r.height); -+ for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) { -+ struct virtio_gpu_scanout *scanout; -+ pixman_region16_t region, finalregion; -+ pixman_box16_t *extents; -+ -+ if (!(res->scanout_bitmask & (1 << i))) { -+ continue; -+ } -+ scanout = &g->scanout[i]; -+ -+ pixman_region_init(&finalregion); -+ pixman_region_init_rect(®ion, scanout->x, scanout->y, -+ scanout->width, scanout->height); -+ -+ pixman_region_intersect(&finalregion, &flush_region, ®ion); -+ -+ extents = pixman_region_extents(&finalregion); -+ size_t width = extents->x2 - extents->x1; -+ size_t height = extents->y2 - extents->y1; -+ -+ if (vugbm_buffer_can_get_dmabuf_fd(&res->buffer)) { -+ VhostUserGpuMsg vmsg = { -+ .request = VHOST_USER_GPU_DMABUF_UPDATE, -+ .size = sizeof(VhostUserGpuUpdate), -+ .payload.update = (VhostUserGpuUpdate) { -+ .scanout_id = i, -+ .x = extents->x1, -+ .y = extents->y1, -+ .width = width, -+ .height = height, -+ } -+ }; -+ vg_send_msg(g, &vmsg, -1); -+ vg_wait_ok(g); -+ } else { -+ size_t bpp = -+ PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) / 8; -+ size_t size = width * height * bpp; -+ -+ void *p = g_malloc(VHOST_USER_GPU_HDR_SIZE + -+ sizeof(VhostUserGpuUpdate) + size); -+ VhostUserGpuMsg *msg = p; -+ msg->request = VHOST_USER_GPU_UPDATE; -+ msg->size = sizeof(VhostUserGpuUpdate) + size; -+ msg->payload.update = (VhostUserGpuUpdate) { -+ .scanout_id = i, -+ .x = extents->x1, -+ .y = extents->y1, -+ .width = width, -+ .height = height, -+ }; -+ pixman_image_t *i = -+ pixman_image_create_bits(pixman_image_get_format(res->image), -+ msg->payload.update.width, -+ msg->payload.update.height, -+ p + offsetof(VhostUserGpuMsg, -+ payload.update.data), -+ width * bpp); -+ pixman_image_composite(PIXMAN_OP_SRC, -+ res->image, NULL, i, -+ extents->x1, extents->y1, -+ 0, 0, 0, 0, -+ width, height); -+ pixman_image_unref(i); -+ vg_send_msg(g, msg, -1); -+ g_free(msg); -+ } -+ pixman_region_fini(®ion); -+ pixman_region_fini(&finalregion); -+ 
} -+ pixman_region_fini(&flush_region); -+} -+ -+static void -+vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) -+{ -+ switch (cmd->cmd_hdr.type) { -+ case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: -+ vg_get_display_info(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: -+ vg_resource_create_2d(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_RESOURCE_UNREF: -+ vg_resource_unref(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_RESOURCE_FLUSH: -+ vg_resource_flush(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: -+ vg_transfer_to_host_2d(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_SET_SCANOUT: -+ vg_set_scanout(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING: -+ vg_resource_attach_backing(vg, cmd); -+ break; -+ case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: -+ vg_resource_detach_backing(vg, cmd); -+ break; -+ /* case VIRTIO_GPU_CMD_GET_EDID: */ -+ /* break */ -+ default: -+ g_warning("TODO handle ctrl %x\n", cmd->cmd_hdr.type); -+ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; -+ break; -+ } -+ if (!cmd->finished) { -+ vg_ctrl_response_nodata(vg, cmd, cmd->error ? 
cmd->error : -+ VIRTIO_GPU_RESP_OK_NODATA); -+ } -+} -+ -+static void -+vg_handle_ctrl(VuDev *dev, int qidx) -+{ -+ VuGpu *vg = container_of(dev, VuGpu, dev.parent); -+ VuVirtq *vq = vu_get_queue(dev, qidx); -+ struct virtio_gpu_ctrl_command *cmd = NULL; -+ size_t len; -+ -+ for (;;) { -+ if (vg->wait_ok != 0) { -+ return; -+ } -+ -+ cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command)); -+ if (!cmd) { -+ break; -+ } -+ cmd->vq = vq; -+ cmd->error = 0; -+ cmd->finished = false; -+ -+ len = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, -+ 0, &cmd->cmd_hdr, sizeof(cmd->cmd_hdr)); -+ if (len != sizeof(cmd->cmd_hdr)) { -+ g_warning("%s: command size incorrect %zu vs %zu\n", -+ __func__, len, sizeof(cmd->cmd_hdr)); -+ } -+ -+ virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr); -+ g_debug("%d %s\n", cmd->cmd_hdr.type, -+ vg_cmd_to_string(cmd->cmd_hdr.type)); -+ -+ if (vg->virgl) { -+ vg_virgl_process_cmd(vg, cmd); -+ } else { -+ vg_process_cmd(vg, cmd); -+ } -+ -+ if (!cmd->finished) { -+ QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next); -+ vg->inflight++; -+ } else { -+ g_free(cmd); -+ } -+ } -+} -+ -+static void -+update_cursor_data_simple(VuGpu *g, uint32_t resource_id, gpointer data) -+{ -+ struct virtio_gpu_simple_resource *res; -+ -+ res = virtio_gpu_find_resource(g, resource_id); -+ g_return_if_fail(res != NULL); -+ g_return_if_fail(pixman_image_get_width(res->image) == 64); -+ g_return_if_fail(pixman_image_get_height(res->image) == 64); -+ g_return_if_fail( -+ PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) == 32); -+ -+ memcpy(data, pixman_image_get_data(res->image), 64 * 64 * sizeof(uint32_t)); -+} -+ -+static void -+vg_process_cursor_cmd(VuGpu *g, struct virtio_gpu_update_cursor *cursor) -+{ -+ bool move = cursor->hdr.type != VIRTIO_GPU_CMD_MOVE_CURSOR; -+ -+ g_debug("%s move:%d\n", G_STRFUNC, move); -+ -+ if (move) { -+ VhostUserGpuMsg msg = { -+ .request = cursor->resource_id ? 
-+ VHOST_USER_GPU_CURSOR_POS : VHOST_USER_GPU_CURSOR_POS_HIDE, -+ .size = sizeof(VhostUserGpuCursorPos), -+ .payload.cursor_pos = { -+ .scanout_id = cursor->pos.scanout_id, -+ .x = cursor->pos.x, -+ .y = cursor->pos.y, -+ } -+ }; -+ vg_send_msg(g, &msg, -1); -+ } else { -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_CURSOR_UPDATE, -+ .size = sizeof(VhostUserGpuCursorUpdate), -+ .payload.cursor_update = { -+ .pos = { -+ .scanout_id = cursor->pos.scanout_id, -+ .x = cursor->pos.x, -+ .y = cursor->pos.y, -+ }, -+ .hot_x = cursor->hot_x, -+ .hot_y = cursor->hot_y, -+ } -+ }; -+ if (g->virgl) { -+ vg_virgl_update_cursor_data(g, cursor->resource_id, -+ msg.payload.cursor_update.data); -+ } else { -+ update_cursor_data_simple(g, cursor->resource_id, -+ msg.payload.cursor_update.data); -+ } -+ vg_send_msg(g, &msg, -1); -+ } -+} -+ -+static void -+vg_handle_cursor(VuDev *dev, int qidx) -+{ -+ VuGpu *g = container_of(dev, VuGpu, dev.parent); -+ VuVirtq *vq = vu_get_queue(dev, qidx); -+ VuVirtqElement *elem; -+ size_t len; -+ struct virtio_gpu_update_cursor cursor; -+ -+ for (;;) { -+ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); -+ if (!elem) { -+ break; -+ } -+ g_debug("cursor out:%d in:%d\n", elem->out_num, elem->in_num); -+ -+ len = iov_to_buf(elem->out_sg, elem->out_num, -+ 0, &cursor, sizeof(cursor)); -+ if (len != sizeof(cursor)) { -+ g_warning("%s: cursor size incorrect %zu vs %zu\n", -+ __func__, len, sizeof(cursor)); -+ } else { -+ virtio_gpu_bswap_32(&cursor, sizeof(cursor)); -+ vg_process_cursor_cmd(g, &cursor); -+ } -+ vu_queue_push(dev, vq, elem, 0); -+ vu_queue_notify(dev, vq); -+ g_free(elem); -+ } -+} -+ -+static void -+vg_panic(VuDev *dev, const char *msg) -+{ -+ g_critical("%s\n", msg); -+ exit(1); -+} -+ -+static void -+vg_queue_set_started(VuDev *dev, int qidx, bool started) -+{ -+ VuVirtq *vq = vu_get_queue(dev, qidx); -+ -+ g_debug("queue started %d:%d\n", qidx, started); -+ -+ switch (qidx) { -+ case 0: -+ vu_set_queue_handler(dev, vq, 
started ? vg_handle_ctrl : NULL); -+ break; -+ case 1: -+ vu_set_queue_handler(dev, vq, started ? vg_handle_cursor : NULL); -+ break; -+ default: -+ break; -+ } -+} -+ -+static void -+set_gpu_protocol_features(VuGpu *g) -+{ -+ uint64_t u64; -+ VhostUserGpuMsg msg = { -+ .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES -+ }; -+ -+ assert(g->wait_ok == 0); -+ vg_send_msg(g, &msg, -1); -+ if (!vg_recv_msg(g, msg.request, sizeof(u64), &u64)) { -+ return; -+ } -+ -+ msg = (VhostUserGpuMsg) { -+ .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES, -+ .size = sizeof(uint64_t), -+ .payload.u64 = 0 -+ }; -+ vg_send_msg(g, &msg, -1); -+} -+ -+static int -+vg_process_msg(VuDev *dev, VhostUserMsg *msg, int *do_reply) -+{ -+ VuGpu *g = container_of(dev, VuGpu, dev.parent); -+ -+ switch (msg->request) { -+ case VHOST_USER_GPU_SET_SOCKET: { -+ g_return_val_if_fail(msg->fd_num == 1, 1); -+ g_return_val_if_fail(g->sock_fd == -1, 1); -+ g->sock_fd = msg->fds[0]; -+ set_gpu_protocol_features(g); -+ return 1; -+ } -+ default: -+ return 0; -+ } -+ -+ return 0; -+} -+ -+static uint64_t -+vg_get_features(VuDev *dev) -+{ -+ uint64_t features = 0; -+ -+ if (opt_virgl) { -+ features |= 1 << VIRTIO_GPU_F_VIRGL; -+ } -+ -+ return features; -+} -+ -+static void -+vg_set_features(VuDev *dev, uint64_t features) -+{ -+ VuGpu *g = container_of(dev, VuGpu, dev.parent); -+ bool virgl = features & (1 << VIRTIO_GPU_F_VIRGL); -+ -+ if (virgl && !g->virgl_inited) { -+ if (!vg_virgl_init(g)) { -+ vg_panic(dev, "Failed to initialize virgl"); -+ } -+ g->virgl_inited = true; -+ } -+ -+ g->virgl = virgl; -+} -+ -+static int -+vg_get_config(VuDev *dev, uint8_t *config, uint32_t len) -+{ -+ VuGpu *g = container_of(dev, VuGpu, dev.parent); -+ -+ g_return_val_if_fail(len <= sizeof(struct virtio_gpu_config), -1); -+ -+ if (opt_virgl) { -+ g->virtio_config.num_capsets = vg_virgl_get_num_capsets(); -+ } -+ -+ memcpy(config, &g->virtio_config, len); -+ -+ return 0; -+} -+ -+static int -+vg_set_config(VuDev *dev, const 
uint8_t *data, -+ uint32_t offset, uint32_t size, -+ uint32_t flags) -+{ -+ VuGpu *g = container_of(dev, VuGpu, dev.parent); -+ struct virtio_gpu_config *config = (struct virtio_gpu_config *)data; -+ -+ if (config->events_clear) { -+ g->virtio_config.events_read &= ~config->events_clear; -+ } -+ -+ return 0; -+} -+ -+static const VuDevIface vuiface = { -+ .set_features = vg_set_features, -+ .get_features = vg_get_features, -+ .queue_set_started = vg_queue_set_started, -+ .process_msg = vg_process_msg, -+ .get_config = vg_get_config, -+ .set_config = vg_set_config, -+}; -+ -+static void -+vg_destroy(VuGpu *g) -+{ -+ struct virtio_gpu_simple_resource *res, *tmp; -+ -+ vug_deinit(&g->dev); -+ -+ vg_sock_fd_close(g); -+ -+ QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) { -+ vg_resource_destroy(g, res); -+ } -+ -+ vugbm_device_destroy(&g->gdev); -+} -+ -+static GOptionEntry entries[] = { -+ { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps, -+ "Print capabilities", NULL }, -+ { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum, -+ "Use inherited fd socket", "FDNUM" }, -+ { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path, -+ "Use UNIX socket path", "PATH" }, -+ { "render-node", 'r', 0, G_OPTION_ARG_FILENAME, &opt_render_node, -+ "Specify DRM render node", "PATH" }, -+ { "virgl", 'v', 0, G_OPTION_ARG_NONE, &opt_virgl, -+ "Turn virgl rendering on", NULL }, -+ { NULL, } -+}; -+ -+int -+main(int argc, char *argv[]) -+{ -+ GOptionContext *context; -+ GError *error = NULL; -+ GMainLoop *loop = NULL; -+ int fd; -+ VuGpu g = { .sock_fd = -1, .drm_rnode_fd = -1 }; -+ -+ QTAILQ_INIT(&g.reslist); -+ QTAILQ_INIT(&g.fenceq); -+ -+ context = g_option_context_new("QEMU vhost-user-gpu"); -+ g_option_context_add_main_entries(context, entries, NULL); -+ if (!g_option_context_parse(context, &argc, &argv, &error)) { -+ g_printerr("Option parsing failed: %s\n", error->message); -+ exit(EXIT_FAILURE); -+ } -+ g_option_context_free(context); -+ -+ if (opt_print_caps) 
{ -+ g_print("{\n"); -+ g_print(" \"type\": \"gpu\",\n"); -+ g_print(" \"features\": [\n"); -+ g_print(" \"render-node\",\n"); -+ g_print(" \"virgl\"\n"); -+ g_print(" ]\n"); -+ g_print("}\n"); -+ exit(EXIT_SUCCESS); -+ } -+ -+ g.drm_rnode_fd = qemu_drm_rendernode_open(opt_render_node); -+ if (opt_render_node && g.drm_rnode_fd == -1) { -+ g_printerr("Failed to open DRM rendernode.\n"); -+ exit(EXIT_FAILURE); -+ } -+ -+ if (g.drm_rnode_fd >= 0) { -+ if (!vugbm_device_init(&g.gdev, g.drm_rnode_fd)) { -+ g_warning("Failed to init DRM device, using fallback path"); -+ } -+ } -+ -+ if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) { -+ g_printerr("Please specify either --fd or --socket-path\n"); -+ exit(EXIT_FAILURE); -+ } -+ -+ if (opt_socket_path) { -+ int lsock = unix_listen(opt_socket_path, &error_fatal); -+ if (lsock < 0) { -+ g_printerr("Failed to listen on %s.\n", opt_socket_path); -+ exit(EXIT_FAILURE); -+ } -+ fd = accept(lsock, NULL, NULL); -+ close(lsock); -+ } else { -+ fd = opt_fdnum; -+ } -+ if (fd == -1) { -+ g_printerr("Invalid vhost-user socket.\n"); -+ exit(EXIT_FAILURE); -+ } -+ -+ if (!vug_init(&g.dev, VHOST_USER_GPU_MAX_QUEUES, fd, vg_panic, &vuiface)) { -+ g_printerr("Failed to initialize libvhost-user-glib.\n"); -+ exit(EXIT_FAILURE); -+ } -+ -+ loop = g_main_loop_new(NULL, FALSE); -+ g_main_loop_run(loop); -+ g_main_loop_unref(loop); -+ -+ vg_destroy(&g); -+ if (g.drm_rnode_fd >= 0) { -+ close(g.drm_rnode_fd); -+ } -+ -+ return 0; -+} --- -1.8.3.1 - diff --git a/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch b/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch deleted file mode 100644 index 4212f1c..0000000 --- a/kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 548de8acbf0137b6e49a14b63682badfff037d23 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:44 +0100 -Subject: [PATCH 073/116] contrib/libvhost-user: Protect slave fd with mutex -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-70-dgilbert@redhat.com> -Patchwork-id: 93523 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 069/112] contrib/libvhost-user: Protect slave fd with mutex -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -In future patches we'll be performing commands on the slave-fd driven -by commands on queues, since those queues will be driven by individual -threads we need to make sure they don't attempt to use the slave-fd -for multiple commands in parallel. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit c25c02b9e6a196be87a818f459c426556b24770d) -Signed-off-by: Miroslav Rezanina ---- - contrib/libvhost-user/libvhost-user.c | 24 ++++++++++++++++++++---- - contrib/libvhost-user/libvhost-user.h | 3 +++ - 2 files changed, 23 insertions(+), 4 deletions(-) - -diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c -index ec27b78..63e4106 100644 ---- a/contrib/libvhost-user/libvhost-user.c -+++ b/contrib/libvhost-user/libvhost-user.c -@@ -392,26 +392,37 @@ vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) - return vu_message_write(dev, conn_fd, vmsg); - } - -+/* -+ * Processes a reply on the slave channel. -+ * Entered with slave_mutex held and releases it before exit. -+ * Returns true on success. 
-+ */ - static bool - vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg) - { - VhostUserMsg msg_reply; -+ bool result = false; - - if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { -- return true; -+ result = true; -+ goto out; - } - - if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) { -- return false; -+ goto out; - } - - if (msg_reply.request != vmsg->request) { - DPRINT("Received unexpected msg type. Expected %d received %d", - vmsg->request, msg_reply.request); -- return false; -+ goto out; - } - -- return msg_reply.payload.u64 == 0; -+ result = msg_reply.payload.u64 == 0; -+ -+out: -+ pthread_mutex_unlock(&dev->slave_mutex); -+ return result; - } - - /* Kick the log_call_fd if required. */ -@@ -1105,10 +1116,13 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, - return false; - } - -+ pthread_mutex_lock(&dev->slave_mutex); - if (!vu_message_write(dev, dev->slave_fd, &vmsg)) { -+ pthread_mutex_unlock(&dev->slave_mutex); - return false; - } - -+ /* Also unlocks the slave_mutex */ - return vu_process_message_reply(dev, &vmsg); - } - -@@ -1628,6 +1642,7 @@ vu_deinit(VuDev *dev) - close(dev->slave_fd); - dev->slave_fd = -1; - } -+ pthread_mutex_destroy(&dev->slave_mutex); - - if (dev->sock != -1) { - close(dev->sock); -@@ -1663,6 +1678,7 @@ vu_init(VuDev *dev, - dev->remove_watch = remove_watch; - dev->iface = iface; - dev->log_call_fd = -1; -+ pthread_mutex_init(&dev->slave_mutex, NULL); - dev->slave_fd = -1; - dev->max_queues = max_queues; - -diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h -index 46b6007..1844b6f 100644 ---- a/contrib/libvhost-user/libvhost-user.h -+++ b/contrib/libvhost-user/libvhost-user.h -@@ -19,6 +19,7 @@ - #include - #include - #include -+#include - #include "standard-headers/linux/virtio_ring.h" - - /* Based on qemu/hw/virtio/vhost-user.c */ -@@ -355,6 +356,8 @@ struct VuDev { - VuVirtq *vq; - VuDevInflightInfo inflight_info; - int log_call_fd; -+ /* Must be 
held while using slave_fd */ -+ pthread_mutex_t slave_mutex; - int slave_fd; - uint64_t log_size; - uint8_t *log_table; --- -1.8.3.1 - diff --git a/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch b/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch deleted file mode 100644 index a6177c6..0000000 --- a/kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch +++ /dev/null @@ -1,56 +0,0 @@ -From f01178897c8f5ff98692a22059dd65e35677eaa3 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Mon, 10 Feb 2020 17:33:58 +0000 -Subject: [PATCH 18/18] docs/arm-cpu-features: Make kvm-no-adjvtime comment - clearer -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200210173358.16896-3-drjones@redhat.com> -Patchwork-id: 93772 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] docs/arm-cpu-features: Make kvm-no-adjvtime comment clearer -Bugzilla: 1801320 -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan -RH-Acked-by: Philippe Mathieu-Daudé - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1801320 - -Author: Philippe Mathieu-Daudé -Date: Fri, 07 Feb 2020 14:04:28 +0000 - - docs/arm-cpu-features: Make kvm-no-adjvtime comment clearer - - The bold text sounds like 'knock knock'. Only bolding the - second 'not' makes it easier to read. - - Fixes: dea101a1ae - Signed-off-by: Philippe Mathieu-Daudé - Reviewed-by: Andrew Jones - Message-id: 20200206225148.23923-1-philmd@redhat.com - Signed-off-by: Peter Maydell - -(cherry picked from commit fa3236a970b6ea5be3fa3ad258f1a75920ca1ebb) -Signed-off-by: Danilo C. L. de Paula ---- - docs/arm-cpu-features.rst | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst -index 45d1eb6..48d5054 100644 ---- a/docs/arm-cpu-features.rst -+++ b/docs/arm-cpu-features.rst -@@ -185,7 +185,7 @@ the list of KVM VCPU features and their descriptions. 
- - kvm-no-adjvtime By default kvm-no-adjvtime is disabled. This - means that by default the virtual time -- adjustment is enabled (vtime is *not not* -+ adjustment is enabled (vtime is not *not* - adjusted). - - When virtual time adjustment is enabled each --- -1.8.3.1 - diff --git a/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch b/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch deleted file mode 100644 index 5d44708..0000000 --- a/kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 5770fe43fe1e15e6f53cfd3705605e8645b95a98 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 13 Mar 2020 17:17:08 +0000 -Subject: [PATCH 20/20] exec/rom_reset: Free rom data during inmigrate skip -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200313171708.242774-1-dgilbert@redhat.com> -Patchwork-id: 94292 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] exec/rom_reset: Free rom data during inmigrate skip -Bugzilla: 1809380 -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Paolo Bonzini - -From: "Dr. David Alan Gilbert" - -bz: https://bugzilla.redhat.com/show_bug.cgi?id=1809380 -brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27249921 -branch: rhel-av-8.2.0 -upstream: Posted and with review-by, not merged yet - -Commit 355477f8c73e9 skips rom reset when we're an incoming migration -so as not to overwrite shared ram in the ignore-shared migration -optimisation. -However, it's got an unexpected side effect that because it skips -freeing the ROM data, when rom_reset gets called later on, after -migration (e.g. during a reboot), the ROM does get reset to the original -file contents. Because of seabios/x86's weird reboot process -this confuses a reboot into hanging after a migration. 
- -Fixes: 355477f8c73e9 ("migration: do not rom_reset() during incoming migration") -https://bugzilla.redhat.com/show_bug.cgi?id=1809380 - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. de Paula ---- - hw/core/loader.c | 25 ++++++++++++++++--------- - 1 file changed, 16 insertions(+), 9 deletions(-) - -diff --git a/hw/core/loader.c b/hw/core/loader.c -index 5099f27..375b29b 100644 ---- a/hw/core/loader.c -+++ b/hw/core/loader.c -@@ -1118,19 +1118,26 @@ static void rom_reset(void *unused) - { - Rom *rom; - -- /* -- * We don't need to fill in the RAM with ROM data because we'll fill -- * the data in during the next incoming migration in all cases. Note -- * that some of those RAMs can actually be modified by the guest on ARM -- * so this is probably the only right thing to do here. -- */ -- if (runstate_check(RUN_STATE_INMIGRATE)) -- return; -- - QTAILQ_FOREACH(rom, &roms, next) { - if (rom->fw_file) { - continue; - } -+ /* -+ * We don't need to fill in the RAM with ROM data because we'll fill -+ * the data in during the next incoming migration in all cases. Note -+ * that some of those RAMs can actually be modified by the guest. -+ */ -+ if (runstate_check(RUN_STATE_INMIGRATE)) { -+ if (rom->data && rom->isrom) { -+ /* -+ * Free it so that a rom_reset after migration doesn't -+ * overwrite a potentially modified 'rom'. 
-+ */ -+ rom_free_data(rom); -+ } -+ continue; -+ } -+ - if (rom->data == NULL) { - continue; - } --- -1.8.3.1 - diff --git a/kvm-file-posix-Drop-hdev_co_create_opts.patch b/kvm-file-posix-Drop-hdev_co_create_opts.patch deleted file mode 100644 index ea2edbd..0000000 --- a/kvm-file-posix-Drop-hdev_co_create_opts.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 3d3509c010129bd15eb1f5ec1a7b9eedcdbf23f6 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:44 +0000 -Subject: [PATCH 03/20] file-posix: Drop hdev_co_create_opts() - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-4-mlevitsk@redhat.com> -Patchwork-id: 94225 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 3/6] file-posix: Drop hdev_co_create_opts() -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -The generic fallback implementation effectively does the same. - -Reviewed-by: Maxim Levitsky -Signed-off-by: Max Reitz -Message-Id: <20200122164532.178040-4-mreitz@redhat.com> -Signed-off-by: Max Reitz -(cherry picked from commit 87ca3b8fa615b278b33cabf9ed22b3f44b5214ba) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. de Paula ---- - block/file-posix.c | 67 ------------------------------------------------------ - 1 file changed, 67 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 1b805bd..fd29372 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -3418,67 +3418,6 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs, - return raw_do_pwrite_zeroes(bs, offset, bytes, flags, true); - } - --static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts, -- Error **errp) --{ -- int fd; -- int ret = 0; -- struct stat stat_buf; -- int64_t total_size = 0; -- bool has_prefix; -- -- /* This function is used by both protocol block drivers and therefore either -- * of these prefixes may be given. 
-- * The return value has to be stored somewhere, otherwise this is an error -- * due to -Werror=unused-value. */ -- has_prefix = -- strstart(filename, "host_device:", &filename) || -- strstart(filename, "host_cdrom:" , &filename); -- -- (void)has_prefix; -- -- ret = raw_normalize_devicepath(&filename, errp); -- if (ret < 0) { -- return ret; -- } -- -- /* Read out options */ -- total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), -- BDRV_SECTOR_SIZE); -- -- fd = qemu_open(filename, O_WRONLY | O_BINARY); -- if (fd < 0) { -- ret = -errno; -- error_setg_errno(errp, -ret, "Could not open device"); -- return ret; -- } -- -- if (fstat(fd, &stat_buf) < 0) { -- ret = -errno; -- error_setg_errno(errp, -ret, "Could not stat device"); -- } else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode)) { -- error_setg(errp, -- "The given file is neither a block nor a character device"); -- ret = -ENODEV; -- } else if (lseek(fd, 0, SEEK_END) < total_size) { -- error_setg(errp, "Device is too small"); -- ret = -ENOSPC; -- } -- -- if (!ret && total_size) { -- uint8_t buf[BDRV_SECTOR_SIZE] = { 0 }; -- int64_t zero_size = MIN(BDRV_SECTOR_SIZE, total_size); -- if (lseek(fd, 0, SEEK_SET) == -1) { -- ret = -errno; -- } else { -- ret = qemu_write_full(fd, buf, zero_size); -- ret = ret == zero_size ? 
0 : -errno; -- } -- } -- qemu_close(fd); -- return ret; --} -- - static BlockDriver bdrv_host_device = { - .format_name = "host_device", - .protocol_name = "host_device", -@@ -3491,8 +3430,6 @@ static BlockDriver bdrv_host_device = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -- .bdrv_co_create_opts = hdev_co_create_opts, -- .create_opts = &raw_create_opts, - .mutable_opts = mutable_opts, - .bdrv_co_invalidate_cache = raw_co_invalidate_cache, - .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes, -@@ -3619,8 +3556,6 @@ static BlockDriver bdrv_host_cdrom = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -- .bdrv_co_create_opts = hdev_co_create_opts, -- .create_opts = &raw_create_opts, - .mutable_opts = mutable_opts, - .bdrv_co_invalidate_cache = raw_co_invalidate_cache, - -@@ -3753,8 +3688,6 @@ static BlockDriver bdrv_host_cdrom = { - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, -- .bdrv_co_create_opts = hdev_co_create_opts, -- .create_opts = &raw_create_opts, - .mutable_opts = mutable_opts, - - .bdrv_co_preadv = raw_co_preadv, --- -1.8.3.1 - diff --git a/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch b/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch deleted file mode 100644 index f01dec2..0000000 --- a/kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch +++ /dev/null @@ -1,100 +0,0 @@ -From cebc614e5ddd1f770c4d6dc26c066791f36e56df Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:24:02 +0000 -Subject: [PATCH 05/18] hmp: Allow using qdev ID for qemu-io command - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-5-kwolf@redhat.com> -Patchwork-id: 93750 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 4/6] hmp: Allow using qdev ID for qemu-io command -Bugzilla: 1781637 -RH-Acked-by: Sergio 
Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -In order to issue requests on an existing BlockBackend with the -'qemu-io' HMP command, allow specifying the BlockBackend not only with a -BlockBackend name, but also with a qdev ID/QOM path for a device that -owns the (possibly anonymous) BlockBackend. - -Because qdev names could be conflicting with BlockBackend and node -names, introduce a -d option to explicitly address a device. If the -option is not given, a BlockBackend or a node is addressed. - -Signed-off-by: Kevin Wolf -(cherry picked from commit 89b6fc45614bb45dcd58f1590415afe5c2791abd) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - hmp-commands.hx | 8 +++++--- - monitor/hmp-cmds.c | 28 ++++++++++++++++++---------- - 2 files changed, 23 insertions(+), 13 deletions(-) - -diff --git a/hmp-commands.hx b/hmp-commands.hx -index cfcc044..dc23185 100644 ---- a/hmp-commands.hx -+++ b/hmp-commands.hx -@@ -1875,9 +1875,11 @@ ETEXI - - { - .name = "qemu-io", -- .args_type = "device:B,command:s", -- .params = "[device] \"[command]\"", -- .help = "run a qemu-io command on a block device", -+ .args_type = "qdev:-d,device:B,command:s", -+ .params = "[-d] [device] \"[command]\"", -+ .help = "run a qemu-io command on a block device\n\t\t\t" -+ "-d: [device] is a device ID rather than a " -+ "drive ID or node name", - .cmd = hmp_qemu_io, - }, - -diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index b2551c1..5f8941d 100644 ---- a/monitor/hmp-cmds.c -+++ b/monitor/hmp-cmds.c -@@ -2468,23 +2468,31 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) - { - BlockBackend *blk; - BlockBackend *local_blk = NULL; -+ bool qdev = qdict_get_try_bool(qdict, "qdev", false); - const char* device = qdict_get_str(qdict, "device"); - const char* command = qdict_get_str(qdict, "command"); - Error *err = NULL; - int ret; - -- blk = blk_by_name(device); -- if (!blk) { -- BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); -- if (bs) { 
-- blk = local_blk = blk_new(bdrv_get_aio_context(bs), -- 0, BLK_PERM_ALL); -- ret = blk_insert_bs(blk, bs, &err); -- if (ret < 0) { -+ if (qdev) { -+ blk = blk_by_qdev_id(device, &err); -+ if (!blk) { -+ goto fail; -+ } -+ } else { -+ blk = blk_by_name(device); -+ if (!blk) { -+ BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); -+ if (bs) { -+ blk = local_blk = blk_new(bdrv_get_aio_context(bs), -+ 0, BLK_PERM_ALL); -+ ret = blk_insert_bs(blk, bs, &err); -+ if (ret < 0) { -+ goto fail; -+ } -+ } else { - goto fail; - } -- } else { -- goto fail; - } - } - --- -1.8.3.1 - diff --git a/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch b/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch deleted file mode 100644 index 0f0f126..0000000 --- a/kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch +++ /dev/null @@ -1,262 +0,0 @@ -From e6c3fbfc82863180007569cf2a9132c28a47bf1f Mon Sep 17 00:00:00 2001 -From: "Daniel P. Berrange" -Date: Mon, 20 Jan 2020 16:13:08 +0000 -Subject: [PATCH 01/18] hw/smbios: set new default SMBIOS fields for Windows - driver support -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrange -Message-id: <20200120161308.584989-2-berrange@redhat.com> -Patchwork-id: 93422 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] hw/smbios: set new default SMBIOS fields for Windows driver support -Bugzilla: 1782529 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Igor Mammedov -RH-Acked-by: Laszlo Ersek - -For Windows driver support, we have to follow this doc in order to -enable Windows to automatically determine the right drivers to install -for a given guest / host combination: - - https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer - -Out of the choices available, it was decided that the Windows drivers -will be written to expect use of the scheme documented as "HardwareID-6" -against Windows 10. 
This uses SMBIOS System (Type 1) and Base Board -(Type 2) tables and will match on - - System Manufacturer = Red Hat - System SKU Number = 8.2.0 - Baseboard Manufacturer = Red Hat - Baseboard Product = RHEL-AV - -The new SMBIOS fields will be tied to machine type and only reported for -pc-q35-8.2.0 machine and later. - -The old SMBIOS fields, previously reported by all machines were: - - System Manufacturer: Red Hat - System Product Name: KVM - System Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) - System Family: Red Hat Enterprise Linux - Baseboard Manufacturer: Red Hat - Baseboard Product Name: KVM - Baseboard Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) - Chassis Manufacturer: Red Hat - Chassis Product Name: KVM - Chassis Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) - Processor Manufacturer: Red Hat - Processor Product Name: KVM - Processor Version: RHEL-8.2.0 PC (Q35 + ICH9, 2009) - -This information will continue to be reported for all machines, except -where it conflicts with the requirement of the new SMBIOS data. IOW, -the "Baseboard Product Name" will change to "RHEL-AV" for pc-q35-8.2.0 -machine types and later. - -Management applications MUST NEVER override the 4 new SMBIOS fields that -are used for Windows driver matching, with differing values. Aside from -this, they are free to override any other field, including those from -the old SMBIOS field data. - -In particular if a management application wants to report its own -product name and version, it is recommended to use "System product" -and "System version" as identifying fields, as these avoid a clash with -the new SMBIOS fields used for Windows drivers. - -Note that until now the Baseboard (type 2) table has only been generated -by QEMU if explicitly asked for on the CLI. This patch makes it always -present for new machine types. - -Signed-off-by: Daniel P. Berrangé -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/arm/virt.c | 2 +- - hw/i386/pc_piix.c | 2 ++ - hw/i386/pc_q35.c | 8 ++++++++ - hw/smbios/smbios.c | 45 +++++++++++++++++++++++++++++++++++++++++--- - include/hw/firmware/smbios.h | 5 ++++- - include/hw/i386/pc.h | 3 +++ - 6 files changed, 60 insertions(+), 5 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d30d38c..2dcf6e7 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1423,7 +1423,7 @@ static void virt_build_smbios(VirtMachineState *vms) - - smbios_set_defaults("QEMU", product, - vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, -- true, SMBIOS_ENTRY_POINT_30); -+ true, NULL, NULL, SMBIOS_ENTRY_POINT_30); - - smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, - &smbios_anchor, &smbios_anchor_len); -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index bd7fdb9..2ac94d5 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, - smbios_set_defaults("Red Hat", "KVM", - mc->desc, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, -+ pcmc->smbios_stream_product, -+ pcmc->smbios_stream_version, - SMBIOS_ENTRY_POINT_21); - } - -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 7531d8e..e975643 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -200,6 +200,8 @@ static void pc_q35_init(MachineState *machine) - smbios_set_defaults("Red Hat", "KVM", - mc->desc, pcmc->smbios_legacy_mode, - pcmc->smbios_uuid_encoded, -+ pcmc->smbios_stream_product, -+ pcmc->smbios_stream_version, - SMBIOS_ENTRY_POINT_21); - } - -@@ -565,8 +567,11 @@ static void pc_q35_init_rhel820(MachineState *machine) - - static void pc_q35_machine_rhel820_options(MachineClass *m) - { -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_q35_machine_rhel_options(m); - m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; -+ pcmc->smbios_stream_product = "RHEL-AV"; -+ pcmc->smbios_stream_version = "8.2.0"; - } - - DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", 
pc_q35_init_rhel820, -@@ -579,9 +584,12 @@ static void pc_q35_init_rhel810(MachineState *machine) - - static void pc_q35_machine_rhel810_options(MachineClass *m) - { -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_q35_machine_rhel820_options(m); - m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; - m->alias = NULL; -+ pcmc->smbios_stream_product = NULL; -+ pcmc->smbios_stream_version = NULL; - compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); - compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); - } -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index e6e9355..d65c149 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -57,6 +57,9 @@ static bool smbios_legacy = true; - static bool smbios_uuid_encoded = true; - /* end: legacy structures & constants for <= 2.0 machines */ - -+/* Set to true for modern Windows 10 HardwareID-6 compat */ -+static bool smbios_type2_required; -+ - - uint8_t *smbios_tables; - size_t smbios_tables_len; -@@ -532,7 +535,7 @@ static void smbios_build_type_1_table(void) - - static void smbios_build_type_2_table(void) - { -- SMBIOS_BUILD_TABLE_PRE(2, 0x200, false); /* optional */ -+ SMBIOS_BUILD_TABLE_PRE(2, 0x200, smbios_type2_required); - - SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); - SMBIOS_TABLE_SET_STR(2, product_str, type2.product); -@@ -753,7 +756,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) - - void smbios_set_defaults(const char *manufacturer, const char *product, - const char *version, bool legacy_mode, -- bool uuid_encoded, SmbiosEntryPointType ep_type) -+ bool uuid_encoded, -+ const char *stream_product, -+ const char *stream_version, -+ SmbiosEntryPointType ep_type) - { - smbios_have_defaults = true; - smbios_legacy = legacy_mode; -@@ -774,12 +780,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, - g_free(smbios_entries); - } - -+ /* -+ * If @stream_product & @stream_version are non-NULL, 
then -+ * we're following rules for new Windows driver support. -+ * The data we have to report is defined in this doc: -+ * -+ * https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer -+ * -+ * The Windows drivers are written to expect use of the -+ * scheme documented as "HardwareID-6" against Windows 10, -+ * which uses SMBIOS System (Type 1) and Base Board (Type 2) -+ * tables and will match on -+ * -+ * System Manufacturer = Red Hat (@manufacturer) -+ * System SKU Number = 8.2.0 (@stream_version) -+ * Baseboard Manufacturer = Red Hat (@manufacturer) -+ * Baseboard Product = RHEL-AV (@stream_product) -+ * -+ * NB, SKU must be changed with each RHEL-AV release -+ * -+ * Other fields can be freely used by applications using -+ * QEMU. For example apps can use the "System product" -+ * and "System version" to identify themselves. -+ * -+ * We get 'System Manufacturer' and 'Baseboard Manufacturer' -+ */ - SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type1.product, product); - SMBIOS_SET_DEFAULT(type1.version, version); - SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); -+ if (stream_version != NULL) { -+ SMBIOS_SET_DEFAULT(type1.sku, stream_version); -+ } - SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); -- SMBIOS_SET_DEFAULT(type2.product, product); -+ if (stream_product != NULL) { -+ SMBIOS_SET_DEFAULT(type2.product, stream_product); -+ smbios_type2_required = true; -+ } else { -+ SMBIOS_SET_DEFAULT(type2.product, product); -+ } - SMBIOS_SET_DEFAULT(type2.version, version); - SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); - SMBIOS_SET_DEFAULT(type3.version, version); -diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 02a0ced..67e38a1 100644 ---- a/include/hw/firmware/smbios.h -+++ b/include/hw/firmware/smbios.h -@@ -267,7 +267,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); - void smbios_set_cpuid(uint32_t version, 
uint32_t features); - void smbios_set_defaults(const char *manufacturer, const char *product, - const char *version, bool legacy_mode, -- bool uuid_encoded, SmbiosEntryPointType ep_type); -+ bool uuid_encoded, -+ const char *stream_product, -+ const char *stream_version, -+ SmbiosEntryPointType ep_type); - uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); - void smbios_get_tables(MachineState *ms, - const struct smbios_phys_mem_area *mem_array, -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 2e362c8..b9f29ba 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -109,6 +109,9 @@ typedef struct PCMachineClass { - bool smbios_defaults; - bool smbios_legacy_mode; - bool smbios_uuid_encoded; -+ /* New fields needed for Windows HardwareID-6 matching */ -+ const char *smbios_stream_product; -+ const char *smbios_stream_version; - - /* RAM / address space compat: */ - bool gigabyte_align; --- -1.8.3.1 - diff --git a/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch b/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch deleted file mode 100644 index 5d62ace..0000000 --- a/kvm-i386-Remove-cpu64-rhel6-CPU-model.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 4543a3c19816bd07f27eb900f20ae609df03703c Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Mon, 23 Dec 2019 21:10:31 +0000 -Subject: [PATCH 1/2] i386: Remove cpu64-rhel6 CPU model - -RH-Author: Eduardo Habkost -Message-id: <20191223211031.26503-1-ehabkost@redhat.com> -Patchwork-id: 93213 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] i386: Remove cpu64-rhel6 CPU model -Bugzilla: 1741345 -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Laszlo Ersek - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1741345 -BRANCH: rhel-av-8.2.0 -Upstream: not applicable -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25525975 - -We don't provide rhel6 machine types anymore, so we don't need to -provide compatibility with RHEl6. 
cpu64-rhel6 was documented as -deprecated and scheduled for removal in 8.2, so now it's time to -remove it. - -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 26 +------------------------- - 1 file changed, 1 insertion(+), 25 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 790db77..6dce6f2 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1829,12 +1829,7 @@ static CPUCaches epyc_cache_info = { - - static X86CPUDefinition builtin_x86_defs[] = { - { -- /* qemu64 is the default CPU model for all *-rhel7.* machine-types. -- * The default on RHEL-6 was cpu64-rhel6. -- * libvirt assumes that qemu64 is the default for _all_ machine-types, -- * so we should try to keep qemu64 and cpu64-rhel6 as similar as -- * possible. -- */ -+ /* qemu64 is the default CPU model for all machine-types */ - .name = "qemu64", - .level = 0xd, - .vendor = CPUID_VENDOR_AMD, -@@ -2135,25 +2130,6 @@ static X86CPUDefinition builtin_x86_defs[] = { - .model_id = "Intel(R) Atom(TM) CPU N270 @ 1.60GHz", - }, - { -- .name = "cpu64-rhel6", -- .level = 4, -- .vendor = CPUID_VENDOR_AMD, -- .family = 6, -- .model = 13, -- .stepping = 3, -- .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | -- CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -- CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | -- CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | -- CPUID_PSE | CPUID_DE | CPUID_FP87, -- .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, -- .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, -- .features[FEAT_8000_0001_ECX] = CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | -- CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, -- .xlevel = 0x8000000A, -- .model_id = "QEMU Virtual CPU version (cpu64-rhel6)", -- }, -- { - .name = "Conroe", - .level = 10, - .vendor = CPUID_VENDOR_INTEL, --- -1.8.3.1 - diff --git a/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch 
b/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch deleted file mode 100644 index 1027341..0000000 --- a/kvm-i386-Resolve-CPU-models-to-v1-by-default.patch +++ /dev/null @@ -1,95 +0,0 @@ -From ccda4494b0ea4b81b6b0c3e539a0bcf7e673c68c Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Thu, 5 Dec 2019 21:56:50 +0000 -Subject: [PATCH 01/18] i386: Resolve CPU models to v1 by default - -RH-Author: Eduardo Habkost -Message-id: <20191205225650.772600-2-ehabkost@redhat.com> -Patchwork-id: 92907 -O-Subject: [RHEL-AV-8.1.1 qemu-kvm PATCH 1/1] i386: Resolve CPU models to v1 by default -Bugzilla: 1787291 1779078 1779078 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Igor Mammedov -RH-Acked-by: Paolo Bonzini - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1779078 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=25187823 -Upstream: submitted, Message-Id: <20191205223339.764534-1-ehabkost@redhat.com> - -When using `query-cpu-definitions` using `-machine none`, -QEMU is resolving all CPU models to their latest versions. The -actual CPU model version being used by another machine type (e.g. -`pc-q35-4.0`) might be different. - -In theory, this was OK because the correct CPU model -version is returned when using the correct `-machine` argument. - -Except that in practice, this breaks libvirt expectations: -libvirt always use `-machine none` when checking if a CPU model -is runnable, because runnability is not expected to be affected -when the machine type is changed. - -For example, when running on a Haswell host without TSX, -Haswell-v4 is runnable, but Haswell-v1 is not. On those hosts, -`query-cpu-definitions` says Haswell is runnable if using -`-machine none`, but Haswell is actually not runnable using any -of the `pc-*` machine types (because they resolve Haswell to -Haswell-v1). In other words, we're breaking the "runnability -guarantee" we promised to not break for a few releases (see -qemu-deprecated.texi). 
- -To address this issue, change the default CPU model version to v1 -on all machine types, so we make `query-cpu-definitions` output -when using `-machine none` match the results when using `pc-*`. -This will change in the future (the plan is to always return the -latest CPU model version if using `-machine none`), but only -after giving libvirt the opportunity to adapt. - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1779078 -Signed-off-by: Eduardo Habkost -Signed-off-by: Danilo C. L. de Paula ---- - qemu-deprecated.texi | 7 +++++++ - target/i386/cpu.c | 8 +++++++- - 2 files changed, 14 insertions(+), 1 deletion(-) - -diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi -index 4b4b742..534ebe9 100644 ---- a/qemu-deprecated.texi -+++ b/qemu-deprecated.texi -@@ -374,6 +374,13 @@ guarantees must resolve the CPU model aliases using te - ``alias-of'' field returned by the ``query-cpu-definitions'' QMP - command. - -+While those guarantees are kept, the return value of -+``query-cpu-definitions'' will have existing CPU model aliases -+point to a version that doesn't break runnability guarantees -+(specifically, version 1 of those CPU models). In future QEMU -+versions, aliases will point to newer CPU model versions -+depending on the machine type, so management software must -+resolve CPU model aliases before starting a virtual machine. - - @node Recently removed features - @appendix Recently removed features -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 6dce6f2..863192c 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -3926,7 +3926,13 @@ static PropValue tcg_default_props[] = { - }; - - --X86CPUVersion default_cpu_version = CPU_VERSION_LATEST; -+/* -+ * We resolve CPU model aliases using -v1 when using "-machine -+ * none", but this is just for compatibility while libvirt isn't -+ * adapted to resolve CPU model versions before creating VMs. -+ * See "Runnability guarantee of CPU models" at * qemu-deprecated.texi. 
-+ */ -+X86CPUVersion default_cpu_version = 1; - - void x86_cpu_set_default_version(X86CPUVersion version) - { --- -1.8.3.1 - diff --git a/kvm-iotests-Add-iothread-cases-to-155.patch b/kvm-iotests-Add-iothread-cases-to-155.patch deleted file mode 100644 index 24ac90c..0000000 --- a/kvm-iotests-Add-iothread-cases-to-155.patch +++ /dev/null @@ -1,147 +0,0 @@ -From 2366cd9066e79d6c93a3a28710aea987b2c8f454 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:38 +0000 -Subject: [PATCH 18/20] iotests: Add iothread cases to 155 - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-13-kwolf@redhat.com> -Patchwork-id: 94289 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 12/13] iotests: Add iothread cases to 155 -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -This patch adds test cases for attaching the backing chain to a mirror -job target right before finalising the job, where the image is in a -non-mainloop AioContext (i.e. the backing chain needs to be moved to the -AioContext of the mirror target). - -This requires switching the test case from virtio-blk to virtio-scsi -because virtio-blk only actually starts using the iothreads when the -guest driver initialises the device (which never happens in a test case -without a guest OS). virtio-scsi always keeps its block nodes in the -AioContext of the the requested iothread without guest interaction. - -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-7-kwolf@redhat.com> -Reviewed-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit 6a5f6403a11307794ec79d277a065c137cfc12b2) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/155 | 32 +++++++++++++++++++++++--------- - tests/qemu-iotests/155.out | 4 ++-- - 2 files changed, 25 insertions(+), 11 deletions(-) - -diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 -index 3053e50..b552d1f 100755 ---- a/tests/qemu-iotests/155 -+++ b/tests/qemu-iotests/155 -@@ -49,11 +49,14 @@ target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt) - # chain opened right away. If False, blockdev-add - # opens it without a backing file and job completion - # is supposed to open the backing chain. -+# use_iothread: If True, an iothread is configured for the virtio-blk device -+# that uses the image being mirrored - - class BaseClass(iotests.QMPTestCase): - target_blockdev_backing = None - target_real_backing = None - target_open_with_backing = True -+ use_iothread = False - - def setUp(self): - qemu_img('create', '-f', iotests.imgfmt, back0_img, '1440K') -@@ -69,7 +72,16 @@ class BaseClass(iotests.QMPTestCase): - 'file': {'driver': 'file', - 'filename': source_img}} - self.vm.add_blockdev(self.vm.qmp_to_opts(blockdev)) -- self.vm.add_device('virtio-blk,id=qdev0,drive=source') -+ -+ if self.use_iothread: -+ self.vm.add_object('iothread,id=iothread0') -+ iothread = ",iothread=iothread0" -+ else: -+ iothread = "" -+ -+ self.vm.add_device('virtio-scsi%s' % iothread) -+ self.vm.add_device('scsi-hd,id=qdev0,drive=source') -+ - self.vm.launch() - - self.assertIntactSourceBackingChain() -@@ -182,24 +194,21 @@ class MirrorBaseClass(BaseClass): - def testFull(self): - self.runMirror('full') - -- node = self.findBlockNode('target', -- '/machine/peripheral/qdev0/virtio-backend') -+ node = self.findBlockNode('target', 'qdev0') - self.assertCorrectBackingImage(node, None) - self.assertIntactSourceBackingChain() - - def testTop(self): - self.runMirror('top') - -- node = self.findBlockNode('target', -- '/machine/peripheral/qdev0/virtio-backend') -+ node = self.findBlockNode('target', 'qdev0') - 
self.assertCorrectBackingImage(node, back2_img) - self.assertIntactSourceBackingChain() - - def testNone(self): - self.runMirror('none') - -- node = self.findBlockNode('target', -- '/machine/peripheral/qdev0/virtio-backend') -+ node = self.findBlockNode('target', 'qdev0') - self.assertCorrectBackingImage(node, source_img) - self.assertIntactSourceBackingChain() - -@@ -252,6 +261,9 @@ class TestBlockdevMirrorReopen(MirrorBaseClass): - backing="backing") - self.assert_qmp(result, 'return', {}) - -+class TestBlockdevMirrorReopenIothread(TestBlockdevMirrorReopen): -+ use_iothread = True -+ - # Attach the backing chain only during completion, with blockdev-snapshot - class TestBlockdevMirrorSnapshot(MirrorBaseClass): - cmd = 'blockdev-mirror' -@@ -268,6 +280,9 @@ class TestBlockdevMirrorSnapshot(MirrorBaseClass): - overlay="target") - self.assert_qmp(result, 'return', {}) - -+class TestBlockdevMirrorSnapshotIothread(TestBlockdevMirrorSnapshot): -+ use_iothread = True -+ - class TestCommit(BaseClass): - existing = False - -@@ -283,8 +298,7 @@ class TestCommit(BaseClass): - - self.vm.event_wait('BLOCK_JOB_COMPLETED') - -- node = self.findBlockNode(None, -- '/machine/peripheral/qdev0/virtio-backend') -+ node = self.findBlockNode(None, 'qdev0') - self.assert_qmp(node, 'image' + '/backing-image' * 0 + '/filename', - back1_img) - self.assert_qmp(node, 'image' + '/backing-image' * 1 + '/filename', -diff --git a/tests/qemu-iotests/155.out b/tests/qemu-iotests/155.out -index 4fd1c2d..ed714d5 100644 ---- a/tests/qemu-iotests/155.out -+++ b/tests/qemu-iotests/155.out -@@ -1,5 +1,5 @@ --......................... -+............................... 
- ---------------------------------------------------------------------- --Ran 25 tests -+Ran 31 tests - - OK --- -1.8.3.1 - diff --git a/kvm-iotests-Add-test-for-image-creation-fallback.patch b/kvm-iotests-Add-test-for-image-creation-fallback.patch deleted file mode 100644 index a8ea8f7..0000000 --- a/kvm-iotests-Add-test-for-image-creation-fallback.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 55f3a02574da226299d99bd74d12dd91b0f228dc Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:46 +0000 -Subject: [PATCH 05/20] iotests: Add test for image creation fallback - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-6-mlevitsk@redhat.com> -Patchwork-id: 94228 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/6] iotests: Add test for image creation fallback -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -Signed-off-by: Max Reitz -Message-Id: <20200122164532.178040-6-mreitz@redhat.com> -Reviewed-by: Eric Blake -Reviewed-by: Maxim Levitsky -[mreitz: Added a note that NBD does not support resizing, which is why - the second case is expected to fail] -Signed-off-by: Max Reitz -(cherry picked from commit 4dddeac115c5a2c5f74731fda0afd031a0b45490) -Signed-off-by: Maxim Levitsky - -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/259 | 62 ++++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/259.out | 14 +++++++++++ - tests/qemu-iotests/group | 1 + - 3 files changed, 77 insertions(+) - create mode 100755 tests/qemu-iotests/259 - create mode 100644 tests/qemu-iotests/259.out - -diff --git a/tests/qemu-iotests/259 b/tests/qemu-iotests/259 -new file mode 100755 -index 0000000..62e29af ---- /dev/null -+++ b/tests/qemu-iotests/259 -@@ -0,0 +1,62 @@ -+#!/usr/bin/env bash -+# -+# Test generic image creation fallback (by using NBD) -+# -+# Copyright (C) 2019 Red Hat, Inc. 
-+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+# creator -+owner=mreitz@redhat.com -+ -+seq=$(basename $0) -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! -+ -+_cleanup() -+{ -+ _cleanup_test_img -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+. ./common.rc -+. ./common.filter -+ -+_supported_fmt raw -+_supported_proto nbd -+_supported_os Linux -+ -+ -+_make_test_img 64M -+ -+echo -+echo '--- Testing creation ---' -+ -+$QEMU_IMG create -f qcow2 "$TEST_IMG" 64M | _filter_img_create -+$QEMU_IMG info "$TEST_IMG" | _filter_img_info -+ -+echo -+echo '--- Testing creation for which the node would need to grow ---' -+ -+# NBD does not support resizing, so this will fail -+$QEMU_IMG create -f qcow2 -o preallocation=metadata "$TEST_IMG" 64M 2>&1 \ -+ | _filter_img_create -+ -+# success, all done -+echo "*** done" -+rm -f $seq.full -+status=0 -diff --git a/tests/qemu-iotests/259.out b/tests/qemu-iotests/259.out -new file mode 100644 -index 0000000..ffed19c ---- /dev/null -+++ b/tests/qemu-iotests/259.out -@@ -0,0 +1,14 @@ -+QA output created by 259 -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 -+ -+--- Testing creation --- -+Formatting 'TEST_DIR/t.IMGFMT', fmt=qcow2 size=67108864 -+image: TEST_DIR/t.IMGFMT -+file format: qcow2 -+virtual size: 64 MiB (67108864 bytes) -+disk size: unavailable -+ -+--- 
Testing creation for which the node would need to grow --- -+qemu-img: TEST_DIR/t.IMGFMT: Could not resize image: Image format driver does not support resize -+Formatting 'TEST_DIR/t.IMGFMT', fmt=qcow2 size=67108864 preallocation=metadata -+*** done -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index c0e8197..e47cbfc 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -273,6 +273,7 @@ - 256 rw quick - 257 rw - 258 rw quick -+259 rw auto quick - 260 rw quick - 261 rw - 262 rw quick migration --- -1.8.3.1 - diff --git a/kvm-iotests-Create-VM.blockdev_create.patch b/kvm-iotests-Create-VM.blockdev_create.patch deleted file mode 100644 index 805b31a..0000000 --- a/kvm-iotests-Create-VM.blockdev_create.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 05fedde1374abb180cd2b51457385d8128aa7fe4 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:24:00 +0000 -Subject: [PATCH 03/18] iotests: Create VM.blockdev_create() - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-3-kwolf@redhat.com> -Patchwork-id: 93748 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 2/6] iotests: Create VM.blockdev_create() -Bugzilla: 1781637 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -We have several almost identical copies of a blockdev_create() function -in different test cases. Time to create one unified function in -iotests.py. - -To keep the diff managable, this patch only creates the function and -follow-up patches will convert the individual test cases. - -Signed-off-by: Kevin Wolf -(cherry picked from commit e9dbd1cae86f7cb6f8e470e1485aeb0c6e23ae64) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/iotests.py | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index 3cff671..5741efb 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -638,6 +638,22 @@ class VM(qtest.QEMUQtestMachine): - elif status == 'null': - return error - -+ # Returns None on success, and an error string on failure -+ def blockdev_create(self, options, job_id='job0', filters=None): -+ if filters is None: -+ filters = [filter_qmp_testfiles] -+ result = self.qmp_log('blockdev-create', filters=filters, -+ job_id=job_id, options=options) -+ -+ if 'return' in result: -+ assert result['return'] == {} -+ job_result = self.run_job(job_id) -+ else: -+ job_result = result['error'] -+ -+ log("") -+ return job_result -+ - def enable_migration_events(self, name): - log('Enabling migration QMP events on %s...' % name) - log(self.qmp('migrate-set-capabilities', capabilities=[ --- -1.8.3.1 - diff --git a/kvm-iotests-Fix-run_job-with-use_log-False.patch b/kvm-iotests-Fix-run_job-with-use_log-False.patch deleted file mode 100644 index b105fc2..0000000 --- a/kvm-iotests-Fix-run_job-with-use_log-False.patch +++ /dev/null @@ -1,47 +0,0 @@ -From bb7b968a02c97564596b73d8d080cd745d96ed6b Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:35 +0000 -Subject: [PATCH 15/20] iotests: Fix run_job() with use_log=False - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-10-kwolf@redhat.com> -Patchwork-id: 94284 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 09/13] iotests: Fix run_job() with use_log=False -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -The 'job-complete' QMP command should be run with qmp() rather than -qmp_log() if use_log=False is passed. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-4-kwolf@redhat.com> -Reviewed-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit b31b532122ec6f68d17168449c034d2197bf96ec) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/iotests.py | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index 0c55f7b..46f880c 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -618,7 +618,10 @@ class VM(qtest.QEMUQtestMachine): - if use_log: - log('Job failed: %s' % (j['error'])) - elif status == 'ready': -- self.qmp_log('job-complete', id=job) -+ if use_log: -+ self.qmp_log('job-complete', id=job) -+ else: -+ self.qmp('job-complete', id=job) - elif status == 'pending' and not auto_finalize: - if pre_finalize: - pre_finalize() --- -1.8.3.1 - diff --git a/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch b/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch deleted file mode 100644 index 17e4a41..0000000 --- a/kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch +++ /dev/null @@ -1,122 +0,0 @@ -From 7e23b64dc20b64ca6fa887cd06cc5e52374f6268 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:30 +0000 -Subject: [PATCH 10/20] iotests: Refactor blockdev-reopen test for iothreads - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-5-kwolf@redhat.com> -Patchwork-id: 94281 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 04/13] iotests: Refactor blockdev-reopen test for iothreads -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -We'll want to test more than one successful case in the future, so -prepare the test for that by a refactoring that runs each scenario in a -separate VM. 
- -test_iothreads_switch_{backing,overlay} currently produce errors, but -these are cases that should actually work, by switching either the -backing file node or the overlay node to the AioContext of the other -node. - -Signed-off-by: Kevin Wolf -Tested-by: Peter Krempa -Message-Id: <20200306141413.30705-2-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 97518e11c3d902a32386d33797044f6b79bccc6f) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/245 | 47 ++++++++++++++++++++++++++++++++++++---------- - tests/qemu-iotests/245.out | 4 ++-- - 2 files changed, 39 insertions(+), 12 deletions(-) - -diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 -index e66a23c..f69c2fa 100644 ---- a/tests/qemu-iotests/245 -+++ b/tests/qemu-iotests/245 -@@ -968,8 +968,7 @@ class TestBlockdevReopen(iotests.QMPTestCase): - self.assertEqual(self.get_node('hd1'), None) - self.assert_qmp(self.get_node('hd2'), 'ro', True) - -- # We don't allow setting a backing file that uses a different AioContext -- def test_iothreads(self): -+ def run_test_iothreads(self, iothread_a, iothread_b, errmsg = None): - opts = hd_opts(0) - result = self.vm.qmp('blockdev-add', conv_keys = False, **opts) - self.assert_qmp(result, 'return', {}) -@@ -984,20 +983,48 @@ class TestBlockdevReopen(iotests.QMPTestCase): - result = self.vm.qmp('object-add', qom_type='iothread', id='iothread1') - self.assert_qmp(result, 'return', {}) - -- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd0', iothread='iothread0') -+ result = self.vm.qmp('device_add', driver='virtio-scsi', id='scsi0', -+ iothread=iothread_a) - self.assert_qmp(result, 'return', {}) - -- self.reopen(opts, {'backing': 'hd2'}, "Cannot use a new backing file with a different AioContext") -- -- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd2', iothread='iothread1') -+ result = self.vm.qmp('device_add', driver='virtio-scsi', id='scsi1', -+ iothread=iothread_b) - 
self.assert_qmp(result, 'return', {}) - -- self.reopen(opts, {'backing': 'hd2'}, "Cannot use a new backing file with a different AioContext") -+ if iothread_a: -+ result = self.vm.qmp('device_add', driver='scsi-hd', drive='hd0', -+ share_rw=True, bus="scsi0.0") -+ self.assert_qmp(result, 'return', {}) - -- result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd2', iothread='iothread0') -- self.assert_qmp(result, 'return', {}) -+ if iothread_b: -+ result = self.vm.qmp('device_add', driver='scsi-hd', drive='hd2', -+ share_rw=True, bus="scsi1.0") -+ self.assert_qmp(result, 'return', {}) - -- self.reopen(opts, {'backing': 'hd2'}) -+ # Attaching the backing file may or may not work -+ self.reopen(opts, {'backing': 'hd2'}, errmsg) -+ -+ # But removing the backing file should always work -+ self.reopen(opts, {'backing': None}) -+ -+ self.vm.shutdown() -+ -+ # We don't allow setting a backing file that uses a different AioContext if -+ # neither of them can switch to the other AioContext -+ def test_iothreads_error(self): -+ self.run_test_iothreads('iothread0', 'iothread1', -+ "Cannot use a new backing file with a different AioContext") -+ -+ def test_iothreads_compatible_users(self): -+ self.run_test_iothreads('iothread0', 'iothread0') -+ -+ def test_iothreads_switch_backing(self): -+ self.run_test_iothreads('iothread0', None, -+ "Cannot use a new backing file with a different AioContext") -+ -+ def test_iothreads_switch_overlay(self): -+ self.run_test_iothreads(None, 'iothread0', -+ "Cannot use a new backing file with a different AioContext") - - if __name__ == '__main__': - iotests.main(supported_fmts=["qcow2"], -diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out -index a19de52..682b933 100644 ---- a/tests/qemu-iotests/245.out -+++ b/tests/qemu-iotests/245.out -@@ -1,6 +1,6 @@ --.................. -+..................... 
- ---------------------------------------------------------------------- --Ran 18 tests -+Ran 21 tests - - OK - {"execute": "job-finalize", "arguments": {"id": "commit0"}} --- -1.8.3.1 - diff --git a/kvm-iotests-Support-job-complete-in-run_job.patch b/kvm-iotests-Support-job-complete-in-run_job.patch deleted file mode 100644 index 08971a0..0000000 --- a/kvm-iotests-Support-job-complete-in-run_job.patch +++ /dev/null @@ -1,46 +0,0 @@ -From a3778aef0be61dead835af39073a62bbf72c8e20 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:23:59 +0000 -Subject: [PATCH 02/18] iotests: Support job-complete in run_job() - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-2-kwolf@redhat.com> -Patchwork-id: 93746 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 1/6] iotests: Support job-complete in run_job() -Bugzilla: 1781637 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Automatically complete jobs that have a 'ready' state and need an -explicit job-complete. Without this, run_job() would hang for such -jobs. - -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Alberto Garcia -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 4688c4e32ec76004676470f11734478799673d6d) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/iotests.py | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index df07089..3cff671 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -617,6 +617,8 @@ class VM(qtest.QEMUQtestMachine): - error = j['error'] - if use_log: - log('Job failed: %s' % (j['error'])) -+ elif status == 'ready': -+ self.qmp_log('job-complete', id=job) - elif status == 'pending' and not auto_finalize: - if pre_finalize: - pre_finalize() --- -1.8.3.1 - diff --git a/kvm-iotests-Test-external-snapshot-with-VM-state.patch b/kvm-iotests-Test-external-snapshot-with-VM-state.patch deleted file mode 100644 index 6fcb2f6..0000000 --- a/kvm-iotests-Test-external-snapshot-with-VM-state.patch +++ /dev/null @@ -1,189 +0,0 @@ -From 38b0cff9703fc740c30f5874973ac1be88f94d9f Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:24:03 +0000 -Subject: [PATCH 06/18] iotests: Test external snapshot with VM state - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-6-kwolf@redhat.com> -Patchwork-id: 93752 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 5/6] iotests: Test external snapshot with VM state -Bugzilla: 1781637 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -This tests creating an external snapshot with VM state (which results in -an active overlay over an inactive backing file, which is also the root -node of an inactive BlockBackend), re-activating the images and -performing some operations to test that the re-activation worked as -intended. - -Signed-off-by: Kevin Wolf -(cherry picked from commit f62f08ab7a9d902da70078992248ec5c98f652ad) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/280 | 83 ++++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/280.out | 50 ++++++++++++++++++++++++++++ - tests/qemu-iotests/group | 1 + - 3 files changed, 134 insertions(+) - create mode 100755 tests/qemu-iotests/280 - create mode 100644 tests/qemu-iotests/280.out - -diff --git a/tests/qemu-iotests/280 b/tests/qemu-iotests/280 -new file mode 100755 -index 0000000..0b1fa8e ---- /dev/null -+++ b/tests/qemu-iotests/280 -@@ -0,0 +1,83 @@ -+#!/usr/bin/env python -+# -+# Copyright (C) 2019 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+# Creator/Owner: Kevin Wolf -+# -+# Test migration to file for taking an external snapshot with VM state. 
-+ -+import iotests -+import os -+ -+iotests.verify_image_format(supported_fmts=['qcow2']) -+iotests.verify_protocol(supported=['file']) -+iotests.verify_platform(['linux']) -+ -+with iotests.FilePath('base') as base_path , \ -+ iotests.FilePath('top') as top_path, \ -+ iotests.VM() as vm: -+ -+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base_path, '64M') -+ -+ iotests.log('=== Launch VM ===') -+ vm.add_object('iothread,id=iothread0') -+ vm.add_blockdev('file,filename=%s,node-name=base-file' % (base_path)) -+ vm.add_blockdev('%s,file=base-file,node-name=base-fmt' % (iotests.imgfmt)) -+ vm.add_device('virtio-blk,drive=base-fmt,iothread=iothread0,id=vda') -+ vm.launch() -+ -+ vm.enable_migration_events('VM') -+ -+ iotests.log('\n=== Migrate to file ===') -+ vm.qmp_log('migrate', uri='exec:cat > /dev/null') -+ -+ with iotests.Timeout(3, 'Migration does not complete'): -+ vm.wait_migration() -+ -+ iotests.log('\nVM is now stopped:') -+ iotests.log(vm.qmp('query-migrate')['return']['status']) -+ vm.qmp_log('query-status') -+ -+ iotests.log('\n=== Create a snapshot of the disk image ===') -+ vm.blockdev_create({ -+ 'driver': 'file', -+ 'filename': top_path, -+ 'size': 0, -+ }) -+ vm.qmp_log('blockdev-add', node_name='top-file', -+ driver='file', filename=top_path, -+ filters=[iotests.filter_qmp_testfiles]) -+ -+ vm.blockdev_create({ -+ 'driver': iotests.imgfmt, -+ 'file': 'top-file', -+ 'size': 1024 * 1024, -+ }) -+ vm.qmp_log('blockdev-add', node_name='top-fmt', -+ driver=iotests.imgfmt, file='top-file') -+ -+ vm.qmp_log('blockdev-snapshot', node='base-fmt', overlay='top-fmt') -+ -+ iotests.log('\n=== Resume the VM and simulate a write request ===') -+ vm.qmp_log('cont') -+ iotests.log(vm.hmp_qemu_io('-d vda/virtio-backend', 'write 4k 4k')) -+ -+ iotests.log('\n=== Commit it to the backing file ===') -+ result = vm.qmp_log('block-commit', job_id='job0', auto_dismiss=False, -+ device='top-fmt', top_node='top-fmt', -+ filters=[iotests.filter_qmp_testfiles]) -+ 
if 'return' in result: -+ vm.run_job('job0') -diff --git a/tests/qemu-iotests/280.out b/tests/qemu-iotests/280.out -new file mode 100644 -index 0000000..5d382fa ---- /dev/null -+++ b/tests/qemu-iotests/280.out -@@ -0,0 +1,50 @@ -+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=67108864 cluster_size=65536 lazy_refcounts=off refcount_bits=16 -+ -+=== Launch VM === -+Enabling migration QMP events on VM... -+{"return": {}} -+ -+=== Migrate to file === -+{"execute": "migrate", "arguments": {"uri": "exec:cat > /dev/null"}} -+{"return": {}} -+{"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+ -+VM is now stopped: -+completed -+{"execute": "query-status", "arguments": {}} -+{"return": {"running": false, "singlestep": false, "status": "postmigrate"}} -+ -+=== Create a snapshot of the disk image === -+{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "file", "filename": "TEST_DIR/PID-top", "size": 0}}} -+{"return": {}} -+{"execute": "job-dismiss", "arguments": {"id": "job0"}} -+{"return": {}} -+ -+{"execute": "blockdev-add", "arguments": {"driver": "file", "filename": "TEST_DIR/PID-top", "node-name": "top-file"}} -+{"return": {}} -+{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "qcow2", "file": "top-file", "size": 1048576}}} -+{"return": {}} -+{"execute": "job-dismiss", "arguments": {"id": "job0"}} -+{"return": {}} -+ -+{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": "top-file", "node-name": "top-fmt"}} -+{"return": {}} -+{"execute": "blockdev-snapshot", "arguments": {"node": "base-fmt", "overlay": "top-fmt"}} -+{"return": {}} -+ -+=== Resume the VM and simulate a write request === -+{"execute": 
"cont", "arguments": {}} -+{"return": {}} -+{"return": ""} -+ -+=== Commit it to the backing file === -+{"execute": "block-commit", "arguments": {"auto-dismiss": false, "device": "top-fmt", "job-id": "job0", "top-node": "top-fmt"}} -+{"return": {}} -+{"execute": "job-complete", "arguments": {"id": "job0"}} -+{"return": {}} -+{"data": {"device": "job0", "len": 65536, "offset": 65536, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"data": {"device": "job0", "len": 65536, "offset": 65536, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} -+{"execute": "job-dismiss", "arguments": {"id": "job0"}} -+{"return": {}} -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 06cc734..01301cd 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -286,3 +286,4 @@ - 272 rw - 273 backing quick - 277 rw quick -+280 rw migration quick --- -1.8.3.1 - diff --git a/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch b/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch deleted file mode 100644 index b09439b..0000000 --- a/kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch +++ /dev/null @@ -1,322 +0,0 @@ -From 6b9a6ba9ed753ad7aa714b35de938ebeeb4fa6cb Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Fri, 7 Feb 2020 10:27:49 +0000 -Subject: [PATCH 16/18] iotests: Test handling of AioContexts with some - blockdev actions - -RH-Author: Sergio Lopez Pascual -Message-id: <20200207112749.25073-10-slp@redhat.com> -Patchwork-id: 93762 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 9/9] iotests: Test handling of AioContexts with some blockdev actions -Bugzilla: 1745606 1746217 1773517 1779036 1782111 1782175 1783965 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -Includes the following tests: - - - Adding 
a dirty bitmap. - * RHBZ: 1782175 - - - Starting a drive-mirror to an NBD-backed target. - * RHBZ: 1746217, 1773517 - - - Aborting an external snapshot transaction. - * RHBZ: 1779036 - - - Aborting a blockdev backup transaction. - * RHBZ: 1782111 - -For each one of them, a VM with a number of disks running in an -IOThread AioContext is used. - -Signed-off-by: Sergio Lopez -Signed-off-by: Kevin Wolf -(cherry picked from commit 9b8c59e7610b9c5315ef093d801843dbe8debfac) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/281 | 247 +++++++++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/281.out | 5 + - tests/qemu-iotests/group | 1 + - 3 files changed, 253 insertions(+) - create mode 100755 tests/qemu-iotests/281 - create mode 100644 tests/qemu-iotests/281.out - -diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 -new file mode 100755 -index 0000000..269d583 ---- /dev/null -+++ b/tests/qemu-iotests/281 -@@ -0,0 +1,247 @@ -+#!/usr/bin/env python -+# -+# Test cases for blockdev + IOThread interactions -+# -+# Copyright (C) 2019 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . 
-+# -+ -+import os -+import iotests -+from iotests import qemu_img -+ -+image_len = 64 * 1024 * 1024 -+ -+# Test for RHBZ#1782175 -+class TestDirtyBitmapIOThread(iotests.QMPTestCase): -+ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') -+ images = { 'drive0': drive0_img } -+ -+ def setUp(self): -+ for name in self.images: -+ qemu_img('create', '-f', iotests.imgfmt, -+ self.images[name], str(image_len)) -+ -+ self.vm = iotests.VM() -+ self.vm.add_object('iothread,id=iothread0') -+ -+ for name in self.images: -+ self.vm.add_blockdev('driver=file,filename=%s,node-name=file_%s' -+ % (self.images[name], name)) -+ self.vm.add_blockdev('driver=qcow2,file=file_%s,node-name=%s' -+ % (name, name)) -+ -+ self.vm.launch() -+ self.vm.qmp('x-blockdev-set-iothread', -+ node_name='drive0', iothread='iothread0', -+ force=True) -+ -+ def tearDown(self): -+ self.vm.shutdown() -+ for name in self.images: -+ os.remove(self.images[name]) -+ -+ def test_add_dirty_bitmap(self): -+ result = self.vm.qmp( -+ 'block-dirty-bitmap-add', -+ node='drive0', -+ name='bitmap1', -+ persistent=True, -+ ) -+ -+ self.assert_qmp(result, 'return', {}) -+ -+ -+# Test for RHBZ#1746217 & RHBZ#1773517 -+class TestNBDMirrorIOThread(iotests.QMPTestCase): -+ nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') -+ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') -+ mirror_img = os.path.join(iotests.test_dir, 'mirror.img') -+ images = { 'drive0': drive0_img, 'mirror': mirror_img } -+ -+ def setUp(self): -+ for name in self.images: -+ qemu_img('create', '-f', iotests.imgfmt, -+ self.images[name], str(image_len)) -+ -+ self.vm_src = iotests.VM(path_suffix='src') -+ self.vm_src.add_object('iothread,id=iothread0') -+ self.vm_src.add_blockdev('driver=file,filename=%s,node-name=file0' -+ % (self.drive0_img)) -+ self.vm_src.add_blockdev('driver=qcow2,file=file0,node-name=drive0') -+ self.vm_src.launch() -+ self.vm_src.qmp('x-blockdev-set-iothread', -+ node_name='drive0', iothread='iothread0', -+ 
force=True) -+ -+ self.vm_tgt = iotests.VM(path_suffix='tgt') -+ self.vm_tgt.add_object('iothread,id=iothread0') -+ self.vm_tgt.add_blockdev('driver=file,filename=%s,node-name=file0' -+ % (self.mirror_img)) -+ self.vm_tgt.add_blockdev('driver=qcow2,file=file0,node-name=drive0') -+ self.vm_tgt.launch() -+ self.vm_tgt.qmp('x-blockdev-set-iothread', -+ node_name='drive0', iothread='iothread0', -+ force=True) -+ -+ def tearDown(self): -+ self.vm_src.shutdown() -+ self.vm_tgt.shutdown() -+ for name in self.images: -+ os.remove(self.images[name]) -+ -+ def test_nbd_mirror(self): -+ result = self.vm_tgt.qmp( -+ 'nbd-server-start', -+ addr={ -+ 'type': 'unix', -+ 'data': { 'path': self.nbd_sock } -+ } -+ ) -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.vm_tgt.qmp( -+ 'nbd-server-add', -+ device='drive0', -+ writable=True -+ ) -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.vm_src.qmp( -+ 'drive-mirror', -+ device='drive0', -+ target='nbd+unix:///drive0?socket=' + self.nbd_sock, -+ sync='full', -+ mode='existing', -+ speed=64*1024*1024, -+ job_id='j1' -+ ) -+ self.assert_qmp(result, 'return', {}) -+ -+ self.vm_src.event_wait(name="BLOCK_JOB_READY") -+ -+ -+# Test for RHBZ#1779036 -+class TestExternalSnapshotAbort(iotests.QMPTestCase): -+ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') -+ snapshot_img = os.path.join(iotests.test_dir, 'snapshot.img') -+ images = { 'drive0': drive0_img, 'snapshot': snapshot_img } -+ -+ def setUp(self): -+ for name in self.images: -+ qemu_img('create', '-f', iotests.imgfmt, -+ self.images[name], str(image_len)) -+ -+ self.vm = iotests.VM() -+ self.vm.add_object('iothread,id=iothread0') -+ self.vm.add_blockdev('driver=file,filename=%s,node-name=file0' -+ % (self.drive0_img)) -+ self.vm.add_blockdev('driver=qcow2,file=file0,node-name=drive0') -+ self.vm.launch() -+ self.vm.qmp('x-blockdev-set-iothread', -+ node_name='drive0', iothread='iothread0', -+ force=True) -+ -+ def tearDown(self): -+ self.vm.shutdown() 
-+ for name in self.images: -+ os.remove(self.images[name]) -+ -+ def test_external_snapshot_abort(self): -+ # Use a two actions transaction with a bogus values on the second -+ # one to trigger an abort of the transaction. -+ result = self.vm.qmp('transaction', actions=[ -+ { -+ 'type': 'blockdev-snapshot-sync', -+ 'data': { 'node-name': 'drive0', -+ 'snapshot-file': self.snapshot_img, -+ 'snapshot-node-name': 'snap1', -+ 'mode': 'absolute-paths', -+ 'format': 'qcow2' } -+ }, -+ { -+ 'type': 'blockdev-snapshot-sync', -+ 'data': { 'node-name': 'drive0', -+ 'snapshot-file': '/fakesnapshot', -+ 'snapshot-node-name': 'snap2', -+ 'mode': 'absolute-paths', -+ 'format': 'qcow2' } -+ }, -+ ]) -+ -+ # Crashes on failure, we expect this error. -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ -+ -+# Test for RHBZ#1782111 -+class TestBlockdevBackupAbort(iotests.QMPTestCase): -+ drive0_img = os.path.join(iotests.test_dir, 'drive0.img') -+ drive1_img = os.path.join(iotests.test_dir, 'drive1.img') -+ snap0_img = os.path.join(iotests.test_dir, 'snap0.img') -+ snap1_img = os.path.join(iotests.test_dir, 'snap1.img') -+ images = { 'drive0': drive0_img, -+ 'drive1': drive1_img, -+ 'snap0': snap0_img, -+ 'snap1': snap1_img } -+ -+ def setUp(self): -+ for name in self.images: -+ qemu_img('create', '-f', iotests.imgfmt, -+ self.images[name], str(image_len)) -+ -+ self.vm = iotests.VM() -+ self.vm.add_object('iothread,id=iothread0') -+ self.vm.add_device('virtio-scsi,iothread=iothread0') -+ -+ for name in self.images: -+ self.vm.add_blockdev('driver=file,filename=%s,node-name=file_%s' -+ % (self.images[name], name)) -+ self.vm.add_blockdev('driver=qcow2,file=file_%s,node-name=%s' -+ % (name, name)) -+ -+ self.vm.add_device('scsi-hd,drive=drive0') -+ self.vm.add_device('scsi-hd,drive=drive1') -+ self.vm.launch() -+ -+ def tearDown(self): -+ self.vm.shutdown() -+ for name in self.images: -+ os.remove(self.images[name]) -+ -+ def test_blockdev_backup_abort(self): -+ # Use a two 
actions transaction with a bogus values on the second -+ # one to trigger an abort of the transaction. -+ result = self.vm.qmp('transaction', actions=[ -+ { -+ 'type': 'blockdev-backup', -+ 'data': { 'device': 'drive0', -+ 'target': 'snap0', -+ 'sync': 'full', -+ 'job-id': 'j1' } -+ }, -+ { -+ 'type': 'blockdev-backup', -+ 'data': { 'device': 'drive1', -+ 'target': 'snap1', -+ 'sync': 'full' } -+ }, -+ ]) -+ -+ # Hangs on failure, we expect this error. -+ self.assert_qmp(result, 'error/class', 'GenericError') -+ -+if __name__ == '__main__': -+ iotests.main(supported_fmts=['qcow2'], -+ supported_protocols=['file']) -diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out -new file mode 100644 -index 0000000..89968f3 ---- /dev/null -+++ b/tests/qemu-iotests/281.out -@@ -0,0 +1,5 @@ -+.... -+---------------------------------------------------------------------- -+Ran 4 tests -+ -+OK -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 01301cd..c0e8197 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -287,3 +287,4 @@ - 273 backing quick - 277 rw quick - 280 rw migration quick -+281 rw quick --- -1.8.3.1 - diff --git a/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch b/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch deleted file mode 100644 index 58ef198..0000000 --- a/kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch +++ /dev/null @@ -1,162 +0,0 @@ -From 239f7bdeef48a3c0b07098617371b9955dc55348 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:36 +0000 -Subject: [PATCH 16/20] iotests: Test mirror with temporarily disabled target - backing file - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-11-kwolf@redhat.com> -Patchwork-id: 94288 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 10/13] iotests: Test mirror with temporarily disabled target backing file -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. 
Berrange -RH-Acked-by: Peter Krempa - -The newly tested scenario is a common live storage migration scenario: -The target node is opened without a backing file so that the active -layer is mirrored while its backing chain can be copied in the -background. - -The backing chain should be attached to the mirror target node when -finalising the job, just before switching the users of the source node -to the new copy (at which point the mirror job still has a reference to -the node). drive-mirror did this automatically, but with blockdev-mirror -this is the job of the QMP client. - -This patch adds test cases for two ways to achieve the desired result, -using either x-blockdev-reopen or blockdev-snapshot. - -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-5-kwolf@redhat.com> -Reviewed-by: Peter Krempa -Signed-off-by: Kevin Wolf -(cherry picked from commit 8bdee9f10eac2aefdcc5095feef756354c87bdec) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/155 | 56 +++++++++++++++++++++++++++++++++++++++++----- - tests/qemu-iotests/155.out | 4 ++-- - 2 files changed, 53 insertions(+), 7 deletions(-) - -diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 -index d7ef257..3053e50 100755 ---- a/tests/qemu-iotests/155 -+++ b/tests/qemu-iotests/155 -@@ -45,10 +45,15 @@ target_img = os.path.join(iotests.test_dir, 'target.' + iotests.imgfmt) - # image during runtime, only makes sense if - # target_blockdev_backing is not None - # (None: same as target_backing) -+# target_open_with_backing: If True, the target image is added with its backing -+# chain opened right away. If False, blockdev-add -+# opens it without a backing file and job completion -+# is supposed to open the backing chain. 
- - class BaseClass(iotests.QMPTestCase): - target_blockdev_backing = None - target_real_backing = None -+ target_open_with_backing = True - - def setUp(self): - qemu_img('create', '-f', iotests.imgfmt, back0_img, '1440K') -@@ -80,9 +85,13 @@ class BaseClass(iotests.QMPTestCase): - options = { 'node-name': 'target', - 'driver': iotests.imgfmt, - 'file': { 'driver': 'file', -+ 'node-name': 'target-file', - 'filename': target_img } } -- if self.target_blockdev_backing: -- options['backing'] = self.target_blockdev_backing -+ -+ if not self.target_open_with_backing: -+ options['backing'] = None -+ elif self.target_blockdev_backing: -+ options['backing'] = self.target_blockdev_backing - - result = self.vm.qmp('blockdev-add', **options) - self.assert_qmp(result, 'return', {}) -@@ -147,10 +156,14 @@ class BaseClass(iotests.QMPTestCase): - # cmd: Mirroring command to execute, either drive-mirror or blockdev-mirror - - class MirrorBaseClass(BaseClass): -+ def openBacking(self): -+ pass -+ - def runMirror(self, sync): - if self.cmd == 'blockdev-mirror': - result = self.vm.qmp(self.cmd, job_id='mirror-job', device='source', -- sync=sync, target='target') -+ sync=sync, target='target', -+ auto_finalize=False) - else: - if self.existing: - mode = 'existing' -@@ -159,11 +172,12 @@ class MirrorBaseClass(BaseClass): - result = self.vm.qmp(self.cmd, job_id='mirror-job', device='source', - sync=sync, target=target_img, - format=iotests.imgfmt, mode=mode, -- node_name='target') -+ node_name='target', auto_finalize=False) - - self.assert_qmp(result, 'return', {}) - -- self.complete_and_wait('mirror-job') -+ self.vm.run_job('mirror-job', use_log=False, auto_finalize=False, -+ pre_finalize=self.openBacking, auto_dismiss=True) - - def testFull(self): - self.runMirror('full') -@@ -221,6 +235,38 @@ class TestBlockdevMirrorForcedBacking(MirrorBaseClass): - target_blockdev_backing = { 'driver': 'null-co' } - target_real_backing = 'null-co://' - -+# Attach the backing chain only during 
completion, with blockdev-reopen -+class TestBlockdevMirrorReopen(MirrorBaseClass): -+ cmd = 'blockdev-mirror' -+ existing = True -+ target_backing = 'null-co://' -+ target_open_with_backing = False -+ -+ def openBacking(self): -+ if not self.target_open_with_backing: -+ result = self.vm.qmp('blockdev-add', node_name="backing", -+ driver="null-co") -+ self.assert_qmp(result, 'return', {}) -+ result = self.vm.qmp('x-blockdev-reopen', node_name="target", -+ driver=iotests.imgfmt, file="target-file", -+ backing="backing") -+ self.assert_qmp(result, 'return', {}) -+ -+# Attach the backing chain only during completion, with blockdev-snapshot -+class TestBlockdevMirrorSnapshot(MirrorBaseClass): -+ cmd = 'blockdev-mirror' -+ existing = True -+ target_backing = 'null-co://' -+ target_open_with_backing = False -+ -+ def openBacking(self): -+ if not self.target_open_with_backing: -+ result = self.vm.qmp('blockdev-add', node_name="backing", -+ driver="null-co") -+ self.assert_qmp(result, 'return', {}) -+ result = self.vm.qmp('blockdev-snapshot', node="backing", -+ overlay="target") -+ self.assert_qmp(result, 'return', {}) - - class TestCommit(BaseClass): - existing = False -diff --git a/tests/qemu-iotests/155.out b/tests/qemu-iotests/155.out -index 4176bb9..4fd1c2d 100644 ---- a/tests/qemu-iotests/155.out -+++ b/tests/qemu-iotests/155.out -@@ -1,5 +1,5 @@ --................... -+......................... 
- ---------------------------------------------------------------------- --Ran 19 tests -+Ran 25 tests - - OK --- -1.8.3.1 - diff --git a/kvm-iotests-Use-complete_and_wait-in-155.patch b/kvm-iotests-Use-complete_and_wait-in-155.patch deleted file mode 100644 index 38b41be..0000000 --- a/kvm-iotests-Use-complete_and_wait-in-155.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 872fbd32d06bda4aba3a7e67a95f76f62e475dbe Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:27 +0000 -Subject: [PATCH 07/20] iotests: Use complete_and_wait() in 155 - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-2-kwolf@redhat.com> -Patchwork-id: 94279 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 01/13] iotests: Use complete_and_wait() in 155 -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -From: Max Reitz - -This way, we get to see errors during the completion phase. - -Signed-off-by: Max Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200218103454.296704-14-mreitz@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 6644d0e6192b36cdf2902c9774e1afb8ab2e7223) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/155 | 7 +------ - 1 file changed, 1 insertion(+), 6 deletions(-) - -diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 -index e194859..d7ef257 100755 ---- a/tests/qemu-iotests/155 -+++ b/tests/qemu-iotests/155 -@@ -163,12 +163,7 @@ class MirrorBaseClass(BaseClass): - - self.assert_qmp(result, 'return', {}) - -- self.vm.event_wait('BLOCK_JOB_READY') -- -- result = self.vm.qmp('block-job-complete', device='mirror-job') -- self.assert_qmp(result, 'return', {}) -- -- self.vm.event_wait('BLOCK_JOB_COMPLETED') -+ self.complete_and_wait('mirror-job') - - def testFull(self): - self.runMirror('full') --- -1.8.3.1 - diff --git a/kvm-iotests.py-Let-wait_migration-wait-even-more.patch b/kvm-iotests.py-Let-wait_migration-wait-even-more.patch deleted file mode 100644 index cda8037..0000000 --- a/kvm-iotests.py-Let-wait_migration-wait-even-more.patch +++ /dev/null @@ -1,123 +0,0 @@ -From d6df1426ae65b3a0d50bdbb1f8a7246386dd6ebf Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 7 Feb 2020 11:24:04 +0000 -Subject: [PATCH 07/18] iotests.py: Let wait_migration wait even more - -RH-Author: Kevin Wolf -Message-id: <20200207112404.25198-7-kwolf@redhat.com> -Patchwork-id: 93751 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 6/6] iotests.py: Let wait_migration wait even more -Bugzilla: 1781637 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -From: Max Reitz - -The "migration completed" event may be sent (on the source, to be -specific) before the migration is actually completed, so the VM runstate -will still be "finish-migrate" instead of "postmigrate". So ask the -users of VM.wait_migration() to specify the final runstate they desire -and then poll the VM until it has reached that state. (This should be -over very quickly, so busy polling is fine.) - -Without this patch, I see intermittent failures in the new iotest 280 -under high system load. 
I have not yet seen such failures with other -iotests that use VM.wait_migration() and query-status afterwards, but -maybe they just occur even more rarely, or it is because they also wait -on the destination VM to be running. - -Signed-off-by: Max Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit 8da7969bd7014f6de037d8ae132b40721944b186) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - tests/qemu-iotests/234 | 8 ++++---- - tests/qemu-iotests/262 | 4 ++-- - tests/qemu-iotests/280 | 2 +- - tests/qemu-iotests/iotests.py | 6 +++++- - 4 files changed, 12 insertions(+), 8 deletions(-) - -diff --git a/tests/qemu-iotests/234 b/tests/qemu-iotests/234 -index 34c818c..59a7f94 100755 ---- a/tests/qemu-iotests/234 -+++ b/tests/qemu-iotests/234 -@@ -69,9 +69,9 @@ with iotests.FilePath('img') as img_path, \ - iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo_a))) - with iotests.Timeout(3, 'Migration does not complete'): - # Wait for the source first (which includes setup=setup) -- vm_a.wait_migration() -+ vm_a.wait_migration('postmigrate') - # Wait for the destination second (which does not) -- vm_b.wait_migration() -+ vm_b.wait_migration('running') - - iotests.log(vm_a.qmp('query-migrate')['return']['status']) - iotests.log(vm_b.qmp('query-migrate')['return']['status']) -@@ -98,9 +98,9 @@ with iotests.FilePath('img') as img_path, \ - iotests.log(vm_b.qmp('migrate', uri='exec:cat >%s' % (fifo_b))) - with iotests.Timeout(3, 'Migration does not complete'): - # Wait for the source first (which includes setup=setup) -- vm_b.wait_migration() -+ vm_b.wait_migration('postmigrate') - # Wait for the destination second (which does not) -- vm_a.wait_migration() -+ vm_a.wait_migration('running') - - iotests.log(vm_a.qmp('query-migrate')['return']['status']) - iotests.log(vm_b.qmp('query-migrate')['return']['status']) -diff --git a/tests/qemu-iotests/262 b/tests/qemu-iotests/262 -index 0963daa..bbcb526 100755 ---- a/tests/qemu-iotests/262 -+++ 
b/tests/qemu-iotests/262 -@@ -71,9 +71,9 @@ with iotests.FilePath('img') as img_path, \ - iotests.log(vm_a.qmp('migrate', uri='exec:cat >%s' % (fifo))) - with iotests.Timeout(3, 'Migration does not complete'): - # Wait for the source first (which includes setup=setup) -- vm_a.wait_migration() -+ vm_a.wait_migration('postmigrate') - # Wait for the destination second (which does not) -- vm_b.wait_migration() -+ vm_b.wait_migration('running') - - iotests.log(vm_a.qmp('query-migrate')['return']['status']) - iotests.log(vm_b.qmp('query-migrate')['return']['status']) -diff --git a/tests/qemu-iotests/280 b/tests/qemu-iotests/280 -index 0b1fa8e..85e9114 100755 ---- a/tests/qemu-iotests/280 -+++ b/tests/qemu-iotests/280 -@@ -45,7 +45,7 @@ with iotests.FilePath('base') as base_path , \ - vm.qmp_log('migrate', uri='exec:cat > /dev/null') - - with iotests.Timeout(3, 'Migration does not complete'): -- vm.wait_migration() -+ vm.wait_migration('postmigrate') - - iotests.log('\nVM is now stopped:') - iotests.log(vm.qmp('query-migrate')['return']['status']) -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index 5741efb..0c55f7b 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -663,12 +663,16 @@ class VM(qtest.QEMUQtestMachine): - } - ])) - -- def wait_migration(self): -+ def wait_migration(self, expect_runstate): - while True: - event = self.event_wait('MIGRATION') - log(event, filters=[filter_qmp_event]) - if event['data']['status'] == 'completed': - break -+ # The event may occur in finish-migrate, so wait for the expected -+ # post-migration runstate -+ while self.qmp('query-status')['return']['status'] != expect_runstate: -+ pass - - def node_info(self, node_name): - nodes = self.qmp('query-named-block-nodes') --- -1.8.3.1 - diff --git a/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch b/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch deleted file mode 100644 index 2ee9dcd..0000000 --- 
a/kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 1c508d56d154caf5fbf53e7dabafd707236cb16b Mon Sep 17 00:00:00 2001 -From: jmaloy -Date: Wed, 29 Jan 2020 13:45:18 +0000 -Subject: [PATCH 06/15] iscsi: Cap block count from GET LBA STATUS - (CVE-2020-1711) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: jmaloy -Message-id: <20200129134518.1293-2-jmaloy@redhat.com> -Patchwork-id: 93571 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] iscsi: Cap block count from GET LBA STATUS (CVE-2020-1711) -Bugzilla: 1794503 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Kevin Wolf -RH-Acked-by: Philippe Mathieu-Daudé - -From: Felipe Franciosi - -When querying an iSCSI server for the provisioning status of blocks (via -GET LBA STATUS), Qemu only validates that the response descriptor zero's -LBA matches the one requested. Given the SCSI spec allows servers to -respond with the status of blocks beyond the end of the LUN, Qemu may -have its heap corrupted by clearing/setting too many bits at the end of -its allocmap for the LUN. - -A malicious guest in control of the iSCSI server could carefully program -Qemu's heap (by selectively setting the bitmap) and then smash it. - -This limits the number of bits that iscsi_co_block_status() will try to -update in the allocmap so it can't overflow the bitmap. - -Fixes: CVE-2020-1711 -Cc: qemu-stable@nongnu.org -Signed-off-by: Felipe Franciosi -Signed-off-by: Peter Turschmid -Signed-off-by: Raphael Norwitz -Signed-off-by: Kevin Wolf -(cherry picked from commit 693fd2acdf14dd86c0bf852610f1c2cca80a74dc) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - block/iscsi.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/block/iscsi.c b/block/iscsi.c -index 2aea7e3..cbd5729 100644 ---- a/block/iscsi.c -+++ b/block/iscsi.c -@@ -701,7 +701,7 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, - struct scsi_get_lba_status *lbas = NULL; - struct scsi_lba_status_descriptor *lbasd = NULL; - struct IscsiTask iTask; -- uint64_t lba; -+ uint64_t lba, max_bytes; - int ret; - - iscsi_co_init_iscsitask(iscsilun, &iTask); -@@ -721,6 +721,7 @@ static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs, - } - - lba = offset / iscsilun->block_size; -+ max_bytes = (iscsilun->num_blocks - lba) * iscsilun->block_size; - - qemu_mutex_lock(&iscsilun->mutex); - retry: -@@ -764,7 +765,7 @@ retry: - goto out_unlock; - } - -- *pnum = (int64_t) lbasd->num_blocks * iscsilun->block_size; -+ *pnum = MIN((int64_t) lbasd->num_blocks * iscsilun->block_size, max_bytes); - - if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED || - lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) { --- -1.8.3.1 - diff --git a/kvm-iscsi-Drop-iscsi_co_create_opts.patch b/kvm-iscsi-Drop-iscsi_co_create_opts.patch deleted file mode 100644 index a6d0baf..0000000 --- a/kvm-iscsi-Drop-iscsi_co_create_opts.patch +++ /dev/null @@ -1,113 +0,0 @@ -From 58b7d33e1bc17b89103ceaa39f5722a69b35d810 Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 11 Mar 2020 10:51:45 +0000 -Subject: [PATCH 04/20] iscsi: Drop iscsi_co_create_opts() - -RH-Author: Maxim Levitsky -Message-id: <20200311105147.13208-5-mlevitsk@redhat.com> -Patchwork-id: 94226 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 4/6] iscsi: Drop iscsi_co_create_opts() -Bugzilla: 1640894 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: John Snow -RH-Acked-by: Max Reitz - -From: Max Reitz - -The generic fallback implementation effectively does the same. 
- -Reviewed-by: Maxim Levitsky -Signed-off-by: Max Reitz -Message-Id: <20200122164532.178040-5-mreitz@redhat.com> -Signed-off-by: Max Reitz -(cherry picked from commit 80f0900905b555f00d644894c786b6d66ac2e00e) -Signed-off-by: Maxim Levitsky -Signed-off-by: Danilo C. L. de Paula ---- - block/iscsi.c | 56 -------------------------------------------------------- - 1 file changed, 56 deletions(-) - -diff --git a/block/iscsi.c b/block/iscsi.c -index cbd5729..b45da65 100644 ---- a/block/iscsi.c -+++ b/block/iscsi.c -@@ -2164,58 +2164,6 @@ static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset, - return 0; - } - --static int coroutine_fn iscsi_co_create_opts(const char *filename, QemuOpts *opts, -- Error **errp) --{ -- int ret = 0; -- int64_t total_size = 0; -- BlockDriverState *bs; -- IscsiLun *iscsilun = NULL; -- QDict *bs_options; -- Error *local_err = NULL; -- -- bs = bdrv_new(); -- -- /* Read out options */ -- total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), -- BDRV_SECTOR_SIZE); -- bs->opaque = g_new0(struct IscsiLun, 1); -- iscsilun = bs->opaque; -- -- bs_options = qdict_new(); -- iscsi_parse_filename(filename, bs_options, &local_err); -- if (local_err) { -- error_propagate(errp, local_err); -- ret = -EINVAL; -- } else { -- ret = iscsi_open(bs, bs_options, 0, NULL); -- } -- qobject_unref(bs_options); -- -- if (ret != 0) { -- goto out; -- } -- iscsi_detach_aio_context(bs); -- if (iscsilun->type != TYPE_DISK) { -- ret = -ENODEV; -- goto out; -- } -- if (bs->total_sectors < total_size) { -- ret = -ENOSPC; -- goto out; -- } -- -- ret = 0; --out: -- if (iscsilun->iscsi != NULL) { -- iscsi_destroy_context(iscsilun->iscsi); -- } -- g_free(bs->opaque); -- bs->opaque = NULL; -- bdrv_unref(bs); -- return ret; --} -- - static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) - { - IscsiLun *iscsilun = bs->opaque; -@@ -2486,8 +2434,6 @@ static BlockDriver bdrv_iscsi = { - .bdrv_parse_filename = iscsi_parse_filename, 
- .bdrv_file_open = iscsi_open, - .bdrv_close = iscsi_close, -- .bdrv_co_create_opts = iscsi_co_create_opts, -- .create_opts = &iscsi_create_opts, - .bdrv_reopen_prepare = iscsi_reopen_prepare, - .bdrv_reopen_commit = iscsi_reopen_commit, - .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, -@@ -2525,8 +2471,6 @@ static BlockDriver bdrv_iser = { - .bdrv_parse_filename = iscsi_parse_filename, - .bdrv_file_open = iscsi_open, - .bdrv_close = iscsi_close, -- .bdrv_co_create_opts = iscsi_co_create_opts, -- .create_opts = &iscsi_create_opts, - .bdrv_reopen_prepare = iscsi_reopen_prepare, - .bdrv_reopen_commit = iscsi_reopen_commit, - .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache, --- -1.8.3.1 - diff --git a/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch b/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch deleted file mode 100644 index e38428b..0000000 --- a/kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch +++ /dev/null @@ -1,213 +0,0 @@ -From 3f16b8a33bd7503cbe857fbeb45fff7301b6bb5f Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 8 Apr 2020 17:29:12 +0100 -Subject: [PATCH 1/6] job: take each job's lock individually in job_txn_apply - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-2-kwolf@redhat.com> -Patchwork-id: 94597 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/6] job: take each job's lock individually in job_txn_apply -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -From: Stefan Reiter - -All callers of job_txn_apply hold a single job's lock, but different -jobs within a transaction can have different contexts, thus we need to -lock each one individually before applying the callback function. - -Similar to job_completed_txn_abort this also requires releasing the -caller's context before and reacquiring it after to avoid recursive -locks which might break AIO_WAIT_WHILE in the callback. 
This is safe, since -existing code would already have to take this into account, lest -job_completed_txn_abort might have broken. - -This also brings to light a different issue: When a callback function in -job_txn_apply moves it's job to a different AIO context, callers will -try to release the wrong lock (now that we re-acquire the lock -correctly, previously it would just continue with the old lock, leaving -the job unlocked for the rest of the return path). Fix this by not caching -the job's context. - -This is only necessary for qmp_block_job_finalize, qmp_job_finalize and -job_exit, since everyone else calls through job_exit. - -One test needed adapting, since it calls job_finalize directly, so it -manually needs to acquire the correct context. - -Signed-off-by: Stefan Reiter -Message-Id: <20200407115651.69472-2-s.reiter@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit b660a84bbb0eb1a76b505648d31d5e82594fb75e) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - blockdev.c | 9 +++++++++ - job-qmp.c | 9 +++++++++ - job.c | 50 ++++++++++++++++++++++++++++++++++++++++---------- - tests/test-blockjob.c | 2 ++ - 4 files changed, 60 insertions(+), 10 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index c8d4b51..86eb115 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -4215,7 +4215,16 @@ void qmp_block_job_finalize(const char *id, Error **errp) - } - - trace_qmp_block_job_finalize(job); -+ job_ref(&job->job); - job_finalize(&job->job, errp); -+ -+ /* -+ * Job's context might have changed via job_finalize (and job_txn_apply -+ * automatically acquires the new one), so make sure we release the correct -+ * one. 
-+ */ -+ aio_context = blk_get_aio_context(job->blk); -+ job_unref(&job->job); - aio_context_release(aio_context); - } - -diff --git a/job-qmp.c b/job-qmp.c -index fbfed25..a201220 100644 ---- a/job-qmp.c -+++ b/job-qmp.c -@@ -114,7 +114,16 @@ void qmp_job_finalize(const char *id, Error **errp) - } - - trace_qmp_job_finalize(job); -+ job_ref(job); - job_finalize(job, errp); -+ -+ /* -+ * Job's context might have changed via job_finalize (and job_txn_apply -+ * automatically acquires the new one), so make sure we release the correct -+ * one. -+ */ -+ aio_context = job->aio_context; -+ job_unref(job); - aio_context_release(aio_context); - } - -diff --git a/job.c b/job.c -index 04409b4..48fc4ad 100644 ---- a/job.c -+++ b/job.c -@@ -136,17 +136,38 @@ static void job_txn_del_job(Job *job) - } - } - --static int job_txn_apply(JobTxn *txn, int fn(Job *)) -+static int job_txn_apply(Job *job, int fn(Job *)) - { -- Job *job, *next; -+ AioContext *inner_ctx; -+ Job *other_job, *next; -+ JobTxn *txn = job->txn; - int rc = 0; - -- QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) { -- rc = fn(job); -+ /* -+ * Similar to job_completed_txn_abort, we take each job's lock before -+ * applying fn, but since we assume that outer_ctx is held by the caller, -+ * we need to release it here to avoid holding the lock twice - which would -+ * break AIO_WAIT_WHILE from within fn. -+ */ -+ job_ref(job); -+ aio_context_release(job->aio_context); -+ -+ QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { -+ inner_ctx = other_job->aio_context; -+ aio_context_acquire(inner_ctx); -+ rc = fn(other_job); -+ aio_context_release(inner_ctx); - if (rc) { - break; - } - } -+ -+ /* -+ * Note that job->aio_context might have been changed by calling fn, so we -+ * can't use a local variable to cache it. 
-+ */ -+ aio_context_acquire(job->aio_context); -+ job_unref(job); - return rc; - } - -@@ -774,11 +795,11 @@ static void job_do_finalize(Job *job) - assert(job && job->txn); - - /* prepare the transaction to complete */ -- rc = job_txn_apply(job->txn, job_prepare); -+ rc = job_txn_apply(job, job_prepare); - if (rc) { - job_completed_txn_abort(job); - } else { -- job_txn_apply(job->txn, job_finalize_single); -+ job_txn_apply(job, job_finalize_single); - } - } - -@@ -824,10 +845,10 @@ static void job_completed_txn_success(Job *job) - assert(other_job->ret == 0); - } - -- job_txn_apply(txn, job_transition_to_pending); -+ job_txn_apply(job, job_transition_to_pending); - - /* If no jobs need manual finalization, automatically do so */ -- if (job_txn_apply(txn, job_needs_finalize) == 0) { -+ if (job_txn_apply(job, job_needs_finalize) == 0) { - job_do_finalize(job); - } - } -@@ -849,9 +870,10 @@ static void job_completed(Job *job) - static void job_exit(void *opaque) - { - Job *job = (Job *)opaque; -- AioContext *ctx = job->aio_context; -+ AioContext *ctx; - -- aio_context_acquire(ctx); -+ job_ref(job); -+ aio_context_acquire(job->aio_context); - - /* This is a lie, we're not quiescent, but still doing the completion - * callbacks. However, completion callbacks tend to involve operations that -@@ -862,6 +884,14 @@ static void job_exit(void *opaque) - - job_completed(job); - -+ /* -+ * Note that calling job_completed can move the job to a different -+ * aio_context, so we cannot cache from above. job_txn_apply takes care of -+ * acquiring the new lock, and we ref/unref to avoid job_completed freeing -+ * the job underneath us. 
-+ */ -+ ctx = job->aio_context; -+ job_unref(job); - aio_context_release(ctx); - } - -diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c -index 7844c9f..6d857fd 100644 ---- a/tests/test-blockjob.c -+++ b/tests/test-blockjob.c -@@ -368,7 +368,9 @@ static void test_cancel_concluded(void) - aio_poll(qemu_get_aio_context(), true); - assert(job->status == JOB_STATUS_PENDING); - -+ aio_context_acquire(job->aio_context); - job_finalize(job, &error_abort); -+ aio_context_release(job->aio_context); - assert(job->status == JOB_STATUS_CONCLUDED); - - cancel_common(s); --- -1.8.3.1 - diff --git a/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch b/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch deleted file mode 100644 index e362efe..0000000 --- a/kvm-libvhost-user-Fix-some-memtable-remap-cases.patch +++ /dev/null @@ -1,117 +0,0 @@ -From ee360b70f179cf540faebe7e55b34e323e2bb179 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:09 +0100 -Subject: [PATCH 098/116] libvhost-user: Fix some memtable remap cases -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-95-dgilbert@redhat.com> -Patchwork-id: 93548 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 094/112] libvhost-user: Fix some memtable remap cases -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -If a new setmemtable command comes in once the vhost threads are -running, it will remap the guests address space and the threads -will now be looking in the wrong place. - -Fortunately we're running this command under lock, so we can -update the queue mappings so that threads will look in the new-right -place. - -Note: This doesn't fix things that the threads might be doing -without a lock (e.g. a readv/writev!) That's for another time. - -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 49e9ec749d4db62ae51f76354143cee183912a1d) -Signed-off-by: Miroslav Rezanina ---- - contrib/libvhost-user/libvhost-user.c | 33 +++++++++++++++++++++++++-------- - contrib/libvhost-user/libvhost-user.h | 3 +++ - 2 files changed, 28 insertions(+), 8 deletions(-) - -diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c -index 63e4106..b89bf18 100644 ---- a/contrib/libvhost-user/libvhost-user.c -+++ b/contrib/libvhost-user/libvhost-user.c -@@ -565,6 +565,21 @@ vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg) - } - - static bool -+map_ring(VuDev *dev, VuVirtq *vq) -+{ -+ vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr); -+ vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr); -+ vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr); -+ -+ DPRINT("Setting virtq addresses:\n"); -+ DPRINT(" vring_desc at %p\n", vq->vring.desc); -+ DPRINT(" vring_used at %p\n", vq->vring.used); -+ DPRINT(" vring_avail at %p\n", vq->vring.avail); -+ -+ return !(vq->vring.desc && vq->vring.used && vq->vring.avail); -+} -+ -+static bool - vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) - { - int i; -@@ -767,6 +782,14 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) - close(vmsg->fds[i]); - } - -+ for (i = 0; i < dev->max_queues; i++) { -+ if (dev->vq[i].vring.desc) { -+ if (map_ring(dev, &dev->vq[i])) { -+ vu_panic(dev, "remaping queue %d during setmemtable", i); -+ } -+ } -+ } -+ - return false; - } - -@@ -853,18 +876,12 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg) - DPRINT(" avail_user_addr: 0x%016" PRIx64 "\n", vra->avail_user_addr); - DPRINT(" log_guest_addr: 0x%016" PRIx64 "\n", vra->log_guest_addr); - -+ vq->vra = *vra; - vq->vring.flags = vra->flags; -- vq->vring.desc = qva_to_va(dev, vra->desc_user_addr); -- vq->vring.used = qva_to_va(dev, vra->used_user_addr); -- vq->vring.avail = qva_to_va(dev, vra->avail_user_addr); - vq->vring.log_guest_addr = 
vra->log_guest_addr; - -- DPRINT("Setting virtq addresses:\n"); -- DPRINT(" vring_desc at %p\n", vq->vring.desc); -- DPRINT(" vring_used at %p\n", vq->vring.used); -- DPRINT(" vring_avail at %p\n", vq->vring.avail); - -- if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) { -+ if (map_ring(dev, vq)) { - vu_panic(dev, "Invalid vring_addr message"); - return false; - } -diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h -index 1844b6f..5cb7708 100644 ---- a/contrib/libvhost-user/libvhost-user.h -+++ b/contrib/libvhost-user/libvhost-user.h -@@ -327,6 +327,9 @@ typedef struct VuVirtq { - int err_fd; - unsigned int enable; - bool started; -+ -+ /* Guest addresses of our ring */ -+ struct vhost_vring_addr vra; - } VuVirtq; - - enum VuWatchCondtion { --- -1.8.3.1 - diff --git a/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch b/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch deleted file mode 100644 index 3477af5..0000000 --- a/kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch +++ /dev/null @@ -1,179 +0,0 @@ -From 38a032829b6b8d523b4cee05f732031e66fc2e41 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Fri, 31 Jan 2020 17:12:56 +0000 -Subject: [PATCH 14/15] migration: Change SaveStateEntry.instance_id into - uint32_t - -RH-Author: Peter Xu -Message-id: <20200131171257.1066593-3-peterx@redhat.com> -Patchwork-id: 93629 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/3] migration: Change SaveStateEntry.instance_id into uint32_t -Bugzilla: 1529231 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert - -It was always used as 32bit, so define it as used to be clear. -Instead of using -1 as the auto-gen magic value, we switch to -UINT32_MAX. We also make sure that we don't auto-gen this value to -avoid overflowed instance IDs without being noticed. 
- -Suggested-by: Juan Quintela -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 93062e23619e057743757ee53bf7f8e07f7a3710) -Signed-off-by: Peter Xu -Signed-off-by: Danilo C. L. de Paula - -Conflicts: - include/migration/vmstate.h - migration/savevm.c - stubs/vmstate.c - Due to missing 3cad405bab ("vmstate: replace DeviceState with - VMStateIf", 2020-01-06) - -Signed-off-by: Danilo C. L. de Paula ---- - hw/intc/apic_common.c | 2 +- - include/migration/register.h | 2 +- - include/migration/vmstate.h | 2 +- - migration/savevm.c | 18 ++++++++++-------- - stubs/vmstate.c | 2 +- - 5 files changed, 14 insertions(+), 12 deletions(-) - -diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c -index f2c3a7f..54b8731 100644 ---- a/hw/intc/apic_common.c -+++ b/hw/intc/apic_common.c -@@ -268,7 +268,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) - APICCommonState *s = APIC_COMMON(dev); - APICCommonClass *info; - static DeviceState *vapic; -- int instance_id = s->id; -+ uint32_t instance_id = s->id; - - info = APIC_COMMON_GET_CLASS(s); - info->realize(dev, errp); -diff --git a/include/migration/register.h b/include/migration/register.h -index a13359a..f3ba10b 100644 ---- a/include/migration/register.h -+++ b/include/migration/register.h -@@ -69,7 +69,7 @@ typedef struct SaveVMHandlers { - } SaveVMHandlers; - - int register_savevm_live(const char *idstr, -- int instance_id, -+ uint32_t instance_id, - int version_id, - const SaveVMHandlers *ops, - void *opaque); -diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h -index 883f1cf..296609c 100644 ---- a/include/migration/vmstate.h -+++ b/include/migration/vmstate.h -@@ -1158,7 +1158,7 @@ bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); - #define VMSTATE_INSTANCE_ID_ANY -1 - - /* Returns: 0 on success, -1 on failure */ --int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, -+int 
vmstate_register_with_alias_id(DeviceState *dev, uint32_t instance_id, - const VMStateDescription *vmsd, - void *base, int alias_id, - int required_for_version, -diff --git a/migration/savevm.c b/migration/savevm.c -index e2e8e0a..a80bb52 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -233,7 +233,7 @@ typedef struct CompatEntry { - typedef struct SaveStateEntry { - QTAILQ_ENTRY(SaveStateEntry) entry; - char idstr[256]; -- int instance_id; -+ uint32_t instance_id; - int alias_id; - int version_id; - /* version id read from the stream */ -@@ -665,10 +665,10 @@ void dump_vmstate_json_to_file(FILE *out_file) - fclose(out_file); - } - --static int calculate_new_instance_id(const char *idstr) -+static uint32_t calculate_new_instance_id(const char *idstr) - { - SaveStateEntry *se; -- int instance_id = 0; -+ uint32_t instance_id = 0; - - QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { - if (strcmp(idstr, se->idstr) == 0 -@@ -676,6 +676,8 @@ static int calculate_new_instance_id(const char *idstr) - instance_id = se->instance_id + 1; - } - } -+ /* Make sure we never loop over without being noticed */ -+ assert(instance_id != VMSTATE_INSTANCE_ID_ANY); - return instance_id; - } - -@@ -730,7 +732,7 @@ static void savevm_state_handler_insert(SaveStateEntry *nse) - Meanwhile pass -1 as instance_id if you do not already have a clearly - distinguishing id for all instances of your device class. 
*/ - int register_savevm_live(const char *idstr, -- int instance_id, -+ uint32_t instance_id, - int version_id, - const SaveVMHandlers *ops, - void *opaque) -@@ -784,7 +786,7 @@ void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque) - } - } - --int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, -+int vmstate_register_with_alias_id(DeviceState *dev, uint32_t instance_id, - const VMStateDescription *vmsd, - void *opaque, int alias_id, - int required_for_version, -@@ -1600,7 +1602,7 @@ int qemu_save_device_state(QEMUFile *f) - return qemu_file_get_error(f); - } - --static SaveStateEntry *find_se(const char *idstr, int instance_id) -+static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id) - { - SaveStateEntry *se; - -@@ -2267,7 +2269,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) - /* Find savevm section */ - se = find_se(idstr, instance_id); - if (se == NULL) { -- error_report("Unknown savevm section or instance '%s' %d. " -+ error_report("Unknown savevm section or instance '%s' %"PRIu32". 
" - "Make sure that your current VM setup matches your " - "saved VM setup, including any hotplugged devices", - idstr, instance_id); -@@ -2291,7 +2293,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) - - ret = vmstate_load(f, se); - if (ret < 0) { -- error_report("error while loading state for instance 0x%x of" -+ error_report("error while loading state for instance 0x%"PRIx32" of" - " device '%s'", instance_id, idstr); - return ret; - } -diff --git a/stubs/vmstate.c b/stubs/vmstate.c -index e1e89b8..4ed5cc6 100644 ---- a/stubs/vmstate.c -+++ b/stubs/vmstate.c -@@ -4,7 +4,7 @@ - const VMStateDescription vmstate_dummy = {}; - - int vmstate_register_with_alias_id(DeviceState *dev, -- int instance_id, -+ uint32_t instance_id, - const VMStateDescription *vmsd, - void *base, int alias_id, - int required_for_version, --- -1.8.3.1 - diff --git a/kvm-migration-Create-migration_is_running.patch b/kvm-migration-Create-migration_is_running.patch deleted file mode 100644 index c9593de..0000000 --- a/kvm-migration-Create-migration_is_running.patch +++ /dev/null @@ -1,119 +0,0 @@ -From c9e3d13d70a24bf606ce351886b27bdca25ef4dc Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:41 +0000 -Subject: [PATCH 09/18] migration: Create migration_is_running() - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-9-quintela@redhat.com> -Patchwork-id: 94115 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 08/10] migration: Create migration_is_running() -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -This function returns true if we are in the middle of a migration. -It is like migration_is_setup_or_active() with CANCELLING and COLO. -Adapt all callers that are needed. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -(cherry picked from commit 392d87e21325fdb01210176faa07472b4985ccf0) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/migration.c | 29 ++++++++++++++++++++++++----- - migration/migration.h | 1 + - migration/savevm.c | 4 +--- - 3 files changed, 26 insertions(+), 8 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 30c53c6..eb50d77 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -831,6 +831,27 @@ bool migration_is_setup_or_active(int state) - } - } - -+bool migration_is_running(int state) -+{ -+ switch (state) { -+ case MIGRATION_STATUS_ACTIVE: -+ case MIGRATION_STATUS_POSTCOPY_ACTIVE: -+ case MIGRATION_STATUS_POSTCOPY_PAUSED: -+ case MIGRATION_STATUS_POSTCOPY_RECOVER: -+ case MIGRATION_STATUS_SETUP: -+ case MIGRATION_STATUS_PRE_SWITCHOVER: -+ case MIGRATION_STATUS_DEVICE: -+ case MIGRATION_STATUS_WAIT_UNPLUG: -+ case MIGRATION_STATUS_CANCELLING: -+ case MIGRATION_STATUS_COLO: -+ return true; -+ -+ default: -+ return false; -+ -+ } -+} -+ - static void populate_time_info(MigrationInfo *info, MigrationState *s) - { - info->has_status = true; -@@ -1090,7 +1111,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - MigrationCapabilityStatusList *cap; - bool cap_list[MIGRATION_CAPABILITY__MAX]; - -- if (migration_is_setup_or_active(s->state)) { -+ if (migration_is_running(s->state)) { - error_setg(errp, QERR_MIGRATION_ACTIVE); - return; - } -@@ -1603,7 +1624,7 @@ static void migrate_fd_cancel(MigrationState *s) - - do { - old_state = s->state; -- if (!migration_is_setup_or_active(old_state)) { -+ if (!migration_is_running(old_state)) { - break; - } - /* If the migration is paused, kick it out of the pause */ -@@ -1900,9 +1921,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - return true; - } - -- if (migration_is_setup_or_active(s->state) || -- s->state == MIGRATION_STATUS_CANCELLING || -- s->state == MIGRATION_STATUS_COLO) { -+ if (migration_is_running(s->state)) { - error_setg(errp, QERR_MIGRATION_ACTIVE); - return false; - } -diff --git 
a/migration/migration.h b/migration/migration.h -index 0b1b0d4..a2b2336 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -279,6 +279,7 @@ void migrate_fd_error(MigrationState *s, const Error *error); - void migrate_fd_connect(MigrationState *s, Error *error_in); - - bool migration_is_setup_or_active(int state); -+bool migration_is_running(int state); - - void migrate_init(MigrationState *s); - bool migration_is_blocked(Error **errp); -diff --git a/migration/savevm.c b/migration/savevm.c -index a80bb52..144ecf0 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1506,9 +1506,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) - MigrationState *ms = migrate_get_current(); - MigrationStatus status; - -- if (migration_is_setup_or_active(ms->state) || -- ms->state == MIGRATION_STATUS_CANCELLING || -- ms->state == MIGRATION_STATUS_COLO) { -+ if (migration_is_running(ms->state)) { - error_setg(errp, QERR_MIGRATION_ACTIVE); - return -EINVAL; - } --- -1.8.3.1 - diff --git a/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch b/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch deleted file mode 100644 index c2ead53..0000000 --- a/kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch +++ /dev/null @@ -1,257 +0,0 @@ -From 2659af9267586fb626f543773bf3f844727e473b Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Fri, 31 Jan 2020 17:12:55 +0000 -Subject: [PATCH 13/15] migration: Define VMSTATE_INSTANCE_ID_ANY - -RH-Author: Peter Xu -Message-id: <20200131171257.1066593-2-peterx@redhat.com> -Patchwork-id: 93630 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/3] migration: Define VMSTATE_INSTANCE_ID_ANY -Bugzilla: 1529231 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert - -Define the new macro VMSTATE_INSTANCE_ID_ANY for callers who wants to -auto-generate the vmstate instance ID. Previously it was hard coded -as -1 instead of this macro. It helps to change this default value in -the follow up patches. 
No functional change. - -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 1df2c9a26fcb2fa32d099f8e9adcdae4207872e3) -Signed-off-by: Peter Xu -Signed-off-by: Danilo C. L. de Paula - -Conflicts: - backends/dbus-vmstate.c - File deleted - hw/core/qdev.c - hw/misc/max111x.c - hw/net/eepro100.c - Due to missing commit 3cad405bab ("vmstate: replace - DeviceState with VMStateIf", 2020-01-06) - -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/stellaris.c | 2 +- - hw/core/qdev.c | 3 ++- - hw/display/ads7846.c | 2 +- - hw/i2c/core.c | 2 +- - hw/input/stellaris_input.c | 3 ++- - hw/intc/apic_common.c | 2 +- - hw/misc/max111x.c | 2 +- - hw/net/eepro100.c | 2 +- - hw/pci/pci.c | 2 +- - hw/ppc/spapr.c | 2 +- - hw/timer/arm_timer.c | 2 +- - hw/tpm/tpm_emulator.c | 3 ++- - include/migration/vmstate.h | 2 ++ - migration/savevm.c | 8 ++++---- - 14 files changed, 21 insertions(+), 16 deletions(-) - -diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c -index b198066..bb025e0 100644 ---- a/hw/arm/stellaris.c -+++ b/hw/arm/stellaris.c -@@ -708,7 +708,7 @@ static int stellaris_sys_init(uint32_t base, qemu_irq irq, - memory_region_init_io(&s->iomem, NULL, &ssys_ops, s, "ssys", 0x00001000); - memory_region_add_subregion(get_system_memory(), base, &s->iomem); - ssys_reset(s); -- vmstate_register(NULL, -1, &vmstate_stellaris_sys, s); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_stellaris_sys, s); - return 0; - } - -diff --git a/hw/core/qdev.c b/hw/core/qdev.c -index cf1ba28..40f6b2b 100644 ---- a/hw/core/qdev.c -+++ b/hw/core/qdev.c -@@ -890,7 +890,8 @@ static void device_set_realized(Object *obj, bool value, Error **errp) - dev->canonical_path = object_get_canonical_path(OBJECT(dev)); - - if (qdev_get_vmsd(dev)) { -- if (vmstate_register_with_alias_id(dev, -1, qdev_get_vmsd(dev), dev, -+ if (vmstate_register_with_alias_id(dev, VMSTATE_INSTANCE_ID_ANY, -+ qdev_get_vmsd(dev), dev, - dev->instance_id_alias, - 
dev->alias_required_for_version, - &local_err) < 0) { -diff --git a/hw/display/ads7846.c b/hw/display/ads7846.c -index c12272a..9228b40 100644 ---- a/hw/display/ads7846.c -+++ b/hw/display/ads7846.c -@@ -154,7 +154,7 @@ static void ads7846_realize(SSISlave *d, Error **errp) - - ads7846_int_update(s); - -- vmstate_register(NULL, -1, &vmstate_ads7846, s); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_ads7846, s); - } - - static void ads7846_class_init(ObjectClass *klass, void *data) -diff --git a/hw/i2c/core.c b/hw/i2c/core.c -index 92cd489..d770035 100644 ---- a/hw/i2c/core.c -+++ b/hw/i2c/core.c -@@ -61,7 +61,7 @@ I2CBus *i2c_init_bus(DeviceState *parent, const char *name) - - bus = I2C_BUS(qbus_create(TYPE_I2C_BUS, parent, name)); - QLIST_INIT(&bus->current_devs); -- vmstate_register(NULL, -1, &vmstate_i2c_bus, bus); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_i2c_bus, bus); - return bus; - } - -diff --git a/hw/input/stellaris_input.c b/hw/input/stellaris_input.c -index 59892b0..e6ee5e1 100644 ---- a/hw/input/stellaris_input.c -+++ b/hw/input/stellaris_input.c -@@ -88,5 +88,6 @@ void stellaris_gamepad_init(int n, qemu_irq *irq, const int *keycode) - } - s->num_buttons = n; - qemu_add_kbd_event_handler(stellaris_gamepad_put_key, s); -- vmstate_register(NULL, -1, &vmstate_stellaris_gamepad, s); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, -+ &vmstate_stellaris_gamepad, s); - } -diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c -index 375cb6a..f2c3a7f 100644 ---- a/hw/intc/apic_common.c -+++ b/hw/intc/apic_common.c -@@ -284,7 +284,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) - } - - if (s->legacy_instance_id) { -- instance_id = -1; -+ instance_id = VMSTATE_INSTANCE_ID_ANY; - } - vmstate_register_with_alias_id(NULL, instance_id, &vmstate_apic_common, - s, -1, 0, NULL); -diff --git a/hw/misc/max111x.c b/hw/misc/max111x.c -index a713149..81ee73e 100644 ---- a/hw/misc/max111x.c -+++ b/hw/misc/max111x.c 
-@@ -146,7 +146,7 @@ static int max111x_init(SSISlave *d, int inputs) - s->input[7] = 0x80; - s->com = 0; - -- vmstate_register(dev, -1, &vmstate_max111x, s); -+ vmstate_register(dev, VMSTATE_INSTANCE_ID_ANY, &vmstate_max111x, s); - return 0; - } - -diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c -index cc2dd8b..39920c6 100644 ---- a/hw/net/eepro100.c -+++ b/hw/net/eepro100.c -@@ -1874,7 +1874,7 @@ static void e100_nic_realize(PCIDevice *pci_dev, Error **errp) - - s->vmstate = g_memdup(&vmstate_eepro100, sizeof(vmstate_eepro100)); - s->vmstate->name = qemu_get_queue(s->nic)->model; -- vmstate_register(&pci_dev->qdev, -1, s->vmstate, s); -+ vmstate_register(&pci_dev->qdev, VMSTATE_INSTANCE_ID_ANY, s->vmstate, s); - } - - static void eepro100_instance_init(Object *obj) -diff --git a/hw/pci/pci.c b/hw/pci/pci.c -index cbc7a32..fed019d 100644 ---- a/hw/pci/pci.c -+++ b/hw/pci/pci.c -@@ -124,7 +124,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp) - bus->machine_done.notify = pcibus_machine_done; - qemu_add_machine_init_done_notifier(&bus->machine_done); - -- vmstate_register(NULL, -1, &vmstate_pcibus, bus); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_pcibus, bus); - } - - static void pcie_bus_realize(BusState *qbus, Error **errp) -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 8749c72..c12862d 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -3028,7 +3028,7 @@ static void spapr_machine_init(MachineState *machine) - * interface, this is a legacy from the sPAPREnvironment structure - * which predated MachineState but had a similar function */ - vmstate_register(NULL, 0, &vmstate_spapr, spapr); -- register_savevm_live("spapr/htab", -1, 1, -+ register_savevm_live("spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, - &savevm_htab_handlers, spapr); - - qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine), -diff --git a/hw/timer/arm_timer.c b/hw/timer/arm_timer.c -index af524fa..beaa285 100644 ---- a/hw/timer/arm_timer.c -+++ 
b/hw/timer/arm_timer.c -@@ -180,7 +180,7 @@ static arm_timer_state *arm_timer_init(uint32_t freq) - s->control = TIMER_CTRL_IE; - - s->timer = ptimer_init(arm_timer_tick, s, PTIMER_POLICY_DEFAULT); -- vmstate_register(NULL, -1, &vmstate_arm_timer, s); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_arm_timer, s); - return s; - } - -diff --git a/hw/tpm/tpm_emulator.c b/hw/tpm/tpm_emulator.c -index 22f9113..da7b490 100644 ---- a/hw/tpm/tpm_emulator.c -+++ b/hw/tpm/tpm_emulator.c -@@ -914,7 +914,8 @@ static void tpm_emulator_inst_init(Object *obj) - tpm_emu->cur_locty_number = ~0; - qemu_mutex_init(&tpm_emu->mutex); - -- vmstate_register(NULL, -1, &vmstate_tpm_emulator, obj); -+ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, -+ &vmstate_tpm_emulator, obj); - } - - /* -diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h -index ac4f46a..883f1cf 100644 ---- a/include/migration/vmstate.h -+++ b/include/migration/vmstate.h -@@ -1155,6 +1155,8 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd, - - bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); - -+#define VMSTATE_INSTANCE_ID_ANY -1 -+ - /* Returns: 0 on success, -1 on failure */ - int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, - const VMStateDescription *vmsd, -diff --git a/migration/savevm.c b/migration/savevm.c -index a71b930..e2e8e0a 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -750,7 +750,7 @@ int register_savevm_live(const char *idstr, - - pstrcat(se->idstr, sizeof(se->idstr), idstr); - -- if (instance_id == -1) { -+ if (instance_id == VMSTATE_INSTANCE_ID_ANY) { - se->instance_id = calculate_new_instance_id(se->idstr); - } else { - se->instance_id = instance_id; -@@ -817,14 +817,14 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, - - se->compat = g_new0(CompatEntry, 1); - pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name); -- se->compat->instance_id = instance_id 
== -1 ? -+ se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ? - calculate_compat_instance_id(vmsd->name) : instance_id; -- instance_id = -1; -+ instance_id = VMSTATE_INSTANCE_ID_ANY; - } - } - pstrcat(se->idstr, sizeof(se->idstr), vmsd->name); - -- if (instance_id == -1) { -+ if (instance_id == VMSTATE_INSTANCE_ID_ANY) { - se->instance_id = calculate_new_instance_id(se->idstr); - } else { - se->instance_id = instance_id; --- -1.8.3.1 - diff --git a/kvm-migration-Don-t-send-data-if-we-have-stopped.patch b/kvm-migration-Don-t-send-data-if-we-have-stopped.patch deleted file mode 100644 index 9a36714..0000000 --- a/kvm-migration-Don-t-send-data-if-we-have-stopped.patch +++ /dev/null @@ -1,42 +0,0 @@ -From ab07e0b41c50a85940d798a9a65a58698fd2edfb Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:40 +0000 -Subject: [PATCH 08/18] migration: Don't send data if we have stopped - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-8-quintela@redhat.com> -Patchwork-id: 94114 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 07/10] migration: Don't send data if we have stopped -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -If we do a cancel, we got out without one error, but we can't do the -rest of the output as in a normal situation. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -(cherry picked from commit b69a0227a803256ad270283872d40ff768f4d56d) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index a0257ee..902c56c 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3511,7 +3511,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - ram_control_after_iterate(f, RAM_CONTROL_ROUND); - - out: -- if (ret >= 0) { -+ if (ret >= 0 -+ && migration_is_setup_or_active(migrate_get_current()->state)) { - multifd_send_sync_main(rs); - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); --- -1.8.3.1 - diff --git a/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch b/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch deleted file mode 100644 index 01cb0f1..0000000 --- a/kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 71b05ab5782aa1e38c016be6264a14f5650d2a87 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:35 +0000 -Subject: [PATCH 03/18] migration: Make sure that we don't call write() in case - of error - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-3-quintela@redhat.com> -Patchwork-id: 94113 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 02/10] migration: Make sure that we don't call write() in case of error -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -If we are exiting due to an error/finish/.... Just don't try to even -touch the channel with one IO operation. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Juan Quintela -(cherry picked from commit 4d65a6216bfc44891ac298b74a6921d479805131) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 25 +++++++++++++++++++++++++ - 1 file changed, 25 insertions(+) - -diff --git a/migration/ram.c b/migration/ram.c -index 65580e3..8c783b3 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -899,6 +899,12 @@ struct { - uint64_t packet_num; - /* send channels ready */ - QemuSemaphore channels_ready; -+ /* -+ * Have we already run terminate threads. There is a race when it -+ * happens that we got one error while we are exiting. -+ * We will use atomic operations. Only valid values are 0 and 1. -+ */ -+ int exiting; - } *multifd_send_state; - - /* -@@ -927,6 +933,10 @@ static int multifd_send_pages(RAMState *rs) - MultiFDPages_t *pages = multifd_send_state->pages; - uint64_t transferred; - -+ if (atomic_read(&multifd_send_state->exiting)) { -+ return -1; -+ } -+ - qemu_sem_wait(&multifd_send_state->channels_ready); - for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) { - p = &multifd_send_state->params[i]; -@@ -1008,6 +1018,16 @@ static void multifd_send_terminate_threads(Error *err) - } - } - -+ /* -+ * We don't want to exit each threads twice. Depending on where -+ * we get the error, or if there are two independent errors in two -+ * threads at the same time, we can end calling this function -+ * twice. 
-+ */ -+ if (atomic_xchg(&multifd_send_state->exiting, 1)) { -+ return; -+ } -+ - for (i = 0; i < migrate_multifd_channels(); i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; - -@@ -1117,6 +1137,10 @@ static void *multifd_send_thread(void *opaque) - - while (true) { - qemu_sem_wait(&p->sem); -+ -+ if (atomic_read(&multifd_send_state->exiting)) { -+ break; -+ } - qemu_mutex_lock(&p->mutex); - - if (p->pending_job) { -@@ -1225,6 +1249,7 @@ int multifd_save_setup(void) - multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); - multifd_send_state->pages = multifd_pages_init(page_count); - qemu_sem_init(&multifd_send_state->channels_ready, 0); -+ atomic_set(&multifd_send_state->exiting, 0); - - for (i = 0; i < thread_count; i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; --- -1.8.3.1 - diff --git a/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch b/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch deleted file mode 100644 index 4a7fb28..0000000 --- a/kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 3c4f6f0c2bf5562f2aa26f964848ae53e6ac4790 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:43 +0000 -Subject: [PATCH 11/18] migration: Maybe VM is paused when migration is - cancelled - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-11-quintela@redhat.com> -Patchwork-id: 94120 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 10/10] migration: Maybe VM is paused when migration is cancelled -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -From: Zhimin Feng - -If the migration is cancelled when it is in the completion phase, -the migration state is set to MIGRATION_STATUS_CANCELLING. -The VM maybe wait for the 'pause_sem' semaphore in migration_maybe_pause -function, so that VM always is paused. 
- -Reported-by: Euler Robot -Signed-off-by: Zhimin Feng -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 8958338b10abcb346b54a8038a491fda2db1c853) -Signed-off-by: Danilo C. L. de Paula ---- - migration/migration.c | 24 ++++++++++++++++-------- - 1 file changed, 16 insertions(+), 8 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index eb50d77..ed18c59 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2786,14 +2786,22 @@ static int migration_maybe_pause(MigrationState *s, - /* This block intentionally left blank */ - } - -- qemu_mutex_unlock_iothread(); -- migrate_set_state(&s->state, *current_active_state, -- MIGRATION_STATUS_PRE_SWITCHOVER); -- qemu_sem_wait(&s->pause_sem); -- migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, -- new_state); -- *current_active_state = new_state; -- qemu_mutex_lock_iothread(); -+ /* -+ * If the migration is cancelled when it is in the completion phase, -+ * the migration state is set to MIGRATION_STATUS_CANCELLING. -+ * So we don't need to wait a semaphore, otherwise we would always -+ * wait for the 'pause_sem' semaphore. -+ */ -+ if (s->state != MIGRATION_STATUS_CANCELLING) { -+ qemu_mutex_unlock_iothread(); -+ migrate_set_state(&s->state, *current_active_state, -+ MIGRATION_STATUS_PRE_SWITCHOVER); -+ qemu_sem_wait(&s->pause_sem); -+ migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, -+ new_state); -+ *current_active_state = new_state; -+ qemu_mutex_lock_iothread(); -+ } - - return s->state == new_state ? 
0 : -EINVAL; - } --- -1.8.3.1 - diff --git a/kvm-migration-Rate-limit-inside-host-pages.patch b/kvm-migration-Rate-limit-inside-host-pages.patch deleted file mode 100644 index 2d3d519..0000000 --- a/kvm-migration-Rate-limit-inside-host-pages.patch +++ /dev/null @@ -1,172 +0,0 @@ -From 8e8f421cce99543081f225acf46541312cfbc371 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Tue, 17 Mar 2020 17:05:18 +0000 -Subject: [PATCH 1/2] migration: Rate limit inside host pages - -RH-Author: Laurent Vivier -Message-id: <20200317170518.9303-1-lvivier@redhat.com> -Patchwork-id: 94374 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] migration: Rate limit inside host pages -Bugzilla: 1814336 -RH-Acked-by: Peter Xu -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert - -From: "Dr. David Alan Gilbert" - -When using hugepages, rate limiting is necessary within each huge -page, since a 1G huge page can take a significant time to send, so -you end up with bursty behaviour. - -Fixes: 4c011c37ecb3 ("postcopy: Send whole huge pages") -Reported-by: Lin Ma -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Juan Quintela -Reviewed-by: Peter Xu -Signed-off-by: Juan Quintela -(cherry picked from commit 97e1e06780e70f6e98a0d2df881e0c0927d3aeb6) -Signed-off-by: Laurent Vivier - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1814336 -BRANCH: rhel-av-8.2.0 -UPSTREAM: Merged -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27283241 -TESTED: Tested that the migration abort doesn't trigger an error message in - the kernel logs on P9 - -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/migration.c | 57 ++++++++++++++++++++++++++++---------------------- - migration/migration.h | 1 + - migration/ram.c | 2 ++ - migration/trace-events | 4 ++-- - 4 files changed, 37 insertions(+), 27 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index ed18c59..e31d0f5 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3253,6 +3253,37 @@ void migration_consume_urgent_request(void) - qemu_sem_wait(&migrate_get_current()->rate_limit_sem); - } - -+/* Returns true if the rate limiting was broken by an urgent request */ -+bool migration_rate_limit(void) -+{ -+ int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -+ MigrationState *s = migrate_get_current(); -+ -+ bool urgent = false; -+ migration_update_counters(s, now); -+ if (qemu_file_rate_limit(s->to_dst_file)) { -+ /* -+ * Wait for a delay to do rate limiting OR -+ * something urgent to post the semaphore. -+ */ -+ int ms = s->iteration_start_time + BUFFER_DELAY - now; -+ trace_migration_rate_limit_pre(ms); -+ if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { -+ /* -+ * We were woken by one or more urgent things but -+ * the timedwait will have consumed one of them. -+ * The service routine for the urgent wake will dec -+ * the semaphore itself for each item it consumes, -+ * so add this one we just eat back. -+ */ -+ qemu_sem_post(&s->rate_limit_sem); -+ urgent = true; -+ } -+ trace_migration_rate_limit_post(urgent); -+ } -+ return urgent; -+} -+ - /* - * Master migration thread on the source VM. - * It drives the migration and pumps the data down the outgoing channel. 
-@@ -3319,8 +3350,6 @@ static void *migration_thread(void *opaque) - trace_migration_thread_setup_complete(); - - while (migration_is_active(s)) { -- int64_t current_time; -- - if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { - MigIterateState iter_state = migration_iteration_run(s); - if (iter_state == MIG_ITERATE_SKIP) { -@@ -3347,29 +3376,7 @@ static void *migration_thread(void *opaque) - update_iteration_initial_status(s); - } - -- current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -- -- migration_update_counters(s, current_time); -- -- urgent = false; -- if (qemu_file_rate_limit(s->to_dst_file)) { -- /* Wait for a delay to do rate limiting OR -- * something urgent to post the semaphore. -- */ -- int ms = s->iteration_start_time + BUFFER_DELAY - current_time; -- trace_migration_thread_ratelimit_pre(ms); -- if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { -- /* We were worken by one or more urgent things but -- * the timedwait will have consumed one of them. -- * The service routine for the urgent wake will dec -- * the semaphore itself for each item it consumes, -- * so add this one we just eat back. 
-- */ -- qemu_sem_post(&s->rate_limit_sem); -- urgent = true; -- } -- trace_migration_thread_ratelimit_post(urgent); -- } -+ urgent = migration_rate_limit(); - } - - trace_migration_thread_after_loop(); -diff --git a/migration/migration.h b/migration/migration.h -index a2b2336..a15e8d8 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -347,5 +347,6 @@ extern bool migrate_pre_2_2; - - void migration_make_urgent_request(void); - void migration_consume_urgent_request(void); -+bool migration_rate_limit(void); - - #endif -diff --git a/migration/ram.c b/migration/ram.c -index 3891eff..5344c7d 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -2661,6 +2661,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, - - pages += tmppages; - pss->page++; -+ /* Allow rate limiting to happen in the middle of huge pages */ -+ migration_rate_limit(); - } while ((pss->page & (pagesize_bits - 1)) && - offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); - -diff --git a/migration/trace-events b/migration/trace-events -index 6dee7b5..2f9129e 100644 ---- a/migration/trace-events -+++ b/migration/trace-events -@@ -138,12 +138,12 @@ migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi6 - migration_completion_file_err(void) "" - migration_completion_postcopy_end(void) "" - migration_completion_postcopy_end_after_complete(void) "" -+migration_rate_limit_pre(int ms) "%d ms" -+migration_rate_limit_post(int urgent) "urgent: %d" - migration_return_path_end_before(void) "" - migration_return_path_end_after(int rp_error) "%d" - migration_thread_after_loop(void) "" - migration_thread_file_err(void) "" --migration_thread_ratelimit_pre(int ms) "%d ms" --migration_thread_ratelimit_post(int urgent) "urgent: %d" - migration_thread_setup_complete(void) "" - open_return_path_on_source(void) "" - open_return_path_on_source_continue(void) "" --- -1.8.3.1 - diff --git 
a/kvm-migration-multifd-clean-pages-after-filling-packet.patch b/kvm-migration-multifd-clean-pages-after-filling-packet.patch deleted file mode 100644 index 5fa7fde..0000000 --- a/kvm-migration-multifd-clean-pages-after-filling-packet.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 32ee75b7f4a31d6080e5659e2a0285a046ef1036 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:34 +0000 -Subject: [PATCH 02/18] migration/multifd: clean pages after filling packet - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-2-quintela@redhat.com> -Patchwork-id: 94112 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 01/10] migration/multifd: clean pages after filling packet -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -From: Wei Yang - -This is a preparation for the next patch: - - not use multifd during postcopy. - -Without enabling postcopy, everything looks good. While after enabling -postcopy, migration may fail even not use multifd during postcopy. The -reason is the pages is not properly cleared and *old* target page will -continue to be transferred. - -After clean pages, migration succeeds. - -Signed-off-by: Wei Yang -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit eab54aa78ffd9fb7895b20fc2761ee998479489b) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 5078f94..65580e3 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -944,10 +944,10 @@ static int multifd_send_pages(RAMState *rs) - } - qemu_mutex_unlock(&p->mutex); - } -- p->pages->used = 0; -+ assert(!p->pages->used); -+ assert(!p->pages->block); - - p->packet_num = multifd_send_state->packet_num++; -- p->pages->block = NULL; - multifd_send_state->pages = p->pages; - p->pages = pages; - transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len; -@@ -1129,6 +1129,8 @@ static void *multifd_send_thread(void *opaque) - p->flags = 0; - p->num_packets++; - p->num_pages += used; -+ p->pages->used = 0; -+ p->pages->block = NULL; - qemu_mutex_unlock(&p->mutex); - - trace_multifd_send(p->id, packet_num, used, flags, --- -1.8.3.1 - diff --git a/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch b/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch deleted file mode 100644 index 0c5fe80..0000000 --- a/kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 2c14a6831954a59256cc8d1980da0ad705a3a3fa Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:37 +0000 -Subject: [PATCH 05/18] migration/multifd: fix destroyed mutex access in - terminating multifd threads - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-5-quintela@redhat.com> -Patchwork-id: 94119 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 04/10] migration/multifd: fix destroyed mutex access in terminating multifd threads -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -From: Jiahui Cen - -One multifd will lock all the other multifds' IOChannel mutex to inform them -to quit by setting p->quit or shutting down p->c. 
In this senario, if some -multifds had already been terminated and multifd_load_cleanup/multifd_save_cleanup -had destroyed their mutex, it could cause destroyed mutex access when trying -lock their mutex. - -Here is the coredump stack: - #0 0x00007f81a2794437 in raise () from /usr/lib64/libc.so.6 - #1 0x00007f81a2795b28 in abort () from /usr/lib64/libc.so.6 - #2 0x00007f81a278d1b6 in __assert_fail_base () from /usr/lib64/libc.so.6 - #3 0x00007f81a278d262 in __assert_fail () from /usr/lib64/libc.so.6 - #4 0x000055eb1bfadbd3 in qemu_mutex_lock_impl (mutex=0x55eb1e2d1988, file=, line=) at util/qemu-thread-posix.c:64 - #5 0x000055eb1bb4564a in multifd_send_terminate_threads (err=) at migration/ram.c:1015 - #6 0x000055eb1bb4bb7f in multifd_send_thread (opaque=0x55eb1e2d19f8) at migration/ram.c:1171 - #7 0x000055eb1bfad628 in qemu_thread_start (args=0x55eb1e170450) at util/qemu-thread-posix.c:502 - #8 0x00007f81a2b36df5 in start_thread () from /usr/lib64/libpthread.so.0 - #9 0x00007f81a286048d in clone () from /usr/lib64/libc.so.6 - -To fix it up, let's destroy the mutex after all the other multifd threads had -been terminated. - -Signed-off-by: Jiahui Cen -Signed-off-by: Ying Fang -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 9560a48ecc0c20d87bc458a6db77fba651605819) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/migration/ram.c b/migration/ram.c -index 860f781..6c55c5d 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1052,6 +1052,10 @@ void multifd_save_cleanup(void) - if (p->running) { - qemu_thread_join(&p->thread); - } -+ } -+ for (i = 0; i < migrate_multifd_channels(); i++) { -+ MultiFDSendParams *p = &multifd_send_state->params[i]; -+ - socket_send_channel_destroy(p->c); - p->c = NULL; - qemu_mutex_destroy(&p->mutex); -@@ -1335,6 +1339,10 @@ int multifd_load_cleanup(Error **errp) - qemu_sem_post(&p->sem_sync); - qemu_thread_join(&p->thread); - } -+ } -+ for (i = 0; i < migrate_multifd_channels(); i++) { -+ MultiFDRecvParams *p = &multifd_recv_state->params[i]; -+ - object_unref(OBJECT(p->c)); - p->c = NULL; - qemu_mutex_destroy(&p->mutex); --- -1.8.3.1 - diff --git a/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch b/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch deleted file mode 100644 index 9e9683c..0000000 --- a/kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 517a99c5fba163bf684978fe3d9476b619481391 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:42 +0000 -Subject: [PATCH 10/18] migration/multifd: fix nullptr access in - multifd_send_terminate_threads - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-10-quintela@redhat.com> -Patchwork-id: 94117 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 09/10] migration/multifd: fix nullptr access in multifd_send_terminate_threads -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -From: Zhimin Feng - -If the multifd_send_threads is not created when migration is failed, -multifd_save_cleanup would be called twice. In this senario, the -multifd_send_state is accessed after it has been released, the result -is that the source VM is crashing down. 
- -Here is the coredump stack: - Program received signal SIGSEGV, Segmentation fault. - 0x00005629333a78ef in multifd_send_terminate_threads (err=err@entry=0x0) at migration/ram.c:1012 - 1012 MultiFDSendParams *p = &multifd_send_state->params[i]; - #0 0x00005629333a78ef in multifd_send_terminate_threads (err=err@entry=0x0) at migration/ram.c:1012 - #1 0x00005629333ab8a9 in multifd_save_cleanup () at migration/ram.c:1028 - #2 0x00005629333abaea in multifd_new_send_channel_async (task=0x562935450e70, opaque=) at migration/ram.c:1202 - #3 0x000056293373a562 in qio_task_complete (task=task@entry=0x562935450e70) at io/task.c:196 - #4 0x000056293373a6e0 in qio_task_thread_result (opaque=0x562935450e70) at io/task.c:111 - #5 0x00007f475d4d75a7 in g_idle_dispatch () from /usr/lib64/libglib-2.0.so.0 - #6 0x00007f475d4da9a9 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 - #7 0x0000562933785b33 in glib_pollfds_poll () at util/main-loop.c:219 - #8 os_host_main_loop_wait (timeout=) at util/main-loop.c:242 - #9 main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:518 - #10 0x00005629334c5acf in main_loop () at vl.c:1810 - #11 0x000056293334d7bb in main (argc=, argv=, envp=) at vl.c:4471 - -If the multifd_send_threads is not created when migration is failed. -In this senario, we don't call multifd_save_cleanup in multifd_new_send_channel_async. - -Signed-off-by: Zhimin Feng -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 9c4d333c092e9c26d38f740ff3616deb42f21681) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 902c56c..3891eff 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1229,7 +1229,15 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) - trace_multifd_new_send_channel_async(p->id); - if (qio_task_propagate_error(task, &local_err)) { - migrate_set_error(migrate_get_current(), local_err); -- multifd_save_cleanup(); -+ /* Error happen, we need to tell who pay attention to me */ -+ qemu_sem_post(&multifd_send_state->channels_ready); -+ qemu_sem_post(&p->sem_sync); -+ /* -+ * Although multifd_send_thread is not created, but main migration -+ * thread neet to judge whether it is running, so we need to mark -+ * its status. -+ */ -+ p->quit = true; - } else { - p->c = QIO_CHANNEL(sioc); - qio_channel_set_delay(p->c, false); --- -1.8.3.1 - diff --git a/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch b/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch deleted file mode 100644 index e780698..0000000 --- a/kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 7f664fe26ff67f8131faa7a81a388b8a5b51403f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:36 +0000 -Subject: [PATCH 04/18] migration/multifd: fix nullptr access in terminating - multifd threads - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-4-quintela@redhat.com> -Patchwork-id: 94110 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 03/10] migration/multifd: fix nullptr access in terminating multifd threads -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -From: Jiahui Cen - -One multifd channel will shutdown all the other multifd's IOChannel when it -fails to receive an IOChannel. 
In this senario, if some multifds had not -received its IOChannel yet, it would try to shutdown its IOChannel which could -cause nullptr access at qio_channel_shutdown. - -Here is the coredump stack: - #0 object_get_class (obj=obj@entry=0x0) at qom/object.c:908 - #1 0x00005563fdbb8f4a in qio_channel_shutdown (ioc=0x0, how=QIO_CHANNEL_SHUTDOWN_BOTH, errp=0x0) at io/channel.c:355 - #2 0x00005563fd7b4c5f in multifd_recv_terminate_threads (err=) at migration/ram.c:1280 - #3 0x00005563fd7bc019 in multifd_recv_new_channel (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce00) at migration/ram.c:1478 - #4 0x00005563fda82177 in migration_ioc_process_incoming (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce30) at migration/migration.c:605 - #5 0x00005563fda8567d in migration_channel_process_incoming (ioc=0x556400255610) at migration/channel.c:44 - #6 0x00005563fda83ee0 in socket_accept_incoming_migration (listener=0x5563fff6b920, cioc=0x556400255610, opaque=) at migration/socket.c:166 - #7 0x00005563fdbc25cd in qio_net_listener_channel_func (ioc=, condition=, opaque=) at io/net-listener.c:54 - #8 0x00007f895b6fe9a9 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 - #9 0x00005563fdc18136 in glib_pollfds_poll () at util/main-loop.c:218 - #10 0x00005563fdc181b5 in os_host_main_loop_wait (timeout=1000000000) at util/main-loop.c:241 - #11 0x00005563fdc183a2 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:517 - #12 0x00005563fd8edb37 in main_loop () at vl.c:1791 - #13 0x00005563fd74fd45 in main (argc=, argv=, envp=) at vl.c:4473 - -To fix it up, let's check p->c before calling qio_channel_shutdown. - -Signed-off-by: Jiahui Cen -Signed-off-by: Ying Fang -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit f76e32eb05041ab001184ab16afb56524adccd0c) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 8c783b3..860f781 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1307,7 +1307,9 @@ static void multifd_recv_terminate_threads(Error *err) - - normal quit, i.e. everything went fine, just finished - - error quit: We close the channels so the channel threads - finish the qio_channel_read_all_eof() */ -- qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); -+ if (p->c) { -+ qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); -+ } - qemu_mutex_unlock(&p->mutex); - } - } --- -1.8.3.1 - diff --git a/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch b/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch deleted file mode 100644 index c20cb6c..0000000 --- a/kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch +++ /dev/null @@ -1,123 +0,0 @@ -From 261ee33e0e6711fadd3049e4640bb731ee3d44ff Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 24 Feb 2020 16:57:10 +0000 -Subject: [PATCH 9/9] mirror: Don't let an operation wait for itself - -RH-Author: Kevin Wolf -Message-id: <20200224165710.4830-3-kwolf@redhat.com> -Patchwork-id: 94045 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] mirror: Don't let an operation wait for itself -Bugzilla: 1794692 -RH-Acked-by: John Snow -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -mirror_wait_for_free_in_flight_slot() just picks a random operation to -wait for. However, when mirror_co_read() waits for free slots, its -MirrorOp is already in s->ops_in_flight, so if not enough slots are -immediately available, an operation can end up waiting for itself to -complete, which results in a hang. - -Fix this by passing the current MirrorOp and skipping this operation -when picking an operation to wait for. 
- -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1794692 -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -(cherry picked from commit 7e6c4ff792734e196c8ca82564c56b5e7c6288ca) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/mirror.c | 21 ++++++++++++--------- - 1 file changed, 12 insertions(+), 9 deletions(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 8959e42..cacbc70 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -283,11 +283,14 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, - } - - static inline void coroutine_fn --mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) -+mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) - { - MirrorOp *op; - - QTAILQ_FOREACH(op, &s->ops_in_flight, next) { -+ if (self == op) { -+ continue; -+ } - /* Do not wait on pseudo ops, because it may in turn wait on - * some other operation to start, which may in fact be the - * caller of this function. Since there is only one pseudo op -@@ -302,10 +305,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) - } - - static inline void coroutine_fn --mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) -+mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self) - { - /* Only non-active operations use up in-flight slots */ -- mirror_wait_for_any_operation(s, false); -+ mirror_wait_for_any_operation(s, self, false); - } - - /* Perform a mirror copy operation. 
-@@ -348,7 +351,7 @@ static void coroutine_fn mirror_co_read(void *opaque) - - while (s->buf_free_count < nb_chunks) { - trace_mirror_yield_in_flight(s, op->offset, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s); -+ mirror_wait_for_free_in_flight_slot(s, op); - } - - /* Now make a QEMUIOVector taking enough granularity-sized chunks -@@ -555,7 +558,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) - - while (s->in_flight >= MAX_IN_FLIGHT) { - trace_mirror_yield_in_flight(s, offset, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s); -+ mirror_wait_for_free_in_flight_slot(s, pseudo_op); - } - - if (s->ret < 0) { -@@ -609,7 +612,7 @@ static void mirror_free_init(MirrorBlockJob *s) - static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s) - { - while (s->in_flight > 0) { -- mirror_wait_for_free_in_flight_slot(s); -+ mirror_wait_for_free_in_flight_slot(s, NULL); - } - } - -@@ -794,7 +797,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) - if (s->in_flight >= MAX_IN_FLIGHT) { - trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, - s->in_flight); -- mirror_wait_for_free_in_flight_slot(s); -+ mirror_wait_for_free_in_flight_slot(s, NULL); - continue; - } - -@@ -947,7 +950,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - /* Do not start passive operations while there are active - * writes in progress */ - while (s->in_active_write_counter) { -- mirror_wait_for_any_operation(s, true); -+ mirror_wait_for_any_operation(s, NULL, true); - } - - if (s->ret < 0) { -@@ -973,7 +976,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) - if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || - (cnt == 0 && s->in_flight > 0)) { - trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); -- mirror_wait_for_free_in_flight_slot(s); -+ mirror_wait_for_free_in_flight_slot(s, NULL); - continue; - } else if (cnt != 0) { - delay_ns = mirror_iteration(s); --- -1.8.3.1 - diff --git 
a/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch b/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch deleted file mode 100644 index 67f3e54..0000000 --- a/kvm-mirror-Store-MirrorOp.co-for-debuggability.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 27fe3b8d42a2c99de01ce20e4b0727079c12da65 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 24 Feb 2020 16:57:09 +0000 -Subject: [PATCH 8/9] mirror: Store MirrorOp.co for debuggability - -RH-Author: Kevin Wolf -Message-id: <20200224165710.4830-2-kwolf@redhat.com> -Patchwork-id: 94044 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] mirror: Store MirrorOp.co for debuggability -Bugzilla: 1794692 -RH-Acked-by: John Snow -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -If a coroutine is launched, but the coroutine pointer isn't stored -anywhere, debugging any problems inside the coroutine is quite hard. -Let's store the coroutine pointer of a mirror operation in MirrorOp to -have it available in the debugger. - -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -(cherry picked from commit eed325b92c3e68417121ea23f96e33af6a4654ed) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/mirror.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/block/mirror.c b/block/mirror.c -index f0f2d9d..8959e42 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -103,6 +103,7 @@ struct MirrorOp { - bool is_pseudo_op; - bool is_active_write; - CoQueue waiting_requests; -+ Coroutine *co; - - QTAILQ_ENTRY(MirrorOp) next; - }; -@@ -429,6 +430,7 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, - default: - abort(); - } -+ op->co = co; - - QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); - qemu_coroutine_enter(co); --- -1.8.3.1 - diff --git a/kvm-mirror-Wait-only-for-in-flight-operations.patch b/kvm-mirror-Wait-only-for-in-flight-operations.patch deleted file mode 100644 index a06d30e..0000000 --- a/kvm-mirror-Wait-only-for-in-flight-operations.patch +++ /dev/null @@ -1,95 +0,0 @@ -From bddf389330e11fb0ce17413c1bfa2264a281ded2 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 30 Mar 2020 11:19:24 +0100 -Subject: [PATCH 4/4] mirror: Wait only for in-flight operations - -RH-Author: Kevin Wolf -Message-id: <20200330111924.22938-3-kwolf@redhat.com> -Patchwork-id: 94463 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] mirror: Wait only for in-flight operations -Bugzilla: 1794692 -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -mirror_wait_for_free_in_flight_slot() just picks a random operation to -wait for. However, a MirrorOp is already in s->ops_in_flight when -mirror_co_read() waits for free slots, so if not enough slots are -immediately available, an operation can end up waiting for itself, or -two or more operations can wait for each other to complete, which -results in a hang. - -Fix this by adding a flag to MirrorOp that tells us if the request is -already in flight (and therefore occupies slots that it will later -free), and picking only such operations for waiting. 
- -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1794692 -Signed-off-by: Kevin Wolf -Message-Id: <20200326153628.4869-3-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit ce8cabbd17cf738ddfc68384440c38e5dd2fdf97) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/mirror.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/block/mirror.c b/block/mirror.c -index 8959e42..5e5a521 100644 ---- a/block/mirror.c -+++ b/block/mirror.c -@@ -102,6 +102,7 @@ struct MirrorOp { - - bool is_pseudo_op; - bool is_active_write; -+ bool is_in_flight; - CoQueue waiting_requests; - Coroutine *co; - -@@ -293,7 +294,9 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) - * caller of this function. Since there is only one pseudo op - * at any given time, we will always find some real operation - * to wait on. */ -- if (!op->is_pseudo_op && op->is_active_write == active) { -+ if (!op->is_pseudo_op && op->is_in_flight && -+ op->is_active_write == active) -+ { - qemu_co_queue_wait(&op->waiting_requests, NULL); - return; - } -@@ -367,6 +370,7 @@ static void coroutine_fn mirror_co_read(void *opaque) - /* Copy the dirty cluster. */ - s->in_flight++; - s->bytes_in_flight += op->bytes; -+ op->is_in_flight = true; - trace_mirror_one_iteration(s, op->offset, op->bytes); - - ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes, -@@ -382,6 +386,7 @@ static void coroutine_fn mirror_co_zero(void *opaque) - op->s->in_flight++; - op->s->bytes_in_flight += op->bytes; - *op->bytes_handled = op->bytes; -+ op->is_in_flight = true; - - ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, - op->s->unmap ? 
BDRV_REQ_MAY_UNMAP : 0); -@@ -396,6 +401,7 @@ static void coroutine_fn mirror_co_discard(void *opaque) - op->s->in_flight++; - op->s->bytes_in_flight += op->bytes; - *op->bytes_handled = op->bytes; -+ op->is_in_flight = true; - - ret = blk_co_pdiscard(op->s->target, op->offset, op->bytes); - mirror_write_complete(op, ret); -@@ -1306,6 +1312,7 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s, - .offset = offset, - .bytes = bytes, - .is_active_write = true, -+ .is_in_flight = true, - }; - qemu_co_queue_init(&op->waiting_requests); - QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); --- -1.8.3.1 - diff --git a/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch b/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch deleted file mode 100644 index bca0b4c..0000000 --- a/kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 78c7fb5afcb298631df47f6b71cf764f921c15f4 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:38 +0000 -Subject: [PATCH 06/18] multifd: Make sure that we don't do any IO after an - error - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-6-quintela@redhat.com> -Patchwork-id: 94118 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 05/10] multifd: Make sure that we don't do any IO after an error -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -(cherry picked from commit 3d4095b222d97393b1c2c6e514951ec7798f1c43) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/ram.c | 22 +++++++++++++--------- - 1 file changed, 13 insertions(+), 9 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 6c55c5d..a0257ee 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3440,7 +3440,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - { - RAMState **temp = opaque; - RAMState *rs = *temp; -- int ret; -+ int ret = 0; - int i; - int64_t t0; - int done = 0; -@@ -3511,12 +3511,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - ram_control_after_iterate(f, RAM_CONTROL_ROUND); - - out: -- multifd_send_sync_main(rs); -- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -- qemu_fflush(f); -- ram_counters.transferred += 8; -+ if (ret >= 0) { -+ multifd_send_sync_main(rs); -+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -+ qemu_fflush(f); -+ ram_counters.transferred += 8; - -- ret = qemu_file_get_error(f); -+ ret = qemu_file_get_error(f); -+ } - if (ret < 0) { - return ret; - } -@@ -3568,9 +3570,11 @@ static int ram_save_complete(QEMUFile *f, void *opaque) - ram_control_after_iterate(f, RAM_CONTROL_FINISH); - } - -- multifd_send_sync_main(rs); -- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -- qemu_fflush(f); -+ if (ret >= 0) { -+ multifd_send_sync_main(rs); -+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -+ qemu_fflush(f); -+ } - - return ret; - } --- -1.8.3.1 - diff --git a/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch b/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch deleted file mode 100644 index 2dbdb16..0000000 --- a/kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 22fc9bd7e7ae0b72c6f6e483eb66cf996f519766 Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Tue, 21 Jan 2020 05:16:11 +0000 -Subject: [PATCH 01/15] ppc: Deassert the external interrupt pin in KVM on - reset -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Gibson -Message-id: 
<20200121051613.388295-2-dgibson@redhat.com> -Patchwork-id: 93429 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 1/3] ppc: Deassert the external interrupt pin in KVM on reset -Bugzilla: 1776638 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth - -From: Greg Kurz - -When a CPU is reset, QEMU makes sure no interrupt is pending by clearing -CPUPPCstate::pending_interrupts in ppc_cpu_reset(). In the case of a -complete machine emulation, eg. a sPAPR machine, an external interrupt -request could still be pending in KVM though, eg. an IPI. It will be -eventually presented to the guest, which is supposed to acknowledge it at -the interrupt controller. If the interrupt controller is emulated in QEMU, -either XICS or XIVE, ppc_set_irq() won't deassert the external interrupt -pin in KVM since it isn't pending anymore for QEMU. When the vCPU re-enters -the guest, the interrupt request is still pending and the vCPU will try -again to acknowledge it. This causes an infinite loop and eventually hangs -the guest. - -The code has been broken since the beginning. The issue wasn't hit before -because accel=kvm,kernel-irqchip=off is an awkward setup that never got -used until recently with the LC92x IBM systems (aka, Boston). - -Add a ppc_irq_reset() function to do the necessary cleanup, ie. deassert -the IRQ pins of the CPU in QEMU and most importantly the external interrupt -pin for this vCPU in KVM. - -Reported-by: Satheesh Rajendran -Signed-off-by: Greg Kurz -Message-Id: <157548861740.3650476.16879693165328764758.stgit@bahia.lan> -Signed-off-by: David Gibson -(cherry picked from commit 401774387aeb37f2ada9bb18f7c7e307b21a3e93) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/ppc.c | 8 ++++++++ - include/hw/ppc/ppc.h | 2 ++ - target/ppc/translate_init.inc.c | 1 + - 3 files changed, 11 insertions(+) - -diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c -index 52a18eb..d554b64 100644 ---- a/hw/ppc/ppc.c -+++ b/hw/ppc/ppc.c -@@ -1510,3 +1510,11 @@ PowerPCCPU *ppc_get_vcpu_by_pir(int pir) - - return NULL; - } -+ -+void ppc_irq_reset(PowerPCCPU *cpu) -+{ -+ CPUPPCState *env = &cpu->env; -+ -+ env->irq_input_state = 0; -+ kvmppc_set_interrupt(cpu, PPC_INTERRUPT_EXT, 0); -+} -diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h -index 4bdcb8b..5dd7531 100644 ---- a/include/hw/ppc/ppc.h -+++ b/include/hw/ppc/ppc.h -@@ -76,6 +76,7 @@ static inline void ppc970_irq_init(PowerPCCPU *cpu) {} - static inline void ppcPOWER7_irq_init(PowerPCCPU *cpu) {} - static inline void ppcPOWER9_irq_init(PowerPCCPU *cpu) {} - static inline void ppce500_irq_init(PowerPCCPU *cpu) {} -+static inline void ppc_irq_reset(PowerPCCPU *cpu) {} - #else - void ppc40x_irq_init(PowerPCCPU *cpu); - void ppce500_irq_init(PowerPCCPU *cpu); -@@ -83,6 +84,7 @@ void ppc6xx_irq_init(PowerPCCPU *cpu); - void ppc970_irq_init(PowerPCCPU *cpu); - void ppcPOWER7_irq_init(PowerPCCPU *cpu); - void ppcPOWER9_irq_init(PowerPCCPU *cpu); -+void ppc_irq_reset(PowerPCCPU *cpu); - #endif - - /* PPC machines for OpenBIOS */ -diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c -index ba726de..64a8380 100644 ---- a/target/ppc/translate_init.inc.c -+++ b/target/ppc/translate_init.inc.c -@@ -10461,6 +10461,7 @@ static void ppc_cpu_reset(CPUState *s) - env->pending_interrupts = 0; - s->exception_index = POWERPC_EXCP_NONE; - env->error_code = 0; -+ ppc_irq_reset(cpu); - - /* tininess for underflow is detected before rounding */ - set_float_detect_tininess(float_tininess_before_rounding, --- -1.8.3.1 - diff --git a/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch b/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch deleted file mode 
100644 index 457d149..0000000 --- a/kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch +++ /dev/null @@ -1,112 +0,0 @@ -From f2f57c1ed926384e074d2048cdbdc30ee2f426eb Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Tue, 21 Jan 2020 05:16:13 +0000 -Subject: [PATCH 03/15] ppc: Don't use CPUPPCState::irq_input_state with modern - Book3s CPU models -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Gibson -Message-id: <20200121051613.388295-4-dgibson@redhat.com> -Patchwork-id: 93431 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 3/3] ppc: Don't use CPUPPCState::irq_input_state with modern Book3s CPU models -Bugzilla: 1776638 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth - -From: Greg Kurz - -The power7_set_irq() and power9_set_irq() functions set this but it is -never used actually. Modern Book3s compatible CPUs are only supported -by the pnv and spapr machines. They have an interrupt controller, XICS -for POWER7/8 and XIVE for POWER9, whose models don't require to track -IRQ input states at the CPU level. - -Drop these lines to avoid confusion. - -Signed-off-by: Greg Kurz -Message-Id: <157548862861.3650476.16622818876928044450.stgit@bahia.lan> -Signed-off-by: David Gibson -(cherry picked from commit c1ad0b892ce20cf2b5e619c79e8a0c4c66b235dc) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/ppc.c | 16 ++-------------- - target/ppc/cpu.h | 4 +++- - 2 files changed, 5 insertions(+), 15 deletions(-) - -diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c -index d554b64..730a41f 100644 ---- a/hw/ppc/ppc.c -+++ b/hw/ppc/ppc.c -@@ -275,10 +275,9 @@ void ppc970_irq_init(PowerPCCPU *cpu) - static void power7_set_irq(void *opaque, int pin, int level) - { - PowerPCCPU *cpu = opaque; -- CPUPPCState *env = &cpu->env; - - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, -- env, pin, level); -+ &cpu->env, pin, level); - - switch (pin) { - case POWER7_INPUT_INT: -@@ -292,11 +291,6 @@ static void power7_set_irq(void *opaque, int pin, int level) - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; - } -- if (level) { -- env->irq_input_state |= 1 << pin; -- } else { -- env->irq_input_state &= ~(1 << pin); -- } - } - - void ppcPOWER7_irq_init(PowerPCCPU *cpu) -@@ -311,10 +305,9 @@ void ppcPOWER7_irq_init(PowerPCCPU *cpu) - static void power9_set_irq(void *opaque, int pin, int level) - { - PowerPCCPU *cpu = opaque; -- CPUPPCState *env = &cpu->env; - - LOG_IRQ("%s: env %p pin %d level %d\n", __func__, -- env, pin, level); -+ &cpu->env, pin, level); - - switch (pin) { - case POWER9_INPUT_INT: -@@ -334,11 +327,6 @@ static void power9_set_irq(void *opaque, int pin, int level) - LOG_IRQ("%s: unknown IRQ pin %d\n", __func__, pin); - return; - } -- if (level) { -- env->irq_input_state |= 1 << pin; -- } else { -- env->irq_input_state &= ~(1 << pin); -- } - } - - void ppcPOWER9_irq_init(PowerPCCPU *cpu) -diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index 5c53801..8887f76 100644 ---- a/target/ppc/cpu.h -+++ b/target/ppc/cpu.h -@@ -1090,7 +1090,9 @@ struct CPUPPCState { - #if !defined(CONFIG_USER_ONLY) - /* - * This is the IRQ controller, which is implementation dependent -- * and only relevant when emulating a complete machine. -+ * and only relevant when emulating a complete machine. 
Note that -+ * this isn't used by recent Book3s compatible CPUs (POWER7 and -+ * newer). - */ - uint32_t irq_input_state; - void **irq_inputs; --- -1.8.3.1 - diff --git a/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch b/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch deleted file mode 100644 index 9c25b76..0000000 --- a/kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 428eb7260718b69b1f3f421d03bce10b8785fc49 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:39 +0000 -Subject: [PATCH 19/20] qapi: Add '@allow-write-only-overlay' feature for - 'blockdev-snapshot' - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-14-kwolf@redhat.com> -Patchwork-id: 94290 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 13/13] qapi: Add '@allow-write-only-overlay' feature for 'blockdev-snapshot' -Bugzilla: 1790482 1805143 -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -From: Peter Krempa - -Anounce that 'blockdev-snapshot' command's permissions allow changing -of the backing file if the 'consistent_read' permission is not required. - -This is useful for libvirt to allow late opening of the backing chain -during a blockdev-mirror. - -Signed-off-by: Peter Krempa -Signed-off-by: Kevin Wolf -Message-Id: <20200310113831.27293-8-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit c6bdc312f30d5c7326aa2fdca3e0f98c15eb541a) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - qapi/block-core.json | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/qapi/block-core.json b/qapi/block-core.json -index a1e85b0..a64ad81 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -1541,6 +1541,12 @@ - # - # For the arguments, see the documentation of BlockdevSnapshot. 
- # -+# Features: -+# @allow-write-only-overlay: If present, the check whether this operation is safe -+# was relaxed so that it can be used to change -+# backing file of a destination of a blockdev-mirror. -+# (since 5.0) -+# - # Since: 2.5 - # - # Example: -@@ -1561,7 +1567,8 @@ - # - ## - { 'command': 'blockdev-snapshot', -- 'data': 'BlockdevSnapshot' } -+ 'data': 'BlockdevSnapshot', -+ 'features': [ 'allow-write-only-overlay' ] } - - ## - # @change-backing-file: --- -1.8.3.1 - diff --git a/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch b/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch deleted file mode 100644 index 1a7ace5..0000000 --- a/kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch +++ /dev/null @@ -1,52 +0,0 @@ -From ecc4fb6e1941035e1d9def1f69b779fbea216caf Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 24 Feb 2020 16:13:07 +0000 -Subject: [PATCH 7/9] qcow2: Fix qcow2_alloc_cluster_abort() for external data - file - -RH-Author: Kevin Wolf -Message-id: <20200224161307.29783-2-kwolf@redhat.com> -Patchwork-id: 94042 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] qcow2: Fix qcow2_alloc_cluster_abort() for external data file -Bugzilla: 1703907 -RH-Acked-by: John Snow -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -For external data file, cluster allocations return an offset in the data -file and are not refcounted. In this case, there is nothing to do for -qcow2_alloc_cluster_abort(). Freeing the same offset in the qcow2 file -is wrong and causes crashes in the better case or image corruption in -the worse case. - -Signed-off-by: Kevin Wolf -Message-Id: <20200211094900.17315-3-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit c3b6658c1a5a3fb24d6c27b2594cf86146f75b22) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - block/qcow2-cluster.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c -index 8982b7b..dc3c270 100644 ---- a/block/qcow2-cluster.c -+++ b/block/qcow2-cluster.c -@@ -1015,8 +1015,11 @@ err: - void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) - { - BDRVQcow2State *s = bs->opaque; -- qcow2_free_clusters(bs, m->alloc_offset, m->nb_clusters << s->cluster_bits, -- QCOW2_DISCARD_NEVER); -+ if (!has_data_file(bs)) { -+ qcow2_free_clusters(bs, m->alloc_offset, -+ m->nb_clusters << s->cluster_bits, -+ QCOW2_DISCARD_NEVER); -+ } - } - - /* --- -1.8.3.1 - diff --git a/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch b/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch deleted file mode 100644 index 88a6e31..0000000 --- a/kvm-qemu-file-Don-t-do-IO-after-shutdown.patch +++ /dev/null @@ -1,92 +0,0 @@ -From d84814e298e3b05fb5bc61cc8e641a5e104d32d5 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Mar 2020 14:51:39 +0000 -Subject: [PATCH 07/18] qemu-file: Don't do IO after shutdown - -RH-Author: Juan Quintela -Message-id: <20200303145143.149290-7-quintela@redhat.com> -Patchwork-id: 94116 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 06/10] qemu-file: Don't do IO after shutdown -Bugzilla: 1738451 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Peter Xu -RH-Acked-by: Dr. David Alan Gilbert - -Be sure that we are not doing neither read/write after shutdown of the -QEMUFile. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -(cherry picked from commit a555b8092abc6f1bbe4b64c516679cbd68fcfbd8) -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/qemu-file.c | 22 +++++++++++++++++++++- - 1 file changed, 21 insertions(+), 1 deletion(-) - -diff --git a/migration/qemu-file.c b/migration/qemu-file.c -index 26fb25d..bbb2b63 100644 ---- a/migration/qemu-file.c -+++ b/migration/qemu-file.c -@@ -53,6 +53,8 @@ struct QEMUFile { - - int last_error; - Error *last_error_obj; -+ /* has the file has been shutdown */ -+ bool shutdown; - }; - - /* -@@ -61,10 +63,18 @@ struct QEMUFile { - */ - int qemu_file_shutdown(QEMUFile *f) - { -+ int ret; -+ -+ f->shutdown = true; - if (!f->ops->shut_down) { - return -ENOSYS; - } -- return f->ops->shut_down(f->opaque, true, true, NULL); -+ ret = f->ops->shut_down(f->opaque, true, true, NULL); -+ -+ if (!f->last_error) { -+ qemu_file_set_error(f, -EIO); -+ } -+ return ret; - } - - /* -@@ -214,6 +224,9 @@ void qemu_fflush(QEMUFile *f) - return; - } - -+ if (f->shutdown) { -+ return; -+ } - if (f->iovcnt > 0) { - expect = iov_size(f->iov, f->iovcnt); - ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos, -@@ -328,6 +341,10 @@ static ssize_t qemu_fill_buffer(QEMUFile *f) - f->buf_index = 0; - f->buf_size = pending; - -+ if (f->shutdown) { -+ return 0; -+ } -+ - len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos, - IO_BUF_SIZE - pending, &local_error); - if (len > 0) { -@@ -642,6 +659,9 @@ int64_t qemu_ftell(QEMUFile *f) - - int qemu_file_rate_limit(QEMUFile *f) - { -+ if (f->shutdown) { -+ return 1; -+ } - if (qemu_file_get_error(f)) { - return 1; - } --- -1.8.3.1 - diff --git a/kvm-replication-assert-we-own-context-before-job_cancel_.patch b/kvm-replication-assert-we-own-context-before-job_cancel_.patch deleted file mode 100644 index 09ef4de..0000000 --- a/kvm-replication-assert-we-own-context-before-job_cancel_.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 46887feac666d0d7633ff3f5af5721fe2a80a8ab Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 8 Apr 2020 17:29:13 +0100 -Subject: [PATCH 2/6] replication: assert we own context before 
job_cancel_sync - -RH-Author: Kevin Wolf -Message-id: <20200408172917.18712-3-kwolf@redhat.com> -Patchwork-id: 94595 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/6] replication: assert we own context before job_cancel_sync -Bugzilla: 1817621 -RH-Acked-by: Eric Blake -RH-Acked-by: Danilo de Paula -RH-Acked-by: Max Reitz - -From: Stefan Reiter - -job_cancel_sync requires the job's lock to be held, all other callers -already do this (replication_stop, drive_backup_abort, -blockdev_backup_abort, job_cancel_sync_all, cancel_common). - -In this case we're in a BlockDriver handler, so we already have a lock, -just assert that it is the same as the one used for the commit_job. - -Signed-off-by: Stefan Reiter -Message-Id: <20200407115651.69472-3-s.reiter@proxmox.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 08558e33257ec796594bd411261028a93414a70c) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - block/replication.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/block/replication.c b/block/replication.c -index 99532ce..0ce27ee 100644 ---- a/block/replication.c -+++ b/block/replication.c -@@ -144,12 +144,15 @@ fail: - static void replication_close(BlockDriverState *bs) - { - BDRVReplicationState *s = bs->opaque; -+ Job *commit_job; - - if (s->stage == BLOCK_REPLICATION_RUNNING) { - replication_stop(s->rs, false, NULL); - } - if (s->stage == BLOCK_REPLICATION_FAILOVER) { -- job_cancel_sync(&s->commit_job->job); -+ commit_job = &s->commit_job->job; -+ assert(commit_job->aio_context == qemu_get_current_aio_context()); -+ job_cancel_sync(commit_job); - } - - if (s->mode == REPLICATION_MODE_SECONDARY) { --- -1.8.3.1 - diff --git a/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch b/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch deleted file mode 100644 index 6d8dfe1..0000000 --- a/kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 
0f659af4870f151e25a7d2184b9a383bff58e3ba Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Fri, 17 Jan 2020 12:07:57 +0100 -Subject: [PATCH 2/4] slirp: use correct size while emulating IRC commands -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20200117120758.1076549-3-marcandre.lureau@redhat.com> -Patchwork-id: 93400 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 2/3] slirp: use correct size while emulating IRC commands -Bugzilla: 1791568 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi - -From: Prasad J Pandit - -While emulating IRC DCC commands, tcp_emu() uses 'mbuf' size -'m->m_size' to write DCC commands via snprintf(3). This may -lead to OOB write access, because 'bptr' points somewhere in -the middle of 'mbuf' buffer, not at the start. Use M_FREEROOM(m) -size to avoid OOB access. - -Reported-by: Vishnu Dev TJ -Signed-off-by: Prasad J Pandit -Reviewed-by: Samuel Thibault -Message-Id: <20200109094228.79764-2-ppandit@redhat.com> - -(cherry picked from libslirp commit ce131029d6d4a405cb7d3ac6716d03e58fb4a5d9) -Signed-off-by: Marc-André Lureau - -Signed-off-by: Miroslav Rezanina ---- - slirp/src/tcp_subr.c | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index cbecd64..cedbfb2 100644 ---- a/slirp/src/tcp_subr.c -+++ b/slirp/src/tcp_subr.c -@@ -778,7 +778,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, m->m_size, "DCC CHAT chat %lu %u%c\n", -+ m->m_len += snprintf(bptr, M_FREEROOM(m), -+ "DCC CHAT chat %lu %u%c\n", - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), 1); - } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, -@@ -788,8 +789,8 @@ int tcp_emu(struct 
socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += -- snprintf(bptr, m->m_size, "DCC SEND %s %lu %u %u%c\n", buff, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), -+ "DCC SEND %s %lu %u %u%c\n", buff, - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), n1, 1); - } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, -@@ -799,8 +800,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += -- snprintf(bptr, m->m_size, "DCC MOVE %s %lu %u %u%c\n", buff, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), -+ "DCC MOVE %s %lu %u %u%c\n", buff, - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), n1, 1); - } --- -1.8.3.1 - diff --git a/kvm-slirp-use-correct-size-while-emulating-commands.patch b/kvm-slirp-use-correct-size-while-emulating-commands.patch deleted file mode 100644 index fe42f4f..0000000 --- a/kvm-slirp-use-correct-size-while-emulating-commands.patch +++ /dev/null @@ -1,71 +0,0 @@ -From dfbfcf02738640ab83f7970e636b72b78f166675 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Fri, 17 Jan 2020 12:07:58 +0100 -Subject: [PATCH 3/4] slirp: use correct size while emulating commands -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20200117120758.1076549-4-marcandre.lureau@redhat.com> -Patchwork-id: 93401 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 3/3] slirp: use correct size while emulating commands -Bugzilla: 1791568 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi - -From: Prasad J Pandit - -While emulating services in tcp_emu(), it uses 'mbuf' size -'m->m_size' to write commands via snprintf(3). Use M_FREEROOM(m) -size to avoid possible OOB access. 
- -Signed-off-by: Prasad J Pandit -Signed-off-by: Samuel Thibault -Message-Id: <20200109094228.79764-3-ppandit@redhat.com> - -(cherry picked from commit 82ebe9c370a0e2970fb5695aa19aa5214a6a1c80) -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina ---- - slirp/src/tcp_subr.c | 9 ++++----- - 1 file changed, 4 insertions(+), 5 deletions(-) - -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index cedbfb2..954d1a6 100644 ---- a/slirp/src/tcp_subr.c -+++ b/slirp/src/tcp_subr.c -@@ -696,7 +696,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - n4 = (laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, m->m_size - m->m_len, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), - "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, - n5, n6, x == 7 ? buff : ""); - return 1; -@@ -731,8 +731,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - n4 = (laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += -- snprintf(bptr, m->m_size - m->m_len, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), - "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", - n1, n2, n3, n4, n5, n6, x == 7 ? 
buff : ""); - -@@ -758,8 +757,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) - if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && - (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, - htons(lport), SS_FACCEPTONCE)) != NULL) -- m->m_len = -- snprintf(m->m_data, m->m_size, "%d", ntohs(so->so_fport)) + 1; -+ m->m_len = snprintf(m->m_data, M_ROOM(m), -+ "%d", ntohs(so->so_fport)) + 1; - return 1; - - case EMU_IRC: --- -1.8.3.1 - diff --git a/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch b/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch deleted file mode 100644 index d934712..0000000 --- a/kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch +++ /dev/null @@ -1,113 +0,0 @@ -From f2aeed761d2dad14920fa08c977dc45564886d9b Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Fri, 3 Jan 2020 01:15:12 +0000 -Subject: [PATCH 1/5] spapr: Don't trigger a CAS reboot for XICS/XIVE mode - changeover -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Gibson -Message-id: <20200103011512.49129-2-dgibson@redhat.com> -Patchwork-id: 93261 -O-Subject: [RHEL-AV-4.2 qemu-kvm PATCH 1/1] spapr: Don't trigger a CAS reboot for XICS/XIVE mode changeover -Bugzilla: 1733893 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth -RH-Acked-by: Philippe Mathieu-Daudé - -From: David Gibson - -PAPR allows the interrupt controller used on a POWER9 machine (XICS or -XIVE) to be selected by the guest operating system, by using the -ibm,client-architecture-support (CAS) feature negotiation call. - -Currently, if the guest selects an interrupt controller different from the -one selected at initial boot, this causes the system to be reset with the -new model and the boot starts again. This means we run through the SLOF -boot process twice, as well as any other bootloader (e.g. grub) in use -before the OS calls CAS. This can be confusing and/or inconvenient for -users. 
- -Thanks to two fairly recent changes, we no longer need this reboot. 1) we -now completely regenerate the device tree when CAS is called (meaning we -don't need special case updates for all the device tree changes caused by -the interrupt controller mode change), 2) we now have explicit code paths -to activate and deactivate the different interrupt controllers, rather than -just implicitly calling those at machine reset time. - -We can therefore eliminate the reboot for changing irq mode, simply by -putting a call to spapr_irq_update_active_intc() before we call -spapr_h_cas_compose_response() (which gives the updated device tree to -the guest firmware and OS). - -Signed-off-by: David Gibson -Reviewed-by: Cedric Le Goater -Reviewed-by: Greg Kurz -(cherry picked from commit 8deb8019d696c75e6ecaee7545026b62aba2f1bb) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1733893 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr_hcall.c | 33 +++++++++++++-------------------- - 1 file changed, 13 insertions(+), 20 deletions(-) - -diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c -index 140f05c..05a7ca2 100644 ---- a/hw/ppc/spapr_hcall.c -+++ b/hw/ppc/spapr_hcall.c -@@ -1767,21 +1767,10 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, - } - spapr->cas_pre_isa3_guest = !spapr_ovec_test(ov1_guest, OV1_PPC_3_00); - spapr_ovec_cleanup(ov1_guest); -- if (!spapr->cas_reboot) { -- /* If spapr_machine_reset() did not set up a HPT but one is necessary -- * (because the guest isn't going to use radix) then set it up here. */ -- if ((spapr->patb_entry & PATE1_GR) && !guest_radix) { -- /* legacy hash or new hash: */ -- spapr_setup_hpt_and_vrma(spapr); -- } -- spapr->cas_reboot = -- (spapr_h_cas_compose_response(spapr, args[1], args[2], -- ov5_updates) != 0); -- } - - /* -- * Ensure the guest asks for an interrupt mode we support; otherwise -- * terminate the boot. 
-+ * Ensure the guest asks for an interrupt mode we support; -+ * otherwise terminate the boot. - */ - if (guest_xive) { - if (!spapr->irq->xive) { -@@ -1797,14 +1786,18 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu, - } - } - -- /* -- * Generate a machine reset when we have an update of the -- * interrupt mode. Only required when the machine supports both -- * modes. -- */ -+ spapr_irq_update_active_intc(spapr); -+ - if (!spapr->cas_reboot) { -- spapr->cas_reboot = spapr_ovec_test(ov5_updates, OV5_XIVE_EXPLOIT) -- && spapr->irq->xics && spapr->irq->xive; -+ /* If spapr_machine_reset() did not set up a HPT but one is necessary -+ * (because the guest isn't going to use radix) then set it up here. */ -+ if ((spapr->patb_entry & PATE1_GR) && !guest_radix) { -+ /* legacy hash or new hash: */ -+ spapr_setup_hpt_and_vrma(spapr); -+ } -+ spapr->cas_reboot = -+ (spapr_h_cas_compose_response(spapr, args[1], args[2], -+ ov5_updates) != 0); - } - - spapr_ovec_cleanup(ov5_updates); --- -1.8.3.1 - diff --git a/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch b/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch deleted file mode 100644 index 0aa782b..0000000 --- a/kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch +++ /dev/null @@ -1,135 +0,0 @@ -From eb121ffa97c1c25d7853d51b4c8209c0bb521deb Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Fri, 7 Feb 2020 00:57:04 +0000 -Subject: [PATCH 1/7] spapr: Enable DD2.3 accelerated count cache flush in - pseries-5.0 machine - -RH-Author: David Gibson -Message-id: <20200207005704.194428-1-dgibson@redhat.com> -Patchwork-id: 93737 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCHv2] spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine -Bugzilla: 1796240 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth - -From: David Gibson - -For POWER9 DD2.2 cpus, the best current Spectre v2 indirect branch -mitigation is "count cache disabled", 
which is configured with: - -machine cap-ibs=fixed-ccd -However, this option isn't available on DD2.3 CPUs with KVM, because they -don't have the count cache disabled. - -For POWER9 DD2.3 cpus, it is "count cache flush with assist", configured -with: - -machine cap-ibs=workaround,cap-ccf-assist=on -However this option isn't available on DD2.2 CPUs with KVM, because they -don't have the special CCF assist instruction this relies on. - -On current machine types, we default to "count cache flush w/o assist", -that is: - -machine cap-ibs=workaround,cap-ccf-assist=off -This runs, with mitigation on both DD2.2 and DD2.3 host cpus, but has a -fairly significant performance impact. - -It turns out we can do better. The special instruction that CCF assist -uses to trigger a count cache flush is a no-op on earlier CPUs, rather than -trapping or causing other badness. It doesn't, of itself, implement the -mitigation, but *if* we have count-cache-disabled, then the count cache -flush is unnecessary, and so using the count cache flush mitigation is -harmless. - -Therefore for the new pseries-5.0 machine type, enable cap-ccf-assist by -default. Along with that, suppress throwing an error if cap-ccf-assist -is selected but KVM doesn't support it, as long as KVM *is* giving us -count-cache-disabled. To allow TCG to work out of the box, even though it -doesn't implement the ccf flush assist, downgrade the error in that case to -a warning. This matches several Spectre mitigations where we allow TCG -to operate for debugging, since we don't really make guarantees about TCG -security properties anyway. - -While we're there, make the TCG warning for this case match that for other -mitigations. - -Signed-off-by: David Gibson -Tested-by: Michael Ellerman -(cherry picked from commit 37965dfe4dffa3ac49438337417608e7f346b58a) -Signed-off-by: Danilo C. L. 
de Paula - -Conflicts: - hw/ppc/spapr.c - -Adjusted machine version compatibility code to the RHEL machine types -rather than the upstream machine types. - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1796240 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=26285002 -Branch: rhel-av-8.2.0 -Upstream: Merged for qemu-5.0 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr.c | 4 +++- - hw/ppc/spapr_caps.c | 21 +++++++++++++++++---- - 2 files changed, 20 insertions(+), 5 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index c12862d..a330f03 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -4440,7 +4440,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) - smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */ - smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF; - smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON; -- smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; -+ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON; - spapr_caps_add_properties(smc, &error_abort); - smc->irq = &spapr_irq_dual; - smc->dr_phb_enabled = true; -@@ -4904,6 +4904,8 @@ static void spapr_machine_rhel810_class_options(MachineClass *mc) - hw_compat_rhel_8_1_len); - compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - -+ /* from pseries-4.2 */ -+ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; - } - - DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false); -diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index 805f385..6e6fb28 100644 ---- a/hw/ppc/spapr_caps.c -+++ b/hw/ppc/spapr_caps.c -@@ -492,11 +492,24 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val, - uint8_t kvm_val = kvmppc_get_cap_count_cache_flush_assist(); - - if (tcg_enabled() && val) { -- /* TODO - for now only allow broken for TCG */ -- error_setg(errp, --"Requested count cache flush assist capability level 
not supported by tcg," -- " try appending -machine cap-ccf-assist=off"); -+ /* TCG doesn't implement anything here, but allow with a warning */ -+ warn_report("TCG doesn't support requested feature, cap-ccf-assist=on"); - } else if (kvm_enabled() && (val > kvm_val)) { -+ uint8_t kvm_ibs = kvmppc_get_cap_safe_indirect_branch(); -+ -+ if (kvm_ibs == SPAPR_CAP_FIXED_CCD) { -+ /* -+ * If we don't have CCF assist on the host, the assist -+ * instruction is a harmless no-op. It won't correctly -+ * implement the cache count flush *but* if we have -+ * count-cache-disabled in the host, that flush is -+ * unnnecessary. So, specifically allow this case. This -+ * allows us to have better performance on POWER9 DD2.3, -+ * while still working on POWER9 DD2.2 and POWER8 host -+ * cpus. -+ */ -+ return; -+ } - error_setg(errp, - "Requested count cache flush assist capability level not supported by kvm," - " try appending -machine cap-ccf-assist=off"); --- -1.8.3.1 - diff --git a/kvm-target-arm-arch_dump-Add-SVE-notes.patch b/kvm-target-arm-arch_dump-Add-SVE-notes.patch deleted file mode 100644 index febea10..0000000 --- a/kvm-target-arm-arch_dump-Add-SVE-notes.patch +++ /dev/null @@ -1,298 +0,0 @@ -From d8871ae2842531130c9b333e7c06a6a5d1561286 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Fri, 24 Jan 2020 09:14:34 +0100 -Subject: [PATCH 001/116] target/arm/arch_dump: Add SVE notes - -RH-Author: Andrew Jones -Message-id: <20200124091434.15021-2-drjones@redhat.com> -Patchwork-id: 93443 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] target/arm/arch_dump: Add SVE notes -Bugzilla: 1725084 -RH-Acked-by: Auger Eric -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1725084 - -Author: Andrew Jones -Date: Thu, 23 Jan 2020 15:22:40 +0000 - - target/arm/arch_dump: Add SVE notes - - When dumping a guest with dump-guest-memory also dump the SVE - registers if they are in use. 
- - Signed-off-by: Andrew Jones - Reviewed-by: Richard Henderson - Message-id: 20200120101832.18781-1-drjones@redhat.com - [PMM: fixed checkpatch nits] - Signed-off-by: Peter Maydell - -(cherry picked from commit 538baab245ca881e6a6ff720b5133f3ad1fcaafc) -Signed-off-by: Miroslav Rezanina ---- - include/elf.h | 1 + - target/arm/arch_dump.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++- - target/arm/cpu.h | 25 ++++++++++ - target/arm/kvm64.c | 24 ---------- - 4 files changed, 148 insertions(+), 26 deletions(-) - -diff --git a/include/elf.h b/include/elf.h -index 3501e0c..8fbfe60 100644 ---- a/include/elf.h -+++ b/include/elf.h -@@ -1650,6 +1650,7 @@ typedef struct elf64_shdr { - #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ - #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ - #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ -+#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension regs */ - - /* - * Physical entry point into the kernel. 
-diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c -index 26a2c09..2345dec 100644 ---- a/target/arm/arch_dump.c -+++ b/target/arm/arch_dump.c -@@ -62,12 +62,23 @@ struct aarch64_user_vfp_state { - - QEMU_BUILD_BUG_ON(sizeof(struct aarch64_user_vfp_state) != 528); - -+/* struct user_sve_header from arch/arm64/include/uapi/asm/ptrace.h */ -+struct aarch64_user_sve_header { -+ uint32_t size; -+ uint32_t max_size; -+ uint16_t vl; -+ uint16_t max_vl; -+ uint16_t flags; -+ uint16_t reserved; -+} QEMU_PACKED; -+ - struct aarch64_note { - Elf64_Nhdr hdr; - char name[8]; /* align_up(sizeof("CORE"), 4) */ - union { - struct aarch64_elf_prstatus prstatus; - struct aarch64_user_vfp_state vfp; -+ struct aarch64_user_sve_header sve; - }; - } QEMU_PACKED; - -@@ -76,6 +87,8 @@ struct aarch64_note { - (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_elf_prstatus)) - #define AARCH64_PRFPREG_NOTE_SIZE \ - (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_user_vfp_state)) -+#define AARCH64_SVE_NOTE_SIZE(env) \ -+ (AARCH64_NOTE_HEADER_SIZE + sve_size(env)) - - static void aarch64_note_init(struct aarch64_note *note, DumpState *s, - const char *name, Elf64_Word namesz, -@@ -128,11 +141,102 @@ static int aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f, - return 0; - } - -+#ifdef TARGET_AARCH64 -+static off_t sve_zreg_offset(uint32_t vq, int n) -+{ -+ off_t off = sizeof(struct aarch64_user_sve_header); -+ return ROUND_UP(off, 16) + vq * 16 * n; -+} -+ -+static off_t sve_preg_offset(uint32_t vq, int n) -+{ -+ return sve_zreg_offset(vq, 32) + vq * 16 / 8 * n; -+} -+ -+static off_t sve_fpsr_offset(uint32_t vq) -+{ -+ off_t off = sve_preg_offset(vq, 17); -+ return ROUND_UP(off, 16); -+} -+ -+static off_t sve_fpcr_offset(uint32_t vq) -+{ -+ return sve_fpsr_offset(vq) + sizeof(uint32_t); -+} -+ -+static uint32_t sve_current_vq(CPUARMState *env) -+{ -+ return sve_zcr_len_for_el(env, arm_current_el(env)) + 1; -+} -+ -+static size_t sve_size_vq(uint32_t vq) -+{ -+ off_t off = 
sve_fpcr_offset(vq) + sizeof(uint32_t); -+ return ROUND_UP(off, 16); -+} -+ -+static size_t sve_size(CPUARMState *env) -+{ -+ return sve_size_vq(sve_current_vq(env)); -+} -+ -+static int aarch64_write_elf64_sve(WriteCoreDumpFunction f, -+ CPUARMState *env, int cpuid, -+ DumpState *s) -+{ -+ struct aarch64_note *note; -+ ARMCPU *cpu = env_archcpu(env); -+ uint32_t vq = sve_current_vq(env); -+ uint64_t tmp[ARM_MAX_VQ * 2], *r; -+ uint32_t fpr; -+ uint8_t *buf; -+ int ret, i; -+ -+ note = g_malloc0(AARCH64_SVE_NOTE_SIZE(env)); -+ buf = (uint8_t *)¬e->sve; -+ -+ aarch64_note_init(note, s, "LINUX", 6, NT_ARM_SVE, sve_size_vq(vq)); -+ -+ note->sve.size = cpu_to_dump32(s, sve_size_vq(vq)); -+ note->sve.max_size = cpu_to_dump32(s, sve_size_vq(cpu->sve_max_vq)); -+ note->sve.vl = cpu_to_dump16(s, vq * 16); -+ note->sve.max_vl = cpu_to_dump16(s, cpu->sve_max_vq * 16); -+ note->sve.flags = cpu_to_dump16(s, 1); -+ -+ for (i = 0; i < 32; ++i) { -+ r = sve_bswap64(tmp, &env->vfp.zregs[i].d[0], vq * 2); -+ memcpy(&buf[sve_zreg_offset(vq, i)], r, vq * 16); -+ } -+ -+ for (i = 0; i < 17; ++i) { -+ r = sve_bswap64(tmp, r = &env->vfp.pregs[i].p[0], -+ DIV_ROUND_UP(vq * 2, 8)); -+ memcpy(&buf[sve_preg_offset(vq, i)], r, vq * 16 / 8); -+ } -+ -+ fpr = cpu_to_dump32(s, vfp_get_fpsr(env)); -+ memcpy(&buf[sve_fpsr_offset(vq)], &fpr, sizeof(uint32_t)); -+ -+ fpr = cpu_to_dump32(s, vfp_get_fpcr(env)); -+ memcpy(&buf[sve_fpcr_offset(vq)], &fpr, sizeof(uint32_t)); -+ -+ ret = f(note, AARCH64_SVE_NOTE_SIZE(env), s); -+ g_free(note); -+ -+ if (ret < 0) { -+ return -1; -+ } -+ -+ return 0; -+} -+#endif -+ - int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque) - { - struct aarch64_note note; -- CPUARMState *env = &ARM_CPU(cs)->env; -+ ARMCPU *cpu = ARM_CPU(cs); -+ CPUARMState *env = &cpu->env; - DumpState *s = opaque; - uint64_t pstate, sp; - int ret, i; -@@ -163,7 +267,18 @@ int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, - return -1; 
- } - -- return aarch64_write_elf64_prfpreg(f, env, cpuid, s); -+ ret = aarch64_write_elf64_prfpreg(f, env, cpuid, s); -+ if (ret) { -+ return ret; -+ } -+ -+#ifdef TARGET_AARCH64 -+ if (cpu_isar_feature(aa64_sve, cpu)) { -+ ret = aarch64_write_elf64_sve(f, env, cpuid, s); -+ } -+#endif -+ -+ return ret; - } - - /* struct pt_regs from arch/arm/include/asm/ptrace.h */ -@@ -335,6 +450,11 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) - if (class == ELFCLASS64) { - note_size = AARCH64_PRSTATUS_NOTE_SIZE; - note_size += AARCH64_PRFPREG_NOTE_SIZE; -+#ifdef TARGET_AARCH64 -+ if (cpu_isar_feature(aa64_sve, cpu)) { -+ note_size += AARCH64_SVE_NOTE_SIZE(env); -+ } -+#endif - } else { - note_size = ARM_PRSTATUS_NOTE_SIZE; - if (arm_feature(env, ARM_FEATURE_VFP)) { -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 83a809d..82dd3cc 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -975,6 +975,31 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq); - void aarch64_sve_change_el(CPUARMState *env, int old_el, - int new_el, bool el0_a64); - void aarch64_add_sve_properties(Object *obj); -+ -+/* -+ * SVE registers are encoded in KVM's memory in an endianness-invariant format. -+ * The byte at offset i from the start of the in-memory representation contains -+ * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the -+ * lowest offsets are stored in the lowest memory addresses, then that nearly -+ * matches QEMU's representation, which is to use an array of host-endian -+ * uint64_t's, where the lower offsets are at the lower indices. To complete -+ * the translation we just need to byte swap the uint64_t's on big-endian hosts. 
-+ */ -+static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) -+{ -+#ifdef HOST_WORDS_BIGENDIAN -+ int i; -+ -+ for (i = 0; i < nr; ++i) { -+ dst[i] = bswap64(src[i]); -+ } -+ -+ return dst; -+#else -+ return src; -+#endif -+} -+ - #else - static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { } - static inline void aarch64_sve_change_el(CPUARMState *env, int o, -diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c -index 876184b..e2da756 100644 ---- a/target/arm/kvm64.c -+++ b/target/arm/kvm64.c -@@ -877,30 +877,6 @@ static int kvm_arch_put_fpsimd(CPUState *cs) - } - - /* -- * SVE registers are encoded in KVM's memory in an endianness-invariant format. -- * The byte at offset i from the start of the in-memory representation contains -- * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the -- * lowest offsets are stored in the lowest memory addresses, then that nearly -- * matches QEMU's representation, which is to use an array of host-endian -- * uint64_t's, where the lower offsets are at the lower indices. To complete -- * the translation we just need to byte swap the uint64_t's on big-endian hosts. -- */ --static uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr) --{ --#ifdef HOST_WORDS_BIGENDIAN -- int i; -- -- for (i = 0; i < nr; ++i) { -- dst[i] = bswap64(src[i]); -- } -- -- return dst; --#else -- return src; --#endif --} -- --/* - * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits - * and PREGS and the FFR have a slice size of 256 bits. 
However we simply hard - * code the slice index to zero for now as it's unlikely we'll need more than --- -1.8.3.1 - diff --git a/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch b/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch deleted file mode 100644 index 601b8c4..0000000 --- a/kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch +++ /dev/null @@ -1,281 +0,0 @@ -From 730f72105b478553c4f22555c29b0f64224ff914 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Fri, 31 Jan 2020 14:23:14 +0000 -Subject: [PATCH 12/15] target/arm/cpu: Add the kvm-no-adjvtime CPU property -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200131142314.13175-6-drjones@redhat.com> -Patchwork-id: 93623 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/5] target/arm/cpu: Add the kvm-no-adjvtime CPU property -Bugzilla: 1647366 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 - -Author: Andrew Jones -Date: Thu, 30 Jan 2020 16:02:06 +0000 - - target/arm/cpu: Add the kvm-no-adjvtime CPU property - - kvm-no-adjvtime is a KVM specific CPU property and a first of its - kind. To accommodate it we also add kvm_arm_add_vcpu_properties() - and a KVM specific CPU properties description to the CPU features - document. - - Signed-off-by: Andrew Jones - Message-id: 20200120101023.16030-7-drjones@redhat.com - Reviewed-by: Peter Maydell - Signed-off-by: Peter Maydell - -(cherry picked from commit dea101a1ae9968c9fec6ab0291489dad7c49f36f) -Signed-off-by: Danilo C. L. de Paula - -Conflicts: - Dropped the second hunk of the hw/arm/virt.c changes - as they would patch dead code. - -Signed-off-by: Danilo C. L. 
de Paula ---- - docs/arm-cpu-features.rst | 37 ++++++++++++++++++++++++++++++++++++- - hw/arm/virt.c | 5 +++++ - include/hw/arm/virt.h | 1 + - target/arm/cpu.c | 2 ++ - target/arm/cpu64.c | 1 + - target/arm/kvm.c | 28 ++++++++++++++++++++++++++++ - target/arm/kvm_arm.h | 11 +++++++++++ - target/arm/monitor.c | 1 + - tests/arm-cpu-features.c | 4 ++++ - 9 files changed, 89 insertions(+), 1 deletion(-) - -diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst -index 1b367e2..45d1eb6 100644 ---- a/docs/arm-cpu-features.rst -+++ b/docs/arm-cpu-features.rst -@@ -31,7 +31,9 @@ supporting the feature or only supporting the feature under certain - configurations. For example, the `aarch64` CPU feature, which, when - disabled, enables the optional AArch32 CPU feature, is only supported - when using the KVM accelerator and when running on a host CPU type that --supports the feature. -+supports the feature. While `aarch64` currently only works with KVM, -+it could work with TCG. CPU features that are specific to KVM are -+prefixed with "kvm-" and are described in "KVM VCPU Features". - - CPU Feature Probing - =================== -@@ -171,6 +173,39 @@ disabling many SVE vector lengths would be quite verbose, the `sve` CPU - properties have special semantics (see "SVE CPU Property Parsing - Semantics"). - -+KVM VCPU Features -+================= -+ -+KVM VCPU features are CPU features that are specific to KVM, such as -+paravirt features or features that enable CPU virtualization extensions. -+The features' CPU properties are only available when KVM is enabled and -+are named with the prefix "kvm-". KVM VCPU features may be probed, -+enabled, and disabled in the same way as other CPU features. Below is -+the list of KVM VCPU features and their descriptions. -+ -+ kvm-no-adjvtime By default kvm-no-adjvtime is disabled. This -+ means that by default the virtual time -+ adjustment is enabled (vtime is *not not* -+ adjusted). 
-+ -+ When virtual time adjustment is enabled each -+ time the VM transitions back to running state -+ the VCPU's virtual counter is updated to ensure -+ stopped time is not counted. This avoids time -+ jumps surprising guest OSes and applications, -+ as long as they use the virtual counter for -+ timekeeping. However it has the side effect of -+ the virtual and physical counters diverging. -+ All timekeeping based on the virtual counter -+ will appear to lag behind any timekeeping that -+ does not subtract VM stopped time. The guest -+ may resynchronize its virtual counter with -+ other time sources as needed. -+ -+ Enable kvm-no-adjvtime to disable virtual time -+ adjustment, also restoring the legacy (pre-5.0) -+ behavior. -+ - SVE CPU Properties - ================== - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index e108391..d30d38c 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1707,6 +1707,11 @@ static void machvirt_init(MachineState *machine) - } - } - -+ if (vmc->kvm_no_adjvtime && -+ object_property_find(cpuobj, "kvm-no-adjvtime", NULL)) { -+ object_property_set_bool(cpuobj, true, "kvm-no-adjvtime", NULL); -+ } -+ - if (vmc->no_pmu && object_property_find(cpuobj, "pmu", NULL)) { - object_property_set_bool(cpuobj, false, "pmu", NULL); - } -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 53fdf16..77828ce 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -109,6 +109,7 @@ typedef struct { - bool smbios_old_sys_ver; - bool no_highmem_ecam; - bool no_ged; /* Machines < 4.2 has no support for ACPI GED device */ -+ bool kvm_no_adjvtime; - } VirtMachineClass; - - typedef struct { -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 3788fc3..e46efe9 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -2482,6 +2482,7 @@ static void arm_max_initfn(Object *obj) - - if (kvm_enabled()) { - kvm_arm_set_cpu_features_from_host(cpu); -+ kvm_arm_add_vcpu_properties(obj); - } else { - cortex_a15_initfn(obj); - -@@ 
-2673,6 +2674,7 @@ static void arm_host_initfn(Object *obj) - if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { - aarch64_add_sve_properties(obj); - } -+ kvm_arm_add_vcpu_properties(obj); - arm_cpu_post_init(obj); - } - -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index a39d6fc..3cd416d 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -605,6 +605,7 @@ static void aarch64_max_initfn(Object *obj) - - if (kvm_enabled()) { - kvm_arm_set_cpu_features_from_host(cpu); -+ kvm_arm_add_vcpu_properties(obj); - } else { - uint64_t t; - uint32_t u; -diff --git a/target/arm/kvm.c b/target/arm/kvm.c -index 26d7f8b..4be9497 100644 ---- a/target/arm/kvm.c -+++ b/target/arm/kvm.c -@@ -17,6 +17,8 @@ - #include "qemu/timer.h" - #include "qemu/error-report.h" - #include "qemu/main-loop.h" -+#include "qom/object.h" -+#include "qapi/error.h" - #include "sysemu/sysemu.h" - #include "sysemu/kvm.h" - #include "sysemu/kvm_int.h" -@@ -179,6 +181,32 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) - env->features = arm_host_cpu_features.features; - } - -+static bool kvm_no_adjvtime_get(Object *obj, Error **errp) -+{ -+ return !ARM_CPU(obj)->kvm_adjvtime; -+} -+ -+static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp) -+{ -+ ARM_CPU(obj)->kvm_adjvtime = !value; -+} -+ -+/* KVM VCPU properties should be prefixed with "kvm-". */ -+void kvm_arm_add_vcpu_properties(Object *obj) -+{ -+ if (!kvm_enabled()) { -+ return; -+ } -+ -+ ARM_CPU(obj)->kvm_adjvtime = true; -+ object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get, -+ kvm_no_adjvtime_set, &error_abort); -+ object_property_set_description(obj, "kvm-no-adjvtime", -+ "Set on to disable the adjustment of " -+ "the virtual counter. 
VM stopped time " -+ "will be counted.", &error_abort); -+} -+ - bool kvm_arm_pmu_supported(CPUState *cpu) - { - KVMState *s = KVM_STATE(current_machine->accelerator); -diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h -index 01a9a18..ae9e075 100644 ---- a/target/arm/kvm_arm.h -+++ b/target/arm/kvm_arm.h -@@ -256,6 +256,15 @@ void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map); - void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); - - /** -+ * kvm_arm_add_vcpu_properties: -+ * @obj: The CPU object to add the properties to -+ * -+ * Add all KVM specific CPU properties to the CPU object. These -+ * are the CPU properties with "kvm-" prefixed names. -+ */ -+void kvm_arm_add_vcpu_properties(Object *obj); -+ -+/** - * kvm_arm_aarch32_supported: - * @cs: CPUState - * -@@ -345,6 +354,8 @@ static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) - cpu->host_cpu_probe_failed = true; - } - -+static inline void kvm_arm_add_vcpu_properties(Object *obj) {} -+ - static inline bool kvm_arm_aarch32_supported(CPUState *cs) - { - return false; -diff --git a/target/arm/monitor.c b/target/arm/monitor.c -index fa054f8..9725dff 100644 ---- a/target/arm/monitor.c -+++ b/target/arm/monitor.c -@@ -103,6 +103,7 @@ static const char *cpu_model_advertised_features[] = { - "sve128", "sve256", "sve384", "sve512", - "sve640", "sve768", "sve896", "sve1024", "sve1152", "sve1280", - "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048", -+ "kvm-no-adjvtime", - NULL - }; - -diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c -index 89285ca..ba1a6fe 100644 ---- a/tests/arm-cpu-features.c -+++ b/tests/arm-cpu-features.c -@@ -428,6 +428,8 @@ static void test_query_cpu_model_expansion(const void *data) - assert_has_feature_enabled(qts, "cortex-a15", "pmu"); - assert_has_not_feature(qts, "cortex-a15", "aarch64"); - -+ assert_has_not_feature(qts, "max", "kvm-no-adjvtime"); -+ - if (g_str_equal(qtest_get_arch(), "aarch64")) { - 
assert_has_feature_enabled(qts, "max", "aarch64"); - assert_has_feature_enabled(qts, "max", "sve"); -@@ -462,6 +464,8 @@ static void test_query_cpu_model_expansion_kvm(const void *data) - return; - } - -+ assert_has_feature_disabled(qts, "host", "kvm-no-adjvtime"); -+ - if (g_str_equal(qtest_get_arch(), "aarch64")) { - bool kvm_supports_sve; - char max_name[8], name[8]; --- -1.8.3.1 - diff --git a/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch b/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch deleted file mode 100644 index 3396a32..0000000 --- a/kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch +++ /dev/null @@ -1,330 +0,0 @@ -From 5388ea3fc0737d1a659256ff3663057bef484c19 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Fri, 31 Jan 2020 14:23:13 +0000 -Subject: [PATCH 11/15] target/arm/kvm: Implement virtual time adjustment -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200131142314.13175-5-drjones@redhat.com> -Patchwork-id: 93622 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/5] target/arm/kvm: Implement virtual time adjustment -Bugzilla: 1647366 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 - -Author: Andrew Jones -Date: Thu, 30 Jan 2020 16:02:06 +0000 - - target/arm/kvm: Implement virtual time adjustment - - When a VM is stopped (such as when it's paused) guest virtual time - should stop counting. Otherwise, when the VM is resumed it will - experience time jumps and its kernel may report soft lockups. Not - counting virtual time while the VM is stopped has the side effect - of making the guest's time appear to lag when compared with real - time, and even with time derived from the physical counter. 
For - this reason, this change, which is enabled by default, comes with - a KVM CPU feature allowing it to be disabled, restoring legacy - behavior. - - This patch only provides the implementation of the virtual time - adjustment. A subsequent patch will provide the CPU property - allowing the change to be enabled and disabled. - - Reported-by: Bijan Mottahedeh - Signed-off-by: Andrew Jones - Message-id: 20200120101023.16030-6-drjones@redhat.com - Reviewed-by: Peter Maydell - Signed-off-by: Peter Maydell - -(cherry picked from commit e5ac4200b4cddf44df9adbef677af0d1f1c579c6) -Signed-off-by: Danilo C. L. de Paula ---- - target/arm/cpu.h | 7 ++++ - target/arm/kvm.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++ - target/arm/kvm32.c | 3 ++ - target/arm/kvm64.c | 3 ++ - target/arm/kvm_arm.h | 38 ++++++++++++++++++++++ - target/arm/machine.c | 7 ++++ - 6 files changed, 150 insertions(+) - -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 82dd3cc..fbd8ea0 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -821,6 +821,13 @@ struct ARMCPU { - /* KVM init features for this CPU */ - uint32_t kvm_init_features[7]; - -+ /* KVM CPU state */ -+ -+ /* KVM virtual time adjustment */ -+ bool kvm_adjvtime; -+ bool kvm_vtime_dirty; -+ uint64_t kvm_vtime; -+ - /* Uniprocessor system with MP extensions */ - bool mp_is_up; - -diff --git a/target/arm/kvm.c b/target/arm/kvm.c -index 5b82cef..26d7f8b 100644 ---- a/target/arm/kvm.c -+++ b/target/arm/kvm.c -@@ -359,6 +359,22 @@ static int compare_u64(const void *a, const void *b) - return 0; - } - -+/* -+ * cpreg_values are sorted in ascending order by KVM register ID -+ * (see kvm_arm_init_cpreg_list). This allows us to cheaply find -+ * the storage for a KVM register by ID with a binary search. 
-+ */ -+static uint64_t *kvm_arm_get_cpreg_ptr(ARMCPU *cpu, uint64_t regidx) -+{ -+ uint64_t *res; -+ -+ res = bsearch(®idx, cpu->cpreg_indexes, cpu->cpreg_array_len, -+ sizeof(uint64_t), compare_u64); -+ assert(res); -+ -+ return &cpu->cpreg_values[res - cpu->cpreg_indexes]; -+} -+ - /* Initialize the ARMCPU cpreg list according to the kernel's - * definition of what CPU registers it knows about (and throw away - * the previous TCG-created cpreg list). -@@ -512,6 +528,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) - return ok; - } - -+void kvm_arm_cpu_pre_save(ARMCPU *cpu) -+{ -+ /* KVM virtual time adjustment */ -+ if (cpu->kvm_vtime_dirty) { -+ *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT) = cpu->kvm_vtime; -+ } -+} -+ -+void kvm_arm_cpu_post_load(ARMCPU *cpu) -+{ -+ /* KVM virtual time adjustment */ -+ if (cpu->kvm_adjvtime) { -+ cpu->kvm_vtime = *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT); -+ cpu->kvm_vtime_dirty = true; -+ } -+} -+ - void kvm_arm_reset_vcpu(ARMCPU *cpu) - { - int ret; -@@ -579,6 +612,50 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) - return 0; - } - -+void kvm_arm_get_virtual_time(CPUState *cs) -+{ -+ ARMCPU *cpu = ARM_CPU(cs); -+ struct kvm_one_reg reg = { -+ .id = KVM_REG_ARM_TIMER_CNT, -+ .addr = (uintptr_t)&cpu->kvm_vtime, -+ }; -+ int ret; -+ -+ if (cpu->kvm_vtime_dirty) { -+ return; -+ } -+ -+ ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); -+ if (ret) { -+ error_report("Failed to get KVM_REG_ARM_TIMER_CNT"); -+ abort(); -+ } -+ -+ cpu->kvm_vtime_dirty = true; -+} -+ -+void kvm_arm_put_virtual_time(CPUState *cs) -+{ -+ ARMCPU *cpu = ARM_CPU(cs); -+ struct kvm_one_reg reg = { -+ .id = KVM_REG_ARM_TIMER_CNT, -+ .addr = (uintptr_t)&cpu->kvm_vtime, -+ }; -+ int ret; -+ -+ if (!cpu->kvm_vtime_dirty) { -+ return; -+ } -+ -+ ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); -+ if (ret) { -+ error_report("Failed to set KVM_REG_ARM_TIMER_CNT"); -+ abort(); -+ } -+ -+ cpu->kvm_vtime_dirty = false; -+} -+ - int 
kvm_put_vcpu_events(ARMCPU *cpu) - { - CPUARMState *env = &cpu->env; -@@ -690,6 +767,21 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) - return MEMTXATTRS_UNSPECIFIED; - } - -+void kvm_arm_vm_state_change(void *opaque, int running, RunState state) -+{ -+ CPUState *cs = opaque; -+ ARMCPU *cpu = ARM_CPU(cs); -+ -+ if (running) { -+ if (cpu->kvm_adjvtime) { -+ kvm_arm_put_virtual_time(cs); -+ } -+ } else { -+ if (cpu->kvm_adjvtime) { -+ kvm_arm_get_virtual_time(cs); -+ } -+ } -+} - - int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) - { -diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c -index 32bf8d6..3a8b437 100644 ---- a/target/arm/kvm32.c -+++ b/target/arm/kvm32.c -@@ -16,6 +16,7 @@ - #include "qemu-common.h" - #include "cpu.h" - #include "qemu/timer.h" -+#include "sysemu/runstate.h" - #include "sysemu/kvm.h" - #include "kvm_arm.h" - #include "internals.h" -@@ -198,6 +199,8 @@ int kvm_arch_init_vcpu(CPUState *cs) - return -EINVAL; - } - -+ qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); -+ - /* Determine init features for this CPU */ - memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); - if (cpu->start_powered_off) { -diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c -index 666a81a..d368189 100644 ---- a/target/arm/kvm64.c -+++ b/target/arm/kvm64.c -@@ -23,6 +23,7 @@ - #include "qemu/host-utils.h" - #include "qemu/main-loop.h" - #include "exec/gdbstub.h" -+#include "sysemu/runstate.h" - #include "sysemu/kvm.h" - #include "sysemu/kvm_int.h" - #include "kvm_arm.h" -@@ -735,6 +736,8 @@ int kvm_arch_init_vcpu(CPUState *cs) - return -EINVAL; - } - -+ qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); -+ - /* Determine init features for this CPU */ - memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); - if (cpu->start_powered_off) { -diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h -index b48a9c9..01a9a18 100644 ---- a/target/arm/kvm_arm.h -+++ b/target/arm/kvm_arm.h 
-@@ -128,6 +128,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level); - bool write_kvmstate_to_list(ARMCPU *cpu); - - /** -+ * kvm_arm_cpu_pre_save: -+ * @cpu: ARMCPU -+ * -+ * Called after write_kvmstate_to_list() from cpu_pre_save() to update -+ * the cpreg list with KVM CPU state. -+ */ -+void kvm_arm_cpu_pre_save(ARMCPU *cpu); -+ -+/** -+ * kvm_arm_cpu_post_load: -+ * @cpu: ARMCPU -+ * -+ * Called from cpu_post_load() to update KVM CPU state from the cpreg list. -+ */ -+void kvm_arm_cpu_post_load(ARMCPU *cpu); -+ -+/** - * kvm_arm_reset_vcpu: - * @cpu: ARMCPU - * -@@ -292,6 +309,24 @@ int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); - */ - int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); - -+/** -+ * kvm_arm_get_virtual_time: -+ * @cs: CPUState -+ * -+ * Gets the VCPU's virtual counter and stores it in the KVM CPU state. -+ */ -+void kvm_arm_get_virtual_time(CPUState *cs); -+ -+/** -+ * kvm_arm_put_virtual_time: -+ * @cs: CPUState -+ * -+ * Sets the VCPU's virtual counter to the value stored in the KVM CPU state. 
-+ */ -+void kvm_arm_put_virtual_time(CPUState *cs); -+ -+void kvm_arm_vm_state_change(void *opaque, int running, RunState state); -+ - int kvm_arm_vgic_probe(void); - - void kvm_arm_pmu_set_irq(CPUState *cs, int irq); -@@ -339,6 +374,9 @@ static inline void kvm_arm_pmu_set_irq(CPUState *cs, int irq) {} - static inline void kvm_arm_pmu_init(CPUState *cs) {} - - static inline void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map) {} -+ -+static inline void kvm_arm_get_virtual_time(CPUState *cs) {} -+static inline void kvm_arm_put_virtual_time(CPUState *cs) {} - #endif - - static inline const char *gic_class_name(void) -diff --git a/target/arm/machine.c b/target/arm/machine.c -index eb28b23..241890a 100644 ---- a/target/arm/machine.c -+++ b/target/arm/machine.c -@@ -642,6 +642,12 @@ static int cpu_pre_save(void *opaque) - /* This should never fail */ - abort(); - } -+ -+ /* -+ * kvm_arm_cpu_pre_save() must be called after -+ * write_kvmstate_to_list() -+ */ -+ kvm_arm_cpu_pre_save(cpu); - } else { - if (!write_cpustate_to_list(cpu, false)) { - /* This should never fail. */ -@@ -744,6 +750,7 @@ static int cpu_post_load(void *opaque, int version_id) - * we're using it. 
- */ - write_list_to_cpustate(cpu); -+ kvm_arm_cpu_post_load(cpu); - } else { - if (!write_list_to_cpustate(cpu)) { - return -1; --- -1.8.3.1 - diff --git a/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch b/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch deleted file mode 100644 index 8cdc867..0000000 --- a/kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch +++ /dev/null @@ -1,197 +0,0 @@ -From 11cb9cb7b1b56d5c9723e9c50bc2903281893bcc Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Fri, 31 Jan 2020 14:23:10 +0000 -Subject: [PATCH 08/15] target/arm/kvm: trivial: Clean up header documentation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200131142314.13175-2-drjones@redhat.com> -Patchwork-id: 93625 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/5] target/arm/kvm: trivial: Clean up header documentation -Bugzilla: 1647366 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 - -Author: Andrew Jones -Date: Thu, 30 Jan 2020 16:02:05 +0000 - - target/arm/kvm: trivial: Clean up header documentation - - Signed-off-by: Andrew Jones - Message-id: 20200120101023.16030-2-drjones@redhat.com - Reviewed-by: Peter Maydell - Signed-off-by: Peter Maydell - -(cherry picked from commit d1ebbc9d16297b54b153ee33abe05eb4f1df0c66) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/arm/kvm_arm.h | 46 +++++++++++++++++++++++++++------------------- - 1 file changed, 27 insertions(+), 19 deletions(-) - -diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h -index 8e14d40..b48a9c9 100644 ---- a/target/arm/kvm_arm.h -+++ b/target/arm/kvm_arm.h -@@ -28,9 +28,9 @@ - int kvm_arm_vcpu_init(CPUState *cs); - - /** -- * kvm_arm_vcpu_finalize -+ * kvm_arm_vcpu_finalize: - * @cs: CPUState -- * @feature: int -+ * @feature: feature to finalize - * - * Finalizes the configuration of the specified VCPU feature by - * invoking the KVM_ARM_VCPU_FINALIZE ioctl. Features requiring -@@ -75,8 +75,8 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, - int kvm_arm_init_cpreg_list(ARMCPU *cpu); - - /** -- * kvm_arm_reg_syncs_via_cpreg_list -- * regidx: KVM register index -+ * kvm_arm_reg_syncs_via_cpreg_list: -+ * @regidx: KVM register index - * - * Return true if this KVM register should be synchronized via the - * cpreg list of arbitrary system registers, false if it is synchronized -@@ -85,8 +85,8 @@ int kvm_arm_init_cpreg_list(ARMCPU *cpu); - bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx); - - /** -- * kvm_arm_cpreg_level -- * regidx: KVM register index -+ * kvm_arm_cpreg_level: -+ * @regidx: KVM register index - * - * Return the level of this coprocessor/system register. Return value is - * either KVM_PUT_RUNTIME_STATE, KVM_PUT_RESET_STATE, or KVM_PUT_FULL_STATE. -@@ -148,6 +148,8 @@ void kvm_arm_init_serror_injection(CPUState *cs); - * @cpu: ARMCPU - * - * Get VCPU related state from kvm. -+ * -+ * Returns: 0 if success else < 0 error code - */ - int kvm_get_vcpu_events(ARMCPU *cpu); - -@@ -156,6 +158,8 @@ int kvm_get_vcpu_events(ARMCPU *cpu); - * @cpu: ARMCPU - * - * Put VCPU related state to kvm. 
-+ * -+ * Returns: 0 if success else < 0 error code - */ - int kvm_put_vcpu_events(ARMCPU *cpu); - -@@ -205,10 +209,12 @@ typedef struct ARMHostCPUFeatures { - - /** - * kvm_arm_get_host_cpu_features: -- * @ahcc: ARMHostCPUClass to fill in -+ * @ahcf: ARMHostCPUClass to fill in - * - * Probe the capabilities of the host kernel's preferred CPU and fill - * in the ARMHostCPUClass struct accordingly. -+ * -+ * Returns true on success and false otherwise. - */ - bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf); - -@@ -242,7 +248,7 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); - bool kvm_arm_aarch32_supported(CPUState *cs); - - /** -- * bool kvm_arm_pmu_supported: -+ * kvm_arm_pmu_supported: - * @cs: CPUState - * - * Returns: true if the KVM VCPU can enable its PMU -@@ -251,7 +257,7 @@ bool kvm_arm_aarch32_supported(CPUState *cs); - bool kvm_arm_pmu_supported(CPUState *cs); - - /** -- * bool kvm_arm_sve_supported: -+ * kvm_arm_sve_supported: - * @cs: CPUState - * - * Returns true if the KVM VCPU can enable SVE and false otherwise. -@@ -259,26 +265,30 @@ bool kvm_arm_pmu_supported(CPUState *cs); - bool kvm_arm_sve_supported(CPUState *cs); - - /** -- * kvm_arm_get_max_vm_ipa_size - Returns the number of bits in the -- * IPA address space supported by KVM -- * -+ * kvm_arm_get_max_vm_ipa_size: - * @ms: Machine state handle -+ * -+ * Returns the number of bits in the IPA address space supported by KVM - */ - int kvm_arm_get_max_vm_ipa_size(MachineState *ms); - - /** -- * kvm_arm_sync_mpstate_to_kvm -+ * kvm_arm_sync_mpstate_to_kvm: - * @cpu: ARMCPU - * - * If supported set the KVM MP_STATE based on QEMU's model. -+ * -+ * Returns 0 on success and -1 on failure. - */ - int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); - - /** -- * kvm_arm_sync_mpstate_to_qemu -+ * kvm_arm_sync_mpstate_to_qemu: - * @cpu: ARMCPU - * - * If supported get the MP_STATE from KVM and store in QEMU's model. -+ * -+ * Returns 0 on success and aborts on failure. 
- */ - int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); - -@@ -292,7 +302,8 @@ int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); - - static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) - { -- /* This should never actually be called in the "not KVM" case, -+ /* -+ * This should never actually be called in the "not KVM" case, - * but set up the fields to indicate an error anyway. - */ - cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE; -@@ -377,23 +388,20 @@ bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit); - * - * Return: TRUE if any hardware breakpoints in use. - */ -- - bool kvm_arm_hw_debug_active(CPUState *cs); - - /** - * kvm_arm_copy_hw_debug_data: -- * - * @ptr: kvm_guest_debug_arch structure - * - * Copy the architecture specific debug registers into the - * kvm_guest_debug ioctl structure. - */ - struct kvm_guest_debug_arch; -- - void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr); - - /** -- * its_class_name -+ * its_class_name: - * - * Return the ITS class name to use depending on whether KVM acceleration - * and KVM CAP_SIGNAL_MSI are supported --- -1.8.3.1 - diff --git a/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch b/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch deleted file mode 100644 index 36c0f1a..0000000 --- a/kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 2740a84fe798ade5c1ce725d65cdaffb255da47c Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Fri, 31 Jan 2020 14:23:11 +0000 -Subject: [PATCH 09/15] target/arm/kvm64: kvm64 cpus have timer registers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200131142314.13175-3-drjones@redhat.com> -Patchwork-id: 93621 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/5] target/arm/kvm64: kvm64 cpus have timer registers -Bugzilla: 1647366 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: 
Auger Eric -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 - -Author: Andrew Jones -Date: Thu, 30 Jan 2020 16:02:06 +0000 - - target/arm/kvm64: kvm64 cpus have timer registers - - Add the missing GENERIC_TIMER feature to kvm64 cpus. - - We don't currently use these registers when KVM is enabled, but it's - probably best we add the feature flag for consistency and potential - future use. There's also precedent, as we add the PMU feature flag to - KVM enabled guests, even though we don't use those registers either. - - This change was originally posted as a hunk of a different, never - merged patch from Bijan Mottahedeh. - - Signed-off-by: Andrew Jones - Reviewed-by: Richard Henderson - Message-id: 20200120101023.16030-4-drjones@redhat.com - Signed-off-by: Peter Maydell - -(cherry picked from commit 65caa415487f4a6e265105446c6ef8f56bb0aa70) -Signed-off-by: Danilo C. L. de Paula ---- - target/arm/kvm64.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c -index e2da756..666a81a 100644 ---- a/target/arm/kvm64.c -+++ b/target/arm/kvm64.c -@@ -605,6 +605,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) - set_feature(&features, ARM_FEATURE_NEON); - set_feature(&features, ARM_FEATURE_AARCH64); - set_feature(&features, ARM_FEATURE_PMU); -+ set_feature(&features, ARM_FEATURE_GENERIC_TIMER); - - ahcf->features = features; - --- -1.8.3.1 - diff --git a/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch b/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch deleted file mode 100644 index 55f328d..0000000 --- a/kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch +++ /dev/null @@ -1,81 +0,0 @@ -From c82cf5c08617c947b34eb490d1714729103e3379 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Mon, 10 Feb 2020 17:33:57 +0000 -Subject: [PATCH 17/18] target/arm/monitor: query-cpu-model-expansion crashed - qemu when using machine type none 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200210173358.16896-2-drjones@redhat.com> -Patchwork-id: 93773 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] target/arm/monitor: query-cpu-model-expansion crashed qemu when using machine type none -Bugzilla: 1801320 -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan -RH-Acked-by: Philippe Mathieu-Daudé - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1801320 - -Author: Liang Yan -Date: Fri, 07 Feb 2020 14:04:21 +0000 - - target/arm/monitor: query-cpu-model-expansion crashed qemu when using machine type none - - Commit e19afd566781 mentioned that target-arm only supports queryable - cpu models 'max', 'host', and the current type when KVM is in use. - The logic works well until using machine type none. - - For machine type none, cpu_type will be null if cpu option is not - set by command line, strlen(cpu_type) will terminate process. - So We add a check above it. - - This won't affect i386 and s390x since they do not use current_cpu. - - Signed-off-by: Liang Yan - Message-id: 20200203134251.12986-1-lyan@suse.com - Reviewed-by: Andrew Jones - Tested-by: Andrew Jones - Signed-off-by: Peter Maydell - -(cherry picked from commit 0999a4ba8718aa96105b978d3567fc7e90244c7e) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/arm/monitor.c | 15 +++++++++------ - 1 file changed, 9 insertions(+), 6 deletions(-) - -diff --git a/target/arm/monitor.c b/target/arm/monitor.c -index 9725dff..c2dc790 100644 ---- a/target/arm/monitor.c -+++ b/target/arm/monitor.c -@@ -137,17 +137,20 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, - } - - if (kvm_enabled()) { -- const char *cpu_type = current_machine->cpu_type; -- int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); - bool supported = false; - - if (!strcmp(model->name, "host") || !strcmp(model->name, "max")) { - /* These are kvmarm's recommended cpu types */ - supported = true; -- } else if (strlen(model->name) == len && -- !strncmp(model->name, cpu_type, len)) { -- /* KVM is enabled and we're using this type, so it works. */ -- supported = true; -+ } else if (current_machine->cpu_type) { -+ const char *cpu_type = current_machine->cpu_type; -+ int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); -+ -+ if (strlen(model->name) == len && -+ !strncmp(model->name, cpu_type, len)) { -+ /* KVM is enabled and we're using this type, so it works. */ -+ supported = true; -+ } - } - if (!supported) { - error_setg(errp, "We cannot guarantee the CPU type '%s' works " --- -1.8.3.1 - diff --git a/kvm-target-i386-add-a-ucode-rev-property.patch b/kvm-target-i386-add-a-ucode-rev-property.patch deleted file mode 100644 index 5c3c770..0000000 --- a/kvm-target-i386-add-a-ucode-rev-property.patch +++ /dev/null @@ -1,125 +0,0 @@ -From 4009f0bcc8004ce481015d088fe335a16b8d7ce1 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:12 +0000 -Subject: [PATCH 2/9] target/i386: add a ucode-rev property - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-3-pbonzini@redhat.com> -Patchwork-id: 93909 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/6] target/i386: add a ucode-rev property -Bugzilla: 1791648 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. 
David Alan Gilbert - -Add the property and plumb it in TCG and HVF (the latter of which -tried to support returning a constant value but used the wrong MSR). - -Signed-off-by: Paolo Bonzini -Message-Id: <1579544504-3616-3-git-send-email-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 4e45aff398cd1542c2a384a2a3b8600f23337d86) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 10 ++++++++++ - target/i386/cpu.h | 3 +++ - target/i386/hvf/x86_emu.c | 4 +--- - target/i386/misc_helper.c | 4 ++++ - 4 files changed, 18 insertions(+), 3 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 863192c..e505d3e 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6325,6 +6325,15 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) - } - } - -+ if (cpu->ucode_rev == 0) { -+ /* The default is the same as KVM's. */ -+ if (IS_AMD_CPU(env)) { -+ cpu->ucode_rev = 0x01000065; -+ } else { -+ cpu->ucode_rev = 0x100000000ULL; -+ } -+ } -+ - /* mwait extended info: needed for Core compatibility */ - /* We always wake on interrupt even if host does not have the capability */ - cpu->mwait.ecx |= CPUID_MWAIT_EMX | CPUID_MWAIT_IBE; -@@ -7008,6 +7017,7 @@ static Property x86_cpu_properties[] = { - DEFINE_PROP_UINT32("min-level", X86CPU, env.cpuid_min_level, 0), - DEFINE_PROP_UINT32("min-xlevel", X86CPU, env.cpuid_min_xlevel, 0), - DEFINE_PROP_UINT32("min-xlevel2", X86CPU, env.cpuid_min_xlevel2, 0), -+ DEFINE_PROP_UINT64("ucode-rev", X86CPU, ucode_rev, 0), - DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true), - DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor_id), - DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index cde2a16..4441061 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -348,6 +348,7 @@ typedef enum X86Seg { - #define MSR_IA32_SPEC_CTRL 0x48 - #define MSR_VIRT_SSBD 0xc001011f - 
#define MSR_IA32_PRED_CMD 0x49 -+#define MSR_IA32_UCODE_REV 0x8b - #define MSR_IA32_CORE_CAPABILITY 0xcf - - #define MSR_IA32_ARCH_CAPABILITIES 0x10a -@@ -1621,6 +1622,8 @@ struct X86CPU { - CPUNegativeOffsetState neg; - CPUX86State env; - -+ uint64_t ucode_rev; -+ - uint32_t hyperv_spinlock_attempts; - char *hyperv_vendor_id; - bool hyperv_synic_kvm_only; -diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c -index 3df7672..92ab815 100644 ---- a/target/i386/hvf/x86_emu.c -+++ b/target/i386/hvf/x86_emu.c -@@ -664,8 +664,6 @@ static void exec_lods(struct CPUX86State *env, struct x86_decode *decode) - RIP(env) += decode->len; - } - --#define MSR_IA32_UCODE_REV 0x00000017 -- - void simulate_rdmsr(struct CPUState *cpu) - { - X86CPU *x86_cpu = X86_CPU(cpu); -@@ -681,7 +679,7 @@ void simulate_rdmsr(struct CPUState *cpu) - val = cpu_get_apic_base(X86_CPU(cpu)->apic_state); - break; - case MSR_IA32_UCODE_REV: -- val = (0x100000000ULL << 32) | 0x100000000ULL; -+ val = x86_cpu->ucode_rev; - break; - case MSR_EFER: - val = rvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER); -diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c -index 3eff688..aed16fe 100644 ---- a/target/i386/misc_helper.c -+++ b/target/i386/misc_helper.c -@@ -229,6 +229,7 @@ void helper_rdmsr(CPUX86State *env) - #else - void helper_wrmsr(CPUX86State *env) - { -+ X86CPU *x86_cpu = env_archcpu(env); - uint64_t val; - - cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 1, GETPC()); -@@ -371,6 +372,9 @@ void helper_wrmsr(CPUX86State *env) - env->msr_bndcfgs = val; - cpu_sync_bndcs_hflags(env); - break; -+ case MSR_IA32_UCODE_REV: -+ val = x86_cpu->ucode_rev; -+ break; - default: - if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL - && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + --- -1.8.3.1 - diff --git a/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch b/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch deleted file mode 100644 index a80c9d3..0000000 --- 
a/kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 27d7b085f2f568050d638b694ed2f51495db718c Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:15 +0000 -Subject: [PATCH 5/9] target/i386: check for availability of MSR_IA32_UCODE_REV - as an emulated MSR -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-6-pbonzini@redhat.com> -Patchwork-id: 93898 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/6] target/i386: check for availability of MSR_IA32_UCODE_REV as an emulated MSR -Bugzilla: 1791648 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -Even though MSR_IA32_UCODE_REV has been available long before Linux 5.6, -which added it to the emulated MSR list, a bug caused the microcode -version to revert to 0x100000000 on INIT. As a result, processors other -than the bootstrap processor would not see the host microcode revision; -some Windows version complain loudly about this and crash with a -fairly explicit MICROCODE REVISION MISMATCH error. - -[If running 5.6 prereleases, the kernel fix "KVM: x86: do not reset - microcode version on INIT or RESET" should also be applied.] - -Reported-by: Alex Williamson -Message-id: <20200211175516.10716-1-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 6702514814c7e7b4cbf179624539b5f38c72740b) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/kvm.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 6c61aef..99840ca 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -105,6 +105,7 @@ static bool has_msr_smi_count; - static bool has_msr_arch_capabs; - static bool has_msr_core_capabs; - static bool has_msr_vmx_vmfunc; -+static bool has_msr_ucode_rev; - - static uint32_t has_architectural_pmu_version; - static uint32_t num_architectural_pmu_gp_counters; -@@ -2056,6 +2057,9 @@ static int kvm_get_supported_msrs(KVMState *s) - case MSR_IA32_VMX_VMFUNC: - has_msr_vmx_vmfunc = true; - break; -+ case MSR_IA32_UCODE_REV: -+ has_msr_ucode_rev = true; -+ break; - } - } - } -@@ -2696,8 +2700,7 @@ static void kvm_init_msrs(X86CPU *cpu) - env->features[FEAT_CORE_CAPABILITY]); - } - -- if (kvm_arch_get_supported_msr_feature(kvm_state, -- MSR_IA32_UCODE_REV)) { -+ if (has_msr_ucode_rev) { - kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); - } - --- -1.8.3.1 - diff --git a/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch b/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch deleted file mode 100644 index 4c2362d..0000000 --- a/kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 77cdcccc49ba988e3b5bcb66decdee2e99fdcd72 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Tue, 14 Apr 2020 15:00:36 +0100 -Subject: [PATCH] target/i386: do not set unsupported VMX secondary execution - controls - -RH-Author: Vitaly Kuznetsov -Message-id: <20200414150036.625732-2-vkuznets@redhat.com> -Patchwork-id: 94674 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] target/i386: do not set unsupported VMX secondary execution controls -Bugzilla: 1822682 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Paolo Bonzini - -Commit 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for -secondary execution controls") added a workaround for KVM pre-dating 
-commit 6defc591846d ("KVM: nVMX: include conditional controls in /dev/kvm -KVM_GET_MSRS") which wasn't setting certain available controls. The -workaround uses generic CPUID feature bits to set missing VMX controls. - -It was found that in some cases it is possible to observe hosts which -have certain CPUID features but lack the corresponding VMX control. - -In particular, it was reported that Azure VMs have RDSEED but lack -VMX_SECONDARY_EXEC_RDSEED_EXITING; attempts to enable this feature -bit result in QEMU abort. - -Resolve the issue but not applying the workaround when we don't have -to. As there is no good way to find out if KVM has the fix itself, use -95c5c7c77c ("KVM: nVMX: list VMX MSRs in KVM_GET_MSR_INDEX_LIST") instead -as these [are supposed to] come together. - -Fixes: 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for secondary execution controls") -Suggested-by: Paolo Bonzini -Signed-off-by: Vitaly Kuznetsov -Message-Id: <20200331162752.1209928-1-vkuznets@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 4a910e1f6ab4155ec8b24c49b2585cc486916985) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/kvm.c | 41 ++++++++++++++++++++++++++--------------- - 1 file changed, 26 insertions(+), 15 deletions(-) - -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 99840ca..fcc8f7d 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -106,6 +106,7 @@ static bool has_msr_arch_capabs; - static bool has_msr_core_capabs; - static bool has_msr_vmx_vmfunc; - static bool has_msr_ucode_rev; -+static bool has_msr_vmx_procbased_ctls2; - - static uint32_t has_architectural_pmu_version; - static uint32_t num_architectural_pmu_gp_counters; -@@ -490,21 +491,28 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) - value = msr_data.entries[0].data; - switch (index) { - case MSR_IA32_VMX_PROCBASED_CTLS2: -- /* KVM forgot to add these bits for some time, do this ourselves. 
*/ -- if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & CPUID_XSAVE_XSAVES) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; -- } -- if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & CPUID_EXT_RDRAND) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; -- } -- if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_INVPCID) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; -- } -- if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_RDSEED) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; -- } -- if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) { -- value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; -+ if (!has_msr_vmx_procbased_ctls2) { -+ /* KVM forgot to add these bits for some time, do this ourselves. */ -+ if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & -+ CPUID_XSAVE_XSAVES) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & -+ CPUID_EXT_RDRAND) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & -+ CPUID_7_0_EBX_INVPCID) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & -+ CPUID_7_0_EBX_RDSEED) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; -+ } -+ if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & -+ CPUID_EXT2_RDTSCP) { -+ value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; -+ } - } - /* fall through */ - case MSR_IA32_VMX_TRUE_PINBASED_CTLS: -@@ -2060,6 +2068,9 @@ static int kvm_get_supported_msrs(KVMState *s) - case MSR_IA32_UCODE_REV: - has_msr_ucode_rev = true; - break; -+ case MSR_IA32_VMX_PROCBASED_CTLS2: -+ has_msr_vmx_procbased_ctls2 = true; -+ break; - } - } - } --- -1.8.3.1 - diff --git a/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch 
b/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch deleted file mode 100644 index 47438a3..0000000 --- a/kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 7b71a7011437ebfa3bc7df9297e892b82293ec98 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:16 +0000 -Subject: [PATCH 6/9] target/i386: enable monitor and ucode revision with -cpu - max -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-7-pbonzini@redhat.com> -Patchwork-id: 93910 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/6] target/i386: enable monitor and ucode revision with -cpu max -Bugzilla: 1791648 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -These two features were incorrectly tied to host_cpuid_required rather than -cpu->max_features. As a result, -cpu max was not enabling either MONITOR -features or ucode revision. - -Signed-off-by: Paolo Bonzini -(cherry picked from commit be02cda3afde60d219786e23c3f8edb53aec8e17) - -[RHEL7: context, upstream uses g_autofree] - -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 5ac843d..1685a8c 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6317,7 +6317,9 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) - g_free(name); - goto out; - } -+ } - -+ if (cpu->max_features && accel_uses_host_cpuid()) { - if (enable_cpu_pm) { - host_cpuid(5, 0, &cpu->mwait.eax, &cpu->mwait.ebx, - &cpu->mwait.ecx, &cpu->mwait.edx); --- -1.8.3.1 - diff --git a/kvm-target-i386-fix-TCG-UCODE_REV-access.patch b/kvm-target-i386-fix-TCG-UCODE_REV-access.patch deleted file mode 100644 index c7ced8a..0000000 --- a/kvm-target-i386-fix-TCG-UCODE_REV-access.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 3d16f05359e6277da1f970f71aa9f76337d655dc Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:14 +0000 -Subject: [PATCH 4/9] target/i386: fix TCG UCODE_REV access -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-5-pbonzini@redhat.com> -Patchwork-id: 93904 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/6] target/i386: fix TCG UCODE_REV access -Bugzilla: 1791648 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -This was a very interesting semantic conflict that caused git to move -the MSR_IA32_UCODE_REV read to helper_wrmsr. Not a big deal, but -still should be fixed... - -Fixes: 4e45aff398 ("target/i386: add a ucode-rev property", 2020-01-24) -Message-id: <20200206171022.9289-1-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 9028c75c9d08be303ccc425bfe3d3b23d8f4cac7) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/misc_helper.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c -index aed16fe..7d61221 100644 ---- a/target/i386/misc_helper.c -+++ b/target/i386/misc_helper.c -@@ -229,7 +229,6 @@ void helper_rdmsr(CPUX86State *env) - #else - void helper_wrmsr(CPUX86State *env) - { -- X86CPU *x86_cpu = env_archcpu(env); - uint64_t val; - - cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 1, GETPC()); -@@ -372,9 +371,6 @@ void helper_wrmsr(CPUX86State *env) - env->msr_bndcfgs = val; - cpu_sync_bndcs_hflags(env); - break; -- case MSR_IA32_UCODE_REV: -- val = x86_cpu->ucode_rev; -- break; - default: - if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL - && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + -@@ -393,6 +389,7 @@ void helper_wrmsr(CPUX86State *env) - - void helper_rdmsr(CPUX86State *env) - { -+ X86CPU *x86_cpu = env_archcpu(env); - uint64_t val; - - cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 0, GETPC()); -@@ -526,6 +523,9 @@ void helper_rdmsr(CPUX86State *env) - case MSR_IA32_BNDCFGS: - val = env->msr_bndcfgs; - break; -+ case MSR_IA32_UCODE_REV: -+ val = x86_cpu->ucode_rev; -+ break; - default: - if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL - && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + --- -1.8.3.1 - diff --git a/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch b/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch deleted file mode 100644 index 5118aed..0000000 --- a/kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch +++ /dev/null @@ -1,178 +0,0 @@ -From eb0fc0ae2750a0462698d6d21ebb56a4249539f9 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:11 +0000 -Subject: [PATCH 1/9] target/i386: kvm: initialize feature MSRs very early -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-2-pbonzini@redhat.com> 
-Patchwork-id: 93899 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/6] target/i386: kvm: initialize feature MSRs very early -Bugzilla: 1791648 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -Some read-only MSRs affect the behavior of ioctls such as -KVM_SET_NESTED_STATE. We can initialize them once and for all -right after the CPU is realized, since they will never be modified -by the guest. - -Reported-by: Qingua Cheng -Cc: qemu-stable@nongnu.org -Signed-off-by: Paolo Bonzini -Message-Id: <1579544504-3616-2-git-send-email-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 420ae1fc51c99abfd03b1c590f55617edd2a2bed) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/kvm.c | 81 ++++++++++++++++++++++++++++++-------------------- - target/i386/kvm_i386.h | 1 + - 2 files changed, 49 insertions(+), 33 deletions(-) - -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 86d9a1f..f41605b 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -67,6 +67,8 @@ - * 255 kvm_msr_entry structs */ - #define MSR_BUF_SIZE 4096 - -+static void kvm_init_msrs(X86CPU *cpu); -+ - const KVMCapabilityInfo kvm_arch_required_capabilities[] = { - KVM_CAP_INFO(SET_TSS_ADDR), - KVM_CAP_INFO(EXT_CPUID), -@@ -1842,6 +1844,8 @@ int kvm_arch_init_vcpu(CPUState *cs) - has_msr_tsc_aux = false; - } - -+ kvm_init_msrs(cpu); -+ - r = hyperv_init_vcpu(cpu); - if (r) { - goto fail; -@@ -2660,11 +2664,53 @@ static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f) - VMCS12_MAX_FIELD_INDEX << 1); - } - -+static int kvm_buf_set_msrs(X86CPU *cpu) -+{ -+ int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ if (ret < cpu->kvm_msr_buf->nmsrs) { -+ struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; -+ error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, -+ (uint32_t)e->index, (uint64_t)e->data); -+ } -+ -+ assert(ret == 
cpu->kvm_msr_buf->nmsrs); -+ return 0; -+} -+ -+static void kvm_init_msrs(X86CPU *cpu) -+{ -+ CPUX86State *env = &cpu->env; -+ -+ kvm_msr_buf_reset(cpu); -+ if (has_msr_arch_capabs) { -+ kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, -+ env->features[FEAT_ARCH_CAPABILITIES]); -+ } -+ -+ if (has_msr_core_capabs) { -+ kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, -+ env->features[FEAT_CORE_CAPABILITY]); -+ } -+ -+ /* -+ * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but -+ * all kernels with MSR features should have them. -+ */ -+ if (kvm_feature_msrs && cpu_has_vmx(env)) { -+ kvm_msr_entry_add_vmx(cpu, env->features); -+ } -+ -+ assert(kvm_buf_set_msrs(cpu) == 0); -+} -+ - static int kvm_put_msrs(X86CPU *cpu, int level) - { - CPUX86State *env = &cpu->env; - int i; -- int ret; - - kvm_msr_buf_reset(cpu); - -@@ -2722,17 +2768,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) - } - #endif - -- /* If host supports feature MSR, write down. */ -- if (has_msr_arch_capabs) { -- kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, -- env->features[FEAT_ARCH_CAPABILITIES]); -- } -- -- if (has_msr_core_capabs) { -- kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, -- env->features[FEAT_CORE_CAPABILITY]); -- } -- - /* - * The following MSRs have side effects on the guest or are too heavy - * for normal writeback. Limit them to reset or full state updates. -@@ -2910,14 +2945,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) - - /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see - * kvm_put_msr_feature_control. */ -- -- /* -- * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but -- * all kernels with MSR features should have them. 
-- */ -- if (kvm_feature_msrs && cpu_has_vmx(env)) { -- kvm_msr_entry_add_vmx(cpu, env->features); -- } - } - - if (env->mcg_cap) { -@@ -2933,19 +2960,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level) - } - } - -- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); -- if (ret < 0) { -- return ret; -- } -- -- if (ret < cpu->kvm_msr_buf->nmsrs) { -- struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; -- error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, -- (uint32_t)e->index, (uint64_t)e->data); -- } -- -- assert(ret == cpu->kvm_msr_buf->nmsrs); -- return 0; -+ return kvm_buf_set_msrs(cpu); - } - - -diff --git a/target/i386/kvm_i386.h b/target/i386/kvm_i386.h -index 06fe06b..d98c6f6 100644 ---- a/target/i386/kvm_i386.h -+++ b/target/i386/kvm_i386.h -@@ -66,4 +66,5 @@ bool kvm_enable_x2apic(void); - bool kvm_has_x2apic_api(void); - - bool kvm_hv_vpindex_settable(void); -+ - #endif --- -1.8.3.1 - diff --git a/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch b/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch deleted file mode 100644 index 99b18fc..0000000 --- a/kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 8f39b0c9523630efeb451e2298cf64b88cd2ac81 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 17 Feb 2020 16:23:13 +0000 -Subject: [PATCH 3/9] target/i386: kvm: initialize microcode revision from KVM -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20200217162316.2464-4-pbonzini@redhat.com> -Patchwork-id: 93897 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/6] target/i386: kvm: initialize microcode revision from KVM -Bugzilla: 1791648 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Maxim Levitsky -RH-Acked-by: Dr. David Alan Gilbert - -KVM can return the host microcode revision as a feature MSR. -Use it as the default value for -cpu host. 
- -Signed-off-by: Paolo Bonzini -Message-Id: <1579544504-3616-4-git-send-email-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 32c87d70ff55b96741f08c35108935cac6f40fe4) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 4 ++++ - target/i386/kvm.c | 5 +++++ - 2 files changed, 9 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index e505d3e..5ac843d 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6323,6 +6323,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) - &cpu->mwait.ecx, &cpu->mwait.edx); - env->features[FEAT_1_ECX] |= CPUID_EXT_MONITOR; - } -+ if (kvm_enabled() && cpu->ucode_rev == 0) { -+ cpu->ucode_rev = kvm_arch_get_supported_msr_feature(kvm_state, -+ MSR_IA32_UCODE_REV); -+ } - } - - if (cpu->ucode_rev == 0) { -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index f41605b..6c61aef 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -2696,6 +2696,11 @@ static void kvm_init_msrs(X86CPU *cpu) - env->features[FEAT_CORE_CAPABILITY]); - } - -+ if (kvm_arch_get_supported_msr_feature(kvm_state, -+ MSR_IA32_UCODE_REV)) { -+ kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); -+ } -+ - /* - * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but - * all kernels with MSR features should have them. 
--- -1.8.3.1 - diff --git a/kvm-tcp_emu-Fix-oob-access.patch b/kvm-tcp_emu-Fix-oob-access.patch deleted file mode 100644 index e532877..0000000 --- a/kvm-tcp_emu-Fix-oob-access.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 5c2c5496083fa549e1dff903413bb6136fc19d8d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Fri, 17 Jan 2020 12:07:56 +0100 -Subject: [PATCH 1/4] tcp_emu: Fix oob access -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20200117120758.1076549-2-marcandre.lureau@redhat.com> -Patchwork-id: 93399 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm + RHEL-AV-8.2.0 qemu-kvm PATCH 1/3] tcp_emu: Fix oob access -Bugzilla: 1791568 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi - -From: Samuel Thibault - -The main loop only checks for one available byte, while we sometimes -need two bytes. - -[ MA - minor conflict, CHANGELOG.md absent ] -(cherry picked from libslirp commit 2655fffed7a9e765bcb4701dd876e9dab975f289) -Signed-off-by: Marc-André Lureau - -Signed-off-by: Miroslav Rezanina ---- - slirp/src/tcp_subr.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index d6dd133..cbecd64 100644 ---- a/slirp/src/tcp_subr.c -+++ b/slirp/src/tcp_subr.c -@@ -886,6 +886,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) - break; - - case 5: -+ if (bptr == m->m_data + m->m_len - 1) -+ return 1; /* We need two bytes */ -+ - /* - * The difference between versions 1.0 and - * 2.0 is here. For future versions of -@@ -901,6 +904,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) - /* This is the field containing the port - * number that RA-player is listening to. 
- */ -+ -+ if (bptr == m->m_data + m->m_len - 1) -+ return 1; /* We need two bytes */ -+ - lport = (((uint8_t *)bptr)[0] << 8) + ((uint8_t *)bptr)[1]; - if (lport < 6970) - lport += 256; /* don't know why */ --- -1.8.3.1 - diff --git a/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch b/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch deleted file mode 100644 index 846da73..0000000 --- a/kvm-tcp_emu-fix-unsafe-snprintf-usages.patch +++ /dev/null @@ -1,149 +0,0 @@ -From 9a7810c257711ce02627916d886fc1029f7a8190 Mon Sep 17 00:00:00 2001 -From: jmaloy -Date: Thu, 13 Feb 2020 15:50:49 +0000 -Subject: [PATCH 3/7] tcp_emu: fix unsafe snprintf() usages -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: jmaloy -Message-id: <20200213155049.3936-3-jmaloy@redhat.com> -Patchwork-id: 93826 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] tcp_emu: fix unsafe snprintf() usages -Bugzilla: 1798994 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi - -From: Marc-André Lureau - -Various calls to snprintf() assume that snprintf() returns "only" the -number of bytes written (excluding terminating NUL). - -https://pubs.opengroup.org/onlinepubs/9699919799/functions/snprintf.html#tag_16_159_04 - -"Upon successful completion, the snprintf() function shall return the -number of bytes that would be written to s had n been sufficiently -large excluding the terminating null byte." - -Before patch ce131029, if there isn't enough room in "m_data" for the -"DCC ..." message, we overflow "m_data". - -After the patch, if there isn't enough room for the same, we don't -overflow "m_data", but we set "m_len" out-of-bounds. The next time an -access is bounded by "m_len", we'll have a buffer overflow then. - -Use slirp_fmt*() to fix potential OOB memory access. 
- -Reported-by: Laszlo Ersek -Signed-off-by: Marc-André Lureau -Reviewed-by: Samuel Thibault -Message-Id: <20200127092414.169796-7-marcandre.lureau@redhat.com> -(cherry picked from libslirp commit 68ccb8021a838066f0951d4b2817eb6b6f10a843) -Signed-off-by: Jon Maloy - -Signed-off-by: Danilo C. L. de Paula ---- - slirp/src/tcp_subr.c | 44 +++++++++++++++++++++----------------------- - 1 file changed, 21 insertions(+), 23 deletions(-) - -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index 954d1a6..26d4ead 100644 ---- a/slirp/src/tcp_subr.c -+++ b/slirp/src/tcp_subr.c -@@ -655,8 +655,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) - NTOHS(n1); - NTOHS(n2); - m_inc(m, snprintf(NULL, 0, "%d,%d\r\n", n1, n2) + 1); -- m->m_len = snprintf(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); -- assert(m->m_len < M_ROOM(m)); -+ m->m_len = slirp_fmt(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); - } else { - *eol = '\r'; - } -@@ -696,9 +695,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) - n4 = (laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -- "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, -- n5, n6, x == 7 ? buff : ""); -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "ORT %d,%d,%d,%d,%d,%d\r\n%s", -+ n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); - return 1; - } else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) { - /* -@@ -731,10 +730,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) - n4 = (laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -- "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", -- n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); -- -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", -+ n1, n2, n3, n4, n5, n6, x == 7 ? 
buff : ""); - return 1; - } - -@@ -757,8 +755,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) - if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && - (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, - htons(lport), SS_FACCEPTONCE)) != NULL) -- m->m_len = snprintf(m->m_data, M_ROOM(m), -- "%d", ntohs(so->so_fport)) + 1; -+ m->m_len = slirp_fmt0(m->m_data, M_ROOM(m), -+ "%d", ntohs(so->so_fport)); - return 1; - - case EMU_IRC: -@@ -777,10 +775,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -- "DCC CHAT chat %lu %u%c\n", -- (unsigned long)ntohl(so->so_faddr.s_addr), -- ntohs(so->so_fport), 1); -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "DCC CHAT chat %lu %u%c\n", -+ (unsigned long)ntohl(so->so_faddr.s_addr), -+ ntohs(so->so_fport), 1); - } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, - &n1) == 4) { - if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), -@@ -788,10 +786,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -- "DCC SEND %s %lu %u %u%c\n", buff, -- (unsigned long)ntohl(so->so_faddr.s_addr), -- ntohs(so->so_fport), n1, 1); -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "DCC SEND %s %lu %u %u%c\n", buff, -+ (unsigned long)ntohl(so->so_faddr.s_addr), -+ ntohs(so->so_fport), n1, 1); - } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, - &n1) == 4) { - if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), -@@ -799,10 +797,10 @@ int tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -- "DCC MOVE %s %lu %u %u%c\n", buff, -- (unsigned long)ntohl(so->so_faddr.s_addr), -- ntohs(so->so_fport), n1, 1); -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "DCC MOVE 
%s %lu %u %u%c\n", buff, -+ (unsigned long)ntohl(so->so_faddr.s_addr), -+ ntohs(so->so_fport), n1, 1); - } - return 1; - --- -1.8.3.1 - diff --git a/kvm-tests-arm-cpu-features-Check-feature-default-values.patch b/kvm-tests-arm-cpu-features-Check-feature-default-values.patch deleted file mode 100644 index e8a48bf..0000000 --- a/kvm-tests-arm-cpu-features-Check-feature-default-values.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 323889aa2182bf39df10f1caf43f22daea2d7d37 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Fri, 31 Jan 2020 14:23:12 +0000 -Subject: [PATCH 10/15] tests/arm-cpu-features: Check feature default values -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -Message-id: <20200131142314.13175-4-drjones@redhat.com> -Patchwork-id: 93626 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/5] tests/arm-cpu-features: Check feature default values -Bugzilla: 1647366 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Auger Eric -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1647366 - -Author: Andrew Jones -Date: Thu, 30 Jan 2020 16:02:06 +0000 - - tests/arm-cpu-features: Check feature default values - - If we know what the default value should be then we can test for - that as well as the feature existence. - - Signed-off-by: Andrew Jones - Reviewed-by: Richard Henderson - Message-id: 20200120101023.16030-5-drjones@redhat.com - Signed-off-by: Peter Maydell - -(cherry picked from commit 789a35efb583464f9fcd5d871a7fd6164318bb91) -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/arm-cpu-features.c | 37 ++++++++++++++++++++++++++++--------- - 1 file changed, 28 insertions(+), 9 deletions(-) - -diff --git a/tests/arm-cpu-features.c b/tests/arm-cpu-features.c -index 6e99aa9..89285ca 100644 ---- a/tests/arm-cpu-features.c -+++ b/tests/arm-cpu-features.c -@@ -159,6 +159,25 @@ static bool resp_get_feature(QDict *resp, const char *feature) - qobject_unref(_resp); \ - }) - -+#define assert_feature(qts, cpu_type, feature, expected_value) \ -+({ \ -+ QDict *_resp, *_props; \ -+ \ -+ _resp = do_query_no_props(qts, cpu_type); \ -+ g_assert(_resp); \ -+ g_assert(resp_has_props(_resp)); \ -+ _props = resp_get_props(_resp); \ -+ g_assert(qdict_get(_props, feature)); \ -+ g_assert(qdict_get_bool(_props, feature) == (expected_value)); \ -+ qobject_unref(_resp); \ -+}) -+ -+#define assert_has_feature_enabled(qts, cpu_type, feature) \ -+ assert_feature(qts, cpu_type, feature, true) -+ -+#define assert_has_feature_disabled(qts, cpu_type, feature) \ -+ assert_feature(qts, cpu_type, feature, false) -+ - static void assert_type_full(QTestState *qts) - { - const char *error; -@@ -405,16 +424,16 @@ static void test_query_cpu_model_expansion(const void *data) - assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); - - /* Test expected feature presence/absence for some cpu types */ -- assert_has_feature(qts, "max", "pmu"); -- assert_has_feature(qts, "cortex-a15", "pmu"); -+ assert_has_feature_enabled(qts, "max", "pmu"); -+ assert_has_feature_enabled(qts, "cortex-a15", "pmu"); - assert_has_not_feature(qts, "cortex-a15", "aarch64"); - - if (g_str_equal(qtest_get_arch(), "aarch64")) { -- assert_has_feature(qts, "max", "aarch64"); -- assert_has_feature(qts, "max", "sve"); -- assert_has_feature(qts, "max", "sve128"); -- assert_has_feature(qts, "cortex-a57", "pmu"); -- assert_has_feature(qts, "cortex-a57", "aarch64"); -+ assert_has_feature_enabled(qts, "max", "aarch64"); -+ assert_has_feature_enabled(qts, "max", "sve"); -+ 
assert_has_feature_enabled(qts, "max", "sve128"); -+ assert_has_feature_enabled(qts, "cortex-a57", "pmu"); -+ assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); - - sve_tests_default(qts, "max"); - -@@ -451,8 +470,8 @@ static void test_query_cpu_model_expansion_kvm(const void *data) - QDict *resp; - char *error; - -- assert_has_feature(qts, "host", "aarch64"); -- assert_has_feature(qts, "host", "pmu"); -+ assert_has_feature_enabled(qts, "host", "aarch64"); -+ assert_has_feature_enabled(qts, "host", "pmu"); - - assert_error(qts, "cortex-a15", - "We cannot guarantee the CPU type 'cortex-a15' works " --- -1.8.3.1 - diff --git a/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch b/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch deleted file mode 100644 index 3efef47..0000000 --- a/kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch +++ /dev/null @@ -1,55 +0,0 @@ -From e483eea891139ee38138381ba6715b3a2be050cc Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:12 +0000 -Subject: [PATCH 16/18] tools/virtiofsd/fuse_lowlevel: Fix - fuse_out_header::error value -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-6-dgilbert@redhat.com> -Patchwork-id: 94128 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 5/7] tools/virtiofsd/fuse_lowlevel: Fix fuse_out_header::error value -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: Philippe Mathieu-Daudé - -Fix warning reported by Clang static code analyzer: - - CC tools/virtiofsd/fuse_lowlevel.o - tools/virtiofsd/fuse_lowlevel.c:195:9: warning: Value stored to 'error' is never read - error = -ERANGE; - ^ ~~~~~~~ - -Fixes: 3db2876 -Reported-by: Clang Static Analyzer -Reviewed-by: Ján Tomko -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 09c086b2a144324199f99a7d4de78c3276a486c1) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/fuse_lowlevel.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 704c036..2dd36ec 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -192,7 +192,7 @@ int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, - - if (error <= -1000 || error > 0) { - fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); -- error = -ERANGE; -+ out.error = -ERANGE; - } - - iov[0].iov_base = &out; --- -1.8.3.1 - diff --git a/kvm-tpm-ppi-page-align-PPI-RAM.patch b/kvm-tpm-ppi-page-align-PPI-RAM.patch deleted file mode 100644 index 32c971d..0000000 --- a/kvm-tpm-ppi-page-align-PPI-RAM.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 7cb1c5e1416de9a09180f0930d2a216c77e8cdbd Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Thu, 30 Jan 2020 16:01:10 +0000 -Subject: [PATCH 07/15] tpm-ppi: page-align PPI RAM -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20200130160110.126086-1-marcandre.lureau@redhat.com> -Patchwork-id: 93600 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] tpm-ppi: page-align PPI RAM -Bugzilla: 1787444 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Philippe Mathieu-Daudé - -post-copy migration fails on destination with error such as: -2019-12-26T10:22:44.714644Z qemu-kvm: ram_block_discard_range: -Unaligned start address: 0x559d2afae9a0 - -Use qemu_memalign() to constrain the PPI RAM memory alignment. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Marc-André Lureau -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Dr. 
David Alan Gilbert -Reviewed-by: Stefan Berger -Signed-off-by: Stefan Berger -Message-id: 20200103074000.1006389-3-marcandre.lureau@redhat.com - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1787444 -Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=26122940 - -(cherry picked from commit 71e415c8a75c130875f14d6b2136825789feb297) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. de Paula ---- - hw/tpm/tpm_ppi.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c -index ff31459..6d9c1a3 100644 ---- a/hw/tpm/tpm_ppi.c -+++ b/hw/tpm/tpm_ppi.c -@@ -43,7 +43,8 @@ void tpm_ppi_reset(TPMPPI *tpmppi) - void tpm_ppi_init(TPMPPI *tpmppi, struct MemoryRegion *m, - hwaddr addr, Object *obj) - { -- tpmppi->buf = g_malloc0(HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); -+ tpmppi->buf = qemu_memalign(qemu_real_host_page_size, -+ HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); - memory_region_init_ram_device_ptr(&tpmppi->ram, obj, "tpm-ppi", - TPM_PPI_ADDR_SIZE, tpmppi->buf); - vmstate_register_ram(&tpmppi->ram, DEVICE(obj)); --- -1.8.3.1 - diff --git a/kvm-trace-update-qemu-trace-stap-to-Python-3.patch b/kvm-trace-update-qemu-trace-stap-to-Python-3.patch deleted file mode 100644 index c49aecd..0000000 --- a/kvm-trace-update-qemu-trace-stap-to-Python-3.patch +++ /dev/null @@ -1,82 +0,0 @@ -From e7cdcd1e39c4c030a32c9e8ef79316eae8555bc8 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 16 Jan 2020 17:52:48 +0000 -Subject: [PATCH 04/15] trace: update qemu-trace-stap to Python 3 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -Message-id: <20200116175248.286556-2-stefanha@redhat.com> -Patchwork-id: 93365 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] trace: update qemu-trace-stap to Python 3 -Bugzilla: 1787395 -RH-Acked-by: John Snow -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Dr. 
David Alan Gilbert - -qemu-trace-stap does not support Python 3 yet: - - $ scripts/qemu-trace-stap list path/to/qemu-system-x86_64 - Traceback (most recent call last): - File "scripts/qemu-trace-stap", line 175, in - main() - File "scripts/qemu-trace-stap", line 171, in main - args.func(args) - File "scripts/qemu-trace-stap", line 118, in cmd_list - print_probes(args.verbose, "*") - File "scripts/qemu-trace-stap", line 114, in print_probes - if line.startswith(prefix): - TypeError: startswith first arg must be bytes or a tuple of bytes, not str - -Now that QEMU requires Python 3.5 or later we can switch to pure Python -3. Use Popen()'s universal_newlines=True argument to treat stdout as -text instead of binary. - -Fixes: 62dd1048c0bd ("trace: add ability to do simple printf logging via systemtap") -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1787395 -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Message-id: 20200107112438.383958-1-stefanha@redhat.com -Message-Id: <20200107112438.383958-1-stefanha@redhat.com> -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 3f0097169bb60268cc5dda0c5ea47c31ab57b22f) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. de Paula ---- - scripts/qemu-trace-stap | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/scripts/qemu-trace-stap b/scripts/qemu-trace-stap -index 91d1051..90527eb 100755 ---- a/scripts/qemu-trace-stap -+++ b/scripts/qemu-trace-stap -@@ -1,4 +1,4 @@ --#!/usr/bin/python -+#!/usr/bin/env python3 - # -*- python -*- - # - # Copyright (C) 2019 Red Hat, Inc -@@ -18,8 +18,6 @@ - # You should have received a copy of the GNU General Public License - # along with this program; if not, see . 
- --from __future__ import print_function -- - import argparse - import copy - import os.path -@@ -104,7 +102,9 @@ def cmd_list(args): - if verbose: - print("Listing probes with name '%s'" % script) - proc = subprocess.Popen(["stap", "-l", script], -- stdout=subprocess.PIPE, env=tapset_env(tapsets)) -+ stdout=subprocess.PIPE, -+ universal_newlines=True, -+ env=tapset_env(tapsets)) - out, err = proc.communicate() - if proc.returncode != 0: - print("No probes found, are the tapsets installed in %s" % tapset_dir(args.binary)) --- -1.8.3.1 - diff --git a/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch b/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch deleted file mode 100644 index 8f08256..0000000 --- a/kvm-usbredir-Prevent-recursion-in-usbredir_write.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 8f6311159977b8ee4b78172caa411d3cee4d2ae5 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 14 Jan 2020 20:23:30 +0000 -Subject: [PATCH 4/5] usbredir: Prevent recursion in usbredir_write -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200114202331.51831-2-dgilbert@redhat.com> -Patchwork-id: 93344 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] usbredir: Prevent recursion in usbredir_write -Bugzilla: 1790844 -RH-Acked-by: Peter Xu -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Gerd Hoffmann - -From: "Dr. David Alan Gilbert" - -I've got a case where usbredir_write manages to call back into itself -via spice; this patch causes the recursion to fail (0 bytes) the write; -this seems to avoid the deadlock I was previously seeing. 
- -I can't say I fully understand the interaction of usbredir and spice; -but there are a few similar guards in spice and usbredir -to catch other cases especially onces also related to spice_server_char_device_wakeup - -This case seems to be triggered by repeated migration+repeated -reconnection of the viewer; but my debugging suggests the migration -finished before this hits. - -The backtrace of the hang looks like: - reds_handle_ticket - reds_handle_other_links - reds_channel_do_link - red_channel_connect - spicevmc_connect - usbredir_create_parser - usbredirparser_do_write - usbredir_write - qemu_chr_fe_write - qemu_chr_write - qemu_chr_write_buffer - spice_chr_write - spice_server_char_device_wakeup - red_char_device_wakeup - red_char_device_write_to_device - vmc_write - usbredirparser_do_write - usbredir_write - qemu_chr_fe_write - qemu_chr_write - qemu_chr_write_buffer - qemu_mutex_lock_impl - -and we fail as we land through qemu_chr_write_buffer's lock -twice. - -Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1752320 - -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20191218113012.13331-1-dgilbert@redhat.com> -Signed-off-by: Gerd Hoffmann -(cherry picked from commit 394642a8d3742c885e397d5bb5ee0ec40743cdc6) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/usb/redirect.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c -index e0f5ca6..97f2c3a 100644 ---- a/hw/usb/redirect.c -+++ b/hw/usb/redirect.c -@@ -113,6 +113,7 @@ struct USBRedirDevice { - /* Properties */ - CharBackend cs; - bool enable_streams; -+ bool in_write; - uint8_t debug; - int32_t bootindex; - char *filter_str; -@@ -290,6 +291,13 @@ static int usbredir_write(void *priv, uint8_t *data, int count) - return 0; - } - -+ /* Recursion check */ -+ if (dev->in_write) { -+ DPRINTF("usbredir_write recursion\n"); -+ return 0; -+ } -+ dev->in_write = true; -+ - r = qemu_chr_fe_write(&dev->cs, data, count); - if (r < count) { - if (!dev->watch) { -@@ -300,6 +308,7 @@ static int usbredir_write(void *priv, uint8_t *data, int count) - r = 0; - } - } -+ dev->in_write = false; - return r; - } - --- -1.8.3.1 - diff --git a/kvm-util-add-slirp_fmt-helpers.patch b/kvm-util-add-slirp_fmt-helpers.patch deleted file mode 100644 index 31af599..0000000 --- a/kvm-util-add-slirp_fmt-helpers.patch +++ /dev/null @@ -1,140 +0,0 @@ -From 5dc50c6bca059a9cda6677b1fd0187df1de78ed7 Mon Sep 17 00:00:00 2001 -From: jmaloy -Date: Thu, 13 Feb 2020 15:50:48 +0000 -Subject: [PATCH 2/7] util: add slirp_fmt() helpers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: jmaloy -Message-id: <20200213155049.3936-2-jmaloy@redhat.com> -Patchwork-id: 93824 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/2] util: add slirp_fmt() helpers -Bugzilla: 1798994 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi - -From: Marc-André Lureau - -Various calls to snprintf() in libslirp assume that snprintf() returns -"only" the number of bytes written (excluding terminating NUL). 
- -https://pubs.opengroup.org/onlinepubs/9699919799/functions/snprintf.html#tag_16_159_04 - -"Upon successful completion, the snprintf() function shall return the -number of bytes that would be written to s had n been sufficiently -large excluding the terminating null byte." - -Introduce slirp_fmt() that handles several pathological cases the -way libslirp usually expect: - -- treat error as fatal (instead of silently returning -1) - -- fmt0() will always \0 end - -- return the number of bytes actually written (instead of what would -have been written, which would usually result in OOB later), including -the ending \0 for fmt0() - -- warn if truncation happened (instead of ignoring) - -Other less common cases can still be handled with strcpy/snprintf() etc. - -Signed-off-by: Marc-André Lureau -Reviewed-by: Samuel Thibault -Message-Id: <20200127092414.169796-2-marcandre.lureau@redhat.com> -(cherry picked from libslirp commit 30648c03b27fb8d9611b723184216cd3174b6775) -Signed-off-by: Jon Maloy - -Signed-off-by: Danilo C. L. de Paula ---- - slirp/src/util.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - slirp/src/util.h | 3 +++ - 2 files changed, 65 insertions(+) - -diff --git a/slirp/src/util.c b/slirp/src/util.c -index e596087..e3b6257 100644 ---- a/slirp/src/util.c -+++ b/slirp/src/util.c -@@ -364,3 +364,65 @@ void slirp_pstrcpy(char *buf, int buf_size, const char *str) - } - *q = '\0'; - } -+ -+static int slirp_vsnprintf(char *str, size_t size, -+ const char *format, va_list args) -+{ -+ int rv = vsnprintf(str, size, format, args); -+ -+ if (rv < 0) { -+ g_error("vsnprintf() failed: %s", g_strerror(errno)); -+ } -+ -+ return rv; -+} -+ -+/* -+ * A snprintf()-like function that: -+ * - returns the number of bytes written (excluding optional \0-ending) -+ * - dies on error -+ * - warn on truncation -+ */ -+int slirp_fmt(char *str, size_t size, const char *format, ...) 
-+{ -+ va_list args; -+ int rv; -+ -+ va_start(args, format); -+ rv = slirp_vsnprintf(str, size, format, args); -+ va_end(args); -+ -+ if (rv > size) { -+ g_critical("vsnprintf() truncation"); -+ } -+ -+ return MIN(rv, size); -+} -+ -+/* -+ * A snprintf()-like function that: -+ * - always \0-end (unless size == 0) -+ * - returns the number of bytes actually written, including \0 ending -+ * - dies on error -+ * - warn on truncation -+ */ -+int slirp_fmt0(char *str, size_t size, const char *format, ...) -+{ -+ va_list args; -+ int rv; -+ -+ va_start(args, format); -+ rv = slirp_vsnprintf(str, size, format, args); -+ va_end(args); -+ -+ if (rv >= size) { -+ g_critical("vsnprintf() truncation"); -+ if (size > 0) -+ str[size - 1] = '\0'; -+ rv = size; -+ } else { -+ rv += 1; /* include \0 */ -+ } -+ -+ return rv; -+} -diff --git a/slirp/src/util.h b/slirp/src/util.h -index 3c6223c..0558dfc 100644 ---- a/slirp/src/util.h -+++ b/slirp/src/util.h -@@ -177,4 +177,7 @@ static inline int slirp_socket_set_fast_reuse(int fd) - - void slirp_pstrcpy(char *buf, int buf_size, const char *str); - -+int slirp_fmt(char *str, size_t size, const char *format, ...); -+int slirp_fmt0(char *str, size_t size, const char *format, ...); -+ - #endif --- -1.8.3.1 - diff --git a/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch b/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch deleted file mode 100644 index d416e0f..0000000 --- a/kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch +++ /dev/null @@ -1,58 +0,0 @@ -From e4631c00d8e9ee3608ef3196cbe8bec4841ee988 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 8 Jan 2020 15:04:57 +0000 -Subject: [PATCH 2/5] vfio/pci: Don't remove irqchip notifier if not registered - -RH-Author: Peter Xu -Message-id: <20200108150457.12324-2-peterx@redhat.com> -Patchwork-id: 93291 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] vfio/pci: Don't remove irqchip notifier if not registered -Bugzilla: 1782678 -RH-Acked-by: Alex 
Williamson -RH-Acked-by: Cornelia Huck -RH-Acked-by: Auger Eric -RH-Acked-by: Jens Freimann - -The kvm irqchip notifier is only registered if the device supports -INTx, however it's unconditionally removed. If the assigned device -does not support INTx, this will cause QEMU to crash when unplugging -the device from the system. Change it to conditionally remove the -notifier only if the notify hook is setup. - -CC: Eduardo Habkost -CC: David Gibson -CC: Alex Williamson -Cc: qemu-stable@nongnu.org # v4.2 -Reported-by: yanghliu@redhat.com -Debugged-by: Eduardo Habkost -Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1782678 -Signed-off-by: Peter Xu -Reviewed-by: David Gibson -Reviewed-by: Greg Kurz -Signed-off-by: Alex Williamson -(cherry picked from commit 0446f8121723b134ca1d1ed0b73e96d4a0a8689d) -Signed-off-by: Peter Xu -Signed-off-by: Danilo C. L. de Paula ---- - hw/vfio/pci.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 309535f..d717520 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3100,7 +3100,9 @@ static void vfio_exitfn(PCIDevice *pdev) - vfio_unregister_req_notifier(vdev); - vfio_unregister_err_notifier(vdev); - pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); -- kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); -+ if (vdev->irqchip_change_notifier.notify) { -+ kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); -+ } - vfio_disable_interrupts(vdev); - if (vdev->intx.mmap_timer) { - timer_free(vdev->intx.mmap_timer); --- -1.8.3.1 - diff --git a/kvm-vhost-Add-names-to-section-rounded-warning.patch b/kvm-vhost-Add-names-to-section-rounded-warning.patch deleted file mode 100644 index c41a14c..0000000 --- a/kvm-vhost-Add-names-to-section-rounded-warning.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 0d545c5850caf76ad3e8dd9bb0fbc9f86b08e220 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Fri, 24 Jan 2020 19:46:11 +0100 -Subject: [PATCH 002/116] vhost: Add names to section rounded warning -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200124194613.41119-2-dgilbert@redhat.com> -Patchwork-id: 93450 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 1/3] vhost: Add names to section rounded warning -Bugzilla: 1779041 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Philippe Mathieu-Daudé - -From: "Dr. David Alan Gilbert" - -Add the memory region names to section rounding/alignment -warnings. - -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20200116202414.157959-2-dgilbert@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit ff4776147e960b128ee68f94c728659f662f4378) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 4da0d5a..774d87d 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -590,9 +590,10 @@ static void vhost_region_add_section(struct vhost_dev *dev, - * match up in the same RAMBlock if they do. 
- */ - if (mrs_gpa < prev_gpa_start) { -- error_report("%s:Section rounded to %"PRIx64 -- " prior to previous %"PRIx64, -- __func__, mrs_gpa, prev_gpa_start); -+ error_report("%s:Section '%s' rounded to %"PRIx64 -+ " prior to previous '%s' %"PRIx64, -+ __func__, section->mr->name, mrs_gpa, -+ prev_sec->mr->name, prev_gpa_start); - /* A way to cleanly fail here would be better */ - return; - } --- -1.8.3.1 - diff --git a/kvm-vhost-Only-align-sections-for-vhost-user.patch b/kvm-vhost-Only-align-sections-for-vhost-user.patch deleted file mode 100644 index e082ce8..0000000 --- a/kvm-vhost-Only-align-sections-for-vhost-user.patch +++ /dev/null @@ -1,97 +0,0 @@ -From c35466c168e5219bf585aa65ac31fc9bdc7cbf36 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 24 Jan 2020 19:46:12 +0100 -Subject: [PATCH 003/116] vhost: Only align sections for vhost-user -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200124194613.41119-3-dgilbert@redhat.com> -Patchwork-id: 93452 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 2/3] vhost: Only align sections for vhost-user -Bugzilla: 1779041 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Philippe Mathieu-Daudé - -From: "Dr. David Alan Gilbert" - -I added hugepage alignment code in c1ece84e7c9 to deal with -vhost-user + postcopy which needs aligned pages when using userfault. -However, on x86 the lower 2MB of address space tends to be shotgun'd -with small fragments around the 512-640k range - e.g. video RAM, and -with HyperV synic pages tend to sit around there - again splitting -it up. The alignment code complains with a 'Section rounded to ...' -error and gives up. - -Since vhost-user already filters out devices without an fd -(see vhost-user.c vhost_user_mem_section_filter) it shouldn't be -affected by those overlaps. 
- -Turn the alignment off on vhost-kernel so that it doesn't try -and align, and thus won't hit the rounding issues. - -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20200116202414.157959-3-dgilbert@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Paolo Bonzini -(cherry picked from commit 76525114736e8f669766e69b715fa59ce8648aae) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost.c | 34 ++++++++++++++++++---------------- - 1 file changed, 18 insertions(+), 16 deletions(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 774d87d..25fd469 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -547,26 +547,28 @@ static void vhost_region_add_section(struct vhost_dev *dev, - uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + - section->offset_within_region; - RAMBlock *mrs_rb = section->mr->ram_block; -- size_t mrs_page = qemu_ram_pagesize(mrs_rb); - - trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, - mrs_host); - -- /* Round the section to it's page size */ -- /* First align the start down to a page boundary */ -- uint64_t alignage = mrs_host & (mrs_page - 1); -- if (alignage) { -- mrs_host -= alignage; -- mrs_size += alignage; -- mrs_gpa -= alignage; -- } -- /* Now align the size up to a page boundary */ -- alignage = mrs_size & (mrs_page - 1); -- if (alignage) { -- mrs_size += mrs_page - alignage; -- } -- trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, -- mrs_host); -+ if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { -+ /* Round the section to it's page size */ -+ /* First align the start down to a page boundary */ -+ size_t mrs_page = qemu_ram_pagesize(mrs_rb); -+ uint64_t alignage = mrs_host & (mrs_page - 1); -+ if (alignage) { -+ mrs_host -= alignage; -+ mrs_size += alignage; -+ mrs_gpa -= alignage; -+ } -+ /* Now align the size up to a page boundary */ -+ alignage = mrs_size & (mrs_page - 1); -+ if 
(alignage) { -+ mrs_size += mrs_page - alignage; -+ } -+ trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, -+ mrs_host); -+ } - - if (dev->n_tmp_sections) { - /* Since we already have at least one section, lets see if --- -1.8.3.1 - diff --git a/kvm-vhost-coding-style-fix.patch b/kvm-vhost-coding-style-fix.patch deleted file mode 100644 index 4546130..0000000 --- a/kvm-vhost-coding-style-fix.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 624d96c456536e1471968a59fbeea206309cc33b Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 24 Jan 2020 19:46:13 +0100 -Subject: [PATCH 004/116] vhost: coding style fix -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200124194613.41119-4-dgilbert@redhat.com> -Patchwork-id: 93453 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 3/3] vhost: coding style fix -Bugzilla: 1779041 -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Philippe Mathieu-Daudé - -From: "Michael S. Tsirkin" - -Drop a trailing whitespace. Make line shorter. - -Fixes: 76525114736e8 ("vhost: Only align sections for vhost-user") -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 8347505640238d3b80f9bb7510fdc1bb574bad19) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 25fd469..9edfadc 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -551,7 +551,7 @@ static void vhost_region_add_section(struct vhost_dev *dev, - trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, - mrs_host); - -- if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { -+ if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) { - /* Round the section to it's page size */ - /* First align the start down to a page boundary */ - size_t mrs_page = qemu_ram_pagesize(mrs_rb); -@@ -566,8 +566,8 @@ static void vhost_region_add_section(struct vhost_dev *dev, - if (alignage) { - mrs_size += mrs_page - alignage; - } -- trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, mrs_size, -- mrs_host); -+ trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa, -+ mrs_size, mrs_host); - } - - if (dev->n_tmp_sections) { --- -1.8.3.1 - diff --git a/kvm-vhost-user-Print-unexpected-slave-message-types.patch b/kvm-vhost-user-Print-unexpected-slave-message-types.patch deleted file mode 100644 index e5776e7..0000000 --- a/kvm-vhost-user-Print-unexpected-slave-message-types.patch +++ /dev/null @@ -1,48 +0,0 @@ -From d6abbdaeb2c35efe6793a599c98116e250b1f179 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:43 +0100 -Subject: [PATCH 072/116] vhost-user: Print unexpected slave message types -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-69-dgilbert@redhat.com> -Patchwork-id: 93519 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 068/112] vhost-user: Print unexpected slave message types -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -When we receive an unexpected message type on the slave fd, print -the type. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 0fdc465d7d5aafeae127eba488f247ac6f58df4c) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost-user.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c -index 02a9b25..e4f46ec 100644 ---- a/hw/virtio/vhost-user.c -+++ b/hw/virtio/vhost-user.c -@@ -1055,7 +1055,7 @@ static void slave_read(void *opaque) - fd[0]); - break; - default: -- error_report("Received unexpected msg type."); -+ error_report("Received unexpected msg type: %d.", hdr.request); - ret = -EINVAL; - } - --- -1.8.3.1 - diff --git a/kvm-vhost-user-fs-remove-vhostfd-property.patch b/kvm-vhost-user-fs-remove-vhostfd-property.patch deleted file mode 100644 index 5904e82..0000000 --- a/kvm-vhost-user-fs-remove-vhostfd-property.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 912af6f7c270e2939a91c9b3f62b6ba1202edc43 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:37 +0100 -Subject: [PATCH 006/116] vhost-user-fs: remove "vhostfd" property -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-3-dgilbert@redhat.com> -Patchwork-id: 93458 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 002/112] vhost-user-fs: remove "vhostfd" property -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Marc-André Lureau - -The property doesn't make much sense for a vhost-user device. - -Signed-off-by: Marc-André Lureau -Message-Id: <20191116112016.14872-1-marcandre.lureau@redhat.com> -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 703857348724319735d9be7b5b996e6445c6e6b9) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost-user-fs.c | 1 - - include/hw/virtio/vhost-user-fs.h | 1 - - 2 files changed, 2 deletions(-) - -diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c -index f0df7f4..ca0b7fc 100644 ---- a/hw/virtio/vhost-user-fs.c -+++ b/hw/virtio/vhost-user-fs.c -@@ -263,7 +263,6 @@ static Property vuf_properties[] = { - DEFINE_PROP_UINT16("num-request-queues", VHostUserFS, - conf.num_request_queues, 1), - DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128), -- DEFINE_PROP_STRING("vhostfd", VHostUserFS, conf.vhostfd), - DEFINE_PROP_END_OF_LIST(), - }; - -diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h -index 539885b..9ff1bdb 100644 ---- a/include/hw/virtio/vhost-user-fs.h -+++ b/include/hw/virtio/vhost-user-fs.h -@@ -28,7 +28,6 @@ typedef struct { - char *tag; - uint16_t num_request_queues; - uint16_t queue_size; -- char *vhostfd; - } VHostUserFSConf; - - typedef struct { --- -1.8.3.1 - diff --git a/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch b/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch deleted file mode 100644 index 3a50632..0000000 --- a/kvm-vhost-user-gpu-Drop-trailing-json-comma.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 044feb40e3041759ee77d08136f334cf3ad67c1e Mon Sep 17 00:00:00 2001 -From: 
=?UTF-8?q?J=C3=A1n=20Tomko?= -Date: Fri, 21 Feb 2020 09:49:23 +0000 -Subject: [PATCH] vhost-user-gpu: Drop trailing json comma -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Ján Tomko -Message-id: <07fed9a38495938a7180819e27f590d80cd6668d.1582278173.git.jtomko@redhat.com> -Patchwork-id: 94019 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] vhost-user-gpu: Drop trailing json comma -Bugzilla: 1805334 -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Stefan Hajnoczi - -From: Cole Robinson - -Trailing comma is not valid json: - -$ cat contrib/vhost-user-gpu/50-qemu-gpu.json.in | jq -parse error: Expected another key-value pair at line 5, column 1 - -Signed-off-by: Cole Robinson -Reviewed-by: Marc-André Lureau -Reviewed-by: Li Qiang -Reviewed-by: Philippe Mathieu-Daudé -Message-id: 7f5dd2ac9f3504e2699f23e69bc3d8051b729832.1568925097.git.crobinso@redhat.com -Signed-off-by: Gerd Hoffmann -(cherry picked from commit ca26b032e5a0e8a190c763ce828a8740d24b9b65) -Signed-off-by: Ján Tomko -Signed-off-by: Danilo C. L. 
de Paula ---- - contrib/vhost-user-gpu/50-qemu-gpu.json.in | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/contrib/vhost-user-gpu/50-qemu-gpu.json.in b/contrib/vhost-user-gpu/50-qemu-gpu.json.in -index 658b545..f5edd09 100644 ---- a/contrib/vhost-user-gpu/50-qemu-gpu.json.in -+++ b/contrib/vhost-user-gpu/50-qemu-gpu.json.in -@@ -1,5 +1,5 @@ - { - "description": "QEMU vhost-user-gpu", - "type": "gpu", -- "binary": "@libexecdir@/vhost-user-gpu", -+ "binary": "@libexecdir@/vhost-user-gpu" - } --- -1.8.3.1 - diff --git a/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch b/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch deleted file mode 100644 index ed10701..0000000 --- a/kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch +++ /dev/null @@ -1,80 +0,0 @@ -From b395ad369278d0923a590975fabbb99ec7716c6b Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Wed, 19 Feb 2020 21:34:28 +0000 -Subject: [PATCH 4/7] virtio: add ability to delete vq through a pointer -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Julia Suvorova -Message-id: <20200219213431.11913-2-jusual@redhat.com> -Patchwork-id: 93980 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/4] virtio: add ability to delete vq through a pointer -Bugzilla: 1791590 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Michael S. Tsirkin - -From: "Michael S. Tsirkin" - -Devices tend to maintain vq pointers, allow deleting them trough a vq pointer. - -Signed-off-by: Michael S. Tsirkin -Reviewed-by: David Hildenbrand -Reviewed-by: David Hildenbrand -(cherry picked from commit 722f8c51d8af223751dfb1d02de40043e8ba067e) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/virtio/virtio.c | 15 ++++++++++----- - include/hw/virtio/virtio.h | 2 ++ - 2 files changed, 12 insertions(+), 5 deletions(-) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index 3211135..d63a369 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -2335,17 +2335,22 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, - return &vdev->vq[i]; - } - -+void virtio_delete_queue(VirtQueue *vq) -+{ -+ vq->vring.num = 0; -+ vq->vring.num_default = 0; -+ vq->handle_output = NULL; -+ vq->handle_aio_output = NULL; -+ g_free(vq->used_elems); -+} -+ - void virtio_del_queue(VirtIODevice *vdev, int n) - { - if (n < 0 || n >= VIRTIO_QUEUE_MAX) { - abort(); - } - -- vdev->vq[n].vring.num = 0; -- vdev->vq[n].vring.num_default = 0; -- vdev->vq[n].handle_output = NULL; -- vdev->vq[n].handle_aio_output = NULL; -- g_free(vdev->vq[n].used_elems); -+ virtio_delete_queue(&vdev->vq[n]); - } - - static void virtio_set_isr(VirtIODevice *vdev, int value) -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index 6a20442..91167f6 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -183,6 +183,8 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, - - void virtio_del_queue(VirtIODevice *vdev, int n); - -+void virtio_delete_queue(VirtQueue *vq); -+ - void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, - unsigned int len); - void virtqueue_flush(VirtQueue *vq, unsigned int count); --- -1.8.3.1 - diff --git a/kvm-virtio-don-t-enable-notifications-during-polling.patch b/kvm-virtio-don-t-enable-notifications-during-polling.patch deleted file mode 100644 index 2dffc01..0000000 --- a/kvm-virtio-don-t-enable-notifications-during-polling.patch +++ /dev/null @@ -1,158 +0,0 @@ -From 351dd07d7b5e69cdf47260c9ea848c0c93cd2c8a Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 9 Jan 2020 11:13:25 +0000 -Subject: [PATCH 3/5] virtio: don't enable notifications during polling - 
-RH-Author: Stefan Hajnoczi -Message-id: <20200109111325.559557-2-stefanha@redhat.com> -Patchwork-id: 93298 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/1] virtio: don't enable notifications during polling -Bugzilla: 1789301 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Michael S. Tsirkin - -Virtqueue notifications are not necessary during polling, so we disable -them. This allows the guest driver to avoid MMIO vmexits. -Unfortunately the virtio-blk and virtio-scsi handler functions re-enable -notifications, defeating this optimization. - -Fix virtio-blk and virtio-scsi emulation so they leave notifications -disabled. The key thing to remember for correctness is that polling -always checks one last time after ending its loop, therefore it's safe -to lose the race when re-enabling notifications at the end of polling. - -There is a measurable performance improvement of 5-10% with the null-co -block driver. Real-life storage configurations will see a smaller -improvement because the MMIO vmexit overhead contributes less to -latency. - -Signed-off-by: Stefan Hajnoczi -Message-Id: <20191209210957.65087-1-stefanha@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit d0435bc513e23a4961b6af20164d1c6c219eb4ea) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/block/virtio-blk.c | 9 +++++++-- - hw/scsi/virtio-scsi.c | 9 +++++++-- - hw/virtio/virtio.c | 12 ++++++------ - include/hw/virtio/virtio.h | 1 + - 4 files changed, 21 insertions(+), 10 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 4c357d2..c4e55fb 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -764,13 +764,16 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) - { - VirtIOBlockReq *req; - MultiReqBuffer mrb = {}; -+ bool suppress_notifications = virtio_queue_get_notification(vq); - bool progress = false; - - aio_context_acquire(blk_get_aio_context(s->blk)); - blk_io_plug(s->blk); - - do { -- virtio_queue_set_notification(vq, 0); -+ if (suppress_notifications) { -+ virtio_queue_set_notification(vq, 0); -+ } - - while ((req = virtio_blk_get_request(s, vq))) { - progress = true; -@@ -781,7 +784,9 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) - } - } - -- virtio_queue_set_notification(vq, 1); -+ if (suppress_notifications) { -+ virtio_queue_set_notification(vq, 1); -+ } - } while (!virtio_queue_empty(vq)); - - if (mrb.num_reqs) { -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 54108c0..e2cd1df 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -597,12 +597,15 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) - { - VirtIOSCSIReq *req, *next; - int ret = 0; -+ bool suppress_notifications = virtio_queue_get_notification(vq); - bool progress = false; - - QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); - - do { -- virtio_queue_set_notification(vq, 0); -+ if (suppress_notifications) { -+ virtio_queue_set_notification(vq, 0); -+ } - - while ((req = virtio_scsi_pop_req(s, vq))) { - progress = true; -@@ -622,7 +625,9 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) - } - } - -- virtio_queue_set_notification(vq, 1); -+ if (suppress_notifications) { -+ virtio_queue_set_notification(vq, 1); -+ } - } 
while (ret != -EINVAL && !virtio_queue_empty(vq)); - - QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index 04716b5..3211135 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -432,6 +432,11 @@ static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable) - } - } - -+bool virtio_queue_get_notification(VirtQueue *vq) -+{ -+ return vq->notification; -+} -+ - void virtio_queue_set_notification(VirtQueue *vq, int enable) - { - vq->notification = enable; -@@ -3384,17 +3389,12 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque) - { - EventNotifier *n = opaque; - VirtQueue *vq = container_of(n, VirtQueue, host_notifier); -- bool progress; - - if (!vq->vring.desc || virtio_queue_empty(vq)) { - return false; - } - -- progress = virtio_queue_notify_aio_vq(vq); -- -- /* In case the handler function re-enabled notifications */ -- virtio_queue_set_notification(vq, 0); -- return progress; -+ return virtio_queue_notify_aio_vq(vq); - } - - static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index c32a815..6a20442 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -224,6 +224,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id); - - void virtio_notify_config(VirtIODevice *vdev); - -+bool virtio_queue_get_notification(VirtQueue *vq); - void virtio_queue_set_notification(VirtQueue *vq, int enable); - - int virtio_queue_ready(VirtQueue *vq); --- -1.8.3.1 - diff --git a/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch b/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch deleted file mode 100644 index 9a69ed1..0000000 --- a/kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch +++ /dev/null @@ -1,60 +0,0 @@ -From c0cf6d8a1d3b9bf3928f37fcfd5aa8ae6f1338ca Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:36 +0100 -Subject: [PATCH 005/116] virtio-fs: fix MSI-X nvectors calculation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-2-dgilbert@redhat.com> -Patchwork-id: 93455 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 001/112] virtio-fs: fix MSI-X nvectors calculation -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -The following MSI-X vectors are required: - * VIRTIO Configuration Change - * hiprio virtqueue - * requests virtqueues - -Fix the calculation to reserve enough MSI-X vectors. Otherwise guest -drivers fall back to a sub-optional configuration where all virtqueues -share a single vector. - -This change does not break live migration compatibility since -vhost-user-fs-pci devices are not migratable yet. - -Reported-by: Vivek Goyal -Signed-off-by: Stefan Hajnoczi -Message-Id: <20191209110759.35227-1-stefanha@redhat.com> -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 366844f3d1329c6423dd752891a28ccb3ee8fddd) -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost-user-fs-pci.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c -index 933a3f2..e3a649d 100644 ---- a/hw/virtio/vhost-user-fs-pci.c -+++ b/hw/virtio/vhost-user-fs-pci.c -@@ -40,7 +40,8 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) - DeviceState *vdev = DEVICE(&dev->vdev); - - if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { -- vpci_dev->nvectors = dev->vdev.conf.num_request_queues + 1; -+ /* Also reserve config change and hiprio queue vectors */ -+ vpci_dev->nvectors = dev->vdev.conf.num_request_queues + 2; - } - - qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); --- -1.8.3.1 - diff --git a/kvm-virtio-make-virtio_delete_queue-idempotent.patch b/kvm-virtio-make-virtio_delete_queue-idempotent.patch deleted file mode 100644 index 16eb1da..0000000 --- a/kvm-virtio-make-virtio_delete_queue-idempotent.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 901e65fa6ccbadeacd6c585cf49a0a7cdafb4737 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Wed, 19 Feb 2020 21:34:29 +0000 -Subject: [PATCH 5/7] virtio: make virtio_delete_queue idempotent - -RH-Author: Julia Suvorova -Message-id: <20200219213431.11913-3-jusual@redhat.com> -Patchwork-id: 93981 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/4] virtio: make virtio_delete_queue idempotent -Bugzilla: 1791590 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Michael S. Tsirkin - -From: "Michael S. Tsirkin" - -Let's make sure calling this twice is harmless - -no known instances, but seems safer. - -Suggested-by: Pan Nengyuan -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 8cd353ea0fbf0e334e015d833f612799be642296) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/virtio/virtio.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index d63a369..e6a9ba4 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -2342,6 +2342,7 @@ void virtio_delete_queue(VirtQueue *vq) - vq->handle_output = NULL; - vq->handle_aio_output = NULL; - g_free(vq->used_elems); -+ vq->used_elems = NULL; - } - - void virtio_del_queue(VirtIODevice *vdev, int n) --- -1.8.3.1 - diff --git a/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch b/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch deleted file mode 100644 index c21c699..0000000 --- a/kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 2f494c41715193522c52eafc6af2a5e33f88ceb9 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Wed, 19 Feb 2020 21:34:31 +0000 -Subject: [PATCH 7/7] virtio-net: delete also control queue when TX/RX deleted - -RH-Author: Julia Suvorova -Message-id: <20200219213431.11913-5-jusual@redhat.com> -Patchwork-id: 93983 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/4] virtio-net: delete also control queue when TX/RX deleted -Bugzilla: 1791590 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Michael S. Tsirkin - -From: Yuri Benditovich - -https://bugzilla.redhat.com/show_bug.cgi?id=1708480 -If the control queue is not deleted together with TX/RX, it -later will be ignored in freeing cache resources and hot -unplug will not be completed. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Yuri Benditovich -Message-Id: <20191226043649.14481-3-yuri.benditovich@daynix.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit d945d9f1731244ef341f74ede93120fc9de35913) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/virtio-net.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index db3d7c3..f325440 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3101,7 +3101,8 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) - for (i = 0; i < max_queues; i++) { - virtio_net_del_queue(n, i); - } -- -+ /* delete also control vq */ -+ virtio_del_queue(vdev, max_queues * 2); - qemu_announce_timer_del(&n->announce_timer, false); - g_free(n->vqs); - qemu_del_nic(n->nic); --- -1.8.3.1 - diff --git a/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch b/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch deleted file mode 100644 index c9f1086..0000000 --- a/kvm-virtio-reset-region-cache-when-on-queue-deletion.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 8bf4f561262d9282cebdb3418cdb9a69c92216a0 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Wed, 19 Feb 2020 21:34:30 +0000 -Subject: [PATCH 6/7] virtio: reset region cache when on queue deletion - -RH-Author: Julia Suvorova -Message-id: <20200219213431.11913-4-jusual@redhat.com> -Patchwork-id: 93982 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/4] virtio: reset region cache when on queue deletion -Bugzilla: 1791590 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Michael S. Tsirkin - -From: Yuri Benditovich - -https://bugzilla.redhat.com/show_bug.cgi?id=1708480 -Fix leak of region reference that prevents complete -device deletion on hot unplug. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Yuri Benditovich -Message-Id: <20191226043649.14481-2-yuri.benditovich@daynix.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 421afd2fe8dd4603216cbf36081877c391f5a2a4) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/virtio/virtio.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index e6a9ba4..f644d9a 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -2343,6 +2343,7 @@ void virtio_delete_queue(VirtQueue *vq) - vq->handle_aio_output = NULL; - g_free(vq->used_elems); - vq->used_elems = NULL; -+ virtio_virtqueue_reset_region_cache(vq); - } - - void virtio_del_queue(VirtIODevice *vdev, int n) --- -1.8.3.1 - diff --git a/kvm-virtiofs-Add-maintainers-entry.patch b/kvm-virtiofs-Add-maintainers-entry.patch deleted file mode 100644 index fec9371..0000000 --- a/kvm-virtiofs-Add-maintainers-entry.patch +++ /dev/null @@ -1,52 +0,0 @@ -From f4144443eacceb04823ee72cb2d4f9f841f05495 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:11 +0100 -Subject: [PATCH 040/116] virtiofs: Add maintainers entry -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-37-dgilbert@redhat.com> -Patchwork-id: 93491 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 036/112] virtiofs: Add maintainers entry -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit bad7d2c3ad1af9344df035aedaf8e0967a543070) -Signed-off-by: Miroslav Rezanina ---- - MAINTAINERS | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/MAINTAINERS b/MAINTAINERS -index 5e5e3e5..d1b3e26 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -1575,6 +1575,14 @@ T: git https://github.com/cohuck/qemu.git s390-next - T: git https://github.com/borntraeger/qemu.git s390-next - L: qemu-s390x@nongnu.org - -+virtiofs -+M: Dr. 
David Alan Gilbert -+M: Stefan Hajnoczi -+S: Supported -+F: tools/virtiofsd/* -+F: hw/virtio/vhost-user-fs* -+F: include/hw/virtio/vhost-user-fs.h -+ - virtio-input - M: Gerd Hoffmann - S: Maintained --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch b/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch deleted file mode 100644 index a2b91be..0000000 --- a/kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 4d9106acfd7ed9e4d197ddf9f22b79ba6c8afdd8 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:38 +0100 -Subject: [PATCH 067/116] virtiofsd: Add ID to the log with FUSE_LOG_DEBUG - level -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-64-dgilbert@redhat.com> -Patchwork-id: 93514 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 063/112] virtiofsd: Add ID to the log with FUSE_LOG_DEBUG level -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Masayoshi Mizuma - -virtiofsd has some threads, so we see a lot of logs with debug option. -It would be useful for debugging if we can identify the specific thread -from the log. - -Add ID, which is got by gettid(), to the log with FUSE_LOG_DEBUG level -so that we can grep the specific thread. - -The log is like as: - - ]# ./virtiofsd -d -o vhost_user_socket=/tmp/vhostqemu0 -o source=/tmp/share0 -o cache=auto - ... 
- [ID: 00000097] unique: 12696, success, outsize: 120 - [ID: 00000097] virtio_send_msg: elem 18: with 2 in desc of length 120 - [ID: 00000003] fv_queue_thread: Got queue event on Queue 1 - [ID: 00000003] fv_queue_thread: Queue 1 gave evalue: 1 available: in: 65552 out: 80 - [ID: 00000003] fv_queue_thread: Waiting for Queue 1 event - [ID: 00000071] fv_queue_worker: elem 33: with 2 out desc of length 80 bad_in_num=0 bad_out_num=0 - [ID: 00000071] unique: 12694, opcode: READ (15), nodeid: 2, insize: 80, pid: 2014 - [ID: 00000071] lo_read(ino=2, size=65536, off=131072) - -Signed-off-by: Masayoshi Mizuma - -Signed-off-by: Dr. David Alan Gilbert - added rework as suggested by Daniel P. Berrangé during review -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 36f3846902bd41413f6c0bf797dee509028c29f4) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index ff6910f..f08324f 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -43,6 +43,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -2268,10 +2269,17 @@ static void setup_nofile_rlimit(void) - - static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) - { -+ g_autofree char *localfmt = NULL; -+ - if (current_log_level < level) { - return; - } - -+ if (current_log_level == FUSE_LOG_DEBUG) { -+ localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), fmt); -+ fmt = localfmt; -+ } -+ - if (use_syslog) { - int priority = LOG_ERR; - switch (level) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch b/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch deleted file mode 100644 index b017bf4..0000000 --- a/kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch +++ 
/dev/null @@ -1,106 +0,0 @@ -From 709408de33112d32b7c6675f8c9320b8bebccd58 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:05 +0100 -Subject: [PATCH 034/116] virtiofsd: Add Makefile wiring for virtiofsd contrib -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-31-dgilbert@redhat.com> -Patchwork-id: 93482 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 030/112] virtiofsd: Add Makefile wiring for virtiofsd contrib -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Wire up the building of the virtiofsd in tools. - -virtiofsd relies on Linux-specific system calls and seccomp. Anyone -wishing to port it to other host operating systems should do so -carefully and without reducing security. - -Only allow building on Linux hosts. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Liam Merwick -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 81bfc42dcf473bc8d3790622633410da72d8e622) -Signed-off-by: Miroslav Rezanina ---- - Makefile | 10 ++++++++++ - Makefile.objs | 1 + - tools/virtiofsd/Makefile.objs | 9 +++++++++ - 3 files changed, 20 insertions(+) - create mode 100644 tools/virtiofsd/Makefile.objs - -diff --git a/Makefile b/Makefile -index 4254950..1526775 100644 ---- a/Makefile -+++ b/Makefile -@@ -330,6 +330,10 @@ endif - endif - endif - -+ifdef CONFIG_LINUX -+HELPERS-y += virtiofsd$(EXESUF) -+endif -+ - # Sphinx does not allow building manuals into the same directory as - # the source files, so if we're doing an in-tree QEMU build we must - # build the manuals into a subdirectory (and then install them from -@@ -430,6 +434,7 @@ dummy := $(call unnest-vars,, \ - elf2dmp-obj-y \ - ivshmem-client-obj-y \ - ivshmem-server-obj-y \ -+ virtiofsd-obj-y \ - rdmacm-mux-obj-y \ - libvhost-user-obj-y \ - vhost-user-scsi-obj-y \ -@@ -675,6 +680,11 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad" - rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) - $(call LINK, $^) - -+ifdef CONFIG_LINUX # relies on Linux-specific syscalls -+virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) -+ $(call LINK, $^) -+endif -+ - vhost-user-gpu$(EXESUF): $(vhost-user-gpu-obj-y) $(libvhost-user-obj-y) libqemuutil.a libqemustub.a - $(call LINK, $^) - -diff --git a/Makefile.objs b/Makefile.objs -index fcf63e1..1a8f288 100644 ---- a/Makefile.objs -+++ b/Makefile.objs -@@ -125,6 +125,7 @@ vhost-user-blk-obj-y = contrib/vhost-user-blk/ - rdmacm-mux-obj-y = contrib/rdmacm-mux/ - vhost-user-input-obj-y = contrib/vhost-user-input/ - vhost-user-gpu-obj-y = contrib/vhost-user-gpu/ -+virtiofsd-obj-y = tools/virtiofsd/ - - ###################################################################### - trace-events-subdirs = -diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs -new file mode 100644 -index 0000000..45a8075 ---- /dev/null -+++ 
b/tools/virtiofsd/Makefile.objs -@@ -0,0 +1,9 @@ -+virtiofsd-obj-y = buffer.o \ -+ fuse_opt.o \ -+ fuse_log.o \ -+ fuse_lowlevel.o \ -+ fuse_signals.o \ -+ fuse_virtio.o \ -+ helper.o \ -+ passthrough_ll.o -+ --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Add-auxiliary-.c-s.patch b/kvm-virtiofsd-Add-auxiliary-.c-s.patch deleted file mode 100644 index 90150d9..0000000 --- a/kvm-virtiofsd-Add-auxiliary-.c-s.patch +++ /dev/null @@ -1,1387 +0,0 @@ -From 55b4059d6399c212109c758190e15b574accdd07 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:41 +0100 -Subject: [PATCH 010/116] virtiofsd: Add auxiliary .c's -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-7-dgilbert@redhat.com> -Patchwork-id: 93461 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 006/112] virtiofsd: Add auxiliary .c's -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Add most of the non-main .c files we need from upstream fuse-3.8.0 - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit ffcf8d9f8649c6e56b1193bbbc9c9f7388920043) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 321 ++++++++++++++++++++++++++++++ - tools/virtiofsd/fuse_log.c | 40 ++++ - tools/virtiofsd/fuse_opt.c | 423 +++++++++++++++++++++++++++++++++++++++ - tools/virtiofsd/fuse_signals.c | 91 +++++++++ - tools/virtiofsd/helper.c | 440 +++++++++++++++++++++++++++++++++++++++++ - 5 files changed, 1315 insertions(+) - create mode 100644 tools/virtiofsd/buffer.c - create mode 100644 tools/virtiofsd/fuse_log.c - create mode 100644 tools/virtiofsd/fuse_opt.c - create mode 100644 tools/virtiofsd/fuse_signals.c - create mode 100644 tools/virtiofsd/helper.c - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -new file mode 100644 -index 0000000..5ab9b87 ---- /dev/null -+++ b/tools/virtiofsd/buffer.c -@@ -0,0 +1,321 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2010 Miklos Szeredi -+ -+ Functions for dealing with `struct fuse_buf` and `struct -+ fuse_bufvec`. -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB -+*/ -+ -+#define _GNU_SOURCE -+ -+#include "config.h" -+#include "fuse_i.h" -+#include "fuse_lowlevel.h" -+#include -+#include -+#include -+#include -+ -+size_t fuse_buf_size(const struct fuse_bufvec *bufv) -+{ -+ size_t i; -+ size_t size = 0; -+ -+ for (i = 0; i < bufv->count; i++) { -+ if (bufv->buf[i].size == SIZE_MAX) -+ size = SIZE_MAX; -+ else -+ size += bufv->buf[i].size; -+ } -+ -+ return size; -+} -+ -+static size_t min_size(size_t s1, size_t s2) -+{ -+ return s1 < s2 ? 
s1 : s2; -+} -+ -+static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) -+{ -+ ssize_t res = 0; -+ size_t copied = 0; -+ -+ while (len) { -+ if (dst->flags & FUSE_BUF_FD_SEEK) { -+ res = pwrite(dst->fd, (char *)src->mem + src_off, len, -+ dst->pos + dst_off); -+ } else { -+ res = write(dst->fd, (char *)src->mem + src_off, len); -+ } -+ if (res == -1) { -+ if (!copied) -+ return -errno; -+ break; -+ } -+ if (res == 0) -+ break; -+ -+ copied += res; -+ if (!(dst->flags & FUSE_BUF_FD_RETRY)) -+ break; -+ -+ src_off += res; -+ dst_off += res; -+ len -= res; -+ } -+ -+ return copied; -+} -+ -+static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) -+{ -+ ssize_t res = 0; -+ size_t copied = 0; -+ -+ while (len) { -+ if (src->flags & FUSE_BUF_FD_SEEK) { -+ res = pread(src->fd, (char *)dst->mem + dst_off, len, -+ src->pos + src_off); -+ } else { -+ res = read(src->fd, (char *)dst->mem + dst_off, len); -+ } -+ if (res == -1) { -+ if (!copied) -+ return -errno; -+ break; -+ } -+ if (res == 0) -+ break; -+ -+ copied += res; -+ if (!(src->flags & FUSE_BUF_FD_RETRY)) -+ break; -+ -+ dst_off += res; -+ src_off += res; -+ len -= res; -+ } -+ -+ return copied; -+} -+ -+static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) -+{ -+ char buf[4096]; -+ struct fuse_buf tmp = { -+ .size = sizeof(buf), -+ .flags = 0, -+ }; -+ ssize_t res; -+ size_t copied = 0; -+ -+ tmp.mem = buf; -+ -+ while (len) { -+ size_t this_len = min_size(tmp.size, len); -+ size_t read_len; -+ -+ res = fuse_buf_read(&tmp, 0, src, src_off, this_len); -+ if (res < 0) { -+ if (!copied) -+ return res; -+ break; -+ } -+ if (res == 0) -+ break; -+ -+ read_len = res; -+ res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); -+ if (res < 0) { -+ if (!copied) -+ return res; -+ 
break; -+ } -+ if (res == 0) -+ break; -+ -+ copied += res; -+ -+ if (res < this_len) -+ break; -+ -+ dst_off += res; -+ src_off += res; -+ len -= res; -+ } -+ -+ return copied; -+} -+ -+#ifdef HAVE_SPLICE -+static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len, enum fuse_buf_copy_flags flags) -+{ -+ int splice_flags = 0; -+ off_t *srcpos = NULL; -+ off_t *dstpos = NULL; -+ off_t srcpos_val; -+ off_t dstpos_val; -+ ssize_t res; -+ size_t copied = 0; -+ -+ if (flags & FUSE_BUF_SPLICE_MOVE) -+ splice_flags |= SPLICE_F_MOVE; -+ if (flags & FUSE_BUF_SPLICE_NONBLOCK) -+ splice_flags |= SPLICE_F_NONBLOCK; -+ -+ if (src->flags & FUSE_BUF_FD_SEEK) { -+ srcpos_val = src->pos + src_off; -+ srcpos = &srcpos_val; -+ } -+ if (dst->flags & FUSE_BUF_FD_SEEK) { -+ dstpos_val = dst->pos + dst_off; -+ dstpos = &dstpos_val; -+ } -+ -+ while (len) { -+ res = splice(src->fd, srcpos, dst->fd, dstpos, len, -+ splice_flags); -+ if (res == -1) { -+ if (copied) -+ break; -+ -+ if (errno != EINVAL || (flags & FUSE_BUF_FORCE_SPLICE)) -+ return -errno; -+ -+ /* Maybe splice is not supported for this combination */ -+ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, -+ len); -+ } -+ if (res == 0) -+ break; -+ -+ copied += res; -+ if (!(src->flags & FUSE_BUF_FD_RETRY) && -+ !(dst->flags & FUSE_BUF_FD_RETRY)) { -+ break; -+ } -+ -+ len -= res; -+ } -+ -+ return copied; -+} -+#else -+static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len, enum fuse_buf_copy_flags flags) -+{ -+ (void) flags; -+ -+ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); -+} -+#endif -+ -+ -+static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, -+ const struct fuse_buf *src, size_t src_off, -+ size_t len, enum fuse_buf_copy_flags flags) -+{ -+ int src_is_fd = src->flags & FUSE_BUF_IS_FD; -+ int dst_is_fd = dst->flags & 
FUSE_BUF_IS_FD; -+ -+ if (!src_is_fd && !dst_is_fd) { -+ char *dstmem = (char *)dst->mem + dst_off; -+ char *srcmem = (char *)src->mem + src_off; -+ -+ if (dstmem != srcmem) { -+ if (dstmem + len <= srcmem || srcmem + len <= dstmem) -+ memcpy(dstmem, srcmem, len); -+ else -+ memmove(dstmem, srcmem, len); -+ } -+ -+ return len; -+ } else if (!src_is_fd) { -+ return fuse_buf_write(dst, dst_off, src, src_off, len); -+ } else if (!dst_is_fd) { -+ return fuse_buf_read(dst, dst_off, src, src_off, len); -+ } else if (flags & FUSE_BUF_NO_SPLICE) { -+ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); -+ } else { -+ return fuse_buf_splice(dst, dst_off, src, src_off, len, flags); -+ } -+} -+ -+static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv) -+{ -+ if (bufv->idx < bufv->count) -+ return &bufv->buf[bufv->idx]; -+ else -+ return NULL; -+} -+ -+static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) -+{ -+ const struct fuse_buf *buf = fuse_bufvec_current(bufv); -+ -+ bufv->off += len; -+ assert(bufv->off <= buf->size); -+ if (bufv->off == buf->size) { -+ assert(bufv->idx < bufv->count); -+ bufv->idx++; -+ if (bufv->idx == bufv->count) -+ return 0; -+ bufv->off = 0; -+ } -+ return 1; -+} -+ -+ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, -+ enum fuse_buf_copy_flags flags) -+{ -+ size_t copied = 0; -+ -+ if (dstv == srcv) -+ return fuse_buf_size(dstv); -+ -+ for (;;) { -+ const struct fuse_buf *src = fuse_bufvec_current(srcv); -+ const struct fuse_buf *dst = fuse_bufvec_current(dstv); -+ size_t src_len; -+ size_t dst_len; -+ size_t len; -+ ssize_t res; -+ -+ if (src == NULL || dst == NULL) -+ break; -+ -+ src_len = src->size - srcv->off; -+ dst_len = dst->size - dstv->off; -+ len = min_size(src_len, dst_len); -+ -+ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); -+ if (res < 0) { -+ if (!copied) -+ return res; -+ break; -+ } -+ copied += res; -+ -+ if (!fuse_bufvec_advance(srcv, res) 
|| -+ !fuse_bufvec_advance(dstv, res)) -+ break; -+ -+ if (res < len) -+ break; -+ } -+ -+ return copied; -+} -diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c -new file mode 100644 -index 0000000..0d268ab ---- /dev/null -+++ b/tools/virtiofsd/fuse_log.c -@@ -0,0 +1,40 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2019 Red Hat, Inc. -+ -+ Logging API. -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB -+*/ -+ -+#include "fuse_log.h" -+ -+#include -+#include -+ -+static void default_log_func( -+ __attribute__(( unused )) enum fuse_log_level level, -+ const char *fmt, va_list ap) -+{ -+ vfprintf(stderr, fmt, ap); -+} -+ -+static fuse_log_func_t log_func = default_log_func; -+ -+void fuse_set_log_func(fuse_log_func_t func) -+{ -+ if (!func) -+ func = default_log_func; -+ -+ log_func = func; -+} -+ -+void fuse_log(enum fuse_log_level level, const char *fmt, ...) -+{ -+ va_list ap; -+ -+ va_start(ap, fmt); -+ log_func(level, fmt, ap); -+ va_end(ap); -+} -diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c -new file mode 100644 -index 0000000..93066b9 ---- /dev/null -+++ b/tools/virtiofsd/fuse_opt.c -@@ -0,0 +1,423 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ Implementation of option parsing routines (dealing with `struct -+ fuse_args`). -+ -+ This program can be distributed under the terms of the GNU LGPLv2. 
-+ See the file COPYING.LIB -+*/ -+ -+#include "config.h" -+#include "fuse_i.h" -+#include "fuse_opt.h" -+#include "fuse_misc.h" -+ -+#include -+#include -+#include -+#include -+ -+struct fuse_opt_context { -+ void *data; -+ const struct fuse_opt *opt; -+ fuse_opt_proc_t proc; -+ int argctr; -+ int argc; -+ char **argv; -+ struct fuse_args outargs; -+ char *opts; -+ int nonopt; -+}; -+ -+void fuse_opt_free_args(struct fuse_args *args) -+{ -+ if (args) { -+ if (args->argv && args->allocated) { -+ int i; -+ for (i = 0; i < args->argc; i++) -+ free(args->argv[i]); -+ free(args->argv); -+ } -+ args->argc = 0; -+ args->argv = NULL; -+ args->allocated = 0; -+ } -+} -+ -+static int alloc_failed(void) -+{ -+ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -+ return -1; -+} -+ -+int fuse_opt_add_arg(struct fuse_args *args, const char *arg) -+{ -+ char **newargv; -+ char *newarg; -+ -+ assert(!args->argv || args->allocated); -+ -+ newarg = strdup(arg); -+ if (!newarg) -+ return alloc_failed(); -+ -+ newargv = realloc(args->argv, (args->argc + 2) * sizeof(char *)); -+ if (!newargv) { -+ free(newarg); -+ return alloc_failed(); -+ } -+ -+ args->argv = newargv; -+ args->allocated = 1; -+ args->argv[args->argc++] = newarg; -+ args->argv[args->argc] = NULL; -+ return 0; -+} -+ -+static int fuse_opt_insert_arg_common(struct fuse_args *args, int pos, -+ const char *arg) -+{ -+ assert(pos <= args->argc); -+ if (fuse_opt_add_arg(args, arg) == -1) -+ return -1; -+ -+ if (pos != args->argc - 1) { -+ char *newarg = args->argv[args->argc - 1]; -+ memmove(&args->argv[pos + 1], &args->argv[pos], -+ sizeof(char *) * (args->argc - pos - 1)); -+ args->argv[pos] = newarg; -+ } -+ return 0; -+} -+ -+int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg) -+{ -+ return fuse_opt_insert_arg_common(args, pos, arg); -+} -+ -+static int next_arg(struct fuse_opt_context *ctx, const char *opt) -+{ -+ if (ctx->argctr + 1 >= ctx->argc) { -+ fuse_log(FUSE_LOG_ERR, "fuse: 
missing argument after `%s'\n", opt); -+ return -1; -+ } -+ ctx->argctr++; -+ return 0; -+} -+ -+static int add_arg(struct fuse_opt_context *ctx, const char *arg) -+{ -+ return fuse_opt_add_arg(&ctx->outargs, arg); -+} -+ -+static int add_opt_common(char **opts, const char *opt, int esc) -+{ -+ unsigned oldlen = *opts ? strlen(*opts) : 0; -+ char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); -+ -+ if (!d) -+ return alloc_failed(); -+ -+ *opts = d; -+ if (oldlen) { -+ d += oldlen; -+ *d++ = ','; -+ } -+ -+ for (; *opt; opt++) { -+ if (esc && (*opt == ',' || *opt == '\\')) -+ *d++ = '\\'; -+ *d++ = *opt; -+ } -+ *d = '\0'; -+ -+ return 0; -+} -+ -+int fuse_opt_add_opt(char **opts, const char *opt) -+{ -+ return add_opt_common(opts, opt, 0); -+} -+ -+int fuse_opt_add_opt_escaped(char **opts, const char *opt) -+{ -+ return add_opt_common(opts, opt, 1); -+} -+ -+static int add_opt(struct fuse_opt_context *ctx, const char *opt) -+{ -+ return add_opt_common(&ctx->opts, opt, 1); -+} -+ -+static int call_proc(struct fuse_opt_context *ctx, const char *arg, int key, -+ int iso) -+{ -+ if (key == FUSE_OPT_KEY_DISCARD) -+ return 0; -+ -+ if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { -+ int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); -+ if (res == -1 || !res) -+ return res; -+ } -+ if (iso) -+ return add_opt(ctx, arg); -+ else -+ return add_arg(ctx, arg); -+} -+ -+static int match_template(const char *t, const char *arg, unsigned *sepp) -+{ -+ int arglen = strlen(arg); -+ const char *sep = strchr(t, '='); -+ sep = sep ? 
sep : strchr(t, ' '); -+ if (sep && (!sep[1] || sep[1] == '%')) { -+ int tlen = sep - t; -+ if (sep[0] == '=') -+ tlen ++; -+ if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { -+ *sepp = sep - t; -+ return 1; -+ } -+ } -+ if (strcmp(t, arg) == 0) { -+ *sepp = 0; -+ return 1; -+ } -+ return 0; -+} -+ -+static const struct fuse_opt *find_opt(const struct fuse_opt *opt, -+ const char *arg, unsigned *sepp) -+{ -+ for (; opt && opt->templ; opt++) -+ if (match_template(opt->templ, arg, sepp)) -+ return opt; -+ return NULL; -+} -+ -+int fuse_opt_match(const struct fuse_opt *opts, const char *opt) -+{ -+ unsigned dummy; -+ return find_opt(opts, opt, &dummy) ? 1 : 0; -+} -+ -+static int process_opt_param(void *var, const char *format, const char *param, -+ const char *arg) -+{ -+ assert(format[0] == '%'); -+ if (format[1] == 's') { -+ char **s = var; -+ char *copy = strdup(param); -+ if (!copy) -+ return alloc_failed(); -+ -+ free(*s); -+ *s = copy; -+ } else { -+ if (sscanf(param, format, var) != 1) { -+ fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", arg); -+ return -1; -+ } -+ } -+ return 0; -+} -+ -+static int process_opt(struct fuse_opt_context *ctx, -+ const struct fuse_opt *opt, unsigned sep, -+ const char *arg, int iso) -+{ -+ if (opt->offset == -1U) { -+ if (call_proc(ctx, arg, opt->value, iso) == -1) -+ return -1; -+ } else { -+ void *var = (char *)ctx->data + opt->offset; -+ if (sep && opt->templ[sep + 1]) { -+ const char *param = arg + sep; -+ if (opt->templ[sep] == '=') -+ param ++; -+ if (process_opt_param(var, opt->templ + sep + 1, -+ param, arg) == -1) -+ return -1; -+ } else -+ *(int *)var = opt->value; -+ } -+ return 0; -+} -+ -+static int process_opt_sep_arg(struct fuse_opt_context *ctx, -+ const struct fuse_opt *opt, unsigned sep, -+ const char *arg, int iso) -+{ -+ int res; -+ char *newarg; -+ char *param; -+ -+ if (next_arg(ctx, arg) == -1) -+ return -1; -+ -+ param = ctx->argv[ctx->argctr]; -+ newarg = malloc(sep + strlen(param) + 
1); -+ if (!newarg) -+ return alloc_failed(); -+ -+ memcpy(newarg, arg, sep); -+ strcpy(newarg + sep, param); -+ res = process_opt(ctx, opt, sep, newarg, iso); -+ free(newarg); -+ -+ return res; -+} -+ -+static int process_gopt(struct fuse_opt_context *ctx, const char *arg, int iso) -+{ -+ unsigned sep; -+ const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); -+ if (opt) { -+ for (; opt; opt = find_opt(opt + 1, arg, &sep)) { -+ int res; -+ if (sep && opt->templ[sep] == ' ' && !arg[sep]) -+ res = process_opt_sep_arg(ctx, opt, sep, arg, -+ iso); -+ else -+ res = process_opt(ctx, opt, sep, arg, iso); -+ if (res == -1) -+ return -1; -+ } -+ return 0; -+ } else -+ return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); -+} -+ -+static int process_real_option_group(struct fuse_opt_context *ctx, char *opts) -+{ -+ char *s = opts; -+ char *d = s; -+ int end = 0; -+ -+ while (!end) { -+ if (*s == '\0') -+ end = 1; -+ if (*s == ',' || end) { -+ int res; -+ -+ *d = '\0'; -+ res = process_gopt(ctx, opts, 1); -+ if (res == -1) -+ return -1; -+ d = opts; -+ } else { -+ if (s[0] == '\\' && s[1] != '\0') { -+ s++; -+ if (s[0] >= '0' && s[0] <= '3' && -+ s[1] >= '0' && s[1] <= '7' && -+ s[2] >= '0' && s[2] <= '7') { -+ *d++ = (s[0] - '0') * 0100 + -+ (s[1] - '0') * 0010 + -+ (s[2] - '0'); -+ s += 2; -+ } else { -+ *d++ = *s; -+ } -+ } else { -+ *d++ = *s; -+ } -+ } -+ s++; -+ } -+ -+ return 0; -+} -+ -+static int process_option_group(struct fuse_opt_context *ctx, const char *opts) -+{ -+ int res; -+ char *copy = strdup(opts); -+ -+ if (!copy) { -+ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -+ return -1; -+ } -+ res = process_real_option_group(ctx, copy); -+ free(copy); -+ return res; -+} -+ -+static int process_one(struct fuse_opt_context *ctx, const char *arg) -+{ -+ if (ctx->nonopt || arg[0] != '-') -+ return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); -+ else if (arg[1] == 'o') { -+ if (arg[2]) -+ return process_option_group(ctx, arg + 2); -+ else { -+ if 
(next_arg(ctx, arg) == -1) -+ return -1; -+ -+ return process_option_group(ctx, -+ ctx->argv[ctx->argctr]); -+ } -+ } else if (arg[1] == '-' && !arg[2]) { -+ if (add_arg(ctx, arg) == -1) -+ return -1; -+ ctx->nonopt = ctx->outargs.argc; -+ return 0; -+ } else -+ return process_gopt(ctx, arg, 0); -+} -+ -+static int opt_parse(struct fuse_opt_context *ctx) -+{ -+ if (ctx->argc) { -+ if (add_arg(ctx, ctx->argv[0]) == -1) -+ return -1; -+ } -+ -+ for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) -+ if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) -+ return -1; -+ -+ if (ctx->opts) { -+ if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || -+ fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) -+ return -1; -+ } -+ -+ /* If option separator ("--") is the last argument, remove it */ -+ if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && -+ strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { -+ free(ctx->outargs.argv[ctx->outargs.argc - 1]); -+ ctx->outargs.argv[--ctx->outargs.argc] = NULL; -+ } -+ -+ return 0; -+} -+ -+int fuse_opt_parse(struct fuse_args *args, void *data, -+ const struct fuse_opt opts[], fuse_opt_proc_t proc) -+{ -+ int res; -+ struct fuse_opt_context ctx = { -+ .data = data, -+ .opt = opts, -+ .proc = proc, -+ }; -+ -+ if (!args || !args->argv || !args->argc) -+ return 0; -+ -+ ctx.argc = args->argc; -+ ctx.argv = args->argv; -+ -+ res = opt_parse(&ctx); -+ if (res != -1) { -+ struct fuse_args tmp = *args; -+ *args = ctx.outargs; -+ ctx.outargs = tmp; -+ } -+ free(ctx.opts); -+ fuse_opt_free_args(&ctx.outargs); -+ return res; -+} -diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c -new file mode 100644 -index 0000000..4271947 ---- /dev/null -+++ b/tools/virtiofsd/fuse_signals.c -@@ -0,0 +1,91 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ Utility functions for setting signal handlers. 
-+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB -+*/ -+ -+#include "config.h" -+#include "fuse_lowlevel.h" -+#include "fuse_i.h" -+ -+#include -+#include -+#include -+#include -+ -+static struct fuse_session *fuse_instance; -+ -+static void exit_handler(int sig) -+{ -+ if (fuse_instance) { -+ fuse_session_exit(fuse_instance); -+ if(sig <= 0) { -+ fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); -+ abort(); -+ } -+ fuse_instance->error = sig; -+ } -+} -+ -+static void do_nothing(int sig) -+{ -+ (void) sig; -+} -+ -+static int set_one_signal_handler(int sig, void (*handler)(int), int remove) -+{ -+ struct sigaction sa; -+ struct sigaction old_sa; -+ -+ memset(&sa, 0, sizeof(struct sigaction)); -+ sa.sa_handler = remove ? SIG_DFL : handler; -+ sigemptyset(&(sa.sa_mask)); -+ sa.sa_flags = 0; -+ -+ if (sigaction(sig, NULL, &old_sa) == -1) { -+ perror("fuse: cannot get old signal handler"); -+ return -1; -+ } -+ -+ if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && -+ sigaction(sig, &sa, NULL) == -1) { -+ perror("fuse: cannot set signal handler"); -+ return -1; -+ } -+ return 0; -+} -+ -+int fuse_set_signal_handlers(struct fuse_session *se) -+{ -+ /* If we used SIG_IGN instead of the do_nothing function, -+ then we would be unable to tell if we set SIG_IGN (and -+ thus should reset to SIG_DFL in fuse_remove_signal_handlers) -+ or if it was already set to SIG_IGN (and should be left -+ untouched. 
*/ -+ if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) -+ return -1; -+ -+ fuse_instance = se; -+ return 0; -+} -+ -+void fuse_remove_signal_handlers(struct fuse_session *se) -+{ -+ if (fuse_instance != se) -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: fuse_remove_signal_handlers: unknown session\n"); -+ else -+ fuse_instance = NULL; -+ -+ set_one_signal_handler(SIGHUP, exit_handler, 1); -+ set_one_signal_handler(SIGINT, exit_handler, 1); -+ set_one_signal_handler(SIGTERM, exit_handler, 1); -+ set_one_signal_handler(SIGPIPE, do_nothing, 1); -+} -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -new file mode 100644 -index 0000000..64ff7ad ---- /dev/null -+++ b/tools/virtiofsd/helper.c -@@ -0,0 +1,440 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ Helper functions to create (simple) standalone programs. With the -+ aid of these functions it should be possible to create full FUSE -+ file system by implementing nothing but the request handlers. -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. 
-+*/ -+ -+#include "config.h" -+#include "fuse_i.h" -+#include "fuse_misc.h" -+#include "fuse_opt.h" -+#include "fuse_lowlevel.h" -+#include "mount_util.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define FUSE_HELPER_OPT(t, p) \ -+ { t, offsetof(struct fuse_cmdline_opts, p), 1 } -+ -+static const struct fuse_opt fuse_helper_opts[] = { -+ FUSE_HELPER_OPT("-h", show_help), -+ FUSE_HELPER_OPT("--help", show_help), -+ FUSE_HELPER_OPT("-V", show_version), -+ FUSE_HELPER_OPT("--version", show_version), -+ FUSE_HELPER_OPT("-d", debug), -+ FUSE_HELPER_OPT("debug", debug), -+ FUSE_HELPER_OPT("-d", foreground), -+ FUSE_HELPER_OPT("debug", foreground), -+ FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), -+ FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), -+ FUSE_HELPER_OPT("-f", foreground), -+ FUSE_HELPER_OPT("-s", singlethread), -+ FUSE_HELPER_OPT("fsname=", nodefault_subtype), -+ FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), -+#ifndef __FreeBSD__ -+ FUSE_HELPER_OPT("subtype=", nodefault_subtype), -+ FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), -+#endif -+ FUSE_HELPER_OPT("clone_fd", clone_fd), -+ FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), -+ FUSE_OPT_END -+}; -+ -+struct fuse_conn_info_opts { -+ int atomic_o_trunc; -+ int no_remote_posix_lock; -+ int no_remote_flock; -+ int splice_write; -+ int splice_move; -+ int splice_read; -+ int no_splice_write; -+ int no_splice_move; -+ int no_splice_read; -+ int auto_inval_data; -+ int no_auto_inval_data; -+ int no_readdirplus; -+ int no_readdirplus_auto; -+ int async_dio; -+ int no_async_dio; -+ int writeback_cache; -+ int no_writeback_cache; -+ int async_read; -+ int sync_read; -+ unsigned max_write; -+ unsigned max_readahead; -+ unsigned max_background; -+ unsigned congestion_threshold; -+ unsigned time_gran; -+ int set_max_write; -+ int set_max_readahead; -+ int set_max_background; -+ int set_congestion_threshold; -+ int set_time_gran; -+}; -+ -+#define CONN_OPTION(t, p, v) \ -+ 
{ t, offsetof(struct fuse_conn_info_opts, p), v } -+static const struct fuse_opt conn_info_opt_spec[] = { -+ CONN_OPTION("max_write=%u", max_write, 0), -+ CONN_OPTION("max_write=", set_max_write, 1), -+ CONN_OPTION("max_readahead=%u", max_readahead, 0), -+ CONN_OPTION("max_readahead=", set_max_readahead, 1), -+ CONN_OPTION("max_background=%u", max_background, 0), -+ CONN_OPTION("max_background=", set_max_background, 1), -+ CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), -+ CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), -+ CONN_OPTION("sync_read", sync_read, 1), -+ CONN_OPTION("async_read", async_read, 1), -+ CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), -+ CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), -+ CONN_OPTION("no_remote_lock", no_remote_flock, 1), -+ CONN_OPTION("no_remote_flock", no_remote_flock, 1), -+ CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), -+ CONN_OPTION("splice_write", splice_write, 1), -+ CONN_OPTION("no_splice_write", no_splice_write, 1), -+ CONN_OPTION("splice_move", splice_move, 1), -+ CONN_OPTION("no_splice_move", no_splice_move, 1), -+ CONN_OPTION("splice_read", splice_read, 1), -+ CONN_OPTION("no_splice_read", no_splice_read, 1), -+ CONN_OPTION("auto_inval_data", auto_inval_data, 1), -+ CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), -+ CONN_OPTION("readdirplus=no", no_readdirplus, 1), -+ CONN_OPTION("readdirplus=yes", no_readdirplus, 0), -+ CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), -+ CONN_OPTION("readdirplus=auto", no_readdirplus, 0), -+ CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), -+ CONN_OPTION("async_dio", async_dio, 1), -+ CONN_OPTION("no_async_dio", no_async_dio, 1), -+ CONN_OPTION("writeback_cache", writeback_cache, 1), -+ CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), -+ CONN_OPTION("time_gran=%u", time_gran, 0), -+ CONN_OPTION("time_gran=", set_time_gran, 1), -+ FUSE_OPT_END -+}; -+ -+ -+void fuse_cmdline_help(void) 
-+{ -+ printf(" -h --help print help\n" -+ " -V --version print version\n" -+ " -d -o debug enable debug output (implies -f)\n" -+ " -f foreground operation\n" -+ " -s disable multi-threaded operation\n" -+ " -o clone_fd use separate fuse device fd for each thread\n" -+ " (may improve performance)\n" -+ " -o max_idle_threads the maximum number of idle worker threads\n" -+ " allowed (default: 10)\n"); -+} -+ -+static int fuse_helper_opt_proc(void *data, const char *arg, int key, -+ struct fuse_args *outargs) -+{ -+ (void) outargs; -+ struct fuse_cmdline_opts *opts = data; -+ -+ switch (key) { -+ case FUSE_OPT_KEY_NONOPT: -+ if (!opts->mountpoint) { -+ if (fuse_mnt_parse_fuse_fd(arg) != -1) { -+ return fuse_opt_add_opt(&opts->mountpoint, arg); -+ } -+ -+ char mountpoint[PATH_MAX] = ""; -+ if (realpath(arg, mountpoint) == NULL) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: bad mount point `%s': %s\n", -+ arg, strerror(errno)); -+ return -1; -+ } -+ return fuse_opt_add_opt(&opts->mountpoint, mountpoint); -+ } else { -+ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); -+ return -1; -+ } -+ -+ default: -+ /* Pass through unknown options */ -+ return 1; -+ } -+} -+ -+/* Under FreeBSD, there is no subtype option so this -+ function actually sets the fsname */ -+static int add_default_subtype(const char *progname, struct fuse_args *args) -+{ -+ int res; -+ char *subtype_opt; -+ -+ const char *basename = strrchr(progname, '/'); -+ if (basename == NULL) -+ basename = progname; -+ else if (basename[1] != '\0') -+ basename++; -+ -+ subtype_opt = (char *) malloc(strlen(basename) + 64); -+ if (subtype_opt == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -+ return -1; -+ } -+#ifdef __FreeBSD__ -+ sprintf(subtype_opt, "-ofsname=%s", basename); -+#else -+ sprintf(subtype_opt, "-osubtype=%s", basename); -+#endif -+ res = fuse_opt_add_arg(args, subtype_opt); -+ free(subtype_opt); -+ return res; -+} -+ -+int fuse_parse_cmdline(struct fuse_args *args, -+ 
struct fuse_cmdline_opts *opts) -+{ -+ memset(opts, 0, sizeof(struct fuse_cmdline_opts)); -+ -+ opts->max_idle_threads = 10; -+ -+ if (fuse_opt_parse(args, opts, fuse_helper_opts, -+ fuse_helper_opt_proc) == -1) -+ return -1; -+ -+ /* *Linux*: if neither -o subtype nor -o fsname are specified, -+ set subtype to program's basename. -+ *FreeBSD*: if fsname is not specified, set to program's -+ basename. */ -+ if (!opts->nodefault_subtype) -+ if (add_default_subtype(args->argv[0], args) == -1) -+ return -1; -+ -+ return 0; -+} -+ -+ -+int fuse_daemonize(int foreground) -+{ -+ if (!foreground) { -+ int nullfd; -+ int waiter[2]; -+ char completed; -+ -+ if (pipe(waiter)) { -+ perror("fuse_daemonize: pipe"); -+ return -1; -+ } -+ -+ /* -+ * demonize current process by forking it and killing the -+ * parent. This makes current process as a child of 'init'. -+ */ -+ switch(fork()) { -+ case -1: -+ perror("fuse_daemonize: fork"); -+ return -1; -+ case 0: -+ break; -+ default: -+ (void) read(waiter[0], &completed, sizeof(completed)); -+ _exit(0); -+ } -+ -+ if (setsid() == -1) { -+ perror("fuse_daemonize: setsid"); -+ return -1; -+ } -+ -+ (void) chdir("/"); -+ -+ nullfd = open("/dev/null", O_RDWR, 0); -+ if (nullfd != -1) { -+ (void) dup2(nullfd, 0); -+ (void) dup2(nullfd, 1); -+ (void) dup2(nullfd, 2); -+ if (nullfd > 2) -+ close(nullfd); -+ } -+ -+ /* Propagate completion of daemon initialization */ -+ completed = 1; -+ (void) write(waiter[1], &completed, sizeof(completed)); -+ close(waiter[0]); -+ close(waiter[1]); -+ } else { -+ (void) chdir("/"); -+ } -+ return 0; -+} -+ -+int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, -+ size_t op_size, void *user_data) -+{ -+ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -+ struct fuse *fuse; -+ struct fuse_cmdline_opts opts; -+ int res; -+ -+ if (fuse_parse_cmdline(&args, &opts) != 0) -+ return 1; -+ -+ if (opts.show_version) { -+ printf("FUSE library version %s\n", PACKAGE_VERSION); -+ 
fuse_lowlevel_version(); -+ res = 0; -+ goto out1; -+ } -+ -+ if (opts.show_help) { -+ if(args.argv[0][0] != '\0') -+ printf("usage: %s [options] \n\n", -+ args.argv[0]); -+ printf("FUSE options:\n"); -+ fuse_cmdline_help(); -+ fuse_lib_help(&args); -+ res = 0; -+ goto out1; -+ } -+ -+ if (!opts.show_help && -+ !opts.mountpoint) { -+ fuse_log(FUSE_LOG_ERR, "error: no mountpoint specified\n"); -+ res = 2; -+ goto out1; -+ } -+ -+ -+ fuse = fuse_new_31(&args, op, op_size, user_data); -+ if (fuse == NULL) { -+ res = 3; -+ goto out1; -+ } -+ -+ if (fuse_mount(fuse,opts.mountpoint) != 0) { -+ res = 4; -+ goto out2; -+ } -+ -+ if (fuse_daemonize(opts.foreground) != 0) { -+ res = 5; -+ goto out3; -+ } -+ -+ struct fuse_session *se = fuse_get_session(fuse); -+ if (fuse_set_signal_handlers(se) != 0) { -+ res = 6; -+ goto out3; -+ } -+ -+ if (opts.singlethread) -+ res = fuse_loop(fuse); -+ else { -+ struct fuse_loop_config loop_config; -+ loop_config.clone_fd = opts.clone_fd; -+ loop_config.max_idle_threads = opts.max_idle_threads; -+ res = fuse_loop_mt_32(fuse, &loop_config); -+ } -+ if (res) -+ res = 7; -+ -+ fuse_remove_signal_handlers(se); -+out3: -+ fuse_unmount(fuse); -+out2: -+ fuse_destroy(fuse); -+out1: -+ free(opts.mountpoint); -+ fuse_opt_free_args(&args); -+ return res; -+} -+ -+ -+void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, -+ struct fuse_conn_info *conn) -+{ -+ if(opts->set_max_write) -+ conn->max_write = opts->max_write; -+ if(opts->set_max_background) -+ conn->max_background = opts->max_background; -+ if(opts->set_congestion_threshold) -+ conn->congestion_threshold = opts->congestion_threshold; -+ if(opts->set_time_gran) -+ conn->time_gran = opts->time_gran; -+ if(opts->set_max_readahead) -+ conn->max_readahead = opts->max_readahead; -+ -+#define LL_ENABLE(cond,cap) \ -+ if (cond) conn->want |= (cap) -+#define LL_DISABLE(cond,cap) \ -+ if (cond) conn->want &= ~(cap) -+ -+ LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); -+ 
LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); -+ -+ LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); -+ LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); -+ -+ LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); -+ LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); -+ -+ LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -+ LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -+ -+ LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); -+ LL_DISABLE(opts->no_readdirplus_auto, FUSE_CAP_READDIRPLUS_AUTO); -+ -+ LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); -+ LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); -+ -+ LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -+ LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -+ -+ LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); -+ LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); -+ -+ LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); -+ LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); -+} -+ -+struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) -+{ -+ struct fuse_conn_info_opts *opts; -+ -+ opts = calloc(1, sizeof(struct fuse_conn_info_opts)); -+ if(opts == NULL) { -+ fuse_log(FUSE_LOG_ERR, "calloc failed\n"); -+ return NULL; -+ } -+ if(fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { -+ free(opts); -+ return NULL; -+ } -+ return opts; -+} -+ -+int fuse_open_channel(const char *mountpoint, const char* options) -+{ -+ struct mount_opts *opts = NULL; -+ int fd = -1; -+ const char *argv[] = { "", "-o", options }; -+ int argc = sizeof(argv) / sizeof(argv[0]); -+ struct fuse_args args = FUSE_ARGS_INIT(argc, (char**) argv); -+ -+ opts = parse_mount_opts(&args); -+ if (opts == NULL) -+ return -1; -+ -+ fd = fuse_kern_mount(mountpoint, opts); -+ destroy_mount_opts(opts); -+ -+ return fd; -+} --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Add-fuse_lowlevel.c.patch 
b/kvm-virtiofsd-Add-fuse_lowlevel.c.patch deleted file mode 100644 index 1318fef..0000000 --- a/kvm-virtiofsd-Add-fuse_lowlevel.c.patch +++ /dev/null @@ -1,3172 +0,0 @@ -From f6c6830f772e8060255323d2a458cd0e774d9654 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:42 +0100 -Subject: [PATCH 011/116] virtiofsd: Add fuse_lowlevel.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-8-dgilbert@redhat.com> -Patchwork-id: 93456 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 007/112] virtiofsd: Add fuse_lowlevel.c -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -fuse_lowlevel is one of the largest files from the library -and does most of the work. Add it separately to keep the diff -sizes small. -Again this is from upstream fuse-3.8.0 - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 2de121f01e37e2fe98a4362f4abf7c0848697f76) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 3129 +++++++++++++++++++++++++++++++++++++++ - 1 file changed, 3129 insertions(+) - create mode 100644 tools/virtiofsd/fuse_lowlevel.c - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -new file mode 100644 -index 0000000..f2d7038 ---- /dev/null -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -0,0 +1,3129 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ Implementation of (most of) the low-level FUSE API. The session loop -+ functions are implemented in separate files. -+ -+ This program can be distributed under the terms of the GNU LGPLv2. 
-+ See the file COPYING.LIB -+*/ -+ -+#define _GNU_SOURCE -+ -+#include "config.h" -+#include "fuse_i.h" -+#include "fuse_kernel.h" -+#include "fuse_opt.h" -+#include "fuse_misc.h" -+#include "mount_util.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifndef F_LINUX_SPECIFIC_BASE -+#define F_LINUX_SPECIFIC_BASE 1024 -+#endif -+#ifndef F_SETPIPE_SZ -+#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) -+#endif -+ -+ -+#define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) -+#define OFFSET_MAX 0x7fffffffffffffffLL -+ -+#define container_of(ptr, type, member) ({ \ -+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ -+ (type *)( (char *)__mptr - offsetof(type,member) );}) -+ -+struct fuse_pollhandle { -+ uint64_t kh; -+ struct fuse_session *se; -+}; -+ -+static size_t pagesize; -+ -+static __attribute__((constructor)) void fuse_ll_init_pagesize(void) -+{ -+ pagesize = getpagesize(); -+} -+ -+static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) -+{ -+ attr->ino = stbuf->st_ino; -+ attr->mode = stbuf->st_mode; -+ attr->nlink = stbuf->st_nlink; -+ attr->uid = stbuf->st_uid; -+ attr->gid = stbuf->st_gid; -+ attr->rdev = stbuf->st_rdev; -+ attr->size = stbuf->st_size; -+ attr->blksize = stbuf->st_blksize; -+ attr->blocks = stbuf->st_blocks; -+ attr->atime = stbuf->st_atime; -+ attr->mtime = stbuf->st_mtime; -+ attr->ctime = stbuf->st_ctime; -+ attr->atimensec = ST_ATIM_NSEC(stbuf); -+ attr->mtimensec = ST_MTIM_NSEC(stbuf); -+ attr->ctimensec = ST_CTIM_NSEC(stbuf); -+} -+ -+static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) -+{ -+ stbuf->st_mode = attr->mode; -+ stbuf->st_uid = attr->uid; -+ stbuf->st_gid = attr->gid; -+ stbuf->st_size = attr->size; -+ stbuf->st_atime = attr->atime; -+ stbuf->st_mtime = attr->mtime; -+ stbuf->st_ctime = attr->ctime; -+ ST_ATIM_NSEC_SET(stbuf, attr->atimensec); -+ ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); -+ 
ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); -+} -+ -+static size_t iov_length(const struct iovec *iov, size_t count) -+{ -+ size_t seg; -+ size_t ret = 0; -+ -+ for (seg = 0; seg < count; seg++) -+ ret += iov[seg].iov_len; -+ return ret; -+} -+ -+static void list_init_req(struct fuse_req *req) -+{ -+ req->next = req; -+ req->prev = req; -+} -+ -+static void list_del_req(struct fuse_req *req) -+{ -+ struct fuse_req *prev = req->prev; -+ struct fuse_req *next = req->next; -+ prev->next = next; -+ next->prev = prev; -+} -+ -+static void list_add_req(struct fuse_req *req, struct fuse_req *next) -+{ -+ struct fuse_req *prev = next->prev; -+ req->next = next; -+ req->prev = prev; -+ prev->next = req; -+ next->prev = req; -+} -+ -+static void destroy_req(fuse_req_t req) -+{ -+ pthread_mutex_destroy(&req->lock); -+ free(req); -+} -+ -+void fuse_free_req(fuse_req_t req) -+{ -+ int ctr; -+ struct fuse_session *se = req->se; -+ -+ pthread_mutex_lock(&se->lock); -+ req->u.ni.func = NULL; -+ req->u.ni.data = NULL; -+ list_del_req(req); -+ ctr = --req->ctr; -+ fuse_chan_put(req->ch); -+ req->ch = NULL; -+ pthread_mutex_unlock(&se->lock); -+ if (!ctr) -+ destroy_req(req); -+} -+ -+static struct fuse_req *fuse_ll_alloc_req(struct fuse_session *se) -+{ -+ struct fuse_req *req; -+ -+ req = (struct fuse_req *) calloc(1, sizeof(struct fuse_req)); -+ if (req == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); -+ } else { -+ req->se = se; -+ req->ctr = 1; -+ list_init_req(req); -+ fuse_mutex_init(&req->lock); -+ } -+ -+ return req; -+} -+ -+/* Send data. 
If *ch* is NULL, send via session master fd */ -+static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int count) -+{ -+ struct fuse_out_header *out = iov[0].iov_base; -+ -+ out->len = iov_length(iov, count); -+ if (se->debug) { -+ if (out->unique == 0) { -+ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", -+ out->error, out->len); -+ } else if (out->error) { -+ fuse_log(FUSE_LOG_DEBUG, -+ " unique: %llu, error: %i (%s), outsize: %i\n", -+ (unsigned long long) out->unique, out->error, -+ strerror(-out->error), out->len); -+ } else { -+ fuse_log(FUSE_LOG_DEBUG, -+ " unique: %llu, success, outsize: %i\n", -+ (unsigned long long) out->unique, out->len); -+ } -+ } -+ -+ ssize_t res = writev(ch ? ch->fd : se->fd, -+ iov, count); -+ int err = errno; -+ -+ if (res == -1) { -+ assert(se != NULL); -+ -+ /* ENOENT means the operation was interrupted */ -+ if (!fuse_session_exited(se) && err != ENOENT) -+ perror("fuse: writing device"); -+ return -err; -+ } -+ -+ return 0; -+} -+ -+ -+int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, -+ int count) -+{ -+ struct fuse_out_header out; -+ -+ if (error <= -1000 || error > 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); -+ error = -ERANGE; -+ } -+ -+ out.unique = req->unique; -+ out.error = error; -+ -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct fuse_out_header); -+ -+ return fuse_send_msg(req->se, req->ch, iov, count); -+} -+ -+static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov, -+ int count) -+{ -+ int res; -+ -+ res = fuse_send_reply_iov_nofree(req, error, iov, count); -+ fuse_free_req(req); -+ return res; -+} -+ -+static int send_reply(fuse_req_t req, int error, const void *arg, -+ size_t argsize) -+{ -+ struct iovec iov[2]; -+ int count = 1; -+ if (argsize) { -+ iov[1].iov_base = (void *) arg; -+ iov[1].iov_len = argsize; -+ count++; -+ } -+ return send_reply_iov(req, error, iov, count); -+} -+ 
-+int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count) -+{ -+ int res; -+ struct iovec *padded_iov; -+ -+ padded_iov = malloc((count + 1) * sizeof(struct iovec)); -+ if (padded_iov == NULL) -+ return fuse_reply_err(req, ENOMEM); -+ -+ memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); -+ count++; -+ -+ res = send_reply_iov(req, 0, padded_iov, count); -+ free(padded_iov); -+ -+ return res; -+} -+ -+ -+/* `buf` is allowed to be empty so that the proper size may be -+ allocated by the caller */ -+size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, -+ const char *name, const struct stat *stbuf, off_t off) -+{ -+ (void)req; -+ size_t namelen; -+ size_t entlen; -+ size_t entlen_padded; -+ struct fuse_dirent *dirent; -+ -+ namelen = strlen(name); -+ entlen = FUSE_NAME_OFFSET + namelen; -+ entlen_padded = FUSE_DIRENT_ALIGN(entlen); -+ -+ if ((buf == NULL) || (entlen_padded > bufsize)) -+ return entlen_padded; -+ -+ dirent = (struct fuse_dirent*) buf; -+ dirent->ino = stbuf->st_ino; -+ dirent->off = off; -+ dirent->namelen = namelen; -+ dirent->type = (stbuf->st_mode & S_IFMT) >> 12; -+ memcpy(dirent->name, name, namelen); -+ memset(dirent->name + namelen, 0, entlen_padded - entlen); -+ -+ return entlen_padded; -+} -+ -+static void convert_statfs(const struct statvfs *stbuf, -+ struct fuse_kstatfs *kstatfs) -+{ -+ kstatfs->bsize = stbuf->f_bsize; -+ kstatfs->frsize = stbuf->f_frsize; -+ kstatfs->blocks = stbuf->f_blocks; -+ kstatfs->bfree = stbuf->f_bfree; -+ kstatfs->bavail = stbuf->f_bavail; -+ kstatfs->files = stbuf->f_files; -+ kstatfs->ffree = stbuf->f_ffree; -+ kstatfs->namelen = stbuf->f_namemax; -+} -+ -+static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) -+{ -+ return send_reply(req, 0, arg, argsize); -+} -+ -+int fuse_reply_err(fuse_req_t req, int err) -+{ -+ return send_reply(req, -err, NULL, 0); -+} -+ -+void fuse_reply_none(fuse_req_t req) -+{ -+ fuse_free_req(req); -+} -+ -+static unsigned long 
calc_timeout_sec(double t) -+{ -+ if (t > (double) ULONG_MAX) -+ return ULONG_MAX; -+ else if (t < 0.0) -+ return 0; -+ else -+ return (unsigned long) t; -+} -+ -+static unsigned int calc_timeout_nsec(double t) -+{ -+ double f = t - (double) calc_timeout_sec(t); -+ if (f < 0.0) -+ return 0; -+ else if (f >= 0.999999999) -+ return 999999999; -+ else -+ return (unsigned int) (f * 1.0e9); -+} -+ -+static void fill_entry(struct fuse_entry_out *arg, -+ const struct fuse_entry_param *e) -+{ -+ arg->nodeid = e->ino; -+ arg->generation = e->generation; -+ arg->entry_valid = calc_timeout_sec(e->entry_timeout); -+ arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); -+ arg->attr_valid = calc_timeout_sec(e->attr_timeout); -+ arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); -+ convert_stat(&e->attr, &arg->attr); -+} -+ -+/* `buf` is allowed to be empty so that the proper size may be -+ allocated by the caller */ -+size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, -+ const char *name, -+ const struct fuse_entry_param *e, off_t off) -+{ -+ (void)req; -+ size_t namelen; -+ size_t entlen; -+ size_t entlen_padded; -+ -+ namelen = strlen(name); -+ entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; -+ entlen_padded = FUSE_DIRENT_ALIGN(entlen); -+ if ((buf == NULL) || (entlen_padded > bufsize)) -+ return entlen_padded; -+ -+ struct fuse_direntplus *dp = (struct fuse_direntplus *) buf; -+ memset(&dp->entry_out, 0, sizeof(dp->entry_out)); -+ fill_entry(&dp->entry_out, e); -+ -+ struct fuse_dirent *dirent = &dp->dirent; -+ dirent->ino = e->attr.st_ino; -+ dirent->off = off; -+ dirent->namelen = namelen; -+ dirent->type = (e->attr.st_mode & S_IFMT) >> 12; -+ memcpy(dirent->name, name, namelen); -+ memset(dirent->name + namelen, 0, entlen_padded - entlen); -+ -+ return entlen_padded; -+} -+ -+static void fill_open(struct fuse_open_out *arg, -+ const struct fuse_file_info *f) -+{ -+ arg->fh = f->fh; -+ if (f->direct_io) -+ arg->open_flags |= 
FOPEN_DIRECT_IO; -+ if (f->keep_cache) -+ arg->open_flags |= FOPEN_KEEP_CACHE; -+ if (f->cache_readdir) -+ arg->open_flags |= FOPEN_CACHE_DIR; -+ if (f->nonseekable) -+ arg->open_flags |= FOPEN_NONSEEKABLE; -+} -+ -+int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) -+{ -+ struct fuse_entry_out arg; -+ size_t size = req->se->conn.proto_minor < 9 ? -+ FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(arg); -+ -+ /* before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant -+ negative entry */ -+ if (!e->ino && req->se->conn.proto_minor < 4) -+ return fuse_reply_err(req, ENOENT); -+ -+ memset(&arg, 0, sizeof(arg)); -+ fill_entry(&arg, e); -+ return send_reply_ok(req, &arg, size); -+} -+ -+int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, -+ const struct fuse_file_info *f) -+{ -+ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; -+ size_t entrysize = req->se->conn.proto_minor < 9 ? -+ FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(struct fuse_entry_out); -+ struct fuse_entry_out *earg = (struct fuse_entry_out *) buf; -+ struct fuse_open_out *oarg = (struct fuse_open_out *) (buf + entrysize); -+ -+ memset(buf, 0, sizeof(buf)); -+ fill_entry(earg, e); -+ fill_open(oarg, f); -+ return send_reply_ok(req, buf, -+ entrysize + sizeof(struct fuse_open_out)); -+} -+ -+int fuse_reply_attr(fuse_req_t req, const struct stat *attr, -+ double attr_timeout) -+{ -+ struct fuse_attr_out arg; -+ size_t size = req->se->conn.proto_minor < 9 ? 
-+ FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.attr_valid = calc_timeout_sec(attr_timeout); -+ arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); -+ convert_stat(attr, &arg.attr); -+ -+ return send_reply_ok(req, &arg, size); -+} -+ -+int fuse_reply_readlink(fuse_req_t req, const char *linkname) -+{ -+ return send_reply_ok(req, linkname, strlen(linkname)); -+} -+ -+int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f) -+{ -+ struct fuse_open_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ fill_open(&arg, f); -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+int fuse_reply_write(fuse_req_t req, size_t count) -+{ -+ struct fuse_write_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.size = count; -+ -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size) -+{ -+ return send_reply_ok(req, buf, size); -+} -+ -+static int fuse_send_data_iov_fallback(struct fuse_session *se, -+ struct fuse_chan *ch, -+ struct iovec *iov, int iov_count, -+ struct fuse_bufvec *buf, -+ size_t len) -+{ -+ struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); -+ void *mbuf; -+ int res; -+ -+ /* Optimize common case */ -+ if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && -+ !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { -+ /* FIXME: also avoid memory copy if there are multiple buffers -+ but none of them contain an fd */ -+ -+ iov[iov_count].iov_base = buf->buf[0].mem; -+ iov[iov_count].iov_len = len; -+ iov_count++; -+ return fuse_send_msg(se, ch, iov, iov_count); -+ } -+ -+ res = posix_memalign(&mbuf, pagesize, len); -+ if (res != 0) -+ return res; -+ -+ mem_buf.buf[0].mem = mbuf; -+ res = fuse_buf_copy(&mem_buf, buf, 0); -+ if (res < 0) { -+ free(mbuf); -+ return -res; -+ } -+ len = res; -+ -+ iov[iov_count].iov_base = mbuf; -+ iov[iov_count].iov_len = len; -+ iov_count++; -+ res = fuse_send_msg(se, ch, iov, iov_count); -+ free(mbuf); -+ -+ return res; -+} -+ 
-+struct fuse_ll_pipe { -+ size_t size; -+ int can_grow; -+ int pipe[2]; -+}; -+ -+static void fuse_ll_pipe_free(struct fuse_ll_pipe *llp) -+{ -+ close(llp->pipe[0]); -+ close(llp->pipe[1]); -+ free(llp); -+} -+ -+#ifdef HAVE_SPLICE -+#if !defined(HAVE_PIPE2) || !defined(O_CLOEXEC) -+static int fuse_pipe(int fds[2]) -+{ -+ int rv = pipe(fds); -+ -+ if (rv == -1) -+ return rv; -+ -+ if (fcntl(fds[0], F_SETFL, O_NONBLOCK) == -1 || -+ fcntl(fds[1], F_SETFL, O_NONBLOCK) == -1 || -+ fcntl(fds[0], F_SETFD, FD_CLOEXEC) == -1 || -+ fcntl(fds[1], F_SETFD, FD_CLOEXEC) == -1) { -+ close(fds[0]); -+ close(fds[1]); -+ rv = -1; -+ } -+ return rv; -+} -+#else -+static int fuse_pipe(int fds[2]) -+{ -+ return pipe2(fds, O_CLOEXEC | O_NONBLOCK); -+} -+#endif -+ -+static struct fuse_ll_pipe *fuse_ll_get_pipe(struct fuse_session *se) -+{ -+ struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); -+ if (llp == NULL) { -+ int res; -+ -+ llp = malloc(sizeof(struct fuse_ll_pipe)); -+ if (llp == NULL) -+ return NULL; -+ -+ res = fuse_pipe(llp->pipe); -+ if (res == -1) { -+ free(llp); -+ return NULL; -+ } -+ -+ /* -+ *the default size is 16 pages on linux -+ */ -+ llp->size = pagesize * 16; -+ llp->can_grow = 1; -+ -+ pthread_setspecific(se->pipe_key, llp); -+ } -+ -+ return llp; -+} -+#endif -+ -+static void fuse_ll_clear_pipe(struct fuse_session *se) -+{ -+ struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); -+ if (llp) { -+ pthread_setspecific(se->pipe_key, NULL); -+ fuse_ll_pipe_free(llp); -+ } -+} -+ -+#if defined(HAVE_SPLICE) && defined(HAVE_VMSPLICE) -+static int read_back(int fd, char *buf, size_t len) -+{ -+ int res; -+ -+ res = read(fd, buf, len); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "fuse: internal error: failed to read back from pipe: %s\n", strerror(errno)); -+ return -EIO; -+ } -+ if (res != len) { -+ fuse_log(FUSE_LOG_ERR, "fuse: internal error: short read back from pipe: %i from %zi\n", res, len); -+ return -EIO; -+ } -+ return 0; -+} -+ -+static int 
grow_pipe_to_max(int pipefd) -+{ -+ int max; -+ int res; -+ int maxfd; -+ char buf[32]; -+ -+ maxfd = open("/proc/sys/fs/pipe-max-size", O_RDONLY); -+ if (maxfd < 0) -+ return -errno; -+ -+ res = read(maxfd, buf, sizeof(buf) - 1); -+ if (res < 0) { -+ int saved_errno; -+ -+ saved_errno = errno; -+ close(maxfd); -+ return -saved_errno; -+ } -+ close(maxfd); -+ buf[res] = '\0'; -+ -+ max = atoi(buf); -+ res = fcntl(pipefd, F_SETPIPE_SZ, max); -+ if (res < 0) -+ return -errno; -+ return max; -+} -+ -+static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int iov_count, -+ struct fuse_bufvec *buf, unsigned int flags) -+{ -+ int res; -+ size_t len = fuse_buf_size(buf); -+ struct fuse_out_header *out = iov[0].iov_base; -+ struct fuse_ll_pipe *llp; -+ int splice_flags; -+ size_t pipesize; -+ size_t total_fd_size; -+ size_t idx; -+ size_t headerlen; -+ struct fuse_bufvec pipe_buf = FUSE_BUFVEC_INIT(len); -+ -+ if (se->broken_splice_nonblock) -+ goto fallback; -+ -+ if (flags & FUSE_BUF_NO_SPLICE) -+ goto fallback; -+ -+ total_fd_size = 0; -+ for (idx = buf->idx; idx < buf->count; idx++) { -+ if (buf->buf[idx].flags & FUSE_BUF_IS_FD) { -+ total_fd_size = buf->buf[idx].size; -+ if (idx == buf->idx) -+ total_fd_size -= buf->off; -+ } -+ } -+ if (total_fd_size < 2 * pagesize) -+ goto fallback; -+ -+ if (se->conn.proto_minor < 14 || -+ !(se->conn.want & FUSE_CAP_SPLICE_WRITE)) -+ goto fallback; -+ -+ llp = fuse_ll_get_pipe(se); -+ if (llp == NULL) -+ goto fallback; -+ -+ -+ headerlen = iov_length(iov, iov_count); -+ -+ out->len = headerlen + len; -+ -+ /* -+ * Heuristic for the required pipe size, does not work if the -+ * source contains less than page size fragments -+ */ -+ pipesize = pagesize * (iov_count + buf->count + 1) + out->len; -+ -+ if (llp->size < pipesize) { -+ if (llp->can_grow) { -+ res = fcntl(llp->pipe[0], F_SETPIPE_SZ, pipesize); -+ if (res == -1) { -+ res = grow_pipe_to_max(llp->pipe[0]); -+ if (res > 0) -+ 
llp->size = res; -+ llp->can_grow = 0; -+ goto fallback; -+ } -+ llp->size = res; -+ } -+ if (llp->size < pipesize) -+ goto fallback; -+ } -+ -+ -+ res = vmsplice(llp->pipe[1], iov, iov_count, SPLICE_F_NONBLOCK); -+ if (res == -1) -+ goto fallback; -+ -+ if (res != headerlen) { -+ res = -EIO; -+ fuse_log(FUSE_LOG_ERR, "fuse: short vmsplice to pipe: %u/%zu\n", res, -+ headerlen); -+ goto clear_pipe; -+ } -+ -+ pipe_buf.buf[0].flags = FUSE_BUF_IS_FD; -+ pipe_buf.buf[0].fd = llp->pipe[1]; -+ -+ res = fuse_buf_copy(&pipe_buf, buf, -+ FUSE_BUF_FORCE_SPLICE | FUSE_BUF_SPLICE_NONBLOCK); -+ if (res < 0) { -+ if (res == -EAGAIN || res == -EINVAL) { -+ /* -+ * Should only get EAGAIN on kernels with -+ * broken SPLICE_F_NONBLOCK support (<= -+ * 2.6.35) where this error or a short read is -+ * returned even if the pipe itself is not -+ * full -+ * -+ * EINVAL might mean that splice can't handle -+ * this combination of input and output. -+ */ -+ if (res == -EAGAIN) -+ se->broken_splice_nonblock = 1; -+ -+ pthread_setspecific(se->pipe_key, NULL); -+ fuse_ll_pipe_free(llp); -+ goto fallback; -+ } -+ res = -res; -+ goto clear_pipe; -+ } -+ -+ if (res != 0 && res < len) { -+ struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); -+ void *mbuf; -+ size_t now_len = res; -+ /* -+ * For regular files a short count is either -+ * 1) due to EOF, or -+ * 2) because of broken SPLICE_F_NONBLOCK (see above) -+ * -+ * For other inputs it's possible that we overflowed -+ * the pipe because of small buffer fragments. -+ */ -+ -+ res = posix_memalign(&mbuf, pagesize, len); -+ if (res != 0) -+ goto clear_pipe; -+ -+ mem_buf.buf[0].mem = mbuf; -+ mem_buf.off = now_len; -+ res = fuse_buf_copy(&mem_buf, buf, 0); -+ if (res > 0) { -+ char *tmpbuf; -+ size_t extra_len = res; -+ /* -+ * Trickiest case: got more data. Need to get -+ * back the data from the pipe and then fall -+ * back to regular write. 
-+ */ -+ tmpbuf = malloc(headerlen); -+ if (tmpbuf == NULL) { -+ free(mbuf); -+ res = ENOMEM; -+ goto clear_pipe; -+ } -+ res = read_back(llp->pipe[0], tmpbuf, headerlen); -+ free(tmpbuf); -+ if (res != 0) { -+ free(mbuf); -+ goto clear_pipe; -+ } -+ res = read_back(llp->pipe[0], mbuf, now_len); -+ if (res != 0) { -+ free(mbuf); -+ goto clear_pipe; -+ } -+ len = now_len + extra_len; -+ iov[iov_count].iov_base = mbuf; -+ iov[iov_count].iov_len = len; -+ iov_count++; -+ res = fuse_send_msg(se, ch, iov, iov_count); -+ free(mbuf); -+ return res; -+ } -+ free(mbuf); -+ res = now_len; -+ } -+ len = res; -+ out->len = headerlen + len; -+ -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, -+ " unique: %llu, success, outsize: %i (splice)\n", -+ (unsigned long long) out->unique, out->len); -+ } -+ -+ splice_flags = 0; -+ if ((flags & FUSE_BUF_SPLICE_MOVE) && -+ (se->conn.want & FUSE_CAP_SPLICE_MOVE)) -+ splice_flags |= SPLICE_F_MOVE; -+ -+ res = splice(llp->pipe[0], NULL, ch ? ch->fd : se->fd, -+ NULL, out->len, splice_flags); -+ if (res == -1) { -+ res = -errno; -+ perror("fuse: splice from pipe"); -+ goto clear_pipe; -+ } -+ if (res != out->len) { -+ res = -EIO; -+ fuse_log(FUSE_LOG_ERR, "fuse: short splice from pipe: %u/%u\n", -+ res, out->len); -+ goto clear_pipe; -+ } -+ return 0; -+ -+clear_pipe: -+ fuse_ll_clear_pipe(se); -+ return res; -+ -+fallback: -+ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); -+} -+#else -+static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int iov_count, -+ struct fuse_bufvec *buf, unsigned int flags) -+{ -+ size_t len = fuse_buf_size(buf); -+ (void) flags; -+ -+ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); -+} -+#endif -+ -+int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags) -+{ -+ struct iovec iov[2]; -+ struct fuse_out_header out; -+ int res; -+ -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct 
fuse_out_header); -+ -+ out.unique = req->unique; -+ out.error = 0; -+ -+ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); -+ if (res <= 0) { -+ fuse_free_req(req); -+ return res; -+ } else { -+ return fuse_reply_err(req, res); -+ } -+} -+ -+int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) -+{ -+ struct fuse_statfs_out arg; -+ size_t size = req->se->conn.proto_minor < 4 ? -+ FUSE_COMPAT_STATFS_SIZE : sizeof(arg); -+ -+ memset(&arg, 0, sizeof(arg)); -+ convert_statfs(stbuf, &arg.st); -+ -+ return send_reply_ok(req, &arg, size); -+} -+ -+int fuse_reply_xattr(fuse_req_t req, size_t count) -+{ -+ struct fuse_getxattr_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.size = count; -+ -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+int fuse_reply_lock(fuse_req_t req, const struct flock *lock) -+{ -+ struct fuse_lk_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.lk.type = lock->l_type; -+ if (lock->l_type != F_UNLCK) { -+ arg.lk.start = lock->l_start; -+ if (lock->l_len == 0) -+ arg.lk.end = OFFSET_MAX; -+ else -+ arg.lk.end = lock->l_start + lock->l_len - 1; -+ } -+ arg.lk.pid = lock->l_pid; -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+int fuse_reply_bmap(fuse_req_t req, uint64_t idx) -+{ -+ struct fuse_bmap_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.block = idx; -+ -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+static struct fuse_ioctl_iovec *fuse_ioctl_iovec_copy(const struct iovec *iov, -+ size_t count) -+{ -+ struct fuse_ioctl_iovec *fiov; -+ size_t i; -+ -+ fiov = malloc(sizeof(fiov[0]) * count); -+ if (!fiov) -+ return NULL; -+ -+ for (i = 0; i < count; i++) { -+ fiov[i].base = (uintptr_t) iov[i].iov_base; -+ fiov[i].len = iov[i].iov_len; -+ } -+ -+ return fiov; -+} -+ -+int fuse_reply_ioctl_retry(fuse_req_t req, -+ const struct iovec *in_iov, size_t in_count, -+ const struct iovec *out_iov, size_t out_count) -+{ -+ struct fuse_ioctl_out arg; -+ struct fuse_ioctl_iovec *in_fiov = 
NULL; -+ struct fuse_ioctl_iovec *out_fiov = NULL; -+ struct iovec iov[4]; -+ size_t count = 1; -+ int res; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.flags |= FUSE_IOCTL_RETRY; -+ arg.in_iovs = in_count; -+ arg.out_iovs = out_count; -+ iov[count].iov_base = &arg; -+ iov[count].iov_len = sizeof(arg); -+ count++; -+ -+ if (req->se->conn.proto_minor < 16) { -+ if (in_count) { -+ iov[count].iov_base = (void *)in_iov; -+ iov[count].iov_len = sizeof(in_iov[0]) * in_count; -+ count++; -+ } -+ -+ if (out_count) { -+ iov[count].iov_base = (void *)out_iov; -+ iov[count].iov_len = sizeof(out_iov[0]) * out_count; -+ count++; -+ } -+ } else { -+ /* Can't handle non-compat 64bit ioctls on 32bit */ -+ if (sizeof(void *) == 4 && req->ioctl_64bit) { -+ res = fuse_reply_err(req, EINVAL); -+ goto out; -+ } -+ -+ if (in_count) { -+ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); -+ if (!in_fiov) -+ goto enomem; -+ -+ iov[count].iov_base = (void *)in_fiov; -+ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; -+ count++; -+ } -+ if (out_count) { -+ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); -+ if (!out_fiov) -+ goto enomem; -+ -+ iov[count].iov_base = (void *)out_fiov; -+ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; -+ count++; -+ } -+ } -+ -+ res = send_reply_iov(req, 0, iov, count); -+out: -+ free(in_fiov); -+ free(out_fiov); -+ -+ return res; -+ -+enomem: -+ res = fuse_reply_err(req, ENOMEM); -+ goto out; -+} -+ -+int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size) -+{ -+ struct fuse_ioctl_out arg; -+ struct iovec iov[3]; -+ size_t count = 1; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.result = result; -+ iov[count].iov_base = &arg; -+ iov[count].iov_len = sizeof(arg); -+ count++; -+ -+ if (size) { -+ iov[count].iov_base = (char *) buf; -+ iov[count].iov_len = size; -+ count++; -+ } -+ -+ return send_reply_iov(req, 0, iov, count); -+} -+ -+int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, -+ int count) 
-+{ -+ struct iovec *padded_iov; -+ struct fuse_ioctl_out arg; -+ int res; -+ -+ padded_iov = malloc((count + 2) * sizeof(struct iovec)); -+ if (padded_iov == NULL) -+ return fuse_reply_err(req, ENOMEM); -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.result = result; -+ padded_iov[1].iov_base = &arg; -+ padded_iov[1].iov_len = sizeof(arg); -+ -+ memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); -+ -+ res = send_reply_iov(req, 0, padded_iov, count + 2); -+ free(padded_iov); -+ -+ return res; -+} -+ -+int fuse_reply_poll(fuse_req_t req, unsigned revents) -+{ -+ struct fuse_poll_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.revents = revents; -+ -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+int fuse_reply_lseek(fuse_req_t req, off_t off) -+{ -+ struct fuse_lseek_out arg; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.offset = off; -+ -+ return send_reply_ok(req, &arg, sizeof(arg)); -+} -+ -+static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ char *name = (char *) inarg; -+ -+ if (req->se->op.lookup) -+ req->se->op.lookup(req, nodeid, name); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_forget_in *arg = (struct fuse_forget_in *) inarg; -+ -+ if (req->se->op.forget) -+ req->se->op.forget(req, nodeid, arg->nlookup); -+ else -+ fuse_reply_none(req); -+} -+ -+static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, -+ const void *inarg) -+{ -+ struct fuse_batch_forget_in *arg = (void *) inarg; -+ struct fuse_forget_one *param = (void *) PARAM(arg); -+ unsigned int i; -+ -+ (void) nodeid; -+ -+ if (req->se->op.forget_multi) { -+ req->se->op.forget_multi(req, arg->count, -+ (struct fuse_forget_data *) param); -+ } else if (req->se->op.forget) { -+ for (i = 0; i < arg->count; i++) { -+ struct fuse_forget_one *forget = ¶m[i]; -+ struct fuse_req *dummy_req; -+ -+ dummy_req = fuse_ll_alloc_req(req->se); -+ if (dummy_req == 
NULL) -+ break; -+ -+ dummy_req->unique = req->unique; -+ dummy_req->ctx = req->ctx; -+ dummy_req->ch = NULL; -+ -+ req->se->op.forget(dummy_req, forget->nodeid, -+ forget->nlookup); -+ } -+ fuse_reply_none(req); -+ } else { -+ fuse_reply_none(req); -+ } -+} -+ -+static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_file_info *fip = NULL; -+ struct fuse_file_info fi; -+ -+ if (req->se->conn.proto_minor >= 9) { -+ struct fuse_getattr_in *arg = (struct fuse_getattr_in *) inarg; -+ -+ if (arg->getattr_flags & FUSE_GETATTR_FH) { -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fip = &fi; -+ } -+ } -+ -+ if (req->se->op.getattr) -+ req->se->op.getattr(req, nodeid, fip); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_setattr_in *arg = (struct fuse_setattr_in *) inarg; -+ -+ if (req->se->op.setattr) { -+ struct fuse_file_info *fi = NULL; -+ struct fuse_file_info fi_store; -+ struct stat stbuf; -+ memset(&stbuf, 0, sizeof(stbuf)); -+ convert_attr(arg, &stbuf); -+ if (arg->valid & FATTR_FH) { -+ arg->valid &= ~FATTR_FH; -+ memset(&fi_store, 0, sizeof(fi_store)); -+ fi = &fi_store; -+ fi->fh = arg->fh; -+ } -+ arg->valid &= -+ FUSE_SET_ATTR_MODE | -+ FUSE_SET_ATTR_UID | -+ FUSE_SET_ATTR_GID | -+ FUSE_SET_ATTR_SIZE | -+ FUSE_SET_ATTR_ATIME | -+ FUSE_SET_ATTR_MTIME | -+ FUSE_SET_ATTR_ATIME_NOW | -+ FUSE_SET_ATTR_MTIME_NOW | -+ FUSE_SET_ATTR_CTIME; -+ -+ req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); -+ } else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_access_in *arg = (struct fuse_access_in *) inarg; -+ -+ if (req->se->op.access) -+ req->se->op.access(req, nodeid, arg->mask); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ (void) inarg; -+ -+ if 
(req->se->op.readlink) -+ req->se->op.readlink(req, nodeid); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_mknod_in *arg = (struct fuse_mknod_in *) inarg; -+ char *name = PARAM(arg); -+ -+ if (req->se->conn.proto_minor >= 12) -+ req->ctx.umask = arg->umask; -+ else -+ name = (char *) inarg + FUSE_COMPAT_MKNOD_IN_SIZE; -+ -+ if (req->se->op.mknod) -+ req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *) inarg; -+ -+ if (req->se->conn.proto_minor >= 12) -+ req->ctx.umask = arg->umask; -+ -+ if (req->se->op.mkdir) -+ req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ char *name = (char *) inarg; -+ -+ if (req->se->op.unlink) -+ req->se->op.unlink(req, nodeid, name); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ char *name = (char *) inarg; -+ -+ if (req->se->op.rmdir) -+ req->se->op.rmdir(req, nodeid, name); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ char *name = (char *) inarg; -+ char *linkname = ((char *) inarg) + strlen((char *) inarg) + 1; -+ -+ if (req->se->op.symlink) -+ req->se->op.symlink(req, linkname, nodeid, name); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_rename_in *arg = (struct fuse_rename_in *) inarg; -+ char *oldname = PARAM(arg); -+ char *newname = oldname + strlen(oldname) + 1; -+ -+ if (req->se->op.rename) -+ req->se->op.rename(req, nodeid, oldname, arg->newdir, 
newname, -+ 0); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_rename2_in *arg = (struct fuse_rename2_in *) inarg; -+ char *oldname = PARAM(arg); -+ char *newname = oldname + strlen(oldname) + 1; -+ -+ if (req->se->op.rename) -+ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, -+ arg->flags); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_link_in *arg = (struct fuse_link_in *) inarg; -+ -+ if (req->se->op.link) -+ req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_create_in *arg = (struct fuse_create_in *) inarg; -+ -+ if (req->se->op.create) { -+ struct fuse_file_info fi; -+ char *name = PARAM(arg); -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ -+ if (req->se->conn.proto_minor >= 12) -+ req->ctx.umask = arg->umask; -+ else -+ name = (char *) inarg + sizeof(struct fuse_open_in); -+ -+ req->se->op.create(req, nodeid, name, arg->mode, &fi); -+ } else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_open_in *arg = (struct fuse_open_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ -+ if (req->se->op.open) -+ req->se->op.open(req, nodeid, &fi); -+ else -+ fuse_reply_open(req, &fi); -+} -+ -+static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -+ -+ if (req->se->op.read) { -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ if (req->se->conn.proto_minor >= 9) { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ } -+ 
req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); -+ } else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_write_in *arg = (struct fuse_write_in *) inarg; -+ struct fuse_file_info fi; -+ char *param; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; -+ -+ if (req->se->conn.proto_minor < 9) { -+ param = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; -+ } else { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ param = PARAM(arg); -+ } -+ -+ if (req->se->op.write) -+ req->se->op.write(req, nodeid, param, arg->size, -+ arg->offset, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, -+ const struct fuse_buf *ibuf) -+{ -+ struct fuse_session *se = req->se; -+ struct fuse_bufvec bufv = { -+ .buf[0] = *ibuf, -+ .count = 1, -+ }; -+ struct fuse_write_in *arg = (struct fuse_write_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; -+ -+ if (se->conn.proto_minor < 9) { -+ bufv.buf[0].mem = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; -+ bufv.buf[0].size -= sizeof(struct fuse_in_header) + -+ FUSE_COMPAT_WRITE_IN_SIZE; -+ assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); -+ } else { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) -+ bufv.buf[0].mem = PARAM(arg); -+ -+ bufv.buf[0].size -= sizeof(struct fuse_in_header) + -+ sizeof(struct fuse_write_in); -+ } -+ if (bufv.buf[0].size < arg->size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); -+ fuse_reply_err(req, EIO); -+ goto out; -+ } -+ bufv.buf[0].size = arg->size; -+ -+ se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); -+ -+out: -+ /* Need to reset the pipe if ->write_buf() didn't 
consume all data */ -+ if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) -+ fuse_ll_clear_pipe(se); -+} -+ -+static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_flush_in *arg = (struct fuse_flush_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.flush = 1; -+ if (req->se->conn.proto_minor >= 7) -+ fi.lock_owner = arg->lock_owner; -+ -+ if (req->se->op.flush) -+ req->se->op.flush(req, nodeid, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_release_in *arg = (struct fuse_release_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ fi.fh = arg->fh; -+ if (req->se->conn.proto_minor >= 8) { -+ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; -+ fi.lock_owner = arg->lock_owner; -+ } -+ if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { -+ fi.flock_release = 1; -+ fi.lock_owner = arg->lock_owner; -+ } -+ -+ if (req->se->op.release) -+ req->se->op.release(req, nodeid, &fi); -+ else -+ fuse_reply_err(req, 0); -+} -+ -+static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; -+ struct fuse_file_info fi; -+ int datasync = arg->fsync_flags & 1; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.fsync) -+ req->se->op.fsync(req, nodeid, datasync, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_open_in *arg = (struct fuse_open_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ -+ if (req->se->op.opendir) -+ req->se->op.opendir(req, nodeid, &fi); -+ else -+ fuse_reply_open(req, &fi); -+} -+ -+static void do_readdir(fuse_req_t req, fuse_ino_t 
nodeid, const void *inarg) -+{ -+ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.readdir) -+ req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.readdirplus) -+ req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_release_in *arg = (struct fuse_release_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ fi.fh = arg->fh; -+ -+ if (req->se->op.releasedir) -+ req->se->op.releasedir(req, nodeid, &fi); -+ else -+ fuse_reply_err(req, 0); -+} -+ -+static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; -+ struct fuse_file_info fi; -+ int datasync = arg->fsync_flags & 1; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.fsyncdir) -+ req->se->op.fsyncdir(req, nodeid, datasync, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ (void) nodeid; -+ (void) inarg; -+ -+ if (req->se->op.statfs) -+ req->se->op.statfs(req, nodeid); -+ else { -+ struct statvfs buf = { -+ .f_namemax = 255, -+ .f_bsize = 512, -+ }; -+ fuse_reply_statfs(req, &buf); -+ } -+} -+ -+static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *) inarg; -+ char *name = PARAM(arg); -+ char *value = name 
+ strlen(name) + 1; -+ -+ if (req->se->op.setxattr) -+ req->se->op.setxattr(req, nodeid, name, value, arg->size, -+ arg->flags); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; -+ -+ if (req->se->op.getxattr) -+ req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; -+ -+ if (req->se->op.listxattr) -+ req->se->op.listxattr(req, nodeid, arg->size); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ char *name = (char *) inarg; -+ -+ if (req->se->op.removexattr) -+ req->se->op.removexattr(req, nodeid, name); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void convert_fuse_file_lock(struct fuse_file_lock *fl, -+ struct flock *flock) -+{ -+ memset(flock, 0, sizeof(struct flock)); -+ flock->l_type = fl->type; -+ flock->l_whence = SEEK_SET; -+ flock->l_start = fl->start; -+ if (fl->end == OFFSET_MAX) -+ flock->l_len = 0; -+ else -+ flock->l_len = fl->end - fl->start + 1; -+ flock->l_pid = fl->pid; -+} -+ -+static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; -+ struct fuse_file_info fi; -+ struct flock flock; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.lock_owner = arg->owner; -+ -+ convert_fuse_file_lock(&arg->lk, &flock); -+ if (req->se->op.getlk) -+ req->se->op.getlk(req, nodeid, &fi, &flock); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, -+ const void *inarg, int sleep) -+{ -+ struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; -+ struct fuse_file_info fi; -+ 
struct flock flock; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.lock_owner = arg->owner; -+ -+ if (arg->lk_flags & FUSE_LK_FLOCK) { -+ int op = 0; -+ -+ switch (arg->lk.type) { -+ case F_RDLCK: -+ op = LOCK_SH; -+ break; -+ case F_WRLCK: -+ op = LOCK_EX; -+ break; -+ case F_UNLCK: -+ op = LOCK_UN; -+ break; -+ } -+ if (!sleep) -+ op |= LOCK_NB; -+ -+ if (req->se->op.flock) -+ req->se->op.flock(req, nodeid, &fi, op); -+ else -+ fuse_reply_err(req, ENOSYS); -+ } else { -+ convert_fuse_file_lock(&arg->lk, &flock); -+ if (req->se->op.setlk) -+ req->se->op.setlk(req, nodeid, &fi, &flock, sleep); -+ else -+ fuse_reply_err(req, ENOSYS); -+ } -+} -+ -+static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ do_setlk_common(req, nodeid, inarg, 0); -+} -+ -+static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ do_setlk_common(req, nodeid, inarg, 1); -+} -+ -+static int find_interrupted(struct fuse_session *se, struct fuse_req *req) -+{ -+ struct fuse_req *curr; -+ -+ for (curr = se->list.next; curr != &se->list; curr = curr->next) { -+ if (curr->unique == req->u.i.unique) { -+ fuse_interrupt_func_t func; -+ void *data; -+ -+ curr->ctr++; -+ pthread_mutex_unlock(&se->lock); -+ -+ /* Ugh, ugly locking */ -+ pthread_mutex_lock(&curr->lock); -+ pthread_mutex_lock(&se->lock); -+ curr->interrupted = 1; -+ func = curr->u.ni.func; -+ data = curr->u.ni.data; -+ pthread_mutex_unlock(&se->lock); -+ if (func) -+ func(curr, data); -+ pthread_mutex_unlock(&curr->lock); -+ -+ pthread_mutex_lock(&se->lock); -+ curr->ctr--; -+ if (!curr->ctr) -+ destroy_req(curr); -+ -+ return 1; -+ } -+ } -+ for (curr = se->interrupts.next; curr != &se->interrupts; -+ curr = curr->next) { -+ if (curr->u.i.unique == req->u.i.unique) -+ return 1; -+ } -+ return 0; -+} -+ -+static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *) inarg; -+ struct 
fuse_session *se = req->se; -+ -+ (void) nodeid; -+ if (se->debug) -+ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", -+ (unsigned long long) arg->unique); -+ -+ req->u.i.unique = arg->unique; -+ -+ pthread_mutex_lock(&se->lock); -+ if (find_interrupted(se, req)) -+ destroy_req(req); -+ else -+ list_add_req(req, &se->interrupts); -+ pthread_mutex_unlock(&se->lock); -+} -+ -+static struct fuse_req *check_interrupt(struct fuse_session *se, -+ struct fuse_req *req) -+{ -+ struct fuse_req *curr; -+ -+ for (curr = se->interrupts.next; curr != &se->interrupts; -+ curr = curr->next) { -+ if (curr->u.i.unique == req->unique) { -+ req->interrupted = 1; -+ list_del_req(curr); -+ free(curr); -+ return NULL; -+ } -+ } -+ curr = se->interrupts.next; -+ if (curr != &se->interrupts) { -+ list_del_req(curr); -+ list_init_req(curr); -+ return curr; -+ } else -+ return NULL; -+} -+ -+static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_bmap_in *arg = (struct fuse_bmap_in *) inarg; -+ -+ if (req->se->op.bmap) -+ req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *) inarg; -+ unsigned int flags = arg->flags; -+ void *in_buf = arg->in_size ? 
PARAM(arg) : NULL; -+ struct fuse_file_info fi; -+ -+ if (flags & FUSE_IOCTL_DIR && -+ !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { -+ fuse_reply_err(req, ENOTTY); -+ return; -+ } -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && -+ !(flags & FUSE_IOCTL_32BIT)) { -+ req->ioctl_64bit = 1; -+ } -+ -+ if (req->se->op.ioctl) -+ req->se->op.ioctl(req, nodeid, arg->cmd, -+ (void *)(uintptr_t)arg->arg, &fi, flags, -+ in_buf, arg->in_size, arg->out_size); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) -+{ -+ free(ph); -+} -+ -+static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_poll_in *arg = (struct fuse_poll_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.poll_events = arg->events; -+ -+ if (req->se->op.poll) { -+ struct fuse_pollhandle *ph = NULL; -+ -+ if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { -+ ph = malloc(sizeof(struct fuse_pollhandle)); -+ if (ph == NULL) { -+ fuse_reply_err(req, ENOMEM); -+ return; -+ } -+ ph->kh = arg->kh; -+ ph->se = req->se; -+ } -+ -+ req->se->op.poll(req, nodeid, &fi, ph); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } -+} -+ -+static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.fallocate) -+ req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, const void *inarg) -+{ -+ struct fuse_copy_file_range_in *arg = (struct fuse_copy_file_range_in *) inarg; -+ struct fuse_file_info fi_in, fi_out; -+ -+ memset(&fi_in, 0, sizeof(fi_in)); -+ fi_in.fh = arg->fh_in; -+ -+ 
memset(&fi_out, 0, sizeof(fi_out)); -+ fi_out.fh = arg->fh_out; -+ -+ -+ if (req->se->op.copy_file_range) -+ req->se->op.copy_file_range(req, nodeid_in, arg->off_in, -+ &fi_in, arg->nodeid_out, -+ arg->off_out, &fi_out, arg->len, -+ arg->flags); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_lseek_in *arg = (struct fuse_lseek_in *) inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ -+ if (req->se->op.lseek) -+ req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); -+ else -+ fuse_reply_err(req, ENOSYS); -+} -+ -+static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_init_in *arg = (struct fuse_init_in *) inarg; -+ struct fuse_init_out outarg; -+ struct fuse_session *se = req->se; -+ size_t bufsize = se->bufsize; -+ size_t outargsize = sizeof(outarg); -+ -+ (void) nodeid; -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); -+ if (arg->major == 7 && arg->minor >= 6) { -+ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); -+ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", -+ arg->max_readahead); -+ } -+ } -+ se->conn.proto_major = arg->major; -+ se->conn.proto_minor = arg->minor; -+ se->conn.capable = 0; -+ se->conn.want = 0; -+ -+ memset(&outarg, 0, sizeof(outarg)); -+ outarg.major = FUSE_KERNEL_VERSION; -+ outarg.minor = FUSE_KERNEL_MINOR_VERSION; -+ -+ if (arg->major < 7) { -+ fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", -+ arg->major, arg->minor); -+ fuse_reply_err(req, EPROTO); -+ return; -+ } -+ -+ if (arg->major > 7) { -+ /* Wait for a second INIT request with a 7.X version */ -+ send_reply_ok(req, &outarg, sizeof(outarg)); -+ return; -+ } -+ -+ if (arg->minor >= 6) { -+ if (arg->max_readahead < se->conn.max_readahead) -+ se->conn.max_readahead = arg->max_readahead; -+ if (arg->flags & FUSE_ASYNC_READ) -+ 
se->conn.capable |= FUSE_CAP_ASYNC_READ; -+ if (arg->flags & FUSE_POSIX_LOCKS) -+ se->conn.capable |= FUSE_CAP_POSIX_LOCKS; -+ if (arg->flags & FUSE_ATOMIC_O_TRUNC) -+ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; -+ if (arg->flags & FUSE_EXPORT_SUPPORT) -+ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; -+ if (arg->flags & FUSE_DONT_MASK) -+ se->conn.capable |= FUSE_CAP_DONT_MASK; -+ if (arg->flags & FUSE_FLOCK_LOCKS) -+ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; -+ if (arg->flags & FUSE_AUTO_INVAL_DATA) -+ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; -+ if (arg->flags & FUSE_DO_READDIRPLUS) -+ se->conn.capable |= FUSE_CAP_READDIRPLUS; -+ if (arg->flags & FUSE_READDIRPLUS_AUTO) -+ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; -+ if (arg->flags & FUSE_ASYNC_DIO) -+ se->conn.capable |= FUSE_CAP_ASYNC_DIO; -+ if (arg->flags & FUSE_WRITEBACK_CACHE) -+ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; -+ if (arg->flags & FUSE_NO_OPEN_SUPPORT) -+ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; -+ if (arg->flags & FUSE_PARALLEL_DIROPS) -+ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; -+ if (arg->flags & FUSE_POSIX_ACL) -+ se->conn.capable |= FUSE_CAP_POSIX_ACL; -+ if (arg->flags & FUSE_HANDLE_KILLPRIV) -+ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; -+ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) -+ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; -+ if (!(arg->flags & FUSE_MAX_PAGES)) { -+ size_t max_bufsize = -+ FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() -+ + FUSE_BUFFER_HEADER_SIZE; -+ if (bufsize > max_bufsize) { -+ bufsize = max_bufsize; -+ } -+ } -+ } else { -+ se->conn.max_readahead = 0; -+ } -+ -+ if (se->conn.proto_minor >= 14) { -+#ifdef HAVE_SPLICE -+#ifdef HAVE_VMSPLICE -+ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; -+#endif -+ se->conn.capable |= FUSE_CAP_SPLICE_READ; -+#endif -+ } -+ if (se->conn.proto_minor >= 18) -+ se->conn.capable |= FUSE_CAP_IOCTL_DIR; -+ -+ /* Default settings for modern filesystems. 
-+ * -+ * Most of these capabilities were disabled by default in -+ * libfuse2 for backwards compatibility reasons. In libfuse3, -+ * we can finally enable them by default (as long as they're -+ * supported by the kernel). -+ */ -+#define LL_SET_DEFAULT(cond, cap) \ -+ if ((cond) && (se->conn.capable & (cap))) \ -+ se->conn.want |= (cap) -+ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); -+ LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); -+ LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); -+ LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); -+ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); -+ LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); -+ LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); -+ LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); -+ LL_SET_DEFAULT(se->op.getlk && se->op.setlk, -+ FUSE_CAP_POSIX_LOCKS); -+ LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); -+ LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); -+ LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, -+ FUSE_CAP_READDIRPLUS_AUTO); -+ se->conn.time_gran = 1; -+ -+ if (bufsize < FUSE_MIN_READ_BUFFER) { -+ fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", -+ bufsize); -+ bufsize = FUSE_MIN_READ_BUFFER; -+ } -+ se->bufsize = bufsize; -+ -+ if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) -+ se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; -+ -+ se->got_init = 1; -+ if (se->op.init) -+ se->op.init(se->userdata, &se->conn); -+ -+ if (se->conn.want & (~se->conn.capable)) { -+ fuse_log(FUSE_LOG_ERR, "fuse: error: filesystem requested capabilities " -+ "0x%x that are not supported by kernel, aborting.\n", -+ se->conn.want & (~se->conn.capable)); -+ fuse_reply_err(req, EPROTO); -+ se->error = -EPROTO; -+ fuse_session_exit(se); -+ return; -+ } -+ -+ unsigned max_read_mo = get_max_read(se->mo); -+ if (se->conn.max_read != max_read_mo) { -+ fuse_log(FUSE_LOG_ERR, "fuse: error: init() and fuse_session_new() " -+ "requested different maximum read size (%u vs %u)\n", -+ se->conn.max_read, 
max_read_mo); -+ fuse_reply_err(req, EPROTO); -+ se->error = -EPROTO; -+ fuse_session_exit(se); -+ return; -+ } -+ -+ if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { -+ se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; -+ } -+ if (arg->flags & FUSE_MAX_PAGES) { -+ outarg.flags |= FUSE_MAX_PAGES; -+ outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; -+ } -+ -+ /* Always enable big writes, this is superseded -+ by the max_write option */ -+ outarg.flags |= FUSE_BIG_WRITES; -+ -+ if (se->conn.want & FUSE_CAP_ASYNC_READ) -+ outarg.flags |= FUSE_ASYNC_READ; -+ if (se->conn.want & FUSE_CAP_POSIX_LOCKS) -+ outarg.flags |= FUSE_POSIX_LOCKS; -+ if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) -+ outarg.flags |= FUSE_ATOMIC_O_TRUNC; -+ if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) -+ outarg.flags |= FUSE_EXPORT_SUPPORT; -+ if (se->conn.want & FUSE_CAP_DONT_MASK) -+ outarg.flags |= FUSE_DONT_MASK; -+ if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) -+ outarg.flags |= FUSE_FLOCK_LOCKS; -+ if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) -+ outarg.flags |= FUSE_AUTO_INVAL_DATA; -+ if (se->conn.want & FUSE_CAP_READDIRPLUS) -+ outarg.flags |= FUSE_DO_READDIRPLUS; -+ if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) -+ outarg.flags |= FUSE_READDIRPLUS_AUTO; -+ if (se->conn.want & FUSE_CAP_ASYNC_DIO) -+ outarg.flags |= FUSE_ASYNC_DIO; -+ if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) -+ outarg.flags |= FUSE_WRITEBACK_CACHE; -+ if (se->conn.want & FUSE_CAP_POSIX_ACL) -+ outarg.flags |= FUSE_POSIX_ACL; -+ outarg.max_readahead = se->conn.max_readahead; -+ outarg.max_write = se->conn.max_write; -+ if (se->conn.proto_minor >= 13) { -+ if (se->conn.max_background >= (1 << 16)) -+ se->conn.max_background = (1 << 16) - 1; -+ if (se->conn.congestion_threshold > se->conn.max_background) -+ se->conn.congestion_threshold = se->conn.max_background; -+ if (!se->conn.congestion_threshold) { -+ se->conn.congestion_threshold = -+ se->conn.max_background * 3 / 4; -+ } -+ -+ 
outarg.max_background = se->conn.max_background; -+ outarg.congestion_threshold = se->conn.congestion_threshold; -+ } -+ if (se->conn.proto_minor >= 23) -+ outarg.time_gran = se->conn.time_gran; -+ -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); -+ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); -+ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", -+ outarg.max_readahead); -+ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); -+ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", -+ outarg.max_background); -+ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", -+ outarg.congestion_threshold); -+ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", -+ outarg.time_gran); -+ } -+ if (arg->minor < 5) -+ outargsize = FUSE_COMPAT_INIT_OUT_SIZE; -+ else if (arg->minor < 23) -+ outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; -+ -+ send_reply_ok(req, &outarg, outargsize); -+} -+ -+static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+{ -+ struct fuse_session *se = req->se; -+ -+ (void) nodeid; -+ (void) inarg; -+ -+ se->got_destroy = 1; -+ if (se->op.destroy) -+ se->op.destroy(se->userdata); -+ -+ send_reply_ok(req, NULL, 0); -+} -+ -+static void list_del_nreq(struct fuse_notify_req *nreq) -+{ -+ struct fuse_notify_req *prev = nreq->prev; -+ struct fuse_notify_req *next = nreq->next; -+ prev->next = next; -+ next->prev = prev; -+} -+ -+static void list_add_nreq(struct fuse_notify_req *nreq, -+ struct fuse_notify_req *next) -+{ -+ struct fuse_notify_req *prev = next->prev; -+ nreq->next = next; -+ nreq->prev = prev; -+ prev->next = nreq; -+ next->prev = nreq; -+} -+ -+static void list_init_nreq(struct fuse_notify_req *nreq) -+{ -+ nreq->next = nreq; -+ nreq->prev = nreq; -+} -+ -+static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, -+ const void *inarg, const struct fuse_buf *buf) -+{ -+ struct fuse_session *se = req->se; -+ struct fuse_notify_req *nreq; -+ struct fuse_notify_req 
*head; -+ -+ pthread_mutex_lock(&se->lock); -+ head = &se->notify_list; -+ for (nreq = head->next; nreq != head; nreq = nreq->next) { -+ if (nreq->unique == req->unique) { -+ list_del_nreq(nreq); -+ break; -+ } -+ } -+ pthread_mutex_unlock(&se->lock); -+ -+ if (nreq != head) -+ nreq->reply(nreq, req, nodeid, inarg, buf); -+} -+ -+static int send_notify_iov(struct fuse_session *se, int notify_code, -+ struct iovec *iov, int count) -+{ -+ struct fuse_out_header out; -+ -+ if (!se->got_init) -+ return -ENOTCONN; -+ -+ out.unique = 0; -+ out.error = notify_code; -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct fuse_out_header); -+ -+ return fuse_send_msg(se, NULL, iov, count); -+} -+ -+int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) -+{ -+ if (ph != NULL) { -+ struct fuse_notify_poll_wakeup_out outarg; -+ struct iovec iov[2]; -+ -+ outarg.kh = ph->kh; -+ -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ -+ return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); -+ } else { -+ return 0; -+ } -+} -+ -+int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, -+ off_t off, off_t len) -+{ -+ struct fuse_notify_inval_inode_out outarg; -+ struct iovec iov[2]; -+ -+ if (!se) -+ return -EINVAL; -+ -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) -+ return -ENOSYS; -+ -+ outarg.ino = ino; -+ outarg.off = off; -+ outarg.len = len; -+ -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ -+ return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); -+} -+ -+int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, -+ const char *name, size_t namelen) -+{ -+ struct fuse_notify_inval_entry_out outarg; -+ struct iovec iov[3]; -+ -+ if (!se) -+ return -EINVAL; -+ -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) -+ return -ENOSYS; -+ -+ outarg.parent = parent; -+ outarg.namelen = namelen; -+ outarg.padding = 0; -+ -+ iov[1].iov_base = &outarg; -+ 
iov[1].iov_len = sizeof(outarg); -+ iov[2].iov_base = (void *)name; -+ iov[2].iov_len = namelen + 1; -+ -+ return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); -+} -+ -+int fuse_lowlevel_notify_delete(struct fuse_session *se, -+ fuse_ino_t parent, fuse_ino_t child, -+ const char *name, size_t namelen) -+{ -+ struct fuse_notify_delete_out outarg; -+ struct iovec iov[3]; -+ -+ if (!se) -+ return -EINVAL; -+ -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) -+ return -ENOSYS; -+ -+ outarg.parent = parent; -+ outarg.child = child; -+ outarg.namelen = namelen; -+ outarg.padding = 0; -+ -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ iov[2].iov_base = (void *)name; -+ iov[2].iov_len = namelen + 1; -+ -+ return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); -+} -+ -+int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -+ off_t offset, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags) -+{ -+ struct fuse_out_header out; -+ struct fuse_notify_store_out outarg; -+ struct iovec iov[3]; -+ size_t size = fuse_buf_size(bufv); -+ int res; -+ -+ if (!se) -+ return -EINVAL; -+ -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) -+ return -ENOSYS; -+ -+ out.unique = 0; -+ out.error = FUSE_NOTIFY_STORE; -+ -+ outarg.nodeid = ino; -+ outarg.offset = offset; -+ outarg.size = size; -+ outarg.padding = 0; -+ -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(out); -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ -+ res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); -+ if (res > 0) -+ res = -res; -+ -+ return res; -+} -+ -+struct fuse_retrieve_req { -+ struct fuse_notify_req nreq; -+ void *cookie; -+}; -+ -+static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, -+ fuse_req_t req, fuse_ino_t ino, -+ const void *inarg, -+ const struct fuse_buf *ibuf) -+{ -+ struct fuse_session *se = req->se; -+ struct fuse_retrieve_req *rreq = -+ container_of(nreq, struct 
fuse_retrieve_req, nreq); -+ const struct fuse_notify_retrieve_in *arg = inarg; -+ struct fuse_bufvec bufv = { -+ .buf[0] = *ibuf, -+ .count = 1, -+ }; -+ -+ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) -+ bufv.buf[0].mem = PARAM(arg); -+ -+ bufv.buf[0].size -= sizeof(struct fuse_in_header) + -+ sizeof(struct fuse_notify_retrieve_in); -+ -+ if (bufv.buf[0].size < arg->size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); -+ fuse_reply_none(req); -+ goto out; -+ } -+ bufv.buf[0].size = arg->size; -+ -+ if (se->op.retrieve_reply) { -+ se->op.retrieve_reply(req, rreq->cookie, ino, -+ arg->offset, &bufv); -+ } else { -+ fuse_reply_none(req); -+ } -+out: -+ free(rreq); -+ if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) -+ fuse_ll_clear_pipe(se); -+} -+ -+int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -+ size_t size, off_t offset, void *cookie) -+{ -+ struct fuse_notify_retrieve_out outarg; -+ struct iovec iov[2]; -+ struct fuse_retrieve_req *rreq; -+ int err; -+ -+ if (!se) -+ return -EINVAL; -+ -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) -+ return -ENOSYS; -+ -+ rreq = malloc(sizeof(*rreq)); -+ if (rreq == NULL) -+ return -ENOMEM; -+ -+ pthread_mutex_lock(&se->lock); -+ rreq->cookie = cookie; -+ rreq->nreq.unique = se->notify_ctr++; -+ rreq->nreq.reply = fuse_ll_retrieve_reply; -+ list_add_nreq(&rreq->nreq, &se->notify_list); -+ pthread_mutex_unlock(&se->lock); -+ -+ outarg.notify_unique = rreq->nreq.unique; -+ outarg.nodeid = ino; -+ outarg.offset = offset; -+ outarg.size = size; -+ outarg.padding = 0; -+ -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ -+ err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); -+ if (err) { -+ pthread_mutex_lock(&se->lock); -+ list_del_nreq(&rreq->nreq); -+ pthread_mutex_unlock(&se->lock); -+ free(rreq); -+ } -+ -+ return err; -+} -+ -+void *fuse_req_userdata(fuse_req_t req) -+{ -+ return req->se->userdata; -+} -+ -+const 
struct fuse_ctx *fuse_req_ctx(fuse_req_t req) -+{ -+ return &req->ctx; -+} -+ -+void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, -+ void *data) -+{ -+ pthread_mutex_lock(&req->lock); -+ pthread_mutex_lock(&req->se->lock); -+ req->u.ni.func = func; -+ req->u.ni.data = data; -+ pthread_mutex_unlock(&req->se->lock); -+ if (req->interrupted && func) -+ func(req, data); -+ pthread_mutex_unlock(&req->lock); -+} -+ -+int fuse_req_interrupted(fuse_req_t req) -+{ -+ int interrupted; -+ -+ pthread_mutex_lock(&req->se->lock); -+ interrupted = req->interrupted; -+ pthread_mutex_unlock(&req->se->lock); -+ -+ return interrupted; -+} -+ -+static struct { -+ void (*func)(fuse_req_t, fuse_ino_t, const void *); -+ const char *name; -+} fuse_ll_ops[] = { -+ [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, -+ [FUSE_FORGET] = { do_forget, "FORGET" }, -+ [FUSE_GETATTR] = { do_getattr, "GETATTR" }, -+ [FUSE_SETATTR] = { do_setattr, "SETATTR" }, -+ [FUSE_READLINK] = { do_readlink, "READLINK" }, -+ [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, -+ [FUSE_MKNOD] = { do_mknod, "MKNOD" }, -+ [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, -+ [FUSE_UNLINK] = { do_unlink, "UNLINK" }, -+ [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, -+ [FUSE_RENAME] = { do_rename, "RENAME" }, -+ [FUSE_LINK] = { do_link, "LINK" }, -+ [FUSE_OPEN] = { do_open, "OPEN" }, -+ [FUSE_READ] = { do_read, "READ" }, -+ [FUSE_WRITE] = { do_write, "WRITE" }, -+ [FUSE_STATFS] = { do_statfs, "STATFS" }, -+ [FUSE_RELEASE] = { do_release, "RELEASE" }, -+ [FUSE_FSYNC] = { do_fsync, "FSYNC" }, -+ [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, -+ [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, -+ [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, -+ [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, -+ [FUSE_FLUSH] = { do_flush, "FLUSH" }, -+ [FUSE_INIT] = { do_init, "INIT" }, -+ [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, -+ [FUSE_READDIR] = { do_readdir, "READDIR" }, -+ [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, -+ 
[FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, -+ [FUSE_GETLK] = { do_getlk, "GETLK" }, -+ [FUSE_SETLK] = { do_setlk, "SETLK" }, -+ [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, -+ [FUSE_ACCESS] = { do_access, "ACCESS" }, -+ [FUSE_CREATE] = { do_create, "CREATE" }, -+ [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, -+ [FUSE_BMAP] = { do_bmap, "BMAP" }, -+ [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, -+ [FUSE_POLL] = { do_poll, "POLL" }, -+ [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, -+ [FUSE_DESTROY] = { do_destroy, "DESTROY" }, -+ [FUSE_NOTIFY_REPLY] = { (void *) 1, "NOTIFY_REPLY" }, -+ [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, -+ [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS"}, -+ [FUSE_RENAME2] = { do_rename2, "RENAME2" }, -+ [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, -+ [FUSE_LSEEK] = { do_lseek, "LSEEK" }, -+ [CUSE_INIT] = { cuse_lowlevel_init, "CUSE_INIT" }, -+}; -+ -+#define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) -+ -+static const char *opname(enum fuse_opcode opcode) -+{ -+ if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) -+ return "???"; -+ else -+ return fuse_ll_ops[opcode].name; -+} -+ -+static int fuse_ll_copy_from_pipe(struct fuse_bufvec *dst, -+ struct fuse_bufvec *src) -+{ -+ ssize_t res = fuse_buf_copy(dst, src, 0); -+ if (res < 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", strerror(-res)); -+ return res; -+ } -+ if ((size_t)res < fuse_buf_size(dst)) { -+ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); -+ return -1; -+ } -+ return 0; -+} -+ -+void fuse_session_process_buf(struct fuse_session *se, -+ const struct fuse_buf *buf) -+{ -+ fuse_session_process_buf_int(se, buf, NULL); -+} -+ -+void fuse_session_process_buf_int(struct fuse_session *se, -+ const struct fuse_buf *buf, struct fuse_chan *ch) -+{ -+ const size_t write_header_size = sizeof(struct fuse_in_header) + -+ sizeof(struct fuse_write_in); -+ struct fuse_bufvec bufv = { .buf[0] = *buf, .count 
= 1 }; -+ struct fuse_bufvec tmpbuf = FUSE_BUFVEC_INIT(write_header_size); -+ struct fuse_in_header *in; -+ const void *inarg; -+ struct fuse_req *req; -+ void *mbuf = NULL; -+ int err; -+ int res; -+ -+ if (buf->flags & FUSE_BUF_IS_FD) { -+ if (buf->size < tmpbuf.buf[0].size) -+ tmpbuf.buf[0].size = buf->size; -+ -+ mbuf = malloc(tmpbuf.buf[0].size); -+ if (mbuf == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate header\n"); -+ goto clear_pipe; -+ } -+ tmpbuf.buf[0].mem = mbuf; -+ -+ res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); -+ if (res < 0) -+ goto clear_pipe; -+ -+ in = mbuf; -+ } else { -+ in = buf->mem; -+ } -+ -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, -+ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", -+ (unsigned long long) in->unique, -+ opname((enum fuse_opcode) in->opcode), in->opcode, -+ (unsigned long long) in->nodeid, buf->size, in->pid); -+ } -+ -+ req = fuse_ll_alloc_req(se); -+ if (req == NULL) { -+ struct fuse_out_header out = { -+ .unique = in->unique, -+ .error = -ENOMEM, -+ }; -+ struct iovec iov = { -+ .iov_base = &out, -+ .iov_len = sizeof(struct fuse_out_header), -+ }; -+ -+ fuse_send_msg(se, ch, &iov, 1); -+ goto clear_pipe; -+ } -+ -+ req->unique = in->unique; -+ req->ctx.uid = in->uid; -+ req->ctx.gid = in->gid; -+ req->ctx.pid = in->pid; -+ req->ch = ch ? fuse_chan_get(ch) : NULL; -+ -+ err = EIO; -+ if (!se->got_init) { -+ enum fuse_opcode expected; -+ -+ expected = se->cuse_data ? 
CUSE_INIT : FUSE_INIT; -+ if (in->opcode != expected) -+ goto reply_err; -+ } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) -+ goto reply_err; -+ -+ err = EACCES; -+ /* Implement -o allow_root */ -+ if (se->deny_others && in->uid != se->owner && in->uid != 0 && -+ in->opcode != FUSE_INIT && in->opcode != FUSE_READ && -+ in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && -+ in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && -+ in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && -+ in->opcode != FUSE_NOTIFY_REPLY && -+ in->opcode != FUSE_READDIRPLUS) -+ goto reply_err; -+ -+ err = ENOSYS; -+ if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) -+ goto reply_err; -+ if (in->opcode != FUSE_INTERRUPT) { -+ struct fuse_req *intr; -+ pthread_mutex_lock(&se->lock); -+ intr = check_interrupt(se, req); -+ list_add_req(req, &se->list); -+ pthread_mutex_unlock(&se->lock); -+ if (intr) -+ fuse_reply_err(intr, EAGAIN); -+ } -+ -+ if ((buf->flags & FUSE_BUF_IS_FD) && write_header_size < buf->size && -+ (in->opcode != FUSE_WRITE || !se->op.write_buf) && -+ in->opcode != FUSE_NOTIFY_REPLY) { -+ void *newmbuf; -+ -+ err = ENOMEM; -+ newmbuf = realloc(mbuf, buf->size); -+ if (newmbuf == NULL) -+ goto reply_err; -+ mbuf = newmbuf; -+ -+ tmpbuf = FUSE_BUFVEC_INIT(buf->size - write_header_size); -+ tmpbuf.buf[0].mem = (char *)mbuf + write_header_size; -+ -+ res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); -+ err = -res; -+ if (res < 0) -+ goto reply_err; -+ -+ in = mbuf; -+ } -+ -+ inarg = (void *) &in[1]; -+ if (in->opcode == FUSE_WRITE && se->op.write_buf) -+ do_write_buf(req, in->nodeid, inarg, buf); -+ else if (in->opcode == FUSE_NOTIFY_REPLY) -+ do_notify_reply(req, in->nodeid, inarg, buf); -+ else -+ fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); -+ -+out_free: -+ free(mbuf); -+ return; -+ -+reply_err: -+ fuse_reply_err(req, err); -+clear_pipe: -+ if (buf->flags & FUSE_BUF_IS_FD) -+ fuse_ll_clear_pipe(se); -+ goto out_free; 
-+} -+ -+#define LL_OPTION(n,o,v) \ -+ { n, offsetof(struct fuse_session, o), v } -+ -+static const struct fuse_opt fuse_ll_opts[] = { -+ LL_OPTION("debug", debug, 1), -+ LL_OPTION("-d", debug, 1), -+ LL_OPTION("--debug", debug, 1), -+ LL_OPTION("allow_root", deny_others, 1), -+ FUSE_OPT_END -+}; -+ -+void fuse_lowlevel_version(void) -+{ -+ printf("using FUSE kernel interface version %i.%i\n", -+ FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); -+ fuse_mount_version(); -+} -+ -+void fuse_lowlevel_help(void) -+{ -+ /* These are not all options, but the ones that are -+ potentially of interest to an end-user */ -+ printf( -+" -o allow_other allow access by all users\n" -+" -o allow_root allow access by root\n" -+" -o auto_unmount auto unmount on process termination\n"); -+} -+ -+void fuse_session_destroy(struct fuse_session *se) -+{ -+ struct fuse_ll_pipe *llp; -+ -+ if (se->got_init && !se->got_destroy) { -+ if (se->op.destroy) -+ se->op.destroy(se->userdata); -+ } -+ llp = pthread_getspecific(se->pipe_key); -+ if (llp != NULL) -+ fuse_ll_pipe_free(llp); -+ pthread_key_delete(se->pipe_key); -+ pthread_mutex_destroy(&se->lock); -+ free(se->cuse_data); -+ if (se->fd != -1) -+ close(se->fd); -+ destroy_mount_opts(se->mo); -+ free(se); -+} -+ -+ -+static void fuse_ll_pipe_destructor(void *data) -+{ -+ struct fuse_ll_pipe *llp = data; -+ fuse_ll_pipe_free(llp); -+} -+ -+int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf) -+{ -+ return fuse_session_receive_buf_int(se, buf, NULL); -+} -+ -+int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, -+ struct fuse_chan *ch) -+{ -+ int err; -+ ssize_t res; -+#ifdef HAVE_SPLICE -+ size_t bufsize = se->bufsize; -+ struct fuse_ll_pipe *llp; -+ struct fuse_buf tmpbuf; -+ -+ if (se->conn.proto_minor < 14 || !(se->conn.want & FUSE_CAP_SPLICE_READ)) -+ goto fallback; -+ -+ llp = fuse_ll_get_pipe(se); -+ if (llp == NULL) -+ goto fallback; -+ -+ if (llp->size < bufsize) { -+ if 
(llp->can_grow) { -+ res = fcntl(llp->pipe[0], F_SETPIPE_SZ, bufsize); -+ if (res == -1) { -+ llp->can_grow = 0; -+ res = grow_pipe_to_max(llp->pipe[0]); -+ if (res > 0) -+ llp->size = res; -+ goto fallback; -+ } -+ llp->size = res; -+ } -+ if (llp->size < bufsize) -+ goto fallback; -+ } -+ -+ res = splice(ch ? ch->fd : se->fd, -+ NULL, llp->pipe[1], NULL, bufsize, 0); -+ err = errno; -+ -+ if (fuse_session_exited(se)) -+ return 0; -+ -+ if (res == -1) { -+ if (err == ENODEV) { -+ /* Filesystem was unmounted, or connection was aborted -+ via /sys/fs/fuse/connections */ -+ fuse_session_exit(se); -+ return 0; -+ } -+ if (err != EINTR && err != EAGAIN) -+ perror("fuse: splice from device"); -+ return -err; -+ } -+ -+ if (res < sizeof(struct fuse_in_header)) { -+ fuse_log(FUSE_LOG_ERR, "short splice from fuse device\n"); -+ return -EIO; -+ } -+ -+ tmpbuf = (struct fuse_buf) { -+ .size = res, -+ .flags = FUSE_BUF_IS_FD, -+ .fd = llp->pipe[0], -+ }; -+ -+ /* -+ * Don't bother with zero copy for small requests. -+ * fuse_loop_mt() needs to check for FORGET so this more than -+ * just an optimization. 
-+ */ -+ if (res < sizeof(struct fuse_in_header) + -+ sizeof(struct fuse_write_in) + pagesize) { -+ struct fuse_bufvec src = { .buf[0] = tmpbuf, .count = 1 }; -+ struct fuse_bufvec dst = { .count = 1 }; -+ -+ if (!buf->mem) { -+ buf->mem = malloc(se->bufsize); -+ if (!buf->mem) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: failed to allocate read buffer\n"); -+ return -ENOMEM; -+ } -+ } -+ buf->size = se->bufsize; -+ buf->flags = 0; -+ dst.buf[0] = *buf; -+ -+ res = fuse_buf_copy(&dst, &src, 0); -+ if (res < 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", -+ strerror(-res)); -+ fuse_ll_clear_pipe(se); -+ return res; -+ } -+ if (res < tmpbuf.size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); -+ fuse_ll_clear_pipe(se); -+ return -EIO; -+ } -+ assert(res == tmpbuf.size); -+ -+ } else { -+ /* Don't overwrite buf->mem, as that would cause a leak */ -+ buf->fd = tmpbuf.fd; -+ buf->flags = tmpbuf.flags; -+ } -+ buf->size = tmpbuf.size; -+ -+ return res; -+ -+fallback: -+#endif -+ if (!buf->mem) { -+ buf->mem = malloc(se->bufsize); -+ if (!buf->mem) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: failed to allocate read buffer\n"); -+ return -ENOMEM; -+ } -+ } -+ -+restart: -+ res = read(ch ? 
ch->fd : se->fd, buf->mem, se->bufsize); -+ err = errno; -+ -+ if (fuse_session_exited(se)) -+ return 0; -+ if (res == -1) { -+ /* ENOENT means the operation was interrupted, it's safe -+ to restart */ -+ if (err == ENOENT) -+ goto restart; -+ -+ if (err == ENODEV) { -+ /* Filesystem was unmounted, or connection was aborted -+ via /sys/fs/fuse/connections */ -+ fuse_session_exit(se); -+ return 0; -+ } -+ /* Errors occurring during normal operation: EINTR (read -+ interrupted), EAGAIN (nonblocking I/O), ENODEV (filesystem -+ umounted) */ -+ if (err != EINTR && err != EAGAIN) -+ perror("fuse: reading device"); -+ return -err; -+ } -+ if ((size_t) res < sizeof(struct fuse_in_header)) { -+ fuse_log(FUSE_LOG_ERR, "short read on fuse device\n"); -+ return -EIO; -+ } -+ -+ buf->size = res; -+ -+ return res; -+} -+ -+struct fuse_session *fuse_session_new(struct fuse_args *args, -+ const struct fuse_lowlevel_ops *op, -+ size_t op_size, void *userdata) -+{ -+ int err; -+ struct fuse_session *se; -+ struct mount_opts *mo; -+ -+ if (sizeof(struct fuse_lowlevel_ops) < op_size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); -+ op_size = sizeof(struct fuse_lowlevel_ops); -+ } -+ -+ if (args->argc == 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: empty argv passed to fuse_session_new().\n"); -+ return NULL; -+ } -+ -+ se = (struct fuse_session *) calloc(1, sizeof(struct fuse_session)); -+ if (se == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate fuse object\n"); -+ goto out1; -+ } -+ se->fd = -1; -+ se->conn.max_write = UINT_MAX; -+ se->conn.max_readahead = UINT_MAX; -+ -+ /* Parse options */ -+ if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) -+ goto out2; -+ if(se->deny_others) { -+ /* Allowing access only by root is done by instructing -+ * kernel to allow access by everyone, and then restricting -+ * access to root and mountpoint owner in libfuse. 
-+ */ -+ // We may be adding the option a second time, but -+ // that doesn't hurt. -+ if(fuse_opt_add_arg(args, "-oallow_other") == -1) -+ goto out2; -+ } -+ mo = parse_mount_opts(args); -+ if (mo == NULL) -+ goto out3; -+ -+ if(args->argc == 1 && -+ args->argv[0][0] == '-') { -+ fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " -+ "will be ignored\n"); -+ } else if (args->argc != 1) { -+ int i; -+ fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); -+ for(i = 1; i < args->argc-1; i++) -+ fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); -+ fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); -+ goto out4; -+ } -+ -+ if (se->debug) -+ fuse_log(FUSE_LOG_DEBUG, "FUSE library version: %s\n", PACKAGE_VERSION); -+ -+ se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + -+ FUSE_BUFFER_HEADER_SIZE; -+ -+ list_init_req(&se->list); -+ list_init_req(&se->interrupts); -+ list_init_nreq(&se->notify_list); -+ se->notify_ctr = 1; -+ fuse_mutex_init(&se->lock); -+ -+ err = pthread_key_create(&se->pipe_key, fuse_ll_pipe_destructor); -+ if (err) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to create thread specific key: %s\n", -+ strerror(err)); -+ goto out5; -+ } -+ -+ memcpy(&se->op, op, op_size); -+ se->owner = getuid(); -+ se->userdata = userdata; -+ -+ se->mo = mo; -+ return se; -+ -+out5: -+ pthread_mutex_destroy(&se->lock); -+out4: -+ fuse_opt_free_args(args); -+out3: -+ free(mo); -+out2: -+ free(se); -+out1: -+ return NULL; -+} -+ -+int fuse_session_mount(struct fuse_session *se, const char *mountpoint) -+{ -+ int fd; -+ -+ /* -+ * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos -+ * would ensue. -+ */ -+ do { -+ fd = open("/dev/null", O_RDWR); -+ if (fd > 2) -+ close(fd); -+ } while (fd >= 0 && fd <= 2); -+ -+ /* -+ * To allow FUSE daemons to run without privileges, the caller may open -+ * /dev/fuse before launching the file system and pass on the file -+ * descriptor by specifying /dev/fd/N as the mount point. 
Note that the -+ * parent process takes care of performing the mount in this case. -+ */ -+ fd = fuse_mnt_parse_fuse_fd(mountpoint); -+ if (fd != -1) { -+ if (fcntl(fd, F_GETFD) == -1) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: Invalid file descriptor /dev/fd/%u\n", -+ fd); -+ return -1; -+ } -+ se->fd = fd; -+ return 0; -+ } -+ -+ /* Open channel */ -+ fd = fuse_kern_mount(mountpoint, se->mo); -+ if (fd == -1) -+ return -1; -+ se->fd = fd; -+ -+ /* Save mountpoint */ -+ se->mountpoint = strdup(mountpoint); -+ if (se->mountpoint == NULL) -+ goto error_out; -+ -+ return 0; -+ -+error_out: -+ fuse_kern_unmount(mountpoint, fd); -+ return -1; -+} -+ -+int fuse_session_fd(struct fuse_session *se) -+{ -+ return se->fd; -+} -+ -+void fuse_session_unmount(struct fuse_session *se) -+{ -+ if (se->mountpoint != NULL) { -+ fuse_kern_unmount(se->mountpoint, se->fd); -+ free(se->mountpoint); -+ se->mountpoint = NULL; -+ } -+} -+ -+#ifdef linux -+int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) -+{ -+ char *buf; -+ size_t bufsize = 1024; -+ char path[128]; -+ int ret; -+ int fd; -+ unsigned long pid = req->ctx.pid; -+ char *s; -+ -+ sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); -+ -+retry: -+ buf = malloc(bufsize); -+ if (buf == NULL) -+ return -ENOMEM; -+ -+ ret = -EIO; -+ fd = open(path, O_RDONLY); -+ if (fd == -1) -+ goto out_free; -+ -+ ret = read(fd, buf, bufsize); -+ close(fd); -+ if (ret < 0) { -+ ret = -EIO; -+ goto out_free; -+ } -+ -+ if ((size_t)ret == bufsize) { -+ free(buf); -+ bufsize *= 4; -+ goto retry; -+ } -+ -+ ret = -EIO; -+ s = strstr(buf, "\nGroups:"); -+ if (s == NULL) -+ goto out_free; -+ -+ s += 8; -+ ret = 0; -+ while (1) { -+ char *end; -+ unsigned long val = strtoul(s, &end, 0); -+ if (end == s) -+ break; -+ -+ s = end; -+ if (ret < size) -+ list[ret] = val; -+ ret++; -+ } -+ -+out_free: -+ free(buf); -+ return ret; -+} -+#else /* linux */ -+/* -+ * This is currently not implemented on other than Linux... 
-+ */ -+int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) -+{ -+ (void) req; (void) size; (void) list; -+ return -ENOSYS; -+} -+#endif -+ -+void fuse_session_exit(struct fuse_session *se) -+{ -+ se->exited = 1; -+} -+ -+void fuse_session_reset(struct fuse_session *se) -+{ -+ se->exited = 0; -+ se->error = 0; -+} -+ -+int fuse_session_exited(struct fuse_session *se) -+{ -+ return se->exited; -+} --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Add-main-virtio-loop.patch b/kvm-virtiofsd-Add-main-virtio-loop.patch deleted file mode 100644 index c0ba96a..0000000 --- a/kvm-virtiofsd-Add-main-virtio-loop.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 6f413d8b76ff38e5bc01f36515ca71d7fd6e6144 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:58 +0100 -Subject: [PATCH 027/116] virtiofsd: Add main virtio loop -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-24-dgilbert@redhat.com> -Patchwork-id: 93475 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 023/112] virtiofsd: Add main virtio loop -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Processes incoming requests on the vhost-user fd. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 204d8ae57b3c57098642c79b3c03d42495149c09) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 42 +++++++++++++++++++++++++++++++++++++++--- - 1 file changed, 39 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 2ae3c76..1928a20 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -11,12 +11,14 @@ - * See the file COPYING.LIB - */ - -+#include "fuse_virtio.h" - #include "fuse_i.h" - #include "standard-headers/linux/fuse.h" - #include "fuse_misc.h" - #include "fuse_opt.h" --#include "fuse_virtio.h" - -+#include -+#include - #include - #include - #include -@@ -80,15 +82,49 @@ static const VuDevIface fv_iface = { - .queue_is_processed_in_order = fv_queue_order, - }; - -+/* -+ * Main loop; this mostly deals with events on the vhost-user -+ * socket itself, and not actual fuse data. -+ */ - int virtio_loop(struct fuse_session *se) - { - fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__); - -- while (1) { -- /* TODO: Add stuffing */ -+ while (!fuse_session_exited(se)) { -+ struct pollfd pf[1]; -+ pf[0].fd = se->vu_socketfd; -+ pf[0].events = POLLIN; -+ pf[0].revents = 0; -+ -+ fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for VU event\n", __func__); -+ int poll_res = ppoll(pf, 1, NULL, NULL); -+ -+ if (poll_res == -1) { -+ if (errno == EINTR) { -+ fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n", -+ __func__); -+ continue; -+ } -+ fuse_log(FUSE_LOG_ERR, "virtio_loop ppoll: %m\n"); -+ break; -+ } -+ assert(poll_res == 1); -+ if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { -+ fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x\n", __func__, -+ pf[0].revents); -+ break; -+ } -+ assert(pf[0].revents & POLLIN); -+ fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__); -+ if (!vu_dispatch(&se->virtio_dev->dev)) { -+ fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__); -+ break; -+ } - } - - 
fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); -+ -+ return 0; - } - - int virtio_session_mount(struct fuse_session *se) --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Add-options-for-virtio.patch b/kvm-virtiofsd-Add-options-for-virtio.patch deleted file mode 100644 index 8ac7fa7..0000000 --- a/kvm-virtiofsd-Add-options-for-virtio.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 9c1bbe327cf8f88ffc78eed0fce8cdd6f3f006ef Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:54 +0100 -Subject: [PATCH 023/116] virtiofsd: Add options for virtio -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-20-dgilbert@redhat.com> -Patchwork-id: 93473 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 019/112] virtiofsd: Add options for virtio -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Add options to specify parameters for virtio-fs paths, i.e. - - ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 205de006aab8dcbe546a7e3a51d295c2d05e654b) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 1 + - tools/virtiofsd/fuse_lowlevel.c | 11 ++++++++--- - tools/virtiofsd/helper.c | 14 +++++++------- - 3 files changed, 16 insertions(+), 10 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index bae0699..26b1a7d 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -63,6 +63,7 @@ struct fuse_session { - struct fuse_notify_req notify_list; - size_t bufsize; - int error; -+ char *vu_socket_path; - }; - - struct fuse_chan { -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 8552cfb..17e8718 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2115,8 +2115,11 @@ reply_err: - } - - static const struct fuse_opt fuse_ll_opts[] = { -- LL_OPTION("debug", debug, 1), LL_OPTION("-d", debug, 1), -- LL_OPTION("--debug", debug, 1), LL_OPTION("allow_root", deny_others, 1), -+ LL_OPTION("debug", debug, 1), -+ LL_OPTION("-d", debug, 1), -+ LL_OPTION("--debug", debug, 1), -+ LL_OPTION("allow_root", deny_others, 1), -+ LL_OPTION("--socket-path=%s", vu_socket_path, 0), - FUSE_OPT_END - }; - -@@ -2132,7 +2135,9 @@ void fuse_lowlevel_help(void) - * These are not all options, but the ones that are - * potentially of interest to an end-user - */ -- printf(" -o allow_root allow access by root\n"); -+ printf( -+ " -o allow_root allow access by root\n" -+ " --socket-path=PATH path for the vhost-user socket\n"); - } - - void fuse_session_destroy(struct fuse_session *se) -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 9333691..676032e 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -127,13 +127,13 @@ static const struct fuse_opt conn_info_opt_spec[] = { - - void fuse_cmdline_help(void) - { -- printf( -- " -h --help print help\n" -- " -V --version print version\n" 
-- " -d -o debug enable debug output (implies -f)\n" -- " -f foreground operation\n" -- " -o max_idle_threads the maximum number of idle worker threads\n" -- " allowed (default: 10)\n"); -+ printf(" -h --help print help\n" -+ " -V --version print version\n" -+ " -d -o debug enable debug output (implies -f)\n" -+ " -f foreground operation\n" -+ " -o max_idle_threads the maximum number of idle worker " -+ "threads\n" -+ " allowed (default: 10)\n"); - } - - static int fuse_helper_opt_proc(void *data, const char *arg, int key, --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Add-passthrough_ll.patch b/kvm-virtiofsd-Add-passthrough_ll.patch deleted file mode 100644 index 2510551..0000000 --- a/kvm-virtiofsd-Add-passthrough_ll.patch +++ /dev/null @@ -1,1387 +0,0 @@ -From 18ef831cac81a6bd2336c73dda357d9d69f8fd25 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:43 +0100 -Subject: [PATCH 012/116] virtiofsd: Add passthrough_ll -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-9-dgilbert@redhat.com> -Patchwork-id: 93462 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 008/112] virtiofsd: Add passthrough_ll -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -passthrough_ll is one of the examples in the upstream fuse project -and is the main part of our daemon here. It passes through requests -from fuse to the underlying filesystem, using syscalls as directly -as possible. - ->From libfuse fuse-3.8.0 - -Signed-off-by: Dr. David Alan Gilbert - Fixed up 'GPL' to 'GPLv2' as per Dan's comments and consistent - with the 'LICENSE' file in libfuse; patch sent to libfuse to fix - it upstream. -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 7c6b66027241f41720240fc6ee1021cdbd975b2e) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 1338 ++++++++++++++++++++++++++++++++++++++ - 1 file changed, 1338 insertions(+) - create mode 100644 tools/virtiofsd/passthrough_ll.c - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -new file mode 100644 -index 0000000..e1a6056 ---- /dev/null -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -0,0 +1,1338 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU GPLv2. -+ See the file COPYING. -+*/ -+ -+/** @file -+ * -+ * This file system mirrors the existing file system hierarchy of the -+ * system, starting at the root file system. This is implemented by -+ * just "passing through" all requests to the corresponding user-space -+ * libc functions. In contrast to passthrough.c and passthrough_fh.c, -+ * this implementation uses the low-level API. Its performance should -+ * be the least bad among the three, but many operations are not -+ * implemented. In particular, it is not possible to remove files (or -+ * directories) because the code necessary to defer actual removal -+ * until the file is not opened anymore would make the example much -+ * more complicated. -+ * -+ * When writeback caching is enabled (-o writeback mount option), it -+ * is only possible to write to files for which the mounting user has -+ * read permissions. This is because the writeback cache requires the -+ * kernel to be able to issue read requests for all files (which the -+ * passthrough filesystem cannot satisfy if it can't read the file in -+ * the underlying filesystem). 
-+ * -+ * Compile with: -+ * -+ * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o passthrough_ll -+ * -+ * ## Source code ## -+ * \include passthrough_ll.c -+ */ -+ -+#define _GNU_SOURCE -+#define FUSE_USE_VERSION 31 -+ -+#include "config.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "passthrough_helpers.h" -+ -+/* We are re-using pointers to our `struct lo_inode` and `struct -+ lo_dirp` elements as inodes. This means that we must be able to -+ store uintptr_t values in a fuse_ino_t variable. The following -+ incantation checks this condition at compile time. */ -+#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus -+_Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), -+ "fuse_ino_t too small to hold uintptr_t values!"); -+#else -+struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct \ -+ { unsigned _uintptr_to_must_hold_fuse_ino_t: -+ ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 
1 : -1); }; -+#endif -+ -+struct lo_inode { -+ struct lo_inode *next; /* protected by lo->mutex */ -+ struct lo_inode *prev; /* protected by lo->mutex */ -+ int fd; -+ bool is_symlink; -+ ino_t ino; -+ dev_t dev; -+ uint64_t refcount; /* protected by lo->mutex */ -+}; -+ -+enum { -+ CACHE_NEVER, -+ CACHE_NORMAL, -+ CACHE_ALWAYS, -+}; -+ -+struct lo_data { -+ pthread_mutex_t mutex; -+ int debug; -+ int writeback; -+ int flock; -+ int xattr; -+ const char *source; -+ double timeout; -+ int cache; -+ int timeout_set; -+ struct lo_inode root; /* protected by lo->mutex */ -+}; -+ -+static const struct fuse_opt lo_opts[] = { -+ { "writeback", -+ offsetof(struct lo_data, writeback), 1 }, -+ { "no_writeback", -+ offsetof(struct lo_data, writeback), 0 }, -+ { "source=%s", -+ offsetof(struct lo_data, source), 0 }, -+ { "flock", -+ offsetof(struct lo_data, flock), 1 }, -+ { "no_flock", -+ offsetof(struct lo_data, flock), 0 }, -+ { "xattr", -+ offsetof(struct lo_data, xattr), 1 }, -+ { "no_xattr", -+ offsetof(struct lo_data, xattr), 0 }, -+ { "timeout=%lf", -+ offsetof(struct lo_data, timeout), 0 }, -+ { "timeout=", -+ offsetof(struct lo_data, timeout_set), 1 }, -+ { "cache=never", -+ offsetof(struct lo_data, cache), CACHE_NEVER }, -+ { "cache=auto", -+ offsetof(struct lo_data, cache), CACHE_NORMAL }, -+ { "cache=always", -+ offsetof(struct lo_data, cache), CACHE_ALWAYS }, -+ -+ FUSE_OPT_END -+}; -+ -+static struct lo_data *lo_data(fuse_req_t req) -+{ -+ return (struct lo_data *) fuse_req_userdata(req); -+} -+ -+static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) -+{ -+ if (ino == FUSE_ROOT_ID) -+ return &lo_data(req)->root; -+ else -+ return (struct lo_inode *) (uintptr_t) ino; -+} -+ -+static int lo_fd(fuse_req_t req, fuse_ino_t ino) -+{ -+ return lo_inode(req, ino)->fd; -+} -+ -+static bool lo_debug(fuse_req_t req) -+{ -+ return lo_data(req)->debug != 0; -+} -+ -+static void lo_init(void *userdata, -+ struct fuse_conn_info *conn) -+{ -+ struct lo_data *lo = 
(struct lo_data*) userdata; -+ -+ if(conn->capable & FUSE_CAP_EXPORT_SUPPORT) -+ conn->want |= FUSE_CAP_EXPORT_SUPPORT; -+ -+ if (lo->writeback && -+ conn->capable & FUSE_CAP_WRITEBACK_CACHE) { -+ if (lo->debug) -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); -+ conn->want |= FUSE_CAP_WRITEBACK_CACHE; -+ } -+ if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { -+ if (lo->debug) -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -+ conn->want |= FUSE_CAP_FLOCK_LOCKS; -+ } -+} -+ -+static void lo_getattr(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi) -+{ -+ int res; -+ struct stat buf; -+ struct lo_data *lo = lo_data(req); -+ -+ (void) fi; -+ -+ res = fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) -+ return (void) fuse_reply_err(req, errno); -+ -+ fuse_reply_attr(req, &buf, lo->timeout); -+} -+ -+static int utimensat_empty_nofollow(struct lo_inode *inode, -+ const struct timespec *tv) -+{ -+ int res; -+ char procname[64]; -+ -+ if (inode->is_symlink) { -+ res = utimensat(inode->fd, "", tv, -+ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1 && errno == EINVAL) { -+ /* Sorry, no race free way to set times on symlink. */ -+ errno = EPERM; -+ } -+ return res; -+ } -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ return utimensat(AT_FDCWD, procname, tv, 0); -+} -+ -+static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, -+ int valid, struct fuse_file_info *fi) -+{ -+ int saverr; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ int ifd = inode->fd; -+ int res; -+ -+ if (valid & FUSE_SET_ATTR_MODE) { -+ if (fi) { -+ res = fchmod(fi->fh, attr->st_mode); -+ } else { -+ sprintf(procname, "/proc/self/fd/%i", ifd); -+ res = chmod(procname, attr->st_mode); -+ } -+ if (res == -1) -+ goto out_err; -+ } -+ if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { -+ uid_t uid = (valid & FUSE_SET_ATTR_UID) ? 
-+ attr->st_uid : (uid_t) -1; -+ gid_t gid = (valid & FUSE_SET_ATTR_GID) ? -+ attr->st_gid : (gid_t) -1; -+ -+ res = fchownat(ifd, "", uid, gid, -+ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) -+ goto out_err; -+ } -+ if (valid & FUSE_SET_ATTR_SIZE) { -+ if (fi) { -+ res = ftruncate(fi->fh, attr->st_size); -+ } else { -+ sprintf(procname, "/proc/self/fd/%i", ifd); -+ res = truncate(procname, attr->st_size); -+ } -+ if (res == -1) -+ goto out_err; -+ } -+ if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { -+ struct timespec tv[2]; -+ -+ tv[0].tv_sec = 0; -+ tv[1].tv_sec = 0; -+ tv[0].tv_nsec = UTIME_OMIT; -+ tv[1].tv_nsec = UTIME_OMIT; -+ -+ if (valid & FUSE_SET_ATTR_ATIME_NOW) -+ tv[0].tv_nsec = UTIME_NOW; -+ else if (valid & FUSE_SET_ATTR_ATIME) -+ tv[0] = attr->st_atim; -+ -+ if (valid & FUSE_SET_ATTR_MTIME_NOW) -+ tv[1].tv_nsec = UTIME_NOW; -+ else if (valid & FUSE_SET_ATTR_MTIME) -+ tv[1] = attr->st_mtim; -+ -+ if (fi) -+ res = futimens(fi->fh, tv); -+ else -+ res = utimensat_empty_nofollow(inode, tv); -+ if (res == -1) -+ goto out_err; -+ } -+ -+ return lo_getattr(req, ino, fi); -+ -+out_err: -+ saverr = errno; -+ fuse_reply_err(req, saverr); -+} -+ -+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) -+{ -+ struct lo_inode *p; -+ struct lo_inode *ret = NULL; -+ -+ pthread_mutex_lock(&lo->mutex); -+ for (p = lo->root.next; p != &lo->root; p = p->next) { -+ if (p->ino == st->st_ino && p->dev == st->st_dev) { -+ assert(p->refcount > 0); -+ ret = p; -+ ret->refcount++; -+ break; -+ } -+ } -+ pthread_mutex_unlock(&lo->mutex); -+ return ret; -+} -+ -+static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, -+ struct fuse_entry_param *e) -+{ -+ int newfd; -+ int res; -+ int saverr; -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode; -+ -+ memset(e, 0, sizeof(*e)); -+ e->attr_timeout = lo->timeout; -+ e->entry_timeout = lo->timeout; -+ -+ newfd = openat(lo_fd(req, parent), name, O_PATH | 
O_NOFOLLOW); -+ if (newfd == -1) -+ goto out_err; -+ -+ res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) -+ goto out_err; -+ -+ inode = lo_find(lo_data(req), &e->attr); -+ if (inode) { -+ close(newfd); -+ newfd = -1; -+ } else { -+ struct lo_inode *prev, *next; -+ -+ saverr = ENOMEM; -+ inode = calloc(1, sizeof(struct lo_inode)); -+ if (!inode) -+ goto out_err; -+ -+ inode->is_symlink = S_ISLNK(e->attr.st_mode); -+ inode->refcount = 1; -+ inode->fd = newfd; -+ inode->ino = e->attr.st_ino; -+ inode->dev = e->attr.st_dev; -+ -+ pthread_mutex_lock(&lo->mutex); -+ prev = &lo->root; -+ next = prev->next; -+ next->prev = inode; -+ inode->next = next; -+ inode->prev = prev; -+ prev->next = inode; -+ pthread_mutex_unlock(&lo->mutex); -+ } -+ e->ino = (uintptr_t) inode; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long) parent, name, (unsigned long long) e->ino); -+ -+ return 0; -+ -+out_err: -+ saverr = errno; -+ if (newfd != -1) -+ close(newfd); -+ return saverr; -+} -+ -+static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) -+{ -+ struct fuse_entry_param e; -+ int err; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", -+ parent, name); -+ -+ err = lo_do_lookup(req, parent, name, &e); -+ if (err) -+ fuse_reply_err(req, err); -+ else -+ fuse_reply_entry(req, &e); -+} -+ -+static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, -+ const char *name, mode_t mode, dev_t rdev, -+ const char *link) -+{ -+ int res; -+ int saverr; -+ struct lo_inode *dir = lo_inode(req, parent); -+ struct fuse_entry_param e; -+ -+ saverr = ENOMEM; -+ -+ res = mknod_wrapper(dir->fd, name, link, mode, rdev); -+ -+ saverr = errno; -+ if (res == -1) -+ goto out; -+ -+ saverr = lo_do_lookup(req, parent, name, &e); -+ if (saverr) -+ goto out; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long) 
parent, name, (unsigned long long) e.ino); -+ -+ fuse_reply_entry(req, &e); -+ return; -+ -+out: -+ fuse_reply_err(req, saverr); -+} -+ -+static void lo_mknod(fuse_req_t req, fuse_ino_t parent, -+ const char *name, mode_t mode, dev_t rdev) -+{ -+ lo_mknod_symlink(req, parent, name, mode, rdev, NULL); -+} -+ -+static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode) -+{ -+ lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); -+} -+ -+static void lo_symlink(fuse_req_t req, const char *link, -+ fuse_ino_t parent, const char *name) -+{ -+ lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); -+} -+ -+static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, -+ const char *name) -+{ -+ int res; -+ char procname[64]; -+ -+ if (inode->is_symlink) { -+ res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); -+ if (res == -1 && (errno == ENOENT || errno == EINVAL)) { -+ /* Sorry, no race free way to hard-link a symlink. */ -+ errno = EPERM; -+ } -+ return res; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); -+} -+ -+static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, -+ const char *name) -+{ -+ int res; -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode = lo_inode(req, ino); -+ struct fuse_entry_param e; -+ int saverr; -+ -+ memset(&e, 0, sizeof(struct fuse_entry_param)); -+ e.attr_timeout = lo->timeout; -+ e.entry_timeout = lo->timeout; -+ -+ res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); -+ if (res == -1) -+ goto out_err; -+ -+ res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) -+ goto out_err; -+ -+ pthread_mutex_lock(&lo->mutex); -+ inode->refcount++; -+ pthread_mutex_unlock(&lo->mutex); -+ e.ino = (uintptr_t) inode; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long) parent, name, -+ (unsigned long 
long) e.ino); -+ -+ fuse_reply_entry(req, &e); -+ return; -+ -+out_err: -+ saverr = errno; -+ fuse_reply_err(req, saverr); -+} -+ -+static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) -+{ -+ int res; -+ -+ res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); -+ -+ fuse_reply_err(req, res == -1 ? errno : 0); -+} -+ -+static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, -+ fuse_ino_t newparent, const char *newname, -+ unsigned int flags) -+{ -+ int res; -+ -+ if (flags) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ res = renameat(lo_fd(req, parent), name, -+ lo_fd(req, newparent), newname); -+ -+ fuse_reply_err(req, res == -1 ? errno : 0); -+} -+ -+static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) -+{ -+ int res; -+ -+ res = unlinkat(lo_fd(req, parent), name, 0); -+ -+ fuse_reply_err(req, res == -1 ? errno : 0); -+} -+ -+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) -+{ -+ if (!inode) -+ return; -+ -+ pthread_mutex_lock(&lo->mutex); -+ assert(inode->refcount >= n); -+ inode->refcount -= n; -+ if (!inode->refcount) { -+ struct lo_inode *prev, *next; -+ -+ prev = inode->prev; -+ next = inode->next; -+ next->prev = prev; -+ prev->next = next; -+ -+ pthread_mutex_unlock(&lo->mutex); -+ close(inode->fd); -+ free(inode); -+ -+ } else { -+ pthread_mutex_unlock(&lo->mutex); -+ } -+} -+ -+static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) -+{ -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode = lo_inode(req, ino); -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -+ (unsigned long long) ino, -+ (unsigned long long) inode->refcount, -+ (unsigned long long) nlookup); -+ } -+ -+ unref_inode(lo, inode, nlookup); -+} -+ -+static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) -+{ -+ lo_forget_one(req, ino, nlookup); -+ fuse_reply_none(req); -+} -+ -+static void 
lo_forget_multi(fuse_req_t req, size_t count, -+ struct fuse_forget_data *forgets) -+{ -+ int i; -+ -+ for (i = 0; i < count; i++) -+ lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); -+ fuse_reply_none(req); -+} -+ -+static void lo_readlink(fuse_req_t req, fuse_ino_t ino) -+{ -+ char buf[PATH_MAX + 1]; -+ int res; -+ -+ res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); -+ if (res == -1) -+ return (void) fuse_reply_err(req, errno); -+ -+ if (res == sizeof(buf)) -+ return (void) fuse_reply_err(req, ENAMETOOLONG); -+ -+ buf[res] = '\0'; -+ -+ fuse_reply_readlink(req, buf); -+} -+ -+struct lo_dirp { -+ DIR *dp; -+ struct dirent *entry; -+ off_t offset; -+}; -+ -+static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) -+{ -+ return (struct lo_dirp *) (uintptr_t) fi->fh; -+} -+ -+static void lo_opendir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+{ -+ int error = ENOMEM; -+ struct lo_data *lo = lo_data(req); -+ struct lo_dirp *d; -+ int fd; -+ -+ d = calloc(1, sizeof(struct lo_dirp)); -+ if (d == NULL) -+ goto out_err; -+ -+ fd = openat(lo_fd(req, ino), ".", O_RDONLY); -+ if (fd == -1) -+ goto out_errno; -+ -+ d->dp = fdopendir(fd); -+ if (d->dp == NULL) -+ goto out_errno; -+ -+ d->offset = 0; -+ d->entry = NULL; -+ -+ fi->fh = (uintptr_t) d; -+ if (lo->cache == CACHE_ALWAYS) -+ fi->keep_cache = 1; -+ fuse_reply_open(req, fi); -+ return; -+ -+out_errno: -+ error = errno; -+out_err: -+ if (d) { -+ if (fd != -1) -+ close(fd); -+ free(d); -+ } -+ fuse_reply_err(req, error); -+} -+ -+static int is_dot_or_dotdot(const char *name) -+{ -+ return name[0] == '.' && (name[1] == '\0' || -+ (name[1] == '.' 
&& name[2] == '\0')); -+} -+ -+static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, -+ off_t offset, struct fuse_file_info *fi, int plus) -+{ -+ struct lo_dirp *d = lo_dirp(fi); -+ char *buf; -+ char *p; -+ size_t rem = size; -+ int err; -+ -+ (void) ino; -+ -+ buf = calloc(1, size); -+ if (!buf) { -+ err = ENOMEM; -+ goto error; -+ } -+ p = buf; -+ -+ if (offset != d->offset) { -+ seekdir(d->dp, offset); -+ d->entry = NULL; -+ d->offset = offset; -+ } -+ while (1) { -+ size_t entsize; -+ off_t nextoff; -+ const char *name; -+ -+ if (!d->entry) { -+ errno = 0; -+ d->entry = readdir(d->dp); -+ if (!d->entry) { -+ if (errno) { // Error -+ err = errno; -+ goto error; -+ } else { // End of stream -+ break; -+ } -+ } -+ } -+ nextoff = d->entry->d_off; -+ name = d->entry->d_name; -+ fuse_ino_t entry_ino = 0; -+ if (plus) { -+ struct fuse_entry_param e; -+ if (is_dot_or_dotdot(name)) { -+ e = (struct fuse_entry_param) { -+ .attr.st_ino = d->entry->d_ino, -+ .attr.st_mode = d->entry->d_type << 12, -+ }; -+ } else { -+ err = lo_do_lookup(req, ino, name, &e); -+ if (err) -+ goto error; -+ entry_ino = e.ino; -+ } -+ -+ entsize = fuse_add_direntry_plus(req, p, rem, name, -+ &e, nextoff); -+ } else { -+ struct stat st = { -+ .st_ino = d->entry->d_ino, -+ .st_mode = d->entry->d_type << 12, -+ }; -+ entsize = fuse_add_direntry(req, p, rem, name, -+ &st, nextoff); -+ } -+ if (entsize > rem) { -+ if (entry_ino != 0) -+ lo_forget_one(req, entry_ino, 1); -+ break; -+ } -+ -+ p += entsize; -+ rem -= entsize; -+ -+ d->entry = NULL; -+ d->offset = nextoff; -+ } -+ -+ err = 0; -+error: -+ // If there's an error, we can only signal it if we haven't stored -+ // any entries yet - otherwise we'd end up with wrong lookup -+ // counts for the entries that are already in the buffer. So we -+ // return what we've collected until that point. 
-+ if (err && rem == size) -+ fuse_reply_err(req, err); -+ else -+ fuse_reply_buf(req, buf, size - rem); -+ free(buf); -+} -+ -+static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, -+ off_t offset, struct fuse_file_info *fi) -+{ -+ lo_do_readdir(req, ino, size, offset, fi, 0); -+} -+ -+static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, -+ off_t offset, struct fuse_file_info *fi) -+{ -+ lo_do_readdir(req, ino, size, offset, fi, 1); -+} -+ -+static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+{ -+ struct lo_dirp *d = lo_dirp(fi); -+ (void) ino; -+ closedir(d->dp); -+ free(d); -+ fuse_reply_err(req, 0); -+} -+ -+static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, struct fuse_file_info *fi) -+{ -+ int fd; -+ struct lo_data *lo = lo_data(req); -+ struct fuse_entry_param e; -+ int err; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", -+ parent, name); -+ -+ fd = openat(lo_fd(req, parent), name, -+ (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); -+ if (fd == -1) -+ return (void) fuse_reply_err(req, errno); -+ -+ fi->fh = fd; -+ if (lo->cache == CACHE_NEVER) -+ fi->direct_io = 1; -+ else if (lo->cache == CACHE_ALWAYS) -+ fi->keep_cache = 1; -+ -+ err = lo_do_lookup(req, parent, name, &e); -+ if (err) -+ fuse_reply_err(req, err); -+ else -+ fuse_reply_create(req, &e, fi); -+} -+ -+static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi) -+{ -+ int res; -+ int fd = dirfd(lo_dirp(fi)->dp); -+ (void) ino; -+ if (datasync) -+ res = fdatasync(fd); -+ else -+ res = fsync(fd); -+ fuse_reply_err(req, res == -1 ? 
errno : 0); -+} -+ -+static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+{ -+ int fd; -+ char buf[64]; -+ struct lo_data *lo = lo_data(req); -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", -+ ino, fi->flags); -+ -+ /* With writeback cache, kernel may send read requests even -+ when userspace opened write-only */ -+ if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { -+ fi->flags &= ~O_ACCMODE; -+ fi->flags |= O_RDWR; -+ } -+ -+ /* With writeback cache, O_APPEND is handled by the kernel. -+ This breaks atomicity (since the file may change in the -+ underlying filesystem, so that the kernel's idea of the -+ end of the file isn't accurate anymore). In this example, -+ we just accept that. A more rigorous filesystem may want -+ to return an error here */ -+ if (lo->writeback && (fi->flags & O_APPEND)) -+ fi->flags &= ~O_APPEND; -+ -+ sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); -+ fd = open(buf, fi->flags & ~O_NOFOLLOW); -+ if (fd == -1) -+ return (void) fuse_reply_err(req, errno); -+ -+ fi->fh = fd; -+ if (lo->cache == CACHE_NEVER) -+ fi->direct_io = 1; -+ else if (lo->cache == CACHE_ALWAYS) -+ fi->keep_cache = 1; -+ fuse_reply_open(req, fi); -+} -+ -+static void lo_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+{ -+ (void) ino; -+ -+ close(fi->fh); -+ fuse_reply_err(req, 0); -+} -+ -+static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+{ -+ int res; -+ (void) ino; -+ res = close(dup(fi->fh)); -+ fuse_reply_err(req, res == -1 ? errno : 0); -+} -+ -+static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi) -+{ -+ int res; -+ (void) ino; -+ if (datasync) -+ res = fdatasync(fi->fh); -+ else -+ res = fsync(fi->fh); -+ fuse_reply_err(req, res == -1 ? 
errno : 0); -+} -+ -+static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, -+ off_t offset, struct fuse_file_info *fi) -+{ -+ struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_read(ino=%" PRIu64 ", size=%zd, " -+ "off=%lu)\n", ino, size, (unsigned long) offset); -+ -+ buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -+ buf.buf[0].fd = fi->fh; -+ buf.buf[0].pos = offset; -+ -+ fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); -+} -+ -+static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_bufvec *in_buf, off_t off, -+ struct fuse_file_info *fi) -+{ -+ (void) ino; -+ ssize_t res; -+ struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); -+ -+ out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -+ out_buf.buf[0].fd = fi->fh; -+ out_buf.buf[0].pos = off; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", -+ ino, out_buf.buf[0].size, (unsigned long) off); -+ -+ res = fuse_buf_copy(&out_buf, in_buf, 0); -+ if(res < 0) -+ fuse_reply_err(req, -res); -+ else -+ fuse_reply_write(req, (size_t) res); -+} -+ -+static void lo_statfs(fuse_req_t req, fuse_ino_t ino) -+{ -+ int res; -+ struct statvfs stbuf; -+ -+ res = fstatvfs(lo_fd(req, ino), &stbuf); -+ if (res == -1) -+ fuse_reply_err(req, errno); -+ else -+ fuse_reply_statfs(req, &stbuf); -+} -+ -+static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, -+ off_t offset, off_t length, struct fuse_file_info *fi) -+{ -+ int err = EOPNOTSUPP; -+ (void) ino; -+ -+#ifdef HAVE_FALLOCATE -+ err = fallocate(fi->fh, mode, offset, length); -+ if (err < 0) -+ err = errno; -+ -+#elif defined(HAVE_POSIX_FALLOCATE) -+ if (mode) { -+ fuse_reply_err(req, EOPNOTSUPP); -+ return; -+ } -+ -+ err = posix_fallocate(fi->fh, offset, length); -+#endif -+ -+ fuse_reply_err(req, err); -+} -+ -+static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ int op) 
-+{ -+ int res; -+ (void) ino; -+ -+ res = flock(fi->fh, op); -+ -+ fuse_reply_err(req, res == -1 ? errno : 0); -+} -+ -+static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, -+ size_t size) -+{ -+ char *value = NULL; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) -+ goto out; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", -+ ino, name, size); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to getxattr on symlink. */ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ if (size) { -+ value = malloc(size); -+ if (!value) -+ goto out_err; -+ -+ ret = getxattr(procname, name, value, size); -+ if (ret == -1) -+ goto out_err; -+ saverr = 0; -+ if (ret == 0) -+ goto out; -+ -+ fuse_reply_buf(req, value, ret); -+ } else { -+ ret = getxattr(procname, name, NULL, 0); -+ if (ret == -1) -+ goto out_err; -+ -+ fuse_reply_xattr(req, ret); -+ } -+out_free: -+ free(value); -+ return; -+ -+out_err: -+ saverr = errno; -+out: -+ fuse_reply_err(req, saverr); -+ goto out_free; -+} -+ -+static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) -+{ -+ char *value = NULL; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) -+ goto out; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", -+ ino, size); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to listxattr on symlink. 
*/ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ if (size) { -+ value = malloc(size); -+ if (!value) -+ goto out_err; -+ -+ ret = listxattr(procname, value, size); -+ if (ret == -1) -+ goto out_err; -+ saverr = 0; -+ if (ret == 0) -+ goto out; -+ -+ fuse_reply_buf(req, value, ret); -+ } else { -+ ret = listxattr(procname, NULL, 0); -+ if (ret == -1) -+ goto out_err; -+ -+ fuse_reply_xattr(req, ret); -+ } -+out_free: -+ free(value); -+ return; -+ -+out_err: -+ saverr = errno; -+out: -+ fuse_reply_err(req, saverr); -+ goto out_free; -+} -+ -+static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, -+ const char *value, size_t size, int flags) -+{ -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) -+ goto out; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", -+ ino, name, value, size); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to setxattr on symlink. */ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ ret = setxattr(procname, name, value, size, flags); -+ saverr = ret == -1 ? errno : 0; -+ -+out: -+ fuse_reply_err(req, saverr); -+} -+ -+static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) -+{ -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) -+ goto out; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", -+ ino, name); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to setxattr on symlink. */ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ ret = removexattr(procname, name); -+ saverr = ret == -1 ? 
errno : 0; -+ -+out: -+ fuse_reply_err(req, saverr); -+} -+ -+#ifdef HAVE_COPY_FILE_RANGE -+static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, -+ struct fuse_file_info *fi_in, -+ fuse_ino_t ino_out, off_t off_out, -+ struct fuse_file_info *fi_out, size_t len, -+ int flags) -+{ -+ ssize_t res; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " -+ "off=%lu, ino=%" PRIu64 "/fd=%lu, " -+ "off=%lu, size=%zd, flags=0x%x)\n", -+ ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, -+ len, flags); -+ -+ res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, -+ flags); -+ if (res < 0) -+ fuse_reply_err(req, -errno); -+ else -+ fuse_reply_write(req, res); -+} -+#endif -+ -+static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, -+ struct fuse_file_info *fi) -+{ -+ off_t res; -+ -+ (void)ino; -+ res = lseek(fi->fh, off, whence); -+ if (res != -1) -+ fuse_reply_lseek(req, res); -+ else -+ fuse_reply_err(req, errno); -+} -+ -+static struct fuse_lowlevel_ops lo_oper = { -+ .init = lo_init, -+ .lookup = lo_lookup, -+ .mkdir = lo_mkdir, -+ .mknod = lo_mknod, -+ .symlink = lo_symlink, -+ .link = lo_link, -+ .unlink = lo_unlink, -+ .rmdir = lo_rmdir, -+ .rename = lo_rename, -+ .forget = lo_forget, -+ .forget_multi = lo_forget_multi, -+ .getattr = lo_getattr, -+ .setattr = lo_setattr, -+ .readlink = lo_readlink, -+ .opendir = lo_opendir, -+ .readdir = lo_readdir, -+ .readdirplus = lo_readdirplus, -+ .releasedir = lo_releasedir, -+ .fsyncdir = lo_fsyncdir, -+ .create = lo_create, -+ .open = lo_open, -+ .release = lo_release, -+ .flush = lo_flush, -+ .fsync = lo_fsync, -+ .read = lo_read, -+ .write_buf = lo_write_buf, -+ .statfs = lo_statfs, -+ .fallocate = lo_fallocate, -+ .flock = lo_flock, -+ .getxattr = lo_getxattr, -+ .listxattr = lo_listxattr, -+ .setxattr = lo_setxattr, -+ .removexattr = lo_removexattr, -+#ifdef HAVE_COPY_FILE_RANGE -+ .copy_file_range = 
lo_copy_file_range, -+#endif -+ .lseek = lo_lseek, -+}; -+ -+int main(int argc, char *argv[]) -+{ -+ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -+ struct fuse_session *se; -+ struct fuse_cmdline_opts opts; -+ struct lo_data lo = { .debug = 0, -+ .writeback = 0 }; -+ int ret = -1; -+ -+ /* Don't mask creation mode, kernel already did that */ -+ umask(0); -+ -+ pthread_mutex_init(&lo.mutex, NULL); -+ lo.root.next = lo.root.prev = &lo.root; -+ lo.root.fd = -1; -+ lo.cache = CACHE_NORMAL; -+ -+ if (fuse_parse_cmdline(&args, &opts) != 0) -+ return 1; -+ if (opts.show_help) { -+ printf("usage: %s [options] \n\n", argv[0]); -+ fuse_cmdline_help(); -+ fuse_lowlevel_help(); -+ ret = 0; -+ goto err_out1; -+ } else if (opts.show_version) { -+ printf("FUSE library version %s\n", fuse_pkgversion()); -+ fuse_lowlevel_version(); -+ ret = 0; -+ goto err_out1; -+ } -+ -+ if(opts.mountpoint == NULL) { -+ printf("usage: %s [options] \n", argv[0]); -+ printf(" %s --help\n", argv[0]); -+ ret = 1; -+ goto err_out1; -+ } -+ -+ if (fuse_opt_parse(&args, &lo, lo_opts, NULL)== -1) -+ return 1; -+ -+ lo.debug = opts.debug; -+ lo.root.refcount = 2; -+ if (lo.source) { -+ struct stat stat; -+ int res; -+ -+ res = lstat(lo.source, &stat); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", -+ lo.source); -+ exit(1); -+ } -+ if (!S_ISDIR(stat.st_mode)) { -+ fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); -+ exit(1); -+ } -+ -+ } else { -+ lo.source = "/"; -+ } -+ lo.root.is_symlink = false; -+ if (!lo.timeout_set) { -+ switch (lo.cache) { -+ case CACHE_NEVER: -+ lo.timeout = 0.0; -+ break; -+ -+ case CACHE_NORMAL: -+ lo.timeout = 1.0; -+ break; -+ -+ case CACHE_ALWAYS: -+ lo.timeout = 86400.0; -+ break; -+ } -+ } else if (lo.timeout < 0) { -+ fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", -+ lo.timeout); -+ exit(1); -+ } -+ -+ lo.root.fd = open(lo.source, O_PATH); -+ if (lo.root.fd == -1) { -+ fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): 
%m\n", -+ lo.source); -+ exit(1); -+ } -+ -+ se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); -+ if (se == NULL) -+ goto err_out1; -+ -+ if (fuse_set_signal_handlers(se) != 0) -+ goto err_out2; -+ -+ if (fuse_session_mount(se, opts.mountpoint) != 0) -+ goto err_out3; -+ -+ fuse_daemonize(opts.foreground); -+ -+ /* Block until ctrl+c or fusermount -u */ -+ if (opts.singlethread) -+ ret = fuse_session_loop(se); -+ else -+ ret = fuse_session_loop_mt(se, opts.clone_fd); -+ -+ fuse_session_unmount(se); -+err_out3: -+ fuse_remove_signal_handlers(se); -+err_out2: -+ fuse_session_destroy(se); -+err_out1: -+ free(opts.mountpoint); -+ fuse_opt_free_args(&args); -+ -+ if (lo.root.fd >= 0) -+ close(lo.root.fd); -+ -+ return ret ? 1 : 0; -+} --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch b/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch deleted file mode 100644 index cef537a..0000000 --- a/kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 52e93f2dc499ead339bf808dac3480b369dfadd1 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:39 +0100 -Subject: [PATCH 068/116] virtiofsd: Add timestamp to the log with - FUSE_LOG_DEBUG level -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-65-dgilbert@redhat.com> -Patchwork-id: 93517 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 064/112] virtiofsd: Add timestamp to the log with FUSE_LOG_DEBUG level -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Masayoshi Mizuma - -virtiofsd has some threads, so we see a lot of logs with debug option. -It would be useful for debugging if we can see the timestamp. 
- -Add nano second timestamp, which got by get_clock(), to the log with -FUSE_LOG_DEBUG level if the syslog option isn't set. - -The log is like as: - - # ./virtiofsd -d -o vhost_user_socket=/tmp/vhostqemu0 -o source=/tmp/share0 -o cache=auto - ... - [5365943125463727] [ID: 00000002] fv_queue_thread: Start for queue 0 kick_fd 9 - [5365943125568644] [ID: 00000002] fv_queue_thread: Waiting for Queue 0 event - [5365943125573561] [ID: 00000002] fv_queue_thread: Got queue event on Queue 0 - -Signed-off-by: Masayoshi Mizuma -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 50fb955aa0e6ede929422146936cf68bf1ca876f) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index f08324f..98114a3 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -36,6 +36,7 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/timer.h" - #include "fuse_virtio.h" - #include "fuse_log.h" - #include "fuse_lowlevel.h" -@@ -2276,7 +2277,13 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) - } - - if (current_log_level == FUSE_LOG_DEBUG) { -- localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), fmt); -+ if (!use_syslog) { -+ localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s", -+ get_clock(), syscall(__NR_gettid), fmt); -+ } else { -+ localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), -+ fmt); -+ } - fmt = localfmt; - } - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch b/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch deleted file mode 100644 index 4713a0d..0000000 --- a/kvm-virtiofsd-Clean-up-inodes-on-destroy.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 2b921f7162b53204051955228bf99bbed55d2457 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:53 +0100 -Subject: [PATCH 082/116] virtiofsd: Clean up inodes on destroy -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-79-dgilbert@redhat.com> -Patchwork-id: 93532 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 078/112] virtiofsd: Clean up inodes on destroy -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Clear out our inodes and fd's on a 'destroy' - so we get rid -of them if we reboot the guest. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 771b01eb76ff480fee984bd1d21727147cc3e702) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 26 ++++++++++++++++++++++++++ - 1 file changed, 26 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index b176a31..9ed77a1 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1169,6 +1169,25 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - } - } - -+static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) -+{ -+ struct lo_inode *inode = value; -+ struct lo_data *lo = user_data; -+ -+ inode->refcount = 0; -+ lo_map_remove(&lo->ino_map, inode->fuse_ino); -+ close(inode->fd); -+ -+ return TRUE; -+} -+ -+static void unref_all_inodes(struct lo_data *lo) -+{ -+ pthread_mutex_lock(&lo->mutex); -+ g_hash_table_foreach_remove(lo->inodes, unref_all_inodes_cb, lo); -+ pthread_mutex_unlock(&lo->mutex); -+} -+ - static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - { - struct lo_data *lo = lo_data(req); -@@ -2035,6 +2054,12 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int 
whence, - } - } - -+static void lo_destroy(void *userdata) -+{ -+ struct lo_data *lo = (struct lo_data *)userdata; -+ unref_all_inodes(lo); -+} -+ - static struct fuse_lowlevel_ops lo_oper = { - .init = lo_init, - .lookup = lo_lookup, -@@ -2073,6 +2098,7 @@ static struct fuse_lowlevel_ops lo_oper = { - .copy_file_range = lo_copy_file_range, - #endif - .lseek = lo_lseek, -+ .destroy = lo_destroy, - }; - - /* Print vhost-user.json backend program capabilities */ --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch b/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch deleted file mode 100644 index c421365..0000000 --- a/kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 24f91062f571ad2dd2ac22db3b7d456a2c8bd2cb Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:23 +0100 -Subject: [PATCH 112/116] virtiofsd: Convert lo_destroy to take the lo->mutex - lock itself -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-109-dgilbert@redhat.com> -Patchwork-id: 93563 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 108/112] virtiofsd: Convert lo_destroy to take the lo->mutex lock itself -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -lo_destroy was relying on some implicit knowledge of the locking; -we can avoid this if we create an unref_inode that doesn't take -the lock and then grab it for the whole of the lo_destroy. - -Suggested-by: Vivek Goyal -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit fe4c15798a48143dd6b1f58d2d3cad12206ce211) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 31 +++++++++++++++++-------------- - 1 file changed, 17 insertions(+), 14 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index eb001b9..fc15d61 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1344,14 +1344,13 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - lo_inode_put(lo, &inode); - } - --static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, -- uint64_t n) -+/* To be called with lo->mutex held */ -+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) - { - if (!inode) { - return; - } - -- pthread_mutex_lock(&lo->mutex); - assert(inode->nlookup >= n); - inode->nlookup -= n; - if (!inode->nlookup) { -@@ -1362,15 +1361,24 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - } - g_hash_table_destroy(inode->posix_locks); - pthread_mutex_destroy(&inode->plock_mutex); -- pthread_mutex_unlock(&lo->mutex); - - /* Drop our refcount from lo_do_lookup() */ - lo_inode_put(lo, &inode); -- } else { -- pthread_mutex_unlock(&lo->mutex); - } - } - -+static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, -+ uint64_t n) -+{ -+ if (!inode) { -+ return; -+ } -+ -+ pthread_mutex_lock(&lo->mutex); -+ unref_inode(lo, inode, n); -+ pthread_mutex_unlock(&lo->mutex); -+} -+ - static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - { - struct lo_data *lo = lo_data(req); -@@ -2458,13 +2466,7 @@ static void lo_destroy(void *userdata) - { - struct lo_data *lo = (struct lo_data *)userdata; - -- /* -- * Normally lo->mutex must be taken when traversing lo->inodes but -- * lo_destroy() is a serialized request so no races are possible here. 
-- * -- * In addition, we cannot acquire lo->mutex since unref_inode() takes it -- * too and this would result in a recursive lock. -- */ -+ pthread_mutex_lock(&lo->mutex); - while (true) { - GHashTableIter iter; - gpointer key, value; -@@ -2475,8 +2477,9 @@ static void lo_destroy(void *userdata) - } - - struct lo_inode *inode = value; -- unref_inode_lolocked(lo, inode, inode->nlookup); -+ unref_inode(lo, inode, inode->nlookup); - } -+ pthread_mutex_unlock(&lo->mutex); - } - - static struct fuse_lowlevel_ops lo_oper = { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch b/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch deleted file mode 100644 index 9f198c2..0000000 --- a/kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch +++ /dev/null @@ -1,176 +0,0 @@ -From e217ab392e0d4c770ec18dbfbe986771773cb557 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:33 +0100 -Subject: [PATCH 062/116] virtiofsd: Drop CAP_FSETID if client asked for it -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-59-dgilbert@redhat.com> -Patchwork-id: 93513 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 058/112] virtiofsd: Drop CAP_FSETID if client asked for it -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -If client requested killing setuid/setgid bits on file being written, drop -CAP_FSETID capability so that setuid/setgid bits are cleared upon write -automatically. - -pjdfstest chown/12.t needs this. - -Signed-off-by: Vivek Goyal - dgilbert: reworked for libcap-ng -Reviewed-by: Misono Tomohiro -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit ee88465224b3aed2596049caa28f86cbe0d5a3d0) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 105 +++++++++++++++++++++++++++++++++++++++ - 1 file changed, 105 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 97e7c75..d53cb1e 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -201,6 +201,91 @@ static int load_capng(void) - return 0; - } - -+/* -+ * Helpers for dropping and regaining effective capabilities. Returns 0 -+ * on success, error otherwise -+ */ -+static int drop_effective_cap(const char *cap_name, bool *cap_dropped) -+{ -+ int cap, ret; -+ -+ cap = capng_name_to_capability(cap_name); -+ if (cap < 0) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", -+ cap_name, strerror(errno)); -+ goto out; -+ } -+ -+ if (load_capng()) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); -+ goto out; -+ } -+ -+ /* We dont have this capability in effective set already. 
*/ -+ if (!capng_have_capability(CAPNG_EFFECTIVE, cap)) { -+ ret = 0; -+ goto out; -+ } -+ -+ if (capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, cap)) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "capng_update(DROP,) failed\n"); -+ goto out; -+ } -+ -+ if (capng_apply(CAPNG_SELECT_CAPS)) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "drop:capng_apply() failed\n"); -+ goto out; -+ } -+ -+ ret = 0; -+ if (cap_dropped) { -+ *cap_dropped = true; -+ } -+ -+out: -+ return ret; -+} -+ -+static int gain_effective_cap(const char *cap_name) -+{ -+ int cap; -+ int ret = 0; -+ -+ cap = capng_name_to_capability(cap_name); -+ if (cap < 0) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", -+ cap_name, strerror(errno)); -+ goto out; -+ } -+ -+ if (load_capng()) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); -+ goto out; -+ } -+ -+ if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap)) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "capng_update(ADD,) failed\n"); -+ goto out; -+ } -+ -+ if (capng_apply(CAPNG_SELECT_CAPS)) { -+ ret = errno; -+ fuse_log(FUSE_LOG_ERR, "gain:capng_apply() failed\n"); -+ goto out; -+ } -+ ret = 0; -+ -+out: -+ return ret; -+} -+ - static void lo_map_init(struct lo_map *map) - { - map->elems = NULL; -@@ -1577,6 +1662,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - (void)ino; - ssize_t res; - struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); -+ bool cap_fsetid_dropped = false; - - out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; - out_buf.buf[0].fd = lo_fi_fd(req, fi); -@@ -1588,12 +1674,31 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - out_buf.buf[0].size, (unsigned long)off); - } - -+ /* -+ * If kill_priv is set, drop CAP_FSETID which should lead to kernel -+ * clearing setuid/setgid on file. 
-+ */ -+ if (fi->kill_priv) { -+ res = drop_effective_cap("FSETID", &cap_fsetid_dropped); -+ if (res != 0) { -+ fuse_reply_err(req, res); -+ return; -+ } -+ } -+ - res = fuse_buf_copy(&out_buf, in_buf); - if (res < 0) { - fuse_reply_err(req, -res); - } else { - fuse_reply_write(req, (size_t)res); - } -+ -+ if (cap_fsetid_dropped) { -+ res = gain_effective_cap("FSETID"); -+ if (res) { -+ fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n"); -+ } -+ } - } - - static void lo_statfs(fuse_req_t req, fuse_ino_t ino) --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Fast-path-for-virtio-read.patch b/kvm-virtiofsd-Fast-path-for-virtio-read.patch deleted file mode 100644 index 03874ce..0000000 --- a/kvm-virtiofsd-Fast-path-for-virtio-read.patch +++ /dev/null @@ -1,240 +0,0 @@ -From 7d2efc3e4af15eff57b0c38cff7c81b371a98303 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:06 +0100 -Subject: [PATCH 035/116] virtiofsd: Fast path for virtio read -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-32-dgilbert@redhat.com> -Patchwork-id: 93480 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 031/112] virtiofsd: Fast path for virtio read -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Readv the data straight into the guests buffer. - -Signed-off-by: Dr. David Alan Gilbert -With fix by: -Signed-off-by: Eryu Guan -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit eb49d187ef5134483a34c970bbfece28aaa686a7) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 5 ++ - tools/virtiofsd/fuse_virtio.c | 162 ++++++++++++++++++++++++++++++++++++++++ - tools/virtiofsd/fuse_virtio.h | 4 + - 3 files changed, 171 insertions(+) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 380d93b..4f4684d 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -475,6 +475,11 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, - return fuse_send_msg(se, ch, iov, iov_count); - } - -+ if (fuse_lowlevel_is_virtio(se) && buf->count == 1 && -+ buf->buf[0].flags == (FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK)) { -+ return virtio_send_data_iov(se, ch, iov, iov_count, buf, len); -+ } -+ - abort(); /* Will have taken vhost path */ - return 0; - } -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index f1adeb6..7e2711b 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -230,6 +230,168 @@ err: - return ret; - } - -+/* -+ * Callback from fuse_send_data_iov_* when it's virtio and the buffer -+ * is a single FD with FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK -+ * We need send the iov and then the buffer. -+ * Return 0 on success -+ */ -+int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int count, struct fuse_bufvec *buf, -+ size_t len) -+{ -+ int ret = 0; -+ VuVirtqElement *elem; -+ VuVirtq *q; -+ -+ assert(count >= 1); -+ assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); -+ -+ struct fuse_out_header *out = iov[0].iov_base; -+ /* TODO: Endianness! 
*/ -+ -+ size_t iov_len = iov_size(iov, count); -+ size_t tosend_len = iov_len + len; -+ -+ out->len = tosend_len; -+ -+ fuse_log(FUSE_LOG_DEBUG, "%s: count=%d len=%zd iov_len=%zd\n", __func__, -+ count, len, iov_len); -+ -+ /* unique == 0 is notification which we don't support */ -+ assert(out->unique); -+ -+ /* For virtio we always have ch */ -+ assert(ch); -+ assert(!ch->qi->reply_sent); -+ elem = ch->qi->qe; -+ q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; -+ -+ /* The 'in' part of the elem is to qemu */ -+ unsigned int in_num = elem->in_num; -+ struct iovec *in_sg = elem->in_sg; -+ size_t in_len = iov_size(in_sg, in_num); -+ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n", -+ __func__, elem->index, in_num, in_len); -+ -+ /* -+ * The elem should have room for a 'fuse_out_header' (out from fuse) -+ * plus the data based on the len in the header. -+ */ -+ if (in_len < sizeof(struct fuse_out_header)) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", -+ __func__, elem->index); -+ ret = E2BIG; -+ goto err; -+ } -+ if (in_len < tosend_len) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", -+ __func__, elem->index, tosend_len); -+ ret = E2BIG; -+ goto err; -+ } -+ -+ /* TODO: Limit to 'len' */ -+ -+ /* First copy the header data from iov->in_sg */ -+ copy_iov(iov, count, in_sg, in_num, iov_len); -+ -+ /* -+ * Build a copy of the the in_sg iov so we can skip bits in it, -+ * including changing the offsets -+ */ -+ struct iovec *in_sg_cpy = calloc(sizeof(struct iovec), in_num); -+ assert(in_sg_cpy); -+ memcpy(in_sg_cpy, in_sg, sizeof(struct iovec) * in_num); -+ /* These get updated as we skip */ -+ struct iovec *in_sg_ptr = in_sg_cpy; -+ int in_sg_cpy_count = in_num; -+ -+ /* skip over parts of in_sg that contained the header iov */ -+ size_t skip_size = iov_len; -+ -+ size_t in_sg_left = 0; -+ do { -+ while (skip_size != 0 && in_sg_cpy_count) { -+ if (skip_size >= in_sg_ptr[0].iov_len) { -+ skip_size -= 
in_sg_ptr[0].iov_len; -+ in_sg_ptr++; -+ in_sg_cpy_count--; -+ } else { -+ in_sg_ptr[0].iov_len -= skip_size; -+ in_sg_ptr[0].iov_base += skip_size; -+ break; -+ } -+ } -+ -+ int i; -+ for (i = 0, in_sg_left = 0; i < in_sg_cpy_count; i++) { -+ in_sg_left += in_sg_ptr[i].iov_len; -+ } -+ fuse_log(FUSE_LOG_DEBUG, -+ "%s: after skip skip_size=%zd in_sg_cpy_count=%d " -+ "in_sg_left=%zd\n", -+ __func__, skip_size, in_sg_cpy_count, in_sg_left); -+ ret = preadv(buf->buf[0].fd, in_sg_ptr, in_sg_cpy_count, -+ buf->buf[0].pos); -+ -+ if (ret == -1) { -+ ret = errno; -+ fuse_log(FUSE_LOG_DEBUG, "%s: preadv failed (%m) len=%zd\n", -+ __func__, len); -+ free(in_sg_cpy); -+ goto err; -+ } -+ fuse_log(FUSE_LOG_DEBUG, "%s: preadv ret=%d len=%zd\n", __func__, -+ ret, len); -+ if (ret < len && ret) { -+ fuse_log(FUSE_LOG_DEBUG, "%s: ret < len\n", __func__); -+ /* Skip over this much next time around */ -+ skip_size = ret; -+ buf->buf[0].pos += ret; -+ len -= ret; -+ -+ /* Lets do another read */ -+ continue; -+ } -+ if (!ret) { -+ /* EOF case? 
*/ -+ fuse_log(FUSE_LOG_DEBUG, "%s: !ret in_sg_left=%zd\n", __func__, -+ in_sg_left); -+ break; -+ } -+ if (ret != len) { -+ fuse_log(FUSE_LOG_DEBUG, "%s: ret!=len\n", __func__); -+ ret = EIO; -+ free(in_sg_cpy); -+ goto err; -+ } -+ in_sg_left -= ret; -+ len -= ret; -+ } while (in_sg_left); -+ free(in_sg_cpy); -+ -+ /* Need to fix out->len on EOF */ -+ if (len) { -+ struct fuse_out_header *out_sg = in_sg[0].iov_base; -+ -+ tosend_len -= len; -+ out_sg->len = tosend_len; -+ } -+ -+ ret = 0; -+ -+ vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); -+ vu_queue_notify(&se->virtio_dev->dev, q); -+ -+err: -+ if (ret == 0) { -+ ch->qi->reply_sent = true; -+ } -+ -+ return ret; -+} -+ - /* Thread function for individual queues, created when a queue is 'started' */ - static void *fv_queue_thread(void *opaque) - { -diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h -index 135a148..cc676b9 100644 ---- a/tools/virtiofsd/fuse_virtio.h -+++ b/tools/virtiofsd/fuse_virtio.h -@@ -26,4 +26,8 @@ int virtio_loop(struct fuse_session *se); - int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - struct iovec *iov, int count); - -+int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int count, -+ struct fuse_bufvec *buf, size_t len); -+ - #endif --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch b/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch deleted file mode 100644 index 12bb9a2..0000000 --- a/kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch +++ /dev/null @@ -1,164 +0,0 @@ -From 6d41fc549198e140f38fddcb02975098df040ae1 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:50 +0100 -Subject: [PATCH 019/116] virtiofsd: Fix common header and define for QEMU - builds -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-16-dgilbert@redhat.com> -Patchwork-id: 93470 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 015/112] virtiofsd: Fix common header and define for QEMU builds -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -All of the fuse files include config.h and define GNU_SOURCE -where we don't have either under our build - remove them. -Fixup path to the kernel's fuse.h in the QEMUs world. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 09863ebc7e32a107235b3c815ad54d26cc64f07a) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 4 +--- - tools/virtiofsd/fuse_i.h | 3 +++ - tools/virtiofsd/fuse_log.c | 1 + - tools/virtiofsd/fuse_lowlevel.c | 6 ++---- - tools/virtiofsd/fuse_opt.c | 2 +- - tools/virtiofsd/fuse_signals.c | 2 +- - tools/virtiofsd/helper.c | 1 + - tools/virtiofsd/passthrough_ll.c | 8 ++------ - 8 files changed, 12 insertions(+), 15 deletions(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 4d507f3..772efa9 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -9,9 +9,7 @@ - * See the file COPYING.LIB - */ - --#define _GNU_SOURCE -- --#include "config.h" -+#include "qemu/osdep.h" - #include "fuse_i.h" - #include "fuse_lowlevel.h" - #include -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index e63cb58..bae0699 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -6,6 +6,9 @@ - * See the file COPYING.LIB - */ - -+#define FUSE_USE_VERSION 31 -+ -+ - #include "fuse.h" - #include "fuse_lowlevel.h" - -diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c -index 11345f9..c301ff6 100644 ---- a/tools/virtiofsd/fuse_log.c -+++ b/tools/virtiofsd/fuse_log.c -@@ -8,6 +8,7 @@ - 
* See the file COPYING.LIB - */ - -+#include "qemu/osdep.h" - #include "fuse_log.h" - - #include -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 3da80de..07fb8a6 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -9,11 +9,9 @@ - * See the file COPYING.LIB - */ - --#define _GNU_SOURCE -- --#include "config.h" -+#include "qemu/osdep.h" - #include "fuse_i.h" --#include "fuse_kernel.h" -+#include "standard-headers/linux/fuse.h" - #include "fuse_misc.h" - #include "fuse_opt.h" - -diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c -index edd36f4..2892236 100644 ---- a/tools/virtiofsd/fuse_opt.c -+++ b/tools/virtiofsd/fuse_opt.c -@@ -9,8 +9,8 @@ - * See the file COPYING.LIB - */ - -+#include "qemu/osdep.h" - #include "fuse_opt.h" --#include "config.h" - #include "fuse_i.h" - #include "fuse_misc.h" - -diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c -index 19d6791..dc7c8ac 100644 ---- a/tools/virtiofsd/fuse_signals.c -+++ b/tools/virtiofsd/fuse_signals.c -@@ -8,7 +8,7 @@ - * See the file COPYING.LIB - */ - --#include "config.h" -+#include "qemu/osdep.h" - #include "fuse_i.h" - #include "fuse_lowlevel.h" - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index d9227d7..9333691 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -10,6 +10,7 @@ - * See the file COPYING.LIB. 
- */ - -+#include "qemu/osdep.h" - #include "fuse_i.h" - #include "fuse_lowlevel.h" - #include "fuse_misc.h" -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 126a56c..322a889 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -35,15 +35,11 @@ - * \include passthrough_ll.c - */ - --#define _GNU_SOURCE --#define FUSE_USE_VERSION 31 -- --#include "config.h" -- -+#include "qemu/osdep.h" -+#include "fuse_lowlevel.h" - #include - #include - #include --#include - #include - #include - #include --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch b/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch deleted file mode 100644 index f929bab..0000000 --- a/kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch +++ /dev/null @@ -1,136 +0,0 @@ -From 9b5fbc95a287b2ce9448142194b161d8360d5e4e Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:15 +0100 -Subject: [PATCH 104/116] virtiofsd: Fix data corruption with O_APPEND write in - writeback mode -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-101-dgilbert@redhat.com> -Patchwork-id: 93556 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 100/112] virtiofsd: Fix data corruption with O_APPEND write in writeback mode -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Misono Tomohiro - -When writeback mode is enabled (-o writeback), O_APPEND handling is -done in kernel. Therefore virtiofsd clears O_APPEND flag when open. -Otherwise O_APPEND flag takes precedence over pwrite() and write -data may corrupt. - -Currently clearing O_APPEND flag is done in lo_open(), but we also -need the same operation in lo_create(). 
So, factor out the flag -update operation in lo_open() to update_open_flags() and call it -in both lo_open() and lo_create(). - -This fixes the failure of xfstest generic/069 in writeback mode -(which tests O_APPEND write data integrity). - -Signed-off-by: Misono Tomohiro -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 8e4e41e39eac5ee5f378d66f069a2f70a1734317) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 66 ++++++++++++++++++++-------------------- - 1 file changed, 33 insertions(+), 33 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 948cb19..4c61ac5 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1692,6 +1692,37 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, - fuse_reply_err(req, 0); - } - -+static void update_open_flags(int writeback, struct fuse_file_info *fi) -+{ -+ /* -+ * With writeback cache, kernel may send read requests even -+ * when userspace opened write-only -+ */ -+ if (writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { -+ fi->flags &= ~O_ACCMODE; -+ fi->flags |= O_RDWR; -+ } -+ -+ /* -+ * With writeback cache, O_APPEND is handled by the kernel. -+ * This breaks atomicity (since the file may change in the -+ * underlying filesystem, so that the kernel's idea of the -+ * end of the file isn't accurate anymore). In this example, -+ * we just accept that. A more rigorous filesystem may want -+ * to return an error here -+ */ -+ if (writeback && (fi->flags & O_APPEND)) { -+ fi->flags &= ~O_APPEND; -+ } -+ -+ /* -+ * O_DIRECT in guest should not necessarily mean bypassing page -+ * cache on host as well. If somebody needs that behavior, it -+ * probably should be a configuration knob in daemon. 
-+ */ -+ fi->flags &= ~O_DIRECT; -+} -+ - static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - mode_t mode, struct fuse_file_info *fi) - { -@@ -1721,12 +1752,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - goto out; - } - -- /* -- * O_DIRECT in guest should not necessarily mean bypassing page -- * cache on host as well. If somebody needs that behavior, it -- * probably should be a configuration knob in daemon. -- */ -- fi->flags &= ~O_DIRECT; -+ update_open_flags(lo->writeback, fi); - - fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, - mode); -@@ -1936,33 +1962,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, - fi->flags); - -- /* -- * With writeback cache, kernel may send read requests even -- * when userspace opened write-only -- */ -- if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { -- fi->flags &= ~O_ACCMODE; -- fi->flags |= O_RDWR; -- } -- -- /* -- * With writeback cache, O_APPEND is handled by the kernel. -- * This breaks atomicity (since the file may change in the -- * underlying filesystem, so that the kernel's idea of the -- * end of the file isn't accurate anymore). In this example, -- * we just accept that. A more rigorous filesystem may want -- * to return an error here -- */ -- if (lo->writeback && (fi->flags & O_APPEND)) { -- fi->flags &= ~O_APPEND; -- } -- -- /* -- * O_DIRECT in guest should not necessarily mean bypassing page -- * cache on host as well. If somebody needs that behavior, it -- * probably should be a configuration knob in daemon. 
-- */ -- fi->flags &= ~O_DIRECT; -+ update_open_flags(lo->writeback, fi); - - sprintf(buf, "%i", lo_fd(req, ino)); - fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch b/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch deleted file mode 100644 index 306c183..0000000 --- a/kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 9f726593bc3acbc247876dcc4d79fbf046958003 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:49 +0100 -Subject: [PATCH 018/116] virtiofsd: Fix fuse_daemonize ignored return values -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-15-dgilbert@redhat.com> -Patchwork-id: 93469 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 014/112] virtiofsd: Fix fuse_daemonize ignored return values -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -QEMU's compiler enables warnings/errors for ignored values -and the (void) trick used in the fuse code isn't enough. -Turn all the return values into a return value on the function. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 30d8e49760712d65697ea517c53671bd1d214fc7) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 33 ++++++++++++++++++++++----------- - 1 file changed, 22 insertions(+), 11 deletions(-) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 5e6f205..d9227d7 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -10,12 +10,10 @@ - * See the file COPYING.LIB. 
- */ - --#include "config.h" - #include "fuse_i.h" - #include "fuse_lowlevel.h" - #include "fuse_misc.h" - #include "fuse_opt.h" --#include "mount_util.h" - - #include - #include -@@ -171,6 +169,7 @@ int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) - - int fuse_daemonize(int foreground) - { -+ int ret = 0, rett; - if (!foreground) { - int nullfd; - int waiter[2]; -@@ -192,8 +191,8 @@ int fuse_daemonize(int foreground) - case 0: - break; - default: -- (void)read(waiter[0], &completed, sizeof(completed)); -- _exit(0); -+ _exit(read(waiter[0], &completed, -+ sizeof(completed) != sizeof(completed))); - } - - if (setsid() == -1) { -@@ -201,13 +200,22 @@ int fuse_daemonize(int foreground) - return -1; - } - -- (void)chdir("/"); -+ ret = chdir("/"); - - nullfd = open("/dev/null", O_RDWR, 0); - if (nullfd != -1) { -- (void)dup2(nullfd, 0); -- (void)dup2(nullfd, 1); -- (void)dup2(nullfd, 2); -+ rett = dup2(nullfd, 0); -+ if (!ret) { -+ ret = rett; -+ } -+ rett = dup2(nullfd, 1); -+ if (!ret) { -+ ret = rett; -+ } -+ rett = dup2(nullfd, 2); -+ if (!ret) { -+ ret = rett; -+ } - if (nullfd > 2) { - close(nullfd); - } -@@ -215,13 +223,16 @@ int fuse_daemonize(int foreground) - - /* Propagate completion of daemon initialization */ - completed = 1; -- (void)write(waiter[1], &completed, sizeof(completed)); -+ rett = write(waiter[1], &completed, sizeof(completed)); -+ if (!ret) { -+ ret = rett; -+ } - close(waiter[0]); - close(waiter[1]); - } else { -- (void)chdir("/"); -+ ret = chdir("/"); - } -- return 0; -+ return ret; - } - - void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Fix-xattr-operations.patch b/kvm-virtiofsd-Fix-xattr-operations.patch deleted file mode 100644 index 532948f..0000000 --- a/kvm-virtiofsd-Fix-xattr-operations.patch +++ /dev/null @@ -1,327 +0,0 @@ -From 8721796f22a8a61d82974088e542377ee6db209e Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:14 +0000 -Subject: [PATCH 18/18] virtiofsd: Fix xattr operations -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-8-dgilbert@redhat.com> -Patchwork-id: 94123 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 7/7] virtiofsd: Fix xattr operations -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: Misono Tomohiro - -Current virtiofsd has problems about xattr operations and -they does not work properly for directory/symlink/special file. - -The fundamental cause is that virtiofsd uses openat() + f...xattr() -systemcalls for xattr operation but we should not open symlink/special -file in the daemon. Therefore the function is restricted. - -Fix this problem by: - 1. during setup of each thread, call unshare(CLONE_FS) - 2. in xattr operations (i.e. lo_getxattr), if inode is not a regular - file or directory, use fchdir(proc_loot_fd) + ...xattr() + - fchdir(root.fd) instead of openat() + f...xattr() - - (Note: for a regular file/directory openat() + f...xattr() - is still used for performance reason) - -With this patch, xfstests generic/062 passes on virtiofs. - -This fix is suggested by Miklos Szeredi and Stefan Hajnoczi. -The original discussion can be found here: - https://www.redhat.com/archives/virtio-fs/2019-October/msg00046.html - -Signed-off-by: Misono Tomohiro -Message-Id: <20200227055927.24566-3-misono.tomohiro@jp.fujitsu.com> -Acked-by: Vivek Goyal -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit bdfd66788349acc43cd3f1298718ad491663cfcc) -Signed-off-by: Danilo C. L. 
de Paula ---- - tools/virtiofsd/fuse_virtio.c | 13 +++++ - tools/virtiofsd/passthrough_ll.c | 105 +++++++++++++++++++++------------------ - tools/virtiofsd/seccomp.c | 6 +++ - 3 files changed, 77 insertions(+), 47 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index dd1c605..3b6d16a 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -426,6 +426,8 @@ err: - return ret; - } - -+static __thread bool clone_fs_called; -+ - /* Process one FVRequest in a thread pool */ - static void fv_queue_worker(gpointer data, gpointer user_data) - { -@@ -441,6 +443,17 @@ static void fv_queue_worker(gpointer data, gpointer user_data) - - assert(se->bufsize > sizeof(struct fuse_in_header)); - -+ if (!clone_fs_called) { -+ int ret; -+ -+ /* unshare FS for xattr operation */ -+ ret = unshare(CLONE_FS); -+ /* should not fail */ -+ assert(ret == 0); -+ -+ clone_fs_called = true; -+ } -+ - /* - * An element contains one request and the space to send our response - * They're spread over multiple descriptors in a scatter/gather set -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 50c7273..9cba3f1 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -123,7 +123,7 @@ struct lo_inode { - pthread_mutex_t plock_mutex; - GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ - -- bool is_symlink; -+ mode_t filetype; - }; - - struct lo_cred { -@@ -695,7 +695,7 @@ static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, - struct lo_inode *parent; - char path[PATH_MAX]; - -- if (inode->is_symlink) { -+ if (S_ISLNK(inode->filetype)) { - res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH); - if (res == -1 && errno == EINVAL) { - /* Sorry, no race free way to set times on symlink. 
*/ -@@ -929,7 +929,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - goto out_err; - } - -- inode->is_symlink = S_ISLNK(e->attr.st_mode); -+ /* cache only filetype */ -+ inode->filetype = (e->attr.st_mode & S_IFMT); - - /* - * One for the caller and one for nlookup (released in -@@ -1139,7 +1140,7 @@ static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, - struct lo_inode *parent; - char path[PATH_MAX]; - -- if (inode->is_symlink) { -+ if (S_ISLNK(inode->filetype)) { - res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); - if (res == -1 && (errno == ENOENT || errno == EINVAL)) { - /* Sorry, no race free way to hard-link a symlink. */ -@@ -2193,12 +2194,6 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", - ino, name, size); - -- if (inode->is_symlink) { -- /* Sorry, no race free way to getxattr on symlink. */ -- saverr = EPERM; -- goto out; -- } -- - if (size) { - value = malloc(size); - if (!value) { -@@ -2207,12 +2202,25 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - } - - sprintf(procname, "%i", inode->fd); -- fd = openat(lo->proc_self_fd, procname, O_RDONLY); -- if (fd < 0) { -- goto out_err; -+ /* -+ * It is not safe to open() non-regular/non-dir files in file server -+ * unless O_PATH is used, so use that method for regular files/dir -+ * only (as it seems giving less performance overhead). -+ * Otherwise, call fchdir() to avoid open(). 
-+ */ -+ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ goto out_err; -+ } -+ ret = fgetxattr(fd, name, value, size); -+ } else { -+ /* fchdir should not fail here */ -+ assert(fchdir(lo->proc_self_fd) == 0); -+ ret = getxattr(procname, name, value, size); -+ assert(fchdir(lo->root.fd) == 0); - } - -- ret = fgetxattr(fd, name, value, size); - if (ret == -1) { - goto out_err; - } -@@ -2266,12 +2274,6 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino, - size); - -- if (inode->is_symlink) { -- /* Sorry, no race free way to listxattr on symlink. */ -- saverr = EPERM; -- goto out; -- } -- - if (size) { - value = malloc(size); - if (!value) { -@@ -2280,12 +2282,19 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - } - - sprintf(procname, "%i", inode->fd); -- fd = openat(lo->proc_self_fd, procname, O_RDONLY); -- if (fd < 0) { -- goto out_err; -+ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ goto out_err; -+ } -+ ret = flistxattr(fd, value, size); -+ } else { -+ /* fchdir should not fail here */ -+ assert(fchdir(lo->proc_self_fd) == 0); -+ ret = listxattr(procname, value, size); -+ assert(fchdir(lo->root.fd) == 0); - } - -- ret = flistxattr(fd, value, size); - if (ret == -1) { - goto out_err; - } -@@ -2339,20 +2348,21 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 - ", name=%s value=%s size=%zd)\n", ino, name, value, size); - -- if (inode->is_symlink) { -- /* Sorry, no race free way to setxattr on symlink. 
*/ -- saverr = EPERM; -- goto out; -- } -- - sprintf(procname, "%i", inode->fd); -- fd = openat(lo->proc_self_fd, procname, O_RDWR); -- if (fd < 0) { -- saverr = errno; -- goto out; -+ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ saverr = errno; -+ goto out; -+ } -+ ret = fsetxattr(fd, name, value, size, flags); -+ } else { -+ /* fchdir should not fail here */ -+ assert(fchdir(lo->proc_self_fd) == 0); -+ ret = setxattr(procname, name, value, size, flags); -+ assert(fchdir(lo->root.fd) == 0); - } - -- ret = fsetxattr(fd, name, value, size, flags); - saverr = ret == -1 ? errno : 0; - - out: -@@ -2387,20 +2397,21 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino, - name); - -- if (inode->is_symlink) { -- /* Sorry, no race free way to setxattr on symlink. */ -- saverr = EPERM; -- goto out; -- } -- - sprintf(procname, "%i", inode->fd); -- fd = openat(lo->proc_self_fd, procname, O_RDWR); -- if (fd < 0) { -- saverr = errno; -- goto out; -+ if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ saverr = errno; -+ goto out; -+ } -+ ret = fremovexattr(fd, name); -+ } else { -+ /* fchdir should not fail here */ -+ assert(fchdir(lo->proc_self_fd) == 0); -+ ret = removexattr(procname, name); -+ assert(fchdir(lo->root.fd) == 0); - } - -- ret = fremovexattr(fd, name); - saverr = ret == -1 ? 
errno : 0; - - out: -@@ -2800,7 +2811,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) - exit(1); - } - -- root->is_symlink = false; -+ root->filetype = S_IFDIR; - root->fd = fd; - root->key.ino = stat.st_ino; - root->key.dev = stat.st_dev; -diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c -index 2d9d4a7..bd9e7b0 100644 ---- a/tools/virtiofsd/seccomp.c -+++ b/tools/virtiofsd/seccomp.c -@@ -41,6 +41,7 @@ static const int syscall_whitelist[] = { - SCMP_SYS(exit), - SCMP_SYS(exit_group), - SCMP_SYS(fallocate), -+ SCMP_SYS(fchdir), - SCMP_SYS(fchmodat), - SCMP_SYS(fchownat), - SCMP_SYS(fcntl), -@@ -62,7 +63,9 @@ static const int syscall_whitelist[] = { - SCMP_SYS(getpid), - SCMP_SYS(gettid), - SCMP_SYS(gettimeofday), -+ SCMP_SYS(getxattr), - SCMP_SYS(linkat), -+ SCMP_SYS(listxattr), - SCMP_SYS(lseek), - SCMP_SYS(madvise), - SCMP_SYS(mkdirat), -@@ -85,6 +88,7 @@ static const int syscall_whitelist[] = { - SCMP_SYS(recvmsg), - SCMP_SYS(renameat), - SCMP_SYS(renameat2), -+ SCMP_SYS(removexattr), - SCMP_SYS(rt_sigaction), - SCMP_SYS(rt_sigprocmask), - SCMP_SYS(rt_sigreturn), -@@ -98,10 +102,12 @@ static const int syscall_whitelist[] = { - SCMP_SYS(setresuid32), - #endif - SCMP_SYS(set_robust_list), -+ SCMP_SYS(setxattr), - SCMP_SYS(symlinkat), - SCMP_SYS(time), /* Rarely needed, except on static builds */ - SCMP_SYS(tgkill), - SCMP_SYS(unlinkat), -+ SCMP_SYS(unshare), - SCMP_SYS(utimensat), - SCMP_SYS(write), - SCMP_SYS(writev), --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch b/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch deleted file mode 100644 index 5593a33..0000000 --- a/kvm-virtiofsd-Format-imported-files-to-qemu-style.patch +++ /dev/null @@ -1,14743 +0,0 @@ -From e313ab94af558bbc133e7a93b0a6dbff706dd1d8 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:45 +0100 -Subject: [PATCH 014/116] virtiofsd: Format imported files to qemu style -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-11-dgilbert@redhat.com> -Patchwork-id: 93464 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 010/112] virtiofsd: Format imported files to qemu style -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Mostly using a set like: - -indent -nut -i 4 -nlp -br -cs -ce --no-space-after-function-call-names file -clang-format -style=file -i -- file -clang-tidy -fix-errors -checks=readability-braces-around-statements file -clang-format -style=file -i -- file - -With manual cleanups. - -The .clang-format used is below. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed by: Aleksandar Markovic - -Language: Cpp -AlignAfterOpenBracket: Align -AlignConsecutiveAssignments: false # although we like it, it creates churn -AlignConsecutiveDeclarations: false -AlignEscapedNewlinesLeft: true -AlignOperands: true -AlignTrailingComments: false # churn -AllowAllParametersOfDeclarationOnNextLine: true -AllowShortBlocksOnASingleLine: false -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: None -AllowShortIfStatementsOnASingleLine: false -AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterReturnType: None # AlwaysBreakAfterDefinitionReturnType is taken into account -AlwaysBreakBeforeMultilineStrings: false -BinPackArguments: true -BinPackParameters: true -BraceWrapping: - AfterControlStatement: false - AfterEnum: false - AfterFunction: true - AfterStruct: false - AfterUnion: false - BeforeElse: false - IndentBraces: false -BreakBeforeBinaryOperators: None -BreakBeforeBraces: Custom -BreakBeforeTernaryOperators: false -BreakStringLiterals: true 
-ColumnLimit: 80 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: false -DerivePointerAlignment: false -DisableFormat: false -ForEachMacros: [ - 'CPU_FOREACH', - 'CPU_FOREACH_REVERSE', - 'CPU_FOREACH_SAFE', - 'IOMMU_NOTIFIER_FOREACH', - 'QLIST_FOREACH', - 'QLIST_FOREACH_ENTRY', - 'QLIST_FOREACH_RCU', - 'QLIST_FOREACH_SAFE', - 'QLIST_FOREACH_SAFE_RCU', - 'QSIMPLEQ_FOREACH', - 'QSIMPLEQ_FOREACH_SAFE', - 'QSLIST_FOREACH', - 'QSLIST_FOREACH_SAFE', - 'QTAILQ_FOREACH', - 'QTAILQ_FOREACH_REVERSE', - 'QTAILQ_FOREACH_SAFE', - 'QTAILQ_RAW_FOREACH', - 'RAMBLOCK_FOREACH' -] -IncludeCategories: - - Regex: '^"qemu/osdep.h' - Priority: -3 - - Regex: '^"(block|chardev|crypto|disas|exec|fpu|hw|io|libdecnumber|migration|monitor|net|qapi|qemu|qom|standard-headers|sysemu|ui)/' - Priority: -2 - - Regex: '^"(elf.h|qemu-common.h|glib-compat.h|qemu-io.h|trace-tcg.h)' - Priority: -1 - - Regex: '.*' - Priority: 1 -IncludeIsMainRegex: '$' -IndentCaseLabels: false -IndentWidth: 4 -IndentWrappedFunctionNames: false -KeepEmptyLinesAtTheStartOfBlocks: false -MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? -MacroBlockEnd: '.*_END$' -MaxEmptyLinesToKeep: 2 -PointerAlignment: Right -ReflowComments: true -SortIncludes: true -SpaceAfterCStyleCast: false -SpaceBeforeAssignmentOperators: true -SpaceBeforeParens: ControlStatements -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 1 -SpacesInContainerLiterals: true -SpacesInParentheses: false -SpacesInSquareBrackets: false -Standard: Auto -UseTab: Never -... - -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 7387863d033e8028aa09a815736617a7c4490827) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 434 ++-- - tools/virtiofsd/fuse.h | 1572 +++++++------- - tools/virtiofsd/fuse_common.h | 730 +++---- - tools/virtiofsd/fuse_i.h | 121 +- - tools/virtiofsd/fuse_log.c | 38 +- - tools/virtiofsd/fuse_log.h | 32 +- - tools/virtiofsd/fuse_lowlevel.c | 3638 +++++++++++++++++---------------- - tools/virtiofsd/fuse_lowlevel.h | 2392 +++++++++++----------- - tools/virtiofsd/fuse_misc.h | 30 +- - tools/virtiofsd/fuse_opt.c | 659 +++--- - tools/virtiofsd/fuse_opt.h | 79 +- - tools/virtiofsd/fuse_signals.c | 118 +- - tools/virtiofsd/helper.c | 506 ++--- - tools/virtiofsd/passthrough_helpers.h | 33 +- - tools/virtiofsd/passthrough_ll.c | 2061 ++++++++++--------- - 15 files changed, 6382 insertions(+), 6061 deletions(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index aefb7db..5df946c 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -1,252 +1,272 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2010 Miklos Szeredi -- -- Functions for dealing with `struct fuse_buf` and `struct -- fuse_bufvec`. -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2010 Miklos Szeredi -+ * -+ * Functions for dealing with `struct fuse_buf` and `struct -+ * fuse_bufvec`. -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ - - #define _GNU_SOURCE - - #include "config.h" - #include "fuse_i.h" - #include "fuse_lowlevel.h" -+#include -+#include - #include - #include --#include --#include - - size_t fuse_buf_size(const struct fuse_bufvec *bufv) - { -- size_t i; -- size_t size = 0; -- -- for (i = 0; i < bufv->count; i++) { -- if (bufv->buf[i].size == SIZE_MAX) -- size = SIZE_MAX; -- else -- size += bufv->buf[i].size; -- } -- -- return size; -+ size_t i; -+ size_t size = 0; -+ -+ for (i = 0; i < bufv->count; i++) { -+ if (bufv->buf[i].size == SIZE_MAX) { -+ size = SIZE_MAX; -+ } else { -+ size += bufv->buf[i].size; -+ } -+ } -+ -+ return size; - } - - static size_t min_size(size_t s1, size_t s2) - { -- return s1 < s2 ? s1 : s2; -+ return s1 < s2 ? s1 : s2; - } - - static ssize_t fuse_buf_write(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len) -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) - { -- ssize_t res = 0; -- size_t copied = 0; -- -- while (len) { -- if (dst->flags & FUSE_BUF_FD_SEEK) { -- res = pwrite(dst->fd, (char *)src->mem + src_off, len, -- dst->pos + dst_off); -- } else { -- res = write(dst->fd, (char *)src->mem + src_off, len); -- } -- if (res == -1) { -- if (!copied) -- return -errno; -- break; -- } -- if (res == 0) -- break; -- -- copied += res; -- if (!(dst->flags & FUSE_BUF_FD_RETRY)) -- break; -- -- src_off += res; -- dst_off += res; -- len -= res; -- } -- -- return copied; -+ ssize_t res = 0; -+ size_t copied = 0; -+ -+ while (len) { -+ if (dst->flags & FUSE_BUF_FD_SEEK) { -+ res = pwrite(dst->fd, (char *)src->mem + src_off, len, -+ dst->pos + dst_off); -+ } else { -+ res = write(dst->fd, (char *)src->mem + src_off, len); -+ } -+ if (res == -1) { -+ if (!copied) { -+ return -errno; -+ } -+ break; -+ } -+ if (res == 0) { -+ break; -+ } -+ -+ copied += res; -+ if (!(dst->flags & FUSE_BUF_FD_RETRY)) { -+ break; -+ } -+ -+ src_off += res; -+ dst_off += res; -+ len -= 
res; -+ } -+ -+ return copied; - } - - static ssize_t fuse_buf_read(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len) -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) - { -- ssize_t res = 0; -- size_t copied = 0; -- -- while (len) { -- if (src->flags & FUSE_BUF_FD_SEEK) { -- res = pread(src->fd, (char *)dst->mem + dst_off, len, -- src->pos + src_off); -- } else { -- res = read(src->fd, (char *)dst->mem + dst_off, len); -- } -- if (res == -1) { -- if (!copied) -- return -errno; -- break; -- } -- if (res == 0) -- break; -- -- copied += res; -- if (!(src->flags & FUSE_BUF_FD_RETRY)) -- break; -- -- dst_off += res; -- src_off += res; -- len -= res; -- } -- -- return copied; -+ ssize_t res = 0; -+ size_t copied = 0; -+ -+ while (len) { -+ if (src->flags & FUSE_BUF_FD_SEEK) { -+ res = pread(src->fd, (char *)dst->mem + dst_off, len, -+ src->pos + src_off); -+ } else { -+ res = read(src->fd, (char *)dst->mem + dst_off, len); -+ } -+ if (res == -1) { -+ if (!copied) { -+ return -errno; -+ } -+ break; -+ } -+ if (res == 0) { -+ break; -+ } -+ -+ copied += res; -+ if (!(src->flags & FUSE_BUF_FD_RETRY)) { -+ break; -+ } -+ -+ dst_off += res; -+ src_off += res; -+ len -= res; -+ } -+ -+ return copied; - } - - static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len) -+ const struct fuse_buf *src, size_t src_off, -+ size_t len) - { -- char buf[4096]; -- struct fuse_buf tmp = { -- .size = sizeof(buf), -- .flags = 0, -- }; -- ssize_t res; -- size_t copied = 0; -- -- tmp.mem = buf; -- -- while (len) { -- size_t this_len = min_size(tmp.size, len); -- size_t read_len; -- -- res = fuse_buf_read(&tmp, 0, src, src_off, this_len); -- if (res < 0) { -- if (!copied) -- return res; -- break; -- } -- if (res == 0) -- break; -- -- read_len = res; -- res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); -- if (res < 0) { -- if (!copied) -- 
return res; -- break; -- } -- if (res == 0) -- break; -- -- copied += res; -- -- if (res < this_len) -- break; -- -- dst_off += res; -- src_off += res; -- len -= res; -- } -- -- return copied; -+ char buf[4096]; -+ struct fuse_buf tmp = { -+ .size = sizeof(buf), -+ .flags = 0, -+ }; -+ ssize_t res; -+ size_t copied = 0; -+ -+ tmp.mem = buf; -+ -+ while (len) { -+ size_t this_len = min_size(tmp.size, len); -+ size_t read_len; -+ -+ res = fuse_buf_read(&tmp, 0, src, src_off, this_len); -+ if (res < 0) { -+ if (!copied) { -+ return res; -+ } -+ break; -+ } -+ if (res == 0) { -+ break; -+ } -+ -+ read_len = res; -+ res = fuse_buf_write(dst, dst_off, &tmp, 0, read_len); -+ if (res < 0) { -+ if (!copied) { -+ return res; -+ } -+ break; -+ } -+ if (res == 0) { -+ break; -+ } -+ -+ copied += res; -+ -+ if (res < this_len) { -+ break; -+ } -+ -+ dst_off += res; -+ src_off += res; -+ len -= res; -+ } -+ -+ return copied; - } - - static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len, enum fuse_buf_copy_flags flags) -+ const struct fuse_buf *src, size_t src_off, -+ size_t len, enum fuse_buf_copy_flags flags) - { -- int src_is_fd = src->flags & FUSE_BUF_IS_FD; -- int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; -- -- if (!src_is_fd && !dst_is_fd) { -- char *dstmem = (char *)dst->mem + dst_off; -- char *srcmem = (char *)src->mem + src_off; -- -- if (dstmem != srcmem) { -- if (dstmem + len <= srcmem || srcmem + len <= dstmem) -- memcpy(dstmem, srcmem, len); -- else -- memmove(dstmem, srcmem, len); -- } -- -- return len; -- } else if (!src_is_fd) { -- return fuse_buf_write(dst, dst_off, src, src_off, len); -- } else if (!dst_is_fd) { -- return fuse_buf_read(dst, dst_off, src, src_off, len); -- } else { -- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); -- } -+ int src_is_fd = src->flags & FUSE_BUF_IS_FD; -+ int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; -+ -+ if (!src_is_fd && !dst_is_fd) { -+ 
char *dstmem = (char *)dst->mem + dst_off; -+ char *srcmem = (char *)src->mem + src_off; -+ -+ if (dstmem != srcmem) { -+ if (dstmem + len <= srcmem || srcmem + len <= dstmem) { -+ memcpy(dstmem, srcmem, len); -+ } else { -+ memmove(dstmem, srcmem, len); -+ } -+ } -+ -+ return len; -+ } else if (!src_is_fd) { -+ return fuse_buf_write(dst, dst_off, src, src_off, len); -+ } else if (!dst_is_fd) { -+ return fuse_buf_read(dst, dst_off, src, src_off, len); -+ } else { -+ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); -+ } - } - - static const struct fuse_buf *fuse_bufvec_current(struct fuse_bufvec *bufv) - { -- if (bufv->idx < bufv->count) -- return &bufv->buf[bufv->idx]; -- else -- return NULL; -+ if (bufv->idx < bufv->count) { -+ return &bufv->buf[bufv->idx]; -+ } else { -+ return NULL; -+ } - } - - static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) - { -- const struct fuse_buf *buf = fuse_bufvec_current(bufv); -- -- bufv->off += len; -- assert(bufv->off <= buf->size); -- if (bufv->off == buf->size) { -- assert(bufv->idx < bufv->count); -- bufv->idx++; -- if (bufv->idx == bufv->count) -- return 0; -- bufv->off = 0; -- } -- return 1; -+ const struct fuse_buf *buf = fuse_bufvec_current(bufv); -+ -+ bufv->off += len; -+ assert(bufv->off <= buf->size); -+ if (bufv->off == buf->size) { -+ assert(bufv->idx < bufv->count); -+ bufv->idx++; -+ if (bufv->idx == bufv->count) { -+ return 0; -+ } -+ bufv->off = 0; -+ } -+ return 1; - } - - ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, -- enum fuse_buf_copy_flags flags) -+ enum fuse_buf_copy_flags flags) - { -- size_t copied = 0; -- -- if (dstv == srcv) -- return fuse_buf_size(dstv); -- -- for (;;) { -- const struct fuse_buf *src = fuse_bufvec_current(srcv); -- const struct fuse_buf *dst = fuse_bufvec_current(dstv); -- size_t src_len; -- size_t dst_len; -- size_t len; -- ssize_t res; -- -- if (src == NULL || dst == NULL) -- break; -- -- src_len = src->size - srcv->off; -- 
dst_len = dst->size - dstv->off; -- len = min_size(src_len, dst_len); -- -- res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); -- if (res < 0) { -- if (!copied) -- return res; -- break; -- } -- copied += res; -- -- if (!fuse_bufvec_advance(srcv, res) || -- !fuse_bufvec_advance(dstv, res)) -- break; -- -- if (res < len) -- break; -- } -- -- return copied; -+ size_t copied = 0; -+ -+ if (dstv == srcv) { -+ return fuse_buf_size(dstv); -+ } -+ -+ for (;;) { -+ const struct fuse_buf *src = fuse_bufvec_current(srcv); -+ const struct fuse_buf *dst = fuse_bufvec_current(dstv); -+ size_t src_len; -+ size_t dst_len; -+ size_t len; -+ ssize_t res; -+ -+ if (src == NULL || dst == NULL) { -+ break; -+ } -+ -+ src_len = src->size - srcv->off; -+ dst_len = dst->size - dstv->off; -+ len = min_size(src_len, dst_len); -+ -+ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); -+ if (res < 0) { -+ if (!copied) { -+ return res; -+ } -+ break; -+ } -+ copied += res; -+ -+ if (!fuse_bufvec_advance(srcv, res) || -+ !fuse_bufvec_advance(dstv, res)) { -+ break; -+ } -+ -+ if (res < len) { -+ break; -+ } -+ } -+ -+ return copied; - } -diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h -index 3202fba..7a4c713 100644 ---- a/tools/virtiofsd/fuse.h -+++ b/tools/virtiofsd/fuse.h -@@ -1,15 +1,15 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. 
-+ */ - - #ifndef FUSE_H_ - #define FUSE_H_ - --/** @file -+/* - * - * This file defines the library interface of FUSE - * -@@ -19,15 +19,15 @@ - #include "fuse_common.h" - - #include --#include --#include - #include - #include -+#include - #include -+#include - --/* ----------------------------------------------------------- * -- * Basic FUSE API * -- * ----------------------------------------------------------- */ -+/* -+ * Basic FUSE API -+ */ - - /** Handle for a FUSE filesystem */ - struct fuse; -@@ -36,38 +36,39 @@ struct fuse; - * Readdir flags, passed to ->readdir() - */ - enum fuse_readdir_flags { -- /** -- * "Plus" mode. -- * -- * The kernel wants to prefill the inode cache during readdir. The -- * filesystem may honour this by filling in the attributes and setting -- * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also -- * just ignore this flag completely. -- */ -- FUSE_READDIR_PLUS = (1 << 0), -+ /** -+ * "Plus" mode. -+ * -+ * The kernel wants to prefill the inode cache during readdir. The -+ * filesystem may honour this by filling in the attributes and setting -+ * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also -+ * just ignore this flag completely. -+ */ -+ FUSE_READDIR_PLUS = (1 << 0), - }; - - enum fuse_fill_dir_flags { -- /** -- * "Plus" mode: all file attributes are valid -- * -- * The attributes are used by the kernel to prefill the inode cache -- * during a readdir. -- * -- * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set -- * and vice versa. -- */ -- FUSE_FILL_DIR_PLUS = (1 << 1), -+ /** -+ * "Plus" mode: all file attributes are valid -+ * -+ * The attributes are used by the kernel to prefill the inode cache -+ * during a readdir. -+ * -+ * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set -+ * and vice versa. 
-+ */ -+ FUSE_FILL_DIR_PLUS = (1 << 1), - }; - --/** Function to add an entry in a readdir() operation -+/** -+ * Function to add an entry in a readdir() operation - * - * The *off* parameter can be any non-zero value that enables the - * filesystem to identify the current point in the directory - * stream. It does not need to be the actual physical position. A - * value of zero is reserved to indicate that seeking in directories - * is not supported. -- * -+ * - * @param buf the buffer passed to the readdir() operation - * @param name the file name of the directory entry - * @param stat file attributes, can be NULL -@@ -75,9 +76,9 @@ enum fuse_fill_dir_flags { - * @param flags fill flags - * @return 1 if buffer is full, zero otherwise - */ --typedef int (*fuse_fill_dir_t) (void *buf, const char *name, -- const struct stat *stbuf, off_t off, -- enum fuse_fill_dir_flags flags); -+typedef int (*fuse_fill_dir_t)(void *buf, const char *name, -+ const struct stat *stbuf, off_t off, -+ enum fuse_fill_dir_flags flags); - /** - * Configuration of the high-level API - * -@@ -87,186 +88,186 @@ typedef int (*fuse_fill_dir_t) (void *buf, const char *name, - * file system implementation. - */ - struct fuse_config { -- /** -- * If `set_gid` is non-zero, the st_gid attribute of each file -- * is overwritten with the value of `gid`. -- */ -- int set_gid; -- unsigned int gid; -- -- /** -- * If `set_uid` is non-zero, the st_uid attribute of each file -- * is overwritten with the value of `uid`. -- */ -- int set_uid; -- unsigned int uid; -- -- /** -- * If `set_mode` is non-zero, the any permissions bits set in -- * `umask` are unset in the st_mode attribute of each file. -- */ -- int set_mode; -- unsigned int umask; -- -- /** -- * The timeout in seconds for which name lookups will be -- * cached. -- */ -- double entry_timeout; -- -- /** -- * The timeout in seconds for which a negative lookup will be -- * cached. 
This means, that if file did not exist (lookup -- * retuned ENOENT), the lookup will only be redone after the -- * timeout, and the file/directory will be assumed to not -- * exist until then. A value of zero means that negative -- * lookups are not cached. -- */ -- double negative_timeout; -- -- /** -- * The timeout in seconds for which file/directory attributes -- * (as returned by e.g. the `getattr` handler) are cached. -- */ -- double attr_timeout; -- -- /** -- * Allow requests to be interrupted -- */ -- int intr; -- -- /** -- * Specify which signal number to send to the filesystem when -- * a request is interrupted. The default is hardcoded to -- * USR1. -- */ -- int intr_signal; -- -- /** -- * Normally, FUSE assigns inodes to paths only for as long as -- * the kernel is aware of them. With this option inodes are -- * instead remembered for at least this many seconds. This -- * will require more memory, but may be necessary when using -- * applications that make use of inode numbers. -- * -- * A number of -1 means that inodes will be remembered for the -- * entire life-time of the file-system process. -- */ -- int remember; -- -- /** -- * The default behavior is that if an open file is deleted, -- * the file is renamed to a hidden file (.fuse_hiddenXXX), and -- * only removed when the file is finally released. This -- * relieves the filesystem implementation of having to deal -- * with this problem. This option disables the hiding -- * behavior, and files are removed immediately in an unlink -- * operation (or in a rename operation which overwrites an -- * existing file). -- * -- * It is recommended that you not use the hard_remove -- * option. 
When hard_remove is set, the following libc -- * functions fail on unlinked files (returning errno of -- * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), -- * ftruncate(2), fstat(2), fchmod(2), fchown(2) -- */ -- int hard_remove; -- -- /** -- * Honor the st_ino field in the functions getattr() and -- * fill_dir(). This value is used to fill in the st_ino field -- * in the stat(2), lstat(2), fstat(2) functions and the d_ino -- * field in the readdir(2) function. The filesystem does not -- * have to guarantee uniqueness, however some applications -- * rely on this value being unique for the whole filesystem. -- * -- * Note that this does *not* affect the inode that libfuse -- * and the kernel use internally (also called the "nodeid"). -- */ -- int use_ino; -- -- /** -- * If use_ino option is not given, still try to fill in the -- * d_ino field in readdir(2). If the name was previously -- * looked up, and is still in the cache, the inode number -- * found there will be used. Otherwise it will be set to -1. -- * If use_ino option is given, this option is ignored. -- */ -- int readdir_ino; -- -- /** -- * This option disables the use of page cache (file content cache) -- * in the kernel for this filesystem. This has several affects: -- * -- * 1. Each read(2) or write(2) system call will initiate one -- * or more read or write operations, data will not be -- * cached in the kernel. -- * -- * 2. The return value of the read() and write() system calls -- * will correspond to the return values of the read and -- * write operations. This is useful for example if the -- * file size is not known in advance (before reading it). -- * -- * Internally, enabling this option causes fuse to set the -- * `direct_io` field of `struct fuse_file_info` - overwriting -- * any value that was put there by the file system. -- */ -- int direct_io; -- -- /** -- * This option disables flushing the cache of the file -- * contents on every open(2). 
This should only be enabled on -- * filesystems where the file data is never changed -- * externally (not through the mounted FUSE filesystem). Thus -- * it is not suitable for network filesystems and other -- * intermediate filesystems. -- * -- * NOTE: if this option is not specified (and neither -- * direct_io) data is still cached after the open(2), so a -- * read(2) system call will not always initiate a read -- * operation. -- * -- * Internally, enabling this option causes fuse to set the -- * `keep_cache` field of `struct fuse_file_info` - overwriting -- * any value that was put there by the file system. -- */ -- int kernel_cache; -- -- /** -- * This option is an alternative to `kernel_cache`. Instead of -- * unconditionally keeping cached data, the cached data is -- * invalidated on open(2) if if the modification time or the -- * size of the file has changed since it was last opened. -- */ -- int auto_cache; -- -- /** -- * The timeout in seconds for which file attributes are cached -- * for the purpose of checking if auto_cache should flush the -- * file data on open. -- */ -- int ac_attr_timeout_set; -- double ac_attr_timeout; -- -- /** -- * If this option is given the file-system handlers for the -- * following operations will not receive path information: -- * read, write, flush, release, fsync, readdir, releasedir, -- * fsyncdir, lock, ioctl and poll. -- * -- * For the truncate, getattr, chmod, chown and utimens -- * operations the path will be provided only if the struct -- * fuse_file_info argument is NULL. -- */ -- int nullpath_ok; -- -- /** -- * The remaining options are used by libfuse internally and -- * should not be touched. -- */ -- int show_help; -- char *modules; -- int debug; -+ /** -+ * If `set_gid` is non-zero, the st_gid attribute of each file -+ * is overwritten with the value of `gid`. 
-+ */ -+ int set_gid; -+ unsigned int gid; -+ -+ /** -+ * If `set_uid` is non-zero, the st_uid attribute of each file -+ * is overwritten with the value of `uid`. -+ */ -+ int set_uid; -+ unsigned int uid; -+ -+ /** -+ * If `set_mode` is non-zero, the any permissions bits set in -+ * `umask` are unset in the st_mode attribute of each file. -+ */ -+ int set_mode; -+ unsigned int umask; -+ -+ /** -+ * The timeout in seconds for which name lookups will be -+ * cached. -+ */ -+ double entry_timeout; -+ -+ /** -+ * The timeout in seconds for which a negative lookup will be -+ * cached. This means, that if file did not exist (lookup -+ * retuned ENOENT), the lookup will only be redone after the -+ * timeout, and the file/directory will be assumed to not -+ * exist until then. A value of zero means that negative -+ * lookups are not cached. -+ */ -+ double negative_timeout; -+ -+ /** -+ * The timeout in seconds for which file/directory attributes -+ * (as returned by e.g. the `getattr` handler) are cached. -+ */ -+ double attr_timeout; -+ -+ /** -+ * Allow requests to be interrupted -+ */ -+ int intr; -+ -+ /** -+ * Specify which signal number to send to the filesystem when -+ * a request is interrupted. The default is hardcoded to -+ * USR1. -+ */ -+ int intr_signal; -+ -+ /** -+ * Normally, FUSE assigns inodes to paths only for as long as -+ * the kernel is aware of them. With this option inodes are -+ * instead remembered for at least this many seconds. This -+ * will require more memory, but may be necessary when using -+ * applications that make use of inode numbers. -+ * -+ * A number of -1 means that inodes will be remembered for the -+ * entire life-time of the file-system process. -+ */ -+ int remember; -+ -+ /** -+ * The default behavior is that if an open file is deleted, -+ * the file is renamed to a hidden file (.fuse_hiddenXXX), and -+ * only removed when the file is finally released. 
This -+ * relieves the filesystem implementation of having to deal -+ * with this problem. This option disables the hiding -+ * behavior, and files are removed immediately in an unlink -+ * operation (or in a rename operation which overwrites an -+ * existing file). -+ * -+ * It is recommended that you not use the hard_remove -+ * option. When hard_remove is set, the following libc -+ * functions fail on unlinked files (returning errno of -+ * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), -+ * ftruncate(2), fstat(2), fchmod(2), fchown(2) -+ */ -+ int hard_remove; -+ -+ /** -+ * Honor the st_ino field in the functions getattr() and -+ * fill_dir(). This value is used to fill in the st_ino field -+ * in the stat(2), lstat(2), fstat(2) functions and the d_ino -+ * field in the readdir(2) function. The filesystem does not -+ * have to guarantee uniqueness, however some applications -+ * rely on this value being unique for the whole filesystem. -+ * -+ * Note that this does *not* affect the inode that libfuse -+ * and the kernel use internally (also called the "nodeid"). -+ */ -+ int use_ino; -+ -+ /** -+ * If use_ino option is not given, still try to fill in the -+ * d_ino field in readdir(2). If the name was previously -+ * looked up, and is still in the cache, the inode number -+ * found there will be used. Otherwise it will be set to -1. -+ * If use_ino option is given, this option is ignored. -+ */ -+ int readdir_ino; -+ -+ /** -+ * This option disables the use of page cache (file content cache) -+ * in the kernel for this filesystem. This has several affects: -+ * -+ * 1. Each read(2) or write(2) system call will initiate one -+ * or more read or write operations, data will not be -+ * cached in the kernel. -+ * -+ * 2. The return value of the read() and write() system calls -+ * will correspond to the return values of the read and -+ * write operations. This is useful for example if the -+ * file size is not known in advance (before reading it). 
-+ * -+ * Internally, enabling this option causes fuse to set the -+ * `direct_io` field of `struct fuse_file_info` - overwriting -+ * any value that was put there by the file system. -+ */ -+ int direct_io; -+ -+ /** -+ * This option disables flushing the cache of the file -+ * contents on every open(2). This should only be enabled on -+ * filesystems where the file data is never changed -+ * externally (not through the mounted FUSE filesystem). Thus -+ * it is not suitable for network filesystems and other -+ * intermediate filesystems. -+ * -+ * NOTE: if this option is not specified (and neither -+ * direct_io) data is still cached after the open(2), so a -+ * read(2) system call will not always initiate a read -+ * operation. -+ * -+ * Internally, enabling this option causes fuse to set the -+ * `keep_cache` field of `struct fuse_file_info` - overwriting -+ * any value that was put there by the file system. -+ */ -+ int kernel_cache; -+ -+ /** -+ * This option is an alternative to `kernel_cache`. Instead of -+ * unconditionally keeping cached data, the cached data is -+ * invalidated on open(2) if if the modification time or the -+ * size of the file has changed since it was last opened. -+ */ -+ int auto_cache; -+ -+ /** -+ * The timeout in seconds for which file attributes are cached -+ * for the purpose of checking if auto_cache should flush the -+ * file data on open. -+ */ -+ int ac_attr_timeout_set; -+ double ac_attr_timeout; -+ -+ /** -+ * If this option is given the file-system handlers for the -+ * following operations will not receive path information: -+ * read, write, flush, release, fsync, readdir, releasedir, -+ * fsyncdir, lock, ioctl and poll. -+ * -+ * For the truncate, getattr, chmod, chown and utimens -+ * operations the path will be provided only if the struct -+ * fuse_file_info argument is NULL. -+ */ -+ int nullpath_ok; -+ -+ /** -+ * The remaining options are used by libfuse internally and -+ * should not be touched. 
-+ */ -+ int show_help; -+ char *modules; -+ int debug; - }; - - -@@ -293,515 +294,535 @@ struct fuse_config { - * Almost all operations take a path which can be of any length. - */ - struct fuse_operations { -- /** Get file attributes. -- * -- * Similar to stat(). The 'st_dev' and 'st_blksize' fields are -- * ignored. The 'st_ino' field is ignored except if the 'use_ino' -- * mount option is given. In that case it is passed to userspace, -- * but libfuse and the kernel will still assign a different -- * inode for internal use (called the "nodeid"). -- * -- * `fi` will always be NULL if the file is not currently open, but -- * may also be NULL if the file is open. -- */ -- int (*getattr) (const char *, struct stat *, struct fuse_file_info *fi); -- -- /** Read the target of a symbolic link -- * -- * The buffer should be filled with a null terminated string. The -- * buffer size argument includes the space for the terminating -- * null character. If the linkname is too long to fit in the -- * buffer, it should be truncated. The return value should be 0 -- * for success. -- */ -- int (*readlink) (const char *, char *, size_t); -- -- /** Create a file node -- * -- * This is called for creation of all non-directory, non-symlink -- * nodes. If the filesystem defines a create() method, then for -- * regular files that will be called instead. -- */ -- int (*mknod) (const char *, mode_t, dev_t); -- -- /** Create a directory -- * -- * Note that the mode argument may not have the type specification -- * bits set, i.e. S_ISDIR(mode) can be false. To obtain the -- * correct directory type bits use mode|S_IFDIR -- * */ -- int (*mkdir) (const char *, mode_t); -- -- /** Remove a file */ -- int (*unlink) (const char *); -- -- /** Remove a directory */ -- int (*rmdir) (const char *); -- -- /** Create a symbolic link */ -- int (*symlink) (const char *, const char *); -- -- /** Rename a file -- * -- * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. 
If -- * RENAME_NOREPLACE is specified, the filesystem must not -- * overwrite *newname* if it exists and return an error -- * instead. If `RENAME_EXCHANGE` is specified, the filesystem -- * must atomically exchange the two files, i.e. both must -- * exist and neither may be deleted. -- */ -- int (*rename) (const char *, const char *, unsigned int flags); -- -- /** Create a hard link to a file */ -- int (*link) (const char *, const char *); -- -- /** Change the permission bits of a file -- * -- * `fi` will always be NULL if the file is not currenlty open, but -- * may also be NULL if the file is open. -- */ -- int (*chmod) (const char *, mode_t, struct fuse_file_info *fi); -- -- /** Change the owner and group of a file -- * -- * `fi` will always be NULL if the file is not currenlty open, but -- * may also be NULL if the file is open. -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. -- */ -- int (*chown) (const char *, uid_t, gid_t, struct fuse_file_info *fi); -- -- /** Change the size of a file -- * -- * `fi` will always be NULL if the file is not currenlty open, but -- * may also be NULL if the file is open. -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. -- */ -- int (*truncate) (const char *, off_t, struct fuse_file_info *fi); -- -- /** Open a file -- * -- * Open flags are available in fi->flags. The following rules -- * apply. -- * -- * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -- * filtered out / handled by the kernel. -- * -- * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) -- * should be used by the filesystem to check if the operation is -- * permitted. If the ``-o default_permissions`` mount option is -- * given, this check is already done by the kernel before calling -- * open() and may thus be omitted by the filesystem. 
-- * -- * - When writeback caching is enabled, the kernel may send -- * read requests even for files opened with O_WRONLY. The -- * filesystem should be prepared to handle this. -- * -- * - When writeback caching is disabled, the filesystem is -- * expected to properly handle the O_APPEND flag and ensure -- * that each write is appending to the end of the file. -- * -- * - When writeback caching is enabled, the kernel will -- * handle O_APPEND. However, unless all changes to the file -- * come through the kernel this will not work reliably. The -- * filesystem should thus either ignore the O_APPEND flag -- * (and let the kernel handle it), or return an error -- * (indicating that reliably O_APPEND is not available). -- * -- * Filesystem may store an arbitrary file handle (pointer, -- * index, etc) in fi->fh, and use this in other all other file -- * operations (read, write, flush, release, fsync). -- * -- * Filesystem may also implement stateless file I/O and not store -- * anything in fi->fh. -- * -- * There are also some flags (direct_io, keep_cache) which the -- * filesystem may set in fi, to change the way the file is opened. -- * See fuse_file_info structure in for more details. -- * -- * If this request is answered with an error code of ENOSYS -- * and FUSE_CAP_NO_OPEN_SUPPORT is set in -- * `fuse_conn_info.capable`, this is treated as success and -- * future calls to open will also succeed without being send -- * to the filesystem process. -- * -- */ -- int (*open) (const char *, struct fuse_file_info *); -- -- /** Read data from an open file -- * -- * Read should return exactly the number of bytes requested except -- * on EOF or error, otherwise the rest of the data will be -- * substituted with zeroes. An exception to this is when the -- * 'direct_io' mount option is specified, in which case the return -- * value of the read system call will reflect the return value of -- * this operation. 
-- */ -- int (*read) (const char *, char *, size_t, off_t, -- struct fuse_file_info *); -- -- /** Write data to an open file -- * -- * Write should return exactly the number of bytes requested -- * except on error. An exception to this is when the 'direct_io' -- * mount option is specified (see read operation). -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. -- */ -- int (*write) (const char *, const char *, size_t, off_t, -- struct fuse_file_info *); -- -- /** Get file system statistics -- * -- * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored -- */ -- int (*statfs) (const char *, struct statvfs *); -- -- /** Possibly flush cached data -- * -- * BIG NOTE: This is not equivalent to fsync(). It's not a -- * request to sync dirty data. -- * -- * Flush is called on each close() of a file descriptor, as opposed to -- * release which is called on the close of the last file descriptor for -- * a file. Under Linux, errors returned by flush() will be passed to -- * userspace as errors from close(), so flush() is a good place to write -- * back any cached dirty data. However, many applications ignore errors -- * on close(), and on non-Linux systems, close() may succeed even if flush() -- * returns an error. For these reasons, filesystems should not assume -- * that errors returned by flush will ever be noticed or even -- * delivered. -- * -- * NOTE: The flush() method may be called more than once for each -- * open(). This happens if more than one file descriptor refers to an -- * open file handle, e.g. due to dup(), dup2() or fork() calls. It is -- * not possible to determine if a flush is final, so each flush should -- * be treated equally. Multiple write-flush sequences are relatively -- * rare, so this shouldn't be a problem. -- * -- * Filesystems shouldn't assume that flush will be called at any -- * particular point. It may be called more times than expected, or not -- * at all. 
-- * -- * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -- */ -- int (*flush) (const char *, struct fuse_file_info *); -- -- /** Release an open file -- * -- * Release is called when there are no more references to an open -- * file: all file descriptors are closed and all memory mappings -- * are unmapped. -- * -- * For every open() call there will be exactly one release() call -- * with the same flags and file handle. It is possible to -- * have a file opened more than once, in which case only the last -- * release will mean, that no more reads/writes will happen on the -- * file. The return value of release is ignored. -- */ -- int (*release) (const char *, struct fuse_file_info *); -- -- /** Synchronize file contents -- * -- * If the datasync parameter is non-zero, then only the user data -- * should be flushed, not the meta data. -- */ -- int (*fsync) (const char *, int, struct fuse_file_info *); -- -- /** Set extended attributes */ -- int (*setxattr) (const char *, const char *, const char *, size_t, int); -- -- /** Get extended attributes */ -- int (*getxattr) (const char *, const char *, char *, size_t); -- -- /** List extended attributes */ -- int (*listxattr) (const char *, char *, size_t); -- -- /** Remove extended attributes */ -- int (*removexattr) (const char *, const char *); -- -- /** Open directory -- * -- * Unless the 'default_permissions' mount option is given, -- * this method should check if opendir is permitted for this -- * directory. Optionally opendir may also return an arbitrary -- * filehandle in the fuse_file_info structure, which will be -- * passed to readdir, releasedir and fsyncdir. -- */ -- int (*opendir) (const char *, struct fuse_file_info *); -- -- /** Read directory -- * -- * The filesystem may choose between two modes of operation: -- * -- * 1) The readdir implementation ignores the offset parameter, and -- * passes zero to the filler function's offset. 
The filler -- * function will not return '1' (unless an error happens), so the -- * whole directory is read in a single readdir operation. -- * -- * 2) The readdir implementation keeps track of the offsets of the -- * directory entries. It uses the offset parameter and always -- * passes non-zero offset to the filler function. When the buffer -- * is full (or an error happens) the filler function will return -- * '1'. -- */ -- int (*readdir) (const char *, void *, fuse_fill_dir_t, off_t, -- struct fuse_file_info *, enum fuse_readdir_flags); -- -- /** Release directory -- */ -- int (*releasedir) (const char *, struct fuse_file_info *); -- -- /** Synchronize directory contents -- * -- * If the datasync parameter is non-zero, then only the user data -- * should be flushed, not the meta data -- */ -- int (*fsyncdir) (const char *, int, struct fuse_file_info *); -- -- /** -- * Initialize filesystem -- * -- * The return value will passed in the `private_data` field of -- * `struct fuse_context` to all file operations, and as a -- * parameter to the destroy() method. It overrides the initial -- * value provided to fuse_main() / fuse_new(). -- */ -- void *(*init) (struct fuse_conn_info *conn, -- struct fuse_config *cfg); -- -- /** -- * Clean up filesystem -- * -- * Called on filesystem exit. -- */ -- void (*destroy) (void *private_data); -- -- /** -- * Check file access permissions -- * -- * This will be called for the access() system call. If the -- * 'default_permissions' mount option is given, this method is not -- * called. -- * -- * This method is not called under Linux kernel versions 2.4.x -- */ -- int (*access) (const char *, int); -- -- /** -- * Create and open a file -- * -- * If the file does not exist, first create it with the specified -- * mode, and then open it. -- * -- * If this method is not implemented or under Linux kernel -- * versions earlier than 2.6.15, the mknod() and open() methods -- * will be called instead. 
-- */ -- int (*create) (const char *, mode_t, struct fuse_file_info *); -- -- /** -- * Perform POSIX file locking operation -- * -- * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. -- * -- * For the meaning of fields in 'struct flock' see the man page -- * for fcntl(2). The l_whence field will always be set to -- * SEEK_SET. -- * -- * For checking lock ownership, the 'fuse_file_info->owner' -- * argument must be used. -- * -- * For F_GETLK operation, the library will first check currently -- * held locks, and if a conflicting lock is found it will return -- * information without calling this method. This ensures, that -- * for local locks the l_pid field is correctly filled in. The -- * results may not be accurate in case of race conditions and in -- * the presence of hard links, but it's unlikely that an -- * application would rely on accurate GETLK results in these -- * cases. If a conflicting lock is not found, this method will be -- * called, and the filesystem may fill out l_pid by a meaningful -- * value, or it may leave this field zero. -- * -- * For F_SETLK and F_SETLKW the l_pid field will be set to the pid -- * of the process performing the locking operation. -- * -- * Note: if this method is not implemented, the kernel will still -- * allow file locking to work locally. Hence it is only -- * interesting for network filesystems and similar. -- */ -- int (*lock) (const char *, struct fuse_file_info *, int cmd, -- struct flock *); -- -- /** -- * Change the access and modification times of a file with -- * nanosecond resolution -- * -- * This supersedes the old utime() interface. New applications -- * should use this. -- * -- * `fi` will always be NULL if the file is not currenlty open, but -- * may also be NULL if the file is open. -- * -- * See the utimensat(2) man page for details. 
-- */ -- int (*utimens) (const char *, const struct timespec tv[2], -- struct fuse_file_info *fi); -- -- /** -- * Map block index within file to block index within device -- * -- * Note: This makes sense only for block device backed filesystems -- * mounted with the 'blkdev' option -- */ -- int (*bmap) (const char *, size_t blocksize, uint64_t *idx); -- -- /** -- * Ioctl -- * -- * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in -- * 64bit environment. The size and direction of data is -- * determined by _IOC_*() decoding of cmd. For _IOC_NONE, -- * data will be NULL, for _IOC_WRITE data is out area, for -- * _IOC_READ in area and if both are set in/out area. In all -- * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. -- * -- * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a -- * directory file handle. -- * -- * Note : the unsigned long request submitted by the application -- * is truncated to 32 bits. -- */ -- int (*ioctl) (const char *, unsigned int cmd, void *arg, -- struct fuse_file_info *, unsigned int flags, void *data); -- -- /** -- * Poll for IO readiness events -- * -- * Note: If ph is non-NULL, the client should notify -- * when IO readiness events occur by calling -- * fuse_notify_poll() with the specified ph. -- * -- * Regardless of the number of times poll with a non-NULL ph -- * is received, single notification is enough to clear all. -- * Notifying more times incurs overhead but doesn't harm -- * correctness. -- * -- * The callee is responsible for destroying ph with -- * fuse_pollhandle_destroy() when no longer in use. -- */ -- int (*poll) (const char *, struct fuse_file_info *, -- struct fuse_pollhandle *ph, unsigned *reventsp); -- -- /** Write contents of buffer to an open file -- * -- * Similar to the write() method, but data is supplied in a -- * generic buffer. Use fuse_buf_copy() to transfer data to -- * the destination. 
-- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. -- */ -- int (*write_buf) (const char *, struct fuse_bufvec *buf, off_t off, -- struct fuse_file_info *); -- -- /** Store data from an open file in a buffer -- * -- * Similar to the read() method, but data is stored and -- * returned in a generic buffer. -- * -- * No actual copying of data has to take place, the source -- * file descriptor may simply be stored in the buffer for -- * later data transfer. -- * -- * The buffer must be allocated dynamically and stored at the -- * location pointed to by bufp. If the buffer contains memory -- * regions, they too must be allocated using malloc(). The -- * allocated memory will be freed by the caller. -- */ -- int (*read_buf) (const char *, struct fuse_bufvec **bufp, -- size_t size, off_t off, struct fuse_file_info *); -- /** -- * Perform BSD file locking operation -- * -- * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN -- * -- * Nonblocking requests will be indicated by ORing LOCK_NB to -- * the above operations -- * -- * For more information see the flock(2) manual page. -- * -- * Additionally fi->owner will be set to a value unique to -- * this open file. This same value will be supplied to -- * ->release() when the file is released. -- * -- * Note: if this method is not implemented, the kernel will still -- * allow file locking to work locally. Hence it is only -- * interesting for network filesystems and similar. -- */ -- int (*flock) (const char *, struct fuse_file_info *, int op); -- -- /** -- * Allocates space for an open file -- * -- * This function ensures that required space is allocated for specified -- * file. If this function returns success then any subsequent write -- * request to specified range is guaranteed not to fail because of lack -- * of space on the file system media. 
-- */ -- int (*fallocate) (const char *, int, off_t, off_t, -- struct fuse_file_info *); -- -- /** -- * Copy a range of data from one file to another -- * -- * Performs an optimized copy between two file descriptors without the -- * additional cost of transferring data through the FUSE kernel module -- * to user space (glibc) and then back into the FUSE filesystem again. -- * -- * In case this method is not implemented, glibc falls back to reading -- * data from the source and writing to the destination. Effectively -- * doing an inefficient copy of the data. -- */ -- ssize_t (*copy_file_range) (const char *path_in, -- struct fuse_file_info *fi_in, -- off_t offset_in, const char *path_out, -- struct fuse_file_info *fi_out, -- off_t offset_out, size_t size, int flags); -- -- /** -- * Find next data or hole after the specified offset -- */ -- off_t (*lseek) (const char *, off_t off, int whence, struct fuse_file_info *); -+ /** -+ * Get file attributes. -+ * -+ * Similar to stat(). The 'st_dev' and 'st_blksize' fields are -+ * ignored. The 'st_ino' field is ignored except if the 'use_ino' -+ * mount option is given. In that case it is passed to userspace, -+ * but libfuse and the kernel will still assign a different -+ * inode for internal use (called the "nodeid"). -+ * -+ * `fi` will always be NULL if the file is not currently open, but -+ * may also be NULL if the file is open. -+ */ -+ int (*getattr)(const char *, struct stat *, struct fuse_file_info *fi); -+ -+ /** -+ * Read the target of a symbolic link -+ * -+ * The buffer should be filled with a null terminated string. The -+ * buffer size argument includes the space for the terminating -+ * null character. If the linkname is too long to fit in the -+ * buffer, it should be truncated. The return value should be 0 -+ * for success. -+ */ -+ int (*readlink)(const char *, char *, size_t); -+ -+ /** -+ * Create a file node -+ * -+ * This is called for creation of all non-directory, non-symlink -+ * nodes. 
If the filesystem defines a create() method, then for -+ * regular files that will be called instead. -+ */ -+ int (*mknod)(const char *, mode_t, dev_t); -+ -+ /** -+ * Create a directory -+ * -+ * Note that the mode argument may not have the type specification -+ * bits set, i.e. S_ISDIR(mode) can be false. To obtain the -+ * correct directory type bits use mode|S_IFDIR -+ */ -+ int (*mkdir)(const char *, mode_t); -+ -+ /** Remove a file */ -+ int (*unlink)(const char *); -+ -+ /** Remove a directory */ -+ int (*rmdir)(const char *); -+ -+ /** Create a symbolic link */ -+ int (*symlink)(const char *, const char *); -+ -+ /** -+ * Rename a file -+ * -+ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If -+ * RENAME_NOREPLACE is specified, the filesystem must not -+ * overwrite *newname* if it exists and return an error -+ * instead. If `RENAME_EXCHANGE` is specified, the filesystem -+ * must atomically exchange the two files, i.e. both must -+ * exist and neither may be deleted. -+ */ -+ int (*rename)(const char *, const char *, unsigned int flags); -+ -+ /** Create a hard link to a file */ -+ int (*link)(const char *, const char *); -+ -+ /** -+ * Change the permission bits of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ */ -+ int (*chmod)(const char *, mode_t, struct fuse_file_info *fi); -+ -+ /** -+ * Change the owner and group of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*chown)(const char *, uid_t, gid_t, struct fuse_file_info *fi); -+ -+ /** -+ * Change the size of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. 
-+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*truncate)(const char *, off_t, struct fuse_file_info *fi); -+ -+ /** -+ * Open a file -+ * -+ * Open flags are available in fi->flags. The following rules -+ * apply. -+ * -+ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -+ * filtered out / handled by the kernel. -+ * -+ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) -+ * should be used by the filesystem to check if the operation is -+ * permitted. If the ``-o default_permissions`` mount option is -+ * given, this check is already done by the kernel before calling -+ * open() and may thus be omitted by the filesystem. -+ * -+ * - When writeback caching is enabled, the kernel may send -+ * read requests even for files opened with O_WRONLY. The -+ * filesystem should be prepared to handle this. -+ * -+ * - When writeback caching is disabled, the filesystem is -+ * expected to properly handle the O_APPEND flag and ensure -+ * that each write is appending to the end of the file. -+ * -+ * - When writeback caching is enabled, the kernel will -+ * handle O_APPEND. However, unless all changes to the file -+ * come through the kernel this will not work reliably. The -+ * filesystem should thus either ignore the O_APPEND flag -+ * (and let the kernel handle it), or return an error -+ * (indicating that reliably O_APPEND is not available). -+ * -+ * Filesystem may store an arbitrary file handle (pointer, -+ * index, etc) in fi->fh, and use this in other all other file -+ * operations (read, write, flush, release, fsync). -+ * -+ * Filesystem may also implement stateless file I/O and not store -+ * anything in fi->fh. -+ * -+ * There are also some flags (direct_io, keep_cache) which the -+ * filesystem may set in fi, to change the way the file is opened. -+ * See fuse_file_info structure in for more details. 
-+ * -+ * If this request is answered with an error code of ENOSYS -+ * and FUSE_CAP_NO_OPEN_SUPPORT is set in -+ * `fuse_conn_info.capable`, this is treated as success and -+ * future calls to open will also succeed without being send -+ * to the filesystem process. -+ * -+ */ -+ int (*open)(const char *, struct fuse_file_info *); -+ -+ /** -+ * Read data from an open file -+ * -+ * Read should return exactly the number of bytes requested except -+ * on EOF or error, otherwise the rest of the data will be -+ * substituted with zeroes. An exception to this is when the -+ * 'direct_io' mount option is specified, in which case the return -+ * value of the read system call will reflect the return value of -+ * this operation. -+ */ -+ int (*read)(const char *, char *, size_t, off_t, struct fuse_file_info *); -+ -+ /** -+ * Write data to an open file -+ * -+ * Write should return exactly the number of bytes requested -+ * except on error. An exception to this is when the 'direct_io' -+ * mount option is specified (see read operation). -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*write)(const char *, const char *, size_t, off_t, -+ struct fuse_file_info *); -+ -+ /** -+ * Get file system statistics -+ * -+ * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored -+ */ -+ int (*statfs)(const char *, struct statvfs *); -+ -+ /** -+ * Possibly flush cached data -+ * -+ * BIG NOTE: This is not equivalent to fsync(). It's not a -+ * request to sync dirty data. -+ * -+ * Flush is called on each close() of a file descriptor, as opposed to -+ * release which is called on the close of the last file descriptor for -+ * a file. Under Linux, errors returned by flush() will be passed to -+ * userspace as errors from close(), so flush() is a good place to write -+ * back any cached dirty data. 
However, many applications ignore errors -+ * on close(), and on non-Linux systems, close() may succeed even if flush() -+ * returns an error. For these reasons, filesystems should not assume -+ * that errors returned by flush will ever be noticed or even -+ * delivered. -+ * -+ * NOTE: The flush() method may be called more than once for each -+ * open(). This happens if more than one file descriptor refers to an -+ * open file handle, e.g. due to dup(), dup2() or fork() calls. It is -+ * not possible to determine if a flush is final, so each flush should -+ * be treated equally. Multiple write-flush sequences are relatively -+ * rare, so this shouldn't be a problem. -+ * -+ * Filesystems shouldn't assume that flush will be called at any -+ * particular point. It may be called more times than expected, or not -+ * at all. -+ * -+ * [close]: -+ * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -+ */ -+ int (*flush)(const char *, struct fuse_file_info *); -+ -+ /** -+ * Release an open file -+ * -+ * Release is called when there are no more references to an open -+ * file: all file descriptors are closed and all memory mappings -+ * are unmapped. -+ * -+ * For every open() call there will be exactly one release() call -+ * with the same flags and file handle. It is possible to -+ * have a file opened more than once, in which case only the last -+ * release will mean, that no more reads/writes will happen on the -+ * file. The return value of release is ignored. -+ */ -+ int (*release)(const char *, struct fuse_file_info *); -+ -+ /* -+ * Synchronize file contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data. 
-+ */ -+ int (*fsync)(const char *, int, struct fuse_file_info *); -+ -+ /** Set extended attributes */ -+ int (*setxattr)(const char *, const char *, const char *, size_t, int); -+ -+ /** Get extended attributes */ -+ int (*getxattr)(const char *, const char *, char *, size_t); -+ -+ /** List extended attributes */ -+ int (*listxattr)(const char *, char *, size_t); -+ -+ /** Remove extended attributes */ -+ int (*removexattr)(const char *, const char *); -+ -+ /* -+ * Open directory -+ * -+ * Unless the 'default_permissions' mount option is given, -+ * this method should check if opendir is permitted for this -+ * directory. Optionally opendir may also return an arbitrary -+ * filehandle in the fuse_file_info structure, which will be -+ * passed to readdir, releasedir and fsyncdir. -+ */ -+ int (*opendir)(const char *, struct fuse_file_info *); -+ -+ /* -+ * Read directory -+ * -+ * The filesystem may choose between two modes of operation: -+ * -+ * 1) The readdir implementation ignores the offset parameter, and -+ * passes zero to the filler function's offset. The filler -+ * function will not return '1' (unless an error happens), so the -+ * whole directory is read in a single readdir operation. -+ * -+ * 2) The readdir implementation keeps track of the offsets of the -+ * directory entries. It uses the offset parameter and always -+ * passes non-zero offset to the filler function. When the buffer -+ * is full (or an error happens) the filler function will return -+ * '1'. 
-+ */ -+ int (*readdir)(const char *, void *, fuse_fill_dir_t, off_t, -+ struct fuse_file_info *, enum fuse_readdir_flags); -+ -+ /** -+ * Release directory -+ */ -+ int (*releasedir)(const char *, struct fuse_file_info *); -+ -+ /** -+ * Synchronize directory contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data -+ */ -+ int (*fsyncdir)(const char *, int, struct fuse_file_info *); -+ -+ /** -+ * Initialize filesystem -+ * -+ * The return value will passed in the `private_data` field of -+ * `struct fuse_context` to all file operations, and as a -+ * parameter to the destroy() method. It overrides the initial -+ * value provided to fuse_main() / fuse_new(). -+ */ -+ void *(*init)(struct fuse_conn_info *conn, struct fuse_config *cfg); -+ -+ /** -+ * Clean up filesystem -+ * -+ * Called on filesystem exit. -+ */ -+ void (*destroy)(void *private_data); -+ -+ /** -+ * Check file access permissions -+ * -+ * This will be called for the access() system call. If the -+ * 'default_permissions' mount option is given, this method is not -+ * called. -+ * -+ * This method is not called under Linux kernel versions 2.4.x -+ */ -+ int (*access)(const char *, int); -+ -+ /** -+ * Create and open a file -+ * -+ * If the file does not exist, first create it with the specified -+ * mode, and then open it. -+ * -+ * If this method is not implemented or under Linux kernel -+ * versions earlier than 2.6.15, the mknod() and open() methods -+ * will be called instead. -+ */ -+ int (*create)(const char *, mode_t, struct fuse_file_info *); -+ -+ /** -+ * Perform POSIX file locking operation -+ * -+ * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. -+ * -+ * For the meaning of fields in 'struct flock' see the man page -+ * for fcntl(2). The l_whence field will always be set to -+ * SEEK_SET. -+ * -+ * For checking lock ownership, the 'fuse_file_info->owner' -+ * argument must be used. 
-+ * -+ * For F_GETLK operation, the library will first check currently -+ * held locks, and if a conflicting lock is found it will return -+ * information without calling this method. This ensures, that -+ * for local locks the l_pid field is correctly filled in. The -+ * results may not be accurate in case of race conditions and in -+ * the presence of hard links, but it's unlikely that an -+ * application would rely on accurate GETLK results in these -+ * cases. If a conflicting lock is not found, this method will be -+ * called, and the filesystem may fill out l_pid by a meaningful -+ * value, or it may leave this field zero. -+ * -+ * For F_SETLK and F_SETLKW the l_pid field will be set to the pid -+ * of the process performing the locking operation. -+ * -+ * Note: if this method is not implemented, the kernel will still -+ * allow file locking to work locally. Hence it is only -+ * interesting for network filesystems and similar. -+ */ -+ int (*lock)(const char *, struct fuse_file_info *, int cmd, struct flock *); -+ -+ /** -+ * Change the access and modification times of a file with -+ * nanosecond resolution -+ * -+ * This supersedes the old utime() interface. New applications -+ * should use this. -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ * -+ * See the utimensat(2) man page for details. -+ */ -+ int (*utimens)(const char *, const struct timespec tv[2], -+ struct fuse_file_info *fi); -+ -+ /** -+ * Map block index within file to block index within device -+ * -+ * Note: This makes sense only for block device backed filesystems -+ * mounted with the 'blkdev' option -+ */ -+ int (*bmap)(const char *, size_t blocksize, uint64_t *idx); -+ -+ /** -+ * Ioctl -+ * -+ * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in -+ * 64bit environment. The size and direction of data is -+ * determined by _IOC_*() decoding of cmd. 
For _IOC_NONE, -+ * data will be NULL, for _IOC_WRITE data is out area, for -+ * _IOC_READ in area and if both are set in/out area. In all -+ * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. -+ * -+ * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a -+ * directory file handle. -+ * -+ * Note : the unsigned long request submitted by the application -+ * is truncated to 32 bits. -+ */ -+ int (*ioctl)(const char *, unsigned int cmd, void *arg, -+ struct fuse_file_info *, unsigned int flags, void *data); -+ -+ /** -+ * Poll for IO readiness events -+ * -+ * Note: If ph is non-NULL, the client should notify -+ * when IO readiness events occur by calling -+ * fuse_notify_poll() with the specified ph. -+ * -+ * Regardless of the number of times poll with a non-NULL ph -+ * is received, single notification is enough to clear all. -+ * Notifying more times incurs overhead but doesn't harm -+ * correctness. -+ * -+ * The callee is responsible for destroying ph with -+ * fuse_pollhandle_destroy() when no longer in use. -+ */ -+ int (*poll)(const char *, struct fuse_file_info *, -+ struct fuse_pollhandle *ph, unsigned *reventsp); -+ -+ /* -+ * Write contents of buffer to an open file -+ * -+ * Similar to the write() method, but data is supplied in a -+ * generic buffer. Use fuse_buf_copy() to transfer data to -+ * the destination. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*write_buf)(const char *, struct fuse_bufvec *buf, off_t off, -+ struct fuse_file_info *); -+ -+ /* -+ * Store data from an open file in a buffer -+ * -+ * Similar to the read() method, but data is stored and -+ * returned in a generic buffer. -+ * -+ * No actual copying of data has to take place, the source -+ * file descriptor may simply be stored in the buffer for -+ * later data transfer. -+ * -+ * The buffer must be allocated dynamically and stored at the -+ * location pointed to by bufp. 
If the buffer contains memory -+ * regions, they too must be allocated using malloc(). The -+ * allocated memory will be freed by the caller. -+ */ -+ int (*read_buf)(const char *, struct fuse_bufvec **bufp, size_t size, -+ off_t off, struct fuse_file_info *); -+ /** -+ * Perform BSD file locking operation -+ * -+ * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN -+ * -+ * Nonblocking requests will be indicated by ORing LOCK_NB to -+ * the above operations -+ * -+ * For more information see the flock(2) manual page. -+ * -+ * Additionally fi->owner will be set to a value unique to -+ * this open file. This same value will be supplied to -+ * ->release() when the file is released. -+ * -+ * Note: if this method is not implemented, the kernel will still -+ * allow file locking to work locally. Hence it is only -+ * interesting for network filesystems and similar. -+ */ -+ int (*flock)(const char *, struct fuse_file_info *, int op); -+ -+ /** -+ * Allocates space for an open file -+ * -+ * This function ensures that required space is allocated for specified -+ * file. If this function returns success then any subsequent write -+ * request to specified range is guaranteed not to fail because of lack -+ * of space on the file system media. -+ */ -+ int (*fallocate)(const char *, int, off_t, off_t, struct fuse_file_info *); -+ -+ /** -+ * Copy a range of data from one file to another -+ * -+ * Performs an optimized copy between two file descriptors without the -+ * additional cost of transferring data through the FUSE kernel module -+ * to user space (glibc) and then back into the FUSE filesystem again. -+ * -+ * In case this method is not implemented, glibc falls back to reading -+ * data from the source and writing to the destination. Effectively -+ * doing an inefficient copy of the data. 
-+ */ -+ ssize_t (*copy_file_range)(const char *path_in, -+ struct fuse_file_info *fi_in, off_t offset_in, -+ const char *path_out, -+ struct fuse_file_info *fi_out, off_t offset_out, -+ size_t size, int flags); -+ -+ /** -+ * Find next data or hole after the specified offset -+ */ -+ off_t (*lseek)(const char *, off_t off, int whence, -+ struct fuse_file_info *); - }; - --/** Extra context that may be needed by some filesystems -+/* -+ * Extra context that may be needed by some filesystems - * - * The uid, gid and pid fields are not filled in case of a writepage - * operation. - */ - struct fuse_context { -- /** Pointer to the fuse object */ -- struct fuse *fuse; -+ /** Pointer to the fuse object */ -+ struct fuse *fuse; - -- /** User ID of the calling process */ -- uid_t uid; -+ /** User ID of the calling process */ -+ uid_t uid; - -- /** Group ID of the calling process */ -- gid_t gid; -+ /** Group ID of the calling process */ -+ gid_t gid; - -- /** Process ID of the calling thread */ -- pid_t pid; -+ /** Process ID of the calling thread */ -+ pid_t pid; - -- /** Private filesystem data */ -- void *private_data; -+ /** Private filesystem data */ -+ void *private_data; - -- /** Umask of the calling process */ -- mode_t umask; -+ /** Umask of the calling process */ -+ mode_t umask; - }; - - /** -@@ -859,15 +880,15 @@ struct fuse_context { - * Example usage, see hello.c - */ - /* -- int fuse_main(int argc, char *argv[], const struct fuse_operations *op, -- void *private_data); --*/ --#define fuse_main(argc, argv, op, private_data) \ -- fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) -+ * int fuse_main(int argc, char *argv[], const struct fuse_operations *op, -+ * void *private_data); -+ */ -+#define fuse_main(argc, argv, op, private_data) \ -+ fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) - --/* ----------------------------------------------------------- * -- * More detailed API * -- * 
----------------------------------------------------------- */ -+/* -+ * More detailed API -+ */ - - /** - * Print available options (high- and low-level) to stdout. This is -@@ -910,12 +931,13 @@ void fuse_lib_help(struct fuse_args *args); - * @return the created FUSE handle - */ - #if FUSE_USE_VERSION == 30 --struct fuse *fuse_new_30(struct fuse_args *args, const struct fuse_operations *op, -- size_t op_size, void *private_data); -+struct fuse *fuse_new_30(struct fuse_args *args, -+ const struct fuse_operations *op, size_t op_size, -+ void *private_data); - #define fuse_new(args, op, size, data) fuse_new_30(args, op, size, data) - #else - struct fuse *fuse_new(struct fuse_args *args, const struct fuse_operations *op, -- size_t op_size, void *private_data); -+ size_t op_size, void *private_data); - #endif - - /** -@@ -940,7 +962,7 @@ void fuse_unmount(struct fuse *f); - /** - * Destroy the FUSE handle. - * -- * NOTE: This function does not unmount the filesystem. If this is -+ * NOTE: This function does not unmount the filesystem. If this is - * needed, call fuse_unmount() before calling this function. - * - * @param f the FUSE handle -@@ -1030,7 +1052,7 @@ int fuse_invalidate_path(struct fuse *f, const char *path); - * Do not call this directly, use fuse_main() - */ - int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, -- size_t op_size, void *private_data); -+ size_t op_size, void *private_data); - - /** - * Start the cleanup thread when using option "remember". 
-@@ -1081,89 +1103,87 @@ struct fuse_fs; - */ - - int fuse_fs_getattr(struct fuse_fs *fs, const char *path, struct stat *buf, -- struct fuse_file_info *fi); --int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, -- const char *newpath, unsigned int flags); -+ struct fuse_file_info *fi); -+int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, const char *newpath, -+ unsigned int flags); - int fuse_fs_unlink(struct fuse_fs *fs, const char *path); - int fuse_fs_rmdir(struct fuse_fs *fs, const char *path); --int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, -- const char *path); -+int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, const char *path); - int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath); --int fuse_fs_release(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi); -+int fuse_fs_release(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); - int fuse_fs_open(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size, -- off_t off, struct fuse_file_info *fi); -+ off_t off, struct fuse_file_info *fi); - int fuse_fs_read_buf(struct fuse_fs *fs, const char *path, -- struct fuse_bufvec **bufp, size_t size, off_t off, -- struct fuse_file_info *fi); -+ struct fuse_bufvec **bufp, size_t size, off_t off, -+ struct fuse_file_info *fi); - int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf, -- size_t size, off_t off, struct fuse_file_info *fi); -+ size_t size, off_t off, struct fuse_file_info *fi); - int fuse_fs_write_buf(struct fuse_fs *fs, const char *path, -- struct fuse_bufvec *buf, off_t off, -- struct fuse_file_info *fi); -+ struct fuse_bufvec *buf, off_t off, -+ struct fuse_file_info *fi); - int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int 
fuse_fs_flush(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_statfs(struct fuse_fs *fs, const char *path, struct statvfs *buf); - int fuse_fs_opendir(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_readdir(struct fuse_fs *fs, const char *path, void *buf, -- fuse_fill_dir_t filler, off_t off, -- struct fuse_file_info *fi, enum fuse_readdir_flags flags); -+ fuse_fill_dir_t filler, off_t off, -+ struct fuse_file_info *fi, enum fuse_readdir_flags flags); - int fuse_fs_fsyncdir(struct fuse_fs *fs, const char *path, int datasync, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_releasedir(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_create(struct fuse_fs *fs, const char *path, mode_t mode, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_lock(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi, int cmd, struct flock *lock); -+ struct fuse_file_info *fi, int cmd, struct flock *lock); - int fuse_fs_flock(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi, int op); -+ struct fuse_file_info *fi, int op); - int fuse_fs_chmod(struct fuse_fs *fs, const char *path, mode_t mode, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_truncate(struct fuse_fs *fs, const char *path, off_t size, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - int fuse_fs_utimens(struct fuse_fs *fs, const char *path, -- const struct timespec tv[2], struct fuse_file_info *fi); -+ const struct timespec tv[2], struct fuse_file_info *fi); - int fuse_fs_access(struct fuse_fs *fs, const char *path, int mask); - int fuse_fs_readlink(struct fuse_fs *fs, 
const char *path, char *buf, -- size_t len); -+ size_t len); - int fuse_fs_mknod(struct fuse_fs *fs, const char *path, mode_t mode, -- dev_t rdev); -+ dev_t rdev); - int fuse_fs_mkdir(struct fuse_fs *fs, const char *path, mode_t mode); - int fuse_fs_setxattr(struct fuse_fs *fs, const char *path, const char *name, -- const char *value, size_t size, int flags); -+ const char *value, size_t size, int flags); - int fuse_fs_getxattr(struct fuse_fs *fs, const char *path, const char *name, -- char *value, size_t size); -+ char *value, size_t size); - int fuse_fs_listxattr(struct fuse_fs *fs, const char *path, char *list, -- size_t size); --int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, -- const char *name); -+ size_t size); -+int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, const char *name); - int fuse_fs_bmap(struct fuse_fs *fs, const char *path, size_t blocksize, -- uint64_t *idx); -+ uint64_t *idx); - int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, unsigned int cmd, -- void *arg, struct fuse_file_info *fi, unsigned int flags, -- void *data); -+ void *arg, struct fuse_file_info *fi, unsigned int flags, -+ void *data); - int fuse_fs_poll(struct fuse_fs *fs, const char *path, -- struct fuse_file_info *fi, struct fuse_pollhandle *ph, -- unsigned *reventsp); -+ struct fuse_file_info *fi, struct fuse_pollhandle *ph, -+ unsigned *reventsp); - int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, int mode, -- off_t offset, off_t length, struct fuse_file_info *fi); -+ off_t offset, off_t length, struct fuse_file_info *fi); - ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in, -- struct fuse_file_info *fi_in, off_t off_in, -- const char *path_out, -- struct fuse_file_info *fi_out, off_t off_out, -- size_t len, int flags); -+ struct fuse_file_info *fi_in, off_t off_in, -+ const char *path_out, -+ struct fuse_file_info *fi_out, off_t off_out, -+ size_t len, int flags); - off_t fuse_fs_lseek(struct fuse_fs *fs, const 
char *path, off_t off, int whence, -- struct fuse_file_info *fi); -+ struct fuse_file_info *fi); - void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn, -- struct fuse_config *cfg); -+ struct fuse_config *cfg); - void fuse_fs_destroy(struct fuse_fs *fs); - - int fuse_notify_poll(struct fuse_pollhandle *ph); -@@ -1182,7 +1202,7 @@ int fuse_notify_poll(struct fuse_pollhandle *ph); - * @return a new filesystem object - */ - struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, -- void *private_data); -+ void *private_data); - - /** - * Factory for creating filesystem objects -@@ -1199,7 +1219,7 @@ struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, - * @return the new filesystem object - */ - typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, -- struct fuse_fs *fs[]); -+ struct fuse_fs *fs[]); - /** - * Register filesystem module - * -@@ -1211,7 +1231,7 @@ typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, - * @param factory_ the factory function for this filesystem module - */ - #define FUSE_REGISTER_MODULE(name_, factory_) \ -- fuse_module_factory_t fuse_module_ ## name_ ## _factory = factory_ -+ fuse_module_factory_t fuse_module_##name_##_factory = factory_ - - /** Get session from fuse object */ - struct fuse_session *fuse_get_session(struct fuse *f); -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -index bf8f8cc..bd9bf86 100644 ---- a/tools/virtiofsd/fuse_common.h -+++ b/tools/virtiofsd/fuse_common.h -@@ -1,21 +1,23 @@ --/* FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. --*/ -+/* -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. 
-+ */ - - /** @file */ - - #if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_) --#error "Never include directly; use or instead." -+#error \ -+ "Never include directly; use or instead." - #endif - - #ifndef FUSE_COMMON_H_ - #define FUSE_COMMON_H_ - --#include "fuse_opt.h" - #include "fuse_log.h" -+#include "fuse_opt.h" - #include - #include - -@@ -25,7 +27,7 @@ - /** Minor version of FUSE library interface */ - #define FUSE_MINOR_VERSION 2 - --#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) -+#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) - #define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) - - /** -@@ -38,67 +40,83 @@ - * descriptors can share a single file handle. - */ - struct fuse_file_info { -- /** Open flags. Available in open() and release() */ -- int flags; -- -- /** In case of a write operation indicates if this was caused -- by a delayed write from the page cache. If so, then the -- context's pid, uid, and gid fields will not be valid, and -- the *fh* value may not match the *fh* value that would -- have been sent with the corresponding individual write -- requests if write caching had been disabled. */ -- unsigned int writepage : 1; -- -- /** Can be filled in by open, to use direct I/O on this file. */ -- unsigned int direct_io : 1; -- -- /** Can be filled in by open. It signals the kernel that any -- currently cached file data (ie., data that the filesystem -- provided the last time the file was open) need not be -- invalidated. Has no effect when set in other contexts (in -- particular it does nothing when set by opendir()). */ -- unsigned int keep_cache : 1; -- -- /** Indicates a flush operation. Set in flush operation, also -- maybe set in highlevel lock operation and lowlevel release -- operation. */ -- unsigned int flush : 1; -- -- /** Can be filled in by open, to indicate that the file is not -- seekable. 
*/ -- unsigned int nonseekable : 1; -- -- /* Indicates that flock locks for this file should be -- released. If set, lock_owner shall contain a valid value. -- May only be set in ->release(). */ -- unsigned int flock_release : 1; -- -- /** Can be filled in by opendir. It signals the kernel to -- enable caching of entries returned by readdir(). Has no -- effect when set in other contexts (in particular it does -- nothing when set by open()). */ -- unsigned int cache_readdir : 1; -- -- /** Padding. Reserved for future use*/ -- unsigned int padding : 25; -- unsigned int padding2 : 32; -- -- /** File handle id. May be filled in by filesystem in create, -- * open, and opendir(). Available in most other file operations on the -- * same file handle. */ -- uint64_t fh; -- -- /** Lock owner id. Available in locking operations and flush */ -- uint64_t lock_owner; -- -- /** Requested poll events. Available in ->poll. Only set on kernels -- which support it. If unsupported, this field is set to zero. */ -- uint32_t poll_events; -+ /** Open flags. Available in open() and release() */ -+ int flags; -+ -+ /* -+ * In case of a write operation indicates if this was caused -+ * by a delayed write from the page cache. If so, then the -+ * context's pid, uid, and gid fields will not be valid, and -+ * the *fh* value may not match the *fh* value that would -+ * have been sent with the corresponding individual write -+ * requests if write caching had been disabled. -+ */ -+ unsigned int writepage:1; -+ -+ /** Can be filled in by open, to use direct I/O on this file. */ -+ unsigned int direct_io:1; -+ -+ /* -+ * Can be filled in by open. It signals the kernel that any -+ * currently cached file data (ie., data that the filesystem -+ * provided the last time the file was open) need not be -+ * invalidated. Has no effect when set in other contexts (in -+ * particular it does nothing when set by opendir()). -+ */ -+ unsigned int keep_cache:1; -+ -+ /* -+ * Indicates a flush operation. 
Set in flush operation, also -+ * maybe set in highlevel lock operation and lowlevel release -+ * operation. -+ */ -+ unsigned int flush:1; -+ -+ /* -+ * Can be filled in by open, to indicate that the file is not -+ * seekable. -+ */ -+ unsigned int nonseekable:1; -+ -+ /* -+ * Indicates that flock locks for this file should be -+ * released. If set, lock_owner shall contain a valid value. -+ * May only be set in ->release(). -+ */ -+ unsigned int flock_release:1; -+ -+ /* -+ * Can be filled in by opendir. It signals the kernel to -+ * enable caching of entries returned by readdir(). Has no -+ * effect when set in other contexts (in particular it does -+ * nothing when set by open()). -+ */ -+ unsigned int cache_readdir:1; -+ -+ /** Padding. Reserved for future use*/ -+ unsigned int padding:25; -+ unsigned int padding2:32; -+ -+ /* -+ * File handle id. May be filled in by filesystem in create, -+ * open, and opendir(). Available in most other file operations on the -+ * same file handle. -+ */ -+ uint64_t fh; -+ -+ /** Lock owner id. Available in locking operations and flush */ -+ uint64_t lock_owner; -+ -+ /* -+ * Requested poll events. Available in ->poll. Only set on kernels -+ * which support it. If unsupported, this field is set to zero. -+ */ -+ uint32_t poll_events; - }; - --/************************************************************************** -- * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * -- **************************************************************************/ -+/* -+ * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' -+ */ - - /** - * Indicates that the filesystem supports asynchronous read requests. -@@ -110,7 +128,7 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_ASYNC_READ (1 << 0) -+#define FUSE_CAP_ASYNC_READ (1 << 0) - - /** - * Indicates that the filesystem supports "remote" locking. 
-@@ -118,7 +136,7 @@ struct fuse_file_info { - * This feature is enabled by default when supported by the kernel, - * and if getlk() and setlk() handlers are implemented. - */ --#define FUSE_CAP_POSIX_LOCKS (1 << 1) -+#define FUSE_CAP_POSIX_LOCKS (1 << 1) - - /** - * Indicates that the filesystem supports the O_TRUNC open flag. If -@@ -127,14 +145,14 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) -+#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) - - /** - * Indicates that the filesystem supports lookups of "." and "..". - * - * This feature is disabled by default. - */ --#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) -+#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) - - /** - * Indicates that the kernel should not apply the umask to the -@@ -142,7 +160,7 @@ struct fuse_file_info { - * - * This feature is disabled by default. - */ --#define FUSE_CAP_DONT_MASK (1 << 6) -+#define FUSE_CAP_DONT_MASK (1 << 6) - - /** - * Indicates that libfuse should try to use splice() when writing to -@@ -150,7 +168,7 @@ struct fuse_file_info { - * - * This feature is disabled by default. - */ --#define FUSE_CAP_SPLICE_WRITE (1 << 7) -+#define FUSE_CAP_SPLICE_WRITE (1 << 7) - - /** - * Indicates that libfuse should try to move pages instead of copying when -@@ -158,7 +176,7 @@ struct fuse_file_info { - * - * This feature is disabled by default. - */ --#define FUSE_CAP_SPLICE_MOVE (1 << 8) -+#define FUSE_CAP_SPLICE_MOVE (1 << 8) - - /** - * Indicates that libfuse should try to use splice() when reading from -@@ -167,7 +185,7 @@ struct fuse_file_info { - * This feature is enabled by default when supported by the kernel and - * if the filesystem implements a write_buf() handler. 
- */ --#define FUSE_CAP_SPLICE_READ (1 << 9) -+#define FUSE_CAP_SPLICE_READ (1 << 9) - - /** - * If set, the calls to flock(2) will be emulated using POSIX locks and must -@@ -180,14 +198,14 @@ struct fuse_file_info { - * This feature is enabled by default when supported by the kernel and - * if the filesystem implements a flock() handler. - */ --#define FUSE_CAP_FLOCK_LOCKS (1 << 10) -+#define FUSE_CAP_FLOCK_LOCKS (1 << 10) - - /** - * Indicates that the filesystem supports ioctl's on directories. - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_IOCTL_DIR (1 << 11) -+#define FUSE_CAP_IOCTL_DIR (1 << 11) - - /** - * Traditionally, while a file is open the FUSE kernel module only -@@ -209,7 +227,7 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) -+#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) - - /** - * Indicates that the filesystem supports readdirplus. -@@ -217,7 +235,7 @@ struct fuse_file_info { - * This feature is enabled by default when supported by the kernel and if the - * filesystem implements a readdirplus() handler. - */ --#define FUSE_CAP_READDIRPLUS (1 << 13) -+#define FUSE_CAP_READDIRPLUS (1 << 13) - - /** - * Indicates that the filesystem supports adaptive readdirplus. -@@ -245,7 +263,7 @@ struct fuse_file_info { - * if the filesystem implements both a readdirplus() and a readdir() - * handler. - */ --#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) -+#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) - - /** - * Indicates that the filesystem supports asynchronous direct I/O submission. -@@ -256,7 +274,7 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_ASYNC_DIO (1 << 15) -+#define FUSE_CAP_ASYNC_DIO (1 << 15) - - /** - * Indicates that writeback caching should be enabled. 
This means that -@@ -265,7 +283,7 @@ struct fuse_file_info { - * - * This feature is disabled by default. - */ --#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) -+#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) - - /** - * Indicates support for zero-message opens. If this flag is set in -@@ -278,7 +296,7 @@ struct fuse_file_info { - * Setting (or unsetting) this flag in the `want` field has *no - * effect*. - */ --#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) -+#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) - - /** - * Indicates support for parallel directory operations. If this flag -@@ -288,7 +306,7 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) -+#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) - - /** - * Indicates support for POSIX ACLs. -@@ -307,7 +325,7 @@ struct fuse_file_info { - * - * This feature is disabled by default. - */ --#define FUSE_CAP_POSIX_ACL (1 << 19) -+#define FUSE_CAP_POSIX_ACL (1 << 19) - - /** - * Indicates that the filesystem is responsible for unsetting -@@ -316,7 +334,7 @@ struct fuse_file_info { - * - * This feature is enabled by default when supported by the kernel. - */ --#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) -+#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) - - /** - * Indicates support for zero-message opendirs. If this flag is set in -@@ -328,7 +346,7 @@ struct fuse_file_info { - * - * Setting (or unsetting) this flag in the `want` field has *no effect*. 
- */ --#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) -+#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) - - /** - * Ioctl flags -@@ -340,12 +358,12 @@ struct fuse_file_info { - * - * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs - */ --#define FUSE_IOCTL_COMPAT (1 << 0) --#define FUSE_IOCTL_UNRESTRICTED (1 << 1) --#define FUSE_IOCTL_RETRY (1 << 2) --#define FUSE_IOCTL_DIR (1 << 4) -+#define FUSE_IOCTL_COMPAT (1 << 0) -+#define FUSE_IOCTL_UNRESTRICTED (1 << 1) -+#define FUSE_IOCTL_RETRY (1 << 2) -+#define FUSE_IOCTL_DIR (1 << 4) - --#define FUSE_IOCTL_MAX_IOV 256 -+#define FUSE_IOCTL_MAX_IOV 256 - - /** - * Connection information, passed to the ->init() method -@@ -355,114 +373,114 @@ struct fuse_file_info { - * value must usually be smaller than the indicated value. - */ - struct fuse_conn_info { -- /** -- * Major version of the protocol (read-only) -- */ -- unsigned proto_major; -- -- /** -- * Minor version of the protocol (read-only) -- */ -- unsigned proto_minor; -- -- /** -- * Maximum size of the write buffer -- */ -- unsigned max_write; -- -- /** -- * Maximum size of read requests. A value of zero indicates no -- * limit. However, even if the filesystem does not specify a -- * limit, the maximum size of read requests will still be -- * limited by the kernel. -- * -- * NOTE: For the time being, the maximum size of read requests -- * must be set both here *and* passed to fuse_session_new() -- * using the ``-o max_read=`` mount option. At some point -- * in the future, specifying the mount option will no longer -- * be necessary. -- */ -- unsigned max_read; -- -- /** -- * Maximum readahead -- */ -- unsigned max_readahead; -- -- /** -- * Capability flags that the kernel supports (read-only) -- */ -- unsigned capable; -- -- /** -- * Capability flags that the filesystem wants to enable. -- * -- * libfuse attempts to initialize this field with -- * reasonable default values before calling the init() handler. 
-- */ -- unsigned want; -- -- /** -- * Maximum number of pending "background" requests. A -- * background request is any type of request for which the -- * total number is not limited by other means. As of kernel -- * 4.8, only two types of requests fall into this category: -- * -- * 1. Read-ahead requests -- * 2. Asynchronous direct I/O requests -- * -- * Read-ahead requests are generated (if max_readahead is -- * non-zero) by the kernel to preemptively fill its caches -- * when it anticipates that userspace will soon read more -- * data. -- * -- * Asynchronous direct I/O requests are generated if -- * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large -- * direct I/O request. In this case the kernel will internally -- * split it up into multiple smaller requests and submit them -- * to the filesystem concurrently. -- * -- * Note that the following requests are *not* background -- * requests: writeback requests (limited by the kernel's -- * flusher algorithm), regular (i.e., synchronous and -- * buffered) userspace read/write requests (limited to one per -- * thread), asynchronous read requests (Linux's io_submit(2) -- * call actually blocks, so these are also limited to one per -- * thread). -- */ -- unsigned max_background; -- -- /** -- * Kernel congestion threshold parameter. If the number of pending -- * background requests exceeds this number, the FUSE kernel module will -- * mark the filesystem as "congested". This instructs the kernel to -- * expect that queued requests will take some time to complete, and to -- * adjust its algorithms accordingly (e.g. by putting a waiting thread -- * to sleep instead of using a busy-loop). -- */ -- unsigned congestion_threshold; -- -- /** -- * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible -- * for updating mtime and ctime when write requests are received. The -- * updated values are passed to the filesystem with setattr() requests. 
-- * However, if the filesystem does not support the full resolution of -- * the kernel timestamps (nanoseconds), the mtime and ctime values used -- * by kernel and filesystem will differ (and result in an apparent -- * change of times after a cache flush). -- * -- * To prevent this problem, this variable can be used to inform the -- * kernel about the timestamp granularity supported by the file-system. -- * The value should be power of 10. The default is 1, i.e. full -- * nano-second resolution. Filesystems supporting only second resolution -- * should set this to 1000000000. -- */ -- unsigned time_gran; -- -- /** -- * For future use. -- */ -- unsigned reserved[22]; -+ /** -+ * Major version of the protocol (read-only) -+ */ -+ unsigned proto_major; -+ -+ /** -+ * Minor version of the protocol (read-only) -+ */ -+ unsigned proto_minor; -+ -+ /** -+ * Maximum size of the write buffer -+ */ -+ unsigned max_write; -+ -+ /** -+ * Maximum size of read requests. A value of zero indicates no -+ * limit. However, even if the filesystem does not specify a -+ * limit, the maximum size of read requests will still be -+ * limited by the kernel. -+ * -+ * NOTE: For the time being, the maximum size of read requests -+ * must be set both here *and* passed to fuse_session_new() -+ * using the ``-o max_read=`` mount option. At some point -+ * in the future, specifying the mount option will no longer -+ * be necessary. -+ */ -+ unsigned max_read; -+ -+ /** -+ * Maximum readahead -+ */ -+ unsigned max_readahead; -+ -+ /** -+ * Capability flags that the kernel supports (read-only) -+ */ -+ unsigned capable; -+ -+ /** -+ * Capability flags that the filesystem wants to enable. -+ * -+ * libfuse attempts to initialize this field with -+ * reasonable default values before calling the init() handler. -+ */ -+ unsigned want; -+ -+ /** -+ * Maximum number of pending "background" requests. 
A -+ * background request is any type of request for which the -+ * total number is not limited by other means. As of kernel -+ * 4.8, only two types of requests fall into this category: -+ * -+ * 1. Read-ahead requests -+ * 2. Asynchronous direct I/O requests -+ * -+ * Read-ahead requests are generated (if max_readahead is -+ * non-zero) by the kernel to preemptively fill its caches -+ * when it anticipates that userspace will soon read more -+ * data. -+ * -+ * Asynchronous direct I/O requests are generated if -+ * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large -+ * direct I/O request. In this case the kernel will internally -+ * split it up into multiple smaller requests and submit them -+ * to the filesystem concurrently. -+ * -+ * Note that the following requests are *not* background -+ * requests: writeback requests (limited by the kernel's -+ * flusher algorithm), regular (i.e., synchronous and -+ * buffered) userspace read/write requests (limited to one per -+ * thread), asynchronous read requests (Linux's io_submit(2) -+ * call actually blocks, so these are also limited to one per -+ * thread). -+ */ -+ unsigned max_background; -+ -+ /** -+ * Kernel congestion threshold parameter. If the number of pending -+ * background requests exceeds this number, the FUSE kernel module will -+ * mark the filesystem as "congested". This instructs the kernel to -+ * expect that queued requests will take some time to complete, and to -+ * adjust its algorithms accordingly (e.g. by putting a waiting thread -+ * to sleep instead of using a busy-loop). -+ */ -+ unsigned congestion_threshold; -+ -+ /** -+ * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible -+ * for updating mtime and ctime when write requests are received. The -+ * updated values are passed to the filesystem with setattr() requests. 
-+ * However, if the filesystem does not support the full resolution of -+ * the kernel timestamps (nanoseconds), the mtime and ctime values used -+ * by kernel and filesystem will differ (and result in an apparent -+ * change of times after a cache flush). -+ * -+ * To prevent this problem, this variable can be used to inform the -+ * kernel about the timestamp granularity supported by the file-system. -+ * The value should be power of 10. The default is 1, i.e. full -+ * nano-second resolution. Filesystems supporting only second resolution -+ * should set this to 1000000000. -+ */ -+ unsigned time_gran; -+ -+ /** -+ * For future use. -+ */ -+ unsigned reserved[22]; - }; - - struct fuse_session; -@@ -489,21 +507,20 @@ struct fuse_conn_info_opts; - * -o async_read sets FUSE_CAP_ASYNC_READ in conn->want - * -o sync_read unsets FUSE_CAP_ASYNC_READ in conn->want - * -o atomic_o_trunc sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want -- * -o no_remote_lock Equivalent to -o no_remote_flock,no_remote_posix_lock -- * -o no_remote_flock Unsets FUSE_CAP_FLOCK_LOCKS in conn->want -- * -o no_remote_posix_lock Unsets FUSE_CAP_POSIX_LOCKS in conn->want -- * -o [no_]splice_write (un-)sets FUSE_CAP_SPLICE_WRITE in conn->want -- * -o [no_]splice_move (un-)sets FUSE_CAP_SPLICE_MOVE in conn->want -- * -o [no_]splice_read (un-)sets FUSE_CAP_SPLICE_READ in conn->want -- * -o [no_]auto_inval_data (un-)sets FUSE_CAP_AUTO_INVAL_DATA in conn->want -- * -o readdirplus=no unsets FUSE_CAP_READDIRPLUS in conn->want -- * -o readdirplus=yes sets FUSE_CAP_READDIRPLUS and unsets -- * FUSE_CAP_READDIRPLUS_AUTO in conn->want -- * -o readdirplus=auto sets FUSE_CAP_READDIRPLUS and -- * FUSE_CAP_READDIRPLUS_AUTO in conn->want -- * -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in conn->want -- * -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in conn->want -- * -o time_gran=N sets conn->time_gran -+ * -o no_remote_lock Equivalent to -o -+ *no_remote_flock,no_remote_posix_lock -o no_remote_flock 
Unsets -+ *FUSE_CAP_FLOCK_LOCKS in conn->want -o no_remote_posix_lock Unsets -+ *FUSE_CAP_POSIX_LOCKS in conn->want -o [no_]splice_write (un-)sets -+ *FUSE_CAP_SPLICE_WRITE in conn->want -o [no_]splice_move (un-)sets -+ *FUSE_CAP_SPLICE_MOVE in conn->want -o [no_]splice_read (un-)sets -+ *FUSE_CAP_SPLICE_READ in conn->want -o [no_]auto_inval_data (un-)sets -+ *FUSE_CAP_AUTO_INVAL_DATA in conn->want -o readdirplus=no unsets -+ *FUSE_CAP_READDIRPLUS in conn->want -o readdirplus=yes sets -+ *FUSE_CAP_READDIRPLUS and unsets FUSE_CAP_READDIRPLUS_AUTO in conn->want -o -+ *readdirplus=auto sets FUSE_CAP_READDIRPLUS and FUSE_CAP_READDIRPLUS_AUTO -+ *in conn->want -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in -+ *conn->want -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in -+ *conn->want -o time_gran=N sets conn->time_gran - * - * Known options will be removed from *args*, unknown options will be - * passed through unchanged. -@@ -511,7 +528,7 @@ struct fuse_conn_info_opts; - * @param args argument vector (input+output) - * @return parsed options - **/ --struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); -+struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args); - - /** - * This function applies the (parsed) parameters in *opts* to the -@@ -521,7 +538,7 @@ struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); - * option has been explicitly set. 
- */ - void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, -- struct fuse_conn_info *conn); -+ struct fuse_conn_info *conn); - - /** - * Go into the background -@@ -552,81 +569,81 @@ const char *fuse_pkgversion(void); - */ - void fuse_pollhandle_destroy(struct fuse_pollhandle *ph); - --/* ----------------------------------------------------------- * -- * Data buffer * -- * ----------------------------------------------------------- */ -+/* -+ * Data buffer -+ */ - - /** - * Buffer flags - */ - enum fuse_buf_flags { -- /** -- * Buffer contains a file descriptor -- * -- * If this flag is set, the .fd field is valid, otherwise the -- * .mem fields is valid. -- */ -- FUSE_BUF_IS_FD = (1 << 1), -- -- /** -- * Seek on the file descriptor -- * -- * If this flag is set then the .pos field is valid and is -- * used to seek to the given offset before performing -- * operation on file descriptor. -- */ -- FUSE_BUF_FD_SEEK = (1 << 2), -- -- /** -- * Retry operation on file descriptor -- * -- * If this flag is set then retry operation on file descriptor -- * until .size bytes have been copied or an error or EOF is -- * detected. -- */ -- FUSE_BUF_FD_RETRY = (1 << 3), -+ /** -+ * Buffer contains a file descriptor -+ * -+ * If this flag is set, the .fd field is valid, otherwise the -+ * .mem fields is valid. -+ */ -+ FUSE_BUF_IS_FD = (1 << 1), -+ -+ /** -+ * Seek on the file descriptor -+ * -+ * If this flag is set then the .pos field is valid and is -+ * used to seek to the given offset before performing -+ * operation on file descriptor. -+ */ -+ FUSE_BUF_FD_SEEK = (1 << 2), -+ -+ /** -+ * Retry operation on file descriptor -+ * -+ * If this flag is set then retry operation on file descriptor -+ * until .size bytes have been copied or an error or EOF is -+ * detected. 
-+ */ -+ FUSE_BUF_FD_RETRY = (1 << 3), - }; - - /** - * Buffer copy flags - */ - enum fuse_buf_copy_flags { -- /** -- * Don't use splice(2) -- * -- * Always fall back to using read and write instead of -- * splice(2) to copy data from one file descriptor to another. -- * -- * If this flag is not set, then only fall back if splice is -- * unavailable. -- */ -- FUSE_BUF_NO_SPLICE = (1 << 1), -- -- /** -- * Force splice -- * -- * Always use splice(2) to copy data from one file descriptor -- * to another. If splice is not available, return -EINVAL. -- */ -- FUSE_BUF_FORCE_SPLICE = (1 << 2), -- -- /** -- * Try to move data with splice. -- * -- * If splice is used, try to move pages from the source to the -- * destination instead of copying. See documentation of -- * SPLICE_F_MOVE in splice(2) man page. -- */ -- FUSE_BUF_SPLICE_MOVE = (1 << 3), -- -- /** -- * Don't block on the pipe when copying data with splice -- * -- * Makes the operations on the pipe non-blocking (if the pipe -- * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) -- * man page. -- */ -- FUSE_BUF_SPLICE_NONBLOCK= (1 << 4), -+ /** -+ * Don't use splice(2) -+ * -+ * Always fall back to using read and write instead of -+ * splice(2) to copy data from one file descriptor to another. -+ * -+ * If this flag is not set, then only fall back if splice is -+ * unavailable. -+ */ -+ FUSE_BUF_NO_SPLICE = (1 << 1), -+ -+ /** -+ * Force splice -+ * -+ * Always use splice(2) to copy data from one file descriptor -+ * to another. If splice is not available, return -EINVAL. -+ */ -+ FUSE_BUF_FORCE_SPLICE = (1 << 2), -+ -+ /** -+ * Try to move data with splice. -+ * -+ * If splice is used, try to move pages from the source to the -+ * destination instead of copying. See documentation of -+ * SPLICE_F_MOVE in splice(2) man page. 
-+ */ -+ FUSE_BUF_SPLICE_MOVE = (1 << 3), -+ -+ /** -+ * Don't block on the pipe when copying data with splice -+ * -+ * Makes the operations on the pipe non-blocking (if the pipe -+ * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) -+ * man page. -+ */ -+ FUSE_BUF_SPLICE_NONBLOCK = (1 << 4), - }; - - /** -@@ -636,36 +653,36 @@ enum fuse_buf_copy_flags { - * be supplied as a memory pointer or as a file descriptor - */ - struct fuse_buf { -- /** -- * Size of data in bytes -- */ -- size_t size; -- -- /** -- * Buffer flags -- */ -- enum fuse_buf_flags flags; -- -- /** -- * Memory pointer -- * -- * Used unless FUSE_BUF_IS_FD flag is set. -- */ -- void *mem; -- -- /** -- * File descriptor -- * -- * Used if FUSE_BUF_IS_FD flag is set. -- */ -- int fd; -- -- /** -- * File position -- * -- * Used if FUSE_BUF_FD_SEEK flag is set. -- */ -- off_t pos; -+ /** -+ * Size of data in bytes -+ */ -+ size_t size; -+ -+ /** -+ * Buffer flags -+ */ -+ enum fuse_buf_flags flags; -+ -+ /** -+ * Memory pointer -+ * -+ * Used unless FUSE_BUF_IS_FD flag is set. -+ */ -+ void *mem; -+ -+ /** -+ * File descriptor -+ * -+ * Used if FUSE_BUF_IS_FD flag is set. -+ */ -+ int fd; -+ -+ /** -+ * File position -+ * -+ * Used if FUSE_BUF_FD_SEEK flag is set. -+ */ -+ off_t pos; - }; - - /** -@@ -677,41 +694,39 @@ struct fuse_buf { - * Allocate dynamically to add more than one buffer. 
- */ - struct fuse_bufvec { -- /** -- * Number of buffers in the array -- */ -- size_t count; -- -- /** -- * Index of current buffer within the array -- */ -- size_t idx; -- -- /** -- * Current offset within the current buffer -- */ -- size_t off; -- -- /** -- * Array of buffers -- */ -- struct fuse_buf buf[1]; -+ /** -+ * Number of buffers in the array -+ */ -+ size_t count; -+ -+ /** -+ * Index of current buffer within the array -+ */ -+ size_t idx; -+ -+ /** -+ * Current offset within the current buffer -+ */ -+ size_t off; -+ -+ /** -+ * Array of buffers -+ */ -+ struct fuse_buf buf[1]; - }; - - /* Initialize bufvec with a single buffer of given size */ --#define FUSE_BUFVEC_INIT(size__) \ -- ((struct fuse_bufvec) { \ -- /* .count= */ 1, \ -- /* .idx = */ 0, \ -- /* .off = */ 0, \ -- /* .buf = */ { /* [0] = */ { \ -- /* .size = */ (size__), \ -- /* .flags = */ (enum fuse_buf_flags) 0, \ -- /* .mem = */ NULL, \ -- /* .fd = */ -1, \ -- /* .pos = */ 0, \ -- } } \ -- } ) -+#define FUSE_BUFVEC_INIT(size__) \ -+ ((struct fuse_bufvec){ /* .count= */ 1, \ -+ /* .idx = */ 0, \ -+ /* .off = */ 0, /* .buf = */ \ -+ { /* [0] = */ { \ -+ /* .size = */ (size__), \ -+ /* .flags = */ (enum fuse_buf_flags)0, \ -+ /* .mem = */ NULL, \ -+ /* .fd = */ -1, \ -+ /* .pos = */ 0, \ -+ } } }) - - /** - * Get total size of data in a fuse buffer vector -@@ -730,16 +745,16 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); - * @return actual number of bytes copied or -errno on error - */ - ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, -- enum fuse_buf_copy_flags flags); -+ enum fuse_buf_copy_flags flags); - --/* ----------------------------------------------------------- * -- * Signal handling * -- * ----------------------------------------------------------- */ -+/* -+ * Signal handling -+ */ - - /** - * Exit session on HUP, TERM and INT signals and ignore PIPE signal - * -- * Stores session in a global variable. 
May only be called once per -+ * Stores session in a global variable. May only be called once per - * process until fuse_remove_signal_handlers() is called. - * - * Once either of the POSIX signals arrives, the signal handler calls -@@ -766,12 +781,12 @@ int fuse_set_signal_handlers(struct fuse_session *se); - */ - void fuse_remove_signal_handlers(struct fuse_session *se); - --/* ----------------------------------------------------------- * -- * Compatibility stuff * -- * ----------------------------------------------------------- */ -+/* -+ * Compatibility stuff -+ */ - - #if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30 --# error only API version 30 or greater is supported -+#error only API version 30 or greater is supported - #endif - - -@@ -781,11 +796,14 @@ void fuse_remove_signal_handlers(struct fuse_session *se); - * On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags! - */ - --#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus -+#if defined(__GNUC__) && \ -+ (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ -+ !defined __cplusplus - _Static_assert(sizeof(off_t) == 8, "fuse: off_t must be 64bit"); - #else --struct _fuse_off_t_must_be_64bit_dummy_struct \ -- { unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); }; -+struct _fuse_off_t_must_be_64bit_dummy_struct { -+ unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); -+}; - #endif - - #endif /* FUSE_COMMON_H_ */ -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index b39522e..e63cb58 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -1,71 +1,71 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU LGPLv2. 
-- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB -+ */ - - #include "fuse.h" - #include "fuse_lowlevel.h" - - struct fuse_req { -- struct fuse_session *se; -- uint64_t unique; -- int ctr; -- pthread_mutex_t lock; -- struct fuse_ctx ctx; -- struct fuse_chan *ch; -- int interrupted; -- unsigned int ioctl_64bit : 1; -- union { -- struct { -- uint64_t unique; -- } i; -- struct { -- fuse_interrupt_func_t func; -- void *data; -- } ni; -- } u; -- struct fuse_req *next; -- struct fuse_req *prev; -+ struct fuse_session *se; -+ uint64_t unique; -+ int ctr; -+ pthread_mutex_t lock; -+ struct fuse_ctx ctx; -+ struct fuse_chan *ch; -+ int interrupted; -+ unsigned int ioctl_64bit:1; -+ union { -+ struct { -+ uint64_t unique; -+ } i; -+ struct { -+ fuse_interrupt_func_t func; -+ void *data; -+ } ni; -+ } u; -+ struct fuse_req *next; -+ struct fuse_req *prev; - }; - - struct fuse_notify_req { -- uint64_t unique; -- void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, -- const void *, const struct fuse_buf *); -- struct fuse_notify_req *next; -- struct fuse_notify_req *prev; -+ uint64_t unique; -+ void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, -+ const void *, const struct fuse_buf *); -+ struct fuse_notify_req *next; -+ struct fuse_notify_req *prev; - }; - - struct fuse_session { -- char *mountpoint; -- volatile int exited; -- int fd; -- int debug; -- int deny_others; -- struct fuse_lowlevel_ops op; -- int got_init; -- struct cuse_data *cuse_data; -- void *userdata; -- uid_t owner; -- struct fuse_conn_info conn; -- struct fuse_req list; -- struct fuse_req interrupts; -- pthread_mutex_t lock; -- int got_destroy; -- int broken_splice_nonblock; -- uint64_t notify_ctr; -- struct fuse_notify_req notify_list; -- size_t bufsize; -- int error; -+ char *mountpoint; -+ volatile int exited; -+ int 
fd; -+ int debug; -+ int deny_others; -+ struct fuse_lowlevel_ops op; -+ int got_init; -+ struct cuse_data *cuse_data; -+ void *userdata; -+ uid_t owner; -+ struct fuse_conn_info conn; -+ struct fuse_req list; -+ struct fuse_req interrupts; -+ pthread_mutex_t lock; -+ int got_destroy; -+ int broken_splice_nonblock; -+ uint64_t notify_ctr; -+ struct fuse_notify_req notify_list; -+ size_t bufsize; -+ int error; - }; - - struct fuse_chan { -- pthread_mutex_t lock; -- int ctr; -- int fd; -+ pthread_mutex_t lock; -+ int ctr; -+ int fd; - }; - - /** -@@ -76,19 +76,20 @@ struct fuse_chan { - * - */ - struct fuse_module { -- char *name; -- fuse_module_factory_t factory; -- struct fuse_module *next; -- struct fusemod_so *so; -- int ctr; -+ char *name; -+ fuse_module_factory_t factory; -+ struct fuse_module *next; -+ struct fusemod_so *so; -+ int ctr; - }; - - int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, -- int count); -+ int count); - void fuse_free_req(fuse_req_t req); - - void fuse_session_process_buf_int(struct fuse_session *se, -- const struct fuse_buf *buf, struct fuse_chan *ch); -+ const struct fuse_buf *buf, -+ struct fuse_chan *ch); - - - #define FUSE_MAX_MAX_PAGES 256 -diff --git a/tools/virtiofsd/fuse_log.c b/tools/virtiofsd/fuse_log.c -index 0d268ab..11345f9 100644 ---- a/tools/virtiofsd/fuse_log.c -+++ b/tools/virtiofsd/fuse_log.c -@@ -1,40 +1,40 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2019 Red Hat, Inc. -- -- Logging API. -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2019 Red Hat, Inc. -+ * -+ * Logging API. -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ - - #include "fuse_log.h" - - #include - #include - --static void default_log_func( -- __attribute__(( unused )) enum fuse_log_level level, -- const char *fmt, va_list ap) -+static void default_log_func(__attribute__((unused)) enum fuse_log_level level, -+ const char *fmt, va_list ap) - { -- vfprintf(stderr, fmt, ap); -+ vfprintf(stderr, fmt, ap); - } - - static fuse_log_func_t log_func = default_log_func; - - void fuse_set_log_func(fuse_log_func_t func) - { -- if (!func) -- func = default_log_func; -+ if (!func) { -+ func = default_log_func; -+ } - -- log_func = func; -+ log_func = func; - } - - void fuse_log(enum fuse_log_level level, const char *fmt, ...) - { -- va_list ap; -+ va_list ap; - -- va_start(ap, fmt); -- log_func(level, fmt, ap); -- va_end(ap); -+ va_start(ap, fmt); -+ log_func(level, fmt, ap); -+ va_end(ap); - } -diff --git a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h -index 0af700d..bf6c11f 100644 ---- a/tools/virtiofsd/fuse_log.h -+++ b/tools/virtiofsd/fuse_log.h -@@ -1,10 +1,10 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2019 Red Hat, Inc. -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2019 Red Hat, Inc. -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. -+ */ - - #ifndef FUSE_LOG_H_ - #define FUSE_LOG_H_ -@@ -22,14 +22,14 @@ - * These levels correspond to syslog(2) log levels since they are widely used. 
- */ - enum fuse_log_level { -- FUSE_LOG_EMERG, -- FUSE_LOG_ALERT, -- FUSE_LOG_CRIT, -- FUSE_LOG_ERR, -- FUSE_LOG_WARNING, -- FUSE_LOG_NOTICE, -- FUSE_LOG_INFO, -- FUSE_LOG_DEBUG -+ FUSE_LOG_EMERG, -+ FUSE_LOG_ALERT, -+ FUSE_LOG_CRIT, -+ FUSE_LOG_ERR, -+ FUSE_LOG_WARNING, -+ FUSE_LOG_NOTICE, -+ FUSE_LOG_INFO, -+ FUSE_LOG_DEBUG - }; - - /** -@@ -45,8 +45,8 @@ enum fuse_log_level { - * @param fmt sprintf-style format string including newline - * @param ap format string arguments - */ --typedef void (*fuse_log_func_t)(enum fuse_log_level level, -- const char *fmt, va_list ap); -+typedef void (*fuse_log_func_t)(enum fuse_log_level level, const char *fmt, -+ va_list ap); - - /** - * Install a custom log handler function. -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index e6fa247..5c9cb52 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1,2380 +1,2515 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- Implementation of (most of) the low-level FUSE API. The session loop -- functions are implemented in separate files. -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * Implementation of (most of) the low-level FUSE API. The session loop -+ * functions are implemented in separate files. -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ - - #define _GNU_SOURCE - - #include "config.h" - #include "fuse_i.h" - #include "fuse_kernel.h" --#include "fuse_opt.h" - #include "fuse_misc.h" -+#include "fuse_opt.h" - -+#include -+#include -+#include -+#include - #include - #include --#include - #include --#include --#include --#include --#include - #include -- -+#include - - - #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) - #define OFFSET_MAX 0x7fffffffffffffffLL - --#define container_of(ptr, type, member) ({ \ -- const typeof( ((type *)0)->member ) *__mptr = (ptr); \ -- (type *)( (char *)__mptr - offsetof(type,member) );}) -+#define container_of(ptr, type, member) \ -+ ({ \ -+ const typeof(((type *)0)->member) *__mptr = (ptr); \ -+ (type *)((char *)__mptr - offsetof(type, member)); \ -+ }) - - struct fuse_pollhandle { -- uint64_t kh; -- struct fuse_session *se; -+ uint64_t kh; -+ struct fuse_session *se; - }; - - static size_t pagesize; - - static __attribute__((constructor)) void fuse_ll_init_pagesize(void) - { -- pagesize = getpagesize(); -+ pagesize = getpagesize(); - } - - static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) - { -- attr->ino = stbuf->st_ino; -- attr->mode = stbuf->st_mode; -- attr->nlink = stbuf->st_nlink; -- attr->uid = stbuf->st_uid; -- attr->gid = stbuf->st_gid; -- attr->rdev = stbuf->st_rdev; -- attr->size = stbuf->st_size; -- attr->blksize = stbuf->st_blksize; -- attr->blocks = stbuf->st_blocks; -- attr->atime = stbuf->st_atime; -- attr->mtime = stbuf->st_mtime; -- attr->ctime = stbuf->st_ctime; -- attr->atimensec = ST_ATIM_NSEC(stbuf); -- attr->mtimensec = ST_MTIM_NSEC(stbuf); -- attr->ctimensec = ST_CTIM_NSEC(stbuf); -+ attr->ino = stbuf->st_ino; -+ attr->mode = stbuf->st_mode; -+ attr->nlink = stbuf->st_nlink; -+ attr->uid = stbuf->st_uid; -+ attr->gid = stbuf->st_gid; -+ attr->rdev = stbuf->st_rdev; -+ attr->size = stbuf->st_size; -+ attr->blksize = stbuf->st_blksize; -+ attr->blocks = stbuf->st_blocks; -+ 
attr->atime = stbuf->st_atime; -+ attr->mtime = stbuf->st_mtime; -+ attr->ctime = stbuf->st_ctime; -+ attr->atimensec = ST_ATIM_NSEC(stbuf); -+ attr->mtimensec = ST_MTIM_NSEC(stbuf); -+ attr->ctimensec = ST_CTIM_NSEC(stbuf); - } - - static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) - { -- stbuf->st_mode = attr->mode; -- stbuf->st_uid = attr->uid; -- stbuf->st_gid = attr->gid; -- stbuf->st_size = attr->size; -- stbuf->st_atime = attr->atime; -- stbuf->st_mtime = attr->mtime; -- stbuf->st_ctime = attr->ctime; -- ST_ATIM_NSEC_SET(stbuf, attr->atimensec); -- ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); -- ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); -+ stbuf->st_mode = attr->mode; -+ stbuf->st_uid = attr->uid; -+ stbuf->st_gid = attr->gid; -+ stbuf->st_size = attr->size; -+ stbuf->st_atime = attr->atime; -+ stbuf->st_mtime = attr->mtime; -+ stbuf->st_ctime = attr->ctime; -+ ST_ATIM_NSEC_SET(stbuf, attr->atimensec); -+ ST_MTIM_NSEC_SET(stbuf, attr->mtimensec); -+ ST_CTIM_NSEC_SET(stbuf, attr->ctimensec); - } - --static size_t iov_length(const struct iovec *iov, size_t count) -+static size_t iov_length(const struct iovec *iov, size_t count) - { -- size_t seg; -- size_t ret = 0; -+ size_t seg; -+ size_t ret = 0; - -- for (seg = 0; seg < count; seg++) -- ret += iov[seg].iov_len; -- return ret; -+ for (seg = 0; seg < count; seg++) { -+ ret += iov[seg].iov_len; -+ } -+ return ret; - } - - static void list_init_req(struct fuse_req *req) - { -- req->next = req; -- req->prev = req; -+ req->next = req; -+ req->prev = req; - } - - static void list_del_req(struct fuse_req *req) - { -- struct fuse_req *prev = req->prev; -- struct fuse_req *next = req->next; -- prev->next = next; -- next->prev = prev; -+ struct fuse_req *prev = req->prev; -+ struct fuse_req *next = req->next; -+ prev->next = next; -+ next->prev = prev; - } - - static void list_add_req(struct fuse_req *req, struct fuse_req *next) - { -- struct fuse_req *prev = next->prev; -- req->next = next; 
-- req->prev = prev; -- prev->next = req; -- next->prev = req; -+ struct fuse_req *prev = next->prev; -+ req->next = next; -+ req->prev = prev; -+ prev->next = req; -+ next->prev = req; - } - - static void destroy_req(fuse_req_t req) - { -- pthread_mutex_destroy(&req->lock); -- free(req); -+ pthread_mutex_destroy(&req->lock); -+ free(req); - } - - void fuse_free_req(fuse_req_t req) - { -- int ctr; -- struct fuse_session *se = req->se; -+ int ctr; -+ struct fuse_session *se = req->se; - -- pthread_mutex_lock(&se->lock); -- req->u.ni.func = NULL; -- req->u.ni.data = NULL; -- list_del_req(req); -- ctr = --req->ctr; -- req->ch = NULL; -- pthread_mutex_unlock(&se->lock); -- if (!ctr) -- destroy_req(req); -+ pthread_mutex_lock(&se->lock); -+ req->u.ni.func = NULL; -+ req->u.ni.data = NULL; -+ list_del_req(req); -+ ctr = --req->ctr; -+ req->ch = NULL; -+ pthread_mutex_unlock(&se->lock); -+ if (!ctr) { -+ destroy_req(req); -+ } - } - - static struct fuse_req *fuse_ll_alloc_req(struct fuse_session *se) - { -- struct fuse_req *req; -+ struct fuse_req *req; - -- req = (struct fuse_req *) calloc(1, sizeof(struct fuse_req)); -- if (req == NULL) { -- fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); -- } else { -- req->se = se; -- req->ctr = 1; -- list_init_req(req); -- fuse_mutex_init(&req->lock); -- } -+ req = (struct fuse_req *)calloc(1, sizeof(struct fuse_req)); -+ if (req == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate request\n"); -+ } else { -+ req->se = se; -+ req->ctr = 1; -+ list_init_req(req); -+ fuse_mutex_init(&req->lock); -+ } - -- return req; -+ return req; - } - - /* Send data. 
If *ch* is NULL, send via session master fd */ - static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, -- struct iovec *iov, int count) -+ struct iovec *iov, int count) - { -- struct fuse_out_header *out = iov[0].iov_base; -+ struct fuse_out_header *out = iov[0].iov_base; - -- out->len = iov_length(iov, count); -- if (se->debug) { -- if (out->unique == 0) { -- fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", -- out->error, out->len); -- } else if (out->error) { -- fuse_log(FUSE_LOG_DEBUG, -- " unique: %llu, error: %i (%s), outsize: %i\n", -- (unsigned long long) out->unique, out->error, -- strerror(-out->error), out->len); -- } else { -- fuse_log(FUSE_LOG_DEBUG, -- " unique: %llu, success, outsize: %i\n", -- (unsigned long long) out->unique, out->len); -- } -- } -+ out->len = iov_length(iov, count); -+ if (se->debug) { -+ if (out->unique == 0) { -+ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, -+ out->len); -+ } else if (out->error) { -+ fuse_log(FUSE_LOG_DEBUG, -+ " unique: %llu, error: %i (%s), outsize: %i\n", -+ (unsigned long long)out->unique, out->error, -+ strerror(-out->error), out->len); -+ } else { -+ fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", -+ (unsigned long long)out->unique, out->len); -+ } -+ } - -- abort(); /* virtio should have taken it before here */ -- return 0; -+ abort(); /* virtio should have taken it before here */ -+ return 0; - } - - - int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, -- int count) -+ int count) - { -- struct fuse_out_header out; -+ struct fuse_out_header out; - -- if (error <= -1000 || error > 0) { -- fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); -- error = -ERANGE; -- } -+ if (error <= -1000 || error > 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); -+ error = -ERANGE; -+ } - -- out.unique = req->unique; -- out.error = error; -+ out.unique = req->unique; -+ out.error = error; - -- 
iov[0].iov_base = &out; -- iov[0].iov_len = sizeof(struct fuse_out_header); -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct fuse_out_header); - -- return fuse_send_msg(req->se, req->ch, iov, count); -+ return fuse_send_msg(req->se, req->ch, iov, count); - } - - static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov, -- int count) -+ int count) - { -- int res; -+ int res; - -- res = fuse_send_reply_iov_nofree(req, error, iov, count); -- fuse_free_req(req); -- return res; -+ res = fuse_send_reply_iov_nofree(req, error, iov, count); -+ fuse_free_req(req); -+ return res; - } - - static int send_reply(fuse_req_t req, int error, const void *arg, -- size_t argsize) -+ size_t argsize) - { -- struct iovec iov[2]; -- int count = 1; -- if (argsize) { -- iov[1].iov_base = (void *) arg; -- iov[1].iov_len = argsize; -- count++; -- } -- return send_reply_iov(req, error, iov, count); -+ struct iovec iov[2]; -+ int count = 1; -+ if (argsize) { -+ iov[1].iov_base = (void *)arg; -+ iov[1].iov_len = argsize; -+ count++; -+ } -+ return send_reply_iov(req, error, iov, count); - } - - int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count) - { -- int res; -- struct iovec *padded_iov; -+ int res; -+ struct iovec *padded_iov; - -- padded_iov = malloc((count + 1) * sizeof(struct iovec)); -- if (padded_iov == NULL) -- return fuse_reply_err(req, ENOMEM); -+ padded_iov = malloc((count + 1) * sizeof(struct iovec)); -+ if (padded_iov == NULL) { -+ return fuse_reply_err(req, ENOMEM); -+ } - -- memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); -- count++; -+ memcpy(padded_iov + 1, iov, count * sizeof(struct iovec)); -+ count++; - -- res = send_reply_iov(req, 0, padded_iov, count); -- free(padded_iov); -+ res = send_reply_iov(req, 0, padded_iov, count); -+ free(padded_iov); - -- return res; -+ return res; - } - - --/* `buf` is allowed to be empty so that the proper size may be -- allocated by the caller */ -+/* -+ * 'buf` is allowed to be empty 
so that the proper size may be -+ * allocated by the caller -+ */ - size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, -- const char *name, const struct stat *stbuf, off_t off) -+ const char *name, const struct stat *stbuf, off_t off) - { -- (void)req; -- size_t namelen; -- size_t entlen; -- size_t entlen_padded; -- struct fuse_dirent *dirent; -+ (void)req; -+ size_t namelen; -+ size_t entlen; -+ size_t entlen_padded; -+ struct fuse_dirent *dirent; - -- namelen = strlen(name); -- entlen = FUSE_NAME_OFFSET + namelen; -- entlen_padded = FUSE_DIRENT_ALIGN(entlen); -+ namelen = strlen(name); -+ entlen = FUSE_NAME_OFFSET + namelen; -+ entlen_padded = FUSE_DIRENT_ALIGN(entlen); - -- if ((buf == NULL) || (entlen_padded > bufsize)) -- return entlen_padded; -+ if ((buf == NULL) || (entlen_padded > bufsize)) { -+ return entlen_padded; -+ } - -- dirent = (struct fuse_dirent*) buf; -- dirent->ino = stbuf->st_ino; -- dirent->off = off; -- dirent->namelen = namelen; -- dirent->type = (stbuf->st_mode & S_IFMT) >> 12; -- memcpy(dirent->name, name, namelen); -- memset(dirent->name + namelen, 0, entlen_padded - entlen); -+ dirent = (struct fuse_dirent *)buf; -+ dirent->ino = stbuf->st_ino; -+ dirent->off = off; -+ dirent->namelen = namelen; -+ dirent->type = (stbuf->st_mode & S_IFMT) >> 12; -+ memcpy(dirent->name, name, namelen); -+ memset(dirent->name + namelen, 0, entlen_padded - entlen); - -- return entlen_padded; -+ return entlen_padded; - } - - static void convert_statfs(const struct statvfs *stbuf, -- struct fuse_kstatfs *kstatfs) -+ struct fuse_kstatfs *kstatfs) - { -- kstatfs->bsize = stbuf->f_bsize; -- kstatfs->frsize = stbuf->f_frsize; -- kstatfs->blocks = stbuf->f_blocks; -- kstatfs->bfree = stbuf->f_bfree; -- kstatfs->bavail = stbuf->f_bavail; -- kstatfs->files = stbuf->f_files; -- kstatfs->ffree = stbuf->f_ffree; -- kstatfs->namelen = stbuf->f_namemax; -+ kstatfs->bsize = stbuf->f_bsize; -+ kstatfs->frsize = stbuf->f_frsize; -+ kstatfs->blocks = 
stbuf->f_blocks; -+ kstatfs->bfree = stbuf->f_bfree; -+ kstatfs->bavail = stbuf->f_bavail; -+ kstatfs->files = stbuf->f_files; -+ kstatfs->ffree = stbuf->f_ffree; -+ kstatfs->namelen = stbuf->f_namemax; - } - - static int send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) - { -- return send_reply(req, 0, arg, argsize); -+ return send_reply(req, 0, arg, argsize); - } - - int fuse_reply_err(fuse_req_t req, int err) - { -- return send_reply(req, -err, NULL, 0); -+ return send_reply(req, -err, NULL, 0); - } - - void fuse_reply_none(fuse_req_t req) - { -- fuse_free_req(req); -+ fuse_free_req(req); - } - - static unsigned long calc_timeout_sec(double t) - { -- if (t > (double) ULONG_MAX) -- return ULONG_MAX; -- else if (t < 0.0) -- return 0; -- else -- return (unsigned long) t; -+ if (t > (double)ULONG_MAX) { -+ return ULONG_MAX; -+ } else if (t < 0.0) { -+ return 0; -+ } else { -+ return (unsigned long)t; -+ } - } - - static unsigned int calc_timeout_nsec(double t) - { -- double f = t - (double) calc_timeout_sec(t); -- if (f < 0.0) -- return 0; -- else if (f >= 0.999999999) -- return 999999999; -- else -- return (unsigned int) (f * 1.0e9); -+ double f = t - (double)calc_timeout_sec(t); -+ if (f < 0.0) { -+ return 0; -+ } else if (f >= 0.999999999) { -+ return 999999999; -+ } else { -+ return (unsigned int)(f * 1.0e9); -+ } - } - - static void fill_entry(struct fuse_entry_out *arg, -- const struct fuse_entry_param *e) -+ const struct fuse_entry_param *e) - { -- arg->nodeid = e->ino; -- arg->generation = e->generation; -- arg->entry_valid = calc_timeout_sec(e->entry_timeout); -- arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); -- arg->attr_valid = calc_timeout_sec(e->attr_timeout); -- arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); -- convert_stat(&e->attr, &arg->attr); -+ arg->nodeid = e->ino; -+ arg->generation = e->generation; -+ arg->entry_valid = calc_timeout_sec(e->entry_timeout); -+ arg->entry_valid_nsec = 
calc_timeout_nsec(e->entry_timeout); -+ arg->attr_valid = calc_timeout_sec(e->attr_timeout); -+ arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); -+ convert_stat(&e->attr, &arg->attr); - } - --/* `buf` is allowed to be empty so that the proper size may be -- allocated by the caller */ -+/* -+ * `buf` is allowed to be empty so that the proper size may be -+ * allocated by the caller -+ */ - size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, -- const char *name, -- const struct fuse_entry_param *e, off_t off) --{ -- (void)req; -- size_t namelen; -- size_t entlen; -- size_t entlen_padded; -- -- namelen = strlen(name); -- entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; -- entlen_padded = FUSE_DIRENT_ALIGN(entlen); -- if ((buf == NULL) || (entlen_padded > bufsize)) -- return entlen_padded; -- -- struct fuse_direntplus *dp = (struct fuse_direntplus *) buf; -- memset(&dp->entry_out, 0, sizeof(dp->entry_out)); -- fill_entry(&dp->entry_out, e); -- -- struct fuse_dirent *dirent = &dp->dirent; -- dirent->ino = e->attr.st_ino; -- dirent->off = off; -- dirent->namelen = namelen; -- dirent->type = (e->attr.st_mode & S_IFMT) >> 12; -- memcpy(dirent->name, name, namelen); -- memset(dirent->name + namelen, 0, entlen_padded - entlen); -- -- return entlen_padded; --} -- --static void fill_open(struct fuse_open_out *arg, -- const struct fuse_file_info *f) --{ -- arg->fh = f->fh; -- if (f->direct_io) -- arg->open_flags |= FOPEN_DIRECT_IO; -- if (f->keep_cache) -- arg->open_flags |= FOPEN_KEEP_CACHE; -- if (f->cache_readdir) -- arg->open_flags |= FOPEN_CACHE_DIR; -- if (f->nonseekable) -- arg->open_flags |= FOPEN_NONSEEKABLE; -+ const char *name, -+ const struct fuse_entry_param *e, off_t off) -+{ -+ (void)req; -+ size_t namelen; -+ size_t entlen; -+ size_t entlen_padded; -+ -+ namelen = strlen(name); -+ entlen = FUSE_NAME_OFFSET_DIRENTPLUS + namelen; -+ entlen_padded = FUSE_DIRENT_ALIGN(entlen); -+ if ((buf == NULL) || (entlen_padded > bufsize)) { -+ 
return entlen_padded; -+ } -+ -+ struct fuse_direntplus *dp = (struct fuse_direntplus *)buf; -+ memset(&dp->entry_out, 0, sizeof(dp->entry_out)); -+ fill_entry(&dp->entry_out, e); -+ -+ struct fuse_dirent *dirent = &dp->dirent; -+ dirent->ino = e->attr.st_ino; -+ dirent->off = off; -+ dirent->namelen = namelen; -+ dirent->type = (e->attr.st_mode & S_IFMT) >> 12; -+ memcpy(dirent->name, name, namelen); -+ memset(dirent->name + namelen, 0, entlen_padded - entlen); -+ -+ return entlen_padded; -+} -+ -+static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f) -+{ -+ arg->fh = f->fh; -+ if (f->direct_io) { -+ arg->open_flags |= FOPEN_DIRECT_IO; -+ } -+ if (f->keep_cache) { -+ arg->open_flags |= FOPEN_KEEP_CACHE; -+ } -+ if (f->cache_readdir) { -+ arg->open_flags |= FOPEN_CACHE_DIR; -+ } -+ if (f->nonseekable) { -+ arg->open_flags |= FOPEN_NONSEEKABLE; -+ } - } - - int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) - { -- struct fuse_entry_out arg; -- size_t size = req->se->conn.proto_minor < 9 ? -- FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(arg); -+ struct fuse_entry_out arg; -+ size_t size = req->se->conn.proto_minor < 9 ? 
FUSE_COMPAT_ENTRY_OUT_SIZE : -+ sizeof(arg); - -- /* before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant -- negative entry */ -- if (!e->ino && req->se->conn.proto_minor < 4) -- return fuse_reply_err(req, ENOENT); -+ /* -+ * before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant -+ * negative entry -+ */ -+ if (!e->ino && req->se->conn.proto_minor < 4) { -+ return fuse_reply_err(req, ENOENT); -+ } - -- memset(&arg, 0, sizeof(arg)); -- fill_entry(&arg, e); -- return send_reply_ok(req, &arg, size); -+ memset(&arg, 0, sizeof(arg)); -+ fill_entry(&arg, e); -+ return send_reply_ok(req, &arg, size); - } - - int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, -- const struct fuse_file_info *f) -+ const struct fuse_file_info *f) - { -- char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; -- size_t entrysize = req->se->conn.proto_minor < 9 ? -- FUSE_COMPAT_ENTRY_OUT_SIZE : sizeof(struct fuse_entry_out); -- struct fuse_entry_out *earg = (struct fuse_entry_out *) buf; -- struct fuse_open_out *oarg = (struct fuse_open_out *) (buf + entrysize); -+ char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; -+ size_t entrysize = req->se->conn.proto_minor < 9 ? -+ FUSE_COMPAT_ENTRY_OUT_SIZE : -+ sizeof(struct fuse_entry_out); -+ struct fuse_entry_out *earg = (struct fuse_entry_out *)buf; -+ struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize); - -- memset(buf, 0, sizeof(buf)); -- fill_entry(earg, e); -- fill_open(oarg, f); -- return send_reply_ok(req, buf, -- entrysize + sizeof(struct fuse_open_out)); -+ memset(buf, 0, sizeof(buf)); -+ fill_entry(earg, e); -+ fill_open(oarg, f); -+ return send_reply_ok(req, buf, entrysize + sizeof(struct fuse_open_out)); - } - - int fuse_reply_attr(fuse_req_t req, const struct stat *attr, -- double attr_timeout) -+ double attr_timeout) - { -- struct fuse_attr_out arg; -- size_t size = req->se->conn.proto_minor < 9 ? 
-- FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); -+ struct fuse_attr_out arg; -+ size_t size = -+ req->se->conn.proto_minor < 9 ? FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); - -- memset(&arg, 0, sizeof(arg)); -- arg.attr_valid = calc_timeout_sec(attr_timeout); -- arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); -- convert_stat(attr, &arg.attr); -+ memset(&arg, 0, sizeof(arg)); -+ arg.attr_valid = calc_timeout_sec(attr_timeout); -+ arg.attr_valid_nsec = calc_timeout_nsec(attr_timeout); -+ convert_stat(attr, &arg.attr); - -- return send_reply_ok(req, &arg, size); -+ return send_reply_ok(req, &arg, size); - } - - int fuse_reply_readlink(fuse_req_t req, const char *linkname) - { -- return send_reply_ok(req, linkname, strlen(linkname)); -+ return send_reply_ok(req, linkname, strlen(linkname)); - } - - int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f) - { -- struct fuse_open_out arg; -+ struct fuse_open_out arg; - -- memset(&arg, 0, sizeof(arg)); -- fill_open(&arg, f); -- return send_reply_ok(req, &arg, sizeof(arg)); -+ memset(&arg, 0, sizeof(arg)); -+ fill_open(&arg, f); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - int fuse_reply_write(fuse_req_t req, size_t count) - { -- struct fuse_write_out arg; -+ struct fuse_write_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.size = count; -+ memset(&arg, 0, sizeof(arg)); -+ arg.size = count; - -- return send_reply_ok(req, &arg, sizeof(arg)); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size) - { -- return send_reply_ok(req, buf, size); -+ return send_reply_ok(req, buf, size); - } - - static int fuse_send_data_iov_fallback(struct fuse_session *se, -- struct fuse_chan *ch, -- struct iovec *iov, int iov_count, -- struct fuse_bufvec *buf, -- size_t len) -+ struct fuse_chan *ch, struct iovec *iov, -+ int iov_count, struct fuse_bufvec *buf, -+ size_t len) - { -- /* Optimize common case */ -- if (buf->count == 1 && buf->idx == 0 && 
buf->off == 0 && -- !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { -- /* FIXME: also avoid memory copy if there are multiple buffers -- but none of them contain an fd */ -+ /* Optimize common case */ -+ if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && -+ !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { -+ /* -+ * FIXME: also avoid memory copy if there are multiple buffers -+ * but none of them contain an fd -+ */ - -- iov[iov_count].iov_base = buf->buf[0].mem; -- iov[iov_count].iov_len = len; -- iov_count++; -- return fuse_send_msg(se, ch, iov, iov_count); -- } -+ iov[iov_count].iov_base = buf->buf[0].mem; -+ iov[iov_count].iov_len = len; -+ iov_count++; -+ return fuse_send_msg(se, ch, iov, iov_count); -+ } - -- abort(); /* Will have taken vhost path */ -- return 0; -+ abort(); /* Will have taken vhost path */ -+ return 0; - } - - static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -- struct iovec *iov, int iov_count, -- struct fuse_bufvec *buf, unsigned int flags) -+ struct iovec *iov, int iov_count, -+ struct fuse_bufvec *buf, unsigned int flags) - { -- size_t len = fuse_buf_size(buf); -- (void) flags; -+ size_t len = fuse_buf_size(buf); -+ (void)flags; - -- return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); -+ return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); - } - - int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags) -+ enum fuse_buf_copy_flags flags) - { -- struct iovec iov[2]; -- struct fuse_out_header out; -- int res; -+ struct iovec iov[2]; -+ struct fuse_out_header out; -+ int res; - -- iov[0].iov_base = &out; -- iov[0].iov_len = sizeof(struct fuse_out_header); -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct fuse_out_header); - -- out.unique = req->unique; -- out.error = 0; -+ out.unique = req->unique; -+ out.error = 0; - -- res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); -- if (res <= 0) { -- fuse_free_req(req); -- return 
res; -- } else { -- return fuse_reply_err(req, res); -- } -+ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); -+ if (res <= 0) { -+ fuse_free_req(req); -+ return res; -+ } else { -+ return fuse_reply_err(req, res); -+ } - } - - int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) - { -- struct fuse_statfs_out arg; -- size_t size = req->se->conn.proto_minor < 4 ? -- FUSE_COMPAT_STATFS_SIZE : sizeof(arg); -+ struct fuse_statfs_out arg; -+ size_t size = -+ req->se->conn.proto_minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(arg); - -- memset(&arg, 0, sizeof(arg)); -- convert_statfs(stbuf, &arg.st); -+ memset(&arg, 0, sizeof(arg)); -+ convert_statfs(stbuf, &arg.st); - -- return send_reply_ok(req, &arg, size); -+ return send_reply_ok(req, &arg, size); - } - - int fuse_reply_xattr(fuse_req_t req, size_t count) - { -- struct fuse_getxattr_out arg; -+ struct fuse_getxattr_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.size = count; -+ memset(&arg, 0, sizeof(arg)); -+ arg.size = count; - -- return send_reply_ok(req, &arg, sizeof(arg)); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - int fuse_reply_lock(fuse_req_t req, const struct flock *lock) - { -- struct fuse_lk_out arg; -+ struct fuse_lk_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.lk.type = lock->l_type; -- if (lock->l_type != F_UNLCK) { -- arg.lk.start = lock->l_start; -- if (lock->l_len == 0) -- arg.lk.end = OFFSET_MAX; -- else -- arg.lk.end = lock->l_start + lock->l_len - 1; -- } -- arg.lk.pid = lock->l_pid; -- return send_reply_ok(req, &arg, sizeof(arg)); -+ memset(&arg, 0, sizeof(arg)); -+ arg.lk.type = lock->l_type; -+ if (lock->l_type != F_UNLCK) { -+ arg.lk.start = lock->l_start; -+ if (lock->l_len == 0) { -+ arg.lk.end = OFFSET_MAX; -+ } else { -+ arg.lk.end = lock->l_start + lock->l_len - 1; -+ } -+ } -+ arg.lk.pid = lock->l_pid; -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - int fuse_reply_bmap(fuse_req_t req, uint64_t idx) - { -- struct fuse_bmap_out 
arg; -+ struct fuse_bmap_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.block = idx; -+ memset(&arg, 0, sizeof(arg)); -+ arg.block = idx; - -- return send_reply_ok(req, &arg, sizeof(arg)); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - static struct fuse_ioctl_iovec *fuse_ioctl_iovec_copy(const struct iovec *iov, -- size_t count) --{ -- struct fuse_ioctl_iovec *fiov; -- size_t i; -- -- fiov = malloc(sizeof(fiov[0]) * count); -- if (!fiov) -- return NULL; -- -- for (i = 0; i < count; i++) { -- fiov[i].base = (uintptr_t) iov[i].iov_base; -- fiov[i].len = iov[i].iov_len; -- } -- -- return fiov; --} -- --int fuse_reply_ioctl_retry(fuse_req_t req, -- const struct iovec *in_iov, size_t in_count, -- const struct iovec *out_iov, size_t out_count) --{ -- struct fuse_ioctl_out arg; -- struct fuse_ioctl_iovec *in_fiov = NULL; -- struct fuse_ioctl_iovec *out_fiov = NULL; -- struct iovec iov[4]; -- size_t count = 1; -- int res; -- -- memset(&arg, 0, sizeof(arg)); -- arg.flags |= FUSE_IOCTL_RETRY; -- arg.in_iovs = in_count; -- arg.out_iovs = out_count; -- iov[count].iov_base = &arg; -- iov[count].iov_len = sizeof(arg); -- count++; -- -- if (req->se->conn.proto_minor < 16) { -- if (in_count) { -- iov[count].iov_base = (void *)in_iov; -- iov[count].iov_len = sizeof(in_iov[0]) * in_count; -- count++; -- } -- -- if (out_count) { -- iov[count].iov_base = (void *)out_iov; -- iov[count].iov_len = sizeof(out_iov[0]) * out_count; -- count++; -- } -- } else { -- /* Can't handle non-compat 64bit ioctls on 32bit */ -- if (sizeof(void *) == 4 && req->ioctl_64bit) { -- res = fuse_reply_err(req, EINVAL); -- goto out; -- } -- -- if (in_count) { -- in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); -- if (!in_fiov) -- goto enomem; -- -- iov[count].iov_base = (void *)in_fiov; -- iov[count].iov_len = sizeof(in_fiov[0]) * in_count; -- count++; -- } -- if (out_count) { -- out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); -- if (!out_fiov) -- goto enomem; -- -- iov[count].iov_base = 
(void *)out_fiov; -- iov[count].iov_len = sizeof(out_fiov[0]) * out_count; -- count++; -- } -- } -- -- res = send_reply_iov(req, 0, iov, count); -+ size_t count) -+{ -+ struct fuse_ioctl_iovec *fiov; -+ size_t i; -+ -+ fiov = malloc(sizeof(fiov[0]) * count); -+ if (!fiov) { -+ return NULL; -+ } -+ -+ for (i = 0; i < count; i++) { -+ fiov[i].base = (uintptr_t)iov[i].iov_base; -+ fiov[i].len = iov[i].iov_len; -+ } -+ -+ return fiov; -+} -+ -+int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, -+ size_t in_count, const struct iovec *out_iov, -+ size_t out_count) -+{ -+ struct fuse_ioctl_out arg; -+ struct fuse_ioctl_iovec *in_fiov = NULL; -+ struct fuse_ioctl_iovec *out_fiov = NULL; -+ struct iovec iov[4]; -+ size_t count = 1; -+ int res; -+ -+ memset(&arg, 0, sizeof(arg)); -+ arg.flags |= FUSE_IOCTL_RETRY; -+ arg.in_iovs = in_count; -+ arg.out_iovs = out_count; -+ iov[count].iov_base = &arg; -+ iov[count].iov_len = sizeof(arg); -+ count++; -+ -+ if (req->se->conn.proto_minor < 16) { -+ if (in_count) { -+ iov[count].iov_base = (void *)in_iov; -+ iov[count].iov_len = sizeof(in_iov[0]) * in_count; -+ count++; -+ } -+ -+ if (out_count) { -+ iov[count].iov_base = (void *)out_iov; -+ iov[count].iov_len = sizeof(out_iov[0]) * out_count; -+ count++; -+ } -+ } else { -+ /* Can't handle non-compat 64bit ioctls on 32bit */ -+ if (sizeof(void *) == 4 && req->ioctl_64bit) { -+ res = fuse_reply_err(req, EINVAL); -+ goto out; -+ } -+ -+ if (in_count) { -+ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); -+ if (!in_fiov) { -+ goto enomem; -+ } -+ -+ iov[count].iov_base = (void *)in_fiov; -+ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; -+ count++; -+ } -+ if (out_count) { -+ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); -+ if (!out_fiov) { -+ goto enomem; -+ } -+ -+ iov[count].iov_base = (void *)out_fiov; -+ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; -+ count++; -+ } -+ } -+ -+ res = send_reply_iov(req, 0, iov, count); - out: -- 
free(in_fiov); -- free(out_fiov); -+ free(in_fiov); -+ free(out_fiov); - -- return res; -+ return res; - - enomem: -- res = fuse_reply_err(req, ENOMEM); -- goto out; -+ res = fuse_reply_err(req, ENOMEM); -+ goto out; - } - - int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size) - { -- struct fuse_ioctl_out arg; -- struct iovec iov[3]; -- size_t count = 1; -+ struct fuse_ioctl_out arg; -+ struct iovec iov[3]; -+ size_t count = 1; - -- memset(&arg, 0, sizeof(arg)); -- arg.result = result; -- iov[count].iov_base = &arg; -- iov[count].iov_len = sizeof(arg); -- count++; -+ memset(&arg, 0, sizeof(arg)); -+ arg.result = result; -+ iov[count].iov_base = &arg; -+ iov[count].iov_len = sizeof(arg); -+ count++; - -- if (size) { -- iov[count].iov_base = (char *) buf; -- iov[count].iov_len = size; -- count++; -- } -+ if (size) { -+ iov[count].iov_base = (char *)buf; -+ iov[count].iov_len = size; -+ count++; -+ } - -- return send_reply_iov(req, 0, iov, count); -+ return send_reply_iov(req, 0, iov, count); - } - - int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, -- int count) -+ int count) - { -- struct iovec *padded_iov; -- struct fuse_ioctl_out arg; -- int res; -+ struct iovec *padded_iov; -+ struct fuse_ioctl_out arg; -+ int res; - -- padded_iov = malloc((count + 2) * sizeof(struct iovec)); -- if (padded_iov == NULL) -- return fuse_reply_err(req, ENOMEM); -+ padded_iov = malloc((count + 2) * sizeof(struct iovec)); -+ if (padded_iov == NULL) { -+ return fuse_reply_err(req, ENOMEM); -+ } - -- memset(&arg, 0, sizeof(arg)); -- arg.result = result; -- padded_iov[1].iov_base = &arg; -- padded_iov[1].iov_len = sizeof(arg); -+ memset(&arg, 0, sizeof(arg)); -+ arg.result = result; -+ padded_iov[1].iov_base = &arg; -+ padded_iov[1].iov_len = sizeof(arg); - -- memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); -+ memcpy(&padded_iov[2], iov, count * sizeof(struct iovec)); - -- res = send_reply_iov(req, 0, padded_iov, count + 2); 
-- free(padded_iov); -+ res = send_reply_iov(req, 0, padded_iov, count + 2); -+ free(padded_iov); - -- return res; -+ return res; - } - - int fuse_reply_poll(fuse_req_t req, unsigned revents) - { -- struct fuse_poll_out arg; -+ struct fuse_poll_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.revents = revents; -+ memset(&arg, 0, sizeof(arg)); -+ arg.revents = revents; - -- return send_reply_ok(req, &arg, sizeof(arg)); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - int fuse_reply_lseek(fuse_req_t req, off_t off) - { -- struct fuse_lseek_out arg; -+ struct fuse_lseek_out arg; - -- memset(&arg, 0, sizeof(arg)); -- arg.offset = off; -+ memset(&arg, 0, sizeof(arg)); -+ arg.offset = off; - -- return send_reply_ok(req, &arg, sizeof(arg)); -+ return send_reply_ok(req, &arg, sizeof(arg)); - } - - static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- char *name = (char *) inarg; -+ char *name = (char *)inarg; - -- if (req->se->op.lookup) -- req->se->op.lookup(req, nodeid, name); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.lookup) { -+ req->se->op.lookup(req, nodeid, name); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_forget_in *arg = (struct fuse_forget_in *) inarg; -+ struct fuse_forget_in *arg = (struct fuse_forget_in *)inarg; - -- if (req->se->op.forget) -- req->se->op.forget(req, nodeid, arg->nlookup); -- else -- fuse_reply_none(req); -+ if (req->se->op.forget) { -+ req->se->op.forget(req, nodeid, arg->nlookup); -+ } else { -+ fuse_reply_none(req); -+ } - } - - static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, -- const void *inarg) -+ const void *inarg) - { -- struct fuse_batch_forget_in *arg = (void *) inarg; -- struct fuse_forget_one *param = (void *) PARAM(arg); -- unsigned int i; -+ struct fuse_batch_forget_in *arg = (void *)inarg; -+ struct fuse_forget_one *param = (void *)PARAM(arg); -+ 
unsigned int i; - -- (void) nodeid; -+ (void)nodeid; - -- if (req->se->op.forget_multi) { -- req->se->op.forget_multi(req, arg->count, -- (struct fuse_forget_data *) param); -- } else if (req->se->op.forget) { -- for (i = 0; i < arg->count; i++) { -- struct fuse_forget_one *forget = ¶m[i]; -- struct fuse_req *dummy_req; -+ if (req->se->op.forget_multi) { -+ req->se->op.forget_multi(req, arg->count, -+ (struct fuse_forget_data *)param); -+ } else if (req->se->op.forget) { -+ for (i = 0; i < arg->count; i++) { -+ struct fuse_forget_one *forget = ¶m[i]; -+ struct fuse_req *dummy_req; - -- dummy_req = fuse_ll_alloc_req(req->se); -- if (dummy_req == NULL) -- break; -+ dummy_req = fuse_ll_alloc_req(req->se); -+ if (dummy_req == NULL) { -+ break; -+ } - -- dummy_req->unique = req->unique; -- dummy_req->ctx = req->ctx; -- dummy_req->ch = NULL; -+ dummy_req->unique = req->unique; -+ dummy_req->ctx = req->ctx; -+ dummy_req->ch = NULL; - -- req->se->op.forget(dummy_req, forget->nodeid, -- forget->nlookup); -- } -- fuse_reply_none(req); -- } else { -- fuse_reply_none(req); -- } -+ req->se->op.forget(dummy_req, forget->nodeid, forget->nlookup); -+ } -+ fuse_reply_none(req); -+ } else { -+ fuse_reply_none(req); -+ } - } - - static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_file_info *fip = NULL; -- struct fuse_file_info fi; -+ struct fuse_file_info *fip = NULL; -+ struct fuse_file_info fi; - -- if (req->se->conn.proto_minor >= 9) { -- struct fuse_getattr_in *arg = (struct fuse_getattr_in *) inarg; -+ if (req->se->conn.proto_minor >= 9) { -+ struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; - -- if (arg->getattr_flags & FUSE_GETATTR_FH) { -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fip = &fi; -- } -- } -+ if (arg->getattr_flags & FUSE_GETATTR_FH) { -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fip = &fi; -+ } -+ } - -- if (req->se->op.getattr) -- req->se->op.getattr(req, nodeid, fip); -- else -- 
fuse_reply_err(req, ENOSYS); -+ if (req->se->op.getattr) { -+ req->se->op.getattr(req, nodeid, fip); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_setattr_in *arg = (struct fuse_setattr_in *) inarg; -- -- if (req->se->op.setattr) { -- struct fuse_file_info *fi = NULL; -- struct fuse_file_info fi_store; -- struct stat stbuf; -- memset(&stbuf, 0, sizeof(stbuf)); -- convert_attr(arg, &stbuf); -- if (arg->valid & FATTR_FH) { -- arg->valid &= ~FATTR_FH; -- memset(&fi_store, 0, sizeof(fi_store)); -- fi = &fi_store; -- fi->fh = arg->fh; -- } -- arg->valid &= -- FUSE_SET_ATTR_MODE | -- FUSE_SET_ATTR_UID | -- FUSE_SET_ATTR_GID | -- FUSE_SET_ATTR_SIZE | -- FUSE_SET_ATTR_ATIME | -- FUSE_SET_ATTR_MTIME | -- FUSE_SET_ATTR_ATIME_NOW | -- FUSE_SET_ATTR_MTIME_NOW | -- FUSE_SET_ATTR_CTIME; -- -- req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); -- } else -- fuse_reply_err(req, ENOSYS); -+ struct fuse_setattr_in *arg = (struct fuse_setattr_in *)inarg; -+ -+ if (req->se->op.setattr) { -+ struct fuse_file_info *fi = NULL; -+ struct fuse_file_info fi_store; -+ struct stat stbuf; -+ memset(&stbuf, 0, sizeof(stbuf)); -+ convert_attr(arg, &stbuf); -+ if (arg->valid & FATTR_FH) { -+ arg->valid &= ~FATTR_FH; -+ memset(&fi_store, 0, sizeof(fi_store)); -+ fi = &fi_store; -+ fi->fh = arg->fh; -+ } -+ arg->valid &= FUSE_SET_ATTR_MODE | FUSE_SET_ATTR_UID | -+ FUSE_SET_ATTR_GID | FUSE_SET_ATTR_SIZE | -+ FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME | -+ FUSE_SET_ATTR_ATIME_NOW | FUSE_SET_ATTR_MTIME_NOW | -+ FUSE_SET_ATTR_CTIME; -+ -+ req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_access_in *arg = (struct fuse_access_in *) inarg; -+ struct fuse_access_in *arg = (struct fuse_access_in *)inarg; - -- if (req->se->op.access) -- 
req->se->op.access(req, nodeid, arg->mask); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.access) { -+ req->se->op.access(req, nodeid, arg->mask); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- (void) inarg; -+ (void)inarg; - -- if (req->se->op.readlink) -- req->se->op.readlink(req, nodeid); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.readlink) { -+ req->se->op.readlink(req, nodeid); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_mknod_in *arg = (struct fuse_mknod_in *) inarg; -- char *name = PARAM(arg); -+ struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; -+ char *name = PARAM(arg); - -- if (req->se->conn.proto_minor >= 12) -- req->ctx.umask = arg->umask; -- else -- name = (char *) inarg + FUSE_COMPAT_MKNOD_IN_SIZE; -+ if (req->se->conn.proto_minor >= 12) { -+ req->ctx.umask = arg->umask; -+ } else { -+ name = (char *)inarg + FUSE_COMPAT_MKNOD_IN_SIZE; -+ } - -- if (req->se->op.mknod) -- req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.mknod) { -+ req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *) inarg; -+ struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; - -- if (req->se->conn.proto_minor >= 12) -- req->ctx.umask = arg->umask; -+ if (req->se->conn.proto_minor >= 12) { -+ req->ctx.umask = arg->umask; -+ } - -- if (req->se->op.mkdir) -- req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.mkdir) { -+ req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); -+ } else { -+ fuse_reply_err(req, ENOSYS); 
-+ } - } - - static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- char *name = (char *) inarg; -+ char *name = (char *)inarg; - -- if (req->se->op.unlink) -- req->se->op.unlink(req, nodeid, name); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.unlink) { -+ req->se->op.unlink(req, nodeid, name); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- char *name = (char *) inarg; -+ char *name = (char *)inarg; - -- if (req->se->op.rmdir) -- req->se->op.rmdir(req, nodeid, name); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.rmdir) { -+ req->se->op.rmdir(req, nodeid, name); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- char *name = (char *) inarg; -- char *linkname = ((char *) inarg) + strlen((char *) inarg) + 1; -+ char *name = (char *)inarg; -+ char *linkname = ((char *)inarg) + strlen((char *)inarg) + 1; - -- if (req->se->op.symlink) -- req->se->op.symlink(req, linkname, nodeid, name); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.symlink) { -+ req->se->op.symlink(req, linkname, nodeid, name); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_rename_in *arg = (struct fuse_rename_in *) inarg; -- char *oldname = PARAM(arg); -- char *newname = oldname + strlen(oldname) + 1; -+ struct fuse_rename_in *arg = (struct fuse_rename_in *)inarg; -+ char *oldname = PARAM(arg); -+ char *newname = oldname + strlen(oldname) + 1; - -- if (req->se->op.rename) -- req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, -- 0); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.rename) { -+ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, 0); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void 
do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_rename2_in *arg = (struct fuse_rename2_in *) inarg; -- char *oldname = PARAM(arg); -- char *newname = oldname + strlen(oldname) + 1; -+ struct fuse_rename2_in *arg = (struct fuse_rename2_in *)inarg; -+ char *oldname = PARAM(arg); -+ char *newname = oldname + strlen(oldname) + 1; - -- if (req->se->op.rename) -- req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, -- arg->flags); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.rename) { -+ req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, -+ arg->flags); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_link_in *arg = (struct fuse_link_in *) inarg; -+ struct fuse_link_in *arg = (struct fuse_link_in *)inarg; - -- if (req->se->op.link) -- req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.link) { -+ req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_create_in *arg = (struct fuse_create_in *) inarg; -+ struct fuse_create_in *arg = (struct fuse_create_in *)inarg; - -- if (req->se->op.create) { -- struct fuse_file_info fi; -- char *name = PARAM(arg); -+ if (req->se->op.create) { -+ struct fuse_file_info fi; -+ char *name = PARAM(arg); - -- memset(&fi, 0, sizeof(fi)); -- fi.flags = arg->flags; -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; - -- if (req->se->conn.proto_minor >= 12) -- req->ctx.umask = arg->umask; -- else -- name = (char *) inarg + sizeof(struct fuse_open_in); -+ if (req->se->conn.proto_minor >= 12) { -+ req->ctx.umask = arg->umask; -+ } else { -+ name = (char *)inarg + sizeof(struct fuse_open_in); -+ } - -- req->se->op.create(req, nodeid, name, arg->mode, &fi); 
-- } else -- fuse_reply_err(req, ENOSYS); -+ req->se->op.create(req, nodeid, name, arg->mode, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_open_in *arg = (struct fuse_open_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_open_in *arg = (struct fuse_open_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.flags = arg->flags; -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; - -- if (req->se->op.open) -- req->se->op.open(req, nodeid, &fi); -- else -- fuse_reply_open(req, &fi); -+ if (req->se->op.open) { -+ req->se->op.open(req, nodeid, &fi); -+ } else { -+ fuse_reply_open(req, &fi); -+ } - } - - static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -+ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; - -- if (req->se->op.read) { -- struct fuse_file_info fi; -+ if (req->se->op.read) { -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- if (req->se->conn.proto_minor >= 9) { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- } -- req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); -- } else -- fuse_reply_err(req, ENOSYS); -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ if (req->se->conn.proto_minor >= 9) { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ } -+ req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_write_in *arg = (struct fuse_write_in *) inarg; -- struct fuse_file_info fi; -- char *param; -+ struct fuse_write_in *arg = (struct fuse_write_in *)inarg; -+ struct fuse_file_info fi; -+ char *param; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.writepage = (arg->write_flags & 
FUSE_WRITE_CACHE) != 0; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; - -- if (req->se->conn.proto_minor < 9) { -- param = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; -- } else { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- param = PARAM(arg); -- } -+ if (req->se->conn.proto_minor < 9) { -+ param = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; -+ } else { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ param = PARAM(arg); -+ } - -- if (req->se->op.write) -- req->se->op.write(req, nodeid, param, arg->size, -- arg->offset, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.write) { -+ req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, -- const struct fuse_buf *ibuf) --{ -- struct fuse_session *se = req->se; -- struct fuse_bufvec bufv = { -- .buf[0] = *ibuf, -- .count = 1, -- }; -- struct fuse_write_in *arg = (struct fuse_write_in *) inarg; -- struct fuse_file_info fi; -- -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; -- -- if (se->conn.proto_minor < 9) { -- bufv.buf[0].mem = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE; -- bufv.buf[0].size -= sizeof(struct fuse_in_header) + -- FUSE_COMPAT_WRITE_IN_SIZE; -- assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); -- } else { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) -- bufv.buf[0].mem = PARAM(arg); -- -- bufv.buf[0].size -= sizeof(struct fuse_in_header) + -- sizeof(struct fuse_write_in); -- } -- if (bufv.buf[0].size < arg->size) { -- fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); -- fuse_reply_err(req, EIO); -- return; -- } -- bufv.buf[0].size = arg->size; -- -- se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); -+ 
const struct fuse_buf *ibuf) -+{ -+ struct fuse_session *se = req->se; -+ struct fuse_bufvec bufv = { -+ .buf[0] = *ibuf, -+ .count = 1, -+ }; -+ struct fuse_write_in *arg = (struct fuse_write_in *)inarg; -+ struct fuse_file_info fi; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; -+ -+ if (se->conn.proto_minor < 9) { -+ bufv.buf[0].mem = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; -+ bufv.buf[0].size -= -+ sizeof(struct fuse_in_header) + FUSE_COMPAT_WRITE_IN_SIZE; -+ assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); -+ } else { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -+ bufv.buf[0].mem = PARAM(arg); -+ } -+ -+ bufv.buf[0].size -= -+ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); -+ } -+ if (bufv.buf[0].size < arg->size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); -+ fuse_reply_err(req, EIO); -+ return; -+ } -+ bufv.buf[0].size = arg->size; -+ -+ se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); - } - - static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_flush_in *arg = (struct fuse_flush_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_flush_in *arg = (struct fuse_flush_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.flush = 1; -- if (req->se->conn.proto_minor >= 7) -- fi.lock_owner = arg->lock_owner; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.flush = 1; -+ if (req->se->conn.proto_minor >= 7) { -+ fi.lock_owner = arg->lock_owner; -+ } - -- if (req->se->op.flush) -- req->se->op.flush(req, nodeid, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.flush) { -+ req->se->op.flush(req, nodeid, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_release_in *arg 
= (struct fuse_release_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_release_in *arg = (struct fuse_release_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.flags = arg->flags; -- fi.fh = arg->fh; -- if (req->se->conn.proto_minor >= 8) { -- fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; -- fi.lock_owner = arg->lock_owner; -- } -- if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { -- fi.flock_release = 1; -- fi.lock_owner = arg->lock_owner; -- } -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ fi.fh = arg->fh; -+ if (req->se->conn.proto_minor >= 8) { -+ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; -+ fi.lock_owner = arg->lock_owner; -+ } -+ if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { -+ fi.flock_release = 1; -+ fi.lock_owner = arg->lock_owner; -+ } - -- if (req->se->op.release) -- req->se->op.release(req, nodeid, &fi); -- else -- fuse_reply_err(req, 0); -+ if (req->se->op.release) { -+ req->se->op.release(req, nodeid, &fi); -+ } else { -+ fuse_reply_err(req, 0); -+ } - } - - static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; -- struct fuse_file_info fi; -- int datasync = arg->fsync_flags & 1; -+ struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; -+ struct fuse_file_info fi; -+ int datasync = arg->fsync_flags & 1; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.fsync) -- req->se->op.fsync(req, nodeid, datasync, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.fsync) { -+ req->se->op.fsync(req, nodeid, datasync, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_open_in *arg = (struct fuse_open_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_open_in *arg = (struct 
fuse_open_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.flags = arg->flags; -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; - -- if (req->se->op.opendir) -- req->se->op.opendir(req, nodeid, &fi); -- else -- fuse_reply_open(req, &fi); -+ if (req->se->op.opendir) { -+ req->se->op.opendir(req, nodeid, &fi); -+ } else { -+ fuse_reply_open(req, &fi); -+ } - } - - static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.readdir) -- req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.readdir) { -+ req->se->op.readdir(req, nodeid, arg->size, arg->offset, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_read_in *arg = (struct fuse_read_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_read_in *arg = (struct fuse_read_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.readdirplus) -- req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.readdirplus) { -+ req->se->op.readdirplus(req, nodeid, arg->size, arg->offset, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_release_in *arg = (struct fuse_release_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_release_in *arg = (struct fuse_release_in *)inarg; -+ struct 
fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.flags = arg->flags; -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.flags = arg->flags; -+ fi.fh = arg->fh; - -- if (req->se->op.releasedir) -- req->se->op.releasedir(req, nodeid, &fi); -- else -- fuse_reply_err(req, 0); -+ if (req->se->op.releasedir) { -+ req->se->op.releasedir(req, nodeid, &fi); -+ } else { -+ fuse_reply_err(req, 0); -+ } - } - - static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg; -- struct fuse_file_info fi; -- int datasync = arg->fsync_flags & 1; -+ struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; -+ struct fuse_file_info fi; -+ int datasync = arg->fsync_flags & 1; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.fsyncdir) -- req->se->op.fsyncdir(req, nodeid, datasync, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.fsyncdir) { -+ req->se->op.fsyncdir(req, nodeid, datasync, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- (void) nodeid; -- (void) inarg; -+ (void)nodeid; -+ (void)inarg; - -- if (req->se->op.statfs) -- req->se->op.statfs(req, nodeid); -- else { -- struct statvfs buf = { -- .f_namemax = 255, -- .f_bsize = 512, -- }; -- fuse_reply_statfs(req, &buf); -- } -+ if (req->se->op.statfs) { -+ req->se->op.statfs(req, nodeid); -+ } else { -+ struct statvfs buf = { -+ .f_namemax = 255, -+ .f_bsize = 512, -+ }; -+ fuse_reply_statfs(req, &buf); -+ } - } - - static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *) inarg; -- char *name = PARAM(arg); -- char *value = name + strlen(name) + 1; -+ struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *)inarg; -+ char *name = PARAM(arg); -+ char *value = name 
+ strlen(name) + 1; - -- if (req->se->op.setxattr) -- req->se->op.setxattr(req, nodeid, name, value, arg->size, -- arg->flags); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.setxattr) { -+ req->se->op.setxattr(req, nodeid, name, value, arg->size, arg->flags); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; -+ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; - -- if (req->se->op.getxattr) -- req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.getxattr) { -+ req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg; -+ struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; - -- if (req->se->op.listxattr) -- req->se->op.listxattr(req, nodeid, arg->size); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.listxattr) { -+ req->se->op.listxattr(req, nodeid, arg->size); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- char *name = (char *) inarg; -+ char *name = (char *)inarg; - -- if (req->se->op.removexattr) -- req->se->op.removexattr(req, nodeid, name); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.removexattr) { -+ req->se->op.removexattr(req, nodeid, name); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void convert_fuse_file_lock(struct fuse_file_lock *fl, -- struct flock *flock) -+ struct flock *flock) - { -- memset(flock, 0, sizeof(struct flock)); -- flock->l_type = fl->type; -- flock->l_whence = SEEK_SET; -- flock->l_start = fl->start; -- if (fl->end == OFFSET_MAX) -- 
flock->l_len = 0; -- else -- flock->l_len = fl->end - fl->start + 1; -- flock->l_pid = fl->pid; -+ memset(flock, 0, sizeof(struct flock)); -+ flock->l_type = fl->type; -+ flock->l_whence = SEEK_SET; -+ flock->l_start = fl->start; -+ if (fl->end == OFFSET_MAX) { -+ flock->l_len = 0; -+ } else { -+ flock->l_len = fl->end - fl->start + 1; -+ } -+ flock->l_pid = fl->pid; - } - - static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; -- struct fuse_file_info fi; -- struct flock flock; -+ struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; -+ struct fuse_file_info fi; -+ struct flock flock; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.lock_owner = arg->owner; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.lock_owner = arg->owner; - -- convert_fuse_file_lock(&arg->lk, &flock); -- if (req->se->op.getlk) -- req->se->op.getlk(req, nodeid, &fi, &flock); -- else -- fuse_reply_err(req, ENOSYS); -+ convert_fuse_file_lock(&arg->lk, &flock); -+ if (req->se->op.getlk) { -+ req->se->op.getlk(req, nodeid, &fi, &flock); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, -- const void *inarg, int sleep) --{ -- struct fuse_lk_in *arg = (struct fuse_lk_in *) inarg; -- struct fuse_file_info fi; -- struct flock flock; -- -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.lock_owner = arg->owner; -- -- if (arg->lk_flags & FUSE_LK_FLOCK) { -- int op = 0; -- -- switch (arg->lk.type) { -- case F_RDLCK: -- op = LOCK_SH; -- break; -- case F_WRLCK: -- op = LOCK_EX; -- break; -- case F_UNLCK: -- op = LOCK_UN; -- break; -- } -- if (!sleep) -- op |= LOCK_NB; -- -- if (req->se->op.flock) -- req->se->op.flock(req, nodeid, &fi, op); -- else -- fuse_reply_err(req, ENOSYS); -- } else { -- convert_fuse_file_lock(&arg->lk, &flock); -- if (req->se->op.setlk) -- req->se->op.setlk(req, nodeid, &fi, &flock, sleep); -- else -- 
fuse_reply_err(req, ENOSYS); -- } -+ const void *inarg, int sleep) -+{ -+ struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; -+ struct fuse_file_info fi; -+ struct flock flock; -+ -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.lock_owner = arg->owner; -+ -+ if (arg->lk_flags & FUSE_LK_FLOCK) { -+ int op = 0; -+ -+ switch (arg->lk.type) { -+ case F_RDLCK: -+ op = LOCK_SH; -+ break; -+ case F_WRLCK: -+ op = LOCK_EX; -+ break; -+ case F_UNLCK: -+ op = LOCK_UN; -+ break; -+ } -+ if (!sleep) { -+ op |= LOCK_NB; -+ } -+ -+ if (req->se->op.flock) { -+ req->se->op.flock(req, nodeid, &fi, op); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } -+ } else { -+ convert_fuse_file_lock(&arg->lk, &flock); -+ if (req->se->op.setlk) { -+ req->se->op.setlk(req, nodeid, &fi, &flock, sleep); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } -+ } - } - - static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- do_setlk_common(req, nodeid, inarg, 0); -+ do_setlk_common(req, nodeid, inarg, 0); - } - - static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- do_setlk_common(req, nodeid, inarg, 1); -+ do_setlk_common(req, nodeid, inarg, 1); - } - - static int find_interrupted(struct fuse_session *se, struct fuse_req *req) - { -- struct fuse_req *curr; -- -- for (curr = se->list.next; curr != &se->list; curr = curr->next) { -- if (curr->unique == req->u.i.unique) { -- fuse_interrupt_func_t func; -- void *data; -- -- curr->ctr++; -- pthread_mutex_unlock(&se->lock); -- -- /* Ugh, ugly locking */ -- pthread_mutex_lock(&curr->lock); -- pthread_mutex_lock(&se->lock); -- curr->interrupted = 1; -- func = curr->u.ni.func; -- data = curr->u.ni.data; -- pthread_mutex_unlock(&se->lock); -- if (func) -- func(curr, data); -- pthread_mutex_unlock(&curr->lock); -- -- pthread_mutex_lock(&se->lock); -- curr->ctr--; -- if (!curr->ctr) -- destroy_req(curr); -- -- return 1; -- } -- } -- for (curr = se->interrupts.next; curr != &se->interrupts; -- curr 
= curr->next) { -- if (curr->u.i.unique == req->u.i.unique) -- return 1; -- } -- return 0; -+ struct fuse_req *curr; -+ -+ for (curr = se->list.next; curr != &se->list; curr = curr->next) { -+ if (curr->unique == req->u.i.unique) { -+ fuse_interrupt_func_t func; -+ void *data; -+ -+ curr->ctr++; -+ pthread_mutex_unlock(&se->lock); -+ -+ /* Ugh, ugly locking */ -+ pthread_mutex_lock(&curr->lock); -+ pthread_mutex_lock(&se->lock); -+ curr->interrupted = 1; -+ func = curr->u.ni.func; -+ data = curr->u.ni.data; -+ pthread_mutex_unlock(&se->lock); -+ if (func) { -+ func(curr, data); -+ } -+ pthread_mutex_unlock(&curr->lock); -+ -+ pthread_mutex_lock(&se->lock); -+ curr->ctr--; -+ if (!curr->ctr) { -+ destroy_req(curr); -+ } -+ -+ return 1; -+ } -+ } -+ for (curr = se->interrupts.next; curr != &se->interrupts; -+ curr = curr->next) { -+ if (curr->u.i.unique == req->u.i.unique) { -+ return 1; -+ } -+ } -+ return 0; - } - - static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *) inarg; -- struct fuse_session *se = req->se; -+ struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *)inarg; -+ struct fuse_session *se = req->se; - -- (void) nodeid; -- if (se->debug) -- fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", -- (unsigned long long) arg->unique); -+ (void)nodeid; -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", -+ (unsigned long long)arg->unique); -+ } - -- req->u.i.unique = arg->unique; -+ req->u.i.unique = arg->unique; - -- pthread_mutex_lock(&se->lock); -- if (find_interrupted(se, req)) -- destroy_req(req); -- else -- list_add_req(req, &se->interrupts); -- pthread_mutex_unlock(&se->lock); -+ pthread_mutex_lock(&se->lock); -+ if (find_interrupted(se, req)) { -+ destroy_req(req); -+ } else { -+ list_add_req(req, &se->interrupts); -+ } -+ pthread_mutex_unlock(&se->lock); - } - - static struct fuse_req *check_interrupt(struct fuse_session *se, -- struct 
fuse_req *req) --{ -- struct fuse_req *curr; -- -- for (curr = se->interrupts.next; curr != &se->interrupts; -- curr = curr->next) { -- if (curr->u.i.unique == req->unique) { -- req->interrupted = 1; -- list_del_req(curr); -- free(curr); -- return NULL; -- } -- } -- curr = se->interrupts.next; -- if (curr != &se->interrupts) { -- list_del_req(curr); -- list_init_req(curr); -- return curr; -- } else -- return NULL; -+ struct fuse_req *req) -+{ -+ struct fuse_req *curr; -+ -+ for (curr = se->interrupts.next; curr != &se->interrupts; -+ curr = curr->next) { -+ if (curr->u.i.unique == req->unique) { -+ req->interrupted = 1; -+ list_del_req(curr); -+ free(curr); -+ return NULL; -+ } -+ } -+ curr = se->interrupts.next; -+ if (curr != &se->interrupts) { -+ list_del_req(curr); -+ list_init_req(curr); -+ return curr; -+ } else { -+ return NULL; -+ } - } - - static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_bmap_in *arg = (struct fuse_bmap_in *) inarg; -+ struct fuse_bmap_in *arg = (struct fuse_bmap_in *)inarg; - -- if (req->se->op.bmap) -- req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.bmap) { -+ req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *) inarg; -- unsigned int flags = arg->flags; -- void *in_buf = arg->in_size ? PARAM(arg) : NULL; -- struct fuse_file_info fi; -+ struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *)inarg; -+ unsigned int flags = arg->flags; -+ void *in_buf = arg->in_size ? 
PARAM(arg) : NULL; -+ struct fuse_file_info fi; - -- if (flags & FUSE_IOCTL_DIR && -- !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { -- fuse_reply_err(req, ENOTTY); -- return; -- } -+ if (flags & FUSE_IOCTL_DIR && !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { -+ fuse_reply_err(req, ENOTTY); -+ return; -+ } - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && -- !(flags & FUSE_IOCTL_32BIT)) { -- req->ioctl_64bit = 1; -- } -+ if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && -+ !(flags & FUSE_IOCTL_32BIT)) { -+ req->ioctl_64bit = 1; -+ } - -- if (req->se->op.ioctl) -- req->se->op.ioctl(req, nodeid, arg->cmd, -- (void *)(uintptr_t)arg->arg, &fi, flags, -- in_buf, arg->in_size, arg->out_size); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.ioctl) { -+ req->se->op.ioctl(req, nodeid, arg->cmd, (void *)(uintptr_t)arg->arg, -+ &fi, flags, in_buf, arg->in_size, arg->out_size); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) - { -- free(ph); -+ free(ph); - } - - static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_poll_in *arg = (struct fuse_poll_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_poll_in *arg = (struct fuse_poll_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fi.poll_events = arg->events; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fi.poll_events = arg->events; - -- if (req->se->op.poll) { -- struct fuse_pollhandle *ph = NULL; -+ if (req->se->op.poll) { -+ struct fuse_pollhandle *ph = NULL; - -- if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { -- ph = malloc(sizeof(struct fuse_pollhandle)); -- if (ph == NULL) { -- fuse_reply_err(req, ENOMEM); -- return; -- } -- ph->kh = arg->kh; -- ph->se = req->se; -- } -+ if (arg->flags & FUSE_POLL_SCHEDULE_NOTIFY) { -+ 
ph = malloc(sizeof(struct fuse_pollhandle)); -+ if (ph == NULL) { -+ fuse_reply_err(req, ENOMEM); -+ return; -+ } -+ ph->kh = arg->kh; -+ ph->se = req->se; -+ } - -- req->se->op.poll(req, nodeid, &fi, ph); -- } else { -- fuse_reply_err(req, ENOSYS); -- } -+ req->se->op.poll(req, nodeid, &fi, ph); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.fallocate) -- req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.fallocate) { -+ req->se->op.fallocate(req, nodeid, arg->mode, arg->offset, arg->length, -+ &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - --static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, const void *inarg) -+static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, -+ const void *inarg) - { -- struct fuse_copy_file_range_in *arg = (struct fuse_copy_file_range_in *) inarg; -- struct fuse_file_info fi_in, fi_out; -+ struct fuse_copy_file_range_in *arg = -+ (struct fuse_copy_file_range_in *)inarg; -+ struct fuse_file_info fi_in, fi_out; - -- memset(&fi_in, 0, sizeof(fi_in)); -- fi_in.fh = arg->fh_in; -+ memset(&fi_in, 0, sizeof(fi_in)); -+ fi_in.fh = arg->fh_in; - -- memset(&fi_out, 0, sizeof(fi_out)); -- fi_out.fh = arg->fh_out; -+ memset(&fi_out, 0, sizeof(fi_out)); -+ fi_out.fh = arg->fh_out; - - -- if (req->se->op.copy_file_range) -- req->se->op.copy_file_range(req, nodeid_in, arg->off_in, -- &fi_in, arg->nodeid_out, -- arg->off_out, &fi_out, arg->len, -- arg->flags); -- else -- fuse_reply_err(req, ENOSYS); -+ if 
(req->se->op.copy_file_range) { -+ req->se->op.copy_file_range(req, nodeid_in, arg->off_in, &fi_in, -+ arg->nodeid_out, arg->off_out, &fi_out, -+ arg->len, arg->flags); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_lseek_in *arg = (struct fuse_lseek_in *) inarg; -- struct fuse_file_info fi; -+ struct fuse_lseek_in *arg = (struct fuse_lseek_in *)inarg; -+ struct fuse_file_info fi; - -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; - -- if (req->se->op.lseek) -- req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); -- else -- fuse_reply_err(req, ENOSYS); -+ if (req->se->op.lseek) { -+ req->se->op.lseek(req, nodeid, arg->offset, arg->whence, &fi); -+ } else { -+ fuse_reply_err(req, ENOSYS); -+ } - } - - static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_init_in *arg = (struct fuse_init_in *) inarg; -- struct fuse_init_out outarg; -- struct fuse_session *se = req->se; -- size_t bufsize = se->bufsize; -- size_t outargsize = sizeof(outarg); -- -- (void) nodeid; -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); -- if (arg->major == 7 && arg->minor >= 6) { -- fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); -- fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", -- arg->max_readahead); -- } -- } -- se->conn.proto_major = arg->major; -- se->conn.proto_minor = arg->minor; -- se->conn.capable = 0; -- se->conn.want = 0; -- -- memset(&outarg, 0, sizeof(outarg)); -- outarg.major = FUSE_KERNEL_VERSION; -- outarg.minor = FUSE_KERNEL_MINOR_VERSION; -- -- if (arg->major < 7) { -- fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", -- arg->major, arg->minor); -- fuse_reply_err(req, EPROTO); -- return; -- } -- -- if (arg->major > 7) { -- /* Wait for a second INIT request with a 7.X version */ -- send_reply_ok(req, 
&outarg, sizeof(outarg)); -- return; -- } -- -- if (arg->minor >= 6) { -- if (arg->max_readahead < se->conn.max_readahead) -- se->conn.max_readahead = arg->max_readahead; -- if (arg->flags & FUSE_ASYNC_READ) -- se->conn.capable |= FUSE_CAP_ASYNC_READ; -- if (arg->flags & FUSE_POSIX_LOCKS) -- se->conn.capable |= FUSE_CAP_POSIX_LOCKS; -- if (arg->flags & FUSE_ATOMIC_O_TRUNC) -- se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; -- if (arg->flags & FUSE_EXPORT_SUPPORT) -- se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; -- if (arg->flags & FUSE_DONT_MASK) -- se->conn.capable |= FUSE_CAP_DONT_MASK; -- if (arg->flags & FUSE_FLOCK_LOCKS) -- se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; -- if (arg->flags & FUSE_AUTO_INVAL_DATA) -- se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; -- if (arg->flags & FUSE_DO_READDIRPLUS) -- se->conn.capable |= FUSE_CAP_READDIRPLUS; -- if (arg->flags & FUSE_READDIRPLUS_AUTO) -- se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; -- if (arg->flags & FUSE_ASYNC_DIO) -- se->conn.capable |= FUSE_CAP_ASYNC_DIO; -- if (arg->flags & FUSE_WRITEBACK_CACHE) -- se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; -- if (arg->flags & FUSE_NO_OPEN_SUPPORT) -- se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; -- if (arg->flags & FUSE_PARALLEL_DIROPS) -- se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; -- if (arg->flags & FUSE_POSIX_ACL) -- se->conn.capable |= FUSE_CAP_POSIX_ACL; -- if (arg->flags & FUSE_HANDLE_KILLPRIV) -- se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; -- if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) -- se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; -- if (!(arg->flags & FUSE_MAX_PAGES)) { -- size_t max_bufsize = -- FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() -- + FUSE_BUFFER_HEADER_SIZE; -- if (bufsize > max_bufsize) { -- bufsize = max_bufsize; -- } -- } -- } else { -- se->conn.max_readahead = 0; -- } -- -- if (se->conn.proto_minor >= 14) { -+ struct fuse_init_in *arg = (struct fuse_init_in *)inarg; -+ struct fuse_init_out outarg; -+ struct fuse_session *se = req->se; -+ 
size_t bufsize = se->bufsize; -+ size_t outargsize = sizeof(outarg); -+ -+ (void)nodeid; -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); -+ if (arg->major == 7 && arg->minor >= 6) { -+ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); -+ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", -+ arg->max_readahead); -+ } -+ } -+ se->conn.proto_major = arg->major; -+ se->conn.proto_minor = arg->minor; -+ se->conn.capable = 0; -+ se->conn.want = 0; -+ -+ memset(&outarg, 0, sizeof(outarg)); -+ outarg.major = FUSE_KERNEL_VERSION; -+ outarg.minor = FUSE_KERNEL_MINOR_VERSION; -+ -+ if (arg->major < 7) { -+ fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", -+ arg->major, arg->minor); -+ fuse_reply_err(req, EPROTO); -+ return; -+ } -+ -+ if (arg->major > 7) { -+ /* Wait for a second INIT request with a 7.X version */ -+ send_reply_ok(req, &outarg, sizeof(outarg)); -+ return; -+ } -+ -+ if (arg->minor >= 6) { -+ if (arg->max_readahead < se->conn.max_readahead) { -+ se->conn.max_readahead = arg->max_readahead; -+ } -+ if (arg->flags & FUSE_ASYNC_READ) { -+ se->conn.capable |= FUSE_CAP_ASYNC_READ; -+ } -+ if (arg->flags & FUSE_POSIX_LOCKS) { -+ se->conn.capable |= FUSE_CAP_POSIX_LOCKS; -+ } -+ if (arg->flags & FUSE_ATOMIC_O_TRUNC) { -+ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; -+ } -+ if (arg->flags & FUSE_EXPORT_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; -+ } -+ if (arg->flags & FUSE_DONT_MASK) { -+ se->conn.capable |= FUSE_CAP_DONT_MASK; -+ } -+ if (arg->flags & FUSE_FLOCK_LOCKS) { -+ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; -+ } -+ if (arg->flags & FUSE_AUTO_INVAL_DATA) { -+ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; -+ } -+ if (arg->flags & FUSE_DO_READDIRPLUS) { -+ se->conn.capable |= FUSE_CAP_READDIRPLUS; -+ } -+ if (arg->flags & FUSE_READDIRPLUS_AUTO) { -+ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; -+ } -+ if (arg->flags & FUSE_ASYNC_DIO) { -+ se->conn.capable |= 
FUSE_CAP_ASYNC_DIO; -+ } -+ if (arg->flags & FUSE_WRITEBACK_CACHE) { -+ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; -+ } -+ if (arg->flags & FUSE_NO_OPEN_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; -+ } -+ if (arg->flags & FUSE_PARALLEL_DIROPS) { -+ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; -+ } -+ if (arg->flags & FUSE_POSIX_ACL) { -+ se->conn.capable |= FUSE_CAP_POSIX_ACL; -+ } -+ if (arg->flags & FUSE_HANDLE_KILLPRIV) { -+ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; -+ } -+ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; -+ } -+ if (!(arg->flags & FUSE_MAX_PAGES)) { -+ size_t max_bufsize = -+ FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + -+ FUSE_BUFFER_HEADER_SIZE; -+ if (bufsize > max_bufsize) { -+ bufsize = max_bufsize; -+ } -+ } -+ } else { -+ se->conn.max_readahead = 0; -+ } -+ -+ if (se->conn.proto_minor >= 14) { - #ifdef HAVE_SPLICE - #ifdef HAVE_VMSPLICE -- se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; -+ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; - #endif -- se->conn.capable |= FUSE_CAP_SPLICE_READ; -+ se->conn.capable |= FUSE_CAP_SPLICE_READ; - #endif -- } -- if (se->conn.proto_minor >= 18) -- se->conn.capable |= FUSE_CAP_IOCTL_DIR; -- -- /* Default settings for modern filesystems. -- * -- * Most of these capabilities were disabled by default in -- * libfuse2 for backwards compatibility reasons. In libfuse3, -- * we can finally enable them by default (as long as they're -- * supported by the kernel). 
-- */ --#define LL_SET_DEFAULT(cond, cap) \ -- if ((cond) && (se->conn.capable & (cap))) \ -- se->conn.want |= (cap) -- LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); -- LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); -- LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); -- LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); -- LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); -- LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); -- LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); -- LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); -- LL_SET_DEFAULT(se->op.getlk && se->op.setlk, -- FUSE_CAP_POSIX_LOCKS); -- LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); -- LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); -- LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, -- FUSE_CAP_READDIRPLUS_AUTO); -- se->conn.time_gran = 1; -- -- if (bufsize < FUSE_MIN_READ_BUFFER) { -- fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", -- bufsize); -- bufsize = FUSE_MIN_READ_BUFFER; -- } -- se->bufsize = bufsize; -- -- if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) -- se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; -- -- se->got_init = 1; -- if (se->op.init) -- se->op.init(se->userdata, &se->conn); -- -- if (se->conn.want & (~se->conn.capable)) { -- fuse_log(FUSE_LOG_ERR, "fuse: error: filesystem requested capabilities " -- "0x%x that are not supported by kernel, aborting.\n", -- se->conn.want & (~se->conn.capable)); -- fuse_reply_err(req, EPROTO); -- se->error = -EPROTO; -- fuse_session_exit(se); -- return; -- } -- -- if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { -- se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; -- } -- if (arg->flags & FUSE_MAX_PAGES) { -- outarg.flags |= FUSE_MAX_PAGES; -- outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; -- } -- -- /* Always enable big writes, this is superseded -- by the max_write option */ -- outarg.flags |= FUSE_BIG_WRITES; -- -- if (se->conn.want & FUSE_CAP_ASYNC_READ) -- outarg.flags |= 
FUSE_ASYNC_READ; -- if (se->conn.want & FUSE_CAP_POSIX_LOCKS) -- outarg.flags |= FUSE_POSIX_LOCKS; -- if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) -- outarg.flags |= FUSE_ATOMIC_O_TRUNC; -- if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) -- outarg.flags |= FUSE_EXPORT_SUPPORT; -- if (se->conn.want & FUSE_CAP_DONT_MASK) -- outarg.flags |= FUSE_DONT_MASK; -- if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) -- outarg.flags |= FUSE_FLOCK_LOCKS; -- if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) -- outarg.flags |= FUSE_AUTO_INVAL_DATA; -- if (se->conn.want & FUSE_CAP_READDIRPLUS) -- outarg.flags |= FUSE_DO_READDIRPLUS; -- if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) -- outarg.flags |= FUSE_READDIRPLUS_AUTO; -- if (se->conn.want & FUSE_CAP_ASYNC_DIO) -- outarg.flags |= FUSE_ASYNC_DIO; -- if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) -- outarg.flags |= FUSE_WRITEBACK_CACHE; -- if (se->conn.want & FUSE_CAP_POSIX_ACL) -- outarg.flags |= FUSE_POSIX_ACL; -- outarg.max_readahead = se->conn.max_readahead; -- outarg.max_write = se->conn.max_write; -- if (se->conn.proto_minor >= 13) { -- if (se->conn.max_background >= (1 << 16)) -- se->conn.max_background = (1 << 16) - 1; -- if (se->conn.congestion_threshold > se->conn.max_background) -- se->conn.congestion_threshold = se->conn.max_background; -- if (!se->conn.congestion_threshold) { -- se->conn.congestion_threshold = -- se->conn.max_background * 3 / 4; -- } -- -- outarg.max_background = se->conn.max_background; -- outarg.congestion_threshold = se->conn.congestion_threshold; -- } -- if (se->conn.proto_minor >= 23) -- outarg.time_gran = se->conn.time_gran; -- -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); -- fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); -- fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", -- outarg.max_readahead); -- fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); -- fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", -- outarg.max_background); 
-- fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", -- outarg.congestion_threshold); -- fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", -- outarg.time_gran); -- } -- if (arg->minor < 5) -- outargsize = FUSE_COMPAT_INIT_OUT_SIZE; -- else if (arg->minor < 23) -- outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; -- -- send_reply_ok(req, &outarg, outargsize); -+ } -+ if (se->conn.proto_minor >= 18) { -+ se->conn.capable |= FUSE_CAP_IOCTL_DIR; -+ } -+ -+ /* -+ * Default settings for modern filesystems. -+ * -+ * Most of these capabilities were disabled by default in -+ * libfuse2 for backwards compatibility reasons. In libfuse3, -+ * we can finally enable them by default (as long as they're -+ * supported by the kernel). -+ */ -+#define LL_SET_DEFAULT(cond, cap) \ -+ if ((cond) && (se->conn.capable & (cap))) \ -+ se->conn.want |= (cap) -+ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_READ); -+ LL_SET_DEFAULT(1, FUSE_CAP_PARALLEL_DIROPS); -+ LL_SET_DEFAULT(1, FUSE_CAP_AUTO_INVAL_DATA); -+ LL_SET_DEFAULT(1, FUSE_CAP_HANDLE_KILLPRIV); -+ LL_SET_DEFAULT(1, FUSE_CAP_ASYNC_DIO); -+ LL_SET_DEFAULT(1, FUSE_CAP_IOCTL_DIR); -+ LL_SET_DEFAULT(1, FUSE_CAP_ATOMIC_O_TRUNC); -+ LL_SET_DEFAULT(se->op.write_buf, FUSE_CAP_SPLICE_READ); -+ LL_SET_DEFAULT(se->op.getlk && se->op.setlk, FUSE_CAP_POSIX_LOCKS); -+ LL_SET_DEFAULT(se->op.flock, FUSE_CAP_FLOCK_LOCKS); -+ LL_SET_DEFAULT(se->op.readdirplus, FUSE_CAP_READDIRPLUS); -+ LL_SET_DEFAULT(se->op.readdirplus && se->op.readdir, -+ FUSE_CAP_READDIRPLUS_AUTO); -+ se->conn.time_gran = 1; -+ -+ if (bufsize < FUSE_MIN_READ_BUFFER) { -+ fuse_log(FUSE_LOG_ERR, "fuse: warning: buffer size too small: %zu\n", -+ bufsize); -+ bufsize = FUSE_MIN_READ_BUFFER; -+ } -+ se->bufsize = bufsize; -+ -+ if (se->conn.max_write > bufsize - FUSE_BUFFER_HEADER_SIZE) { -+ se->conn.max_write = bufsize - FUSE_BUFFER_HEADER_SIZE; -+ } -+ -+ se->got_init = 1; -+ if (se->op.init) { -+ se->op.init(se->userdata, &se->conn); -+ } -+ -+ if (se->conn.want & (~se->conn.capable)) { -+ 
fuse_log(FUSE_LOG_ERR, -+ "fuse: error: filesystem requested capabilities " -+ "0x%x that are not supported by kernel, aborting.\n", -+ se->conn.want & (~se->conn.capable)); -+ fuse_reply_err(req, EPROTO); -+ se->error = -EPROTO; -+ fuse_session_exit(se); -+ return; -+ } -+ -+ if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { -+ se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; -+ } -+ if (arg->flags & FUSE_MAX_PAGES) { -+ outarg.flags |= FUSE_MAX_PAGES; -+ outarg.max_pages = (se->conn.max_write - 1) / getpagesize() + 1; -+ } -+ -+ /* -+ * Always enable big writes, this is superseded -+ * by the max_write option -+ */ -+ outarg.flags |= FUSE_BIG_WRITES; -+ -+ if (se->conn.want & FUSE_CAP_ASYNC_READ) { -+ outarg.flags |= FUSE_ASYNC_READ; -+ } -+ if (se->conn.want & FUSE_CAP_POSIX_LOCKS) { -+ outarg.flags |= FUSE_POSIX_LOCKS; -+ } -+ if (se->conn.want & FUSE_CAP_ATOMIC_O_TRUNC) { -+ outarg.flags |= FUSE_ATOMIC_O_TRUNC; -+ } -+ if (se->conn.want & FUSE_CAP_EXPORT_SUPPORT) { -+ outarg.flags |= FUSE_EXPORT_SUPPORT; -+ } -+ if (se->conn.want & FUSE_CAP_DONT_MASK) { -+ outarg.flags |= FUSE_DONT_MASK; -+ } -+ if (se->conn.want & FUSE_CAP_FLOCK_LOCKS) { -+ outarg.flags |= FUSE_FLOCK_LOCKS; -+ } -+ if (se->conn.want & FUSE_CAP_AUTO_INVAL_DATA) { -+ outarg.flags |= FUSE_AUTO_INVAL_DATA; -+ } -+ if (se->conn.want & FUSE_CAP_READDIRPLUS) { -+ outarg.flags |= FUSE_DO_READDIRPLUS; -+ } -+ if (se->conn.want & FUSE_CAP_READDIRPLUS_AUTO) { -+ outarg.flags |= FUSE_READDIRPLUS_AUTO; -+ } -+ if (se->conn.want & FUSE_CAP_ASYNC_DIO) { -+ outarg.flags |= FUSE_ASYNC_DIO; -+ } -+ if (se->conn.want & FUSE_CAP_WRITEBACK_CACHE) { -+ outarg.flags |= FUSE_WRITEBACK_CACHE; -+ } -+ if (se->conn.want & FUSE_CAP_POSIX_ACL) { -+ outarg.flags |= FUSE_POSIX_ACL; -+ } -+ outarg.max_readahead = se->conn.max_readahead; -+ outarg.max_write = se->conn.max_write; -+ if (se->conn.proto_minor >= 13) { -+ if (se->conn.max_background >= (1 << 16)) { -+ se->conn.max_background = (1 << 16) - 
1; -+ } -+ if (se->conn.congestion_threshold > se->conn.max_background) { -+ se->conn.congestion_threshold = se->conn.max_background; -+ } -+ if (!se->conn.congestion_threshold) { -+ se->conn.congestion_threshold = se->conn.max_background * 3 / 4; -+ } -+ -+ outarg.max_background = se->conn.max_background; -+ outarg.congestion_threshold = se->conn.congestion_threshold; -+ } -+ if (se->conn.proto_minor >= 23) { -+ outarg.time_gran = se->conn.time_gran; -+ } -+ -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, -+ outarg.minor); -+ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); -+ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", -+ outarg.max_readahead); -+ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); -+ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", -+ outarg.max_background); -+ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", -+ outarg.congestion_threshold); -+ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); -+ } -+ if (arg->minor < 5) { -+ outargsize = FUSE_COMPAT_INIT_OUT_SIZE; -+ } else if (arg->minor < 23) { -+ outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; -+ } -+ -+ send_reply_ok(req, &outarg, outargsize); - } - - static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { -- struct fuse_session *se = req->se; -+ struct fuse_session *se = req->se; - -- (void) nodeid; -- (void) inarg; -+ (void)nodeid; -+ (void)inarg; - -- se->got_destroy = 1; -- if (se->op.destroy) -- se->op.destroy(se->userdata); -+ se->got_destroy = 1; -+ if (se->op.destroy) { -+ se->op.destroy(se->userdata); -+ } - -- send_reply_ok(req, NULL, 0); -+ send_reply_ok(req, NULL, 0); - } - - static void list_del_nreq(struct fuse_notify_req *nreq) - { -- struct fuse_notify_req *prev = nreq->prev; -- struct fuse_notify_req *next = nreq->next; -- prev->next = next; -- next->prev = prev; -+ struct fuse_notify_req *prev = nreq->prev; -+ struct fuse_notify_req *next = nreq->next; -+ prev->next = 
next; -+ next->prev = prev; - } - - static void list_add_nreq(struct fuse_notify_req *nreq, -- struct fuse_notify_req *next) -+ struct fuse_notify_req *next) - { -- struct fuse_notify_req *prev = next->prev; -- nreq->next = next; -- nreq->prev = prev; -- prev->next = nreq; -- next->prev = nreq; -+ struct fuse_notify_req *prev = next->prev; -+ nreq->next = next; -+ nreq->prev = prev; -+ prev->next = nreq; -+ next->prev = nreq; - } - - static void list_init_nreq(struct fuse_notify_req *nreq) - { -- nreq->next = nreq; -- nreq->prev = nreq; -+ nreq->next = nreq; -+ nreq->prev = nreq; - } - - static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, -- const void *inarg, const struct fuse_buf *buf) -+ const void *inarg, const struct fuse_buf *buf) - { -- struct fuse_session *se = req->se; -- struct fuse_notify_req *nreq; -- struct fuse_notify_req *head; -+ struct fuse_session *se = req->se; -+ struct fuse_notify_req *nreq; -+ struct fuse_notify_req *head; - -- pthread_mutex_lock(&se->lock); -- head = &se->notify_list; -- for (nreq = head->next; nreq != head; nreq = nreq->next) { -- if (nreq->unique == req->unique) { -- list_del_nreq(nreq); -- break; -- } -- } -- pthread_mutex_unlock(&se->lock); -+ pthread_mutex_lock(&se->lock); -+ head = &se->notify_list; -+ for (nreq = head->next; nreq != head; nreq = nreq->next) { -+ if (nreq->unique == req->unique) { -+ list_del_nreq(nreq); -+ break; -+ } -+ } -+ pthread_mutex_unlock(&se->lock); - -- if (nreq != head) -- nreq->reply(nreq, req, nodeid, inarg, buf); -+ if (nreq != head) { -+ nreq->reply(nreq, req, nodeid, inarg, buf); -+ } - } - - static int send_notify_iov(struct fuse_session *se, int notify_code, -- struct iovec *iov, int count) -+ struct iovec *iov, int count) - { -- struct fuse_out_header out; -+ struct fuse_out_header out; - -- if (!se->got_init) -- return -ENOTCONN; -+ if (!se->got_init) { -+ return -ENOTCONN; -+ } - -- out.unique = 0; -- out.error = notify_code; -- iov[0].iov_base = &out; -- iov[0].iov_len 
= sizeof(struct fuse_out_header); -+ out.unique = 0; -+ out.error = notify_code; -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(struct fuse_out_header); - -- return fuse_send_msg(se, NULL, iov, count); -+ return fuse_send_msg(se, NULL, iov, count); - } - - int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) - { -- if (ph != NULL) { -- struct fuse_notify_poll_wakeup_out outarg; -- struct iovec iov[2]; -+ if (ph != NULL) { -+ struct fuse_notify_poll_wakeup_out outarg; -+ struct iovec iov[2]; - -- outarg.kh = ph->kh; -+ outarg.kh = ph->kh; - -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); - -- return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); -- } else { -- return 0; -- } -+ return send_notify_iov(ph->se, FUSE_NOTIFY_POLL, iov, 2); -+ } else { -+ return 0; -+ } - } - - int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, -- off_t off, off_t len) -+ off_t off, off_t len) - { -- struct fuse_notify_inval_inode_out outarg; -- struct iovec iov[2]; -+ struct fuse_notify_inval_inode_out outarg; -+ struct iovec iov[2]; -+ -+ if (!se) { -+ return -EINVAL; -+ } - -- if (!se) -- return -EINVAL; -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { -+ return -ENOSYS; -+ } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) -- return -ENOSYS; -- -- outarg.ino = ino; -- outarg.off = off; -- outarg.len = len; -+ outarg.ino = ino; -+ outarg.off = off; -+ outarg.len = len; - -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); - -- return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); -+ return send_notify_iov(se, FUSE_NOTIFY_INVAL_INODE, iov, 2); - } - - int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, -- const char *name, size_t namelen) -+ const char *name, size_t namelen) - { -- struct fuse_notify_inval_entry_out 
outarg; -- struct iovec iov[3]; -+ struct fuse_notify_inval_entry_out outarg; -+ struct iovec iov[3]; -+ -+ if (!se) { -+ return -EINVAL; -+ } - -- if (!se) -- return -EINVAL; -- -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) -- return -ENOSYS; -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { -+ return -ENOSYS; -+ } - -- outarg.parent = parent; -- outarg.namelen = namelen; -- outarg.padding = 0; -+ outarg.parent = parent; -+ outarg.namelen = namelen; -+ outarg.padding = 0; - -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -- iov[2].iov_base = (void *)name; -- iov[2].iov_len = namelen + 1; -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ iov[2].iov_base = (void *)name; -+ iov[2].iov_len = namelen + 1; - -- return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); -+ return send_notify_iov(se, FUSE_NOTIFY_INVAL_ENTRY, iov, 3); - } - --int fuse_lowlevel_notify_delete(struct fuse_session *se, -- fuse_ino_t parent, fuse_ino_t child, -- const char *name, size_t namelen) -+int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, -+ fuse_ino_t child, const char *name, -+ size_t namelen) - { -- struct fuse_notify_delete_out outarg; -- struct iovec iov[3]; -+ struct fuse_notify_delete_out outarg; -+ struct iovec iov[3]; - -- if (!se) -- return -EINVAL; -+ if (!se) { -+ return -EINVAL; -+ } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) -- return -ENOSYS; -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) { -+ return -ENOSYS; -+ } - -- outarg.parent = parent; -- outarg.child = child; -- outarg.namelen = namelen; -- outarg.padding = 0; -+ outarg.parent = parent; -+ outarg.child = child; -+ outarg.namelen = namelen; -+ outarg.padding = 0; - -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -- iov[2].iov_base = (void *)name; -- iov[2].iov_len = namelen + 1; -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); -+ iov[2].iov_base = 
(void *)name; -+ iov[2].iov_len = namelen + 1; - -- return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); -+ return send_notify_iov(se, FUSE_NOTIFY_DELETE, iov, 3); - } - - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags) -+ off_t offset, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags) - { -- struct fuse_out_header out; -- struct fuse_notify_store_out outarg; -- struct iovec iov[3]; -- size_t size = fuse_buf_size(bufv); -- int res; -+ struct fuse_out_header out; -+ struct fuse_notify_store_out outarg; -+ struct iovec iov[3]; -+ size_t size = fuse_buf_size(bufv); -+ int res; - -- if (!se) -- return -EINVAL; -+ if (!se) { -+ return -EINVAL; -+ } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) -- return -ENOSYS; -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { -+ return -ENOSYS; -+ } - -- out.unique = 0; -- out.error = FUSE_NOTIFY_STORE; -+ out.unique = 0; -+ out.error = FUSE_NOTIFY_STORE; - -- outarg.nodeid = ino; -- outarg.offset = offset; -- outarg.size = size; -- outarg.padding = 0; -+ outarg.nodeid = ino; -+ outarg.offset = offset; -+ outarg.size = size; -+ outarg.padding = 0; - -- iov[0].iov_base = &out; -- iov[0].iov_len = sizeof(out); -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -+ iov[0].iov_base = &out; -+ iov[0].iov_len = sizeof(out); -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); - -- res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); -- if (res > 0) -- res = -res; -+ res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); -+ if (res > 0) { -+ res = -res; -+ } - -- return res; -+ return res; - } - - struct fuse_retrieve_req { -- struct fuse_notify_req nreq; -- void *cookie; -+ struct fuse_notify_req nreq; -+ void *cookie; - }; - --static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, -- fuse_req_t req, fuse_ino_t ino, -- const void *inarg, -- const struct 
fuse_buf *ibuf) --{ -- struct fuse_session *se = req->se; -- struct fuse_retrieve_req *rreq = -- container_of(nreq, struct fuse_retrieve_req, nreq); -- const struct fuse_notify_retrieve_in *arg = inarg; -- struct fuse_bufvec bufv = { -- .buf[0] = *ibuf, -- .count = 1, -- }; -- -- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) -- bufv.buf[0].mem = PARAM(arg); -- -- bufv.buf[0].size -= sizeof(struct fuse_in_header) + -- sizeof(struct fuse_notify_retrieve_in); -- -- if (bufv.buf[0].size < arg->size) { -- fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); -- fuse_reply_none(req); -- goto out; -- } -- bufv.buf[0].size = arg->size; -- -- if (se->op.retrieve_reply) { -- se->op.retrieve_reply(req, rreq->cookie, ino, -- arg->offset, &bufv); -- } else { -- fuse_reply_none(req); -- } -+static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, fuse_req_t req, -+ fuse_ino_t ino, const void *inarg, -+ const struct fuse_buf *ibuf) -+{ -+ struct fuse_session *se = req->se; -+ struct fuse_retrieve_req *rreq = -+ container_of(nreq, struct fuse_retrieve_req, nreq); -+ const struct fuse_notify_retrieve_in *arg = inarg; -+ struct fuse_bufvec bufv = { -+ .buf[0] = *ibuf, -+ .count = 1, -+ }; -+ -+ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -+ bufv.buf[0].mem = PARAM(arg); -+ } -+ -+ bufv.buf[0].size -= -+ sizeof(struct fuse_in_header) + sizeof(struct fuse_notify_retrieve_in); -+ -+ if (bufv.buf[0].size < arg->size) { -+ fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); -+ fuse_reply_none(req); -+ goto out; -+ } -+ bufv.buf[0].size = arg->size; -+ -+ if (se->op.retrieve_reply) { -+ se->op.retrieve_reply(req, rreq->cookie, ino, arg->offset, &bufv); -+ } else { -+ fuse_reply_none(req); -+ } - out: -- free(rreq); -+ free(rreq); - } - - int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -- size_t size, off_t offset, void *cookie) -+ size_t size, off_t offset, void *cookie) - { -- struct fuse_notify_retrieve_out outarg; 
-- struct iovec iov[2]; -- struct fuse_retrieve_req *rreq; -- int err; -+ struct fuse_notify_retrieve_out outarg; -+ struct iovec iov[2]; -+ struct fuse_retrieve_req *rreq; -+ int err; - -- if (!se) -- return -EINVAL; -+ if (!se) { -+ return -EINVAL; -+ } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) -- return -ENOSYS; -+ if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { -+ return -ENOSYS; -+ } - -- rreq = malloc(sizeof(*rreq)); -- if (rreq == NULL) -- return -ENOMEM; -+ rreq = malloc(sizeof(*rreq)); -+ if (rreq == NULL) { -+ return -ENOMEM; -+ } - -- pthread_mutex_lock(&se->lock); -- rreq->cookie = cookie; -- rreq->nreq.unique = se->notify_ctr++; -- rreq->nreq.reply = fuse_ll_retrieve_reply; -- list_add_nreq(&rreq->nreq, &se->notify_list); -- pthread_mutex_unlock(&se->lock); -+ pthread_mutex_lock(&se->lock); -+ rreq->cookie = cookie; -+ rreq->nreq.unique = se->notify_ctr++; -+ rreq->nreq.reply = fuse_ll_retrieve_reply; -+ list_add_nreq(&rreq->nreq, &se->notify_list); -+ pthread_mutex_unlock(&se->lock); - -- outarg.notify_unique = rreq->nreq.unique; -- outarg.nodeid = ino; -- outarg.offset = offset; -- outarg.size = size; -- outarg.padding = 0; -+ outarg.notify_unique = rreq->nreq.unique; -+ outarg.nodeid = ino; -+ outarg.offset = offset; -+ outarg.size = size; -+ outarg.padding = 0; - -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -+ iov[1].iov_base = &outarg; -+ iov[1].iov_len = sizeof(outarg); - -- err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); -- if (err) { -- pthread_mutex_lock(&se->lock); -- list_del_nreq(&rreq->nreq); -- pthread_mutex_unlock(&se->lock); -- free(rreq); -- } -+ err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); -+ if (err) { -+ pthread_mutex_lock(&se->lock); -+ list_del_nreq(&rreq->nreq); -+ pthread_mutex_unlock(&se->lock); -+ free(rreq); -+ } - -- return err; -+ return err; - } - - void *fuse_req_userdata(fuse_req_t req) - { -- return req->se->userdata; -+ return 
req->se->userdata; - } - - const struct fuse_ctx *fuse_req_ctx(fuse_req_t req) - { -- return &req->ctx; -+ return &req->ctx; - } - - void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, -- void *data) -+ void *data) - { -- pthread_mutex_lock(&req->lock); -- pthread_mutex_lock(&req->se->lock); -- req->u.ni.func = func; -- req->u.ni.data = data; -- pthread_mutex_unlock(&req->se->lock); -- if (req->interrupted && func) -- func(req, data); -- pthread_mutex_unlock(&req->lock); -+ pthread_mutex_lock(&req->lock); -+ pthread_mutex_lock(&req->se->lock); -+ req->u.ni.func = func; -+ req->u.ni.data = data; -+ pthread_mutex_unlock(&req->se->lock); -+ if (req->interrupted && func) { -+ func(req, data); -+ } -+ pthread_mutex_unlock(&req->lock); - } - - int fuse_req_interrupted(fuse_req_t req) - { -- int interrupted; -+ int interrupted; - -- pthread_mutex_lock(&req->se->lock); -- interrupted = req->interrupted; -- pthread_mutex_unlock(&req->se->lock); -+ pthread_mutex_lock(&req->se->lock); -+ interrupted = req->interrupted; -+ pthread_mutex_unlock(&req->se->lock); - -- return interrupted; -+ return interrupted; - } - - static struct { -- void (*func)(fuse_req_t, fuse_ino_t, const void *); -- const char *name; -+ void (*func)(fuse_req_t, fuse_ino_t, const void *); -+ const char *name; - } fuse_ll_ops[] = { -- [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, -- [FUSE_FORGET] = { do_forget, "FORGET" }, -- [FUSE_GETATTR] = { do_getattr, "GETATTR" }, -- [FUSE_SETATTR] = { do_setattr, "SETATTR" }, -- [FUSE_READLINK] = { do_readlink, "READLINK" }, -- [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, -- [FUSE_MKNOD] = { do_mknod, "MKNOD" }, -- [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, -- [FUSE_UNLINK] = { do_unlink, "UNLINK" }, -- [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, -- [FUSE_RENAME] = { do_rename, "RENAME" }, -- [FUSE_LINK] = { do_link, "LINK" }, -- [FUSE_OPEN] = { do_open, "OPEN" }, -- [FUSE_READ] = { do_read, "READ" }, -- [FUSE_WRITE] = { do_write, "WRITE" }, -- [FUSE_STATFS] = { 
do_statfs, "STATFS" }, -- [FUSE_RELEASE] = { do_release, "RELEASE" }, -- [FUSE_FSYNC] = { do_fsync, "FSYNC" }, -- [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, -- [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, -- [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, -- [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, -- [FUSE_FLUSH] = { do_flush, "FLUSH" }, -- [FUSE_INIT] = { do_init, "INIT" }, -- [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, -- [FUSE_READDIR] = { do_readdir, "READDIR" }, -- [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, -- [FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, -- [FUSE_GETLK] = { do_getlk, "GETLK" }, -- [FUSE_SETLK] = { do_setlk, "SETLK" }, -- [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, -- [FUSE_ACCESS] = { do_access, "ACCESS" }, -- [FUSE_CREATE] = { do_create, "CREATE" }, -- [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, -- [FUSE_BMAP] = { do_bmap, "BMAP" }, -- [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, -- [FUSE_POLL] = { do_poll, "POLL" }, -- [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, -- [FUSE_DESTROY] = { do_destroy, "DESTROY" }, -- [FUSE_NOTIFY_REPLY] = { (void *) 1, "NOTIFY_REPLY" }, -- [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, -- [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS"}, -- [FUSE_RENAME2] = { do_rename2, "RENAME2" }, -- [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, -- [FUSE_LSEEK] = { do_lseek, "LSEEK" }, -+ [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, -+ [FUSE_FORGET] = { do_forget, "FORGET" }, -+ [FUSE_GETATTR] = { do_getattr, "GETATTR" }, -+ [FUSE_SETATTR] = { do_setattr, "SETATTR" }, -+ [FUSE_READLINK] = { do_readlink, "READLINK" }, -+ [FUSE_SYMLINK] = { do_symlink, "SYMLINK" }, -+ [FUSE_MKNOD] = { do_mknod, "MKNOD" }, -+ [FUSE_MKDIR] = { do_mkdir, "MKDIR" }, -+ [FUSE_UNLINK] = { do_unlink, "UNLINK" }, -+ [FUSE_RMDIR] = { do_rmdir, "RMDIR" }, -+ [FUSE_RENAME] = { do_rename, "RENAME" }, -+ [FUSE_LINK] = { do_link, "LINK" }, -+ [FUSE_OPEN] = { do_open, "OPEN" }, -+ 
[FUSE_READ] = { do_read, "READ" }, -+ [FUSE_WRITE] = { do_write, "WRITE" }, -+ [FUSE_STATFS] = { do_statfs, "STATFS" }, -+ [FUSE_RELEASE] = { do_release, "RELEASE" }, -+ [FUSE_FSYNC] = { do_fsync, "FSYNC" }, -+ [FUSE_SETXATTR] = { do_setxattr, "SETXATTR" }, -+ [FUSE_GETXATTR] = { do_getxattr, "GETXATTR" }, -+ [FUSE_LISTXATTR] = { do_listxattr, "LISTXATTR" }, -+ [FUSE_REMOVEXATTR] = { do_removexattr, "REMOVEXATTR" }, -+ [FUSE_FLUSH] = { do_flush, "FLUSH" }, -+ [FUSE_INIT] = { do_init, "INIT" }, -+ [FUSE_OPENDIR] = { do_opendir, "OPENDIR" }, -+ [FUSE_READDIR] = { do_readdir, "READDIR" }, -+ [FUSE_RELEASEDIR] = { do_releasedir, "RELEASEDIR" }, -+ [FUSE_FSYNCDIR] = { do_fsyncdir, "FSYNCDIR" }, -+ [FUSE_GETLK] = { do_getlk, "GETLK" }, -+ [FUSE_SETLK] = { do_setlk, "SETLK" }, -+ [FUSE_SETLKW] = { do_setlkw, "SETLKW" }, -+ [FUSE_ACCESS] = { do_access, "ACCESS" }, -+ [FUSE_CREATE] = { do_create, "CREATE" }, -+ [FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" }, -+ [FUSE_BMAP] = { do_bmap, "BMAP" }, -+ [FUSE_IOCTL] = { do_ioctl, "IOCTL" }, -+ [FUSE_POLL] = { do_poll, "POLL" }, -+ [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, -+ [FUSE_DESTROY] = { do_destroy, "DESTROY" }, -+ [FUSE_NOTIFY_REPLY] = { (void *)1, "NOTIFY_REPLY" }, -+ [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, -+ [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS" }, -+ [FUSE_RENAME2] = { do_rename2, "RENAME2" }, -+ [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, -+ [FUSE_LSEEK] = { do_lseek, "LSEEK" }, - }; - - #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) - - static const char *opname(enum fuse_opcode opcode) - { -- if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) -- return "???"; -- else -- return fuse_ll_ops[opcode].name; -+ if (opcode >= FUSE_MAXOP || !fuse_ll_ops[opcode].name) { -+ return "???"; -+ } else { -+ return fuse_ll_ops[opcode].name; -+ } - } - - void fuse_session_process_buf(struct fuse_session *se, -- const struct fuse_buf *buf) -+ 
const struct fuse_buf *buf) - { -- fuse_session_process_buf_int(se, buf, NULL); -+ fuse_session_process_buf_int(se, buf, NULL); - } - - void fuse_session_process_buf_int(struct fuse_session *se, -- const struct fuse_buf *buf, struct fuse_chan *ch) --{ -- struct fuse_in_header *in; -- const void *inarg; -- struct fuse_req *req; -- int err; -- -- in = buf->mem; -- -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, -- "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", -- (unsigned long long) in->unique, -- opname((enum fuse_opcode) in->opcode), in->opcode, -- (unsigned long long) in->nodeid, buf->size, in->pid); -- } -- -- req = fuse_ll_alloc_req(se); -- if (req == NULL) { -- struct fuse_out_header out = { -- .unique = in->unique, -- .error = -ENOMEM, -- }; -- struct iovec iov = { -- .iov_base = &out, -- .iov_len = sizeof(struct fuse_out_header), -- }; -- -- fuse_send_msg(se, ch, &iov, 1); -- return; -- } -- -- req->unique = in->unique; -- req->ctx.uid = in->uid; -- req->ctx.gid = in->gid; -- req->ctx.pid = in->pid; -- req->ch = ch; -- -- err = EIO; -- if (!se->got_init) { -- enum fuse_opcode expected; -- -- expected = se->cuse_data ? 
CUSE_INIT : FUSE_INIT; -- if (in->opcode != expected) -- goto reply_err; -- } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) -- goto reply_err; -- -- err = EACCES; -- /* Implement -o allow_root */ -- if (se->deny_others && in->uid != se->owner && in->uid != 0 && -- in->opcode != FUSE_INIT && in->opcode != FUSE_READ && -- in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && -- in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && -- in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && -- in->opcode != FUSE_NOTIFY_REPLY && -- in->opcode != FUSE_READDIRPLUS) -- goto reply_err; -- -- err = ENOSYS; -- if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) -- goto reply_err; -- if (in->opcode != FUSE_INTERRUPT) { -- struct fuse_req *intr; -- pthread_mutex_lock(&se->lock); -- intr = check_interrupt(se, req); -- list_add_req(req, &se->list); -- pthread_mutex_unlock(&se->lock); -- if (intr) -- fuse_reply_err(intr, EAGAIN); -- } -- -- inarg = (void *) &in[1]; -- if (in->opcode == FUSE_WRITE && se->op.write_buf) -- do_write_buf(req, in->nodeid, inarg, buf); -- else if (in->opcode == FUSE_NOTIFY_REPLY) -- do_notify_reply(req, in->nodeid, inarg, buf); -- else -- fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); -- -- return; -+ const struct fuse_buf *buf, -+ struct fuse_chan *ch) -+{ -+ struct fuse_in_header *in; -+ const void *inarg; -+ struct fuse_req *req; -+ int err; -+ -+ in = buf->mem; -+ -+ if (se->debug) { -+ fuse_log(FUSE_LOG_DEBUG, -+ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, " -+ "pid: %u\n", -+ (unsigned long long)in->unique, -+ opname((enum fuse_opcode)in->opcode), in->opcode, -+ (unsigned long long)in->nodeid, buf->size, in->pid); -+ } -+ -+ req = fuse_ll_alloc_req(se); -+ if (req == NULL) { -+ struct fuse_out_header out = { -+ .unique = in->unique, -+ .error = -ENOMEM, -+ }; -+ struct iovec iov = { -+ .iov_base = &out, -+ .iov_len = sizeof(struct fuse_out_header), -+ }; -+ -+ fuse_send_msg(se, ch, 
&iov, 1); -+ return; -+ } -+ -+ req->unique = in->unique; -+ req->ctx.uid = in->uid; -+ req->ctx.gid = in->gid; -+ req->ctx.pid = in->pid; -+ req->ch = ch; -+ -+ err = EIO; -+ if (!se->got_init) { -+ enum fuse_opcode expected; -+ -+ expected = se->cuse_data ? CUSE_INIT : FUSE_INIT; -+ if (in->opcode != expected) { -+ goto reply_err; -+ } -+ } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) { -+ goto reply_err; -+ } -+ -+ err = EACCES; -+ /* Implement -o allow_root */ -+ if (se->deny_others && in->uid != se->owner && in->uid != 0 && -+ in->opcode != FUSE_INIT && in->opcode != FUSE_READ && -+ in->opcode != FUSE_WRITE && in->opcode != FUSE_FSYNC && -+ in->opcode != FUSE_RELEASE && in->opcode != FUSE_READDIR && -+ in->opcode != FUSE_FSYNCDIR && in->opcode != FUSE_RELEASEDIR && -+ in->opcode != FUSE_NOTIFY_REPLY && in->opcode != FUSE_READDIRPLUS) { -+ goto reply_err; -+ } -+ -+ err = ENOSYS; -+ if (in->opcode >= FUSE_MAXOP || !fuse_ll_ops[in->opcode].func) { -+ goto reply_err; -+ } -+ if (in->opcode != FUSE_INTERRUPT) { -+ struct fuse_req *intr; -+ pthread_mutex_lock(&se->lock); -+ intr = check_interrupt(se, req); -+ list_add_req(req, &se->list); -+ pthread_mutex_unlock(&se->lock); -+ if (intr) { -+ fuse_reply_err(intr, EAGAIN); -+ } -+ } -+ -+ inarg = (void *)&in[1]; -+ if (in->opcode == FUSE_WRITE && se->op.write_buf) { -+ do_write_buf(req, in->nodeid, inarg, buf); -+ } else if (in->opcode == FUSE_NOTIFY_REPLY) { -+ do_notify_reply(req, in->nodeid, inarg, buf); -+ } else { -+ fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); -+ } -+ -+ return; - - reply_err: -- fuse_reply_err(req, err); -+ fuse_reply_err(req, err); - } - --#define LL_OPTION(n,o,v) \ -- { n, offsetof(struct fuse_session, o), v } -+#define LL_OPTION(n, o, v) \ -+ { \ -+ n, offsetof(struct fuse_session, o), v \ -+ } - - static const struct fuse_opt fuse_ll_opts[] = { -- LL_OPTION("debug", debug, 1), -- LL_OPTION("-d", debug, 1), -- LL_OPTION("--debug", debug, 1), -- 
LL_OPTION("allow_root", deny_others, 1), -- FUSE_OPT_END -+ LL_OPTION("debug", debug, 1), LL_OPTION("-d", debug, 1), -+ LL_OPTION("--debug", debug, 1), LL_OPTION("allow_root", deny_others, 1), -+ FUSE_OPT_END - }; - - void fuse_lowlevel_version(void) - { -- printf("using FUSE kernel interface version %i.%i\n", -- FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); -+ printf("using FUSE kernel interface version %i.%i\n", FUSE_KERNEL_VERSION, -+ FUSE_KERNEL_MINOR_VERSION); - } - - void fuse_lowlevel_help(void) - { -- /* These are not all options, but the ones that are -- potentially of interest to an end-user */ -- printf( --" -o allow_root allow access by root\n" --); -+ /* -+ * These are not all options, but the ones that are -+ * potentially of interest to an end-user -+ */ -+ printf(" -o allow_root allow access by root\n"); - } - - void fuse_session_destroy(struct fuse_session *se) - { -- if (se->got_init && !se->got_destroy) { -- if (se->op.destroy) -- se->op.destroy(se->userdata); -- } -- pthread_mutex_destroy(&se->lock); -- free(se->cuse_data); -- if (se->fd != -1) -- close(se->fd); -- free(se); -+ if (se->got_init && !se->got_destroy) { -+ if (se->op.destroy) { -+ se->op.destroy(se->userdata); -+ } -+ } -+ pthread_mutex_destroy(&se->lock); -+ free(se->cuse_data); -+ if (se->fd != -1) { -+ close(se->fd); -+ } -+ free(se); - } - - - struct fuse_session *fuse_session_new(struct fuse_args *args, -- const struct fuse_lowlevel_ops *op, -- size_t op_size, void *userdata) --{ -- struct fuse_session *se; -- -- if (sizeof(struct fuse_lowlevel_ops) < op_size) { -- fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); -- op_size = sizeof(struct fuse_lowlevel_ops); -- } -- -- if (args->argc == 0) { -- fuse_log(FUSE_LOG_ERR, "fuse: empty argv passed to fuse_session_new().\n"); -- return NULL; -- } -- -- se = (struct fuse_session *) calloc(1, sizeof(struct fuse_session)); -- if (se == NULL) { -- fuse_log(FUSE_LOG_ERR, "fuse: failed to 
allocate fuse object\n"); -- goto out1; -- } -- se->fd = -1; -- se->conn.max_write = UINT_MAX; -- se->conn.max_readahead = UINT_MAX; -- -- /* Parse options */ -- if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) -- goto out2; -- if(args->argc == 1 && -- args->argv[0][0] == '-') { -- fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " -- "will be ignored\n"); -- } else if (args->argc != 1) { -- int i; -- fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); -- for(i = 1; i < args->argc-1; i++) -- fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); -- fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); -- goto out4; -- } -- -- se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + -- FUSE_BUFFER_HEADER_SIZE; -- -- list_init_req(&se->list); -- list_init_req(&se->interrupts); -- list_init_nreq(&se->notify_list); -- se->notify_ctr = 1; -- fuse_mutex_init(&se->lock); -- -- memcpy(&se->op, op, op_size); -- se->owner = getuid(); -- se->userdata = userdata; -- -- return se; -+ const struct fuse_lowlevel_ops *op, -+ size_t op_size, void *userdata) -+{ -+ struct fuse_session *se; -+ -+ if (sizeof(struct fuse_lowlevel_ops) < op_size) { -+ fuse_log( -+ FUSE_LOG_ERR, -+ "fuse: warning: library too old, some operations may not work\n"); -+ op_size = sizeof(struct fuse_lowlevel_ops); -+ } -+ -+ if (args->argc == 0) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: empty argv passed to fuse_session_new().\n"); -+ return NULL; -+ } -+ -+ se = (struct fuse_session *)calloc(1, sizeof(struct fuse_session)); -+ if (se == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate fuse object\n"); -+ goto out1; -+ } -+ se->fd = -1; -+ se->conn.max_write = UINT_MAX; -+ se->conn.max_readahead = UINT_MAX; -+ -+ /* Parse options */ -+ if (fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) { -+ goto out2; -+ } -+ if (args->argc == 1 && args->argv[0][0] == '-') { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: warning: argv[0] looks like an option, but " -+ "will be ignored\n"); -+ } else if 
(args->argc != 1) { -+ int i; -+ fuse_log(FUSE_LOG_ERR, "fuse: unknown option(s): `"); -+ for (i = 1; i < args->argc - 1; i++) { -+ fuse_log(FUSE_LOG_ERR, "%s ", args->argv[i]); -+ } -+ fuse_log(FUSE_LOG_ERR, "%s'\n", args->argv[i]); -+ goto out4; -+ } -+ -+ se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE; -+ -+ list_init_req(&se->list); -+ list_init_req(&se->interrupts); -+ list_init_nreq(&se->notify_list); -+ se->notify_ctr = 1; -+ fuse_mutex_init(&se->lock); -+ -+ memcpy(&se->op, op, op_size); -+ se->owner = getuid(); -+ se->userdata = userdata; -+ -+ return se; - - out4: -- fuse_opt_free_args(args); -+ fuse_opt_free_args(args); - out2: -- free(se); -+ free(se); - out1: -- return NULL; -+ return NULL; - } - - int fuse_session_mount(struct fuse_session *se, const char *mountpoint) - { -- int fd; -- -- /* -- * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos -- * would ensue. -- */ -- do { -- fd = open("/dev/null", O_RDWR); -- if (fd > 2) -- close(fd); -- } while (fd >= 0 && fd <= 2); -- -- /* -- * To allow FUSE daemons to run without privileges, the caller may open -- * /dev/fuse before launching the file system and pass on the file -- * descriptor by specifying /dev/fd/N as the mount point. Note that the -- * parent process takes care of performing the mount in this case. -- */ -- fd = fuse_mnt_parse_fuse_fd(mountpoint); -- if (fd != -1) { -- if (fcntl(fd, F_GETFD) == -1) { -- fuse_log(FUSE_LOG_ERR, -- "fuse: Invalid file descriptor /dev/fd/%u\n", -- fd); -- return -1; -- } -- se->fd = fd; -- return 0; -- } -- -- /* Open channel */ -- fd = fuse_kern_mount(mountpoint, se->mo); -- if (fd == -1) -- return -1; -- se->fd = fd; -- -- /* Save mountpoint */ -- se->mountpoint = strdup(mountpoint); -- if (se->mountpoint == NULL) -- goto error_out; -- -- return 0; -+ int fd; -+ -+ /* -+ * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos -+ * would ensue. 
-+ */ -+ do { -+ fd = open("/dev/null", O_RDWR); -+ if (fd > 2) { -+ close(fd); -+ } -+ } while (fd >= 0 && fd <= 2); -+ -+ /* -+ * To allow FUSE daemons to run without privileges, the caller may open -+ * /dev/fuse before launching the file system and pass on the file -+ * descriptor by specifying /dev/fd/N as the mount point. Note that the -+ * parent process takes care of performing the mount in this case. -+ */ -+ fd = fuse_mnt_parse_fuse_fd(mountpoint); -+ if (fd != -1) { -+ if (fcntl(fd, F_GETFD) == -1) { -+ fuse_log(FUSE_LOG_ERR, "fuse: Invalid file descriptor /dev/fd/%u\n", -+ fd); -+ return -1; -+ } -+ se->fd = fd; -+ return 0; -+ } -+ -+ /* Open channel */ -+ fd = fuse_kern_mount(mountpoint, se->mo); -+ if (fd == -1) { -+ return -1; -+ } -+ se->fd = fd; -+ -+ /* Save mountpoint */ -+ se->mountpoint = strdup(mountpoint); -+ if (se->mountpoint == NULL) { -+ goto error_out; -+ } -+ -+ return 0; - - error_out: -- fuse_kern_unmount(mountpoint, fd); -- return -1; -+ fuse_kern_unmount(mountpoint, fd); -+ return -1; - } - - int fuse_session_fd(struct fuse_session *se) - { -- return se->fd; -+ return se->fd; - } - - void fuse_session_unmount(struct fuse_session *se) -@@ -2384,61 +2519,66 @@ void fuse_session_unmount(struct fuse_session *se) - #ifdef linux - int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) - { -- char *buf; -- size_t bufsize = 1024; -- char path[128]; -- int ret; -- int fd; -- unsigned long pid = req->ctx.pid; -- char *s; -+ char *buf; -+ size_t bufsize = 1024; -+ char path[128]; -+ int ret; -+ int fd; -+ unsigned long pid = req->ctx.pid; -+ char *s; - -- sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); -+ sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); - - retry: -- buf = malloc(bufsize); -- if (buf == NULL) -- return -ENOMEM; -- -- ret = -EIO; -- fd = open(path, O_RDONLY); -- if (fd == -1) -- goto out_free; -- -- ret = read(fd, buf, bufsize); -- close(fd); -- if (ret < 0) { -- ret = -EIO; -- goto out_free; -- } -- -- if 
((size_t)ret == bufsize) { -- free(buf); -- bufsize *= 4; -- goto retry; -- } -- -- ret = -EIO; -- s = strstr(buf, "\nGroups:"); -- if (s == NULL) -- goto out_free; -- -- s += 8; -- ret = 0; -- while (1) { -- char *end; -- unsigned long val = strtoul(s, &end, 0); -- if (end == s) -- break; -- -- s = end; -- if (ret < size) -- list[ret] = val; -- ret++; -- } -+ buf = malloc(bufsize); -+ if (buf == NULL) { -+ return -ENOMEM; -+ } -+ -+ ret = -EIO; -+ fd = open(path, O_RDONLY); -+ if (fd == -1) { -+ goto out_free; -+ } -+ -+ ret = read(fd, buf, bufsize); -+ close(fd); -+ if (ret < 0) { -+ ret = -EIO; -+ goto out_free; -+ } -+ -+ if ((size_t)ret == bufsize) { -+ free(buf); -+ bufsize *= 4; -+ goto retry; -+ } -+ -+ ret = -EIO; -+ s = strstr(buf, "\nGroups:"); -+ if (s == NULL) { -+ goto out_free; -+ } -+ -+ s += 8; -+ ret = 0; -+ while (1) { -+ char *end; -+ unsigned long val = strtoul(s, &end, 0); -+ if (end == s) { -+ break; -+ } -+ -+ s = end; -+ if (ret < size) { -+ list[ret] = val; -+ } -+ ret++; -+ } - - out_free: -- free(buf); -- return ret; -+ free(buf); -+ return ret; - } - #else /* linux */ - /* -@@ -2446,23 +2586,25 @@ out_free: - */ - int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) - { -- (void) req; (void) size; (void) list; -- return -ENOSYS; -+ (void)req; -+ (void)size; -+ (void)list; -+ return -ENOSYS; - } - #endif - - void fuse_session_exit(struct fuse_session *se) - { -- se->exited = 1; -+ se->exited = 1; - } - - void fuse_session_reset(struct fuse_session *se) - { -- se->exited = 0; -- se->error = 0; -+ se->exited = 0; -+ se->error = 0; - } - - int fuse_session_exited(struct fuse_session *se) - { -- return se->exited; -+ return se->exited; - } -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 6b1adfc..adb9054 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1,15 +1,16 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- 
This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. -+ */ - - #ifndef FUSE_LOWLEVEL_H_ - #define FUSE_LOWLEVEL_H_ - --/** @file -+/** -+ * @file - * - * Low level API - * -@@ -24,16 +25,16 @@ - - #include "fuse_common.h" - --#include - #include --#include - #include - #include -+#include - #include -+#include - --/* ----------------------------------------------------------- * -- * Miscellaneous definitions * -- * ----------------------------------------------------------- */ -+/* -+ * Miscellaneous definitions -+ */ - - /** The node ID of the root inode */ - #define FUSE_ROOT_ID 1 -@@ -53,47 +54,54 @@ struct fuse_session; - - /** Directory entry parameters supplied to fuse_reply_entry() */ - struct fuse_entry_param { -- /** Unique inode number -- * -- * In lookup, zero means negative entry (from version 2.5) -- * Returning ENOENT also means negative entry, but by setting zero -- * ino the kernel may cache negative entries for entry_timeout -- * seconds. -- */ -- fuse_ino_t ino; -- -- /** Generation number for this entry. -- * -- * If the file system will be exported over NFS, the -- * ino/generation pairs need to be unique over the file -- * system's lifetime (rather than just the mount time). So if -- * the file system reuses an inode after it has been deleted, -- * it must assign a new, previously unused generation number -- * to the inode at the same time. -- * -- */ -- uint64_t generation; -- -- /** Inode attributes. -- * -- * Even if attr_timeout == 0, attr must be correct. For example, -- * for open(), FUSE uses attr.st_size from lookup() to determine -- * how many bytes to request. If this value is not correct, -- * incorrect data will be returned. 
-- */ -- struct stat attr; -- -- /** Validity timeout (in seconds) for inode attributes. If -- attributes only change as a result of requests that come -- through the kernel, this should be set to a very large -- value. */ -- double attr_timeout; -- -- /** Validity timeout (in seconds) for the name. If directory -- entries are changed/deleted only as a result of requests -- that come through the kernel, this should be set to a very -- large value. */ -- double entry_timeout; -+ /** -+ * Unique inode number -+ * -+ * In lookup, zero means negative entry (from version 2.5) -+ * Returning ENOENT also means negative entry, but by setting zero -+ * ino the kernel may cache negative entries for entry_timeout -+ * seconds. -+ */ -+ fuse_ino_t ino; -+ -+ /** -+ * Generation number for this entry. -+ * -+ * If the file system will be exported over NFS, the -+ * ino/generation pairs need to be unique over the file -+ * system's lifetime (rather than just the mount time). So if -+ * the file system reuses an inode after it has been deleted, -+ * it must assign a new, previously unused generation number -+ * to the inode at the same time. -+ * -+ */ -+ uint64_t generation; -+ -+ /** -+ * Inode attributes. -+ * -+ * Even if attr_timeout == 0, attr must be correct. For example, -+ * for open(), FUSE uses attr.st_size from lookup() to determine -+ * how many bytes to request. If this value is not correct, -+ * incorrect data will be returned. -+ */ -+ struct stat attr; -+ -+ /** -+ * Validity timeout (in seconds) for inode attributes. If -+ * attributes only change as a result of requests that come -+ * through the kernel, this should be set to a very large -+ * value. -+ */ -+ double attr_timeout; -+ -+ /** -+ * Validity timeout (in seconds) for the name. If directory -+ * entries are changed/deleted only as a result of requests -+ * that come through the kernel, this should be set to a very -+ * large value. 
-+ */ -+ double entry_timeout; - }; - - /** -@@ -105,38 +113,38 @@ struct fuse_entry_param { - * there is no valid uid/pid/gid that could be reported. - */ - struct fuse_ctx { -- /** User ID of the calling process */ -- uid_t uid; -+ /** User ID of the calling process */ -+ uid_t uid; - -- /** Group ID of the calling process */ -- gid_t gid; -+ /** Group ID of the calling process */ -+ gid_t gid; - -- /** Thread ID of the calling process */ -- pid_t pid; -+ /** Thread ID of the calling process */ -+ pid_t pid; - -- /** Umask of the calling process */ -- mode_t umask; -+ /** Umask of the calling process */ -+ mode_t umask; - }; - - struct fuse_forget_data { -- fuse_ino_t ino; -- uint64_t nlookup; -+ fuse_ino_t ino; -+ uint64_t nlookup; - }; - - /* 'to_set' flags in setattr */ --#define FUSE_SET_ATTR_MODE (1 << 0) --#define FUSE_SET_ATTR_UID (1 << 1) --#define FUSE_SET_ATTR_GID (1 << 2) --#define FUSE_SET_ATTR_SIZE (1 << 3) --#define FUSE_SET_ATTR_ATIME (1 << 4) --#define FUSE_SET_ATTR_MTIME (1 << 5) --#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) --#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) --#define FUSE_SET_ATTR_CTIME (1 << 10) -- --/* ----------------------------------------------------------- * -- * Request methods and replies * -- * ----------------------------------------------------------- */ -+#define FUSE_SET_ATTR_MODE (1 << 0) -+#define FUSE_SET_ATTR_UID (1 << 1) -+#define FUSE_SET_ATTR_GID (1 << 2) -+#define FUSE_SET_ATTR_SIZE (1 << 3) -+#define FUSE_SET_ATTR_ATIME (1 << 4) -+#define FUSE_SET_ATTR_MTIME (1 << 5) -+#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) -+#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) -+#define FUSE_SET_ATTR_CTIME (1 << 10) -+ -+/* -+ * Request methods and replies -+ */ - - /** - * Low level filesystem operations -@@ -166,1075 +174,1069 @@ struct fuse_forget_data { - * this file will not be called. 
- */ - struct fuse_lowlevel_ops { -- /** -- * Initialize filesystem -- * -- * This function is called when libfuse establishes -- * communication with the FUSE kernel module. The file system -- * should use this module to inspect and/or modify the -- * connection parameters provided in the `conn` structure. -- * -- * Note that some parameters may be overwritten by options -- * passed to fuse_session_new() which take precedence over the -- * values set in this handler. -- * -- * There's no reply to this function -- * -- * @param userdata the user data passed to fuse_session_new() -- */ -- void (*init) (void *userdata, struct fuse_conn_info *conn); -- -- /** -- * Clean up filesystem. -- * -- * Called on filesystem exit. When this method is called, the -- * connection to the kernel may be gone already, so that eg. calls -- * to fuse_lowlevel_notify_* will fail. -- * -- * There's no reply to this function -- * -- * @param userdata the user data passed to fuse_session_new() -- */ -- void (*destroy) (void *userdata); -- -- /** -- * Look up a directory entry by name and get its attributes. -- * -- * Valid replies: -- * fuse_reply_entry -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name the name to look up -- */ -- void (*lookup) (fuse_req_t req, fuse_ino_t parent, const char *name); -- -- /** -- * Forget about an inode -- * -- * This function is called when the kernel removes an inode -- * from its internal caches. -- * -- * The inode's lookup count increases by one for every call to -- * fuse_reply_entry and fuse_reply_create. The nlookup parameter -- * indicates by how much the lookup count should be decreased. -- * -- * Inodes with a non-zero lookup count may receive request from -- * the kernel even after calls to unlink, rmdir or (when -- * overwriting an existing file) rename. 
Filesystems must handle -- * such requests properly and it is recommended to defer removal -- * of the inode until the lookup count reaches zero. Calls to -- * unlink, rmdir or rename will be followed closely by forget -- * unless the file or directory is open, in which case the -- * kernel issues forget only after the release or releasedir -- * calls. -- * -- * Note that if a file system will be exported over NFS the -- * inodes lifetime must extend even beyond forget. See the -- * generation field in struct fuse_entry_param above. -- * -- * On unmount the lookup count for all inodes implicitly drops -- * to zero. It is not guaranteed that the file system will -- * receive corresponding forget messages for the affected -- * inodes. -- * -- * Valid replies: -- * fuse_reply_none -- * -- * @param req request handle -- * @param ino the inode number -- * @param nlookup the number of lookups to forget -- */ -- void (*forget) (fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); -- -- /** -- * Get file attributes. -- * -- * If writeback caching is enabled, the kernel may have a -- * better idea of a file's length than the FUSE file system -- * (eg if there has been a write that extended the file size, -- * but that has not yet been passed to the filesystem.n -- * -- * In this case, the st_size value provided by the file system -- * will be ignored. -- * -- * Valid replies: -- * fuse_reply_attr -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi for future use, currently always NULL -- */ -- void (*getattr) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Set file attributes -- * -- * In the 'attr' argument only members indicated by the 'to_set' -- * bitmask contain valid values. Other members contain undefined -- * values. -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits if the file -- * size or owner is being changed. 
-- * -- * If the setattr was invoked from the ftruncate() system call -- * under Linux kernel versions 2.6.15 or later, the fi->fh will -- * contain the value set by the open method or will be undefined -- * if the open method didn't set any value. Otherwise (not -- * ftruncate call, or kernel version earlier than 2.6.15) the fi -- * parameter will be NULL. -- * -- * Valid replies: -- * fuse_reply_attr -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param attr the attributes -- * @param to_set bit mask of attributes which should be set -- * @param fi file information, or NULL -- */ -- void (*setattr) (fuse_req_t req, fuse_ino_t ino, struct stat *attr, -- int to_set, struct fuse_file_info *fi); -- -- /** -- * Read symbolic link -- * -- * Valid replies: -- * fuse_reply_readlink -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- */ -- void (*readlink) (fuse_req_t req, fuse_ino_t ino); -- -- /** -- * Create file node -- * -- * Create a regular file, character device, block device, fifo or -- * socket node. 
-- * -- * Valid replies: -- * fuse_reply_entry -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name to create -- * @param mode file type and mode with which to create the new file -- * @param rdev the device number (only valid if created file is a device) -- */ -- void (*mknod) (fuse_req_t req, fuse_ino_t parent, const char *name, -- mode_t mode, dev_t rdev); -- -- /** -- * Create a directory -- * -- * Valid replies: -- * fuse_reply_entry -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name to create -- * @param mode with which to create the new file -- */ -- void (*mkdir) (fuse_req_t req, fuse_ino_t parent, const char *name, -- mode_t mode); -- -- /** -- * Remove a file -- * -- * If the file's inode's lookup count is non-zero, the file -- * system is expected to postpone any removal of the inode -- * until the lookup count reaches zero (see description of the -- * forget function). -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name to remove -- */ -- void (*unlink) (fuse_req_t req, fuse_ino_t parent, const char *name); -- -- /** -- * Remove a directory -- * -- * If the directory's inode's lookup count is non-zero, the -- * file system is expected to postpone any removal of the -- * inode until the lookup count reaches zero (see description -- * of the forget function). 
-- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name to remove -- */ -- void (*rmdir) (fuse_req_t req, fuse_ino_t parent, const char *name); -- -- /** -- * Create a symbolic link -- * -- * Valid replies: -- * fuse_reply_entry -- * fuse_reply_err -- * -- * @param req request handle -- * @param link the contents of the symbolic link -- * @param parent inode number of the parent directory -- * @param name to create -- */ -- void (*symlink) (fuse_req_t req, const char *link, fuse_ino_t parent, -- const char *name); -- -- /** Rename a file -- * -- * If the target exists it should be atomically replaced. If -- * the target's inode's lookup count is non-zero, the file -- * system is expected to postpone any removal of the inode -- * until the lookup count reaches zero (see description of the -- * forget function). -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EINVAL, i.e. all -- * future bmap requests will fail with EINVAL without being -- * send to the filesystem process. -- * -- * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If -- * RENAME_NOREPLACE is specified, the filesystem must not -- * overwrite *newname* if it exists and return an error -- * instead. If `RENAME_EXCHANGE` is specified, the filesystem -- * must atomically exchange the two files, i.e. both must -- * exist and neither may be deleted. 
-- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the old parent directory -- * @param name old name -- * @param newparent inode number of the new parent directory -- * @param newname new name -- */ -- void (*rename) (fuse_req_t req, fuse_ino_t parent, const char *name, -- fuse_ino_t newparent, const char *newname, -- unsigned int flags); -- -- /** -- * Create a hard link -- * -- * Valid replies: -- * fuse_reply_entry -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the old inode number -- * @param newparent inode number of the new parent directory -- * @param newname new name to create -- */ -- void (*link) (fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, -- const char *newname); -- -- /** -- * Open a file -- * -- * Open flags are available in fi->flags. The following rules -- * apply. -- * -- * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -- * filtered out / handled by the kernel. -- * -- * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used -- * by the filesystem to check if the operation is -- * permitted. If the ``-o default_permissions`` mount -- * option is given, this check is already done by the -- * kernel before calling open() and may thus be omitted by -- * the filesystem. -- * -- * - When writeback caching is enabled, the kernel may send -- * read requests even for files opened with O_WRONLY. The -- * filesystem should be prepared to handle this. -- * -- * - When writeback caching is disabled, the filesystem is -- * expected to properly handle the O_APPEND flag and ensure -- * that each write is appending to the end of the file. -- * -- * - When writeback caching is enabled, the kernel will -- * handle O_APPEND. However, unless all changes to the file -- * come through the kernel this will not work reliably. 
The -- * filesystem should thus either ignore the O_APPEND flag -- * (and let the kernel handle it), or return an error -- * (indicating that reliably O_APPEND is not available). -- * -- * Filesystem may store an arbitrary file handle (pointer, -- * index, etc) in fi->fh, and use this in other all other file -- * operations (read, write, flush, release, fsync). -- * -- * Filesystem may also implement stateless file I/O and not store -- * anything in fi->fh. -- * -- * There are also some flags (direct_io, keep_cache) which the -- * filesystem may set in fi, to change the way the file is opened. -- * See fuse_file_info structure in for more details. -- * -- * If this request is answered with an error code of ENOSYS -- * and FUSE_CAP_NO_OPEN_SUPPORT is set in -- * `fuse_conn_info.capable`, this is treated as success and -- * future calls to open and release will also succeed without being -- * sent to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_open -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- */ -- void (*open) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Read data -- * -- * Read should send exactly the number of bytes requested except -- * on EOF or error, otherwise the rest of the data will be -- * substituted with zeroes. An exception to this is when the file -- * has been opened in 'direct_io' mode, in which case the return -- * value of the read system call will reflect the return value of -- * this operation. -- * -- * fi->fh will contain the value set by the open method, or will -- * be undefined if the open method didn't set any value. 
-- * -- * Valid replies: -- * fuse_reply_buf -- * fuse_reply_iov -- * fuse_reply_data -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param size number of bytes to read -- * @param off offset to read from -- * @param fi file information -- */ -- void (*read) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -- struct fuse_file_info *fi); -- -- /** -- * Write data -- * -- * Write should return exactly the number of bytes requested -- * except on error. An exception to this is when the file has -- * been opened in 'direct_io' mode, in which case the return value -- * of the write system call will reflect the return value of this -- * operation. -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. -- * -- * fi->fh will contain the value set by the open method, or will -- * be undefined if the open method didn't set any value. -- * -- * Valid replies: -- * fuse_reply_write -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param buf data to write -- * @param size number of bytes to write -- * @param off offset to write to -- * @param fi file information -- */ -- void (*write) (fuse_req_t req, fuse_ino_t ino, const char *buf, -- size_t size, off_t off, struct fuse_file_info *fi); -- -- /** -- * Flush method -- * -- * This is called on each close() of the opened file. -- * -- * Since file descriptors can be duplicated (dup, dup2, fork), for -- * one open call there may be many flush calls. -- * -- * Filesystems shouldn't assume that flush will always be called -- * after some writes, or that if will be called at all. -- * -- * fi->fh will contain the value set by the open method, or will -- * be undefined if the open method didn't set any value. -- * -- * NOTE: the name of the method is misleading, since (unlike -- * fsync) the filesystem is not forced to flush pending writes. 
-- * One reason to flush data is if the filesystem wants to return -- * write errors during close. However, such use is non-portable -- * because POSIX does not require [close] to wait for delayed I/O to -- * complete. -- * -- * If the filesystem supports file locking operations (setlk, -- * getlk) it should remove all locks belonging to 'fi->owner'. -- * -- * If this request is answered with an error code of ENOSYS, -- * this is treated as success and future calls to flush() will -- * succeed automatically without being send to the filesystem -- * process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- * -- * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -- */ -- void (*flush) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Release an open file -- * -- * Release is called when there are no more references to an open -- * file: all file descriptors are closed and all memory mappings -- * are unmapped. -- * -- * For every open call there will be exactly one release call (unless -- * the filesystem is force-unmounted). -- * -- * The filesystem may reply with an error, but error values are -- * not returned to close() or munmap() which triggered the -- * release. -- * -- * fi->fh will contain the value set by the open method, or will -- * be undefined if the open method didn't set any value. -- * fi->flags will contain the same flags as for open. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- */ -- void (*release) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Synchronize file contents -- * -- * If the datasync parameter is non-zero, then only the user data -- * should be flushed, not the meta data. 
-- * -- * If this request is answered with an error code of ENOSYS, -- * this is treated as success and future calls to fsync() will -- * succeed automatically without being send to the filesystem -- * process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param datasync flag indicating if only data should be flushed -- * @param fi file information -- */ -- void (*fsync) (fuse_req_t req, fuse_ino_t ino, int datasync, -- struct fuse_file_info *fi); -- -- /** -- * Open a directory -- * -- * Filesystem may store an arbitrary file handle (pointer, index, -- * etc) in fi->fh, and use this in other all other directory -- * stream operations (readdir, releasedir, fsyncdir). -- * -- * If this request is answered with an error code of ENOSYS and -- * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, -- * this is treated as success and future calls to opendir and -- * releasedir will also succeed without being sent to the filesystem -- * process. In addition, the kernel will cache readdir results -- * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. -- * -- * Valid replies: -- * fuse_reply_open -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- */ -- void (*opendir) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Read directory -- * -- * Send a buffer filled using fuse_add_direntry(), with size not -- * exceeding the requested size. Send an empty buffer on end of -- * stream. -- * -- * fi->fh will contain the value set by the opendir method, or -- * will be undefined if the opendir method didn't set any value. -- * -- * Returning a directory entry from readdir() does not affect -- * its lookup count. -- * -- * If off_t is non-zero, then it will correspond to one of the off_t -- * values that was previously returned by readdir() for the same -- * directory handle. 
In this case, readdir() should skip over entries -- * coming before the position defined by the off_t value. If entries -- * are added or removed while the directory handle is open, they filesystem -- * may still include the entries that have been removed, and may not -- * report the entries that have been created. However, addition or -- * removal of entries must never cause readdir() to skip over unrelated -- * entries or to report them more than once. This means -- * that off_t can not be a simple index that enumerates the entries -- * that have been returned but must contain sufficient information to -- * uniquely determine the next directory entry to return even when the -- * set of entries is changing. -- * -- * The function does not have to report the '.' and '..' -- * entries, but is allowed to do so. Note that, if readdir does -- * not return '.' or '..', they will not be implicitly returned, -- * and this behavior is observable by the caller. -- * -- * Valid replies: -- * fuse_reply_buf -- * fuse_reply_data -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param size maximum number of bytes to send -- * @param off offset to continue reading the directory stream -- * @param fi file information -- */ -- void (*readdir) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -- struct fuse_file_info *fi); -- -- /** -- * Release an open directory -- * -- * For every opendir call there will be exactly one releasedir -- * call (unless the filesystem is force-unmounted). -- * -- * fi->fh will contain the value set by the opendir method, or -- * will be undefined if the opendir method didn't set any value. 
-- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- */ -- void (*releasedir) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi); -- -- /** -- * Synchronize directory contents -- * -- * If the datasync parameter is non-zero, then only the directory -- * contents should be flushed, not the meta data. -- * -- * fi->fh will contain the value set by the opendir method, or -- * will be undefined if the opendir method didn't set any value. -- * -- * If this request is answered with an error code of ENOSYS, -- * this is treated as success and future calls to fsyncdir() will -- * succeed automatically without being send to the filesystem -- * process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param datasync flag indicating if only data should be flushed -- * @param fi file information -- */ -- void (*fsyncdir) (fuse_req_t req, fuse_ino_t ino, int datasync, -- struct fuse_file_info *fi); -- -- /** -- * Get file system statistics -- * -- * Valid replies: -- * fuse_reply_statfs -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number, zero means "undefined" -- */ -- void (*statfs) (fuse_req_t req, fuse_ino_t ino); -- -- /** -- * Set an extended attribute -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -- * future setxattr() requests will fail with EOPNOTSUPP without being -- * send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_err -- */ -- void (*setxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, -- const char *value, size_t size, int flags); -- -- /** -- * Get an extended attribute -- * -- * If size is zero, the size of the value should be sent with -- * fuse_reply_xattr. 
-- * -- * If the size is non-zero, and the value fits in the buffer, the -- * value should be sent with fuse_reply_buf. -- * -- * If the size is too small for the value, the ERANGE error should -- * be sent. -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -- * future getxattr() requests will fail with EOPNOTSUPP without being -- * send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_buf -- * fuse_reply_data -- * fuse_reply_xattr -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param name of the extended attribute -- * @param size maximum size of the value to send -- */ -- void (*getxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, -- size_t size); -- -- /** -- * List extended attribute names -- * -- * If size is zero, the total size of the attribute list should be -- * sent with fuse_reply_xattr. -- * -- * If the size is non-zero, and the null character separated -- * attribute list fits in the buffer, the list should be sent with -- * fuse_reply_buf. -- * -- * If the size is too small for the list, the ERANGE error should -- * be sent. -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -- * future listxattr() requests will fail with EOPNOTSUPP without being -- * send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_buf -- * fuse_reply_data -- * fuse_reply_xattr -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param size maximum size of the list to send -- */ -- void (*listxattr) (fuse_req_t req, fuse_ino_t ino, size_t size); -- -- /** -- * Remove an extended attribute -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all -- * future removexattr() requests will fail with EOPNOTSUPP without being -- * send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param name of the extended attribute -- */ -- void (*removexattr) (fuse_req_t req, fuse_ino_t ino, const char *name); -- -- /** -- * Check file access permissions -- * -- * This will be called for the access() and chdir() system -- * calls. If the 'default_permissions' mount option is given, -- * this method is not called. -- * -- * This method is not called under Linux kernel versions 2.4.x -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent success, i.e. this and all future access() -- * requests will succeed without being send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param mask requested access mode -- */ -- void (*access) (fuse_req_t req, fuse_ino_t ino, int mask); -- -- /** -- * Create and open a file -- * -- * If the file does not exist, first create it with the specified -- * mode, and then open it. -- * -- * See the description of the open handler for more -- * information. -- * -- * If this method is not implemented or under Linux kernel -- * versions earlier than 2.6.15, the mknod() and open() methods -- * will be called instead. -- * -- * If this request is answered with an error code of ENOSYS, the handler -- * is treated as not implemented (i.e., for this and future requests the -- * mknod() and open() handlers will be called instead). 
-- * -- * Valid replies: -- * fuse_reply_create -- * fuse_reply_err -- * -- * @param req request handle -- * @param parent inode number of the parent directory -- * @param name to create -- * @param mode file type and mode with which to create the new file -- * @param fi file information -- */ -- void (*create) (fuse_req_t req, fuse_ino_t parent, const char *name, -- mode_t mode, struct fuse_file_info *fi); -- -- /** -- * Test for a POSIX file lock -- * -- * Valid replies: -- * fuse_reply_lock -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- * @param lock the region/type to test -- */ -- void (*getlk) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi, struct flock *lock); -- -- /** -- * Acquire, modify or release a POSIX file lock -- * -- * For POSIX threads (NPTL) there's a 1-1 relation between pid and -- * owner, but otherwise this is not always the case. For checking -- * lock ownership, 'fi->owner' must be used. The l_pid field in -- * 'struct flock' should only be used to fill in this field in -- * getlk(). -- * -- * Note: if the locking methods are not implemented, the kernel -- * will still allow file locking to work locally. Hence these are -- * only interesting for network filesystems and similar. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- * @param lock the region/type to set -- * @param sleep locking operation may sleep -- */ -- void (*setlk) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi, -- struct flock *lock, int sleep); -- -- /** -- * Map block index within file to block index within device -- * -- * Note: This makes sense only for block device backed filesystems -- * mounted with the 'blkdev' option -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure, i.e. 
all future bmap() requests will -- * fail with the same error code without being send to the filesystem -- * process. -- * -- * Valid replies: -- * fuse_reply_bmap -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param blocksize unit of block index -- * @param idx block index within file -- */ -- void (*bmap) (fuse_req_t req, fuse_ino_t ino, size_t blocksize, -- uint64_t idx); -- -- /** -- * Ioctl -- * -- * Note: For unrestricted ioctls (not allowed for FUSE -- * servers), data in and out areas can be discovered by giving -- * iovs and setting FUSE_IOCTL_RETRY in *flags*. For -- * restricted ioctls, kernel prepares in/out data area -- * according to the information encoded in cmd. -- * -- * Valid replies: -- * fuse_reply_ioctl_retry -- * fuse_reply_ioctl -- * fuse_reply_ioctl_iov -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param cmd ioctl command -- * @param arg ioctl argument -- * @param fi file information -- * @param flags for FUSE_IOCTL_* flags -- * @param in_buf data fetched from the caller -- * @param in_bufsz number of fetched bytes -- * @param out_bufsz maximum size of output data -- * -- * Note : the unsigned long request submitted by the application -- * is truncated to 32 bits. -- */ -- void (*ioctl) (fuse_req_t req, fuse_ino_t ino, unsigned int cmd, -- void *arg, struct fuse_file_info *fi, unsigned flags, -- const void *in_buf, size_t in_bufsz, size_t out_bufsz); -- -- /** -- * Poll for IO readiness -- * -- * Note: If ph is non-NULL, the client should notify -- * when IO readiness events occur by calling -- * fuse_lowlevel_notify_poll() with the specified ph. -- * -- * Regardless of the number of times poll with a non-NULL ph -- * is received, single notification is enough to clear all. -- * Notifying more times incurs overhead but doesn't harm -- * correctness. 
-- * -- * The callee is responsible for destroying ph with -- * fuse_pollhandle_destroy() when no longer in use. -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as success (with a kernel-defined default poll-mask) and -- * future calls to pull() will succeed the same way without being send -- * to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_poll -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- * @param ph poll handle to be used for notification -- */ -- void (*poll) (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -- struct fuse_pollhandle *ph); -- -- /** -- * Write data made available in a buffer -- * -- * This is a more generic version of the ->write() method. If -- * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the -- * kernel supports splicing from the fuse device, then the -- * data will be made available in pipe for supporting zero -- * copy data transfer. -- * -- * buf->count is guaranteed to be one (and thus buf->idx is -- * always zero). The write_buf handler must ensure that -- * bufv->off is correctly updated (reflecting the number of -- * bytes read from bufv->buf[0]). -- * -- * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -- * expected to reset the setuid and setgid bits. 
-- * -- * Valid replies: -- * fuse_reply_write -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param bufv buffer containing the data -- * @param off offset to write to -- * @param fi file information -- */ -- void (*write_buf) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_bufvec *bufv, off_t off, -- struct fuse_file_info *fi); -- -- /** -- * Callback function for the retrieve request -- * -- * Valid replies: -- * fuse_reply_none -- * -- * @param req request handle -- * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() -- * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() -- * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() -- * @param bufv the buffer containing the returned data -- */ -- void (*retrieve_reply) (fuse_req_t req, void *cookie, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv); -- -- /** -- * Forget about multiple inodes -- * -- * See description of the forget function for more -- * information. -- * -- * Valid replies: -- * fuse_reply_none -- * -- * @param req request handle -- */ -- void (*forget_multi) (fuse_req_t req, size_t count, -- struct fuse_forget_data *forgets); -- -- /** -- * Acquire, modify or release a BSD file lock -- * -- * Note: if the locking methods are not implemented, the kernel -- * will still allow file locking to work locally. Hence these are -- * only interesting for network filesystems and similar. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param fi file information -- * @param op the locking operation, see flock(2) -- */ -- void (*flock) (fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi, int op); -- -- /** -- * Allocate requested space. If this function returns success then -- * subsequent writes to the specified range shall not fail due to the lack -- * of free space on the file system storage media. 
-- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -- * future fallocate() requests will fail with EOPNOTSUPP without being -- * send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param offset starting point for allocated region -- * @param length size of allocated region -- * @param mode determines the operation to be performed on the given range, -- * see fallocate(2) -- */ -- void (*fallocate) (fuse_req_t req, fuse_ino_t ino, int mode, -- off_t offset, off_t length, struct fuse_file_info *fi); -- -- /** -- * Read directory with attributes -- * -- * Send a buffer filled using fuse_add_direntry_plus(), with size not -- * exceeding the requested size. Send an empty buffer on end of -- * stream. -- * -- * fi->fh will contain the value set by the opendir method, or -- * will be undefined if the opendir method didn't set any value. -- * -- * In contrast to readdir() (which does not affect the lookup counts), -- * the lookup count of every entry returned by readdirplus(), except "." -- * and "..", is incremented by one. -- * -- * Valid replies: -- * fuse_reply_buf -- * fuse_reply_data -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param size maximum number of bytes to send -- * @param off offset to continue reading the directory stream -- * @param fi file information -- */ -- void (*readdirplus) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -- struct fuse_file_info *fi); -- -- /** -- * Copy a range of data from one file to another -- * -- * Performs an optimized copy between two file descriptors without the -- * additional cost of transferring data through the FUSE kernel module -- * to user space (glibc) and then back into the FUSE filesystem again. 
-- * -- * In case this method is not implemented, glibc falls back to reading -- * data from the source and writing to the destination. Effectively -- * doing an inefficient copy of the data. -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -- * future copy_file_range() requests will fail with EOPNOTSUPP without -- * being send to the filesystem process. -- * -- * Valid replies: -- * fuse_reply_write -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino_in the inode number or the source file -- * @param off_in starting point from were the data should be read -- * @param fi_in file information of the source file -- * @param ino_out the inode number or the destination file -- * @param off_out starting point where the data should be written -- * @param fi_out file information of the destination file -- * @param len maximum size of the data to copy -- * @param flags passed along with the copy_file_range() syscall -- */ -- void (*copy_file_range) (fuse_req_t req, fuse_ino_t ino_in, -- off_t off_in, struct fuse_file_info *fi_in, -- fuse_ino_t ino_out, off_t off_out, -- struct fuse_file_info *fi_out, size_t len, -- int flags); -- -- /** -- * Find next data or hole after the specified offset -- * -- * If this request is answered with an error code of ENOSYS, this is -- * treated as a permanent failure, i.e. all future lseek() requests will -- * fail with the same error code without being send to the filesystem -- * process. 
-- * -- * Valid replies: -- * fuse_reply_lseek -- * fuse_reply_err -- * -- * @param req request handle -- * @param ino the inode number -- * @param off offset to start search from -- * @param whence either SEEK_DATA or SEEK_HOLE -- * @param fi file information -- */ -- void (*lseek) (fuse_req_t req, fuse_ino_t ino, off_t off, int whence, -- struct fuse_file_info *fi); -+ /** -+ * Initialize filesystem -+ * -+ * This function is called when libfuse establishes -+ * communication with the FUSE kernel module. The file system -+ * should use this module to inspect and/or modify the -+ * connection parameters provided in the `conn` structure. -+ * -+ * Note that some parameters may be overwritten by options -+ * passed to fuse_session_new() which take precedence over the -+ * values set in this handler. -+ * -+ * There's no reply to this function -+ * -+ * @param userdata the user data passed to fuse_session_new() -+ */ -+ void (*init)(void *userdata, struct fuse_conn_info *conn); -+ -+ /** -+ * Clean up filesystem. -+ * -+ * Called on filesystem exit. When this method is called, the -+ * connection to the kernel may be gone already, so that eg. calls -+ * to fuse_lowlevel_notify_* will fail. -+ * -+ * There's no reply to this function -+ * -+ * @param userdata the user data passed to fuse_session_new() -+ */ -+ void (*destroy)(void *userdata); -+ -+ /** -+ * Look up a directory entry by name and get its attributes. -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name the name to look up -+ */ -+ void (*lookup)(fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Forget about an inode -+ * -+ * This function is called when the kernel removes an inode -+ * from its internal caches. -+ * -+ * The inode's lookup count increases by one for every call to -+ * fuse_reply_entry and fuse_reply_create. 
The nlookup parameter -+ * indicates by how much the lookup count should be decreased. -+ * -+ * Inodes with a non-zero lookup count may receive request from -+ * the kernel even after calls to unlink, rmdir or (when -+ * overwriting an existing file) rename. Filesystems must handle -+ * such requests properly and it is recommended to defer removal -+ * of the inode until the lookup count reaches zero. Calls to -+ * unlink, rmdir or rename will be followed closely by forget -+ * unless the file or directory is open, in which case the -+ * kernel issues forget only after the release or releasedir -+ * calls. -+ * -+ * Note that if a file system will be exported over NFS the -+ * inodes lifetime must extend even beyond forget. See the -+ * generation field in struct fuse_entry_param above. -+ * -+ * On unmount the lookup count for all inodes implicitly drops -+ * to zero. It is not guaranteed that the file system will -+ * receive corresponding forget messages for the affected -+ * inodes. -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param nlookup the number of lookups to forget -+ */ -+ void (*forget)(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); -+ -+ /** -+ * Get file attributes. -+ * -+ * If writeback caching is enabled, the kernel may have a -+ * better idea of a file's length than the FUSE file system -+ * (eg if there has been a write that extended the file size, -+ * but that has not yet been passed to the filesystem.n -+ * -+ * In this case, the st_size value provided by the file system -+ * will be ignored. 
-+ * -+ * Valid replies: -+ * fuse_reply_attr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi for future use, currently always NULL -+ */ -+ void (*getattr)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); -+ -+ /** -+ * Set file attributes -+ * -+ * In the 'attr' argument only members indicated by the 'to_set' -+ * bitmask contain valid values. Other members contain undefined -+ * values. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits if the file -+ * size or owner is being changed. -+ * -+ * If the setattr was invoked from the ftruncate() system call -+ * under Linux kernel versions 2.6.15 or later, the fi->fh will -+ * contain the value set by the open method or will be undefined -+ * if the open method didn't set any value. Otherwise (not -+ * ftruncate call, or kernel version earlier than 2.6.15) the fi -+ * parameter will be NULL. -+ * -+ * Valid replies: -+ * fuse_reply_attr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param attr the attributes -+ * @param to_set bit mask of attributes which should be set -+ * @param fi file information, or NULL -+ */ -+ void (*setattr)(fuse_req_t req, fuse_ino_t ino, struct stat *attr, -+ int to_set, struct fuse_file_info *fi); -+ -+ /** -+ * Read symbolic link -+ * -+ * Valid replies: -+ * fuse_reply_readlink -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ */ -+ void (*readlink)(fuse_req_t req, fuse_ino_t ino); -+ -+ /** -+ * Create file node -+ * -+ * Create a regular file, character device, block device, fifo or -+ * socket node. 
-+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode file type and mode with which to create the new file -+ * @param rdev the device number (only valid if created file is a device) -+ */ -+ void (*mknod)(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, dev_t rdev); -+ -+ /** -+ * Create a directory -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode with which to create the new file -+ */ -+ void (*mkdir)(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode); -+ -+ /** -+ * Remove a file -+ * -+ * If the file's inode's lookup count is non-zero, the file -+ * system is expected to postpone any removal of the inode -+ * until the lookup count reaches zero (see description of the -+ * forget function). -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to remove -+ */ -+ void (*unlink)(fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Remove a directory -+ * -+ * If the directory's inode's lookup count is non-zero, the -+ * file system is expected to postpone any removal of the -+ * inode until the lookup count reaches zero (see description -+ * of the forget function). 
-+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to remove -+ */ -+ void (*rmdir)(fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Create a symbolic link -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param link the contents of the symbolic link -+ * @param parent inode number of the parent directory -+ * @param name to create -+ */ -+ void (*symlink)(fuse_req_t req, const char *link, fuse_ino_t parent, -+ const char *name); -+ -+ /** -+ * Rename a file -+ * -+ * If the target exists it should be atomically replaced. If -+ * the target's inode's lookup count is non-zero, the file -+ * system is expected to postpone any removal of the inode -+ * until the lookup count reaches zero (see description of the -+ * forget function). -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EINVAL, i.e. all -+ * future bmap requests will fail with EINVAL without being -+ * send to the filesystem process. -+ * -+ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If -+ * RENAME_NOREPLACE is specified, the filesystem must not -+ * overwrite *newname* if it exists and return an error -+ * instead. If `RENAME_EXCHANGE` is specified, the filesystem -+ * must atomically exchange the two files, i.e. both must -+ * exist and neither may be deleted. 
-+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the old parent directory -+ * @param name old name -+ * @param newparent inode number of the new parent directory -+ * @param newname new name -+ */ -+ void (*rename)(fuse_req_t req, fuse_ino_t parent, const char *name, -+ fuse_ino_t newparent, const char *newname, -+ unsigned int flags); -+ -+ /** -+ * Create a hard link -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the old inode number -+ * @param newparent inode number of the new parent directory -+ * @param newname new name to create -+ */ -+ void (*link)(fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, -+ const char *newname); -+ -+ /** -+ * Open a file -+ * -+ * Open flags are available in fi->flags. The following rules -+ * apply. -+ * -+ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -+ * filtered out / handled by the kernel. -+ * -+ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used -+ * by the filesystem to check if the operation is -+ * permitted. If the ``-o default_permissions`` mount -+ * option is given, this check is already done by the -+ * kernel before calling open() and may thus be omitted by -+ * the filesystem. -+ * -+ * - When writeback caching is enabled, the kernel may send -+ * read requests even for files opened with O_WRONLY. The -+ * filesystem should be prepared to handle this. -+ * -+ * - When writeback caching is disabled, the filesystem is -+ * expected to properly handle the O_APPEND flag and ensure -+ * that each write is appending to the end of the file. -+ * -+ * - When writeback caching is enabled, the kernel will -+ * handle O_APPEND. However, unless all changes to the file -+ * come through the kernel this will not work reliably. 
The -+ * filesystem should thus either ignore the O_APPEND flag -+ * (and let the kernel handle it), or return an error -+ * (indicating that reliably O_APPEND is not available). -+ * -+ * Filesystem may store an arbitrary file handle (pointer, -+ * index, etc) in fi->fh, and use this in other all other file -+ * operations (read, write, flush, release, fsync). -+ * -+ * Filesystem may also implement stateless file I/O and not store -+ * anything in fi->fh. -+ * -+ * There are also some flags (direct_io, keep_cache) which the -+ * filesystem may set in fi, to change the way the file is opened. -+ * See fuse_file_info structure in for more details. -+ * -+ * If this request is answered with an error code of ENOSYS -+ * and FUSE_CAP_NO_OPEN_SUPPORT is set in -+ * `fuse_conn_info.capable`, this is treated as success and -+ * future calls to open and release will also succeed without being -+ * sent to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_open -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*open)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); -+ -+ /** -+ * Read data -+ * -+ * Read should send exactly the number of bytes requested except -+ * on EOF or error, otherwise the rest of the data will be -+ * substituted with zeroes. An exception to this is when the file -+ * has been opened in 'direct_io' mode, in which case the return -+ * value of the read system call will reflect the return value of -+ * this operation. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. 
-+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_iov -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size number of bytes to read -+ * @param off offset to read from -+ * @param fi file information -+ */ -+ void (*read)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Write data -+ * -+ * Write should return exactly the number of bytes requested -+ * except on error. An exception to this is when the file has -+ * been opened in 'direct_io' mode, in which case the return value -+ * of the write system call will reflect the return value of this -+ * operation. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param buf data to write -+ * @param size number of bytes to write -+ * @param off offset to write to -+ * @param fi file information -+ */ -+ void (*write)(fuse_req_t req, fuse_ino_t ino, const char *buf, size_t size, -+ off_t off, struct fuse_file_info *fi); -+ -+ /** -+ * Flush method -+ * -+ * This is called on each close() of the opened file. -+ * -+ * Since file descriptors can be duplicated (dup, dup2, fork), for -+ * one open call there may be many flush calls. -+ * -+ * Filesystems shouldn't assume that flush will always be called -+ * after some writes, or that if will be called at all. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * -+ * NOTE: the name of the method is misleading, since (unlike -+ * fsync) the filesystem is not forced to flush pending writes. 
-+ * One reason to flush data is if the filesystem wants to return -+ * write errors during close. However, such use is non-portable -+ * because POSIX does not require [close] to wait for delayed I/O to -+ * complete. -+ * -+ * If the filesystem supports file locking operations (setlk, -+ * getlk) it should remove all locks belonging to 'fi->owner'. -+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to flush() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * -+ * [close]: -+ * http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -+ */ -+ void (*flush)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); -+ -+ /** -+ * Release an open file -+ * -+ * Release is called when there are no more references to an open -+ * file: all file descriptors are closed and all memory mappings -+ * are unmapped. -+ * -+ * For every open call there will be exactly one release call (unless -+ * the filesystem is force-unmounted). -+ * -+ * The filesystem may reply with an error, but error values are -+ * not returned to close() or munmap() which triggered the -+ * release. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * fi->flags will contain the same flags as for open. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*release)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); -+ -+ /** -+ * Synchronize file contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data. 
-+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to fsync() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param datasync flag indicating if only data should be flushed -+ * @param fi file information -+ */ -+ void (*fsync)(fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Open a directory -+ * -+ * Filesystem may store an arbitrary file handle (pointer, index, -+ * etc) in fi->fh, and use this in other all other directory -+ * stream operations (readdir, releasedir, fsyncdir). -+ * -+ * If this request is answered with an error code of ENOSYS and -+ * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, -+ * this is treated as success and future calls to opendir and -+ * releasedir will also succeed without being sent to the filesystem -+ * process. In addition, the kernel will cache readdir results -+ * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. -+ * -+ * Valid replies: -+ * fuse_reply_open -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*opendir)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi); -+ -+ /** -+ * Read directory -+ * -+ * Send a buffer filled using fuse_add_direntry(), with size not -+ * exceeding the requested size. Send an empty buffer on end of -+ * stream. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * Returning a directory entry from readdir() does not affect -+ * its lookup count. -+ * -+ * If off_t is non-zero, then it will correspond to one of the off_t -+ * values that was previously returned by readdir() for the same -+ * directory handle. 
In this case, readdir() should skip over entries -+ * coming before the position defined by the off_t value. If entries -+ * are added or removed while the directory handle is open, they filesystem -+ * may still include the entries that have been removed, and may not -+ * report the entries that have been created. However, addition or -+ * removal of entries must never cause readdir() to skip over unrelated -+ * entries or to report them more than once. This means -+ * that off_t can not be a simple index that enumerates the entries -+ * that have been returned but must contain sufficient information to -+ * uniquely determine the next directory entry to return even when the -+ * set of entries is changing. -+ * -+ * The function does not have to report the '.' and '..' -+ * entries, but is allowed to do so. Note that, if readdir does -+ * not return '.' or '..', they will not be implicitly returned, -+ * and this behavior is observable by the caller. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum number of bytes to send -+ * @param off offset to continue reading the directory stream -+ * @param fi file information -+ */ -+ void (*readdir)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Release an open directory -+ * -+ * For every opendir call there will be exactly one releasedir -+ * call (unless the filesystem is force-unmounted). -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. 
-+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*releasedir)(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Synchronize directory contents -+ * -+ * If the datasync parameter is non-zero, then only the directory -+ * contents should be flushed, not the meta data. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to fsyncdir() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param datasync flag indicating if only data should be flushed -+ * @param fi file information -+ */ -+ void (*fsyncdir)(fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Get file system statistics -+ * -+ * Valid replies: -+ * fuse_reply_statfs -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number, zero means "undefined" -+ */ -+ void (*statfs)(fuse_req_t req, fuse_ino_t ino); -+ -+ /** -+ * Set an extended attribute -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future setxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ */ -+ void (*setxattr)(fuse_req_t req, fuse_ino_t ino, const char *name, -+ const char *value, size_t size, int flags); -+ -+ /** -+ * Get an extended attribute -+ * -+ * If size is zero, the size of the value should be sent with -+ * fuse_reply_xattr. 
-+ * -+ * If the size is non-zero, and the value fits in the buffer, the -+ * value should be sent with fuse_reply_buf. -+ * -+ * If the size is too small for the value, the ERANGE error should -+ * be sent. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future getxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_xattr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param name of the extended attribute -+ * @param size maximum size of the value to send -+ */ -+ void (*getxattr)(fuse_req_t req, fuse_ino_t ino, const char *name, -+ size_t size); -+ -+ /** -+ * List extended attribute names -+ * -+ * If size is zero, the total size of the attribute list should be -+ * sent with fuse_reply_xattr. -+ * -+ * If the size is non-zero, and the null character separated -+ * attribute list fits in the buffer, the list should be sent with -+ * fuse_reply_buf. -+ * -+ * If the size is too small for the list, the ERANGE error should -+ * be sent. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future listxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_xattr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum size of the list to send -+ */ -+ void (*listxattr)(fuse_req_t req, fuse_ino_t ino, size_t size); -+ -+ /** -+ * Remove an extended attribute -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all -+ * future removexattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param name of the extended attribute -+ */ -+ void (*removexattr)(fuse_req_t req, fuse_ino_t ino, const char *name); -+ -+ /** -+ * Check file access permissions -+ * -+ * This will be called for the access() and chdir() system -+ * calls. If the 'default_permissions' mount option is given, -+ * this method is not called. -+ * -+ * This method is not called under Linux kernel versions 2.4.x -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent success, i.e. this and all future access() -+ * requests will succeed without being send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param mask requested access mode -+ */ -+ void (*access)(fuse_req_t req, fuse_ino_t ino, int mask); -+ -+ /** -+ * Create and open a file -+ * -+ * If the file does not exist, first create it with the specified -+ * mode, and then open it. -+ * -+ * See the description of the open handler for more -+ * information. -+ * -+ * If this method is not implemented or under Linux kernel -+ * versions earlier than 2.6.15, the mknod() and open() methods -+ * will be called instead. -+ * -+ * If this request is answered with an error code of ENOSYS, the handler -+ * is treated as not implemented (i.e., for this and future requests the -+ * mknod() and open() handlers will be called instead). 
-+ * -+ * Valid replies: -+ * fuse_reply_create -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode file type and mode with which to create the new file -+ * @param fi file information -+ */ -+ void (*create)(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, struct fuse_file_info *fi); -+ -+ /** -+ * Test for a POSIX file lock -+ * -+ * Valid replies: -+ * fuse_reply_lock -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param lock the region/type to test -+ */ -+ void (*getlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct flock *lock); -+ -+ /** -+ * Acquire, modify or release a POSIX file lock -+ * -+ * For POSIX threads (NPTL) there's a 1-1 relation between pid and -+ * owner, but otherwise this is not always the case. For checking -+ * lock ownership, 'fi->owner' must be used. The l_pid field in -+ * 'struct flock' should only be used to fill in this field in -+ * getlk(). -+ * -+ * Note: if the locking methods are not implemented, the kernel -+ * will still allow file locking to work locally. Hence these are -+ * only interesting for network filesystems and similar. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param lock the region/type to set -+ * @param sleep locking operation may sleep -+ */ -+ void (*setlk)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct flock *lock, int sleep); -+ -+ /** -+ * Map block index within file to block index within device -+ * -+ * Note: This makes sense only for block device backed filesystems -+ * mounted with the 'blkdev' option -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure, i.e. 
all future bmap() requests will -+ * fail with the same error code without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_bmap -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param blocksize unit of block index -+ * @param idx block index within file -+ */ -+ void (*bmap)(fuse_req_t req, fuse_ino_t ino, size_t blocksize, -+ uint64_t idx); -+ -+ /** -+ * Ioctl -+ * -+ * Note: For unrestricted ioctls (not allowed for FUSE -+ * servers), data in and out areas can be discovered by giving -+ * iovs and setting FUSE_IOCTL_RETRY in *flags*. For -+ * restricted ioctls, kernel prepares in/out data area -+ * according to the information encoded in cmd. -+ * -+ * Valid replies: -+ * fuse_reply_ioctl_retry -+ * fuse_reply_ioctl -+ * fuse_reply_ioctl_iov -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param cmd ioctl command -+ * @param arg ioctl argument -+ * @param fi file information -+ * @param flags for FUSE_IOCTL_* flags -+ * @param in_buf data fetched from the caller -+ * @param in_bufsz number of fetched bytes -+ * @param out_bufsz maximum size of output data -+ * -+ * Note : the unsigned long request submitted by the application -+ * is truncated to 32 bits. -+ */ -+ void (*ioctl)(fuse_req_t req, fuse_ino_t ino, unsigned int cmd, void *arg, -+ struct fuse_file_info *fi, unsigned flags, const void *in_buf, -+ size_t in_bufsz, size_t out_bufsz); -+ -+ /** -+ * Poll for IO readiness -+ * -+ * Note: If ph is non-NULL, the client should notify -+ * when IO readiness events occur by calling -+ * fuse_lowlevel_notify_poll() with the specified ph. -+ * -+ * Regardless of the number of times poll with a non-NULL ph -+ * is received, single notification is enough to clear all. -+ * Notifying more times incurs overhead but doesn't harm -+ * correctness. 
-+ * -+ * The callee is responsible for destroying ph with -+ * fuse_pollhandle_destroy() when no longer in use. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as success (with a kernel-defined default poll-mask) and -+ * future calls to pull() will succeed the same way without being send -+ * to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_poll -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param ph poll handle to be used for notification -+ */ -+ void (*poll)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct fuse_pollhandle *ph); -+ -+ /** -+ * Write data made available in a buffer -+ * -+ * This is a more generic version of the ->write() method. If -+ * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the -+ * kernel supports splicing from the fuse device, then the -+ * data will be made available in pipe for supporting zero -+ * copy data transfer. -+ * -+ * buf->count is guaranteed to be one (and thus buf->idx is -+ * always zero). The write_buf handler must ensure that -+ * bufv->off is correctly updated (reflecting the number of -+ * bytes read from bufv->buf[0]). -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. 
-+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param bufv buffer containing the data -+ * @param off offset to write to -+ * @param fi file information -+ */ -+ void (*write_buf)(fuse_req_t req, fuse_ino_t ino, struct fuse_bufvec *bufv, -+ off_t off, struct fuse_file_info *fi); -+ -+ /** -+ * Callback function for the retrieve request -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() -+ * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() -+ * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() -+ * @param bufv the buffer containing the returned data -+ */ -+ void (*retrieve_reply)(fuse_req_t req, void *cookie, fuse_ino_t ino, -+ off_t offset, struct fuse_bufvec *bufv); -+ -+ /** -+ * Forget about multiple inodes -+ * -+ * See description of the forget function for more -+ * information. -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ */ -+ void (*forget_multi)(fuse_req_t req, size_t count, -+ struct fuse_forget_data *forgets); -+ -+ /** -+ * Acquire, modify or release a BSD file lock -+ * -+ * Note: if the locking methods are not implemented, the kernel -+ * will still allow file locking to work locally. Hence these are -+ * only interesting for network filesystems and similar. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param op the locking operation, see flock(2) -+ */ -+ void (*flock)(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ int op); -+ -+ /** -+ * Allocate requested space. If this function returns success then -+ * subsequent writes to the specified range shall not fail due to the lack -+ * of free space on the file system storage media. 
-+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future fallocate() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param offset starting point for allocated region -+ * @param length size of allocated region -+ * @param mode determines the operation to be performed on the given range, -+ * see fallocate(2) -+ */ -+ void (*fallocate)(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, -+ off_t length, struct fuse_file_info *fi); -+ -+ /** -+ * Read directory with attributes -+ * -+ * Send a buffer filled using fuse_add_direntry_plus(), with size not -+ * exceeding the requested size. Send an empty buffer on end of -+ * stream. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * In contrast to readdir() (which does not affect the lookup counts), -+ * the lookup count of every entry returned by readdirplus(), except "." -+ * and "..", is incremented by one. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum number of bytes to send -+ * @param off offset to continue reading the directory stream -+ * @param fi file information -+ */ -+ void (*readdirplus)(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Copy a range of data from one file to another -+ * -+ * Performs an optimized copy between two file descriptors without the -+ * additional cost of transferring data through the FUSE kernel module -+ * to user space (glibc) and then back into the FUSE filesystem again. 
-+ * -+ * In case this method is not implemented, glibc falls back to reading -+ * data from the source and writing to the destination. Effectively -+ * doing an inefficient copy of the data. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future copy_file_range() requests will fail with EOPNOTSUPP without -+ * being send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino_in the inode number or the source file -+ * @param off_in starting point from were the data should be read -+ * @param fi_in file information of the source file -+ * @param ino_out the inode number or the destination file -+ * @param off_out starting point where the data should be written -+ * @param fi_out file information of the destination file -+ * @param len maximum size of the data to copy -+ * @param flags passed along with the copy_file_range() syscall -+ */ -+ void (*copy_file_range)(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, -+ struct fuse_file_info *fi_in, fuse_ino_t ino_out, -+ off_t off_out, struct fuse_file_info *fi_out, -+ size_t len, int flags); -+ -+ /** -+ * Find next data or hole after the specified offset -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure, i.e. all future lseek() requests will -+ * fail with the same error code without being send to the filesystem -+ * process. 
-+ * -+ * Valid replies: -+ * fuse_reply_lseek -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param off offset to start search from -+ * @param whence either SEEK_DATA or SEEK_HOLE -+ * @param fi file information -+ */ -+ void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, -+ struct fuse_file_info *fi); - }; - - /** -@@ -1305,7 +1307,7 @@ int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); - * @return zero for success, -errno for failure to send reply - */ - int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, -- const struct fuse_file_info *fi); -+ const struct fuse_file_info *fi); - - /** - * Reply with attributes -@@ -1315,11 +1317,11 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, - * - * @param req request handle - * @param attr the attributes -- * @param attr_timeout validity timeout (in seconds) for the attributes -+ * @param attr_timeout validity timeout (in seconds) for the attributes - * @return zero for success, -errno for failure to send reply - */ - int fuse_reply_attr(fuse_req_t req, const struct stat *attr, -- double attr_timeout); -+ double attr_timeout); - - /** - * Reply with the contents of a symbolic link -@@ -1417,7 +1419,7 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); - * @return zero for success, -errno for failure to send reply - */ - int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags); -+ enum fuse_buf_copy_flags flags); - - /** - * Reply with data vector -@@ -1480,9 +1482,9 @@ int fuse_reply_lock(fuse_req_t req, const struct flock *lock); - */ - int fuse_reply_bmap(fuse_req_t req, uint64_t idx); - --/* ----------------------------------------------------------- * -- * Filling a buffer in readdir * -- * ----------------------------------------------------------- */ -+/* -+ * Filling a buffer in readdir -+ */ - - /** - * Add a directory entry to 
the buffer -@@ -1512,8 +1514,7 @@ int fuse_reply_bmap(fuse_req_t req, uint64_t idx); - * @return the space needed for the entry - */ - size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, -- const char *name, const struct stat *stbuf, -- off_t off); -+ const char *name, const struct stat *stbuf, off_t off); - - /** - * Add a directory entry to the buffer with the attributes -@@ -1529,8 +1530,8 @@ size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, - * @return the space needed for the entry - */ - size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, -- const char *name, -- const struct fuse_entry_param *e, off_t off); -+ const char *name, -+ const struct fuse_entry_param *e, off_t off); - - /** - * Reply to ask for data fetch and output buffer preparation. ioctl -@@ -1547,9 +1548,9 @@ size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, - * @param out_count number of entries in out_iov - * @return zero for success, -errno for failure to send reply - */ --int fuse_reply_ioctl_retry(fuse_req_t req, -- const struct iovec *in_iov, size_t in_count, -- const struct iovec *out_iov, size_t out_count); -+int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, -+ size_t in_count, const struct iovec *out_iov, -+ size_t out_count); - - /** - * Reply to finish ioctl -@@ -1576,7 +1577,7 @@ int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size); - * @param count the size of vector - */ - int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, -- int count); -+ int count); - - /** - * Reply with poll result event mask -@@ -1598,9 +1599,9 @@ int fuse_reply_poll(fuse_req_t req, unsigned revents); - */ - int fuse_reply_lseek(fuse_req_t req, off_t off); - --/* ----------------------------------------------------------- * -- * Notification * -- * ----------------------------------------------------------- */ -+/* -+ * Notification -+ */ - - /** - * Notify 
IO readiness event -@@ -1635,7 +1636,7 @@ int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph); - * @return zero for success, -errno for failure - */ - int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, -- off_t off, off_t len); -+ off_t off, off_t len); - - /** - * Notify to invalidate parent attributes and the dentry matching -@@ -1663,7 +1664,7 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, - * @return zero for success, -errno for failure - */ - int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, -- const char *name, size_t namelen); -+ const char *name, size_t namelen); - - /** - * This function behaves like fuse_lowlevel_notify_inval_entry() with -@@ -1693,9 +1694,9 @@ int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, - * @param namelen strlen() of file name - * @return zero for success, -errno for failure - */ --int fuse_lowlevel_notify_delete(struct fuse_session *se, -- fuse_ino_t parent, fuse_ino_t child, -- const char *name, size_t namelen); -+int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, -+ fuse_ino_t child, const char *name, -+ size_t namelen); - - /** - * Store data to the kernel buffers -@@ -1723,8 +1724,8 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, - * @return zero for success, -errno for failure - */ - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags); -+ off_t offset, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags); - /** - * Retrieve data from the kernel buffers - * -@@ -1755,12 +1756,12 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - * @return zero for success, -errno for failure - */ - int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -- size_t size, off_t offset, void *cookie); -+ size_t size, off_t offset, void 
*cookie); - - --/* ----------------------------------------------------------- * -- * Utility functions * -- * ----------------------------------------------------------- */ -+/* -+ * Utility functions -+ */ - - /** - * Get the userdata from the request -@@ -1822,7 +1823,7 @@ typedef void (*fuse_interrupt_func_t)(fuse_req_t req, void *data); - * @param data user data passed to the callback function - */ - void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, -- void *data); -+ void *data); - - /** - * Check if a request has already been interrupted -@@ -1833,9 +1834,9 @@ void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, - int fuse_req_interrupted(fuse_req_t req); - - --/* ----------------------------------------------------------- * -- * Inquiry functions * -- * ----------------------------------------------------------- */ -+/* -+ * Inquiry functions -+ */ - - /** - * Print low-level version information to stdout. -@@ -1854,18 +1855,18 @@ void fuse_lowlevel_help(void); - */ - void fuse_cmdline_help(void); - --/* ----------------------------------------------------------- * -- * Filesystem setup & teardown * -- * ----------------------------------------------------------- */ -+/* -+ * Filesystem setup & teardown -+ */ - - struct fuse_cmdline_opts { -- int foreground; -- int debug; -- int nodefault_subtype; -- char *mountpoint; -- int show_version; -- int show_help; -- unsigned int max_idle_threads; -+ int foreground; -+ int debug; -+ int nodefault_subtype; -+ char *mountpoint; -+ int show_version; -+ int show_help; -+ unsigned int max_idle_threads; - }; - - /** -@@ -1886,8 +1887,7 @@ struct fuse_cmdline_opts { - * @param opts output argument for parsed options - * @return 0 on success, -1 on failure - */ --int fuse_parse_cmdline(struct fuse_args *args, -- struct fuse_cmdline_opts *opts); -+int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts); - - /** - * Create a low level session. 
-@@ -1918,8 +1918,8 @@ int fuse_parse_cmdline(struct fuse_args *args, - * @return the fuse session on success, NULL on failure - **/ - struct fuse_session *fuse_session_new(struct fuse_args *args, -- const struct fuse_lowlevel_ops *op, -- size_t op_size, void *userdata); -+ const struct fuse_lowlevel_ops *op, -+ size_t op_size, void *userdata); - - /** - * Mount a FUSE file system. -@@ -2014,9 +2014,9 @@ void fuse_session_unmount(struct fuse_session *se); - */ - void fuse_session_destroy(struct fuse_session *se); - --/* ----------------------------------------------------------- * -- * Custom event loop support * -- * ----------------------------------------------------------- */ -+/* -+ * Custom event loop support -+ */ - - /** - * Return file descriptor for communication with kernel. -@@ -2043,7 +2043,7 @@ int fuse_session_fd(struct fuse_session *se); - * @param buf the fuse_buf containing the request - */ - void fuse_session_process_buf(struct fuse_session *se, -- const struct fuse_buf *buf); -+ const struct fuse_buf *buf); - - /** - * Read a raw request from the kernel into the supplied buffer. -diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h -index 2f6663e..f252baa 100644 ---- a/tools/virtiofsd/fuse_misc.h -+++ b/tools/virtiofsd/fuse_misc.h -@@ -1,18 +1,18 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ - - #include - - /* -- Versioned symbols cannot be used in some cases because it -- - confuse the dynamic linker in uClibc -- - not supported on MacOSX (in MachO binary format) --*/ -+ * Versioned symbols cannot be used in some cases because it -+ * - confuse the dynamic linker in uClibc -+ * - not supported on MacOSX (in MachO binary format) -+ */ - #if (!defined(__UCLIBC__) && !defined(__APPLE__)) - #define FUSE_SYMVER(x) __asm__(x) - #else -@@ -25,11 +25,11 @@ - /* Is this hack still needed? */ - static inline void fuse_mutex_init(pthread_mutex_t *mut) - { -- pthread_mutexattr_t attr; -- pthread_mutexattr_init(&attr); -- pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); -- pthread_mutex_init(mut, &attr); -- pthread_mutexattr_destroy(&attr); -+ pthread_mutexattr_t attr; -+ pthread_mutexattr_init(&attr); -+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); -+ pthread_mutex_init(mut, &attr); -+ pthread_mutexattr_destroy(&attr); - } - #endif - -diff --git a/tools/virtiofsd/fuse_opt.c b/tools/virtiofsd/fuse_opt.c -index 93066b9..edd36f4 100644 ---- a/tools/virtiofsd/fuse_opt.c -+++ b/tools/virtiofsd/fuse_opt.c -@@ -1,423 +1,450 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- Implementation of option parsing routines (dealing with `struct -- fuse_args`). -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * Implementation of option parsing routines (dealing with `struct -+ * fuse_args`). -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ - -+#include "fuse_opt.h" - #include "config.h" - #include "fuse_i.h" --#include "fuse_opt.h" - #include "fuse_misc.h" - -+#include - #include - #include - #include --#include - - struct fuse_opt_context { -- void *data; -- const struct fuse_opt *opt; -- fuse_opt_proc_t proc; -- int argctr; -- int argc; -- char **argv; -- struct fuse_args outargs; -- char *opts; -- int nonopt; -+ void *data; -+ const struct fuse_opt *opt; -+ fuse_opt_proc_t proc; -+ int argctr; -+ int argc; -+ char **argv; -+ struct fuse_args outargs; -+ char *opts; -+ int nonopt; - }; - - void fuse_opt_free_args(struct fuse_args *args) - { -- if (args) { -- if (args->argv && args->allocated) { -- int i; -- for (i = 0; i < args->argc; i++) -- free(args->argv[i]); -- free(args->argv); -- } -- args->argc = 0; -- args->argv = NULL; -- args->allocated = 0; -- } -+ if (args) { -+ if (args->argv && args->allocated) { -+ int i; -+ for (i = 0; i < args->argc; i++) { -+ free(args->argv[i]); -+ } -+ free(args->argv); -+ } -+ args->argc = 0; -+ args->argv = NULL; -+ args->allocated = 0; -+ } - } - - static int alloc_failed(void) - { -- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -- return -1; -+ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -+ return -1; - } - - int fuse_opt_add_arg(struct fuse_args *args, const char *arg) - { -- char **newargv; -- char *newarg; -- -- assert(!args->argv || args->allocated); -- -- newarg = strdup(arg); -- if (!newarg) -- return alloc_failed(); -- -- newargv = realloc(args->argv, (args->argc + 2) * sizeof(char *)); -- if (!newargv) { -- free(newarg); -- return alloc_failed(); -- } -- -- args->argv = newargv; -- args->allocated = 1; -- args->argv[args->argc++] = newarg; -- args->argv[args->argc] = NULL; -- return 0; -+ char **newargv; -+ char *newarg; -+ -+ assert(!args->argv || args->allocated); -+ -+ newarg = strdup(arg); -+ if (!newarg) { -+ return alloc_failed(); -+ } -+ -+ newargv = realloc(args->argv, 
(args->argc + 2) * sizeof(char *)); -+ if (!newargv) { -+ free(newarg); -+ return alloc_failed(); -+ } -+ -+ args->argv = newargv; -+ args->allocated = 1; -+ args->argv[args->argc++] = newarg; -+ args->argv[args->argc] = NULL; -+ return 0; - } - - static int fuse_opt_insert_arg_common(struct fuse_args *args, int pos, -- const char *arg) -+ const char *arg) - { -- assert(pos <= args->argc); -- if (fuse_opt_add_arg(args, arg) == -1) -- return -1; -- -- if (pos != args->argc - 1) { -- char *newarg = args->argv[args->argc - 1]; -- memmove(&args->argv[pos + 1], &args->argv[pos], -- sizeof(char *) * (args->argc - pos - 1)); -- args->argv[pos] = newarg; -- } -- return 0; -+ assert(pos <= args->argc); -+ if (fuse_opt_add_arg(args, arg) == -1) { -+ return -1; -+ } -+ -+ if (pos != args->argc - 1) { -+ char *newarg = args->argv[args->argc - 1]; -+ memmove(&args->argv[pos + 1], &args->argv[pos], -+ sizeof(char *) * (args->argc - pos - 1)); -+ args->argv[pos] = newarg; -+ } -+ return 0; - } - - int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg) - { -- return fuse_opt_insert_arg_common(args, pos, arg); -+ return fuse_opt_insert_arg_common(args, pos, arg); - } - - static int next_arg(struct fuse_opt_context *ctx, const char *opt) - { -- if (ctx->argctr + 1 >= ctx->argc) { -- fuse_log(FUSE_LOG_ERR, "fuse: missing argument after `%s'\n", opt); -- return -1; -- } -- ctx->argctr++; -- return 0; -+ if (ctx->argctr + 1 >= ctx->argc) { -+ fuse_log(FUSE_LOG_ERR, "fuse: missing argument after `%s'\n", opt); -+ return -1; -+ } -+ ctx->argctr++; -+ return 0; - } - - static int add_arg(struct fuse_opt_context *ctx, const char *arg) - { -- return fuse_opt_add_arg(&ctx->outargs, arg); -+ return fuse_opt_add_arg(&ctx->outargs, arg); - } - - static int add_opt_common(char **opts, const char *opt, int esc) - { -- unsigned oldlen = *opts ? 
strlen(*opts) : 0; -- char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); -- -- if (!d) -- return alloc_failed(); -- -- *opts = d; -- if (oldlen) { -- d += oldlen; -- *d++ = ','; -- } -- -- for (; *opt; opt++) { -- if (esc && (*opt == ',' || *opt == '\\')) -- *d++ = '\\'; -- *d++ = *opt; -- } -- *d = '\0'; -- -- return 0; -+ unsigned oldlen = *opts ? strlen(*opts) : 0; -+ char *d = realloc(*opts, oldlen + 1 + strlen(opt) * 2 + 1); -+ -+ if (!d) { -+ return alloc_failed(); -+ } -+ -+ *opts = d; -+ if (oldlen) { -+ d += oldlen; -+ *d++ = ','; -+ } -+ -+ for (; *opt; opt++) { -+ if (esc && (*opt == ',' || *opt == '\\')) { -+ *d++ = '\\'; -+ } -+ *d++ = *opt; -+ } -+ *d = '\0'; -+ -+ return 0; - } - - int fuse_opt_add_opt(char **opts, const char *opt) - { -- return add_opt_common(opts, opt, 0); -+ return add_opt_common(opts, opt, 0); - } - - int fuse_opt_add_opt_escaped(char **opts, const char *opt) - { -- return add_opt_common(opts, opt, 1); -+ return add_opt_common(opts, opt, 1); - } - - static int add_opt(struct fuse_opt_context *ctx, const char *opt) - { -- return add_opt_common(&ctx->opts, opt, 1); -+ return add_opt_common(&ctx->opts, opt, 1); - } - - static int call_proc(struct fuse_opt_context *ctx, const char *arg, int key, -- int iso) -+ int iso) - { -- if (key == FUSE_OPT_KEY_DISCARD) -- return 0; -- -- if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { -- int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); -- if (res == -1 || !res) -- return res; -- } -- if (iso) -- return add_opt(ctx, arg); -- else -- return add_arg(ctx, arg); -+ if (key == FUSE_OPT_KEY_DISCARD) { -+ return 0; -+ } -+ -+ if (key != FUSE_OPT_KEY_KEEP && ctx->proc) { -+ int res = ctx->proc(ctx->data, arg, key, &ctx->outargs); -+ if (res == -1 || !res) { -+ return res; -+ } -+ } -+ if (iso) { -+ return add_opt(ctx, arg); -+ } else { -+ return add_arg(ctx, arg); -+ } - } - - static int match_template(const char *t, const char *arg, unsigned *sepp) - { -- int arglen = strlen(arg); -- 
const char *sep = strchr(t, '='); -- sep = sep ? sep : strchr(t, ' '); -- if (sep && (!sep[1] || sep[1] == '%')) { -- int tlen = sep - t; -- if (sep[0] == '=') -- tlen ++; -- if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { -- *sepp = sep - t; -- return 1; -- } -- } -- if (strcmp(t, arg) == 0) { -- *sepp = 0; -- return 1; -- } -- return 0; -+ int arglen = strlen(arg); -+ const char *sep = strchr(t, '='); -+ sep = sep ? sep : strchr(t, ' '); -+ if (sep && (!sep[1] || sep[1] == '%')) { -+ int tlen = sep - t; -+ if (sep[0] == '=') { -+ tlen++; -+ } -+ if (arglen >= tlen && strncmp(arg, t, tlen) == 0) { -+ *sepp = sep - t; -+ return 1; -+ } -+ } -+ if (strcmp(t, arg) == 0) { -+ *sepp = 0; -+ return 1; -+ } -+ return 0; - } - - static const struct fuse_opt *find_opt(const struct fuse_opt *opt, -- const char *arg, unsigned *sepp) -+ const char *arg, unsigned *sepp) - { -- for (; opt && opt->templ; opt++) -- if (match_template(opt->templ, arg, sepp)) -- return opt; -- return NULL; -+ for (; opt && opt->templ; opt++) { -+ if (match_template(opt->templ, arg, sepp)) { -+ return opt; -+ } -+ } -+ return NULL; - } - - int fuse_opt_match(const struct fuse_opt *opts, const char *opt) - { -- unsigned dummy; -- return find_opt(opts, opt, &dummy) ? 1 : 0; -+ unsigned dummy; -+ return find_opt(opts, opt, &dummy) ? 
1 : 0; - } - - static int process_opt_param(void *var, const char *format, const char *param, -- const char *arg) -+ const char *arg) - { -- assert(format[0] == '%'); -- if (format[1] == 's') { -- char **s = var; -- char *copy = strdup(param); -- if (!copy) -- return alloc_failed(); -- -- free(*s); -- *s = copy; -- } else { -- if (sscanf(param, format, var) != 1) { -- fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", arg); -- return -1; -- } -- } -- return 0; -+ assert(format[0] == '%'); -+ if (format[1] == 's') { -+ char **s = var; -+ char *copy = strdup(param); -+ if (!copy) { -+ return alloc_failed(); -+ } -+ -+ free(*s); -+ *s = copy; -+ } else { -+ if (sscanf(param, format, var) != 1) { -+ fuse_log(FUSE_LOG_ERR, "fuse: invalid parameter in option `%s'\n", -+ arg); -+ return -1; -+ } -+ } -+ return 0; - } - --static int process_opt(struct fuse_opt_context *ctx, -- const struct fuse_opt *opt, unsigned sep, -- const char *arg, int iso) -+static int process_opt(struct fuse_opt_context *ctx, const struct fuse_opt *opt, -+ unsigned sep, const char *arg, int iso) - { -- if (opt->offset == -1U) { -- if (call_proc(ctx, arg, opt->value, iso) == -1) -- return -1; -- } else { -- void *var = (char *)ctx->data + opt->offset; -- if (sep && opt->templ[sep + 1]) { -- const char *param = arg + sep; -- if (opt->templ[sep] == '=') -- param ++; -- if (process_opt_param(var, opt->templ + sep + 1, -- param, arg) == -1) -- return -1; -- } else -- *(int *)var = opt->value; -- } -- return 0; -+ if (opt->offset == -1U) { -+ if (call_proc(ctx, arg, opt->value, iso) == -1) { -+ return -1; -+ } -+ } else { -+ void *var = (char *)ctx->data + opt->offset; -+ if (sep && opt->templ[sep + 1]) { -+ const char *param = arg + sep; -+ if (opt->templ[sep] == '=') { -+ param++; -+ } -+ if (process_opt_param(var, opt->templ + sep + 1, param, arg) == -+ -1) { -+ return -1; -+ } -+ } else { -+ *(int *)var = opt->value; -+ } -+ } -+ return 0; - } - - static int process_opt_sep_arg(struct 
fuse_opt_context *ctx, -- const struct fuse_opt *opt, unsigned sep, -- const char *arg, int iso) -+ const struct fuse_opt *opt, unsigned sep, -+ const char *arg, int iso) - { -- int res; -- char *newarg; -- char *param; -- -- if (next_arg(ctx, arg) == -1) -- return -1; -- -- param = ctx->argv[ctx->argctr]; -- newarg = malloc(sep + strlen(param) + 1); -- if (!newarg) -- return alloc_failed(); -- -- memcpy(newarg, arg, sep); -- strcpy(newarg + sep, param); -- res = process_opt(ctx, opt, sep, newarg, iso); -- free(newarg); -- -- return res; -+ int res; -+ char *newarg; -+ char *param; -+ -+ if (next_arg(ctx, arg) == -1) { -+ return -1; -+ } -+ -+ param = ctx->argv[ctx->argctr]; -+ newarg = malloc(sep + strlen(param) + 1); -+ if (!newarg) { -+ return alloc_failed(); -+ } -+ -+ memcpy(newarg, arg, sep); -+ strcpy(newarg + sep, param); -+ res = process_opt(ctx, opt, sep, newarg, iso); -+ free(newarg); -+ -+ return res; - } - - static int process_gopt(struct fuse_opt_context *ctx, const char *arg, int iso) - { -- unsigned sep; -- const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); -- if (opt) { -- for (; opt; opt = find_opt(opt + 1, arg, &sep)) { -- int res; -- if (sep && opt->templ[sep] == ' ' && !arg[sep]) -- res = process_opt_sep_arg(ctx, opt, sep, arg, -- iso); -- else -- res = process_opt(ctx, opt, sep, arg, iso); -- if (res == -1) -- return -1; -- } -- return 0; -- } else -- return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); -+ unsigned sep; -+ const struct fuse_opt *opt = find_opt(ctx->opt, arg, &sep); -+ if (opt) { -+ for (; opt; opt = find_opt(opt + 1, arg, &sep)) { -+ int res; -+ if (sep && opt->templ[sep] == ' ' && !arg[sep]) { -+ res = process_opt_sep_arg(ctx, opt, sep, arg, iso); -+ } else { -+ res = process_opt(ctx, opt, sep, arg, iso); -+ } -+ if (res == -1) { -+ return -1; -+ } -+ } -+ return 0; -+ } else { -+ return call_proc(ctx, arg, FUSE_OPT_KEY_OPT, iso); -+ } - } - - static int process_real_option_group(struct fuse_opt_context *ctx, char 
*opts) - { -- char *s = opts; -- char *d = s; -- int end = 0; -- -- while (!end) { -- if (*s == '\0') -- end = 1; -- if (*s == ',' || end) { -- int res; -- -- *d = '\0'; -- res = process_gopt(ctx, opts, 1); -- if (res == -1) -- return -1; -- d = opts; -- } else { -- if (s[0] == '\\' && s[1] != '\0') { -- s++; -- if (s[0] >= '0' && s[0] <= '3' && -- s[1] >= '0' && s[1] <= '7' && -- s[2] >= '0' && s[2] <= '7') { -- *d++ = (s[0] - '0') * 0100 + -- (s[1] - '0') * 0010 + -- (s[2] - '0'); -- s += 2; -- } else { -- *d++ = *s; -- } -- } else { -- *d++ = *s; -- } -- } -- s++; -- } -- -- return 0; -+ char *s = opts; -+ char *d = s; -+ int end = 0; -+ -+ while (!end) { -+ if (*s == '\0') { -+ end = 1; -+ } -+ if (*s == ',' || end) { -+ int res; -+ -+ *d = '\0'; -+ res = process_gopt(ctx, opts, 1); -+ if (res == -1) { -+ return -1; -+ } -+ d = opts; -+ } else { -+ if (s[0] == '\\' && s[1] != '\0') { -+ s++; -+ if (s[0] >= '0' && s[0] <= '3' && s[1] >= '0' && s[1] <= '7' && -+ s[2] >= '0' && s[2] <= '7') { -+ *d++ = (s[0] - '0') * 0100 + (s[1] - '0') * 0010 + -+ (s[2] - '0'); -+ s += 2; -+ } else { -+ *d++ = *s; -+ } -+ } else { -+ *d++ = *s; -+ } -+ } -+ s++; -+ } -+ -+ return 0; - } - - static int process_option_group(struct fuse_opt_context *ctx, const char *opts) - { -- int res; -- char *copy = strdup(opts); -- -- if (!copy) { -- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -- return -1; -- } -- res = process_real_option_group(ctx, copy); -- free(copy); -- return res; -+ int res; -+ char *copy = strdup(opts); -+ -+ if (!copy) { -+ fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -+ return -1; -+ } -+ res = process_real_option_group(ctx, copy); -+ free(copy); -+ return res; - } - - static int process_one(struct fuse_opt_context *ctx, const char *arg) - { -- if (ctx->nonopt || arg[0] != '-') -- return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); -- else if (arg[1] == 'o') { -- if (arg[2]) -- return process_option_group(ctx, arg + 2); -- else { -- if 
(next_arg(ctx, arg) == -1) -- return -1; -- -- return process_option_group(ctx, -- ctx->argv[ctx->argctr]); -- } -- } else if (arg[1] == '-' && !arg[2]) { -- if (add_arg(ctx, arg) == -1) -- return -1; -- ctx->nonopt = ctx->outargs.argc; -- return 0; -- } else -- return process_gopt(ctx, arg, 0); -+ if (ctx->nonopt || arg[0] != '-') { -+ return call_proc(ctx, arg, FUSE_OPT_KEY_NONOPT, 0); -+ } else if (arg[1] == 'o') { -+ if (arg[2]) { -+ return process_option_group(ctx, arg + 2); -+ } else { -+ if (next_arg(ctx, arg) == -1) { -+ return -1; -+ } -+ -+ return process_option_group(ctx, ctx->argv[ctx->argctr]); -+ } -+ } else if (arg[1] == '-' && !arg[2]) { -+ if (add_arg(ctx, arg) == -1) { -+ return -1; -+ } -+ ctx->nonopt = ctx->outargs.argc; -+ return 0; -+ } else { -+ return process_gopt(ctx, arg, 0); -+ } - } - - static int opt_parse(struct fuse_opt_context *ctx) - { -- if (ctx->argc) { -- if (add_arg(ctx, ctx->argv[0]) == -1) -- return -1; -- } -- -- for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) -- if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) -- return -1; -- -- if (ctx->opts) { -- if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || -- fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) -- return -1; -- } -- -- /* If option separator ("--") is the last argument, remove it */ -- if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && -- strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { -- free(ctx->outargs.argv[ctx->outargs.argc - 1]); -- ctx->outargs.argv[--ctx->outargs.argc] = NULL; -- } -- -- return 0; -+ if (ctx->argc) { -+ if (add_arg(ctx, ctx->argv[0]) == -1) { -+ return -1; -+ } -+ } -+ -+ for (ctx->argctr = 1; ctx->argctr < ctx->argc; ctx->argctr++) { -+ if (process_one(ctx, ctx->argv[ctx->argctr]) == -1) { -+ return -1; -+ } -+ } -+ -+ if (ctx->opts) { -+ if (fuse_opt_insert_arg(&ctx->outargs, 1, "-o") == -1 || -+ fuse_opt_insert_arg(&ctx->outargs, 2, ctx->opts) == -1) { -+ return -1; -+ } -+ } -+ -+ /* If 
option separator ("--") is the last argument, remove it */ -+ if (ctx->nonopt && ctx->nonopt == ctx->outargs.argc && -+ strcmp(ctx->outargs.argv[ctx->outargs.argc - 1], "--") == 0) { -+ free(ctx->outargs.argv[ctx->outargs.argc - 1]); -+ ctx->outargs.argv[--ctx->outargs.argc] = NULL; -+ } -+ -+ return 0; - } - - int fuse_opt_parse(struct fuse_args *args, void *data, -- const struct fuse_opt opts[], fuse_opt_proc_t proc) -+ const struct fuse_opt opts[], fuse_opt_proc_t proc) - { -- int res; -- struct fuse_opt_context ctx = { -- .data = data, -- .opt = opts, -- .proc = proc, -- }; -- -- if (!args || !args->argv || !args->argc) -- return 0; -- -- ctx.argc = args->argc; -- ctx.argv = args->argv; -- -- res = opt_parse(&ctx); -- if (res != -1) { -- struct fuse_args tmp = *args; -- *args = ctx.outargs; -- ctx.outargs = tmp; -- } -- free(ctx.opts); -- fuse_opt_free_args(&ctx.outargs); -- return res; -+ int res; -+ struct fuse_opt_context ctx = { -+ .data = data, -+ .opt = opts, -+ .proc = proc, -+ }; -+ -+ if (!args || !args->argv || !args->argc) { -+ return 0; -+ } -+ -+ ctx.argc = args->argc; -+ ctx.argv = args->argv; -+ -+ res = opt_parse(&ctx); -+ if (res != -1) { -+ struct fuse_args tmp = *args; -+ *args = ctx.outargs; -+ ctx.outargs = tmp; -+ } -+ free(ctx.opts); -+ fuse_opt_free_args(&ctx.outargs); -+ return res; - } -diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h -index 6910255..8f59b4d 100644 ---- a/tools/virtiofsd/fuse_opt.h -+++ b/tools/virtiofsd/fuse_opt.h -@@ -1,10 +1,10 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. 
-+ */ - - #ifndef FUSE_OPT_H_ - #define FUSE_OPT_H_ -@@ -37,7 +37,7 @@ - * - * - 'offsetof(struct foo, member)' actions i) and iii) - * -- * - -1 action ii) -+ * - -1 action ii) - * - * The 'offsetof()' macro is defined in the header. - * -@@ -48,7 +48,7 @@ - * - * The types of templates are: - * -- * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only -+ * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only - * themselves. Invalid values are "--" and anything beginning - * with "-o" - * -@@ -71,58 +71,67 @@ - * freed. - */ - struct fuse_opt { -- /** Matching template and optional parameter formatting */ -- const char *templ; -+ /** Matching template and optional parameter formatting */ -+ const char *templ; - -- /** -- * Offset of variable within 'data' parameter of fuse_opt_parse() -- * or -1 -- */ -- unsigned long offset; -+ /** -+ * Offset of variable within 'data' parameter of fuse_opt_parse() -+ * or -1 -+ */ -+ unsigned long offset; - -- /** -- * Value to set the variable to, or to be passed as 'key' to the -- * processing function. Ignored if template has a format -- */ -- int value; -+ /** -+ * Value to set the variable to, or to be passed as 'key' to the -+ * processing function. Ignored if template has a format -+ */ -+ int value; - }; - - /** -- * Key option. In case of a match, the processing function will be -+ * Key option. In case of a match, the processing function will be - * called with the specified key. - */ --#define FUSE_OPT_KEY(templ, key) { templ, -1U, key } -+#define FUSE_OPT_KEY(templ, key) \ -+ { \ -+ templ, -1U, key \ -+ } - - /** -- * Last option. An array of 'struct fuse_opt' must end with a NULL -+ * Last option. 
An array of 'struct fuse_opt' must end with a NULL - * template value - */ --#define FUSE_OPT_END { NULL, 0, 0 } -+#define FUSE_OPT_END \ -+ { \ -+ NULL, 0, 0 \ -+ } - - /** - * Argument list - */ - struct fuse_args { -- /** Argument count */ -- int argc; -+ /** Argument count */ -+ int argc; - -- /** Argument vector. NULL terminated */ -- char **argv; -+ /** Argument vector. NULL terminated */ -+ char **argv; - -- /** Is 'argv' allocated? */ -- int allocated; -+ /** Is 'argv' allocated? */ -+ int allocated; - }; - - /** - * Initializer for 'struct fuse_args' - */ --#define FUSE_ARGS_INIT(argc, argv) { argc, argv, 0 } -+#define FUSE_ARGS_INIT(argc, argv) \ -+ { \ -+ argc, argv, 0 \ -+ } - - /** - * Key value passed to the processing function if an option did not - * match any template - */ --#define FUSE_OPT_KEY_OPT -1 -+#define FUSE_OPT_KEY_OPT -1 - - /** - * Key value passed to the processing function for all non-options -@@ -130,7 +139,7 @@ struct fuse_args { - * Non-options are the arguments beginning with a character other than - * '-' or all arguments after the special '--' option - */ --#define FUSE_OPT_KEY_NONOPT -2 -+#define FUSE_OPT_KEY_NONOPT -2 - - /** - * Special key value for options to keep -@@ -174,7 +183,7 @@ struct fuse_args { - * @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept - */ - typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, -- struct fuse_args *outargs); -+ struct fuse_args *outargs); - - /** - * Option parsing function -@@ -197,7 +206,7 @@ typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, - * @return -1 on error, 0 on success - */ - int fuse_opt_parse(struct fuse_args *args, void *data, -- const struct fuse_opt opts[], fuse_opt_proc_t proc); -+ const struct fuse_opt opts[], fuse_opt_proc_t proc); - - /** - * Add an option to a comma separated option list -diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c -index 4271947..19d6791 100644 ---- 
a/tools/virtiofsd/fuse_signals.c -+++ b/tools/virtiofsd/fuse_signals.c -@@ -1,91 +1,95 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- Utility functions for setting signal handlers. -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * Utility functions for setting signal handlers. -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB -+ */ - - #include "config.h" --#include "fuse_lowlevel.h" - #include "fuse_i.h" -+#include "fuse_lowlevel.h" - --#include --#include - #include -+#include - #include -+#include - - static struct fuse_session *fuse_instance; - - static void exit_handler(int sig) - { -- if (fuse_instance) { -- fuse_session_exit(fuse_instance); -- if(sig <= 0) { -- fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); -- abort(); -- } -- fuse_instance->error = sig; -- } -+ if (fuse_instance) { -+ fuse_session_exit(fuse_instance); -+ if (sig <= 0) { -+ fuse_log(FUSE_LOG_ERR, "assertion error: signal value <= 0\n"); -+ abort(); -+ } -+ fuse_instance->error = sig; -+ } - } - - static void do_nothing(int sig) - { -- (void) sig; -+ (void)sig; - } - - static int set_one_signal_handler(int sig, void (*handler)(int), int remove) - { -- struct sigaction sa; -- struct sigaction old_sa; -+ struct sigaction sa; -+ struct sigaction old_sa; - -- memset(&sa, 0, sizeof(struct sigaction)); -- sa.sa_handler = remove ? SIG_DFL : handler; -- sigemptyset(&(sa.sa_mask)); -- sa.sa_flags = 0; -+ memset(&sa, 0, sizeof(struct sigaction)); -+ sa.sa_handler = remove ? 
SIG_DFL : handler; -+ sigemptyset(&(sa.sa_mask)); -+ sa.sa_flags = 0; - -- if (sigaction(sig, NULL, &old_sa) == -1) { -- perror("fuse: cannot get old signal handler"); -- return -1; -- } -+ if (sigaction(sig, NULL, &old_sa) == -1) { -+ perror("fuse: cannot get old signal handler"); -+ return -1; -+ } - -- if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && -- sigaction(sig, &sa, NULL) == -1) { -- perror("fuse: cannot set signal handler"); -- return -1; -- } -- return 0; -+ if (old_sa.sa_handler == (remove ? handler : SIG_DFL) && -+ sigaction(sig, &sa, NULL) == -1) { -+ perror("fuse: cannot set signal handler"); -+ return -1; -+ } -+ return 0; - } - - int fuse_set_signal_handlers(struct fuse_session *se) - { -- /* If we used SIG_IGN instead of the do_nothing function, -- then we would be unable to tell if we set SIG_IGN (and -- thus should reset to SIG_DFL in fuse_remove_signal_handlers) -- or if it was already set to SIG_IGN (and should be left -- untouched. */ -- if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || -- set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || -- set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || -- set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) -- return -1; -+ /* -+ * If we used SIG_IGN instead of the do_nothing function, -+ * then we would be unable to tell if we set SIG_IGN (and -+ * thus should reset to SIG_DFL in fuse_remove_signal_handlers) -+ * or if it was already set to SIG_IGN (and should be left -+ * untouched. 
-+ */ -+ if (set_one_signal_handler(SIGHUP, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGINT, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGTERM, exit_handler, 0) == -1 || -+ set_one_signal_handler(SIGPIPE, do_nothing, 0) == -1) { -+ return -1; -+ } - -- fuse_instance = se; -- return 0; -+ fuse_instance = se; -+ return 0; - } - - void fuse_remove_signal_handlers(struct fuse_session *se) - { -- if (fuse_instance != se) -- fuse_log(FUSE_LOG_ERR, -- "fuse: fuse_remove_signal_handlers: unknown session\n"); -- else -- fuse_instance = NULL; -+ if (fuse_instance != se) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: fuse_remove_signal_handlers: unknown session\n"); -+ } else { -+ fuse_instance = NULL; -+ } - -- set_one_signal_handler(SIGHUP, exit_handler, 1); -- set_one_signal_handler(SIGINT, exit_handler, 1); -- set_one_signal_handler(SIGTERM, exit_handler, 1); -- set_one_signal_handler(SIGPIPE, do_nothing, 1); -+ set_one_signal_handler(SIGHUP, exit_handler, 1); -+ set_one_signal_handler(SIGINT, exit_handler, 1); -+ set_one_signal_handler(SIGTERM, exit_handler, 1); -+ set_one_signal_handler(SIGPIPE, do_nothing, 1); - } -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 5a2e64c..5711dd2 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -1,297 +1,309 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * Helper functions to create (simple) standalone programs. With the -+ * aid of these functions it should be possible to create full FUSE -+ * file system by implementing nothing but the request handlers. - -- Helper functions to create (simple) standalone programs. With the -- aid of these functions it should be possible to create full FUSE -- file system by implementing nothing but the request handlers. -- -- This program can be distributed under the terms of the GNU LGPLv2. -- See the file COPYING.LIB. 
--*/ -+ * This program can be distributed under the terms of the GNU LGPLv2. -+ * See the file COPYING.LIB. -+ */ - - #include "config.h" - #include "fuse_i.h" -+#include "fuse_lowlevel.h" - #include "fuse_misc.h" - #include "fuse_opt.h" --#include "fuse_lowlevel.h" - #include "mount_util.h" - -+#include -+#include -+#include - #include - #include --#include --#include - #include --#include --#include - #include -+#include - --#define FUSE_HELPER_OPT(t, p) \ -- { t, offsetof(struct fuse_cmdline_opts, p), 1 } -+#define FUSE_HELPER_OPT(t, p) \ -+ { \ -+ t, offsetof(struct fuse_cmdline_opts, p), 1 \ -+ } - - static const struct fuse_opt fuse_helper_opts[] = { -- FUSE_HELPER_OPT("-h", show_help), -- FUSE_HELPER_OPT("--help", show_help), -- FUSE_HELPER_OPT("-V", show_version), -- FUSE_HELPER_OPT("--version", show_version), -- FUSE_HELPER_OPT("-d", debug), -- FUSE_HELPER_OPT("debug", debug), -- FUSE_HELPER_OPT("-d", foreground), -- FUSE_HELPER_OPT("debug", foreground), -- FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), -- FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), -- FUSE_HELPER_OPT("-f", foreground), -- FUSE_HELPER_OPT("fsname=", nodefault_subtype), -- FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), -- FUSE_HELPER_OPT("subtype=", nodefault_subtype), -- FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), -- FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), -- FUSE_OPT_END -+ FUSE_HELPER_OPT("-h", show_help), -+ FUSE_HELPER_OPT("--help", show_help), -+ FUSE_HELPER_OPT("-V", show_version), -+ FUSE_HELPER_OPT("--version", show_version), -+ FUSE_HELPER_OPT("-d", debug), -+ FUSE_HELPER_OPT("debug", debug), -+ FUSE_HELPER_OPT("-d", foreground), -+ FUSE_HELPER_OPT("debug", foreground), -+ FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), -+ FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), -+ FUSE_HELPER_OPT("-f", foreground), -+ FUSE_HELPER_OPT("fsname=", nodefault_subtype), -+ FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), -+ FUSE_HELPER_OPT("subtype=", nodefault_subtype), -+ FUSE_OPT_KEY("subtype=", 
FUSE_OPT_KEY_KEEP), -+ FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), -+ FUSE_OPT_END - }; - - struct fuse_conn_info_opts { -- int atomic_o_trunc; -- int no_remote_posix_lock; -- int no_remote_flock; -- int splice_write; -- int splice_move; -- int splice_read; -- int no_splice_write; -- int no_splice_move; -- int no_splice_read; -- int auto_inval_data; -- int no_auto_inval_data; -- int no_readdirplus; -- int no_readdirplus_auto; -- int async_dio; -- int no_async_dio; -- int writeback_cache; -- int no_writeback_cache; -- int async_read; -- int sync_read; -- unsigned max_write; -- unsigned max_readahead; -- unsigned max_background; -- unsigned congestion_threshold; -- unsigned time_gran; -- int set_max_write; -- int set_max_readahead; -- int set_max_background; -- int set_congestion_threshold; -- int set_time_gran; -+ int atomic_o_trunc; -+ int no_remote_posix_lock; -+ int no_remote_flock; -+ int splice_write; -+ int splice_move; -+ int splice_read; -+ int no_splice_write; -+ int no_splice_move; -+ int no_splice_read; -+ int auto_inval_data; -+ int no_auto_inval_data; -+ int no_readdirplus; -+ int no_readdirplus_auto; -+ int async_dio; -+ int no_async_dio; -+ int writeback_cache; -+ int no_writeback_cache; -+ int async_read; -+ int sync_read; -+ unsigned max_write; -+ unsigned max_readahead; -+ unsigned max_background; -+ unsigned congestion_threshold; -+ unsigned time_gran; -+ int set_max_write; -+ int set_max_readahead; -+ int set_max_background; -+ int set_congestion_threshold; -+ int set_time_gran; - }; - --#define CONN_OPTION(t, p, v) \ -- { t, offsetof(struct fuse_conn_info_opts, p), v } -+#define CONN_OPTION(t, p, v) \ -+ { \ -+ t, offsetof(struct fuse_conn_info_opts, p), v \ -+ } - static const struct fuse_opt conn_info_opt_spec[] = { -- CONN_OPTION("max_write=%u", max_write, 0), -- CONN_OPTION("max_write=", set_max_write, 1), -- CONN_OPTION("max_readahead=%u", max_readahead, 0), -- CONN_OPTION("max_readahead=", set_max_readahead, 1), -- 
CONN_OPTION("max_background=%u", max_background, 0), -- CONN_OPTION("max_background=", set_max_background, 1), -- CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), -- CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), -- CONN_OPTION("sync_read", sync_read, 1), -- CONN_OPTION("async_read", async_read, 1), -- CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), -- CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), -- CONN_OPTION("no_remote_lock", no_remote_flock, 1), -- CONN_OPTION("no_remote_flock", no_remote_flock, 1), -- CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), -- CONN_OPTION("splice_write", splice_write, 1), -- CONN_OPTION("no_splice_write", no_splice_write, 1), -- CONN_OPTION("splice_move", splice_move, 1), -- CONN_OPTION("no_splice_move", no_splice_move, 1), -- CONN_OPTION("splice_read", splice_read, 1), -- CONN_OPTION("no_splice_read", no_splice_read, 1), -- CONN_OPTION("auto_inval_data", auto_inval_data, 1), -- CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), -- CONN_OPTION("readdirplus=no", no_readdirplus, 1), -- CONN_OPTION("readdirplus=yes", no_readdirplus, 0), -- CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), -- CONN_OPTION("readdirplus=auto", no_readdirplus, 0), -- CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), -- CONN_OPTION("async_dio", async_dio, 1), -- CONN_OPTION("no_async_dio", no_async_dio, 1), -- CONN_OPTION("writeback_cache", writeback_cache, 1), -- CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), -- CONN_OPTION("time_gran=%u", time_gran, 0), -- CONN_OPTION("time_gran=", set_time_gran, 1), -- FUSE_OPT_END -+ CONN_OPTION("max_write=%u", max_write, 0), -+ CONN_OPTION("max_write=", set_max_write, 1), -+ CONN_OPTION("max_readahead=%u", max_readahead, 0), -+ CONN_OPTION("max_readahead=", set_max_readahead, 1), -+ CONN_OPTION("max_background=%u", max_background, 0), -+ CONN_OPTION("max_background=", set_max_background, 1), -+ 
CONN_OPTION("congestion_threshold=%u", congestion_threshold, 0), -+ CONN_OPTION("congestion_threshold=", set_congestion_threshold, 1), -+ CONN_OPTION("sync_read", sync_read, 1), -+ CONN_OPTION("async_read", async_read, 1), -+ CONN_OPTION("atomic_o_trunc", atomic_o_trunc, 1), -+ CONN_OPTION("no_remote_lock", no_remote_posix_lock, 1), -+ CONN_OPTION("no_remote_lock", no_remote_flock, 1), -+ CONN_OPTION("no_remote_flock", no_remote_flock, 1), -+ CONN_OPTION("no_remote_posix_lock", no_remote_posix_lock, 1), -+ CONN_OPTION("splice_write", splice_write, 1), -+ CONN_OPTION("no_splice_write", no_splice_write, 1), -+ CONN_OPTION("splice_move", splice_move, 1), -+ CONN_OPTION("no_splice_move", no_splice_move, 1), -+ CONN_OPTION("splice_read", splice_read, 1), -+ CONN_OPTION("no_splice_read", no_splice_read, 1), -+ CONN_OPTION("auto_inval_data", auto_inval_data, 1), -+ CONN_OPTION("no_auto_inval_data", no_auto_inval_data, 1), -+ CONN_OPTION("readdirplus=no", no_readdirplus, 1), -+ CONN_OPTION("readdirplus=yes", no_readdirplus, 0), -+ CONN_OPTION("readdirplus=yes", no_readdirplus_auto, 1), -+ CONN_OPTION("readdirplus=auto", no_readdirplus, 0), -+ CONN_OPTION("readdirplus=auto", no_readdirplus_auto, 0), -+ CONN_OPTION("async_dio", async_dio, 1), -+ CONN_OPTION("no_async_dio", no_async_dio, 1), -+ CONN_OPTION("writeback_cache", writeback_cache, 1), -+ CONN_OPTION("no_writeback_cache", no_writeback_cache, 1), -+ CONN_OPTION("time_gran=%u", time_gran, 0), -+ CONN_OPTION("time_gran=", set_time_gran, 1), -+ FUSE_OPT_END - }; - - - void fuse_cmdline_help(void) - { -- printf(" -h --help print help\n" -- " -V --version print version\n" -- " -d -o debug enable debug output (implies -f)\n" -- " -f foreground operation\n" -- " -o max_idle_threads the maximum number of idle worker threads\n" -- " allowed (default: 10)\n"); -+ printf( -+ " -h --help print help\n" -+ " -V --version print version\n" -+ " -d -o debug enable debug output (implies -f)\n" -+ " -f foreground operation\n" -+ " -o 
max_idle_threads the maximum number of idle worker threads\n" -+ " allowed (default: 10)\n"); - } - - static int fuse_helper_opt_proc(void *data, const char *arg, int key, -- struct fuse_args *outargs) -+ struct fuse_args *outargs) - { -- (void) outargs; -- struct fuse_cmdline_opts *opts = data; -- -- switch (key) { -- case FUSE_OPT_KEY_NONOPT: -- if (!opts->mountpoint) { -- if (fuse_mnt_parse_fuse_fd(arg) != -1) { -- return fuse_opt_add_opt(&opts->mountpoint, arg); -- } -- -- char mountpoint[PATH_MAX] = ""; -- if (realpath(arg, mountpoint) == NULL) { -- fuse_log(FUSE_LOG_ERR, -- "fuse: bad mount point `%s': %s\n", -- arg, strerror(errno)); -- return -1; -- } -- return fuse_opt_add_opt(&opts->mountpoint, mountpoint); -- } else { -- fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); -- return -1; -- } -- -- default: -- /* Pass through unknown options */ -- return 1; -- } -+ (void)outargs; -+ struct fuse_cmdline_opts *opts = data; -+ -+ switch (key) { -+ case FUSE_OPT_KEY_NONOPT: -+ if (!opts->mountpoint) { -+ if (fuse_mnt_parse_fuse_fd(arg) != -1) { -+ return fuse_opt_add_opt(&opts->mountpoint, arg); -+ } -+ -+ char mountpoint[PATH_MAX] = ""; -+ if (realpath(arg, mountpoint) == NULL) { -+ fuse_log(FUSE_LOG_ERR, "fuse: bad mount point `%s': %s\n", arg, -+ strerror(errno)); -+ return -1; -+ } -+ return fuse_opt_add_opt(&opts->mountpoint, mountpoint); -+ } else { -+ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); -+ return -1; -+ } -+ -+ default: -+ /* Pass through unknown options */ -+ return 1; -+ } - } - --int fuse_parse_cmdline(struct fuse_args *args, -- struct fuse_cmdline_opts *opts) -+int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) - { -- memset(opts, 0, sizeof(struct fuse_cmdline_opts)); -+ memset(opts, 0, sizeof(struct fuse_cmdline_opts)); - -- opts->max_idle_threads = 10; -+ opts->max_idle_threads = 10; - -- if (fuse_opt_parse(args, opts, fuse_helper_opts, -- fuse_helper_opt_proc) == -1) -- return -1; -+ 
if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) == -+ -1) { -+ return -1; -+ } - -- return 0; -+ return 0; - } - - - int fuse_daemonize(int foreground) - { -- if (!foreground) { -- int nullfd; -- int waiter[2]; -- char completed; -- -- if (pipe(waiter)) { -- perror("fuse_daemonize: pipe"); -- return -1; -- } -- -- /* -- * demonize current process by forking it and killing the -- * parent. This makes current process as a child of 'init'. -- */ -- switch(fork()) { -- case -1: -- perror("fuse_daemonize: fork"); -- return -1; -- case 0: -- break; -- default: -- (void) read(waiter[0], &completed, sizeof(completed)); -- _exit(0); -- } -- -- if (setsid() == -1) { -- perror("fuse_daemonize: setsid"); -- return -1; -- } -- -- (void) chdir("/"); -- -- nullfd = open("/dev/null", O_RDWR, 0); -- if (nullfd != -1) { -- (void) dup2(nullfd, 0); -- (void) dup2(nullfd, 1); -- (void) dup2(nullfd, 2); -- if (nullfd > 2) -- close(nullfd); -- } -- -- /* Propagate completion of daemon initialization */ -- completed = 1; -- (void) write(waiter[1], &completed, sizeof(completed)); -- close(waiter[0]); -- close(waiter[1]); -- } else { -- (void) chdir("/"); -- } -- return 0; -+ if (!foreground) { -+ int nullfd; -+ int waiter[2]; -+ char completed; -+ -+ if (pipe(waiter)) { -+ perror("fuse_daemonize: pipe"); -+ return -1; -+ } -+ -+ /* -+ * demonize current process by forking it and killing the -+ * parent. This makes current process as a child of 'init'. 
-+ */ -+ switch (fork()) { -+ case -1: -+ perror("fuse_daemonize: fork"); -+ return -1; -+ case 0: -+ break; -+ default: -+ (void)read(waiter[0], &completed, sizeof(completed)); -+ _exit(0); -+ } -+ -+ if (setsid() == -1) { -+ perror("fuse_daemonize: setsid"); -+ return -1; -+ } -+ -+ (void)chdir("/"); -+ -+ nullfd = open("/dev/null", O_RDWR, 0); -+ if (nullfd != -1) { -+ (void)dup2(nullfd, 0); -+ (void)dup2(nullfd, 1); -+ (void)dup2(nullfd, 2); -+ if (nullfd > 2) { -+ close(nullfd); -+ } -+ } -+ -+ /* Propagate completion of daemon initialization */ -+ completed = 1; -+ (void)write(waiter[1], &completed, sizeof(completed)); -+ close(waiter[0]); -+ close(waiter[1]); -+ } else { -+ (void)chdir("/"); -+ } -+ return 0; - } - - void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, -- struct fuse_conn_info *conn) -+ struct fuse_conn_info *conn) - { -- if(opts->set_max_write) -- conn->max_write = opts->max_write; -- if(opts->set_max_background) -- conn->max_background = opts->max_background; -- if(opts->set_congestion_threshold) -- conn->congestion_threshold = opts->congestion_threshold; -- if(opts->set_time_gran) -- conn->time_gran = opts->time_gran; -- if(opts->set_max_readahead) -- conn->max_readahead = opts->max_readahead; -- --#define LL_ENABLE(cond,cap) \ -- if (cond) conn->want |= (cap) --#define LL_DISABLE(cond,cap) \ -- if (cond) conn->want &= ~(cap) -- -- LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); -- LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); -- -- LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); -- LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); -- -- LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); -- LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); -- -- LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -- LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -- -- LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); -- LL_DISABLE(opts->no_readdirplus_auto, 
FUSE_CAP_READDIRPLUS_AUTO); -- -- LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); -- LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); -- -- LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -- LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -- -- LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); -- LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); -- -- LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); -- LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); -+ if (opts->set_max_write) { -+ conn->max_write = opts->max_write; -+ } -+ if (opts->set_max_background) { -+ conn->max_background = opts->max_background; -+ } -+ if (opts->set_congestion_threshold) { -+ conn->congestion_threshold = opts->congestion_threshold; -+ } -+ if (opts->set_time_gran) { -+ conn->time_gran = opts->time_gran; -+ } -+ if (opts->set_max_readahead) { -+ conn->max_readahead = opts->max_readahead; -+ } -+ -+#define LL_ENABLE(cond, cap) \ -+ if (cond) \ -+ conn->want |= (cap) -+#define LL_DISABLE(cond, cap) \ -+ if (cond) \ -+ conn->want &= ~(cap) -+ -+ LL_ENABLE(opts->splice_read, FUSE_CAP_SPLICE_READ); -+ LL_DISABLE(opts->no_splice_read, FUSE_CAP_SPLICE_READ); -+ -+ LL_ENABLE(opts->splice_write, FUSE_CAP_SPLICE_WRITE); -+ LL_DISABLE(opts->no_splice_write, FUSE_CAP_SPLICE_WRITE); -+ -+ LL_ENABLE(opts->splice_move, FUSE_CAP_SPLICE_MOVE); -+ LL_DISABLE(opts->no_splice_move, FUSE_CAP_SPLICE_MOVE); -+ -+ LL_ENABLE(opts->auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -+ LL_DISABLE(opts->no_auto_inval_data, FUSE_CAP_AUTO_INVAL_DATA); -+ -+ LL_DISABLE(opts->no_readdirplus, FUSE_CAP_READDIRPLUS); -+ LL_DISABLE(opts->no_readdirplus_auto, FUSE_CAP_READDIRPLUS_AUTO); -+ -+ LL_ENABLE(opts->async_dio, FUSE_CAP_ASYNC_DIO); -+ LL_DISABLE(opts->no_async_dio, FUSE_CAP_ASYNC_DIO); -+ -+ LL_ENABLE(opts->writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -+ LL_DISABLE(opts->no_writeback_cache, FUSE_CAP_WRITEBACK_CACHE); -+ -+ LL_ENABLE(opts->async_read, FUSE_CAP_ASYNC_READ); -+ 
LL_DISABLE(opts->sync_read, FUSE_CAP_ASYNC_READ); -+ -+ LL_DISABLE(opts->no_remote_posix_lock, FUSE_CAP_POSIX_LOCKS); -+ LL_DISABLE(opts->no_remote_flock, FUSE_CAP_FLOCK_LOCKS); - } - --struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) -+struct fuse_conn_info_opts *fuse_parse_conn_info_opts(struct fuse_args *args) - { -- struct fuse_conn_info_opts *opts; -- -- opts = calloc(1, sizeof(struct fuse_conn_info_opts)); -- if(opts == NULL) { -- fuse_log(FUSE_LOG_ERR, "calloc failed\n"); -- return NULL; -- } -- if(fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { -- free(opts); -- return NULL; -- } -- return opts; -+ struct fuse_conn_info_opts *opts; -+ -+ opts = calloc(1, sizeof(struct fuse_conn_info_opts)); -+ if (opts == NULL) { -+ fuse_log(FUSE_LOG_ERR, "calloc failed\n"); -+ return NULL; -+ } -+ if (fuse_opt_parse(args, opts, conn_info_opt_spec, NULL) == -1) { -+ free(opts); -+ return NULL; -+ } -+ return opts; - } -diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h -index 7c5f561..0b98275 100644 ---- a/tools/virtiofsd/passthrough_helpers.h -+++ b/tools/virtiofsd/passthrough_helpers.h -@@ -28,23 +28,24 @@ - * operation - */ - static int mknod_wrapper(int dirfd, const char *path, const char *link, -- int mode, dev_t rdev) -+ int mode, dev_t rdev) - { -- int res; -+ int res; - -- if (S_ISREG(mode)) { -- res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); -- if (res >= 0) -- res = close(res); -- } else if (S_ISDIR(mode)) { -- res = mkdirat(dirfd, path, mode); -- } else if (S_ISLNK(mode) && link != NULL) { -- res = symlinkat(link, dirfd, path); -- } else if (S_ISFIFO(mode)) { -- res = mkfifoat(dirfd, path, mode); -- } else { -- res = mknodat(dirfd, path, mode, rdev); -- } -+ if (S_ISREG(mode)) { -+ res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); -+ if (res >= 0) { -+ res = close(res); -+ } -+ } else if (S_ISDIR(mode)) { -+ res = mkdirat(dirfd, path, mode); -+ } else if 
(S_ISLNK(mode) && link != NULL) { -+ res = symlinkat(link, dirfd, path); -+ } else if (S_ISFIFO(mode)) { -+ res = mkfifoat(dirfd, path, mode); -+ } else { -+ res = mknodat(dirfd, path, mode, rdev); -+ } - -- return res; -+ return res; - } -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e5f7115..c5850ef 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1,12 +1,12 @@ - /* -- FUSE: Filesystem in Userspace -- Copyright (C) 2001-2007 Miklos Szeredi -- -- This program can be distributed under the terms of the GNU GPLv2. -- See the file COPYING. --*/ -+ * FUSE: Filesystem in Userspace -+ * Copyright (C) 2001-2007 Miklos Szeredi -+ * -+ * This program can be distributed under the terms of the GNU GPLv2. -+ * See the file COPYING. -+ */ - --/** @file -+/* - * - * This file system mirrors the existing file system hierarchy of the - * system, starting at the root file system. This is implemented by -@@ -28,7 +28,8 @@ - * - * Compile with: - * -- * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o passthrough_ll -+ * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o -+ * passthrough_ll - * - * ## Source code ## - * \include passthrough_ll.c -@@ -39,1299 +40,1365 @@ - - #include "config.h" - --#include --#include --#include --#include --#include --#include --#include --#include --#include - #include -+#include - #include -+#include - #include -+#include - #include -+#include -+#include -+#include -+#include -+#include - #include - #include -+#include - - #include "passthrough_helpers.h" - --/* We are re-using pointers to our `struct lo_inode` and `struct -- lo_dirp` elements as inodes. This means that we must be able to -- store uintptr_t values in a fuse_ino_t variable. The following -- incantation checks this condition at compile time. 
*/ --#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus -+/* -+ * We are re-using pointers to our `struct lo_inode` and `struct -+ * lo_dirp` elements as inodes. This means that we must be able to -+ * store uintptr_t values in a fuse_ino_t variable. The following -+ * incantation checks this condition at compile time. -+ */ -+#if defined(__GNUC__) && \ -+ (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ -+ !defined __cplusplus - _Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), -- "fuse_ino_t too small to hold uintptr_t values!"); -+ "fuse_ino_t too small to hold uintptr_t values!"); - #else --struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct \ -- { unsigned _uintptr_to_must_hold_fuse_ino_t: -- ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 1 : -1); }; -+struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { -+ unsigned _uintptr_to_must_hold_fuse_ino_t -+ : ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 1 : -1); -+}; - #endif - - struct lo_inode { -- struct lo_inode *next; /* protected by lo->mutex */ -- struct lo_inode *prev; /* protected by lo->mutex */ -- int fd; -- bool is_symlink; -- ino_t ino; -- dev_t dev; -- uint64_t refcount; /* protected by lo->mutex */ -+ struct lo_inode *next; /* protected by lo->mutex */ -+ struct lo_inode *prev; /* protected by lo->mutex */ -+ int fd; -+ bool is_symlink; -+ ino_t ino; -+ dev_t dev; -+ uint64_t refcount; /* protected by lo->mutex */ - }; - - enum { -- CACHE_NEVER, -- CACHE_NORMAL, -- CACHE_ALWAYS, -+ CACHE_NEVER, -+ CACHE_NORMAL, -+ CACHE_ALWAYS, - }; - - struct lo_data { -- pthread_mutex_t mutex; -- int debug; -- int writeback; -- int flock; -- int xattr; -- const char *source; -- double timeout; -- int cache; -- int timeout_set; -- struct lo_inode root; /* protected by lo->mutex */ -+ pthread_mutex_t mutex; -+ int debug; -+ int writeback; -+ int flock; -+ int xattr; -+ const char *source; -+ double timeout; -+ int cache; -+ int timeout_set; -+ 
struct lo_inode root; /* protected by lo->mutex */ - }; - - static const struct fuse_opt lo_opts[] = { -- { "writeback", -- offsetof(struct lo_data, writeback), 1 }, -- { "no_writeback", -- offsetof(struct lo_data, writeback), 0 }, -- { "source=%s", -- offsetof(struct lo_data, source), 0 }, -- { "flock", -- offsetof(struct lo_data, flock), 1 }, -- { "no_flock", -- offsetof(struct lo_data, flock), 0 }, -- { "xattr", -- offsetof(struct lo_data, xattr), 1 }, -- { "no_xattr", -- offsetof(struct lo_data, xattr), 0 }, -- { "timeout=%lf", -- offsetof(struct lo_data, timeout), 0 }, -- { "timeout=", -- offsetof(struct lo_data, timeout_set), 1 }, -- { "cache=never", -- offsetof(struct lo_data, cache), CACHE_NEVER }, -- { "cache=auto", -- offsetof(struct lo_data, cache), CACHE_NORMAL }, -- { "cache=always", -- offsetof(struct lo_data, cache), CACHE_ALWAYS }, -- -- FUSE_OPT_END -+ { "writeback", offsetof(struct lo_data, writeback), 1 }, -+ { "no_writeback", offsetof(struct lo_data, writeback), 0 }, -+ { "source=%s", offsetof(struct lo_data, source), 0 }, -+ { "flock", offsetof(struct lo_data, flock), 1 }, -+ { "no_flock", offsetof(struct lo_data, flock), 0 }, -+ { "xattr", offsetof(struct lo_data, xattr), 1 }, -+ { "no_xattr", offsetof(struct lo_data, xattr), 0 }, -+ { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, -+ { "timeout=", offsetof(struct lo_data, timeout_set), 1 }, -+ { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, -+ { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, -+ { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, -+ -+ FUSE_OPT_END - }; - - static struct lo_data *lo_data(fuse_req_t req) - { -- return (struct lo_data *) fuse_req_userdata(req); -+ return (struct lo_data *)fuse_req_userdata(req); - } - - static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - { -- if (ino == FUSE_ROOT_ID) -- return &lo_data(req)->root; -- else -- return (struct lo_inode *) (uintptr_t) ino; -+ if (ino == 
FUSE_ROOT_ID) { -+ return &lo_data(req)->root; -+ } else { -+ return (struct lo_inode *)(uintptr_t)ino; -+ } - } - - static int lo_fd(fuse_req_t req, fuse_ino_t ino) - { -- return lo_inode(req, ino)->fd; -+ return lo_inode(req, ino)->fd; - } - - static bool lo_debug(fuse_req_t req) - { -- return lo_data(req)->debug != 0; -+ return lo_data(req)->debug != 0; - } - --static void lo_init(void *userdata, -- struct fuse_conn_info *conn) -+static void lo_init(void *userdata, struct fuse_conn_info *conn) - { -- struct lo_data *lo = (struct lo_data*) userdata; -- -- if(conn->capable & FUSE_CAP_EXPORT_SUPPORT) -- conn->want |= FUSE_CAP_EXPORT_SUPPORT; -- -- if (lo->writeback && -- conn->capable & FUSE_CAP_WRITEBACK_CACHE) { -- if (lo->debug) -- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); -- conn->want |= FUSE_CAP_WRITEBACK_CACHE; -- } -- if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { -- if (lo->debug) -- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -- conn->want |= FUSE_CAP_FLOCK_LOCKS; -- } -+ struct lo_data *lo = (struct lo_data *)userdata; -+ -+ if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) { -+ conn->want |= FUSE_CAP_EXPORT_SUPPORT; -+ } -+ -+ if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) { -+ if (lo->debug) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); -+ } -+ conn->want |= FUSE_CAP_WRITEBACK_CACHE; -+ } -+ if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { -+ if (lo->debug) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -+ } -+ conn->want |= FUSE_CAP_FLOCK_LOCKS; -+ } - } - - static void lo_getattr(fuse_req_t req, fuse_ino_t ino, -- struct fuse_file_info *fi) -+ struct fuse_file_info *fi) - { -- int res; -- struct stat buf; -- struct lo_data *lo = lo_data(req); -+ int res; -+ struct stat buf; -+ struct lo_data *lo = lo_data(req); - -- (void) fi; -+ (void)fi; - -- res = fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -- if (res == -1) -- 
return (void) fuse_reply_err(req, errno); -+ res = -+ fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } - -- fuse_reply_attr(req, &buf, lo->timeout); -+ fuse_reply_attr(req, &buf, lo->timeout); - } - - static int utimensat_empty_nofollow(struct lo_inode *inode, -- const struct timespec *tv) -+ const struct timespec *tv) - { -- int res; -- char procname[64]; -- -- if (inode->is_symlink) { -- res = utimensat(inode->fd, "", tv, -- AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -- if (res == -1 && errno == EINVAL) { -- /* Sorry, no race free way to set times on symlink. */ -- errno = EPERM; -- } -- return res; -- } -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -- -- return utimensat(AT_FDCWD, procname, tv, 0); -+ int res; -+ char procname[64]; -+ -+ if (inode->is_symlink) { -+ res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1 && errno == EINVAL) { -+ /* Sorry, no race free way to set times on symlink. */ -+ errno = EPERM; -+ } -+ return res; -+ } -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ return utimensat(AT_FDCWD, procname, tv, 0); - } - - static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, -- int valid, struct fuse_file_info *fi) -+ int valid, struct fuse_file_info *fi) - { -- int saverr; -- char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- int ifd = inode->fd; -- int res; -- -- if (valid & FUSE_SET_ATTR_MODE) { -- if (fi) { -- res = fchmod(fi->fh, attr->st_mode); -- } else { -- sprintf(procname, "/proc/self/fd/%i", ifd); -- res = chmod(procname, attr->st_mode); -- } -- if (res == -1) -- goto out_err; -- } -- if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { -- uid_t uid = (valid & FUSE_SET_ATTR_UID) ? -- attr->st_uid : (uid_t) -1; -- gid_t gid = (valid & FUSE_SET_ATTR_GID) ? 
-- attr->st_gid : (gid_t) -1; -- -- res = fchownat(ifd, "", uid, gid, -- AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -- if (res == -1) -- goto out_err; -- } -- if (valid & FUSE_SET_ATTR_SIZE) { -- if (fi) { -- res = ftruncate(fi->fh, attr->st_size); -- } else { -- sprintf(procname, "/proc/self/fd/%i", ifd); -- res = truncate(procname, attr->st_size); -- } -- if (res == -1) -- goto out_err; -- } -- if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { -- struct timespec tv[2]; -- -- tv[0].tv_sec = 0; -- tv[1].tv_sec = 0; -- tv[0].tv_nsec = UTIME_OMIT; -- tv[1].tv_nsec = UTIME_OMIT; -- -- if (valid & FUSE_SET_ATTR_ATIME_NOW) -- tv[0].tv_nsec = UTIME_NOW; -- else if (valid & FUSE_SET_ATTR_ATIME) -- tv[0] = attr->st_atim; -- -- if (valid & FUSE_SET_ATTR_MTIME_NOW) -- tv[1].tv_nsec = UTIME_NOW; -- else if (valid & FUSE_SET_ATTR_MTIME) -- tv[1] = attr->st_mtim; -- -- if (fi) -- res = futimens(fi->fh, tv); -- else -- res = utimensat_empty_nofollow(inode, tv); -- if (res == -1) -- goto out_err; -- } -- -- return lo_getattr(req, ino, fi); -+ int saverr; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ int ifd = inode->fd; -+ int res; -+ -+ if (valid & FUSE_SET_ATTR_MODE) { -+ if (fi) { -+ res = fchmod(fi->fh, attr->st_mode); -+ } else { -+ sprintf(procname, "/proc/self/fd/%i", ifd); -+ res = chmod(procname, attr->st_mode); -+ } -+ if (res == -1) { -+ goto out_err; -+ } -+ } -+ if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { -+ uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1; -+ gid_t gid = (valid & FUSE_SET_ATTR_GID) ? 
attr->st_gid : (gid_t)-1; -+ -+ res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ goto out_err; -+ } -+ } -+ if (valid & FUSE_SET_ATTR_SIZE) { -+ if (fi) { -+ res = ftruncate(fi->fh, attr->st_size); -+ } else { -+ sprintf(procname, "/proc/self/fd/%i", ifd); -+ res = truncate(procname, attr->st_size); -+ } -+ if (res == -1) { -+ goto out_err; -+ } -+ } -+ if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { -+ struct timespec tv[2]; -+ -+ tv[0].tv_sec = 0; -+ tv[1].tv_sec = 0; -+ tv[0].tv_nsec = UTIME_OMIT; -+ tv[1].tv_nsec = UTIME_OMIT; -+ -+ if (valid & FUSE_SET_ATTR_ATIME_NOW) { -+ tv[0].tv_nsec = UTIME_NOW; -+ } else if (valid & FUSE_SET_ATTR_ATIME) { -+ tv[0] = attr->st_atim; -+ } -+ -+ if (valid & FUSE_SET_ATTR_MTIME_NOW) { -+ tv[1].tv_nsec = UTIME_NOW; -+ } else if (valid & FUSE_SET_ATTR_MTIME) { -+ tv[1] = attr->st_mtim; -+ } -+ -+ if (fi) { -+ res = futimens(fi->fh, tv); -+ } else { -+ res = utimensat_empty_nofollow(inode, tv); -+ } -+ if (res == -1) { -+ goto out_err; -+ } -+ } -+ -+ return lo_getattr(req, ino, fi); - - out_err: -- saverr = errno; -- fuse_reply_err(req, saverr); -+ saverr = errno; -+ fuse_reply_err(req, saverr); - } - - static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) - { -- struct lo_inode *p; -- struct lo_inode *ret = NULL; -- -- pthread_mutex_lock(&lo->mutex); -- for (p = lo->root.next; p != &lo->root; p = p->next) { -- if (p->ino == st->st_ino && p->dev == st->st_dev) { -- assert(p->refcount > 0); -- ret = p; -- ret->refcount++; -- break; -- } -- } -- pthread_mutex_unlock(&lo->mutex); -- return ret; -+ struct lo_inode *p; -+ struct lo_inode *ret = NULL; -+ -+ pthread_mutex_lock(&lo->mutex); -+ for (p = lo->root.next; p != &lo->root; p = p->next) { -+ if (p->ino == st->st_ino && p->dev == st->st_dev) { -+ assert(p->refcount > 0); -+ ret = p; -+ ret->refcount++; -+ break; -+ } -+ } -+ pthread_mutex_unlock(&lo->mutex); -+ return ret; - } - - static int 
lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, -- struct fuse_entry_param *e) -+ struct fuse_entry_param *e) - { -- int newfd; -- int res; -- int saverr; -- struct lo_data *lo = lo_data(req); -- struct lo_inode *inode; -- -- memset(e, 0, sizeof(*e)); -- e->attr_timeout = lo->timeout; -- e->entry_timeout = lo->timeout; -- -- newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); -- if (newfd == -1) -- goto out_err; -- -- res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -- if (res == -1) -- goto out_err; -- -- inode = lo_find(lo_data(req), &e->attr); -- if (inode) { -- close(newfd); -- newfd = -1; -- } else { -- struct lo_inode *prev, *next; -- -- saverr = ENOMEM; -- inode = calloc(1, sizeof(struct lo_inode)); -- if (!inode) -- goto out_err; -- -- inode->is_symlink = S_ISLNK(e->attr.st_mode); -- inode->refcount = 1; -- inode->fd = newfd; -- inode->ino = e->attr.st_ino; -- inode->dev = e->attr.st_dev; -- -- pthread_mutex_lock(&lo->mutex); -- prev = &lo->root; -- next = prev->next; -- next->prev = inode; -- inode->next = next; -- inode->prev = prev; -- prev->next = inode; -- pthread_mutex_unlock(&lo->mutex); -- } -- e->ino = (uintptr_t) inode; -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long) parent, name, (unsigned long long) e->ino); -- -- return 0; -+ int newfd; -+ int res; -+ int saverr; -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode; -+ -+ memset(e, 0, sizeof(*e)); -+ e->attr_timeout = lo->timeout; -+ e->entry_timeout = lo->timeout; -+ -+ newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); -+ if (newfd == -1) { -+ goto out_err; -+ } -+ -+ res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ goto out_err; -+ } -+ -+ inode = lo_find(lo_data(req), &e->attr); -+ if (inode) { -+ close(newfd); -+ newfd = -1; -+ } else { -+ struct lo_inode *prev, *next; -+ -+ saverr = ENOMEM; -+ inode = calloc(1, 
sizeof(struct lo_inode)); -+ if (!inode) { -+ goto out_err; -+ } -+ -+ inode->is_symlink = S_ISLNK(e->attr.st_mode); -+ inode->refcount = 1; -+ inode->fd = newfd; -+ inode->ino = e->attr.st_ino; -+ inode->dev = e->attr.st_dev; -+ -+ pthread_mutex_lock(&lo->mutex); -+ prev = &lo->root; -+ next = prev->next; -+ next->prev = inode; -+ inode->next = next; -+ inode->prev = prev; -+ prev->next = inode; -+ pthread_mutex_unlock(&lo->mutex); -+ } -+ e->ino = (uintptr_t)inode; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long)parent, name, (unsigned long long)e->ino); -+ } -+ -+ return 0; - - out_err: -- saverr = errno; -- if (newfd != -1) -- close(newfd); -- return saverr; -+ saverr = errno; -+ if (newfd != -1) { -+ close(newfd); -+ } -+ return saverr; - } - - static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) - { -- struct fuse_entry_param e; -- int err; -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", -- parent, name); -- -- err = lo_do_lookup(req, parent, name, &e); -- if (err) -- fuse_reply_err(req, err); -- else -- fuse_reply_entry(req, &e); -+ struct fuse_entry_param e; -+ int err; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", -+ parent, name); -+ } -+ -+ err = lo_do_lookup(req, parent, name, &e); -+ if (err) { -+ fuse_reply_err(req, err); -+ } else { -+ fuse_reply_entry(req, &e); -+ } - } - - static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, -- const char *name, mode_t mode, dev_t rdev, -- const char *link) -+ const char *name, mode_t mode, dev_t rdev, -+ const char *link) - { -- int res; -- int saverr; -- struct lo_inode *dir = lo_inode(req, parent); -- struct fuse_entry_param e; -+ int res; -+ int saverr; -+ struct lo_inode *dir = lo_inode(req, parent); -+ struct fuse_entry_param e; - -- saverr = ENOMEM; -+ saverr = ENOMEM; - -- res = mknod_wrapper(dir->fd, name, link, mode, 
rdev); -+ res = mknod_wrapper(dir->fd, name, link, mode, rdev); - -- saverr = errno; -- if (res == -1) -- goto out; -+ saverr = errno; -+ if (res == -1) { -+ goto out; -+ } - -- saverr = lo_do_lookup(req, parent, name, &e); -- if (saverr) -- goto out; -+ saverr = lo_do_lookup(req, parent, name, &e); -+ if (saverr) { -+ goto out; -+ } - -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long) parent, name, (unsigned long long) e.ino); -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long)parent, name, (unsigned long long)e.ino); -+ } - -- fuse_reply_entry(req, &e); -- return; -+ fuse_reply_entry(req, &e); -+ return; - - out: -- fuse_reply_err(req, saverr); -+ fuse_reply_err(req, saverr); - } - --static void lo_mknod(fuse_req_t req, fuse_ino_t parent, -- const char *name, mode_t mode, dev_t rdev) -+static void lo_mknod(fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, dev_t rdev) - { -- lo_mknod_symlink(req, parent, name, mode, rdev, NULL); -+ lo_mknod_symlink(req, parent, name, mode, rdev, NULL); - } - - static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, -- mode_t mode) -+ mode_t mode) - { -- lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); -+ lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); - } - --static void lo_symlink(fuse_req_t req, const char *link, -- fuse_ino_t parent, const char *name) -+static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, -+ const char *name) - { -- lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); -+ lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); - } - - static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, -- const char *name) -+ const char *name) - { -- int res; -- char procname[64]; -+ int res; -+ char procname[64]; - -- if (inode->is_symlink) { -- res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); -- if (res == -1 && (errno == ENOENT || 
errno == EINVAL)) { -- /* Sorry, no race free way to hard-link a symlink. */ -- errno = EPERM; -- } -- return res; -- } -+ if (inode->is_symlink) { -+ res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); -+ if (res == -1 && (errno == ENOENT || errno == EINVAL)) { -+ /* Sorry, no race free way to hard-link a symlink. */ -+ errno = EPERM; -+ } -+ return res; -+ } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); - -- return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); -+ return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); - } - - static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, -- const char *name) -+ const char *name) - { -- int res; -- struct lo_data *lo = lo_data(req); -- struct lo_inode *inode = lo_inode(req, ino); -- struct fuse_entry_param e; -- int saverr; -- -- memset(&e, 0, sizeof(struct fuse_entry_param)); -- e.attr_timeout = lo->timeout; -- e.entry_timeout = lo->timeout; -- -- res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); -- if (res == -1) -- goto out_err; -- -- res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -- if (res == -1) -- goto out_err; -- -- pthread_mutex_lock(&lo->mutex); -- inode->refcount++; -- pthread_mutex_unlock(&lo->mutex); -- e.ino = (uintptr_t) inode; -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long) parent, name, -- (unsigned long long) e.ino); -- -- fuse_reply_entry(req, &e); -- return; -+ int res; -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode = lo_inode(req, ino); -+ struct fuse_entry_param e; -+ int saverr; -+ -+ memset(&e, 0, sizeof(struct fuse_entry_param)); -+ e.attr_timeout = lo->timeout; -+ e.entry_timeout = lo->timeout; -+ -+ res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); -+ if (res == -1) { -+ goto out_err; -+ } -+ -+ res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if 
(res == -1) { -+ goto out_err; -+ } -+ -+ pthread_mutex_lock(&lo->mutex); -+ inode->refcount++; -+ pthread_mutex_unlock(&lo->mutex); -+ e.ino = (uintptr_t)inode; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -+ (unsigned long long)parent, name, (unsigned long long)e.ino); -+ } -+ -+ fuse_reply_entry(req, &e); -+ return; - - out_err: -- saverr = errno; -- fuse_reply_err(req, saverr); -+ saverr = errno; -+ fuse_reply_err(req, saverr); - } - - static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) - { -- int res; -+ int res; - -- res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); -+ res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); - -- fuse_reply_err(req, res == -1 ? errno : 0); -+ fuse_reply_err(req, res == -1 ? errno : 0); - } - - static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, -- fuse_ino_t newparent, const char *newname, -- unsigned int flags) -+ fuse_ino_t newparent, const char *newname, -+ unsigned int flags) - { -- int res; -+ int res; - -- if (flags) { -- fuse_reply_err(req, EINVAL); -- return; -- } -+ if (flags) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - -- res = renameat(lo_fd(req, parent), name, -- lo_fd(req, newparent), newname); -+ res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); - -- fuse_reply_err(req, res == -1 ? errno : 0); -+ fuse_reply_err(req, res == -1 ? errno : 0); - } - - static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - { -- int res; -+ int res; - -- res = unlinkat(lo_fd(req, parent), name, 0); -+ res = unlinkat(lo_fd(req, parent), name, 0); - -- fuse_reply_err(req, res == -1 ? errno : 0); -+ fuse_reply_err(req, res == -1 ? 
errno : 0); - } - - static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) - { -- if (!inode) -- return; -- -- pthread_mutex_lock(&lo->mutex); -- assert(inode->refcount >= n); -- inode->refcount -= n; -- if (!inode->refcount) { -- struct lo_inode *prev, *next; -- -- prev = inode->prev; -- next = inode->next; -- next->prev = prev; -- prev->next = next; -- -- pthread_mutex_unlock(&lo->mutex); -- close(inode->fd); -- free(inode); -- -- } else { -- pthread_mutex_unlock(&lo->mutex); -- } -+ if (!inode) { -+ return; -+ } -+ -+ pthread_mutex_lock(&lo->mutex); -+ assert(inode->refcount >= n); -+ inode->refcount -= n; -+ if (!inode->refcount) { -+ struct lo_inode *prev, *next; -+ -+ prev = inode->prev; -+ next = inode->next; -+ next->prev = prev; -+ prev->next = next; -+ -+ pthread_mutex_unlock(&lo->mutex); -+ close(inode->fd); -+ free(inode); -+ -+ } else { -+ pthread_mutex_unlock(&lo->mutex); -+ } - } - - static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - { -- struct lo_data *lo = lo_data(req); -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode = lo_inode(req, ino); - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -- (unsigned long long) ino, -- (unsigned long long) inode->refcount, -- (unsigned long long) nlookup); -- } -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -+ (unsigned long long)ino, (unsigned long long)inode->refcount, -+ (unsigned long long)nlookup); -+ } - -- unref_inode(lo, inode, nlookup); -+ unref_inode(lo, inode, nlookup); - } - - static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - { -- lo_forget_one(req, ino, nlookup); -- fuse_reply_none(req); -+ lo_forget_one(req, ino, nlookup); -+ fuse_reply_none(req); - } - - static void lo_forget_multi(fuse_req_t req, size_t count, -- struct fuse_forget_data *forgets) -+ struct fuse_forget_data *forgets) - { -- int 
i; -+ int i; - -- for (i = 0; i < count; i++) -- lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); -- fuse_reply_none(req); -+ for (i = 0; i < count; i++) { -+ lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); -+ } -+ fuse_reply_none(req); - } - - static void lo_readlink(fuse_req_t req, fuse_ino_t ino) - { -- char buf[PATH_MAX + 1]; -- int res; -+ char buf[PATH_MAX + 1]; -+ int res; - -- res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); -- if (res == -1) -- return (void) fuse_reply_err(req, errno); -+ res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); -+ if (res == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } - -- if (res == sizeof(buf)) -- return (void) fuse_reply_err(req, ENAMETOOLONG); -+ if (res == sizeof(buf)) { -+ return (void)fuse_reply_err(req, ENAMETOOLONG); -+ } - -- buf[res] = '\0'; -+ buf[res] = '\0'; - -- fuse_reply_readlink(req, buf); -+ fuse_reply_readlink(req, buf); - } - - struct lo_dirp { -- DIR *dp; -- struct dirent *entry; -- off_t offset; -+ DIR *dp; -+ struct dirent *entry; -+ off_t offset; - }; - - static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) - { -- return (struct lo_dirp *) (uintptr_t) fi->fh; -+ return (struct lo_dirp *)(uintptr_t)fi->fh; - } - --static void lo_opendir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+static void lo_opendir(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi) - { -- int error = ENOMEM; -- struct lo_data *lo = lo_data(req); -- struct lo_dirp *d; -- int fd; -- -- d = calloc(1, sizeof(struct lo_dirp)); -- if (d == NULL) -- goto out_err; -- -- fd = openat(lo_fd(req, ino), ".", O_RDONLY); -- if (fd == -1) -- goto out_errno; -- -- d->dp = fdopendir(fd); -- if (d->dp == NULL) -- goto out_errno; -- -- d->offset = 0; -- d->entry = NULL; -- -- fi->fh = (uintptr_t) d; -- if (lo->cache == CACHE_ALWAYS) -- fi->keep_cache = 1; -- fuse_reply_open(req, fi); -- return; -+ int error = ENOMEM; -+ struct lo_data *lo = lo_data(req); -+ struct lo_dirp *d; -+ int 
fd; -+ -+ d = calloc(1, sizeof(struct lo_dirp)); -+ if (d == NULL) { -+ goto out_err; -+ } -+ -+ fd = openat(lo_fd(req, ino), ".", O_RDONLY); -+ if (fd == -1) { -+ goto out_errno; -+ } -+ -+ d->dp = fdopendir(fd); -+ if (d->dp == NULL) { -+ goto out_errno; -+ } -+ -+ d->offset = 0; -+ d->entry = NULL; -+ -+ fi->fh = (uintptr_t)d; -+ if (lo->cache == CACHE_ALWAYS) { -+ fi->keep_cache = 1; -+ } -+ fuse_reply_open(req, fi); -+ return; - - out_errno: -- error = errno; -+ error = errno; - out_err: -- if (d) { -- if (fd != -1) -- close(fd); -- free(d); -- } -- fuse_reply_err(req, error); -+ if (d) { -+ if (fd != -1) { -+ close(fd); -+ } -+ free(d); -+ } -+ fuse_reply_err(req, error); - } - - static int is_dot_or_dotdot(const char *name) - { -- return name[0] == '.' && (name[1] == '\0' || -- (name[1] == '.' && name[2] == '\0')); -+ return name[0] == '.' && -+ (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); - } - - static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, -- off_t offset, struct fuse_file_info *fi, int plus) -+ off_t offset, struct fuse_file_info *fi, int plus) - { -- struct lo_dirp *d = lo_dirp(fi); -- char *buf; -- char *p; -- size_t rem = size; -- int err; -- -- (void) ino; -- -- buf = calloc(1, size); -- if (!buf) { -- err = ENOMEM; -- goto error; -- } -- p = buf; -- -- if (offset != d->offset) { -- seekdir(d->dp, offset); -- d->entry = NULL; -- d->offset = offset; -- } -- while (1) { -- size_t entsize; -- off_t nextoff; -- const char *name; -- -- if (!d->entry) { -- errno = 0; -- d->entry = readdir(d->dp); -- if (!d->entry) { -- if (errno) { // Error -- err = errno; -- goto error; -- } else { // End of stream -- break; -- } -- } -- } -- nextoff = d->entry->d_off; -- name = d->entry->d_name; -- fuse_ino_t entry_ino = 0; -- if (plus) { -- struct fuse_entry_param e; -- if (is_dot_or_dotdot(name)) { -- e = (struct fuse_entry_param) { -- .attr.st_ino = d->entry->d_ino, -- .attr.st_mode = d->entry->d_type << 12, -- }; -- } else { -- 
err = lo_do_lookup(req, ino, name, &e); -- if (err) -- goto error; -- entry_ino = e.ino; -- } -- -- entsize = fuse_add_direntry_plus(req, p, rem, name, -- &e, nextoff); -- } else { -- struct stat st = { -- .st_ino = d->entry->d_ino, -- .st_mode = d->entry->d_type << 12, -- }; -- entsize = fuse_add_direntry(req, p, rem, name, -- &st, nextoff); -- } -- if (entsize > rem) { -- if (entry_ino != 0) -- lo_forget_one(req, entry_ino, 1); -- break; -- } -- -- p += entsize; -- rem -= entsize; -- -- d->entry = NULL; -- d->offset = nextoff; -- } -+ struct lo_dirp *d = lo_dirp(fi); -+ char *buf; -+ char *p; -+ size_t rem = size; -+ int err; -+ -+ (void)ino; -+ -+ buf = calloc(1, size); -+ if (!buf) { -+ err = ENOMEM; -+ goto error; -+ } -+ p = buf; -+ -+ if (offset != d->offset) { -+ seekdir(d->dp, offset); -+ d->entry = NULL; -+ d->offset = offset; -+ } -+ while (1) { -+ size_t entsize; -+ off_t nextoff; -+ const char *name; -+ -+ if (!d->entry) { -+ errno = 0; -+ d->entry = readdir(d->dp); -+ if (!d->entry) { -+ if (errno) { /* Error */ -+ err = errno; -+ goto error; -+ } else { /* End of stream */ -+ break; -+ } -+ } -+ } -+ nextoff = d->entry->d_off; -+ name = d->entry->d_name; -+ fuse_ino_t entry_ino = 0; -+ if (plus) { -+ struct fuse_entry_param e; -+ if (is_dot_or_dotdot(name)) { -+ e = (struct fuse_entry_param){ -+ .attr.st_ino = d->entry->d_ino, -+ .attr.st_mode = d->entry->d_type << 12, -+ }; -+ } else { -+ err = lo_do_lookup(req, ino, name, &e); -+ if (err) { -+ goto error; -+ } -+ entry_ino = e.ino; -+ } -+ -+ entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff); -+ } else { -+ struct stat st = { -+ .st_ino = d->entry->d_ino, -+ .st_mode = d->entry->d_type << 12, -+ }; -+ entsize = fuse_add_direntry(req, p, rem, name, &st, nextoff); -+ } -+ if (entsize > rem) { -+ if (entry_ino != 0) { -+ lo_forget_one(req, entry_ino, 1); -+ } -+ break; -+ } -+ -+ p += entsize; -+ rem -= entsize; -+ -+ d->entry = NULL; -+ d->offset = nextoff; -+ } - - err = 0; - error: 
-- // If there's an error, we can only signal it if we haven't stored -- // any entries yet - otherwise we'd end up with wrong lookup -- // counts for the entries that are already in the buffer. So we -- // return what we've collected until that point. -- if (err && rem == size) -- fuse_reply_err(req, err); -- else -- fuse_reply_buf(req, buf, size - rem); -+ /* -+ * If there's an error, we can only signal it if we haven't stored -+ * any entries yet - otherwise we'd end up with wrong lookup -+ * counts for the entries that are already in the buffer. So we -+ * return what we've collected until that point. -+ */ -+ if (err && rem == size) { -+ fuse_reply_err(req, err); -+ } else { -+ fuse_reply_buf(req, buf, size - rem); -+ } - free(buf); - } - - static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, -- off_t offset, struct fuse_file_info *fi) -+ off_t offset, struct fuse_file_info *fi) - { -- lo_do_readdir(req, ino, size, offset, fi, 0); -+ lo_do_readdir(req, ino, size, offset, fi, 0); - } - - static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, -- off_t offset, struct fuse_file_info *fi) -+ off_t offset, struct fuse_file_info *fi) - { -- lo_do_readdir(req, ino, size, offset, fi, 1); -+ lo_do_readdir(req, ino, size, offset, fi, 1); - } - --static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi) - { -- struct lo_dirp *d = lo_dirp(fi); -- (void) ino; -- closedir(d->dp); -- free(d); -- fuse_reply_err(req, 0); -+ struct lo_dirp *d = lo_dirp(fi); -+ (void)ino; -+ closedir(d->dp); -+ free(d); -+ fuse_reply_err(req, 0); - } - - static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, -- mode_t mode, struct fuse_file_info *fi) -+ mode_t mode, struct fuse_file_info *fi) - { -- int fd; -- struct lo_data *lo = lo_data(req); -- struct fuse_entry_param e; -- int err; -- -- if (lo_debug(req)) -- 
fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", -- parent, name); -- -- fd = openat(lo_fd(req, parent), name, -- (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); -- if (fd == -1) -- return (void) fuse_reply_err(req, errno); -- -- fi->fh = fd; -- if (lo->cache == CACHE_NEVER) -- fi->direct_io = 1; -- else if (lo->cache == CACHE_ALWAYS) -- fi->keep_cache = 1; -- -- err = lo_do_lookup(req, parent, name, &e); -- if (err) -- fuse_reply_err(req, err); -- else -- fuse_reply_create(req, &e, fi); -+ int fd; -+ struct lo_data *lo = lo_data(req); -+ struct fuse_entry_param e; -+ int err; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", -+ parent, name); -+ } -+ -+ fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, -+ mode); -+ if (fd == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } -+ -+ fi->fh = fd; -+ if (lo->cache == CACHE_NEVER) { -+ fi->direct_io = 1; -+ } else if (lo->cache == CACHE_ALWAYS) { -+ fi->keep_cache = 1; -+ } -+ -+ err = lo_do_lookup(req, parent, name, &e); -+ if (err) { -+ fuse_reply_err(req, err); -+ } else { -+ fuse_reply_create(req, &e, fi); -+ } - } - - static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, -- struct fuse_file_info *fi) -+ struct fuse_file_info *fi) - { -- int res; -- int fd = dirfd(lo_dirp(fi)->dp); -- (void) ino; -- if (datasync) -- res = fdatasync(fd); -- else -- res = fsync(fd); -- fuse_reply_err(req, res == -1 ? errno : 0); -+ int res; -+ int fd = dirfd(lo_dirp(fi)->dp); -+ (void)ino; -+ if (datasync) { -+ res = fdatasync(fd); -+ } else { -+ res = fsync(fd); -+ } -+ fuse_reply_err(req, res == -1 ? 
errno : 0); - } - - static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - { -- int fd; -- char buf[64]; -- struct lo_data *lo = lo_data(req); -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", -- ino, fi->flags); -- -- /* With writeback cache, kernel may send read requests even -- when userspace opened write-only */ -- if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { -- fi->flags &= ~O_ACCMODE; -- fi->flags |= O_RDWR; -- } -- -- /* With writeback cache, O_APPEND is handled by the kernel. -- This breaks atomicity (since the file may change in the -- underlying filesystem, so that the kernel's idea of the -- end of the file isn't accurate anymore). In this example, -- we just accept that. A more rigorous filesystem may want -- to return an error here */ -- if (lo->writeback && (fi->flags & O_APPEND)) -- fi->flags &= ~O_APPEND; -- -- sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); -- fd = open(buf, fi->flags & ~O_NOFOLLOW); -- if (fd == -1) -- return (void) fuse_reply_err(req, errno); -- -- fi->fh = fd; -- if (lo->cache == CACHE_NEVER) -- fi->direct_io = 1; -- else if (lo->cache == CACHE_ALWAYS) -- fi->keep_cache = 1; -- fuse_reply_open(req, fi); -+ int fd; -+ char buf[64]; -+ struct lo_data *lo = lo_data(req); -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, -+ fi->flags); -+ } -+ -+ /* -+ * With writeback cache, kernel may send read requests even -+ * when userspace opened write-only -+ */ -+ if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { -+ fi->flags &= ~O_ACCMODE; -+ fi->flags |= O_RDWR; -+ } -+ -+ /* -+ * With writeback cache, O_APPEND is handled by the kernel. -+ * This breaks atomicity (since the file may change in the -+ * underlying filesystem, so that the kernel's idea of the -+ * end of the file isn't accurate anymore). In this example, -+ * we just accept that. 
A more rigorous filesystem may want -+ * to return an error here -+ */ -+ if (lo->writeback && (fi->flags & O_APPEND)) { -+ fi->flags &= ~O_APPEND; -+ } -+ -+ sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); -+ fd = open(buf, fi->flags & ~O_NOFOLLOW); -+ if (fd == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } -+ -+ fi->fh = fd; -+ if (lo->cache == CACHE_NEVER) { -+ fi->direct_io = 1; -+ } else if (lo->cache == CACHE_ALWAYS) { -+ fi->keep_cache = 1; -+ } -+ fuse_reply_open(req, fi); - } - --static void lo_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) -+static void lo_release(fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi) - { -- (void) ino; -+ (void)ino; - -- close(fi->fh); -- fuse_reply_err(req, 0); -+ close(fi->fh); -+ fuse_reply_err(req, 0); - } - - static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - { -- int res; -- (void) ino; -- res = close(dup(fi->fh)); -- fuse_reply_err(req, res == -1 ? errno : 0); -+ int res; -+ (void)ino; -+ res = close(dup(fi->fh)); -+ fuse_reply_err(req, res == -1 ? errno : 0); - } - - static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, -- struct fuse_file_info *fi) -+ struct fuse_file_info *fi) - { -- int res; -- (void) ino; -- if (datasync) -- res = fdatasync(fi->fh); -- else -- res = fsync(fi->fh); -- fuse_reply_err(req, res == -1 ? errno : 0); -+ int res; -+ (void)ino; -+ if (datasync) { -+ res = fdatasync(fi->fh); -+ } else { -+ res = fsync(fi->fh); -+ } -+ fuse_reply_err(req, res == -1 ? 
errno : 0); - } - --static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, -- off_t offset, struct fuse_file_info *fi) -+static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, -+ struct fuse_file_info *fi) - { -- struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); -+ struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); - -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, "lo_read(ino=%" PRIu64 ", size=%zd, " -- "off=%lu)\n", ino, size, (unsigned long) offset); -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_read(ino=%" PRIu64 ", size=%zd, " -+ "off=%lu)\n", -+ ino, size, (unsigned long)offset); -+ } - -- buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -- buf.buf[0].fd = fi->fh; -- buf.buf[0].pos = offset; -+ buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -+ buf.buf[0].fd = fi->fh; -+ buf.buf[0].pos = offset; - -- fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); -+ fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); - } - - static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, -- struct fuse_bufvec *in_buf, off_t off, -- struct fuse_file_info *fi) -+ struct fuse_bufvec *in_buf, off_t off, -+ struct fuse_file_info *fi) - { -- (void) ino; -- ssize_t res; -- struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); -- -- out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -- out_buf.buf[0].fd = fi->fh; -- out_buf.buf[0].pos = off; -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", -- ino, out_buf.buf[0].size, (unsigned long) off); -- -- res = fuse_buf_copy(&out_buf, in_buf, 0); -- if(res < 0) -- fuse_reply_err(req, -res); -- else -- fuse_reply_write(req, (size_t) res); -+ (void)ino; -+ ssize_t res; -+ struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); -+ -+ out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -+ out_buf.buf[0].fd = fi->fh; -+ out_buf.buf[0].pos = off; -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, -+ 
"lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, -+ out_buf.buf[0].size, (unsigned long)off); -+ } -+ -+ res = fuse_buf_copy(&out_buf, in_buf, 0); -+ if (res < 0) { -+ fuse_reply_err(req, -res); -+ } else { -+ fuse_reply_write(req, (size_t)res); -+ } - } - - static void lo_statfs(fuse_req_t req, fuse_ino_t ino) - { -- int res; -- struct statvfs stbuf; -- -- res = fstatvfs(lo_fd(req, ino), &stbuf); -- if (res == -1) -- fuse_reply_err(req, errno); -- else -- fuse_reply_statfs(req, &stbuf); -+ int res; -+ struct statvfs stbuf; -+ -+ res = fstatvfs(lo_fd(req, ino), &stbuf); -+ if (res == -1) { -+ fuse_reply_err(req, errno); -+ } else { -+ fuse_reply_statfs(req, &stbuf); -+ } - } - --static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, -- off_t offset, off_t length, struct fuse_file_info *fi) -+static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, -+ off_t length, struct fuse_file_info *fi) - { -- int err = EOPNOTSUPP; -- (void) ino; -+ int err = EOPNOTSUPP; -+ (void)ino; - - #ifdef HAVE_FALLOCATE -- err = fallocate(fi->fh, mode, offset, length); -- if (err < 0) -- err = errno; -+ err = fallocate(fi->fh, mode, offset, length); -+ if (err < 0) { -+ err = errno; -+ } - - #elif defined(HAVE_POSIX_FALLOCATE) -- if (mode) { -- fuse_reply_err(req, EOPNOTSUPP); -- return; -- } -+ if (mode) { -+ fuse_reply_err(req, EOPNOTSUPP); -+ return; -+ } - -- err = posix_fallocate(fi->fh, offset, length); -+ err = posix_fallocate(fi->fh, offset, length); - #endif - -- fuse_reply_err(req, err); -+ fuse_reply_err(req, err); - } - - static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -- int op) -+ int op) - { -- int res; -- (void) ino; -+ int res; -+ (void)ino; - -- res = flock(fi->fh, op); -+ res = flock(fi->fh, op); - -- fuse_reply_err(req, res == -1 ? errno : 0); -+ fuse_reply_err(req, res == -1 ? 
errno : 0); - } - - static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, -- size_t size) -+ size_t size) - { -- char *value = NULL; -- char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- ssize_t ret; -- int saverr; -- -- saverr = ENOSYS; -- if (!lo_data(req)->xattr) -- goto out; -- -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", -- ino, name, size); -- } -- -- if (inode->is_symlink) { -- /* Sorry, no race free way to getxattr on symlink. */ -- saverr = EPERM; -- goto out; -- } -- -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -- -- if (size) { -- value = malloc(size); -- if (!value) -- goto out_err; -- -- ret = getxattr(procname, name, value, size); -- if (ret == -1) -- goto out_err; -- saverr = 0; -- if (ret == 0) -- goto out; -- -- fuse_reply_buf(req, value, ret); -- } else { -- ret = getxattr(procname, name, NULL, 0); -- if (ret == -1) -- goto out_err; -- -- fuse_reply_xattr(req, ret); -- } -+ char *value = NULL; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) { -+ goto out; -+ } -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ino, name, -+ size); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to getxattr on symlink. 
*/ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ if (size) { -+ value = malloc(size); -+ if (!value) { -+ goto out_err; -+ } -+ -+ ret = getxattr(procname, name, value, size); -+ if (ret == -1) { -+ goto out_err; -+ } -+ saverr = 0; -+ if (ret == 0) { -+ goto out; -+ } -+ -+ fuse_reply_buf(req, value, ret); -+ } else { -+ ret = getxattr(procname, name, NULL, 0); -+ if (ret == -1) { -+ goto out_err; -+ } -+ -+ fuse_reply_xattr(req, ret); -+ } - out_free: -- free(value); -- return; -+ free(value); -+ return; - - out_err: -- saverr = errno; -+ saverr = errno; - out: -- fuse_reply_err(req, saverr); -- goto out_free; -+ fuse_reply_err(req, saverr); -+ goto out_free; - } - - static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - { -- char *value = NULL; -- char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- ssize_t ret; -- int saverr; -- -- saverr = ENOSYS; -- if (!lo_data(req)->xattr) -- goto out; -- -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", -- ino, size); -- } -- -- if (inode->is_symlink) { -- /* Sorry, no race free way to listxattr on symlink. 
*/ -- saverr = EPERM; -- goto out; -- } -- -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -- -- if (size) { -- value = malloc(size); -- if (!value) -- goto out_err; -- -- ret = listxattr(procname, value, size); -- if (ret == -1) -- goto out_err; -- saverr = 0; -- if (ret == 0) -- goto out; -- -- fuse_reply_buf(req, value, ret); -- } else { -- ret = listxattr(procname, NULL, 0); -- if (ret == -1) -- goto out_err; -- -- fuse_reply_xattr(req, ret); -- } -+ char *value = NULL; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; -+ -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) { -+ goto out; -+ } -+ -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", -+ ino, size); -+ } -+ -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to listxattr on symlink. */ -+ saverr = EPERM; -+ goto out; -+ } -+ -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ if (size) { -+ value = malloc(size); -+ if (!value) { -+ goto out_err; -+ } -+ -+ ret = listxattr(procname, value, size); -+ if (ret == -1) { -+ goto out_err; -+ } -+ saverr = 0; -+ if (ret == 0) { -+ goto out; -+ } -+ -+ fuse_reply_buf(req, value, ret); -+ } else { -+ ret = listxattr(procname, NULL, 0); -+ if (ret == -1) { -+ goto out_err; -+ } -+ -+ fuse_reply_xattr(req, ret); -+ } - out_free: -- free(value); -- return; -+ free(value); -+ return; - - out_err: -- saverr = errno; -+ saverr = errno; - out: -- fuse_reply_err(req, saverr); -- goto out_free; -+ fuse_reply_err(req, saverr); -+ goto out_free; - } - - static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, -- const char *value, size_t size, int flags) -+ const char *value, size_t size, int flags) - { -- char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- ssize_t ret; -- int saverr; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; - -- saverr = ENOSYS; -- if 
(!lo_data(req)->xattr) -- goto out; -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) { -+ goto out; -+ } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", -- ino, name, value, size); -- } -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", -+ ino, name, value, size); -+ } - -- if (inode->is_symlink) { -- /* Sorry, no race free way to setxattr on symlink. */ -- saverr = EPERM; -- goto out; -- } -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to setxattr on symlink. */ -+ saverr = EPERM; -+ goto out; -+ } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); - -- ret = setxattr(procname, name, value, size, flags); -- saverr = ret == -1 ? errno : 0; -+ ret = setxattr(procname, name, value, size, flags); -+ saverr = ret == -1 ? errno : 0; - - out: -- fuse_reply_err(req, saverr); -+ fuse_reply_err(req, saverr); - } - - static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - { -- char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- ssize_t ret; -- int saverr; -+ char procname[64]; -+ struct lo_inode *inode = lo_inode(req, ino); -+ ssize_t ret; -+ int saverr; - -- saverr = ENOSYS; -- if (!lo_data(req)->xattr) -- goto out; -+ saverr = ENOSYS; -+ if (!lo_data(req)->xattr) { -+ goto out; -+ } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", -- ino, name); -- } -+ if (lo_debug(req)) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", -+ ino, name); -+ } - -- if (inode->is_symlink) { -- /* Sorry, no race free way to setxattr on symlink. */ -- saverr = EPERM; -- goto out; -- } -+ if (inode->is_symlink) { -+ /* Sorry, no race free way to setxattr on symlink. 
*/ -+ saverr = EPERM; -+ goto out; -+ } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); - -- ret = removexattr(procname, name); -- saverr = ret == -1 ? errno : 0; -+ ret = removexattr(procname, name); -+ saverr = ret == -1 ? errno : 0; - - out: -- fuse_reply_err(req, saverr); -+ fuse_reply_err(req, saverr); - } - - #ifdef HAVE_COPY_FILE_RANGE - static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, -- struct fuse_file_info *fi_in, -- fuse_ino_t ino_out, off_t off_out, -- struct fuse_file_info *fi_out, size_t len, -- int flags) -+ struct fuse_file_info *fi_in, fuse_ino_t ino_out, -+ off_t off_out, struct fuse_file_info *fi_out, -+ size_t len, int flags) - { -- ssize_t res; -- -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " -- "off=%lu, ino=%" PRIu64 "/fd=%lu, " -- "off=%lu, size=%zd, flags=0x%x)\n", -- ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, -- len, flags); -- -- res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, -- flags); -- if (res < 0) -- fuse_reply_err(req, -errno); -- else -- fuse_reply_write(req, res); -+ ssize_t res; -+ -+ if (lo_debug(req)) -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " -+ "off=%lu, ino=%" PRIu64 "/fd=%lu, " -+ "off=%lu, size=%zd, flags=0x%x)\n", -+ ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, len, -+ flags); -+ -+ res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, flags); -+ if (res < 0) { -+ fuse_reply_err(req, -errno); -+ } else { -+ fuse_reply_write(req, res); -+ } - } - #endif - - static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, -- struct fuse_file_info *fi) -+ struct fuse_file_info *fi) - { -- off_t res; -- -- (void)ino; -- res = lseek(fi->fh, off, whence); -- if (res != -1) -- fuse_reply_lseek(req, res); -- else -- fuse_reply_err(req, errno); -+ off_t res; -+ -+ (void)ino; -+ res = 
lseek(fi->fh, off, whence); -+ if (res != -1) { -+ fuse_reply_lseek(req, res); -+ } else { -+ fuse_reply_err(req, errno); -+ } - } - - static struct fuse_lowlevel_ops lo_oper = { -- .init = lo_init, -- .lookup = lo_lookup, -- .mkdir = lo_mkdir, -- .mknod = lo_mknod, -- .symlink = lo_symlink, -- .link = lo_link, -- .unlink = lo_unlink, -- .rmdir = lo_rmdir, -- .rename = lo_rename, -- .forget = lo_forget, -- .forget_multi = lo_forget_multi, -- .getattr = lo_getattr, -- .setattr = lo_setattr, -- .readlink = lo_readlink, -- .opendir = lo_opendir, -- .readdir = lo_readdir, -- .readdirplus = lo_readdirplus, -- .releasedir = lo_releasedir, -- .fsyncdir = lo_fsyncdir, -- .create = lo_create, -- .open = lo_open, -- .release = lo_release, -- .flush = lo_flush, -- .fsync = lo_fsync, -- .read = lo_read, -- .write_buf = lo_write_buf, -- .statfs = lo_statfs, -- .fallocate = lo_fallocate, -- .flock = lo_flock, -- .getxattr = lo_getxattr, -- .listxattr = lo_listxattr, -- .setxattr = lo_setxattr, -- .removexattr = lo_removexattr, -+ .init = lo_init, -+ .lookup = lo_lookup, -+ .mkdir = lo_mkdir, -+ .mknod = lo_mknod, -+ .symlink = lo_symlink, -+ .link = lo_link, -+ .unlink = lo_unlink, -+ .rmdir = lo_rmdir, -+ .rename = lo_rename, -+ .forget = lo_forget, -+ .forget_multi = lo_forget_multi, -+ .getattr = lo_getattr, -+ .setattr = lo_setattr, -+ .readlink = lo_readlink, -+ .opendir = lo_opendir, -+ .readdir = lo_readdir, -+ .readdirplus = lo_readdirplus, -+ .releasedir = lo_releasedir, -+ .fsyncdir = lo_fsyncdir, -+ .create = lo_create, -+ .open = lo_open, -+ .release = lo_release, -+ .flush = lo_flush, -+ .fsync = lo_fsync, -+ .read = lo_read, -+ .write_buf = lo_write_buf, -+ .statfs = lo_statfs, -+ .fallocate = lo_fallocate, -+ .flock = lo_flock, -+ .getxattr = lo_getxattr, -+ .listxattr = lo_listxattr, -+ .setxattr = lo_setxattr, -+ .removexattr = lo_removexattr, - #ifdef HAVE_COPY_FILE_RANGE -- .copy_file_range = lo_copy_file_range, -+ .copy_file_range = lo_copy_file_range, - 
#endif -- .lseek = lo_lseek, -+ .lseek = lo_lseek, - }; - - int main(int argc, char *argv[]) - { -- struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -- struct fuse_session *se; -- struct fuse_cmdline_opts opts; -- struct lo_data lo = { .debug = 0, -- .writeback = 0 }; -- int ret = -1; -- -- /* Don't mask creation mode, kernel already did that */ -- umask(0); -- -- pthread_mutex_init(&lo.mutex, NULL); -- lo.root.next = lo.root.prev = &lo.root; -- lo.root.fd = -1; -- lo.cache = CACHE_NORMAL; -- -- if (fuse_parse_cmdline(&args, &opts) != 0) -- return 1; -- if (opts.show_help) { -- printf("usage: %s [options] \n\n", argv[0]); -- fuse_cmdline_help(); -- fuse_lowlevel_help(); -- ret = 0; -- goto err_out1; -- } else if (opts.show_version) { -- fuse_lowlevel_version(); -- ret = 0; -- goto err_out1; -- } -- -- if(opts.mountpoint == NULL) { -- printf("usage: %s [options] \n", argv[0]); -- printf(" %s --help\n", argv[0]); -- ret = 1; -- goto err_out1; -- } -- -- if (fuse_opt_parse(&args, &lo, lo_opts, NULL)== -1) -- return 1; -- -- lo.debug = opts.debug; -- lo.root.refcount = 2; -- if (lo.source) { -- struct stat stat; -- int res; -- -- res = lstat(lo.source, &stat); -- if (res == -1) { -- fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", -- lo.source); -- exit(1); -- } -- if (!S_ISDIR(stat.st_mode)) { -- fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); -- exit(1); -- } -- -- } else { -- lo.source = "/"; -- } -- lo.root.is_symlink = false; -- if (!lo.timeout_set) { -- switch (lo.cache) { -- case CACHE_NEVER: -- lo.timeout = 0.0; -- break; -- -- case CACHE_NORMAL: -- lo.timeout = 1.0; -- break; -- -- case CACHE_ALWAYS: -- lo.timeout = 86400.0; -- break; -- } -- } else if (lo.timeout < 0) { -- fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", -- lo.timeout); -- exit(1); -- } -- -- lo.root.fd = open(lo.source, O_PATH); -- if (lo.root.fd == -1) { -- fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", -- lo.source); -- exit(1); -- } -- -- se = 
fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); -- if (se == NULL) -- goto err_out1; -- -- if (fuse_set_signal_handlers(se) != 0) -- goto err_out2; -- -- if (fuse_session_mount(se, opts.mountpoint) != 0) -- goto err_out3; -- -- fuse_daemonize(opts.foreground); -- -- /* Block until ctrl+c or fusermount -u */ -- if (opts.singlethread) -- ret = fuse_session_loop(se); -- else -- ret = fuse_session_loop_mt(se, opts.clone_fd); -- -- fuse_session_unmount(se); -+ struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -+ struct fuse_session *se; -+ struct fuse_cmdline_opts opts; -+ struct lo_data lo = { .debug = 0, .writeback = 0 }; -+ int ret = -1; -+ -+ /* Don't mask creation mode, kernel already did that */ -+ umask(0); -+ -+ pthread_mutex_init(&lo.mutex, NULL); -+ lo.root.next = lo.root.prev = &lo.root; -+ lo.root.fd = -1; -+ lo.cache = CACHE_NORMAL; -+ -+ if (fuse_parse_cmdline(&args, &opts) != 0) { -+ return 1; -+ } -+ if (opts.show_help) { -+ printf("usage: %s [options] \n\n", argv[0]); -+ fuse_cmdline_help(); -+ fuse_lowlevel_help(); -+ ret = 0; -+ goto err_out1; -+ } else if (opts.show_version) { -+ fuse_lowlevel_version(); -+ ret = 0; -+ goto err_out1; -+ } -+ -+ if (opts.mountpoint == NULL) { -+ printf("usage: %s [options] \n", argv[0]); -+ printf(" %s --help\n", argv[0]); -+ ret = 1; -+ goto err_out1; -+ } -+ -+ if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { -+ return 1; -+ } -+ -+ lo.debug = opts.debug; -+ lo.root.refcount = 2; -+ if (lo.source) { -+ struct stat stat; -+ int res; -+ -+ res = lstat(lo.source, &stat); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", -+ lo.source); -+ exit(1); -+ } -+ if (!S_ISDIR(stat.st_mode)) { -+ fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); -+ exit(1); -+ } -+ -+ } else { -+ lo.source = "/"; -+ } -+ lo.root.is_symlink = false; -+ if (!lo.timeout_set) { -+ switch (lo.cache) { -+ case CACHE_NEVER: -+ lo.timeout = 0.0; -+ break; -+ -+ case CACHE_NORMAL: -+ lo.timeout = 
1.0; -+ break; -+ -+ case CACHE_ALWAYS: -+ lo.timeout = 86400.0; -+ break; -+ } -+ } else if (lo.timeout < 0) { -+ fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", lo.timeout); -+ exit(1); -+ } -+ -+ lo.root.fd = open(lo.source, O_PATH); -+ if (lo.root.fd == -1) { -+ fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); -+ exit(1); -+ } -+ -+ se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); -+ if (se == NULL) { -+ goto err_out1; -+ } -+ -+ if (fuse_set_signal_handlers(se) != 0) { -+ goto err_out2; -+ } -+ -+ if (fuse_session_mount(se, opts.mountpoint) != 0) { -+ goto err_out3; -+ } -+ -+ fuse_daemonize(opts.foreground); -+ -+ /* Block until ctrl+c or fusermount -u */ -+ if (opts.singlethread) { -+ ret = fuse_session_loop(se); -+ } else { -+ ret = fuse_session_loop_mt(se, opts.clone_fd); -+ } -+ -+ fuse_session_unmount(se); - err_out3: -- fuse_remove_signal_handlers(se); -+ fuse_remove_signal_handlers(se); - err_out2: -- fuse_session_destroy(se); -+ fuse_session_destroy(se); - err_out1: -- free(opts.mountpoint); -- fuse_opt_free_args(&args); -+ free(opts.mountpoint); -+ fuse_opt_free_args(&args); - -- if (lo.root.fd >= 0) -- close(lo.root.fd); -+ if (lo.root.fd >= 0) { -+ close(lo.root.fd); -+ } - -- return ret ? 1 : 0; -+ return ret ? 1 : 0; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Handle-hard-reboot.patch b/kvm-virtiofsd-Handle-hard-reboot.patch deleted file mode 100644 index 8888030..0000000 --- a/kvm-virtiofsd-Handle-hard-reboot.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 616407b06517361ce444dcc0960aeaf55b52da33 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:41 +0100 -Subject: [PATCH 070/116] virtiofsd: Handle hard reboot -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-67-dgilbert@redhat.com> -Patchwork-id: 93521 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 066/112] virtiofsd: Handle hard reboot -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Handle a - mount - hard reboot (without unmount) - mount - -we get another 'init' which FUSE doesn't normally expect. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit e8556f49098b5d95634e592d79a97f761b76c96e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 16 +++++++++++++++- - 1 file changed, 15 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 7d742b5..65f91da 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2433,7 +2433,21 @@ void fuse_session_process_buf_int(struct fuse_session *se, - goto reply_err; - } - } else if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT) { -- goto reply_err; -+ if (fuse_lowlevel_is_virtio(se)) { -+ /* -+ * TODO: This is after a hard reboot typically, we need to do -+ * a destroy, but we can't reply to this request yet so -+ * we can't use do_destroy -+ */ -+ fuse_log(FUSE_LOG_DEBUG, "%s: reinit\n", __func__); -+ se->got_destroy = 1; -+ se->got_init = 0; -+ if (se->op.destroy) { -+ se->op.destroy(se->userdata); -+ } -+ } else { -+ goto reply_err; -+ } - } - - err = EACCES; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Handle-reinit.patch b/kvm-virtiofsd-Handle-reinit.patch deleted file mode 100644 index 3f9577b..0000000 --- a/kvm-virtiofsd-Handle-reinit.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 485adfa1aa1b3e2d1449edf5c42d6ec396cbfb5d Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:40 +0100 -Subject: [PATCH 069/116] virtiofsd: Handle reinit -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-66-dgilbert@redhat.com> -Patchwork-id: 93520 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 065/112] virtiofsd: Handle reinit -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Allow init->destroy->init for mount->umount->mount - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit c806d6435fe95fd54b379920aca2f4e3ea1f3258) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index a7a1968..7d742b5 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2028,6 +2028,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, - } - - se->got_init = 1; -+ se->got_destroy = 0; - if (se->op.init) { - se->op.init(se->userdata, &se->conn); - } -@@ -2130,6 +2131,7 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, - (void)iter; - - se->got_destroy = 1; -+ se->got_init = 0; - if (se->op.destroy) { - se->op.destroy(se->userdata); - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Keep-track-of-replies.patch b/kvm-virtiofsd-Keep-track-of-replies.patch deleted file mode 100644 index 18be3e0..0000000 --- a/kvm-virtiofsd-Keep-track-of-replies.patch +++ /dev/null @@ -1,116 +0,0 @@ -From c818a1cb603cad07aa5c49ce808aa09435667c7c Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:04 +0100 -Subject: [PATCH 033/116] virtiofsd: Keep track of replies -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-30-dgilbert@redhat.com> -Patchwork-id: 93481 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 029/112] virtiofsd: Keep track of replies -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Keep track of whether we sent a reply to a request; this is a bit -paranoid but it means: - a) We should always recycle an element even if there was an error - in the request - b) Never try and send two replies on one queue element - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 2f65e69a7f22da8d20c747f34f339ebb40a0634f) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 23 ++++++++++++++++++++--- - 1 file changed, 20 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 05d0e29..f1adeb6 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -44,6 +44,7 @@ struct fv_QueueInfo { - - /* The element for the command currently being processed */ - VuVirtqElement *qe; -+ bool reply_sent; - }; - - /* -@@ -178,6 +179,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - { - VuVirtqElement *elem; - VuVirtq *q; -+ int ret = 0; - - assert(count >= 1); - assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); -@@ -191,6 +193,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - assert(out->unique); - /* For virtio we always have ch */ - assert(ch); -+ assert(!ch->qi->reply_sent); - elem = ch->qi->qe; - q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; - -@@ -208,19 +211,23 @@ int 
virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - if (in_len < sizeof(struct fuse_out_header)) { - fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", - __func__, elem->index); -- return -E2BIG; -+ ret = -E2BIG; -+ goto err; - } - if (in_len < tosend_len) { - fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", - __func__, elem->index, tosend_len); -- return -E2BIG; -+ ret = -E2BIG; -+ goto err; - } - - copy_iov(iov, count, in_sg, in_num, tosend_len); - vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); - vu_queue_notify(&se->virtio_dev->dev, q); -+ ch->qi->reply_sent = true; - -- return 0; -+err: -+ return ret; - } - - /* Thread function for individual queues, created when a queue is 'started' */ -@@ -296,6 +303,9 @@ static void *fv_queue_thread(void *opaque) - break; - } - -+ qi->qe = elem; -+ qi->reply_sent = false; -+ - if (!fbuf.mem) { - fbuf.mem = malloc(se->bufsize); - assert(fbuf.mem); -@@ -331,6 +341,13 @@ static void *fv_queue_thread(void *opaque) - /* TODO: Add checks for fuse_session_exited */ - fuse_session_process_buf_int(se, &fbuf, &ch); - -+ if (!qi->reply_sent) { -+ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", -+ __func__, elem->index); -+ /* I think we've still got to recycle the element */ -+ vu_queue_push(dev, q, elem, 0); -+ vu_queue_notify(dev, q); -+ } - qi->qe = NULL; - free(elem); - elem = NULL; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch b/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch deleted file mode 100644 index 5e054f3..0000000 --- a/kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch +++ /dev/null @@ -1,143 +0,0 @@ -From b37344c38b866c7e7fb773b4a3172a39306bac7e Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:42 +0100 -Subject: [PATCH 071/116] virtiofsd: Kill threads when queues are stopped -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-68-dgilbert@redhat.com> -Patchwork-id: 93522 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 067/112] virtiofsd: Kill threads when queues are stopped -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Kill the threads we've started when the queues get stopped. - -Signed-off-by: Dr. David Alan Gilbert -With improvements by: -Signed-off-by: Eryu Guan -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 10477ac47fc57d00a84802ff97c15450cd8021c1) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 51 +++++++++++++++++++++++++++++++++++++------ - 1 file changed, 44 insertions(+), 7 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 872968f..7a8774a 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -41,6 +41,7 @@ struct fv_QueueInfo { - /* Our queue index, corresponds to array position */ - int qidx; - int kick_fd; -+ int kill_fd; /* For killing the thread */ - - /* The element for the command currently being processed */ - VuVirtqElement *qe; -@@ -412,14 +413,17 @@ static void *fv_queue_thread(void *opaque) - fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, - qi->qidx, qi->kick_fd); - while (1) { -- struct pollfd pf[1]; -+ struct pollfd pf[2]; - pf[0].fd = qi->kick_fd; - pf[0].events = POLLIN; - pf[0].revents = 0; -+ pf[1].fd = qi->kill_fd; -+ pf[1].events = POLLIN; -+ pf[1].revents = 0; - - fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__, - qi->qidx); -- int poll_res = ppoll(pf, 1, 
NULL, NULL); -+ int poll_res = ppoll(pf, 2, NULL, NULL); - - if (poll_res == -1) { - if (errno == EINTR) { -@@ -430,12 +434,23 @@ static void *fv_queue_thread(void *opaque) - fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n"); - break; - } -- assert(poll_res == 1); -+ assert(poll_res >= 1); - if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { - fuse_log(FUSE_LOG_ERR, "%s: Unexpected poll revents %x Queue %d\n", - __func__, pf[0].revents, qi->qidx); - break; - } -+ if (pf[1].revents & (POLLERR | POLLHUP | POLLNVAL)) { -+ fuse_log(FUSE_LOG_ERR, -+ "%s: Unexpected poll revents %x Queue %d killfd\n", -+ __func__, pf[1].revents, qi->qidx); -+ break; -+ } -+ if (pf[1].revents) { -+ fuse_log(FUSE_LOG_INFO, "%s: kill event on queue %d - quitting\n", -+ __func__, qi->qidx); -+ break; -+ } - assert(pf[0].revents & POLLIN); - fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__, - qi->qidx); -@@ -589,6 +604,28 @@ out: - return NULL; - } - -+static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) -+{ -+ int ret; -+ struct fv_QueueInfo *ourqi; -+ -+ assert(qidx < vud->nqueues); -+ ourqi = vud->qi[qidx]; -+ -+ /* Kill the thread */ -+ if (eventfd_write(ourqi->kill_fd, 1)) { -+ fuse_log(FUSE_LOG_ERR, "Eventfd_write for queue %d: %s\n", -+ qidx, strerror(errno)); -+ } -+ ret = pthread_join(ourqi->thread, NULL); -+ if (ret) { -+ fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n", -+ __func__, qidx, ret); -+ } -+ close(ourqi->kill_fd); -+ ourqi->kick_fd = -1; -+} -+ - /* Callback from libvhost-user on start or stop of a queue */ - static void fv_queue_set_started(VuDev *dev, int qidx, bool started) - { -@@ -633,16 +670,16 @@ static void fv_queue_set_started(VuDev *dev, int qidx, bool started) - } - ourqi = vud->qi[qidx]; - ourqi->kick_fd = dev->vq[qidx].kick_fd; -+ -+ ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE); -+ assert(ourqi->kill_fd != -1); - if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { - 
fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", - __func__, qidx); - assert(0); - } - } else { -- /* TODO: Kill the thread */ -- assert(qidx < vud->nqueues); -- ourqi = vud->qi[qidx]; -- ourqi->kick_fd = -1; -+ fv_queue_cleanup_thread(vud, qidx); - } - } - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch b/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch deleted file mode 100644 index 98211cb..0000000 --- a/kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch +++ /dev/null @@ -1,96 +0,0 @@ -From f09f13f9a001a50ee3465c165f4bbaf870fcadb9 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:53 +0100 -Subject: [PATCH 022/116] virtiofsd: Make fsync work even if only inode is - passed in -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-19-dgilbert@redhat.com> -Patchwork-id: 93472 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 018/112] virtiofsd: Make fsync work even if only inode is passed in -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -If caller has not sent file handle in request, then using inode, retrieve -the fd opened using O_PATH and use that to open file again and issue -fsync. This will be needed when dax_flush() calls fsync. At that time -we only have inode information (and not file). - -Signed-off-by: Vivek Goyal -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 1b209805f8159c3f4d89ddb9390a5f64887cebff) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 6 +++++- - tools/virtiofsd/passthrough_ll.c | 28 ++++++++++++++++++++++++++-- - 2 files changed, 31 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 514d79c..8552cfb 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1075,7 +1075,11 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - fi.fh = arg->fh; - - if (req->se->op.fsync) { -- req->se->op.fsync(req, nodeid, datasync, &fi); -+ if (fi.fh == (uint64_t)-1) { -+ req->se->op.fsync(req, nodeid, datasync, NULL); -+ } else { -+ req->se->op.fsync(req, nodeid, datasync, &fi); -+ } - } else { - fuse_reply_err(req, ENOSYS); - } -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 6c4da18..26ac870 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -903,10 +903,34 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, - { - int res; - (void)ino; -+ int fd; -+ char *buf; -+ -+ fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, -+ (void *)fi); -+ -+ if (!fi) { -+ res = asprintf(&buf, "/proc/self/fd/%i", lo_fd(req, ino)); -+ if (res == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } -+ -+ fd = open(buf, O_RDWR); -+ free(buf); -+ if (fd == -1) { -+ return (void)fuse_reply_err(req, errno); -+ } -+ } else { -+ fd = fi->fh; -+ } -+ - if (datasync) { -- res = fdatasync(fi->fh); -+ res = fdatasync(fd); - } else { -- res = fsync(fi->fh); -+ res = fsync(fd); -+ } -+ if (!fi) { -+ close(fd); - } - fuse_reply_err(req, res == -1 ? 
errno : 0); - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch b/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch deleted file mode 100644 index 2c9874d..0000000 --- a/kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch +++ /dev/null @@ -1,257 +0,0 @@ -From a96042f05eaf494fbe26a9cbd940f5f815f782f9 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:56 +0100 -Subject: [PATCH 025/116] virtiofsd: Open vhost connection instead of mounting -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-22-dgilbert@redhat.com> -Patchwork-id: 93476 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 021/112] virtiofsd: Open vhost connection instead of mounting -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -When run with vhost-user options we conect to the QEMU instead -via a socket. Start this off by creating the socket. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit d14bf584dd965821e80d14c16d9292a464b1ab85) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 7 ++-- - tools/virtiofsd/fuse_lowlevel.c | 55 ++++------------------------ - tools/virtiofsd/fuse_virtio.c | 79 +++++++++++++++++++++++++++++++++++++++++ - tools/virtiofsd/fuse_virtio.h | 23 ++++++++++++ - 4 files changed, 114 insertions(+), 50 deletions(-) - create mode 100644 tools/virtiofsd/fuse_virtio.c - create mode 100644 tools/virtiofsd/fuse_virtio.h - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index 26b1a7d..82d6ac7 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -6,9 +6,10 @@ - * See the file COPYING.LIB - */ - --#define FUSE_USE_VERSION 31 -- -+#ifndef FUSE_I_H -+#define FUSE_I_H - -+#define FUSE_USE_VERSION 31 - #include "fuse.h" - #include "fuse_lowlevel.h" - -@@ -101,3 +102,5 @@ void fuse_session_process_buf_int(struct fuse_session *se, - - /* room needed in buffer to accommodate header */ - #define FUSE_BUFFER_HEADER_SIZE 0x1000 -+ -+#endif -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 17e8718..5df124e 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -14,6 +14,7 @@ - #include "standard-headers/linux/fuse.h" - #include "fuse_misc.h" - #include "fuse_opt.h" -+#include "fuse_virtio.h" - - #include - #include -@@ -2202,6 +2203,11 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - goto out4; - } - -+ if (!se->vu_socket_path) { -+ fprintf(stderr, "fuse: missing -o vhost_user_socket option\n"); -+ goto out4; -+ } -+ - se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + FUSE_BUFFER_HEADER_SIZE; - - list_init_req(&se->list); -@@ -2224,54 +2230,7 @@ out1: - - int fuse_session_mount(struct fuse_session *se) - { -- int fd; -- -- /* -- * Make sure file descriptors 0, 1 and 2 are open, otherwise chaos -- * would ensue. 
-- */ -- do { -- fd = open("/dev/null", O_RDWR); -- if (fd > 2) { -- close(fd); -- } -- } while (fd >= 0 && fd <= 2); -- -- /* -- * To allow FUSE daemons to run without privileges, the caller may open -- * /dev/fuse before launching the file system and pass on the file -- * descriptor by specifying /dev/fd/N as the mount point. Note that the -- * parent process takes care of performing the mount in this case. -- */ -- fd = fuse_mnt_parse_fuse_fd(mountpoint); -- if (fd != -1) { -- if (fcntl(fd, F_GETFD) == -1) { -- fuse_log(FUSE_LOG_ERR, "fuse: Invalid file descriptor /dev/fd/%u\n", -- fd); -- return -1; -- } -- se->fd = fd; -- return 0; -- } -- -- /* Open channel */ -- fd = fuse_kern_mount(mountpoint, se->mo); -- if (fd == -1) { -- return -1; -- } -- se->fd = fd; -- -- /* Save mountpoint */ -- se->mountpoint = strdup(mountpoint); -- if (se->mountpoint == NULL) { -- goto error_out; -- } -- -- return 0; -- --error_out: -- fuse_kern_unmount(mountpoint, fd); -- return -1; -+ return virtio_session_mount(se); - } - - int fuse_session_fd(struct fuse_session *se) -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -new file mode 100644 -index 0000000..cbef6ff ---- /dev/null -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -0,0 +1,79 @@ -+/* -+ * virtio-fs glue for FUSE -+ * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates -+ * -+ * Authors: -+ * Dave Gilbert -+ * -+ * Implements the glue between libfuse and libvhost-user -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ -+ -+#include "fuse_i.h" -+#include "standard-headers/linux/fuse.h" -+#include "fuse_misc.h" -+#include "fuse_opt.h" -+#include "fuse_virtio.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* From spec */ -+struct virtio_fs_config { -+ char tag[36]; -+ uint32_t num_queues; -+}; -+ -+int virtio_session_mount(struct fuse_session *se) -+{ -+ struct sockaddr_un un; -+ mode_t old_umask; -+ -+ if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) { -+ fuse_log(FUSE_LOG_ERR, "Socket path too long\n"); -+ return -1; -+ } -+ -+ se->fd = -1; -+ -+ /* -+ * Create the Unix socket to communicate with qemu -+ * based on QEMU's vhost-user-bridge -+ */ -+ unlink(se->vu_socket_path); -+ strcpy(un.sun_path, se->vu_socket_path); -+ size_t addr_len = sizeof(un); -+ -+ int listen_sock = socket(AF_UNIX, SOCK_STREAM, 0); -+ if (listen_sock == -1) { -+ fuse_log(FUSE_LOG_ERR, "vhost socket creation: %m\n"); -+ return -1; -+ } -+ un.sun_family = AF_UNIX; -+ -+ /* -+ * Unfortunately bind doesn't let you set the mask on the socket, -+ * so set umask to 077 and restore it later. -+ */ -+ old_umask = umask(0077); -+ if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) { -+ fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n"); -+ umask(old_umask); -+ return -1; -+ } -+ umask(old_umask); -+ -+ if (listen(listen_sock, 1) == -1) { -+ fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n"); -+ return -1; -+ } -+ -+ return -1; -+} -diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h -new file mode 100644 -index 0000000..8f2edb6 ---- /dev/null -+++ b/tools/virtiofsd/fuse_virtio.h -@@ -0,0 +1,23 @@ -+/* -+ * virtio-fs glue for FUSE -+ * Copyright (C) 2018 Red Hat, Inc. and/or its affiliates -+ * -+ * Authors: -+ * Dave Gilbert -+ * -+ * Implements the glue between libfuse and libvhost-user -+ * -+ * This program can be distributed under the terms of the GNU LGPLv2. 
-+ * See the file COPYING.LIB -+ */ -+ -+#ifndef FUSE_VIRTIO_H -+#define FUSE_VIRTIO_H -+ -+#include "fuse_i.h" -+ -+struct fuse_session; -+ -+int virtio_session_mount(struct fuse_session *se); -+ -+#endif --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch b/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch deleted file mode 100644 index 8d8de78..0000000 --- a/kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch +++ /dev/null @@ -1,76 +0,0 @@ -From ade3dcad8a907d281549b341a8908851e36ba458 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:31 +0100 -Subject: [PATCH 060/116] virtiofsd: Parse flag FUSE_WRITE_KILL_PRIV -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-57-dgilbert@redhat.com> -Patchwork-id: 93505 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 056/112] virtiofsd: Parse flag FUSE_WRITE_KILL_PRIV -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -Caller can set FUSE_WRITE_KILL_PRIV in write_flags. Parse it and pass it -to the filesystem. - -Signed-off-by: Vivek Goyal -Reviewed-by: Misono Tomohiro -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit f779bc5265e7e7abb13a03d4bfbc74151afc15c2) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_common.h | 6 +++++- - tools/virtiofsd/fuse_lowlevel.c | 4 +++- - 2 files changed, 8 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -index f8f6433..686c42c 100644 ---- a/tools/virtiofsd/fuse_common.h -+++ b/tools/virtiofsd/fuse_common.h -@@ -93,8 +93,12 @@ struct fuse_file_info { - */ - unsigned int cache_readdir:1; - -+ /* Indicates that suid/sgid bits should be removed upon write */ -+ unsigned int kill_priv:1; -+ -+ - /** Padding. 
Reserved for future use*/ -- unsigned int padding:25; -+ unsigned int padding:24; - unsigned int padding2:32; - - /* -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 02e1d83..2d6dc5a 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1142,6 +1142,7 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; -+ fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV); - - fi.lock_owner = arg->lock_owner; - fi.flags = arg->flags; -@@ -1177,7 +1178,8 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, - fi.lock_owner = arg->lock_owner; - fi.flags = arg->flags; - fi.fh = arg->fh; -- fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; -+ fi.writepage = !!(arg->write_flags & FUSE_WRITE_CACHE); -+ fi.kill_priv = !!(arg->write_flags & FUSE_WRITE_KILL_PRIV); - - if (ibufv->count == 1) { - assert(!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)); --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch b/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch deleted file mode 100644 index 7d095c9..0000000 --- a/kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch +++ /dev/null @@ -1,140 +0,0 @@ -From d5986c804f05070a07dfe702f7c66357daaa1ab6 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:20 +0100 -Subject: [PATCH 049/116] virtiofsd: Pass write iov's all the way through -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-46-dgilbert@redhat.com> -Patchwork-id: 93497 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 045/112] virtiofsd: Pass write iov's all the way through -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. 
David Alan Gilbert" - -Pass the write iov pointing to guest RAM all the way through rather -than copying the data. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Xiao Yang -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit e17f7a580e2c599330ad3a6946be615ca2fe97d9) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 79 +++++++++++++++++++++++++++++++++++++++---- - 1 file changed, 73 insertions(+), 6 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index fd588a4..872968f 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -454,6 +454,10 @@ static void *fv_queue_thread(void *opaque) - __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); - - while (1) { -+ bool allocated_bufv = false; -+ struct fuse_bufvec bufv; -+ struct fuse_bufvec *pbufv; -+ - /* - * An element contains one request and the space to send our - * response They're spread over multiple descriptors in a -@@ -495,14 +499,76 @@ static void *fv_queue_thread(void *opaque) - __func__, elem->index); - assert(0); /* TODO */ - } -- copy_from_iov(&fbuf, out_num, out_sg); -- fbuf.size = out_len; -+ /* Copy just the first element and look at it */ -+ copy_from_iov(&fbuf, 1, out_sg); -+ -+ if (out_num > 2 && -+ out_sg[0].iov_len == sizeof(struct fuse_in_header) && -+ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && -+ out_sg[1].iov_len == sizeof(struct fuse_write_in)) { -+ /* -+ * For a write we don't actually need to copy the -+ * data, we can just do it straight out of guest memory -+ * but we must still copy the headers in case the guest -+ * was nasty and changed them while we were using them. 
-+ */ -+ fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); -+ -+ /* copy the fuse_write_in header after the fuse_in_header */ -+ fbuf.mem += out_sg->iov_len; -+ copy_from_iov(&fbuf, 1, out_sg + 1); -+ fbuf.mem -= out_sg->iov_len; -+ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; -+ -+ /* Allocate the bufv, with space for the rest of the iov */ -+ allocated_bufv = true; -+ pbufv = malloc(sizeof(struct fuse_bufvec) + -+ sizeof(struct fuse_buf) * (out_num - 2)); -+ if (!pbufv) { -+ vu_queue_unpop(dev, q, elem, 0); -+ free(elem); -+ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", -+ __func__); -+ goto out; -+ } -+ -+ pbufv->count = 1; -+ pbufv->buf[0] = fbuf; -+ -+ size_t iovindex, pbufvindex; -+ iovindex = 2; /* 2 headers, separate iovs */ -+ pbufvindex = 1; /* 2 headers, 1 fusebuf */ -+ -+ for (; iovindex < out_num; iovindex++, pbufvindex++) { -+ pbufv->count++; -+ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ -+ pbufv->buf[pbufvindex].flags = 0; -+ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; -+ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; -+ } -+ } else { -+ /* Normal (non fast write) path */ -+ -+ /* Copy the rest of the buffer */ -+ fbuf.mem += out_sg->iov_len; -+ copy_from_iov(&fbuf, out_num - 1, out_sg + 1); -+ fbuf.mem -= out_sg->iov_len; -+ fbuf.size = out_len; - -- /* TODO! Endianness of header */ -+ /* TODO! 
Endianness of header */ - -- /* TODO: Add checks for fuse_session_exited */ -- struct fuse_bufvec bufv = { .buf[0] = fbuf, .count = 1 }; -- fuse_session_process_buf_int(se, &bufv, &ch); -+ /* TODO: Add checks for fuse_session_exited */ -+ bufv.buf[0] = fbuf; -+ bufv.count = 1; -+ pbufv = &bufv; -+ } -+ pbufv->idx = 0; -+ pbufv->off = 0; -+ fuse_session_process_buf_int(se, pbufv, &ch); -+ -+ if (allocated_bufv) { -+ free(pbufv); -+ } - - if (!qi->reply_sent) { - fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", -@@ -516,6 +582,7 @@ static void *fv_queue_thread(void *opaque) - elem = NULL; - } - } -+out: - pthread_mutex_destroy(&ch.lock); - free(fbuf.mem); - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch b/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch deleted file mode 100644 index 834ced1..0000000 --- a/kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch +++ /dev/null @@ -1,168 +0,0 @@ -From 9e4320eec5204da851ac95fb7a7e6520c9ccee7d Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:19 +0100 -Subject: [PATCH 048/116] virtiofsd: Plumb fuse_bufvec through to do_write_buf -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-45-dgilbert@redhat.com> -Patchwork-id: 93499 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 044/112] virtiofsd: Plumb fuse_bufvec through to do_write_buf -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Let fuse_session_process_buf_int take a fuse_bufvec * instead of a -fuse_buf; and then through to do_write_buf - where in the best -case it can pass that straight through to op.write_buf without copying -(other than skipping a header). - -Signed-off-by: Dr. 
David Alan Gilbert -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 469f9d2fc405b0508e6cf1b4b5bbcadfc82064e5) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 2 +- - tools/virtiofsd/fuse_lowlevel.c | 61 +++++++++++++++++++++++++++-------------- - tools/virtiofsd/fuse_virtio.c | 3 +- - 3 files changed, 44 insertions(+), 22 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index 45995f3..a20854f 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -100,7 +100,7 @@ int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, - void fuse_free_req(fuse_req_t req); - - void fuse_session_process_buf_int(struct fuse_session *se, -- const struct fuse_buf *buf, -+ struct fuse_bufvec *bufv, - struct fuse_chan *ch); - - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 95f4db8..7e10995 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1004,11 +1004,12 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - - static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, -- const struct fuse_buf *ibuf) -+ struct fuse_bufvec *ibufv) - { - struct fuse_session *se = req->se; -- struct fuse_bufvec bufv = { -- .buf[0] = *ibuf, -+ struct fuse_bufvec *pbufv = ibufv; -+ struct fuse_bufvec tmpbufv = { -+ .buf[0] = ibufv->buf[0], - .count = 1, - }; - struct fuse_write_in *arg = (struct fuse_write_in *)inarg; -@@ -1018,22 +1019,31 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, - fi.fh = arg->fh; - fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; - -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -- bufv.buf[0].mem = PARAM(arg); -- } -- -- bufv.buf[0].size -= -- sizeof(struct fuse_in_header) + sizeof(struct 
fuse_write_in); -- if (bufv.buf[0].size < arg->size) { -- fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); -- fuse_reply_err(req, EIO); -- return; -+ if (ibufv->count == 1) { -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ if (!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)) { -+ tmpbufv.buf[0].mem = PARAM(arg); -+ } -+ tmpbufv.buf[0].size -= -+ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); -+ if (tmpbufv.buf[0].size < arg->size) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: do_write_buf: buffer size too small\n"); -+ fuse_reply_err(req, EIO); -+ return; -+ } -+ tmpbufv.buf[0].size = arg->size; -+ pbufv = &tmpbufv; -+ } else { -+ /* -+ * Input bufv contains the headers in the first element -+ * and the data in the rest, we need to skip that first element -+ */ -+ ibufv->buf[0].size = 0; - } -- bufv.buf[0].size = arg->size; - -- se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); -+ se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); - } - - static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -@@ -2024,13 +2034,24 @@ static const char *opname(enum fuse_opcode opcode) - void fuse_session_process_buf(struct fuse_session *se, - const struct fuse_buf *buf) - { -- fuse_session_process_buf_int(se, buf, NULL); -+ struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 }; -+ fuse_session_process_buf_int(se, &bufv, NULL); - } - -+/* -+ * Restriction: -+ * bufv is normally a single entry buffer, except for a write -+ * where (if it's in memory) then the bufv may be multiple entries, -+ * where the first entry contains all headers and subsequent entries -+ * contain data -+ * bufv shall not use any offsets etc to make the data anything -+ * other than contiguous starting from 0. 
-+ */ - void fuse_session_process_buf_int(struct fuse_session *se, -- const struct fuse_buf *buf, -+ struct fuse_bufvec *bufv, - struct fuse_chan *ch) - { -+ const struct fuse_buf *buf = bufv->buf; - struct fuse_in_header *in; - const void *inarg; - struct fuse_req *req; -@@ -2108,7 +2129,7 @@ void fuse_session_process_buf_int(struct fuse_session *se, - - inarg = (void *)&in[1]; - if (in->opcode == FUSE_WRITE && se->op.write_buf) { -- do_write_buf(req, in->nodeid, inarg, buf); -+ do_write_buf(req, in->nodeid, inarg, bufv); - } else { - fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); - } -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 635f877..fd588a4 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -501,7 +501,8 @@ static void *fv_queue_thread(void *opaque) - /* TODO! Endianness of header */ - - /* TODO: Add checks for fuse_session_exited */ -- fuse_session_process_buf_int(se, &fbuf, &ch); -+ struct fuse_bufvec bufv = { .buf[0] = fbuf, .count = 1 }; -+ fuse_session_process_buf_int(se, &bufv, &ch); - - if (!qi->reply_sent) { - fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Poll-kick_fd-for-queue.patch b/kvm-virtiofsd-Poll-kick_fd-for-queue.patch deleted file mode 100644 index d7c6c0a..0000000 --- a/kvm-virtiofsd-Poll-kick_fd-for-queue.patch +++ /dev/null @@ -1,97 +0,0 @@ -From 083b944fac29bc3115a19eb38e176f6b23f04938 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:01 +0100 -Subject: [PATCH 030/116] virtiofsd: Poll kick_fd for queue -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-27-dgilbert@redhat.com> -Patchwork-id: 93483 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 026/112] virtiofsd: Poll kick_fd for queue -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -In the queue thread poll the kick_fd we're passed. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 5dcd1f56141378226d33dc3df68ec57913e0aa04) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 40 +++++++++++++++++++++++++++++++++++++++- - 1 file changed, 39 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 2a94bb3..05e7258 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -100,13 +101,50 @@ static void fv_panic(VuDev *dev, const char *err) - exit(EXIT_FAILURE); - } - -+/* Thread function for individual queues, created when a queue is 'started' */ - static void *fv_queue_thread(void *opaque) - { - struct fv_QueueInfo *qi = opaque; - fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, - qi->qidx, qi->kick_fd); - while (1) { -- /* TODO */ -+ struct pollfd pf[1]; -+ pf[0].fd = qi->kick_fd; -+ pf[0].events = POLLIN; -+ pf[0].revents = 0; -+ -+ fuse_log(FUSE_LOG_DEBUG, "%s: Waiting for Queue %d event\n", __func__, -+ qi->qidx); -+ int poll_res = ppoll(pf, 1, NULL, NULL); -+ -+ if (poll_res == -1) { -+ if (errno == EINTR) { -+ fuse_log(FUSE_LOG_INFO, "%s: ppoll interrupted, going around\n", -+ __func__); -+ continue; -+ } -+ fuse_log(FUSE_LOG_ERR, "fv_queue_thread ppoll: %m\n"); -+ break; -+ } -+ assert(poll_res == 1); -+ if (pf[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { -+ fuse_log(FUSE_LOG_ERR, "%s: 
Unexpected poll revents %x Queue %d\n", -+ __func__, pf[0].revents, qi->qidx); -+ break; -+ } -+ assert(pf[0].revents & POLLIN); -+ fuse_log(FUSE_LOG_DEBUG, "%s: Got queue event on Queue %d\n", __func__, -+ qi->qidx); -+ -+ eventfd_t evalue; -+ if (eventfd_read(qi->kick_fd, &evalue)) { -+ fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); -+ break; -+ } -+ if (qi->virtio_dev->se->debug) { -+ fprintf(stderr, "%s: Queue %d gave evalue: %zx\n", __func__, -+ qi->qidx, (size_t)evalue); -+ } - } - - return NULL; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch b/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch deleted file mode 100644 index d4e1ea1..0000000 --- a/kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch +++ /dev/null @@ -1,144 +0,0 @@ -From ab336e3aea97d76c1b2ac725d19b4518f47dd8f0 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:59 +0100 -Subject: [PATCH 088/116] virtiofsd: Prevent multiply running with same - vhost_user_socket -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-85-dgilbert@redhat.com> -Patchwork-id: 93541 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 084/112] virtiofsd: Prevent multiply running with same vhost_user_socket -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Masayoshi Mizuma - -virtiofsd can run multiply even if the vhost_user_socket is same path. - - ]# ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu -o source=/tmp/share & - [1] 244965 - virtio_session_mount: Waiting for vhost-user socket connection... - ]# ./virtiofsd -o vhost_user_socket=/tmp/vhostqemu -o source=/tmp/share & - [2] 244966 - virtio_session_mount: Waiting for vhost-user socket connection... 
- ]# - -The user will get confused about the situation and maybe the cause of the -unexpected problem. So it's better to prevent the multiple running. - -Create a regular file under localstatedir directory to exclude the -vhost_user_socket. To create and lock the file, use qemu_write_pidfile() -because the API has some sanity checks and file lock. - -Signed-off-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert - Applied fixes from Stefan's review and moved osdep include -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 96814800d2b49d18737c36e021c387697ec40c62) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 1 + - tools/virtiofsd/fuse_virtio.c | 49 ++++++++++++++++++++++++++++++++++++++++- - 2 files changed, 49 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 440508a..aac282f 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -18,6 +18,7 @@ - - #include - #include -+#include - #include - #include - #include -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index e7bd772..b7948de 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -13,11 +13,12 @@ - - #include "qemu/osdep.h" - #include "qemu/iov.h" --#include "fuse_virtio.h" -+#include "qapi/error.h" - #include "fuse_i.h" - #include "standard-headers/linux/fuse.h" - #include "fuse_misc.h" - #include "fuse_opt.h" -+#include "fuse_virtio.h" - - #include - #include -@@ -743,6 +744,42 @@ int virtio_loop(struct fuse_session *se) - return 0; - } - -+static void strreplace(char *s, char old, char new) -+{ -+ for (; *s; ++s) { -+ if (*s == old) { -+ *s = new; -+ } -+ } -+} -+ -+static bool fv_socket_lock(struct fuse_session *se) -+{ -+ g_autofree gchar *sk_name = NULL; -+ g_autofree gchar *pidfile = NULL; -+ g_autofree gchar *dir = NULL; -+ 
Error *local_err = NULL; -+ -+ dir = qemu_get_local_state_pathname("run/virtiofsd"); -+ -+ if (g_mkdir_with_parents(dir, S_IRWXU) < 0) { -+ fuse_log(FUSE_LOG_ERR, "%s: Failed to create directory %s: %s", -+ __func__, dir, strerror(errno)); -+ return false; -+ } -+ -+ sk_name = g_strdup(se->vu_socket_path); -+ strreplace(sk_name, '/', '.'); -+ pidfile = g_strdup_printf("%s/%s.pid", dir, sk_name); -+ -+ if (!qemu_write_pidfile(pidfile, &local_err)) { -+ error_report_err(local_err); -+ return false; -+ } -+ -+ return true; -+} -+ - static int fv_create_listen_socket(struct fuse_session *se) - { - struct sockaddr_un un; -@@ -758,6 +795,16 @@ static int fv_create_listen_socket(struct fuse_session *se) - return -1; - } - -+ if (!strlen(se->vu_socket_path)) { -+ fuse_log(FUSE_LOG_ERR, "Socket path is empty\n"); -+ return -1; -+ } -+ -+ /* Check the vu_socket_path is already used */ -+ if (!fv_socket_lock(se)) { -+ return -1; -+ } -+ - /* - * Create the Unix socket to communicate with qemu - * based on QEMU's vhost-user-bridge --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch b/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch deleted file mode 100644 index f30f23a..0000000 --- a/kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch +++ /dev/null @@ -1,945 +0,0 @@ -From e7c1ad608117b21f80c762f5505a66b21c56e9d3 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:40 +0100 -Subject: [PATCH 009/116] virtiofsd: Pull in kernel's fuse.h -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-6-dgilbert@redhat.com> -Patchwork-id: 93460 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 005/112] virtiofsd: Pull in kernel's fuse.h -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. 
David Alan Gilbert" - -Update scripts/update-linux-headers.sh to add fuse.h and -use it to pull in fuse.h from the kernel; from v5.5-rc1 - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit a62a9e192bc5f0aa0bc076b51db5a069add87c78) -Signed-off-by: Miroslav Rezanina ---- - include/standard-headers/linux/fuse.h | 891 ++++++++++++++++++++++++++++++++++ - scripts/update-linux-headers.sh | 1 + - 2 files changed, 892 insertions(+) - create mode 100644 include/standard-headers/linux/fuse.h - -diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h -new file mode 100644 -index 0000000..f4df0a4 ---- /dev/null -+++ b/include/standard-headers/linux/fuse.h -@@ -0,0 +1,891 @@ -+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ -+/* -+ This file defines the kernel interface of FUSE -+ Copyright (C) 2001-2008 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU GPL. -+ See the file COPYING. -+ -+ This -- and only this -- header file may also be distributed under -+ the terms of the BSD Licence as follows: -+ -+ Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ 1. Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ 2. Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. 
-+ -+ THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND -+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE -+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ SUCH DAMAGE. -+*/ -+ -+/* -+ * This file defines the kernel interface of FUSE -+ * -+ * Protocol changelog: -+ * -+ * 7.1: -+ * - add the following messages: -+ * FUSE_SETATTR, FUSE_SYMLINK, FUSE_MKNOD, FUSE_MKDIR, FUSE_UNLINK, -+ * FUSE_RMDIR, FUSE_RENAME, FUSE_LINK, FUSE_OPEN, FUSE_READ, FUSE_WRITE, -+ * FUSE_RELEASE, FUSE_FSYNC, FUSE_FLUSH, FUSE_SETXATTR, FUSE_GETXATTR, -+ * FUSE_LISTXATTR, FUSE_REMOVEXATTR, FUSE_OPENDIR, FUSE_READDIR, -+ * FUSE_RELEASEDIR -+ * - add padding to messages to accommodate 32-bit servers on 64-bit kernels -+ * -+ * 7.2: -+ * - add FOPEN_DIRECT_IO and FOPEN_KEEP_CACHE flags -+ * - add FUSE_FSYNCDIR message -+ * -+ * 7.3: -+ * - add FUSE_ACCESS message -+ * - add FUSE_CREATE message -+ * - add filehandle to fuse_setattr_in -+ * -+ * 7.4: -+ * - add frsize to fuse_kstatfs -+ * - clean up request size limit checking -+ * -+ * 7.5: -+ * - add flags and max_write to fuse_init_out -+ * -+ * 7.6: -+ * - add max_readahead to fuse_init_in and fuse_init_out -+ * -+ * 7.7: -+ * - add FUSE_INTERRUPT message -+ * - add POSIX file lock support -+ * -+ * 7.8: -+ * - add lock_owner and flags fields to fuse_release_in -+ * - add FUSE_BMAP message -+ * - add FUSE_DESTROY message -+ * -+ * 7.9: -+ * - new 
fuse_getattr_in input argument of GETATTR -+ * - add lk_flags in fuse_lk_in -+ * - add lock_owner field to fuse_setattr_in, fuse_read_in and fuse_write_in -+ * - add blksize field to fuse_attr -+ * - add file flags field to fuse_read_in and fuse_write_in -+ * - Add ATIME_NOW and MTIME_NOW flags to fuse_setattr_in -+ * -+ * 7.10 -+ * - add nonseekable open flag -+ * -+ * 7.11 -+ * - add IOCTL message -+ * - add unsolicited notification support -+ * - add POLL message and NOTIFY_POLL notification -+ * -+ * 7.12 -+ * - add umask flag to input argument of create, mknod and mkdir -+ * - add notification messages for invalidation of inodes and -+ * directory entries -+ * -+ * 7.13 -+ * - make max number of background requests and congestion threshold -+ * tunables -+ * -+ * 7.14 -+ * - add splice support to fuse device -+ * -+ * 7.15 -+ * - add store notify -+ * - add retrieve notify -+ * -+ * 7.16 -+ * - add BATCH_FORGET request -+ * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct -+ * fuse_ioctl_iovec' instead of ambiguous 'struct iovec' -+ * - add FUSE_IOCTL_32BIT flag -+ * -+ * 7.17 -+ * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK -+ * -+ * 7.18 -+ * - add FUSE_IOCTL_DIR flag -+ * - add FUSE_NOTIFY_DELETE -+ * -+ * 7.19 -+ * - add FUSE_FALLOCATE -+ * -+ * 7.20 -+ * - add FUSE_AUTO_INVAL_DATA -+ * -+ * 7.21 -+ * - add FUSE_READDIRPLUS -+ * - send the requested events in POLL request -+ * -+ * 7.22 -+ * - add FUSE_ASYNC_DIO -+ * -+ * 7.23 -+ * - add FUSE_WRITEBACK_CACHE -+ * - add time_gran to fuse_init_out -+ * - add reserved space to fuse_init_out -+ * - add FATTR_CTIME -+ * - add ctime and ctimensec to fuse_setattr_in -+ * - add FUSE_RENAME2 request -+ * - add FUSE_NO_OPEN_SUPPORT flag -+ * -+ * 7.24 -+ * - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support -+ * -+ * 7.25 -+ * - add FUSE_PARALLEL_DIROPS -+ * -+ * 7.26 -+ * - add FUSE_HANDLE_KILLPRIV -+ * - add FUSE_POSIX_ACL -+ * -+ * 7.27 -+ * - add FUSE_ABORT_ERROR -+ * -+ * 7.28 -+ * - 
add FUSE_COPY_FILE_RANGE -+ * - add FOPEN_CACHE_DIR -+ * - add FUSE_MAX_PAGES, add max_pages to init_out -+ * - add FUSE_CACHE_SYMLINKS -+ * -+ * 7.29 -+ * - add FUSE_NO_OPENDIR_SUPPORT flag -+ * -+ * 7.30 -+ * - add FUSE_EXPLICIT_INVAL_DATA -+ * - add FUSE_IOCTL_COMPAT_X32 -+ * -+ * 7.31 -+ * - add FUSE_WRITE_KILL_PRIV flag -+ * - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING -+ * - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag -+ */ -+ -+#ifndef _LINUX_FUSE_H -+#define _LINUX_FUSE_H -+ -+#include -+ -+/* -+ * Version negotiation: -+ * -+ * Both the kernel and userspace send the version they support in the -+ * INIT request and reply respectively. -+ * -+ * If the major versions match then both shall use the smallest -+ * of the two minor versions for communication. -+ * -+ * If the kernel supports a larger major version, then userspace shall -+ * reply with the major version it supports, ignore the rest of the -+ * INIT message and expect a new INIT message from the kernel with a -+ * matching major version. -+ * -+ * If the library supports a larger major version, then it shall fall -+ * back to the major protocol version sent by the kernel for -+ * communication and reply with that major version (and an arbitrary -+ * supported minor version). 
-+ */ -+ -+/** Version number of this interface */ -+#define FUSE_KERNEL_VERSION 7 -+ -+/** Minor version number of this interface */ -+#define FUSE_KERNEL_MINOR_VERSION 31 -+ -+/** The node ID of the root inode */ -+#define FUSE_ROOT_ID 1 -+ -+/* Make sure all structures are padded to 64bit boundary, so 32bit -+ userspace works under 64bit kernels */ -+ -+struct fuse_attr { -+ uint64_t ino; -+ uint64_t size; -+ uint64_t blocks; -+ uint64_t atime; -+ uint64_t mtime; -+ uint64_t ctime; -+ uint32_t atimensec; -+ uint32_t mtimensec; -+ uint32_t ctimensec; -+ uint32_t mode; -+ uint32_t nlink; -+ uint32_t uid; -+ uint32_t gid; -+ uint32_t rdev; -+ uint32_t blksize; -+ uint32_t padding; -+}; -+ -+struct fuse_kstatfs { -+ uint64_t blocks; -+ uint64_t bfree; -+ uint64_t bavail; -+ uint64_t files; -+ uint64_t ffree; -+ uint32_t bsize; -+ uint32_t namelen; -+ uint32_t frsize; -+ uint32_t padding; -+ uint32_t spare[6]; -+}; -+ -+struct fuse_file_lock { -+ uint64_t start; -+ uint64_t end; -+ uint32_t type; -+ uint32_t pid; /* tgid */ -+}; -+ -+/** -+ * Bitmasks for fuse_setattr_in.valid -+ */ -+#define FATTR_MODE (1 << 0) -+#define FATTR_UID (1 << 1) -+#define FATTR_GID (1 << 2) -+#define FATTR_SIZE (1 << 3) -+#define FATTR_ATIME (1 << 4) -+#define FATTR_MTIME (1 << 5) -+#define FATTR_FH (1 << 6) -+#define FATTR_ATIME_NOW (1 << 7) -+#define FATTR_MTIME_NOW (1 << 8) -+#define FATTR_LOCKOWNER (1 << 9) -+#define FATTR_CTIME (1 << 10) -+ -+/** -+ * Flags returned by the OPEN request -+ * -+ * FOPEN_DIRECT_IO: bypass page cache for this open file -+ * FOPEN_KEEP_CACHE: don't invalidate the data cache on open -+ * FOPEN_NONSEEKABLE: the file is not seekable -+ * FOPEN_CACHE_DIR: allow caching this directory -+ * FOPEN_STREAM: the file is stream-like (no file position at all) -+ */ -+#define FOPEN_DIRECT_IO (1 << 0) -+#define FOPEN_KEEP_CACHE (1 << 1) -+#define FOPEN_NONSEEKABLE (1 << 2) -+#define FOPEN_CACHE_DIR (1 << 3) -+#define FOPEN_STREAM (1 << 4) -+ -+/** -+ * INIT 
request/reply flags -+ * -+ * FUSE_ASYNC_READ: asynchronous read requests -+ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks -+ * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported) -+ * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem -+ * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." -+ * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB -+ * FUSE_DONT_MASK: don't apply umask to file mode on create operations -+ * FUSE_SPLICE_WRITE: kernel supports splice write on the device -+ * FUSE_SPLICE_MOVE: kernel supports splice move on the device -+ * FUSE_SPLICE_READ: kernel supports splice read on the device -+ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks -+ * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories -+ * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages -+ * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one) -+ * FUSE_READDIRPLUS_AUTO: adaptive readdirplus -+ * FUSE_ASYNC_DIO: asynchronous direct I/O submission -+ * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes -+ * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens -+ * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir -+ * FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc -+ * FUSE_POSIX_ACL: filesystem supports posix acls -+ * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED -+ * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages -+ * FUSE_CACHE_SYMLINKS: cache READLINK responses -+ * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir -+ * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request -+ * FUSE_MAP_ALIGNMENT: map_alignment field is valid -+ */ -+#define FUSE_ASYNC_READ (1 << 0) -+#define FUSE_POSIX_LOCKS (1 << 1) -+#define FUSE_FILE_OPS (1 << 2) -+#define FUSE_ATOMIC_O_TRUNC (1 << 3) -+#define FUSE_EXPORT_SUPPORT (1 << 4) -+#define 
FUSE_BIG_WRITES (1 << 5) -+#define FUSE_DONT_MASK (1 << 6) -+#define FUSE_SPLICE_WRITE (1 << 7) -+#define FUSE_SPLICE_MOVE (1 << 8) -+#define FUSE_SPLICE_READ (1 << 9) -+#define FUSE_FLOCK_LOCKS (1 << 10) -+#define FUSE_HAS_IOCTL_DIR (1 << 11) -+#define FUSE_AUTO_INVAL_DATA (1 << 12) -+#define FUSE_DO_READDIRPLUS (1 << 13) -+#define FUSE_READDIRPLUS_AUTO (1 << 14) -+#define FUSE_ASYNC_DIO (1 << 15) -+#define FUSE_WRITEBACK_CACHE (1 << 16) -+#define FUSE_NO_OPEN_SUPPORT (1 << 17) -+#define FUSE_PARALLEL_DIROPS (1 << 18) -+#define FUSE_HANDLE_KILLPRIV (1 << 19) -+#define FUSE_POSIX_ACL (1 << 20) -+#define FUSE_ABORT_ERROR (1 << 21) -+#define FUSE_MAX_PAGES (1 << 22) -+#define FUSE_CACHE_SYMLINKS (1 << 23) -+#define FUSE_NO_OPENDIR_SUPPORT (1 << 24) -+#define FUSE_EXPLICIT_INVAL_DATA (1 << 25) -+#define FUSE_MAP_ALIGNMENT (1 << 26) -+ -+/** -+ * CUSE INIT request/reply flags -+ * -+ * CUSE_UNRESTRICTED_IOCTL: use unrestricted ioctl -+ */ -+#define CUSE_UNRESTRICTED_IOCTL (1 << 0) -+ -+/** -+ * Release flags -+ */ -+#define FUSE_RELEASE_FLUSH (1 << 0) -+#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1) -+ -+/** -+ * Getattr flags -+ */ -+#define FUSE_GETATTR_FH (1 << 0) -+ -+/** -+ * Lock flags -+ */ -+#define FUSE_LK_FLOCK (1 << 0) -+ -+/** -+ * WRITE flags -+ * -+ * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed -+ * FUSE_WRITE_LOCKOWNER: lock_owner field is valid -+ * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits -+ */ -+#define FUSE_WRITE_CACHE (1 << 0) -+#define FUSE_WRITE_LOCKOWNER (1 << 1) -+#define FUSE_WRITE_KILL_PRIV (1 << 2) -+ -+/** -+ * Read flags -+ */ -+#define FUSE_READ_LOCKOWNER (1 << 1) -+ -+/** -+ * Ioctl flags -+ * -+ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine -+ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed -+ * FUSE_IOCTL_RETRY: retry with new iovecs -+ * FUSE_IOCTL_32BIT: 32bit ioctl -+ * FUSE_IOCTL_DIR: is a directory -+ * FUSE_IOCTL_COMPAT_X32: x32 compat ioctl on 64bit machine 
(64bit time_t) -+ * -+ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs -+ */ -+#define FUSE_IOCTL_COMPAT (1 << 0) -+#define FUSE_IOCTL_UNRESTRICTED (1 << 1) -+#define FUSE_IOCTL_RETRY (1 << 2) -+#define FUSE_IOCTL_32BIT (1 << 3) -+#define FUSE_IOCTL_DIR (1 << 4) -+#define FUSE_IOCTL_COMPAT_X32 (1 << 5) -+ -+#define FUSE_IOCTL_MAX_IOV 256 -+ -+/** -+ * Poll flags -+ * -+ * FUSE_POLL_SCHEDULE_NOTIFY: request poll notify -+ */ -+#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0) -+ -+/** -+ * Fsync flags -+ * -+ * FUSE_FSYNC_FDATASYNC: Sync data only, not metadata -+ */ -+#define FUSE_FSYNC_FDATASYNC (1 << 0) -+ -+enum fuse_opcode { -+ FUSE_LOOKUP = 1, -+ FUSE_FORGET = 2, /* no reply */ -+ FUSE_GETATTR = 3, -+ FUSE_SETATTR = 4, -+ FUSE_READLINK = 5, -+ FUSE_SYMLINK = 6, -+ FUSE_MKNOD = 8, -+ FUSE_MKDIR = 9, -+ FUSE_UNLINK = 10, -+ FUSE_RMDIR = 11, -+ FUSE_RENAME = 12, -+ FUSE_LINK = 13, -+ FUSE_OPEN = 14, -+ FUSE_READ = 15, -+ FUSE_WRITE = 16, -+ FUSE_STATFS = 17, -+ FUSE_RELEASE = 18, -+ FUSE_FSYNC = 20, -+ FUSE_SETXATTR = 21, -+ FUSE_GETXATTR = 22, -+ FUSE_LISTXATTR = 23, -+ FUSE_REMOVEXATTR = 24, -+ FUSE_FLUSH = 25, -+ FUSE_INIT = 26, -+ FUSE_OPENDIR = 27, -+ FUSE_READDIR = 28, -+ FUSE_RELEASEDIR = 29, -+ FUSE_FSYNCDIR = 30, -+ FUSE_GETLK = 31, -+ FUSE_SETLK = 32, -+ FUSE_SETLKW = 33, -+ FUSE_ACCESS = 34, -+ FUSE_CREATE = 35, -+ FUSE_INTERRUPT = 36, -+ FUSE_BMAP = 37, -+ FUSE_DESTROY = 38, -+ FUSE_IOCTL = 39, -+ FUSE_POLL = 40, -+ FUSE_NOTIFY_REPLY = 41, -+ FUSE_BATCH_FORGET = 42, -+ FUSE_FALLOCATE = 43, -+ FUSE_READDIRPLUS = 44, -+ FUSE_RENAME2 = 45, -+ FUSE_LSEEK = 46, -+ FUSE_COPY_FILE_RANGE = 47, -+ FUSE_SETUPMAPPING = 48, -+ FUSE_REMOVEMAPPING = 49, -+ -+ /* CUSE specific operations */ -+ CUSE_INIT = 4096, -+ -+ /* Reserved opcodes: helpful to detect structure endian-ness */ -+ CUSE_INIT_BSWAP_RESERVED = 1048576, /* CUSE_INIT << 8 */ -+ FUSE_INIT_BSWAP_RESERVED = 436207616, /* FUSE_INIT << 24 */ -+}; -+ -+enum fuse_notify_code { -+ FUSE_NOTIFY_POLL = 1, -+ 
FUSE_NOTIFY_INVAL_INODE = 2, -+ FUSE_NOTIFY_INVAL_ENTRY = 3, -+ FUSE_NOTIFY_STORE = 4, -+ FUSE_NOTIFY_RETRIEVE = 5, -+ FUSE_NOTIFY_DELETE = 6, -+ FUSE_NOTIFY_CODE_MAX, -+}; -+ -+/* The read buffer is required to be at least 8k, but may be much larger */ -+#define FUSE_MIN_READ_BUFFER 8192 -+ -+#define FUSE_COMPAT_ENTRY_OUT_SIZE 120 -+ -+struct fuse_entry_out { -+ uint64_t nodeid; /* Inode ID */ -+ uint64_t generation; /* Inode generation: nodeid:gen must -+ be unique for the fs's lifetime */ -+ uint64_t entry_valid; /* Cache timeout for the name */ -+ uint64_t attr_valid; /* Cache timeout for the attributes */ -+ uint32_t entry_valid_nsec; -+ uint32_t attr_valid_nsec; -+ struct fuse_attr attr; -+}; -+ -+struct fuse_forget_in { -+ uint64_t nlookup; -+}; -+ -+struct fuse_forget_one { -+ uint64_t nodeid; -+ uint64_t nlookup; -+}; -+ -+struct fuse_batch_forget_in { -+ uint32_t count; -+ uint32_t dummy; -+}; -+ -+struct fuse_getattr_in { -+ uint32_t getattr_flags; -+ uint32_t dummy; -+ uint64_t fh; -+}; -+ -+#define FUSE_COMPAT_ATTR_OUT_SIZE 96 -+ -+struct fuse_attr_out { -+ uint64_t attr_valid; /* Cache timeout for the attributes */ -+ uint32_t attr_valid_nsec; -+ uint32_t dummy; -+ struct fuse_attr attr; -+}; -+ -+#define FUSE_COMPAT_MKNOD_IN_SIZE 8 -+ -+struct fuse_mknod_in { -+ uint32_t mode; -+ uint32_t rdev; -+ uint32_t umask; -+ uint32_t padding; -+}; -+ -+struct fuse_mkdir_in { -+ uint32_t mode; -+ uint32_t umask; -+}; -+ -+struct fuse_rename_in { -+ uint64_t newdir; -+}; -+ -+struct fuse_rename2_in { -+ uint64_t newdir; -+ uint32_t flags; -+ uint32_t padding; -+}; -+ -+struct fuse_link_in { -+ uint64_t oldnodeid; -+}; -+ -+struct fuse_setattr_in { -+ uint32_t valid; -+ uint32_t padding; -+ uint64_t fh; -+ uint64_t size; -+ uint64_t lock_owner; -+ uint64_t atime; -+ uint64_t mtime; -+ uint64_t ctime; -+ uint32_t atimensec; -+ uint32_t mtimensec; -+ uint32_t ctimensec; -+ uint32_t mode; -+ uint32_t unused4; -+ uint32_t uid; -+ uint32_t gid; -+ uint32_t unused5; 
-+}; -+ -+struct fuse_open_in { -+ uint32_t flags; -+ uint32_t unused; -+}; -+ -+struct fuse_create_in { -+ uint32_t flags; -+ uint32_t mode; -+ uint32_t umask; -+ uint32_t padding; -+}; -+ -+struct fuse_open_out { -+ uint64_t fh; -+ uint32_t open_flags; -+ uint32_t padding; -+}; -+ -+struct fuse_release_in { -+ uint64_t fh; -+ uint32_t flags; -+ uint32_t release_flags; -+ uint64_t lock_owner; -+}; -+ -+struct fuse_flush_in { -+ uint64_t fh; -+ uint32_t unused; -+ uint32_t padding; -+ uint64_t lock_owner; -+}; -+ -+struct fuse_read_in { -+ uint64_t fh; -+ uint64_t offset; -+ uint32_t size; -+ uint32_t read_flags; -+ uint64_t lock_owner; -+ uint32_t flags; -+ uint32_t padding; -+}; -+ -+#define FUSE_COMPAT_WRITE_IN_SIZE 24 -+ -+struct fuse_write_in { -+ uint64_t fh; -+ uint64_t offset; -+ uint32_t size; -+ uint32_t write_flags; -+ uint64_t lock_owner; -+ uint32_t flags; -+ uint32_t padding; -+}; -+ -+struct fuse_write_out { -+ uint32_t size; -+ uint32_t padding; -+}; -+ -+#define FUSE_COMPAT_STATFS_SIZE 48 -+ -+struct fuse_statfs_out { -+ struct fuse_kstatfs st; -+}; -+ -+struct fuse_fsync_in { -+ uint64_t fh; -+ uint32_t fsync_flags; -+ uint32_t padding; -+}; -+ -+struct fuse_setxattr_in { -+ uint32_t size; -+ uint32_t flags; -+}; -+ -+struct fuse_getxattr_in { -+ uint32_t size; -+ uint32_t padding; -+}; -+ -+struct fuse_getxattr_out { -+ uint32_t size; -+ uint32_t padding; -+}; -+ -+struct fuse_lk_in { -+ uint64_t fh; -+ uint64_t owner; -+ struct fuse_file_lock lk; -+ uint32_t lk_flags; -+ uint32_t padding; -+}; -+ -+struct fuse_lk_out { -+ struct fuse_file_lock lk; -+}; -+ -+struct fuse_access_in { -+ uint32_t mask; -+ uint32_t padding; -+}; -+ -+struct fuse_init_in { -+ uint32_t major; -+ uint32_t minor; -+ uint32_t max_readahead; -+ uint32_t flags; -+}; -+ -+#define FUSE_COMPAT_INIT_OUT_SIZE 8 -+#define FUSE_COMPAT_22_INIT_OUT_SIZE 24 -+ -+struct fuse_init_out { -+ uint32_t major; -+ uint32_t minor; -+ uint32_t max_readahead; -+ uint32_t flags; -+ uint16_t 
max_background; -+ uint16_t congestion_threshold; -+ uint32_t max_write; -+ uint32_t time_gran; -+ uint16_t max_pages; -+ uint16_t map_alignment; -+ uint32_t unused[8]; -+}; -+ -+#define CUSE_INIT_INFO_MAX 4096 -+ -+struct cuse_init_in { -+ uint32_t major; -+ uint32_t minor; -+ uint32_t unused; -+ uint32_t flags; -+}; -+ -+struct cuse_init_out { -+ uint32_t major; -+ uint32_t minor; -+ uint32_t unused; -+ uint32_t flags; -+ uint32_t max_read; -+ uint32_t max_write; -+ uint32_t dev_major; /* chardev major */ -+ uint32_t dev_minor; /* chardev minor */ -+ uint32_t spare[10]; -+}; -+ -+struct fuse_interrupt_in { -+ uint64_t unique; -+}; -+ -+struct fuse_bmap_in { -+ uint64_t block; -+ uint32_t blocksize; -+ uint32_t padding; -+}; -+ -+struct fuse_bmap_out { -+ uint64_t block; -+}; -+ -+struct fuse_ioctl_in { -+ uint64_t fh; -+ uint32_t flags; -+ uint32_t cmd; -+ uint64_t arg; -+ uint32_t in_size; -+ uint32_t out_size; -+}; -+ -+struct fuse_ioctl_iovec { -+ uint64_t base; -+ uint64_t len; -+}; -+ -+struct fuse_ioctl_out { -+ int32_t result; -+ uint32_t flags; -+ uint32_t in_iovs; -+ uint32_t out_iovs; -+}; -+ -+struct fuse_poll_in { -+ uint64_t fh; -+ uint64_t kh; -+ uint32_t flags; -+ uint32_t events; -+}; -+ -+struct fuse_poll_out { -+ uint32_t revents; -+ uint32_t padding; -+}; -+ -+struct fuse_notify_poll_wakeup_out { -+ uint64_t kh; -+}; -+ -+struct fuse_fallocate_in { -+ uint64_t fh; -+ uint64_t offset; -+ uint64_t length; -+ uint32_t mode; -+ uint32_t padding; -+}; -+ -+struct fuse_in_header { -+ uint32_t len; -+ uint32_t opcode; -+ uint64_t unique; -+ uint64_t nodeid; -+ uint32_t uid; -+ uint32_t gid; -+ uint32_t pid; -+ uint32_t padding; -+}; -+ -+struct fuse_out_header { -+ uint32_t len; -+ int32_t error; -+ uint64_t unique; -+}; -+ -+struct fuse_dirent { -+ uint64_t ino; -+ uint64_t off; -+ uint32_t namelen; -+ uint32_t type; -+ char name[]; -+}; -+ -+#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) -+#define FUSE_DIRENT_ALIGN(x) \ -+ (((x) + 
sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) -+#define FUSE_DIRENT_SIZE(d) \ -+ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) -+ -+struct fuse_direntplus { -+ struct fuse_entry_out entry_out; -+ struct fuse_dirent dirent; -+}; -+ -+#define FUSE_NAME_OFFSET_DIRENTPLUS \ -+ offsetof(struct fuse_direntplus, dirent.name) -+#define FUSE_DIRENTPLUS_SIZE(d) \ -+ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen) -+ -+struct fuse_notify_inval_inode_out { -+ uint64_t ino; -+ int64_t off; -+ int64_t len; -+}; -+ -+struct fuse_notify_inval_entry_out { -+ uint64_t parent; -+ uint32_t namelen; -+ uint32_t padding; -+}; -+ -+struct fuse_notify_delete_out { -+ uint64_t parent; -+ uint64_t child; -+ uint32_t namelen; -+ uint32_t padding; -+}; -+ -+struct fuse_notify_store_out { -+ uint64_t nodeid; -+ uint64_t offset; -+ uint32_t size; -+ uint32_t padding; -+}; -+ -+struct fuse_notify_retrieve_out { -+ uint64_t notify_unique; -+ uint64_t nodeid; -+ uint64_t offset; -+ uint32_t size; -+ uint32_t padding; -+}; -+ -+/* Matches the size of fuse_write_in */ -+struct fuse_notify_retrieve_in { -+ uint64_t dummy1; -+ uint64_t offset; -+ uint32_t size; -+ uint32_t dummy2; -+ uint64_t dummy3; -+ uint64_t dummy4; -+}; -+ -+/* Device ioctls: */ -+#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) -+ -+struct fuse_lseek_in { -+ uint64_t fh; -+ uint64_t offset; -+ uint32_t whence; -+ uint32_t padding; -+}; -+ -+struct fuse_lseek_out { -+ uint64_t offset; -+}; -+ -+struct fuse_copy_file_range_in { -+ uint64_t fh_in; -+ uint64_t off_in; -+ uint64_t nodeid_out; -+ uint64_t fh_out; -+ uint64_t off_out; -+ uint64_t len; -+ uint64_t flags; -+}; -+ -+#endif /* _LINUX_FUSE_H */ -diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh -index f76d773..29c27f4 100755 ---- a/scripts/update-linux-headers.sh -+++ b/scripts/update-linux-headers.sh -@@ -186,6 +186,7 @@ rm -rf "$output/include/standard-headers/linux" - mkdir -p 
"$output/include/standard-headers/linux" - for i in "$tmpdir"/include/linux/*virtio*.h \ - "$tmpdir/include/linux/qemu_fw_cfg.h" \ -+ "$tmpdir/include/linux/fuse.h" \ - "$tmpdir/include/linux/input.h" \ - "$tmpdir/include/linux/input-event-codes.h" \ - "$tmpdir/include/linux/pci_regs.h" \ --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Pull-in-upstream-headers.patch b/kvm-virtiofsd-Pull-in-upstream-headers.patch deleted file mode 100644 index 78784fb..0000000 --- a/kvm-virtiofsd-Pull-in-upstream-headers.patch +++ /dev/null @@ -1,4911 +0,0 @@ -From 434b51e5c2fce756906dec4803900397bc98ad72 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:39 +0100 -Subject: [PATCH 008/116] virtiofsd: Pull in upstream headers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-5-dgilbert@redhat.com> -Patchwork-id: 93457 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 004/112] virtiofsd: Pull in upstream headers -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Pull in headers fromlibfuse's upstream fuse-3.8.0 - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit ee46c78901eb7fa78e328e04c0494ad6d207238b) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse.h | 1275 ++++++++++++++++++++ - tools/virtiofsd/fuse_common.h | 823 +++++++++++++ - tools/virtiofsd/fuse_i.h | 139 +++ - tools/virtiofsd/fuse_log.h | 82 ++ - tools/virtiofsd/fuse_lowlevel.h | 2089 +++++++++++++++++++++++++++++++++ - tools/virtiofsd/fuse_misc.h | 59 + - tools/virtiofsd/fuse_opt.h | 271 +++++ - tools/virtiofsd/passthrough_helpers.h | 76 ++ - 8 files changed, 4814 insertions(+) - create mode 100644 tools/virtiofsd/fuse.h - create mode 100644 tools/virtiofsd/fuse_common.h - create mode 100644 tools/virtiofsd/fuse_i.h - create mode 100644 tools/virtiofsd/fuse_log.h - create mode 100644 tools/virtiofsd/fuse_lowlevel.h - create mode 100644 tools/virtiofsd/fuse_misc.h - create mode 100644 tools/virtiofsd/fuse_opt.h - create mode 100644 tools/virtiofsd/passthrough_helpers.h - -diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h -new file mode 100644 -index 0000000..883f6e5 ---- /dev/null -+++ b/tools/virtiofsd/fuse.h -@@ -0,0 +1,1275 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. -+*/ -+ -+#ifndef FUSE_H_ -+#define FUSE_H_ -+ -+/** @file -+ * -+ * This file defines the library interface of FUSE -+ * -+ * IMPORTANT: you should define FUSE_USE_VERSION before including this header. -+ */ -+ -+#include "fuse_common.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* ----------------------------------------------------------- * -+ * Basic FUSE API * -+ * ----------------------------------------------------------- */ -+ -+/** Handle for a FUSE filesystem */ -+struct fuse; -+ -+/** -+ * Readdir flags, passed to ->readdir() -+ */ -+enum fuse_readdir_flags { -+ /** -+ * "Plus" mode. 
-+ * -+ * The kernel wants to prefill the inode cache during readdir. The -+ * filesystem may honour this by filling in the attributes and setting -+ * FUSE_FILL_DIR_FLAGS for the filler function. The filesystem may also -+ * just ignore this flag completely. -+ */ -+ FUSE_READDIR_PLUS = (1 << 0), -+}; -+ -+enum fuse_fill_dir_flags { -+ /** -+ * "Plus" mode: all file attributes are valid -+ * -+ * The attributes are used by the kernel to prefill the inode cache -+ * during a readdir. -+ * -+ * It is okay to set FUSE_FILL_DIR_PLUS if FUSE_READDIR_PLUS is not set -+ * and vice versa. -+ */ -+ FUSE_FILL_DIR_PLUS = (1 << 1), -+}; -+ -+/** Function to add an entry in a readdir() operation -+ * -+ * The *off* parameter can be any non-zero value that enables the -+ * filesystem to identify the current point in the directory -+ * stream. It does not need to be the actual physical position. A -+ * value of zero is reserved to indicate that seeking in directories -+ * is not supported. -+ * -+ * @param buf the buffer passed to the readdir() operation -+ * @param name the file name of the directory entry -+ * @param stat file attributes, can be NULL -+ * @param off offset of the next entry or zero -+ * @param flags fill flags -+ * @return 1 if buffer is full, zero otherwise -+ */ -+typedef int (*fuse_fill_dir_t) (void *buf, const char *name, -+ const struct stat *stbuf, off_t off, -+ enum fuse_fill_dir_flags flags); -+/** -+ * Configuration of the high-level API -+ * -+ * This structure is initialized from the arguments passed to -+ * fuse_new(), and then passed to the file system's init() handler -+ * which should ensure that the configuration is compatible with the -+ * file system implementation. -+ */ -+struct fuse_config { -+ /** -+ * If `set_gid` is non-zero, the st_gid attribute of each file -+ * is overwritten with the value of `gid`. 
-+ */ -+ int set_gid; -+ unsigned int gid; -+ -+ /** -+ * If `set_uid` is non-zero, the st_uid attribute of each file -+ * is overwritten with the value of `uid`. -+ */ -+ int set_uid; -+ unsigned int uid; -+ -+ /** -+ * If `set_mode` is non-zero, the any permissions bits set in -+ * `umask` are unset in the st_mode attribute of each file. -+ */ -+ int set_mode; -+ unsigned int umask; -+ -+ /** -+ * The timeout in seconds for which name lookups will be -+ * cached. -+ */ -+ double entry_timeout; -+ -+ /** -+ * The timeout in seconds for which a negative lookup will be -+ * cached. This means, that if file did not exist (lookup -+ * retuned ENOENT), the lookup will only be redone after the -+ * timeout, and the file/directory will be assumed to not -+ * exist until then. A value of zero means that negative -+ * lookups are not cached. -+ */ -+ double negative_timeout; -+ -+ /** -+ * The timeout in seconds for which file/directory attributes -+ * (as returned by e.g. the `getattr` handler) are cached. -+ */ -+ double attr_timeout; -+ -+ /** -+ * Allow requests to be interrupted -+ */ -+ int intr; -+ -+ /** -+ * Specify which signal number to send to the filesystem when -+ * a request is interrupted. The default is hardcoded to -+ * USR1. -+ */ -+ int intr_signal; -+ -+ /** -+ * Normally, FUSE assigns inodes to paths only for as long as -+ * the kernel is aware of them. With this option inodes are -+ * instead remembered for at least this many seconds. This -+ * will require more memory, but may be necessary when using -+ * applications that make use of inode numbers. -+ * -+ * A number of -1 means that inodes will be remembered for the -+ * entire life-time of the file-system process. -+ */ -+ int remember; -+ -+ /** -+ * The default behavior is that if an open file is deleted, -+ * the file is renamed to a hidden file (.fuse_hiddenXXX), and -+ * only removed when the file is finally released. 
This -+ * relieves the filesystem implementation of having to deal -+ * with this problem. This option disables the hiding -+ * behavior, and files are removed immediately in an unlink -+ * operation (or in a rename operation which overwrites an -+ * existing file). -+ * -+ * It is recommended that you not use the hard_remove -+ * option. When hard_remove is set, the following libc -+ * functions fail on unlinked files (returning errno of -+ * ENOENT): read(2), write(2), fsync(2), close(2), f*xattr(2), -+ * ftruncate(2), fstat(2), fchmod(2), fchown(2) -+ */ -+ int hard_remove; -+ -+ /** -+ * Honor the st_ino field in the functions getattr() and -+ * fill_dir(). This value is used to fill in the st_ino field -+ * in the stat(2), lstat(2), fstat(2) functions and the d_ino -+ * field in the readdir(2) function. The filesystem does not -+ * have to guarantee uniqueness, however some applications -+ * rely on this value being unique for the whole filesystem. -+ * -+ * Note that this does *not* affect the inode that libfuse -+ * and the kernel use internally (also called the "nodeid"). -+ */ -+ int use_ino; -+ -+ /** -+ * If use_ino option is not given, still try to fill in the -+ * d_ino field in readdir(2). If the name was previously -+ * looked up, and is still in the cache, the inode number -+ * found there will be used. Otherwise it will be set to -1. -+ * If use_ino option is given, this option is ignored. -+ */ -+ int readdir_ino; -+ -+ /** -+ * This option disables the use of page cache (file content cache) -+ * in the kernel for this filesystem. This has several affects: -+ * -+ * 1. Each read(2) or write(2) system call will initiate one -+ * or more read or write operations, data will not be -+ * cached in the kernel. -+ * -+ * 2. The return value of the read() and write() system calls -+ * will correspond to the return values of the read and -+ * write operations. This is useful for example if the -+ * file size is not known in advance (before reading it). 
-+ * -+ * Internally, enabling this option causes fuse to set the -+ * `direct_io` field of `struct fuse_file_info` - overwriting -+ * any value that was put there by the file system. -+ */ -+ int direct_io; -+ -+ /** -+ * This option disables flushing the cache of the file -+ * contents on every open(2). This should only be enabled on -+ * filesystems where the file data is never changed -+ * externally (not through the mounted FUSE filesystem). Thus -+ * it is not suitable for network filesystems and other -+ * intermediate filesystems. -+ * -+ * NOTE: if this option is not specified (and neither -+ * direct_io) data is still cached after the open(2), so a -+ * read(2) system call will not always initiate a read -+ * operation. -+ * -+ * Internally, enabling this option causes fuse to set the -+ * `keep_cache` field of `struct fuse_file_info` - overwriting -+ * any value that was put there by the file system. -+ */ -+ int kernel_cache; -+ -+ /** -+ * This option is an alternative to `kernel_cache`. Instead of -+ * unconditionally keeping cached data, the cached data is -+ * invalidated on open(2) if if the modification time or the -+ * size of the file has changed since it was last opened. -+ */ -+ int auto_cache; -+ -+ /** -+ * The timeout in seconds for which file attributes are cached -+ * for the purpose of checking if auto_cache should flush the -+ * file data on open. -+ */ -+ int ac_attr_timeout_set; -+ double ac_attr_timeout; -+ -+ /** -+ * If this option is given the file-system handlers for the -+ * following operations will not receive path information: -+ * read, write, flush, release, fsync, readdir, releasedir, -+ * fsyncdir, lock, ioctl and poll. -+ * -+ * For the truncate, getattr, chmod, chown and utimens -+ * operations the path will be provided only if the struct -+ * fuse_file_info argument is NULL. -+ */ -+ int nullpath_ok; -+ -+ /** -+ * The remaining options are used by libfuse internally and -+ * should not be touched. 
-+ */ -+ int show_help; -+ char *modules; -+ int debug; -+}; -+ -+ -+/** -+ * The file system operations: -+ * -+ * Most of these should work very similarly to the well known UNIX -+ * file system operations. A major exception is that instead of -+ * returning an error in 'errno', the operation should return the -+ * negated error value (-errno) directly. -+ * -+ * All methods are optional, but some are essential for a useful -+ * filesystem (e.g. getattr). Open, flush, release, fsync, opendir, -+ * releasedir, fsyncdir, access, create, truncate, lock, init and -+ * destroy are special purpose methods, without which a full featured -+ * filesystem can still be implemented. -+ * -+ * In general, all methods are expected to perform any necessary -+ * permission checking. However, a filesystem may delegate this task -+ * to the kernel by passing the `default_permissions` mount option to -+ * `fuse_new()`. In this case, methods will only be called if -+ * the kernel's permission check has succeeded. -+ * -+ * Almost all operations take a path which can be of any length. -+ */ -+struct fuse_operations { -+ /** Get file attributes. -+ * -+ * Similar to stat(). The 'st_dev' and 'st_blksize' fields are -+ * ignored. The 'st_ino' field is ignored except if the 'use_ino' -+ * mount option is given. In that case it is passed to userspace, -+ * but libfuse and the kernel will still assign a different -+ * inode for internal use (called the "nodeid"). -+ * -+ * `fi` will always be NULL if the file is not currently open, but -+ * may also be NULL if the file is open. -+ */ -+ int (*getattr) (const char *, struct stat *, struct fuse_file_info *fi); -+ -+ /** Read the target of a symbolic link -+ * -+ * The buffer should be filled with a null terminated string. The -+ * buffer size argument includes the space for the terminating -+ * null character. If the linkname is too long to fit in the -+ * buffer, it should be truncated. The return value should be 0 -+ * for success. 
-+ */ -+ int (*readlink) (const char *, char *, size_t); -+ -+ /** Create a file node -+ * -+ * This is called for creation of all non-directory, non-symlink -+ * nodes. If the filesystem defines a create() method, then for -+ * regular files that will be called instead. -+ */ -+ int (*mknod) (const char *, mode_t, dev_t); -+ -+ /** Create a directory -+ * -+ * Note that the mode argument may not have the type specification -+ * bits set, i.e. S_ISDIR(mode) can be false. To obtain the -+ * correct directory type bits use mode|S_IFDIR -+ * */ -+ int (*mkdir) (const char *, mode_t); -+ -+ /** Remove a file */ -+ int (*unlink) (const char *); -+ -+ /** Remove a directory */ -+ int (*rmdir) (const char *); -+ -+ /** Create a symbolic link */ -+ int (*symlink) (const char *, const char *); -+ -+ /** Rename a file -+ * -+ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If -+ * RENAME_NOREPLACE is specified, the filesystem must not -+ * overwrite *newname* if it exists and return an error -+ * instead. If `RENAME_EXCHANGE` is specified, the filesystem -+ * must atomically exchange the two files, i.e. both must -+ * exist and neither may be deleted. -+ */ -+ int (*rename) (const char *, const char *, unsigned int flags); -+ -+ /** Create a hard link to a file */ -+ int (*link) (const char *, const char *); -+ -+ /** Change the permission bits of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ */ -+ int (*chmod) (const char *, mode_t, struct fuse_file_info *fi); -+ -+ /** Change the owner and group of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. 
-+ */ -+ int (*chown) (const char *, uid_t, gid_t, struct fuse_file_info *fi); -+ -+ /** Change the size of a file -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*truncate) (const char *, off_t, struct fuse_file_info *fi); -+ -+ /** Open a file -+ * -+ * Open flags are available in fi->flags. The following rules -+ * apply. -+ * -+ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -+ * filtered out / handled by the kernel. -+ * -+ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR, O_EXEC, O_SEARCH) -+ * should be used by the filesystem to check if the operation is -+ * permitted. If the ``-o default_permissions`` mount option is -+ * given, this check is already done by the kernel before calling -+ * open() and may thus be omitted by the filesystem. -+ * -+ * - When writeback caching is enabled, the kernel may send -+ * read requests even for files opened with O_WRONLY. The -+ * filesystem should be prepared to handle this. -+ * -+ * - When writeback caching is disabled, the filesystem is -+ * expected to properly handle the O_APPEND flag and ensure -+ * that each write is appending to the end of the file. -+ * -+ * - When writeback caching is enabled, the kernel will -+ * handle O_APPEND. However, unless all changes to the file -+ * come through the kernel this will not work reliably. The -+ * filesystem should thus either ignore the O_APPEND flag -+ * (and let the kernel handle it), or return an error -+ * (indicating that reliably O_APPEND is not available). -+ * -+ * Filesystem may store an arbitrary file handle (pointer, -+ * index, etc) in fi->fh, and use this in other all other file -+ * operations (read, write, flush, release, fsync). -+ * -+ * Filesystem may also implement stateless file I/O and not store -+ * anything in fi->fh. 
-+ * -+ * There are also some flags (direct_io, keep_cache) which the -+ * filesystem may set in fi, to change the way the file is opened. -+ * See fuse_file_info structure in for more details. -+ * -+ * If this request is answered with an error code of ENOSYS -+ * and FUSE_CAP_NO_OPEN_SUPPORT is set in -+ * `fuse_conn_info.capable`, this is treated as success and -+ * future calls to open will also succeed without being send -+ * to the filesystem process. -+ * -+ */ -+ int (*open) (const char *, struct fuse_file_info *); -+ -+ /** Read data from an open file -+ * -+ * Read should return exactly the number of bytes requested except -+ * on EOF or error, otherwise the rest of the data will be -+ * substituted with zeroes. An exception to this is when the -+ * 'direct_io' mount option is specified, in which case the return -+ * value of the read system call will reflect the return value of -+ * this operation. -+ */ -+ int (*read) (const char *, char *, size_t, off_t, -+ struct fuse_file_info *); -+ -+ /** Write data to an open file -+ * -+ * Write should return exactly the number of bytes requested -+ * except on error. An exception to this is when the 'direct_io' -+ * mount option is specified (see read operation). -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*write) (const char *, const char *, size_t, off_t, -+ struct fuse_file_info *); -+ -+ /** Get file system statistics -+ * -+ * The 'f_favail', 'f_fsid' and 'f_flag' fields are ignored -+ */ -+ int (*statfs) (const char *, struct statvfs *); -+ -+ /** Possibly flush cached data -+ * -+ * BIG NOTE: This is not equivalent to fsync(). It's not a -+ * request to sync dirty data. -+ * -+ * Flush is called on each close() of a file descriptor, as opposed to -+ * release which is called on the close of the last file descriptor for -+ * a file. 
Under Linux, errors returned by flush() will be passed to -+ * userspace as errors from close(), so flush() is a good place to write -+ * back any cached dirty data. However, many applications ignore errors -+ * on close(), and on non-Linux systems, close() may succeed even if flush() -+ * returns an error. For these reasons, filesystems should not assume -+ * that errors returned by flush will ever be noticed or even -+ * delivered. -+ * -+ * NOTE: The flush() method may be called more than once for each -+ * open(). This happens if more than one file descriptor refers to an -+ * open file handle, e.g. due to dup(), dup2() or fork() calls. It is -+ * not possible to determine if a flush is final, so each flush should -+ * be treated equally. Multiple write-flush sequences are relatively -+ * rare, so this shouldn't be a problem. -+ * -+ * Filesystems shouldn't assume that flush will be called at any -+ * particular point. It may be called more times than expected, or not -+ * at all. -+ * -+ * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -+ */ -+ int (*flush) (const char *, struct fuse_file_info *); -+ -+ /** Release an open file -+ * -+ * Release is called when there are no more references to an open -+ * file: all file descriptors are closed and all memory mappings -+ * are unmapped. -+ * -+ * For every open() call there will be exactly one release() call -+ * with the same flags and file handle. It is possible to -+ * have a file opened more than once, in which case only the last -+ * release will mean, that no more reads/writes will happen on the -+ * file. The return value of release is ignored. -+ */ -+ int (*release) (const char *, struct fuse_file_info *); -+ -+ /** Synchronize file contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data. 
-+ */ -+ int (*fsync) (const char *, int, struct fuse_file_info *); -+ -+ /** Set extended attributes */ -+ int (*setxattr) (const char *, const char *, const char *, size_t, int); -+ -+ /** Get extended attributes */ -+ int (*getxattr) (const char *, const char *, char *, size_t); -+ -+ /** List extended attributes */ -+ int (*listxattr) (const char *, char *, size_t); -+ -+ /** Remove extended attributes */ -+ int (*removexattr) (const char *, const char *); -+ -+ /** Open directory -+ * -+ * Unless the 'default_permissions' mount option is given, -+ * this method should check if opendir is permitted for this -+ * directory. Optionally opendir may also return an arbitrary -+ * filehandle in the fuse_file_info structure, which will be -+ * passed to readdir, releasedir and fsyncdir. -+ */ -+ int (*opendir) (const char *, struct fuse_file_info *); -+ -+ /** Read directory -+ * -+ * The filesystem may choose between two modes of operation: -+ * -+ * 1) The readdir implementation ignores the offset parameter, and -+ * passes zero to the filler function's offset. The filler -+ * function will not return '1' (unless an error happens), so the -+ * whole directory is read in a single readdir operation. -+ * -+ * 2) The readdir implementation keeps track of the offsets of the -+ * directory entries. It uses the offset parameter and always -+ * passes non-zero offset to the filler function. When the buffer -+ * is full (or an error happens) the filler function will return -+ * '1'. 
-+ */ -+ int (*readdir) (const char *, void *, fuse_fill_dir_t, off_t, -+ struct fuse_file_info *, enum fuse_readdir_flags); -+ -+ /** Release directory -+ */ -+ int (*releasedir) (const char *, struct fuse_file_info *); -+ -+ /** Synchronize directory contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data -+ */ -+ int (*fsyncdir) (const char *, int, struct fuse_file_info *); -+ -+ /** -+ * Initialize filesystem -+ * -+ * The return value will passed in the `private_data` field of -+ * `struct fuse_context` to all file operations, and as a -+ * parameter to the destroy() method. It overrides the initial -+ * value provided to fuse_main() / fuse_new(). -+ */ -+ void *(*init) (struct fuse_conn_info *conn, -+ struct fuse_config *cfg); -+ -+ /** -+ * Clean up filesystem -+ * -+ * Called on filesystem exit. -+ */ -+ void (*destroy) (void *private_data); -+ -+ /** -+ * Check file access permissions -+ * -+ * This will be called for the access() system call. If the -+ * 'default_permissions' mount option is given, this method is not -+ * called. -+ * -+ * This method is not called under Linux kernel versions 2.4.x -+ */ -+ int (*access) (const char *, int); -+ -+ /** -+ * Create and open a file -+ * -+ * If the file does not exist, first create it with the specified -+ * mode, and then open it. -+ * -+ * If this method is not implemented or under Linux kernel -+ * versions earlier than 2.6.15, the mknod() and open() methods -+ * will be called instead. -+ */ -+ int (*create) (const char *, mode_t, struct fuse_file_info *); -+ -+ /** -+ * Perform POSIX file locking operation -+ * -+ * The cmd argument will be either F_GETLK, F_SETLK or F_SETLKW. -+ * -+ * For the meaning of fields in 'struct flock' see the man page -+ * for fcntl(2). The l_whence field will always be set to -+ * SEEK_SET. -+ * -+ * For checking lock ownership, the 'fuse_file_info->owner' -+ * argument must be used. 
-+ * -+ * For F_GETLK operation, the library will first check currently -+ * held locks, and if a conflicting lock is found it will return -+ * information without calling this method. This ensures, that -+ * for local locks the l_pid field is correctly filled in. The -+ * results may not be accurate in case of race conditions and in -+ * the presence of hard links, but it's unlikely that an -+ * application would rely on accurate GETLK results in these -+ * cases. If a conflicting lock is not found, this method will be -+ * called, and the filesystem may fill out l_pid by a meaningful -+ * value, or it may leave this field zero. -+ * -+ * For F_SETLK and F_SETLKW the l_pid field will be set to the pid -+ * of the process performing the locking operation. -+ * -+ * Note: if this method is not implemented, the kernel will still -+ * allow file locking to work locally. Hence it is only -+ * interesting for network filesystems and similar. -+ */ -+ int (*lock) (const char *, struct fuse_file_info *, int cmd, -+ struct flock *); -+ -+ /** -+ * Change the access and modification times of a file with -+ * nanosecond resolution -+ * -+ * This supersedes the old utime() interface. New applications -+ * should use this. -+ * -+ * `fi` will always be NULL if the file is not currenlty open, but -+ * may also be NULL if the file is open. -+ * -+ * See the utimensat(2) man page for details. -+ */ -+ int (*utimens) (const char *, const struct timespec tv[2], -+ struct fuse_file_info *fi); -+ -+ /** -+ * Map block index within file to block index within device -+ * -+ * Note: This makes sense only for block device backed filesystems -+ * mounted with the 'blkdev' option -+ */ -+ int (*bmap) (const char *, size_t blocksize, uint64_t *idx); -+ -+ /** -+ * Ioctl -+ * -+ * flags will have FUSE_IOCTL_COMPAT set for 32bit ioctls in -+ * 64bit environment. The size and direction of data is -+ * determined by _IOC_*() decoding of cmd. 
For _IOC_NONE, -+ * data will be NULL, for _IOC_WRITE data is out area, for -+ * _IOC_READ in area and if both are set in/out area. In all -+ * non-NULL cases, the area is of _IOC_SIZE(cmd) bytes. -+ * -+ * If flags has FUSE_IOCTL_DIR then the fuse_file_info refers to a -+ * directory file handle. -+ * -+ * Note : the unsigned long request submitted by the application -+ * is truncated to 32 bits. -+ */ -+ int (*ioctl) (const char *, unsigned int cmd, void *arg, -+ struct fuse_file_info *, unsigned int flags, void *data); -+ -+ /** -+ * Poll for IO readiness events -+ * -+ * Note: If ph is non-NULL, the client should notify -+ * when IO readiness events occur by calling -+ * fuse_notify_poll() with the specified ph. -+ * -+ * Regardless of the number of times poll with a non-NULL ph -+ * is received, single notification is enough to clear all. -+ * Notifying more times incurs overhead but doesn't harm -+ * correctness. -+ * -+ * The callee is responsible for destroying ph with -+ * fuse_pollhandle_destroy() when no longer in use. -+ */ -+ int (*poll) (const char *, struct fuse_file_info *, -+ struct fuse_pollhandle *ph, unsigned *reventsp); -+ -+ /** Write contents of buffer to an open file -+ * -+ * Similar to the write() method, but data is supplied in a -+ * generic buffer. Use fuse_buf_copy() to transfer data to -+ * the destination. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ */ -+ int (*write_buf) (const char *, struct fuse_bufvec *buf, off_t off, -+ struct fuse_file_info *); -+ -+ /** Store data from an open file in a buffer -+ * -+ * Similar to the read() method, but data is stored and -+ * returned in a generic buffer. -+ * -+ * No actual copying of data has to take place, the source -+ * file descriptor may simply be stored in the buffer for -+ * later data transfer. -+ * -+ * The buffer must be allocated dynamically and stored at the -+ * location pointed to by bufp. 
If the buffer contains memory -+ * regions, they too must be allocated using malloc(). The -+ * allocated memory will be freed by the caller. -+ */ -+ int (*read_buf) (const char *, struct fuse_bufvec **bufp, -+ size_t size, off_t off, struct fuse_file_info *); -+ /** -+ * Perform BSD file locking operation -+ * -+ * The op argument will be either LOCK_SH, LOCK_EX or LOCK_UN -+ * -+ * Nonblocking requests will be indicated by ORing LOCK_NB to -+ * the above operations -+ * -+ * For more information see the flock(2) manual page. -+ * -+ * Additionally fi->owner will be set to a value unique to -+ * this open file. This same value will be supplied to -+ * ->release() when the file is released. -+ * -+ * Note: if this method is not implemented, the kernel will still -+ * allow file locking to work locally. Hence it is only -+ * interesting for network filesystems and similar. -+ */ -+ int (*flock) (const char *, struct fuse_file_info *, int op); -+ -+ /** -+ * Allocates space for an open file -+ * -+ * This function ensures that required space is allocated for specified -+ * file. If this function returns success then any subsequent write -+ * request to specified range is guaranteed not to fail because of lack -+ * of space on the file system media. -+ */ -+ int (*fallocate) (const char *, int, off_t, off_t, -+ struct fuse_file_info *); -+ -+ /** -+ * Copy a range of data from one file to another -+ * -+ * Performs an optimized copy between two file descriptors without the -+ * additional cost of transferring data through the FUSE kernel module -+ * to user space (glibc) and then back into the FUSE filesystem again. -+ * -+ * In case this method is not implemented, glibc falls back to reading -+ * data from the source and writing to the destination. Effectively -+ * doing an inefficient copy of the data. 
-+ */ -+ ssize_t (*copy_file_range) (const char *path_in, -+ struct fuse_file_info *fi_in, -+ off_t offset_in, const char *path_out, -+ struct fuse_file_info *fi_out, -+ off_t offset_out, size_t size, int flags); -+ -+ /** -+ * Find next data or hole after the specified offset -+ */ -+ off_t (*lseek) (const char *, off_t off, int whence, struct fuse_file_info *); -+}; -+ -+/** Extra context that may be needed by some filesystems -+ * -+ * The uid, gid and pid fields are not filled in case of a writepage -+ * operation. -+ */ -+struct fuse_context { -+ /** Pointer to the fuse object */ -+ struct fuse *fuse; -+ -+ /** User ID of the calling process */ -+ uid_t uid; -+ -+ /** Group ID of the calling process */ -+ gid_t gid; -+ -+ /** Process ID of the calling thread */ -+ pid_t pid; -+ -+ /** Private filesystem data */ -+ void *private_data; -+ -+ /** Umask of the calling process */ -+ mode_t umask; -+}; -+ -+/** -+ * Main function of FUSE. -+ * -+ * This is for the lazy. This is all that has to be called from the -+ * main() function. -+ * -+ * This function does the following: -+ * - parses command line options, and handles --help and -+ * --version -+ * - installs signal handlers for INT, HUP, TERM and PIPE -+ * - registers an exit handler to unmount the filesystem on program exit -+ * - creates a fuse handle -+ * - registers the operations -+ * - calls either the single-threaded or the multi-threaded event loop -+ * -+ * Most file systems will have to parse some file-system specific -+ * arguments before calling this function. It is recommended to do -+ * this with fuse_opt_parse() and a processing function that passes -+ * through any unknown options (this can also be achieved by just -+ * passing NULL as the processing function). That way, the remaining -+ * options can be passed directly to fuse_main(). -+ * -+ * fuse_main() accepts all options that can be passed to -+ * fuse_parse_cmdline(), fuse_new(), or fuse_session_new(). 
-+ * -+ * Option parsing skips argv[0], which is assumed to contain the -+ * program name. This element must always be present and is used to -+ * construct a basic ``usage: `` message for the --help -+ * output. argv[0] may also be set to the empty string. In this case -+ * the usage message is suppressed. This can be used by file systems -+ * to print their own usage line first. See hello.c for an example of -+ * how to do this. -+ * -+ * Note: this is currently implemented as a macro. -+ * -+ * The following error codes may be returned from fuse_main(): -+ * 1: Invalid option arguments -+ * 2: No mount point specified -+ * 3: FUSE setup failed -+ * 4: Mounting failed -+ * 5: Failed to daemonize (detach from session) -+ * 6: Failed to set up signal handlers -+ * 7: An error occured during the life of the file system -+ * -+ * @param argc the argument counter passed to the main() function -+ * @param argv the argument vector passed to the main() function -+ * @param op the file system operation -+ * @param private_data Initial value for the `private_data` -+ * field of `struct fuse_context`. May be overridden by the -+ * `struct fuse_operations.init` handler. -+ * @return 0 on success, nonzero on failure -+ * -+ * Example usage, see hello.c -+ */ -+/* -+ int fuse_main(int argc, char *argv[], const struct fuse_operations *op, -+ void *private_data); -+*/ -+#define fuse_main(argc, argv, op, private_data) \ -+ fuse_main_real(argc, argv, op, sizeof(*(op)), private_data) -+ -+/* ----------------------------------------------------------- * -+ * More detailed API * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Print available options (high- and low-level) to stdout. This is -+ * not an exhaustive list, but includes only those options that may be -+ * of interest to an end-user of a file system. 
-+ * -+ * The function looks at the argument vector only to determine if -+ * there are additional modules to be loaded (module=foo option), -+ * and attempts to call their help functions as well. -+ * -+ * @param args the argument vector. -+ */ -+void fuse_lib_help(struct fuse_args *args); -+ -+/** -+ * Create a new FUSE filesystem. -+ * -+ * This function accepts most file-system independent mount options -+ * (like context, nodev, ro - see mount(8)), as well as the -+ * FUSE-specific mount options from mount.fuse(8). -+ * -+ * If the --help option is specified, the function writes a help text -+ * to stdout and returns NULL. -+ * -+ * Option parsing skips argv[0], which is assumed to contain the -+ * program name. This element must always be present and is used to -+ * construct a basic ``usage: `` message for the --help output. If -+ * argv[0] is set to the empty string, no usage message is included in -+ * the --help output. -+ * -+ * If an unknown option is passed in, an error message is written to -+ * stderr and the function returns NULL. -+ * -+ * @param args argument vector -+ * @param op the filesystem operations -+ * @param op_size the size of the fuse_operations structure -+ * @param private_data Initial value for the `private_data` -+ * field of `struct fuse_context`. May be overridden by the -+ * `struct fuse_operations.init` handler. -+ * @return the created FUSE handle -+ */ -+#if FUSE_USE_VERSION == 30 -+struct fuse *fuse_new_30(struct fuse_args *args, const struct fuse_operations *op, -+ size_t op_size, void *private_data); -+#define fuse_new(args, op, size, data) fuse_new_30(args, op, size, data) -+#else -+struct fuse *fuse_new(struct fuse_args *args, const struct fuse_operations *op, -+ size_t op_size, void *private_data); -+#endif -+ -+/** -+ * Mount a FUSE file system. -+ * -+ * @param mountpoint the mount point path -+ * @param f the FUSE handle -+ * -+ * @return 0 on success, -1 on failure. 
-+ **/ -+int fuse_mount(struct fuse *f, const char *mountpoint); -+ -+/** -+ * Unmount a FUSE file system. -+ * -+ * See fuse_session_unmount() for additional information. -+ * -+ * @param f the FUSE handle -+ **/ -+void fuse_unmount(struct fuse *f); -+ -+/** -+ * Destroy the FUSE handle. -+ * -+ * NOTE: This function does not unmount the filesystem. If this is -+ * needed, call fuse_unmount() before calling this function. -+ * -+ * @param f the FUSE handle -+ */ -+void fuse_destroy(struct fuse *f); -+ -+/** -+ * FUSE event loop. -+ * -+ * Requests from the kernel are processed, and the appropriate -+ * operations are called. -+ * -+ * For a description of the return value and the conditions when the -+ * event loop exits, refer to the documentation of -+ * fuse_session_loop(). -+ * -+ * @param f the FUSE handle -+ * @return see fuse_session_loop() -+ * -+ * See also: fuse_loop_mt() -+ */ -+int fuse_loop(struct fuse *f); -+ -+/** -+ * Flag session as terminated -+ * -+ * This function will cause any running event loops to exit on -+ * the next opportunity. -+ * -+ * @param f the FUSE handle -+ */ -+void fuse_exit(struct fuse *f); -+ -+/** -+ * FUSE event loop with multiple threads -+ * -+ * Requests from the kernel are processed, and the appropriate -+ * operations are called. Request are processed in parallel by -+ * distributing them between multiple threads. -+ * -+ * For a description of the return value and the conditions when the -+ * event loop exits, refer to the documentation of -+ * fuse_session_loop(). -+ * -+ * Note: using fuse_loop() instead of fuse_loop_mt() means you are running in -+ * single-threaded mode, and that you will not have to worry about reentrancy, -+ * though you will have to worry about recursive lookups. In single-threaded -+ * mode, FUSE will wait for one callback to return before calling another. 
-+ * -+ * Enabling multiple threads, by using fuse_loop_mt(), will cause FUSE to make -+ * multiple simultaneous calls into the various callback functions given by your -+ * fuse_operations record. -+ * -+ * If you are using multiple threads, you can enjoy all the parallel execution -+ * and interactive response benefits of threads, and you get to enjoy all the -+ * benefits of race conditions and locking bugs, too. Ensure that any code used -+ * in the callback function of fuse_operations is also thread-safe. -+ * -+ * @param f the FUSE handle -+ * @param config loop configuration -+ * @return see fuse_session_loop() -+ * -+ * See also: fuse_loop() -+ */ -+#if FUSE_USE_VERSION < 32 -+int fuse_loop_mt_31(struct fuse *f, int clone_fd); -+#define fuse_loop_mt(f, clone_fd) fuse_loop_mt_31(f, clone_fd) -+#else -+int fuse_loop_mt(struct fuse *f, struct fuse_loop_config *config); -+#endif -+ -+/** -+ * Get the current context -+ * -+ * The context is only valid for the duration of a filesystem -+ * operation, and thus must not be stored and used later. -+ * -+ * @return the context -+ */ -+struct fuse_context *fuse_get_context(void); -+ -+/** -+ * Get the current supplementary group IDs for the current request -+ * -+ * Similar to the getgroups(2) system call, except the return value is -+ * always the total number of group IDs, even if it is larger than the -+ * specified size. -+ * -+ * The current fuse kernel module in linux (as of 2.6.30) doesn't pass -+ * the group list to userspace, hence this function needs to parse -+ * "/proc/$TID/task/$TID/status" to get the group IDs. -+ * -+ * This feature may not be supported on all operating systems. In -+ * such a case this function will return -ENOSYS. 
-+ * -+ * @param size size of given array -+ * @param list array of group IDs to be filled in -+ * @return the total number of supplementary group IDs or -errno on failure -+ */ -+int fuse_getgroups(int size, gid_t list[]); -+ -+/** -+ * Check if the current request has already been interrupted -+ * -+ * @return 1 if the request has been interrupted, 0 otherwise -+ */ -+int fuse_interrupted(void); -+ -+/** -+ * Invalidates cache for the given path. -+ * -+ * This calls fuse_lowlevel_notify_inval_inode internally. -+ * -+ * @return 0 on successful invalidation, negative error value otherwise. -+ * This routine may return -ENOENT to indicate that there was -+ * no entry to be invalidated, e.g., because the path has not -+ * been seen before or has been forgotten; this should not be -+ * considered to be an error. -+ */ -+int fuse_invalidate_path(struct fuse *f, const char *path); -+ -+/** -+ * The real main function -+ * -+ * Do not call this directly, use fuse_main() -+ */ -+int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, -+ size_t op_size, void *private_data); -+ -+/** -+ * Start the cleanup thread when using option "remember". -+ * -+ * This is done automatically by fuse_loop_mt() -+ * @param fuse struct fuse pointer for fuse instance -+ * @return 0 on success and -1 on error -+ */ -+int fuse_start_cleanup_thread(struct fuse *fuse); -+ -+/** -+ * Stop the cleanup thread when using option "remember". 
-+ * -+ * This is done automatically by fuse_loop_mt() -+ * @param fuse struct fuse pointer for fuse instance -+ */ -+void fuse_stop_cleanup_thread(struct fuse *fuse); -+ -+/** -+ * Iterate over cache removing stale entries -+ * use in conjunction with "-oremember" -+ * -+ * NOTE: This is already done for the standard sessions -+ * -+ * @param fuse struct fuse pointer for fuse instance -+ * @return the number of seconds until the next cleanup -+ */ -+int fuse_clean_cache(struct fuse *fuse); -+ -+/* -+ * Stacking API -+ */ -+ -+/** -+ * Fuse filesystem object -+ * -+ * This is opaque object represents a filesystem layer -+ */ -+struct fuse_fs; -+ -+/* -+ * These functions call the relevant filesystem operation, and return -+ * the result. -+ * -+ * If the operation is not defined, they return -ENOSYS, with the -+ * exception of fuse_fs_open, fuse_fs_release, fuse_fs_opendir, -+ * fuse_fs_releasedir and fuse_fs_statfs, which return 0. -+ */ -+ -+int fuse_fs_getattr(struct fuse_fs *fs, const char *path, struct stat *buf, -+ struct fuse_file_info *fi); -+int fuse_fs_rename(struct fuse_fs *fs, const char *oldpath, -+ const char *newpath, unsigned int flags); -+int fuse_fs_unlink(struct fuse_fs *fs, const char *path); -+int fuse_fs_rmdir(struct fuse_fs *fs, const char *path); -+int fuse_fs_symlink(struct fuse_fs *fs, const char *linkname, -+ const char *path); -+int fuse_fs_link(struct fuse_fs *fs, const char *oldpath, const char *newpath); -+int fuse_fs_release(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); -+int fuse_fs_open(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); -+int fuse_fs_read(struct fuse_fs *fs, const char *path, char *buf, size_t size, -+ off_t off, struct fuse_file_info *fi); -+int fuse_fs_read_buf(struct fuse_fs *fs, const char *path, -+ struct fuse_bufvec **bufp, size_t size, off_t off, -+ struct fuse_file_info *fi); -+int fuse_fs_write(struct fuse_fs *fs, const char *path, const char *buf, -+ size_t size, 
off_t off, struct fuse_file_info *fi); -+int fuse_fs_write_buf(struct fuse_fs *fs, const char *path, -+ struct fuse_bufvec *buf, off_t off, -+ struct fuse_file_info *fi); -+int fuse_fs_fsync(struct fuse_fs *fs, const char *path, int datasync, -+ struct fuse_file_info *fi); -+int fuse_fs_flush(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); -+int fuse_fs_statfs(struct fuse_fs *fs, const char *path, struct statvfs *buf); -+int fuse_fs_opendir(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); -+int fuse_fs_readdir(struct fuse_fs *fs, const char *path, void *buf, -+ fuse_fill_dir_t filler, off_t off, -+ struct fuse_file_info *fi, enum fuse_readdir_flags flags); -+int fuse_fs_fsyncdir(struct fuse_fs *fs, const char *path, int datasync, -+ struct fuse_file_info *fi); -+int fuse_fs_releasedir(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi); -+int fuse_fs_create(struct fuse_fs *fs, const char *path, mode_t mode, -+ struct fuse_file_info *fi); -+int fuse_fs_lock(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi, int cmd, struct flock *lock); -+int fuse_fs_flock(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi, int op); -+int fuse_fs_chmod(struct fuse_fs *fs, const char *path, mode_t mode, -+ struct fuse_file_info *fi); -+int fuse_fs_chown(struct fuse_fs *fs, const char *path, uid_t uid, gid_t gid, -+ struct fuse_file_info *fi); -+int fuse_fs_truncate(struct fuse_fs *fs, const char *path, off_t size, -+ struct fuse_file_info *fi); -+int fuse_fs_utimens(struct fuse_fs *fs, const char *path, -+ const struct timespec tv[2], struct fuse_file_info *fi); -+int fuse_fs_access(struct fuse_fs *fs, const char *path, int mask); -+int fuse_fs_readlink(struct fuse_fs *fs, const char *path, char *buf, -+ size_t len); -+int fuse_fs_mknod(struct fuse_fs *fs, const char *path, mode_t mode, -+ dev_t rdev); -+int fuse_fs_mkdir(struct fuse_fs *fs, const char *path, mode_t mode); -+int 
fuse_fs_setxattr(struct fuse_fs *fs, const char *path, const char *name, -+ const char *value, size_t size, int flags); -+int fuse_fs_getxattr(struct fuse_fs *fs, const char *path, const char *name, -+ char *value, size_t size); -+int fuse_fs_listxattr(struct fuse_fs *fs, const char *path, char *list, -+ size_t size); -+int fuse_fs_removexattr(struct fuse_fs *fs, const char *path, -+ const char *name); -+int fuse_fs_bmap(struct fuse_fs *fs, const char *path, size_t blocksize, -+ uint64_t *idx); -+int fuse_fs_ioctl(struct fuse_fs *fs, const char *path, unsigned int cmd, -+ void *arg, struct fuse_file_info *fi, unsigned int flags, -+ void *data); -+int fuse_fs_poll(struct fuse_fs *fs, const char *path, -+ struct fuse_file_info *fi, struct fuse_pollhandle *ph, -+ unsigned *reventsp); -+int fuse_fs_fallocate(struct fuse_fs *fs, const char *path, int mode, -+ off_t offset, off_t length, struct fuse_file_info *fi); -+ssize_t fuse_fs_copy_file_range(struct fuse_fs *fs, const char *path_in, -+ struct fuse_file_info *fi_in, off_t off_in, -+ const char *path_out, -+ struct fuse_file_info *fi_out, off_t off_out, -+ size_t len, int flags); -+off_t fuse_fs_lseek(struct fuse_fs *fs, const char *path, off_t off, int whence, -+ struct fuse_file_info *fi); -+void fuse_fs_init(struct fuse_fs *fs, struct fuse_conn_info *conn, -+ struct fuse_config *cfg); -+void fuse_fs_destroy(struct fuse_fs *fs); -+ -+int fuse_notify_poll(struct fuse_pollhandle *ph); -+ -+/** -+ * Create a new fuse filesystem object -+ * -+ * This is usually called from the factory of a fuse module to create -+ * a new instance of a filesystem. -+ * -+ * @param op the filesystem operations -+ * @param op_size the size of the fuse_operations structure -+ * @param private_data Initial value for the `private_data` -+ * field of `struct fuse_context`. May be overridden by the -+ * `struct fuse_operations.init` handler. 
-+ * @return a new filesystem object -+ */ -+struct fuse_fs *fuse_fs_new(const struct fuse_operations *op, size_t op_size, -+ void *private_data); -+ -+/** -+ * Factory for creating filesystem objects -+ * -+ * The function may use and remove options from 'args' that belong -+ * to this module. -+ * -+ * For now the 'fs' vector always contains exactly one filesystem. -+ * This is the filesystem which will be below the newly created -+ * filesystem in the stack. -+ * -+ * @param args the command line arguments -+ * @param fs NULL terminated filesystem object vector -+ * @return the new filesystem object -+ */ -+typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args, -+ struct fuse_fs *fs[]); -+/** -+ * Register filesystem module -+ * -+ * If the "-omodules=*name*_:..." option is present, filesystem -+ * objects are created and pushed onto the stack with the *factory_* -+ * function. -+ * -+ * @param name_ the name of this filesystem module -+ * @param factory_ the factory function for this filesystem module -+ */ -+#define FUSE_REGISTER_MODULE(name_, factory_) \ -+ fuse_module_factory_t fuse_module_ ## name_ ## _factory = factory_ -+ -+/** Get session from fuse object */ -+struct fuse_session *fuse_get_session(struct fuse *f); -+ -+/** -+ * Open a FUSE file descriptor and set up the mount for the given -+ * mountpoint and flags. 
-+ * -+ * @param mountpoint reference to the mount in the file system -+ * @param options mount options -+ * @return the FUSE file descriptor or -1 upon error -+ */ -+int fuse_open_channel(const char *mountpoint, const char *options); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* FUSE_H_ */ -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -new file mode 100644 -index 0000000..2d686b2 ---- /dev/null -+++ b/tools/virtiofsd/fuse_common.h -@@ -0,0 +1,823 @@ -+/* FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. -+*/ -+ -+/** @file */ -+ -+#if !defined(FUSE_H_) && !defined(FUSE_LOWLEVEL_H_) -+#error "Never include directly; use or instead." -+#endif -+ -+#ifndef FUSE_COMMON_H_ -+#define FUSE_COMMON_H_ -+ -+#include "fuse_opt.h" -+#include "fuse_log.h" -+#include -+#include -+ -+/** Major version of FUSE library interface */ -+#define FUSE_MAJOR_VERSION 3 -+ -+/** Minor version of FUSE library interface */ -+#define FUSE_MINOR_VERSION 2 -+ -+#define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) -+#define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/** -+ * Information about an open file. -+ * -+ * File Handles are created by the open, opendir, and create methods and closed -+ * by the release and releasedir methods. Multiple file handles may be -+ * concurrently open for the same file. Generally, a client will create one -+ * file handle per file descriptor, though in some cases multiple file -+ * descriptors can share a single file handle. -+ */ -+struct fuse_file_info { -+ /** Open flags. Available in open() and release() */ -+ int flags; -+ -+ /** In case of a write operation indicates if this was caused -+ by a delayed write from the page cache. 
If so, then the -+ context's pid, uid, and gid fields will not be valid, and -+ the *fh* value may not match the *fh* value that would -+ have been sent with the corresponding individual write -+ requests if write caching had been disabled. */ -+ unsigned int writepage : 1; -+ -+ /** Can be filled in by open, to use direct I/O on this file. */ -+ unsigned int direct_io : 1; -+ -+ /** Can be filled in by open. It signals the kernel that any -+ currently cached file data (ie., data that the filesystem -+ provided the last time the file was open) need not be -+ invalidated. Has no effect when set in other contexts (in -+ particular it does nothing when set by opendir()). */ -+ unsigned int keep_cache : 1; -+ -+ /** Indicates a flush operation. Set in flush operation, also -+ maybe set in highlevel lock operation and lowlevel release -+ operation. */ -+ unsigned int flush : 1; -+ -+ /** Can be filled in by open, to indicate that the file is not -+ seekable. */ -+ unsigned int nonseekable : 1; -+ -+ /* Indicates that flock locks for this file should be -+ released. If set, lock_owner shall contain a valid value. -+ May only be set in ->release(). */ -+ unsigned int flock_release : 1; -+ -+ /** Can be filled in by opendir. It signals the kernel to -+ enable caching of entries returned by readdir(). Has no -+ effect when set in other contexts (in particular it does -+ nothing when set by open()). */ -+ unsigned int cache_readdir : 1; -+ -+ /** Padding. Reserved for future use*/ -+ unsigned int padding : 25; -+ unsigned int padding2 : 32; -+ -+ /** File handle id. May be filled in by filesystem in create, -+ * open, and opendir(). Available in most other file operations on the -+ * same file handle. */ -+ uint64_t fh; -+ -+ /** Lock owner id. Available in locking operations and flush */ -+ uint64_t lock_owner; -+ -+ /** Requested poll events. Available in ->poll. Only set on kernels -+ which support it. If unsupported, this field is set to zero. 
*/ -+ uint32_t poll_events; -+}; -+ -+/** -+ * Configuration parameters passed to fuse_session_loop_mt() and -+ * fuse_loop_mt(). -+ */ -+struct fuse_loop_config { -+ /** -+ * whether to use separate device fds for each thread -+ * (may increase performance) -+ */ -+ int clone_fd; -+ -+ /** -+ * The maximum number of available worker threads before they -+ * start to get deleted when they become idle. If not -+ * specified, the default is 10. -+ * -+ * Adjusting this has performance implications; a very small number -+ * of threads in the pool will cause a lot of thread creation and -+ * deletion overhead and performance may suffer. When set to 0, a new -+ * thread will be created to service every operation. -+ */ -+ unsigned int max_idle_threads; -+}; -+ -+/************************************************************************** -+ * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * -+ **************************************************************************/ -+ -+/** -+ * Indicates that the filesystem supports asynchronous read requests. -+ * -+ * If this capability is not requested/available, the kernel will -+ * ensure that there is at most one pending read request per -+ * file-handle at any time, and will attempt to order read requests by -+ * increasing offset. -+ * -+ * This feature is enabled by default when supported by the kernel. -+ */ -+#define FUSE_CAP_ASYNC_READ (1 << 0) -+ -+/** -+ * Indicates that the filesystem supports "remote" locking. -+ * -+ * This feature is enabled by default when supported by the kernel, -+ * and if getlk() and setlk() handlers are implemented. -+ */ -+#define FUSE_CAP_POSIX_LOCKS (1 << 1) -+ -+/** -+ * Indicates that the filesystem supports the O_TRUNC open flag. If -+ * disabled, and an application specifies O_TRUNC, fuse first calls -+ * truncate() and then open() with O_TRUNC filtered out. -+ * -+ * This feature is enabled by default when supported by the kernel. 
-+ */ -+#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3) -+ -+/** -+ * Indicates that the filesystem supports lookups of "." and "..". -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_EXPORT_SUPPORT (1 << 4) -+ -+/** -+ * Indicates that the kernel should not apply the umask to the -+ * file mode on create operations. -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_DONT_MASK (1 << 6) -+ -+/** -+ * Indicates that libfuse should try to use splice() when writing to -+ * the fuse device. This may improve performance. -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_SPLICE_WRITE (1 << 7) -+ -+/** -+ * Indicates that libfuse should try to move pages instead of copying when -+ * writing to / reading from the fuse device. This may improve performance. -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_SPLICE_MOVE (1 << 8) -+ -+/** -+ * Indicates that libfuse should try to use splice() when reading from -+ * the fuse device. This may improve performance. -+ * -+ * This feature is enabled by default when supported by the kernel and -+ * if the filesystem implements a write_buf() handler. -+ */ -+#define FUSE_CAP_SPLICE_READ (1 << 9) -+ -+/** -+ * If set, the calls to flock(2) will be emulated using POSIX locks and must -+ * then be handled by the filesystem's setlock() handler. -+ * -+ * If not set, flock(2) calls will be handled by the FUSE kernel module -+ * internally (so any access that does not go through the kernel cannot be taken -+ * into account). -+ * -+ * This feature is enabled by default when supported by the kernel and -+ * if the filesystem implements a flock() handler. -+ */ -+#define FUSE_CAP_FLOCK_LOCKS (1 << 10) -+ -+/** -+ * Indicates that the filesystem supports ioctl's on directories. -+ * -+ * This feature is enabled by default when supported by the kernel. 
-+ */ -+#define FUSE_CAP_IOCTL_DIR (1 << 11) -+ -+/** -+ * Traditionally, while a file is open the FUSE kernel module only -+ * asks the filesystem for an update of the file's attributes when a -+ * client attempts to read beyond EOF. This is unsuitable for -+ * e.g. network filesystems, where the file contents may change -+ * without the kernel knowing about it. -+ * -+ * If this flag is set, FUSE will check the validity of the attributes -+ * on every read. If the attributes are no longer valid (i.e., if the -+ * *attr_timeout* passed to fuse_reply_attr() or set in `struct -+ * fuse_entry_param` has passed), it will first issue a `getattr` -+ * request. If the new mtime differs from the previous value, any -+ * cached file *contents* will be invalidated as well. -+ * -+ * This flag should always be set when available. If all file changes -+ * go through the kernel, *attr_timeout* should be set to a very large -+ * number to avoid unnecessary getattr() calls. -+ * -+ * This feature is enabled by default when supported by the kernel. -+ */ -+#define FUSE_CAP_AUTO_INVAL_DATA (1 << 12) -+ -+/** -+ * Indicates that the filesystem supports readdirplus. -+ * -+ * This feature is enabled by default when supported by the kernel and if the -+ * filesystem implements a readdirplus() handler. -+ */ -+#define FUSE_CAP_READDIRPLUS (1 << 13) -+ -+/** -+ * Indicates that the filesystem supports adaptive readdirplus. -+ * -+ * If FUSE_CAP_READDIRPLUS is not set, this flag has no effect. -+ * -+ * If FUSE_CAP_READDIRPLUS is set and this flag is not set, the kernel -+ * will always issue readdirplus() requests to retrieve directory -+ * contents. -+ * -+ * If FUSE_CAP_READDIRPLUS is set and this flag is set, the kernel -+ * will issue both readdir() and readdirplus() requests, depending on -+ * how much information is expected to be required. 
-+ * -+ * As of Linux 4.20, the algorithm is as follows: when userspace -+ * starts to read directory entries, issue a READDIRPLUS request to -+ * the filesystem. If any entry attributes have been looked up by the -+ * time userspace requests the next batch of entries continue with -+ * READDIRPLUS, otherwise switch to plain READDIR. This will reasult -+ * in eg plain "ls" triggering READDIRPLUS first then READDIR after -+ * that because it doesn't do lookups. "ls -l" should result in all -+ * READDIRPLUS, except if dentries are already cached. -+ * -+ * This feature is enabled by default when supported by the kernel and -+ * if the filesystem implements both a readdirplus() and a readdir() -+ * handler. -+ */ -+#define FUSE_CAP_READDIRPLUS_AUTO (1 << 14) -+ -+/** -+ * Indicates that the filesystem supports asynchronous direct I/O submission. -+ * -+ * If this capability is not requested/available, the kernel will ensure that -+ * there is at most one pending read and one pending write request per direct -+ * I/O file-handle at any time. -+ * -+ * This feature is enabled by default when supported by the kernel. -+ */ -+#define FUSE_CAP_ASYNC_DIO (1 << 15) -+ -+/** -+ * Indicates that writeback caching should be enabled. This means that -+ * individual write request may be buffered and merged in the kernel -+ * before they are send to the filesystem. -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_WRITEBACK_CACHE (1 << 16) -+ -+/** -+ * Indicates support for zero-message opens. If this flag is set in -+ * the `capable` field of the `fuse_conn_info` structure, then the -+ * filesystem may return `ENOSYS` from the open() handler to indicate -+ * success. Further attempts to open files will be handled in the -+ * kernel. (If this flag is not set, returning ENOSYS will be treated -+ * as an error and signaled to the caller). -+ * -+ * Setting (or unsetting) this flag in the `want` field has *no -+ * effect*. 
-+ */ -+#define FUSE_CAP_NO_OPEN_SUPPORT (1 << 17) -+ -+/** -+ * Indicates support for parallel directory operations. If this flag -+ * is unset, the FUSE kernel module will ensure that lookup() and -+ * readdir() requests are never issued concurrently for the same -+ * directory. -+ * -+ * This feature is enabled by default when supported by the kernel. -+ */ -+#define FUSE_CAP_PARALLEL_DIROPS (1 << 18) -+ -+/** -+ * Indicates support for POSIX ACLs. -+ * -+ * If this feature is enabled, the kernel will cache and have -+ * responsibility for enforcing ACLs. ACL will be stored as xattrs and -+ * passed to userspace, which is responsible for updating the ACLs in -+ * the filesystem, keeping the file mode in sync with the ACL, and -+ * ensuring inheritance of default ACLs when new filesystem nodes are -+ * created. Note that this requires that the file system is able to -+ * parse and interpret the xattr representation of ACLs. -+ * -+ * Enabling this feature implicitly turns on the -+ * ``default_permissions`` mount option (even if it was not passed to -+ * mount(2)). -+ * -+ * This feature is disabled by default. -+ */ -+#define FUSE_CAP_POSIX_ACL (1 << 19) -+ -+/** -+ * Indicates that the filesystem is responsible for unsetting -+ * setuid and setgid bits when a file is written, truncated, or -+ * its owner is changed. -+ * -+ * This feature is enabled by default when supported by the kernel. -+ */ -+#define FUSE_CAP_HANDLE_KILLPRIV (1 << 20) -+ -+/** -+ * Indicates support for zero-message opendirs. If this flag is set in -+ * the `capable` field of the `fuse_conn_info` structure, then the filesystem -+ * may return `ENOSYS` from the opendir() handler to indicate success. Further -+ * opendir and releasedir messages will be handled in the kernel. (If this -+ * flag is not set, returning ENOSYS will be treated as an error and signalled -+ * to the caller.) -+ * -+ * Setting (or unsetting) this flag in the `want` field has *no effect*. 
-+ */ -+#define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) -+ -+/** -+ * Ioctl flags -+ * -+ * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine -+ * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed -+ * FUSE_IOCTL_RETRY: retry with new iovecs -+ * FUSE_IOCTL_DIR: is a directory -+ * -+ * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs -+ */ -+#define FUSE_IOCTL_COMPAT (1 << 0) -+#define FUSE_IOCTL_UNRESTRICTED (1 << 1) -+#define FUSE_IOCTL_RETRY (1 << 2) -+#define FUSE_IOCTL_DIR (1 << 4) -+ -+#define FUSE_IOCTL_MAX_IOV 256 -+ -+/** -+ * Connection information, passed to the ->init() method -+ * -+ * Some of the elements are read-write, these can be changed to -+ * indicate the value requested by the filesystem. The requested -+ * value must usually be smaller than the indicated value. -+ */ -+struct fuse_conn_info { -+ /** -+ * Major version of the protocol (read-only) -+ */ -+ unsigned proto_major; -+ -+ /** -+ * Minor version of the protocol (read-only) -+ */ -+ unsigned proto_minor; -+ -+ /** -+ * Maximum size of the write buffer -+ */ -+ unsigned max_write; -+ -+ /** -+ * Maximum size of read requests. A value of zero indicates no -+ * limit. However, even if the filesystem does not specify a -+ * limit, the maximum size of read requests will still be -+ * limited by the kernel. -+ * -+ * NOTE: For the time being, the maximum size of read requests -+ * must be set both here *and* passed to fuse_session_new() -+ * using the ``-o max_read=`` mount option. At some point -+ * in the future, specifying the mount option will no longer -+ * be necessary. -+ */ -+ unsigned max_read; -+ -+ /** -+ * Maximum readahead -+ */ -+ unsigned max_readahead; -+ -+ /** -+ * Capability flags that the kernel supports (read-only) -+ */ -+ unsigned capable; -+ -+ /** -+ * Capability flags that the filesystem wants to enable. -+ * -+ * libfuse attempts to initialize this field with -+ * reasonable default values before calling the init() handler. 
-+ */ -+ unsigned want; -+ -+ /** -+ * Maximum number of pending "background" requests. A -+ * background request is any type of request for which the -+ * total number is not limited by other means. As of kernel -+ * 4.8, only two types of requests fall into this category: -+ * -+ * 1. Read-ahead requests -+ * 2. Asynchronous direct I/O requests -+ * -+ * Read-ahead requests are generated (if max_readahead is -+ * non-zero) by the kernel to preemptively fill its caches -+ * when it anticipates that userspace will soon read more -+ * data. -+ * -+ * Asynchronous direct I/O requests are generated if -+ * FUSE_CAP_ASYNC_DIO is enabled and userspace submits a large -+ * direct I/O request. In this case the kernel will internally -+ * split it up into multiple smaller requests and submit them -+ * to the filesystem concurrently. -+ * -+ * Note that the following requests are *not* background -+ * requests: writeback requests (limited by the kernel's -+ * flusher algorithm), regular (i.e., synchronous and -+ * buffered) userspace read/write requests (limited to one per -+ * thread), asynchronous read requests (Linux's io_submit(2) -+ * call actually blocks, so these are also limited to one per -+ * thread). -+ */ -+ unsigned max_background; -+ -+ /** -+ * Kernel congestion threshold parameter. If the number of pending -+ * background requests exceeds this number, the FUSE kernel module will -+ * mark the filesystem as "congested". This instructs the kernel to -+ * expect that queued requests will take some time to complete, and to -+ * adjust its algorithms accordingly (e.g. by putting a waiting thread -+ * to sleep instead of using a busy-loop). -+ */ -+ unsigned congestion_threshold; -+ -+ /** -+ * When FUSE_CAP_WRITEBACK_CACHE is enabled, the kernel is responsible -+ * for updating mtime and ctime when write requests are received. The -+ * updated values are passed to the filesystem with setattr() requests. 
-+ * However, if the filesystem does not support the full resolution of -+ * the kernel timestamps (nanoseconds), the mtime and ctime values used -+ * by kernel and filesystem will differ (and result in an apparent -+ * change of times after a cache flush). -+ * -+ * To prevent this problem, this variable can be used to inform the -+ * kernel about the timestamp granularity supported by the file-system. -+ * The value should be power of 10. The default is 1, i.e. full -+ * nano-second resolution. Filesystems supporting only second resolution -+ * should set this to 1000000000. -+ */ -+ unsigned time_gran; -+ -+ /** -+ * For future use. -+ */ -+ unsigned reserved[22]; -+}; -+ -+struct fuse_session; -+struct fuse_pollhandle; -+struct fuse_conn_info_opts; -+ -+/** -+ * This function parses several command-line options that can be used -+ * to override elements of struct fuse_conn_info. The pointer returned -+ * by this function should be passed to the -+ * fuse_apply_conn_info_opts() method by the file system's init() -+ * handler. -+ * -+ * Before using this function, think twice if you really want these -+ * parameters to be adjustable from the command line. In most cases, -+ * they should be determined by the file system internally. 
-+ * -+ * The following options are recognized: -+ * -+ * -o max_write=N sets conn->max_write -+ * -o max_readahead=N sets conn->max_readahead -+ * -o max_background=N sets conn->max_background -+ * -o congestion_threshold=N sets conn->congestion_threshold -+ * -o async_read sets FUSE_CAP_ASYNC_READ in conn->want -+ * -o sync_read unsets FUSE_CAP_ASYNC_READ in conn->want -+ * -o atomic_o_trunc sets FUSE_CAP_ATOMIC_O_TRUNC in conn->want -+ * -o no_remote_lock Equivalent to -o no_remote_flock,no_remote_posix_lock -+ * -o no_remote_flock Unsets FUSE_CAP_FLOCK_LOCKS in conn->want -+ * -o no_remote_posix_lock Unsets FUSE_CAP_POSIX_LOCKS in conn->want -+ * -o [no_]splice_write (un-)sets FUSE_CAP_SPLICE_WRITE in conn->want -+ * -o [no_]splice_move (un-)sets FUSE_CAP_SPLICE_MOVE in conn->want -+ * -o [no_]splice_read (un-)sets FUSE_CAP_SPLICE_READ in conn->want -+ * -o [no_]auto_inval_data (un-)sets FUSE_CAP_AUTO_INVAL_DATA in conn->want -+ * -o readdirplus=no unsets FUSE_CAP_READDIRPLUS in conn->want -+ * -o readdirplus=yes sets FUSE_CAP_READDIRPLUS and unsets -+ * FUSE_CAP_READDIRPLUS_AUTO in conn->want -+ * -o readdirplus=auto sets FUSE_CAP_READDIRPLUS and -+ * FUSE_CAP_READDIRPLUS_AUTO in conn->want -+ * -o [no_]async_dio (un-)sets FUSE_CAP_ASYNC_DIO in conn->want -+ * -o [no_]writeback_cache (un-)sets FUSE_CAP_WRITEBACK_CACHE in conn->want -+ * -o time_gran=N sets conn->time_gran -+ * -+ * Known options will be removed from *args*, unknown options will be -+ * passed through unchanged. -+ * -+ * @param args argument vector (input+output) -+ * @return parsed options -+ **/ -+struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args); -+ -+/** -+ * This function applies the (parsed) parameters in *opts* to the -+ * *conn* pointer. It may modify the following fields: wants, -+ * max_write, max_readahead, congestion_threshold, max_background, -+ * time_gran. A field is only set (or unset) if the corresponding -+ * option has been explicitly set. 
-+ */ -+void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, -+ struct fuse_conn_info *conn); -+ -+/** -+ * Go into the background -+ * -+ * @param foreground if true, stay in the foreground -+ * @return 0 on success, -1 on failure -+ */ -+int fuse_daemonize(int foreground); -+ -+/** -+ * Get the version of the library -+ * -+ * @return the version -+ */ -+int fuse_version(void); -+ -+/** -+ * Get the full package version string of the library -+ * -+ * @return the package version -+ */ -+const char *fuse_pkgversion(void); -+ -+/** -+ * Destroy poll handle -+ * -+ * @param ph the poll handle -+ */ -+void fuse_pollhandle_destroy(struct fuse_pollhandle *ph); -+ -+/* ----------------------------------------------------------- * -+ * Data buffer * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Buffer flags -+ */ -+enum fuse_buf_flags { -+ /** -+ * Buffer contains a file descriptor -+ * -+ * If this flag is set, the .fd field is valid, otherwise the -+ * .mem fields is valid. -+ */ -+ FUSE_BUF_IS_FD = (1 << 1), -+ -+ /** -+ * Seek on the file descriptor -+ * -+ * If this flag is set then the .pos field is valid and is -+ * used to seek to the given offset before performing -+ * operation on file descriptor. -+ */ -+ FUSE_BUF_FD_SEEK = (1 << 2), -+ -+ /** -+ * Retry operation on file descriptor -+ * -+ * If this flag is set then retry operation on file descriptor -+ * until .size bytes have been copied or an error or EOF is -+ * detected. -+ */ -+ FUSE_BUF_FD_RETRY = (1 << 3), -+}; -+ -+/** -+ * Buffer copy flags -+ */ -+enum fuse_buf_copy_flags { -+ /** -+ * Don't use splice(2) -+ * -+ * Always fall back to using read and write instead of -+ * splice(2) to copy data from one file descriptor to another. -+ * -+ * If this flag is not set, then only fall back if splice is -+ * unavailable. 
-+ */ -+ FUSE_BUF_NO_SPLICE = (1 << 1), -+ -+ /** -+ * Force splice -+ * -+ * Always use splice(2) to copy data from one file descriptor -+ * to another. If splice is not available, return -EINVAL. -+ */ -+ FUSE_BUF_FORCE_SPLICE = (1 << 2), -+ -+ /** -+ * Try to move data with splice. -+ * -+ * If splice is used, try to move pages from the source to the -+ * destination instead of copying. See documentation of -+ * SPLICE_F_MOVE in splice(2) man page. -+ */ -+ FUSE_BUF_SPLICE_MOVE = (1 << 3), -+ -+ /** -+ * Don't block on the pipe when copying data with splice -+ * -+ * Makes the operations on the pipe non-blocking (if the pipe -+ * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) -+ * man page. -+ */ -+ FUSE_BUF_SPLICE_NONBLOCK= (1 << 4), -+}; -+ -+/** -+ * Single data buffer -+ * -+ * Generic data buffer for I/O, extended attributes, etc... Data may -+ * be supplied as a memory pointer or as a file descriptor -+ */ -+struct fuse_buf { -+ /** -+ * Size of data in bytes -+ */ -+ size_t size; -+ -+ /** -+ * Buffer flags -+ */ -+ enum fuse_buf_flags flags; -+ -+ /** -+ * Memory pointer -+ * -+ * Used unless FUSE_BUF_IS_FD flag is set. -+ */ -+ void *mem; -+ -+ /** -+ * File descriptor -+ * -+ * Used if FUSE_BUF_IS_FD flag is set. -+ */ -+ int fd; -+ -+ /** -+ * File position -+ * -+ * Used if FUSE_BUF_FD_SEEK flag is set. -+ */ -+ off_t pos; -+}; -+ -+/** -+ * Data buffer vector -+ * -+ * An array of data buffers, each containing a memory pointer or a -+ * file descriptor. -+ * -+ * Allocate dynamically to add more than one buffer. 
-+ */ -+struct fuse_bufvec { -+ /** -+ * Number of buffers in the array -+ */ -+ size_t count; -+ -+ /** -+ * Index of current buffer within the array -+ */ -+ size_t idx; -+ -+ /** -+ * Current offset within the current buffer -+ */ -+ size_t off; -+ -+ /** -+ * Array of buffers -+ */ -+ struct fuse_buf buf[1]; -+}; -+ -+/* Initialize bufvec with a single buffer of given size */ -+#define FUSE_BUFVEC_INIT(size__) \ -+ ((struct fuse_bufvec) { \ -+ /* .count= */ 1, \ -+ /* .idx = */ 0, \ -+ /* .off = */ 0, \ -+ /* .buf = */ { /* [0] = */ { \ -+ /* .size = */ (size__), \ -+ /* .flags = */ (enum fuse_buf_flags) 0, \ -+ /* .mem = */ NULL, \ -+ /* .fd = */ -1, \ -+ /* .pos = */ 0, \ -+ } } \ -+ } ) -+ -+/** -+ * Get total size of data in a fuse buffer vector -+ * -+ * @param bufv buffer vector -+ * @return size of data -+ */ -+size_t fuse_buf_size(const struct fuse_bufvec *bufv); -+ -+/** -+ * Copy data from one buffer vector to another -+ * -+ * @param dst destination buffer vector -+ * @param src source buffer vector -+ * @param flags flags controlling the copy -+ * @return actual number of bytes copied or -errno on error -+ */ -+ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, -+ enum fuse_buf_copy_flags flags); -+ -+/* ----------------------------------------------------------- * -+ * Signal handling * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Exit session on HUP, TERM and INT signals and ignore PIPE signal -+ * -+ * Stores session in a global variable. May only be called once per -+ * process until fuse_remove_signal_handlers() is called. -+ * -+ * Once either of the POSIX signals arrives, the signal handler calls -+ * fuse_session_exit(). 
-+ * -+ * @param se the session to exit -+ * @return 0 on success, -1 on failure -+ * -+ * See also: -+ * fuse_remove_signal_handlers() -+ */ -+int fuse_set_signal_handlers(struct fuse_session *se); -+ -+/** -+ * Restore default signal handlers -+ * -+ * Resets global session. After this fuse_set_signal_handlers() may -+ * be called again. -+ * -+ * @param se the same session as given in fuse_set_signal_handlers() -+ * -+ * See also: -+ * fuse_set_signal_handlers() -+ */ -+void fuse_remove_signal_handlers(struct fuse_session *se); -+ -+/* ----------------------------------------------------------- * -+ * Compatibility stuff * -+ * ----------------------------------------------------------- */ -+ -+#if !defined(FUSE_USE_VERSION) || FUSE_USE_VERSION < 30 -+# error only API version 30 or greater is supported -+#endif -+ -+#ifdef __cplusplus -+} -+#endif -+ -+ -+/* -+ * This interface uses 64 bit off_t. -+ * -+ * On 32bit systems please add -D_FILE_OFFSET_BITS=64 to your compile flags! -+ */ -+ -+#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && !defined __cplusplus -+_Static_assert(sizeof(off_t) == 8, "fuse: off_t must be 64bit"); -+#else -+struct _fuse_off_t_must_be_64bit_dummy_struct \ -+ { unsigned _fuse_off_t_must_be_64bit:((sizeof(off_t) == 8) ? 1 : -1); }; -+#endif -+ -+#endif /* FUSE_COMMON_H_ */ -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -new file mode 100644 -index 0000000..d38b630 ---- /dev/null -+++ b/tools/virtiofsd/fuse_i.h -@@ -0,0 +1,139 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. 
-+ See the file COPYING.LIB -+*/ -+ -+#include "fuse.h" -+#include "fuse_lowlevel.h" -+ -+struct mount_opts; -+ -+struct fuse_req { -+ struct fuse_session *se; -+ uint64_t unique; -+ int ctr; -+ pthread_mutex_t lock; -+ struct fuse_ctx ctx; -+ struct fuse_chan *ch; -+ int interrupted; -+ unsigned int ioctl_64bit : 1; -+ union { -+ struct { -+ uint64_t unique; -+ } i; -+ struct { -+ fuse_interrupt_func_t func; -+ void *data; -+ } ni; -+ } u; -+ struct fuse_req *next; -+ struct fuse_req *prev; -+}; -+ -+struct fuse_notify_req { -+ uint64_t unique; -+ void (*reply)(struct fuse_notify_req *, fuse_req_t, fuse_ino_t, -+ const void *, const struct fuse_buf *); -+ struct fuse_notify_req *next; -+ struct fuse_notify_req *prev; -+}; -+ -+struct fuse_session { -+ char *mountpoint; -+ volatile int exited; -+ int fd; -+ struct mount_opts *mo; -+ int debug; -+ int deny_others; -+ struct fuse_lowlevel_ops op; -+ int got_init; -+ struct cuse_data *cuse_data; -+ void *userdata; -+ uid_t owner; -+ struct fuse_conn_info conn; -+ struct fuse_req list; -+ struct fuse_req interrupts; -+ pthread_mutex_t lock; -+ int got_destroy; -+ pthread_key_t pipe_key; -+ int broken_splice_nonblock; -+ uint64_t notify_ctr; -+ struct fuse_notify_req notify_list; -+ size_t bufsize; -+ int error; -+}; -+ -+struct fuse_chan { -+ pthread_mutex_t lock; -+ int ctr; -+ int fd; -+}; -+ -+/** -+ * Filesystem module -+ * -+ * Filesystem modules are registered with the FUSE_REGISTER_MODULE() -+ * macro. 
-+ * -+ */ -+struct fuse_module { -+ char *name; -+ fuse_module_factory_t factory; -+ struct fuse_module *next; -+ struct fusemod_so *so; -+ int ctr; -+}; -+ -+/* ----------------------------------------------------------- * -+ * Channel interface (when using -o clone_fd) * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Obtain counted reference to the channel -+ * -+ * @param ch the channel -+ * @return the channel -+ */ -+struct fuse_chan *fuse_chan_get(struct fuse_chan *ch); -+ -+/** -+ * Drop counted reference to a channel -+ * -+ * @param ch the channel -+ */ -+void fuse_chan_put(struct fuse_chan *ch); -+ -+struct mount_opts *parse_mount_opts(struct fuse_args *args); -+void destroy_mount_opts(struct mount_opts *mo); -+void fuse_mount_version(void); -+unsigned get_max_read(struct mount_opts *o); -+void fuse_kern_unmount(const char *mountpoint, int fd); -+int fuse_kern_mount(const char *mountpoint, struct mount_opts *mo); -+ -+int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, -+ int count); -+void fuse_free_req(fuse_req_t req); -+ -+void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeide, const void *inarg); -+ -+int fuse_start_thread(pthread_t *thread_id, void *(*func)(void *), void *arg); -+ -+int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, -+ struct fuse_chan *ch); -+void fuse_session_process_buf_int(struct fuse_session *se, -+ const struct fuse_buf *buf, struct fuse_chan *ch); -+ -+struct fuse *fuse_new_31(struct fuse_args *args, const struct fuse_operations *op, -+ size_t op_size, void *private_data); -+int fuse_loop_mt_32(struct fuse *f, struct fuse_loop_config *config); -+int fuse_session_loop_mt_32(struct fuse_session *se, struct fuse_loop_config *config); -+ -+#define FUSE_MAX_MAX_PAGES 256 -+#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32 -+ -+/* room needed in buffer to accommodate header */ -+#define FUSE_BUFFER_HEADER_SIZE 0x1000 -+ -diff --git 
a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h -new file mode 100644 -index 0000000..5e112e0 ---- /dev/null -+++ b/tools/virtiofsd/fuse_log.h -@@ -0,0 +1,82 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2019 Red Hat, Inc. -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. -+*/ -+ -+#ifndef FUSE_LOG_H_ -+#define FUSE_LOG_H_ -+ -+/** @file -+ * -+ * This file defines the logging interface of FUSE -+ */ -+ -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/** -+ * Log severity level -+ * -+ * These levels correspond to syslog(2) log levels since they are widely used. -+ */ -+enum fuse_log_level { -+ FUSE_LOG_EMERG, -+ FUSE_LOG_ALERT, -+ FUSE_LOG_CRIT, -+ FUSE_LOG_ERR, -+ FUSE_LOG_WARNING, -+ FUSE_LOG_NOTICE, -+ FUSE_LOG_INFO, -+ FUSE_LOG_DEBUG -+}; -+ -+/** -+ * Log message handler function. -+ * -+ * This function must be thread-safe. It may be called from any libfuse -+ * function, including fuse_parse_cmdline() and other functions invoked before -+ * a FUSE filesystem is created. -+ * -+ * Install a custom log message handler function using fuse_set_log_func(). -+ * -+ * @param level log severity level -+ * @param fmt sprintf-style format string including newline -+ * @param ap format string arguments -+ */ -+typedef void (*fuse_log_func_t)(enum fuse_log_level level, -+ const char *fmt, va_list ap); -+ -+/** -+ * Install a custom log handler function. -+ * -+ * Log messages are emitted by libfuse functions to report errors and debug -+ * information. Messages are printed to stderr by default but this can be -+ * overridden by installing a custom log message handler function. -+ * -+ * The log message handler function is global and affects all FUSE filesystems -+ * created within this process. 
-+ * -+ * @param func a custom log message handler function or NULL to revert to -+ * the default -+ */ -+void fuse_set_log_func(fuse_log_func_t func); -+ -+/** -+ * Emit a log message -+ * -+ * @param level severity level (FUSE_LOG_ERR, FUSE_LOG_DEBUG, etc) -+ * @param fmt sprintf-style format string including newline -+ */ -+void fuse_log(enum fuse_log_level level, const char *fmt, ...); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* FUSE_LOG_H_ */ -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -new file mode 100644 -index 0000000..18c6363 ---- /dev/null -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -0,0 +1,2089 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. -+*/ -+ -+#ifndef FUSE_LOWLEVEL_H_ -+#define FUSE_LOWLEVEL_H_ -+ -+/** @file -+ * -+ * Low level API -+ * -+ * IMPORTANT: you should define FUSE_USE_VERSION before including this -+ * header. To use the newest API define it to 31 (recommended for any -+ * new application). 
-+ */ -+ -+#ifndef FUSE_USE_VERSION -+#error FUSE_USE_VERSION not defined -+#endif -+ -+#include "fuse_common.h" -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* ----------------------------------------------------------- * -+ * Miscellaneous definitions * -+ * ----------------------------------------------------------- */ -+ -+/** The node ID of the root inode */ -+#define FUSE_ROOT_ID 1 -+ -+/** Inode number type */ -+typedef uint64_t fuse_ino_t; -+ -+/** Request pointer type */ -+typedef struct fuse_req *fuse_req_t; -+ -+/** -+ * Session -+ * -+ * This provides hooks for processing requests, and exiting -+ */ -+struct fuse_session; -+ -+/** Directory entry parameters supplied to fuse_reply_entry() */ -+struct fuse_entry_param { -+ /** Unique inode number -+ * -+ * In lookup, zero means negative entry (from version 2.5) -+ * Returning ENOENT also means negative entry, but by setting zero -+ * ino the kernel may cache negative entries for entry_timeout -+ * seconds. -+ */ -+ fuse_ino_t ino; -+ -+ /** Generation number for this entry. -+ * -+ * If the file system will be exported over NFS, the -+ * ino/generation pairs need to be unique over the file -+ * system's lifetime (rather than just the mount time). So if -+ * the file system reuses an inode after it has been deleted, -+ * it must assign a new, previously unused generation number -+ * to the inode at the same time. -+ * -+ */ -+ uint64_t generation; -+ -+ /** Inode attributes. -+ * -+ * Even if attr_timeout == 0, attr must be correct. For example, -+ * for open(), FUSE uses attr.st_size from lookup() to determine -+ * how many bytes to request. If this value is not correct, -+ * incorrect data will be returned. -+ */ -+ struct stat attr; -+ -+ /** Validity timeout (in seconds) for inode attributes. If -+ attributes only change as a result of requests that come -+ through the kernel, this should be set to a very large -+ value. 
*/ -+ double attr_timeout; -+ -+ /** Validity timeout (in seconds) for the name. If directory -+ entries are changed/deleted only as a result of requests -+ that come through the kernel, this should be set to a very -+ large value. */ -+ double entry_timeout; -+}; -+ -+/** -+ * Additional context associated with requests. -+ * -+ * Note that the reported client uid, gid and pid may be zero in some -+ * situations. For example, if the FUSE file system is running in a -+ * PID or user namespace but then accessed from outside the namespace, -+ * there is no valid uid/pid/gid that could be reported. -+ */ -+struct fuse_ctx { -+ /** User ID of the calling process */ -+ uid_t uid; -+ -+ /** Group ID of the calling process */ -+ gid_t gid; -+ -+ /** Thread ID of the calling process */ -+ pid_t pid; -+ -+ /** Umask of the calling process */ -+ mode_t umask; -+}; -+ -+struct fuse_forget_data { -+ fuse_ino_t ino; -+ uint64_t nlookup; -+}; -+ -+/* 'to_set' flags in setattr */ -+#define FUSE_SET_ATTR_MODE (1 << 0) -+#define FUSE_SET_ATTR_UID (1 << 1) -+#define FUSE_SET_ATTR_GID (1 << 2) -+#define FUSE_SET_ATTR_SIZE (1 << 3) -+#define FUSE_SET_ATTR_ATIME (1 << 4) -+#define FUSE_SET_ATTR_MTIME (1 << 5) -+#define FUSE_SET_ATTR_ATIME_NOW (1 << 7) -+#define FUSE_SET_ATTR_MTIME_NOW (1 << 8) -+#define FUSE_SET_ATTR_CTIME (1 << 10) -+ -+/* ----------------------------------------------------------- * -+ * Request methods and replies * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Low level filesystem operations -+ * -+ * Most of the methods (with the exception of init and destroy) -+ * receive a request handle (fuse_req_t) as their first argument. -+ * This handle must be passed to one of the specified reply functions. -+ * -+ * This may be done inside the method invocation, or after the call -+ * has returned. The request handle is valid until one of the reply -+ * functions is called. 
-+ * -+ * Other pointer arguments (name, fuse_file_info, etc) are not valid -+ * after the call has returned, so if they are needed later, their -+ * contents have to be copied. -+ * -+ * In general, all methods are expected to perform any necessary -+ * permission checking. However, a filesystem may delegate this task -+ * to the kernel by passing the `default_permissions` mount option to -+ * `fuse_session_new()`. In this case, methods will only be called if -+ * the kernel's permission check has succeeded. -+ * -+ * The filesystem sometimes needs to handle a return value of -ENOENT -+ * from the reply function, which means, that the request was -+ * interrupted, and the reply discarded. For example if -+ * fuse_reply_open() return -ENOENT means, that the release method for -+ * this file will not be called. -+ */ -+struct fuse_lowlevel_ops { -+ /** -+ * Initialize filesystem -+ * -+ * This function is called when libfuse establishes -+ * communication with the FUSE kernel module. The file system -+ * should use this module to inspect and/or modify the -+ * connection parameters provided in the `conn` structure. -+ * -+ * Note that some parameters may be overwritten by options -+ * passed to fuse_session_new() which take precedence over the -+ * values set in this handler. -+ * -+ * There's no reply to this function -+ * -+ * @param userdata the user data passed to fuse_session_new() -+ */ -+ void (*init) (void *userdata, struct fuse_conn_info *conn); -+ -+ /** -+ * Clean up filesystem. -+ * -+ * Called on filesystem exit. When this method is called, the -+ * connection to the kernel may be gone already, so that eg. calls -+ * to fuse_lowlevel_notify_* will fail. -+ * -+ * There's no reply to this function -+ * -+ * @param userdata the user data passed to fuse_session_new() -+ */ -+ void (*destroy) (void *userdata); -+ -+ /** -+ * Look up a directory entry by name and get its attributes. 
-+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name the name to look up -+ */ -+ void (*lookup) (fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Forget about an inode -+ * -+ * This function is called when the kernel removes an inode -+ * from its internal caches. -+ * -+ * The inode's lookup count increases by one for every call to -+ * fuse_reply_entry and fuse_reply_create. The nlookup parameter -+ * indicates by how much the lookup count should be decreased. -+ * -+ * Inodes with a non-zero lookup count may receive request from -+ * the kernel even after calls to unlink, rmdir or (when -+ * overwriting an existing file) rename. Filesystems must handle -+ * such requests properly and it is recommended to defer removal -+ * of the inode until the lookup count reaches zero. Calls to -+ * unlink, rmdir or rename will be followed closely by forget -+ * unless the file or directory is open, in which case the -+ * kernel issues forget only after the release or releasedir -+ * calls. -+ * -+ * Note that if a file system will be exported over NFS the -+ * inodes lifetime must extend even beyond forget. See the -+ * generation field in struct fuse_entry_param above. -+ * -+ * On unmount the lookup count for all inodes implicitly drops -+ * to zero. It is not guaranteed that the file system will -+ * receive corresponding forget messages for the affected -+ * inodes. -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param nlookup the number of lookups to forget -+ */ -+ void (*forget) (fuse_req_t req, fuse_ino_t ino, uint64_t nlookup); -+ -+ /** -+ * Get file attributes. 
-+ * -+ * If writeback caching is enabled, the kernel may have a -+ * better idea of a file's length than the FUSE file system -+ * (eg if there has been a write that extended the file size, -+ * but that has not yet been passed to the filesystem.n -+ * -+ * In this case, the st_size value provided by the file system -+ * will be ignored. -+ * -+ * Valid replies: -+ * fuse_reply_attr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi for future use, currently always NULL -+ */ -+ void (*getattr) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Set file attributes -+ * -+ * In the 'attr' argument only members indicated by the 'to_set' -+ * bitmask contain valid values. Other members contain undefined -+ * values. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits if the file -+ * size or owner is being changed. -+ * -+ * If the setattr was invoked from the ftruncate() system call -+ * under Linux kernel versions 2.6.15 or later, the fi->fh will -+ * contain the value set by the open method or will be undefined -+ * if the open method didn't set any value. Otherwise (not -+ * ftruncate call, or kernel version earlier than 2.6.15) the fi -+ * parameter will be NULL. 
-+ * -+ * Valid replies: -+ * fuse_reply_attr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param attr the attributes -+ * @param to_set bit mask of attributes which should be set -+ * @param fi file information, or NULL -+ */ -+ void (*setattr) (fuse_req_t req, fuse_ino_t ino, struct stat *attr, -+ int to_set, struct fuse_file_info *fi); -+ -+ /** -+ * Read symbolic link -+ * -+ * Valid replies: -+ * fuse_reply_readlink -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ */ -+ void (*readlink) (fuse_req_t req, fuse_ino_t ino); -+ -+ /** -+ * Create file node -+ * -+ * Create a regular file, character device, block device, fifo or -+ * socket node. -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode file type and mode with which to create the new file -+ * @param rdev the device number (only valid if created file is a device) -+ */ -+ void (*mknod) (fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, dev_t rdev); -+ -+ /** -+ * Create a directory -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode with which to create the new file -+ */ -+ void (*mkdir) (fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode); -+ -+ /** -+ * Remove a file -+ * -+ * If the file's inode's lookup count is non-zero, the file -+ * system is expected to postpone any removal of the inode -+ * until the lookup count reaches zero (see description of the -+ * forget function). 
-+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to remove -+ */ -+ void (*unlink) (fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Remove a directory -+ * -+ * If the directory's inode's lookup count is non-zero, the -+ * file system is expected to postpone any removal of the -+ * inode until the lookup count reaches zero (see description -+ * of the forget function). -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to remove -+ */ -+ void (*rmdir) (fuse_req_t req, fuse_ino_t parent, const char *name); -+ -+ /** -+ * Create a symbolic link -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param link the contents of the symbolic link -+ * @param parent inode number of the parent directory -+ * @param name to create -+ */ -+ void (*symlink) (fuse_req_t req, const char *link, fuse_ino_t parent, -+ const char *name); -+ -+ /** Rename a file -+ * -+ * If the target exists it should be atomically replaced. If -+ * the target's inode's lookup count is non-zero, the file -+ * system is expected to postpone any removal of the inode -+ * until the lookup count reaches zero (see description of the -+ * forget function). -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EINVAL, i.e. all -+ * future bmap requests will fail with EINVAL without being -+ * send to the filesystem process. -+ * -+ * *flags* may be `RENAME_EXCHANGE` or `RENAME_NOREPLACE`. If -+ * RENAME_NOREPLACE is specified, the filesystem must not -+ * overwrite *newname* if it exists and return an error -+ * instead. If `RENAME_EXCHANGE` is specified, the filesystem -+ * must atomically exchange the two files, i.e. 
both must -+ * exist and neither may be deleted. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the old parent directory -+ * @param name old name -+ * @param newparent inode number of the new parent directory -+ * @param newname new name -+ */ -+ void (*rename) (fuse_req_t req, fuse_ino_t parent, const char *name, -+ fuse_ino_t newparent, const char *newname, -+ unsigned int flags); -+ -+ /** -+ * Create a hard link -+ * -+ * Valid replies: -+ * fuse_reply_entry -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the old inode number -+ * @param newparent inode number of the new parent directory -+ * @param newname new name to create -+ */ -+ void (*link) (fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, -+ const char *newname); -+ -+ /** -+ * Open a file -+ * -+ * Open flags are available in fi->flags. The following rules -+ * apply. -+ * -+ * - Creation (O_CREAT, O_EXCL, O_NOCTTY) flags will be -+ * filtered out / handled by the kernel. -+ * -+ * - Access modes (O_RDONLY, O_WRONLY, O_RDWR) should be used -+ * by the filesystem to check if the operation is -+ * permitted. If the ``-o default_permissions`` mount -+ * option is given, this check is already done by the -+ * kernel before calling open() and may thus be omitted by -+ * the filesystem. -+ * -+ * - When writeback caching is enabled, the kernel may send -+ * read requests even for files opened with O_WRONLY. The -+ * filesystem should be prepared to handle this. -+ * -+ * - When writeback caching is disabled, the filesystem is -+ * expected to properly handle the O_APPEND flag and ensure -+ * that each write is appending to the end of the file. -+ * -+ * - When writeback caching is enabled, the kernel will -+ * handle O_APPEND. However, unless all changes to the file -+ * come through the kernel this will not work reliably. 
The -+ * filesystem should thus either ignore the O_APPEND flag -+ * (and let the kernel handle it), or return an error -+ * (indicating that reliably O_APPEND is not available). -+ * -+ * Filesystem may store an arbitrary file handle (pointer, -+ * index, etc) in fi->fh, and use this in other all other file -+ * operations (read, write, flush, release, fsync). -+ * -+ * Filesystem may also implement stateless file I/O and not store -+ * anything in fi->fh. -+ * -+ * There are also some flags (direct_io, keep_cache) which the -+ * filesystem may set in fi, to change the way the file is opened. -+ * See fuse_file_info structure in for more details. -+ * -+ * If this request is answered with an error code of ENOSYS -+ * and FUSE_CAP_NO_OPEN_SUPPORT is set in -+ * `fuse_conn_info.capable`, this is treated as success and -+ * future calls to open and release will also succeed without being -+ * sent to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_open -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*open) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Read data -+ * -+ * Read should send exactly the number of bytes requested except -+ * on EOF or error, otherwise the rest of the data will be -+ * substituted with zeroes. An exception to this is when the file -+ * has been opened in 'direct_io' mode, in which case the return -+ * value of the read system call will reflect the return value of -+ * this operation. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. 
-+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_iov -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size number of bytes to read -+ * @param off offset to read from -+ * @param fi file information -+ */ -+ void (*read) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Write data -+ * -+ * Write should return exactly the number of bytes requested -+ * except on error. An exception to this is when the file has -+ * been opened in 'direct_io' mode, in which case the return value -+ * of the write system call will reflect the return value of this -+ * operation. -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param buf data to write -+ * @param size number of bytes to write -+ * @param off offset to write to -+ * @param fi file information -+ */ -+ void (*write) (fuse_req_t req, fuse_ino_t ino, const char *buf, -+ size_t size, off_t off, struct fuse_file_info *fi); -+ -+ /** -+ * Flush method -+ * -+ * This is called on each close() of the opened file. -+ * -+ * Since file descriptors can be duplicated (dup, dup2, fork), for -+ * one open call there may be many flush calls. -+ * -+ * Filesystems shouldn't assume that flush will always be called -+ * after some writes, or that if will be called at all. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * -+ * NOTE: the name of the method is misleading, since (unlike -+ * fsync) the filesystem is not forced to flush pending writes. 
-+ * One reason to flush data is if the filesystem wants to return -+ * write errors during close. However, such use is non-portable -+ * because POSIX does not require [close] to wait for delayed I/O to -+ * complete. -+ * -+ * If the filesystem supports file locking operations (setlk, -+ * getlk) it should remove all locks belonging to 'fi->owner'. -+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to flush() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * -+ * [close]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/close.html -+ */ -+ void (*flush) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Release an open file -+ * -+ * Release is called when there are no more references to an open -+ * file: all file descriptors are closed and all memory mappings -+ * are unmapped. -+ * -+ * For every open call there will be exactly one release call (unless -+ * the filesystem is force-unmounted). -+ * -+ * The filesystem may reply with an error, but error values are -+ * not returned to close() or munmap() which triggered the -+ * release. -+ * -+ * fi->fh will contain the value set by the open method, or will -+ * be undefined if the open method didn't set any value. -+ * fi->flags will contain the same flags as for open. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*release) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Synchronize file contents -+ * -+ * If the datasync parameter is non-zero, then only the user data -+ * should be flushed, not the meta data. 
-+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to fsync() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param datasync flag indicating if only data should be flushed -+ * @param fi file information -+ */ -+ void (*fsync) (fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Open a directory -+ * -+ * Filesystem may store an arbitrary file handle (pointer, index, -+ * etc) in fi->fh, and use this in other all other directory -+ * stream operations (readdir, releasedir, fsyncdir). -+ * -+ * If this request is answered with an error code of ENOSYS and -+ * FUSE_CAP_NO_OPENDIR_SUPPORT is set in `fuse_conn_info.capable`, -+ * this is treated as success and future calls to opendir and -+ * releasedir will also succeed without being sent to the filesystem -+ * process. In addition, the kernel will cache readdir results -+ * as if opendir returned FOPEN_KEEP_CACHE | FOPEN_CACHE_DIR. -+ * -+ * Valid replies: -+ * fuse_reply_open -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*opendir) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Read directory -+ * -+ * Send a buffer filled using fuse_add_direntry(), with size not -+ * exceeding the requested size. Send an empty buffer on end of -+ * stream. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * Returning a directory entry from readdir() does not affect -+ * its lookup count. -+ * -+ * If off_t is non-zero, then it will correspond to one of the off_t -+ * values that was previously returned by readdir() for the same -+ * directory handle. 
In this case, readdir() should skip over entries -+ * coming before the position defined by the off_t value. If entries -+ * are added or removed while the directory handle is open, they filesystem -+ * may still include the entries that have been removed, and may not -+ * report the entries that have been created. However, addition or -+ * removal of entries must never cause readdir() to skip over unrelated -+ * entries or to report them more than once. This means -+ * that off_t can not be a simple index that enumerates the entries -+ * that have been returned but must contain sufficient information to -+ * uniquely determine the next directory entry to return even when the -+ * set of entries is changing. -+ * -+ * The function does not have to report the '.' and '..' -+ * entries, but is allowed to do so. Note that, if readdir does -+ * not return '.' or '..', they will not be implicitly returned, -+ * and this behavior is observable by the caller. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum number of bytes to send -+ * @param off offset to continue reading the directory stream -+ * @param fi file information -+ */ -+ void (*readdir) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Release an open directory -+ * -+ * For every opendir call there will be exactly one releasedir -+ * call (unless the filesystem is force-unmounted). -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. 
-+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ */ -+ void (*releasedir) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Synchronize directory contents -+ * -+ * If the datasync parameter is non-zero, then only the directory -+ * contents should be flushed, not the meta data. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * If this request is answered with an error code of ENOSYS, -+ * this is treated as success and future calls to fsyncdir() will -+ * succeed automatically without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param datasync flag indicating if only data should be flushed -+ * @param fi file information -+ */ -+ void (*fsyncdir) (fuse_req_t req, fuse_ino_t ino, int datasync, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Get file system statistics -+ * -+ * Valid replies: -+ * fuse_reply_statfs -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number, zero means "undefined" -+ */ -+ void (*statfs) (fuse_req_t req, fuse_ino_t ino); -+ -+ /** -+ * Set an extended attribute -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future setxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ */ -+ void (*setxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, -+ const char *value, size_t size, int flags); -+ -+ /** -+ * Get an extended attribute -+ * -+ * If size is zero, the size of the value should be sent with -+ * fuse_reply_xattr. 
-+ * -+ * If the size is non-zero, and the value fits in the buffer, the -+ * value should be sent with fuse_reply_buf. -+ * -+ * If the size is too small for the value, the ERANGE error should -+ * be sent. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future getxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_xattr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param name of the extended attribute -+ * @param size maximum size of the value to send -+ */ -+ void (*getxattr) (fuse_req_t req, fuse_ino_t ino, const char *name, -+ size_t size); -+ -+ /** -+ * List extended attribute names -+ * -+ * If size is zero, the total size of the attribute list should be -+ * sent with fuse_reply_xattr. -+ * -+ * If the size is non-zero, and the null character separated -+ * attribute list fits in the buffer, the list should be sent with -+ * fuse_reply_buf. -+ * -+ * If the size is too small for the list, the ERANGE error should -+ * be sent. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future listxattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_xattr -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum size of the list to send -+ */ -+ void (*listxattr) (fuse_req_t req, fuse_ino_t ino, size_t size); -+ -+ /** -+ * Remove an extended attribute -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. 
all -+ * future removexattr() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param name of the extended attribute -+ */ -+ void (*removexattr) (fuse_req_t req, fuse_ino_t ino, const char *name); -+ -+ /** -+ * Check file access permissions -+ * -+ * This will be called for the access() and chdir() system -+ * calls. If the 'default_permissions' mount option is given, -+ * this method is not called. -+ * -+ * This method is not called under Linux kernel versions 2.4.x -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent success, i.e. this and all future access() -+ * requests will succeed without being send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param mask requested access mode -+ */ -+ void (*access) (fuse_req_t req, fuse_ino_t ino, int mask); -+ -+ /** -+ * Create and open a file -+ * -+ * If the file does not exist, first create it with the specified -+ * mode, and then open it. -+ * -+ * See the description of the open handler for more -+ * information. -+ * -+ * If this method is not implemented or under Linux kernel -+ * versions earlier than 2.6.15, the mknod() and open() methods -+ * will be called instead. -+ * -+ * If this request is answered with an error code of ENOSYS, the handler -+ * is treated as not implemented (i.e., for this and future requests the -+ * mknod() and open() handlers will be called instead). 
-+ * -+ * Valid replies: -+ * fuse_reply_create -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param parent inode number of the parent directory -+ * @param name to create -+ * @param mode file type and mode with which to create the new file -+ * @param fi file information -+ */ -+ void (*create) (fuse_req_t req, fuse_ino_t parent, const char *name, -+ mode_t mode, struct fuse_file_info *fi); -+ -+ /** -+ * Test for a POSIX file lock -+ * -+ * Valid replies: -+ * fuse_reply_lock -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param lock the region/type to test -+ */ -+ void (*getlk) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi, struct flock *lock); -+ -+ /** -+ * Acquire, modify or release a POSIX file lock -+ * -+ * For POSIX threads (NPTL) there's a 1-1 relation between pid and -+ * owner, but otherwise this is not always the case. For checking -+ * lock ownership, 'fi->owner' must be used. The l_pid field in -+ * 'struct flock' should only be used to fill in this field in -+ * getlk(). -+ * -+ * Note: if the locking methods are not implemented, the kernel -+ * will still allow file locking to work locally. Hence these are -+ * only interesting for network filesystems and similar. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param lock the region/type to set -+ * @param sleep locking operation may sleep -+ */ -+ void (*setlk) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi, -+ struct flock *lock, int sleep); -+ -+ /** -+ * Map block index within file to block index within device -+ * -+ * Note: This makes sense only for block device backed filesystems -+ * mounted with the 'blkdev' option -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure, i.e. 
all future bmap() requests will -+ * fail with the same error code without being send to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_bmap -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param blocksize unit of block index -+ * @param idx block index within file -+ */ -+ void (*bmap) (fuse_req_t req, fuse_ino_t ino, size_t blocksize, -+ uint64_t idx); -+ -+ /** -+ * Ioctl -+ * -+ * Note: For unrestricted ioctls (not allowed for FUSE -+ * servers), data in and out areas can be discovered by giving -+ * iovs and setting FUSE_IOCTL_RETRY in *flags*. For -+ * restricted ioctls, kernel prepares in/out data area -+ * according to the information encoded in cmd. -+ * -+ * Valid replies: -+ * fuse_reply_ioctl_retry -+ * fuse_reply_ioctl -+ * fuse_reply_ioctl_iov -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param cmd ioctl command -+ * @param arg ioctl argument -+ * @param fi file information -+ * @param flags for FUSE_IOCTL_* flags -+ * @param in_buf data fetched from the caller -+ * @param in_bufsz number of fetched bytes -+ * @param out_bufsz maximum size of output data -+ * -+ * Note : the unsigned long request submitted by the application -+ * is truncated to 32 bits. -+ */ -+ void (*ioctl) (fuse_req_t req, fuse_ino_t ino, unsigned int cmd, -+ void *arg, struct fuse_file_info *fi, unsigned flags, -+ const void *in_buf, size_t in_bufsz, size_t out_bufsz); -+ -+ /** -+ * Poll for IO readiness -+ * -+ * Note: If ph is non-NULL, the client should notify -+ * when IO readiness events occur by calling -+ * fuse_lowlevel_notify_poll() with the specified ph. -+ * -+ * Regardless of the number of times poll with a non-NULL ph -+ * is received, single notification is enough to clear all. -+ * Notifying more times incurs overhead but doesn't harm -+ * correctness. 
-+ * -+ * The callee is responsible for destroying ph with -+ * fuse_pollhandle_destroy() when no longer in use. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as success (with a kernel-defined default poll-mask) and -+ * future calls to poll() will succeed the same way without being sent -+ * to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_poll -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param ph poll handle to be used for notification -+ */ -+ void (*poll) (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct fuse_pollhandle *ph); -+ -+ /** -+ * Write data made available in a buffer -+ * -+ * This is a more generic version of the ->write() method. If -+ * FUSE_CAP_SPLICE_READ is set in fuse_conn_info.want and the -+ * kernel supports splicing from the fuse device, then the -+ * data will be made available in pipe for supporting zero -+ * copy data transfer. -+ * -+ * buf->count is guaranteed to be one (and thus buf->idx is -+ * always zero). The write_buf handler must ensure that -+ * bufv->off is correctly updated (reflecting the number of -+ * bytes read from bufv->buf[0]). -+ * -+ * Unless FUSE_CAP_HANDLE_KILLPRIV is disabled, this method is -+ * expected to reset the setuid and setgid bits.
-+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param bufv buffer containing the data -+ * @param off offset to write to -+ * @param fi file information -+ */ -+ void (*write_buf) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_bufvec *bufv, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Callback function for the retrieve request -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() -+ * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() -+ * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() -+ * @param bufv the buffer containing the returned data -+ */ -+ void (*retrieve_reply) (fuse_req_t req, void *cookie, fuse_ino_t ino, -+ off_t offset, struct fuse_bufvec *bufv); -+ -+ /** -+ * Forget about multiple inodes -+ * -+ * See description of the forget function for more -+ * information. -+ * -+ * Valid replies: -+ * fuse_reply_none -+ * -+ * @param req request handle -+ */ -+ void (*forget_multi) (fuse_req_t req, size_t count, -+ struct fuse_forget_data *forgets); -+ -+ /** -+ * Acquire, modify or release a BSD file lock -+ * -+ * Note: if the locking methods are not implemented, the kernel -+ * will still allow file locking to work locally. Hence these are -+ * only interesting for network filesystems and similar. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param fi file information -+ * @param op the locking operation, see flock(2) -+ */ -+ void (*flock) (fuse_req_t req, fuse_ino_t ino, -+ struct fuse_file_info *fi, int op); -+ -+ /** -+ * Allocate requested space. If this function returns success then -+ * subsequent writes to the specified range shall not fail due to the lack -+ * of free space on the file system storage media. 
-+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future fallocate() requests will fail with EOPNOTSUPP without being -+ * send to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param offset starting point for allocated region -+ * @param length size of allocated region -+ * @param mode determines the operation to be performed on the given range, -+ * see fallocate(2) -+ */ -+ void (*fallocate) (fuse_req_t req, fuse_ino_t ino, int mode, -+ off_t offset, off_t length, struct fuse_file_info *fi); -+ -+ /** -+ * Read directory with attributes -+ * -+ * Send a buffer filled using fuse_add_direntry_plus(), with size not -+ * exceeding the requested size. Send an empty buffer on end of -+ * stream. -+ * -+ * fi->fh will contain the value set by the opendir method, or -+ * will be undefined if the opendir method didn't set any value. -+ * -+ * In contrast to readdir() (which does not affect the lookup counts), -+ * the lookup count of every entry returned by readdirplus(), except "." -+ * and "..", is incremented by one. -+ * -+ * Valid replies: -+ * fuse_reply_buf -+ * fuse_reply_data -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param size maximum number of bytes to send -+ * @param off offset to continue reading the directory stream -+ * @param fi file information -+ */ -+ void (*readdirplus) (fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, -+ struct fuse_file_info *fi); -+ -+ /** -+ * Copy a range of data from one file to another -+ * -+ * Performs an optimized copy between two file descriptors without the -+ * additional cost of transferring data through the FUSE kernel module -+ * to user space (glibc) and then back into the FUSE filesystem again. 
-+ * -+ * In case this method is not implemented, glibc falls back to reading -+ * data from the source and writing to the destination. Effectively -+ * doing an inefficient copy of the data. -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure with error code EOPNOTSUPP, i.e. all -+ * future copy_file_range() requests will fail with EOPNOTSUPP without -+ * being sent to the filesystem process. -+ * -+ * Valid replies: -+ * fuse_reply_write -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino_in the inode number of the source file -+ * @param off_in starting point from where the data should be read -+ * @param fi_in file information of the source file -+ * @param ino_out the inode number of the destination file -+ * @param off_out starting point where the data should be written -+ * @param fi_out file information of the destination file -+ * @param len maximum size of the data to copy -+ * @param flags passed along with the copy_file_range() syscall -+ */ -+ void (*copy_file_range) (fuse_req_t req, fuse_ino_t ino_in, -+ off_t off_in, struct fuse_file_info *fi_in, -+ fuse_ino_t ino_out, off_t off_out, -+ struct fuse_file_info *fi_out, size_t len, -+ int flags); -+ -+ /** -+ * Find next data or hole after the specified offset -+ * -+ * If this request is answered with an error code of ENOSYS, this is -+ * treated as a permanent failure, i.e. all future lseek() requests will -+ * fail with the same error code without being sent to the filesystem -+ * process. -+ * -+ * Valid replies: -+ * fuse_reply_lseek -+ * fuse_reply_err -+ * -+ * @param req request handle -+ * @param ino the inode number -+ * @param off offset to start search from -+ * @param whence either SEEK_DATA or SEEK_HOLE -+ * @param fi file information -+ */ -+ void (*lseek) (fuse_req_t req, fuse_ino_t ino, off_t off, int whence, -+ struct fuse_file_info *fi); -+}; -+ -+/** -+ * Reply with an error code or success.
-+ * -+ * Possible requests: -+ * all except forget -+ * -+ * Wherever possible, error codes should be chosen from the list of -+ * documented error conditions in the corresponding system calls -+ * manpage. -+ * -+ * An error code of ENOSYS is sometimes treated specially. This is -+ * indicated in the documentation of the affected handler functions. -+ * -+ * The following requests may be answered with a zero error code: -+ * unlink, rmdir, rename, flush, release, fsync, fsyncdir, setxattr, -+ * removexattr, setlk. -+ * -+ * @param req request handle -+ * @param err the positive error value, or zero for success -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_err(fuse_req_t req, int err); -+ -+/** -+ * Don't send reply -+ * -+ * Possible requests: -+ * forget -+ * forget_multi -+ * retrieve_reply -+ * -+ * @param req request handle -+ */ -+void fuse_reply_none(fuse_req_t req); -+ -+/** -+ * Reply with a directory entry -+ * -+ * Possible requests: -+ * lookup, mknod, mkdir, symlink, link -+ * -+ * Side effects: -+ * increments the lookup count on success -+ * -+ * @param req request handle -+ * @param e the entry parameters -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e); -+ -+/** -+ * Reply with a directory entry and open parameters -+ * -+ * currently the following members of 'fi' are used: -+ * fh, direct_io, keep_cache -+ * -+ * Possible requests: -+ * create -+ * -+ * Side effects: -+ * increments the lookup count on success -+ * -+ * @param req request handle -+ * @param e the entry parameters -+ * @param fi file information -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, -+ const struct fuse_file_info *fi); -+ -+/** -+ * Reply with attributes -+ * -+ * Possible requests: -+ * getattr, setattr -+ * -+ * @param req request handle -+
* @param attr the attributes -+ * @param attr_timeout validity timeout (in seconds) for the attributes -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_attr(fuse_req_t req, const struct stat *attr, -+ double attr_timeout); -+ -+/** -+ * Reply with the contents of a symbolic link -+ * -+ * Possible requests: -+ * readlink -+ * -+ * @param req request handle -+ * @param link symbolic link contents -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_readlink(fuse_req_t req, const char *link); -+ -+/** -+ * Reply with open parameters -+ * -+ * currently the following members of 'fi' are used: -+ * fh, direct_io, keep_cache -+ * -+ * Possible requests: -+ * open, opendir -+ * -+ * @param req request handle -+ * @param fi file information -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *fi); -+ -+/** -+ * Reply with number of bytes written -+ * -+ * Possible requests: -+ * write -+ * -+ * @param req request handle -+ * @param count the number of bytes written -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_write(fuse_req_t req, size_t count); -+ -+/** -+ * Reply with data -+ * -+ * Possible requests: -+ * read, readdir, getxattr, listxattr -+ * -+ * @param req request handle -+ * @param buf buffer containing data -+ * @param size the size of data in bytes -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); -+ -+/** -+ * Reply with data copied/moved from buffer(s) -+ * -+ * Zero copy data transfer ("splicing") will be used under -+ * the following circumstances: -+ * -+ * 1. FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.want, and -+ * 2. the kernel supports splicing from the fuse device -+ * (FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.capable), and -+ * 3. 
*flags* does not contain FUSE_BUF_NO_SPLICE -+ * 4. The amount of data that is provided in file-descriptor backed -+ * buffers (i.e., buffers for which bufv[n].flags == FUSE_BUF_FD) -+ * is at least twice the page size. -+ * -+ * In order for SPLICE_F_MOVE to be used, the following additional -+ * conditions have to be fulfilled: -+ * -+ * 1. FUSE_CAP_SPLICE_MOVE is set in fuse_conn_info.want, and -+ * 2. the kernel supports it (i.e, FUSE_CAP_SPLICE_MOVE is set in -+ fuse_conn_info.capable), and -+ * 3. *flags* contains FUSE_BUF_SPLICE_MOVE -+ * -+ * Note that, if splice is used, the data is actually spliced twice: -+ * once into a temporary pipe (to prepend header data), and then again -+ * into the kernel. If some of the provided buffers are memory-backed, -+ * the data in them is copied in step one and spliced in step two. -+ * -+ * The FUSE_BUF_SPLICE_FORCE_SPLICE and FUSE_BUF_SPLICE_NONBLOCK flags -+ * are silently ignored. -+ * -+ * Possible requests: -+ * read, readdir, getxattr, listxattr -+ * -+ * Side effects: -+ * when used to return data from a readdirplus() (but not readdir()) -+ * call, increments the lookup count of each returned entry by one -+ * on success. 
-+ * -+ * @param req request handle -+ * @param bufv buffer vector -+ * @param flags flags controlling the copy -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags); -+ -+/** -+ * Reply with data vector -+ * -+ * Possible requests: -+ * read, readdir, getxattr, listxattr -+ * -+ * @param req request handle -+ * @param iov the vector containing the data -+ * @param count the size of vector -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_iov(fuse_req_t req, const struct iovec *iov, int count); -+ -+/** -+ * Reply with filesystem statistics -+ * -+ * Possible requests: -+ * statfs -+ * -+ * @param req request handle -+ * @param stbuf filesystem statistics -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf); -+ -+/** -+ * Reply with needed buffer size -+ * -+ * Possible requests: -+ * getxattr, listxattr -+ * -+ * @param req request handle -+ * @param count the buffer size needed in bytes -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_xattr(fuse_req_t req, size_t count); -+ -+/** -+ * Reply with file lock information -+ * -+ * Possible requests: -+ * getlk -+ * -+ * @param req request handle -+ * @param lock the lock information -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_lock(fuse_req_t req, const struct flock *lock); -+ -+/** -+ * Reply with block index -+ * -+ * Possible requests: -+ * bmap -+ * -+ * @param req request handle -+ * @param idx block index within device -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_bmap(fuse_req_t req, uint64_t idx); -+ -+/* ----------------------------------------------------------- * -+ * Filling a buffer in readdir * -+ * 
----------------------------------------------------------- */ -+ -+/** -+ * Add a directory entry to the buffer -+ * -+ * Buffer needs to be large enough to hold the entry. If it's not, -+ * then the entry is not filled in but the size of the entry is still -+ * returned. The caller can check this by comparing the bufsize -+ * parameter with the returned entry size. If the entry size is -+ * larger than the buffer size, the operation failed. -+ * -+ * From the 'stbuf' argument the st_ino field and bits 12-15 of the -+ * st_mode field are used. The other fields are ignored. -+ * -+ * *off* should be any non-zero value that the filesystem can use to -+ * identify the current point in the directory stream. It does not -+ * need to be the actual physical position. A value of zero is -+ * reserved to mean "from the beginning", and should therefore never -+ * be used (the first call to fuse_add_direntry should be passed the -+ * offset of the second directory entry). -+ * -+ * @param req request handle -+ * @param buf the point where the new entry will be added to the buffer -+ * @param bufsize remaining size of the buffer -+ * @param name the name of the entry -+ * @param stbuf the file attributes -+ * @param off the offset of the next entry -+ * @return the space needed for the entry -+ */ -+size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, -+ const char *name, const struct stat *stbuf, -+ off_t off); -+ -+/** -+ * Add a directory entry to the buffer with the attributes -+ * -+ * See documentation of `fuse_add_direntry()` for more details. 
-+ * -+ * @param req request handle -+ * @param buf the point where the new entry will be added to the buffer -+ * @param bufsize remaining size of the buffer -+ * @param name the name of the entry -+ * @param e the directory entry -+ * @param off the offset of the next entry -+ * @return the space needed for the entry -+ */ -+size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, -+ const char *name, -+ const struct fuse_entry_param *e, off_t off); -+ -+/** -+ * Reply to ask for data fetch and output buffer preparation. ioctl -+ * will be retried with the specified input data fetched and output -+ * buffer prepared. -+ * -+ * Possible requests: -+ * ioctl -+ * -+ * @param req request handle -+ * @param in_iov iovec specifying data to fetch from the caller -+ * @param in_count number of entries in in_iov -+ * @param out_iov iovec specifying addresses to write output to -+ * @param out_count number of entries in out_iov -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_ioctl_retry(fuse_req_t req, -+ const struct iovec *in_iov, size_t in_count, -+ const struct iovec *out_iov, size_t out_count); -+ -+/** -+ * Reply to finish ioctl -+ * -+ * Possible requests: -+ * ioctl -+ * -+ * @param req request handle -+ * @param result result to be passed to the caller -+ * @param buf buffer containing output data -+ * @param size length of output data -+ */ -+int fuse_reply_ioctl(fuse_req_t req, int result, const void *buf, size_t size); -+ -+/** -+ * Reply to finish ioctl with iov buffer -+ * -+ * Possible requests: -+ * ioctl -+ * -+ * @param req request handle -+ * @param result result to be passed to the caller -+ * @param iov the vector containing the data -+ * @param count the size of vector -+ */ -+int fuse_reply_ioctl_iov(fuse_req_t req, int result, const struct iovec *iov, -+ int count); -+ -+/** -+ * Reply with poll result event mask -+ * -+ * @param req request handle -+ * @param revents poll result event mask -+ 
*/ -+int fuse_reply_poll(fuse_req_t req, unsigned revents); -+ -+/** -+ * Reply with offset -+ * -+ * Possible requests: -+ * lseek -+ * -+ * @param req request handle -+ * @param off offset of next data or hole -+ * @return zero for success, -errno for failure to send reply -+ */ -+int fuse_reply_lseek(fuse_req_t req, off_t off); -+ -+/* ----------------------------------------------------------- * -+ * Notification * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Notify IO readiness event -+ * -+ * For more information, please read comment for poll operation. -+ * -+ * @param ph poll handle to notify IO readiness event for -+ */ -+int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph); -+ -+/** -+ * Notify to invalidate cache for an inode. -+ * -+ * Added in FUSE protocol version 7.12. If the kernel does not support -+ * this (or a newer) version, the function will return -ENOSYS and do -+ * nothing. -+ * -+ * If the filesystem has writeback caching enabled, invalidating an -+ * inode will first trigger a writeback of all dirty pages. The call -+ * will block until all writeback requests have completed and the -+ * inode has been invalidated. It will, however, not wait for -+ * completion of pending writeback requests that have been issued -+ * before. -+ * -+ * If there are no dirty pages, this function will never block. 
-+ * -+ * @param se the session object -+ * @param ino the inode number -+ * @param off the offset in the inode where to start invalidating -+ * or negative to invalidate attributes only -+ * @param len the amount of cache to invalidate or 0 for all -+ * @return zero for success, -errno for failure -+ */ -+int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, -+ off_t off, off_t len); -+ -+/** -+ * Notify to invalidate parent attributes and the dentry matching -+ * parent/name -+ * -+ * To avoid a deadlock this function must not be called in the -+ * execution path of a related filesystem operation or within any code -+ * that could hold a lock that could be needed to execute such an -+ * operation. As of kernel 4.18, a "related operation" is a lookup(), -+ * symlink(), mknod(), mkdir(), unlink(), rename(), link() or create() -+ * request for the parent, and a setattr(), unlink(), rmdir(), -+ * rename(), setxattr(), removexattr(), readdir() or readdirplus() -+ * request for the inode itself. -+ * -+ * When called correctly, this function will never block. -+ * -+ * Added in FUSE protocol version 7.12. If the kernel does not support -+ * this (or a newer) version, the function will return -ENOSYS and do -+ * nothing. -+ * -+ * @param se the session object -+ * @param parent inode number -+ * @param name file name -+ * @param namelen strlen() of file name -+ * @return zero for success, -errno for failure -+ */ -+int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, -+ const char *name, size_t namelen); -+ -+/** -+ * This function behaves like fuse_lowlevel_notify_inval_entry() with -+ * the following additional effect (at least as of Linux kernel 4.8): -+ * -+ * If the provided *child* inode matches the inode that is currently -+ * associated with the cached dentry, and if there are any inotify -+ * watches registered for the dentry, then the watchers are informed -+ * that the dentry has been deleted.
-+ * -+ * To avoid a deadlock this function must not be called while -+ * executing a related filesystem operation or while holding a lock -+ * that could be needed to execute such an operation (see the -+ * description of fuse_lowlevel_notify_inval_entry() for more -+ * details). -+ * -+ * When called correctly, this function will never block. -+ * -+ * Added in FUSE protocol version 7.18. If the kernel does not support -+ * this (or a newer) version, the function will return -ENOSYS and do -+ * nothing. -+ * -+ * @param se the session object -+ * @param parent inode number -+ * @param child inode number -+ * @param name file name -+ * @param namelen strlen() of file name -+ * @return zero for success, -errno for failure -+ */ -+int fuse_lowlevel_notify_delete(struct fuse_session *se, -+ fuse_ino_t parent, fuse_ino_t child, -+ const char *name, size_t namelen); -+ -+/** -+ * Store data to the kernel buffers -+ * -+ * Synchronously store data in the kernel buffers belonging to the -+ * given inode. The stored data is marked up-to-date (no read will be -+ * performed against it, unless it's invalidated or evicted from the -+ * cache). -+ * -+ * If the stored data overflows the current file size, then the size -+ * is extended, similarly to a write(2) on the filesystem. -+ * -+ * If this function returns an error, then the store wasn't fully -+ * completed, but it may have been partially completed. -+ * -+ * Added in FUSE protocol version 7.15. If the kernel does not support -+ * this (or a newer) version, the function will return -ENOSYS and do -+ * nothing.
-+ * -+ * @param se the session object -+ * @param ino the inode number -+ * @param offset the starting offset into the file to store to -+ * @param bufv buffer vector -+ * @param flags flags controlling the copy -+ * @return zero for success, -errno for failure -+ */ -+int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -+ off_t offset, struct fuse_bufvec *bufv, -+ enum fuse_buf_copy_flags flags); -+/** -+ * Retrieve data from the kernel buffers -+ * -+ * Retrieve data in the kernel buffers belonging to the given inode. -+ * If successful then the retrieve_reply() method will be called with -+ * the returned data. -+ * -+ * Only present pages are returned in the retrieve reply. Retrieving -+ * stops when it finds a non-present page and only data prior to that -+ * is returned. -+ * -+ * If this function returns an error, then the retrieve will not be -+ * completed and no reply will be sent. -+ * -+ * This function doesn't change the dirty state of pages in the kernel -+ * buffer. For dirty pages the write() method will be called -+ * regardless of having been retrieved previously. -+ * -+ * Added in FUSE protocol version 7.15. If the kernel does not support -+ * this (or a newer) version, the function will return -ENOSYS and do -+ * nothing. 
-+ * -+ * @param se the session object -+ * @param ino the inode number -+ * @param size the number of bytes to retrieve -+ * @param offset the starting offset into the file to retrieve from -+ * @param cookie user data to supply to the reply callback -+ * @return zero for success, -errno for failure -+ */ -+int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -+ size_t size, off_t offset, void *cookie); -+ -+ -+/* ----------------------------------------------------------- * -+ * Utility functions * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Get the userdata from the request -+ * -+ * @param req request handle -+ * @return the user data passed to fuse_session_new() -+ */ -+void *fuse_req_userdata(fuse_req_t req); -+ -+/** -+ * Get the context from the request -+ * -+ * The pointer returned by this function will only be valid for the -+ * request's lifetime -+ * -+ * @param req request handle -+ * @return the context structure -+ */ -+const struct fuse_ctx *fuse_req_ctx(fuse_req_t req); -+ -+/** -+ * Get the current supplementary group IDs for the specified request -+ * -+ * Similar to the getgroups(2) system call, except the return value is -+ * always the total number of group IDs, even if it is larger than the -+ * specified size. -+ * -+ * The current fuse kernel module in linux (as of 2.6.30) doesn't pass -+ * the group list to userspace, hence this function needs to parse -+ * "/proc/$TID/task/$TID/status" to get the group IDs. -+ * -+ * This feature may not be supported on all operating systems. In -+ * such a case this function will return -ENOSYS. 
-+ * -+ * @param req request handle -+ * @param size size of given array -+ * @param list array of group IDs to be filled in -+ * @return the total number of supplementary group IDs or -errno on failure -+ */ -+int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]); -+ -+/** -+ * Callback function for an interrupt -+ * -+ * @param req interrupted request -+ * @param data user data -+ */ -+typedef void (*fuse_interrupt_func_t)(fuse_req_t req, void *data); -+ -+/** -+ * Register/unregister callback for an interrupt -+ * -+ * If an interrupt has already happened, then the callback function is -+ * called from within this function, hence it's not possible for -+ * interrupts to be lost. -+ * -+ * @param req request handle -+ * @param func the callback function or NULL for unregister -+ * @param data user data passed to the callback function -+ */ -+void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, -+ void *data); -+ -+/** -+ * Check if a request has already been interrupted -+ * -+ * @param req request handle -+ * @return 1 if the request has been interrupted, 0 otherwise -+ */ -+int fuse_req_interrupted(fuse_req_t req); -+ -+ -+/* ----------------------------------------------------------- * -+ * Inquiry functions * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Print low-level version information to stdout. -+ */ -+void fuse_lowlevel_version(void); -+ -+/** -+ * Print available low-level options to stdout. This is not an -+ * exhaustive list, but includes only those options that may be of -+ * interest to an end-user of a file system. -+ */ -+void fuse_lowlevel_help(void); -+ -+/** -+ * Print available options for `fuse_parse_cmdline()`. 
-+ */ -+void fuse_cmdline_help(void); -+ -+/* ----------------------------------------------------------- * -+ * Filesystem setup & teardown * -+ * ----------------------------------------------------------- */ -+ -+struct fuse_cmdline_opts { -+ int singlethread; -+ int foreground; -+ int debug; -+ int nodefault_subtype; -+ char *mountpoint; -+ int show_version; -+ int show_help; -+ int clone_fd; -+ unsigned int max_idle_threads; -+}; -+ -+/** -+ * Utility function to parse common options for simple file systems -+ * using the low-level API. A help text that describes the available -+ * options can be printed with `fuse_cmdline_help`. A single -+ * non-option argument is treated as the mountpoint. Multiple -+ * non-option arguments will result in an error. -+ * -+ * If neither -o subtype= or -o fsname= options are given, a new -+ * subtype option will be added and set to the basename of the program -+ * (the fsname will remain unset, and then defaults to "fuse"). -+ * -+ * Known options will be removed from *args*, unknown options will -+ * remain. -+ * -+ * @param args argument vector (input+output) -+ * @param opts output argument for parsed options -+ * @return 0 on success, -1 on failure -+ */ -+int fuse_parse_cmdline(struct fuse_args *args, -+ struct fuse_cmdline_opts *opts); -+ -+/** -+ * Create a low level session. -+ * -+ * Returns a session structure suitable for passing to -+ * fuse_session_mount() and fuse_session_loop(). -+ * -+ * This function accepts most file-system independent mount options -+ * (like context, nodev, ro - see mount(8)), as well as the general -+ * fuse mount options listed in mount.fuse(8) (e.g. -o allow_root and -+ * -o default_permissions, but not ``-o use_ino``). Instead of `-o -+ * debug`, debugging may also enabled with `-d` or `--debug`. -+ * -+ * If not all options are known, an error message is written to stderr -+ * and the function returns NULL. 
-+ * -+ * Option parsing skips argv[0], which is assumed to contain the -+ * program name. To prevent accidentally passing an option in -+ * argv[0], this element must always be present (even if no options -+ * are specified). It may be set to the empty string ('\0') if no -+ * reasonable value can be provided. -+ * -+ * @param args argument vector -+ * @param op the (low-level) filesystem operations -+ * @param op_size sizeof(struct fuse_lowlevel_ops) -+ * @param userdata user data -+ * -+ * @return the fuse session on success, NULL on failure -+ **/ -+struct fuse_session *fuse_session_new(struct fuse_args *args, -+ const struct fuse_lowlevel_ops *op, -+ size_t op_size, void *userdata); -+ -+/** -+ * Mount a FUSE file system. -+ * -+ * @param mountpoint the mount point path -+ * @param se session object -+ * -+ * @return 0 on success, -1 on failure. -+ **/ -+int fuse_session_mount(struct fuse_session *se, const char *mountpoint); -+ -+/** -+ * Enter a single threaded, blocking event loop. -+ * -+ * When the event loop terminates because the connection to the FUSE -+ * kernel module has been closed, this function returns zero. This -+ * happens when the filesystem is unmounted regularly (by the -+ * filesystem owner or root running the umount(8) or fusermount(1) -+ * command), or if connection is explicitly severed by writing ``1`` -+ * to the``abort`` file in ``/sys/fs/fuse/connections/NNN``. The only -+ * way to distinguish between these two conditions is to check if the -+ * filesystem is still mounted after the session loop returns. -+ * -+ * When some error occurs during request processing, the function -+ * returns a negated errno(3) value. -+ * -+ * If the loop has been terminated because of a signal handler -+ * installed by fuse_set_signal_handlers(), this function returns the -+ * (positive) signal value that triggered the exit. 
-+ * -+ * @param se the session -+ * @return 0, -errno, or a signal value -+ */ -+int fuse_session_loop(struct fuse_session *se); -+ -+/** -+ * Enter a multi-threaded event loop. -+ * -+ * For a description of the return value and the conditions when the -+ * event loop exits, refer to the documentation of -+ * fuse_session_loop(). -+ * -+ * @param se the session -+ * @param config session loop configuration -+ * @return see fuse_session_loop() -+ */ -+#if FUSE_USE_VERSION < 32 -+int fuse_session_loop_mt_31(struct fuse_session *se, int clone_fd); -+#define fuse_session_loop_mt(se, clone_fd) fuse_session_loop_mt_31(se, clone_fd) -+#else -+int fuse_session_loop_mt(struct fuse_session *se, struct fuse_loop_config *config); -+#endif -+ -+/** -+ * Flag a session as terminated. -+ * -+ * This function is invoked by the POSIX signal handlers, when -+ * registered using fuse_set_signal_handlers(). It will cause any -+ * running event loops to terminate on the next opportunity. -+ * -+ * @param se the session -+ */ -+void fuse_session_exit(struct fuse_session *se); -+ -+/** -+ * Reset the terminated flag of a session -+ * -+ * @param se the session -+ */ -+void fuse_session_reset(struct fuse_session *se); -+ -+/** -+ * Query the terminated flag of a session -+ * -+ * @param se the session -+ * @return 1 if exited, 0 if not exited -+ */ -+int fuse_session_exited(struct fuse_session *se); -+ -+/** -+ * Ensure that file system is unmounted. -+ * -+ * In regular operation, the file system is typically unmounted by the -+ * user calling umount(8) or fusermount(1), which then terminates the -+ * FUSE session loop. However, the session loop may also terminate as -+ * a result of an explicit call to fuse_session_exit() (e.g. by a -+ * signal handler installed by fuse_set_signal_handler()). 
In this -+ * case the filesystem remains mounted, but any attempt to access it -+ * will block (while the filesystem process is still running) or give -+ * an ESHUTDOWN error (after the filesystem process has terminated). -+ * -+ * If the communication channel with the FUSE kernel module is still -+ * open (i.e., if the session loop was terminated by an explicit call -+ * to fuse_session_exit()), this function will close it and unmount -+ * the filesystem. If the communication channel has been closed by the -+ * kernel, this method will do (almost) nothing. -+ * -+ * NOTE: The above semantics mean that if the connection to the kernel -+ * is terminated via the ``/sys/fs/fuse/connections/NNN/abort`` file, -+ * this method will *not* unmount the filesystem. -+ * -+ * @param se the session -+ */ -+void fuse_session_unmount(struct fuse_session *se); -+ -+/** -+ * Destroy a session -+ * -+ * @param se the session -+ */ -+void fuse_session_destroy(struct fuse_session *se); -+ -+/* ----------------------------------------------------------- * -+ * Custom event loop support * -+ * ----------------------------------------------------------- */ -+ -+/** -+ * Return file descriptor for communication with kernel. -+ * -+ * The file selector can be used to integrate FUSE with a custom event -+ * loop. Whenever data is available for reading on the provided fd, -+ * the event loop should call `fuse_session_receive_buf` followed by -+ * `fuse_session_process_buf` to process the request. -+ * -+ * The returned file descriptor is valid until `fuse_session_unmount` -+ * is called. -+ * -+ * @param se the session -+ * @return a file descriptor -+ */ -+int fuse_session_fd(struct fuse_session *se); -+ -+/** -+ * Process a raw request supplied in a generic buffer -+ * -+ * The fuse_buf may contain a memory buffer or a pipe file descriptor. 
-+ * -+ * @param se the session -+ * @param buf the fuse_buf containing the request -+ */ -+void fuse_session_process_buf(struct fuse_session *se, -+ const struct fuse_buf *buf); -+ -+/** -+ * Read a raw request from the kernel into the supplied buffer. -+ * -+ * Depending on file system options, system capabilities, and request -+ * size the request is either read into a memory buffer or spliced -+ * into a temporary pipe. -+ * -+ * @param se the session -+ * @param buf the fuse_buf to store the request in -+ * @return the actual size of the raw request, or -errno on error -+ */ -+int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* FUSE_LOWLEVEL_H_ */ -diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h -new file mode 100644 -index 0000000..2f6663e ---- /dev/null -+++ b/tools/virtiofsd/fuse_misc.h -@@ -0,0 +1,59 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB -+*/ -+ -+#include -+ -+/* -+ Versioned symbols cannot be used in some cases because it -+ - confuse the dynamic linker in uClibc -+ - not supported on MacOSX (in MachO binary format) -+*/ -+#if (!defined(__UCLIBC__) && !defined(__APPLE__)) -+#define FUSE_SYMVER(x) __asm__(x) -+#else -+#define FUSE_SYMVER(x) -+#endif -+ -+#ifndef USE_UCLIBC -+#define fuse_mutex_init(mut) pthread_mutex_init(mut, NULL) -+#else -+/* Is this hack still needed? 
*/ -+static inline void fuse_mutex_init(pthread_mutex_t *mut) -+{ -+ pthread_mutexattr_t attr; -+ pthread_mutexattr_init(&attr); -+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); -+ pthread_mutex_init(mut, &attr); -+ pthread_mutexattr_destroy(&attr); -+} -+#endif -+ -+#ifdef HAVE_STRUCT_STAT_ST_ATIM -+/* Linux */ -+#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atim.tv_nsec) -+#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctim.tv_nsec) -+#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtim.tv_nsec) -+#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atim.tv_nsec = (val) -+#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctim.tv_nsec = (val) -+#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtim.tv_nsec = (val) -+#elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC) -+/* FreeBSD */ -+#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atimespec.tv_nsec) -+#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctimespec.tv_nsec) -+#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtimespec.tv_nsec) -+#define ST_ATIM_NSEC_SET(stbuf, val) (stbuf)->st_atimespec.tv_nsec = (val) -+#define ST_CTIM_NSEC_SET(stbuf, val) (stbuf)->st_ctimespec.tv_nsec = (val) -+#define ST_MTIM_NSEC_SET(stbuf, val) (stbuf)->st_mtimespec.tv_nsec = (val) -+#else -+#define ST_ATIM_NSEC(stbuf) 0 -+#define ST_CTIM_NSEC(stbuf) 0 -+#define ST_MTIM_NSEC(stbuf) 0 -+#define ST_ATIM_NSEC_SET(stbuf, val) do { } while (0) -+#define ST_CTIM_NSEC_SET(stbuf, val) do { } while (0) -+#define ST_MTIM_NSEC_SET(stbuf, val) do { } while (0) -+#endif -diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h -new file mode 100644 -index 0000000..d8573e7 ---- /dev/null -+++ b/tools/virtiofsd/fuse_opt.h -@@ -0,0 +1,271 @@ -+/* -+ FUSE: Filesystem in Userspace -+ Copyright (C) 2001-2007 Miklos Szeredi -+ -+ This program can be distributed under the terms of the GNU LGPLv2. -+ See the file COPYING.LIB. 
-+*/ -+ -+#ifndef FUSE_OPT_H_ -+#define FUSE_OPT_H_ -+ -+/** @file -+ * -+ * This file defines the option parsing interface of FUSE -+ */ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/** -+ * Option description -+ * -+ * This structure describes a single option, and action associated -+ * with it, in case it matches. -+ * -+ * More than one such match may occur, in which case the action for -+ * each match is executed. -+ * -+ * There are three possible actions in case of a match: -+ * -+ * i) An integer (int or unsigned) variable determined by 'offset' is -+ * set to 'value' -+ * -+ * ii) The processing function is called, with 'value' as the key -+ * -+ * iii) An integer (any) or string (char *) variable determined by -+ * 'offset' is set to the value of an option parameter -+ * -+ * 'offset' should normally be either set to -+ * -+ * - 'offsetof(struct foo, member)' actions i) and iii) -+ * -+ * - -1 action ii) -+ * -+ * The 'offsetof()' macro is defined in the header. -+ * -+ * The template determines which options match, and also have an -+ * effect on the action. Normally the action is either i) or ii), but -+ * if a format is present in the template, then action iii) is -+ * performed. -+ * -+ * The types of templates are: -+ * -+ * 1) "-x", "-foo", "--foo", "--foo-bar", etc. These match only -+ * themselves. Invalid values are "--" and anything beginning -+ * with "-o" -+ * -+ * 2) "foo", "foo-bar", etc. These match "-ofoo", "-ofoo-bar" or -+ * the relevant option in a comma separated option list -+ * -+ * 3) "bar=", "--foo=", etc. These are variations of 1) and 2) -+ * which have a parameter -+ * -+ * 4) "bar=%s", "--foo=%lu", etc. Same matching as above but perform -+ * action iii). -+ * -+ * 5) "-x ", etc. Matches either "-xparam" or "-x param" as -+ * two separate arguments -+ * -+ * 6) "-x %s", etc. Combination of 4) and 5) -+ * -+ * If the format is "%s", memory is allocated for the string unlike with -+ * scanf(). 
The previous value (if non-NULL) stored at the this location is -+ * freed. -+ */ -+struct fuse_opt { -+ /** Matching template and optional parameter formatting */ -+ const char *templ; -+ -+ /** -+ * Offset of variable within 'data' parameter of fuse_opt_parse() -+ * or -1 -+ */ -+ unsigned long offset; -+ -+ /** -+ * Value to set the variable to, or to be passed as 'key' to the -+ * processing function. Ignored if template has a format -+ */ -+ int value; -+}; -+ -+/** -+ * Key option. In case of a match, the processing function will be -+ * called with the specified key. -+ */ -+#define FUSE_OPT_KEY(templ, key) { templ, -1U, key } -+ -+/** -+ * Last option. An array of 'struct fuse_opt' must end with a NULL -+ * template value -+ */ -+#define FUSE_OPT_END { NULL, 0, 0 } -+ -+/** -+ * Argument list -+ */ -+struct fuse_args { -+ /** Argument count */ -+ int argc; -+ -+ /** Argument vector. NULL terminated */ -+ char **argv; -+ -+ /** Is 'argv' allocated? */ -+ int allocated; -+}; -+ -+/** -+ * Initializer for 'struct fuse_args' -+ */ -+#define FUSE_ARGS_INIT(argc, argv) { argc, argv, 0 } -+ -+/** -+ * Key value passed to the processing function if an option did not -+ * match any template -+ */ -+#define FUSE_OPT_KEY_OPT -1 -+ -+/** -+ * Key value passed to the processing function for all non-options -+ * -+ * Non-options are the arguments beginning with a character other than -+ * '-' or all arguments after the special '--' option -+ */ -+#define FUSE_OPT_KEY_NONOPT -2 -+ -+/** -+ * Special key value for options to keep -+ * -+ * Argument is not passed to processing function, but behave as if the -+ * processing function returned 1 -+ */ -+#define FUSE_OPT_KEY_KEEP -3 -+ -+/** -+ * Special key value for options to discard -+ * -+ * Argument is not passed to processing function, but behave as if the -+ * processing function returned zero -+ */ -+#define FUSE_OPT_KEY_DISCARD -4 -+ -+/** -+ * Processing function -+ * -+ * This function is called if -+ * - option did 
not match any 'struct fuse_opt' -+ * - argument is a non-option -+ * - option did match and offset was set to -1 -+ * -+ * The 'arg' parameter will always contain the whole argument or -+ * option including the parameter if exists. A two-argument option -+ * ("-x foo") is always converted to single argument option of the -+ * form "-xfoo" before this function is called. -+ * -+ * Options of the form '-ofoo' are passed to this function without the -+ * '-o' prefix. -+ * -+ * The return value of this function determines whether this argument -+ * is to be inserted into the output argument vector, or discarded. -+ * -+ * @param data is the user data passed to the fuse_opt_parse() function -+ * @param arg is the whole argument or option -+ * @param key determines why the processing function was called -+ * @param outargs the current output argument list -+ * @return -1 on error, 0 if arg is to be discarded, 1 if arg should be kept -+ */ -+typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key, -+ struct fuse_args *outargs); -+ -+/** -+ * Option parsing function -+ * -+ * If 'args' was returned from a previous call to fuse_opt_parse() or -+ * it was constructed from -+ * -+ * A NULL 'args' is equivalent to an empty argument vector -+ * -+ * A NULL 'opts' is equivalent to an 'opts' array containing a single -+ * end marker -+ * -+ * A NULL 'proc' is equivalent to a processing function always -+ * returning '1' -+ * -+ * @param args is the input and output argument list -+ * @param data is the user data -+ * @param opts is the option description array -+ * @param proc is the processing function -+ * @return -1 on error, 0 on success -+ */ -+int fuse_opt_parse(struct fuse_args *args, void *data, -+ const struct fuse_opt opts[], fuse_opt_proc_t proc); -+ -+/** -+ * Add an option to a comma separated option list -+ * -+ * @param opts is a pointer to an option list, may point to a NULL value -+ * @param opt is the option to add -+ * @return -1 on allocation 
error, 0 on success -+ */ -+int fuse_opt_add_opt(char **opts, const char *opt); -+ -+/** -+ * Add an option, escaping commas, to a comma separated option list -+ * -+ * @param opts is a pointer to an option list, may point to a NULL value -+ * @param opt is the option to add -+ * @return -1 on allocation error, 0 on success -+ */ -+int fuse_opt_add_opt_escaped(char **opts, const char *opt); -+ -+/** -+ * Add an argument to a NULL terminated argument vector -+ * -+ * @param args is the structure containing the current argument list -+ * @param arg is the new argument to add -+ * @return -1 on allocation error, 0 on success -+ */ -+int fuse_opt_add_arg(struct fuse_args *args, const char *arg); -+ -+/** -+ * Add an argument at the specified position in a NULL terminated -+ * argument vector -+ * -+ * Adds the argument to the N-th position. This is useful for adding -+ * options at the beginning of the array which must not come after the -+ * special '--' option. -+ * -+ * @param args is the structure containing the current argument list -+ * @param pos is the position at which to add the argument -+ * @param arg is the new argument to add -+ * @return -1 on allocation error, 0 on success -+ */ -+int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg); -+ -+/** -+ * Free the contents of argument list -+ * -+ * The structure itself is not freed -+ * -+ * @param args is the structure containing the argument list -+ */ -+void fuse_opt_free_args(struct fuse_args *args); -+ -+ -+/** -+ * Check if an option matches -+ * -+ * @param opts is the option description array -+ * @param opt is the option to match -+ * @return 1 if a match is found, 0 if not -+ */ -+int fuse_opt_match(const struct fuse_opt opts[], const char *opt); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* FUSE_OPT_H_ */ -diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h -new file mode 100644 -index 0000000..6b77c33 ---- /dev/null -+++ 
b/tools/virtiofsd/passthrough_helpers.h -@@ -0,0 +1,76 @@ -+/* -+ * FUSE: Filesystem in Userspace -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE -+ */ -+ -+/* -+ * Creates files on the underlying file system in response to a FUSE_MKNOD -+ * operation -+ */ -+static int mknod_wrapper(int dirfd, const char *path, const char *link, -+ int mode, dev_t rdev) -+{ -+ int res; -+ -+ if (S_ISREG(mode)) { -+ res = openat(dirfd, path, O_CREAT | O_EXCL | O_WRONLY, mode); -+ if (res >= 0) -+ res = close(res); -+ } else if (S_ISDIR(mode)) { -+ res = mkdirat(dirfd, path, mode); -+ } else if (S_ISLNK(mode) && link != NULL) { -+ res = symlinkat(link, dirfd, path); -+ } else if (S_ISFIFO(mode)) { -+ res = mkfifoat(dirfd, path, mode); 
-+#ifdef __FreeBSD__ -+ } else if (S_ISSOCK(mode)) { -+ struct sockaddr_un su; -+ int fd; -+ -+ if (strlen(path) >= sizeof(su.sun_path)) { -+ errno = ENAMETOOLONG; -+ return -1; -+ } -+ fd = socket(AF_UNIX, SOCK_STREAM, 0); -+ if (fd >= 0) { -+ /* -+ * We must bind the socket to the underlying file -+ * system to create the socket file, even though -+ * we'll never listen on this socket. -+ */ -+ su.sun_family = AF_UNIX; -+ strncpy(su.sun_path, path, sizeof(su.sun_path)); -+ res = bindat(dirfd, fd, (struct sockaddr*)&su, -+ sizeof(su)); -+ if (res == 0) -+ close(fd); -+ } else { -+ res = -1; -+ } -+#endif -+ } else { -+ res = mknodat(dirfd, path, mode, rdev); -+ } -+ -+ return res; -+} --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Remove-fuse_req_getgroups.patch b/kvm-virtiofsd-Remove-fuse_req_getgroups.patch deleted file mode 100644 index 27e71f2..0000000 --- a/kvm-virtiofsd-Remove-fuse_req_getgroups.patch +++ /dev/null @@ -1,193 +0,0 @@ -From 7a1860c83ff042f3e796c449e780ee0528107213 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:08 +0000 -Subject: [PATCH 12/18] virtiofsd: Remove fuse_req_getgroups -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-2-dgilbert@redhat.com> -Patchwork-id: 94122 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/7] virtiofsd: Remove fuse_req_getgroups -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: "Dr. David Alan Gilbert" - -Remove fuse_req_getgroups that's unused in virtiofsd; it came in -from libfuse but we don't actually use it. It was called from -fuse_getgroups which we previously removed (but had left it's header -in). - -Coverity had complained about null termination in it, but removing -it is the easiest answer. - -Fixes: Coverity CID: 1413117 (String not null terminated) -Signed-off-by: Dr. 
David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 988717b46b6424907618cb845ace9d69062703af) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/fuse.h | 20 ----------- - tools/virtiofsd/fuse_lowlevel.c | 77 ----------------------------------------- - tools/virtiofsd/fuse_lowlevel.h | 21 ----------- - 3 files changed, 118 deletions(-) - -diff --git a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h -index 7a4c713..aba13fe 100644 ---- a/tools/virtiofsd/fuse.h -+++ b/tools/virtiofsd/fuse.h -@@ -1007,26 +1007,6 @@ void fuse_exit(struct fuse *f); - struct fuse_context *fuse_get_context(void); - - /** -- * Get the current supplementary group IDs for the current request -- * -- * Similar to the getgroups(2) system call, except the return value is -- * always the total number of group IDs, even if it is larger than the -- * specified size. -- * -- * The current fuse kernel module in linux (as of 2.6.30) doesn't pass -- * the group list to userspace, hence this function needs to parse -- * "/proc/$TID/task/$TID/status" to get the group IDs. -- * -- * This feature may not be supported on all operating systems. In -- * such a case this function will return -ENOSYS. 
-- * -- * @param size size of given array -- * @param list array of group IDs to be filled in -- * @return the total number of supplementary group IDs or -errno on failure -- */ --int fuse_getgroups(int size, gid_t list[]); -- --/** - * Check if the current request has already been interrupted - * - * @return 1 if the request has been interrupted, 0 otherwise -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index de2e2e0..01c418a 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2667,83 +2667,6 @@ int fuse_lowlevel_is_virtio(struct fuse_session *se) - return !!se->virtio_dev; - } - --#ifdef linux --int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) --{ -- char *buf; -- size_t bufsize = 1024; -- char path[128]; -- int ret; -- int fd; -- unsigned long pid = req->ctx.pid; -- char *s; -- -- sprintf(path, "/proc/%lu/task/%lu/status", pid, pid); -- --retry: -- buf = malloc(bufsize); -- if (buf == NULL) { -- return -ENOMEM; -- } -- -- ret = -EIO; -- fd = open(path, O_RDONLY); -- if (fd == -1) { -- goto out_free; -- } -- -- ret = read(fd, buf, bufsize); -- close(fd); -- if (ret < 0) { -- ret = -EIO; -- goto out_free; -- } -- -- if ((size_t)ret == bufsize) { -- free(buf); -- bufsize *= 4; -- goto retry; -- } -- -- ret = -EIO; -- s = strstr(buf, "\nGroups:"); -- if (s == NULL) { -- goto out_free; -- } -- -- s += 8; -- ret = 0; -- while (1) { -- char *end; -- unsigned long val = strtoul(s, &end, 0); -- if (end == s) { -- break; -- } -- -- s = end; -- if (ret < size) { -- list[ret] = val; -- } -- ret++; -- } -- --out_free: -- free(buf); -- return ret; --} --#else /* linux */ --/* -- * This is currently not implemented on other than Linux... 
-- */ --int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) --{ -- (void)req; -- (void)size; -- (void)list; -- return -ENOSYS; --} --#endif -- - void fuse_session_exit(struct fuse_session *se) - { - se->exited = 1; -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 138041e..8f6d705 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1705,27 +1705,6 @@ void *fuse_req_userdata(fuse_req_t req); - const struct fuse_ctx *fuse_req_ctx(fuse_req_t req); - - /** -- * Get the current supplementary group IDs for the specified request -- * -- * Similar to the getgroups(2) system call, except the return value is -- * always the total number of group IDs, even if it is larger than the -- * specified size. -- * -- * The current fuse kernel module in linux (as of 2.6.30) doesn't pass -- * the group list to userspace, hence this function needs to parse -- * "/proc/$TID/task/$TID/status" to get the group IDs. -- * -- * This feature may not be supported on all operating systems. In -- * such a case this function will return -ENOSYS. -- * -- * @param req request handle -- * @param size size of given array -- * @param list array of group IDs to be filled in -- * @return the total number of supplementary group IDs or -errno on failure -- */ --int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]); -- --/** - * Callback function for an interrupt - * - * @param req interrupted request --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch b/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch deleted file mode 100644 index 7f9c5bb..0000000 --- a/kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch +++ /dev/null @@ -1,271 +0,0 @@ -From 80237df2b22eca685037456e65d149fed4654165 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:48 +0100 -Subject: [PATCH 017/116] virtiofsd: Remove unused enum fuse_buf_copy_flags -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-14-dgilbert@redhat.com> -Patchwork-id: 93465 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 013/112] virtiofsd: Remove unused enum fuse_buf_copy_flags -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Xiao Yang - -Signed-off-by: Xiao Yang -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 8c3fe75e0308ba2f01d160ace534b7e386cea808) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 7 +++--- - tools/virtiofsd/fuse_common.h | 46 +--------------------------------------- - tools/virtiofsd/fuse_lowlevel.c | 13 +++++------- - tools/virtiofsd/fuse_lowlevel.h | 35 ++---------------------------- - tools/virtiofsd/passthrough_ll.c | 4 ++-- - 5 files changed, 13 insertions(+), 92 deletions(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 5df946c..4d507f3 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -171,7 +171,7 @@ static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, - - static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, - const struct fuse_buf *src, size_t src_off, -- size_t len, enum fuse_buf_copy_flags flags) -+ size_t len) - { - int src_is_fd = src->flags & FUSE_BUF_IS_FD; - int dst_is_fd = dst->flags & FUSE_BUF_IS_FD; -@@ -224,8 +224,7 @@ static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) - return 1; - } - --ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, -- enum fuse_buf_copy_flags flags) -+ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) - { - size_t copied = 0; - -@@ 
-249,7 +248,7 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv, - dst_len = dst->size - dstv->off; - len = min_size(src_len, dst_len); - -- res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len, flags); -+ res = fuse_buf_copy_one(dst, dstv->off, src, srcv->off, len); - if (res < 0) { - if (!copied) { - return res; -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -index bd9bf86..0cb33ac 100644 ---- a/tools/virtiofsd/fuse_common.h -+++ b/tools/virtiofsd/fuse_common.h -@@ -605,48 +605,6 @@ enum fuse_buf_flags { - }; - - /** -- * Buffer copy flags -- */ --enum fuse_buf_copy_flags { -- /** -- * Don't use splice(2) -- * -- * Always fall back to using read and write instead of -- * splice(2) to copy data from one file descriptor to another. -- * -- * If this flag is not set, then only fall back if splice is -- * unavailable. -- */ -- FUSE_BUF_NO_SPLICE = (1 << 1), -- -- /** -- * Force splice -- * -- * Always use splice(2) to copy data from one file descriptor -- * to another. If splice is not available, return -EINVAL. -- */ -- FUSE_BUF_FORCE_SPLICE = (1 << 2), -- -- /** -- * Try to move data with splice. -- * -- * If splice is used, try to move pages from the source to the -- * destination instead of copying. See documentation of -- * SPLICE_F_MOVE in splice(2) man page. -- */ -- FUSE_BUF_SPLICE_MOVE = (1 << 3), -- -- /** -- * Don't block on the pipe when copying data with splice -- * -- * Makes the operations on the pipe non-blocking (if the pipe -- * is full or empty). See SPLICE_F_NONBLOCK in the splice(2) -- * man page. -- */ -- FUSE_BUF_SPLICE_NONBLOCK = (1 << 4), --}; -- --/** - * Single data buffer - * - * Generic data buffer for I/O, extended attributes, etc... 
Data may -@@ -741,11 +699,9 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); - * - * @param dst destination buffer vector - * @param src source buffer vector -- * @param flags flags controlling the copy - * @return actual number of bytes copied or -errno on error - */ --ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src, -- enum fuse_buf_copy_flags flags); -+ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src); - - /* - * Signal handling -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index eb0ec49..3da80de 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -490,16 +490,14 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, - - static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - struct iovec *iov, int iov_count, -- struct fuse_bufvec *buf, unsigned int flags) -+ struct fuse_bufvec *buf) - { - size_t len = fuse_buf_size(buf); -- (void)flags; - - return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); - } - --int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags) -+int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) - { - struct iovec iov[2]; - struct fuse_out_header out; -@@ -511,7 +509,7 @@ int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, - out.unique = req->unique; - out.error = 0; - -- res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv, flags); -+ res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv); - if (res <= 0) { - fuse_free_req(req); - return res; -@@ -1969,8 +1967,7 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - } - - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags) -+ off_t offset, struct fuse_bufvec *bufv) - { - struct fuse_out_header out; - struct fuse_notify_store_out outarg; 
-@@ -1999,7 +1996,7 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - -- res = fuse_send_data_iov(se, NULL, iov, 2, bufv, flags); -+ res = fuse_send_data_iov(se, NULL, iov, 2, bufv); - if (res > 0) { - res = -res; - } -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 12a84b4..2fa225d 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1363,33 +1363,6 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); - /** - * Reply with data copied/moved from buffer(s) - * -- * Zero copy data transfer ("splicing") will be used under -- * the following circumstances: -- * -- * 1. FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.want, and -- * 2. the kernel supports splicing from the fuse device -- * (FUSE_CAP_SPLICE_WRITE is set in fuse_conn_info.capable), and -- * 3. *flags* does not contain FUSE_BUF_NO_SPLICE -- * 4. The amount of data that is provided in file-descriptor backed -- * buffers (i.e., buffers for which bufv[n].flags == FUSE_BUF_FD) -- * is at least twice the page size. -- * -- * In order for SPLICE_F_MOVE to be used, the following additional -- * conditions have to be fulfilled: -- * -- * 1. FUSE_CAP_SPLICE_MOVE is set in fuse_conn_info.want, and -- * 2. the kernel supports it (i.e, FUSE_CAP_SPLICE_MOVE is set in -- fuse_conn_info.capable), and -- * 3. *flags* contains FUSE_BUF_SPLICE_MOVE -- * -- * Note that, if splice is used, the data is actually spliced twice: -- * once into a temporary pipe (to prepend header data), and then again -- * into the kernel. If some of the provided buffers are memory-backed, -- * the data in them is copied in step one and spliced in step two. -- * -- * The FUSE_BUF_SPLICE_FORCE_SPLICE and FUSE_BUF_SPLICE_NONBLOCK flags -- * are silently ignored. 
-- * - * Possible requests: - * read, readdir, getxattr, listxattr - * -@@ -1400,11 +1373,9 @@ int fuse_reply_buf(fuse_req_t req, const char *buf, size_t size); - * - * @param req request handle - * @param bufv buffer vector -- * @param flags flags controlling the copy - * @return zero for success, -errno for failure to send reply - */ --int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags); -+int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv); - - /** - * Reply with data vector -@@ -1705,12 +1676,10 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - * @param ino the inode number - * @param offset the starting offset into the file to store to - * @param bufv buffer vector -- * @param flags flags controlling the copy - * @return zero for success, -errno for failure - */ - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv, -- enum fuse_buf_copy_flags flags); -+ off_t offset, struct fuse_bufvec *bufv); - - /* - * Utility functions -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 9377718..126a56c 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -931,7 +931,7 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, - buf.buf[0].fd = fi->fh; - buf.buf[0].pos = offset; - -- fuse_reply_data(req, &buf, FUSE_BUF_SPLICE_MOVE); -+ fuse_reply_data(req, &buf); - } - - static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, -@@ -952,7 +952,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - out_buf.buf[0].size, (unsigned long)off); - } - -- res = fuse_buf_copy(&out_buf, in_buf, 0); -+ res = fuse_buf_copy(&out_buf, in_buf); - if (res < 0) { - fuse_reply_err(req, -res); - } else { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch 
b/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch deleted file mode 100644 index e1a3cd1..0000000 --- a/kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch +++ /dev/null @@ -1,72 +0,0 @@ -From b8d62021f28114f054571b96ec0cd4dad4476923 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:14 +0100 -Subject: [PATCH 103/116] virtiofsd: Reset O_DIRECT flag during file open -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-100-dgilbert@redhat.com> -Patchwork-id: 93553 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 099/112] virtiofsd: Reset O_DIRECT flag during file open -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -If an application wants to do direct IO and opens a file with O_DIRECT -in guest, that does not necessarily mean that we need to bypass page -cache on host as well. So reset this flag on host. - -If somebody needs to bypass page cache on host as well (and it is safe to -do so), we can add a knob in daemon later to control this behavior. - -I check virtio-9p and they do reset O_DIRECT flag. - -Signed-off-by: Vivek Goyal -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 65da4539803373ec4eec97ffc49ee90083e56efd) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index ccbbec1..948cb19 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1721,6 +1721,13 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - goto out; - } - -+ /* -+ * O_DIRECT in guest should not necessarily mean bypassing page -+ * cache on host as well. 
If somebody needs that behavior, it -+ * probably should be a configuration knob in daemon. -+ */ -+ fi->flags &= ~O_DIRECT; -+ - fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, - mode); - err = fd == -1 ? errno : 0; -@@ -1950,6 +1957,13 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - fi->flags &= ~O_APPEND; - } - -+ /* -+ * O_DIRECT in guest should not necessarily mean bypassing page -+ * cache on host as well. If somebody needs that behavior, it -+ * probably should be a configuration knob in daemon. -+ */ -+ fi->flags &= ~O_DIRECT; -+ - sprintf(buf, "%i", lo_fd(req, ino)); - fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); - if (fd == -1) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Send-replies-to-messages.patch b/kvm-virtiofsd-Send-replies-to-messages.patch deleted file mode 100644 index 5453fda..0000000 --- a/kvm-virtiofsd-Send-replies-to-messages.patch +++ /dev/null @@ -1,199 +0,0 @@ -From bb1f691dc410ce11ac9675ced70e78a3ce2511b0 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:03 +0100 -Subject: [PATCH 032/116] virtiofsd: Send replies to messages -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-29-dgilbert@redhat.com> -Patchwork-id: 93485 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 028/112] virtiofsd: Send replies to messages -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Route fuse out messages back through the same queue elements -that had the command that triggered the request. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit df57ba919ec3edef9cc208d35685095e6e92713e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 4 ++ - tools/virtiofsd/fuse_virtio.c | 107 ++++++++++++++++++++++++++++++++++++++-- - tools/virtiofsd/fuse_virtio.h | 4 ++ - 3 files changed, 111 insertions(+), 4 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index af09fa2..380d93b 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -171,6 +171,10 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, - } - } - -+ if (fuse_lowlevel_is_virtio(se)) { -+ return virtio_send_msg(se, ch, iov, count); -+ } -+ - abort(); /* virtio should have taken it before here */ - return 0; - } -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 3841b20..05d0e29 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -41,6 +41,9 @@ struct fv_QueueInfo { - /* Our queue index, corresponds to array position */ - int qidx; - int kick_fd; -+ -+ /* The element for the command currently being processed */ -+ VuVirtqElement *qe; - }; - - /* -@@ -121,6 +124,105 @@ static void copy_from_iov(struct fuse_buf *buf, size_t out_num, - } - } - -+/* -+ * Copy from one iov to another, the given number of bytes -+ * The caller must have checked sizes. -+ */ -+static void copy_iov(struct iovec *src_iov, int src_count, -+ struct iovec *dst_iov, int dst_count, size_t to_copy) -+{ -+ size_t dst_offset = 0; -+ /* Outer loop copies 'src' elements */ -+ while (to_copy) { -+ assert(src_count); -+ size_t src_len = src_iov[0].iov_len; -+ size_t src_offset = 0; -+ -+ if (src_len > to_copy) { -+ src_len = to_copy; -+ } -+ /* Inner loop copies contents of one 'src' to maybe multiple dst. 
*/ -+ while (src_len) { -+ assert(dst_count); -+ size_t dst_len = dst_iov[0].iov_len - dst_offset; -+ if (dst_len > src_len) { -+ dst_len = src_len; -+ } -+ -+ memcpy(dst_iov[0].iov_base + dst_offset, -+ src_iov[0].iov_base + src_offset, dst_len); -+ src_len -= dst_len; -+ to_copy -= dst_len; -+ src_offset += dst_len; -+ dst_offset += dst_len; -+ -+ assert(dst_offset <= dst_iov[0].iov_len); -+ if (dst_offset == dst_iov[0].iov_len) { -+ dst_offset = 0; -+ dst_iov++; -+ dst_count--; -+ } -+ } -+ src_iov++; -+ src_count--; -+ } -+} -+ -+/* -+ * Called back by ll whenever it wants to send a reply/message back -+ * The 1st element of the iov starts with the fuse_out_header -+ * 'unique'==0 means it's a notify message. -+ */ -+int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int count) -+{ -+ VuVirtqElement *elem; -+ VuVirtq *q; -+ -+ assert(count >= 1); -+ assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); -+ -+ struct fuse_out_header *out = iov[0].iov_base; -+ /* TODO: Endianness! */ -+ -+ size_t tosend_len = iov_size(iov, count); -+ -+ /* unique == 0 is notification, which we don't support */ -+ assert(out->unique); -+ /* For virtio we always have ch */ -+ assert(ch); -+ elem = ch->qi->qe; -+ q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; -+ -+ /* The 'in' part of the elem is to qemu */ -+ unsigned int in_num = elem->in_num; -+ struct iovec *in_sg = elem->in_sg; -+ size_t in_len = iov_size(in_sg, in_num); -+ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d: with %d in desc of length %zd\n", -+ __func__, elem->index, in_num, in_len); -+ -+ /* -+ * The elem should have room for a 'fuse_out_header' (out from fuse) -+ * plus the data based on the len in the header. 
-+ */ -+ if (in_len < sizeof(struct fuse_out_header)) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for out_header\n", -+ __func__, elem->index); -+ return -E2BIG; -+ } -+ if (in_len < tosend_len) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too small for data len %zd\n", -+ __func__, elem->index, tosend_len); -+ return -E2BIG; -+ } -+ -+ copy_iov(iov, count, in_sg, in_num, tosend_len); -+ vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); -+ vu_queue_notify(&se->virtio_dev->dev, q); -+ -+ return 0; -+} -+ - /* Thread function for individual queues, created when a queue is 'started' */ - static void *fv_queue_thread(void *opaque) - { -@@ -226,13 +328,10 @@ static void *fv_queue_thread(void *opaque) - - /* TODO! Endianness of header */ - -- /* TODO: Fixup fuse_send_msg */ - /* TODO: Add checks for fuse_session_exited */ - fuse_session_process_buf_int(se, &fbuf, &ch); - -- /* TODO: vu_queue_push(dev, q, elem, qi->write_count); */ -- vu_queue_notify(dev, q); -- -+ qi->qe = NULL; - free(elem); - elem = NULL; - } -diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h -index 23026d6..135a148 100644 ---- a/tools/virtiofsd/fuse_virtio.h -+++ b/tools/virtiofsd/fuse_virtio.h -@@ -22,4 +22,8 @@ int virtio_session_mount(struct fuse_session *se); - - int virtio_loop(struct fuse_session *se); - -+ -+int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, -+ struct iovec *iov, int count); -+ - #endif --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Start-queue-threads.patch b/kvm-virtiofsd-Start-queue-threads.patch deleted file mode 100644 index 8b03cd6..0000000 --- a/kvm-virtiofsd-Start-queue-threads.patch +++ /dev/null @@ -1,165 +0,0 @@ -From 38282d996cde61261211160577b366b83cad8012 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:00 +0100 -Subject: [PATCH 029/116] virtiofsd: Start queue threads -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-26-dgilbert@redhat.com> -Patchwork-id: 93479 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 025/112] virtiofsd: Start queue threads -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Start a thread for each queue when we get notified it's been started. - -Signed-off-by: Dr. David Alan Gilbert -fix by: -Signed-off-by: Jun Piao -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit e4c55a3c144493b436e40031e2eed61a84eca47b) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 89 +++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 89 insertions(+) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 4819e56..2a94bb3 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -11,6 +11,7 @@ - * See the file COPYING.LIB - */ - -+#include "qemu/osdep.h" - #include "fuse_virtio.h" - #include "fuse_i.h" - #include "standard-headers/linux/fuse.h" -@@ -30,6 +31,15 @@ - - #include "contrib/libvhost-user/libvhost-user.h" - -+struct fv_QueueInfo { -+ pthread_t thread; -+ struct fv_VuDev *virtio_dev; -+ -+ /* Our queue index, corresponds to array position */ -+ int qidx; -+ int kick_fd; -+}; -+ - /* - * We pass the dev element into libvhost-user - * and then use it to get back to the outer -@@ -38,6 +48,13 @@ - struct fv_VuDev { - VuDev dev; - struct fuse_session *se; -+ -+ /* -+ * The following pair of fields are only accessed in the main -+ * virtio_loop -+ */ -+ size_t nqueues; -+ struct fv_QueueInfo **qi; - }; - - /* From spec */ -@@ -83,6 +100,75 @@ static void fv_panic(VuDev *dev, const char *err) - exit(EXIT_FAILURE); - } - -+static void *fv_queue_thread(void *opaque) -+{ -+ struct fv_QueueInfo *qi = opaque; -+ fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d 
kick_fd %d\n", __func__, -+ qi->qidx, qi->kick_fd); -+ while (1) { -+ /* TODO */ -+ } -+ -+ return NULL; -+} -+ -+/* Callback from libvhost-user on start or stop of a queue */ -+static void fv_queue_set_started(VuDev *dev, int qidx, bool started) -+{ -+ struct fv_VuDev *vud = container_of(dev, struct fv_VuDev, dev); -+ struct fv_QueueInfo *ourqi; -+ -+ fuse_log(FUSE_LOG_INFO, "%s: qidx=%d started=%d\n", __func__, qidx, -+ started); -+ assert(qidx >= 0); -+ -+ /* -+ * Ignore additional request queues for now. passthrough_ll.c must be -+ * audited for thread-safety issues first. It was written with a -+ * well-behaved client in mind and may not protect against all types of -+ * races yet. -+ */ -+ if (qidx > 1) { -+ fuse_log(FUSE_LOG_ERR, -+ "%s: multiple request queues not yet implemented, please only " -+ "configure 1 request queue\n", -+ __func__); -+ exit(EXIT_FAILURE); -+ } -+ -+ if (started) { -+ /* Fire up a thread to watch this queue */ -+ if (qidx >= vud->nqueues) { -+ vud->qi = realloc(vud->qi, (qidx + 1) * sizeof(vud->qi[0])); -+ assert(vud->qi); -+ memset(vud->qi + vud->nqueues, 0, -+ sizeof(vud->qi[0]) * (1 + (qidx - vud->nqueues))); -+ vud->nqueues = qidx + 1; -+ } -+ if (!vud->qi[qidx]) { -+ vud->qi[qidx] = calloc(sizeof(struct fv_QueueInfo), 1); -+ assert(vud->qi[qidx]); -+ vud->qi[qidx]->virtio_dev = vud; -+ vud->qi[qidx]->qidx = qidx; -+ } else { -+ /* Shouldn't have been started */ -+ assert(vud->qi[qidx]->kick_fd == -1); -+ } -+ ourqi = vud->qi[qidx]; -+ ourqi->kick_fd = dev->vq[qidx].kick_fd; -+ if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { -+ fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", -+ __func__, qidx); -+ assert(0); -+ } -+ } else { -+ /* TODO: Kill the thread */ -+ assert(qidx < vud->nqueues); -+ ourqi = vud->qi[qidx]; -+ ourqi->kick_fd = -1; -+ } -+} -+ - static bool fv_queue_order(VuDev *dev, int qidx) - { - return false; -@@ -92,6 +178,9 @@ static const VuDevIface fv_iface = { - .get_features 
= fv_get_features, - .set_features = fv_set_features, - -+ /* Don't need process message, we've not got any at vhost-user level */ -+ .queue_set_started = fv_queue_set_started, -+ - .queue_is_processed_in_order = fv_queue_order, - }; - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Start-reading-commands-from-queue.patch b/kvm-virtiofsd-Start-reading-commands-from-queue.patch deleted file mode 100644 index 2022480..0000000 --- a/kvm-virtiofsd-Start-reading-commands-from-queue.patch +++ /dev/null @@ -1,200 +0,0 @@ -From b4af2eff8ecadb4e2c9520602455f77fac2cb943 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:02 +0100 -Subject: [PATCH 031/116] virtiofsd: Start reading commands from queue -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-28-dgilbert@redhat.com> -Patchwork-id: 93484 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 027/112] virtiofsd: Start reading commands from queue -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Pop queue elements off queues, copy the data from them and -pass that to fuse. - - Note: 'out' in a VuVirtqElement is from QEMU - 'in' in libfuse is into the daemon - - So we read from the out iov's to get a fuse_in_header - -When we get a kick we've got to read all the elements until the queue -is empty. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit b509e1228b3e5eb83c14819045988999fc2dbd1b) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 2 + - tools/virtiofsd/fuse_virtio.c | 99 +++++++++++++++++++++++++++++++++++++++++-- - 2 files changed, 98 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index ec04449..1126723 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -14,6 +14,7 @@ - #include "fuse_lowlevel.h" - - struct fv_VuDev; -+struct fv_QueueInfo; - - struct fuse_req { - struct fuse_session *se; -@@ -75,6 +76,7 @@ struct fuse_chan { - pthread_mutex_t lock; - int ctr; - int fd; -+ struct fv_QueueInfo *qi; - }; - - /** -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 05e7258..3841b20 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -12,6 +12,7 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/iov.h" - #include "fuse_virtio.h" - #include "fuse_i.h" - #include "standard-headers/linux/fuse.h" -@@ -32,6 +33,7 @@ - - #include "contrib/libvhost-user/libvhost-user.h" - -+struct fv_VuDev; - struct fv_QueueInfo { - pthread_t thread; - struct fv_VuDev *virtio_dev; -@@ -101,10 +103,41 @@ static void fv_panic(VuDev *dev, const char *err) - exit(EXIT_FAILURE); - } - -+/* -+ * Copy from an iovec into a fuse_buf (memory only) -+ * Caller must ensure there is space -+ */ -+static void copy_from_iov(struct fuse_buf *buf, size_t out_num, -+ const struct iovec *out_sg) -+{ -+ void *dest = buf->mem; -+ -+ while (out_num) { -+ size_t onelen = out_sg->iov_len; -+ memcpy(dest, out_sg->iov_base, onelen); -+ dest += onelen; -+ out_sg++; -+ out_num--; -+ } -+} -+ - /* Thread function for individual queues, created when a queue is 'started' */ - static void *fv_queue_thread(void *opaque) - { - struct fv_QueueInfo *qi = opaque; -+ struct VuDev *dev = &qi->virtio_dev->dev; -+ struct VuVirtq *q = vu_get_queue(dev, qi->qidx); -+ 
struct fuse_session *se = qi->virtio_dev->se; -+ struct fuse_chan ch; -+ struct fuse_buf fbuf; -+ -+ fbuf.mem = NULL; -+ fbuf.flags = 0; -+ -+ fuse_mutex_init(&ch.lock); -+ ch.fd = (int)0xdaff0d111; -+ ch.qi = qi; -+ - fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, - qi->qidx, qi->kick_fd); - while (1) { -@@ -141,11 +174,71 @@ static void *fv_queue_thread(void *opaque) - fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); - break; - } -- if (qi->virtio_dev->se->debug) { -- fprintf(stderr, "%s: Queue %d gave evalue: %zx\n", __func__, -- qi->qidx, (size_t)evalue); -+ /* out is from guest, in is too guest */ -+ unsigned int in_bytes, out_bytes; -+ vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); -+ -+ fuse_log(FUSE_LOG_DEBUG, -+ "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", -+ __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); -+ -+ while (1) { -+ /* -+ * An element contains one request and the space to send our -+ * response They're spread over multiple descriptors in a -+ * scatter/gather set and we can't trust the guest to keep them -+ * still; so copy in/out. -+ */ -+ VuVirtqElement *elem = vu_queue_pop(dev, q, sizeof(VuVirtqElement)); -+ if (!elem) { -+ break; -+ } -+ -+ if (!fbuf.mem) { -+ fbuf.mem = malloc(se->bufsize); -+ assert(fbuf.mem); -+ assert(se->bufsize > sizeof(struct fuse_in_header)); -+ } -+ /* The 'out' part of the elem is from qemu */ -+ unsigned int out_num = elem->out_num; -+ struct iovec *out_sg = elem->out_sg; -+ size_t out_len = iov_size(out_sg, out_num); -+ fuse_log(FUSE_LOG_DEBUG, -+ "%s: elem %d: with %d out desc of length %zd\n", __func__, -+ elem->index, out_num, out_len); -+ -+ /* -+ * The elem should contain a 'fuse_in_header' (in to fuse) -+ * plus the data based on the len in the header. 
-+ */ -+ if (out_len < sizeof(struct fuse_in_header)) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", -+ __func__, elem->index); -+ assert(0); /* TODO */ -+ } -+ if (out_len > se->bufsize) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", -+ __func__, elem->index); -+ assert(0); /* TODO */ -+ } -+ copy_from_iov(&fbuf, out_num, out_sg); -+ fbuf.size = out_len; -+ -+ /* TODO! Endianness of header */ -+ -+ /* TODO: Fixup fuse_send_msg */ -+ /* TODO: Add checks for fuse_session_exited */ -+ fuse_session_process_buf_int(se, &fbuf, &ch); -+ -+ /* TODO: vu_queue_push(dev, q, elem, qi->write_count); */ -+ vu_queue_notify(dev, q); -+ -+ free(elem); -+ elem = NULL; - } - } -+ pthread_mutex_destroy(&ch.lock); -+ free(fbuf.mem); - - return NULL; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Start-wiring-up-vhost-user.patch b/kvm-virtiofsd-Start-wiring-up-vhost-user.patch deleted file mode 100644 index 7b50118..0000000 --- a/kvm-virtiofsd-Start-wiring-up-vhost-user.patch +++ /dev/null @@ -1,247 +0,0 @@ -From 020f593031b0b54e4c35faffea489b700aed6a72 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:57 +0100 -Subject: [PATCH 026/116] virtiofsd: Start wiring up vhost-user -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-23-dgilbert@redhat.com> -Patchwork-id: 93477 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 022/112] virtiofsd: Start wiring up vhost-user -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Listen on our unix socket for the connection from QEMU, when we get it -initialise vhost-user and dive into our own loop variant (currently -dummy). - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit f6f3573c6f271af5ded63ce28589a113f7205c72) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 4 ++ - tools/virtiofsd/fuse_lowlevel.c | 5 +++ - tools/virtiofsd/fuse_lowlevel.h | 7 ++++ - tools/virtiofsd/fuse_virtio.c | 87 +++++++++++++++++++++++++++++++++++++++- - tools/virtiofsd/fuse_virtio.h | 2 + - tools/virtiofsd/passthrough_ll.c | 7 +--- - 6 files changed, 106 insertions(+), 6 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index 82d6ac7..ec04449 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -13,6 +13,8 @@ - #include "fuse.h" - #include "fuse_lowlevel.h" - -+struct fv_VuDev; -+ - struct fuse_req { - struct fuse_session *se; - uint64_t unique; -@@ -65,6 +67,8 @@ struct fuse_session { - size_t bufsize; - int error; - char *vu_socket_path; -+ int vu_socketfd; -+ struct fv_VuDev *virtio_dev; - }; - - struct fuse_chan { -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 5df124e..af09fa2 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2242,6 +2242,11 @@ void fuse_session_unmount(struct fuse_session *se) - { - } - -+int fuse_lowlevel_is_virtio(struct fuse_session *se) -+{ -+ return se->vu_socket_path != NULL; -+} -+ - #ifdef linux - int fuse_req_getgroups(fuse_req_t req, int size, gid_t list[]) - { -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 2fa225d..f6b3470 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1755,6 +1755,13 @@ void fuse_req_interrupt_func(fuse_req_t req, fuse_interrupt_func_t func, - */ - int fuse_req_interrupted(fuse_req_t req); - -+/** -+ * Check if the session is connected via virtio -+ * -+ * @param se session object -+ * @return 1 if the session is a virtio session -+ */ -+int fuse_lowlevel_is_virtio(struct fuse_session *se); - - /* - * Inquiry functions -diff --git 
a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index cbef6ff..2ae3c76 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -19,18 +19,78 @@ - - #include - #include -+#include - #include - #include - #include - #include - #include - -+#include "contrib/libvhost-user/libvhost-user.h" -+ -+/* -+ * We pass the dev element into libvhost-user -+ * and then use it to get back to the outer -+ * container for other data. -+ */ -+struct fv_VuDev { -+ VuDev dev; -+ struct fuse_session *se; -+}; -+ - /* From spec */ - struct virtio_fs_config { - char tag[36]; - uint32_t num_queues; - }; - -+/* -+ * Callback from libvhost-user if there's a new fd we're supposed to listen -+ * to, typically a queue kick? -+ */ -+static void fv_set_watch(VuDev *dev, int fd, int condition, vu_watch_cb cb, -+ void *data) -+{ -+ fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd); -+} -+ -+/* -+ * Callback from libvhost-user if we're no longer supposed to listen on an fd -+ */ -+static void fv_remove_watch(VuDev *dev, int fd) -+{ -+ fuse_log(FUSE_LOG_WARNING, "%s: TODO! fd=%d\n", __func__, fd); -+} -+ -+/* Callback from libvhost-user to panic */ -+static void fv_panic(VuDev *dev, const char *err) -+{ -+ fuse_log(FUSE_LOG_ERR, "%s: libvhost-user: %s\n", __func__, err); -+ /* TODO: Allow reconnects?? 
*/ -+ exit(EXIT_FAILURE); -+} -+ -+static bool fv_queue_order(VuDev *dev, int qidx) -+{ -+ return false; -+} -+ -+static const VuDevIface fv_iface = { -+ /* TODO: Add other callbacks */ -+ .queue_is_processed_in_order = fv_queue_order, -+}; -+ -+int virtio_loop(struct fuse_session *se) -+{ -+ fuse_log(FUSE_LOG_INFO, "%s: Entry\n", __func__); -+ -+ while (1) { -+ /* TODO: Add stuffing */ -+ } -+ -+ fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); -+} -+ - int virtio_session_mount(struct fuse_session *se) - { - struct sockaddr_un un; -@@ -75,5 +135,30 @@ int virtio_session_mount(struct fuse_session *se) - return -1; - } - -- return -1; -+ fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n", -+ __func__); -+ int data_sock = accept(listen_sock, NULL, NULL); -+ if (data_sock == -1) { -+ fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n"); -+ close(listen_sock); -+ return -1; -+ } -+ close(listen_sock); -+ fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket connection\n", -+ __func__); -+ -+ /* TODO: Some cleanup/deallocation! 
*/ -+ se->virtio_dev = calloc(sizeof(struct fv_VuDev), 1); -+ if (!se->virtio_dev) { -+ fuse_log(FUSE_LOG_ERR, "%s: virtio_dev calloc failed\n", __func__); -+ close(data_sock); -+ return -1; -+ } -+ -+ se->vu_socketfd = data_sock; -+ se->virtio_dev->se = se; -+ vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch, -+ fv_remove_watch, &fv_iface); -+ -+ return 0; - } -diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h -index 8f2edb6..23026d6 100644 ---- a/tools/virtiofsd/fuse_virtio.h -+++ b/tools/virtiofsd/fuse_virtio.h -@@ -20,4 +20,6 @@ struct fuse_session; - - int virtio_session_mount(struct fuse_session *se); - -+int virtio_loop(struct fuse_session *se); -+ - #endif -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index fc9b264..037c5d7 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -36,6 +36,7 @@ - */ - - #include "qemu/osdep.h" -+#include "fuse_virtio.h" - #include "fuse_lowlevel.h" - #include - #include -@@ -1395,11 +1396,7 @@ int main(int argc, char *argv[]) - fuse_daemonize(opts.foreground); - - /* Block until ctrl+c or fusermount -u */ -- if (opts.singlethread) { -- ret = fuse_session_loop(se); -- } else { -- ret = fuse_session_loop_mt(se, opts.clone_fd); -- } -+ ret = virtio_loop(se); - - fuse_session_unmount(se); - err_out3: --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Support-remote-posix-locks.patch b/kvm-virtiofsd-Support-remote-posix-locks.patch deleted file mode 100644 index e60364a..0000000 --- a/kvm-virtiofsd-Support-remote-posix-locks.patch +++ /dev/null @@ -1,355 +0,0 @@ -From 8e46d0862c4c204f92c08ce2ae961921f270efb5 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:03 +0100 -Subject: [PATCH 092/116] virtiofsd: Support remote posix locks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-89-dgilbert@redhat.com> -Patchwork-id: 93537 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 088/112] virtiofsd: Support remote posix locks -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -Doing posix locks with-in guest kernel are not sufficient if a file/dir -is being shared by multiple guests. So we need the notion of daemon doing -the locks which are visible to rest of the guests. - -Given posix locks are per process, one can not call posix lock API on host, -otherwise bunch of basic posix locks properties are broken. For example, -If two processes (A and B) in guest open the file and take locks on different -sections of file, if one of the processes closes the fd, it will close -fd on virtiofsd and all posix locks on file will go away. This means if -process A closes the fd, then locks of process B will go away too. - -Similar other problems exist too. - -This patch set tries to emulate posix locks while using open file -description locks provided on Linux. - -Daemon provides two options (-o posix_lock, -o no_posix_lock) to enable -or disable posix locking in daemon. By default it is enabled. - -There are few issues though. - -- GETLK() returns pid of process holding lock. As we are emulating locks - using OFD, and these locks are not per process and don't return pid - of process, so GETLK() in guest does not reuturn process pid. - -- As of now only F_SETLK is supported and not F_SETLKW. We can't block - the thread in virtiofsd for arbitrary long duration as there is only - one thread serving the queue. That means unlock request will not make - it to daemon and F_SETLKW will block infinitely and bring virtio-fs - to a halt. This is a solvable problem though and will require significant - changes in virtiofsd and kernel. Left as a TODO item for now. 
- -Signed-off-by: Vivek Goyal -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 0e81414c54161296212f6bc8a1c70526c4a9755a) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 3 + - tools/virtiofsd/passthrough_ll.c | 189 +++++++++++++++++++++++++++++++++++++++ - 2 files changed, 192 insertions(+) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 5672024..33749bf 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -156,6 +156,9 @@ void fuse_cmdline_help(void) - " allowed (default: 10)\n" - " -o norace disable racy fallback\n" - " default: false\n" -+ " -o posix_lock|no_posix_lock\n" -+ " enable/disable remote posix lock\n" -+ " default: posix_lock\n" - " -o readdirplus|no_readdirplus\n" - " enable/disable readirplus\n" - " default: readdirplus except with " -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 05b5f89..9414935 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -67,6 +67,12 @@ - #include "passthrough_helpers.h" - #include "seccomp.h" - -+/* Keep track of inode posix locks for each owner. 
*/ -+struct lo_inode_plock { -+ uint64_t lock_owner; -+ int fd; /* fd for OFD locks */ -+}; -+ - struct lo_map_elem { - union { - struct lo_inode *inode; -@@ -95,6 +101,8 @@ struct lo_inode { - struct lo_key key; - uint64_t refcount; /* protected by lo->mutex */ - fuse_ino_t fuse_ino; -+ pthread_mutex_t plock_mutex; -+ GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ - }; - - struct lo_cred { -@@ -114,6 +122,7 @@ struct lo_data { - int norace; - int writeback; - int flock; -+ int posix_lock; - int xattr; - char *source; - double timeout; -@@ -137,6 +146,8 @@ static const struct fuse_opt lo_opts[] = { - { "source=%s", offsetof(struct lo_data, source), 0 }, - { "flock", offsetof(struct lo_data, flock), 1 }, - { "no_flock", offsetof(struct lo_data, flock), 0 }, -+ { "posix_lock", offsetof(struct lo_data, posix_lock), 1 }, -+ { "no_posix_lock", offsetof(struct lo_data, posix_lock), 0 }, - { "xattr", offsetof(struct lo_data, xattr), 1 }, - { "no_xattr", offsetof(struct lo_data, xattr), 0 }, - { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, -@@ -485,6 +496,17 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); - conn->want |= FUSE_CAP_FLOCK_LOCKS; - } -+ -+ if (conn->capable & FUSE_CAP_POSIX_LOCKS) { -+ if (lo->posix_lock) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating posix locks\n"); -+ conn->want |= FUSE_CAP_POSIX_LOCKS; -+ } else { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix locks\n"); -+ conn->want &= ~FUSE_CAP_POSIX_LOCKS; -+ } -+ } -+ - if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) || - lo->readdirplus_clear) { - fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); -@@ -772,6 +794,19 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) - return p; - } - -+/* value_destroy_func for posix_locks GHashTable */ -+static void posix_locks_value_destroy(gpointer data) -+{ -+ struct lo_inode_plock *plock = data; -+ -+ 
/* -+ * We had used open() for locks and had only one fd. So -+ * closing this fd should release all OFD locks. -+ */ -+ close(plock->fd); -+ free(plock); -+} -+ - static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - struct fuse_entry_param *e) - { -@@ -825,6 +860,9 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - newfd = -1; - inode->key.ino = e->attr.st_ino; - inode->key.dev = e->attr.st_dev; -+ pthread_mutex_init(&inode->plock_mutex, NULL); -+ inode->posix_locks = g_hash_table_new_full( -+ g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); - - pthread_mutex_lock(&lo->mutex); - inode->fuse_ino = lo_add_inode_mapping(req, inode); -@@ -1160,6 +1198,11 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - if (!inode->refcount) { - lo_map_remove(&lo->ino_map, inode->fuse_ino); - g_hash_table_remove(lo->inodes, &inode->key); -+ if (g_hash_table_size(inode->posix_locks)) { -+ fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n"); -+ } -+ g_hash_table_destroy(inode->posix_locks); -+ pthread_mutex_destroy(&inode->plock_mutex); - pthread_mutex_unlock(&lo->mutex); - close(inode->fd); - free(inode); -@@ -1516,6 +1559,136 @@ out: - } - } - -+/* Should be called with inode->plock_mutex held */ -+static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, -+ struct lo_inode *inode, -+ uint64_t lock_owner, -+ pid_t pid, int *err) -+{ -+ struct lo_inode_plock *plock; -+ char procname[64]; -+ int fd; -+ -+ plock = -+ g_hash_table_lookup(inode->posix_locks, GUINT_TO_POINTER(lock_owner)); -+ -+ if (plock) { -+ return plock; -+ } -+ -+ plock = malloc(sizeof(struct lo_inode_plock)); -+ if (!plock) { -+ *err = ENOMEM; -+ return NULL; -+ } -+ -+ /* Open another instance of file which can be used for ofd locks. */ -+ sprintf(procname, "%i", inode->fd); -+ -+ /* TODO: What if file is not writable? 
*/ -+ fd = openat(lo->proc_self_fd, procname, O_RDWR); -+ if (fd == -1) { -+ *err = errno; -+ free(plock); -+ return NULL; -+ } -+ -+ plock->lock_owner = lock_owner; -+ plock->fd = fd; -+ g_hash_table_insert(inode->posix_locks, GUINT_TO_POINTER(plock->lock_owner), -+ plock); -+ return plock; -+} -+ -+static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct flock *lock) -+{ -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode; -+ struct lo_inode_plock *plock; -+ int ret, saverr = 0; -+ -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_getlk(ino=%" PRIu64 ", flags=%d)" -+ " owner=0x%lx, l_type=%d l_start=0x%lx" -+ " l_len=0x%lx\n", -+ ino, fi->flags, fi->lock_owner, lock->l_type, lock->l_start, -+ lock->l_len); -+ -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ -+ pthread_mutex_lock(&inode->plock_mutex); -+ plock = -+ lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); -+ if (!plock) { -+ pthread_mutex_unlock(&inode->plock_mutex); -+ fuse_reply_err(req, ret); -+ return; -+ } -+ -+ ret = fcntl(plock->fd, F_OFD_GETLK, lock); -+ if (ret == -1) { -+ saverr = errno; -+ } -+ pthread_mutex_unlock(&inode->plock_mutex); -+ -+ if (saverr) { -+ fuse_reply_err(req, saverr); -+ } else { -+ fuse_reply_lock(req, lock); -+ } -+} -+ -+static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, -+ struct flock *lock, int sleep) -+{ -+ struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode; -+ struct lo_inode_plock *plock; -+ int ret, saverr = 0; -+ -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_setlk(ino=%" PRIu64 ", flags=%d)" -+ " cmd=%d pid=%d owner=0x%lx sleep=%d l_whence=%d" -+ " l_start=0x%lx l_len=0x%lx\n", -+ ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, sleep, -+ lock->l_whence, lock->l_start, lock->l_len); -+ -+ if (sleep) { -+ fuse_reply_err(req, EOPNOTSUPP); -+ return; -+ } -+ -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, 
EBADF); -+ return; -+ } -+ -+ pthread_mutex_lock(&inode->plock_mutex); -+ plock = -+ lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); -+ -+ if (!plock) { -+ pthread_mutex_unlock(&inode->plock_mutex); -+ fuse_reply_err(req, ret); -+ return; -+ } -+ -+ /* TODO: Is it alright to modify flock? */ -+ lock->l_pid = 0; -+ ret = fcntl(plock->fd, F_OFD_SETLK, lock); -+ if (ret == -1) { -+ saverr = errno; -+ } -+ pthread_mutex_unlock(&inode->plock_mutex); -+ fuse_reply_err(req, saverr); -+} -+ - static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, - struct fuse_file_info *fi) - { -@@ -1617,6 +1790,19 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - { - int res; - (void)ino; -+ struct lo_inode *inode; -+ -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ -+ /* An fd is going away. Cleanup associated posix locks */ -+ pthread_mutex_lock(&inode->plock_mutex); -+ g_hash_table_remove(inode->posix_locks, GUINT_TO_POINTER(fi->lock_owner)); -+ pthread_mutex_unlock(&inode->plock_mutex); -+ - res = close(dup(lo_fi_fd(req, fi))); - fuse_reply_err(req, res == -1 ? errno : 0); - } -@@ -2080,6 +2266,8 @@ static struct fuse_lowlevel_ops lo_oper = { - .releasedir = lo_releasedir, - .fsyncdir = lo_fsyncdir, - .create = lo_create, -+ .getlk = lo_getlk, -+ .setlk = lo_setlk, - .open = lo_open, - .release = lo_release, - .flush = lo_flush, -@@ -2434,6 +2622,7 @@ int main(int argc, char *argv[]) - struct lo_data lo = { - .debug = 0, - .writeback = 0, -+ .posix_lock = 1, - .proc_self_fd = -1, - }; - struct lo_map_elem *root_elem; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Trim-down-imported-files.patch b/kvm-virtiofsd-Trim-down-imported-files.patch deleted file mode 100644 index f3f1e85..0000000 --- a/kvm-virtiofsd-Trim-down-imported-files.patch +++ /dev/null @@ -1,1582 +0,0 @@ -From 9d3788b1c2fa5cb4f14e292232a05c6a5217802d Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:44 +0100 -Subject: [PATCH 013/116] virtiofsd: Trim down imported files -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-10-dgilbert@redhat.com> -Patchwork-id: 93463 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 009/112] virtiofsd: Trim down imported files -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -There's a lot of the original fuse code we don't need; trim them down. - -Signed-off-by: Dr. David Alan Gilbert -with additional trimming by: -Signed-off-by: Misono Tomohiro -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Xiao Yang -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit a3e23f325439a290c504d6bbc48c2e742149ecab) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 71 +--- - tools/virtiofsd/fuse.h | 46 --- - tools/virtiofsd/fuse_common.h | 32 -- - tools/virtiofsd/fuse_i.h | 41 --- - tools/virtiofsd/fuse_log.h | 8 - - tools/virtiofsd/fuse_lowlevel.c | 675 +--------------------------------- - tools/virtiofsd/fuse_lowlevel.h | 28 -- - tools/virtiofsd/fuse_opt.h | 8 - - tools/virtiofsd/helper.c | 143 ------- - tools/virtiofsd/passthrough_helpers.h | 26 -- - tools/virtiofsd/passthrough_ll.c | 1 - - 11 files changed, 8 insertions(+), 1071 deletions(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 5ab9b87..aefb7db 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -157,73 +157,6 @@ static ssize_t fuse_buf_fd_to_fd(const struct fuse_buf *dst, size_t dst_off, - return copied; - } - --#ifdef HAVE_SPLICE --static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len, enum fuse_buf_copy_flags flags) --{ -- int splice_flags = 0; -- off_t *srcpos = 
NULL; -- off_t *dstpos = NULL; -- off_t srcpos_val; -- off_t dstpos_val; -- ssize_t res; -- size_t copied = 0; -- -- if (flags & FUSE_BUF_SPLICE_MOVE) -- splice_flags |= SPLICE_F_MOVE; -- if (flags & FUSE_BUF_SPLICE_NONBLOCK) -- splice_flags |= SPLICE_F_NONBLOCK; -- -- if (src->flags & FUSE_BUF_FD_SEEK) { -- srcpos_val = src->pos + src_off; -- srcpos = &srcpos_val; -- } -- if (dst->flags & FUSE_BUF_FD_SEEK) { -- dstpos_val = dst->pos + dst_off; -- dstpos = &dstpos_val; -- } -- -- while (len) { -- res = splice(src->fd, srcpos, dst->fd, dstpos, len, -- splice_flags); -- if (res == -1) { -- if (copied) -- break; -- -- if (errno != EINVAL || (flags & FUSE_BUF_FORCE_SPLICE)) -- return -errno; -- -- /* Maybe splice is not supported for this combination */ -- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, -- len); -- } -- if (res == 0) -- break; -- -- copied += res; -- if (!(src->flags & FUSE_BUF_FD_RETRY) && -- !(dst->flags & FUSE_BUF_FD_RETRY)) { -- break; -- } -- -- len -= res; -- } -- -- return copied; --} --#else --static ssize_t fuse_buf_splice(const struct fuse_buf *dst, size_t dst_off, -- const struct fuse_buf *src, size_t src_off, -- size_t len, enum fuse_buf_copy_flags flags) --{ -- (void) flags; -- -- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); --} --#endif -- -- - static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, - const struct fuse_buf *src, size_t src_off, - size_t len, enum fuse_buf_copy_flags flags) -@@ -247,10 +180,8 @@ static ssize_t fuse_buf_copy_one(const struct fuse_buf *dst, size_t dst_off, - return fuse_buf_write(dst, dst_off, src, src_off, len); - } else if (!dst_is_fd) { - return fuse_buf_read(dst, dst_off, src, src_off, len); -- } else if (flags & FUSE_BUF_NO_SPLICE) { -- return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); - } else { -- return fuse_buf_splice(dst, dst_off, src, src_off, len, flags); -+ return fuse_buf_fd_to_fd(dst, dst_off, src, src_off, len); - } - } - -diff --git 
a/tools/virtiofsd/fuse.h b/tools/virtiofsd/fuse.h -index 883f6e5..3202fba 100644 ---- a/tools/virtiofsd/fuse.h -+++ b/tools/virtiofsd/fuse.h -@@ -25,10 +25,6 @@ - #include - #include - --#ifdef __cplusplus --extern "C" { --#endif -- - /* ----------------------------------------------------------- * - * Basic FUSE API * - * ----------------------------------------------------------- */ -@@ -979,44 +975,6 @@ int fuse_loop(struct fuse *f); - void fuse_exit(struct fuse *f); - - /** -- * FUSE event loop with multiple threads -- * -- * Requests from the kernel are processed, and the appropriate -- * operations are called. Request are processed in parallel by -- * distributing them between multiple threads. -- * -- * For a description of the return value and the conditions when the -- * event loop exits, refer to the documentation of -- * fuse_session_loop(). -- * -- * Note: using fuse_loop() instead of fuse_loop_mt() means you are running in -- * single-threaded mode, and that you will not have to worry about reentrancy, -- * though you will have to worry about recursive lookups. In single-threaded -- * mode, FUSE will wait for one callback to return before calling another. -- * -- * Enabling multiple threads, by using fuse_loop_mt(), will cause FUSE to make -- * multiple simultaneous calls into the various callback functions given by your -- * fuse_operations record. -- * -- * If you are using multiple threads, you can enjoy all the parallel execution -- * and interactive response benefits of threads, and you get to enjoy all the -- * benefits of race conditions and locking bugs, too. Ensure that any code used -- * in the callback function of fuse_operations is also thread-safe. 
-- * -- * @param f the FUSE handle -- * @param config loop configuration -- * @return see fuse_session_loop() -- * -- * See also: fuse_loop() -- */ --#if FUSE_USE_VERSION < 32 --int fuse_loop_mt_31(struct fuse *f, int clone_fd); --#define fuse_loop_mt(f, clone_fd) fuse_loop_mt_31(f, clone_fd) --#else --int fuse_loop_mt(struct fuse *f, struct fuse_loop_config *config); --#endif -- --/** - * Get the current context - * - * The context is only valid for the duration of a filesystem -@@ -1268,8 +1226,4 @@ struct fuse_session *fuse_get_session(struct fuse *f); - */ - int fuse_open_channel(const char *mountpoint, const char *options); - --#ifdef __cplusplus --} --#endif -- - #endif /* FUSE_H_ */ -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -index 2d686b2..bf8f8cc 100644 ---- a/tools/virtiofsd/fuse_common.h -+++ b/tools/virtiofsd/fuse_common.h -@@ -28,10 +28,6 @@ - #define FUSE_MAKE_VERSION(maj, min) ((maj) * 10 + (min)) - #define FUSE_VERSION FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION) - --#ifdef __cplusplus --extern "C" { --#endif -- - /** - * Information about an open file. - * -@@ -100,30 +96,6 @@ struct fuse_file_info { - uint32_t poll_events; - }; - --/** -- * Configuration parameters passed to fuse_session_loop_mt() and -- * fuse_loop_mt(). -- */ --struct fuse_loop_config { -- /** -- * whether to use separate device fds for each thread -- * (may increase performance) -- */ -- int clone_fd; -- -- /** -- * The maximum number of available worker threads before they -- * start to get deleted when they become idle. If not -- * specified, the default is 10. -- * -- * Adjusting this has performance implications; a very small number -- * of threads in the pool will cause a lot of thread creation and -- * deletion overhead and performance may suffer. When set to 0, a new -- * thread will be created to service every operation. 
-- */ -- unsigned int max_idle_threads; --}; -- - /************************************************************************** - * Capability bits for 'fuse_conn_info.capable' and 'fuse_conn_info.want' * - **************************************************************************/ -@@ -802,10 +774,6 @@ void fuse_remove_signal_handlers(struct fuse_session *se); - # error only API version 30 or greater is supported - #endif - --#ifdef __cplusplus --} --#endif -- - - /* - * This interface uses 64 bit off_t. -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index d38b630..b39522e 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -9,8 +9,6 @@ - #include "fuse.h" - #include "fuse_lowlevel.h" - --struct mount_opts; -- - struct fuse_req { - struct fuse_session *se; - uint64_t unique; -@@ -45,7 +43,6 @@ struct fuse_session { - char *mountpoint; - volatile int exited; - int fd; -- struct mount_opts *mo; - int debug; - int deny_others; - struct fuse_lowlevel_ops op; -@@ -58,7 +55,6 @@ struct fuse_session { - struct fuse_req interrupts; - pthread_mutex_t lock; - int got_destroy; -- pthread_key_t pipe_key; - int broken_splice_nonblock; - uint64_t notify_ctr; - struct fuse_notify_req notify_list; -@@ -87,53 +83,16 @@ struct fuse_module { - int ctr; - }; - --/* ----------------------------------------------------------- * -- * Channel interface (when using -o clone_fd) * -- * ----------------------------------------------------------- */ -- --/** -- * Obtain counted reference to the channel -- * -- * @param ch the channel -- * @return the channel -- */ --struct fuse_chan *fuse_chan_get(struct fuse_chan *ch); -- --/** -- * Drop counted reference to a channel -- * -- * @param ch the channel -- */ --void fuse_chan_put(struct fuse_chan *ch); -- --struct mount_opts *parse_mount_opts(struct fuse_args *args); --void destroy_mount_opts(struct mount_opts *mo); --void fuse_mount_version(void); --unsigned get_max_read(struct mount_opts *o); --void 
fuse_kern_unmount(const char *mountpoint, int fd); --int fuse_kern_mount(const char *mountpoint, struct mount_opts *mo); -- - int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, - int count); - void fuse_free_req(fuse_req_t req); - --void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeide, const void *inarg); -- --int fuse_start_thread(pthread_t *thread_id, void *(*func)(void *), void *arg); -- --int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, -- struct fuse_chan *ch); - void fuse_session_process_buf_int(struct fuse_session *se, - const struct fuse_buf *buf, struct fuse_chan *ch); - --struct fuse *fuse_new_31(struct fuse_args *args, const struct fuse_operations *op, -- size_t op_size, void *private_data); --int fuse_loop_mt_32(struct fuse *f, struct fuse_loop_config *config); --int fuse_session_loop_mt_32(struct fuse_session *se, struct fuse_loop_config *config); - - #define FUSE_MAX_MAX_PAGES 256 - #define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32 - - /* room needed in buffer to accommodate header */ - #define FUSE_BUFFER_HEADER_SIZE 0x1000 -- -diff --git a/tools/virtiofsd/fuse_log.h b/tools/virtiofsd/fuse_log.h -index 5e112e0..0af700d 100644 ---- a/tools/virtiofsd/fuse_log.h -+++ b/tools/virtiofsd/fuse_log.h -@@ -16,10 +16,6 @@ - - #include - --#ifdef __cplusplus --extern "C" { --#endif -- - /** - * Log severity level - * -@@ -75,8 +71,4 @@ void fuse_set_log_func(fuse_log_func_t func); - */ - void fuse_log(enum fuse_log_level level, const char *fmt, ...); - --#ifdef __cplusplus --} --#endif -- - #endif /* FUSE_LOG_H_ */ -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index f2d7038..e6fa247 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -16,7 +16,6 @@ - #include "fuse_kernel.h" - #include "fuse_opt.h" - #include "fuse_misc.h" --#include "mount_util.h" - - #include - #include -@@ -28,12 +27,6 @@ - #include - #include - --#ifndef 
F_LINUX_SPECIFIC_BASE --#define F_LINUX_SPECIFIC_BASE 1024 --#endif --#ifndef F_SETPIPE_SZ --#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7) --#endif - - - #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) -@@ -137,7 +130,6 @@ void fuse_free_req(fuse_req_t req) - req->u.ni.data = NULL; - list_del_req(req); - ctr = --req->ctr; -- fuse_chan_put(req->ch); - req->ch = NULL; - pthread_mutex_unlock(&se->lock); - if (!ctr) -@@ -184,19 +176,7 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, - } - } - -- ssize_t res = writev(ch ? ch->fd : se->fd, -- iov, count); -- int err = errno; -- -- if (res == -1) { -- assert(se != NULL); -- -- /* ENOENT means the operation was interrupted */ -- if (!fuse_session_exited(se) && err != ENOENT) -- perror("fuse: writing device"); -- return -err; -- } -- -+ abort(); /* virtio should have taken it before here */ - return 0; - } - -@@ -480,10 +460,6 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, - struct fuse_bufvec *buf, - size_t len) - { -- struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); -- void *mbuf; -- int res; -- - /* Optimize common case */ - if (buf->count == 1 && buf->idx == 0 && buf->off == 0 && - !(buf->buf[0].flags & FUSE_BUF_IS_FD)) { -@@ -496,350 +472,10 @@ static int fuse_send_data_iov_fallback(struct fuse_session *se, - return fuse_send_msg(se, ch, iov, iov_count); - } - -- res = posix_memalign(&mbuf, pagesize, len); -- if (res != 0) -- return res; -- -- mem_buf.buf[0].mem = mbuf; -- res = fuse_buf_copy(&mem_buf, buf, 0); -- if (res < 0) { -- free(mbuf); -- return -res; -- } -- len = res; -- -- iov[iov_count].iov_base = mbuf; -- iov[iov_count].iov_len = len; -- iov_count++; -- res = fuse_send_msg(se, ch, iov, iov_count); -- free(mbuf); -- -- return res; --} -- --struct fuse_ll_pipe { -- size_t size; -- int can_grow; -- int pipe[2]; --}; -- --static void fuse_ll_pipe_free(struct fuse_ll_pipe *llp) --{ -- close(llp->pipe[0]); -- close(llp->pipe[1]); -- free(llp); --} 
-- --#ifdef HAVE_SPLICE --#if !defined(HAVE_PIPE2) || !defined(O_CLOEXEC) --static int fuse_pipe(int fds[2]) --{ -- int rv = pipe(fds); -- -- if (rv == -1) -- return rv; -- -- if (fcntl(fds[0], F_SETFL, O_NONBLOCK) == -1 || -- fcntl(fds[1], F_SETFL, O_NONBLOCK) == -1 || -- fcntl(fds[0], F_SETFD, FD_CLOEXEC) == -1 || -- fcntl(fds[1], F_SETFD, FD_CLOEXEC) == -1) { -- close(fds[0]); -- close(fds[1]); -- rv = -1; -- } -- return rv; --} --#else --static int fuse_pipe(int fds[2]) --{ -- return pipe2(fds, O_CLOEXEC | O_NONBLOCK); --} --#endif -- --static struct fuse_ll_pipe *fuse_ll_get_pipe(struct fuse_session *se) --{ -- struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); -- if (llp == NULL) { -- int res; -- -- llp = malloc(sizeof(struct fuse_ll_pipe)); -- if (llp == NULL) -- return NULL; -- -- res = fuse_pipe(llp->pipe); -- if (res == -1) { -- free(llp); -- return NULL; -- } -- -- /* -- *the default size is 16 pages on linux -- */ -- llp->size = pagesize * 16; -- llp->can_grow = 1; -- -- pthread_setspecific(se->pipe_key, llp); -- } -- -- return llp; --} --#endif -- --static void fuse_ll_clear_pipe(struct fuse_session *se) --{ -- struct fuse_ll_pipe *llp = pthread_getspecific(se->pipe_key); -- if (llp) { -- pthread_setspecific(se->pipe_key, NULL); -- fuse_ll_pipe_free(llp); -- } --} -- --#if defined(HAVE_SPLICE) && defined(HAVE_VMSPLICE) --static int read_back(int fd, char *buf, size_t len) --{ -- int res; -- -- res = read(fd, buf, len); -- if (res == -1) { -- fuse_log(FUSE_LOG_ERR, "fuse: internal error: failed to read back from pipe: %s\n", strerror(errno)); -- return -EIO; -- } -- if (res != len) { -- fuse_log(FUSE_LOG_ERR, "fuse: internal error: short read back from pipe: %i from %zi\n", res, len); -- return -EIO; -- } -+ abort(); /* Will have taken vhost path */ - return 0; - } - --static int grow_pipe_to_max(int pipefd) --{ -- int max; -- int res; -- int maxfd; -- char buf[32]; -- -- maxfd = open("/proc/sys/fs/pipe-max-size", O_RDONLY); -- if (maxfd < 0) 
-- return -errno; -- -- res = read(maxfd, buf, sizeof(buf) - 1); -- if (res < 0) { -- int saved_errno; -- -- saved_errno = errno; -- close(maxfd); -- return -saved_errno; -- } -- close(maxfd); -- buf[res] = '\0'; -- -- max = atoi(buf); -- res = fcntl(pipefd, F_SETPIPE_SZ, max); -- if (res < 0) -- return -errno; -- return max; --} -- --static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, -- struct iovec *iov, int iov_count, -- struct fuse_bufvec *buf, unsigned int flags) --{ -- int res; -- size_t len = fuse_buf_size(buf); -- struct fuse_out_header *out = iov[0].iov_base; -- struct fuse_ll_pipe *llp; -- int splice_flags; -- size_t pipesize; -- size_t total_fd_size; -- size_t idx; -- size_t headerlen; -- struct fuse_bufvec pipe_buf = FUSE_BUFVEC_INIT(len); -- -- if (se->broken_splice_nonblock) -- goto fallback; -- -- if (flags & FUSE_BUF_NO_SPLICE) -- goto fallback; -- -- total_fd_size = 0; -- for (idx = buf->idx; idx < buf->count; idx++) { -- if (buf->buf[idx].flags & FUSE_BUF_IS_FD) { -- total_fd_size = buf->buf[idx].size; -- if (idx == buf->idx) -- total_fd_size -= buf->off; -- } -- } -- if (total_fd_size < 2 * pagesize) -- goto fallback; -- -- if (se->conn.proto_minor < 14 || -- !(se->conn.want & FUSE_CAP_SPLICE_WRITE)) -- goto fallback; -- -- llp = fuse_ll_get_pipe(se); -- if (llp == NULL) -- goto fallback; -- -- -- headerlen = iov_length(iov, iov_count); -- -- out->len = headerlen + len; -- -- /* -- * Heuristic for the required pipe size, does not work if the -- * source contains less than page size fragments -- */ -- pipesize = pagesize * (iov_count + buf->count + 1) + out->len; -- -- if (llp->size < pipesize) { -- if (llp->can_grow) { -- res = fcntl(llp->pipe[0], F_SETPIPE_SZ, pipesize); -- if (res == -1) { -- res = grow_pipe_to_max(llp->pipe[0]); -- if (res > 0) -- llp->size = res; -- llp->can_grow = 0; -- goto fallback; -- } -- llp->size = res; -- } -- if (llp->size < pipesize) -- goto fallback; -- } -- -- -- res = 
vmsplice(llp->pipe[1], iov, iov_count, SPLICE_F_NONBLOCK); -- if (res == -1) -- goto fallback; -- -- if (res != headerlen) { -- res = -EIO; -- fuse_log(FUSE_LOG_ERR, "fuse: short vmsplice to pipe: %u/%zu\n", res, -- headerlen); -- goto clear_pipe; -- } -- -- pipe_buf.buf[0].flags = FUSE_BUF_IS_FD; -- pipe_buf.buf[0].fd = llp->pipe[1]; -- -- res = fuse_buf_copy(&pipe_buf, buf, -- FUSE_BUF_FORCE_SPLICE | FUSE_BUF_SPLICE_NONBLOCK); -- if (res < 0) { -- if (res == -EAGAIN || res == -EINVAL) { -- /* -- * Should only get EAGAIN on kernels with -- * broken SPLICE_F_NONBLOCK support (<= -- * 2.6.35) where this error or a short read is -- * returned even if the pipe itself is not -- * full -- * -- * EINVAL might mean that splice can't handle -- * this combination of input and output. -- */ -- if (res == -EAGAIN) -- se->broken_splice_nonblock = 1; -- -- pthread_setspecific(se->pipe_key, NULL); -- fuse_ll_pipe_free(llp); -- goto fallback; -- } -- res = -res; -- goto clear_pipe; -- } -- -- if (res != 0 && res < len) { -- struct fuse_bufvec mem_buf = FUSE_BUFVEC_INIT(len); -- void *mbuf; -- size_t now_len = res; -- /* -- * For regular files a short count is either -- * 1) due to EOF, or -- * 2) because of broken SPLICE_F_NONBLOCK (see above) -- * -- * For other inputs it's possible that we overflowed -- * the pipe because of small buffer fragments. -- */ -- -- res = posix_memalign(&mbuf, pagesize, len); -- if (res != 0) -- goto clear_pipe; -- -- mem_buf.buf[0].mem = mbuf; -- mem_buf.off = now_len; -- res = fuse_buf_copy(&mem_buf, buf, 0); -- if (res > 0) { -- char *tmpbuf; -- size_t extra_len = res; -- /* -- * Trickiest case: got more data. Need to get -- * back the data from the pipe and then fall -- * back to regular write. 
-- */ -- tmpbuf = malloc(headerlen); -- if (tmpbuf == NULL) { -- free(mbuf); -- res = ENOMEM; -- goto clear_pipe; -- } -- res = read_back(llp->pipe[0], tmpbuf, headerlen); -- free(tmpbuf); -- if (res != 0) { -- free(mbuf); -- goto clear_pipe; -- } -- res = read_back(llp->pipe[0], mbuf, now_len); -- if (res != 0) { -- free(mbuf); -- goto clear_pipe; -- } -- len = now_len + extra_len; -- iov[iov_count].iov_base = mbuf; -- iov[iov_count].iov_len = len; -- iov_count++; -- res = fuse_send_msg(se, ch, iov, iov_count); -- free(mbuf); -- return res; -- } -- free(mbuf); -- res = now_len; -- } -- len = res; -- out->len = headerlen + len; -- -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, -- " unique: %llu, success, outsize: %i (splice)\n", -- (unsigned long long) out->unique, out->len); -- } -- -- splice_flags = 0; -- if ((flags & FUSE_BUF_SPLICE_MOVE) && -- (se->conn.want & FUSE_CAP_SPLICE_MOVE)) -- splice_flags |= SPLICE_F_MOVE; -- -- res = splice(llp->pipe[0], NULL, ch ? ch->fd : se->fd, -- NULL, out->len, splice_flags); -- if (res == -1) { -- res = -errno; -- perror("fuse: splice from pipe"); -- goto clear_pipe; -- } -- if (res != out->len) { -- res = -EIO; -- fuse_log(FUSE_LOG_ERR, "fuse: short splice from pipe: %u/%u\n", -- res, out->len); -- goto clear_pipe; -- } -- return 0; -- --clear_pipe: -- fuse_ll_clear_pipe(se); -- return res; -- --fallback: -- return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); --} --#else - static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - struct iovec *iov, int iov_count, - struct fuse_bufvec *buf, unsigned int flags) -@@ -849,7 +485,6 @@ static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - - return fuse_send_data_iov_fallback(se, ch, iov, iov_count, buf, len); - } --#endif - - int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv, - enum fuse_buf_copy_flags flags) -@@ -1408,16 +1043,11 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void 
*inarg, - if (bufv.buf[0].size < arg->size) { - fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); - fuse_reply_err(req, EIO); -- goto out; -+ return; - } - bufv.buf[0].size = arg->size; - - se->op.write_buf(req, nodeid, &bufv, arg->offset, &fi); -- --out: -- /* Need to reset the pipe if ->write_buf() didn't consume all data */ -- if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) -- fuse_ll_clear_pipe(se); - } - - static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -@@ -2038,17 +1668,6 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - return; - } - -- unsigned max_read_mo = get_max_read(se->mo); -- if (se->conn.max_read != max_read_mo) { -- fuse_log(FUSE_LOG_ERR, "fuse: error: init() and fuse_session_new() " -- "requested different maximum read size (%u vs %u)\n", -- se->conn.max_read, max_read_mo); -- fuse_reply_err(req, EPROTO); -- se->error = -EPROTO; -- fuse_session_exit(se); -- return; -- } -- - if (se->conn.max_write < bufsize - FUSE_BUFFER_HEADER_SIZE) { - se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; - } -@@ -2364,8 +1983,6 @@ static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, - } - out: - free(rreq); -- if ((ibuf->flags & FUSE_BUF_IS_FD) && bufv.idx < bufv.count) -- fuse_ll_clear_pipe(se); - } - - int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -@@ -2496,7 +2113,6 @@ static struct { - [FUSE_RENAME2] = { do_rename2, "RENAME2" }, - [FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" }, - [FUSE_LSEEK] = { do_lseek, "LSEEK" }, -- [CUSE_INIT] = { cuse_lowlevel_init, "CUSE_INIT" }, - }; - - #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0])) -@@ -2509,21 +2125,6 @@ static const char *opname(enum fuse_opcode opcode) - return fuse_ll_ops[opcode].name; - } - --static int fuse_ll_copy_from_pipe(struct fuse_bufvec *dst, -- struct fuse_bufvec *src) --{ -- ssize_t res = fuse_buf_copy(dst, src, 0); -- if 
(res < 0) { -- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", strerror(-res)); -- return res; -- } -- if ((size_t)res < fuse_buf_size(dst)) { -- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); -- return -1; -- } -- return 0; --} -- - void fuse_session_process_buf(struct fuse_session *se, - const struct fuse_buf *buf) - { -@@ -2533,36 +2134,12 @@ void fuse_session_process_buf(struct fuse_session *se, - void fuse_session_process_buf_int(struct fuse_session *se, - const struct fuse_buf *buf, struct fuse_chan *ch) - { -- const size_t write_header_size = sizeof(struct fuse_in_header) + -- sizeof(struct fuse_write_in); -- struct fuse_bufvec bufv = { .buf[0] = *buf, .count = 1 }; -- struct fuse_bufvec tmpbuf = FUSE_BUFVEC_INIT(write_header_size); - struct fuse_in_header *in; - const void *inarg; - struct fuse_req *req; -- void *mbuf = NULL; - int err; -- int res; -- -- if (buf->flags & FUSE_BUF_IS_FD) { -- if (buf->size < tmpbuf.buf[0].size) -- tmpbuf.buf[0].size = buf->size; - -- mbuf = malloc(tmpbuf.buf[0].size); -- if (mbuf == NULL) { -- fuse_log(FUSE_LOG_ERR, "fuse: failed to allocate header\n"); -- goto clear_pipe; -- } -- tmpbuf.buf[0].mem = mbuf; -- -- res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); -- if (res < 0) -- goto clear_pipe; -- -- in = mbuf; -- } else { -- in = buf->mem; -- } -+ in = buf->mem; - - if (se->debug) { - fuse_log(FUSE_LOG_DEBUG, -@@ -2584,14 +2161,14 @@ void fuse_session_process_buf_int(struct fuse_session *se, - }; - - fuse_send_msg(se, ch, &iov, 1); -- goto clear_pipe; -+ return; - } - - req->unique = in->unique; - req->ctx.uid = in->uid; - req->ctx.gid = in->gid; - req->ctx.pid = in->pid; -- req->ch = ch ? 
fuse_chan_get(ch) : NULL; -+ req->ch = ch; - - err = EIO; - if (!se->got_init) { -@@ -2627,28 +2204,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, - fuse_reply_err(intr, EAGAIN); - } - -- if ((buf->flags & FUSE_BUF_IS_FD) && write_header_size < buf->size && -- (in->opcode != FUSE_WRITE || !se->op.write_buf) && -- in->opcode != FUSE_NOTIFY_REPLY) { -- void *newmbuf; -- -- err = ENOMEM; -- newmbuf = realloc(mbuf, buf->size); -- if (newmbuf == NULL) -- goto reply_err; -- mbuf = newmbuf; -- -- tmpbuf = FUSE_BUFVEC_INIT(buf->size - write_header_size); -- tmpbuf.buf[0].mem = (char *)mbuf + write_header_size; -- -- res = fuse_ll_copy_from_pipe(&tmpbuf, &bufv); -- err = -res; -- if (res < 0) -- goto reply_err; -- -- in = mbuf; -- } -- - inarg = (void *) &in[1]; - if (in->opcode == FUSE_WRITE && se->op.write_buf) - do_write_buf(req, in->nodeid, inarg, buf); -@@ -2657,16 +2212,10 @@ void fuse_session_process_buf_int(struct fuse_session *se, - else - fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); - --out_free: -- free(mbuf); - return; - - reply_err: - fuse_reply_err(req, err); --clear_pipe: -- if (buf->flags & FUSE_BUF_IS_FD) -- fuse_ll_clear_pipe(se); -- goto out_free; - } - - #define LL_OPTION(n,o,v) \ -@@ -2684,7 +2233,6 @@ void fuse_lowlevel_version(void) - { - printf("using FUSE kernel interface version %i.%i\n", - FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); -- fuse_mount_version(); - } - - void fuse_lowlevel_help(void) -@@ -2692,204 +2240,29 @@ void fuse_lowlevel_help(void) - /* These are not all options, but the ones that are - potentially of interest to an end-user */ - printf( --" -o allow_other allow access by all users\n" - " -o allow_root allow access by root\n" --" -o auto_unmount auto unmount on process termination\n"); -+); - } - - void fuse_session_destroy(struct fuse_session *se) - { -- struct fuse_ll_pipe *llp; -- - if (se->got_init && !se->got_destroy) { - if (se->op.destroy) - se->op.destroy(se->userdata); - } -- llp = 
pthread_getspecific(se->pipe_key); -- if (llp != NULL) -- fuse_ll_pipe_free(llp); -- pthread_key_delete(se->pipe_key); - pthread_mutex_destroy(&se->lock); - free(se->cuse_data); - if (se->fd != -1) - close(se->fd); -- destroy_mount_opts(se->mo); - free(se); - } - - --static void fuse_ll_pipe_destructor(void *data) --{ -- struct fuse_ll_pipe *llp = data; -- fuse_ll_pipe_free(llp); --} -- --int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf) --{ -- return fuse_session_receive_buf_int(se, buf, NULL); --} -- --int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf, -- struct fuse_chan *ch) --{ -- int err; -- ssize_t res; --#ifdef HAVE_SPLICE -- size_t bufsize = se->bufsize; -- struct fuse_ll_pipe *llp; -- struct fuse_buf tmpbuf; -- -- if (se->conn.proto_minor < 14 || !(se->conn.want & FUSE_CAP_SPLICE_READ)) -- goto fallback; -- -- llp = fuse_ll_get_pipe(se); -- if (llp == NULL) -- goto fallback; -- -- if (llp->size < bufsize) { -- if (llp->can_grow) { -- res = fcntl(llp->pipe[0], F_SETPIPE_SZ, bufsize); -- if (res == -1) { -- llp->can_grow = 0; -- res = grow_pipe_to_max(llp->pipe[0]); -- if (res > 0) -- llp->size = res; -- goto fallback; -- } -- llp->size = res; -- } -- if (llp->size < bufsize) -- goto fallback; -- } -- -- res = splice(ch ? 
ch->fd : se->fd, -- NULL, llp->pipe[1], NULL, bufsize, 0); -- err = errno; -- -- if (fuse_session_exited(se)) -- return 0; -- -- if (res == -1) { -- if (err == ENODEV) { -- /* Filesystem was unmounted, or connection was aborted -- via /sys/fs/fuse/connections */ -- fuse_session_exit(se); -- return 0; -- } -- if (err != EINTR && err != EAGAIN) -- perror("fuse: splice from device"); -- return -err; -- } -- -- if (res < sizeof(struct fuse_in_header)) { -- fuse_log(FUSE_LOG_ERR, "short splice from fuse device\n"); -- return -EIO; -- } -- -- tmpbuf = (struct fuse_buf) { -- .size = res, -- .flags = FUSE_BUF_IS_FD, -- .fd = llp->pipe[0], -- }; -- -- /* -- * Don't bother with zero copy for small requests. -- * fuse_loop_mt() needs to check for FORGET so this more than -- * just an optimization. -- */ -- if (res < sizeof(struct fuse_in_header) + -- sizeof(struct fuse_write_in) + pagesize) { -- struct fuse_bufvec src = { .buf[0] = tmpbuf, .count = 1 }; -- struct fuse_bufvec dst = { .count = 1 }; -- -- if (!buf->mem) { -- buf->mem = malloc(se->bufsize); -- if (!buf->mem) { -- fuse_log(FUSE_LOG_ERR, -- "fuse: failed to allocate read buffer\n"); -- return -ENOMEM; -- } -- } -- buf->size = se->bufsize; -- buf->flags = 0; -- dst.buf[0] = *buf; -- -- res = fuse_buf_copy(&dst, &src, 0); -- if (res < 0) { -- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n", -- strerror(-res)); -- fuse_ll_clear_pipe(se); -- return res; -- } -- if (res < tmpbuf.size) { -- fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n"); -- fuse_ll_clear_pipe(se); -- return -EIO; -- } -- assert(res == tmpbuf.size); -- -- } else { -- /* Don't overwrite buf->mem, as that would cause a leak */ -- buf->fd = tmpbuf.fd; -- buf->flags = tmpbuf.flags; -- } -- buf->size = tmpbuf.size; -- -- return res; -- --fallback: --#endif -- if (!buf->mem) { -- buf->mem = malloc(se->bufsize); -- if (!buf->mem) { -- fuse_log(FUSE_LOG_ERR, -- "fuse: failed to allocate read buffer\n"); -- return -ENOMEM; -- } -- } -- --restart: 
-- res = read(ch ? ch->fd : se->fd, buf->mem, se->bufsize); -- err = errno; -- -- if (fuse_session_exited(se)) -- return 0; -- if (res == -1) { -- /* ENOENT means the operation was interrupted, it's safe -- to restart */ -- if (err == ENOENT) -- goto restart; -- -- if (err == ENODEV) { -- /* Filesystem was unmounted, or connection was aborted -- via /sys/fs/fuse/connections */ -- fuse_session_exit(se); -- return 0; -- } -- /* Errors occurring during normal operation: EINTR (read -- interrupted), EAGAIN (nonblocking I/O), ENODEV (filesystem -- umounted) */ -- if (err != EINTR && err != EAGAIN) -- perror("fuse: reading device"); -- return -err; -- } -- if ((size_t) res < sizeof(struct fuse_in_header)) { -- fuse_log(FUSE_LOG_ERR, "short read on fuse device\n"); -- return -EIO; -- } -- -- buf->size = res; -- -- return res; --} -- - struct fuse_session *fuse_session_new(struct fuse_args *args, - const struct fuse_lowlevel_ops *op, - size_t op_size, void *userdata) - { -- int err; - struct fuse_session *se; -- struct mount_opts *mo; - - if (sizeof(struct fuse_lowlevel_ops) < op_size) { - fuse_log(FUSE_LOG_ERR, "fuse: warning: library too old, some operations may not work\n"); -@@ -2913,20 +2286,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - /* Parse options */ - if(fuse_opt_parse(args, se, fuse_ll_opts, NULL) == -1) - goto out2; -- if(se->deny_others) { -- /* Allowing access only by root is done by instructing -- * kernel to allow access by everyone, and then restricting -- * access to root and mountpoint owner in libfuse. -- */ -- // We may be adding the option a second time, but -- // that doesn't hurt. 
-- if(fuse_opt_add_arg(args, "-oallow_other") == -1) -- goto out2; -- } -- mo = parse_mount_opts(args); -- if (mo == NULL) -- goto out3; -- - if(args->argc == 1 && - args->argv[0][0] == '-') { - fuse_log(FUSE_LOG_ERR, "fuse: warning: argv[0] looks like an option, but " -@@ -2940,9 +2299,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - goto out4; - } - -- if (se->debug) -- fuse_log(FUSE_LOG_DEBUG, "FUSE library version: %s\n", PACKAGE_VERSION); -- - se->bufsize = FUSE_MAX_MAX_PAGES * getpagesize() + - FUSE_BUFFER_HEADER_SIZE; - -@@ -2952,26 +2308,14 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - se->notify_ctr = 1; - fuse_mutex_init(&se->lock); - -- err = pthread_key_create(&se->pipe_key, fuse_ll_pipe_destructor); -- if (err) { -- fuse_log(FUSE_LOG_ERR, "fuse: failed to create thread specific key: %s\n", -- strerror(err)); -- goto out5; -- } -- - memcpy(&se->op, op, op_size); - se->owner = getuid(); - se->userdata = userdata; - -- se->mo = mo; - return se; - --out5: -- pthread_mutex_destroy(&se->lock); - out4: - fuse_opt_free_args(args); --out3: -- free(mo); - out2: - free(se); - out1: -@@ -3035,11 +2379,6 @@ int fuse_session_fd(struct fuse_session *se) - - void fuse_session_unmount(struct fuse_session *se) - { -- if (se->mountpoint != NULL) { -- fuse_kern_unmount(se->mountpoint, se->fd); -- free(se->mountpoint); -- se->mountpoint = NULL; -- } - } - - #ifdef linux -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 18c6363..6b1adfc 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -31,10 +31,6 @@ - #include - #include - --#ifdef __cplusplus --extern "C" { --#endif -- - /* ----------------------------------------------------------- * - * Miscellaneous definitions * - * ----------------------------------------------------------- */ -@@ -1863,14 +1859,12 @@ void fuse_cmdline_help(void); - * ----------------------------------------------------------- */ - - 
struct fuse_cmdline_opts { -- int singlethread; - int foreground; - int debug; - int nodefault_subtype; - char *mountpoint; - int show_version; - int show_help; -- int clone_fd; - unsigned int max_idle_threads; - }; - -@@ -1962,24 +1956,6 @@ int fuse_session_mount(struct fuse_session *se, const char *mountpoint); - int fuse_session_loop(struct fuse_session *se); - - /** -- * Enter a multi-threaded event loop. -- * -- * For a description of the return value and the conditions when the -- * event loop exits, refer to the documentation of -- * fuse_session_loop(). -- * -- * @param se the session -- * @param config session loop configuration -- * @return see fuse_session_loop() -- */ --#if FUSE_USE_VERSION < 32 --int fuse_session_loop_mt_31(struct fuse_session *se, int clone_fd); --#define fuse_session_loop_mt(se, clone_fd) fuse_session_loop_mt_31(se, clone_fd) --#else --int fuse_session_loop_mt(struct fuse_session *se, struct fuse_loop_config *config); --#endif -- --/** - * Flag a session as terminated. 
- * - * This function is invoked by the POSIX signal handlers, when -@@ -2082,8 +2058,4 @@ void fuse_session_process_buf(struct fuse_session *se, - */ - int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf); - --#ifdef __cplusplus --} --#endif -- - #endif /* FUSE_LOWLEVEL_H_ */ -diff --git a/tools/virtiofsd/fuse_opt.h b/tools/virtiofsd/fuse_opt.h -index d8573e7..6910255 100644 ---- a/tools/virtiofsd/fuse_opt.h -+++ b/tools/virtiofsd/fuse_opt.h -@@ -14,10 +14,6 @@ - * This file defines the option parsing interface of FUSE - */ - --#ifdef __cplusplus --extern "C" { --#endif -- - /** - * Option description - * -@@ -264,8 +260,4 @@ void fuse_opt_free_args(struct fuse_args *args); - */ - int fuse_opt_match(const struct fuse_opt opts[], const char *opt); - --#ifdef __cplusplus --} --#endif -- - #endif /* FUSE_OPT_H_ */ -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 64ff7ad..5a2e64c 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -41,14 +41,10 @@ static const struct fuse_opt fuse_helper_opts[] = { - FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), - FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), - FUSE_HELPER_OPT("-f", foreground), -- FUSE_HELPER_OPT("-s", singlethread), - FUSE_HELPER_OPT("fsname=", nodefault_subtype), - FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), --#ifndef __FreeBSD__ - FUSE_HELPER_OPT("subtype=", nodefault_subtype), - FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), --#endif -- FUSE_HELPER_OPT("clone_fd", clone_fd), - FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), - FUSE_OPT_END - }; -@@ -132,9 +128,6 @@ void fuse_cmdline_help(void) - " -V --version print version\n" - " -d -o debug enable debug output (implies -f)\n" - " -f foreground operation\n" -- " -s disable multi-threaded operation\n" -- " -o clone_fd use separate fuse device fd for each thread\n" -- " (may improve performance)\n" - " -o max_idle_threads the maximum number of idle worker threads\n" - " allowed (default: 10)\n"); - 
} -@@ -171,34 +164,6 @@ static int fuse_helper_opt_proc(void *data, const char *arg, int key, - } - } - --/* Under FreeBSD, there is no subtype option so this -- function actually sets the fsname */ --static int add_default_subtype(const char *progname, struct fuse_args *args) --{ -- int res; -- char *subtype_opt; -- -- const char *basename = strrchr(progname, '/'); -- if (basename == NULL) -- basename = progname; -- else if (basename[1] != '\0') -- basename++; -- -- subtype_opt = (char *) malloc(strlen(basename) + 64); -- if (subtype_opt == NULL) { -- fuse_log(FUSE_LOG_ERR, "fuse: memory allocation failed\n"); -- return -1; -- } --#ifdef __FreeBSD__ -- sprintf(subtype_opt, "-ofsname=%s", basename); --#else -- sprintf(subtype_opt, "-osubtype=%s", basename); --#endif -- res = fuse_opt_add_arg(args, subtype_opt); -- free(subtype_opt); -- return res; --} -- - int fuse_parse_cmdline(struct fuse_args *args, - struct fuse_cmdline_opts *opts) - { -@@ -210,14 +175,6 @@ int fuse_parse_cmdline(struct fuse_args *args, - fuse_helper_opt_proc) == -1) - return -1; - -- /* *Linux*: if neither -o subtype nor -o fsname are specified, -- set subtype to program's basename. -- *FreeBSD*: if fsname is not specified, set to program's -- basename. 
*/ -- if (!opts->nodefault_subtype) -- if (add_default_subtype(args->argv[0], args) == -1) -- return -1; -- - return 0; - } - -@@ -276,88 +233,6 @@ int fuse_daemonize(int foreground) - return 0; - } - --int fuse_main_real(int argc, char *argv[], const struct fuse_operations *op, -- size_t op_size, void *user_data) --{ -- struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -- struct fuse *fuse; -- struct fuse_cmdline_opts opts; -- int res; -- -- if (fuse_parse_cmdline(&args, &opts) != 0) -- return 1; -- -- if (opts.show_version) { -- printf("FUSE library version %s\n", PACKAGE_VERSION); -- fuse_lowlevel_version(); -- res = 0; -- goto out1; -- } -- -- if (opts.show_help) { -- if(args.argv[0][0] != '\0') -- printf("usage: %s [options] \n\n", -- args.argv[0]); -- printf("FUSE options:\n"); -- fuse_cmdline_help(); -- fuse_lib_help(&args); -- res = 0; -- goto out1; -- } -- -- if (!opts.show_help && -- !opts.mountpoint) { -- fuse_log(FUSE_LOG_ERR, "error: no mountpoint specified\n"); -- res = 2; -- goto out1; -- } -- -- -- fuse = fuse_new_31(&args, op, op_size, user_data); -- if (fuse == NULL) { -- res = 3; -- goto out1; -- } -- -- if (fuse_mount(fuse,opts.mountpoint) != 0) { -- res = 4; -- goto out2; -- } -- -- if (fuse_daemonize(opts.foreground) != 0) { -- res = 5; -- goto out3; -- } -- -- struct fuse_session *se = fuse_get_session(fuse); -- if (fuse_set_signal_handlers(se) != 0) { -- res = 6; -- goto out3; -- } -- -- if (opts.singlethread) -- res = fuse_loop(fuse); -- else { -- struct fuse_loop_config loop_config; -- loop_config.clone_fd = opts.clone_fd; -- loop_config.max_idle_threads = opts.max_idle_threads; -- res = fuse_loop_mt_32(fuse, &loop_config); -- } -- if (res) -- res = 7; -- -- fuse_remove_signal_handlers(se); --out3: -- fuse_unmount(fuse); --out2: -- fuse_destroy(fuse); --out1: -- free(opts.mountpoint); -- fuse_opt_free_args(&args); -- return res; --} -- -- - void fuse_apply_conn_info_opts(struct fuse_conn_info_opts *opts, - struct fuse_conn_info *conn) - 
{ -@@ -420,21 +295,3 @@ struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args) - } - return opts; - } -- --int fuse_open_channel(const char *mountpoint, const char* options) --{ -- struct mount_opts *opts = NULL; -- int fd = -1; -- const char *argv[] = { "", "-o", options }; -- int argc = sizeof(argv) / sizeof(argv[0]); -- struct fuse_args args = FUSE_ARGS_INIT(argc, (char**) argv); -- -- opts = parse_mount_opts(&args); -- if (opts == NULL) -- return -1; -- -- fd = fuse_kern_mount(mountpoint, opts); -- destroy_mount_opts(opts); -- -- return fd; --} -diff --git a/tools/virtiofsd/passthrough_helpers.h b/tools/virtiofsd/passthrough_helpers.h -index 6b77c33..7c5f561 100644 ---- a/tools/virtiofsd/passthrough_helpers.h -+++ b/tools/virtiofsd/passthrough_helpers.h -@@ -42,32 +42,6 @@ static int mknod_wrapper(int dirfd, const char *path, const char *link, - res = symlinkat(link, dirfd, path); - } else if (S_ISFIFO(mode)) { - res = mkfifoat(dirfd, path, mode); --#ifdef __FreeBSD__ -- } else if (S_ISSOCK(mode)) { -- struct sockaddr_un su; -- int fd; -- -- if (strlen(path) >= sizeof(su.sun_path)) { -- errno = ENAMETOOLONG; -- return -1; -- } -- fd = socket(AF_UNIX, SOCK_STREAM, 0); -- if (fd >= 0) { -- /* -- * We must bind the socket to the underlying file -- * system to create the socket file, even though -- * we'll never listen on this socket. 
-- */ -- su.sun_family = AF_UNIX; -- strncpy(su.sun_path, path, sizeof(su.sun_path)); -- res = bindat(dirfd, fd, (struct sockaddr*)&su, -- sizeof(su)); -- if (res == 0) -- close(fd); -- } else { -- res = -1; -- } --#endif - } else { - res = mknodat(dirfd, path, mode, rdev); - } -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e1a6056..e5f7115 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1240,7 +1240,6 @@ int main(int argc, char *argv[]) - ret = 0; - goto err_out1; - } else if (opts.show_version) { -- printf("FUSE library version %s\n", fuse_pkgversion()); - fuse_lowlevel_version(); - ret = 0; - goto err_out1; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-Trim-out-compatibility-code.patch b/kvm-virtiofsd-Trim-out-compatibility-code.patch deleted file mode 100644 index 411af77..0000000 --- a/kvm-virtiofsd-Trim-out-compatibility-code.patch +++ /dev/null @@ -1,545 +0,0 @@ -From ff16b837e402de773581f77ca188f8806c0b500f Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:51 +0100 -Subject: [PATCH 020/116] virtiofsd: Trim out compatibility code -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-17-dgilbert@redhat.com> -Patchwork-id: 93468 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 016/112] virtiofsd: Trim out compatibility code -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -virtiofsd only supports major=7, minor>=31; trim out a lot of -old compatibility code. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 72c42e2d65510e073cf78fdc924d121c77fa0080) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 330 +++++++++++++++------------------------- - 1 file changed, 119 insertions(+), 211 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 07fb8a6..514d79c 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -387,16 +387,7 @@ static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f) - int fuse_reply_entry(fuse_req_t req, const struct fuse_entry_param *e) - { - struct fuse_entry_out arg; -- size_t size = req->se->conn.proto_minor < 9 ? FUSE_COMPAT_ENTRY_OUT_SIZE : -- sizeof(arg); -- -- /* -- * before ABI 7.4 e->ino == 0 was invalid, only ENOENT meant -- * negative entry -- */ -- if (!e->ino && req->se->conn.proto_minor < 4) { -- return fuse_reply_err(req, ENOENT); -- } -+ size_t size = sizeof(arg); - - memset(&arg, 0, sizeof(arg)); - fill_entry(&arg, e); -@@ -407,9 +398,7 @@ int fuse_reply_create(fuse_req_t req, const struct fuse_entry_param *e, - const struct fuse_file_info *f) - { - char buf[sizeof(struct fuse_entry_out) + sizeof(struct fuse_open_out)]; -- size_t entrysize = req->se->conn.proto_minor < 9 ? -- FUSE_COMPAT_ENTRY_OUT_SIZE : -- sizeof(struct fuse_entry_out); -+ size_t entrysize = sizeof(struct fuse_entry_out); - struct fuse_entry_out *earg = (struct fuse_entry_out *)buf; - struct fuse_open_out *oarg = (struct fuse_open_out *)(buf + entrysize); - -@@ -423,8 +412,7 @@ int fuse_reply_attr(fuse_req_t req, const struct stat *attr, - double attr_timeout) - { - struct fuse_attr_out arg; -- size_t size = -- req->se->conn.proto_minor < 9 ? 
FUSE_COMPAT_ATTR_OUT_SIZE : sizeof(arg); -+ size_t size = sizeof(arg); - - memset(&arg, 0, sizeof(arg)); - arg.attr_valid = calc_timeout_sec(attr_timeout); -@@ -519,8 +507,7 @@ int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) - int fuse_reply_statfs(fuse_req_t req, const struct statvfs *stbuf) - { - struct fuse_statfs_out arg; -- size_t size = -- req->se->conn.proto_minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(arg); -+ size_t size = sizeof(arg); - - memset(&arg, 0, sizeof(arg)); - convert_statfs(stbuf, &arg.st); -@@ -604,45 +591,31 @@ int fuse_reply_ioctl_retry(fuse_req_t req, const struct iovec *in_iov, - iov[count].iov_len = sizeof(arg); - count++; - -- if (req->se->conn.proto_minor < 16) { -- if (in_count) { -- iov[count].iov_base = (void *)in_iov; -- iov[count].iov_len = sizeof(in_iov[0]) * in_count; -- count++; -- } -+ /* Can't handle non-compat 64bit ioctls on 32bit */ -+ if (sizeof(void *) == 4 && req->ioctl_64bit) { -+ res = fuse_reply_err(req, EINVAL); -+ goto out; -+ } - -- if (out_count) { -- iov[count].iov_base = (void *)out_iov; -- iov[count].iov_len = sizeof(out_iov[0]) * out_count; -- count++; -+ if (in_count) { -+ in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); -+ if (!in_fiov) { -+ goto enomem; - } -- } else { -- /* Can't handle non-compat 64bit ioctls on 32bit */ -- if (sizeof(void *) == 4 && req->ioctl_64bit) { -- res = fuse_reply_err(req, EINVAL); -- goto out; -- } -- -- if (in_count) { -- in_fiov = fuse_ioctl_iovec_copy(in_iov, in_count); -- if (!in_fiov) { -- goto enomem; -- } - -- iov[count].iov_base = (void *)in_fiov; -- iov[count].iov_len = sizeof(in_fiov[0]) * in_count; -- count++; -+ iov[count].iov_base = (void *)in_fiov; -+ iov[count].iov_len = sizeof(in_fiov[0]) * in_count; -+ count++; -+ } -+ if (out_count) { -+ out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); -+ if (!out_fiov) { -+ goto enomem; - } -- if (out_count) { -- out_fiov = fuse_ioctl_iovec_copy(out_iov, out_count); -- if (!out_fiov) { -- goto enomem; -- } 
- -- iov[count].iov_base = (void *)out_fiov; -- iov[count].iov_len = sizeof(out_fiov[0]) * out_count; -- count++; -- } -+ iov[count].iov_base = (void *)out_fiov; -+ iov[count].iov_len = sizeof(out_fiov[0]) * out_count; -+ count++; - } - - res = send_reply_iov(req, 0, iov, count); -@@ -784,14 +757,12 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - struct fuse_file_info *fip = NULL; - struct fuse_file_info fi; - -- if (req->se->conn.proto_minor >= 9) { -- struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; -+ struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; - -- if (arg->getattr_flags & FUSE_GETATTR_FH) { -- memset(&fi, 0, sizeof(fi)); -- fi.fh = arg->fh; -- fip = &fi; -- } -+ if (arg->getattr_flags & FUSE_GETATTR_FH) { -+ memset(&fi, 0, sizeof(fi)); -+ fi.fh = arg->fh; -+ fip = &fi; - } - - if (req->se->op.getattr) { -@@ -856,11 +827,7 @@ static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; - char *name = PARAM(arg); - -- if (req->se->conn.proto_minor >= 12) { -- req->ctx.umask = arg->umask; -- } else { -- name = (char *)inarg + FUSE_COMPAT_MKNOD_IN_SIZE; -- } -+ req->ctx.umask = arg->umask; - - if (req->se->op.mknod) { - req->se->op.mknod(req, nodeid, name, arg->mode, arg->rdev); -@@ -873,9 +840,7 @@ static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - { - struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; - -- if (req->se->conn.proto_minor >= 12) { -- req->ctx.umask = arg->umask; -- } -+ req->ctx.umask = arg->umask; - - if (req->se->op.mkdir) { - req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); -@@ -967,11 +932,7 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - -- if (req->se->conn.proto_minor >= 12) { -- req->ctx.umask = arg->umask; -- } else { -- name = (char *)inarg + sizeof(struct fuse_open_in); -- 
} -+ req->ctx.umask = arg->umask; - - req->se->op.create(req, nodeid, name, arg->mode, &fi); - } else { -@@ -1003,10 +964,8 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; -- if (req->se->conn.proto_minor >= 9) { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- } -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; - req->se->op.read(req, nodeid, arg->size, arg->offset, &fi); - } else { - fuse_reply_err(req, ENOSYS); -@@ -1023,13 +982,9 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - fi.fh = arg->fh; - fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0; - -- if (req->se->conn.proto_minor < 9) { -- param = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; -- } else { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- param = PARAM(arg); -- } -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ param = PARAM(arg); - - if (req->se->op.write) { - req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); -@@ -1053,21 +1008,14 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, - fi.fh = arg->fh; - fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; - -- if (se->conn.proto_minor < 9) { -- bufv.buf[0].mem = ((char *)arg) + FUSE_COMPAT_WRITE_IN_SIZE; -- bufv.buf[0].size -= -- sizeof(struct fuse_in_header) + FUSE_COMPAT_WRITE_IN_SIZE; -- assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD)); -- } else { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -- bufv.buf[0].mem = PARAM(arg); -- } -- -- bufv.buf[0].size -= -- sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); -+ fi.lock_owner = arg->lock_owner; -+ fi.flags = arg->flags; -+ if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -+ bufv.buf[0].mem = PARAM(arg); - } -+ -+ bufv.buf[0].size -= -+ sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); - if 
(bufv.buf[0].size < arg->size) { - fuse_log(FUSE_LOG_ERR, "fuse: do_write_buf: buffer size too small\n"); - fuse_reply_err(req, EIO); -@@ -1086,9 +1034,7 @@ static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.flush = 1; -- if (req->se->conn.proto_minor >= 7) { -- fi.lock_owner = arg->lock_owner; -- } -+ fi.lock_owner = arg->lock_owner; - - if (req->se->op.flush) { - req->se->op.flush(req, nodeid, &fi); -@@ -1105,10 +1051,8 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - fi.fh = arg->fh; -- if (req->se->conn.proto_minor >= 8) { -- fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; -- fi.lock_owner = arg->lock_owner; -- } -+ fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0; -+ fi.lock_owner = arg->lock_owner; - if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { - fi.flock_release = 1; - fi.lock_owner = arg->lock_owner; -@@ -1477,8 +1421,7 @@ static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - -- if (sizeof(void *) == 4 && req->se->conn.proto_minor >= 16 && -- !(flags & FUSE_IOCTL_32BIT)) { -+ if (sizeof(void *) == 4 && !(flags & FUSE_IOCTL_32BIT)) { - req->ioctl_64bit = 1; - } - -@@ -1603,7 +1546,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - outarg.major = FUSE_KERNEL_VERSION; - outarg.minor = FUSE_KERNEL_MINOR_VERSION; - -- if (arg->major < 7) { -+ if (arg->major < 7 || (arg->major == 7 && arg->minor < 31)) { - fuse_log(FUSE_LOG_ERR, "fuse: unsupported protocol version: %u.%u\n", - arg->major, arg->minor); - fuse_reply_err(req, EPROTO); -@@ -1616,81 +1559,71 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - return; - } - -- if (arg->minor >= 6) { -- if (arg->max_readahead < se->conn.max_readahead) { -- se->conn.max_readahead = 
arg->max_readahead; -- } -- if (arg->flags & FUSE_ASYNC_READ) { -- se->conn.capable |= FUSE_CAP_ASYNC_READ; -- } -- if (arg->flags & FUSE_POSIX_LOCKS) { -- se->conn.capable |= FUSE_CAP_POSIX_LOCKS; -- } -- if (arg->flags & FUSE_ATOMIC_O_TRUNC) { -- se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; -- } -- if (arg->flags & FUSE_EXPORT_SUPPORT) { -- se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; -- } -- if (arg->flags & FUSE_DONT_MASK) { -- se->conn.capable |= FUSE_CAP_DONT_MASK; -- } -- if (arg->flags & FUSE_FLOCK_LOCKS) { -- se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; -- } -- if (arg->flags & FUSE_AUTO_INVAL_DATA) { -- se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; -- } -- if (arg->flags & FUSE_DO_READDIRPLUS) { -- se->conn.capable |= FUSE_CAP_READDIRPLUS; -- } -- if (arg->flags & FUSE_READDIRPLUS_AUTO) { -- se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; -- } -- if (arg->flags & FUSE_ASYNC_DIO) { -- se->conn.capable |= FUSE_CAP_ASYNC_DIO; -- } -- if (arg->flags & FUSE_WRITEBACK_CACHE) { -- se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; -- } -- if (arg->flags & FUSE_NO_OPEN_SUPPORT) { -- se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; -- } -- if (arg->flags & FUSE_PARALLEL_DIROPS) { -- se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; -- } -- if (arg->flags & FUSE_POSIX_ACL) { -- se->conn.capable |= FUSE_CAP_POSIX_ACL; -- } -- if (arg->flags & FUSE_HANDLE_KILLPRIV) { -- se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; -- } -- if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { -- se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; -- } -- if (!(arg->flags & FUSE_MAX_PAGES)) { -- size_t max_bufsize = -- FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + -- FUSE_BUFFER_HEADER_SIZE; -- if (bufsize > max_bufsize) { -- bufsize = max_bufsize; -- } -+ if (arg->max_readahead < se->conn.max_readahead) { -+ se->conn.max_readahead = arg->max_readahead; -+ } -+ if (arg->flags & FUSE_ASYNC_READ) { -+ se->conn.capable |= FUSE_CAP_ASYNC_READ; -+ } -+ if (arg->flags & FUSE_POSIX_LOCKS) { -+ se->conn.capable |= 
FUSE_CAP_POSIX_LOCKS; -+ } -+ if (arg->flags & FUSE_ATOMIC_O_TRUNC) { -+ se->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC; -+ } -+ if (arg->flags & FUSE_EXPORT_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_EXPORT_SUPPORT; -+ } -+ if (arg->flags & FUSE_DONT_MASK) { -+ se->conn.capable |= FUSE_CAP_DONT_MASK; -+ } -+ if (arg->flags & FUSE_FLOCK_LOCKS) { -+ se->conn.capable |= FUSE_CAP_FLOCK_LOCKS; -+ } -+ if (arg->flags & FUSE_AUTO_INVAL_DATA) { -+ se->conn.capable |= FUSE_CAP_AUTO_INVAL_DATA; -+ } -+ if (arg->flags & FUSE_DO_READDIRPLUS) { -+ se->conn.capable |= FUSE_CAP_READDIRPLUS; -+ } -+ if (arg->flags & FUSE_READDIRPLUS_AUTO) { -+ se->conn.capable |= FUSE_CAP_READDIRPLUS_AUTO; -+ } -+ if (arg->flags & FUSE_ASYNC_DIO) { -+ se->conn.capable |= FUSE_CAP_ASYNC_DIO; -+ } -+ if (arg->flags & FUSE_WRITEBACK_CACHE) { -+ se->conn.capable |= FUSE_CAP_WRITEBACK_CACHE; -+ } -+ if (arg->flags & FUSE_NO_OPEN_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_NO_OPEN_SUPPORT; -+ } -+ if (arg->flags & FUSE_PARALLEL_DIROPS) { -+ se->conn.capable |= FUSE_CAP_PARALLEL_DIROPS; -+ } -+ if (arg->flags & FUSE_POSIX_ACL) { -+ se->conn.capable |= FUSE_CAP_POSIX_ACL; -+ } -+ if (arg->flags & FUSE_HANDLE_KILLPRIV) { -+ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV; -+ } -+ if (arg->flags & FUSE_NO_OPENDIR_SUPPORT) { -+ se->conn.capable |= FUSE_CAP_NO_OPENDIR_SUPPORT; -+ } -+ if (!(arg->flags & FUSE_MAX_PAGES)) { -+ size_t max_bufsize = FUSE_DEFAULT_MAX_PAGES_PER_REQ * getpagesize() + -+ FUSE_BUFFER_HEADER_SIZE; -+ if (bufsize > max_bufsize) { -+ bufsize = max_bufsize; - } -- } else { -- se->conn.max_readahead = 0; - } -- -- if (se->conn.proto_minor >= 14) { - #ifdef HAVE_SPLICE - #ifdef HAVE_VMSPLICE -- se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; -+ se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; - #endif -- se->conn.capable |= FUSE_CAP_SPLICE_READ; -+ se->conn.capable |= FUSE_CAP_SPLICE_READ; - #endif -- } -- if (se->conn.proto_minor >= 18) { -- se->conn.capable |= 
FUSE_CAP_IOCTL_DIR; -- } -+ se->conn.capable |= FUSE_CAP_IOCTL_DIR; - - /* - * Default settings for modern filesystems. -@@ -1797,24 +1730,20 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - outarg.max_readahead = se->conn.max_readahead; - outarg.max_write = se->conn.max_write; -- if (se->conn.proto_minor >= 13) { -- if (se->conn.max_background >= (1 << 16)) { -- se->conn.max_background = (1 << 16) - 1; -- } -- if (se->conn.congestion_threshold > se->conn.max_background) { -- se->conn.congestion_threshold = se->conn.max_background; -- } -- if (!se->conn.congestion_threshold) { -- se->conn.congestion_threshold = se->conn.max_background * 3 / 4; -- } -- -- outarg.max_background = se->conn.max_background; -- outarg.congestion_threshold = se->conn.congestion_threshold; -+ if (se->conn.max_background >= (1 << 16)) { -+ se->conn.max_background = (1 << 16) - 1; -+ } -+ if (se->conn.congestion_threshold > se->conn.max_background) { -+ se->conn.congestion_threshold = se->conn.max_background; - } -- if (se->conn.proto_minor >= 23) { -- outarg.time_gran = se->conn.time_gran; -+ if (!se->conn.congestion_threshold) { -+ se->conn.congestion_threshold = se->conn.max_background * 3 / 4; - } - -+ outarg.max_background = se->conn.max_background; -+ outarg.congestion_threshold = se->conn.congestion_threshold; -+ outarg.time_gran = se->conn.time_gran; -+ - if (se->debug) { - fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, - outarg.minor); -@@ -1828,11 +1757,6 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - outarg.congestion_threshold); - fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); - } -- if (arg->minor < 5) { -- outargsize = FUSE_COMPAT_INIT_OUT_SIZE; -- } else if (arg->minor < 23) { -- outargsize = FUSE_COMPAT_22_INIT_OUT_SIZE; -- } - - send_reply_ok(req, &outarg, outargsize); - } -@@ -1896,10 +1820,6 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, - return 
-EINVAL; - } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { -- return -ENOSYS; -- } -- - outarg.ino = ino; - outarg.off = off; - outarg.len = len; -@@ -1920,10 +1840,6 @@ int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, - return -EINVAL; - } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 12) { -- return -ENOSYS; -- } -- - outarg.parent = parent; - outarg.namelen = namelen; - outarg.padding = 0; -@@ -1947,10 +1863,6 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - return -EINVAL; - } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 18) { -- return -ENOSYS; -- } -- - outarg.parent = parent; - outarg.child = child; - outarg.namelen = namelen; -@@ -1977,10 +1889,6 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - return -EINVAL; - } - -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { -- return -ENOSYS; -- } -- - out.unique = 0; - out.error = FUSE_NOTIFY_STORE; - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch b/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch deleted file mode 100644 index a0882d5..0000000 --- a/kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch +++ /dev/null @@ -1,93 +0,0 @@ -From e4c8fd1060fb69a093064851ebf66dd82533ec0e Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:17 +0100 -Subject: [PATCH 106/116] virtiofsd: add definition of fuse_buf_writev() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-103-dgilbert@redhat.com> -Patchwork-id: 93557 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 102/112] virtiofsd: add definition of fuse_buf_writev() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: piaojun - -Define fuse_buf_writev() which use pwritev and writev to improve io -bandwidth. Especially, the src bufs with 0 size should be skipped as -their mems are not *block_size* aligned which will cause writev failed -in direct io mode. - -Signed-off-by: Jun Piao -Suggested-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 9ceaaa15cf21073c2b23058c374f61c30cd39c31) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 38 ++++++++++++++++++++++++++++++++++++++ - 1 file changed, 38 insertions(+) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 42a608f..37befeb 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -14,6 +14,7 @@ - #include "fuse_lowlevel.h" - #include - #include -+#include - #include - #include - -@@ -33,6 +34,43 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv) - return size; - } - -+__attribute__((unused)) -+static ssize_t fuse_buf_writev(struct fuse_buf *out_buf, -+ struct fuse_bufvec *in_buf) -+{ -+ ssize_t res, i, j; -+ size_t iovcnt = in_buf->count; -+ struct iovec *iov; -+ int fd = out_buf->fd; -+ -+ iov = calloc(iovcnt, sizeof(struct iovec)); -+ if (!iov) { -+ return -ENOMEM; -+ } -+ -+ for (i = 0, j = 0; i < iovcnt; i++) { -+ /* Skip the buf with 0 size */ -+ if (in_buf->buf[i].size) { -+ iov[j].iov_base = in_buf->buf[i].mem; -+ iov[j].iov_len = in_buf->buf[i].size; -+ j++; -+ } -+ } -+ -+ if (out_buf->flags & FUSE_BUF_FD_SEEK) { -+ res = pwritev(fd, iov, iovcnt, out_buf->pos); -+ } else { -+ res = writev(fd, iov, iovcnt); -+ } -+ -+ if (res == -1) { -+ res = -errno; -+ } -+ -+ 
free(iov); -+ return res; -+} -+ - static size_t min_size(size_t s1, size_t s2) - { - return s1 < s2 ? s1 : s2; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch b/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch deleted file mode 100644 index 451f12b..0000000 --- a/kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch +++ /dev/null @@ -1,170 +0,0 @@ -From f91a9bdc171142174110e9ff1716b611f6fb0039 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:07 +0100 -Subject: [PATCH 036/116] virtiofsd: add --fd=FDNUM fd passing option -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-33-dgilbert@redhat.com> -Patchwork-id: 93487 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 032/112] virtiofsd: add --fd=FDNUM fd passing option -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Although --socket-path=PATH is useful for manual invocations, management -tools typically create the UNIX domain socket themselves and pass it to -the vhost-user device backend. This way QEMU can be launched -immediately with a valid socket. No waiting for the vhost-user device -backend is required when fd passing is used. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit cee8e35d4386e34bf79c3ca2aab7f7b1bb48cf8d) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 1 + - tools/virtiofsd/fuse_lowlevel.c | 16 ++++++++++++---- - tools/virtiofsd/fuse_virtio.c | 31 +++++++++++++++++++++++++------ - 3 files changed, 38 insertions(+), 10 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index 1126723..45995f3 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -68,6 +68,7 @@ struct fuse_session { - size_t bufsize; - int error; - char *vu_socket_path; -+ int vu_listen_fd; - int vu_socketfd; - struct fv_VuDev *virtio_dev; - }; -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 4f4684d..95f4db8 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2130,6 +2130,7 @@ static const struct fuse_opt fuse_ll_opts[] = { - LL_OPTION("--debug", debug, 1), - LL_OPTION("allow_root", deny_others, 1), - LL_OPTION("--socket-path=%s", vu_socket_path, 0), -+ LL_OPTION("--fd=%d", vu_listen_fd, 0), - FUSE_OPT_END - }; - -@@ -2147,7 +2148,8 @@ void fuse_lowlevel_help(void) - */ - printf( - " -o allow_root allow access by root\n" -- " --socket-path=PATH path for the vhost-user socket\n"); -+ " --socket-path=PATH path for the vhost-user socket\n" -+ " --fd=FDNUM fd number of vhost-user socket\n"); - } - - void fuse_session_destroy(struct fuse_session *se) -@@ -2191,6 +2193,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - goto out1; - } - se->fd = -1; -+ se->vu_listen_fd = -1; - se->conn.max_write = UINT_MAX; - se->conn.max_readahead = UINT_MAX; - -@@ -2212,8 +2215,13 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - goto out4; - } - -- if (!se->vu_socket_path) { -- fprintf(stderr, "fuse: missing -o vhost_user_socket option\n"); -+ if (!se->vu_socket_path && se->vu_listen_fd < 0) { -+ fuse_log(FUSE_LOG_ERR, "fuse: missing --socket-path or --fd 
option\n"); -+ goto out4; -+ } -+ if (se->vu_socket_path && se->vu_listen_fd >= 0) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: --socket-path and --fd cannot be given together\n"); - goto out4; - } - -@@ -2253,7 +2261,7 @@ void fuse_session_unmount(struct fuse_session *se) - - int fuse_lowlevel_is_virtio(struct fuse_session *se) - { -- return se->vu_socket_path != NULL; -+ return !!se->virtio_dev; - } - - #ifdef linux -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 7e2711b..635f877 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -638,18 +638,21 @@ int virtio_loop(struct fuse_session *se) - return 0; - } - --int virtio_session_mount(struct fuse_session *se) -+static int fv_create_listen_socket(struct fuse_session *se) - { - struct sockaddr_un un; - mode_t old_umask; - -+ /* Nothing to do if fd is already initialized */ -+ if (se->vu_listen_fd >= 0) { -+ return 0; -+ } -+ - if (strlen(se->vu_socket_path) >= sizeof(un.sun_path)) { - fuse_log(FUSE_LOG_ERR, "Socket path too long\n"); - return -1; - } - -- se->fd = -1; -- - /* - * Create the Unix socket to communicate with qemu - * based on QEMU's vhost-user-bridge -@@ -682,15 +685,31 @@ int virtio_session_mount(struct fuse_session *se) - return -1; - } - -+ se->vu_listen_fd = listen_sock; -+ return 0; -+} -+ -+int virtio_session_mount(struct fuse_session *se) -+{ -+ int ret; -+ -+ ret = fv_create_listen_socket(se); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ se->fd = -1; -+ - fuse_log(FUSE_LOG_INFO, "%s: Waiting for vhost-user socket connection...\n", - __func__); -- int data_sock = accept(listen_sock, NULL, NULL); -+ int data_sock = accept(se->vu_listen_fd, NULL, NULL); - if (data_sock == -1) { - fuse_log(FUSE_LOG_ERR, "vhost socket accept: %m\n"); -- close(listen_sock); -+ close(se->vu_listen_fd); - return -1; - } -- close(listen_sock); -+ close(se->vu_listen_fd); -+ se->vu_listen_fd = -1; - fuse_log(FUSE_LOG_INFO, "%s: Received vhost-user socket 
connection\n", - __func__); - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch b/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch deleted file mode 100644 index b874dc9..0000000 --- a/kvm-virtiofsd-add-fuse_mbuf_iter-API.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 1b0edd3d0a2ee5c097bcf3501c1dfa937f02e473 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:21 +0100 -Subject: [PATCH 050/116] virtiofsd: add fuse_mbuf_iter API -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-47-dgilbert@redhat.com> -Patchwork-id: 93502 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 046/112] virtiofsd: add fuse_mbuf_iter API -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Introduce an API for consuming bytes from a buffer with size checks. -All FUSE operations will be converted to use this safe API instead of -void *inarg. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit dad157e880416ab3a0e45beaa0e81977516568bc) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 28 +++++++++++++++++++++++++ - tools/virtiofsd/fuse_common.h | 49 ++++++++++++++++++++++++++++++++++++++++++- - 2 files changed, 76 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 772efa9..42a608f 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -267,3 +267,31 @@ ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) - - return copied; - } -+ -+void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len) -+{ -+ void *ptr; -+ -+ if (len > iter->size - iter->pos) { -+ return NULL; -+ } -+ -+ ptr = iter->mem + iter->pos; -+ iter->pos += len; -+ return ptr; -+} -+ -+const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter) -+{ -+ const char *str = iter->mem + iter->pos; -+ size_t remaining = iter->size - iter->pos; -+ size_t i; -+ -+ for (i = 0; i < remaining; i++) { -+ if (str[i] == '\0') { -+ iter->pos += i + 1; -+ return str; -+ } -+ } -+ return NULL; -+} -diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h -index 0cb33ac..f8f6433 100644 ---- a/tools/virtiofsd/fuse_common.h -+++ b/tools/virtiofsd/fuse_common.h -@@ -703,10 +703,57 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv); - */ - ssize_t fuse_buf_copy(struct fuse_bufvec *dst, struct fuse_bufvec *src); - -+/** -+ * Memory buffer iterator -+ * -+ */ -+struct fuse_mbuf_iter { -+ /** -+ * Data pointer -+ */ -+ void *mem; -+ -+ /** -+ * Total length, in bytes -+ */ -+ size_t size; -+ -+ /** -+ * Offset from start of buffer -+ */ -+ size_t pos; -+}; -+ -+/* Initialize memory buffer iterator from a fuse_buf */ -+#define FUSE_MBUF_ITER_INIT(fbuf) \ -+ ((struct fuse_mbuf_iter){ \ -+ .mem = fbuf->mem, \ -+ .size = fbuf->size, \ -+ .pos = 0, \ -+ }) -+ -+/** -+ * Consume bytes from a memory buffer iterator -+ * -+ * 
@param iter memory buffer iterator -+ * @param len number of bytes to consume -+ * @return pointer to start of consumed bytes or -+ * NULL if advancing beyond end of buffer -+ */ -+void *fuse_mbuf_iter_advance(struct fuse_mbuf_iter *iter, size_t len); -+ -+/** -+ * Consume a NUL-terminated string from a memory buffer iterator -+ * -+ * @param iter memory buffer iterator -+ * @return pointer to the string or -+ * NULL if advancing beyond end of buffer or there is no NUL-terminator -+ */ -+const char *fuse_mbuf_iter_advance_str(struct fuse_mbuf_iter *iter); -+ - /* - * Signal handling - */ -- - /** - * Exit session on HUP, TERM and INT signals and ignore PIPE signal - * --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch b/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch deleted file mode 100644 index bdef115..0000000 --- a/kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 7a3c94e10b087c06635ef72aadb1550184dd5c58 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:58 +0100 -Subject: [PATCH 087/116] virtiofsd: add helper for lo_data cleanup -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-84-dgilbert@redhat.com> -Patchwork-id: 93538 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 083/112] virtiofsd: add helper for lo_data cleanup -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -This offers an helper function for lo_data's cleanup. - -Signed-off-by: Liu Bo -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 18a69cbbb6a4caa7c2040c6db4a33b044a32be7e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 37 +++++++++++++++++++++---------------- - 1 file changed, 21 insertions(+), 16 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 056ebe8..e8dc5c7 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2407,6 +2407,26 @@ static gboolean lo_key_equal(gconstpointer a, gconstpointer b) - return la->ino == lb->ino && la->dev == lb->dev; - } - -+static void fuse_lo_data_cleanup(struct lo_data *lo) -+{ -+ if (lo->inodes) { -+ g_hash_table_destroy(lo->inodes); -+ } -+ lo_map_destroy(&lo->fd_map); -+ lo_map_destroy(&lo->dirp_map); -+ lo_map_destroy(&lo->ino_map); -+ -+ if (lo->proc_self_fd >= 0) { -+ close(lo->proc_self_fd); -+ } -+ -+ if (lo->root.fd >= 0) { -+ close(lo->root.fd); -+ } -+ -+ free(lo->source); -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -2554,22 +2574,7 @@ err_out2: - err_out1: - fuse_opt_free_args(&args); - -- if (lo.inodes) { -- g_hash_table_destroy(lo.inodes); -- } -- lo_map_destroy(&lo.fd_map); -- lo_map_destroy(&lo.dirp_map); -- lo_map_destroy(&lo.ino_map); -- -- if (lo.proc_self_fd >= 0) { -- close(lo.proc_self_fd); -- } -- -- if (lo.root.fd >= 0) { -- close(lo.root.fd); -- } -- -- free(lo.source); -+ fuse_lo_data_cleanup(&lo); - - return ret ? 1 : 0; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch b/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch deleted file mode 100644 index 5e81663..0000000 --- a/kvm-virtiofsd-add-o-source-PATH-to-help-output.patch +++ /dev/null @@ -1,46 +0,0 @@ -From c55995c25f60168e3cb6b5bae1bf9a47813383d0 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:55 +0100 -Subject: [PATCH 024/116] virtiofsd: add -o source=PATH to help output -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-21-dgilbert@redhat.com> -Patchwork-id: 93474 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 020/112] virtiofsd: add -o source=PATH to help output -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -The -o source=PATH option will be used by most command-line invocations. -Let's document it! - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 4ff075f72be2f489c8998ae492ec5cdbbbd73e07) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 26ac870..fc9b264 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1319,6 +1319,7 @@ int main(int argc, char *argv[]) - if (opts.show_help) { - printf("usage: %s [options]\n\n", argv[0]); - fuse_cmdline_help(); -+ printf(" -o source=PATH shared directory tree\n"); - fuse_lowlevel_help(); - ret = 0; - goto err_out1; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-print-capabilities-option.patch b/kvm-virtiofsd-add-print-capabilities-option.patch deleted file mode 100644 index b57e408..0000000 --- a/kvm-virtiofsd-add-print-capabilities-option.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 23d81ee7564084f29e32fedaed5196ae1a5a3240 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:10 +0100 -Subject: [PATCH 039/116] virtiofsd: add --print-capabilities option -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-36-dgilbert@redhat.com> -Patchwork-id: 93486 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 035/112] virtiofsd: add --print-capabilities option -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Add the --print-capabilities option as per vhost-user.rst "Backend -programs conventions". Currently there are no advertised features. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 45018fbb0a73ce66fd3dd87ecd2872b45658add4) -Signed-off-by: Miroslav Rezanina ---- - docs/interop/vhost-user.json | 4 +++- - tools/virtiofsd/fuse_lowlevel.h | 1 + - tools/virtiofsd/helper.c | 2 ++ - tools/virtiofsd/passthrough_ll.c | 12 ++++++++++++ - 4 files changed, 18 insertions(+), 1 deletion(-) - -diff --git a/docs/interop/vhost-user.json b/docs/interop/vhost-user.json -index da6aaf5..d4ea1f7 100644 ---- a/docs/interop/vhost-user.json -+++ b/docs/interop/vhost-user.json -@@ -31,6 +31,7 @@ - # @rproc-serial: virtio remoteproc serial link - # @scsi: virtio scsi - # @vsock: virtio vsock transport -+# @fs: virtio fs (since 4.2) - # - # Since: 4.0 - ## -@@ -50,7 +51,8 @@ - 'rpmsg', - 'rproc-serial', - 'scsi', -- 'vsock' -+ 'vsock', -+ 'fs' - ] - } - -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index f6b3470..0d61df8 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1794,6 +1794,7 @@ struct fuse_cmdline_opts { - int nodefault_subtype; - int show_version; - int show_help; -+ int print_capabilities; - unsigned int max_idle_threads; - }; - -diff --git 
a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index a3645fc..b8ec5ac 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -40,6 +40,7 @@ static const struct fuse_opt fuse_helper_opts[] = { - FUSE_HELPER_OPT("--help", show_help), - FUSE_HELPER_OPT("-V", show_version), - FUSE_HELPER_OPT("--version", show_version), -+ FUSE_HELPER_OPT("--print-capabilities", print_capabilities), - FUSE_HELPER_OPT("-d", debug), - FUSE_HELPER_OPT("debug", debug), - FUSE_HELPER_OPT("-d", foreground), -@@ -135,6 +136,7 @@ void fuse_cmdline_help(void) - { - printf(" -h --help print help\n" - " -V --version print version\n" -+ " --print-capabilities print vhost-user.json\n" - " -d -o debug enable debug output (implies -f)\n" - " -f foreground operation\n" - " --daemonize run in background\n" -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 037c5d7..cd27c09 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1298,6 +1298,14 @@ static struct fuse_lowlevel_ops lo_oper = { - .lseek = lo_lseek, - }; - -+/* Print vhost-user.json backend program capabilities */ -+static void print_capabilities(void) -+{ -+ printf("{\n"); -+ printf(" \"type\": \"fs\"\n"); -+ printf("}\n"); -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -1328,6 +1336,10 @@ int main(int argc, char *argv[]) - fuse_lowlevel_version(); - ret = 0; - goto err_out1; -+ } else if (opts.print_capabilities) { -+ print_capabilities(); -+ ret = 0; -+ goto err_out1; - } - - if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-seccomp-whitelist.patch b/kvm-virtiofsd-add-seccomp-whitelist.patch deleted file mode 100644 index b34108e..0000000 --- a/kvm-virtiofsd-add-seccomp-whitelist.patch +++ /dev/null @@ -1,285 +0,0 @@ -From 58c4e9473b364fb62aac797b0d69fd8ddb02c8c7 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:30 +0100 -Subject: [PATCH 059/116] virtiofsd: add seccomp whitelist -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-56-dgilbert@redhat.com> -Patchwork-id: 93511 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 055/112] virtiofsd: add seccomp whitelist -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Only allow system calls that are needed by virtiofsd. All other system -calls cause SIGSYS to be directed at the thread and the process will -coredump. - -Restricting system calls reduces the kernel attack surface and limits -what the process can do when compromised. - -Signed-off-by: Stefan Hajnoczi -with additional entries by: -Signed-off-by: Ganesh Maharaj Mahalingam -Signed-off-by: Masayoshi Mizuma -Signed-off-by: Misono Tomohiro -Signed-off-by: piaojun -Signed-off-by: Vivek Goyal -Signed-off-by: Eric Ren -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 4f8bde99c175ffd86b5125098a4707d43f5e80c6) - -Signed-off-by: Miroslav Rezanina ---- - Makefile | 5 +- - tools/virtiofsd/Makefile.objs | 5 +- - tools/virtiofsd/passthrough_ll.c | 2 + - tools/virtiofsd/seccomp.c | 151 +++++++++++++++++++++++++++++++++++++++ - tools/virtiofsd/seccomp.h | 14 ++++ - 5 files changed, 174 insertions(+), 3 deletions(-) - create mode 100644 tools/virtiofsd/seccomp.c - create mode 100644 tools/virtiofsd/seccomp.h - -diff --git a/Makefile b/Makefile -index 0e9755d..6879a06 100644 ---- a/Makefile -+++ b/Makefile -@@ -330,7 +330,7 @@ endif - endif - endif - --ifdef CONFIG_LINUX -+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) - HELPERS-y += virtiofsd$(EXESUF) - vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json - endif -@@ -681,7 +681,8 @@ rdmacm-mux$(EXESUF): LIBS += "-libumad" - rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) - $(call LINK, $^) - --ifdef CONFIG_LINUX # relies on Linux-specific syscalls -+# relies on Linux-specific syscalls -+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) - virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) - $(call LINK, $^) - endif -diff --git a/tools/virtiofsd/Makefile.objs b/tools/virtiofsd/Makefile.objs -index 45a8075..076f667 100644 ---- a/tools/virtiofsd/Makefile.objs -+++ b/tools/virtiofsd/Makefile.objs -@@ -5,5 +5,8 @@ virtiofsd-obj-y = buffer.o \ - fuse_signals.o \ - fuse_virtio.o \ - helper.o \ -- passthrough_ll.o -+ passthrough_ll.o \ -+ seccomp.o - -+seccomp.o-cflags := $(SECCOMP_CFLAGS) -+seccomp.o-libs := $(SECCOMP_LIBS) -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 0947d14..bd8925b 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -59,6 +59,7 @@ - #include - - #include "passthrough_helpers.h" -+#include "seccomp.h" - - struct lo_map_elem { - union { -@@ -2091,6 +2092,7 @@ static void setup_sandbox(struct lo_data *lo, struct 
fuse_session *se) - { - setup_namespaces(lo, se); - setup_mounts(lo->source); -+ setup_seccomp(); - } - - int main(int argc, char *argv[]) -diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c -new file mode 100644 -index 0000000..691fb63 ---- /dev/null -+++ b/tools/virtiofsd/seccomp.c -@@ -0,0 +1,151 @@ -+/* -+ * Seccomp sandboxing for virtiofsd -+ * -+ * Copyright (C) 2019 Red Hat, Inc. -+ * -+ * SPDX-License-Identifier: GPL-2.0-or-later -+ */ -+ -+#include "qemu/osdep.h" -+#include "seccomp.h" -+#include "fuse_i.h" -+#include "fuse_log.h" -+#include -+#include -+#include -+#include -+ -+/* Bodge for libseccomp 2.4.2 which broke ppoll */ -+#if !defined(__SNR_ppoll) && defined(__SNR_brk) -+#ifdef __NR_ppoll -+#define __SNR_ppoll __NR_ppoll -+#else -+#define __SNR_ppoll __PNR_ppoll -+#endif -+#endif -+ -+static const int syscall_whitelist[] = { -+ /* TODO ireg sem*() syscalls */ -+ SCMP_SYS(brk), -+ SCMP_SYS(capget), /* For CAP_FSETID */ -+ SCMP_SYS(capset), -+ SCMP_SYS(clock_gettime), -+ SCMP_SYS(clone), -+#ifdef __NR_clone3 -+ SCMP_SYS(clone3), -+#endif -+ SCMP_SYS(close), -+ SCMP_SYS(copy_file_range), -+ SCMP_SYS(dup), -+ SCMP_SYS(eventfd2), -+ SCMP_SYS(exit), -+ SCMP_SYS(exit_group), -+ SCMP_SYS(fallocate), -+ SCMP_SYS(fchmodat), -+ SCMP_SYS(fchownat), -+ SCMP_SYS(fcntl), -+ SCMP_SYS(fdatasync), -+ SCMP_SYS(fgetxattr), -+ SCMP_SYS(flistxattr), -+ SCMP_SYS(flock), -+ SCMP_SYS(fremovexattr), -+ SCMP_SYS(fsetxattr), -+ SCMP_SYS(fstat), -+ SCMP_SYS(fstatfs), -+ SCMP_SYS(fsync), -+ SCMP_SYS(ftruncate), -+ SCMP_SYS(futex), -+ SCMP_SYS(getdents), -+ SCMP_SYS(getdents64), -+ SCMP_SYS(getegid), -+ SCMP_SYS(geteuid), -+ SCMP_SYS(getpid), -+ SCMP_SYS(gettid), -+ SCMP_SYS(gettimeofday), -+ SCMP_SYS(linkat), -+ SCMP_SYS(lseek), -+ SCMP_SYS(madvise), -+ SCMP_SYS(mkdirat), -+ SCMP_SYS(mknodat), -+ SCMP_SYS(mmap), -+ SCMP_SYS(mprotect), -+ SCMP_SYS(mremap), -+ SCMP_SYS(munmap), -+ SCMP_SYS(newfstatat), -+ SCMP_SYS(open), -+ SCMP_SYS(openat), -+ SCMP_SYS(ppoll), 
-+ SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */ -+ SCMP_SYS(preadv), -+ SCMP_SYS(pread64), -+ SCMP_SYS(pwritev), -+ SCMP_SYS(pwrite64), -+ SCMP_SYS(read), -+ SCMP_SYS(readlinkat), -+ SCMP_SYS(recvmsg), -+ SCMP_SYS(renameat), -+ SCMP_SYS(renameat2), -+ SCMP_SYS(rt_sigaction), -+ SCMP_SYS(rt_sigprocmask), -+ SCMP_SYS(rt_sigreturn), -+ SCMP_SYS(sendmsg), -+ SCMP_SYS(setresgid), -+ SCMP_SYS(setresuid), -+#ifdef __NR_setresgid32 -+ SCMP_SYS(setresgid32), -+#endif -+#ifdef __NR_setresuid32 -+ SCMP_SYS(setresuid32), -+#endif -+ SCMP_SYS(set_robust_list), -+ SCMP_SYS(symlinkat), -+ SCMP_SYS(time), /* Rarely needed, except on static builds */ -+ SCMP_SYS(tgkill), -+ SCMP_SYS(unlinkat), -+ SCMP_SYS(utimensat), -+ SCMP_SYS(write), -+ SCMP_SYS(writev), -+}; -+ -+void setup_seccomp(void) -+{ -+ scmp_filter_ctx ctx; -+ size_t i; -+ -+#ifdef SCMP_ACT_KILL_PROCESS -+ ctx = seccomp_init(SCMP_ACT_KILL_PROCESS); -+ /* Handle a newer libseccomp but an older kernel */ -+ if (!ctx && errno == EOPNOTSUPP) { -+ ctx = seccomp_init(SCMP_ACT_TRAP); -+ } -+#else -+ ctx = seccomp_init(SCMP_ACT_TRAP); -+#endif -+ if (!ctx) { -+ fuse_log(FUSE_LOG_ERR, "seccomp_init() failed\n"); -+ exit(1); -+ } -+ -+ for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) { -+ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, -+ syscall_whitelist[i], 0) != 0) { -+ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d", -+ syscall_whitelist[i]); -+ exit(1); -+ } -+ } -+ -+ /* libvhost-user calls this for post-copy migration, we don't need it */ -+ if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOSYS), -+ SCMP_SYS(userfaultfd), 0) != 0) { -+ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add userfaultfd failed\n"); -+ exit(1); -+ } -+ -+ if (seccomp_load(ctx) < 0) { -+ fuse_log(FUSE_LOG_ERR, "seccomp_load() failed\n"); -+ exit(1); -+ } -+ -+ seccomp_release(ctx); -+} -diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h -new file mode 100644 -index 0000000..86bce72 ---- /dev/null -+++ b/tools/virtiofsd/seccomp.h 
-@@ -0,0 +1,14 @@ -+/* -+ * Seccomp sandboxing for virtiofsd -+ * -+ * Copyright (C) 2019 Red Hat, Inc. -+ * -+ * SPDX-License-Identifier: GPL-2.0-or-later -+ */ -+ -+#ifndef VIRTIOFSD_SECCOMP_H -+#define VIRTIOFSD_SECCOMP_H -+ -+void setup_seccomp(void); -+ -+#endif /* VIRTIOFSD_SECCOMP_H */ --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-some-options-to-the-help-message.patch b/kvm-virtiofsd-add-some-options-to-the-help-message.patch deleted file mode 100644 index ac6dc54..0000000 --- a/kvm-virtiofsd-add-some-options-to-the-help-message.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 6d62abb99b6b918f05f099b01a99f4326a69d650 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:26 +0100 -Subject: [PATCH 115/116] virtiofsd: add some options to the help message -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-112-dgilbert@redhat.com> -Patchwork-id: 93565 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 111/112] virtiofsd: add some options to the help message -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Masayoshi Mizuma - -Add following options to the help message: -- cache -- flock|no_flock -- norace -- posix_lock|no_posix_lock -- readdirplus|no_readdirplus -- timeout -- writeback|no_writeback -- xattr|no_xattr - -Signed-off-by: Masayoshi Mizuma - -dgilbert: Split cache, norace, posix_lock, readdirplus off - into our own earlier patches that added the options - -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 1d59b1b210d7c3b0bdf4b10ebe0bb1fccfcb8b95) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index f98d8f2..0801cf7 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -148,6 +148,8 @@ void fuse_cmdline_help(void) - " -o cache= cache mode. could be one of \"auto, " - "always, none\"\n" - " default: auto\n" -+ " -o flock|no_flock enable/disable flock\n" -+ " default: no_flock\n" - " -o log_level= log level, default to \"info\"\n" - " level could be one of \"debug, " - "info, warn, err\"\n" -@@ -163,7 +165,13 @@ void fuse_cmdline_help(void) - " enable/disable readirplus\n" - " default: readdirplus except with " - "cache=none\n" -- ); -+ " -o timeout= I/O timeout (second)\n" -+ " default: depends on cache= option.\n" -+ " -o writeback|no_writeback enable/disable writeback cache\n" -+ " default: no_writeback\n" -+ " -o xattr|no_xattr enable/disable xattr\n" -+ " default: no_xattr\n" -+ ); - } - - static int fuse_helper_opt_proc(void *data, const char *arg, int key, --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-syslog-command-line-option.patch b/kvm-virtiofsd-add-syslog-command-line-option.patch deleted file mode 100644 index 5b55342..0000000 --- a/kvm-virtiofsd-add-syslog-command-line-option.patch +++ /dev/null @@ -1,239 +0,0 @@ -From 6f5cf644bebc189bdb16f1caf3d7c47835d7c287 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:36 +0100 -Subject: [PATCH 065/116] virtiofsd: add --syslog command-line option -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-62-dgilbert@redhat.com> -Patchwork-id: 93509 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 061/112] virtiofsd: add --syslog command-line option -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Sometimes collecting output from stderr is inconvenient or does not fit -within the overall logging architecture. Add syslog(3) support for -cases where stderr cannot be used. - -Signed-off-by: Stefan Hajnoczi -dgilbert: Reworked as a logging function -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit f185621d41f03a23b55795b89e6584253fa23505) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.h | 1 + - tools/virtiofsd/helper.c | 2 ++ - tools/virtiofsd/passthrough_ll.c | 50 +++++++++++++++++++++++++++++++++++++--- - tools/virtiofsd/seccomp.c | 32 +++++++++++++++++-------- - tools/virtiofsd/seccomp.h | 4 +++- - 5 files changed, 76 insertions(+), 13 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 0d61df8..f2750bc 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1795,6 +1795,7 @@ struct fuse_cmdline_opts { - int show_version; - int show_help; - int print_capabilities; -+ int syslog; - unsigned int max_idle_threads; - }; - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 5531425..9692ef9 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -54,6 +54,7 @@ static const struct fuse_opt fuse_helper_opts[] = { - FUSE_HELPER_OPT("subtype=", nodefault_subtype), - FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), - FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), -+ FUSE_HELPER_OPT("--syslog", syslog), - FUSE_OPT_END - }; - -@@ -138,6 +139,7 @@ void fuse_cmdline_help(void) - " -V --version print 
version\n" - " --print-capabilities print vhost-user.json\n" - " -d -o debug enable debug output (implies -f)\n" -+ " --syslog log to syslog (default stderr)\n" - " -f foreground operation\n" - " --daemonize run in background\n" - " -o max_idle_threads the maximum number of idle worker " -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index c281d81..0372aca 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -58,6 +58,7 @@ - #include - #include - #include -+#include - #include - - #include "passthrough_helpers.h" -@@ -138,6 +139,7 @@ static const struct fuse_opt lo_opts[] = { - { "norace", offsetof(struct lo_data, norace), 1 }, - FUSE_OPT_END - }; -+static bool use_syslog = false; - - static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); - -@@ -2262,11 +2264,12 @@ static void setup_mounts(const char *source) - * Lock down this process to prevent access to other processes or files outside - * source directory. This reduces the impact of arbitrary code execution bugs. 
- */ --static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) -+static void setup_sandbox(struct lo_data *lo, struct fuse_session *se, -+ bool enable_syslog) - { - setup_namespaces(lo, se); - setup_mounts(lo->source); -- setup_seccomp(); -+ setup_seccomp(enable_syslog); - } - - /* Raise the maximum number of open file descriptors */ -@@ -2298,6 +2301,42 @@ static void setup_nofile_rlimit(void) - } - } - -+static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) -+{ -+ if (use_syslog) { -+ int priority = LOG_ERR; -+ switch (level) { -+ case FUSE_LOG_EMERG: -+ priority = LOG_EMERG; -+ break; -+ case FUSE_LOG_ALERT: -+ priority = LOG_ALERT; -+ break; -+ case FUSE_LOG_CRIT: -+ priority = LOG_CRIT; -+ break; -+ case FUSE_LOG_ERR: -+ priority = LOG_ERR; -+ break; -+ case FUSE_LOG_WARNING: -+ priority = LOG_WARNING; -+ break; -+ case FUSE_LOG_NOTICE: -+ priority = LOG_NOTICE; -+ break; -+ case FUSE_LOG_INFO: -+ priority = LOG_INFO; -+ break; -+ case FUSE_LOG_DEBUG: -+ priority = LOG_DEBUG; -+ break; -+ } -+ vsyslog(priority, fmt, ap); -+ } else { -+ vfprintf(stderr, fmt, ap); -+ } -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -2336,6 +2375,11 @@ int main(int argc, char *argv[]) - if (fuse_parse_cmdline(&args, &opts) != 0) { - return 1; - } -+ fuse_set_log_func(log_func); -+ use_syslog = opts.syslog; -+ if (use_syslog) { -+ openlog("virtiofsd", LOG_PID, LOG_DAEMON); -+ } - if (opts.show_help) { - printf("usage: %s [options]\n\n", argv[0]); - fuse_cmdline_help(); -@@ -2424,7 +2468,7 @@ int main(int argc, char *argv[]) - /* Must be before sandbox since it wants /proc */ - setup_capng(); - -- setup_sandbox(&lo, se); -+ setup_sandbox(&lo, se, opts.syslog); - - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); -diff --git a/tools/virtiofsd/seccomp.c b/tools/virtiofsd/seccomp.c -index 691fb63..2d9d4a7 100644 ---- a/tools/virtiofsd/seccomp.c -+++ 
b/tools/virtiofsd/seccomp.c -@@ -107,11 +107,28 @@ static const int syscall_whitelist[] = { - SCMP_SYS(writev), - }; - --void setup_seccomp(void) -+/* Syscalls used when --syslog is enabled */ -+static const int syscall_whitelist_syslog[] = { -+ SCMP_SYS(sendto), -+}; -+ -+static void add_whitelist(scmp_filter_ctx ctx, const int syscalls[], size_t len) - { -- scmp_filter_ctx ctx; - size_t i; - -+ for (i = 0; i < len; i++) { -+ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscalls[i], 0) != 0) { -+ fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d failed\n", -+ syscalls[i]); -+ exit(1); -+ } -+ } -+} -+ -+void setup_seccomp(bool enable_syslog) -+{ -+ scmp_filter_ctx ctx; -+ - #ifdef SCMP_ACT_KILL_PROCESS - ctx = seccomp_init(SCMP_ACT_KILL_PROCESS); - /* Handle a newer libseccomp but an older kernel */ -@@ -126,13 +143,10 @@ void setup_seccomp(void) - exit(1); - } - -- for (i = 0; i < G_N_ELEMENTS(syscall_whitelist); i++) { -- if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, -- syscall_whitelist[i], 0) != 0) { -- fuse_log(FUSE_LOG_ERR, "seccomp_rule_add syscall %d", -- syscall_whitelist[i]); -- exit(1); -- } -+ add_whitelist(ctx, syscall_whitelist, G_N_ELEMENTS(syscall_whitelist)); -+ if (enable_syslog) { -+ add_whitelist(ctx, syscall_whitelist_syslog, -+ G_N_ELEMENTS(syscall_whitelist_syslog)); - } - - /* libvhost-user calls this for post-copy migration, we don't need it */ -diff --git a/tools/virtiofsd/seccomp.h b/tools/virtiofsd/seccomp.h -index 86bce72..d47c8ea 100644 ---- a/tools/virtiofsd/seccomp.h -+++ b/tools/virtiofsd/seccomp.h -@@ -9,6 +9,8 @@ - #ifndef VIRTIOFSD_SECCOMP_H - #define VIRTIOFSD_SECCOMP_H - --void setup_seccomp(void); -+#include -+ -+void setup_seccomp(bool enable_syslog); - - #endif /* VIRTIOFSD_SECCOMP_H */ --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch b/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch deleted file mode 100644 index 0241a9d..0000000 --- a/kvm-virtiofsd-add-thread-pool-size-NUM-option.patch +++ 
/dev/null @@ -1,106 +0,0 @@ -From 3dbfb932288eb5a55dfdc0eebca7e4c7f0cf6f33 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:22 +0100 -Subject: [PATCH 111/116] virtiofsd: add --thread-pool-size=NUM option -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-108-dgilbert@redhat.com> -Patchwork-id: 93561 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 107/112] virtiofsd: add --thread-pool-size=NUM option -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Add an option to control the size of the thread pool. Requests are now -processed in parallel by default. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 951b3120dbc971f08681e1d860360e4a1e638902) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 1 + - tools/virtiofsd/fuse_lowlevel.c | 7 ++++++- - tools/virtiofsd/fuse_virtio.c | 5 +++-- - 3 files changed, 10 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index 1447d86..4e47e58 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -72,6 +72,7 @@ struct fuse_session { - int vu_listen_fd; - int vu_socketfd; - struct fv_VuDev *virtio_dev; -+ int thread_pool_size; - }; - - struct fuse_chan { -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 79a4031..de2e2e0 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -28,6 +28,7 @@ - #include - #include - -+#define THREAD_POOL_SIZE 64 - - #define OFFSET_MAX 0x7fffffffffffffffLL - -@@ -2519,6 +2520,7 @@ static const struct fuse_opt fuse_ll_opts[] = { - LL_OPTION("allow_root", deny_others, 1), - 
LL_OPTION("--socket-path=%s", vu_socket_path, 0), - LL_OPTION("--fd=%d", vu_listen_fd, 0), -+ LL_OPTION("--thread-pool-size=%d", thread_pool_size, 0), - FUSE_OPT_END - }; - -@@ -2537,7 +2539,9 @@ void fuse_lowlevel_help(void) - printf( - " -o allow_root allow access by root\n" - " --socket-path=PATH path for the vhost-user socket\n" -- " --fd=FDNUM fd number of vhost-user socket\n"); -+ " --fd=FDNUM fd number of vhost-user socket\n" -+ " --thread-pool-size=NUM thread pool size limit (default %d)\n", -+ THREAD_POOL_SIZE); - } - - void fuse_session_destroy(struct fuse_session *se) -@@ -2591,6 +2595,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - } - se->fd = -1; - se->vu_listen_fd = -1; -+ se->thread_pool_size = THREAD_POOL_SIZE; - se->conn.max_write = UINT_MAX; - se->conn.max_readahead = UINT_MAX; - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 0dcf2ef..9f65823 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -572,10 +572,11 @@ static void *fv_queue_thread(void *opaque) - struct fv_QueueInfo *qi = opaque; - struct VuDev *dev = &qi->virtio_dev->dev; - struct VuVirtq *q = vu_get_queue(dev, qi->qidx); -+ struct fuse_session *se = qi->virtio_dev->se; - GThreadPool *pool; - -- pool = g_thread_pool_new(fv_queue_worker, qi, 1 /* TODO max_threads */, -- TRUE, NULL); -+ pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, TRUE, -+ NULL); - if (!pool) { - fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); - return NULL; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-add-vhost-user.json-file.patch b/kvm-virtiofsd-add-vhost-user.json-file.patch deleted file mode 100644 index a24b24f..0000000 --- a/kvm-virtiofsd-add-vhost-user.json-file.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 77eb3258e76a1ac240503572d4f41d45cb832ba2 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:09 +0100 -Subject: [PATCH 038/116] virtiofsd: add vhost-user.json file -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-35-dgilbert@redhat.com> -Patchwork-id: 93490 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 034/112] virtiofsd: add vhost-user.json file -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Install a vhost-user.json file describing virtiofsd. This allows -libvirt and other management tools to enumerate vhost-user backend -programs. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 315616ed50ba15a5d7236ade8a402a93898202de) -Signed-off-by: Miroslav Rezanina ---- - .gitignore | 1 + - Makefile | 1 + - tools/virtiofsd/50-qemu-virtiofsd.json.in | 5 +++++ - 3 files changed, 7 insertions(+) - create mode 100644 tools/virtiofsd/50-qemu-virtiofsd.json.in - -diff --git a/.gitignore b/.gitignore -index aefad32..d7a4f99 100644 ---- a/.gitignore -+++ b/.gitignore -@@ -6,6 +6,7 @@ - /config-target.* - /config.status - /config-temp -+/tools/virtiofsd/50-qemu-virtiofsd.json - /elf2dmp - /trace-events-all - /trace/generated-events.h -diff --git a/Makefile b/Makefile -index 1526775..0e9755d 100644 ---- a/Makefile -+++ b/Makefile -@@ -332,6 +332,7 @@ endif - - ifdef CONFIG_LINUX - HELPERS-y += virtiofsd$(EXESUF) -+vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json - endif - - # Sphinx does not allow building manuals into the same directory as -diff --git a/tools/virtiofsd/50-qemu-virtiofsd.json.in b/tools/virtiofsd/50-qemu-virtiofsd.json.in -new file mode 100644 -index 0000000..9bcd86f ---- /dev/null -+++ b/tools/virtiofsd/50-qemu-virtiofsd.json.in -@@ -0,0 +1,5 @@ -+{ -+ "description": "QEMU virtiofsd vhost-user-fs", -+ 
"type": "fs", -+ "binary": "@libexecdir@/virtiofsd" -+} --- -1.8.3.1 - diff --git a/kvm-virtiofsd-cap-ng-helpers.patch b/kvm-virtiofsd-cap-ng-helpers.patch deleted file mode 100644 index 305745d..0000000 --- a/kvm-virtiofsd-cap-ng-helpers.patch +++ /dev/null @@ -1,175 +0,0 @@ -From f62613d8058bcb60b26727d980a37537103b0033 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:32 +0100 -Subject: [PATCH 061/116] virtiofsd: cap-ng helpers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-58-dgilbert@redhat.com> -Patchwork-id: 93512 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 057/112] virtiofsd: cap-ng helpers -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -libcap-ng reads /proc during capng_get_caps_process, and virtiofsd's -sandboxing doesn't have /proc mounted; thus we have to do the -caps read before we sandbox it and save/restore the state. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 2405f3c0d19eb4d516a88aa4e5c54e5f9c6bbea3) -Signed-off-by: Miroslav Rezanina ---- - Makefile | 4 +-- - tools/virtiofsd/passthrough_ll.c | 72 ++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 74 insertions(+), 2 deletions(-) - -diff --git a/Makefile b/Makefile -index 6879a06..ff05c30 100644 ---- a/Makefile -+++ b/Makefile -@@ -330,7 +330,7 @@ endif - endif - endif - --ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) -+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) - HELPERS-y += virtiofsd$(EXESUF) - vhost-user-json-y += tools/virtiofsd/50-qemu-virtiofsd.json - endif -@@ -682,7 +682,7 @@ rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS) - $(call LINK, $^) - - # relies on Linux-specific syscalls --ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP),yy) -+ifeq ($(CONFIG_LINUX)$(CONFIG_SECCOMP)$(CONFIG_LIBCAP_NG),yyy) - virtiofsd$(EXESUF): $(virtiofsd-obj-y) libvhost-user.a $(COMMON_LDADDS) - $(call LINK, $^) - endif -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index bd8925b..97e7c75 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -39,6 +39,7 @@ - #include "fuse_virtio.h" - #include "fuse_lowlevel.h" - #include -+#include - #include - #include - #include -@@ -139,6 +140,13 @@ static const struct fuse_opt lo_opts[] = { - - static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); - -+static struct { -+ pthread_mutex_t mutex; -+ void *saved; -+} cap; -+/* That we loaded cap-ng in the current thread from the saved */ -+static __thread bool cap_loaded = 0; -+ - static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); - - static int is_dot_or_dotdot(const char *name) -@@ -162,6 +170,37 @@ static struct lo_data *lo_data(fuse_req_t req) - return (struct lo_data *)fuse_req_userdata(req); - } - -+/* -+ * Load capng's state from our saved state if the current thread -+ * hadn't previously been loaded. 
-+ * returns 0 on success -+ */ -+static int load_capng(void) -+{ -+ if (!cap_loaded) { -+ pthread_mutex_lock(&cap.mutex); -+ capng_restore_state(&cap.saved); -+ /* -+ * restore_state free's the saved copy -+ * so make another. -+ */ -+ cap.saved = capng_save_state(); -+ if (!cap.saved) { -+ fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n"); -+ return -EINVAL; -+ } -+ pthread_mutex_unlock(&cap.mutex); -+ -+ /* -+ * We want to use the loaded state for our pid, -+ * not the original -+ */ -+ capng_setpid(syscall(SYS_gettid)); -+ cap_loaded = true; -+ } -+ return 0; -+} -+ - static void lo_map_init(struct lo_map *map) - { - map->elems = NULL; -@@ -2024,6 +2063,35 @@ static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) - } - - /* -+ * Capture the capability state, we'll need to restore this for individual -+ * threads later; see load_capng. -+ */ -+static void setup_capng(void) -+{ -+ /* Note this accesses /proc so has to happen before the sandbox */ -+ if (capng_get_caps_process()) { -+ fuse_log(FUSE_LOG_ERR, "capng_get_caps_process\n"); -+ exit(1); -+ } -+ pthread_mutex_init(&cap.mutex, NULL); -+ pthread_mutex_lock(&cap.mutex); -+ cap.saved = capng_save_state(); -+ if (!cap.saved) { -+ fuse_log(FUSE_LOG_ERR, "capng_save_state\n"); -+ exit(1); -+ } -+ pthread_mutex_unlock(&cap.mutex); -+} -+ -+static void cleanup_capng(void) -+{ -+ free(cap.saved); -+ cap.saved = NULL; -+ pthread_mutex_destroy(&cap.mutex); -+} -+ -+ -+/* - * Make the source directory our root so symlinks cannot escape and no other - * files are accessible. Assumes unshare(CLONE_NEWNS) was already called. 
- */ -@@ -2216,12 +2284,16 @@ int main(int argc, char *argv[]) - - fuse_daemonize(opts.foreground); - -+ /* Must be before sandbox since it wants /proc */ -+ setup_capng(); -+ - setup_sandbox(&lo, se); - - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); - - fuse_session_unmount(se); -+ cleanup_capng(); - err_out3: - fuse_remove_signal_handlers(se); - err_out2: --- -1.8.3.1 - diff --git a/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch b/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch deleted file mode 100644 index caa4560..0000000 --- a/kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch +++ /dev/null @@ -1,1111 +0,0 @@ -From d6a0067e6c08523a8f605f775be980eaf0a23690 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:23 +0100 -Subject: [PATCH 052/116] virtiofsd: check input buffer size in fuse_lowlevel.c - ops -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-49-dgilbert@redhat.com> -Patchwork-id: 93503 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 048/112] virtiofsd: check input buffer size in fuse_lowlevel.c ops -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Each FUSE operation involves parsing the input buffer. Currently the -code assumes the input buffer is large enough for the expected -arguments. This patch uses fuse_mbuf_iter to check the size. - -Most operations are simple to convert. Some are more complicated due to -variable-length inputs or different sizes depending on the protocol -version. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 70995754416eb4491c31607fe380a83cfd25a087) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 581 +++++++++++++++++++++++++++++++--------- - 1 file changed, 456 insertions(+), 125 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 611e8b0..02e1d83 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -19,6 +19,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -27,7 +28,6 @@ - #include - - --#define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) - #define OFFSET_MAX 0x7fffffffffffffffLL - - struct fuse_pollhandle { -@@ -706,9 +706,14 @@ int fuse_reply_lseek(fuse_req_t req, off_t off) - return send_reply_ok(req, &arg, sizeof(arg)); - } - --static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- char *name = (char *)inarg; -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ if (!name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.lookup) { - req->se->op.lookup(req, nodeid, name); -@@ -717,9 +722,16 @@ static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_forget(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_forget_in *arg = (struct fuse_forget_in *)inarg; -+ struct fuse_forget_in *arg; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.forget) { - req->se->op.forget(req, nodeid, arg->nlookup); -@@ -729,20 +741,48 @@ static void do_forget(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - - static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, -- const void 
*inarg) -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_batch_forget_in *arg = (void *)inarg; -- struct fuse_forget_one *param = (void *)PARAM(arg); -- unsigned int i; -+ struct fuse_batch_forget_in *arg; -+ struct fuse_forget_data *forgets; -+ size_t scount; - - (void)nodeid; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_none(req); -+ return; -+ } -+ -+ /* -+ * Prevent integer overflow. The compiler emits the following warning -+ * unless we use the scount local variable: -+ * -+ * error: comparison is always false due to limited range of data type -+ * [-Werror=type-limits] -+ * -+ * This may be true on 64-bit hosts but we need this check for 32-bit -+ * hosts. -+ */ -+ scount = arg->count; -+ if (scount > SIZE_MAX / sizeof(forgets[0])) { -+ fuse_reply_none(req); -+ return; -+ } -+ -+ forgets = fuse_mbuf_iter_advance(iter, arg->count * sizeof(forgets[0])); -+ if (!forgets) { -+ fuse_reply_none(req); -+ return; -+ } -+ - if (req->se->op.forget_multi) { -- req->se->op.forget_multi(req, arg->count, -- (struct fuse_forget_data *)param); -+ req->se->op.forget_multi(req, arg->count, forgets); - } else if (req->se->op.forget) { -+ unsigned int i; -+ - for (i = 0; i < arg->count; i++) { -- struct fuse_forget_one *forget = ¶m[i]; - struct fuse_req *dummy_req; - - dummy_req = fuse_ll_alloc_req(req->se); -@@ -754,7 +794,7 @@ static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, - dummy_req->ctx = req->ctx; - dummy_req->ch = NULL; - -- req->se->op.forget(dummy_req, forget->nodeid, forget->nlookup); -+ req->se->op.forget(dummy_req, forgets[i].ino, forgets[i].nlookup); - } - fuse_reply_none(req); - } else { -@@ -762,12 +802,19 @@ static void do_batch_forget(fuse_req_t req, fuse_ino_t nodeid, - } - } - --static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { - struct fuse_file_info *fip = NULL; - struct 
fuse_file_info fi; - -- struct fuse_getattr_in *arg = (struct fuse_getattr_in *)inarg; -+ struct fuse_getattr_in *arg; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (arg->getattr_flags & FUSE_GETATTR_FH) { - memset(&fi, 0, sizeof(fi)); -@@ -782,14 +829,21 @@ static void do_getattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_setattr_in *arg = (struct fuse_setattr_in *)inarg; -- - if (req->se->op.setattr) { -+ struct fuse_setattr_in *arg; - struct fuse_file_info *fi = NULL; - struct fuse_file_info fi_store; - struct stat stbuf; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&stbuf, 0, sizeof(stbuf)); - convert_attr(arg, &stbuf); - if (arg->valid & FATTR_FH) { -@@ -810,9 +864,16 @@ static void do_setattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_access(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_access_in *arg = (struct fuse_access_in *)inarg; -+ struct fuse_access_in *arg; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.access) { - req->se->op.access(req, nodeid, arg->mask); -@@ -821,9 +882,10 @@ static void do_access(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- (void)inarg; -+ (void)iter; - - if (req->se->op.readlink) { - req->se->op.readlink(req, nodeid); 
-@@ -832,10 +894,18 @@ static void do_readlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_mknod_in *arg = (struct fuse_mknod_in *)inarg; -- char *name = PARAM(arg); -+ struct fuse_mknod_in *arg; -+ const char *name; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ name = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - req->ctx.umask = arg->umask; - -@@ -846,22 +916,37 @@ static void do_mknod(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_mkdir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *)inarg; -+ struct fuse_mkdir_in *arg; -+ const char *name; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ name = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - req->ctx.umask = arg->umask; - - if (req->se->op.mkdir) { -- req->se->op.mkdir(req, nodeid, PARAM(arg), arg->mode); -+ req->se->op.mkdir(req, nodeid, name, arg->mode); - } else { - fuse_reply_err(req, ENOSYS); - } - } - --static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- char *name = (char *)inarg; -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ -+ if (!name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.unlink) { - req->se->op.unlink(req, nodeid, name); -@@ -870,9 +955,15 @@ static void do_unlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void 
*inarg) -+static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- char *name = (char *)inarg; -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ -+ if (!name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.rmdir) { - req->se->op.rmdir(req, nodeid, name); -@@ -881,10 +972,16 @@ static void do_rmdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- char *name = (char *)inarg; -- char *linkname = ((char *)inarg) + strlen((char *)inarg) + 1; -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ const char *linkname = fuse_mbuf_iter_advance_str(iter); -+ -+ if (!name || !linkname) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.symlink) { - req->se->op.symlink(req, linkname, nodeid, name); -@@ -893,11 +990,20 @@ static void do_symlink(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_rename(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_rename_in *arg = (struct fuse_rename_in *)inarg; -- char *oldname = PARAM(arg); -- char *newname = oldname + strlen(oldname) + 1; -+ struct fuse_rename_in *arg; -+ const char *oldname; -+ const char *newname; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ oldname = fuse_mbuf_iter_advance_str(iter); -+ newname = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !oldname || !newname) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.rename) { - req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, 0); -@@ -906,11 +1012,20 @@ static void do_rename(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, 
const void *inarg) -+static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_rename2_in *arg = (struct fuse_rename2_in *)inarg; -- char *oldname = PARAM(arg); -- char *newname = oldname + strlen(oldname) + 1; -+ struct fuse_rename2_in *arg; -+ const char *oldname; -+ const char *newname; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ oldname = fuse_mbuf_iter_advance_str(iter); -+ newname = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !oldname || !newname) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.rename) { - req->se->op.rename(req, nodeid, oldname, arg->newdir, newname, -@@ -920,24 +1035,38 @@ static void do_rename2(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_link(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_link(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_link_in *arg = (struct fuse_link_in *)inarg; -+ struct fuse_link_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.link) { -- req->se->op.link(req, arg->oldnodeid, nodeid, PARAM(arg)); -+ req->se->op.link(req, arg->oldnodeid, nodeid, name); - } else { - fuse_reply_err(req, ENOSYS); - } - } - --static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_create(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_create_in *arg = (struct fuse_create_in *)inarg; -- - if (req->se->op.create) { -+ struct fuse_create_in *arg; - struct fuse_file_info fi; -- char *name = PARAM(arg); -+ const char *name; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ name = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - memset(&fi, 0, sizeof(fi)); - 
fi.flags = arg->flags; -@@ -950,11 +1079,18 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_open(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_open_in *arg = (struct fuse_open_in *)inarg; -+ struct fuse_open_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - -@@ -965,13 +1101,15 @@ static void do_open(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_read(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; -- - if (req->se->op.read) { -+ struct fuse_read_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.lock_owner = arg->lock_owner; -@@ -982,11 +1120,24 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_write(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_write_in *arg = (struct fuse_write_in *)inarg; -+ struct fuse_write_in *arg; - struct fuse_file_info fi; -- char *param; -+ const char *param; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ param = fuse_mbuf_iter_advance(iter, arg->size); -+ if (!param) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; -@@ -994,7 +1145,6 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - - 
fi.lock_owner = arg->lock_owner; - fi.flags = arg->flags; -- param = PARAM(arg); - - if (req->se->op.write) { - req->se->op.write(req, nodeid, param, arg->size, arg->offset, &fi); -@@ -1052,11 +1202,18 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, - se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); - } - --static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_flush(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_flush_in *arg = (struct fuse_flush_in *)inarg; -+ struct fuse_flush_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.flush = 1; -@@ -1069,19 +1226,26 @@ static void do_flush(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_release(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_release_in *arg = (struct fuse_release_in *)inarg; -+ struct fuse_release_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - fi.fh = arg->fh; - fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 
1 : 0; - fi.lock_owner = arg->lock_owner; -+ - if (arg->release_flags & FUSE_RELEASE_FLOCK_UNLOCK) { - fi.flock_release = 1; -- fi.lock_owner = arg->lock_owner; - } - - if (req->se->op.release) { -@@ -1091,11 +1255,19 @@ static void do_release(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; -+ struct fuse_fsync_in *arg; - struct fuse_file_info fi; -- int datasync = arg->fsync_flags & 1; -+ int datasync; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ datasync = arg->fsync_flags & 1; - - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; -@@ -1111,11 +1283,18 @@ static void do_fsync(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_open_in *arg = (struct fuse_open_in *)inarg; -+ struct fuse_open_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - -@@ -1126,11 +1305,18 @@ static void do_opendir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; -+ struct fuse_read_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - 
fi.fh = arg->fh; - -@@ -1141,11 +1327,18 @@ static void do_readdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_read_in *arg = (struct fuse_read_in *)inarg; -+ struct fuse_read_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - -@@ -1156,11 +1349,18 @@ static void do_readdirplus(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_release_in *arg = (struct fuse_release_in *)inarg; -+ struct fuse_release_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.flags = arg->flags; - fi.fh = arg->fh; -@@ -1172,11 +1372,19 @@ static void do_releasedir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_fsync_in *arg = (struct fuse_fsync_in *)inarg; -+ struct fuse_fsync_in *arg; - struct fuse_file_info fi; -- int datasync = arg->fsync_flags & 1; -+ int datasync; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ datasync = arg->fsync_flags & 1; - - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; -@@ -1188,10 +1396,11 @@ static void do_fsyncdir(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) 
- } - } - --static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { - (void)nodeid; -- (void)inarg; -+ (void)iter; - - if (req->se->op.statfs) { - req->se->op.statfs(req, nodeid); -@@ -1204,11 +1413,25 @@ static void do_statfs(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *)inarg; -- char *name = PARAM(arg); -- char *value = name + strlen(name) + 1; -+ struct fuse_setxattr_in *arg; -+ const char *name; -+ const char *value; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ name = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ value = fuse_mbuf_iter_advance(iter, arg->size); -+ if (!value) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.setxattr) { - req->se->op.setxattr(req, nodeid, name, value, arg->size, arg->flags); -@@ -1217,20 +1440,36 @@ static void do_setxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_getxattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; -+ struct fuse_getxattr_in *arg; -+ const char *name; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ name = fuse_mbuf_iter_advance_str(iter); -+ if (!arg || !name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.getxattr) { -- req->se->op.getxattr(req, nodeid, PARAM(arg), arg->size); -+ req->se->op.getxattr(req, nodeid, name, arg->size); - } else { - fuse_reply_err(req, ENOSYS); - } - } - --static void 
do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *)inarg; -+ struct fuse_getxattr_in *arg; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.listxattr) { - req->se->op.listxattr(req, nodeid, arg->size); -@@ -1239,9 +1478,15 @@ static void do_listxattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_removexattr(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- char *name = (char *)inarg; -+ const char *name = fuse_mbuf_iter_advance_str(iter); -+ -+ if (!name) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.removexattr) { - req->se->op.removexattr(req, nodeid, name); -@@ -1265,12 +1510,19 @@ static void convert_fuse_file_lock(struct fuse_file_lock *fl, - flock->l_pid = fl->pid; - } - --static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; -+ struct fuse_lk_in *arg; - struct fuse_file_info fi; - struct flock flock; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.lock_owner = arg->owner; -@@ -1284,12 +1536,18 @@ static void do_getlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - - static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, -- const void *inarg, int sleep) -+ struct fuse_mbuf_iter *iter, int sleep) - { -- struct fuse_lk_in *arg = (struct fuse_lk_in *)inarg; -+ struct fuse_lk_in *arg; - struct fuse_file_info fi; - struct flock 
flock; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.lock_owner = arg->owner; -@@ -1327,14 +1585,16 @@ static void do_setlk_common(fuse_req_t req, fuse_ino_t nodeid, - } - } - --static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_setlk(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- do_setlk_common(req, nodeid, inarg, 0); -+ do_setlk_common(req, nodeid, iter, 0); - } - --static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_setlkw(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- do_setlk_common(req, nodeid, inarg, 1); -+ do_setlk_common(req, nodeid, iter, 1); - } - - static int find_interrupted(struct fuse_session *se, struct fuse_req *req) -@@ -1379,12 +1639,20 @@ static int find_interrupted(struct fuse_session *se, struct fuse_req *req) - return 0; - } - --static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_interrupt_in *arg = (struct fuse_interrupt_in *)inarg; -+ struct fuse_interrupt_in *arg; - struct fuse_session *se = req->se; - - (void)nodeid; -+ -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - if (se->debug) { - fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", - (unsigned long long)arg->unique); -@@ -1425,9 +1693,15 @@ static struct fuse_req *check_interrupt(struct fuse_session *se, - } - } - --static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_bmap_in *arg = (struct fuse_bmap_in *)inarg; -+ struct fuse_bmap_in *arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ -+ if 
(!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - if (req->se->op.bmap) { - req->se->op.bmap(req, nodeid, arg->blocksize, arg->block); -@@ -1436,18 +1710,34 @@ static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_ioctl(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_ioctl_in *arg = (struct fuse_ioctl_in *)inarg; -- unsigned int flags = arg->flags; -- void *in_buf = arg->in_size ? PARAM(arg) : NULL; -+ struct fuse_ioctl_in *arg; -+ unsigned int flags; -+ void *in_buf = NULL; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ flags = arg->flags; - if (flags & FUSE_IOCTL_DIR && !(req->se->conn.want & FUSE_CAP_IOCTL_DIR)) { - fuse_reply_err(req, ENOTTY); - return; - } - -+ if (arg->in_size) { -+ in_buf = fuse_mbuf_iter_advance(iter, arg->in_size); -+ if (!in_buf) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - -@@ -1468,11 +1758,18 @@ void fuse_pollhandle_destroy(struct fuse_pollhandle *ph) - free(ph); - } - --static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_poll(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_poll_in *arg = (struct fuse_poll_in *)inarg; -+ struct fuse_poll_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - fi.poll_events = arg->events; -@@ -1496,11 +1793,18 @@ static void do_poll(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, -+ 
struct fuse_mbuf_iter *iter) - { -- struct fuse_fallocate_in *arg = (struct fuse_fallocate_in *)inarg; -+ struct fuse_fallocate_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - -@@ -1513,12 +1817,17 @@ static void do_fallocate(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - - static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, -- const void *inarg) -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_copy_file_range_in *arg = -- (struct fuse_copy_file_range_in *)inarg; -+ struct fuse_copy_file_range_in *arg; - struct fuse_file_info fi_in, fi_out; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - memset(&fi_in, 0, sizeof(fi_in)); - fi_in.fh = arg->fh_in; - -@@ -1535,11 +1844,17 @@ static void do_copy_file_range(fuse_req_t req, fuse_ino_t nodeid_in, - } - } - --static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_lseek_in *arg = (struct fuse_lseek_in *)inarg; -+ struct fuse_lseek_in *arg; - struct fuse_file_info fi; - -+ arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; - -@@ -1550,15 +1865,33 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_init(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { -- struct fuse_init_in *arg = (struct fuse_init_in *)inarg; -+ size_t compat_size = offsetof(struct fuse_init_in, max_readahead); -+ struct fuse_init_in *arg; - struct fuse_init_out outarg; - struct fuse_session *se = req->se; - 
size_t bufsize = se->bufsize; - size_t outargsize = sizeof(outarg); - - (void)nodeid; -+ -+ /* First consume the old fields... */ -+ arg = fuse_mbuf_iter_advance(iter, compat_size); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ /* ...and now consume the new fields. */ -+ if (arg->major == 7 && arg->minor >= 6) { -+ if (!fuse_mbuf_iter_advance(iter, sizeof(*arg) - compat_size)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ } -+ - if (se->debug) { - fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); - if (arg->major == 7 && arg->minor >= 6) { -@@ -1791,12 +2124,13 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - send_reply_ok(req, &outarg, outargsize); - } - --static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) -+static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter) - { - struct fuse_session *se = req->se; - - (void)nodeid; -- (void)inarg; -+ (void)iter; - - se->got_destroy = 1; - if (se->op.destroy) { -@@ -1976,7 +2310,7 @@ int fuse_req_interrupted(fuse_req_t req) - } - - static struct { -- void (*func)(fuse_req_t, fuse_ino_t, const void *); -+ void (*func)(fuse_req_t, fuse_ino_t, struct fuse_mbuf_iter *); - const char *name; - } fuse_ll_ops[] = { - [FUSE_LOOKUP] = { do_lookup, "LOOKUP" }, -@@ -2060,7 +2394,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, - const struct fuse_buf *buf = bufv->buf; - struct fuse_mbuf_iter iter = FUSE_MBUF_ITER_INIT(buf); - struct fuse_in_header *in; -- const void *inarg; - struct fuse_req *req; - int err; - -@@ -2138,13 +2471,11 @@ void fuse_session_process_buf_int(struct fuse_session *se, - } - } - -- inarg = (void *)&in[1]; - if (in->opcode == FUSE_WRITE && se->op.write_buf) { - do_write_buf(req, in->nodeid, &iter, bufv); - } else { -- fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); -+ fuse_ll_ops[in->opcode].func(req, in->nodeid, &iter); - } -- - return; - - reply_err: --- 
-1.8.3.1 - diff --git a/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch b/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch deleted file mode 100644 index b6de0a9..0000000 --- a/kvm-virtiofsd-cleanup-allocated-resource-in-se.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 99ff67682ef7c5659bdc9836008541861ae313d5 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:56 +0100 -Subject: [PATCH 085/116] virtiofsd: cleanup allocated resource in se -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-82-dgilbert@redhat.com> -Patchwork-id: 93533 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 081/112] virtiofsd: cleanup allocated resource in se -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -This cleans up unfreed resources in se on quiting, including -se->virtio_dev, se->vu_socket_path, se->vu_socketfd. - -Signed-off-by: Liu Bo -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 61cfc44982e566c33b9d5df17858e4d5ae373873) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 7 +++++++ - tools/virtiofsd/fuse_virtio.c | 7 +++++++ - tools/virtiofsd/fuse_virtio.h | 2 +- - 3 files changed, 15 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 65f91da..440508a 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2532,6 +2532,13 @@ void fuse_session_destroy(struct fuse_session *se) - if (se->fd != -1) { - close(se->fd); - } -+ -+ if (se->vu_socket_path) { -+ virtio_session_close(se); -+ free(se->vu_socket_path); -+ se->vu_socket_path = NULL; -+ } -+ - free(se); - } - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 7a8774a..e7bd772 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -833,3 +833,10 @@ int virtio_session_mount(struct fuse_session *se) - - return 0; - } -+ -+void virtio_session_close(struct fuse_session *se) -+{ -+ close(se->vu_socketfd); -+ free(se->virtio_dev); -+ se->virtio_dev = NULL; -+} -diff --git a/tools/virtiofsd/fuse_virtio.h b/tools/virtiofsd/fuse_virtio.h -index cc676b9..1116840 100644 ---- a/tools/virtiofsd/fuse_virtio.h -+++ b/tools/virtiofsd/fuse_virtio.h -@@ -19,7 +19,7 @@ - struct fuse_session; - - int virtio_session_mount(struct fuse_session *se); -- -+void virtio_session_close(struct fuse_session *se); - int virtio_loop(struct fuse_session *se); - - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch b/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch deleted file mode 100644 index d01b000..0000000 --- a/kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch +++ /dev/null @@ -1,99 +0,0 @@ -From e00543b0384fba61a9c7274c73e11a25e7ab2946 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:13 +0100 -Subject: [PATCH 102/116] virtiofsd: convert more fprintf and perror to use - fuse log infra -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-99-dgilbert@redhat.com> -Patchwork-id: 93552 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 098/112] virtiofsd: convert more fprintf and perror to use fuse log infra -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Eryu Guan - -Signed-off-by: Eryu Guan -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Misono Tomohiro -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit fc1aed0bf96259d0b46b1cfea7497b7762c4ee3d) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_signals.c | 7 +++++-- - tools/virtiofsd/helper.c | 9 ++++++--- - 2 files changed, 11 insertions(+), 5 deletions(-) - -diff --git a/tools/virtiofsd/fuse_signals.c b/tools/virtiofsd/fuse_signals.c -index dc7c8ac..f18625b 100644 ---- a/tools/virtiofsd/fuse_signals.c -+++ b/tools/virtiofsd/fuse_signals.c -@@ -12,6 +12,7 @@ - #include "fuse_i.h" - #include "fuse_lowlevel.h" - -+#include - #include - #include - #include -@@ -47,13 +48,15 @@ static int set_one_signal_handler(int sig, void (*handler)(int), int remove) - sa.sa_flags = 0; - - if (sigaction(sig, NULL, &old_sa) == -1) { -- perror("fuse: cannot get old signal handler"); -+ fuse_log(FUSE_LOG_ERR, "fuse: cannot get old signal handler: %s\n", -+ strerror(errno)); - return -1; - } - - if (old_sa.sa_handler == (remove ? 
handler : SIG_DFL) && - sigaction(sig, &sa, NULL) == -1) { -- perror("fuse: cannot set signal handler"); -+ fuse_log(FUSE_LOG_ERR, "fuse: cannot set signal handler: %s\n", -+ strerror(errno)); - return -1; - } - return 0; -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 33749bf..f98d8f2 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -208,7 +208,8 @@ int fuse_daemonize(int foreground) - char completed; - - if (pipe(waiter)) { -- perror("fuse_daemonize: pipe"); -+ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: pipe: %s\n", -+ strerror(errno)); - return -1; - } - -@@ -218,7 +219,8 @@ int fuse_daemonize(int foreground) - */ - switch (fork()) { - case -1: -- perror("fuse_daemonize: fork"); -+ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: fork: %s\n", -+ strerror(errno)); - return -1; - case 0: - break; -@@ -228,7 +230,8 @@ int fuse_daemonize(int foreground) - } - - if (setsid() == -1) { -- perror("fuse_daemonize: setsid"); -+ fuse_log(FUSE_LOG_ERR, "fuse_daemonize: setsid: %s\n", -+ strerror(errno)); - return -1; - } - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch b/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch deleted file mode 100644 index 8c1022a..0000000 --- a/kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 8e6473e906dfc7d2a62abaf1ec80ff461e4d201d Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:12 +0100 -Subject: [PATCH 101/116] virtiofsd: do not always set FUSE_FLOCK_LOCKS -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-98-dgilbert@redhat.com> -Patchwork-id: 93551 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 097/112] virtiofsd: do not always set FUSE_FLOCK_LOCKS -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Peng Tao - -Right now we always enable it regardless of given commandlines. -Fix it by setting the flag relying on the lo->flock bit. - -Signed-off-by: Peng Tao -Reviewed-by: Misono Tomohiro -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit e468d4af5f5192ab33283464a9f6933044ce47f7) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 11 ++++++++--- - 1 file changed, 8 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index ab16135..ccbbec1 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -546,9 +546,14 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); - conn->want |= FUSE_CAP_WRITEBACK_CACHE; - } -- if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { -- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -- conn->want |= FUSE_CAP_FLOCK_LOCKS; -+ if (conn->capable & FUSE_CAP_FLOCK_LOCKS) { -+ if (lo->flock) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -+ conn->want |= FUSE_CAP_FLOCK_LOCKS; -+ } else { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling flock locks\n"); -+ conn->want &= ~FUSE_CAP_FLOCK_LOCKS; -+ } - } - - if (conn->capable & FUSE_CAP_POSIX_LOCKS) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-do_read-missing-NULL-check.patch b/kvm-virtiofsd-do_read-missing-NULL-check.patch deleted file mode 100644 index 4f8e5ef..0000000 --- a/kvm-virtiofsd-do_read-missing-NULL-check.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 901c005299b0316bbca7bc190de56f6c7a2a9880 
Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:11 +0000 -Subject: [PATCH 15/18] virtiofsd: do_read missing NULL check -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-5-dgilbert@redhat.com> -Patchwork-id: 94127 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/7] virtiofsd: do_read missing NULL check -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: "Dr. David Alan Gilbert" - -Missing a NULL check if the argument fetch fails. - -Fixes: Coverity CID 1413119 -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 99ce9a7e60fd12b213b985343ff8fcc172de59fd) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/fuse_lowlevel.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 01c418a..704c036 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1116,6 +1116,10 @@ static void do_read(fuse_req_t req, fuse_ino_t nodeid, - struct fuse_file_info fi; - - arg = fuse_mbuf_iter_advance(iter, sizeof(*arg)); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - memset(&fi, 0, sizeof(fi)); - fi.fh = arg->fh; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch b/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch deleted file mode 100644 index 3279a5e..0000000 --- a/kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch +++ /dev/null @@ -1,47 +0,0 @@ -From bc127914b29f2e4163bc7ca786e04ed955d96016 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:00 +0100 -Subject: [PATCH 089/116] virtiofsd: enable PARALLEL_DIROPS during INIT -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-86-dgilbert@redhat.com> -Patchwork-id: 93539 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 085/112] virtiofsd: enable PARALLEL_DIROPS during INIT -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -lookup is a RO operations, PARALLEL_DIROPS can be enabled. - -Signed-off-by: Liu Bo -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit b7ed733a3841c4d489d3bd6ca7ed23c84db119c2) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index aac282f..70568d2 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2062,6 +2062,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, - if (se->conn.want & FUSE_CAP_ASYNC_READ) { - outarg.flags |= FUSE_ASYNC_READ; - } -+ if (se->conn.want & FUSE_CAP_PARALLEL_DIROPS) { -+ outarg.flags |= FUSE_PARALLEL_DIROPS; -+ } - if (se->conn.want & FUSE_CAP_POSIX_LOCKS) { - outarg.flags |= FUSE_POSIX_LOCKS; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch b/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch deleted file mode 100644 index 96f91a1..0000000 --- a/kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 983b383bc4a92a9f7ecff0332cadefed2f58f502 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:50 +0100 -Subject: [PATCH 079/116] virtiofsd: extract root inode init into setup_root() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-76-dgilbert@redhat.com> -Patchwork-id: 93527 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 075/112] virtiofsd: extract root inode init into setup_root() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Inititialize the root inode in a single place. - -Signed-off-by: Miklos Szeredi -Signed-off-by: Stefan Hajnoczi -dgilbert: -with fix suggested by Misono Tomohiro -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 3ca8a2b1c83eb185c232a4e87abbb65495263756) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 35 +++++++++++++++++++++++++---------- - 1 file changed, 25 insertions(+), 10 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 33bfb4d..9e7191e 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2351,6 +2351,30 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) - } - } - -+static void setup_root(struct lo_data *lo, struct lo_inode *root) -+{ -+ int fd, res; -+ struct stat stat; -+ -+ fd = open("/", O_PATH); -+ if (fd == -1) { -+ fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", lo->source); -+ exit(1); -+ } -+ -+ res = fstatat(fd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "fstatat(%s): %m\n", lo->source); -+ exit(1); -+ } -+ -+ root->is_symlink = false; -+ root->fd = fd; -+ root->ino = stat.st_ino; -+ root->dev = stat.st_dev; -+ root->refcount = 2; -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = 
FUSE_ARGS_INIT(argc, argv); -@@ -2426,8 +2450,6 @@ int main(int argc, char *argv[]) - if (lo.debug) { - current_log_level = FUSE_LOG_DEBUG; - } -- lo.root.refcount = 2; -- - if (lo.source) { - struct stat stat; - int res; -@@ -2446,7 +2468,6 @@ int main(int argc, char *argv[]) - } else { - lo.source = "/"; - } -- lo.root.is_symlink = false; - if (!lo.timeout_set) { - switch (lo.cache) { - case CACHE_NEVER: -@@ -2466,13 +2487,6 @@ int main(int argc, char *argv[]) - exit(1); - } - -- lo.root.fd = open(lo.source, O_PATH); -- -- if (lo.root.fd == -1) { -- fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); -- exit(1); -- } -- - se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); - if (se == NULL) { - goto err_out1; -@@ -2495,6 +2509,7 @@ int main(int argc, char *argv[]) - - setup_sandbox(&lo, se, opts.syslog); - -+ setup_root(&lo, &lo.root); - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch b/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch deleted file mode 100644 index 4860bec..0000000 --- a/kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch +++ /dev/null @@ -1,85 +0,0 @@ -From b3cd18ab58e331d3610cf00f857d6a945f11a030 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:49 +0100 -Subject: [PATCH 078/116] virtiofsd: fail when parent inode isn't known in - lo_do_lookup() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-75-dgilbert@redhat.com> -Patchwork-id: 93529 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 074/112] virtiofsd: fail when parent inode isn't known in lo_do_lookup() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -The Linux file handle APIs (struct export_operations) can access inodes -that are not attached to parents because path name traversal is not -performed. Refuse if there is no parent in lo_do_lookup(). - -Also clean up lo_do_lookup() while we're here. - -Signed-off-by: Miklos Szeredi -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 9de4fab5995d115f8ebfb41d8d94a866d80a1708) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++-- - 1 file changed, 12 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index de12e75..33bfb4d 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -777,6 +777,15 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - struct lo_data *lo = lo_data(req); - struct lo_inode *inode, *dir = lo_inode(req, parent); - -+ /* -+ * name_to_handle_at() and open_by_handle_at() can reach here with fuse -+ * mount point in guest, but we don't have its inode info in the -+ * ino_map. 
-+ */ -+ if (!dir) { -+ return ENOENT; -+ } -+ - memset(e, 0, sizeof(*e)); - e->attr_timeout = lo->timeout; - e->entry_timeout = lo->timeout; -@@ -786,7 +795,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - name = "."; - } - -- newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); -+ newfd = openat(dir->fd, name, O_PATH | O_NOFOLLOW); - if (newfd == -1) { - goto out_err; - } -@@ -796,7 +805,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - goto out_err; - } - -- inode = lo_find(lo_data(req), &e->attr); -+ inode = lo_find(lo, &e->attr); - if (inode) { - close(newfd); - newfd = -1; -@@ -812,6 +821,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - inode->is_symlink = S_ISLNK(e->attr.st_mode); - inode->refcount = 1; - inode->fd = newfd; -+ newfd = -1; - inode->ino = e->attr.st_ino; - inode->dev = e->attr.st_dev; - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-fix-error-handling-in-main.patch b/kvm-virtiofsd-fix-error-handling-in-main.patch deleted file mode 100644 index a831992..0000000 --- a/kvm-virtiofsd-fix-error-handling-in-main.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 0ea1c7375d6509367399c706eb9d1e8cf79a5830 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:55 +0100 -Subject: [PATCH 084/116] virtiofsd: fix error handling in main() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-81-dgilbert@redhat.com> -Patchwork-id: 93534 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 080/112] virtiofsd: fix error handling in main() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -Neither fuse_parse_cmdline() nor fuse_opt_parse() goes to the right place -to do cleanup. - -Signed-off-by: Liu Bo -Reviewed-by: Daniel P. 
Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit c6de804670f2255ce776263124c37f3370dc5ac1) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 9ed77a1..af050c6 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2443,13 +2443,14 @@ int main(int argc, char *argv[]) - lo_map_init(&lo.fd_map); - - if (fuse_parse_cmdline(&args, &opts) != 0) { -- return 1; -+ goto err_out1; - } - fuse_set_log_func(log_func); - use_syslog = opts.syslog; - if (use_syslog) { - openlog("virtiofsd", LOG_PID, LOG_DAEMON); - } -+ - if (opts.show_help) { - printf("usage: %s [options]\n\n", argv[0]); - fuse_cmdline_help(); -@@ -2468,7 +2469,7 @@ int main(int argc, char *argv[]) - } - - if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { -- return 1; -+ goto err_out1; - } - - /* --- -1.8.3.1 - diff --git a/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch b/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch deleted file mode 100644 index 420a8a6..0000000 --- a/kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 9c291ca8624318613ede6e4174d08cf45aae8384 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:01 +0100 -Subject: [PATCH 090/116] virtiofsd: fix incorrect error handling in - lo_do_lookup -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-87-dgilbert@redhat.com> -Patchwork-id: 93543 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 086/112] virtiofsd: fix incorrect error handling in lo_do_lookup -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Eric Ren - -Signed-off-by: Eric Ren -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit fc3f0041b43b6c64aa97b3558a6abe1a10028354) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e8dc5c7..05b5f89 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -814,7 +814,6 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - close(newfd); - newfd = -1; - } else { -- saverr = ENOMEM; - inode = calloc(1, sizeof(struct lo_inode)); - if (!inode) { - goto out_err; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-fix-libfuse-information-leaks.patch b/kvm-virtiofsd-fix-libfuse-information-leaks.patch deleted file mode 100644 index 90debb0..0000000 --- a/kvm-virtiofsd-fix-libfuse-information-leaks.patch +++ /dev/null @@ -1,322 +0,0 @@ -From e0d64e481e5a9fab5ff90d2a8f84afcd3311d13b Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:35 +0100 -Subject: [PATCH 064/116] virtiofsd: fix libfuse information leaks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-61-dgilbert@redhat.com> -Patchwork-id: 93515 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 060/112] virtiofsd: fix libfuse information leaks -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Some FUSE message replies contain padding fields that are not -initialized by libfuse. This is fine in traditional FUSE applications -because the kernel is trusted. virtiofsd does not trust the guest and -must not expose uninitialized memory. - -Use C struct initializers to automatically zero out memory. Not all of -these code changes are strictly necessary but they will prevent future -information leaks if the structs are extended. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 3db2876a0153ac7103c077c53090e020faffb3ea) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 150 ++++++++++++++++++++-------------------- - 1 file changed, 76 insertions(+), 74 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 2d6dc5a..6ceb33d 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -44,21 +44,23 @@ static __attribute__((constructor)) void fuse_ll_init_pagesize(void) - - static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) - { -- attr->ino = stbuf->st_ino; -- attr->mode = stbuf->st_mode; -- attr->nlink = stbuf->st_nlink; -- attr->uid = stbuf->st_uid; -- attr->gid = stbuf->st_gid; -- attr->rdev = stbuf->st_rdev; -- attr->size = stbuf->st_size; -- attr->blksize = stbuf->st_blksize; -- attr->blocks = stbuf->st_blocks; -- attr->atime = stbuf->st_atime; -- attr->mtime = stbuf->st_mtime; -- attr->ctime = stbuf->st_ctime; -- attr->atimensec = ST_ATIM_NSEC(stbuf); -- attr->mtimensec = 
ST_MTIM_NSEC(stbuf); -- attr->ctimensec = ST_CTIM_NSEC(stbuf); -+ *attr = (struct fuse_attr){ -+ .ino = stbuf->st_ino, -+ .mode = stbuf->st_mode, -+ .nlink = stbuf->st_nlink, -+ .uid = stbuf->st_uid, -+ .gid = stbuf->st_gid, -+ .rdev = stbuf->st_rdev, -+ .size = stbuf->st_size, -+ .blksize = stbuf->st_blksize, -+ .blocks = stbuf->st_blocks, -+ .atime = stbuf->st_atime, -+ .mtime = stbuf->st_mtime, -+ .ctime = stbuf->st_ctime, -+ .atimensec = ST_ATIM_NSEC(stbuf), -+ .mtimensec = ST_MTIM_NSEC(stbuf), -+ .ctimensec = ST_CTIM_NSEC(stbuf), -+ }; - } - - static void convert_attr(const struct fuse_setattr_in *attr, struct stat *stbuf) -@@ -183,16 +185,16 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, - int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov, - int count) - { -- struct fuse_out_header out; -+ struct fuse_out_header out = { -+ .unique = req->unique, -+ .error = error, -+ }; - - if (error <= -1000 || error > 0) { - fuse_log(FUSE_LOG_ERR, "fuse: bad error value: %i\n", error); - error = -ERANGE; - } - -- out.unique = req->unique; -- out.error = error; -- - iov[0].iov_base = &out; - iov[0].iov_len = sizeof(struct fuse_out_header); - -@@ -277,14 +279,16 @@ size_t fuse_add_direntry(fuse_req_t req, char *buf, size_t bufsize, - static void convert_statfs(const struct statvfs *stbuf, - struct fuse_kstatfs *kstatfs) - { -- kstatfs->bsize = stbuf->f_bsize; -- kstatfs->frsize = stbuf->f_frsize; -- kstatfs->blocks = stbuf->f_blocks; -- kstatfs->bfree = stbuf->f_bfree; -- kstatfs->bavail = stbuf->f_bavail; -- kstatfs->files = stbuf->f_files; -- kstatfs->ffree = stbuf->f_ffree; -- kstatfs->namelen = stbuf->f_namemax; -+ *kstatfs = (struct fuse_kstatfs){ -+ .bsize = stbuf->f_bsize, -+ .frsize = stbuf->f_frsize, -+ .blocks = stbuf->f_blocks, -+ .bfree = stbuf->f_bfree, -+ .bavail = stbuf->f_bavail, -+ .files = stbuf->f_files, -+ .ffree = stbuf->f_ffree, -+ .namelen = stbuf->f_namemax, -+ }; - } - - static int 
send_reply_ok(fuse_req_t req, const void *arg, size_t argsize) -@@ -328,12 +332,14 @@ static unsigned int calc_timeout_nsec(double t) - static void fill_entry(struct fuse_entry_out *arg, - const struct fuse_entry_param *e) - { -- arg->nodeid = e->ino; -- arg->generation = e->generation; -- arg->entry_valid = calc_timeout_sec(e->entry_timeout); -- arg->entry_valid_nsec = calc_timeout_nsec(e->entry_timeout); -- arg->attr_valid = calc_timeout_sec(e->attr_timeout); -- arg->attr_valid_nsec = calc_timeout_nsec(e->attr_timeout); -+ *arg = (struct fuse_entry_out){ -+ .nodeid = e->ino, -+ .generation = e->generation, -+ .entry_valid = calc_timeout_sec(e->entry_timeout), -+ .entry_valid_nsec = calc_timeout_nsec(e->entry_timeout), -+ .attr_valid = calc_timeout_sec(e->attr_timeout), -+ .attr_valid_nsec = calc_timeout_nsec(e->attr_timeout), -+ }; - convert_stat(&e->attr, &arg->attr); - } - -@@ -362,10 +368,12 @@ size_t fuse_add_direntry_plus(fuse_req_t req, char *buf, size_t bufsize, - fill_entry(&dp->entry_out, e); - - struct fuse_dirent *dirent = &dp->dirent; -- dirent->ino = e->attr.st_ino; -- dirent->off = off; -- dirent->namelen = namelen; -- dirent->type = (e->attr.st_mode & S_IFMT) >> 12; -+ *dirent = (struct fuse_dirent){ -+ .ino = e->attr.st_ino, -+ .off = off, -+ .namelen = namelen, -+ .type = (e->attr.st_mode & S_IFMT) >> 12, -+ }; - memcpy(dirent->name, name, namelen); - memset(dirent->name + namelen, 0, entlen_padded - entlen); - -@@ -496,15 +504,14 @@ static int fuse_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - int fuse_reply_data(fuse_req_t req, struct fuse_bufvec *bufv) - { - struct iovec iov[2]; -- struct fuse_out_header out; -+ struct fuse_out_header out = { -+ .unique = req->unique, -+ }; - int res; - - iov[0].iov_base = &out; - iov[0].iov_len = sizeof(struct fuse_out_header); - -- out.unique = req->unique; -- out.error = 0; -- - res = fuse_send_data_iov(req->se, req->ch, iov, 1, bufv); - if (res <= 0) { - fuse_free_req(req); -@@ -2145,14 
+2152,14 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, - static int send_notify_iov(struct fuse_session *se, int notify_code, - struct iovec *iov, int count) - { -- struct fuse_out_header out; -+ struct fuse_out_header out = { -+ .error = notify_code, -+ }; - - if (!se->got_init) { - return -ENOTCONN; - } - -- out.unique = 0; -- out.error = notify_code; - iov[0].iov_base = &out; - iov[0].iov_len = sizeof(struct fuse_out_header); - -@@ -2162,11 +2169,11 @@ static int send_notify_iov(struct fuse_session *se, int notify_code, - int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) - { - if (ph != NULL) { -- struct fuse_notify_poll_wakeup_out outarg; -+ struct fuse_notify_poll_wakeup_out outarg = { -+ .kh = ph->kh, -+ }; - struct iovec iov[2]; - -- outarg.kh = ph->kh; -- - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - -@@ -2179,17 +2186,17 @@ int fuse_lowlevel_notify_poll(struct fuse_pollhandle *ph) - int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, - off_t off, off_t len) - { -- struct fuse_notify_inval_inode_out outarg; -+ struct fuse_notify_inval_inode_out outarg = { -+ .ino = ino, -+ .off = off, -+ .len = len, -+ }; - struct iovec iov[2]; - - if (!se) { - return -EINVAL; - } - -- outarg.ino = ino; -- outarg.off = off; -- outarg.len = len; -- - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - -@@ -2199,17 +2206,16 @@ int fuse_lowlevel_notify_inval_inode(struct fuse_session *se, fuse_ino_t ino, - int fuse_lowlevel_notify_inval_entry(struct fuse_session *se, fuse_ino_t parent, - const char *name, size_t namelen) - { -- struct fuse_notify_inval_entry_out outarg; -+ struct fuse_notify_inval_entry_out outarg = { -+ .parent = parent, -+ .namelen = namelen, -+ }; - struct iovec iov[3]; - - if (!se) { - return -EINVAL; - } - -- outarg.parent = parent; -- outarg.namelen = namelen; -- outarg.padding = 0; -- - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - iov[2].iov_base = (void 
*)name; -@@ -2222,18 +2228,17 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - fuse_ino_t child, const char *name, - size_t namelen) - { -- struct fuse_notify_delete_out outarg; -+ struct fuse_notify_delete_out outarg = { -+ .parent = parent, -+ .child = child, -+ .namelen = namelen, -+ }; - struct iovec iov[3]; - - if (!se) { - return -EINVAL; - } - -- outarg.parent = parent; -- outarg.child = child; -- outarg.namelen = namelen; -- outarg.padding = 0; -- - iov[1].iov_base = &outarg; - iov[1].iov_len = sizeof(outarg); - iov[2].iov_base = (void *)name; -@@ -2245,24 +2250,21 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - off_t offset, struct fuse_bufvec *bufv) - { -- struct fuse_out_header out; -- struct fuse_notify_store_out outarg; -+ struct fuse_out_header out = { -+ .error = FUSE_NOTIFY_STORE, -+ }; -+ struct fuse_notify_store_out outarg = { -+ .nodeid = ino, -+ .offset = offset, -+ .size = fuse_buf_size(bufv), -+ }; - struct iovec iov[3]; -- size_t size = fuse_buf_size(bufv); - int res; - - if (!se) { - return -EINVAL; - } - -- out.unique = 0; -- out.error = FUSE_NOTIFY_STORE; -- -- outarg.nodeid = ino; -- outarg.offset = offset; -- outarg.size = size; -- outarg.padding = 0; -- - iov[0].iov_base = &out; - iov[0].iov_len = sizeof(out); - iov[1].iov_base = &outarg; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch b/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch deleted file mode 100644 index 6243037..0000000 --- a/kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 9a44d78f5019280b006bb5b3de7164336289d639 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:21 +0100 -Subject: [PATCH 110/116] virtiofsd: fix lo_destroy() resource leaks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-107-dgilbert@redhat.com> -Patchwork-id: 93560 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 106/112] virtiofsd: fix lo_destroy() resource leaks -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Now that lo_destroy() is serialized we can call unref_inode() so that -all inode resources are freed. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 28f7a3b026f231bfe8de5fed6a18a8d27b1dfcee) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 41 ++++++++++++++++++++-------------------- - 1 file changed, 20 insertions(+), 21 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 79b8b71..eb001b9 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1371,26 +1371,6 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - } - } - --static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) --{ -- struct lo_inode *inode = value; -- struct lo_data *lo = user_data; -- -- inode->nlookup = 0; -- lo_map_remove(&lo->ino_map, inode->fuse_ino); -- close(inode->fd); -- lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */ -- -- return TRUE; --} -- --static void unref_all_inodes(struct lo_data *lo) --{ -- pthread_mutex_lock(&lo->mutex); -- g_hash_table_foreach_remove(lo->inodes, unref_all_inodes_cb, lo); -- pthread_mutex_unlock(&lo->mutex); --} -- - static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, 
uint64_t nlookup) - { - struct lo_data *lo = lo_data(req); -@@ -2477,7 +2457,26 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, - static void lo_destroy(void *userdata) - { - struct lo_data *lo = (struct lo_data *)userdata; -- unref_all_inodes(lo); -+ -+ /* -+ * Normally lo->mutex must be taken when traversing lo->inodes but -+ * lo_destroy() is a serialized request so no races are possible here. -+ * -+ * In addition, we cannot acquire lo->mutex since unref_inode() takes it -+ * too and this would result in a recursive lock. -+ */ -+ while (true) { -+ GHashTableIter iter; -+ gpointer key, value; -+ -+ g_hash_table_iter_init(&iter, lo->inodes); -+ if (!g_hash_table_iter_next(&iter, &key, &value)) { -+ break; -+ } -+ -+ struct lo_inode *inode = value; -+ unref_inode_lolocked(lo, inode, inode->nlookup); -+ } - } - - static struct fuse_lowlevel_ops lo_oper = { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch b/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch deleted file mode 100644 index 4d7d6dc..0000000 --- a/kvm-virtiofsd-fix-memory-leak-on-lo.source.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 9e0f5b64f30c2f841f297e25c2f3a6d82c8a16b8 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:57 +0100 -Subject: [PATCH 086/116] virtiofsd: fix memory leak on lo.source -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-83-dgilbert@redhat.com> -Patchwork-id: 93536 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 082/112] virtiofsd: fix memory leak on lo.source -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Liu Bo - -valgrind reported that lo.source is leaked on quiting, but it was defined -as (const char*) as it may point to a const string "/". 
- -Signed-off-by: Liu Bo -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit eb68a33b5fc5dde87bd9b99b94e7c33a5d8ea82e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index af050c6..056ebe8 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -115,7 +115,7 @@ struct lo_data { - int writeback; - int flock; - int xattr; -- const char *source; -+ char *source; - double timeout; - int cache; - int timeout_set; -@@ -2497,9 +2497,8 @@ int main(int argc, char *argv[]) - fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); - exit(1); - } -- - } else { -- lo.source = "/"; -+ lo.source = strdup("/"); - } - if (!lo.timeout_set) { - switch (lo.cache) { -@@ -2570,5 +2569,7 @@ err_out1: - close(lo.root.fd); - } - -+ free(lo.source); -+ - return ret ? 1 : 0; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch b/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch deleted file mode 100644 index b17d93c..0000000 --- a/kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 3b6461ee08654b2cbb6d4e0cc15c02f89a6610d5 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:09 +0000 -Subject: [PATCH 13/18] virtiofsd: fv_create_listen_socket error path socket - leak -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-3-dgilbert@redhat.com> -Patchwork-id: 94124 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/7] virtiofsd: fv_create_listen_socket error path socket leak -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: "Dr. 
David Alan Gilbert" - -If we fail when bringing up the socket we can leak the listen_fd; -in practice the daemon will exit so it's not really a problem. - -Fixes: Coverity CID 1413121 -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 6fa249027f97e3080f3d9c0fab3f94f8f80828fe) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/fuse_virtio.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 80a6e92..dd1c605 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -916,6 +916,7 @@ static int fv_create_listen_socket(struct fuse_session *se) - old_umask = umask(0077); - if (bind(listen_sock, (struct sockaddr *)&un, addr_len) == -1) { - fuse_log(FUSE_LOG_ERR, "vhost socket bind: %m\n"); -+ close(listen_sock); - umask(old_umask); - return -1; - } -@@ -923,6 +924,7 @@ static int fv_create_listen_socket(struct fuse_session *se) - - if (listen(listen_sock, 1) == -1) { - fuse_log(FUSE_LOG_ERR, "vhost socket listen: %m\n"); -+ close(listen_sock); - return -1; - } - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-get-set-features-callbacks.patch b/kvm-virtiofsd-get-set-features-callbacks.patch deleted file mode 100644 index fcb5ca2..0000000 --- a/kvm-virtiofsd-get-set-features-callbacks.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 59bfe3ad924d00dc9c7a4363fcd3db36ea247988 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:59 +0100 -Subject: [PATCH 028/116] virtiofsd: get/set features callbacks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-25-dgilbert@redhat.com> -Patchwork-id: 93478 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 024/112] virtiofsd: get/set features callbacks -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: "Dr. David Alan Gilbert" - -Add the get/set features callbacks. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit f2cef5fb9ae20136ca18d16328787b69b3abfa18) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 15 ++++++++++++++- - 1 file changed, 14 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 1928a20..4819e56 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -46,6 +46,17 @@ struct virtio_fs_config { - uint32_t num_queues; - }; - -+/* Callback from libvhost-user */ -+static uint64_t fv_get_features(VuDev *dev) -+{ -+ return 1ULL << VIRTIO_F_VERSION_1; -+} -+ -+/* Callback from libvhost-user */ -+static void fv_set_features(VuDev *dev, uint64_t features) -+{ -+} -+ - /* - * Callback from libvhost-user if there's a new fd we're supposed to listen - * to, typically a queue kick? -@@ -78,7 +89,9 @@ static bool fv_queue_order(VuDev *dev, int qidx) - } - - static const VuDevIface fv_iface = { -- /* TODO: Add other callbacks */ -+ .get_features = fv_get_features, -+ .set_features = fv_set_features, -+ - .queue_is_processed_in_order = fv_queue_order, - }; - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch b/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch deleted file mode 100644 index 68d20e7..0000000 --- a/kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch +++ /dev/null @@ -1,589 +0,0 @@ -From da6ee5c24397d2ca93dfaf275fdd9dafc922da15 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:11 +0100 -Subject: [PATCH 100/116] virtiofsd: introduce inode refcount to prevent - use-after-free -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-97-dgilbert@redhat.com> -Patchwork-id: 93550 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 096/112] virtiofsd: introduce inode refcount to prevent use-after-free -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -If thread A is using an inode it must not be deleted by thread B when -processing a FUSE_FORGET request. - -The FUSE protocol itself already has a counter called nlookup that is -used in FUSE_FORGET messages. We cannot trust this counter since the -untrusted client can manipulate it via FUSE_FORGET messages. - -Introduce a new refcount to keep inodes alive for the required lifespan. -lo_inode_put() must be called to release a reference. FUSE's nlookup -counter holds exactly one reference so that the inode stays alive as -long as the client still wants to remember it. - -Note that the lo_inode->is_symlink field is moved to avoid creating a -hole in the struct due to struct field alignment. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Misono Tomohiro -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit c241aa9457d88c6a0d027f48fadfed131646bce3) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 169 +++++++++++++++++++++++++++++++++------ - 1 file changed, 146 insertions(+), 23 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e3a6d6b..ab16135 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -97,7 +97,13 @@ struct lo_key { - - struct lo_inode { - int fd; -- bool is_symlink; -+ -+ /* -+ * Atomic reference count for this object. The nlookup field holds a -+ * reference and release it when nlookup reaches 0. -+ */ -+ gint refcount; -+ - struct lo_key key; - - /* -@@ -116,6 +122,8 @@ struct lo_inode { - fuse_ino_t fuse_ino; - pthread_mutex_t plock_mutex; - GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ -+ -+ bool is_symlink; - }; - - struct lo_cred { -@@ -471,6 +479,23 @@ static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) - return elem - lo_data(req)->ino_map.elems; - } - -+static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep) -+{ -+ struct lo_inode *inode = *inodep; -+ -+ if (!inode) { -+ return; -+ } -+ -+ *inodep = NULL; -+ -+ if (g_atomic_int_dec_and_test(&inode->refcount)) { -+ close(inode->fd); -+ free(inode); -+ } -+} -+ -+/* Caller must release refcount using lo_inode_put() */ - static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - { - struct lo_data *lo = lo_data(req); -@@ -478,6 +503,9 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - - pthread_mutex_lock(&lo->mutex); - elem = lo_map_get(&lo->ino_map, ino); -+ if (elem) { -+ g_atomic_int_inc(&elem->inode->refcount); -+ } - pthread_mutex_unlock(&lo->mutex); - - if (!elem) { -@@ -487,10 +515,23 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - return elem->inode; - } - -+/* -+ * TODO Remove this helper and force callers to hold an 
inode refcount until -+ * they are done with the fd. This will be done in a later patch to make -+ * review easier. -+ */ - static int lo_fd(fuse_req_t req, fuse_ino_t ino) - { - struct lo_inode *inode = lo_inode(req, ino); -- return inode ? inode->fd : -1; -+ int fd; -+ -+ if (!inode) { -+ return -1; -+ } -+ -+ fd = inode->fd; -+ lo_inode_put(lo_data(req), &inode); -+ return fd; - } - - static void lo_init(void *userdata, struct fuse_conn_info *conn) -@@ -545,6 +586,10 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino, - fuse_reply_attr(req, &buf, lo->timeout); - } - -+/* -+ * Increments parent->nlookup and caller must release refcount using -+ * lo_inode_put(&parent). -+ */ - static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, - char path[PATH_MAX], struct lo_inode **parent) - { -@@ -582,6 +627,7 @@ retry: - p = &lo->root; - pthread_mutex_lock(&lo->mutex); - p->nlookup++; -+ g_atomic_int_inc(&p->refcount); - pthread_mutex_unlock(&lo->mutex); - } else { - *last = '\0'; -@@ -625,6 +671,7 @@ retry: - - fail_unref: - unref_inode_lolocked(lo, p, 1); -+ lo_inode_put(lo, &p); - fail: - if (retries) { - retries--; -@@ -663,6 +710,7 @@ fallback: - if (res != -1) { - res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); - unref_inode_lolocked(lo, parent, 1); -+ lo_inode_put(lo, &parent); - } - - return res; -@@ -780,11 +828,13 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - goto out_err; - } - } -+ lo_inode_put(lo, &inode); - - return lo_getattr(req, ino, fi); - - out_err: - saverr = errno; -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - } - -@@ -801,6 +851,7 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) - if (p) { - assert(p->nlookup > 0); - p->nlookup++; -+ g_atomic_int_inc(&p->refcount); - } - pthread_mutex_unlock(&lo->mutex); - -@@ -820,6 +871,10 @@ static void posix_locks_value_destroy(gpointer data) - free(plock); - } - -+/* -+ * Increments nlookup and caller 
must release refcount using -+ * lo_inode_put(&parent). -+ */ - static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - struct fuse_entry_param *e) - { -@@ -827,7 +882,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - int res; - int saverr; - struct lo_data *lo = lo_data(req); -- struct lo_inode *inode, *dir = lo_inode(req, parent); -+ struct lo_inode *inode = NULL; -+ struct lo_inode *dir = lo_inode(req, parent); - - /* - * name_to_handle_at() and open_by_handle_at() can reach here with fuse -@@ -868,6 +924,13 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - } - - inode->is_symlink = S_ISLNK(e->attr.st_mode); -+ -+ /* -+ * One for the caller and one for nlookup (released in -+ * unref_inode_lolocked()) -+ */ -+ g_atomic_int_set(&inode->refcount, 2); -+ - inode->nlookup = 1; - inode->fd = newfd; - newfd = -1; -@@ -883,6 +946,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - pthread_mutex_unlock(&lo->mutex); - } - e->ino = inode->fuse_ino; -+ lo_inode_put(lo, &inode); -+ lo_inode_put(lo, &dir); - - fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, - name, (unsigned long long)e->ino); -@@ -894,6 +959,8 @@ out_err: - if (newfd != -1) { - close(newfd); - } -+ lo_inode_put(lo, &inode); -+ lo_inode_put(lo, &dir); - return saverr; - } - -@@ -991,6 +1058,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - { - int res; - int saverr; -+ struct lo_data *lo = lo_data(req); - struct lo_inode *dir; - struct fuse_entry_param e; - struct lo_cred old = {}; -@@ -1032,9 +1100,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - name, (unsigned long long)e.ino); - - fuse_reply_entry(req, &e); -+ lo_inode_put(lo, &dir); - return; - - out: -+ lo_inode_put(lo, &dir); - fuse_reply_err(req, saverr); - } - -@@ -1085,6 +1155,7 @@ fallback: - if (res != -1) { - res = linkat(parent->fd, path, dfd, name, 0); - 
unref_inode_lolocked(lo, parent, 1); -+ lo_inode_put(lo, &parent); - } - - return res; -@@ -1095,6 +1166,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - { - int res; - struct lo_data *lo = lo_data(req); -+ struct lo_inode *parent_inode; - struct lo_inode *inode; - struct fuse_entry_param e; - int saverr; -@@ -1104,17 +1176,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - return; - } - -+ parent_inode = lo_inode(req, parent); - inode = lo_inode(req, ino); -- if (!inode) { -- fuse_reply_err(req, EBADF); -- return; -+ if (!parent_inode || !inode) { -+ errno = EBADF; -+ goto out_err; - } - - memset(&e, 0, sizeof(struct fuse_entry_param)); - e.attr_timeout = lo->timeout; - e.entry_timeout = lo->timeout; - -- res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name); -+ res = linkat_empty_nofollow(lo, inode, parent_inode->fd, name); - if (res == -1) { - goto out_err; - } -@@ -1133,13 +1206,18 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - name, (unsigned long long)e.ino); - - fuse_reply_entry(req, &e); -+ lo_inode_put(lo, &parent_inode); -+ lo_inode_put(lo, &inode); - return; - - out_err: - saverr = errno; -+ lo_inode_put(lo, &parent_inode); -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - } - -+/* Increments nlookup and caller must release refcount using lo_inode_put() */ - static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, - const char *name) - { -@@ -1176,6 +1254,7 @@ static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) - - fuse_reply_err(req, res == -1 ? 
errno : 0); - unref_inode_lolocked(lo, inode, 1); -+ lo_inode_put(lo, &inode); - } - - static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, -@@ -1183,8 +1262,10 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - unsigned int flags) - { - int res; -- struct lo_inode *oldinode; -- struct lo_inode *newinode; -+ struct lo_inode *parent_inode; -+ struct lo_inode *newparent_inode; -+ struct lo_inode *oldinode = NULL; -+ struct lo_inode *newinode = NULL; - struct lo_data *lo = lo_data(req); - - if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { -@@ -1192,6 +1273,13 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - return; - } - -+ parent_inode = lo_inode(req, parent); -+ newparent_inode = lo_inode(req, newparent); -+ if (!parent_inode || !newparent_inode) { -+ fuse_reply_err(req, EBADF); -+ goto out; -+ } -+ - oldinode = lookup_name(req, parent, name); - newinode = lookup_name(req, newparent, newname); - -@@ -1204,8 +1292,8 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - #ifndef SYS_renameat2 - fuse_reply_err(req, EINVAL); - #else -- res = syscall(SYS_renameat2, lo_fd(req, parent), name, -- lo_fd(req, newparent), newname, flags); -+ res = syscall(SYS_renameat2, parent_inode->fd, name, -+ newparent_inode->fd, newname, flags); - if (res == -1 && errno == ENOSYS) { - fuse_reply_err(req, EINVAL); - } else { -@@ -1215,12 +1303,16 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - goto out; - } - -- res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); -+ res = renameat(parent_inode->fd, name, newparent_inode->fd, newname); - - fuse_reply_err(req, res == -1 ? 
errno : 0); - out: - unref_inode_lolocked(lo, oldinode, 1); - unref_inode_lolocked(lo, newinode, 1); -+ lo_inode_put(lo, &oldinode); -+ lo_inode_put(lo, &newinode); -+ lo_inode_put(lo, &parent_inode); -+ lo_inode_put(lo, &newparent_inode); - } - - static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) -@@ -1244,6 +1336,7 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - - fuse_reply_err(req, res == -1 ? errno : 0); - unref_inode_lolocked(lo, inode, 1); -+ lo_inode_put(lo, &inode); - } - - static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, -@@ -1265,8 +1358,9 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - g_hash_table_destroy(inode->posix_locks); - pthread_mutex_destroy(&inode->plock_mutex); - pthread_mutex_unlock(&lo->mutex); -- close(inode->fd); -- free(inode); -+ -+ /* Drop our refcount from lo_do_lookup() */ -+ lo_inode_put(lo, &inode); - } else { - pthread_mutex_unlock(&lo->mutex); - } -@@ -1280,6 +1374,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) - inode->nlookup = 0; - lo_map_remove(&lo->ino_map, inode->fuse_ino); - close(inode->fd); -+ lo_inode_put(lo, &inode); /* Drop our refcount from lo_do_lookup() */ - - return TRUE; - } -@@ -1306,6 +1401,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - (unsigned long long)nlookup); - - unref_inode_lolocked(lo, inode, nlookup); -+ lo_inode_put(lo, &inode); - } - - static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) -@@ -1537,6 +1633,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - err = 0; - error: - lo_dirp_put(&d); -+ lo_inode_put(lo, &dinode); - - /* - * If there's an error, we can only signal it if we haven't stored -@@ -1595,6 +1692,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - { - int fd; - struct lo_data *lo = lo_data(req); -+ struct lo_inode 
*parent_inode; - struct fuse_entry_param e; - int err; - struct lo_cred old = {}; -@@ -1607,12 +1705,18 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - return; - } - -+ parent_inode = lo_inode(req, parent); -+ if (!parent_inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - err = lo_change_cred(req, &old); - if (err) { - goto out; - } - -- fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, -+ fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, - mode); - err = fd == -1 ? errno : 0; - lo_restore_cred(&old); -@@ -1625,8 +1729,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - pthread_mutex_unlock(&lo->mutex); - if (fh == -1) { - close(fd); -- fuse_reply_err(req, ENOMEM); -- return; -+ err = ENOMEM; -+ goto out; - } - - fi->fh = fh; -@@ -1639,6 +1743,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - } - - out: -+ lo_inode_put(lo, &parent_inode); -+ - if (err) { - fuse_reply_err(req, err); - } else { -@@ -1712,16 +1818,18 @@ static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, - plock = - lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); - if (!plock) { -- pthread_mutex_unlock(&inode->plock_mutex); -- fuse_reply_err(req, ret); -- return; -+ saverr = ret; -+ goto out; - } - - ret = fcntl(plock->fd, F_OFD_GETLK, lock); - if (ret == -1) { - saverr = errno; - } -+ -+out: - pthread_mutex_unlock(&inode->plock_mutex); -+ lo_inode_put(lo, &inode); - - if (saverr) { - fuse_reply_err(req, saverr); -@@ -1761,9 +1869,8 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, - lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); - - if (!plock) { -- pthread_mutex_unlock(&inode->plock_mutex); -- fuse_reply_err(req, ret); -- return; -+ saverr = ret; -+ goto out; - } - - /* TODO: Is it alright to modify flock? 
*/ -@@ -1772,7 +1879,11 @@ static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, - if (ret == -1) { - saverr = errno; - } -+ -+out: - pthread_mutex_unlock(&inode->plock_mutex); -+ lo_inode_put(lo, &inode); -+ - fuse_reply_err(req, saverr); - } - -@@ -1898,6 +2009,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - pthread_mutex_unlock(&inode->plock_mutex); - - res = close(dup(lo_fi_fd(req, fi))); -+ lo_inode_put(lo_data(req), &inode); - fuse_reply_err(req, res == -1 ? errno : 0); - } - -@@ -2115,11 +2227,14 @@ out_free: - if (fd >= 0) { - close(fd); - } -+ -+ lo_inode_put(lo, &inode); - return; - - out_err: - saverr = errno; - out: -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - goto out_free; - } -@@ -2190,11 +2305,14 @@ out_free: - if (fd >= 0) { - close(fd); - } -+ -+ lo_inode_put(lo, &inode); - return; - - out_err: - saverr = errno; - out: -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - goto out_free; - } -@@ -2243,6 +2361,8 @@ out: - if (fd >= 0) { - close(fd); - } -+ -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - } - -@@ -2289,6 +2409,8 @@ out: - if (fd >= 0) { - close(fd); - } -+ -+ lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - } - -@@ -2671,6 +2793,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) - root->key.ino = stat.st_ino; - root->key.dev = stat.st_dev; - root->nlookup = 2; -+ g_atomic_int_set(&root->refcount, 2); - } - - static guint lo_key_hash(gconstpointer key) --- -1.8.3.1 - diff --git a/kvm-virtiofsd-load_capng-missing-unlock.patch b/kvm-virtiofsd-load_capng-missing-unlock.patch deleted file mode 100644 index bc04f6b..0000000 --- a/kvm-virtiofsd-load_capng-missing-unlock.patch +++ /dev/null @@ -1,46 +0,0 @@ -From ece7649025fbdbde48ff0b954e8ec2e42c4a8b3d Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:10 +0000 -Subject: [PATCH 14/18] virtiofsd: load_capng missing unlock -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-4-dgilbert@redhat.com> -Patchwork-id: 94126 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 3/7] virtiofsd: load_capng missing unlock -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: "Dr. David Alan Gilbert" - -Missing unlock in error path. - -Fixes: Covertiy CID 1413123 -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Stefan Hajnoczi -(cherry picked from commit 686391112fd42c615bcc4233472887a66a9b5a4a) -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/passthrough_ll.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e6f2399..c635fc8 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -232,6 +232,7 @@ static int load_capng(void) - */ - cap.saved = capng_save_state(); - if (!cap.saved) { -+ pthread_mutex_unlock(&cap.mutex); - fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n"); - return -EINVAL; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-make-f-foreground-the-default.patch b/kvm-virtiofsd-make-f-foreground-the-default.patch deleted file mode 100644 index d6cb0e3..0000000 --- a/kvm-virtiofsd-make-f-foreground-the-default.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 7f2e1f79a3addb242c3018c7a80e2e57589119f0 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:08 +0100 -Subject: [PATCH 037/116] virtiofsd: make -f (foreground) the default -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-34-dgilbert@redhat.com> -Patchwork-id: 93489 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 033/112] virtiofsd: make -f (foreground) the default -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -According to vhost-user.rst "Backend program conventions", backend -programs should run in the foregound by default. Follow the -conventions so libvirt and other management tools can control virtiofsd -in a standard way. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 0bbd31753714ac2899efda0f0de31e353e965789) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 676032e..a3645fc 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -29,6 +29,11 @@ - { \ - t, offsetof(struct fuse_cmdline_opts, p), 1 \ - } -+#define FUSE_HELPER_OPT_VALUE(t, p, v) \ -+ { \ -+ t, offsetof(struct fuse_cmdline_opts, p), v \ -+ } -+ - - static const struct fuse_opt fuse_helper_opts[] = { - FUSE_HELPER_OPT("-h", show_help), -@@ -42,6 +47,7 @@ static const struct fuse_opt fuse_helper_opts[] = { - FUSE_OPT_KEY("-d", FUSE_OPT_KEY_KEEP), - FUSE_OPT_KEY("debug", FUSE_OPT_KEY_KEEP), - FUSE_HELPER_OPT("-f", foreground), -+ FUSE_HELPER_OPT_VALUE("--daemonize", foreground, 0), - FUSE_HELPER_OPT("fsname=", nodefault_subtype), - FUSE_OPT_KEY("fsname=", FUSE_OPT_KEY_KEEP), - FUSE_HELPER_OPT("subtype=", nodefault_subtype), -@@ -131,6 +137,7 @@ void fuse_cmdline_help(void) - " -V --version print version\n" - " -d -o debug enable debug output (implies -f)\n" - " -f foreground operation\n" -+ " --daemonize run in background\n" - " -o max_idle_threads the maximum number of idle worker " - "threads\n" - " allowed (default: 10)\n"); -@@ 
-158,6 +165,7 @@ int fuse_parse_cmdline(struct fuse_args *args, struct fuse_cmdline_opts *opts) - memset(opts, 0, sizeof(struct fuse_cmdline_opts)); - - opts->max_idle_threads = 10; -+ opts->foreground = 1; - - if (fuse_opt_parse(args, opts, fuse_helper_opts, fuse_helper_opt_proc) == - -1) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-make-lo_release-atomic.patch b/kvm-virtiofsd-make-lo_release-atomic.patch deleted file mode 100644 index 6d88549..0000000 --- a/kvm-virtiofsd-make-lo_release-atomic.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 4ebabb66f4132186152edf8e1907fce436bf5c69 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:06 +0100 -Subject: [PATCH 095/116] virtiofsd: make lo_release() atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-92-dgilbert@redhat.com> -Patchwork-id: 93545 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 091/112] virtiofsd: make lo_release() atomic -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Hold the lock across both lo_map_get() and lo_map_remove() to prevent -races between two FUSE_RELEASE requests. In this case I don't see a -serious bug but it's safer to do things atomically. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit baed65c060c0e524530bc243eec427fb408bd477) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 12 ++++++++---- - 1 file changed, 8 insertions(+), 4 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 9414935..690edbc 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1772,14 +1772,18 @@ static void lo_release(fuse_req_t req, fuse_ino_t ino, - struct fuse_file_info *fi) - { - struct lo_data *lo = lo_data(req); -- int fd; -+ struct lo_map_elem *elem; -+ int fd = -1; - - (void)ino; - -- fd = lo_fi_fd(req, fi); -- - pthread_mutex_lock(&lo->mutex); -- lo_map_remove(&lo->fd_map, fi->fh); -+ elem = lo_map_get(&lo->fd_map, fi->fh); -+ if (elem) { -+ fd = elem->fd; -+ elem = NULL; -+ lo_map_remove(&lo->fd_map, fi->fh); -+ } - pthread_mutex_unlock(&lo->mutex); - - close(fd); --- -1.8.3.1 - diff --git a/kvm-virtiofsd-move-to-a-new-pid-namespace.patch b/kvm-virtiofsd-move-to-a-new-pid-namespace.patch deleted file mode 100644 index 9a33d1b..0000000 --- a/kvm-virtiofsd-move-to-a-new-pid-namespace.patch +++ /dev/null @@ -1,223 +0,0 @@ -From a7a87a751a9893830d031a957a751b7622b71fb2 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:29 +0100 -Subject: [PATCH 058/116] virtiofsd: move to a new pid namespace -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-55-dgilbert@redhat.com> -Patchwork-id: 93510 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 054/112] virtiofsd: move to a new pid namespace -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -virtiofsd needs access to /proc/self/fd. 
Let's move to a new pid -namespace so that a compromised process cannot see another other -processes running on the system. - -One wrinkle in this approach: unshare(CLONE_NEWPID) affects *child* -processes and not the current process. Therefore we need to fork the -pid 1 process that will actually run virtiofsd and leave a parent in -waitpid(2). This is not the same thing as daemonization and parent -processes should not notice a difference. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 8e1d4ef231d8327be219f7aea7aa15d181375bbc) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 134 +++++++++++++++++++++++++-------------- - 1 file changed, 86 insertions(+), 48 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 27ab328..0947d14 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -51,7 +51,10 @@ - #include - #include - #include -+#include - #include -+#include -+#include - #include - #include - -@@ -1945,24 +1948,95 @@ static void print_capabilities(void) - } - - /* -- * Called after our UNIX domain sockets have been created, now we can move to -- * an empty network namespace to prevent TCP/IP and other network activity in -- * case this process is compromised. -+ * Move to a new mount, net, and pid namespaces to isolate this process. - */ --static void setup_net_namespace(void) -+static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) - { -- if (unshare(CLONE_NEWNET) != 0) { -- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNET): %m\n"); -+ pid_t child; -+ -+ /* -+ * Create a new pid namespace for *child* processes. We'll have to -+ * fork in order to enter the new pid namespace. A new mount namespace -+ * is also needed so that we can remount /proc for the new pid -+ * namespace. -+ * -+ * Our UNIX domain sockets have been created. 
Now we can move to -+ * an empty network namespace to prevent TCP/IP and other network -+ * activity in case this process is compromised. -+ */ -+ if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) { -+ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): %m\n"); -+ exit(1); -+ } -+ -+ child = fork(); -+ if (child < 0) { -+ fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n"); -+ exit(1); -+ } -+ if (child > 0) { -+ pid_t waited; -+ int wstatus; -+ -+ /* The parent waits for the child */ -+ do { -+ waited = waitpid(child, &wstatus, 0); -+ } while (waited < 0 && errno == EINTR && !se->exited); -+ -+ /* We were terminated by a signal, see fuse_signals.c */ -+ if (se->exited) { -+ exit(0); -+ } -+ -+ if (WIFEXITED(wstatus)) { -+ exit(WEXITSTATUS(wstatus)); -+ } -+ -+ exit(1); -+ } -+ -+ /* Send us SIGTERM when the parent thread terminates, see prctl(2) */ -+ prctl(PR_SET_PDEATHSIG, SIGTERM); -+ -+ /* -+ * If the mounts have shared propagation then we want to opt out so our -+ * mount changes don't affect the parent mount namespace. -+ */ -+ if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n"); -+ exit(1); -+ } -+ -+ /* The child must remount /proc to use the new pid namespace */ -+ if (mount("proc", "/proc", "proc", -+ MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n"); -+ exit(1); -+ } -+ -+ /* Now we can get our /proc/self/fd directory file descriptor */ -+ lo->proc_self_fd = open("/proc/self/fd", O_PATH); -+ if (lo->proc_self_fd == -1) { -+ fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); - exit(1); - } - } - --/* This magic is based on lxc's lxc_pivot_root() */ --static void setup_pivot_root(const char *source) -+/* -+ * Make the source directory our root so symlinks cannot escape and no other -+ * files are accessible. Assumes unshare(CLONE_NEWNS) was already called. 
-+ */ -+static void setup_mounts(const char *source) - { - int oldroot; - int newroot; - -+ if (mount(source, source, NULL, MS_BIND, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); -+ exit(1); -+ } -+ -+ /* This magic is based on lxc's lxc_pivot_root() */ - oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC); - if (oldroot < 0) { - fuse_log(FUSE_LOG_ERR, "open(/): %m\n"); -@@ -2009,47 +2083,14 @@ static void setup_pivot_root(const char *source) - close(oldroot); - } - --static void setup_proc_self_fd(struct lo_data *lo) --{ -- lo->proc_self_fd = open("/proc/self/fd", O_PATH); -- if (lo->proc_self_fd == -1) { -- fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); -- exit(1); -- } --} -- --/* -- * Make the source directory our root so symlinks cannot escape and no other -- * files are accessible. -- */ --static void setup_mount_namespace(const char *source) --{ -- if (unshare(CLONE_NEWNS) != 0) { -- fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNS): %m\n"); -- exit(1); -- } -- -- if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { -- fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_PRIVATE): %m\n"); -- exit(1); -- } -- -- if (mount(source, source, NULL, MS_BIND, NULL) < 0) { -- fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); -- exit(1); -- } -- -- setup_pivot_root(source); --} -- - /* - * Lock down this process to prevent access to other processes or files outside - * source directory. This reduces the impact of arbitrary code execution bugs. 
- */ --static void setup_sandbox(struct lo_data *lo) -+static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) - { -- setup_net_namespace(); -- setup_mount_namespace(lo->source); -+ setup_namespaces(lo, se); -+ setup_mounts(lo->source); - } - - int main(int argc, char *argv[]) -@@ -2173,10 +2214,7 @@ int main(int argc, char *argv[]) - - fuse_daemonize(opts.foreground); - -- /* Must be after daemonize to get the right /proc/self/fd */ -- setup_proc_self_fd(&lo); -- -- setup_sandbox(&lo); -+ setup_sandbox(&lo, se); - - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); --- -1.8.3.1 - diff --git a/kvm-virtiofsd-move-to-an-empty-network-namespace.patch b/kvm-virtiofsd-move-to-an-empty-network-namespace.patch deleted file mode 100644 index 69a7c20..0000000 --- a/kvm-virtiofsd-move-to-an-empty-network-namespace.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 19a16f26bdeb6302159736e182a18b06160a3f42 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:28 +0100 -Subject: [PATCH 057/116] virtiofsd: move to an empty network namespace -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-54-dgilbert@redhat.com> -Patchwork-id: 93508 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 053/112] virtiofsd: move to an empty network namespace -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -If the process is compromised there should be no network access. Use an -empty network namespace to sandbox networking. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit d74830d12ae233186ff74ddf64c552d26bb39e50) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 0570453..27ab328 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1944,6 +1944,19 @@ static void print_capabilities(void) - printf("}\n"); - } - -+/* -+ * Called after our UNIX domain sockets have been created, now we can move to -+ * an empty network namespace to prevent TCP/IP and other network activity in -+ * case this process is compromised. -+ */ -+static void setup_net_namespace(void) -+{ -+ if (unshare(CLONE_NEWNET) != 0) { -+ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNET): %m\n"); -+ exit(1); -+ } -+} -+ - /* This magic is based on lxc's lxc_pivot_root() */ - static void setup_pivot_root(const char *source) - { -@@ -2035,6 +2048,7 @@ static void setup_mount_namespace(const char *source) - */ - static void setup_sandbox(struct lo_data *lo) - { -+ setup_net_namespace(); - setup_mount_namespace(lo->source); - } - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch b/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch deleted file mode 100644 index e3d5773..0000000 --- a/kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch +++ /dev/null @@ -1,54 +0,0 @@ -From fe031dbbf5e287f64de9fcc9aec361e8ab492109 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:24 +0100 -Subject: [PATCH 113/116] virtiofsd/passthrough_ll: Pass errno to - fuse_reply_err() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-110-dgilbert@redhat.com> -Patchwork-id: 93559 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 109/112] virtiofsd/passthrough_ll: Pass errno to fuse_reply_err() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Xiao Yang - -lo_copy_file_range() passes -errno to fuse_reply_err() and then fuse_reply_err() -changes it to errno again, so that subsequent fuse_send_reply_iov_nofree() catches -the wrong errno.(i.e. reports "fuse: bad error value: ..."). - -Make fuse_send_reply_iov_nofree() accept the correct -errno by passing errno -directly in lo_copy_file_range(). - -Signed-off-by: Xiao Yang -Reviewed-by: Eryu Guan - -dgilbert: Sent upstream and now Merged as aa1185e153f774f1df65 -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit a931b6861e59c78d861017e9c6a9c161ff49a163) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index fc15d61..e6f2399 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2441,7 +2441,7 @@ static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, - - res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags); - if (res < 0) { -- fuse_reply_err(req, -errno); -+ fuse_reply_err(req, errno); - } else { - fuse_reply_write(req, res); - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch b/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch deleted file mode 100644 index ddacdbe..0000000 --- a/kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 83b03fc4a3ecf6086394363488bbebc8d55428c0 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:16 +0100 -Subject: [PATCH 105/116] virtiofsd: passthrough_ll: Use cache_readdir for - directory open -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-102-dgilbert@redhat.com> -Patchwork-id: 93555 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 101/112] virtiofsd: passthrough_ll: Use cache_readdir for directory open -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Misono Tomohiro - -Since keep_cache(FOPEN_KEEP_CACHE) has no effect for directory as -described in fuse_common.h, use cache_readdir(FOPNE_CACHE_DIR) for -diretory open when cache=always mode. - -Signed-off-by: Misono Tomohiro -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 9b610b09b49b1aada256097b338d49da805da6ae) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 4c61ac5..79b8b71 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1523,7 +1523,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, - - fi->fh = fh; - if (lo->cache == CACHE_ALWAYS) { -- fi->keep_cache = 1; -+ fi->cache_readdir = 1; - } - fuse_reply_open(req, fi); - return; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch b/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch deleted file mode 100644 index 0506574..0000000 --- a/kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch +++ /dev/null @@ -1,238 +0,0 @@ -From 474d0adafed4d73720d6413b2903d6c4b529e5e6 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:15 +0100 -Subject: [PATCH 044/116] virtiofsd: passthrough_ll: add dirp_map to hide - lo_dirp pointers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-41-dgilbert@redhat.com> -Patchwork-id: 93495 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 040/112] virtiofsd: passthrough_ll: add dirp_map to hide lo_dirp pointers -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Do not expose lo_dirp pointers to clients. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit b39bce121bfad8757eec0ee41f14607b883935d3) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 103 +++++++++++++++++++++++++++++---------- - 1 file changed, 76 insertions(+), 27 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index a3ebf74..5f5a72f 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -56,27 +56,10 @@ - - #include "passthrough_helpers.h" - --/* -- * We are re-using pointers to our `struct lo_inode` -- * elements as inodes. This means that we must be able to -- * store uintptr_t values in a fuse_ino_t variable. The following -- * incantation checks this condition at compile time. -- */ --#if defined(__GNUC__) && \ -- (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 6) && \ -- !defined __cplusplus --_Static_assert(sizeof(fuse_ino_t) >= sizeof(uintptr_t), -- "fuse_ino_t too small to hold uintptr_t values!"); --#else --struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { -- unsigned _uintptr_to_must_hold_fuse_ino_t -- : ((sizeof(fuse_ino_t) >= sizeof(uintptr_t)) ? 
1 : -1); --}; --#endif -- - struct lo_map_elem { - union { - struct lo_inode *inode; -+ struct lo_dirp *dirp; - ssize_t freelist; - }; - bool in_use; -@@ -123,6 +106,7 @@ struct lo_data { - int timeout_set; - struct lo_inode root; /* protected by lo->mutex */ - struct lo_map ino_map; /* protected by lo->mutex */ -+ struct lo_map dirp_map; /* protected by lo->mutex */ - }; - - static const struct fuse_opt lo_opts[] = { -@@ -253,6 +237,20 @@ static void lo_map_remove(struct lo_map *map, size_t key) - } - - /* Assumes lo->mutex is held */ -+static ssize_t lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp) -+{ -+ struct lo_map_elem *elem; -+ -+ elem = lo_map_alloc_elem(&lo_data(req)->dirp_map); -+ if (!elem) { -+ return -1; -+ } -+ -+ elem->dirp = dirp; -+ return elem - lo_data(req)->dirp_map.elems; -+} -+ -+/* Assumes lo->mutex is held */ - static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) - { - struct lo_map_elem *elem; -@@ -861,9 +859,19 @@ struct lo_dirp { - off_t offset; - }; - --static struct lo_dirp *lo_dirp(struct fuse_file_info *fi) -+static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) - { -- return (struct lo_dirp *)(uintptr_t)fi->fh; -+ struct lo_data *lo = lo_data(req); -+ struct lo_map_elem *elem; -+ -+ pthread_mutex_lock(&lo->mutex); -+ elem = lo_map_get(&lo->dirp_map, fi->fh); -+ pthread_mutex_unlock(&lo->mutex); -+ if (!elem) { -+ return NULL; -+ } -+ -+ return elem->dirp; - } - - static void lo_opendir(fuse_req_t req, fuse_ino_t ino, -@@ -873,6 +881,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, - struct lo_data *lo = lo_data(req); - struct lo_dirp *d; - int fd; -+ ssize_t fh; - - d = calloc(1, sizeof(struct lo_dirp)); - if (d == NULL) { -@@ -892,7 +901,14 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, - d->offset = 0; - d->entry = NULL; - -- fi->fh = (uintptr_t)d; -+ pthread_mutex_lock(&lo->mutex); -+ fh = lo_add_dirp_mapping(req, d); -+ pthread_mutex_unlock(&lo->mutex); -+ 
if (fh == -1) { -+ goto out_err; -+ } -+ -+ fi->fh = fh; - if (lo->cache == CACHE_ALWAYS) { - fi->keep_cache = 1; - } -@@ -903,6 +919,9 @@ out_errno: - error = errno; - out_err: - if (d) { -+ if (d->dp) { -+ closedir(d->dp); -+ } - if (fd != -1) { - close(fd); - } -@@ -920,17 +939,21 @@ static int is_dot_or_dotdot(const char *name) - static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - off_t offset, struct fuse_file_info *fi, int plus) - { -- struct lo_dirp *d = lo_dirp(fi); -- char *buf; -+ struct lo_dirp *d; -+ char *buf = NULL; - char *p; - size_t rem = size; -- int err; -+ int err = ENOMEM; - - (void)ino; - -+ d = lo_dirp(req, fi); -+ if (!d) { -+ goto error; -+ } -+ - buf = calloc(1, size); - if (!buf) { -- err = ENOMEM; - goto error; - } - p = buf; -@@ -1028,8 +1051,21 @@ static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, - static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, - struct fuse_file_info *fi) - { -- struct lo_dirp *d = lo_dirp(fi); -+ struct lo_data *lo = lo_data(req); -+ struct lo_dirp *d; -+ - (void)ino; -+ -+ d = lo_dirp(req, fi); -+ if (!d) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ -+ pthread_mutex_lock(&lo->mutex); -+ lo_map_remove(&lo->dirp_map, fi->fh); -+ pthread_mutex_unlock(&lo->mutex); -+ - closedir(d->dp); - free(d); - fuse_reply_err(req, 0); -@@ -1081,8 +1117,18 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, - struct fuse_file_info *fi) - { - int res; -- int fd = dirfd(lo_dirp(fi)->dp); -+ struct lo_dirp *d; -+ int fd; -+ - (void)ino; -+ -+ d = lo_dirp(req, fi); -+ if (!d) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ -+ fd = dirfd(d->dp); - if (datasync) { - res = fdatasync(fd); - } else { -@@ -1614,6 +1660,8 @@ int main(int argc, char *argv[]) - root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino); - root_elem->inode = &lo.root; - -+ lo_map_init(&lo.dirp_map); -+ - if (fuse_parse_cmdline(&args, &opts) != 0) { - return 1; - } -@@ -1710,6 
+1758,7 @@ err_out2: - err_out1: - fuse_opt_free_args(&args); - -+ lo_map_destroy(&lo.dirp_map); - lo_map_destroy(&lo.ino_map); - - if (lo.root.fd >= 0) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch b/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch deleted file mode 100644 index b8de3d8..0000000 --- a/kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch +++ /dev/null @@ -1,303 +0,0 @@ -From 03effbc021064bb77d231ae5ca02d1a579c71ee1 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:17 +0100 -Subject: [PATCH 046/116] virtiofsd: passthrough_ll: add fallback for racy ops -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-43-dgilbert@redhat.com> -Patchwork-id: 93496 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 042/112] virtiofsd: passthrough_ll: add fallback for racy ops -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -We have two operations that cannot be done race-free on a symlink in -certain cases: utimes and link. - -Add racy fallback for these if the race-free method doesn't work. We do -our best to avoid races even in this case: - - - get absolute path by reading /proc/self/fd/NN symlink - - - lookup parent directory: after this we are safe against renames in - ancestors - - - lookup name in parent directory, and verify that we got to the original - inode, if not retry the whole thing - -Both utimes(2) and link(2) hold i_lock on the inode across the operation, -so a racing rename/delete by this fuse instance is not possible, only from -other entities changing the filesystem. - -If the "norace" option is given, then disable the racy fallbacks. - -Signed-off-by: Miklos Szeredi -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 5fe319a7b19c9c328e6e061bffcf1ff6cc8b89ce) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 5 +- - tools/virtiofsd/passthrough_ll.c | 157 +++++++++++++++++++++++++++++++++++---- - 2 files changed, 145 insertions(+), 17 deletions(-) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index b8ec5ac..5531425 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -142,7 +142,10 @@ void fuse_cmdline_help(void) - " --daemonize run in background\n" - " -o max_idle_threads the maximum number of idle worker " - "threads\n" -- " allowed (default: 10)\n"); -+ " allowed (default: 10)\n" -+ " -o norace disable racy fallback\n" -+ " default: false\n" -+ ); - } - - static int fuse_helper_opt_proc(void *data, const char *arg, int key, -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 9815bfa..ac380ef 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -98,6 +98,7 @@ enum { - struct lo_data { - pthread_mutex_t mutex; - int debug; -+ int norace; - int writeback; - int flock; - int xattr; -@@ -124,10 +125,15 @@ static const struct fuse_opt lo_opts[] = { - { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, - { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, - { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, -- -+ { "norace", offsetof(struct lo_data, norace), 1 }, - FUSE_OPT_END - }; - -+static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); -+ -+static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); -+ -+ - static struct lo_data *lo_data(fuse_req_t req) - { - return (struct lo_data *)fuse_req_userdata(req); -@@ -347,23 +353,127 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino, - fuse_reply_attr(req, &buf, lo->timeout); - } - --static int utimensat_empty_nofollow(struct lo_inode *inode, -- const struct timespec 
*tv) -+static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, -+ char path[PATH_MAX], struct lo_inode **parent) - { -- int res; - char procname[64]; -+ char *last; -+ struct stat stat; -+ struct lo_inode *p; -+ int retries = 2; -+ int res; -+ -+retry: -+ sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ -+ res = readlink(procname, path, PATH_MAX); -+ if (res < 0) { -+ fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__); -+ goto fail_noretry; -+ } -+ -+ if (res >= PATH_MAX) { -+ fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__); -+ goto fail_noretry; -+ } -+ path[res] = '\0'; -+ -+ last = strrchr(path, '/'); -+ if (last == NULL) { -+ /* Shouldn't happen */ -+ fuse_log( -+ FUSE_LOG_WARNING, -+ "%s: INTERNAL ERROR: bad path read from proc\n", __func__); -+ goto fail_noretry; -+ } -+ if (last == path) { -+ p = &lo->root; -+ pthread_mutex_lock(&lo->mutex); -+ p->refcount++; -+ pthread_mutex_unlock(&lo->mutex); -+ } else { -+ *last = '\0'; -+ res = fstatat(AT_FDCWD, last == path ? 
"/" : path, &stat, 0); -+ if (res == -1) { -+ if (!retries) { -+ fuse_log(FUSE_LOG_WARNING, -+ "%s: failed to stat parent: %m\n", __func__); -+ } -+ goto fail; -+ } -+ p = lo_find(lo, &stat); -+ if (p == NULL) { -+ if (!retries) { -+ fuse_log(FUSE_LOG_WARNING, -+ "%s: failed to find parent\n", __func__); -+ } -+ goto fail; -+ } -+ } -+ last++; -+ res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ if (!retries) { -+ fuse_log(FUSE_LOG_WARNING, -+ "%s: failed to stat last\n", __func__); -+ } -+ goto fail_unref; -+ } -+ if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) { -+ if (!retries) { -+ fuse_log(FUSE_LOG_WARNING, -+ "%s: failed to match last\n", __func__); -+ } -+ goto fail_unref; -+ } -+ *parent = p; -+ memmove(path, last, strlen(last) + 1); -+ -+ return 0; -+ -+fail_unref: -+ unref_inode(lo, p, 1); -+fail: -+ if (retries) { -+ retries--; -+ goto retry; -+ } -+fail_noretry: -+ errno = EIO; -+ return -1; -+} -+ -+static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, -+ const struct timespec *tv) -+{ -+ int res; -+ struct lo_inode *parent; -+ char path[PATH_MAX]; - - if (inode->is_symlink) { -- res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH); - if (res == -1 && errno == EINVAL) { - /* Sorry, no race free way to set times on symlink. 
*/ -- errno = EPERM; -+ if (lo->norace) { -+ errno = EPERM; -+ } else { -+ goto fallback; -+ } - } - return res; - } -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(path, "/proc/self/fd/%i", inode->fd); - -- return utimensat(AT_FDCWD, procname, tv, 0); -+ return utimensat(AT_FDCWD, path, tv, 0); -+ -+fallback: -+ res = lo_parent_and_name(lo, inode, path, &parent); -+ if (res != -1) { -+ res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); -+ unref_inode(lo, parent, 1); -+ } -+ -+ return res; - } - - static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) -@@ -387,6 +497,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - { - int saverr; - char procname[64]; -+ struct lo_data *lo = lo_data(req); - struct lo_inode *inode; - int ifd; - int res; -@@ -459,7 +570,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - if (fi) { - res = futimens(fd, tv); - } else { -- res = utimensat_empty_nofollow(inode, tv); -+ res = utimensat_empty(lo, inode, tv); - } - if (res == -1) { - goto out_err; -@@ -709,24 +820,38 @@ static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, - lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); - } - --static int linkat_empty_nofollow(struct lo_inode *inode, int dfd, -- const char *name) -+static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, -+ int dfd, const char *name) - { - int res; -- char procname[64]; -+ struct lo_inode *parent; -+ char path[PATH_MAX]; - - if (inode->is_symlink) { - res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH); - if (res == -1 && (errno == ENOENT || errno == EINVAL)) { - /* Sorry, no race free way to hard-link a symlink. 
*/ -- errno = EPERM; -+ if (lo->norace) { -+ errno = EPERM; -+ } else { -+ goto fallback; -+ } - } - return res; - } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(path, "/proc/self/fd/%i", inode->fd); -+ -+ return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW); -+ -+fallback: -+ res = lo_parent_and_name(lo, inode, path, &parent); -+ if (res != -1) { -+ res = linkat(parent->fd, path, dfd, name, 0); -+ unref_inode(lo, parent, 1); -+ } - -- return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); -+ return res; - } - - static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, -@@ -748,7 +873,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - e.attr_timeout = lo->timeout; - e.entry_timeout = lo->timeout; - -- res = linkat_empty_nofollow(inode, lo_fd(req, parent), name); -+ res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name); - if (res == -1) { - goto out_err; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch b/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch deleted file mode 100644 index 24b2a6e..0000000 --- a/kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch +++ /dev/null @@ -1,328 +0,0 @@ -From 35337e604e9149d6d8fcf74b8b82ac33a8611ebb Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:16 +0100 -Subject: [PATCH 045/116] virtiofsd: passthrough_ll: add fd_map to hide file - descriptors -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-42-dgilbert@redhat.com> -Patchwork-id: 93494 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 041/112] virtiofsd: passthrough_ll: add fd_map to hide file descriptors -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Do not expose file descriptor numbers to clients. This prevents the -abuse of internal file descriptors (like stdin/stdout). - -Signed-off-by: Stefan Hajnoczi -Fix from: -Signed-off-by: Xiao Yang -dgilbert: - Added lseek -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 73b4d19dfc4248a74c1f3e511cfa934681d9c602) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 116 +++++++++++++++++++++++++++++++-------- - 1 file changed, 94 insertions(+), 22 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 5f5a72f..9815bfa 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -60,6 +60,7 @@ struct lo_map_elem { - union { - struct lo_inode *inode; - struct lo_dirp *dirp; -+ int fd; - ssize_t freelist; - }; - bool in_use; -@@ -107,6 +108,7 @@ struct lo_data { - struct lo_inode root; /* protected by lo->mutex */ - struct lo_map ino_map; /* protected by lo->mutex */ - struct lo_map dirp_map; /* protected by lo->mutex */ -+ struct lo_map fd_map; /* protected by lo->mutex */ - }; - - static const struct fuse_opt lo_opts[] = { -@@ -237,6 +239,20 @@ static void lo_map_remove(struct lo_map *map, size_t key) - } - - /* Assumes lo->mutex is held */ -+static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) -+{ -+ struct lo_map_elem *elem; -+ -+ elem = lo_map_alloc_elem(&lo_data(req)->fd_map); -+ if (!elem) { -+ return -1; -+ } -+ -+ elem->fd = fd; -+ return elem - lo_data(req)->fd_map.elems; -+} -+ -+/* Assumes lo->mutex is held */ - static ssize_t 
lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp) - { - struct lo_map_elem *elem; -@@ -350,6 +366,22 @@ static int utimensat_empty_nofollow(struct lo_inode *inode, - return utimensat(AT_FDCWD, procname, tv, 0); - } - -+static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) -+{ -+ struct lo_data *lo = lo_data(req); -+ struct lo_map_elem *elem; -+ -+ pthread_mutex_lock(&lo->mutex); -+ elem = lo_map_get(&lo->fd_map, fi->fh); -+ pthread_mutex_unlock(&lo->mutex); -+ -+ if (!elem) { -+ return -1; -+ } -+ -+ return elem->fd; -+} -+ - static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - int valid, struct fuse_file_info *fi) - { -@@ -358,6 +390,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - struct lo_inode *inode; - int ifd; - int res; -+ int fd; - - inode = lo_inode(req, ino); - if (!inode) { -@@ -367,9 +400,14 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - - ifd = inode->fd; - -+ /* If fi->fh is invalid we'll report EBADF later */ -+ if (fi) { -+ fd = lo_fi_fd(req, fi); -+ } -+ - if (valid & FUSE_SET_ATTR_MODE) { - if (fi) { -- res = fchmod(fi->fh, attr->st_mode); -+ res = fchmod(fd, attr->st_mode); - } else { - sprintf(procname, "/proc/self/fd/%i", ifd); - res = chmod(procname, attr->st_mode); -@@ -389,7 +427,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - } - if (valid & FUSE_SET_ATTR_SIZE) { - if (fi) { -- res = ftruncate(fi->fh, attr->st_size); -+ res = ftruncate(fd, attr->st_size); - } else { - sprintf(procname, "/proc/self/fd/%i", ifd); - res = truncate(procname, attr->st_size); -@@ -419,7 +457,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - } - - if (fi) { -- res = futimens(fi->fh, tv); -+ res = futimens(fd, tv); - } else { - res = utimensat_empty_nofollow(inode, tv); - } -@@ -1096,7 +1134,18 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - lo_restore_cred(&old); - - 
if (!err) { -- fi->fh = fd; -+ ssize_t fh; -+ -+ pthread_mutex_lock(&lo->mutex); -+ fh = lo_add_fd_mapping(req, fd); -+ pthread_mutex_unlock(&lo->mutex); -+ if (fh == -1) { -+ close(fd); -+ fuse_reply_err(req, ENOMEM); -+ return; -+ } -+ -+ fi->fh = fh; - err = lo_do_lookup(req, parent, name, &e); - } - if (lo->cache == CACHE_NEVER) { -@@ -1140,6 +1189,7 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, - static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - { - int fd; -+ ssize_t fh; - char buf[64]; - struct lo_data *lo = lo_data(req); - -@@ -1175,7 +1225,16 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - return (void)fuse_reply_err(req, errno); - } - -- fi->fh = fd; -+ pthread_mutex_lock(&lo->mutex); -+ fh = lo_add_fd_mapping(req, fd); -+ pthread_mutex_unlock(&lo->mutex); -+ if (fh == -1) { -+ close(fd); -+ fuse_reply_err(req, ENOMEM); -+ return; -+ } -+ -+ fi->fh = fh; - if (lo->cache == CACHE_NEVER) { - fi->direct_io = 1; - } else if (lo->cache == CACHE_ALWAYS) { -@@ -1187,9 +1246,18 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - static void lo_release(fuse_req_t req, fuse_ino_t ino, - struct fuse_file_info *fi) - { -+ struct lo_data *lo = lo_data(req); -+ int fd; -+ - (void)ino; - -- close(fi->fh); -+ fd = lo_fi_fd(req, fi); -+ -+ pthread_mutex_lock(&lo->mutex); -+ lo_map_remove(&lo->fd_map, fi->fh); -+ pthread_mutex_unlock(&lo->mutex); -+ -+ close(fd); - fuse_reply_err(req, 0); - } - -@@ -1197,7 +1265,7 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - { - int res; - (void)ino; -- res = close(dup(fi->fh)); -+ res = close(dup(lo_fi_fd(req, fi))); - fuse_reply_err(req, res == -1 ? 
errno : 0); - } - -@@ -1224,7 +1292,7 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, - return (void)fuse_reply_err(req, errno); - } - } else { -- fd = fi->fh; -+ fd = lo_fi_fd(req, fi); - } - - if (datasync) { -@@ -1251,7 +1319,7 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, - } - - buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -- buf.buf[0].fd = fi->fh; -+ buf.buf[0].fd = lo_fi_fd(req, fi); - buf.buf[0].pos = offset; - - fuse_reply_data(req, &buf); -@@ -1266,7 +1334,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); - - out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; -- out_buf.buf[0].fd = fi->fh; -+ out_buf.buf[0].fd = lo_fi_fd(req, fi); - out_buf.buf[0].pos = off; - - if (lo_debug(req)) { -@@ -1303,7 +1371,7 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, - (void)ino; - - #ifdef CONFIG_FALLOCATE -- err = fallocate(fi->fh, mode, offset, length); -+ err = fallocate(lo_fi_fd(req, fi), mode, offset, length); - if (err < 0) { - err = errno; - } -@@ -1314,7 +1382,7 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, - return; - } - -- err = posix_fallocate(fi->fh, offset, length); -+ err = posix_fallocate(lo_fi_fd(req, fi), offset, length); - #endif - - fuse_reply_err(req, err); -@@ -1326,7 +1394,7 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, - int res; - (void)ino; - -- res = flock(fi->fh, op); -+ res = flock(lo_fi_fd(req, fi), op); - - fuse_reply_err(req, res == -1 ? 
errno : 0); - } -@@ -1551,17 +1619,19 @@ static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, - off_t off_out, struct fuse_file_info *fi_out, - size_t len, int flags) - { -+ int in_fd, out_fd; - ssize_t res; - -- if (lo_debug(req)) -- fuse_log(FUSE_LOG_DEBUG, -- "lo_copy_file_range(ino=%" PRIu64 "/fd=%lu, " -- "off=%lu, ino=%" PRIu64 "/fd=%lu, " -- "off=%lu, size=%zd, flags=0x%x)\n", -- ino_in, fi_in->fh, off_in, ino_out, fi_out->fh, off_out, len, -- flags); -+ in_fd = lo_fi_fd(req, fi_in); -+ out_fd = lo_fi_fd(req, fi_out); -+ -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_copy_file_range(ino=%" PRIu64 "/fd=%d, " -+ "off=%lu, ino=%" PRIu64 "/fd=%d, " -+ "off=%lu, size=%zd, flags=0x%x)\n", -+ ino_in, in_fd, off_in, ino_out, out_fd, off_out, len, flags); - -- res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len, flags); -+ res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags); - if (res < 0) { - fuse_reply_err(req, -errno); - } else { -@@ -1576,7 +1646,7 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, - off_t res; - - (void)ino; -- res = lseek(fi->fh, off, whence); -+ res = lseek(lo_fi_fd(req, fi), off, whence); - if (res != -1) { - fuse_reply_lseek(req, res); - } else { -@@ -1661,6 +1731,7 @@ int main(int argc, char *argv[]) - root_elem->inode = &lo.root; - - lo_map_init(&lo.dirp_map); -+ lo_map_init(&lo.fd_map); - - if (fuse_parse_cmdline(&args, &opts) != 0) { - return 1; -@@ -1758,6 +1829,7 @@ err_out2: - err_out1: - fuse_opt_free_args(&args); - -+ lo_map_destroy(&lo.fd_map); - lo_map_destroy(&lo.dirp_map); - lo_map_destroy(&lo.ino_map); - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch b/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch deleted file mode 100644 index ba8b730..0000000 --- a/kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch +++ /dev/null @@ -1,395 +0,0 @@ -From d81396cc3d9815730903b0755c9d2e67d6954d54 Mon 
Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:14 +0100 -Subject: [PATCH 043/116] virtiofsd: passthrough_ll: add ino_map to hide - lo_inode pointers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-40-dgilbert@redhat.com> -Patchwork-id: 93493 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 039/112] virtiofsd: passthrough_ll: add ino_map to hide lo_inode pointers -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Do not expose lo_inode pointers to clients. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 92fb57b83cdbfc4bf53c0c46a3d0bcbc36e64126) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 144 +++++++++++++++++++++++++++++++-------- - 1 file changed, 114 insertions(+), 30 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e83a976..a3ebf74 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -57,8 +57,8 @@ - #include "passthrough_helpers.h" - - /* -- * We are re-using pointers to our `struct lo_inode` and `struct -- * lo_dirp` elements as inodes. This means that we must be able to -+ * We are re-using pointers to our `struct lo_inode` -+ * elements as inodes. This means that we must be able to - * store uintptr_t values in a fuse_ino_t variable. The following - * incantation checks this condition at compile time. - */ -@@ -76,7 +76,7 @@ struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { - - struct lo_map_elem { - union { -- /* Element values will go here... 
*/ -+ struct lo_inode *inode; - ssize_t freelist; - }; - bool in_use; -@@ -97,6 +97,7 @@ struct lo_inode { - ino_t ino; - dev_t dev; - uint64_t refcount; /* protected by lo->mutex */ -+ fuse_ino_t fuse_ino; - }; - - struct lo_cred { -@@ -121,6 +122,7 @@ struct lo_data { - int cache; - int timeout_set; - struct lo_inode root; /* protected by lo->mutex */ -+ struct lo_map ino_map; /* protected by lo->mutex */ - }; - - static const struct fuse_opt lo_opts[] = { -@@ -145,14 +147,14 @@ static struct lo_data *lo_data(fuse_req_t req) - return (struct lo_data *)fuse_req_userdata(req); - } - --__attribute__((unused)) static void lo_map_init(struct lo_map *map) -+static void lo_map_init(struct lo_map *map) - { - map->elems = NULL; - map->nelems = 0; - map->freelist = -1; - } - --__attribute__((unused)) static void lo_map_destroy(struct lo_map *map) -+static void lo_map_destroy(struct lo_map *map) - { - free(map->elems); - } -@@ -183,8 +185,7 @@ static int lo_map_grow(struct lo_map *map, size_t new_nelems) - return 1; - } - --__attribute__((unused)) static struct lo_map_elem * --lo_map_alloc_elem(struct lo_map *map) -+static struct lo_map_elem *lo_map_alloc_elem(struct lo_map *map) - { - struct lo_map_elem *elem; - -@@ -200,8 +201,7 @@ lo_map_alloc_elem(struct lo_map *map) - return elem; - } - --__attribute__((unused)) static struct lo_map_elem * --lo_map_reserve(struct lo_map *map, size_t key) -+static struct lo_map_elem *lo_map_reserve(struct lo_map *map, size_t key) - { - ssize_t *prev; - -@@ -222,8 +222,7 @@ lo_map_reserve(struct lo_map *map, size_t key) - return NULL; - } - --__attribute__((unused)) static struct lo_map_elem * --lo_map_get(struct lo_map *map, size_t key) -+static struct lo_map_elem *lo_map_get(struct lo_map *map, size_t key) - { - if (key >= map->nelems) { - return NULL; -@@ -234,8 +233,7 @@ lo_map_get(struct lo_map *map, size_t key) - return &map->elems[key]; - } - --__attribute__((unused)) static void lo_map_remove(struct lo_map *map, -- size_t key) 
-+static void lo_map_remove(struct lo_map *map, size_t key) - { - struct lo_map_elem *elem; - -@@ -254,18 +252,40 @@ __attribute__((unused)) static void lo_map_remove(struct lo_map *map, - map->freelist = key; - } - -+/* Assumes lo->mutex is held */ -+static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) -+{ -+ struct lo_map_elem *elem; -+ -+ elem = lo_map_alloc_elem(&lo_data(req)->ino_map); -+ if (!elem) { -+ return -1; -+ } -+ -+ elem->inode = inode; -+ return elem - lo_data(req)->ino_map.elems; -+} -+ - static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - { -- if (ino == FUSE_ROOT_ID) { -- return &lo_data(req)->root; -- } else { -- return (struct lo_inode *)(uintptr_t)ino; -+ struct lo_data *lo = lo_data(req); -+ struct lo_map_elem *elem; -+ -+ pthread_mutex_lock(&lo->mutex); -+ elem = lo_map_get(&lo->ino_map, ino); -+ pthread_mutex_unlock(&lo->mutex); -+ -+ if (!elem) { -+ return NULL; - } -+ -+ return elem->inode; - } - - static int lo_fd(fuse_req_t req, fuse_ino_t ino) - { -- return lo_inode(req, ino)->fd; -+ struct lo_inode *inode = lo_inode(req, ino); -+ return inode ? 
inode->fd : -1; - } - - static bool lo_debug(fuse_req_t req) -@@ -337,10 +357,18 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - { - int saverr; - char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -- int ifd = inode->fd; -+ struct lo_inode *inode; -+ int ifd; - int res; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ -+ ifd = inode->fd; -+ - if (valid & FUSE_SET_ATTR_MODE) { - if (fi) { - res = fchmod(fi->fh, attr->st_mode); -@@ -470,6 +498,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - inode->dev = e->attr.st_dev; - - pthread_mutex_lock(&lo->mutex); -+ inode->fuse_ino = lo_add_inode_mapping(req, inode); - prev = &lo->root; - next = prev->next; - next->prev = inode; -@@ -478,7 +507,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - prev->next = inode; - pthread_mutex_unlock(&lo->mutex); - } -- e->ino = (uintptr_t)inode; -+ e->ino = inode->fuse_ino; - - if (lo_debug(req)) { - fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -@@ -582,10 +611,16 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - { - int res; - int saverr; -- struct lo_inode *dir = lo_inode(req, parent); -+ struct lo_inode *dir; - struct fuse_entry_param e; - struct lo_cred old = {}; - -+ dir = lo_inode(req, parent); -+ if (!dir) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - saverr = ENOMEM; - - saverr = lo_change_cred(req, &old); -@@ -663,10 +698,16 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - { - int res; - struct lo_data *lo = lo_data(req); -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; - struct fuse_entry_param e; - int saverr; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - memset(&e, 0, sizeof(struct fuse_entry_param)); - e.attr_timeout = lo->timeout; - e.entry_timeout = lo->timeout; -@@ -684,7 
+725,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - pthread_mutex_lock(&lo->mutex); - inode->refcount++; - pthread_mutex_unlock(&lo->mutex); -- e.ino = (uintptr_t)inode; -+ e.ino = inode->fuse_ino; - - if (lo_debug(req)) { - fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -@@ -750,10 +791,10 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) - next->prev = prev; - prev->next = next; - -+ lo_map_remove(&lo->ino_map, inode->fuse_ino); - pthread_mutex_unlock(&lo->mutex); - close(inode->fd); - free(inode); -- - } else { - pthread_mutex_unlock(&lo->mutex); - } -@@ -762,7 +803,12 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) - static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - { - struct lo_data *lo = lo_data(req); -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; -+ -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ return; -+ } - - if (lo_debug(req)) { - fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -@@ -1244,10 +1290,16 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - { - char *value = NULL; - char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; - ssize_t ret; - int saverr; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - saverr = ENOSYS; - if (!lo_data(req)->xattr) { - goto out; -@@ -1306,10 +1358,16 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - { - char *value = NULL; - char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; - ssize_t ret; - int saverr; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - saverr = ENOSYS; - if (!lo_data(req)->xattr) { - goto out; -@@ -1367,10 +1425,16 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - const char 
*value, size_t size, int flags) - { - char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; - ssize_t ret; - int saverr; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - saverr = ENOSYS; - if (!lo_data(req)->xattr) { - goto out; -@@ -1400,10 +1464,16 @@ out: - static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - { - char procname[64]; -- struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_inode *inode; - ssize_t ret; - int saverr; - -+ inode = lo_inode(req, ino); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } -+ - saverr = ENOSYS; - if (!lo_data(req)->xattr) { - goto out; -@@ -1522,6 +1592,7 @@ int main(int argc, char *argv[]) - struct fuse_session *se; - struct fuse_cmdline_opts opts; - struct lo_data lo = { .debug = 0, .writeback = 0 }; -+ struct lo_map_elem *root_elem; - int ret = -1; - - /* Don't mask creation mode, kernel already did that */ -@@ -1530,8 +1601,19 @@ int main(int argc, char *argv[]) - pthread_mutex_init(&lo.mutex, NULL); - lo.root.next = lo.root.prev = &lo.root; - lo.root.fd = -1; -+ lo.root.fuse_ino = FUSE_ROOT_ID; - lo.cache = CACHE_NORMAL; - -+ /* -+ * Set up the ino map like this: -+ * [0] Reserved (will not be used) -+ * [1] Root inode -+ */ -+ lo_map_init(&lo.ino_map); -+ lo_map_reserve(&lo.ino_map, 0)->in_use = false; -+ root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino); -+ root_elem->inode = &lo.root; -+ - if (fuse_parse_cmdline(&args, &opts) != 0) { - return 1; - } -@@ -1628,6 +1710,8 @@ err_out2: - err_out1: - fuse_opt_free_args(&args); - -+ lo_map_destroy(&lo.ino_map); -+ - if (lo.root.fd >= 0) { - close(lo.root.fd); - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch b/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch deleted file mode 100644 index 4751f95..0000000 --- 
a/kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch +++ /dev/null @@ -1,182 +0,0 @@ -From d56651e227bae83ee0cceb12bd91e3e9f6045ab3 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:13 +0100 -Subject: [PATCH 042/116] virtiofsd: passthrough_ll: add lo_map for ino/fh - indirection -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-39-dgilbert@redhat.com> -Patchwork-id: 93492 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 038/112] virtiofsd: passthrough_ll: add lo_map for ino/fh indirection -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -A layer of indirection is needed because passthrough_ll cannot expose -pointers or file descriptor numbers to untrusted clients. Malicious -clients could send invalid pointers or file descriptors in order to -crash or exploit the file system daemon. - -lo_map provides an integer key->value mapping. This will be used for -ino and fh fields in the patches that follow. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 25c135727b08dca90f00094e522a69170b13dfac) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 124 +++++++++++++++++++++++++++++++++++++++ - 1 file changed, 124 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 5e06179..e83a976 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -74,6 +74,21 @@ struct _uintptr_to_must_hold_fuse_ino_t_dummy_struct { - }; - #endif - -+struct lo_map_elem { -+ union { -+ /* Element values will go here... 
*/ -+ ssize_t freelist; -+ }; -+ bool in_use; -+}; -+ -+/* Maps FUSE fh or ino values to internal objects */ -+struct lo_map { -+ struct lo_map_elem *elems; -+ size_t nelems; -+ ssize_t freelist; -+}; -+ - struct lo_inode { - struct lo_inode *next; /* protected by lo->mutex */ - struct lo_inode *prev; /* protected by lo->mutex */ -@@ -130,6 +145,115 @@ static struct lo_data *lo_data(fuse_req_t req) - return (struct lo_data *)fuse_req_userdata(req); - } - -+__attribute__((unused)) static void lo_map_init(struct lo_map *map) -+{ -+ map->elems = NULL; -+ map->nelems = 0; -+ map->freelist = -1; -+} -+ -+__attribute__((unused)) static void lo_map_destroy(struct lo_map *map) -+{ -+ free(map->elems); -+} -+ -+static int lo_map_grow(struct lo_map *map, size_t new_nelems) -+{ -+ struct lo_map_elem *new_elems; -+ size_t i; -+ -+ if (new_nelems <= map->nelems) { -+ return 1; -+ } -+ -+ new_elems = realloc(map->elems, sizeof(map->elems[0]) * new_nelems); -+ if (!new_elems) { -+ return 0; -+ } -+ -+ for (i = map->nelems; i < new_nelems; i++) { -+ new_elems[i].freelist = i + 1; -+ new_elems[i].in_use = false; -+ } -+ new_elems[new_nelems - 1].freelist = -1; -+ -+ map->elems = new_elems; -+ map->freelist = map->nelems; -+ map->nelems = new_nelems; -+ return 1; -+} -+ -+__attribute__((unused)) static struct lo_map_elem * -+lo_map_alloc_elem(struct lo_map *map) -+{ -+ struct lo_map_elem *elem; -+ -+ if (map->freelist == -1 && !lo_map_grow(map, map->nelems + 256)) { -+ return NULL; -+ } -+ -+ elem = &map->elems[map->freelist]; -+ map->freelist = elem->freelist; -+ -+ elem->in_use = true; -+ -+ return elem; -+} -+ -+__attribute__((unused)) static struct lo_map_elem * -+lo_map_reserve(struct lo_map *map, size_t key) -+{ -+ ssize_t *prev; -+ -+ if (!lo_map_grow(map, key + 1)) { -+ return NULL; -+ } -+ -+ for (prev = &map->freelist; *prev != -1; -+ prev = &map->elems[*prev].freelist) { -+ if (*prev == key) { -+ struct lo_map_elem *elem = &map->elems[key]; -+ -+ *prev = elem->freelist; 
-+ elem->in_use = true; -+ return elem; -+ } -+ } -+ return NULL; -+} -+ -+__attribute__((unused)) static struct lo_map_elem * -+lo_map_get(struct lo_map *map, size_t key) -+{ -+ if (key >= map->nelems) { -+ return NULL; -+ } -+ if (!map->elems[key].in_use) { -+ return NULL; -+ } -+ return &map->elems[key]; -+} -+ -+__attribute__((unused)) static void lo_map_remove(struct lo_map *map, -+ size_t key) -+{ -+ struct lo_map_elem *elem; -+ -+ if (key >= map->nelems) { -+ return; -+ } -+ -+ elem = &map->elems[key]; -+ if (!elem->in_use) { -+ return; -+ } -+ -+ elem->in_use = false; -+ -+ elem->freelist = map->freelist; -+ map->freelist = key; -+} -+ - static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) - { - if (ino == FUSE_ROOT_ID) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch b/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch deleted file mode 100644 index a3f7970..0000000 --- a/kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 86b4f2865f2ebd7e6b3d85beb66a9390eb46eb96 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:45 +0100 -Subject: [PATCH 074/116] virtiofsd: passthrough_ll: add renameat2 support -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-71-dgilbert@redhat.com> -Patchwork-id: 93531 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 070/112] virtiofsd: passthrough_ll: add renameat2 support -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Signed-off-by: Miklos Szeredi -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit f0ab7d6f78a7d3c1c19fd81a91c9b1199f56c4f6) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 98114a3..18d69ab 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1099,7 +1099,17 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - } - - if (flags) { -+#ifndef SYS_renameat2 - fuse_reply_err(req, EINVAL); -+#else -+ res = syscall(SYS_renameat2, lo_fd(req, parent), name, -+ lo_fd(req, newparent), newname, flags); -+ if (res == -1 && errno == ENOSYS) { -+ fuse_reply_err(req, EINVAL); -+ } else { -+ fuse_reply_err(req, res == -1 ? errno : 0); -+ } -+#endif - return; - } - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch b/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch deleted file mode 100644 index dc87ef2..0000000 --- a/kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 079199c53f483f0051f994b195ebb595aec76a39 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:51 +0100 -Subject: [PATCH 080/116] virtiofsd: passthrough_ll: clean up cache related - options -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-77-dgilbert@redhat.com> -Patchwork-id: 93530 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 076/112] virtiofsd: passthrough_ll: clean up cache related options -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - - - Rename "cache=never" to "cache=none" to match 9p's similar option. 
- - - Rename CACHE_NORMAL constant to CACHE_AUTO to match the "cache=auto" - option. - -Signed-off-by: Miklos Szeredi -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 230e777b5e250759ee0480fcc0e9ccfa2b082fba) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 5 ++++- - tools/virtiofsd/passthrough_ll.c | 20 ++++++++++---------- - 2 files changed, 14 insertions(+), 11 deletions(-) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 14f5d70..5672024 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -145,6 +145,9 @@ void fuse_cmdline_help(void) - " --syslog log to syslog (default stderr)\n" - " -f foreground operation\n" - " --daemonize run in background\n" -+ " -o cache= cache mode. could be one of \"auto, " -+ "always, none\"\n" -+ " default: auto\n" - " -o log_level= log level, default to \"info\"\n" - " level could be one of \"debug, " - "info, warn, err\"\n" -@@ -156,7 +159,7 @@ void fuse_cmdline_help(void) - " -o readdirplus|no_readdirplus\n" - " enable/disable readirplus\n" - " default: readdirplus except with " -- "cache=never\n" -+ "cache=none\n" - ); - } - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 9e7191e..b40f287 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -101,8 +101,8 @@ struct lo_cred { - }; - - enum { -- CACHE_NEVER, -- CACHE_NORMAL, -+ CACHE_NONE, -+ CACHE_AUTO, - CACHE_ALWAYS, - }; - -@@ -138,8 +138,8 @@ static const struct fuse_opt lo_opts[] = { - { "no_xattr", offsetof(struct lo_data, xattr), 0 }, - { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, - { "timeout=", offsetof(struct lo_data, timeout_set), 1 }, -- { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER }, -- { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, -+ { "cache=none", offsetof(struct lo_data, cache), 
CACHE_NONE }, -+ { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO }, - { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, - { "norace", offsetof(struct lo_data, norace), 1 }, - { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, -@@ -482,7 +482,7 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); - conn->want |= FUSE_CAP_FLOCK_LOCKS; - } -- if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) || -+ if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) || - lo->readdirplus_clear) { - fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); - conn->want &= ~FUSE_CAP_READDIRPLUS; -@@ -1493,7 +1493,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - fi->fh = fh; - err = lo_do_lookup(req, parent, name, &e); - } -- if (lo->cache == CACHE_NEVER) { -+ if (lo->cache == CACHE_NONE) { - fi->direct_io = 1; - } else if (lo->cache == CACHE_ALWAYS) { - fi->keep_cache = 1; -@@ -1578,7 +1578,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - } - - fi->fh = fh; -- if (lo->cache == CACHE_NEVER) { -+ if (lo->cache == CACHE_NONE) { - fi->direct_io = 1; - } else if (lo->cache == CACHE_ALWAYS) { - fi->keep_cache = 1; -@@ -2395,7 +2395,7 @@ int main(int argc, char *argv[]) - lo.root.next = lo.root.prev = &lo.root; - lo.root.fd = -1; - lo.root.fuse_ino = FUSE_ROOT_ID; -- lo.cache = CACHE_NORMAL; -+ lo.cache = CACHE_AUTO; - - /* - * Set up the ino map like this: -@@ -2470,11 +2470,11 @@ int main(int argc, char *argv[]) - } - if (!lo.timeout_set) { - switch (lo.cache) { -- case CACHE_NEVER: -+ case CACHE_NONE: - lo.timeout = 0.0; - break; - -- case CACHE_NORMAL: -+ case CACHE_AUTO: - lo.timeout = 1.0; - break; - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch b/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch deleted file mode 100644 index 
c55eead..0000000 --- a/kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch +++ /dev/null @@ -1,154 +0,0 @@ -From f93ea308351cbe2630d7ecf637c3b69894d84a11 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 3 Mar 2020 18:43:13 +0000 -Subject: [PATCH 17/18] virtiofsd: passthrough_ll: cleanup getxattr/listxattr -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200303184314.155564-7-dgilbert@redhat.com> -Patchwork-id: 94125 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/7] virtiofsd: passthrough_ll: cleanup getxattr/listxattr -Bugzilla: 1797064 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Ján Tomko - -From: Misono Tomohiro - -This is a cleanup patch to simplify the following xattr fix and -there is no functional changes. - -- Move memory allocation to head of the function -- Unify fgetxattr/flistxattr call for both size == 0 and - size != 0 case -- Remove redundant lo_inode_put call in error path - (Note: second call is ignored now since @inode is already NULL) - -Signed-off-by: Misono Tomohiro -Message-Id: <20200227055927.24566-2-misono.tomohiro@jp.fujitsu.com> -Acked-by: Vivek Goyal -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 16e15a73089102c3d8846792d514e769300fcc3c) -Signed-off-by: Danilo C. L. 
de Paula ---- - tools/virtiofsd/passthrough_ll.c | 54 ++++++++++++++++------------------------ - 1 file changed, 22 insertions(+), 32 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index c635fc8..50c7273 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -2199,34 +2199,30 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out; - } - -+ if (size) { -+ value = malloc(size); -+ if (!value) { -+ goto out_err; -+ } -+ } -+ - sprintf(procname, "%i", inode->fd); - fd = openat(lo->proc_self_fd, procname, O_RDONLY); - if (fd < 0) { - goto out_err; - } - -+ ret = fgetxattr(fd, name, value, size); -+ if (ret == -1) { -+ goto out_err; -+ } - if (size) { -- value = malloc(size); -- if (!value) { -- goto out_err; -- } -- -- ret = fgetxattr(fd, name, value, size); -- if (ret == -1) { -- goto out_err; -- } - saverr = 0; - if (ret == 0) { - goto out; - } -- - fuse_reply_buf(req, value, ret); - } else { -- ret = fgetxattr(fd, name, NULL, 0); -- if (ret == -1) { -- goto out_err; -- } -- - fuse_reply_xattr(req, ret); - } - out_free: -@@ -2242,7 +2238,6 @@ out_free: - out_err: - saverr = errno; - out: -- lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - goto out_free; - } -@@ -2277,34 +2272,30 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - goto out; - } - -+ if (size) { -+ value = malloc(size); -+ if (!value) { -+ goto out_err; -+ } -+ } -+ - sprintf(procname, "%i", inode->fd); - fd = openat(lo->proc_self_fd, procname, O_RDONLY); - if (fd < 0) { - goto out_err; - } - -+ ret = flistxattr(fd, value, size); -+ if (ret == -1) { -+ goto out_err; -+ } - if (size) { -- value = malloc(size); -- if (!value) { -- goto out_err; -- } -- -- ret = flistxattr(fd, value, size); -- if (ret == -1) { -- goto out_err; -- } - saverr = 0; - if (ret == 0) { - goto out; - } -- - fuse_reply_buf(req, value, ret); - } else { -- ret = flistxattr(fd, NULL, 
0); -- if (ret == -1) { -- goto out_err; -- } -- - fuse_reply_xattr(req, ret); - } - out_free: -@@ -2320,7 +2311,6 @@ out_free: - out_err: - saverr = errno; - out: -- lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - goto out_free; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch b/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch deleted file mode 100644 index 98d00fc..0000000 --- a/kvm-virtiofsd-passthrough_ll-control-readdirplus.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 0f1d456fad4ba6a696eff8976b9fe8a0f251e1b5 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:47 +0100 -Subject: [PATCH 076/116] virtiofsd: passthrough_ll: control readdirplus -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-73-dgilbert@redhat.com> -Patchwork-id: 93524 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 072/112] virtiofsd: passthrough_ll: control readdirplus -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Signed-off-by: Miklos Szeredi -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 59aef494be2d8d91055ff3f3a8eb13d9f32873d8) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/helper.c | 4 ++++ - tools/virtiofsd/passthrough_ll.c | 7 ++++++- - 2 files changed, 10 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 6d50a46..14f5d70 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -153,6 +153,10 @@ void fuse_cmdline_help(void) - " allowed (default: 10)\n" - " -o norace disable racy fallback\n" - " default: false\n" -+ " -o readdirplus|no_readdirplus\n" -+ " enable/disable readirplus\n" -+ " default: readdirplus except with " -+ "cache=never\n" - ); - } - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 6480c51..8b1784f 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -117,6 +117,8 @@ struct lo_data { - double timeout; - int cache; - int timeout_set; -+ int readdirplus_set; -+ int readdirplus_clear; - struct lo_inode root; /* protected by lo->mutex */ - struct lo_map ino_map; /* protected by lo->mutex */ - struct lo_map dirp_map; /* protected by lo->mutex */ -@@ -140,6 +142,8 @@ static const struct fuse_opt lo_opts[] = { - { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL }, - { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, - { "norace", offsetof(struct lo_data, norace), 1 }, -+ { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, -+ { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, - FUSE_OPT_END - }; - static bool use_syslog = false; -@@ -478,7 +482,8 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); - conn->want |= FUSE_CAP_FLOCK_LOCKS; - } -- if (lo->cache == CACHE_NEVER) { -+ if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) || -+ lo->readdirplus_clear) { - fuse_log(FUSE_LOG_DEBUG, 
"lo_init: disabling readdirplus\n"); - conn->want &= ~FUSE_CAP_READDIRPLUS; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch b/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch deleted file mode 100644 index 4b02779..0000000 --- a/kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch +++ /dev/null @@ -1,198 +0,0 @@ -From af14ef1dba9356e566c9c7531b8fd23361c2b16d Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:12 +0100 -Subject: [PATCH 041/116] virtiofsd: passthrough_ll: create new files in - caller's context -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-38-dgilbert@redhat.com> -Patchwork-id: 93488 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 037/112] virtiofsd: passthrough_ll: create new files in caller's context -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Vivek Goyal - -We need to create files in the caller's context. Otherwise after -creating a file, the caller might not be able to do file operations on -that file. - -Changed effective uid/gid to caller's uid/gid, create file and then -switch back to uid/gid 0. - -Use syscall(setresuid, ...) otherwise glibc does some magic to change EUID -in all threads, which is not what we want. - -Signed-off-by: Vivek Goyal -Signed-off-by: Miklos Szeredi -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 929cfb7a9a1b101cdfc9ac19807ecab4c81a13e4) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 96 +++++++++++++++++++++++++++++++++++++--- - 1 file changed, 91 insertions(+), 5 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index cd27c09..5e06179 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -50,6 +50,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -83,6 +84,11 @@ struct lo_inode { - uint64_t refcount; /* protected by lo->mutex */ - }; - -+struct lo_cred { -+ uid_t euid; -+ gid_t egid; -+}; -+ - enum { - CACHE_NEVER, - CACHE_NORMAL, -@@ -383,6 +389,69 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) - } - } - -+/* -+ * On some archs, setres*id is limited to 2^16 but they -+ * provide setres*id32 variants that allow 2^32. -+ * Others just let setres*id do 2^32 anyway. -+ */ -+#ifdef SYS_setresgid32 -+#define OURSYS_setresgid SYS_setresgid32 -+#else -+#define OURSYS_setresgid SYS_setresgid -+#endif -+ -+#ifdef SYS_setresuid32 -+#define OURSYS_setresuid SYS_setresuid32 -+#else -+#define OURSYS_setresuid SYS_setresuid -+#endif -+ -+/* -+ * Change to uid/gid of caller so that file is created with -+ * ownership of caller. -+ * TODO: What about selinux context? 
-+ */ -+static int lo_change_cred(fuse_req_t req, struct lo_cred *old) -+{ -+ int res; -+ -+ old->euid = geteuid(); -+ old->egid = getegid(); -+ -+ res = syscall(OURSYS_setresgid, -1, fuse_req_ctx(req)->gid, -1); -+ if (res == -1) { -+ return errno; -+ } -+ -+ res = syscall(OURSYS_setresuid, -1, fuse_req_ctx(req)->uid, -1); -+ if (res == -1) { -+ int errno_save = errno; -+ -+ syscall(OURSYS_setresgid, -1, old->egid, -1); -+ return errno_save; -+ } -+ -+ return 0; -+} -+ -+/* Regain Privileges */ -+static void lo_restore_cred(struct lo_cred *old) -+{ -+ int res; -+ -+ res = syscall(OURSYS_setresuid, -1, old->euid, -1); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "seteuid(%u): %m\n", old->euid); -+ exit(1); -+ } -+ -+ res = syscall(OURSYS_setresgid, -1, old->egid, -1); -+ if (res == -1) { -+ fuse_log(FUSE_LOG_ERR, "setegid(%u): %m\n", old->egid); -+ exit(1); -+ } -+} -+ - static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - const char *name, mode_t mode, dev_t rdev, - const char *link) -@@ -391,12 +460,21 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - int saverr; - struct lo_inode *dir = lo_inode(req, parent); - struct fuse_entry_param e; -+ struct lo_cred old = {}; - - saverr = ENOMEM; - -+ saverr = lo_change_cred(req, &old); -+ if (saverr) { -+ goto out; -+ } -+ - res = mknod_wrapper(dir->fd, name, link, mode, rdev); - - saverr = errno; -+ -+ lo_restore_cred(&old); -+ - if (res == -1) { - goto out; - } -@@ -794,26 +872,34 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - struct lo_data *lo = lo_data(req); - struct fuse_entry_param e; - int err; -+ struct lo_cred old = {}; - - if (lo_debug(req)) { - fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", - parent, name); - } - -+ err = lo_change_cred(req, &old); -+ if (err) { -+ goto out; -+ } -+ - fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, - mode); -- if (fd == -1) { -- return (void)fuse_reply_err(req, 
errno); -- } -+ err = fd == -1 ? errno : 0; -+ lo_restore_cred(&old); - -- fi->fh = fd; -+ if (!err) { -+ fi->fh = fd; -+ err = lo_do_lookup(req, parent, name, &e); -+ } - if (lo->cache == CACHE_NEVER) { - fi->direct_io = 1; - } else if (lo->cache == CACHE_ALWAYS) { - fi->keep_cache = 1; - } - -- err = lo_do_lookup(req, parent, name, &e); -+out: - if (err) { - fuse_reply_err(req, err); - } else { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch b/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch deleted file mode 100644 index 4a531a3..0000000 --- a/kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch +++ /dev/null @@ -1,50 +0,0 @@ -From bbf92338e5e5eed796d511d2bd3c3686b7d1e5fd Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:46 +0100 -Subject: [PATCH 075/116] virtiofsd: passthrough_ll: disable readdirplus on - cache=never -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-72-dgilbert@redhat.com> -Patchwork-id: 93525 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 071/112] virtiofsd: passthrough_ll: disable readdirplus on cache=never -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -...because the attributes sent in the READDIRPLUS reply would be discarded -anyway. - -Signed-off-by: Miklos Szeredi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit ddcbabcb0ea177be3ec3500726b699c7c26ffd93) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 18d69ab..6480c51 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -478,6 +478,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); - conn->want |= FUSE_CAP_FLOCK_LOCKS; - } -+ if (lo->cache == CACHE_NEVER) { -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); -+ conn->want &= ~FUSE_CAP_READDIRPLUS; -+ } - } - - static void lo_getattr(fuse_req_t req, fuse_ino_t ino, --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch b/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch deleted file mode 100644 index 00e11b4..0000000 --- a/kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch +++ /dev/null @@ -1,143 +0,0 @@ -From 5e33269d5fbc4ba4614bab4a6b9e0ef759bebcb7 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:10 +0100 -Subject: [PATCH 099/116] virtiofsd: passthrough_ll: fix refcounting on - remove/rename -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-96-dgilbert@redhat.com> -Patchwork-id: 93549 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 095/112] virtiofsd: passthrough_ll: fix refcounting on remove/rename -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Signed-off-by: Miklos Szeredi -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 9257e514d861afa759c36704e1904d43ca3fec88) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 50 +++++++++++++++++++++++++++++++++++++++- - 1 file changed, 49 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index c819b5f..e3a6d6b 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1140,17 +1140,42 @@ out_err: - fuse_reply_err(req, saverr); - } - -+static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, -+ const char *name) -+{ -+ int res; -+ struct stat attr; -+ -+ res = fstatat(lo_fd(req, parent), name, &attr, -+ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (res == -1) { -+ return NULL; -+ } -+ -+ return lo_find(lo_data(req), &attr); -+} -+ - static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) - { - int res; -+ struct lo_inode *inode; -+ struct lo_data *lo = lo_data(req); -+ - if (!is_safe_path_component(name)) { - fuse_reply_err(req, EINVAL); - return; - } - -+ inode = lookup_name(req, parent, name); -+ if (!inode) { -+ fuse_reply_err(req, EIO); -+ return; -+ } -+ - res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); - - fuse_reply_err(req, res == -1 ? 
errno : 0); -+ unref_inode_lolocked(lo, inode, 1); - } - - static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, -@@ -1158,12 +1183,23 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - unsigned int flags) - { - int res; -+ struct lo_inode *oldinode; -+ struct lo_inode *newinode; -+ struct lo_data *lo = lo_data(req); - - if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { - fuse_reply_err(req, EINVAL); - return; - } - -+ oldinode = lookup_name(req, parent, name); -+ newinode = lookup_name(req, newparent, newname); -+ -+ if (!oldinode) { -+ fuse_reply_err(req, EIO); -+ goto out; -+ } -+ - if (flags) { - #ifndef SYS_renameat2 - fuse_reply_err(req, EINVAL); -@@ -1176,26 +1212,38 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - fuse_reply_err(req, res == -1 ? errno : 0); - } - #endif -- return; -+ goto out; - } - - res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname); - - fuse_reply_err(req, res == -1 ? errno : 0); -+out: -+ unref_inode_lolocked(lo, oldinode, 1); -+ unref_inode_lolocked(lo, newinode, 1); - } - - static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - { - int res; -+ struct lo_inode *inode; -+ struct lo_data *lo = lo_data(req); - - if (!is_safe_path_component(name)) { - fuse_reply_err(req, EINVAL); - return; - } - -+ inode = lookup_name(req, parent, name); -+ if (!inode) { -+ fuse_reply_err(req, EIO); -+ return; -+ } -+ - res = unlinkat(lo_fd(req, parent), name, 0); - - fuse_reply_err(req, res == -1 ? 
errno : 0); -+ unref_inode_lolocked(lo, inode, 1); - } - - static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, --- -1.8.3.1 - diff --git a/kvm-virtiofsd-passthrough_ll-use-hashtable.patch b/kvm-virtiofsd-passthrough_ll-use-hashtable.patch deleted file mode 100644 index b0be1f9..0000000 --- a/kvm-virtiofsd-passthrough_ll-use-hashtable.patch +++ /dev/null @@ -1,211 +0,0 @@ -From 44f4434b1305f6ff47b4f63fafcf39bcea9e4ceb Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:52 +0100 -Subject: [PATCH 081/116] virtiofsd: passthrough_ll: use hashtable -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-78-dgilbert@redhat.com> -Patchwork-id: 93528 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 077/112] virtiofsd: passthrough_ll: use hashtable -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Improve performance of inode lookup by using a hash table. - -Signed-off-by: Miklos Szeredi -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Liu Bo -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit bfc50a6e06b10b2f9dbaf6c1a89dd523322e016f) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 81 ++++++++++++++++++++++------------------ - 1 file changed, 45 insertions(+), 36 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index b40f287..b176a31 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -84,13 +84,15 @@ struct lo_map { - ssize_t freelist; - }; - -+struct lo_key { -+ ino_t ino; -+ dev_t dev; -+}; -+ - struct lo_inode { -- struct lo_inode *next; /* protected by lo->mutex */ -- struct lo_inode *prev; /* protected by lo->mutex */ - int fd; - bool is_symlink; -- ino_t ino; -- dev_t dev; -+ struct lo_key key; - uint64_t refcount; /* protected by lo->mutex */ - fuse_ino_t fuse_ino; - }; -@@ -119,7 +121,8 @@ struct lo_data { - int timeout_set; - int readdirplus_set; - int readdirplus_clear; -- struct lo_inode root; /* protected by lo->mutex */ -+ struct lo_inode root; -+ GHashTable *inodes; /* protected by lo->mutex */ - struct lo_map ino_map; /* protected by lo->mutex */ - struct lo_map dirp_map; /* protected by lo->mutex */ - struct lo_map fd_map; /* protected by lo->mutex */ -@@ -573,7 +576,7 @@ retry: - } - goto fail_unref; - } -- if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) { -+ if (stat.st_dev != inode->key.dev || stat.st_ino != inode->key.ino) { - if (!retries) { - fuse_log(FUSE_LOG_WARNING, - "%s: failed to match last\n", __func__); -@@ -753,19 +756,20 @@ out_err: - static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) - { - struct lo_inode *p; -- struct lo_inode *ret = NULL; -+ struct lo_key key = { -+ .ino = st->st_ino, -+ .dev = st->st_dev, -+ }; - - pthread_mutex_lock(&lo->mutex); -- for (p = lo->root.next; p != &lo->root; p = p->next) { -- if (p->ino == st->st_ino && p->dev == st->st_dev) { -- assert(p->refcount > 0); -- ret = p; -- ret->refcount++; -- break; -- 
} -+ p = g_hash_table_lookup(lo->inodes, &key); -+ if (p) { -+ assert(p->refcount > 0); -+ p->refcount++; - } - pthread_mutex_unlock(&lo->mutex); -- return ret; -+ -+ return p; - } - - static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, -@@ -810,8 +814,6 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - close(newfd); - newfd = -1; - } else { -- struct lo_inode *prev, *next; -- - saverr = ENOMEM; - inode = calloc(1, sizeof(struct lo_inode)); - if (!inode) { -@@ -822,17 +824,12 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - inode->refcount = 1; - inode->fd = newfd; - newfd = -1; -- inode->ino = e->attr.st_ino; -- inode->dev = e->attr.st_dev; -+ inode->key.ino = e->attr.st_ino; -+ inode->key.dev = e->attr.st_dev; - - pthread_mutex_lock(&lo->mutex); - inode->fuse_ino = lo_add_inode_mapping(req, inode); -- prev = &lo->root; -- next = prev->next; -- next->prev = inode; -- inode->next = next; -- inode->prev = prev; -- prev->next = inode; -+ g_hash_table_insert(lo->inodes, &inode->key, inode); - pthread_mutex_unlock(&lo->mutex); - } - e->ino = inode->fuse_ino; -@@ -1162,14 +1159,8 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - assert(inode->refcount >= n); - inode->refcount -= n; - if (!inode->refcount) { -- struct lo_inode *prev, *next; -- -- prev = inode->prev; -- next = inode->next; -- next->prev = prev; -- prev->next = next; -- - lo_map_remove(&lo->ino_map, inode->fuse_ino); -+ g_hash_table_remove(lo->inodes, &inode->key); - pthread_mutex_unlock(&lo->mutex); - close(inode->fd); - free(inode); -@@ -1369,7 +1360,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - - /* Hide root's parent directory */ - if (dinode == &lo->root && strcmp(name, "..") == 0) { -- e.attr.st_ino = lo->root.ino; -+ e.attr.st_ino = lo->root.key.ino; - e.attr.st_mode = DT_DIR << 12; - } - -@@ -2370,11 +2361,26 @@ static void setup_root(struct 
lo_data *lo, struct lo_inode *root) - - root->is_symlink = false; - root->fd = fd; -- root->ino = stat.st_ino; -- root->dev = stat.st_dev; -+ root->key.ino = stat.st_ino; -+ root->key.dev = stat.st_dev; - root->refcount = 2; - } - -+static guint lo_key_hash(gconstpointer key) -+{ -+ const struct lo_key *lkey = key; -+ -+ return (guint)lkey->ino + (guint)lkey->dev; -+} -+ -+static gboolean lo_key_equal(gconstpointer a, gconstpointer b) -+{ -+ const struct lo_key *la = a; -+ const struct lo_key *lb = b; -+ -+ return la->ino == lb->ino && la->dev == lb->dev; -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -2392,7 +2398,7 @@ int main(int argc, char *argv[]) - umask(0); - - pthread_mutex_init(&lo.mutex, NULL); -- lo.root.next = lo.root.prev = &lo.root; -+ lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal); - lo.root.fd = -1; - lo.root.fuse_ino = FUSE_ROOT_ID; - lo.cache = CACHE_AUTO; -@@ -2522,6 +2528,9 @@ err_out2: - err_out1: - fuse_opt_free_args(&args); - -+ if (lo.inodes) { -+ g_hash_table_destroy(lo.inodes); -+ } - lo_map_destroy(&lo.fd_map); - lo_map_destroy(&lo.dirp_map); - lo_map_destroy(&lo.ino_map); --- -1.8.3.1 - diff --git a/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch b/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch deleted file mode 100644 index 68eb03e..0000000 --- a/kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch +++ /dev/null @@ -1,54 +0,0 @@ -From feb005dfeb15dd5ac5156c994f323ab4c573b1fc Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:24 +0100 -Subject: [PATCH 053/116] virtiofsd: prevent ".." escape in lo_do_lookup() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-50-dgilbert@redhat.com> -Patchwork-id: 93500 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 049/112] virtiofsd: prevent ".." 
escape in lo_do_lookup() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 854684bc0b3d63eb90b3abdfe471c2e4271ef176) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e375406..79d5966 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -624,12 +624,17 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - int res; - int saverr; - struct lo_data *lo = lo_data(req); -- struct lo_inode *inode; -+ struct lo_inode *inode, *dir = lo_inode(req, parent); - - memset(e, 0, sizeof(*e)); - e->attr_timeout = lo->timeout; - e->entry_timeout = lo->timeout; - -+ /* Do not allow escaping root directory */ -+ if (dir == &lo->root && strcmp(name, "..") == 0) { -+ name = "."; -+ } -+ - newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW); - if (newfd == -1) { - goto out_err; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch b/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch deleted file mode 100644 index 5f97cbf..0000000 --- a/kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 97e232e75bbc0032f4a309d248f383384612eafe Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:25 +0100 -Subject: [PATCH 054/116] virtiofsd: prevent ".." escape in lo_do_readdir() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-51-dgilbert@redhat.com> -Patchwork-id: 93507 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 050/112] virtiofsd: prevent ".." escape in lo_do_readdir() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Construct a fake dirent for the root directory's ".." entry. This hides -the parent directory from the FUSE client. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 752272da2b68a2312f0e11fc5303015a6c3ee1ac) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 36 ++++++++++++++++++++++-------------- - 1 file changed, 22 insertions(+), 14 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 79d5966..e3d65c3 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1149,19 +1149,25 @@ out_err: - static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - off_t offset, struct fuse_file_info *fi, int plus) - { -+ struct lo_data *lo = lo_data(req); - struct lo_dirp *d; -+ struct lo_inode *dinode; - char *buf = NULL; - char *p; - size_t rem = size; -- int err = ENOMEM; -+ int err = EBADF; - -- (void)ino; -+ dinode = lo_inode(req, ino); -+ if (!dinode) { -+ goto error; -+ } - - d = lo_dirp(req, fi); - if (!d) { - goto error; - } - -+ err = ENOMEM; - buf = calloc(1, size); - if (!buf) { - goto error; -@@ -1192,15 +1198,21 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - } - nextoff = d->entry->d_off; - name = d->entry->d_name; -+ - fuse_ino_t entry_ino = 0; -+ struct fuse_entry_param e = (struct fuse_entry_param){ -+ .attr.st_ino = d->entry->d_ino, -+ .attr.st_mode = d->entry->d_type << 12, -+ }; -+ -+ /* Hide root's parent directory */ -+ if (dinode == &lo->root && strcmp(name, "..") == 0) { -+ e.attr.st_ino = 
lo->root.ino; -+ e.attr.st_mode = DT_DIR << 12; -+ } -+ - if (plus) { -- struct fuse_entry_param e; -- if (is_dot_or_dotdot(name)) { -- e = (struct fuse_entry_param){ -- .attr.st_ino = d->entry->d_ino, -- .attr.st_mode = d->entry->d_type << 12, -- }; -- } else { -+ if (!is_dot_or_dotdot(name)) { - err = lo_do_lookup(req, ino, name, &e); - if (err) { - goto error; -@@ -1210,11 +1222,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - - entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff); - } else { -- struct stat st = { -- .st_ino = d->entry->d_ino, -- .st_mode = d->entry->d_type << 12, -- }; -- entsize = fuse_add_direntry(req, p, rem, name, &st, nextoff); -+ entsize = fuse_add_direntry(req, p, rem, name, &e.attr, nextoff); - } - if (entsize > rem) { - if (entry_ino != 0) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch b/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch deleted file mode 100644 index be7c120..0000000 --- a/kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 249c02ae54739dc5894ee1b2905bbe8f1e79e909 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:20 +0100 -Subject: [PATCH 109/116] virtiofsd: prevent FUSE_INIT/FUSE_DESTROY races -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-106-dgilbert@redhat.com> -Patchwork-id: 93562 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 105/112] virtiofsd: prevent FUSE_INIT/FUSE_DESTROY races -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -When running with multiple threads it can be tricky to handle -FUSE_INIT/FUSE_DESTROY in parallel with other request types or in -parallel with themselves. 
Serialize FUSE_INIT and FUSE_DESTROY so that -malicious clients cannot trigger race conditions. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Masayoshi Mizuma -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit cdc497c6925be745bc895355bd4674a17a4b2a8b) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_i.h | 1 + - tools/virtiofsd/fuse_lowlevel.c | 18 ++++++++++++++++++ - 2 files changed, 19 insertions(+) - -diff --git a/tools/virtiofsd/fuse_i.h b/tools/virtiofsd/fuse_i.h -index a20854f..1447d86 100644 ---- a/tools/virtiofsd/fuse_i.h -+++ b/tools/virtiofsd/fuse_i.h -@@ -61,6 +61,7 @@ struct fuse_session { - struct fuse_req list; - struct fuse_req interrupts; - pthread_mutex_t lock; -+ pthread_rwlock_t init_rwlock; - int got_destroy; - int broken_splice_nonblock; - uint64_t notify_ctr; -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index dab6a31..79a4031 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2428,6 +2428,19 @@ void fuse_session_process_buf_int(struct fuse_session *se, - req->ctx.pid = in->pid; - req->ch = ch; - -+ /* -+ * INIT and DESTROY requests are serialized, all other request types -+ * run in parallel. This prevents races between FUSE_INIT and ordinary -+ * requests, FUSE_INIT and FUSE_INIT, FUSE_INIT and FUSE_DESTROY, and -+ * FUSE_DESTROY and FUSE_DESTROY. 
-+ */ -+ if (in->opcode == FUSE_INIT || in->opcode == CUSE_INIT || -+ in->opcode == FUSE_DESTROY) { -+ pthread_rwlock_wrlock(&se->init_rwlock); -+ } else { -+ pthread_rwlock_rdlock(&se->init_rwlock); -+ } -+ - err = EIO; - if (!se->got_init) { - enum fuse_opcode expected; -@@ -2485,10 +2498,13 @@ void fuse_session_process_buf_int(struct fuse_session *se, - } else { - fuse_ll_ops[in->opcode].func(req, in->nodeid, &iter); - } -+ -+ pthread_rwlock_unlock(&se->init_rwlock); - return; - - reply_err: - fuse_reply_err(req, err); -+ pthread_rwlock_unlock(&se->init_rwlock); - } - - #define LL_OPTION(n, o, v) \ -@@ -2531,6 +2547,7 @@ void fuse_session_destroy(struct fuse_session *se) - se->op.destroy(se->userdata); - } - } -+ pthread_rwlock_destroy(&se->init_rwlock); - pthread_mutex_destroy(&se->lock); - free(se->cuse_data); - if (se->fd != -1) { -@@ -2610,6 +2627,7 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - list_init_req(&se->list); - list_init_req(&se->interrupts); - fuse_mutex_init(&se->lock); -+ pthread_rwlock_init(&se->init_rwlock, NULL); - - memcpy(&se->op, op, op_size); - se->owner = getuid(); --- -1.8.3.1 - diff --git a/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch b/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch deleted file mode 100644 index 8eabede..0000000 --- a/kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch +++ /dev/null @@ -1,149 +0,0 @@ -From 69c6a829f8136a8c95ccdf480f2fd0173d64b6ec Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:05 +0100 -Subject: [PATCH 094/116] virtiofsd: prevent fv_queue_thread() vs virtio_loop() - races -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-91-dgilbert@redhat.com> -Patchwork-id: 93544 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 090/112] virtiofsd: prevent fv_queue_thread() vs virtio_loop() races -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -We call into libvhost-user from the virtqueue handler thread and the -vhost-user message processing thread without a lock. There is nothing -protecting the virtqueue handler thread if the vhost-user message -processing thread changes the virtqueue or memory table while it is -running. - -This patch introduces a read-write lock. Virtqueue handler threads are -readers. The vhost-user message processing thread is a writer. This -will allow concurrency for multiqueue in the future while protecting -against fv_queue_thread() vs virtio_loop() races. - -Note that the critical sections could be made smaller but it would be -more invasive and require libvhost-user changes. Let's start simple and -improve performance later, if necessary. Another option would be an -RCU-style approach with lighter-weight primitives. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit e7b337326d594b71b07cd6dbb332c49c122c80a4) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 34 +++++++++++++++++++++++++++++++++- - 1 file changed, 33 insertions(+), 1 deletion(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index fb8d6d1..f6242f9 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -59,6 +59,18 @@ struct fv_VuDev { - struct fuse_session *se; - - /* -+ * Either handle virtqueues or vhost-user protocol messages. Don't do -+ * both at the same time since that could lead to race conditions if -+ * virtqueues or memory tables change while another thread is accessing -+ * them. 
-+ * -+ * The assumptions are: -+ * 1. fv_queue_thread() reads/writes to virtqueues and only reads VuDev. -+ * 2. virtio_loop() reads/writes virtqueues and VuDev. -+ */ -+ pthread_rwlock_t vu_dispatch_rwlock; -+ -+ /* - * The following pair of fields are only accessed in the main - * virtio_loop - */ -@@ -415,6 +427,8 @@ static void *fv_queue_thread(void *opaque) - qi->qidx, qi->kick_fd); - while (1) { - struct pollfd pf[2]; -+ int ret; -+ - pf[0].fd = qi->kick_fd; - pf[0].events = POLLIN; - pf[0].revents = 0; -@@ -461,6 +475,9 @@ static void *fv_queue_thread(void *opaque) - fuse_log(FUSE_LOG_ERR, "Eventfd_read for queue: %m\n"); - break; - } -+ /* Mutual exclusion with virtio_loop() */ -+ ret = pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ assert(ret == 0); /* there is no possible error case */ - /* out is from guest, in is too guest */ - unsigned int in_bytes, out_bytes; - vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); -@@ -469,6 +486,7 @@ static void *fv_queue_thread(void *opaque) - "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", - __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); - -+ - while (1) { - bool allocated_bufv = false; - struct fuse_bufvec bufv; -@@ -597,6 +615,8 @@ static void *fv_queue_thread(void *opaque) - free(elem); - elem = NULL; - } -+ -+ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); - } - out: - pthread_mutex_destroy(&ch.lock); -@@ -711,6 +731,8 @@ int virtio_loop(struct fuse_session *se) - - while (!fuse_session_exited(se)) { - struct pollfd pf[1]; -+ bool ok; -+ int ret; - pf[0].fd = se->vu_socketfd; - pf[0].events = POLLIN; - pf[0].revents = 0; -@@ -735,7 +757,15 @@ int virtio_loop(struct fuse_session *se) - } - assert(pf[0].revents & POLLIN); - fuse_log(FUSE_LOG_DEBUG, "%s: Got VU event\n", __func__); -- if (!vu_dispatch(&se->virtio_dev->dev)) { -+ /* Mutual exclusion with fv_queue_thread() */ -+ ret = pthread_rwlock_wrlock(&se->virtio_dev->vu_dispatch_rwlock); -+ 
assert(ret == 0); /* there is no possible error case */ -+ -+ ok = vu_dispatch(&se->virtio_dev->dev); -+ -+ pthread_rwlock_unlock(&se->virtio_dev->vu_dispatch_rwlock); -+ -+ if (!ok) { - fuse_log(FUSE_LOG_ERR, "%s: vu_dispatch failed\n", __func__); - break; - } -@@ -877,6 +907,7 @@ int virtio_session_mount(struct fuse_session *se) - - se->vu_socketfd = data_sock; - se->virtio_dev->se = se; -+ pthread_rwlock_init(&se->virtio_dev->vu_dispatch_rwlock, NULL); - vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch, - fv_remove_watch, &fv_iface); - -@@ -892,6 +923,7 @@ void virtio_session_close(struct fuse_session *se) - } - - free(se->virtio_dev->qi); -+ pthread_rwlock_destroy(&se->virtio_dev->vu_dispatch_rwlock); - free(se->virtio_dev); - se->virtio_dev = NULL; - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch b/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch deleted file mode 100644 index acafa41..0000000 --- a/kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch +++ /dev/null @@ -1,147 +0,0 @@ -From 2e58ff6978f8433fc8672d2e357c6f0f5f36d24f Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:07 +0100 -Subject: [PATCH 096/116] virtiofsd: prevent races with lo_dirp_put() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-93-dgilbert@redhat.com> -Patchwork-id: 93546 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 092/112] virtiofsd: prevent races with lo_dirp_put() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Introduce lo_dirp_put() so that FUSE_RELEASEDIR does not cause -use-after-free races with other threads that are accessing lo_dirp. - -Also make lo_releasedir() atomic to prevent FUSE_RELEASEDIR racing with -itself. This prevents double-frees. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit acefdde73b403576a241ebd8dbe8431ddc0d9442) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 41 ++++++++++++++++++++++++++++++++++------ - 1 file changed, 35 insertions(+), 6 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 690edbc..2d703b5 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1284,11 +1284,28 @@ static void lo_readlink(fuse_req_t req, fuse_ino_t ino) - } - - struct lo_dirp { -+ gint refcount; - DIR *dp; - struct dirent *entry; - off_t offset; - }; - -+static void lo_dirp_put(struct lo_dirp **dp) -+{ -+ struct lo_dirp *d = *dp; -+ -+ if (!d) { -+ return; -+ } -+ *dp = NULL; -+ -+ if (g_atomic_int_dec_and_test(&d->refcount)) { -+ closedir(d->dp); -+ free(d); -+ } -+} -+ -+/* Call lo_dirp_put() on the return value when no longer needed */ - static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) - { - struct lo_data *lo = lo_data(req); -@@ -1296,6 +1313,9 @@ static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) - - pthread_mutex_lock(&lo->mutex); - elem = lo_map_get(&lo->dirp_map, fi->fh); -+ if (elem) { -+ g_atomic_int_inc(&elem->dirp->refcount); -+ } - pthread_mutex_unlock(&lo->mutex); - if (!elem) { - return NULL; -@@ -1331,6 +1351,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino, - d->offset = 0; - d->entry = NULL; - -+ g_atomic_int_set(&d->refcount, 1); /* paired with lo_releasedir() */ - pthread_mutex_lock(&lo->mutex); - fh = lo_add_dirp_mapping(req, d); - pthread_mutex_unlock(&lo->mutex); -@@ -1364,7 +1385,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - off_t offset, struct fuse_file_info *fi, int plus) - { - struct lo_data *lo = lo_data(req); -- struct lo_dirp *d; -+ struct lo_dirp *d = NULL; - struct lo_inode 
*dinode; - char *buf = NULL; - char *p; -@@ -1454,6 +1475,8 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - - err = 0; - error: -+ lo_dirp_put(&d); -+ - /* - * If there's an error, we can only signal it if we haven't stored - * any entries yet - otherwise we'd end up with wrong lookup -@@ -1484,22 +1507,25 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, - struct fuse_file_info *fi) - { - struct lo_data *lo = lo_data(req); -+ struct lo_map_elem *elem; - struct lo_dirp *d; - - (void)ino; - -- d = lo_dirp(req, fi); -- if (!d) { -+ pthread_mutex_lock(&lo->mutex); -+ elem = lo_map_get(&lo->dirp_map, fi->fh); -+ if (!elem) { -+ pthread_mutex_unlock(&lo->mutex); - fuse_reply_err(req, EBADF); - return; - } - -- pthread_mutex_lock(&lo->mutex); -+ d = elem->dirp; - lo_map_remove(&lo->dirp_map, fi->fh); - pthread_mutex_unlock(&lo->mutex); - -- closedir(d->dp); -- free(d); -+ lo_dirp_put(&d); /* paired with lo_opendir() */ -+ - fuse_reply_err(req, 0); - } - -@@ -1710,6 +1736,9 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, - } else { - res = fsync(fd); - } -+ -+ lo_dirp_put(&d); -+ - fuse_reply_err(req, res == -1 ? errno : 0); - } - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch b/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch deleted file mode 100644 index 056559d..0000000 --- a/kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch +++ /dev/null @@ -1,469 +0,0 @@ -From 5c9bbd00e8f8c944d9e8e22e7d1cf08cb8fddd6b Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:37 +0100 -Subject: [PATCH 066/116] virtiofsd: print log only when priority is high - enough -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-63-dgilbert@redhat.com> -Patchwork-id: 93518 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 062/112] virtiofsd: print log only when priority is high enough -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Eryu Guan - -Introduce "-o log_level=" command line option to specify current log -level (priority), valid values are "debug info warn err", e.g. - - ./virtiofsd -o log_level=debug ... - -So only log priority higher than "debug" will be printed to -stderr/syslog. And the default level is info. - -The "-o debug"/"-d" options are kept, and imply debug log level. - -Signed-off-by: Eryu Guan -dgilbert: Reworked for libfuse's log_func -Signed-off-by: Dr. David Alan Gilbert -with fix by: -Signed-off-by: Xiao Yang -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit d240314a1a18a1d914af1b5763fe8c9a572e6409) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 75 ++++++++++--------------- - tools/virtiofsd/fuse_lowlevel.h | 1 + - tools/virtiofsd/helper.c | 8 ++- - tools/virtiofsd/passthrough_ll.c | 118 ++++++++++++++++----------------------- - 4 files changed, 87 insertions(+), 115 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 6ceb33d..a7a1968 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -158,19 +158,17 @@ static int fuse_send_msg(struct fuse_session *se, struct fuse_chan *ch, - struct fuse_out_header *out = iov[0].iov_base; - - out->len = iov_length(iov, count); -- if (se->debug) { -- if (out->unique == 0) { -- fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, -- out->len); -- } else if (out->error) { -- fuse_log(FUSE_LOG_DEBUG, -- " unique: %llu, error: %i (%s), outsize: %i\n", -- (unsigned long long)out->unique, out->error, -- strerror(-out->error), 
out->len); -- } else { -- fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", -- (unsigned long long)out->unique, out->len); -- } -+ if (out->unique == 0) { -+ fuse_log(FUSE_LOG_DEBUG, "NOTIFY: code=%d length=%u\n", out->error, -+ out->len); -+ } else if (out->error) { -+ fuse_log(FUSE_LOG_DEBUG, -+ " unique: %llu, error: %i (%s), outsize: %i\n", -+ (unsigned long long)out->unique, out->error, -+ strerror(-out->error), out->len); -+ } else { -+ fuse_log(FUSE_LOG_DEBUG, " unique: %llu, success, outsize: %i\n", -+ (unsigned long long)out->unique, out->len); - } - - if (fuse_lowlevel_is_virtio(se)) { -@@ -1662,10 +1660,8 @@ static void do_interrupt(fuse_req_t req, fuse_ino_t nodeid, - return; - } - -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", -- (unsigned long long)arg->unique); -- } -+ fuse_log(FUSE_LOG_DEBUG, "INTERRUPT: %llu\n", -+ (unsigned long long)arg->unique); - - req->u.i.unique = arg->unique; - -@@ -1901,13 +1897,10 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, - } - } - -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); -- if (arg->major == 7 && arg->minor >= 6) { -- fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); -- fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", -- arg->max_readahead); -- } -+ fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); -+ if (arg->major == 7 && arg->minor >= 6) { -+ fuse_log(FUSE_LOG_DEBUG, "flags=0x%08x\n", arg->flags); -+ fuse_log(FUSE_LOG_DEBUG, "max_readahead=0x%08x\n", arg->max_readahead); - } - se->conn.proto_major = arg->major; - se->conn.proto_minor = arg->minor; -@@ -2116,19 +2109,14 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, - outarg.congestion_threshold = se->conn.congestion_threshold; - outarg.time_gran = se->conn.time_gran; - -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, -- outarg.minor); -- fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); -- 
fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", -- outarg.max_readahead); -- fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); -- fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", -- outarg.max_background); -- fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", -- outarg.congestion_threshold); -- fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); -- } -+ fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor); -+ fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags); -+ fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", outarg.max_readahead); -+ fuse_log(FUSE_LOG_DEBUG, " max_write=0x%08x\n", outarg.max_write); -+ fuse_log(FUSE_LOG_DEBUG, " max_background=%i\n", outarg.max_background); -+ fuse_log(FUSE_LOG_DEBUG, " congestion_threshold=%i\n", -+ outarg.congestion_threshold); -+ fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); - - send_reply_ok(req, &outarg, outargsize); - } -@@ -2407,14 +2395,11 @@ void fuse_session_process_buf_int(struct fuse_session *se, - in = fuse_mbuf_iter_advance(&iter, sizeof(*in)); - assert(in); /* caller guarantees the input buffer is large enough */ - -- if (se->debug) { -- fuse_log(FUSE_LOG_DEBUG, -- "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, " -- "pid: %u\n", -- (unsigned long long)in->unique, -- opname((enum fuse_opcode)in->opcode), in->opcode, -- (unsigned long long)in->nodeid, buf->size, in->pid); -- } -+ fuse_log( -+ FUSE_LOG_DEBUG, -+ "unique: %llu, opcode: %s (%i), nodeid: %llu, insize: %zu, pid: %u\n", -+ (unsigned long long)in->unique, opname((enum fuse_opcode)in->opcode), -+ in->opcode, (unsigned long long)in->nodeid, buf->size, in->pid); - - req = fuse_ll_alloc_req(se); - if (req == NULL) { -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index f2750bc..138041e 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1796,6 +1796,7 @@ struct fuse_cmdline_opts { - int show_help; - 
int print_capabilities; - int syslog; -+ int log_level; - unsigned int max_idle_threads; - }; - -diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 9692ef9..6d50a46 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -34,7 +34,6 @@ - t, offsetof(struct fuse_cmdline_opts, p), v \ - } - -- - static const struct fuse_opt fuse_helper_opts[] = { - FUSE_HELPER_OPT("-h", show_help), - FUSE_HELPER_OPT("--help", show_help), -@@ -55,6 +54,10 @@ static const struct fuse_opt fuse_helper_opts[] = { - FUSE_OPT_KEY("subtype=", FUSE_OPT_KEY_KEEP), - FUSE_HELPER_OPT("max_idle_threads=%u", max_idle_threads), - FUSE_HELPER_OPT("--syslog", syslog), -+ FUSE_HELPER_OPT_VALUE("log_level=debug", log_level, FUSE_LOG_DEBUG), -+ FUSE_HELPER_OPT_VALUE("log_level=info", log_level, FUSE_LOG_INFO), -+ FUSE_HELPER_OPT_VALUE("log_level=warn", log_level, FUSE_LOG_WARNING), -+ FUSE_HELPER_OPT_VALUE("log_level=err", log_level, FUSE_LOG_ERR), - FUSE_OPT_END - }; - -@@ -142,6 +145,9 @@ void fuse_cmdline_help(void) - " --syslog log to syslog (default stderr)\n" - " -f foreground operation\n" - " --daemonize run in background\n" -+ " -o log_level= log level, default to \"info\"\n" -+ " level could be one of \"debug, " -+ "info, warn, err\"\n" - " -o max_idle_threads the maximum number of idle worker " - "threads\n" - " allowed (default: 10)\n" -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 0372aca..ff6910f 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -37,6 +37,7 @@ - - #include "qemu/osdep.h" - #include "fuse_virtio.h" -+#include "fuse_log.h" - #include "fuse_lowlevel.h" - #include - #include -@@ -140,6 +141,7 @@ static const struct fuse_opt lo_opts[] = { - FUSE_OPT_END - }; - static bool use_syslog = false; -+static int current_log_level; - - static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); - -@@ -458,11 +460,6 @@ static int lo_fd(fuse_req_t 
req, fuse_ino_t ino) - return inode ? inode->fd : -1; - } - --static bool lo_debug(fuse_req_t req) --{ -- return lo_data(req)->debug != 0; --} -- - static void lo_init(void *userdata, struct fuse_conn_info *conn) - { - struct lo_data *lo = (struct lo_data *)userdata; -@@ -472,15 +469,11 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) - } - - if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) { -- if (lo->debug) { -- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); - conn->want |= FUSE_CAP_WRITEBACK_CACHE; - } - if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) { -- if (lo->debug) { -- fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); - conn->want |= FUSE_CAP_FLOCK_LOCKS; - } - } -@@ -823,10 +816,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - } - e->ino = inode->fuse_ino; - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long)parent, name, (unsigned long long)e->ino); -- } -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, -+ name, (unsigned long long)e->ino); - - return 0; - -@@ -843,10 +834,8 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) - struct fuse_entry_param e; - int err; - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", -- parent, name); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", parent, -+ name); - - /* - * Don't use is_safe_path_component(), allow "." and ".." 
for NFS export -@@ -971,10 +960,8 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - goto out; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long)parent, name, (unsigned long long)e.ino); -- } -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, -+ name, (unsigned long long)e.ino); - - fuse_reply_entry(req, &e); - return; -@@ -1074,10 +1061,8 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - pthread_mutex_unlock(&lo->mutex); - e.ino = inode->fuse_ino; - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", -- (unsigned long long)parent, name, (unsigned long long)e.ino); -- } -+ fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, -+ name, (unsigned long long)e.ino); - - fuse_reply_entry(req, &e); - return; -@@ -1171,11 +1156,9 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - return; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -- (unsigned long long)ino, (unsigned long long)inode->refcount, -- (unsigned long long)nlookup); -- } -+ fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -+ (unsigned long long)ino, (unsigned long long)inode->refcount, -+ (unsigned long long)nlookup); - - unref_inode(lo, inode, nlookup); - } -@@ -1445,10 +1428,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - int err; - struct lo_cred old = {}; - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", -- parent, name); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", parent, -+ name); - - if (!is_safe_path_component(name)) { - fuse_reply_err(req, EINVAL); -@@ -1525,10 +1506,8 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - char buf[64]; - struct lo_data *lo = lo_data(req); - -- if (lo_debug(req)) { -- 
fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, -- fi->flags); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, -+ fi->flags); - - /* - * With writeback cache, kernel may send read requests even -@@ -1644,12 +1623,10 @@ static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, - { - struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, -- "lo_read(ino=%" PRIu64 ", size=%zd, " -- "off=%lu)\n", -- ino, size, (unsigned long)offset); -- } -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_read(ino=%" PRIu64 ", size=%zd, " -+ "off=%lu)\n", -+ ino, size, (unsigned long)offset); - - buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; - buf.buf[0].fd = lo_fi_fd(req, fi); -@@ -1671,11 +1648,9 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - out_buf.buf[0].fd = lo_fi_fd(req, fi); - out_buf.buf[0].pos = off; - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, -- "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, -- out_buf.buf[0].size, (unsigned long)off); -- } -+ fuse_log(FUSE_LOG_DEBUG, -+ "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, -+ out_buf.buf[0].size, (unsigned long)off); - - /* - * If kill_priv is set, drop CAP_FSETID which should lead to kernel -@@ -1774,11 +1749,8 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, -- "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ino, name, -- size); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", -+ ino, name, size); - - if (inode->is_symlink) { - /* Sorry, no race free way to getxattr on symlink. 
*/ -@@ -1852,10 +1824,8 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - goto out; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", -- ino, size); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino, -+ size); - - if (inode->is_symlink) { - /* Sorry, no race free way to listxattr on symlink. */ -@@ -1929,11 +1899,8 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, -- "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n", -- ino, name, value, size); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 -+ ", name=%s value=%s size=%zd)\n", ino, name, value, size); - - if (inode->is_symlink) { - /* Sorry, no race free way to setxattr on symlink. */ -@@ -1978,10 +1945,8 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - goto out; - } - -- if (lo_debug(req)) { -- fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", -- ino, name); -- } -+ fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino, -+ name); - - if (inode->is_symlink) { - /* Sorry, no race free way to setxattr on symlink. */ -@@ -2303,6 +2268,10 @@ static void setup_nofile_rlimit(void) - - static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) - { -+ if (current_log_level < level) { -+ return; -+ } -+ - if (use_syslog) { - int priority = LOG_ERR; - switch (level) { -@@ -2401,8 +2370,19 @@ int main(int argc, char *argv[]) - return 1; - } - -+ /* -+ * log_level is 0 if not configured via cmd options (0 is LOG_EMERG, -+ * and we don't use this log level). 
-+ */ -+ if (opts.log_level != 0) { -+ current_log_level = opts.log_level; -+ } - lo.debug = opts.debug; -+ if (lo.debug) { -+ current_log_level = FUSE_LOG_DEBUG; -+ } - lo.root.refcount = 2; -+ - if (lo.source) { - struct stat stat; - int res; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-process-requests-in-a-thread-pool.patch b/kvm-virtiofsd-process-requests-in-a-thread-pool.patch deleted file mode 100644 index 87fff99..0000000 --- a/kvm-virtiofsd-process-requests-in-a-thread-pool.patch +++ /dev/null @@ -1,533 +0,0 @@ -From b0db5e666aaa43eadff3e60a1ada704f33b03074 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:19 +0100 -Subject: [PATCH 108/116] virtiofsd: process requests in a thread pool -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-105-dgilbert@redhat.com> -Patchwork-id: 93554 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 104/112] virtiofsd: process requests in a thread pool -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Introduce a thread pool so that fv_queue_thread() just pops -VuVirtqElements and hands them to the thread pool. For the time being -only one worker thread is allowed since passthrough_ll.c is not -thread-safe yet. Future patches will lift this restriction so that -multiple FUSE requests can be processed in parallel. - -The main new concept is struct FVRequest, which contains both -VuVirtqElement and struct fuse_chan. We now have fv_VuDev for a device, -fv_QueueInfo for a virtqueue, and FVRequest for a request. Some of -fv_QueueInfo's fields are moved into FVRequest because they are -per-request. The name FVRequest conforms to QEMU coding style and I -expect the struct fv_* types will be renamed in a future refactoring. - -This patch series is not optimal. 
fbuf reuse is dropped so each request -does malloc(se->bufsize), but there is no clean and cheap way to keep -this with a thread pool. The vq_lock mutex is held for longer than -necessary, especially during the eventfd_write() syscall. Performance -can be improved in the future. - -prctl(2) had to be added to the seccomp whitelist because glib invokes -it. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Misono Tomohiro -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit a3d756c5aecccc4c0e51060a7e2f1c87bf8f1180) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 359 +++++++++++++++++++++++------------------- - 1 file changed, 201 insertions(+), 158 deletions(-) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index f6242f9..0dcf2ef 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -22,6 +22,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -37,17 +38,28 @@ - struct fv_VuDev; - struct fv_QueueInfo { - pthread_t thread; -+ /* -+ * This lock protects the VuVirtq preventing races between -+ * fv_queue_thread() and fv_queue_worker(). 
-+ */ -+ pthread_mutex_t vq_lock; -+ - struct fv_VuDev *virtio_dev; - - /* Our queue index, corresponds to array position */ - int qidx; - int kick_fd; - int kill_fd; /* For killing the thread */ -+}; - -- /* The element for the command currently being processed */ -- VuVirtqElement *qe; -+/* A FUSE request */ -+typedef struct { -+ VuVirtqElement elem; -+ struct fuse_chan ch; -+ -+ /* Used to complete requests that involve no reply */ - bool reply_sent; --}; -+} FVRequest; - - /* - * We pass the dev element into libvhost-user -@@ -191,8 +203,11 @@ static void copy_iov(struct iovec *src_iov, int src_count, - int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - struct iovec *iov, int count) - { -- VuVirtqElement *elem; -- VuVirtq *q; -+ FVRequest *req = container_of(ch, FVRequest, ch); -+ struct fv_QueueInfo *qi = ch->qi; -+ VuDev *dev = &se->virtio_dev->dev; -+ VuVirtq *q = vu_get_queue(dev, qi->qidx); -+ VuVirtqElement *elem = &req->elem; - int ret = 0; - - assert(count >= 1); -@@ -205,11 +220,7 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - - /* unique == 0 is notification, which we don't support */ - assert(out->unique); -- /* For virtio we always have ch */ -- assert(ch); -- assert(!ch->qi->reply_sent); -- elem = ch->qi->qe; -- q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; -+ assert(!req->reply_sent); - - /* The 'in' part of the elem is to qemu */ - unsigned int in_num = elem->in_num; -@@ -236,9 +247,15 @@ int virtio_send_msg(struct fuse_session *se, struct fuse_chan *ch, - } - - copy_iov(iov, count, in_sg, in_num, tosend_len); -- vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); -- vu_queue_notify(&se->virtio_dev->dev, q); -- ch->qi->reply_sent = true; -+ -+ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ pthread_mutex_lock(&qi->vq_lock); -+ vu_queue_push(dev, q, elem, tosend_len); -+ vu_queue_notify(dev, q); -+ pthread_mutex_unlock(&qi->vq_lock); -+ 
pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ -+ req->reply_sent = true; - - err: - return ret; -@@ -254,9 +271,12 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - struct iovec *iov, int count, struct fuse_bufvec *buf, - size_t len) - { -+ FVRequest *req = container_of(ch, FVRequest, ch); -+ struct fv_QueueInfo *qi = ch->qi; -+ VuDev *dev = &se->virtio_dev->dev; -+ VuVirtq *q = vu_get_queue(dev, qi->qidx); -+ VuVirtqElement *elem = &req->elem; - int ret = 0; -- VuVirtqElement *elem; -- VuVirtq *q; - - assert(count >= 1); - assert(iov[0].iov_len >= sizeof(struct fuse_out_header)); -@@ -275,11 +295,7 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - /* unique == 0 is notification which we don't support */ - assert(out->unique); - -- /* For virtio we always have ch */ -- assert(ch); -- assert(!ch->qi->reply_sent); -- elem = ch->qi->qe; -- q = &ch->qi->virtio_dev->dev.vq[ch->qi->qidx]; -+ assert(!req->reply_sent); - - /* The 'in' part of the elem is to qemu */ - unsigned int in_num = elem->in_num; -@@ -395,33 +411,175 @@ int virtio_send_data_iov(struct fuse_session *se, struct fuse_chan *ch, - - ret = 0; - -- vu_queue_push(&se->virtio_dev->dev, q, elem, tosend_len); -- vu_queue_notify(&se->virtio_dev->dev, q); -+ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ pthread_mutex_lock(&qi->vq_lock); -+ vu_queue_push(dev, q, elem, tosend_len); -+ vu_queue_notify(dev, q); -+ pthread_mutex_unlock(&qi->vq_lock); -+ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); - - err: - if (ret == 0) { -- ch->qi->reply_sent = true; -+ req->reply_sent = true; - } - - return ret; - } - -+/* Process one FVRequest in a thread pool */ -+static void fv_queue_worker(gpointer data, gpointer user_data) -+{ -+ struct fv_QueueInfo *qi = user_data; -+ struct fuse_session *se = qi->virtio_dev->se; -+ struct VuDev *dev = &qi->virtio_dev->dev; -+ FVRequest *req = data; -+ VuVirtqElement *elem = &req->elem; -+ 
struct fuse_buf fbuf = {}; -+ bool allocated_bufv = false; -+ struct fuse_bufvec bufv; -+ struct fuse_bufvec *pbufv; -+ -+ assert(se->bufsize > sizeof(struct fuse_in_header)); -+ -+ /* -+ * An element contains one request and the space to send our response -+ * They're spread over multiple descriptors in a scatter/gather set -+ * and we can't trust the guest to keep them still; so copy in/out. -+ */ -+ fbuf.mem = malloc(se->bufsize); -+ assert(fbuf.mem); -+ -+ fuse_mutex_init(&req->ch.lock); -+ req->ch.fd = -1; -+ req->ch.qi = qi; -+ -+ /* The 'out' part of the elem is from qemu */ -+ unsigned int out_num = elem->out_num; -+ struct iovec *out_sg = elem->out_sg; -+ size_t out_len = iov_size(out_sg, out_num); -+ fuse_log(FUSE_LOG_DEBUG, -+ "%s: elem %d: with %d out desc of length %zd\n", -+ __func__, elem->index, out_num, out_len); -+ -+ /* -+ * The elem should contain a 'fuse_in_header' (in to fuse) -+ * plus the data based on the len in the header. -+ */ -+ if (out_len < sizeof(struct fuse_in_header)) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", -+ __func__, elem->index); -+ assert(0); /* TODO */ -+ } -+ if (out_len > se->bufsize) { -+ fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", __func__, -+ elem->index); -+ assert(0); /* TODO */ -+ } -+ /* Copy just the first element and look at it */ -+ copy_from_iov(&fbuf, 1, out_sg); -+ -+ pbufv = NULL; /* Compiler thinks an unitialised path */ -+ if (out_num > 2 && -+ out_sg[0].iov_len == sizeof(struct fuse_in_header) && -+ ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && -+ out_sg[1].iov_len == sizeof(struct fuse_write_in)) { -+ /* -+ * For a write we don't actually need to copy the -+ * data, we can just do it straight out of guest memory -+ * but we must still copy the headers in case the guest -+ * was nasty and changed them while we were using them. 
-+ */ -+ fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); -+ -+ /* copy the fuse_write_in header afte rthe fuse_in_header */ -+ fbuf.mem += out_sg->iov_len; -+ copy_from_iov(&fbuf, 1, out_sg + 1); -+ fbuf.mem -= out_sg->iov_len; -+ fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; -+ -+ /* Allocate the bufv, with space for the rest of the iov */ -+ pbufv = malloc(sizeof(struct fuse_bufvec) + -+ sizeof(struct fuse_buf) * (out_num - 2)); -+ if (!pbufv) { -+ fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", -+ __func__); -+ goto out; -+ } -+ -+ allocated_bufv = true; -+ pbufv->count = 1; -+ pbufv->buf[0] = fbuf; -+ -+ size_t iovindex, pbufvindex; -+ iovindex = 2; /* 2 headers, separate iovs */ -+ pbufvindex = 1; /* 2 headers, 1 fusebuf */ -+ -+ for (; iovindex < out_num; iovindex++, pbufvindex++) { -+ pbufv->count++; -+ pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ -+ pbufv->buf[pbufvindex].flags = 0; -+ pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; -+ pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; -+ } -+ } else { -+ /* Normal (non fast write) path */ -+ -+ /* Copy the rest of the buffer */ -+ fbuf.mem += out_sg->iov_len; -+ copy_from_iov(&fbuf, out_num - 1, out_sg + 1); -+ fbuf.mem -= out_sg->iov_len; -+ fbuf.size = out_len; -+ -+ /* TODO! 
Endianness of header */ -+ -+ /* TODO: Add checks for fuse_session_exited */ -+ bufv.buf[0] = fbuf; -+ bufv.count = 1; -+ pbufv = &bufv; -+ } -+ pbufv->idx = 0; -+ pbufv->off = 0; -+ fuse_session_process_buf_int(se, pbufv, &req->ch); -+ -+out: -+ if (allocated_bufv) { -+ free(pbufv); -+ } -+ -+ /* If the request has no reply, still recycle the virtqueue element */ -+ if (!req->reply_sent) { -+ struct VuVirtq *q = vu_get_queue(dev, qi->qidx); -+ -+ fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", __func__, -+ elem->index); -+ -+ pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ pthread_mutex_lock(&qi->vq_lock); -+ vu_queue_push(dev, q, elem, 0); -+ vu_queue_notify(dev, q); -+ pthread_mutex_unlock(&qi->vq_lock); -+ pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); -+ } -+ -+ pthread_mutex_destroy(&req->ch.lock); -+ free(fbuf.mem); -+ free(req); -+} -+ - /* Thread function for individual queues, created when a queue is 'started' */ - static void *fv_queue_thread(void *opaque) - { - struct fv_QueueInfo *qi = opaque; - struct VuDev *dev = &qi->virtio_dev->dev; - struct VuVirtq *q = vu_get_queue(dev, qi->qidx); -- struct fuse_session *se = qi->virtio_dev->se; -- struct fuse_chan ch; -- struct fuse_buf fbuf; -+ GThreadPool *pool; - -- fbuf.mem = NULL; -- fbuf.flags = 0; -- -- fuse_mutex_init(&ch.lock); -- ch.fd = (int)0xdaff0d111; -- ch.qi = qi; -+ pool = g_thread_pool_new(fv_queue_worker, qi, 1 /* TODO max_threads */, -+ TRUE, NULL); -+ if (!pool) { -+ fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); -+ return NULL; -+ } - - fuse_log(FUSE_LOG_INFO, "%s: Start for queue %d kick_fd %d\n", __func__, - qi->qidx, qi->kick_fd); -@@ -478,6 +636,7 @@ static void *fv_queue_thread(void *opaque) - /* Mutual exclusion with virtio_loop() */ - ret = pthread_rwlock_rdlock(&qi->virtio_dev->vu_dispatch_rwlock); - assert(ret == 0); /* there is no possible error case */ -+ pthread_mutex_lock(&qi->vq_lock); - /* out is from guest, in is too 
guest */ - unsigned int in_bytes, out_bytes; - vu_queue_get_avail_bytes(dev, q, &in_bytes, &out_bytes, ~0, ~0); -@@ -486,141 +645,22 @@ static void *fv_queue_thread(void *opaque) - "%s: Queue %d gave evalue: %zx available: in: %u out: %u\n", - __func__, qi->qidx, (size_t)evalue, in_bytes, out_bytes); - -- - while (1) { -- bool allocated_bufv = false; -- struct fuse_bufvec bufv; -- struct fuse_bufvec *pbufv; -- -- /* -- * An element contains one request and the space to send our -- * response They're spread over multiple descriptors in a -- * scatter/gather set and we can't trust the guest to keep them -- * still; so copy in/out. -- */ -- VuVirtqElement *elem = vu_queue_pop(dev, q, sizeof(VuVirtqElement)); -- if (!elem) { -+ FVRequest *req = vu_queue_pop(dev, q, sizeof(FVRequest)); -+ if (!req) { - break; - } - -- qi->qe = elem; -- qi->reply_sent = false; -+ req->reply_sent = false; - -- if (!fbuf.mem) { -- fbuf.mem = malloc(se->bufsize); -- assert(fbuf.mem); -- assert(se->bufsize > sizeof(struct fuse_in_header)); -- } -- /* The 'out' part of the elem is from qemu */ -- unsigned int out_num = elem->out_num; -- struct iovec *out_sg = elem->out_sg; -- size_t out_len = iov_size(out_sg, out_num); -- fuse_log(FUSE_LOG_DEBUG, -- "%s: elem %d: with %d out desc of length %zd\n", __func__, -- elem->index, out_num, out_len); -- -- /* -- * The elem should contain a 'fuse_in_header' (in to fuse) -- * plus the data based on the len in the header. 
-- */ -- if (out_len < sizeof(struct fuse_in_header)) { -- fuse_log(FUSE_LOG_ERR, "%s: elem %d too short for in_header\n", -- __func__, elem->index); -- assert(0); /* TODO */ -- } -- if (out_len > se->bufsize) { -- fuse_log(FUSE_LOG_ERR, "%s: elem %d too large for buffer\n", -- __func__, elem->index); -- assert(0); /* TODO */ -- } -- /* Copy just the first element and look at it */ -- copy_from_iov(&fbuf, 1, out_sg); -- -- if (out_num > 2 && -- out_sg[0].iov_len == sizeof(struct fuse_in_header) && -- ((struct fuse_in_header *)fbuf.mem)->opcode == FUSE_WRITE && -- out_sg[1].iov_len == sizeof(struct fuse_write_in)) { -- /* -- * For a write we don't actually need to copy the -- * data, we can just do it straight out of guest memory -- * but we must still copy the headers in case the guest -- * was nasty and changed them while we were using them. -- */ -- fuse_log(FUSE_LOG_DEBUG, "%s: Write special case\n", __func__); -- -- /* copy the fuse_write_in header after the fuse_in_header */ -- fbuf.mem += out_sg->iov_len; -- copy_from_iov(&fbuf, 1, out_sg + 1); -- fbuf.mem -= out_sg->iov_len; -- fbuf.size = out_sg[0].iov_len + out_sg[1].iov_len; -- -- /* Allocate the bufv, with space for the rest of the iov */ -- allocated_bufv = true; -- pbufv = malloc(sizeof(struct fuse_bufvec) + -- sizeof(struct fuse_buf) * (out_num - 2)); -- if (!pbufv) { -- vu_queue_unpop(dev, q, elem, 0); -- free(elem); -- fuse_log(FUSE_LOG_ERR, "%s: pbufv malloc failed\n", -- __func__); -- goto out; -- } -- -- pbufv->count = 1; -- pbufv->buf[0] = fbuf; -- -- size_t iovindex, pbufvindex; -- iovindex = 2; /* 2 headers, separate iovs */ -- pbufvindex = 1; /* 2 headers, 1 fusebuf */ -- -- for (; iovindex < out_num; iovindex++, pbufvindex++) { -- pbufv->count++; -- pbufv->buf[pbufvindex].pos = ~0; /* Dummy */ -- pbufv->buf[pbufvindex].flags = 0; -- pbufv->buf[pbufvindex].mem = out_sg[iovindex].iov_base; -- pbufv->buf[pbufvindex].size = out_sg[iovindex].iov_len; -- } -- } else { -- /* Normal (non fast write) 
path */ -- -- /* Copy the rest of the buffer */ -- fbuf.mem += out_sg->iov_len; -- copy_from_iov(&fbuf, out_num - 1, out_sg + 1); -- fbuf.mem -= out_sg->iov_len; -- fbuf.size = out_len; -- -- /* TODO! Endianness of header */ -- -- /* TODO: Add checks for fuse_session_exited */ -- bufv.buf[0] = fbuf; -- bufv.count = 1; -- pbufv = &bufv; -- } -- pbufv->idx = 0; -- pbufv->off = 0; -- fuse_session_process_buf_int(se, pbufv, &ch); -- -- if (allocated_bufv) { -- free(pbufv); -- } -- -- if (!qi->reply_sent) { -- fuse_log(FUSE_LOG_DEBUG, "%s: elem %d no reply sent\n", -- __func__, elem->index); -- /* I think we've still got to recycle the element */ -- vu_queue_push(dev, q, elem, 0); -- vu_queue_notify(dev, q); -- } -- qi->qe = NULL; -- free(elem); -- elem = NULL; -+ g_thread_pool_push(pool, req, NULL); - } - -+ pthread_mutex_unlock(&qi->vq_lock); - pthread_rwlock_unlock(&qi->virtio_dev->vu_dispatch_rwlock); - } --out: -- pthread_mutex_destroy(&ch.lock); -- free(fbuf.mem); -+ -+ g_thread_pool_free(pool, FALSE, TRUE); - - return NULL; - } -@@ -643,6 +683,7 @@ static void fv_queue_cleanup_thread(struct fv_VuDev *vud, int qidx) - fuse_log(FUSE_LOG_ERR, "%s: Failed to join thread idx %d err %d\n", - __func__, qidx, ret); - } -+ pthread_mutex_destroy(&ourqi->vq_lock); - close(ourqi->kill_fd); - ourqi->kick_fd = -1; - free(vud->qi[qidx]); -@@ -696,6 +737,8 @@ static void fv_queue_set_started(VuDev *dev, int qidx, bool started) - - ourqi->kill_fd = eventfd(0, EFD_CLOEXEC | EFD_SEMAPHORE); - assert(ourqi->kill_fd != -1); -+ pthread_mutex_init(&ourqi->vq_lock, NULL); -+ - if (pthread_create(&ourqi->thread, NULL, fv_queue_thread, ourqi)) { - fuse_log(FUSE_LOG_ERR, "%s: Failed to create thread for queue %d\n", - __func__, qidx); --- -1.8.3.1 - diff --git a/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch b/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch deleted file mode 100644 index 181e32d..0000000 --- a/kvm-virtiofsd-remove-mountpoint-dummy-argument.patch +++ /dev/null @@ 
-1,159 +0,0 @@ -From a8a1835a82510be7d2d6edcc28a60e506a2cedad Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:46 +0100 -Subject: [PATCH 015/116] virtiofsd: remove mountpoint dummy argument -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-12-dgilbert@redhat.com> -Patchwork-id: 93466 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 011/112] virtiofsd: remove mountpoint dummy argument -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Classic FUSE file system daemons take a mountpoint argument but -virtiofsd exposes a vhost-user UNIX domain socket instead. The -mountpoint argument is not used by virtiofsd but the user is still -required to pass a dummy argument on the command-line. - -Remove the mountpoint argument to clean up the command-line. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 67aab02272f6cb47c56420f60b370c184961b5ca) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 2 +- - tools/virtiofsd/fuse_lowlevel.h | 4 +--- - tools/virtiofsd/helper.c | 20 +++----------------- - tools/virtiofsd/passthrough_ll.c | 12 ++---------- - 4 files changed, 7 insertions(+), 31 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 5c9cb52..2f32c68 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2455,7 +2455,7 @@ out1: - return NULL; - } - --int fuse_session_mount(struct fuse_session *se, const char *mountpoint) -+int fuse_session_mount(struct fuse_session *se) - { - int fd; - -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index adb9054..8d8909b 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1863,7 +1863,6 @@ struct fuse_cmdline_opts { - int foreground; - int debug; - int nodefault_subtype; -- char *mountpoint; - int show_version; - int show_help; - unsigned int max_idle_threads; -@@ -1924,12 +1923,11 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - /** - * Mount a FUSE file system. - * -- * @param mountpoint the mount point path - * @param se session object - * - * @return 0 on success, -1 on failure. - **/ --int fuse_session_mount(struct fuse_session *se, const char *mountpoint); -+int fuse_session_mount(struct fuse_session *se); - - /** - * Enter a single threaded, blocking event loop. 
-diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c -index 5711dd2..5e6f205 100644 ---- a/tools/virtiofsd/helper.c -+++ b/tools/virtiofsd/helper.c -@@ -140,27 +140,13 @@ void fuse_cmdline_help(void) - static int fuse_helper_opt_proc(void *data, const char *arg, int key, - struct fuse_args *outargs) - { -+ (void)data; - (void)outargs; -- struct fuse_cmdline_opts *opts = data; - - switch (key) { - case FUSE_OPT_KEY_NONOPT: -- if (!opts->mountpoint) { -- if (fuse_mnt_parse_fuse_fd(arg) != -1) { -- return fuse_opt_add_opt(&opts->mountpoint, arg); -- } -- -- char mountpoint[PATH_MAX] = ""; -- if (realpath(arg, mountpoint) == NULL) { -- fuse_log(FUSE_LOG_ERR, "fuse: bad mount point `%s': %s\n", arg, -- strerror(errno)); -- return -1; -- } -- return fuse_opt_add_opt(&opts->mountpoint, mountpoint); -- } else { -- fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); -- return -1; -- } -+ fuse_log(FUSE_LOG_ERR, "fuse: invalid argument `%s'\n", arg); -+ return -1; - - default: - /* Pass through unknown options */ -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index c5850ef..9377718 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -1297,7 +1297,7 @@ int main(int argc, char *argv[]) - return 1; - } - if (opts.show_help) { -- printf("usage: %s [options] \n\n", argv[0]); -+ printf("usage: %s [options]\n\n", argv[0]); - fuse_cmdline_help(); - fuse_lowlevel_help(); - ret = 0; -@@ -1308,13 +1308,6 @@ int main(int argc, char *argv[]) - goto err_out1; - } - -- if (opts.mountpoint == NULL) { -- printf("usage: %s [options] \n", argv[0]); -- printf(" %s --help\n", argv[0]); -- ret = 1; -- goto err_out1; -- } -- - if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { - return 1; - } -@@ -1374,7 +1367,7 @@ int main(int argc, char *argv[]) - goto err_out2; - } - -- if (fuse_session_mount(se, opts.mountpoint) != 0) { -+ if (fuse_session_mount(se) != 0) { - goto err_out3; - } - -@@ -1393,7 
+1386,6 @@ err_out3: - err_out2: - fuse_session_destroy(se); - err_out1: -- free(opts.mountpoint); - fuse_opt_free_args(&args); - - if (lo.root.fd >= 0) { --- -1.8.3.1 - diff --git a/kvm-virtiofsd-remove-unused-notify-reply-support.patch b/kvm-virtiofsd-remove-unused-notify-reply-support.patch deleted file mode 100644 index 98fb968..0000000 --- a/kvm-virtiofsd-remove-unused-notify-reply-support.patch +++ /dev/null @@ -1,294 +0,0 @@ -From e5534c0d4b866f61dbafa8d2422a24ab956189c1 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:47 +0100 -Subject: [PATCH 016/116] virtiofsd: remove unused notify reply support -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-13-dgilbert@redhat.com> -Patchwork-id: 93467 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 012/112] virtiofsd: remove unused notify reply support -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Notify reply support is unused by virtiofsd. The code would need to be -updated to validate input buffer sizes. Remove this unused code since -changes to it are untestable. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 64c6f408a29ef03e9b8da9f5a5d8fd511b0d801e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 147 +--------------------------------------- - tools/virtiofsd/fuse_lowlevel.h | 47 ------------- - 2 files changed, 1 insertion(+), 193 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 2f32c68..eb0ec49 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -31,12 +31,6 @@ - #define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg))) - #define OFFSET_MAX 0x7fffffffffffffffLL - --#define container_of(ptr, type, member) \ -- ({ \ -- const typeof(((type *)0)->member) *__mptr = (ptr); \ -- (type *)((char *)__mptr - offsetof(type, member)); \ -- }) -- - struct fuse_pollhandle { - uint64_t kh; - struct fuse_session *se; -@@ -1862,52 +1856,6 @@ static void do_destroy(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - send_reply_ok(req, NULL, 0); - } - --static void list_del_nreq(struct fuse_notify_req *nreq) --{ -- struct fuse_notify_req *prev = nreq->prev; -- struct fuse_notify_req *next = nreq->next; -- prev->next = next; -- next->prev = prev; --} -- --static void list_add_nreq(struct fuse_notify_req *nreq, -- struct fuse_notify_req *next) --{ -- struct fuse_notify_req *prev = next->prev; -- nreq->next = next; -- nreq->prev = prev; -- prev->next = nreq; -- next->prev = nreq; --} -- --static void list_init_nreq(struct fuse_notify_req *nreq) --{ -- nreq->next = nreq; -- nreq->prev = nreq; --} -- --static void do_notify_reply(fuse_req_t req, fuse_ino_t nodeid, -- const void *inarg, const struct fuse_buf *buf) --{ -- struct fuse_session *se = req->se; -- struct fuse_notify_req *nreq; -- struct fuse_notify_req *head; -- -- pthread_mutex_lock(&se->lock); -- head = &se->notify_list; -- for (nreq = head->next; nreq != head; nreq = nreq->next) { -- if (nreq->unique == req->unique) { -- list_del_nreq(nreq); -- break; -- } -- } 
-- pthread_mutex_unlock(&se->lock); -- -- if (nreq != head) { -- nreq->reply(nreq, req, nodeid, inarg, buf); -- } --} -- - static int send_notify_iov(struct fuse_session *se, int notify_code, - struct iovec *iov, int count) - { -@@ -2059,95 +2007,6 @@ int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - return res; - } - --struct fuse_retrieve_req { -- struct fuse_notify_req nreq; -- void *cookie; --}; -- --static void fuse_ll_retrieve_reply(struct fuse_notify_req *nreq, fuse_req_t req, -- fuse_ino_t ino, const void *inarg, -- const struct fuse_buf *ibuf) --{ -- struct fuse_session *se = req->se; -- struct fuse_retrieve_req *rreq = -- container_of(nreq, struct fuse_retrieve_req, nreq); -- const struct fuse_notify_retrieve_in *arg = inarg; -- struct fuse_bufvec bufv = { -- .buf[0] = *ibuf, -- .count = 1, -- }; -- -- if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD)) { -- bufv.buf[0].mem = PARAM(arg); -- } -- -- bufv.buf[0].size -= -- sizeof(struct fuse_in_header) + sizeof(struct fuse_notify_retrieve_in); -- -- if (bufv.buf[0].size < arg->size) { -- fuse_log(FUSE_LOG_ERR, "fuse: retrieve reply: buffer size too small\n"); -- fuse_reply_none(req); -- goto out; -- } -- bufv.buf[0].size = arg->size; -- -- if (se->op.retrieve_reply) { -- se->op.retrieve_reply(req, rreq->cookie, ino, arg->offset, &bufv); -- } else { -- fuse_reply_none(req); -- } --out: -- free(rreq); --} -- --int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -- size_t size, off_t offset, void *cookie) --{ -- struct fuse_notify_retrieve_out outarg; -- struct iovec iov[2]; -- struct fuse_retrieve_req *rreq; -- int err; -- -- if (!se) { -- return -EINVAL; -- } -- -- if (se->conn.proto_major < 6 || se->conn.proto_minor < 15) { -- return -ENOSYS; -- } -- -- rreq = malloc(sizeof(*rreq)); -- if (rreq == NULL) { -- return -ENOMEM; -- } -- -- pthread_mutex_lock(&se->lock); -- rreq->cookie = cookie; -- rreq->nreq.unique = se->notify_ctr++; -- rreq->nreq.reply = 
fuse_ll_retrieve_reply; -- list_add_nreq(&rreq->nreq, &se->notify_list); -- pthread_mutex_unlock(&se->lock); -- -- outarg.notify_unique = rreq->nreq.unique; -- outarg.nodeid = ino; -- outarg.offset = offset; -- outarg.size = size; -- outarg.padding = 0; -- -- iov[1].iov_base = &outarg; -- iov[1].iov_len = sizeof(outarg); -- -- err = send_notify_iov(se, FUSE_NOTIFY_RETRIEVE, iov, 2); -- if (err) { -- pthread_mutex_lock(&se->lock); -- list_del_nreq(&rreq->nreq); -- pthread_mutex_unlock(&se->lock); -- free(rreq); -- } -- -- return err; --} -- - void *fuse_req_userdata(fuse_req_t req) - { - return req->se->userdata; -@@ -2226,7 +2085,7 @@ static struct { - [FUSE_POLL] = { do_poll, "POLL" }, - [FUSE_FALLOCATE] = { do_fallocate, "FALLOCATE" }, - [FUSE_DESTROY] = { do_destroy, "DESTROY" }, -- [FUSE_NOTIFY_REPLY] = { (void *)1, "NOTIFY_REPLY" }, -+ [FUSE_NOTIFY_REPLY] = { NULL, "NOTIFY_REPLY" }, - [FUSE_BATCH_FORGET] = { do_batch_forget, "BATCH_FORGET" }, - [FUSE_READDIRPLUS] = { do_readdirplus, "READDIRPLUS" }, - [FUSE_RENAME2] = { do_rename2, "RENAME2" }, -@@ -2333,8 +2192,6 @@ void fuse_session_process_buf_int(struct fuse_session *se, - inarg = (void *)&in[1]; - if (in->opcode == FUSE_WRITE && se->op.write_buf) { - do_write_buf(req, in->nodeid, inarg, buf); -- } else if (in->opcode == FUSE_NOTIFY_REPLY) { -- do_notify_reply(req, in->nodeid, inarg, buf); - } else { - fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); - } -@@ -2437,8 +2294,6 @@ struct fuse_session *fuse_session_new(struct fuse_args *args, - - list_init_req(&se->list); - list_init_req(&se->interrupts); -- list_init_nreq(&se->notify_list); -- se->notify_ctr = 1; - fuse_mutex_init(&se->lock); - - memcpy(&se->op, op, op_size); -diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h -index 8d8909b..12a84b4 100644 ---- a/tools/virtiofsd/fuse_lowlevel.h -+++ b/tools/virtiofsd/fuse_lowlevel.h -@@ -1085,21 +1085,6 @@ struct fuse_lowlevel_ops { - off_t off, struct fuse_file_info *fi); - - 
/** -- * Callback function for the retrieve request -- * -- * Valid replies: -- * fuse_reply_none -- * -- * @param req request handle -- * @param cookie user data supplied to fuse_lowlevel_notify_retrieve() -- * @param ino the inode number supplied to fuse_lowlevel_notify_retrieve() -- * @param offset the offset supplied to fuse_lowlevel_notify_retrieve() -- * @param bufv the buffer containing the returned data -- */ -- void (*retrieve_reply)(fuse_req_t req, void *cookie, fuse_ino_t ino, -- off_t offset, struct fuse_bufvec *bufv); -- -- /** - * Forget about multiple inodes - * - * See description of the forget function for more -@@ -1726,38 +1711,6 @@ int fuse_lowlevel_notify_delete(struct fuse_session *se, fuse_ino_t parent, - int fuse_lowlevel_notify_store(struct fuse_session *se, fuse_ino_t ino, - off_t offset, struct fuse_bufvec *bufv, - enum fuse_buf_copy_flags flags); --/** -- * Retrieve data from the kernel buffers -- * -- * Retrieve data in the kernel buffers belonging to the given inode. -- * If successful then the retrieve_reply() method will be called with -- * the returned data. -- * -- * Only present pages are returned in the retrieve reply. Retrieving -- * stops when it finds a non-present page and only data prior to that -- * is returned. -- * -- * If this function returns an error, then the retrieve will not be -- * completed and no reply will be sent. -- * -- * This function doesn't change the dirty state of pages in the kernel -- * buffer. For dirty pages the write() method will be called -- * regardless of having been retrieved previously. -- * -- * Added in FUSE protocol version 7.15. If the kernel does not support -- * this (or a newer) version, the function will return -ENOSYS and do -- * nothing. 
-- * -- * @param se the session object -- * @param ino the inode number -- * @param size the number of bytes to retrieve -- * @param offset the starting offset into the file to retrieve from -- * @param cookie user data to supply to the reply callback -- * @return zero for success, -errno for failure -- */ --int fuse_lowlevel_notify_retrieve(struct fuse_session *se, fuse_ino_t ino, -- size_t size, off_t offset, void *cookie); -- - - /* - * Utility functions --- -1.8.3.1 - diff --git a/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch b/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch deleted file mode 100644 index 97a0db3..0000000 --- a/kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch +++ /dev/null @@ -1,139 +0,0 @@ -From e01a6e68d799ed2af0ca3b04d75818ba62b18682 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:08 +0100 -Subject: [PATCH 097/116] virtiofsd: rename inode->refcount to inode->nlookup -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-94-dgilbert@redhat.com> -Patchwork-id: 93547 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 093/112] virtiofsd: rename inode->refcount to inode->nlookup -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -This reference counter plays a specific role in the FUSE protocol. It's -not a generic object reference counter and the FUSE kernel code calls it -"nlookup". - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 1222f015558fc34cea02aa3a5a92de608c82cec8) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 37 +++++++++++++++++++++++++------------ - 1 file changed, 25 insertions(+), 12 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 2d703b5..c819b5f 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -99,7 +99,20 @@ struct lo_inode { - int fd; - bool is_symlink; - struct lo_key key; -- uint64_t refcount; /* protected by lo->mutex */ -+ -+ /* -+ * This counter keeps the inode alive during the FUSE session. -+ * Incremented when the FUSE inode number is sent in a reply -+ * (FUSE_LOOKUP, FUSE_READDIRPLUS, etc). Decremented when an inode is -+ * released by requests like FUSE_FORGET, FUSE_RMDIR, FUSE_RENAME, etc. -+ * -+ * Note that this value is untrusted because the client can manipulate -+ * it arbitrarily using FUSE_FORGET requests. -+ * -+ * Protected by lo->mutex. 
-+ */ -+ uint64_t nlookup; -+ - fuse_ino_t fuse_ino; - pthread_mutex_t plock_mutex; - GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ -@@ -568,7 +581,7 @@ retry: - if (last == path) { - p = &lo->root; - pthread_mutex_lock(&lo->mutex); -- p->refcount++; -+ p->nlookup++; - pthread_mutex_unlock(&lo->mutex); - } else { - *last = '\0'; -@@ -786,8 +799,8 @@ static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) - pthread_mutex_lock(&lo->mutex); - p = g_hash_table_lookup(lo->inodes, &key); - if (p) { -- assert(p->refcount > 0); -- p->refcount++; -+ assert(p->nlookup > 0); -+ p->nlookup++; - } - pthread_mutex_unlock(&lo->mutex); - -@@ -855,7 +868,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - } - - inode->is_symlink = S_ISLNK(e->attr.st_mode); -- inode->refcount = 1; -+ inode->nlookup = 1; - inode->fd = newfd; - newfd = -1; - inode->key.ino = e->attr.st_ino; -@@ -1112,7 +1125,7 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - } - - pthread_mutex_lock(&lo->mutex); -- inode->refcount++; -+ inode->nlookup++; - pthread_mutex_unlock(&lo->mutex); - e.ino = inode->fuse_ino; - -@@ -1193,9 +1206,9 @@ static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, - } - - pthread_mutex_lock(&lo->mutex); -- assert(inode->refcount >= n); -- inode->refcount -= n; -- if (!inode->refcount) { -+ assert(inode->nlookup >= n); -+ inode->nlookup -= n; -+ if (!inode->nlookup) { - lo_map_remove(&lo->ino_map, inode->fuse_ino); - g_hash_table_remove(lo->inodes, &inode->key); - if (g_hash_table_size(inode->posix_locks)) { -@@ -1216,7 +1229,7 @@ static int unref_all_inodes_cb(gpointer key, gpointer value, gpointer user_data) - struct lo_inode *inode = value; - struct lo_data *lo = user_data; - -- inode->refcount = 0; -+ inode->nlookup = 0; - lo_map_remove(&lo->ino_map, inode->fuse_ino); - close(inode->fd); - -@@ -1241,7 +1254,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, 
uint64_t nlookup) - } - - fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", -- (unsigned long long)ino, (unsigned long long)inode->refcount, -+ (unsigned long long)ino, (unsigned long long)inode->nlookup, - (unsigned long long)nlookup); - - unref_inode_lolocked(lo, inode, nlookup); -@@ -2609,7 +2622,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) - root->fd = fd; - root->key.ino = stat.st_ino; - root->key.dev = stat.st_dev; -- root->refcount = 2; -+ root->nlookup = 2; - } - - static guint lo_key_hash(gconstpointer key) --- -1.8.3.1 - diff --git a/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch b/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch deleted file mode 100644 index 95858f8..0000000 --- a/kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch +++ /dev/null @@ -1,94 +0,0 @@ -From cfa4550f926e7a07757853f94273f2d1589cb9d3 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:48 +0100 -Subject: [PATCH 077/116] virtiofsd: rename unref_inode() to - unref_inode_lolocked() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-74-dgilbert@redhat.com> -Patchwork-id: 93526 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 073/112] virtiofsd: rename unref_inode() to unref_inode_lolocked() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Miklos Szeredi - -Signed-off-by: Miklos Szeredi -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 95d2715791c60b5dc2d22e4eb7b83217273296fa) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 15 ++++++++------- - 1 file changed, 8 insertions(+), 7 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 8b1784f..de12e75 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -148,8 +148,8 @@ static const struct fuse_opt lo_opts[] = { - }; - static bool use_syslog = false; - static int current_log_level; -- --static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); -+static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, -+ uint64_t n); - - static struct { - pthread_mutex_t mutex; -@@ -586,7 +586,7 @@ retry: - return 0; - - fail_unref: -- unref_inode(lo, p, 1); -+ unref_inode_lolocked(lo, p, 1); - fail: - if (retries) { - retries--; -@@ -624,7 +624,7 @@ fallback: - res = lo_parent_and_name(lo, inode, path, &parent); - if (res != -1) { - res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW); -- unref_inode(lo, parent, 1); -+ unref_inode_lolocked(lo, parent, 1); - } - - return res; -@@ -1027,7 +1027,7 @@ fallback: - res = lo_parent_and_name(lo, inode, path, &parent); - if (res != -1) { - res = linkat(parent->fd, path, dfd, name, 0); -- unref_inode(lo, parent, 1); -+ unref_inode_lolocked(lo, parent, 1); - } - - return res; -@@ -1141,7 +1141,8 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - fuse_reply_err(req, res == -1 ? 
errno : 0); - } - --static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) -+static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, -+ uint64_t n) - { - if (!inode) { - return; -@@ -1181,7 +1182,7 @@ static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) - (unsigned long long)ino, (unsigned long long)inode->refcount, - (unsigned long long)nlookup); - -- unref_inode(lo, inode, nlookup); -+ unref_inode_lolocked(lo, inode, nlookup); - } - - static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) --- -1.8.3.1 - diff --git a/kvm-virtiofsd-sandbox-mount-namespace.patch b/kvm-virtiofsd-sandbox-mount-namespace.patch deleted file mode 100644 index ab6f751..0000000 --- a/kvm-virtiofsd-sandbox-mount-namespace.patch +++ /dev/null @@ -1,166 +0,0 @@ -From c7ae38df696e4be432fd418c670dcea892b910a7 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:27 +0100 -Subject: [PATCH 056/116] virtiofsd: sandbox mount namespace -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-53-dgilbert@redhat.com> -Patchwork-id: 93504 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 052/112] virtiofsd: sandbox mount namespace -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Use a mount namespace with the shared directory tree mounted at "/" and -no other mounts. - -This prevents symlink escape attacks because symlink targets are -resolved only against the shared directory and cannot go outside it. - -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Peng Tao -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 5baa3b8e95064c2434bd9e2f312edd5e9ae275dc) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 89 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 89 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e2e2211..0570453 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -50,6 +50,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1943,6 +1944,58 @@ static void print_capabilities(void) - printf("}\n"); - } - -+/* This magic is based on lxc's lxc_pivot_root() */ -+static void setup_pivot_root(const char *source) -+{ -+ int oldroot; -+ int newroot; -+ -+ oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC); -+ if (oldroot < 0) { -+ fuse_log(FUSE_LOG_ERR, "open(/): %m\n"); -+ exit(1); -+ } -+ -+ newroot = open(source, O_DIRECTORY | O_RDONLY | O_CLOEXEC); -+ if (newroot < 0) { -+ fuse_log(FUSE_LOG_ERR, "open(%s): %m\n", source); -+ exit(1); -+ } -+ -+ if (fchdir(newroot) < 0) { -+ fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); -+ exit(1); -+ } -+ -+ if (syscall(__NR_pivot_root, ".", ".") < 0) { -+ fuse_log(FUSE_LOG_ERR, "pivot_root(., .): %m\n"); -+ exit(1); -+ } -+ -+ if (fchdir(oldroot) < 0) { -+ fuse_log(FUSE_LOG_ERR, "fchdir(oldroot): %m\n"); -+ exit(1); -+ } -+ -+ if (mount("", ".", "", MS_SLAVE | MS_REC, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(., MS_SLAVE | MS_REC): %m\n"); -+ exit(1); -+ } -+ -+ if (umount2(".", MNT_DETACH) < 0) { -+ fuse_log(FUSE_LOG_ERR, "umount2(., MNT_DETACH): %m\n"); -+ exit(1); -+ } -+ -+ if (fchdir(newroot) < 0) { -+ fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); -+ exit(1); -+ } -+ -+ close(newroot); -+ close(oldroot); -+} -+ - static void setup_proc_self_fd(struct lo_data *lo) - { - lo->proc_self_fd = open("/proc/self/fd", O_PATH); -@@ -1952,6 +2005,39 @@ static void setup_proc_self_fd(struct lo_data *lo) - } - } - -+/* 
-+ * Make the source directory our root so symlinks cannot escape and no other -+ * files are accessible. -+ */ -+static void setup_mount_namespace(const char *source) -+{ -+ if (unshare(CLONE_NEWNS) != 0) { -+ fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNS): %m\n"); -+ exit(1); -+ } -+ -+ if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_PRIVATE): %m\n"); -+ exit(1); -+ } -+ -+ if (mount(source, source, NULL, MS_BIND, NULL) < 0) { -+ fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); -+ exit(1); -+ } -+ -+ setup_pivot_root(source); -+} -+ -+/* -+ * Lock down this process to prevent access to other processes or files outside -+ * source directory. This reduces the impact of arbitrary code execution bugs. -+ */ -+static void setup_sandbox(struct lo_data *lo) -+{ -+ setup_mount_namespace(lo->source); -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -2052,6 +2138,7 @@ int main(int argc, char *argv[]) - } - - lo.root.fd = open(lo.source, O_PATH); -+ - if (lo.root.fd == -1) { - fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source); - exit(1); -@@ -2075,6 +2162,8 @@ int main(int argc, char *argv[]) - /* Must be after daemonize to get the right /proc/self/fd */ - setup_proc_self_fd(&lo); - -+ setup_sandbox(&lo); -+ - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch b/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch deleted file mode 100644 index e54248c..0000000 --- a/kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 4cc435b3a8a9a419cc85ee883d5184f810f91e52 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:34 +0100 -Subject: [PATCH 063/116] virtiofsd: set maximum RLIMIT_NOFILE limit -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-60-dgilbert@redhat.com> -Patchwork-id: 93516 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 059/112] virtiofsd: set maximum RLIMIT_NOFILE limit -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -virtiofsd can exceed the default open file descriptor limit easily on -most systems. Take advantage of the fact that it runs as root to raise -the limit. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 01a6dc95ec7f71eeff9963fe3cb03d85225fba3e) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 32 ++++++++++++++++++++++++++++++++ - 1 file changed, 32 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index d53cb1e..c281d81 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -53,6 +53,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -2268,6 +2269,35 @@ static void setup_sandbox(struct lo_data *lo, struct fuse_session *se) - setup_seccomp(); - } - -+/* Raise the maximum number of open file descriptors */ -+static void setup_nofile_rlimit(void) -+{ -+ const rlim_t max_fds = 1000000; -+ struct rlimit rlim; -+ -+ if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { -+ fuse_log(FUSE_LOG_ERR, "getrlimit(RLIMIT_NOFILE): %m\n"); -+ exit(1); -+ } -+ -+ if (rlim.rlim_cur >= max_fds) { -+ return; /* nothing to do */ -+ } -+ -+ rlim.rlim_cur = max_fds; -+ rlim.rlim_max = max_fds; -+ -+ if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) { -+ /* Ignore SELinux 
denials */ -+ if (errno == EPERM) { -+ return; -+ } -+ -+ fuse_log(FUSE_LOG_ERR, "setrlimit(RLIMIT_NOFILE): %m\n"); -+ exit(1); -+ } -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); -@@ -2389,6 +2419,8 @@ int main(int argc, char *argv[]) - - fuse_daemonize(opts.foreground); - -+ setup_nofile_rlimit(); -+ - /* Must be before sandbox since it wants /proc */ - setup_capng(); - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch b/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch deleted file mode 100644 index be6b244..0000000 --- a/kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 06a24b54c94345b436d888a48b92fafa967c3d58 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:25 +0100 -Subject: [PATCH 114/116] virtiofsd: stop all queue threads on exit in - virtio_loop() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-111-dgilbert@redhat.com> -Patchwork-id: 93564 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 110/112] virtiofsd: stop all queue threads on exit in virtio_loop() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Eryu Guan - -On guest graceful shutdown, virtiofsd receives VHOST_USER_GET_VRING_BASE -request from VMM and shuts down virtqueues by calling fv_set_started(), -which joins fv_queue_thread() threads. So when virtio_loop() returns, -there should be no thread is still accessing data in fuse session and/or -virtio dev. - -But on abnormal exit, e.g. guest got killed for whatever reason, -vhost-user socket is closed and virtio_loop() breaks out the main loop -and returns to main(). 
But it's possible fv_queue_worker()s are still -working and accessing fuse session and virtio dev, which results in -crash or use-after-free. - -Fix it by stopping fv_queue_thread()s before virtio_loop() returns, -to make sure there's no-one could access fuse session and virtio dev. - -Reported-by: Qingming Su -Signed-off-by: Eryu Guan -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 9883df8ccae6d744a0c8d9cbf9d62b1797d70ebd) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_virtio.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c -index 9f65823..80a6e92 100644 ---- a/tools/virtiofsd/fuse_virtio.c -+++ b/tools/virtiofsd/fuse_virtio.c -@@ -815,6 +815,19 @@ int virtio_loop(struct fuse_session *se) - } - } - -+ /* -+ * Make sure all fv_queue_thread()s quit on exit, as we're about to -+ * free virtio dev and fuse session, no one should access them anymore. -+ */ -+ for (int i = 0; i < se->virtio_dev->nqueues; i++) { -+ if (!se->virtio_dev->qi[i]) { -+ continue; -+ } -+ -+ fuse_log(FUSE_LOG_INFO, "%s: Stopping queue %d thread\n", __func__, i); -+ fv_queue_cleanup_thread(se->virtio_dev, i); -+ } -+ - fuse_log(FUSE_LOG_INFO, "%s: Exit\n", __func__); - - return 0; --- -1.8.3.1 - diff --git a/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch b/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch deleted file mode 100644 index f595ffa..0000000 --- a/kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 1744329bcba4a3e1a82cec3b1a34b3fbf0a9d7cf Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:54 +0100 -Subject: [PATCH 083/116] virtiofsd: support nanosecond resolution for file - timestamp -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-80-dgilbert@redhat.com> -Patchwork-id: 93535 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 079/112] virtiofsd: support nanosecond resolution for file timestamp -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Jiufei Xue - -Define HAVE_STRUCT_STAT_ST_ATIM to 1 if `st_atim' is member of `struct -stat' which means support nanosecond resolution for the file timestamp -fields. - -Signed-off-by: Jiufei Xue -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 8a792b034d4b315251fd842bb4c73a133aa1368f) -Signed-off-by: Miroslav Rezanina ---- - configure | 16 ++++++++++++++++ - tools/virtiofsd/fuse_misc.h | 1 + - 2 files changed, 17 insertions(+) - -diff --git a/configure b/configure -index 7831618..5120c14 100755 ---- a/configure -+++ b/configure -@@ -5218,6 +5218,19 @@ if compile_prog "" "" ; then - strchrnul=yes - fi - -+######################################### -+# check if we have st_atim -+ -+st_atim=no -+cat > $TMPC << EOF -+#include -+#include -+int main(void) { return offsetof(struct stat, st_atim); } -+EOF -+if compile_prog "" "" ; then -+ st_atim=yes -+fi -+ - ########################################## - # check if trace backend exists - -@@ -6919,6 +6932,9 @@ fi - if test "$strchrnul" = "yes" ; then - echo "HAVE_STRCHRNUL=y" >> $config_host_mak - fi -+if test "$st_atim" = "yes" ; then -+ echo "HAVE_STRUCT_STAT_ST_ATIM=y" >> $config_host_mak -+fi - if test "$byteswap_h" = "yes" ; then - echo "CONFIG_BYTESWAP_H=y" >> $config_host_mak - fi -diff --git a/tools/virtiofsd/fuse_misc.h b/tools/virtiofsd/fuse_misc.h -index f252baa..5c618ce 100644 ---- a/tools/virtiofsd/fuse_misc.h -+++ b/tools/virtiofsd/fuse_misc.h -@@ -7,6 +7,7 @@ - */ - - #include -+#include "config-host.h" - - /* - * Versioned symbols cannot be used in some cases because it --- -1.8.3.1 - diff --git 
a/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch b/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch deleted file mode 100644 index 1bae1bf..0000000 --- a/kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 7bc27a767bc8c78b1bca46bbe5e1d53dcd7173b4 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:18 +0100 -Subject: [PATCH 107/116] virtiofsd: use fuse_buf_writev to replace - fuse_buf_write for better performance -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-104-dgilbert@redhat.com> -Patchwork-id: 93558 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 103/112] virtiofsd: use fuse_buf_writev to replace fuse_buf_write for better performance -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: piaojun - -fuse_buf_writev() only handles the normal write in which src is buffer -and dest is fd. Specially if src buffer represents guest physical -address that can't be mapped by the daemon process, IO must be bounced -back to the VMM to do it by fuse_buf_copy(). - -Signed-off-by: Jun Piao -Suggested-by: Dr. David Alan Gilbert -Suggested-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit c465bba2c90a810f6e71e4f2646b1b4ee4b478de) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/buffer.c | 20 ++++++++++++++++++-- - 1 file changed, 18 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/buffer.c b/tools/virtiofsd/buffer.c -index 37befeb..27c1377 100644 ---- a/tools/virtiofsd/buffer.c -+++ b/tools/virtiofsd/buffer.c -@@ -34,7 +34,6 @@ size_t fuse_buf_size(const struct fuse_bufvec *bufv) - return size; - } - --__attribute__((unused)) - static ssize_t fuse_buf_writev(struct fuse_buf *out_buf, - struct fuse_bufvec *in_buf) - { -@@ -262,12 +261,29 @@ static int fuse_bufvec_advance(struct fuse_bufvec *bufv, size_t len) - - ssize_t fuse_buf_copy(struct fuse_bufvec *dstv, struct fuse_bufvec *srcv) - { -- size_t copied = 0; -+ size_t copied = 0, i; - - if (dstv == srcv) { - return fuse_buf_size(dstv); - } - -+ /* -+ * use writev to improve bandwidth when all the -+ * src buffers already mapped by the daemon -+ * process -+ */ -+ for (i = 0; i < srcv->count; i++) { -+ if (srcv->buf[i].flags & FUSE_BUF_IS_FD) { -+ break; -+ } -+ } -+ if ((i == srcv->count) && (dstv->count == 1) && -+ (dstv->idx == 0) && -+ (dstv->buf[0].flags & FUSE_BUF_IS_FD)) { -+ dstv->buf[0].pos += dstv->off; -+ return fuse_buf_writev(&dstv->buf[0], srcv); -+ } -+ - for (;;) { - const struct fuse_buf *src = fuse_bufvec_current(srcv); - const struct fuse_buf *dst = fuse_bufvec_current(dstv); --- -1.8.3.1 - diff --git a/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch b/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch deleted file mode 100644 index feffb5e..0000000 --- a/kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 1724f54070d33d8070ba2d22c8fac87ea65814c1 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:02:04 +0100 -Subject: [PATCH 093/116] virtiofsd: use fuse_lowlevel_is_virtio() in - fuse_session_destroy() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-90-dgilbert@redhat.com> -Patchwork-id: 93540 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 089/112] virtiofsd: use fuse_lowlevel_is_virtio() in fuse_session_destroy() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -vu_socket_path is NULL when --fd=FDNUM was used. Use -fuse_lowlevel_is_virtio() instead. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 620e9d8d9cee6df7fe71168dea950dba0cc21a4a) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 70568d2..dab6a31 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -2537,12 +2537,13 @@ void fuse_session_destroy(struct fuse_session *se) - close(se->fd); - } - -- if (se->vu_socket_path) { -+ if (fuse_lowlevel_is_virtio(se)) { - virtio_session_close(se); -- free(se->vu_socket_path); -- se->vu_socket_path = NULL; - } - -+ free(se->vu_socket_path); -+ se->vu_socket_path = NULL; -+ - free(se); - } - --- -1.8.3.1 - diff --git a/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch b/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch deleted file mode 100644 index f250ed7..0000000 --- a/kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch +++ /dev/null @@ -1,390 +0,0 @@ -From bce5070d1aada88154b811a08eec1586ab24fce5 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:26 +0100 -Subject: [PATCH 055/116] virtiofsd: use /proc/self/fd/ O_PATH file descriptor -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-52-dgilbert@redhat.com> -Patchwork-id: 93506 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 051/112] virtiofsd: use /proc/self/fd/ O_PATH file descriptor -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Sandboxing will remove /proc from the mount namespace so we can no -longer build string paths into "/proc/self/fd/...". - -Keep an O_PATH file descriptor so we can still re-open fds via -/proc/self/fd. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 9f59d175e2ca96f0b87f534dba69ea547dd35945) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 130 +++++++++++++++++++++++++++++++-------- - 1 file changed, 103 insertions(+), 27 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index e3d65c3..e2e2211 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -110,6 +110,9 @@ struct lo_data { - struct lo_map ino_map; /* protected by lo->mutex */ - struct lo_map dirp_map; /* protected by lo->mutex */ - struct lo_map fd_map; /* protected by lo->mutex */ -+ -+ /* An O_PATH file descriptor to /proc/self/fd/ */ -+ int proc_self_fd; - }; - - static const struct fuse_opt lo_opts[] = { -@@ -379,9 +382,9 @@ static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode, - int res; - - retry: -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "%i", inode->fd); - -- res = readlink(procname, path, PATH_MAX); -+ res = readlinkat(lo->proc_self_fd, procname, path, PATH_MAX); 
- if (res < 0) { - fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__); - goto fail_noretry; -@@ -477,9 +480,9 @@ static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode, - } - return res; - } -- sprintf(path, "/proc/self/fd/%i", inode->fd); -+ sprintf(path, "%i", inode->fd); - -- return utimensat(AT_FDCWD, path, tv, 0); -+ return utimensat(lo->proc_self_fd, path, tv, 0); - - fallback: - res = lo_parent_and_name(lo, inode, path, &parent); -@@ -535,8 +538,8 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - if (fi) { - res = fchmod(fd, attr->st_mode); - } else { -- sprintf(procname, "/proc/self/fd/%i", ifd); -- res = chmod(procname, attr->st_mode); -+ sprintf(procname, "%i", ifd); -+ res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0); - } - if (res == -1) { - goto out_err; -@@ -552,11 +555,23 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - } - } - if (valid & FUSE_SET_ATTR_SIZE) { -+ int truncfd; -+ - if (fi) { -- res = ftruncate(fd, attr->st_size); -+ truncfd = fd; - } else { -- sprintf(procname, "/proc/self/fd/%i", ifd); -- res = truncate(procname, attr->st_size); -+ sprintf(procname, "%i", ifd); -+ truncfd = openat(lo->proc_self_fd, procname, O_RDWR); -+ if (truncfd < 0) { -+ goto out_err; -+ } -+ } -+ -+ res = ftruncate(truncfd, attr->st_size); -+ if (!fi) { -+ saverr = errno; -+ close(truncfd); -+ errno = saverr; - } - if (res == -1) { - goto out_err; -@@ -874,9 +889,9 @@ static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode, - return res; - } - -- sprintf(path, "/proc/self/fd/%i", inode->fd); -+ sprintf(path, "%i", inode->fd); - -- return linkat(AT_FDCWD, path, dfd, name, AT_SYMLINK_FOLLOW); -+ return linkat(lo->proc_self_fd, path, dfd, name, AT_SYMLINK_FOLLOW); - - fallback: - res = lo_parent_and_name(lo, inode, path, &parent); -@@ -1404,8 +1419,8 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - fi->flags &= 
~O_APPEND; - } - -- sprintf(buf, "/proc/self/fd/%i", lo_fd(req, ino)); -- fd = open(buf, fi->flags & ~O_NOFOLLOW); -+ sprintf(buf, "%i", lo_fd(req, ino)); -+ fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); - if (fd == -1) { - return (void)fuse_reply_err(req, errno); - } -@@ -1458,7 +1473,6 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, - struct fuse_file_info *fi) - { - int res; -- (void)ino; - int fd; - char *buf; - -@@ -1466,12 +1480,14 @@ static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, - (void *)fi); - - if (!fi) { -- res = asprintf(&buf, "/proc/self/fd/%i", lo_fd(req, ino)); -+ struct lo_data *lo = lo_data(req); -+ -+ res = asprintf(&buf, "%i", lo_fd(req, ino)); - if (res == -1) { - return (void)fuse_reply_err(req, errno); - } - -- fd = open(buf, O_RDWR); -+ fd = openat(lo->proc_self_fd, buf, O_RDWR); - free(buf); - if (fd == -1) { - return (void)fuse_reply_err(req, errno); -@@ -1587,11 +1603,13 @@ static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, - static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - size_t size) - { -+ struct lo_data *lo = lo_data(req); - char *value = NULL; - char procname[64]; - struct lo_inode *inode; - ssize_t ret; - int saverr; -+ int fd = -1; - - inode = lo_inode(req, ino); - if (!inode) { -@@ -1616,7 +1634,11 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out; - } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "%i", inode->fd); -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ goto out_err; -+ } - - if (size) { - value = malloc(size); -@@ -1624,7 +1646,7 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out_err; - } - -- ret = getxattr(procname, name, value, size); -+ ret = fgetxattr(fd, name, value, size); - if (ret == -1) { - goto out_err; - } -@@ -1635,7 +1657,7 @@ static void lo_getxattr(fuse_req_t req, 
fuse_ino_t ino, const char *name, - - fuse_reply_buf(req, value, ret); - } else { -- ret = getxattr(procname, name, NULL, 0); -+ ret = fgetxattr(fd, name, NULL, 0); - if (ret == -1) { - goto out_err; - } -@@ -1644,6 +1666,10 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - } - out_free: - free(value); -+ -+ if (fd >= 0) { -+ close(fd); -+ } - return; - - out_err: -@@ -1655,11 +1681,13 @@ out: - - static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - { -+ struct lo_data *lo = lo_data(req); - char *value = NULL; - char procname[64]; - struct lo_inode *inode; - ssize_t ret; - int saverr; -+ int fd = -1; - - inode = lo_inode(req, ino); - if (!inode) { -@@ -1683,7 +1711,11 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - goto out; - } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "%i", inode->fd); -+ fd = openat(lo->proc_self_fd, procname, O_RDONLY); -+ if (fd < 0) { -+ goto out_err; -+ } - - if (size) { - value = malloc(size); -@@ -1691,7 +1723,7 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - goto out_err; - } - -- ret = listxattr(procname, value, size); -+ ret = flistxattr(fd, value, size); - if (ret == -1) { - goto out_err; - } -@@ -1702,7 +1734,7 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - - fuse_reply_buf(req, value, ret); - } else { -- ret = listxattr(procname, NULL, 0); -+ ret = flistxattr(fd, NULL, 0); - if (ret == -1) { - goto out_err; - } -@@ -1711,6 +1743,10 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) - } - out_free: - free(value); -+ -+ if (fd >= 0) { -+ close(fd); -+ } - return; - - out_err: -@@ -1724,9 +1760,11 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - const char *value, size_t size, int flags) - { - char procname[64]; -+ struct lo_data *lo = lo_data(req); - struct lo_inode *inode; - ssize_t ret; - int saverr; -+ int fd = -1; - - 
inode = lo_inode(req, ino); - if (!inode) { -@@ -1751,21 +1789,31 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, - goto out; - } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "%i", inode->fd); -+ fd = openat(lo->proc_self_fd, procname, O_RDWR); -+ if (fd < 0) { -+ saverr = errno; -+ goto out; -+ } - -- ret = setxattr(procname, name, value, size, flags); -+ ret = fsetxattr(fd, name, value, size, flags); - saverr = ret == -1 ? errno : 0; - - out: -+ if (fd >= 0) { -+ close(fd); -+ } - fuse_reply_err(req, saverr); - } - - static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - { - char procname[64]; -+ struct lo_data *lo = lo_data(req); - struct lo_inode *inode; - ssize_t ret; - int saverr; -+ int fd = -1; - - inode = lo_inode(req, ino); - if (!inode) { -@@ -1789,12 +1837,20 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) - goto out; - } - -- sprintf(procname, "/proc/self/fd/%i", inode->fd); -+ sprintf(procname, "%i", inode->fd); -+ fd = openat(lo->proc_self_fd, procname, O_RDWR); -+ if (fd < 0) { -+ saverr = errno; -+ goto out; -+ } - -- ret = removexattr(procname, name); -+ ret = fremovexattr(fd, name); - saverr = ret == -1 ? 
errno : 0; - - out: -+ if (fd >= 0) { -+ close(fd); -+ } - fuse_reply_err(req, saverr); - } - -@@ -1887,12 +1943,25 @@ static void print_capabilities(void) - printf("}\n"); - } - -+static void setup_proc_self_fd(struct lo_data *lo) -+{ -+ lo->proc_self_fd = open("/proc/self/fd", O_PATH); -+ if (lo->proc_self_fd == -1) { -+ fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n"); -+ exit(1); -+ } -+} -+ - int main(int argc, char *argv[]) - { - struct fuse_args args = FUSE_ARGS_INIT(argc, argv); - struct fuse_session *se; - struct fuse_cmdline_opts opts; -- struct lo_data lo = { .debug = 0, .writeback = 0 }; -+ struct lo_data lo = { -+ .debug = 0, -+ .writeback = 0, -+ .proc_self_fd = -1, -+ }; - struct lo_map_elem *root_elem; - int ret = -1; - -@@ -2003,6 +2072,9 @@ int main(int argc, char *argv[]) - - fuse_daemonize(opts.foreground); - -+ /* Must be after daemonize to get the right /proc/self/fd */ -+ setup_proc_self_fd(&lo); -+ - /* Block until ctrl+c or fusermount -u */ - ret = virtio_loop(se); - -@@ -2018,6 +2090,10 @@ err_out1: - lo_map_destroy(&lo.dirp_map); - lo_map_destroy(&lo.ino_map); - -+ if (lo.proc_self_fd >= 0) { -+ close(lo.proc_self_fd); -+ } -+ - if (lo.root.fd >= 0) { - close(lo.root.fd); - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch b/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch deleted file mode 100644 index d60a902..0000000 --- a/kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch +++ /dev/null @@ -1,137 +0,0 @@ -From 6877a6c456178d6c1ca9a0ffaabaa7e51105b2ac Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:22 +0100 -Subject: [PATCH 051/116] virtiofsd: validate input buffer sizes in - do_write_buf() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200127190227.40942-48-dgilbert@redhat.com> -Patchwork-id: 93501 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 047/112] virtiofsd: validate input buffer sizes in do_write_buf() -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -There is a small change in behavior: if fuse_write_in->size doesn't -match the input buffer size then the request is failed. Previously -write requests with 1 fuse_buf element would truncate to -fuse_write_in->size. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Sergio Lopez -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 0ba8c3c6fce8fe949d59c1fd84d98d220ef9e759) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/fuse_lowlevel.c | 49 +++++++++++++++++++++++++---------------- - 1 file changed, 30 insertions(+), 19 deletions(-) - -diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c -index 7e10995..611e8b0 100644 ---- a/tools/virtiofsd/fuse_lowlevel.c -+++ b/tools/virtiofsd/fuse_lowlevel.c -@@ -1003,8 +1003,8 @@ static void do_write(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) - } - } - --static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, -- struct fuse_bufvec *ibufv) -+static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, -+ struct fuse_mbuf_iter *iter, struct fuse_bufvec *ibufv) - { - struct fuse_session *se = req->se; - struct fuse_bufvec *pbufv = ibufv; -@@ -1012,28 +1012,27 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, - .buf[0] = ibufv->buf[0], - .count = 1, - }; -- struct fuse_write_in *arg = (struct fuse_write_in *)inarg; -+ struct fuse_write_in *arg; -+ size_t arg_size = sizeof(*arg); - struct fuse_file_info fi; - - memset(&fi, 0, sizeof(fi)); -+ -+ arg = fuse_mbuf_iter_advance(iter, arg_size); -+ if (!arg) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ -+ fi.lock_owner = 
arg->lock_owner; -+ fi.flags = arg->flags; - fi.fh = arg->fh; - fi.writepage = arg->write_flags & FUSE_WRITE_CACHE; - - if (ibufv->count == 1) { -- fi.lock_owner = arg->lock_owner; -- fi.flags = arg->flags; -- if (!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)) { -- tmpbufv.buf[0].mem = PARAM(arg); -- } -- tmpbufv.buf[0].size -= -- sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in); -- if (tmpbufv.buf[0].size < arg->size) { -- fuse_log(FUSE_LOG_ERR, -- "fuse: do_write_buf: buffer size too small\n"); -- fuse_reply_err(req, EIO); -- return; -- } -- tmpbufv.buf[0].size = arg->size; -+ assert(!(tmpbufv.buf[0].flags & FUSE_BUF_IS_FD)); -+ tmpbufv.buf[0].mem = ((char *)arg) + arg_size; -+ tmpbufv.buf[0].size -= sizeof(struct fuse_in_header) + arg_size; - pbufv = &tmpbufv; - } else { - /* -@@ -1043,6 +1042,13 @@ static void do_write_buf(fuse_req_t req, fuse_ino_t nodeid, const void *inarg, - ibufv->buf[0].size = 0; - } - -+ if (fuse_buf_size(pbufv) != arg->size) { -+ fuse_log(FUSE_LOG_ERR, -+ "fuse: do_write_buf: buffer size doesn't match arg->size\n"); -+ fuse_reply_err(req, EIO); -+ return; -+ } -+ - se->op.write_buf(req, nodeid, pbufv, arg->offset, &fi); - } - -@@ -2052,12 +2058,17 @@ void fuse_session_process_buf_int(struct fuse_session *se, - struct fuse_chan *ch) - { - const struct fuse_buf *buf = bufv->buf; -+ struct fuse_mbuf_iter iter = FUSE_MBUF_ITER_INIT(buf); - struct fuse_in_header *in; - const void *inarg; - struct fuse_req *req; - int err; - -- in = buf->mem; -+ /* The first buffer must be a memory buffer */ -+ assert(!(buf->flags & FUSE_BUF_IS_FD)); -+ -+ in = fuse_mbuf_iter_advance(&iter, sizeof(*in)); -+ assert(in); /* caller guarantees the input buffer is large enough */ - - if (se->debug) { - fuse_log(FUSE_LOG_DEBUG, -@@ -2129,7 +2140,7 @@ void fuse_session_process_buf_int(struct fuse_session *se, - - inarg = (void *)&in[1]; - if (in->opcode == FUSE_WRITE && se->op.write_buf) { -- do_write_buf(req, in->nodeid, inarg, bufv); -+ do_write_buf(req, 
in->nodeid, &iter, bufv); - } else { - fuse_ll_ops[in->opcode].func(req, in->nodeid, inarg); - } --- -1.8.3.1 - diff --git a/kvm-virtiofsd-validate-path-components.patch b/kvm-virtiofsd-validate-path-components.patch deleted file mode 100644 index b35aed7..0000000 --- a/kvm-virtiofsd-validate-path-components.patch +++ /dev/null @@ -1,164 +0,0 @@ -From 69ac47502848c37ca3ede00f432c0675d9eef42c Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:01:18 +0100 -Subject: [PATCH 047/116] virtiofsd: validate path components -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-44-dgilbert@redhat.com> -Patchwork-id: 93498 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 043/112] virtiofsd: validate path components -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Stefan Hajnoczi - -Several FUSE requests contain single path components. A correct FUSE -client sends well-formed path components but there is currently no input -validation in case something went wrong or the client is malicious. - -Refuse ".", "..", and paths containing '/' when we expect a path -component. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 25dae28c58d7e706b5d5db99042c9db3cef2e657) -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 59 ++++++++++++++++++++++++++++++++++++---- - 1 file changed, 53 insertions(+), 6 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index ac380ef..e375406 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -133,6 +133,21 @@ static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n); - - static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); - -+static int is_dot_or_dotdot(const char *name) -+{ -+ return name[0] == '.' && -+ (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); -+} -+ -+/* Is `path` a single path component that is not "." or ".."? */ -+static int is_safe_path_component(const char *path) -+{ -+ if (strchr(path, '/')) { -+ return 0; -+ } -+ -+ return !is_dot_or_dotdot(path); -+} - - static struct lo_data *lo_data(fuse_req_t req) - { -@@ -681,6 +696,15 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) - parent, name); - } - -+ /* -+ * Don't use is_safe_path_component(), allow "." and ".." for NFS export -+ * support. 
-+ */ -+ if (strchr(name, '/')) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - err = lo_do_lookup(req, parent, name, &e); - if (err) { - fuse_reply_err(req, err); -@@ -762,6 +786,11 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - struct fuse_entry_param e; - struct lo_cred old = {}; - -+ if (!is_safe_path_component(name)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - dir = lo_inode(req, parent); - if (!dir) { - fuse_reply_err(req, EBADF); -@@ -863,6 +892,11 @@ static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, - struct fuse_entry_param e; - int saverr; - -+ if (!is_safe_path_component(name)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - inode = lo_inode(req, ino); - if (!inode) { - fuse_reply_err(req, EBADF); -@@ -904,6 +938,10 @@ out_err: - static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) - { - int res; -+ if (!is_safe_path_component(name)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } - - res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); - -@@ -916,6 +954,11 @@ static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, - { - int res; - -+ if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - if (flags) { - fuse_reply_err(req, EINVAL); - return; -@@ -930,6 +973,11 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) - { - int res; - -+ if (!is_safe_path_component(name)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - res = unlinkat(lo_fd(req, parent), name, 0); - - fuse_reply_err(req, res == -1 ? errno : 0); -@@ -1093,12 +1141,6 @@ out_err: - fuse_reply_err(req, error); - } - --static int is_dot_or_dotdot(const char *name) --{ -- return name[0] == '.' && -- (name[1] == '\0' || (name[1] == '.' 
&& name[2] == '\0')); --} -- - static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - off_t offset, struct fuse_file_info *fi, int plus) - { -@@ -1248,6 +1290,11 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - parent, name); - } - -+ if (!is_safe_path_component(name)) { -+ fuse_reply_err(req, EINVAL); -+ return; -+ } -+ - err = lo_change_cred(req, &old); - if (err) { - goto out; --- -1.8.3.1 - diff --git a/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch b/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch deleted file mode 100644 index 20add81..0000000 --- a/kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 247987aa987b7332eb501e00c440079b9e8e1fe7 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jan 2020 19:00:52 +0100 -Subject: [PATCH 021/116] vitriofsd/passthrough_ll: fix fallocate() ifdefs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200127190227.40942-18-dgilbert@redhat.com> -Patchwork-id: 93471 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 017/112] vitriofsd/passthrough_ll: fix fallocate() ifdefs -Bugzilla: 1694164 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual - -From: Xiao Yang - -1) Use correct CONFIG_FALLOCATE macro to check if fallocate() is supported.(i.e configure - script sets CONFIG_FALLOCATE intead of HAVE_FALLOCATE if fallocate() is supported) -2) Replace HAVE_POSIX_FALLOCATE with CONFIG_POSIX_FALLOCATE. - -Signed-off-by: Xiao Yang -Signed-off-by: Dr. 
David Alan Gilbert - Merged from two of Xiao Yang's patches -(cherry picked from commit 9776457ca6f05d5900e27decb1dba2ffddf95a22) - -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_ll.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 322a889..6c4da18 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -975,13 +975,13 @@ static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, - int err = EOPNOTSUPP; - (void)ino; - --#ifdef HAVE_FALLOCATE -+#ifdef CONFIG_FALLOCATE - err = fallocate(fi->fh, mode, offset, length); - if (err < 0) { - err = errno; - } - --#elif defined(HAVE_POSIX_FALLOCATE) -+#elif defined(CONFIG_POSIX_FALLOCATE) - if (mode) { - fuse_reply_err(req, EOPNOTSUPP); - return; --- -1.8.3.1 - diff --git a/kvm-xhci-recheck-slot-status.patch b/kvm-xhci-recheck-slot-status.patch deleted file mode 100644 index 8bcbc2c..0000000 --- a/kvm-xhci-recheck-slot-status.patch +++ /dev/null @@ -1,77 +0,0 @@ -From ab87c0ed2a8f0a626099261a3028bc34cfac3929 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 14 Jan 2020 20:23:31 +0000 -Subject: [PATCH 5/5] xhci: recheck slot status -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200114202331.51831-3-dgilbert@redhat.com> -Patchwork-id: 93345 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/2] xhci: recheck slot status -Bugzilla: 1790844 -RH-Acked-by: Peter Xu -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Gerd Hoffmann - -From: Gerd Hoffmann - -Factor out slot status check into a helper function. Add an additional -check after completing transfers. This is needed in case a guest -queues multiple transfers in a row and a device unplug happens while -qemu processes them. 
- -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1786413 -Signed-off-by: Gerd Hoffmann -Reviewed-by: Philippe Mathieu-Daudé -Message-id: 20200107083606.12393-1-kraxel@redhat.com -(cherry picked from commit 236846a019c4f7aa3111026fc9a1fe09684c8978) -Signed-off-by: Danilo C. L. de Paula ---- - hw/usb/hcd-xhci.c | 15 ++++++++++++--- - 1 file changed, 12 insertions(+), 3 deletions(-) - -diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index d2b9744..646c78c 100644 ---- a/hw/usb/hcd-xhci.c -+++ b/hw/usb/hcd-xhci.c -@@ -1861,6 +1861,13 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, - xhci_kick_epctx(epctx, streamid); - } - -+static bool xhci_slot_ok(XHCIState *xhci, int slotid) -+{ -+ return (xhci->slots[slotid - 1].uport && -+ xhci->slots[slotid - 1].uport->dev && -+ xhci->slots[slotid - 1].uport->dev->attached); -+} -+ - static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) - { - XHCIState *xhci = epctx->xhci; -@@ -1878,9 +1885,7 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) - - /* If the device has been detached, but the guest has not noticed this - yet the 2 above checks will succeed, but we must NOT continue */ -- if (!xhci->slots[epctx->slotid - 1].uport || -- !xhci->slots[epctx->slotid - 1].uport->dev || -- !xhci->slots[epctx->slotid - 1].uport->dev->attached) { -+ if (!xhci_slot_ok(xhci, epctx->slotid)) { - return; - } - -@@ -1987,6 +1992,10 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) - } else { - xhci_fire_transfer(xhci, xfer, epctx); - } -+ if (!xhci_slot_ok(xhci, epctx->slotid)) { -+ /* surprise removal -> stop processing */ -+ break; -+ } - if (xfer->complete) { - /* update ring dequeue ptr */ - xhci_set_ep_state(xhci, epctx, stctx, epctx->state); --- -1.8.3.1 - diff --git a/kvm-xics-Don-t-deassert-outputs.patch b/kvm-xics-Don-t-deassert-outputs.patch deleted file mode 100644 index 08ed724..0000000 --- a/kvm-xics-Don-t-deassert-outputs.patch +++ 
/dev/null @@ -1,52 +0,0 @@ -From 99b6ee4b7f63ea49e5b73f61bbf68f67252f27da Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Tue, 21 Jan 2020 05:16:12 +0000 -Subject: [PATCH 02/15] xics: Don't deassert outputs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Gibson -Message-id: <20200121051613.388295-3-dgibson@redhat.com> -Patchwork-id: 93430 -O-Subject: [RHEL-AV-8.2 qemu-kvm PATCH 2/3] xics: Don't deassert outputs -Bugzilla: 1776638 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth - -From: Greg Kurz - -The correct way to do this is to deassert the input pins on the CPU side. -This is the case since a previous change. - -Signed-off-by: Greg Kurz -Message-Id: <157548862298.3650476.1228720391270249433.stgit@bahia.lan> -Signed-off-by: David Gibson -(cherry picked from commit 4febcdd88f08422a66a1aa0dc55e1472abed3c4b) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1776638 - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/intc/xics.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/hw/intc/xics.c b/hw/intc/xics.c -index e7ac9ba..72c5dca 100644 ---- a/hw/intc/xics.c -+++ b/hw/intc/xics.c -@@ -289,9 +289,6 @@ void icp_reset(ICPState *icp) - icp->pending_priority = 0xff; - icp->mfrr = 0xff; - -- /* Make all outputs are deasserted */ -- qemu_set_irq(icp->output, 0); -- - if (kvm_irqchip_in_kernel()) { - Error *local_err = NULL; - --- -1.8.3.1 - diff --git a/kvm.modules b/kvm.modules deleted file mode 100644 index b9d9646..0000000 --- a/kvm.modules +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/sh - -case $(uname -m) in - ppc64) - grep OPAL /proc/cpuinfo >/dev/null 2>&1 && opal=1 - - modprobe -b kvm >/dev/null 2>&1 - modprobe -b kvm-pr >/dev/null 2>&1 && kvm=1 - if [ "$opal" ]; then - modprobe -b kvm-hv >/dev/null 2>&1 - fi - ;; - s390x) - modprobe -b kvm >/dev/null 2>&1 && kvm=1 - ;; -esac - -exit 0 diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 4f9fc85..db9f5c6 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -9,6 +9,7 @@ %global have_kvm_setup 0 %global have_memlock_limits 0 + %ifnarch %{ix86} x86_64 %global have_usbredir 0 %endif @@ -66,8 +67,8 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 4.2.0 -Release: 19%{?dist} +Version: 5.0.0 +Release: 0%{?dist}.wrb200506 # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -76,7 +77,7 @@ URL: http://www.qemu.org/ ExclusiveArch: x86_64 %{power64} aarch64 s390x -Source0: http://wiki.qemu.org/download/qemu-4.2.0.tar.xz +Source0: http://wiki.qemu.org/download/qemu-5.0.0.tar.xz # KSM control scripts Source4: ksm.service @@ -120,529 +121,7 @@ Patch0017: 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0018: 0018-usb-xhci-Fix-PCI-capability-order.patch Patch0019: 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0020: 0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -Patch0021: 0021-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch -# For bz#1741345 - Remove the "cpu64-rhel6" CPU from qemu-kvm -Patch22: kvm-i386-Remove-cpu64-rhel6-CPU-model.patch -# For bz#1772774 - qemu-kvm core dump during migration+reboot ( Assertion `mem->dirty_bmap' failed ) -Patch23: kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch -# For bz#1733893 - Boot a guest with "-prom-env 'auto-boot?=false'", SLOF failed to enter the boot entry after input "boot" followed by "0 > " on VNC -Patch24: kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch -# For bz#1782678 - qemu core dump after hot-unplugging the XXV710/XL710 PF -Patch25: kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch -# For bz#1789301 - virtio-blk/scsi: fix notification suppression during AioContext polling -Patch26: kvm-virtio-don-t-enable-notifications-during-polling.patch -# For bz#1790844 - USB related fixes -Patch27: kvm-usbredir-Prevent-recursion-in-usbredir_write.patch -# For bz#1790844 - USB related fixes -Patch28: kvm-xhci-recheck-slot-status.patch -# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0] -Patch29: kvm-tcp_emu-Fix-oob-access.patch -# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in 
tcp_emu() [rhel-av-8.2.0] -Patch30: kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch -# For bz#1791568 - CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0] -Patch31: kvm-slirp-use-correct-size-while-emulating-commands.patch -# For bz#1559846 - Nested KVM: limit VMX features according to CPU models - Fast Train -Patch32: kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch -# For bz#1725084 - aarch64: support dumping SVE registers -Patch33: kvm-target-arm-arch_dump-Add-SVE-notes.patch -# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic -Patch34: kvm-vhost-Add-names-to-section-rounded-warning.patch -# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic -Patch35: kvm-vhost-Only-align-sections-for-vhost-user.patch -# For bz#1779041 - netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic -Patch36: kvm-vhost-coding-style-fix.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch37: kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch38: kvm-vhost-user-fs-remove-vhostfd-property.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch39: kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch40: kvm-virtiofsd-Pull-in-upstream-headers.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch41: kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch42: kvm-virtiofsd-Add-auxiliary-.c-s.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch43: kvm-virtiofsd-Add-fuse_lowlevel.c.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch44: 
kvm-virtiofsd-Add-passthrough_ll.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch45: kvm-virtiofsd-Trim-down-imported-files.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch46: kvm-virtiofsd-Format-imported-files-to-qemu-style.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch47: kvm-virtiofsd-remove-mountpoint-dummy-argument.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch48: kvm-virtiofsd-remove-unused-notify-reply-support.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch49: kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch50: kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch51: kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch52: kvm-virtiofsd-Trim-out-compatibility-code.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch53: kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch54: kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch55: kvm-virtiofsd-Add-options-for-virtio.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch56: kvm-virtiofsd-add-o-source-PATH-to-help-output.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch57: kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch58: kvm-virtiofsd-Start-wiring-up-vhost-user.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) 
-Patch59: kvm-virtiofsd-Add-main-virtio-loop.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch60: kvm-virtiofsd-get-set-features-callbacks.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch61: kvm-virtiofsd-Start-queue-threads.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch62: kvm-virtiofsd-Poll-kick_fd-for-queue.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch63: kvm-virtiofsd-Start-reading-commands-from-queue.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch64: kvm-virtiofsd-Send-replies-to-messages.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch65: kvm-virtiofsd-Keep-track-of-replies.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch66: kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch67: kvm-virtiofsd-Fast-path-for-virtio-read.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch68: kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch69: kvm-virtiofsd-make-f-foreground-the-default.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch70: kvm-virtiofsd-add-vhost-user.json-file.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch71: kvm-virtiofsd-add-print-capabilities-option.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch72: kvm-virtiofs-Add-maintainers-entry.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch73: kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch74: kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch -# For 
bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch75: kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch76: kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch77: kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch78: kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch79: kvm-virtiofsd-validate-path-components.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch80: kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch81: kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch82: kvm-virtiofsd-add-fuse_mbuf_iter-API.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch83: kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch84: kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch85: kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch86: kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch87: kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch88: kvm-virtiofsd-sandbox-mount-namespace.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch89: 
kvm-virtiofsd-move-to-an-empty-network-namespace.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch90: kvm-virtiofsd-move-to-a-new-pid-namespace.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch91: kvm-virtiofsd-add-seccomp-whitelist.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch92: kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch93: kvm-virtiofsd-cap-ng-helpers.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch94: kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch95: kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch96: kvm-virtiofsd-fix-libfuse-information-leaks.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch97: kvm-virtiofsd-add-syslog-command-line-option.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch98: kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch99: kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch100: kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch101: kvm-virtiofsd-Handle-reinit.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch102: kvm-virtiofsd-Handle-hard-reboot.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch103: kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch104: 
kvm-vhost-user-Print-unexpected-slave-message-types.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch105: kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch106: kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch107: kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch108: kvm-virtiofsd-passthrough_ll-control-readdirplus.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch109: kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch110: kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch111: kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch112: kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch113: kvm-virtiofsd-passthrough_ll-use-hashtable.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch114: kvm-virtiofsd-Clean-up-inodes-on-destroy.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch115: kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch116: kvm-virtiofsd-fix-error-handling-in-main.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch117: kvm-virtiofsd-cleanup-allocated-resource-in-se.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch118: kvm-virtiofsd-fix-memory-leak-on-lo.source.patch -# For 
bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch119: kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch120: kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch121: kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch122: kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch123: kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch124: kvm-virtiofsd-Support-remote-posix-locks.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch125: kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch126: kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch127: kvm-virtiofsd-make-lo_release-atomic.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch128: kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch129: kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch130: kvm-libvhost-user-Fix-some-memtable-remap-cases.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch131: kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch132: kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch133: 
kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch134: kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch135: kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch136: kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch137: kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch138: kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch139: kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch140: kvm-virtiofsd-process-requests-in-a-thread-pool.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch141: kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch142: kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch143: kvm-virtiofsd-add-thread-pool-size-NUM-option.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch144: kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch145: kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch146: kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch -# For bz#1694164 - virtio-fs: host<->guest shared file system (qemu) -Patch147: 
kvm-virtiofsd-add-some-options-to-the-help-message.patch -# For bz#1776638 - Guest failed to boot up after system_reset 20 times -Patch148: kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch -# For bz#1776638 - Guest failed to boot up after system_reset 20 times -Patch149: kvm-xics-Don-t-deassert-outputs.patch -# For bz#1776638 - Guest failed to boot up after system_reset 20 times -Patch150: kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch -# For bz#1787395 - qemu-trace-stap list : TypeError: startswith first arg must be bytes or a tuple of bytes, not str -Patch151: kvm-trace-update-qemu-trace-stap-to-Python-3.patch -# For bz#1794503 - CVE-2020-1711 qemu-kvm: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-av-8.2.0] -Patch153: kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch -# For bz#1787444 - Broken postcopy migration with vTPM device -Patch154: kvm-tpm-ppi-page-align-PPI-RAM.patch -# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature -Patch155: kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch -# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature -Patch156: kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch -# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature -Patch157: kvm-tests-arm-cpu-features-Check-feature-default-values.patch -# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature -Patch158: kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch -# For bz#1647366 - aarch64: Add support for the kvm-no-adjvtime ARM CPU feature -Patch159: kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch -# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs -Patch160: kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch -# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs -Patch161: 
kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch -# For bz#1529231 - [q35] VM hangs after migration with 200 vCPUs -Patch162: kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch -# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) -# For bz#1787291 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) [rhel-8.1.0.z] -# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) -# For bz#1779078 - RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) -Patch163: kvm-i386-Resolve-CPU-models-to-v1-by-default.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch164: kvm-iotests-Support-job-complete-in-run_job.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch165: kvm-iotests-Create-VM.blockdev_create.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch166: kvm-block-Activate-recursively-even-for-already-active-n.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch167: kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch168: kvm-iotests-Test-external-snapshot-with-VM-state.patch -# For bz#1781637 - qemu crashed when do mem and disk snapshot -Patch169: kvm-iotests.py-Let-wait_migration-wait-even-more.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with 
dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch170: kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch171: kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch172: 
kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch173: kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch174: kvm-backup-top-Begin-drain-earlier.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with 
one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch175: kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch176: kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch177: kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch -# For bz#1745606 - Qemu hang when do incremental live backup in transaction mode without bitmap -# For bz#1746217 - Src qemu hang when do storage vm migration during guest 
installation -# For bz#1773517 - Src qemu hang when do storage vm migration with dataplane enable -# For bz#1779036 - Qemu coredump when do snapshot in transaction mode with one snapshot path not exist -# For bz#1782111 - Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable) -# For bz#1782175 - Qemu core dump when add persistent bitmap(data plane enable) -# For bz#1783965 - Qemu core dump when do backup with sync: bitmap and no bitmap provided -Patch178: kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch -# For bz#1801320 - aarch64: backport query-cpu-model-expansion and adjvtime document fixes -Patch179: kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch -# For bz#1801320 - aarch64: backport query-cpu-model-expansion and adjvtime document fixes -Patch180: kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch -# For bz#1796240 - Enable hw accelerated cache-count-flush by default for POWER9 DD2.3 cpus -Patch181: kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch -# For bz#1798994 - CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0] -Patch182: kvm-util-add-slirp_fmt-helpers.patch -# For bz#1798994 - CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0] -Patch183: kvm-tcp_emu-fix-unsafe-snprintf-usages.patch -# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device -Patch184: kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch -# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device -Patch185: kvm-virtio-make-virtio_delete_queue-idempotent.patch -# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device -Patch186: kvm-virtio-reset-region-cache-when-on-queue-deletion.patch -# For bz#1791590 - [Q35] No "DEVICE_DELETED" event in qmp after unplug 
virtio-net-pci device -Patch187: kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch -# For bz#1805334 - vhost-user/50-qemu-gpu.json is not valid JSON -Patch188: kvm-vhost-user-gpu-Drop-trailing-json-comma.patch -# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch189: kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch -# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch190: kvm-target-i386-add-a-ucode-rev-property.patch -# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch191: kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch -# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch192: kvm-target-i386-fix-TCG-UCODE_REV-access.patch -# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch193: kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch -# For bz#1791648 - [RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough -Patch194: kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch -# For bz#1703907 - [upstream]QEMU coredump when converting to qcow2: external data file images on block devices with copy_offloading -Patch195: kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch -# For bz#1794692 - Mirror block job stops making progress -Patch196: kvm-mirror-Store-MirrorOp.co-for-debuggability.patch -# For bz#1794692 - Mirror block job stops making progress -Patch197: kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch -# For bz#1782529 - Windows Update Enablement with default smbios strings in qemu -Patch198: kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute 
migrate_cancel) -Patch199: kvm-migration-multifd-clean-pages-after-filling-packet.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch200: kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch201: kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch202: kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch203: kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch204: kvm-qemu-file-Don-t-do-IO-after-shutdown.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch205: kvm-migration-Don-t-send-data-if-we-have-stopped.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch206: kvm-migration-Create-migration_is_running.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch207: kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch -# For bz#1738451 - qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel) -Patch208: kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch -# For bz#1797064 - virtiofsd: Fixes 
-Patch209: kvm-virtiofsd-Remove-fuse_req_getgroups.patch -# For bz#1797064 - virtiofsd: Fixes -Patch210: kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch -# For bz#1797064 - virtiofsd: Fixes -Patch211: kvm-virtiofsd-load_capng-missing-unlock.patch -# For bz#1797064 - virtiofsd: Fixes -Patch212: kvm-virtiofsd-do_read-missing-NULL-check.patch -# For bz#1797064 - virtiofsd: Fixes -Patch213: kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch -# For bz#1797064 - virtiofsd: Fixes -Patch214: kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch -# For bz#1797064 - virtiofsd: Fixes -Patch215: kvm-virtiofsd-Fix-xattr-operations.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch216: kvm-block-nbd-Fix-hang-in-.bdrv_close.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch217: kvm-block-Generic-file-creation-fallback.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch218: kvm-file-posix-Drop-hdev_co_create_opts.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch219: kvm-iscsi-Drop-iscsi_co_create_opts.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch220: kvm-iotests-Add-test-for-image-creation-fallback.patch -# For bz#1640894 - Fix generic file creation fallback for qemu-img nvme:// image creation support -Patch221: kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch222: kvm-iotests-Use-complete_and_wait-in-155.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch223: 
kvm-block-Introduce-bdrv_reopen_commit_post-step.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch224: kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch225: kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch226: kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch227: kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch228: kvm-block-Make-bdrv_get_cumulative_perm-public.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch229: kvm-block-Relax-restrictions-for-blockdev-snapshot.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch230: kvm-iotests-Fix-run_job-with-use_log-False.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch231: kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror 
-Patch232: kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch233: kvm-iotests-Add-iothread-cases-to-155.patch -# For bz#1790482 - bitmaps in backing images can't be modified -# For bz#1805143 - allow late/lazy opening of backing chain for shallow blockdev-mirror -Patch234: kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch -# For bz#1809380 - guest hang during reboot process after migration from RHEl7.8 to RHEL8.2.0. -Patch235: kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch -# For bz#1814336 - [POWER9] QEMU migration-test triggers a kernel warning -Patch236: kvm-migration-Rate-limit-inside-host-pages.patch -# For bz#1811670 - Unneeded qemu-guest-agent dependency on pixman -Patch237: kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch -# For bz#1816007 - qemu-img convert failed to convert with block device as target -Patch238: kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch -# For bz#1816007 - qemu-img convert failed to convert with block device as target -Patch239: kvm-block-trickle-down-the-fallback-image-creation-funct.patch -# For bz#1794692 - Mirror block job stops making progress -Patch240: kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch -# For bz#1794692 - Mirror block job stops making progress -Patch241: kvm-mirror-Wait-only-for-in-flight-operations.patch -# For bz#1817621 - Crash and deadlock with block jobs when using io-threads -Patch242: kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch -# For bz#1817621 - Crash and deadlock with block jobs when using io-threads -Patch243: kvm-replication-assert-we-own-context-before-job_cancel_.patch -# For bz#1817621 - Crash and deadlock with block jobs when using io-threads -Patch244: kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch -# For bz#1817621 - Crash and deadlock 
with block jobs when using io-threads -Patch245: kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch -# For bz#1817621 - Crash and deadlock with block jobs when using io-threads -Patch246: kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch -# For bz#1817621 - Crash and deadlock with block jobs when using io-threads -Patch247: kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch -# For bz#1822682 - QEMU-4.2 fails to start a VM on Azure -Patch248: kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch +Patch0021: 0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch BuildRequires: wget BuildRequires: rpm-build @@ -691,8 +170,6 @@ BuildRequires: systemtap-sdt-devel BuildRequires: libpng-devel # For uuid generation BuildRequires: libuuid-devel -# For BlueZ device support -BuildRequires: bluez-libs-devel # For Braille device support BuildRequires: brlapi-devel # For test suite @@ -930,6 +407,7 @@ the Secure Shell (SSH) protocol. %prep %setup -n qemu-%{version} %autopatch -p1 +mkdir qemu-kvm-build %build %global buildarch %{kvm_target}-softmmu @@ -943,166 +421,174 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" %global block_drivers_list %{block_drivers_list},gluster %endif -./configure \ - --prefix="%{_prefix}" \ - --libdir="%{_libdir}" \ - --sysconfdir="%{_sysconfdir}" \ - --interp-prefix=%{_prefix}/qemu-%M \ - --localstatedir="%{_localstatedir}" \ - --docdir="%{qemudocdir}" \ - --libexecdir="%{_libexecdir}" \ - --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ - --extra-cflags="%{optflags}" \ - --with-pkgversion="%{name}-%{version}-%{release}" \ - --with-confsuffix=/"%{name}" \ - --firmwarepath=%{_prefix}/share/qemu-firmware \ -%if 0%{have_fdt} - --enable-fdt \ -%else - --disable-fdt \ - %endif -%if 0%{have_gluster} - --enable-glusterfs \ -%else - --disable-glusterfs \ -%endif - --enable-guest-agent \ -%ifnarch s390x - --enable-numa \ -%else - --disable-numa \ -%endif - --enable-rbd \ -%if 0%{have_librdma} - 
--enable-rdma \ -%else - --disable-rdma \ -%endif - --disable-pvrdma \ - --enable-seccomp \ -%if 0%{have_spice} - --enable-spice \ - --enable-smartcard \ - --enable-virglrenderer \ -%else - --disable-spice \ - --disable-smartcard \ - --disable-virglrenderer \ -%endif -%if 0%{have_opengl} - --enable-opengl \ -%else - --disable-opengl \ -%endif -%if 0%{have_usbredir} - --enable-usb-redir \ -%else - --disable-usb-redir \ -%endif - --disable-tcmalloc \ -%ifarch x86_64 - --enable-libpmem \ -%else - --disable-libpmem \ -%endif - --enable-vhost-user \ -%ifarch %{ix86} x86_64 - --enable-avx2 \ -%else - --disable-avx2 \ -%endif +cd qemu-kvm-build +../configure \ + --prefix="%{_prefix}" \ + --libdir="%{_libdir}" \ + --sysconfdir="%{_sysconfdir}" \ + --interp-prefix=%{_prefix}/qemu-%M \ + --localstatedir="%{_localstatedir}" \ + --docdir="%{qemudocdir}" \ + --libexecdir="%{_libexecdir}" \ + --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ + --extra-cflags="%{optflags}" \ + --with-pkgversion="%{name}-%{version}-%{release}" \ + --with-confsuffix=/"%{name}" \ + --firmwarepath=%{_prefix}/share/qemu-firmware \ --python=%{__python3} \ --target-list="%{buildarch}" \ --block-drv-rw-whitelist=%{block_drivers_list} \ --audio-drv-list= \ --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \ --with-coroutine=ucontext \ + --with-git=git \ --tls-priority=NORMAL \ - --disable-bluez \ + --enable-attr \ + --disable-auth-pam \ +%ifarch %{ix86} x86_64 + --enable-avx2 \ +%else + --disable-avx2 \ +%endif + --disable-avx512f \ + --disable-bochs \ --disable-brlapi \ + --disable-bsd-user \ + --disable-bzip2 \ --enable-cap-ng \ + --enable-capstone \ + --disable-cloop \ + --disable-cocoa \ --enable-coroutine-pool \ + --disable-crypto-afalg \ --enable-curl \ --disable-curses \ + --enable-debug-info \ + --disable-debug-mutex \ --disable-debug-tcg \ + --disable-dmg \ --enable-docs \ +%if 0%{have_fdt} + --enable-fdt \ +%else + --disable-fdt \ + %endif + --enable-gcrypt \ + --disable-git-update \ 
+%if 0%{have_gluster} + --enable-glusterfs \ +%else + --disable-glusterfs \ +%endif + --enable-gnutls \ --disable-gtk \ + --enable-guest-agent \ + --disable-guest-agent-msi \ + --disable-hax \ + --disable-hvf \ + --enable-iconv \ + --disable-jemalloc \ --enable-kvm \ --enable-libiscsi \ --disable-libnfs \ +%ifarch x86_64 + --enable-libpmem \ +%else + --disable-libpmem \ +%endif --enable-libssh \ --enable-libusb \ - --disable-bzip2 \ + --disable-libxml2 \ --enable-linux-aio \ - --disable-live-block-migration \ - --enable-lzo \ - --enable-pie \ - --disable-qom-cast-debug \ - --disable-sdl \ - --enable-snappy \ - --disable-sparse \ - --disable-strip \ - --enable-tpm \ - --enable-trace-backend=dtrace \ - --disable-vde \ - --disable-vhost-scsi \ - --disable-vxhs \ - --disable-virtfs \ - --disable-vnc-jpeg \ - --disable-vte \ - --enable-vnc-png \ - --enable-vnc-sasl \ - --enable-werror \ - --disable-xen \ - --disable-xfsctl \ - --enable-gnutls \ - --enable-gcrypt \ - --disable-nettle \ - --enable-attr \ - --disable-bsd-user \ - --disable-cocoa \ - --enable-debug-info \ - --disable-guest-agent-msi \ - --disable-hax \ - --disable-jemalloc \ + --disable-linux-io-uring \ --disable-linux-user \ - --enable-modules \ - --disable-netmap \ - --disable-replication \ - --enable-system \ - --enable-tools \ - --disable-user \ - --enable-vhost-net \ - --enable-vhost-vsock \ - --enable-vnc \ - --enable-mpath \ - --disable-xen-pci-passthrough \ - --enable-tcg \ - --with-git=git \ - --disable-sanitizers \ - --disable-hvf \ - --disable-whpx \ + --disable-live-block-migration \ + --disable-lzfse \ + --enable-lzo \ --enable-malloc-trim \ --disable-membarrier \ - --disable-vhost-crypto \ - --disable-libxml2 \ - --enable-capstone \ - --disable-git-update \ - --disable-crypto-afalg \ - --disable-debug-mutex \ - --disable-bochs \ - --disable-cloop \ - --disable-dmg \ - --disable-qcow1 \ - --disable-vdi \ - --disable-vvfat \ - --disable-qed \ + --enable-modules \ + --disable-module-upgrades \ + 
--enable-mpath \ + --disable-netmap \ + --disable-nettle \ +%ifnarch s390x + --enable-numa \ +%else + --disable-numa \ +%endif +%if 0%{have_opengl} + --enable-opengl \ +%else + --disable-opengl \ +%endif --disable-parallels \ + --enable-pie \ + --disable-pvrdma \ + --disable-qcow1 \ + --disable-qed \ + --disable-qom-cast-debug \ + --enable-rbd \ +%if 0%{have_librdma} + --enable-rdma \ +%else + --disable-rdma \ +%endif + --disable-replication \ + --disable-sanitizers \ + --disable-sdl \ + --disable-sdl-image \ + --enable-seccomp \ --disable-sheepdog \ - --disable-auth-pam \ - --enable-iconv \ - --disable-lzfse \ + --enable-snappy \ + --disable-sparse \ +%if 0%{have_spice} + --enable-smartcard \ + --enable-spice \ +%else + --disable-smartcard \ + --disable-spice \ +%endif + --disable-strip \ + --enable-system \ + --enable-tcg \ + --disable-tcmalloc \ + --enable-tools \ + --enable-tpm \ + --enable-trace-backend=dtrace \ +%if 0%{have_usbredir} + --enable-usb-redir \ +%else + --disable-usb-redir \ +%endif + --disable-user \ + --disable-vde \ + --disable-vdi \ + --disable-vhost-crypto \ --enable-vhost-kernel \ + --enable-vhost-net \ + --disable-vhost-scsi \ + --enable-vhost-user \ + --enable-vhost-vsock \ +%if 0%{have_spice} + --enable-virglrenderer \ +%else + --disable-virglrenderer \ +%endif + --disable-virtfs \ + --enable-vnc \ + --disable-vnc-jpeg \ + --enable-vnc-png \ + --enable-vnc-sasl \ + --disable-vte \ + --disable-vvfat \ + --disable-vxhs \ + --enable-werror \ + --disable-whpx \ + --disable-xen \ + --disable-xen-pci-passthrough \ + --disable-xfsctl \ + --enable-xkbcommon \ --without-default-devices echo "config-host.mak contents:" @@ -1131,6 +617,7 @@ gcc %{SOURCE6} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o ksmctl gcc %{SOURCE35} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o udev-kvm-check %install +cd qemu-kvm-build %define _udevdir %(pkg-config --variable=udevdir udev) %define _udevrulesdir %{_udevdir}/rules.d @@ -1175,13 +662,13 @@ cp -R tests/acceptance/* 
$RPM_BUILD_ROOT%{testsdir}/tests/acceptance/ # Install qemu.py and qmp/ scripts required to run avocado_qemu tests cp -R python/qemu $RPM_BUILD_ROOT%{testsdir}/python cp -R scripts/qmp/* $RPM_BUILD_ROOT%{testsdir}/scripts/qmp -install -p -m 0755 tests/Makefile.include $RPM_BUILD_ROOT%{testsdir}/tests/ +install -p -m 0755 ../tests/Makefile.include $RPM_BUILD_ROOT%{testsdir}/tests/ # Install qemu-iotests cp -R tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ # Avoid ambiguous 'python' interpreter name find $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env python+%{__python3}+' {} \; -find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env python+%{__python3}+' {} \; +find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env python3+%{__python3}+' {} \; find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/python+%{__python3}+' {} \; install -p -m 0644 %{SOURCE36} $RPM_BUILD_ROOT%{testsdir}/README @@ -1235,7 +722,6 @@ rm $RPM_BUILD_ROOT%{_bindir}/elf2dmp # Install simpletrace install -m 0755 scripts/simpletrace.py $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py # Avoid ambiguous 'python' interpreter name -sed -i -e '1 s/python/python3/' $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool scripts/tracetool/*.py mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/backend @@ -1244,11 +730,11 @@ mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format scripts/tracetool/format/*.py mkdir -p $RPM_BUILD_ROOT%{qemudocdir} -install -p -m 0644 -t ${RPM_BUILD_ROOT}%{qemudocdir} Changelog README.rst README.systemtap COPYING COPYING.LIB LICENSE docs/interop/qmp-spec.txt +install -p -m 0644 -t 
${RPM_BUILD_ROOT}%{qemudocdir} ../Changelog ../README.rst ../README.systemtap ../COPYING ../COPYING.LIB ../LICENSE ../docs/interop/qmp-spec.txt chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man1/* chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man8/* -install -D -p -m 0644 qemu.sasl $RPM_BUILD_ROOT%{_sysconfdir}/sasl2/%{name}.conf +install -D -p -m 0644 ../qemu.sasl $RPM_BUILD_ROOT%{_sysconfdir}/sasl2/%{name}.conf # Provided by package openbios rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-ppc @@ -1276,6 +762,7 @@ rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/firmware rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-*.fd rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-licenses.txt +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-sifive_u-fw_jump.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-virt-fw_jump.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-sifive_u-fw_jump.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-virt-fw_jump.bin @@ -1305,6 +792,9 @@ rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/QEMU,cgthree.bin rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-client rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-server +# Remove qemu-storage-daemon +rm -rf ${RPM_BUILD_ROOT}%{_bindir}/qemu-storage-daemon + # Remove efi roms rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/efi*.rom @@ -1378,11 +868,15 @@ chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/block-*.so # Remove buildinfo rm -rf $RPM_BUILD_ROOT%{qemudocdir}/interop/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/system/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/tools/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/user/.buildinfo # Remove spec rm -rf $RPM_BUILD_ROOT%{qemudocdir}/specs %check +cd qemu-kvm-build export DIFF=diff; make check V=1 %post -n qemu-kvm-core @@ -1431,18 +925,20 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %dir %{qemudocdir} %doc %{qemudocdir}/Changelog %doc %{qemudocdir}/README.rst -%doc %{qemudocdir}/qemu-doc.html %doc 
%{qemudocdir}/COPYING %doc %{qemudocdir}/COPYING.LIB %doc %{qemudocdir}/LICENSE %doc %{qemudocdir}/README.systemtap %doc %{qemudocdir}/qmp-spec.txt -%doc %{qemudocdir}/qemu-doc.txt %doc %{qemudocdir}/qemu-ga-ref.html %doc %{qemudocdir}/qemu-ga-ref.txt %doc %{qemudocdir}/qemu-qmp-ref.html %doc %{qemudocdir}/qemu-qmp-ref.txt %doc %{qemudocdir}/interop/* +%doc %{qemudocdir}/index.html +%doc %{qemudocdir}/system/* +%doc %{qemudocdir}/tools/* +%doc %{qemudocdir}/user/* %{_mandir}/man7/qemu-qmp-ref.7* %{_mandir}/man7/qemu-cpu-models.7* %{_bindir}/qemu-keymap @@ -1452,6 +948,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_unitdir}/qemu-pr-helper.service %{_unitdir}/qemu-pr-helper.socket %{_mandir}/man7/qemu-ga-ref.7* +%{_mandir}/man1/virtiofsd.1* %dir %{_datadir}/%{name}/ %{_datadir}/%{name}/keymaps/ @@ -1576,6 +1073,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +Tue May 12 2020 Danilo Cesar Lemes de Paula - 5.0.0-0 +- Temporary rebase of qemu-kvm to 5.0.0 + * Tue Apr 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-19.el8_2 - kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch [bz#1822682] - Resolves: bz#1822682 diff --git a/sources b/sources index 46350e1..23b2923 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-4.2.0.tar.xz) = 2a79973c2b07c53e8c57a808ea8add7b6b2cbca96488ed5d4b669ead8c9318907dec2b6109f180fc8ca8f04c0f73a56e82b3a527b5626b799d7e849f2474ec56 +SHA512 (qemu-5.0.0.tar.xz) = 34c87dfc56c5a63c7649cdc1281cb742e7665e9f3fe2c1dfc1c6b3abf0ca937a2b8a0d4d8894060f6f3e03f4ba6616a11097c48b32db2cbc8925f87255b4acb5 From 350fd5e3886ffe04434e4cad93d938a187913b4d Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Tue, 12 May 2020 21:16:23 -0400 Subject: [PATCH 078/195] tmp rebase of qemu-kvm to 5.0.0 - Resolves: rhbz#1801485 --- qemu-kvm.spec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qemu-kvm.spec b/qemu-kvm.spec index db9f5c6..cfd328b 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.0.0 -Release: 0%{?dist}.wrb200506 +Release: 0%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -1073,7 +1073,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog -Tue May 12 2020 Danilo Cesar Lemes de Paula - 5.0.0-0 +* Tue May 12 2020 Danilo Cesar Lemes de Paula - 5.0.0-0 - Temporary rebase of qemu-kvm to 5.0.0 * Tue Apr 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-19.el8_2 From 68a8aa6b4d9183b2a9fede54885bc592ef6a3686 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Wed, 13 May 2020 10:58:13 -0400 Subject: [PATCH 079/195] Properly synchronize with the actual 5.0.0 GA of qemu -Resolves: rhbz#1809650 (rebase qemu-kvm to 5.1 for RHEL AV-8.3.0) --- ...at-Adding-slirp-to-the-exploded-tree.patch | 2 +- 0005-Initial-redhat-build.patch | 12 +++--- 0006-Enable-disable-devices-for-RHEL.patch | 43 +++++++++---------- ...Machine-type-related-general-changes.patch | 27 ++++++------ 0008-Add-aarch64-machine-types.patch | 29 ++++++------- 0009-Add-ppc64-machine-types.patch | 2 +- 0010-Add-s390x-machine-types.patch | 10 ++--- 0011-Add-x86_64-machine-types.patch | 38 ++++++++-------- 0012-Enable-make-check.patch | 4 +- ...mber-of-devices-that-can-be-assigned.patch | 2 +- ...Add-support-statement-to-help-output.patch | 4 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 4 +- 0016-Add-support-for-simpletrace.patch | 6 +-- ...documentation-instead-of-qemu-system.patch | 2 +- 0018-usb-xhci-Fix-PCI-capability-order.patch | 2 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 2 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 2 +- ...x-blockdev-reopen-API-with-feature-f.patch | 2 +- qemu-kvm.spec | 1 + sources | 2 +- 20 files changed, 95 insertions(+), 101 deletions(-) diff --git a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch index 04e73be..da6d424 100644 --- a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch +++ b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch @@ -1,4 +1,4 @@ -From e4d185c8c4efbf15a9380c1433bc66b49a09e79d Mon Sep 17 00:00:00 2001 +From 606314e48b9307fd4c79b08017a143d9f749d395 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 23 Apr 2020 05:26:54 +0200 Subject: redhat: Adding slirp to the exploded tree diff --git a/0005-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch index 174bcc9..21a0cf8 100644 --- a/0005-Initial-redhat-build.patch +++ b/0005-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From
4b7f57db366243eeafc0528d3cff4fa6967e7522 Mon Sep 17 00:00:00 2001 +From b510775819649eeb2499b9d4b9baf93538d4bde7 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build @@ -11,7 +11,7 @@ several issues are fixed in QEMU tree: - Man page renamed from qemu to qemu-kvm - man page is installed using make install so we have to fix it in qemu tree -This rebase includes changes up to qemu-kvm-4.2.0-20.el8 +This rebase includes changes up to qemu-kvm-4.2.0-19.el8 Rebase notes (3.1.0): - added new configure options @@ -120,19 +120,19 @@ Conflicts: redhat/Makefile | 88 ++ redhat/Makefile.common | 51 + redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 2820 +++++++++++++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 2812 +++++++++++++++++++++++++++++++++++ redhat/scripts/extract_build_cmd.py | 2 +- redhat/scripts/process-patches.sh | 7 +- tests/check-block.sh | 2 + ui/vnc.c | 2 +- - 11 files changed, 3008 insertions(+), 8 deletions(-) + 11 files changed, 3000 insertions(+), 8 deletions(-) create mode 100644 redhat/Makefile create mode 100644 redhat/Makefile.common create mode 100644 redhat/README.tests create mode 100644 redhat/qemu-kvm.spec.template diff --git a/Makefile b/Makefile -index 34275f5..aee2e8e 100644 +index 8a9113e..a89cc44 100644 --- a/Makefile +++ b/Makefile @@ -548,6 +548,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM @@ -143,7 +143,7 @@ index 34275f5..aee2e8e 100644 .PHONY: capstone/all capstone/all: .git-submodule-status -@@ -883,7 +884,7 @@ install-doc: $(DOCS) install-sphinxdocs +@@ -884,7 +885,7 @@ install-doc: $(DOCS) install-sphinxdocs $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)" ifdef CONFIG_POSIX $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0006-Enable-disable-devices-for-RHEL.patch index ff90dbb..7ebe37c 100644 --- a/0006-Enable-disable-devices-for-RHEL.patch +++ 
b/0006-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From db33b6f22ff44edfb3ca54ef7bf051ccfc53e479 Mon Sep 17 00:00:00 2001 +From 950e13b225f6e890a2c19223b8a05cdc9ecabfa7 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 11 Jan 2016 11:53:33 +0100 Subject: Enable/disable devices for RHEL @@ -65,12 +65,9 @@ Merged patches (4.2.0): Merged patches (weekly-200108): - 4543a3c i386: Remove cpu64-rhel6 CPU model - -Conflicts: - target/arm/cpu.c --- backends/Makefile.objs | 3 +- - default-configs/aarch64-rh-devices.mak | 21 +++++++ + default-configs/aarch64-rh-devices.mak | 20 +++++++ default-configs/aarch64-softmmu.mak | 10 ++-- default-configs/ppc64-rh-devices.mak | 34 +++++++++++ default-configs/ppc64-softmmu.mak | 10 ++-- @@ -95,13 +92,13 @@ Conflicts: qemu-options.hx | 4 -- redhat/qemu-kvm.spec.template | 5 +- softmmu/vl.c | 2 +- - target/arm/cpu.c | 3 + + target/arm/cpu.c | 4 +- target/i386/cpu.c | 17 +++--- target/ppc/cpu-models.c | 10 ++++ target/s390x/cpu_models.c | 3 + target/s390x/kvm.c | 8 +++ util/memfd.c | 2 +- - 32 files changed, 281 insertions(+), 36 deletions(-) + 32 files changed, 280 insertions(+), 37 deletions(-) create mode 100644 default-configs/aarch64-rh-devices.mak create mode 100644 default-configs/ppc64-rh-devices.mak create mode 100644 default-configs/rh-virtio.mak @@ -125,10 +122,10 @@ index 28a847c..0eda216 100644 dbus-vmstate.o-cflags = $(GIO_CFLAGS) diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak new file mode 100644 -index 0000000..0d756a2 +index 0000000..a1ed641 --- /dev/null +++ b/default-configs/aarch64-rh-devices.mak -@@ -0,0 +1,21 @@ +@@ -0,0 +1,20 @@ +include rh-virtio.mak + +CONFIG_ARM_GIC_KVM=y @@ -149,7 +146,6 @@ index 0000000..0d756a2 +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_PCI=y +CONFIG_XIO3130=y -+CONFIG_NVDIMM=y diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak index 958b1e0..8f6867d 100644 --- a/default-configs/aarch64-softmmu.mak 
@@ -426,7 +422,7 @@ index 534a6a1..bd62442 100644 obj-$(CONFIG_PXA2XX) += pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o obj-$(CONFIG_DIGIC) += digic.o diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 9628cc1..37989fe 100644 +index 33bc9e2..3cb8779 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -46,6 +46,8 @@ @@ -452,7 +448,7 @@ index 9628cc1..37989fe 100644 + if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); - return; + } diff --git a/hw/bt/Makefile.objs b/hw/bt/Makefile.objs new file mode 100644 index 0000000..e678e9e @@ -492,10 +488,10 @@ index 77a7d62..68c793e 100644 common-obj-$(CONFIG_ADS7846) += ads7846.o common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index 1f29731..cac9e40 100644 +index 0d391e1..fb1b2c4 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c -@@ -2973,6 +2973,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) +@@ -2975,6 +2975,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); int16_t device_id = pc->device_id; @@ -637,7 +633,7 @@ index 292d4e7..1df25ae 100644 DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" diff --git a/softmmu/vl.c b/softmmu/vl.c -index afd2615..00f7604 100644 +index 32c0047..46b4d98 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -144,7 +144,7 @@ static Chardev **serial_hds; @@ -650,21 +646,22 @@ index afd2615..00f7604 100644 static int no_reboot; int no_shutdown = 0; diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 5d64adf..f1d18b8 100644 +index a79f233..dcdf3b2 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c -@@ -2904,6 +2904,9 @@ static void arm_cpu_register_types(void) +@@ -2905,7 +2905,9 @@ static void arm_cpu_register_types(void) + 
type_register_static(&idau_interface_type_info); while (info->name) { - arm_cpu_register(info); +- cpu_register(info); + /* RHEL specific: Filter out unsupported cpu models */ -+ if (!strcmp(info->name, "cortex-a15-arm-cpu")) -+ arm_cpu_register(info); ++ if (!strcmp(info->name, "cortex-a15")) ++ cpu_register(info); info++; } diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 9c256ab..26a8584 100644 +index 90ffc5f..d98618c 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1801,20 +1801,21 @@ static CPUCaches epyc_rome_cache_info = { @@ -782,10 +779,10 @@ index 7c32180..88bf4a9 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 69881a0..9802878 100644 +index 7f7ebab..8dc2df2 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c -@@ -2494,6 +2494,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2429,6 +2429,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } diff --git a/0007-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch index a302d2c..bb565a1 100644 --- a/0007-Machine-type-related-general-changes.patch +++ b/0007-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From 799c934a1ec957ae2e163f367f5f7550949178da Mon Sep 17 00:00:00 2001 +From 355248605abeab3ad30da6be267d6352dfea65f0 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -35,9 +35,6 @@ Merged patches (4.2.0): Merged patches (weekly-200318): - e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) - -Merged patches (weekly-200506): -- 8f9f4d8 compat: disable 'edid' for virtio-gpu-ccw --- hw/acpi/ich9.c | 16 ++++ hw/acpi/piix4.c | 5 +- @@ -122,10 +119,10 @@ index 964d6f5..b8458ba 100644 
DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_pci_hotplug, true), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 171e690..25e6839 100644 +index 7dc96ab..1f8d6bf 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1436,7 +1436,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1413,7 +1413,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, @@ -190,7 +187,7 @@ index c822a9a..0c7f1ff 100644 } diff --git a/hw/core/machine.c b/hw/core/machine.c -index c1a444c..af407cc 100644 +index c1a444c..3c45365 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -28,6 +28,176 @@ @@ -242,7 +239,7 @@ index c1a444c..af407cc 100644 + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ + { "virtio-vga", "edid", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ -+ { "virtio-gpu-device", "edid", "false" }, ++ { "virtio-gpu-pci", "edid", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ + { "virtio-device", "use-started", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */ @@ -384,10 +381,10 @@ index 0633ed3..b703e9e 100644 }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index b75087d..b255d56 100644 +index 22dee0e..26ff640 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, +@@ -176,6 +176,8 @@ static void pc_init1(MachineState *machine, smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -397,10 +394,10 @@ index b75087d..b255d56 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index d2806c1..461e1cd 100644 +index d37c425..f67232e 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -202,6 +202,8 @@ static void pc_q35_init(MachineState *machine) +@@ -201,6 +201,8 @@ static void pc_q35_init(MachineState *machine) smbios_set_defaults("QEMU", 
"Standard PC (Q35 + ICH9, 2009)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -507,7 +504,7 @@ index 70aca7e..0950cee 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c -index d18c099..8a3bd68 100644 +index dc4269c..3fb9469 100644 --- a/hw/rtc/mc146818rtc.c +++ b/hw/rtc/mc146818rtc.c @@ -42,6 +42,7 @@ @@ -763,7 +760,7 @@ index 02a0ced..67e38a1 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 05e1945..811c3d5 100644 +index 6ab6eda..69fca4e 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -100,6 +100,9 @@ typedef struct PCMachineClass { @@ -790,7 +787,7 @@ index c24d968..b353438 100644 + #endif diff --git a/migration/migration.c b/migration/migration.c -index 177cce9..2864560 100644 +index 187ac04..041b773 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -128,6 +128,8 @@ enum mig_rp_message_type { diff --git a/0008-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch index 320b19f..ce2aba5 100644 --- a/0008-Add-aarch64-machine-types.patch +++ b/0008-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From a373b0198f9268478a5211efb6a545d9c598b364 Mon Sep 17 00:00:00 2001 +From 449b1fd5bc6075a535cb57e6e0b0eda837663a52 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -38,23 +38,23 @@ Merged patches (4.1.0): - c3e39ef aarch64: Add virt-rhel8.1.0 machine type for ARM - 59a46d1 aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine --- - hw/arm/virt.c | 169 +++++++++++++++++++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 168 +++++++++++++++++++++++++++++++++++++++++++++++++- include/hw/arm/virt.h | 11 ++++ - 2 files changed, 179 insertions(+), 1 deletion(-) + 2 files changed, 178 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c 
-index 25e6839..1387ff6 100644 +index 1f8d6bf..1559689 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -79,6 +79,7 @@ +@@ -78,6 +78,7 @@ + #include "hw/virtio/virtio-iommu.h" #include "hw/char/pl011.h" - #include "qemu/guest-random.h" +#if 0 /* Disabled for Red Hat Enterprise Linux */ #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -105,7 +106,49 @@ +@@ -104,7 +105,49 @@ DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) @@ -105,7 +105,7 @@ index 25e6839..1387ff6 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -1914,6 +1957,7 @@ static void machvirt_init(MachineState *machine) +@@ -1879,6 +1922,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -113,7 +113,7 @@ index 25e6839..1387ff6 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1942,6 +1986,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -1907,6 +1951,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -121,7 +121,7 @@ index 25e6839..1387ff6 100644 static bool virt_get_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2218,6 +2263,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2176,6 +2221,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return requested_pa_size > 40 ? 
requested_pa_size : 0; } @@ -129,7 +129,7 @@ index 25e6839..1387ff6 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -2478,3 +2524,124 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -2435,3 +2481,123 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -163,7 +163,6 @@ index 25e6839..1387ff6 100644 + hc->plug = virt_machine_device_plug_cb; + hc->unplug_request = virt_machine_device_unplug_request_cb; + mc->numa_mem_supported = true; -+ mc->nvdimm_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->default_ram_id = "mach-virt.ram"; + @@ -255,10 +254,10 @@ index 25e6839..1387ff6 100644 +} +DEFINE_RHEL_MACHINE_AS_LATEST(8, 2, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 6d67ace..e44e25c 100644 +index 60b2f52..ae44797 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -156,6 +156,7 @@ typedef struct { +@@ -155,6 +155,7 @@ typedef struct { #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -266,7 +265,7 @@ index 6d67ace..e44e25c 100644 #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") #define VIRT_MACHINE(obj) \ OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) -@@ -164,6 +165,16 @@ typedef struct { +@@ -163,6 +164,16 @@ typedef struct { #define VIRT_MACHINE_CLASS(klass) \ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) diff --git a/0009-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch index b14fcee..0c3b0df 100644 --- a/0009-Add-ppc64-machine-types.patch +++ b/0009-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From c50a71e2a577b532a904e70d23f7533aca0b3a6f Mon Sep 17 00:00:00 2001 +From a07369c763a13a1bbc97b162f5d029a0d98d6fd3 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types diff --git a/0010-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch index bed50e9..4b5b8c5 100644 --- a/0010-Add-s390x-machine-types.patch +++ b/0010-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 349c332a69933b977b40f4a2198236611d002818 Mon Sep 17 00:00:00 2001 +From d51b2da2b2f22313593ec618f510ebf7452b94af Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -25,10 +25,10 @@ Merged patches (4.2.0): 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 45292fb..1b3a04c 100644 +index 0fa00a9..d61d85d 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -777,7 +777,7 @@ bool css_migration_enabled(void) +@@ -648,7 +648,7 @@ bool css_migration_enabled(void) { \ MachineClass *mc = MACHINE_CLASS(oc); \ ccw_machine_##suffix##_class_options(mc); \ @@ -37,7 +37,7 @@ index 45292fb..1b3a04c 100644 if (latest) { \ mc->alias = "s390-ccw-virtio"; \ mc->is_default = true; \ -@@ -801,6 +801,7 @@ bool css_migration_enabled(void) +@@ -672,6 +672,7 @@ bool 
css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) @@ -45,7 +45,7 @@ index 45292fb..1b3a04c 100644 static void ccw_machine_5_0_instance_options(MachineState *machine) { } -@@ -1041,6 +1042,74 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -912,6 +913,74 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); diff --git a/0011-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch index fba69c6..338c262 100644 --- a/0011-Add-x86_64-machine-types.patch +++ b/0011-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 9da7d3c4b5a90c155ea4227c412b0ebd4d2a9b87 Mon Sep 17 00:00:00 2001 +From fa1624d37542a12fc2668b23b2a7a9a650457dcc Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -54,10 +54,10 @@ Merged patches (weekly-200318): 8 files changed, 675 insertions(+), 7 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index 2e15f68..8dbf49b 100644 +index 23c77ee..6cfcdfc 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c -@@ -213,6 +213,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) +@@ -207,6 +207,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) pm->fadt.reset_reg = r; pm->fadt.reset_val = 0xf; pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; @@ -68,10 +68,10 @@ index 2e15f68..8dbf49b 100644 } diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index f6b8431..2ed002f 100644 +index 5143c51..382a17b 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -341,6 +341,263 @@ GlobalProperty pc_compat_1_4[] = { +@@ -340,6 +340,263 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -335,7 +335,7 @@ index f6b8431..2ed002f 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1031,7 +1288,8 @@ 
void pc_memory_init(PCMachineState *pcms, +@@ -1030,7 +1287,8 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr = g_malloc(sizeof(*option_rom_mr)); memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, &error_fatal); @@ -345,7 +345,7 @@ index f6b8431..2ed002f 100644 memory_region_set_readonly(option_rom_mr, true); } memory_region_add_subregion_overlap(rom_memory, -@@ -1937,6 +2195,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1936,6 +2194,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->linuxboot_dma_enabled = true; pcmc->pvh_enabled = true; assert(!mc->get_hotplug_handler); @@ -354,7 +354,7 @@ index f6b8431..2ed002f 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1948,7 +2208,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1947,7 +2207,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->hot_add_cpu = pc_hot_add_cpu; mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; @@ -365,7 +365,7 @@ index f6b8431..2ed002f 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index b255d56..8eb64d1 100644 +index 26ff640..74a434d 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -53,6 +53,7 @@ @@ -376,7 +376,7 @@ index b255d56..8eb64d1 100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -174,8 +175,8 @@ static void pc_init1(MachineState *machine, +@@ -173,8 +174,8 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ @@ -387,7 +387,7 @@ index b255d56..8eb64d1 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -312,6 +313,7 @@ static void pc_init1(MachineState *machine, +@@ 
-310,6 +311,7 @@ static void pc_init1(MachineState *machine, * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). */ @@ -395,7 +395,7 @@ index b255d56..8eb64d1 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -975,3 +977,207 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -973,3 +975,207 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -604,10 +604,10 @@ index b255d56..8eb64d1 100644 +DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 461e1cd..f5ae759 100644 +index f67232e..daa05f4 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -199,8 +199,8 @@ static void pc_q35_init(MachineState *machine) +@@ -198,8 +198,8 @@ static void pc_q35_init(MachineState *machine) if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ @@ -618,7 +618,7 @@ index 461e1cd..f5ae759 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -335,6 +335,7 @@ static void pc_q35_init(MachineState *machine) +@@ -333,6 +333,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -626,7 +626,7 @@ index 461e1cd..f5ae759 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -549,3 +550,160 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -547,3 +548,160 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -801,7 +801,7 @@ index 0046ab5..d81225b 100644 int nb_nodes, ram_addr_t size); bool ignore_boot_device_suffixes; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 811c3d5..5b90f8b 100644 +index 69fca4e..2e0395b 100644 --- 
a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -118,6 +118,9 @@ typedef struct PCMachineClass { @@ -814,7 +814,7 @@ index 811c3d5..5b90f8b 100644 } PCMachineClass; #define TYPE_PC_MACHINE "generic-pc-machine" -@@ -260,6 +263,36 @@ extern const size_t pc_compat_1_5_len; +@@ -273,6 +276,36 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; @@ -852,7 +852,7 @@ index 811c3d5..5b90f8b 100644 * depending on QEMU versions up to QEMU 2.4. */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 26a8584..dc4d1c9 100644 +index d98618c..4cfaf91 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1806,7 +1806,7 @@ static X86CPUDefinition builtin_x86_defs[] = { diff --git a/0012-Enable-make-check.patch b/0012-Enable-make-check.patch index cab4413..1c4fe6c 100644 --- a/0012-Enable-make-check.patch +++ b/0012-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 0114b7010c87be70014b170ffdf66e1317f6becc Mon Sep 17 00:00:00 2001 +From 19bc7ed029e014148be05551a2f36b7d41035b1c Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:48:41 +0200 Subject: Enable make check @@ -82,7 +82,7 @@ index 034d3a3..aadc413 100755 *) ;; diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 1710470..0711b66 100644 +index 435dccd..4ad61bc 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -92,7 +92,7 @@ diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index 77ec099..b85574f 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From e77808a25ee638b717e1507a1e55cbf8350afbfd Mon Sep 17 00:00:00 2001 +From 59faef98f835eb3522a0b49f6af6302b20b17b72 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned diff 
--git a/0014-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch index d87713a..54d2b1b 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 8b189d52c8b8e2c251d76c7b00dc4a2a0a570bf8 Mon Sep 17 00:00:00 2001 +From 583a53be6a7c1c7291ab3fa0d2a4b864ad07358f Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -23,7 +23,7 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c -index 00f7604..5ba8c19 100644 +index 46b4d98..552bae5 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -1674,9 +1674,17 @@ static void version(void) diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch index 89e14e7..1a7f388 100644 --- a/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From e0aee69fcafe1c3656db2676b8a0d379a48c299c Mon Sep 17 00:00:00 2001 +From 08d4d4615d252407ba4753e1b53087df813eb9d1 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -103,7 +103,7 @@ index 439a4ef..6f804b8 100644 if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " diff --git a/softmmu/vl.c b/softmmu/vl.c -index 5ba8c19..e98ab6b 100644 +index 552bae5..bdc4f7f 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -115,6 +115,8 @@ diff --git a/0016-Add-support-for-simpletrace.patch b/0016-Add-support-for-simpletrace.patch index d64625b..6725065 100644 --- a/0016-Add-support-for-simpletrace.patch +++ b/0016-Add-support-for-simpletrace.patch @@ -1,4 +1,4 @@ -From 565cee8e4965ece9e0c271cad813263b606b3e65 Mon Sep 17 00:00:00 2001 +From 6941da1d2cca51e23c525630610e10202db3434b Mon Sep 17 00:00:00 2001 From: 
Miroslav Rezanina Date: Thu, 8 Oct 2015 09:50:17 +0200 Subject: Add support for simpletrace @@ -37,10 +37,10 @@ Merged patches (2.3.0): create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp diff --git a/Makefile b/Makefile -index aee2e8e..ded56e5 100644 +index a89cc44..dcd088e 100644 --- a/Makefile +++ b/Makefile -@@ -999,6 +999,10 @@ endif +@@ -1000,6 +1000,10 @@ endif $(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \ done $(INSTALL_DATA) $(BUILD_DIR)/trace-events-all "$(DESTDIR)$(qemu_datadir)/trace-events-all" diff --git a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 7065d7d..cc223aa 100644 --- a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From 4375e8b568866c7ddbde19de1bb999cf3ebfe6fe Mon Sep 17 00:00:00 2001 +From aaf64f4f80e6266921d9547f0b3b71e57abaf922 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 29 Jan 2020 09:30:03 +0100 Subject: Use qemu-kvm in documentation instead of qemu-system- diff --git a/0018-usb-xhci-Fix-PCI-capability-order.patch b/0018-usb-xhci-Fix-PCI-capability-order.patch index 8137171..33c77ab 100644 --- a/0018-usb-xhci-Fix-PCI-capability-order.patch +++ b/0018-usb-xhci-Fix-PCI-capability-order.patch @@ -1,4 +1,4 @@ -From 50d4f1973a86696cb7487173cbdbc68453445c54 Mon Sep 17 00:00:00 2001 +From ffa7a8516256750abd8c3bfb6964811582d36835 Mon Sep 17 00:00:00 2001 From: "Dr. 
David Alan Gilbert" Date: Fri, 5 May 2017 19:06:14 +0200 Subject: usb-xhci: Fix PCI capability order diff --git a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index c087f5e..ac784ce 100644 --- a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 5d9529f40e7cc092a57f9203aad22f3644a2b6d6 Mon Sep 17 00:00:00 2001 +From 6f3b48a5ead2663197589066dfd4ed81d4658fe2 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] diff --git a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index 41c655c..e999a52 100644 --- a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From 3ea4a35afce28805241b3be3c11de605600ecda1 Mon Sep 17 00:00:00 2001 +From 45b492a99791a4a463486be35e9ab44986507efe Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts diff --git a/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch b/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch index be042ba..026ba5b 100644 --- a/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +++ b/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch @@ -1,4 +1,4 @@ -From f07c3ee209b3897efebb4cf008c88a390205a5dd Mon Sep 17 00:00:00 2001 +From ce12fa518d472f85d7d036c65867f40ec69ca759 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 13 Mar 2020 12:34:32 +0000 Subject: block: Versioned x-blockdev-reopen API with feature flag diff --git a/qemu-kvm.spec b/qemu-kvm.spec index cfd328b..75a223b 100644 --- a/qemu-kvm.spec +++ 
b/qemu-kvm.spec @@ -1075,6 +1075,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog * Tue May 12 2020 Danilo Cesar Lemes de Paula - 5.0.0-0 - Temporary rebase of qemu-kvm to 5.0.0 +- Updated the tarball to actually point to 5.0.0 GA * Tue Apr 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-19.el8_2 - kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch [bz#1822682] diff --git a/sources b/sources index 23b2923..bc432c6 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-5.0.0.tar.xz) = 34c87dfc56c5a63c7649cdc1281cb742e7665e9f3fe2c1dfc1c6b3abf0ca937a2b8a0d4d8894060f6f3e03f4ba6616a11097c48b32db2cbc8925f87255b4acb5 +SHA512 (qemu-5.0.0.tar.xz) = 21ef0cbe107c468a40f0fa2635db2a40048c8790b629dfffca5cd62bb1b502ea8eb133bfc40df5ecf1489e2bffe87f6829aee041cb8a380ff04a8afa23b39fcf From 3a50cdf32ace32d66cd36388ed02dcbbc9bbd469 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Wed, 8 Jul 2020 09:37:45 -0400 Subject: [PATCH 080/195] Update AV-8.2.1 with the latest weekly rebase from qemu-5.0.0 --- ...at-Adding-slirp-to-the-exploded-tree.patch | 2 +- 0005-Initial-redhat-build.patch | 48 +++-- 0006-Enable-disable-devices-for-RHEL.patch | 185 +++++++----------- ...Machine-type-related-general-changes.patch | 93 ++++----- 0008-Add-aarch64-machine-types.patch | 66 ++++--- 0009-Add-ppc64-machine-types.patch | 34 ++-- 0010-Add-s390x-machine-types.patch | 12 +- 0011-Add-x86_64-machine-types.patch | 59 +++--- 0012-Enable-make-check.patch | 12 +- ...mber-of-devices-that-can-be-assigned.patch | 8 +- ...Add-support-statement-to-help-output.patch | 4 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 12 +- 0016-Add-support-for-simpletrace.patch | 6 +- ...documentation-instead-of-qemu-system.patch | 6 +- 0018-usb-xhci-Fix-PCI-capability-order.patch | 2 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 4 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 6 +- ...x-blockdev-reopen-API-with-feature-f.patch | 6 +- 
...-vTPM-for-POWER-in-downstream-config.patch | 41 ++++ ...t-fix-5.0-rebase-missing-ISA-TPM-TIS.patch | 43 ++++ 0025-redhat-define-hw_compat_8_2.patch | 81 ++++++++ 0026-x86-Add-8.3.0-x86_64-machine-type.patch | 117 +++++++++++ 0027-hw-arm-Changes-to-rhel820-machine.patch | 78 ++++++++ ...oduce-rhel_virt_instance_init-helper.patch | 50 +++++ 0029-hw-arm-Add-rhel830-machine-type.patch | 58 ++++++ qemu-kvm.spec | 177 +++++++++++++++-- qemu-pr-helper.service | 2 +- sources | 2 +- 28 files changed, 910 insertions(+), 304 deletions(-) create mode 100644 0023-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch create mode 100644 0024-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch create mode 100644 0025-redhat-define-hw_compat_8_2.patch create mode 100644 0026-x86-Add-8.3.0-x86_64-machine-type.patch create mode 100644 0027-hw-arm-Changes-to-rhel820-machine.patch create mode 100644 0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch create mode 100644 0029-hw-arm-Add-rhel830-machine-type.patch diff --git a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch index da6d424..9d73fb7 100644 --- a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch +++ b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch @@ -1,4 +1,4 @@ -From 606314e48b9307fd4c79b08017a143d9f749d395 Mon Sep 17 00:00:00 2001 +From 6527eafb590d72354bb02662f95d7603cd3a3fd3 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 23 Apr 2020 05:26:54 +0200 Subject: redhat: Adding slirp to the exploded tree diff --git a/0005-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch index 21a0cf8..7a562ab 100644 --- a/0005-Initial-redhat-build.patch +++ b/0005-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From b510775819649eeb2499b9d4b9baf93538d4bde7 Mon Sep 17 00:00:00 2001 +From b9d95992b85d3667c13df3a321933c44c7157d0b Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build @@ 
-11,7 +11,7 @@ several issues are fixed in QEMU tree: - Man page renamed from qemu to qemu-kvm - man page is installed using make install so we have to fix it in qemu tree -This rebase includes changes up to qemu-kvm-4.2.0-19.el8 +This rebase includes changes up to qemu-kvm-4.2.0-28.el8 Rebase notes (3.1.0): - added new configure options @@ -85,6 +85,15 @@ Rebase notes (5.0.0 rc2): Rebase notes (5.0.0 rc3): - Reorder configure options +Rebase notes (weekly-200617): +- Fixing qemu-kvm-tests rpm content + - Fixed python shenigans + - Include all qemu-iotests files +- qemu-pr-helper moved to /usr/libexec/ (upstream) + +Rebase notes (weekly-200701): +- Moved qemu-ga-ref.* and qemu-qmp-ref.* docs files to interop diretory (upstream) + Merged patches (3.1.0): - 01f0c9f RHEL8: Add disable configure options to qemu spec file - Spec file cleanups @@ -111,31 +120,36 @@ Merged patches (weekly-200205): - 5edf6bd Add support for rh-brew-module - f77d52d redhat: ship virtiofsd vhost-user device backend -Conflicts: - gdbstub.c +Merged patches (weekly-200610): +- 63f12d4 redhat: Always use module build target for rh-brew (modified) +- 9b1e140 redhat: updating the modular target + +Merged patches (weekly-200617): +- 44b8bd0 spec: Fix python shenigans for tests --- .gitignore | 1 + Makefile | 3 +- configure | 1 + - redhat/Makefile | 88 ++ - redhat/Makefile.common | 51 + + redhat/Makefile | 86 + + redhat/Makefile.common | 53 + redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 2812 +++++++++++++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 2956 +++++++++++++++++++++++++++++++++++ + redhat/qemu-pr-helper.service | 2 +- redhat/scripts/extract_build_cmd.py | 2 +- redhat/scripts/process-patches.sh | 7 +- tests/check-block.sh | 2 + ui/vnc.c | 2 +- - 11 files changed, 3000 insertions(+), 8 deletions(-) + 12 files changed, 3145 insertions(+), 9 deletions(-) create mode 100644 redhat/Makefile create mode 100644 redhat/Makefile.common create mode 100644 
redhat/README.tests create mode 100644 redhat/qemu-kvm.spec.template diff --git a/Makefile b/Makefile -index 8a9113e..a89cc44 100644 +index b1b8a5a..88b7565 100644 --- a/Makefile +++ b/Makefile -@@ -548,6 +548,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM +@@ -549,6 +549,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM CAP_CFLAGS += -DCAPSTONE_HAS_ARM64 CAP_CFLAGS += -DCAPSTONE_HAS_POWERPC CAP_CFLAGS += -DCAPSTONE_HAS_X86 @@ -143,8 +157,8 @@ index 8a9113e..a89cc44 100644 .PHONY: capstone/all capstone/all: .git-submodule-status -@@ -884,7 +885,7 @@ install-doc: $(DOCS) install-sphinxdocs - $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)" +@@ -878,7 +879,7 @@ install-doc: $(DOCS) install-sphinxdocs + $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)/interop" ifdef CONFIG_POSIX $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" - $(INSTALL_DATA) $(MANUAL_BUILDDIR)/system/qemu.1 "$(DESTDIR)$(mandir)/man1" @@ -153,10 +167,10 @@ index 8a9113e..a89cc44 100644 $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) $(MANUAL_BUILDDIR)/system/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" diff --git a/configure b/configure -index 23b5e93..1b40d7e 100755 +index 4a22dcd..53979dc 100755 --- a/configure +++ b/configure -@@ -2538,6 +2538,7 @@ if test "$seccomp" != "no" ; then +@@ -2596,6 +2596,7 @@ if test "$seccomp" != "no" ; then seccomp="no" fi fi @@ -165,10 +179,10 @@ index 23b5e93..1b40d7e 100755 # xen probe diff --git a/tests/check-block.sh b/tests/check-block.sh -index ad320c2..1f26083 100755 +index 8e29c86..e9bcb5a 100755 --- a/tests/check-block.sh +++ b/tests/check-block.sh -@@ -43,6 +43,8 @@ if ! (sed --version | grep 'GNU sed') > /dev/null 2>&1 ; then +@@ -53,6 +53,8 @@ if ! 
(sed --version | grep 'GNU sed') > /dev/null 2>&1 ; then fi fi @@ -178,7 +192,7 @@ index ad320c2..1f26083 100755 ret=0 diff --git a/ui/vnc.c b/ui/vnc.c -index 1d7138a..1fc55b7 100644 +index 12a1271..89fb616 100644 --- a/ui/vnc.c +++ b/ui/vnc.c @@ -3972,7 +3972,7 @@ void vnc_display_open(const char *id, Error **errp) diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0006-Enable-disable-devices-for-RHEL.patch index 7ebe37c..50eb1de 100644 --- a/0006-Enable-disable-devices-for-RHEL.patch +++ b/0006-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 950e13b225f6e890a2c19223b8a05cdc9ecabfa7 Mon Sep 17 00:00:00 2001 +From b1e6c8e2a9b58d215042adbd0fffdf102ac9a9d7 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 11 Jan 2016 11:53:33 +0100 Subject: Enable/disable devices for RHEL @@ -45,6 +45,12 @@ Rebase notes (weekly-200205): Rebase notes (weekly-200226): - Added CONFIG_NVDIMM +Rebase notes (weekly-200513) +- updated cortex-15 disabling to upstream code + +Rebase notes (weekly-200520): +- Add CONFIG_ACPI_APEI for aarch64 + Merged patches (qemu 3.1.0): - d51e082 Re-enable CONFIG_HYPERV_TESTDEV - 4b889f3 Declare cirrus-vga as deprecated @@ -65,9 +71,15 @@ Merged patches (4.2.0): Merged patches (weekly-200108): - 4543a3c i386: Remove cpu64-rhel6 CPU model + +Merged patches (weekly-200520): +- 96533 aarch64: Remove tcg cpu types (pjw commit) + +Merged patches (weekly-200610): +- 559d589 Revert "RHEL: disable hostmem-memfd" +- 441128e enable ramfb --- - backends/Makefile.objs | 3 +- - default-configs/aarch64-rh-devices.mak | 20 +++++++ + default-configs/aarch64-rh-devices.mak | 22 ++++++++ default-configs/aarch64-softmmu.mak | 10 ++-- default-configs/ppc64-rh-devices.mak | 34 +++++++++++ default-configs/ppc64-softmmu.mak | 10 ++-- @@ -81,24 +93,22 @@ Merged patches (weekly-200108): hw/block/fdc.c | 10 ++++ hw/bt/Makefile.objs | 3 + hw/cpu/Makefile.objs | 5 +- - hw/display/Makefile.objs | 5 +- hw/display/cirrus_vga.c | 3 + hw/ide/piix.c | 5 +- 
hw/input/pckbd.c | 2 + hw/net/e1000.c | 2 + hw/ppc/spapr_cpu_core.c | 2 + hw/usb/Makefile.objs | 4 +- - hw/vfio/pci.c | 5 ++ qemu-options.hx | 4 -- redhat/qemu-kvm.spec.template | 5 +- softmmu/vl.c | 2 +- target/arm/cpu.c | 4 +- + target/arm/cpu_tcg.c | 3 + target/i386/cpu.c | 17 +++--- target/ppc/cpu-models.c | 10 ++++ target/s390x/cpu_models.c | 3 + target/s390x/kvm.c | 8 +++ - util/memfd.c | 2 +- - 32 files changed, 280 insertions(+), 37 deletions(-) + 29 files changed, 274 insertions(+), 33 deletions(-) create mode 100644 default-configs/aarch64-rh-devices.mak create mode 100644 default-configs/ppc64-rh-devices.mak create mode 100644 default-configs/rh-virtio.mak @@ -106,26 +116,12 @@ Merged patches (weekly-200108): create mode 100644 default-configs/x86_64-rh-devices.mak create mode 100644 hw/bt/Makefile.objs -diff --git a/backends/Makefile.objs b/backends/Makefile.objs -index 28a847c..0eda216 100644 ---- a/backends/Makefile.objs -+++ b/backends/Makefile.objs -@@ -16,7 +16,8 @@ endif - - common-obj-$(call land,$(CONFIG_VHOST_USER),$(CONFIG_VIRTIO)) += vhost-user.o - --common-obj-$(CONFIG_LINUX) += hostmem-memfd.o -+# RHEL: disable memfd -+# common-obj-$(CONFIG_LINUX) += hostmem-memfd.o - - common-obj-$(CONFIG_GIO) += dbus-vmstate.o - dbus-vmstate.o-cflags = $(GIO_CFLAGS) diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak new file mode 100644 -index 0000000..a1ed641 +index 0000000..f0cf5a1 --- /dev/null +++ b/default-configs/aarch64-rh-devices.mak -@@ -0,0 +1,20 @@ +@@ -0,0 +1,22 @@ +include rh-virtio.mak + +CONFIG_ARM_GIC_KVM=y @@ -146,6 +142,8 @@ index 0000000..a1ed641 +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_PCI=y +CONFIG_XIO3130=y ++CONFIG_NVDIMM=y ++CONFIG_ACPI_APEI=y diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak index 958b1e0..8f6867d 100644 --- a/default-configs/aarch64-softmmu.mak @@ -394,10 +392,10 @@ index 64b2ee2..b5de7e5 100644 + +include x86_64-rh-devices.mak diff --git 
a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 336cace..ae86900 100644 +index 2d204ba..7ead029 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c -@@ -374,8 +374,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) +@@ -374,8 +374,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; pm->acpi_memory_hotplug.is_enabled = true; pm->cpu_hotplug_legacy = true; @@ -422,10 +420,10 @@ index 534a6a1..bd62442 100644 obj-$(CONFIG_PXA2XX) += pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o obj-$(CONFIG_DIGIC) += digic.o diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 33bc9e2..3cb8779 100644 +index 3425d56..f4ae220 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c -@@ -46,6 +46,8 @@ +@@ -47,6 +47,8 @@ #include "qemu/module.h" #include "trace.h" @@ -434,7 +432,7 @@ index 33bc9e2..3cb8779 100644 /********************************************************/ /* debug Floppy devices */ -@@ -2613,6 +2615,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, +@@ -2625,6 +2627,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, int i, j; static int command_tables_inited = 0; @@ -448,7 +446,7 @@ index 33bc9e2..3cb8779 100644 + if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); - } + return; diff --git a/hw/bt/Makefile.objs b/hw/bt/Makefile.objs new file mode 100644 index 0000000..e678e9e @@ -471,27 +469,11 @@ index 8db9e8a..1601ea9 100644 +#obj-$(CONFIG_A15MPCORE) += a15mpcore.o +common-obj-y += core.o +# cluster.o -diff --git a/hw/display/Makefile.objs b/hw/display/Makefile.objs -index 77a7d62..68c793e 100644 ---- a/hw/display/Makefile.objs -+++ b/hw/display/Makefile.objs -@@ -1,8 +1,9 @@ - common-obj-$(CONFIG_DDC) += i2c-ddc.o - common-obj-$(CONFIG_EDID) += edid-generate.o edid-region.o - --common-obj-$(CONFIG_FW_CFG_DMA) += ramfb.o --common-obj-$(CONFIG_FW_CFG_DMA) += ramfb-standalone.o -+# Disabled for Red 
Hat Enterprise Linux -+#common-obj-$(CONFIG_FW_CFG_DMA) += ramfb.o -+#common-obj-$(CONFIG_FW_CFG_DMA) += ramfb-standalone.o - - common-obj-$(CONFIG_ADS7846) += ads7846.o - common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index 0d391e1..fb1b2c4 100644 +index 212d6f5..f2504e5 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c -@@ -2975,6 +2975,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) +@@ -2958,6 +2958,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); int16_t device_id = pc->device_id; @@ -502,7 +484,7 @@ index 0d391e1..fb1b2c4 100644 Also accept 8 MB/16 MB for backward compatibility. */ if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index 3b2de4c..980c35e 100644 +index b402a93..d3621a4 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c @@ -221,7 +221,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) @@ -525,12 +507,12 @@ index 3b2de4c..980c35e 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index 60a4130..b2f1f91 100644 +index 29d633c..1442f46 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c -@@ -568,6 +568,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) - dc->realize = i8042_realizefn; +@@ -599,6 +599,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) dc->vmsd = &vmstate_kbd_isa; + isa->build_aml = i8042_build_aml; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + /* Disabled for Red Hat Enterprise Linux: */ + dc->user_creatable = false; @@ -538,7 +520,7 @@ index 60a4130..b2f1f91 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index 2a69eee..af3ec17 100644 +index a18f80e..960b2f0 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -1797,6 +1797,7 @@ static const E1000Info e1000_devices[] = { 
@@ -558,10 +540,10 @@ index 2a69eee..af3ec17 100644 static void e1000_register_types(void) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index ac1c109..542c19e 100644 +index 26ad566..bfdf6b3 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -399,10 +399,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { +@@ -396,10 +396,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { .instance_size = sizeof(SpaprCpuCore), .class_size = sizeof(SpaprCpuCoreClass), }, @@ -575,10 +557,10 @@ index ac1c109..542c19e 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs -index 66835e5..1b03645 100644 +index fa5c3fa..854094c 100644 --- a/hw/usb/Makefile.objs +++ b/hw/usb/Makefile.objs -@@ -29,7 +29,9 @@ common-obj-$(CONFIG_USB_NETWORK) += dev-network.o +@@ -30,7 +30,9 @@ common-obj-$(CONFIG_USB_NETWORK) += dev-network.o ifeq ($(CONFIG_USB_SMARTCARD),y) common-obj-y += dev-smartcard-reader.o common-obj-$(CONFIG_SMARTCARD) += smartcard.mo @@ -589,39 +571,11 @@ index 66835e5..1b03645 100644 smartcard.mo-cflags := $(SMARTCARD_CFLAGS) smartcard.mo-libs := $(SMARTCARD_LIBS) endif -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 5e75a95..e265d77 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3222,6 +3222,7 @@ static const TypeInfo vfio_pci_dev_info = { - }, - }; - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static Property vfio_pci_dev_nohotplug_properties[] = { - DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false), - DEFINE_PROP_END_OF_LIST(), -@@ -3241,11 +3242,15 @@ static const TypeInfo vfio_pci_nohotplug_dev_info = { - .instance_size = sizeof(VFIOPCIDevice), - .class_init = vfio_pci_nohotplug_dev_class_init, - }; -+#endif - - static void register_vfio_pci_dev_type(void) - { - type_register_static(&vfio_pci_dev_info); -+ -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - 
type_register_static(&vfio_pci_nohotplug_dev_info); -+#endif - } - - type_init(register_vfio_pci_dev_type) diff --git a/qemu-options.hx b/qemu-options.hx -index 292d4e7..1df25ae 100644 +index 196f468..43b0dbc 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -2239,10 +2239,6 @@ ERST +@@ -2248,10 +2248,6 @@ ERST DEF("no-hpet", 0, QEMU_OPTION_no_hpet, "-no-hpet disable HPET\n", QEMU_ARCH_I386) @@ -633,10 +587,10 @@ index 292d4e7..1df25ae 100644 DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" diff --git a/softmmu/vl.c b/softmmu/vl.c -index 32c0047..46b4d98 100644 +index 3e15ee2..3693dfb 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c -@@ -144,7 +144,7 @@ static Chardev **serial_hds; +@@ -145,7 +145,7 @@ static Chardev **serial_hds; Chardev *parallel_hds[MAX_PARALLEL_PORTS]; int win2k_install_hack = 0; int singlestep = 0; @@ -646,25 +600,39 @@ index 32c0047..46b4d98 100644 static int no_reboot; int no_shutdown = 0; diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index a79f233..dcdf3b2 100644 +index 5050e18..79adfe2 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c -@@ -2905,7 +2905,9 @@ static void arm_cpu_register_types(void) - type_register_static(&idau_interface_type_info); +@@ -2335,7 +2335,9 @@ static void arm_cpu_register_types(void) - while (info->name) { -- cpu_register(info); -+ /* RHEL specific: Filter out unsupported cpu models */ -+ if (!strcmp(info->name, "cortex-a15")) -+ cpu_register(info); - info++; + type_register_static(&idau_interface_type_info); + for (i = 0; i < cpu_count; ++i) { +- arm_cpu_register(&arm_cpus[i]); ++ /* RHEL specific: Filter out unsupported cpu models */ ++ if (!strcmp(arm_cpus[i].name, "cortex-a15")) ++ arm_cpu_register(&arm_cpus[i]); + } } + } +diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c +index 00b0e08..94d429b 100644 +--- a/target/arm/cpu_tcg.c ++++ 
b/target/arm/cpu_tcg.c +@@ -655,6 +655,9 @@ static void arm_tcg_cpu_register_types(void) + { + size_t i; ++ /* Disable TCG cpu types for Red Hat Enterprise Linux */ ++ return; ++ + for (i = 0; i < ARRAY_SIZE(arm_tcg_cpus); ++i) { + arm_cpu_register(&arm_tcg_cpus[i]); + } diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 90ffc5f..d98618c 100644 +index 36cbd3d..55a30cd 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1801,20 +1801,21 @@ static CPUCaches epyc_rome_cache_info = { +@@ -1825,20 +1825,21 @@ static CPUCaches epyc_rome_cache_info = { static X86CPUDefinition builtin_x86_defs[] = { { @@ -765,7 +733,7 @@ index 4ad1686..16b2185 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 7c32180..88bf4a9 100644 +index 2fa609b..f6bee32 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -404,6 +404,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, @@ -779,10 +747,10 @@ index 7c32180..88bf4a9 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 7f7ebab..8dc2df2 100644 +index f2f75d2..8970e4c 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c -@@ -2429,6 +2429,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2494,6 +2494,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } @@ -797,19 +765,6 @@ index 7f7ebab..8dc2df2 100644 prop.cpuid = s390_cpuid_from_cpu_model(model); prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ -diff --git a/util/memfd.c b/util/memfd.c -index 4a3c07e..3303ec9 100644 ---- a/util/memfd.c -+++ b/util/memfd.c -@@ -193,7 +193,7 @@ bool qemu_memfd_alloc_check(void) - */ - bool qemu_memfd_check(unsigned int flags) - { --#ifdef CONFIG_LINUX -+#if 0 /* RHEL: memfd support disabled */ - int mfd = 
memfd_create("test", flags | MFD_CLOEXEC); - - if (mfd >= 0) { -- 1.8.3.1 diff --git a/0007-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch index bb565a1..9933847 100644 --- a/0007-Machine-type-related-general-changes.patch +++ b/0007-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From 355248605abeab3ad30da6be267d6352dfea65f0 Mon Sep 17 00:00:00 2001 +From 1266dec11bec3867e8eda3062384b07986a89d3b Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -35,8 +35,14 @@ Merged patches (4.2.0): Merged patches (weekly-200318): - e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) + +Merged patches (weekly-200506): +- 8f9f4d8 compat: disable 'edid' for virtio-gpu-ccw + +Conflicts: + hw/core/machine.c --- - hw/acpi/ich9.c | 16 ++++ + hw/acpi/ich9.c | 15 ++++ hw/acpi/piix4.c | 5 +- hw/arm/virt.c | 2 +- hw/char/serial.c | 16 ++++ @@ -59,10 +65,10 @@ Merged patches (weekly-200318): include/hw/usb.h | 4 + migration/migration.c | 2 + migration/migration.h | 5 ++ - 23 files changed, 355 insertions(+), 11 deletions(-) + 23 files changed, 354 insertions(+), 11 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index ae86900..9a8a627 100644 +index 7ead029..3b8501f 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -369,6 +369,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) @@ -81,25 +87,24 @@ index ae86900..9a8a627 100644 + s->pm.force_rev1_fadt = value; +} + - void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) { static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; -@@ -393,6 +405,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp) +@@ -391,6 +403,9 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + object_property_add_bool(obj, 
"cpu-hotplug-legacy", ich9_pm_get_cpu_hotplug_legacy, - ich9_pm_set_cpu_hotplug_legacy, - NULL); + ich9_pm_set_cpu_hotplug_legacy); + object_property_add_bool(obj, "__com.redhat_force-rev1-fadt", + ich9_pm_get_force_rev1_fadt, -+ ich9_pm_set_force_rev1_fadt, -+ NULL); ++ ich9_pm_set_force_rev1_fadt); object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S3_DISABLED, - &pm->disable_s3, OBJ_PROP_FLAG_READWRITE, - NULL); + &pm->disable_s3, OBJ_PROP_FLAG_READWRITE); + object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S4_DISABLED, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 964d6f5..b8458ba 100644 +index 630ca6e..715bc17 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -275,6 +275,7 @@ static const VMStateDescription vmstate_acpi = { +@@ -276,6 +276,7 @@ static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, .minimum_version_id = 3, @@ -107,7 +112,7 @@ index 964d6f5..b8458ba 100644 .post_load = vmstate_acpi_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), -@@ -628,8 +629,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) +@@ -630,8 +631,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) static Property piix4_pm_properties[] = { DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), @@ -117,12 +122,12 @@ index 964d6f5..b8458ba 100644 + DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, - use_acpi_pci_hotplug, true), + use_acpi_hotplug_bridge, true), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 7dc96ab..1f8d6bf 100644 +index cd0834c..2e3efd3 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1413,7 +1413,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1450,7 +1450,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, 
vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, @@ -132,7 +137,7 @@ index 7dc96ab..1f8d6bf 100644 smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, &smbios_anchor, &smbios_anchor_len); diff --git a/hw/char/serial.c b/hw/char/serial.c -index c822a9a..0c7f1ff 100644 +index 9eebcb2..11d2d08 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -35,6 +35,7 @@ @@ -187,7 +192,7 @@ index c822a9a..0c7f1ff 100644 } diff --git a/hw/core/machine.c b/hw/core/machine.c -index c1a444c..3c45365 100644 +index 211b4e0..ef6b320 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -28,6 +28,176 @@ @@ -239,7 +244,7 @@ index c1a444c..3c45365 100644 + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ + { "virtio-vga", "edid", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ -+ { "virtio-gpu-pci", "edid", "false" }, ++ { "virtio-gpu-device", "edid", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_4_0 */ + { "virtio-device", "use-started", "false" }, + /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */ @@ -364,11 +369,11 @@ index c1a444c..3c45365 100644 +}; +const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); + - GlobalProperty hw_compat_4_2[] = { - { "virtio-blk-device", "queue-size", "128"}, - { "virtio-scsi-device", "virtqueue_size", "128"}, + GlobalProperty hw_compat_5_0[] = { + { "virtio-balloon-device", "page-poison", "false" }, + { "vmport", "x-read-set-eax", "off" }, diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index 0633ed3..b703e9e 100644 +index 3aaeeec..d88f52a 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c @@ -84,7 +84,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) @@ -381,10 +386,10 @@ index 0633ed3..b703e9e 100644 }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 22dee0e..26ff640 100644 +index 1d832b2..82a33b4 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -176,6 +176,8 @@ static void pc_init1(MachineState *machine, +@@ -179,6 
+179,8 @@ static void pc_init1(MachineState *machine, smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -394,10 +399,10 @@ index 22dee0e..26ff640 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index d37c425..f67232e 100644 +index 047ea8d..1818d31 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -201,6 +201,8 @@ static void pc_q35_init(MachineState *machine) +@@ -204,6 +204,8 @@ static void pc_q35_init(MachineState *machine) smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -407,7 +412,7 @@ index d37c425..f67232e 100644 } diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index 79ba158..311dbe0 100644 +index fda3451..be64a6b 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -79,6 +79,11 @@ typedef struct E1000EState { @@ -481,7 +486,7 @@ index 79ba158..311dbe0 100644 e1000e_prop_disable_vnet, bool), DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 70aca7e..0950cee 100644 +index ab93d78..48a8135 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3179,7 +3179,7 @@ static int rtl8139_pre_save(void *opaque) @@ -504,10 +509,10 @@ index 70aca7e..0950cee 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c -index dc4269c..3fb9469 100644 +index 7a38540..377d861 100644 --- a/hw/rtc/mc146818rtc.c +++ b/hw/rtc/mc146818rtc.c -@@ -42,6 +42,7 @@ +@@ -43,6 +43,7 @@ #include "qapi/visitor.h" #include "exec/address-spaces.h" #include "hw/rtc/mc146818rtc_regs.h" @@ -515,7 +520,7 @@ index dc4269c..3fb9469 100644 #ifdef TARGET_I386 #include "qapi/qapi-commands-misc-target.h" -@@ -820,6 +821,11 @@ static int rtc_post_load(void *opaque, int version_id) +@@ -821,6 +822,11 @@ static int rtc_post_load(void *opaque, int version_id) static bool 
rtc_irq_reinject_on_ack_count_needed(void *opaque) { RTCState *s = (RTCState *)opaque; @@ -684,7 +689,7 @@ index b330e36..b25cce8 100644 } }; diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h -index 2fad4df..f554b67 100644 +index 946af51..cc91a7e 100644 --- a/hw/usb/hcd-xhci.h +++ b/hw/usb/hcd-xhci.h @@ -157,6 +157,8 @@ typedef struct XHCIEvent { @@ -697,7 +702,7 @@ index 2fad4df..f554b67 100644 typedef struct XHCIInterrupter { diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h -index 41568d1..1a23ccc 100644 +index 28a5318..ff4a672 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -61,6 +61,9 @@ typedef struct ICH9LPCPMRegs { @@ -711,10 +716,10 @@ index 41568d1..1a23ccc 100644 #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" diff --git a/include/hw/boards.h b/include/hw/boards.h -index fd4d62b..0046ab5 100644 +index 18815d9..2224fbc 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -369,4 +369,28 @@ extern const size_t hw_compat_2_2_len; +@@ -372,4 +372,28 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; @@ -760,10 +765,10 @@ index 02a0ced..67e38a1 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 6ab6eda..69fca4e 100644 +index dce1273..665c430 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -100,6 +100,9 @@ typedef struct PCMachineClass { +@@ -102,6 +102,9 @@ typedef struct PCMachineClass { bool smbios_defaults; bool smbios_legacy_mode; bool smbios_uuid_encoded; @@ -774,10 +779,10 @@ index 6ab6eda..69fca4e 100644 /* RAM / address space compat: */ bool gigabyte_align; diff --git a/include/hw/usb.h b/include/hw/usb.h -index c24d968..b353438 100644 +index e29a376..35ac38c 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h -@@ -605,4 +605,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, +@@ -575,4 +575,8 @@ int 
usb_get_quirks(uint16_t vendor_id, uint16_t product_id, uint8_t interface_class, uint8_t interface_subclass, uint8_t interface_protocol); @@ -787,7 +792,7 @@ index c24d968..b353438 100644 + #endif diff --git a/migration/migration.c b/migration/migration.c -index 187ac04..041b773 100644 +index 481a590..a5a64f9 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -128,6 +128,8 @@ enum mig_rp_message_type { @@ -800,7 +805,7 @@ index 187ac04..041b773 100644 migrations at once. For now we don't need to add dynamic creation of migration */ diff --git a/migration/migration.h b/migration/migration.h -index 507284e..0baa337 100644 +index f617960..b8bc10d 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -339,6 +339,11 @@ void init_dirty_bitmap_incoming_migration(void); diff --git a/0008-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch index ce2aba5..2d1ab2a 100644 --- a/0008-Add-aarch64-machine-types.patch +++ b/0008-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 449b1fd5bc6075a535cb57e6e0b0eda837663a52 Mon Sep 17 00:00:00 2001 +From 9506867ddf6d00d2dc502f69cfc0d274a2f1040c Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -29,6 +29,12 @@ Rebase notes (weekly-200226): Rebase notes (5.0.0-rc1): - Added setting acpi properties +Rebase notes (weekly-200520): +- Added ras property + +Rebase notes (weekly-200701): +- Added to virt_machine_device_unplug_cb to machine type (upstream) + Merged patches (4.0.0): - 7bfdb4c aarch64: Add virt-rhel8.0.0 machine type for ARM - 3433e69 aarch64: Set virt-rhel8.0.0 max_cpus to 512 @@ -38,23 +44,23 @@ Merged patches (4.1.0): - c3e39ef aarch64: Add virt-rhel8.1.0 machine type for ARM - 59a46d1 aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine --- - hw/arm/virt.c | 168 +++++++++++++++++++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 172 +++++++++++++++++++++++++++++++++++++++++++++++++- 
include/hw/arm/virt.h | 11 ++++ - 2 files changed, 178 insertions(+), 1 deletion(-) + 2 files changed, 182 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 1f8d6bf..1559689 100644 +index 2e3efd3..abbc7d5 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -78,6 +78,7 @@ - #include "hw/virtio/virtio-iommu.h" +@@ -79,6 +79,7 @@ #include "hw/char/pl011.h" + #include "qemu/guest-random.h" +#if 0 /* Disabled for Red Hat Enterprise Linux */ #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -104,7 +105,49 @@ +@@ -105,7 +106,49 @@ DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) @@ -105,7 +111,7 @@ index 1f8d6bf..1559689 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -1879,6 +1922,7 @@ static void machvirt_init(MachineState *machine) +@@ -1964,6 +2007,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -113,7 +119,7 @@ index 1f8d6bf..1559689 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1907,6 +1951,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -1992,6 +2036,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -121,7 +127,7 @@ index 1f8d6bf..1559689 100644 static bool virt_get_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2176,6 +2221,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2339,6 +2384,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return requested_pa_size > 40 ? 
requested_pa_size : 0; } @@ -129,7 +135,7 @@ index 1f8d6bf..1559689 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -2435,3 +2481,123 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -2604,3 +2650,127 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -162,15 +168,17 @@ index 1f8d6bf..1559689 100644 + hc->pre_plug = virt_machine_device_pre_plug_cb; + hc->plug = virt_machine_device_plug_cb; + hc->unplug_request = virt_machine_device_unplug_request_cb; ++ hc->unplug = virt_machine_device_unplug_cb; + mc->numa_mem_supported = true; ++ mc->nvdimm_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->default_ram_id = "mach-virt.ram"; + + object_class_property_add(oc, "acpi", "OnOffAuto", + virt_get_acpi, virt_set_acpi, -+ NULL, NULL, &error_abort); ++ NULL, NULL); + object_class_property_set_description(oc, "acpi", -+ "Enable ACPI", &error_abort); ++ "Enable ACPI"); +} + +static const TypeInfo rhel_machine_info = { @@ -204,11 +212,10 @@ index 1f8d6bf..1559689 100644 + /* High memory is enabled by default for RHEL */ + vms->highmem = true; + object_property_add_bool(obj, "highmem", virt_get_highmem, -+ virt_set_highmem, NULL); ++ virt_set_highmem); + object_property_set_description(obj, "highmem", + "Set on/off to enable/disable using " -+ "physical address space above 32 bits", -+ NULL); ++ "physical address space above 32 bits"); + /* + * Default GIC type is still v2, but became configurable for RHEL. We + * keep v2 instead of max as TCG CI test cases require an MSI controller @@ -216,10 +223,10 @@ index 1f8d6bf..1559689 100644 + */ + vms->gic_version = 2; + object_property_add_str(obj, "gic-version", virt_get_gic_version, -+ virt_set_gic_version, NULL); ++ virt_set_gic_version); + object_property_set_description(obj, "gic-version", + "Set GIC version. 
" -+ "Valid values are 2, 3 and host", NULL); ++ "Valid values are 2, 3 and host"); + + vms->highmem_ecam = !vmc->no_highmem_ecam; + @@ -229,21 +236,24 @@ index 1f8d6bf..1559689 100644 + /* Default allows ITS instantiation */ + vms->its = true; + object_property_add_bool(obj, "its", virt_get_its, -+ virt_set_its, NULL); ++ virt_set_its); + object_property_set_description(obj, "its", + "Set on/off to enable/disable " -+ "ITS instantiation", -+ NULL); ++ "ITS instantiation"); + } + + /* Default disallows iommu instantiation */ + vms->iommu = VIRT_IOMMU_NONE; -+ object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu, NULL); ++ object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu); + object_property_set_description(obj, "iommu", + "Set the IOMMU type. " -+ "Valid values are none and smmuv3", -+ NULL); -+ ++ "Valid values are none and smmuv3"); ++ vms->ras = false; ++ object_property_add_bool(obj, "ras", virt_get_ras, ++ virt_set_ras); ++ object_property_set_description(obj, "ras", ++ "Set on/off to enable/disable reporting host memory errors " ++ "to a KVM guest using ACPI and guest external abort exceptions"); + vms->irqmap=a15irqmap; + virt_flash_create(vms); +} @@ -254,10 +264,10 @@ index 1f8d6bf..1559689 100644 +} +DEFINE_RHEL_MACHINE_AS_LATEST(8, 2, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 60b2f52..ae44797 100644 +index 31878dd..5582fba 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -155,6 +155,7 @@ typedef struct { +@@ -157,6 +157,7 @@ typedef struct { #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -265,7 +275,7 @@ index 60b2f52..ae44797 100644 #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") #define VIRT_MACHINE(obj) \ OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) -@@ -163,6 +164,16 @@ typedef struct { +@@ -165,6 +166,16 @@ typedef struct { #define VIRT_MACHINE_CLASS(klass) \ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) diff --git a/0009-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch index 0c3b0df..779ee5d 100644 --- a/0009-Add-ppc64-machine-types.patch +++ b/0009-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From a07369c763a13a1bbc97b162f5d029a0d98d6fd3 Mon Sep 17 00:00:00 2001 +From d27b53512182f6aa6d9ccfcaf64e7aab1a064146 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -42,10 +42,10 @@ Merged patches (weekly-200226): 5 files changed, 307 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 9a2bd50..20b3437 100644 +index 4c185bc..2fa25b1 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -4549,6 +4549,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4530,6 +4530,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->smp_threads_vsmt = true; smc->nr_xirqs = SPAPR_NR_XIRQS; xfc->match_nvt = spapr_match_nvt; @@ -53,15 +53,15 @@ index 9a2bd50..20b3437 100644 } static const TypeInfo spapr_machine_info = { -@@ -4599,6 +4600,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4580,6 +4581,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-5.0 + * pseries-5.1 */ -@@ -4645,6 +4647,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4638,6 +4640,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", 
false); @@ -69,7 +69,7 @@ index 9a2bd50..20b3437 100644 /* * pseries-4.0 -@@ -4661,6 +4664,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -4654,6 +4657,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; } @@ -77,7 +77,7 @@ index 9a2bd50..20b3437 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4820,6 +4824,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); +@@ -4813,6 +4817,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); /* * pseries-2.7 */ @@ -85,7 +85,7 @@ index 9a2bd50..20b3437 100644 static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, -@@ -4874,6 +4879,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, +@@ -4867,6 +4872,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; } @@ -93,7 +93,7 @@ index 9a2bd50..20b3437 100644 static void spapr_machine_2_7_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4988,6 +4994,280 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -4981,6 +4987,280 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -375,7 +375,7 @@ index 9a2bd50..20b3437 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 542c19e..916ab0e 100644 +index bfdf6b3..39fcaf8 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -24,6 +24,7 @@ @@ -392,7 +392,7 @@ index 542c19e..916ab0e 100644 Error *local_err = NULL; + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); - object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); + qdev_realize(DEVICE(cpu), NULL, &local_err); if (local_err) { @@ -250,6 +252,17 @@ static void 
spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); @@ -413,10 +413,10 @@ index 542c19e..916ab0e 100644 goto error_intc_create; } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 42d64a0..c03611f 100644 +index c421410..5190d6a 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -128,6 +128,7 @@ struct SpaprMachineClass { +@@ -130,6 +130,7 @@ struct SpaprMachineClass { bool smp_threads_vsmt; /* set VSMT to smp_threads by default */ hwaddr rma_limit; /* clamp the RMA to this size */ @@ -425,7 +425,7 @@ index 42d64a0..c03611f 100644 uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, diff --git a/target/ppc/compat.c b/target/ppc/compat.c -index f48df25..34e3d0f 100644 +index fda0dfe..ab8e3b2 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) @@ -450,10 +450,10 @@ index f48df25..34e3d0f 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index 88d9449..0d7f5f5 100644 +index e7d382a..3cf209d 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1323,6 +1323,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1352,6 +1352,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) diff --git a/0010-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch index 4b5b8c5..8c76c93 100644 --- a/0010-Add-s390x-machine-types.patch +++ b/0010-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From d51b2da2b2f22313593ec618f510ebf7452b94af Mon Sep 17 00:00:00 2001 +From bf565781e2deb548173e7530e29d886f6c1eaad8 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -25,10 +25,10 @@ Merged patches (4.2.0): 1 file changed, 70 
insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 0fa00a9..d61d85d 100644 +index b111406..f1b1722 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -648,7 +648,7 @@ bool css_migration_enabled(void) +@@ -774,7 +774,7 @@ bool css_migration_enabled(void) { \ MachineClass *mc = MACHINE_CLASS(oc); \ ccw_machine_##suffix##_class_options(mc); \ @@ -37,15 +37,15 @@ index 0fa00a9..d61d85d 100644 if (latest) { \ mc->alias = "s390-ccw-virtio"; \ mc->is_default = true; \ -@@ -672,6 +672,7 @@ bool css_migration_enabled(void) +@@ -798,6 +798,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_5_0_instance_options(MachineState *machine) + static void ccw_machine_5_1_instance_options(MachineState *machine) { } -@@ -912,6 +913,74 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1050,6 +1051,74 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); diff --git a/0011-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch index 338c262..c245d3d 100644 --- a/0011-Add-x86_64-machine-types.patch +++ b/0011-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From fa1624d37542a12fc2668b23b2a7a9a650457dcc Mon Sep 17 00:00:00 2001 +From 04955c10da93ce94d73693a0be07b2a4c81246fe Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -42,6 +42,9 @@ Merged patches (weekly-200122): Merged patches (weekly-200318): - e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) + +Conflicts: + hw/i386/pc_piix.c --- hw/i386/acpi-build.c | 3 + hw/i386/pc.c | 265 ++++++++++++++++++++++++++++++++++++++++++++++++++- @@ -54,10 +57,10 @@ Merged patches (weekly-200318): 8 files changed, 
675 insertions(+), 7 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index 23c77ee..6cfcdfc 100644 +index b7bcbbb..fe815c5 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c -@@ -207,6 +207,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) +@@ -212,6 +212,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) pm->fadt.reset_reg = r; pm->fadt.reset_val = 0xf; pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; @@ -68,10 +71,10 @@ index 23c77ee..6cfcdfc 100644 } diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 5143c51..382a17b 100644 +index 4af9679..d17d6f8 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -340,6 +340,263 @@ GlobalProperty pc_compat_1_4[] = { +@@ -345,6 +345,263 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -335,7 +338,7 @@ index 5143c51..382a17b 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1030,7 +1287,8 @@ void pc_memory_init(PCMachineState *pcms, +@@ -1010,7 +1267,8 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr = g_malloc(sizeof(*option_rom_mr)); memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, &error_fatal); @@ -345,7 +348,7 @@ index 5143c51..382a17b 100644 memory_region_set_readonly(option_rom_mr, true); } memory_region_add_subregion_overlap(rom_memory, -@@ -1936,6 +2194,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1960,6 +2218,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->linuxboot_dma_enabled = true; pcmc->pvh_enabled = true; assert(!mc->get_hotplug_handler); @@ -354,7 +357,7 @@ index 5143c51..382a17b 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1947,7 +2207,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1971,7 +2231,8 @@ static void 
pc_machine_class_init(ObjectClass *oc, void *data) mc->hot_add_cpu = pc_hot_add_cpu; mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; @@ -365,18 +368,18 @@ index 5143c51..382a17b 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 26ff640..74a434d 100644 +index 82a33b4..b195f26 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -53,6 +53,7 @@ - #include "cpu.h" +@@ -54,6 +54,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" + #include "sysemu/xen.h" +#include "migration/migration.h" #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -173,8 +174,8 @@ static void pc_init1(MachineState *machine, +@@ -176,8 +177,8 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ @@ -387,7 +390,7 @@ index 26ff640..74a434d 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -310,6 +311,7 @@ static void pc_init1(MachineState *machine, +@@ -314,6 +315,7 @@ static void pc_init1(MachineState *machine, * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). 
*/ @@ -395,7 +398,7 @@ index 26ff640..74a434d 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -973,3 +975,207 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -991,3 +993,207 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -604,10 +607,10 @@ index 26ff640..74a434d 100644 +DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index f67232e..daa05f4 100644 +index 1818d31..6c49a50 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -198,8 +198,8 @@ static void pc_q35_init(MachineState *machine) +@@ -201,8 +201,8 @@ static void pc_q35_init(MachineState *machine) if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ @@ -618,7 +621,7 @@ index f67232e..daa05f4 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -333,6 +333,7 @@ static void pc_q35_init(MachineState *machine) +@@ -337,6 +337,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -626,7 +629,7 @@ index f67232e..daa05f4 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -547,3 +548,160 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -564,3 +565,160 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -788,7 +791,7 @@ index f67232e..daa05f4 100644 +DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, + pc_q35_machine_rhel730_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index 0046ab5..d81225b 100644 +index 2224fbc..24c1348 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -207,6 +207,8 @@ struct MachineClass 
{ @@ -801,10 +804,10 @@ index 0046ab5..d81225b 100644 int nb_nodes, ram_addr_t size); bool ignore_boot_device_suffixes; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 69fca4e..2e0395b 100644 +index 665c430..95d07f8 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -118,6 +118,9 @@ typedef struct PCMachineClass { +@@ -120,6 +120,9 @@ typedef struct PCMachineClass { /* use PVH to load kernels that support this feature */ bool pvh_enabled; @@ -814,7 +817,7 @@ index 69fca4e..2e0395b 100644 } PCMachineClass; #define TYPE_PC_MACHINE "generic-pc-machine" -@@ -273,6 +276,36 @@ extern const size_t pc_compat_1_5_len; +@@ -264,6 +267,36 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; @@ -852,10 +855,10 @@ index 69fca4e..2e0395b 100644 * depending on QEMU versions up to QEMU 2.4. */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index d98618c..4cfaf91 100644 +index 55a30cd..5477433 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1806,7 +1806,7 @@ static X86CPUDefinition builtin_x86_defs[] = { +@@ -1830,7 +1830,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .level = 0xd, .vendor = CPUID_VENDOR_AMD, .family = 6, @@ -864,7 +867,7 @@ index d98618c..4cfaf91 100644 .stepping = 3, .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -@@ -4085,6 +4085,7 @@ static PropValue kvm_default_props[] = { +@@ -4114,6 +4114,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -873,10 +876,10 @@ index d98618c..4cfaf91 100644 }; diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 4901c6d..e41cff2 100644 +index 6adbff3..51fbfd5 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c -@@ -3113,6 +3113,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3139,6 +3139,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = 
cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -884,7 +887,7 @@ index 4901c6d..e41cff2 100644 kvm_msr_buf_reset(cpu); -@@ -3422,6 +3423,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3448,6 +3449,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; diff --git a/0012-Enable-make-check.patch b/0012-Enable-make-check.patch index 1c4fe6c..f12db32 100644 --- a/0012-Enable-make-check.patch +++ b/0012-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 19bc7ed029e014148be05551a2f36b7d41035b1c Mon Sep 17 00:00:00 2001 +From 8704d7b6ca7438f10d162caf70572b62509b6341 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:48:41 +0200 Subject: Enable make check @@ -26,7 +26,7 @@ Rebase changes (weekly-200129): - Disable qtest/q35-test (uses upstream machine types) Rebased changes (weekly-200212): -- Do not run iotests on make check +- Do not run iotests on make check Merged patches (4.0.0): - f7ffd13 Remove 7 qcow2 and luks iotests that are taking > 25 sec to run during the fast train build proce @@ -82,7 +82,7 @@ index 034d3a3..aadc413 100755 *) ;; diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 435dccd..4ad61bc 100644 +index d886fa0..37dd761 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -92,7 +92,7 @@ @@ -104,7 +104,7 @@ index 435dccd..4ad61bc 100644 101 rw quick 102 rw quick diff --git a/tests/qtest/Makefile.include b/tests/qtest/Makefile.include -index 9e5a51d..0732f59 100644 +index 98af2c2..d370459 100644 --- a/tests/qtest/Makefile.include +++ b/tests/qtest/Makefile.include @@ -29,7 +29,7 @@ check-qtest-i386-y += ide-test @@ -266,7 +266,7 @@ index 60e6ec3..f9d6adc 100644 add_tests(sparc_machines); } else if (!strcmp(arch, "sparc64")) { diff --git a/tests/qtest/test-x86-cpuid-compat.c b/tests/qtest/test-x86-cpuid-compat.c -index 772287b..e7c075e 100644 +index 7ca1883..983aa07 100644 --- a/tests/qtest/test-x86-cpuid-compat.c +++
b/tests/qtest/test-x86-cpuid-compat.c @@ -300,6 +300,7 @@ int main(int argc, char **argv) @@ -279,7 +279,7 @@ index 772287b..e7c075e 100644 @@ -350,6 +351,7 @@ int main(int argc, char **argv) add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", - "-machine pc-i440fx-2.4 -cpu SandyBridge,+npt", + "-machine pc-i440fx-2.4 -cpu SandyBridge,+svm,+npt", "xlevel", 0x80000008); +#endif diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index b85574f..baa4aef 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 59faef98f835eb3522a0b49f6af6302b20b17b72 Mon Sep 17 00:00:00 2001 +From a7132dd3a148a8990ef18b38baa2fb2aa89484b4 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -41,7 +41,7 @@ Merged patches (2.9.0): 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index e265d77..41d00a3 100644 +index 6838bcc..ac109b4 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -47,6 +47,9 @@ @@ -54,7 +54,7 @@ index e265d77..41d00a3 100644 static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); -@@ -2722,9 +2725,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) +@@ -2715,9 +2718,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) ssize_t len; struct stat st; int groupid; @@ -86,7 +86,7 @@ index e265d77..41d00a3 100644 if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3169,6 +3193,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3162,6 +3186,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), 
DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), diff --git a/0014-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch index 54d2b1b..3eadb83 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 583a53be6a7c1c7291ab3fa0d2a4b864ad07358f Mon Sep 17 00:00:00 2001 +From b87487f40dcc351fb7c31a37b6facbdaab285b0c Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -23,7 +23,7 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c -index 46b4d98..552bae5 100644 +index 3693dfb..9d2e2d3 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -1674,9 +1674,17 @@ static void version(void) diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch index 1a7f388..c457bbd 100644 --- a/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From 08d4d4615d252407ba4753e1b53087df813eb9d1 Mon Sep 17 00:00:00 2001 +From 0aa24293cae6e15d483b9aa34f5c27ace53e478c Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -80,10 +80,10 @@ This commit matches the limit to current KVM_CAP_NR_VCPUS value. 
2 files changed, 30 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index 439a4ef..6f804b8 100644 +index d54a870..aa562c2 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c -@@ -1975,6 +1975,18 @@ static int kvm_init(MachineState *ms) +@@ -2089,6 +2089,18 @@ static int kvm_init(MachineState *ms) soft_vcpus_limit = kvm_recommended_vcpus(s); hard_vcpus_limit = kvm_max_vcpus(s); @@ -103,10 +103,10 @@ index 439a4ef..6f804b8 100644 if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " diff --git a/softmmu/vl.c b/softmmu/vl.c -index 552bae5..bdc4f7f 100644 +index 9d2e2d3..accccd8 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c -@@ -115,6 +115,8 @@ +@@ -116,6 +116,8 @@ #define MAX_VIRTIO_CONSOLES 1 @@ -136,7 +136,7 @@ index 552bae5..bdc4f7f 100644 static int machine_help_func(QemuOpts *opts, MachineState *machine) { ObjectProperty *prop; -@@ -3829,6 +3845,8 @@ void qemu_init(int argc, char **argv, char **envp) +@@ -3831,6 +3847,8 @@ void qemu_init(int argc, char **argv, char **envp) "mutually exclusive"); exit(EXIT_FAILURE); } diff --git a/0016-Add-support-for-simpletrace.patch b/0016-Add-support-for-simpletrace.patch index 6725065..d42323a 100644 --- a/0016-Add-support-for-simpletrace.patch +++ b/0016-Add-support-for-simpletrace.patch @@ -1,4 +1,4 @@ -From 6941da1d2cca51e23c525630610e10202db3434b Mon Sep 17 00:00:00 2001 +From dbf4ed3ddf5439d9563eb408bb95396b3b2c650f Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 8 Oct 2015 09:50:17 +0200 Subject: Add support for simpletrace @@ -37,10 +37,10 @@ Merged patches (2.3.0): create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp diff --git a/Makefile b/Makefile -index a89cc44..dcd088e 100644 +index 88b7565..045eeb5 100644 --- a/Makefile +++ b/Makefile -@@ -1000,6 +1000,10 @@ endif +@@ -995,6 +995,10 @@ endif $(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \ done $(INSTALL_DATA) $(BUILD_DIR)/trace-events-all 
"$(DESTDIR)$(qemu_datadir)/trace-events-all" diff --git a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index cc223aa..b0c7b3c 100644 --- a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From aaf64f4f80e6266921d9547f0b3b71e57abaf922 Mon Sep 17 00:00:00 2001 +From 9c4bcb1912ff003bc74c0bb3d49c78b1aca995d9 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 29 Jan 2020 09:30:03 +0100 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -3692,10 +3692,10 @@ index 0000000..10cd1de + +@bye diff --git a/qemu-options.hx b/qemu-options.hx -index 1df25ae..8c48b40 100644 +index 43b0dbc..9b14fe8 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -2878,11 +2878,11 @@ SRST +@@ -2887,11 +2887,11 @@ SRST :: diff --git a/0018-usb-xhci-Fix-PCI-capability-order.patch b/0018-usb-xhci-Fix-PCI-capability-order.patch index 33c77ab..4f5c1fc 100644 --- a/0018-usb-xhci-Fix-PCI-capability-order.patch +++ b/0018-usb-xhci-Fix-PCI-capability-order.patch @@ -1,4 +1,4 @@ -From ffa7a8516256750abd8c3bfb6964811582d36835 Mon Sep 17 00:00:00 2001 +From 6a0ad69ff5249d19e497ea902cd456c442febb80 Mon Sep 17 00:00:00 2001 From: "Dr. 
David Alan Gilbert" Date: Fri, 5 May 2017 19:06:14 +0200 Subject: usb-xhci: Fix PCI capability order diff --git a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index ac784ce..738713e 100644 --- a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 6f3b48a5ead2663197589066dfd4ed81d4658fe2 Mon Sep 17 00:00:00 2001 +From fed8a416435347899223bbfc260e57d8f8ddd20a Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] @@ -45,7 +45,7 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 472bbd2..ba2dac8 100644 +index 9b72094..71614b4 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -814,6 +814,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, diff --git a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index e999a52..32a70ef 100644 --- a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From 45b492a99791a4a463486be35e9ab44986507efe Mon Sep 17 00:00:00 2001 +From 00aeb546c0f989cf0c4a9623bbac9b187b051d68 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts @@ -32,10 +32,10 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index eb54f94..ecefb08 100644 +index 0c2bc8e..15bef3d 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c -@@ -351,12 +351,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, +@@ -335,12 +335,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { diff --git a/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch b/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch index 026ba5b..d0a3bd9 100644 --- a/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +++ b/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch @@ -1,4 +1,4 @@ -From ce12fa518d472f85d7d036c65867f40ec69ca759 Mon Sep 17 00:00:00 2001 +From 79b1a539f271440baa5d3ef4264c761175ca1c9d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 13 Mar 2020 12:34:32 +0000 Subject: block: Versioned x-blockdev-reopen API with feature flag @@ -29,10 +29,10 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/qapi/block-core.json b/qapi/block-core.json -index 943df19..50b99fb 100644 +index 0e1c6a5..6d94f92 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json -@@ -4126,10 +4126,17 @@ +@@ -4135,10 +4135,17 @@ # image does not have a default backing file name as part of its # metadata. 
# diff --git a/0023-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch b/0023-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch new file mode 100644 index 0000000..c3e67db --- /dev/null +++ b/0023-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch @@ -0,0 +1,41 @@ +From c3b099b389455b919b4b22011ed2fa3e7fd49510 Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Wed, 1 Jul 2020 12:24:02 +0200 +Subject: RHEL-only: Enable vTPM for POWER in downstream configs + +RH-Author: David Gibson +Message-id: <20200522032718.387731-1-dgibson@redhat.com> +Patchwork-id: 96743 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH] RHEL-only: Enable vTPM for POWER in downstream configs +Bugzilla: 1781911 + +From: David Gibson + +With the rebase to 5.0 we have the necessary code to implement virtual TPMs +for POWER targets. However, it's not enabled in the Red Hat configuration. +This downstream only patch corrects that. + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1781911 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=28742072 +Testing: With brewed qemu was able to see a vTPM device in a guest + +Signed-off-by: David Gibson +Signed-off-by: Miroslav Rezanina +--- + default-configs/ppc64-rh-devices.mak | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/default-configs/ppc64-rh-devices.mak b/default-configs/ppc64-rh-devices.mak +index ecbe53f..032422e 100644 +--- a/default-configs/ppc64-rh-devices.mak ++++ b/default-configs/ppc64-rh-devices.mak +@@ -32,3 +32,6 @@ CONFIG_XICS_SPAPR=y + CONFIG_XIVE=y + CONFIG_XIVE_SPAPR=y + CONFIG_XIVE_KVM=y ++CONFIG_TPM_SPAPR=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_PASSTHROUGH=y +-- +1.8.3.1 + diff --git a/0024-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch b/0024-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch new file mode 100644 index 0000000..1ae7b1a --- /dev/null +++ b/0024-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch @@ -0,0 +1,43 @@ +From 7fd82f454755e0c7c68faac76a156c9ddb322124 Mon Sep 17 
00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 1 Jul 2020 12:24:02 +0200 +Subject: redhat: fix 5.0 rebase missing ISA TPM TIS +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20200608155426.112078-1-marcandre.lureau@redhat.com> +Patchwork-id: 97457 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH] redhat: fix 5.0 rebase missing ISA TPM TIS +Bugzilla: 1841529 +RH-Acked-by: Auger Eric +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Danilo de Paula + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1841529 +BRANCH: rhel-av-8.3.0-preview-2020-04-29 +UPSTREAM: N/A +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=29172313 + +Signed-off-by: Marc-André Lureau +Signed-off-by: Miroslav Rezanina +--- + default-configs/x86_64-rh-devices.mak | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak +index d59b6d9..1469e05 100644 +--- a/default-configs/x86_64-rh-devices.mak ++++ b/default-configs/x86_64-rh-devices.mak +@@ -95,6 +95,6 @@ CONFIG_WDT_IB6300ESB=y + CONFIG_WDT_IB700=y + CONFIG_XIO3130=y + CONFIG_TPM_CRB=y +-CONFIG_TPM_TIS=y ++CONFIG_TPM_TIS_ISA=y + CONFIG_TPM_EMULATOR=y + CONFIG_TPM_PASSTHROUGH=y +-- +1.8.3.1 + diff --git a/0025-redhat-define-hw_compat_8_2.patch b/0025-redhat-define-hw_compat_8_2.patch new file mode 100644 index 0000000..e565300 --- /dev/null +++ b/0025-redhat-define-hw_compat_8_2.patch @@ -0,0 +1,81 @@ +From 3be5878b68235837729f452f0940105505bf4a55 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 1 Jul 2020 12:24:02 +0200 +Subject: redhat: define hw_compat_8_2 + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200619154227.23845-2-dgilbert@redhat.com> +Patchwork-id: 97662 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 1/2] redhat: define hw_compat_8_2 +Bugzilla: 1842902 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Stefan Hajnoczi + +From: Laurent Vivier + +Signed-off-by: Laurent Vivier +Signed-off-by: Dr. David Alan Gilbert + For minor fix + +Signed-off-by: Miroslav Rezanina +--- + hw/core/machine.c | 28 ++++++++++++++++++++++++++++ + include/hw/boards.h | 3 +++ + 2 files changed, 31 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index ef6b320..b837399 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -29,6 +29,34 @@ + #include "migration/vmstate.h" + + /* ++ * The same as hw_compat_4_2 ++ */ ++GlobalProperty hw_compat_rhel_8_2[] = { ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "queue-size", "128"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-scsi-device", "virtqueue_size", "128"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "x-enable-wce-if-config-wce", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "seg-max-adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-scsi-device", "seg_max_adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "vhost-blk-device", "seg_max_adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "usb-host", "suppress-remote-wake", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "usb-redir", "suppress-remote-wake", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "qxl", "revision", "4" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "qxl-vga", "revision", "4" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "fw_cfg", "acpi-mr-restore", "false" }, ++}; ++const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); ++/* + * The same as hw_compat_4_1 + */ + GlobalProperty 
hw_compat_rhel_8_1[] = { +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 24c1348..26ac91d 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -374,6 +374,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_2[]; ++extern const size_t hw_compat_rhel_8_2_len; ++ + extern GlobalProperty hw_compat_rhel_8_1[]; + extern const size_t hw_compat_rhel_8_1_len; + +-- +1.8.3.1 + diff --git a/0026-x86-Add-8.3.0-x86_64-machine-type.patch b/0026-x86-Add-8.3.0-x86_64-machine-type.patch new file mode 100644 index 0000000..1b490ad --- /dev/null +++ b/0026-x86-Add-8.3.0-x86_64-machine-type.patch @@ -0,0 +1,117 @@ +From 16e1749209e7df15f7ce12418886117c2259dee7 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 1 Jul 2020 12:24:02 +0200 +Subject: x86: Add 8.3.0 x86_64 machine type + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200619154227.23845-3-dgilbert@redhat.com> +Patchwork-id: 97663 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 2/2] x86: Add 8.3.0 x86_64 machine type +Bugzilla: 1842902 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Stefan Hajnoczi + +From: "Dr. David Alan Gilbert" + +Not much change, just the smbase-smram. + +Signed-off-by: Dr. 
David Alan Gilbert +Signed-off-by: Miroslav Rezanina +--- + hw/i386/pc.c | 6 ++++++ + hw/i386/pc_piix.c | 4 ++++ + hw/i386/pc_q35.c | 22 ++++++++++++++++++++++ + include/hw/i386/pc.h | 3 +++ + 4 files changed, 35 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index d17d6f8..aaf3ef4 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -359,6 +359,12 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_8_2_compat[] = { ++ /* pc_rhel_8_2_compat from pc_compat_4_2 */ ++ { "mch", "smbase-smram", "off" }, ++}; ++const size_t pc_rhel_8_2_compat_len = G_N_ELEMENTS(pc_rhel_8_2_compat); ++ + /* pc_rhel_8_1_compat is empty since pc_4_1_compat is */ + GlobalProperty pc_rhel_8_1_compat[] = { }; + const size_t pc_rhel_8_1_compat_len = G_N_ELEMENTS(pc_rhel_8_1_compat); +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index b195f26..c951107 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1027,6 +1027,10 @@ static void pc_machine_rhel760_options(MachineClass *m) + m->smbus_no_migration_support = true; + pcmc->pvh_enabled = false; + pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(m->compat_props, pc_rhel_8_2_compat, ++ pc_rhel_8_2_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); + compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 6c49a50..ef0b6e3 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -589,6 +589,23 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + } + ++static void pc_q35_init_rhel830(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void 
pc_q35_machine_rhel830_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.3.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.3.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel830, "pc-q35-rhel8.3.0", pc_q35_init_rhel830, ++ pc_q35_machine_rhel830_options); ++ + static void pc_q35_init_rhel820(MachineState *machine) + { + pc_q35_init(machine); +@@ -599,8 +616,13 @@ static void pc_q35_machine_rhel820_options(MachineClass *m) + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel_options(m); + m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.2.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(m->compat_props, pc_rhel_8_2_compat, ++ pc_rhel_8_2_compat_len); + } + + DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 95d07f8..e67468b 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -270,6 +270,9 @@ extern const size_t pc_compat_1_4_len; + extern GlobalProperty pc_rhel_compat[]; + extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_8_2_compat[]; ++extern const size_t pc_rhel_8_2_compat_len; ++ + extern GlobalProperty pc_rhel_8_1_compat[]; + extern const size_t pc_rhel_8_1_compat_len; + +-- +1.8.3.1 + diff --git a/0027-hw-arm-Changes-to-rhel820-machine.patch b/0027-hw-arm-Changes-to-rhel820-machine.patch new file mode 100644 index 0000000..842d73b --- /dev/null +++ b/0027-hw-arm-Changes-to-rhel820-machine.patch @@ -0,0 +1,78 @@ +From b3f4822d380a046220749314c9a05cdb0d5d2718 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 1 Jul 2020 12:24:02 +0200 +Subject: hw/arm: Changes to rhel820 machine + +RH-Author: Gavin Shan +Message-id: 
<20200630013648.101937-1-gshan@redhat.com> +Patchwork-id: 97844 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH v5 1/3] hw/arm: Changes to rhel820 machine +Bugzilla: 1818843 + +This applies two changes to rhel820 machine: + + * Set the gic version to VIRT_GIC_VERSION_NOSEL by default, which + doesn't cause functional changes. + * Disallow to configure the RAS property, which is hidden by default. + +Signed-off-by: Gavin Shan +RH-Acked-by: Auger Eric +RH-Acked-by: Andrew Jones +Signed-off-by: Miroslav Rezanina +--- + hw/arm/virt.c | 17 ++++++----------- + 1 file changed, 6 insertions(+), 11 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index abbc7d5..07c5c14 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2090,6 +2090,7 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, + visit_type_OnOffAuto(v, name, &vms->acpi, errp); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_ras(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2103,6 +2104,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) + + vms->ras = value; + } ++#endif /* disabled for RHEL */ + + static char *virt_get_gic_version(Object *obj, Error **errp) + { +@@ -2727,12 +2729,8 @@ static void rhel820_virt_instance_init(Object *obj) + object_property_set_description(obj, "highmem", + "Set on/off to enable/disable using " + "physical address space above 32 bits"); +- /* +- * Default GIC type is still v2, but became configurable for RHEL. We +- * keep v2 instead of max as TCG CI test cases require an MSI controller +- * and there is no userspace ITS MSI emulation available. 
+- */ +- vms->gic_version = 2; ++ ++ vms->gic_version = VIRT_GIC_VERSION_NOSEL; + object_property_add_str(obj, "gic-version", virt_get_gic_version, + virt_set_gic_version); + object_property_set_description(obj, "gic-version", +@@ -2759,12 +2757,9 @@ static void rhel820_virt_instance_init(Object *obj) + object_property_set_description(obj, "iommu", + "Set the IOMMU type. " + "Valid values are none and smmuv3"); ++ + vms->ras = false; +- object_property_add_bool(obj, "ras", virt_get_ras, +- virt_set_ras); +- object_property_set_description(obj, "ras", +- "Set on/off to enable/disable reporting host memory errors " +- "to a KVM guest using ACPI and guest external abort exceptions"); ++ + vms->irqmap=a15irqmap; + virt_flash_create(vms); + } +-- +1.8.3.1 + diff --git a/0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch b/0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch new file mode 100644 index 0000000..20ee03d --- /dev/null +++ b/0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch @@ -0,0 +1,50 @@ +From 75c705bcd0e6adb903889dd031c6f867a1ca7a63 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 1 Jul 2020 12:24:02 +0200 +Subject: hw/arm: Introduce rhel_virt_instance_init() helper + +RH-Author: Gavin Shan +Message-id: <20200629022939.76453-3-gshan@redhat.com> +Patchwork-id: 97838 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH v4 2/3] hw/arm: Introduce rhel_virt_instance_init() helper +Bugzilla: 1818843 +RH-Acked-by: Andrew Jones + +This introduces rhel_virt_instance_init() helper function so that +it can be shared by rhel820 and rhel830 machine. This shouldn't +cause functional changes. 
+ +Signed-off-by: Gavin Shan +Signed-off-by: Miroslav Rezanina +--- + hw/arm/virt.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 07c5c14..c1bc866 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2713,7 +2713,7 @@ static void rhel_machine_init(void) + } + type_init(rhel_machine_init); + +-static void rhel820_virt_instance_init(Object *obj) ++static void rhel_virt_instance_init(Object *obj) + { + VirtMachineState *vms = VIRT_MACHINE(obj); + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); +@@ -2764,6 +2764,11 @@ static void rhel820_virt_instance_init(Object *obj) + virt_flash_create(vms); + } + ++static void rhel820_virt_instance_init(Object *obj) ++{ ++ rhel_virt_instance_init(obj); ++} ++ + static void rhel820_virt_options(MachineClass *mc) + { + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); +-- +1.8.3.1 + diff --git a/0029-hw-arm-Add-rhel830-machine-type.patch b/0029-hw-arm-Add-rhel830-machine-type.patch new file mode 100644 index 0000000..7517842 --- /dev/null +++ b/0029-hw-arm-Add-rhel830-machine-type.patch @@ -0,0 +1,58 @@ +From c0c64a417f65d388526c62b2d82f29fc4f5aed76 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 1 Jul 2020 12:24:02 +0200 +Subject: hw/arm: Add rhel830 machine type + +RH-Author: Gavin Shan +Message-id: <20200630014756.102753-1-gshan@redhat.com> +Patchwork-id: 97845 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH v5 3/3] hw/arm: Add rhel830 machine type +Bugzilla: 1818843 +RH-Acked-by: Andrew Jones + +This adds rhel830 machine type, whose properties are the same as +rhel820.
+ +Signed-off-by: Gavin Shan +RH-Acked-by: Auger Eric +Signed-off-by: Miroslav Rezanina +--- + hw/arm/virt.c | 17 +++++++++++++++-- + 1 file changed, 15 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c1bc866..48af222 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2764,6 +2764,17 @@ static void rhel_virt_instance_init(Object *obj) + virt_flash_create(vms); + } + ++static void rhel830_virt_instance_init(Object *obj) ++{ ++ rhel_virt_instance_init(obj); ++} ++ ++static void rhel830_virt_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 3, 0) ++ + static void rhel820_virt_instance_init(Object *obj) + { + rhel_virt_instance_init(obj); +@@ -2771,6 +2782,8 @@ static void rhel820_virt_instance_init(Object *obj) + + static void rhel820_virt_options(MachineClass *mc) + { +- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ rhel830_virt_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); + } +-DEFINE_RHEL_MACHINE_AS_LATEST(8, 2, 0) ++DEFINE_RHEL_MACHINE(8, 2, 0) +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 75a223b..30d1c07 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.0.0 -Release: 0%{?dist} +Release: 0%{?dist}.wrb200701 # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -105,6 +105,7 @@ Source35: udev-kvm-check.c Source36: README.tests +Patch0001: 0001-redhat-Adding-slirp-to-the-exploded-tree.patch Patch0005: 0005-Initial-redhat-build.patch Patch0006: 0006-Enable-disable-devices-for-RHEL.patch Patch0007: 0007-Machine-type-related-general-changes.patch @@ -122,6 +123,13 @@ Patch0018: 0018-usb-xhci-Fix-PCI-capability-order.patch Patch0019: 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0020: 0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch Patch0021: 0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +Patch0023: 0023-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch +Patch0024: 0024-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch +Patch0025: 0025-redhat-define-hw_compat_8_2.patch +Patch0026: 0026-x86-Add-8.3.0-x86_64-machine-type.patch +Patch0027: 0027-hw-arm-Changes-to-rhel820-machine.patch +Patch0028: 0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch +Patch0029: 0029-hw-arm-Add-rhel830-machine-type.patch BuildRequires: wget BuildRequires: rpm-build @@ -406,6 +414,9 @@ the Secure Shell (SSH) protocol. 
%prep %setup -n qemu-%{version} +# Remove slirp content in scratchbuilds because it's being applyed as a patch +rm -fr slirp +mkdir slirp %autopatch -p1 mkdir qemu-kvm-build @@ -665,11 +676,12 @@ cp -R scripts/qmp/* $RPM_BUILD_ROOT%{testsdir}/scripts/qmp install -p -m 0755 ../tests/Makefile.include $RPM_BUILD_ROOT%{testsdir}/tests/ # Install qemu-iotests -cp -R tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ +cp -R ../tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ +cp -u tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ # Avoid ambiguous 'python' interpreter name -find $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env python+%{__python3}+' {} \; -find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env python3+%{__python3}+' {} \; -find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/python+%{__python3}+' {} \; +find $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env \(python\|python3\)+%{__python3}+' {} \; +find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env \(python\|python3\)+%{__python3}+' {} \; +find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/\(python\|python3\)+%{__python3}+' {} \; install -p -m 0644 %{SOURCE36} $RPM_BUILD_ROOT%{testsdir}/README @@ -930,10 +942,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %doc %{qemudocdir}/LICENSE %doc %{qemudocdir}/README.systemtap %doc %{qemudocdir}/qmp-spec.txt -%doc %{qemudocdir}/qemu-ga-ref.html -%doc %{qemudocdir}/qemu-ga-ref.txt -%doc %{qemudocdir}/qemu-qmp-ref.html -%doc %{qemudocdir}/qemu-qmp-ref.txt %doc %{qemudocdir}/interop/* %doc %{qemudocdir}/index.html %doc %{qemudocdir}/system/* @@ -942,7 +950,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ 
%{_mandir}/man7/qemu-qmp-ref.7* %{_mandir}/man7/qemu-cpu-models.7* %{_bindir}/qemu-keymap -%{_bindir}/qemu-pr-helper +%{_libexecdir}/qemu-pr-helper %{_bindir}/qemu-edid %{_bindir}/qemu-trace-stap %{_unitdir}/qemu-pr-helper.service @@ -1073,9 +1081,152 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog -* Tue May 12 2020 Danilo Cesar Lemes de Paula - 5.0.0-0 -- Temporary rebase of qemu-kvm to 5.0.0 -- Updated the tarball to actually point to 5.0.0 GA +* Sun Jun 28 2020 Danilo Cesar Lemes de Paula - 4.2.0-28.el8 +- kvm-virtio-blk-Refactor-the-code-that-processes-queued-r.patch [bz#1812765] +- kvm-virtio-blk-On-restart-process-queued-requests-in-the.patch [bz#1812765] +- kvm-Fix-use-afte-free-in-ip_reass-CVE-2020-1983.patch [bz#1838082] +- Resolves: bz#1812765 + (qemu with iothreads enabled crashes on resume after enospc pause for disk extension) +- Resolves: bz#1838082 + (CVE-2020-1983 virt:8.2/qemu-kvm: QEMU: slirp: use-after-free in ip_reass() function in ip_input.c [rhel-av-8]) + +* Thu Jun 18 2020 Eduardo Lima (Etrunko) - 4.2.0-27.el8 +- kvm-hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch [bz#1820531] +- kvm-spec-Fix-python-shenigans-for-tests.patch [bz#1845779] +- kvm-target-i386-Add-ARCH_CAPABILITIES-related-bits-into-.patch [bz#1840342] +- Resolves: bz#1820531 + (qmp command query-pci get wrong result after hotplug device under hotplug=off controller) +- Resolves: bz#1840342 + ([Intel 8.2.1 Bug] qemu-kvm Add ARCH_CAPABILITIES to Icelake-Server cpu model - Fast Train) +- Resolves: bz#1845779 + (Install 'qemu-kvm-tests' failed as nothing provides /usr/libexec/platform-python3 - virt module 6972) + +* Wed Jun 17 2020 Eduardo Lima (Etrunko) - 4.2.0-26.el8 +- kvm-nbd-server-Avoid-long-error-message-assertions-CVE-2.patch [bz#1845384] +- kvm-block-Call-attention-to-truncation-of-long-NBD-expor.patch [bz#1845384] +- Resolves: bz#1845384 + (CVE-2020-10761 virt:8.2/qemu-kvm: QEMU: nbd: reachable assertion failure in 
nbd_negotiate_send_rep_verr via remote client [rhel-av-8]) + +* Tue Jun 09 2020 Danilo Cesar Lemes de Paula - 4.2.0-25.el8 +- kvm-enable-ramfb.patch [bz#1841068] +- kvm-block-Add-flags-to-BlockDriver.bdrv_co_truncate.patch [bz#1780574] +- kvm-block-Add-flags-to-bdrv-_co-_truncate.patch [bz#1780574] +- kvm-block-backend-Add-flags-to-blk_truncate.patch [bz#1780574] +- kvm-qcow2-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch [bz#1780574] +- kvm-raw-format-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch [bz#1780574] +- kvm-file-posix-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch [bz#1780574] +- kvm-block-truncate-Don-t-make-backing-file-data-visible.patch [bz#1780574] +- kvm-iotests-Add-qemu_io_log.patch [bz#1780574] +- kvm-iotests-Filter-testfiles-out-in-filter_img_info.patch [bz#1780574] +- kvm-iotests-Test-committing-to-short-backing-file.patch [bz#1780574] +- kvm-qcow2-Forward-ZERO_WRITE-flag-for-full-preallocation.patch [bz#1780574] +- kvm-i386-Add-MSR-feature-bit-for-MDS-NO.patch [bz#1769912] +- kvm-i386-Add-macro-for-stibp.patch [bz#1769912] +- kvm-target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch [bz#1769912] +- kvm-i386-Add-new-CPU-model-Cooperlake.patch [bz#1769912] +- kvm-target-i386-Add-missed-features-to-Cooperlake-CPU-mo.patch [bz#1769912] +- Resolves: bz#1769912 + ([Intel 8.2.1 Feature] introduce Cooper Lake cpu model - qemu-kvm Fast Train) +- Resolves: bz#1780574 + (Data corruption with resizing short overlay over longer backing files) +- Resolves: bz#1841068 + (RFE: please support the "ramfb" display device model) + +* Mon Jun 08 2020 Danilo Cesar Lemes de Paula - 4.2.0-24.el8 +- kvm-target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch [bz#1513681] +- kvm-block-curl-HTTP-header-fields-allow-whitespace-aroun.patch [bz#1841038] +- kvm-block-curl-HTTP-header-field-names-are-case-insensit.patch [bz#1841038] +- kvm-MAINTAINERS-fix-qcow2-bitmap.c-under-Dirty-Bitmaps-h.patch [bz#1779893 bz#1779904] +- 
kvm-iotests-Let-_make_test_img-parse-its-parameters.patch [bz#1779893 bz#1779904] +- kvm-qemu_img-add-cvtnum_full-to-print-error-reports.patch [bz#1779893 bz#1779904] +- kvm-block-Make-it-easier-to-learn-which-BDS-support-bitm.patch [bz#1779893 bz#1779904] +- kvm-blockdev-Promote-several-bitmap-functions-to-non-sta.patch [bz#1779893 bz#1779904] +- kvm-blockdev-Split-off-basic-bitmap-operations-for-qemu-.patch [bz#1779893 bz#1779904] +- kvm-qemu-img-Add-bitmap-sub-command.patch [bz#1779893 bz#1779904] +- kvm-iotests-Fix-test-178.patch [bz#1779893 bz#1779904] +- kvm-qcow2-Expose-bitmaps-size-during-measure.patch [bz#1779893 bz#1779904] +- kvm-qemu-img-Factor-out-code-for-merging-bitmaps.patch [bz#1779893 bz#1779904] +- kvm-qemu-img-Add-convert-bitmaps-option.patch [bz#1779893 bz#1779904] +- kvm-iotests-Add-test-291-to-for-qemu-img-bitmap-coverage.patch [bz#1779893 bz#1779904] +- kvm-iotests-Add-more-skip_if_unsupported-statements-to-t.patch [bz#1778593] +- kvm-iotests-don-t-use-format-for-drive_add.patch [bz#1778593] +- kvm-iotests-055-refactor-compressed-backup-to-vmdk.patch [bz#1778593] +- kvm-iotests-055-skip-vmdk-target-tests-if-vmdk-is-not-wh.patch [bz#1778593] +- kvm-backup-Improve-error-for-bdrv_getlength-failure.patch [bz#1778593] +- kvm-backup-Make-sure-that-source-and-target-size-match.patch [bz#1778593] +- kvm-iotests-Backup-with-different-source-target-size.patch [bz#1778593] +- kvm-iotests-109-Don-t-mirror-with-mismatched-size.patch [bz#1778593] +- kvm-iotests-229-Use-blkdebug-to-inject-an-error.patch [bz#1778593] +- kvm-mirror-Make-sure-that-source-and-target-size-match.patch [bz#1778593] +- kvm-iotests-Mirror-with-different-source-target-size.patch [bz#1778593] +- Resolves: bz#1513681 + ([Intel 8.2.1 Feat] qemu-kvm PT VMX -- Fast Train) +- Resolves: bz#1778593 + (Qemu coredump when backup to a existing small size image) +- Resolves: bz#1779893 + (RFE: Copy bitmaps with qemu-img convert) +- Resolves: bz#1779904 + (RFE: ability to estimate bitmap space 
utilization for qcow2) +- Resolves: bz#1841038 + (qemu-img: /var/tmp/v2vovl56bced.qcow2: CURL: Error opening file: Server does not support 'range' (byte ranges) with HTTP/2 server in VMware ESXi 7) + +* Thu Jun 04 2020 Danilo Cesar Lemes de Paula - 4.2.0-23.el8 +- kvm-target-arm-Fix-PAuth-sbox-functions.patch [bz#1813940] +- kvm-Don-t-leak-memory-when-reallocation-fails.patch [bz#1749737] +- kvm-Replace-remaining-malloc-free-user-with-glib.patch [bz#1749737] +- kvm-Revert-RHEL-disable-hostmem-memfd.patch [bz#1839030] +- kvm-block-introducing-bdrv_co_delete_file-interface.patch [bz#1827630] +- kvm-block.c-adding-bdrv_co_delete_file.patch [bz#1827630] +- kvm-crypto.c-cleanup-created-file-when-block_crypto_co_c.patch [bz#1827630] +- Resolves: bz#1749737 + (CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-av-8]) +- Resolves: bz#1813940 + (CVE-2020-10702 virt:8.1/qemu-kvm: qemu: weak signature generation in Pointer Authentication support for ARM [rhel-av-8]) +- Resolves: bz#1827630 + (volume creation leaving uncleaned stuff behind on error (vol-clone/libvirt/qemu-kvm)) +- Resolves: bz#1839030 + (RFE: enable the "memfd" memory backend) + +* Mon May 25 2020 Danilo Cesar Lemes de Paula - 4.2.0-22.el8 +- kvm-block-always-fill-entire-LUKS-header-space-with-zero.patch [bz#1775462] +- kvm-numa-remove-not-needed-check.patch [bz#1600217] +- kvm-numa-properly-check-if-numa-is-supported.patch [bz#1600217] +- kvm-numa-Extend-CLI-to-provide-initiator-information-for.patch [bz#1600217] +- kvm-numa-Extend-CLI-to-provide-memory-latency-and-bandwi.patch [bz#1600217] +- kvm-numa-Extend-CLI-to-provide-memory-side-cache-informa.patch [bz#1600217] +- kvm-hmat-acpi-Build-Memory-Proximity-Domain-Attributes-S.patch [bz#1600217] +- kvm-hmat-acpi-Build-System-Locality-Latency-and-Bandwidt.patch [bz#1600217] +- kvm-hmat-acpi-Build-Memory-Side-Cache-Information-Struct.patch [bz#1600217] +- kvm-tests-numa-Add-case-for-QMP-build-HMAT.patch [bz#1600217] +- 
kvm-tests-bios-tables-test-add-test-cases-for-ACPI-HMAT.patch [bz#1600217] +- kvm-ACPI-add-expected-files-for-HMAT-tests-acpihmat.patch [bz#1600217] +- Resolves: bz#1600217 + ([Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train) +- Resolves: bz#1775462 + (Creating luks-inside-qcow2 images with cluster_size=2k/4k will get a corrupted image) + +* Mon May 11 2020 Danilo Cesar Lemes de Paula - 4.2.0-21.el8 +- kvm-hw-pci-pcie-Forbid-hot-plug-if-it-s-disabled-on-the-.patch [bz#1820531] +- kvm-hw-pci-pcie-Replace-PCI_DEVICE-casts-with-existing-v.patch [bz#1820531] +- kvm-tools-virtiofsd-passthrough_ll-Fix-double-close.patch [bz#1817445] +- kvm-virtiofsd-add-rlimit-nofile-NUM-option.patch [bz#1817445] +- kvm-virtiofsd-stay-below-fs.file-max-sysctl-value-CVE-20.patch [bz#1817445] +- kvm-virtiofsd-jail-lo-proc_self_fd.patch [bz#1817445] +- kvm-virtiofsd-Show-submounts.patch [bz#1817445] +- kvm-virtiofsd-only-retain-file-system-capabilities.patch [bz#1817445] +- kvm-virtiofsd-drop-all-capabilities-in-the-wait-parent-p.patch [bz#1817445] +- Resolves: bz#1817445 + (CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8]) +- Resolves: bz#1820531 + (qmp command query-pci get wrong result after hotplug device under hotplug=off controller) + +* Fri May 01 2020 Jon Maloy - 4.2.0-20.el8 +- kvm-pcie_root_port-Add-hotplug-disabling-option.patch [bz#1790899] +- kvm-compat-disable-edid-for-virtio-gpu-ccw.patch [bz#1816793] +- Resolves: bz#1790899 + ([RFE] QEMU devices should have the option to enable/disable hotplug/unplug) +- Resolves: bz#1816793 + ('edid' compat handling missing for virtio-gpu-ccw) * Tue Apr 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-19.el8_2 - kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch [bz#1822682] diff --git a/qemu-pr-helper.service b/qemu-pr-helper.service index a1d27b0..ac6cda1 100644 --- a/qemu-pr-helper.service +++ b/qemu-pr-helper.service @@ -4,7 +4,7 @@ 
Description=Persistent Reservation Daemon for QEMU [Service] WorkingDirectory=/tmp Type=simple -ExecStart=/usr/bin/qemu-pr-helper +ExecStart=/usr/libexec/qemu-pr-helper PrivateTmp=yes ProtectSystem=strict ReadWritePaths=/var/run diff --git a/sources b/sources index bc432c6..ccf796d 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-5.0.0.tar.xz) = 21ef0cbe107c468a40f0fa2635db2a40048c8790b629dfffca5cd62bb1b502ea8eb133bfc40df5ecf1489e2bffe87f6829aee041cb8a380ff04a8afa23b39fcf +SHA512 (qemu-5.0.0.tar.xz) = d39a728aac20baa56eaa02afb456cff0b220180682f922602428fd45b566e2fb9944142207cb56db68149110df79720137fe1f84d79c0b266b8b23c6eca909e3 From d364584f57b96ad1d0d403e8adbe034cfad0ccd9 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Tue, 14 Jul 2020 10:45:56 -0400 Subject: [PATCH 081/195] Another update with the weekly rebase --- ...at-Adding-slirp-to-the-exploded-tree.patch | 2 +- 0005-Initial-redhat-build.patch | 16 +- 0006-Enable-disable-devices-for-RHEL.patch | 6 +- ...Machine-type-related-general-changes.patch | 16 +- 0008-Add-aarch64-machine-types.patch | 14 +- 0009-Add-ppc64-machine-types.patch | 4 +- 0010-Add-s390x-machine-types.patch | 10 +- 0011-Add-x86_64-machine-types.patch | 29 +- 0012-Enable-make-check.patch | 4 +- ...mber-of-devices-that-can-be-assigned.patch | 4 +- ...Add-support-statement-to-help-output.patch | 4 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 8 +- 0016-Add-support-for-simpletrace.patch | 2 +- ...documentation-instead-of-qemu-system.patch | 3804 +---------------- 0018-usb-xhci-Fix-PCI-capability-order.patch | 2 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 4 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 2 +- ...x-blockdev-reopen-API-with-feature-f.patch | 4 +- ...o-net-fix-removal-of-failover-device.patch | 52 + ...-vTPM-for-POWER-in-downstream-config.patch | 4 +- ...t-fix-5.0-rebase-missing-ISA-TPM-TIS.patch | 4 +- ... 
=> 0026-redhat-define-hw_compat_8_2.patch | 9 +- ...27-x86-Add-8.3.0-x86_64-machine-type.patch | 19 +- ...28-hw-arm-Changes-to-rhel820-machine.patch | 10 +- ...oduce-rhel_virt_instance_init-helper.patch | 10 +- ...0030-hw-arm-Add-rhel830-machine-type.patch | 10 +- ...efine-pseries-rhel8.3.0-machine-type.patch | 74 + qemu-kvm.spec | 31 +- sources | 2 +- 29 files changed, 374 insertions(+), 3786 deletions(-) create mode 100644 0022-virtio-net-fix-removal-of-failover-device.patch rename 0023-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch => 0024-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch (93%) rename 0024-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch => 0025-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch (93%) rename 0025-redhat-define-hw_compat_8_2.patch => 0026-redhat-define-hw_compat_8_2.patch (91%) rename 0026-x86-Add-8.3.0-x86_64-machine-type.patch => 0027-x86-Add-8.3.0-x86_64-machine-type.patch (88%) rename 0027-hw-arm-Changes-to-rhel820-machine.patch => 0028-hw-arm-Changes-to-rhel820-machine.patch (91%) rename 0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch => 0029-hw-arm-Introduce-rhel_virt_instance_init-helper.patch (84%) rename 0029-hw-arm-Add-rhel830-machine-type.patch => 0030-hw-arm-Add-rhel830-machine-type.patch (85%) create mode 100644 0031-redhat-define-pseries-rhel8.3.0-machine-type.patch diff --git a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch index 9d73fb7..632d69e 100644 --- a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch +++ b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch @@ -1,4 +1,4 @@ -From 6527eafb590d72354bb02662f95d7603cd3a3fd3 Mon Sep 17 00:00:00 2001 +From effec13a9f842205be3526bcfb15e2068c0067b0 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 23 Apr 2020 05:26:54 +0200 Subject: redhat: Adding slirp to the exploded tree diff --git a/0005-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch index 
7a562ab..696916e 100644 --- a/0005-Initial-redhat-build.patch +++ b/0005-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From b9d95992b85d3667c13df3a321933c44c7157d0b Mon Sep 17 00:00:00 2001 +From 9f51ea3d645d7da3eaf55f3e5cedc4cd42981efc Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build @@ -11,7 +11,7 @@ several issues are fixed in QEMU tree: - Man page renamed from qemu to qemu-kvm - man page is installed using make install so we have to fix it in qemu tree -This rebase includes changes up to qemu-kvm-4.2.0-28.el8 +This rebase includes changes up to qemu-kvm-4.2.0-29.el8 Rebase notes (3.1.0): - added new configure options @@ -133,13 +133,13 @@ Merged patches (weekly-200617): redhat/Makefile | 86 + redhat/Makefile.common | 53 + redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 2956 +++++++++++++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 2961 +++++++++++++++++++++++++++++++++++ redhat/qemu-pr-helper.service | 2 +- redhat/scripts/extract_build_cmd.py | 2 +- redhat/scripts/process-patches.sh | 7 +- tests/check-block.sh | 2 + ui/vnc.c | 2 +- - 12 files changed, 3145 insertions(+), 9 deletions(-) + 12 files changed, 3150 insertions(+), 9 deletions(-) create mode 100644 redhat/Makefile create mode 100644 redhat/Makefile.common create mode 100644 redhat/README.tests @@ -167,10 +167,10 @@ index b1b8a5a..88b7565 100644 $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) $(MANUAL_BUILDDIR)/system/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" diff --git a/configure b/configure -index 4a22dcd..53979dc 100755 +index ee6c3c6..bd957a5 100755 --- a/configure +++ b/configure -@@ -2596,6 +2596,7 @@ if test "$seccomp" != "no" ; then +@@ -2610,6 +2610,7 @@ if test "$seccomp" != "no" ; then seccomp="no" fi fi @@ -192,10 +192,10 @@ index 8e29c86..e9bcb5a 100755 ret=0 diff --git a/ui/vnc.c b/ui/vnc.c -index 12a1271..89fb616 100644 +index 527ad25..8099c0e 100644 
--- a/ui/vnc.c +++ b/ui/vnc.c -@@ -3972,7 +3972,7 @@ void vnc_display_open(const char *id, Error **errp) +@@ -3970,7 +3970,7 @@ void vnc_display_open(const char *id, Error **errp) #ifdef CONFIG_VNC_SASL if (sasl) { diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0006-Enable-disable-devices-for-RHEL.patch index 50eb1de..0ea7715 100644 --- a/0006-Enable-disable-devices-for-RHEL.patch +++ b/0006-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From b1e6c8e2a9b58d215042adbd0fffdf102ac9a9d7 Mon Sep 17 00:00:00 2001 +From c956c1d145adb87a8a830bf6091dac80b7925054 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 11 Jan 2016 11:53:33 +0100 Subject: Enable/disable devices for RHEL @@ -572,7 +572,7 @@ index fa5c3fa..854094c 100644 smartcard.mo-libs := $(SMARTCARD_LIBS) endif diff --git a/qemu-options.hx b/qemu-options.hx -index 196f468..43b0dbc 100644 +index fa1b19d..b969944 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2248,10 +2248,6 @@ ERST @@ -587,7 +587,7 @@ index 196f468..43b0dbc 100644 DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" diff --git a/softmmu/vl.c b/softmmu/vl.c -index 3e15ee2..3693dfb 100644 +index 9da2e23..6509057 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -145,7 +145,7 @@ static Chardev **serial_hds; diff --git a/0007-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch index 9933847..5117133 100644 --- a/0007-Machine-type-related-general-changes.patch +++ b/0007-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From 1266dec11bec3867e8eda3062384b07986a89d3b Mon Sep 17 00:00:00 2001 +From cbbdd67535fc6da1b77b2fa9f5368f72c211eeb1 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -101,7 +101,7 @@ index 7ead029..3b8501f 100644 
&pm->disable_s3, OBJ_PROP_FLAG_READWRITE); object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S4_DISABLED, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 630ca6e..715bc17 100644 +index 283422e..f664201 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -276,6 +276,7 @@ static const VMStateDescription vmstate_acpi = { @@ -124,7 +124,7 @@ index 630ca6e..715bc17 100644 DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_hotplug_bridge, true), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index cd0834c..2e3efd3 100644 +index 7d9f715..e30b837 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1450,7 +1450,7 @@ static void virt_build_smbios(VirtMachineState *vms) @@ -386,7 +386,7 @@ index 3aaeeec..d88f52a 100644 }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 1d832b2..82a33b4 100644 +index fae487f..18815e8 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -179,6 +179,8 @@ static void pc_init1(MachineState *machine, @@ -399,7 +399,7 @@ index 1d832b2..82a33b4 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 047ea8d..1818d31 100644 +index acd6d40..a1131e6 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -204,6 +204,8 @@ static void pc_q35_init(MachineState *machine) @@ -716,10 +716,10 @@ index 28a5318..ff4a672 100644 #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" diff --git a/include/hw/boards.h b/include/hw/boards.h -index 18815d9..2224fbc 100644 +index 426ce5f..1062df9 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -372,4 +372,28 @@ extern const size_t hw_compat_2_2_len; +@@ -373,4 +373,28 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; @@ -792,7 +792,7 @@ index e29a376..35ac38c 100644 + #endif diff --git a/migration/migration.c b/migration/migration.c -index 481a590..a5a64f9 100644 +index 92e44e0..67d3b75 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -128,6 +128,8 @@ enum 
mig_rp_message_type { diff --git a/0008-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch index 2d1ab2a..fff091f 100644 --- a/0008-Add-aarch64-machine-types.patch +++ b/0008-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 9506867ddf6d00d2dc502f69cfc0d274a2f1040c Mon Sep 17 00:00:00 2001 +From 1f0568a29b004dd3557d5405fddc6979ac5ff911 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -49,7 +49,7 @@ Merged patches (4.1.0): 2 files changed, 182 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 2e3efd3..abbc7d5 100644 +index e30b837..5a45677 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -79,6 +79,7 @@ @@ -127,7 +127,7 @@ index 2e3efd3..abbc7d5 100644 static bool virt_get_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2339,6 +2384,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2367,6 +2412,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return requested_pa_size > 40 ? requested_pa_size : 0; } @@ -135,7 +135,7 @@ index 2e3efd3..abbc7d5 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -2604,3 +2650,127 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -2637,3 +2683,127 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -264,10 +264,10 @@ index 2e3efd3..abbc7d5 100644 +} +DEFINE_RHEL_MACHINE_AS_LATEST(8, 2, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 31878dd..5582fba 100644 +index 54bcf17..5fdabd8 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -157,6 +157,7 @@ typedef struct { +@@ -165,6 +165,7 @@ typedef struct { #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -275,7 +275,7 @@ index 31878dd..5582fba 100644 #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") #define VIRT_MACHINE(obj) \ OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) -@@ -165,6 +166,16 @@ typedef struct { +@@ -173,6 +174,16 @@ typedef struct { #define VIRT_MACHINE_CLASS(klass) \ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) diff --git a/0009-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch index 779ee5d..9be23b5 100644 --- a/0009-Add-ppc64-machine-types.patch +++ b/0009-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From d27b53512182f6aa6d9ccfcaf64e7aab1a064146 Mon Sep 17 00:00:00 2001 +From 59c55f3104aa650b5b2a31150a34646cc7018b77 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -42,7 +42,7 @@ Merged patches (weekly-200226): 5 files changed, 307 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 4c185bc..2fa25b1 100644 +index f6f034d..f30618e 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -4530,6 +4530,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) diff --git a/0010-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch index 8c76c93..bd769c9 100644 --- a/0010-Add-s390x-machine-types.patch +++ b/0010-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From bf565781e2deb548173e7530e29d886f6c1eaad8 Mon Sep 17 00:00:00 2001 +From 92594368e1369e85d1b87dd1a65408a4f594cf09 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -25,10 +25,10 @@ Merged patches (4.2.0): 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index b111406..f1b1722 100644 +index 023fd25..0e8dd62 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -774,7 +774,7 @@ bool css_migration_enabled(void) +@@ -778,7 
+778,7 @@ bool css_migration_enabled(void) { \ MachineClass *mc = MACHINE_CLASS(oc); \ ccw_machine_##suffix##_class_options(mc); \ @@ -37,7 +37,7 @@ index b111406..f1b1722 100644 if (latest) { \ mc->alias = "s390-ccw-virtio"; \ mc->is_default = true; \ -@@ -798,6 +798,7 @@ bool css_migration_enabled(void) +@@ -802,6 +802,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) @@ -45,7 +45,7 @@ index b111406..f1b1722 100644 static void ccw_machine_5_1_instance_options(MachineState *machine) { } -@@ -1050,6 +1051,74 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1054,6 +1055,74 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); diff --git a/0011-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch index c245d3d..4b98372 100644 --- a/0011-Add-x86_64-machine-types.patch +++ b/0011-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 04955c10da93ce94d73693a0be07b2a4c81246fe Mon Sep 17 00:00:00 2001 +From 31a74446c5312b248bbc6093f21aee3500b96fca Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -42,9 +42,6 @@ Merged patches (weekly-200122): Merged patches (weekly-200318): - e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) - -Conflicts: - hw/i386/pc_piix.c --- hw/i386/acpi-build.c | 3 + hw/i386/pc.c | 265 ++++++++++++++++++++++++++++++++++++++++++++++++++- @@ -71,10 +68,10 @@ index b7bcbbb..fe815c5 100644 } diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 4af9679..d17d6f8 100644 +index 61acc9e..fe37bde 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -345,6 +345,263 @@ GlobalProperty pc_compat_1_4[] = { +@@ -346,6 +346,263 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -338,7 +335,7 @@ index 4af9679..d17d6f8 100644 GSIState 
*pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1010,7 +1267,8 @@ void pc_memory_init(PCMachineState *pcms, +@@ -1011,7 +1268,8 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr = g_malloc(sizeof(*option_rom_mr)); memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, &error_fatal); @@ -348,7 +345,7 @@ index 4af9679..d17d6f8 100644 memory_region_set_readonly(option_rom_mr, true); } memory_region_add_subregion_overlap(rom_memory, -@@ -1960,6 +2218,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1969,6 +2227,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->linuxboot_dma_enabled = true; pcmc->pvh_enabled = true; assert(!mc->get_hotplug_handler); @@ -357,7 +354,7 @@ index 4af9679..d17d6f8 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1971,7 +2231,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1981,7 +2241,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->hot_add_cpu = pc_hot_add_cpu; mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; @@ -368,7 +365,7 @@ index 4af9679..d17d6f8 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 82a33b4..b195f26 100644 +index 18815e8..eeadd89 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -54,6 +54,7 @@ @@ -398,7 +395,7 @@ index 82a33b4..b195f26 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -991,3 +993,207 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -992,3 +994,207 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -607,7 +604,7 @@ index 82a33b4..b195f26 100644 +DEFINE_PC_MACHINE(rhel700, 
"pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 1818d31..6c49a50 100644 +index a1131e6..1cd4e15 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -201,8 +201,8 @@ static void pc_q35_init(MachineState *machine) @@ -629,7 +626,7 @@ index 1818d31..6c49a50 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -564,3 +565,160 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -565,3 +566,160 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -791,13 +788,13 @@ index 1818d31..6c49a50 100644 +DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, + pc_q35_machine_rhel730_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index 2224fbc..24c1348 100644 +index 1062df9..c357731 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -207,6 +207,8 @@ struct MachineClass { - const char **valid_cpu_types; +@@ -208,6 +208,8 @@ struct MachineClass { strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; + bool auto_enable_numa_with_memdev; + /* RHEL only */ + bool async_pf_vmexit_disable; void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes, diff --git a/0012-Enable-make-check.patch b/0012-Enable-make-check.patch index f12db32..ae2533d 100644 --- a/0012-Enable-make-check.patch +++ b/0012-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 8704d7b6ca7438f10d162caf70572b62509b6341 Mon Sep 17 00:00:00 2001 +From 93f9f7beccd34102c5c8e25c3b2b3888c61aa063 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:48:41 +0200 Subject: Enable make check @@ -82,7 +82,7 @@ index 034d3a3..aadc413 100755 *) ;; diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index d886fa0..37dd761 100644 +index 9b07a7e..c678d91 100644 --- a/tests/qemu-iotests/group +++ 
b/tests/qemu-iotests/group @@ -92,7 +92,7 @@ diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index baa4aef..1445da6 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From a7132dd3a148a8990ef18b38baa2fb2aa89484b4 Mon Sep 17 00:00:00 2001 +From ac9ff21203ff07854c232464d96e1f35741af8a6 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -41,7 +41,7 @@ Merged patches (2.9.0): 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 6838bcc..ac109b4 100644 +index d020ea9..e4292ea 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -47,6 +47,9 @@ diff --git a/0014-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch index 3eadb83..3f8f0ff 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From b87487f40dcc351fb7c31a37b6facbdaab285b0c Mon Sep 17 00:00:00 2001 +From 0f0f3ab53bc61fbf66e546ab6bd22d1a60102b79 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -23,7 +23,7 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c -index 3693dfb..9d2e2d3 100644 +index 6509057..8453de5 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -1674,9 +1674,17 @@ static void version(void) diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch index c457bbd..38b9095 100644 --- a/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From 0aa24293cae6e15d483b9aa34f5c27ace53e478c Mon Sep 17 
00:00:00 2001 +From 47faf13d999995693d505074ee73d4908356888b Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -80,10 +80,10 @@ This commit matches the limit to current KVM_CAP_NR_VCPUS value. 2 files changed, 30 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index d54a870..aa562c2 100644 +index ab36fbf..6f6bb47 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c -@@ -2089,6 +2089,18 @@ static int kvm_init(MachineState *ms) +@@ -2088,6 +2088,18 @@ static int kvm_init(MachineState *ms) soft_vcpus_limit = kvm_recommended_vcpus(s); hard_vcpus_limit = kvm_max_vcpus(s); @@ -103,7 +103,7 @@ index d54a870..aa562c2 100644 if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " diff --git a/softmmu/vl.c b/softmmu/vl.c -index 9d2e2d3..accccd8 100644 +index 8453de5..ea6e9e4 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -116,6 +116,8 @@ diff --git a/0016-Add-support-for-simpletrace.patch b/0016-Add-support-for-simpletrace.patch index d42323a..d9e4626 100644 --- a/0016-Add-support-for-simpletrace.patch +++ b/0016-Add-support-for-simpletrace.patch @@ -1,4 +1,4 @@ -From dbf4ed3ddf5439d9563eb408bb95396b3b2c650f Mon Sep 17 00:00:00 2001 +From bc118b0038083bcd54a970ed0a6c92f9d55759e2 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 8 Oct 2015 09:50:17 +0200 Subject: Add support for simpletrace diff --git a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index b0c7b3c..c63c892 100644 --- a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,6 +1,6 @@ -From 9c4bcb1912ff003bc74c0bb3d49c78b1aca995d9 Mon Sep 17 00:00:00 2001 +From cc88b2746e9e8cfa6816e871ca282cddb07a0146 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Wed, 29 Jan 2020 09:30:03 +0100 
+Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- Patchwork-id: 62380 @@ -20,3682 +20,130 @@ Signed-off-by: Miroslav Rezanina --- Rebase notes (weekly-200129): -- qemu-block-drivers.texi converted to qemu-block-drivers.rst (upstream) + - qemu-block-drivers.texi converted to qemu-block-drivers.rst (upstream) -Conflicts: - docs/qemu-cpu-models.texi - docs/system/qemu-block-drivers.rst - qemu-doc.texi - qemu-options.hx +Rebase notes (weekly-200708 + - rewrite patch to new docs structure --- - docs/qemu-cpu-models.texi | 677 +++++++++++ - qemu-doc.texi | 2967 +++++++++++++++++++++++++++++++++++++++++++++ - qemu-options.hx | 10 +- - 3 files changed, 3649 insertions(+), 5 deletions(-) - create mode 100644 docs/qemu-cpu-models.texi - create mode 100644 qemu-doc.texi + docs/defs.rst.inc | 4 ++-- + docs/interop/live-block-operations.rst | 4 ++-- + docs/tools/qemu-trace-stap.rst | 14 +++++++------- + docs/tools/virtiofsd.rst | 2 +- + qemu-options.hx | 10 +++++----- + 5 files changed, 17 insertions(+), 17 deletions(-) -diff --git a/docs/qemu-cpu-models.texi b/docs/qemu-cpu-models.texi -new file mode 100644 -index 0000000..c82cf8f ---- /dev/null -+++ b/docs/qemu-cpu-models.texi -@@ -0,0 +1,677 @@ -+@c man begin SYNOPSIS -+QEMU / KVM CPU model configuration -+@c man end -+ -+@set qemu_system_x86 qemu-kvm -+ -+@c man begin DESCRIPTION -+ -+@menu -+* recommendations_cpu_models_x86:: Recommendations for KVM CPU model configuration on x86 hosts -+* recommendations_cpu_models_MIPS:: Supported CPU model configurations on MIPS hosts -+* cpu_model_syntax_apps:: Syntax for configuring CPU models -+@end menu -+ -+QEMU / KVM virtualization supports two ways to configure CPU models -+ -+@table @option -+ -+@item Host passthrough -+ -+This passes the host CPU model features, model, stepping, exactly to the -+guest. Note that KVM may filter out some host CPU model features if they -+cannot be supported with virtualization. 
Live migration is unsafe when -+this mode is used as libvirt / QEMU cannot guarantee a stable CPU is -+exposed to the guest across hosts. This is the recommended CPU to use, -+provided live migration is not required. -+ -+@item Named model -+ -+QEMU comes with a number of predefined named CPU models, that typically -+refer to specific generations of hardware released by Intel and AMD. -+These allow the guest VMs to have a degree of isolation from the host CPU, -+allowing greater flexibility in live migrating between hosts with differing -+hardware. -+@end table -+ -+In both cases, it is possible to optionally add or remove individual CPU -+features, to alter what is presented to the guest by default. -+ -+Libvirt supports a third way to configure CPU models known as "Host model". -+This uses the QEMU "Named model" feature, automatically picking a CPU model -+that is similar the host CPU, and then adding extra features to approximate -+the host model as closely as possible. This does not guarantee the CPU family, -+stepping, etc will precisely match the host CPU, as they would with "Host -+passthrough", but gives much of the benefit of passthrough, while making -+live migration safe. -+ -+@node recommendations_cpu_models_x86 -+@subsection Recommendations for KVM CPU model configuration on x86 hosts -+ -+The information that follows provides recommendations for configuring -+CPU models on x86 hosts. The goals are to maximise performance, while -+protecting guest OS against various CPU hardware flaws, and optionally -+enabling live migration between hosts with heterogeneous CPU models. 
-+ -+@menu -+* preferred_cpu_models_intel_x86:: Preferred CPU models for Intel x86 hosts -+* important_cpu_features_intel_x86:: Important CPU features for Intel x86 hosts -+* preferred_cpu_models_amd_x86:: Preferred CPU models for AMD x86 hosts -+* important_cpu_features_amd_x86:: Important CPU features for AMD x86 hosts -+* default_cpu_models_x86:: Default x86 CPU models -+* other_non_recommended_cpu_models_x86:: Other non-recommended x86 CPUs -+@end menu -+ -+@node preferred_cpu_models_intel_x86 -+@subsubsection Preferred CPU models for Intel x86 hosts -+ -+The following CPU models are preferred for use on Intel hosts. Administrators / -+applications are recommended to use the CPU model that matches the generation -+of the host CPUs in use. In a deployment with a mixture of host CPU models -+between machines, if live migration compatibility is required, use the newest -+CPU model that is compatible across all desired hosts. -+ -+@table @option -+@item @code{Skylake-Server} -+@item @code{Skylake-Server-IBRS} -+ -+Intel Xeon Processor (Skylake, 2016) -+ -+ -+@item @code{Skylake-Client} -+@item @code{Skylake-Client-IBRS} -+ -+Intel Core Processor (Skylake, 2015) -+ -+ -+@item @code{Broadwell} -+@item @code{Broadwell-IBRS} -+@item @code{Broadwell-noTSX} -+@item @code{Broadwell-noTSX-IBRS} -+ -+Intel Core Processor (Broadwell, 2014) -+ -+ -+@item @code{Haswell} -+@item @code{Haswell-IBRS} -+@item @code{Haswell-noTSX} -+@item @code{Haswell-noTSX-IBRS} -+ -+Intel Core Processor (Haswell, 2013) -+ -+ -+@item @code{IvyBridge} -+@item @code{IvyBridge-IBRS} -+ -+Intel Xeon E3-12xx v2 (Ivy Bridge, 2012) -+ -+ -+@item @code{SandyBridge} -+@item @code{SandyBridge-IBRS} -+ -+Intel Xeon E312xx (Sandy Bridge, 2011) -+ -+ -+@item @code{Westmere} -+@item @code{Westmere-IBRS} -+ -+Westmere E56xx/L56xx/X56xx (Nehalem-C, 2010) -+ -+ -+@item @code{Nehalem} -+@item @code{Nehalem-IBRS} -+ -+Intel Core i7 9xx (Nehalem Class Core i7, 2008) -+ -+ -+@item @code{Penryn} -+ -+Intel Core 2 Duo 
P9xxx (Penryn Class Core 2, 2007) -+ -+ -+@item @code{Conroe} -+ -+Intel Celeron_4x0 (Conroe/Merom Class Core 2, 2006) -+ -+@end table -+ -+@node important_cpu_features_intel_x86 -+@subsubsection Important CPU features for Intel x86 hosts -+ -+The following are important CPU features that should be used on Intel x86 -+hosts, when available in the host CPU. Some of them require explicit -+configuration to enable, as they are not included by default in some, or all, -+of the named CPU models listed above. In general all of these features are -+included if using "Host passthrough" or "Host model". -+ -+ -+@table @option -+ -+@item @code{pcid} -+ -+Recommended to mitigate the cost of the Meltdown (CVE-2017-5754) fix -+ -+Included by default in Haswell, Broadwell & Skylake Intel CPU models. -+ -+Should be explicitly turned on for Westmere, SandyBridge, and IvyBridge -+Intel CPU models. Note that some desktop/mobile Westmere CPUs cannot -+support this feature. -+ -+ -+@item @code{spec-ctrl} -+ -+Required to enable the Spectre v2 (CVE-2017-5715) fix. -+ -+Included by default in Intel CPU models with -IBRS suffix. -+ -+Must be explicitly turned on for Intel CPU models without -IBRS suffix. -+ -+Requires the host CPU microcode to support this feature before it -+can be used for guest CPUs. -+ -+ -+@item @code{stibp} -+ -+Required to enable stronger Spectre v2 (CVE-2017-5715) fixes in some -+operating systems. -+ -+Must be explicitly turned on for all Intel CPU models. -+ -+Requires the host CPU microcode to support this feature before it -+can be used for guest CPUs. -+ -+ -+@item @code{ssbd} -+ -+Required to enable the CVE-2018-3639 fix -+ -+Not included by default in any Intel CPU model. -+ -+Must be explicitly turned on for all Intel CPU models. -+ -+Requires the host CPU microcode to support this feature before it -+can be used for guest CPUs. 
-+ -+ -+@item @code{pdpe1gb} -+ -+Recommended to allow guest OS to use 1GB size pages -+ -+Not included by default in any Intel CPU model. -+ -+Should be explicitly turned on for all Intel CPU models. -+ -+Note that not all CPU hardware will support this feature. -+ -+@item @code{md-clear} -+ -+Required to confirm the MDS (CVE-2018-12126, CVE-2018-12127, CVE-2018-12130, -+CVE-2019-11091) fixes. -+ -+Not included by default in any Intel CPU model. -+ -+Must be explicitly turned on for all Intel CPU models. -+ -+Requires the host CPU microcode to support this feature before it -+can be used for guest CPUs. -+@end table -+ -+ -+@node preferred_cpu_models_amd_x86 -+@subsubsection Preferred CPU models for AMD x86 hosts -+ -+The following CPU models are preferred for use on Intel hosts. Administrators / -+applications are recommended to use the CPU model that matches the generation -+of the host CPUs in use. In a deployment with a mixture of host CPU models -+between machines, if live migration compatibility is required, use the newest -+CPU model that is compatible across all desired hosts. -+ -+@table @option -+ -+@item @code{EPYC} -+@item @code{EPYC-IBPB} -+ -+AMD EPYC Processor (2017) -+ -+ -+@item @code{Opteron_G5} -+ -+AMD Opteron 63xx class CPU (2012) -+ -+ -+@item @code{Opteron_G4} -+ -+AMD Opteron 62xx class CPU (2011) -+ -+ -+@item @code{Opteron_G3} -+ -+AMD Opteron 23xx (Gen 3 Class Opteron, 2009) -+ -+ -+@item @code{Opteron_G2} -+ -+AMD Opteron 22xx (Gen 2 Class Opteron, 2006) -+ -+ -+@item @code{Opteron_G1} -+ -+AMD Opteron 240 (Gen 1 Class Opteron, 2004) -+@end table -+ -+@node important_cpu_features_amd_x86 -+@subsubsection Important CPU features for AMD x86 hosts -+ -+The following are important CPU features that should be used on AMD x86 -+hosts, when available in the host CPU. Some of them require explicit -+configuration to enable, as they are not included by default in some, or all, -+of the named CPU models listed above. 
In general all of these features are -+included if using "Host passthrough" or "Host model". -+ -+ -+@table @option -+ -+@item @code{ibpb} -+ -+Required to enable the Spectre v2 (CVE-2017-5715) fix. -+ -+Included by default in AMD CPU models with -IBPB suffix. -+ -+Must be explicitly turned on for AMD CPU models without -IBPB suffix. -+ -+Requires the host CPU microcode to support this feature before it -+can be used for guest CPUs. -+ -+ -+@item @code{stibp} -+ -+Required to enable stronger Spectre v2 (CVE-2017-5715) fixes in some -+operating systems. -+ -+Must be explicitly turned on for all AMD CPU models. -+ -+Requires the host CPU microcode to support this feature before it -+can be used for guest CPUs. -+ -+ -+@item @code{virt-ssbd} -+ -+Required to enable the CVE-2018-3639 fix -+ -+Not included by default in any AMD CPU model. -+ -+Must be explicitly turned on for all AMD CPU models. -+ -+This should be provided to guests, even if amd-ssbd is also -+provided, for maximum guest compatibility. -+ -+Note for some QEMU / libvirt versions, this must be force enabled -+when when using "Host model", because this is a virtual feature -+that doesn't exist in the physical host CPUs. -+ -+ -+@item @code{amd-ssbd} -+ -+Required to enable the CVE-2018-3639 fix -+ -+Not included by default in any AMD CPU model. -+ -+Must be explicitly turned on for all AMD CPU models. -+ -+This provides higher performance than virt-ssbd so should be -+exposed to guests whenever available in the host. virt-ssbd -+should none the less also be exposed for maximum guest -+compatibility as some kernels only know about virt-ssbd. -+ -+ -+@item @code{amd-no-ssb} -+ -+Recommended to indicate the host is not vulnerable CVE-2018-3639 -+ -+Not included by default in any AMD CPU model. -+ -+Future hardware generations of CPU will not be vulnerable to -+CVE-2018-3639, and thus the guest should be told not to enable -+its mitigations, by exposing amd-no-ssb. 
This is mutually -+exclusive with virt-ssbd and amd-ssbd. -+ -+ -+@item @code{pdpe1gb} -+ -+Recommended to allow guest OS to use 1GB size pages -+ -+Not included by default in any AMD CPU model. -+ -+Should be explicitly turned on for all AMD CPU models. -+ -+Note that not all CPU hardware will support this feature. -+@end table -+ -+ -+@node default_cpu_models_x86 -+@subsubsection Default x86 CPU models -+ -+The default QEMU CPU models are designed such that they can run on all hosts. -+If an application does not wish to do perform any host compatibility checks -+before launching guests, the default is guaranteed to work. -+ -+The default CPU models will, however, leave the guest OS vulnerable to various -+CPU hardware flaws, so their use is strongly discouraged. Applications should -+follow the earlier guidance to setup a better CPU configuration, with host -+passthrough recommended if live migration is not needed. -+ -+@table @option -+@item @code{qemu32} -+@item @code{qemu64} -+ -+QEMU Virtual CPU version 2.5+ (32 & 64 bit variants) -+ -+qemu64 is used for x86_64 guests and qemu32 is used for i686 guests, when no -+-cpu argument is given to QEMU, or no is provided in libvirt XML. -+@end table -+ -+ -+@node other_non_recommended_cpu_models_x86 -+@subsubsection Other non-recommended x86 CPUs -+ -+The following CPUs models are compatible with most AMD and Intel x86 hosts, but -+their usage is discouraged, as they expose a very limited featureset, which -+prevents guests having optimal performance. -+ -+@table @option -+ -+@item @code{kvm32} -+@item @code{kvm64} -+ -+Common KVM processor (32 & 64 bit variants) -+ -+Legacy models just for historical compatibility with ancient QEMU versions. 
-+ -+ -+@item @code{486} -+@item @code{athlon} -+@item @code{phenom} -+@item @code{coreduo} -+@item @code{core2duo} -+@item @code{n270} -+@item @code{pentium} -+@item @code{pentium2} -+@item @code{pentium3} -+ -+Various very old x86 CPU models, mostly predating the introduction of -+hardware assisted virtualization, that should thus not be required for -+running virtual machines. -+@end table -+ -+@node recommendations_cpu_models_MIPS -+@subsection Supported CPU model configurations on MIPS hosts -+ -+QEMU supports variety of MIPS CPU models: -+ -+@menu -+* cpu_models_MIPS32:: Supported CPU models for MIPS32 hosts -+* cpu_models_MIPS64:: Supported CPU models for MIPS64 hosts -+* cpu_models_nanoMIPS:: Supported CPU models for nanoMIPS hosts -+* preferred_cpu_models_MIPS:: Preferred CPU models for MIPS hosts -+@end menu -+ -+@node cpu_models_MIPS32 -+@subsubsection Supported CPU models for MIPS32 hosts -+ -+The following CPU models are supported for use on MIPS32 hosts. Administrators / -+applications are recommended to use the CPU model that matches the generation -+of the host CPUs in use. In a deployment with a mixture of host CPU models -+between machines, if live migration compatibility is required, use the newest -+CPU model that is compatible across all desired hosts. 
-+ -+@table @option -+@item @code{mips32r6-generic} -+ -+MIPS32 Processor (Release 6, 2015) -+ -+ -+@item @code{P5600} -+ -+MIPS32 Processor (P5600, 2014) -+ -+ -+@item @code{M14K} -+@item @code{M14Kc} -+ -+MIPS32 Processor (M14K, 2009) -+ -+ -+@item @code{74Kf} -+ -+MIPS32 Processor (74K, 2007) -+ -+ -+@item @code{34Kf} -+ -+MIPS32 Processor (34K, 2006) -+ -+ -+@item @code{24Kc} -+@item @code{24KEc} -+@item @code{24Kf} -+ -+MIPS32 Processor (24K, 2003) -+ -+ -+@item @code{4Kc} -+@item @code{4Km} -+@item @code{4KEcR1} -+@item @code{4KEmR1} -+@item @code{4KEc} -+@item @code{4KEm} -+ -+MIPS32 Processor (4K, 1999) -+@end table -+ -+@node cpu_models_MIPS64 -+@subsubsection Supported CPU models for MIPS64 hosts -+ -+The following CPU models are supported for use on MIPS64 hosts. Administrators / -+applications are recommended to use the CPU model that matches the generation -+of the host CPUs in use. In a deployment with a mixture of host CPU models -+between machines, if live migration compatibility is required, use the newest -+CPU model that is compatible across all desired hosts. -+ -+@table @option -+@item @code{I6400} -+ -+MIPS64 Processor (Release 6, 2014) -+ -+ -+@item @code{Loongson-2F} -+ -+MIPS64 Processor (Loongson 2, 2008) -+ -+ -+@item @code{Loongson-2E} -+ -+MIPS64 Processor (Loongson 2, 2006) -+ -+ -+@item @code{mips64dspr2} -+ -+MIPS64 Processor (Release 2, 2006) -+ -+ -+@item @code{MIPS64R2-generic} -+@item @code{5KEc} -+@item @code{5KEf} -+ -+MIPS64 Processor (Release 2, 2002) -+ -+ -+@item @code{20Kc} -+ -+MIPS64 Processor (20K, 2000) -+ -+ -+@item @code{5Kc} -+@item @code{5Kf} -+ -+MIPS64 Processor (5K, 1999) -+ -+ -+@item @code{VR5432} -+ -+MIPS64 Processor (VR, 1998) -+ -+ -+@item @code{R4000} -+ -+MIPS64 Processor (MIPS III, 1991) -+@end table -+ -+@node cpu_models_nanoMIPS -+@subsubsection Supported CPU models for nanoMIPS hosts -+ -+The following CPU models are supported for use on nanoMIPS hosts. 
Administrators / -+applications are recommended to use the CPU model that matches the generation -+of the host CPUs in use. In a deployment with a mixture of host CPU models -+between machines, if live migration compatibility is required, use the newest -+CPU model that is compatible across all desired hosts. -+ -+@table @option -+@item @code{I7200} -+ -+MIPS I7200 (nanoMIPS, 2018) -+ -+@end table -+ -+@node preferred_cpu_models_MIPS -+@subsubsection Preferred CPU models for MIPS hosts -+ -+The following CPU models are preferred for use on different MIPS hosts: -+ -+@table @option -+@item @code{MIPS III} -+R4000 -+ -+@item @code{MIPS32R2} -+34Kf -+ -+@item @code{MIPS64R6} -+I6400 -+ -+@item @code{nanoMIPS} -+I7200 -+@end table -+ -+@node cpu_model_syntax_apps -+@subsection Syntax for configuring CPU models -+ -+The example below illustrate the approach to configuring the various -+CPU models / features in QEMU and libvirt -+ -+@menu -+* cpu_model_syntax_qemu:: QEMU command line -+* cpu_model_syntax_libvirt:: Libvirt guest XML -+@end menu -+ -+@node cpu_model_syntax_qemu -+@subsubsection QEMU command line -+ -+@table @option -+ -+@item Host passthrough -+ -+@example -+ $ @value{qemu_system_x86} -cpu host -+@end example -+ -+With feature customization: -+ -+@example -+ $ @value{qemu_system_x86} -cpu host,-vmx,... -+@end example -+ -+@item Named CPU models -+ -+@example -+ $ @value{qemu_system_x86} -cpu Westmere -+@end example -+ -+With feature customization: -+ -+@example -+ $ @value{qemu_system_x86} -cpu Westmere,+pcid,... -+@end example -+ -+@end table -+ -+@node cpu_model_syntax_libvirt -+@subsubsection Libvirt guest XML -+ -+@table @option -+ -+@item Host passthrough -+ -+@example -+ -+@end example -+ -+With feature customization: -+ -+@example -+ -+ -+ ... -+ -+@end example -+ -+@item Host model -+ -+@example -+ -+@end example -+ -+With feature customization: -+ -+@example -+ -+ -+ ... 
-+ -+@end example -+ -+@item Named model -+ -+@example -+ -+ -+ -+@end example -+ -+With feature customization: -+ -+@example -+ -+ -+ -+ ... -+ -+@end example -+ -+@end table -+ -+@c man end -+ -+@ignore -+ -+@setfilename qemu-cpu-models -+@settitle QEMU / KVM CPU model configuration -+ -+@c man begin SEEALSO -+The HTML documentation of QEMU for more precise information and Linux -+user mode emulator invocation. -+@c man end -+ -+@c man begin AUTHOR -+Daniel P. Berrange -+@c man end -+ -+@end ignore -diff --git a/qemu-doc.texi b/qemu-doc.texi -new file mode 100644 -index 0000000..10cd1de ---- /dev/null -+++ b/qemu-doc.texi -@@ -0,0 +1,2967 @@ -+\input texinfo @c -*- texinfo -*- -+@c %**start of header -+@setfilename qemu-doc.info -+@include version.texi -+ -+@documentlanguage en -+@documentencoding UTF-8 -+ -+@settitle QEMU version @value{VERSION} User Documentation -+@exampleindent 0 -+@paragraphindent 0 -+@c %**end of header -+ -+@set qemu_system qemu-kvm -+@set qemu_system_x86 qemu-kvm -+ -+@ifinfo -+@direntry -+* QEMU: (qemu-doc). The QEMU Emulator User Documentation. -+@end direntry -+@end ifinfo -+ -+@iftex -+@titlepage -+@sp 7 -+@center @titlefont{QEMU version @value{VERSION}} -+@sp 1 -+@center @titlefont{User Documentation} -+@sp 3 -+@end titlepage -+@end iftex -+ -+@ifnottex -+@node Top -+@top -+ -+@menu -+* Introduction:: -+* QEMU PC System emulator:: -+* QEMU System emulator for non PC targets:: -+* QEMU User space emulator:: -+* System requirements:: -+* Security:: -+* Implementation notes:: -+* Deprecated features:: -+* Recently removed features:: -+* Supported build platforms:: -+* License:: -+* Index:: -+@end menu -+@end ifnottex -+ -+@contents -+ -+@node Introduction -+@chapter Introduction -+ -+@menu -+* intro_features:: Features -+@end menu -+ -+@node intro_features -+@section Features -+ -+QEMU is a FAST! processor emulator using dynamic translation to -+achieve good emulation speed. 
-+ -+@cindex operating modes -+QEMU has two operating modes: -+ -+@itemize -+@cindex system emulation -+@item Full system emulation. In this mode, QEMU emulates a full system (for -+example a PC), including one or several processors and various -+peripherals. It can be used to launch different Operating Systems -+without rebooting the PC or to debug system code. -+ -+@cindex user mode emulation -+@item User mode emulation. In this mode, QEMU can launch -+processes compiled for one CPU on another CPU. It can be used to -+launch the Wine Windows API emulator (@url{https://www.winehq.org}) or -+to ease cross-compilation and cross-debugging. -+ -+@end itemize -+ -+QEMU has the following features: -+ -+@itemize -+@item QEMU can run without a host kernel driver and yet gives acceptable -+performance. It uses dynamic translation to native code for reasonable speed, -+with support for self-modifying code and precise exceptions. -+ -+@item It is portable to several operating systems (GNU/Linux, *BSD, Mac OS X, -+Windows) and architectures. -+ -+@item It performs accurate software emulation of the FPU. -+@end itemize -+ -+QEMU user mode emulation has the following features: -+@itemize -+@item Generic Linux system call converter, including most ioctls. -+ -+@item clone() emulation using native CPU clone() to use Linux scheduler for threads. -+ -+@item Accurate signal handling by remapping host signals to target signals. -+@end itemize -+ -+QEMU full system emulation has the following features: -+@itemize -+@item -+QEMU uses a full software MMU for maximum portability. -+ -+@item -+QEMU can optionally use an in-kernel accelerator, like kvm. The accelerators -+execute most of the guest code natively, while -+continuing to emulate the rest of the machine. -+ -+@item -+Various hardware devices can be emulated and in some cases, host -+devices (e.g. serial and parallel ports, USB, drives) can be used -+transparently by the guest Operating System. 
Host device passthrough -+can be used for talking to external physical peripherals (e.g. a -+webcam, modem or tape drive). -+ -+@item -+Symmetric multiprocessing (SMP) support. Currently, an in-kernel -+accelerator is required to use more than one host CPU for emulation. -+ -+@end itemize -+ -+ -+@node QEMU PC System emulator -+@chapter QEMU PC System emulator -+@cindex system emulation (PC) -+ -+@menu -+* pcsys_introduction:: Introduction -+* pcsys_quickstart:: Quick Start -+* sec_invocation:: Invocation -+* pcsys_keys:: Keys in the graphical frontends -+* mux_keys:: Keys in the character backend multiplexer -+* pcsys_monitor:: QEMU Monitor -+* cpu_models:: CPU models -+* disk_images:: Disk Images -+* pcsys_network:: Network emulation -+* pcsys_other_devs:: Other Devices -+* direct_linux_boot:: Direct Linux Boot -+* pcsys_usb:: USB emulation -+* vnc_security:: VNC security -+* network_tls:: TLS setup for network services -+* gdb_usage:: GDB usage -+* pcsys_os_specific:: Target OS specific information -+@end menu -+ -+@node pcsys_introduction -+@section Introduction -+ -+@c man begin DESCRIPTION -+ -+The QEMU PC System emulator simulates the -+following peripherals: -+ -+@itemize @minus -+@item -+i440FX host PCI bridge and PIIX3 PCI to ISA bridge -+@item -+Cirrus CLGD 5446 PCI VGA card or dummy VGA card with Bochs VESA -+extensions (hardware level, including all non standard modes). 
-+@item -+PS/2 mouse and keyboard -+@item -+2 PCI IDE interfaces with hard disk and CD-ROM support -+@item -+Floppy disk -+@item -+PCI and ISA network adapters -+@item -+Serial ports -+@item -+IPMI BMC, either and internal or external one -+@item -+Creative SoundBlaster 16 sound card -+@item -+ENSONIQ AudioPCI ES1370 sound card -+@item -+Intel 82801AA AC97 Audio compatible sound card -+@item -+Intel HD Audio Controller and HDA codec -+@item -+Adlib (OPL2) - Yamaha YM3812 compatible chip -+@item -+Gravis Ultrasound GF1 sound card -+@item -+CS4231A compatible sound card -+@item -+PCI UHCI, OHCI, EHCI or XHCI USB controller and a virtual USB-1.1 hub. -+@end itemize -+ -+SMP is supported with up to 255 CPUs. -+ -+QEMU uses the PC BIOS from the Seabios project and the Plex86/Bochs LGPL -+VGA BIOS. -+ -+QEMU uses YM3812 emulation by Tatsuyuki Satoh. -+ -+QEMU uses GUS emulation (GUSEMU32 @url{http://www.deinmeister.de/gusemu/}) -+by Tibor "TS" Schütz. -+ -+Note that, by default, GUS shares IRQ(7) with parallel ports and so -+QEMU must be told to not have parallel ports to have working GUS. -+ -+@example -+@value{qemu_system_x86} dos.img -soundhw gus -parallel none -+@end example -+ -+Alternatively: -+@example -+@value{qemu_system_x86} dos.img -device gus,irq=5 -+@end example -+ -+Or some other unclaimed IRQ. -+ -+CS4231A is the chip used in Windows Sound System and GUSMAX products -+ -+@c man end -+ -+@node pcsys_quickstart -+@section Quick Start -+@cindex quick start -+ -+Download and uncompress a hard disk image with Linux installed (e.g. -+@file{linux.img}) and type: -+ -+@example -+@value{qemu_system} linux.img -+@end example -+ -+Linux should boot and give you a prompt. -+ -+@node sec_invocation -+@section Invocation -+ -+@example -+@c man begin SYNOPSIS -+@command{@value{qemu_system}} [@var{options}] [@var{disk_image}] -+@c man end -+@end example -+ -+@c man begin OPTIONS -+@var{disk_image} is a raw hard disk image for IDE hard disk 0. 
Some -+targets do not need a disk image. -+ -+@include qemu-options.texi -+ -+@c man end -+ -+@subsection Device URL Syntax -+@c TODO merge this with section Disk Images -+ -+@c man begin NOTES -+ -+In addition to using normal file images for the emulated storage devices, -+QEMU can also use networked resources such as iSCSI devices. These are -+specified using a special URL syntax. -+ -+@table @option -+@item iSCSI -+iSCSI support allows QEMU to access iSCSI resources directly and use as -+images for the guest storage. Both disk and cdrom images are supported. -+ -+Syntax for specifying iSCSI LUNs is -+``iscsi://[:]//'' -+ -+By default qemu will use the iSCSI initiator-name -+'iqn.2008-11.org.linux-kvm[:]' but this can also be set from the command -+line or a configuration file. -+ -+Since version Qemu 2.4 it is possible to specify a iSCSI request timeout to detect -+stalled requests and force a reestablishment of the session. The timeout -+is specified in seconds. The default is 0 which means no timeout. Libiscsi -+1.15.0 or greater is required for this feature. -+ -+Example (without authentication): -+@example -+@value{qemu_system} -iscsi initiator-name=iqn.2001-04.com.example:my-initiator \ -+ -cdrom iscsi://192.0.2.1/iqn.2001-04.com.example/2 \ -+ -drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1 -+@end example -+ -+Example (CHAP username/password via URL): -+@example -+@value{qemu_system} -drive file=iscsi://user%password@@192.0.2.1/iqn.2001-04.com.example/1 -+@end example -+ -+Example (CHAP username/password via environment variables): -+@example -+LIBISCSI_CHAP_USERNAME="user" \ -+LIBISCSI_CHAP_PASSWORD="password" \ -+@value{qemu_system} -drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1 -+@end example -+ -+@item NBD -+QEMU supports NBD (Network Block Devices) both using TCP protocol as well -+as Unix Domain Sockets. With TCP, the default port is 10809. 
-+ -+Syntax for specifying a NBD device using TCP, in preferred URI form: -+``nbd://<server-ip>[:<port>]/[<export>]'' -+ -+Syntax for specifying a NBD device using Unix Domain Sockets; remember -+that '?' is a shell glob character and may need quoting: -+``nbd+unix:///[<export>]?socket=<domain-socket>'' -+ -+Older syntax that is also recognized: -+``nbd:<server-ip>:<port>[:exportname=<export>]'' -+ -+Syntax for specifying a NBD device using Unix Domain Sockets -+``nbd:unix:<domain-socket>[:exportname=<export>]'' -+ -+Example for TCP -+@example -+@value{qemu_system} --drive file=nbd:192.0.2.1:30000 -+@end example -+ -+Example for Unix Domain Sockets -+@example -+@value{qemu_system} --drive file=nbd:unix:/tmp/nbd-socket -+@end example -+ -+@item SSH -+QEMU supports SSH (Secure Shell) access to remote disks. -+ -+Examples: -+@example -+@value{qemu_system} -drive file=ssh://user@@host/path/to/disk.img -+@value{qemu_system} -drive file.driver=ssh,file.user=user,file.host=host,file.port=22,file.path=/path/to/disk.img -+@end example -+ -+Currently authentication must be done using ssh-agent. Other -+authentication methods may be supported in future. -+ -+@item Sheepdog -+Sheepdog is a distributed storage system for QEMU. -+QEMU supports using either local sheepdog devices or remote networked -+devices. -+ -+Syntax for specifying a sheepdog device -+@example -+sheepdog[+tcp|+unix]://[host:port]/vdiname[?socket=path][#snapid|#tag] -+@end example -+ -+Example -+@example -+@value{qemu_system} --drive file=sheepdog://192.0.2.1:30000/MyVirtualMachine -+@end example -+ -+See also @url{https://sheepdog.github.io/sheepdog/}. -+ -+@item GlusterFS -+GlusterFS is a user space distributed file system. -+QEMU supports the use of GlusterFS volumes for hosting VM disk images using -+TCP, Unix Domain Sockets and RDMA transport protocols. -+ -+Syntax for specifying a VM disk image on GlusterFS volume is -+@example -+ -+URI: -+gluster[+type]://[host[:port]]/volume/path[?socket=...][,debug=N][,logfile=...]
-+ -+JSON: -+'json:@{"driver":"qcow2","file":@{"driver":"gluster","volume":"testvol","path":"a.img","debug":N,"logfile":"...", -+@ "server":[@{"type":"tcp","host":"...","port":"..."@}, -+@ @{"type":"unix","socket":"..."@}]@}@}' -+@end example -+ -+ -+Example -+@example -+URI: -+@value{qemu_system} --drive file=gluster://192.0.2.1/testvol/a.img, -+@ file.debug=9,file.logfile=/var/log/qemu-gluster.log -+ -+JSON: -+@value{qemu_system} 'json:@{"driver":"qcow2", -+@ "file":@{"driver":"gluster", -+@ "volume":"testvol","path":"a.img", -+@ "debug":9,"logfile":"/var/log/qemu-gluster.log", -+@ "server":[@{"type":"tcp","host":"1.2.3.4","port":24007@}, -+@ @{"type":"unix","socket":"/var/run/glusterd.socket"@}]@}@}' -+@value{qemu_system} -drive driver=qcow2,file.driver=gluster,file.volume=testvol,file.path=/path/a.img, -+@ file.debug=9,file.logfile=/var/log/qemu-gluster.log, -+@ file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007, -+@ file.server.1.type=unix,file.server.1.socket=/var/run/glusterd.socket -+@end example -+ -+See also @url{http://www.gluster.org}. -+ -+@item HTTP/HTTPS/FTP/FTPS -+QEMU supports read-only access to files accessed over http(s) and ftp(s). -+ -+Syntax using a single filename: -+@example -+<protocol>://[<username>[:<password>]@@]<host>/<path> -+@end example -+ -+where: -+@table @option -+@item protocol -+'http', 'https', 'ftp', or 'ftps'. -+ -+@item username -+Optional username for authentication to the remote server. -+ -+@item password -+Optional password for authentication to the remote server. -+ -+@item host -+Address of the remote server. -+ -+@item path -+Path on the remote server, including any query string. -+@end table -+ -+The following options are also supported: -+@table @option -+@item url -+The full URL when passing options to the driver explicitly. -+ -+@item readahead -+The amount of data to read ahead with each range request to the remote server. -+This value may optionally have the suffix 'T', 'G', 'M', 'K', 'k' or 'b'.
If it -+does not have a suffix, it will be assumed to be in bytes. The value must be a -+multiple of 512 bytes. It defaults to 256k. -+ -+@item sslverify -+Whether to verify the remote server's certificate when connecting over SSL. It -+can have the value 'on' or 'off'. It defaults to 'on'. -+ -+@item cookie -+Send this cookie (it can also be a list of cookies separated by ';') with -+each outgoing request. Only supported when using protocols such as HTTP -+which support cookies, otherwise ignored. -+ -+@item timeout -+Set the timeout in seconds of the CURL connection. This timeout is the time -+that CURL waits for a response from the remote server to get the size of the -+image to be downloaded. If not set, the default timeout of 5 seconds is used. -+@end table -+ -+Note that when passing options to qemu explicitly, @option{driver} is the value -+of <protocol>. -+ -+Example: boot from a remote Fedora 20 live ISO image -+@example -+@value{qemu_system_x86} --drive media=cdrom,file=https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/20/Live/x86_64/Fedora-Live-Desktop-x86_64-20-1.iso,readonly -+ -+@value{qemu_system_x86} --drive media=cdrom,file.driver=http,file.url=http://archives.fedoraproject.org/pub/fedora/linux/releases/20/Live/x86_64/Fedora-Live-Desktop-x86_64-20-1.iso,readonly -+@end example -+ -+Example: boot from a remote Fedora 20 cloud image using a local overlay for -+writes, copy-on-read, and a readahead of 64k -+@example -+qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"http",, "file.url":"http://archives.fedoraproject.org/pub/archive/fedora/linux/releases/20/Images/x86_64/Fedora-x86_64-20-20131211.1-sda.qcow2",, "file.readahead":"64k"@}' /tmp/Fedora-x86_64-20-20131211.1-sda.qcow2 -+ -+@value{qemu_system_x86} -drive file=/tmp/Fedora-x86_64-20-20131211.1-sda.qcow2,copy-on-read=on -+@end example -+ -+Example: boot from an image stored on a VMware vSphere server with a self-signed -+certificate using a local overlay for writes, a
readahead of 64k and a timeout -+of 10 seconds. -+@example -+qemu-img create -f qcow2 -o backing_file='json:@{"file.driver":"https",, "file.url":"https://user:password@@vsphere.example.com/folder/test/test-flat.vmdk?dcPath=Datacenter&dsName=datastore1",, "file.sslverify":"off",, "file.readahead":"64k",, "file.timeout":10@}' /tmp/test.qcow2 -+ -+@value{qemu_system_x86} -drive file=/tmp/test.qcow2 -+@end example -+ -+@end table -+ -+@c man end -+ -+@node pcsys_keys -+@section Keys in the graphical frontends -+ -+@c man begin OPTIONS -+ -+During the graphical emulation, you can use special key combinations to change -+modes. The default key mappings are shown below, but if you use @code{-alt-grab} -+then the modifier is Ctrl-Alt-Shift (instead of Ctrl-Alt) and if you use -+@code{-ctrl-grab} then the modifier is the right Ctrl key (instead of Ctrl-Alt): -+ -+@table @key -+@item Ctrl-Alt-f -+@kindex Ctrl-Alt-f -+Toggle full screen -+ -+@item Ctrl-Alt-+ -+@kindex Ctrl-Alt-+ -+Enlarge the screen -+ -+@item Ctrl-Alt-- -+@kindex Ctrl-Alt-- -+Shrink the screen -+ -+@item Ctrl-Alt-u -+@kindex Ctrl-Alt-u -+Restore the screen's un-scaled dimensions -+ -+@item Ctrl-Alt-n -+@kindex Ctrl-Alt-n -+Switch to virtual console 'n'. Standard console mappings are: -+@table @emph -+@item 1 -+Target system display -+@item 2 -+Monitor -+@item 3 -+Serial port -+@end table -+ -+@item Ctrl-Alt -+@kindex Ctrl-Alt -+Toggle mouse and keyboard grab. -+@end table -+ -+@kindex Ctrl-Up -+@kindex Ctrl-Down -+@kindex Ctrl-PageUp -+@kindex Ctrl-PageDown -+In the virtual consoles, you can use @key{Ctrl-Up}, @key{Ctrl-Down}, -+@key{Ctrl-PageUp} and @key{Ctrl-PageDown} to move in the back log. -+ -+@c man end -+ -+@node mux_keys -+@section Keys in the character backend multiplexer -+ -+@c man begin OPTIONS -+ -+During emulation, if you are using a character backend multiplexer -+(which is the default if you are using @option{-nographic}) then -+several commands are available via an escape sequence. 
These -+key sequences all start with an escape character, which is @key{Ctrl-a} -+by default, but can be changed with @option{-echr}. The list below assumes -+you're using the default. -+ -+@table @key -+@item Ctrl-a h -+@kindex Ctrl-a h -+Print this help -+@item Ctrl-a x -+@kindex Ctrl-a x -+Exit emulator -+@item Ctrl-a s -+@kindex Ctrl-a s -+Save disk data back to file (if -snapshot) -+@item Ctrl-a t -+@kindex Ctrl-a t -+Toggle console timestamps -+@item Ctrl-a b -+@kindex Ctrl-a b -+Send break (magic sysrq in Linux) -+@item Ctrl-a c -+@kindex Ctrl-a c -+Rotate between the frontends connected to the multiplexer (usually -+this switches between the monitor and the console) -+@item Ctrl-a Ctrl-a -+@kindex Ctrl-a Ctrl-a -+Send the escape character to the frontend -+@end table -+@c man end -+ -+@ignore -+ -+@c man begin SEEALSO -+The HTML documentation of QEMU for more precise information and Linux -+user mode emulator invocation. -+@c man end -+ -+@c man begin AUTHOR -+Fabrice Bellard -+@c man end -+ -+@end ignore -+ -+@node pcsys_monitor -+@section QEMU Monitor -+@cindex QEMU monitor -+ -+The QEMU monitor is used to give complex commands to the QEMU -+emulator. You can use it to: -+ -+@itemize @minus -+ -+@item -+Remove or insert removable media images -+(such as CD-ROM or floppies). -+ -+@item -+Freeze/unfreeze the Virtual Machine (VM) and save or restore its state -+from a disk file. -+ -+@item Inspect the VM state without an external debugger. -+ -+@end itemize -+ -+@subsection Commands -+ -+The following commands are available: -+ -+@include qemu-monitor.texi -+ -+@include qemu-monitor-info.texi -+ -+@subsection Integer expressions -+ -+The monitor understands integer expressions for every integer -+argument. You can use register names to get the value of specific -+CPU registers by prefixing them with @emph{$}.
-+ -+@node cpu_models -+@section CPU models -+ -+@include docs/qemu-cpu-models.texi -+ -+@node disk_images -+@section Disk Images -+ -+QEMU supports many disk image formats, including growable disk images -+(their size increases as non-empty sectors are written), compressed and -+encrypted disk images. -+ -+@menu -+* disk_images_quickstart:: Quick start for disk image creation -+* disk_images_snapshot_mode:: Snapshot mode -+* vm_snapshots:: VM snapshots -+@end menu -+ -+@node disk_images_quickstart -+@subsection Quick start for disk image creation -+ -+You can create a disk image with the command: -+@example -+qemu-img create myimage.img mysize -+@end example -+where @var{myimage.img} is the disk image filename and @var{mysize} is its -+size in kilobytes. You can add an @code{M} suffix to give the size in -+megabytes and a @code{G} suffix for gigabytes. -+ -+@c When this document is converted to rst we should make this into -+@c a proper linked reference to the qemu-img documentation again: -+See the qemu-img invocation documentation for more information. -+ -+@node disk_images_snapshot_mode -+@subsection Snapshot mode -+ -+If you use the option @option{-snapshot}, all disk images are -+considered as read only. When sectors are written, they are written to -+a temporary file created in @file{/tmp}. You can however force the -+write back to the raw disk images by using the @code{commit} monitor -+command (or @key{C-a s} in the serial console). -+ -+@node vm_snapshots -+@subsection VM snapshots -+ -+VM snapshots are snapshots of the complete virtual machine including -+CPU state, RAM, device state and the content of all the writable -+disks. In order to use VM snapshots, you must have at least one non -+removable and writable block device using the @code{qcow2} disk image -+format. Normally this device is the first virtual hard drive. -+ -+Use the monitor command @code{savevm} to create a new VM snapshot or -+replace an existing one.
A human readable name can be assigned to each -+snapshot in addition to its numerical ID. -+ -+Use @code{loadvm} to restore a VM snapshot and @code{delvm} to remove -+a VM snapshot. @code{info snapshots} lists the available snapshots -+with their associated information: -+ -+@example -+(qemu) info snapshots -+Snapshot devices: hda -+Snapshot list (from hda): -+ID TAG VM SIZE DATE VM CLOCK -+1 start 41M 2006-08-06 12:38:02 00:00:14.954 -+2 40M 2006-08-06 12:43:29 00:00:18.633 -+3 msys 40M 2006-08-06 12:44:04 00:00:23.514 -+@end example -+ -+A VM snapshot is made of a VM state info (its size is shown in -+@code{info snapshots}) and a snapshot of every writable disk image. -+The VM state info is stored in the first @code{qcow2} non removable -+and writable block device. The disk image snapshots are stored in -+every disk image. The size of a snapshot in a disk image is difficult -+to evaluate and is not shown by @code{info snapshots} because the -+associated disk sectors are shared among all the snapshots to save -+disk space (otherwise each snapshot would need a full copy of all the -+disk images). -+ -+When using the (unrelated) @code{-snapshot} option -+(@ref{disk_images_snapshot_mode}), you can always make VM snapshots, -+but they are deleted as soon as you exit QEMU. -+ -+VM snapshots currently have the following known limitations: -+@itemize -+@item -+They cannot cope with removable devices if they are removed or -+inserted after a snapshot is done. -+@item -+A few device drivers still have incomplete snapshot support so their -+state is not saved or restored properly (in particular USB). -+@end itemize -+ -+@node pcsys_network -+@section Network emulation -+ -+QEMU can simulate several network cards (e.g. PCI or ISA cards on the PC -+target) and can connect them to a network backend on the host or an emulated -+hub. The various host network backends can either be used to connect the NIC of -+the guest to a real network (e.g. 
by using a TAP device or the non-privileged -+user mode network stack), or to other guest instances running in another QEMU -+process (e.g. by using the socket host network backend). -+ -+@subsection Using TAP network interfaces -+ -+This is the standard way to connect QEMU to a real network. QEMU adds -+a virtual network device on your host (called @code{tapN}), and you -+can then configure it as if it was a real ethernet card. -+ -+@subsubsection Linux host -+ -+As an example, you can download the @file{linux-test-xxx.tar.gz} -+archive and copy the script @file{qemu-ifup} in @file{/etc} and -+configure properly @code{sudo} so that the command @code{ifconfig} -+contained in @file{qemu-ifup} can be executed as root. You must verify -+that your host kernel supports the TAP network interfaces: the -+device @file{/dev/net/tun} must be present. -+ -+See @ref{sec_invocation} to have examples of command lines using the -+TAP network interfaces. -+ -+@subsubsection Windows host -+ -+There is a virtual ethernet driver for Windows 2000/XP systems, called -+TAP-Win32. But it is not included in standard QEMU for Windows, -+so you will need to get it separately. It is part of the OpenVPN package, -+so download OpenVPN from @url{https://openvpn.net/}. -+ -+@subsection Using the user mode network stack -+ -+By using the option @option{-net user} (default configuration if no -+@option{-net} option is specified), QEMU uses a completely user mode -+network stack (you don't need root privilege to use the virtual -+network). The virtual network configuration is the following: -+ -+@example -+ -+ guest (10.0.2.15) <------> Firewall/DHCP server <-----> Internet -+ | (10.0.2.2) -+ | -+ ----> DNS server (10.0.2.3) -+ | -+ ----> SMB server (10.0.2.4) -+@end example -+ -+The QEMU VM behaves as if it was behind a firewall which blocks all -+incoming connections. You can use a DHCP client to automatically -+configure the network in the QEMU VM.
The DHCP server assigns addresses -+to the hosts starting from 10.0.2.15. -+ -+In order to check that the user mode network is working, you can ping -+the address 10.0.2.2 and verify that you got an address in the range -+10.0.2.x from the QEMU virtual DHCP server. -+ -+Note that ICMP traffic in general does not work with user mode networking. -+@code{ping}, aka. ICMP echo, to the local router (10.0.2.2) shall work, -+however. If you're using QEMU on Linux >= 3.0, it can use unprivileged ICMP -+ping sockets to allow @code{ping} to the Internet. The host admin has to set -+the ping_group_range in order to grant access to those sockets. To allow ping -+for GID 100 (usually users group): -+ -+@example -+echo 100 100 > /proc/sys/net/ipv4/ping_group_range -+@end example -+ -+When using the built-in TFTP server, the router is also the TFTP -+server. -+ -+When using the @option{'-netdev user,hostfwd=...'} option, TCP or UDP -+connections can be redirected from the host to the guest. It allows for -+example to redirect X11, telnet or SSH connections. -+ -+@subsection Hubs -+ -+QEMU can simulate several hubs. A hub can be thought of as a virtual connection -+between several network devices. These devices can be for example QEMU virtual -+ethernet cards or virtual Host ethernet devices (TAP devices). You can connect -+guest NICs or host network backends to such a hub using the @option{-netdev -+hubport} or @option{-nic hubport} options. The legacy @option{-net} option -+also connects the given device to the emulated hub with ID 0 (i.e. the default -+hub) unless you specify a netdev with @option{-net nic,netdev=xxx} here. -+ -+@subsection Connecting emulated networks between QEMU instances -+ -+Using the @option{-netdev socket} (or @option{-nic socket} or -+@option{-net socket}) option, it is possible to create emulated -+networks that span several QEMU instances.
-+See the description of the @option{-netdev socket} option in the -+@ref{sec_invocation,,Invocation chapter} to have a basic example. -+ -+@node pcsys_other_devs -+@section Other Devices -+ -+@subsection Inter-VM Shared Memory device -+ -+On Linux hosts, a shared memory device is available. The basic syntax -+is: -+ -+@example -+@value{qemu_system_x86} -device ivshmem-plain,memdev=@var{hostmem} -+@end example -+ -+where @var{hostmem} names a host memory backend. For a POSIX shared -+memory backend, use something like -+ -+@example -+-object memory-backend-file,size=1M,share,mem-path=/dev/shm/ivshmem,id=@var{hostmem} -+@end example -+ -+If desired, interrupts can be sent between guest VMs accessing the same shared -+memory region. Interrupt support requires using a shared memory server and -+using a chardev socket to connect to it. The code for the shared memory server -+is qemu.git/contrib/ivshmem-server. An example syntax when using the shared -+memory server is: -+ -+@example -+# First start the ivshmem server once and for all -+ivshmem-server -p @var{pidfile} -S @var{path} -m @var{shm-name} -l @var{shm-size} -n @var{vectors} -+ -+# Then start your qemu instances with matching arguments -+@value{qemu_system_x86} -device ivshmem-doorbell,vectors=@var{vectors},chardev=@var{id} -+ -chardev socket,path=@var{path},id=@var{id} -+@end example -+ -+When using the server, the guest will be assigned a VM ID (>=0) that allows guests -+using the same server to communicate via interrupts. Guests can read their -+VM ID from a device register (see ivshmem-spec.txt). -+ -+@subsubsection Migration with ivshmem -+ -+With device property @option{master=on}, the guest will copy the shared -+memory on migration to the destination host. With @option{master=off}, -+the guest will not be able to migrate with the device attached. In the -+latter case, the device should be detached and then reattached after -+migration using the PCI hotplug support. 
-+ -+At most one of the devices sharing the same memory can be master. The -+master must complete migration before you plug back the other devices. -+ -+@subsubsection ivshmem and hugepages -+ -+Instead of specifying the <shm size> using POSIX shm, you may specify -+a memory backend that has hugepage support: -+ -+@example -+@value{qemu_system_x86} -object memory-backend-file,size=1G,mem-path=/dev/hugepages/my-shmem-file,share,id=mb1 -+ -device ivshmem-plain,memdev=mb1 -+@end example -+ -+ivshmem-server also supports hugepages mount points with the -+@option{-m} memory path argument. -+ -+@node direct_linux_boot -+@section Direct Linux Boot -+ -+This section explains how to launch a Linux kernel inside QEMU without -+having to make a full bootable image. It is very useful for fast Linux -+kernel testing. -+ -+The syntax is: -+@example -+@value{qemu_system} -kernel bzImage -hda rootdisk.img -append "root=/dev/hda" -+@end example -+ -+Use @option{-kernel} to provide the Linux kernel image and -+@option{-append} to give the kernel command line arguments. The -+@option{-initrd} option can be used to provide an INITRD image. -+ -+If you do not need graphical output, you can disable it and redirect -+the virtual serial port and the QEMU monitor to the console with the -+@option{-nographic} option. The typical command line is: -+@example -+@value{qemu_system} -kernel bzImage -hda rootdisk.img \ -+ -append "root=/dev/hda console=ttyS0" -nographic -+@end example -+ -+Use @key{Ctrl-a c} to switch between the serial console and the -+monitor (@pxref{pcsys_keys}). -+ -+@node pcsys_usb -+@section USB emulation -+ -+QEMU can emulate a PCI UHCI, OHCI, EHCI or XHCI USB controller. You can -+plug virtual USB devices or real host USB devices (only works with certain -+host operating systems). QEMU will automatically create and connect virtual -+USB hubs as necessary to connect multiple USB devices.
-+ -+@menu -+* usb_devices:: -+* host_usb_devices:: -+@end menu -+@node usb_devices -+@subsection Connecting USB devices -+ -+USB devices can be connected with the @option{-device usb-...} command line -+option or the @code{device_add} monitor command. Available devices are: -+ -+@table @code -+@item usb-mouse -+Virtual Mouse. This will override the PS/2 mouse emulation when activated. -+@item usb-tablet -+Pointer device that uses absolute coordinates (like a touchscreen). -+This means QEMU is able to report the mouse position without having -+to grab the mouse. Also overrides the PS/2 mouse emulation when activated. -+@item usb-storage,drive=@var{drive_id} -+Mass storage device backed by @var{drive_id} (@pxref{disk_images}) -+@item usb-uas -+USB attached SCSI device, see -+@url{https://git.qemu.org/?p=qemu.git;a=blob_plain;f=docs/usb-storage.txt,usb-storage.txt} -+for details -+@item usb-bot -+Bulk-only transport storage device, see -+@url{https://git.qemu.org/?p=qemu.git;a=blob_plain;f=docs/usb-storage.txt,usb-storage.txt} -+for details here, too -+@item usb-mtp,rootdir=@var{dir} -+Media transfer protocol device, using @var{dir} as root of the file tree -+that is presented to the guest. -+@item usb-host,hostbus=@var{bus},hostaddr=@var{addr} -+Pass through the host device identified by @var{bus} and @var{addr} -+@item usb-host,vendorid=@var{vendor},productid=@var{product} -+Pass through the host device identified by @var{vendor} and @var{product} ID -+@item usb-wacom-tablet -+Virtual Wacom PenPartner tablet. This device is similar to the @code{tablet} -+above but it can be used with the tslib library because in addition to touch -+coordinates it reports touch pressure. -+@item usb-kbd -+Standard USB keyboard. Will override the PS/2 keyboard (if present). -+@item usb-serial,chardev=@var{id} -+Serial converter. This emulates an FTDI FT232BM chip connected to host character -+device @var{id}. -+@item usb-braille,chardev=@var{id} -+Braille device. 
This will use BrlAPI to display the braille output on a real -+or fake device referenced by @var{id}. -+@item usb-net[,netdev=@var{id}] -+Network adapter that supports CDC ethernet and RNDIS protocols. @var{id} -+specifies a netdev defined with @code{-netdev @dots{},id=@var{id}}. -+For instance, user-mode networking can be used with -+@example -+@value{qemu_system} [...] -netdev user,id=net0 -device usb-net,netdev=net0 -+@end example -+@item usb-ccid -+Smartcard reader device -+@item usb-audio -+USB audio device -+@end table -+ -+@node host_usb_devices -+@subsection Using host USB devices on a Linux host -+ -+WARNING: this is an experimental feature. QEMU will slow down when -+using it. USB devices requiring real time streaming (e.g. USB Video -+Cameras) are not supported yet. -+ -+@enumerate -+@item If you use an early Linux 2.4 kernel, verify that no Linux driver -+is actually using the USB device. A simple way to do that is simply to -+disable the corresponding kernel module by renaming it from @file{mydriver.o} -+to @file{mydriver.o.disabled}. -+ -+@item Verify that @file{/proc/bus/usb} is working (most Linux distributions should enable it by default). You should see something like this: -+@example -+ls /proc/bus/usb -+001 devices drivers -+@end example -+ -+@item Since only root can access the USB devices directly, you can either launch QEMU as root or change the permissions of the USB devices you want to use. For testing, the following suffices: -+@example -+chown -R myuid /proc/bus/usb -+@end example -+ -+@item Launch QEMU and do in the monitor: -+@example -+info usbhost -+ Device 1.2, speed 480 Mb/s -+ Class 00: USB device 1234:5678, USB DISK -+@end example -+You should see the list of the devices you can use (Never try to use -+hubs, it won't work). -+ -+@item Add the device in QEMU by using: -+@example -+device_add usb-host,vendorid=0x1234,productid=0x5678 -+@end example -+ -+Normally the guest OS should report that a new USB device is plugged.
-+You can use the option @option{-device usb-host,...} to do the same. -+ -+@item Now you can try to use the host USB device in QEMU. -+ -+@end enumerate -+ -+When relaunching QEMU, you may have to unplug and plug again the USB -+device to make it work again (this is a bug). -+ -+@node vnc_security -+@section VNC security -+ -+The VNC server capability provides access to the graphical console -+of the guest VM across the network. This has a number of security -+considerations depending on the deployment scenarios. -+ -+@menu -+* vnc_sec_none:: -+* vnc_sec_password:: -+* vnc_sec_certificate:: -+* vnc_sec_certificate_verify:: -+* vnc_sec_certificate_pw:: -+* vnc_sec_sasl:: -+* vnc_sec_certificate_sasl:: -+* vnc_setup_sasl:: -+@end menu -+@node vnc_sec_none -+@subsection Without passwords -+ -+The simplest VNC server setup does not include any form of authentication. -+For this setup it is recommended to restrict it to listen on a UNIX domain -+socket only. For example -+ -+@example -+@value{qemu_system} [...OPTIONS...] -vnc unix:/home/joebloggs/.qemu-myvm-vnc -+@end example -+ -+This ensures that only users on local box with read/write access to that -+path can access the VNC server. To securely access the VNC server from a -+remote machine, a combination of netcat+ssh can be used to provide a secure -+tunnel. -+ -+@node vnc_sec_password -+@subsection With passwords -+ -+The VNC protocol has limited support for password based authentication. Since -+the protocol limits passwords to 8 characters it should not be considered -+to provide high security. The password can be fairly easily brute-forced by -+a client making repeat connections. For this reason, a VNC server using password -+authentication should be restricted to only listen on the loopback interface -+or UNIX domain sockets. Password authentication is not supported when operating -+in FIPS 140-2 compliance mode as it requires the use of the DES cipher. 
Password -+authentication is requested with the @code{password} option, and then once QEMU -+is running the password is set with the monitor. Until the monitor is used to -+set the password all clients will be rejected. -+ -+@example -+@value{qemu_system} [...OPTIONS...] -vnc :1,password -monitor stdio -+(qemu) change vnc password -+Password: ******** -+(qemu) -+@end example -+ -+@node vnc_sec_certificate -+@subsection With x509 certificates -+ -+The QEMU VNC server also implements the VeNCrypt extension allowing use of -+TLS for encryption of the session, and x509 certificates for authentication. -+The use of x509 certificates is strongly recommended, because TLS on its -+own is susceptible to man-in-the-middle attacks. Basic x509 certificate -+support provides a secure session, but no authentication. This allows any -+client to connect, and provides an encrypted session. -+ -+@example -+@value{qemu_system} [...OPTIONS...] \ -+ -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=no \ -+ -vnc :1,tls-creds=tls0 -monitor stdio -+@end example -+ -+In the above example @code{/etc/pki/qemu} should contain at least three files, -+@code{ca-cert.pem}, @code{server-cert.pem} and @code{server-key.pem}. Unprivileged -+users will want to use a private directory, for example @code{$HOME/.pki/qemu}. -+NB the @code{server-key.pem} file should be protected with file mode 0600 to -+only be readable by the user owning it. -+ -+@node vnc_sec_certificate_verify -+@subsection With x509 certificates and client verification -+ -+Certificates can also provide a means to authenticate the client connecting. -+The server will request that the client provide a certificate, which it will -+then validate against the CA certificate. This is a good choice if deploying -+in an environment with a private internal certificate authority. It uses the -+same syntax as previously, but with @code{verify-peer} set to @code{yes} -+instead. 
-+ -+@example -+@value{qemu_system} [...OPTIONS...] \ -+ -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ -+ -vnc :1,tls-creds=tls0 -monitor stdio -+@end example -+ -+ -+@node vnc_sec_certificate_pw -+@subsection With x509 certificates, client verification and passwords -+ -+Finally, the previous method can be combined with VNC password authentication -+to provide two layers of authentication for clients. -+ -+@example -+@value{qemu_system} [...OPTIONS...] \ -+ -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ -+ -vnc :1,tls-creds=tls0,password -monitor stdio -+(qemu) change vnc password -+Password: ******** -+(qemu) -+@end example -+ -+ -+@node vnc_sec_sasl -+@subsection With SASL authentication -+ -+The SASL authentication method is a VNC extension, that provides an -+easily extendable, pluggable authentication method. This allows for -+integration with a wide range of authentication mechanisms, such as -+PAM, GSSAPI/Kerberos, LDAP, SQL databases, one-time keys and more. -+The strength of the authentication depends on the exact mechanism -+configured. If the chosen mechanism also provides a SSF layer, then -+it will encrypt the datastream as well. -+ -+Refer to the later docs on how to choose the exact SASL mechanism -+used for authentication, but assuming use of one supporting SSF, -+then QEMU can be launched with: -+ -+@example -+@value{qemu_system} [...OPTIONS...] -vnc :1,sasl -monitor stdio -+@end example -+ -+@node vnc_sec_certificate_sasl -+@subsection With x509 certificates and SASL authentication -+ -+If the desired SASL authentication mechanism does not support -+SSF layers, then it is strongly advised to run it in combination -+with TLS and x509 certificates. This provides a securely encrypted -+data stream, avoiding the risk of compromising the security -+credentials.
This can be enabled, by combining the 'sasl' option -+with the aforementioned TLS + x509 options: -+ -+@example -+@value{qemu_system} [...OPTIONS...] \ -+ -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server,verify-peer=yes \ -+ -vnc :1,tls-creds=tls0,sasl -monitor stdio -+@end example -+ -+@node vnc_setup_sasl -+ -+@subsection Configuring SASL mechanisms -+ -+The following documentation assumes use of the Cyrus SASL implementation on a -+Linux host, but the principles should apply to any other SASL implementation -+or host. When SASL is enabled, the mechanism configuration will be loaded from -+system default SASL service config /etc/sasl2/qemu.conf. If running QEMU as an -+unprivileged user, an environment variable SASL_CONF_PATH can be used to make -+it search alternate locations for the service config file. -+ -+If the TLS option is enabled for VNC, then it will provide session encryption, -+otherwise the SASL mechanism will have to provide encryption. In the latter -+case the list of possible plugins that can be used is drastically reduced. In -+fact only the GSSAPI SASL mechanism provides an acceptable level of security -+by modern standards. Previous versions of QEMU referred to the DIGEST-MD5 -+mechanism, however, it has multiple serious flaws described in detail in -+RFC 6331 and thus should never be used any more. The SCRAM-SHA-1 mechanism -+provides a simple username/password auth facility similar to DIGEST-MD5, but -+does not support session encryption, so can only be used in combination with -+TLS. -+ -+When not using TLS the recommended configuration is -+ -+@example -+mech_list: gssapi -+keytab: /etc/qemu/krb5.tab -+@end example -+ -+This says to use the 'GSSAPI' mechanism with the Kerberos v5 protocol, with -+the server principal stored in /etc/qemu/krb5.tab. 
For this to work the -+administrator of your KDC must generate a Kerberos principal for the server, -+with a name of 'qemu/somehost.example.com@@EXAMPLE.COM' replacing -+'somehost.example.com' with the fully qualified host name of the machine -+running QEMU, and 'EXAMPLE.COM' with the Kerberos Realm. -+ -+When using TLS, if username+password authentication is desired, then a -+reasonable configuration is -+ -+@example -+mech_list: scram-sha-1 -+sasldb_path: /etc/qemu/passwd.db -+@end example -+ -+The @code{saslpasswd2} program can be used to populate the @code{passwd.db} -+file with accounts. -+ -+Other SASL configurations will be left as an exercise for the reader. Note that -+all mechanisms, except GSSAPI, should be combined with use of TLS to ensure a -+secure data channel. -+ -+ -+@node network_tls -+@section TLS setup for network services -+ -+Almost all network services in QEMU have the ability to use TLS for -+session data encryption, along with x509 certificates for simple -+client authentication. What follows is a description of how to -+generate certificates suitable for usage with QEMU, and applies to -+the VNC server, character devices with the TCP backend, NBD server -+and client, and migration server and client. -+ -+At a high level, QEMU requires certificates and private keys to be -+provided in PEM format. Aside from the core fields, the certificates -+should include various extension data sets, including v3 basic -+constraints data, key purpose, key usage and subject alt name. -+ -+The GnuTLS package includes a command called @code{certtool} which can -+be used to easily generate certificates and keys in the required format -+with expected data present. Alternatively a certificate management -+service may be used. -+ -+At a minimum it is necessary to setup a certificate authority, and -+issue certificates to each server. If using x509 certificates for -+authentication, then each client will also need to be issued a -+certificate. 
-+ -+Assuming that the QEMU network services will only ever be exposed to -+clients on a private intranet, there is no need to use a commercial -+certificate authority to create certificates. A self-signed CA is -+sufficient, and in fact likely to be more secure since it removes -+the ability of malicious 3rd parties to trick the CA into mis-issuing -+certs for impersonating your services. The only likely exception -+where a commercial CA might be desirable is if enabling the VNC -+websockets server and exposing it directly to remote browser clients. -+In such a case it might be useful to use a commercial CA to avoid -+needing to install custom CA certs in the web browsers. -+ -+The recommendation is for the server to keep its certificates in either -+@code{/etc/pki/qemu} or for unprivileged users in @code{$HOME/.pki/qemu}. -+ -+@menu -+* tls_generate_ca:: -+* tls_generate_server:: -+* tls_generate_client:: -+* tls_creds_setup:: -+* tls_psk:: -+@end menu -+@node tls_generate_ca -+@subsection Setup the Certificate Authority -+ -+This step only needs to be performed once per organization / organizational -+unit. First the CA needs a private key. This key must be kept VERY secret -+and secure. If this key is compromised the entire trust chain of the certificates -+issued with it is lost. -+ -+@example -+# certtool --generate-privkey > ca-key.pem -+@end example -+ -+To generate a self-signed certificate requires one core piece of information, -+the name of the organization. 
A template file @code{ca.info} should be -+populated with the desired data to avoid having to deal with interactive -+prompts from certtool: -+@example -+# cat > ca.info < server-hostNNN.info < server-hostNNN-key.pem -+# certtool --generate-certificate \ -+ --load-ca-certificate ca-cert.pem \ -+ --load-ca-privkey ca-key.pem \ -+ --load-privkey server-hostNNN-key.pem \ -+ --template server-hostNNN.info \ -+ --outfile server-hostNNN-cert.pem -+@end example -+ -+The @code{dns_name} and @code{ip_address} fields in the template are setting -+the subject alt name extension data. The @code{tls_www_server} keyword is the -+key purpose extension to indicate this certificate is intended for usage in -+a web server. Although QEMU network services are not in fact HTTP servers -+(except for VNC websockets), setting this key purpose is still recommended. -+The @code{encryption_key} and @code{signing_key} keyword is the key usage -+extension to indicate this certificate is intended for usage in the data -+session. -+ -+The @code{server-hostNNN-key.pem} and @code{server-hostNNN-cert.pem} files -+should now be securely copied to the server for which they were generated, -+and renamed to @code{server-key.pem} and @code{server-cert.pem} when added -+to the @code{/etc/pki/qemu} directory on the target host. The @code{server-key.pem} -+file is security sensitive and should be kept protected with file mode 0600 -+to prevent disclosure. -+ -+@node tls_generate_client -+@subsection Issuing client certificates -+ -+The QEMU x509 TLS credential setup defaults to enabling client verification -+using certificates, providing a simple authentication mechanism. If this -+default is used, each client also needs to be issued a certificate. The client -+certificate contains enough metadata to uniquely identify the client with the -+scope of the certificate authority. The client certificate would typically -+include fields for organization, state, city, building, etc. 
-+ -+Once again on the host holding the CA, create template files containing the -+information for each client, and use it to issue client certificates. -+ -+ -+@example -+# cat > client-hostNNN.info < client-hostNNN-key.pem -+# certtool --generate-certificate \ -+ --load-ca-certificate ca-cert.pem \ -+ --load-ca-privkey ca-key.pem \ -+ --load-privkey client-hostNNN-key.pem \ -+ --template client-hostNNN.info \ -+ --outfile client-hostNNN-cert.pem -+@end example -+ -+The subject alt name extension data is not required for clients, so the -+the @code{dns_name} and @code{ip_address} fields are not included. -+The @code{tls_www_client} keyword is the key purpose extension to indicate -+this certificate is intended for usage in a web client. Although QEMU -+network clients are not in fact HTTP clients, setting this key purpose is -+still recommended. The @code{encryption_key} and @code{signing_key} keyword -+is the key usage extension to indicate this certificate is intended for -+usage in the data session. -+ -+The @code{client-hostNNN-key.pem} and @code{client-hostNNN-cert.pem} files -+should now be securely copied to the client for which they were generated, -+and renamed to @code{client-key.pem} and @code{client-cert.pem} when added -+to the @code{/etc/pki/qemu} directory on the target host. The @code{client-key.pem} -+file is security sensitive and should be kept protected with file mode 0600 -+to prevent disclosure. -+ -+If a single host is going to be using TLS in both a client and server -+role, it is possible to create a single certificate to cover both roles. -+This would be quite common for the migration and NBD services, where a -+QEMU process will be started by accepting a TLS protected incoming migration, -+and later itself be migrated out to another host. To generate a single -+certificate, simply include the template data from both the client and server -+instructions in one. 
-+ -+@example -+# cat > both-hostNNN.info < both-hostNNN-key.pem -+# certtool --generate-certificate \ -+ --load-ca-certificate ca-cert.pem \ -+ --load-ca-privkey ca-key.pem \ -+ --load-privkey both-hostNNN-key.pem \ -+ --template both-hostNNN.info \ -+ --outfile both-hostNNN-cert.pem -+@end example -+ -+When copying the PEM files to the target host, save them twice, -+once as @code{server-cert.pem} and @code{server-key.pem}, and -+again as @code{client-cert.pem} and @code{client-key.pem}. -+ -+@node tls_creds_setup -+@subsection TLS x509 credential configuration -+ -+QEMU has a standard mechanism for loading x509 credentials that will be -+used for network services and clients. It requires specifying the -+@code{tls-creds-x509} class name to the @code{--object} command line -+argument for the system emulators. Each set of credentials loaded should -+be given a unique string identifier via the @code{id} parameter. A single -+set of TLS credentials can be used for multiple network backends, so VNC, -+migration, NBD, character devices can all share the same credentials. Note, -+however, that credentials for use in a client endpoint must be loaded -+separately from those used in a server endpoint. -+ -+When specifying the object, the @code{dir} parameters specifies which -+directory contains the credential files. This directory is expected to -+contain files with the names mentioned previously, @code{ca-cert.pem}, -+@code{server-key.pem}, @code{server-cert.pem}, @code{client-key.pem} -+and @code{client-cert.pem} as appropriate. It is also possible to -+include a set of pre-generated Diffie-Hellman (DH) parameters in a file -+@code{dh-params.pem}, which can be created using the -+@code{certtool --generate-dh-params} command. If omitted, QEMU will -+dynamically generate DH parameters when loading the credentials. 
-+ -+The @code{endpoint} parameter indicates whether the credentials will -+be used for a network client or server, and determines which PEM -+files are loaded. -+ -+The @code{verify} parameter determines whether x509 certificate -+validation should be performed. This defaults to enabled, meaning -+clients will always validate the server hostname against the -+certificate subject alt name fields and/or CN field. It also -+means that servers will request that clients provide a certificate -+and validate them. Verification should never be turned off for -+client endpoints, however, it may be turned off for server endpoints -+if an alternative mechanism is used to authenticate clients. For -+example, the VNC server can use SASL to authenticate clients -+instead. -+ -+To load server credentials with client certificate validation -+enabled -+ -+@example -+@value{qemu_system} -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=server -+@end example -+ -+while to load client credentials use -+ -+@example -+@value{qemu_system} -object tls-creds-x509,id=tls0,dir=/etc/pki/qemu,endpoint=client -+@end example -+ -+Network services which support TLS will all have a @code{tls-creds} -+parameter which expects the ID of the TLS credentials object. For -+example with VNC: -+ -+@example -+@value{qemu_system} -vnc 0.0.0.0:0,tls-creds=tls0 -+@end example -+ -+@node tls_psk -+@subsection TLS Pre-Shared Keys (PSK) -+ -+Instead of using certificates, you may also use TLS Pre-Shared Keys -+(TLS-PSK). This can be simpler to set up than certificates but is -+less scalable. 
-+ -+Use the GnuTLS @code{psktool} program to generate a @code{keys.psk} -+file containing one or more usernames and random keys: -+ -+@example -+mkdir -m 0700 /tmp/keys -+psktool -u rich -p /tmp/keys/keys.psk -+@end example -+ -+TLS-enabled servers such as qemu-nbd can use this directory like so: -+ -+@example -+qemu-nbd \ -+ -t -x / \ -+ --object tls-creds-psk,id=tls0,endpoint=server,dir=/tmp/keys \ -+ --tls-creds tls0 \ -+ image.qcow2 -+@end example -+ -+When connecting from a qemu-based client you must specify the -+directory containing @code{keys.psk} and an optional @var{username} -+(defaults to ``qemu''): -+ -+@example -+qemu-img info \ -+ --object tls-creds-psk,id=tls0,dir=/tmp/keys,username=rich,endpoint=client \ -+ --image-opts \ -+ file.driver=nbd,file.host=localhost,file.port=10809,file.tls-creds=tls0,file.export=/ -+@end example -+ -+@node gdb_usage -+@section GDB usage -+ -+QEMU has a primitive support to work with gdb, so that you can do -+'Ctrl-C' while the virtual machine is running and inspect its state. -+ -+In order to use gdb, launch QEMU with the '-s' option. It will wait for a -+gdb connection: -+@example -+@value{qemu_system} -s -kernel bzImage -hda rootdisk.img -append "root=/dev/hda" -+Connected to host network interface: tun0 -+Waiting gdb connection on port 1234 -+@end example -+ -+Then launch gdb on the 'vmlinux' executable: -+@example -+> gdb vmlinux -+@end example -+ -+In gdb, connect to QEMU: -+@example -+(gdb) target remote localhost:1234 -+@end example -+ -+Then you can use gdb normally. For example, type 'c' to launch the kernel: -+@example -+(gdb) c -+@end example -+ -+Here are some useful tips in order to use gdb on system code: -+ -+@enumerate -+@item -+Use @code{info reg} to display all the CPU registers. -+@item -+Use @code{x/10i $eip} to display the code at the PC position. -+@item -+Use @code{set architecture i8086} to dump 16 bit code. Then use -+@code{x/10i $cs*16+$eip} to dump the code at the PC position. 
-+@end enumerate -+ -+Advanced debugging options: -+ -+The default single stepping behavior is step with the IRQs and timer service routines off. It is set this way because when gdb executes a single step it expects to advance beyond the current instruction. With the IRQs and timer service routines on, a single step might jump into the one of the interrupt or exception vectors instead of executing the current instruction. This means you may hit the same breakpoint a number of times before executing the instruction gdb wants to have executed. Because there are rare circumstances where you want to single step into an interrupt vector the behavior can be controlled from GDB. There are three commands you can query and set the single step behavior: -+@table @code -+@item maintenance packet qqemu.sstepbits -+ -+This will display the MASK bits used to control the single stepping IE: -+@example -+(gdb) maintenance packet qqemu.sstepbits -+sending: "qqemu.sstepbits" -+received: "ENABLE=1,NOIRQ=2,NOTIMER=4" -+@end example -+@item maintenance packet qqemu.sstep -+ -+This will display the current value of the mask used when single stepping IE: -+@example -+(gdb) maintenance packet qqemu.sstep -+sending: "qqemu.sstep" -+received: "0x7" -+@end example -+@item maintenance packet Qqemu.sstep=HEX_VALUE -+ -+This will change the single step mask, so if wanted to enable IRQs on the single step, but not timers, you would use: -+@example -+(gdb) maintenance packet Qqemu.sstep=0x5 -+sending: "qemu.sstep=0x5" -+received: "OK" -+@end example -+@end table -+ -+@node pcsys_os_specific -+@section Target OS specific information -+ -+@subsection Linux -+ -+To have access to SVGA graphic modes under X11, use the @code{vesa} or -+the @code{cirrus} X11 driver. For optimal performances, use 16 bit -+color depth in the guest and the host OS. 
-+ -+When using a 2.6 guest Linux kernel, you should add the option -+@code{clock=pit} on the kernel command line because the 2.6 Linux -+kernels make very strict real time clock checks by default that QEMU -+cannot simulate exactly. -+ -+When using a 2.6 guest Linux kernel, verify that the 4G/4G patch is -+not activated because QEMU is slower with this patch. The QEMU -+Accelerator Module is also much slower in this case. Earlier Fedora -+Core 3 Linux kernel (< 2.6.9-1.724_FC3) were known to incorporate this -+patch by default. Newer kernels don't have it. -+ -+@subsection Windows -+ -+If you have a slow host, using Windows 95 is better as it gives the -+best speed. Windows 2000 is also a good choice. -+ -+@subsubsection SVGA graphic modes support -+ -+QEMU emulates a Cirrus Logic GD5446 Video -+card. All Windows versions starting from Windows 95 should recognize -+and use this graphic card. For optimal performances, use 16 bit color -+depth in the guest and the host OS. -+ -+If you are using Windows XP as guest OS and if you want to use high -+resolution modes which the Cirrus Logic BIOS does not support (i.e. >= -+1280x1024x16), then you should use the VESA VBE virtual graphic card -+(option @option{-std-vga}). -+ -+@subsubsection CPU usage reduction -+ -+Windows 9x does not correctly use the CPU HLT -+instruction. The result is that it takes host CPU cycles even when -+idle. You can install the utility from -+@url{https://web.archive.org/web/20060212132151/http://www.user.cityline.ru/~maxamn/amnhltm.zip} -+to solve this problem. Note that no such tool is needed for NT, 2000 or XP. -+ -+@subsubsection Windows 2000 disk full problem -+ -+Windows 2000 has a bug which gives a disk full problem during its -+installation. When installing it, use the @option{-win2k-hack} QEMU -+option to enable a specific workaround. After Windows 2000 is -+installed, you no longer need this option (this option slows down the -+IDE transfers). 
-+ -+@subsubsection Windows 2000 shutdown -+ -+Windows 2000 cannot automatically shutdown in QEMU although Windows 98 -+can. It comes from the fact that Windows 2000 does not automatically -+use the APM driver provided by the BIOS. -+ -+In order to correct that, do the following (thanks to Struan -+Bartlett): go to the Control Panel => Add/Remove Hardware & Next => -+Add/Troubleshoot a device => Add a new device & Next => No, select the -+hardware from a list & Next => NT Apm/Legacy Support & Next => Next -+(again) a few times. Now the driver is installed and Windows 2000 now -+correctly instructs QEMU to shutdown at the appropriate moment. -+ -+@subsubsection Share a directory between Unix and Windows -+ -+See @ref{sec_invocation} about the help of the option -+@option{'-netdev user,smb=...'}. -+ -+@subsubsection Windows XP security problem -+ -+Some releases of Windows XP install correctly but give a security -+error when booting: -+@example -+A problem is preventing Windows from accurately checking the -+license for this computer. Error code: 0x800703e6. -+@end example -+ -+The workaround is to install a service pack for XP after a boot in safe -+mode. Then reboot, and the problem should go away. Since there is no -+network while in safe mode, its recommended to download the full -+installation of SP1 or SP2 and transfer that via an ISO or using the -+vvfat block device ("-hdb fat:directory_which_holds_the_SP"). -+ -+@subsection MS-DOS and FreeDOS -+ -+@subsubsection CPU usage reduction -+ -+DOS does not correctly use the CPU HLT instruction. The result is that -+it takes host CPU cycles even when idle. You can install the utility from -+@url{https://web.archive.org/web/20051222085335/http://www.vmware.com/software/dosidle210.zip} -+to solve this problem. -+ -+@node QEMU System emulator for non PC targets -+@chapter QEMU System emulator for non PC targets -+ -+QEMU is a generic emulator and it emulates many non PC -+machines. 
Most of the options are similar to the PC emulator. The -+differences are mentioned in the following sections. -+ -+@menu -+* PowerPC System emulator:: -+* Sparc32 System emulator:: -+* Sparc64 System emulator:: -+* MIPS System emulator:: -+* ARM System emulator:: -+* ColdFire System emulator:: -+* Cris System emulator:: -+* Microblaze System emulator:: -+* SH4 System emulator:: -+* Xtensa System emulator:: -+@end menu -+ -+@node PowerPC System emulator -+@section PowerPC System emulator -+@cindex system emulation (PowerPC) -+ -+Use the executable @file{qemu-system-ppc} to simulate a complete 40P (PREP) -+or PowerMac PowerPC system. -+ -+QEMU emulates the following PowerMac peripherals: -+ -+@itemize @minus -+@item -+UniNorth or Grackle PCI Bridge -+@item -+PCI VGA compatible card with VESA Bochs Extensions -+@item -+2 PMAC IDE interfaces with hard disk and CD-ROM support -+@item -+NE2000 PCI adapters -+@item -+Non Volatile RAM -+@item -+VIA-CUDA with ADB keyboard and mouse. -+@end itemize -+ -+QEMU emulates the following 40P (PREP) peripherals: -+ -+@itemize @minus -+@item -+PCI Bridge -+@item -+PCI VGA compatible card with VESA Bochs Extensions -+@item -+2 IDE interfaces with hard disk and CD-ROM support -+@item -+Floppy disk -+@item -+PCnet network adapters -+@item -+Serial port -+@item -+PREP Non Volatile RAM -+@item -+PC compatible keyboard and mouse. -+@end itemize -+ -+Since version 0.9.1, QEMU uses OpenBIOS @url{https://www.openbios.org/} -+for the g3beige and mac99 PowerMac and the 40p machines. OpenBIOS is a free -+(GPL v2) portable firmware implementation. The goal is to implement a 100% -+IEEE 1275-1994 (referred to as Open Firmware) compliant firmware. -+ -+@c man begin OPTIONS -+ -+The following options are specific to the PowerPC emulation: -+ -+@table @option -+ -+@item -g @var{W}x@var{H}[x@var{DEPTH}] -+ -+Set the initial VGA graphic mode. The default is 800x600x32. 
-+ -+@item -prom-env @var{string} -+ -+Set OpenBIOS variables in NVRAM, for example: -+ -+@example -+qemu-kvm -prom-env 'auto-boot?=false' \ -+ -prom-env 'boot-device=hd:2,\yaboot' \ -+ -prom-env 'boot-args=conf=hd:2,\yaboot.conf' -+@end example -+ -+@end table -+ -+@c man end -+ -+ -+More information is available at -+@url{http://perso.magic.fr/l_indien/qemu-ppc/}. -+ -+@node Sparc32 System emulator -+@section Sparc32 System emulator -+@cindex system emulation (Sparc32) -+ -+Use the executable @file{qemu-system-sparc} to simulate the following -+Sun4m architecture machines: -+@itemize @minus -+@item -+SPARCstation 4 -+@item -+SPARCstation 5 -+@item -+SPARCstation 10 -+@item -+SPARCstation 20 -+@item -+SPARCserver 600MP -+@item -+SPARCstation LX -+@item -+SPARCstation Voyager -+@item -+SPARCclassic -+@item -+SPARCbook -+@end itemize -+ -+The emulation is somewhat complete. SMP up to 16 CPUs is supported, -+but Linux limits the number of usable CPUs to 4. -+ -+QEMU emulates the following sun4m peripherals: -+ -+@itemize @minus -+@item -+IOMMU -+@item -+TCX or cgthree Frame buffer -+@item -+Lance (Am7990) Ethernet -+@item -+Non Volatile RAM M48T02/M48T08 -+@item -+Slave I/O: timers, interrupt controllers, Zilog serial ports, keyboard -+and power/reset logic -+@item -+ESP SCSI controller with hard disk and CD-ROM support -+@item -+Floppy drive (not on SS-600MP) -+@item -+CS4231 sound device (only on SS-5, not working yet) -+@end itemize -+ -+The number of peripherals is fixed in the architecture. Maximum -+memory size depends on the machine type, for SS-5 it is 256MB and for -+others 2047MB. -+ -+Since version 0.8.2, QEMU uses OpenBIOS -+@url{https://www.openbios.org/}. OpenBIOS is a free (GPL v2) portable -+firmware implementation. The goal is to implement a 100% IEEE -+1275-1994 (referred to as Open Firmware) compliant firmware. -+ -+A sample Linux 2.6 series kernel and ram disk image are available on -+the QEMU web site. 
There are still issues with NetBSD and OpenBSD, but -+most kernel versions work. Please note that currently older Solaris kernels -+don't work probably due to interface issues between OpenBIOS and -+Solaris. -+ -+@c man begin OPTIONS -+ -+The following options are specific to the Sparc32 emulation: -+ -+@table @option -+ -+@item -g @var{W}x@var{H}x[x@var{DEPTH}] -+ -+Set the initial graphics mode. For TCX, the default is 1024x768x8 with the -+option of 1024x768x24. For cgthree, the default is 1024x768x8 with the option -+of 1152x900x8 for people who wish to use OBP. -+ -+@item -prom-env @var{string} -+ -+Set OpenBIOS variables in NVRAM, for example: -+ -+@example -+qemu-system-sparc -prom-env 'auto-boot?=false' \ -+ -prom-env 'boot-device=sd(0,2,0):d' -prom-env 'boot-args=linux single' -+@end example -+ -+@item -M [SS-4|SS-5|SS-10|SS-20|SS-600MP|LX|Voyager|SPARCClassic] [|SPARCbook] -+ -+Set the emulated machine type. Default is SS-5. -+ -+@end table -+ -+@c man end -+ -+@node Sparc64 System emulator -+@section Sparc64 System emulator -+@cindex system emulation (Sparc64) -+ -+Use the executable @file{qemu-system-sparc64} to simulate a Sun4u -+(UltraSPARC PC-like machine), Sun4v (T1 PC-like machine), or generic -+Niagara (T1) machine. The Sun4u emulator is mostly complete, being -+able to run Linux, NetBSD and OpenBSD in headless (-nographic) mode. The -+Sun4v emulator is still a work in progress. -+ -+The Niagara T1 emulator makes use of firmware and OS binaries supplied in the S10image/ directory -+of the OpenSPARC T1 project @url{http://download.oracle.com/technetwork/systems/opensparc/OpenSPARCT1_Arch.1.5.tar.bz2} -+and is able to boot the disk.s10hw2 Solaris image. 
-+@example -+qemu-system-sparc64 -M niagara -L /path-to/S10image/ \ -+ -nographic -m 256 \ -+ -drive if=pflash,readonly=on,file=/S10image/disk.s10hw2 -+@end example -+ -+ -+QEMU emulates the following peripherals: -+ -+@itemize @minus -+@item -+UltraSparc IIi APB PCI Bridge -+@item -+PCI VGA compatible card with VESA Bochs Extensions -+@item -+PS/2 mouse and keyboard -+@item -+Non Volatile RAM M48T59 -+@item -+PC-compatible serial ports -+@item -+2 PCI IDE interfaces with hard disk and CD-ROM support -+@item -+Floppy disk -+@end itemize -+ -+@c man begin OPTIONS -+ -+The following options are specific to the Sparc64 emulation: -+ -+@table @option -+ -+@item -prom-env @var{string} -+ -+Set OpenBIOS variables in NVRAM, for example: -+ -+@example -+qemu-system-sparc64 -prom-env 'auto-boot?=false' -+@end example -+ -+@item -M [sun4u|sun4v|niagara] -+ -+Set the emulated machine type. The default is sun4u. -+ -+@end table -+ -+@c man end -+ -+@node MIPS System emulator -+@section MIPS System emulator -+@cindex system emulation (MIPS) -+ -+@menu -+* nanoMIPS System emulator :: -+@end menu -+ -+Four executables cover simulation of 32 and 64-bit MIPS systems in -+both endian options, @file{qemu-system-mips}, @file{qemu-system-mipsel} -+@file{qemu-system-mips64} and @file{qemu-system-mips64el}. -+Five different machine types are emulated: -+ -+@itemize @minus -+@item -+A generic ISA PC-like machine "mips" -+@item -+The MIPS Malta prototype board "malta" -+@item -+An ACER Pica "pica61". This machine needs the 64-bit emulator. -+@item -+MIPS emulator pseudo board "mipssim" -+@item -+A MIPS Magnum R4000 machine "magnum". This machine needs the 64-bit emulator. -+@end itemize -+ -+The generic emulation is supported by Debian 'Etch' and is able to -+install Debian into a virtual disk image. 
The following devices are -+emulated: -+ -+@itemize @minus -+@item -+A range of MIPS CPUs, default is the 24Kf -+@item -+PC style serial port -+@item -+PC style IDE disk -+@item -+NE2000 network card -+@end itemize -+ -+The Malta emulation supports the following devices: -+ -+@itemize @minus -+@item -+Core board with MIPS 24Kf CPU and Galileo system controller -+@item -+PIIX4 PCI/USB/SMbus controller -+@item -+The Multi-I/O chip's serial device -+@item -+PCI network cards (PCnet32 and others) -+@item -+Malta FPGA serial device -+@item -+Cirrus (default) or any other PCI VGA graphics card -+@end itemize -+ -+The Boston board emulation supports the following devices: -+ -+@itemize @minus -+@item -+Xilinx FPGA, which includes a PCIe root port and an UART -+@item -+Intel EG20T PCH connects the I/O peripherals, but only the SATA bus is emulated -+@end itemize -+ -+The ACER Pica emulation supports: -+ -+@itemize @minus -+@item -+MIPS R4000 CPU -+@item -+PC-style IRQ and DMA controllers -+@item -+PC Keyboard -+@item -+IDE controller -+@end itemize -+ -+The MIPS Magnum R4000 emulation supports: -+ -+@itemize @minus -+@item -+MIPS R4000 CPU -+@item -+PC-style IRQ controller -+@item -+PC Keyboard -+@item -+SCSI controller -+@item -+G364 framebuffer -+@end itemize -+ -+The Fulong 2E emulation supports: -+ -+@itemize @minus -+@item -+Loongson 2E CPU -+@item -+Bonito64 system controller as North Bridge -+@item -+VT82C686 chipset as South Bridge -+@item -+RTL8139D as a network card chipset -+@end itemize -+ -+The mipssim pseudo board emulation provides an environment similar -+to what the proprietary MIPS emulator uses for running Linux. 
-+It supports: -+ -+@itemize @minus -+@item -+A range of MIPS CPUs, default is the 24Kf -+@item -+PC style serial port -+@item -+MIPSnet network emulation -+@end itemize -+ -+@node nanoMIPS System emulator -+@subsection nanoMIPS System emulator -+@cindex system emulation (nanoMIPS) -+ -+Executable @file{qemu-system-mipsel} also covers simulation of -+32-bit nanoMIPS system in little endian mode: -+ -+@itemize @minus -+@item -+nanoMIPS I7200 CPU -+@end itemize -+ -+Example of @file{qemu-system-mipsel} usage for nanoMIPS is shown below: -+ -+Download @code{} from @url{https://mipsdistros.mips.com/LinuxDistro/nanomips/buildroot/index.html}. -+ -+Download @code{} from @url{https://mipsdistros.mips.com/LinuxDistro/nanomips/kernels/v4.15.18-432-gb2eb9a8b07a1-20180627102142/index.html}. -+ -+Start system emulation of Malta board with nanoMIPS I7200 CPU: -+@example -+qemu-system-mipsel -cpu I7200 -kernel @code{} \ -+ -M malta -serial stdio -m @code{} -hda @code{} \ -+ -append "mem=256m@@0x0 rw console=ttyS0 vga=cirrus vesa=0x111 root=/dev/sda" -+@end example -+ -+ -+@node ARM System emulator -+@section ARM System emulator -+@cindex system emulation (ARM) -+ -+Use the executable @file{qemu-system-arm} to simulate a ARM -+machine. The ARM Integrator/CP board is emulated with the following -+devices: -+ -+@itemize @minus -+@item -+ARM926E, ARM1026E, ARM946E, ARM1136 or Cortex-A8 CPU -+@item -+Two PL011 UARTs -+@item -+SMC 91c111 Ethernet adapter -+@item -+PL110 LCD controller -+@item -+PL050 KMI with PS/2 keyboard and mouse. -+@item -+PL181 MultiMedia Card Interface with SD card. -+@end itemize -+ -+The ARM Versatile baseboard is emulated with the following devices: -+ -+@itemize @minus -+@item -+ARM926E, ARM1136 or Cortex-A8 CPU -+@item -+PL190 Vectored Interrupt Controller -+@item -+Four PL011 UARTs -+@item -+SMC 91c111 Ethernet adapter -+@item -+PL110 LCD controller -+@item -+PL050 KMI with PS/2 keyboard and mouse. -+@item -+PCI host bridge. 
Note the emulated PCI bridge only provides access to -+PCI memory space. It does not provide access to PCI IO space. -+This means some devices (eg. ne2k_pci NIC) are not usable, and others -+(eg. rtl8139 NIC) are only usable when the guest drivers use the memory -+mapped control registers. -+@item -+PCI OHCI USB controller. -+@item -+LSI53C895A PCI SCSI Host Bus Adapter with hard disk and CD-ROM devices. -+@item -+PL181 MultiMedia Card Interface with SD card. -+@end itemize -+ -+Several variants of the ARM RealView baseboard are emulated, -+including the EB, PB-A8 and PBX-A9. Due to interactions with the -+bootloader, only certain Linux kernel configurations work out -+of the box on these boards. -+ -+Kernels for the PB-A8 board should have CONFIG_REALVIEW_HIGH_PHYS_OFFSET -+enabled in the kernel, and expect 512M RAM. Kernels for The PBX-A9 board -+should have CONFIG_SPARSEMEM enabled, CONFIG_REALVIEW_HIGH_PHYS_OFFSET -+disabled and expect 1024M RAM. -+ -+The following devices are emulated: -+ -+@itemize @minus -+@item -+ARM926E, ARM1136, ARM11MPCore, Cortex-A8 or Cortex-A9 MPCore CPU -+@item -+ARM AMBA Generic/Distributed Interrupt Controller -+@item -+Four PL011 UARTs -+@item -+SMC 91c111 or SMSC LAN9118 Ethernet adapter -+@item -+PL110 LCD controller -+@item -+PL050 KMI with PS/2 keyboard and mouse -+@item -+PCI host bridge -+@item -+PCI OHCI USB controller -+@item -+LSI53C895A PCI SCSI Host Bus Adapter with hard disk and CD-ROM devices -+@item -+PL181 MultiMedia Card Interface with SD card. 
-+@end itemize -+ -+The XScale-based clamshell PDA models ("Spitz", "Akita", "Borzoi" -+and "Terrier") emulation includes the following peripherals: -+ -+@itemize @minus -+@item -+Intel PXA270 System-on-chip (ARM V5TE core) -+@item -+NAND Flash memory -+@item -+IBM/Hitachi DSCM microdrive in a PXA PCMCIA slot - not in "Akita" -+@item -+On-chip OHCI USB controller -+@item -+On-chip LCD controller -+@item -+On-chip Real Time Clock -+@item -+TI ADS7846 touchscreen controller on SSP bus -+@item -+Maxim MAX1111 analog-digital converter on I@math{^2}C bus -+@item -+GPIO-connected keyboard controller and LEDs -+@item -+Secure Digital card connected to PXA MMC/SD host -+@item -+Three on-chip UARTs -+@item -+WM8750 audio CODEC on I@math{^2}C and I@math{^2}S busses -+@end itemize -+ -+The Palm Tungsten|E PDA (codename "Cheetah") emulation includes the -+following elements: -+ -+@itemize @minus -+@item -+Texas Instruments OMAP310 System-on-chip (ARM 925T core) -+@item -+ROM and RAM memories (ROM firmware image can be loaded with -option-rom) -+@item -+On-chip LCD controller -+@item -+On-chip Real Time Clock -+@item -+TI TSC2102i touchscreen controller / analog-digital converter / Audio -+CODEC, connected through MicroWire and I@math{^2}S busses -+@item -+GPIO-connected matrix keypad -+@item -+Secure Digital card connected to OMAP MMC/SD host -+@item -+Three on-chip UARTs -+@end itemize -+ -+Nokia N800 and N810 internet tablets (known also as RX-34 and RX-44 / 48) -+emulation supports the following elements: -+ -+@itemize @minus -+@item -+Texas Instruments OMAP2420 System-on-chip (ARM 1136 core) -+@item -+RAM and non-volatile OneNAND Flash memories -+@item -+Display connected to EPSON remote framebuffer chip and OMAP on-chip -+display controller and a LS041y3 MIPI DBI-C controller -+@item -+TI TSC2301 (in N800) and TI TSC2005 (in N810) touchscreen controllers -+driven through SPI bus -+@item -+National Semiconductor LM8323-controlled qwerty keyboard driven -+through 
I@math{^2}C bus -+@item -+Secure Digital card connected to OMAP MMC/SD host -+@item -+Three OMAP on-chip UARTs and on-chip STI debugging console -+@item -+Mentor Graphics "Inventra" dual-role USB controller embedded in a TI -+TUSB6010 chip - only USB host mode is supported -+@item -+TI TMP105 temperature sensor driven through I@math{^2}C bus -+@item -+TI TWL92230C power management companion with an RTC on I@math{^2}C bus -+@item -+Nokia RETU and TAHVO multi-purpose chips with an RTC, connected -+through CBUS -+@end itemize -+ -+The Luminary Micro Stellaris LM3S811EVB emulation includes the following -+devices: -+ -+@itemize @minus -+@item -+Cortex-M3 CPU core. -+@item -+64k Flash and 8k SRAM. -+@item -+Timers, UARTs, ADC and I@math{^2}C interface. -+@item -+OSRAM Pictiva 96x16 OLED with SSD0303 controller on I@math{^2}C bus. -+@end itemize -+ -+The Luminary Micro Stellaris LM3S6965EVB emulation includes the following -+devices: -+ -+@itemize @minus -+@item -+Cortex-M3 CPU core. -+@item -+256k Flash and 64k SRAM. -+@item -+Timers, UARTs, ADC, I@math{^2}C and SSI interfaces. -+@item -+OSRAM Pictiva 128x64 OLED with SSD0323 controller connected via SSI. -+@end itemize -+ -+The Freecom MusicPal internet radio emulation includes the following -+elements: -+ -+@itemize @minus -+@item -+Marvell MV88W8618 ARM core. -+@item -+32 MB RAM, 256 KB SRAM, 8 MB flash. -+@item -+Up to 2 16550 UARTs -+@item -+MV88W8xx8 Ethernet controller -+@item -+MV88W8618 audio controller, WM8750 CODEC and mixer -+@item -+128×64 display with brightness control -+@item -+2 buttons, 2 navigation wheels with button function -+@end itemize -+ -+The Siemens SX1 models v1 and v2 (default) basic emulation. 
-+The emulation includes the following elements: -+ -+@itemize @minus -+@item -+Texas Instruments OMAP310 System-on-chip (ARM 925T core) -+@item -+ROM and RAM memories (ROM firmware image can be loaded with -pflash) -+V1 -+1 Flash of 16MB and 1 Flash of 8MB -+V2 -+1 Flash of 32MB -+@item -+On-chip LCD controller -+@item -+On-chip Real Time Clock -+@item -+Secure Digital card connected to OMAP MMC/SD host -+@item -+Three on-chip UARTs -+@end itemize -+ -+A Linux 2.6 test image is available on the QEMU web site. More -+information is available in the QEMU mailing-list archive. -+ -+@c man begin OPTIONS -+ -+The following options are specific to the ARM emulation: -+ -+@table @option -+ -+@item -semihosting -+Enable semihosting syscall emulation. -+ -+On ARM this implements the "Angel" interface. -+ -+Note that this allows guest direct access to the host filesystem, -+so should only be used with trusted guest OS. -+ -+@end table -+ -+@c man end -+ -+@node ColdFire System emulator -+@section ColdFire System emulator -+@cindex system emulation (ColdFire) -+@cindex system emulation (M68K) -+ -+Use the executable @file{qemu-system-m68k} to simulate a ColdFire machine. -+The emulator is able to boot a uClinux kernel. -+ -+The M5208EVB emulation includes the following devices: -+ -+@itemize @minus -+@item -+MCF5208 ColdFire V2 Microprocessor (ISA A+ with EMAC). -+@item -+Three Two on-chip UARTs. -+@item -+Fast Ethernet Controller (FEC) -+@end itemize -+ -+The AN5206 emulation includes the following devices: -+ -+@itemize @minus -+@item -+MCF5206 ColdFire V2 Microprocessor. -+@item -+Two on-chip UARTs. -+@end itemize -+ -+@c man begin OPTIONS -+ -+The following options are specific to the ColdFire emulation: -+ -+@table @option -+ -+@item -semihosting -+Enable semihosting syscall emulation. -+ -+On M68K this implements the "ColdFire GDB" interface used by libgloss. 
-+ -+Note that this allows guest direct access to the host filesystem, -+so should only be used with trusted guest OS. -+ -+@end table -+ -+@c man end -+ -+@node Cris System emulator -+@section Cris System emulator -+@cindex system emulation (Cris) -+ -+TODO -+ -+@node Microblaze System emulator -+@section Microblaze System emulator -+@cindex system emulation (Microblaze) -+ -+TODO -+ -+@node SH4 System emulator -+@section SH4 System emulator -+@cindex system emulation (SH4) -+ -+TODO -+ -+@node Xtensa System emulator -+@section Xtensa System emulator -+@cindex system emulation (Xtensa) -+ -+Two executables cover simulation of both Xtensa endian options, -+@file{qemu-system-xtensa} and @file{qemu-system-xtensaeb}. -+Two different machine types are emulated: -+ -+@itemize @minus -+@item -+Xtensa emulator pseudo board "sim" -+@item -+Avnet LX60/LX110/LX200 board -+@end itemize -+ -+The sim pseudo board emulation provides an environment similar -+to one provided by the proprietary Tensilica ISS. -+It supports: -+ -+@itemize @minus -+@item -+A range of Xtensa CPUs, default is the DC232B -+@item -+Console and filesystem access via semihosting calls -+@end itemize -+ -+The Avnet LX60/LX110/LX200 emulation supports: -+ -+@itemize @minus -+@item -+A range of Xtensa CPUs, default is the DC232B -+@item -+16550 UART -+@item -+OpenCores 10/100 Mbps Ethernet MAC -+@end itemize -+ -+@c man begin OPTIONS -+ -+The following options are specific to the Xtensa emulation: -+ -+@table @option -+ -+@item -semihosting -+Enable semihosting syscall emulation. -+ -+Xtensa semihosting provides basic file IO calls, such as open/read/write/seek/select. -+Tensilica baremetal libc for ISS and linux platform "sim" use this interface. -+ -+Note that this allows guest direct access to the host filesystem, -+so should only be used with trusted guest OS. 
-+ -+@end table -+ -+@c man end -+ -+@node QEMU User space emulator -+@chapter QEMU User space emulator -+ -+@menu -+* Supported Operating Systems :: -+* Features:: -+* Linux User space emulator:: -+* BSD User space emulator :: -+@end menu -+ -+@node Supported Operating Systems -+@section Supported Operating Systems -+ -+The following OS are supported in user space emulation: -+ -+@itemize @minus -+@item -+Linux (referred as qemu-linux-user) -+@item -+BSD (referred as qemu-bsd-user) -+@end itemize -+ -+@node Features -+@section Features -+ -+QEMU user space emulation has the following notable features: -+ -+@table @strong -+@item System call translation: -+QEMU includes a generic system call translator. This means that -+the parameters of the system calls can be converted to fix -+endianness and 32/64-bit mismatches between hosts and targets. -+IOCTLs can be converted too. -+ -+@item POSIX signal handling: -+QEMU can redirect to the running program all signals coming from -+the host (such as @code{SIGALRM}), as well as synthesize signals from -+virtual CPU exceptions (for example @code{SIGFPE} when the program -+executes a division by zero). -+ -+QEMU relies on the host kernel to emulate most signal system -+calls, for example to emulate the signal mask. On Linux, QEMU -+supports both normal and real-time signals. -+ -+@item Threading: -+On Linux, QEMU can emulate the @code{clone} syscall and create a real -+host thread (with a separate virtual CPU) for each emulated thread. -+Note that not all targets currently emulate atomic operations correctly. -+x86 and ARM use a global lock in order to preserve their semantics. -+@end table -+ -+QEMU was conceived so that ultimately it can emulate itself. Although -+it is not very useful, it is an important test to show the power of the -+emulator. 
-+ -+@node Linux User space emulator -+@section Linux User space emulator -+ -+@menu -+* Quick Start:: -+* Wine launch:: -+* Command line options:: -+* Other binaries:: -+@end menu -+ -+@node Quick Start -+@subsection Quick Start -+ -+In order to launch a Linux process, QEMU needs the process executable -+itself and all the target (x86) dynamic libraries used by it. -+ -+@itemize -+ -+@item On x86, you can just try to launch any process by using the native -+libraries: -+ -+@example -+qemu-i386 -L / /bin/ls -+@end example -+ -+@code{-L /} tells that the x86 dynamic linker must be searched with a -+@file{/} prefix. -+ -+@item Since QEMU is also a linux process, you can launch QEMU with -+QEMU (NOTE: you can only do that if you compiled QEMU from the sources): -+ -+@example -+qemu-i386 -L / qemu-i386 -L / /bin/ls -+@end example -+ -+@item On non x86 CPUs, you need first to download at least an x86 glibc -+(@file{qemu-runtime-i386-XXX-.tar.gz} on the QEMU web page). Ensure that -+@code{LD_LIBRARY_PATH} is not set: -+ -+@example -+unset LD_LIBRARY_PATH -+@end example -+ -+Then you can launch the precompiled @file{ls} x86 executable: -+ -+@example -+qemu-i386 tests/i386/ls -+@end example -+You can look at @file{scripts/qemu-binfmt-conf.sh} so that -+QEMU is automatically launched by the Linux kernel when you try to -+launch x86 executables. It requires the @code{binfmt_misc} module in the -+Linux kernel. -+ -+@item The x86 version of QEMU is also included. You can try weird things such as: -+@example -+qemu-i386 /usr/local/qemu-i386/bin/qemu-i386 \ -+ /usr/local/qemu-i386/bin/ls-i386 -+@end example -+ -+@end itemize -+ -+@node Wine launch -+@subsection Wine launch -+ -+@itemize -+ -+@item Ensure that you have a working QEMU with the x86 glibc -+distribution (see previous section). 
In order to verify it, you must be -+able to do: -+ -+@example -+qemu-i386 /usr/local/qemu-i386/bin/ls-i386 -+@end example -+ -+@item Download the binary x86 Wine install -+(@file{qemu-XXX-i386-wine.tar.gz} on the QEMU web page). -+ -+@item Configure Wine on your account. Look at the provided script -+@file{/usr/local/qemu-i386/@/bin/wine-conf.sh}. Your previous -+@code{$@{HOME@}/.wine} directory is saved to @code{$@{HOME@}/.wine.org}. -+ -+@item Then you can try the example @file{putty.exe}: -+ -+@example -+qemu-i386 /usr/local/qemu-i386/wine/bin/wine \ -+ /usr/local/qemu-i386/wine/c/Program\ Files/putty.exe -+@end example -+ -+@end itemize -+ -+@node Command line options -+@subsection Command line options -+ -+@example -+@command{qemu-i386} [@option{-h]} [@option{-d]} [@option{-L} @var{path}] [@option{-s} @var{size}] [@option{-cpu} @var{model}] [@option{-g} @var{port}] [@option{-B} @var{offset}] [@option{-R} @var{size}] @var{program} [@var{arguments}...] -+@end example -+ -+@table @option -+@item -h -+Print the help -+@item -L path -+Set the x86 elf interpreter prefix (default=/usr/local/qemu-i386) -+@item -s size -+Set the x86 stack size in bytes (default=524288) -+@item -cpu model -+Select CPU model (-cpu help for list and additional feature selection) -+@item -E @var{var}=@var{value} -+Set environment @var{var} to @var{value}. -+@item -U @var{var} -+Remove @var{var} from the environment. -+@item -B offset -+Offset guest address by the specified number of bytes. This is useful when -+the address region required by guest applications is reserved on the host. -+This option is currently only supported on some hosts. -+@item -R size -+Pre-allocate a guest virtual address space of the given size (in bytes). -+"G", "M", and "k" suffixes may be used when specifying the size. -+@end table -+ -+Debug options: -+ -+@table @option -+@item -d item1,... 
-+Activate logging of the specified items (use '-d help' for a list of log items) -+@item -p pagesize -+Act as if the host page size was 'pagesize' bytes -+@item -g port -+Wait gdb connection to port -+@item -singlestep -+Run the emulation in single step mode. -+@end table -+ -+Environment variables: -+ -+@table @env -+@item QEMU_STRACE -+Print system calls and arguments similar to the 'strace' program -+(NOTE: the actual 'strace' program will not work because the user -+space emulator hasn't implemented ptrace). At the moment this is -+incomplete. All system calls that don't have a specific argument -+format are printed with information for six arguments. Many -+flag-style arguments don't have decoders and will show up as numbers. -+@end table -+ -+@node Other binaries -+@subsection Other binaries -+ -+@cindex user mode (Alpha) -+@command{qemu-alpha} TODO. -+ -+@cindex user mode (ARM) -+@command{qemu-armeb} TODO. -+ -+@cindex user mode (ARM) -+@command{qemu-arm} is also capable of running ARM "Angel" semihosted ELF -+binaries (as implemented by the arm-elf and arm-eabi Newlib/GDB -+configurations), and arm-uclinux bFLT format binaries. -+ -+@cindex user mode (ColdFire) -+@cindex user mode (M68K) -+@command{qemu-m68k} is capable of running semihosted binaries using the BDM -+(m5xxx-ram-hosted.ld) or m68k-sim (sim.ld) syscall interfaces, and -+coldfire uClinux bFLT format binaries. -+ -+The binary format is detected automatically. -+ -+@cindex user mode (Cris) -+@command{qemu-cris} TODO. -+ -+@cindex user mode (i386) -+@command{qemu-i386} TODO. -+@command{qemu-x86_64} TODO. -+ -+@cindex user mode (Microblaze) -+@command{qemu-microblaze} TODO. -+ -+@cindex user mode (MIPS) -+@command{qemu-mips} executes 32-bit big endian MIPS binaries (MIPS O32 ABI). -+ -+@command{qemu-mipsel} executes 32-bit little endian MIPS binaries (MIPS O32 ABI). -+ -+@command{qemu-mips64} executes 64-bit big endian MIPS binaries (MIPS N64 ABI). 
-+ -+@command{qemu-mips64el} executes 64-bit little endian MIPS binaries (MIPS N64 ABI). -+ -+@command{qemu-mipsn32} executes 32-bit big endian MIPS binaries (MIPS N32 ABI). -+ -+@command{qemu-mipsn32el} executes 32-bit little endian MIPS binaries (MIPS N32 ABI). -+ -+@cindex user mode (NiosII) -+@command{qemu-nios2} TODO. -+ -+@cindex user mode (PowerPC) -+@command{qemu-ppc64abi32} TODO. -+@command{qemu-ppc64} TODO. -+@command{qemu-ppc} TODO. -+ -+@cindex user mode (SH4) -+@command{qemu-sh4eb} TODO. -+@command{qemu-sh4} TODO. -+ -+@cindex user mode (SPARC) -+@command{qemu-sparc} can execute Sparc32 binaries (Sparc32 CPU, 32 bit ABI). -+ -+@command{qemu-sparc32plus} can execute Sparc32 and SPARC32PLUS binaries -+(Sparc64 CPU, 32 bit ABI). -+ -+@command{qemu-sparc64} can execute some Sparc64 (Sparc64 CPU, 64 bit ABI) and -+SPARC32PLUS binaries (Sparc64 CPU, 32 bit ABI). -+ -+@node BSD User space emulator -+@section BSD User space emulator -+ -+@menu -+* BSD Status:: -+* BSD Quick Start:: -+* BSD Command line options:: -+@end menu -+ -+@node BSD Status -+@subsection BSD Status -+ -+@itemize @minus -+@item -+target Sparc64 on Sparc64: Some trivial programs work. -+@end itemize -+ -+@node BSD Quick Start -+@subsection Quick Start -+ -+In order to launch a BSD process, QEMU needs the process executable -+itself and all the target dynamic libraries used by it. -+ -+@itemize -+ -+@item On Sparc64, you can just try to launch any process by using the native -+libraries: -+ -+@example -+qemu-sparc64 /bin/ls -+@end example -+ -+@end itemize -+ -+@node BSD Command line options -+@subsection Command line options -+ -+@example -+@command{qemu-sparc64} [@option{-h]} [@option{-d]} [@option{-L} @var{path}] [@option{-s} @var{size}] [@option{-bsd} @var{type}] @var{program} [@var{arguments}...] 
-+@end example -+ -+@table @option -+@item -h -+Print the help -+@item -L path -+Set the library root path (default=/) -+@item -s size -+Set the stack size in bytes (default=524288) -+@item -ignore-environment -+Start with an empty environment. Without this option, -+the initial environment is a copy of the caller's environment. -+@item -E @var{var}=@var{value} -+Set environment @var{var} to @var{value}. -+@item -U @var{var} -+Remove @var{var} from the environment. -+@item -bsd type -+Set the type of the emulated BSD Operating system. Valid values are -+FreeBSD, NetBSD and OpenBSD (default). -+@end table -+ -+Debug options: -+ -+@table @option -+@item -d item1,... -+Activate logging of the specified items (use '-d help' for a list of log items) -+@item -p pagesize -+Act as if the host page size was 'pagesize' bytes -+@item -singlestep -+Run the emulation in single step mode. -+@end table -+ -+@node System requirements -+@chapter System requirements -+ -+@section KVM kernel module -+ -+On x86_64 hosts, the default set of CPU features enabled by the KVM accelerator -+require the host to be running Linux v4.5 or newer. -+ -+The OpteronG[345] CPU models require KVM support for RDTSCP, which was -+added with Linux 4.5 which is supported by the major distros. And even -+if RHEL7 has kernel 3.10, KVM there has the required functionality there -+to make it close to a 4.5 or newer kernel. -+ -+@include docs/security.texi -+ -+@include qemu-tech.texi -+ -+@include qemu-deprecated.texi -+ -+@node Supported build platforms -+@appendix Supported build platforms -+ -+QEMU aims to support building and executing on multiple host OS platforms. -+This appendix outlines which platforms are the major build targets. These -+platforms are used as the basis for deciding upon the minimum required -+versions of 3rd party software QEMU depends on. 
The supported platforms -+are the targets for automated testing performed by the project when patches -+are submitted for review, and tested before and after merge. -+ -+If a platform is not listed here, it does not imply that QEMU won't work. -+If an unlisted platform has comparable software versions to a listed platform, -+there is every expectation that it will work. Bug reports are welcome for -+problems encountered on unlisted platforms unless they are clearly older -+vintage than what is described here. -+ -+Note that when considering software versions shipped in distros as support -+targets, QEMU considers only the version number, and assumes the features in -+that distro match the upstream release with the same version. In other words, -+if a distro backports extra features to the software in their distro, QEMU -+upstream code will not add explicit support for those backports, unless the -+feature is auto-detectable in a manner that works for the upstream releases -+too. -+ -+The Repology site @url{https://repology.org} is a useful resource to identify -+currently shipped versions of software in various operating systems, though -+it does not cover all distros listed below. -+ -+@section Linux OS -+ -+For distributions with frequent, short-lifetime releases, the project will -+aim to support all versions that are not end of life by their respective -+vendors. For the purposes of identifying supported software versions, the -+project will look at Fedora, Ubuntu, and openSUSE distros. Other short- -+lifetime distros will be assumed to ship similar software versions. -+ -+For distributions with long-lifetime releases, the project will aim to support -+the most recent major version at all times. Support for the previous major -+version will be dropped 2 years after the new major version is released, -+or when it reaches ``end of life''. 
For the purposes of identifying -+supported software versions, the project will look at RHEL, Debian, -+Ubuntu LTS, and SLES distros. Other long-lifetime distros will be -+assumed to ship similar software versions. -+ -+@section Windows -+ -+The project supports building with current versions of the MinGW toolchain, -+hosted on Linux. -+ -+@section macOS -+ -+The project supports building with the two most recent versions of macOS, with -+the current homebrew package set available. -+ -+@section FreeBSD -+ -+The project aims to support the all the versions which are not end of life. -+ -+@section NetBSD -+ -+The project aims to support the most recent major version at all times. Support -+for the previous major version will be dropped 2 years after the new major -+version is released. -+ -+@section OpenBSD -+ -+The project aims to support the all the versions which are not end of life. -+ -+@node License -+@appendix License -+ -+QEMU is a trademark of Fabrice Bellard. -+ -+QEMU is released under the -+@url{https://www.gnu.org/licenses/gpl-2.0.txt,GNU General Public License}, -+version 2. Parts of QEMU have specific licenses, see file -+@url{https://git.qemu.org/?p=qemu.git;a=blob_plain;f=LICENSE,LICENSE}. -+ -+@node Index -+@appendix Index -+@menu -+* Concept Index:: -+* Function Index:: -+* Keystroke Index:: -+* Program Index:: -+* Data Type Index:: -+* Variable Index:: -+@end menu -+ -+@node Concept Index -+@section Concept Index -+This is the main index. Should we combine all keywords in one index? TODO -+@printindex cp -+ -+@node Function Index -+@section Function Index -+This index could be used for command line options and monitor functions. -+@printindex fn -+ -+@node Keystroke Index -+@section Keystroke Index -+ -+This is a list of all keystrokes which have a special function -+in system emulation. 
-+ -+@printindex ky -+ -+@node Program Index -+@section Program Index -+@printindex pg -+ -+@node Data Type Index -+@section Data Type Index -+ -+This index could be used for qdev device names and options. -+ -+@printindex tp -+ -+@node Variable Index -+@section Variable Index -+@printindex vr -+ -+@bye +diff --git a/docs/defs.rst.inc b/docs/defs.rst.inc +index 48d05aa..d74dbde 100644 +--- a/docs/defs.rst.inc ++++ b/docs/defs.rst.inc +@@ -9,7 +9,7 @@ + but the manpages will end up misrendered with following normal text + incorrectly in boldface. + +-.. |qemu_system| replace:: qemu-system-x86_64 +-.. |qemu_system_x86| replace:: qemu_system-x86_64 ++.. |qemu_system| replace:: qemu-kvm ++.. |qemu_system_x86| replace:: qemu-kvm + .. |I2C| replace:: I\ :sup:`2`\ C + .. |I2S| replace:: I\ :sup:`2`\ S +diff --git a/docs/interop/live-block-operations.rst b/docs/interop/live-block-operations.rst +index 48afdc7..6650b2c 100644 +--- a/docs/interop/live-block-operations.rst ++++ b/docs/interop/live-block-operations.rst +@@ -129,7 +129,7 @@ To show some example invocations of command-line, we will use the + following invocation of QEMU, with a QMP server running over UNIX + socket:: + +- $ ./x86_64-softmmu/qemu-system-x86_64 -display none -no-user-config \ ++ $ qemu-kvm -display none -no-user-config \ + -M q35 -nodefaults -m 512 \ + -blockdev node-name=node-A,driver=qcow2,file.driver=file,file.node-name=file,file.filename=./a.qcow2 \ + -device virtio-blk,drive=node-A,id=virtio0 \ +@@ -694,7 +694,7 @@ instance, with the following invocation. 
(As noted earlier, for + simplicity's sake, the destination QEMU is started on the same host, but + it could be located elsewhere):: + +- $ ./x86_64-softmmu/qemu-system-x86_64 -display none -no-user-config \ ++ $ qemu-kvm -display none -no-user-config \ + -M q35 -nodefaults -m 512 \ + -blockdev node-name=node-TargetDisk,driver=qcow2,file.driver=file,file.node-name=file,file.filename=./target-disk.qcow2 \ + -device virtio-blk,drive=node-TargetDisk,id=virtio0 \ +diff --git a/docs/tools/qemu-trace-stap.rst b/docs/tools/qemu-trace-stap.rst +index fb70445..0d9a783 100644 +--- a/docs/tools/qemu-trace-stap.rst ++++ b/docs/tools/qemu-trace-stap.rst +@@ -45,19 +45,19 @@ The following commands are valid: + any of the listed names. If no *PATTERN* is given, the all possible + probes will be listed. + +- For example, to list all probes available in the ``qemu-system-x86_64`` ++ For example, to list all probes available in the ``qemu-kvm`` + binary: + + :: + +- $ qemu-trace-stap list qemu-system-x86_64 ++ $ qemu-trace-stap list qemu-kvm + + To filter the list to only cover probes related to QEMU's cryptographic + subsystem, in a binary outside ``$PATH`` + + :: + +- $ qemu-trace-stap list /opt/qemu/4.0.0/bin/qemu-system-x86_64 'qcrypto*' ++ $ qemu-trace-stap list /opt/qemu/4.0.0/bin/qemu-kvm 'qcrypto*' + + .. option:: run OPTIONS BINARY PATTERN... + +@@ -89,18 +89,18 @@ The following commands are valid: + Restrict the tracing session so that it only triggers for the process + identified by *PID*. 
+ +- For example, to monitor all processes executing ``qemu-system-x86_64`` ++ For example, to monitor all processes executing ``qemu-kvm`` + as found on ``$PATH``, displaying all I/O related probes: + + :: + +- $ qemu-trace-stap run qemu-system-x86_64 'qio*' ++ $ qemu-trace-stap run qemu-kvm 'qio*' + + To monitor only the QEMU process with PID 1732 + + :: + +- $ qemu-trace-stap run --pid=1732 qemu-system-x86_64 'qio*' ++ $ qemu-trace-stap run --pid=1732 qemu-kvm 'qio*' + + To monitor QEMU processes running an alternative binary outside of + ``$PATH``, displaying verbose information about setup of the +@@ -108,7 +108,7 @@ The following commands are valid: + + :: + +- $ qemu-trace-stap -v run /opt/qemu/4.0.0/qemu-system-x86_64 'qio*' ++ $ qemu-trace-stap -v run /opt/qemu/4.0.0/qemu-kvm 'qio*' + + See also + -------- +diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst +index 824e713..8449936 100644 +--- a/docs/tools/virtiofsd.rst ++++ b/docs/tools/virtiofsd.rst +@@ -116,7 +116,7 @@ Export ``/var/lib/fs/vm001/`` on vhost-user UNIX domain socket + :: + + host# virtiofsd --socket-path=/var/run/vm001-vhost-fs.sock -o source=/var/lib/fs/vm001 +- host# qemu-system-x86_64 \ ++ host# qemu-kvm \ + -chardev socket,id=char0,path=/var/run/vm001-vhost-fs.sock \ + -device vhost-user-fs-pci,chardev=char0,tag=myfs \ + -object memory-backend-memfd,id=mem,size=4G,share=on \ diff --git a/qemu-options.hx b/qemu-options.hx -index 43b0dbc..9b14fe8 100644 +index b969944..fe85a0e 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -2887,11 +2887,11 @@ SRST +@@ -2891,11 +2891,11 @@ SRST :: @@ -3705,13 +153,13 @@ index 43b0dbc..9b14fe8 100644 - -netdev type=vhost-user,id=net0,chardev=chr0 \ - -device virtio-net-pci,netdev=net0 + qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ -+ -numa node,memdev=mem \ -+ -chardev socket,id=chr0,path=/path/to/socket \ -+ -netdev type=vhost-user,id=net0,chardev=chr0 \ -+ -device 
virtio-net-pci,netdev=net0 ++ -numa node,memdev=mem \ ++ -chardev socket,id=chr0,path=/path/to/socket \ ++ -netdev type=vhost-user,id=net0,chardev=chr0 \ ++ -device virtio-net-pci,netdev=net0 - ``-netdev hubport,id=id,hubid=hubid[,netdev=nd]`` - Create a hub port on the emulated hub with ID hubid. + ``-netdev vhost-vdpa,vhostdev=/path/to/dev`` + Establish a vhost-vdpa netdev. -- 1.8.3.1 diff --git a/0018-usb-xhci-Fix-PCI-capability-order.patch b/0018-usb-xhci-Fix-PCI-capability-order.patch index 4f5c1fc..81a8467 100644 --- a/0018-usb-xhci-Fix-PCI-capability-order.patch +++ b/0018-usb-xhci-Fix-PCI-capability-order.patch @@ -1,4 +1,4 @@ -From 6a0ad69ff5249d19e497ea902cd456c442febb80 Mon Sep 17 00:00:00 2001 +From 1cd3f04262da39a69509435c1db96c0c2a8ebd62 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 5 May 2017 19:06:14 +0200 Subject: usb-xhci: Fix PCI capability order diff --git a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index 738713e..63a6d18 100644 --- a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From fed8a416435347899223bbfc260e57d8f8ddd20a Mon Sep 17 00:00:00 2001 +From 766fb0162dea8353a39f6eeff5ba90309cb0338e Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] @@ -45,7 +45,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 9b72094..71614b4 100644 +index b497752..d3a64a6 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -814,6 +814,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, diff --git a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index 32a70ef..2534964 100644 --- a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From 00aeb546c0f989cf0c4a9623bbac9b187b051d68 Mon Sep 17 00:00:00 2001 +From ff6425248e516c678db5bb85d59b5811c48bedaf Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts diff --git a/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch b/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch index d0a3bd9..cc7fdca 100644 --- a/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +++ b/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch @@ -1,4 +1,4 @@ -From 79b1a539f271440baa5d3ef4264c761175ca1c9d Mon Sep 17 00:00:00 2001 +From 2ce0d065e712fdfae74a52cfa5188791eaa7f848 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 13 Mar 2020 12:34:32 +0000 Subject: block: Versioned x-blockdev-reopen API with feature flag @@ -29,7 +29,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/qapi/block-core.json b/qapi/block-core.json -index 0e1c6a5..6d94f92 100644 +index b20332e..db4544d 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -4135,10 +4135,17 @@ diff --git a/0022-virtio-net-fix-removal-of-failover-device.patch b/0022-virtio-net-fix-removal-of-failover-device.patch new file mode 100644 index 0000000..8a2b5be --- /dev/null +++ b/0022-virtio-net-fix-removal-of-failover-device.patch @@ -0,0 +1,52 @@ +From 0e4d9f0332efd8417831815a414a5131f85e0a85 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Fri, 3 Jul 2020 12:37:05 -0400 +Subject: virtio-net: fix removal of failover device + +RH-Author: Juan Quintela +Message-id: <20200703123705.7175-2-quintela@redhat.com> +Patchwork-id: 97901 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/1] virtio-net: fix removal of failover device +Bugzilla: 1820120 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Laurent Vivier +RH-Acked-by: Dr. David Alan Gilbert + +If you have a networking device and its virtio failover device, and +you remove them in this order: +- virtio device +- the real device + +You get qemu crash. +See bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1820120 + +Bug exist on qemu 4.2 and 5.0. +But in 5.0 don't shows because commit +77b06bba62034a87cc61a9c8de1309ae3e527d97 + +somehow papers over it. + +CC: Jason Wang +CC: Michael S. Tsirkin + +Signed-off-by: Juan Quintela +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/net/virtio-net.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 1596cb1..f82455b 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3417,6 +3417,7 @@ static void virtio_net_device_unrealize(DeviceState *dev) + g_free(n->vlans); + + if (n->failover) { ++ device_listener_unregister(&n->primary_listener); + g_free(n->primary_device_id); + g_free(n->standby_id); + qobject_unref(n->primary_device_dict); +-- +1.8.3.1 + diff --git a/0023-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch b/0024-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch similarity index 93% rename from 0023-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch rename to 0024-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch index c3e67db..3301702 100644 --- a/0023-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch +++ b/0024-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch @@ -1,6 +1,6 @@ -From c3b099b389455b919b4b22011ed2fa3e7fd49510 Mon Sep 17 00:00:00 2001 +From 34817c44ee8e467df7e9f92b5fffa5679172bb58 Mon Sep 17 00:00:00 2001 From: David Gibson -Date: Wed, 1 Jul 2020 12:24:02 +0200 +Date: Wed, 8 Jul 2020 10:02:25 +0200 Subject: RHEL-only: Enable vTPM for POWER in downstream configs RH-Author: David Gibson diff --git a/0024-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch b/0025-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch similarity index 93% rename from 0024-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch rename to 0025-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch index 1ae7b1a..5924eed 100644 --- a/0024-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch +++ b/0025-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch @@ -1,6 +1,6 @@ -From 7fd82f454755e0c7c68faac76a156c9ddb322124 Mon Sep 17 00:00:00 2001 +From 0de024660bb9c42ca87fd179b6a4122c3a9e0eb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 1 Jul 2020 12:24:02 +0200 
+Date: Wed, 8 Jul 2020 10:02:25 +0200 Subject: redhat: fix 5.0 rebase missing ISA TPM TIS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 diff --git a/0025-redhat-define-hw_compat_8_2.patch b/0026-redhat-define-hw_compat_8_2.patch similarity index 91% rename from 0025-redhat-define-hw_compat_8_2.patch rename to 0026-redhat-define-hw_compat_8_2.patch index e565300..eb1a0a1 100644 --- a/0025-redhat-define-hw_compat_8_2.patch +++ b/0026-redhat-define-hw_compat_8_2.patch @@ -1,6 +1,6 @@ -From 3be5878b68235837729f452f0940105505bf4a55 Mon Sep 17 00:00:00 2001 +From 43a29be3b4f2186441067a2f5cd45d4e6035f206 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" -Date: Wed, 1 Jul 2020 12:24:02 +0200 +Date: Wed, 8 Jul 2020 10:02:25 +0200 Subject: redhat: define hw_compat_8_2 RH-Author: Dr. David Alan Gilbert @@ -8,6 +8,7 @@ Message-id: <20200619154227.23845-2-dgilbert@redhat.com> Patchwork-id: 97662 O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 1/2] redhat: define hw_compat_8_2 Bugzilla: 1842902 +RH-Acked-by: Eduardo Habkost RH-Acked-by: Laurent Vivier RH-Acked-by: Stefan Hajnoczi @@ -63,10 +64,10 @@ index ef6b320..b837399 100644 */ GlobalProperty hw_compat_rhel_8_1[] = { diff --git a/include/hw/boards.h b/include/hw/boards.h -index 24c1348..26ac91d 100644 +index c357731..f918a15 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -374,6 +374,9 @@ extern const size_t hw_compat_2_2_len; +@@ -375,6 +375,9 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; diff --git a/0026-x86-Add-8.3.0-x86_64-machine-type.patch b/0027-x86-Add-8.3.0-x86_64-machine-type.patch similarity index 88% rename from 0026-x86-Add-8.3.0-x86_64-machine-type.patch rename to 0027-x86-Add-8.3.0-x86_64-machine-type.patch index 1b490ad..01d528e 100644 --- a/0026-x86-Add-8.3.0-x86_64-machine-type.patch +++ b/0027-x86-Add-8.3.0-x86_64-machine-type.patch @@ -1,6 +1,6 @@ -From 16e1749209e7df15f7ce12418886117c2259dee7 Mon Sep 17 
00:00:00 2001 +From 9fc44f5b2b271ea3337f8e5eae3bc3d2f9d857f3 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" -Date: Wed, 1 Jul 2020 12:24:02 +0200 +Date: Wed, 8 Jul 2020 10:02:25 +0200 Subject: x86: Add 8.3.0 x86_64 machine type RH-Author: Dr. David Alan Gilbert @@ -8,6 +8,7 @@ Message-id: <20200619154227.23845-3-dgilbert@redhat.com> Patchwork-id: 97663 O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 2/2] x86: Add 8.3.0 x86_64 machine type Bugzilla: 1842902 +RH-Acked-by: Eduardo Habkost RH-Acked-by: Laurent Vivier RH-Acked-by: Stefan Hajnoczi @@ -25,10 +26,10 @@ Signed-off-by: Miroslav Rezanina 4 files changed, 35 insertions(+) diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index d17d6f8..aaf3ef4 100644 +index fe37bde..94c857e 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -359,6 +359,12 @@ GlobalProperty pc_rhel_compat[] = { +@@ -360,6 +360,12 @@ GlobalProperty pc_rhel_compat[] = { }; const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); @@ -42,10 +43,10 @@ index d17d6f8..aaf3ef4 100644 GlobalProperty pc_rhel_8_1_compat[] = { }; const size_t pc_rhel_8_1_compat_len = G_N_ELEMENTS(pc_rhel_8_1_compat); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index b195f26..c951107 100644 +index eeadd89..f14ddcb 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -1027,6 +1027,10 @@ static void pc_machine_rhel760_options(MachineClass *m) +@@ -1028,6 +1028,10 @@ static void pc_machine_rhel760_options(MachineClass *m) m->smbus_no_migration_support = true; pcmc->pvh_enabled = false; pcmc->default_cpu_version = CPU_VERSION_LEGACY; @@ -57,10 +58,10 @@ index b195f26..c951107 100644 compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 6c49a50..ef0b6e3 100644 +index 1cd4e15..b9e8dcb 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -589,6 +589,23 @@ static void pc_q35_machine_rhel_options(MachineClass 
*m) +@@ -590,6 +590,23 @@ static void pc_q35_machine_rhel_options(MachineClass *m) compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); } @@ -84,7 +85,7 @@ index 6c49a50..ef0b6e3 100644 static void pc_q35_init_rhel820(MachineState *machine) { pc_q35_init(machine); -@@ -599,8 +616,13 @@ static void pc_q35_machine_rhel820_options(MachineClass *m) +@@ -600,8 +617,13 @@ static void pc_q35_machine_rhel820_options(MachineClass *m) PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_machine_rhel_options(m); m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; diff --git a/0027-hw-arm-Changes-to-rhel820-machine.patch b/0028-hw-arm-Changes-to-rhel820-machine.patch similarity index 91% rename from 0027-hw-arm-Changes-to-rhel820-machine.patch rename to 0028-hw-arm-Changes-to-rhel820-machine.patch index 842d73b..419610d 100644 --- a/0027-hw-arm-Changes-to-rhel820-machine.patch +++ b/0028-hw-arm-Changes-to-rhel820-machine.patch @@ -1,6 +1,6 @@ -From b3f4822d380a046220749314c9a05cdb0d5d2718 Mon Sep 17 00:00:00 2001 +From 2717baf34693fc9aa5fa7d1f2a5e8eb1677c1bc2 Mon Sep 17 00:00:00 2001 From: Gavin Shan -Date: Wed, 1 Jul 2020 12:24:02 +0200 +Date: Wed, 8 Jul 2020 10:02:25 +0200 Subject: hw/arm: Changes to rhel820 machine RH-Author: Gavin Shan @@ -24,7 +24,7 @@ Signed-off-by: Miroslav Rezanina 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index abbc7d5..07c5c14 100644 +index 5a45677..53f02e0 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -2090,6 +2090,7 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, @@ -43,7 +43,7 @@ index abbc7d5..07c5c14 100644 static char *virt_get_gic_version(Object *obj, Error **errp) { -@@ -2727,12 +2729,8 @@ static void rhel820_virt_instance_init(Object *obj) +@@ -2760,12 +2762,8 @@ static void rhel820_virt_instance_init(Object *obj) object_property_set_description(obj, "highmem", "Set on/off to enable/disable using " "physical address space above 32 bits"); @@ -58,7 +58,7 
@@ index abbc7d5..07c5c14 100644 object_property_add_str(obj, "gic-version", virt_get_gic_version, virt_set_gic_version); object_property_set_description(obj, "gic-version", -@@ -2759,12 +2757,9 @@ static void rhel820_virt_instance_init(Object *obj) +@@ -2792,12 +2790,9 @@ static void rhel820_virt_instance_init(Object *obj) object_property_set_description(obj, "iommu", "Set the IOMMU type. " "Valid values are none and smmuv3"); diff --git a/0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch b/0029-hw-arm-Introduce-rhel_virt_instance_init-helper.patch similarity index 84% rename from 0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch rename to 0029-hw-arm-Introduce-rhel_virt_instance_init-helper.patch index 20ee03d..c982e43 100644 --- a/0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch +++ b/0029-hw-arm-Introduce-rhel_virt_instance_init-helper.patch @@ -1,6 +1,6 @@ -From 75c705bcd0e6adb903889dd031c6f867a1ca7a63 Mon Sep 17 00:00:00 2001 +From 91449aaf4ad482d9208c8861f549f7fe58af02ac Mon Sep 17 00:00:00 2001 From: Gavin Shan -Date: Wed, 1 Jul 2020 12:24:02 +0200 +Date: Wed, 8 Jul 2020 10:02:25 +0200 Subject: hw/arm: Introduce rhel_virt_instance_init() helper RH-Author: Gavin Shan @@ -21,10 +21,10 @@ Signed-off-by: Miroslav Rezanina 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 07c5c14..c1bc866 100644 +index 53f02e0..f26ae9f 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -2713,7 +2713,7 @@ static void rhel_machine_init(void) +@@ -2746,7 +2746,7 @@ static void rhel_machine_init(void) } type_init(rhel_machine_init); @@ -33,7 +33,7 @@ index 07c5c14..c1bc866 100644 { VirtMachineState *vms = VIRT_MACHINE(obj); VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); -@@ -2764,6 +2764,11 @@ static void rhel820_virt_instance_init(Object *obj) +@@ -2797,6 +2797,11 @@ static void rhel820_virt_instance_init(Object *obj) virt_flash_create(vms); } diff --git a/0029-hw-arm-Add-rhel830-machine-type.patch 
b/0030-hw-arm-Add-rhel830-machine-type.patch similarity index 85% rename from 0029-hw-arm-Add-rhel830-machine-type.patch rename to 0030-hw-arm-Add-rhel830-machine-type.patch index 7517842..748a6ba 100644 --- a/0029-hw-arm-Add-rhel830-machine-type.patch +++ b/0030-hw-arm-Add-rhel830-machine-type.patch @@ -1,6 +1,6 @@ -From c0c64a417f65d388526c62b2d82f29fc4f5aed76 Mon Sep 17 00:00:00 2001 +From afad5b1cdd3d29cd497cb9987fca3009fa352d40 Mon Sep 17 00:00:00 2001 From: Gavin Shan -Date: Wed, 1 Jul 2020 12:24:02 +0200 +Date: Wed, 8 Jul 2020 10:02:25 +0200 Subject: hw/arm: Add rhel830 machine type RH-Author: Gavin Shan @@ -21,10 +21,10 @@ Signed-off-by: Miroslav Rezanina 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index c1bc866..48af222 100644 +index f26ae9f..f1553f3 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -2764,6 +2764,17 @@ static void rhel_virt_instance_init(Object *obj) +@@ -2797,6 +2797,17 @@ static void rhel_virt_instance_init(Object *obj) virt_flash_create(vms); } @@ -42,7 +42,7 @@ index c1bc866..48af222 100644 static void rhel820_virt_instance_init(Object *obj) { rhel_virt_instance_init(obj); -@@ -2771,6 +2782,8 @@ static void rhel820_virt_instance_init(Object *obj) +@@ -2804,6 +2815,8 @@ static void rhel820_virt_instance_init(Object *obj) static void rhel820_virt_options(MachineClass *mc) { diff --git a/0031-redhat-define-pseries-rhel8.3.0-machine-type.patch b/0031-redhat-define-pseries-rhel8.3.0-machine-type.patch new file mode 100644 index 0000000..b6d46d5 --- /dev/null +++ b/0031-redhat-define-pseries-rhel8.3.0-machine-type.patch @@ -0,0 +1,74 @@ +From 3ccb92293ca895bc52907b36c8d2e8b6936ed975 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Wed, 8 Jul 2020 10:04:09 +0200 +Subject: redhat: define pseries-rhel8.3.0 machine type + +RH-Author: Laurent Vivier +Message-id: <20200706104117.219174-3-lvivier@redhat.com> +Patchwork-id: 97904 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH v2 2/2] redhat: define 
pseries-rhel8.3.0 machine type +Bugzilla: 1853265 + +Note: rebase to qemu-5.1 introduces + + 32a354dc6c07 ("numa: forbid '-numa node, mem' for 5.1 and newer machine types") + +and so '-numa node, mem' will not be available with pseries-rhel8.3.0 + +Signed-off-by: Laurent Vivier +Signed-off-by: Miroslav Rezanina +--- + hw/ppc/spapr.c | 30 ++++++++++++++++++++++++++++-- + 1 file changed, 28 insertions(+), 2 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index f30618e..dc1e9cb 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4990,15 +4990,41 @@ DEFINE_SPAPR_MACHINE(2_1, "2.1", false); + #endif + + /* ++ * pseries-rhel8.3.0 ++ * like pseries-5.1 ++ */ ++ ++static void spapr_machine_rhel830_class_options(MachineClass *mc) ++{ ++ /* Defaults for the latest behaviour inherited from the base class */ ++} ++ ++DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", true); ++ ++/* + * pseries-rhel8.2.0 ++ * like pseries-4.2 + pseries-5.0 ++ * except SPAPR_CAP_CCF_ASSIST that has been backported to pseries-rhel8.1.0 + */ + + static void spapr_machine_rhel820_class_options(MachineClass *mc) + { +- /* Defaults for the latest behaviour inherited from the base class */ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel830_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF; ++ smc->rma_limit = 16 * GiB; ++ mc->nvdimm_supported = false; ++ ++ /* from pseries-5.0 */ ++ mc->numa_mem_supported = true; + } + +-DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", true); ++DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", false); + + /* + * pseries-rhel8.1.0 +-- +1.8.3.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 30d1c07..1d955dd 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.0.0 -Release: 0%{?dist}.wrb200701 
+Release: 1%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -123,13 +123,15 @@ Patch0018: 0018-usb-xhci-Fix-PCI-capability-order.patch Patch0019: 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0020: 0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch Patch0021: 0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch -Patch0023: 0023-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch -Patch0024: 0024-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch -Patch0025: 0025-redhat-define-hw_compat_8_2.patch -Patch0026: 0026-x86-Add-8.3.0-x86_64-machine-type.patch -Patch0027: 0027-hw-arm-Changes-to-rhel820-machine.patch -Patch0028: 0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch -Patch0029: 0029-hw-arm-Add-rhel830-machine-type.patch +Patch0022: 0022-virtio-net-fix-removal-of-failover-device.patch +Patch0024: 0024-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch +Patch0025: 0025-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch +Patch0026: 0026-redhat-define-hw_compat_8_2.patch +Patch0027: 0027-x86-Add-8.3.0-x86_64-machine-type.patch +Patch0028: 0028-hw-arm-Changes-to-rhel820-machine.patch +Patch0029: 0029-hw-arm-Introduce-rhel_virt_instance_init-helper.patch +Patch0030: 0030-hw-arm-Add-rhel830-machine-type.patch +Patch0031: 0031-redhat-define-pseries-rhel8.3.0-machine-type.patch BuildRequires: wget BuildRequires: rpm-build @@ -1081,6 +1083,19 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Jul 14 2020 Danilo Cesar Lemes de Paula - 5.0.0-1.el8 +- This is an interational rebase, based on the weekly rebase. 
+- Altough not official yet, It contains fixes for the following BZs: +- Resolves: bz#1781911 +- Resolves: bz#1841529 +- Resolves: bz#1842902 +- Resolves: bz#1818843 + +* Tue Jul 07 2020 Danilo Cesar Lemes de Paula - 4.2.0-29.el8 +- kvm-virtio-net-fix-removal-of-failover-device.patch [bz#1820120] +- Resolves: bz#1820120 + (After hotunplugging the vitrio device and netdev, hotunpluging the failover VF will cause qemu core dump) + * Sun Jun 28 2020 Danilo Cesar Lemes de Paula - 4.2.0-28.el8 - kvm-virtio-blk-Refactor-the-code-that-processes-queued-r.patch [bz#1812765] - kvm-virtio-blk-On-restart-process-queued-requests-in-the.patch [bz#1812765] diff --git a/sources b/sources index ccf796d..13799b0 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-5.0.0.tar.xz) = d39a728aac20baa56eaa02afb456cff0b220180682f922602428fd45b566e2fb9944142207cb56db68149110df79720137fe1f84d79c0b266b8b23c6eca909e3 +SHA512 (qemu-5.0.0.tar.xz) = 2011fc15747d9d8effcf0af4e1e3af6440eaf801c27948a8bdf97d0cb33cf99ac380f828c1aee02e55e2c2c6c674150a264ce025c99642c8f974fda34be285cd From aaa55b9cc85c0a3505184d9defdecc44f64784ef Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Wed, 15 Jul 2020 16:28:51 -0400 Subject: [PATCH 082/195] syncronize with exploded tree --- ...at-Adding-slirp-to-the-exploded-tree.patch | 226 +++++++++--------- 0005-Initial-redhat-build.patch | 14 +- 0006-Enable-disable-devices-for-RHEL.patch | 84 +++---- ...Machine-type-related-general-changes.patch | 58 ++--- 0008-Add-aarch64-machine-types.patch | 12 +- 0009-Add-ppc64-machine-types.patch | 20 +- 0010-Add-s390x-machine-types.patch | 8 +- 0011-Add-x86_64-machine-types.patch | 28 ++- 0012-Enable-make-check.patch | 24 +- ...mber-of-devices-that-can-be-assigned.patch | 7 +- ...Add-support-statement-to-help-output.patch | 5 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 8 +- 0016-Add-support-for-simpletrace.patch | 16 +- ...documentation-instead-of-qemu-system.patch | 14 +- 0018-usb-xhci-Fix-PCI-capability-order.patch | 5 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 4 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 4 +- ...x-blockdev-reopen-API-with-feature-f.patch | 4 +- ...o-net-fix-removal-of-failover-device.patch | 4 +- ...-vTPM-for-POWER-in-downstream-config.patch | 13 +- ...t-fix-5.0-rebase-missing-ISA-TPM-TIS.patch | 12 +- 0026-redhat-define-hw_compat_8_2.patch | 21 +- 0027-x86-Add-8.3.0-x86_64-machine-type.patch | 16 +- 0028-hw-arm-Changes-to-rhel820-machine.patch | 78 ------ ...oduce-rhel_virt_instance_init-helper.patch | 50 ---- 0030-hw-arm-Add-rhel830-machine-type.patch | 58 ----- ...efine-pseries-rhel8.3.0-machine-type.patch | 74 ------ kvm.modules | 18 ++ qemu-kvm.spec | 13 +- 29 files changed, 340 insertions(+), 558 deletions(-) delete mode 100644 0028-hw-arm-Changes-to-rhel820-machine.patch delete mode 100644 0029-hw-arm-Introduce-rhel_virt_instance_init-helper.patch delete mode 100644 0030-hw-arm-Add-rhel830-machine-type.patch delete mode 100644 0031-redhat-define-pseries-rhel8.3.0-machine-type.patch create mode 100644 kvm.modules diff --git a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch 
b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch index 632d69e..60c33f1 100644 --- a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch +++ b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch @@ -48,77 +48,79 @@ Signed-off-by: Danilo C. L. de Paula Rebase notes (5.0.0-rc4): - Update slirp directory to commit 2faae0f778 (used upstream) + +Signed-off-by: Danilo C. L. de Paula --- .gitmodules | 3 - slirp/.clang-format | 58 ++ slirp/.gitignore | 10 + slirp/.gitlab-ci.yml | 27 + slirp/.gitpublish | 3 + - slirp/CHANGELOG.md | 88 +++ + slirp/CHANGELOG.md | 88 ++ slirp/COPYRIGHT | 62 ++ slirp/Makefile | 62 ++ slirp/README.md | 60 ++ slirp/build-aux/git-version-gen | 158 ++++ slirp/build-aux/meson-dist | 16 + - slirp/meson.build | 134 ++++ - slirp/src/arp_table.c | 91 +++ - slirp/src/bootp.c | 369 ++++++++++ - slirp/src/bootp.h | 129 ++++ - slirp/src/cksum.c | 179 +++++ - slirp/src/debug.h | 51 ++ - slirp/src/dhcpv6.c | 224 ++++++ + slirp/meson.build | 134 +++ + slirp/src/arp_table.c | 91 ++ + slirp/src/bootp.c | 369 ++++++++ + slirp/src/bootp.h | 129 +++ + slirp/src/cksum.c | 179 ++++ + slirp/src/debug.h | 51 + + slirp/src/dhcpv6.c | 224 +++++ slirp/src/dhcpv6.h | 68 ++ - slirp/src/dnssearch.c | 306 ++++++++ - slirp/src/if.c | 213 ++++++ + slirp/src/dnssearch.c | 306 ++++++ + slirp/src/if.c | 213 +++++ slirp/src/if.h | 25 + - slirp/src/ip.h | 242 ++++++ - slirp/src/ip6.h | 214 ++++++ - slirp/src/ip6_icmp.c | 434 +++++++++++ - slirp/src/ip6_icmp.h | 219 ++++++ + slirp/src/ip.h | 242 +++++ + slirp/src/ip6.h | 214 +++++ + slirp/src/ip6_icmp.c | 434 +++++++++ + slirp/src/ip6_icmp.h | 219 +++++ slirp/src/ip6_input.c | 78 ++ slirp/src/ip6_output.c | 39 + - slirp/src/ip_icmp.c | 489 +++++++++++++ - slirp/src/ip_icmp.h | 166 +++++ - slirp/src/ip_input.c | 461 ++++++++++++ - slirp/src/ip_output.c | 169 +++++ + slirp/src/ip_icmp.c | 489 ++++++++++ + slirp/src/ip_icmp.h | 166 ++++ + slirp/src/ip_input.c | 461 +++++++++ + slirp/src/ip_output.c | 169 ++++ 
slirp/src/libslirp-version.h.in | 24 + - slirp/src/libslirp.h | 171 +++++ + slirp/src/libslirp.h | 171 ++++ slirp/src/libslirp.map | 30 + slirp/src/main.h | 16 + - slirp/src/mbuf.c | 224 ++++++ - slirp/src/mbuf.h | 127 ++++ - slirp/src/misc.c | 390 ++++++++++ + slirp/src/mbuf.c | 224 +++++ + slirp/src/mbuf.h | 127 +++ + slirp/src/misc.c | 390 ++++++++ slirp/src/misc.h | 72 ++ - slirp/src/ncsi-pkt.h | 445 +++++++++++ - slirp/src/ncsi.c | 192 +++++ - slirp/src/ndp_table.c | 87 +++ - slirp/src/sbuf.c | 168 +++++ + slirp/src/ncsi-pkt.h | 445 +++++++++ + slirp/src/ncsi.c | 192 ++++ + slirp/src/ndp_table.c | 87 ++ + slirp/src/sbuf.c | 168 ++++ slirp/src/sbuf.h | 27 + - slirp/src/slirp.c | 1185 ++++++++++++++++++++++++++++++ - slirp/src/slirp.h | 283 +++++++ - slirp/src/socket.c | 957 ++++++++++++++++++++++++ - slirp/src/socket.h | 164 +++++ - slirp/src/state.c | 379 ++++++++++ + slirp/src/slirp.c | 1185 ++++++++++++++++++++++++ + slirp/src/slirp.h | 283 ++++++ + slirp/src/socket.c | 957 +++++++++++++++++++ + slirp/src/socket.h | 164 ++++ + slirp/src/state.c | 379 ++++++++ slirp/src/stream.c | 120 +++ slirp/src/stream.h | 35 + - slirp/src/tcp.h | 169 +++++ - slirp/src/tcp_input.c | 1539 +++++++++++++++++++++++++++++++++++++++ - slirp/src/tcp_output.c | 516 +++++++++++++ - slirp/src/tcp_subr.c | 980 +++++++++++++++++++++++++ - slirp/src/tcp_timer.c | 286 ++++++++ - slirp/src/tcp_timer.h | 130 ++++ + slirp/src/tcp.h | 169 ++++ + slirp/src/tcp_input.c | 1539 +++++++++++++++++++++++++++++++ + slirp/src/tcp_output.c | 516 +++++++++++ + slirp/src/tcp_subr.c | 980 ++++++++++++++++++++ + slirp/src/tcp_timer.c | 286 ++++++ + slirp/src/tcp_timer.h | 130 +++ slirp/src/tcp_var.h | 161 ++++ slirp/src/tcpip.h | 104 +++ - slirp/src/tftp.c | 462 ++++++++++++ + slirp/src/tftp.c | 462 ++++++++++ slirp/src/tftp.h | 52 ++ - slirp/src/udp.c | 361 +++++++++ - slirp/src/udp.h | 90 +++ - slirp/src/udp6.c | 173 +++++ - slirp/src/util.c | 428 +++++++++++ - slirp/src/util.h | 189 +++++ + 
slirp/src/udp.c | 361 ++++++++ + slirp/src/udp.h | 90 ++ + slirp/src/udp6.c | 173 ++++ + slirp/src/util.c | 428 +++++++++ + slirp/src/util.h | 189 ++++ slirp/src/version.c | 8 + - slirp/src/vmstate.c | 444 +++++++++++ - slirp/src/vmstate.h | 391 ++++++++++ + slirp/src/vmstate.c | 444 +++++++++ + slirp/src/vmstate.h | 391 ++++++++ 70 files changed, 16423 insertions(+), 3 deletions(-) create mode 100644 slirp/.clang-format create mode 100644 slirp/.gitignore @@ -192,7 +194,7 @@ Rebase notes (5.0.0-rc4): diff --git a/slirp/.clang-format b/slirp/.clang-format new file mode 100644 -index 0000000..17fb49f +index 0000000000..17fb49fe65 --- /dev/null +++ b/slirp/.clang-format @@ -0,0 +1,58 @@ @@ -256,7 +258,7 @@ index 0000000..17fb49f +... diff --git a/slirp/CHANGELOG.md b/slirp/CHANGELOG.md new file mode 100644 -index 0000000..67b0a74 +index 0000000000..67b0a74195 --- /dev/null +++ b/slirp/CHANGELOG.md @@ -0,0 +1,88 @@ @@ -350,7 +352,7 @@ index 0000000..67b0a74 +[4.0.0]: https://gitlab.freedesktop.org/slirp/libslirp/commits/v4.0.0 diff --git a/slirp/COPYRIGHT b/slirp/COPYRIGHT new file mode 100644 -index 0000000..ed49512 +index 0000000000..ed49512dbc --- /dev/null +++ b/slirp/COPYRIGHT @@ -0,0 +1,62 @@ @@ -418,7 +420,7 @@ index 0000000..ed49512 +copyrights. diff --git a/slirp/Makefile b/slirp/Makefile new file mode 100644 -index 0000000..8857b41 +index 0000000000..8857b4159b --- /dev/null +++ b/slirp/Makefile @@ -0,0 +1,62 @@ @@ -486,7 +488,7 @@ index 0000000..8857b41 +-include $(DEPS) diff --git a/slirp/README.md b/slirp/README.md new file mode 100644 -index 0000000..dc11e5f +index 0000000000..dc11e5f18b --- /dev/null +++ b/slirp/README.md @@ -0,0 +1,60 @@ @@ -552,7 +554,7 @@ index 0000000..dc11e5f +See the [COPYRIGHT](COPYRIGHT) file for details. 
diff --git a/slirp/build-aux/git-version-gen b/slirp/build-aux/git-version-gen new file mode 100755 -index 0000000..5617eb8 +index 0000000000..5617eb8d4e --- /dev/null +++ b/slirp/build-aux/git-version-gen @@ -0,0 +1,158 @@ @@ -716,7 +718,7 @@ index 0000000..5617eb8 +# End: diff --git a/slirp/build-aux/meson-dist b/slirp/build-aux/meson-dist new file mode 100755 -index 0000000..80d534f +index 0000000000..80d534fec6 --- /dev/null +++ b/slirp/build-aux/meson-dist @@ -0,0 +1,16 @@ @@ -738,7 +740,7 @@ index 0000000..80d534f +echo "$1" > "$MESON_DIST_ROOT/.tarball-version" diff --git a/slirp/meson.build b/slirp/meson.build new file mode 100644 -index 0000000..3a27149 +index 0000000000..3a27149373 --- /dev/null +++ b/slirp/meson.build @@ -0,0 +1,134 @@ @@ -878,7 +880,7 @@ index 0000000..3a27149 +) diff --git a/slirp/src/arp_table.c b/slirp/src/arp_table.c new file mode 100644 -index 0000000..054fbf5 +index 0000000000..054fbf5e10 --- /dev/null +++ b/slirp/src/arp_table.c @@ -0,0 +1,91 @@ @@ -975,7 +977,7 @@ index 0000000..054fbf5 +} diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c new file mode 100644 -index 0000000..46e9681 +index 0000000000..46e96810ab --- /dev/null +++ b/slirp/src/bootp.c @@ -0,0 +1,369 @@ @@ -1350,7 +1352,7 @@ index 0000000..46e9681 +} diff --git a/slirp/src/bootp.h b/slirp/src/bootp.h new file mode 100644 -index 0000000..a57fa51 +index 0000000000..a57fa51bcb --- /dev/null +++ b/slirp/src/bootp.h @@ -0,0 +1,129 @@ @@ -1485,7 +1487,7 @@ index 0000000..a57fa51 +#endif diff --git a/slirp/src/cksum.c b/slirp/src/cksum.c new file mode 100644 -index 0000000..4d08380 +index 0000000000..4d08380a4e --- /dev/null +++ b/slirp/src/cksum.c @@ -0,0 +1,179 @@ @@ -1670,7 +1672,7 @@ index 0000000..4d08380 +} diff --git a/slirp/src/debug.h b/slirp/src/debug.h new file mode 100644 -index 0000000..47712bd +index 0000000000..47712bd78b --- /dev/null +++ b/slirp/src/debug.h @@ -0,0 +1,51 @@ @@ -1727,7 +1729,7 @@ index 0000000..47712bd +#endif /* DEBUG_H_ */ diff --git 
a/slirp/src/dhcpv6.c b/slirp/src/dhcpv6.c new file mode 100644 -index 0000000..77b451b +index 0000000000..77b451b910 --- /dev/null +++ b/slirp/src/dhcpv6.c @@ -0,0 +1,224 @@ @@ -1957,7 +1959,7 @@ index 0000000..77b451b +} diff --git a/slirp/src/dhcpv6.h b/slirp/src/dhcpv6.h new file mode 100644 -index 0000000..d12c49b +index 0000000000..d12c49b36c --- /dev/null +++ b/slirp/src/dhcpv6.h @@ -0,0 +1,68 @@ @@ -2031,7 +2033,7 @@ index 0000000..d12c49b +#endif diff --git a/slirp/src/dnssearch.c b/slirp/src/dnssearch.c new file mode 100644 -index 0000000..e8f14e3 +index 0000000000..e8f14e34bb --- /dev/null +++ b/slirp/src/dnssearch.c @@ -0,0 +1,306 @@ @@ -2343,7 +2345,7 @@ index 0000000..e8f14e3 +} diff --git a/slirp/src/if.c b/slirp/src/if.c new file mode 100644 -index 0000000..23190b5 +index 0000000000..23190b5593 --- /dev/null +++ b/slirp/src/if.c @@ -0,0 +1,213 @@ @@ -2562,7 +2564,7 @@ index 0000000..23190b5 +} diff --git a/slirp/src/if.h b/slirp/src/if.h new file mode 100644 -index 0000000..7cf9d27 +index 0000000000..7cf9d2750e --- /dev/null +++ b/slirp/src/if.h @@ -0,0 +1,25 @@ @@ -2593,7 +2595,7 @@ index 0000000..7cf9d27 +#endif diff --git a/slirp/src/ip.h b/slirp/src/ip.h new file mode 100644 -index 0000000..e5d4aa8 +index 0000000000..e5d4aa8a6d --- /dev/null +++ b/slirp/src/ip.h @@ -0,0 +1,242 @@ @@ -2841,7 +2843,7 @@ index 0000000..e5d4aa8 +#endif diff --git a/slirp/src/ip6.h b/slirp/src/ip6.h new file mode 100644 -index 0000000..0630309 +index 0000000000..0630309d29 --- /dev/null +++ b/slirp/src/ip6.h @@ -0,0 +1,214 @@ @@ -3061,7 +3063,7 @@ index 0000000..0630309 +#endif diff --git a/slirp/src/ip6_icmp.c b/slirp/src/ip6_icmp.c new file mode 100644 -index 0000000..28ec2be +index 0000000000..28ec2bee35 --- /dev/null +++ b/slirp/src/ip6_icmp.c @@ -0,0 +1,434 @@ @@ -3501,7 +3503,7 @@ index 0000000..28ec2be +} diff --git a/slirp/src/ip6_icmp.h b/slirp/src/ip6_icmp.h new file mode 100644 -index 0000000..c37e60f +index 0000000000..c37e60f28d --- /dev/null +++ 
b/slirp/src/ip6_icmp.h @@ -0,0 +1,219 @@ @@ -3726,7 +3728,7 @@ index 0000000..c37e60f +#endif diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c new file mode 100644 -index 0000000..dfcbfd6 +index 0000000000..dfcbfd6a78 --- /dev/null +++ b/slirp/src/ip6_input.c @@ -0,0 +1,78 @@ @@ -3810,7 +3812,7 @@ index 0000000..dfcbfd6 +} diff --git a/slirp/src/ip6_output.c b/slirp/src/ip6_output.c new file mode 100644 -index 0000000..b861106 +index 0000000000..b86110662c --- /dev/null +++ b/slirp/src/ip6_output.c @@ -0,0 +1,39 @@ @@ -3855,7 +3857,7 @@ index 0000000..b861106 +} diff --git a/slirp/src/ip_icmp.c b/slirp/src/ip_icmp.c new file mode 100644 -index 0000000..fe0add4 +index 0000000000..fe0add438d --- /dev/null +++ b/slirp/src/ip_icmp.c @@ -0,0 +1,489 @@ @@ -4350,7 +4352,7 @@ index 0000000..fe0add4 +} diff --git a/slirp/src/ip_icmp.h b/slirp/src/ip_icmp.h new file mode 100644 -index 0000000..84707db +index 0000000000..84707db247 --- /dev/null +++ b/slirp/src/ip_icmp.h @@ -0,0 +1,166 @@ @@ -4522,7 +4524,7 @@ index 0000000..84707db +#endif diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c new file mode 100644 -index 0000000..89a01d4 +index 0000000000..89a01d45aa --- /dev/null +++ b/slirp/src/ip_input.c @@ -0,0 +1,461 @@ @@ -4989,7 +4991,7 @@ index 0000000..89a01d4 +} diff --git a/slirp/src/ip_output.c b/slirp/src/ip_output.c new file mode 100644 -index 0000000..22916a3 +index 0000000000..22916a37df --- /dev/null +++ b/slirp/src/ip_output.c @@ -0,0 +1,169 @@ @@ -5164,7 +5166,7 @@ index 0000000..22916a3 +} diff --git a/slirp/src/libslirp-version.h.in b/slirp/src/libslirp-version.h.in new file mode 100644 -index 0000000..faa6c85 +index 0000000000..faa6c85952 --- /dev/null +++ b/slirp/src/libslirp-version.h.in @@ -0,0 +1,24 @@ @@ -5194,7 +5196,7 @@ index 0000000..faa6c85 +#endif /* LIBSLIRP_VERSION_H_ */ diff --git a/slirp/src/libslirp.h b/slirp/src/libslirp.h new file mode 100644 -index 0000000..fb4c7e8 +index 0000000000..fb4c7e882c --- /dev/null +++ 
b/slirp/src/libslirp.h @@ -0,0 +1,171 @@ @@ -5371,7 +5373,7 @@ index 0000000..fb4c7e8 +#endif /* LIBSLIRP_H */ diff --git a/slirp/src/libslirp.map b/slirp/src/libslirp.map new file mode 100644 -index 0000000..72aab91 +index 0000000000..72aab912f4 --- /dev/null +++ b/slirp/src/libslirp.map @@ -0,0 +1,30 @@ @@ -5407,7 +5409,7 @@ index 0000000..72aab91 +} SLIRP_4.1; diff --git a/slirp/src/main.h b/slirp/src/main.h new file mode 100644 -index 0000000..3b3f883 +index 0000000000..3b3f883703 --- /dev/null +++ b/slirp/src/main.h @@ -0,0 +1,16 @@ @@ -5429,7 +5431,7 @@ index 0000000..3b3f883 +#endif diff --git a/slirp/src/mbuf.c b/slirp/src/mbuf.c new file mode 100644 -index 0000000..54ec721 +index 0000000000..54ec721eb5 --- /dev/null +++ b/slirp/src/mbuf.c @@ -0,0 +1,224 @@ @@ -5659,7 +5661,7 @@ index 0000000..54ec721 +} diff --git a/slirp/src/mbuf.h b/slirp/src/mbuf.h new file mode 100644 -index 0000000..546e785 +index 0000000000..546e7852c5 --- /dev/null +++ b/slirp/src/mbuf.h @@ -0,0 +1,127 @@ @@ -5792,7 +5794,7 @@ index 0000000..546e785 +#endif diff --git a/slirp/src/misc.c b/slirp/src/misc.c new file mode 100644 -index 0000000..e6bc0a2 +index 0000000000..e6bc0a207d --- /dev/null +++ b/slirp/src/misc.c @@ -0,0 +1,390 @@ @@ -6189,7 +6191,7 @@ index 0000000..e6bc0a2 \ No newline at end of file diff --git a/slirp/src/misc.h b/slirp/src/misc.h new file mode 100644 -index 0000000..81b370c +index 0000000000..81b370cfb1 --- /dev/null +++ b/slirp/src/misc.h @@ -0,0 +1,72 @@ @@ -6267,7 +6269,7 @@ index 0000000..81b370c +#endif diff --git a/slirp/src/ncsi-pkt.h b/slirp/src/ncsi-pkt.h new file mode 100644 -index 0000000..7795ad8 +index 0000000000..7795ad83ee --- /dev/null +++ b/slirp/src/ncsi-pkt.h @@ -0,0 +1,445 @@ @@ -6718,7 +6720,7 @@ index 0000000..7795ad8 +#endif /* NCSI_PKT_H */ diff --git a/slirp/src/ncsi.c b/slirp/src/ncsi.c new file mode 100644 -index 0000000..ddd980d +index 0000000000..ddd980d869 --- /dev/null +++ b/slirp/src/ncsi.c @@ -0,0 +1,192 @@ @@ -6916,7 +6918,7 
@@ index 0000000..ddd980d +} diff --git a/slirp/src/ndp_table.c b/slirp/src/ndp_table.c new file mode 100644 -index 0000000..110d6ea +index 0000000000..110d6ea0e4 --- /dev/null +++ b/slirp/src/ndp_table.c @@ -0,0 +1,87 @@ @@ -7009,7 +7011,7 @@ index 0000000..110d6ea +} diff --git a/slirp/src/sbuf.c b/slirp/src/sbuf.c new file mode 100644 -index 0000000..2fb9176 +index 0000000000..2fb9176144 --- /dev/null +++ b/slirp/src/sbuf.c @@ -0,0 +1,168 @@ @@ -7183,7 +7185,7 @@ index 0000000..2fb9176 +} diff --git a/slirp/src/sbuf.h b/slirp/src/sbuf.h new file mode 100644 -index 0000000..01886fb +index 0000000000..01886fbd01 --- /dev/null +++ b/slirp/src/sbuf.h @@ -0,0 +1,27 @@ @@ -7216,7 +7218,7 @@ index 0000000..01886fb +#endif diff --git a/slirp/src/slirp.c b/slirp/src/slirp.c new file mode 100644 -index 0000000..14458e8 +index 0000000000..14458e8510 --- /dev/null +++ b/slirp/src/slirp.c @@ -0,0 +1,1185 @@ @@ -8407,7 +8409,7 @@ index 0000000..14458e8 +} diff --git a/slirp/src/slirp.h b/slirp/src/slirp.h new file mode 100644 -index 0000000..32634bc +index 0000000000..32634bcc00 --- /dev/null +++ b/slirp/src/slirp.h @@ -0,0 +1,283 @@ @@ -8696,7 +8698,7 @@ index 0000000..32634bc +#endif diff --git a/slirp/src/socket.c b/slirp/src/socket.c new file mode 100644 -index 0000000..4cd9a64 +index 0000000000..4cd9a64b3c --- /dev/null +++ b/slirp/src/socket.c @@ -0,0 +1,957 @@ @@ -9659,7 +9661,7 @@ index 0000000..4cd9a64 +} diff --git a/slirp/src/socket.h b/slirp/src/socket.h new file mode 100644 -index 0000000..a6a1e5e +index 0000000000..a6a1e5e214 --- /dev/null +++ b/slirp/src/socket.h @@ -0,0 +1,164 @@ @@ -9829,7 +9831,7 @@ index 0000000..a6a1e5e +#endif /* SLIRP_SOCKET_H */ diff --git a/slirp/src/state.c b/slirp/src/state.c new file mode 100644 -index 0000000..22af77b +index 0000000000..22af77b256 --- /dev/null +++ b/slirp/src/state.c @@ -0,0 +1,379 @@ @@ -10214,7 +10216,7 @@ index 0000000..22af77b +} diff --git a/slirp/src/stream.c b/slirp/src/stream.c new file mode 100644 -index 
0000000..6cf326f +index 0000000000..6cf326f669 --- /dev/null +++ b/slirp/src/stream.c @@ -0,0 +1,120 @@ @@ -10340,7 +10342,7 @@ index 0000000..6cf326f +} diff --git a/slirp/src/stream.h b/slirp/src/stream.h new file mode 100644 -index 0000000..08bb5b6 +index 0000000000..08bb5b6610 --- /dev/null +++ b/slirp/src/stream.h @@ -0,0 +1,35 @@ @@ -10381,7 +10383,7 @@ index 0000000..08bb5b6 +#endif /* STREAM_H_ */ diff --git a/slirp/src/tcp.h b/slirp/src/tcp.h new file mode 100644 -index 0000000..70a9760 +index 0000000000..70a9760664 --- /dev/null +++ b/slirp/src/tcp.h @@ -0,0 +1,169 @@ @@ -10556,7 +10558,7 @@ index 0000000..70a9760 +#endif diff --git a/slirp/src/tcp_input.c b/slirp/src/tcp_input.c new file mode 100644 -index 0000000..d55b0c8 +index 0000000000..d55b0c81dc --- /dev/null +++ b/slirp/src/tcp_input.c @@ -0,0 +1,1539 @@ @@ -12101,7 +12103,7 @@ index 0000000..d55b0c8 +} diff --git a/slirp/src/tcp_output.c b/slirp/src/tcp_output.c new file mode 100644 -index 0000000..383fe31 +index 0000000000..383fe31dcf --- /dev/null +++ b/slirp/src/tcp_output.c @@ -0,0 +1,516 @@ @@ -12623,7 +12625,7 @@ index 0000000..383fe31 +} diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c new file mode 100644 -index 0000000..a1016d9 +index 0000000000..a1016d90df --- /dev/null +++ b/slirp/src/tcp_subr.c @@ -0,0 +1,980 @@ @@ -13609,7 +13611,7 @@ index 0000000..a1016d9 +} diff --git a/slirp/src/tcp_timer.c b/slirp/src/tcp_timer.c new file mode 100644 -index 0000000..102023e +index 0000000000..102023e7cd --- /dev/null +++ b/slirp/src/tcp_timer.c @@ -0,0 +1,286 @@ @@ -13901,7 +13903,7 @@ index 0000000..102023e +} diff --git a/slirp/src/tcp_timer.h b/slirp/src/tcp_timer.h new file mode 100644 -index 0000000..584a559 +index 0000000000..584a5594e4 --- /dev/null +++ b/slirp/src/tcp_timer.h @@ -0,0 +1,130 @@ @@ -14037,7 +14039,7 @@ index 0000000..584a559 +#endif diff --git a/slirp/src/tcp_var.h b/slirp/src/tcp_var.h new file mode 100644 -index 0000000..c8da8cb +index 0000000000..c8da8cbd16 --- 
/dev/null +++ b/slirp/src/tcp_var.h @@ -0,0 +1,161 @@ @@ -14204,7 +14206,7 @@ index 0000000..c8da8cb +#endif diff --git a/slirp/src/tcpip.h b/slirp/src/tcpip.h new file mode 100644 -index 0000000..d3df021 +index 0000000000..d3df021493 --- /dev/null +++ b/slirp/src/tcpip.h @@ -0,0 +1,104 @@ @@ -14314,7 +14316,7 @@ index 0000000..d3df021 +#endif diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c new file mode 100644 -index 0000000..c209145 +index 0000000000..c209145282 --- /dev/null +++ b/slirp/src/tftp.c @@ -0,0 +1,462 @@ @@ -14782,7 +14784,7 @@ index 0000000..c209145 +} diff --git a/slirp/src/tftp.h b/slirp/src/tftp.h new file mode 100644 -index 0000000..c47bb43 +index 0000000000..c47bb43c7d --- /dev/null +++ b/slirp/src/tftp.h @@ -0,0 +1,52 @@ @@ -14840,7 +14842,7 @@ index 0000000..c47bb43 +#endif diff --git a/slirp/src/udp.c b/slirp/src/udp.c new file mode 100644 -index 0000000..6bde20f +index 0000000000..6bde20fafa --- /dev/null +++ b/slirp/src/udp.c @@ -0,0 +1,361 @@ @@ -15207,7 +15209,7 @@ index 0000000..6bde20f +} diff --git a/slirp/src/udp.h b/slirp/src/udp.h new file mode 100644 -index 0000000..c3b83fd +index 0000000000..c3b83fdc56 --- /dev/null +++ b/slirp/src/udp.h @@ -0,0 +1,90 @@ @@ -15303,7 +15305,7 @@ index 0000000..c3b83fd +#endif diff --git a/slirp/src/udp6.c b/slirp/src/udp6.c new file mode 100644 -index 0000000..6f9486b +index 0000000000..6f9486bbca --- /dev/null +++ b/slirp/src/udp6.c @@ -0,0 +1,173 @@ @@ -15482,7 +15484,7 @@ index 0000000..6f9486b +} diff --git a/slirp/src/util.c b/slirp/src/util.c new file mode 100644 -index 0000000..570c53f +index 0000000000..570c53f303 --- /dev/null +++ b/slirp/src/util.c @@ -0,0 +1,428 @@ @@ -15916,7 +15918,7 @@ index 0000000..570c53f +} diff --git a/slirp/src/util.h b/slirp/src/util.h new file mode 100644 -index 0000000..d67b3d0 +index 0000000000..d67b3d0de9 --- /dev/null +++ b/slirp/src/util.h @@ -0,0 +1,189 @@ @@ -16111,7 +16113,7 @@ index 0000000..d67b3d0 +#endif diff --git a/slirp/src/version.c 
b/slirp/src/version.c new file mode 100644 -index 0000000..93e0be9 +index 0000000000..93e0be9c24 --- /dev/null +++ b/slirp/src/version.c @@ -0,0 +1,8 @@ @@ -16125,7 +16127,7 @@ index 0000000..93e0be9 +} diff --git a/slirp/src/vmstate.c b/slirp/src/vmstate.c new file mode 100644 -index 0000000..68cc172 +index 0000000000..68cc1729c5 --- /dev/null +++ b/slirp/src/vmstate.c @@ -0,0 +1,444 @@ @@ -16575,7 +16577,7 @@ index 0000000..68cc172 +} diff --git a/slirp/src/vmstate.h b/slirp/src/vmstate.h new file mode 100644 -index 0000000..94c6a4b +index 0000000000..94c6a4bc7b --- /dev/null +++ b/slirp/src/vmstate.h @@ -0,0 +1,391 @@ @@ -16971,5 +16973,5 @@ index 0000000..94c6a4b + +#endif -- -1.8.3.1 +2.27.0 diff --git a/0005-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch index 696916e..c2d0dad 100644 --- a/0005-Initial-redhat-build.patch +++ b/0005-Initial-redhat-build.patch @@ -126,6 +126,8 @@ Merged patches (weekly-200610): Merged patches (weekly-200617): - 44b8bd0 spec: Fix python shenigans for tests + +Signed-off-by: Danilo C. L. 
de Paula --- .gitignore | 1 + Makefile | 3 +- @@ -133,7 +135,7 @@ Merged patches (weekly-200617): redhat/Makefile | 86 + redhat/Makefile.common | 53 + redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 2961 +++++++++++++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 2961 +++++++++++++++++++++++++++ redhat/qemu-pr-helper.service | 2 +- redhat/scripts/extract_build_cmd.py | 2 +- redhat/scripts/process-patches.sh | 7 +- @@ -146,7 +148,7 @@ Merged patches (weekly-200617): create mode 100644 redhat/qemu-kvm.spec.template diff --git a/Makefile b/Makefile -index b1b8a5a..88b7565 100644 +index b1b8a5a6d0..88b7565246 100644 --- a/Makefile +++ b/Makefile @@ -549,6 +549,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM @@ -167,7 +169,7 @@ index b1b8a5a..88b7565 100644 $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) $(MANUAL_BUILDDIR)/system/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" diff --git a/configure b/configure -index ee6c3c6..bd957a5 100755 +index ee6c3c6792..bd957a5a26 100755 --- a/configure +++ b/configure @@ -2610,6 +2610,7 @@ if test "$seccomp" != "no" ; then @@ -179,7 +181,7 @@ index ee6c3c6..bd957a5 100755 # xen probe diff --git a/tests/check-block.sh b/tests/check-block.sh -index 8e29c86..e9bcb5a 100755 +index 8e29c868e5..e9bcb5ac27 100755 --- a/tests/check-block.sh +++ b/tests/check-block.sh @@ -53,6 +53,8 @@ if ! 
(sed --version | grep 'GNU sed') > /dev/null 2>&1 ; then @@ -192,7 +194,7 @@ index 8e29c86..e9bcb5a 100755 ret=0 diff --git a/ui/vnc.c b/ui/vnc.c -index 527ad25..8099c0e 100644 +index 527ad25124..8099c0ee13 100644 --- a/ui/vnc.c +++ b/ui/vnc.c @@ -3970,7 +3970,7 @@ void vnc_display_open(const char *id, Error **errp) @@ -205,5 +207,5 @@ index 527ad25..8099c0e 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -1.8.3.1 +2.27.0 diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0006-Enable-disable-devices-for-RHEL.patch index 0ea7715..12b0b4a 100644 --- a/0006-Enable-disable-devices-for-RHEL.patch +++ b/0006-Enable-disable-devices-for-RHEL.patch @@ -78,19 +78,21 @@ Merged patches (weekly-200520): Merged patches (weekly-200610): - 559d589 Revert "RHEL: disable hostmem-memfd" - 441128e enable ramfb + +Signed-off-by: Danilo C. L. de Paula --- - default-configs/aarch64-rh-devices.mak | 22 ++++++++ - default-configs/aarch64-softmmu.mak | 10 ++-- - default-configs/ppc64-rh-devices.mak | 34 +++++++++++ - default-configs/ppc64-softmmu.mak | 10 ++-- - default-configs/rh-virtio.mak | 10 ++++ - default-configs/s390x-rh-devices.mak | 15 +++++ + default-configs/aarch64-rh-devices.mak | 22 ++++++ + default-configs/aarch64-softmmu.mak | 10 ++- + default-configs/ppc64-rh-devices.mak | 34 +++++++++ + default-configs/ppc64-softmmu.mak | 10 ++- + default-configs/rh-virtio.mak | 10 +++ + default-configs/s390x-rh-devices.mak | 15 ++++ default-configs/s390x-softmmu.mak | 4 +- - default-configs/x86_64-rh-devices.mak | 100 +++++++++++++++++++++++++++++++++ + default-configs/x86_64-rh-devices.mak | 100 +++++++++++++++++++++++++ default-configs/x86_64-softmmu.mak | 4 +- hw/acpi/ich9.c | 4 +- hw/arm/Makefile.objs | 2 +- - hw/block/fdc.c | 10 ++++ + hw/block/fdc.c | 10 +++ hw/bt/Makefile.objs | 3 + hw/cpu/Makefile.objs | 5 +- hw/display/cirrus_vga.c | 3 + @@ -99,15 +101,15 @@ Merged patches (weekly-200610): hw/net/e1000.c | 2 + 
hw/ppc/spapr_cpu_core.c | 2 + hw/usb/Makefile.objs | 4 +- - qemu-options.hx | 4 -- + qemu-options.hx | 4 - redhat/qemu-kvm.spec.template | 5 +- softmmu/vl.c | 2 +- target/arm/cpu.c | 4 +- target/arm/cpu_tcg.c | 3 + - target/i386/cpu.c | 17 +++--- - target/ppc/cpu-models.c | 10 ++++ + target/i386/cpu.c | 17 +++-- + target/ppc/cpu-models.c | 10 +++ target/s390x/cpu_models.c | 3 + - target/s390x/kvm.c | 8 +++ + target/s390x/kvm.c | 8 ++ 29 files changed, 274 insertions(+), 33 deletions(-) create mode 100644 default-configs/aarch64-rh-devices.mak create mode 100644 default-configs/ppc64-rh-devices.mak @@ -118,7 +120,7 @@ Merged patches (weekly-200610): diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak new file mode 100644 -index 0000000..f0cf5a1 +index 0000000000..f0cf5a1b22 --- /dev/null +++ b/default-configs/aarch64-rh-devices.mak @@ -0,0 +1,22 @@ @@ -145,7 +147,7 @@ index 0000000..f0cf5a1 +CONFIG_NVDIMM=y +CONFIG_ACPI_APEI=y diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak -index 958b1e0..8f6867d 100644 +index 958b1e08e4..8f6867d48a 100644 --- a/default-configs/aarch64-softmmu.mak +++ b/default-configs/aarch64-softmmu.mak @@ -1,8 +1,10 @@ @@ -165,7 +167,7 @@ index 958b1e0..8f6867d 100644 +include aarch64-rh-devices.mak diff --git a/default-configs/ppc64-rh-devices.mak b/default-configs/ppc64-rh-devices.mak new file mode 100644 -index 0000000..ecbe53f +index 0000000000..ecbe53fe63 --- /dev/null +++ b/default-configs/ppc64-rh-devices.mak @@ -0,0 +1,34 @@ @@ -204,7 +206,7 @@ index 0000000..ecbe53f +CONFIG_XIVE_SPAPR=y +CONFIG_XIVE_KVM=y diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak -index ae0841f..040e557 100644 +index ae0841fa3a..040e5575e7 100644 --- a/default-configs/ppc64-softmmu.mak +++ b/default-configs/ppc64-softmmu.mak @@ -1,11 +1,13 @@ @@ -227,7 +229,7 @@ index ae0841f..040e557 100644 +include ppc64-rh-devices.mak diff --git 
a/default-configs/rh-virtio.mak b/default-configs/rh-virtio.mak new file mode 100644 -index 0000000..94ede1b +index 0000000000..94ede1b5f6 --- /dev/null +++ b/default-configs/rh-virtio.mak @@ -0,0 +1,10 @@ @@ -243,7 +245,7 @@ index 0000000..94ede1b +CONFIG_VIRTIO_SERIAL=y diff --git a/default-configs/s390x-rh-devices.mak b/default-configs/s390x-rh-devices.mak new file mode 100644 -index 0000000..c3c73fe +index 0000000000..c3c73fe752 --- /dev/null +++ b/default-configs/s390x-rh-devices.mak @@ -0,0 +1,15 @@ @@ -263,7 +265,7 @@ index 0000000..c3c73fe +CONFIG_VIRTIO_CCW=y +CONFIG_WDT_DIAG288=y diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak -index f2287a1..3e2e388 100644 +index f2287a133f..3e2e388e91 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak @@ -10,4 +10,6 @@ @@ -276,7 +278,7 @@ index f2287a1..3e2e388 100644 +include s390x-rh-devices.mak diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak new file mode 100644 -index 0000000..d59b6d9 +index 0000000000..d59b6d9bb5 --- /dev/null +++ b/default-configs/x86_64-rh-devices.mak @@ -0,0 +1,100 @@ @@ -381,7 +383,7 @@ index 0000000..d59b6d9 +CONFIG_TPM_EMULATOR=y +CONFIG_TPM_PASSTHROUGH=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak -index 64b2ee2..b5de7e5 100644 +index 64b2ee2960..b5de7e5279 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -1,3 +1,5 @@ @@ -392,7 +394,7 @@ index 64b2ee2..b5de7e5 100644 + +include x86_64-rh-devices.mak diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 2d204ba..7ead029 100644 +index 2d204babc6..7ead029e68 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -374,8 +374,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) @@ -407,7 +409,7 @@ index 2d204ba..7ead029 100644 object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs 
-index 534a6a1..bd62442 100644 +index 534a6a119e..bd62442b54 100644 --- a/hw/arm/Makefile.objs +++ b/hw/arm/Makefile.objs @@ -28,7 +28,7 @@ obj-$(CONFIG_VEXPRESS) += vexpress.o @@ -420,7 +422,7 @@ index 534a6a1..bd62442 100644 obj-$(CONFIG_PXA2XX) += pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o obj-$(CONFIG_DIGIC) += digic.o diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 3425d56..f4ae220 100644 +index 3425d56e2a..f4ae220905 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -47,6 +47,8 @@ @@ -449,7 +451,7 @@ index 3425d56..f4ae220 100644 return; diff --git a/hw/bt/Makefile.objs b/hw/bt/Makefile.objs new file mode 100644 -index 0000000..e678e9e +index 0000000000..e678e9ee3c --- /dev/null +++ b/hw/bt/Makefile.objs @@ -0,0 +1,3 @@ @@ -457,7 +459,7 @@ index 0000000..e678e9e +#common-obj-y += hci-csr.o + diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs -index 8db9e8a..1601ea9 100644 +index 8db9e8a7b3..1601ea93c7 100644 --- a/hw/cpu/Makefile.objs +++ b/hw/cpu/Makefile.objs @@ -1,5 +1,6 @@ @@ -470,7 +472,7 @@ index 8db9e8a..1601ea9 100644 +common-obj-y += core.o +# cluster.o diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index 212d6f5..f2504e5 100644 +index 212d6f5e61..f2504e5649 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c @@ -2958,6 +2958,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) @@ -484,7 +486,7 @@ index 212d6f5..f2504e5 100644 Also accept 8 MB/16 MB for backward compatibility. 
*/ if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index b402a93..d3621a4 100644 +index b402a93636..d3621a45d9 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c @@ -221,7 +221,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) @@ -507,7 +509,7 @@ index b402a93..d3621a4 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index 29d633c..1442f46 100644 +index 29d633ca94..1442f46195 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c @@ -599,6 +599,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) @@ -520,7 +522,7 @@ index 29d633c..1442f46 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index a18f80e..960b2f0 100644 +index a18f80e369..960b2f00ee 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -1797,6 +1797,7 @@ static const E1000Info e1000_devices[] = { @@ -540,7 +542,7 @@ index a18f80e..960b2f0 100644 static void e1000_register_types(void) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 26ad566..bfdf6b3 100644 +index 26ad566f42..bfdf6b3e5c 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -396,10 +396,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { @@ -557,7 +559,7 @@ index 26ad566..bfdf6b3 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs -index fa5c3fa..854094c 100644 +index fa5c3fa1b8..854094c4f2 100644 --- a/hw/usb/Makefile.objs +++ b/hw/usb/Makefile.objs @@ -30,7 +30,9 @@ common-obj-$(CONFIG_USB_NETWORK) += dev-network.o @@ -572,7 +574,7 @@ index fa5c3fa..854094c 100644 smartcard.mo-libs := $(SMARTCARD_LIBS) endif diff --git a/qemu-options.hx b/qemu-options.hx -index fa1b19d..b969944 100644 +index fa1b19de4c..b9699440a3 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2248,10 +2248,6 @@ ERST @@ -587,7 +589,7 @@ index 
fa1b19d..b969944 100644 DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" diff --git a/softmmu/vl.c b/softmmu/vl.c -index 9da2e23..6509057 100644 +index 9da2e23144..6509057752 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -145,7 +145,7 @@ static Chardev **serial_hds; @@ -600,7 +602,7 @@ index 9da2e23..6509057 100644 static int no_reboot; int no_shutdown = 0; diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 5050e18..79adfe2 100644 +index 5050e1843a..79adfe25c4 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -2335,7 +2335,9 @@ static void arm_cpu_register_types(void) @@ -615,7 +617,7 @@ index 5050e18..79adfe2 100644 } } diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index 00b0e08..94d429b 100644 +index 00b0e08f33..94d429b61c 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c @@ -655,6 +655,9 @@ static void arm_tcg_cpu_register_types(void) @@ -629,7 +631,7 @@ index 00b0e08..94d429b 100644 arm_cpu_register(&arm_tcg_cpus[i]); } diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 36cbd3d..55a30cd 100644 +index 36cbd3d027..55a30cd4d9 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1825,20 +1825,21 @@ static CPUCaches epyc_rome_cache_info = { @@ -663,7 +665,7 @@ index 36cbd3d..55a30cd 100644 CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM, .xlevel = 0x8000000A, diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 4ad1686..16b2185 100644 +index 4ad16863c0..16b2185fd8 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -733,7 +735,7 @@ index 4ad1686..16b2185 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 2fa609b..f6bee32 100644 +index 2fa609bffe..f6bee3204c 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -404,6 +404,9 @@ static void 
check_unavailable_features(const S390CPUModel *max_model, @@ -747,7 +749,7 @@ index 2fa609b..f6bee32 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index f2f75d2..8970e4c 100644 +index f2f75d2a57..8970e4c374 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -2494,6 +2494,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) @@ -766,5 +768,5 @@ index f2f75d2..8970e4c 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ -- -1.8.3.1 +2.27.0 diff --git a/0007-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch index 5117133..467366b 100644 --- a/0007-Machine-type-related-general-changes.patch +++ b/0007-Machine-type-related-general-changes.patch @@ -41,25 +41,27 @@ Merged patches (weekly-200506): Conflicts: hw/core/machine.c + +Signed-off-by: Danilo C. L. de Paula --- hw/acpi/ich9.c | 15 ++++ hw/acpi/piix4.c | 5 +- hw/arm/virt.c | 2 +- hw/char/serial.c | 16 ++++ - hw/core/machine.c | 170 +++++++++++++++++++++++++++++++++++++++++++ + hw/core/machine.c | 170 +++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- hw/i386/pc_piix.c | 2 + hw/i386/pc_q35.c | 2 + - hw/net/e1000e.c | 21 ++++++ + hw/net/e1000e.c | 21 +++++ hw/net/rtl8139.c | 4 +- hw/rtc/mc146818rtc.c | 6 ++ - hw/smbios/smbios.c | 46 +++++++++++- + hw/smbios/smbios.c | 46 +++++++++- hw/timer/i8254_common.c | 2 +- hw/usb/hcd-uhci.c | 4 +- hw/usb/hcd-xhci.c | 20 +++++ hw/usb/hcd-xhci.h | 2 + include/hw/acpi/ich9.h | 3 + - include/hw/boards.h | 24 ++++++ + include/hw/boards.h | 24 +++++ include/hw/firmware/smbios.h | 5 +- include/hw/i386/pc.h | 3 + include/hw/usb.h | 4 + @@ -68,7 +70,7 @@ Conflicts: 23 files changed, 354 insertions(+), 11 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 7ead029..3b8501f 100644 +index 7ead029e68..3b8501fa38 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -369,6 
+369,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) @@ -101,7 +103,7 @@ index 7ead029..3b8501f 100644 &pm->disable_s3, OBJ_PROP_FLAG_READWRITE); object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S4_DISABLED, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 283422e..f664201 100644 +index 283422e0d3..f6642011c6 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -276,6 +276,7 @@ static const VMStateDescription vmstate_acpi = { @@ -124,7 +126,7 @@ index 283422e..f664201 100644 DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_hotplug_bridge, true), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 7d9f715..e30b837 100644 +index 7d9f7157da..e30b837135 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1450,7 +1450,7 @@ static void virt_build_smbios(VirtMachineState *vms) @@ -137,7 +139,7 @@ index 7d9f715..e30b837 100644 smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, &smbios_anchor, &smbios_anchor_len); diff --git a/hw/char/serial.c b/hw/char/serial.c -index 9eebcb2..11d2d08 100644 +index 9eebcb27e7..11d2d08912 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -35,6 +35,7 @@ @@ -192,7 +194,7 @@ index 9eebcb2..11d2d08 100644 } diff --git a/hw/core/machine.c b/hw/core/machine.c -index 211b4e0..ef6b320 100644 +index 211b4e077a..ef6b320ea7 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -28,6 +28,176 @@ @@ -373,7 +375,7 @@ index 211b4e0..ef6b320 100644 { "virtio-balloon-device", "page-poison", "false" }, { "vmport", "x-read-set-eax", "off" }, diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index 3aaeeec..d88f52a 100644 +index 3aaeeeca1e..d88f52a587 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c @@ -84,7 +84,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) @@ -386,7 +388,7 @@ index 3aaeeec..d88f52a 100644 }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index fae487f..18815e8 100644 +index fae487f57d..18815e8302 
100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -179,6 +179,8 @@ static void pc_init1(MachineState *machine, @@ -399,7 +401,7 @@ index fae487f..18815e8 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index acd6d40..a1131e6 100644 +index acd6d405f0..a1131e6825 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -204,6 +204,8 @@ static void pc_q35_init(MachineState *machine) @@ -412,7 +414,7 @@ index acd6d40..a1131e6 100644 } diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index fda3451..be64a6b 100644 +index fda34518c9..be64a6b56f 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -79,6 +79,11 @@ typedef struct E1000EState { @@ -486,7 +488,7 @@ index fda3451..be64a6b 100644 e1000e_prop_disable_vnet, bool), DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index ab93d78..48a8135 100644 +index ab93d78ab3..48a81354fc 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3179,7 +3179,7 @@ static int rtl8139_pre_save(void *opaque) @@ -509,7 +511,7 @@ index ab93d78..48a8135 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c -index 7a38540..377d861 100644 +index 7a38540cb9..377d861913 100644 --- a/hw/rtc/mc146818rtc.c +++ b/hw/rtc/mc146818rtc.c @@ -43,6 +43,7 @@ @@ -533,7 +535,7 @@ index 7a38540..377d861 100644 } diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index ffd9872..7818b90 100644 +index ffd98727ee..7818b90c96 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -56,6 +56,9 @@ static bool smbios_legacy = true; @@ -615,7 +617,7 @@ index ffd9872..7818b90 100644 SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); SMBIOS_SET_DEFAULT(type3.version, version); diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index 050875b..32935da 100644 +index 050875b497..32935da46c 100644 --- a/hw/timer/i8254_common.c +++ b/hw/timer/i8254_common.c @@ -231,7 +231,7 @@ static const VMStateDescription 
vmstate_pit_common = { @@ -628,7 +630,7 @@ index 050875b..32935da 100644 vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 37f7beb..2741edc 100644 +index 37f7beb3fa..2741edc589 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c @@ -1219,12 +1219,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) @@ -648,7 +650,7 @@ index 37f7beb..2741edc 100644 if (s->masterbus) { USBPort *ports[NB_PORTS]; diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index b330e36..b25cce8 100644 +index b330e36fe6..b25cce8f0c 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -3600,9 +3600,27 @@ static const VMStateDescription vmstate_xhci_slot = { @@ -689,7 +691,7 @@ index b330e36..b25cce8 100644 } }; diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h -index 946af51..cc91a7e 100644 +index 946af51fc2..cc91a7e4bd 100644 --- a/hw/usb/hcd-xhci.h +++ b/hw/usb/hcd-xhci.h @@ -157,6 +157,8 @@ typedef struct XHCIEvent { @@ -702,7 +704,7 @@ index 946af51..cc91a7e 100644 typedef struct XHCIInterrupter { diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h -index 28a5318..ff4a672 100644 +index 28a53181cb..ff4a672b90 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -61,6 +61,9 @@ typedef struct ICH9LPCPMRegs { @@ -716,7 +718,7 @@ index 28a5318..ff4a672 100644 #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" diff --git a/include/hw/boards.h b/include/hw/boards.h -index 426ce5f..1062df9 100644 +index 426ce5f625..1062df96c0 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -373,4 +373,28 @@ extern const size_t hw_compat_2_2_len; @@ -749,7 +751,7 @@ index 426ce5f..1062df9 100644 + #endif diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 02a0ced..67e38a1 100644 +index 02a0ced0a0..67e38a1b13 100644 --- a/include/hw/firmware/smbios.h +++ b/include/hw/firmware/smbios.h @@ -267,7 +267,10 @@ void smbios_entry_add(QemuOpts 
*opts, Error **errp); @@ -765,7 +767,7 @@ index 02a0ced..67e38a1 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index dce1273..665c430 100644 +index dce1273c7d..665c4309a2 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -102,6 +102,9 @@ typedef struct PCMachineClass { @@ -779,7 +781,7 @@ index dce1273..665c430 100644 /* RAM / address space compat: */ bool gigabyte_align; diff --git a/include/hw/usb.h b/include/hw/usb.h -index e29a376..35ac38c 100644 +index e29a37635b..35ac38c459 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h @@ -575,4 +575,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, @@ -792,7 +794,7 @@ index e29a376..35ac38c 100644 + #endif diff --git a/migration/migration.c b/migration/migration.c -index 92e44e0..67d3b75 100644 +index 92e44e021e..67d3b75485 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -128,6 +128,8 @@ enum mig_rp_message_type { @@ -805,7 +807,7 @@ index 92e44e0..67d3b75 100644 migrations at once. For now we don't need to add dynamic creation of migration */ diff --git a/migration/migration.h b/migration/migration.h -index f617960..b8bc10d 100644 +index f617960522..b8bc10d16d 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -339,6 +339,11 @@ void init_dirty_bitmap_incoming_migration(void); @@ -821,5 +823,5 @@ index f617960..b8bc10d 100644 #define qemu_ram_foreach_block \ #warning "Use foreach_not_ignored_block in migration code" -- -1.8.3.1 +2.27.0 diff --git a/0008-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch index fff091f..9e569fb 100644 --- a/0008-Add-aarch64-machine-types.patch +++ b/0008-Add-aarch64-machine-types.patch @@ -43,13 +43,15 @@ Merged patches (4.0.0): Merged patches (4.1.0): - c3e39ef aarch64: Add virt-rhel8.1.0 machine type for ARM - 59a46d1 aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine + +Signed-off-by: Danilo C. 
L. de Paula --- - hw/arm/virt.c | 172 +++++++++++++++++++++++++++++++++++++++++++++++++- - include/hw/arm/virt.h | 11 ++++ + hw/arm/virt.c | 172 +++++++++++++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 11 +++ 2 files changed, 182 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index e30b837..5a45677 100644 +index e30b837135..5a45677205 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -79,6 +79,7 @@ @@ -264,7 +266,7 @@ index e30b837..5a45677 100644 +} +DEFINE_RHEL_MACHINE_AS_LATEST(8, 2, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 54bcf17..5fdabd8 100644 +index 54bcf17afd..5fdabd87d6 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -165,6 +165,7 @@ typedef struct { @@ -293,5 +295,5 @@ index 54bcf17..5fdabd8 100644 bool virt_is_acpi_enabled(VirtMachineState *vms); -- -1.8.3.1 +2.27.0 diff --git a/0009-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch index 9be23b5..c8e6c81 100644 --- a/0009-Add-ppc64-machine-types.patch +++ b/0009-Add-ppc64-machine-types.patch @@ -33,16 +33,18 @@ Merged patches (4.2.0): Merged patches (weekly-200226): - eb121ff spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine (partial) + +Signed-off-by: Danilo C. L. 
de Paula --- - hw/ppc/spapr.c | 280 ++++++++++++++++++++++++++++++++++++++++++++++++ - hw/ppc/spapr_cpu_core.c | 13 +++ + hw/ppc/spapr.c | 280 ++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr_cpu_core.c | 13 ++ include/hw/ppc/spapr.h | 1 + - target/ppc/compat.c | 13 ++- + target/ppc/compat.c | 13 +- target/ppc/cpu.h | 1 + 5 files changed, 307 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index f6f034d..f30618e 100644 +index f6f034d039..f30618e4b1 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -4530,6 +4530,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) @@ -375,7 +377,7 @@ index f6f034d..f30618e 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index bfdf6b3..39fcaf8 100644 +index bfdf6b3e5c..39fcaf855b 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -24,6 +24,7 @@ @@ -413,7 +415,7 @@ index bfdf6b3..39fcaf8 100644 goto error_intc_create; } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index c421410..5190d6a 100644 +index c421410e3f..5190d6a936 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -130,6 +130,7 @@ struct SpaprMachineClass { @@ -425,7 +427,7 @@ index c421410..5190d6a 100644 uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, diff --git a/target/ppc/compat.c b/target/ppc/compat.c -index fda0dfe..ab8e3b2 100644 +index fda0dfe8f8..ab8e3b2125 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) @@ -450,7 +452,7 @@ index fda0dfe..ab8e3b2 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index e7d382a..3cf209d 100644 +index e7d382ac10..3cf209dd90 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1352,6 +1352,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, 
bool ifetch) @@ -462,5 +464,5 @@ index e7d382a..3cf209d 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, -- -1.8.3.1 +2.27.0 diff --git a/0010-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch index bd769c9..d363b4b 100644 --- a/0010-Add-s390x-machine-types.patch +++ b/0010-Add-s390x-machine-types.patch @@ -20,12 +20,14 @@ Merged patches (4.2.0): - fb192e5 redhat: s390x: Rename s390-ccw-virtio-rhel8.0.0 to s390-ccw-virtio-rhel8.1.0 - a9b22e8 redhat: s390x: Add proper compatibility options for the -rhel7.6.0 machine - hw/s390x: Add the s390-ccw-virtio-rhel8.2.0 machine types (patch 92954) + +Signed-off-by: Danilo C. L. de Paula --- - hw/s390x/s390-virtio-ccw.c | 71 +++++++++++++++++++++++++++++++++++++++++++++- + hw/s390x/s390-virtio-ccw.c | 71 +++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 023fd25..0e8dd62 100644 +index 023fd25f2b..0e8dd62b18 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -778,7 +778,7 @@ bool css_migration_enabled(void) @@ -121,5 +123,5 @@ index 023fd25..0e8dd62 100644 static void ccw_machine_register_types(void) { -- -1.8.3.1 +2.27.0 diff --git a/0011-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch index 4b98372..45c1f1b 100644 --- a/0011-Add-x86_64-machine-types.patch +++ b/0011-Add-x86_64-machine-types.patch @@ -42,19 +42,21 @@ Merged patches (weekly-200122): Merged patches (weekly-200318): - e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) + +Signed-off-by: Danilo C. L. 
de Paula --- hw/i386/acpi-build.c | 3 + - hw/i386/pc.c | 265 ++++++++++++++++++++++++++++++++++++++++++++++++++- - hw/i386/pc_piix.c | 210 +++++++++++++++++++++++++++++++++++++++- - hw/i386/pc_q35.c | 162 ++++++++++++++++++++++++++++++- + hw/i386/pc.c | 265 ++++++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 210 +++++++++++++++++++++++++++++++++- + hw/i386/pc_q35.c | 162 +++++++++++++++++++++++++- include/hw/boards.h | 2 + - include/hw/i386/pc.h | 33 +++++++ + include/hw/i386/pc.h | 33 ++++++ target/i386/cpu.c | 3 +- target/i386/kvm.c | 4 + 8 files changed, 675 insertions(+), 7 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index b7bcbbb..fe815c5 100644 +index b7bcbbbb2a..fe815c5403 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -212,6 +212,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) @@ -68,7 +70,7 @@ index b7bcbbb..fe815c5 100644 } diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 61acc9e..fe37bde 100644 +index 61acc9e530..fe37bdec7a 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -346,6 +346,263 @@ GlobalProperty pc_compat_1_4[] = { @@ -365,7 +367,7 @@ index 61acc9e..fe37bde 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 18815e8..eeadd89 100644 +index 18815e8302..eeadd896c2 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -54,6 +54,7 @@ @@ -604,7 +606,7 @@ index 18815e8..eeadd89 100644 +DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index a1131e6..1cd4e15 100644 +index a1131e6825..1cd4e15297 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -201,8 +201,8 @@ static void pc_q35_init(MachineState *machine) @@ -788,7 +790,7 @@ index a1131e6..1cd4e15 100644 +DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, + pc_q35_machine_rhel730_options); diff --git 
a/include/hw/boards.h b/include/hw/boards.h -index 1062df9..c357731 100644 +index 1062df96c0..c3577319c0 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -208,6 +208,8 @@ struct MachineClass { @@ -801,7 +803,7 @@ index 1062df9..c357731 100644 int nb_nodes, ram_addr_t size); bool ignore_boot_device_suffixes; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 665c430..95d07f8 100644 +index 665c4309a2..95d07f81a0 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -120,6 +120,9 @@ typedef struct PCMachineClass { @@ -852,7 +854,7 @@ index 665c430..95d07f8 100644 * depending on QEMU versions up to QEMU 2.4. */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 55a30cd..5477433 100644 +index 55a30cd4d9..5477433af5 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1830,7 +1830,7 @@ static X86CPUDefinition builtin_x86_defs[] = { @@ -873,7 +875,7 @@ index 55a30cd..5477433 100644 }; diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 6adbff3..51fbfd5 100644 +index 6adbff3d74..51fbfd528e 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -3139,6 +3139,7 @@ static int kvm_get_msrs(X86CPU *cpu) @@ -895,5 +897,5 @@ index 6adbff3..51fbfd5 100644 case MSR_KVM_PV_EOI_EN: env->pv_eoi_en_msr = msrs[i].data; -- -1.8.3.1 +2.27.0 diff --git a/0012-Enable-make-check.patch b/0012-Enable-make-check.patch index ae2533d..52af98e 100644 --- a/0012-Enable-make-check.patch +++ b/0012-Enable-make-check.patch @@ -33,6 +33,8 @@ Merged patches (4.0.0): Merged patches (4.1.0-rc0): - 41288ff redhat: Remove raw iotest 205 + +Signed-off-by: Danilo C. L. 
de Paula --- redhat/qemu-kvm.spec.template | 2 +- tests/qemu-iotests/051 | 12 ++++++------ @@ -48,7 +50,7 @@ Merged patches (4.1.0-rc0): 11 files changed, 38 insertions(+), 18 deletions(-) diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 -index 034d3a3..aadc413 100755 +index 034d3a3250..aadc413ee6 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -183,11 +183,11 @@ run_qemu -drive if=virtio @@ -82,7 +84,7 @@ index 034d3a3..aadc413 100755 *) ;; diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 9b07a7e..c678d91 100644 +index 9b07a7ed03..c678d915b2 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -92,7 +92,7 @@ @@ -104,7 +106,7 @@ index 9b07a7e..c678d91 100644 101 rw quick 102 rw quick diff --git a/tests/qtest/Makefile.include b/tests/qtest/Makefile.include -index 98af2c2..d370459 100644 +index 98af2c2d93..d370459c3a 100644 --- a/tests/qtest/Makefile.include +++ b/tests/qtest/Makefile.include @@ -29,7 +29,7 @@ check-qtest-i386-y += ide-test @@ -155,7 +157,7 @@ index 98af2c2..d370459 100644 check-qtest-s390x-y += cpu-plug-test check-qtest-s390x-y += migration-test diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c -index 85a3614..1c18441 100644 +index 85a3614286..1c18441334 100644 --- a/tests/qtest/boot-serial-test.c +++ b/tests/qtest/boot-serial-test.c @@ -109,19 +109,23 @@ static testdef_t tests[] = { @@ -184,7 +186,7 @@ index 85a3614..1c18441 100644 { "sparc", "LX", "", "TMS390S10" }, { "sparc", "SS-4", "", "MB86904" }, diff --git a/tests/qtest/cpu-plug-test.c b/tests/qtest/cpu-plug-test.c -index e8ffbbc..fda7269 100644 +index e8ffbbce4b..fda7269c82 100644 --- a/tests/qtest/cpu-plug-test.c +++ b/tests/qtest/cpu-plug-test.c @@ -181,8 +181,8 @@ static void add_pseries_test_case(const char *mname) @@ -199,7 +201,7 @@ index e8ffbbc..fda7269 100644 } data = g_new(PlugTestData, 1); diff --git a/tests/qtest/e1000-test.c b/tests/qtest/e1000-test.c -index c387984..c89112d 
100644 +index c387984ef6..c89112d6f8 100644 --- a/tests/qtest/e1000-test.c +++ b/tests/qtest/e1000-test.c @@ -22,9 +22,11 @@ struct QE1000 { @@ -215,7 +217,7 @@ index c387984..c89112d 100644 static void *e1000_get_driver(void *obj, const char *interface) diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c -index 48e8e02..6496196 100644 +index 48e8e02d6e..6496196b07 100644 --- a/tests/qtest/hd-geo-test.c +++ b/tests/qtest/hd-geo-test.c @@ -737,6 +737,7 @@ static void test_override_ide(void) @@ -247,7 +249,7 @@ index 48e8e02..6496196 100644 qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); qtest_add_func("hd-geo/override/scsi_hot_unplug", diff --git a/tests/qtest/prom-env-test.c b/tests/qtest/prom-env-test.c -index 60e6ec3..f9d6adc 100644 +index 60e6ec3153..f9d6adcfe9 100644 --- a/tests/qtest/prom-env-test.c +++ b/tests/qtest/prom-env-test.c @@ -89,10 +89,14 @@ int main(int argc, char *argv[]) @@ -266,7 +268,7 @@ index 60e6ec3..f9d6adc 100644 add_tests(sparc_machines); } else if (!strcmp(arch, "sparc64")) { diff --git a/tests/qtest/test-x86-cpuid-compat.c b/tests/qtest/test-x86-cpuid-compat.c -index 7ca1883..983aa07 100644 +index 7ca1883a29..983aa0719a 100644 --- a/tests/qtest/test-x86-cpuid-compat.c +++ b/tests/qtest/test-x86-cpuid-compat.c @@ -300,6 +300,7 @@ int main(int argc, char **argv) @@ -286,7 +288,7 @@ index 7ca1883..983aa07 100644 /* Test feature parsing */ add_feature_test("x86/cpuid/features/plus", diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c -index 10ef9d2..3855873 100644 +index 10ef9d2a91..3855873050 100644 --- a/tests/qtest/usb-hcd-xhci-test.c +++ b/tests/qtest/usb-hcd-xhci-test.c @@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) @@ -316,5 +318,5 @@ index 10ef9d2..3855873 100644 qtest_start("-device nec-usb-xhci,id=xhci" -- -1.8.3.1 +2.27.0 diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index 
1445da6..cbf42e0 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -35,13 +35,14 @@ Merged patches (2.9.0): (cherry picked from commit 9fa3c9fc6dfcde76d80db1aa601b2d577f72ceec) (cherry picked from commit 3cb35556dc7d994f203d732fe952f95fcdb03c0a) +Signed-off-by: Danilo C. L. de Paula --- hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- hw/vfio/pci.h | 1 + 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index d020ea9..e4292ea 100644 +index d020ea9f82..e4292ea2b4 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -47,6 +47,9 @@ @@ -97,7 +98,7 @@ index d020ea9..e4292ea 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 0da7a20..5d2b0d2 100644 +index 0da7a20a7e..5d2b0d2f2d 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -135,6 +135,7 @@ typedef struct VFIOPCIDevice { @@ -109,5 +110,5 @@ index 0da7a20..5d2b0d2 100644 uint32_t device_id; uint32_t sub_vendor_id; -- -1.8.3.1 +2.27.0 diff --git a/0014-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch index 3f8f0ff..2ef2119 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -18,12 +18,13 @@ as unsupported by Red Hat, and advising users to use libvirt instead. Signed-off-by: Eduardo Habkost (cherry picked from commit 2a07700936e39856cc9f149c6a6517f0715536a6) (cherry picked from commit 5dd2f4706e2fef945771949e59a8fcc1b5452de9) +Signed-off-by: Danilo C. L. 
de Paula --- softmmu/vl.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c -index 6509057..8453de5 100644 +index 6509057752..8453de5e68 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -1674,9 +1674,17 @@ static void version(void) @@ -53,5 +54,5 @@ index 6509057..8453de5 100644 } -- -1.8.3.1 +2.27.0 diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch index 38b9095..e5bb3c8 100644 --- a/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -74,13 +74,15 @@ The recommended vcpu max limit (KVM_CAP_NR_VCPUS) should be used instead of the actual max vcpu limit (KVM_CAP_MAX_VCPUS) to give an error. This commit matches the limit to current KVM_CAP_NR_VCPUS value. + +Signed-off-by: Danilo C. L. de Paula --- accel/kvm/kvm-all.c | 12 ++++++++++++ softmmu/vl.c | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index ab36fbf..6f6bb47 100644 +index ab36fbfa0c..6f6bb47d9a 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2088,6 +2088,18 @@ static int kvm_init(MachineState *ms) @@ -103,7 +105,7 @@ index ab36fbf..6f6bb47 100644 if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " diff --git a/softmmu/vl.c b/softmmu/vl.c -index 8453de5..ea6e9e4 100644 +index 8453de5e68..ea6e9e4f01 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -116,6 +116,8 @@ @@ -146,5 +148,5 @@ index 8453de5..ea6e9e4 100644 configure_rtc(qemu_find_opts_singleton("rtc")); -- -1.8.3.1 +2.27.0 diff --git a/0016-Add-support-for-simpletrace.patch b/0016-Add-support-for-simpletrace.patch index d9e4626..7309f2c 100644 --- a/0016-Add-support-for-simpletrace.patch +++ b/0016-Add-support-for-simpletrace.patch @@ -24,11 +24,13 @@ Merged patches (2.3.0): - 5292fc3 trace: add SystemTap init scripts for simpletrace bridge - eda9e5e simpletrace: 
install simpletrace.py - 85c4c8f trace: add systemtap-initscript README file to RPM + +Signed-off-by: Danilo C. L. de Paula --- .gitignore | 2 ++ Makefile | 4 +++ - README.systemtap | 43 +++++++++++++++++++++++++++++++++ - redhat/qemu-kvm.spec.template | 25 ++++++++++++++++++- + README.systemtap | 43 +++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 25 +++++++++++++- scripts/systemtap/conf.d/qemu_kvm.conf | 4 +++ scripts/systemtap/script.d/qemu_kvm.stp | 1 + 6 files changed, 78 insertions(+), 1 deletion(-) @@ -37,7 +39,7 @@ Merged patches (2.3.0): create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp diff --git a/Makefile b/Makefile -index 88b7565..045eeb5 100644 +index 88b7565246..045eeb545e 100644 --- a/Makefile +++ b/Makefile @@ -995,6 +995,10 @@ endif @@ -53,7 +55,7 @@ index 88b7565..045eeb5 100644 ctags: diff --git a/README.systemtap b/README.systemtap new file mode 100644 -index 0000000..ad913fc +index 0000000000..ad913fc990 --- /dev/null +++ b/README.systemtap @@ -0,0 +1,43 @@ @@ -102,7 +104,7 @@ index 0000000..ad913fc + # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf new file mode 100644 -index 0000000..372d816 +index 0000000000..372d8160a4 --- /dev/null +++ b/scripts/systemtap/conf.d/qemu_kvm.conf @@ -0,0 +1,4 @@ @@ -112,11 +114,11 @@ index 0000000..372d816 +qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp new file mode 100644 -index 0000000..c04abf9 +index 0000000000..c04abf9449 --- /dev/null +++ b/scripts/systemtap/script.d/qemu_kvm.stp @@ -0,0 +1 @@ +probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} -- -1.8.3.1 +2.27.0 diff --git a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch 
b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index c63c892..48d46dc 100644 --- a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -24,6 +24,8 @@ Rebase notes (weekly-200129): Rebase notes (weekly-200708 - rewrite patch to new docs structure + +Signed-off-by: Danilo C. L. de Paula --- docs/defs.rst.inc | 4 ++-- docs/interop/live-block-operations.rst | 4 ++-- @@ -33,7 +35,7 @@ Rebase notes (weekly-200708 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/defs.rst.inc b/docs/defs.rst.inc -index 48d05aa..d74dbde 100644 +index 48d05aaf33..d74dbdeca9 100644 --- a/docs/defs.rst.inc +++ b/docs/defs.rst.inc @@ -9,7 +9,7 @@ @@ -47,7 +49,7 @@ index 48d05aa..d74dbde 100644 .. |I2C| replace:: I\ :sup:`2`\ C .. |I2S| replace:: I\ :sup:`2`\ S diff --git a/docs/interop/live-block-operations.rst b/docs/interop/live-block-operations.rst -index 48afdc7..6650b2c 100644 +index 48afdc7927..6650b2c975 100644 --- a/docs/interop/live-block-operations.rst +++ b/docs/interop/live-block-operations.rst @@ -129,7 +129,7 @@ To show some example invocations of command-line, we will use the @@ -69,7 +71,7 @@ index 48afdc7..6650b2c 100644 -blockdev node-name=node-TargetDisk,driver=qcow2,file.driver=file,file.node-name=file,file.filename=./target-disk.qcow2 \ -device virtio-blk,drive=node-TargetDisk,id=virtio0 \ diff --git a/docs/tools/qemu-trace-stap.rst b/docs/tools/qemu-trace-stap.rst -index fb70445..0d9a783 100644 +index fb70445c75..0d9a783112 100644 --- a/docs/tools/qemu-trace-stap.rst +++ b/docs/tools/qemu-trace-stap.rst @@ -45,19 +45,19 @@ The following commands are valid: @@ -127,7 +129,7 @@ index fb70445..0d9a783 100644 See also -------- diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst -index 824e713..8449936 100644 +index 824e713491..8449936c63 100644 --- a/docs/tools/virtiofsd.rst +++ b/docs/tools/virtiofsd.rst @@ -116,7 +116,7 @@ Export 
``/var/lib/fs/vm001/`` on vhost-user UNIX domain socket @@ -140,7 +142,7 @@ index 824e713..8449936 100644 -device vhost-user-fs-pci,chardev=char0,tag=myfs \ -object memory-backend-memfd,id=mem,size=4G,share=on \ diff --git a/qemu-options.hx b/qemu-options.hx -index b969944..fe85a0e 100644 +index b9699440a3..fe85a0e952 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2891,11 +2891,11 @@ SRST @@ -161,5 +163,5 @@ index b969944..fe85a0e 100644 ``-netdev vhost-vdpa,vhostdev=/path/to/dev`` Establish a vhost-vdpa netdev. -- -1.8.3.1 +2.27.0 diff --git a/0018-usb-xhci-Fix-PCI-capability-order.patch b/0018-usb-xhci-Fix-PCI-capability-order.patch index 81a8467..f4c5de6 100644 --- a/0018-usb-xhci-Fix-PCI-capability-order.patch +++ b/0018-usb-xhci-Fix-PCI-capability-order.patch @@ -56,12 +56,13 @@ Conflicts: (cherry picked from commit a42f86dc906cc7d2c16d02bf125ed76847b469cb) (cherry picked from commit 992ab2e4f6e15d3e51bc716763aa8d6f45c6d29d) +Signed-off-by: Danilo C. L. de Paula --- hw/usb/hcd-xhci.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index b25cce8..9582d81 100644 +index b25cce8f0c..9582d81d14 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -3413,6 +3413,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) @@ -91,5 +92,5 @@ index b25cce8..9582d81 100644 /* TODO check for errors, and should fail when msix=on */ msix_init(dev, xhci->numintrs, -- -1.8.3.1 +2.27.0 diff --git a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index 63a6d18..94a535a 100644 --- a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -45,7 +45,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index b497752..d3a64a6 100644 +index b49775269e..d3a64a6582 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -814,6 +814,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, @@ -65,5 +65,5 @@ index b497752..d3a64a6 100644 if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { return; -- -1.8.3.1 +2.27.0 diff --git a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index 2534964..b6808d3 100644 --- a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -32,7 +32,7 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index 0c2bc8e..15bef3d 100644 +index 0c2bc8e06e..15bef3d27f 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -335,12 +335,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, @@ -56,5 +56,5 @@ index 0c2bc8e..15bef3d 100644 spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); } -- -1.8.3.1 +2.27.0 diff --git a/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch b/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch index cc7fdca..55de732 100644 --- a/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +++ b/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch @@ -29,7 +29,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/qapi/block-core.json b/qapi/block-core.json -index b20332e..db4544d 100644 +index b20332e592..db4544df75 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -4135,10 +4135,17 @@ @@ -52,5 +52,5 @@ index b20332e..db4544d 100644 ## # @blockdev-del: -- -1.8.3.1 +2.27.0 diff --git a/0022-virtio-net-fix-removal-of-failover-device.patch b/0022-virtio-net-fix-removal-of-failover-device.patch index 8a2b5be..ca33979 100644 --- a/0022-virtio-net-fix-removal-of-failover-device.patch +++ b/0022-virtio-net-fix-removal-of-failover-device.patch @@ -36,7 +36,7 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 1 insertion(+) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 1596cb1..f82455b 100644 +index 1596cb1397..f82455ba5d 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -3417,6 +3417,7 @@ static void virtio_net_device_unrealize(DeviceState *dev) @@ -48,5 +48,5 @@ index 1596cb1..f82455b 100644 g_free(n->standby_id); qobject_unref(n->primary_device_dict); -- -1.8.3.1 +2.27.0 diff --git a/0024-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch b/0024-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch index 3301702..edd80d0 100644 --- a/0024-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch +++ b/0024-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch @@ -1,6 +1,6 @@ -From 34817c44ee8e467df7e9f92b5fffa5679172bb58 Mon Sep 17 00:00:00 2001 +From 64afb1a8b06f0017b3ee5eb59790e1751424b1d5 Mon Sep 17 00:00:00 2001 From: David Gibson -Date: Wed, 8 Jul 2020 10:02:25 +0200 +Date: Fri, 22 May 2020 02:27:18 +0000 Subject: RHEL-only: Enable vTPM for POWER in downstream configs RH-Author: David Gibson @@ -8,6 +8,9 @@ Message-id: <20200522032718.387731-1-dgibson@redhat.com> Patchwork-id: 96743 O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH] RHEL-only: Enable vTPM for POWER in downstream configs Bugzilla: 1781911 +RH-Acked-by: Laurent Vivier +RH-Acked-by: 
Thomas Huth +RH-Acked-by: Greg Kurz From: David Gibson @@ -20,13 +23,13 @@ Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=28742072 Testing: With brewed qemu was able to see a vTPM device in a guest Signed-off-by: David Gibson -Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. de Paula --- default-configs/ppc64-rh-devices.mak | 3 +++ 1 file changed, 3 insertions(+) diff --git a/default-configs/ppc64-rh-devices.mak b/default-configs/ppc64-rh-devices.mak -index ecbe53f..032422e 100644 +index ecbe53fe63..032422e103 100644 --- a/default-configs/ppc64-rh-devices.mak +++ b/default-configs/ppc64-rh-devices.mak @@ -32,3 +32,6 @@ CONFIG_XICS_SPAPR=y @@ -37,5 +40,5 @@ index ecbe53f..032422e 100644 +CONFIG_TPM_EMULATOR=y +CONFIG_TPM_PASSTHROUGH=y -- -1.8.3.1 +2.27.0 diff --git a/0025-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch b/0025-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch index 5924eed..87ec07a 100644 --- a/0025-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch +++ b/0025-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch @@ -1,6 +1,6 @@ -From 0de024660bb9c42ca87fd179b6a4122c3a9e0eb3 Mon Sep 17 00:00:00 2001 +From def5545e3588c8b8c7cd12e3ff8967ce059d3f38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 8 Jul 2020 10:02:25 +0200 +Date: Mon, 8 Jun 2020 15:54:26 -0400 Subject: redhat: fix 5.0 rebase missing ISA TPM TIS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 @@ -20,14 +20,14 @@ BRANCH: rhel-av-8.3.0-preview-2020-04-29 UPSTREAM: N/A BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=29172313 -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. 
de Paula --- default-configs/x86_64-rh-devices.mak | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak -index d59b6d9..1469e05 100644 +index d59b6d9bb5..1469e05382 100644 --- a/default-configs/x86_64-rh-devices.mak +++ b/default-configs/x86_64-rh-devices.mak @@ -95,6 +95,6 @@ CONFIG_WDT_IB6300ESB=y @@ -39,5 +39,5 @@ index d59b6d9..1469e05 100644 CONFIG_TPM_EMULATOR=y CONFIG_TPM_PASSTHROUGH=y -- -1.8.3.1 +2.27.0 diff --git a/0026-redhat-define-hw_compat_8_2.patch b/0026-redhat-define-hw_compat_8_2.patch index eb1a0a1..229c59c 100644 --- a/0026-redhat-define-hw_compat_8_2.patch +++ b/0026-redhat-define-hw_compat_8_2.patch @@ -1,6 +1,6 @@ -From 43a29be3b4f2186441067a2f5cd45d4e6035f206 Mon Sep 17 00:00:00 2001 +From f8f3c2d598ca5921b6a5d477e848f02977c715fd Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" -Date: Wed, 8 Jul 2020 10:02:25 +0200 +Date: Fri, 19 Jun 2020 15:42:26 -0400 Subject: redhat: define hw_compat_8_2 RH-Author: Dr. David Alan Gilbert @@ -17,21 +17,21 @@ From: Laurent Vivier Signed-off-by: Laurent Vivier Signed-off-by: Dr. David Alan Gilbert For minor fix - -Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula --- hw/core/machine.c | 28 ++++++++++++++++++++++++++++ include/hw/boards.h | 3 +++ 2 files changed, 31 insertions(+) diff --git a/hw/core/machine.c b/hw/core/machine.c -index ef6b320..b837399 100644 +index ef6b320ea7..b8373991e9 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -29,6 +29,34 @@ +@@ -28,6 +28,34 @@ + #include "hw/mem/nvdimm.h" #include "migration/vmstate.h" - /* ++/* + * The same as hw_compat_4_2 + */ +GlobalProperty hw_compat_rhel_8_2[] = { @@ -59,12 +59,11 @@ index ef6b320..b837399 100644 + { "fw_cfg", "acpi-mr-restore", "false" }, +}; +const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); -+/* + /* * The same as hw_compat_4_1 */ - GlobalProperty hw_compat_rhel_8_1[] = { diff --git a/include/hw/boards.h b/include/hw/boards.h -index c357731..f918a15 100644 +index c3577319c0..f918a15c66 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -375,6 +375,9 @@ extern const size_t hw_compat_2_2_len; @@ -78,5 +77,5 @@ index c357731..f918a15 100644 extern const size_t hw_compat_rhel_8_1_len; -- -1.8.3.1 +2.27.0 diff --git a/0027-x86-Add-8.3.0-x86_64-machine-type.patch b/0027-x86-Add-8.3.0-x86_64-machine-type.patch index 01d528e..cee8833 100644 --- a/0027-x86-Add-8.3.0-x86_64-machine-type.patch +++ b/0027-x86-Add-8.3.0-x86_64-machine-type.patch @@ -1,6 +1,6 @@ -From 9fc44f5b2b271ea3337f8e5eae3bc3d2f9d857f3 Mon Sep 17 00:00:00 2001 +From 8b509ce5c11a82d05ad0d43270e4b76e0010015b Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" -Date: Wed, 8 Jul 2020 10:02:25 +0200 +Date: Fri, 19 Jun 2020 15:42:27 -0400 Subject: x86: Add 8.3.0 x86_64 machine type RH-Author: Dr. David Alan Gilbert @@ -17,7 +17,7 @@ From: "Dr. David Alan Gilbert" Not much change, just the smbase-smram. Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula --- hw/i386/pc.c | 6 ++++++ hw/i386/pc_piix.c | 4 ++++ @@ -26,7 +26,7 @@ Signed-off-by: Miroslav Rezanina 4 files changed, 35 insertions(+) diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index fe37bde..94c857e 100644 +index fe37bdec7a..94c857ea97 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -360,6 +360,12 @@ GlobalProperty pc_rhel_compat[] = { @@ -43,7 +43,7 @@ index fe37bde..94c857e 100644 GlobalProperty pc_rhel_8_1_compat[] = { }; const size_t pc_rhel_8_1_compat_len = G_N_ELEMENTS(pc_rhel_8_1_compat); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index eeadd89..f14ddcb 100644 +index eeadd896c2..f14ddcb472 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -1028,6 +1028,10 @@ static void pc_machine_rhel760_options(MachineClass *m) @@ -58,7 +58,7 @@ index eeadd89..f14ddcb 100644 compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 1cd4e15..b9e8dcb 100644 +index 1cd4e15297..b9e8dcb392 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -590,6 +590,23 @@ static void pc_q35_machine_rhel_options(MachineClass *m) @@ -100,7 +100,7 @@ index 1cd4e15..b9e8dcb 100644 DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 95d07f8..e67468b 100644 +index 95d07f81a0..e67468ba41 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -270,6 +270,9 @@ extern const size_t pc_compat_1_4_len; @@ -114,5 +114,5 @@ index 95d07f8..e67468b 100644 extern const size_t pc_rhel_8_1_compat_len; -- -1.8.3.1 +2.27.0 diff --git a/0028-hw-arm-Changes-to-rhel820-machine.patch b/0028-hw-arm-Changes-to-rhel820-machine.patch deleted file mode 100644 index 419610d..0000000 --- a/0028-hw-arm-Changes-to-rhel820-machine.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 2717baf34693fc9aa5fa7d1f2a5e8eb1677c1bc2 Mon Sep 17 00:00:00 2001 -From: 
Gavin Shan -Date: Wed, 8 Jul 2020 10:02:25 +0200 -Subject: hw/arm: Changes to rhel820 machine - -RH-Author: Gavin Shan -Message-id: <20200630013648.101937-1-gshan@redhat.com> -Patchwork-id: 97844 -O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH v5 1/3] hw/arm: Changes to rhel820 machine -Bugzilla: 1818843 - -This applies two changes to rhel820 machine: - - * Set the gic version to VIRT_GIC_VERSION_NOSEL by default, which - doesn't cause functional changes. - * Disallow to configure the RAS property, which is hidden by default. - -Signed-off-by: Gavin Shan -RH-Acked-by: Auger Eric -RH-Acked-by: Andrew Jones -Signed-off-by: Miroslav Rezanina ---- - hw/arm/virt.c | 17 ++++++----------- - 1 file changed, 6 insertions(+), 11 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 5a45677..53f02e0 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2090,6 +2090,7 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, - visit_type_OnOffAuto(v, name, &vms->acpi, errp); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static bool virt_get_ras(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2103,6 +2104,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) - - vms->ras = value; - } -+#endif /* disabled for RHEL */ - - static char *virt_get_gic_version(Object *obj, Error **errp) - { -@@ -2760,12 +2762,8 @@ static void rhel820_virt_instance_init(Object *obj) - object_property_set_description(obj, "highmem", - "Set on/off to enable/disable using " - "physical address space above 32 bits"); -- /* -- * Default GIC type is still v2, but became configurable for RHEL. We -- * keep v2 instead of max as TCG CI test cases require an MSI controller -- * and there is no userspace ITS MSI emulation available. 
-- */ -- vms->gic_version = 2; -+ -+ vms->gic_version = VIRT_GIC_VERSION_NOSEL; - object_property_add_str(obj, "gic-version", virt_get_gic_version, - virt_set_gic_version); - object_property_set_description(obj, "gic-version", -@@ -2792,12 +2790,9 @@ static void rhel820_virt_instance_init(Object *obj) - object_property_set_description(obj, "iommu", - "Set the IOMMU type. " - "Valid values are none and smmuv3"); -+ - vms->ras = false; -- object_property_add_bool(obj, "ras", virt_get_ras, -- virt_set_ras); -- object_property_set_description(obj, "ras", -- "Set on/off to enable/disable reporting host memory errors " -- "to a KVM guest using ACPI and guest external abort exceptions"); -+ - vms->irqmap=a15irqmap; - virt_flash_create(vms); - } --- -1.8.3.1 - diff --git a/0029-hw-arm-Introduce-rhel_virt_instance_init-helper.patch b/0029-hw-arm-Introduce-rhel_virt_instance_init-helper.patch deleted file mode 100644 index c982e43..0000000 --- a/0029-hw-arm-Introduce-rhel_virt_instance_init-helper.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 91449aaf4ad482d9208c8861f549f7fe58af02ac Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 8 Jul 2020 10:02:25 +0200 -Subject: hw/arm: Introduce rhel_virt_instance_init() helper - -RH-Author: Gavin Shan -Message-id: <20200629022939.76453-3-gshan@redhat.com> -Patchwork-id: 97838 -O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH v4 2/3] hw/arm: Introduce rhel_virt_instance_init() helper -Bugzilla: 1818843 -RH-Acked-by: Andrew Jones - -This introduces rhel_virt_instance_init() helper function so that -it can be shared by rhel820 and rhel830 machine. This shouldn't -cause functional changes. 
- -Signed-off-by: Gavin Shan -Signed-off-by: Miroslav Rezanina ---- - hw/arm/virt.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 53f02e0..f26ae9f 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2746,7 +2746,7 @@ static void rhel_machine_init(void) - } - type_init(rhel_machine_init); - --static void rhel820_virt_instance_init(Object *obj) -+static void rhel_virt_instance_init(Object *obj) - { - VirtMachineState *vms = VIRT_MACHINE(obj); - VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); -@@ -2797,6 +2797,11 @@ static void rhel820_virt_instance_init(Object *obj) - virt_flash_create(vms); - } - -+static void rhel820_virt_instance_init(Object *obj) -+{ -+ rhel_virt_instance_init(obj); -+} -+ - static void rhel820_virt_options(MachineClass *mc) - { - compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); --- -1.8.3.1 - diff --git a/0030-hw-arm-Add-rhel830-machine-type.patch b/0030-hw-arm-Add-rhel830-machine-type.patch deleted file mode 100644 index 748a6ba..0000000 --- a/0030-hw-arm-Add-rhel830-machine-type.patch +++ /dev/null @@ -1,58 +0,0 @@ -From afad5b1cdd3d29cd497cb9987fca3009fa352d40 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 8 Jul 2020 10:02:25 +0200 -Subject: hw/arm: Add rhel830 machine type - -RH-Author: Gavin Shan -Message-id: <20200630014756.102753-1-gshan@redhat.com> -Patchwork-id: 97845 -O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH v5 3/3] hw/arm: Add rhel830 machine type -Bugzilla: 1818843 -RH-Acked-by: Andrew Jones - -This adds rhel830 machine type, whose properties are same as to -rhel820. 
- -Signed-off-by: Gavin Shan -RH-Acked-by: Auger Eric -Signed-off-by: Miroslav Rezanina ---- - hw/arm/virt.c | 17 +++++++++++++++-- - 1 file changed, 15 insertions(+), 2 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index f26ae9f..f1553f3 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2797,6 +2797,17 @@ static void rhel_virt_instance_init(Object *obj) - virt_flash_create(vms); - } - -+static void rhel830_virt_instance_init(Object *obj) -+{ -+ rhel_virt_instance_init(obj); -+} -+ -+static void rhel830_virt_options(MachineClass *mc) -+{ -+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); -+} -+DEFINE_RHEL_MACHINE_AS_LATEST(8, 3, 0) -+ - static void rhel820_virt_instance_init(Object *obj) - { - rhel_virt_instance_init(obj); -@@ -2804,6 +2815,8 @@ static void rhel820_virt_instance_init(Object *obj) - - static void rhel820_virt_options(MachineClass *mc) - { -- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); -+ rhel830_virt_options(mc); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, -+ hw_compat_rhel_8_2_len); - } --DEFINE_RHEL_MACHINE_AS_LATEST(8, 2, 0) -+DEFINE_RHEL_MACHINE(8, 2, 0) --- -1.8.3.1 - diff --git a/0031-redhat-define-pseries-rhel8.3.0-machine-type.patch b/0031-redhat-define-pseries-rhel8.3.0-machine-type.patch deleted file mode 100644 index b6d46d5..0000000 --- a/0031-redhat-define-pseries-rhel8.3.0-machine-type.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 3ccb92293ca895bc52907b36c8d2e8b6936ed975 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Wed, 8 Jul 2020 10:04:09 +0200 -Subject: redhat: define pseries-rhel8.3.0 machine type - -RH-Author: Laurent Vivier -Message-id: <20200706104117.219174-3-lvivier@redhat.com> -Patchwork-id: 97904 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH v2 2/2] redhat: define pseries-rhel8.3.0 machine type -Bugzilla: 1853265 - -Note: rebase to qemu-5.1 introduces - - 32a354dc6c07 ("numa: forbid '-numa node, mem' for 5.1 and newer machine types") - 
-and so '-numa node, mem' will not be available with pseries-rhel8.3.0 - -Signed-off-by: Laurent Vivier -Signed-off-by: Miroslav Rezanina ---- - hw/ppc/spapr.c | 30 ++++++++++++++++++++++++++++-- - 1 file changed, 28 insertions(+), 2 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index f30618e..dc1e9cb 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -4990,15 +4990,41 @@ DEFINE_SPAPR_MACHINE(2_1, "2.1", false); - #endif - - /* -+ * pseries-rhel8.3.0 -+ * like pseries-5.1 -+ */ -+ -+static void spapr_machine_rhel830_class_options(MachineClass *mc) -+{ -+ /* Defaults for the latest behaviour inherited from the base class */ -+} -+ -+DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", true); -+ -+/* - * pseries-rhel8.2.0 -+ * like pseries-4.2 + pseries-5.0 -+ * except SPAPR_CAP_CCF_ASSIST that has been backported to pseries-rhel8.1.0 - */ - - static void spapr_machine_rhel820_class_options(MachineClass *mc) - { -- /* Defaults for the latest behaviour inherited from the base class */ -+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel830_class_options(mc); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, -+ hw_compat_rhel_8_2_len); -+ -+ /* from pseries-4.2 */ -+ smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF; -+ smc->rma_limit = 16 * GiB; -+ mc->nvdimm_supported = false; -+ -+ /* from pseries-5.0 */ -+ mc->numa_mem_supported = true; - } - --DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", true); -+DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", false); - - /* - * pseries-rhel8.1.0 --- -1.8.3.1 - diff --git a/kvm.modules b/kvm.modules new file mode 100644 index 0000000..b9d9646 --- /dev/null +++ b/kvm.modules @@ -0,0 +1,18 @@ +#!/bin/sh + +case $(uname -m) in + ppc64) + grep OPAL /proc/cpuinfo >/dev/null 2>&1 && opal=1 + + modprobe -b kvm >/dev/null 2>&1 + modprobe -b kvm-pr >/dev/null 2>&1 && kvm=1 + if [ "$opal" ]; then + modprobe -b kvm-hv >/dev/null 2>&1 + fi + ;; + s390x) + modprobe -b kvm >/dev/null 2>&1 && kvm=1 + ;; 
+esac + +exit 0 diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 1d955dd..0530b54 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -68,7 +68,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.0.0 -Release: 1%{?dist} +Release: 2%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -128,10 +128,6 @@ Patch0024: 0024-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch Patch0025: 0025-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch Patch0026: 0026-redhat-define-hw_compat_8_2.patch Patch0027: 0027-x86-Add-8.3.0-x86_64-machine-type.patch -Patch0028: 0028-hw-arm-Changes-to-rhel820-machine.patch -Patch0029: 0029-hw-arm-Introduce-rhel_virt_instance_init-helper.patch -Patch0030: 0030-hw-arm-Add-rhel830-machine-type.patch -Patch0031: 0031-redhat-define-pseries-rhel8.3.0-machine-type.patch BuildRequires: wget BuildRequires: rpm-build @@ -1083,13 +1079,12 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog -* Tue Jul 14 2020 Danilo Cesar Lemes de Paula - 5.0.0-1.el8 -- This is an interational rebase, based on the weekly rebase. -- Altough not official yet, It contains fixes for the following BZs: +* Wed Jul 15 2020 Danilo Cesar Lemes de Paula - 5.0.0-2.el8 - Resolves: bz#1781911 - Resolves: bz#1841529 - Resolves: bz#1842902 -- Resolves: bz#1818843 + (This is an unofficial build that fixes the BZs mentioned above) + * Tue Jul 07 2020 Danilo Cesar Lemes de Paula - 4.2.0-29.el8 - kvm-virtio-net-fix-removal-of-failover-device.patch [bz#1820120] From 2b808133e810bf8cacb715cee081aa211b4d6412 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Wed, 12 Aug 2020 11:53:34 -0400 Subject: [PATCH 083/195] Rebase to qemu-5.1.0 --- .gitignore | 1 + ...at-Adding-slirp-to-the-exploded-tree.patch | 135 ++++++++++-------- 0005-Initial-redhat-build.patch | 68 +++------ 0006-Enable-disable-devices-for-RHEL.patch | 91 ++++-------- ...Machine-type-related-general-changes.patch | 34 ++--- 0008-Add-aarch64-machine-types.patch | 47 ++++-- 0009-Add-ppc64-machine-types.patch | 38 ++--- 0010-Add-s390x-machine-types.patch | 10 +- 0011-Add-x86_64-machine-types.patch | 44 +++--- 0012-Enable-make-check.patch | 44 ++---- ...mber-of-devices-that-can-be-assigned.patch | 8 +- ...Add-support-statement-to-help-output.patch | 8 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 10 +- 0016-Add-support-for-simpletrace.patch | 6 +- ...documentation-instead-of-qemu-system.patch | 6 +- 0018-usb-xhci-Fix-PCI-capability-order.patch | 4 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 2 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 6 +- ...x-blockdev-reopen-API-with-feature-f.patch | 6 +- ...-vTPM-for-POWER-in-downstream-config.patch | 5 +- ...o-net-fix-removal-of-failover-device.patch | 52 ------- ...t-fix-5.0-rebase-missing-ISA-TPM-TIS.patch | 7 +- ... 
=> 0024-redhat-define-hw_compat_8_2.patch | 8 +- ...25-x86-Add-8.3.0-x86_64-machine-type.patch | 23 +-- 0027-hw-arm-Changes-to-rhel820-machine.patch | 82 +++++++++++ ...oduce-rhel_virt_instance_init-helper.patch | 53 +++++++ 0029-hw-arm-Add-rhel830-machine-type.patch | 61 ++++++++ ...efine-pseries-rhel8.3.0-machine-type.patch | 77 ++++++++++ ...max_cpus-value-on-spapr-rhel-machine.patch | 49 +++++++ ...max_cpus-value-on-virt-rhel-machine-.patch | 53 +++++++ ...e-downstream-only-MAX_RHEL_CPUS-code.patch | 83 +++++++++++ 0034-q35-Set-max_cpus-to-512.patch | 45 ++++++ ...rt-Allow-the-TPM_TIS_SYSBUS-device-d.patch | 39 +++++ ...e-vTPM-for-ARM-in-downstream-configs.patch | 35 +++++ qemu-kvm.spec | 56 +++++--- sources | 2 +- 36 files changed, 900 insertions(+), 398 deletions(-) rename 0024-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch => 0022-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch (90%) delete mode 100644 0022-virtio-net-fix-removal-of-failover-device.patch rename 0025-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch => 0023-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch (86%) rename 0026-redhat-define-hw_compat_8_2.patch => 0024-redhat-define-hw_compat_8_2.patch (93%) rename 0027-x86-Add-8.3.0-x86_64-machine-type.patch => 0025-x86-Add-8.3.0-x86_64-machine-type.patch (86%) create mode 100644 0027-hw-arm-Changes-to-rhel820-machine.patch create mode 100644 0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch create mode 100644 0029-hw-arm-Add-rhel830-machine-type.patch create mode 100644 0030-redhat-define-pseries-rhel8.3.0-machine-type.patch create mode 100644 0031-ppc-Set-correct-max_cpus-value-on-spapr-rhel-machine.patch create mode 100644 0032-arm-Set-correct-max_cpus-value-on-virt-rhel-machine-.patch create mode 100644 0033-vl-Remove-downstream-only-MAX_RHEL_CPUS-code.patch create mode 100644 0034-q35-Set-max_cpus-to-512.patch create mode 100644 0035-RHEL-only-arm-virt-Allow-the-TPM_TIS_SYSBUS-device-d.patch create mode 100644 
0036-RHEL-only-Enable-vTPM-for-ARM-in-downstream-configs.patch diff --git a/.gitignore b/.gitignore index ba7d4aa..c3cbd56 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ /qemu-5.0.0-rc3.tar.xz /qemu-5.0.0-rc4.tar.xz /qemu-5.0.0.tar.xz +/qemu-5.1.0.tar.xz diff --git a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch index 60c33f1..d23a83d 100644 --- a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch +++ b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch @@ -1,6 +1,6 @@ -From effec13a9f842205be3526bcfb15e2068c0067b0 Mon Sep 17 00:00:00 2001 +From a71208a6b42d0ef657b2f712d2f08d2ed40e7094 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Thu, 23 Apr 2020 05:26:54 +0200 +Date: Wed, 29 Jul 2020 07:48:57 +0200 Subject: redhat: Adding slirp to the exploded tree RH-Author: Danilo de Paula @@ -46,8 +46,8 @@ This is an exploded-tree-only change and shouldn't be applied to dist-git. Signed-off-by: Danilo C. L. de Paula -Rebase notes (5.0.0-rc4): - - Update slirp directory to commit 2faae0f778 (used upstream) +Rebase notes (5.1.0-rc2): + - Update slirp directory to commit ce94eba2042d52a0ba3d9e252ebce86715e94275 (used upstream) Signed-off-by: Danilo C. L. de Paula --- @@ -63,7 +63,7 @@ Signed-off-by: Danilo C. L. de Paula slirp/build-aux/git-version-gen | 158 ++++ slirp/build-aux/meson-dist | 16 + slirp/meson.build | 134 +++ - slirp/src/arp_table.c | 91 ++ + slirp/src/arp_table.c | 92 ++ slirp/src/bootp.c | 369 ++++++++ slirp/src/bootp.h | 129 +++ slirp/src/cksum.c | 179 ++++ @@ -75,11 +75,11 @@ Signed-off-by: Danilo C. L. 
de Paula slirp/src/if.h | 25 + slirp/src/ip.h | 242 +++++ slirp/src/ip6.h | 214 +++++ - slirp/src/ip6_icmp.c | 434 +++++++++ + slirp/src/ip6_icmp.c | 433 +++++++++ slirp/src/ip6_icmp.h | 219 +++++ - slirp/src/ip6_input.c | 78 ++ + slirp/src/ip6_input.c | 85 ++ slirp/src/ip6_output.c | 39 + - slirp/src/ip_icmp.c | 489 ++++++++++ + slirp/src/ip_icmp.c | 492 ++++++++++ slirp/src/ip_icmp.h | 166 ++++ slirp/src/ip_input.c | 461 +++++++++ slirp/src/ip_output.c | 169 ++++ @@ -92,13 +92,13 @@ Signed-off-by: Danilo C. L. de Paula slirp/src/misc.c | 390 ++++++++ slirp/src/misc.h | 72 ++ slirp/src/ncsi-pkt.h | 445 +++++++++ - slirp/src/ncsi.c | 192 ++++ + slirp/src/ncsi.c | 193 ++++ slirp/src/ndp_table.c | 87 ++ slirp/src/sbuf.c | 168 ++++ slirp/src/sbuf.h | 27 + slirp/src/slirp.c | 1185 ++++++++++++++++++++++++ - slirp/src/slirp.h | 283 ++++++ - slirp/src/socket.c | 957 +++++++++++++++++++ + slirp/src/slirp.h | 284 ++++++ + slirp/src/socket.c | 954 +++++++++++++++++++ slirp/src/socket.h | 164 ++++ slirp/src/state.c | 379 ++++++++ slirp/src/stream.c | 120 +++ @@ -111,9 +111,9 @@ Signed-off-by: Danilo C. L. de Paula slirp/src/tcp_timer.h | 130 +++ slirp/src/tcp_var.h | 161 ++++ slirp/src/tcpip.h | 104 +++ - slirp/src/tftp.c | 462 ++++++++++ - slirp/src/tftp.h | 52 ++ - slirp/src/udp.c | 361 ++++++++ + slirp/src/tftp.c | 464 ++++++++++ + slirp/src/tftp.h | 54 ++ + slirp/src/udp.c | 365 ++++++++ slirp/src/udp.h | 90 ++ slirp/src/udp6.c | 173 ++++ slirp/src/util.c | 428 +++++++++ @@ -121,7 +121,7 @@ Signed-off-by: Danilo C. L. 
de Paula slirp/src/version.c | 8 + slirp/src/vmstate.c | 444 +++++++++ slirp/src/vmstate.h | 391 ++++++++ - 70 files changed, 16423 insertions(+), 3 deletions(-) + 70 files changed, 16440 insertions(+), 3 deletions(-) create mode 100644 slirp/.clang-format create mode 100644 slirp/.gitignore create mode 100644 slirp/.gitlab-ci.yml @@ -880,10 +880,10 @@ index 0000000000..3a27149373 +) diff --git a/slirp/src/arp_table.c b/slirp/src/arp_table.c new file mode 100644 -index 0000000000..054fbf5e10 +index 0000000000..959e5b9ec0 --- /dev/null +++ b/slirp/src/arp_table.c -@@ -0,0 +1,91 @@ +@@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: MIT */ +/* + * ARP table @@ -913,7 +913,8 @@ index 0000000000..054fbf5e10 + +#include + -+void arp_table_add(Slirp *slirp, uint32_t ip_addr, uint8_t ethaddr[ETH_ALEN]) ++void arp_table_add(Slirp *slirp, uint32_t ip_addr, ++ const uint8_t ethaddr[ETH_ALEN]) +{ + const uint32_t broadcast_addr = + ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; @@ -2033,7 +2034,7 @@ index 0000000000..d12c49b36c +#endif diff --git a/slirp/src/dnssearch.c b/slirp/src/dnssearch.c new file mode 100644 -index 0000000000..e8f14e34bb +index 0000000000..55497e860e --- /dev/null +++ b/slirp/src/dnssearch.c @@ -0,0 +1,306 @@ @@ -2278,7 +2279,7 @@ index 0000000000..e8f14e34bb + uint8_t *result = NULL, *outptr; + CompactDomain *domains = NULL; + -+ num_domains = g_strv_length((GStrv)names); ++ num_domains = g_strv_length((GStrv)(void *)names); + if (num_domains == 0) { + return -2; + } @@ -3063,10 +3064,10 @@ index 0000000000..0630309d29 +#endif diff --git a/slirp/src/ip6_icmp.c b/slirp/src/ip6_icmp.c new file mode 100644 -index 0000000000..28ec2bee35 +index 0000000000..d9c872bc97 --- /dev/null +++ b/slirp/src/ip6_icmp.c -@@ -0,0 +1,434 @@ +@@ -0,0 +1,433 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (c) 2013 @@ -3188,7 +3189,6 @@ index 0000000000..28ec2bee35 + break; + default: + g_assert_not_reached(); -+ break; + } + t->m_data += 
ICMP6_ERROR_MINLEN; + memcpy(t->m_data, m->m_data, error_data_len); @@ -3728,10 +3728,10 @@ index 0000000000..c37e60f28d +#endif diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c new file mode 100644 -index 0000000000..dfcbfd6a78 +index 0000000000..a83e4f8e3d --- /dev/null +++ b/slirp/src/ip6_input.c -@@ -0,0 +1,78 @@ +@@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (c) 2013 @@ -3778,11 +3778,18 @@ index 0000000000..dfcbfd6a78 + goto bad; + } + -+ if (ntohs(ip6->ip_pl) > slirp->if_mtu) { ++ if (ntohs(ip6->ip_pl) + sizeof(struct ip6) > slirp->if_mtu) { + icmp6_send_error(m, ICMP6_TOOBIG, 0); + goto bad; + } + ++ // Check if the message size is big enough to hold what's ++ // set in the payload length header. If not this is an invalid ++ // packet ++ if (m->m_len < ntohs(ip6->ip_pl) + sizeof(struct ip6)) { ++ goto bad; ++ } ++ + /* check ip_ttl for a correct ICMP reply */ + if (ip6->ip_hl == 0) { + icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); @@ -3857,10 +3864,10 @@ index 0000000000..b86110662c +} diff --git a/slirp/src/ip_icmp.c b/slirp/src/ip_icmp.c new file mode 100644 -index 0000000000..fe0add438d +index 0000000000..13a0e55085 --- /dev/null +++ b/slirp/src/ip_icmp.c -@@ -0,0 +1,489 @@ +@@ -0,0 +1,492 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (c) 1982, 1986, 1988, 1993 @@ -3938,8 +3945,11 @@ index 0000000000..fe0add438d + +void icmp_cleanup(Slirp *slirp) +{ -+ while (slirp->icmp.so_next != &slirp->icmp) { -+ icmp_detach(slirp->icmp.so_next); ++ struct socket *so, *so_next; ++ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { ++ so_next = so->so_next; ++ icmp_detach(so); + } +} + @@ -4137,8 +4147,8 @@ index 0000000000..fe0add438d + ip = mtod(msrc, struct ip *); + if (slirp_debug & DBG_MISC) { + char bufa[20], bufb[20]; -+ strcpy(bufa, inet_ntoa(ip->ip_src)); -+ strcpy(bufb, inet_ntoa(ip->ip_dst)); ++ slirp_pstrcpy(bufa, sizeof(bufa), inet_ntoa(ip->ip_src)); ++ 
slirp_pstrcpy(bufb, sizeof(bufb), inet_ntoa(ip->ip_dst)); + DEBUG_MISC(" %.16s to %.16s", bufa, bufb); + } + if (ip->ip_off & IP_OFFMASK) @@ -4524,7 +4534,7 @@ index 0000000000..84707db247 +#endif diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c new file mode 100644 -index 0000000000..89a01d45aa +index 0000000000..7f017a238a --- /dev/null +++ b/slirp/src/ip_input.c @@ -0,0 +1,461 @@ @@ -4984,7 +4994,7 @@ index 0000000000..89a01d45aa + olen = (ip->ip_hl << 2) - sizeof(struct ip); + opts = (char *)(ip + 1); + i = m->m_len - (sizeof(struct ip) + olen); -+ memcpy(opts, opts + olen, (unsigned)i); ++ memmove(opts, opts + olen, (unsigned)i); + m->m_len -= olen; + + ip->ip_hl = sizeof(struct ip) >> 2; @@ -6720,10 +6730,10 @@ index 0000000000..7795ad83ee +#endif /* NCSI_PKT_H */ diff --git a/slirp/src/ncsi.c b/slirp/src/ncsi.c new file mode 100644 -index 0000000000..ddd980d869 +index 0000000000..3c1dfef1ff --- /dev/null +++ b/slirp/src/ncsi.c -@@ -0,0 +1,192 @@ +@@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * NC-SI (Network Controller Sideband Interface) "echo" model @@ -6862,7 +6872,8 @@ index 0000000000..ddd980d869 + +void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) +{ -+ struct ncsi_pkt_hdr *nh = (struct ncsi_pkt_hdr *)(pkt + ETH_HLEN); ++ const struct ncsi_pkt_hdr *nh = ++ (const struct ncsi_pkt_hdr *)(pkt + ETH_HLEN); + uint8_t ncsi_reply[ETH_HLEN + NCSI_MAX_LEN]; + struct ethhdr *reh = (struct ethhdr *)ncsi_reply; + struct ncsi_rsp_pkt_hdr *rnh = @@ -7218,7 +7229,7 @@ index 0000000000..01886fbd01 +#endif diff --git a/slirp/src/slirp.c b/slirp/src/slirp.c new file mode 100644 -index 0000000000..14458e8510 +index 0000000000..dba7c98163 --- /dev/null +++ b/slirp/src/slirp.c @@ -0,0 +1,1185 @@ @@ -7968,7 +7979,8 @@ index 0000000000..14458e8510 + +static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) +{ -+ struct slirp_arphdr *ah = (struct slirp_arphdr *)(pkt + ETH_HLEN); ++ const struct slirp_arphdr *ah = ++ 
(const struct slirp_arphdr *)(pkt + ETH_HLEN); + uint8_t arp_reply[MAX(ETH_HLEN + sizeof(struct slirp_arphdr), 64)]; + struct ethhdr *reh = (struct ethhdr *)arp_reply; + struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_reply + ETH_HLEN); @@ -8191,7 +8203,6 @@ index 0000000000..14458e8510 + + default: + g_assert_not_reached(); -+ break; + } + + memcpy(eh->h_dest, ethaddr, ETH_ALEN); @@ -8409,10 +8420,10 @@ index 0000000000..14458e8510 +} diff --git a/slirp/src/slirp.h b/slirp/src/slirp.h new file mode 100644 -index 0000000000..32634bcc00 +index 0000000000..763a65b9ef --- /dev/null +++ b/slirp/src/slirp.h -@@ -0,0 +1,283 @@ +@@ -0,0 +1,284 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +#ifndef SLIRP_H +#define SLIRP_H @@ -8513,7 +8524,8 @@ index 0000000000..32634bcc00 + int next_victim; +} ArpTable; + -+void arp_table_add(Slirp *slirp, uint32_t ip_addr, uint8_t ethaddr[ETH_ALEN]); ++void arp_table_add(Slirp *slirp, uint32_t ip_addr, ++ const uint8_t ethaddr[ETH_ALEN]); + +bool arp_table_search(Slirp *slirp, uint32_t ip_addr, + uint8_t out_ethaddr[ETH_ALEN]); @@ -8698,10 +8710,10 @@ index 0000000000..32634bcc00 +#endif diff --git a/slirp/src/socket.c b/slirp/src/socket.c new file mode 100644 -index 0000000000..4cd9a64b3c +index 0000000000..1e385df0d8 --- /dev/null +++ b/slirp/src/socket.c -@@ -0,0 +1,957 @@ +@@ -0,0 +1,954 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (c) 1995 Danny Gasparovski. 
@@ -9255,7 +9267,6 @@ index 0000000000..4cd9a64b3c + break; + default: + g_assert_not_reached(); -+ break; + } + + /* @@ -9307,7 +9318,6 @@ index 0000000000..4cd9a64b3c + break; + default: + g_assert_not_reached(); -+ break; + } + m_free(m); + } else { @@ -9343,7 +9353,6 @@ index 0000000000..4cd9a64b3c + break; + default: + g_assert_not_reached(); -+ break; + } + } /* rx error */ + } /* if ping packet */ @@ -14316,10 +14325,10 @@ index 0000000000..d3df021493 +#endif diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c new file mode 100644 -index 0000000000..c209145282 +index 0000000000..c6950ee10f --- /dev/null +++ b/slirp/src/tftp.c -@@ -0,0 +1,462 @@ +@@ -0,0 +1,464 @@ +/* SPDX-License-Identifier: MIT */ +/* + * tftp.c - a simple, read-only tftp server for qemu @@ -14440,7 +14449,9 @@ index 0000000000..c209145282 + } + + if (len) { -+ lseek(spt->fd, block_nr * spt->block_size, SEEK_SET); ++ if (lseek(spt->fd, block_nr * spt->block_size, SEEK_SET) == (off_t)-1) { ++ return -1; ++ } + + bytes_read = read(spt->fd, buf, len); + } @@ -14784,16 +14795,18 @@ index 0000000000..c209145282 +} diff --git a/slirp/src/tftp.h b/slirp/src/tftp.h new file mode 100644 -index 0000000000..c47bb43c7d +index 0000000000..6d75478e83 --- /dev/null +++ b/slirp/src/tftp.h -@@ -0,0 +1,52 @@ +@@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* tftp defines */ + +#ifndef SLIRP_TFTP_H +#define SLIRP_TFTP_H + ++#include "util.h" ++ +#define TFTP_SESSIONS_MAX 20 + +#define TFTP_SERVER 69 @@ -14822,7 +14835,7 @@ index 0000000000..c47bb43c7d + } tp_error; + char tp_buf[TFTP_BLOCKSIZE_MAX + 2]; + } x; -+} __attribute__((packed)); ++} SLIRP_PACKED; + +struct tftp_session { + Slirp *slirp; @@ -14842,10 +14855,10 @@ index 0000000000..c47bb43c7d +#endif diff --git a/slirp/src/udp.c b/slirp/src/udp.c new file mode 100644 -index 0000000000..6bde20fafa +index 0000000000..0ad44d7c03 --- /dev/null +++ b/slirp/src/udp.c -@@ -0,0 +1,361 @@ +@@ -0,0 +1,365 @@ +/* SPDX-License-Identifier: 
BSD-3-Clause */ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 @@ -14900,7 +14913,10 @@ index 0000000000..6bde20fafa + +void udp_cleanup(Slirp *slirp) +{ -+ while (slirp->udb.so_next != &slirp->udb) { ++ struct socket *so, *so_next; ++ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { ++ so_next = so->so_next; + udp_detach(slirp->udb.so_next); + } +} @@ -15174,6 +15190,7 @@ index 0000000000..6bde20fafa + struct socket *so; + socklen_t addrlen = sizeof(struct sockaddr_in); + ++ memset(&addr, 0, sizeof(addr)); + so = socreate(slirp); + so->s = slirp_socket(AF_INET, SOCK_DGRAM, 0); + if (so->s < 0) { @@ -15484,7 +15501,7 @@ index 0000000000..6f9486bbca +} diff --git a/slirp/src/util.c b/slirp/src/util.c new file mode 100644 -index 0000000000..570c53f303 +index 0000000000..d3ed5faf8b --- /dev/null +++ b/slirp/src/util.c @@ -0,0 +1,428 @@ @@ -15882,7 +15899,7 @@ index 0000000000..570c53f303 + rv = slirp_vsnprintf(str, size, format, args); + va_end(args); + -+ if (rv > size) { ++ if (rv >= size) { + g_critical("slirp_fmt() truncation"); + } + diff --git a/0005-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch index c2d0dad..c3341be 100644 --- a/0005-Initial-redhat-build.patch +++ b/0005-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From 9f51ea3d645d7da3eaf55f3e5cedc4cd42981efc Mon Sep 17 00:00:00 2001 +From cf7532e0c854b385ee7acdf5788bc407172f7ae9 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build @@ -47,52 +47,24 @@ Rebase notes (4.2.0): - Removed spapr-rtas.bin (upstream) - Require newer SLOF (20191022) -Rebase notes (weekly-200115): -- Added index.html (upstream) - -Rebase notes (weekly-200122): +Rebase notes (5.1.0): - Use python3 for virtio_seg_max_adjust.py test - Removed qemu-trace-stap shebang from spec file - -Rebase notes (weekly-200129): -- Ship docs/qemu-kvm/system help files (added upstream) - -Rebase notes (weekly-200212): - Added virtiofsd.1 (upstream) - 
-Rebase notes (weekly-200219): - Use out-of-tree build - -Rebase notes (weekly-200226): -- added tools documentation (upstream) - -Rebase notes (weekly-200304): +- New documentation structure (upstream) - Update local build - -Rebase notes (weekly-200311): -- Add docs/qemu-kvm/user help files (added upstream) - Removing installed qemu-storage-daemon (added upstream) - -Rebase notes (weekly-200318): - Removing opensbi-riscv32-sifive_u-fw_jump.bin (added upstream) - -Rebase notes (weekly-200325): - Disable iotests (moved from Enable make check commit) - -Rebase notes (5.0.0 rc2): - Added missing configure options - -Rebase notes (5.0.0 rc3): - Reorder configure options - -Rebase notes (weekly-200617): -- Fixing qemu-kvm-tests rpm content - - Fixed python shenigans - - Include all qemu-iotests files - qemu-pr-helper moved to /usr/libexec/ (upstream) - -Rebase notes (weekly-200701): -- Moved qemu-ga-ref.* and qemu-qmp-ref.* docs files to interop diretory (upstream) +- Added submodules for usb-redir, smartcard-reader and qxl display (upstream) +- Added setting rc version in Makefile for build +- removed --disable-vxhs configure option (removed upstream) +- bumped required libusbx-devel version to 1.0.23 +- bumped libfdt version to 1.6.0 Merged patches (3.1.0): - 01f0c9f RHEL8: Add disable configure options to qemu spec file @@ -116,15 +88,11 @@ Merged patches (4.2.0): - 69e1fb2 enable virgla - d4f6115 enable virgl, for real this time ... -Merged patches (weekly-200205): +Merged patches (5.1.0): - 5edf6bd Add support for rh-brew-module - f77d52d redhat: ship virtiofsd vhost-user device backend - -Merged patches (weekly-200610): - 63f12d4 redhat: Always use module build target for rh-brew (modified) - 9b1e140 redhat: updating the modular target - -Merged patches (weekly-200617): - 44b8bd0 spec: Fix python shenigans for tests Signed-off-by: Danilo C. L. de Paula @@ -133,22 +101,22 @@ Signed-off-by: Danilo C. L. 
de Paula Makefile | 3 +- configure | 1 + redhat/Makefile | 86 + - redhat/Makefile.common | 53 + + redhat/Makefile.common | 54 + redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 2961 +++++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 2977 +++++++++++++++++++++++++++ redhat/qemu-pr-helper.service | 2 +- redhat/scripts/extract_build_cmd.py | 2 +- - redhat/scripts/process-patches.sh | 7 +- + redhat/scripts/process-patches.sh | 17 +- tests/check-block.sh | 2 + ui/vnc.c | 2 +- - 12 files changed, 3150 insertions(+), 9 deletions(-) + 12 files changed, 3173 insertions(+), 13 deletions(-) create mode 100644 redhat/Makefile create mode 100644 redhat/Makefile.common create mode 100644 redhat/README.tests create mode 100644 redhat/qemu-kvm.spec.template diff --git a/Makefile b/Makefile -index b1b8a5a6d0..88b7565246 100644 +index 13dd708c4a..42e854b2b1 100644 --- a/Makefile +++ b/Makefile @@ -549,6 +549,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM @@ -159,7 +127,7 @@ index b1b8a5a6d0..88b7565246 100644 .PHONY: capstone/all capstone/all: .git-submodule-status -@@ -878,7 +879,7 @@ install-doc: $(DOCS) install-sphinxdocs +@@ -879,7 +880,7 @@ install-doc: $(DOCS) install-sphinxdocs $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)/interop" ifdef CONFIG_POSIX $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" @@ -169,10 +137,10 @@ index b1b8a5a6d0..88b7565246 100644 $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" $(INSTALL_DATA) $(MANUAL_BUILDDIR)/system/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" diff --git a/configure b/configure -index ee6c3c6792..bd957a5a26 100755 +index 2acc4d1465..eb5b695dbe 100755 --- a/configure +++ b/configure -@@ -2610,6 +2610,7 @@ if test "$seccomp" != "no" ; then +@@ -2633,6 +2633,7 @@ if test "$seccomp" != "no" ; then seccomp="no" fi fi @@ -194,7 +162,7 @@ index 8e29c868e5..e9bcb5ac27 100755 ret=0 diff --git a/ui/vnc.c b/ui/vnc.c -index 527ad25124..8099c0ee13 100644 +index f006aa1afd..992f428fec 
100644 --- a/ui/vnc.c +++ b/ui/vnc.c @@ -3970,7 +3970,7 @@ void vnc_display_open(const char *id, Error **errp) diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0006-Enable-disable-devices-for-RHEL.patch index 12b0b4a..5b44b6b 100644 --- a/0006-Enable-disable-devices-for-RHEL.patch +++ b/0006-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From c956c1d145adb87a8a830bf6091dac80b7925054 Mon Sep 17 00:00:00 2001 +From 3c93dbb29fed4f555904494efe9b823310a14604 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 11 Jan 2016 11:53:33 +0100 Subject: Enable/disable devices for RHEL @@ -39,17 +39,13 @@ Rebase notes (4.2.0-rc3): - Disabled ccid-card-emulated (patch 92566) - Disabled vfio-pci-igd-lpc-bridge (patch 92565) -Rebase notes (weekly-200205): +Rebase notes (5.1.0): - added CONFIG_PCI_EXPRESS on ppc64 (due to upstream dependency) - -Rebase notes (weekly-200226): - Added CONFIG_NVDIMM - -Rebase notes (weekly-200513) - updated cortex-15 disabling to upstream code - -Rebase notes (weekly-200520): - Add CONFIG_ACPI_APEI for aarch64 +- removed obsolete hw/bt/Makefile.objs chunk +- removed unnecessary changes in target/i386/cpu.c Merged patches (qemu 3.1.0): - d51e082 Re-enable CONFIG_HYPERV_TESTDEV @@ -69,13 +65,9 @@ Merged patches (4.1.0): Merged patches (4.2.0): - f7587dd RHEL: disable hostmem-memfd -Merged patches (weekly-200108): +Merged patches (weekly-5.1.0): - 4543a3c i386: Remove cpu64-rhel6 CPU model - -Merged patches (weekly-200520): - 96533 aarch64: Remove tcg cpu types (pjw commit) - -Merged patches (weekly-200610): - 559d589 Revert "RHEL: disable hostmem-memfd" - 441128e enable ramfb @@ -93,7 +85,6 @@ Signed-off-by: Danilo C. L. de Paula hw/acpi/ich9.c | 4 +- hw/arm/Makefile.objs | 2 +- hw/block/fdc.c | 10 +++ - hw/bt/Makefile.objs | 3 + hw/cpu/Makefile.objs | 5 +- hw/display/cirrus_vga.c | 3 + hw/ide/piix.c | 5 +- @@ -106,17 +97,16 @@ Signed-off-by: Danilo C. L. 
de Paula softmmu/vl.c | 2 +- target/arm/cpu.c | 4 +- target/arm/cpu_tcg.c | 3 + - target/i386/cpu.c | 17 +++-- + target/i386/cpu.c | 1 + target/ppc/cpu-models.c | 10 +++ target/s390x/cpu_models.c | 3 + target/s390x/kvm.c | 8 ++ - 29 files changed, 274 insertions(+), 33 deletions(-) + 28 files changed, 263 insertions(+), 25 deletions(-) create mode 100644 default-configs/aarch64-rh-devices.mak create mode 100644 default-configs/ppc64-rh-devices.mak create mode 100644 default-configs/rh-virtio.mak create mode 100644 default-configs/s390x-rh-devices.mak create mode 100644 default-configs/x86_64-rh-devices.mak - create mode 100644 hw/bt/Makefile.objs diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak new file mode 100644 @@ -394,7 +384,7 @@ index 64b2ee2960..b5de7e5279 100644 + +include x86_64-rh-devices.mak diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 2d204babc6..7ead029e68 100644 +index 6a19070cec..bb8379f6a7 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -374,8 +374,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) @@ -422,7 +412,7 @@ index 534a6a119e..bd62442b54 100644 obj-$(CONFIG_PXA2XX) += pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o obj-$(CONFIG_DIGIC) += digic.o diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 3425d56e2a..f4ae220905 100644 +index e9ed3eef45..965528a512 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -47,6 +47,8 @@ @@ -434,7 +424,7 @@ index 3425d56e2a..f4ae220905 100644 /********************************************************/ /* debug Floppy devices */ -@@ -2625,6 +2627,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, +@@ -2621,6 +2623,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, int i, j; static int command_tables_inited = 0; @@ -449,15 +439,6 @@ index 3425d56e2a..f4ae220905 100644 if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); return; -diff --git 
a/hw/bt/Makefile.objs b/hw/bt/Makefile.objs -new file mode 100644 -index 0000000000..e678e9ee3c ---- /dev/null -+++ b/hw/bt/Makefile.objs -@@ -0,0 +1,3 @@ -+#common-obj-y += core.o l2cap.o sdp.o hci.o hid.o -+#common-obj-y += hci-csr.o -+ diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs index 8db9e8a7b3..1601ea93c7 100644 --- a/hw/cpu/Makefile.objs @@ -542,10 +523,10 @@ index a18f80e369..960b2f00ee 100644 static void e1000_register_types(void) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 26ad566f42..bfdf6b3e5c 100644 +index c4f47dcc04..6a2239d5e5 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -396,10 +396,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { +@@ -389,10 +389,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { .instance_size = sizeof(SpaprCpuCore), .class_size = sizeof(SpaprCpuCoreClass), }, @@ -559,25 +540,25 @@ index 26ad566f42..bfdf6b3e5c 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs -index fa5c3fa1b8..854094c4f2 100644 +index e342ff59fa..abf044a20f 100644 --- a/hw/usb/Makefile.objs +++ b/hw/usb/Makefile.objs -@@ -30,7 +30,9 @@ common-obj-$(CONFIG_USB_NETWORK) += dev-network.o - ifeq ($(CONFIG_USB_SMARTCARD),y) +@@ -31,7 +31,9 @@ ifeq ($(CONFIG_USB_SMARTCARD),y) common-obj-y += dev-smartcard-reader.o - common-obj-$(CONFIG_SMARTCARD) += smartcard.mo + ifeq ($(CONFIG_SMARTCARD),y) + common-obj-m += smartcard.mo -smartcard.mo-objs := ccid-card-passthru.o ccid-card-emulated.o +# Disabled for Red Hat Enterprise Linux: -+# smartcard.mo-objs := ccid-card-passthru.o ccid-card-emulated.o ++#smartcard.mo-objs := ccid-card-passthru.o ccid-card-emulated.o +smartcard.mo-objs := ccid-card-passthru.o smartcard.mo-cflags := $(SMARTCARD_CFLAGS) smartcard.mo-libs := $(SMARTCARD_LIBS) endif diff --git a/qemu-options.hx b/qemu-options.hx -index fa1b19de4c..b9699440a3 100644 +index 
708583b4ce..1700205035 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -2248,10 +2248,6 @@ ERST +@@ -2257,10 +2257,6 @@ ERST DEF("no-hpet", 0, QEMU_OPTION_no_hpet, "-no-hpet disable HPET\n", QEMU_ARCH_I386) @@ -589,7 +570,7 @@ index fa1b19de4c..b9699440a3 100644 DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" diff --git a/softmmu/vl.c b/softmmu/vl.c -index 9da2e23144..6509057752 100644 +index 4eb9d1f7fd..a1fb06dca0 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -145,7 +145,7 @@ static Chardev **serial_hds; @@ -602,10 +583,10 @@ index 9da2e23144..6509057752 100644 static int no_reboot; int no_shutdown = 0; diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 5050e1843a..79adfe25c4 100644 +index 111579554f..13ad40aa7d 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c -@@ -2335,7 +2335,9 @@ static void arm_cpu_register_types(void) +@@ -2338,7 +2338,9 @@ static void arm_cpu_register_types(void) type_register_static(&idau_interface_type_info); for (i = 0; i < cpu_count; ++i) { @@ -631,10 +612,10 @@ index 00b0e08f33..94d429b61c 100644 arm_cpu_register(&arm_tcg_cpus[i]); } diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 36cbd3d027..55a30cd4d9 100644 +index 588f32e136..030a5a09ed 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1825,20 +1825,21 @@ static CPUCaches epyc_rome_cache_info = { +@@ -1825,6 +1825,7 @@ static CPUCaches epyc_rome_cache_info = { static X86CPUDefinition builtin_x86_defs[] = { { @@ -642,28 +623,6 @@ index 36cbd3d027..55a30cd4d9 100644 .name = "qemu64", .level = 0xd, .vendor = CPUID_VENDOR_AMD, - .family = 6, - .model = 6, - .stepping = 3, -- .features[FEAT_1_EDX] = -- PPRO_FEATURES | -- CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA | -- CPUID_PSE36, -- .features[FEAT_1_ECX] = -- CPUID_EXT_SSE3 | CPUID_EXT_CX16, -- .features[FEAT_8000_0001_EDX] = -- CPUID_EXT2_LM | 
CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, -+ .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | -+ CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -+ CPUID_MCA | CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | -+ CPUID_CX8 | CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | -+ CPUID_PSE | CPUID_DE | CPUID_FP87, -+ .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, -+ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | -+ CPUID_EXT2_SYSCALL, - .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM, - .xlevel = 0x8000000A, diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c index 4ad16863c0..16b2185fd8 100644 --- a/target/ppc/cpu-models.c @@ -735,7 +694,7 @@ index 4ad16863c0..16b2185fd8 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 2fa609bffe..f6bee3204c 100644 +index c2af226174..e35bf745dd 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -404,6 +404,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, diff --git a/0007-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch index 467366b..32ede92 100644 --- a/0007-Machine-type-related-general-changes.patch +++ b/0007-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From cbbdd67535fc6da1b77b2fa9f5368f72c211eeb1 Mon Sep 17 00:00:00 2001 +From a86a622ecc7b0b3c66d21fdd1c5dd279bfc75a03 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -70,7 +70,7 @@ Signed-off-by: Danilo C. L. 
de Paula 23 files changed, 354 insertions(+), 11 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 7ead029e68..3b8501fa38 100644 +index bb8379f6a7..43ad1ff927 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -369,6 +369,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) @@ -103,7 +103,7 @@ index 7ead029e68..3b8501fa38 100644 &pm->disable_s3, OBJ_PROP_FLAG_READWRITE); object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S4_DISABLED, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 283422e0d3..f6642011c6 100644 +index 26bac4f16c..7c2e17ceb6 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -276,6 +276,7 @@ static const VMStateDescription vmstate_acpi = { @@ -126,7 +126,7 @@ index 283422e0d3..f6642011c6 100644 DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_hotplug_bridge, true), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 7d9f7157da..e30b837135 100644 +index ecfee362a1..43cf75333b 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1450,7 +1450,7 @@ static void virt_build_smbios(VirtMachineState *vms) @@ -139,7 +139,7 @@ index 7d9f7157da..e30b837135 100644 smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, &smbios_anchor, &smbios_anchor_len); diff --git a/hw/char/serial.c b/hw/char/serial.c -index 9eebcb27e7..11d2d08912 100644 +index 2386479492..c83f816f42 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -35,6 +35,7 @@ @@ -194,7 +194,7 @@ index 9eebcb27e7..11d2d08912 100644 } diff --git a/hw/core/machine.c b/hw/core/machine.c -index 211b4e077a..ef6b320ea7 100644 +index 8d1a90c6cf..2b8e480040 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -28,6 +28,176 @@ @@ -372,8 +372,8 @@ index 211b4e077a..ef6b320ea7 100644 +const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); + GlobalProperty hw_compat_5_0[] = { + { "pci-host-bridge", "x-config-reg-migration-enabled", "off" }, { "virtio-balloon-device", "page-poison", "false" }, - { 
"vmport", "x-read-set-eax", "off" }, diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c index 3aaeeeca1e..d88f52a587 100644 --- a/hw/display/vga-isa.c @@ -388,7 +388,7 @@ index 3aaeeeca1e..d88f52a587 100644 }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index fae487f57d..18815e8302 100644 +index b789e83f9a..0ecdd57689 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -179,6 +179,8 @@ static void pc_init1(MachineState *machine, @@ -401,7 +401,7 @@ index fae487f57d..18815e8302 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index acd6d405f0..a1131e6825 100644 +index a3e607a544..b8ea764ce3 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -204,6 +204,8 @@ static void pc_q35_init(MachineState *machine) @@ -535,7 +535,7 @@ index 7a38540cb9..377d861913 100644 } diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index ffd98727ee..7818b90c96 100644 +index f560826904..8875e83941 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -56,6 +56,9 @@ static bool smbios_legacy = true; @@ -650,7 +650,7 @@ index 37f7beb3fa..2741edc589 100644 if (s->masterbus) { USBPort *ports[NB_PORTS]; diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index b330e36fe6..b25cce8f0c 100644 +index 67a18fe2b6..38bdfaf3fd 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -3600,9 +3600,27 @@ static const VMStateDescription vmstate_xhci_slot = { @@ -767,10 +767,10 @@ index 02a0ced0a0..67e38a1b13 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index dce1273c7d..665c4309a2 100644 +index 3d7ed3a55e..951e825778 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -102,6 +102,9 @@ typedef struct PCMachineClass { +@@ -103,6 +103,9 @@ typedef struct PCMachineClass { bool smbios_defaults; bool smbios_legacy_mode; bool smbios_uuid_encoded; @@ -794,10 +794,10 @@ index e29a37635b..35ac38c459 100644 + #endif diff --git a/migration/migration.c 
b/migration/migration.c -index 92e44e021e..67d3b75485 100644 +index 8fe36339db..bf684185b7 100644 --- a/migration/migration.c +++ b/migration/migration.c -@@ -128,6 +128,8 @@ enum mig_rp_message_type { +@@ -129,6 +129,8 @@ enum mig_rp_message_type { MIG_RP_MSG_MAX }; @@ -807,10 +807,10 @@ index 92e44e021e..67d3b75485 100644 migrations at once. For now we don't need to add dynamic creation of migration */ diff --git a/migration/migration.h b/migration/migration.h -index f617960522..b8bc10d16d 100644 +index 6c6a931d0d..721e272713 100644 --- a/migration/migration.h +++ b/migration/migration.h -@@ -339,6 +339,11 @@ void init_dirty_bitmap_incoming_migration(void); +@@ -340,6 +340,11 @@ void dirty_bitmap_mig_cancel_incoming(void); void migrate_add_address(SocketAddress *address); int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); diff --git a/0008-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch index 9e569fb..7d60da2 100644 --- a/0008-Add-aarch64-machine-types.patch +++ b/0008-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 1f0568a29b004dd3557d5405fddc6979ac5ff911 Mon Sep 17 00:00:00 2001 +From 0ba70804c179d934e5be555abff3c4455ac137a0 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -35,6 +35,9 @@ Rebase notes (weekly-200520): Rebase notes (weekly-200701): - Added to virt_machine_device_unplug_cb to machine type (upstream) +Rebase notes (5.1.0-rc1): +- added mte property (upstream) + Merged patches (4.0.0): - 7bfdb4c aarch64: Add virt-rhel8.0.0 machine type for ARM - 3433e69 aarch64: Set virt-rhel8.0.0 max_cpus to 512 @@ -46,12 +49,12 @@ Merged patches (4.1.0): Signed-off-by: Danilo C. L. 
de Paula --- - hw/arm/virt.c | 172 +++++++++++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 180 +++++++++++++++++++++++++++++++++++++++++- include/hw/arm/virt.h | 11 +++ - 2 files changed, 182 insertions(+), 1 deletion(-) + 2 files changed, 188 insertions(+), 3 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index e30b837135..5a45677205 100644 +index 43cf75333b..e1a17e7c87 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -79,6 +79,7 @@ @@ -113,7 +116,7 @@ index e30b837135..5a45677205 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -1964,6 +2007,7 @@ static void machvirt_init(MachineState *machine) +@@ -1979,6 +2022,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -121,7 +124,7 @@ index e30b837135..5a45677205 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -1992,6 +2036,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2007,6 +2051,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -129,7 +132,25 @@ index e30b837135..5a45677205 100644 static bool virt_get_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2367,6 +2412,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2073,7 +2118,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) + + vms->ras = value; + } +- ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_mte(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2087,7 +2132,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) + + vms->mte = value; + } +- ++#endif + static char *virt_get_gic_version(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2401,6 +2446,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) 
return requested_pa_size > 40 ? requested_pa_size : 0; } @@ -137,7 +158,7 @@ index e30b837135..5a45677205 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -2637,3 +2683,127 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -2679,3 +2725,131 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -256,6 +277,10 @@ index e30b837135..5a45677205 100644 + object_property_set_description(obj, "ras", + "Set on/off to enable/disable reporting host memory errors " + "to a KVM guest using ACPI and guest external abort exceptions"); ++ ++ /* MTE is disabled by default. */ ++ vms->mte = false; ++ + vms->irqmap=a15irqmap; + virt_flash_create(vms); +} @@ -266,10 +291,10 @@ index e30b837135..5a45677205 100644 +} +DEFINE_RHEL_MACHINE_AS_LATEST(8, 2, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 54bcf17afd..5fdabd87d6 100644 +index dff67e1bef..7c1e085749 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -165,6 +165,7 @@ typedef struct { +@@ -166,6 +166,7 @@ typedef struct { #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -277,7 +302,7 @@ index 54bcf17afd..5fdabd87d6 100644 #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") #define VIRT_MACHINE(obj) \ OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) -@@ -173,6 +174,16 @@ typedef struct { +@@ -174,6 +175,16 @@ typedef struct { #define VIRT_MACHINE_CLASS(klass) \ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) diff --git a/0009-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch index c8e6c81..cca2073 100644 --- a/0009-Add-ppc64-machine-types.patch +++ b/0009-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 59c55f3104aa650b5b2a31150a34646cc7018b77 Mon Sep 17 00:00:00 2001 +From da49e223e70695ec4ecc5668658d836346e7f29c Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -44,10 +44,10 @@ Signed-off-by: Danilo C. L. de Paula 5 files changed, 307 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index f6f034d039..f30618e4b1 100644 +index 0ae293ec94..756c8667c1 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -4530,6 +4530,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4529,6 +4529,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->smp_threads_vsmt = true; smc->nr_xirqs = SPAPR_NR_XIRQS; xfc->match_nvt = spapr_match_nvt; @@ -55,7 +55,7 @@ index f6f034d039..f30618e4b1 100644 } static const TypeInfo spapr_machine_info = { -@@ -4580,6 +4581,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4579,6 +4580,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) @@ -63,7 +63,7 @@ index f6f034d039..f30618e4b1 100644 /* * pseries-5.1 */ -@@ -4638,6 +4640,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4644,6 +4646,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, 
"4.1", false); @@ -71,7 +71,7 @@ index f6f034d039..f30618e4b1 100644 /* * pseries-4.0 -@@ -4654,6 +4657,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -4660,6 +4663,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; } @@ -79,7 +79,7 @@ index f6f034d039..f30618e4b1 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4813,6 +4817,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); +@@ -4819,6 +4823,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); /* * pseries-2.7 */ @@ -87,7 +87,7 @@ index f6f034d039..f30618e4b1 100644 static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, -@@ -4867,6 +4872,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, +@@ -4873,6 +4878,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; } @@ -95,7 +95,7 @@ index f6f034d039..f30618e4b1 100644 static void spapr_machine_2_7_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4981,6 +4987,280 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -4987,6 +4993,280 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -377,7 +377,7 @@ index f6f034d039..f30618e4b1 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index bfdf6b3e5c..39fcaf855b 100644 +index 6a2239d5e5..f228f8bb75 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -24,6 +24,7 @@ @@ -394,9 +394,9 @@ index bfdf6b3e5c..39fcaf855b 100644 Error *local_err = NULL; + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); - qdev_realize(DEVICE(cpu), NULL, &local_err); - if (local_err) { -@@ -250,6 +252,17 @@ static void 
spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + if (!qdev_realize(DEVICE(cpu), NULL, errp)) { + return; +@@ -249,6 +251,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); kvmppc_set_papr(cpu); @@ -412,22 +412,22 @@ index bfdf6b3e5c..39fcaf855b 100644 + } + if (spapr_irq_cpu_intc_create(spapr, cpu, &local_err) < 0) { - goto error_intc_create; - } + cpu_remove_sync(CPU(cpu)); + return; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index c421410e3f..5190d6a936 100644 +index 3134d339e8..f48089edba 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -130,6 +130,7 @@ struct SpaprMachineClass { - bool smp_threads_vsmt; /* set VSMT to smp_threads by default */ +@@ -131,6 +131,7 @@ struct SpaprMachineClass { hwaddr rma_limit; /* clamp the RMA to this size */ + bool pre_5_1_assoc_refpoints; + bool has_power9_support; void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, diff --git a/target/ppc/compat.c b/target/ppc/compat.c -index fda0dfe8f8..ab8e3b2125 100644 +index 08aede88dc..b193445aca 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) diff --git a/0010-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch index d363b4b..0d4e3ef 100644 --- a/0010-Add-s390x-machine-types.patch +++ b/0010-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 92594368e1369e85d1b87dd1a65408a4f594cf09 Mon Sep 17 00:00:00 2001 +From dc0914b5e43a9925217af8ddde44194176822108 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -27,10 +27,10 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 023fd25f2b..0e8dd62b18 100644 +index e72c61d2ea..a6a37cce94 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -778,7 +778,7 @@ bool css_migration_enabled(void) +@@ -777,7 +777,7 @@ bool css_migration_enabled(void) { \ MachineClass *mc = MACHINE_CLASS(oc); \ ccw_machine_##suffix##_class_options(mc); \ @@ -39,7 +39,7 @@ index 023fd25f2b..0e8dd62b18 100644 if (latest) { \ mc->alias = "s390-ccw-virtio"; \ mc->is_default = true; \ -@@ -802,6 +802,7 @@ bool css_migration_enabled(void) +@@ -801,6 +801,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) @@ -47,7 +47,7 @@ index 023fd25f2b..0e8dd62b18 100644 static void ccw_machine_5_1_instance_options(MachineState *machine) { } -@@ -1054,6 +1055,74 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1053,6 +1054,74 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); diff --git a/0011-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch index 45c1f1b..5f827db 100644 --- a/0011-Add-x86_64-machine-types.patch +++ b/0011-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 31a74446c5312b248bbc6093f21aee3500b96fca Mon Sep 17 00:00:00 2001 +From 45d5ef7594e5b07d3a975feea8e72541402a797f Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -70,10 +70,10 @@ index b7bcbbbb2a..fe815c5403 100644 } diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 61acc9e530..fe37bdec7a 100644 +index 47c5ca3e34..063f01d19a 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -346,6 +346,263 @@ GlobalProperty pc_compat_1_4[] = { +@@ -347,6 +347,263 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ 
-337,7 +337,7 @@ index 61acc9e530..fe37bdec7a 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1011,7 +1268,8 @@ void pc_memory_init(PCMachineState *pcms, +@@ -1012,7 +1269,8 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr = g_malloc(sizeof(*option_rom_mr)); memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, &error_fatal); @@ -347,7 +347,7 @@ index 61acc9e530..fe37bdec7a 100644 memory_region_set_readonly(option_rom_mr, true); } memory_region_add_subregion_overlap(rom_memory, -@@ -1969,6 +2227,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1956,6 +2214,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->linuxboot_dma_enabled = true; pcmc->pvh_enabled = true; assert(!mc->get_hotplug_handler); @@ -356,7 +356,7 @@ index 61acc9e530..fe37bdec7a 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1981,7 +2241,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1968,7 +2228,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->hot_add_cpu = pc_hot_add_cpu; mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; @@ -367,7 +367,7 @@ index 61acc9e530..fe37bdec7a 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 18815e8302..eeadd896c2 100644 +index 0ecdd57689..6d935645b6 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -54,6 +54,7 @@ @@ -389,7 +389,7 @@ index 18815e8302..eeadd896c2 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -314,6 +315,7 @@ static void pc_init1(MachineState *machine, +@@ -316,6 +317,7 @@ static void pc_init1(MachineState *machine, * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). 
*/ @@ -397,7 +397,7 @@ index 18815e8302..eeadd896c2 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -992,3 +994,207 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -994,3 +996,207 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -606,7 +606,7 @@ index 18815e8302..eeadd896c2 100644 +DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index a1131e6825..1cd4e15297 100644 +index b8ea764ce3..f4edb049d6 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -201,8 +201,8 @@ static void pc_q35_init(MachineState *machine) @@ -620,7 +620,7 @@ index a1131e6825..1cd4e15297 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -337,6 +337,7 @@ static void pc_q35_init(MachineState *machine) +@@ -336,6 +336,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -628,7 +628,7 @@ index a1131e6825..1cd4e15297 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -565,3 +566,160 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -564,3 +565,160 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -803,10 +803,10 @@ index 1062df96c0..c3577319c0 100644 int nb_nodes, ram_addr_t size); bool ignore_boot_device_suffixes; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 665c4309a2..95d07f81a0 100644 +index 951e825778..156be22995 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -120,6 +120,9 @@ typedef struct PCMachineClass { +@@ -121,6 +121,9 @@ typedef struct PCMachineClass { /* use PVH to load kernels that support this feature */ bool 
pvh_enabled; @@ -816,7 +816,7 @@ index 665c4309a2..95d07f81a0 100644 } PCMachineClass; #define TYPE_PC_MACHINE "generic-pc-machine" -@@ -264,6 +267,36 @@ extern const size_t pc_compat_1_5_len; +@@ -265,6 +268,36 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; @@ -854,7 +854,7 @@ index 665c4309a2..95d07f81a0 100644 * depending on QEMU versions up to QEMU 2.4. */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 55a30cd4d9..5477433af5 100644 +index 030a5a09ed..cdaa1463f2 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1830,7 +1830,7 @@ static X86CPUDefinition builtin_x86_defs[] = { @@ -864,9 +864,9 @@ index 55a30cd4d9..5477433af5 100644 - .model = 6, + .model = 13, .stepping = 3, - .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | - CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | -@@ -4114,6 +4114,7 @@ static PropValue kvm_default_props[] = { + .features[FEAT_1_EDX] = + PPRO_FEATURES | +@@ -4142,6 +4142,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -875,10 +875,10 @@ index 55a30cd4d9..5477433af5 100644 }; diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 6adbff3d74..51fbfd528e 100644 +index 6f18d940a5..98249b2e3b 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c -@@ -3139,6 +3139,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3135,6 +3135,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -886,7 +886,7 @@ index 6adbff3d74..51fbfd528e 100644 kvm_msr_buf_reset(cpu); -@@ -3448,6 +3449,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3444,6 +3445,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; diff --git a/0012-Enable-make-check.patch b/0012-Enable-make-check.patch index 52af98e..1ce3d04 100644 --- a/0012-Enable-make-check.patch +++ 
b/0012-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 93f9f7beccd34102c5c8e25c3b2b3888c61aa063 Mon Sep 17 00:00:00 2001 +From 69de445124e71df949a3d5289f8a10a5f771bac2 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:48:41 +0200 Subject: Enable make check @@ -28,6 +28,9 @@ Rebase changes (weekly-200129): Rebased changes (weekly-200212): - Do not run iotests on make checka +Rebase changes (5.1.0-rc1): +- Enabled iotests 071 and 099 + Merged patches (4.0.0): - f7ffd13 Remove 7 qcow2 and luks iotests that are taking > 25 sec to run during the fast train build proce @@ -38,7 +41,6 @@ Signed-off-by: Danilo C. L. de Paula --- redhat/qemu-kvm.spec.template | 2 +- tests/qemu-iotests/051 | 12 ++++++------ - tests/qemu-iotests/group | 4 ++-- tests/qtest/Makefile.include | 12 ++++++------ tests/qtest/boot-serial-test.c | 6 +++++- tests/qtest/cpu-plug-test.c | 4 ++-- @@ -47,10 +49,10 @@ Signed-off-by: Danilo C. L. de Paula tests/qtest/prom-env-test.c | 4 ++++ tests/qtest/test-x86-cpuid-compat.c | 2 ++ tests/qtest/usb-hcd-xhci-test.c | 4 ++++ - 11 files changed, 38 insertions(+), 18 deletions(-) + 10 files changed, 36 insertions(+), 16 deletions(-) diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 -index 034d3a3250..aadc413ee6 100755 +index bee26075b2..61d25c4ed7 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -183,11 +183,11 @@ run_qemu -drive if=virtio @@ -83,30 +85,8 @@ index 034d3a3250..aadc413ee6 100755 ;; *) ;; -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 9b07a7ed03..c678d915b2 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -92,7 +92,7 @@ - 068 rw quick - 069 rw auto quick - 070 rw quick --071 rw auto quick -+# 071 rw auto quick -- requires whitelisted blkverify - 072 rw auto quick - 073 rw auto quick - 074 rw auto quick -@@ -120,7 +120,7 @@ - 096 rw quick - 097 rw auto backing - 098 rw auto backing quick --099 rw auto quick -+# 099 rw auto quick -- requires 
whitelisted blkverify - # 100 was removed, do not reuse - 101 rw quick - 102 rw quick diff --git a/tests/qtest/Makefile.include b/tests/qtest/Makefile.include -index 98af2c2d93..d370459c3a 100644 +index b0204e44f2..cf8a138791 100644 --- a/tests/qtest/Makefile.include +++ b/tests/qtest/Makefile.include @@ -29,7 +29,7 @@ check-qtest-i386-y += ide-test @@ -127,7 +107,7 @@ index 98af2c2d93..d370459c3a 100644 check-qtest-i386-y += vmgenid-test check-qtest-i386-$(CONFIG_TPM_CRB) += tpm-crb-swtpm-test check-qtest-i386-$(CONFIG_TPM_CRB) += tpm-crb-test -@@ -88,7 +88,7 @@ check-qtest-mips64el-$(CONFIG_VGA) += display-vga-test +@@ -90,7 +90,7 @@ check-qtest-mips64el-$(CONFIG_VGA) += display-vga-test check-qtest-moxie-y += boot-serial-test check-qtest-ppc-$(CONFIG_ISA_TESTDEV) = endianness-test @@ -136,7 +116,7 @@ index 98af2c2d93..d370459c3a 100644 check-qtest-ppc-y += prom-env-test check-qtest-ppc-y += drive_del-test check-qtest-ppc-y += boot-serial-test -@@ -102,8 +102,8 @@ check-qtest-ppc64-$(CONFIG_PSERIES) += rtas-test +@@ -104,8 +104,8 @@ check-qtest-ppc64-$(CONFIG_PSERIES) += rtas-test check-qtest-ppc64-$(CONFIG_SLIRP) += pxe-test check-qtest-ppc64-$(CONFIG_USB_UHCI) += usb-hcd-uhci-test check-qtest-ppc64-$(CONFIG_USB_XHCI_NEC) += usb-hcd-xhci-test @@ -147,7 +127,7 @@ index 98af2c2d93..d370459c3a 100644 check-qtest-ppc64-$(CONFIG_RTL8139_PCI) += test-filter-redirector check-qtest-ppc64-$(CONFIG_VGA) += display-vga-test check-qtest-ppc64-y += numa-test -@@ -152,7 +152,7 @@ check-qtest-s390x-$(CONFIG_SLIRP) += test-netfilter +@@ -154,7 +154,7 @@ check-qtest-s390x-$(CONFIG_SLIRP) += test-netfilter check-qtest-s390x-$(CONFIG_POSIX) += test-filter-mirror check-qtest-s390x-$(CONFIG_POSIX) += test-filter-redirector check-qtest-s390x-y += drive_del-test @@ -157,10 +137,10 @@ index 98af2c2d93..d370459c3a 100644 check-qtest-s390x-y += cpu-plug-test check-qtest-s390x-y += migration-test diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c -index 
85a3614286..1c18441334 100644 +index bfe7624dc6..1ffaa0bd7e 100644 --- a/tests/qtest/boot-serial-test.c +++ b/tests/qtest/boot-serial-test.c -@@ -109,19 +109,23 @@ static testdef_t tests[] = { +@@ -120,19 +120,23 @@ static testdef_t tests[] = { { "ppc", "g3beige", "", "PowerPC,750" }, { "ppc", "mac99", "", "PowerPC,G4" }, { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index cbf42e0..428a3f9 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From ac9ff21203ff07854c232464d96e1f35741af8a6 Mon Sep 17 00:00:00 2001 +From 08d751996b6c983d037134551f1c4b4691989168 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -42,7 +42,7 @@ Signed-off-by: Danilo C. L. de Paula 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index d020ea9f82..e4292ea2b4 100644 +index 2e561c06d6..be1b843e7f 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -47,6 +47,9 @@ @@ -55,7 +55,7 @@ index d020ea9f82..e4292ea2b4 100644 static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); -@@ -2715,9 +2718,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) +@@ -2712,9 +2715,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) ssize_t len; struct stat st; int groupid; @@ -87,7 +87,7 @@ index d020ea9f82..e4292ea2b4 100644 if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3162,6 +3186,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3158,6 +3182,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), 
DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), diff --git a/0014-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch index 2ef2119..2e97d83 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 0f0f3ab53bc61fbf66e546ab6bd22d1a60102b79 Mon Sep 17 00:00:00 2001 +From 5ee503bc80a271f9b0fcf3d24df42ee3f8c7d687 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -24,10 +24,10 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c -index 6509057752..8453de5e68 100644 +index a1fb06dca0..3c383911cd 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c -@@ -1674,9 +1674,17 @@ static void version(void) +@@ -1686,9 +1686,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -45,7 +45,7 @@ index 6509057752..8453de5e68 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", error_get_progname()); -@@ -1693,6 +1701,7 @@ static void help(int exitcode) +@@ -1705,6 +1713,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch index e5bb3c8..a6ee268 100644 --- a/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From 47faf13d999995693d505074ee73d4908356888b Mon Sep 17 00:00:00 2001 +From edf9e38a2ef58908f45e37a63746926a494f9057 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -82,7 +82,7 @@ Signed-off-by: Danilo C. L. 
de Paula 2 files changed, 30 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index ab36fbfa0c..6f6bb47d9a 100644 +index 63ef6af9a1..db9a6b38b1 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2088,6 +2088,18 @@ static int kvm_init(MachineState *ms) @@ -105,7 +105,7 @@ index ab36fbfa0c..6f6bb47d9a 100644 if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " diff --git a/softmmu/vl.c b/softmmu/vl.c -index 8453de5e68..ea6e9e4f01 100644 +index 3c383911cd..62fc7c898f 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -116,6 +116,8 @@ @@ -117,7 +117,7 @@ index 8453de5e68..ea6e9e4f01 100644 static const char *data_dir[16]; static int data_dir_idx; const char *bios_name = NULL; -@@ -1177,6 +1179,20 @@ static MachineClass *find_default_machine(GSList *machines) +@@ -1189,6 +1191,20 @@ static MachineClass *find_default_machine(GSList *machines) return default_machineclass; } @@ -138,7 +138,7 @@ index 8453de5e68..ea6e9e4f01 100644 static int machine_help_func(QemuOpts *opts, MachineState *machine) { ObjectProperty *prop; -@@ -3831,6 +3847,8 @@ void qemu_init(int argc, char **argv, char **envp) +@@ -3845,6 +3861,8 @@ void qemu_init(int argc, char **argv, char **envp) "mutually exclusive"); exit(EXIT_FAILURE); } diff --git a/0016-Add-support-for-simpletrace.patch b/0016-Add-support-for-simpletrace.patch index 7309f2c..3c79abb 100644 --- a/0016-Add-support-for-simpletrace.patch +++ b/0016-Add-support-for-simpletrace.patch @@ -1,4 +1,4 @@ -From bc118b0038083bcd54a970ed0a6c92f9d55759e2 Mon Sep 17 00:00:00 2001 +From 9238ce7ba819979c2df2caa49c5db3185376fedd Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 8 Oct 2015 09:50:17 +0200 Subject: Add support for simpletrace @@ -39,10 +39,10 @@ Signed-off-by: Danilo C. L. 
de Paula create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp diff --git a/Makefile b/Makefile -index 88b7565246..045eeb545e 100644 +index 42e854b2b1..3b9ff25f10 100644 --- a/Makefile +++ b/Makefile -@@ -995,6 +995,10 @@ endif +@@ -996,6 +996,10 @@ endif $(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \ done $(INSTALL_DATA) $(BUILD_DIR)/trace-events-all "$(DESTDIR)$(qemu_datadir)/trace-events-all" diff --git a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 48d46dc..4ca0022 100644 --- a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From cc88b2746e9e8cfa6816e871ca282cddb07a0146 Mon Sep 17 00:00:00 2001 +From 44c255eddd3096d63ebdc055181d3fcebe202eef Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -142,10 +142,10 @@ index 824e713491..8449936c63 100644 -device vhost-user-fs-pci,chardev=char0,tag=myfs \ -object memory-backend-memfd,id=mem,size=4G,share=on \ diff --git a/qemu-options.hx b/qemu-options.hx -index b9699440a3..fe85a0e952 100644 +index 1700205035..697276859b 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -2891,11 +2891,11 @@ SRST +@@ -2900,11 +2900,11 @@ SRST :: diff --git a/0018-usb-xhci-Fix-PCI-capability-order.patch b/0018-usb-xhci-Fix-PCI-capability-order.patch index f4c5de6..7549bdb 100644 --- a/0018-usb-xhci-Fix-PCI-capability-order.patch +++ b/0018-usb-xhci-Fix-PCI-capability-order.patch @@ -1,4 +1,4 @@ -From 1cd3f04262da39a69509435c1db96c0c2a8ebd62 Mon Sep 17 00:00:00 2001 +From 5bb61f2b69d9880dea36d604719ee7bd4cf74ba6 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 5 May 2017 19:06:14 +0200 Subject: usb-xhci: Fix PCI capability order @@ -62,7 +62,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index b25cce8f0c..9582d81d14 100644 +index 38bdfaf3fd..4acd7842ac 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -3413,6 +3413,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) diff --git a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index 94a535a..269d357 100644 --- a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 766fb0162dea8353a39f6eeff5ba90309cb0338e Mon Sep 17 00:00:00 2001 +From 63a330ad8eeb2025c12d56714cb1271196f80dbe Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] diff --git a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index b6808d3..00b6ba7 100644 --- a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From ff6425248e516c678db5bb85d59b5811c48bedaf Mon Sep 17 00:00:00 2001 +From 9ff2cc74cd1f41abc4b379fbaaaaa03a30415494 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts @@ -32,10 +32,10 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index 0c2bc8e06e..15bef3d27f 100644 +index 3225fc5a2e..08ae7724d5 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c -@@ -335,12 +335,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, +@@ -326,12 +326,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { diff --git a/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch b/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch index 55de732..eaaf5a3 100644 --- a/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +++ b/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch @@ -1,4 +1,4 @@ -From 2ce0d065e712fdfae74a52cfa5188791eaa7f848 Mon Sep 17 00:00:00 2001 +From e27f3d72ac38e37758d4ea5b84bccf03cab8219a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 13 Mar 2020 12:34:32 +0000 Subject: block: Versioned x-blockdev-reopen API with feature flag @@ -29,10 +29,10 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/qapi/block-core.json b/qapi/block-core.json -index b20332e592..db4544df75 100644 +index 197bdc1c36..efc0f6a377 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json -@@ -4135,10 +4135,17 @@ +@@ -4118,10 +4118,17 @@ # image does not have a default backing file name as part of its # metadata. 
# diff --git a/0024-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch b/0022-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch similarity index 90% rename from 0024-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch rename to 0022-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch index edd80d0..7bafd66 100644 --- a/0024-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch +++ b/0022-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch @@ -1,6 +1,6 @@ -From 64afb1a8b06f0017b3ee5eb59790e1751424b1d5 Mon Sep 17 00:00:00 2001 +From f70eb50b7107ee4e18cec3561bbdde1cbd0a0bdb Mon Sep 17 00:00:00 2001 From: David Gibson -Date: Fri, 22 May 2020 02:27:18 +0000 +Date: Mon, 27 Jul 2020 13:29:01 +0200 Subject: RHEL-only: Enable vTPM for POWER in downstream configs RH-Author: David Gibson @@ -23,6 +23,7 @@ Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=28742072 Testing: With brewed qemu was able to see a vTPM device in a guest Signed-off-by: David Gibson +Signed-off-by: Miroslav Rezanina Signed-off-by: Danilo C. L. de Paula --- default-configs/ppc64-rh-devices.mak | 3 +++ diff --git a/0022-virtio-net-fix-removal-of-failover-device.patch b/0022-virtio-net-fix-removal-of-failover-device.patch deleted file mode 100644 index ca33979..0000000 --- a/0022-virtio-net-fix-removal-of-failover-device.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 0e4d9f0332efd8417831815a414a5131f85e0a85 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Fri, 3 Jul 2020 12:37:05 -0400 -Subject: virtio-net: fix removal of failover device - -RH-Author: Juan Quintela -Message-id: <20200703123705.7175-2-quintela@redhat.com> -Patchwork-id: 97901 -O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/1] virtio-net: fix removal of failover device -Bugzilla: 1820120 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Laurent Vivier -RH-Acked-by: Dr. 
David Alan Gilbert - -If you have a networking device and its virtio failover device, and -you remove them in this order: -- virtio device -- the real device - -You get qemu crash. -See bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1820120 - -Bug exist on qemu 4.2 and 5.0. -But in 5.0 don't shows because commit -77b06bba62034a87cc61a9c8de1309ae3e527d97 - -somehow papers over it. - -CC: Jason Wang -CC: Michael S. Tsirkin - -Signed-off-by: Juan Quintela -Signed-off-by: Danilo C. L. de Paula ---- - hw/net/virtio-net.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 1596cb1397..f82455ba5d 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3417,6 +3417,7 @@ static void virtio_net_device_unrealize(DeviceState *dev) - g_free(n->vlans); - - if (n->failover) { -+ device_listener_unregister(&n->primary_listener); - g_free(n->primary_device_id); - g_free(n->standby_id); - qobject_unref(n->primary_device_dict); --- -2.27.0 - diff --git a/0025-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch b/0023-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch similarity index 86% rename from 0025-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch rename to 0023-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch index 87ec07a..7489bf1 100644 --- a/0025-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch +++ b/0023-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch @@ -1,6 +1,6 @@ -From def5545e3588c8b8c7cd12e3ff8967ce059d3f38 Mon Sep 17 00:00:00 2001 +From 69d8ae7ad5314e465c24fdeb1317751fa3e50ceb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Mon, 8 Jun 2020 15:54:26 -0400 +Date: Mon, 27 Jul 2020 13:29:01 +0200 Subject: redhat: fix 5.0 rebase missing ISA TPM TIS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 @@ -20,7 +20,8 @@ BRANCH: rhel-av-8.3.0-preview-2020-04-29 UPSTREAM: N/A BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=29172313 -Signed-off-by: Marc-André Lureau 
+Signed-off-by: Marc-André Lureau +Signed-off-by: Miroslav Rezanina Signed-off-by: Danilo C. L. de Paula --- default-configs/x86_64-rh-devices.mak | 2 +- diff --git a/0026-redhat-define-hw_compat_8_2.patch b/0024-redhat-define-hw_compat_8_2.patch similarity index 93% rename from 0026-redhat-define-hw_compat_8_2.patch rename to 0024-redhat-define-hw_compat_8_2.patch index 229c59c..c31fdde 100644 --- a/0026-redhat-define-hw_compat_8_2.patch +++ b/0024-redhat-define-hw_compat_8_2.patch @@ -1,6 +1,6 @@ -From f8f3c2d598ca5921b6a5d477e848f02977c715fd Mon Sep 17 00:00:00 2001 +From 83486421151111ec118cc703819bd4764fea677e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" -Date: Fri, 19 Jun 2020 15:42:26 -0400 +Date: Mon, 27 Jul 2020 13:29:01 +0200 Subject: redhat: define hw_compat_8_2 RH-Author: Dr. David Alan Gilbert @@ -17,6 +17,8 @@ From: Laurent Vivier Signed-off-by: Laurent Vivier Signed-off-by: Dr. David Alan Gilbert For minor fix + +Signed-off-by: Miroslav Rezanina Signed-off-by: Danilo C. L. de Paula --- hw/core/machine.c | 28 ++++++++++++++++++++++++++++ @@ -24,7 +26,7 @@ Signed-off-by: Danilo C. L. de Paula 2 files changed, 31 insertions(+) diff --git a/hw/core/machine.c b/hw/core/machine.c -index ef6b320ea7..b8373991e9 100644 +index 2b8e480040..5476af98e1 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -28,6 +28,34 @@ diff --git a/0027-x86-Add-8.3.0-x86_64-machine-type.patch b/0025-x86-Add-8.3.0-x86_64-machine-type.patch similarity index 86% rename from 0027-x86-Add-8.3.0-x86_64-machine-type.patch rename to 0025-x86-Add-8.3.0-x86_64-machine-type.patch index cee8833..70da7fa 100644 --- a/0027-x86-Add-8.3.0-x86_64-machine-type.patch +++ b/0025-x86-Add-8.3.0-x86_64-machine-type.patch @@ -1,6 +1,6 @@ -From 8b509ce5c11a82d05ad0d43270e4b76e0010015b Mon Sep 17 00:00:00 2001 +From b02c9f5373f6ffa65b8ddbdee32d6ed4e59198ad Mon Sep 17 00:00:00 2001 From: "Dr. 
David Alan Gilbert" -Date: Fri, 19 Jun 2020 15:42:27 -0400 +Date: Mon, 27 Jul 2020 13:29:01 +0200 Subject: x86: Add 8.3.0 x86_64 machine type RH-Author: Dr. David Alan Gilbert @@ -17,6 +17,7 @@ From: "Dr. David Alan Gilbert" Not much change, just the smbase-smram. Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Miroslav Rezanina Signed-off-by: Danilo C. L. de Paula --- hw/i386/pc.c | 6 ++++++ @@ -26,10 +27,10 @@ Signed-off-by: Danilo C. L. de Paula 4 files changed, 35 insertions(+) diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index fe37bdec7a..94c857ea97 100644 +index 063f01d19a..a75e0137ab 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -360,6 +360,12 @@ GlobalProperty pc_rhel_compat[] = { +@@ -361,6 +361,12 @@ GlobalProperty pc_rhel_compat[] = { }; const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); @@ -43,10 +44,10 @@ index fe37bdec7a..94c857ea97 100644 GlobalProperty pc_rhel_8_1_compat[] = { }; const size_t pc_rhel_8_1_compat_len = G_N_ELEMENTS(pc_rhel_8_1_compat); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index eeadd896c2..f14ddcb472 100644 +index 6d935645b6..4af4497a0c 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -1028,6 +1028,10 @@ static void pc_machine_rhel760_options(MachineClass *m) +@@ -1030,6 +1030,10 @@ static void pc_machine_rhel760_options(MachineClass *m) m->smbus_no_migration_support = true; pcmc->pvh_enabled = false; pcmc->default_cpu_version = CPU_VERSION_LEGACY; @@ -58,10 +59,10 @@ index eeadd896c2..f14ddcb472 100644 compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 1cd4e15297..b9e8dcb392 100644 +index f4edb049d6..d75d6d8805 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -590,6 +590,23 @@ static void pc_q35_machine_rhel_options(MachineClass *m) +@@ -589,6 +589,23 @@ static void pc_q35_machine_rhel_options(MachineClass *m) 
compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); } @@ -85,7 +86,7 @@ index 1cd4e15297..b9e8dcb392 100644 static void pc_q35_init_rhel820(MachineState *machine) { pc_q35_init(machine); -@@ -600,8 +617,13 @@ static void pc_q35_machine_rhel820_options(MachineClass *m) +@@ -599,8 +616,13 @@ static void pc_q35_machine_rhel820_options(MachineClass *m) PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_machine_rhel_options(m); m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; @@ -100,10 +101,10 @@ index 1cd4e15297..b9e8dcb392 100644 DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 95d07f81a0..e67468ba41 100644 +index 156be22995..e9dc8c370c 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -270,6 +270,9 @@ extern const size_t pc_compat_1_4_len; +@@ -271,6 +271,9 @@ extern const size_t pc_compat_1_4_len; extern GlobalProperty pc_rhel_compat[]; extern const size_t pc_rhel_compat_len; diff --git a/0027-hw-arm-Changes-to-rhel820-machine.patch b/0027-hw-arm-Changes-to-rhel820-machine.patch new file mode 100644 index 0000000..84c289c --- /dev/null +++ b/0027-hw-arm-Changes-to-rhel820-machine.patch @@ -0,0 +1,82 @@ +From 12990ad9479216d96e4d67a7e613d2ef3b4fb700 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 12 Aug 2020 10:58:04 +0200 +Subject: hw/arm: Changes to rhel820 machine + +RH-Author: Gavin Shan +Message-id: <20200630013648.101937-1-gshan@redhat.com> +Patchwork-id: 97844 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH v5 1/3] hw/arm: Changes to rhel820 machine +Bugzilla: 1818843 +RH-Acked-by: Auger Eric +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones + +This applies two changes to rhel820 machine: + + * Set the gic version to VIRT_GIC_VERSION_NOSEL by default, which + doesn't cause functional changes. + * Disallow to configure the RAS property, which is hidden by default. 
+ +Signed-off-by: Gavin Shan +RH-Acked-by: Auger Eric +RH-Acked-by: Andrew Jones +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/virt.c | 17 +++++------------ + 1 file changed, 5 insertions(+), 12 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e1a17e7c87..c22e1e6d5c 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2105,6 +2105,7 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, + visit_type_OnOffAuto(v, name, &vms->acpi, errp); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_ras(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2125,6 +2126,7 @@ static bool virt_get_mte(Object *obj, Error **errp) + + return vms->mte; + } ++#endif /* disabled for RHEL */ + + static void virt_set_mte(Object *obj, bool value, Error **errp) + { +@@ -2802,12 +2804,8 @@ static void rhel820_virt_instance_init(Object *obj) + object_property_set_description(obj, "highmem", + "Set on/off to enable/disable using " + "physical address space above 32 bits"); +- /* +- * Default GIC type is still v2, but became configurable for RHEL. We +- * keep v2 instead of max as TCG CI test cases require an MSI controller +- * and there is no userspace ITS MSI emulation available. +- */ +- vms->gic_version = 2; ++ ++ vms->gic_version = VIRT_GIC_VERSION_NOSEL; + object_property_add_str(obj, "gic-version", virt_get_gic_version, + virt_set_gic_version); + object_property_set_description(obj, "gic-version", +@@ -2834,13 +2832,8 @@ static void rhel820_virt_instance_init(Object *obj) + object_property_set_description(obj, "iommu", + "Set the IOMMU type. 
" + "Valid values are none and smmuv3"); +- vms->ras = false; +- object_property_add_bool(obj, "ras", virt_get_ras, +- virt_set_ras); +- object_property_set_description(obj, "ras", +- "Set on/off to enable/disable reporting host memory errors " +- "to a KVM guest using ACPI and guest external abort exceptions"); + ++ vms->ras = false; + /* MTE is disabled by default. */ + vms->mte = false; + +-- +2.27.0 + diff --git a/0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch b/0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch new file mode 100644 index 0000000..04d0eda --- /dev/null +++ b/0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch @@ -0,0 +1,53 @@ +From 46d5a797986373ecc0dfa578cae07a3641847935 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 12 Aug 2020 10:58:04 +0200 +Subject: hw/arm: Introduce rhel_virt_instance_init() helper + +RH-Author: Gavin Shan +Message-id: <20200629022939.76453-3-gshan@redhat.com> +Patchwork-id: 97838 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH v4 2/3] hw/arm: Introduce rhel_virt_instance_init() helper +Bugzilla: 1818843 +RH-Acked-by: Auger Eric +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones + +This introduces rhel_virt_instance_init() helper function so that +it can be shared by rhel820 and rhel830 machine. This shouldn't +cause functional changes. + +Signed-off-by: Gavin Shan +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/arm/virt.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c22e1e6d5c..650668a8d1 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2788,7 +2788,7 @@ static void rhel_machine_init(void) + } + type_init(rhel_machine_init); + +-static void rhel820_virt_instance_init(Object *obj) ++static void rhel_virt_instance_init(Object *obj) + { + VirtMachineState *vms = VIRT_MACHINE(obj); + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); +@@ -2841,6 +2841,11 @@ static void rhel820_virt_instance_init(Object *obj) + virt_flash_create(vms); + } + ++static void rhel820_virt_instance_init(Object *obj) ++{ ++ rhel_virt_instance_init(obj); ++} ++ + static void rhel820_virt_options(MachineClass *mc) + { + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); +-- +2.27.0 + diff --git a/0029-hw-arm-Add-rhel830-machine-type.patch b/0029-hw-arm-Add-rhel830-machine-type.patch new file mode 100644 index 0000000..af129dc --- /dev/null +++ b/0029-hw-arm-Add-rhel830-machine-type.patch @@ -0,0 +1,61 @@ +From 098954acda750a54d2eb512297bcd205212ee718 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 12 Aug 2020 10:58:04 +0200 +Subject: hw/arm: Add rhel830 machine type + +RH-Author: Gavin Shan +Message-id: <20200630014756.102753-1-gshan@redhat.com> +Patchwork-id: 97845 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH v5 3/3] hw/arm: Add rhel830 machine type +Bugzilla: 1818843 +RH-Acked-by: Auger Eric +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones + +This adds rhel830 machine type, whose properties are same as to +rhel820. + +Signed-off-by: Gavin Shan +RH-Acked-by: Auger Eric +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/arm/virt.c | 17 +++++++++++++++-- + 1 file changed, 15 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 650668a8d1..48b58be597 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2841,6 +2841,17 @@ static void rhel_virt_instance_init(Object *obj) + virt_flash_create(vms); + } + ++static void rhel830_virt_instance_init(Object *obj) ++{ ++ rhel_virt_instance_init(obj); ++} ++ ++static void rhel830_virt_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 3, 0) ++ + static void rhel820_virt_instance_init(Object *obj) + { + rhel_virt_instance_init(obj); +@@ -2848,6 +2859,8 @@ static void rhel820_virt_instance_init(Object *obj) + + static void rhel820_virt_options(MachineClass *mc) + { +- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ rhel830_virt_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); + } +-DEFINE_RHEL_MACHINE_AS_LATEST(8, 2, 0) ++DEFINE_RHEL_MACHINE(8, 2, 0) +-- +2.27.0 + diff --git a/0030-redhat-define-pseries-rhel8.3.0-machine-type.patch b/0030-redhat-define-pseries-rhel8.3.0-machine-type.patch new file mode 100644 index 0000000..165d197 --- /dev/null +++ b/0030-redhat-define-pseries-rhel8.3.0-machine-type.patch @@ -0,0 +1,77 @@ +From 311a20fb12a4d0ebed840be194db8117c8eea595 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Wed, 12 Aug 2020 10:58:04 +0200 +Subject: redhat: define pseries-rhel8.3.0 machine type + +RH-Author: Laurent Vivier +Message-id: <20200706104117.219174-3-lvivier@redhat.com> +Patchwork-id: 97904 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH v2 2/2] redhat: define pseries-rhel8.3.0 machine type +Bugzilla: 1853265 +RH-Acked-by: Thomas Huth + +Note: rebase to qemu-5.1 introduces + + 32a354dc6c07 ("numa: forbid '-numa node, mem' for 5.1 and newer machine types") + +and so '-numa node, mem' will not be available with 
pseries-rhel8.3.0 + +Signed-off-by: Laurent Vivier +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr.c | 30 ++++++++++++++++++++++++++++-- + 1 file changed, 28 insertions(+), 2 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 756c8667c1..ccceb6d39f 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4995,16 +4995,42 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) + DEFINE_SPAPR_MACHINE(2_1, "2.1", false); + #endif + ++/* ++ * pseries-rhel8.3.0 ++ * like pseries-5.1 ++ */ ++ ++static void spapr_machine_rhel830_class_options(MachineClass *mc) ++{ ++ /* Defaults for the latest behaviour inherited from the base class */ ++} ++ ++DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", true); ++ + /* + * pseries-rhel8.2.0 ++ * like pseries-4.2 + pseries-5.0 ++ * except SPAPR_CAP_CCF_ASSIST that has been backported to pseries-rhel8.1.0 + */ + + static void spapr_machine_rhel820_class_options(MachineClass *mc) + { +- /* Defaults for the latest behaviour inherited from the base class */ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel830_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF; ++ smc->rma_limit = 16 * GiB; ++ mc->nvdimm_supported = false; ++ ++ /* from pseries-5.0 */ ++ mc->numa_mem_supported = true; + } + +-DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", true); ++DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", false); + + /* + * pseries-rhel8.1.0 +-- +2.27.0 + diff --git a/0031-ppc-Set-correct-max_cpus-value-on-spapr-rhel-machine.patch b/0031-ppc-Set-correct-max_cpus-value-on-spapr-rhel-machine.patch new file mode 100644 index 0000000..6fde229 --- /dev/null +++ b/0031-ppc-Set-correct-max_cpus-value-on-spapr-rhel-machine.patch @@ -0,0 +1,49 @@ +From 12841675e2a81f3b98cb9741b54c3041cebf9e87 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost 
+Date: Wed, 12 Aug 2020 10:58:04 +0200 +Subject: ppc: Set correct max_cpus value on spapr-rhel* machine types + +RH-Author: Eduardo Habkost +Message-id: <20200729180236.627559-2-ehabkost@redhat.com> +Patchwork-id: 98073 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH v2 1/4] ppc: Set correct max_cpus value on spapr-rhel* machine types +Bugzilla: 1819292 +RH-Acked-by: Andrew Jones +RH-Acked-by: Thomas Huth +RH-Acked-by: David Gibson + +Currently vl.c forces MachineClass::max_cpus to be +<= RHEL_MAX_CPUS (384) on all machine types. + +Instead of relying on that global limit, set max_cpus=384 +explicitly at spapr_machine_rhel820_class_options(), which will +affect all pseriesl-rhel* machine types. + +This will keep exactly the same behavior as before, but will +allow us to remove the downstream-only RHEL_MAX_CPUS code at vl.c +later. + +Signed-off-by: Eduardo Habkost +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index ccceb6d39f..1c367a2367 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -5003,6 +5003,9 @@ DEFINE_SPAPR_MACHINE(2_1, "2.1", false); + static void spapr_machine_rhel830_class_options(MachineClass *mc) + { + /* Defaults for the latest behaviour inherited from the base class */ ++ ++ /* Maximum supported VCPU count for all pseries-rhel* machines */ ++ mc->max_cpus = 384; + } + + DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", true); +-- +2.27.0 + diff --git a/0032-arm-Set-correct-max_cpus-value-on-virt-rhel-machine-.patch b/0032-arm-Set-correct-max_cpus-value-on-virt-rhel-machine-.patch new file mode 100644 index 0000000..0656938 --- /dev/null +++ b/0032-arm-Set-correct-max_cpus-value-on-virt-rhel-machine-.patch @@ -0,0 +1,53 @@ +From ee8e99d0a7821b26d0afe20c3a1f7517e4fa6772 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Wed, 12 Aug 2020 10:58:04 +0200 +Subject: arm: Set correct max_cpus value on virt-rhel* machine 
types + +RH-Author: Eduardo Habkost +Message-id: <20200729180236.627559-3-ehabkost@redhat.com> +Patchwork-id: 98074 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH v2 2/4] arm: Set correct max_cpus value on virt-rhel* machine types +Bugzilla: 1819292 +RH-Acked-by: Andrew Jones +RH-Acked-by: Thomas Huth +RH-Acked-by: David Gibson + +Currently vl.c forces MachineClass::max_cpus to be +<= RHEL_MAX_CPUS (384) on all machine types. + +Instead of relying on that global limit, set max_cpus=384 +explicitly at the virt-rhel-machine base class, which will affect +all virt-rhel* machine types. + +This will keep exactly the same behavior as before, but will +allow us to remove the downstream-only RHEL_MAX_CPUS code at vl.c +later. + +Signed-off-by: Eduardo Habkost +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/virt.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 48b58be597..fb5a5a7013 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2736,11 +2736,8 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + + mc->family = "virt-rhel-Z"; + mc->init = machvirt_init; +- /* Start with max_cpus set to 512, which is the maximum supported by KVM. +- * The value may be reduced later when we have more information about the +- * configuration of the particular instance. 
+- */ +- mc->max_cpus = 512; ++ /* Maximum supported VCPU count for all virt-rhel* machines */ ++ mc->max_cpus = 384; + mc->block_default_type = IF_VIRTIO; + mc->no_cdrom = 1; + mc->pci_allow_0_address = true; +-- +2.27.0 + diff --git a/0033-vl-Remove-downstream-only-MAX_RHEL_CPUS-code.patch b/0033-vl-Remove-downstream-only-MAX_RHEL_CPUS-code.patch new file mode 100644 index 0000000..b522ab1 --- /dev/null +++ b/0033-vl-Remove-downstream-only-MAX_RHEL_CPUS-code.patch @@ -0,0 +1,83 @@ +From f8a4123e211ed0685097f496c99e73913a6b34d0 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Wed, 12 Aug 2020 10:58:04 +0200 +Subject: vl: Remove downstream-only MAX_RHEL_CPUS code + +RH-Author: Eduardo Habkost +Message-id: <20200729180236.627559-4-ehabkost@redhat.com> +Patchwork-id: 98075 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH v2 3/4] vl: Remove downstream-only MAX_RHEL_CPUS code +Bugzilla: 1819292 +RH-Acked-by: Andrew Jones +RH-Acked-by: Thomas Huth +RH-Acked-by: David Gibson + +Now that all machine types have max_cpus set to the actual +supported number of VCPUs, the MAX_RHEL_CPUS code becomes +unnecessary and can be completely removed. + +For reference these are the max_cpus values set by the RHEL +machine types: + +- arm: virt-rhel*: max_cpus=384 (rhel_machine_class_init()); +- ppc: spapr-rhel*: max_cpus=384 + (spapr_machine_rhel820_class_options()); +- s390: s390-ccw*: max_cpus=248 (ccw_machine_class_init()); +- x86: q35: max_cpus=384 (pc_q35_machine_rhel_options()); +- x86: q35-rhel7.3.0 and older: max_cpus=255 + (pc_q35_machine_rhel730_options()); +- x86: pc-i440fx*: max_cpus=240 (pc_machine_class_init()). + +Signed-off-by: Eduardo Habkost +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula +--- + softmmu/vl.c | 18 ------------------ + 1 file changed, 18 deletions(-) + +diff --git a/softmmu/vl.c b/softmmu/vl.c +index 62fc7c898f..3c383911cd 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -116,8 +116,6 @@ + + #define MAX_VIRTIO_CONSOLES 1 + +-#define RHEL_MAX_CPUS 384 +- + static const char *data_dir[16]; + static int data_dir_idx; + const char *bios_name = NULL; +@@ -1191,20 +1189,6 @@ static MachineClass *find_default_machine(GSList *machines) + return default_machineclass; + } + +-/* Maximum number of CPUs limited for Red Hat Enterprise Linux */ +-static void limit_max_cpus_in_machines(void) +-{ +- GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); +- +- for (el = machines; el; el = el->next) { +- MachineClass *mc = el->data; +- +- if (mc->max_cpus > RHEL_MAX_CPUS) { +- mc->max_cpus = RHEL_MAX_CPUS; +- } +- } +-} +- + static int machine_help_func(QemuOpts *opts, MachineState *machine) + { + ObjectProperty *prop; +@@ -3861,8 +3845,6 @@ void qemu_init(int argc, char **argv, char **envp) + "mutually exclusive"); + exit(EXIT_FAILURE); + } +- /* Maximum number of CPUs limited for Red Hat Enterprise Linux */ +- limit_max_cpus_in_machines(); + + configure_rtc(qemu_find_opts_singleton("rtc")); + +-- +2.27.0 + diff --git a/0034-q35-Set-max_cpus-to-512.patch b/0034-q35-Set-max_cpus-to-512.patch new file mode 100644 index 0000000..410103d --- /dev/null +++ b/0034-q35-Set-max_cpus-to-512.patch @@ -0,0 +1,45 @@ +From f2edc4f9262e9130d020ef6caef2443e7ae31371 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Wed, 12 Aug 2020 10:58:04 +0200 +Subject: q35: Set max_cpus to 512 + +RH-Author: Eduardo Habkost +Message-id: <20200729180236.627559-5-ehabkost@redhat.com> +Patchwork-id: 98076 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH v2 4/4] q35: Set max_cpus to 512 +Bugzilla: 1819292 +RH-Acked-by: Andrew Jones +RH-Acked-by: Thomas Huth +RH-Acked-by: David Gibson + +Increase supported VCPU count for the Q35 machine type. 
+ +The VCPU count that partners confirmed to work depended on other +parameters (especially RAM size), but fluctuated between 640 and +710 VCPUs. I chose to increase the limit to 512 to be +conservative, until we find out what exactly prevents larger VMs +from booting. + +Signed-off-by: Eduardo Habkost +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. de Paula +--- + hw/i386/pc_q35.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index d75d6d8805..c709460ab7 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -585,7 +585,7 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); + machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); + m->alias = "q35"; +- m->max_cpus = 384; ++ m->max_cpus = 512; + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + } + +-- +2.27.0 + diff --git a/0035-RHEL-only-arm-virt-Allow-the-TPM_TIS_SYSBUS-device-d.patch b/0035-RHEL-only-arm-virt-Allow-the-TPM_TIS_SYSBUS-device-d.patch new file mode 100644 index 0000000..69877bc --- /dev/null +++ b/0035-RHEL-only-arm-virt-Allow-the-TPM_TIS_SYSBUS-device-d.patch @@ -0,0 +1,39 @@ +From e5edd3824a782900bcb7aa2a980696e550b55cf6 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 12 Aug 2020 11:03:02 +0200 +Subject: RHEL-only: arm/virt: Allow the TPM_TIS_SYSBUS device dynamic + allocation in machvirt + +RH-Author: Auger Eric +Message-id: <20200811163601.14341-2-eric.auger@redhat.com> +Patchwork-id: 98143 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/2] RHEL-only: arm/virt: Allow the TPM_TIS_SYSBUS device dynamic allocation in machvirt +Bugzilla: 1801242 + +Allow the TPM_TIS_SYSBUS device dynamic instantiation onto the +platform bus. The TPM_TIS sysbus device compilation will be enabled +in a separate patch. That way associated qmp tests pass once the config +is set. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/virt.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index fb5a5a7013..f087483a04 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2738,6 +2738,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + mc->init = machvirt_init; + /* Maximum supported VCPU count for all virt-rhel* machines */ + mc->max_cpus = 384; ++ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); + mc->block_default_type = IF_VIRTIO; + mc->no_cdrom = 1; + mc->pci_allow_0_address = true; +-- +2.27.0 + diff --git a/0036-RHEL-only-Enable-vTPM-for-ARM-in-downstream-configs.patch b/0036-RHEL-only-Enable-vTPM-for-ARM-in-downstream-configs.patch new file mode 100644 index 0000000..d833611 --- /dev/null +++ b/0036-RHEL-only-Enable-vTPM-for-ARM-in-downstream-configs.patch @@ -0,0 +1,35 @@ +From 8310f89d8818bc9d41b386bbb2824593aa8e8dca Mon Sep 17 00:00:00 2001 +From: Auger Eric +Date: Wed, 12 Aug 2020 11:03:31 +0200 +Subject: RHEL-only: Enable vTPM for ARM in downstream configs + +RH-Author: Auger Eric +Message-id: <20200811163601.14341-3-eric.auger@redhat.com> +Patchwork-id: 98144 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 2/2] RHEL-only: Enable vTPM for ARM in downstream configs +Bugzilla: 1801242 + +We allow the compilation of the TPM_TIS_SYSBUS device and both +passthrough and software emulation backends. + +Signed-off-by: Eric Auger +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. 
de Paula +--- + default-configs/aarch64-rh-devices.mak | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak +index f0cf5a1b22..0b6a03f816 100644 +--- a/default-configs/aarch64-rh-devices.mak ++++ b/default-configs/aarch64-rh-devices.mak +@@ -20,3 +20,6 @@ CONFIG_VIRTIO_PCI=y + CONFIG_XIO3130=y + CONFIG_NVDIMM=y + CONFIG_ACPI_APEI=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_PASSTHROUGH=y ++CONFIG_TPM_TIS_SYSBUS=y +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 0530b54..8edde51 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -10,6 +10,7 @@ %global have_memlock_limits 0 + %ifnarch %{ix86} x86_64 %global have_usbredir 0 %endif @@ -67,8 +68,8 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 5.0.0 -Release: 2%{?dist} +Version: 5.1.0 +Release: 0%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -77,7 +78,7 @@ URL: http://www.qemu.org/ ExclusiveArch: x86_64 %{power64} aarch64 s390x -Source0: http://wiki.qemu.org/download/qemu-5.0.0.tar.xz +Source0: http://wiki.qemu.org/download/qemu-5.1.0.tar.xz # KSM control scripts Source4: ksm.service @@ -123,11 +124,20 @@ Patch0018: 0018-usb-xhci-Fix-PCI-capability-order.patch Patch0019: 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0020: 0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch Patch0021: 0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch -Patch0022: 0022-virtio-net-fix-removal-of-failover-device.patch -Patch0024: 0024-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch -Patch0025: 0025-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch -Patch0026: 0026-redhat-define-hw_compat_8_2.patch -Patch0027: 0027-x86-Add-8.3.0-x86_64-machine-type.patch +Patch0022: 0022-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch +Patch0023: 
0023-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch +Patch0024: 0024-redhat-define-hw_compat_8_2.patch +Patch0025: 0025-x86-Add-8.3.0-x86_64-machine-type.patch +Patch0027: 0027-hw-arm-Changes-to-rhel820-machine.patch +Patch0028: 0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch +Patch0029: 0029-hw-arm-Add-rhel830-machine-type.patch +Patch0030: 0030-redhat-define-pseries-rhel8.3.0-machine-type.patch +Patch0031: 0031-ppc-Set-correct-max_cpus-value-on-spapr-rhel-machine.patch +Patch0032: 0032-arm-Set-correct-max_cpus-value-on-virt-rhel-machine-.patch +Patch0033: 0033-vl-Remove-downstream-only-MAX_RHEL_CPUS-code.patch +Patch0034: 0034-q35-Set-max_cpus-to-512.patch +Patch0035: 0035-RHEL-only-arm-virt-Allow-the-TPM_TIS_SYSBUS-device-d.patch +Patch0036: 0036-RHEL-only-Enable-vTPM-for-ARM-in-downstream-configs.patch BuildRequires: wget BuildRequires: rpm-build @@ -144,7 +154,7 @@ BuildRequires: pciutils-devel BuildRequires: libiscsi-devel BuildRequires: ncurses-devel BuildRequires: libattr-devel -BuildRequires: libusbx-devel >= 1.0.22 +BuildRequires: libusbx-devel >= 1.0.23 %if %{have_usbredir} BuildRequires: usbredir-devel >= 0.7.1 %endif @@ -193,7 +203,8 @@ BuildRequires: python3-sphinx BuildRequires: rdma-core-devel %endif %if %{have_fdt} -BuildRequires: libfdt-devel >= 1.4.3 +BuildRequires: libfdt-devel >= 1.6.0 +Requires: libfdt >= 1.6.0 %endif # iasl and cpp for acpi generation (not a hard requirement as we can use # pre-compiled files, but it's better to use this) @@ -430,6 +441,7 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" %global block_drivers_list %{block_drivers_list},gluster %endif + cd qemu-kvm-build ../configure \ --prefix="%{_prefix}" \ @@ -498,6 +510,7 @@ cd qemu-kvm-build --enable-iconv \ --disable-jemalloc \ --enable-kvm \ + --disable-libdaxctl \ --enable-libiscsi \ --disable-libnfs \ %ifarch x86_64 @@ -543,7 +556,9 @@ cd qemu-kvm-build %else --disable-rdma \ %endif + --disable-rng-none \ --disable-replication \ + --disable-safe-stack \ 
--disable-sanitizers \ --disable-sdl \ --disable-sdl-image \ @@ -578,6 +593,7 @@ cd qemu-kvm-build --enable-vhost-net \ --disable-vhost-scsi \ --enable-vhost-user \ + --enable-vhost-vdpa \ --enable-vhost-vsock \ %if 0%{have_spice} --enable-virglrenderer \ @@ -591,13 +607,13 @@ cd qemu-kvm-build --enable-vnc-sasl \ --disable-vte \ --disable-vvfat \ - --disable-vxhs \ --enable-werror \ --disable-whpx \ --disable-xen \ --disable-xen-pci-passthrough \ --disable-xfsctl \ --enable-xkbcommon \ + --disable-zstd \ --without-default-devices echo "config-host.mak contents:" @@ -1036,6 +1052,15 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %{_libexecdir}/virtiofsd %{_datadir}/%{name}/vhost-user/50-qemu-virtiofsd.json +%if %{have_usbredir} + %{_libdir}/qemu-kvm/hw-usb-redirect.so +%endif +%if 0%{have_spice} + %{_libdir}/qemu-kvm/hw-usb-smartcard.so +%endif +%ifarch x86_64 + %{_libdir}/qemu-kvm/hw-display-qxl.so +%endif %files -n qemu-img %defattr(-,root,root) @@ -1079,12 +1104,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog -* Wed Jul 15 2020 Danilo Cesar Lemes de Paula - 5.0.0-2.el8 -- Resolves: bz#1781911 -- Resolves: bz#1841529 -- Resolves: bz#1842902 - (This is an unofficial build that fixes the BZs mentioned above) - +* Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-0.el8 +- Rebase to 5.1.0 +- Resolves: bz#1809650 * Tue Jul 07 2020 Danilo Cesar Lemes de Paula - 4.2.0-29.el8 - kvm-virtio-net-fix-removal-of-failover-device.patch [bz#1820120] diff --git a/sources b/sources index 13799b0..eae2427 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-5.0.0.tar.xz) = 2011fc15747d9d8effcf0af4e1e3af6440eaf801c27948a8bdf97d0cb33cf99ac380f828c1aee02e55e2c2c6c674150a264ce025c99642c8f974fda34be285cd +SHA512 (qemu-5.1.0.tar.xz) = e213edb71d93d5167ddce7546220ecb7b52a7778586a4f476f65bd1e510c9cfc6d1876238a7b501d9cc3fd31cc2ae4b7fb9e753bc3f12cc17cd16dfce2a96ba3 From e68d4a564c6ee7f77ed3e4cc7c393fa9118805da Mon Sep 17 00:00:00 2001 
From: "Danilo C. L. de Paula" Date: Wed, 12 Aug 2020 14:52:12 -0400 Subject: [PATCH 084/195] Quick changelog fix to reflect the current fixes: Resolve: bz#1781911 Resolve: bz#1841529 Resolve: bz#1842902 Resolve: bz#1818843 Resolve: bz#1819292 Resolve: bz#1801242 --- qemu-kvm.spec | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 8edde51..415c7c0 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.1.0 -Release: 0%{?dist} +Release: 1%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -422,7 +422,7 @@ the Secure Shell (SSH) protocol. %prep -%setup -n qemu-%{version} +%setup -n qemu-%{version}%{?rcversion} # Remove slirp content in scratchbuilds because it's being applyed as a patch rm -fr slirp mkdir slirp @@ -1104,6 +1104,15 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-1.el8 +- Quick changelog fix to reflect the current fixes: +- Resolve: bz#1781911 +- Resolve: bz#1841529 +- Resolve: bz#1842902 +- Resolve: bz#1818843 +- Resolve: bz#1819292 +- Resolve: bz#1801242 + * Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-0.el8 - Rebase to 5.1.0 - Resolves: bz#1809650 From 58a130aa5fd1ea78bfe07ffd4b6bb8276a649ad3 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Wed, 12 Aug 2020 16:21:49 -0400 Subject: [PATCH 085/195] * Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-2.el8 - kvm-redhat-define-hw_compat_8_2.patch [bz#1853265] - Resolves: bz#1853265 (Forward and backward migration from rhel-av-8.3.0(qemu-kvm-5.0.0) to rhel-av-8.2.1(qemu-kvm-4.2.0) failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'") --- kvm-redhat-define-hw_compat_8_2.patch | 47 +++++++++++++++++++++++++++ qemu-kvm.spec | 9 ++++- 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 kvm-redhat-define-hw_compat_8_2.patch diff --git a/kvm-redhat-define-hw_compat_8_2.patch b/kvm-redhat-define-hw_compat_8_2.patch new file mode 100644 index 0000000..42bb6a4 --- /dev/null +++ b/kvm-redhat-define-hw_compat_8_2.patch @@ -0,0 +1,47 @@ +From 45b840275a5f5d3a3e4803c72c02a1db7fe1927d Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Mon, 6 Jul 2020 09:41:16 +0000 +Subject: [PATCH] redhat: define hw_compat_8_2 + +RH-Author: Laurent Vivier +Message-id: <20200706104117.219174-2-lvivier@redhat.com> +Patchwork-id: 97903 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH v2 1/2] redhat: define hw_compat_8_2 +Bugzilla: 1853265 +RH-Acked-by: David Gibson +RH-Acked-by: Thomas Huth +RH-Acked-by: Greg Kurz + +Signed-off-by: Laurent Vivier +Signed-off-by: Dr. David Alan Gilbert + For minor fix +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/core/machine.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 5476af98e1..6d17d9938d 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -54,6 +54,17 @@ GlobalProperty hw_compat_rhel_8_2[] = { + { "qxl-vga", "revision", "4" }, + /* hw_compat_rhel_8_2 from hw_compat_4_2 */ + { "fw_cfg", "acpi-mr-restore", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "virtio-balloon-device", "page-poison", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-read-set-eax", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-signal-unsupported-cmd", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-report-vmx-type", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-cmds-v2", "off" }, ++ + }; + const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); + /* +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 415c7c0..32fbb46 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.1.0 -Release: 1%{?dist} +Release: 2%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -138,6 +138,8 @@ Patch0033: 0033-vl-Remove-downstream-only-MAX_RHEL_CPUS-code.patch Patch0034: 0034-q35-Set-max_cpus-to-512.patch Patch0035: 0035-RHEL-only-arm-virt-Allow-the-TPM_TIS_SYSBUS-device-d.patch Patch0036: 0036-RHEL-only-Enable-vTPM-for-ARM-in-downstream-configs.patch +# For bz#1853265 - Forward and backward migration from rhel-av-8.3.0(qemu-kvm-5.0.0) to rhel-av-8.2.1(qemu-kvm-4.2.0) failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'" +Patch37: kvm-redhat-define-hw_compat_8_2.patch BuildRequires: wget BuildRequires: rpm-build @@ -1104,6 +1106,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-2.el8 +- kvm-redhat-define-hw_compat_8_2.patch [bz#1853265] +- Resolves: bz#1853265 + (Forward and backward migration from rhel-av-8.3.0(qemu-kvm-5.0.0) to rhel-av-8.2.1(qemu-kvm-4.2.0) failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'") + * Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-1.el8 - Quick changelog fix to reflect the current fixes: - Resolve: bz#1781911 From 915cb810bee63c39054093e3efba7a0f63241e0a Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Wed, 19 Aug 2020 13:01:59 -0400 Subject: [PATCH 086/195] * Wed Aug 19 2020 Danilo Cesar Lemes de Paula - 5.1.0-3.el8 - kvm-redhat-Update-hw_compat_8_2.patch [bz#1843348] - kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch [bz#1843348] - kvm-Disable-TPM-passthrough-backend-on-ARM.patch [bz#1801242] - kvm-Require-libfdt-1.6.0.patch [bz#1867847] - Resolves: bz#1801242 ([aarch64] vTPM support in machvirt) - Resolves: bz#1843348 (8.3 machine types for POWER) - Resolves: bz#1867847 ([ppc] virt module 7629: /usr/libexec/qemu-kvm: undefined symbol: fdt_check_full, version LIBFDT_1.2) --- ...sable-TPM-passthrough-backend-on-ARM.patch | 44 +++++++++++++ kvm-redhat-Update-hw_compat_8_2.patch | 64 +++++++++++++++++++ ...pdate-pseries-rhel8.2.0-machine-type.patch | 58 +++++++++++++++++ qemu-kvm.spec | 24 ++++++- 4 files changed, 188 insertions(+), 2 deletions(-) create mode 100644 kvm-Disable-TPM-passthrough-backend-on-ARM.patch create mode 100644 kvm-redhat-Update-hw_compat_8_2.patch create mode 100644 kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch diff --git a/kvm-Disable-TPM-passthrough-backend-on-ARM.patch b/kvm-Disable-TPM-passthrough-backend-on-ARM.patch new file mode 100644 index 0000000..59f9ffb --- /dev/null +++ b/kvm-Disable-TPM-passthrough-backend-on-ARM.patch @@ -0,0 +1,44 @@ +From 4a8ccfdf57fb0e0835faef9d95939d31546202f8 Mon Sep 17 00:00:00 2001 +From: Auger Eric +Date: Wed, 19 Aug 2020 09:16:03 -0400 +Subject: [PATCH 3/4] Disable TPM passthrough backend on ARM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Auger Eric +Message-id: <20200819091603.23319-1-eric.auger@redhat.com> +Patchwork-id: 98190 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH] Disable TPM passthrough backend on ARM +Bugzilla: 1801242 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Andrew Jones + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1801242 +UPSTREAM: not applicable +BRANCH: 
rhel-av-8.3.0 +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=30820412 + +Let's disable the TPM passthrough backend on ARM as this looks +to be a marginal use case and it was not tested comprehensively yet. + +Signed-off-by: Eric Auger +Signed-off-by: Danilo C. L. de Paula +--- + default-configs/aarch64-rh-devices.mak | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak +index 0b6a03f816..6a597384ea 100644 +--- a/default-configs/aarch64-rh-devices.mak ++++ b/default-configs/aarch64-rh-devices.mak +@@ -21,5 +21,4 @@ CONFIG_XIO3130=y + CONFIG_NVDIMM=y + CONFIG_ACPI_APEI=y + CONFIG_TPM_EMULATOR=y +-CONFIG_TPM_PASSTHROUGH=y + CONFIG_TPM_TIS_SYSBUS=y +-- +2.27.0 + diff --git a/kvm-redhat-Update-hw_compat_8_2.patch b/kvm-redhat-Update-hw_compat_8_2.patch new file mode 100644 index 0000000..f4a096f --- /dev/null +++ b/kvm-redhat-Update-hw_compat_8_2.patch @@ -0,0 +1,64 @@ +From 4effa711a867eaf0f10b38c2bcf4c8c39a00c1ab Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 14 Aug 2020 16:02:48 -0400 +Subject: [PATCH 1/4] redhat: Update hw_compat_8_2 + +RH-Author: Laurent Vivier +Message-id: <20200814160249.217753-2-lvivier@redhat.com> +Patchwork-id: 98157 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/2] redhat: Update hw_compat_8_2 +Bugzilla: 1843348 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Gibson + +v5.1.0-rc2 has introduced a new entry in hw_compat_5_0: + 2ebc21216f58 ("hw/pci-host: save/restore pci host config register") + +Add it in hw_compat_rhel_8_2 + +Update hw_compat_8_2 comment as it also includes hw_compat_5_0 +Move a blank line + +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/core/machine.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 6d17d9938d..10fa9b8c75 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -29,7 +29,7 @@ + #include "migration/vmstate.h" + + /* +- * The same as hw_compat_4_2 ++ * The same as hw_compat_4_2 + hw_compat_5_0 + */ + GlobalProperty hw_compat_rhel_8_2[] = { + /* hw_compat_rhel_8_2 from hw_compat_4_2 */ +@@ -55,6 +55,8 @@ GlobalProperty hw_compat_rhel_8_2[] = { + /* hw_compat_rhel_8_2 from hw_compat_4_2 */ + { "fw_cfg", "acpi-mr-restore", "false" }, + /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "pci-host-bridge", "x-config-reg-migration-enabled", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ + { "virtio-balloon-device", "page-poison", "false" }, + /* hw_compat_rhel_8_2 from hw_compat_5_0 */ + { "vmport", "x-read-set-eax", "off" }, +@@ -64,9 +66,9 @@ GlobalProperty hw_compat_rhel_8_2[] = { + { "vmport", "x-report-vmx-type", "off" }, + /* hw_compat_rhel_8_2 from hw_compat_5_0 */ + { "vmport", "x-cmds-v2", "off" }, +- + }; + const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); ++ + /* + * The same as hw_compat_4_1 + */ +-- +2.27.0 + diff --git a/kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch b/kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch new file mode 100644 index 0000000..45f4e77 --- /dev/null +++ b/kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch @@ -0,0 +1,58 @@ +From 1ab8783e716eb5ae2fb44b06a2db16b9fb91dad9 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 14 Aug 2020 16:02:49 -0400 +Subject: [PATCH 2/4] redhat: update pseries-rhel8.2.0 machine type + +RH-Author: Laurent Vivier +Message-id: <20200814160249.217753-3-lvivier@redhat.com> +Patchwork-id: 98156 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 2/2] redhat: update pseries-rhel8.2.0 machine type +Bugzilla: 1843348 +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Gibson + +v5.1.0-rc1 has modified the default state of pseries-5.1 and +introduced a new entry in pseries-5.0 machine type: + a6030d7e0b35 ("spapr: Add a new level of NUMA for GPUs") + +Add this entry to pseries-rhel8.2.0 as the default state has +also changed for pseries-rhel8.3.0 + +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 1c367a2367..5e3964326d 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -5019,10 +5019,15 @@ DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", true); + static void spapr_machine_rhel820_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ /* from pseries-5.0 */ ++ static GlobalProperty compat[] = { ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-5.1-associativity", "on" }, ++ }; + + spapr_machine_rhel830_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_2, + hw_compat_rhel_8_2_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + + /* from pseries-4.2 */ + smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF; +@@ -5031,6 +5036,7 @@ static void spapr_machine_rhel820_class_options(MachineClass *mc) + + /* from pseries-5.0 */ + mc->numa_mem_supported = true; ++ smc->pre_5_1_assoc_refpoints = true; + } + + DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", false); +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 32fbb46..e32e40f 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.1.0 -Release: 2%{?dist} +Release: 3%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -140,6 +140,12 @@ Patch0035: 0035-RHEL-only-arm-virt-Allow-the-TPM_TIS_SYSBUS-device-d.patch Patch0036: 0036-RHEL-only-Enable-vTPM-for-ARM-in-downstream-configs.patch # For bz#1853265 - Forward and backward migration from rhel-av-8.3.0(qemu-kvm-5.0.0) to rhel-av-8.2.1(qemu-kvm-4.2.0) failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'" Patch37: kvm-redhat-define-hw_compat_8_2.patch +# For bz#1843348 - 8.3 machine types for POWER +Patch38: kvm-redhat-Update-hw_compat_8_2.patch +# For bz#1843348 - 8.3 machine types for POWER +Patch39: kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch +# For bz#1801242 - [aarch64] vTPM support in machvirt +Patch40: kvm-Disable-TPM-passthrough-backend-on-ARM.patch BuildRequires: wget BuildRequires: rpm-build @@ -206,7 +212,6 @@ BuildRequires: rdma-core-devel %endif %if %{have_fdt} BuildRequires: libfdt-devel >= 1.6.0 -Requires: libfdt >= 1.6.0 %endif # iasl and cpp for acpi generation (not a hard requirement as we can use # pre-compiled files, but it's better to use this) @@ -303,6 +308,9 @@ Requires: libusbx >= 1.0.19 %if %{have_usbredir} Requires: usbredir >= 0.7.1 %endif +%if %{have_fdt} +Requires: libfdt >= 1.6.0 +%endif %rhev_ma_conflicts qemu-kvm @@ -1106,6 +1114,18 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Wed Aug 19 2020 Danilo Cesar Lemes de Paula - 5.1.0-3.el8 +- kvm-redhat-Update-hw_compat_8_2.patch [bz#1843348] +- kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch [bz#1843348] +- kvm-Disable-TPM-passthrough-backend-on-ARM.patch [bz#1801242] +- kvm-Require-libfdt-1.6.0.patch [bz#1867847] +- Resolves: bz#1801242 + ([aarch64] vTPM support in machvirt) +- Resolves: bz#1843348 + (8.3 machine types for POWER) +- Resolves: bz#1867847 + ([ppc] virt module 7629: /usr/libexec/qemu-kvm: undefined symbol: fdt_check_full, version LIBFDT_1.2) + * Wed Aug 12 2020 Danilo Cesar Lemes de 
Paula - 5.1.0-2.el8 - kvm-redhat-define-hw_compat_8_2.patch [bz#1853265] - Resolves: bz#1853265 From e5fba7f9b321aa2ab58f9baab8d09ec3a85585b7 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Thu, 27 Aug 2020 14:28:38 -0400 Subject: [PATCH 087/195] * Thu Aug 27 2020 Danilo Cesar Lemes de Paula - 5.1.0-4.el8 - kvm-Drop-bogus-IPv6-messages.patch [bz#1867075] - kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch [bz#1849707] - kvm-machine_types-numa-compatibility-for-auto_enable_num.patch [bz#1849707] - kvm-migration-Add-block-bitmap-mapping-parameter.patch [bz#1790492] - kvm-iotests.py-Let-wait_migration-return-on-failure.patch [bz#1790492] - kvm-iotests-Test-node-bitmap-aliases-during-migration.patch [bz#1790492] - Resolves: bz#1790492 ('dirty-bitmaps' migration capability should allow configuring target nodenames) - Resolves: bz#1849707 (8.3 machine types for x86 - 5.1 update) - Resolves: bz#1867075 (CVE-2020-10756 virt:8.3/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8]) --- kvm-Drop-bogus-IPv6-messages.patch | 51 + ...node-bitmap-aliases-during-migration.patch | 655 ++++++++++++ ...Let-wait_migration-return-on-failure.patch | 66 ++ ...ma-set-numa_mem_supported-on-old-mac.patch | 77 ++ ...ma-compatibility-for-auto_enable_num.patch | 81 ++ ...n-Add-block-bitmap-mapping-parameter.patch | 947 ++++++++++++++++++ qemu-kvm.spec | 28 +- 7 files changed, 1904 insertions(+), 1 deletion(-) create mode 100644 kvm-Drop-bogus-IPv6-messages.patch create mode 100644 kvm-iotests-Test-node-bitmap-aliases-during-migration.patch create mode 100644 kvm-iotests.py-Let-wait_migration-return-on-failure.patch create mode 100644 kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch create mode 100644 kvm-machine_types-numa-compatibility-for-auto_enable_num.patch create mode 100644 kvm-migration-Add-block-bitmap-mapping-parameter.patch diff --git a/kvm-Drop-bogus-IPv6-messages.patch 
b/kvm-Drop-bogus-IPv6-messages.patch new file mode 100644 index 0000000..337dee8 --- /dev/null +++ b/kvm-Drop-bogus-IPv6-messages.patch @@ -0,0 +1,51 @@ +From 6ceab004edfb7c1f0f03701bc2ae443941468fd7 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 17 Aug 2020 22:06:08 -0400 +Subject: [PATCH 1/6] Drop bogus IPv6 messages +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +Message-id: <20200817220608.1142611-2-jmaloy@redhat.com> +Patchwork-id: 98161 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/1] Drop bogus IPv6 messages +Bugzilla: 1867075 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Marc-André Lureau + +From: Ralf Haferkamp + +Drop IPv6 message shorter than what's mentioned in the payload +length header (+ the size of the IPv6 header). They're invalid and could +lead to data leakage in icmp6_send_echoreply(). + +(cherry picked from libslirp commit c7ede54cbd2e2b25385325600958ba0124e31cc0) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. de Paula +--- + slirp/src/ip6_input.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c +index a83e4f8e3d..f7ef354ee4 100644 +--- a/slirp/src/ip6_input.c ++++ b/slirp/src/ip6_input.c +@@ -56,6 +56,13 @@ void ip6_input(struct mbuf *m) + goto bad; + } + ++ // Check if the message size is big enough to hold what's ++ // set in the payload length header. 
If not this is an invalid ++ // packet ++ if (m->m_len < ntohs(ip6->ip_pl) + sizeof(struct ip6)) { ++ goto bad; ++ } ++ + /* check ip_ttl for a correct ICMP reply */ + if (ip6->ip_hl == 0) { + icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); +-- +2.27.0 + diff --git a/kvm-iotests-Test-node-bitmap-aliases-during-migration.patch b/kvm-iotests-Test-node-bitmap-aliases-during-migration.patch new file mode 100644 index 0000000..98c3433 --- /dev/null +++ b/kvm-iotests-Test-node-bitmap-aliases-during-migration.patch @@ -0,0 +1,655 @@ +From 2877fd4f92a86f43a113691f56738b09a0b4d500 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Mon, 24 Aug 2020 09:20:38 -0400 +Subject: [PATCH 6/6] iotests: Test node/bitmap aliases during migration + +RH-Author: Max Reitz +Message-id: <20200824092038.227913-4-mreitz@redhat.com> +Patchwork-id: 98214 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 3/3] iotests: Test node/bitmap aliases during migration +Bugzilla: 1790492 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Stefan Hajnoczi + +Signed-off-by: Max Reitz +Message-Id: <20200820150725.68687-4-mreitz@redhat.com> +Reviewed-by: Eric Blake +Tested-by: Eric Blake +[eblake: fold in python cleanups recommended by Vladimir] +Signed-off-by: Eric Blake +(cherry picked from commit cb5c6cd2dc984812f560fbe41f57a6bfc34d8708) +Signed-off-by: Max Reitz +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/300 | 593 +++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/300.out | 5 + + tests/qemu-iotests/group | 1 + + 3 files changed, 599 insertions(+) + create mode 100755 tests/qemu-iotests/300 + create mode 100644 tests/qemu-iotests/300.out + +diff --git a/tests/qemu-iotests/300 b/tests/qemu-iotests/300 +new file mode 100755 +index 0000000000..5b75121b84 +--- /dev/null ++++ b/tests/qemu-iotests/300 +@@ -0,0 +1,593 @@ ++#!/usr/bin/env python3 ++# ++# Copyright (C) 2020 Red Hat, Inc. 
++# ++# Tests for dirty bitmaps migration with node aliases ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++import os ++import random ++import re ++from typing import Dict, List, Optional, Union ++import iotests ++import qemu ++ ++BlockBitmapMapping = List[Dict[str, Union[str, List[Dict[str, str]]]]] ++ ++assert iotests.sock_dir is not None ++mig_sock = os.path.join(iotests.sock_dir, 'mig_sock') ++ ++ ++class TestDirtyBitmapMigration(iotests.QMPTestCase): ++ src_node_name: str = '' ++ dst_node_name: str = '' ++ src_bmap_name: str = '' ++ dst_bmap_name: str = '' ++ ++ def setUp(self) -> None: ++ self.vm_a = iotests.VM(path_suffix='-a') ++ self.vm_a.add_blockdev(f'node-name={self.src_node_name},' ++ 'driver=null-co') ++ self.vm_a.launch() ++ ++ self.vm_b = iotests.VM(path_suffix='-b') ++ self.vm_b.add_blockdev(f'node-name={self.dst_node_name},' ++ 'driver=null-co') ++ self.vm_b.add_incoming(f'unix:{mig_sock}') ++ self.vm_b.launch() ++ ++ result = self.vm_a.qmp('block-dirty-bitmap-add', ++ node=self.src_node_name, ++ name=self.src_bmap_name) ++ self.assert_qmp(result, 'return', {}) ++ ++ # Dirty some random megabytes ++ for _ in range(9): ++ mb_ofs = random.randrange(1024) ++ self.vm_a.hmp_qemu_io(self.src_node_name, f'discard {mb_ofs}M 1M') ++ ++ result = self.vm_a.qmp('x-debug-block-dirty-bitmap-sha256', ++ node=self.src_node_name, ++ name=self.src_bmap_name) ++ 
self.bitmap_hash_reference = result['return']['sha256'] ++ ++ caps = [{'capability': name, 'state': True} ++ for name in ('dirty-bitmaps', 'events')] ++ ++ for vm in (self.vm_a, self.vm_b): ++ result = vm.qmp('migrate-set-capabilities', capabilities=caps) ++ self.assert_qmp(result, 'return', {}) ++ ++ def tearDown(self) -> None: ++ self.vm_a.shutdown() ++ self.vm_b.shutdown() ++ try: ++ os.remove(mig_sock) ++ except OSError: ++ pass ++ ++ def check_bitmap(self, bitmap_name_valid: bool) -> None: ++ result = self.vm_b.qmp('x-debug-block-dirty-bitmap-sha256', ++ node=self.dst_node_name, ++ name=self.dst_bmap_name) ++ if bitmap_name_valid: ++ self.assert_qmp(result, 'return/sha256', ++ self.bitmap_hash_reference) ++ else: ++ self.assert_qmp(result, 'error/desc', ++ f"Dirty bitmap '{self.dst_bmap_name}' not found") ++ ++ def migrate(self, bitmap_name_valid: bool = True, ++ migration_success: bool = True) -> None: ++ result = self.vm_a.qmp('migrate', uri=f'unix:{mig_sock}') ++ self.assert_qmp(result, 'return', {}) ++ ++ with iotests.Timeout(5, 'Timeout waiting for migration to complete'): ++ self.assertEqual(self.vm_a.wait_migration('postmigrate'), ++ migration_success) ++ self.assertEqual(self.vm_b.wait_migration('running'), ++ migration_success) ++ ++ if migration_success: ++ self.check_bitmap(bitmap_name_valid) ++ ++ def verify_dest_error(self, msg: Optional[str]) -> None: ++ """ ++ Check whether the given error message is present in vm_b's log. ++ (vm_b is shut down to do so.) ++ If @msg is None, check that there has not been any error. 
++ """ ++ self.vm_b.shutdown() ++ if msg is None: ++ self.assertNotIn('qemu-system-', self.vm_b.get_log()) ++ else: ++ self.assertIn(msg, self.vm_b.get_log()) ++ ++ @staticmethod ++ def mapping(node_name: str, node_alias: str, ++ bitmap_name: str, bitmap_alias: str) -> BlockBitmapMapping: ++ return [{ ++ 'node-name': node_name, ++ 'alias': node_alias, ++ 'bitmaps': [{ ++ 'name': bitmap_name, ++ 'alias': bitmap_alias ++ }] ++ }] ++ ++ def set_mapping(self, vm: iotests.VM, mapping: BlockBitmapMapping, ++ error: Optional[str] = None) -> None: ++ """ ++ Invoke migrate-set-parameters on @vm to set the given @mapping. ++ Check for success if @error is None, or verify the error message ++ if it is not. ++ On success, verify that "info migrate_parameters" on HMP returns ++ our mapping. (Just to check its formatting code.) ++ """ ++ result = vm.qmp('migrate-set-parameters', ++ block_bitmap_mapping=mapping) ++ ++ if error is None: ++ self.assert_qmp(result, 'return', {}) ++ ++ result = vm.qmp('human-monitor-command', ++ command_line='info migrate_parameters') ++ ++ m = re.search(r'^block-bitmap-mapping:\r?(\n .*)*\n', ++ result['return'], flags=re.MULTILINE) ++ hmp_mapping = m.group(0).replace('\r', '') if m else None ++ ++ self.assertEqual(hmp_mapping, self.to_hmp_mapping(mapping)) ++ else: ++ self.assert_qmp(result, 'error/desc', error) ++ ++ @staticmethod ++ def to_hmp_mapping(mapping: BlockBitmapMapping) -> str: ++ result = 'block-bitmap-mapping:\n' ++ ++ for node in mapping: ++ result += f" '{node['node-name']}' -> '{node['alias']}'\n" ++ ++ assert isinstance(node['bitmaps'], list) ++ for bitmap in node['bitmaps']: ++ result += f" '{bitmap['name']}' -> '{bitmap['alias']}'\n" ++ ++ return result ++ ++ ++class TestAliasMigration(TestDirtyBitmapMigration): ++ src_node_name = 'node0' ++ dst_node_name = 'node0' ++ src_bmap_name = 'bmap0' ++ dst_bmap_name = 'bmap0' ++ ++ def test_migration_without_alias(self) -> None: ++ self.migrate(self.src_node_name == self.dst_node_name 
and ++ self.src_bmap_name == self.dst_bmap_name) ++ ++ # Check for error message on the destination ++ if self.src_node_name != self.dst_node_name: ++ self.verify_dest_error(f"Cannot find " ++ f"device={self.src_node_name} nor " ++ f"node_name={self.src_node_name}") ++ else: ++ self.verify_dest_error(None) ++ ++ def test_alias_on_src_migration(self) -> None: ++ mapping = self.mapping(self.src_node_name, self.dst_node_name, ++ self.src_bmap_name, self.dst_bmap_name) ++ ++ self.set_mapping(self.vm_a, mapping) ++ self.migrate() ++ self.verify_dest_error(None) ++ ++ def test_alias_on_dst_migration(self) -> None: ++ mapping = self.mapping(self.dst_node_name, self.src_node_name, ++ self.dst_bmap_name, self.src_bmap_name) ++ ++ self.set_mapping(self.vm_b, mapping) ++ self.migrate() ++ self.verify_dest_error(None) ++ ++ def test_alias_on_both_migration(self) -> None: ++ src_map = self.mapping(self.src_node_name, 'node-alias', ++ self.src_bmap_name, 'bmap-alias') ++ ++ dst_map = self.mapping(self.dst_node_name, 'node-alias', ++ self.dst_bmap_name, 'bmap-alias') ++ ++ self.set_mapping(self.vm_a, src_map) ++ self.set_mapping(self.vm_b, dst_map) ++ self.migrate() ++ self.verify_dest_error(None) ++ ++ ++class TestNodeAliasMigration(TestAliasMigration): ++ src_node_name = 'node-src' ++ dst_node_name = 'node-dst' ++ ++ ++class TestBitmapAliasMigration(TestAliasMigration): ++ src_bmap_name = 'bmap-src' ++ dst_bmap_name = 'bmap-dst' ++ ++ ++class TestFullAliasMigration(TestAliasMigration): ++ src_node_name = 'node-src' ++ dst_node_name = 'node-dst' ++ src_bmap_name = 'bmap-src' ++ dst_bmap_name = 'bmap-dst' ++ ++ ++class TestLongBitmapNames(TestAliasMigration): ++ # Giving long bitmap names is OK, as long as there is a short alias for ++ # migration ++ src_bmap_name = 'a' * 512 ++ dst_bmap_name = 'b' * 512 ++ ++ # Skip all tests that do not use the intermediate alias ++ def test_migration_without_alias(self) -> None: ++ pass ++ ++ def test_alias_on_src_migration(self) -> None: ++ 
pass ++ ++ def test_alias_on_dst_migration(self) -> None: ++ pass ++ ++ ++class TestBlockBitmapMappingErrors(TestDirtyBitmapMigration): ++ src_node_name = 'node0' ++ dst_node_name = 'node0' ++ src_bmap_name = 'bmap0' ++ dst_bmap_name = 'bmap0' ++ ++ """ ++ Note that mapping nodes or bitmaps that do not exist is not an error. ++ """ ++ ++ def test_non_injective_node_mapping(self) -> None: ++ mapping: BlockBitmapMapping = [ ++ { ++ 'node-name': 'node0', ++ 'alias': 'common-alias', ++ 'bitmaps': [{ ++ 'name': 'bmap0', ++ 'alias': 'bmap-alias0' ++ }] ++ }, ++ { ++ 'node-name': 'node1', ++ 'alias': 'common-alias', ++ 'bitmaps': [{ ++ 'name': 'bmap1', ++ 'alias': 'bmap-alias1' ++ }] ++ } ++ ] ++ ++ self.set_mapping(self.vm_a, mapping, ++ "Invalid mapping given for block-bitmap-mapping: " ++ "The node alias 'common-alias' is used twice") ++ ++ def test_non_injective_bitmap_mapping(self) -> None: ++ mapping: BlockBitmapMapping = [{ ++ 'node-name': 'node0', ++ 'alias': 'node-alias0', ++ 'bitmaps': [ ++ { ++ 'name': 'bmap0', ++ 'alias': 'common-alias' ++ }, ++ { ++ 'name': 'bmap1', ++ 'alias': 'common-alias' ++ } ++ ] ++ }] ++ ++ self.set_mapping(self.vm_a, mapping, ++ "Invalid mapping given for block-bitmap-mapping: " ++ "The bitmap alias 'node-alias0'/'common-alias' is " ++ "used twice") ++ ++ def test_ambiguous_node_mapping(self) -> None: ++ mapping: BlockBitmapMapping = [ ++ { ++ 'node-name': 'node0', ++ 'alias': 'node-alias0', ++ 'bitmaps': [{ ++ 'name': 'bmap0', ++ 'alias': 'bmap-alias0' ++ }] ++ }, ++ { ++ 'node-name': 'node0', ++ 'alias': 'node-alias1', ++ 'bitmaps': [{ ++ 'name': 'bmap0', ++ 'alias': 'bmap-alias0' ++ }] ++ } ++ ] ++ ++ self.set_mapping(self.vm_a, mapping, ++ "Invalid mapping given for block-bitmap-mapping: " ++ "The node name 'node0' is mapped twice") ++ ++ def test_ambiguous_bitmap_mapping(self) -> None: ++ mapping: BlockBitmapMapping = [{ ++ 'node-name': 'node0', ++ 'alias': 'node-alias0', ++ 'bitmaps': [ ++ { ++ 'name': 'bmap0', ++ 'alias': 
'bmap-alias0' ++ }, ++ { ++ 'name': 'bmap0', ++ 'alias': 'bmap-alias1' ++ } ++ ] ++ }] ++ ++ self.set_mapping(self.vm_a, mapping, ++ "Invalid mapping given for block-bitmap-mapping: " ++ "The bitmap 'node0'/'bmap0' is mapped twice") ++ ++ def test_migratee_node_is_not_mapped_on_src(self) -> None: ++ self.set_mapping(self.vm_a, []) ++ # Should just ignore all bitmaps on unmapped nodes ++ self.migrate(False) ++ self.verify_dest_error(None) ++ ++ def test_migratee_node_is_not_mapped_on_dst(self) -> None: ++ self.set_mapping(self.vm_b, []) ++ self.migrate(False) ++ self.verify_dest_error(f"Unknown node alias '{self.src_node_name}'") ++ ++ def test_migratee_bitmap_is_not_mapped_on_src(self) -> None: ++ mapping: BlockBitmapMapping = [{ ++ 'node-name': self.src_node_name, ++ 'alias': self.dst_node_name, ++ 'bitmaps': [] ++ }] ++ ++ self.set_mapping(self.vm_a, mapping) ++ # Should just ignore all unmapped bitmaps ++ self.migrate(False) ++ self.verify_dest_error(None) ++ ++ def test_migratee_bitmap_is_not_mapped_on_dst(self) -> None: ++ mapping: BlockBitmapMapping = [{ ++ 'node-name': self.dst_node_name, ++ 'alias': self.src_node_name, ++ 'bitmaps': [] ++ }] ++ ++ self.set_mapping(self.vm_b, mapping) ++ self.migrate(False) ++ self.verify_dest_error(f"Unknown bitmap alias " ++ f"'{self.src_bmap_name}' " ++ f"on node '{self.dst_node_name}' " ++ f"(alias '{self.src_node_name}')") ++ ++ def test_unused_mapping_on_dst(self) -> None: ++ # Let the source not send any bitmaps ++ self.set_mapping(self.vm_a, []) ++ ++ # Establish some mapping on the destination ++ self.set_mapping(self.vm_b, []) ++ ++ # The fact that there is a mapping on B without any bitmaps ++ # being received should be fine, not fatal ++ self.migrate(False) ++ self.verify_dest_error(None) ++ ++ def test_non_wellformed_node_alias(self) -> None: ++ alias = '123-foo' ++ ++ mapping: BlockBitmapMapping = [{ ++ 'node-name': self.src_node_name, ++ 'alias': alias, ++ 'bitmaps': [] ++ }] ++ ++ self.set_mapping(self.vm_a, 
mapping, ++ f"Invalid mapping given for block-bitmap-mapping: " ++ f"The node alias '{alias}' is not well-formed") ++ ++ def test_node_alias_too_long(self) -> None: ++ alias = 'a' * 256 ++ ++ mapping: BlockBitmapMapping = [{ ++ 'node-name': self.src_node_name, ++ 'alias': alias, ++ 'bitmaps': [] ++ }] ++ ++ self.set_mapping(self.vm_a, mapping, ++ f"Invalid mapping given for block-bitmap-mapping: " ++ f"The node alias '{alias}' is longer than 255 bytes") ++ ++ def test_bitmap_alias_too_long(self) -> None: ++ alias = 'a' * 256 ++ ++ mapping = self.mapping(self.src_node_name, self.dst_node_name, ++ self.src_bmap_name, alias) ++ ++ self.set_mapping(self.vm_a, mapping, ++ f"Invalid mapping given for block-bitmap-mapping: " ++ f"The bitmap alias '{alias}' is longer than 255 " ++ f"bytes") ++ ++ def test_bitmap_name_too_long(self) -> None: ++ name = 'a' * 256 ++ ++ result = self.vm_a.qmp('block-dirty-bitmap-add', ++ node=self.src_node_name, ++ name=name) ++ self.assert_qmp(result, 'return', {}) ++ ++ self.migrate(False, False) ++ ++ # Check for the error in the source's log ++ self.vm_a.shutdown() ++ self.assertIn(f"Cannot migrate bitmap '{name}' on node " ++ f"'{self.src_node_name}': Name is longer than 255 bytes", ++ self.vm_a.get_log()) ++ ++ # Expect abnormal shutdown of the destination VM because of ++ # the failed migration ++ try: ++ self.vm_b.shutdown() ++ except qemu.machine.AbnormalShutdown: ++ pass ++ ++ def test_aliased_bitmap_name_too_long(self) -> None: ++ # Longer than the maximum for bitmap names ++ self.dst_bmap_name = 'a' * 1024 ++ ++ mapping = self.mapping(self.dst_node_name, self.src_node_name, ++ self.dst_bmap_name, self.src_bmap_name) ++ ++ # We would have to create this bitmap during migration, and ++ # that would fail, because the name is too long. Better to ++ # catch it early. 
++ self.set_mapping(self.vm_b, mapping, ++ f"Invalid mapping given for block-bitmap-mapping: " ++ f"The bitmap name '{self.dst_bmap_name}' is longer " ++ f"than 1023 bytes") ++ ++ def test_node_name_too_long(self) -> None: ++ # Longer than the maximum for node names ++ self.dst_node_name = 'a' * 32 ++ ++ mapping = self.mapping(self.dst_node_name, self.src_node_name, ++ self.dst_bmap_name, self.src_bmap_name) ++ ++ # During migration, this would appear simply as a node that ++ # cannot be found. Still better to catch impossible node ++ # names early (similar to test_non_wellformed_node_alias). ++ self.set_mapping(self.vm_b, mapping, ++ f"Invalid mapping given for block-bitmap-mapping: " ++ f"The node name '{self.dst_node_name}' is longer " ++ f"than 31 bytes") ++ ++ ++class TestCrossAliasMigration(TestDirtyBitmapMigration): ++ """ ++ Swap aliases, both to see that qemu does not get confused, and ++ that we can migrate multiple things at once. ++ ++ So we migrate this: ++ node-a.bmap-a -> node-b.bmap-b ++ node-a.bmap-b -> node-b.bmap-a ++ node-b.bmap-a -> node-a.bmap-b ++ node-b.bmap-b -> node-a.bmap-a ++ """ ++ ++ src_node_name = 'node-a' ++ dst_node_name = 'node-b' ++ src_bmap_name = 'bmap-a' ++ dst_bmap_name = 'bmap-b' ++ ++ def setUp(self) -> None: ++ TestDirtyBitmapMigration.setUp(self) ++ ++ # Now create another block device and let both have two bitmaps each ++ result = self.vm_a.qmp('blockdev-add', ++ node_name='node-b', driver='null-co') ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.vm_b.qmp('blockdev-add', ++ node_name='node-a', driver='null-co') ++ self.assert_qmp(result, 'return', {}) ++ ++ bmaps_to_add = (('node-a', 'bmap-b'), ++ ('node-b', 'bmap-a'), ++ ('node-b', 'bmap-b')) ++ ++ for (node, bmap) in bmaps_to_add: ++ result = self.vm_a.qmp('block-dirty-bitmap-add', ++ node=node, name=bmap) ++ self.assert_qmp(result, 'return', {}) ++ ++ @staticmethod ++ def cross_mapping() -> BlockBitmapMapping: ++ return [ ++ { ++ 'node-name': 'node-a', 
++ 'alias': 'node-b', ++ 'bitmaps': [ ++ { ++ 'name': 'bmap-a', ++ 'alias': 'bmap-b' ++ }, ++ { ++ 'name': 'bmap-b', ++ 'alias': 'bmap-a' ++ } ++ ] ++ }, ++ { ++ 'node-name': 'node-b', ++ 'alias': 'node-a', ++ 'bitmaps': [ ++ { ++ 'name': 'bmap-b', ++ 'alias': 'bmap-a' ++ }, ++ { ++ 'name': 'bmap-a', ++ 'alias': 'bmap-b' ++ } ++ ] ++ } ++ ] ++ ++ def verify_dest_has_all_bitmaps(self) -> None: ++ bitmaps = self.vm_b.query_bitmaps() ++ ++ # Extract and sort bitmap names ++ for node in bitmaps: ++ bitmaps[node] = sorted((bmap['name'] for bmap in bitmaps[node])) ++ ++ self.assertEqual(bitmaps, ++ {'node-a': ['bmap-a', 'bmap-b'], ++ 'node-b': ['bmap-a', 'bmap-b']}) ++ ++ def test_alias_on_src(self) -> None: ++ self.set_mapping(self.vm_a, self.cross_mapping()) ++ ++ # Checks that node-a.bmap-a was migrated to node-b.bmap-b, and ++ # that is enough ++ self.migrate() ++ self.verify_dest_has_all_bitmaps() ++ self.verify_dest_error(None) ++ ++ def test_alias_on_dst(self) -> None: ++ self.set_mapping(self.vm_b, self.cross_mapping()) ++ ++ # Checks that node-a.bmap-a was migrated to node-b.bmap-b, and ++ # that is enough ++ self.migrate() ++ self.verify_dest_has_all_bitmaps() ++ self.verify_dest_error(None) ++ ++ ++if __name__ == '__main__': ++ iotests.main(supported_protocols=['file']) +diff --git a/tests/qemu-iotests/300.out b/tests/qemu-iotests/300.out +new file mode 100644 +index 0000000000..cafb8161f7 +--- /dev/null ++++ b/tests/qemu-iotests/300.out +@@ -0,0 +1,5 @@ ++..................................... 
++---------------------------------------------------------------------- ++Ran 37 tests ++ ++OK +diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group +index 025ed5238d..b0b55e241c 100644 +--- a/tests/qemu-iotests/group ++++ b/tests/qemu-iotests/group +@@ -307,5 +307,6 @@ + 296 rw + 297 meta + 299 auto quick ++300 migration + 301 backing quick + 302 quick +-- +2.27.0 + diff --git a/kvm-iotests.py-Let-wait_migration-return-on-failure.patch b/kvm-iotests.py-Let-wait_migration-return-on-failure.patch new file mode 100644 index 0000000..452d080 --- /dev/null +++ b/kvm-iotests.py-Let-wait_migration-return-on-failure.patch @@ -0,0 +1,66 @@ +From 2a597bba9b1e07adb6531628962682a0e53d29b1 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Mon, 24 Aug 2020 09:20:37 -0400 +Subject: [PATCH 5/6] iotests.py: Let wait_migration() return on failure + +RH-Author: Max Reitz +Message-id: <20200824092038.227913-3-mreitz@redhat.com> +Patchwork-id: 98213 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 2/3] iotests.py: Let wait_migration() return on failure +Bugzilla: 1790492 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Stefan Hajnoczi + +Let wait_migration() return on failure (with the return value indicating +whether the migration was completed or has failed), so we can use it for +migrations that are expected to fail, too. + +Signed-off-by: Max Reitz +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200820150725.68687-3-mreitz@redhat.com> +Signed-off-by: Eric Blake +(cherry picked from commit 4bf63c80357031be4eb8fff8a751f40e73ef1c10) +Signed-off-by: Max Reitz +Signed-off-by: Danilo C. L. 
de Paula +--- + tests/qemu-iotests/iotests.py | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 717b5b652c..e197c73ca5 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -729,16 +729,22 @@ class VM(qtest.QEMUQtestMachine): + } + ])) + +- def wait_migration(self, expect_runstate): ++ def wait_migration(self, expect_runstate: Optional[str]) -> bool: + while True: + event = self.event_wait('MIGRATION') + log(event, filters=[filter_qmp_event]) +- if event['data']['status'] == 'completed': ++ if event['data']['status'] in ('completed', 'failed'): + break +- # The event may occur in finish-migrate, so wait for the expected +- # post-migration runstate +- while self.qmp('query-status')['return']['status'] != expect_runstate: +- pass ++ ++ if event['data']['status'] == 'completed': ++ # The event may occur in finish-migrate, so wait for the expected ++ # post-migration runstate ++ runstate = None ++ while runstate != expect_runstate: ++ runstate = self.qmp('query-status')['return']['status'] ++ return True ++ else: ++ return False + + def node_info(self, node_name): + nodes = self.qmp('query-named-block-nodes') +-- +2.27.0 + diff --git a/kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch b/kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch new file mode 100644 index 0000000..7816d07 --- /dev/null +++ b/kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch @@ -0,0 +1,77 @@ +From 6d7ba662e980fcc6f3056173043136063e6d68db Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 20 Aug 2020 15:14:18 -0400 +Subject: [PATCH 2/6] machine types/numa: set numa_mem_supported on old machine + types + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20200820151419.14723-2-dgilbert@redhat.com> +Patchwork-id: 98197 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 1/2] machine types/numa: set numa_mem_supported on old machine types +Bugzilla: 1849707 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: David Hildenbrand +RH-Acked-by: Igor Mammedov + +From: "Dr. David Alan Gilbert" + +Reenable the -numa mem= syntax for old machine types, this is making +the downstream old machines behave in the same way as the upstream old +machines changed in upstream 32a354dc6c07d7. + +Power already seems to have the change. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/virt.c | 2 +- + hw/i386/pc_piix.c | 1 + + hw/i386/pc_q35.c | 1 + + 3 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index f087483a04..26a7920081 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2755,7 +2755,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + hc->plug = virt_machine_device_plug_cb; + hc->unplug_request = virt_machine_device_unplug_request_cb; + hc->unplug = virt_machine_device_unplug_cb; +- mc->numa_mem_supported = true; + mc->nvdimm_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->default_ram_id = "mach-virt.ram"; +@@ -2860,5 +2859,6 @@ static void rhel820_virt_options(MachineClass *mc) + rhel830_virt_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_2, + hw_compat_rhel_8_2_len); ++ mc->numa_mem_supported = true; + } + DEFINE_RHEL_MACHINE(8, 2, 0) +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 4af4497a0c..bda2d9ffc8 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1009,6 +1009,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + pcmc->default_nic_model = "e1000"; + m->default_display = "std"; + m->no_parallel = 1; ++ m->numa_mem_supported = true; + machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); + compat_props_add(m->compat_props, 
pc_rhel_compat, pc_rhel_compat_len); + m->alias = "pc"; +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index c709460ab7..d1e3a9b575 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -617,6 +617,7 @@ static void pc_q35_machine_rhel820_options(MachineClass *m) + pc_q35_machine_rhel_options(m); + m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; + m->alias = NULL; ++ m->numa_mem_supported = true; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.2.0"; + compat_props_add(m->compat_props, hw_compat_rhel_8_2, +-- +2.27.0 + diff --git a/kvm-machine_types-numa-compatibility-for-auto_enable_num.patch b/kvm-machine_types-numa-compatibility-for-auto_enable_num.patch new file mode 100644 index 0000000..6296a75 --- /dev/null +++ b/kvm-machine_types-numa-compatibility-for-auto_enable_num.patch @@ -0,0 +1,81 @@ +From 25c5644164e3286dc722d59c8d7876b1c49c1385 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 20 Aug 2020 15:14:19 -0400 +Subject: [PATCH 3/6] machine_types/numa: compatibility for + auto_enable_numa_with_memdev + +RH-Author: Dr. David Alan Gilbert +Message-id: <20200820151419.14723-3-dgilbert@redhat.com> +Patchwork-id: 98196 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 2/2] machine_types/numa: compatibility for auto_enable_numa_with_memdev +Bugzilla: 1849707 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Igor Mammedov +RH-Acked-by: David Hildenbrand + +From: "Dr. David Alan Gilbert" + +The auto_enable_numa_with_memdev flag automatically creates a +NUMA node in a case like: + + -m 8G,maxmem=16G + +but we need it to keep old machine types the same. +This is (mostly) done for upstream machine types in 195784a0cfad. + +Power seems to have auto_enable_numa permanently on anyway. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L.
de Paula +--- + hw/arm/virt.c | 2 ++ + hw/i386/pc_piix.c | 1 + + hw/i386/pc_q35.c | 1 + + 3 files changed, 4 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 26a7920081..26102f22ff 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2757,6 +2757,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + hc->unplug = virt_machine_device_unplug_cb; + mc->nvdimm_supported = true; + mc->auto_enable_numa_with_memhp = true; ++ mc->auto_enable_numa_with_memdev = true; + mc->default_ram_id = "mach-virt.ram"; + + object_class_property_add(oc, "acpi", "OnOffAuto", +@@ -2860,5 +2861,6 @@ static void rhel820_virt_options(MachineClass *mc) + compat_props_add(mc->compat_props, hw_compat_rhel_8_2, + hw_compat_rhel_8_2_len); + mc->numa_mem_supported = true; ++ mc->auto_enable_numa_with_memdev = false; + } + DEFINE_RHEL_MACHINE(8, 2, 0) +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index bda2d9ffc8..2415c5edd6 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1010,6 +1010,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + m->default_display = "std"; + m->no_parallel = 1; + m->numa_mem_supported = true; ++ m->auto_enable_numa_with_memdev = false; + machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + m->alias = "pc"; +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index d1e3a9b575..87a0572ec1 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -618,6 +618,7 @@ static void pc_q35_machine_rhel820_options(MachineClass *m) + m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; + m->alias = NULL; + m->numa_mem_supported = true; ++ m->auto_enable_numa_with_memdev = false; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.2.0"; + compat_props_add(m->compat_props, hw_compat_rhel_8_2, +-- +2.27.0 + diff --git a/kvm-migration-Add-block-bitmap-mapping-parameter.patch 
b/kvm-migration-Add-block-bitmap-mapping-parameter.patch new file mode 100644 index 0000000..1944c27 --- /dev/null +++ b/kvm-migration-Add-block-bitmap-mapping-parameter.patch @@ -0,0 +1,947 @@ +From 8ac15801169cb8744b57b939a3c751ea9d381d98 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Mon, 24 Aug 2020 09:20:36 -0400 +Subject: [PATCH 4/6] migration: Add block-bitmap-mapping parameter + +RH-Author: Max Reitz +Message-id: <20200824092038.227913-2-mreitz@redhat.com> +Patchwork-id: 98211 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/3] migration: Add block-bitmap-mapping parameter +Bugzilla: 1790492 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Stefan Hajnoczi + +This migration parameter allows mapping block node names and bitmap +names to aliases for the purpose of block dirty bitmap migration. + +This way, management tools can use different node and bitmap names on +the source and destination and pass the mapping of how bitmaps are to be +transferred to qemu (on the source, the destination, or even both with +arbitrary aliases in the migration stream). + +While touching this code, fix a bug where bitmap names longer than 255 +bytes would fail an assertion in qemu_put_counted_string(). + +Suggested-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Max Reitz +Message-Id: <20200820150725.68687-2-mreitz@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Eric Blake +(cherry picked from commit 31e4c354b38cd42a051ad030eb7779d5e7ee32fe) +Signed-off-by: Max Reitz +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/block-dirty-bitmap.c | 412 ++++++++++++++++++++++++++++----- + migration/migration.c | 30 +++ + migration/migration.h | 3 + + monitor/hmp-cmds.c | 30 +++ + qapi/migration.json | 104 ++++++++- + 5 files changed, 522 insertions(+), 57 deletions(-) + +diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c +index 784330ebe1..549e14daba 100644 +--- a/migration/block-dirty-bitmap.c ++++ b/migration/block-dirty-bitmap.c +@@ -29,10 +29,10 @@ + * + * # Header (shared for different chunk types) + * 1, 2 or 4 bytes: flags (see qemu_{put,put}_flags) +- * [ 1 byte: node name size ] \ flags & DEVICE_NAME +- * [ n bytes: node name ] / +- * [ 1 byte: bitmap name size ] \ flags & BITMAP_NAME +- * [ n bytes: bitmap name ] / ++ * [ 1 byte: node alias size ] \ flags & DEVICE_NAME ++ * [ n bytes: node alias ] / ++ * [ 1 byte: bitmap alias size ] \ flags & BITMAP_NAME ++ * [ n bytes: bitmap alias ] / + * + * # Start of bitmap migration (flags & START) + * header +@@ -72,7 +72,9 @@ + #include "migration/register.h" + #include "qemu/hbitmap.h" + #include "qemu/cutils.h" ++#include "qemu/id.h" + #include "qapi/error.h" ++#include "qapi/qapi-commands-migration.h" + #include "trace.h" + + #define CHUNK_SIZE (1 << 10) +@@ -104,7 +106,8 @@ + typedef struct SaveBitmapState { + /* Written during setup phase. 
*/ + BlockDriverState *bs; +- const char *node_name; ++ char *node_alias; ++ char *bitmap_alias; + BdrvDirtyBitmap *bitmap; + uint64_t total_sectors; + uint64_t sectors_per_chunk; +@@ -138,8 +141,9 @@ typedef struct LoadBitmapState { + /* State of the dirty bitmap migration (DBM) during load process */ + typedef struct DBMLoadState { + uint32_t flags; +- char node_name[256]; +- char bitmap_name[256]; ++ char node_alias[256]; ++ char bitmap_alias[256]; ++ char bitmap_name[BDRV_BITMAP_MAX_NAME_SIZE + 1]; + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; + +@@ -165,6 +169,188 @@ typedef struct DBMState { + + static DBMState dbm_state; + ++/* For hash tables that map node/bitmap names to aliases */ ++typedef struct AliasMapInnerNode { ++ char *string; ++ GHashTable *subtree; ++} AliasMapInnerNode; ++ ++static void free_alias_map_inner_node(void *amin_ptr) ++{ ++ AliasMapInnerNode *amin = amin_ptr; ++ ++ g_free(amin->string); ++ g_hash_table_unref(amin->subtree); ++ g_free(amin); ++} ++ ++/** ++ * Construct an alias map based on the given QMP structure. ++ * ++ * (Note that we cannot store such maps in the MigrationParameters ++ * object, because that struct is defined by the QAPI schema, which ++ * makes it basically impossible to have dicts with arbitrary keys. ++ * Therefore, we instead have to construct these maps when migration ++ * starts.) ++ * ++ * @bbm is the block_bitmap_mapping from the migration parameters. ++ * ++ * If @name_to_alias is true, the returned hash table will map node ++ * and bitmap names to their respective aliases (for outgoing ++ * migration). ++ * ++ * If @name_to_alias is false, the returned hash table will map node ++ * and bitmap aliases to their respective names (for incoming ++ * migration). ++ * ++ * The hash table maps node names/aliases to AliasMapInnerNode ++ * objects, whose .string is the respective node alias/name, and whose ++ * .subtree table maps bitmap names/aliases to the respective bitmap ++ * alias/name. 
++ */ ++static GHashTable *construct_alias_map(const BitmapMigrationNodeAliasList *bbm, ++ bool name_to_alias, ++ Error **errp) ++{ ++ GHashTable *alias_map; ++ size_t max_node_name_len = sizeof_field(BlockDriverState, node_name) - 1; ++ ++ alias_map = g_hash_table_new_full(g_str_hash, g_str_equal, ++ g_free, free_alias_map_inner_node); ++ ++ for (; bbm; bbm = bbm->next) { ++ const BitmapMigrationNodeAlias *bmna = bbm->value; ++ const BitmapMigrationBitmapAliasList *bmbal; ++ AliasMapInnerNode *amin; ++ GHashTable *bitmaps_map; ++ const char *node_map_from, *node_map_to; ++ ++ if (!id_wellformed(bmna->alias)) { ++ error_setg(errp, "The node alias '%s' is not well-formed", ++ bmna->alias); ++ goto fail; ++ } ++ ++ if (strlen(bmna->alias) > UINT8_MAX) { ++ error_setg(errp, "The node alias '%s' is longer than %u bytes", ++ bmna->alias, UINT8_MAX); ++ goto fail; ++ } ++ ++ if (strlen(bmna->node_name) > max_node_name_len) { ++ error_setg(errp, "The node name '%s' is longer than %zu bytes", ++ bmna->node_name, max_node_name_len); ++ goto fail; ++ } ++ ++ if (name_to_alias) { ++ if (g_hash_table_contains(alias_map, bmna->node_name)) { ++ error_setg(errp, "The node name '%s' is mapped twice", ++ bmna->node_name); ++ goto fail; ++ } ++ ++ node_map_from = bmna->node_name; ++ node_map_to = bmna->alias; ++ } else { ++ if (g_hash_table_contains(alias_map, bmna->alias)) { ++ error_setg(errp, "The node alias '%s' is used twice", ++ bmna->alias); ++ goto fail; ++ } ++ ++ node_map_from = bmna->alias; ++ node_map_to = bmna->node_name; ++ } ++ ++ bitmaps_map = g_hash_table_new_full(g_str_hash, g_str_equal, ++ g_free, g_free); ++ ++ amin = g_new(AliasMapInnerNode, 1); ++ *amin = (AliasMapInnerNode){ ++ .string = g_strdup(node_map_to), ++ .subtree = bitmaps_map, ++ }; ++ ++ g_hash_table_insert(alias_map, g_strdup(node_map_from), amin); ++ ++ for (bmbal = bmna->bitmaps; bmbal; bmbal = bmbal->next) { ++ const BitmapMigrationBitmapAlias *bmba = bmbal->value; ++ const char *bmap_map_from, 
*bmap_map_to; ++ ++ if (strlen(bmba->alias) > UINT8_MAX) { ++ error_setg(errp, ++ "The bitmap alias '%s' is longer than %u bytes", ++ bmba->alias, UINT8_MAX); ++ goto fail; ++ } ++ ++ if (strlen(bmba->name) > BDRV_BITMAP_MAX_NAME_SIZE) { ++ error_setg(errp, "The bitmap name '%s' is longer than %d bytes", ++ bmba->name, BDRV_BITMAP_MAX_NAME_SIZE); ++ goto fail; ++ } ++ ++ if (name_to_alias) { ++ bmap_map_from = bmba->name; ++ bmap_map_to = bmba->alias; ++ ++ if (g_hash_table_contains(bitmaps_map, bmba->name)) { ++ error_setg(errp, "The bitmap '%s'/'%s' is mapped twice", ++ bmna->node_name, bmba->name); ++ goto fail; ++ } ++ } else { ++ bmap_map_from = bmba->alias; ++ bmap_map_to = bmba->name; ++ ++ if (g_hash_table_contains(bitmaps_map, bmba->alias)) { ++ error_setg(errp, "The bitmap alias '%s'/'%s' is used twice", ++ bmna->alias, bmba->alias); ++ goto fail; ++ } ++ } ++ ++ g_hash_table_insert(bitmaps_map, ++ g_strdup(bmap_map_from), g_strdup(bmap_map_to)); ++ } ++ } ++ ++ return alias_map; ++ ++fail: ++ g_hash_table_destroy(alias_map); ++ return NULL; ++} ++ ++/** ++ * Run construct_alias_map() in both directions to check whether @bbm ++ * is valid. ++ * (This function is to be used by migration/migration.c to validate ++ * the user-specified block-bitmap-mapping migration parameter.) ++ * ++ * Returns true if and only if the mapping is valid. 
++ */ ++bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm, ++ Error **errp) ++{ ++ GHashTable *alias_map; ++ ++ alias_map = construct_alias_map(bbm, true, errp); ++ if (!alias_map) { ++ return false; ++ } ++ g_hash_table_destroy(alias_map); ++ ++ alias_map = construct_alias_map(bbm, false, errp); ++ if (!alias_map) { ++ return false; ++ } ++ g_hash_table_destroy(alias_map); ++ ++ return true; ++} ++ + static uint32_t qemu_get_bitmap_flags(QEMUFile *f) + { + uint8_t flags = qemu_get_byte(f); +@@ -207,11 +393,11 @@ static void send_bitmap_header(QEMUFile *f, DBMSaveState *s, + qemu_put_bitmap_flags(f, flags); + + if (flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) { +- qemu_put_counted_string(f, dbms->node_name); ++ qemu_put_counted_string(f, dbms->node_alias); + } + + if (flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) { +- qemu_put_counted_string(f, bdrv_dirty_bitmap_name(bitmap)); ++ qemu_put_counted_string(f, dbms->bitmap_alias); + } + } + +@@ -282,18 +468,25 @@ static void dirty_bitmap_do_save_cleanup(DBMSaveState *s) + QSIMPLEQ_REMOVE_HEAD(&s->dbms_list, entry); + bdrv_dirty_bitmap_set_busy(dbms->bitmap, false); + bdrv_unref(dbms->bs); ++ g_free(dbms->node_alias); ++ g_free(dbms->bitmap_alias); + g_free(dbms); + } + } + + /* Called with iothread lock taken. 
*/ + static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs, +- const char *bs_name) ++ const char *bs_name, GHashTable *alias_map) + { + BdrvDirtyBitmap *bitmap; + SaveBitmapState *dbms; ++ GHashTable *bitmap_aliases; ++ const char *node_alias, *bitmap_name, *bitmap_alias; + Error *local_err = NULL; + ++ /* When an alias map is given, @bs_name must be @bs's node name */ ++ assert(!alias_map || !strcmp(bs_name, bdrv_get_node_name(bs))); ++ + FOR_EACH_DIRTY_BITMAP(bs, bitmap) { + if (bdrv_dirty_bitmap_name(bitmap)) { + break; +@@ -303,21 +496,39 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs, + return 0; + } + ++ bitmap_name = bdrv_dirty_bitmap_name(bitmap); ++ + if (!bs_name || strcmp(bs_name, "") == 0) { + error_report("Bitmap '%s' in unnamed node can't be migrated", +- bdrv_dirty_bitmap_name(bitmap)); ++ bitmap_name); + return -1; + } + +- if (bs_name[0] == '#') { ++ if (alias_map) { ++ const AliasMapInnerNode *amin = g_hash_table_lookup(alias_map, bs_name); ++ ++ if (!amin) { ++ /* Skip bitmaps on nodes with no alias */ ++ return 0; ++ } ++ ++ node_alias = amin->string; ++ bitmap_aliases = amin->subtree; ++ } else { ++ node_alias = bs_name; ++ bitmap_aliases = NULL; ++ } ++ ++ if (node_alias[0] == '#') { + error_report("Bitmap '%s' in a node with auto-generated " + "name '%s' can't be migrated", +- bdrv_dirty_bitmap_name(bitmap), bs_name); ++ bitmap_name, node_alias); + return -1; + } + + FOR_EACH_DIRTY_BITMAP(bs, bitmap) { +- if (!bdrv_dirty_bitmap_name(bitmap)) { ++ bitmap_name = bdrv_dirty_bitmap_name(bitmap); ++ if (!bitmap_name) { + continue; + } + +@@ -326,12 +537,29 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs, + return -1; + } + ++ if (bitmap_aliases) { ++ bitmap_alias = g_hash_table_lookup(bitmap_aliases, bitmap_name); ++ if (!bitmap_alias) { ++ /* Skip bitmaps with no alias */ ++ continue; ++ } ++ } else { ++ if (strlen(bitmap_name) > UINT8_MAX) { ++ error_report("Cannot migrate bitmap 
'%s' on node '%s': " ++ "Name is longer than %u bytes", ++ bitmap_name, bs_name, UINT8_MAX); ++ return -1; ++ } ++ bitmap_alias = bitmap_name; ++ } ++ + bdrv_ref(bs); + bdrv_dirty_bitmap_set_busy(bitmap, true); + + dbms = g_new0(SaveBitmapState, 1); + dbms->bs = bs; +- dbms->node_name = bs_name; ++ dbms->node_alias = g_strdup(node_alias); ++ dbms->bitmap_alias = g_strdup(bitmap_alias); + dbms->bitmap = bitmap; + dbms->total_sectors = bdrv_nb_sectors(bs); + dbms->sectors_per_chunk = CHUNK_SIZE * 8 * +@@ -356,43 +584,52 @@ static int init_dirty_bitmap_migration(DBMSaveState *s) + SaveBitmapState *dbms; + GHashTable *handled_by_blk = g_hash_table_new(NULL, NULL); + BlockBackend *blk; ++ const MigrationParameters *mig_params = &migrate_get_current()->parameters; ++ GHashTable *alias_map = NULL; ++ ++ if (mig_params->has_block_bitmap_mapping) { ++ alias_map = construct_alias_map(mig_params->block_bitmap_mapping, true, ++ &error_abort); ++ } + + s->bulk_completed = false; + s->prev_bs = NULL; + s->prev_bitmap = NULL; + s->no_bitmaps = false; + +- /* +- * Use blockdevice name for direct (or filtered) children of named block +- * backends. +- */ +- for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { +- const char *name = blk_name(blk); +- +- if (!name || strcmp(name, "") == 0) { +- continue; +- } ++ if (!alias_map) { ++ /* ++ * Use blockdevice name for direct (or filtered) children of named block ++ * backends. 
++ */ ++ for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { ++ const char *name = blk_name(blk); + +- bs = blk_bs(blk); ++ if (!name || strcmp(name, "") == 0) { ++ continue; ++ } + +- /* Skip filters without bitmaps */ +- while (bs && bs->drv && bs->drv->is_filter && +- !bdrv_has_named_bitmaps(bs)) +- { +- if (bs->backing) { +- bs = bs->backing->bs; +- } else if (bs->file) { +- bs = bs->file->bs; +- } else { +- bs = NULL; ++ bs = blk_bs(blk); ++ ++ /* Skip filters without bitmaps */ ++ while (bs && bs->drv && bs->drv->is_filter && ++ !bdrv_has_named_bitmaps(bs)) ++ { ++ if (bs->backing) { ++ bs = bs->backing->bs; ++ } else if (bs->file) { ++ bs = bs->file->bs; ++ } else { ++ bs = NULL; ++ } + } +- } + +- if (bs && bs->drv && !bs->drv->is_filter) { +- if (add_bitmaps_to_list(s, bs, name)) { +- goto fail; ++ if (bs && bs->drv && !bs->drv->is_filter) { ++ if (add_bitmaps_to_list(s, bs, name, NULL)) { ++ goto fail; ++ } ++ g_hash_table_add(handled_by_blk, bs); + } +- g_hash_table_add(handled_by_blk, bs); + } + } + +@@ -401,7 +638,7 @@ static int init_dirty_bitmap_migration(DBMSaveState *s) + continue; + } + +- if (add_bitmaps_to_list(s, bs, bdrv_get_node_name(bs))) { ++ if (add_bitmaps_to_list(s, bs, bdrv_get_node_name(bs), alias_map)) { + goto fail; + } + } +@@ -416,11 +653,17 @@ static int init_dirty_bitmap_migration(DBMSaveState *s) + } + + g_hash_table_destroy(handled_by_blk); ++ if (alias_map) { ++ g_hash_table_destroy(alias_map); ++ } + + return 0; + + fail: + g_hash_table_destroy(handled_by_blk); ++ if (alias_map) { ++ g_hash_table_destroy(alias_map); ++ } + dirty_bitmap_do_save_cleanup(s); + + return -1; +@@ -770,8 +1013,10 @@ static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) + return 0; + } + +-static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s) ++static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s, ++ GHashTable *alias_map) + { ++ GHashTable *bitmap_alias_map = NULL; + Error *local_err = NULL; + bool nothing; + 
s->flags = qemu_get_bitmap_flags(f); +@@ -780,28 +1025,75 @@ static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s) + nothing = s->flags == (s->flags & DIRTY_BITMAP_MIG_FLAG_EOS); + + if (s->flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) { +- if (!qemu_get_counted_string(f, s->node_name)) { +- error_report("Unable to read node name string"); ++ if (!qemu_get_counted_string(f, s->node_alias)) { ++ error_report("Unable to read node alias string"); + return -EINVAL; + } ++ + if (!s->cancelled) { +- s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err); ++ if (alias_map) { ++ const AliasMapInnerNode *amin; ++ ++ amin = g_hash_table_lookup(alias_map, s->node_alias); ++ if (!amin) { ++ error_setg(&local_err, "Error: Unknown node alias '%s'", ++ s->node_alias); ++ s->bs = NULL; ++ } else { ++ bitmap_alias_map = amin->subtree; ++ s->bs = bdrv_lookup_bs(NULL, amin->string, &local_err); ++ } ++ } else { ++ s->bs = bdrv_lookup_bs(s->node_alias, s->node_alias, ++ &local_err); ++ } + if (!s->bs) { + error_report_err(local_err); + cancel_incoming_locked(s); + } + } +- } else if (!s->bs && !nothing && !s->cancelled) { ++ } else if (s->bs) { ++ if (alias_map) { ++ const AliasMapInnerNode *amin; ++ ++ /* Must be present in the map, or s->bs would not be set */ ++ amin = g_hash_table_lookup(alias_map, s->node_alias); ++ assert(amin != NULL); ++ ++ bitmap_alias_map = amin->subtree; ++ } ++ } else if (!nothing && !s->cancelled) { + error_report("Error: block device name is not set"); + cancel_incoming_locked(s); + } + ++ assert(nothing || s->cancelled || !!alias_map == !!bitmap_alias_map); ++ + if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) { +- if (!qemu_get_counted_string(f, s->bitmap_name)) { +- error_report("Unable to read bitmap name string"); ++ const char *bitmap_name; ++ ++ if (!qemu_get_counted_string(f, s->bitmap_alias)) { ++ error_report("Unable to read bitmap alias string"); + return -EINVAL; + } ++ ++ if (!s->cancelled) { ++ if (bitmap_alias_map) { ++ 
bitmap_name = g_hash_table_lookup(bitmap_alias_map, ++ s->bitmap_alias); ++ if (!bitmap_name) { ++ error_report("Error: Unknown bitmap alias '%s' on node " ++ "'%s' (alias '%s')", s->bitmap_alias, ++ s->bs->node_name, s->node_alias); ++ cancel_incoming_locked(s); ++ } ++ } else { ++ bitmap_name = s->bitmap_alias; ++ } ++ } ++ + if (!s->cancelled) { ++ g_strlcpy(s->bitmap_name, bitmap_name, sizeof(s->bitmap_name)); + s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name); + + /* +@@ -811,7 +1103,7 @@ static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s) + if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) { + error_report("Error: unknown dirty bitmap " + "'%s' for block device '%s'", +- s->bitmap_name, s->node_name); ++ s->bitmap_name, s->bs->node_name); + cancel_incoming_locked(s); + } + } +@@ -835,6 +1127,8 @@ static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s) + */ + static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id) + { ++ GHashTable *alias_map = NULL; ++ const MigrationParameters *mig_params = &migrate_get_current()->parameters; + DBMLoadState *s = &((DBMState *)opaque)->load; + int ret = 0; + +@@ -846,13 +1140,18 @@ static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id) + return -EINVAL; + } + ++ if (mig_params->has_block_bitmap_mapping) { ++ alias_map = construct_alias_map(mig_params->block_bitmap_mapping, ++ false, &error_abort); ++ } ++ + do { + QEMU_LOCK_GUARD(&s->lock); + +- ret = dirty_bitmap_load_header(f, s); ++ ret = dirty_bitmap_load_header(f, s, alias_map); + if (ret < 0) { + cancel_incoming_locked(s); +- return ret; ++ goto fail; + } + + if (s->flags & DIRTY_BITMAP_MIG_FLAG_START) { +@@ -869,12 +1168,17 @@ static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id) + + if (ret) { + cancel_incoming_locked(s); +- return ret; ++ goto fail; + } + } while (!(s->flags & DIRTY_BITMAP_MIG_FLAG_EOS)); + + trace_dirty_bitmap_load_success(); +- return 0; ++ ret = 0; 
++fail: ++ if (alias_map) { ++ g_hash_table_destroy(alias_map); ++ } ++ return ret; + } + + static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque) +diff --git a/migration/migration.c b/migration/migration.c +index bf684185b7..7a89ce39a7 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -36,6 +36,7 @@ + #include "block/block.h" + #include "qapi/error.h" + #include "qapi/clone-visitor.h" ++#include "qapi/qapi-visit-migration.h" + #include "qapi/qapi-visit-sockets.h" + #include "qapi/qapi-commands-migration.h" + #include "qapi/qapi-events-migration.h" +@@ -845,6 +846,13 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->has_announce_step = true; + params->announce_step = s->parameters.announce_step; + ++ if (s->parameters.has_block_bitmap_mapping) { ++ params->has_block_bitmap_mapping = true; ++ params->block_bitmap_mapping = ++ QAPI_CLONE(BitmapMigrationNodeAliasList, ++ s->parameters.block_bitmap_mapping); ++ } ++ + return params; + } + +@@ -1310,6 +1318,13 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + "is invalid, it must be in the range of 1 to 10000 ms"); + return false; + } ++ ++ if (params->has_block_bitmap_mapping && ++ !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) { ++ error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); ++ return false; ++ } ++ + return true; + } + +@@ -1404,6 +1419,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + if (params->has_announce_step) { + dest->announce_step = params->announce_step; + } ++ ++ if (params->has_block_bitmap_mapping) { ++ dest->has_block_bitmap_mapping = true; ++ dest->block_bitmap_mapping = params->block_bitmap_mapping; ++ } + } + + static void migrate_params_apply(MigrateSetParameters *params, Error **errp) +@@ -1516,6 +1536,16 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + if (params->has_announce_step) { + 
s->parameters.announce_step = params->announce_step; + } ++ ++ if (params->has_block_bitmap_mapping) { ++ qapi_free_BitmapMigrationNodeAliasList( ++ s->parameters.block_bitmap_mapping); ++ ++ s->parameters.has_block_bitmap_mapping = true; ++ s->parameters.block_bitmap_mapping = ++ QAPI_CLONE(BitmapMigrationNodeAliasList, ++ params->block_bitmap_mapping); ++ } + } + + void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) +diff --git a/migration/migration.h b/migration/migration.h +index 721e272713..4be42e8c11 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -337,6 +337,9 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); + void dirty_bitmap_mig_before_vm_start(void); + void dirty_bitmap_mig_cancel_outgoing(void); + void dirty_bitmap_mig_cancel_incoming(void); ++bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm, ++ Error **errp); ++ + void migrate_add_address(SocketAddress *address); + + int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index ae4b6a4246..7711726fd2 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -469,6 +469,32 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) + monitor_printf(mon, "%s: '%s'\n", + MigrationParameter_str(MIGRATION_PARAMETER_TLS_AUTHZ), + params->tls_authz); ++ ++ if (params->has_block_bitmap_mapping) { ++ const BitmapMigrationNodeAliasList *bmnal; ++ ++ monitor_printf(mon, "%s:\n", ++ MigrationParameter_str( ++ MIGRATION_PARAMETER_BLOCK_BITMAP_MAPPING)); ++ ++ for (bmnal = params->block_bitmap_mapping; ++ bmnal; ++ bmnal = bmnal->next) ++ { ++ const BitmapMigrationNodeAlias *bmna = bmnal->value; ++ const BitmapMigrationBitmapAliasList *bmbal; ++ ++ monitor_printf(mon, " '%s' -> '%s'\n", ++ bmna->node_name, bmna->alias); ++ ++ for (bmbal = bmna->bitmaps; bmbal; bmbal = bmbal->next) { ++ const BitmapMigrationBitmapAlias *bmba = 
bmbal->value; ++ ++ monitor_printf(mon, " '%s' -> '%s'\n", ++ bmba->name, bmba->alias); ++ } ++ } ++ } + } + + qapi_free_MigrationParameters(params); +@@ -1384,6 +1410,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_announce_step = true; + visit_type_size(v, param, &p->announce_step, &err); + break; ++ case MIGRATION_PARAMETER_BLOCK_BITMAP_MAPPING: ++ error_setg(&err, "The block-bitmap-mapping parameter can only be set " ++ "through QMP"); ++ break; + default: + assert(0); + } +diff --git a/qapi/migration.json b/qapi/migration.json +index ea53b23dca..5f6b06172c 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -508,6 +508,44 @@ + 'data': [ 'none', 'zlib', + { 'name': 'zstd', 'if': 'defined(CONFIG_ZSTD)' } ] } + ++## ++# @BitmapMigrationBitmapAlias: ++# ++# @name: The name of the bitmap. ++# ++# @alias: An alias name for migration (for example the bitmap name on ++# the opposite site). ++# ++# Since: 5.2 ++## ++{ 'struct': 'BitmapMigrationBitmapAlias', ++ 'data': { ++ 'name': 'str', ++ 'alias': 'str' ++ } } ++ ++## ++# @BitmapMigrationNodeAlias: ++# ++# Maps a block node name and the bitmaps it has to aliases for dirty ++# bitmap migration. ++# ++# @node-name: A block node name. ++# ++# @alias: An alias block node name for migration (for example the ++# node name on the opposite site). ++# ++# @bitmaps: Mappings for the bitmaps on this node. ++# ++# Since: 5.2 ++## ++{ 'struct': 'BitmapMigrationNodeAlias', ++ 'data': { ++ 'node-name': 'str', ++ 'alias': 'str', ++ 'bitmaps': [ 'BitmapMigrationBitmapAlias' ] ++ } } ++ + ## + # @MigrationParameter: + # +@@ -642,6 +680,25 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @block-bitmap-mapping: Maps block nodes and bitmaps on them to ++# aliases for the purpose of dirty bitmap migration. Such ++# aliases may for example be the corresponding names on the ++# opposite site. 
++# The mapping must be one-to-one, but not necessarily ++# complete: On the source, unmapped bitmaps and all bitmaps ++# on unmapped nodes will be ignored. On the destination, ++# encountering an unmapped alias in the incoming migration ++# stream will result in a report, and all further bitmap ++# migration data will then be discarded. ++# Note that the destination does not know about bitmaps it ++# does not receive, so there is no limitation or requirement ++# regarding the number of bitmaps received, or how they are ++# named, or on which nodes they are placed. ++# By default (when this parameter has never been set), bitmap ++# names are mapped to themselves. Nodes are mapped to their ++# block device name if there is one, and to their node name ++# otherwise. (Since 5.2) ++# + # Since: 2.4 + ## + { 'enum': 'MigrationParameter', +@@ -656,7 +713,8 @@ + 'multifd-channels', + 'xbzrle-cache-size', 'max-postcopy-bandwidth', + 'max-cpu-throttle', 'multifd-compression', +- 'multifd-zlib-level' ,'multifd-zstd-level' ] } ++ 'multifd-zlib-level' ,'multifd-zstd-level', ++ 'block-bitmap-mapping' ] } + + ## + # @MigrateSetParameters: +@@ -782,6 +840,25 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @block-bitmap-mapping: Maps block nodes and bitmaps on them to ++# aliases for the purpose of dirty bitmap migration. Such ++# aliases may for example be the corresponding names on the ++# opposite site. ++# The mapping must be one-to-one, but not necessarily ++# complete: On the source, unmapped bitmaps and all bitmaps ++# on unmapped nodes will be ignored. On the destination, ++# encountering an unmapped alias in the incoming migration ++# stream will result in a report, and all further bitmap ++# migration data will then be discarded. 
++# Note that the destination does not know about bitmaps it ++# does not receive, so there is no limitation or requirement ++# regarding the number of bitmaps received, or how they are ++# named, or on which nodes they are placed. ++# By default (when this parameter has never been set), bitmap ++# names are mapped to themselves. Nodes are mapped to their ++# block device name if there is one, and to their node name ++# otherwise. (Since 5.2) ++# + # Since: 2.4 + ## + # TODO either fuse back into MigrationParameters, or make +@@ -812,7 +889,8 @@ + '*max-cpu-throttle': 'int', + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'int', +- '*multifd-zstd-level': 'int' } } ++ '*multifd-zstd-level': 'int', ++ '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## + # @migrate-set-parameters: +@@ -958,6 +1036,25 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @block-bitmap-mapping: Maps block nodes and bitmaps on them to ++# aliases for the purpose of dirty bitmap migration. Such ++# aliases may for example be the corresponding names on the ++# opposite site. ++# The mapping must be one-to-one, but not necessarily ++# complete: On the source, unmapped bitmaps and all bitmaps ++# on unmapped nodes will be ignored. On the destination, ++# encountering an unmapped alias in the incoming migration ++# stream will result in a report, and all further bitmap ++# migration data will then be discarded. ++# Note that the destination does not know about bitmaps it ++# does not receive, so there is no limitation or requirement ++# regarding the number of bitmaps received, or how they are ++# named, or on which nodes they are placed. ++# By default (when this parameter has never been set), bitmap ++# names are mapped to themselves. Nodes are mapped to their ++# block device name if there is one, and to their node name ++# otherwise. 
(Since 5.2) ++# + # Since: 2.4 + ## + { 'struct': 'MigrationParameters', +@@ -986,7 +1083,8 @@ + '*max-cpu-throttle': 'uint8', + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', +- '*multifd-zstd-level': 'uint8' } } ++ '*multifd-zstd-level': 'uint8', ++ '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## + # @query-migrate-parameters: +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index e32e40f..b80c4b7 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.1.0 -Release: 3%{?dist} +Release: 4%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -146,6 +146,18 @@ Patch38: kvm-redhat-Update-hw_compat_8_2.patch Patch39: kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch # For bz#1801242 - [aarch64] vTPM support in machvirt Patch40: kvm-Disable-TPM-passthrough-backend-on-ARM.patch +# For bz#1867075 - CVE-2020-10756 virt:8.3/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8] +Patch41: kvm-Drop-bogus-IPv6-messages.patch +# For bz#1849707 - 8.3 machine types for x86 - 5.1 update +Patch42: kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch +# For bz#1849707 - 8.3 machine types for x86 - 5.1 update +Patch43: kvm-machine_types-numa-compatibility-for-auto_enable_num.patch +# For bz#1790492 - 'dirty-bitmaps' migration capability should allow configuring target nodenames +Patch44: kvm-migration-Add-block-bitmap-mapping-parameter.patch +# For bz#1790492 - 'dirty-bitmaps' migration capability should allow configuring target nodenames +Patch45: kvm-iotests.py-Let-wait_migration-return-on-failure.patch +# For bz#1790492 - 'dirty-bitmaps' migration capability should allow configuring target nodenames +Patch46: kvm-iotests-Test-node-bitmap-aliases-during-migration.patch 
BuildRequires: wget BuildRequires: rpm-build @@ -1114,6 +1126,20 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Thu Aug 27 2020 Danilo Cesar Lemes de Paula - 5.1.0-4.el8 +- kvm-Drop-bogus-IPv6-messages.patch [bz#1867075] +- kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch [bz#1849707] +- kvm-machine_types-numa-compatibility-for-auto_enable_num.patch [bz#1849707] +- kvm-migration-Add-block-bitmap-mapping-parameter.patch [bz#1790492] +- kvm-iotests.py-Let-wait_migration-return-on-failure.patch [bz#1790492] +- kvm-iotests-Test-node-bitmap-aliases-during-migration.patch [bz#1790492] +- Resolves: bz#1790492 + ('dirty-bitmaps' migration capability should allow configuring target nodenames) +- Resolves: bz#1849707 + (8.3 machine types for x86 - 5.1 update) +- Resolves: bz#1867075 + (CVE-2020-10756 virt:8.3/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8]) + * Wed Aug 19 2020 Danilo Cesar Lemes de Paula - 5.1.0-3.el8 - kvm-redhat-Update-hw_compat_8_2.patch [bz#1843348] - kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch [bz#1843348] From 7f165dc009a2757a05a1309e72b829e4acd355fb Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Tue, 8 Sep 2020 21:22:04 -0400 Subject: [PATCH 088/195] * Tue Sep 08 2020 Danilo Cesar Lemes de Paula - 5.1.0-5.el8 - kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch [bz#1873417] - kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch [bz#1873417] - kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch [bz#1873417] - kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch [bz#1873417] - kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch [bz#1873417] - kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch [bz#1873417] - kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch [bz#1873417] - kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch [bz#1867739] - kvm-usb-fix-setup_len-init-CVE-2020-14364.patch [bz#1869715] - kvm-Remove-explicit-glusterfs-api-dependency.patch [bz#1872853] - kvm-disable-virgl.patch [bz#1831271] - Resolves: bz#1831271 (Drop virgil acceleration support and remove virglrenderer dependency) - Resolves: bz#1867739 (-prom-env does not validate input) - Resolves: bz#1869715 (CVE-2020-14364 qemu-kvm: QEMU: usb: out-of-bounds r/w access issue while processing usb packets [rhel-av-8.3.0]) - Resolves: bz#1872853 (move the glusterfs dependency out of qemu-kvm-core to the glusterfs module) - Resolves: bz#1873417 (AMD/NUMA topology - revert 5.1 changes) --- ...d-EPYC-mode-topology-decoding-functi.patch | 168 ++++++++++ ...ntroduce-apicid-functions-inside-X86.patch | 80 +++++ ...ove-arch_id-decode-inside-x86_cpus_i.patch | 157 ++++++++++ ...ix-pkg_id-offset-for-EPYC-cpu-models.patch | 103 +++++++ ...oduce-use_epyc_apic_id_encoding-in-X.patch | 90 ++++++ ...86-Cleanup-and-use-the-EPYC-mode-top.patch | 288 ++++++++++++++++++ ...86-Enable-new-apic-id-encoding-for-E.patch | 63 ++++ ...if-NVRAM-cannot-contain-all-prom-env.patch | 254 +++++++++++++++ ...sb-fix-setup_len-init-CVE-2020-14364.patch | 102 +++++++ qemu-kvm.spec | 57 +++- 10 files changed, 1348 insertions(+), 14 
deletions(-) create mode 100644 kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch create mode 100644 kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch create mode 100644 kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch create mode 100644 kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch create mode 100644 kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch create mode 100644 kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch create mode 100644 kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch create mode 100644 kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch create mode 100644 kvm-usb-fix-setup_len-init-CVE-2020-14364.patch diff --git a/kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch b/kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch new file mode 100644 index 0000000..22f49b7 --- /dev/null +++ b/kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch @@ -0,0 +1,168 @@ +From d2629755385917d277b80267cb88436c950123a7 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 28 Aug 2020 16:23:49 -0400 +Subject: [PATCH 07/11] Revert "hw/386: Add EPYC mode topology decoding + functions" + +RH-Author: Igor Mammedov +Message-id: <20200828162349.1616028-8-imammedo@redhat.com> +Patchwork-id: 98250 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 7/7] Revert "hw/386: Add EPYC mode topology decoding functions" +Bugzilla: 1873417 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Dr. David Alan Gilbert + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 +Branch: rhel-av-8.3.0 +Upstream: RHEL only +Tested: locally + +A regression was introduced since qemu-5.0, when EPYC specific +APIC ID encoding was introduced. Which leads to migration failing +with: +" + : Unknown savevm section or instance 'apic' 4. 
Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices + : load of migration failed: Invalid argument +" +when EPYC cpu model and more than 1 numa node is used. +EPYC specific APIC ID encoding is considered as failed +experiment and upstream is preparing to revert it as well. + +This reverts commit 7568b205555a6405042f62c64af3268f4330aed5. + +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. de Paula +--- + include/hw/i386/topology.h | 100 ------------------------------------- + 1 file changed, 100 deletions(-) + +diff --git a/include/hw/i386/topology.h b/include/hw/i386/topology.h +index 07239f95f4..b9593b9905 100644 +--- a/include/hw/i386/topology.h ++++ b/include/hw/i386/topology.h +@@ -47,7 +47,6 @@ typedef uint32_t apic_id_t; + + typedef struct X86CPUTopoIDs { + unsigned pkg_id; +- unsigned node_id; + unsigned die_id; + unsigned core_id; + unsigned smt_id; +@@ -89,11 +88,6 @@ static inline unsigned apicid_die_width(X86CPUTopoInfo *topo_info) + return apicid_bitwidth_for_count(topo_info->dies_per_pkg); + } + +-/* Bit width of the node_id field per socket */ +-static inline unsigned apicid_node_width_epyc(X86CPUTopoInfo *topo_info) +-{ +- return apicid_bitwidth_for_count(MAX(topo_info->nodes_per_pkg, 1)); +-} + /* Bit offset of the Core_ID field + */ + static inline unsigned apicid_core_offset(X86CPUTopoInfo *topo_info) +@@ -114,100 +108,6 @@ static inline unsigned apicid_pkg_offset(X86CPUTopoInfo *topo_info) + return apicid_die_offset(topo_info) + apicid_die_width(topo_info); + } + +-#define NODE_ID_OFFSET 3 /* Minimum node_id offset if numa configured */ +- +-/* +- * Bit offset of the node_id field +- * +- * Make sure nodes_per_pkg > 0 if numa configured else zero. 
+- */ +-static inline unsigned apicid_node_offset_epyc(X86CPUTopoInfo *topo_info) +-{ +- unsigned offset = apicid_die_offset(topo_info) + +- apicid_die_width(topo_info); +- +- if (topo_info->nodes_per_pkg) { +- return MAX(NODE_ID_OFFSET, offset); +- } else { +- return offset; +- } +-} +- +-/* Bit offset of the Pkg_ID (socket ID) field */ +-static inline unsigned apicid_pkg_offset_epyc(X86CPUTopoInfo *topo_info) +-{ +- return apicid_node_offset_epyc(topo_info) + +- apicid_node_width_epyc(topo_info); +-} +- +-/* +- * Make APIC ID for the CPU based on Pkg_ID, Core_ID, SMT_ID +- * +- * The caller must make sure core_id < nr_cores and smt_id < nr_threads. +- */ +-static inline apic_id_t +-x86_apicid_from_topo_ids_epyc(X86CPUTopoInfo *topo_info, +- const X86CPUTopoIDs *topo_ids) +-{ +- return (topo_ids->pkg_id << apicid_pkg_offset_epyc(topo_info)) | +- (topo_ids->node_id << apicid_node_offset_epyc(topo_info)) | +- (topo_ids->die_id << apicid_die_offset(topo_info)) | +- (topo_ids->core_id << apicid_core_offset(topo_info)) | +- topo_ids->smt_id; +-} +- +-static inline void x86_topo_ids_from_idx_epyc(X86CPUTopoInfo *topo_info, +- unsigned cpu_index, +- X86CPUTopoIDs *topo_ids) +-{ +- unsigned nr_nodes = MAX(topo_info->nodes_per_pkg, 1); +- unsigned nr_dies = topo_info->dies_per_pkg; +- unsigned nr_cores = topo_info->cores_per_die; +- unsigned nr_threads = topo_info->threads_per_core; +- unsigned cores_per_node = DIV_ROUND_UP((nr_dies * nr_cores * nr_threads), +- nr_nodes); +- +- topo_ids->pkg_id = cpu_index / (nr_dies * nr_cores * nr_threads); +- topo_ids->node_id = (cpu_index / cores_per_node) % nr_nodes; +- topo_ids->die_id = cpu_index / (nr_cores * nr_threads) % nr_dies; +- topo_ids->core_id = cpu_index / nr_threads % nr_cores; +- topo_ids->smt_id = cpu_index % nr_threads; +-} +- +-/* +- * Calculate thread/core/package IDs for a specific topology, +- * based on APIC ID +- */ +-static inline void x86_topo_ids_from_apicid_epyc(apic_id_t apicid, +- X86CPUTopoInfo 
*topo_info, +- X86CPUTopoIDs *topo_ids) +-{ +- topo_ids->smt_id = apicid & +- ~(0xFFFFFFFFUL << apicid_smt_width(topo_info)); +- topo_ids->core_id = +- (apicid >> apicid_core_offset(topo_info)) & +- ~(0xFFFFFFFFUL << apicid_core_width(topo_info)); +- topo_ids->die_id = +- (apicid >> apicid_die_offset(topo_info)) & +- ~(0xFFFFFFFFUL << apicid_die_width(topo_info)); +- topo_ids->node_id = +- (apicid >> apicid_node_offset_epyc(topo_info)) & +- ~(0xFFFFFFFFUL << apicid_node_width_epyc(topo_info)); +- topo_ids->pkg_id = apicid >> apicid_pkg_offset_epyc(topo_info); +-} +- +-/* +- * Make APIC ID for the CPU 'cpu_index' +- * +- * 'cpu_index' is a sequential, contiguous ID for the CPU. +- */ +-static inline apic_id_t x86_apicid_from_cpu_idx_epyc(X86CPUTopoInfo *topo_info, +- unsigned cpu_index) +-{ +- X86CPUTopoIDs topo_ids; +- x86_topo_ids_from_idx_epyc(topo_info, cpu_index, &topo_ids); +- return x86_apicid_from_topo_ids_epyc(topo_info, &topo_ids); +-} + /* Make APIC ID for the CPU based on Pkg_ID, Core_ID, SMT_ID + * + * The caller must make sure core_id < nr_cores and smt_id < nr_threads. +-- +2.27.0 + diff --git a/kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch b/kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch new file mode 100644 index 0000000..5988443 --- /dev/null +++ b/kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch @@ -0,0 +1,80 @@ +From da24d2c5e2d61043340b601a09f22e41a1d52e5e Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 28 Aug 2020 16:23:47 -0400 +Subject: [PATCH 05/11] Revert "hw/i386: Introduce apicid functions inside + X86MachineState" + +RH-Author: Igor Mammedov +Message-id: <20200828162349.1616028-6-imammedo@redhat.com> +Patchwork-id: 98246 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 5/7] Revert "hw/i386: Introduce apicid functions inside X86MachineState" +Bugzilla: 1873417 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Dr. 
David Alan Gilbert + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 +Branch: rhel-av-8.3.0 +Upstream: RHEL only +Tested: locally + +A regression was introduced since qemu-5.0, when EPYC specific +APIC ID encoding was introduced. Which leads to migration failing +with: +" + : Unknown savevm section or instance 'apic' 4. Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices + : load of migration failed: Invalid argument +" +when EPYC cpu model and more than 1 numa node is used. +EPYC specific APIC ID encoding is considered as failed +experiment and upstream is preparing to revert it as well. + +This reverts commit 6121c7fbfd98dbc3af1b00b56ff2eef66df87828. + +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. de Paula +--- + hw/i386/x86.c | 5 ----- + include/hw/i386/x86.h | 9 --------- + 2 files changed, 14 deletions(-) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 41bdf146bd..4d8cb66258 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -896,11 +896,6 @@ static void x86_machine_initfn(Object *obj) + x86ms->smm = ON_OFF_AUTO_AUTO; + x86ms->acpi = ON_OFF_AUTO_AUTO; + x86ms->smp_dies = 1; +- +- x86ms->apicid_from_cpu_idx = x86_apicid_from_cpu_idx; +- x86ms->topo_ids_from_apicid = x86_topo_ids_from_apicid; +- x86ms->apicid_from_topo_ids = x86_apicid_from_topo_ids; +- x86ms->apicid_pkg_offset = apicid_pkg_offset; + } + + static void x86_machine_class_init(ObjectClass *oc, void *data) +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index b79f24e285..4d9a26326d 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -63,15 +63,6 @@ typedef struct { + OnOffAuto smm; + OnOffAuto acpi; + +- /* Apic id specific handlers */ +- uint32_t (*apicid_from_cpu_idx)(X86CPUTopoInfo *topo_info, +- unsigned cpu_index); +- void (*topo_ids_from_apicid)(apic_id_t apicid, X86CPUTopoInfo *topo_info, +- X86CPUTopoIDs 
*topo_ids); +- apic_id_t (*apicid_from_topo_ids)(X86CPUTopoInfo *topo_info, +- const X86CPUTopoIDs *topo_ids); +- uint32_t (*apicid_pkg_offset)(X86CPUTopoInfo *topo_info); +- + /* + * Address space used by IOAPIC device. All IOAPIC interrupts + * will be translated to MSI messages in the address space. +-- +2.27.0 + diff --git a/kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch b/kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch new file mode 100644 index 0000000..b9ac7b1 --- /dev/null +++ b/kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch @@ -0,0 +1,157 @@ +From 61b9bdeafac573093e171947be1a0c9212ba8b95 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 28 Aug 2020 16:23:45 -0400 +Subject: [PATCH 03/11] Revert "hw/i386: Move arch_id decode inside + x86_cpus_init" + +RH-Author: Igor Mammedov +Message-id: <20200828162349.1616028-4-imammedo@redhat.com> +Patchwork-id: 98248 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 3/7] Revert "hw/i386: Move arch_id decode inside x86_cpus_init" +Bugzilla: 1873417 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Dr. David Alan Gilbert + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 +Branch: rhel-av-8.3.0 +Upstream: RHEL only +Tested: locally + +A regression was introduced since qemu-5.0, when EPYC specific +APIC ID encoding was introduced. Which leads to migration failing +with: +" + : Unknown savevm section or instance 'apic' 4. Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices + : load of migration failed: Invalid argument +" +when EPYC cpu model and more than 1 numa node is used. +EPYC specific APIC ID encoding is considered as failed +experiment and upstream is preparing to revert it as well. + +This reverts commit 2e26f4ab3bf8390a2677d3afd9b1a04f015d7721. + +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/i386/pc.c | 6 +++--- + hw/i386/x86.c | 37 +++++++------------------------------ + 2 files changed, 10 insertions(+), 33 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index f469c060e5..ac2cc79fca 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1817,14 +1817,14 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, + topo_ids.die_id = cpu->die_id; + topo_ids.core_id = cpu->core_id; + topo_ids.smt_id = cpu->thread_id; +- cpu->apic_id = x86ms->apicid_from_topo_ids(&topo_info, &topo_ids); ++ cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); + } + + cpu_slot = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, &idx); + if (!cpu_slot) { + MachineState *ms = MACHINE(pcms); + +- x86ms->topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); ++ x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); + error_setg(errp, + "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" + " APIC ID %" PRIu32 ", valid index range 0:%d", +@@ -1845,7 +1845,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, + /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() + * once -smp refactoring is complete and there will be CPU private + * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ +- x86ms->topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); ++ x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); + if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) { + error_setg(errp, "property socket-id: %u doesn't match set apic-id:" + " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 67bee1bcb8..41bdf146bd 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -68,22 +68,6 @@ inline void init_topo_info(X86CPUTopoInfo *topo_info, + topo_info->threads_per_core = ms->smp.threads; + } + +-/* +- * Set up with the new EPYC topology handlers +- * +- * AMD uses different apic id encoding for EPYC based cpus. 
Override +- * the default topo handlers with EPYC encoding handlers. +- */ +-static void x86_set_epyc_topo_handlers(MachineState *machine) +-{ +- X86MachineState *x86ms = X86_MACHINE(machine); +- +- x86ms->apicid_from_cpu_idx = x86_apicid_from_cpu_idx_epyc; +- x86ms->topo_ids_from_apicid = x86_topo_ids_from_apicid_epyc; +- x86ms->apicid_from_topo_ids = x86_apicid_from_topo_ids_epyc; +- x86ms->apicid_pkg_offset = apicid_pkg_offset_epyc; +-} +- + /* + * Calculates initial APIC ID for a specific CPU index + * +@@ -102,7 +86,7 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, + + init_topo_info(&topo_info, x86ms); + +- correct_id = x86ms->apicid_from_cpu_idx(&topo_info, cpu_index); ++ correct_id = x86_apicid_from_cpu_idx(&topo_info, cpu_index); + if (x86mc->compat_apic_id_mode) { + if (cpu_index != correct_id && !warned && !qtest_enabled()) { + error_report("APIC IDs set in compatibility mode, " +@@ -136,11 +120,6 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) + MachineState *ms = MACHINE(x86ms); + MachineClass *mc = MACHINE_GET_CLASS(x86ms); + +- /* Check for apicid encoding */ +- if (cpu_x86_use_epyc_apic_id_encoding(ms->cpu_type)) { +- x86_set_epyc_topo_handlers(ms); +- } +- + x86_cpu_set_default_version(default_cpu_version); + + /* +@@ -154,12 +133,6 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) + x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, + ms->smp.max_cpus - 1) + 1; + possible_cpus = mc->possible_cpu_arch_ids(ms); +- +- for (i = 0; i < ms->possible_cpus->len; i++) { +- ms->possible_cpus->cpus[i].arch_id = +- x86_cpu_apic_id_from_index(x86ms, i); +- } +- + for (i = 0; i < ms->smp.cpus; i++) { + x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); + } +@@ -184,7 +157,8 @@ int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) + init_topo_info(&topo_info, x86ms); + + assert(idx < ms->possible_cpus->len); +- x86_topo_ids_from_idx(&topo_info, idx, &topo_ids); ++ 
x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id, ++ &topo_info, &topo_ids); + return topo_ids.pkg_id % ms->numa_state->num_nodes; + } + +@@ -215,7 +189,10 @@ const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) + + ms->possible_cpus->cpus[i].type = ms->cpu_type; + ms->possible_cpus->cpus[i].vcpus_count = 1; +- x86_topo_ids_from_idx(&topo_info, i, &topo_ids); ++ ms->possible_cpus->cpus[i].arch_id = ++ x86_cpu_apic_id_from_index(x86ms, i); ++ x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id, ++ &topo_info, &topo_ids); + ms->possible_cpus->cpus[i].props.has_socket_id = true; + ms->possible_cpus->cpus[i].props.socket_id = topo_ids.pkg_id; + if (x86ms->smp_dies > 1) { +-- +2.27.0 + diff --git a/kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch b/kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch new file mode 100644 index 0000000..9492f85 --- /dev/null +++ b/kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch @@ -0,0 +1,103 @@ +From 7f7a15ba9ad3f1d906b472cad4972c80d11b77fc Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 28 Aug 2020 16:23:43 -0400 +Subject: [PATCH 01/11] Revert "i386: Fix pkg_id offset for EPYC cpu models" + +RH-Author: Igor Mammedov +Message-id: <20200828162349.1616028-2-imammedo@redhat.com> +Patchwork-id: 98247 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 1/7] Revert "i386: Fix pkg_id offset for EPYC cpu models" +Bugzilla: 1873417 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Dr. David Alan Gilbert + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 +Branch: rhel-av-8.3.0 +Upstream: RHEL only +Tested: locally + +A regression was introduced since qemu-5.0, when EPYC specific +APIC ID encoding was introduced. Which leads to migration failing +with: +" + : Unknown savevm section or instance 'apic' 4. 
Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices + : load of migration failed: Invalid argument +" +when EPYC cpu model and more than 1 numa node is used. +EPYC specific APIC ID encoding is considered as failed +experiment and upstream is preparing to revert it as well. + +This reverts commit 7b225762c8c05fd31d4c2be116aedfbc00383f8b. + +PS: +fixup an access to pkg_offset that were added by +cac9edfc4da (target/i386: Fix the CPUID leaf CPUID_Fn80000008) + +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. de Paula +--- + hw/i386/pc.c | 1 - + target/i386/cpu.c | 6 +++--- + target/i386/cpu.h | 1 - + 3 files changed, 3 insertions(+), 5 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index a75e0137ab..f469c060e5 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1763,7 +1763,6 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, + + env->nr_dies = x86ms->smp_dies; + env->nr_nodes = topo_info.nodes_per_pkg; +- env->pkg_offset = x86ms->apicid_pkg_offset(&topo_info); + + /* + * If APIC ID is not set, +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index cdaa1463f2..6517cc73a2 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5680,7 +5680,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + *ecx |= CPUID_TOPOLOGY_LEVEL_SMT; + break; + case 1: +- *eax = env->pkg_offset; ++ *eax = apicid_pkg_offset(&topo_info); + *ebx = cs->nr_cores * cs->nr_threads; + *ecx |= CPUID_TOPOLOGY_LEVEL_CORE; + break; +@@ -5714,7 +5714,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + *ecx |= CPUID_TOPOLOGY_LEVEL_CORE; + break; + case 2: +- *eax = env->pkg_offset; ++ *eax = apicid_pkg_offset(&topo_info); + *ebx = env->nr_dies * cs->nr_cores * cs->nr_threads; + *ecx |= CPUID_TOPOLOGY_LEVEL_DIE; + break; +@@ -5895,7 +5895,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + * CPUX86State::pkg_offset. 
+ * Bits 7:0 is "The number of threads in the package is NC+1" + */ +- *ecx = (env->pkg_offset << 12) | ++ *ecx = (apicid_pkg_offset(&topo_info) << 12) | + ((cs->nr_cores * cs->nr_threads) - 1); + } else { + *ecx = 0; +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index e1a5c174dc..d5ad42d694 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -1630,7 +1630,6 @@ typedef struct CPUX86State { + + unsigned nr_dies; + unsigned nr_nodes; +- unsigned pkg_offset; + } CPUX86State; + + struct kvm_msrs; +-- +2.27.0 + diff --git a/kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch b/kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch new file mode 100644 index 0000000..489c5a3 --- /dev/null +++ b/kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch @@ -0,0 +1,90 @@ +From bc3db6832c57b1b28204b376f3c4c61cadfe0a35 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 28 Aug 2020 16:23:46 -0400 +Subject: [PATCH 04/11] Revert "i386: Introduce use_epyc_apic_id_encoding in + X86CPUDefinition" + +RH-Author: Igor Mammedov +Message-id: <20200828162349.1616028-5-imammedo@redhat.com> +Patchwork-id: 98249 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 4/7] Revert "i386: Introduce use_epyc_apic_id_encoding in X86CPUDefinition" +Bugzilla: 1873417 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Dr. David Alan Gilbert + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 +Branch: rhel-av-8.3.0 +Upstream: RHEL only +Tested: locally + +A regression was introduced since qemu-5.0, when EPYC specific +APIC ID encoding was introduced. Which leads to migration failing +with: +" + : Unknown savevm section or instance 'apic' 4. Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices + : load of migration failed: Invalid argument +" +when EPYC cpu model and more than 1 numa node is used. 
+EPYC specific APIC ID encoding is considered as failed +experiment and upstream is preparing to revert it as well. + +This reverts commit 0c1538cb1a26287c072645f4759b9872b1596d79. + +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 16 ---------------- + target/i386/cpu.h | 1 - + 2 files changed, 17 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 66b6a77b2f..5e3d086f05 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1638,10 +1638,6 @@ typedef struct X86CPUDefinition { + FeatureWordArray features; + const char *model_id; + CPUCaches *cache_info; +- +- /* Use AMD EPYC encoding for apic id */ +- bool use_epyc_apic_id_encoding; +- + /* + * Definitions for alternative versions of CPU model. + * List is terminated by item with version == 0. +@@ -1683,18 +1679,6 @@ static const X86CPUVersionDefinition *x86_cpu_def_get_versions(X86CPUDefinition + return def->versions ?: default_version_list; + } + +-bool cpu_x86_use_epyc_apic_id_encoding(const char *cpu_type) +-{ +- X86CPUClass *xcc = X86_CPU_CLASS(object_class_by_name(cpu_type)); +- +- assert(xcc); +- if (xcc->model && xcc->model->cpudef) { +- return xcc->model->cpudef->use_epyc_apic_id_encoding; +- } else { +- return false; +- } +-} +- + static CPUCaches epyc_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index d5ad42d694..5ff8ad8427 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -1918,7 +1918,6 @@ void cpu_clear_apic_feature(CPUX86State *env); + void host_cpuid(uint32_t function, uint32_t count, + uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); + void host_vendor_fms(char *vendor, int *family, int *model, int *stepping); +-bool cpu_x86_use_epyc_apic_id_encoding(const char *cpu_type); + + /* helper.c */ + bool x86_cpu_tlb_fill(CPUState *cs, vaddr address, int size, +-- +2.27.0 + diff --git 
a/kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch b/kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch new file mode 100644 index 0000000..eeea50d --- /dev/null +++ b/kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch @@ -0,0 +1,288 @@ +From 4236a54d72270d871ff1ed3fd09a2971327077a1 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 28 Aug 2020 16:23:48 -0400 +Subject: [PATCH 06/11] Revert "target/i386: Cleanup and use the EPYC mode + topology functions" + +RH-Author: Igor Mammedov +Message-id: <20200828162349.1616028-7-imammedo@redhat.com> +Patchwork-id: 98251 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 6/7] Revert "target/i386: Cleanup and use the EPYC mode topology functions" +Bugzilla: 1873417 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Dr. David Alan Gilbert + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 +Branch: rhel-av-8.3.0 +Upstream: RHEL only +Tested: locally + +A regression was introduced since qemu-5.0, when EPYC specific +APIC ID encoding was introduced. Which leads to migration failing +with: +" + : Unknown savevm section or instance 'apic' 4. Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices + : load of migration failed: Invalid argument +" +when EPYC cpu model and more than 1 numa node is used. +EPYC specific APIC ID encoding is considered as failed +experiment and upstream is preparing to revert it as well. + +This reverts commit dd08ef0318e2b61d14bc069590d174913f7f437a. + +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 161 ++++++++++++++++++++++++++++++++++++---------- + 1 file changed, 127 insertions(+), 34 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 5e3d086f05..73fc83e53f 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -338,15 +338,68 @@ static void encode_cache_cpuid80000006(CPUCacheInfo *l2, + } + } + ++/* ++ * Definitions used for building CPUID Leaf 0x8000001D and 0x8000001E ++ * Please refer to the AMD64 Architecture Programmer’s Manual Volume 3. ++ * Define the constants to build the cpu topology. Right now, TOPOEXT ++ * feature is enabled only on EPYC. So, these constants are based on ++ * EPYC supported configurations. We may need to handle the cases if ++ * these values change in future. ++ */ ++/* Maximum core complexes in a node */ ++#define MAX_CCX 2 ++/* Maximum cores in a core complex */ ++#define MAX_CORES_IN_CCX 4 ++/* Maximum cores in a node */ ++#define MAX_CORES_IN_NODE 8 ++/* Maximum nodes in a socket */ ++#define MAX_NODES_PER_SOCKET 4 ++ ++/* ++ * Figure out the number of nodes required to build this config. ++ * Max cores in a node is 8 ++ */ ++static int nodes_in_socket(int nr_cores) ++{ ++ int nodes; ++ ++ nodes = DIV_ROUND_UP(nr_cores, MAX_CORES_IN_NODE); ++ ++ /* Hardware does not support config with 3 nodes, return 4 in that case */ ++ return (nodes == 3) ? 4 : nodes; ++} ++ ++/* ++ * Decide the number of cores in a core complex with the given nr_cores using ++ * following set constants MAX_CCX, MAX_CORES_IN_CCX, MAX_CORES_IN_NODE and ++ * MAX_NODES_PER_SOCKET. Maintain symmetry as much as possible ++ * L3 cache is shared across all cores in a core complex. So, this will also ++ * tell us how many cores are sharing the L3 cache. 
++ */ ++static int cores_in_core_complex(int nr_cores) ++{ ++ int nodes; ++ ++ /* Check if we can fit all the cores in one core complex */ ++ if (nr_cores <= MAX_CORES_IN_CCX) { ++ return nr_cores; ++ } ++ /* Get the number of nodes required to build this config */ ++ nodes = nodes_in_socket(nr_cores); ++ ++ /* ++ * Divide the cores accros all the core complexes ++ * Return rounded up value ++ */ ++ return DIV_ROUND_UP(nr_cores, nodes * MAX_CCX); ++} ++ + /* Encode cache info for CPUID[8000001D] */ +-static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, +- X86CPUTopoInfo *topo_info, +- uint32_t *eax, uint32_t *ebx, +- uint32_t *ecx, uint32_t *edx) ++static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, CPUState *cs, ++ uint32_t *eax, uint32_t *ebx, ++ uint32_t *ecx, uint32_t *edx) + { + uint32_t l3_cores; +- unsigned nodes = MAX(topo_info->nodes_per_pkg, 1); +- + assert(cache->size == cache->line_size * cache->associativity * + cache->partitions * cache->sets); + +@@ -355,13 +408,10 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, + + /* L3 is shared among multiple cores */ + if (cache->level == 3) { +- l3_cores = DIV_ROUND_UP((topo_info->dies_per_pkg * +- topo_info->cores_per_die * +- topo_info->threads_per_core), +- nodes); +- *eax |= (l3_cores - 1) << 14; ++ l3_cores = cores_in_core_complex(cs->nr_cores); ++ *eax |= ((l3_cores * cs->nr_threads) - 1) << 14; + } else { +- *eax |= ((topo_info->threads_per_core - 1) << 14); ++ *eax |= ((cs->nr_threads - 1) << 14); + } + + assert(cache->line_size > 0); +@@ -381,17 +431,55 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, + (cache->complex_indexing ? 
CACHE_COMPLEX_IDX : 0); + } + ++/* Data structure to hold the configuration info for a given core index */ ++struct core_topology { ++ /* core complex id of the current core index */ ++ int ccx_id; ++ /* ++ * Adjusted core index for this core in the topology ++ * This can be 0,1,2,3 with max 4 cores in a core complex ++ */ ++ int core_id; ++ /* Node id for this core index */ ++ int node_id; ++ /* Number of nodes in this config */ ++ int num_nodes; ++}; ++ ++/* ++ * Build the configuration closely match the EPYC hardware. Using the EPYC ++ * hardware configuration values (MAX_CCX, MAX_CORES_IN_CCX, MAX_CORES_IN_NODE) ++ * right now. This could change in future. ++ * nr_cores : Total number of cores in the config ++ * core_id : Core index of the current CPU ++ * topo : Data structure to hold all the config info for this core index ++ */ ++static void build_core_topology(int nr_cores, int core_id, ++ struct core_topology *topo) ++{ ++ int nodes, cores_in_ccx; ++ ++ /* First get the number of nodes required */ ++ nodes = nodes_in_socket(nr_cores); ++ ++ cores_in_ccx = cores_in_core_complex(nr_cores); ++ ++ topo->node_id = core_id / (cores_in_ccx * MAX_CCX); ++ topo->ccx_id = (core_id % (cores_in_ccx * MAX_CCX)) / cores_in_ccx; ++ topo->core_id = core_id % cores_in_ccx; ++ topo->num_nodes = nodes; ++} ++ + /* Encode cache info for CPUID[8000001E] */ +-static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu, ++static void encode_topo_cpuid8000001e(CPUState *cs, X86CPU *cpu, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) + { +- X86CPUTopoIDs topo_ids = {0}; +- unsigned long nodes = MAX(topo_info->nodes_per_pkg, 1); ++ struct core_topology topo = {0}; ++ unsigned long nodes; + int shift; + +- x86_topo_ids_from_apicid_epyc(cpu->apic_id, topo_info, &topo_ids); +- ++ build_core_topology(cs->nr_cores, cpu->core_id, &topo); + *eax = cpu->apic_id; + /* + * CPUID_Fn8000001E_EBX +@@ -408,8 +496,12 @@ static void 
encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu, + * 3 Core complex id + * 1:0 Core id + */ +- *ebx = ((topo_info->threads_per_core - 1) << 8) | (topo_ids.node_id << 3) | +- (topo_ids.core_id); ++ if (cs->nr_threads - 1) { ++ *ebx = ((cs->nr_threads - 1) << 8) | (topo.node_id << 3) | ++ (topo.ccx_id << 2) | topo.core_id; ++ } else { ++ *ebx = (topo.node_id << 4) | (topo.ccx_id << 3) | topo.core_id; ++ } + /* + * CPUID_Fn8000001E_ECX + * 31:11 Reserved +@@ -418,8 +510,9 @@ static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu, + * 2 Socket id + * 1:0 Node id + */ +- if (nodes <= 4) { +- *ecx = ((nodes - 1) << 8) | (topo_ids.pkg_id << 2) | topo_ids.node_id; ++ if (topo.num_nodes <= 4) { ++ *ecx = ((topo.num_nodes - 1) << 8) | (cpu->socket_id << 2) | ++ topo.node_id; + } else { + /* + * Node id fix up. Actual hardware supports up to 4 nodes. But with +@@ -434,10 +527,10 @@ static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu, + * number of nodes. find_last_bit returns last set bit(0 based). Left + * shift(+1) the socket id to represent all the nodes. 
+ */ +- nodes -= 1; ++ nodes = topo.num_nodes - 1; + shift = find_last_bit(&nodes, 8); +- *ecx = (nodes << 8) | (topo_ids.pkg_id << (shift + 1)) | +- topo_ids.node_id; ++ *ecx = ((topo.num_nodes - 1) << 8) | (cpu->socket_id << (shift + 1)) | ++ topo.node_id; + } + *edx = 0; + } +@@ -5473,7 +5566,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + uint32_t signature[3]; + X86CPUTopoInfo topo_info; + +- topo_info.nodes_per_pkg = env->nr_nodes; + topo_info.dies_per_pkg = env->nr_dies; + topo_info.cores_per_die = cs->nr_cores; + topo_info.threads_per_core = cs->nr_threads; +@@ -5905,20 +5997,20 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + } + switch (count) { + case 0: /* L1 dcache info */ +- encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, +- &topo_info, eax, ebx, ecx, edx); ++ encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, cs, ++ eax, ebx, ecx, edx); + break; + case 1: /* L1 icache info */ +- encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache, +- &topo_info, eax, ebx, ecx, edx); ++ encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache, cs, ++ eax, ebx, ecx, edx); + break; + case 2: /* L2 cache info */ +- encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache, +- &topo_info, eax, ebx, ecx, edx); ++ encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache, cs, ++ eax, ebx, ecx, edx); + break; + case 3: /* L3 cache info */ +- encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache, +- &topo_info, eax, ebx, ecx, edx); ++ encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache, cs, ++ eax, ebx, ecx, edx); + break; + default: /* end of info */ + *eax = *ebx = *ecx = *edx = 0; +@@ -5927,7 +6019,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + break; + case 0x8000001E: + assert(cpu->core_id <= 255); +- encode_topo_cpuid8000001e(&topo_info, cpu, eax, ebx, ecx, edx); ++ encode_topo_cpuid8000001e(cs, cpu, ++ eax, ebx, ecx, edx); + break; + case 0xC0000000: + *eax = 
env->cpuid_xlevel2; +-- +2.27.0 + diff --git a/kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch b/kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch new file mode 100644 index 0000000..0326049 --- /dev/null +++ b/kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch @@ -0,0 +1,63 @@ +From a36be18a97841a091256e9934fb323afc9c3a57a Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 28 Aug 2020 16:23:44 -0400 +Subject: [PATCH 02/11] Revert "target/i386: Enable new apic id encoding for + EPYC based cpus models" + +RH-Author: Igor Mammedov +Message-id: <20200828162349.1616028-3-imammedo@redhat.com> +Patchwork-id: 98245 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 2/7] Revert "target/i386: Enable new apic id encoding for EPYC based cpus models" +Bugzilla: 1873417 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Dr. David Alan Gilbert + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 +Branch: rhel-av-8.3.0 +Upstream: RHEL only +Tested: locally + +A regression was introduced since qemu-5.0, when EPYC specific +APIC ID encoding was introduced. Which leads to migration failing +with: +" + : Unknown savevm section or instance 'apic' 4. Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices + : load of migration failed: Invalid argument +" +when EPYC cpu model and more than 1 numa node is used. +EPYC specific APIC ID encoding is considered as failed +experiment and upstream is preparing to revert it as well. + +This reverts commit 247b18c593ec298446645af8d5d28911daf653b1. + +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 6517cc73a2..66b6a77b2f 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3996,7 +3996,6 @@ static X86CPUDefinition builtin_x86_defs[] = { + .xlevel = 0x8000001E, + .model_id = "AMD EPYC Processor", + .cache_info = &epyc_cache_info, +- .use_epyc_apic_id_encoding = 1, + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { +@@ -4124,7 +4123,6 @@ static X86CPUDefinition builtin_x86_defs[] = { + .xlevel = 0x8000001E, + .model_id = "AMD EPYC-Rome Processor", + .cache_info = &epyc_rome_cache_info, +- .use_epyc_apic_id_encoding = 1, + }, + }; + +-- +2.27.0 + diff --git a/kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch b/kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch new file mode 100644 index 0000000..c6f1506 --- /dev/null +++ b/kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch @@ -0,0 +1,254 @@ +From 74ce16018bcb202ab81f3aa7b5a33279dd4800da Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Tue, 1 Sep 2020 19:07:04 -0400 +Subject: [PATCH 08/11] nvram: Exit QEMU if NVRAM cannot contain all -prom-env + data +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Greg Kurz +Message-id: <20200901190704.474799-2-gkurz@redhat.com> +Patchwork-id: 98256 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/1] nvram: Exit QEMU if NVRAM cannot contain all -prom-env data +Bugzilla: 1867739 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Thomas Huth +RH-Acked-by: Laurent Vivier +RH-Acked-by: David Gibson + +From: Greg Kurz + +Since commit 61f20b9dc5b7 ("spapr_nvram: Pre-initialize the NVRAM to +support the -prom-env parameter"), pseries machines can pre-initialize +the "system" partition in the NVRAM with the data passed to all -prom-env +parameters on the QEMU command line. 
+ +In this case it is assumed that all the data fits in 64 KiB, but the user +can easily pass more and crash QEMU: + +$ qemu-system-ppc64 -M pseries $(for ((x=0;x<128;x++)); do \ + echo -n " -prom-env " ; printf "%0.sx" {1..1024}; \ + done) # this requires ~128 Kib +malloc(): corrupted top size +Aborted (core dumped) + +This happens because we don't check if all the prom-env data fits in +the NVRAM and chrp_nvram_set_var() happily memcpy() it passed the +buffer. + +This crash affects basically all ppc/ppc64 machine types that use -prom-env: +- pseries (all versions) +- g3beige +- mac99 + +and also sparc/sparc64 machine types: +- LX +- SPARCClassic +- SPARCbook +- SS-10 +- SS-20 +- SS-4 +- SS-5 +- SS-600MP +- Voyager +- sun4u +- sun4v + +Add a max_len argument to chrp_nvram_create_system_partition() so that +it can check the available size before writing to memory. + +Since NVRAM is populated at machine init, it seems reasonable to consider +this error as fatal. So, instead of reporting an error when we detect that +the NVRAM is too small and adapt all machine types to handle it, we simply +exit QEMU in all cases. This is still better than crashing. If someone +wants another behavior, I guess this can be reworked later. + +Tested with: + +$ yes q | \ + (for arch in ppc ppc64 sparc sparc64; do \ + echo == $arch ==; \ + qemu=${arch}-softmmu/qemu-system-$arch; \ + for mach in $($qemu -M help | awk '! /^Supported/ { print $1 }'); do \ + echo $mach; \ + $qemu -M $mach -monitor stdio -nodefaults -nographic \ + $(for ((x=0;x<128;x++)); do \ + echo -n " -prom-env " ; printf "%0.sx" {1..1024}; \ + done) >/dev/null; \ + done; echo; \ + done) + +Without the patch, affected machine types cause QEMU to report some +memory corruption and crash: + +malloc(): corrupted top size + +free(): invalid size + +*** stack smashing detected ***: terminated + +With the patch, QEMU prints the following message and exits: + +NVRAM is too small. 
Try to pass less data to -prom-env + +It seems that the conditions for the crash have always existed, but it +affects pseries, the machine type I care for, since commit 61f20b9dc5b7 +only. + +Fixes: 61f20b9dc5b7 ("spapr_nvram: Pre-initialize the NVRAM to support the -prom-env parameter") +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1867739 +Reported-by: John Snow +Reviewed-by: Laurent Vivier +Signed-off-by: Greg Kurz +Message-Id: <159736033937.350502.12402444542194031035.stgit@bahia.lan> +Signed-off-by: David Gibson +(cherry picked from commit 37035df51eaabb8d26b71da75b88a1c6727de8fa) +Signed-off-by: Greg Kurz +Signed-off-by: Danilo C. L. de Paula +--- + hw/nvram/chrp_nvram.c | 24 +++++++++++++++++++++--- + hw/nvram/mac_nvram.c | 2 +- + hw/nvram/spapr_nvram.c | 3 ++- + hw/sparc/sun4m.c | 2 +- + hw/sparc64/sun4u.c | 2 +- + include/hw/nvram/chrp_nvram.h | 3 ++- + 6 files changed, 28 insertions(+), 8 deletions(-) + +diff --git a/hw/nvram/chrp_nvram.c b/hw/nvram/chrp_nvram.c +index d969f26704..d4d10a7c03 100644 +--- a/hw/nvram/chrp_nvram.c ++++ b/hw/nvram/chrp_nvram.c +@@ -21,14 +21,21 @@ + + #include "qemu/osdep.h" + #include "qemu/cutils.h" ++#include "qemu/error-report.h" + #include "hw/nvram/chrp_nvram.h" + #include "sysemu/sysemu.h" + +-static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str) ++static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str, ++ int max_len) + { + int len; + + len = strlen(str) + 1; ++ ++ if (max_len < len) { ++ return -1; ++ } ++ + memcpy(&nvram[addr], str, len); + + return addr + len; +@@ -38,19 +45,26 @@ static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str) + * Create a "system partition", used for the Open Firmware + * environment variables. 
+ */ +-int chrp_nvram_create_system_partition(uint8_t *data, int min_len) ++int chrp_nvram_create_system_partition(uint8_t *data, int min_len, int max_len) + { + ChrpNvramPartHdr *part_header; + unsigned int i; + int end; + ++ if (max_len < sizeof(*part_header)) { ++ goto fail; ++ } ++ + part_header = (ChrpNvramPartHdr *)data; + part_header->signature = CHRP_NVPART_SYSTEM; + pstrcpy(part_header->name, sizeof(part_header->name), "system"); + + end = sizeof(ChrpNvramPartHdr); + for (i = 0; i < nb_prom_envs; i++) { +- end = chrp_nvram_set_var(data, end, prom_envs[i]); ++ end = chrp_nvram_set_var(data, end, prom_envs[i], max_len - end); ++ if (end == -1) { ++ goto fail; ++ } + } + + /* End marker */ +@@ -65,6 +79,10 @@ int chrp_nvram_create_system_partition(uint8_t *data, int min_len) + chrp_nvram_finish_partition(part_header, end); + + return end; ++ ++fail: ++ error_report("NVRAM is too small. Try to pass less data to -prom-env"); ++ exit(EXIT_FAILURE); + } + + /** +diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c +index beec1c4e4d..11f2d31cdb 100644 +--- a/hw/nvram/mac_nvram.c ++++ b/hw/nvram/mac_nvram.c +@@ -141,7 +141,7 @@ static void pmac_format_nvram_partition_of(MacIONVRAMState *nvr, int off, + + /* OpenBIOS nvram variables partition */ + sysp_end = chrp_nvram_create_system_partition(&nvr->data[off], +- DEF_SYSTEM_SIZE) + off; ++ DEF_SYSTEM_SIZE, len) + off; + + /* Free space partition */ + chrp_nvram_create_free_partition(&nvr->data[sysp_end], len - sysp_end); +diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c +index 15d08281d4..386513499f 100644 +--- a/hw/nvram/spapr_nvram.c ++++ b/hw/nvram/spapr_nvram.c +@@ -188,7 +188,8 @@ static void spapr_nvram_realize(SpaprVioDevice *dev, Error **errp) + } + } else if (nb_prom_envs > 0) { + /* Create a system partition to pass the -prom-env variables */ +- chrp_nvram_create_system_partition(nvram->buf, MIN_NVRAM_SIZE / 4); ++ chrp_nvram_create_system_partition(nvram->buf, MIN_NVRAM_SIZE / 4, ++ 
nvram->size); + chrp_nvram_create_free_partition(&nvram->buf[MIN_NVRAM_SIZE / 4], + nvram->size - MIN_NVRAM_SIZE / 4); + } +diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c +index 9be930415f..cf7dfa4af5 100644 +--- a/hw/sparc/sun4m.c ++++ b/hw/sparc/sun4m.c +@@ -143,7 +143,7 @@ static void nvram_init(Nvram *nvram, uint8_t *macaddr, + memset(image, '\0', sizeof(image)); + + /* OpenBIOS nvram variables partition */ +- sysp_end = chrp_nvram_create_system_partition(image, 0); ++ sysp_end = chrp_nvram_create_system_partition(image, 0, 0x1fd0); + + /* Free space partition */ + chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end); +diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c +index 9e30203dcc..37310b73e6 100644 +--- a/hw/sparc64/sun4u.c ++++ b/hw/sparc64/sun4u.c +@@ -136,7 +136,7 @@ static int sun4u_NVRAM_set_params(Nvram *nvram, uint16_t NVRAM_size, + memset(image, '\0', sizeof(image)); + + /* OpenBIOS nvram variables partition */ +- sysp_end = chrp_nvram_create_system_partition(image, 0); ++ sysp_end = chrp_nvram_create_system_partition(image, 0, 0x1fd0); + + /* Free space partition */ + chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end); +diff --git a/include/hw/nvram/chrp_nvram.h b/include/hw/nvram/chrp_nvram.h +index 09941a9be4..4a0f5c21b8 100644 +--- a/include/hw/nvram/chrp_nvram.h ++++ b/include/hw/nvram/chrp_nvram.h +@@ -50,7 +50,8 @@ chrp_nvram_finish_partition(ChrpNvramPartHdr *header, uint32_t size) + header->checksum = sum & 0xff; + } + +-int chrp_nvram_create_system_partition(uint8_t *data, int min_len); ++/* chrp_nvram_create_system_partition() failure is fatal */ ++int chrp_nvram_create_system_partition(uint8_t *data, int min_len, int max_len); + int chrp_nvram_create_free_partition(uint8_t *data, int len); + + #endif +-- +2.27.0 + diff --git a/kvm-usb-fix-setup_len-init-CVE-2020-14364.patch b/kvm-usb-fix-setup_len-init-CVE-2020-14364.patch new file mode 100644 index 0000000..e53187c --- /dev/null +++ 
b/kvm-usb-fix-setup_len-init-CVE-2020-14364.patch @@ -0,0 +1,102 @@ +From e6d43ded51d658d77bb7f8a490f2bf93946d3215 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Thu, 3 Sep 2020 14:27:19 -0400 +Subject: [PATCH 09/11] usb: fix setup_len init (CVE-2020-14364) + +RH-Author: Jon Maloy +Message-id: <20200903142719.1415757-2-jmaloy@redhat.com> +Patchwork-id: 98265 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/1] usb: fix setup_len init (CVE-2020-14364) +Bugzilla: 1869715 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Thomas Huth +RH-Acked-by: Laurent Vivier + +From: Gerd Hoffmann + +Store calculated setup_len in a local variable, verify it, and only +write it to the struct (USBDevice->setup_len) in case it passed the +sanity checks. + +This prevents other code (do_token_{in,out} functions specifically) +from working with invalid USBDevice->setup_len values and overrunning +the USBDevice->setup_buf[] buffer. + +Fixes: CVE-2020-14364 +Signed-off-by: Gerd Hoffmann +Tested-by: Gonglei +Reviewed-by: Li Qiang +Message-id: 20200825053636.29648-1-kraxel@redhat.com +(cherry picked from commit b946434f2659a182afc17e155be6791ebfb302eb) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/usb/core.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +diff --git a/hw/usb/core.c b/hw/usb/core.c +index 5abd128b6b..5234dcc73f 100644 +--- a/hw/usb/core.c ++++ b/hw/usb/core.c +@@ -129,6 +129,7 @@ void usb_wakeup(USBEndpoint *ep, unsigned int stream) + static void do_token_setup(USBDevice *s, USBPacket *p) + { + int request, value, index; ++ unsigned int setup_len; + + if (p->iov.size != 8) { + p->status = USB_RET_STALL; +@@ -138,14 +139,15 @@ static void do_token_setup(USBDevice *s, USBPacket *p) + usb_packet_copy(p, s->setup_buf, p->iov.size); + s->setup_index = 0; + p->actual_length = 0; +- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; +- if (s->setup_len > sizeof(s->data_buf)) { ++ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; ++ if (setup_len > sizeof(s->data_buf)) { + fprintf(stderr, + "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", +- s->setup_len, sizeof(s->data_buf)); ++ setup_len, sizeof(s->data_buf)); + p->status = USB_RET_STALL; + return; + } ++ s->setup_len = setup_len; + + request = (s->setup_buf[0] << 8) | s->setup_buf[1]; + value = (s->setup_buf[3] << 8) | s->setup_buf[2]; +@@ -259,26 +261,28 @@ static void do_token_out(USBDevice *s, USBPacket *p) + static void do_parameter(USBDevice *s, USBPacket *p) + { + int i, request, value, index; ++ unsigned int setup_len; + + for (i = 0; i < 8; i++) { + s->setup_buf[i] = p->parameter >> (i*8); + } + + s->setup_state = SETUP_STATE_PARAM; +- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; + s->setup_index = 0; + + request = (s->setup_buf[0] << 8) | s->setup_buf[1]; + value = (s->setup_buf[3] << 8) | s->setup_buf[2]; + index = (s->setup_buf[5] << 8) | s->setup_buf[4]; + +- if (s->setup_len > sizeof(s->data_buf)) { ++ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; ++ if (setup_len > sizeof(s->data_buf)) { + fprintf(stderr, + "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", +- s->setup_len, 
sizeof(s->data_buf)); ++ setup_len, sizeof(s->data_buf)); + p->status = USB_RET_STALL; + return; + } ++ s->setup_len = setup_len; + + if (p->pid == USB_TOKEN_OUT) { + usb_packet_copy(p, s->data_buf, s->setup_len); +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index b80c4b7..80e6227 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.1.0 -Release: 4%{?dist} +Release: 5%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -158,6 +158,24 @@ Patch44: kvm-migration-Add-block-bitmap-mapping-parameter.patch Patch45: kvm-iotests.py-Let-wait_migration-return-on-failure.patch # For bz#1790492 - 'dirty-bitmaps' migration capability should allow configuring target nodenames Patch46: kvm-iotests-Test-node-bitmap-aliases-during-migration.patch +# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes +Patch47: kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch +# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes +Patch48: kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch +# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes +Patch49: kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch +# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes +Patch50: kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch +# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes +Patch51: kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch +# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes +Patch52: kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch +# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes +Patch53: kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch +# For bz#1867739 - -prom-env does not validate input +Patch54: kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch +# For 
bz#1869715 - CVE-2020-14364 qemu-kvm: QEMU: usb: out-of-bounds r/w access issue while processing usb packets [rhel-av-8.3.0] +Patch55: kvm-usb-fix-setup_len-init-CVE-2020-14364.patch BuildRequires: wget BuildRequires: rpm-build @@ -184,7 +202,6 @@ BuildRequires: python3-sphinx BuildRequires: spice-protocol >= 0.12.12 BuildRequires: spice-server-devel >= 0.12.8 BuildRequires: libcacard-devel -BuildRequires: virglrenderer-devel # For smartcard NSS support BuildRequires: nss-devel %endif @@ -196,7 +213,7 @@ BuildRequires: librados-devel BuildRequires: librbd-devel %if %{have_gluster} # For gluster block driver -BuildRequires: glusterfs-api-devel >= 3.6.0 +BuildRequires: glusterfs-api-devel BuildRequires: glusterfs-devel %endif # We need both because the 'stap' binary is probed for by configure @@ -306,9 +323,6 @@ Requires: %{name}-common = %{epoch}:%{version}-%{release} Requires: libseccomp >= 2.4.0 # For compressed guest memory dumps Requires: lzo snappy -%if %{have_gluster} -Requires: glusterfs-api >= 3.6.0 -%endif %if %{have_kvm_setup} Requires(post): systemd-units Requires(preun): systemd-units @@ -617,11 +631,7 @@ cd qemu-kvm-build --enable-vhost-user \ --enable-vhost-vdpa \ --enable-vhost-vsock \ -%if 0%{have_spice} - --enable-virglrenderer \ -%else --disable-virglrenderer \ -%endif --disable-virtfs \ --enable-vnc \ --disable-vnc-jpeg \ @@ -1068,10 +1078,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %if 0%{have_memlock_limits} %{_sysconfdir}/security/limits.d/95-kvm-memlock.conf %endif -%if %{have_spice} -%{_libexecdir}/vhost-user-gpu -%{_datadir}/%{name}/vhost-user/50-qemu-gpu.json -%endif %{_libexecdir}/virtiofsd %{_datadir}/%{name}/vhost-user/50-qemu-virtiofsd.json %if %{have_usbredir} @@ -1126,6 +1132,29 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Sep 08 2020 Danilo Cesar Lemes de Paula - 5.1.0-5.el8 +- kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch [bz#1873417] +- 
kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch [bz#1873417] +- kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch [bz#1873417] +- kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch [bz#1873417] +- kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch [bz#1873417] +- kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch [bz#1873417] +- kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch [bz#1873417] +- kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch [bz#1867739] +- kvm-usb-fix-setup_len-init-CVE-2020-14364.patch [bz#1869715] +- kvm-Remove-explicit-glusterfs-api-dependency.patch [bz#1872853] +- kvm-disable-virgl.patch [bz#1831271] +- Resolves: bz#1831271 + (Drop virgil acceleration support and remove virglrenderer dependency) +- Resolves: bz#1867739 + (-prom-env does not validate input) +- Resolves: bz#1869715 + (CVE-2020-14364 qemu-kvm: QEMU: usb: out-of-bounds r/w access issue while processing usb packets [rhel-av-8.3.0]) +- Resolves: bz#1872853 + (move the glusterfs dependency out of qemu-kvm-core to the glusterfs module) +- Resolves: bz#1873417 + (AMD/NUMA topology - revert 5.1 changes) + * Thu Aug 27 2020 Danilo Cesar Lemes de Paula - 5.1.0-4.el8 - kvm-Drop-bogus-IPv6-messages.patch [bz#1867075] - kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch [bz#1849707] From 8198b6bf7f935b44d3827584f4dd8ec7468ff7a7 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Thu, 10 Sep 2020 15:57:46 -0400 Subject: [PATCH 089/195] * Thu Sep 10 2020 Danilo Cesar Lemes de Paula - 5.1.0-6.el8 - kvm-spec-Move-qemu-pr-helper-back-to-usr-bin.patch [bz#1869635] - kvm-Bump-required-libusbx-version.patch [bz#1856591] - Resolves: bz#1856591 (libusbx isn't updated with qemu-kvm) - Resolves: bz#1869635 ('/usr/bin/qemu-pr-helper' is not a suitable pr helper: No such file or directory) --- qemu-kvm.spec | 17 ++++++++++++++--- qemu-pr-helper.service | 2 +- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 80e6227..10df9c8 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.1.0 -Release: 5%{?dist} +Release: 6%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -330,7 +330,7 @@ Requires(preun): systemd-units Requires: powerpc-utils %endif %endif -Requires: libusbx >= 1.0.19 +Requires: libusbx >= 1.0.23 %if %{have_usbredir} Requires: usbredir >= 0.7.1 %endif @@ -933,6 +933,9 @@ rm -rf $RPM_BUILD_ROOT%{qemudocdir}/user/.buildinfo # Remove spec rm -rf $RPM_BUILD_ROOT%{qemudocdir}/specs +# Hack to keep qemu-pr-helper in original location +mv $RPM_BUILD_ROOT%{_libexecdir}/qemu-pr-helper $RPM_BUILD_ROOT%{_bindir}/qemu-pr-helper + %check cd qemu-kvm-build export DIFF=diff; make check V=1 @@ -996,7 +999,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_mandir}/man7/qemu-qmp-ref.7* %{_mandir}/man7/qemu-cpu-models.7* %{_bindir}/qemu-keymap -%{_libexecdir}/qemu-pr-helper +%{_bindir}/qemu-pr-helper %{_bindir}/qemu-edid %{_bindir}/qemu-trace-stap %{_unitdir}/qemu-pr-helper.service @@ -1132,6 +1135,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Thu Sep 10 2020 Danilo Cesar Lemes de Paula - 5.1.0-6.el8 +- kvm-spec-Move-qemu-pr-helper-back-to-usr-bin.patch [bz#1869635] 
+- kvm-Bump-required-libusbx-version.patch [bz#1856591] +- Resolves: bz#1856591 + (libusbx isn't updated with qemu-kvm) +- Resolves: bz#1869635 + ('/usr/bin/qemu-pr-helper' is not a suitable pr helper: No such file or directory) + * Tue Sep 08 2020 Danilo Cesar Lemes de Paula - 5.1.0-5.el8 - kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch [bz#1873417] - kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch [bz#1873417] diff --git a/qemu-pr-helper.service b/qemu-pr-helper.service index ac6cda1..a1d27b0 100644 --- a/qemu-pr-helper.service +++ b/qemu-pr-helper.service @@ -4,7 +4,7 @@ Description=Persistent Reservation Daemon for QEMU [Service] WorkingDirectory=/tmp Type=simple -ExecStart=/usr/libexec/qemu-pr-helper +ExecStart=/usr/bin/qemu-pr-helper PrivateTmp=yes ProtectSystem=strict ReadWritePaths=/var/run From 7b68902699d81cdc8d2930229ea3e5c0680e74c9 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Tue, 15 Sep 2020 11:56:35 -0400 Subject: [PATCH 090/195] * Tue Sep 15 2020 Danilo Cesar Lemes de Paula - 5.1.0-7.el8 - kvm-target-ppc-Add-experimental-option-for-enabling-secu.patch [bz#1789757 bz#1870384] - kvm-target-arm-Move-start-powered-off-property-to-generi.patch [bz#1849483] - kvm-target-arm-Move-setting-of-CPU-halted-state-to-gener.patch [bz#1849483] - kvm-ppc-spapr-Use-start-powered-off-CPUState-property.patch [bz#1849483] - Resolves: bz#1789757 ([IBM 8.4 FEAT] Add machine option to enable secure VM support) - Resolves: bz#1849483 (Failed to boot up guest when hotplugging vcpus on bios stage) - Resolves: bz#1870384 ([IBM 8.3 FEAT] Add interim/unsupported machine option to enable secure VM support for testing purposes) --- ...-start-powered-off-CPUState-property.patch | 82 +++++++ ...setting-of-CPU-halted-state-to-gener.patch | 66 ++++++ ...start-powered-off-property-to-generi.patch | 141 +++++++++++ ...xperimental-option-for-enabling-secu.patch | 219 ++++++++++++++++++ qemu-kvm.spec | 23 +- 5 files changed, 530 insertions(+), 1 
deletion(-) create mode 100644 kvm-ppc-spapr-Use-start-powered-off-CPUState-property.patch create mode 100644 kvm-target-arm-Move-setting-of-CPU-halted-state-to-gener.patch create mode 100644 kvm-target-arm-Move-start-powered-off-property-to-generi.patch create mode 100644 kvm-target-ppc-Add-experimental-option-for-enabling-secu.patch diff --git a/kvm-ppc-spapr-Use-start-powered-off-CPUState-property.patch b/kvm-ppc-spapr-Use-start-powered-off-CPUState-property.patch new file mode 100644 index 0000000..b8affe1 --- /dev/null +++ b/kvm-ppc-spapr-Use-start-powered-off-CPUState-property.patch @@ -0,0 +1,82 @@ +From 5dd7cdf3739c73d910d5df6443b39e9b0b79f3fd Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Tue, 8 Sep 2020 18:47:16 -0400 +Subject: [PATCH 4/4] ppc/spapr: Use start-powered-off CPUState property + +RH-Author: Laurent Vivier +Message-id: <20200908184716.1125192-4-lvivier@redhat.com> +Patchwork-id: 98302 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 3/3] ppc/spapr: Use start-powered-off CPUState property +Bugzilla: 1849483 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Gibson +RH-Acked-by: Greg Kurz + +From: Thiago Jung Bauermann + +PowerPC sPAPR CPUs start in the halted state, and spapr_reset_vcpu() +attempts to implement this by setting CPUState::halted to 1. But that's too +late for the case of hotplugged CPUs in a machine configure with 2 or more +threads per core. + +By then, other parts of QEMU have already caused the vCPU to run in an +unitialized state a couple of times. For example, ppc_cpu_reset() calls +ppc_tlb_invalidate_all(), which ends up calling async_run_on_cpu(). This +kicks the new vCPU while it has CPUState::halted = 0, causing QEMU to issue +a KVM_RUN ioctl on the new vCPU before the guest is able to make the +start-cpu RTAS call to initialize its register state. + +This problem doesn't seem to cause visible issues for regular guests, but +on a secure guest running under the Ultravisor it does. 
The Ultravisor +relies on being able to snoop on the start-cpu RTAS call to map vCPUs to +guests, and this issue causes it to see a stray vCPU that doesn't belong to +any guest. + +Fix by setting the start-powered-off CPUState property in +spapr_create_vcpu(), which makes cpu_common_reset() initialize +CPUState::halted to 1 at an earlier moment. + +Suggested-by: Eduardo Habkost +Acked-by: David Gibson +Reviewed-by: Greg Kurz +Signed-off-by: Thiago Jung Bauermann +Message-Id: <20200826055535.951207-4-bauerman@linux.ibm.com> +Signed-off-by: David Gibson +(cherry picked from commit 554c2169e9251ca2829ab968bd9ba5641a5abe1d) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr_cpu_core.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index f228f8bb75..86fed5c528 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -37,11 +37,6 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu) + + cpu_reset(cs); + +- /* All CPUs start halted. CPU0 is unhalted from the machine level +- * reset code and the rest are explicitly started up by the guest +- * using an RTAS call */ +- cs->halted = 1; +- + env->spr[SPR_HIOR] = 0; + + lpcr = env->spr[SPR_LPCR]; +@@ -287,6 +282,11 @@ static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp) + + cs = CPU(obj); + cpu = POWERPC_CPU(obj); ++ /* ++ * All CPUs start halted. CPU0 is unhalted from the machine level reset code ++ * and the rest are explicitly started up by the guest using an RTAS call. 
++ */ ++ cs->start_powered_off = true; + cs->cpu_index = cc->core_id + i; + spapr_set_vcpu_id(cpu, cs->cpu_index, &local_err); + if (local_err) { +-- +2.27.0 + diff --git a/kvm-target-arm-Move-setting-of-CPU-halted-state-to-gener.patch b/kvm-target-arm-Move-setting-of-CPU-halted-state-to-gener.patch new file mode 100644 index 0000000..f7d6940 --- /dev/null +++ b/kvm-target-arm-Move-setting-of-CPU-halted-state-to-gener.patch @@ -0,0 +1,66 @@ +From de7263538a5cab64163edd1bdb3a934dabd625f2 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Tue, 8 Sep 2020 18:47:15 -0400 +Subject: [PATCH 3/4] target/arm: Move setting of CPU halted state to generic + code +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +Message-id: <20200908184716.1125192-3-lvivier@redhat.com> +Patchwork-id: 98300 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 2/3] target/arm: Move setting of CPU halted state to generic code +Bugzilla: 1849483 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Gibson +RH-Acked-by: Greg Kurz + +From: Thiago Jung Bauermann + +This change is in a separate patch because it's not so obvious that it +won't cause a regression. + +Suggested-by: Eduardo Habkost +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: David Gibson +Reviewed-by: Greg Kurz +Signed-off-by: Thiago Jung Bauermann +Message-Id: <20200826055535.951207-3-bauerman@linux.ibm.com> +Signed-off-by: David Gibson +(cherry picked from commit 6ad1da667c8e21f019d4adc21702e06dd9225790) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/core/cpu.c | 2 +- + target/arm/cpu.c | 1 - + 2 files changed, 1 insertion(+), 2 deletions(-) + +diff --git a/hw/core/cpu.c b/hw/core/cpu.c +index 594441a150..71bb7859f1 100644 +--- a/hw/core/cpu.c ++++ b/hw/core/cpu.c +@@ -258,7 +258,7 @@ static void cpu_common_reset(DeviceState *dev) + } + + cpu->interrupt_request = 0; +- cpu->halted = 0; ++ cpu->halted = cpu->start_powered_off; + cpu->mem_io_pc = 0; + cpu->icount_extra = 0; + atomic_set(&cpu->icount_decr_ptr->u32, 0); +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 54fb653229..059a5ced6e 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -175,7 +175,6 @@ static void arm_cpu_reset(DeviceState *dev) + env->vfp.xregs[ARM_VFP_MVFR2] = cpu->isar.mvfr2; + + cpu->power_state = s->start_powered_off ? PSCI_OFF : PSCI_ON; +- s->halted = s->start_powered_off; + + if (arm_feature(env, ARM_FEATURE_IWMMXT)) { + env->iwmmxt.cregs[ARM_IWMMXT_wCID] = 0x69051000 | 'Q'; +-- +2.27.0 + diff --git a/kvm-target-arm-Move-start-powered-off-property-to-generi.patch b/kvm-target-arm-Move-start-powered-off-property-to-generi.patch new file mode 100644 index 0000000..9a39a56 --- /dev/null +++ b/kvm-target-arm-Move-start-powered-off-property-to-generi.patch @@ -0,0 +1,141 @@ +From 76821af8b6d74237718fb554ae1a96f969308c69 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Tue, 8 Sep 2020 18:47:14 -0400 +Subject: [PATCH 2/4] target/arm: Move start-powered-off property to generic + CPUState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +Message-id: <20200908184716.1125192-2-lvivier@redhat.com> +Patchwork-id: 98299 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/3] target/arm: Move start-powered-off property to generic CPUState +Bugzilla: 1849483 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Gibson +RH-Acked-by: Greg Kurz + +From: Thiago Jung Bauermann + +There are other platforms which also have CPUs that start powered off, so 
+generalize the start-powered-off property so that it can be used by them. + +Note that ARMv7MState also has a property of the same name but this patch +doesn't change it because that class isn't a subclass of CPUState so it +wouldn't be a trivial change. + +This change should not cause any change in behavior. + +Suggested-by: Eduardo Habkost +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: David Gibson +Reviewed-by: Greg Kurz +Signed-off-by: Thiago Jung Bauermann +Message-Id: <20200826055535.951207-2-bauerman@linux.ibm.com> +Signed-off-by: David Gibson +(cherry picked from commit c1b701587e59d9569c38d1d6033cd7cc2a992105) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + exec.c | 1 + + include/hw/core/cpu.h | 4 ++++ + target/arm/cpu.c | 5 ++--- + target/arm/cpu.h | 3 --- + target/arm/kvm32.c | 2 +- + target/arm/kvm64.c | 2 +- + 6 files changed, 9 insertions(+), 8 deletions(-) + +diff --git a/exec.c b/exec.c +index 6f381f98e2..82e82fab09 100644 +--- a/exec.c ++++ b/exec.c +@@ -899,6 +899,7 @@ Property cpu_common_props[] = { + DEFINE_PROP_LINK("memory", CPUState, memory, TYPE_MEMORY_REGION, + MemoryRegion *), + #endif ++ DEFINE_PROP_BOOL("start-powered-off", CPUState, start_powered_off, false), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index 8f145733ce..9fc2696db5 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -374,6 +374,10 @@ struct CPUState { + bool created; + bool stop; + bool stopped; ++ ++ /* Should CPU start in powered-off state? 
*/ ++ bool start_powered_off; ++ + bool unplug; + bool crash_occurred; + bool exit_request; +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 13ad40aa7d..54fb653229 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -174,8 +174,8 @@ static void arm_cpu_reset(DeviceState *dev) + env->vfp.xregs[ARM_VFP_MVFR1] = cpu->isar.mvfr1; + env->vfp.xregs[ARM_VFP_MVFR2] = cpu->isar.mvfr2; + +- cpu->power_state = cpu->start_powered_off ? PSCI_OFF : PSCI_ON; +- s->halted = cpu->start_powered_off; ++ cpu->power_state = s->start_powered_off ? PSCI_OFF : PSCI_ON; ++ s->halted = s->start_powered_off; + + if (arm_feature(env, ARM_FEATURE_IWMMXT)) { + env->iwmmxt.cregs[ARM_IWMMXT_wCID] = 0x69051000 | 'Q'; +@@ -2182,7 +2182,6 @@ static const ARMCPUInfo arm_cpus[] = { + }; + + static Property arm_cpu_properties[] = { +- DEFINE_PROP_BOOL("start-powered-off", ARMCPU, start_powered_off, false), + DEFINE_PROP_UINT32("psci-conduit", ARMCPU, psci_conduit, 0), + DEFINE_PROP_UINT64("midr", ARMCPU, midr, 0), + DEFINE_PROP_UINT64("mp-affinity", ARMCPU, +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 9e8ed423ea..a925d26996 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -810,9 +810,6 @@ struct ARMCPU { + */ + uint32_t psci_version; + +- /* Should CPU start in PSCI powered-off state? 
*/ +- bool start_powered_off; +- + /* Current power state, access guarded by BQL */ + ARMPSCIState power_state; + +diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c +index 0af46b41c8..1f2b8f8b7a 100644 +--- a/target/arm/kvm32.c ++++ b/target/arm/kvm32.c +@@ -218,7 +218,7 @@ int kvm_arch_init_vcpu(CPUState *cs) + + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); +- if (cpu->start_powered_off) { ++ if (cs->start_powered_off) { + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF; + } + if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) { +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 1169237905..f8a6d905fb 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -775,7 +775,7 @@ int kvm_arch_init_vcpu(CPUState *cs) + + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); +- if (cpu->start_powered_off) { ++ if (cs->start_powered_off) { + cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF; + } + if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) { +-- +2.27.0 + diff --git a/kvm-target-ppc-Add-experimental-option-for-enabling-secu.patch b/kvm-target-ppc-Add-experimental-option-for-enabling-secu.patch new file mode 100644 index 0000000..f229a28 --- /dev/null +++ b/kvm-target-ppc-Add-experimental-option-for-enabling-secu.patch @@ -0,0 +1,219 @@ +From b162af531abdf6f5e8ad13b93699a3ba28de6702 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 20 Aug 2020 23:00:51 -0400 +Subject: [PATCH 1/4] target/ppc: Add experimental option for enabling secure + guests + +RH-Author: Michael Roth +Message-id: <20200820230051.516359-2-mroth@redhat.com> +Patchwork-id: 98208 +O-Subject: [RHEL-AV-8.3.0 qemu virt PATCH 1/1] target/ppc: Add experimental option for enabling secure guests +Bugzilla: 1870384 +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier +RH-Acked-by: David Gibson + +From: Fabiano Rosas + 
+Making use of ppc's Protected Execution Facility (PEF) feature, a +guest can become a secure guest (aka. secure VM - SVM) and have its +memory protected from access by the host. This feature is mediated by +a piece of firmware called the Ultravisor (UV). + +The transition from a regular to a secure VM is initiated by the guest +kernel during prom_init via the use of an ultracall (enter secure mode +- UV_ESM) and with cooperation from the hypervisor via an hcall +(H_SVM_INIT_START). + +Currently QEMU has no knowledge of this process and no way to +determine if a host supports the feature. A guest with PEF support +enabled would always try to enter secure mode regardless of user +intent or hardware support. + +To address the above, a new KVM capability (KVM_CAP_PPC_SECURE_GUEST +[1]) is being introduced in the kernel without which KVM will block +the secure transition. + +This patch adds support for checking/enabling this KVM capability via +a new experimental spapr machine option, e.g.: + + -machine pseries,x-svm-allowed=on + +The capability defaults to off. + +1- https://lore.kernel.org/kvm/20200319043301.GA13052@blackberry + +Signed-off-by: Fabiano Rosas + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1789757 +Upstream: RHEL-only +*re-worked to drop use of spapr capabilities infrastructure in favor + of a simple one-off machine option +Signed-off-by: Michael Roth +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/spapr.c | 23 +++++++++++++++++++++++ + include/hw/ppc/spapr.h | 3 +++ + target/ppc/kvm.c | 27 +++++++++++++++++++++++++++ + target/ppc/kvm_ppc.h | 13 +++++++++++++ + 4 files changed, 66 insertions(+) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 5e3964326d..e77c90bfc5 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -1636,6 +1636,9 @@ static void spapr_machine_reset(MachineState *machine) + + kvmppc_svm_off(&error_fatal); + spapr_caps_apply(spapr); ++ if (spapr->svm_allowed) { ++ kvmppc_svm_allow(&error_fatal); ++ } + + first_ppc_cpu = POWERPC_CPU(first_cpu); + if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && +@@ -3303,6 +3306,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) + spapr->host_serial = g_strdup(value); + } + ++static bool spapr_get_svm_allowed(Object *obj, Error **errp) ++{ ++ SpaprMachineState *spapr = SPAPR_MACHINE(obj); ++ ++ return spapr->svm_allowed; ++} ++ ++static void spapr_set_svm_allowed(Object *obj, bool value, Error **errp) ++{ ++ SpaprMachineState *spapr = SPAPR_MACHINE(obj); ++ ++ spapr->svm_allowed = value; ++} ++ + static void spapr_instance_init(Object *obj) + { + SpaprMachineState *spapr = SPAPR_MACHINE(obj); +@@ -3358,6 +3375,12 @@ static void spapr_instance_init(Object *obj) + spapr_get_host_serial, spapr_set_host_serial); + object_property_set_description(obj, "host-serial", + "Host serial number to advertise in guest device tree"); ++ object_property_add_bool(obj, "x-svm-allowed", ++ spapr_get_svm_allowed, ++ spapr_set_svm_allowed); ++ object_property_set_description(obj, "x-svm-allowed", ++ "Allow the guest to become a Secure Guest" ++ " (experimental only)"); + } + + static void spapr_machine_finalizefn(Object *obj) +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index f48089edba..d0728a4758 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -212,6 +212,9 @@ struct SpaprMachineState { + int 
fwnmi_machine_check_interlock; + QemuCond fwnmi_machine_check_interlock_cond; + ++ /* Secure Guest support via x-svm-allowed */ ++ bool svm_allowed; ++ + /*< public >*/ + char *kvm_type; + char *host_model; +diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c +index d85ba8ffe0..ce63f8b6f3 100644 +--- a/target/ppc/kvm.c ++++ b/target/ppc/kvm.c +@@ -89,6 +89,7 @@ static int cap_ppc_count_cache_flush_assist; + static int cap_ppc_nested_kvm_hv; + static int cap_large_decr; + static int cap_fwnmi; ++static int cap_ppc_secure_guest; + + static uint32_t debug_inst_opcode; + +@@ -136,6 +137,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); + kvmppc_get_cpu_characteristics(s); + cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); ++ cap_ppc_secure_guest = kvm_vm_check_extension(s, KVM_CAP_PPC_SECURE_GUEST); + cap_large_decr = kvmppc_get_dec_bits(); + cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); + /* +@@ -2538,6 +2540,16 @@ int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) + return 0; + } + ++bool kvmppc_has_cap_secure_guest(void) ++{ ++ return !!cap_ppc_secure_guest; ++} ++ ++int kvmppc_enable_cap_secure_guest(void) ++{ ++ return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1); ++} ++ + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) + { + uint32_t host_pvr = mfpvr(); +@@ -2948,3 +2960,18 @@ void kvmppc_svm_off(Error **errp) + error_setg_errno(errp, -rc, "KVM_PPC_SVM_OFF ioctl failed"); + } + } ++ ++void kvmppc_svm_allow(Error **errp) ++{ ++ if (!kvm_enabled()) { ++ error_setg(errp, "No PEF support in tcg, try x-svm-allowed=off"); ++ return; ++ } ++ ++ if (!kvmppc_has_cap_secure_guest()) { ++ error_setg(errp, "KVM implementation does not support secure guests, " ++ "try x-svm-allowed=off"); ++ } else if (kvmppc_enable_cap_secure_guest() < 0) { ++ error_setg(errp, "Error enabling x-svm-allowed, try x-svm-allowed=off"); ++ } ++} +diff --git 
a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h +index 72e05f1cd2..3fd5ea2414 100644 +--- a/target/ppc/kvm_ppc.h ++++ b/target/ppc/kvm_ppc.h +@@ -40,6 +40,7 @@ target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, + bool radix, bool gtse, + uint64_t proc_tbl); + void kvmppc_svm_off(Error **errp); ++void kvmppc_svm_allow(Error **errp); + #ifndef CONFIG_USER_ONLY + bool kvmppc_spapr_use_multitce(void); + int kvmppc_spapr_enable_inkernel_multitce(void); +@@ -73,6 +74,8 @@ int kvmppc_set_cap_nested_kvm_hv(int enable); + int kvmppc_get_cap_large_decr(void); + int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); + int kvmppc_enable_hwrng(void); ++bool kvmppc_has_cap_secure_guest(void); ++int kvmppc_enable_cap_secure_guest(void); + int kvmppc_put_books_sregs(PowerPCCPU *cpu); + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); + void kvmppc_check_papr_resize_hpt(Error **errp); +@@ -386,6 +389,16 @@ static inline int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) + return -1; + } + ++static inline bool kvmppc_has_cap_secure_guest(void) ++{ ++ return false; ++} ++ ++static inline int kvmppc_enable_cap_secure_guest(void) ++{ ++ return -1; ++} ++ + static inline int kvmppc_enable_hwrng(void) + { + return -1; +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 10df9c8..64848ce 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.1.0 -Release: 6%{?dist} +Release: 7%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -176,6 +176,15 @@ Patch53: kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch Patch54: kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch # For bz#1869715 - CVE-2020-14364 qemu-kvm: QEMU: usb: out-of-bounds r/w access issue while processing usb packets [rhel-av-8.3.0] Patch55: kvm-usb-fix-setup_len-init-CVE-2020-14364.patch +# For bz#1789757 - [IBM 8.4 FEAT] Add machine option to enable secure VM support +# For bz#1870384 - [IBM 8.3 FEAT] Add interim/unsupported machine option to enable secure VM support for testing purposes +Patch56: kvm-target-ppc-Add-experimental-option-for-enabling-secu.patch +# For bz#1849483 - Failed to boot up guest when hotplugging vcpus on bios stage +Patch57: kvm-target-arm-Move-start-powered-off-property-to-generi.patch +# For bz#1849483 - Failed to boot up guest when hotplugging vcpus on bios stage +Patch58: kvm-target-arm-Move-setting-of-CPU-halted-state-to-gener.patch +# For bz#1849483 - Failed to boot up guest when hotplugging vcpus on bios stage +Patch59: kvm-ppc-spapr-Use-start-powered-off-CPUState-property.patch BuildRequires: wget BuildRequires: rpm-build @@ -1135,6 +1144,18 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Sep 15 2020 Danilo Cesar Lemes de Paula - 5.1.0-7.el8 +- kvm-target-ppc-Add-experimental-option-for-enabling-secu.patch [bz#1789757 bz#1870384] +- kvm-target-arm-Move-start-powered-off-property-to-generi.patch [bz#1849483] +- kvm-target-arm-Move-setting-of-CPU-halted-state-to-gener.patch [bz#1849483] +- kvm-ppc-spapr-Use-start-powered-off-CPUState-property.patch [bz#1849483] +- Resolves: bz#1789757 + ([IBM 8.4 FEAT] Add machine option to enable secure VM support) +- Resolves: bz#1849483 + (Failed to boot up guest when hotplugging vcpus on bios stage) +- Resolves: bz#1870384 + ([IBM 8.3 FEAT] Add interim/unsupported machine option to enable secure VM support for testing purposes) 
+ * Thu Sep 10 2020 Danilo Cesar Lemes de Paula - 5.1.0-6.el8 - kvm-spec-Move-qemu-pr-helper-back-to-usr-bin.patch [bz#1869635] - kvm-Bump-required-libusbx-version.patch [bz#1856591] From 811a9b1b706f2992d69a6355c8c3b06ad0faf830 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Thu, 17 Sep 2020 14:02:36 -0400 Subject: [PATCH 091/195] * Thu Sep 17 2020 Danilo Cesar Lemes de Paula - 5.1.0-8.el8 - kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch [bz#1738820] - kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch [bz#1752376] - kvm-Revert-Drop-bogus-IPv6-messages.patch [bz#1867075] - kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch [bz#1821528] - Resolves: bz#1738820 ('-F' option of qemu-ga command cause the guest-fsfreeze-freeze command doesn't work) - Resolves: bz#1752376 (qemu use SCMP_ACT_TRAP even SCMP_ACT_KILL_PROCESS is available) - Resolves: bz#1821528 (missing namespace attribute when access the rbd image with namespace) - Resolves: bz#1867075 (CVE-2020-10756 virt:8.3/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8]) --- kvm-Revert-Drop-bogus-IPv6-messages.patch | 48 +++++++++++ ...mespace-to-qemu_rbd_strong_runtime_o.patch | 51 ++++++++++++ ...qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch | 61 ++++++++++++++ ...ing-of-whole-process-instead-of-thre.patch | 79 +++++++++++++++++++ qemu-kvm.spec | 30 ++++++- 5 files changed, 268 insertions(+), 1 deletion(-) create mode 100644 kvm-Revert-Drop-bogus-IPv6-messages.patch create mode 100644 kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch create mode 100644 kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch create mode 100644 kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch diff --git a/kvm-Revert-Drop-bogus-IPv6-messages.patch b/kvm-Revert-Drop-bogus-IPv6-messages.patch new file mode 100644 index 0000000..5ac7159 --- /dev/null +++ b/kvm-Revert-Drop-bogus-IPv6-messages.patch @@ 
-0,0 +1,48 @@ +From 40ce2a0e9f0a9d5c00ba82f187802fdf0a0702d0 Mon Sep 17 00:00:00 2001 +From: Danilo de Paula +Date: Wed, 16 Sep 2020 01:25:22 -0400 +Subject: [PATCH 3/4] Revert "Drop bogus IPv6 messages" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Danilo de Paula +Message-id: <20200916012522.1183051-2-ddepaula@redhat.com> +Patchwork-id: 98394 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/1] Revert "Drop bogus IPv6 messages" +Bugzilla: 1867075 +RH-Acked-by: Jon Maloy +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Marc-André Lureau + +This reverts commit 6ceab004edfb7c1f0f03701bc2ae443941468fd7. + +This fix was applied during the rebase. +The commit above just duplicates it. + +Signed-off-by: Danilo C. L. de Paula +--- + slirp/src/ip6_input.c | 7 ------- + 1 file changed, 7 deletions(-) + +diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c +index f7ef354ee4..a83e4f8e3d 100644 +--- a/slirp/src/ip6_input.c ++++ b/slirp/src/ip6_input.c +@@ -56,13 +56,6 @@ void ip6_input(struct mbuf *m) + goto bad; + } + +- // Check if the message size is big enough to hold what's +- // set in the payload length header. 
If not this is an invalid +- // packet +- if (m->m_len < ntohs(ip6->ip_pl) + sizeof(struct ip6)) { +- goto bad; +- } +- + /* check ip_ttl for a correct ICMP reply */ + if (ip6->ip_hl == 0) { + icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); +-- +2.27.0 + diff --git a/kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch b/kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch new file mode 100644 index 0000000..932bc30 --- /dev/null +++ b/kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch @@ -0,0 +1,51 @@ +From ba82420d04b2e2ca69d5ff4720e37dd0748936ea Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Wed, 16 Sep 2020 11:40:25 -0400 +Subject: [PATCH 4/4] block/rbd: add 'namespace' to + qemu_rbd_strong_runtime_opts[] + +RH-Author: Stefano Garzarella +Message-id: <20200916114025.47973-2-sgarzare@redhat.com> +Patchwork-id: 98399 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/1] block/rbd: add 'namespace' to qemu_rbd_strong_runtime_opts[] +Bugzilla: 1821528 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +Commit 19ae9ae014 ("block/rbd: Add support for ceph namespaces") +introduced namespace support for RBD, but we forgot to add the +new 'namespace' options to qemu_rbd_strong_runtime_opts[]. + +The 'namespace' is used to identify the image, so it is a strong +option since it can change the data of a BDS. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1821528 +Fixes: 19ae9ae014 ("block/rbd: Add support for ceph namespaces") +Cc: Florian Florensa +Signed-off-by: Stefano Garzarella +Message-Id: <20200914190553.74871-1-sgarzare@redhat.com> +Reviewed-by: Jason Dillaman +Signed-off-by: Max Reitz +(cherry picked from commit 7bae7c805d82675eb3a02c744093703d84ada2d6) +Signed-off-by: Stefano Garzarella +Signed-off-by: Danilo C. L. 
de Paula +--- + block/rbd.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/block/rbd.c b/block/rbd.c +index 688074c64b..5356753fbe 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -1289,6 +1289,7 @@ static QemuOptsList qemu_rbd_create_opts = { + + static const char *const qemu_rbd_strong_runtime_opts[] = { + "pool", ++ "namespace", + "image", + "conf", + "snapshot", +-- +2.27.0 + diff --git a/kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch b/kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch new file mode 100644 index 0000000..069ed3a --- /dev/null +++ b/kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch @@ -0,0 +1,61 @@ +From 020501879841afb788087f0455df79367c0337a0 Mon Sep 17 00:00:00 2001 +From: Danilo de Paula +Date: Fri, 11 Sep 2020 14:23:22 -0400 +Subject: [PATCH 1/4] redhat: link /etc/qemu-ga/fsfreeze-hook to /etc/qemu-kvm/ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Danilo de Paula +Message-id: <20200911142322.1865501-2-ddepaula@redhat.com> +Patchwork-id: 98358 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH v2 1/1] redhat: link /etc/qemu-ga/fsfreeze-hook to /etc/qemu-kvm/ +Bugzilla: 1738820 +RH-Acked-by: Eduardo Lima (Etrunko) +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Miroslav Rezanina + +When qemu-ga was introduced to RHEL-8, we used the qemu-guest-agent +from RHEL-7 as base. + +In RHEL-7, qemu-guest-agent is built as standalone package. +It's built as "qemu-ga", hence the "qemu-ga" folders. + +For RHEL-8, that should have been renamed to qemu-kvm, but I missed it. +Renaming those folders to /etc/qemu-kvm is a no go today, because +users might have populated the /etc/qemu-ga/fsfreeze-hook.d folder. + +So, in order to make qemu-ga -F works in RHEL-8, a link is being +created in the expected place, pointing to the real one. + +Also, fsfreeze-hook opens up the fsfreeze-hook.d on the same PATH where +it is stored. However, it doesn't follow symlinks. 
In order to fix this, +I had to change it to make sure it follows the link. + +An option would be to also link the fsfreeze-hook.d folder, but I choose +not to do so as it creates a permanent/visible change in users +environments. The downside is to keep another downstream-only change. + +Signed-off-by: Danilo C. L. de Paula +--- + redhat/qemu-kvm.spec.template | 6 ++++++ + scripts/qemu-guest-agent/fsfreeze-hook | 2 +- + 2 files changed, 7 insertions(+), 1 deletion(-) + + +diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook +index 13aafd4845..e9b84ec028 100755 +--- a/scripts/qemu-guest-agent/fsfreeze-hook ++++ b/scripts/qemu-guest-agent/fsfreeze-hook +@@ -8,7 +8,7 @@ + # request, it is issued with "thaw" argument after filesystem is thawed. + + LOGFILE=/var/log/qga-fsfreeze-hook.log +-FSFREEZE_D=$(dirname -- "$0")/fsfreeze-hook.d ++FSFREEZE_D=$(dirname -- "$(realpath $0)")/fsfreeze-hook.d + + # Check whether file $1 is a backup or rpm-generated file and should be ignored + is_ignored_file() { +-- +2.27.0 + diff --git a/kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch b/kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch new file mode 100644 index 0000000..f2dee26 --- /dev/null +++ b/kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch @@ -0,0 +1,79 @@ +From 4740b2ba008ca469409cf5af58ac85ae5dc7f6a5 Mon Sep 17 00:00:00 2001 +From: Eduardo Otubo +Date: Tue, 15 Sep 2020 15:15:49 -0400 +Subject: [PATCH 2/4] seccomp: fix killing of whole process instead of thread +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eduardo Otubo +Message-id: <20200915151549.5007-1-otubo@redhat.com> +Patchwork-id: 98392 +O-Subject: [RHEL-AV-8.3.0/RHEL-8.3.0 qemu-kvm PATCH] seccomp: fix killing of whole process instead of thread +Bugzilla: 1752376 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Daniel P. 
Berrange + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1752376 +BRANCH: rhel-av-8.3.0 +UPSTREAM: Merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=31339152 + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1752376 +BRANCH: rhel-8.3.0 +UPSTREAM: Merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=31339111 + +From: Daniel P. Berrangé + +Back in 2018 we introduced support for killing the whole QEMU process +instead of just one thread, when a seccomp rule is violated: + + commit bda08a5764d470f101fa38635d30b41179a313e1 + Author: Marc-André Lureau + Date: Wed Aug 22 19:02:48 2018 +0200 + + seccomp: prefer SCMP_ACT_KILL_PROCESS if available + +Fast forward a year and we introduced a patch to avoid killing the +process for resource control syscalls tickled by Mesa. + + commit 9a1565a03b79d80b236bc7cc2dbce52a2ef3a1b8 + Author: Daniel P. Berrangé + Date: Wed Mar 13 09:49:03 2019 +0000 + + seccomp: don't kill process for resource control syscalls + +Unfortunately a logic bug effectively reverted the first commit +mentioned so that we go back to only killing the thread, not the whole +process. + +Signed-off-by: Daniel P. Berrangé +Reviewed-by: Stefan Hajnoczi +Acked-by: Eduardo Otubo +(cherry picked from commit e474e3aacf4276eb0781d11c45e2fab996f9dc56) +Signed-off-by: Eduardo Otubo +Signed-off-by: Danilo C. L. 
de Paula +--- + qemu-seccomp.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/qemu-seccomp.c b/qemu-seccomp.c +index e0a1829b3d..8325ecb766 100644 +--- a/qemu-seccomp.c ++++ b/qemu-seccomp.c +@@ -136,8 +136,9 @@ static uint32_t qemu_seccomp_get_action(int set) + + if (qemu_seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &action) == 0) { + kill_process = 1; ++ } else { ++ kill_process = 0; + } +- kill_process = 0; + } + if (kill_process == 1) { + return SCMP_ACT_KILL_PROCESS; +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 64848ce..6436e1a 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.1.0 -Release: 7%{?dist} +Release: 8%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -185,6 +185,14 @@ Patch57: kvm-target-arm-Move-start-powered-off-property-to-generi.patch Patch58: kvm-target-arm-Move-setting-of-CPU-halted-state-to-gener.patch # For bz#1849483 - Failed to boot up guest when hotplugging vcpus on bios stage Patch59: kvm-ppc-spapr-Use-start-powered-off-CPUState-property.patch +# For bz#1738820 - '-F' option of qemu-ga command cause the guest-fsfreeze-freeze command doesn't work +Patch60: kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch +# For bz#1752376 - qemu use SCMP_ACT_TRAP even SCMP_ACT_KILL_PROCESS is available +Patch61: kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch +# For bz#1867075 - CVE-2020-10756 virt:8.3/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8] +Patch62: kvm-Revert-Drop-bogus-IPv6-messages.patch +# For bz#1821528 - missing namespace attribute when access the rbd image with namespace +Patch63: kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch BuildRequires: wget BuildRequires: rpm-build @@ -756,6 +764,11 @@ install -m 0644 
%{_sourcedir}/99-qemu-guest-agent.rules %{buildroot}%{_udevrules install -D --preserve-timestamps \ scripts/qemu-guest-agent/fsfreeze-hook \ $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook +# Workaround for the missing /etc/qemu-kvm/fsfreeze-hook +# Please, do not carry this over to RHEL-9 +mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/qemu-kvm/ +ln -s %{_sysconfdir}/qemu-ga/fsfreeze-hook \ + $RPM_BUILD_ROOT%{_sysconfdir}/qemu-kvm/fsfreeze-hook # - the directory for user scripts: mkdir $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook.d @@ -1119,6 +1132,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_udevrulesdir}/99-qemu-guest-agent.rules %config(noreplace) %{_sysconfdir}/sysconfig/qemu-ga %{_sysconfdir}/qemu-ga +%{_sysconfdir}/qemu-kvm/fsfreeze-hook %{_datadir}/%{name}/qemu-ga %dir %{_localstatedir}/log/qemu-ga @@ -1144,6 +1158,20 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Thu Sep 17 2020 Danilo Cesar Lemes de Paula - 5.1.0-8.el8 +- kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch [bz#1738820] +- kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch [bz#1752376] +- kvm-Revert-Drop-bogus-IPv6-messages.patch [bz#1867075] +- kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch [bz#1821528] +- Resolves: bz#1738820 + ('-F' option of qemu-ga command cause the guest-fsfreeze-freeze command doesn't work) +- Resolves: bz#1752376 + (qemu use SCMP_ACT_TRAP even SCMP_ACT_KILL_PROCESS is available) +- Resolves: bz#1821528 + (missing namespace attribute when access the rbd image with namespace) +- Resolves: bz#1867075 + (CVE-2020-10756 virt:8.3/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8]) + * Tue Sep 15 2020 Danilo Cesar Lemes de Paula - 5.1.0-7.el8 - kvm-target-ppc-Add-experimental-option-for-enabling-secu.patch [bz#1789757 bz#1870384] - kvm-target-arm-Move-start-powered-off-property-to-generi.patch [bz#1849483] From 
127b33070af258b6711d22d9550e9b8828ab0512 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Mon, 21 Sep 2020 13:09:06 -0400 Subject: [PATCH 092/195] * Mon Sep 21 2020 Danilo Cesar Lemes de Paula - 5.1.0-9.el8 - kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch [bz#1688978] - Resolves: bz#1688978 (RFE: forward host preferences for cipher suites and CA certs to guest firmware) --- ...fix-FWCfgDataGeneratorClass-get_data.patch | 78 +++++++++++++++++++ qemu-kvm.spec | 9 ++- 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch diff --git a/kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch b/kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch new file mode 100644 index 0000000..e1ea970 --- /dev/null +++ b/kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch @@ -0,0 +1,78 @@ +From d323d7648a64e213d099d7ee3c66edc186b97808 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Fri, 18 Sep 2020 19:35:42 -0400 +Subject: [PATCH] hw/nvram/fw_cfg: fix FWCfgDataGeneratorClass::get_data() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Philippe Mathieu-Daudé +Message-id: <20200918193542.191031-2-philmd@redhat.com> +Patchwork-id: 98402 +O-Subject: [PATCH 1/1] hw/nvram/fw_cfg: fix FWCfgDataGeneratorClass::get_data() consumption +Bugzilla: 1688978 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Danilo de Paula + +From: Laszlo Ersek + +The documentation on g_byte_array_free() + +says: + +> Returns +> +> the element data if free_segment is FALSE, otherwise NULL. The element +> data should be freed using g_free(). + +Because we currently call g_byte_array_free() with free_segment=TRUE, we +end up passing data=NULL to fw_cfg_add_file(). 
+ +On the plus side, fw_cfg_data_read() and fw_cfg_dma_transfer() both deal +with NULL data gracefully: QEMU does not crash when the guest reads such +an item, the guest just gets a properly sized, but zero-filled blob. + +However, the bug breaks UEFI HTTPS boot, as the IANA_TLS_CIPHER array, +generated otherwise correctly by the "tls-cipher-suites" object, is in +effect replaced with a zero blob. + +Fix the issue by passing free_segment=FALSE to g_byte_array_free(): + +- the caller (fw_cfg_add_from_generator()) temporarily assumes ownership + of the generated byte array, + +- then ownership of the byte array is transferred to fw_cfg, as + fw_cfg_add_file() links (not copies) "data" into fw_cfg. + +Cc: "Daniel P. Berrangé" +Cc: "Philippe Mathieu-Daudé" +Cc: Gerd Hoffmann +Fixes: 3203148917d035b09f71986ac2eaa19a352d6d9d +Signed-off-by: Laszlo Ersek +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Daniel P. Berrangé +Message-Id: <20200916151510.22767-1-lersek@redhat.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 4318432ccd3f7fb69b7169f39dcae3d4ee04f5ea) +Signed-off-by: Danilo C. L. de Paula +--- + hw/nvram/fw_cfg.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c +index f3a4728288e..0e95d057fd5 100644 +--- a/hw/nvram/fw_cfg.c ++++ b/hw/nvram/fw_cfg.c +@@ -1056,7 +1056,7 @@ bool fw_cfg_add_from_generator(FWCfgState *s, const char *filename, + return false; + } + size = array->len; +- fw_cfg_add_file(s, filename, g_byte_array_free(array, TRUE), size); ++ fw_cfg_add_file(s, filename, g_byte_array_free(array, FALSE), size); + + return true; + } +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 6436e1a..5039a6b 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.1.0 -Release: 8%{?dist} +Release: 9%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -193,6 +193,8 @@ Patch61: kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch Patch62: kvm-Revert-Drop-bogus-IPv6-messages.patch # For bz#1821528 - missing namespace attribute when access the rbd image with namespace Patch63: kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch +# For bz#1688978 - RFE: forward host preferences for cipher suites and CA certs to guest firmware +Patch64: kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch BuildRequires: wget BuildRequires: rpm-build @@ -1158,6 +1160,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Sep 21 2020 Danilo Cesar Lemes de Paula - 5.1.0-9.el8 +- kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch [bz#1688978] +- Resolves: bz#1688978 + (RFE: forward host preferences for cipher suites and CA certs to guest firmware) + * Thu Sep 17 2020 Danilo Cesar Lemes de Paula - 5.1.0-8.el8 - kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch [bz#1738820] - kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch [bz#1752376] From a318bb58b73c8f1dadd43ac54739ffa8c229a710 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Mon, 28 Sep 2020 10:55:54 -0400 Subject: [PATCH 093/195] * Mon Sep 28 2020 Danilo Cesar Lemes de Paula - 5.1.0-10.el8 - kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch [bz#1877209] - Resolves: bz#1877209 ('qemu-img bitmaps --merge' failed when trying to merge top volume bitmap to base volume bitmap) --- ...port-bitmap-merge-into-backing-image.patch | 203 ++++++++++++++++++ qemu-kvm.spec | 9 +- 2 files changed, 211 insertions(+), 1 deletion(-) create mode 100644 kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch diff --git a/kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch b/kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch new file mode 100644 index 0000000..98dff89 --- /dev/null +++ b/kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch @@ -0,0 +1,203 @@ +From a768aa960a2e9e3b8dd72dfc7135a7e7d0814d3f Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 23 Sep 2020 17:49:40 -0400 +Subject: [PATCH] qemu-img: Support bitmap --merge into backing image + +RH-Author: Eric Blake +Message-id: <20200923174940.704681-2-eblake@redhat.com> +Patchwork-id: 98487 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/1] qemu-img: Support bitmap --merge into backing image +Bugzilla: 1877209 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Max Reitz + +If you have the chain 'base.qcow2 <- top.qcow2' and want to merge a +bitmap from top into base, qemu-img was failing with: + +qemu-img: Could not open 'top.qcow2': Could not open backing file: Failed to get shared "write" lock +Is another process using the image [base.qcow2]? + +The easiest fix is to not open the entire backing chain of either +image (source or destination); after all, the point of 'qemu-img +bitmap' is solely to manipulate bitmaps directly within a single qcow2 +image, and this is made more precise if we don't pay attention to +other images in the chain that may happen to have a bitmap by the same +name. 
+ +However, note that on a case-by-case analysis, there _are_ times where +we treat it as a feature that we can access a bitmap from a backing +layer in association with an overlay BDS. A demonstration of this is +using NBD to expose both an overlay BDS (for constant contents) and a +bitmap (for learning which blocks are interesting) during an +incremental backup: + +Base <- Active <- Temporary + \--block job ->/ + +where Temporary is being fed by a backup 'sync=none' job. When +exposing Temporary over NBD, referring to a bitmap that lives only in +Active is less effort than having to copy a bitmap into Temporary [1]. +So the testsuite additions in this patch check both where bitmaps get +allocated (the qemu-img info output), and that qemu-nbd is indeed able +to access a bitmap inherited from the backing chain since it is a +different use case than 'qemu-img bitmap'. + +[1] Full disclosure: prior to the recent commit 374eedd1c4 and +friends, we were NOT able to see bitmaps through filters, which meant +that we actually did not have nice clean semantics for uniformly being +able to pick up bitmaps from anywhere in the backing chain (seen as a +change in behavior between qemu 4.1 and 4.2 at commit 00e30f05de, when +block-copy swapped from a one-off to a filter). Which means libvirt +was already coded to copy bitmaps around for the sake of older qemu, +even though modern qemu no longer needs it. Oh well. + +Fixes: http://bugzilla.redhat.com/1877209 +Reported-by: Eyal Shenitzky +Signed-off-by: Eric Blake +Message-Id: <20200914191009.644842-1-eblake@redhat.com> +[eblake: more commit message tweaks, per Max Reitz review] +Reviewed-by: Max Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy + +(cherry picked from commit 14f16bf9474c860ecc127a66a86961942319f7af) +Tweak 291.out since extended L2 is not backported. + +Signed-off-by: Eric Blake +Signed-off-by: Danilo C. L. 
de Paula +--- + qemu-img.c | 11 +++++--- + tests/qemu-iotests/291 | 12 +++++++++ + tests/qemu-iotests/291.out | 54 ++++++++++++++++++++++++++++++++++++++ + 3 files changed, 74 insertions(+), 3 deletions(-) + +diff --git a/qemu-img.c b/qemu-img.c +index 5308773811f..b0ca321a6be 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -4747,14 +4747,19 @@ static int img_bitmap(int argc, char **argv) + filename = argv[optind]; + bitmap = argv[optind + 1]; + +- blk = img_open(image_opts, filename, fmt, BDRV_O_RDWR, false, false, +- false); ++ /* ++ * No need to open backing chains; we will be manipulating bitmaps ++ * directly in this image without reference to image contents. ++ */ ++ blk = img_open(image_opts, filename, fmt, BDRV_O_RDWR | BDRV_O_NO_BACKING, ++ false, false, false); + if (!blk) { + goto out; + } + bs = blk_bs(blk); + if (src_filename) { +- src = img_open(false, src_filename, src_fmt, 0, false, false, false); ++ src = img_open(false, src_filename, src_fmt, BDRV_O_NO_BACKING, ++ false, false, false); + if (!src) { + goto out; + } +diff --git a/tests/qemu-iotests/291 b/tests/qemu-iotests/291 +index 1e0bb76959b..4f837b20565 100755 +--- a/tests/qemu-iotests/291 ++++ b/tests/qemu-iotests/291 +@@ -91,6 +91,15 @@ $QEMU_IMG bitmap --remove --image-opts \ + driver=$IMGFMT,file.driver=file,file.filename="$TEST_IMG" tmp + _img_info --format-specific + ++echo ++echo "=== Merge from top layer into backing image ===" ++echo ++ ++$QEMU_IMG rebase -u -F qcow2 -b "$TEST_IMG.base" "$TEST_IMG" ++$QEMU_IMG bitmap --add --merge b2 -b "$TEST_IMG" -F $IMGFMT \ ++ -f $IMGFMT "$TEST_IMG.base" b3 ++_img_info --format-specific --backing-chain ++ + echo + echo "=== Check bitmap contents ===" + echo +@@ -107,6 +116,9 @@ $QEMU_IMG map --output=json --image-opts \ + nbd_server_start_unix_socket -r -f qcow2 -B b2 "$TEST_IMG" + $QEMU_IMG map --output=json --image-opts \ + "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b2" | _filter_qemu_img_map ++nbd_server_start_unix_socket -r -f qcow2 -B b3 
"$TEST_IMG" ++$QEMU_IMG map --output=json --image-opts \ ++ "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b3" | _filter_qemu_img_map + + # success, all done + echo '*** done' +diff --git a/tests/qemu-iotests/291.out b/tests/qemu-iotests/291.out +index 9f661515b41..332584f67a8 100644 +--- a/tests/qemu-iotests/291.out ++++ b/tests/qemu-iotests/291.out +@@ -66,6 +66,57 @@ Format specific information: + refcount bits: 16 + corrupt: false + ++=== Merge from top layer into backing image === ++ ++image: TEST_DIR/t.IMGFMT ++file format: IMGFMT ++virtual size: 10 MiB (10485760 bytes) ++cluster_size: 65536 ++backing file: TEST_DIR/t.IMGFMT.base ++backing file format: IMGFMT ++Format specific information: ++ compat: 1.1 ++ compression type: zlib ++ lazy refcounts: false ++ bitmaps: ++ [0]: ++ flags: ++ name: b1 ++ granularity: 524288 ++ [1]: ++ flags: ++ [0]: auto ++ name: b2 ++ granularity: 65536 ++ [2]: ++ flags: ++ name: b0 ++ granularity: 65536 ++ refcount bits: 16 ++ corrupt: false ++ ++image: TEST_DIR/t.IMGFMT.base ++file format: IMGFMT ++virtual size: 10 MiB (10485760 bytes) ++cluster_size: 65536 ++Format specific information: ++ compat: 1.1 ++ compression type: zlib ++ lazy refcounts: false ++ bitmaps: ++ [0]: ++ flags: ++ [0]: auto ++ name: b0 ++ granularity: 65536 ++ [1]: ++ flags: ++ [0]: auto ++ name: b3 ++ granularity: 65536 ++ refcount bits: 16 ++ corrupt: false ++ + === Check bitmap contents === + + [{ "start": 0, "length": 3145728, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, +@@ -77,4 +128,7 @@ Format specific information: + [{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, + { "start": 2097152, "length": 1048576, "depth": 0, "zero": false, "data": false}, + { "start": 3145728, "length": 7340032, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] ++[{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 2097152, "length": 1048576, "depth": 0, 
"zero": false, "data": false}, ++{ "start": 3145728, "length": 7340032, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] + *** done +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 5039a6b..b871e53 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.1.0 -Release: 9%{?dist} +Release: 10%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -195,6 +195,8 @@ Patch62: kvm-Revert-Drop-bogus-IPv6-messages.patch Patch63: kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch # For bz#1688978 - RFE: forward host preferences for cipher suites and CA certs to guest firmware Patch64: kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch +# For bz#1877209 - 'qemu-img bitmaps --merge' failed when trying to merge top volume bitmap to base volume bitmap +Patch65: kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch BuildRequires: wget BuildRequires: rpm-build @@ -1160,6 +1162,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Sep 28 2020 Danilo Cesar Lemes de Paula - 5.1.0-10.el8 +- kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch [bz#1877209] +- Resolves: bz#1877209 + ('qemu-img bitmaps --merge' failed when trying to merge top volume bitmap to base volume bitmap) + * Mon Sep 21 2020 Danilo Cesar Lemes de Paula - 5.1.0-9.el8 - kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch [bz#1688978] - Resolves: bz#1688978 From b7068d958b5322c0f1fd1d4d99d307b4dd2bcc92 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Mon, 5 Oct 2020 08:26:58 -0400 Subject: [PATCH 094/195] * Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 5.1.0-11.el8_3 - kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch [bz#1868449] - kvm-vhost-vsock-pci-force-virtio-version-1.patch [bz#1868449] - kvm-vhost-user-vsock-pci-force-virtio-version-1.patch [bz#1868449] - kvm-vhost-vsock-ccw-force-virtio-version-1.patch [bz#1868449] - kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch [bz#1874004] - kvm-redhat-Make-all-generated-so-files-executable-not-on.patch [bz#1876635] - Resolves: bz#1868449 (vhost_vsock error: device is modern-only, use disable-legacy=on) - Resolves: bz#1874004 (Live migration performance is poor during guest installation process on power host) - Resolves: bz#1876635 (VM fails to start with a passthrough smartcard) --- ...se-max-bandwidth-to-128-MiB-s-1-Gib-.patch | 56 ++++++ ...ser-vsock-pci-force-virtio-version-1.patch | 71 ++++++++ ...ost-vsock-ccw-force-virtio-version-1.patch | 66 +++++++ ...ost-vsock-pci-force-virtio-version-1.patch | 87 +++++++++ ...cy-support-check-on-machine-types-le.patch | 169 ++++++++++++++++++ qemu-kvm.spec | 32 +++- 6 files changed, 477 insertions(+), 4 deletions(-) create mode 100644 kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch create mode 100644 kvm-vhost-user-vsock-pci-force-virtio-version-1.patch create mode 100644 kvm-vhost-vsock-ccw-force-virtio-version-1.patch create mode 100644 kvm-vhost-vsock-pci-force-virtio-version-1.patch create mode 100644 kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch diff --git a/kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch b/kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch new file mode 100644 index 0000000..0993573 --- /dev/null +++ b/kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch @@ -0,0 +1,56 @@ +From 5583f651d07532729bdaf9d65ede9bea18e38b5d Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Mon, 28 Sep 2020 
11:06:48 -0400 +Subject: [PATCH 5/6] migration: increase max-bandwidth to 128 MiB/s (1 Gib/s) + +RH-Author: Laurent Vivier +Message-id: <20200928110648.342290-1-lvivier@redhat.com> +Patchwork-id: 98494 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH] migration: increase max-bandwidth to 128 MiB/s (1 Gib/s) +Bugzilla: 1874004 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Greg Kurz + +max-bandwidth is set by default to 32 MiB/s (256 Mib/s) +since 2008 (5bb7910af031c). + +Most of the CPUs can dirty memory faster than that now, +and this is clearly a problem with POWER where the page +size is 64 kiB and not 4 KiB. + +Signed-off-by: Laurent Vivier +Message-Id: <20200921144957.979989-1-lvivier@redhat.com> +Reviewed-by: David Gibson +Reviewed-by: Greg Kurz +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 7590a2ae091fde8bb72d5df93977ab9707e23242) +Signed-off-by: Laurent Vivier + +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=31576368 +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1874004 +Upstream: Merged +Tested: checked a migration while an installation is running can end + before the end of installation +Signed-off-by: Danilo C. L. de Paula +--- + migration/migration.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 7a89ce39a76..8193f33f768 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -57,7 +57,7 @@ + #include "qemu/queue.h" + #include "multifd.h" + +-#define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */ ++#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ + + /* Amount of time to allocate to each "chunk" of bandwidth-throttled + * data. 
*/ +-- +2.27.0 + diff --git a/kvm-vhost-user-vsock-pci-force-virtio-version-1.patch b/kvm-vhost-user-vsock-pci-force-virtio-version-1.patch new file mode 100644 index 0000000..9922e1a --- /dev/null +++ b/kvm-vhost-user-vsock-pci-force-virtio-version-1.patch @@ -0,0 +1,71 @@ +From 0c85b4d826747708971a1add4fe464e31b84b36e Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 29 Sep 2020 12:41:42 -0400 +Subject: [PATCH 3/6] vhost-user-vsock-pci: force virtio version 1 + +RH-Author: Stefano Garzarella +Message-id: <20200929124143.41520-4-sgarzare@redhat.com> +Patchwork-id: 98513 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 3/4] vhost-user-vsock-pci: force virtio version 1 +Bugzilla: 1868449 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Cornelia Huck + +Commit 9b3a35ec82 ("virtio: verify that legacy support is not +accidentally on") added a safety check that requires to set +'disable-legacy=on' on vhost-user-vsock-pci device: + + $ ./qemu-system-x86_64 ... \ + -chardev socket,id=char0,reconnect=0,path=/tmp/vhost4.socket \ + -device vhost-user-vsock-pci,chardev=char0 + qemu-system-x86_64: -device vhost-user-vsock-pci,chardev=char0: + device is modern-only, use disable-legacy=on + +virtio-vsock was introduced after the release of VIRTIO 1.0 +specifications, so it should be 'modern-only'. + +This patch forces virtio version 1 and removes the 'transitional_name' +property, as done for vhost-vsock-pci, removing the need to specify +'disable-legacy=on' on vhost-user-vsock-pci device. + +Cc: qemu-stable@nongnu.org +Suggested-by: Cornelia Huck +Reviewed-by: Cornelia Huck +Signed-off-by: Stefano Garzarella +Message-Id: <20200921122506.82515-4-sgarzare@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 27eda699f59d430c33fc054a36a17251992e70dc) +Signed-off-by: Stefano Garzarella +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/virtio/vhost-user-vsock-pci.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-user-vsock-pci.c b/hw/virtio/vhost-user-vsock-pci.c +index f4cf95873d3..492df6418a4 100644 +--- a/hw/virtio/vhost-user-vsock-pci.c ++++ b/hw/virtio/vhost-user-vsock-pci.c +@@ -40,6 +40,9 @@ static void vhost_user_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) + VHostUserVSockPCI *dev = VHOST_USER_VSOCK_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + ++ /* unlike vhost-vsock, we do not need to care about pre-5.1 compat */ ++ virtio_pci_force_virtio_1(vpci_dev); ++ + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); + } + +@@ -68,7 +71,6 @@ static void vhost_user_vsock_pci_instance_init(Object *obj) + static const VirtioPCIDeviceTypeInfo vhost_user_vsock_pci_info = { + .base_name = TYPE_VHOST_USER_VSOCK_PCI, + .generic_name = "vhost-user-vsock-pci", +- .transitional_name = "vhost-user-vsock-pci-transitional", + .non_transitional_name = "vhost-user-vsock-pci-non-transitional", + .instance_size = sizeof(VHostUserVSockPCI), + .instance_init = vhost_user_vsock_pci_instance_init, +-- +2.27.0 + diff --git a/kvm-vhost-vsock-ccw-force-virtio-version-1.patch b/kvm-vhost-vsock-ccw-force-virtio-version-1.patch new file mode 100644 index 0000000..cadb930 --- /dev/null +++ b/kvm-vhost-vsock-ccw-force-virtio-version-1.patch @@ -0,0 +1,66 @@ +From ca33af3be677c483117f7124170003fe2876a025 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 29 Sep 2020 12:41:43 -0400 +Subject: [PATCH 4/6] vhost-vsock-ccw: force virtio version 1 + +RH-Author: Stefano Garzarella +Message-id: <20200929124143.41520-5-sgarzare@redhat.com> +Patchwork-id: 98514 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 4/4] vhost-vsock-ccw: force virtio version 1 +Bugzilla: 1868449 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: Cornelia Huck + +virtio-vsock was introduced after the release of VIRTIO 1.0 +specifications, so it should be 'modern-only'. + +This patch forces virtio version 1 as done for vhost-vsock-pci. + +To avoid migration issues, we force virtio version 1 only when +legacy check is enabled in the new machine types (>= 5.1). + +Cc: qemu-stable@nongnu.org +Suggested-by: Cornelia Huck +Reviewed-by: Cornelia Huck +Signed-off-by: Stefano Garzarella +Message-Id: <20200921122506.82515-5-sgarzare@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit a6704a34cf02add13964149e0de6453ae62bd9db) +Signed-off-by: Stefano Garzarella +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/vhost-vsock-ccw.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/hw/s390x/vhost-vsock-ccw.c b/hw/s390x/vhost-vsock-ccw.c +index 0822ecca893..246416a8f96 100644 +--- a/hw/s390x/vhost-vsock-ccw.c ++++ b/hw/s390x/vhost-vsock-ccw.c +@@ -40,9 +40,21 @@ static void vhost_vsock_ccw_class_init(ObjectClass *klass, void *data) + static void vhost_vsock_ccw_instance_init(Object *obj) + { + VHostVSockCCWState *dev = VHOST_VSOCK_CCW(obj); ++ VirtioCcwDevice *ccw_dev = VIRTIO_CCW_DEVICE(obj); ++ VirtIODevice *virtio_dev; + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_VSOCK); ++ ++ virtio_dev = VIRTIO_DEVICE(&dev->vdev); ++ ++ /* ++ * To avoid migration issues, we force virtio version 1 only when ++ * legacy check is enabled in the new machine types (>= 5.1). 
++ */ ++ if (!virtio_legacy_check_disabled(virtio_dev)) { ++ ccw_dev->force_revision_1 = true; ++ } + } + + static const TypeInfo vhost_vsock_ccw_info = { +-- +2.27.0 + diff --git a/kvm-vhost-vsock-pci-force-virtio-version-1.patch b/kvm-vhost-vsock-pci-force-virtio-version-1.patch new file mode 100644 index 0000000..1fb3733 --- /dev/null +++ b/kvm-vhost-vsock-pci-force-virtio-version-1.patch @@ -0,0 +1,87 @@ +From 0af2bd4abfdb7b79a3816a920d55c86190c90533 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 29 Sep 2020 12:41:41 -0400 +Subject: [PATCH 2/6] vhost-vsock-pci: force virtio version 1 + +RH-Author: Stefano Garzarella +Message-id: <20200929124143.41520-3-sgarzare@redhat.com> +Patchwork-id: 98511 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 2/4] vhost-vsock-pci: force virtio version 1 +Bugzilla: 1868449 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Cornelia Huck + +Commit 9b3a35ec82 ("virtio: verify that legacy support is not +accidentally on") added a safety check that requires to set +'disable-legacy=on' on vhost-vsock-pci device: + + $ ./qemu-system-x86_64 ... -device vhost-vsock-pci,guest-cid=5 + qemu-system-x86_64: -device vhost-vsock-pci,guest-cid=5: + device is modern-only, use disable-legacy=on + +virtio-vsock was introduced after the release of VIRTIO 1.0 +specifications, so it should be 'modern-only'. +In addition Cornelia verified that forcing a legacy mode on +vhost-vsock-pci device using x86-64 host and s390x guest, so with +different endianness, produces strange behaviours. + +This patch forces virtio version 1 and removes the 'transitional_name' +property removing the need to specify 'disable-legacy=on' on +vhost-vsock-pci device. + +To avoid migration issues, we force virtio version 1 only when +legacy check is enabled in the new machine types (>= 5.1). + +As the transitional device name is not commonly used, we do not +provide compatibility handling for it. 
+ +Cc: qemu-stable@nongnu.org +Reported-by: Qian Cai +Reported-by: Qinghua Cheng +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1868449 +Suggested-by: Cornelia Huck +Reviewed-by: Cornelia Huck +Signed-off-by: Stefano Garzarella +Message-Id: <20200921122506.82515-3-sgarzare@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 6209070503989cf4f28549f228989419d4f0b236) +Signed-off-by: Stefano Garzarella +Signed-off-by: Danilo C. L. de Paula +--- + hw/virtio/vhost-vsock-pci.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-vsock-pci.c b/hw/virtio/vhost-vsock-pci.c +index a815278e69c..f11a38292fe 100644 +--- a/hw/virtio/vhost-vsock-pci.c ++++ b/hw/virtio/vhost-vsock-pci.c +@@ -43,6 +43,15 @@ static void vhost_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) + { + VHostVSockPCI *dev = VHOST_VSOCK_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); ++ VirtIODevice *virtio_dev = VIRTIO_DEVICE(vdev); ++ ++ /* ++ * To avoid migration issues, we force virtio version 1 only when ++ * legacy check is enabled in the new machine types (>= 5.1). 
++ */ ++ if (!virtio_legacy_check_disabled(virtio_dev)) { ++ virtio_pci_force_virtio_1(vpci_dev); ++ } + + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); + } +@@ -72,7 +81,6 @@ static void vhost_vsock_pci_instance_init(Object *obj) + static const VirtioPCIDeviceTypeInfo vhost_vsock_pci_info = { + .base_name = TYPE_VHOST_VSOCK_PCI, + .generic_name = "vhost-vsock-pci", +- .transitional_name = "vhost-vsock-pci-transitional", + .non_transitional_name = "vhost-vsock-pci-non-transitional", + .instance_size = sizeof(VHostVSockPCI), + .instance_init = vhost_vsock_pci_instance_init, +-- +2.27.0 + diff --git a/kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch b/kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch new file mode 100644 index 0000000..1bea17a --- /dev/null +++ b/kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch @@ -0,0 +1,169 @@ +From 9455373aefea91177dcf32da2c448f93b8e7aa38 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 29 Sep 2020 12:41:40 -0400 +Subject: [PATCH 1/6] virtio: skip legacy support check on machine types less + than 5.1 + +RH-Author: Stefano Garzarella +Message-id: <20200929124143.41520-2-sgarzare@redhat.com> +Patchwork-id: 98512 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/4] virtio: skip legacy support check on machine types less than 5.1 +Bugzilla: 1868449 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Cornelia Huck + +Commit 9b3a35ec82 ("virtio: verify that legacy support is not accidentally +on") added a check that returns an error if legacy support is on, but the +device does not support legacy. + +Unfortunately some devices were wrongly declared legacy capable even if +they were not (e.g vhost-vsock). + +To avoid migration issues, we add a virtio-device property +(x-disable-legacy-check) to skip the legacy error, printing a warning +instead, for machine types < 5.1. 
+ +Cc: qemu-stable@nongnu.org +Fixes: 9b3a35ec82 ("virtio: verify that legacy support is not accidentally on") +Suggested-by: Dr. David Alan Gilbert +Suggested-by: Cornelia Huck +Reviewed-by: Cornelia Huck +Signed-off-by: Stefano Garzarella +Message-Id: <20200921122506.82515-2-sgarzare@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit d55f518248f263bb8d0852f98e47102ea09d4f89) +Added 'x-disable-legacy-check' in hw_compat_rhel_8_2 + +Signed-off-by: Stefano Garzarella +Signed-off-by: Danilo C. L. de Paula +--- + hw/core/machine.c | 3 +++ + hw/s390x/virtio-ccw.c | 15 ++++++++++++--- + hw/virtio/virtio-pci.c | 14 ++++++++++++-- + hw/virtio/virtio.c | 7 +++++++ + include/hw/virtio/virtio.h | 2 ++ + 5 files changed, 36 insertions(+), 5 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 10fa9b8c756..86ce3af71e4 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -66,6 +66,8 @@ GlobalProperty hw_compat_rhel_8_2[] = { + { "vmport", "x-report-vmx-type", "off" }, + /* hw_compat_rhel_8_2 from hw_compat_5_0 */ + { "vmport", "x-cmds-v2", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "virtio-device", "x-disable-legacy-check", "true" }, + }; + const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); + +@@ -246,6 +248,7 @@ GlobalProperty hw_compat_5_0[] = { + { "vmport", "x-signal-unsupported-cmd", "off" }, + { "vmport", "x-report-vmx-type", "off" }, + { "vmport", "x-cmds-v2", "off" }, ++ { "virtio-device", "x-disable-legacy-check", "true" }, + }; + const size_t hw_compat_5_0_len = G_N_ELEMENTS(hw_compat_5_0); + +diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c +index 0e602702971..3dfc93d4f6f 100644 +--- a/hw/s390x/virtio-ccw.c ++++ b/hw/s390x/virtio-ccw.c +@@ -1122,9 +1122,18 @@ static void virtio_ccw_device_plugged(DeviceState *d, Error **errp) + } + + if (!virtio_ccw_rev_max(dev) && !virtio_legacy_allowed(vdev)) { +- error_setg(errp, "Invalid value 
of property max_rev " +- "(is %d expected >= 1)", virtio_ccw_rev_max(dev)); +- return; ++ /* ++ * To avoid migration issues, we allow legacy mode when legacy ++ * check is disabled in the old machine types (< 5.1). ++ */ ++ if (virtio_legacy_check_disabled(vdev)) { ++ warn_report("device requires revision >= 1, but for backward " ++ "compatibility max_revision=0 is allowed"); ++ } else { ++ error_setg(errp, "Invalid value of property max_rev " ++ "(is %d expected >= 1)", virtio_ccw_rev_max(dev)); ++ return; ++ } + } + + if (virtio_get_num_queues(vdev) > VIRTIO_QUEUE_MAX) { +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index ccdf54e81c7..4211565f2c9 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -1596,8 +1596,18 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) + + if (legacy) { + if (!virtio_legacy_allowed(vdev)) { +- error_setg(errp, "device is modern-only, use disable-legacy=on"); +- return; ++ /* ++ * To avoid migration issues, we allow legacy mode when legacy ++ * check is disabled in the old machine types (< 5.1). 
++ */ ++ if (virtio_legacy_check_disabled(vdev)) { ++ warn_report("device is modern-only, but for backward " ++ "compatibility legacy is allowed"); ++ } else { ++ error_setg(errp, ++ "device is modern-only, use disable-legacy=on"); ++ return; ++ } + } + if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) { + error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by" +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index e9830252176..b85277da673 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -3304,6 +3304,11 @@ bool virtio_legacy_allowed(VirtIODevice *vdev) + } + } + ++bool virtio_legacy_check_disabled(VirtIODevice *vdev) ++{ ++ return vdev->disable_legacy_check; ++} ++ + hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n) + { + return vdev->vq[n].vring.desc; +@@ -3713,6 +3718,8 @@ static Property virtio_properties[] = { + DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features), + DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true), + DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true), ++ DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice, ++ disable_legacy_check, false), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index e424df12cf6..c50f5a9dfe8 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -105,6 +105,7 @@ struct VirtIODevice + bool use_started; + bool started; + bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */ ++ bool disable_legacy_check; + VMChangeStateEntry *vmstate; + char *bus_name; + uint8_t device_endian; +@@ -398,5 +399,6 @@ static inline bool virtio_device_disabled(VirtIODevice *vdev) + } + + bool virtio_legacy_allowed(VirtIODevice *vdev); ++bool virtio_legacy_check_disabled(VirtIODevice *vdev); + + #endif +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index b871e53..4b53bbd 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ 
Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.1.0 -Release: 10%{?dist} +Release: 11%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -197,6 +197,16 @@ Patch63: kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch Patch64: kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch # For bz#1877209 - 'qemu-img bitmaps --merge' failed when trying to merge top volume bitmap to base volume bitmap Patch65: kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch +# For bz#1868449 - vhost_vsock error: device is modern-only, use disable-legacy=on +Patch66: kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch +# For bz#1868449 - vhost_vsock error: device is modern-only, use disable-legacy=on +Patch67: kvm-vhost-vsock-pci-force-virtio-version-1.patch +# For bz#1868449 - vhost_vsock error: device is modern-only, use disable-legacy=on +Patch68: kvm-vhost-user-vsock-pci-force-virtio-version-1.patch +# For bz#1868449 - vhost_vsock error: device is modern-only, use disable-legacy=on +Patch69: kvm-vhost-vsock-ccw-force-virtio-version-1.patch +# For bz#1874004 - Live migration performance is poor during guest installation process on power host +Patch70: kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch BuildRequires: wget BuildRequires: rpm-build @@ -946,9 +956,9 @@ install -m 0644 %{_sourcedir}/qemu-pr-helper.socket %{buildroot}%{_unitdir} find $RPM_BUILD_ROOT -name '*.la' -or -name '*.a' | xargs rm -f -# We need to make the block device modules executable else -# RPM won't pick up their dependencies. -chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/block-*.so +# We need to make the block device modules and other qemu SO files executable +# otherwise RPM won't pick up their dependencies. 
+chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/*.so # Remove buildinfo rm -rf $RPM_BUILD_ROOT%{qemudocdir}/interop/.buildinfo @@ -1162,6 +1172,20 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 5.1.0-11.el8_3 +- kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch [bz#1868449] +- kvm-vhost-vsock-pci-force-virtio-version-1.patch [bz#1868449] +- kvm-vhost-user-vsock-pci-force-virtio-version-1.patch [bz#1868449] +- kvm-vhost-vsock-ccw-force-virtio-version-1.patch [bz#1868449] +- kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch [bz#1874004] +- kvm-redhat-Make-all-generated-so-files-executable-not-on.patch [bz#1876635] +- Resolves: bz#1868449 + (vhost_vsock error: device is modern-only, use disable-legacy=on) +- Resolves: bz#1874004 + (Live migration performance is poor during guest installation process on power host) +- Resolves: bz#1876635 + (VM fails to start with a passthrough smartcard) + * Mon Sep 28 2020 Danilo Cesar Lemes de Paula - 5.1.0-10.el8 - kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch [bz#1877209] - Resolves: bz#1877209 From 0f0da3d1bf4733f182882176376235c5acb19019 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Mon, 5 Oct 2020 10:12:59 -0400 Subject: [PATCH 095/195] Revert "* Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 5.1.0-11.el8_3" This reverts commit b7068d958b5322c0f1fd1d4d99d307b4dd2bcc92. 
--- ...se-max-bandwidth-to-128-MiB-s-1-Gib-.patch | 56 ------ ...ser-vsock-pci-force-virtio-version-1.patch | 71 -------- ...ost-vsock-ccw-force-virtio-version-1.patch | 66 ------- ...ost-vsock-pci-force-virtio-version-1.patch | 87 --------- ...cy-support-check-on-machine-types-le.patch | 169 ------------------ qemu-kvm.spec | 32 +--- 6 files changed, 4 insertions(+), 477 deletions(-) delete mode 100644 kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch delete mode 100644 kvm-vhost-user-vsock-pci-force-virtio-version-1.patch delete mode 100644 kvm-vhost-vsock-ccw-force-virtio-version-1.patch delete mode 100644 kvm-vhost-vsock-pci-force-virtio-version-1.patch delete mode 100644 kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch diff --git a/kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch b/kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch deleted file mode 100644 index 0993573..0000000 --- a/kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 5583f651d07532729bdaf9d65ede9bea18e38b5d Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Mon, 28 Sep 2020 11:06:48 -0400 -Subject: [PATCH 5/6] migration: increase max-bandwidth to 128 MiB/s (1 Gib/s) - -RH-Author: Laurent Vivier -Message-id: <20200928110648.342290-1-lvivier@redhat.com> -Patchwork-id: 98494 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH] migration: increase max-bandwidth to 128 MiB/s (1 Gib/s) -Bugzilla: 1874004 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Greg Kurz - -max-bandwidth is set by default to 32 MiB/s (256 Mib/s) -since 2008 (5bb7910af031c). - -Most of the CPUs can dirty memory faster than that now, -and this is clearly a problem with POWER where the page -size is 64 kiB and not 4 KiB. - -Signed-off-by: Laurent Vivier -Message-Id: <20200921144957.979989-1-lvivier@redhat.com> -Reviewed-by: David Gibson -Reviewed-by: Greg Kurz -Reviewed-by: Dr. 
David Alan Gilbert -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 7590a2ae091fde8bb72d5df93977ab9707e23242) -Signed-off-by: Laurent Vivier - -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=31576368 -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1874004 -Upstream: Merged -Tested: checked a migration while an installation is running can end - before the end of installation -Signed-off-by: Danilo C. L. de Paula ---- - migration/migration.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 7a89ce39a76..8193f33f768 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -57,7 +57,7 @@ - #include "qemu/queue.h" - #include "multifd.h" - --#define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */ -+#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ - - /* Amount of time to allocate to each "chunk" of bandwidth-throttled - * data. */ --- -2.27.0 - diff --git a/kvm-vhost-user-vsock-pci-force-virtio-version-1.patch b/kvm-vhost-user-vsock-pci-force-virtio-version-1.patch deleted file mode 100644 index 9922e1a..0000000 --- a/kvm-vhost-user-vsock-pci-force-virtio-version-1.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 0c85b4d826747708971a1add4fe464e31b84b36e Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 29 Sep 2020 12:41:42 -0400 -Subject: [PATCH 3/6] vhost-user-vsock-pci: force virtio version 1 - -RH-Author: Stefano Garzarella -Message-id: <20200929124143.41520-4-sgarzare@redhat.com> -Patchwork-id: 98513 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 3/4] vhost-user-vsock-pci: force virtio version 1 -Bugzilla: 1868449 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Dr. 
David Alan Gilbert -RH-Acked-by: Cornelia Huck - -Commit 9b3a35ec82 ("virtio: verify that legacy support is not -accidentally on") added a safety check that requires to set -'disable-legacy=on' on vhost-user-vsock-pci device: - - $ ./qemu-system-x86_64 ... \ - -chardev socket,id=char0,reconnect=0,path=/tmp/vhost4.socket \ - -device vhost-user-vsock-pci,chardev=char0 - qemu-system-x86_64: -device vhost-user-vsock-pci,chardev=char0: - device is modern-only, use disable-legacy=on - -virtio-vsock was introduced after the release of VIRTIO 1.0 -specifications, so it should be 'modern-only'. - -This patch forces virtio version 1 and removes the 'transitional_name' -property, as done for vhost-vsock-pci, removing the need to specify -'disable-legacy=on' on vhost-user-vsock-pci device. - -Cc: qemu-stable@nongnu.org -Suggested-by: Cornelia Huck -Reviewed-by: Cornelia Huck -Signed-off-by: Stefano Garzarella -Message-Id: <20200921122506.82515-4-sgarzare@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 27eda699f59d430c33fc054a36a17251992e70dc) -Signed-off-by: Stefano Garzarella -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/virtio/vhost-user-vsock-pci.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-user-vsock-pci.c b/hw/virtio/vhost-user-vsock-pci.c -index f4cf95873d3..492df6418a4 100644 ---- a/hw/virtio/vhost-user-vsock-pci.c -+++ b/hw/virtio/vhost-user-vsock-pci.c -@@ -40,6 +40,9 @@ static void vhost_user_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) - VHostUserVSockPCI *dev = VHOST_USER_VSOCK_PCI(vpci_dev); - DeviceState *vdev = DEVICE(&dev->vdev); - -+ /* unlike vhost-vsock, we do not need to care about pre-5.1 compat */ -+ virtio_pci_force_virtio_1(vpci_dev); -+ - qdev_realize(vdev, BUS(&vpci_dev->bus), errp); - } - -@@ -68,7 +71,6 @@ static void vhost_user_vsock_pci_instance_init(Object *obj) - static const VirtioPCIDeviceTypeInfo vhost_user_vsock_pci_info = { - .base_name = TYPE_VHOST_USER_VSOCK_PCI, - .generic_name = "vhost-user-vsock-pci", -- .transitional_name = "vhost-user-vsock-pci-transitional", - .non_transitional_name = "vhost-user-vsock-pci-non-transitional", - .instance_size = sizeof(VHostUserVSockPCI), - .instance_init = vhost_user_vsock_pci_instance_init, --- -2.27.0 - diff --git a/kvm-vhost-vsock-ccw-force-virtio-version-1.patch b/kvm-vhost-vsock-ccw-force-virtio-version-1.patch deleted file mode 100644 index cadb930..0000000 --- a/kvm-vhost-vsock-ccw-force-virtio-version-1.patch +++ /dev/null @@ -1,66 +0,0 @@ -From ca33af3be677c483117f7124170003fe2876a025 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 29 Sep 2020 12:41:43 -0400 -Subject: [PATCH 4/6] vhost-vsock-ccw: force virtio version 1 - -RH-Author: Stefano Garzarella -Message-id: <20200929124143.41520-5-sgarzare@redhat.com> -Patchwork-id: 98514 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 4/4] vhost-vsock-ccw: force virtio version 1 -Bugzilla: 1868449 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Dr. 
David Alan Gilbert -RH-Acked-by: Cornelia Huck - -virtio-vsock was introduced after the release of VIRTIO 1.0 -specifications, so it should be 'modern-only'. - -This patch forces virtio version 1 as done for vhost-vsock-pci. - -To avoid migration issues, we force virtio version 1 only when -legacy check is enabled in the new machine types (>= 5.1). - -Cc: qemu-stable@nongnu.org -Suggested-by: Cornelia Huck -Reviewed-by: Cornelia Huck -Signed-off-by: Stefano Garzarella -Message-Id: <20200921122506.82515-5-sgarzare@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit a6704a34cf02add13964149e0de6453ae62bd9db) -Signed-off-by: Stefano Garzarella -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/vhost-vsock-ccw.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/hw/s390x/vhost-vsock-ccw.c b/hw/s390x/vhost-vsock-ccw.c -index 0822ecca893..246416a8f96 100644 ---- a/hw/s390x/vhost-vsock-ccw.c -+++ b/hw/s390x/vhost-vsock-ccw.c -@@ -40,9 +40,21 @@ static void vhost_vsock_ccw_class_init(ObjectClass *klass, void *data) - static void vhost_vsock_ccw_instance_init(Object *obj) - { - VHostVSockCCWState *dev = VHOST_VSOCK_CCW(obj); -+ VirtioCcwDevice *ccw_dev = VIRTIO_CCW_DEVICE(obj); -+ VirtIODevice *virtio_dev; - - virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), - TYPE_VHOST_VSOCK); -+ -+ virtio_dev = VIRTIO_DEVICE(&dev->vdev); -+ -+ /* -+ * To avoid migration issues, we force virtio version 1 only when -+ * legacy check is enabled in the new machine types (>= 5.1). 
-+ */ -+ if (!virtio_legacy_check_disabled(virtio_dev)) { -+ ccw_dev->force_revision_1 = true; -+ } - } - - static const TypeInfo vhost_vsock_ccw_info = { --- -2.27.0 - diff --git a/kvm-vhost-vsock-pci-force-virtio-version-1.patch b/kvm-vhost-vsock-pci-force-virtio-version-1.patch deleted file mode 100644 index 1fb3733..0000000 --- a/kvm-vhost-vsock-pci-force-virtio-version-1.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 0af2bd4abfdb7b79a3816a920d55c86190c90533 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 29 Sep 2020 12:41:41 -0400 -Subject: [PATCH 2/6] vhost-vsock-pci: force virtio version 1 - -RH-Author: Stefano Garzarella -Message-id: <20200929124143.41520-3-sgarzare@redhat.com> -Patchwork-id: 98511 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 2/4] vhost-vsock-pci: force virtio version 1 -Bugzilla: 1868449 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Cornelia Huck - -Commit 9b3a35ec82 ("virtio: verify that legacy support is not -accidentally on") added a safety check that requires to set -'disable-legacy=on' on vhost-vsock-pci device: - - $ ./qemu-system-x86_64 ... -device vhost-vsock-pci,guest-cid=5 - qemu-system-x86_64: -device vhost-vsock-pci,guest-cid=5: - device is modern-only, use disable-legacy=on - -virtio-vsock was introduced after the release of VIRTIO 1.0 -specifications, so it should be 'modern-only'. -In addition Cornelia verified that forcing a legacy mode on -vhost-vsock-pci device using x86-64 host and s390x guest, so with -different endianness, produces strange behaviours. - -This patch forces virtio version 1 and removes the 'transitional_name' -property removing the need to specify 'disable-legacy=on' on -vhost-vsock-pci device. - -To avoid migration issues, we force virtio version 1 only when -legacy check is enabled in the new machine types (>= 5.1). - -As the transitional device name is not commonly used, we do not -provide compatibility handling for it. 
- -Cc: qemu-stable@nongnu.org -Reported-by: Qian Cai -Reported-by: Qinghua Cheng -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1868449 -Suggested-by: Cornelia Huck -Reviewed-by: Cornelia Huck -Signed-off-by: Stefano Garzarella -Message-Id: <20200921122506.82515-3-sgarzare@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 6209070503989cf4f28549f228989419d4f0b236) -Signed-off-by: Stefano Garzarella -Signed-off-by: Danilo C. L. de Paula ---- - hw/virtio/vhost-vsock-pci.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-vsock-pci.c b/hw/virtio/vhost-vsock-pci.c -index a815278e69c..f11a38292fe 100644 ---- a/hw/virtio/vhost-vsock-pci.c -+++ b/hw/virtio/vhost-vsock-pci.c -@@ -43,6 +43,15 @@ static void vhost_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) - { - VHostVSockPCI *dev = VHOST_VSOCK_PCI(vpci_dev); - DeviceState *vdev = DEVICE(&dev->vdev); -+ VirtIODevice *virtio_dev = VIRTIO_DEVICE(vdev); -+ -+ /* -+ * To avoid migration issues, we force virtio version 1 only when -+ * legacy check is enabled in the new machine types (>= 5.1). 
-+ */ -+ if (!virtio_legacy_check_disabled(virtio_dev)) { -+ virtio_pci_force_virtio_1(vpci_dev); -+ } - - qdev_realize(vdev, BUS(&vpci_dev->bus), errp); - } -@@ -72,7 +81,6 @@ static void vhost_vsock_pci_instance_init(Object *obj) - static const VirtioPCIDeviceTypeInfo vhost_vsock_pci_info = { - .base_name = TYPE_VHOST_VSOCK_PCI, - .generic_name = "vhost-vsock-pci", -- .transitional_name = "vhost-vsock-pci-transitional", - .non_transitional_name = "vhost-vsock-pci-non-transitional", - .instance_size = sizeof(VHostVSockPCI), - .instance_init = vhost_vsock_pci_instance_init, --- -2.27.0 - diff --git a/kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch b/kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch deleted file mode 100644 index 1bea17a..0000000 --- a/kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch +++ /dev/null @@ -1,169 +0,0 @@ -From 9455373aefea91177dcf32da2c448f93b8e7aa38 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 29 Sep 2020 12:41:40 -0400 -Subject: [PATCH 1/6] virtio: skip legacy support check on machine types less - than 5.1 - -RH-Author: Stefano Garzarella -Message-id: <20200929124143.41520-2-sgarzare@redhat.com> -Patchwork-id: 98512 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/4] virtio: skip legacy support check on machine types less than 5.1 -Bugzilla: 1868449 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Cornelia Huck - -Commit 9b3a35ec82 ("virtio: verify that legacy support is not accidentally -on") added a check that returns an error if legacy support is on, but the -device does not support legacy. - -Unfortunately some devices were wrongly declared legacy capable even if -they were not (e.g vhost-vsock). - -To avoid migration issues, we add a virtio-device property -(x-disable-legacy-check) to skip the legacy error, printing a warning -instead, for machine types < 5.1. 
- -Cc: qemu-stable@nongnu.org -Fixes: 9b3a35ec82 ("virtio: verify that legacy support is not accidentally on") -Suggested-by: Dr. David Alan Gilbert -Suggested-by: Cornelia Huck -Reviewed-by: Cornelia Huck -Signed-off-by: Stefano Garzarella -Message-Id: <20200921122506.82515-2-sgarzare@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin - -(cherry picked from commit d55f518248f263bb8d0852f98e47102ea09d4f89) -Added 'x-disable-legacy-check' in hw_compat_rhel_8_2 - -Signed-off-by: Stefano Garzarella -Signed-off-by: Danilo C. L. de Paula ---- - hw/core/machine.c | 3 +++ - hw/s390x/virtio-ccw.c | 15 ++++++++++++--- - hw/virtio/virtio-pci.c | 14 ++++++++++++-- - hw/virtio/virtio.c | 7 +++++++ - include/hw/virtio/virtio.h | 2 ++ - 5 files changed, 36 insertions(+), 5 deletions(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 10fa9b8c756..86ce3af71e4 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -66,6 +66,8 @@ GlobalProperty hw_compat_rhel_8_2[] = { - { "vmport", "x-report-vmx-type", "off" }, - /* hw_compat_rhel_8_2 from hw_compat_5_0 */ - { "vmport", "x-cmds-v2", "off" }, -+ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ -+ { "virtio-device", "x-disable-legacy-check", "true" }, - }; - const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); - -@@ -246,6 +248,7 @@ GlobalProperty hw_compat_5_0[] = { - { "vmport", "x-signal-unsupported-cmd", "off" }, - { "vmport", "x-report-vmx-type", "off" }, - { "vmport", "x-cmds-v2", "off" }, -+ { "virtio-device", "x-disable-legacy-check", "true" }, - }; - const size_t hw_compat_5_0_len = G_N_ELEMENTS(hw_compat_5_0); - -diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c -index 0e602702971..3dfc93d4f6f 100644 ---- a/hw/s390x/virtio-ccw.c -+++ b/hw/s390x/virtio-ccw.c -@@ -1122,9 +1122,18 @@ static void virtio_ccw_device_plugged(DeviceState *d, Error **errp) - } - - if (!virtio_ccw_rev_max(dev) && !virtio_legacy_allowed(vdev)) { -- error_setg(errp, "Invalid value 
of property max_rev " -- "(is %d expected >= 1)", virtio_ccw_rev_max(dev)); -- return; -+ /* -+ * To avoid migration issues, we allow legacy mode when legacy -+ * check is disabled in the old machine types (< 5.1). -+ */ -+ if (virtio_legacy_check_disabled(vdev)) { -+ warn_report("device requires revision >= 1, but for backward " -+ "compatibility max_revision=0 is allowed"); -+ } else { -+ error_setg(errp, "Invalid value of property max_rev " -+ "(is %d expected >= 1)", virtio_ccw_rev_max(dev)); -+ return; -+ } - } - - if (virtio_get_num_queues(vdev) > VIRTIO_QUEUE_MAX) { -diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index ccdf54e81c7..4211565f2c9 100644 ---- a/hw/virtio/virtio-pci.c -+++ b/hw/virtio/virtio-pci.c -@@ -1596,8 +1596,18 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) - - if (legacy) { - if (!virtio_legacy_allowed(vdev)) { -- error_setg(errp, "device is modern-only, use disable-legacy=on"); -- return; -+ /* -+ * To avoid migration issues, we allow legacy mode when legacy -+ * check is disabled in the old machine types (< 5.1). 
-+ */ -+ if (virtio_legacy_check_disabled(vdev)) { -+ warn_report("device is modern-only, but for backward " -+ "compatibility legacy is allowed"); -+ } else { -+ error_setg(errp, -+ "device is modern-only, use disable-legacy=on"); -+ return; -+ } - } - if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) { - error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by" -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index e9830252176..b85277da673 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -3304,6 +3304,11 @@ bool virtio_legacy_allowed(VirtIODevice *vdev) - } - } - -+bool virtio_legacy_check_disabled(VirtIODevice *vdev) -+{ -+ return vdev->disable_legacy_check; -+} -+ - hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n) - { - return vdev->vq[n].vring.desc; -@@ -3713,6 +3718,8 @@ static Property virtio_properties[] = { - DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features), - DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true), - DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true), -+ DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice, -+ disable_legacy_check, false), - DEFINE_PROP_END_OF_LIST(), - }; - -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index e424df12cf6..c50f5a9dfe8 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -105,6 +105,7 @@ struct VirtIODevice - bool use_started; - bool started; - bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */ -+ bool disable_legacy_check; - VMChangeStateEntry *vmstate; - char *bus_name; - uint8_t device_endian; -@@ -398,5 +399,6 @@ static inline bool virtio_device_disabled(VirtIODevice *vdev) - } - - bool virtio_legacy_allowed(VirtIODevice *vdev); -+bool virtio_legacy_check_disabled(VirtIODevice *vdev); - - #endif --- -2.27.0 - diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 4b53bbd..b871e53 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ 
Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.1.0 -Release: 11%{?dist} +Release: 10%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -197,16 +197,6 @@ Patch63: kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch Patch64: kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch # For bz#1877209 - 'qemu-img bitmaps --merge' failed when trying to merge top volume bitmap to base volume bitmap Patch65: kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch -# For bz#1868449 - vhost_vsock error: device is modern-only, use disable-legacy=on -Patch66: kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch -# For bz#1868449 - vhost_vsock error: device is modern-only, use disable-legacy=on -Patch67: kvm-vhost-vsock-pci-force-virtio-version-1.patch -# For bz#1868449 - vhost_vsock error: device is modern-only, use disable-legacy=on -Patch68: kvm-vhost-user-vsock-pci-force-virtio-version-1.patch -# For bz#1868449 - vhost_vsock error: device is modern-only, use disable-legacy=on -Patch69: kvm-vhost-vsock-ccw-force-virtio-version-1.patch -# For bz#1874004 - Live migration performance is poor during guest installation process on power host -Patch70: kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch BuildRequires: wget BuildRequires: rpm-build @@ -956,9 +946,9 @@ install -m 0644 %{_sourcedir}/qemu-pr-helper.socket %{buildroot}%{_unitdir} find $RPM_BUILD_ROOT -name '*.la' -or -name '*.a' | xargs rm -f -# We need to make the block device modules and other qemu SO files executable -# otherwise RPM won't pick up their dependencies. -chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/*.so +# We need to make the block device modules executable else +# RPM won't pick up their dependencies. 
+chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/block-*.so # Remove buildinfo rm -rf $RPM_BUILD_ROOT%{qemudocdir}/interop/.buildinfo @@ -1172,20 +1162,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog -* Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 5.1.0-11.el8_3 -- kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch [bz#1868449] -- kvm-vhost-vsock-pci-force-virtio-version-1.patch [bz#1868449] -- kvm-vhost-user-vsock-pci-force-virtio-version-1.patch [bz#1868449] -- kvm-vhost-vsock-ccw-force-virtio-version-1.patch [bz#1868449] -- kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch [bz#1874004] -- kvm-redhat-Make-all-generated-so-files-executable-not-on.patch [bz#1876635] -- Resolves: bz#1868449 - (vhost_vsock error: device is modern-only, use disable-legacy=on) -- Resolves: bz#1874004 - (Live migration performance is poor during guest installation process on power host) -- Resolves: bz#1876635 - (VM fails to start with a passthrough smartcard) - * Mon Sep 28 2020 Danilo Cesar Lemes de Paula - 5.1.0-10.el8 - kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch [bz#1877209] - Resolves: bz#1877209 From 6fa88dd103e43e8275ddb74f9ab2b9a7df72b677 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Mon, 5 Oct 2020 11:04:44 -0400 Subject: [PATCH 096/195] * Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 5.1.0-11.el8_3 - kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch [bz#1874004] - kvm-redhat-Make-all-generated-so-files-executable-not-on.patch [bz#1876635] - Resolves: bz#1874004 (Live migration performance is poor during guest installation process on power host) - Resolves: bz#1876635 (VM fails to start with a passthrough smartcard) --- ...se-max-bandwidth-to-128-MiB-s-1-Gib-.patch | 56 +++++++++++++++++++ qemu-kvm.spec | 18 ++++-- 2 files changed, 70 insertions(+), 4 deletions(-) create mode 100644 kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch diff --git a/kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch b/kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch new file mode 100644 index 0000000..64858cd --- /dev/null +++ b/kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch @@ -0,0 +1,56 @@ +From 60e1b84487fead757b5feaf0e55448338f2b2671 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Mon, 28 Sep 2020 11:06:48 -0400 +Subject: [PATCH 1/2] migration: increase max-bandwidth to 128 MiB/s (1 Gib/s) + +RH-Author: Laurent Vivier +Message-id: <20200928110648.342290-1-lvivier@redhat.com> +Patchwork-id: 98494 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH] migration: increase max-bandwidth to 128 MiB/s (1 Gib/s) +Bugzilla: 1874004 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Greg Kurz + +max-bandwidth is set by default to 32 MiB/s (256 Mib/s) +since 2008 (5bb7910af031c). + +Most of the CPUs can dirty memory faster than that now, +and this is clearly a problem with POWER where the page +size is 64 kiB and not 4 KiB. + +Signed-off-by: Laurent Vivier +Message-Id: <20200921144957.979989-1-lvivier@redhat.com> +Reviewed-by: David Gibson +Reviewed-by: Greg Kurz +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 7590a2ae091fde8bb72d5df93977ab9707e23242) +Signed-off-by: Laurent Vivier + +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=31576368 +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1874004 +Upstream: Merged +Tested: checked a migration while an installation is running can end + before the end of installation +Signed-off-by: Danilo C. L. de Paula +--- + migration/migration.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 7a89ce39a76..8193f33f768 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -57,7 +57,7 @@ + #include "qemu/queue.h" + #include "multifd.h" + +-#define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */ ++#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ + + /* Amount of time to allocate to each "chunk" of bandwidth-throttled + * data. */ +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index b871e53..fe669fc 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.1.0 -Release: 10%{?dist} +Release: 11%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -197,6 +197,8 @@ Patch63: kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch Patch64: kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch # For bz#1877209 - 'qemu-img bitmaps --merge' failed when trying to merge top volume bitmap to base volume bitmap Patch65: kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch +# For bz#1874004 - Live migration performance is poor during guest installation process on power host +Patch66: kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch BuildRequires: wget BuildRequires: rpm-build @@ -946,9 +948,9 @@ install -m 0644 %{_sourcedir}/qemu-pr-helper.socket %{buildroot}%{_unitdir} find $RPM_BUILD_ROOT -name '*.la' -or -name '*.a' | xargs rm -f -# We need to make the block device modules executable else -# RPM won't pick up their dependencies. -chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/block-*.so +# We need to make the block device modules and other qemu SO files executable +# otherwise RPM won't pick up their dependencies. +chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/*.so # Remove buildinfo rm -rf $RPM_BUILD_ROOT%{qemudocdir}/interop/.buildinfo @@ -1162,6 +1164,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 5.1.0-11.el8_3 +- kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch [bz#1874004] +- kvm-redhat-Make-all-generated-so-files-executable-not-on.patch [bz#1876635] +- Resolves: bz#1874004 + (Live migration performance is poor during guest installation process on power host) +- Resolves: bz#1876635 + (VM fails to start with a passthrough smartcard) + * Mon Sep 28 2020 Danilo Cesar Lemes de Paula - 5.1.0-10.el8 - kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch [bz#1877209] - Resolves: bz#1877209 From a7bf1d2d7cb02276399db02c18d59afab0a5799d Mon Sep 17 00:00:00 2001 From: "Danilo C. L. 
de Paula" Date: Mon, 5 Oct 2020 14:47:19 -0400 Subject: [PATCH 097/195] * Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 5.1.0-12.el8_3 - kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch [bz#1868449] - kvm-vhost-vsock-pci-force-virtio-version-1.patch [bz#1868449] - kvm-vhost-user-vsock-pci-force-virtio-version-1.patch [bz#1868449] - kvm-vhost-vsock-ccw-force-virtio-version-1.patch [bz#1868449] - Resolves: bz#1868449 (vhost_vsock error: device is modern-only, use disable-legacy=on) --- ...ser-vsock-pci-force-virtio-version-1.patch | 71 ++++++++ ...ost-vsock-ccw-force-virtio-version-1.patch | 66 +++++++ ...ost-vsock-pci-force-virtio-version-1.patch | 87 +++++++++ ...cy-support-check-on-machine-types-le.patch | 169 ++++++++++++++++++ qemu-kvm.spec | 18 +- 5 files changed, 410 insertions(+), 1 deletion(-) create mode 100644 kvm-vhost-user-vsock-pci-force-virtio-version-1.patch create mode 100644 kvm-vhost-vsock-ccw-force-virtio-version-1.patch create mode 100644 kvm-vhost-vsock-pci-force-virtio-version-1.patch create mode 100644 kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch diff --git a/kvm-vhost-user-vsock-pci-force-virtio-version-1.patch b/kvm-vhost-user-vsock-pci-force-virtio-version-1.patch new file mode 100644 index 0000000..535e8f3 --- /dev/null +++ b/kvm-vhost-user-vsock-pci-force-virtio-version-1.patch @@ -0,0 +1,71 @@ +From b969c169c82c3022949a7717d6f5745fbdc579bc Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 29 Sep 2020 12:41:42 -0400 +Subject: [PATCH 3/4] vhost-user-vsock-pci: force virtio version 1 + +RH-Author: Stefano Garzarella +Message-id: <20200929124143.41520-4-sgarzare@redhat.com> +Patchwork-id: 98513 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 3/4] vhost-user-vsock-pci: force virtio version 1 +Bugzilla: 1868449 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: Cornelia Huck + +Commit 9b3a35ec82 ("virtio: verify that legacy support is not +accidentally on") added a safety check that requires to set +'disable-legacy=on' on vhost-user-vsock-pci device: + + $ ./qemu-system-x86_64 ... \ + -chardev socket,id=char0,reconnect=0,path=/tmp/vhost4.socket \ + -device vhost-user-vsock-pci,chardev=char0 + qemu-system-x86_64: -device vhost-user-vsock-pci,chardev=char0: + device is modern-only, use disable-legacy=on + +virtio-vsock was introduced after the release of VIRTIO 1.0 +specifications, so it should be 'modern-only'. + +This patch forces virtio version 1 and removes the 'transitional_name' +property, as done for vhost-vsock-pci, removing the need to specify +'disable-legacy=on' on vhost-user-vsock-pci device. + +Cc: qemu-stable@nongnu.org +Suggested-by: Cornelia Huck +Reviewed-by: Cornelia Huck +Signed-off-by: Stefano Garzarella +Message-Id: <20200921122506.82515-4-sgarzare@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 27eda699f59d430c33fc054a36a17251992e70dc) +Signed-off-by: Stefano Garzarella +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/virtio/vhost-user-vsock-pci.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-user-vsock-pci.c b/hw/virtio/vhost-user-vsock-pci.c +index f4cf95873d3..492df6418a4 100644 +--- a/hw/virtio/vhost-user-vsock-pci.c ++++ b/hw/virtio/vhost-user-vsock-pci.c +@@ -40,6 +40,9 @@ static void vhost_user_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) + VHostUserVSockPCI *dev = VHOST_USER_VSOCK_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + ++ /* unlike vhost-vsock, we do not need to care about pre-5.1 compat */ ++ virtio_pci_force_virtio_1(vpci_dev); ++ + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); + } + +@@ -68,7 +71,6 @@ static void vhost_user_vsock_pci_instance_init(Object *obj) + static const VirtioPCIDeviceTypeInfo vhost_user_vsock_pci_info = { + .base_name = TYPE_VHOST_USER_VSOCK_PCI, + .generic_name = "vhost-user-vsock-pci", +- .transitional_name = "vhost-user-vsock-pci-transitional", + .non_transitional_name = "vhost-user-vsock-pci-non-transitional", + .instance_size = sizeof(VHostUserVSockPCI), + .instance_init = vhost_user_vsock_pci_instance_init, +-- +2.27.0 + diff --git a/kvm-vhost-vsock-ccw-force-virtio-version-1.patch b/kvm-vhost-vsock-ccw-force-virtio-version-1.patch new file mode 100644 index 0000000..1b3f4d1 --- /dev/null +++ b/kvm-vhost-vsock-ccw-force-virtio-version-1.patch @@ -0,0 +1,66 @@ +From 41467dab8f8e312bbb13a47454724e20c4b08d60 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 29 Sep 2020 12:41:43 -0400 +Subject: [PATCH 4/4] vhost-vsock-ccw: force virtio version 1 + +RH-Author: Stefano Garzarella +Message-id: <20200929124143.41520-5-sgarzare@redhat.com> +Patchwork-id: 98514 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 4/4] vhost-vsock-ccw: force virtio version 1 +Bugzilla: 1868449 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: Cornelia Huck + +virtio-vsock was introduced after the release of VIRTIO 1.0 +specifications, so it should be 'modern-only'. + +This patch forces virtio version 1 as done for vhost-vsock-pci. + +To avoid migration issues, we force virtio version 1 only when +legacy check is enabled in the new machine types (>= 5.1). + +Cc: qemu-stable@nongnu.org +Suggested-by: Cornelia Huck +Reviewed-by: Cornelia Huck +Signed-off-by: Stefano Garzarella +Message-Id: <20200921122506.82515-5-sgarzare@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit a6704a34cf02add13964149e0de6453ae62bd9db) +Signed-off-by: Stefano Garzarella +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/vhost-vsock-ccw.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/hw/s390x/vhost-vsock-ccw.c b/hw/s390x/vhost-vsock-ccw.c +index 0822ecca893..246416a8f96 100644 +--- a/hw/s390x/vhost-vsock-ccw.c ++++ b/hw/s390x/vhost-vsock-ccw.c +@@ -40,9 +40,21 @@ static void vhost_vsock_ccw_class_init(ObjectClass *klass, void *data) + static void vhost_vsock_ccw_instance_init(Object *obj) + { + VHostVSockCCWState *dev = VHOST_VSOCK_CCW(obj); ++ VirtioCcwDevice *ccw_dev = VIRTIO_CCW_DEVICE(obj); ++ VirtIODevice *virtio_dev; + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_VSOCK); ++ ++ virtio_dev = VIRTIO_DEVICE(&dev->vdev); ++ ++ /* ++ * To avoid migration issues, we force virtio version 1 only when ++ * legacy check is enabled in the new machine types (>= 5.1). 
++ */ ++ if (!virtio_legacy_check_disabled(virtio_dev)) { ++ ccw_dev->force_revision_1 = true; ++ } + } + + static const TypeInfo vhost_vsock_ccw_info = { +-- +2.27.0 + diff --git a/kvm-vhost-vsock-pci-force-virtio-version-1.patch b/kvm-vhost-vsock-pci-force-virtio-version-1.patch new file mode 100644 index 0000000..db1d86c --- /dev/null +++ b/kvm-vhost-vsock-pci-force-virtio-version-1.patch @@ -0,0 +1,87 @@ +From 7153f4862bf6c4396412a8ba0d7db45ae087a337 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 29 Sep 2020 12:41:41 -0400 +Subject: [PATCH 2/4] vhost-vsock-pci: force virtio version 1 + +RH-Author: Stefano Garzarella +Message-id: <20200929124143.41520-3-sgarzare@redhat.com> +Patchwork-id: 98511 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 2/4] vhost-vsock-pci: force virtio version 1 +Bugzilla: 1868449 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Cornelia Huck + +Commit 9b3a35ec82 ("virtio: verify that legacy support is not +accidentally on") added a safety check that requires to set +'disable-legacy=on' on vhost-vsock-pci device: + + $ ./qemu-system-x86_64 ... -device vhost-vsock-pci,guest-cid=5 + qemu-system-x86_64: -device vhost-vsock-pci,guest-cid=5: + device is modern-only, use disable-legacy=on + +virtio-vsock was introduced after the release of VIRTIO 1.0 +specifications, so it should be 'modern-only'. +In addition Cornelia verified that forcing a legacy mode on +vhost-vsock-pci device using x86-64 host and s390x guest, so with +different endianness, produces strange behaviours. + +This patch forces virtio version 1 and removes the 'transitional_name' +property removing the need to specify 'disable-legacy=on' on +vhost-vsock-pci device. + +To avoid migration issues, we force virtio version 1 only when +legacy check is enabled in the new machine types (>= 5.1). + +As the transitional device name is not commonly used, we do not +provide compatibility handling for it. 
+ +Cc: qemu-stable@nongnu.org +Reported-by: Qian Cai +Reported-by: Qinghua Cheng +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1868449 +Suggested-by: Cornelia Huck +Reviewed-by: Cornelia Huck +Signed-off-by: Stefano Garzarella +Message-Id: <20200921122506.82515-3-sgarzare@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 6209070503989cf4f28549f228989419d4f0b236) +Signed-off-by: Stefano Garzarella +Signed-off-by: Danilo C. L. de Paula +--- + hw/virtio/vhost-vsock-pci.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-vsock-pci.c b/hw/virtio/vhost-vsock-pci.c +index a815278e69c..f11a38292fe 100644 +--- a/hw/virtio/vhost-vsock-pci.c ++++ b/hw/virtio/vhost-vsock-pci.c +@@ -43,6 +43,15 @@ static void vhost_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) + { + VHostVSockPCI *dev = VHOST_VSOCK_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); ++ VirtIODevice *virtio_dev = VIRTIO_DEVICE(vdev); ++ ++ /* ++ * To avoid migration issues, we force virtio version 1 only when ++ * legacy check is enabled in the new machine types (>= 5.1). 
++ */ ++ if (!virtio_legacy_check_disabled(virtio_dev)) { ++ virtio_pci_force_virtio_1(vpci_dev); ++ } + + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); + } +@@ -72,7 +81,6 @@ static void vhost_vsock_pci_instance_init(Object *obj) + static const VirtioPCIDeviceTypeInfo vhost_vsock_pci_info = { + .base_name = TYPE_VHOST_VSOCK_PCI, + .generic_name = "vhost-vsock-pci", +- .transitional_name = "vhost-vsock-pci-transitional", + .non_transitional_name = "vhost-vsock-pci-non-transitional", + .instance_size = sizeof(VHostVSockPCI), + .instance_init = vhost_vsock_pci_instance_init, +-- +2.27.0 + diff --git a/kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch b/kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch new file mode 100644 index 0000000..6b2c802 --- /dev/null +++ b/kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch @@ -0,0 +1,169 @@ +From 0e84dffa20452130768c81390d9df56fab8ba260 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 29 Sep 2020 12:41:40 -0400 +Subject: [PATCH 1/4] virtio: skip legacy support check on machine types less + than 5.1 + +RH-Author: Stefano Garzarella +Message-id: <20200929124143.41520-2-sgarzare@redhat.com> +Patchwork-id: 98512 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/4] virtio: skip legacy support check on machine types less than 5.1 +Bugzilla: 1868449 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Cornelia Huck + +Commit 9b3a35ec82 ("virtio: verify that legacy support is not accidentally +on") added a check that returns an error if legacy support is on, but the +device does not support legacy. + +Unfortunately some devices were wrongly declared legacy capable even if +they were not (e.g vhost-vsock). + +To avoid migration issues, we add a virtio-device property +(x-disable-legacy-check) to skip the legacy error, printing a warning +instead, for machine types < 5.1. 
+ +Cc: qemu-stable@nongnu.org +Fixes: 9b3a35ec82 ("virtio: verify that legacy support is not accidentally on") +Suggested-by: Dr. David Alan Gilbert +Suggested-by: Cornelia Huck +Reviewed-by: Cornelia Huck +Signed-off-by: Stefano Garzarella +Message-Id: <20200921122506.82515-2-sgarzare@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit d55f518248f263bb8d0852f98e47102ea09d4f89) +Added 'x-disable-legacy-check' in hw_compat_rhel_8_2 + +Signed-off-by: Stefano Garzarella +Signed-off-by: Danilo C. L. de Paula +--- + hw/core/machine.c | 3 +++ + hw/s390x/virtio-ccw.c | 15 ++++++++++++--- + hw/virtio/virtio-pci.c | 14 ++++++++++++-- + hw/virtio/virtio.c | 7 +++++++ + include/hw/virtio/virtio.h | 2 ++ + 5 files changed, 36 insertions(+), 5 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 10fa9b8c756..86ce3af71e4 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -66,6 +66,8 @@ GlobalProperty hw_compat_rhel_8_2[] = { + { "vmport", "x-report-vmx-type", "off" }, + /* hw_compat_rhel_8_2 from hw_compat_5_0 */ + { "vmport", "x-cmds-v2", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "virtio-device", "x-disable-legacy-check", "true" }, + }; + const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); + +@@ -246,6 +248,7 @@ GlobalProperty hw_compat_5_0[] = { + { "vmport", "x-signal-unsupported-cmd", "off" }, + { "vmport", "x-report-vmx-type", "off" }, + { "vmport", "x-cmds-v2", "off" }, ++ { "virtio-device", "x-disable-legacy-check", "true" }, + }; + const size_t hw_compat_5_0_len = G_N_ELEMENTS(hw_compat_5_0); + +diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c +index 0e602702971..3dfc93d4f6f 100644 +--- a/hw/s390x/virtio-ccw.c ++++ b/hw/s390x/virtio-ccw.c +@@ -1122,9 +1122,18 @@ static void virtio_ccw_device_plugged(DeviceState *d, Error **errp) + } + + if (!virtio_ccw_rev_max(dev) && !virtio_legacy_allowed(vdev)) { +- error_setg(errp, "Invalid value 
of property max_rev " +- "(is %d expected >= 1)", virtio_ccw_rev_max(dev)); +- return; ++ /* ++ * To avoid migration issues, we allow legacy mode when legacy ++ * check is disabled in the old machine types (< 5.1). ++ */ ++ if (virtio_legacy_check_disabled(vdev)) { ++ warn_report("device requires revision >= 1, but for backward " ++ "compatibility max_revision=0 is allowed"); ++ } else { ++ error_setg(errp, "Invalid value of property max_rev " ++ "(is %d expected >= 1)", virtio_ccw_rev_max(dev)); ++ return; ++ } + } + + if (virtio_get_num_queues(vdev) > VIRTIO_QUEUE_MAX) { +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index ccdf54e81c7..4211565f2c9 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -1596,8 +1596,18 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) + + if (legacy) { + if (!virtio_legacy_allowed(vdev)) { +- error_setg(errp, "device is modern-only, use disable-legacy=on"); +- return; ++ /* ++ * To avoid migration issues, we allow legacy mode when legacy ++ * check is disabled in the old machine types (< 5.1). 
++ */ ++ if (virtio_legacy_check_disabled(vdev)) { ++ warn_report("device is modern-only, but for backward " ++ "compatibility legacy is allowed"); ++ } else { ++ error_setg(errp, ++ "device is modern-only, use disable-legacy=on"); ++ return; ++ } + } + if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) { + error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by" +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index e9830252176..b85277da673 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -3304,6 +3304,11 @@ bool virtio_legacy_allowed(VirtIODevice *vdev) + } + } + ++bool virtio_legacy_check_disabled(VirtIODevice *vdev) ++{ ++ return vdev->disable_legacy_check; ++} ++ + hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n) + { + return vdev->vq[n].vring.desc; +@@ -3713,6 +3718,8 @@ static Property virtio_properties[] = { + DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features), + DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true), + DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true), ++ DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice, ++ disable_legacy_check, false), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index e424df12cf6..c50f5a9dfe8 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -105,6 +105,7 @@ struct VirtIODevice + bool use_started; + bool started; + bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */ ++ bool disable_legacy_check; + VMChangeStateEntry *vmstate; + char *bus_name; + uint8_t device_endian; +@@ -398,5 +399,6 @@ static inline bool virtio_device_disabled(VirtIODevice *vdev) + } + + bool virtio_legacy_allowed(VirtIODevice *vdev); ++bool virtio_legacy_check_disabled(VirtIODevice *vdev); + + #endif +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index fe669fc..9cee8ce 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ 
Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.1.0 -Release: 11%{?dist} +Release: 12%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -199,6 +199,14 @@ Patch64: kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch Patch65: kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch # For bz#1874004 - Live migration performance is poor during guest installation process on power host Patch66: kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch +# For bz#1868449 - vhost_vsock error: device is modern-only, use disable-legacy=on +Patch67: kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch +# For bz#1868449 - vhost_vsock error: device is modern-only, use disable-legacy=on +Patch68: kvm-vhost-vsock-pci-force-virtio-version-1.patch +# For bz#1868449 - vhost_vsock error: device is modern-only, use disable-legacy=on +Patch69: kvm-vhost-user-vsock-pci-force-virtio-version-1.patch +# For bz#1868449 - vhost_vsock error: device is modern-only, use disable-legacy=on +Patch70: kvm-vhost-vsock-ccw-force-virtio-version-1.patch BuildRequires: wget BuildRequires: rpm-build @@ -1164,6 +1172,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 5.1.0-12.el8_3 +- kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch [bz#1868449] +- kvm-vhost-vsock-pci-force-virtio-version-1.patch [bz#1868449] +- kvm-vhost-user-vsock-pci-force-virtio-version-1.patch [bz#1868449] +- kvm-vhost-vsock-ccw-force-virtio-version-1.patch [bz#1868449] +- Resolves: bz#1868449 + (vhost_vsock error: device is modern-only, use disable-legacy=on) + * Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 5.1.0-11.el8_3 - kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch [bz#1874004] - kvm-redhat-Make-all-generated-so-files-executable-not-on.patch [bz#1876635] From 
88e6244f9729fe5b19dbfcaa2a899c9f671f3199 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Thu, 8 Oct 2020 16:47:39 -0400 Subject: [PATCH 098/195] * Thu Oct 08 2020 Danilo Cesar Lemes de Paula - 5.1.0-13.el8_3 - kvm-x86-lpc9-let-firmware-negotiate-CPU-hotplug-with-SMI.patch [bz#1846886] - kvm-x86-cpuhp-prevent-guest-crash-on-CPU-hotplug-when-br.patch [bz#1846886] - kvm-x86-cpuhp-refuse-cpu-hot-unplug-request-earlier-if-n.patch [bz#1846886] - Resolves: bz#1846886 (Guest hit soft lockup or reboots if hotplug vcpu under ovmf) --- ...t-guest-crash-on-CPU-hotplug-when-br.patch | 99 ++++++++++++++++ ...-cpu-hot-unplug-request-earlier-if-n.patch | 68 +++++++++++ ...mware-negotiate-CPU-hotplug-with-SMI.patch | 110 ++++++++++++++++++ qemu-kvm.spec | 15 ++- 4 files changed, 291 insertions(+), 1 deletion(-) create mode 100644 kvm-x86-cpuhp-prevent-guest-crash-on-CPU-hotplug-when-br.patch create mode 100644 kvm-x86-cpuhp-refuse-cpu-hot-unplug-request-earlier-if-n.patch create mode 100644 kvm-x86-lpc9-let-firmware-negotiate-CPU-hotplug-with-SMI.patch diff --git a/kvm-x86-cpuhp-prevent-guest-crash-on-CPU-hotplug-when-br.patch b/kvm-x86-cpuhp-prevent-guest-crash-on-CPU-hotplug-when-br.patch new file mode 100644 index 0000000..548fd67 --- /dev/null +++ b/kvm-x86-cpuhp-prevent-guest-crash-on-CPU-hotplug-when-br.patch @@ -0,0 +1,99 @@ +From 98eced5d367a6a69006cab1ea2b77c2c2622694a Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 5 Oct 2020 15:27:02 -0400 +Subject: [PATCH 2/3] x86: cpuhp: prevent guest crash on CPU hotplug when + broadcast SMI is in use + +RH-Author: Igor Mammedov +Message-id: <20201005152703.1555401-3-imammedo@redhat.com> +Patchwork-id: 98550 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 2/3] x86: cpuhp: prevent guest crash on CPU hotplug when broadcast SMI is in use +Bugzilla: 1846886 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Michael S. 
Tsirkin + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1846886 +BRANCH: rhel-av-8.3.0 +UPSTREAM: Merged +BREW: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31759628 +Upstream commit: c5be7517d658 + +There were reports of guest crash on CPU hotplug, when using q35 machine +type and OVMF with SMM, due to hotplugged CPU trying to process SMI at +default SMI handler location without it being relocated by firmware first. + +Fix it by refusing hotplug if firmware hasn't negotiated CPU hotplug with +SMI support while SMI broadcast is in use. + +Conflicts: + hw/i386/x86.c + cpu wiring routines were moved to x86.c upstream + to be shared with micro vm, so the second hunk + has to be put into pc_cpu_pre_plug() and s/x86ms/pcms/. + +Signed-off-by: Igor Mammedov +Reviewed-by: Laszlo Ersek +Tested-by: Laszlo Ersek +Message-Id: <20200923094650.1301166-3-imammedo@redhat.com> +Tested-by: Laszlo Ersek +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/acpi/ich9.c | 12 +++++++++++- + hw/i386/pc.c | 11 +++++++++++ + 2 files changed, 22 insertions(+), 1 deletion(-) + +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index 43ad1ff9278..37286a03288 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -423,10 +423,20 @@ void ich9_pm_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + ICH9LPCState *lpc = ICH9_LPC_DEVICE(hotplug_dev); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) && +- !lpc->pm.acpi_memory_hotplug.is_enabled) ++ !lpc->pm.acpi_memory_hotplug.is_enabled) { + error_setg(errp, + "memory hotplug is not enabled: %s.memory-hotplug-support " + "is not set", object_get_typename(OBJECT(lpc))); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ uint64_t negotiated = lpc->smi_negotiated_features; ++ ++ if (negotiated & BIT_ULL(ICH9_LPC_SMI_F_BROADCAST_BIT) && ++ !(negotiated & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT))) { ++ error_setg(errp, "cpu hotplug with SMI wasn't enabled by firmware"); ++ error_append_hint(errp, "update machine type to newer than 5.1 " ++ "and firmware that suppors CPU hotplug with SMM"); ++ } ++ } + } + + void ich9_pm_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 6e0a3f391b0..0332589359b 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1761,6 +1761,17 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, + return; + } + ++ if (pcms->acpi_dev) { ++ Error *local_err = NULL; ++ ++ hotplug_handler_pre_plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, ++ &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ } ++ + init_topo_info(&topo_info, x86ms); + + env->nr_dies = x86ms->smp_dies; +-- +2.27.0 + diff --git a/kvm-x86-cpuhp-refuse-cpu-hot-unplug-request-earlier-if-n.patch b/kvm-x86-cpuhp-refuse-cpu-hot-unplug-request-earlier-if-n.patch new file mode 100644 index 0000000..6d4c9e8 --- /dev/null +++ 
b/kvm-x86-cpuhp-refuse-cpu-hot-unplug-request-earlier-if-n.patch @@ -0,0 +1,68 @@ +From 77c5df3ab28f294f7b21d33a2f6116b0889292ed Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 5 Oct 2020 15:27:03 -0400 +Subject: [PATCH 3/3] x86: cpuhp: refuse cpu hot-unplug request earlier if not + supported + +RH-Author: Igor Mammedov +Message-id: <20201005152703.1555401-4-imammedo@redhat.com> +Patchwork-id: 98551 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 3/3] x86: cpuhp: refuse cpu hot-unplug request earlier if not supported +Bugzilla: 1846886 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Michael S. Tsirkin + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1846886 +BRANCH: rhel-av-8.3.0 +UPSTREAM: Merged +BREW: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31759628 +Upstream commit: b48ad7c02ba7 + +CPU hot-unplug with SMM requires firmware participation to prevent +guest crash (i.e. CPU can be removed only after OS _and_ firmware +were prepared for the action). +Previous patches introduced ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT +feature bit, which is advertised by firmware when it has support +for CPU hot-unplug. Use it to check if guest is able to handle +unplug and make device_del fail gracefully if hot-unplug feature +hasn't been negotiated. + +Signed-off-by: Igor Mammedov +Tested-by: Laszlo Ersek +Reviewed-by: Laszlo Ersek +Message-Id: <20200923094650.1301166-4-imammedo@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/acpi/ich9.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index 37286a03288..f6c6c6a916a 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -475,6 +475,18 @@ void ich9_pm_device_unplug_request_cb(HotplugHandler *hotplug_dev, + errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) && + !lpc->pm.cpu_hotplug_legacy) { ++ uint64_t negotiated = lpc->smi_negotiated_features; ++ ++ if (negotiated & BIT_ULL(ICH9_LPC_SMI_F_BROADCAST_BIT) && ++ !(negotiated & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT))) { ++ error_setg(errp, "cpu hot-unplug with SMI wasn't enabled " ++ "by firmware"); ++ error_append_hint(errp, "update machine type to a version having " ++ "x-smi-cpu-hotunplug=on and firmware that " ++ "supports CPU hot-unplug with SMM"); ++ return; ++ } ++ + acpi_cpu_unplug_request_cb(hotplug_dev, &lpc->pm.cpuhp_state, + dev, errp); + } else { +-- +2.27.0 + diff --git a/kvm-x86-lpc9-let-firmware-negotiate-CPU-hotplug-with-SMI.patch b/kvm-x86-lpc9-let-firmware-negotiate-CPU-hotplug-with-SMI.patch new file mode 100644 index 0000000..f3c09d0 --- /dev/null +++ b/kvm-x86-lpc9-let-firmware-negotiate-CPU-hotplug-with-SMI.patch @@ -0,0 +1,110 @@ +From e2d32096071d7175d11b444db80e25709d6bf3d4 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 5 Oct 2020 15:27:01 -0400 +Subject: [PATCH 1/3] x86: lpc9: let firmware negotiate 'CPU hotplug with SMI' + features + +RH-Author: Igor Mammedov +Message-id: <20201005152703.1555401-2-imammedo@redhat.com> +Patchwork-id: 98549 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/3] x86: lpc9: let firmware negotiate 'CPU hotplug with SMI' features +Bugzilla: 1846886 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Michael S. 
Tsirkin + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1846886 +BRANCH: rhel-av-8.3.0 +UPSTREAM: Merged +BREW: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31759628 +Upstream commit: 00dc02d284ea + +It will allow firmware to notify QEMU that firmware requires SMI +being triggered on CPU hot[un]plug, so that it would be able to account +for hotplugged CPU and relocate it to new SMM base and/or safely remove +CPU on unplug. + +Using negotiated features, follow up patches will insert SMI upcall +into AML code, to make sure that firmware processes hotplug before +guest OS would attempt to use new CPU. + +Conflicts: + hw/i386/pc.c + move x-smi-cpu-hotplug chunk from missing pc_compat_5_1[] compat props + to pc_rhel_compat[] to disable cpu hotplug for [ovmf+smi] config + (should be moved to versioned q35 machine type later, when RHEL gets + complete feature and we decide to support it downstream) + +Signed-off-by: Igor Mammedov +Reviewed-by: Laszlo Ersek +Tested-by: Laszlo Ersek +Message-Id: <20200923094650.1301166-2-imammedo@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/i386/pc.c | 2 ++ + hw/isa/lpc_ich9.c | 13 +++++++++++++ + include/hw/i386/ich9.h | 2 ++ + 3 files changed, 17 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index ac2cc79fca2..6e0a3f391b0 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -358,6 +358,8 @@ GlobalProperty pc_rhel_compat[] = { + { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, + /* bz 1508330 */ + { "vfio-pci", "x-no-geforce-quirks", "on" }, ++ /* BZ 1846886 */ ++ { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + +diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c +index cd6e169d47a..19f32bed3e9 100644 +--- a/hw/isa/lpc_ich9.c ++++ b/hw/isa/lpc_ich9.c +@@ -373,6 +373,15 @@ static void smi_features_ok_callback(void *opaque) + /* guest requests invalid features, leave @features_ok at zero */ + return; + } ++ if (!(guest_features & BIT_ULL(ICH9_LPC_SMI_F_BROADCAST_BIT)) && ++ guest_features & (BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT) | ++ BIT_ULL(ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT))) { ++ /* ++ * cpu hot-[un]plug with SMI requires SMI broadcast, ++ * leave @features_ok at zero ++ */ ++ return; ++ } + + /* valid feature subset requested, lock it down, report success */ + lpc->smi_negotiated_features = guest_features; +@@ -747,6 +756,10 @@ static Property ich9_lpc_properties[] = { + DEFINE_PROP_BOOL("noreboot", ICH9LPCState, pin_strap.spkr_hi, true), + DEFINE_PROP_BIT64("x-smi-broadcast", ICH9LPCState, smi_host_features, + ICH9_LPC_SMI_F_BROADCAST_BIT, true), ++ DEFINE_PROP_BIT64("x-smi-cpu-hotplug", ICH9LPCState, smi_host_features, ++ ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT, true), ++ DEFINE_PROP_BIT64("x-smi-cpu-hotunplug", ICH9LPCState, smi_host_features, ++ ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT, false), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/include/hw/i386/ich9.h b/include/hw/i386/ich9.h +index a98d10b252d..d1bb3f7bf0e 100644 +--- a/include/hw/i386/ich9.h ++++ b/include/hw/i386/ich9.h +@@ -247,5 
+247,7 @@ typedef struct ICH9LPCState { + + /* bit positions used in fw_cfg SMI feature negotiation */ + #define ICH9_LPC_SMI_F_BROADCAST_BIT 0 ++#define ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT 1 ++#define ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT 2 + + #endif /* HW_ICH9_H */ +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 9cee8ce..2e13a11 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.1.0 -Release: 12%{?dist} +Release: 13%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -207,6 +207,12 @@ Patch68: kvm-vhost-vsock-pci-force-virtio-version-1.patch Patch69: kvm-vhost-user-vsock-pci-force-virtio-version-1.patch # For bz#1868449 - vhost_vsock error: device is modern-only, use disable-legacy=on Patch70: kvm-vhost-vsock-ccw-force-virtio-version-1.patch +# For bz#1846886 - Guest hit soft lockup or reboots if hotplug vcpu under ovmf +Patch71: kvm-x86-lpc9-let-firmware-negotiate-CPU-hotplug-with-SMI.patch +# For bz#1846886 - Guest hit soft lockup or reboots if hotplug vcpu under ovmf +Patch72: kvm-x86-cpuhp-prevent-guest-crash-on-CPU-hotplug-when-br.patch +# For bz#1846886 - Guest hit soft lockup or reboots if hotplug vcpu under ovmf +Patch73: kvm-x86-cpuhp-refuse-cpu-hot-unplug-request-earlier-if-n.patch BuildRequires: wget BuildRequires: rpm-build @@ -1172,6 +1178,13 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Thu Oct 08 2020 Danilo Cesar Lemes de Paula - 5.1.0-13.el8_3 +- kvm-x86-lpc9-let-firmware-negotiate-CPU-hotplug-with-SMI.patch [bz#1846886] +- kvm-x86-cpuhp-prevent-guest-crash-on-CPU-hotplug-when-br.patch [bz#1846886] +- kvm-x86-cpuhp-refuse-cpu-hot-unplug-request-earlier-if-n.patch [bz#1846886] +- Resolves: bz#1846886 + (Guest hit soft lockup or reboots if hotplug vcpu under ovmf) + * Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 
5.1.0-12.el8_3 - kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch [bz#1868449] - kvm-vhost-vsock-pci-force-virtio-version-1.patch [bz#1868449] From ae7540466fd66ce59507d16a3892ce3b486f7d04 Mon Sep 17 00:00:00 2001 From: "Danilo C. L. de Paula" Date: Fri, 9 Oct 2020 13:27:20 -0400 Subject: [PATCH 100/195] trying another invalid BZ - Resolves: bz#1846886 --- qemu-kvm.spec | 1 - 1 file changed, 1 deletion(-) diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 2e13a11..29d61a1 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -10,7 +10,6 @@ %global have_memlock_limits 0 - %ifnarch %{ix86} x86_64 %global have_usbredir 0 %endif From 7de5fc2e491443fe547f940067357eb999166e21 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 13 Nov 2020 14:09:35 +0100 Subject: [PATCH 101/195] Initial buildable RHEL 9 qemu-kvm based on RHEL 5.2.0 rc1 --- .gitignore | 1 + ...at-Adding-slirp-to-the-exploded-tree.patch | 230 +++-- 0005-Initial-redhat-build.patch | 248 ++++- 0006-Enable-disable-devices-for-RHEL.patch | 326 +++--- ...Machine-type-related-general-changes.patch | 189 ++-- 0008-Add-aarch64-machine-types.patch | 128 ++- 0009-Add-ppc64-machine-types.patch | 256 ++++- 0010-Add-s390x-machine-types.patch | 20 +- 0011-Add-x86_64-machine-types.patch | 142 ++- 0012-Enable-make-check.patch | 163 +-- ...mber-of-devices-that-can-be-assigned.patch | 20 +- ...Add-support-statement-to-help-output.patch | 13 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 109 +- 0016-Add-support-for-simpletrace.patch | 124 --- ...documentation-instead-of-qemu-system.patch | 30 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 11 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 8 +- 0018-usb-xhci-Fix-PCI-capability-order.patch | 96 -- ...x-blockdev-reopen-API-with-feature-f.patch | 8 +- 0020-Upstream.patch | 37 + 0021-RHEL-9-test.patch | 33 + ...-vTPM-for-POWER-in-downstream-config.patch | 45 - ...t-fix-5.0-rebase-missing-ISA-TPM-TIS.patch | 44 - 0024-redhat-define-hw_compat_8_2.patch | 
83 -- 0025-x86-Add-8.3.0-x86_64-machine-type.patch | 119 --- 0027-hw-arm-Changes-to-rhel820-machine.patch | 82 -- ...oduce-rhel_virt_instance_init-helper.patch | 53 - 0029-hw-arm-Add-rhel830-machine-type.patch | 61 -- ...efine-pseries-rhel8.3.0-machine-type.patch | 77 -- ...max_cpus-value-on-spapr-rhel-machine.patch | 49 - ...max_cpus-value-on-virt-rhel-machine-.patch | 53 - ...e-downstream-only-MAX_RHEL_CPUS-code.patch | 83 -- 0034-q35-Set-max_cpus-to-512.patch | 45 - ...rt-Allow-the-TPM_TIS_SYSBUS-device-d.patch | 39 - ...e-vTPM-for-ARM-in-downstream-configs.patch | 35 - ...sable-TPM-passthrough-backend-on-ARM.patch | 44 - kvm-Drop-bogus-IPv6-messages.patch | 51 - kvm-Revert-Drop-bogus-IPv6-messages.patch | 48 - ...d-EPYC-mode-topology-decoding-functi.patch | 168 ---- ...ntroduce-apicid-functions-inside-X86.patch | 80 -- ...ove-arch_id-decode-inside-x86_cpus_i.patch | 157 --- ...ix-pkg_id-offset-for-EPYC-cpu-models.patch | 103 -- ...oduce-use_epyc_apic_id_encoding-in-X.patch | 90 -- ...86-Cleanup-and-use-the-EPYC-mode-top.patch | 288 ------ ...86-Enable-new-apic-id-encoding-for-E.patch | 63 -- ...mespace-to-qemu_rbd_strong_runtime_o.patch | 51 - ...fix-FWCfgDataGeneratorClass-get_data.patch | 78 -- ...node-bitmap-aliases-during-migration.patch | 655 ------------ ...Let-wait_migration-return-on-failure.patch | 66 -- ...ma-set-numa_mem_supported-on-old-mac.patch | 77 -- ...ma-compatibility-for-auto_enable_num.patch | 81 -- ...n-Add-block-bitmap-mapping-parameter.patch | 947 ------------------ ...se-max-bandwidth-to-128-MiB-s-1-Gib-.patch | 56 -- ...if-NVRAM-cannot-contain-all-prom-env.patch | 254 ----- ...-start-powered-off-CPUState-property.patch | 82 -- ...port-bitmap-merge-into-backing-image.patch | 203 ---- kvm-redhat-Update-hw_compat_8_2.patch | 64 -- kvm-redhat-define-hw_compat_8_2.patch | 47 - ...qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch | 61 -- ...pdate-pseries-rhel8.2.0-machine-type.patch | 58 -- ...ing-of-whole-process-instead-of-thre.patch | 79 -- 
...setting-of-CPU-halted-state-to-gener.patch | 66 -- ...start-powered-off-property-to-generi.patch | 141 --- ...xperimental-option-for-enabling-secu.patch | 219 ---- ...sb-fix-setup_len-init-CVE-2020-14364.patch | 102 -- ...ser-vsock-pci-force-virtio-version-1.patch | 71 -- ...ost-vsock-ccw-force-virtio-version-1.patch | 66 -- ...ost-vsock-pci-force-virtio-version-1.patch | 87 -- ...cy-support-check-on-machine-types-le.patch | 169 ---- ...t-guest-crash-on-CPU-hotplug-when-br.patch | 99 -- ...-cpu-hot-unplug-request-earlier-if-n.patch | 68 -- ...mware-negotiate-CPU-hotplug-with-SMI.patch | 110 -- kvm.modules | 18 - qemu-kvm.spec | 181 ++-- sources | 2 +- 75 files changed, 1256 insertions(+), 7154 deletions(-) delete mode 100644 0016-Add-support-for-simpletrace.patch rename 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch => 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch (90%) rename 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch => 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch (87%) rename 0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch => 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch (92%) delete mode 100644 0018-usb-xhci-Fix-PCI-capability-order.patch rename 0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch => 0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch (93%) create mode 100644 0020-Upstream.patch create mode 100644 0021-RHEL-9-test.patch delete mode 100644 0022-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch delete mode 100644 0023-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch delete mode 100644 0024-redhat-define-hw_compat_8_2.patch delete mode 100644 0025-x86-Add-8.3.0-x86_64-machine-type.patch delete mode 100644 0027-hw-arm-Changes-to-rhel820-machine.patch delete mode 100644 0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch delete mode 100644 0029-hw-arm-Add-rhel830-machine-type.patch delete mode 100644 
0030-redhat-define-pseries-rhel8.3.0-machine-type.patch delete mode 100644 0031-ppc-Set-correct-max_cpus-value-on-spapr-rhel-machine.patch delete mode 100644 0032-arm-Set-correct-max_cpus-value-on-virt-rhel-machine-.patch delete mode 100644 0033-vl-Remove-downstream-only-MAX_RHEL_CPUS-code.patch delete mode 100644 0034-q35-Set-max_cpus-to-512.patch delete mode 100644 0035-RHEL-only-arm-virt-Allow-the-TPM_TIS_SYSBUS-device-d.patch delete mode 100644 0036-RHEL-only-Enable-vTPM-for-ARM-in-downstream-configs.patch delete mode 100644 kvm-Disable-TPM-passthrough-backend-on-ARM.patch delete mode 100644 kvm-Drop-bogus-IPv6-messages.patch delete mode 100644 kvm-Revert-Drop-bogus-IPv6-messages.patch delete mode 100644 kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch delete mode 100644 kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch delete mode 100644 kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch delete mode 100644 kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch delete mode 100644 kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch delete mode 100644 kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch delete mode 100644 kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch delete mode 100644 kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch delete mode 100644 kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch delete mode 100644 kvm-iotests-Test-node-bitmap-aliases-during-migration.patch delete mode 100644 kvm-iotests.py-Let-wait_migration-return-on-failure.patch delete mode 100644 kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch delete mode 100644 kvm-machine_types-numa-compatibility-for-auto_enable_num.patch delete mode 100644 kvm-migration-Add-block-bitmap-mapping-parameter.patch delete mode 100644 kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch delete mode 100644 kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch delete 
mode 100644 kvm-ppc-spapr-Use-start-powered-off-CPUState-property.patch delete mode 100644 kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch delete mode 100644 kvm-redhat-Update-hw_compat_8_2.patch delete mode 100644 kvm-redhat-define-hw_compat_8_2.patch delete mode 100644 kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch delete mode 100644 kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch delete mode 100644 kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch delete mode 100644 kvm-target-arm-Move-setting-of-CPU-halted-state-to-gener.patch delete mode 100644 kvm-target-arm-Move-start-powered-off-property-to-generi.patch delete mode 100644 kvm-target-ppc-Add-experimental-option-for-enabling-secu.patch delete mode 100644 kvm-usb-fix-setup_len-init-CVE-2020-14364.patch delete mode 100644 kvm-vhost-user-vsock-pci-force-virtio-version-1.patch delete mode 100644 kvm-vhost-vsock-ccw-force-virtio-version-1.patch delete mode 100644 kvm-vhost-vsock-pci-force-virtio-version-1.patch delete mode 100644 kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch delete mode 100644 kvm-x86-cpuhp-prevent-guest-crash-on-CPU-hotplug-when-br.patch delete mode 100644 kvm-x86-cpuhp-refuse-cpu-hot-unplug-request-earlier-if-n.patch delete mode 100644 kvm-x86-lpc9-let-firmware-negotiate-CPU-hotplug-with-SMI.patch delete mode 100644 kvm.modules diff --git a/.gitignore b/.gitignore index c3cbd56..ba6025e 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ /qemu-5.0.0-rc4.tar.xz /qemu-5.0.0.tar.xz /qemu-5.1.0.tar.xz +/qemu-5.2.0-rc1.tar.xz diff --git a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch index d23a83d..5293bb8 100644 --- a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch +++ b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch @@ -1,4 +1,4 @@ -From a71208a6b42d0ef657b2f712d2f08d2ed40e7094 Mon Sep 17 00:00:00 2001 +From bd38ad15dd837dd3baa136334f667d8d63850ae0 Mon Sep 17 
00:00:00 2001 From: Miroslav Rezanina Date: Wed, 29 Jul 2020 07:48:57 +0200 Subject: redhat: Adding slirp to the exploded tree @@ -48,79 +48,77 @@ Signed-off-by: Danilo C. L. de Paula Rebase notes (5.1.0-rc2): - Update slirp directory to commit ce94eba2042d52a0ba3d9e252ebce86715e94275 (used upstream) - -Signed-off-by: Danilo C. L. de Paula --- .gitmodules | 3 - slirp/.clang-format | 58 ++ slirp/.gitignore | 10 + slirp/.gitlab-ci.yml | 27 + slirp/.gitpublish | 3 + - slirp/CHANGELOG.md | 88 ++ + slirp/CHANGELOG.md | 88 +++ slirp/COPYRIGHT | 62 ++ slirp/Makefile | 62 ++ slirp/README.md | 60 ++ slirp/build-aux/git-version-gen | 158 ++++ slirp/build-aux/meson-dist | 16 + - slirp/meson.build | 134 +++ - slirp/src/arp_table.c | 92 ++ - slirp/src/bootp.c | 369 ++++++++ - slirp/src/bootp.h | 129 +++ - slirp/src/cksum.c | 179 ++++ - slirp/src/debug.h | 51 + - slirp/src/dhcpv6.c | 224 +++++ + slirp/meson.build | 134 ++++ + slirp/src/arp_table.c | 92 +++ + slirp/src/bootp.c | 369 ++++++++++ + slirp/src/bootp.h | 129 ++++ + slirp/src/cksum.c | 179 +++++ + slirp/src/debug.h | 51 ++ + slirp/src/dhcpv6.c | 224 ++++++ slirp/src/dhcpv6.h | 68 ++ - slirp/src/dnssearch.c | 306 ++++++ - slirp/src/if.c | 213 +++++ + slirp/src/dnssearch.c | 306 ++++++++ + slirp/src/if.c | 213 ++++++ slirp/src/if.h | 25 + - slirp/src/ip.h | 242 +++++ - slirp/src/ip6.h | 214 +++++ - slirp/src/ip6_icmp.c | 433 +++++++++ - slirp/src/ip6_icmp.h | 219 +++++ - slirp/src/ip6_input.c | 85 ++ + slirp/src/ip.h | 242 ++++++ + slirp/src/ip6.h | 214 ++++++ + slirp/src/ip6_icmp.c | 433 +++++++++++ + slirp/src/ip6_icmp.h | 219 ++++++ + slirp/src/ip6_input.c | 85 +++ slirp/src/ip6_output.c | 39 + - slirp/src/ip_icmp.c | 492 ++++++++++ - slirp/src/ip_icmp.h | 166 ++++ - slirp/src/ip_input.c | 461 +++++++++ - slirp/src/ip_output.c | 169 ++++ + slirp/src/ip_icmp.c | 492 +++++++++++++ + slirp/src/ip_icmp.h | 166 +++++ + slirp/src/ip_input.c | 461 ++++++++++++ + slirp/src/ip_output.c | 169 +++++ 
slirp/src/libslirp-version.h.in | 24 + - slirp/src/libslirp.h | 171 ++++ + slirp/src/libslirp.h | 171 +++++ slirp/src/libslirp.map | 30 + slirp/src/main.h | 16 + - slirp/src/mbuf.c | 224 +++++ - slirp/src/mbuf.h | 127 +++ - slirp/src/misc.c | 390 ++++++++ + slirp/src/mbuf.c | 224 ++++++ + slirp/src/mbuf.h | 127 ++++ + slirp/src/misc.c | 390 ++++++++++ slirp/src/misc.h | 72 ++ - slirp/src/ncsi-pkt.h | 445 +++++++++ - slirp/src/ncsi.c | 193 ++++ - slirp/src/ndp_table.c | 87 ++ - slirp/src/sbuf.c | 168 ++++ + slirp/src/ncsi-pkt.h | 445 +++++++++++ + slirp/src/ncsi.c | 193 +++++ + slirp/src/ndp_table.c | 87 +++ + slirp/src/sbuf.c | 168 +++++ slirp/src/sbuf.h | 27 + - slirp/src/slirp.c | 1185 ++++++++++++++++++++++++ - slirp/src/slirp.h | 284 ++++++ - slirp/src/socket.c | 954 +++++++++++++++++++ - slirp/src/socket.h | 164 ++++ - slirp/src/state.c | 379 ++++++++ + slirp/src/slirp.c | 1185 ++++++++++++++++++++++++++++++ + slirp/src/slirp.h | 284 ++++++++ + slirp/src/socket.c | 954 ++++++++++++++++++++++++ + slirp/src/socket.h | 164 +++++ + slirp/src/state.c | 379 ++++++++++ slirp/src/stream.c | 120 +++ slirp/src/stream.h | 35 + - slirp/src/tcp.h | 169 ++++ - slirp/src/tcp_input.c | 1539 +++++++++++++++++++++++++++++++ - slirp/src/tcp_output.c | 516 +++++++++++ - slirp/src/tcp_subr.c | 980 ++++++++++++++++++++ - slirp/src/tcp_timer.c | 286 ++++++ - slirp/src/tcp_timer.h | 130 +++ + slirp/src/tcp.h | 169 +++++ + slirp/src/tcp_input.c | 1539 +++++++++++++++++++++++++++++++++++++++ + slirp/src/tcp_output.c | 516 +++++++++++++ + slirp/src/tcp_subr.c | 980 +++++++++++++++++++++++++ + slirp/src/tcp_timer.c | 286 ++++++++ + slirp/src/tcp_timer.h | 130 ++++ slirp/src/tcp_var.h | 161 ++++ slirp/src/tcpip.h | 104 +++ - slirp/src/tftp.c | 464 ++++++++++ + slirp/src/tftp.c | 464 ++++++++++++ slirp/src/tftp.h | 54 ++ - slirp/src/udp.c | 365 ++++++++ - slirp/src/udp.h | 90 ++ - slirp/src/udp6.c | 173 ++++ - slirp/src/util.c | 428 +++++++++ - slirp/src/util.h | 189 ++++ + slirp/src/udp.c 
| 365 ++++++++++ + slirp/src/udp.h | 90 +++ + slirp/src/udp6.c | 173 +++++ + slirp/src/util.c | 428 +++++++++++ + slirp/src/util.h | 189 +++++ slirp/src/version.c | 8 + - slirp/src/vmstate.c | 444 +++++++++ - slirp/src/vmstate.h | 391 ++++++++ + slirp/src/vmstate.c | 444 +++++++++++ + slirp/src/vmstate.h | 391 ++++++++++ 70 files changed, 16440 insertions(+), 3 deletions(-) create mode 100644 slirp/.clang-format create mode 100644 slirp/.gitignore @@ -194,7 +192,7 @@ Signed-off-by: Danilo C. L. de Paula diff --git a/slirp/.clang-format b/slirp/.clang-format new file mode 100644 -index 0000000000..17fb49fe65 +index 0000000..17fb49f --- /dev/null +++ b/slirp/.clang-format @@ -0,0 +1,58 @@ @@ -258,7 +256,7 @@ index 0000000000..17fb49fe65 +... diff --git a/slirp/CHANGELOG.md b/slirp/CHANGELOG.md new file mode 100644 -index 0000000000..67b0a74195 +index 0000000..67b0a74 --- /dev/null +++ b/slirp/CHANGELOG.md @@ -0,0 +1,88 @@ @@ -352,7 +350,7 @@ index 0000000000..67b0a74195 +[4.0.0]: https://gitlab.freedesktop.org/slirp/libslirp/commits/v4.0.0 diff --git a/slirp/COPYRIGHT b/slirp/COPYRIGHT new file mode 100644 -index 0000000000..ed49512dbc +index 0000000..ed49512 --- /dev/null +++ b/slirp/COPYRIGHT @@ -0,0 +1,62 @@ @@ -420,7 +418,7 @@ index 0000000000..ed49512dbc +copyrights. diff --git a/slirp/Makefile b/slirp/Makefile new file mode 100644 -index 0000000000..8857b4159b +index 0000000..8857b41 --- /dev/null +++ b/slirp/Makefile @@ -0,0 +1,62 @@ @@ -488,7 +486,7 @@ index 0000000000..8857b4159b +-include $(DEPS) diff --git a/slirp/README.md b/slirp/README.md new file mode 100644 -index 0000000000..dc11e5f18b +index 0000000..dc11e5f --- /dev/null +++ b/slirp/README.md @@ -0,0 +1,60 @@ @@ -554,7 +552,7 @@ index 0000000000..dc11e5f18b +See the [COPYRIGHT](COPYRIGHT) file for details. 
diff --git a/slirp/build-aux/git-version-gen b/slirp/build-aux/git-version-gen new file mode 100755 -index 0000000000..5617eb8d4e +index 0000000..5617eb8 --- /dev/null +++ b/slirp/build-aux/git-version-gen @@ -0,0 +1,158 @@ @@ -718,7 +716,7 @@ index 0000000000..5617eb8d4e +# End: diff --git a/slirp/build-aux/meson-dist b/slirp/build-aux/meson-dist new file mode 100755 -index 0000000000..80d534fec6 +index 0000000..80d534f --- /dev/null +++ b/slirp/build-aux/meson-dist @@ -0,0 +1,16 @@ @@ -740,7 +738,7 @@ index 0000000000..80d534fec6 +echo "$1" > "$MESON_DIST_ROOT/.tarball-version" diff --git a/slirp/meson.build b/slirp/meson.build new file mode 100644 -index 0000000000..3a27149373 +index 0000000..3a27149 --- /dev/null +++ b/slirp/meson.build @@ -0,0 +1,134 @@ @@ -880,7 +878,7 @@ index 0000000000..3a27149373 +) diff --git a/slirp/src/arp_table.c b/slirp/src/arp_table.c new file mode 100644 -index 0000000000..959e5b9ec0 +index 0000000..959e5b9 --- /dev/null +++ b/slirp/src/arp_table.c @@ -0,0 +1,92 @@ @@ -978,7 +976,7 @@ index 0000000000..959e5b9ec0 +} diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c new file mode 100644 -index 0000000000..46e96810ab +index 0000000..46e9681 --- /dev/null +++ b/slirp/src/bootp.c @@ -0,0 +1,369 @@ @@ -1353,7 +1351,7 @@ index 0000000000..46e96810ab +} diff --git a/slirp/src/bootp.h b/slirp/src/bootp.h new file mode 100644 -index 0000000000..a57fa51bcb +index 0000000..a57fa51 --- /dev/null +++ b/slirp/src/bootp.h @@ -0,0 +1,129 @@ @@ -1488,7 +1486,7 @@ index 0000000000..a57fa51bcb +#endif diff --git a/slirp/src/cksum.c b/slirp/src/cksum.c new file mode 100644 -index 0000000000..4d08380a4e +index 0000000..4d08380 --- /dev/null +++ b/slirp/src/cksum.c @@ -0,0 +1,179 @@ @@ -1673,7 +1671,7 @@ index 0000000000..4d08380a4e +} diff --git a/slirp/src/debug.h b/slirp/src/debug.h new file mode 100644 -index 0000000000..47712bd78b +index 0000000..47712bd --- /dev/null +++ b/slirp/src/debug.h @@ -0,0 +1,51 @@ @@ -1730,7 +1728,7 @@ index 
0000000000..47712bd78b +#endif /* DEBUG_H_ */ diff --git a/slirp/src/dhcpv6.c b/slirp/src/dhcpv6.c new file mode 100644 -index 0000000000..77b451b910 +index 0000000..77b451b --- /dev/null +++ b/slirp/src/dhcpv6.c @@ -0,0 +1,224 @@ @@ -1960,7 +1958,7 @@ index 0000000000..77b451b910 +} diff --git a/slirp/src/dhcpv6.h b/slirp/src/dhcpv6.h new file mode 100644 -index 0000000000..d12c49b36c +index 0000000..d12c49b --- /dev/null +++ b/slirp/src/dhcpv6.h @@ -0,0 +1,68 @@ @@ -2034,7 +2032,7 @@ index 0000000000..d12c49b36c +#endif diff --git a/slirp/src/dnssearch.c b/slirp/src/dnssearch.c new file mode 100644 -index 0000000000..55497e860e +index 0000000..55497e8 --- /dev/null +++ b/slirp/src/dnssearch.c @@ -0,0 +1,306 @@ @@ -2346,7 +2344,7 @@ index 0000000000..55497e860e +} diff --git a/slirp/src/if.c b/slirp/src/if.c new file mode 100644 -index 0000000000..23190b5593 +index 0000000..23190b5 --- /dev/null +++ b/slirp/src/if.c @@ -0,0 +1,213 @@ @@ -2565,7 +2563,7 @@ index 0000000000..23190b5593 +} diff --git a/slirp/src/if.h b/slirp/src/if.h new file mode 100644 -index 0000000000..7cf9d2750e +index 0000000..7cf9d27 --- /dev/null +++ b/slirp/src/if.h @@ -0,0 +1,25 @@ @@ -2596,7 +2594,7 @@ index 0000000000..7cf9d2750e +#endif diff --git a/slirp/src/ip.h b/slirp/src/ip.h new file mode 100644 -index 0000000000..e5d4aa8a6d +index 0000000..e5d4aa8 --- /dev/null +++ b/slirp/src/ip.h @@ -0,0 +1,242 @@ @@ -2844,7 +2842,7 @@ index 0000000000..e5d4aa8a6d +#endif diff --git a/slirp/src/ip6.h b/slirp/src/ip6.h new file mode 100644 -index 0000000000..0630309d29 +index 0000000..0630309 --- /dev/null +++ b/slirp/src/ip6.h @@ -0,0 +1,214 @@ @@ -3064,7 +3062,7 @@ index 0000000000..0630309d29 +#endif diff --git a/slirp/src/ip6_icmp.c b/slirp/src/ip6_icmp.c new file mode 100644 -index 0000000000..d9c872bc97 +index 0000000..d9c872b --- /dev/null +++ b/slirp/src/ip6_icmp.c @@ -0,0 +1,433 @@ @@ -3503,7 +3501,7 @@ index 0000000000..d9c872bc97 +} diff --git a/slirp/src/ip6_icmp.h 
b/slirp/src/ip6_icmp.h new file mode 100644 -index 0000000000..c37e60f28d +index 0000000..c37e60f --- /dev/null +++ b/slirp/src/ip6_icmp.h @@ -0,0 +1,219 @@ @@ -3728,7 +3726,7 @@ index 0000000000..c37e60f28d +#endif diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c new file mode 100644 -index 0000000000..a83e4f8e3d +index 0000000..a83e4f8 --- /dev/null +++ b/slirp/src/ip6_input.c @@ -0,0 +1,85 @@ @@ -3819,7 +3817,7 @@ index 0000000000..a83e4f8e3d +} diff --git a/slirp/src/ip6_output.c b/slirp/src/ip6_output.c new file mode 100644 -index 0000000000..b86110662c +index 0000000..b861106 --- /dev/null +++ b/slirp/src/ip6_output.c @@ -0,0 +1,39 @@ @@ -3864,7 +3862,7 @@ index 0000000000..b86110662c +} diff --git a/slirp/src/ip_icmp.c b/slirp/src/ip_icmp.c new file mode 100644 -index 0000000000..13a0e55085 +index 0000000..13a0e55 --- /dev/null +++ b/slirp/src/ip_icmp.c @@ -0,0 +1,492 @@ @@ -4362,7 +4360,7 @@ index 0000000000..13a0e55085 +} diff --git a/slirp/src/ip_icmp.h b/slirp/src/ip_icmp.h new file mode 100644 -index 0000000000..84707db247 +index 0000000..84707db --- /dev/null +++ b/slirp/src/ip_icmp.h @@ -0,0 +1,166 @@ @@ -4534,7 +4532,7 @@ index 0000000000..84707db247 +#endif diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c new file mode 100644 -index 0000000000..7f017a238a +index 0000000..7f017a2 --- /dev/null +++ b/slirp/src/ip_input.c @@ -0,0 +1,461 @@ @@ -5001,7 +4999,7 @@ index 0000000000..7f017a238a +} diff --git a/slirp/src/ip_output.c b/slirp/src/ip_output.c new file mode 100644 -index 0000000000..22916a37df +index 0000000..22916a3 --- /dev/null +++ b/slirp/src/ip_output.c @@ -0,0 +1,169 @@ @@ -5176,7 +5174,7 @@ index 0000000000..22916a37df +} diff --git a/slirp/src/libslirp-version.h.in b/slirp/src/libslirp-version.h.in new file mode 100644 -index 0000000000..faa6c85952 +index 0000000..faa6c85 --- /dev/null +++ b/slirp/src/libslirp-version.h.in @@ -0,0 +1,24 @@ @@ -5206,7 +5204,7 @@ index 0000000000..faa6c85952 +#endif /* LIBSLIRP_VERSION_H_ 
*/ diff --git a/slirp/src/libslirp.h b/slirp/src/libslirp.h new file mode 100644 -index 0000000000..fb4c7e882c +index 0000000..fb4c7e8 --- /dev/null +++ b/slirp/src/libslirp.h @@ -0,0 +1,171 @@ @@ -5383,7 +5381,7 @@ index 0000000000..fb4c7e882c +#endif /* LIBSLIRP_H */ diff --git a/slirp/src/libslirp.map b/slirp/src/libslirp.map new file mode 100644 -index 0000000000..72aab912f4 +index 0000000..72aab91 --- /dev/null +++ b/slirp/src/libslirp.map @@ -0,0 +1,30 @@ @@ -5419,7 +5417,7 @@ index 0000000000..72aab912f4 +} SLIRP_4.1; diff --git a/slirp/src/main.h b/slirp/src/main.h new file mode 100644 -index 0000000000..3b3f883703 +index 0000000..3b3f883 --- /dev/null +++ b/slirp/src/main.h @@ -0,0 +1,16 @@ @@ -5441,7 +5439,7 @@ index 0000000000..3b3f883703 +#endif diff --git a/slirp/src/mbuf.c b/slirp/src/mbuf.c new file mode 100644 -index 0000000000..54ec721eb5 +index 0000000..54ec721 --- /dev/null +++ b/slirp/src/mbuf.c @@ -0,0 +1,224 @@ @@ -5671,7 +5669,7 @@ index 0000000000..54ec721eb5 +} diff --git a/slirp/src/mbuf.h b/slirp/src/mbuf.h new file mode 100644 -index 0000000000..546e7852c5 +index 0000000..546e785 --- /dev/null +++ b/slirp/src/mbuf.h @@ -0,0 +1,127 @@ @@ -5804,7 +5802,7 @@ index 0000000000..546e7852c5 +#endif diff --git a/slirp/src/misc.c b/slirp/src/misc.c new file mode 100644 -index 0000000000..e6bc0a207d +index 0000000..e6bc0a2 --- /dev/null +++ b/slirp/src/misc.c @@ -0,0 +1,390 @@ @@ -6201,7 +6199,7 @@ index 0000000000..e6bc0a207d \ No newline at end of file diff --git a/slirp/src/misc.h b/slirp/src/misc.h new file mode 100644 -index 0000000000..81b370cfb1 +index 0000000..81b370c --- /dev/null +++ b/slirp/src/misc.h @@ -0,0 +1,72 @@ @@ -6279,7 +6277,7 @@ index 0000000000..81b370cfb1 +#endif diff --git a/slirp/src/ncsi-pkt.h b/slirp/src/ncsi-pkt.h new file mode 100644 -index 0000000000..7795ad83ee +index 0000000..7795ad8 --- /dev/null +++ b/slirp/src/ncsi-pkt.h @@ -0,0 +1,445 @@ @@ -6730,7 +6728,7 @@ index 0000000000..7795ad83ee +#endif /* NCSI_PKT_H 
*/ diff --git a/slirp/src/ncsi.c b/slirp/src/ncsi.c new file mode 100644 -index 0000000000..3c1dfef1ff +index 0000000..3c1dfef --- /dev/null +++ b/slirp/src/ncsi.c @@ -0,0 +1,193 @@ @@ -6929,7 +6927,7 @@ index 0000000000..3c1dfef1ff +} diff --git a/slirp/src/ndp_table.c b/slirp/src/ndp_table.c new file mode 100644 -index 0000000000..110d6ea0e4 +index 0000000..110d6ea --- /dev/null +++ b/slirp/src/ndp_table.c @@ -0,0 +1,87 @@ @@ -7022,7 +7020,7 @@ index 0000000000..110d6ea0e4 +} diff --git a/slirp/src/sbuf.c b/slirp/src/sbuf.c new file mode 100644 -index 0000000000..2fb9176144 +index 0000000..2fb9176 --- /dev/null +++ b/slirp/src/sbuf.c @@ -0,0 +1,168 @@ @@ -7196,7 +7194,7 @@ index 0000000000..2fb9176144 +} diff --git a/slirp/src/sbuf.h b/slirp/src/sbuf.h new file mode 100644 -index 0000000000..01886fbd01 +index 0000000..01886fb --- /dev/null +++ b/slirp/src/sbuf.h @@ -0,0 +1,27 @@ @@ -7229,7 +7227,7 @@ index 0000000000..01886fbd01 +#endif diff --git a/slirp/src/slirp.c b/slirp/src/slirp.c new file mode 100644 -index 0000000000..dba7c98163 +index 0000000..dba7c98 --- /dev/null +++ b/slirp/src/slirp.c @@ -0,0 +1,1185 @@ @@ -8420,7 +8418,7 @@ index 0000000000..dba7c98163 +} diff --git a/slirp/src/slirp.h b/slirp/src/slirp.h new file mode 100644 -index 0000000000..763a65b9ef +index 0000000..763a65b --- /dev/null +++ b/slirp/src/slirp.h @@ -0,0 +1,284 @@ @@ -8710,7 +8708,7 @@ index 0000000000..763a65b9ef +#endif diff --git a/slirp/src/socket.c b/slirp/src/socket.c new file mode 100644 -index 0000000000..1e385df0d8 +index 0000000..1e385df --- /dev/null +++ b/slirp/src/socket.c @@ -0,0 +1,954 @@ @@ -9670,7 +9668,7 @@ index 0000000000..1e385df0d8 +} diff --git a/slirp/src/socket.h b/slirp/src/socket.h new file mode 100644 -index 0000000000..a6a1e5e214 +index 0000000..a6a1e5e --- /dev/null +++ b/slirp/src/socket.h @@ -0,0 +1,164 @@ @@ -9840,7 +9838,7 @@ index 0000000000..a6a1e5e214 +#endif /* SLIRP_SOCKET_H */ diff --git a/slirp/src/state.c b/slirp/src/state.c new file mode 
100644 -index 0000000000..22af77b256 +index 0000000..22af77b --- /dev/null +++ b/slirp/src/state.c @@ -0,0 +1,379 @@ @@ -10225,7 +10223,7 @@ index 0000000000..22af77b256 +} diff --git a/slirp/src/stream.c b/slirp/src/stream.c new file mode 100644 -index 0000000000..6cf326f669 +index 0000000..6cf326f --- /dev/null +++ b/slirp/src/stream.c @@ -0,0 +1,120 @@ @@ -10351,7 +10349,7 @@ index 0000000000..6cf326f669 +} diff --git a/slirp/src/stream.h b/slirp/src/stream.h new file mode 100644 -index 0000000000..08bb5b6610 +index 0000000..08bb5b6 --- /dev/null +++ b/slirp/src/stream.h @@ -0,0 +1,35 @@ @@ -10392,7 +10390,7 @@ index 0000000000..08bb5b6610 +#endif /* STREAM_H_ */ diff --git a/slirp/src/tcp.h b/slirp/src/tcp.h new file mode 100644 -index 0000000000..70a9760664 +index 0000000..70a9760 --- /dev/null +++ b/slirp/src/tcp.h @@ -0,0 +1,169 @@ @@ -10567,7 +10565,7 @@ index 0000000000..70a9760664 +#endif diff --git a/slirp/src/tcp_input.c b/slirp/src/tcp_input.c new file mode 100644 -index 0000000000..d55b0c81dc +index 0000000..d55b0c8 --- /dev/null +++ b/slirp/src/tcp_input.c @@ -0,0 +1,1539 @@ @@ -12112,7 +12110,7 @@ index 0000000000..d55b0c81dc +} diff --git a/slirp/src/tcp_output.c b/slirp/src/tcp_output.c new file mode 100644 -index 0000000000..383fe31dcf +index 0000000..383fe31 --- /dev/null +++ b/slirp/src/tcp_output.c @@ -0,0 +1,516 @@ @@ -12634,7 +12632,7 @@ index 0000000000..383fe31dcf +} diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c new file mode 100644 -index 0000000000..a1016d90df +index 0000000..a1016d9 --- /dev/null +++ b/slirp/src/tcp_subr.c @@ -0,0 +1,980 @@ @@ -13620,7 +13618,7 @@ index 0000000000..a1016d90df +} diff --git a/slirp/src/tcp_timer.c b/slirp/src/tcp_timer.c new file mode 100644 -index 0000000000..102023e7cd +index 0000000..102023e --- /dev/null +++ b/slirp/src/tcp_timer.c @@ -0,0 +1,286 @@ @@ -13912,7 +13910,7 @@ index 0000000000..102023e7cd +} diff --git a/slirp/src/tcp_timer.h b/slirp/src/tcp_timer.h new file mode 100644 -index 
0000000000..584a5594e4 +index 0000000..584a559 --- /dev/null +++ b/slirp/src/tcp_timer.h @@ -0,0 +1,130 @@ @@ -14048,7 +14046,7 @@ index 0000000000..584a5594e4 +#endif diff --git a/slirp/src/tcp_var.h b/slirp/src/tcp_var.h new file mode 100644 -index 0000000000..c8da8cbd16 +index 0000000..c8da8cb --- /dev/null +++ b/slirp/src/tcp_var.h @@ -0,0 +1,161 @@ @@ -14215,7 +14213,7 @@ index 0000000000..c8da8cbd16 +#endif diff --git a/slirp/src/tcpip.h b/slirp/src/tcpip.h new file mode 100644 -index 0000000000..d3df021493 +index 0000000..d3df021 --- /dev/null +++ b/slirp/src/tcpip.h @@ -0,0 +1,104 @@ @@ -14325,7 +14323,7 @@ index 0000000000..d3df021493 +#endif diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c new file mode 100644 -index 0000000000..c6950ee10f +index 0000000..c6950ee --- /dev/null +++ b/slirp/src/tftp.c @@ -0,0 +1,464 @@ @@ -14795,7 +14793,7 @@ index 0000000000..c6950ee10f +} diff --git a/slirp/src/tftp.h b/slirp/src/tftp.h new file mode 100644 -index 0000000000..6d75478e83 +index 0000000..6d75478 --- /dev/null +++ b/slirp/src/tftp.h @@ -0,0 +1,54 @@ @@ -14855,7 +14853,7 @@ index 0000000000..6d75478e83 +#endif diff --git a/slirp/src/udp.c b/slirp/src/udp.c new file mode 100644 -index 0000000000..0ad44d7c03 +index 0000000..0ad44d7 --- /dev/null +++ b/slirp/src/udp.c @@ -0,0 +1,365 @@ @@ -15226,7 +15224,7 @@ index 0000000000..0ad44d7c03 +} diff --git a/slirp/src/udp.h b/slirp/src/udp.h new file mode 100644 -index 0000000000..c3b83fdc56 +index 0000000..c3b83fd --- /dev/null +++ b/slirp/src/udp.h @@ -0,0 +1,90 @@ @@ -15322,7 +15320,7 @@ index 0000000000..c3b83fdc56 +#endif diff --git a/slirp/src/udp6.c b/slirp/src/udp6.c new file mode 100644 -index 0000000000..6f9486bbca +index 0000000..6f9486b --- /dev/null +++ b/slirp/src/udp6.c @@ -0,0 +1,173 @@ @@ -15501,7 +15499,7 @@ index 0000000000..6f9486bbca +} diff --git a/slirp/src/util.c b/slirp/src/util.c new file mode 100644 -index 0000000000..d3ed5faf8b +index 0000000..d3ed5fa --- /dev/null +++ b/slirp/src/util.c 
@@ -0,0 +1,428 @@ @@ -15935,7 +15933,7 @@ index 0000000000..d3ed5faf8b +} diff --git a/slirp/src/util.h b/slirp/src/util.h new file mode 100644 -index 0000000000..d67b3d0de9 +index 0000000..d67b3d0 --- /dev/null +++ b/slirp/src/util.h @@ -0,0 +1,189 @@ @@ -16130,7 +16128,7 @@ index 0000000000..d67b3d0de9 +#endif diff --git a/slirp/src/version.c b/slirp/src/version.c new file mode 100644 -index 0000000000..93e0be9c24 +index 0000000..93e0be9 --- /dev/null +++ b/slirp/src/version.c @@ -0,0 +1,8 @@ @@ -16144,7 +16142,7 @@ index 0000000000..93e0be9c24 +} diff --git a/slirp/src/vmstate.c b/slirp/src/vmstate.c new file mode 100644 -index 0000000000..68cc1729c5 +index 0000000..68cc172 --- /dev/null +++ b/slirp/src/vmstate.c @@ -0,0 +1,444 @@ @@ -16594,7 +16592,7 @@ index 0000000000..68cc1729c5 +} diff --git a/slirp/src/vmstate.h b/slirp/src/vmstate.h new file mode 100644 -index 0000000000..94c6a4bc7b +index 0000000..94c6a4b --- /dev/null +++ b/slirp/src/vmstate.h @@ -0,0 +1,391 @@ @@ -16990,5 +16988,5 @@ index 0000000000..94c6a4bc7b + +#endif -- -2.27.0 +1.8.3.1 diff --git a/0005-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch index c3341be..346e59a 100644 --- a/0005-Initial-redhat-build.patch +++ b/0005-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From cf7532e0c854b385ee7acdf5788bc407172f7ae9 Mon Sep 17 00:00:00 2001 +From 788398591901ece77695d73db0e392ce2c357636 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build @@ -11,7 +11,7 @@ several issues are fixed in QEMU tree: - Man page renamed from qemu to qemu-kvm - man page is installed using make install so we have to fix it in qemu tree -This rebase includes changes up to qemu-kvm-4.2.0-29.el8 +This rebase includes changes up to qemu-kvm-5.1.0-14.el8 Rebase notes (3.1.0): - added new configure options @@ -66,6 +66,21 @@ Rebase notes (5.1.0): - bumped required libusbx-devel version to 1.0.23 - bumped libfdt version to 1.6.0 +Rebase notes (5.2.0 
rc0): +- Move libfdt dependency to qemu-kvm-core +- Move manpage rename from Makefile to spec file +- rename with-confsuffix configure option to with-suffix (upstream) +- Bump libusbx Requires version to 1.0.234 +- Manual copy of keymaps in spec file (BZ 1875217) +- Removed /usr/share/qemu-kvm/npcm7xx_bootrom.bin, considering it + unpackaged for now. +- Removed /usr/share/qemu-kvm/qboot.rom, considering unpackaged. +- Added build dependency for meson and ninja-build +- hw/s390/s390-pci-vfio.c hack - set NULL for g_autofree variables +- Removed Chanelog (upstream) +- Fix in directory used for docs (upstream add %name so we do not pass it in configure) +- Package various .so as part of qemu-kvm-core package. + Merged patches (3.1.0): - 01f0c9f RHEL8: Add disable configure options to qemu spec file - Spec file cleanups @@ -95,64 +110,191 @@ Merged patches (5.1.0): - 9b1e140 redhat: updating the modular target - 44b8bd0 spec: Fix python shenigans for tests -Signed-off-by: Danilo C. L. de Paula +Merged patches (5.2.0 rc0): +- 9238ce7 Add support for simpletrace +- 5797cff Remove explicit glusterfs-api dependency +- fd62478 disable virgl +- 0205018 redhat: link /etc/qemu-ga/fsfreeze-hook to /etc/qemu-kvm/ +- 3645097 redhat: Make all generated so files executable (not only block-*) --- - .gitignore | 1 + - Makefile | 3 +- - configure | 1 + - redhat/Makefile | 86 + - redhat/Makefile.common | 54 + - redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 2977 +++++++++++++++++++++++++++ - redhat/qemu-pr-helper.service | 2 +- - redhat/scripts/extract_build_cmd.py | 2 +- - redhat/scripts/process-patches.sh | 17 +- - tests/check-block.sh | 2 + - ui/vnc.c | 2 +- - 12 files changed, 3173 insertions(+), 13 deletions(-) + .gitignore | 1 + + README.systemtap | 43 + + crypto/meson.build | 1 - + hw/s390x/s390-pci-vfio.c | 4 +- + meson.build | 10 +- + redhat/Makefile | 90 + + redhat/Makefile.common | 53 + + redhat/README.tests | 39 + + redhat/qemu-kvm.spec.template | 3170 
+++++++++++++++++++++++++++++++ + redhat/scripts/extract_build_cmd.py | 2 +- + redhat/scripts/process-patches.sh | 17 +- + scripts/qemu-guest-agent/fsfreeze-hook | 2 +- + scripts/systemtap/conf.d/qemu_kvm.conf | 4 + + scripts/systemtap/script.d/qemu_kvm.stp | 1 + + tests/check-block.sh | 2 + + ui/vnc.c | 2 +- + 16 files changed, 3424 insertions(+), 17 deletions(-) + create mode 100644 README.systemtap create mode 100644 redhat/Makefile create mode 100644 redhat/Makefile.common create mode 100644 redhat/README.tests create mode 100644 redhat/qemu-kvm.spec.template + create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf + create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp -diff --git a/Makefile b/Makefile -index 13dd708c4a..42e854b2b1 100644 ---- a/Makefile -+++ b/Makefile -@@ -549,6 +549,7 @@ CAP_CFLAGS += -DCAPSTONE_HAS_ARM - CAP_CFLAGS += -DCAPSTONE_HAS_ARM64 - CAP_CFLAGS += -DCAPSTONE_HAS_POWERPC - CAP_CFLAGS += -DCAPSTONE_HAS_X86 -+CAP_CFLAGS += -Wp,-D_GLIBCXX_ASSERTIONS - - .PHONY: capstone/all - capstone/all: .git-submodule-status -@@ -879,7 +880,7 @@ install-doc: $(DOCS) install-sphinxdocs - $(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)/interop" - ifdef CONFIG_POSIX - $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" -- $(INSTALL_DATA) $(MANUAL_BUILDDIR)/system/qemu.1 "$(DESTDIR)$(mandir)/man1" -+ $(INSTALL_DATA) $(MANUAL_BUILDDIR)/system/qemu.1 "$(DESTDIR)$(mandir)/man1/qemu-kvm.1" - $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man7" - $(INSTALL_DATA) docs/interop/qemu-qmp-ref.7 "$(DESTDIR)$(mandir)/man7" - $(INSTALL_DATA) $(MANUAL_BUILDDIR)/system/qemu-block-drivers.7 "$(DESTDIR)$(mandir)/man7" -diff --git a/configure b/configure -index 2acc4d1465..eb5b695dbe 100755 ---- a/configure -+++ b/configure -@@ -2633,6 +2633,7 @@ if test "$seccomp" != "no" ; then - seccomp="no" - fi - fi +diff --git a/README.systemtap b/README.systemtap +new file mode 100644 +index 0000000..ad913fc +--- /dev/null ++++ b/README.systemtap +@@ -0,0 +1,43 @@ ++QEMU 
tracing using systemtap-initscript ++--------------------------------------- + - ########################################## - # xen probe ++You can capture QEMU trace data all the time using systemtap-initscript. This ++uses SystemTap's flight recorder mode to trace all running guests to a ++fixed-size buffer on the host. Old trace entries are overwritten by new ++entries when the buffer size wraps. ++ ++1. Install the systemtap-initscript package: ++ # yum install systemtap-initscript ++ ++2. Install the systemtap scripts and the conf file: ++ # cp /usr/share/qemu-kvm/systemtap/script.d/qemu_kvm.stp /etc/systemtap/script.d/ ++ # cp /usr/share/qemu-kvm/systemtap/conf.d/qemu_kvm.conf /etc/systemtap/conf.d/ ++ ++The set of trace events to enable is given in qemu_kvm.stp. This SystemTap ++script can be customized to add or remove trace events provided in ++/usr/share/systemtap/tapset/qemu-kvm-simpletrace.stp. ++ ++SystemTap customizations can be made to qemu_kvm.conf to control the flight ++recorder buffer size and whether to store traces in memory only or disk too. ++See stap(1) for option documentation. ++ ++3. Start the systemtap service. ++ # service systemtap start qemu_kvm ++ ++4. Make the service start at boot time. ++ # chkconfig systemtap on ++ ++5. Confirm that the service works. ++ # service systemtap status qemu_kvm ++ qemu_kvm is running... ++ ++When you want to inspect the trace buffer, perform the following steps: ++ ++1. Dump the trace buffer. ++ # staprun -A qemu_kvm >/tmp/trace.log ++ ++2. Start the systemtap service because the preceding step stops the service. ++ # service systemtap start qemu_kvm ++ ++3. Translate the trace record to readable format. 
++ # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log +diff --git a/crypto/meson.build b/crypto/meson.build +index 7f37b5d..e30efb8 100644 +--- a/crypto/meson.build ++++ b/crypto/meson.build +@@ -50,7 +50,6 @@ if 'CONFIG_GNUTLS' in config_host + crypto_ss.add(gnutls) + endif +- + util_ss.add(files('aes.c')) + util_ss.add(files('init.c')) + +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index d5c7806..19f92eb 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -28,7 +28,7 @@ + */ + bool s390_pci_update_dma_avail(int fd, unsigned int *avail) + { +- g_autofree struct vfio_iommu_type1_info *info; ++ g_autofree struct vfio_iommu_type1_info *info = NULL; + uint32_t argsz; + + assert(avail); +@@ -229,7 +229,7 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev, + */ + void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) + { +- g_autofree struct vfio_device_info *info; ++ g_autofree struct vfio_device_info *info = NULL; + VFIOPCIDevice *vfio_pci; + uint32_t argsz; + int fd; +diff --git a/meson.build b/meson.build +index b473620..3636fb9 100644 +--- a/meson.build ++++ b/meson.build +@@ -292,6 +292,10 @@ if 'CONFIG_GNUTLS' in config_host + gnutls = declare_dependency(compile_args: config_host['GNUTLS_CFLAGS'].split(), + link_args: config_host['GNUTLS_LIBS'].split()) + endif ++gcrypt = not_found ++if 'CONFIG_GCRYPT' in config_host ++ gcrypt = dependency('libgcrypt') ++endif + pixman = not_found + if have_system or have_tools + pixman = dependency('pixman-1', required: have_system, version:'>=0.21.8', +@@ -1123,7 +1127,9 @@ if capstone_opt == 'internal' + # Include all configuration defines via a header file, which will wind up + # as a dependency on the object file, and thus changes here will result + # in a rebuild. 
+- '-include', 'capstone-defs.h' ++ '-include', 'capstone-defs.h', ++ ++ '-Wp,-D_GLIBCXX_ASSERTIONS', + ] + + libcapstone = static_library('capstone', +@@ -1641,7 +1647,7 @@ libblock = static_library('block', block_ss.sources() + genh, + + block = declare_dependency(link_whole: [libblock], + link_args: '@block.syms', +- dependencies: [crypto, io]) ++ dependencies: [crypto, io, zlib]) + + blockdev_ss = blockdev_ss.apply(config_host, strict: false) + libblockdev = static_library('blockdev', blockdev_ss.sources() + genh, +diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook +index 13aafd4..e9b84ec 100755 +--- a/scripts/qemu-guest-agent/fsfreeze-hook ++++ b/scripts/qemu-guest-agent/fsfreeze-hook +@@ -8,7 +8,7 @@ + # request, it is issued with "thaw" argument after filesystem is thawed. + + LOGFILE=/var/log/qga-fsfreeze-hook.log +-FSFREEZE_D=$(dirname -- "$0")/fsfreeze-hook.d ++FSFREEZE_D=$(dirname -- "$(realpath $0)")/fsfreeze-hook.d + + # Check whether file $1 is a backup or rpm-generated file and should be ignored + is_ignored_file() { +diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf +new file mode 100644 +index 0000000..372d816 +--- /dev/null ++++ b/scripts/systemtap/conf.d/qemu_kvm.conf +@@ -0,0 +1,4 @@ ++# Force load uprobes (see BZ#1118352) ++stap -e 'probe process("/usr/libexec/qemu-kvm").function("main") { printf("") }' -c true ++ ++qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes +diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp +new file mode 100644 +index 0000000..c04abf9 +--- /dev/null ++++ b/scripts/systemtap/script.d/qemu_kvm.stp +@@ -0,0 +1 @@ ++probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} diff --git a/tests/check-block.sh b/tests/check-block.sh -index 8e29c868e5..e9bcb5ac27 100755 +index f6b1bda..645b550 100755 --- a/tests/check-block.sh 
+++ b/tests/check-block.sh -@@ -53,6 +53,8 @@ if ! (sed --version | grep 'GNU sed') > /dev/null 2>&1 ; then +@@ -58,6 +58,8 @@ if ! (sed --version | grep 'GNU sed') > /dev/null 2>&1 ; then fi fi @@ -160,12 +302,12 @@ index 8e29c868e5..e9bcb5ac27 100755 + cd tests/qemu-iotests - ret=0 + # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests diff --git a/ui/vnc.c b/ui/vnc.c -index f006aa1afd..992f428fec 100644 +index 4923505..eb5520e 100644 --- a/ui/vnc.c +++ b/ui/vnc.c -@@ -3970,7 +3970,7 @@ void vnc_display_open(const char *id, Error **errp) +@@ -3982,7 +3982,7 @@ void vnc_display_open(const char *id, Error **errp) #ifdef CONFIG_VNC_SASL if (sasl) { @@ -175,5 +317,5 @@ index f006aa1afd..992f428fec 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -2.27.0 +1.8.3.1 diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0006-Enable-disable-devices-for-RHEL.patch index 5b44b6b..86ecb27 100644 --- a/0006-Enable-disable-devices-for-RHEL.patch +++ b/0006-Enable-disable-devices-for-RHEL.patch @@ -1,6 +1,6 @@ -From 3c93dbb29fed4f555904494efe9b823310a14604 Mon Sep 17 00:00:00 2001 +From 65eea220dcce6177b306eae08935f5354847bb08 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Mon, 11 Jan 2016 11:53:33 +0100 +Date: Wed, 2 Sep 2020 09:11:07 +0200 Subject: Enable/disable devices for RHEL This commit adds all changes related to changes in supported devices. 
@@ -47,6 +47,15 @@ Rebase notes (5.1.0): - removed obsolete hw/bt/Makefile.objs chunk - removed unnecessary changes in target/i386/cpu.c +Rebase notes (5.2.0 rc0): +- Added CONFIG_USB_XHCI_PCI on aarch64 ppc64 and x86_64 +- remove vl.c hack for no hpet +- Enable CONFIG_PTIMER for aarch64 +- Do not package hw-display-virtio-gpu.so on s390x + +Rebase notes (5.2.0 rc1): +- Added CONFIG_ARM_GIC for aarch64 (required for build) + Merged patches (qemu 3.1.0): - d51e082 Re-enable CONFIG_HYPERV_TESTDEV - 4b889f3 Declare cirrus-vga as deprecated @@ -65,58 +74,62 @@ Merged patches (4.1.0): Merged patches (4.2.0): - f7587dd RHEL: disable hostmem-memfd -Merged patches (weekly-5.1.0): +Merged patches (5.1.0): - 4543a3c i386: Remove cpu64-rhel6 CPU model - 96533 aarch64: Remove tcg cpu types (pjw commit) - 559d589 Revert "RHEL: disable hostmem-memfd" - 441128e enable ramfb -Signed-off-by: Danilo C. L. de Paula +Merged patches (5.2.0 rc0): +- f70eb50 RHEL-only: Enable vTPM for POWER in downstream configs +- 69d8ae7 redhat: fix 5.0 rebase missing ISA TPM TIS +- 8310f89 RHEL-only: Enable vTPM for ARM in downstream configs +- 4a8ccfd Disable TPM passthrough backend on ARM --- - default-configs/aarch64-rh-devices.mak | 22 ++++++ - default-configs/aarch64-softmmu.mak | 10 ++- - default-configs/ppc64-rh-devices.mak | 34 +++++++++ - default-configs/ppc64-softmmu.mak | 10 ++- - default-configs/rh-virtio.mak | 10 +++ - default-configs/s390x-rh-devices.mak | 15 ++++ - default-configs/s390x-softmmu.mak | 4 +- - default-configs/x86_64-rh-devices.mak | 100 +++++++++++++++++++++++++ - default-configs/x86_64-softmmu.mak | 4 +- - hw/acpi/ich9.c | 4 +- - hw/arm/Makefile.objs | 2 +- - hw/block/fdc.c | 10 +++ - hw/cpu/Makefile.objs | 5 +- - hw/display/cirrus_vga.c | 3 + - hw/ide/piix.c | 5 +- - hw/input/pckbd.c | 2 + - hw/net/e1000.c | 2 + - hw/ppc/spapr_cpu_core.c | 2 + - hw/usb/Makefile.objs | 4 +- - qemu-options.hx | 4 - - redhat/qemu-kvm.spec.template | 5 +- - softmmu/vl.c | 2 +- - 
target/arm/cpu.c | 4 +- - target/arm/cpu_tcg.c | 3 + - target/i386/cpu.c | 1 + - target/ppc/cpu-models.c | 10 +++ - target/s390x/cpu_models.c | 3 + - target/s390x/kvm.c | 8 ++ - 28 files changed, 263 insertions(+), 25 deletions(-) - create mode 100644 default-configs/aarch64-rh-devices.mak - create mode 100644 default-configs/ppc64-rh-devices.mak - create mode 100644 default-configs/rh-virtio.mak - create mode 100644 default-configs/s390x-rh-devices.mak - create mode 100644 default-configs/x86_64-rh-devices.mak + default-configs/devices/aarch64-rh-devices.mak | 27 +++++++ + default-configs/devices/aarch64-softmmu.mak | 10 ++- + default-configs/devices/ppc64-rh-devices.mak | 38 ++++++++++ + default-configs/devices/ppc64-softmmu.mak | 10 ++- + default-configs/devices/rh-virtio.mak | 10 +++ + default-configs/devices/s390x-rh-devices.mak | 15 ++++ + default-configs/devices/s390x-softmmu.mak | 4 +- + default-configs/devices/x86_64-rh-devices.mak | 101 +++++++++++++++++++++++++ + default-configs/devices/x86_64-softmmu.mak | 4 +- + hw/acpi/ich9.c | 4 +- + hw/arm/meson.build | 2 +- + hw/block/fdc.c | 10 +++ + hw/cpu/meson.build | 5 +- + hw/display/cirrus_vga.c | 3 + + hw/ide/piix.c | 5 +- + hw/input/pckbd.c | 2 + + hw/net/e1000.c | 2 + + hw/ppc/spapr_cpu_core.c | 2 + + hw/usb/meson.build | 2 +- + qemu-options.hx | 4 - + redhat/Makefile.common | 1 + + redhat/qemu-kvm.spec.template | 9 ++- + target/arm/cpu.c | 4 +- + target/arm/cpu_tcg.c | 3 + + target/ppc/cpu-models.c | 10 +++ + target/s390x/cpu_models.c | 3 + + target/s390x/kvm.c | 8 ++ + 27 files changed, 273 insertions(+), 25 deletions(-) + create mode 100644 default-configs/devices/aarch64-rh-devices.mak + create mode 100644 default-configs/devices/ppc64-rh-devices.mak + create mode 100644 default-configs/devices/rh-virtio.mak + create mode 100644 default-configs/devices/s390x-rh-devices.mak + create mode 100644 default-configs/devices/x86_64-rh-devices.mak -diff --git a/default-configs/aarch64-rh-devices.mak 
b/default-configs/aarch64-rh-devices.mak +diff --git a/default-configs/devices/aarch64-rh-devices.mak b/default-configs/devices/aarch64-rh-devices.mak new file mode 100644 -index 0000000000..f0cf5a1b22 +index 0000000..9831940 --- /dev/null -+++ b/default-configs/aarch64-rh-devices.mak -@@ -0,0 +1,22 @@ ++++ b/default-configs/devices/aarch64-rh-devices.mak +@@ -0,0 +1,27 @@ +include rh-virtio.mak + +CONFIG_ARM_GIC_KVM=y ++CONFIG_ARM_GIC=y +CONFIG_ARM_SMMUV3=y +CONFIG_ARM_V7M=y +CONFIG_ARM_VIRT=y @@ -129,6 +142,7 @@ index 0000000000..f0cf5a1b22 +CONFIG_SEMIHOSTING=y +CONFIG_USB=y +CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_PCI=y +CONFIG_VFIO=y +CONFIG_VFIO_PCI=y +CONFIG_VIRTIO_MMIO=y @@ -136,10 +150,13 @@ index 0000000000..f0cf5a1b22 +CONFIG_XIO3130=y +CONFIG_NVDIMM=y +CONFIG_ACPI_APEI=y -diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak -index 958b1e08e4..8f6867d48a 100644 ---- a/default-configs/aarch64-softmmu.mak -+++ b/default-configs/aarch64-softmmu.mak ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_TIS_SYSBUS=y ++CONFIG_PTIMER=y +diff --git a/default-configs/devices/aarch64-softmmu.mak b/default-configs/devices/aarch64-softmmu.mak +index 958b1e0..8f6867d 100644 +--- a/default-configs/devices/aarch64-softmmu.mak ++++ b/default-configs/devices/aarch64-softmmu.mak @@ -1,8 +1,10 @@ # Default configuration for aarch64-softmmu @@ -155,12 +172,12 @@ index 958b1e08e4..8f6867d48a 100644 +#CONFIG_SBSA_REF=y + +include aarch64-rh-devices.mak -diff --git a/default-configs/ppc64-rh-devices.mak b/default-configs/ppc64-rh-devices.mak +diff --git a/default-configs/devices/ppc64-rh-devices.mak b/default-configs/devices/ppc64-rh-devices.mak new file mode 100644 -index 0000000000..ecbe53fe63 +index 0000000..467a16b --- /dev/null -+++ b/default-configs/ppc64-rh-devices.mak -@@ -0,0 +1,34 @@ ++++ b/default-configs/devices/ppc64-rh-devices.mak +@@ -0,0 +1,38 @@ +include rh-virtio.mak + +CONFIG_DIMM=y @@ -181,6 +198,7 @@ index 0000000000..ecbe53fe63 
+CONFIG_USB_STORAGE_BOT=y +CONFIG_USB_XHCI=y +CONFIG_USB_XHCI_NEC=y ++CONFIG_USB_XHCI_PCI=y +CONFIG_VFIO=y +CONFIG_VFIO_PCI=y +CONFIG_VGA=y @@ -195,10 +213,13 @@ index 0000000000..ecbe53fe63 +CONFIG_XIVE=y +CONFIG_XIVE_SPAPR=y +CONFIG_XIVE_KVM=y -diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak -index ae0841fa3a..040e5575e7 100644 ---- a/default-configs/ppc64-softmmu.mak -+++ b/default-configs/ppc64-softmmu.mak ++CONFIG_TPM_SPAPR=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_PASSTHROUGH=y +diff --git a/default-configs/devices/ppc64-softmmu.mak b/default-configs/devices/ppc64-softmmu.mak +index ae0841f..040e557 100644 +--- a/default-configs/devices/ppc64-softmmu.mak ++++ b/default-configs/devices/ppc64-softmmu.mak @@ -1,11 +1,13 @@ # Default configuration for ppc64-softmmu @@ -217,11 +238,11 @@ index ae0841fa3a..040e5575e7 100644 +#CONFIG_NVDIMM=y + +include ppc64-rh-devices.mak -diff --git a/default-configs/rh-virtio.mak b/default-configs/rh-virtio.mak +diff --git a/default-configs/devices/rh-virtio.mak b/default-configs/devices/rh-virtio.mak new file mode 100644 -index 0000000000..94ede1b5f6 +index 0000000..94ede1b --- /dev/null -+++ b/default-configs/rh-virtio.mak ++++ b/default-configs/devices/rh-virtio.mak @@ -0,0 +1,10 @@ +CONFIG_VIRTIO=y +CONFIG_VIRTIO_BALLOON=y @@ -233,11 +254,11 @@ index 0000000000..94ede1b5f6 +CONFIG_VIRTIO_RNG=y +CONFIG_VIRTIO_SCSI=y +CONFIG_VIRTIO_SERIAL=y -diff --git a/default-configs/s390x-rh-devices.mak b/default-configs/s390x-rh-devices.mak +diff --git a/default-configs/devices/s390x-rh-devices.mak b/default-configs/devices/s390x-rh-devices.mak new file mode 100644 -index 0000000000..c3c73fe752 +index 0000000..c3c73fe --- /dev/null -+++ b/default-configs/s390x-rh-devices.mak ++++ b/default-configs/devices/s390x-rh-devices.mak @@ -0,0 +1,15 @@ +include rh-virtio.mak + @@ -254,10 +275,10 @@ index 0000000000..c3c73fe752 +CONFIG_VHOST_USER=y +CONFIG_VIRTIO_CCW=y +CONFIG_WDT_DIAG288=y -diff --git 
a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak -index f2287a133f..3e2e388e91 100644 ---- a/default-configs/s390x-softmmu.mak -+++ b/default-configs/s390x-softmmu.mak +diff --git a/default-configs/devices/s390x-softmmu.mak b/default-configs/devices/s390x-softmmu.mak +index f2287a1..3e2e388 100644 +--- a/default-configs/devices/s390x-softmmu.mak ++++ b/default-configs/devices/s390x-softmmu.mak @@ -10,4 +10,6 @@ # Boards: @@ -266,12 +287,12 @@ index f2287a133f..3e2e388e91 100644 +#CONFIG_S390_CCW_VIRTIO=y + +include s390x-rh-devices.mak -diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak +diff --git a/default-configs/devices/x86_64-rh-devices.mak b/default-configs/devices/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..d59b6d9bb5 +index 0000000..e80877d --- /dev/null -+++ b/default-configs/x86_64-rh-devices.mak -@@ -0,0 +1,100 @@ ++++ b/default-configs/devices/x86_64-rh-devices.mak +@@ -0,0 +1,101 @@ +include rh-virtio.mak + +CONFIG_AC97=y @@ -354,6 +375,7 @@ index 0000000000..d59b6d9bb5 +CONFIG_USB_UHCI=y +CONFIG_USB_XHCI=y +CONFIG_USB_XHCI_NEC=y ++CONFIG_USB_XHCI_PCI=y +CONFIG_VFIO=y +CONFIG_VFIO_PCI=y +CONFIG_VGA=y @@ -369,13 +391,13 @@ index 0000000000..d59b6d9bb5 +CONFIG_WDT_IB700=y +CONFIG_XIO3130=y +CONFIG_TPM_CRB=y -+CONFIG_TPM_TIS=y ++CONFIG_TPM_TIS_ISA=y +CONFIG_TPM_EMULATOR=y +CONFIG_TPM_PASSTHROUGH=y -diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak -index 64b2ee2960..b5de7e5279 100644 ---- a/default-configs/x86_64-softmmu.mak -+++ b/default-configs/x86_64-softmmu.mak +diff --git a/default-configs/devices/x86_64-softmmu.mak b/default-configs/devices/x86_64-softmmu.mak +index 64b2ee2..b5de7e5 100644 +--- a/default-configs/devices/x86_64-softmmu.mak ++++ b/default-configs/devices/x86_64-softmmu.mak @@ -1,3 +1,5 @@ # Default configuration for x86_64-softmmu @@ -384,7 +406,7 @@ index 64b2ee2960..b5de7e5279 100644 + +include x86_64-rh-devices.mak 
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 6a19070cec..bb8379f6a7 100644 +index 95cb0f9..f9690a0 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -374,8 +374,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) @@ -398,26 +420,26 @@ index 6a19070cec..bb8379f6a7 100644 pm->s4_val = 2; object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, -diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs -index 534a6a119e..bd62442b54 100644 ---- a/hw/arm/Makefile.objs -+++ b/hw/arm/Makefile.objs -@@ -28,7 +28,7 @@ obj-$(CONFIG_VEXPRESS) += vexpress.o - obj-$(CONFIG_ZYNQ) += xilinx_zynq.o - obj-$(CONFIG_SABRELITE) += sabrelite.o +diff --git a/hw/arm/meson.build b/hw/arm/meson.build +index be39117..6fcc5ed 100644 +--- a/hw/arm/meson.build ++++ b/hw/arm/meson.build +@@ -30,7 +30,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) + arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c')) + arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c')) --obj-$(CONFIG_ARM_V7M) += armv7m.o -+#obj-$(CONFIG_ARM_V7M) += armv7m.o - obj-$(CONFIG_EXYNOS4) += exynos4210.o - obj-$(CONFIG_PXA2XX) += pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o - obj-$(CONFIG_DIGIC) += digic.o +-arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) ++#arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) + arm_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210.c')) + arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) + arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index e9ed3eef45..965528a512 100644 +index 4c2c35e..e9eb7b8 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c -@@ -47,6 +47,8 @@ - #include "qemu/module.h" +@@ -48,6 +48,8 @@ #include "trace.h" + #include "qom/object.h" +#include "hw/boards.h" + @@ -439,24 +461,25 @@ index e9ed3eef45..965528a512 100644 if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { error_setg(errp, 
"Cannot choose a fallback FDrive type of 'auto'"); return; -diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs -index 8db9e8a7b3..1601ea93c7 100644 ---- a/hw/cpu/Makefile.objs -+++ b/hw/cpu/Makefile.objs -@@ -1,5 +1,6 @@ - obj-$(CONFIG_ARM11MPCORE) += arm11mpcore.o - obj-$(CONFIG_REALVIEW) += realview_mpcore.o - obj-$(CONFIG_A9MPCORE) += a9mpcore.o --obj-$(CONFIG_A15MPCORE) += a15mpcore.o --common-obj-y += core.o cluster.o -+#obj-$(CONFIG_A15MPCORE) += a15mpcore.o -+common-obj-y += core.o -+# cluster.o +diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build +index 9e52fee..bb71c9f 100644 +--- a/hw/cpu/meson.build ++++ b/hw/cpu/meson.build +@@ -1,6 +1,7 @@ +-softmmu_ss.add(files('core.c', 'cluster.c')) ++#softmmu_ss.add(files('core.c', 'cluster.c')) ++softmmu_ss.add(files('core.c')) + + specific_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) + specific_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) + specific_ss.add(when: 'CONFIG_A9MPCORE', if_true: files('a9mpcore.c')) +-specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) ++#specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index 212d6f5e61..f2504e5649 100644 +index 722b9e7..25de8b2 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c -@@ -2958,6 +2958,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) +@@ -2963,6 +2963,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); int16_t device_id = pc->device_id; @@ -467,10 +490,10 @@ index 212d6f5e61..f2504e5649 100644 Also accept 8 MB/16 MB for backward compatibility. 
*/ if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index b402a93636..d3621a45d9 100644 +index b9860e3..beb1ea6 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -221,7 +221,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -220,7 +220,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -480,7 +503,7 @@ index b402a93636..d3621a45d9 100644 } static const TypeInfo piix3_ide_info = { -@@ -250,6 +251,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -249,6 +250,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -490,10 +513,10 @@ index b402a93636..d3621a45d9 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index 29d633ca94..1442f46195 100644 +index dde85ba..62cf60c 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c -@@ -599,6 +599,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) +@@ -597,6 +597,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) dc->vmsd = &vmstate_kbd_isa; isa->build_aml = i8042_build_aml; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); @@ -503,10 +526,10 @@ index 29d633ca94..1442f46195 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index a18f80e369..960b2f00ee 100644 +index 83347cb..8fb83a1 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1797,6 +1797,7 @@ static const E1000Info e1000_devices[] = { +@@ -1796,6 +1796,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -514,7 +537,7 @@ index a18f80e369..960b2f00ee 100644 { .name = "e1000-82544gc", .device_id = 
E1000_DEV_ID_82544GC_COPPER, -@@ -1809,6 +1810,7 @@ static const E1000Info e1000_devices[] = { +@@ -1808,6 +1809,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -523,10 +546,10 @@ index a18f80e369..960b2f00ee 100644 static void e1000_register_types(void) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index c4f47dcc04..6a2239d5e5 100644 +index 2f7dc3c..55d36e0 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -389,10 +389,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { +@@ -376,10 +376,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { .instance_size = sizeof(SpaprCpuCore), .class_size = sizeof(SpaprCpuCoreClass), }, @@ -539,26 +562,24 @@ index c4f47dcc04..6a2239d5e5 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), -diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs -index e342ff59fa..abf044a20f 100644 ---- a/hw/usb/Makefile.objs -+++ b/hw/usb/Makefile.objs -@@ -31,7 +31,9 @@ ifeq ($(CONFIG_USB_SMARTCARD),y) - common-obj-y += dev-smartcard-reader.o - ifeq ($(CONFIG_SMARTCARD),y) - common-obj-m += smartcard.mo --smartcard.mo-objs := ccid-card-passthru.o ccid-card-emulated.o -+# Disabled for Red Hat Enterprise Linux: -+#smartcard.mo-objs := ccid-card-passthru.o ccid-card-emulated.o -+smartcard.mo-objs := ccid-card-passthru.o - smartcard.mo-cflags := $(SMARTCARD_CFLAGS) - smartcard.mo-libs := $(SMARTCARD_LIBS) +diff --git a/hw/usb/meson.build b/hw/usb/meson.build +index 934e4fa..e3abba5 100644 +--- a/hw/usb/meson.build ++++ b/hw/usb/meson.build +@@ -48,7 +48,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade + if config_host.has_key('CONFIG_SMARTCARD') + usbsmartcard_ss = ss.source_set() + usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD', +- if_true: [cacard, files('ccid-card-emulated.c', 'ccid-card-passthru.c')]) ++ 
if_true: [cacard, files('ccid-card-passthru.c')]) + hw_usb_modules += {'smartcard': usbsmartcard_ss} endif + diff --git a/qemu-options.hx b/qemu-options.hx -index 708583b4ce..1700205035 100644 +index 2c83390..53472fd 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -2257,10 +2257,6 @@ ERST +@@ -2251,10 +2251,6 @@ ERST DEF("no-hpet", 0, QEMU_OPTION_no_hpet, "-no-hpet disable HPET\n", QEMU_ARCH_I386) @@ -569,24 +590,11 @@ index 708583b4ce..1700205035 100644 DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" -diff --git a/softmmu/vl.c b/softmmu/vl.c -index 4eb9d1f7fd..a1fb06dca0 100644 ---- a/softmmu/vl.c -+++ b/softmmu/vl.c -@@ -145,7 +145,7 @@ static Chardev **serial_hds; - Chardev *parallel_hds[MAX_PARALLEL_PORTS]; - int win2k_install_hack = 0; - int singlestep = 0; --int no_hpet = 0; -+int no_hpet = 1; /* Always disabled for Red Hat Enterprise Linux */ - int fd_bootchk = 1; - static int no_reboot; - int no_shutdown = 0; diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 111579554f..13ad40aa7d 100644 +index 07492e9..a048714 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c -@@ -2338,7 +2338,9 @@ static void arm_cpu_register_types(void) +@@ -2358,7 +2358,9 @@ static void arm_cpu_register_types(void) type_register_static(&idau_interface_type_info); for (i = 0; i < cpu_count; ++i) { @@ -598,10 +606,10 @@ index 111579554f..13ad40aa7d 100644 } } diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index 00b0e08f33..94d429b61c 100644 +index 0013e25..6540046 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c -@@ -655,6 +655,9 @@ static void arm_tcg_cpu_register_types(void) +@@ -679,6 +679,9 @@ static void arm_tcg_cpu_register_types(void) { size_t i; @@ -611,20 +619,8 @@ index 00b0e08f33..94d429b61c 100644 for (i = 0; i < ARRAY_SIZE(arm_tcg_cpus); ++i) { arm_cpu_register(&arm_tcg_cpus[i]); } -diff 
--git a/target/i386/cpu.c b/target/i386/cpu.c -index 588f32e136..030a5a09ed 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1825,6 +1825,7 @@ static CPUCaches epyc_rome_cache_info = { - - static X86CPUDefinition builtin_x86_defs[] = { - { -+ /* qemu64 is the default CPU model for all machine-types */ - .name = "qemu64", - .level = 0xd, - .vendor = CPUID_VENDOR_AMD, diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 4ad16863c0..16b2185fd8 100644 +index 4ad1686..16b2185 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -694,10 +690,10 @@ index 4ad16863c0..16b2185fd8 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index c2af226174..e35bf745dd 100644 +index b5abff8..abe09d7 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c -@@ -404,6 +404,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, +@@ -408,6 +408,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, (max_model->def->gen == model->def->gen && max_model->def->ec_ga < model->def->ec_ga)) { list_add_feat("type", unavailable); @@ -708,10 +704,10 @@ index c2af226174..e35bf745dd 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index f2f75d2a57..8970e4c374 100644 +index baa070f..10ce36a 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c -@@ -2494,6 +2494,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2540,6 +2540,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } @@ -727,5 +723,5 @@ index f2f75d2a57..8970e4c374 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ -- -2.27.0 +1.8.3.1 diff --git a/0007-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch 
index 32ede92..519cff2 100644 --- a/0007-Machine-type-related-general-changes.patch +++ b/0007-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From a86a622ecc7b0b3c66d21fdd1c5dd279bfc75a03 Mon Sep 17 00:00:00 2001 +From 673234091c4073ebc31bf36559e249796772c8b5 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -33,22 +33,21 @@ Merged patches (4.2.0): - ca4a5e8 virtio: Make disable-legacy/disable-modern compat properties optional - compat: Generic hw_compat_rhel_8_1 (patch 93040/92956) -Merged patches (weekly-200318): +Merged patches (5.1.0): - e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) - -Merged patches (weekly-200506): - 8f9f4d8 compat: disable 'edid' for virtio-gpu-ccw -Conflicts: - hw/core/machine.c - -Signed-off-by: Danilo C. L. de Paula +Merged patches (5.2.0 rc0): +- 8348642 redhat: define hw_compat_8_2 +- 45b8402 redhat: define hw_compat_8_2 +- 4effa71 redhat: Update hw_compat_8_2 +- 0e84dff virtio: skip legacy support check on machine types less than 5.1 (partialy) --- - hw/acpi/ich9.c | 15 ++++ + hw/acpi/ich9.c | 15 +++ hw/acpi/piix4.c | 5 +- hw/arm/virt.c | 2 +- hw/char/serial.c | 16 ++++ - hw/core/machine.c | 170 +++++++++++++++++++++++++++++++++++ + hw/core/machine.c | 213 +++++++++++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- hw/i386/pc_piix.c | 2 + hw/i386/pc_q35.c | 2 + @@ -58,19 +57,19 @@ Signed-off-by: Danilo C. L. 
de Paula hw/smbios/smbios.c | 46 +++++++++- hw/timer/i8254_common.c | 2 +- hw/usb/hcd-uhci.c | 4 +- - hw/usb/hcd-xhci.c | 20 +++++ + hw/usb/hcd-xhci.c | 20 ++++ hw/usb/hcd-xhci.h | 2 + include/hw/acpi/ich9.h | 3 + - include/hw/boards.h | 24 +++++ + include/hw/boards.h | 27 ++++++ include/hw/firmware/smbios.h | 5 +- include/hw/i386/pc.h | 3 + include/hw/usb.h | 4 + migration/migration.c | 2 + - migration/migration.h | 5 ++ - 23 files changed, 354 insertions(+), 11 deletions(-) + migration/migration.h | 5 + + 23 files changed, 400 insertions(+), 11 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index bb8379f6a7..43ad1ff927 100644 +index f9690a0..f6c6c6a 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -369,6 +369,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) @@ -103,10 +102,10 @@ index bb8379f6a7..43ad1ff927 100644 &pm->disable_s3, OBJ_PROP_FLAG_READWRITE); object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S4_DISABLED, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 26bac4f16c..7c2e17ceb6 100644 +index 67a1ea4..85312a3 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -276,6 +276,7 @@ static const VMStateDescription vmstate_acpi = { +@@ -277,6 +277,7 @@ static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, .minimum_version_id = 3, @@ -114,7 +113,7 @@ index 26bac4f16c..7c2e17ceb6 100644 .post_load = vmstate_acpi_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), -@@ -630,8 +631,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) +@@ -633,8 +634,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) static Property piix4_pm_properties[] = { DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), @@ -126,10 +125,10 @@ index 26bac4f16c..7c2e17ceb6 100644 DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_hotplug_bridge, true), diff --git a/hw/arm/virt.c 
b/hw/arm/virt.c -index ecfee362a1..43cf75333b 100644 +index 27dbeb5..c908b5f 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1450,7 +1450,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1441,7 +1441,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, @@ -139,7 +138,7 @@ index ecfee362a1..43cf75333b 100644 smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, &smbios_anchor, &smbios_anchor_len); diff --git a/hw/char/serial.c b/hw/char/serial.c -index 2386479492..c83f816f42 100644 +index 97f7187..aeb207e 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -35,6 +35,7 @@ @@ -148,9 +147,9 @@ index 2386479492..c83f816f42 100644 #include "hw/qdev-properties.h" +#include "migration/migration.h" - //#define DEBUG_SERIAL + #define UART_LCR_DLAB 0x80 /* Divisor latch access bit */ -@@ -704,6 +705,9 @@ static int serial_post_load(void *opaque, int version_id) +@@ -691,6 +692,9 @@ static int serial_post_load(void *opaque, int version_id) static bool serial_thr_ipending_needed(void *opaque) { SerialState *s = opaque; @@ -160,7 +159,7 @@ index 2386479492..c83f816f42 100644 if (s->ier & UART_IER_THRI) { bool expected_value = ((s->iir & UART_IIR_ID) == UART_IIR_THRI); -@@ -785,6 +789,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { +@@ -772,6 +776,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { static bool serial_fifo_timeout_timer_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -171,7 +170,7 @@ index 2386479492..c83f816f42 100644 return timer_pending(s->fifo_timeout_timer); } -@@ -802,6 +810,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { +@@ -789,6 +797,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { static bool serial_timeout_ipending_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -182,7 +181,7 @@ index 
2386479492..c83f816f42 100644 return s->timeout_ipending != 0; } -@@ -819,6 +831,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { +@@ -806,6 +818,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { static bool serial_poll_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -194,13 +193,56 @@ index 2386479492..c83f816f42 100644 } diff --git a/hw/core/machine.c b/hw/core/machine.c -index 8d1a90c6cf..2b8e480040 100644 +index 98b87f7..8674586 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -28,6 +28,176 @@ +@@ -28,6 +28,219 @@ #include "hw/mem/nvdimm.h" #include "migration/vmstate.h" ++/* ++ * The same as hw_compat_4_2 + hw_compat_5_0 ++ */ ++GlobalProperty hw_compat_rhel_8_2[] = { ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "queue-size", "128"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-scsi-device", "virtqueue_size", "128"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "x-enable-wce-if-config-wce", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "seg-max-adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-scsi-device", "seg_max_adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "vhost-blk-device", "seg_max_adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "usb-host", "suppress-remote-wake", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "usb-redir", "suppress-remote-wake", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "qxl", "revision", "4" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "qxl-vga", "revision", "4" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "fw_cfg", "acpi-mr-restore", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "pci-host-bridge", "x-config-reg-migration-enabled", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { 
"virtio-balloon-device", "page-poison", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-read-set-eax", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-signal-unsupported-cmd", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-report-vmx-type", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-cmds-v2", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "virtio-device", "x-disable-legacy-check", "true" }, ++}; ++const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); ++ +/* + * The same as hw_compat_4_1 + */ @@ -371,14 +413,14 @@ index 8d1a90c6cf..2b8e480040 100644 +}; +const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); + - GlobalProperty hw_compat_5_0[] = { - { "pci-host-bridge", "x-config-reg-migration-enabled", "off" }, - { "virtio-balloon-device", "page-poison", "false" }, + GlobalProperty hw_compat_5_1[] = { + { "vhost-scsi", "num_queues", "1"}, + { "vhost-user-blk", "num-queues", "1"}, diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index 3aaeeeca1e..d88f52a587 100644 +index 90851e7..a91c5d7 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c -@@ -84,7 +84,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) +@@ -85,7 +85,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) } static Property vga_isa_properties[] = { @@ -388,7 +430,7 @@ index 3aaeeeca1e..d88f52a587 100644 }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index b789e83f9a..0ecdd57689 100644 +index 13d1628..9fcc5aa 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -179,6 +179,8 @@ static void pc_init1(MachineState *machine, @@ -401,10 +443,10 @@ index b789e83f9a..0ecdd57689 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index a3e607a544..b8ea764ce3 100644 +index a3f4959..f6c2ef4 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -204,6 +204,8 @@ static void 
pc_q35_init(MachineState *machine) +@@ -198,6 +198,8 @@ static void pc_q35_init(MachineState *machine) smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -414,10 +456,10 @@ index a3e607a544..b8ea764ce3 100644 } diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index fda34518c9..be64a6b56f 100644 +index b6f1ae3..19955eb 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c -@@ -79,6 +79,11 @@ typedef struct E1000EState { +@@ -80,6 +80,11 @@ struct E1000EState { E1000ECore core; @@ -426,10 +468,10 @@ index fda34518c9..be64a6b56f 100644 + */ + bool redhat_7_3_intr_state_enable; + uint32_t redhat_7_3_intr_state; - } E1000EState; + }; #define E1000E_MMIO_IDX 0 -@@ -94,6 +99,10 @@ typedef struct E1000EState { +@@ -95,6 +100,10 @@ struct E1000EState { #define E1000E_MSIX_TABLE (0x0000) #define E1000E_MSIX_PBA (0x2000) @@ -440,7 +482,7 @@ index fda34518c9..be64a6b56f 100644 static uint64_t e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) { -@@ -305,6 +314,8 @@ e1000e_init_msix(E1000EState *s) +@@ -306,6 +315,8 @@ e1000e_init_msix(E1000EState *s) } else { if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { msix_uninit(d, &s->msix, &s->msix); @@ -449,7 +491,7 @@ index fda34518c9..be64a6b56f 100644 } } } -@@ -476,6 +487,8 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) +@@ -477,6 +488,8 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) ret = msi_init(PCI_DEVICE(s), 0xD0, 1, true, false, NULL); if (ret) { trace_e1000e_msi_init_fail(ret); @@ -458,7 +500,7 @@ index fda34518c9..be64a6b56f 100644 } if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, -@@ -599,6 +612,11 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { +@@ -600,6 +613,11 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ e1000e_vmstate_intr_timer, E1000IntrDelayTimer) @@ -470,7 +512,7 @@ index fda34518c9..be64a6b56f 
100644 static const VMStateDescription e1000e_vmstate = { .name = "e1000e", .version_id = 1, -@@ -610,6 +628,7 @@ static const VMStateDescription e1000e_vmstate = { +@@ -611,6 +629,7 @@ static const VMStateDescription e1000e_vmstate = { VMSTATE_MSIX(parent_obj, E1000EState), VMSTATE_UINT32(ioaddr, E1000EState), @@ -478,7 +520,7 @@ index fda34518c9..be64a6b56f 100644 VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), VMSTATE_UINT8(core.rx_desc_len, E1000EState), VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, -@@ -658,6 +677,8 @@ static PropertyInfo e1000e_prop_disable_vnet, +@@ -659,6 +678,8 @@ static PropertyInfo e1000e_prop_disable_vnet, static Property e1000e_properties[] = { DEFINE_NIC_PROPERTIES(E1000EState, conf), @@ -488,7 +530,7 @@ index fda34518c9..be64a6b56f 100644 e1000e_prop_disable_vnet, bool), DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index ab93d78ab3..48a81354fc 100644 +index ba5ace1..a2e6e83 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3179,7 +3179,7 @@ static int rtl8139_pre_save(void *opaque) @@ -511,7 +553,7 @@ index ab93d78ab3..48a81354fc 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c -index 7a38540cb9..377d861913 100644 +index 7a38540..377d861 100644 --- a/hw/rtc/mc146818rtc.c +++ b/hw/rtc/mc146818rtc.c @@ -43,6 +43,7 @@ @@ -535,7 +577,7 @@ index 7a38540cb9..377d861913 100644 } diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index f560826904..8875e83941 100644 +index 6a3d397..232fd61 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -56,6 +56,9 @@ static bool smbios_legacy = true; @@ -548,7 +590,7 @@ index f560826904..8875e83941 100644 uint8_t *smbios_tables; size_t smbios_tables_len; -@@ -531,7 +534,7 @@ static void smbios_build_type_1_table(void) +@@ -570,7 +573,7 @@ static void smbios_build_type_1_table(void) static void smbios_build_type_2_table(void) { @@ -557,7 +599,7 @@ 
index f560826904..8875e83941 100644 SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); SMBIOS_TABLE_SET_STR(2, product_str, type2.product); -@@ -752,7 +755,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) +@@ -792,7 +795,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) void smbios_set_defaults(const char *manufacturer, const char *product, const char *version, bool legacy_mode, @@ -569,7 +611,7 @@ index f560826904..8875e83941 100644 { smbios_have_defaults = true; smbios_legacy = legacy_mode; -@@ -773,11 +779,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, +@@ -813,11 +819,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, g_free(smbios_entries); } @@ -617,7 +659,7 @@ index f560826904..8875e83941 100644 SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); SMBIOS_SET_DEFAULT(type3.version, version); diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index 050875b497..32935da46c 100644 +index 050875b..32935da 100644 --- a/hw/timer/i8254_common.c +++ b/hw/timer/i8254_common.c @@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = { @@ -630,10 +672,10 @@ index 050875b497..32935da46c 100644 vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 37f7beb3fa..2741edc589 100644 +index 27ca237..eb24e39 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c -@@ -1219,12 +1219,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) +@@ -1221,12 +1221,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) UHCIState *s = UHCI(dev); uint8_t *pci_conf = s->dev.config; int i; @@ -650,10 +692,10 @@ index 37f7beb3fa..2741edc589 100644 if (s->masterbus) { USBPort *ports[NB_PORTS]; diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 67a18fe2b6..38bdfaf3fd 100644 +index 79ce5c4..325cd02 100644 --- a/hw/usb/hcd-xhci.c +++ 
b/hw/usb/hcd-xhci.c -@@ -3600,9 +3600,27 @@ static const VMStateDescription vmstate_xhci_slot = { +@@ -3486,9 +3486,27 @@ static const VMStateDescription vmstate_xhci_slot = { } }; @@ -681,7 +723,7 @@ index 67a18fe2b6..38bdfaf3fd 100644 .fields = (VMStateField[]) { VMSTATE_UINT32(type, XHCIEvent), VMSTATE_UINT32(ccode, XHCIEvent), -@@ -3611,6 +3629,8 @@ static const VMStateDescription vmstate_xhci_event = { +@@ -3497,6 +3515,8 @@ static const VMStateDescription vmstate_xhci_event = { VMSTATE_UINT32(flags, XHCIEvent), VMSTATE_UINT8(slotid, XHCIEvent), VMSTATE_UINT8(epid, XHCIEvent), @@ -691,10 +733,10 @@ index 67a18fe2b6..38bdfaf3fd 100644 } }; diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h -index 946af51fc2..cc91a7e4bd 100644 +index ccf50ae..8716904 100644 --- a/hw/usb/hcd-xhci.h +++ b/hw/usb/hcd-xhci.h -@@ -157,6 +157,8 @@ typedef struct XHCIEvent { +@@ -149,6 +149,8 @@ typedef struct XHCIEvent { uint32_t flags; uint8_t slotid; uint8_t epid; @@ -704,7 +746,7 @@ index 946af51fc2..cc91a7e4bd 100644 typedef struct XHCIInterrupter { diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h -index 28a53181cb..ff4a672b90 100644 +index 28a5318..ff4a672 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -61,6 +61,9 @@ typedef struct ICH9LPCPMRegs { @@ -718,13 +760,16 @@ index 28a53181cb..ff4a672b90 100644 #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" diff --git a/include/hw/boards.h b/include/hw/boards.h -index 426ce5f625..1062df96c0 100644 +index a49e3a6..dd18c9e 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -373,4 +373,28 @@ extern const size_t hw_compat_2_2_len; +@@ -367,4 +367,31 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; ++extern GlobalProperty hw_compat_rhel_8_2[]; ++extern const size_t hw_compat_rhel_8_2_len; ++ +extern GlobalProperty hw_compat_rhel_8_1[]; +extern const size_t hw_compat_rhel_8_1_len; + @@ -751,7 +796,7 @@ index 
426ce5f625..1062df96c0 100644 + #endif diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 02a0ced0a0..67e38a1b13 100644 +index 02a0ced..67e38a1 100644 --- a/include/hw/firmware/smbios.h +++ b/include/hw/firmware/smbios.h @@ -267,7 +267,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); @@ -767,10 +812,10 @@ index 02a0ced0a0..67e38a1b13 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 3d7ed3a55e..951e825778 100644 +index 911e460..ae6bf1d 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -103,6 +103,9 @@ typedef struct PCMachineClass { +@@ -104,6 +104,9 @@ struct PCMachineClass { bool smbios_defaults; bool smbios_legacy_mode; bool smbios_uuid_encoded; @@ -781,10 +826,10 @@ index 3d7ed3a55e..951e825778 100644 /* RAM / address space compat: */ bool gigabyte_align; diff --git a/include/hw/usb.h b/include/hw/usb.h -index e29a37635b..35ac38c459 100644 +index a70a72e..78b9043 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h -@@ -575,4 +575,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, +@@ -570,4 +570,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, uint8_t interface_class, uint8_t interface_subclass, uint8_t interface_protocol); @@ -794,10 +839,10 @@ index e29a37635b..35ac38c459 100644 + #endif diff --git a/migration/migration.c b/migration/migration.c -index 8fe36339db..bf684185b7 100644 +index 3263aa5..c8d54ac 100644 --- a/migration/migration.c +++ b/migration/migration.c -@@ -129,6 +129,8 @@ enum mig_rp_message_type { +@@ -134,6 +134,8 @@ enum mig_rp_message_type { MIG_RP_MSG_MAX }; @@ -807,10 +852,10 @@ index 8fe36339db..bf684185b7 100644 migrations at once. 
For now we don't need to add dynamic creation of migration */ diff --git a/migration/migration.h b/migration/migration.h -index 6c6a931d0d..721e272713 100644 +index d096b77..6134a53 100644 --- a/migration/migration.h +++ b/migration/migration.h -@@ -340,6 +340,11 @@ void dirty_bitmap_mig_cancel_incoming(void); +@@ -364,6 +364,11 @@ bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm, void migrate_add_address(SocketAddress *address); int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); @@ -823,5 +868,5 @@ index 6c6a931d0d..721e272713 100644 #define qemu_ram_foreach_block \ #warning "Use foreach_not_ignored_block in migration code" -- -2.27.0 +1.8.3.1 diff --git a/0008-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch index 7d60da2..91f21e0 100644 --- a/0008-Add-aarch64-machine-types.patch +++ b/0008-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 0ba70804c179d934e5be555abff3c4455ac137a0 Mon Sep 17 00:00:00 2001 +From be3ca54f04ade6a20265f9aeeb46662caa6d16dc Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -23,19 +23,13 @@ Rebase notes (4.2.0-rc3): - aarch64: virt: Allow PCDIMM instantiation (patch 92247) - aarch64: virt: Enhance the comment related to gic-version (patch 92248) -Rebase notes (weekly-200226): +Rebase notes (5.0.0): - Set default_ram_id in rhel_machine_class_init - -Rebase notes (5.0.0-rc1): - Added setting acpi properties -Rebase notes (weekly-200520): +Rebase notes (5.1.0): - Added ras property - -Rebase notes (weekly-200701): - Added to virt_machine_device_unplug_cb to machine type (upstream) - -Rebase notes (5.1.0-rc1): - added mte property (upstream) Merged patches (4.0.0): @@ -47,14 +41,21 @@ Merged patches (4.1.0): - c3e39ef aarch64: Add virt-rhel8.1.0 machine type for ARM - 59a46d1 aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (5.2.0 rc0): +- 12990ad hw/arm: Changes to rhel820 machine +- 46d5a79 hw/arm: Introduce rhel_virt_instance_init() helper +- 098954a hw/arm: Add rhel830 machine type +- ee8e99d arm: Set correct max_cpus value on virt-rhel* machine types +- e5edd38 RHEL-only: arm/virt: Allow the TPM_TIS_SYSBUS device dynamic allocation in machvirt +- 6d7ba66 machine types/numa: set numa_mem_supported on old machine types (partialy) +- 25c5644 machine_types/numa: compatibility for auto_enable_numa_with_memdev (partialy) --- - hw/arm/virt.c | 180 +++++++++++++++++++++++++++++++++++++++++- - include/hw/arm/virt.h | 11 +++ - 2 files changed, 188 insertions(+), 3 deletions(-) + hw/arm/virt.c | 191 +++++++++++++++++++++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 8 +++ + 2 files changed, 196 insertions(+), 3 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 43cf75333b..e1a17e7c87 100644 +index c908b5f..21e0485 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -79,6 +79,7 @@ @@ -116,7 +117,7 @@ index 43cf75333b..e1a17e7c87 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -1979,6 +2022,7 @@ static void machvirt_init(MachineState *machine) +@@ -2027,6 +2070,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -124,7 +125,7 @@ index 43cf75333b..e1a17e7c87 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2007,6 +2051,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2055,6 +2099,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -132,7 +133,15 @@ index 43cf75333b..e1a17e7c87 100644 static bool virt_get_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2073,7 +2118,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) +@@ -2108,6 +2153,7 @@ static void 
virt_set_acpi(Object *obj, Visitor *v, const char *name, + visit_type_OnOffAuto(v, name, &vms->acpi, errp); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_ras(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2121,13 +2167,14 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) vms->ras = value; } @@ -141,7 +150,14 @@ index 43cf75333b..e1a17e7c87 100644 static bool virt_get_mte(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2087,7 +2132,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) + + return vms->mte; + } ++#endif /* disabled for RHEL */ + + static void virt_set_mte(Object *obj, bool value, Error **errp) + { +@@ -2135,7 +2182,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) vms->mte = value; } @@ -150,7 +166,7 @@ index 43cf75333b..e1a17e7c87 100644 static char *virt_get_gic_version(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2401,6 +2446,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2442,6 +2489,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return requested_pa_size > 40 ? requested_pa_size : 0; } @@ -158,7 +174,7 @@ index 43cf75333b..e1a17e7c87 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -2679,3 +2725,131 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -2730,3 +2778,140 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -171,11 +187,9 @@ index 43cf75333b..e1a17e7c87 100644 + + mc->family = "virt-rhel-Z"; + mc->init = machvirt_init; -+ /* Start with max_cpus set to 512, which is the maximum supported by KVM. -+ * The value may be reduced later when we have more information about the -+ * configuration of the particular instance. 
-+ */ -+ mc->max_cpus = 512; ++ /* Maximum supported VCPU count for all virt-rhel* machines */ ++ mc->max_cpus = 384; ++ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); + mc->block_default_type = IF_VIRTIO; + mc->no_cdrom = 1; + mc->pci_allow_0_address = true; @@ -192,9 +206,9 @@ index 43cf75333b..e1a17e7c87 100644 + hc->plug = virt_machine_device_plug_cb; + hc->unplug_request = virt_machine_device_unplug_request_cb; + hc->unplug = virt_machine_device_unplug_cb; -+ mc->numa_mem_supported = true; + mc->nvdimm_supported = true; + mc->auto_enable_numa_with_memhp = true; ++ mc->auto_enable_numa_with_memdev = true; + mc->default_ram_id = "mach-virt.ram"; + + object_class_property_add(oc, "acpi", "OnOffAuto", @@ -223,7 +237,7 @@ index 43cf75333b..e1a17e7c87 100644 +} +type_init(rhel_machine_init); + -+static void rhel820_virt_instance_init(Object *obj) ++static void rhel_virt_instance_init(Object *obj) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); @@ -239,12 +253,8 @@ index 43cf75333b..e1a17e7c87 100644 + object_property_set_description(obj, "highmem", + "Set on/off to enable/disable using " + "physical address space above 32 bits"); -+ /* -+ * Default GIC type is still v2, but became configurable for RHEL. We -+ * keep v2 instead of max as TCG CI test cases require an MSI controller -+ * and there is no userspace ITS MSI emulation available. -+ */ -+ vms->gic_version = 2; ++ ++ vms->gic_version = VIRT_GIC_VERSION_NOSEL; + object_property_add_str(obj, "gic-version", virt_get_gic_version, + virt_set_gic_version); + object_property_set_description(obj, "gic-version", @@ -271,13 +281,8 @@ index 43cf75333b..e1a17e7c87 100644 + object_property_set_description(obj, "iommu", + "Set the IOMMU type. 
" + "Valid values are none and smmuv3"); -+ vms->ras = false; -+ object_property_add_bool(obj, "ras", virt_get_ras, -+ virt_set_ras); -+ object_property_set_description(obj, "ras", -+ "Set on/off to enable/disable reporting host memory errors " -+ "to a KVM guest using ACPI and guest external abort exceptions"); + ++ vms->ras = false; + /* MTE is disabled by default. */ + vms->mte = false; + @@ -285,40 +290,53 @@ index 43cf75333b..e1a17e7c87 100644 + virt_flash_create(vms); +} + -+static void rhel820_virt_options(MachineClass *mc) ++static void rhel830_virt_instance_init(Object *obj) ++{ ++ rhel_virt_instance_init(obj); ++} ++ ++static void rhel830_virt_options(MachineClass *mc) +{ + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); +} -+DEFINE_RHEL_MACHINE_AS_LATEST(8, 2, 0) ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 3, 0) ++ ++static void rhel820_virt_instance_init(Object *obj) ++{ ++ rhel_virt_instance_init(obj); ++} ++ ++static void rhel820_virt_options(MachineClass *mc) ++{ ++ rhel830_virt_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ mc->numa_mem_supported = true; ++ mc->auto_enable_numa_with_memdev = false; ++} ++DEFINE_RHEL_MACHINE(8, 2, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index dff67e1bef..7c1e085749 100644 +index aad6d69..745b76b 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -166,6 +166,7 @@ typedef struct { +@@ -167,9 +167,17 @@ struct VirtMachineState { #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) +#if 0 /* disabled for Red Hat Enterprise Linux */ #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") - #define VIRT_MACHINE(obj) \ - OBJECT_CHECK(VirtMachineState, (obj), TYPE_VIRT_MACHINE) -@@ -174,6 +175,16 @@ typedef struct { - #define VIRT_MACHINE_CLASS(klass) \ - OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) + OBJECT_DECLARE_TYPE(VirtMachineState, VirtMachineClass, VIRT_MACHINE) +#else +#define TYPE_RHEL_MACHINE MACHINE_TYPE_NAME("virt-rhel") -+#define VIRT_MACHINE(obj) \ -+ OBJECT_CHECK(VirtMachineState, (obj), TYPE_RHEL_MACHINE) -+#define VIRT_MACHINE_GET_CLASS(obj) \ -+ OBJECT_GET_CLASS(VirtMachineClass, obj, TYPE_RHEL_MACHINE) -+#define VIRT_MACHINE_CLASS(klass) \ -+ OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_RHEL_MACHINE) ++typedef struct VirtMachineClass VirtMachineClass; ++typedef struct VirtMachineState VirtMachineState; ++DECLARE_OBJ_CHECKERS(VirtMachineState, VirtMachineClass, VIRT_MACHINE, TYPE_RHEL_MACHINE) +#endif + void virt_acpi_setup(VirtMachineState *vms); bool virt_is_acpi_enabled(VirtMachineState *vms); -- -2.27.0 +1.8.3.1 diff --git a/0009-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch index cca2073..b7c5e68 100644 --- a/0009-Add-ppc64-machine-types.patch +++ b/0009-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From da49e223e70695ec4ecc5668658d836346e7f29c Mon Sep 17 00:00:00 2001 +From d33e7e8c4d6e006d5039782d54f583ea3f242fd6 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -31,23 +31,73 @@ Merged patches (4.2.0): - redhat: update pseries-rhel-7.6.0 machine type (patch 93039) - redhat: define pseries-rhel8.2.0 machine type (patch 93041) -Merged patches (weekly-200226): +Merged patches (5.1.0): - eb121ff spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine (partial) -Signed-off-by: Danilo C. L. 
de Paula +Merged patches (5.2.0 rc0): +- 311a20f redhat: define pseries-rhel8.3.0 machine type +- 1284167 ppc: Set correct max_cpus value on spapr-rhel* machine types +- 1ab8783 redhat: update pseries-rhel8.2.0 machine type +- b162af531a target/ppc: Add experimental option for enabling secure guests --- - hw/ppc/spapr.c | 280 ++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr.c | 337 ++++++++++++++++++++++++++++++++++++++++++++++++ hw/ppc/spapr_cpu_core.c | 13 ++ - include/hw/ppc/spapr.h | 1 + + include/hw/ppc/spapr.h | 4 + target/ppc/compat.c | 13 +- target/ppc/cpu.h | 1 + - 5 files changed, 307 insertions(+), 1 deletion(-) + target/ppc/kvm.c | 27 ++++ + target/ppc/kvm_ppc.h | 13 ++ + 7 files changed, 407 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 0ae293ec94..756c8667c1 100644 +index 12a012d..4a838cc 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -4529,6 +4529,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -1585,6 +1585,9 @@ static void spapr_machine_reset(MachineState *machine) + + kvmppc_svm_off(&error_fatal); + spapr_caps_apply(spapr); ++ if (spapr->svm_allowed) { ++ kvmppc_svm_allow(&error_fatal); ++ } + + first_ppc_cpu = POWERPC_CPU(first_cpu); + if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && +@@ -3266,6 +3269,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) + spapr->host_serial = g_strdup(value); + } + ++static bool spapr_get_svm_allowed(Object *obj, Error **errp) ++{ ++ SpaprMachineState *spapr = SPAPR_MACHINE(obj); ++ ++ return spapr->svm_allowed; ++} ++ ++static void spapr_set_svm_allowed(Object *obj, bool value, Error **errp) ++{ ++ SpaprMachineState *spapr = SPAPR_MACHINE(obj); ++ ++ spapr->svm_allowed = value; ++} ++ + static void spapr_instance_init(Object *obj) + { + SpaprMachineState *spapr = SPAPR_MACHINE(obj); +@@ -3321,6 +3338,12 @@ static void spapr_instance_init(Object *obj) + spapr_get_host_serial, spapr_set_host_serial); + 
object_property_set_description(obj, "host-serial", + "Host serial number to advertise in guest device tree"); ++ object_property_add_bool(obj, "x-svm-allowed", ++ spapr_get_svm_allowed, ++ spapr_set_svm_allowed); ++ object_property_set_description(obj, "x-svm-allowed", ++ "Allow the guest to become a Secure Guest" ++ " (experimental only)"); + } + + static void spapr_machine_finalizefn(Object *obj) +@@ -4459,6 +4482,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->smp_threads_vsmt = true; smc->nr_xirqs = SPAPR_NR_XIRQS; xfc->match_nvt = spapr_match_nvt; @@ -55,15 +105,15 @@ index 0ae293ec94..756c8667c1 100644 } static const TypeInfo spapr_machine_info = { -@@ -4579,6 +4580,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4509,6 +4533,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-5.1 + * pseries-5.2 */ -@@ -4644,6 +4646,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4588,6 +4613,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -71,7 +121,7 @@ index 0ae293ec94..756c8667c1 100644 /* * pseries-4.0 -@@ -4660,6 +4663,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -4604,6 +4630,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; } @@ -79,7 +129,7 @@ index 0ae293ec94..756c8667c1 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4819,6 +4823,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); +@@ -4762,6 +4789,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); /* * pseries-2.7 */ @@ -87,7 +137,7 @@ index 0ae293ec94..756c8667c1 100644 static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, -@@ -4873,6 +4878,7 @@ static 
void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, +@@ -4816,6 +4844,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; } @@ -95,22 +145,57 @@ index 0ae293ec94..756c8667c1 100644 static void spapr_machine_2_7_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4987,6 +4993,280 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -4930,6 +4959,314 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); +#endif + +/* ++ * pseries-rhel8.3.0 ++ * like pseries-5.1 ++ */ ++ ++static void spapr_machine_rhel830_class_options(MachineClass *mc) ++{ ++ /* Defaults for the latest behaviour inherited from the base class */ ++ ++ /* Maximum supported VCPU count for all pseries-rhel* machines */ ++ mc->max_cpus = 384; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", true); ++ ++/* + * pseries-rhel8.2.0 ++ * like pseries-4.2 + pseries-5.0 ++ * except SPAPR_CAP_CCF_ASSIST that has been backported to pseries-rhel8.1.0 + */ + +static void spapr_machine_rhel820_class_options(MachineClass *mc) +{ -+ /* Defaults for the latest behaviour inherited from the base class */ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ /* from pseries-5.0 */ ++ static GlobalProperty compat[] = { ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-5.1-associativity", "on" }, ++ }; ++ ++ spapr_machine_rhel830_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); ++ ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF; ++ smc->rma_limit = 16 * GiB; ++ mc->nvdimm_supported = false; ++ ++ /* from pseries-5.0 */ ++ mc->numa_mem_supported = true; ++ smc->pre_5_1_assoc_refpoints = true; +} + -+DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", true); 
++DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", false); + +/* + * pseries-rhel8.1.0 @@ -275,7 +360,6 @@ index 0ae293ec94..756c8667c1 100644 + spapr_machine_rhel750_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); + compat_props_add(mc->compat_props, spapr_compat_rhel7_4, spapr_compat_rhel7_4_len); -+ mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram; + smc->has_power9_support = false; + smc->pre_2_10_has_unused_icps = true; + smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED; @@ -377,7 +461,7 @@ index 0ae293ec94..756c8667c1 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 6a2239d5e5..f228f8bb75 100644 +index 55d36e0..008074b 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -24,6 +24,7 @@ @@ -388,15 +472,15 @@ index 6a2239d5e5..f228f8bb75 100644 static void spapr_reset_vcpu(PowerPCCPU *cpu) { -@@ -238,6 +239,7 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -250,6 +251,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + { CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); - Error *local_err = NULL; + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); if (!qdev_realize(DEVICE(cpu), NULL, errp)) { - return; -@@ -249,6 +251,17 @@ static void spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + return false; +@@ -261,6 +263,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); kvmppc_set_papr(cpu); @@ -408,26 +492,36 @@ index 6a2239d5e5..f228f8bb75 100644 + ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, 0)))) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "POWER9 CPU is not supported by this machine class"); -+ return; ++ return false; + } + - if (spapr_irq_cpu_intc_create(spapr, cpu, &local_err) < 0) { - cpu_remove_sync(CPU(cpu)); - return; + if 
(spapr_irq_cpu_intc_create(spapr, cpu, errp) < 0) { + qdev_unrealize(DEVICE(cpu)); + return false; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 3134d339e8..f48089edba 100644 +index 2e89e36..ba2d814 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -131,6 +131,7 @@ struct SpaprMachineClass { - hwaddr rma_limit; /* clamp the RMA to this size */ +@@ -140,6 +140,7 @@ struct SpaprMachineClass { bool pre_5_1_assoc_refpoints; + bool pre_5_2_numa_associativity; + bool has_power9_support; void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, +@@ -220,6 +221,9 @@ struct SpaprMachineState { + int fwnmi_machine_check_interlock; + QemuCond fwnmi_machine_check_interlock_cond; + ++ /* Secure Guest support via x-svm-allowed */ ++ bool svm_allowed; ++ + /*< public >*/ + char *kvm_type; + char *host_model; diff --git a/target/ppc/compat.c b/target/ppc/compat.c -index 08aede88dc..b193445aca 100644 +index e9bec5f..74e3db9 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) @@ -452,10 +546,10 @@ index 08aede88dc..b193445aca 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index e7d382ac10..3cf209dd90 100644 +index 2eb41a2..d850521 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1352,6 +1352,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1347,6 +1347,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -463,6 +557,100 @@ index e7d382ac10..3cf209dd90 100644 bool ppc_check_compat(PowerPCCPU *cpu, uint32_t compat_pvr, uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, +diff --git a/target/ppc/kvm.c 
b/target/ppc/kvm.c +index daf690a..9bf3449 100644 +--- a/target/ppc/kvm.c ++++ b/target/ppc/kvm.c +@@ -89,6 +89,7 @@ static int cap_ppc_count_cache_flush_assist; + static int cap_ppc_nested_kvm_hv; + static int cap_large_decr; + static int cap_fwnmi; ++static int cap_ppc_secure_guest; + + static uint32_t debug_inst_opcode; + +@@ -136,6 +137,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); + kvmppc_get_cpu_characteristics(s); + cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); ++ cap_ppc_secure_guest = kvm_vm_check_extension(s, KVM_CAP_PPC_SECURE_GUEST); + cap_large_decr = kvmppc_get_dec_bits(); + cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); + /* +@@ -2538,6 +2540,16 @@ int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) + return 0; + } + ++bool kvmppc_has_cap_secure_guest(void) ++{ ++ return !!cap_ppc_secure_guest; ++} ++ ++int kvmppc_enable_cap_secure_guest(void) ++{ ++ return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1); ++} ++ + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) + { + uint32_t host_pvr = mfpvr(); +@@ -2947,3 +2959,18 @@ void kvmppc_svm_off(Error **errp) + error_setg_errno(errp, -rc, "KVM_PPC_SVM_OFF ioctl failed"); + } + } ++ ++void kvmppc_svm_allow(Error **errp) ++{ ++ if (!kvm_enabled()) { ++ error_setg(errp, "No PEF support in tcg, try x-svm-allowed=off"); ++ return; ++ } ++ ++ if (!kvmppc_has_cap_secure_guest()) { ++ error_setg(errp, "KVM implementation does not support secure guests, " ++ "try x-svm-allowed=off"); ++ } else if (kvmppc_enable_cap_secure_guest() < 0) { ++ error_setg(errp, "Error enabling x-svm-allowed, try x-svm-allowed=off"); ++ } ++} +diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h +index 73ce2bc..1239b84 100644 +--- a/target/ppc/kvm_ppc.h ++++ b/target/ppc/kvm_ppc.h +@@ -40,6 +40,7 @@ target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, + bool radix, bool gtse, + uint64_t 
proc_tbl); + void kvmppc_svm_off(Error **errp); ++void kvmppc_svm_allow(Error **errp); + #ifndef CONFIG_USER_ONLY + bool kvmppc_spapr_use_multitce(void); + int kvmppc_spapr_enable_inkernel_multitce(void); +@@ -73,6 +74,8 @@ int kvmppc_set_cap_nested_kvm_hv(int enable); + int kvmppc_get_cap_large_decr(void); + int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); + int kvmppc_enable_hwrng(void); ++bool kvmppc_has_cap_secure_guest(void); ++int kvmppc_enable_cap_secure_guest(void); + int kvmppc_put_books_sregs(PowerPCCPU *cpu); + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); + void kvmppc_check_papr_resize_hpt(Error **errp); +@@ -387,6 +390,16 @@ static inline int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) + return -1; + } + ++static inline bool kvmppc_has_cap_secure_guest(void) ++{ ++ return false; ++} ++ ++static inline int kvmppc_enable_cap_secure_guest(void) ++{ ++ return -1; ++} ++ + static inline int kvmppc_enable_hwrng(void) + { + return -1; -- -2.27.0 +1.8.3.1 diff --git a/0010-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch index 0d4e3ef..0b94070 100644 --- a/0010-Add-s390x-machine-types.patch +++ b/0010-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From dc0914b5e43a9925217af8ddde44194176822108 Mon Sep 17 00:00:00 2001 +From 79307dba97fef45adfbc03ab46db0460b27ceab9 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -20,17 +20,15 @@ Merged patches (4.2.0): - fb192e5 redhat: s390x: Rename s390-ccw-virtio-rhel8.0.0 to s390-ccw-virtio-rhel8.1.0 - a9b22e8 redhat: s390x: Add proper compatibility options for the -rhel7.6.0 machine - hw/s390x: Add the s390-ccw-virtio-rhel8.2.0 machine types (patch 92954) - -Signed-off-by: Danilo C. L. 
de Paula --- - hw/s390x/s390-virtio-ccw.c | 71 +++++++++++++++++++++++++++++++++++++- + hw/s390x/s390-virtio-ccw.c | 71 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index e72c61d2ea..a6a37cce94 100644 +index 22222c4..68cea20 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -777,7 +777,7 @@ bool css_migration_enabled(void) +@@ -761,7 +761,7 @@ bool css_migration_enabled(void) { \ MachineClass *mc = MACHINE_CLASS(oc); \ ccw_machine_##suffix##_class_options(mc); \ @@ -39,15 +37,15 @@ index e72c61d2ea..a6a37cce94 100644 if (latest) { \ mc->alias = "s390-ccw-virtio"; \ mc->is_default = true; \ -@@ -801,6 +801,7 @@ bool css_migration_enabled(void) +@@ -785,6 +785,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_5_1_instance_options(MachineState *machine) + static void ccw_machine_5_2_instance_options(MachineState *machine) { } -@@ -1053,6 +1054,74 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1049,6 +1050,74 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); @@ -116,12 +114,12 @@ index e72c61d2ea..a6a37cce94 100644 + ccw_machine_rhel760_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); + compat_props_add(mc->compat_props, ccw_compat_rhel_7_5, ccw_compat_rhel_7_5_len); -+ S390_MACHINE_CLASS(mc)->hpage_1m_allowed = false; ++ S390_CCW_MACHINE_CLASS(mc)->hpage_1m_allowed = false; +} +DEFINE_CCW_MACHINE(rhel750, "rhel7.5.0", false); static void ccw_machine_register_types(void) { -- -2.27.0 +1.8.3.1 diff --git a/0011-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch index 5f827db..2aff59c 100644 --- 
a/0011-Add-x86_64-machine-types.patch +++ b/0011-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 45d5ef7594e5b07d3a975feea8e72541402a797f Mon Sep 17 00:00:00 2001 +From 4c6e7a672399b3962d904c2b01e8844544383d89 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -37,29 +37,32 @@ Merged patches (4.2.0): - 0784125 x86 machine types: add pc-q35-rhel8.1.0 - machines/x86: Add rhel 8.2 machine type (patch 92959) -Merged patches (weekly-200122): +Merged patches (5.1.0): - 481357e RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR support - -Merged patches (weekly-200318): - e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) -Signed-off-by: Danilo C. L. de Paula +Merged patches (5.2.0 rc0): +- b02c9f5 x86: Add 8.3.0 x86_64 machine type +- f2edc4f q35: Set max_cpus to 512 +- 6d7ba66 machine types/numa: set numa_mem_supported on old machine types (partialy) +- 25c5644 machine_types/numa: compatibility for auto_enable_numa_with_memdev (partialy) +- e2d3209 x86: lpc9: let firmware negotiate 'CPU hotplug with SMI' features (partialy) --- hw/i386/acpi-build.c | 3 + - hw/i386/pc.c | 265 ++++++++++++++++++++++++++++++++++++++++++- - hw/i386/pc_piix.c | 210 +++++++++++++++++++++++++++++++++- - hw/i386/pc_q35.c | 162 +++++++++++++++++++++++++- + hw/i386/pc.c | 273 ++++++++++++++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 215 +++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_q35.c | 185 +++++++++++++++++++++++++++++++++- include/hw/boards.h | 2 + - include/hw/i386/pc.h | 33 ++++++ + include/hw/i386/pc.h | 36 +++++++ target/i386/cpu.c | 3 +- target/i386/kvm.c | 4 + - 8 files changed, 675 insertions(+), 7 deletions(-) + 8 files changed, 714 insertions(+), 7 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index b7bcbbbb2a..fe815c5403 100644 +index 4f66642..78f50d4 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c -@@ -212,6 
+212,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) +@@ -217,6 +217,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) pm->fadt.reset_reg = r; pm->fadt.reset_val = 0xf; pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; @@ -67,13 +70,13 @@ index b7bcbbbb2a..fe815c5403 100644 + "__com.redhat_force-rev1-fadt", NULL)) + pm->fadt.rev = 1; pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; - } - + pm->smi_on_cpuhp = + !!(smi_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT)); diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 47c5ca3e34..063f01d19a 100644 +index 17b514d..f3fc695 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -347,6 +347,263 @@ GlobalProperty pc_compat_1_4[] = { +@@ -352,6 +352,271 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -88,9 +91,17 @@ index 47c5ca3e34..063f01d19a 100644 + { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, + /* bz 1508330 */ + { "vfio-pci", "x-no-geforce-quirks", "on" }, ++ /* BZ 1846886 */ ++ { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, +}; +const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_8_2_compat[] = { ++ /* pc_rhel_8_2_compat from pc_compat_4_2 */ ++ { "mch", "smbase-smram", "off" }, ++}; ++const size_t pc_rhel_8_2_compat_len = G_N_ELEMENTS(pc_rhel_8_2_compat); ++ +/* pc_rhel_8_1_compat is empty since pc_4_1_compat is */ +GlobalProperty pc_rhel_8_1_compat[] = { }; +const size_t pc_rhel_8_1_compat_len = G_N_ELEMENTS(pc_rhel_8_1_compat); @@ -337,7 +348,7 @@ index 47c5ca3e34..063f01d19a 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1012,7 +1269,8 @@ void pc_memory_init(PCMachineState *pcms, +@@ -970,7 +1235,8 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr = g_malloc(sizeof(*option_rom_mr)); memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, &error_fatal); @@ -347,17 +358,17 @@ index 47c5ca3e34..063f01d19a 100644 
memory_region_set_readonly(option_rom_mr, true); } memory_region_add_subregion_overlap(rom_memory, -@@ -1956,6 +2214,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - pcmc->linuxboot_dma_enabled = true; +@@ -1674,6 +1940,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->pvh_enabled = true; + pcmc->kvmclock_create_always = true; assert(!mc->get_hotplug_handler); + pcmc->pc_rom_ro = true; + mc->async_pf_vmexit_disable = false; mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1968,7 +2228,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - mc->hot_add_cpu = pc_hot_add_cpu; +@@ -1685,7 +1953,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->default_boot_order = "cad"; mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; - mc->max_cpus = 255; @@ -367,7 +378,7 @@ index 47c5ca3e34..063f01d19a 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 0ecdd57689..6d935645b6 100644 +index 9fcc5aa..815da79 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -54,6 +54,7 @@ @@ -397,7 +408,7 @@ index 0ecdd57689..6d935645b6 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -994,3 +996,207 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -1007,3 +1009,212 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -414,6 +425,8 @@ index 0ecdd57689..6d935645b6 100644 + pcmc->default_nic_model = "e1000"; + m->default_display = "std"; + m->no_parallel = 1; ++ m->numa_mem_supported = true; ++ m->auto_enable_numa_with_memdev = false; + machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); + compat_props_add(m->compat_props, 
pc_rhel_compat, pc_rhel_compat_len); + m->alias = "pc"; @@ -435,6 +448,10 @@ index 0ecdd57689..6d935645b6 100644 + m->smbus_no_migration_support = true; + pcmc->pvh_enabled = false; + pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(m->compat_props, pc_rhel_8_2_compat, ++ pc_rhel_8_2_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); + compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); @@ -477,7 +494,6 @@ index 0ecdd57689..6d935645b6 100644 + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_machine_rhel750_options(m); + m->desc = "RHEL 7.4.0 PC (i440FX + PIIX, 1996)"; -+ m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; + pcmc->pc_rom_ro = false; + compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); + compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); @@ -606,10 +622,10 @@ index 0ecdd57689..6d935645b6 100644 +DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index b8ea764ce3..f4edb049d6 100644 +index f6c2ef4..3340008 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -201,8 +201,8 @@ static void pc_q35_init(MachineState *machine) +@@ -195,8 +195,8 @@ static void pc_q35_init(MachineState *machine) if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ @@ -620,7 +636,7 @@ index b8ea764ce3..f4edb049d6 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -336,6 +336,7 @@ static void pc_q35_init(MachineState *machine) +@@ -327,6 +327,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -628,7 +644,7 @@ index b8ea764ce3..f4edb049d6 
100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -564,3 +565,160 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -568,3 +569,183 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -652,10 +668,27 @@ index b8ea764ce3..f4edb049d6 100644 + machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); + machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); + m->alias = "q35"; -+ m->max_cpus = 384; ++ m->max_cpus = 512; + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); +} + ++static void pc_q35_init_rhel830(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel830_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.3.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.3.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel830, "pc-q35-rhel8.3.0", pc_q35_init_rhel830, ++ pc_q35_machine_rhel830_options); ++ +static void pc_q35_init_rhel820(MachineState *machine) +{ + pc_q35_init(machine); @@ -666,8 +699,15 @@ index b8ea764ce3..f4edb049d6 100644 + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel_options(m); + m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ m->numa_mem_supported = true; ++ m->auto_enable_numa_with_memdev = false; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.2.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(m->compat_props, pc_rhel_8_2_compat, ++ pc_rhel_8_2_compat_len); +} + +DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, @@ -762,7 +802,6 @@ index b8ea764ce3..f4edb049d6 100644 + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + 
pc_q35_machine_rhel750_options(m); + m->desc = "RHEL-7.4.0 PC (Q35 + ICH9, 2009)"; -+ m->numa_auto_assign_ram = numa_legacy_auto_assign_ram; + pcmc->pc_rom_ro = false; + compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); + compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); @@ -790,39 +829,42 @@ index b8ea764ce3..f4edb049d6 100644 +DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, + pc_q35_machine_rhel730_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index 1062df96c0..c3577319c0 100644 +index dd18c9e..4e4a54b 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -208,6 +208,8 @@ struct MachineClass { +@@ -202,6 +202,8 @@ struct MachineClass { strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; bool auto_enable_numa_with_memdev; + /* RHEL only */ + bool async_pf_vmexit_disable; - void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes, - int nb_nodes, ram_addr_t size); bool ignore_boot_device_suffixes; + bool smbus_no_migration_support; + bool nvdimm_supported; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 951e825778..156be22995 100644 +index ae6bf1d..e2ba9a4 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -121,6 +121,9 @@ typedef struct PCMachineClass { +@@ -125,6 +125,9 @@ struct PCMachineClass { - /* use PVH to load kernels that support this feature */ - bool pvh_enabled; -+ + /* create kvmclock device even when KVM PV features are not exposed */ + bool kvmclock_create_always; ++ + /* RH only, see bz 1489800 */ + bool pc_rom_ro; - } PCMachineClass; + }; #define TYPE_PC_MACHINE "generic-pc-machine" -@@ -265,6 +268,36 @@ extern const size_t pc_compat_1_5_len; +@@ -266,6 +269,39 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; + ++extern 
GlobalProperty pc_rhel_8_2_compat[]; ++extern const size_t pc_rhel_8_2_compat_len; ++ +extern GlobalProperty pc_rhel_8_1_compat[]; +extern const size_t pc_rhel_8_1_compat_len; + @@ -854,10 +896,10 @@ index 951e825778..156be22995 100644 * depending on QEMU versions up to QEMU 2.4. */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 030a5a09ed..cdaa1463f2 100644 +index 0d86069..dd52fe5 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1830,7 +1830,7 @@ static X86CPUDefinition builtin_x86_defs[] = { +@@ -1803,7 +1803,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .level = 0xd, .vendor = CPUID_VENDOR_AMD, .family = 6, @@ -866,7 +908,7 @@ index 030a5a09ed..cdaa1463f2 100644 .stepping = 3, .features[FEAT_1_EDX] = PPRO_FEATURES | -@@ -4142,6 +4142,7 @@ static PropValue kvm_default_props[] = { +@@ -4117,6 +4117,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -875,10 +917,10 @@ index 030a5a09ed..cdaa1463f2 100644 }; diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index 6f18d940a5..98249b2e3b 100644 +index cf46259..4e58c09 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c -@@ -3135,6 +3135,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3124,6 +3124,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -886,7 +928,7 @@ index 6f18d940a5..98249b2e3b 100644 kvm_msr_buf_reset(cpu); -@@ -3444,6 +3445,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3436,6 +3437,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -894,8 +936,8 @@ index 6f18d940a5..98249b2e3b 100644 + env->async_pf_en_msr &= ~(1ULL << 2); + } break; - case MSR_KVM_PV_EOI_EN: - env->pv_eoi_en_msr = msrs[i].data; + case MSR_KVM_ASYNC_PF_INT: + env->async_pf_int_msr = msrs[i].data; -- -2.27.0 +1.8.3.1 diff --git a/0012-Enable-make-check.patch b/0012-Enable-make-check.patch index 
1ce3d04..ae7c435 100644 --- a/0012-Enable-make-check.patch +++ b/0012-Enable-make-check.patch @@ -1,6 +1,6 @@ -From 69de445124e71df949a3d5289f8a10a5f771bac2 Mon Sep 17 00:00:00 2001 +From 1170780d2fdd4cb8b663603c4d509d65b2e05846 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Fri, 19 Oct 2018 13:48:41 +0200 +Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check Fixing tests after device disabling and machine types changes and enabling @@ -22,37 +22,36 @@ Rebase changes (4.1.0-rc1): Rebase changes (4.2.0-rc0): - partially disable hd-geo-test (requires lsi53c895a) -Rebase changes (weekly-200129): -- Disable qtest/q35-test (uses upstream machine types) - -Rebased changes (weekly-200212): -- Do not run iotests on make checka - Rebase changes (5.1.0-rc1): +- Disable qtest/q35-test (uses upstream machine types) +- Do not run iotests on make checka - Enabled iotests 071 and 099 +Rebase changes (5.2.0 rc0): +- Disable cdrom tests (unsupported devices) on x86_64 +- disable fuzz test + Merged patches (4.0.0): - f7ffd13 Remove 7 qcow2 and luks iotests that are taking > 25 sec to run during the fast train build proce Merged patches (4.1.0-rc0): - 41288ff redhat: Remove raw iotest 205 - -Signed-off-by: Danilo C. L. 
de Paula --- redhat/qemu-kvm.spec.template | 2 +- tests/qemu-iotests/051 | 12 ++++++------ - tests/qtest/Makefile.include | 12 ++++++------ tests/qtest/boot-serial-test.c | 6 +++++- + tests/qtest/cdrom-test.c | 2 ++ tests/qtest/cpu-plug-test.c | 4 ++-- tests/qtest/e1000-test.c | 2 ++ tests/qtest/hd-geo-test.c | 4 ++++ + tests/qtest/meson.build | 10 ++-------- tests/qtest/prom-env-test.c | 4 ++++ tests/qtest/test-x86-cpuid-compat.c | 2 ++ tests/qtest/usb-hcd-xhci-test.c | 4 ++++ - 10 files changed, 36 insertions(+), 16 deletions(-) + 11 files changed, 34 insertions(+), 18 deletions(-) diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 -index bee26075b2..61d25c4ed7 100755 +index bee2607..61d25c4 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -183,11 +183,11 @@ run_qemu -drive if=virtio @@ -85,59 +84,8 @@ index bee26075b2..61d25c4ed7 100755 ;; *) ;; -diff --git a/tests/qtest/Makefile.include b/tests/qtest/Makefile.include -index b0204e44f2..cf8a138791 100644 ---- a/tests/qtest/Makefile.include -+++ b/tests/qtest/Makefile.include -@@ -29,7 +29,7 @@ check-qtest-i386-y += ide-test - check-qtest-i386-$(CONFIG_TOOLS) += ahci-test - check-qtest-i386-y += hd-geo-test - check-qtest-i386-y += boot-order-test --check-qtest-i386-y += bios-tables-test -+#check-qtest-i386-y += bios-tables-test - check-qtest-i386-$(CONFIG_SGA) += boot-serial-test - check-qtest-i386-$(CONFIG_SLIRP) += pxe-test - check-qtest-i386-y += rtc-test -@@ -51,7 +51,7 @@ check-qtest-i386-$(CONFIG_USB_UHCI) += usb-hcd-uhci-test - check-qtest-i386-$(call land,$(CONFIG_USB_EHCI),$(CONFIG_USB_UHCI)) += usb-hcd-ehci-test - check-qtest-i386-$(CONFIG_USB_XHCI_NEC) += usb-hcd-xhci-test - check-qtest-i386-y += cpu-plug-test --check-qtest-i386-y += q35-test -+#check-qtest-i386-y += q35-test - check-qtest-i386-y += vmgenid-test - check-qtest-i386-$(CONFIG_TPM_CRB) += tpm-crb-swtpm-test - check-qtest-i386-$(CONFIG_TPM_CRB) += tpm-crb-test -@@ -90,7 +90,7 @@ 
check-qtest-mips64el-$(CONFIG_VGA) += display-vga-test - check-qtest-moxie-y += boot-serial-test - - check-qtest-ppc-$(CONFIG_ISA_TESTDEV) = endianness-test --check-qtest-ppc-y += boot-order-test -+#check-qtest-ppc-y += boot-order-test - check-qtest-ppc-y += prom-env-test - check-qtest-ppc-y += drive_del-test - check-qtest-ppc-y += boot-serial-test -@@ -104,8 +104,8 @@ check-qtest-ppc64-$(CONFIG_PSERIES) += rtas-test - check-qtest-ppc64-$(CONFIG_SLIRP) += pxe-test - check-qtest-ppc64-$(CONFIG_USB_UHCI) += usb-hcd-uhci-test - check-qtest-ppc64-$(CONFIG_USB_XHCI_NEC) += usb-hcd-xhci-test --check-qtest-ppc64-$(CONFIG_SLIRP) += test-netfilter --check-qtest-ppc64-$(CONFIG_POSIX) += test-filter-mirror -+#check-qtest-ppc64-$(CONFIG_SLIRP) += test-netfilter -+#check-qtest-ppc64-$(CONFIG_POSIX) += test-filter-mirror - check-qtest-ppc64-$(CONFIG_RTL8139_PCI) += test-filter-redirector - check-qtest-ppc64-$(CONFIG_VGA) += display-vga-test - check-qtest-ppc64-y += numa-test -@@ -154,7 +154,7 @@ check-qtest-s390x-$(CONFIG_SLIRP) += test-netfilter - check-qtest-s390x-$(CONFIG_POSIX) += test-filter-mirror - check-qtest-s390x-$(CONFIG_POSIX) += test-filter-redirector - check-qtest-s390x-y += drive_del-test --check-qtest-s390x-y += device-plug-test -+#check-qtest-s390x-y += device-plug-test - check-qtest-s390x-y += virtio-ccw-test - check-qtest-s390x-y += cpu-plug-test - check-qtest-s390x-y += migration-test diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c -index bfe7624dc6..1ffaa0bd7e 100644 +index b6b1c23..cefa1b3 100644 --- a/tests/qtest/boot-serial-test.c +++ b/tests/qtest/boot-serial-test.c @@ -120,19 +120,23 @@ static testdef_t tests[] = { @@ -165,11 +113,31 @@ index bfe7624dc6..1ffaa0bd7e 100644 { "x86_64", "q35", "-device sga", "SGABIOS" }, { "sparc", "LX", "", "TMS390S10" }, { "sparc", "SS-4", "", "MB86904" }, +diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c +index 5af944a..cd5b8e0 100644 +--- a/tests/qtest/cdrom-test.c ++++ 
b/tests/qtest/cdrom-test.c +@@ -140,6 +140,7 @@ static void add_x86_tests(void) + qtest_add_data_func("cdrom/boot/isapc", "-M isapc " + "-drive if=ide,media=cdrom,file=", test_cdboot); + } ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + qtest_add_data_func("cdrom/boot/am53c974", + "-device am53c974 -device scsi-cd,drive=cd1 " + "-drive if=none,id=cd1,format=raw,file=", test_cdboot); +@@ -155,6 +156,7 @@ static void add_x86_tests(void) + qtest_add_data_func("cdrom/boot/megasas-gen2", "-M q35 " + "-device megasas-gen2 -device scsi-cd,drive=cd1 " + "-blockdev file,node-name=cd1,filename=", test_cdboot); ++#endif + } + + static void add_s390x_tests(void) diff --git a/tests/qtest/cpu-plug-test.c b/tests/qtest/cpu-plug-test.c -index e8ffbbce4b..fda7269c82 100644 +index a1c6894..a8f0767 100644 --- a/tests/qtest/cpu-plug-test.c +++ b/tests/qtest/cpu-plug-test.c -@@ -181,8 +181,8 @@ static void add_pseries_test_case(const char *mname) +@@ -110,8 +110,8 @@ static void add_pseries_test_case(const char *mname) char *path; PlugTestData *data; @@ -181,7 +149,7 @@ index e8ffbbce4b..fda7269c82 100644 } data = g_new(PlugTestData, 1); diff --git a/tests/qtest/e1000-test.c b/tests/qtest/e1000-test.c -index c387984ef6..c89112d6f8 100644 +index ea286d1..a1847ac 100644 --- a/tests/qtest/e1000-test.c +++ b/tests/qtest/e1000-test.c @@ -22,9 +22,11 @@ struct QE1000 { @@ -197,7 +165,7 @@ index c387984ef6..c89112d6f8 100644 static void *e1000_get_driver(void *obj, const char *interface) diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c -index 48e8e02d6e..6496196b07 100644 +index f7b7cfb..99cccf8 100644 --- a/tests/qtest/hd-geo-test.c +++ b/tests/qtest/hd-geo-test.c @@ -737,6 +737,7 @@ static void test_override_ide(void) @@ -228,8 +196,57 @@ index 48e8e02d6e..6496196b07 100644 qtest_add_func("hd-geo/override/virtio_blk", test_override_virtio_blk); qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); qtest_add_func("hd-geo/override/scsi_hot_unplug", +diff 
--git a/tests/qtest/meson.build b/tests/qtest/meson.build +index c19f1c8..15ed460 100644 +--- a/tests/qtest/meson.build ++++ b/tests/qtest/meson.build +@@ -51,16 +51,13 @@ qtests_i386 = \ + 'ide-test', + 'hd-geo-test', + 'boot-order-test', +- 'bios-tables-test', + 'rtc-test', + 'i440fx-test', +- 'fuzz-test', + 'fw_cfg-test', + 'device-plug-test', + 'drive_del-test', + 'tco-test', + 'cpu-plug-test', +- 'q35-test', + 'vmgenid-test', + 'migration-test', + 'test-x86-cpuid-compat', +@@ -111,17 +108,15 @@ qtests_moxie = [ 'boot-serial-test' ] + + qtests_ppc = \ + (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) + \ +- (config_all_devices.has_key('CONFIG_M48T59') ? ['m48t59-test'] : []) + \ +- ['boot-order-test', 'prom-env-test', 'boot-serial-test'] \ ++ (config_all_devices.has_key('CONFIG_M48T59') ? ['m48t59-test'] : []) + + qtests_ppc64 = \ + (config_all_devices.has_key('CONFIG_PSERIES') ? ['device-plug-test'] : []) + \ + (config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-xscom-test'] : []) + \ + (config_all_devices.has_key('CONFIG_PSERIES') ? ['rtas-test'] : []) + \ +- (slirp.found() ? ['pxe-test', 'test-netfilter'] : []) + \ ++ (slirp.found() ? ['pxe-test'] : []) + \ + (config_all_devices.has_key('CONFIG_USB_UHCI') ? ['usb-hcd-uhci-test'] : []) + \ + (config_all_devices.has_key('CONFIG_USB_XHCI_NEC') ? ['usb-hcd-xhci-test'] : []) + \ +- (config_host.has_key('CONFIG_POSIX') ? ['test-filter-mirror'] : []) + \ + qtests_pci + ['migration-test', 'numa-test', 'cpu-plug-test', 'drive_del-test'] + + qtests_sh4 = (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) +@@ -164,7 +159,6 @@ qtests_s390x = \ + (config_host.has_key('CONFIG_POSIX') ? 
['test-filter-redirector'] : []) + \ + ['boot-serial-test', + 'drive_del-test', +- 'device-plug-test', + 'virtio-ccw-test', + 'cpu-plug-test', + 'migration-test'] diff --git a/tests/qtest/prom-env-test.c b/tests/qtest/prom-env-test.c -index 60e6ec3153..f9d6adcfe9 100644 +index f41d801..f8dc478 100644 --- a/tests/qtest/prom-env-test.c +++ b/tests/qtest/prom-env-test.c @@ -89,10 +89,14 @@ int main(int argc, char *argv[]) @@ -248,7 +265,7 @@ index 60e6ec3153..f9d6adcfe9 100644 add_tests(sparc_machines); } else if (!strcmp(arch, "sparc64")) { diff --git a/tests/qtest/test-x86-cpuid-compat.c b/tests/qtest/test-x86-cpuid-compat.c -index 7ca1883a29..983aa0719a 100644 +index 7ca1883..983aa07 100644 --- a/tests/qtest/test-x86-cpuid-compat.c +++ b/tests/qtest/test-x86-cpuid-compat.c @@ -300,6 +300,7 @@ int main(int argc, char **argv) @@ -268,7 +285,7 @@ index 7ca1883a29..983aa0719a 100644 /* Test feature parsing */ add_feature_test("x86/cpuid/features/plus", diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c -index 10ef9d2a91..3855873050 100644 +index 10ef9d2..3855873 100644 --- a/tests/qtest/usb-hcd-xhci-test.c +++ b/tests/qtest/usb-hcd-xhci-test.c @@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) @@ -298,5 +315,5 @@ index 10ef9d2a91..3855873050 100644 qtest_start("-device nec-usb-xhci,id=xhci" -- -2.27.0 +1.8.3.1 diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index 428a3f9..d57621f 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 08d751996b6c983d037134551f1c4b4691989168 Mon Sep 17 00:00:00 2001 +From 3bf885effef5666a13145e7942116ed9ba5039bb Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -32,20 +32,16 @@ Merged patches (2.9.0): Merged patches (4.1.0-rc3): - 
2b89558 vfio: increase the cap on number of assigned devices to 64 - -(cherry picked from commit 9fa3c9fc6dfcde76d80db1aa601b2d577f72ceec) -(cherry picked from commit 3cb35556dc7d994f203d732fe952f95fcdb03c0a) -Signed-off-by: Danilo C. L. de Paula --- hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- hw/vfio/pci.h | 1 + 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 2e561c06d6..be1b843e7f 100644 +index 58c0ce8..558eea8 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -47,6 +47,9 @@ +@@ -45,6 +45,9 @@ #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" @@ -55,7 +51,7 @@ index 2e561c06d6..be1b843e7f 100644 static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); -@@ -2712,9 +2715,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) +@@ -2768,9 +2771,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) ssize_t len; struct stat st; int groupid; @@ -87,7 +83,7 @@ index 2e561c06d6..be1b843e7f 100644 if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3158,6 +3182,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3202,6 +3226,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -98,10 +94,10 @@ index 2e561c06d6..be1b843e7f 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 0da7a20a7e..5d2b0d2f2d 100644 +index 1574ef9..fef907c 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h -@@ -135,6 +135,7 @@ typedef struct VFIOPCIDevice { +@@ -139,6 +139,7 @@ struct VFIOPCIDevice { EventNotifier err_notifier; EventNotifier req_notifier; int (*resetfn)(struct VFIOPCIDevice *); @@ -110,5 +106,5 @@ index 0da7a20a7e..5d2b0d2f2d 100644 
uint32_t device_id; uint32_t sub_vendor_id; -- -2.27.0 +1.8.3.1 diff --git a/0014-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch index 2e97d83..d123653 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 5ee503bc80a271f9b0fcf3d24df42ee3f8c7d687 Mon Sep 17 00:00:00 2001 +From 107ded716aa28243015b41940e660fe72dd4d3de Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -16,18 +16,15 @@ Add support statement to -help output, reporting direct qemu-kvm usage as unsupported by Red Hat, and advising users to use libvirt instead. Signed-off-by: Eduardo Habkost -(cherry picked from commit 2a07700936e39856cc9f149c6a6517f0715536a6) -(cherry picked from commit 5dd2f4706e2fef945771949e59a8fcc1b5452de9) -Signed-off-by: Danilo C. L. de Paula --- softmmu/vl.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c -index a1fb06dca0..3c383911cd 100644 +index a711644..f951684 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c -@@ -1686,9 +1686,17 @@ static void version(void) +@@ -1687,9 +1687,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -45,7 +42,7 @@ index a1fb06dca0..3c383911cd 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", error_get_progname()); -@@ -1705,6 +1713,7 @@ static void help(int exitcode) +@@ -1706,6 +1714,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); @@ -54,5 +51,5 @@ index a1fb06dca0..3c383911cd 100644 } -- -2.27.0 +1.8.3.1 diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch index a6ee268..eed1259 100644 --- a/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From 
edf9e38a2ef58908f45e37a63746926a494f9057 Mon Sep 17 00:00:00 2001 +From e3883f9d6e74843fd14d44cdf2d36b35123347a3 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -25,67 +25,23 @@ Merged patches (2.11.0): - 0584216921 Match POWER max cpus to x86 Signed-off-by: Andrew Jones -(cherry picked from commit a4ceb63bdc5cbac19f5f633ec761b9de0dedb55e) -(cherry picked from commit a1f26d85171b4d554225150053700e93ba6eba10) -redhat: globally limit the maximum number of CPUs +Merged patches (5.1.0): +- redhat: globally limit the maximum number of CPUs +- redhat: remove manual max_cpus limitations for ppc +- use recommended max vcpu count -RH-Author: David Hildenbrand -Message-id: <20180109103253.24517-2-david@redhat.com> -Patchwork-id: 78531 -O-Subject: [RHEL-7.5 qemu-kvm-ma PATCH v2 1/2] redhat: globally limit the maximum number of CPUs -Bugzilla: 1527449 -RH-Acked-by: David Gibson -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck - -Upstream-status: n/a - -For RHEL, we support 240, for RHV up to 384 VCPUs. Let's limit this -globally instead of fixing up all machines. This way, we can easily -change (increase) the product specific levels later. 
- -Signed-off-by: David Hildenbrand -Signed-off-by: Miroslav Rezanina - -redhat: remove manual max_cpus limitations for ppc - -RH-Author: David Hildenbrand -Message-id: <20180109103253.24517-3-david@redhat.com> -Patchwork-id: 78532 -O-Subject: [RHEL-7.5 qemu-kvm-ma PATCH v2 2/2] redhat: remove manual max_cpus limitations for ppc -Bugzilla: 1527449 -RH-Acked-by: David Gibson -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck - -Upstream-status: n/a - -RH-Author: Andrew Jones -Message-id: <1390301212-15344-1-git-send-email-drjones@redhat.com> -Patchwork-id: 56862 -O-Subject: [RHEL7.0 qemu-kvm PATCH v6] use recommended max vcpu count -Bugzilla: 998708 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Marcelo Tosatti - -The recommended vcpu max limit (KVM_CAP_NR_VCPUS) should be used instead -of the actual max vcpu limit (KVM_CAP_MAX_VCPUS) to give an error. - -This commit matches the limit to current KVM_CAP_NR_VCPUS value. - -Signed-off-by: Danilo C. L. de Paula +Merged patches (5.2.0 rc0): +- f8a4123 vl: Remove downstream-only MAX_RHEL_CPUS code --- accel/kvm/kvm-all.c | 12 ++++++++++++ - softmmu/vl.c | 18 ++++++++++++++++++ - 2 files changed, 30 insertions(+) + 1 file changed, 12 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index 63ef6af9a1..db9a6b38b1 100644 +index 9ef5daf..4fab04d 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c -@@ -2088,6 +2088,18 @@ static int kvm_init(MachineState *ms) +@@ -2108,6 +2108,18 @@ static int kvm_init(MachineState *ms) soft_vcpus_limit = kvm_recommended_vcpus(s); hard_vcpus_limit = kvm_max_vcpus(s); @@ -104,49 +60,6 @@ index 63ef6af9a1..db9a6b38b1 100644 while (nc->name) { if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " -diff --git a/softmmu/vl.c b/softmmu/vl.c -index 3c383911cd..62fc7c898f 100644 ---- a/softmmu/vl.c -+++ b/softmmu/vl.c -@@ -116,6 +116,8 @@ - - #define MAX_VIRTIO_CONSOLES 1 - -+#define RHEL_MAX_CPUS 384 -+ - static 
const char *data_dir[16]; - static int data_dir_idx; - const char *bios_name = NULL; -@@ -1189,6 +1191,20 @@ static MachineClass *find_default_machine(GSList *machines) - return default_machineclass; - } - -+/* Maximum number of CPUs limited for Red Hat Enterprise Linux */ -+static void limit_max_cpus_in_machines(void) -+{ -+ GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); -+ -+ for (el = machines; el; el = el->next) { -+ MachineClass *mc = el->data; -+ -+ if (mc->max_cpus > RHEL_MAX_CPUS) { -+ mc->max_cpus = RHEL_MAX_CPUS; -+ } -+ } -+} -+ - static int machine_help_func(QemuOpts *opts, MachineState *machine) - { - ObjectProperty *prop; -@@ -3845,6 +3861,8 @@ void qemu_init(int argc, char **argv, char **envp) - "mutually exclusive"); - exit(EXIT_FAILURE); - } -+ /* Maximum number of CPUs limited for Red Hat Enterprise Linux */ -+ limit_max_cpus_in_machines(); - - configure_rtc(qemu_find_opts_singleton("rtc")); - -- -2.27.0 +1.8.3.1 diff --git a/0016-Add-support-for-simpletrace.patch b/0016-Add-support-for-simpletrace.patch deleted file mode 100644 index 3c79abb..0000000 --- a/0016-Add-support-for-simpletrace.patch +++ /dev/null @@ -1,124 +0,0 @@ -From 9238ce7ba819979c2df2caa49c5db3185376fedd Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Thu, 8 Oct 2015 09:50:17 +0200 -Subject: Add support for simpletrace - -As simpletrace is upstream, we just need to properly handle it during rpmbuild. 
- -Signed-off-by: Miroslav Rezanina - -Rebase notes (3.1.0): -- Fixed python 2 to python3 switch - -Rebase notes (2.9.0): -- Added group argument for tracetool.py (upstream) - -Rebase notes (2.8.0): -- Changed tracetool.py parameters - -Rebase notes (weekly-200219): -- Removed python shenigan (done upstream) - -Merged patches (2.3.0): -- db959d6 redhat/qemu-kvm.spec.template: Install qemu-kvm-simpletrace.stp -- 5292fc3 trace: add SystemTap init scripts for simpletrace bridge -- eda9e5e simpletrace: install simpletrace.py -- 85c4c8f trace: add systemtap-initscript README file to RPM - -Signed-off-by: Danilo C. L. de Paula ---- - .gitignore | 2 ++ - Makefile | 4 +++ - README.systemtap | 43 +++++++++++++++++++++++++ - redhat/qemu-kvm.spec.template | 25 +++++++++++++- - scripts/systemtap/conf.d/qemu_kvm.conf | 4 +++ - scripts/systemtap/script.d/qemu_kvm.stp | 1 + - 6 files changed, 78 insertions(+), 1 deletion(-) - create mode 100644 README.systemtap - create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf - create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp - -diff --git a/Makefile b/Makefile -index 42e854b2b1..3b9ff25f10 100644 ---- a/Makefile -+++ b/Makefile -@@ -996,6 +996,10 @@ endif - $(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \ - done - $(INSTALL_DATA) $(BUILD_DIR)/trace-events-all "$(DESTDIR)$(qemu_datadir)/trace-events-all" -+ $(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/systemtap/script.d" -+ $(INSTALL_DATA) $(SRC_PATH)/scripts/systemtap/script.d/qemu_kvm.stp "$(DESTDIR)$(qemu_datadir)/systemtap/script.d/" -+ $(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)/systemtap/conf.d" -+ $(INSTALL_DATA) $(SRC_PATH)/scripts/systemtap/conf.d/qemu_kvm.conf "$(DESTDIR)$(qemu_datadir)/systemtap/conf.d/" - - .PHONY: ctags - ctags: -diff --git a/README.systemtap b/README.systemtap -new file mode 100644 -index 0000000000..ad913fc990 ---- /dev/null -+++ b/README.systemtap -@@ -0,0 +1,43 @@ -+QEMU tracing using systemtap-initscript 
-+--------------------------------------- -+ -+You can capture QEMU trace data all the time using systemtap-initscript. This -+uses SystemTap's flight recorder mode to trace all running guests to a -+fixed-size buffer on the host. Old trace entries are overwritten by new -+entries when the buffer size wraps. -+ -+1. Install the systemtap-initscript package: -+ # yum install systemtap-initscript -+ -+2. Install the systemtap scripts and the conf file: -+ # cp /usr/share/qemu-kvm/systemtap/script.d/qemu_kvm.stp /etc/systemtap/script.d/ -+ # cp /usr/share/qemu-kvm/systemtap/conf.d/qemu_kvm.conf /etc/systemtap/conf.d/ -+ -+The set of trace events to enable is given in qemu_kvm.stp. This SystemTap -+script can be customized to add or remove trace events provided in -+/usr/share/systemtap/tapset/qemu-kvm-simpletrace.stp. -+ -+SystemTap customizations can be made to qemu_kvm.conf to control the flight -+recorder buffer size and whether to store traces in memory only or disk too. -+See stap(1) for option documentation. -+ -+3. Start the systemtap service. -+ # service systemtap start qemu_kvm -+ -+4. Make the service start at boot time. -+ # chkconfig systemtap on -+ -+5. Confirm that the service works. -+ # service systemtap status qemu_kvm -+ qemu_kvm is running... -+ -+When you want to inspect the trace buffer, perform the following steps: -+ -+1. Dump the trace buffer. -+ # staprun -A qemu_kvm >/tmp/trace.log -+ -+2. Start the systemtap service because the preceding step stops the service. -+ # service systemtap start qemu_kvm -+ -+3. Translate the trace record to readable format. 
-+ # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log -diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf -new file mode 100644 -index 0000000000..372d8160a4 ---- /dev/null -+++ b/scripts/systemtap/conf.d/qemu_kvm.conf -@@ -0,0 +1,4 @@ -+# Force load uprobes (see BZ#1118352) -+stap -e 'probe process("/usr/libexec/qemu-kvm").function("main") { printf("") }' -c true -+ -+qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes -diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp -new file mode 100644 -index 0000000000..c04abf9449 ---- /dev/null -+++ b/scripts/systemtap/script.d/qemu_kvm.stp -@@ -0,0 +1 @@ -+probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} --- -2.27.0 - diff --git a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch similarity index 90% rename from 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch rename to 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 4ca0022..96ac311 100644 --- a/0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From 44c255eddd3096d63ebdc055181d3fcebe202eef Mon Sep 17 00:00:00 2001 +From 4f1d9fa771f3932ab14319a9df8cb37e1c9f7547 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -17,15 +17,11 @@ to reflect this change. Only architectures available in RHEL are updated. 
Signed-off-by: Miroslav Rezanina ---- - -Rebase notes (weekly-200129): +Rebase notes (5.1.0 rc0): - qemu-block-drivers.texi converted to qemu-block-drivers.rst (upstream) -Rebase notes (weekly-200708 +Rebase notes (5.2.0 rc0): - rewrite patch to new docs structure - -Signed-off-by: Danilo C. L. de Paula --- docs/defs.rst.inc | 4 ++-- docs/interop/live-block-operations.rst | 4 ++-- @@ -35,7 +31,7 @@ Signed-off-by: Danilo C. L. de Paula 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/defs.rst.inc b/docs/defs.rst.inc -index 48d05aaf33..d74dbdeca9 100644 +index 48d05aa..d74dbde 100644 --- a/docs/defs.rst.inc +++ b/docs/defs.rst.inc @@ -9,7 +9,7 @@ @@ -49,14 +45,14 @@ index 48d05aaf33..d74dbdeca9 100644 .. |I2C| replace:: I\ :sup:`2`\ C .. |I2S| replace:: I\ :sup:`2`\ S diff --git a/docs/interop/live-block-operations.rst b/docs/interop/live-block-operations.rst -index 48afdc7927..6650b2c975 100644 +index e13f5a2..6650b2c 100644 --- a/docs/interop/live-block-operations.rst +++ b/docs/interop/live-block-operations.rst @@ -129,7 +129,7 @@ To show some example invocations of command-line, we will use the following invocation of QEMU, with a QMP server running over UNIX socket:: -- $ ./x86_64-softmmu/qemu-system-x86_64 -display none -no-user-config \ +- $ ./qemu-system-x86_64 -display none -no-user-config \ + $ qemu-kvm -display none -no-user-config \ -M q35 -nodefaults -m 512 \ -blockdev node-name=node-A,driver=qcow2,file.driver=file,file.node-name=file,file.filename=./a.qcow2 \ @@ -65,13 +61,13 @@ index 48afdc7927..6650b2c975 100644 simplicity's sake, the destination QEMU is started on the same host, but it could be located elsewhere):: -- $ ./x86_64-softmmu/qemu-system-x86_64 -display none -no-user-config \ +- $ ./qemu-system-x86_64 -display none -no-user-config \ + $ qemu-kvm -display none -no-user-config \ -M q35 -nodefaults -m 512 \ -blockdev 
node-name=node-TargetDisk,driver=qcow2,file.driver=file,file.node-name=file,file.filename=./target-disk.qcow2 \ -device virtio-blk,drive=node-TargetDisk,id=virtio0 \ diff --git a/docs/tools/qemu-trace-stap.rst b/docs/tools/qemu-trace-stap.rst -index fb70445c75..0d9a783112 100644 +index fb70445..0d9a783 100644 --- a/docs/tools/qemu-trace-stap.rst +++ b/docs/tools/qemu-trace-stap.rst @@ -45,19 +45,19 @@ The following commands are valid: @@ -129,10 +125,10 @@ index fb70445c75..0d9a783112 100644 See also -------- diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst -index 824e713491..8449936c63 100644 +index 34a9e40..50fec10 100644 --- a/docs/tools/virtiofsd.rst +++ b/docs/tools/virtiofsd.rst -@@ -116,7 +116,7 @@ Export ``/var/lib/fs/vm001/`` on vhost-user UNIX domain socket +@@ -297,7 +297,7 @@ Export ``/var/lib/fs/vm001/`` on vhost-user UNIX domain socket :: host# virtiofsd --socket-path=/var/run/vm001-vhost-fs.sock -o source=/var/lib/fs/vm001 @@ -142,10 +138,10 @@ index 824e713491..8449936c63 100644 -device vhost-user-fs-pci,chardev=char0,tag=myfs \ -object memory-backend-memfd,id=mem,size=4G,share=on \ diff --git a/qemu-options.hx b/qemu-options.hx -index 1700205035..697276859b 100644 +index 53472fd..fde1a62 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -2900,11 +2900,11 @@ SRST +@@ -2935,11 +2935,11 @@ SRST :: @@ -163,5 +159,5 @@ index 1700205035..697276859b 100644 ``-netdev vhost-vdpa,vhostdev=/path/to/dev`` Establish a vhost-vdpa netdev. 
-- -2.27.0 +1.8.3.1 diff --git a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch similarity index 87% rename from 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch rename to 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index 269d357..95964c2 100644 --- a/0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 63a330ad8eeb2025c12d56714cb1271196f80dbe Mon Sep 17 00:00:00 2001 +From b8b6ddabd9482c454a68fabe51660fda6a13b0b4 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] @@ -36,19 +36,16 @@ crashing. Signed-off-by: Fam Zheng Signed-off-by: Miroslav Rezanina -(cherry picked from commit b0caf00bbc35c7d89e02999bdce86e1f867728e8) -(cherry picked from commit c9c4f117d8b507c2f86035c282d537c0a327364f) -(cherry picked from commit 5d586bb2543337f0ff172c6ce942dba3acbcedff) Signed-off-by: Danilo C. L. 
de Paula --- hw/scsi/virtio-scsi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index b49775269e..d3a64a6582 100644 +index 3db9a8a..82c0251 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c -@@ -814,6 +814,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, +@@ -823,6 +823,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, SCSIDevice *sd = SCSI_DEVICE(dev); int ret; @@ -65,5 +62,5 @@ index b49775269e..d3a64a6582 100644 if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { return; -- -2.27.0 +1.8.3.1 diff --git a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch similarity index 92% rename from 0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch rename to 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index 00b6ba7..7e3c2b6 100644 --- a/0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From 9ff2cc74cd1f41abc4b379fbaaaaa03a30415494 Mon Sep 17 00:00:00 2001 +From 90dad3577e6873e23eb99c1b55c9e1f8fe0e1e16 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts @@ -32,10 +32,10 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index 3225fc5a2e..08ae7724d5 100644 +index 9341e97..f11428e 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c -@@ -326,12 +326,19 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, +@@ -333,12 +333,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { @@ -56,5 +56,5 @@ index 3225fc5a2e..08ae7724d5 100644 spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); } -- -2.27.0 +1.8.3.1 diff --git a/0018-usb-xhci-Fix-PCI-capability-order.patch b/0018-usb-xhci-Fix-PCI-capability-order.patch deleted file mode 100644 index 7549bdb..0000000 --- a/0018-usb-xhci-Fix-PCI-capability-order.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 5bb61f2b69d9880dea36d604719ee7bd4cf74ba6 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Fri, 5 May 2017 19:06:14 +0200 -Subject: usb-xhci: Fix PCI capability order - -RH-Author: Dr. David Alan Gilbert -Message-id: <20170505190614.15987-2-dgilbert@redhat.com> -Patchwork-id: 75038 -O-Subject: [RHEL-7.4 qemu-kvm-rhev PATCH 1/1] usb-xhci: Fix PCI capability order -Bugzilla: 1447874 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Gerd Hoffmann -RH-Acked-by: Juan Quintela - -From: "Dr. David Alan Gilbert" - -Upstream commit 1108b2f8a9 in 2.7.0 changed the order -of the PCI capability chain in the XHCI pci device in the case -where the device has the PCIe endpoint capability (i.e. only -older machine types, pc-i440fx-2.0 upstream, pc-i440fx-rhel7.0.0 -apparently for us). - -Changing the order breaks migration compatibility; fixing this -upstream would mean breaking the same case going from 2.7.0->current -that currently works 2.7.0->2.9.0 - so upstream it's a choice -of two breakages. - -Since we never released 2.7.0/2.8.0 we can fix this downstream. 
- -This reverts the order so that we create the capabilities in the -order: - PCIe - MSI - MSI-X - -The symptom is: -qemu-kvm: get_pci_config_device: Bad config data: i=0x71 read: a0 device: 0 cmask: ff wmask: 0 w1cmask:0 -qemu-kvm: Failed to load PCIDevice:config -qemu-kvm: Failed to load xhci:parent_obj -qemu-kvm: error while loading state for instance 0x0 of device '0000:00:0d.0/xhci' -qemu-kvm: load of migration failed: Invalid argument - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Miroslav Rezanina - --- -Rebase notes (2.9.0): -- Change in assert condition (upstream) - -(cherry picked from commit aad727a5ecde1ad4935eb8427604d4df5a1f1f35) -(cherry picked from commit 2dd7402227e77d748a7375233ac9e7feab244bda) - -Conflicts: - hw/usb/hcd-xhci.c - -(cherry picked from commit a42f86dc906cc7d2c16d02bf125ed76847b469cb) -(cherry picked from commit 992ab2e4f6e15d3e51bc716763aa8d6f45c6d29d) -Signed-off-by: Danilo C. L. de Paula ---- - hw/usb/hcd-xhci.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - -diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 38bdfaf3fd..4acd7842ac 100644 ---- a/hw/usb/hcd-xhci.c -+++ b/hw/usb/hcd-xhci.c -@@ -3413,6 +3413,12 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) - xhci->max_pstreams_mask = 0; - } - -+ if (pci_bus_is_express(pci_get_bus(dev)) || -+ xhci_get_flag(xhci, XHCI_FLAG_FORCE_PCIE_ENDCAP)) { -+ ret = pcie_endpoint_cap_init(dev, 0xa0); -+ assert(ret > 0); -+ } -+ - if (xhci->msi != ON_OFF_AUTO_OFF) { - ret = msi_init(dev, 0x70, xhci->numintrs, true, false, &err); - /* Any error other than -ENOTSUP(board's MSI support is broken) -@@ -3461,12 +3467,6 @@ static void usb_xhci_realize(struct PCIDevice *dev, Error **errp) - PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64, - &xhci->mem); - -- if (pci_bus_is_express(pci_get_bus(dev)) || -- xhci_get_flag(xhci, XHCI_FLAG_FORCE_PCIE_ENDCAP)) { -- ret = pcie_endpoint_cap_init(dev, 0xa0); -- assert(ret > 0); -- } -- - if 
(xhci->msix != ON_OFF_AUTO_OFF) { - /* TODO check for errors, and should fail when msix=on */ - msix_init(dev, xhci->numintrs, --- -2.27.0 - diff --git a/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch b/0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch similarity index 93% rename from 0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch rename to 0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch index eaaf5a3..f768d26 100644 --- a/0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +++ b/0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch @@ -1,4 +1,4 @@ -From e27f3d72ac38e37758d4ea5b84bccf03cab8219a Mon Sep 17 00:00:00 2001 +From 35b0411d0de4e3e8ef4fb4cef9ee2e8f8ef836e6 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 13 Mar 2020 12:34:32 +0000 Subject: block: Versioned x-blockdev-reopen API with feature flag @@ -29,10 +29,10 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/qapi/block-core.json b/qapi/block-core.json -index 197bdc1c36..efc0f6a377 100644 +index 04ad80b..2a7dca2 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json -@@ -4118,10 +4118,17 @@ +@@ -4143,10 +4143,17 @@ # image does not have a default backing file name as part of its # metadata. 
# @@ -52,5 +52,5 @@ index 197bdc1c36..efc0f6a377 100644 ## # @blockdev-del: -- -2.27.0 +1.8.3.1 diff --git a/0020-Upstream.patch b/0020-Upstream.patch new file mode 100644 index 0000000..f7ca399 --- /dev/null +++ b/0020-Upstream.patch @@ -0,0 +1,37 @@ +From 36ad7c726f12e4b706eebc7bac185cd91646dbc3 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 11 Nov 2020 15:41:44 +0100 +Subject: Upstream + +--- + default-configs/devices/aarch64-rh-devices.mak | 1 - + hw/arm/Kconfig | 1 + + 2 files changed, 1 insertion(+), 1 deletion(-) + +diff --git a/default-configs/devices/aarch64-rh-devices.mak b/default-configs/devices/aarch64-rh-devices.mak +index 9831940..d289f34 100644 +--- a/default-configs/devices/aarch64-rh-devices.mak ++++ b/default-configs/devices/aarch64-rh-devices.mak +@@ -1,7 +1,6 @@ + include rh-virtio.mak + + CONFIG_ARM_GIC_KVM=y +-CONFIG_ARM_GIC=y + CONFIG_ARM_SMMUV3=y + CONFIG_ARM_V7M=y + CONFIG_ARM_VIRT=y +diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig +index 7d022ee..e69a900 100644 +--- a/hw/arm/Kconfig ++++ b/hw/arm/Kconfig +@@ -6,6 +6,7 @@ config ARM_VIRT + imply VFIO_PLATFORM + imply VFIO_XGMAC + imply TPM_TIS_SYSBUS ++ select ARM_GIC + select ACPI + select ARM_SMMUV3 + select GPIO_KEY +-- +1.8.3.1 + diff --git a/0021-RHEL-9-test.patch b/0021-RHEL-9-test.patch new file mode 100644 index 0000000..611078f --- /dev/null +++ b/0021-RHEL-9-test.patch @@ -0,0 +1,33 @@ +From c7b7fffb5ad743115dac8918200a848513acbd4e Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 13 Nov 2020 11:03:26 +0100 +Subject: RHEL 9 test + +--- + redhat/Makefile.common | 4 ++-- + redhat/qemu-kvm.spec.template | 8 -------- + tests/meson.build | 3 +-- + 3 files changed, 3 insertions(+), 12 deletions(-) + +diff --git a/tests/meson.build b/tests/meson.build +index afeb6be..2180fc4 100644 +--- a/tests/meson.build ++++ b/tests/meson.build +@@ -136,7 +136,7 @@ if have_block + 'test-blockjob': [testblock], + 'test-blockjob-txn': [testblock], + 'test-block-backend': 
[testblock], +- 'test-block-iothread': [testblock], ++# 'test-block-iothread': [testblock], + 'test-write-threshold': [testblock], + 'test-crypto-hash': [crypto], + 'test-crypto-hmac': [crypto], +@@ -286,5 +286,4 @@ if not get_option('tcg').disabled() + endif + + subdir('qapi-schema') +-subdir('qtest') + subdir('migration') +-- +1.8.3.1 + diff --git a/0022-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch b/0022-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch deleted file mode 100644 index 7bafd66..0000000 --- a/0022-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch +++ /dev/null @@ -1,45 +0,0 @@ -From f70eb50b7107ee4e18cec3561bbdde1cbd0a0bdb Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Mon, 27 Jul 2020 13:29:01 +0200 -Subject: RHEL-only: Enable vTPM for POWER in downstream configs - -RH-Author: David Gibson -Message-id: <20200522032718.387731-1-dgibson@redhat.com> -Patchwork-id: 96743 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH] RHEL-only: Enable vTPM for POWER in downstream configs -Bugzilla: 1781911 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Thomas Huth -RH-Acked-by: Greg Kurz - -From: David Gibson - -With the rebase to 5.0 we have the necessary code to implement virtual TPMs -for POWER targets. However, it's not enabled in the Red Hat configuration. -This downstream only patch corrects that. - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1781911 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=28742072 -Testing: With brewed qemu was able to see a vTPM device in a guest - -Signed-off-by: David Gibson -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. 
de Paula ---- - default-configs/ppc64-rh-devices.mak | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/default-configs/ppc64-rh-devices.mak b/default-configs/ppc64-rh-devices.mak -index ecbe53fe63..032422e103 100644 ---- a/default-configs/ppc64-rh-devices.mak -+++ b/default-configs/ppc64-rh-devices.mak -@@ -32,3 +32,6 @@ CONFIG_XICS_SPAPR=y - CONFIG_XIVE=y - CONFIG_XIVE_SPAPR=y - CONFIG_XIVE_KVM=y -+CONFIG_TPM_SPAPR=y -+CONFIG_TPM_EMULATOR=y -+CONFIG_TPM_PASSTHROUGH=y --- -2.27.0 - diff --git a/0023-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch b/0023-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch deleted file mode 100644 index 7489bf1..0000000 --- a/0023-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 69d8ae7ad5314e465c24fdeb1317751fa3e50ceb Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Mon, 27 Jul 2020 13:29:01 +0200 -Subject: redhat: fix 5.0 rebase missing ISA TPM TIS -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20200608155426.112078-1-marcandre.lureau@redhat.com> -Patchwork-id: 97457 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH] redhat: fix 5.0 rebase missing ISA TPM TIS -Bugzilla: 1841529 -RH-Acked-by: Auger Eric -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Danilo de Paula - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1841529 -BRANCH: rhel-av-8.3.0-preview-2020-04-29 -UPSTREAM: N/A -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=29172313 - -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. 
de Paula ---- - default-configs/x86_64-rh-devices.mak | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/default-configs/x86_64-rh-devices.mak b/default-configs/x86_64-rh-devices.mak -index d59b6d9bb5..1469e05382 100644 ---- a/default-configs/x86_64-rh-devices.mak -+++ b/default-configs/x86_64-rh-devices.mak -@@ -95,6 +95,6 @@ CONFIG_WDT_IB6300ESB=y - CONFIG_WDT_IB700=y - CONFIG_XIO3130=y - CONFIG_TPM_CRB=y --CONFIG_TPM_TIS=y -+CONFIG_TPM_TIS_ISA=y - CONFIG_TPM_EMULATOR=y - CONFIG_TPM_PASSTHROUGH=y --- -2.27.0 - diff --git a/0024-redhat-define-hw_compat_8_2.patch b/0024-redhat-define-hw_compat_8_2.patch deleted file mode 100644 index c31fdde..0000000 --- a/0024-redhat-define-hw_compat_8_2.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 83486421151111ec118cc703819bd4764fea677e Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jul 2020 13:29:01 +0200 -Subject: redhat: define hw_compat_8_2 - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200619154227.23845-2-dgilbert@redhat.com> -Patchwork-id: 97662 -O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 1/2] redhat: define hw_compat_8_2 -Bugzilla: 1842902 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Laurent Vivier -RH-Acked-by: Stefan Hajnoczi - -From: Laurent Vivier - -Signed-off-by: Laurent Vivier -Signed-off-by: Dr. David Alan Gilbert - For minor fix - -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/core/machine.c | 28 ++++++++++++++++++++++++++++ - include/hw/boards.h | 3 +++ - 2 files changed, 31 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 2b8e480040..5476af98e1 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -28,6 +28,34 @@ - #include "hw/mem/nvdimm.h" - #include "migration/vmstate.h" - -+/* -+ * The same as hw_compat_4_2 -+ */ -+GlobalProperty hw_compat_rhel_8_2[] = { -+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ -+ { "virtio-blk-device", "queue-size", "128"}, -+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ -+ { "virtio-scsi-device", "virtqueue_size", "128"}, -+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ -+ { "virtio-blk-device", "x-enable-wce-if-config-wce", "off" }, -+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ -+ { "virtio-blk-device", "seg-max-adjust", "off"}, -+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ -+ { "virtio-scsi-device", "seg_max_adjust", "off"}, -+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ -+ { "vhost-blk-device", "seg_max_adjust", "off"}, -+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ -+ { "usb-host", "suppress-remote-wake", "off" }, -+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ -+ { "usb-redir", "suppress-remote-wake", "off" }, -+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ -+ { "qxl", "revision", "4" }, -+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ -+ { "qxl-vga", "revision", "4" }, -+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ -+ { "fw_cfg", "acpi-mr-restore", "false" }, -+}; -+const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); - /* - * The same as hw_compat_4_1 - */ -diff --git a/include/hw/boards.h b/include/hw/boards.h -index c3577319c0..f918a15c66 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -375,6 +375,9 @@ extern const size_t hw_compat_2_2_len; - extern GlobalProperty hw_compat_2_1[]; - extern const size_t hw_compat_2_1_len; - -+extern GlobalProperty hw_compat_rhel_8_2[]; -+extern const size_t hw_compat_rhel_8_2_len; -+ 
- extern GlobalProperty hw_compat_rhel_8_1[]; - extern const size_t hw_compat_rhel_8_1_len; - --- -2.27.0 - diff --git a/0025-x86-Add-8.3.0-x86_64-machine-type.patch b/0025-x86-Add-8.3.0-x86_64-machine-type.patch deleted file mode 100644 index 70da7fa..0000000 --- a/0025-x86-Add-8.3.0-x86_64-machine-type.patch +++ /dev/null @@ -1,119 +0,0 @@ -From b02c9f5373f6ffa65b8ddbdee32d6ed4e59198ad Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 27 Jul 2020 13:29:01 +0200 -Subject: x86: Add 8.3.0 x86_64 machine type - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200619154227.23845-3-dgilbert@redhat.com> -Patchwork-id: 97663 -O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 2/2] x86: Add 8.3.0 x86_64 machine type -Bugzilla: 1842902 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Laurent Vivier -RH-Acked-by: Stefan Hajnoczi - -From: "Dr. David Alan Gilbert" - -Not much change, just the smbase-smram. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/i386/pc.c | 6 ++++++ - hw/i386/pc_piix.c | 4 ++++ - hw/i386/pc_q35.c | 22 ++++++++++++++++++++++ - include/hw/i386/pc.h | 3 +++ - 4 files changed, 35 insertions(+) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 063f01d19a..a75e0137ab 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -361,6 +361,12 @@ GlobalProperty pc_rhel_compat[] = { - }; - const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); - -+GlobalProperty pc_rhel_8_2_compat[] = { -+ /* pc_rhel_8_2_compat from pc_compat_4_2 */ -+ { "mch", "smbase-smram", "off" }, -+}; -+const size_t pc_rhel_8_2_compat_len = G_N_ELEMENTS(pc_rhel_8_2_compat); -+ - /* pc_rhel_8_1_compat is empty since pc_4_1_compat is */ - GlobalProperty pc_rhel_8_1_compat[] = { }; - const size_t pc_rhel_8_1_compat_len = G_N_ELEMENTS(pc_rhel_8_1_compat); -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 6d935645b6..4af4497a0c 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1030,6 +1030,10 @@ static void pc_machine_rhel760_options(MachineClass *m) - m->smbus_no_migration_support = true; - pcmc->pvh_enabled = false; - pcmc->default_cpu_version = CPU_VERSION_LEGACY; -+ compat_props_add(m->compat_props, hw_compat_rhel_8_2, -+ hw_compat_rhel_8_2_len); -+ compat_props_add(m->compat_props, pc_rhel_8_2_compat, -+ pc_rhel_8_2_compat_len); - compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); - compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); - compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index f4edb049d6..d75d6d8805 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -589,6 +589,23 @@ static void pc_q35_machine_rhel_options(MachineClass *m) - compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); - } - -+static void pc_q35_init_rhel830(MachineState *machine) -+{ -+ pc_q35_init(machine); -+} -+ -+static void 
pc_q35_machine_rhel830_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); -+ m->desc = "RHEL-8.3.0 PC (Q35 + ICH9, 2009)"; -+ pcmc->smbios_stream_product = "RHEL-AV"; -+ pcmc->smbios_stream_version = "8.3.0"; -+} -+ -+DEFINE_PC_MACHINE(q35_rhel830, "pc-q35-rhel8.3.0", pc_q35_init_rhel830, -+ pc_q35_machine_rhel830_options); -+ - static void pc_q35_init_rhel820(MachineState *machine) - { - pc_q35_init(machine); -@@ -599,8 +616,13 @@ static void pc_q35_machine_rhel820_options(MachineClass *m) - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_q35_machine_rhel_options(m); - m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; -+ m->alias = NULL; - pcmc->smbios_stream_product = "RHEL-AV"; - pcmc->smbios_stream_version = "8.2.0"; -+ compat_props_add(m->compat_props, hw_compat_rhel_8_2, -+ hw_compat_rhel_8_2_len); -+ compat_props_add(m->compat_props, pc_rhel_8_2_compat, -+ pc_rhel_8_2_compat_len); - } - - DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 156be22995..e9dc8c370c 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -271,6 +271,9 @@ extern const size_t pc_compat_1_4_len; - extern GlobalProperty pc_rhel_compat[]; - extern const size_t pc_rhel_compat_len; - -+extern GlobalProperty pc_rhel_8_2_compat[]; -+extern const size_t pc_rhel_8_2_compat_len; -+ - extern GlobalProperty pc_rhel_8_1_compat[]; - extern const size_t pc_rhel_8_1_compat_len; - --- -2.27.0 - diff --git a/0027-hw-arm-Changes-to-rhel820-machine.patch b/0027-hw-arm-Changes-to-rhel820-machine.patch deleted file mode 100644 index 84c289c..0000000 --- a/0027-hw-arm-Changes-to-rhel820-machine.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 12990ad9479216d96e4d67a7e613d2ef3b4fb700 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 12 Aug 2020 10:58:04 +0200 -Subject: hw/arm: Changes to rhel820 machine - -RH-Author: Gavin Shan -Message-id: 
<20200630013648.101937-1-gshan@redhat.com> -Patchwork-id: 97844 -O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH v5 1/3] hw/arm: Changes to rhel820 machine -Bugzilla: 1818843 -RH-Acked-by: Auger Eric -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Andrew Jones - -This applies two changes to rhel820 machine: - - * Set the gic version to VIRT_GIC_VERSION_NOSEL by default, which - doesn't cause functional changes. - * Disallow to configure the RAS property, which is hidden by default. - -Signed-off-by: Gavin Shan -RH-Acked-by: Auger Eric -RH-Acked-by: Andrew Jones -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/virt.c | 17 +++++------------ - 1 file changed, 5 insertions(+), 12 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index e1a17e7c87..c22e1e6d5c 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2105,6 +2105,7 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, - visit_type_OnOffAuto(v, name, &vms->acpi, errp); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static bool virt_get_ras(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2125,6 +2126,7 @@ static bool virt_get_mte(Object *obj, Error **errp) - - return vms->mte; - } -+#endif /* disabled for RHEL */ - - static void virt_set_mte(Object *obj, bool value, Error **errp) - { -@@ -2802,12 +2804,8 @@ static void rhel820_virt_instance_init(Object *obj) - object_property_set_description(obj, "highmem", - "Set on/off to enable/disable using " - "physical address space above 32 bits"); -- /* -- * Default GIC type is still v2, but became configurable for RHEL. We -- * keep v2 instead of max as TCG CI test cases require an MSI controller -- * and there is no userspace ITS MSI emulation available. 
-- */ -- vms->gic_version = 2; -+ -+ vms->gic_version = VIRT_GIC_VERSION_NOSEL; - object_property_add_str(obj, "gic-version", virt_get_gic_version, - virt_set_gic_version); - object_property_set_description(obj, "gic-version", -@@ -2834,13 +2832,8 @@ static void rhel820_virt_instance_init(Object *obj) - object_property_set_description(obj, "iommu", - "Set the IOMMU type. " - "Valid values are none and smmuv3"); -- vms->ras = false; -- object_property_add_bool(obj, "ras", virt_get_ras, -- virt_set_ras); -- object_property_set_description(obj, "ras", -- "Set on/off to enable/disable reporting host memory errors " -- "to a KVM guest using ACPI and guest external abort exceptions"); - -+ vms->ras = false; - /* MTE is disabled by default. */ - vms->mte = false; - --- -2.27.0 - diff --git a/0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch b/0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch deleted file mode 100644 index 04d0eda..0000000 --- a/0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 46d5a797986373ecc0dfa578cae07a3641847935 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 12 Aug 2020 10:58:04 +0200 -Subject: hw/arm: Introduce rhel_virt_instance_init() helper - -RH-Author: Gavin Shan -Message-id: <20200629022939.76453-3-gshan@redhat.com> -Patchwork-id: 97838 -O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH v4 2/3] hw/arm: Introduce rhel_virt_instance_init() helper -Bugzilla: 1818843 -RH-Acked-by: Auger Eric -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Andrew Jones - -This introduces rhel_virt_instance_init() helper function so that -it can be shared by rhel820 and rhel830 machine. This shouldn't -cause functional changes. - -Signed-off-by: Gavin Shan -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/arm/virt.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index c22e1e6d5c..650668a8d1 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2788,7 +2788,7 @@ static void rhel_machine_init(void) - } - type_init(rhel_machine_init); - --static void rhel820_virt_instance_init(Object *obj) -+static void rhel_virt_instance_init(Object *obj) - { - VirtMachineState *vms = VIRT_MACHINE(obj); - VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); -@@ -2841,6 +2841,11 @@ static void rhel820_virt_instance_init(Object *obj) - virt_flash_create(vms); - } - -+static void rhel820_virt_instance_init(Object *obj) -+{ -+ rhel_virt_instance_init(obj); -+} -+ - static void rhel820_virt_options(MachineClass *mc) - { - compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); --- -2.27.0 - diff --git a/0029-hw-arm-Add-rhel830-machine-type.patch b/0029-hw-arm-Add-rhel830-machine-type.patch deleted file mode 100644 index af129dc..0000000 --- a/0029-hw-arm-Add-rhel830-machine-type.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 098954acda750a54d2eb512297bcd205212ee718 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 12 Aug 2020 10:58:04 +0200 -Subject: hw/arm: Add rhel830 machine type - -RH-Author: Gavin Shan -Message-id: <20200630014756.102753-1-gshan@redhat.com> -Patchwork-id: 97845 -O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH v5 3/3] hw/arm: Add rhel830 machine type -Bugzilla: 1818843 -RH-Acked-by: Auger Eric -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Andrew Jones - -This adds rhel830 machine type, whose properties are same as to -rhel820. - -Signed-off-by: Gavin Shan -RH-Acked-by: Auger Eric -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/arm/virt.c | 17 +++++++++++++++-- - 1 file changed, 15 insertions(+), 2 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 650668a8d1..48b58be597 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2841,6 +2841,17 @@ static void rhel_virt_instance_init(Object *obj) - virt_flash_create(vms); - } - -+static void rhel830_virt_instance_init(Object *obj) -+{ -+ rhel_virt_instance_init(obj); -+} -+ -+static void rhel830_virt_options(MachineClass *mc) -+{ -+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); -+} -+DEFINE_RHEL_MACHINE_AS_LATEST(8, 3, 0) -+ - static void rhel820_virt_instance_init(Object *obj) - { - rhel_virt_instance_init(obj); -@@ -2848,6 +2859,8 @@ static void rhel820_virt_instance_init(Object *obj) - - static void rhel820_virt_options(MachineClass *mc) - { -- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); -+ rhel830_virt_options(mc); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, -+ hw_compat_rhel_8_2_len); - } --DEFINE_RHEL_MACHINE_AS_LATEST(8, 2, 0) -+DEFINE_RHEL_MACHINE(8, 2, 0) --- -2.27.0 - diff --git a/0030-redhat-define-pseries-rhel8.3.0-machine-type.patch b/0030-redhat-define-pseries-rhel8.3.0-machine-type.patch deleted file mode 100644 index 165d197..0000000 --- a/0030-redhat-define-pseries-rhel8.3.0-machine-type.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 311a20fb12a4d0ebed840be194db8117c8eea595 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Wed, 12 Aug 2020 10:58:04 +0200 -Subject: redhat: define pseries-rhel8.3.0 machine type - -RH-Author: Laurent Vivier -Message-id: <20200706104117.219174-3-lvivier@redhat.com> -Patchwork-id: 97904 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH v2 2/2] redhat: define pseries-rhel8.3.0 machine type -Bugzilla: 1853265 -RH-Acked-by: Thomas Huth - -Note: rebase to qemu-5.1 introduces - - 32a354dc6c07 ("numa: forbid '-numa node, mem' for 5.1 and newer machine types") - -and so '-numa node, mem' will not be available 
with pseries-rhel8.3.0 - -Signed-off-by: Laurent Vivier -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr.c | 30 ++++++++++++++++++++++++++++-- - 1 file changed, 28 insertions(+), 2 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 756c8667c1..ccceb6d39f 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -4995,16 +4995,42 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) - DEFINE_SPAPR_MACHINE(2_1, "2.1", false); - #endif - -+/* -+ * pseries-rhel8.3.0 -+ * like pseries-5.1 -+ */ -+ -+static void spapr_machine_rhel830_class_options(MachineClass *mc) -+{ -+ /* Defaults for the latest behaviour inherited from the base class */ -+} -+ -+DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", true); -+ - /* - * pseries-rhel8.2.0 -+ * like pseries-4.2 + pseries-5.0 -+ * except SPAPR_CAP_CCF_ASSIST that has been backported to pseries-rhel8.1.0 - */ - - static void spapr_machine_rhel820_class_options(MachineClass *mc) - { -- /* Defaults for the latest behaviour inherited from the base class */ -+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel830_class_options(mc); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, -+ hw_compat_rhel_8_2_len); -+ -+ /* from pseries-4.2 */ -+ smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF; -+ smc->rma_limit = 16 * GiB; -+ mc->nvdimm_supported = false; -+ -+ /* from pseries-5.0 */ -+ mc->numa_mem_supported = true; - } - --DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", true); -+DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", false); - - /* - * pseries-rhel8.1.0 --- -2.27.0 - diff --git a/0031-ppc-Set-correct-max_cpus-value-on-spapr-rhel-machine.patch b/0031-ppc-Set-correct-max_cpus-value-on-spapr-rhel-machine.patch deleted file mode 100644 index 6fde229..0000000 --- a/0031-ppc-Set-correct-max_cpus-value-on-spapr-rhel-machine.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 12841675e2a81f3b98cb9741b54c3041cebf9e87 Mon Sep 17 00:00:00 2001 -From: 
Eduardo Habkost -Date: Wed, 12 Aug 2020 10:58:04 +0200 -Subject: ppc: Set correct max_cpus value on spapr-rhel* machine types - -RH-Author: Eduardo Habkost -Message-id: <20200729180236.627559-2-ehabkost@redhat.com> -Patchwork-id: 98073 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH v2 1/4] ppc: Set correct max_cpus value on spapr-rhel* machine types -Bugzilla: 1819292 -RH-Acked-by: Andrew Jones -RH-Acked-by: Thomas Huth -RH-Acked-by: David Gibson - -Currently vl.c forces MachineClass::max_cpus to be -<= RHEL_MAX_CPUS (384) on all machine types. - -Instead of relying on that global limit, set max_cpus=384 -explicitly at spapr_machine_rhel820_class_options(), which will -affect all pseriesl-rhel* machine types. - -This will keep exactly the same behavior as before, but will -allow us to remove the downstream-only RHEL_MAX_CPUS code at vl.c -later. - -Signed-off-by: Eduardo Habkost -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index ccceb6d39f..1c367a2367 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -5003,6 +5003,9 @@ DEFINE_SPAPR_MACHINE(2_1, "2.1", false); - static void spapr_machine_rhel830_class_options(MachineClass *mc) - { - /* Defaults for the latest behaviour inherited from the base class */ -+ -+ /* Maximum supported VCPU count for all pseries-rhel* machines */ -+ mc->max_cpus = 384; - } - - DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", true); --- -2.27.0 - diff --git a/0032-arm-Set-correct-max_cpus-value-on-virt-rhel-machine-.patch b/0032-arm-Set-correct-max_cpus-value-on-virt-rhel-machine-.patch deleted file mode 100644 index 0656938..0000000 --- a/0032-arm-Set-correct-max_cpus-value-on-virt-rhel-machine-.patch +++ /dev/null @@ -1,53 +0,0 @@ -From ee8e99d0a7821b26d0afe20c3a1f7517e4fa6772 Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Wed, 12 Aug 2020 10:58:04 +0200 -Subject: arm: Set correct max_cpus value on 
virt-rhel* machine types - -RH-Author: Eduardo Habkost -Message-id: <20200729180236.627559-3-ehabkost@redhat.com> -Patchwork-id: 98074 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH v2 2/4] arm: Set correct max_cpus value on virt-rhel* machine types -Bugzilla: 1819292 -RH-Acked-by: Andrew Jones -RH-Acked-by: Thomas Huth -RH-Acked-by: David Gibson - -Currently vl.c forces MachineClass::max_cpus to be -<= RHEL_MAX_CPUS (384) on all machine types. - -Instead of relying on that global limit, set max_cpus=384 -explicitly at the virt-rhel-machine base class, which will affect -all virt-rhel* machine types. - -This will keep exactly the same behavior as before, but will -allow us to remove the downstream-only RHEL_MAX_CPUS code at vl.c -later. - -Signed-off-by: Eduardo Habkost -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/virt.c | 7 ++----- - 1 file changed, 2 insertions(+), 5 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 48b58be597..fb5a5a7013 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2736,11 +2736,8 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - - mc->family = "virt-rhel-Z"; - mc->init = machvirt_init; -- /* Start with max_cpus set to 512, which is the maximum supported by KVM. -- * The value may be reduced later when we have more information about the -- * configuration of the particular instance. 
-- */ -- mc->max_cpus = 512; -+ /* Maximum supported VCPU count for all virt-rhel* machines */ -+ mc->max_cpus = 384; - mc->block_default_type = IF_VIRTIO; - mc->no_cdrom = 1; - mc->pci_allow_0_address = true; --- -2.27.0 - diff --git a/0033-vl-Remove-downstream-only-MAX_RHEL_CPUS-code.patch b/0033-vl-Remove-downstream-only-MAX_RHEL_CPUS-code.patch deleted file mode 100644 index b522ab1..0000000 --- a/0033-vl-Remove-downstream-only-MAX_RHEL_CPUS-code.patch +++ /dev/null @@ -1,83 +0,0 @@ -From f8a4123e211ed0685097f496c99e73913a6b34d0 Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Wed, 12 Aug 2020 10:58:04 +0200 -Subject: vl: Remove downstream-only MAX_RHEL_CPUS code - -RH-Author: Eduardo Habkost -Message-id: <20200729180236.627559-4-ehabkost@redhat.com> -Patchwork-id: 98075 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH v2 3/4] vl: Remove downstream-only MAX_RHEL_CPUS code -Bugzilla: 1819292 -RH-Acked-by: Andrew Jones -RH-Acked-by: Thomas Huth -RH-Acked-by: David Gibson - -Now that all machine types have max_cpus set to the actual -supported number of VCPUs, the MAX_RHEL_CPUS code becomes -unnecessary and can be completely removed. - -For reference these are the max_cpus values set by the RHEL -machine types: - -- arm: virt-rhel*: max_cpus=384 (rhel_machine_class_init()); -- ppc: spapr-rhel*: max_cpus=384 - (spapr_machine_rhel820_class_options()); -- s390: s390-ccw*: max_cpus=248 (ccw_machine_class_init()); -- x86: q35: max_cpus=384 (pc_q35_machine_rhel_options()); -- x86: q35-rhel7.3.0 and older: max_cpus=255 - (pc_q35_machine_rhel730_options()); -- x86: pc-i440fx*: max_cpus=240 (pc_machine_class_init()). - -Signed-off-by: Eduardo Habkost -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. 
de Paula ---- - softmmu/vl.c | 18 ------------------ - 1 file changed, 18 deletions(-) - -diff --git a/softmmu/vl.c b/softmmu/vl.c -index 62fc7c898f..3c383911cd 100644 ---- a/softmmu/vl.c -+++ b/softmmu/vl.c -@@ -116,8 +116,6 @@ - - #define MAX_VIRTIO_CONSOLES 1 - --#define RHEL_MAX_CPUS 384 -- - static const char *data_dir[16]; - static int data_dir_idx; - const char *bios_name = NULL; -@@ -1191,20 +1189,6 @@ static MachineClass *find_default_machine(GSList *machines) - return default_machineclass; - } - --/* Maximum number of CPUs limited for Red Hat Enterprise Linux */ --static void limit_max_cpus_in_machines(void) --{ -- GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); -- -- for (el = machines; el; el = el->next) { -- MachineClass *mc = el->data; -- -- if (mc->max_cpus > RHEL_MAX_CPUS) { -- mc->max_cpus = RHEL_MAX_CPUS; -- } -- } --} -- - static int machine_help_func(QemuOpts *opts, MachineState *machine) - { - ObjectProperty *prop; -@@ -3861,8 +3845,6 @@ void qemu_init(int argc, char **argv, char **envp) - "mutually exclusive"); - exit(EXIT_FAILURE); - } -- /* Maximum number of CPUs limited for Red Hat Enterprise Linux */ -- limit_max_cpus_in_machines(); - - configure_rtc(qemu_find_opts_singleton("rtc")); - --- -2.27.0 - diff --git a/0034-q35-Set-max_cpus-to-512.patch b/0034-q35-Set-max_cpus-to-512.patch deleted file mode 100644 index 410103d..0000000 --- a/0034-q35-Set-max_cpus-to-512.patch +++ /dev/null @@ -1,45 +0,0 @@ -From f2edc4f9262e9130d020ef6caef2443e7ae31371 Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Wed, 12 Aug 2020 10:58:04 +0200 -Subject: q35: Set max_cpus to 512 - -RH-Author: Eduardo Habkost -Message-id: <20200729180236.627559-5-ehabkost@redhat.com> -Patchwork-id: 98076 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH v2 4/4] q35: Set max_cpus to 512 -Bugzilla: 1819292 -RH-Acked-by: Andrew Jones -RH-Acked-by: Thomas Huth -RH-Acked-by: David Gibson - -Increase supported VCPU count for the Q35 machine type. 
- -The VCPU count that partners confirmed to work depended on other -parameters (especially RAM size), but fluctuated between 640 and -710 VCPUs. I chose to increase the limit to 512 to be -conservative, until we find out what exactly prevents larger VMs -from booting. - -Signed-off-by: Eduardo Habkost -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. de Paula ---- - hw/i386/pc_q35.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index d75d6d8805..c709460ab7 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -585,7 +585,7 @@ static void pc_q35_machine_rhel_options(MachineClass *m) - machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); - machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); - m->alias = "q35"; -- m->max_cpus = 384; -+ m->max_cpus = 512; - compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); - } - --- -2.27.0 - diff --git a/0035-RHEL-only-arm-virt-Allow-the-TPM_TIS_SYSBUS-device-d.patch b/0035-RHEL-only-arm-virt-Allow-the-TPM_TIS_SYSBUS-device-d.patch deleted file mode 100644 index 69877bc..0000000 --- a/0035-RHEL-only-arm-virt-Allow-the-TPM_TIS_SYSBUS-device-d.patch +++ /dev/null @@ -1,39 +0,0 @@ -From e5edd3824a782900bcb7aa2a980696e550b55cf6 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 12 Aug 2020 11:03:02 +0200 -Subject: RHEL-only: arm/virt: Allow the TPM_TIS_SYSBUS device dynamic - allocation in machvirt - -RH-Author: Auger Eric -Message-id: <20200811163601.14341-2-eric.auger@redhat.com> -Patchwork-id: 98143 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/2] RHEL-only: arm/virt: Allow the TPM_TIS_SYSBUS device dynamic allocation in machvirt -Bugzilla: 1801242 - -Allow the TPM_TIS_SYSBUS device dynamic instantiation onto the -platform bus. The TPM_TIS sysbus device compilation will be enabled -in a separate patch. That way associated qmp tests pass once the config -is set. 
- -Signed-off-by: Eric Auger -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/virt.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index fb5a5a7013..f087483a04 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2738,6 +2738,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - mc->init = machvirt_init; - /* Maximum supported VCPU count for all virt-rhel* machines */ - mc->max_cpus = 384; -+ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); - mc->block_default_type = IF_VIRTIO; - mc->no_cdrom = 1; - mc->pci_allow_0_address = true; --- -2.27.0 - diff --git a/0036-RHEL-only-Enable-vTPM-for-ARM-in-downstream-configs.patch b/0036-RHEL-only-Enable-vTPM-for-ARM-in-downstream-configs.patch deleted file mode 100644 index d833611..0000000 --- a/0036-RHEL-only-Enable-vTPM-for-ARM-in-downstream-configs.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 8310f89d8818bc9d41b386bbb2824593aa8e8dca Mon Sep 17 00:00:00 2001 -From: Auger Eric -Date: Wed, 12 Aug 2020 11:03:31 +0200 -Subject: RHEL-only: Enable vTPM for ARM in downstream configs - -RH-Author: Auger Eric -Message-id: <20200811163601.14341-3-eric.auger@redhat.com> -Patchwork-id: 98144 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 2/2] RHEL-only: Enable vTPM for ARM in downstream configs -Bugzilla: 1801242 - -We allow the compilation of the TPM_TIS_SYSBUS device and both -passthrough and software emulation backends. - -Signed-off-by: Eric Auger -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. 
de Paula ---- - default-configs/aarch64-rh-devices.mak | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak -index f0cf5a1b22..0b6a03f816 100644 ---- a/default-configs/aarch64-rh-devices.mak -+++ b/default-configs/aarch64-rh-devices.mak -@@ -20,3 +20,6 @@ CONFIG_VIRTIO_PCI=y - CONFIG_XIO3130=y - CONFIG_NVDIMM=y - CONFIG_ACPI_APEI=y -+CONFIG_TPM_EMULATOR=y -+CONFIG_TPM_PASSTHROUGH=y -+CONFIG_TPM_TIS_SYSBUS=y --- -2.27.0 - diff --git a/kvm-Disable-TPM-passthrough-backend-on-ARM.patch b/kvm-Disable-TPM-passthrough-backend-on-ARM.patch deleted file mode 100644 index 59f9ffb..0000000 --- a/kvm-Disable-TPM-passthrough-backend-on-ARM.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 4a8ccfdf57fb0e0835faef9d95939d31546202f8 Mon Sep 17 00:00:00 2001 -From: Auger Eric -Date: Wed, 19 Aug 2020 09:16:03 -0400 -Subject: [PATCH 3/4] Disable TPM passthrough backend on ARM -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Auger Eric -Message-id: <20200819091603.23319-1-eric.auger@redhat.com> -Patchwork-id: 98190 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH] Disable TPM passthrough backend on ARM -Bugzilla: 1801242 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Andrew Jones - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1801242 -UPSTREAM: not applicable -BRANCH: rhel-av-8.3.0 -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=30820412 - -Let's disable the TPM passthrough backend on ARM as this looks -to be a marginal use case and it was not tested comprehensively yet. - -Signed-off-by: Eric Auger -Signed-off-by: Danilo C. L. 
de Paula ---- - default-configs/aarch64-rh-devices.mak | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/default-configs/aarch64-rh-devices.mak b/default-configs/aarch64-rh-devices.mak -index 0b6a03f816..6a597384ea 100644 ---- a/default-configs/aarch64-rh-devices.mak -+++ b/default-configs/aarch64-rh-devices.mak -@@ -21,5 +21,4 @@ CONFIG_XIO3130=y - CONFIG_NVDIMM=y - CONFIG_ACPI_APEI=y - CONFIG_TPM_EMULATOR=y --CONFIG_TPM_PASSTHROUGH=y - CONFIG_TPM_TIS_SYSBUS=y --- -2.27.0 - diff --git a/kvm-Drop-bogus-IPv6-messages.patch b/kvm-Drop-bogus-IPv6-messages.patch deleted file mode 100644 index 337dee8..0000000 --- a/kvm-Drop-bogus-IPv6-messages.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 6ceab004edfb7c1f0f03701bc2ae443941468fd7 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Mon, 17 Aug 2020 22:06:08 -0400 -Subject: [PATCH 1/6] Drop bogus IPv6 messages -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20200817220608.1142611-2-jmaloy@redhat.com> -Patchwork-id: 98161 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/1] Drop bogus IPv6 messages -Bugzilla: 1867075 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Marc-André Lureau - -From: Ralf Haferkamp - -Drop IPv6 message shorter than what's mentioned in the payload -length header (+ the size of the IPv6 header). They're invalid an could -lead to data leakage in icmp6_send_echoreply(). - -(cherry picked from libslirp commit c7ede54cbd2e2b25385325600958ba0124e31cc0) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - slirp/src/ip6_input.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c -index a83e4f8e3d..f7ef354ee4 100644 ---- a/slirp/src/ip6_input.c -+++ b/slirp/src/ip6_input.c -@@ -56,6 +56,13 @@ void ip6_input(struct mbuf *m) - goto bad; - } - -+ // Check if the message size is big enough to hold what's -+ // set in the payload length header. If not this is an invalid -+ // packet -+ if (m->m_len < ntohs(ip6->ip_pl) + sizeof(struct ip6)) { -+ goto bad; -+ } -+ - /* check ip_ttl for a correct ICMP reply */ - if (ip6->ip_hl == 0) { - icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); --- -2.27.0 - diff --git a/kvm-Revert-Drop-bogus-IPv6-messages.patch b/kvm-Revert-Drop-bogus-IPv6-messages.patch deleted file mode 100644 index 5ac7159..0000000 --- a/kvm-Revert-Drop-bogus-IPv6-messages.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 40ce2a0e9f0a9d5c00ba82f187802fdf0a0702d0 Mon Sep 17 00:00:00 2001 -From: Danilo de Paula -Date: Wed, 16 Sep 2020 01:25:22 -0400 -Subject: [PATCH 3/4] Revert "Drop bogus IPv6 messages" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Danilo de Paula -Message-id: <20200916012522.1183051-2-ddepaula@redhat.com> -Patchwork-id: 98394 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/1] Revert "Drop bogus IPv6 messages" -Bugzilla: 1867075 -RH-Acked-by: Jon Maloy -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Marc-André Lureau - -This reverts commit 6ceab004edfb7c1f0f03701bc2ae443941468fd7. - -This fix was applied during the rebase. -The commit above just duplicates it. - -Signed-off-by: Danilo C. L. 
de Paula ---- - slirp/src/ip6_input.c | 7 ------- - 1 file changed, 7 deletions(-) - -diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c -index f7ef354ee4..a83e4f8e3d 100644 ---- a/slirp/src/ip6_input.c -+++ b/slirp/src/ip6_input.c -@@ -56,13 +56,6 @@ void ip6_input(struct mbuf *m) - goto bad; - } - -- // Check if the message size is big enough to hold what's -- // set in the payload length header. If not this is an invalid -- // packet -- if (m->m_len < ntohs(ip6->ip_pl) + sizeof(struct ip6)) { -- goto bad; -- } -- - /* check ip_ttl for a correct ICMP reply */ - if (ip6->ip_hl == 0) { - icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); --- -2.27.0 - diff --git a/kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch b/kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch deleted file mode 100644 index 22f49b7..0000000 --- a/kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch +++ /dev/null @@ -1,168 +0,0 @@ -From d2629755385917d277b80267cb88436c950123a7 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Fri, 28 Aug 2020 16:23:49 -0400 -Subject: [PATCH 07/11] Revert "hw/386: Add EPYC mode topology decoding - functions" - -RH-Author: Igor Mammedov -Message-id: <20200828162349.1616028-8-imammedo@redhat.com> -Patchwork-id: 98250 -O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 7/7] Revert "hw/386: Add EPYC mode topology decoding functions" -Bugzilla: 1873417 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Dr. David Alan Gilbert - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 -Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 -Branch: rhel-av-8.3.0 -Upstream: RHEL only -Tested: locally - -A regression was introduced since qemu-5.0, when EPYC specific -APIC ID encoding was introduced. Which leads to migration failing -with: -" - : Unknown savevm section or instance 'apic' 4. 
Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices - : load of migration failed: Invalid argument -" -when EPYC cpu model and more than 1 numa node is used. -EPYC specific APIC ID encoding is considered as failed -experiment and upstream is preparing to revert it as well. - -This reverts commit 7568b205555a6405042f62c64af3268f4330aed5. - -Signed-off-by: Igor Mammedov -Signed-off-by: Danilo C. L. de Paula ---- - include/hw/i386/topology.h | 100 ------------------------------------- - 1 file changed, 100 deletions(-) - -diff --git a/include/hw/i386/topology.h b/include/hw/i386/topology.h -index 07239f95f4..b9593b9905 100644 ---- a/include/hw/i386/topology.h -+++ b/include/hw/i386/topology.h -@@ -47,7 +47,6 @@ typedef uint32_t apic_id_t; - - typedef struct X86CPUTopoIDs { - unsigned pkg_id; -- unsigned node_id; - unsigned die_id; - unsigned core_id; - unsigned smt_id; -@@ -89,11 +88,6 @@ static inline unsigned apicid_die_width(X86CPUTopoInfo *topo_info) - return apicid_bitwidth_for_count(topo_info->dies_per_pkg); - } - --/* Bit width of the node_id field per socket */ --static inline unsigned apicid_node_width_epyc(X86CPUTopoInfo *topo_info) --{ -- return apicid_bitwidth_for_count(MAX(topo_info->nodes_per_pkg, 1)); --} - /* Bit offset of the Core_ID field - */ - static inline unsigned apicid_core_offset(X86CPUTopoInfo *topo_info) -@@ -114,100 +108,6 @@ static inline unsigned apicid_pkg_offset(X86CPUTopoInfo *topo_info) - return apicid_die_offset(topo_info) + apicid_die_width(topo_info); - } - --#define NODE_ID_OFFSET 3 /* Minimum node_id offset if numa configured */ -- --/* -- * Bit offset of the node_id field -- * -- * Make sure nodes_per_pkg > 0 if numa configured else zero. 
-- */ --static inline unsigned apicid_node_offset_epyc(X86CPUTopoInfo *topo_info) --{ -- unsigned offset = apicid_die_offset(topo_info) + -- apicid_die_width(topo_info); -- -- if (topo_info->nodes_per_pkg) { -- return MAX(NODE_ID_OFFSET, offset); -- } else { -- return offset; -- } --} -- --/* Bit offset of the Pkg_ID (socket ID) field */ --static inline unsigned apicid_pkg_offset_epyc(X86CPUTopoInfo *topo_info) --{ -- return apicid_node_offset_epyc(topo_info) + -- apicid_node_width_epyc(topo_info); --} -- --/* -- * Make APIC ID for the CPU based on Pkg_ID, Core_ID, SMT_ID -- * -- * The caller must make sure core_id < nr_cores and smt_id < nr_threads. -- */ --static inline apic_id_t --x86_apicid_from_topo_ids_epyc(X86CPUTopoInfo *topo_info, -- const X86CPUTopoIDs *topo_ids) --{ -- return (topo_ids->pkg_id << apicid_pkg_offset_epyc(topo_info)) | -- (topo_ids->node_id << apicid_node_offset_epyc(topo_info)) | -- (topo_ids->die_id << apicid_die_offset(topo_info)) | -- (topo_ids->core_id << apicid_core_offset(topo_info)) | -- topo_ids->smt_id; --} -- --static inline void x86_topo_ids_from_idx_epyc(X86CPUTopoInfo *topo_info, -- unsigned cpu_index, -- X86CPUTopoIDs *topo_ids) --{ -- unsigned nr_nodes = MAX(topo_info->nodes_per_pkg, 1); -- unsigned nr_dies = topo_info->dies_per_pkg; -- unsigned nr_cores = topo_info->cores_per_die; -- unsigned nr_threads = topo_info->threads_per_core; -- unsigned cores_per_node = DIV_ROUND_UP((nr_dies * nr_cores * nr_threads), -- nr_nodes); -- -- topo_ids->pkg_id = cpu_index / (nr_dies * nr_cores * nr_threads); -- topo_ids->node_id = (cpu_index / cores_per_node) % nr_nodes; -- topo_ids->die_id = cpu_index / (nr_cores * nr_threads) % nr_dies; -- topo_ids->core_id = cpu_index / nr_threads % nr_cores; -- topo_ids->smt_id = cpu_index % nr_threads; --} -- --/* -- * Calculate thread/core/package IDs for a specific topology, -- * based on APIC ID -- */ --static inline void x86_topo_ids_from_apicid_epyc(apic_id_t apicid, -- X86CPUTopoInfo 
*topo_info, -- X86CPUTopoIDs *topo_ids) --{ -- topo_ids->smt_id = apicid & -- ~(0xFFFFFFFFUL << apicid_smt_width(topo_info)); -- topo_ids->core_id = -- (apicid >> apicid_core_offset(topo_info)) & -- ~(0xFFFFFFFFUL << apicid_core_width(topo_info)); -- topo_ids->die_id = -- (apicid >> apicid_die_offset(topo_info)) & -- ~(0xFFFFFFFFUL << apicid_die_width(topo_info)); -- topo_ids->node_id = -- (apicid >> apicid_node_offset_epyc(topo_info)) & -- ~(0xFFFFFFFFUL << apicid_node_width_epyc(topo_info)); -- topo_ids->pkg_id = apicid >> apicid_pkg_offset_epyc(topo_info); --} -- --/* -- * Make APIC ID for the CPU 'cpu_index' -- * -- * 'cpu_index' is a sequential, contiguous ID for the CPU. -- */ --static inline apic_id_t x86_apicid_from_cpu_idx_epyc(X86CPUTopoInfo *topo_info, -- unsigned cpu_index) --{ -- X86CPUTopoIDs topo_ids; -- x86_topo_ids_from_idx_epyc(topo_info, cpu_index, &topo_ids); -- return x86_apicid_from_topo_ids_epyc(topo_info, &topo_ids); --} - /* Make APIC ID for the CPU based on Pkg_ID, Core_ID, SMT_ID - * - * The caller must make sure core_id < nr_cores and smt_id < nr_threads. --- -2.27.0 - diff --git a/kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch b/kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch deleted file mode 100644 index 5988443..0000000 --- a/kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch +++ /dev/null @@ -1,80 +0,0 @@ -From da24d2c5e2d61043340b601a09f22e41a1d52e5e Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Fri, 28 Aug 2020 16:23:47 -0400 -Subject: [PATCH 05/11] Revert "hw/i386: Introduce apicid functions inside - X86MachineState" - -RH-Author: Igor Mammedov -Message-id: <20200828162349.1616028-6-imammedo@redhat.com> -Patchwork-id: 98246 -O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 5/7] Revert "hw/i386: Introduce apicid functions inside X86MachineState" -Bugzilla: 1873417 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Dr. 
David Alan Gilbert - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 -Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 -Branch: rhel-av-8.3.0 -Upstream: RHEL only -Tested: locally - -A regression was introduced since qemu-5.0, when EPYC specific -APIC ID encoding was introduced. Which leads to migration failing -with: -" - : Unknown savevm section or instance 'apic' 4. Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices - : load of migration failed: Invalid argument -" -when EPYC cpu model and more than 1 numa node is used. -EPYC specific APIC ID encoding is considered as failed -experiment and upstream is preparing to revert it as well. - -This reverts commit 6121c7fbfd98dbc3af1b00b56ff2eef66df87828. - -Signed-off-by: Igor Mammedov -Signed-off-by: Danilo C. L. de Paula ---- - hw/i386/x86.c | 5 ----- - include/hw/i386/x86.h | 9 --------- - 2 files changed, 14 deletions(-) - -diff --git a/hw/i386/x86.c b/hw/i386/x86.c -index 41bdf146bd..4d8cb66258 100644 ---- a/hw/i386/x86.c -+++ b/hw/i386/x86.c -@@ -896,11 +896,6 @@ static void x86_machine_initfn(Object *obj) - x86ms->smm = ON_OFF_AUTO_AUTO; - x86ms->acpi = ON_OFF_AUTO_AUTO; - x86ms->smp_dies = 1; -- -- x86ms->apicid_from_cpu_idx = x86_apicid_from_cpu_idx; -- x86ms->topo_ids_from_apicid = x86_topo_ids_from_apicid; -- x86ms->apicid_from_topo_ids = x86_apicid_from_topo_ids; -- x86ms->apicid_pkg_offset = apicid_pkg_offset; - } - - static void x86_machine_class_init(ObjectClass *oc, void *data) -diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h -index b79f24e285..4d9a26326d 100644 ---- a/include/hw/i386/x86.h -+++ b/include/hw/i386/x86.h -@@ -63,15 +63,6 @@ typedef struct { - OnOffAuto smm; - OnOffAuto acpi; - -- /* Apic id specific handlers */ -- uint32_t (*apicid_from_cpu_idx)(X86CPUTopoInfo *topo_info, -- unsigned cpu_index); -- void (*topo_ids_from_apicid)(apic_id_t apicid, X86CPUTopoInfo *topo_info, -- X86CPUTopoIDs 
*topo_ids); -- apic_id_t (*apicid_from_topo_ids)(X86CPUTopoInfo *topo_info, -- const X86CPUTopoIDs *topo_ids); -- uint32_t (*apicid_pkg_offset)(X86CPUTopoInfo *topo_info); -- - /* - * Address space used by IOAPIC device. All IOAPIC interrupts - * will be translated to MSI messages in the address space. --- -2.27.0 - diff --git a/kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch b/kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch deleted file mode 100644 index b9ac7b1..0000000 --- a/kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch +++ /dev/null @@ -1,157 +0,0 @@ -From 61b9bdeafac573093e171947be1a0c9212ba8b95 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Fri, 28 Aug 2020 16:23:45 -0400 -Subject: [PATCH 03/11] Revert "hw/i386: Move arch_id decode inside - x86_cpus_init" - -RH-Author: Igor Mammedov -Message-id: <20200828162349.1616028-4-imammedo@redhat.com> -Patchwork-id: 98248 -O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 3/7] Revert "hw/i386: Move arch_id decode inside x86_cpus_init" -Bugzilla: 1873417 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Dr. David Alan Gilbert - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 -Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 -Branch: rhel-av-8.3.0 -Upstream: RHEL only -Tested: locally - -A regression was introduced since qemu-5.0, when EPYC specific -APIC ID encoding was introduced. Which leads to migration failing -with: -" - : Unknown savevm section or instance 'apic' 4. Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices - : load of migration failed: Invalid argument -" -when EPYC cpu model and more than 1 numa node is used. -EPYC specific APIC ID encoding is considered as failed -experiment and upstream is preparing to revert it as well. - -This reverts commit 2e26f4ab3bf8390a2677d3afd9b1a04f015d7721. - -Signed-off-by: Igor Mammedov -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/i386/pc.c | 6 +++--- - hw/i386/x86.c | 37 +++++++------------------------------ - 2 files changed, 10 insertions(+), 33 deletions(-) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index f469c060e5..ac2cc79fca 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -1817,14 +1817,14 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, - topo_ids.die_id = cpu->die_id; - topo_ids.core_id = cpu->core_id; - topo_ids.smt_id = cpu->thread_id; -- cpu->apic_id = x86ms->apicid_from_topo_ids(&topo_info, &topo_ids); -+ cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); - } - - cpu_slot = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, &idx); - if (!cpu_slot) { - MachineState *ms = MACHINE(pcms); - -- x86ms->topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); -+ x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); - error_setg(errp, - "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" - " APIC ID %" PRIu32 ", valid index range 0:%d", -@@ -1845,7 +1845,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, - /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() - * once -smp refactoring is complete and there will be CPU private - * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ -- x86ms->topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); -+ x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); - if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) { - error_setg(errp, "property socket-id: %u doesn't match set apic-id:" - " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, -diff --git a/hw/i386/x86.c b/hw/i386/x86.c -index 67bee1bcb8..41bdf146bd 100644 ---- a/hw/i386/x86.c -+++ b/hw/i386/x86.c -@@ -68,22 +68,6 @@ inline void init_topo_info(X86CPUTopoInfo *topo_info, - topo_info->threads_per_core = ms->smp.threads; - } - --/* -- * Set up with the new EPYC topology handlers -- * -- * AMD uses different apic id encoding for EPYC based cpus. 
Override -- * the default topo handlers with EPYC encoding handlers. -- */ --static void x86_set_epyc_topo_handlers(MachineState *machine) --{ -- X86MachineState *x86ms = X86_MACHINE(machine); -- -- x86ms->apicid_from_cpu_idx = x86_apicid_from_cpu_idx_epyc; -- x86ms->topo_ids_from_apicid = x86_topo_ids_from_apicid_epyc; -- x86ms->apicid_from_topo_ids = x86_apicid_from_topo_ids_epyc; -- x86ms->apicid_pkg_offset = apicid_pkg_offset_epyc; --} -- - /* - * Calculates initial APIC ID for a specific CPU index - * -@@ -102,7 +86,7 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, - - init_topo_info(&topo_info, x86ms); - -- correct_id = x86ms->apicid_from_cpu_idx(&topo_info, cpu_index); -+ correct_id = x86_apicid_from_cpu_idx(&topo_info, cpu_index); - if (x86mc->compat_apic_id_mode) { - if (cpu_index != correct_id && !warned && !qtest_enabled()) { - error_report("APIC IDs set in compatibility mode, " -@@ -136,11 +120,6 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) - MachineState *ms = MACHINE(x86ms); - MachineClass *mc = MACHINE_GET_CLASS(x86ms); - -- /* Check for apicid encoding */ -- if (cpu_x86_use_epyc_apic_id_encoding(ms->cpu_type)) { -- x86_set_epyc_topo_handlers(ms); -- } -- - x86_cpu_set_default_version(default_cpu_version); - - /* -@@ -154,12 +133,6 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) - x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, - ms->smp.max_cpus - 1) + 1; - possible_cpus = mc->possible_cpu_arch_ids(ms); -- -- for (i = 0; i < ms->possible_cpus->len; i++) { -- ms->possible_cpus->cpus[i].arch_id = -- x86_cpu_apic_id_from_index(x86ms, i); -- } -- - for (i = 0; i < ms->smp.cpus; i++) { - x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); - } -@@ -184,7 +157,8 @@ int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) - init_topo_info(&topo_info, x86ms); - - assert(idx < ms->possible_cpus->len); -- x86_topo_ids_from_idx(&topo_info, idx, &topo_ids); -+ 
x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id, -+ &topo_info, &topo_ids); - return topo_ids.pkg_id % ms->numa_state->num_nodes; - } - -@@ -215,7 +189,10 @@ const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) - - ms->possible_cpus->cpus[i].type = ms->cpu_type; - ms->possible_cpus->cpus[i].vcpus_count = 1; -- x86_topo_ids_from_idx(&topo_info, i, &topo_ids); -+ ms->possible_cpus->cpus[i].arch_id = -+ x86_cpu_apic_id_from_index(x86ms, i); -+ x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id, -+ &topo_info, &topo_ids); - ms->possible_cpus->cpus[i].props.has_socket_id = true; - ms->possible_cpus->cpus[i].props.socket_id = topo_ids.pkg_id; - if (x86ms->smp_dies > 1) { --- -2.27.0 - diff --git a/kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch b/kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch deleted file mode 100644 index 9492f85..0000000 --- a/kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 7f7a15ba9ad3f1d906b472cad4972c80d11b77fc Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Fri, 28 Aug 2020 16:23:43 -0400 -Subject: [PATCH 01/11] Revert "i386: Fix pkg_id offset for EPYC cpu models" - -RH-Author: Igor Mammedov -Message-id: <20200828162349.1616028-2-imammedo@redhat.com> -Patchwork-id: 98247 -O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 1/7] Revert "i386: Fix pkg_id offset for EPYC cpu models" -Bugzilla: 1873417 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Dr. David Alan Gilbert - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 -Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 -Branch: rhel-av-8.3.0 -Upstream: RHEL only -Tested: locally - -A regression was introduced since qemu-5.0, when EPYC specific -APIC ID encoding was introduced. Which leads to migration failing -with: -" - : Unknown savevm section or instance 'apic' 4. 
Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices - : load of migration failed: Invalid argument -" -when EPYC cpu model and more than 1 numa node is used. -EPYC specific APIC ID encoding is considered as failed -experiment and upstream is preparing to revert it as well. - -This reverts commit 7b225762c8c05fd31d4c2be116aedfbc00383f8b. - -PS: -fixup an access to pkg_offset that were added by -cac9edfc4da (target/i386: Fix the CPUID leaf CPUID_Fn80000008) - -Signed-off-by: Igor Mammedov -Signed-off-by: Danilo C. L. de Paula ---- - hw/i386/pc.c | 1 - - target/i386/cpu.c | 6 +++--- - target/i386/cpu.h | 1 - - 3 files changed, 3 insertions(+), 5 deletions(-) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index a75e0137ab..f469c060e5 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -1763,7 +1763,6 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, - - env->nr_dies = x86ms->smp_dies; - env->nr_nodes = topo_info.nodes_per_pkg; -- env->pkg_offset = x86ms->apicid_pkg_offset(&topo_info); - - /* - * If APIC ID is not set, -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index cdaa1463f2..6517cc73a2 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -5680,7 +5680,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, - *ecx |= CPUID_TOPOLOGY_LEVEL_SMT; - break; - case 1: -- *eax = env->pkg_offset; -+ *eax = apicid_pkg_offset(&topo_info); - *ebx = cs->nr_cores * cs->nr_threads; - *ecx |= CPUID_TOPOLOGY_LEVEL_CORE; - break; -@@ -5714,7 +5714,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, - *ecx |= CPUID_TOPOLOGY_LEVEL_CORE; - break; - case 2: -- *eax = env->pkg_offset; -+ *eax = apicid_pkg_offset(&topo_info); - *ebx = env->nr_dies * cs->nr_cores * cs->nr_threads; - *ecx |= CPUID_TOPOLOGY_LEVEL_DIE; - break; -@@ -5895,7 +5895,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, - * CPUX86State::pkg_offset. 
- * Bits 7:0 is "The number of threads in the package is NC+1" - */ -- *ecx = (env->pkg_offset << 12) | -+ *ecx = (apicid_pkg_offset(&topo_info) << 12) | - ((cs->nr_cores * cs->nr_threads) - 1); - } else { - *ecx = 0; -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index e1a5c174dc..d5ad42d694 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -1630,7 +1630,6 @@ typedef struct CPUX86State { - - unsigned nr_dies; - unsigned nr_nodes; -- unsigned pkg_offset; - } CPUX86State; - - struct kvm_msrs; --- -2.27.0 - diff --git a/kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch b/kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch deleted file mode 100644 index 489c5a3..0000000 --- a/kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch +++ /dev/null @@ -1,90 +0,0 @@ -From bc3db6832c57b1b28204b376f3c4c61cadfe0a35 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Fri, 28 Aug 2020 16:23:46 -0400 -Subject: [PATCH 04/11] Revert "i386: Introduce use_epyc_apic_id_encoding in - X86CPUDefinition" - -RH-Author: Igor Mammedov -Message-id: <20200828162349.1616028-5-imammedo@redhat.com> -Patchwork-id: 98249 -O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 4/7] Revert "i386: Introduce use_epyc_apic_id_encoding in X86CPUDefinition" -Bugzilla: 1873417 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Dr. David Alan Gilbert - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 -Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 -Branch: rhel-av-8.3.0 -Upstream: RHEL only -Tested: locally - -A regression was introduced since qemu-5.0, when EPYC specific -APIC ID encoding was introduced. Which leads to migration failing -with: -" - : Unknown savevm section or instance 'apic' 4. Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices - : load of migration failed: Invalid argument -" -when EPYC cpu model and more than 1 numa node is used. 
-EPYC specific APIC ID encoding is considered as failed -experiment and upstream is preparing to revert it as well. - -This reverts commit 0c1538cb1a26287c072645f4759b9872b1596d79. - -Signed-off-by: Igor Mammedov -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 16 ---------------- - target/i386/cpu.h | 1 - - 2 files changed, 17 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 66b6a77b2f..5e3d086f05 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1638,10 +1638,6 @@ typedef struct X86CPUDefinition { - FeatureWordArray features; - const char *model_id; - CPUCaches *cache_info; -- -- /* Use AMD EPYC encoding for apic id */ -- bool use_epyc_apic_id_encoding; -- - /* - * Definitions for alternative versions of CPU model. - * List is terminated by item with version == 0. -@@ -1683,18 +1679,6 @@ static const X86CPUVersionDefinition *x86_cpu_def_get_versions(X86CPUDefinition - return def->versions ?: default_version_list; - } - --bool cpu_x86_use_epyc_apic_id_encoding(const char *cpu_type) --{ -- X86CPUClass *xcc = X86_CPU_CLASS(object_class_by_name(cpu_type)); -- -- assert(xcc); -- if (xcc->model && xcc->model->cpudef) { -- return xcc->model->cpudef->use_epyc_apic_id_encoding; -- } else { -- return false; -- } --} -- - static CPUCaches epyc_cache_info = { - .l1d_cache = &(CPUCacheInfo) { - .type = DATA_CACHE, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index d5ad42d694..5ff8ad8427 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -1918,7 +1918,6 @@ void cpu_clear_apic_feature(CPUX86State *env); - void host_cpuid(uint32_t function, uint32_t count, - uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); - void host_vendor_fms(char *vendor, int *family, int *model, int *stepping); --bool cpu_x86_use_epyc_apic_id_encoding(const char *cpu_type); - - /* helper.c */ - bool x86_cpu_tlb_fill(CPUState *cs, vaddr address, int size, --- -2.27.0 - diff --git 
a/kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch b/kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch deleted file mode 100644 index eeea50d..0000000 --- a/kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch +++ /dev/null @@ -1,288 +0,0 @@ -From 4236a54d72270d871ff1ed3fd09a2971327077a1 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Fri, 28 Aug 2020 16:23:48 -0400 -Subject: [PATCH 06/11] Revert "target/i386: Cleanup and use the EPYC mode - topology functions" - -RH-Author: Igor Mammedov -Message-id: <20200828162349.1616028-7-imammedo@redhat.com> -Patchwork-id: 98251 -O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 6/7] Revert "target/i386: Cleanup and use the EPYC mode topology functions" -Bugzilla: 1873417 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Dr. David Alan Gilbert - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 -Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 -Branch: rhel-av-8.3.0 -Upstream: RHEL only -Tested: locally - -A regression was introduced since qemu-5.0, when EPYC specific -APIC ID encoding was introduced. Which leads to migration failing -with: -" - : Unknown savevm section or instance 'apic' 4. Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices - : load of migration failed: Invalid argument -" -when EPYC cpu model and more than 1 numa node is used. -EPYC specific APIC ID encoding is considered as failed -experiment and upstream is preparing to revert it as well. - -This reverts commit dd08ef0318e2b61d14bc069590d174913f7f437a. - -Signed-off-by: Igor Mammedov -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 161 ++++++++++++++++++++++++++++++++++++---------- - 1 file changed, 127 insertions(+), 34 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 5e3d086f05..73fc83e53f 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -338,15 +338,68 @@ static void encode_cache_cpuid80000006(CPUCacheInfo *l2, - } - } - -+/* -+ * Definitions used for building CPUID Leaf 0x8000001D and 0x8000001E -+ * Please refer to the AMD64 Architecture Programmer’s Manual Volume 3. -+ * Define the constants to build the cpu topology. Right now, TOPOEXT -+ * feature is enabled only on EPYC. So, these constants are based on -+ * EPYC supported configurations. We may need to handle the cases if -+ * these values change in future. -+ */ -+/* Maximum core complexes in a node */ -+#define MAX_CCX 2 -+/* Maximum cores in a core complex */ -+#define MAX_CORES_IN_CCX 4 -+/* Maximum cores in a node */ -+#define MAX_CORES_IN_NODE 8 -+/* Maximum nodes in a socket */ -+#define MAX_NODES_PER_SOCKET 4 -+ -+/* -+ * Figure out the number of nodes required to build this config. -+ * Max cores in a node is 8 -+ */ -+static int nodes_in_socket(int nr_cores) -+{ -+ int nodes; -+ -+ nodes = DIV_ROUND_UP(nr_cores, MAX_CORES_IN_NODE); -+ -+ /* Hardware does not support config with 3 nodes, return 4 in that case */ -+ return (nodes == 3) ? 4 : nodes; -+} -+ -+/* -+ * Decide the number of cores in a core complex with the given nr_cores using -+ * following set constants MAX_CCX, MAX_CORES_IN_CCX, MAX_CORES_IN_NODE and -+ * MAX_NODES_PER_SOCKET. Maintain symmetry as much as possible -+ * L3 cache is shared across all cores in a core complex. So, this will also -+ * tell us how many cores are sharing the L3 cache. 
-+ */ -+static int cores_in_core_complex(int nr_cores) -+{ -+ int nodes; -+ -+ /* Check if we can fit all the cores in one core complex */ -+ if (nr_cores <= MAX_CORES_IN_CCX) { -+ return nr_cores; -+ } -+ /* Get the number of nodes required to build this config */ -+ nodes = nodes_in_socket(nr_cores); -+ -+ /* -+ * Divide the cores accros all the core complexes -+ * Return rounded up value -+ */ -+ return DIV_ROUND_UP(nr_cores, nodes * MAX_CCX); -+} -+ - /* Encode cache info for CPUID[8000001D] */ --static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, -- X86CPUTopoInfo *topo_info, -- uint32_t *eax, uint32_t *ebx, -- uint32_t *ecx, uint32_t *edx) -+static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, CPUState *cs, -+ uint32_t *eax, uint32_t *ebx, -+ uint32_t *ecx, uint32_t *edx) - { - uint32_t l3_cores; -- unsigned nodes = MAX(topo_info->nodes_per_pkg, 1); -- - assert(cache->size == cache->line_size * cache->associativity * - cache->partitions * cache->sets); - -@@ -355,13 +408,10 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, - - /* L3 is shared among multiple cores */ - if (cache->level == 3) { -- l3_cores = DIV_ROUND_UP((topo_info->dies_per_pkg * -- topo_info->cores_per_die * -- topo_info->threads_per_core), -- nodes); -- *eax |= (l3_cores - 1) << 14; -+ l3_cores = cores_in_core_complex(cs->nr_cores); -+ *eax |= ((l3_cores * cs->nr_threads) - 1) << 14; - } else { -- *eax |= ((topo_info->threads_per_core - 1) << 14); -+ *eax |= ((cs->nr_threads - 1) << 14); - } - - assert(cache->line_size > 0); -@@ -381,17 +431,55 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, - (cache->complex_indexing ? 
CACHE_COMPLEX_IDX : 0); - } - -+/* Data structure to hold the configuration info for a given core index */ -+struct core_topology { -+ /* core complex id of the current core index */ -+ int ccx_id; -+ /* -+ * Adjusted core index for this core in the topology -+ * This can be 0,1,2,3 with max 4 cores in a core complex -+ */ -+ int core_id; -+ /* Node id for this core index */ -+ int node_id; -+ /* Number of nodes in this config */ -+ int num_nodes; -+}; -+ -+/* -+ * Build the configuration closely match the EPYC hardware. Using the EPYC -+ * hardware configuration values (MAX_CCX, MAX_CORES_IN_CCX, MAX_CORES_IN_NODE) -+ * right now. This could change in future. -+ * nr_cores : Total number of cores in the config -+ * core_id : Core index of the current CPU -+ * topo : Data structure to hold all the config info for this core index -+ */ -+static void build_core_topology(int nr_cores, int core_id, -+ struct core_topology *topo) -+{ -+ int nodes, cores_in_ccx; -+ -+ /* First get the number of nodes required */ -+ nodes = nodes_in_socket(nr_cores); -+ -+ cores_in_ccx = cores_in_core_complex(nr_cores); -+ -+ topo->node_id = core_id / (cores_in_ccx * MAX_CCX); -+ topo->ccx_id = (core_id % (cores_in_ccx * MAX_CCX)) / cores_in_ccx; -+ topo->core_id = core_id % cores_in_ccx; -+ topo->num_nodes = nodes; -+} -+ - /* Encode cache info for CPUID[8000001E] */ --static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu, -+static void encode_topo_cpuid8000001e(CPUState *cs, X86CPU *cpu, - uint32_t *eax, uint32_t *ebx, - uint32_t *ecx, uint32_t *edx) - { -- X86CPUTopoIDs topo_ids = {0}; -- unsigned long nodes = MAX(topo_info->nodes_per_pkg, 1); -+ struct core_topology topo = {0}; -+ unsigned long nodes; - int shift; - -- x86_topo_ids_from_apicid_epyc(cpu->apic_id, topo_info, &topo_ids); -- -+ build_core_topology(cs->nr_cores, cpu->core_id, &topo); - *eax = cpu->apic_id; - /* - * CPUID_Fn8000001E_EBX -@@ -408,8 +496,12 @@ static void 
encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu, - * 3 Core complex id - * 1:0 Core id - */ -- *ebx = ((topo_info->threads_per_core - 1) << 8) | (topo_ids.node_id << 3) | -- (topo_ids.core_id); -+ if (cs->nr_threads - 1) { -+ *ebx = ((cs->nr_threads - 1) << 8) | (topo.node_id << 3) | -+ (topo.ccx_id << 2) | topo.core_id; -+ } else { -+ *ebx = (topo.node_id << 4) | (topo.ccx_id << 3) | topo.core_id; -+ } - /* - * CPUID_Fn8000001E_ECX - * 31:11 Reserved -@@ -418,8 +510,9 @@ static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu, - * 2 Socket id - * 1:0 Node id - */ -- if (nodes <= 4) { -- *ecx = ((nodes - 1) << 8) | (topo_ids.pkg_id << 2) | topo_ids.node_id; -+ if (topo.num_nodes <= 4) { -+ *ecx = ((topo.num_nodes - 1) << 8) | (cpu->socket_id << 2) | -+ topo.node_id; - } else { - /* - * Node id fix up. Actual hardware supports up to 4 nodes. But with -@@ -434,10 +527,10 @@ static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu, - * number of nodes. find_last_bit returns last set bit(0 based). Left - * shift(+1) the socket id to represent all the nodes. 
- */ -- nodes -= 1; -+ nodes = topo.num_nodes - 1; - shift = find_last_bit(&nodes, 8); -- *ecx = (nodes << 8) | (topo_ids.pkg_id << (shift + 1)) | -- topo_ids.node_id; -+ *ecx = ((topo.num_nodes - 1) << 8) | (cpu->socket_id << (shift + 1)) | -+ topo.node_id; - } - *edx = 0; - } -@@ -5473,7 +5566,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, - uint32_t signature[3]; - X86CPUTopoInfo topo_info; - -- topo_info.nodes_per_pkg = env->nr_nodes; - topo_info.dies_per_pkg = env->nr_dies; - topo_info.cores_per_die = cs->nr_cores; - topo_info.threads_per_core = cs->nr_threads; -@@ -5905,20 +5997,20 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, - } - switch (count) { - case 0: /* L1 dcache info */ -- encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, -- &topo_info, eax, ebx, ecx, edx); -+ encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, cs, -+ eax, ebx, ecx, edx); - break; - case 1: /* L1 icache info */ -- encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache, -- &topo_info, eax, ebx, ecx, edx); -+ encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache, cs, -+ eax, ebx, ecx, edx); - break; - case 2: /* L2 cache info */ -- encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache, -- &topo_info, eax, ebx, ecx, edx); -+ encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache, cs, -+ eax, ebx, ecx, edx); - break; - case 3: /* L3 cache info */ -- encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache, -- &topo_info, eax, ebx, ecx, edx); -+ encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache, cs, -+ eax, ebx, ecx, edx); - break; - default: /* end of info */ - *eax = *ebx = *ecx = *edx = 0; -@@ -5927,7 +6019,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, - break; - case 0x8000001E: - assert(cpu->core_id <= 255); -- encode_topo_cpuid8000001e(&topo_info, cpu, eax, ebx, ecx, edx); -+ encode_topo_cpuid8000001e(cs, cpu, -+ eax, ebx, ecx, edx); - break; - case 0xC0000000: - *eax = 
env->cpuid_xlevel2; --- -2.27.0 - diff --git a/kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch b/kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch deleted file mode 100644 index 0326049..0000000 --- a/kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch +++ /dev/null @@ -1,63 +0,0 @@ -From a36be18a97841a091256e9934fb323afc9c3a57a Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Fri, 28 Aug 2020 16:23:44 -0400 -Subject: [PATCH 02/11] Revert "target/i386: Enable new apic id encoding for - EPYC based cpus models" - -RH-Author: Igor Mammedov -Message-id: <20200828162349.1616028-3-imammedo@redhat.com> -Patchwork-id: 98245 -O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 2/7] Revert "target/i386: Enable new apic id encoding for EPYC based cpus models" -Bugzilla: 1873417 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Dr. David Alan Gilbert - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 -Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 -Branch: rhel-av-8.3.0 -Upstream: RHEL only -Tested: locally - -A regression was introduced since qemu-5.0, when EPYC specific -APIC ID encoding was introduced. Which leads to migration failing -with: -" - : Unknown savevm section or instance 'apic' 4. Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices - : load of migration failed: Invalid argument -" -when EPYC cpu model and more than 1 numa node is used. -EPYC specific APIC ID encoding is considered as failed -experiment and upstream is preparing to revert it as well. - -This reverts commit 247b18c593ec298446645af8d5d28911daf653b1. - -Signed-off-by: Igor Mammedov -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 6517cc73a2..66b6a77b2f 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -3996,7 +3996,6 @@ static X86CPUDefinition builtin_x86_defs[] = { - .xlevel = 0x8000001E, - .model_id = "AMD EPYC Processor", - .cache_info = &epyc_cache_info, -- .use_epyc_apic_id_encoding = 1, - .versions = (X86CPUVersionDefinition[]) { - { .version = 1 }, - { -@@ -4124,7 +4123,6 @@ static X86CPUDefinition builtin_x86_defs[] = { - .xlevel = 0x8000001E, - .model_id = "AMD EPYC-Rome Processor", - .cache_info = &epyc_rome_cache_info, -- .use_epyc_apic_id_encoding = 1, - }, - }; - --- -2.27.0 - diff --git a/kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch b/kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch deleted file mode 100644 index 932bc30..0000000 --- a/kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch +++ /dev/null @@ -1,51 +0,0 @@ -From ba82420d04b2e2ca69d5ff4720e37dd0748936ea Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Wed, 16 Sep 2020 11:40:25 -0400 -Subject: [PATCH 4/4] block/rbd: add 'namespace' to - qemu_rbd_strong_runtime_opts[] - -RH-Author: Stefano Garzarella -Message-id: <20200916114025.47973-2-sgarzare@redhat.com> -Patchwork-id: 98399 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/1] block/rbd: add 'namespace' to qemu_rbd_strong_runtime_opts[] -Bugzilla: 1821528 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -Commit 19ae9ae014 ("block/rbd: Add support for ceph namespaces") -introduced namespace support for RBD, but we forgot to add the -new 'namespace' options to qemu_rbd_strong_runtime_opts[]. - -The 'namespace' is used to identify the image, so it is a strong -option since it can changes the data of a BDS. 
- -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1821528 -Fixes: 19ae9ae014 ("block/rbd: Add support for ceph namespaces") -Cc: Florian Florensa -Signed-off-by: Stefano Garzarella -Message-Id: <20200914190553.74871-1-sgarzare@redhat.com> -Reviewed-by: Jason Dillaman -Signed-off-by: Max Reitz -(cherry picked from commit 7bae7c805d82675eb3a02c744093703d84ada2d6) -Signed-off-by: Stefano Garzarella -Signed-off-by: Danilo C. L. de Paula ---- - block/rbd.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/block/rbd.c b/block/rbd.c -index 688074c64b..5356753fbe 100644 ---- a/block/rbd.c -+++ b/block/rbd.c -@@ -1289,6 +1289,7 @@ static QemuOptsList qemu_rbd_create_opts = { - - static const char *const qemu_rbd_strong_runtime_opts[] = { - "pool", -+ "namespace", - "image", - "conf", - "snapshot", --- -2.27.0 - diff --git a/kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch b/kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch deleted file mode 100644 index e1ea970..0000000 --- a/kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch +++ /dev/null @@ -1,78 +0,0 @@ -From d323d7648a64e213d099d7ee3c66edc186b97808 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Fri, 18 Sep 2020 19:35:42 -0400 -Subject: [PATCH] hw/nvram/fw_cfg: fix FWCfgDataGeneratorClass::get_data() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Philippe Mathieu-Daudé -Message-id: <20200918193542.191031-2-philmd@redhat.com> -Patchwork-id: 98402 -O-Subject: [PATCH 1/1] hw/nvram/fw_cfg: fix FWCfgDataGeneratorClass::get_data() consumption -Bugzilla: 1688978 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Danilo de Paula - -From: Laszlo Ersek - -The documentation on g_byte_array_free() - -says: - -> Returns -> -> the element data if free_segment is FALSE, otherwise NULL. The element -> data should be freed using g_free(). 
- -Because we currently call g_byte_array_free() with free_segment=TRUE, we -end up passing data=NULL to fw_cfg_add_file(). - -On the plus side, fw_cfg_data_read() and fw_cfg_dma_transfer() both deal -with NULL data gracefully: QEMU does not crash when the guest reads such -an item, the guest just gets a properly sized, but zero-filled blob. - -However, the bug breaks UEFI HTTPS boot, as the IANA_TLS_CIPHER array, -generated otherwise correctly by the "tls-cipher-suites" object, is in -effect replaced with a zero blob. - -Fix the issue by passing free_segment=FALSE to g_byte_array_free(): - -- the caller (fw_cfg_add_from_generator()) temporarily assumes ownership - of the generated byte array, - -- then ownership of the byte array is transfered to fw_cfg, as - fw_cfg_add_file() links (not copies) "data" into fw_cfg. - -Cc: "Daniel P. Berrangé" -Cc: "Philippe Mathieu-Daudé" -Cc: Gerd Hoffmann -Fixes: 3203148917d035b09f71986ac2eaa19a352d6d9d -Signed-off-by: Laszlo Ersek -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Daniel P. Berrangé -Message-Id: <20200916151510.22767-1-lersek@redhat.com> -Signed-off-by: Philippe Mathieu-Daudé -(cherry picked from commit 4318432ccd3f7fb69b7169f39dcae3d4ee04f5ea) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/nvram/fw_cfg.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c -index f3a4728288e..0e95d057fd5 100644 ---- a/hw/nvram/fw_cfg.c -+++ b/hw/nvram/fw_cfg.c -@@ -1056,7 +1056,7 @@ bool fw_cfg_add_from_generator(FWCfgState *s, const char *filename, - return false; - } - size = array->len; -- fw_cfg_add_file(s, filename, g_byte_array_free(array, TRUE), size); -+ fw_cfg_add_file(s, filename, g_byte_array_free(array, FALSE), size); - - return true; - } --- -2.27.0 - diff --git a/kvm-iotests-Test-node-bitmap-aliases-during-migration.patch b/kvm-iotests-Test-node-bitmap-aliases-during-migration.patch deleted file mode 100644 index 98c3433..0000000 --- a/kvm-iotests-Test-node-bitmap-aliases-during-migration.patch +++ /dev/null @@ -1,655 +0,0 @@ -From 2877fd4f92a86f43a113691f56738b09a0b4d500 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Mon, 24 Aug 2020 09:20:38 -0400 -Subject: [PATCH 6/6] iotests: Test node/bitmap aliases during migration - -RH-Author: Max Reitz -Message-id: <20200824092038.227913-4-mreitz@redhat.com> -Patchwork-id: 98214 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 3/3] iotests: Test node/bitmap aliases during migration -Bugzilla: 1790492 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Stefan Hajnoczi - -Signed-off-by: Max Reitz -Message-Id: <20200820150725.68687-4-mreitz@redhat.com> -Reviewed-by: Eric Blake -Tested-by: Eric Blake -[eblake: fold in python cleanups recommended by Vladimir] -Signed-off-by: Eric Blake -(cherry picked from commit cb5c6cd2dc984812f560fbe41f57a6bfc34d8708) -Signed-off-by: Max Reitz -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/300 | 593 +++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/300.out | 5 + - tests/qemu-iotests/group | 1 + - 3 files changed, 599 insertions(+) - create mode 100755 tests/qemu-iotests/300 - create mode 100644 tests/qemu-iotests/300.out - -diff --git a/tests/qemu-iotests/300 b/tests/qemu-iotests/300 -new file mode 100755 -index 0000000000..5b75121b84 ---- /dev/null -+++ b/tests/qemu-iotests/300 -@@ -0,0 +1,593 @@ -+#!/usr/bin/env python3 -+# -+# Copyright (C) 2020 Red Hat, Inc. -+# -+# Tests for dirty bitmaps migration with node aliases -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . 
-+# -+ -+import os -+import random -+import re -+from typing import Dict, List, Optional, Union -+import iotests -+import qemu -+ -+BlockBitmapMapping = List[Dict[str, Union[str, List[Dict[str, str]]]]] -+ -+assert iotests.sock_dir is not None -+mig_sock = os.path.join(iotests.sock_dir, 'mig_sock') -+ -+ -+class TestDirtyBitmapMigration(iotests.QMPTestCase): -+ src_node_name: str = '' -+ dst_node_name: str = '' -+ src_bmap_name: str = '' -+ dst_bmap_name: str = '' -+ -+ def setUp(self) -> None: -+ self.vm_a = iotests.VM(path_suffix='-a') -+ self.vm_a.add_blockdev(f'node-name={self.src_node_name},' -+ 'driver=null-co') -+ self.vm_a.launch() -+ -+ self.vm_b = iotests.VM(path_suffix='-b') -+ self.vm_b.add_blockdev(f'node-name={self.dst_node_name},' -+ 'driver=null-co') -+ self.vm_b.add_incoming(f'unix:{mig_sock}') -+ self.vm_b.launch() -+ -+ result = self.vm_a.qmp('block-dirty-bitmap-add', -+ node=self.src_node_name, -+ name=self.src_bmap_name) -+ self.assert_qmp(result, 'return', {}) -+ -+ # Dirty some random megabytes -+ for _ in range(9): -+ mb_ofs = random.randrange(1024) -+ self.vm_a.hmp_qemu_io(self.src_node_name, f'discard {mb_ofs}M 1M') -+ -+ result = self.vm_a.qmp('x-debug-block-dirty-bitmap-sha256', -+ node=self.src_node_name, -+ name=self.src_bmap_name) -+ self.bitmap_hash_reference = result['return']['sha256'] -+ -+ caps = [{'capability': name, 'state': True} -+ for name in ('dirty-bitmaps', 'events')] -+ -+ for vm in (self.vm_a, self.vm_b): -+ result = vm.qmp('migrate-set-capabilities', capabilities=caps) -+ self.assert_qmp(result, 'return', {}) -+ -+ def tearDown(self) -> None: -+ self.vm_a.shutdown() -+ self.vm_b.shutdown() -+ try: -+ os.remove(mig_sock) -+ except OSError: -+ pass -+ -+ def check_bitmap(self, bitmap_name_valid: bool) -> None: -+ result = self.vm_b.qmp('x-debug-block-dirty-bitmap-sha256', -+ node=self.dst_node_name, -+ name=self.dst_bmap_name) -+ if bitmap_name_valid: -+ self.assert_qmp(result, 'return/sha256', -+ 
self.bitmap_hash_reference) -+ else: -+ self.assert_qmp(result, 'error/desc', -+ f"Dirty bitmap '{self.dst_bmap_name}' not found") -+ -+ def migrate(self, bitmap_name_valid: bool = True, -+ migration_success: bool = True) -> None: -+ result = self.vm_a.qmp('migrate', uri=f'unix:{mig_sock}') -+ self.assert_qmp(result, 'return', {}) -+ -+ with iotests.Timeout(5, 'Timeout waiting for migration to complete'): -+ self.assertEqual(self.vm_a.wait_migration('postmigrate'), -+ migration_success) -+ self.assertEqual(self.vm_b.wait_migration('running'), -+ migration_success) -+ -+ if migration_success: -+ self.check_bitmap(bitmap_name_valid) -+ -+ def verify_dest_error(self, msg: Optional[str]) -> None: -+ """ -+ Check whether the given error message is present in vm_b's log. -+ (vm_b is shut down to do so.) -+ If @msg is None, check that there has not been any error. -+ """ -+ self.vm_b.shutdown() -+ if msg is None: -+ self.assertNotIn('qemu-system-', self.vm_b.get_log()) -+ else: -+ self.assertIn(msg, self.vm_b.get_log()) -+ -+ @staticmethod -+ def mapping(node_name: str, node_alias: str, -+ bitmap_name: str, bitmap_alias: str) -> BlockBitmapMapping: -+ return [{ -+ 'node-name': node_name, -+ 'alias': node_alias, -+ 'bitmaps': [{ -+ 'name': bitmap_name, -+ 'alias': bitmap_alias -+ }] -+ }] -+ -+ def set_mapping(self, vm: iotests.VM, mapping: BlockBitmapMapping, -+ error: Optional[str] = None) -> None: -+ """ -+ Invoke migrate-set-parameters on @vm to set the given @mapping. -+ Check for success if @error is None, or verify the error message -+ if it is not. -+ On success, verify that "info migrate_parameters" on HMP returns -+ our mapping. (Just to check its formatting code.) 
-+ """ -+ result = vm.qmp('migrate-set-parameters', -+ block_bitmap_mapping=mapping) -+ -+ if error is None: -+ self.assert_qmp(result, 'return', {}) -+ -+ result = vm.qmp('human-monitor-command', -+ command_line='info migrate_parameters') -+ -+ m = re.search(r'^block-bitmap-mapping:\r?(\n .*)*\n', -+ result['return'], flags=re.MULTILINE) -+ hmp_mapping = m.group(0).replace('\r', '') if m else None -+ -+ self.assertEqual(hmp_mapping, self.to_hmp_mapping(mapping)) -+ else: -+ self.assert_qmp(result, 'error/desc', error) -+ -+ @staticmethod -+ def to_hmp_mapping(mapping: BlockBitmapMapping) -> str: -+ result = 'block-bitmap-mapping:\n' -+ -+ for node in mapping: -+ result += f" '{node['node-name']}' -> '{node['alias']}'\n" -+ -+ assert isinstance(node['bitmaps'], list) -+ for bitmap in node['bitmaps']: -+ result += f" '{bitmap['name']}' -> '{bitmap['alias']}'\n" -+ -+ return result -+ -+ -+class TestAliasMigration(TestDirtyBitmapMigration): -+ src_node_name = 'node0' -+ dst_node_name = 'node0' -+ src_bmap_name = 'bmap0' -+ dst_bmap_name = 'bmap0' -+ -+ def test_migration_without_alias(self) -> None: -+ self.migrate(self.src_node_name == self.dst_node_name and -+ self.src_bmap_name == self.dst_bmap_name) -+ -+ # Check for error message on the destination -+ if self.src_node_name != self.dst_node_name: -+ self.verify_dest_error(f"Cannot find " -+ f"device={self.src_node_name} nor " -+ f"node_name={self.src_node_name}") -+ else: -+ self.verify_dest_error(None) -+ -+ def test_alias_on_src_migration(self) -> None: -+ mapping = self.mapping(self.src_node_name, self.dst_node_name, -+ self.src_bmap_name, self.dst_bmap_name) -+ -+ self.set_mapping(self.vm_a, mapping) -+ self.migrate() -+ self.verify_dest_error(None) -+ -+ def test_alias_on_dst_migration(self) -> None: -+ mapping = self.mapping(self.dst_node_name, self.src_node_name, -+ self.dst_bmap_name, self.src_bmap_name) -+ -+ self.set_mapping(self.vm_b, mapping) -+ self.migrate() -+ self.verify_dest_error(None) -+ -+ def 
test_alias_on_both_migration(self) -> None: -+ src_map = self.mapping(self.src_node_name, 'node-alias', -+ self.src_bmap_name, 'bmap-alias') -+ -+ dst_map = self.mapping(self.dst_node_name, 'node-alias', -+ self.dst_bmap_name, 'bmap-alias') -+ -+ self.set_mapping(self.vm_a, src_map) -+ self.set_mapping(self.vm_b, dst_map) -+ self.migrate() -+ self.verify_dest_error(None) -+ -+ -+class TestNodeAliasMigration(TestAliasMigration): -+ src_node_name = 'node-src' -+ dst_node_name = 'node-dst' -+ -+ -+class TestBitmapAliasMigration(TestAliasMigration): -+ src_bmap_name = 'bmap-src' -+ dst_bmap_name = 'bmap-dst' -+ -+ -+class TestFullAliasMigration(TestAliasMigration): -+ src_node_name = 'node-src' -+ dst_node_name = 'node-dst' -+ src_bmap_name = 'bmap-src' -+ dst_bmap_name = 'bmap-dst' -+ -+ -+class TestLongBitmapNames(TestAliasMigration): -+ # Giving long bitmap names is OK, as long as there is a short alias for -+ # migration -+ src_bmap_name = 'a' * 512 -+ dst_bmap_name = 'b' * 512 -+ -+ # Skip all tests that do not use the intermediate alias -+ def test_migration_without_alias(self) -> None: -+ pass -+ -+ def test_alias_on_src_migration(self) -> None: -+ pass -+ -+ def test_alias_on_dst_migration(self) -> None: -+ pass -+ -+ -+class TestBlockBitmapMappingErrors(TestDirtyBitmapMigration): -+ src_node_name = 'node0' -+ dst_node_name = 'node0' -+ src_bmap_name = 'bmap0' -+ dst_bmap_name = 'bmap0' -+ -+ """ -+ Note that mapping nodes or bitmaps that do not exist is not an error. 
-+ """ -+ -+ def test_non_injective_node_mapping(self) -> None: -+ mapping: BlockBitmapMapping = [ -+ { -+ 'node-name': 'node0', -+ 'alias': 'common-alias', -+ 'bitmaps': [{ -+ 'name': 'bmap0', -+ 'alias': 'bmap-alias0' -+ }] -+ }, -+ { -+ 'node-name': 'node1', -+ 'alias': 'common-alias', -+ 'bitmaps': [{ -+ 'name': 'bmap1', -+ 'alias': 'bmap-alias1' -+ }] -+ } -+ ] -+ -+ self.set_mapping(self.vm_a, mapping, -+ "Invalid mapping given for block-bitmap-mapping: " -+ "The node alias 'common-alias' is used twice") -+ -+ def test_non_injective_bitmap_mapping(self) -> None: -+ mapping: BlockBitmapMapping = [{ -+ 'node-name': 'node0', -+ 'alias': 'node-alias0', -+ 'bitmaps': [ -+ { -+ 'name': 'bmap0', -+ 'alias': 'common-alias' -+ }, -+ { -+ 'name': 'bmap1', -+ 'alias': 'common-alias' -+ } -+ ] -+ }] -+ -+ self.set_mapping(self.vm_a, mapping, -+ "Invalid mapping given for block-bitmap-mapping: " -+ "The bitmap alias 'node-alias0'/'common-alias' is " -+ "used twice") -+ -+ def test_ambiguous_node_mapping(self) -> None: -+ mapping: BlockBitmapMapping = [ -+ { -+ 'node-name': 'node0', -+ 'alias': 'node-alias0', -+ 'bitmaps': [{ -+ 'name': 'bmap0', -+ 'alias': 'bmap-alias0' -+ }] -+ }, -+ { -+ 'node-name': 'node0', -+ 'alias': 'node-alias1', -+ 'bitmaps': [{ -+ 'name': 'bmap0', -+ 'alias': 'bmap-alias0' -+ }] -+ } -+ ] -+ -+ self.set_mapping(self.vm_a, mapping, -+ "Invalid mapping given for block-bitmap-mapping: " -+ "The node name 'node0' is mapped twice") -+ -+ def test_ambiguous_bitmap_mapping(self) -> None: -+ mapping: BlockBitmapMapping = [{ -+ 'node-name': 'node0', -+ 'alias': 'node-alias0', -+ 'bitmaps': [ -+ { -+ 'name': 'bmap0', -+ 'alias': 'bmap-alias0' -+ }, -+ { -+ 'name': 'bmap0', -+ 'alias': 'bmap-alias1' -+ } -+ ] -+ }] -+ -+ self.set_mapping(self.vm_a, mapping, -+ "Invalid mapping given for block-bitmap-mapping: " -+ "The bitmap 'node0'/'bmap0' is mapped twice") -+ -+ def test_migratee_node_is_not_mapped_on_src(self) -> None: -+ self.set_mapping(self.vm_a, []) 
-+ # Should just ignore all bitmaps on unmapped nodes -+ self.migrate(False) -+ self.verify_dest_error(None) -+ -+ def test_migratee_node_is_not_mapped_on_dst(self) -> None: -+ self.set_mapping(self.vm_b, []) -+ self.migrate(False) -+ self.verify_dest_error(f"Unknown node alias '{self.src_node_name}'") -+ -+ def test_migratee_bitmap_is_not_mapped_on_src(self) -> None: -+ mapping: BlockBitmapMapping = [{ -+ 'node-name': self.src_node_name, -+ 'alias': self.dst_node_name, -+ 'bitmaps': [] -+ }] -+ -+ self.set_mapping(self.vm_a, mapping) -+ # Should just ignore all unmapped bitmaps -+ self.migrate(False) -+ self.verify_dest_error(None) -+ -+ def test_migratee_bitmap_is_not_mapped_on_dst(self) -> None: -+ mapping: BlockBitmapMapping = [{ -+ 'node-name': self.dst_node_name, -+ 'alias': self.src_node_name, -+ 'bitmaps': [] -+ }] -+ -+ self.set_mapping(self.vm_b, mapping) -+ self.migrate(False) -+ self.verify_dest_error(f"Unknown bitmap alias " -+ f"'{self.src_bmap_name}' " -+ f"on node '{self.dst_node_name}' " -+ f"(alias '{self.src_node_name}')") -+ -+ def test_unused_mapping_on_dst(self) -> None: -+ # Let the source not send any bitmaps -+ self.set_mapping(self.vm_a, []) -+ -+ # Establish some mapping on the destination -+ self.set_mapping(self.vm_b, []) -+ -+ # The fact that there is a mapping on B without any bitmaps -+ # being received should be fine, not fatal -+ self.migrate(False) -+ self.verify_dest_error(None) -+ -+ def test_non_wellformed_node_alias(self) -> None: -+ alias = '123-foo' -+ -+ mapping: BlockBitmapMapping = [{ -+ 'node-name': self.src_node_name, -+ 'alias': alias, -+ 'bitmaps': [] -+ }] -+ -+ self.set_mapping(self.vm_a, mapping, -+ f"Invalid mapping given for block-bitmap-mapping: " -+ f"The node alias '{alias}' is not well-formed") -+ -+ def test_node_alias_too_long(self) -> None: -+ alias = 'a' * 256 -+ -+ mapping: BlockBitmapMapping = [{ -+ 'node-name': self.src_node_name, -+ 'alias': alias, -+ 'bitmaps': [] -+ }] -+ -+ 
self.set_mapping(self.vm_a, mapping, -+ f"Invalid mapping given for block-bitmap-mapping: " -+ f"The node alias '{alias}' is longer than 255 bytes") -+ -+ def test_bitmap_alias_too_long(self) -> None: -+ alias = 'a' * 256 -+ -+ mapping = self.mapping(self.src_node_name, self.dst_node_name, -+ self.src_bmap_name, alias) -+ -+ self.set_mapping(self.vm_a, mapping, -+ f"Invalid mapping given for block-bitmap-mapping: " -+ f"The bitmap alias '{alias}' is longer than 255 " -+ f"bytes") -+ -+ def test_bitmap_name_too_long(self) -> None: -+ name = 'a' * 256 -+ -+ result = self.vm_a.qmp('block-dirty-bitmap-add', -+ node=self.src_node_name, -+ name=name) -+ self.assert_qmp(result, 'return', {}) -+ -+ self.migrate(False, False) -+ -+ # Check for the error in the source's log -+ self.vm_a.shutdown() -+ self.assertIn(f"Cannot migrate bitmap '{name}' on node " -+ f"'{self.src_node_name}': Name is longer than 255 bytes", -+ self.vm_a.get_log()) -+ -+ # Expect abnormal shutdown of the destination VM because of -+ # the failed migration -+ try: -+ self.vm_b.shutdown() -+ except qemu.machine.AbnormalShutdown: -+ pass -+ -+ def test_aliased_bitmap_name_too_long(self) -> None: -+ # Longer than the maximum for bitmap names -+ self.dst_bmap_name = 'a' * 1024 -+ -+ mapping = self.mapping(self.dst_node_name, self.src_node_name, -+ self.dst_bmap_name, self.src_bmap_name) -+ -+ # We would have to create this bitmap during migration, and -+ # that would fail, because the name is too long. Better to -+ # catch it early. 
-+ self.set_mapping(self.vm_b, mapping, -+ f"Invalid mapping given for block-bitmap-mapping: " -+ f"The bitmap name '{self.dst_bmap_name}' is longer " -+ f"than 1023 bytes") -+ -+ def test_node_name_too_long(self) -> None: -+ # Longer than the maximum for node names -+ self.dst_node_name = 'a' * 32 -+ -+ mapping = self.mapping(self.dst_node_name, self.src_node_name, -+ self.dst_bmap_name, self.src_bmap_name) -+ -+ # During migration, this would appear simply as a node that -+ # cannot be found. Still better to catch impossible node -+ # names early (similar to test_non_wellformed_node_alias). -+ self.set_mapping(self.vm_b, mapping, -+ f"Invalid mapping given for block-bitmap-mapping: " -+ f"The node name '{self.dst_node_name}' is longer " -+ f"than 31 bytes") -+ -+ -+class TestCrossAliasMigration(TestDirtyBitmapMigration): -+ """ -+ Swap aliases, both to see that qemu does not get confused, and -+ that we can migrate multiple things at once. -+ -+ So we migrate this: -+ node-a.bmap-a -> node-b.bmap-b -+ node-a.bmap-b -> node-b.bmap-a -+ node-b.bmap-a -> node-a.bmap-b -+ node-b.bmap-b -> node-a.bmap-a -+ """ -+ -+ src_node_name = 'node-a' -+ dst_node_name = 'node-b' -+ src_bmap_name = 'bmap-a' -+ dst_bmap_name = 'bmap-b' -+ -+ def setUp(self) -> None: -+ TestDirtyBitmapMigration.setUp(self) -+ -+ # Now create another block device and let both have two bitmaps each -+ result = self.vm_a.qmp('blockdev-add', -+ node_name='node-b', driver='null-co') -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.vm_b.qmp('blockdev-add', -+ node_name='node-a', driver='null-co') -+ self.assert_qmp(result, 'return', {}) -+ -+ bmaps_to_add = (('node-a', 'bmap-b'), -+ ('node-b', 'bmap-a'), -+ ('node-b', 'bmap-b')) -+ -+ for (node, bmap) in bmaps_to_add: -+ result = self.vm_a.qmp('block-dirty-bitmap-add', -+ node=node, name=bmap) -+ self.assert_qmp(result, 'return', {}) -+ -+ @staticmethod -+ def cross_mapping() -> BlockBitmapMapping: -+ return [ -+ { -+ 'node-name': 'node-a', 
-+ 'alias': 'node-b', -+ 'bitmaps': [ -+ { -+ 'name': 'bmap-a', -+ 'alias': 'bmap-b' -+ }, -+ { -+ 'name': 'bmap-b', -+ 'alias': 'bmap-a' -+ } -+ ] -+ }, -+ { -+ 'node-name': 'node-b', -+ 'alias': 'node-a', -+ 'bitmaps': [ -+ { -+ 'name': 'bmap-b', -+ 'alias': 'bmap-a' -+ }, -+ { -+ 'name': 'bmap-a', -+ 'alias': 'bmap-b' -+ } -+ ] -+ } -+ ] -+ -+ def verify_dest_has_all_bitmaps(self) -> None: -+ bitmaps = self.vm_b.query_bitmaps() -+ -+ # Extract and sort bitmap names -+ for node in bitmaps: -+ bitmaps[node] = sorted((bmap['name'] for bmap in bitmaps[node])) -+ -+ self.assertEqual(bitmaps, -+ {'node-a': ['bmap-a', 'bmap-b'], -+ 'node-b': ['bmap-a', 'bmap-b']}) -+ -+ def test_alias_on_src(self) -> None: -+ self.set_mapping(self.vm_a, self.cross_mapping()) -+ -+ # Checks that node-a.bmap-a was migrated to node-b.bmap-b, and -+ # that is enough -+ self.migrate() -+ self.verify_dest_has_all_bitmaps() -+ self.verify_dest_error(None) -+ -+ def test_alias_on_dst(self) -> None: -+ self.set_mapping(self.vm_b, self.cross_mapping()) -+ -+ # Checks that node-a.bmap-a was migrated to node-b.bmap-b, and -+ # that is enough -+ self.migrate() -+ self.verify_dest_has_all_bitmaps() -+ self.verify_dest_error(None) -+ -+ -+if __name__ == '__main__': -+ iotests.main(supported_protocols=['file']) -diff --git a/tests/qemu-iotests/300.out b/tests/qemu-iotests/300.out -new file mode 100644 -index 0000000000..cafb8161f7 ---- /dev/null -+++ b/tests/qemu-iotests/300.out -@@ -0,0 +1,5 @@ -+..................................... 
-+---------------------------------------------------------------------- -+Ran 37 tests -+ -+OK -diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group -index 025ed5238d..b0b55e241c 100644 ---- a/tests/qemu-iotests/group -+++ b/tests/qemu-iotests/group -@@ -307,5 +307,6 @@ - 296 rw - 297 meta - 299 auto quick -+300 migration - 301 backing quick - 302 quick --- -2.27.0 - diff --git a/kvm-iotests.py-Let-wait_migration-return-on-failure.patch b/kvm-iotests.py-Let-wait_migration-return-on-failure.patch deleted file mode 100644 index 452d080..0000000 --- a/kvm-iotests.py-Let-wait_migration-return-on-failure.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 2a597bba9b1e07adb6531628962682a0e53d29b1 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Mon, 24 Aug 2020 09:20:37 -0400 -Subject: [PATCH 5/6] iotests.py: Let wait_migration() return on failure - -RH-Author: Max Reitz -Message-id: <20200824092038.227913-3-mreitz@redhat.com> -Patchwork-id: 98213 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 2/3] iotests.py: Let wait_migration() return on failure -Bugzilla: 1790492 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Stefan Hajnoczi - -Let wait_migration() return on failure (with the return value indicating -whether the migration was completed or has failed), so we can use it for -migrations that are expected to fail, too. - -Signed-off-by: Max Reitz -Reviewed-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20200820150725.68687-3-mreitz@redhat.com> -Signed-off-by: Eric Blake -(cherry picked from commit 4bf63c80357031be4eb8fff8a751f40e73ef1c10) -Signed-off-by: Max Reitz -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/iotests.py | 18 ++++++++++++------ - 1 file changed, 12 insertions(+), 6 deletions(-) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index 717b5b652c..e197c73ca5 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -729,16 +729,22 @@ class VM(qtest.QEMUQtestMachine): - } - ])) - -- def wait_migration(self, expect_runstate): -+ def wait_migration(self, expect_runstate: Optional[str]) -> bool: - while True: - event = self.event_wait('MIGRATION') - log(event, filters=[filter_qmp_event]) -- if event['data']['status'] == 'completed': -+ if event['data']['status'] in ('completed', 'failed'): - break -- # The event may occur in finish-migrate, so wait for the expected -- # post-migration runstate -- while self.qmp('query-status')['return']['status'] != expect_runstate: -- pass -+ -+ if event['data']['status'] == 'completed': -+ # The event may occur in finish-migrate, so wait for the expected -+ # post-migration runstate -+ runstate = None -+ while runstate != expect_runstate: -+ runstate = self.qmp('query-status')['return']['status'] -+ return True -+ else: -+ return False - - def node_info(self, node_name): - nodes = self.qmp('query-named-block-nodes') --- -2.27.0 - diff --git a/kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch b/kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch deleted file mode 100644 index 7816d07..0000000 --- a/kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 6d7ba662e980fcc6f3056173043136063e6d68db Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 20 Aug 2020 15:14:18 -0400 -Subject: [PATCH 2/6] machine types/numa: set numa_mem_supported on old machine - types - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20200820151419.14723-2-dgilbert@redhat.com> -Patchwork-id: 98197 -O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 1/2] machine types/numa: set numa_mem_supported on old machine types -Bugzilla: 1849707 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: David Hildenbrand -RH-Acked-by: Igor Mammedov - -From: "Dr. David Alan Gilbert" - -Reenable the -numa mem= syntax for old machine types, this is making -the downstream old machines behave in the same way as the upstream old -machines changed in upstream 32a354dc6c07d7. - -Power already seems to have the change. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/virt.c | 2 +- - hw/i386/pc_piix.c | 1 + - hw/i386/pc_q35.c | 1 + - 3 files changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index f087483a04..26a7920081 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2755,7 +2755,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - hc->plug = virt_machine_device_plug_cb; - hc->unplug_request = virt_machine_device_unplug_request_cb; - hc->unplug = virt_machine_device_unplug_cb; -- mc->numa_mem_supported = true; - mc->nvdimm_supported = true; - mc->auto_enable_numa_with_memhp = true; - mc->default_ram_id = "mach-virt.ram"; -@@ -2860,5 +2859,6 @@ static void rhel820_virt_options(MachineClass *mc) - rhel830_virt_options(mc); - compat_props_add(mc->compat_props, hw_compat_rhel_8_2, - hw_compat_rhel_8_2_len); -+ mc->numa_mem_supported = true; - } - DEFINE_RHEL_MACHINE(8, 2, 0) -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 4af4497a0c..bda2d9ffc8 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1009,6 +1009,7 @@ static void pc_machine_rhel7_options(MachineClass *m) - pcmc->default_nic_model = "e1000"; - m->default_display = "std"; - m->no_parallel = 1; -+ m->numa_mem_supported = true; - machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); - compat_props_add(m->compat_props, 
pc_rhel_compat, pc_rhel_compat_len); - m->alias = "pc"; -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index c709460ab7..d1e3a9b575 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -617,6 +617,7 @@ static void pc_q35_machine_rhel820_options(MachineClass *m) - pc_q35_machine_rhel_options(m); - m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; - m->alias = NULL; -+ m->numa_mem_supported = true; - pcmc->smbios_stream_product = "RHEL-AV"; - pcmc->smbios_stream_version = "8.2.0"; - compat_props_add(m->compat_props, hw_compat_rhel_8_2, --- -2.27.0 - diff --git a/kvm-machine_types-numa-compatibility-for-auto_enable_num.patch b/kvm-machine_types-numa-compatibility-for-auto_enable_num.patch deleted file mode 100644 index 6296a75..0000000 --- a/kvm-machine_types-numa-compatibility-for-auto_enable_num.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 25c5644164e3286dc722d59c8d7876b1c49c1385 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 20 Aug 2020 15:14:19 -0400 -Subject: [PATCH 3/6] machine_types/numa: compatibility for - auto_enable_numa_with_memdev - -RH-Author: Dr. David Alan Gilbert -Message-id: <20200820151419.14723-3-dgilbert@redhat.com> -Patchwork-id: 98196 -O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 2/2] machine_types/numa: compatibility for auto_enable_numa_with_memdev -Bugzilla: 1849707 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Igor Mammedov -RH-Acked-by: David Hildenbrand - -From: "Dr. David Alan Gilbert" - -The auto_enable_numa_with_memdev flag automatically creates a -NUMA node in a case like: - - -m 8G,maxmem=16G - -but we need it to keep old machine types the same. -This is (mostly) done for upstream machine types in 195784a0cfad. - -Power seems to have auto_enable_numa permanently on anyway. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/arm/virt.c | 2 ++ - hw/i386/pc_piix.c | 1 + - hw/i386/pc_q35.c | 1 + - 3 files changed, 4 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 26a7920081..26102f22ff 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2757,6 +2757,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - hc->unplug = virt_machine_device_unplug_cb; - mc->nvdimm_supported = true; - mc->auto_enable_numa_with_memhp = true; -+ mc->auto_enable_numa_with_memdev = true; - mc->default_ram_id = "mach-virt.ram"; - - object_class_property_add(oc, "acpi", "OnOffAuto", -@@ -2860,5 +2861,6 @@ static void rhel820_virt_options(MachineClass *mc) - compat_props_add(mc->compat_props, hw_compat_rhel_8_2, - hw_compat_rhel_8_2_len); - mc->numa_mem_supported = true; -+ mc->auto_enable_numa_with_memdev = false; - } - DEFINE_RHEL_MACHINE(8, 2, 0) -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index bda2d9ffc8..2415c5edd6 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1010,6 +1010,7 @@ static void pc_machine_rhel7_options(MachineClass *m) - m->default_display = "std"; - m->no_parallel = 1; - m->numa_mem_supported = true; -+ m->auto_enable_numa_with_memdev = false; - machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); - compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); - m->alias = "pc"; -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index d1e3a9b575..87a0572ec1 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -618,6 +618,7 @@ static void pc_q35_machine_rhel820_options(MachineClass *m) - m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; - m->alias = NULL; - m->numa_mem_supported = true; -+ m->auto_enable_numa_with_memdev = false; - pcmc->smbios_stream_product = "RHEL-AV"; - pcmc->smbios_stream_version = "8.2.0"; - compat_props_add(m->compat_props, hw_compat_rhel_8_2, --- -2.27.0 - diff --git a/kvm-migration-Add-block-bitmap-mapping-parameter.patch 
b/kvm-migration-Add-block-bitmap-mapping-parameter.patch deleted file mode 100644 index 1944c27..0000000 --- a/kvm-migration-Add-block-bitmap-mapping-parameter.patch +++ /dev/null @@ -1,947 +0,0 @@ -From 8ac15801169cb8744b57b939a3c751ea9d381d98 Mon Sep 17 00:00:00 2001 -From: Max Reitz -Date: Mon, 24 Aug 2020 09:20:36 -0400 -Subject: [PATCH 4/6] migration: Add block-bitmap-mapping parameter - -RH-Author: Max Reitz -Message-id: <20200824092038.227913-2-mreitz@redhat.com> -Patchwork-id: 98211 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/3] migration: Add block-bitmap-mapping parameter -Bugzilla: 1790492 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Stefan Hajnoczi - -This migration parameter allows mapping block node names and bitmap -names to aliases for the purpose of block dirty bitmap migration. - -This way, management tools can use different node and bitmap names on -the source and destination and pass the mapping of how bitmaps are to be -transferred to qemu (on the source, the destination, or even both with -arbitrary aliases in the migration stream). - -While touching this code, fix a bug where bitmap names longer than 255 -bytes would fail an assertion in qemu_put_counted_string(). - -Suggested-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Max Reitz -Message-Id: <20200820150725.68687-2-mreitz@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Eric Blake -(cherry picked from commit 31e4c354b38cd42a051ad030eb7779d5e7ee32fe) -Signed-off-by: Max Reitz -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/block-dirty-bitmap.c | 412 ++++++++++++++++++++++++++++----- - migration/migration.c | 30 +++ - migration/migration.h | 3 + - monitor/hmp-cmds.c | 30 +++ - qapi/migration.json | 104 ++++++++- - 5 files changed, 522 insertions(+), 57 deletions(-) - -diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c -index 784330ebe1..549e14daba 100644 ---- a/migration/block-dirty-bitmap.c -+++ b/migration/block-dirty-bitmap.c -@@ -29,10 +29,10 @@ - * - * # Header (shared for different chunk types) - * 1, 2 or 4 bytes: flags (see qemu_{put,put}_flags) -- * [ 1 byte: node name size ] \ flags & DEVICE_NAME -- * [ n bytes: node name ] / -- * [ 1 byte: bitmap name size ] \ flags & BITMAP_NAME -- * [ n bytes: bitmap name ] / -+ * [ 1 byte: node alias size ] \ flags & DEVICE_NAME -+ * [ n bytes: node alias ] / -+ * [ 1 byte: bitmap alias size ] \ flags & BITMAP_NAME -+ * [ n bytes: bitmap alias ] / - * - * # Start of bitmap migration (flags & START) - * header -@@ -72,7 +72,9 @@ - #include "migration/register.h" - #include "qemu/hbitmap.h" - #include "qemu/cutils.h" -+#include "qemu/id.h" - #include "qapi/error.h" -+#include "qapi/qapi-commands-migration.h" - #include "trace.h" - - #define CHUNK_SIZE (1 << 10) -@@ -104,7 +106,8 @@ - typedef struct SaveBitmapState { - /* Written during setup phase. 
*/ - BlockDriverState *bs; -- const char *node_name; -+ char *node_alias; -+ char *bitmap_alias; - BdrvDirtyBitmap *bitmap; - uint64_t total_sectors; - uint64_t sectors_per_chunk; -@@ -138,8 +141,9 @@ typedef struct LoadBitmapState { - /* State of the dirty bitmap migration (DBM) during load process */ - typedef struct DBMLoadState { - uint32_t flags; -- char node_name[256]; -- char bitmap_name[256]; -+ char node_alias[256]; -+ char bitmap_alias[256]; -+ char bitmap_name[BDRV_BITMAP_MAX_NAME_SIZE + 1]; - BlockDriverState *bs; - BdrvDirtyBitmap *bitmap; - -@@ -165,6 +169,188 @@ typedef struct DBMState { - - static DBMState dbm_state; - -+/* For hash tables that map node/bitmap names to aliases */ -+typedef struct AliasMapInnerNode { -+ char *string; -+ GHashTable *subtree; -+} AliasMapInnerNode; -+ -+static void free_alias_map_inner_node(void *amin_ptr) -+{ -+ AliasMapInnerNode *amin = amin_ptr; -+ -+ g_free(amin->string); -+ g_hash_table_unref(amin->subtree); -+ g_free(amin); -+} -+ -+/** -+ * Construct an alias map based on the given QMP structure. -+ * -+ * (Note that we cannot store such maps in the MigrationParameters -+ * object, because that struct is defined by the QAPI schema, which -+ * makes it basically impossible to have dicts with arbitrary keys. -+ * Therefore, we instead have to construct these maps when migration -+ * starts.) -+ * -+ * @bbm is the block_bitmap_mapping from the migration parameters. -+ * -+ * If @name_to_alias is true, the returned hash table will map node -+ * and bitmap names to their respective aliases (for outgoing -+ * migration). -+ * -+ * If @name_to_alias is false, the returned hash table will map node -+ * and bitmap aliases to their respective names (for incoming -+ * migration). -+ * -+ * The hash table maps node names/aliases to AliasMapInnerNode -+ * objects, whose .string is the respective node alias/name, and whose -+ * .subtree table maps bitmap names/aliases to the respective bitmap -+ * alias/name. 
-+ */ -+static GHashTable *construct_alias_map(const BitmapMigrationNodeAliasList *bbm, -+ bool name_to_alias, -+ Error **errp) -+{ -+ GHashTable *alias_map; -+ size_t max_node_name_len = sizeof_field(BlockDriverState, node_name) - 1; -+ -+ alias_map = g_hash_table_new_full(g_str_hash, g_str_equal, -+ g_free, free_alias_map_inner_node); -+ -+ for (; bbm; bbm = bbm->next) { -+ const BitmapMigrationNodeAlias *bmna = bbm->value; -+ const BitmapMigrationBitmapAliasList *bmbal; -+ AliasMapInnerNode *amin; -+ GHashTable *bitmaps_map; -+ const char *node_map_from, *node_map_to; -+ -+ if (!id_wellformed(bmna->alias)) { -+ error_setg(errp, "The node alias '%s' is not well-formed", -+ bmna->alias); -+ goto fail; -+ } -+ -+ if (strlen(bmna->alias) > UINT8_MAX) { -+ error_setg(errp, "The node alias '%s' is longer than %u bytes", -+ bmna->alias, UINT8_MAX); -+ goto fail; -+ } -+ -+ if (strlen(bmna->node_name) > max_node_name_len) { -+ error_setg(errp, "The node name '%s' is longer than %zu bytes", -+ bmna->node_name, max_node_name_len); -+ goto fail; -+ } -+ -+ if (name_to_alias) { -+ if (g_hash_table_contains(alias_map, bmna->node_name)) { -+ error_setg(errp, "The node name '%s' is mapped twice", -+ bmna->node_name); -+ goto fail; -+ } -+ -+ node_map_from = bmna->node_name; -+ node_map_to = bmna->alias; -+ } else { -+ if (g_hash_table_contains(alias_map, bmna->alias)) { -+ error_setg(errp, "The node alias '%s' is used twice", -+ bmna->alias); -+ goto fail; -+ } -+ -+ node_map_from = bmna->alias; -+ node_map_to = bmna->node_name; -+ } -+ -+ bitmaps_map = g_hash_table_new_full(g_str_hash, g_str_equal, -+ g_free, g_free); -+ -+ amin = g_new(AliasMapInnerNode, 1); -+ *amin = (AliasMapInnerNode){ -+ .string = g_strdup(node_map_to), -+ .subtree = bitmaps_map, -+ }; -+ -+ g_hash_table_insert(alias_map, g_strdup(node_map_from), amin); -+ -+ for (bmbal = bmna->bitmaps; bmbal; bmbal = bmbal->next) { -+ const BitmapMigrationBitmapAlias *bmba = bmbal->value; -+ const char *bmap_map_from, 
*bmap_map_to; -+ -+ if (strlen(bmba->alias) > UINT8_MAX) { -+ error_setg(errp, -+ "The bitmap alias '%s' is longer than %u bytes", -+ bmba->alias, UINT8_MAX); -+ goto fail; -+ } -+ -+ if (strlen(bmba->name) > BDRV_BITMAP_MAX_NAME_SIZE) { -+ error_setg(errp, "The bitmap name '%s' is longer than %d bytes", -+ bmba->name, BDRV_BITMAP_MAX_NAME_SIZE); -+ goto fail; -+ } -+ -+ if (name_to_alias) { -+ bmap_map_from = bmba->name; -+ bmap_map_to = bmba->alias; -+ -+ if (g_hash_table_contains(bitmaps_map, bmba->name)) { -+ error_setg(errp, "The bitmap '%s'/'%s' is mapped twice", -+ bmna->node_name, bmba->name); -+ goto fail; -+ } -+ } else { -+ bmap_map_from = bmba->alias; -+ bmap_map_to = bmba->name; -+ -+ if (g_hash_table_contains(bitmaps_map, bmba->alias)) { -+ error_setg(errp, "The bitmap alias '%s'/'%s' is used twice", -+ bmna->alias, bmba->alias); -+ goto fail; -+ } -+ } -+ -+ g_hash_table_insert(bitmaps_map, -+ g_strdup(bmap_map_from), g_strdup(bmap_map_to)); -+ } -+ } -+ -+ return alias_map; -+ -+fail: -+ g_hash_table_destroy(alias_map); -+ return NULL; -+} -+ -+/** -+ * Run construct_alias_map() in both directions to check whether @bbm -+ * is valid. -+ * (This function is to be used by migration/migration.c to validate -+ * the user-specified block-bitmap-mapping migration parameter.) -+ * -+ * Returns true if and only if the mapping is valid. 
-+ */ -+bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm, -+ Error **errp) -+{ -+ GHashTable *alias_map; -+ -+ alias_map = construct_alias_map(bbm, true, errp); -+ if (!alias_map) { -+ return false; -+ } -+ g_hash_table_destroy(alias_map); -+ -+ alias_map = construct_alias_map(bbm, false, errp); -+ if (!alias_map) { -+ return false; -+ } -+ g_hash_table_destroy(alias_map); -+ -+ return true; -+} -+ - static uint32_t qemu_get_bitmap_flags(QEMUFile *f) - { - uint8_t flags = qemu_get_byte(f); -@@ -207,11 +393,11 @@ static void send_bitmap_header(QEMUFile *f, DBMSaveState *s, - qemu_put_bitmap_flags(f, flags); - - if (flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) { -- qemu_put_counted_string(f, dbms->node_name); -+ qemu_put_counted_string(f, dbms->node_alias); - } - - if (flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) { -- qemu_put_counted_string(f, bdrv_dirty_bitmap_name(bitmap)); -+ qemu_put_counted_string(f, dbms->bitmap_alias); - } - } - -@@ -282,18 +468,25 @@ static void dirty_bitmap_do_save_cleanup(DBMSaveState *s) - QSIMPLEQ_REMOVE_HEAD(&s->dbms_list, entry); - bdrv_dirty_bitmap_set_busy(dbms->bitmap, false); - bdrv_unref(dbms->bs); -+ g_free(dbms->node_alias); -+ g_free(dbms->bitmap_alias); - g_free(dbms); - } - } - - /* Called with iothread lock taken. 
*/ - static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs, -- const char *bs_name) -+ const char *bs_name, GHashTable *alias_map) - { - BdrvDirtyBitmap *bitmap; - SaveBitmapState *dbms; -+ GHashTable *bitmap_aliases; -+ const char *node_alias, *bitmap_name, *bitmap_alias; - Error *local_err = NULL; - -+ /* When an alias map is given, @bs_name must be @bs's node name */ -+ assert(!alias_map || !strcmp(bs_name, bdrv_get_node_name(bs))); -+ - FOR_EACH_DIRTY_BITMAP(bs, bitmap) { - if (bdrv_dirty_bitmap_name(bitmap)) { - break; -@@ -303,21 +496,39 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs, - return 0; - } - -+ bitmap_name = bdrv_dirty_bitmap_name(bitmap); -+ - if (!bs_name || strcmp(bs_name, "") == 0) { - error_report("Bitmap '%s' in unnamed node can't be migrated", -- bdrv_dirty_bitmap_name(bitmap)); -+ bitmap_name); - return -1; - } - -- if (bs_name[0] == '#') { -+ if (alias_map) { -+ const AliasMapInnerNode *amin = g_hash_table_lookup(alias_map, bs_name); -+ -+ if (!amin) { -+ /* Skip bitmaps on nodes with no alias */ -+ return 0; -+ } -+ -+ node_alias = amin->string; -+ bitmap_aliases = amin->subtree; -+ } else { -+ node_alias = bs_name; -+ bitmap_aliases = NULL; -+ } -+ -+ if (node_alias[0] == '#') { - error_report("Bitmap '%s' in a node with auto-generated " - "name '%s' can't be migrated", -- bdrv_dirty_bitmap_name(bitmap), bs_name); -+ bitmap_name, node_alias); - return -1; - } - - FOR_EACH_DIRTY_BITMAP(bs, bitmap) { -- if (!bdrv_dirty_bitmap_name(bitmap)) { -+ bitmap_name = bdrv_dirty_bitmap_name(bitmap); -+ if (!bitmap_name) { - continue; - } - -@@ -326,12 +537,29 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs, - return -1; - } - -+ if (bitmap_aliases) { -+ bitmap_alias = g_hash_table_lookup(bitmap_aliases, bitmap_name); -+ if (!bitmap_alias) { -+ /* Skip bitmaps with no alias */ -+ continue; -+ } -+ } else { -+ if (strlen(bitmap_name) > UINT8_MAX) { -+ error_report("Cannot migrate bitmap 
'%s' on node '%s': " -+ "Name is longer than %u bytes", -+ bitmap_name, bs_name, UINT8_MAX); -+ return -1; -+ } -+ bitmap_alias = bitmap_name; -+ } -+ - bdrv_ref(bs); - bdrv_dirty_bitmap_set_busy(bitmap, true); - - dbms = g_new0(SaveBitmapState, 1); - dbms->bs = bs; -- dbms->node_name = bs_name; -+ dbms->node_alias = g_strdup(node_alias); -+ dbms->bitmap_alias = g_strdup(bitmap_alias); - dbms->bitmap = bitmap; - dbms->total_sectors = bdrv_nb_sectors(bs); - dbms->sectors_per_chunk = CHUNK_SIZE * 8 * -@@ -356,43 +584,52 @@ static int init_dirty_bitmap_migration(DBMSaveState *s) - SaveBitmapState *dbms; - GHashTable *handled_by_blk = g_hash_table_new(NULL, NULL); - BlockBackend *blk; -+ const MigrationParameters *mig_params = &migrate_get_current()->parameters; -+ GHashTable *alias_map = NULL; -+ -+ if (mig_params->has_block_bitmap_mapping) { -+ alias_map = construct_alias_map(mig_params->block_bitmap_mapping, true, -+ &error_abort); -+ } - - s->bulk_completed = false; - s->prev_bs = NULL; - s->prev_bitmap = NULL; - s->no_bitmaps = false; - -- /* -- * Use blockdevice name for direct (or filtered) children of named block -- * backends. -- */ -- for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { -- const char *name = blk_name(blk); -- -- if (!name || strcmp(name, "") == 0) { -- continue; -- } -+ if (!alias_map) { -+ /* -+ * Use blockdevice name for direct (or filtered) children of named block -+ * backends. 
-+ */ -+ for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { -+ const char *name = blk_name(blk); - -- bs = blk_bs(blk); -+ if (!name || strcmp(name, "") == 0) { -+ continue; -+ } - -- /* Skip filters without bitmaps */ -- while (bs && bs->drv && bs->drv->is_filter && -- !bdrv_has_named_bitmaps(bs)) -- { -- if (bs->backing) { -- bs = bs->backing->bs; -- } else if (bs->file) { -- bs = bs->file->bs; -- } else { -- bs = NULL; -+ bs = blk_bs(blk); -+ -+ /* Skip filters without bitmaps */ -+ while (bs && bs->drv && bs->drv->is_filter && -+ !bdrv_has_named_bitmaps(bs)) -+ { -+ if (bs->backing) { -+ bs = bs->backing->bs; -+ } else if (bs->file) { -+ bs = bs->file->bs; -+ } else { -+ bs = NULL; -+ } - } -- } - -- if (bs && bs->drv && !bs->drv->is_filter) { -- if (add_bitmaps_to_list(s, bs, name)) { -- goto fail; -+ if (bs && bs->drv && !bs->drv->is_filter) { -+ if (add_bitmaps_to_list(s, bs, name, NULL)) { -+ goto fail; -+ } -+ g_hash_table_add(handled_by_blk, bs); - } -- g_hash_table_add(handled_by_blk, bs); - } - } - -@@ -401,7 +638,7 @@ static int init_dirty_bitmap_migration(DBMSaveState *s) - continue; - } - -- if (add_bitmaps_to_list(s, bs, bdrv_get_node_name(bs))) { -+ if (add_bitmaps_to_list(s, bs, bdrv_get_node_name(bs), alias_map)) { - goto fail; - } - } -@@ -416,11 +653,17 @@ static int init_dirty_bitmap_migration(DBMSaveState *s) - } - - g_hash_table_destroy(handled_by_blk); -+ if (alias_map) { -+ g_hash_table_destroy(alias_map); -+ } - - return 0; - - fail: - g_hash_table_destroy(handled_by_blk); -+ if (alias_map) { -+ g_hash_table_destroy(alias_map); -+ } - dirty_bitmap_do_save_cleanup(s); - - return -1; -@@ -770,8 +1013,10 @@ static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) - return 0; - } - --static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s) -+static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s, -+ GHashTable *alias_map) - { -+ GHashTable *bitmap_alias_map = NULL; - Error *local_err = NULL; - bool nothing; - 
s->flags = qemu_get_bitmap_flags(f); -@@ -780,28 +1025,75 @@ static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s) - nothing = s->flags == (s->flags & DIRTY_BITMAP_MIG_FLAG_EOS); - - if (s->flags & DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME) { -- if (!qemu_get_counted_string(f, s->node_name)) { -- error_report("Unable to read node name string"); -+ if (!qemu_get_counted_string(f, s->node_alias)) { -+ error_report("Unable to read node alias string"); - return -EINVAL; - } -+ - if (!s->cancelled) { -- s->bs = bdrv_lookup_bs(s->node_name, s->node_name, &local_err); -+ if (alias_map) { -+ const AliasMapInnerNode *amin; -+ -+ amin = g_hash_table_lookup(alias_map, s->node_alias); -+ if (!amin) { -+ error_setg(&local_err, "Error: Unknown node alias '%s'", -+ s->node_alias); -+ s->bs = NULL; -+ } else { -+ bitmap_alias_map = amin->subtree; -+ s->bs = bdrv_lookup_bs(NULL, amin->string, &local_err); -+ } -+ } else { -+ s->bs = bdrv_lookup_bs(s->node_alias, s->node_alias, -+ &local_err); -+ } - if (!s->bs) { - error_report_err(local_err); - cancel_incoming_locked(s); - } - } -- } else if (!s->bs && !nothing && !s->cancelled) { -+ } else if (s->bs) { -+ if (alias_map) { -+ const AliasMapInnerNode *amin; -+ -+ /* Must be present in the map, or s->bs would not be set */ -+ amin = g_hash_table_lookup(alias_map, s->node_alias); -+ assert(amin != NULL); -+ -+ bitmap_alias_map = amin->subtree; -+ } -+ } else if (!nothing && !s->cancelled) { - error_report("Error: block device name is not set"); - cancel_incoming_locked(s); - } - -+ assert(nothing || s->cancelled || !!alias_map == !!bitmap_alias_map); -+ - if (s->flags & DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME) { -- if (!qemu_get_counted_string(f, s->bitmap_name)) { -- error_report("Unable to read bitmap name string"); -+ const char *bitmap_name; -+ -+ if (!qemu_get_counted_string(f, s->bitmap_alias)) { -+ error_report("Unable to read bitmap alias string"); - return -EINVAL; - } -+ -+ if (!s->cancelled) { -+ if (bitmap_alias_map) { -+ 
bitmap_name = g_hash_table_lookup(bitmap_alias_map, -+ s->bitmap_alias); -+ if (!bitmap_name) { -+ error_report("Error: Unknown bitmap alias '%s' on node " -+ "'%s' (alias '%s')", s->bitmap_alias, -+ s->bs->node_name, s->node_alias); -+ cancel_incoming_locked(s); -+ } -+ } else { -+ bitmap_name = s->bitmap_alias; -+ } -+ } -+ - if (!s->cancelled) { -+ g_strlcpy(s->bitmap_name, bitmap_name, sizeof(s->bitmap_name)); - s->bitmap = bdrv_find_dirty_bitmap(s->bs, s->bitmap_name); - - /* -@@ -811,7 +1103,7 @@ static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s) - if (!s->bitmap && !(s->flags & DIRTY_BITMAP_MIG_FLAG_START)) { - error_report("Error: unknown dirty bitmap " - "'%s' for block device '%s'", -- s->bitmap_name, s->node_name); -+ s->bitmap_name, s->bs->node_name); - cancel_incoming_locked(s); - } - } -@@ -835,6 +1127,8 @@ static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s) - */ - static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id) - { -+ GHashTable *alias_map = NULL; -+ const MigrationParameters *mig_params = &migrate_get_current()->parameters; - DBMLoadState *s = &((DBMState *)opaque)->load; - int ret = 0; - -@@ -846,13 +1140,18 @@ static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id) - return -EINVAL; - } - -+ if (mig_params->has_block_bitmap_mapping) { -+ alias_map = construct_alias_map(mig_params->block_bitmap_mapping, -+ false, &error_abort); -+ } -+ - do { - QEMU_LOCK_GUARD(&s->lock); - -- ret = dirty_bitmap_load_header(f, s); -+ ret = dirty_bitmap_load_header(f, s, alias_map); - if (ret < 0) { - cancel_incoming_locked(s); -- return ret; -+ goto fail; - } - - if (s->flags & DIRTY_BITMAP_MIG_FLAG_START) { -@@ -869,12 +1168,17 @@ static int dirty_bitmap_load(QEMUFile *f, void *opaque, int version_id) - - if (ret) { - cancel_incoming_locked(s); -- return ret; -+ goto fail; - } - } while (!(s->flags & DIRTY_BITMAP_MIG_FLAG_EOS)); - - trace_dirty_bitmap_load_success(); -- return 0; -+ ret = 0; 
-+fail: -+ if (alias_map) { -+ g_hash_table_destroy(alias_map); -+ } -+ return ret; - } - - static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque) -diff --git a/migration/migration.c b/migration/migration.c -index bf684185b7..7a89ce39a7 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -36,6 +36,7 @@ - #include "block/block.h" - #include "qapi/error.h" - #include "qapi/clone-visitor.h" -+#include "qapi/qapi-visit-migration.h" - #include "qapi/qapi-visit-sockets.h" - #include "qapi/qapi-commands-migration.h" - #include "qapi/qapi-events-migration.h" -@@ -845,6 +846,13 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) - params->has_announce_step = true; - params->announce_step = s->parameters.announce_step; - -+ if (s->parameters.has_block_bitmap_mapping) { -+ params->has_block_bitmap_mapping = true; -+ params->block_bitmap_mapping = -+ QAPI_CLONE(BitmapMigrationNodeAliasList, -+ s->parameters.block_bitmap_mapping); -+ } -+ - return params; - } - -@@ -1310,6 +1318,13 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) - "is invalid, it must be in the range of 1 to 10000 ms"); - return false; - } -+ -+ if (params->has_block_bitmap_mapping && -+ !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) { -+ error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); -+ return false; -+ } -+ - return true; - } - -@@ -1404,6 +1419,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params, - if (params->has_announce_step) { - dest->announce_step = params->announce_step; - } -+ -+ if (params->has_block_bitmap_mapping) { -+ dest->has_block_bitmap_mapping = true; -+ dest->block_bitmap_mapping = params->block_bitmap_mapping; -+ } - } - - static void migrate_params_apply(MigrateSetParameters *params, Error **errp) -@@ -1516,6 +1536,16 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) - if (params->has_announce_step) { - 
s->parameters.announce_step = params->announce_step; - } -+ -+ if (params->has_block_bitmap_mapping) { -+ qapi_free_BitmapMigrationNodeAliasList( -+ s->parameters.block_bitmap_mapping); -+ -+ s->parameters.has_block_bitmap_mapping = true; -+ s->parameters.block_bitmap_mapping = -+ QAPI_CLONE(BitmapMigrationNodeAliasList, -+ params->block_bitmap_mapping); -+ } - } - - void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) -diff --git a/migration/migration.h b/migration/migration.h -index 721e272713..4be42e8c11 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -337,6 +337,9 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); - void dirty_bitmap_mig_before_vm_start(void); - void dirty_bitmap_mig_cancel_outgoing(void); - void dirty_bitmap_mig_cancel_incoming(void); -+bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm, -+ Error **errp); -+ - void migrate_add_address(SocketAddress *address); - - int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); -diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index ae4b6a4246..7711726fd2 100644 ---- a/monitor/hmp-cmds.c -+++ b/monitor/hmp-cmds.c -@@ -469,6 +469,32 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) - monitor_printf(mon, "%s: '%s'\n", - MigrationParameter_str(MIGRATION_PARAMETER_TLS_AUTHZ), - params->tls_authz); -+ -+ if (params->has_block_bitmap_mapping) { -+ const BitmapMigrationNodeAliasList *bmnal; -+ -+ monitor_printf(mon, "%s:\n", -+ MigrationParameter_str( -+ MIGRATION_PARAMETER_BLOCK_BITMAP_MAPPING)); -+ -+ for (bmnal = params->block_bitmap_mapping; -+ bmnal; -+ bmnal = bmnal->next) -+ { -+ const BitmapMigrationNodeAlias *bmna = bmnal->value; -+ const BitmapMigrationBitmapAliasList *bmbal; -+ -+ monitor_printf(mon, " '%s' -> '%s'\n", -+ bmna->node_name, bmna->alias); -+ -+ for (bmbal = bmna->bitmaps; bmbal; bmbal = bmbal->next) { -+ const BitmapMigrationBitmapAlias *bmba = 
bmbal->value; -+ -+ monitor_printf(mon, " '%s' -> '%s'\n", -+ bmba->name, bmba->alias); -+ } -+ } -+ } - } - - qapi_free_MigrationParameters(params); -@@ -1384,6 +1410,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) - p->has_announce_step = true; - visit_type_size(v, param, &p->announce_step, &err); - break; -+ case MIGRATION_PARAMETER_BLOCK_BITMAP_MAPPING: -+ error_setg(&err, "The block-bitmap-mapping parameter can only be set " -+ "through QMP"); -+ break; - default: - assert(0); - } -diff --git a/qapi/migration.json b/qapi/migration.json -index ea53b23dca..5f6b06172c 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -508,6 +508,44 @@ - 'data': [ 'none', 'zlib', - { 'name': 'zstd', 'if': 'defined(CONFIG_ZSTD)' } ] } - -+## -+# @BitmapMigrationBitmapAlias: -+# -+# @name: The name of the bitmap. -+# -+# @alias: An alias name for migration (for example the bitmap name on -+# the opposite site). -+# -+# Since: 5.2 -+## -+{ 'struct': 'BitmapMigrationBitmapAlias', -+ 'data': { -+ 'name': 'str', -+ 'alias': 'str' -+ } } -+ -+## -+# @BitmapMigrationNodeAlias: -+# -+# Maps a block node name and the bitmaps it has to aliases for dirty -+# bitmap migration. -+# -+# @node-name: A block node name. -+# -+# @alias: An alias block node name for migration (for example the -+# node name on the opposite site). -+# -+# @bitmaps: Mappings for the bitmaps on this node. -+# -+# Since: 5.2 -+## -+{ 'struct': 'BitmapMigrationNodeAlias', -+ 'data': { -+ 'node-name': 'str', -+ 'alias': 'str', -+ 'bitmaps': [ 'BitmapMigrationBitmapAlias' ] -+ } } -+ - ## - # @MigrationParameter: - # -@@ -642,6 +680,25 @@ - # will consume more CPU. - # Defaults to 1. (Since 5.0) - # -+# @block-bitmap-mapping: Maps block nodes and bitmaps on them to -+# aliases for the purpose of dirty bitmap migration. Such -+# aliases may for example be the corresponding names on the -+# opposite site. 
-+# The mapping must be one-to-one, but not necessarily -+# complete: On the source, unmapped bitmaps and all bitmaps -+# on unmapped nodes will be ignored. On the destination, -+# encountering an unmapped alias in the incoming migration -+# stream will result in a report, and all further bitmap -+# migration data will then be discarded. -+# Note that the destination does not know about bitmaps it -+# does not receive, so there is no limitation or requirement -+# regarding the number of bitmaps received, or how they are -+# named, or on which nodes they are placed. -+# By default (when this parameter has never been set), bitmap -+# names are mapped to themselves. Nodes are mapped to their -+# block device name if there is one, and to their node name -+# otherwise. (Since 5.2) -+# - # Since: 2.4 - ## - { 'enum': 'MigrationParameter', -@@ -656,7 +713,8 @@ - 'multifd-channels', - 'xbzrle-cache-size', 'max-postcopy-bandwidth', - 'max-cpu-throttle', 'multifd-compression', -- 'multifd-zlib-level' ,'multifd-zstd-level' ] } -+ 'multifd-zlib-level' ,'multifd-zstd-level', -+ 'block-bitmap-mapping' ] } - - ## - # @MigrateSetParameters: -@@ -782,6 +840,25 @@ - # will consume more CPU. - # Defaults to 1. (Since 5.0) - # -+# @block-bitmap-mapping: Maps block nodes and bitmaps on them to -+# aliases for the purpose of dirty bitmap migration. Such -+# aliases may for example be the corresponding names on the -+# opposite site. -+# The mapping must be one-to-one, but not necessarily -+# complete: On the source, unmapped bitmaps and all bitmaps -+# on unmapped nodes will be ignored. On the destination, -+# encountering an unmapped alias in the incoming migration -+# stream will result in a report, and all further bitmap -+# migration data will then be discarded. 
-+# Note that the destination does not know about bitmaps it -+# does not receive, so there is no limitation or requirement -+# regarding the number of bitmaps received, or how they are -+# named, or on which nodes they are placed. -+# By default (when this parameter has never been set), bitmap -+# names are mapped to themselves. Nodes are mapped to their -+# block device name if there is one, and to their node name -+# otherwise. (Since 5.2) -+# - # Since: 2.4 - ## - # TODO either fuse back into MigrationParameters, or make -@@ -812,7 +889,8 @@ - '*max-cpu-throttle': 'int', - '*multifd-compression': 'MultiFDCompression', - '*multifd-zlib-level': 'int', -- '*multifd-zstd-level': 'int' } } -+ '*multifd-zstd-level': 'int', -+ '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } - - ## - # @migrate-set-parameters: -@@ -958,6 +1036,25 @@ - # will consume more CPU. - # Defaults to 1. (Since 5.0) - # -+# @block-bitmap-mapping: Maps block nodes and bitmaps on them to -+# aliases for the purpose of dirty bitmap migration. Such -+# aliases may for example be the corresponding names on the -+# opposite site. -+# The mapping must be one-to-one, but not necessarily -+# complete: On the source, unmapped bitmaps and all bitmaps -+# on unmapped nodes will be ignored. On the destination, -+# encountering an unmapped alias in the incoming migration -+# stream will result in a report, and all further bitmap -+# migration data will then be discarded. -+# Note that the destination does not know about bitmaps it -+# does not receive, so there is no limitation or requirement -+# regarding the number of bitmaps received, or how they are -+# named, or on which nodes they are placed. -+# By default (when this parameter has never been set), bitmap -+# names are mapped to themselves. Nodes are mapped to their -+# block device name if there is one, and to their node name -+# otherwise. 
(Since 5.2) -+# - # Since: 2.4 - ## - { 'struct': 'MigrationParameters', -@@ -986,7 +1083,8 @@ - '*max-cpu-throttle': 'uint8', - '*multifd-compression': 'MultiFDCompression', - '*multifd-zlib-level': 'uint8', -- '*multifd-zstd-level': 'uint8' } } -+ '*multifd-zstd-level': 'uint8', -+ '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } - - ## - # @query-migrate-parameters: --- -2.27.0 - diff --git a/kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch b/kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch deleted file mode 100644 index 64858cd..0000000 --- a/kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 60e1b84487fead757b5feaf0e55448338f2b2671 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Mon, 28 Sep 2020 11:06:48 -0400 -Subject: [PATCH 1/2] migration: increase max-bandwidth to 128 MiB/s (1 Gib/s) - -RH-Author: Laurent Vivier -Message-id: <20200928110648.342290-1-lvivier@redhat.com> -Patchwork-id: 98494 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH] migration: increase max-bandwidth to 128 MiB/s (1 Gib/s) -Bugzilla: 1874004 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Greg Kurz - -max-bandwidth is set by default to 32 MiB/s (256 Mib/s) -since 2008 (5bb7910af031c). - -Most of the CPUs can dirty memory faster than that now, -and this is clearly a problem with POWER where the page -size is 64 kiB and not 4 KiB. - -Signed-off-by: Laurent Vivier -Message-Id: <20200921144957.979989-1-lvivier@redhat.com> -Reviewed-by: David Gibson -Reviewed-by: Greg Kurz -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 7590a2ae091fde8bb72d5df93977ab9707e23242) -Signed-off-by: Laurent Vivier - -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=31576368 -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1874004 -Upstream: Merged -Tested: checked a migration while an installation is running can end - before the end of installation -Signed-off-by: Danilo C. L. de Paula ---- - migration/migration.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 7a89ce39a76..8193f33f768 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -57,7 +57,7 @@ - #include "qemu/queue.h" - #include "multifd.h" - --#define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */ -+#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ - - /* Amount of time to allocate to each "chunk" of bandwidth-throttled - * data. */ --- -2.27.0 - diff --git a/kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch b/kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch deleted file mode 100644 index c6f1506..0000000 --- a/kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch +++ /dev/null @@ -1,254 +0,0 @@ -From 74ce16018bcb202ab81f3aa7b5a33279dd4800da Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Tue, 1 Sep 2020 19:07:04 -0400 -Subject: [PATCH 08/11] nvram: Exit QEMU if NVRAM cannot contain all -prom-env - data -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Greg Kurz -Message-id: <20200901190704.474799-2-gkurz@redhat.com> -Patchwork-id: 98256 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/1] nvram: Exit QEMU if NVRAM cannot contain all -prom-env data -Bugzilla: 1867739 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Thomas Huth -RH-Acked-by: Laurent Vivier -RH-Acked-by: David Gibson - -From: Greg Kurz - -Since commit 61f20b9dc5b7 ("spapr_nvram: Pre-initialize 
the NVRAM to -support the -prom-env parameter"), pseries machines can pre-initialize -the "system" partition in the NVRAM with the data passed to all -prom-env -parameters on the QEMU command line. - -In this case it is assumed that all the data fits in 64 KiB, but the user -can easily pass more and crash QEMU: - -$ qemu-system-ppc64 -M pseries $(for ((x=0;x<128;x++)); do \ - echo -n " -prom-env " ; printf "%0.sx" {1..1024}; \ - done) # this requires ~128 Kib -malloc(): corrupted top size -Aborted (core dumped) - -This happens because we don't check if all the prom-env data fits in -the NVRAM and chrp_nvram_set_var() happily memcpy() it passed the -buffer. - -This crash affects basically all ppc/ppc64 machine types that use -prom-env: -- pseries (all versions) -- g3beige -- mac99 - -and also sparc/sparc64 machine types: -- LX -- SPARCClassic -- SPARCbook -- SS-10 -- SS-20 -- SS-4 -- SS-5 -- SS-600MP -- Voyager -- sun4u -- sun4v - -Add a max_len argument to chrp_nvram_create_system_partition() so that -it can check the available size before writing to memory. - -Since NVRAM is populated at machine init, it seems reasonable to consider -this error as fatal. So, instead of reporting an error when we detect that -the NVRAM is too small and adapt all machine types to handle it, we simply -exit QEMU in all cases. This is still better than crashing. If someone -wants another behavior, I guess this can be reworked later. - -Tested with: - -$ yes q | \ - (for arch in ppc ppc64 sparc sparc64; do \ - echo == $arch ==; \ - qemu=${arch}-softmmu/qemu-system-$arch; \ - for mach in $($qemu -M help | awk '! 
/^Supported/ { print $1 }'); do \ - echo $mach; \ - $qemu -M $mach -monitor stdio -nodefaults -nographic \ - $(for ((x=0;x<128;x++)); do \ - echo -n " -prom-env " ; printf "%0.sx" {1..1024}; \ - done) >/dev/null; \ - done; echo; \ - done) - -Without the patch, affected machine types cause QEMU to report some -memory corruption and crash: - -malloc(): corrupted top size - -free(): invalid size - -*** stack smashing detected ***: terminated - -With the patch, QEMU prints the following message and exits: - -NVRAM is too small. Try to pass less data to -prom-env - -It seems that the conditions for the crash have always existed, but it -affects pseries, the machine type I care for, since commit 61f20b9dc5b7 -only. - -Fixes: 61f20b9dc5b7 ("spapr_nvram: Pre-initialize the NVRAM to support the -prom-env parameter") -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1867739 -Reported-by: John Snow -Reviewed-by: Laurent Vivier -Signed-off-by: Greg Kurz -Message-Id: <159736033937.350502.12402444542194031035.stgit@bahia.lan> -Signed-off-by: David Gibson -(cherry picked from commit 37035df51eaabb8d26b71da75b88a1c6727de8fa) -Signed-off-by: Greg Kurz -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/nvram/chrp_nvram.c | 24 +++++++++++++++++++++--- - hw/nvram/mac_nvram.c | 2 +- - hw/nvram/spapr_nvram.c | 3 ++- - hw/sparc/sun4m.c | 2 +- - hw/sparc64/sun4u.c | 2 +- - include/hw/nvram/chrp_nvram.h | 3 ++- - 6 files changed, 28 insertions(+), 8 deletions(-) - -diff --git a/hw/nvram/chrp_nvram.c b/hw/nvram/chrp_nvram.c -index d969f26704..d4d10a7c03 100644 ---- a/hw/nvram/chrp_nvram.c -+++ b/hw/nvram/chrp_nvram.c -@@ -21,14 +21,21 @@ - - #include "qemu/osdep.h" - #include "qemu/cutils.h" -+#include "qemu/error-report.h" - #include "hw/nvram/chrp_nvram.h" - #include "sysemu/sysemu.h" - --static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str) -+static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str, -+ int max_len) - { - int len; - - len = strlen(str) + 1; -+ -+ if (max_len < len) { -+ return -1; -+ } -+ - memcpy(&nvram[addr], str, len); - - return addr + len; -@@ -38,19 +45,26 @@ static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str) - * Create a "system partition", used for the Open Firmware - * environment variables. 
- */ --int chrp_nvram_create_system_partition(uint8_t *data, int min_len) -+int chrp_nvram_create_system_partition(uint8_t *data, int min_len, int max_len) - { - ChrpNvramPartHdr *part_header; - unsigned int i; - int end; - -+ if (max_len < sizeof(*part_header)) { -+ goto fail; -+ } -+ - part_header = (ChrpNvramPartHdr *)data; - part_header->signature = CHRP_NVPART_SYSTEM; - pstrcpy(part_header->name, sizeof(part_header->name), "system"); - - end = sizeof(ChrpNvramPartHdr); - for (i = 0; i < nb_prom_envs; i++) { -- end = chrp_nvram_set_var(data, end, prom_envs[i]); -+ end = chrp_nvram_set_var(data, end, prom_envs[i], max_len - end); -+ if (end == -1) { -+ goto fail; -+ } - } - - /* End marker */ -@@ -65,6 +79,10 @@ int chrp_nvram_create_system_partition(uint8_t *data, int min_len) - chrp_nvram_finish_partition(part_header, end); - - return end; -+ -+fail: -+ error_report("NVRAM is too small. Try to pass less data to -prom-env"); -+ exit(EXIT_FAILURE); - } - - /** -diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c -index beec1c4e4d..11f2d31cdb 100644 ---- a/hw/nvram/mac_nvram.c -+++ b/hw/nvram/mac_nvram.c -@@ -141,7 +141,7 @@ static void pmac_format_nvram_partition_of(MacIONVRAMState *nvr, int off, - - /* OpenBIOS nvram variables partition */ - sysp_end = chrp_nvram_create_system_partition(&nvr->data[off], -- DEF_SYSTEM_SIZE) + off; -+ DEF_SYSTEM_SIZE, len) + off; - - /* Free space partition */ - chrp_nvram_create_free_partition(&nvr->data[sysp_end], len - sysp_end); -diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c -index 15d08281d4..386513499f 100644 ---- a/hw/nvram/spapr_nvram.c -+++ b/hw/nvram/spapr_nvram.c -@@ -188,7 +188,8 @@ static void spapr_nvram_realize(SpaprVioDevice *dev, Error **errp) - } - } else if (nb_prom_envs > 0) { - /* Create a system partition to pass the -prom-env variables */ -- chrp_nvram_create_system_partition(nvram->buf, MIN_NVRAM_SIZE / 4); -+ chrp_nvram_create_system_partition(nvram->buf, MIN_NVRAM_SIZE / 4, -+ 
nvram->size); - chrp_nvram_create_free_partition(&nvram->buf[MIN_NVRAM_SIZE / 4], - nvram->size - MIN_NVRAM_SIZE / 4); - } -diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c -index 9be930415f..cf7dfa4af5 100644 ---- a/hw/sparc/sun4m.c -+++ b/hw/sparc/sun4m.c -@@ -143,7 +143,7 @@ static void nvram_init(Nvram *nvram, uint8_t *macaddr, - memset(image, '\0', sizeof(image)); - - /* OpenBIOS nvram variables partition */ -- sysp_end = chrp_nvram_create_system_partition(image, 0); -+ sysp_end = chrp_nvram_create_system_partition(image, 0, 0x1fd0); - - /* Free space partition */ - chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end); -diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c -index 9e30203dcc..37310b73e6 100644 ---- a/hw/sparc64/sun4u.c -+++ b/hw/sparc64/sun4u.c -@@ -136,7 +136,7 @@ static int sun4u_NVRAM_set_params(Nvram *nvram, uint16_t NVRAM_size, - memset(image, '\0', sizeof(image)); - - /* OpenBIOS nvram variables partition */ -- sysp_end = chrp_nvram_create_system_partition(image, 0); -+ sysp_end = chrp_nvram_create_system_partition(image, 0, 0x1fd0); - - /* Free space partition */ - chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end); -diff --git a/include/hw/nvram/chrp_nvram.h b/include/hw/nvram/chrp_nvram.h -index 09941a9be4..4a0f5c21b8 100644 ---- a/include/hw/nvram/chrp_nvram.h -+++ b/include/hw/nvram/chrp_nvram.h -@@ -50,7 +50,8 @@ chrp_nvram_finish_partition(ChrpNvramPartHdr *header, uint32_t size) - header->checksum = sum & 0xff; - } - --int chrp_nvram_create_system_partition(uint8_t *data, int min_len); -+/* chrp_nvram_create_system_partition() failure is fatal */ -+int chrp_nvram_create_system_partition(uint8_t *data, int min_len, int max_len); - int chrp_nvram_create_free_partition(uint8_t *data, int len); - - #endif --- -2.27.0 - diff --git a/kvm-ppc-spapr-Use-start-powered-off-CPUState-property.patch b/kvm-ppc-spapr-Use-start-powered-off-CPUState-property.patch deleted file mode 100644 index b8affe1..0000000 
--- a/kvm-ppc-spapr-Use-start-powered-off-CPUState-property.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 5dd7cdf3739c73d910d5df6443b39e9b0b79f3fd Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Tue, 8 Sep 2020 18:47:16 -0400 -Subject: [PATCH 4/4] ppc/spapr: Use start-powered-off CPUState property - -RH-Author: Laurent Vivier -Message-id: <20200908184716.1125192-4-lvivier@redhat.com> -Patchwork-id: 98302 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 3/3] ppc/spapr: Use start-powered-off CPUState property -Bugzilla: 1849483 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Gibson -RH-Acked-by: Greg Kurz - -From: Thiago Jung Bauermann - -PowerPC sPAPR CPUs start in the halted state, and spapr_reset_vcpu() -attempts to implement this by setting CPUState::halted to 1. But that's too -late for the case of hotplugged CPUs in a machine configure with 2 or more -threads per core. - -By then, other parts of QEMU have already caused the vCPU to run in an -unitialized state a couple of times. For example, ppc_cpu_reset() calls -ppc_tlb_invalidate_all(), which ends up calling async_run_on_cpu(). This -kicks the new vCPU while it has CPUState::halted = 0, causing QEMU to issue -a KVM_RUN ioctl on the new vCPU before the guest is able to make the -start-cpu RTAS call to initialize its register state. - -This problem doesn't seem to cause visible issues for regular guests, but -on a secure guest running under the Ultravisor it does. The Ultravisor -relies on being able to snoop on the start-cpu RTAS call to map vCPUs to -guests, and this issue causes it to see a stray vCPU that doesn't belong to -any guest. - -Fix by setting the start-powered-off CPUState property in -spapr_create_vcpu(), which makes cpu_common_reset() initialize -CPUState::halted to 1 at an earlier moment. 
- -Suggested-by: Eduardo Habkost -Acked-by: David Gibson -Reviewed-by: Greg Kurz -Signed-off-by: Thiago Jung Bauermann -Message-Id: <20200826055535.951207-4-bauerman@linux.ibm.com> -Signed-off-by: David Gibson -(cherry picked from commit 554c2169e9251ca2829ab968bd9ba5641a5abe1d) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr_cpu_core.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index f228f8bb75..86fed5c528 100644 ---- a/hw/ppc/spapr_cpu_core.c -+++ b/hw/ppc/spapr_cpu_core.c -@@ -37,11 +37,6 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu) - - cpu_reset(cs); - -- /* All CPUs start halted. CPU0 is unhalted from the machine level -- * reset code and the rest are explicitly started up by the guest -- * using an RTAS call */ -- cs->halted = 1; -- - env->spr[SPR_HIOR] = 0; - - lpcr = env->spr[SPR_LPCR]; -@@ -287,6 +282,11 @@ static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp) - - cs = CPU(obj); - cpu = POWERPC_CPU(obj); -+ /* -+ * All CPUs start halted. CPU0 is unhalted from the machine level reset code -+ * and the rest are explicitly started up by the guest using an RTAS call. 
-+ */ -+ cs->start_powered_off = true; - cs->cpu_index = cc->core_id + i; - spapr_set_vcpu_id(cpu, cs->cpu_index, &local_err); - if (local_err) { --- -2.27.0 - diff --git a/kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch b/kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch deleted file mode 100644 index 98dff89..0000000 --- a/kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch +++ /dev/null @@ -1,203 +0,0 @@ -From a768aa960a2e9e3b8dd72dfc7135a7e7d0814d3f Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Wed, 23 Sep 2020 17:49:40 -0400 -Subject: [PATCH] qemu-img: Support bitmap --merge into backing image - -RH-Author: Eric Blake -Message-id: <20200923174940.704681-2-eblake@redhat.com> -Patchwork-id: 98487 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/1] qemu-img: Support bitmap --merge into backing image -Bugzilla: 1877209 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Max Reitz - -If you have the chain 'base.qcow2 <- top.qcow2' and want to merge a -bitmap from top into base, qemu-img was failing with: - -qemu-img: Could not open 'top.qcow2': Could not open backing file: Failed to get shared "write" lock -Is another process using the image [base.qcow2]? - -The easiest fix is to not open the entire backing chain of either -image (source or destination); after all, the point of 'qemu-img -bitmap' is solely to manipulate bitmaps directly within a single qcow2 -image, and this is made more precise if we don't pay attention to -other images in the chain that may happen to have a bitmap by the same -name. - -However, note that on a case-by-case analysis, there _are_ times where -we treat it as a feature that we can access a bitmap from a backing -layer in association with an overlay BDS. 
A demonstration of this is -using NBD to expose both an overlay BDS (for constant contents) and a -bitmap (for learning which blocks are interesting) during an -incremental backup: - -Base <- Active <- Temporary - \--block job ->/ - -where Temporary is being fed by a backup 'sync=none' job. When -exposing Temporary over NBD, referring to a bitmap that lives only in -Active is less effort than having to copy a bitmap into Temporary [1]. -So the testsuite additions in this patch check both where bitmaps get -allocated (the qemu-img info output), and that qemu-nbd is indeed able -to access a bitmap inherited from the backing chain since it is a -different use case than 'qemu-img bitmap'. - -[1] Full disclosure: prior to the recent commit 374eedd1c4 and -friends, we were NOT able to see bitmaps through filters, which meant -that we actually did not have nice clean semantics for uniformly being -able to pick up bitmaps from anywhere in the backing chain (seen as a -change in behavior between qemu 4.1 and 4.2 at commit 00e30f05de, when -block-copy swapped from a one-off to a filter). Which means libvirt -was already coded to copy bitmaps around for the sake of older qemu, -even though modern qemu no longer needs it. Oh well. - -Fixes: http://bugzilla.redhat.com/1877209 -Reported-by: Eyal Shenitzky -Signed-off-by: Eric Blake -Message-Id: <20200914191009.644842-1-eblake@redhat.com> -[eblake: more commit message tweaks, per Max Reitz review] -Reviewed-by: Max Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy - -(cherry picked from commit 14f16bf9474c860ecc127a66a86961942319f7af) -Tweak 291.out since extended L2 is not backported. - -Signed-off-by: Eric Blake -Signed-off-by: Danilo C. L. 
de Paula ---- - qemu-img.c | 11 +++++--- - tests/qemu-iotests/291 | 12 +++++++++ - tests/qemu-iotests/291.out | 54 ++++++++++++++++++++++++++++++++++++++ - 3 files changed, 74 insertions(+), 3 deletions(-) - -diff --git a/qemu-img.c b/qemu-img.c -index 5308773811f..b0ca321a6be 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -4747,14 +4747,19 @@ static int img_bitmap(int argc, char **argv) - filename = argv[optind]; - bitmap = argv[optind + 1]; - -- blk = img_open(image_opts, filename, fmt, BDRV_O_RDWR, false, false, -- false); -+ /* -+ * No need to open backing chains; we will be manipulating bitmaps -+ * directly in this image without reference to image contents. -+ */ -+ blk = img_open(image_opts, filename, fmt, BDRV_O_RDWR | BDRV_O_NO_BACKING, -+ false, false, false); - if (!blk) { - goto out; - } - bs = blk_bs(blk); - if (src_filename) { -- src = img_open(false, src_filename, src_fmt, 0, false, false, false); -+ src = img_open(false, src_filename, src_fmt, BDRV_O_NO_BACKING, -+ false, false, false); - if (!src) { - goto out; - } -diff --git a/tests/qemu-iotests/291 b/tests/qemu-iotests/291 -index 1e0bb76959b..4f837b20565 100755 ---- a/tests/qemu-iotests/291 -+++ b/tests/qemu-iotests/291 -@@ -91,6 +91,15 @@ $QEMU_IMG bitmap --remove --image-opts \ - driver=$IMGFMT,file.driver=file,file.filename="$TEST_IMG" tmp - _img_info --format-specific - -+echo -+echo "=== Merge from top layer into backing image ===" -+echo -+ -+$QEMU_IMG rebase -u -F qcow2 -b "$TEST_IMG.base" "$TEST_IMG" -+$QEMU_IMG bitmap --add --merge b2 -b "$TEST_IMG" -F $IMGFMT \ -+ -f $IMGFMT "$TEST_IMG.base" b3 -+_img_info --format-specific --backing-chain -+ - echo - echo "=== Check bitmap contents ===" - echo -@@ -107,6 +116,9 @@ $QEMU_IMG map --output=json --image-opts \ - nbd_server_start_unix_socket -r -f qcow2 -B b2 "$TEST_IMG" - $QEMU_IMG map --output=json --image-opts \ - "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b2" | _filter_qemu_img_map -+nbd_server_start_unix_socket -r -f qcow2 -B b3 
"$TEST_IMG" -+$QEMU_IMG map --output=json --image-opts \ -+ "$IMG,x-dirty-bitmap=qemu:dirty-bitmap:b3" | _filter_qemu_img_map - - # success, all done - echo '*** done' -diff --git a/tests/qemu-iotests/291.out b/tests/qemu-iotests/291.out -index 9f661515b41..332584f67a8 100644 ---- a/tests/qemu-iotests/291.out -+++ b/tests/qemu-iotests/291.out -@@ -66,6 +66,57 @@ Format specific information: - refcount bits: 16 - corrupt: false - -+=== Merge from top layer into backing image === -+ -+image: TEST_DIR/t.IMGFMT -+file format: IMGFMT -+virtual size: 10 MiB (10485760 bytes) -+cluster_size: 65536 -+backing file: TEST_DIR/t.IMGFMT.base -+backing file format: IMGFMT -+Format specific information: -+ compat: 1.1 -+ compression type: zlib -+ lazy refcounts: false -+ bitmaps: -+ [0]: -+ flags: -+ name: b1 -+ granularity: 524288 -+ [1]: -+ flags: -+ [0]: auto -+ name: b2 -+ granularity: 65536 -+ [2]: -+ flags: -+ name: b0 -+ granularity: 65536 -+ refcount bits: 16 -+ corrupt: false -+ -+image: TEST_DIR/t.IMGFMT.base -+file format: IMGFMT -+virtual size: 10 MiB (10485760 bytes) -+cluster_size: 65536 -+Format specific information: -+ compat: 1.1 -+ compression type: zlib -+ lazy refcounts: false -+ bitmaps: -+ [0]: -+ flags: -+ [0]: auto -+ name: b0 -+ granularity: 65536 -+ [1]: -+ flags: -+ [0]: auto -+ name: b3 -+ granularity: 65536 -+ refcount bits: 16 -+ corrupt: false -+ - === Check bitmap contents === - - [{ "start": 0, "length": 3145728, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -@@ -77,4 +128,7 @@ Format specific information: - [{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, - { "start": 2097152, "length": 1048576, "depth": 0, "zero": false, "data": false}, - { "start": 3145728, "length": 7340032, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] -+[{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 2097152, "length": 1048576, "depth": 0, 
"zero": false, "data": false}, -+{ "start": 3145728, "length": 7340032, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] - *** done --- -2.27.0 - diff --git a/kvm-redhat-Update-hw_compat_8_2.patch b/kvm-redhat-Update-hw_compat_8_2.patch deleted file mode 100644 index f4a096f..0000000 --- a/kvm-redhat-Update-hw_compat_8_2.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 4effa711a867eaf0f10b38c2bcf4c8c39a00c1ab Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 14 Aug 2020 16:02:48 -0400 -Subject: [PATCH 1/4] redhat: Update hw_compat_8_2 - -RH-Author: Laurent Vivier -Message-id: <20200814160249.217753-2-lvivier@redhat.com> -Patchwork-id: 98157 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/2] redhat: Update hw_compat_8_2 -Bugzilla: 1843348 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Gibson - -v5.1.0-rc2 has introduced a new entry in hw_compat_5_0: - 2ebc21216f58 ("hw/pci-host: save/restore pci host config register") - -Add it in hw_compat_rhel_8_2 - -Update hw_compat_8_2 comment as it also includes hw_compat_5_0 -Move a blank line - -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/core/machine.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 6d17d9938d..10fa9b8c75 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -29,7 +29,7 @@ - #include "migration/vmstate.h" - - /* -- * The same as hw_compat_4_2 -+ * The same as hw_compat_4_2 + hw_compat_5_0 - */ - GlobalProperty hw_compat_rhel_8_2[] = { - /* hw_compat_rhel_8_2 from hw_compat_4_2 */ -@@ -55,6 +55,8 @@ GlobalProperty hw_compat_rhel_8_2[] = { - /* hw_compat_rhel_8_2 from hw_compat_4_2 */ - { "fw_cfg", "acpi-mr-restore", "false" }, - /* hw_compat_rhel_8_2 from hw_compat_5_0 */ -+ { "pci-host-bridge", "x-config-reg-migration-enabled", "off" }, -+ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ - { "virtio-balloon-device", "page-poison", "false" }, - /* hw_compat_rhel_8_2 from hw_compat_5_0 */ - { "vmport", "x-read-set-eax", "off" }, -@@ -64,9 +66,9 @@ GlobalProperty hw_compat_rhel_8_2[] = { - { "vmport", "x-report-vmx-type", "off" }, - /* hw_compat_rhel_8_2 from hw_compat_5_0 */ - { "vmport", "x-cmds-v2", "off" }, -- - }; - const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); -+ - /* - * The same as hw_compat_4_1 - */ --- -2.27.0 - diff --git a/kvm-redhat-define-hw_compat_8_2.patch b/kvm-redhat-define-hw_compat_8_2.patch deleted file mode 100644 index 42bb6a4..0000000 --- a/kvm-redhat-define-hw_compat_8_2.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 45b840275a5f5d3a3e4803c72c02a1db7fe1927d Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Mon, 6 Jul 2020 09:41:16 +0000 -Subject: [PATCH] redhat: define hw_compat_8_2 - -RH-Author: Laurent Vivier -Message-id: <20200706104117.219174-2-lvivier@redhat.com> -Patchwork-id: 97903 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH v2 1/2] redhat: define hw_compat_8_2 -Bugzilla: 1853265 -RH-Acked-by: David Gibson -RH-Acked-by: Thomas Huth -RH-Acked-by: Greg Kurz - -Signed-off-by: Laurent Vivier -Signed-off-by: Dr. 
David Alan Gilbert - For minor fix -Signed-off-by: Danilo C. L. de Paula ---- - hw/core/machine.c | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 5476af98e1..6d17d9938d 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -54,6 +54,17 @@ GlobalProperty hw_compat_rhel_8_2[] = { - { "qxl-vga", "revision", "4" }, - /* hw_compat_rhel_8_2 from hw_compat_4_2 */ - { "fw_cfg", "acpi-mr-restore", "false" }, -+ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ -+ { "virtio-balloon-device", "page-poison", "false" }, -+ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ -+ { "vmport", "x-read-set-eax", "off" }, -+ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ -+ { "vmport", "x-signal-unsupported-cmd", "off" }, -+ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ -+ { "vmport", "x-report-vmx-type", "off" }, -+ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ -+ { "vmport", "x-cmds-v2", "off" }, -+ - }; - const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); - /* --- -2.27.0 - diff --git a/kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch b/kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch deleted file mode 100644 index 069ed3a..0000000 --- a/kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 020501879841afb788087f0455df79367c0337a0 Mon Sep 17 00:00:00 2001 -From: Danilo de Paula -Date: Fri, 11 Sep 2020 14:23:22 -0400 -Subject: [PATCH 1/4] redhat: link /etc/qemu-ga/fsfreeze-hook to /etc/qemu-kvm/ -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Danilo de Paula -Message-id: <20200911142322.1865501-2-ddepaula@redhat.com> -Patchwork-id: 98358 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH v2 1/1] redhat: link /etc/qemu-ga/fsfreeze-hook to /etc/qemu-kvm/ -Bugzilla: 1738820 -RH-Acked-by: Eduardo Lima (Etrunko) -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Miroslav Rezanina - -When qemu-ga was 
introduced to RHEL-8, we used the qemu-guest-agent -from RHEL-7 as base. - -In RHEL-7, qemu-guest-agent is built as standalone package. -It's built as "qemu-ga", hence the "qemu-ga" folders. - -For RHEL-8, that should have been renamed to qemu-kvm, but I missed it. -Renaming those folders to /etc/qemu-kvm is a no go today, because -users might have populated the /etc/qemu-ga/fsfreeze-hook.d folder. - -So, in order to make qemu-ga -F works in RHEL-8, a link is being -created in the expected place, pointing to the real one. - -Also, fsfreeze-hook opens up the fsfreeze-hook.d on the same PATH where -it is stored. However, it doesn't follow symlinks. In order to fix this, -I had to change it to make sure it follows the link. - -An option would be to also link the fsfreeze-hook.d folder, but I choose -not to do so as it creates a permanent/visible change in users -environments. The downside is to keep another downstream-only change. - -Signed-off-by: Danilo C. L. de Paula ---- - redhat/qemu-kvm.spec.template | 6 ++++++ - scripts/qemu-guest-agent/fsfreeze-hook | 2 +- - 2 files changed, 7 insertions(+), 1 deletion(-) - - -diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook -index 13aafd4845..e9b84ec028 100755 ---- a/scripts/qemu-guest-agent/fsfreeze-hook -+++ b/scripts/qemu-guest-agent/fsfreeze-hook -@@ -8,7 +8,7 @@ - # request, it is issued with "thaw" argument after filesystem is thawed. 
- - LOGFILE=/var/log/qga-fsfreeze-hook.log --FSFREEZE_D=$(dirname -- "$0")/fsfreeze-hook.d -+FSFREEZE_D=$(dirname -- "$(realpath $0)")/fsfreeze-hook.d - - # Check whether file $1 is a backup or rpm-generated file and should be ignored - is_ignored_file() { --- -2.27.0 - diff --git a/kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch b/kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch deleted file mode 100644 index 45f4e77..0000000 --- a/kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 1ab8783e716eb5ae2fb44b06a2db16b9fb91dad9 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 14 Aug 2020 16:02:49 -0400 -Subject: [PATCH 2/4] redhat: update pseries-rhel8.2.0 machine type - -RH-Author: Laurent Vivier -Message-id: <20200814160249.217753-3-lvivier@redhat.com> -Patchwork-id: 98156 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 2/2] redhat: update pseries-rhel8.2.0 machine type -Bugzilla: 1843348 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Gibson - -v5.1.0-rc1 has modified the default state of pseries-5.1 and -introduced a new entry in pseries-5.0 machine type: - a6030d7e0b35 ("spapr: Add a new level of NUMA for GPUs") - -Add this entry to pseries-rhel8.2.0 as the default state has -also changed for pseries-rhel8.3.0 - -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/spapr.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 1c367a2367..5e3964326d 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -5019,10 +5019,15 @@ DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", true); - static void spapr_machine_rhel820_class_options(MachineClass *mc) - { - SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -+ /* from pseries-5.0 */ -+ static GlobalProperty compat[] = { -+ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-5.1-associativity", "on" }, -+ }; - - spapr_machine_rhel830_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_rhel_8_2, - hw_compat_rhel_8_2_len); -+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - - /* from pseries-4.2 */ - smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF; -@@ -5031,6 +5036,7 @@ static void spapr_machine_rhel820_class_options(MachineClass *mc) - - /* from pseries-5.0 */ - mc->numa_mem_supported = true; -+ smc->pre_5_1_assoc_refpoints = true; - } - - DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", false); --- -2.27.0 - diff --git a/kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch b/kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch deleted file mode 100644 index f2dee26..0000000 --- a/kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 4740b2ba008ca469409cf5af58ac85ae5dc7f6a5 Mon Sep 17 00:00:00 2001 -From: Eduardo Otubo -Date: Tue, 15 Sep 2020 15:15:49 -0400 -Subject: [PATCH 2/4] seccomp: fix killing of whole process instead of thread -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eduardo Otubo -Message-id: <20200915151549.5007-1-otubo@redhat.com> -Patchwork-id: 98392 -O-Subject: [RHEL-AV-8.3.0/RHEL-8.3.0 qemu-kvm PATCH] seccomp: fix killing of whole process instead of thread -Bugzilla: 1752376 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Daniel P. 
Berrange - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1752376 -BRANCH: rhel-av-8.3.0 -UPSTREAM: Merged -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=31339152 - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1752376 -BRANCH: rhel-8.3.0 -UPSTREAM: Merged -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=31339111 - -From: Daniel P. Berrangé - -Back in 2018 we introduced support for killing the whole QEMU process -instead of just one thread, when a seccomp rule is violated: - - commit bda08a5764d470f101fa38635d30b41179a313e1 - Author: Marc-André Lureau - Date: Wed Aug 22 19:02:48 2018 +0200 - - seccomp: prefer SCMP_ACT_KILL_PROCESS if available - -Fast forward a year and we introduced a patch to avoid killing the -process for resource control syscalls tickled by Mesa. - - commit 9a1565a03b79d80b236bc7cc2dbce52a2ef3a1b8 - Author: Daniel P. Berrangé - Date: Wed Mar 13 09:49:03 2019 +0000 - - seccomp: don't kill process for resource control syscalls - -Unfortunately a logic bug effectively reverted the first commit -mentioned so that we go back to only killing the thread, not the whole -process. - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Stefan Hajnoczi -Acked-by: Eduardo Otubo -(cherry picked from commit e474e3aacf4276eb0781d11c45e2fab996f9dc56) -Signed-off-by: Eduardo Otubo -Signed-off-by: Danilo C. L. 
de Paula ---- - qemu-seccomp.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/qemu-seccomp.c b/qemu-seccomp.c -index e0a1829b3d..8325ecb766 100644 ---- a/qemu-seccomp.c -+++ b/qemu-seccomp.c -@@ -136,8 +136,9 @@ static uint32_t qemu_seccomp_get_action(int set) - - if (qemu_seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &action) == 0) { - kill_process = 1; -+ } else { -+ kill_process = 0; - } -- kill_process = 0; - } - if (kill_process == 1) { - return SCMP_ACT_KILL_PROCESS; --- -2.27.0 - diff --git a/kvm-target-arm-Move-setting-of-CPU-halted-state-to-gener.patch b/kvm-target-arm-Move-setting-of-CPU-halted-state-to-gener.patch deleted file mode 100644 index f7d6940..0000000 --- a/kvm-target-arm-Move-setting-of-CPU-halted-state-to-gener.patch +++ /dev/null @@ -1,66 +0,0 @@ -From de7263538a5cab64163edd1bdb3a934dabd625f2 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Tue, 8 Sep 2020 18:47:15 -0400 -Subject: [PATCH 3/4] target/arm: Move setting of CPU halted state to generic - code -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -Message-id: <20200908184716.1125192-3-lvivier@redhat.com> -Patchwork-id: 98300 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 2/3] target/arm: Move setting of CPU halted state to generic code -Bugzilla: 1849483 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Gibson -RH-Acked-by: Greg Kurz - -From: Thiago Jung Bauermann - -This change is in a separate patch because it's not so obvious that it -won't cause a regression. - -Suggested-by: Eduardo Habkost -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: David Gibson -Reviewed-by: Greg Kurz -Signed-off-by: Thiago Jung Bauermann -Message-Id: <20200826055535.951207-3-bauerman@linux.ibm.com> -Signed-off-by: David Gibson -(cherry picked from commit 6ad1da667c8e21f019d4adc21702e06dd9225790) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/core/cpu.c | 2 +- - target/arm/cpu.c | 1 - - 2 files changed, 1 insertion(+), 2 deletions(-) - -diff --git a/hw/core/cpu.c b/hw/core/cpu.c -index 594441a150..71bb7859f1 100644 ---- a/hw/core/cpu.c -+++ b/hw/core/cpu.c -@@ -258,7 +258,7 @@ static void cpu_common_reset(DeviceState *dev) - } - - cpu->interrupt_request = 0; -- cpu->halted = 0; -+ cpu->halted = cpu->start_powered_off; - cpu->mem_io_pc = 0; - cpu->icount_extra = 0; - atomic_set(&cpu->icount_decr_ptr->u32, 0); -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 54fb653229..059a5ced6e 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -175,7 +175,6 @@ static void arm_cpu_reset(DeviceState *dev) - env->vfp.xregs[ARM_VFP_MVFR2] = cpu->isar.mvfr2; - - cpu->power_state = s->start_powered_off ? PSCI_OFF : PSCI_ON; -- s->halted = s->start_powered_off; - - if (arm_feature(env, ARM_FEATURE_IWMMXT)) { - env->iwmmxt.cregs[ARM_IWMMXT_wCID] = 0x69051000 | 'Q'; --- -2.27.0 - diff --git a/kvm-target-arm-Move-start-powered-off-property-to-generi.patch b/kvm-target-arm-Move-start-powered-off-property-to-generi.patch deleted file mode 100644 index 9a39a56..0000000 --- a/kvm-target-arm-Move-start-powered-off-property-to-generi.patch +++ /dev/null @@ -1,141 +0,0 @@ -From 76821af8b6d74237718fb554ae1a96f969308c69 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Tue, 8 Sep 2020 18:47:14 -0400 -Subject: [PATCH 2/4] target/arm: Move start-powered-off property to generic - CPUState -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -Message-id: <20200908184716.1125192-2-lvivier@redhat.com> -Patchwork-id: 98299 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/3] target/arm: Move start-powered-off property to generic CPUState -Bugzilla: 1849483 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Gibson -RH-Acked-by: Greg Kurz - -From: Thiago Jung Bauermann - -There are other platforms which also have CPUs that start powered off, so 
-generalize the start-powered-off property so that it can be used by them. - -Note that ARMv7MState also has a property of the same name but this patch -doesn't change it because that class isn't a subclass of CPUState so it -wouldn't be a trivial change. - -This change should not cause any change in behavior. - -Suggested-by: Eduardo Habkost -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: David Gibson -Reviewed-by: Greg Kurz -Signed-off-by: Thiago Jung Bauermann -Message-Id: <20200826055535.951207-2-bauerman@linux.ibm.com> -Signed-off-by: David Gibson -(cherry picked from commit c1b701587e59d9569c38d1d6033cd7cc2a992105) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - exec.c | 1 + - include/hw/core/cpu.h | 4 ++++ - target/arm/cpu.c | 5 ++--- - target/arm/cpu.h | 3 --- - target/arm/kvm32.c | 2 +- - target/arm/kvm64.c | 2 +- - 6 files changed, 9 insertions(+), 8 deletions(-) - -diff --git a/exec.c b/exec.c -index 6f381f98e2..82e82fab09 100644 ---- a/exec.c -+++ b/exec.c -@@ -899,6 +899,7 @@ Property cpu_common_props[] = { - DEFINE_PROP_LINK("memory", CPUState, memory, TYPE_MEMORY_REGION, - MemoryRegion *), - #endif -+ DEFINE_PROP_BOOL("start-powered-off", CPUState, start_powered_off, false), - DEFINE_PROP_END_OF_LIST(), - }; - -diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h -index 8f145733ce..9fc2696db5 100644 ---- a/include/hw/core/cpu.h -+++ b/include/hw/core/cpu.h -@@ -374,6 +374,10 @@ struct CPUState { - bool created; - bool stop; - bool stopped; -+ -+ /* Should CPU start in powered-off state? 
*/ -+ bool start_powered_off; -+ - bool unplug; - bool crash_occurred; - bool exit_request; -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 13ad40aa7d..54fb653229 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -174,8 +174,8 @@ static void arm_cpu_reset(DeviceState *dev) - env->vfp.xregs[ARM_VFP_MVFR1] = cpu->isar.mvfr1; - env->vfp.xregs[ARM_VFP_MVFR2] = cpu->isar.mvfr2; - -- cpu->power_state = cpu->start_powered_off ? PSCI_OFF : PSCI_ON; -- s->halted = cpu->start_powered_off; -+ cpu->power_state = s->start_powered_off ? PSCI_OFF : PSCI_ON; -+ s->halted = s->start_powered_off; - - if (arm_feature(env, ARM_FEATURE_IWMMXT)) { - env->iwmmxt.cregs[ARM_IWMMXT_wCID] = 0x69051000 | 'Q'; -@@ -2182,7 +2182,6 @@ static const ARMCPUInfo arm_cpus[] = { - }; - - static Property arm_cpu_properties[] = { -- DEFINE_PROP_BOOL("start-powered-off", ARMCPU, start_powered_off, false), - DEFINE_PROP_UINT32("psci-conduit", ARMCPU, psci_conduit, 0), - DEFINE_PROP_UINT64("midr", ARMCPU, midr, 0), - DEFINE_PROP_UINT64("mp-affinity", ARMCPU, -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 9e8ed423ea..a925d26996 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -810,9 +810,6 @@ struct ARMCPU { - */ - uint32_t psci_version; - -- /* Should CPU start in PSCI powered-off state? 
*/ -- bool start_powered_off; -- - /* Current power state, access guarded by BQL */ - ARMPSCIState power_state; - -diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c -index 0af46b41c8..1f2b8f8b7a 100644 ---- a/target/arm/kvm32.c -+++ b/target/arm/kvm32.c -@@ -218,7 +218,7 @@ int kvm_arch_init_vcpu(CPUState *cs) - - /* Determine init features for this CPU */ - memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); -- if (cpu->start_powered_off) { -+ if (cs->start_powered_off) { - cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF; - } - if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) { -diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c -index 1169237905..f8a6d905fb 100644 ---- a/target/arm/kvm64.c -+++ b/target/arm/kvm64.c -@@ -775,7 +775,7 @@ int kvm_arch_init_vcpu(CPUState *cs) - - /* Determine init features for this CPU */ - memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); -- if (cpu->start_powered_off) { -+ if (cs->start_powered_off) { - cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF; - } - if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) { --- -2.27.0 - diff --git a/kvm-target-ppc-Add-experimental-option-for-enabling-secu.patch b/kvm-target-ppc-Add-experimental-option-for-enabling-secu.patch deleted file mode 100644 index f229a28..0000000 --- a/kvm-target-ppc-Add-experimental-option-for-enabling-secu.patch +++ /dev/null @@ -1,219 +0,0 @@ -From b162af531abdf6f5e8ad13b93699a3ba28de6702 Mon Sep 17 00:00:00 2001 -From: Michael Roth -Date: Thu, 20 Aug 2020 23:00:51 -0400 -Subject: [PATCH 1/4] target/ppc: Add experimental option for enabling secure - guests - -RH-Author: Michael Roth -Message-id: <20200820230051.516359-2-mroth@redhat.com> -Patchwork-id: 98208 -O-Subject: [RHEL-AV-8.3.0 qemu virt PATCH 1/1] target/ppc: Add experimental option for enabling secure guests -Bugzilla: 1870384 -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier -RH-Acked-by: David Gibson - -From: Fabiano Rosas - 
-Making use of ppc's Protected Execution Facility (PEF) feature, a -guest can become a secure guest (aka. secure VM - SVM) and have its -memory protected from access by the host. This feature is mediated by -a piece of firmware called the Ultravisor (UV). - -The transition from a regular to a secure VM is initiated by the guest -kernel during prom_init via the use of an ultracall (enter secure mode -- UV_ESM) and with cooperation from the hypervisor via an hcall -(H_SVM_INIT_START). - -Currently QEMU has no knowledge of this process and no way to -determine if a host supports the feature. A guest with PEF support -enabled would always try to enter secure mode regardless of user -intent or hardware support. - -To address the above, a new KVM capability (KVM_CAP_PPC_SECURE_GUEST -[1]) is being introduced in the kernel without which KVM will block -the secure transition. - -This patch adds support for checking/enabling this KVM capability via -a new experimental spapr machine option, e.g.: - - -machine pseries,x-svm-allowed=on - -The capability defaults to off. - -1- https://lore.kernel.org/kvm/20200319043301.GA13052@blackberry - -Signed-off-by: Fabiano Rosas - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1789757 -Upstream: RHEL-only -*re-worked to drop use of spapr capabilities infrastructure in favor - of a simple one-off machine option -Signed-off-by: Michael Roth -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/spapr.c | 23 +++++++++++++++++++++++ - include/hw/ppc/spapr.h | 3 +++ - target/ppc/kvm.c | 27 +++++++++++++++++++++++++++ - target/ppc/kvm_ppc.h | 13 +++++++++++++ - 4 files changed, 66 insertions(+) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 5e3964326d..e77c90bfc5 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -1636,6 +1636,9 @@ static void spapr_machine_reset(MachineState *machine) - - kvmppc_svm_off(&error_fatal); - spapr_caps_apply(spapr); -+ if (spapr->svm_allowed) { -+ kvmppc_svm_allow(&error_fatal); -+ } - - first_ppc_cpu = POWERPC_CPU(first_cpu); - if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && -@@ -3303,6 +3306,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) - spapr->host_serial = g_strdup(value); - } - -+static bool spapr_get_svm_allowed(Object *obj, Error **errp) -+{ -+ SpaprMachineState *spapr = SPAPR_MACHINE(obj); -+ -+ return spapr->svm_allowed; -+} -+ -+static void spapr_set_svm_allowed(Object *obj, bool value, Error **errp) -+{ -+ SpaprMachineState *spapr = SPAPR_MACHINE(obj); -+ -+ spapr->svm_allowed = value; -+} -+ - static void spapr_instance_init(Object *obj) - { - SpaprMachineState *spapr = SPAPR_MACHINE(obj); -@@ -3358,6 +3375,12 @@ static void spapr_instance_init(Object *obj) - spapr_get_host_serial, spapr_set_host_serial); - object_property_set_description(obj, "host-serial", - "Host serial number to advertise in guest device tree"); -+ object_property_add_bool(obj, "x-svm-allowed", -+ spapr_get_svm_allowed, -+ spapr_set_svm_allowed); -+ object_property_set_description(obj, "x-svm-allowed", -+ "Allow the guest to become a Secure Guest" -+ " (experimental only)"); - } - - static void spapr_machine_finalizefn(Object *obj) -diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index f48089edba..d0728a4758 100644 ---- a/include/hw/ppc/spapr.h -+++ b/include/hw/ppc/spapr.h -@@ -212,6 +212,9 @@ struct SpaprMachineState { - int 
fwnmi_machine_check_interlock; - QemuCond fwnmi_machine_check_interlock_cond; - -+ /* Secure Guest support via x-svm-allowed */ -+ bool svm_allowed; -+ - /*< public >*/ - char *kvm_type; - char *host_model; -diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index d85ba8ffe0..ce63f8b6f3 100644 ---- a/target/ppc/kvm.c -+++ b/target/ppc/kvm.c -@@ -89,6 +89,7 @@ static int cap_ppc_count_cache_flush_assist; - static int cap_ppc_nested_kvm_hv; - static int cap_large_decr; - static int cap_fwnmi; -+static int cap_ppc_secure_guest; - - static uint32_t debug_inst_opcode; - -@@ -136,6 +137,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) - cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); - kvmppc_get_cpu_characteristics(s); - cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); -+ cap_ppc_secure_guest = kvm_vm_check_extension(s, KVM_CAP_PPC_SECURE_GUEST); - cap_large_decr = kvmppc_get_dec_bits(); - cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); - /* -@@ -2538,6 +2540,16 @@ int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) - return 0; - } - -+bool kvmppc_has_cap_secure_guest(void) -+{ -+ return !!cap_ppc_secure_guest; -+} -+ -+int kvmppc_enable_cap_secure_guest(void) -+{ -+ return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1); -+} -+ - PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) - { - uint32_t host_pvr = mfpvr(); -@@ -2948,3 +2960,18 @@ void kvmppc_svm_off(Error **errp) - error_setg_errno(errp, -rc, "KVM_PPC_SVM_OFF ioctl failed"); - } - } -+ -+void kvmppc_svm_allow(Error **errp) -+{ -+ if (!kvm_enabled()) { -+ error_setg(errp, "No PEF support in tcg, try x-svm-allowed=off"); -+ return; -+ } -+ -+ if (!kvmppc_has_cap_secure_guest()) { -+ error_setg(errp, "KVM implementation does not support secure guests, " -+ "try x-svm-allowed=off"); -+ } else if (kvmppc_enable_cap_secure_guest() < 0) { -+ error_setg(errp, "Error enabling x-svm-allowed, try x-svm-allowed=off"); -+ } -+} -diff --git 
a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h -index 72e05f1cd2..3fd5ea2414 100644 ---- a/target/ppc/kvm_ppc.h -+++ b/target/ppc/kvm_ppc.h -@@ -40,6 +40,7 @@ target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, - bool radix, bool gtse, - uint64_t proc_tbl); - void kvmppc_svm_off(Error **errp); -+void kvmppc_svm_allow(Error **errp); - #ifndef CONFIG_USER_ONLY - bool kvmppc_spapr_use_multitce(void); - int kvmppc_spapr_enable_inkernel_multitce(void); -@@ -73,6 +74,8 @@ int kvmppc_set_cap_nested_kvm_hv(int enable); - int kvmppc_get_cap_large_decr(void); - int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); - int kvmppc_enable_hwrng(void); -+bool kvmppc_has_cap_secure_guest(void); -+int kvmppc_enable_cap_secure_guest(void); - int kvmppc_put_books_sregs(PowerPCCPU *cpu); - PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); - void kvmppc_check_papr_resize_hpt(Error **errp); -@@ -386,6 +389,16 @@ static inline int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) - return -1; - } - -+static inline bool kvmppc_has_cap_secure_guest(void) -+{ -+ return false; -+} -+ -+static inline int kvmppc_enable_cap_secure_guest(void) -+{ -+ return -1; -+} -+ - static inline int kvmppc_enable_hwrng(void) - { - return -1; --- -2.27.0 - diff --git a/kvm-usb-fix-setup_len-init-CVE-2020-14364.patch b/kvm-usb-fix-setup_len-init-CVE-2020-14364.patch deleted file mode 100644 index e53187c..0000000 --- a/kvm-usb-fix-setup_len-init-CVE-2020-14364.patch +++ /dev/null @@ -1,102 +0,0 @@ -From e6d43ded51d658d77bb7f8a490f2bf93946d3215 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Thu, 3 Sep 2020 14:27:19 -0400 -Subject: [PATCH 09/11] usb: fix setup_len init (CVE-2020-14364) - -RH-Author: Jon Maloy -Message-id: <20200903142719.1415757-2-jmaloy@redhat.com> -Patchwork-id: 98265 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/1] usb: fix setup_len init (CVE-2020-14364) -Bugzilla: 1869715 -RH-Acked-by: Gerd Hoffmann -RH-Acked-by: Thomas Huth -RH-Acked-by: Laurent Vivier - -From: Gerd 
Hoffmann - -Store calculated setup_len in a local variable, verify it, and only -write it to the struct (USBDevice->setup_len) in case it passed the -sanity checks. - -This prevents other code (do_token_{in,out} functions specifically) -from working with invalid USBDevice->setup_len values and overrunning -the USBDevice->setup_buf[] buffer. - -Fixes: CVE-2020-14364 -Signed-off-by: Gerd Hoffmann -Tested-by: Gonglei -Reviewed-by: Li Qiang -Message-id: 20200825053636.29648-1-kraxel@redhat.com -(cherry picked from commit b946434f2659a182afc17e155be6791ebfb302eb) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. de Paula ---- - hw/usb/core.c | 16 ++++++++++------ - 1 file changed, 10 insertions(+), 6 deletions(-) - -diff --git a/hw/usb/core.c b/hw/usb/core.c -index 5abd128b6b..5234dcc73f 100644 ---- a/hw/usb/core.c -+++ b/hw/usb/core.c -@@ -129,6 +129,7 @@ void usb_wakeup(USBEndpoint *ep, unsigned int stream) - static void do_token_setup(USBDevice *s, USBPacket *p) - { - int request, value, index; -+ unsigned int setup_len; - - if (p->iov.size != 8) { - p->status = USB_RET_STALL; -@@ -138,14 +139,15 @@ static void do_token_setup(USBDevice *s, USBPacket *p) - usb_packet_copy(p, s->setup_buf, p->iov.size); - s->setup_index = 0; - p->actual_length = 0; -- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; -- if (s->setup_len > sizeof(s->data_buf)) { -+ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; -+ if (setup_len > sizeof(s->data_buf)) { - fprintf(stderr, - "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", -- s->setup_len, sizeof(s->data_buf)); -+ setup_len, sizeof(s->data_buf)); - p->status = USB_RET_STALL; - return; - } -+ s->setup_len = setup_len; - - request = (s->setup_buf[0] << 8) | s->setup_buf[1]; - value = (s->setup_buf[3] << 8) | s->setup_buf[2]; -@@ -259,26 +261,28 @@ static void do_token_out(USBDevice *s, USBPacket *p) - static void do_parameter(USBDevice *s, USBPacket *p) - { - int i, request, value, index; -+ unsigned int 
setup_len; - - for (i = 0; i < 8; i++) { - s->setup_buf[i] = p->parameter >> (i*8); - } - - s->setup_state = SETUP_STATE_PARAM; -- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; - s->setup_index = 0; - - request = (s->setup_buf[0] << 8) | s->setup_buf[1]; - value = (s->setup_buf[3] << 8) | s->setup_buf[2]; - index = (s->setup_buf[5] << 8) | s->setup_buf[4]; - -- if (s->setup_len > sizeof(s->data_buf)) { -+ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; -+ if (setup_len > sizeof(s->data_buf)) { - fprintf(stderr, - "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", -- s->setup_len, sizeof(s->data_buf)); -+ setup_len, sizeof(s->data_buf)); - p->status = USB_RET_STALL; - return; - } -+ s->setup_len = setup_len; - - if (p->pid == USB_TOKEN_OUT) { - usb_packet_copy(p, s->data_buf, s->setup_len); --- -2.27.0 - diff --git a/kvm-vhost-user-vsock-pci-force-virtio-version-1.patch b/kvm-vhost-user-vsock-pci-force-virtio-version-1.patch deleted file mode 100644 index 535e8f3..0000000 --- a/kvm-vhost-user-vsock-pci-force-virtio-version-1.patch +++ /dev/null @@ -1,71 +0,0 @@ -From b969c169c82c3022949a7717d6f5745fbdc579bc Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 29 Sep 2020 12:41:42 -0400 -Subject: [PATCH 3/4] vhost-user-vsock-pci: force virtio version 1 - -RH-Author: Stefano Garzarella -Message-id: <20200929124143.41520-4-sgarzare@redhat.com> -Patchwork-id: 98513 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 3/4] vhost-user-vsock-pci: force virtio version 1 -Bugzilla: 1868449 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Cornelia Huck - -Commit 9b3a35ec82 ("virtio: verify that legacy support is not -accidentally on") added a safety check that requires to set -'disable-legacy=on' on vhost-user-vsock-pci device: - - $ ./qemu-system-x86_64 ... 
\ - -chardev socket,id=char0,reconnect=0,path=/tmp/vhost4.socket \ - -device vhost-user-vsock-pci,chardev=char0 - qemu-system-x86_64: -device vhost-user-vsock-pci,chardev=char0: - device is modern-only, use disable-legacy=on - -virtio-vsock was introduced after the release of VIRTIO 1.0 -specifications, so it should be 'modern-only'. - -This patch forces virtio version 1 and removes the 'transitional_name' -property, as done for vhost-vsock-pci, removing the need to specify -'disable-legacy=on' on vhost-user-vsock-pci device. - -Cc: qemu-stable@nongnu.org -Suggested-by: Cornelia Huck -Reviewed-by: Cornelia Huck -Signed-off-by: Stefano Garzarella -Message-Id: <20200921122506.82515-4-sgarzare@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 27eda699f59d430c33fc054a36a17251992e70dc) -Signed-off-by: Stefano Garzarella -Signed-off-by: Danilo C. L. de Paula ---- - hw/virtio/vhost-user-vsock-pci.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-user-vsock-pci.c b/hw/virtio/vhost-user-vsock-pci.c -index f4cf95873d3..492df6418a4 100644 ---- a/hw/virtio/vhost-user-vsock-pci.c -+++ b/hw/virtio/vhost-user-vsock-pci.c -@@ -40,6 +40,9 @@ static void vhost_user_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) - VHostUserVSockPCI *dev = VHOST_USER_VSOCK_PCI(vpci_dev); - DeviceState *vdev = DEVICE(&dev->vdev); - -+ /* unlike vhost-vsock, we do not need to care about pre-5.1 compat */ -+ virtio_pci_force_virtio_1(vpci_dev); -+ - qdev_realize(vdev, BUS(&vpci_dev->bus), errp); - } - -@@ -68,7 +71,6 @@ static void vhost_user_vsock_pci_instance_init(Object *obj) - static const VirtioPCIDeviceTypeInfo vhost_user_vsock_pci_info = { - .base_name = TYPE_VHOST_USER_VSOCK_PCI, - .generic_name = "vhost-user-vsock-pci", -- .transitional_name = "vhost-user-vsock-pci-transitional", - .non_transitional_name = "vhost-user-vsock-pci-non-transitional", - .instance_size = 
sizeof(VHostUserVSockPCI), - .instance_init = vhost_user_vsock_pci_instance_init, --- -2.27.0 - diff --git a/kvm-vhost-vsock-ccw-force-virtio-version-1.patch b/kvm-vhost-vsock-ccw-force-virtio-version-1.patch deleted file mode 100644 index 1b3f4d1..0000000 --- a/kvm-vhost-vsock-ccw-force-virtio-version-1.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 41467dab8f8e312bbb13a47454724e20c4b08d60 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 29 Sep 2020 12:41:43 -0400 -Subject: [PATCH 4/4] vhost-vsock-ccw: force virtio version 1 - -RH-Author: Stefano Garzarella -Message-id: <20200929124143.41520-5-sgarzare@redhat.com> -Patchwork-id: 98514 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 4/4] vhost-vsock-ccw: force virtio version 1 -Bugzilla: 1868449 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Cornelia Huck - -virtio-vsock was introduced after the release of VIRTIO 1.0 -specifications, so it should be 'modern-only'. - -This patch forces virtio version 1 as done for vhost-vsock-pci. - -To avoid migration issues, we force virtio version 1 only when -legacy check is enabled in the new machine types (>= 5.1). - -Cc: qemu-stable@nongnu.org -Suggested-by: Cornelia Huck -Reviewed-by: Cornelia Huck -Signed-off-by: Stefano Garzarella -Message-Id: <20200921122506.82515-5-sgarzare@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit a6704a34cf02add13964149e0de6453ae62bd9db) -Signed-off-by: Stefano Garzarella -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/s390x/vhost-vsock-ccw.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/hw/s390x/vhost-vsock-ccw.c b/hw/s390x/vhost-vsock-ccw.c -index 0822ecca893..246416a8f96 100644 ---- a/hw/s390x/vhost-vsock-ccw.c -+++ b/hw/s390x/vhost-vsock-ccw.c -@@ -40,9 +40,21 @@ static void vhost_vsock_ccw_class_init(ObjectClass *klass, void *data) - static void vhost_vsock_ccw_instance_init(Object *obj) - { - VHostVSockCCWState *dev = VHOST_VSOCK_CCW(obj); -+ VirtioCcwDevice *ccw_dev = VIRTIO_CCW_DEVICE(obj); -+ VirtIODevice *virtio_dev; - - virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), - TYPE_VHOST_VSOCK); -+ -+ virtio_dev = VIRTIO_DEVICE(&dev->vdev); -+ -+ /* -+ * To avoid migration issues, we force virtio version 1 only when -+ * legacy check is enabled in the new machine types (>= 5.1). -+ */ -+ if (!virtio_legacy_check_disabled(virtio_dev)) { -+ ccw_dev->force_revision_1 = true; -+ } - } - - static const TypeInfo vhost_vsock_ccw_info = { --- -2.27.0 - diff --git a/kvm-vhost-vsock-pci-force-virtio-version-1.patch b/kvm-vhost-vsock-pci-force-virtio-version-1.patch deleted file mode 100644 index db1d86c..0000000 --- a/kvm-vhost-vsock-pci-force-virtio-version-1.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 7153f4862bf6c4396412a8ba0d7db45ae087a337 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 29 Sep 2020 12:41:41 -0400 -Subject: [PATCH 2/4] vhost-vsock-pci: force virtio version 1 - -RH-Author: Stefano Garzarella -Message-id: <20200929124143.41520-3-sgarzare@redhat.com> -Patchwork-id: 98511 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 2/4] vhost-vsock-pci: force virtio version 1 -Bugzilla: 1868449 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Dr. 
David Alan Gilbert -RH-Acked-by: Cornelia Huck - -Commit 9b3a35ec82 ("virtio: verify that legacy support is not -accidentally on") added a safety check that requires to set -'disable-legacy=on' on vhost-vsock-pci device: - - $ ./qemu-system-x86_64 ... -device vhost-vsock-pci,guest-cid=5 - qemu-system-x86_64: -device vhost-vsock-pci,guest-cid=5: - device is modern-only, use disable-legacy=on - -virtio-vsock was introduced after the release of VIRTIO 1.0 -specifications, so it should be 'modern-only'. -In addition Cornelia verified that forcing a legacy mode on -vhost-vsock-pci device using x86-64 host and s390x guest, so with -different endianness, produces strange behaviours. - -This patch forces virtio version 1 and removes the 'transitional_name' -property removing the need to specify 'disable-legacy=on' on -vhost-vsock-pci device. - -To avoid migration issues, we force virtio version 1 only when -legacy check is enabled in the new machine types (>= 5.1). - -As the transitional device name is not commonly used, we do not -provide compatibility handling for it. - -Cc: qemu-stable@nongnu.org -Reported-by: Qian Cai -Reported-by: Qinghua Cheng -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1868449 -Suggested-by: Cornelia Huck -Reviewed-by: Cornelia Huck -Signed-off-by: Stefano Garzarella -Message-Id: <20200921122506.82515-3-sgarzare@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 6209070503989cf4f28549f228989419d4f0b236) -Signed-off-by: Stefano Garzarella -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/virtio/vhost-vsock-pci.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-vsock-pci.c b/hw/virtio/vhost-vsock-pci.c -index a815278e69c..f11a38292fe 100644 ---- a/hw/virtio/vhost-vsock-pci.c -+++ b/hw/virtio/vhost-vsock-pci.c -@@ -43,6 +43,15 @@ static void vhost_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) - { - VHostVSockPCI *dev = VHOST_VSOCK_PCI(vpci_dev); - DeviceState *vdev = DEVICE(&dev->vdev); -+ VirtIODevice *virtio_dev = VIRTIO_DEVICE(vdev); -+ -+ /* -+ * To avoid migration issues, we force virtio version 1 only when -+ * legacy check is enabled in the new machine types (>= 5.1). -+ */ -+ if (!virtio_legacy_check_disabled(virtio_dev)) { -+ virtio_pci_force_virtio_1(vpci_dev); -+ } - - qdev_realize(vdev, BUS(&vpci_dev->bus), errp); - } -@@ -72,7 +81,6 @@ static void vhost_vsock_pci_instance_init(Object *obj) - static const VirtioPCIDeviceTypeInfo vhost_vsock_pci_info = { - .base_name = TYPE_VHOST_VSOCK_PCI, - .generic_name = "vhost-vsock-pci", -- .transitional_name = "vhost-vsock-pci-transitional", - .non_transitional_name = "vhost-vsock-pci-non-transitional", - .instance_size = sizeof(VHostVSockPCI), - .instance_init = vhost_vsock_pci_instance_init, --- -2.27.0 - diff --git a/kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch b/kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch deleted file mode 100644 index 6b2c802..0000000 --- a/kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch +++ /dev/null @@ -1,169 +0,0 @@ -From 0e84dffa20452130768c81390d9df56fab8ba260 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 29 Sep 2020 12:41:40 -0400 -Subject: [PATCH 1/4] virtio: skip legacy support check on machine types less - than 5.1 - -RH-Author: Stefano Garzarella -Message-id: <20200929124143.41520-2-sgarzare@redhat.com> -Patchwork-id: 98512 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/4] virtio: skip legacy support check on machine types 
less than 5.1 -Bugzilla: 1868449 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Cornelia Huck - -Commit 9b3a35ec82 ("virtio: verify that legacy support is not accidentally -on") added a check that returns an error if legacy support is on, but the -device does not support legacy. - -Unfortunately some devices were wrongly declared legacy capable even if -they were not (e.g vhost-vsock). - -To avoid migration issues, we add a virtio-device property -(x-disable-legacy-check) to skip the legacy error, printing a warning -instead, for machine types < 5.1. - -Cc: qemu-stable@nongnu.org -Fixes: 9b3a35ec82 ("virtio: verify that legacy support is not accidentally on") -Suggested-by: Dr. David Alan Gilbert -Suggested-by: Cornelia Huck -Reviewed-by: Cornelia Huck -Signed-off-by: Stefano Garzarella -Message-Id: <20200921122506.82515-2-sgarzare@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin - -(cherry picked from commit d55f518248f263bb8d0852f98e47102ea09d4f89) -Added 'x-disable-legacy-check' in hw_compat_rhel_8_2 - -Signed-off-by: Stefano Garzarella -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/core/machine.c | 3 +++ - hw/s390x/virtio-ccw.c | 15 ++++++++++++--- - hw/virtio/virtio-pci.c | 14 ++++++++++++-- - hw/virtio/virtio.c | 7 +++++++ - include/hw/virtio/virtio.h | 2 ++ - 5 files changed, 36 insertions(+), 5 deletions(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 10fa9b8c756..86ce3af71e4 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -66,6 +66,8 @@ GlobalProperty hw_compat_rhel_8_2[] = { - { "vmport", "x-report-vmx-type", "off" }, - /* hw_compat_rhel_8_2 from hw_compat_5_0 */ - { "vmport", "x-cmds-v2", "off" }, -+ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ -+ { "virtio-device", "x-disable-legacy-check", "true" }, - }; - const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); - -@@ -246,6 +248,7 @@ GlobalProperty hw_compat_5_0[] = { - { "vmport", "x-signal-unsupported-cmd", "off" }, - { "vmport", "x-report-vmx-type", "off" }, - { "vmport", "x-cmds-v2", "off" }, -+ { "virtio-device", "x-disable-legacy-check", "true" }, - }; - const size_t hw_compat_5_0_len = G_N_ELEMENTS(hw_compat_5_0); - -diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c -index 0e602702971..3dfc93d4f6f 100644 ---- a/hw/s390x/virtio-ccw.c -+++ b/hw/s390x/virtio-ccw.c -@@ -1122,9 +1122,18 @@ static void virtio_ccw_device_plugged(DeviceState *d, Error **errp) - } - - if (!virtio_ccw_rev_max(dev) && !virtio_legacy_allowed(vdev)) { -- error_setg(errp, "Invalid value of property max_rev " -- "(is %d expected >= 1)", virtio_ccw_rev_max(dev)); -- return; -+ /* -+ * To avoid migration issues, we allow legacy mode when legacy -+ * check is disabled in the old machine types (< 5.1). 
-+ */ -+ if (virtio_legacy_check_disabled(vdev)) { -+ warn_report("device requires revision >= 1, but for backward " -+ "compatibility max_revision=0 is allowed"); -+ } else { -+ error_setg(errp, "Invalid value of property max_rev " -+ "(is %d expected >= 1)", virtio_ccw_rev_max(dev)); -+ return; -+ } - } - - if (virtio_get_num_queues(vdev) > VIRTIO_QUEUE_MAX) { -diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index ccdf54e81c7..4211565f2c9 100644 ---- a/hw/virtio/virtio-pci.c -+++ b/hw/virtio/virtio-pci.c -@@ -1596,8 +1596,18 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) - - if (legacy) { - if (!virtio_legacy_allowed(vdev)) { -- error_setg(errp, "device is modern-only, use disable-legacy=on"); -- return; -+ /* -+ * To avoid migration issues, we allow legacy mode when legacy -+ * check is disabled in the old machine types (< 5.1). -+ */ -+ if (virtio_legacy_check_disabled(vdev)) { -+ warn_report("device is modern-only, but for backward " -+ "compatibility legacy is allowed"); -+ } else { -+ error_setg(errp, -+ "device is modern-only, use disable-legacy=on"); -+ return; -+ } - } - if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) { - error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by" -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index e9830252176..b85277da673 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -3304,6 +3304,11 @@ bool virtio_legacy_allowed(VirtIODevice *vdev) - } - } - -+bool virtio_legacy_check_disabled(VirtIODevice *vdev) -+{ -+ return vdev->disable_legacy_check; -+} -+ - hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n) - { - return vdev->vq[n].vring.desc; -@@ -3713,6 +3718,8 @@ static Property virtio_properties[] = { - DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features), - DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true), - DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true), -+ 
DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice, -+ disable_legacy_check, false), - DEFINE_PROP_END_OF_LIST(), - }; - -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index e424df12cf6..c50f5a9dfe8 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -105,6 +105,7 @@ struct VirtIODevice - bool use_started; - bool started; - bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */ -+ bool disable_legacy_check; - VMChangeStateEntry *vmstate; - char *bus_name; - uint8_t device_endian; -@@ -398,5 +399,6 @@ static inline bool virtio_device_disabled(VirtIODevice *vdev) - } - - bool virtio_legacy_allowed(VirtIODevice *vdev); -+bool virtio_legacy_check_disabled(VirtIODevice *vdev); - - #endif --- -2.27.0 - diff --git a/kvm-x86-cpuhp-prevent-guest-crash-on-CPU-hotplug-when-br.patch b/kvm-x86-cpuhp-prevent-guest-crash-on-CPU-hotplug-when-br.patch deleted file mode 100644 index 548fd67..0000000 --- a/kvm-x86-cpuhp-prevent-guest-crash-on-CPU-hotplug-when-br.patch +++ /dev/null @@ -1,99 +0,0 @@ -From 98eced5d367a6a69006cab1ea2b77c2c2622694a Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Mon, 5 Oct 2020 15:27:02 -0400 -Subject: [PATCH 2/3] x86: cpuhp: prevent guest crash on CPU hotplug when - broadcast SMI is in use - -RH-Author: Igor Mammedov -Message-id: <20201005152703.1555401-3-imammedo@redhat.com> -Patchwork-id: 98550 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 2/3] x86: cpuhp: prevent guest crash on CPU hotplug when broadcast SMI is in use -Bugzilla: 1846886 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Michael S. 
Tsirkin - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1846886 -BRANCH: rhel-av-8.3.0 -UPSTREAM: Merged -BREW: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31759628 -Upstream commit: c5be7517d658 - -There were reports of guest crash on CPU hotplug, when using q35 machine -type and OVMF with SMM, due to hotplugged CPU trying to process SMI at -default SMI handler location without it being relocated by firmware first. - -Fix it by refusing hotplug if firmware hasn't negotiated CPU hotplug with -SMI support while SMI broadcast is in use. - -Conflicts: - hw/i386/x86.c - cpu wiring routines were moved to x86.c upstream - to be shared with micro vm, so the second hunk - has to be put into pc_cpu_pre_plug() and s/x86ms/pcms/. - -Signed-off-by: Igor Mammedov -Reviewed-by: Laszlo Ersek -Tested-by: Laszlo Ersek -Message-Id: <20200923094650.1301166-3-imammedo@redhat.com> -Tested-by: Laszlo Ersek -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Signed-off-by: Igor Mammedov -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/acpi/ich9.c | 12 +++++++++++- - hw/i386/pc.c | 11 +++++++++++ - 2 files changed, 22 insertions(+), 1 deletion(-) - -diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 43ad1ff9278..37286a03288 100644 ---- a/hw/acpi/ich9.c -+++ b/hw/acpi/ich9.c -@@ -423,10 +423,20 @@ void ich9_pm_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, - ICH9LPCState *lpc = ICH9_LPC_DEVICE(hotplug_dev); - - if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) && -- !lpc->pm.acpi_memory_hotplug.is_enabled) -+ !lpc->pm.acpi_memory_hotplug.is_enabled) { - error_setg(errp, - "memory hotplug is not enabled: %s.memory-hotplug-support " - "is not set", object_get_typename(OBJECT(lpc))); -+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { -+ uint64_t negotiated = lpc->smi_negotiated_features; -+ -+ if (negotiated & BIT_ULL(ICH9_LPC_SMI_F_BROADCAST_BIT) && -+ !(negotiated & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT))) { -+ error_setg(errp, "cpu hotplug with SMI wasn't enabled by firmware"); -+ error_append_hint(errp, "update machine type to newer than 5.1 " -+ "and firmware that suppors CPU hotplug with SMM"); -+ } -+ } - } - - void ich9_pm_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 6e0a3f391b0..0332589359b 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -1761,6 +1761,17 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, - return; - } - -+ if (pcms->acpi_dev) { -+ Error *local_err = NULL; -+ -+ hotplug_handler_pre_plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, -+ &local_err); -+ if (local_err) { -+ error_propagate(errp, local_err); -+ return; -+ } -+ } -+ - init_topo_info(&topo_info, x86ms); - - env->nr_dies = x86ms->smp_dies; --- -2.27.0 - diff --git a/kvm-x86-cpuhp-refuse-cpu-hot-unplug-request-earlier-if-n.patch b/kvm-x86-cpuhp-refuse-cpu-hot-unplug-request-earlier-if-n.patch deleted file mode 100644 index 6d4c9e8..0000000 --- 
a/kvm-x86-cpuhp-refuse-cpu-hot-unplug-request-earlier-if-n.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 77c5df3ab28f294f7b21d33a2f6116b0889292ed Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Mon, 5 Oct 2020 15:27:03 -0400 -Subject: [PATCH 3/3] x86: cpuhp: refuse cpu hot-unplug request earlier if not - supported - -RH-Author: Igor Mammedov -Message-id: <20201005152703.1555401-4-imammedo@redhat.com> -Patchwork-id: 98551 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 3/3] x86: cpuhp: refuse cpu hot-unplug request earlier if not supported -Bugzilla: 1846886 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Michael S. Tsirkin - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1846886 -BRANCH: rhel-av-8.3.0 -UPSTREAM: Merged -BREW: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31759628 -Upstream commit: b48ad7c02ba7 - -CPU hot-unplug with SMM requires firmware participation to prevent -guest crash (i.e. CPU can be removed only after OS _and_ firmware -were prepared for the action). -Previous patches introduced ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT -feature bit, which is advertised by firmware when it has support -for CPU hot-unplug. Use it to check if guest is able to handle -unplug and make device_del fail gracefully if hot-unplug feature -hasn't been negotiated. - -Signed-off-by: Igor Mammedov -Tested-by: Laszlo Ersek -Reviewed-by: Laszlo Ersek -Message-Id: <20200923094650.1301166-4-imammedo@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Signed-off-by: Igor Mammedov -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/acpi/ich9.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 37286a03288..f6c6c6a916a 100644 ---- a/hw/acpi/ich9.c -+++ b/hw/acpi/ich9.c -@@ -475,6 +475,18 @@ void ich9_pm_device_unplug_request_cb(HotplugHandler *hotplug_dev, - errp); - } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) && - !lpc->pm.cpu_hotplug_legacy) { -+ uint64_t negotiated = lpc->smi_negotiated_features; -+ -+ if (negotiated & BIT_ULL(ICH9_LPC_SMI_F_BROADCAST_BIT) && -+ !(negotiated & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT))) { -+ error_setg(errp, "cpu hot-unplug with SMI wasn't enabled " -+ "by firmware"); -+ error_append_hint(errp, "update machine type to a version having " -+ "x-smi-cpu-hotunplug=on and firmware that " -+ "supports CPU hot-unplug with SMM"); -+ return; -+ } -+ - acpi_cpu_unplug_request_cb(hotplug_dev, &lpc->pm.cpuhp_state, - dev, errp); - } else { --- -2.27.0 - diff --git a/kvm-x86-lpc9-let-firmware-negotiate-CPU-hotplug-with-SMI.patch b/kvm-x86-lpc9-let-firmware-negotiate-CPU-hotplug-with-SMI.patch deleted file mode 100644 index f3c09d0..0000000 --- a/kvm-x86-lpc9-let-firmware-negotiate-CPU-hotplug-with-SMI.patch +++ /dev/null @@ -1,110 +0,0 @@ -From e2d32096071d7175d11b444db80e25709d6bf3d4 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Mon, 5 Oct 2020 15:27:01 -0400 -Subject: [PATCH 1/3] x86: lpc9: let firmware negotiate 'CPU hotplug with SMI' - features - -RH-Author: Igor Mammedov -Message-id: <20201005152703.1555401-2-imammedo@redhat.com> -Patchwork-id: 98549 -O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/3] x86: lpc9: let firmware negotiate 'CPU hotplug with SMI' features -Bugzilla: 1846886 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Michael S. 
Tsirkin - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1846886 -BRANCH: rhel-av-8.3.0 -UPSTREAM: Merged -BREW: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31759628 -Upstream commit: 00dc02d284ea - -It will allow firmware to notify QEMU that firmware requires SMI -being triggered on CPU hot[un]plug, so that it would be able to account -for hotplugged CPU and relocate it to new SMM base and/or safely remove -CPU on unplug. - -Using negotiated features, follow up patches will insert SMI upcall -into AML code, to make sure that firmware processes hotplug before -guest OS would attempt to use new CPU. - -Conflicts: - hw/i386/pc.c - move x-smi-cpu-hotplug chunk from missing pc_compat_5_1[] compat props - to pc_rhel_compat[] to disable cpu hotplug for [ovmf+smi] config - (should be moved to versioned q35 machine type later, when RHEL gets - complete feature and we decide to support it downstream) - -Signed-off-by: Igor Mammedov -Reviewed-by: Laszlo Ersek -Tested-by: Laszlo Ersek -Message-Id: <20200923094650.1301166-2-imammedo@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Signed-off-by: Igor Mammedov -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/i386/pc.c | 2 ++ - hw/isa/lpc_ich9.c | 13 +++++++++++++ - include/hw/i386/ich9.h | 2 ++ - 3 files changed, 17 insertions(+) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index ac2cc79fca2..6e0a3f391b0 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -358,6 +358,8 @@ GlobalProperty pc_rhel_compat[] = { - { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, - /* bz 1508330 */ - { "vfio-pci", "x-no-geforce-quirks", "on" }, -+ /* BZ 1846886 */ -+ { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, - }; - const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); - -diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c -index cd6e169d47a..19f32bed3e9 100644 ---- a/hw/isa/lpc_ich9.c -+++ b/hw/isa/lpc_ich9.c -@@ -373,6 +373,15 @@ static void smi_features_ok_callback(void *opaque) - /* guest requests invalid features, leave @features_ok at zero */ - return; - } -+ if (!(guest_features & BIT_ULL(ICH9_LPC_SMI_F_BROADCAST_BIT)) && -+ guest_features & (BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT) | -+ BIT_ULL(ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT))) { -+ /* -+ * cpu hot-[un]plug with SMI requires SMI broadcast, -+ * leave @features_ok at zero -+ */ -+ return; -+ } - - /* valid feature subset requested, lock it down, report success */ - lpc->smi_negotiated_features = guest_features; -@@ -747,6 +756,10 @@ static Property ich9_lpc_properties[] = { - DEFINE_PROP_BOOL("noreboot", ICH9LPCState, pin_strap.spkr_hi, true), - DEFINE_PROP_BIT64("x-smi-broadcast", ICH9LPCState, smi_host_features, - ICH9_LPC_SMI_F_BROADCAST_BIT, true), -+ DEFINE_PROP_BIT64("x-smi-cpu-hotplug", ICH9LPCState, smi_host_features, -+ ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT, true), -+ DEFINE_PROP_BIT64("x-smi-cpu-hotunplug", ICH9LPCState, smi_host_features, -+ ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT, false), - DEFINE_PROP_END_OF_LIST(), - }; - -diff --git a/include/hw/i386/ich9.h b/include/hw/i386/ich9.h -index a98d10b252d..d1bb3f7bf0e 100644 ---- a/include/hw/i386/ich9.h -+++ b/include/hw/i386/ich9.h -@@ -247,5 
+247,7 @@ typedef struct ICH9LPCState { - - /* bit positions used in fw_cfg SMI feature negotiation */ - #define ICH9_LPC_SMI_F_BROADCAST_BIT 0 -+#define ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT 1 -+#define ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT 2 - - #endif /* HW_ICH9_H */ --- -2.27.0 - diff --git a/kvm.modules b/kvm.modules deleted file mode 100644 index b9d9646..0000000 --- a/kvm.modules +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/sh - -case $(uname -m) in - ppc64) - grep OPAL /proc/cpuinfo >/dev/null 2>&1 && opal=1 - - modprobe -b kvm >/dev/null 2>&1 - modprobe -b kvm-pr >/dev/null 2>&1 && kvm=1 - if [ "$opal" ]; then - modprobe -b kvm-hv >/dev/null 2>&1 - fi - ;; - s390x) - modprobe -b kvm >/dev/null 2>&1 && kvm=1 - ;; -esac - -exit 0 diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 29d61a1..db61b06 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -8,6 +8,7 @@ %global have_gluster 1 %global have_kvm_setup 0 %global have_memlock_limits 0 +%global rcversion -rc1 %ifnarch %{ix86} x86_64 @@ -61,14 +62,11 @@ Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} # Macro to properly setup RHEL/RHEV conflict handling -%define rhev_ma_conflicts() \ -Obsoletes: %1-ma \ -Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 5.1.0 -Release: 13%{?dist} +Version: 5.2.0-rc1 +Release: 1%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -76,8 +74,7 @@ Group: Development/Tools URL: http://www.qemu.org/ ExclusiveArch: x86_64 %{power64} aarch64 s390x - -Source0: http://wiki.qemu.org/download/qemu-5.1.0.tar.xz +Source0: http://wiki.qemu.org/download/qemu-5.2.0-rc1.tar.xz # KSM control scripts Source4: ksm.service @@ -117,104 +114,17 @@ Patch0012: 0012-Enable-make-check.patch Patch0013: 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch Patch0014: 0014-Add-support-statement-to-help-output.patch Patch0015: 0015-globally-limit-the-maximum-number-of-CPUs.patch -Patch0016: 0016-Add-support-for-simpletrace.patch -Patch0017: 0017-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch -Patch0018: 0018-usb-xhci-Fix-PCI-capability-order.patch -Patch0019: 0019-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch -Patch0020: 0020-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -Patch0021: 0021-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch -Patch0022: 0022-RHEL-only-Enable-vTPM-for-POWER-in-downstream-config.patch -Patch0023: 0023-redhat-fix-5.0-rebase-missing-ISA-TPM-TIS.patch -Patch0024: 0024-redhat-define-hw_compat_8_2.patch -Patch0025: 0025-x86-Add-8.3.0-x86_64-machine-type.patch -Patch0027: 0027-hw-arm-Changes-to-rhel820-machine.patch -Patch0028: 0028-hw-arm-Introduce-rhel_virt_instance_init-helper.patch -Patch0029: 0029-hw-arm-Add-rhel830-machine-type.patch -Patch0030: 0030-redhat-define-pseries-rhel8.3.0-machine-type.patch -Patch0031: 0031-ppc-Set-correct-max_cpus-value-on-spapr-rhel-machine.patch -Patch0032: 0032-arm-Set-correct-max_cpus-value-on-virt-rhel-machine-.patch -Patch0033: 0033-vl-Remove-downstream-only-MAX_RHEL_CPUS-code.patch -Patch0034: 0034-q35-Set-max_cpus-to-512.patch -Patch0035: 0035-RHEL-only-arm-virt-Allow-the-TPM_TIS_SYSBUS-device-d.patch -Patch0036: 0036-RHEL-only-Enable-vTPM-for-ARM-in-downstream-configs.patch -# For bz#1853265 - Forward and backward migration 
from rhel-av-8.3.0(qemu-kvm-5.0.0) to rhel-av-8.2.1(qemu-kvm-4.2.0) failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'" -Patch37: kvm-redhat-define-hw_compat_8_2.patch -# For bz#1843348 - 8.3 machine types for POWER -Patch38: kvm-redhat-Update-hw_compat_8_2.patch -# For bz#1843348 - 8.3 machine types for POWER -Patch39: kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch -# For bz#1801242 - [aarch64] vTPM support in machvirt -Patch40: kvm-Disable-TPM-passthrough-backend-on-ARM.patch -# For bz#1867075 - CVE-2020-10756 virt:8.3/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8] -Patch41: kvm-Drop-bogus-IPv6-messages.patch -# For bz#1849707 - 8.3 machine types for x86 - 5.1 update -Patch42: kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch -# For bz#1849707 - 8.3 machine types for x86 - 5.1 update -Patch43: kvm-machine_types-numa-compatibility-for-auto_enable_num.patch -# For bz#1790492 - 'dirty-bitmaps' migration capability should allow configuring target nodenames -Patch44: kvm-migration-Add-block-bitmap-mapping-parameter.patch -# For bz#1790492 - 'dirty-bitmaps' migration capability should allow configuring target nodenames -Patch45: kvm-iotests.py-Let-wait_migration-return-on-failure.patch -# For bz#1790492 - 'dirty-bitmaps' migration capability should allow configuring target nodenames -Patch46: kvm-iotests-Test-node-bitmap-aliases-during-migration.patch -# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes -Patch47: kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch -# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes -Patch48: kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch -# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes -Patch49: kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch -# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes -Patch50: 
kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch -# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes -Patch51: kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch -# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes -Patch52: kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch -# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes -Patch53: kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch -# For bz#1867739 - -prom-env does not validate input -Patch54: kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch -# For bz#1869715 - CVE-2020-14364 qemu-kvm: QEMU: usb: out-of-bounds r/w access issue while processing usb packets [rhel-av-8.3.0] -Patch55: kvm-usb-fix-setup_len-init-CVE-2020-14364.patch -# For bz#1789757 - [IBM 8.4 FEAT] Add machine option to enable secure VM support -# For bz#1870384 - [IBM 8.3 FEAT] Add interim/unsupported machine option to enable secure VM support for testing purposes -Patch56: kvm-target-ppc-Add-experimental-option-for-enabling-secu.patch -# For bz#1849483 - Failed to boot up guest when hotplugging vcpus on bios stage -Patch57: kvm-target-arm-Move-start-powered-off-property-to-generi.patch -# For bz#1849483 - Failed to boot up guest when hotplugging vcpus on bios stage -Patch58: kvm-target-arm-Move-setting-of-CPU-halted-state-to-gener.patch -# For bz#1849483 - Failed to boot up guest when hotplugging vcpus on bios stage -Patch59: kvm-ppc-spapr-Use-start-powered-off-CPUState-property.patch -# For bz#1738820 - '-F' option of qemu-ga command cause the guest-fsfreeze-freeze command doesn't work -Patch60: kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch -# For bz#1752376 - qemu use SCMP_ACT_TRAP even SCMP_ACT_KILL_PROCESS is available -Patch61: kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch -# For bz#1867075 - CVE-2020-10756 virt:8.3/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8] 
-Patch62: kvm-Revert-Drop-bogus-IPv6-messages.patch -# For bz#1821528 - missing namespace attribute when access the rbd image with namespace -Patch63: kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch -# For bz#1688978 - RFE: forward host preferences for cipher suites and CA certs to guest firmware -Patch64: kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch -# For bz#1877209 - 'qemu-img bitmaps --merge' failed when trying to merge top volume bitmap to base volume bitmap -Patch65: kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch -# For bz#1874004 - Live migration performance is poor during guest installation process on power host -Patch66: kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch -# For bz#1868449 - vhost_vsock error: device is modern-only, use disable-legacy=on -Patch67: kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch -# For bz#1868449 - vhost_vsock error: device is modern-only, use disable-legacy=on -Patch68: kvm-vhost-vsock-pci-force-virtio-version-1.patch -# For bz#1868449 - vhost_vsock error: device is modern-only, use disable-legacy=on -Patch69: kvm-vhost-user-vsock-pci-force-virtio-version-1.patch -# For bz#1868449 - vhost_vsock error: device is modern-only, use disable-legacy=on -Patch70: kvm-vhost-vsock-ccw-force-virtio-version-1.patch -# For bz#1846886 - Guest hit soft lockup or reboots if hotplug vcpu under ovmf -Patch71: kvm-x86-lpc9-let-firmware-negotiate-CPU-hotplug-with-SMI.patch -# For bz#1846886 - Guest hit soft lockup or reboots if hotplug vcpu under ovmf -Patch72: kvm-x86-cpuhp-prevent-guest-crash-on-CPU-hotplug-when-br.patch -# For bz#1846886 - Guest hit soft lockup or reboots if hotplug vcpu under ovmf -Patch73: kvm-x86-cpuhp-refuse-cpu-hot-unplug-request-earlier-if-n.patch +Patch0016: 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0017: 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +Patch0018: 
0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +Patch0019: 0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +Patch0020: 0020-Upstream.patch +Patch0021: 0021-RHEL-9-test.patch BuildRequires: wget BuildRequires: rpm-build +BuildRequires: ninja-build +BuildRequires: meson BuildRequires: zlib-devel BuildRequires: glib2-devel BuildRequires: which @@ -323,7 +233,6 @@ Requires: mesa-dri-drivers BuildRequires: perl-Test-Harness Requires: qemu-kvm-core = %{epoch}:%{version}-%{release} -%rhev_ma_conflicts qemu-kvm %{requires_all_modules} @@ -374,7 +283,6 @@ Requires: usbredir >= 0.7.1 Requires: libfdt >= 1.6.0 %endif -%rhev_ma_conflicts qemu-kvm %description -n qemu-kvm-core qemu-kvm is an open source virtualizer that provides hardware @@ -387,7 +295,6 @@ hardware for a full system such as a PC and its associated peripherals. Summary: QEMU command line tool for manipulating disk images Group: Development/Tools -%rhev_ma_conflicts qemu-img %description -n qemu-img This package provides a command line tool for manipulating disk images. 
@@ -402,7 +309,6 @@ Requires(post): systemd-units Requires(preun): systemd-units Requires(postun): systemd-units -%rhev_ma_conflicts qemu-kvm-common %description -n qemu-kvm-common qemu-kvm is an open source virtualizer that provides hardware emulation for @@ -518,15 +424,16 @@ cd qemu-kvm-build ../configure \ --prefix="%{_prefix}" \ --libdir="%{_libdir}" \ + --datadir="%{_datadir}" \ --sysconfdir="%{_sysconfdir}" \ --interp-prefix=%{_prefix}/qemu-%M \ --localstatedir="%{_localstatedir}" \ - --docdir="%{qemudocdir}" \ + --docdir="%{_docdir}" \ --libexecdir="%{_libexecdir}" \ --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ --extra-cflags="%{optflags}" \ --with-pkgversion="%{name}-%{version}-%{release}" \ - --with-confsuffix=/"%{name}" \ + --with-suffix="%{name}" \ --firmwarepath=%{_prefix}/share/qemu-firmware \ --python=%{__python3} \ --target-list="%{buildarch}" \ @@ -694,15 +601,15 @@ make V=1 %{?_smp_mflags} $buildldflags # Setup back compat qemu-kvm binary %{__python3} scripts/tracetool.py --backend dtrace --format stap \ --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ - trace-events-all > qemu-kvm.stp + trace/trace-events-all > qemu-kvm.stp %{__python3} scripts/tracetool.py --backends=dtrace --format=log-stap \ --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ - trace-events-all > qemu-kvm-log.stp + trace/trace-events-all > qemu-kvm-log.stp %{__python3} scripts/tracetool.py --backend dtrace --format simpletrace-stap \ --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ - trace-events-all > qemu-kvm-simpletrace.stp + trace/trace-events-all > qemu-kvm-simpletrace.stp cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm @@ -759,7 +666,7 @@ install -p -m 0755 ../tests/Makefile.include $RPM_BUILD_ROOT%{testsdir}/tests/ # Install qemu-iotests cp -R ../tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ -cp -u tests/qemu-iotests/* 
$RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ +cp -ur tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ # Avoid ambiguous 'python' interpreter name find $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env \(python\|python3\)+%{__python3}+' {} \; find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env \(python\|python3\)+%{__python3}+' {} \; @@ -802,14 +709,18 @@ install --preserve-timestamps --mode=0644 \ mkdir -p -v $RPM_BUILD_ROOT%{_localstatedir}/log/qemu-ga/ mkdir -p $RPM_BUILD_ROOT%{_bindir} -install -c -m 0755 qemu-ga ${RPM_BUILD_ROOT}%{_bindir}/qemu-ga +install -c -m 0755 qga/qemu-ga ${RPM_BUILD_ROOT}%{_bindir}/qemu-ga mkdir -p $RPM_BUILD_ROOT%{_mandir}/man8 -install -m 0755 qemu-kvm $RPM_BUILD_ROOT%{_libexecdir}/ +install -m 0755 %{kvm_target}-softmmu/qemu-system-%{kvm_target} $RPM_BUILD_ROOT%{_libexecdir}/qemu-kvm install -m 0644 qemu-kvm.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ install -m 0644 qemu-kvm-log.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ install -m 0644 qemu-kvm-simpletrace.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -d -m 0755 "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/script.d" +install -c -m 0644 scripts/systemtap/script.d/qemu_kvm.stp "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/script.d/" +install -d -m 0755 "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/conf.d" +install -c -m 0644 scripts/systemtap/conf.d/qemu_kvm.conf "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/conf.d/" rm $RPM_BUILD_ROOT/%{_datadir}/applications/qemu.desktop rm $RPM_BUILD_ROOT%{_bindir}/qemu-system-%{kvm_target} @@ -829,12 +740,27 @@ mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format scripts/tracetool/format/*.py mkdir -p $RPM_BUILD_ROOT%{qemudocdir} -install -p -m 0644 -t ${RPM_BUILD_ROOT}%{qemudocdir} ../Changelog ../README.rst 
../README.systemtap ../COPYING ../COPYING.LIB ../LICENSE ../docs/interop/qmp-spec.txt +install -p -m 0644 -t ${RPM_BUILD_ROOT}%{qemudocdir} ../README.rst ../README.systemtap ../COPYING ../COPYING.LIB ../LICENSE ../docs/interop/qmp-spec.txt + +# Rename man page +pushd ${RPM_BUILD_ROOT}%{_mandir}/man1/ +for fn in qemu.1*; do + mv $fn "qemu-kvm${fn#qemu}" +done +popd chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man1/* chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man8/* install -D -p -m 0644 ../qemu.sasl $RPM_BUILD_ROOT%{_sysconfdir}/sasl2/%{name}.conf +# Install keymaps +pushd pc-bios/keymaps +for kmp in *; do + install $kmp ${RPM_BUILD_ROOT}%{_datadir}/%{name}/keymaps/ +done +rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/keymaps/*.stamp +popd + # Provided by package openbios rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-ppc rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-sparc32 @@ -851,6 +777,7 @@ rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-zipl.rom rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot.e500 rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu_vga.ndrv rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/skiboot.lid +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qboot.rom rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-ccw.img rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/hppa-firmware.img @@ -863,9 +790,12 @@ rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-licenses.txt rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-sifive_u-fw_jump.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-virt-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-generic-fw_dynamic.* rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-sifive_u-fw_jump.bin rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-virt-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-generic-fw_dynamic.* rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu-nsis.bmp +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/npcm7xx_bootrom.bin rm -rf 
${RPM_BUILD_ROOT}%{_libdir}/qemu-kvm/ui-spice-app.so @@ -974,9 +904,6 @@ rm -rf $RPM_BUILD_ROOT%{qemudocdir}/user/.buildinfo # Remove spec rm -rf $RPM_BUILD_ROOT%{qemudocdir}/specs -# Hack to keep qemu-pr-helper in original location -mv $RPM_BUILD_ROOT%{_libexecdir}/qemu-pr-helper $RPM_BUILD_ROOT%{_bindir}/qemu-pr-helper - %check cd qemu-kvm-build export DIFF=diff; make check V=1 @@ -1025,7 +952,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %files -n qemu-kvm-common %defattr(-,root,root) %dir %{qemudocdir} -%doc %{qemudocdir}/Changelog %doc %{qemudocdir}/README.rst %doc %{qemudocdir}/COPYING %doc %{qemudocdir}/COPYING.LIB @@ -1083,6 +1009,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/%{name}/kvmvapic.bin %{_datadir}/%{name}/sgabios.bin %{_datadir}/%{name}/pvh.bin + %{_libdir}/qemu-kvm/ui-egl-headless.so %endif %ifarch s390x %{_datadir}/%{name}/s390-ccw.img @@ -1103,6 +1030,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/%{name}/efi-pcnet.rom %{_datadir}/%{name}/efi-rtl8139.rom %{_datadir}/%{name}/efi-ne2k_pci.rom + %{_libdir}/qemu-kvm/hw-display-virtio-vga.so %endif %{_datadir}/icons/* %{_datadir}/%{name}/linuxboot_dma.bin @@ -1129,10 +1057,20 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %if 0%{have_spice} %{_libdir}/qemu-kvm/hw-usb-smartcard.so + %{_libdir}/qemu-kvm/audio-spice.so + %{_libdir}/qemu-kvm/ui-spice-core.so + %{_libdir}/qemu-kvm/chardev-spice.so %endif %ifarch x86_64 %{_libdir}/qemu-kvm/hw-display-qxl.so %endif +%{_libdir}/qemu-kvm/hw-display-virtio-gpu.so +%ifnarch s390x + %{_libdir}/qemu-kvm/hw-display-virtio-gpu-pci.so +%endif +%if 0%{have_opengl} + %{_libdir}/qemu-kvm/ui-opengl.so +%endif %files -n qemu-img %defattr(-,root,root) @@ -1177,6 +1115,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Oct 13 2020 Danilo Cesar Lemes de Paula - 5.1.0-14.el8_3 +- kvm-virtiofsd-avoid-proc-self-fd-tempdir.patch [bz#1884276] +- Resolves: 
bz#1884276 + (Pod with kata-runtime won't start, QEMU: "vhost_user_dev init failed, Operation not permitted" [mkdtemp failing in sandboxing]) + * Thu Oct 08 2020 Danilo Cesar Lemes de Paula - 5.1.0-13.el8_3 - kvm-x86-lpc9-let-firmware-negotiate-CPU-hotplug-with-SMI.patch [bz#1846886] - kvm-x86-cpuhp-prevent-guest-crash-on-CPU-hotplug-when-br.patch [bz#1846886] diff --git a/sources b/sources index eae2427..7c6eed4 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-5.1.0.tar.xz) = e213edb71d93d5167ddce7546220ecb7b52a7778586a4f476f65bd1e510c9cfc6d1876238a7b501d9cc3fd31cc2ae4b7fb9e753bc3f12cc17cd16dfce2a96ba3 +SHA512 (qemu-5.2.0-rc1.tar.xz) = 5345c9e8811efe2c1bab92ecb846f267dc6ef2a67ac03a39547344dc810a13027ef7352c7209d528ec81108dbc1e5e9ca96da6f7306c682ad8f785f596fa4dde From a2f0353550dd2ee29bd38f18289b8409b2dedce1 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 13 Nov 2020 15:11:07 +0100 Subject: [PATCH 102/195] Fix for upstream version --- qemu-kvm.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qemu-kvm.spec b/qemu-kvm.spec index db61b06..4c1f51b 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -65,7 +65,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 5.2.0-rc1 +Version: 5.2.0rc1 Release: 1%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 From a89668ee57c177b1bd9f8f1c533aa19c169a8647 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 13 Nov 2020 16:34:57 +0100 Subject: [PATCH 103/195] Final fix for release version --- qemu-kvm.spec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 4c1f51b..929d4cd 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -65,8 +65,8 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 5.2.0rc1 -Release: 1%{?dist} +Version: 5.2.0 +Release: rc1.1%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY From 661065af9d1fdd183c27bd8861b09a2875ef2233 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 26 Nov 2020 10:42:21 +0100 Subject: [PATCH 104/195] Update to QEMU 5.2.0 rc3 --- .gitignore | 1 + ...at-Adding-slirp-to-the-exploded-tree.patch | 2 +- 0005-Initial-redhat-build.patch | 70 ++- 0006-Enable-disable-devices-for-RHEL.patch | 23 +- ...Machine-type-related-general-changes.patch | 22 +- 0008-Add-aarch64-machine-types.patch | 2 +- 0009-Add-ppc64-machine-types.patch | 6 +- 0010-Add-s390x-machine-types.patch | 10 +- 0011-Add-x86_64-machine-types.patch | 12 +- 0012-Enable-make-check.patch | 9 +- ...mber-of-devices-that-can-be-assigned.patch | 6 +- ...Add-support-statement-to-help-output.patch | 8 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 4 +- ...documentation-instead-of-qemu-system.patch | 6 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 2 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 2 +- ...x-blockdev-reopen-API-with-feature-f.patch | 2 +- 0020-Build-RHEL-9.patch | 62 +++ 0020-Upstream.patch | 37 -- 0021-RHEL-9-test.patch | 33 -- qemu-kvm.spec | 476 +++++++++++++----- sources | 2 +- 22 files changed, 507 insertions(+), 290 deletions(-) create mode 100644 0020-Build-RHEL-9.patch delete 
mode 100644 0020-Upstream.patch delete mode 100644 0021-RHEL-9-test.patch diff --git a/.gitignore b/.gitignore index ba6025e..ad21a5a 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ /qemu-5.0.0.tar.xz /qemu-5.1.0.tar.xz /qemu-5.2.0-rc1.tar.xz +/qemu-5.2.0-rc3.tar.xz diff --git a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch index 5293bb8..f942236 100644 --- a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch +++ b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch @@ -1,4 +1,4 @@ -From bd38ad15dd837dd3baa136334f667d8d63850ae0 Mon Sep 17 00:00:00 2001 +From 03c528f8dd064ee0ac40bd37f686cd5616071fdf Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 29 Jul 2020 07:48:57 +0200 Subject: redhat: Adding slirp to the exploded tree diff --git a/0005-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch index 346e59a..6b55be8 100644 --- a/0005-Initial-redhat-build.patch +++ b/0005-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From 788398591901ece77695d73db0e392ce2c357636 Mon Sep 17 00:00:00 2001 +From b3ec5c5cc8ed777c5c2cd1bd27f6684a99317953 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build @@ -11,7 +11,7 @@ several issues are fixed in QEMU tree: - Man page renamed from qemu to qemu-kvm - man page is installed using make install so we have to fix it in qemu tree -This rebase includes changes up to qemu-kvm-5.1.0-14.el8 +This rebase includes changes up to qemu-kvm-5.1.0-15.el8 Rebase notes (3.1.0): - added new configure options @@ -81,6 +81,14 @@ Rebase notes (5.2.0 rc0): - Fix in directory used for docs (upstream add %name so we do not pass it in configure) - Package various .so as part of qemu-kvm-core package. 
+Rebase notes (5.2.0 rc2): +- Added fix for dtrace build on RHEL 8.4.0 + +Rebase notes (5.2.0 rc3): +- Added man page for qemu-pr-helper +- Added new configure options +- Update qemu-kiwi patches to v4 + Merged patches (3.1.0): - 01f0c9f RHEL8: Add disable configure options to qemu spec file - Spec file cleanups @@ -116,24 +124,34 @@ Merged patches (5.2.0 rc0): - fd62478 disable virgl - 0205018 redhat: link /etc/qemu-ga/fsfreeze-hook to /etc/qemu-kvm/ - 3645097 redhat: Make all generated so files executable (not only block-*) + +Merged patches (5.2.0 rc2): +- pjw 99657 redhat: introduces disable_everything macro into the configure call +- pjw 99659 redhat: scripts/extract_build_cmd.py - Avoid listing empty lines +- pjw 99658 redhat: Fixing rh-local build +- pjw 99660 redhat: Add qemu-kiwi subpackage +- d2e59ce redhat: add (un/pre)install systemd hooks for qemu-ga + +Merged patches (5.2.0 rc3): +- pjw 99887 - redhat: allow Makefile rh-prep builddep to fail +- pjw 99885 - redhat: adding rh-rpm target --- .gitignore | 1 + README.systemtap | 43 + - crypto/meson.build | 1 - hw/s390x/s390-pci-vfio.c | 4 +- - meson.build | 10 +- + meson.build | 4 +- redhat/Makefile | 90 + redhat/Makefile.common | 53 + redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 3170 +++++++++++++++++++++++++++++++ - redhat/scripts/extract_build_cmd.py | 2 +- + redhat/qemu-kvm.spec.template | 3409 +++++++++++++++++++++++++++++++ + redhat/scripts/extract_build_cmd.py | 5 +- redhat/scripts/process-patches.sh | 17 +- scripts/qemu-guest-agent/fsfreeze-hook | 2 +- scripts/systemtap/conf.d/qemu_kvm.conf | 4 + scripts/systemtap/script.d/qemu_kvm.stp | 1 + tests/check-block.sh | 2 + ui/vnc.c | 2 +- - 16 files changed, 3424 insertions(+), 17 deletions(-) + 15 files changed, 3660 insertions(+), 16 deletions(-) create mode 100644 README.systemtap create mode 100644 redhat/Makefile create mode 100644 redhat/Makefile.common @@ -191,20 +209,8 @@ index 0000000..ad913fc + +3. 
Translate the trace record to readable format. + # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log -diff --git a/crypto/meson.build b/crypto/meson.build -index 7f37b5d..e30efb8 100644 ---- a/crypto/meson.build -+++ b/crypto/meson.build -@@ -50,7 +50,6 @@ if 'CONFIG_GNUTLS' in config_host - crypto_ss.add(gnutls) - endif - -- - util_ss.add(files('aes.c')) - util_ss.add(files('init.c')) - diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c -index d5c7806..19f92eb 100644 +index 9296e1b..f70c5a8 100644 --- a/hw/s390x/s390-pci-vfio.c +++ b/hw/s390x/s390-pci-vfio.c @@ -28,7 +28,7 @@ @@ -226,21 +232,10 @@ index d5c7806..19f92eb 100644 uint32_t argsz; int fd; diff --git a/meson.build b/meson.build -index b473620..3636fb9 100644 +index 5062407..c1db9b8 100644 --- a/meson.build +++ b/meson.build -@@ -292,6 +292,10 @@ if 'CONFIG_GNUTLS' in config_host - gnutls = declare_dependency(compile_args: config_host['GNUTLS_CFLAGS'].split(), - link_args: config_host['GNUTLS_LIBS'].split()) - endif -+gcrypt = not_found -+if 'CONFIG_GCRYPT' in config_host -+ gcrypt = dependency('libgcrypt') -+endif - pixman = not_found - if have_system or have_tools - pixman = dependency('pixman-1', required: have_system, version:'>=0.21.8', -@@ -1123,7 +1127,9 @@ if capstone_opt == 'internal' +@@ -1149,7 +1149,9 @@ if capstone_opt == 'internal' # Include all configuration defines via a header file, which will wind up # as a dependency on the object file, and thus changes here will result # in a rebuild. 
@@ -251,15 +246,6 @@ index b473620..3636fb9 100644 ] libcapstone = static_library('capstone', -@@ -1641,7 +1647,7 @@ libblock = static_library('block', block_ss.sources() + genh, - - block = declare_dependency(link_whole: [libblock], - link_args: '@block.syms', -- dependencies: [crypto, io]) -+ dependencies: [crypto, io, zlib]) - - blockdev_ss = blockdev_ss.apply(config_host, strict: false) - libblockdev = static_library('blockdev', blockdev_ss.sources() + genh, diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook index 13aafd4..e9b84ec 100755 --- a/scripts/qemu-guest-agent/fsfreeze-hook diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0006-Enable-disable-devices-for-RHEL.patch index 86ecb27..50e42d5 100644 --- a/0006-Enable-disable-devices-for-RHEL.patch +++ b/0006-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 65eea220dcce6177b306eae08935f5354847bb08 Mon Sep 17 00:00:00 2001 +From 2ed436b54735a68c7f4422a8d6e5b4f3a7580fd3 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:11:07 +0200 Subject: Enable/disable devices for RHEL @@ -106,14 +106,13 @@ Merged patches (5.2.0 rc0): hw/ppc/spapr_cpu_core.c | 2 + hw/usb/meson.build | 2 +- qemu-options.hx | 4 - - redhat/Makefile.common | 1 + - redhat/qemu-kvm.spec.template | 9 ++- + redhat/qemu-kvm.spec.template | 11 ++- target/arm/cpu.c | 4 +- target/arm/cpu_tcg.c | 3 + target/ppc/cpu-models.c | 10 +++ target/s390x/cpu_models.c | 3 + target/s390x/kvm.c | 8 ++ - 27 files changed, 273 insertions(+), 25 deletions(-) + 26 files changed, 274 insertions(+), 25 deletions(-) create mode 100644 default-configs/devices/aarch64-rh-devices.mak create mode 100644 default-configs/devices/ppc64-rh-devices.mak create mode 100644 default-configs/devices/rh-virtio.mak @@ -406,7 +405,7 @@ index 64b2ee2..b5de7e5 100644 + +include x86_64-rh-devices.mak diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 95cb0f9..f9690a0 100644 +index 5ff4e01..ac45ca4 100644 --- 
a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -374,8 +374,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) @@ -476,10 +475,10 @@ index 9e52fee..bb71c9f 100644 -specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) +#specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index 722b9e7..25de8b2 100644 +index fdca6ca..fa1a7ee 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c -@@ -2963,6 +2963,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) +@@ -2945,6 +2945,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); int16_t device_id = pc->device_id; @@ -526,7 +525,7 @@ index dde85ba..62cf60c 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index 83347cb..8fb83a1 100644 +index d7d05ae..aaea06d 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -1796,6 +1796,7 @@ static const E1000Info e1000_devices[] = { @@ -576,7 +575,7 @@ index 934e4fa..e3abba5 100644 endif diff --git a/qemu-options.hx b/qemu-options.hx -index 2c83390..53472fd 100644 +index 104632e..363a15b 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2251,10 +2251,6 @@ ERST @@ -620,7 +619,7 @@ index 0013e25..6540046 100644 arm_cpu_register(&arm_tcg_cpus[i]); } diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 4ad1686..16b2185 100644 +index 87e4228..6eaa65e 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -704,10 +703,10 @@ index b5abff8..abe09d7 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index baa070f..10ce36a 100644 +index b8385e6..1839cc6 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c -@@ -2540,6 +2540,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2552,6 +2552,14 @@ void 
kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } diff --git a/0007-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch index 519cff2..a630ff8 100644 --- a/0007-Machine-type-related-general-changes.patch +++ b/0007-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From 673234091c4073ebc31bf36559e249796772c8b5 Mon Sep 17 00:00:00 2001 +From c08267680d5cdede8c1b80591f294f8c0e8a2ddc Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -69,7 +69,7 @@ Merged patches (5.2.0 rc0): 23 files changed, 400 insertions(+), 11 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index f9690a0..f6c6c6a 100644 +index ac45ca4..0b35b35 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -369,6 +369,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) @@ -102,7 +102,7 @@ index f9690a0..f6c6c6a 100644 &pm->disable_s3, OBJ_PROP_FLAG_READWRITE); object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S4_DISABLED, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 67a1ea4..85312a3 100644 +index 669be5b..2063131 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -277,6 +277,7 @@ static const VMStateDescription vmstate_acpi = { @@ -193,7 +193,7 @@ index 97f7187..aeb207e 100644 } diff --git a/hw/core/machine.c b/hw/core/machine.c -index 98b87f7..8674586 100644 +index d040804..19d50dd 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -28,6 +28,219 @@ @@ -456,7 +456,7 @@ index a3f4959..f6c2ef4 100644 } diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index b6f1ae3..19955eb 100644 +index a8a77ec..6d39c1f 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -80,6 +80,11 @@ struct E1000EState { @@ -692,10 +692,10 @@ index 27ca237..eb24e39 100644 if (s->masterbus) { USBPort *ports[NB_PORTS]; diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 
79ce5c4..325cd02 100644 +index 9ce7ca7..0af661c 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c -@@ -3486,9 +3486,27 @@ static const VMStateDescription vmstate_xhci_slot = { +@@ -3491,9 +3491,27 @@ static const VMStateDescription vmstate_xhci_slot = { } }; @@ -723,7 +723,7 @@ index 79ce5c4..325cd02 100644 .fields = (VMStateField[]) { VMSTATE_UINT32(type, XHCIEvent), VMSTATE_UINT32(ccode, XHCIEvent), -@@ -3497,6 +3515,8 @@ static const VMStateDescription vmstate_xhci_event = { +@@ -3502,6 +3520,8 @@ static const VMStateDescription vmstate_xhci_event = { VMSTATE_UINT32(flags, XHCIEvent), VMSTATE_UINT8(slotid, XHCIEvent), VMSTATE_UINT8(epid, XHCIEvent), @@ -733,7 +733,7 @@ index 79ce5c4..325cd02 100644 } }; diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h -index ccf50ae..8716904 100644 +index 02ebd76..dfda04b 100644 --- a/hw/usb/hcd-xhci.h +++ b/hw/usb/hcd-xhci.h @@ -149,6 +149,8 @@ typedef struct XHCIEvent { @@ -746,7 +746,7 @@ index ccf50ae..8716904 100644 typedef struct XHCIInterrupter { diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h -index 28a5318..ff4a672 100644 +index 54571c7..b3369da 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -61,6 +61,9 @@ typedef struct ICH9LPCPMRegs { @@ -839,7 +839,7 @@ index a70a72e..78b9043 100644 + #endif diff --git a/migration/migration.c b/migration/migration.c -index 3263aa5..c8d54ac 100644 +index 87a9b59..1bb8d01 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -134,6 +134,8 @@ enum mig_rp_message_type { diff --git a/0008-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch index 91f21e0..57b281c 100644 --- a/0008-Add-aarch64-machine-types.patch +++ b/0008-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From be3ca54f04ade6a20265f9aeeb46662caa6d16dc Mon Sep 17 00:00:00 2001 +From 57f949e002928186b80562fe517e1d83464c24fd Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types diff --git 
a/0009-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch index b7c5e68..fd3fda5 100644 --- a/0009-Add-ppc64-machine-types.patch +++ b/0009-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From d33e7e8c4d6e006d5039782d54f583ea3f242fd6 Mon Sep 17 00:00:00 2001 +From 965f17e40984c06f87be2dad8100f4742412cc05 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -521,7 +521,7 @@ index 2e89e36..ba2d814 100644 char *kvm_type; char *host_model; diff --git a/target/ppc/compat.c b/target/ppc/compat.c -index e9bec5f..74e3db9 100644 +index 7949a24..f207a9b 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) @@ -546,7 +546,7 @@ index e9bec5f..74e3db9 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index 2eb41a2..d850521 100644 +index 2609e40..21c63b5 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1347,6 +1347,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) diff --git a/0010-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch index 0b94070..9c1ea62 100644 --- a/0010-Add-s390x-machine-types.patch +++ b/0010-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 79307dba97fef45adfbc03ab46db0460b27ceab9 Mon Sep 17 00:00:00 2001 +From 79dafd0d91aecadc163685311c220dc2d7a49add Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -25,10 +25,10 @@ Merged patches (4.2.0): 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 22222c4..68cea20 100644 +index 4e140bb..b8dde7e 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -761,7 +761,7 @@ bool css_migration_enabled(void) +@@ -765,7 +765,7 @@ bool 
css_migration_enabled(void) { \ MachineClass *mc = MACHINE_CLASS(oc); \ ccw_machine_##suffix##_class_options(mc); \ @@ -37,7 +37,7 @@ index 22222c4..68cea20 100644 if (latest) { \ mc->alias = "s390-ccw-virtio"; \ mc->is_default = true; \ -@@ -785,6 +785,7 @@ bool css_migration_enabled(void) +@@ -789,6 +789,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) @@ -45,7 +45,7 @@ index 22222c4..68cea20 100644 static void ccw_machine_5_2_instance_options(MachineState *machine) { } -@@ -1049,6 +1050,74 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1053,6 +1054,74 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); diff --git a/0011-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch index 2aff59c..74e6ccc 100644 --- a/0011-Add-x86_64-machine-types.patch +++ b/0011-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 4c6e7a672399b3962d904c2b01e8844544383d89 Mon Sep 17 00:00:00 2001 +From 3fb64e4127e2b74f0d93a51dd3709fe30adc1d23 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -59,7 +59,7 @@ Merged patches (5.2.0 rc0): 8 files changed, 714 insertions(+), 7 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index 4f66642..78f50d4 100644 +index 1f5c211..b1082bd 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -217,6 +217,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) @@ -896,7 +896,7 @@ index ae6bf1d..e2ba9a4 100644 * depending on QEMU versions up to QEMU 2.4. 
*/ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 0d86069..dd52fe5 100644 +index 5a8c960..dc592e9 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1803,7 +1803,7 @@ static X86CPUDefinition builtin_x86_defs[] = { @@ -917,10 +917,10 @@ index 0d86069..dd52fe5 100644 }; diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index cf46259..4e58c09 100644 +index a2934dd..19bc39b 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c -@@ -3124,6 +3124,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3126,6 +3126,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -928,7 +928,7 @@ index cf46259..4e58c09 100644 kvm_msr_buf_reset(cpu); -@@ -3436,6 +3437,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3438,6 +3439,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; diff --git a/0012-Enable-make-check.patch b/0012-Enable-make-check.patch index ae7c435..5e51706 100644 --- a/0012-Enable-make-check.patch +++ b/0012-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 1170780d2fdd4cb8b663603c4d509d65b2e05846 Mon Sep 17 00:00:00 2001 +From 2621db7ae95fdf112a7e1798ae428a865ae55b59 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -36,8 +36,11 @@ Merged patches (4.0.0): Merged patches (4.1.0-rc0): - 41288ff redhat: Remove raw iotest 205 + +Conflicts: + redhat/qemu-kvm.spec.template --- - redhat/qemu-kvm.spec.template | 2 +- + redhat/qemu-kvm.spec.template | 4 ++-- tests/qemu-iotests/051 | 12 ++++++------ tests/qtest/boot-serial-test.c | 6 +++++- tests/qtest/cdrom-test.c | 2 ++ @@ -48,7 +51,7 @@ Merged patches (4.1.0-rc0): tests/qtest/prom-env-test.c | 4 ++++ tests/qtest/test-x86-cpuid-compat.c | 2 ++ tests/qtest/usb-hcd-xhci-test.c | 4 ++++ - 11 files changed, 34 insertions(+), 18 deletions(-) + 11 files changed, 35 insertions(+), 19 deletions(-) diff --git 
a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index bee2607..61d25c4 100755 diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index d57621f..bbed9bb 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 3bf885effef5666a13145e7942116ed9ba5039bb Mon Sep 17 00:00:00 2001 +From bb05135a744ae87847bcaf2344f826664dc9e19c Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -38,7 +38,7 @@ Merged patches (2.9.0): 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 58c0ce8..558eea8 100644 +index 51dc373..06ce2a3 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -45,6 +45,9 @@ @@ -83,7 +83,7 @@ index 58c0ce8..558eea8 100644 if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3202,6 +3226,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3207,6 +3231,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), diff --git a/0014-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch index d123653..0a9f748 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 107ded716aa28243015b41940e660fe72dd4d3de Mon Sep 17 00:00:00 2001 +From a2490cc686e14979a82f176a76ca0f5ec22082ad Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -21,10 +21,10 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c 
-index a711644..f951684 100644 +index e6e0ad5..065d52e 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c -@@ -1687,9 +1687,17 @@ static void version(void) +@@ -1688,9 +1688,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -42,7 +42,7 @@ index a711644..f951684 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", error_get_progname()); -@@ -1706,6 +1714,7 @@ static void help(int exitcode) +@@ -1707,6 +1715,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch index eed1259..6e4d4ce 100644 --- a/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From e3883f9d6e74843fd14d44cdf2d36b35123347a3 Mon Sep 17 00:00:00 2001 +From b0baccfdfb10c34d9f9d35363e098dab7f376fe9 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -38,7 +38,7 @@ Merged patches (5.2.0 rc0): 1 file changed, 12 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index 9ef5daf..4fab04d 100644 +index baaa542..a1fbda0 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2108,6 +2108,18 @@ static int kvm_init(MachineState *ms) diff --git a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 96ac311..6ec42b2 100644 --- a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From 4f1d9fa771f3932ab14319a9df8cb37e1c9f7547 Mon Sep 17 00:00:00 2001 +From f72477389598ad4fee78640ec3a96166f00baf97 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -125,7 
+125,7 @@ index fb70445..0d9a783 100644 See also -------- diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst -index 34a9e40..50fec10 100644 +index 866b7db..5b3be8a 100644 --- a/docs/tools/virtiofsd.rst +++ b/docs/tools/virtiofsd.rst @@ -297,7 +297,7 @@ Export ``/var/lib/fs/vm001/`` on vhost-user UNIX domain socket @@ -138,7 +138,7 @@ index 34a9e40..50fec10 100644 -device vhost-user-fs-pci,chardev=char0,tag=myfs \ -object memory-backend-memfd,id=mem,size=4G,share=on \ diff --git a/qemu-options.hx b/qemu-options.hx -index 53472fd..fde1a62 100644 +index 363a15b..5e5e265 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2935,11 +2935,11 @@ SRST diff --git a/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index 95964c2..33a8125 100644 --- a/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From b8b6ddabd9482c454a68fabe51660fda6a13b0b4 Mon Sep 17 00:00:00 2001 +From 21a45442631721270ff6aba4635e2b72ac2cb248 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] diff --git a/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index 7e3c2b6..6f60d45 100644 --- a/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From 90dad3577e6873e23eb99c1b55c9e1f8fe0e1e16 Mon Sep 17 00:00:00 2001 +From f0561c2a8caa9080f2849b5679816e2268ee420d Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts diff --git a/0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch 
b/0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch index f768d26..7a6e1fe 100644 --- a/0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +++ b/0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch @@ -1,4 +1,4 @@ -From 35b0411d0de4e3e8ef4fb4cef9ee2e8f8ef836e6 Mon Sep 17 00:00:00 2001 +From 37e71d91a69d0437d6f181b757a702910c25c21f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 13 Mar 2020 12:34:32 +0000 Subject: block: Versioned x-blockdev-reopen API with feature flag diff --git a/0020-Build-RHEL-9.patch b/0020-Build-RHEL-9.patch new file mode 100644 index 0000000..e815d00 --- /dev/null +++ b/0020-Build-RHEL-9.patch @@ -0,0 +1,62 @@ +From 1a0497bf6405db1e9ee07db40d90309566bb9f25 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 26 Nov 2020 08:26:34 +0100 +Subject: Build RHEL 9 + +--- + migration/qemu-file.c | 2 +- + qobject/block-qdict.c | 2 +- + redhat/Makefile.common | 3 ++- + redhat/qemu-kvm.spec.template | 12 ------------ + tests/meson.build | 4 ++-- + 5 files changed, 6 insertions(+), 17 deletions(-) + +diff --git a/migration/qemu-file.c b/migration/qemu-file.c +index be21518..d6e03db 100644 +--- a/migration/qemu-file.c ++++ b/migration/qemu-file.c +@@ -595,7 +595,7 @@ size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) + { + if (size < IO_BUF_SIZE) { + size_t res; +- uint8_t *src; ++ uint8_t *src = NULL; + + res = qemu_peek_buffer(f, &src, size, 0); + +diff --git a/qobject/block-qdict.c b/qobject/block-qdict.c +index 1487cc5..b265244 100644 +--- a/qobject/block-qdict.c ++++ b/qobject/block-qdict.c +@@ -224,7 +224,7 @@ void qdict_array_split(QDict *src, QList **dst) + for (i = 0; i < UINT_MAX; i++) { + QObject *subqobj; + bool is_subqdict; +- QDict *subqdict; ++ QDict *subqdict = NULL; + char indexstr[32], prefix[32]; + size_t snprintf_ret; + +diff --git a/tests/meson.build b/tests/meson.build +index afeb6be..52aeaf4 100644 +--- a/tests/meson.build ++++ b/tests/meson.build +@@ -136,7 
+136,7 @@ if have_block + 'test-blockjob': [testblock], + 'test-blockjob-txn': [testblock], + 'test-block-backend': [testblock], +- 'test-block-iothread': [testblock], ++# 'test-block-iothread': [testblock], + 'test-write-threshold': [testblock], + 'test-crypto-hash': [crypto], + 'test-crypto-hmac': [crypto], +@@ -286,5 +286,5 @@ if not get_option('tcg').disabled() + endif + + subdir('qapi-schema') +-subdir('qtest') ++#subdir('qtest') + subdir('migration') +-- +1.8.3.1 + diff --git a/0020-Upstream.patch b/0020-Upstream.patch deleted file mode 100644 index f7ca399..0000000 --- a/0020-Upstream.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 36ad7c726f12e4b706eebc7bac185cd91646dbc3 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 11 Nov 2020 15:41:44 +0100 -Subject: Upstream - ---- - default-configs/devices/aarch64-rh-devices.mak | 1 - - hw/arm/Kconfig | 1 + - 2 files changed, 1 insertion(+), 1 deletion(-) - -diff --git a/default-configs/devices/aarch64-rh-devices.mak b/default-configs/devices/aarch64-rh-devices.mak -index 9831940..d289f34 100644 ---- a/default-configs/devices/aarch64-rh-devices.mak -+++ b/default-configs/devices/aarch64-rh-devices.mak -@@ -1,7 +1,6 @@ - include rh-virtio.mak - - CONFIG_ARM_GIC_KVM=y --CONFIG_ARM_GIC=y - CONFIG_ARM_SMMUV3=y - CONFIG_ARM_V7M=y - CONFIG_ARM_VIRT=y -diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig -index 7d022ee..e69a900 100644 ---- a/hw/arm/Kconfig -+++ b/hw/arm/Kconfig -@@ -6,6 +6,7 @@ config ARM_VIRT - imply VFIO_PLATFORM - imply VFIO_XGMAC - imply TPM_TIS_SYSBUS -+ select ARM_GIC - select ACPI - select ARM_SMMUV3 - select GPIO_KEY --- -1.8.3.1 - diff --git a/0021-RHEL-9-test.patch b/0021-RHEL-9-test.patch deleted file mode 100644 index 611078f..0000000 --- a/0021-RHEL-9-test.patch +++ /dev/null @@ -1,33 +0,0 @@ -From c7b7fffb5ad743115dac8918200a848513acbd4e Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Fri, 13 Nov 2020 11:03:26 +0100 -Subject: RHEL 9 test - ---- - redhat/Makefile.common | 4 ++-- - 
redhat/qemu-kvm.spec.template | 8 -------- - tests/meson.build | 3 +-- - 3 files changed, 3 insertions(+), 12 deletions(-) - -diff --git a/tests/meson.build b/tests/meson.build -index afeb6be..2180fc4 100644 ---- a/tests/meson.build -+++ b/tests/meson.build -@@ -136,7 +136,7 @@ if have_block - 'test-blockjob': [testblock], - 'test-blockjob-txn': [testblock], - 'test-block-backend': [testblock], -- 'test-block-iothread': [testblock], -+# 'test-block-iothread': [testblock], - 'test-write-threshold': [testblock], - 'test-crypto-hash': [crypto], - 'test-crypto-hmac': [crypto], -@@ -286,5 +286,4 @@ if not get_option('tcg').disabled() - endif - - subdir('qapi-schema') --subdir('qtest') - subdir('migration') --- -1.8.3.1 - diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 929d4cd..4ce97ec 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -8,7 +8,7 @@ %global have_gluster 1 %global have_kvm_setup 0 %global have_memlock_limits 0 -%global rcversion -rc1 +%global rcversion -rc3 %ifnarch %{ix86} x86_64 @@ -61,12 +61,10 @@ Requires: %{name}-block-iscsi = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} -# Macro to properly setup RHEL/RHEV conflict handling - Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.2.0 -Release: rc1.1%{?dist} +Release: rc3.1%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -74,7 +72,8 @@ Group: Development/Tools URL: http://www.qemu.org/ ExclusiveArch: x86_64 %{power64} aarch64 s390x -Source0: http://wiki.qemu.org/download/qemu-5.2.0-rc1.tar.xz + +Source0: http://wiki.qemu.org/download/qemu-5.2.0-rc3.tar.xz # KSM control scripts Source4: ksm.service @@ -118,8 +117,7 @@ Patch0016: 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0017: 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0018: 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch Patch0019: 0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch -Patch0020: 0020-Upstream.patch -Patch0021: 0021-RHEL-9-test.patch +Patch0020: 0020-Build-RHEL-9.patch BuildRequires: wget BuildRequires: rpm-build @@ -247,24 +245,19 @@ hardware for a full system such as a PC and its associated peripherals. %package -n qemu-kvm-core Summary: qemu-kvm core components +Requires: %{name}-common = %{epoch}:%{version}-%{release} Requires: qemu-img = %{epoch}:%{version}-%{release} +Conflicts: qemu-kiwi %ifarch %{ix86} x86_64 -Requires: seabios-bin >= 1.10.2-1 -Requires: sgabios-bin Requires: edk2-ovmf %endif %ifarch aarch64 Requires: edk2-aarch64 %endif -%ifnarch aarch64 s390x -Requires: seavgabios-bin >= 1.12.0-3 -Requires: ipxe-roms-qemu >= 20170123-1 -%endif %ifarch %{power64} Requires: SLOF >= %{SLOF_gittagdate}-1.git%{SLOF_gittagcommit} %endif -Requires: %{name}-common = %{epoch}:%{version}-%{release} Requires: libseccomp >= 2.4.0 # For compressed guest memory dumps Requires: lzo snappy @@ -283,19 +276,25 @@ Requires: usbredir >= 0.7.1 Requires: libfdt >= 1.6.0 %endif - %description -n qemu-kvm-core qemu-kvm is an open source virtualizer that provides hardware emulation for the KVM hypervisor. qemu-kvm acts as a virtual machine monitor together with the KVM kernel modules, and emulates the hardware for a full system such as a PC and its associated peripherals. 
+%package -n qemu-kiwi +Summary: qemu-kiwi components +Requires: qemu-kvm-common = %{epoch}:%{version}-%{release} + +%description -n qemu-kiwi +qemu-kiwi is a version of qemu-kvm with a restricted set of features +intended for use by specific applications. +It's experimental and unsupported. %package -n qemu-img Summary: QEMU command line tool for manipulating disk images Group: Development/Tools - %description -n qemu-img This package provides a command line tool for manipulating disk images. @@ -308,7 +307,14 @@ Requires(post): /usr/sbin/useradd Requires(post): systemd-units Requires(preun): systemd-units Requires(postun): systemd-units - +%ifarch %{ix86} x86_64 +Requires: seabios-bin >= 1.10.2-1 +Requires: sgabios-bin +%endif +%ifnarch aarch64 s390x +Requires: seavgabios-bin >= 1.12.0-3 +Requires: ipxe-roms-qemu >= 20170123-1 +%endif %description -n qemu-kvm-common qemu-kvm is an open source virtualizer that provides hardware emulation for @@ -405,7 +411,22 @@ the Secure Shell (SSH) protocol. rm -fr slirp mkdir slirp %autopatch -p1 -mkdir qemu-kvm-build + +%global qemu_kvm_build qemu_kvm_build +%global qemu_kiwi_build qemu_kiwi_src/build + +# XXX: ugly hack to copy source tree into a new folder. +# It allows building qemu-kiwi without touching the original source tree. +# This is required as the build isolation is not 100% as we also have to +# change the source tree when building qemu-kiwi. And, when we do that, +# calling "make check" on qemu-kvm sees that change and behaves badly. +# Newer versions of qemu allow us to create a better solution, and this +# hack can be dropped. +cp -fpr . 
../qemu_kiwi_src +mv ../qemu_kiwi_src ./qemu_kiwi_src +mkdir -p %{qemu_kiwi_build} +mkdir -p %{qemu_kvm_build} + %build %global buildarch %{kvm_target}-softmmu @@ -420,7 +441,121 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" %endif -cd qemu-kvm-build +%define disable_everything \\\ + --disable-attr \\\ + --disable-auth-pam \\\ + --disable-avx2 \\\ + --disable-avx512f \\\ + --disable-bochs \\\ + --disable-brlapi \\\ + --disable-bsd-user \\\ + --disable-bzip2 \\\ + --disable-cap-ng \\\ + --disable-capstone \\\ + --disable-cloop \\\ + --disable-cocoa \\\ + --disable-coroutine-pool \\\ + --disable-crypto-afalg \\\ + --disable-curl \\\ + --disable-curses \\\ + --disable-debug-info \\\ + --disable-debug-mutex \\\ + --disable-debug-tcg \\\ + --disable-dmg \\\ + --disable-docs \\\ + --disable-fdt \\\ + --disable-gcrypt \\\ + --disable-git-update \\\ + --disable-glusterfs \\\ + --disable-gnutls \\\ + --disable-gtk \\\ + --disable-guest-agent \\\ + --disable-guest-agent-msi \\\ + --disable-hax \\\ + --disable-hvf \\\ + --disable-iconv \\\ + --disable-jemalloc \\\ + --disable-kvm \\\ + --disable-libdaxctl \\\ + --disable-libiscsi \\\ + --disable-libnfs \\\ + --disable-libpmem \\\ + --disable-libssh \\\ + --disable-libudev \\\ + --disable-libusb \\\ + --disable-libxml2 \\\ + --disable-linux-aio \\\ + --disable-linux-io-uring \\\ + --disable-linux-user \\\ + --disable-live-block-migration \\\ + --disable-lzfse \\\ + --disable-lzo \\\ + --disable-malloc-trim \\\ + --disable-membarrier \\\ + --disable-modules \\\ + --disable-module-upgrades \\\ + --disable-mpath \\\ + --disable-netmap \\\ + --disable-nettle \\\ + --disable-numa \\\ + --disable-opengl \\\ + --disable-parallels \\\ + --disable-pie \\\ + --disable-pvrdma \\\ + --disable-qcow1 \\\ + --disable-qed \\\ + --disable-qom-cast-debug \\\ + --disable-rbd \\\ + --disable-rdma \\\ + --disable-replication \\\ + --disable-rng-none \\\ + --disable-safe-stack \\\ + --disable-sanitizers \\\ + --disable-sdl \\\ + --disable-sdl-image 
\\\ + --disable-seccomp \\\ + --disable-sheepdog \\\ + --disable-smartcard \\\ + --disable-snappy \\\ + --disable-sparse \\\ + --disable-spice \\\ + --disable-strip \\\ + --disable-system \\\ + --disable-tcg \\\ + --disable-tcmalloc \\\ + --disable-tools \\\ + --disable-tpm \\\ + --disable-u2f \\\ + --disable-usb-redir \\\ + --disable-user \\\ + --disable-vde \\\ + --disable-vdi \\\ + --disable-vhost-crypto \\\ + --disable-vhost-kernel \\\ + --disable-vhost-net \\\ + --disable-vhost-scsi \\\ + --disable-vhost-user \\\ + --disable-vhost-user-blk-server \\\ + --disable-vhost-vdpa \\\ + --disable-vhost-vsock \\\ + --disable-virglrenderer \\\ + --disable-virtfs \\\ + --disable-virtiofsd \\\ + --disable-vnc \\\ + --disable-vnc-jpeg \\\ + --disable-vnc-png \\\ + --disable-vnc-sasl \\\ + --disable-vte \\\ + --disable-vvfat \\\ + --disable-werror \\\ + --disable-whpx \\\ + --disable-xen \\\ + --disable-xen-pci-passthrough \\\ + --disable-xfsctl \\\ + --disable-xkbcommon \\\ + --disable-zstd + +pushd %{qemu_kvm_build} ../configure \ --prefix="%{_prefix}" \ --libdir="%{_libdir}" \ @@ -443,155 +578,84 @@ cd qemu-kvm-build --with-coroutine=ucontext \ --with-git=git \ --tls-priority=NORMAL \ + %{disable_everything} \ --enable-attr \ - --disable-auth-pam \ %ifarch %{ix86} x86_64 --enable-avx2 \ %else - --disable-avx2 \ %endif - --disable-avx512f \ - --disable-bochs \ - --disable-brlapi \ - --disable-bsd-user \ - --disable-bzip2 \ --enable-cap-ng \ --enable-capstone \ - --disable-cloop \ - --disable-cocoa \ --enable-coroutine-pool \ - --disable-crypto-afalg \ --enable-curl \ - --disable-curses \ --enable-debug-info \ - --disable-debug-mutex \ --disable-debug-tcg \ --disable-dmg \ --enable-docs \ %if 0%{have_fdt} --enable-fdt \ -%else - --disable-fdt \ - %endif +%endif --enable-gcrypt \ - --disable-git-update \ %if 0%{have_gluster} --enable-glusterfs \ -%else - --disable-glusterfs \ %endif --enable-gnutls \ - --disable-gtk \ --enable-guest-agent \ - --disable-guest-agent-msi \ - 
--disable-hax \ - --disable-hvf \ --enable-iconv \ - --disable-jemalloc \ --enable-kvm \ - --disable-libdaxctl \ --enable-libiscsi \ - --disable-libnfs \ %ifarch x86_64 --enable-libpmem \ -%else - --disable-libpmem \ %endif --enable-libssh \ --enable-libusb \ - --disable-libxml2 \ + --enable-libudev \ --enable-linux-aio \ - --disable-linux-io-uring \ - --disable-linux-user \ - --disable-live-block-migration \ - --disable-lzfse \ --enable-lzo \ --enable-malloc-trim \ - --disable-membarrier \ --enable-modules \ - --disable-module-upgrades \ --enable-mpath \ - --disable-netmap \ - --disable-nettle \ %ifnarch s390x --enable-numa \ -%else - --disable-numa \ %endif %if 0%{have_opengl} --enable-opengl \ -%else - --disable-opengl \ %endif - --disable-parallels \ --enable-pie \ - --disable-pvrdma \ - --disable-qcow1 \ - --disable-qed \ - --disable-qom-cast-debug \ --enable-rbd \ %if 0%{have_librdma} --enable-rdma \ -%else - --disable-rdma \ %endif - --disable-rng-none \ - --disable-replication \ - --disable-safe-stack \ - --disable-sanitizers \ - --disable-sdl \ - --disable-sdl-image \ --enable-seccomp \ - --disable-sheepdog \ --enable-snappy \ - --disable-sparse \ %if 0%{have_spice} --enable-smartcard \ --enable-spice \ -%else - --disable-smartcard \ - --disable-spice \ %endif - --disable-strip \ --enable-system \ --enable-tcg \ - --disable-tcmalloc \ --enable-tools \ --enable-tpm \ --enable-trace-backend=dtrace \ %if 0%{have_usbredir} --enable-usb-redir \ -%else - --disable-usb-redir \ %endif - --disable-user \ - --disable-vde \ - --disable-vdi \ - --disable-vhost-crypto \ + --enable-virtiofsd \ --enable-vhost-kernel \ --enable-vhost-net \ - --disable-vhost-scsi \ --enable-vhost-user \ + --enable-vhost-user-blk-server \ --enable-vhost-vdpa \ --enable-vhost-vsock \ - --disable-virglrenderer \ - --disable-virtfs \ --enable-vnc \ - --disable-vnc-jpeg \ --enable-vnc-png \ --enable-vnc-sasl \ - --disable-vte \ - --disable-vvfat \ --enable-werror \ - --disable-whpx \ - 
--disable-xen \ - --disable-xen-pci-passthrough \ - --disable-xfsctl \ --enable-xkbcommon \ --disable-zstd \ --without-default-devices -echo "config-host.mak contents:" +echo "qemu-kvm config-host.mak contents:" echo "===" cat config-host.mak echo "===" @@ -616,8 +680,97 @@ cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm gcc %{SOURCE6} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o ksmctl gcc %{SOURCE35} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o udev-kvm-check +popd +echo "Starting qemu-kiwi build" + +pushd %{qemu_kiwi_build} +# XXX: removing QXL and CONFIG_TPM.* mak configuration, +# which causes problem with the config options used by qemu-kiwi. +# Ideally we should be able to do this at configure time. +find ../default-configs -name "*-rh-devices.mak" \ + -exec sed -i '/CONFIG_QXL=/d' {} \; +find ../default-configs -name "*-rh-devices.mak" \ + -exec sed -i '/CONFIG_TPM.*=/d' {} \; + +../configure \ + --prefix="%{_prefix}" \ + --libdir="%{_libdir}" \ + --sysconfdir="%{_sysconfdir}" \ + --interp-prefix=%{_prefix}/qemu-%M \ + --localstatedir="%{_localstatedir}" \ + --libexecdir="%{_libexecdir}" \ + --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ + --extra-cflags="%{optflags}" \ + --with-pkgversion="%{name}-%{version}-%{release}" \ + --firmwarepath=%{_prefix}/share/qemu-firmware \ + --python=%{__python3} \ + --target-list="%{buildarch}" \ + --block-drv-rw-whitelist=%{block_drivers_list} \ + --audio-drv-list= \ + --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \ + --with-coroutine=ucontext \ + --with-git=git \ + --tls-priority=NORMAL \ + %{disable_everything} \ + --enable-attr \ +%ifarch %{ix86} x86_64 + --enable-avx2 \ +%endif + --enable-cap-ng \ + --enable-coroutine-pool \ + --enable-debug-info \ +%if 0%{have_fdt} + --enable-fdt \ +%endif + --enable-kvm \ +%ifarch x86_64 + --enable-libpmem \ +%endif + --enable-linux-aio \ + --enable-libudev \ + --enable-malloc-trim \ + --enable-mpath \ +%ifnarch s390x + --enable-numa \ +%endif + --enable-seccomp \ + 
--enable-system \ + --enable-tcg \ + --enable-trace-backend=dtrace \ + --enable-vhost-kernel \ + --enable-vhost-net \ + --enable-vhost-user \ + --enable-vhost-user-blk-server \ + --enable-vhost-vdpa \ + --enable-vhost-vsock \ + --enable-werror \ + --enable-xkbcommon \ + --without-default-devices + +echo "qemu-kiki config-host.mak contents:" +echo "===" +cat config-host.mak +echo "===" + +make V=1 %{?_smp_mflags} $buildldflags + +%{__python3} scripts/tracetool.py --backend dtrace --format stap \ + --group=all --binary %{_libexecdir}/qemu-kiwi --probe-prefix qemu.kvm \ + trace/trace-events-all > qemu-kiwi.stp + +%{__python3} scripts/tracetool.py --backends=dtrace --format=log-stap \ + --group=all --binary %{_libexecdir}/qemu-kiwi --probe-prefix qemu.kvm \ + trace/trace-events-all > qemu-kiwi-log.stp + +%{__python3} scripts/tracetool.py --backend dtrace --format simpletrace-stap \ + --group=all --binary %{_libexecdir}/qemu-kiwi --probe-prefix qemu.kvm \ + trace/trace-events-all > qemu-kiwi-simpletrace.stp + +cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kiwi +popd + %install -cd qemu-kvm-build +pushd %{qemu_kvm_build} %define _udevdir %(pkg-config --variable=udevdir udev) %define _udevrulesdir %{_udevdir}/rules.d @@ -722,6 +875,7 @@ install -c -m 0644 scripts/systemtap/script.d/qemu_kvm.stp "$RPM_BUILD_ROOT%{_da install -d -m 0755 "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/conf.d" install -c -m 0644 scripts/systemtap/conf.d/qemu_kvm.conf "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/conf.d/" + rm $RPM_BUILD_ROOT/%{_datadir}/applications/qemu.desktop rm $RPM_BUILD_ROOT%{_bindir}/qemu-system-%{kvm_target} rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}.stp @@ -904,11 +1058,50 @@ rm -rf $RPM_BUILD_ROOT%{qemudocdir}/user/.buildinfo # Remove spec rm -rf $RPM_BUILD_ROOT%{qemudocdir}/specs -%check -cd qemu-kvm-build -export DIFF=diff; make check V=1 +popd -%post -n qemu-kvm-core +pushd %{qemu_kiwi_build} +install -m 0755 
%{kvm_target}-softmmu/qemu-system-%{kvm_target} $RPM_BUILD_ROOT%{_libexecdir}/qemu-kiwi +install -m 0644 qemu-kiwi.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -m 0644 qemu-kiwi-log.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -m 0644 qemu-kiwi-simpletrace.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +popd + +%check +pushd %{qemu_kvm_build} +echo "Testing qemu-kvm-build" +export DIFF=diff; make check V=1 +popd + +echo "Testing qemu-kiwi" +pushd %{qemu_kiwi_build} +export DIFF=diff; make check V=1 +popd + +%post -n qemu-kvm-common +%systemd_post ksm.service +%systemd_post ksmtuned.service + +getent group kvm >/dev/null || groupadd -g 36 -r kvm +getent group qemu >/dev/null || groupadd -g 107 -r qemu +getent passwd qemu >/dev/null || \ +useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ + -c "qemu user" qemu + +# load kvm modules now, so we can make sure no reboot is needed. +# If there's already a kvm module installed, we don't mess with it +%udev_rules_update +sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : + udevadm trigger --subsystem-match=misc --sysname-match=kvm --action=add || : +%if %{have_kvm_setup} + systemctl daemon-reload # Make sure it sees the new presets and unitfile + %systemd_post kvm-setup.service + if systemctl is-enabled kvm-setup.service > /dev/null; then + systemctl start kvm-setup.service + fi +%endif + +%post -n qemu-kiwi # load kvm modules now, so we can make sure no reboot is needed. 
# If there's already a kvm module installed, we don't mess with it %udev_rules_update @@ -923,28 +1116,28 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %endif %if %{have_kvm_setup} -%preun -n qemu-kvm-core +%preun -n qemu-kiwi %systemd_preun kvm-setup.service %endif -%post -n qemu-kvm-common -%systemd_post ksm.service -%systemd_post ksmtuned.service - -getent group kvm >/dev/null || groupadd -g 36 -r kvm -getent group qemu >/dev/null || groupadd -g 107 -r qemu -getent passwd qemu >/dev/null || \ -useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - -c "qemu user" qemu - %preun -n qemu-kvm-common %systemd_preun ksm.service %systemd_preun ksmtuned.service +%if %{have_kvm_setup} +%systemd_preun kvm-setup.service +%endif %postun -n qemu-kvm-common %systemd_postun_with_restart ksm.service %systemd_postun_with_restart ksmtuned.service +%post -n qemu-guest-agent +%systemd_post qemu-guest-agent.service +%preun -n qemu-guest-agent +%systemd_preun qemu-guest-agent.service +%postun -n qemu-guest-agent +%systemd_postun_with_restart qemu-guest-agent.service + %files # Deliberately empty @@ -972,6 +1165,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_unitdir}/qemu-pr-helper.service %{_unitdir}/qemu-pr-helper.socket %{_mandir}/man7/qemu-ga-ref.7* +%{_mandir}/man8/qemu-pr-helper.8* %{_mandir}/man1/virtiofsd.1* %dir %{_datadir}/%{name}/ @@ -999,8 +1193,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/%{name}/tracetool/backend/*.py* %{_datadir}/%{name}/tracetool/format/*.py* -%files -n qemu-kvm-core -%defattr(-,root,root) %ifarch x86_64 %{_datadir}/%{name}/bios.bin %{_datadir}/%{name}/bios-256k.bin @@ -1035,13 +1227,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/icons/* %{_datadir}/%{name}/linuxboot_dma.bin %{_datadir}/%{name}/dump-guest-memory.py* -%{_libexecdir}/qemu-kvm -%{_datadir}/systemtap/tapset/qemu-kvm.stp -%{_datadir}/systemtap/tapset/qemu-kvm-log.stp 
%{_datadir}/%{name}/trace-events-all -%{_datadir}/systemtap/tapset/qemu-kvm-simpletrace.stp -%{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp -%{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf %if 0%{have_kvm_setup} %{_prefix}/lib/systemd/kvm-setup %{_unitdir}/kvm-setup.service @@ -1052,6 +1238,16 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %{_libexecdir}/virtiofsd %{_datadir}/%{name}/vhost-user/50-qemu-virtiofsd.json + +%files -n qemu-kvm-core +%defattr(-,root,root) +%{_libexecdir}/qemu-kvm +%{_datadir}/systemtap/tapset/qemu-kvm.stp +%{_datadir}/systemtap/tapset/qemu-kvm-log.stp +%{_datadir}/systemtap/tapset/qemu-kvm-simpletrace.stp +%{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp +%{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf + %if %{have_usbredir} %{_libdir}/qemu-kvm/hw-usb-redirect.so %endif @@ -1072,6 +1268,13 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_libdir}/qemu-kvm/ui-opengl.so %endif +%files -n qemu-kiwi +%defattr(-,root,root) +%{_libexecdir}/qemu-kiwi +%{_datadir}/systemtap/tapset/qemu-kiwi.stp +%{_datadir}/systemtap/tapset/qemu-kiwi-log.stp +%{_datadir}/systemtap/tapset/qemu-kiwi-simpletrace.stp + %files -n qemu-img %defattr(-,root,root) %{_bindir}/qemu-img @@ -1115,6 +1318,39 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Nov 16 2020 Danilo Cesar Lemes de Paula - 5.1.0-15.el8 +- kvm-redhat-add-un-pre-install-systemd-hooks-for-qemu-ga.patch [bz#1882719] +- kvm-rcu-Implement-drain_call_rcu.patch [bz#1812399 bz#1866707] +- kvm-libqtest-Rename-qmp_assert_error_class-to-qmp_expect.patch [bz#1812399 bz#1866707] +- kvm-qtest-rename-qtest_qmp_receive-to-qtest_qmp_receive_.patch [bz#1812399 bz#1866707] +- kvm-qtest-Reintroduce-qtest_qmp_receive-with-QMP-event-b.patch [bz#1812399 bz#1866707] +- kvm-qtest-remove-qtest_qmp_receive_success.patch [bz#1812399 bz#1866707] +- kvm-device-plug-test-use-qtest_qmp-to-send-the-device_de.patch [bz#1812399 bz#1866707] +- 
kvm-qtest-switch-users-back-to-qtest_qmp_receive.patch [bz#1812399 bz#1866707] +- kvm-qtest-check-that-drives-are-really-appearing-and-dis.patch [bz#1812399 bz#1866707] +- kvm-qemu-iotests-qtest-rewrite-test-067-as-a-qtest.patch [bz#1812399 bz#1866707] +- kvm-qdev-add-check-if-address-free-callback-for-buses.patch [bz#1812399 bz#1866707] +- kvm-scsi-scsi_bus-switch-search-direction-in-scsi_device.patch [bz#1812399 bz#1866707] +- kvm-device_core-use-drain_call_rcu-in-in-qmp_device_add.patch [bz#1812399 bz#1866707] +- kvm-device-core-use-RCU-for-list-of-children-of-a-bus.patch [bz#1812399 bz#1866707] +- kvm-scsi-switch-to-bus-check_address.patch [bz#1812399 bz#1866707] +- kvm-device-core-use-atomic_set-on-.realized-property.patch [bz#1812399 bz#1866707] +- kvm-scsi-scsi-bus-scsi_device_find-don-t-return-unrealiz.patch [bz#1812399] +- kvm-scsi-scsi_bus-Add-scsi_device_get.patch [bz#1812399 bz#1866707] +- kvm-virtio-scsi-use-scsi_device_get.patch [bz#1812399 bz#1866707] +- kvm-scsi-scsi_bus-fix-races-in-REPORT-LUNS.patch [bz#1812399 bz#1866707] +- kvm-tests-migration-fix-memleak-in-wait_command-wait_com.patch [bz#1812399 bz#1866707] +- kvm-libqtest-fix-the-order-of-buffered-events.patch [bz#1812399 bz#1866707] +- kvm-libqtest-fix-memory-leak-in-the-qtest_qmp_event_ref.patch [bz#1812399 bz#1866707] +- kvm-iotests-add-filter_qmp_virtio_scsi-function.patch [bz#1812399 bz#1866707] +- kvm-iotests-rewrite-iotest-240-in-python.patch [bz#1812399 bz#1866707] +- Resolves: bz#1812399 + (Qemu crash when detach disk with cache="none" discard="ignore" io="native") +- Resolves: bz#1866707 + (qemu-kvm is crashing with error "scsi_target_emulate_report_luns: Assertion `i == n + 8' failed") +- Resolves: bz#1882719 + (qemu-ga service still active and can work after qemu-guest-agent been removed) + * Tue Oct 13 2020 Danilo Cesar Lemes de Paula - 5.1.0-14.el8_3 - kvm-virtiofsd-avoid-proc-self-fd-tempdir.patch [bz#1884276] - Resolves: bz#1884276 diff --git a/sources b/sources index 
7c6eed4..235915c 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-5.2.0-rc1.tar.xz) = 5345c9e8811efe2c1bab92ecb846f267dc6ef2a67ac03a39547344dc810a13027ef7352c7209d528ec81108dbc1e5e9ca96da6f7306c682ad8f785f596fa4dde +SHA512 (qemu-5.2.0-rc3.tar.xz) = e3913388fd5f5b7e3564bdc10869e1b9a9bf1a569c11748aec391ae6d13ea99fad43f74c4d70202f69dce93dd8961072a0655f63a8d9bbe78dc2a7220bc32048 From eea10ec9175b04c7cf823a2b1c5d2a9b8e56f3d9 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Tue, 5 Jan 2021 06:59:21 +0100 Subject: [PATCH 105/195] Update to 5.2.0-2 release --- .gitignore | 1 + ...at-Adding-slirp-to-the-exploded-tree.patch | 249 +++++++++--------- 0005-Initial-redhat-build.patch | 28 +- 0006-Enable-disable-devices-for-RHEL.patch | 108 ++++---- ...Machine-type-related-general-changes.patch | 62 ++--- 0008-Add-aarch64-machine-types.patch | 12 +- 0009-Add-ppc64-machine-types.patch | 20 +- 0010-Add-s390x-machine-types.patch | 8 +- 0011-Add-x86_64-machine-types.patch | 28 +- 0012-Enable-make-check.patch | 46 ++-- ...mber-of-devices-that-can-be-assigned.patch | 8 +- ...Add-support-statement-to-help-output.patch | 6 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 6 +- ...documentation-instead-of-qemu-system.patch | 14 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 6 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 6 +- ...x-blockdev-reopen-API-with-feature-f.patch | 6 +- 0020-Build-RHEL-9.patch | 62 ----- 0021-redhat-Define-hw_compat_8_3.patch | 70 +++++ ...r_machine_rhel_default_class_options.patch | 66 +++++ ...efine-pseries-rhel8.4.0-machine-type.patch | 70 +++++ ...-s390x-add-rhel-8.4.0-compat-machine.patch | 72 +++++ ...pc_open-read-the-full-dynamic-header.patch | 56 ++++ 0028-GCC-11-warnings-hacks.patch | 163 ++++++++++++ ...-problematic-tests-for-initial-build.patch | 27 ++ qemu-kvm.spec | 88 ++++--- sources | 2 +- 27 files changed, 898 insertions(+), 392 deletions(-) delete mode 100644 0020-Build-RHEL-9.patch create mode 100644 
0021-redhat-Define-hw_compat_8_3.patch create mode 100644 0022-redhat-Add-spapr_machine_rhel_default_class_options.patch create mode 100644 0023-redhat-Define-pseries-rhel8.4.0-machine-type.patch create mode 100644 0024-redhat-s390x-add-rhel-8.4.0-compat-machine.patch create mode 100644 0027-block-vpc-Make-vpc_open-read-the-full-dynamic-header.patch create mode 100644 0028-GCC-11-warnings-hacks.patch create mode 100644 0029-Disable-problematic-tests-for-initial-build.patch diff --git a/.gitignore b/.gitignore index ad21a5a..72537aa 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ /qemu-5.1.0.tar.xz /qemu-5.2.0-rc1.tar.xz /qemu-5.2.0-rc3.tar.xz +/qemu-5.2.0.tar.xz diff --git a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch index f942236..4895179 100644 --- a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch +++ b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch @@ -1,6 +1,6 @@ -From 03c528f8dd064ee0ac40bd37f686cd5616071fdf Mon Sep 17 00:00:00 2001 +From f04f3d3ab0bb9ffd06a16ee5157f08bcb4f5f459 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Wed, 29 Jul 2020 07:48:57 +0200 +Date: Wed, 2 Dec 2020 07:38:31 +0100 Subject: redhat: Adding slirp to the exploded tree RH-Author: Danilo de Paula @@ -48,82 +48,83 @@ Signed-off-by: Danilo C. L. 
de Paula Rebase notes (5.1.0-rc2): - Update slirp directory to commit ce94eba2042d52a0ba3d9e252ebce86715e94275 (used upstream) + +Rebase notes (5.2.0-rc4): + - Update slirp directory to commit 8f43a99191afb47ca3f3c6972f6306209f367ece (used upstream) --- .gitmodules | 3 - slirp/.clang-format | 58 ++ slirp/.gitignore | 10 + slirp/.gitlab-ci.yml | 27 + - slirp/.gitpublish | 3 + - slirp/CHANGELOG.md | 88 +++ + slirp/CHANGELOG.md | 88 ++ slirp/COPYRIGHT | 62 ++ slirp/Makefile | 62 ++ slirp/README.md | 60 ++ slirp/build-aux/git-version-gen | 158 ++++ slirp/build-aux/meson-dist | 16 + - slirp/meson.build | 134 ++++ - slirp/src/arp_table.c | 92 +++ - slirp/src/bootp.c | 369 ++++++++++ - slirp/src/bootp.h | 129 ++++ - slirp/src/cksum.c | 179 +++++ - slirp/src/debug.h | 51 ++ - slirp/src/dhcpv6.c | 224 ++++++ + slirp/meson.build | 134 +++ + slirp/src/arp_table.c | 92 ++ + slirp/src/bootp.c | 369 ++++++++ + slirp/src/bootp.h | 129 +++ + slirp/src/cksum.c | 179 ++++ + slirp/src/debug.h | 51 + + slirp/src/dhcpv6.c | 224 +++++ slirp/src/dhcpv6.h | 68 ++ - slirp/src/dnssearch.c | 306 ++++++++ - slirp/src/if.c | 213 ++++++ + slirp/src/dnssearch.c | 306 ++++++ + slirp/src/if.c | 213 +++++ slirp/src/if.h | 25 + - slirp/src/ip.h | 242 ++++++ - slirp/src/ip6.h | 214 ++++++ - slirp/src/ip6_icmp.c | 433 +++++++++++ - slirp/src/ip6_icmp.h | 219 ++++++ - slirp/src/ip6_input.c | 85 +++ + slirp/src/ip.h | 242 +++++ + slirp/src/ip6.h | 214 +++++ + slirp/src/ip6_icmp.c | 433 +++++++++ + slirp/src/ip6_icmp.h | 219 +++++ + slirp/src/ip6_input.c | 85 ++ slirp/src/ip6_output.c | 39 + - slirp/src/ip_icmp.c | 492 +++++++++++++ - slirp/src/ip_icmp.h | 166 +++++ - slirp/src/ip_input.c | 461 ++++++++++++ - slirp/src/ip_output.c | 169 +++++ + slirp/src/ip_icmp.c | 492 ++++++++++ + slirp/src/ip_icmp.h | 166 ++++ + slirp/src/ip_input.c | 461 +++++++++ + slirp/src/ip_output.c | 169 ++++ slirp/src/libslirp-version.h.in | 24 + - slirp/src/libslirp.h | 171 +++++ + slirp/src/libslirp.h | 171 ++++ 
slirp/src/libslirp.map | 30 + slirp/src/main.h | 16 + - slirp/src/mbuf.c | 224 ++++++ - slirp/src/mbuf.h | 127 ++++ - slirp/src/misc.c | 390 ++++++++++ + slirp/src/mbuf.c | 224 +++++ + slirp/src/mbuf.h | 127 +++ + slirp/src/misc.c | 390 ++++++++ slirp/src/misc.h | 72 ++ - slirp/src/ncsi-pkt.h | 445 +++++++++++ - slirp/src/ncsi.c | 193 +++++ - slirp/src/ndp_table.c | 87 +++ - slirp/src/sbuf.c | 168 +++++ + slirp/src/ncsi-pkt.h | 445 +++++++++ + slirp/src/ncsi.c | 197 ++++ + slirp/src/ndp_table.c | 87 ++ + slirp/src/sbuf.c | 168 ++++ slirp/src/sbuf.h | 27 + - slirp/src/slirp.c | 1185 ++++++++++++++++++++++++++++++ - slirp/src/slirp.h | 284 ++++++++ - slirp/src/socket.c | 954 ++++++++++++++++++++++++ - slirp/src/socket.h | 164 +++++ - slirp/src/state.c | 379 ++++++++++ + slirp/src/slirp.c | 1189 ++++++++++++++++++++++++ + slirp/src/slirp.h | 284 ++++++ + slirp/src/socket.c | 954 +++++++++++++++++++ + slirp/src/socket.h | 164 ++++ + slirp/src/state.c | 379 ++++++++ slirp/src/stream.c | 120 +++ slirp/src/stream.h | 35 + - slirp/src/tcp.h | 169 +++++ - slirp/src/tcp_input.c | 1539 +++++++++++++++++++++++++++++++++++++++ - slirp/src/tcp_output.c | 516 +++++++++++++ - slirp/src/tcp_subr.c | 980 +++++++++++++++++++++++++ - slirp/src/tcp_timer.c | 286 ++++++++ - slirp/src/tcp_timer.h | 130 ++++ + slirp/src/tcp.h | 169 ++++ + slirp/src/tcp_input.c | 1539 +++++++++++++++++++++++++++++++ + slirp/src/tcp_output.c | 516 +++++++++++ + slirp/src/tcp_subr.c | 980 ++++++++++++++++++++ + slirp/src/tcp_timer.c | 286 ++++++ + slirp/src/tcp_timer.h | 130 +++ slirp/src/tcp_var.h | 161 ++++ slirp/src/tcpip.h | 104 +++ - slirp/src/tftp.c | 464 ++++++++++++ + slirp/src/tftp.c | 464 ++++++++++ slirp/src/tftp.h | 54 ++ - slirp/src/udp.c | 365 ++++++++++ - slirp/src/udp.h | 90 +++ - slirp/src/udp6.c | 173 +++++ - slirp/src/util.c | 428 +++++++++++ - slirp/src/util.h | 189 +++++ + slirp/src/udp.c | 365 ++++++++ + slirp/src/udp.h | 90 ++ + slirp/src/udp6.c | 173 ++++ + slirp/src/util.c | 428 
+++++++++ + slirp/src/util.h | 189 ++++ slirp/src/version.c | 8 + - slirp/src/vmstate.c | 444 +++++++++++ - slirp/src/vmstate.h | 391 ++++++++++ - 70 files changed, 16440 insertions(+), 3 deletions(-) + slirp/src/vmstate.c | 444 +++++++++ + slirp/src/vmstate.h | 391 ++++++++ + 69 files changed, 16445 insertions(+), 3 deletions(-) create mode 100644 slirp/.clang-format create mode 100644 slirp/.gitignore create mode 100644 slirp/.gitlab-ci.yml - create mode 100644 slirp/.gitpublish create mode 100644 slirp/CHANGELOG.md create mode 100644 slirp/COPYRIGHT create mode 100644 slirp/Makefile @@ -192,7 +193,7 @@ Rebase notes (5.1.0-rc2): diff --git a/slirp/.clang-format b/slirp/.clang-format new file mode 100644 -index 0000000..17fb49f +index 0000000000..17fb49fe65 --- /dev/null +++ b/slirp/.clang-format @@ -0,0 +1,58 @@ @@ -256,7 +257,7 @@ index 0000000..17fb49f +... diff --git a/slirp/CHANGELOG.md b/slirp/CHANGELOG.md new file mode 100644 -index 0000000..67b0a74 +index 0000000000..67b0a74195 --- /dev/null +++ b/slirp/CHANGELOG.md @@ -0,0 +1,88 @@ @@ -350,7 +351,7 @@ index 0000000..67b0a74 +[4.0.0]: https://gitlab.freedesktop.org/slirp/libslirp/commits/v4.0.0 diff --git a/slirp/COPYRIGHT b/slirp/COPYRIGHT new file mode 100644 -index 0000000..ed49512 +index 0000000000..ed49512dbc --- /dev/null +++ b/slirp/COPYRIGHT @@ -0,0 +1,62 @@ @@ -418,7 +419,7 @@ index 0000000..ed49512 +copyrights. diff --git a/slirp/Makefile b/slirp/Makefile new file mode 100644 -index 0000000..8857b41 +index 0000000000..8857b4159b --- /dev/null +++ b/slirp/Makefile @@ -0,0 +1,62 @@ @@ -486,7 +487,7 @@ index 0000000..8857b41 +-include $(DEPS) diff --git a/slirp/README.md b/slirp/README.md new file mode 100644 -index 0000000..dc11e5f +index 0000000000..dc11e5f18b --- /dev/null +++ b/slirp/README.md @@ -0,0 +1,60 @@ @@ -552,7 +553,7 @@ index 0000000..dc11e5f +See the [COPYRIGHT](COPYRIGHT) file for details. 
diff --git a/slirp/build-aux/git-version-gen b/slirp/build-aux/git-version-gen new file mode 100755 -index 0000000..5617eb8 +index 0000000000..5617eb8d4e --- /dev/null +++ b/slirp/build-aux/git-version-gen @@ -0,0 +1,158 @@ @@ -716,7 +717,7 @@ index 0000000..5617eb8 +# End: diff --git a/slirp/build-aux/meson-dist b/slirp/build-aux/meson-dist new file mode 100755 -index 0000000..80d534f +index 0000000000..80d534fec6 --- /dev/null +++ b/slirp/build-aux/meson-dist @@ -0,0 +1,16 @@ @@ -738,7 +739,7 @@ index 0000000..80d534f +echo "$1" > "$MESON_DIST_ROOT/.tarball-version" diff --git a/slirp/meson.build b/slirp/meson.build new file mode 100644 -index 0000000..3a27149 +index 0000000000..3a27149373 --- /dev/null +++ b/slirp/meson.build @@ -0,0 +1,134 @@ @@ -878,7 +879,7 @@ index 0000000..3a27149 +) diff --git a/slirp/src/arp_table.c b/slirp/src/arp_table.c new file mode 100644 -index 0000000..959e5b9 +index 0000000000..959e5b9ec0 --- /dev/null +++ b/slirp/src/arp_table.c @@ -0,0 +1,92 @@ @@ -976,7 +977,7 @@ index 0000000..959e5b9 +} diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c new file mode 100644 -index 0000000..46e9681 +index 0000000000..46e96810ab --- /dev/null +++ b/slirp/src/bootp.c @@ -0,0 +1,369 @@ @@ -1351,7 +1352,7 @@ index 0000000..46e9681 +} diff --git a/slirp/src/bootp.h b/slirp/src/bootp.h new file mode 100644 -index 0000000..a57fa51 +index 0000000000..a57fa51bcb --- /dev/null +++ b/slirp/src/bootp.h @@ -0,0 +1,129 @@ @@ -1486,7 +1487,7 @@ index 0000000..a57fa51 +#endif diff --git a/slirp/src/cksum.c b/slirp/src/cksum.c new file mode 100644 -index 0000000..4d08380 +index 0000000000..4d08380a4e --- /dev/null +++ b/slirp/src/cksum.c @@ -0,0 +1,179 @@ @@ -1671,7 +1672,7 @@ index 0000000..4d08380 +} diff --git a/slirp/src/debug.h b/slirp/src/debug.h new file mode 100644 -index 0000000..47712bd +index 0000000000..47712bd78b --- /dev/null +++ b/slirp/src/debug.h @@ -0,0 +1,51 @@ @@ -1728,7 +1729,7 @@ index 0000000..47712bd +#endif /* DEBUG_H_ */ diff --git 
a/slirp/src/dhcpv6.c b/slirp/src/dhcpv6.c new file mode 100644 -index 0000000..77b451b +index 0000000000..77b451b910 --- /dev/null +++ b/slirp/src/dhcpv6.c @@ -0,0 +1,224 @@ @@ -1958,7 +1959,7 @@ index 0000000..77b451b +} diff --git a/slirp/src/dhcpv6.h b/slirp/src/dhcpv6.h new file mode 100644 -index 0000000..d12c49b +index 0000000000..d12c49b36c --- /dev/null +++ b/slirp/src/dhcpv6.h @@ -0,0 +1,68 @@ @@ -2032,7 +2033,7 @@ index 0000000..d12c49b +#endif diff --git a/slirp/src/dnssearch.c b/slirp/src/dnssearch.c new file mode 100644 -index 0000000..55497e8 +index 0000000000..55497e860e --- /dev/null +++ b/slirp/src/dnssearch.c @@ -0,0 +1,306 @@ @@ -2344,7 +2345,7 @@ index 0000000..55497e8 +} diff --git a/slirp/src/if.c b/slirp/src/if.c new file mode 100644 -index 0000000..23190b5 +index 0000000000..23190b5593 --- /dev/null +++ b/slirp/src/if.c @@ -0,0 +1,213 @@ @@ -2563,7 +2564,7 @@ index 0000000..23190b5 +} diff --git a/slirp/src/if.h b/slirp/src/if.h new file mode 100644 -index 0000000..7cf9d27 +index 0000000000..7cf9d2750e --- /dev/null +++ b/slirp/src/if.h @@ -0,0 +1,25 @@ @@ -2594,7 +2595,7 @@ index 0000000..7cf9d27 +#endif diff --git a/slirp/src/ip.h b/slirp/src/ip.h new file mode 100644 -index 0000000..e5d4aa8 +index 0000000000..e5d4aa8a6d --- /dev/null +++ b/slirp/src/ip.h @@ -0,0 +1,242 @@ @@ -2842,7 +2843,7 @@ index 0000000..e5d4aa8 +#endif diff --git a/slirp/src/ip6.h b/slirp/src/ip6.h new file mode 100644 -index 0000000..0630309 +index 0000000000..0630309d29 --- /dev/null +++ b/slirp/src/ip6.h @@ -0,0 +1,214 @@ @@ -3062,7 +3063,7 @@ index 0000000..0630309 +#endif diff --git a/slirp/src/ip6_icmp.c b/slirp/src/ip6_icmp.c new file mode 100644 -index 0000000..d9c872b +index 0000000000..d9c872bc97 --- /dev/null +++ b/slirp/src/ip6_icmp.c @@ -0,0 +1,433 @@ @@ -3501,7 +3502,7 @@ index 0000000..d9c872b +} diff --git a/slirp/src/ip6_icmp.h b/slirp/src/ip6_icmp.h new file mode 100644 -index 0000000..c37e60f +index 0000000000..c37e60f28d --- /dev/null +++ 
b/slirp/src/ip6_icmp.h @@ -0,0 +1,219 @@ @@ -3726,7 +3727,7 @@ index 0000000..c37e60f +#endif diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c new file mode 100644 -index 0000000..a83e4f8 +index 0000000000..a83e4f8e3d --- /dev/null +++ b/slirp/src/ip6_input.c @@ -0,0 +1,85 @@ @@ -3817,7 +3818,7 @@ index 0000000..a83e4f8 +} diff --git a/slirp/src/ip6_output.c b/slirp/src/ip6_output.c new file mode 100644 -index 0000000..b861106 +index 0000000000..b86110662c --- /dev/null +++ b/slirp/src/ip6_output.c @@ -0,0 +1,39 @@ @@ -3862,7 +3863,7 @@ index 0000000..b861106 +} diff --git a/slirp/src/ip_icmp.c b/slirp/src/ip_icmp.c new file mode 100644 -index 0000000..13a0e55 +index 0000000000..13a0e55085 --- /dev/null +++ b/slirp/src/ip_icmp.c @@ -0,0 +1,492 @@ @@ -4360,7 +4361,7 @@ index 0000000..13a0e55 +} diff --git a/slirp/src/ip_icmp.h b/slirp/src/ip_icmp.h new file mode 100644 -index 0000000..84707db +index 0000000000..84707db247 --- /dev/null +++ b/slirp/src/ip_icmp.h @@ -0,0 +1,166 @@ @@ -4532,7 +4533,7 @@ index 0000000..84707db +#endif diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c new file mode 100644 -index 0000000..7f017a2 +index 0000000000..7f017a238a --- /dev/null +++ b/slirp/src/ip_input.c @@ -0,0 +1,461 @@ @@ -4999,7 +5000,7 @@ index 0000000..7f017a2 +} diff --git a/slirp/src/ip_output.c b/slirp/src/ip_output.c new file mode 100644 -index 0000000..22916a3 +index 0000000000..22916a37df --- /dev/null +++ b/slirp/src/ip_output.c @@ -0,0 +1,169 @@ @@ -5174,7 +5175,7 @@ index 0000000..22916a3 +} diff --git a/slirp/src/libslirp-version.h.in b/slirp/src/libslirp-version.h.in new file mode 100644 -index 0000000..faa6c85 +index 0000000000..faa6c85952 --- /dev/null +++ b/slirp/src/libslirp-version.h.in @@ -0,0 +1,24 @@ @@ -5204,7 +5205,7 @@ index 0000000..faa6c85 +#endif /* LIBSLIRP_VERSION_H_ */ diff --git a/slirp/src/libslirp.h b/slirp/src/libslirp.h new file mode 100644 -index 0000000..fb4c7e8 +index 0000000000..fb4c7e882c --- /dev/null +++ 
b/slirp/src/libslirp.h @@ -0,0 +1,171 @@ @@ -5381,7 +5382,7 @@ index 0000000..fb4c7e8 +#endif /* LIBSLIRP_H */ diff --git a/slirp/src/libslirp.map b/slirp/src/libslirp.map new file mode 100644 -index 0000000..72aab91 +index 0000000000..72aab912f4 --- /dev/null +++ b/slirp/src/libslirp.map @@ -0,0 +1,30 @@ @@ -5417,7 +5418,7 @@ index 0000000..72aab91 +} SLIRP_4.1; diff --git a/slirp/src/main.h b/slirp/src/main.h new file mode 100644 -index 0000000..3b3f883 +index 0000000000..3b3f883703 --- /dev/null +++ b/slirp/src/main.h @@ -0,0 +1,16 @@ @@ -5439,7 +5440,7 @@ index 0000000..3b3f883 +#endif diff --git a/slirp/src/mbuf.c b/slirp/src/mbuf.c new file mode 100644 -index 0000000..54ec721 +index 0000000000..54ec721eb5 --- /dev/null +++ b/slirp/src/mbuf.c @@ -0,0 +1,224 @@ @@ -5669,7 +5670,7 @@ index 0000000..54ec721 +} diff --git a/slirp/src/mbuf.h b/slirp/src/mbuf.h new file mode 100644 -index 0000000..546e785 +index 0000000000..546e7852c5 --- /dev/null +++ b/slirp/src/mbuf.h @@ -0,0 +1,127 @@ @@ -5802,7 +5803,7 @@ index 0000000..546e785 +#endif diff --git a/slirp/src/misc.c b/slirp/src/misc.c new file mode 100644 -index 0000000..e6bc0a2 +index 0000000000..e6bc0a207d --- /dev/null +++ b/slirp/src/misc.c @@ -0,0 +1,390 @@ @@ -6199,7 +6200,7 @@ index 0000000..e6bc0a2 \ No newline at end of file diff --git a/slirp/src/misc.h b/slirp/src/misc.h new file mode 100644 -index 0000000..81b370c +index 0000000000..81b370cfb1 --- /dev/null +++ b/slirp/src/misc.h @@ -0,0 +1,72 @@ @@ -6277,7 +6278,7 @@ index 0000000..81b370c +#endif diff --git a/slirp/src/ncsi-pkt.h b/slirp/src/ncsi-pkt.h new file mode 100644 -index 0000000..7795ad8 +index 0000000000..7795ad83ee --- /dev/null +++ b/slirp/src/ncsi-pkt.h @@ -0,0 +1,445 @@ @@ -6728,10 +6729,10 @@ index 0000000..7795ad8 +#endif /* NCSI_PKT_H */ diff --git a/slirp/src/ncsi.c b/slirp/src/ncsi.c new file mode 100644 -index 0000000..3c1dfef +index 0000000000..75dcc08356 --- /dev/null +++ b/slirp/src/ncsi.c -@@ -0,0 +1,193 @@ +@@ -0,0 +1,197 
@@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * NC-SI (Network Controller Sideband Interface) "echo" model @@ -6882,6 +6883,10 @@ index 0000000..3c1dfef + uint32_t checksum; + uint32_t *pchecksum; + ++ if (pkt_len < ETH_HLEN + sizeof(struct ncsi_pkt_hdr)) { ++ return; /* packet too short */ ++ } ++ + memset(ncsi_reply, 0, sizeof(ncsi_reply)); + + memset(reh->h_dest, 0xff, ETH_ALEN); @@ -6927,7 +6932,7 @@ index 0000000..3c1dfef +} diff --git a/slirp/src/ndp_table.c b/slirp/src/ndp_table.c new file mode 100644 -index 0000000..110d6ea +index 0000000000..110d6ea0e4 --- /dev/null +++ b/slirp/src/ndp_table.c @@ -0,0 +1,87 @@ @@ -7020,7 +7025,7 @@ index 0000000..110d6ea +} diff --git a/slirp/src/sbuf.c b/slirp/src/sbuf.c new file mode 100644 -index 0000000..2fb9176 +index 0000000000..2fb9176144 --- /dev/null +++ b/slirp/src/sbuf.c @@ -0,0 +1,168 @@ @@ -7194,7 +7199,7 @@ index 0000000..2fb9176 +} diff --git a/slirp/src/sbuf.h b/slirp/src/sbuf.h new file mode 100644 -index 0000000..01886fb +index 0000000000..01886fbd01 --- /dev/null +++ b/slirp/src/sbuf.h @@ -0,0 +1,27 @@ @@ -7227,10 +7232,10 @@ index 0000000..01886fb +#endif diff --git a/slirp/src/slirp.c b/slirp/src/slirp.c new file mode 100644 -index 0000000..dba7c98 +index 0000000000..9be58e2add --- /dev/null +++ b/slirp/src/slirp.c -@@ -0,0 +1,1185 @@ +@@ -0,0 +1,1189 @@ +/* SPDX-License-Identifier: MIT */ +/* + * libslirp glue @@ -7989,6 +7994,10 @@ index 0000000..dba7c98 + return; + } + ++ if (pkt_len < ETH_HLEN + sizeof(struct slirp_arphdr)) { ++ return; /* packet too short */ ++ } ++ + ar_op = ntohs(ah->ar_op); + switch (ar_op) { + case ARPOP_REQUEST: @@ -8418,7 +8427,7 @@ index 0000000..dba7c98 +} diff --git a/slirp/src/slirp.h b/slirp/src/slirp.h new file mode 100644 -index 0000000..763a65b +index 0000000000..763a65b9ef --- /dev/null +++ b/slirp/src/slirp.h @@ -0,0 +1,284 @@ @@ -8708,7 +8717,7 @@ index 0000000..763a65b +#endif diff --git a/slirp/src/socket.c b/slirp/src/socket.c new file mode 100644 
-index 0000000..1e385df +index 0000000000..1e385df0d8 --- /dev/null +++ b/slirp/src/socket.c @@ -0,0 +1,954 @@ @@ -9668,7 +9677,7 @@ index 0000000..1e385df +} diff --git a/slirp/src/socket.h b/slirp/src/socket.h new file mode 100644 -index 0000000..a6a1e5e +index 0000000000..a6a1e5e214 --- /dev/null +++ b/slirp/src/socket.h @@ -0,0 +1,164 @@ @@ -9838,7 +9847,7 @@ index 0000000..a6a1e5e +#endif /* SLIRP_SOCKET_H */ diff --git a/slirp/src/state.c b/slirp/src/state.c new file mode 100644 -index 0000000..22af77b +index 0000000000..22af77b256 --- /dev/null +++ b/slirp/src/state.c @@ -0,0 +1,379 @@ @@ -10223,7 +10232,7 @@ index 0000000..22af77b +} diff --git a/slirp/src/stream.c b/slirp/src/stream.c new file mode 100644 -index 0000000..6cf326f +index 0000000000..6cf326f669 --- /dev/null +++ b/slirp/src/stream.c @@ -0,0 +1,120 @@ @@ -10349,7 +10358,7 @@ index 0000000..6cf326f +} diff --git a/slirp/src/stream.h b/slirp/src/stream.h new file mode 100644 -index 0000000..08bb5b6 +index 0000000000..08bb5b6610 --- /dev/null +++ b/slirp/src/stream.h @@ -0,0 +1,35 @@ @@ -10390,7 +10399,7 @@ index 0000000..08bb5b6 +#endif /* STREAM_H_ */ diff --git a/slirp/src/tcp.h b/slirp/src/tcp.h new file mode 100644 -index 0000000..70a9760 +index 0000000000..70a9760664 --- /dev/null +++ b/slirp/src/tcp.h @@ -0,0 +1,169 @@ @@ -10565,7 +10574,7 @@ index 0000000..70a9760 +#endif diff --git a/slirp/src/tcp_input.c b/slirp/src/tcp_input.c new file mode 100644 -index 0000000..d55b0c8 +index 0000000000..d55b0c81dc --- /dev/null +++ b/slirp/src/tcp_input.c @@ -0,0 +1,1539 @@ @@ -12110,7 +12119,7 @@ index 0000000..d55b0c8 +} diff --git a/slirp/src/tcp_output.c b/slirp/src/tcp_output.c new file mode 100644 -index 0000000..383fe31 +index 0000000000..383fe31dcf --- /dev/null +++ b/slirp/src/tcp_output.c @@ -0,0 +1,516 @@ @@ -12632,7 +12641,7 @@ index 0000000..383fe31 +} diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c new file mode 100644 -index 0000000..a1016d9 +index 0000000000..a1016d90df --- 
/dev/null +++ b/slirp/src/tcp_subr.c @@ -0,0 +1,980 @@ @@ -13618,7 +13627,7 @@ index 0000000..a1016d9 +} diff --git a/slirp/src/tcp_timer.c b/slirp/src/tcp_timer.c new file mode 100644 -index 0000000..102023e +index 0000000000..102023e7cd --- /dev/null +++ b/slirp/src/tcp_timer.c @@ -0,0 +1,286 @@ @@ -13910,7 +13919,7 @@ index 0000000..102023e +} diff --git a/slirp/src/tcp_timer.h b/slirp/src/tcp_timer.h new file mode 100644 -index 0000000..584a559 +index 0000000000..584a5594e4 --- /dev/null +++ b/slirp/src/tcp_timer.h @@ -0,0 +1,130 @@ @@ -14046,7 +14055,7 @@ index 0000000..584a559 +#endif diff --git a/slirp/src/tcp_var.h b/slirp/src/tcp_var.h new file mode 100644 -index 0000000..c8da8cb +index 0000000000..c8da8cbd16 --- /dev/null +++ b/slirp/src/tcp_var.h @@ -0,0 +1,161 @@ @@ -14213,7 +14222,7 @@ index 0000000..c8da8cb +#endif diff --git a/slirp/src/tcpip.h b/slirp/src/tcpip.h new file mode 100644 -index 0000000..d3df021 +index 0000000000..d3df021493 --- /dev/null +++ b/slirp/src/tcpip.h @@ -0,0 +1,104 @@ @@ -14323,7 +14332,7 @@ index 0000000..d3df021 +#endif diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c new file mode 100644 -index 0000000..c6950ee +index 0000000000..c6950ee10f --- /dev/null +++ b/slirp/src/tftp.c @@ -0,0 +1,464 @@ @@ -14793,7 +14802,7 @@ index 0000000..c6950ee +} diff --git a/slirp/src/tftp.h b/slirp/src/tftp.h new file mode 100644 -index 0000000..6d75478 +index 0000000000..6d75478e83 --- /dev/null +++ b/slirp/src/tftp.h @@ -0,0 +1,54 @@ @@ -14853,7 +14862,7 @@ index 0000000..6d75478 +#endif diff --git a/slirp/src/udp.c b/slirp/src/udp.c new file mode 100644 -index 0000000..0ad44d7 +index 0000000000..0ad44d7c03 --- /dev/null +++ b/slirp/src/udp.c @@ -0,0 +1,365 @@ @@ -15224,7 +15233,7 @@ index 0000000..0ad44d7 +} diff --git a/slirp/src/udp.h b/slirp/src/udp.h new file mode 100644 -index 0000000..c3b83fd +index 0000000000..c3b83fdc56 --- /dev/null +++ b/slirp/src/udp.h @@ -0,0 +1,90 @@ @@ -15320,7 +15329,7 @@ index 0000000..c3b83fd +#endif 
diff --git a/slirp/src/udp6.c b/slirp/src/udp6.c new file mode 100644 -index 0000000..6f9486b +index 0000000000..6f9486bbca --- /dev/null +++ b/slirp/src/udp6.c @@ -0,0 +1,173 @@ @@ -15499,7 +15508,7 @@ index 0000000..6f9486b +} diff --git a/slirp/src/util.c b/slirp/src/util.c new file mode 100644 -index 0000000..d3ed5fa +index 0000000000..d3ed5faf8b --- /dev/null +++ b/slirp/src/util.c @@ -0,0 +1,428 @@ @@ -15933,7 +15942,7 @@ index 0000000..d3ed5fa +} diff --git a/slirp/src/util.h b/slirp/src/util.h new file mode 100644 -index 0000000..d67b3d0 +index 0000000000..d67b3d0de9 --- /dev/null +++ b/slirp/src/util.h @@ -0,0 +1,189 @@ @@ -16128,7 +16137,7 @@ index 0000000..d67b3d0 +#endif diff --git a/slirp/src/version.c b/slirp/src/version.c new file mode 100644 -index 0000000..93e0be9 +index 0000000000..93e0be9c24 --- /dev/null +++ b/slirp/src/version.c @@ -0,0 +1,8 @@ @@ -16142,7 +16151,7 @@ index 0000000..93e0be9 +} diff --git a/slirp/src/vmstate.c b/slirp/src/vmstate.c new file mode 100644 -index 0000000..68cc172 +index 0000000000..68cc1729c5 --- /dev/null +++ b/slirp/src/vmstate.c @@ -0,0 +1,444 @@ @@ -16592,7 +16601,7 @@ index 0000000..68cc172 +} diff --git a/slirp/src/vmstate.h b/slirp/src/vmstate.h new file mode 100644 -index 0000000..94c6a4b +index 0000000000..94c6a4bc7b --- /dev/null +++ b/slirp/src/vmstate.h @@ -0,0 +1,391 @@ @@ -16988,5 +16997,5 @@ index 0000000..94c6a4b + +#endif -- -1.8.3.1 +2.18.4 diff --git a/0005-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch index 6b55be8..fb98b03 100644 --- a/0005-Initial-redhat-build.patch +++ b/0005-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From b3ec5c5cc8ed777c5c2cd1bd27f6684a99317953 Mon Sep 17 00:00:00 2001 +From e4cd78dda8017f181fa94bbad1f0f015a99271db Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Oct 2018 07:31:11 +0200 Subject: Initial redhat build @@ -11,7 +11,7 @@ several issues are fixed in QEMU tree: - Man page renamed from qemu to qemu-kvm - man page is installed using 
make install so we have to fix it in qemu tree -This rebase includes changes up to qemu-kvm-5.1.0-15.el8 +This rebase includes changes up to qemu-kvm-5.1.0-16.el8 Rebase notes (3.1.0): - added new configure options @@ -143,7 +143,7 @@ Merged patches (5.2.0 rc3): redhat/Makefile | 90 + redhat/Makefile.common | 53 + redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 3409 +++++++++++++++++++++++++++++++ + redhat/qemu-kvm.spec.template | 3402 +++++++++++++++++++++++ redhat/scripts/extract_build_cmd.py | 5 +- redhat/scripts/process-patches.sh | 17 +- scripts/qemu-guest-agent/fsfreeze-hook | 2 +- @@ -151,7 +151,7 @@ Merged patches (5.2.0 rc3): scripts/systemtap/script.d/qemu_kvm.stp | 1 + tests/check-block.sh | 2 + ui/vnc.c | 2 +- - 15 files changed, 3660 insertions(+), 16 deletions(-) + 15 files changed, 3653 insertions(+), 16 deletions(-) create mode 100644 README.systemtap create mode 100644 redhat/Makefile create mode 100644 redhat/Makefile.common @@ -162,7 +162,7 @@ Merged patches (5.2.0 rc3): diff --git a/README.systemtap b/README.systemtap new file mode 100644 -index 0000000..ad913fc +index 0000000000..ad913fc990 --- /dev/null +++ b/README.systemtap @@ -0,0 +1,43 @@ @@ -210,7 +210,7 @@ index 0000000..ad913fc +3. Translate the trace record to readable format. 
+ # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c -index 9296e1b..f70c5a8 100644 +index 9296e1bb6e..f70c5a8946 100644 --- a/hw/s390x/s390-pci-vfio.c +++ b/hw/s390x/s390-pci-vfio.c @@ -28,7 +28,7 @@ @@ -232,10 +232,10 @@ index 9296e1b..f70c5a8 100644 uint32_t argsz; int fd; diff --git a/meson.build b/meson.build -index 5062407..c1db9b8 100644 +index e3386196ba..8c38b2ea36 100644 --- a/meson.build +++ b/meson.build -@@ -1149,7 +1149,9 @@ if capstone_opt == 'internal' +@@ -1148,7 +1148,9 @@ if capstone_opt == 'internal' # Include all configuration defines via a header file, which will wind up # as a dependency on the object file, and thus changes here will result # in a rebuild. @@ -247,7 +247,7 @@ index 5062407..c1db9b8 100644 libcapstone = static_library('capstone', diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook -index 13aafd4..e9b84ec 100755 +index 13aafd4845..e9b84ec028 100755 --- a/scripts/qemu-guest-agent/fsfreeze-hook +++ b/scripts/qemu-guest-agent/fsfreeze-hook @@ -8,7 +8,7 @@ @@ -261,7 +261,7 @@ index 13aafd4..e9b84ec 100755 is_ignored_file() { diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf new file mode 100644 -index 0000000..372d816 +index 0000000000..372d8160a4 --- /dev/null +++ b/scripts/systemtap/conf.d/qemu_kvm.conf @@ -0,0 +1,4 @@ @@ -271,13 +271,13 @@ index 0000000..372d816 +qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp new file mode 100644 -index 0000000..c04abf9 +index 0000000000..c04abf9449 --- /dev/null +++ b/scripts/systemtap/script.d/qemu_kvm.stp @@ -0,0 +1 @@ +probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} diff --git a/tests/check-block.sh 
b/tests/check-block.sh -index f6b1bda..645b550 100755 +index f6b1bda7b9..645b550af8 100755 --- a/tests/check-block.sh +++ b/tests/check-block.sh @@ -58,6 +58,8 @@ if ! (sed --version | grep 'GNU sed') > /dev/null 2>&1 ; then @@ -290,7 +290,7 @@ index f6b1bda..645b550 100755 # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests diff --git a/ui/vnc.c b/ui/vnc.c -index 4923505..eb5520e 100644 +index 49235056f7..eb5520ed73 100644 --- a/ui/vnc.c +++ b/ui/vnc.c @@ -3982,7 +3982,7 @@ void vnc_display_open(const char *id, Error **errp) @@ -303,5 +303,5 @@ index 4923505..eb5520e 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -1.8.3.1 +2.18.4 diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0006-Enable-disable-devices-for-RHEL.patch index 50e42d5..0827288 100644 --- a/0006-Enable-disable-devices-for-RHEL.patch +++ b/0006-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 2ed436b54735a68c7f4422a8d6e5b4f3a7580fd3 Mon Sep 17 00:00:00 2001 +From 0ad3e82af785512a5a77373d2ad95c63dfedeaba Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:11:07 +0200 Subject: Enable/disable devices for RHEL @@ -86,33 +86,33 @@ Merged patches (5.2.0 rc0): - 8310f89 RHEL-only: Enable vTPM for ARM in downstream configs - 4a8ccfd Disable TPM passthrough backend on ARM --- - default-configs/devices/aarch64-rh-devices.mak | 27 +++++++ - default-configs/devices/aarch64-softmmu.mak | 10 ++- - default-configs/devices/ppc64-rh-devices.mak | 38 ++++++++++ - default-configs/devices/ppc64-softmmu.mak | 10 ++- - default-configs/devices/rh-virtio.mak | 10 +++ - default-configs/devices/s390x-rh-devices.mak | 15 ++++ - default-configs/devices/s390x-softmmu.mak | 4 +- - default-configs/devices/x86_64-rh-devices.mak | 101 +++++++++++++++++++++++++ - default-configs/devices/x86_64-softmmu.mak | 4 +- - hw/acpi/ich9.c | 4 +- - hw/arm/meson.build | 2 +- - hw/block/fdc.c | 10 +++ - hw/cpu/meson.build | 5 +- - 
hw/display/cirrus_vga.c | 3 + - hw/ide/piix.c | 5 +- - hw/input/pckbd.c | 2 + - hw/net/e1000.c | 2 + - hw/ppc/spapr_cpu_core.c | 2 + - hw/usb/meson.build | 2 +- - qemu-options.hx | 4 - - redhat/qemu-kvm.spec.template | 11 ++- - target/arm/cpu.c | 4 +- - target/arm/cpu_tcg.c | 3 + - target/ppc/cpu-models.c | 10 +++ - target/s390x/cpu_models.c | 3 + - target/s390x/kvm.c | 8 ++ - 26 files changed, 274 insertions(+), 25 deletions(-) + .../devices/aarch64-rh-devices.mak | 27 +++++ + default-configs/devices/aarch64-softmmu.mak | 10 +- + default-configs/devices/ppc64-rh-devices.mak | 38 +++++++ + default-configs/devices/ppc64-softmmu.mak | 10 +- + default-configs/devices/rh-virtio.mak | 10 ++ + default-configs/devices/s390x-rh-devices.mak | 15 +++ + default-configs/devices/s390x-softmmu.mak | 4 +- + default-configs/devices/x86_64-rh-devices.mak | 101 ++++++++++++++++++ + default-configs/devices/x86_64-softmmu.mak | 4 +- + hw/acpi/ich9.c | 4 +- + hw/arm/meson.build | 2 +- + hw/block/fdc.c | 10 ++ + hw/cpu/meson.build | 5 +- + hw/display/cirrus_vga.c | 3 + + hw/ide/piix.c | 5 +- + hw/input/pckbd.c | 2 + + hw/net/e1000.c | 2 + + hw/ppc/spapr_cpu_core.c | 2 + + hw/usb/meson.build | 2 +- + qemu-options.hx | 4 - + redhat/qemu-kvm.spec.template | 10 +- + target/arm/cpu.c | 4 +- + target/arm/cpu_tcg.c | 3 + + target/ppc/cpu-models.c | 10 ++ + target/s390x/cpu_models.c | 3 + + target/s390x/kvm.c | 8 ++ + 26 files changed, 273 insertions(+), 25 deletions(-) create mode 100644 default-configs/devices/aarch64-rh-devices.mak create mode 100644 default-configs/devices/ppc64-rh-devices.mak create mode 100644 default-configs/devices/rh-virtio.mak @@ -121,7 +121,7 @@ Merged patches (5.2.0 rc0): diff --git a/default-configs/devices/aarch64-rh-devices.mak b/default-configs/devices/aarch64-rh-devices.mak new file mode 100644 -index 0000000..9831940 +index 0000000000..98319407de --- /dev/null +++ b/default-configs/devices/aarch64-rh-devices.mak @@ -0,0 +1,27 @@ @@ -153,7 +153,7 @@ index 
0000000..9831940 +CONFIG_TPM_TIS_SYSBUS=y +CONFIG_PTIMER=y diff --git a/default-configs/devices/aarch64-softmmu.mak b/default-configs/devices/aarch64-softmmu.mak -index 958b1e0..8f6867d 100644 +index 958b1e08e4..8f6867d48a 100644 --- a/default-configs/devices/aarch64-softmmu.mak +++ b/default-configs/devices/aarch64-softmmu.mak @@ -1,8 +1,10 @@ @@ -173,7 +173,7 @@ index 958b1e0..8f6867d 100644 +include aarch64-rh-devices.mak diff --git a/default-configs/devices/ppc64-rh-devices.mak b/default-configs/devices/ppc64-rh-devices.mak new file mode 100644 -index 0000000..467a16b +index 0000000000..467a16bdc2 --- /dev/null +++ b/default-configs/devices/ppc64-rh-devices.mak @@ -0,0 +1,38 @@ @@ -216,7 +216,7 @@ index 0000000..467a16b +CONFIG_TPM_EMULATOR=y +CONFIG_TPM_PASSTHROUGH=y diff --git a/default-configs/devices/ppc64-softmmu.mak b/default-configs/devices/ppc64-softmmu.mak -index ae0841f..040e557 100644 +index ae0841fa3a..040e5575e7 100644 --- a/default-configs/devices/ppc64-softmmu.mak +++ b/default-configs/devices/ppc64-softmmu.mak @@ -1,11 +1,13 @@ @@ -239,7 +239,7 @@ index ae0841f..040e557 100644 +include ppc64-rh-devices.mak diff --git a/default-configs/devices/rh-virtio.mak b/default-configs/devices/rh-virtio.mak new file mode 100644 -index 0000000..94ede1b +index 0000000000..94ede1b5f6 --- /dev/null +++ b/default-configs/devices/rh-virtio.mak @@ -0,0 +1,10 @@ @@ -255,7 +255,7 @@ index 0000000..94ede1b +CONFIG_VIRTIO_SERIAL=y diff --git a/default-configs/devices/s390x-rh-devices.mak b/default-configs/devices/s390x-rh-devices.mak new file mode 100644 -index 0000000..c3c73fe +index 0000000000..c3c73fe752 --- /dev/null +++ b/default-configs/devices/s390x-rh-devices.mak @@ -0,0 +1,15 @@ @@ -275,7 +275,7 @@ index 0000000..c3c73fe +CONFIG_VIRTIO_CCW=y +CONFIG_WDT_DIAG288=y diff --git a/default-configs/devices/s390x-softmmu.mak b/default-configs/devices/s390x-softmmu.mak -index f2287a1..3e2e388 100644 +index f2287a133f..3e2e388e91 100644 --- 
a/default-configs/devices/s390x-softmmu.mak +++ b/default-configs/devices/s390x-softmmu.mak @@ -10,4 +10,6 @@ @@ -288,7 +288,7 @@ index f2287a1..3e2e388 100644 +include s390x-rh-devices.mak diff --git a/default-configs/devices/x86_64-rh-devices.mak b/default-configs/devices/x86_64-rh-devices.mak new file mode 100644 -index 0000000..e80877d +index 0000000000..e80877d4e2 --- /dev/null +++ b/default-configs/devices/x86_64-rh-devices.mak @@ -0,0 +1,101 @@ @@ -394,7 +394,7 @@ index 0000000..e80877d +CONFIG_TPM_EMULATOR=y +CONFIG_TPM_PASSTHROUGH=y diff --git a/default-configs/devices/x86_64-softmmu.mak b/default-configs/devices/x86_64-softmmu.mak -index 64b2ee2..b5de7e5 100644 +index 64b2ee2960..b5de7e5279 100644 --- a/default-configs/devices/x86_64-softmmu.mak +++ b/default-configs/devices/x86_64-softmmu.mak @@ -1,3 +1,5 @@ @@ -405,7 +405,7 @@ index 64b2ee2..b5de7e5 100644 + +include x86_64-rh-devices.mak diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 5ff4e01..ac45ca4 100644 +index 5ff4e01c36..ac45ca4acb 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -374,8 +374,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) @@ -420,7 +420,7 @@ index 5ff4e01..ac45ca4 100644 object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, diff --git a/hw/arm/meson.build b/hw/arm/meson.build -index be39117..6fcc5ed 100644 +index be39117b9b..6fcc5ede50 100644 --- a/hw/arm/meson.build +++ b/hw/arm/meson.build @@ -30,7 +30,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) @@ -433,7 +433,7 @@ index be39117..6fcc5ed 100644 arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 4c2c35e..e9eb7b8 100644 +index 4c2c35e223..e9eb7b8279 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -48,6 +48,8 @@ @@ -461,7 +461,7 @@ index 4c2c35e..e9eb7b8 100644 error_setg(errp, "Cannot choose a fallback FDrive type 
of 'auto'"); return; diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build -index 9e52fee..bb71c9f 100644 +index 9e52fee9e7..bb71c9f3e7 100644 --- a/hw/cpu/meson.build +++ b/hw/cpu/meson.build @@ -1,6 +1,7 @@ @@ -475,7 +475,7 @@ index 9e52fee..bb71c9f 100644 -specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) +#specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index fdca6ca..fa1a7ee 100644 +index fdca6ca659..fa1a7eee51 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c @@ -2945,6 +2945,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) @@ -489,7 +489,7 @@ index fdca6ca..fa1a7ee 100644 Also accept 8 MB/16 MB for backward compatibility. */ if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index b9860e3..beb1ea6 100644 +index b9860e35a5..beb1ea6c46 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c @@ -220,7 +220,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) @@ -512,7 +512,7 @@ index b9860e3..beb1ea6 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index dde85ba..62cf60c 100644 +index dde85ba6c6..62cf60c9c9 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c @@ -597,6 +597,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) @@ -525,7 +525,7 @@ index dde85ba..62cf60c 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index d7d05ae..aaea06d 100644 +index d7d05ae30a..aaea06d29c 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -1796,6 +1796,7 @@ static const E1000Info e1000_devices[] = { @@ -545,7 +545,7 @@ index d7d05ae..aaea06d 100644 static void e1000_register_types(void) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 2f7dc3c..55d36e0 100644 +index 2f7dc3c23d..55d36e0069 100644 --- a/hw/ppc/spapr_cpu_core.c +++ 
b/hw/ppc/spapr_cpu_core.c @@ -376,10 +376,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { @@ -562,7 +562,7 @@ index 2f7dc3c..55d36e0 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/meson.build b/hw/usb/meson.build -index 934e4fa..e3abba5 100644 +index 934e4fa675..e3abba548a 100644 --- a/hw/usb/meson.build +++ b/hw/usb/meson.build @@ -48,7 +48,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade @@ -575,7 +575,7 @@ index 934e4fa..e3abba5 100644 endif diff --git a/qemu-options.hx b/qemu-options.hx -index 104632e..363a15b 100644 +index 104632ea34..363a15b4e8 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2251,10 +2251,6 @@ ERST @@ -590,7 +590,7 @@ index 104632e..363a15b 100644 DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 07492e9..a048714 100644 +index 07492e9f9a..a0487148e8 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -2358,7 +2358,9 @@ static void arm_cpu_register_types(void) @@ -605,7 +605,7 @@ index 07492e9..a048714 100644 } } diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index 0013e25..6540046 100644 +index 0013e25412..6540046128 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c @@ -679,6 +679,9 @@ static void arm_tcg_cpu_register_types(void) @@ -619,7 +619,7 @@ index 0013e25..6540046 100644 arm_cpu_register(&arm_tcg_cpus[i]); } diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 87e4228..6eaa65e 100644 +index 87e4228614..6eaa65efff 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -689,7 +689,7 @@ index 87e4228..6eaa65e 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index b5abff8..abe09d7 100644 +index 
b5abff8bef..abe09d73c2 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -408,6 +408,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, @@ -703,7 +703,7 @@ index b5abff8..abe09d7 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index b8385e6..1839cc6 100644 +index b8385e6b95..1839cc6648 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -2552,6 +2552,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) @@ -722,5 +722,5 @@ index b8385e6..1839cc6 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ -- -1.8.3.1 +2.18.4 diff --git a/0007-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch index a630ff8..a6f8696 100644 --- a/0007-Machine-type-related-general-changes.patch +++ b/0007-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From c08267680d5cdede8c1b80591f294f8c0e8a2ddc Mon Sep 17 00:00:00 2001 +From b97fdd8e425f1c9a156ebdfbdce986d9351c0d19 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -46,21 +46,21 @@ Merged patches (5.2.0 rc0): hw/acpi/ich9.c | 15 +++ hw/acpi/piix4.c | 5 +- hw/arm/virt.c | 2 +- - hw/char/serial.c | 16 ++++ - hw/core/machine.c | 213 +++++++++++++++++++++++++++++++++++++++++++ + hw/char/serial.c | 16 +++ + hw/core/machine.c | 213 +++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- hw/i386/pc_piix.c | 2 + hw/i386/pc_q35.c | 2 + - hw/net/e1000e.c | 21 +++++ + hw/net/e1000e.c | 21 ++++ hw/net/rtl8139.c | 4 +- - hw/rtc/mc146818rtc.c | 6 ++ - hw/smbios/smbios.c | 46 +++++++++- + hw/rtc/mc146818rtc.c | 6 + + hw/smbios/smbios.c | 46 +++++++- hw/timer/i8254_common.c | 2 +- hw/usb/hcd-uhci.c | 4 +- hw/usb/hcd-xhci.c | 20 ++++ hw/usb/hcd-xhci.h | 2 + include/hw/acpi/ich9.h | 3 + - include/hw/boards.h | 27 
++++++ + include/hw/boards.h | 27 +++++ include/hw/firmware/smbios.h | 5 +- include/hw/i386/pc.h | 3 + include/hw/usb.h | 4 + @@ -69,7 +69,7 @@ Merged patches (5.2.0 rc0): 23 files changed, 400 insertions(+), 11 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index ac45ca4..0b35b35 100644 +index ac45ca4acb..0b35b35b28 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -369,6 +369,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) @@ -102,7 +102,7 @@ index ac45ca4..0b35b35 100644 &pm->disable_s3, OBJ_PROP_FLAG_READWRITE); object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S4_DISABLED, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 669be5b..2063131 100644 +index 669be5bbf6..2063131bcc 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -277,6 +277,7 @@ static const VMStateDescription vmstate_acpi = { @@ -125,7 +125,7 @@ index 669be5b..2063131 100644 DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_hotplug_bridge, true), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 27dbeb5..c908b5f 100644 +index 27dbeb549e..c908b5fcf4 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1441,7 +1441,7 @@ static void virt_build_smbios(VirtMachineState *vms) @@ -138,7 +138,7 @@ index 27dbeb5..c908b5f 100644 smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, &smbios_anchor, &smbios_anchor_len); diff --git a/hw/char/serial.c b/hw/char/serial.c -index 97f7187..aeb207e 100644 +index 97f71879ff..aeb207ef73 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -35,6 +35,7 @@ @@ -193,7 +193,7 @@ index 97f7187..aeb207e 100644 } diff --git a/hw/core/machine.c b/hw/core/machine.c -index d040804..19d50dd 100644 +index d0408049b5..19d50dde45 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -28,6 +28,219 @@ @@ -417,7 +417,7 @@ index d040804..19d50dd 100644 { "vhost-scsi", "num_queues", "1"}, { "vhost-user-blk", "num-queues", "1"}, diff --git a/hw/display/vga-isa.c 
b/hw/display/vga-isa.c -index 90851e7..a91c5d7 100644 +index 90851e730b..a91c5d7467 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c @@ -85,7 +85,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) @@ -430,7 +430,7 @@ index 90851e7..a91c5d7 100644 }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 13d1628..9fcc5aa 100644 +index 13d1628f13..9fcc5aaf69 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -179,6 +179,8 @@ static void pc_init1(MachineState *machine, @@ -443,7 +443,7 @@ index 13d1628..9fcc5aa 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index a3f4959..f6c2ef4 100644 +index a3f4959c43..f6c2ef4e43 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -198,6 +198,8 @@ static void pc_q35_init(MachineState *machine) @@ -456,7 +456,7 @@ index a3f4959..f6c2ef4 100644 } diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index a8a77ec..6d39c1f 100644 +index a8a77eca95..6d39c1f1c4 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -80,6 +80,11 @@ struct E1000EState { @@ -530,7 +530,7 @@ index a8a77ec..6d39c1f 100644 e1000e_prop_disable_vnet, bool), DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index ba5ace1..a2e6e83 100644 +index ba5ace1ab7..a2e6e83522 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3179,7 +3179,7 @@ static int rtl8139_pre_save(void *opaque) @@ -553,7 +553,7 @@ index ba5ace1..a2e6e83 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c -index 7a38540..377d861 100644 +index 7a38540cb9..377d861913 100644 --- a/hw/rtc/mc146818rtc.c +++ b/hw/rtc/mc146818rtc.c @@ -43,6 +43,7 @@ @@ -577,7 +577,7 @@ index 7a38540..377d861 100644 } diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 6a3d397..232fd61 100644 +index 6a3d39793b..232fd61bf8 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -56,6 +56,9 @@ static bool smbios_legacy = true; @@ -659,7 
+659,7 @@ index 6a3d397..232fd61 100644 SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); SMBIOS_SET_DEFAULT(type3.version, version); diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index 050875b..32935da 100644 +index 050875b497..32935da46c 100644 --- a/hw/timer/i8254_common.c +++ b/hw/timer/i8254_common.c @@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = { @@ -672,7 +672,7 @@ index 050875b..32935da 100644 vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 27ca237..eb24e39 100644 +index 27ca237d71..eb24e39b81 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c @@ -1221,12 +1221,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) @@ -692,7 +692,7 @@ index 27ca237..eb24e39 100644 if (s->masterbus) { USBPort *ports[NB_PORTS]; diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 9ce7ca7..0af661c 100644 +index 9ce7ca706e..0af661ce1d 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -3491,9 +3491,27 @@ static const VMStateDescription vmstate_xhci_slot = { @@ -733,7 +733,7 @@ index 9ce7ca7..0af661c 100644 } }; diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h -index 02ebd76..dfda04b 100644 +index 02ebd76450..dfda04b125 100644 --- a/hw/usb/hcd-xhci.h +++ b/hw/usb/hcd-xhci.h @@ -149,6 +149,8 @@ typedef struct XHCIEvent { @@ -746,7 +746,7 @@ index 02ebd76..dfda04b 100644 typedef struct XHCIInterrupter { diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h -index 54571c7..b3369da 100644 +index 54571c77e0..b3369dab9e 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -61,6 +61,9 @@ typedef struct ICH9LPCPMRegs { @@ -760,7 +760,7 @@ index 54571c7..b3369da 100644 #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" diff --git a/include/hw/boards.h b/include/hw/boards.h -index a49e3a6..dd18c9e 100644 +index a49e3a6b44..dd18c9e94d 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ 
-367,4 +367,31 @@ extern const size_t hw_compat_2_2_len; @@ -796,7 +796,7 @@ index a49e3a6..dd18c9e 100644 + #endif diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 02a0ced..67e38a1 100644 +index 02a0ced0a0..67e38a1b13 100644 --- a/include/hw/firmware/smbios.h +++ b/include/hw/firmware/smbios.h @@ -267,7 +267,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); @@ -812,7 +812,7 @@ index 02a0ced..67e38a1 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 911e460..ae6bf1d 100644 +index 911e460097..ae6bf1d209 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -104,6 +104,9 @@ struct PCMachineClass { @@ -826,7 +826,7 @@ index 911e460..ae6bf1d 100644 /* RAM / address space compat: */ bool gigabyte_align; diff --git a/include/hw/usb.h b/include/hw/usb.h -index a70a72e..78b9043 100644 +index a70a72e917..78b90436c9 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h @@ -570,4 +570,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, @@ -839,7 +839,7 @@ index a70a72e..78b9043 100644 + #endif diff --git a/migration/migration.c b/migration/migration.c -index 87a9b59..1bb8d01 100644 +index 87a9b59f83..1bb8d012e6 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -134,6 +134,8 @@ enum mig_rp_message_type { @@ -852,7 +852,7 @@ index 87a9b59..1bb8d01 100644 migrations at once. 
For now we don't need to add dynamic creation of migration */ diff --git a/migration/migration.h b/migration/migration.h -index d096b77..6134a53 100644 +index d096b77f74..6134a534b3 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -364,6 +364,11 @@ bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm, @@ -868,5 +868,5 @@ index d096b77..6134a53 100644 #define qemu_ram_foreach_block \ #warning "Use foreach_not_ignored_block in migration code" -- -1.8.3.1 +2.18.4 diff --git a/0008-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch index 57b281c..e252a3c 100644 --- a/0008-Add-aarch64-machine-types.patch +++ b/0008-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 57f949e002928186b80562fe517e1d83464c24fd Mon Sep 17 00:00:00 2001 +From fcf44f2334a6d82709b9c64d45fa2ab1aec595b9 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -50,12 +50,12 @@ Merged patches (5.2.0 rc0): - 6d7ba66 machine types/numa: set numa_mem_supported on old machine types (partialy) - 25c5644 machine_types/numa: compatibility for auto_enable_numa_with_memdev (partialy) --- - hw/arm/virt.c | 191 +++++++++++++++++++++++++++++++++++++++++++++++++- - include/hw/arm/virt.h | 8 +++ + hw/arm/virt.c | 191 +++++++++++++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 8 ++ 2 files changed, 196 insertions(+), 3 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index c908b5f..21e0485 100644 +index c908b5fcf4..21e0485ac5 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -79,6 +79,7 @@ @@ -316,7 +316,7 @@ index c908b5f..21e0485 100644 +} +DEFINE_RHEL_MACHINE(8, 2, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index aad6d69..745b76b 100644 +index aad6d69841..745b76b186 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -167,9 +167,17 @@ struct VirtMachineState { @@ -338,5 +338,5 @@ index aad6d69..745b76b 100644 bool 
virt_is_acpi_enabled(VirtMachineState *vms); -- -1.8.3.1 +2.18.4 diff --git a/0009-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch index fd3fda5..bee2ba9 100644 --- a/0009-Add-ppc64-machine-types.patch +++ b/0009-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 965f17e40984c06f87be2dad8100f4742412cc05 Mon Sep 17 00:00:00 2001 +From 06a8855e3b36996d4478219c008986877a253674 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -40,7 +40,7 @@ Merged patches (5.2.0 rc0): - 1ab8783 redhat: update pseries-rhel8.2.0 machine type - b162af531a target/ppc: Add experimental option for enabling secure guests --- - hw/ppc/spapr.c | 337 ++++++++++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr.c | 337 ++++++++++++++++++++++++++++++++++++++++ hw/ppc/spapr_cpu_core.c | 13 ++ include/hw/ppc/spapr.h | 4 + target/ppc/compat.c | 13 +- @@ -50,7 +50,7 @@ Merged patches (5.2.0 rc0): 7 files changed, 407 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 12a012d..4a838cc 100644 +index 12a012d9dd..4a838cc955 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1585,6 +1585,9 @@ static void spapr_machine_reset(MachineState *machine) @@ -461,7 +461,7 @@ index 12a012d..4a838cc 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 55d36e0..008074b 100644 +index 55d36e0069..008074bae0 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -24,6 +24,7 @@ @@ -499,7 +499,7 @@ index 55d36e0..008074b 100644 qdev_unrealize(DEVICE(cpu)); return false; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 2e89e36..ba2d814 100644 +index 2e89e36cfb..ba2d81404b 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -140,6 +140,7 @@ struct SpaprMachineClass { @@ -521,7 +521,7 @@ index 2e89e36..ba2d814 100644 char *kvm_type; char *host_model; diff --git 
a/target/ppc/compat.c b/target/ppc/compat.c -index 7949a24..f207a9b 100644 +index 7949a24f5a..f207a9ba01 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) @@ -546,7 +546,7 @@ index 7949a24..f207a9b 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index 2609e40..21c63b5 100644 +index 2609e4082e..21c63b5360 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1347,6 +1347,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) @@ -558,7 +558,7 @@ index 2609e40..21c63b5 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index daf690a..9bf3449 100644 +index daf690a678..9bf3449adb 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -89,6 +89,7 @@ static int cap_ppc_count_cache_flush_assist; @@ -614,7 +614,7 @@ index daf690a..9bf3449 100644 + } +} diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h -index 73ce2bc..1239b84 100644 +index 73ce2bc951..1239b841fd 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -40,6 +40,7 @@ target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, @@ -652,5 +652,5 @@ index 73ce2bc..1239b84 100644 { return -1; -- -1.8.3.1 +2.18.4 diff --git a/0010-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch index 9c1ea62..606a004 100644 --- a/0010-Add-s390x-machine-types.patch +++ b/0010-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 79dafd0d91aecadc163685311c220dc2d7a49add Mon Sep 17 00:00:00 2001 +From 36540969ad3b08f1964c71406f1fc14c0e5b47de Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -21,11 +21,11 @@ Merged patches (4.2.0): - a9b22e8 redhat: s390x: Add proper compatibility options for the 
-rhel7.6.0 machine - hw/s390x: Add the s390-ccw-virtio-rhel8.2.0 machine types (patch 92954) --- - hw/s390x/s390-virtio-ccw.c | 71 +++++++++++++++++++++++++++++++++++++++++++++- + hw/s390x/s390-virtio-ccw.c | 71 +++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 4e140bb..b8dde7e 100644 +index 4e140bbead..b8dde7e4e1 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -765,7 +765,7 @@ bool css_migration_enabled(void) @@ -121,5 +121,5 @@ index 4e140bb..b8dde7e 100644 static void ccw_machine_register_types(void) { -- -1.8.3.1 +2.18.4 diff --git a/0011-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch index 74e6ccc..63656ab 100644 --- a/0011-Add-x86_64-machine-types.patch +++ b/0011-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 3fb64e4127e2b74f0d93a51dd3709fe30adc1d23 Mon Sep 17 00:00:00 2001 +From 004d31cf0e8bb83374a85ecab59eb22683a1e361 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -49,17 +49,17 @@ Merged patches (5.2.0 rc0): - e2d3209 x86: lpc9: let firmware negotiate 'CPU hotplug with SMI' features (partialy) --- hw/i386/acpi-build.c | 3 + - hw/i386/pc.c | 273 ++++++++++++++++++++++++++++++++++++++++++++++++++- - hw/i386/pc_piix.c | 215 +++++++++++++++++++++++++++++++++++++++- - hw/i386/pc_q35.c | 185 +++++++++++++++++++++++++++++++++- + hw/i386/pc.c | 273 ++++++++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 215 +++++++++++++++++++++++++++++++++- + hw/i386/pc_q35.c | 185 ++++++++++++++++++++++++++++- include/hw/boards.h | 2 + - include/hw/i386/pc.h | 36 +++++++ + include/hw/i386/pc.h | 36 ++++++ target/i386/cpu.c | 3 +- target/i386/kvm.c | 4 + 8 files changed, 714 insertions(+), 7 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index 1f5c211..b1082bd 100644 +index 1f5c211245..b1082bd412 100644 
--- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -217,6 +217,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) @@ -73,7 +73,7 @@ index 1f5c211..b1082bd 100644 pm->smi_on_cpuhp = !!(smi_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT)); diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 17b514d..f3fc695 100644 +index 17b514d1da..f3fc695fe2 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -352,6 +352,271 @@ GlobalProperty pc_compat_1_4[] = { @@ -378,7 +378,7 @@ index 17b514d..f3fc695 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 9fcc5aa..815da79 100644 +index 9fcc5aaf69..815da79108 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -54,6 +54,7 @@ @@ -622,7 +622,7 @@ index 9fcc5aa..815da79 100644 +DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index f6c2ef4..3340008 100644 +index f6c2ef4e43..3340008c00 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -195,8 +195,8 @@ static void pc_q35_init(MachineState *machine) @@ -829,7 +829,7 @@ index f6c2ef4..3340008 100644 +DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, + pc_q35_machine_rhel730_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index dd18c9e..4e4a54b 100644 +index dd18c9e94d..4e4a54b313 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -202,6 +202,8 @@ struct MachineClass { @@ -842,7 +842,7 @@ index dd18c9e..4e4a54b 100644 bool smbus_no_migration_support; bool nvdimm_supported; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index ae6bf1d..e2ba9a4 100644 +index ae6bf1d209..e2ba9a4b58 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -125,6 +125,9 @@ struct PCMachineClass { @@ -896,7 +896,7 @@ index ae6bf1d..e2ba9a4 100644 * depending on QEMU versions up to QEMU 2.4. 
*/ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 5a8c960..dc592e9 100644 +index 5a8c96072e..dc592e990e 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1803,7 +1803,7 @@ static X86CPUDefinition builtin_x86_defs[] = { @@ -917,7 +917,7 @@ index 5a8c960..dc592e9 100644 }; diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index a2934dd..19bc39b 100644 +index a2934dda02..19bc39b9e3 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -3126,6 +3126,7 @@ static int kvm_get_msrs(X86CPU *cpu) @@ -939,5 +939,5 @@ index a2934dd..19bc39b 100644 case MSR_KVM_ASYNC_PF_INT: env->async_pf_int_msr = msrs[i].data; -- -1.8.3.1 +2.18.4 diff --git a/0012-Enable-make-check.patch b/0012-Enable-make-check.patch index 5e51706..fa397df 100644 --- a/0012-Enable-make-check.patch +++ b/0012-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 2621db7ae95fdf112a7e1798ae428a865ae55b59 Mon Sep 17 00:00:00 2001 +From 28d744b42d381b15254706f90fed3310ce4a5116 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -31,16 +31,19 @@ Rebase changes (5.2.0 rc0): - Disable cdrom tests (unsupported devices) on x86_64 - disable fuzz test +Rebaes changes (RHEL 9): +- disable block-iothreads test + Merged patches (4.0.0): - f7ffd13 Remove 7 qcow2 and luks iotests that are taking > 25 sec to run during the fast train build proce Merged patches (4.1.0-rc0): - 41288ff redhat: Remove raw iotest 205 -Conflicts: - redhat/qemu-kvm.spec.template +Dissable problematic tests --- redhat/qemu-kvm.spec.template | 4 ++-- + tests/meson.build | 2 +- tests/qemu-iotests/051 | 12 ++++++------ tests/qtest/boot-serial-test.c | 6 +++++- tests/qtest/cdrom-test.c | 2 ++ @@ -51,10 +54,23 @@ Conflicts: tests/qtest/prom-env-test.c | 4 ++++ tests/qtest/test-x86-cpuid-compat.c | 2 ++ tests/qtest/usb-hcd-xhci-test.c | 4 ++++ - 11 files changed, 35 insertions(+), 19 deletions(-) + 12 files changed, 36 insertions(+), 20 deletions(-) +diff --git 
a/tests/meson.build b/tests/meson.build +index afeb6be689..e562a0499e 100644 +--- a/tests/meson.build ++++ b/tests/meson.build +@@ -136,7 +136,7 @@ if have_block + 'test-blockjob': [testblock], + 'test-blockjob-txn': [testblock], + 'test-block-backend': [testblock], +- 'test-block-iothread': [testblock], ++# 'test-block-iothread': [testblock], + 'test-write-threshold': [testblock], + 'test-crypto-hash': [crypto], + 'test-crypto-hmac': [crypto], diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 -index bee2607..61d25c4 100755 +index bee26075b2..61d25c4ed7 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -183,11 +183,11 @@ run_qemu -drive if=virtio @@ -88,7 +104,7 @@ index bee2607..61d25c4 100755 *) ;; diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c -index b6b1c23..cefa1b3 100644 +index b6b1c23cd0..cefa1b38b7 100644 --- a/tests/qtest/boot-serial-test.c +++ b/tests/qtest/boot-serial-test.c @@ -120,19 +120,23 @@ static testdef_t tests[] = { @@ -117,7 +133,7 @@ index b6b1c23..cefa1b3 100644 { "sparc", "LX", "", "TMS390S10" }, { "sparc", "SS-4", "", "MB86904" }, diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c -index 5af944a..cd5b8e0 100644 +index 5af944a5fb..cd5b8e0f16 100644 --- a/tests/qtest/cdrom-test.c +++ b/tests/qtest/cdrom-test.c @@ -140,6 +140,7 @@ static void add_x86_tests(void) @@ -137,7 +153,7 @@ index 5af944a..cd5b8e0 100644 static void add_s390x_tests(void) diff --git a/tests/qtest/cpu-plug-test.c b/tests/qtest/cpu-plug-test.c -index a1c6894..a8f0767 100644 +index a1c689414b..a8f076711c 100644 --- a/tests/qtest/cpu-plug-test.c +++ b/tests/qtest/cpu-plug-test.c @@ -110,8 +110,8 @@ static void add_pseries_test_case(const char *mname) @@ -152,7 +168,7 @@ index a1c6894..a8f0767 100644 } data = g_new(PlugTestData, 1); diff --git a/tests/qtest/e1000-test.c b/tests/qtest/e1000-test.c -index ea286d1..a1847ac 100644 +index ea286d1793..a1847ac8ed 100644 --- a/tests/qtest/e1000-test.c +++ 
b/tests/qtest/e1000-test.c @@ -22,9 +22,11 @@ struct QE1000 { @@ -168,7 +184,7 @@ index ea286d1..a1847ac 100644 static void *e1000_get_driver(void *obj, const char *interface) diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c -index f7b7cfb..99cccf8 100644 +index f7b7cfbc2d..99cccf8638 100644 --- a/tests/qtest/hd-geo-test.c +++ b/tests/qtest/hd-geo-test.c @@ -737,6 +737,7 @@ static void test_override_ide(void) @@ -200,7 +216,7 @@ index f7b7cfb..99cccf8 100644 qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); qtest_add_func("hd-geo/override/scsi_hot_unplug", diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index c19f1c8..15ed460 100644 +index c19f1c8503..15ed460ff0 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -51,16 +51,13 @@ qtests_i386 = \ @@ -249,7 +265,7 @@ index c19f1c8..15ed460 100644 'cpu-plug-test', 'migration-test'] diff --git a/tests/qtest/prom-env-test.c b/tests/qtest/prom-env-test.c -index f41d801..f8dc478 100644 +index f41d80154a..f8dc478ce8 100644 --- a/tests/qtest/prom-env-test.c +++ b/tests/qtest/prom-env-test.c @@ -89,10 +89,14 @@ int main(int argc, char *argv[]) @@ -268,7 +284,7 @@ index f41d801..f8dc478 100644 add_tests(sparc_machines); } else if (!strcmp(arch, "sparc64")) { diff --git a/tests/qtest/test-x86-cpuid-compat.c b/tests/qtest/test-x86-cpuid-compat.c -index 7ca1883..983aa07 100644 +index 7ca1883a29..983aa0719a 100644 --- a/tests/qtest/test-x86-cpuid-compat.c +++ b/tests/qtest/test-x86-cpuid-compat.c @@ -300,6 +300,7 @@ int main(int argc, char **argv) @@ -288,7 +304,7 @@ index 7ca1883..983aa07 100644 /* Test feature parsing */ add_feature_test("x86/cpuid/features/plus", diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c -index 10ef9d2..3855873 100644 +index 10ef9d2a91..3855873050 100644 --- a/tests/qtest/usb-hcd-xhci-test.c +++ b/tests/qtest/usb-hcd-xhci-test.c @@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) @@ -318,5 +334,5 @@ 
index 10ef9d2..3855873 100644 qtest_start("-device nec-usb-xhci,id=xhci" -- -1.8.3.1 +2.18.4 diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index bbed9bb..90d78d2 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From bb05135a744ae87847bcaf2344f826664dc9e19c Mon Sep 17 00:00:00 2001 +From 514eb840d98c8047e88fb503a4bba71455a2e8b0 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -38,7 +38,7 @@ Merged patches (2.9.0): 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 51dc373..06ce2a3 100644 +index 51dc373695..06ce2a39aa 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -45,6 +45,9 @@ @@ -94,7 +94,7 @@ index 51dc373..06ce2a3 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 1574ef9..fef907c 100644 +index 1574ef983f..fef907c112 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -139,6 +139,7 @@ struct VFIOPCIDevice { @@ -106,5 +106,5 @@ index 1574ef9..fef907c 100644 uint32_t device_id; uint32_t sub_vendor_id; -- -1.8.3.1 +2.18.4 diff --git a/0014-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch index 0a9f748..4ae3d79 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From a2490cc686e14979a82f176a76ca0f5ec22082ad Mon Sep 17 00:00:00 2001 +From f63ec823f8df7024f33c145b88a2b50c589cc633 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -21,7 +21,7 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c -index 
e6e0ad5..065d52e 100644 +index e6e0ad5a92..065d52e8dc 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -1688,9 +1688,17 @@ static void version(void) @@ -51,5 +51,5 @@ index e6e0ad5..065d52e 100644 } -- -1.8.3.1 +2.18.4 diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch index 6e4d4ce..5df16d4 100644 --- a/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From b0baccfdfb10c34d9f9d35363e098dab7f376fe9 Mon Sep 17 00:00:00 2001 +From 6eddce7d3e8cd95c4b848fe3f7c5ac27854dc0da Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -38,7 +38,7 @@ Merged patches (5.2.0 rc0): 1 file changed, 12 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index baaa542..a1fbda0 100644 +index baaa54249d..a1fbda0945 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2108,6 +2108,18 @@ static int kvm_init(MachineState *ms) @@ -61,5 +61,5 @@ index baaa542..a1fbda0 100644 if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " -- -1.8.3.1 +2.18.4 diff --git a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 6ec42b2..9c9d07d 100644 --- a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From f72477389598ad4fee78640ec3a96166f00baf97 Mon Sep 17 00:00:00 2001 +From c615fb7d219b7b88f6517d6772d92e233007aff3 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -31,7 +31,7 @@ Rebase notes (5.2.0 rc0): 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/defs.rst.inc b/docs/defs.rst.inc -index 48d05aa..d74dbde 
100644 +index 48d05aaf33..d74dbdeca9 100644 --- a/docs/defs.rst.inc +++ b/docs/defs.rst.inc @@ -9,7 +9,7 @@ @@ -45,7 +45,7 @@ index 48d05aa..d74dbde 100644 .. |I2C| replace:: I\ :sup:`2`\ C .. |I2S| replace:: I\ :sup:`2`\ S diff --git a/docs/interop/live-block-operations.rst b/docs/interop/live-block-operations.rst -index e13f5a2..6650b2c 100644 +index e13f5a21f8..6650b2c975 100644 --- a/docs/interop/live-block-operations.rst +++ b/docs/interop/live-block-operations.rst @@ -129,7 +129,7 @@ To show some example invocations of command-line, we will use the @@ -67,7 +67,7 @@ index e13f5a2..6650b2c 100644 -blockdev node-name=node-TargetDisk,driver=qcow2,file.driver=file,file.node-name=file,file.filename=./target-disk.qcow2 \ -device virtio-blk,drive=node-TargetDisk,id=virtio0 \ diff --git a/docs/tools/qemu-trace-stap.rst b/docs/tools/qemu-trace-stap.rst -index fb70445..0d9a783 100644 +index fb70445c75..0d9a783112 100644 --- a/docs/tools/qemu-trace-stap.rst +++ b/docs/tools/qemu-trace-stap.rst @@ -45,19 +45,19 @@ The following commands are valid: @@ -125,7 +125,7 @@ index fb70445..0d9a783 100644 See also -------- diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst -index 866b7db..5b3be8a 100644 +index 866b7db3ee..5b3be8a6d6 100644 --- a/docs/tools/virtiofsd.rst +++ b/docs/tools/virtiofsd.rst @@ -297,7 +297,7 @@ Export ``/var/lib/fs/vm001/`` on vhost-user UNIX domain socket @@ -138,7 +138,7 @@ index 866b7db..5b3be8a 100644 -device vhost-user-fs-pci,chardev=char0,tag=myfs \ -object memory-backend-memfd,id=mem,size=4G,share=on \ diff --git a/qemu-options.hx b/qemu-options.hx -index 363a15b..5e5e265 100644 +index 363a15b4e8..5e5e265331 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2935,11 +2935,11 @@ SRST @@ -159,5 +159,5 @@ index 363a15b..5e5e265 100644 ``-netdev vhost-vdpa,vhostdev=/path/to/dev`` Establish a vhost-vdpa netdev. 
-- -1.8.3.1 +2.18.4 diff --git a/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index 33a8125..fc51351 100644 --- a/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 21a45442631721270ff6aba4635e2b72ac2cb248 Mon Sep 17 00:00:00 2001 +From 5095570936ccd71ac82bf441c36e85bd16b8e459 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] @@ -42,7 +42,7 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 3db9a8a..82c0251 100644 +index 3db9a8aae9..82c025146d 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -823,6 +823,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, @@ -62,5 +62,5 @@ index 3db9a8a..82c0251 100644 if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { return; -- -1.8.3.1 +2.18.4 diff --git a/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index 6f60d45..44da707 100644 --- a/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From f0561c2a8caa9080f2849b5679816e2268ee420d Mon Sep 17 00:00:00 2001 +From 0619f89b5e0eb713e4d426c869e7a6a826a13728 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts @@ -32,7 +32,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index 9341e97..f11428e 100644 +index 9341e9782a..f11428eae9 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -333,12 +333,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, @@ -56,5 +56,5 @@ index 9341e97..f11428e 100644 spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); } -- -1.8.3.1 +2.18.4 diff --git a/0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch b/0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch index 7a6e1fe..93eb976 100644 --- a/0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +++ b/0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch @@ -1,4 +1,4 @@ -From 37e71d91a69d0437d6f181b757a702910c25c21f Mon Sep 17 00:00:00 2001 +From e7321dc3f2159d2f4b7f93bd0f7ebb89752e8604 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 13 Mar 2020 12:34:32 +0000 Subject: block: Versioned x-blockdev-reopen API with feature flag @@ -29,7 +29,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/qapi/block-core.json b/qapi/block-core.json -index 04ad80b..2a7dca2 100644 +index 04ad80bc1e..2a7dca299f 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -4143,10 +4143,17 @@ @@ -52,5 +52,5 @@ index 04ad80b..2a7dca2 100644 ## # @blockdev-del: -- -1.8.3.1 +2.18.4 diff --git a/0020-Build-RHEL-9.patch b/0020-Build-RHEL-9.patch deleted file mode 100644 index e815d00..0000000 --- a/0020-Build-RHEL-9.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 1a0497bf6405db1e9ee07db40d90309566bb9f25 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Thu, 26 Nov 2020 08:26:34 +0100 -Subject: Build RHEL 9 - ---- - migration/qemu-file.c | 2 +- - qobject/block-qdict.c | 2 +- - redhat/Makefile.common | 3 ++- - redhat/qemu-kvm.spec.template | 12 ------------ - tests/meson.build | 4 ++-- - 5 files changed, 6 insertions(+), 17 deletions(-) - -diff --git a/migration/qemu-file.c b/migration/qemu-file.c -index be21518..d6e03db 100644 ---- a/migration/qemu-file.c -+++ b/migration/qemu-file.c -@@ -595,7 +595,7 @@ size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) - { - if (size < IO_BUF_SIZE) { - size_t res; -- uint8_t *src; -+ uint8_t *src = NULL; - - res = qemu_peek_buffer(f, &src, size, 0); - -diff --git a/qobject/block-qdict.c b/qobject/block-qdict.c -index 1487cc5..b265244 100644 ---- a/qobject/block-qdict.c -+++ b/qobject/block-qdict.c -@@ -224,7 +224,7 @@ void qdict_array_split(QDict *src, QList **dst) - for (i = 0; i < UINT_MAX; i++) { - QObject *subqobj; - bool is_subqdict; -- QDict *subqdict; -+ QDict *subqdict = NULL; - char indexstr[32], prefix[32]; - size_t snprintf_ret; - -diff --git a/tests/meson.build b/tests/meson.build -index afeb6be..52aeaf4 100644 ---- a/tests/meson.build -+++ b/tests/meson.build -@@ -136,7 +136,7 @@ if have_block - 'test-blockjob': [testblock], - 'test-blockjob-txn': [testblock], - 'test-block-backend': [testblock], -- 'test-block-iothread': 
[testblock], -+# 'test-block-iothread': [testblock], - 'test-write-threshold': [testblock], - 'test-crypto-hash': [crypto], - 'test-crypto-hmac': [crypto], -@@ -286,5 +286,5 @@ if not get_option('tcg').disabled() - endif - - subdir('qapi-schema') --subdir('qtest') -+#subdir('qtest') - subdir('migration') --- -1.8.3.1 - diff --git a/0021-redhat-Define-hw_compat_8_3.patch b/0021-redhat-Define-hw_compat_8_3.patch new file mode 100644 index 0000000..742e81f --- /dev/null +++ b/0021-redhat-Define-hw_compat_8_3.patch @@ -0,0 +1,70 @@ +From bd9e5c1703ef16727db863ba79f46ae9cb81cbfd Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Fri, 20 Nov 2020 14:00:31 -0500 +Subject: redhat: Define hw_compat_8_3 + +RH-Author: Greg Kurz +Message-id: <20201120140033.578472-2-gkurz@redhat.com> +Patchwork-id: 99790 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 1/3] redhat: Define hw_compat_8_3 +Bugzilla: 1893935 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Laurent Vivier +RH-Acked-by: David Gibson + +Signed-off-by: Greg Kurz +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/core/machine.c | 21 +++++++++++++++++++++ + include/hw/boards.h | 3 +++ + 2 files changed, 24 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 19d50dde45..aba05ad676 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -28,6 +28,27 @@ + #include "hw/mem/nvdimm.h" + #include "migration/vmstate.h" + ++/* ++ * The same as hw_compat_5_1 ++ */ ++GlobalProperty hw_compat_rhel_8_3[] = { ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-scsi", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-user-blk", "num-queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-user-scsi", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-blk-device", "num-queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-scsi-device", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "nvme", "use-intel-id", "on"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ ++}; ++const size_t hw_compat_rhel_8_3_len = G_N_ELEMENTS(hw_compat_rhel_8_3); ++ + /* + * The same as hw_compat_4_2 + hw_compat_5_0 + */ +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 4e4a54b313..526e5aea04 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -369,6 +369,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_3[]; ++extern const size_t hw_compat_rhel_8_3_len; ++ + extern GlobalProperty hw_compat_rhel_8_2[]; + extern const size_t hw_compat_rhel_8_2_len; + +-- +2.18.4 + diff --git a/0022-redhat-Add-spapr_machine_rhel_default_class_options.patch b/0022-redhat-Add-spapr_machine_rhel_default_class_options.patch new file mode 100644 index 0000000..e5fa8bc --- /dev/null +++ b/0022-redhat-Add-spapr_machine_rhel_default_class_options.patch @@ -0,0 +1,66 @@ +From 
e5c00782e6f609b4f25dc214825c6491def46e15 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Fri, 20 Nov 2020 14:00:32 -0500 +Subject: redhat: Add spapr_machine_rhel_default_class_options() + +RH-Author: Greg Kurz +Message-id: <20201120140033.578472-3-gkurz@redhat.com> +Patchwork-id: 99791 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 2/3] redhat: Add spapr_machine_rhel_default_class_options() +Bugzilla: 1893935 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Laurent Vivier +RH-Acked-by: David Gibson + +RHEL may need to override some default property inherited from upstream. +This is currently handled in the class_options() function of the latest +machine type, and thus the defaults need to be carried around each time +we add a new RHEL machine. + +Override the defaults in a dedicated function to be called by the +latest RHEL machine type. + +Signed-off-by: Greg Kurz +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 4a838cc955..1d7482b2fb 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4961,6 +4961,17 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) + DEFINE_SPAPR_MACHINE(2_1, "2.1", false); + #endif + ++static void spapr_machine_rhel_default_class_options(MachineClass *mc) ++{ ++ /* ++ * Defaults for the latest behaviour inherited from the base class ++ * can be overriden here for all pseries-rhel* machines. 
++ */ ++ ++ /* Maximum supported VCPU count */ ++ mc->max_cpus = 384; ++} ++ + /* + * pseries-rhel8.3.0 + * like pseries-5.1 +@@ -4968,10 +4979,8 @@ DEFINE_SPAPR_MACHINE(2_1, "2.1", false); + + static void spapr_machine_rhel830_class_options(MachineClass *mc) + { +- /* Defaults for the latest behaviour inherited from the base class */ +- +- /* Maximum supported VCPU count for all pseries-rhel* machines */ +- mc->max_cpus = 384; ++ /* The default machine type must apply the RHEL specific defaults */ ++ spapr_machine_rhel_default_class_options(mc); + } + + DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", true); +-- +2.18.4 + diff --git a/0023-redhat-Define-pseries-rhel8.4.0-machine-type.patch b/0023-redhat-Define-pseries-rhel8.4.0-machine-type.patch new file mode 100644 index 0000000..fa811f6 --- /dev/null +++ b/0023-redhat-Define-pseries-rhel8.4.0-machine-type.patch @@ -0,0 +1,70 @@ +From e5f8c128550c8e6020095152a9fa171cccc6aa18 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Fri, 20 Nov 2020 14:00:33 -0500 +Subject: redhat: Define pseries-rhel8.4.0 machine type + +RH-Author: Greg Kurz +Message-id: <20201120140033.578472-4-gkurz@redhat.com> +Patchwork-id: 99792 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 3/3] redhat: Define pseries-rhel8.4.0 machine type +Bugzilla: 1893935 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Laurent Vivier +RH-Acked-by: David Gibson + +From: Greg Kurz + +Signed-off-by: Greg Kurz +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/spapr.c | 25 ++++++++++++++++++++++--- + 1 file changed, 22 insertions(+), 3 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 1d7482b2fb..4f61b64a21 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4972,6 +4972,19 @@ static void spapr_machine_rhel_default_class_options(MachineClass *mc) + mc->max_cpus = 384; + } + ++/* ++ * pseries-rhel8.4.0 ++ * like pseries-5.2 ++ */ ++ ++static void spapr_machine_rhel840_class_options(MachineClass *mc) ++{ ++ /* The default machine type must apply the RHEL specific defaults */ ++ spapr_machine_rhel_default_class_options(mc); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel840, "rhel8.4.0", true); ++ + /* + * pseries-rhel8.3.0 + * like pseries-5.1 +@@ -4979,11 +4992,17 @@ static void spapr_machine_rhel_default_class_options(MachineClass *mc) + + static void spapr_machine_rhel830_class_options(MachineClass *mc) + { +- /* The default machine type must apply the RHEL specific defaults */ +- spapr_machine_rhel_default_class_options(mc); ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel840_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ ++ /* from pseries-5.1 */ ++ smc->pre_5_2_numa_associativity = true; + } + +-DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", true); ++DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", false); + + /* + * pseries-rhel8.2.0 +-- +2.18.4 + diff --git a/0024-redhat-s390x-add-rhel-8.4.0-compat-machine.patch b/0024-redhat-s390x-add-rhel-8.4.0-compat-machine.patch new file mode 100644 index 0000000..1db12e8 --- /dev/null +++ b/0024-redhat-s390x-add-rhel-8.4.0-compat-machine.patch @@ -0,0 +1,72 @@ +From a4ce96735ad8f1e07ded93e39e32e22bd9ac00ba Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 1 Dec 2020 17:53:41 -0500 +Subject: redhat: s390x: add rhel-8.4.0 compat machine + +RH-Author: Cornelia Huck +Message-id: <20201201175341.37537-3-cohuck@redhat.com> +Patchwork-id: 100195 +O-Subject: 
[RHEL-AV-8.4.0 qemu-kvm PATCH 2/2] redhat: s390x: add rhel-8.4.0 compat machine +Bugzilla: 1836282 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Thomas Huth +RH-Acked-by: David Hildenbrand + +Note that we did not publish a rhel-8.3.0 machine on s390x, so we +need to add the respective hw_compat entry in the rhel-8.2.0 machine. + +Also, the hw_compat entry for 8.1 was missing; however, the contents +there are not relevant for s390x. + +Signed-off-by: Cornelia Huck +Signed-off-by: Danilo C. L. de Paula +--- + hw/s390x/s390-virtio-ccw.c | 17 ++++++++++++++++- + 1 file changed, 16 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index b8dde7e4e1..c7b5bcb06b 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1056,15 +1056,29 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + DEFINE_CCW_MACHINE(2_4, "2.4", false); + #endif + ++static void ccw_machine_rhel840_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel840_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel840, "rhel8.4.0", true); ++ + static void ccw_machine_rhel820_instance_options(MachineState *machine) + { ++ ccw_machine_rhel840_instance_options(machine); + } + + static void ccw_machine_rhel820_class_options(MachineClass *mc) + { ++ ccw_machine_rhel840_class_options(mc); + mc->fixup_ram_size = s390_fixup_ram_size; ++ /* we did not publish a rhel8.3.0 machine */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, hw_compat_rhel_8_2_len); + } +-DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", true); ++DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", false); + + static void ccw_machine_rhel760_instance_options(MachineState *machine) + { +@@ -1086,6 +1100,7 @@ static void ccw_machine_rhel760_class_options(MachineClass *mc) + { + ccw_machine_rhel820_class_options(mc); + /* We never published the s390x 
version of RHEL-AV 8.0 and 8.1, so add this here */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); + compat_props_add(mc->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); + compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); + } +-- +2.18.4 + diff --git a/0027-block-vpc-Make-vpc_open-read-the-full-dynamic-header.patch b/0027-block-vpc-Make-vpc_open-read-the-full-dynamic-header.patch new file mode 100644 index 0000000..e33fe8f --- /dev/null +++ b/0027-block-vpc-Make-vpc_open-read-the-full-dynamic-header.patch @@ -0,0 +1,56 @@ +From 8d3c826bca23d64cbb2f71bd3b506b43fc2b1c70 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 17 Dec 2020 17:58:43 +0100 +Subject: block/vpc: Make vpc_open() read the full dynamic header + +The dynamic header's size is 1024 bytes. + +vpc_open() reads only the 512 bytes of the dynamic header into buf[]. +Works, because it doesn't actually access the second half. However, a +colleague told me that GCC 11 warns: + + ../block/vpc.c:358:51: error: array subscript 'struct VHDDynDiskHeader[0]' is partly outside array bounds of 'uint8_t[512]' [-Werror=array-bounds] + +Clean up to read the full header. + +Rename buf[] to dyndisk_header_buf[] while there. 
+ +Signed-off-by: Markus Armbruster +--- + block/vpc.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/block/vpc.c b/block/vpc.c +index 1ab55f9287..2fcf3f6283 100644 +--- a/block/vpc.c ++++ b/block/vpc.c +@@ -220,7 +220,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, + QemuOpts *opts = NULL; + Error *local_err = NULL; + bool use_chs; +- uint8_t buf[HEADER_SIZE]; ++ uint8_t dyndisk_header_buf[1024]; + uint32_t checksum; + uint64_t computed_size; + uint64_t pagetable_size; +@@ -340,14 +340,14 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, + } + + if (disk_type == VHD_DYNAMIC) { +- ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf, +- HEADER_SIZE); ++ ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), ++ dyndisk_header_buf, 1024); + if (ret < 0) { + error_setg(errp, "Error reading dynamic VHD header"); + goto fail; + } + +- dyndisk_header = (VHDDynDiskHeader *) buf; ++ dyndisk_header = (VHDDynDiskHeader *)dyndisk_header_buf; + + if (strncmp(dyndisk_header->magic, "cxsparse", 8)) { + error_setg(errp, "Invalid header magic"); +-- +2.18.4 + diff --git a/0028-GCC-11-warnings-hacks.patch b/0028-GCC-11-warnings-hacks.patch new file mode 100644 index 0000000..9655f6b --- /dev/null +++ b/0028-GCC-11-warnings-hacks.patch @@ -0,0 +1,163 @@ +From 0db17b3fa57012894e9e410f139703baf21f590a Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Mon, 4 Jan 2021 07:47:03 +0100 +Subject: GCC 11 warnings hacks + +--- + hw/scsi/scsi-disk.c | 13 +++++++------ + net/eth.c | 4 +++- + target/s390x/kvm.c | 2 +- + target/s390x/misc_helper.c | 2 +- + tcg/aarch64/tcg-target.c.inc | 3 +-- + tests/test-block-iothread.c | 12 ++++++------ + 6 files changed, 19 insertions(+), 17 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 90841ad791..8ce77777d3 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -2578,14 +2578,15 @@ static void 
scsi_disk_new_request_dump(uint32_t lun, uint32_t tag, uint8_t *buf) + int len = scsi_cdb_length(buf); + char *line_buffer, *p; + +- line_buffer = g_malloc(len * 5 + 1); ++ if (len > 0) { ++ line_buffer = g_malloc(len * 5 + 1); ++ for (i = 0, p = line_buffer; i < len; i++) { ++ p += sprintf(p, " 0x%02x", buf[i]); ++ } ++ trace_scsi_disk_new_request(lun, tag, line_buffer); + +- for (i = 0, p = line_buffer; i < len; i++) { +- p += sprintf(p, " 0x%02x", buf[i]); ++ g_free(line_buffer); + } +- trace_scsi_disk_new_request(lun, tag, line_buffer); +- +- g_free(line_buffer); + } + + static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun, +diff --git a/net/eth.c b/net/eth.c +index 1e0821c5f8..041ac4865a 100644 +--- a/net/eth.c ++++ b/net/eth.c +@@ -405,6 +405,8 @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags, + struct ip6_ext_hdr *ext_hdr, + struct in6_address *dst_addr) + { ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Warray-bounds" + struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr; + + if ((rthdr->rtype == 2) && +@@ -424,7 +426,7 @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags, + + return bytes_read == sizeof(*dst_addr); + } +- ++#pragma GCC diagnostic pop + return false; + } + +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index 1839cc6648..ab1ca6b1bf 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -1918,7 +1918,7 @@ static void insert_stsi_3_2_2(S390CPU *cpu, __u64 addr, uint8_t ar) + */ + if (qemu_name) { + strncpy((char *)sysib.ext_names[0], qemu_name, +- sizeof(sysib.ext_names[0])); ++ sizeof(sysib.ext_names[0])-1); + } else { + strcpy((char *)sysib.ext_names[0], "KVMguest"); + } +diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c +index 58dbc023eb..adaf4145e6 100644 +--- a/target/s390x/misc_helper.c ++++ b/target/s390x/misc_helper.c +@@ -370,7 +370,7 @@ uint32_t HELPER(stsi)(CPUS390XState *env, uint64_t a0, uint64_t 
r0, uint64_t r1) + MIN(sizeof(sysib.sysib_322.vm[0].name), + strlen(qemu_name))); + strncpy((char *)sysib.sysib_322.ext_names[0], qemu_name, +- sizeof(sysib.sysib_322.ext_names[0])); ++ sizeof(sysib.sysib_322.ext_names[0])-1); + } else { + ebcdic_put(sysib.sysib_322.vm[0].name, "TCGguest", 8); + strcpy((char *)sysib.sysib_322.ext_names[0], "TCGguest"); +diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc +index 26f71cb599..fe6bdbf721 100644 +--- a/tcg/aarch64/tcg-target.c.inc ++++ b/tcg/aarch64/tcg-target.c.inc +@@ -1852,8 +1852,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, + static tcg_insn_unit *tb_ret_addr; + + static void tcg_out_op(TCGContext *s, TCGOpcode opc, +- const TCGArg args[TCG_MAX_OP_ARGS], +- const int const_args[TCG_MAX_OP_ARGS]) ++ const TCGArg *args, const int *const_args) + { + /* 99% of the time, we can signal the use of extension registers + by looking to see if the opcode handles 64-bit data. */ +diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c +index 3f866a35c6..bc64b50e66 100644 +--- a/tests/test-block-iothread.c ++++ b/tests/test-block-iothread.c +@@ -75,7 +75,7 @@ static BlockDriver bdrv_test = { + + static void test_sync_op_pread(BdrvChild *c) + { +- uint8_t buf[512]; ++ uint8_t buf[512] = {0}; + int ret; + + /* Success */ +@@ -89,7 +89,7 @@ static void test_sync_op_pread(BdrvChild *c) + + static void test_sync_op_pwrite(BdrvChild *c) + { +- uint8_t buf[512]; ++ uint8_t buf[512] = {0}; + int ret; + + /* Success */ +@@ -103,7 +103,7 @@ static void test_sync_op_pwrite(BdrvChild *c) + + static void test_sync_op_blk_pread(BlockBackend *blk) + { +- uint8_t buf[512]; ++ uint8_t buf[512] = {0}; + int ret; + + /* Success */ +@@ -117,7 +117,7 @@ static void test_sync_op_blk_pread(BlockBackend *blk) + + static void test_sync_op_blk_pwrite(BlockBackend *blk) + { +- uint8_t buf[512]; ++ uint8_t buf[512] = {0}; + int ret; + + /* Success */ +@@ -131,7 +131,7 @@ static void 
test_sync_op_blk_pwrite(BlockBackend *blk) + + static void test_sync_op_load_vmstate(BdrvChild *c) + { +- uint8_t buf[512]; ++ uint8_t buf[512] = {0}; + int ret; + + /* Error: Driver does not support snapshots */ +@@ -141,7 +141,7 @@ static void test_sync_op_load_vmstate(BdrvChild *c) + + static void test_sync_op_save_vmstate(BdrvChild *c) + { +- uint8_t buf[512]; ++ uint8_t buf[512] = {0}; + int ret; + + /* Error: Driver does not support snapshots */ +-- +2.18.4 + diff --git a/0029-Disable-problematic-tests-for-initial-build.patch b/0029-Disable-problematic-tests-for-initial-build.patch new file mode 100644 index 0000000..c2eeb1a --- /dev/null +++ b/0029-Disable-problematic-tests-for-initial-build.patch @@ -0,0 +1,27 @@ +From 6d129eac73fdc94b2712af5d402c0f2debd65600 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Tue, 5 Jan 2021 07:40:08 +0100 +Subject: Disable problematic tests for initial build + +--- + tests/qtest/meson.build | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build +index 15ed460ff0..70ef8c236c 100644 +--- a/tests/qtest/meson.build ++++ b/tests/qtest/meson.build +@@ -150,8 +150,8 @@ qtests_aarch64 = \ + (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-swtpm-test'] : []) + \ + ['arm-cpu-features', + 'numa-test', +- 'boot-serial-test', +- 'migration-test'] ++ 'boot-serial-test'] ++# 'migration-test'] + + qtests_s390x = \ + (slirp.found() ? 
['pxe-test', 'test-netfilter'] : []) + \ +-- +2.18.4 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 4ce97ec..ab130d0 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -8,7 +8,7 @@ %global have_gluster 1 %global have_kvm_setup 0 %global have_memlock_limits 0 -%global rcversion -rc3 + %ifnarch %{ix86} x86_64 @@ -64,7 +64,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.2.0 -Release: rc3.1%{?dist} +Release: 2%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -73,7 +73,7 @@ URL: http://www.qemu.org/ ExclusiveArch: x86_64 %{power64} aarch64 s390x -Source0: http://wiki.qemu.org/download/qemu-5.2.0-rc3.tar.xz +Source0: http://wiki.qemu.org/download/qemu-5.2.0.tar.xz # KSM control scripts Source4: ksm.service @@ -117,7 +117,13 @@ Patch0016: 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0017: 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0018: 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch Patch0019: 0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch -Patch0020: 0020-Build-RHEL-9.patch +Patch0021: 0021-redhat-Define-hw_compat_8_3.patch +Patch0022: 0022-redhat-Add-spapr_machine_rhel_default_class_options.patch +Patch0023: 0023-redhat-Define-pseries-rhel8.4.0-machine-type.patch +Patch0024: 0024-redhat-s390x-add-rhel-8.4.0-compat-machine.patch +Patch0027: 0027-block-vpc-Make-vpc_open-read-the-full-dynamic-header.patch +Patch0028: 0028-GCC-11-warnings-hacks.patch +Patch0029: 0029-Disable-problematic-tests-for-initial-build.patch BuildRequires: wget BuildRequires: rpm-build @@ -247,13 +253,14 @@ hardware for a full system such as a PC and its associated peripherals. 
Summary: qemu-kvm core components Requires: %{name}-common = %{epoch}:%{version}-%{release} Requires: qemu-img = %{epoch}:%{version}-%{release} -Conflicts: qemu-kiwi -%ifarch %{ix86} x86_64 -Requires: edk2-ovmf -%endif -%ifarch aarch64 -Requires: edk2-aarch64 -%endif + +# Temporary disable edk2 dependency as there's no edk2 available yet +#%ifarch %{ix86} x86_64 +#Requires: edk2-ovmf +#%endif +#%ifarch aarch64 +#Requires: edk2-aarch64 +#%endif %ifarch %{power64} Requires: SLOF >= %{SLOF_gittagdate}-1.git%{SLOF_gittagcommit} @@ -418,7 +425,7 @@ mkdir slirp # XXX: ugly hack to copy source tree into a new folder. # it allows to build qemu-kiwi without touching the original source tree. # This is required as the build isolation is not 100% as we also have to -# change the source tree when building qemu-kiwi. And, when we do that, +# change the source tree when building qemu-kiwi. And, when we do that, # calling "make check" on qemu-kvm see that change and behaves baddly. # Newer version of qemu allow us to create a better sollution, and this # hack can be dropped. 
@@ -582,15 +589,12 @@ pushd %{qemu_kvm_build} --enable-attr \ %ifarch %{ix86} x86_64 --enable-avx2 \ -%else %endif --enable-cap-ng \ --enable-capstone \ --enable-coroutine-pool \ --enable-curl \ --enable-debug-info \ - --disable-debug-tcg \ - --disable-dmg \ --enable-docs \ %if 0%{have_fdt} --enable-fdt \ @@ -652,9 +656,9 @@ pushd %{qemu_kvm_build} --enable-vnc-sasl \ --enable-werror \ --enable-xkbcommon \ - --disable-zstd \ --without-default-devices + echo "qemu-kvm config-host.mak contents:" echo "===" cat config-host.mak @@ -702,6 +706,7 @@ find ../default-configs -name "*-rh-devices.mak" \ --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ --extra-cflags="%{optflags}" \ --with-pkgversion="%{name}-%{version}-%{release}" \ + --with-suffix="%{name}" \ --firmwarepath=%{_prefix}/share/qemu-firmware \ --python=%{__python3} \ --target-list="%{buildarch}" \ @@ -733,6 +738,7 @@ find ../default-configs -name "*-rh-devices.mak" \ %ifnarch s390x --enable-numa \ %endif + --enable-pie \ --enable-seccomp \ --enable-system \ --enable-tcg \ @@ -747,6 +753,7 @@ find ../default-configs -name "*-rh-devices.mak" \ --enable-xkbcommon \ --without-default-devices + echo "qemu-kiki config-host.mak contents:" echo "===" cat config-host.mak @@ -1101,25 +1108,6 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : fi %endif -%post -n qemu-kiwi -# load kvm modules now, so we can make sure no reboot is needed. 
-# If there's already a kvm module installed, we don't mess with it -%udev_rules_update -sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : - udevadm trigger --subsystem-match=misc --sysname-match=kvm --action=add || : -%if %{have_kvm_setup} - systemctl daemon-reload # Make sure it sees the new presets and unitfile - %systemd_post kvm-setup.service - if systemctl is-enabled kvm-setup.service > /dev/null; then - systemctl start kvm-setup.service - fi -%endif - -%if %{have_kvm_setup} -%preun -n qemu-kiwi -%systemd_preun kvm-setup.service -%endif - %preun -n qemu-kvm-common %systemd_preun ksm.service %systemd_preun ksmtuned.service @@ -1318,6 +1306,36 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %changelog +* Tue Jan 05 2021 Miroslav Rezanina - 5.2.0-2.el9 +- Rebuild for RHEL 9 + +* Tue Dec 15 2020 Danilo Cesar Lemes de Paula - 5.2.0-2.el8 +- kvm-redhat-Define-hw_compat_8_3.patch [bz#1893935] +- kvm-redhat-Add-spapr_machine_rhel_default_class_options.patch [bz#1893935] +- kvm-redhat-Define-pseries-rhel8.4.0-machine-type.patch [bz#1893935] +- kvm-redhat-s390x-add-rhel-8.4.0-compat-machine.patch [bz#1836282] +- Resolves: bz#1836282 + (New machine type for qemu-kvm on s390x in RHEL-AV) +- Resolves: bz#1893935 + (New machine type on RHEL-AV 8.4 for ppc64le) + +* Wed Dec 09 2020 Miroslav Rezanina - 5.2.0-1.el8 +- Rebase to QEMU 5.2.0 [bz#1905933] +- Resolves: bz#1905933 + (Rebase qemu-kvm to version 5.2.0) + +* Tue Dec 01 2020 Danilo Cesar Lemes de Paula - 5.1.0-16.el8 +- kvm-redhat-introduces-disable_everything-macro-into-the-.patch [bz#1884611] +- kvm-redhat-scripts-extract_build_cmd.py-Avoid-listing-em.patch [bz#1884611] +- kvm-redhat-Removing-unecessary-configurations.patch [bz#1884611] +- kvm-redhat-Fixing-rh-local-build.patch [bz#1884611] +- kvm-redhat-allow-Makefile-rh-prep-builddep-to-fail.patch [bz#1884611] +- kvm-redhat-adding-rh-rpm-target.patch [bz#1884611] +- kvm-redhat-move-shareable-files-from-qemu-kvm-core-to-qe.patch 
[bz#1884611] +- kvm-redhat-Add-qemu-kiwi-subpackage.patch [bz#1884611] +- Resolves: bz#1884611 + (Build kata-specific version of qemu) + * Mon Nov 16 2020 Danilo Cesar Lemes de Paula - 5.1.0-15.el8 - kvm-redhat-add-un-pre-install-systemd-hooks-for-qemu-ga.patch [bz#1882719] - kvm-rcu-Implement-drain_call_rcu.patch [bz#1812399 bz#1866707] diff --git a/sources b/sources index 235915c..6a86af7 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-5.2.0-rc3.tar.xz) = e3913388fd5f5b7e3564bdc10869e1b9a9bf1a569c11748aec391ae6d13ea99fad43f74c4d70202f69dce93dd8961072a0655f63a8d9bbe78dc2a7220bc32048 +SHA512 (qemu-5.2.0.tar.xz) = bddd633ce111471ebc651e03080251515178808556b49a308a724909e55dac0be0cc0c79c536ac12d239678ae94c60100dc124be9b9d9538340c03a2f27177f3 From 6c1454d3d0caea379f317683748ab1e00ca15a9a Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 15 Jan 2021 12:15:19 +0100 Subject: [PATCH 106/195] Update to enable edk2 as dependency and properly fix gcc 11 issues. --- 0012-Enable-make-check.patch | 24 +-- ...mber-of-devices-that-can-be-assigned.patch | 2 +- ...Add-support-statement-to-help-output.patch | 2 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 2 +- ...documentation-instead-of-qemu-system.patch | 2 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 2 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 2 +- ...x-blockdev-reopen-API-with-feature-f.patch | 2 +- 0021-redhat-Define-hw_compat_8_3.patch | 2 +- ...r_machine_rhel_default_class_options.patch | 2 +- ...efine-pseries-rhel8.4.0-machine-type.patch | 2 +- ...-s390x-add-rhel-8.4.0-compat-machine.patch | 2 +- ...pc_open-read-the-full-dynamic-header.patch | 2 +- 0028-GCC-11-warnings-hacks.patch | 2 +- ...-problematic-tests-for-initial-build.patch | 18 +- 0030-Revert-GCC-11-warnings-hacks.patch | 166 +++++++++++++++ ...0x-Use-strpadcpy-for-copying-vm-name.patch | 84 ++++++++ ..._out_op-to-arrays-of-TCG_MAX_OP_ARGS.patch | 138 ++++++++++++ ...th-Simplify-_eth_get_rss_ex_dst_addr.patch | 52 +++++ 
...net-eth-Fix-stack-buffer-overflow-in.patch | 196 ++++++++++++++++++ qemu-kvm.spec | 24 +-- 21 files changed, 681 insertions(+), 47 deletions(-) create mode 100644 0030-Revert-GCC-11-warnings-hacks.patch create mode 100644 0031-s390x-Use-strpadcpy-for-copying-vm-name.patch create mode 100644 0032-tcg-Restrict-tcg_out_op-to-arrays-of-TCG_MAX_OP_ARGS.patch create mode 100644 0033-net-eth-Simplify-_eth_get_rss_ex_dst_addr.patch create mode 100644 0034-net-eth-Fix-stack-buffer-overflow-in.patch diff --git a/0012-Enable-make-check.patch b/0012-Enable-make-check.patch index fa397df..906bb4e 100644 --- a/0012-Enable-make-check.patch +++ b/0012-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 28d744b42d381b15254706f90fed3310ce4a5116 Mon Sep 17 00:00:00 2001 +From 7b8ca8c1cbd3763900e3e472556116c9832e06f8 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -31,19 +31,16 @@ Rebase changes (5.2.0 rc0): - Disable cdrom tests (unsupported devices) on x86_64 - disable fuzz test -Rebaes changes (RHEL 9): -- disable block-iothreads test - Merged patches (4.0.0): - f7ffd13 Remove 7 qcow2 and luks iotests that are taking > 25 sec to run during the fast train build proce Merged patches (4.1.0-rc0): - 41288ff redhat: Remove raw iotest 205 -Dissable problematic tests +Conflicts: + redhat/qemu-kvm.spec.template --- redhat/qemu-kvm.spec.template | 4 ++-- - tests/meson.build | 2 +- tests/qemu-iotests/051 | 12 ++++++------ tests/qtest/boot-serial-test.c | 6 +++++- tests/qtest/cdrom-test.c | 2 ++ @@ -54,21 +51,8 @@ Dissable problematic tests tests/qtest/prom-env-test.c | 4 ++++ tests/qtest/test-x86-cpuid-compat.c | 2 ++ tests/qtest/usb-hcd-xhci-test.c | 4 ++++ - 12 files changed, 36 insertions(+), 20 deletions(-) + 11 files changed, 35 insertions(+), 19 deletions(-) -diff --git a/tests/meson.build b/tests/meson.build -index afeb6be689..e562a0499e 100644 ---- a/tests/meson.build -+++ b/tests/meson.build -@@ -136,7 +136,7 @@ if 
have_block - 'test-blockjob': [testblock], - 'test-blockjob-txn': [testblock], - 'test-block-backend': [testblock], -- 'test-block-iothread': [testblock], -+# 'test-block-iothread': [testblock], - 'test-write-threshold': [testblock], - 'test-crypto-hash': [crypto], - 'test-crypto-hmac': [crypto], diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index bee26075b2..61d25c4ed7 100755 --- a/tests/qemu-iotests/051 diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index 90d78d2..9575257 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 514eb840d98c8047e88fb503a4bba71455a2e8b0 Mon Sep 17 00:00:00 2001 +From da70823afbdbb904950068fe5f0323ff75b0d4fc Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned diff --git a/0014-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch index 4ae3d79..04d89d8 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From f63ec823f8df7024f33c145b88a2b50c589cc633 Mon Sep 17 00:00:00 2001 +From f69c3b855ec419b4afe240bbd039141a59aad808 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch index 5df16d4..4a65df5 100644 --- a/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From 6eddce7d3e8cd95c4b848fe3f7c5ac27854dc0da Mon Sep 17 00:00:00 2001 +From 9585c8927744d8b07b317063ef788e1f01773f0e Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: 
globally limit the maximum number of CPUs diff --git a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 9c9d07d..67f920a 100644 --- a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From c615fb7d219b7b88f6517d6772d92e233007aff3 Mon Sep 17 00:00:00 2001 +From 091f9e47dc4609bfded5474cfe2797777cdd56f1 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- diff --git a/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index fc51351..7ab1831 100644 --- a/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 5095570936ccd71ac82bf441c36e85bd16b8e459 Mon Sep 17 00:00:00 2001 +From 4d69dc90e66deec6bc6b46074ee44ef8c902266b Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] diff --git a/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index 44da707..6c16c93 100644 --- a/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From 0619f89b5e0eb713e4d426c869e7a6a826a13728 Mon Sep 17 00:00:00 2001 +From 18c5a8c24e22b7c2ba9f7e26cac190cefc7ecf26 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts diff --git a/0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch b/0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch index 
93eb976..c644891 100644 --- a/0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +++ b/0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch @@ -1,4 +1,4 @@ -From e7321dc3f2159d2f4b7f93bd0f7ebb89752e8604 Mon Sep 17 00:00:00 2001 +From 989cfded8fdd5df3b6b1f1a304ca16c128d7561b Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 13 Mar 2020 12:34:32 +0000 Subject: block: Versioned x-blockdev-reopen API with feature flag diff --git a/0021-redhat-Define-hw_compat_8_3.patch b/0021-redhat-Define-hw_compat_8_3.patch index 742e81f..a5ca2c6 100644 --- a/0021-redhat-Define-hw_compat_8_3.patch +++ b/0021-redhat-Define-hw_compat_8_3.patch @@ -1,4 +1,4 @@ -From bd9e5c1703ef16727db863ba79f46ae9cb81cbfd Mon Sep 17 00:00:00 2001 +From fa0063ba67071384d8c749cee8f4f4e5bbc8ef91 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 20 Nov 2020 14:00:31 -0500 Subject: redhat: Define hw_compat_8_3 diff --git a/0022-redhat-Add-spapr_machine_rhel_default_class_options.patch b/0022-redhat-Add-spapr_machine_rhel_default_class_options.patch index e5fa8bc..f77916f 100644 --- a/0022-redhat-Add-spapr_machine_rhel_default_class_options.patch +++ b/0022-redhat-Add-spapr_machine_rhel_default_class_options.patch @@ -1,4 +1,4 @@ -From e5c00782e6f609b4f25dc214825c6491def46e15 Mon Sep 17 00:00:00 2001 +From 943c936df3b6b5c3197ad727f2105e61778e749a Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 20 Nov 2020 14:00:32 -0500 Subject: redhat: Add spapr_machine_rhel_default_class_options() diff --git a/0023-redhat-Define-pseries-rhel8.4.0-machine-type.patch b/0023-redhat-Define-pseries-rhel8.4.0-machine-type.patch index fa811f6..406c7e1 100644 --- a/0023-redhat-Define-pseries-rhel8.4.0-machine-type.patch +++ b/0023-redhat-Define-pseries-rhel8.4.0-machine-type.patch @@ -1,4 +1,4 @@ -From e5f8c128550c8e6020095152a9fa171cccc6aa18 Mon Sep 17 00:00:00 2001 +From 030b5e6fba510b8b9f8c8690ef6ea63f71628d25 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 20 Nov 2020 14:00:33 -0500 
Subject: redhat: Define pseries-rhel8.4.0 machine type diff --git a/0024-redhat-s390x-add-rhel-8.4.0-compat-machine.patch b/0024-redhat-s390x-add-rhel-8.4.0-compat-machine.patch index 1db12e8..bedb835 100644 --- a/0024-redhat-s390x-add-rhel-8.4.0-compat-machine.patch +++ b/0024-redhat-s390x-add-rhel-8.4.0-compat-machine.patch @@ -1,4 +1,4 @@ -From a4ce96735ad8f1e07ded93e39e32e22bd9ac00ba Mon Sep 17 00:00:00 2001 +From a6ae745cceee1acc3667f5ba5e007ca6c083f8a8 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Tue, 1 Dec 2020 17:53:41 -0500 Subject: redhat: s390x: add rhel-8.4.0 compat machine diff --git a/0027-block-vpc-Make-vpc_open-read-the-full-dynamic-header.patch b/0027-block-vpc-Make-vpc_open-read-the-full-dynamic-header.patch index e33fe8f..7af0b8d 100644 --- a/0027-block-vpc-Make-vpc_open-read-the-full-dynamic-header.patch +++ b/0027-block-vpc-Make-vpc_open-read-the-full-dynamic-header.patch @@ -1,4 +1,4 @@ -From 8d3c826bca23d64cbb2f71bd3b506b43fc2b1c70 Mon Sep 17 00:00:00 2001 +From 974af930d4e5cae5611bb2e3a5ac18d3bda15a68 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 17 Dec 2020 17:58:43 +0100 Subject: block/vpc: Make vpc_open() read the full dynamic header diff --git a/0028-GCC-11-warnings-hacks.patch b/0028-GCC-11-warnings-hacks.patch index 9655f6b..86ae8c2 100644 --- a/0028-GCC-11-warnings-hacks.patch +++ b/0028-GCC-11-warnings-hacks.patch @@ -1,4 +1,4 @@ -From 0db17b3fa57012894e9e410f139703baf21f590a Mon Sep 17 00:00:00 2001 +From 6e9564986a00456c6748cf888d9ba9f7f0db01bf Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 4 Jan 2021 07:47:03 +0100 Subject: GCC 11 warnings hacks diff --git a/0029-Disable-problematic-tests-for-initial-build.patch b/0029-Disable-problematic-tests-for-initial-build.patch index c2eeb1a..84743b8 100644 --- a/0029-Disable-problematic-tests-for-initial-build.patch +++ b/0029-Disable-problematic-tests-for-initial-build.patch @@ -1,12 +1,26 @@ -From 6d129eac73fdc94b2712af5d402c0f2debd65600 Mon Sep 17 
00:00:00 2001 +From bb42f8a495aa0da2410109de14aca901b8c4ac4f Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Tue, 5 Jan 2021 07:40:08 +0100 Subject: Disable problematic tests for initial build --- + tests/meson.build | 2 +- tests/qtest/meson.build | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) + 2 files changed, 3 insertions(+), 3 deletions(-) +diff --git a/tests/meson.build b/tests/meson.build +index afeb6be689..e562a0499e 100644 +--- a/tests/meson.build ++++ b/tests/meson.build +@@ -136,7 +136,7 @@ if have_block + 'test-blockjob': [testblock], + 'test-blockjob-txn': [testblock], + 'test-block-backend': [testblock], +- 'test-block-iothread': [testblock], ++# 'test-block-iothread': [testblock], + 'test-write-threshold': [testblock], + 'test-crypto-hash': [crypto], + 'test-crypto-hmac': [crypto], diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index 15ed460ff0..70ef8c236c 100644 --- a/tests/qtest/meson.build diff --git a/0030-Revert-GCC-11-warnings-hacks.patch b/0030-Revert-GCC-11-warnings-hacks.patch new file mode 100644 index 0000000..6f13efa --- /dev/null +++ b/0030-Revert-GCC-11-warnings-hacks.patch @@ -0,0 +1,166 @@ +From f488becdbb12c6001a2524d049371196a05f5256 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 15 Jan 2021 09:27:40 +0100 +Subject: Revert "GCC 11 warnings hacks" + +This reverts commit 6e9564986a00456c6748cf888d9ba9f7f0db01bf. + +Hacks solved upstream. Going to import upstream solutions. 
+--- + hw/scsi/scsi-disk.c | 13 ++++++------- + net/eth.c | 4 +--- + target/s390x/kvm.c | 2 +- + target/s390x/misc_helper.c | 2 +- + tcg/aarch64/tcg-target.c.inc | 3 ++- + tests/test-block-iothread.c | 12 ++++++------ + 6 files changed, 17 insertions(+), 19 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 8ce77777d3..90841ad791 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -2578,15 +2578,14 @@ static void scsi_disk_new_request_dump(uint32_t lun, uint32_t tag, uint8_t *buf) + int len = scsi_cdb_length(buf); + char *line_buffer, *p; + +- if (len > 0) { +- line_buffer = g_malloc(len * 5 + 1); +- for (i = 0, p = line_buffer; i < len; i++) { +- p += sprintf(p, " 0x%02x", buf[i]); +- } +- trace_scsi_disk_new_request(lun, tag, line_buffer); ++ line_buffer = g_malloc(len * 5 + 1); + +- g_free(line_buffer); ++ for (i = 0, p = line_buffer; i < len; i++) { ++ p += sprintf(p, " 0x%02x", buf[i]); + } ++ trace_scsi_disk_new_request(lun, tag, line_buffer); ++ ++ g_free(line_buffer); + } + + static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun, +diff --git a/net/eth.c b/net/eth.c +index 041ac4865a..1e0821c5f8 100644 +--- a/net/eth.c ++++ b/net/eth.c +@@ -405,8 +405,6 @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags, + struct ip6_ext_hdr *ext_hdr, + struct in6_address *dst_addr) + { +-#pragma GCC diagnostic push +-#pragma GCC diagnostic ignored "-Warray-bounds" + struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr; + + if ((rthdr->rtype == 2) && +@@ -426,7 +424,7 @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags, + + return bytes_read == sizeof(*dst_addr); + } +-#pragma GCC diagnostic pop ++ + return false; + } + +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index ab1ca6b1bf..1839cc6648 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -1918,7 +1918,7 @@ static void insert_stsi_3_2_2(S390CPU *cpu, __u64 addr, uint8_t ar) + */ + if 
(qemu_name) { + strncpy((char *)sysib.ext_names[0], qemu_name, +- sizeof(sysib.ext_names[0])-1); ++ sizeof(sysib.ext_names[0])); + } else { + strcpy((char *)sysib.ext_names[0], "KVMguest"); + } +diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c +index adaf4145e6..58dbc023eb 100644 +--- a/target/s390x/misc_helper.c ++++ b/target/s390x/misc_helper.c +@@ -370,7 +370,7 @@ uint32_t HELPER(stsi)(CPUS390XState *env, uint64_t a0, uint64_t r0, uint64_t r1) + MIN(sizeof(sysib.sysib_322.vm[0].name), + strlen(qemu_name))); + strncpy((char *)sysib.sysib_322.ext_names[0], qemu_name, +- sizeof(sysib.sysib_322.ext_names[0])-1); ++ sizeof(sysib.sysib_322.ext_names[0])); + } else { + ebcdic_put(sysib.sysib_322.vm[0].name, "TCGguest", 8); + strcpy((char *)sysib.sysib_322.ext_names[0], "TCGguest"); +diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc +index fe6bdbf721..26f71cb599 100644 +--- a/tcg/aarch64/tcg-target.c.inc ++++ b/tcg/aarch64/tcg-target.c.inc +@@ -1852,7 +1852,8 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, + static tcg_insn_unit *tb_ret_addr; + + static void tcg_out_op(TCGContext *s, TCGOpcode opc, +- const TCGArg *args, const int *const_args) ++ const TCGArg args[TCG_MAX_OP_ARGS], ++ const int const_args[TCG_MAX_OP_ARGS]) + { + /* 99% of the time, we can signal the use of extension registers + by looking to see if the opcode handles 64-bit data. 
*/ +diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c +index bc64b50e66..3f866a35c6 100644 +--- a/tests/test-block-iothread.c ++++ b/tests/test-block-iothread.c +@@ -75,7 +75,7 @@ static BlockDriver bdrv_test = { + + static void test_sync_op_pread(BdrvChild *c) + { +- uint8_t buf[512] = {0}; ++ uint8_t buf[512]; + int ret; + + /* Success */ +@@ -89,7 +89,7 @@ static void test_sync_op_pread(BdrvChild *c) + + static void test_sync_op_pwrite(BdrvChild *c) + { +- uint8_t buf[512] = {0}; ++ uint8_t buf[512]; + int ret; + + /* Success */ +@@ -103,7 +103,7 @@ static void test_sync_op_pwrite(BdrvChild *c) + + static void test_sync_op_blk_pread(BlockBackend *blk) + { +- uint8_t buf[512] = {0}; ++ uint8_t buf[512]; + int ret; + + /* Success */ +@@ -117,7 +117,7 @@ static void test_sync_op_blk_pread(BlockBackend *blk) + + static void test_sync_op_blk_pwrite(BlockBackend *blk) + { +- uint8_t buf[512] = {0}; ++ uint8_t buf[512]; + int ret; + + /* Success */ +@@ -131,7 +131,7 @@ static void test_sync_op_blk_pwrite(BlockBackend *blk) + + static void test_sync_op_load_vmstate(BdrvChild *c) + { +- uint8_t buf[512] = {0}; ++ uint8_t buf[512]; + int ret; + + /* Error: Driver does not support snapshots */ +@@ -141,7 +141,7 @@ static void test_sync_op_load_vmstate(BdrvChild *c) + + static void test_sync_op_save_vmstate(BdrvChild *c) + { +- uint8_t buf[512] = {0}; ++ uint8_t buf[512]; + int ret; + + /* Error: Driver does not support snapshots */ +-- +2.18.4 + diff --git a/0031-s390x-Use-strpadcpy-for-copying-vm-name.patch b/0031-s390x-Use-strpadcpy-for-copying-vm-name.patch new file mode 100644 index 0000000..a7827ee --- /dev/null +++ b/0031-s390x-Use-strpadcpy-for-copying-vm-name.patch @@ -0,0 +1,84 @@ +From adbabd33e81f46c6b29c4b940c053e562e4f55fd Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 15 Jan 2021 09:28:59 +0100 +Subject: s390x: Use strpadcpy for copying vm name + +Using strncpy with length equal to the size of target array, GCC 11 +reports 
following warning: + + warning: '__builtin_strncpy' specified bound 256 equals destination size [-Wstringop-truncation] + +We can prevent this warning by using strpadcpy that copies string +up to specified length, zeroes target array after copied string +and does not raise warning when length is equal to target array +size (and ending '\0' is discarded). + +Signed-off-by: Miroslav Rezanina +--- + target/s390x/kvm.c | 12 +++++------- + target/s390x/misc_helper.c | 7 +++++-- + 2 files changed, 10 insertions(+), 9 deletions(-) + +diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c +index 1839cc6648..c08b5bc2de 100644 +--- a/target/s390x/kvm.c ++++ b/target/s390x/kvm.c +@@ -29,6 +29,7 @@ + #include "internal.h" + #include "kvm_s390x.h" + #include "sysemu/kvm_int.h" ++#include "qemu/cutils.h" + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "qemu/timer.h" +@@ -1910,18 +1911,15 @@ static void insert_stsi_3_2_2(S390CPU *cpu, __u64 addr, uint8_t ar) + strlen(qemu_name))); + } + sysib.vm[0].ext_name_encoding = 2; /* 2 = UTF-8 */ +- memset(sysib.ext_names[0], 0, sizeof(sysib.ext_names[0])); + /* If hypervisor specifies zero Extended Name in STSI322 SYSIB, it's + * considered by s390 as not capable of providing any Extended Name. + * Therefore if no name was specified on qemu invocation, we go with the + * same "KVMguest" default, which KVM has filled into short name field. 
+ */ +- if (qemu_name) { +- strncpy((char *)sysib.ext_names[0], qemu_name, +- sizeof(sysib.ext_names[0])); +- } else { +- strcpy((char *)sysib.ext_names[0], "KVMguest"); +- } ++ strpadcpy((char *)sysib.ext_names[0], ++ sizeof(sysib.ext_names[0]), ++ qemu_name ?: "KVMguest", '\0'); ++ + /* Insert UUID */ + memcpy(sysib.vm[0].uuid, &qemu_uuid, sizeof(sysib.vm[0].uuid)); + +diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c +index 58dbc023eb..7ea90d414a 100644 +--- a/target/s390x/misc_helper.c ++++ b/target/s390x/misc_helper.c +@@ -19,6 +19,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/cutils.h" + #include "qemu/main-loop.h" + #include "cpu.h" + #include "internal.h" +@@ -369,8 +370,10 @@ uint32_t HELPER(stsi)(CPUS390XState *env, uint64_t a0, uint64_t r0, uint64_t r1) + ebcdic_put(sysib.sysib_322.vm[0].name, qemu_name, + MIN(sizeof(sysib.sysib_322.vm[0].name), + strlen(qemu_name))); +- strncpy((char *)sysib.sysib_322.ext_names[0], qemu_name, +- sizeof(sysib.sysib_322.ext_names[0])); ++ strpadcpy((char *)sysib.sysib_322.ext_names[0], ++ sizeof(sysib.sysib_322.ext_names[0]), ++ qemu_name, '\0'); ++ + } else { + ebcdic_put(sysib.sysib_322.vm[0].name, "TCGguest", 8); + strcpy((char *)sysib.sysib_322.ext_names[0], "TCGguest"); +-- +2.18.4 + diff --git a/0032-tcg-Restrict-tcg_out_op-to-arrays-of-TCG_MAX_OP_ARGS.patch b/0032-tcg-Restrict-tcg_out_op-to-arrays-of-TCG_MAX_OP_ARGS.patch new file mode 100644 index 0000000..6cd75ce --- /dev/null +++ b/0032-tcg-Restrict-tcg_out_op-to-arrays-of-TCG_MAX_OP_ARGS.patch @@ -0,0 +1,138 @@ +From 8773f3688ca87e5e7da2e1a5170d0bde9a54eae0 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 15 Jan 2021 09:38:53 +0100 +Subject: tcg: Restrict tcg_out_op() to arrays of TCG_MAX_OP_ARGS elements + +--- + tcg/aarch64/tcg-target.c.inc | 3 ++- + tcg/i386/tcg-target.c.inc | 6 ++++-- + tcg/ppc/tcg-target.c.inc | 8 +++++--- + tcg/s390/tcg-target.c.inc | 3 ++- + tcg/tcg.c | 19 +++++++++++-------- + 5 files changed, 24 
insertions(+), 15 deletions(-) + +diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc +index 26f71cb599..ce8689e889 100644 +--- a/tcg/aarch64/tcg-target.c.inc ++++ b/tcg/aarch64/tcg-target.c.inc +@@ -2271,7 +2271,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, + + static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, + unsigned vecl, unsigned vece, +- const TCGArg *args, const int *const_args) ++ const TCGArg args[TCG_MAX_OP_ARGS], ++ const int const_args[TCG_MAX_OP_ARGS]) + { + static const AArch64Insn cmp_insn[16] = { + [TCG_COND_EQ] = I3616_CMEQ, +diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc +index d8797ed398..0e557d177a 100644 +--- a/tcg/i386/tcg-target.c.inc ++++ b/tcg/i386/tcg-target.c.inc +@@ -2242,7 +2242,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) + } + + static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, +- const TCGArg *args, const int *const_args) ++ const TCGArg args[TCG_MAX_OP_ARGS], ++ const int const_args[TCG_MAX_OP_ARGS]) + { + TCGArg a0, a1, a2; + int c, const_a2, vexop, rexw = 0; +@@ -2679,7 +2680,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, + + static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, + unsigned vecl, unsigned vece, +- const TCGArg *args, const int *const_args) ++ const TCGArg args[TCG_MAX_OP_ARGS], ++ const int const_args[TCG_MAX_OP_ARGS]) + { + static int const add_insn[4] = { + OPC_PADDB, OPC_PADDW, OPC_PADDD, OPC_PADDQ +diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc +index 18ee989f95..b2bc1fc0c4 100644 +--- a/tcg/ppc/tcg-target.c.inc ++++ b/tcg/ppc/tcg-target.c.inc +@@ -2353,8 +2353,9 @@ static void tcg_target_qemu_prologue(TCGContext *s) + tcg_out32(s, BCLR | BO_ALWAYS); + } + +-static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, +- const int *const_args) ++static void tcg_out_op(TCGContext *s, TCGOpcode opc, ++ const TCGArg args[TCG_MAX_OP_ARGS], ++ const int 
const_args[TCG_MAX_OP_ARGS]) + { + TCGArg a0, a1, a2; + int c; +@@ -3151,7 +3152,8 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, + + static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, + unsigned vecl, unsigned vece, +- const TCGArg *args, const int *const_args) ++ const TCGArg args[TCG_MAX_OP_ARGS], ++ const int const_args[TCG_MAX_OP_ARGS]) + { + static const uint32_t + add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, +diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc +index c5e096449b..79753c8af7 100644 +--- a/tcg/s390/tcg-target.c.inc ++++ b/tcg/s390/tcg-target.c.inc +@@ -1746,7 +1746,8 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, + case glue(glue(INDEX_op_,x),_i64) + + static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, +- const TCGArg *args, const int *const_args) ++ const TCGArg args[TCG_MAX_OP_ARGS], ++ const int const_args[TCG_MAX_OP_ARGS]) + { + S390Opcode op, op2; + TCGArg a0, a1, a2; +diff --git a/tcg/tcg.c b/tcg/tcg.c +index 43c6cf8f52..2d0116d29f 100644 +--- a/tcg/tcg.c ++++ b/tcg/tcg.c +@@ -109,8 +109,9 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, + static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); + static void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long arg); +-static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, +- const int *const_args); ++static void tcg_out_op(TCGContext *s, TCGOpcode opc, ++ const TCGArg args[TCG_MAX_OP_ARGS], ++ const int const_args[TCG_MAX_OP_ARGS]); + #if TCG_TARGET_MAYBE_vec + static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, + TCGReg dst, TCGReg src); +@@ -118,9 +119,10 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, + TCGReg dst, TCGReg base, intptr_t offset); + static void tcg_out_dupi_vec(TCGContext *s, TCGType type, + TCGReg dst, tcg_target_long arg); +-static void 
tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, +- unsigned vece, const TCGArg *args, +- const int *const_args); ++static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, ++ unsigned vecl, unsigned vece, ++ const TCGArg args[TCG_MAX_OP_ARGS], ++ const int const_args[TCG_MAX_OP_ARGS]); + #else + static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, + TCGReg dst, TCGReg src) +@@ -137,9 +139,10 @@ static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, + { + g_assert_not_reached(); + } +-static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, +- unsigned vece, const TCGArg *args, +- const int *const_args) ++static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, ++ unsigned vecl, unsigned vece, ++ const TCGArg args[TCG_MAX_OP_ARGS], ++ const int const_args[TCG_MAX_OP_ARGS]) + { + g_assert_not_reached(); + } +-- +2.18.4 + diff --git a/0033-net-eth-Simplify-_eth_get_rss_ex_dst_addr.patch b/0033-net-eth-Simplify-_eth_get_rss_ex_dst_addr.patch new file mode 100644 index 0000000..29a1b7e --- /dev/null +++ b/0033-net-eth-Simplify-_eth_get_rss_ex_dst_addr.patch @@ -0,0 +1,52 @@ +From 76ed390a52769c5ca64db5496a2adcb43df72035 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Fri, 15 Jan 2021 09:42:33 +0100 +Subject: net/eth: Simplify _eth_get_rss_ex_dst_addr() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The length field is already contained in the ip6_ext_hdr structure. +Check it direcly in eth_parse_ipv6_hdr() before calling +_eth_get_rss_ex_dst_addr(), which gets a bit simplified. 
+ +Signed-off-by: Philippe Mathieu-Daudé +--- + net/eth.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/net/eth.c b/net/eth.c +index 1e0821c5f8..7d4dd48c1f 100644 +--- a/net/eth.c ++++ b/net/eth.c +@@ -407,9 +407,7 @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags, + { + struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr; + +- if ((rthdr->rtype == 2) && +- (rthdr->len == sizeof(struct in6_address) / 8) && +- (rthdr->segleft == 1)) { ++ if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) { + + size_t input_size = iov_size(pkt, pkt_frags); + size_t bytes_read; +@@ -528,10 +526,12 @@ bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags, + } + + if (curr_ext_hdr_type == IP6_ROUTING) { +- info->rss_ex_dst_valid = +- _eth_get_rss_ex_dst_addr(pkt, pkt_frags, +- ip6hdr_off + info->full_hdr_len, +- &ext_hdr, &info->rss_ex_dst); ++ if (ext_hdr.ip6r_len == sizeof(struct in6_address) / 8) { ++ info->rss_ex_dst_valid = ++ _eth_get_rss_ex_dst_addr(pkt, pkt_frags, ++ ip6hdr_off + info->full_hdr_len, ++ &ext_hdr, &info->rss_ex_dst); ++ } + } else if (curr_ext_hdr_type == IP6_DESTINATON) { + info->rss_ex_src_valid = + _eth_get_rss_ex_src_addr(pkt, pkt_frags, +-- +2.18.4 + diff --git a/0034-net-eth-Fix-stack-buffer-overflow-in.patch b/0034-net-eth-Fix-stack-buffer-overflow-in.patch new file mode 100644 index 0000000..5be9d9e --- /dev/null +++ b/0034-net-eth-Fix-stack-buffer-overflow-in.patch @@ -0,0 +1,196 @@ +From 9abf30d739cfe5a7808f1e30ec85c0cfd73b67cb Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Fri, 15 Jan 2021 09:43:31 +0100 +Subject: net/eth: Fix stack-buffer-overflow in +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +QEMU fuzzer reported a buffer overflow in _eth_get_rss_ex_dst_addr() +reproducible as: + + $ cat << EOF | ./qemu-system-i386 -M pc-q35-5.0 \ + -accel qtest -monitor none \ + -serial none 
-nographic -qtest stdio + outl 0xcf8 0x80001010 + outl 0xcfc 0xe1020000 + outl 0xcf8 0x80001004 + outw 0xcfc 0x7 + write 0x25 0x1 0x86 + write 0x26 0x1 0xdd + write 0x4f 0x1 0x2b + write 0xe1020030 0x4 0x190002e1 + write 0xe102003a 0x2 0x0807 + write 0xe1020048 0x4 0x12077cdd + write 0xe1020400 0x4 0xba077cdd + write 0xe1020420 0x4 0x190002e1 + write 0xe1020428 0x4 0x3509d807 + write 0xe1020438 0x1 0xe2 + EOF + ================================================================= + ==2859770==ERROR: AddressSanitizer: stack-buffer-overflow on address 0x7ffdef904902 at pc 0x561ceefa78de bp 0x7ffdef904820 sp 0x7ffdef904818 + READ of size 1 at 0x7ffdef904902 thread T0 + #0 0x561ceefa78dd in _eth_get_rss_ex_dst_addr net/eth.c:410:17 + #1 0x561ceefa41fb in eth_parse_ipv6_hdr net/eth.c:532:17 + #2 0x561cef7de639 in net_tx_pkt_parse_headers hw/net/net_tx_pkt.c:228:14 + #3 0x561cef7dbef4 in net_tx_pkt_parse hw/net/net_tx_pkt.c:273:9 + #4 0x561ceec29f22 in e1000e_process_tx_desc hw/net/e1000e_core.c:730:29 + #5 0x561ceec28eac in e1000e_start_xmit hw/net/e1000e_core.c:927:9 + #6 0x561ceec1baab in e1000e_set_tdt hw/net/e1000e_core.c:2444:9 + #7 0x561ceebf300e in e1000e_core_write hw/net/e1000e_core.c:3256:9 + #8 0x561cef3cd4cd in e1000e_mmio_write hw/net/e1000e.c:110:5 + + Address 0x7ffdef904902 is located in stack of thread T0 at offset 34 in frame + #0 0x561ceefa320f in eth_parse_ipv6_hdr net/eth.c:486 + + This frame has 1 object(s): + [32, 34) 'ext_hdr' (line 487) <== Memory access at offset 34 overflows this variable + HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork + (longjmp and C++ exceptions *are* supported) + SUMMARY: AddressSanitizer: stack-buffer-overflow net/eth.c:410:17 in _eth_get_rss_ex_dst_addr + Shadow bytes around the buggy address: + 0x10003df188d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x10003df188e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x10003df188f0: 00 00 00 00 00 00 
00 00 00 00 00 00 00 00 00 00 + 0x10003df18900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x10003df18910: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 + =>0x10003df18920:[02]f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 + 0x10003df18930: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x10003df18940: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x10003df18950: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x10003df18960: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x10003df18970: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + Shadow byte legend (one shadow byte represents 8 application bytes): + Addressable: 00 + Partially addressable: 01 02 03 04 05 06 07 + Stack left redzone: f1 + Stack right redzone: f3 + ==2859770==ABORTING + +Similarly GCC 11 reports: + + net/eth.c: In function 'eth_parse_ipv6_hdr': + net/eth.c:410:15: error: array subscript 'struct ip6_ext_hdr_routing[0]' is partly outside array bounds of 'struct ip6_ext_hdr[1]' [-Werror=array-bounds] + 410 | if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) { + | ~~~~~^~~~~~~ + net/eth.c:485:24: note: while referencing 'ext_hdr' + 485 | struct ip6_ext_hdr ext_hdr; + | ^~~~~~~ + net/eth.c:410:38: error: array subscript 'struct ip6_ext_hdr_routing[0]' is partly outside array bounds of 'struct ip6_ext_hdr[1]' [-Werror=array-bounds] + 410 | if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) { + | ~~~~~^~~~~~~~~ + net/eth.c:485:24: note: while referencing 'ext_hdr' + 485 | struct ip6_ext_hdr ext_hdr; + | ^~~~~~~ + +In eth_parse_ipv6_hdr() we called iov_to_buf() to fill the 2 bytes of +the 'ext_hdr' buffer, then _eth_get_rss_ex_dst_addr() tries to access +beside the 2 filled bytes. + +Fix by reworking the function, filling the full rt_hdr buffer on the +stack calling iov_to_buf() again. 
+ +Cc: qemu-stable@nongnu.org +Buglink: https://bugs.launchpad.net/qemu/+bug/1879531 +Reported-by: Alexander Bulekov +Reported-by: Miroslav Rezanina +Fixes: eb700029c78 ("net_pkt: Extend packet abstraction as required by e1000e functionality") +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Miroslav Rezanina +--- + net/eth.c | 25 +++++++++++-------------- + tests/qtest/fuzz-test.c | 29 +++++++++++++++++++++++++++++ + 2 files changed, 40 insertions(+), 14 deletions(-) + +diff --git a/net/eth.c b/net/eth.c +index 7d4dd48c1f..ae4db37888 100644 +--- a/net/eth.c ++++ b/net/eth.c +@@ -401,26 +401,23 @@ eth_is_ip6_extension_header_type(uint8_t hdr_type) + + static bool + _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags, +- size_t rthdr_offset, ++ size_t ext_hdr_offset, + struct ip6_ext_hdr *ext_hdr, + struct in6_address *dst_addr) + { +- struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr; +- +- if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) { +- +- size_t input_size = iov_size(pkt, pkt_frags); +- size_t bytes_read; ++ struct ip6_ext_hdr_routing rt_hdr; ++ size_t input_size = iov_size(pkt, pkt_frags); ++ size_t bytes_read; + +- if (input_size < rthdr_offset + sizeof(*ext_hdr)) { +- return false; +- } ++ if (input_size < ext_hdr_offset + sizeof(rt_hdr)) { ++ return false; ++ } + +- bytes_read = iov_to_buf(pkt, pkt_frags, +- rthdr_offset + sizeof(*ext_hdr), +- dst_addr, sizeof(*dst_addr)); ++ bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset, ++ &rt_hdr, sizeof(rt_hdr)); + +- return bytes_read == sizeof(*dst_addr); ++ if ((rt_hdr.rtype == 2) && (rt_hdr.segleft == 1)) { ++ return bytes_read == sizeof(*ext_hdr) + sizeof(*dst_addr); + } + + return false; +diff --git a/tests/qtest/fuzz-test.c b/tests/qtest/fuzz-test.c +index 9cb4c42bde..2692d556d9 100644 +--- a/tests/qtest/fuzz-test.c ++++ b/tests/qtest/fuzz-test.c +@@ -47,6 +47,32 @@ static void test_lp1878642_pci_bus_get_irq_level_assert(void) + qtest_outl(s, 0x5d02, 
0xebed205d); + } + ++/* ++ * https://bugs.launchpad.net/qemu/+bug/1879531 ++ */ ++static void test_lp1879531_eth_get_rss_ex_dst_addr(void) ++{ ++ QTestState *s; ++ ++ s = qtest_init("-nographic -monitor none -serial none -M pc-q35-5.0"); ++ ++ qtest_outl(s, 0xcf8 0x80001010); ++ qtest_outl(s, 0xcfc 0xe1020000); ++ qtest_outl(s, 0xcf8 0x80001004); ++ qtest_outw(s, 0xcfc 0x7); ++ qtest_writeb(s, 0x25 0x1 0x86); ++ qtest_writeb(s, 0x26 0x1 0xdd); ++ qtest_writeb(s, 0x4f 0x1 0x2b); ++ qtest_writel(s, 0xe1020030, 0x190002e1); ++ qtest_writew(s, 0xe102003a, 0x0807); ++ qtest_writel(s, 0xe1020048, 0x12077cdd); ++ qtest_writel(s, 0xe1020400, 0xba077cdd); ++ qtest_writel(s, 0xe1020420, 0x190002e1); ++ qtest_writel(s, 0xe1020428, 0x3509d807); ++ qtest_writeb(s, 0xe1020438, 0xe2); ++ qtest_quit(s); ++} ++ + int main(int argc, char **argv) + { + const char *arch = qtest_get_arch(); +@@ -58,6 +84,9 @@ int main(int argc, char **argv) + test_lp1878263_megasas_zero_iov_cnt); + qtest_add_func("fuzz/test_lp1878642_pci_bus_get_irq_level_assert", + test_lp1878642_pci_bus_get_irq_level_assert); ++ qtest_add_func("fuzz/test_lp1879531_eth_get_rss_ex_dst_addr", ++ test_lp1879531_eth_get_rss_ex_dst_addr); ++ + } + + return g_test_run(); +-- +2.18.4 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index ab130d0..9c7c40a 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -64,7 +64,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.2.0 -Release: 2%{?dist} +Release: 2.1%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -124,6 +124,11 @@ Patch0024: 0024-redhat-s390x-add-rhel-8.4.0-compat-machine.patch Patch0027: 0027-block-vpc-Make-vpc_open-read-the-full-dynamic-header.patch Patch0028: 0028-GCC-11-warnings-hacks.patch Patch0029: 0029-Disable-problematic-tests-for-initial-build.patch +Patch0030: 0030-Revert-GCC-11-warnings-hacks.patch +Patch0031: 0031-s390x-Use-strpadcpy-for-copying-vm-name.patch +Patch0032: 0032-tcg-Restrict-tcg_out_op-to-arrays-of-TCG_MAX_OP_ARGS.patch +Patch0033: 0033-net-eth-Simplify-_eth_get_rss_ex_dst_addr.patch +Patch0034: 0034-net-eth-Fix-stack-buffer-overflow-in.patch BuildRequires: wget BuildRequires: rpm-build @@ -253,14 +258,12 @@ hardware for a full system such as a PC and its associated peripherals. Summary: qemu-kvm core components Requires: %{name}-common = %{epoch}:%{version}-%{release} Requires: qemu-img = %{epoch}:%{version}-%{release} - -# Temporary disable edk2 dependency as there's no edk2 available yet -#%ifarch %{ix86} x86_64 -#Requires: edk2-ovmf -#%endif -#%ifarch aarch64 -#Requires: edk2-aarch64 -#%endif +%ifarch %{ix86} x86_64 +Requires: edk2-ovmf +%endif +%ifarch aarch64 +Requires: edk2-aarch64 +%endif %ifarch %{power64} Requires: SLOF >= %{SLOF_gittagdate}-1.git%{SLOF_gittagcommit} @@ -1306,9 +1309,6 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %changelog -* Tue Jan 05 2021 Miroslav Rezanina - 5.2.0-2.el9 -- Rebuild for RHEL 9 - * Tue Dec 15 2020 Danilo Cesar Lemes de Paula - 5.2.0-2.el8 - kvm-redhat-Define-hw_compat_8_3.patch [bz#1893935] - kvm-redhat-Add-spapr_machine_rhel_default_class_options.patch [bz#1893935] From b5941f2b18eeb18bc498ea49de5f795aa889fcc5 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 21 Jan 2021 12:44:09 +0100 Subject: [PATCH 107/195] Synchronization with qemu-kvm-5.2.0-3.el8 --- ...me-Implement-fake-truncate-coroutine.patch | 84 +++ 0037-build-system-use-b_staticpic-false.patch | 77 +++ 
...-overflow-in-spapr_numa_associativit.patch | 131 ++++ ...ci-Fixup-capabilities-ordering-again.patch | 175 +++++ ...ix-Send-CCW-address-on-s390x-with-th.patch | 132 ++++ 0041-AArch64-machine-types-cleanup.patch | 188 +++++ 0042-hw-arm-virt-Add-8.4-Machine-type.patch | 55 ++ ...mory_region_notify_one-to-memory_reg.patch | 146 ++++ 0045-memory-Add-IOMMUTLBEvent.patch | 647 ++++++++++++++++++ ..._NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch | 88 +++ ...-page-walking-on-device-iotlb-invali.patch | 57 ++ ...range-assertion-if-notifier-is-DEVIO.patch | 69 ++ 0049-RHEL-Switch-pvpanic-test-to-q35.patch | 47 ++ 0050-8.4-x86-machine-type.patch | 144 ++++ ...hed-translation-in-case-it-points-to.patch | 153 +++++ qemu-kvm.spec | 62 +- udev-kvm-check.c | 19 +- 17 files changed, 2252 insertions(+), 22 deletions(-) create mode 100644 0035-block-nvme-Implement-fake-truncate-coroutine.patch create mode 100644 0037-build-system-use-b_staticpic-false.patch create mode 100644 0038-spapr-Fix-buffer-overflow-in-spapr_numa_associativit.patch create mode 100644 0039-usb-hcd-xhci-pci-Fixup-capabilities-ordering-again.patch create mode 100644 0040-qga-commands-posix-Send-CCW-address-on-s390x-with-th.patch create mode 100644 0041-AArch64-machine-types-cleanup.patch create mode 100644 0042-hw-arm-virt-Add-8.4-Machine-type.patch create mode 100644 0044-memory-Rename-memory_region_notify_one-to-memory_reg.patch create mode 100644 0045-memory-Add-IOMMUTLBEvent.patch create mode 100644 0046-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch create mode 100644 0047-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch create mode 100644 0048-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch create mode 100644 0049-RHEL-Switch-pvpanic-test-to-q35.patch create mode 100644 0050-8.4-x86-machine-type.patch create mode 100644 0051-memory-clamp-cached-translation-in-case-it-points-to.patch diff --git a/0035-block-nvme-Implement-fake-truncate-coroutine.patch 
b/0035-block-nvme-Implement-fake-truncate-coroutine.patch new file mode 100644 index 0000000..237e9e1 --- /dev/null +++ b/0035-block-nvme-Implement-fake-truncate-coroutine.patch @@ -0,0 +1,84 @@ +From f4c65e14055e208e331a83b9340998ecbe796b5f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Fri, 1 Jan 2021 17:18:13 -0500 +Subject: block/nvme: Implement fake truncate() coroutine +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Philippe Mathieu-Daudé +Message-id: <20210101171813.1734014-2-philmd@redhat.com> +Patchwork-id: 100503 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] block/nvme: Implement fake truncate() coroutine +Bugzilla: 1848834 +RH-Acked-by: Thomas Huth +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +NVMe drive cannot be shrunk. + +Since commit c80d8b06cfa we can use the @exact parameter (set +to false) to return success if the block device is larger than +the requested offset (even if we can not be shrunk). + +Use this parameter to implement the NVMe truncate() coroutine, +similarly how it is done for the iscsi and file-posix drivers +(see commit 82325ae5f2f "Evaluate @exact in protocol drivers"). + +Reported-by: Xueqiang Wei +Suggested-by: Max Reitz +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20201210125202.858656-1-philmd@redhat.com> +Signed-off-by: Max Reitz +(cherry picked from commit c8807c5edcc8bd8917a5b7531d47ef6a99e07bd8) +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Danilo C. L. 
de Paula +--- + block/nvme.c | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/block/nvme.c b/block/nvme.c +index a06a188d53..5a6fbacf4a 100644 +--- a/block/nvme.c ++++ b/block/nvme.c +@@ -1389,6 +1389,29 @@ out: + + } + ++static int coroutine_fn nvme_co_truncate(BlockDriverState *bs, int64_t offset, ++ bool exact, PreallocMode prealloc, ++ BdrvRequestFlags flags, Error **errp) ++{ ++ int64_t cur_length; ++ ++ if (prealloc != PREALLOC_MODE_OFF) { ++ error_setg(errp, "Unsupported preallocation mode '%s'", ++ PreallocMode_str(prealloc)); ++ return -ENOTSUP; ++ } ++ ++ cur_length = nvme_getlength(bs); ++ if (offset != cur_length && exact) { ++ error_setg(errp, "Cannot resize NVMe devices"); ++ return -ENOTSUP; ++ } else if (offset > cur_length) { ++ error_setg(errp, "Cannot grow NVMe devices"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} + + static int nvme_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp) +@@ -1523,6 +1546,7 @@ static BlockDriver bdrv_nvme = { + .bdrv_close = nvme_close, + .bdrv_getlength = nvme_getlength, + .bdrv_probe_blocksizes = nvme_probe_blocksizes, ++ .bdrv_co_truncate = nvme_co_truncate, + + .bdrv_co_preadv = nvme_co_preadv, + .bdrv_co_pwritev = nvme_co_pwritev, +-- +2.18.4 + diff --git a/0037-build-system-use-b_staticpic-false.patch b/0037-build-system-use-b_staticpic-false.patch new file mode 100644 index 0000000..b47336b --- /dev/null +++ b/0037-build-system-use-b_staticpic-false.patch @@ -0,0 +1,77 @@ +From 50b575b27b9daa331da08d10dbe6524de0580833 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 16 Dec 2020 17:53:08 -0500 +Subject: build-system: use b_staticpic=false +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20201216175308.1463822-3-pbonzini@redhat.com> +Patchwork-id: 100484 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/2] build-system: use b_staticpic=false +Bugzilla: 
1899619 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Danilo de Paula + +Meson 0.56.0 correctly builds non-PIC static libraries with -fPIE if +b_pie=true, while Meson 0.55.3 has a bug that causes the library +to use non-PIE objects and fail to link. Therefore, upstream +QEMU looks at the meson version in order to decide between +b_staticpic=false and b_staticpic=$pie. + +Unfortunately, b_staticpic=$pie still has a negative effect +on performance when you QEMU is compiled with --enable-pie +like RHEL does. Therefore, we have backported the fix +to Meson 0.55.3-3.el8. We can require it and unconditionally +use b_staticpic=false. + +The patch is RHEL-specific, but a similar change is included +in the larger patch for "meson: switch minimum meson version to +0.56.0". + +Signed-off-by: Paolo Bonzini +Signed-off-by: Danilo C. L. de Paula +--- + configure | 5 ----- + meson.build | 4 ++-- + redhat/qemu-kvm.spec.template | 2 +- + 3 files changed, 3 insertions(+), 8 deletions(-) + +diff --git a/configure b/configure +index 18c26e0389..d60097c0d4 100755 +--- a/configure ++++ b/configure +@@ -6979,10 +6979,6 @@ fi + mv $cross config-meson.cross + + rm -rf meson-private meson-info meson-logs +-unset staticpic +-if ! 
version_ge "$($meson --version)" 0.56.0; then +- staticpic=$(if test "$pie" = yes; then echo true; else echo false; fi) +-fi + NINJA=$ninja $meson setup \ + --prefix "$prefix" \ + --libdir "$libdir" \ +@@ -7002,7 +6998,6 @@ NINJA=$ninja $meson setup \ + -Dwerror=$(if test "$werror" = yes; then echo true; else echo false; fi) \ + -Dstrip=$(if test "$strip_opt" = yes; then echo true; else echo false; fi) \ + -Db_pie=$(if test "$pie" = yes; then echo true; else echo false; fi) \ +- ${staticpic:+-Db_staticpic=$staticpic} \ + -Db_coverage=$(if test "$gcov" = yes; then echo true; else echo false; fi) \ + -Dmalloc=$malloc -Dmalloc_trim=$malloc_trim -Dsparse=$sparse \ + -Dkvm=$kvm -Dhax=$hax -Dwhpx=$whpx -Dhvf=$hvf \ +diff --git a/meson.build b/meson.build +index 8c38b2ea36..c482d075d5 100644 +--- a/meson.build ++++ b/meson.build +@@ -1,6 +1,6 @@ + project('qemu', ['c'], meson_version: '>=0.55.0', +- default_options: ['warning_level=1', 'c_std=gnu99', 'cpp_std=gnu++11', 'b_colorout=auto'] + +- (meson.version().version_compare('>=0.56.0') ? 
[ 'b_staticpic=false' ] : []), ++ default_options: ['warning_level=1', 'c_std=gnu99', 'cpp_std=gnu++11', 'b_colorout=auto', ++ 'b_staticpic=false' ], + version: run_command('head', meson.source_root() / 'VERSION').stdout().strip()) + + not_found = dependency('', required: false) diff --git a/0038-spapr-Fix-buffer-overflow-in-spapr_numa_associativit.patch b/0038-spapr-Fix-buffer-overflow-in-spapr_numa_associativit.patch new file mode 100644 index 0000000..5c7f88d --- /dev/null +++ b/0038-spapr-Fix-buffer-overflow-in-spapr_numa_associativit.patch @@ -0,0 +1,131 @@ +From d66ae008007853df7d3a24bd2d5e7494f53f007c Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Thu, 7 Jan 2021 10:10:20 -0500 +Subject: spapr: Fix buffer overflow in spapr_numa_associativity_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Greg Kurz +Message-id: <20210107101020.579456-2-gkurz@redhat.com> +Patchwork-id: 100515 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] spapr: Fix buffer overflow in spapr_numa_associativity_init() +Bugzilla: 1908693 +RH-Acked-by: David Gibson +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Laurent Vivier + +From: Greg Kurz + +Running a guest with 128 NUMA nodes crashes QEMU: + +../../util/error.c:59: error_setv: Assertion `*errp == NULL' failed. + +The crash happens when setting the FWNMI migration blocker: + +2861 if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI) == SPAPR_CAP_ON) { +2862 /* Create the error string for live migration blocker */ +2863 error_setg(&spapr->fwnmi_migration_blocker, +2864 "A machine check is being handled during migration. The handler" +2865 "may run and log hardware error on the destination"); +2866 } + +Inspection reveals that papr->fwnmi_migration_blocker isn't NULL: + +(gdb) p spapr->fwnmi_migration_blocker +$1 = (Error *) 0x8000000004000000 + +Since this is the only place where papr->fwnmi_migration_blocker is +set, this means someone wrote there in our back. 
Further analysis +points to spapr_numa_associativity_init(), especially the part +that initializes the associative arrays for NVLink GPUs: + + max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM; + +ie. max_nodes_with_gpus = 128 + 6, but the array isn't sized to +accommodate the 6 extra nodes: + +struct SpaprMachineState { + . + . + . + uint32_t numa_assoc_array[MAX_NODES][NUMA_ASSOC_SIZE]; + + Error *fwnmi_migration_blocker; +}; + +and the following loops happily overwrite spapr->fwnmi_migration_blocker, +and probably more: + + for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) { + spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); + + for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) { + uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ? + SPAPR_GPU_NUMA_ID : cpu_to_be32(i); + spapr->numa_assoc_array[i][j] = gpu_assoc; + } + + spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i); + } + +Fix the size of the array. This requires "hw/ppc/spapr.h" to see +NVGPU_MAX_NUM. Including "hw/pci-host/spapr.h" introduces a +circular dependency that breaks the build, so this moves the +definition of NVGPU_MAX_NUM to "hw/ppc/spapr.h" instead. + +Reported-by: Min Deng +BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1908693 +Fixes: dd7e1d7ae431 ("spapr_numa: move NVLink2 associativity handling to spapr_numa.c") +Cc: danielhb413@gmail.com +Signed-off-by: Greg Kurz +Message-Id: <160829960428.734871.12634150161215429514.stgit@bahia.lan> +Reviewed-by: Daniel Henrique Barboza +Signed-off-by: David Gibson +(cherry picked from commit 30499fdd9883026e106d74e8199e2f1311fd4011) +Signed-off-by: Greg Kurz +Signed-off-by: Danilo C. L. 
de Paula +--- + include/hw/pci-host/spapr.h | 2 -- + include/hw/ppc/spapr.h | 5 ++++- + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h +index 4f58f0223b..bd014823a9 100644 +--- a/include/hw/pci-host/spapr.h ++++ b/include/hw/pci-host/spapr.h +@@ -115,8 +115,6 @@ struct SpaprPhbState { + #define SPAPR_PCI_NV2RAM64_WIN_BASE SPAPR_PCI_LIMIT + #define SPAPR_PCI_NV2RAM64_WIN_SIZE (2 * TiB) /* For up to 6 GPUs 256GB each */ + +-/* Max number of these GPUsper a physical box */ +-#define NVGPU_MAX_NUM 6 + /* Max number of NVLinks per GPU in any physical box */ + #define NVGPU_MAX_LINKS 3 + +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index ba2d81404b..28bbf07f8f 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -112,6 +112,9 @@ typedef enum { + #define NUMA_ASSOC_SIZE (MAX_DISTANCE_REF_POINTS + 1) + #define VCPU_ASSOC_SIZE (NUMA_ASSOC_SIZE + 1) + ++/* Max number of these GPUsper a physical box */ ++#define NVGPU_MAX_NUM 6 ++ + typedef struct SpaprCapabilities SpaprCapabilities; + struct SpaprCapabilities { + uint8_t caps[SPAPR_CAP_NUM]; +@@ -243,7 +246,7 @@ struct SpaprMachineState { + unsigned gpu_numa_id; + SpaprTpmProxy *tpm_proxy; + +- uint32_t numa_assoc_array[MAX_NODES][NUMA_ASSOC_SIZE]; ++ uint32_t numa_assoc_array[MAX_NODES + NVGPU_MAX_NUM][NUMA_ASSOC_SIZE]; + + Error *fwnmi_migration_blocker; + }; +-- +2.18.4 + diff --git a/0039-usb-hcd-xhci-pci-Fixup-capabilities-ordering-again.patch b/0039-usb-hcd-xhci-pci-Fixup-capabilities-ordering-again.patch new file mode 100644 index 0000000..01edcf3 --- /dev/null +++ b/0039-usb-hcd-xhci-pci-Fixup-capabilities-ordering-again.patch @@ -0,0 +1,175 @@ +From e85ee5f0196b85ad6f9faa02571325831b612c37 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Thu, 7 Jan 2021 14:12:25 -0500 +Subject: usb/hcd-xhci-pci: Fixup capabilities ordering (again) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20210107141225.19709-2-dgilbert@redhat.com> +Patchwork-id: 100518 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] usb/hcd-xhci-pci: Fixup capabilities ordering (again) +Bugzilla: 1912846 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Gerd Hoffmann + +From: "Dr. David Alan Gilbert" + +Allow the reordering of the PCIe capabilities for MSI around the PCIe +capability. +This changed incompatibly way back in QEMU 2.7 and in RHEL we fixed +it up in bz 1447874 unconditionally putting it back. + +The xhci code got reorganised between 5.0 and 5.2; and we lost this +fixup on rebase. + +This time, add it as a property, and enable the property for old +machine types; this will allow us to drop this patch once the +old machine types go. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/core/machine.c | 4 ++- + hw/usb/hcd-xhci-pci.c | 59 +++++++++++++++++++++++++++++++++---------- + hw/usb/hcd-xhci-pci.h | 1 + + 3 files changed, 49 insertions(+), 15 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index aba05ad676..68495b9411 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -29,7 +29,7 @@ + #include "migration/vmstate.h" + + /* +- * The same as hw_compat_5_1 ++ * Mostly the same as hw_compat_5_1 + */ + GlobalProperty hw_compat_rhel_8_3[] = { + /* hw_compat_rhel_8_3 from hw_compat_5_1 */ +@@ -46,6 +46,8 @@ GlobalProperty hw_compat_rhel_8_3[] = { + { "nvme", "use-intel-id", "on"}, + /* hw_compat_rhel_8_3 from hw_compat_5_1 */ + { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ ++ /* hw_compat_rhel_8_3 bz 1912846 */ ++ { "pci-xhci", "x-rh-late-msi-cap", "off" }, + }; + const size_t hw_compat_rhel_8_3_len = G_N_ELEMENTS(hw_compat_rhel_8_3); + +diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c +index bba628d3d2..d045a2a8be 100644 +--- a/hw/usb/hcd-xhci-pci.c ++++ b/hw/usb/hcd-xhci-pci.c +@@ -101,6 +101,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id) + return 0; + } + ++/* RH bz 1912846 */ ++static bool usb_xhci_pci_add_msi(struct PCIDevice *dev, Error **errp) ++{ ++ int ret; ++ Error *err = NULL; ++ XHCIPciState *s = XHCI_PCI(dev); ++ ++ ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); ++ /* ++ * Any error other than -ENOTSUP(board's MSI support is broken) ++ * is a programming error ++ */ ++ assert(!ret || ret == -ENOTSUP); ++ if (ret && s->msi == ON_OFF_AUTO_ON) { ++ /* Can't satisfy user's explicit msi=on request, fail */ ++ error_append_hint(&err, "You have to use msi=auto (default) or " ++ "msi=off with this machine type.\n"); ++ error_propagate(errp, err); ++ return true; ++ } ++ assert(!err || s->msi == ON_OFF_AUTO_AUTO); ++ /* With msi=auto, we fall back to MSI off silently */ ++ error_free(err); ++ ++ return false; ++} ++ + static void 
usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + { + int ret; +@@ -124,23 +151,12 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + s->xhci.nec_quirks = true; + } + +- if (s->msi != ON_OFF_AUTO_OFF) { +- ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); +- /* +- * Any error other than -ENOTSUP(board's MSI support is broken) +- * is a programming error +- */ +- assert(!ret || ret == -ENOTSUP); +- if (ret && s->msi == ON_OFF_AUTO_ON) { +- /* Can't satisfy user's explicit msi=on request, fail */ +- error_append_hint(&err, "You have to use msi=auto (default) or " +- "msi=off with this machine type.\n"); ++ if (s->msi != ON_OFF_AUTO_OFF && s->rh_late_msi_cap) { ++ /* This gives the behaviour from 5.2.0 onwards, lspci shows 90,a0,70 */ ++ if (usb_xhci_pci_add_msi(dev, &err)) { + error_propagate(errp, err); + return; + } +- assert(!err || s->msi == ON_OFF_AUTO_AUTO); +- /* With msi=auto, we fall back to MSI off silently */ +- error_free(err); + } + pci_register_bar(dev, 0, + PCI_BASE_ADDRESS_SPACE_MEMORY | +@@ -153,6 +169,14 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + assert(ret > 0); + } + ++ /* RH bz 1912846 */ ++ if (s->msi != ON_OFF_AUTO_OFF && !s->rh_late_msi_cap) { ++ /* This gives the older RH machine behaviour, lspci shows 90,70,a0 */ ++ if (usb_xhci_pci_add_msi(dev, &err)) { ++ error_propagate(errp, err); ++ return; ++ } ++ } + if (s->msix != ON_OFF_AUTO_OFF) { + /* TODO check for errors, and should fail when msix=on */ + msix_init(dev, s->xhci.numintrs, +@@ -197,11 +221,18 @@ static void xhci_instance_init(Object *obj) + qdev_alias_all_properties(DEVICE(&s->xhci), obj); + } + ++static Property xhci_pci_properties[] = { ++ /* RH bz 1912846 */ ++ DEFINE_PROP_BOOL("x-rh-late-msi-cap", XHCIPciState, rh_late_msi_cap, true), ++ DEFINE_PROP_END_OF_LIST() ++}; ++ + static void xhci_class_init(ObjectClass *klass, void *data) + { + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + DeviceClass 
*dc = DEVICE_CLASS(klass); + ++ device_class_set_props(dc, xhci_pci_properties); + dc->reset = xhci_pci_reset; + dc->vmsd = &vmstate_xhci_pci; + set_bit(DEVICE_CATEGORY_USB, dc->categories); +diff --git a/hw/usb/hcd-xhci-pci.h b/hw/usb/hcd-xhci-pci.h +index c193f79443..086a1feb1e 100644 +--- a/hw/usb/hcd-xhci-pci.h ++++ b/hw/usb/hcd-xhci-pci.h +@@ -39,6 +39,7 @@ typedef struct XHCIPciState { + XHCIState xhci; + OnOffAuto msi; + OnOffAuto msix; ++ bool rh_late_msi_cap; /* bz 1912846 */ + } XHCIPciState; + + #endif +-- +2.18.4 + diff --git a/0040-qga-commands-posix-Send-CCW-address-on-s390x-with-th.patch b/0040-qga-commands-posix-Send-CCW-address-on-s390x-with-th.patch new file mode 100644 index 0000000..cd881aa --- /dev/null +++ b/0040-qga-commands-posix-Send-CCW-address-on-s390x-with-th.patch @@ -0,0 +1,132 @@ +From 0e1bc444240fb2d8d3ee65533baaa72a7267c53a Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jan 2021 12:27:19 -0500 +Subject: qga/commands-posix: Send CCW address on s390x with the fsinfo data +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +Message-id: <20210108122719.73201-2-thuth@redhat.com> +Patchwork-id: 100532 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 1/1] qga/commands-posix: Send CCW address on s390x with the fsinfo data +Bugzilla: 1755075 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +We need the CCW address on the libvirt side to correctly identify +the disk, so add this information to the GuestDiskAddress on s390x. + +Signed-off-by: Thomas Huth +Reviewed-by: Cornelia Huck +Reviewed-by: Michael Roth +Message-Id: <20201127082353.448251-1-thuth@redhat.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 5b723a5d8df44b69b8ba350e643059c8fd889315) +Signed-off-by: Danilo C. L. 
de Paula +--- + qga/commands-posix.c | 34 ++++++++++++++++++++++++++++++++++ + qga/qapi-schema.json | 20 +++++++++++++++++++- + 2 files changed, 53 insertions(+), 1 deletion(-) + +diff --git a/qga/commands-posix.c b/qga/commands-posix.c +index c089e38120..5aa5eff84f 100644 +--- a/qga/commands-posix.c ++++ b/qga/commands-posix.c +@@ -1029,6 +1029,38 @@ static bool build_guest_fsinfo_for_nonpci_virtio(char const *syspath, + return true; + } + ++/* ++ * Store disk device info for CCW devices (s390x channel I/O devices). ++ * Returns true if information has been stored, or false for failure. ++ */ ++static bool build_guest_fsinfo_for_ccw_dev(char const *syspath, ++ GuestDiskAddress *disk, ++ Error **errp) ++{ ++ unsigned int cssid, ssid, subchno, devno; ++ char *p; ++ ++ p = strstr(syspath, "/devices/css"); ++ if (!p || sscanf(p + 12, "%*x/%x.%x.%x/%*x.%*x.%x/", ++ &cssid, &ssid, &subchno, &devno) < 4) { ++ g_debug("could not parse ccw device sysfs path: %s", syspath); ++ return false; ++ } ++ ++ disk->has_ccw_address = true; ++ disk->ccw_address = g_new0(GuestCCWAddress, 1); ++ disk->ccw_address->cssid = cssid; ++ disk->ccw_address->ssid = ssid; ++ disk->ccw_address->subchno = subchno; ++ disk->ccw_address->devno = devno; ++ ++ if (strstr(p, "/virtio")) { ++ build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp); ++ } ++ ++ return true; ++} ++ + /* Store disk device info specified by @sysfs into @fs */ + static void build_guest_fsinfo_for_real_device(char const *syspath, + GuestFilesystemInfo *fs, +@@ -1081,6 +1113,8 @@ static void build_guest_fsinfo_for_real_device(char const *syspath, + + if (strstr(syspath, "/devices/pci")) { + has_hwinf = build_guest_fsinfo_for_pci_dev(syspath, disk, errp); ++ } else if (strstr(syspath, "/devices/css")) { ++ has_hwinf = build_guest_fsinfo_for_ccw_dev(syspath, disk, errp); + } else if (strstr(syspath, "/virtio")) { + has_hwinf = build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp); + } else { +diff --git 
a/qga/qapi-schema.json b/qga/qapi-schema.json +index 3b3d1d0bd9..9a82b7e952 100644 +--- a/qga/qapi-schema.json ++++ b/qga/qapi-schema.json +@@ -846,6 +846,22 @@ + 'data': {'domain': 'int', 'bus': 'int', + 'slot': 'int', 'function': 'int'} } + ++## ++# @GuestCCWAddress: ++# ++# @cssid: channel subsystem image id ++# @ssid: subchannel set id ++# @subchno: subchannel number ++# @devno: device number ++# ++# Since: 6.0 ++## ++{ 'struct': 'GuestCCWAddress', ++ 'data': {'cssid': 'int', ++ 'ssid': 'int', ++ 'subchno': 'int', ++ 'devno': 'int'} } ++ + ## + # @GuestDiskAddress: + # +@@ -856,6 +872,7 @@ + # @unit: unit id + # @serial: serial number (since: 3.1) + # @dev: device node (POSIX) or device UNC (Windows) (since: 3.1) ++# @ccw-address: CCW address on s390x (since: 6.0) + # + # Since: 2.2 + ## +@@ -863,7 +880,8 @@ + 'data': {'pci-controller': 'GuestPCIAddress', + 'bus-type': 'GuestDiskBusType', + 'bus': 'int', 'target': 'int', 'unit': 'int', +- '*serial': 'str', '*dev': 'str'} } ++ '*serial': 'str', '*dev': 'str', ++ '*ccw-address': 'GuestCCWAddress'} } + + ## + # @GuestDiskInfo: +-- +2.18.4 + diff --git a/0041-AArch64-machine-types-cleanup.patch b/0041-AArch64-machine-types-cleanup.patch new file mode 100644 index 0000000..0ac8f70 --- /dev/null +++ b/0041-AArch64-machine-types-cleanup.patch @@ -0,0 +1,188 @@ +From bfa3dc6e290c7b4f7f8825e4d4320ba062ed445a Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Sat, 9 Jan 2021 22:19:27 -0500 +Subject: AArch64 machine types cleanup + +RH-Author: Andrew Jones +Message-id: <20210109221928.31407-2-drjones@redhat.com> +Patchwork-id: 100547 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 1/2] AArch64 machine types cleanup +Bugzilla: 1895276 +RH-Acked-by: Gavin Shan +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Auger Eric +RH-Acked-by: Thomas Huth + +No functional change here, just a reduction of downstream-only +changes and whitespace differences. Also the removal of a nested +'#if 0 /* disabled for RHEL */' block. 
+ +Signed-off-by: Andrew Jones +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/virt.c | 69 +++++++++++++++++++++++---------------------------- + 1 file changed, 31 insertions(+), 38 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 21e0485ac5..530072fce0 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -123,7 +123,6 @@ + static const TypeInfo rhel##m##n##s##_machvirt_info = { \ + .name = MACHINE_TYPE_NAME("virt-rhel" # m "." # n "." # s), \ + .parent = TYPE_RHEL_MACHINE, \ +- .instance_init = rhel##m##n##s##_virt_instance_init, \ + .class_init = rhel##m##n##s##_virt_class_init, \ + }; \ + static void rhel##m##n##s##_machvirt_init(void) \ +@@ -2098,8 +2097,8 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) + + vms->virt = value; + } +- + #endif /* disabled for RHEL */ ++ + static bool virt_get_highmem(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2167,14 +2166,13 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) + + vms->ras = value; + } +-#if 0 /* Disabled for Red Hat Enterprise Linux */ ++ + static bool virt_get_mte(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); + + return vms->mte; + } +-#endif /* disabled for RHEL */ + + static void virt_set_mte(Object *obj, bool value, Error **errp) + { +@@ -2182,7 +2180,8 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) + + vms->mte = value; + } +-#endif ++#endif /* disabled for RHEL */ ++ + static char *virt_get_gic_version(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2818,25 +2817,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Enable ACPI"); + } + +-static const TypeInfo rhel_machine_info = { +- .name = TYPE_RHEL_MACHINE, +- .parent = TYPE_MACHINE, +- .abstract = true, +- .instance_size = sizeof(VirtMachineState), +- .class_size = sizeof(VirtMachineClass), +- .class_init = rhel_machine_class_init, +- .interfaces = 
(InterfaceInfo[]) { +- { TYPE_HOTPLUG_HANDLER }, +- { } +- }, +-}; +- +-static void rhel_machine_init(void) +-{ +- type_register_static(&rhel_machine_info); +-} +-type_init(rhel_machine_init); +- + static void rhel_virt_instance_init(Object *obj) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2844,22 +2824,23 @@ static void rhel_virt_instance_init(Object *obj) + + /* EL3 is disabled by default and non-configurable for RHEL */ + vms->secure = false; ++ + /* EL2 is disabled by default and non-configurable for RHEL */ + vms->virt = false; +- /* High memory is enabled by default for RHEL */ ++ ++ /* High memory is enabled by default */ + vms->highmem = true; + object_property_add_bool(obj, "highmem", virt_get_highmem, + virt_set_highmem); + object_property_set_description(obj, "highmem", + "Set on/off to enable/disable using " + "physical address space above 32 bits"); +- + vms->gic_version = VIRT_GIC_VERSION_NOSEL; + object_property_add_str(obj, "gic-version", virt_get_gic_version, + virt_set_gic_version); + object_property_set_description(obj, "gic-version", + "Set GIC version. " +- "Valid values are 2, 3 and host"); ++ "Valid values are 2, 3, host and max"); + + vms->highmem_ecam = !vmc->no_highmem_ecam; + +@@ -2882,18 +2863,36 @@ static void rhel_virt_instance_init(Object *obj) + "Set the IOMMU type. " + "Valid values are none and smmuv3"); + ++ /* Default disallows RAS instantiation and is non-configurable for RHEL */ + vms->ras = false; +- /* MTE is disabled by default. 
*/ ++ ++ /* MTE is disabled by default and non-configurable for RHEL */ + vms->mte = false; + +- vms->irqmap=a15irqmap; ++ vms->irqmap = a15irqmap; ++ + virt_flash_create(vms); + } + +-static void rhel830_virt_instance_init(Object *obj) ++static const TypeInfo rhel_machine_info = { ++ .name = TYPE_RHEL_MACHINE, ++ .parent = TYPE_MACHINE, ++ .abstract = true, ++ .instance_size = sizeof(VirtMachineState), ++ .class_size = sizeof(VirtMachineClass), ++ .class_init = rhel_machine_class_init, ++ .instance_init = rhel_virt_instance_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_HOTPLUG_HANDLER }, ++ { } ++ }, ++}; ++ ++static void rhel_machine_init(void) + { +- rhel_virt_instance_init(obj); ++ type_register_static(&rhel_machine_info); + } ++type_init(rhel_machine_init); + + static void rhel830_virt_options(MachineClass *mc) + { +@@ -2901,16 +2900,10 @@ static void rhel830_virt_options(MachineClass *mc) + } + DEFINE_RHEL_MACHINE_AS_LATEST(8, 3, 0) + +-static void rhel820_virt_instance_init(Object *obj) +-{ +- rhel_virt_instance_init(obj); +-} +- + static void rhel820_virt_options(MachineClass *mc) + { + rhel830_virt_options(mc); +- compat_props_add(mc->compat_props, hw_compat_rhel_8_2, +- hw_compat_rhel_8_2_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, hw_compat_rhel_8_2_len); + mc->numa_mem_supported = true; + mc->auto_enable_numa_with_memdev = false; + } +-- +2.18.4 + diff --git a/0042-hw-arm-virt-Add-8.4-Machine-type.patch b/0042-hw-arm-virt-Add-8.4-Machine-type.patch new file mode 100644 index 0000000..89753c9 --- /dev/null +++ b/0042-hw-arm-virt-Add-8.4-Machine-type.patch @@ -0,0 +1,55 @@ +From 1bc68127d1531ed519cb839844febaecb2a3f6d0 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Sat, 9 Jan 2021 22:19:28 -0500 +Subject: hw/arm/virt: Add 8.4 Machine type + +RH-Author: Andrew Jones +Message-id: <20210109221928.31407-3-drjones@redhat.com> +Patchwork-id: 100548 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 2/2] hw/arm/virt: Add 8.4 Machine 
type +Bugzilla: 1895276 +RH-Acked-by: Gavin Shan +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Auger Eric +RH-Acked-by: Thomas Huth + +8.4 isn't much different than 8.3, except it adds the steal-time +feature and enables it by default. + +Signed-off-by: Andrew Jones +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/virt.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 530072fce0..208c360342 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2894,11 +2894,21 @@ static void rhel_machine_init(void) + } + type_init(rhel_machine_init); + +-static void rhel830_virt_options(MachineClass *mc) ++static void rhel840_virt_options(MachineClass *mc) + { + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); + } +-DEFINE_RHEL_MACHINE_AS_LATEST(8, 3, 0) ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 4, 0) ++ ++static void rhel830_virt_options(MachineClass *mc) ++{ ++ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); ++ ++ rhel840_virt_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); ++ vmc->no_kvm_steal_time = true; ++} ++DEFINE_RHEL_MACHINE(8, 3, 0) + + static void rhel820_virt_options(MachineClass *mc) + { +-- +2.18.4 + diff --git a/0044-memory-Rename-memory_region_notify_one-to-memory_reg.patch b/0044-memory-Rename-memory_region_notify_one-to-memory_reg.patch new file mode 100644 index 0000000..419535a --- /dev/null +++ b/0044-memory-Rename-memory_region_notify_one-to-memory_reg.patch @@ -0,0 +1,146 @@ +From 256180b78107813b8e8c292bc799f5d7c7676cd2 Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Mon, 11 Jan 2021 14:36:11 -0500 +Subject: memory: Rename memory_region_notify_one to + memory_region_notify_iommu_one +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210111143615.303645-2-eperezma@redhat.com> +Patchwork-id: 100570 +O-Subject: 
[RHEL-AV-8.4.0 qemu-kvm PATCH 1/5] memory: Rename memory_region_notify_one to memory_region_notify_iommu_one +Bugzilla: 1845758 +RH-Acked-by: Xiao Wang +RH-Acked-by: David Hildenbrand +RH-Acked-by: Peter Xu + +Previous name didn't reflect the iommu operation. + +Signed-off-by: Eugenio Pérez +Reviewed-by: Peter Xu +Reviewed-by: David Gibson +Reviewed-by: Juan Quintela +Reviewed-by: Eric Auger +Acked-by: Jason Wang +Message-Id: <20201116165506.31315-2-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 3b5ebf8532afdc1518bd8b0961ed802bc3f5f07c) +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/smmu-common.c | 2 +- + hw/arm/smmuv3.c | 2 +- + hw/i386/intel_iommu.c | 4 ++-- + include/exec/memory.h | 6 +++--- + softmmu/memory.c | 6 +++--- + 5 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index 3838db1395..88d2c454f0 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -472,7 +472,7 @@ static void smmu_unmap_notifier_range(IOMMUNotifier *n) + entry.perm = IOMMU_NONE; + entry.addr_mask = n->end - n->start; + +- memory_region_notify_one(n, &entry); ++ memory_region_notify_iommu_one(n, &entry); + } + + /* Unmap all notifiers attached to @mr */ +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 22607c3784..273f5f7dce 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -828,7 +828,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, + entry.addr_mask = num_pages * (1 << granule) - 1; + entry.perm = IOMMU_NONE; + +- memory_region_notify_one(n, &entry); ++ memory_region_notify_iommu_one(n, &entry); + } + + /* invalidate an asid/iova range tuple in all mr's */ +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index 70ac837733..067593b9e4 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -3497,7 +3497,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, 
IOMMUNotifier *n) + /* This field is meaningless for unmap */ + entry.translated_addr = 0; + +- memory_region_notify_one(n, &entry); ++ memory_region_notify_iommu_one(n, &entry); + + start += mask; + remain -= mask; +@@ -3535,7 +3535,7 @@ static void vtd_address_space_refresh_all(IntelIOMMUState *s) + + static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private) + { +- memory_region_notify_one((IOMMUNotifier *)private, entry); ++ memory_region_notify_iommu_one((IOMMUNotifier *)private, entry); + return 0; + } + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 0f3e6bcd5e..d8456ccf52 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -236,7 +236,7 @@ enum IOMMUMemoryRegionAttr { + * The IOMMU implementation must use the IOMMU notifier infrastructure + * to report whenever mappings are changed, by calling + * memory_region_notify_iommu() (or, if necessary, by calling +- * memory_region_notify_one() for each registered notifier). ++ * memory_region_notify_iommu_one() for each registered notifier). + * + * Conceptually an IOMMU provides a mapping from input address + * to an output TLB entry. If the IOMMU is aware of memory transaction +@@ -1346,7 +1346,7 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, + IOMMUTLBEntry entry); + + /** +- * memory_region_notify_one: notify a change in an IOMMU translation ++ * memory_region_notify_iommu_one: notify a change in an IOMMU translation + * entry to a single notifier + * + * This works just like memory_region_notify_iommu(), but it only +@@ -1357,7 +1357,7 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, + * replaces all old entries for the same virtual I/O address range. + * Deleted entries have .@perm == 0. 
+ */ +-void memory_region_notify_one(IOMMUNotifier *notifier, ++void memory_region_notify_iommu_one(IOMMUNotifier *notifier, + IOMMUTLBEntry *entry); + + /** +diff --git a/softmmu/memory.c b/softmmu/memory.c +index 11ca94d037..44de610c72 100644 +--- a/softmmu/memory.c ++++ b/softmmu/memory.c +@@ -1942,8 +1942,8 @@ void memory_region_unregister_iommu_notifier(MemoryRegion *mr, + memory_region_update_iommu_notify_flags(iommu_mr, NULL); + } + +-void memory_region_notify_one(IOMMUNotifier *notifier, +- IOMMUTLBEntry *entry) ++void memory_region_notify_iommu_one(IOMMUNotifier *notifier, ++ IOMMUTLBEntry *entry) + { + IOMMUNotifierFlag request_flags; + hwaddr entry_end = entry->iova + entry->addr_mask; +@@ -1979,7 +1979,7 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, + + IOMMU_NOTIFIER_FOREACH(iommu_notifier, iommu_mr) { + if (iommu_notifier->iommu_idx == iommu_idx) { +- memory_region_notify_one(iommu_notifier, &entry); ++ memory_region_notify_iommu_one(iommu_notifier, &entry); + } + } + } +-- +2.18.4 + diff --git a/0045-memory-Add-IOMMUTLBEvent.patch b/0045-memory-Add-IOMMUTLBEvent.patch new file mode 100644 index 0000000..0cc568b --- /dev/null +++ b/0045-memory-Add-IOMMUTLBEvent.patch @@ -0,0 +1,647 @@ +From d282fdd88e60aa081365d8e0903ceb18743ccc9d Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Mon, 11 Jan 2021 14:36:12 -0500 +Subject: memory: Add IOMMUTLBEvent +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210111143615.303645-3-eperezma@redhat.com> +Patchwork-id: 100568 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/5] memory: Add IOMMUTLBEvent +Bugzilla: 1845758 +RH-Acked-by: Xiao Wang +RH-Acked-by: David Hildenbrand +RH-Acked-by: Peter Xu + +This way we can tell between regular IOMMUTLBEntry (entry of IOMMU +hardware) and notifications. 
+ +In the notifications, we set explicitly if it is a MAPs or an UNMAP, +instead of trusting in entry permissions to differentiate them. + +Signed-off-by: Eugenio Pérez +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Acked-by: Jason Wang +Message-Id: <20201116165506.31315-3-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Matthew Rosato +Acked-by: David Gibson +(cherry picked from commit 5039caf3c449c49e625d34e134463260cf8e00e0) +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. de Paula +--- + hw/arm/smmu-common.c | 13 +++--- + hw/arm/smmuv3.c | 13 +++--- + hw/i386/intel_iommu.c | 88 ++++++++++++++++++++++------------------ + hw/misc/tz-mpc.c | 32 ++++++++------- + hw/ppc/spapr_iommu.c | 15 +++---- + hw/s390x/s390-pci-inst.c | 27 +++++++----- + hw/virtio/virtio-iommu.c | 30 +++++++------- + include/exec/memory.h | 27 ++++++------ + softmmu/memory.c | 20 ++++----- + 9 files changed, 143 insertions(+), 122 deletions(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index 88d2c454f0..405d5c5325 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -465,14 +465,15 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid) + /* Unmap the whole notifier's range */ + static void smmu_unmap_notifier_range(IOMMUNotifier *n) + { +- IOMMUTLBEntry entry; ++ IOMMUTLBEvent event; + +- entry.target_as = &address_space_memory; +- entry.iova = n->start; +- entry.perm = IOMMU_NONE; +- entry.addr_mask = n->end - n->start; ++ event.type = IOMMU_NOTIFIER_UNMAP; ++ event.entry.target_as = &address_space_memory; ++ event.entry.iova = n->start; ++ event.entry.perm = IOMMU_NONE; ++ event.entry.addr_mask = n->end - n->start; + +- memory_region_notify_iommu_one(n, &entry); ++ memory_region_notify_iommu_one(n, &event); + } + + /* Unmap all notifiers attached to @mr */ +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 273f5f7dce..bbca0e9f20 100644 +--- a/hw/arm/smmuv3.c ++++ 
b/hw/arm/smmuv3.c +@@ -800,7 +800,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, + uint8_t tg, uint64_t num_pages) + { + SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu); +- IOMMUTLBEntry entry; ++ IOMMUTLBEvent event; + uint8_t granule = tg; + + if (!tg) { +@@ -823,12 +823,13 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, + granule = tt->granule_sz; + } + +- entry.target_as = &address_space_memory; +- entry.iova = iova; +- entry.addr_mask = num_pages * (1 << granule) - 1; +- entry.perm = IOMMU_NONE; ++ event.type = IOMMU_NOTIFIER_UNMAP; ++ event.entry.target_as = &address_space_memory; ++ event.entry.iova = iova; ++ event.entry.addr_mask = num_pages * (1 << granule) - 1; ++ event.entry.perm = IOMMU_NONE; + +- memory_region_notify_iommu_one(n, &entry); ++ memory_region_notify_iommu_one(n, &event); + } + + /* invalidate an asid/iova range tuple in all mr's */ +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index 067593b9e4..56180b1c43 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -1073,7 +1073,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, + } + } + +-typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private); ++typedef int (*vtd_page_walk_hook)(IOMMUTLBEvent *event, void *private); + + /** + * Constant information used during page walking +@@ -1094,11 +1094,12 @@ typedef struct { + uint16_t domain_id; + } vtd_page_walk_info; + +-static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) ++static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) + { + VTDAddressSpace *as = info->as; + vtd_page_walk_hook hook_fn = info->hook_fn; + void *private = info->private; ++ IOMMUTLBEntry *entry = &event->entry; + DMAMap target = { + .iova = entry->iova, + .size = entry->addr_mask, +@@ -1107,7 +1108,7 @@ static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) + }; + DMAMap *mapped = iova_tree_find(as->iova_tree, 
&target); + +- if (entry->perm == IOMMU_NONE && !info->notify_unmap) { ++ if (event->type == IOMMU_NOTIFIER_UNMAP && !info->notify_unmap) { + trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask); + return 0; + } +@@ -1115,7 +1116,7 @@ static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) + assert(hook_fn); + + /* Update local IOVA mapped ranges */ +- if (entry->perm) { ++ if (event->type == IOMMU_NOTIFIER_MAP) { + if (mapped) { + /* If it's exactly the same translation, skip */ + if (!memcmp(mapped, &target, sizeof(target))) { +@@ -1141,19 +1142,21 @@ static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) + int ret; + + /* Emulate an UNMAP */ ++ event->type = IOMMU_NOTIFIER_UNMAP; + entry->perm = IOMMU_NONE; + trace_vtd_page_walk_one(info->domain_id, + entry->iova, + entry->translated_addr, + entry->addr_mask, + entry->perm); +- ret = hook_fn(entry, private); ++ ret = hook_fn(event, private); + if (ret) { + return ret; + } + /* Drop any existing mapping */ + iova_tree_remove(as->iova_tree, &target); +- /* Recover the correct permission */ ++ /* Recover the correct type */ ++ event->type = IOMMU_NOTIFIER_MAP; + entry->perm = cache_perm; + } + } +@@ -1170,7 +1173,7 @@ static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) + trace_vtd_page_walk_one(info->domain_id, entry->iova, + entry->translated_addr, entry->addr_mask, + entry->perm); +- return hook_fn(entry, private); ++ return hook_fn(event, private); + } + + /** +@@ -1191,7 +1194,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, + uint32_t offset; + uint64_t slpte; + uint64_t subpage_size, subpage_mask; +- IOMMUTLBEntry entry; ++ IOMMUTLBEvent event; + uint64_t iova = start; + uint64_t iova_next; + int ret = 0; +@@ -1245,13 +1248,15 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, + * + * In either case, we send an IOTLB notification down. 
+ */ +- entry.target_as = &address_space_memory; +- entry.iova = iova & subpage_mask; +- entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur); +- entry.addr_mask = ~subpage_mask; ++ event.entry.target_as = &address_space_memory; ++ event.entry.iova = iova & subpage_mask; ++ event.entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur); ++ event.entry.addr_mask = ~subpage_mask; + /* NOTE: this is only meaningful if entry_valid == true */ +- entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw); +- ret = vtd_page_walk_one(&entry, info); ++ event.entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw); ++ event.type = event.entry.perm ? IOMMU_NOTIFIER_MAP : ++ IOMMU_NOTIFIER_UNMAP; ++ ret = vtd_page_walk_one(&event, info); + } + + if (ret < 0) { +@@ -1430,10 +1435,10 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, + return 0; + } + +-static int vtd_sync_shadow_page_hook(IOMMUTLBEntry *entry, ++static int vtd_sync_shadow_page_hook(IOMMUTLBEvent *event, + void *private) + { +- memory_region_notify_iommu((IOMMUMemoryRegion *)private, 0, *entry); ++ memory_region_notify_iommu(private, 0, *event); + return 0; + } + +@@ -1993,14 +1998,17 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, + * page tables. We just deliver the PSI down to + * invalidate caches. 
+ */ +- IOMMUTLBEntry entry = { +- .target_as = &address_space_memory, +- .iova = addr, +- .translated_addr = 0, +- .addr_mask = size - 1, +- .perm = IOMMU_NONE, ++ IOMMUTLBEvent event = { ++ .type = IOMMU_NOTIFIER_UNMAP, ++ .entry = { ++ .target_as = &address_space_memory, ++ .iova = addr, ++ .translated_addr = 0, ++ .addr_mask = size - 1, ++ .perm = IOMMU_NONE, ++ }, + }; +- memory_region_notify_iommu(&vtd_as->iommu, 0, entry); ++ memory_region_notify_iommu(&vtd_as->iommu, 0, event); + } + } + } +@@ -2412,7 +2420,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, + VTDInvDesc *inv_desc) + { + VTDAddressSpace *vtd_dev_as; +- IOMMUTLBEntry entry; ++ IOMMUTLBEvent event; + struct VTDBus *vtd_bus; + hwaddr addr; + uint64_t sz; +@@ -2460,12 +2468,13 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, + sz = VTD_PAGE_SIZE; + } + +- entry.target_as = &vtd_dev_as->as; +- entry.addr_mask = sz - 1; +- entry.iova = addr; +- entry.perm = IOMMU_NONE; +- entry.translated_addr = 0; +- memory_region_notify_iommu(&vtd_dev_as->iommu, 0, entry); ++ event.type = IOMMU_NOTIFIER_UNMAP; ++ event.entry.target_as = &vtd_dev_as->as; ++ event.entry.addr_mask = sz - 1; ++ event.entry.iova = addr; ++ event.entry.perm = IOMMU_NONE; ++ event.entry.translated_addr = 0; ++ memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event); + + done: + return true; +@@ -3485,19 +3494,20 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) + size = remain = end - start + 1; + + while (remain >= VTD_PAGE_SIZE) { +- IOMMUTLBEntry entry; ++ IOMMUTLBEvent event; + uint64_t mask = get_naturally_aligned_size(start, remain, s->aw_bits); + + assert(mask); + +- entry.iova = start; +- entry.addr_mask = mask - 1; +- entry.target_as = &address_space_memory; +- entry.perm = IOMMU_NONE; ++ event.type = IOMMU_NOTIFIER_UNMAP; ++ event.entry.iova = start; ++ event.entry.addr_mask = mask - 1; ++ event.entry.target_as = &address_space_memory; ++ event.entry.perm = 
IOMMU_NONE; + /* This field is meaningless for unmap */ +- entry.translated_addr = 0; ++ event.entry.translated_addr = 0; + +- memory_region_notify_iommu_one(n, &entry); ++ memory_region_notify_iommu_one(n, &event); + + start += mask; + remain -= mask; +@@ -3533,9 +3543,9 @@ static void vtd_address_space_refresh_all(IntelIOMMUState *s) + vtd_switch_address_space_all(s); + } + +-static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private) ++static int vtd_replay_hook(IOMMUTLBEvent *event, void *private) + { +- memory_region_notify_iommu_one((IOMMUNotifier *)private, entry); ++ memory_region_notify_iommu_one(private, event); + return 0; + } + +diff --git a/hw/misc/tz-mpc.c b/hw/misc/tz-mpc.c +index 98f151237f..30481e1c90 100644 +--- a/hw/misc/tz-mpc.c ++++ b/hw/misc/tz-mpc.c +@@ -82,8 +82,10 @@ static void tz_mpc_iommu_notify(TZMPC *s, uint32_t lutidx, + /* Called when the LUT word at lutidx has changed from oldlut to newlut; + * must call the IOMMU notifiers for the changed blocks. + */ +- IOMMUTLBEntry entry = { +- .addr_mask = s->blocksize - 1, ++ IOMMUTLBEvent event = { ++ .entry = { ++ .addr_mask = s->blocksize - 1, ++ } + }; + hwaddr addr = lutidx * s->blocksize * 32; + int i; +@@ -100,26 +102,28 @@ static void tz_mpc_iommu_notify(TZMPC *s, uint32_t lutidx, + block_is_ns = newlut & (1 << i); + + trace_tz_mpc_iommu_notify(addr); +- entry.iova = addr; +- entry.translated_addr = addr; ++ event.entry.iova = addr; ++ event.entry.translated_addr = addr; + +- entry.perm = IOMMU_NONE; +- memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, entry); +- memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, entry); ++ event.type = IOMMU_NOTIFIER_UNMAP; ++ event.entry.perm = IOMMU_NONE; ++ memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, event); ++ memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, event); + +- entry.perm = IOMMU_RW; ++ event.type = IOMMU_NOTIFIER_MAP; ++ event.entry.perm = IOMMU_RW; + if (block_is_ns) { +- entry.target_as = &s->blocked_io_as; 
++ event.entry.target_as = &s->blocked_io_as; + } else { +- entry.target_as = &s->downstream_as; ++ event.entry.target_as = &s->downstream_as; + } +- memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, entry); ++ memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, event); + if (block_is_ns) { +- entry.target_as = &s->downstream_as; ++ event.entry.target_as = &s->downstream_as; + } else { +- entry.target_as = &s->blocked_io_as; ++ event.entry.target_as = &s->blocked_io_as; + } +- memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, entry); ++ memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, event); + } + } + +diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c +index 0790239ba5..30352df00e 100644 +--- a/hw/ppc/spapr_iommu.c ++++ b/hw/ppc/spapr_iommu.c +@@ -445,7 +445,7 @@ static void spapr_tce_reset(DeviceState *dev) + static target_ulong put_tce_emu(SpaprTceTable *tcet, target_ulong ioba, + target_ulong tce) + { +- IOMMUTLBEntry entry; ++ IOMMUTLBEvent event; + hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift; + +@@ -457,12 +457,13 @@ static target_ulong put_tce_emu(SpaprTceTable *tcet, target_ulong ioba, + + tcet->table[index] = tce; + +- entry.target_as = &address_space_memory, +- entry.iova = (ioba - tcet->bus_offset) & page_mask; +- entry.translated_addr = tce & page_mask; +- entry.addr_mask = ~page_mask; +- entry.perm = spapr_tce_iommu_access_flags(tce); +- memory_region_notify_iommu(&tcet->iommu, 0, entry); ++ event.entry.target_as = &address_space_memory, ++ event.entry.iova = (ioba - tcet->bus_offset) & page_mask; ++ event.entry.translated_addr = tce & page_mask; ++ event.entry.addr_mask = ~page_mask; ++ event.entry.perm = spapr_tce_iommu_access_flags(tce); ++ event.type = event.entry.perm ? 
IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP; ++ memory_region_notify_iommu(&tcet->iommu, 0, event); + + return H_SUCCESS; + } +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 70bfd91bf7..d9e1e29f1e 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -602,15 +602,18 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, + S390IOTLBEntry *entry) + { + S390IOTLBEntry *cache = g_hash_table_lookup(iommu->iotlb, &entry->iova); +- IOMMUTLBEntry notify = { +- .target_as = &address_space_memory, +- .iova = entry->iova, +- .translated_addr = entry->translated_addr, +- .perm = entry->perm, +- .addr_mask = ~PAGE_MASK, ++ IOMMUTLBEvent event = { ++ .type = entry->perm ? IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP, ++ .entry = { ++ .target_as = &address_space_memory, ++ .iova = entry->iova, ++ .translated_addr = entry->translated_addr, ++ .perm = entry->perm, ++ .addr_mask = ~PAGE_MASK, ++ }, + }; + +- if (entry->perm == IOMMU_NONE) { ++ if (event.type == IOMMU_NOTIFIER_UNMAP) { + if (!cache) { + goto out; + } +@@ -623,9 +626,11 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, + goto out; + } + +- notify.perm = IOMMU_NONE; +- memory_region_notify_iommu(&iommu->iommu_mr, 0, notify); +- notify.perm = entry->perm; ++ event.type = IOMMU_NOTIFIER_UNMAP; ++ event.entry.perm = IOMMU_NONE; ++ memory_region_notify_iommu(&iommu->iommu_mr, 0, event); ++ event.type = IOMMU_NOTIFIER_MAP; ++ event.entry.perm = entry->perm; + } + + cache = g_new(S390IOTLBEntry, 1); +@@ -637,7 +642,7 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, + dec_dma_avail(iommu); + } + +- memory_region_notify_iommu(&iommu->iommu_mr, 0, notify); ++ memory_region_notify_iommu(&iommu->iommu_mr, 0, event); + + out: + return iommu->dma_limit ? 
iommu->dma_limit->avail : 1; +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index fc5c75d693..cea8811295 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -129,7 +129,7 @@ static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start, + hwaddr virt_end, hwaddr paddr, + uint32_t flags) + { +- IOMMUTLBEntry entry; ++ IOMMUTLBEvent event; + IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ, + flags & VIRTIO_IOMMU_MAP_F_WRITE); + +@@ -141,19 +141,20 @@ static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start, + trace_virtio_iommu_notify_map(mr->parent_obj.name, virt_start, virt_end, + paddr, perm); + +- entry.target_as = &address_space_memory; +- entry.addr_mask = virt_end - virt_start; +- entry.iova = virt_start; +- entry.perm = perm; +- entry.translated_addr = paddr; ++ event.type = IOMMU_NOTIFIER_MAP; ++ event.entry.target_as = &address_space_memory; ++ event.entry.addr_mask = virt_end - virt_start; ++ event.entry.iova = virt_start; ++ event.entry.perm = perm; ++ event.entry.translated_addr = paddr; + +- memory_region_notify_iommu(mr, 0, entry); ++ memory_region_notify_iommu(mr, 0, event); + } + + static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start, + hwaddr virt_end) + { +- IOMMUTLBEntry entry; ++ IOMMUTLBEvent event; + + if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) { + return; +@@ -161,13 +162,14 @@ static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start, + + trace_virtio_iommu_notify_unmap(mr->parent_obj.name, virt_start, virt_end); + +- entry.target_as = &address_space_memory; +- entry.addr_mask = virt_end - virt_start; +- entry.iova = virt_start; +- entry.perm = IOMMU_NONE; +- entry.translated_addr = 0; ++ event.type = IOMMU_NOTIFIER_UNMAP; ++ event.entry.target_as = &address_space_memory; ++ event.entry.addr_mask = virt_end - virt_start; ++ event.entry.iova = virt_start; ++ event.entry.perm = 
IOMMU_NONE; ++ event.entry.translated_addr = 0; + +- memory_region_notify_iommu(mr, 0, entry); ++ memory_region_notify_iommu(mr, 0, event); + } + + static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value, +diff --git a/include/exec/memory.h b/include/exec/memory.h +index d8456ccf52..e86b5e92da 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -116,6 +116,11 @@ struct IOMMUNotifier { + }; + typedef struct IOMMUNotifier IOMMUNotifier; + ++typedef struct IOMMUTLBEvent { ++ IOMMUNotifierFlag type; ++ IOMMUTLBEntry entry; ++} IOMMUTLBEvent; ++ + /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ + #define RAM_PREALLOC (1 << 0) + +@@ -1326,24 +1331,18 @@ uint64_t memory_region_iommu_get_min_page_size(IOMMUMemoryRegion *iommu_mr); + /** + * memory_region_notify_iommu: notify a change in an IOMMU translation entry. + * +- * The notification type will be decided by entry.perm bits: +- * +- * - For UNMAP (cache invalidation) notifies: set entry.perm to IOMMU_NONE. +- * - For MAP (newly added entry) notifies: set entry.perm to the +- * permission of the page (which is definitely !IOMMU_NONE). +- * + * Note: for any IOMMU implementation, an in-place mapping change + * should be notified with an UNMAP followed by a MAP. + * + * @iommu_mr: the memory region that was changed + * @iommu_idx: the IOMMU index for the translation table which has changed +- * @entry: the new entry in the IOMMU translation table. The entry +- * replaces all old entries for the same virtual I/O address range. +- * Deleted entries have .@perm == 0. ++ * @event: TLB event with the new entry in the IOMMU translation table. ++ * The entry replaces all old entries for the same virtual I/O address ++ * range. 
+ */ + void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, + int iommu_idx, +- IOMMUTLBEntry entry); ++ IOMMUTLBEvent event); + + /** + * memory_region_notify_iommu_one: notify a change in an IOMMU translation +@@ -1353,12 +1352,12 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, + * notifies a specific notifier, not all of them. + * + * @notifier: the notifier to be notified +- * @entry: the new entry in the IOMMU translation table. The entry +- * replaces all old entries for the same virtual I/O address range. +- * Deleted entries have .@perm == 0. ++ * @event: TLB event with the new entry in the IOMMU translation table. ++ * The entry replaces all old entries for the same virtual I/O address ++ * range. + */ + void memory_region_notify_iommu_one(IOMMUNotifier *notifier, +- IOMMUTLBEntry *entry); ++ IOMMUTLBEvent *event); + + /** + * memory_region_register_iommu_notifier: register a notifier for changes to +diff --git a/softmmu/memory.c b/softmmu/memory.c +index 44de610c72..6ca87e8d73 100644 +--- a/softmmu/memory.c ++++ b/softmmu/memory.c +@@ -1943,11 +1943,15 @@ void memory_region_unregister_iommu_notifier(MemoryRegion *mr, + } + + void memory_region_notify_iommu_one(IOMMUNotifier *notifier, +- IOMMUTLBEntry *entry) ++ IOMMUTLBEvent *event) + { +- IOMMUNotifierFlag request_flags; ++ IOMMUTLBEntry *entry = &event->entry; + hwaddr entry_end = entry->iova + entry->addr_mask; + ++ if (event->type == IOMMU_NOTIFIER_UNMAP) { ++ assert(entry->perm == IOMMU_NONE); ++ } ++ + /* + * Skip the notification if the notification does not overlap + * with registered range. 
+@@ -1958,20 +1962,14 @@ void memory_region_notify_iommu_one(IOMMUNotifier *notifier, + + assert(entry->iova >= notifier->start && entry_end <= notifier->end); + +- if (entry->perm & IOMMU_RW) { +- request_flags = IOMMU_NOTIFIER_MAP; +- } else { +- request_flags = IOMMU_NOTIFIER_UNMAP; +- } +- +- if (notifier->notifier_flags & request_flags) { ++ if (event->type & notifier->notifier_flags) { + notifier->notify(notifier, entry); + } + } + + void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, + int iommu_idx, +- IOMMUTLBEntry entry) ++ IOMMUTLBEvent event) + { + IOMMUNotifier *iommu_notifier; + +@@ -1979,7 +1977,7 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, + + IOMMU_NOTIFIER_FOREACH(iommu_notifier, iommu_mr) { + if (iommu_notifier->iommu_idx == iommu_idx) { +- memory_region_notify_iommu_one(iommu_notifier, &entry); ++ memory_region_notify_iommu_one(iommu_notifier, &event); + } + } + } +-- +2.18.4 + diff --git a/0046-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch b/0046-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch new file mode 100644 index 0000000..6201e2f --- /dev/null +++ b/0046-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch @@ -0,0 +1,88 @@ +From 6eb76ae169aaf695a5fb6ef052859828e3ea91bc Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Mon, 11 Jan 2021 14:36:13 -0500 +Subject: memory: Add IOMMU_NOTIFIER_DEVIOTLB_UNMAP IOMMUTLBNotificationType +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210111143615.303645-4-eperezma@redhat.com> +Patchwork-id: 100571 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 3/5] memory: Add IOMMU_NOTIFIER_DEVIOTLB_UNMAP IOMMUTLBNotificationType +Bugzilla: 1845758 +RH-Acked-by: Xiao Wang +RH-Acked-by: David Hildenbrand +RH-Acked-by: Peter Xu + +This allows us to differentiate between regular IOMMU map/unmap events +and DEVIOTLB unmap. 
Doing so, notifiers that only need device IOTLB +invalidations will not receive regular IOMMU unmappings. + +Adapt intel and vhost to use it. + +Signed-off-by: Eugenio Pérez +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Acked-by: Jason Wang +Message-Id: <20201116165506.31315-4-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit b68ba1ca57677acf870d5ab10579e6105c1f5338) +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. de Paula +--- + hw/i386/intel_iommu.c | 2 +- + hw/virtio/vhost.c | 2 +- + include/exec/memory.h | 7 ++++++- + 3 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index 56180b1c43..edc3090f91 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -2468,7 +2468,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, + sz = VTD_PAGE_SIZE; + } + +- event.type = IOMMU_NOTIFIER_UNMAP; ++ event.type = IOMMU_NOTIFIER_DEVIOTLB_UNMAP; + event.entry.target_as = &vtd_dev_as->as; + event.entry.addr_mask = sz - 1; + event.entry.iova = addr; +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 614ccc2bcb..28c7d78172 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -718,7 +718,7 @@ static void vhost_iommu_region_add(MemoryListener *listener, + iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr, + MEMTXATTRS_UNSPECIFIED); + iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify, +- IOMMU_NOTIFIER_UNMAP, ++ IOMMU_NOTIFIER_DEVIOTLB_UNMAP, + section->offset_within_region, + int128_get64(end), + iommu_idx); +diff --git a/include/exec/memory.h b/include/exec/memory.h +index e86b5e92da..521d9901d7 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -97,9 +97,14 @@ typedef enum { + IOMMU_NOTIFIER_UNMAP = 0x1, + /* Notify entry changes (newly created entries) */ + IOMMU_NOTIFIER_MAP = 0x2, ++ /* Notify changes on device IOTLB entries */ ++ IOMMU_NOTIFIER_DEVIOTLB_UNMAP = 
0x04, + } IOMMUNotifierFlag; + +-#define IOMMU_NOTIFIER_ALL (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP) ++#define IOMMU_NOTIFIER_IOTLB_EVENTS (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP) ++#define IOMMU_NOTIFIER_DEVIOTLB_EVENTS IOMMU_NOTIFIER_DEVIOTLB_UNMAP ++#define IOMMU_NOTIFIER_ALL (IOMMU_NOTIFIER_IOTLB_EVENTS | \ ++ IOMMU_NOTIFIER_DEVIOTLB_EVENTS) + + struct IOMMUNotifier; + typedef void (*IOMMUNotify)(struct IOMMUNotifier *notifier, +-- +2.18.4 + diff --git a/0047-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch b/0047-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch new file mode 100644 index 0000000..e5fd578 --- /dev/null +++ b/0047-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch @@ -0,0 +1,57 @@ +From add80ba59a85aca4c5e2619dee95557d2ec14169 Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Mon, 11 Jan 2021 14:36:14 -0500 +Subject: intel_iommu: Skip page walking on device iotlb invalidations +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210111143615.303645-5-eperezma@redhat.com> +Patchwork-id: 100572 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 4/5] intel_iommu: Skip page walking on device iotlb invalidations +Bugzilla: 1845758 +RH-Acked-by: Xiao Wang +RH-Acked-by: David Hildenbrand +RH-Acked-by: Peter Xu + +Although they didn't reach the notifier because of the filtering in +memory_region_notify_iommu_one, the vt-d was still splitting huge +memory invalidations in chunks. Skipping it. + +This improves performance in case of netperf with vhost-net: +* TCP_STREAM: From 1923.6Mbit/s to 2175.13Mbit/s (13%) +* TCP_RR: From 8464.73 trans/s to 8932.703333 trans/s (5.5%) +* UDP_RR: From 8562.08 trans/s to 9005.62/s (5.1%) +* UDP_STREAM: No change observed (insignificant 0.1% improvement) + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20201116165506.31315-5-eperezma@redhat.com> +Reviewed-by: Michael S. 
Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit f7701e2c7983b680790af47117577b285b6a1aed) +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. de Paula +--- + hw/i386/intel_iommu.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index edc3090f91..0cc71e4057 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -1478,6 +1478,10 @@ static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as) + VTDContextEntry ce; + IOMMUNotifier *n; + ++ if (!(vtd_as->iommu.iommu_notify_flags & IOMMU_NOTIFIER_IOTLB_EVENTS)) { ++ return 0; ++ } ++ + ret = vtd_dev_to_context_entry(vtd_as->iommu_state, + pci_bus_num(vtd_as->bus), + vtd_as->devfn, &ce); +-- +2.18.4 + diff --git a/0048-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch b/0048-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch new file mode 100644 index 0000000..25fb623 --- /dev/null +++ b/0048-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch @@ -0,0 +1,69 @@ +From ce5295813c0f1c94964cbd126f37a3202c360b92 Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Mon, 11 Jan 2021 14:36:15 -0500 +Subject: memory: Skip bad range assertion if notifier is DEVIOTLB_UNMAP type +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210111143615.303645-6-eperezma@redhat.com> +Patchwork-id: 100573 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 5/5] memory: Skip bad range assertion if notifier is DEVIOTLB_UNMAP type +Bugzilla: 1845758 +RH-Acked-by: Xiao Wang +RH-Acked-by: David Hildenbrand +RH-Acked-by: Peter Xu + +Device IOTLB invalidations can unmap arbitrary ranges, either outside of +the memory region or even [0, ~0ULL] for all the space. The assertion +could be hit by a guest, and rhel7 guest effectively hit it.
+ +Signed-off-by: Eugenio Pérez +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Acked-by: Jason Wang +Message-Id: <20201116165506.31315-6-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 1804857f19f612f6907832e35599cdb51d4ec764) +Signed-off-by: Eugenio Pérez +Signed-off-by: Danilo C. L. de Paula +--- + softmmu/memory.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/softmmu/memory.c b/softmmu/memory.c +index 6ca87e8d73..22bacbbc78 100644 +--- a/softmmu/memory.c ++++ b/softmmu/memory.c +@@ -1947,6 +1947,7 @@ void memory_region_notify_iommu_one(IOMMUNotifier *notifier, + { + IOMMUTLBEntry *entry = &event->entry; + hwaddr entry_end = entry->iova + entry->addr_mask; ++ IOMMUTLBEntry tmp = *entry; + + if (event->type == IOMMU_NOTIFIER_UNMAP) { + assert(entry->perm == IOMMU_NONE); +@@ -1960,10 +1961,16 @@ void memory_region_notify_iommu_one(IOMMUNotifier *notifier, + return; + } + +- assert(entry->iova >= notifier->start && entry_end <= notifier->end); ++ if (notifier->notifier_flags & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) { ++ /* Crop (iova, addr_mask) to range */ ++ tmp.iova = MAX(tmp.iova, notifier->start); ++ tmp.addr_mask = MIN(entry_end, notifier->end) - tmp.iova; ++ } else { ++ assert(entry->iova >= notifier->start && entry_end <= notifier->end); ++ } + + if (event->type & notifier->notifier_flags) { +- notifier->notify(notifier, entry); ++ notifier->notify(notifier, &tmp); + } + } + +-- +2.18.4 + diff --git a/0049-RHEL-Switch-pvpanic-test-to-q35.patch b/0049-RHEL-Switch-pvpanic-test-to-q35.patch new file mode 100644 index 0000000..7b6f4bf --- /dev/null +++ b/0049-RHEL-Switch-pvpanic-test-to-q35.patch @@ -0,0 +1,47 @@ +From c489d2cd175e879071a3c5504a17d7f656dd7b06 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Tue, 8 Dec 2020 16:27:15 -0500 +Subject: RHEL: Switch pvpanic test to q35 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +Message-id: <20201208162716.30836-3-dgilbert@redhat.com> +Patchwork-id: 100360 +O-Subject: [RHEL-av-8.4.0 qemu-kvm PATCH v2 2/3] RHEL: Switch pvpanic test to q35 +Bugzilla: 1885555 +RH-Acked-by: Thomas Huth +RH-Acked-by: Juan Quintela +RH-Acked-by: Philippe Mathieu-Daudé + +From: "Dr. David Alan Gilbert" + +Since b1b0393c3c5 the pvpanic test checks for a different +result (3) expecting it to get that on new machine types. +But, downstream, our 'pc' machine type is old, so switch the +test to q35, so it gets the new behaviour it's expecting. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. de Paula +--- + tests/qtest/pvpanic-test.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c +index 016b32ebee..f0a7282b47 100644 +--- a/tests/qtest/pvpanic-test.c ++++ b/tests/qtest/pvpanic-test.c +@@ -17,7 +17,8 @@ static void test_panic(void) + QDict *response, *data; + QTestState *qts; + +- qts = qtest_init("-device pvpanic"); ++ /* RHEL: Use q35 */ ++ qts = qtest_init("-M q35 -device pvpanic"); + + val = qtest_inb(qts, 0x505); + g_assert_cmpuint(val, ==, 3); +-- +2.18.4 + diff --git a/0050-8.4-x86-machine-type.patch b/0050-8.4-x86-machine-type.patch new file mode 100644 index 0000000..70d0554 --- /dev/null +++ b/0050-8.4-x86-machine-type.patch @@ -0,0 +1,144 @@ +From cb95a2dd9f549a4b7fcfac97b9a83c46a232d41e Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 8 Dec 2020 16:27:16 -0500 +Subject: 8.4 x86 machine type + +RH-Author: Dr. 
David Alan Gilbert +Message-id: <20201208162716.30836-4-dgilbert@redhat.com> +Patchwork-id: 100362 +O-Subject: [RHEL-av-8.4.0 qemu-kvm PATCH v2 3/3] 8.4 x86 machine type +Bugzilla: 1885555 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Igor Mammedov +RH-Acked-by: Juan Quintela + +From: "Dr. David Alan Gilbert" + +Add pc-q35-rhel8.4.0 and fix all the compatibility glue up. + +Note the moving of x-smi-cpu-hotplug follows bz 1846886 comment 18 +part 2. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. de Paula +--- + hw/i386/pc.c | 8 ++++++-- + hw/i386/pc_piix.c | 5 +++++ + hw/i386/pc_q35.c | 30 +++++++++++++++++++++++++++--- + include/hw/i386/pc.h | 3 +++ + 4 files changed, 41 insertions(+), 5 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index f3fc695fe2..d5ea5b634c 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -363,11 +363,15 @@ GlobalProperty pc_rhel_compat[] = { + { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, + /* bz 1508330 */ + { "vfio-pci", "x-no-geforce-quirks", "on" }, +- /* BZ 1846886 */ +- { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_8_3_compat[] = { ++ /* pc_rhel_8_3_compat from pc_compat_5_1 */ ++ { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, ++}; ++const size_t pc_rhel_8_3_compat_len = G_N_ELEMENTS(pc_rhel_8_3_compat); ++ + GlobalProperty pc_rhel_8_2_compat[] = { + /* pc_rhel_8_2_compat from pc_compat_4_2 */ + { "mch", "smbase-smram", "off" }, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 815da79108..1b1cc18ae0 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1045,6 +1045,11 @@ static void pc_machine_rhel760_options(MachineClass *m) + m->smbus_no_migration_support = true; + pcmc->pvh_enabled = false; + pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ pcmc->kvmclock_create_always = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++
compat_props_add(m->compat_props, pc_rhel_8_3_compat, ++ pc_rhel_8_3_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_2, + hw_compat_rhel_8_2_len); + compat_props_add(m->compat_props, pc_rhel_8_2_compat, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 3340008c00..5acb47afcf 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -593,6 +593,24 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + } + ++static void pc_q35_init_rhel840(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel840_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.4.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.4.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel840, "pc-q35-rhel8.4.0", pc_q35_init_rhel840, ++ pc_q35_machine_rhel840_options); ++ ++ + static void pc_q35_init_rhel830(MachineState *machine) + { + pc_q35_init(machine); +@@ -601,10 +619,17 @@ static void pc_q35_init_rhel830(MachineState *machine) + static void pc_q35_machine_rhel830_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +- pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel840_options(m); + m->desc = "RHEL-8.3.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.3.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ compat_props_add(m->compat_props, pc_rhel_8_3_compat, ++ pc_rhel_8_3_compat_len); ++ /* From pc_q35_5_1_machine_options() */ ++ pcmc->kvmclock_create_always = false; + } + + DEFINE_PC_MACHINE(q35_rhel830, "pc-q35-rhel8.3.0", pc_q35_init_rhel830, +@@ -618,9 +643,8 @@ static void pc_q35_init_rhel820(MachineState *machine) + static void pc_q35_machine_rhel820_options(MachineClass *m) + { + 
PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +- pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel830_options(m); + m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; +- m->alias = NULL; + m->numa_mem_supported = true; + m->auto_enable_numa_with_memdev = false; + pcmc->smbios_stream_product = "RHEL-AV"; +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index e2ba9a4b58..68091bea98 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -272,6 +272,9 @@ extern const size_t pc_compat_1_4_len; + extern GlobalProperty pc_rhel_compat[]; + extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_8_3_compat[]; ++extern const size_t pc_rhel_8_3_compat_len; ++ + extern GlobalProperty pc_rhel_8_2_compat[]; + extern const size_t pc_rhel_8_2_compat_len; + +-- +2.18.4 + diff --git a/0051-memory-clamp-cached-translation-in-case-it-points-to.patch b/0051-memory-clamp-cached-translation-in-case-it-points-to.patch new file mode 100644 index 0000000..7700dcf --- /dev/null +++ b/0051-memory-clamp-cached-translation-in-case-it-points-to.patch @@ -0,0 +1,153 @@ +From cf7723d08da5b371ef8b89a6e4edfaa21f88f03f Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 12 Jan 2021 21:01:25 -0500 +Subject: memory: clamp cached translation in case it points to an MMIO region + +RH-Author: Jon Maloy +Message-id: <20210112210125.851866-2-jmaloy@redhat.com> +Patchwork-id: 100614 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] memory: clamp cached translation in case it points to an MMIO region +Bugzilla: 1904392 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: Thomas Huth + +From: Paolo Bonzini + +In using the address_space_translate_internal API, address_space_cache_init +forgot one piece of advice that can be found in the code for +address_space_translate_internal: + + /* MMIO registers can be expected to perform full-width accesses based only + * on their address, without considering adjacent registers that could + * decode to completely different MemoryRegions. When such registers + * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO + * regions overlap wildly. For this reason we cannot clamp the accesses + * here. + * + * If the length is small (as is the case for address_space_ldl/stl), + * everything works fine. If the incoming length is large, however, + * the caller really has to do the clamping through memory_access_size. + */ + +address_space_cache_init is exactly one such case where "the incoming length +is large", therefore we need to clamp the resulting length---not to +memory_access_size though, since we are not doing an access yet, but to +the size of the resulting section. This ensures that subsequent accesses +to the cached MemoryRegionSection will be in range. + +With this patch, the enclosed testcase notices that the used ring does +not fit into the MSI-X table and prints a "qemu-system-x86_64: Cannot map used" +error. + +Signed-off-by: Paolo Bonzini + +(cherry picked from 4bfb024bc76973d40a359476dc0291f46e435442) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. 
de Paula +--- + softmmu/physmem.c | 10 ++++++++ + tests/qtest/fuzz-test.c | 52 ++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 61 insertions(+), 1 deletion(-) + +diff --git a/softmmu/physmem.c b/softmmu/physmem.c +index 3027747c03..fb3f276844 100644 +--- a/softmmu/physmem.c ++++ b/softmmu/physmem.c +@@ -3255,6 +3255,7 @@ int64_t address_space_cache_init(MemoryRegionCache *cache, + AddressSpaceDispatch *d; + hwaddr l; + MemoryRegion *mr; ++ Int128 diff; + + assert(len > 0); + +@@ -3263,6 +3264,15 @@ int64_t address_space_cache_init(MemoryRegionCache *cache, + d = flatview_to_dispatch(cache->fv); + cache->mrs = *address_space_translate_internal(d, addr, &cache->xlat, &l, true); + ++ /* ++ * cache->xlat is now relative to cache->mrs.mr, not to the section itself. ++ * Take that into account to compute how many bytes are there between ++ * cache->xlat and the end of the section. ++ */ ++ diff = int128_sub(cache->mrs.size, ++ int128_make64(cache->xlat - cache->mrs.offset_within_region)); ++ l = int128_get64(int128_min(diff, int128_make64(l))); ++ + mr = cache->mrs.mr; + memory_region_ref(mr); + if (memory_access_is_direct(mr, is_write)) { +diff --git a/tests/qtest/fuzz-test.c b/tests/qtest/fuzz-test.c +index 2692d556d9..99d1a3ee12 100644 +--- a/tests/qtest/fuzz-test.c ++++ b/tests/qtest/fuzz-test.c +@@ -73,6 +73,55 @@ static void test_lp1879531_eth_get_rss_ex_dst_addr(void) + qtest_quit(s); + } + ++ /* ++ * Here a MemoryRegionCache pointed to an MMIO region but had a ++ * larger size than the underlying region. 
++ */ ++static void test_mmio_oob_from_memory_region_cache(void) ++{ ++ QTestState *s; ++ ++ s = qtest_init("-M pc-q35-5.2 -display none -m 512M " ++ "-device virtio-scsi,num_queues=8,addr=03.0 "); ++ ++ qtest_outl(s, 0xcf8, 0x80001811); ++ qtest_outb(s, 0xcfc, 0x6e); ++ qtest_outl(s, 0xcf8, 0x80001824); ++ qtest_outl(s, 0xcf8, 0x80001813); ++ qtest_outl(s, 0xcfc, 0xa080000); ++ qtest_outl(s, 0xcf8, 0x80001802); ++ qtest_outl(s, 0xcfc, 0x5a175a63); ++ qtest_outb(s, 0x6e08, 0x9e); ++ qtest_writeb(s, 0x9f003, 0xff); ++ qtest_writeb(s, 0x9f004, 0x01); ++ qtest_writeb(s, 0x9e012, 0x0e); ++ qtest_writeb(s, 0x9e01b, 0x0e); ++ qtest_writeb(s, 0x9f006, 0x01); ++ qtest_writeb(s, 0x9f008, 0x01); ++ qtest_writeb(s, 0x9f00a, 0x01); ++ qtest_writeb(s, 0x9f00c, 0x01); ++ qtest_writeb(s, 0x9f00e, 0x01); ++ qtest_writeb(s, 0x9f010, 0x01); ++ qtest_writeb(s, 0x9f012, 0x01); ++ qtest_writeb(s, 0x9f014, 0x01); ++ qtest_writeb(s, 0x9f016, 0x01); ++ qtest_writeb(s, 0x9f018, 0x01); ++ qtest_writeb(s, 0x9f01a, 0x01); ++ qtest_writeb(s, 0x9f01c, 0x01); ++ qtest_writeb(s, 0x9f01e, 0x01); ++ qtest_writeb(s, 0x9f020, 0x01); ++ qtest_writeb(s, 0x9f022, 0x01); ++ qtest_writeb(s, 0x9f024, 0x01); ++ qtest_writeb(s, 0x9f026, 0x01); ++ qtest_writeb(s, 0x9f028, 0x01); ++ qtest_writeb(s, 0x9f02a, 0x01); ++ qtest_writeb(s, 0x9f02c, 0x01); ++ qtest_writeb(s, 0x9f02e, 0x01); ++ qtest_writeb(s, 0x9f030, 0x01); ++ qtest_outb(s, 0x6e10, 0x00); ++ qtest_quit(s); ++} ++ + int main(int argc, char **argv) + { + const char *arch = qtest_get_arch(); +@@ -86,7 +135,8 @@ int main(int argc, char **argv) + test_lp1878642_pci_bus_get_irq_level_assert); + qtest_add_func("fuzz/test_lp1879531_eth_get_rss_ex_dst_addr", + test_lp1879531_eth_get_rss_ex_dst_addr); +- ++ qtest_add_func("fuzz/test_mmio_oob_from_memory_region_cache", ++ test_mmio_oob_from_memory_region_cache); + } + + return g_test_run(); +-- +2.18.4 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 9c7c40a..39fd078 100644 --- a/qemu-kvm.spec +++ 
b/qemu-kvm.spec @@ -64,7 +64,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.2.0 -Release: 2.1%{?dist} +Release: 3%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -129,11 +129,26 @@ Patch0031: 0031-s390x-Use-strpadcpy-for-copying-vm-name.patch Patch0032: 0032-tcg-Restrict-tcg_out_op-to-arrays-of-TCG_MAX_OP_ARGS.patch Patch0033: 0033-net-eth-Simplify-_eth_get_rss_ex_dst_addr.patch Patch0034: 0034-net-eth-Fix-stack-buffer-overflow-in.patch +Patch0035: 0035-block-nvme-Implement-fake-truncate-coroutine.patch +Patch0037: 0037-build-system-use-b_staticpic-false.patch +Patch0038: 0038-spapr-Fix-buffer-overflow-in-spapr_numa_associativit.patch +Patch0039: 0039-usb-hcd-xhci-pci-Fixup-capabilities-ordering-again.patch +Patch0040: 0040-qga-commands-posix-Send-CCW-address-on-s390x-with-th.patch +Patch0041: 0041-AArch64-machine-types-cleanup.patch +Patch0042: 0042-hw-arm-virt-Add-8.4-Machine-type.patch +Patch0044: 0044-memory-Rename-memory_region_notify_one-to-memory_reg.patch +Patch0045: 0045-memory-Add-IOMMUTLBEvent.patch +Patch0046: 0046-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch +Patch0047: 0047-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch +Patch0048: 0048-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch +Patch0049: 0049-RHEL-Switch-pvpanic-test-to-q35.patch +Patch0050: 0050-8.4-x86-machine-type.patch +Patch0051: 0051-memory-clamp-cached-translation-in-case-it-points-to.patch BuildRequires: wget BuildRequires: rpm-build BuildRequires: ninja-build -BuildRequires: meson +BuildRequires: meson >= 0.55.3-3 BuildRequires: zlib-devel BuildRequires: glib2-devel BuildRequires: which @@ -580,7 +595,7 @@ pushd %{qemu_kvm_build} --with-pkgversion="%{name}-%{version}-%{release}" \ --with-suffix="%{name}" \ --firmwarepath=%{_prefix}/share/qemu-firmware \ - 
--python=%{__python3} \ + --meson="%{__meson}" \ --target-list="%{buildarch}" \ --block-drv-rw-whitelist=%{block_drivers_list} \ --audio-drv-list= \ @@ -711,7 +726,7 @@ find ../default-configs -name "*-rh-devices.mak" \ --with-pkgversion="%{name}-%{version}-%{release}" \ --with-suffix="%{name}" \ --firmwarepath=%{_prefix}/share/qemu-firmware \ - --python=%{__python3} \ + --meson="%{__meson}" \ --target-list="%{buildarch}" \ --block-drv-rw-whitelist=%{block_drivers_list} \ --audio-drv-list= \ @@ -1309,6 +1324,45 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %changelog +* Mon Jan 18 2021 Danilo Cesar Lemes de Paula - 5.2.0-3.el8 +- kvm-block-nvme-Implement-fake-truncate-coroutine.patch [bz#1848834] +- kvm-spec-find-system-python-via-meson.patch [bz#1899619] +- kvm-build-system-use-b_staticpic-false.patch [bz#1899619] +- kvm-spapr-Fix-buffer-overflow-in-spapr_numa_associativit.patch [bz#1908693] +- kvm-usb-hcd-xhci-pci-Fixup-capabilities-ordering-again.patch [bz#1912846] +- kvm-qga-commands-posix-Send-CCW-address-on-s390x-with-th.patch [bz#1755075] +- kvm-AArch64-machine-types-cleanup.patch [bz#1895276] +- kvm-hw-arm-virt-Add-8.4-Machine-type.patch [bz#1895276] +- kvm-udev-kvm-check-remove-the-exceeded-subscription-limi.patch [bz#1914463] +- kvm-memory-Rename-memory_region_notify_one-to-memory_reg.patch [bz#1845758] +- kvm-memory-Add-IOMMUTLBEvent.patch [bz#1845758] +- kvm-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch [bz#1845758] +- kvm-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch [bz#1845758] +- kvm-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch [bz#1845758] +- kvm-RHEL-Switch-pvpanic-test-to-q35.patch [bz#1885555] +- kvm-8.4-x86-machine-type.patch [bz#1885555] +- kvm-memory-clamp-cached-translation-in-case-it-points-to.patch [bz#1904392] +- Resolves: bz#1848834 + (Failed to create luks format image on NVMe device) +- Resolves: bz#1899619 + (QEMU 5.2 is built with PIC objects instead of PIE) +- Resolves: 
bz#1908693 + ([ppc64le]boot up a guest with 128 numa nodes ,qemu got coredump) +- Resolves: bz#1912846 + (qemu-kvm: Failed to load xhci:parent_obj during migration) +- Resolves: bz#1755075 + ([qemu-guest-agent] fsinfo doesn't return disk info on s390x) +- Resolves: bz#1895276 + (Machine types update for aarch64 for QEMU 5.2.0) +- Resolves: bz#1914463 + (Remove KVM guest count and limit info message) +- Resolves: bz#1845758 + (qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed.) +- Resolves: bz#1885555 + (8.4 machine types for x86) +- Resolves: bz#1904392 + (CVE-2020-27821 virt:8.4/qemu-kvm: QEMU: heap buffer overflow in msix_table_mmio_write() in hw/pci/msix.c [rhel-av-8]) + * Tue Dec 15 2020 Danilo Cesar Lemes de Paula - 5.2.0-2.el8 - kvm-redhat-Define-hw_compat_8_3.patch [bz#1893935] - kvm-redhat-Add-spapr_machine_rhel_default_class_options.patch [bz#1893935] diff --git a/udev-kvm-check.c b/udev-kvm-check.c index cb0ecba..928b9de 100644 --- a/udev-kvm-check.c +++ b/udev-kvm-check.c @@ -32,14 +32,6 @@ #define COUNT_MSG \ "%d %s now active" -#define SUBSCRIPTION_MSG \ - "%d %s now active; your Red Hat Enterprise Linux subscription" \ - " limit is %d guests. Please review your Red Hat Enterprise Linux" \ - " subscription agreement or contact your Red Hat" \ - " support representative for more information. 
You" \ - " may review the Red Hat Enterprise subscription" \ - " limits at http://www.redhat.com/rhel-virt-limits" - int get_threshold_from_file(FILE *fp) { static const char key[] = "THRESHOLD="; @@ -139,13 +131,6 @@ void emit_count_message(int count) closelog(); } -void emit_subscription_message(int count, int threshold) -{ - openlog(FACILITY, LOG_CONS, LOG_USER); - syslog(LOG_WARNING, SUBSCRIPTION_MSG, count, guest(count), threshold); - closelog(); -} - int main(int argc, char **argv) { int count, threshold; @@ -157,10 +142,8 @@ int main(int argc, char **argv) threshold = get_threshold(); if (!strcmp(argv[2], "create")) { - if (threshold == 0) { + if (threshold == 0 || count > threshold) { emit_count_message(count); - } else if (count > threshold) { - emit_subscription_message(count, threshold); } } else { if (count >= threshold) { From e6d35e8f4a92e4ec0a3a4b4de372962206053e59 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Tue, 2 Feb 2021 13:27:53 +0100 Subject: [PATCH 108/195] Synchronization with qemu-kvm-5.2.0-4.el8 - Not required specific SLOF version --- 0054-Drop-bogus-IPv6-messages.patch | 51 +++++++++++++++++++++++++++++ qemu-kvm.spec | 10 ++++-- 2 files changed, 59 insertions(+), 2 deletions(-) create mode 100644 0054-Drop-bogus-IPv6-messages.patch diff --git a/0054-Drop-bogus-IPv6-messages.patch b/0054-Drop-bogus-IPv6-messages.patch new file mode 100644 index 0000000..1ba8fd9 --- /dev/null +++ b/0054-Drop-bogus-IPv6-messages.patch @@ -0,0 +1,51 @@ +From 1b118c53c70d9fa4ba3dcdf172039d29335bed73 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 20 Jan 2021 00:13:11 -0500 +Subject: Drop bogus IPv6 messages +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +Message-id: <20210120001311.1356511-2-jmaloy@redhat.com> +Patchwork-id: 100699 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] Drop bogus IPv6 messages +Bugzilla: 1918061 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: 
Stefano Garzarella +RH-Acked-by: Thomas Huth + +From: Ralf Haferkamp + +Drop IPv6 message shorter than what's mentioned in the payload +length header (+ the size of the IPv6 header). They're invalid an could +lead to data leakage in icmp6_send_echoreply(). + +(cherry picked from libslirp commit c7ede54cbd2e2b25385325600958ba0124e31cc0) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. de Paula +--- + slirp/src/ip6_input.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c +index a83e4f8e3d..f7ef354ee4 100644 +--- a/slirp/src/ip6_input.c ++++ b/slirp/src/ip6_input.c +@@ -56,6 +56,13 @@ void ip6_input(struct mbuf *m) + goto bad; + } + ++ // Check if the message size is big enough to hold what's ++ // set in the payload length header. If not this is an invalid ++ // packet ++ if (m->m_len < ntohs(ip6->ip_pl) + sizeof(struct ip6)) { ++ goto bad; ++ } ++ + /* check ip_ttl for a correct ICMP reply */ + if (ip6->ip_hl == 0) { + icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); +-- +2.18.4 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 39fd078..50db574 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -64,7 +64,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.2.0 -Release: 3%{?dist} +Release: 4%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -144,6 +144,7 @@ Patch0048: 0048-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch Patch0049: 0049-RHEL-Switch-pvpanic-test-to-q35.patch Patch0050: 0050-8.4-x86-machine-type.patch Patch0051: 0051-memory-clamp-cached-translation-in-case-it-points-to.patch +Patch0054: 0054-Drop-bogus-IPv6-messages.patch BuildRequires: wget BuildRequires: rpm-build @@ -281,7 +282,7 @@ Requires: edk2-aarch64 %endif %ifarch %{power64} -Requires: SLOF >= %{SLOF_gittagdate}-1.git%{SLOF_gittagcommit} +Requires: SLOF %endif Requires: libseccomp >= 2.4.0 # For compressed guest memory dumps @@ -1324,6 +1325,11 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %changelog +* Wed Jan 27 2021 Danilo Cesar Lemes de Paula - 5.2.0-4.el8 +- kvm-Drop-bogus-IPv6-messages.patch [bz#1918061] +- Resolves: bz#1918061 + (CVE-2020-10756 virt:rhel/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8]) + * Mon Jan 18 2021 Danilo Cesar Lemes de Paula - 5.2.0-3.el8 - kvm-block-nvme-Implement-fake-truncate-coroutine.patch [bz#1848834] - kvm-spec-find-system-python-via-meson.patch [bz#1899619] From 991a8cd654da16f27b9dce222a0f86d1382b8c5e Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 8 Feb 2021 12:21:04 +0100 Subject: [PATCH 109/195] Synchronization with qemu-kvm-5.2.0-5.el8 --- kvm-config-enable-VFIO_CCW.patch | 42 +++++ ..._cpus-to-710-on-pc-q35-rhel8-machine.patch | 45 +++++ ...llow-memory-unplug-to-always-succeed.patch | 100 +++++++++++ ...ndling-of-memory-unplug-with-old-gue.patch | 168 ++++++++++++++++++ kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch | 71 ++++++++ qemu-kvm.spec | 27 ++- 6 files changed, 452 insertions(+), 1 deletion(-) create mode 100644 kvm-config-enable-VFIO_CCW.patch create mode 100644 kvm-q35-Increase-max_cpus-to-710-on-pc-q35-rhel8-machine.patch create mode 100644 kvm-spapr-Allow-memory-unplug-to-always-succeed.patch 
create mode 100644 kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch create mode 100644 kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch diff --git a/kvm-config-enable-VFIO_CCW.patch b/kvm-config-enable-VFIO_CCW.patch new file mode 100644 index 0000000..50f6fc2 --- /dev/null +++ b/kvm-config-enable-VFIO_CCW.patch @@ -0,0 +1,42 @@ +From f6e6416e8267d302ba5ec40c2a26bc25cc0d1d55 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Fri, 29 Jan 2021 14:40:05 -0500 +Subject: [PATCH 5/5] config: enable VFIO_CCW + +RH-Author: Cornelia Huck +Message-id: <20210129144005.698097-1-cohuck@redhat.com> +Patchwork-id: 100941 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH] config: enable VFIO_CCW +Bugzilla: 1922170 +RH-Acked-by: Alex Williamson +RH-Acked-by: David Hildenbrand +RH-Acked-by: Thomas Huth + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1922170 +BRANCH: rhel-av-8.4.0 +UPSTREAM: n/a +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=34609010 + +Enable vfio-ccw in RHEL AV builds. 
+ +Signed-off-by: Cornelia Huck +Signed-off-by: Eduardo Lima (Etrunko) +--- + default-configs/devices/s390x-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/default-configs/devices/s390x-rh-devices.mak b/default-configs/devices/s390x-rh-devices.mak +index c3c73fe752..08a15f3e01 100644 +--- a/default-configs/devices/s390x-rh-devices.mak ++++ b/default-configs/devices/s390x-rh-devices.mak +@@ -9,6 +9,7 @@ CONFIG_SCSI=y + CONFIG_TERMINAL3270=y + CONFIG_VFIO=y + CONFIG_VFIO_AP=y ++CONFIG_VFIO_CCW=y + CONFIG_VFIO_PCI=y + CONFIG_VHOST_USER=y + CONFIG_VIRTIO_CCW=y +-- +2.18.4 + diff --git a/kvm-q35-Increase-max_cpus-to-710-on-pc-q35-rhel8-machine.patch b/kvm-q35-Increase-max_cpus-to-710-on-pc-q35-rhel8-machine.patch new file mode 100644 index 0000000..ac1341e --- /dev/null +++ b/kvm-q35-Increase-max_cpus-to-710-on-pc-q35-rhel8-machine.patch @@ -0,0 +1,45 @@ +From 8fa6654712c7cba73fd1c8d93b094d90c1757000 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Tue, 26 Jan 2021 23:46:44 -0500 +Subject: [PATCH 4/5] q35: Increase max_cpus to 710 on pc-q35-rhel8* machine + types + +RH-Author: Eduardo Habkost +Message-id: <20210126234644.3091529-1-ehabkost@redhat.com> +Patchwork-id: 100791 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH] q35: Increase max_cpus to 710 on pc-q35-rhel8* machine types +Bugzilla: 1904268 +RH-Acked-by: Daniel P. Berrange +RH-Acked-by: Thomas Huth +RH-Acked-by: Paolo Bonzini + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1904268 +Upstream: not applicable +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=34536802 + +The original goal was to support 1024 VCPUs, but 710 VCPUs is the +maximum number we can reach before hitting SMBIOS table size +limits. 
+ +Signed-off-by: Eduardo Habkost +Signed-off-by: Eduardo Lima (Etrunko) +--- + hw/i386/pc_q35.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 5acb47afcf..72854192a9 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -589,7 +589,7 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); + machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); + m->alias = "q35"; +- m->max_cpus = 512; ++ m->max_cpus = 710; + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + } + +-- +2.18.4 + diff --git a/kvm-spapr-Allow-memory-unplug-to-always-succeed.patch b/kvm-spapr-Allow-memory-unplug-to-always-succeed.patch new file mode 100644 index 0000000..f7a6b9a --- /dev/null +++ b/kvm-spapr-Allow-memory-unplug-to-always-succeed.patch @@ -0,0 +1,100 @@ +From 1365bf10ad49fd7c0a3b4e2eabeaacd1abf60d18 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Tue, 19 Jan 2021 15:20:43 -0500 +Subject: [PATCH 1/5] spapr: Allow memory unplug to always succeed + +RH-Author: Greg Kurz +Message-id: <20210119152044.1019191-2-gkurz@redhat.com> +Patchwork-id: 100690 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/2] spapr: Allow memory unplug to always succeed +Bugzilla: 1914069 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Laurent Vivier +RH-Acked-by: David Gibson + +From: Greg Kurz + +It is currently impossible to hot-unplug a memory device between +machine reset and CAS. + +(qemu) device_del dimm1 +Error: Memory hot unplug not supported for this guest + +This limitation was introduced in order to provide an explicit +error path for older guests that didn't support hot-plug event +sources (and thus memory hot-unplug). + +The linux kernel has been supporting these since 4.11. All recent +enough guests are thus capable of handling the removal of a memory +device at all time, including during early boot. 
+ +Lift the limitation for the latest machine type. This means that +trying to unplug memory from a guest that doesn't support it will +likely just do nothing and the memory will only get removed at +next reboot. Such older guests can still get the existing behavior +by using an older machine type. + +Signed-off-by: Greg Kurz +Message-Id: <160794035064.23292.17560963281911312439.stgit@bahia.lan> +Signed-off-by: David Gibson +(cherry picked from commit 1e8b5b1aa16b7d73ba8ba52c95d0b52329d5c9d0) +Signed-off-by: Greg Kurz + +Conflicts: + hw/ppc/spapr.c + +Conflict because RHEL-AV doesn't have upstream 576a00bdeb5b ("hw: add +compat machines for 6.0"). Just ignore the change that sets +pre_6_0_memory_unplug for older machine types since the next patch +removes the flag. + +Signed-off-by: Eduardo Lima (Etrunko) +--- + hw/ppc/spapr.c | 3 ++- + hw/ppc/spapr_events.c | 3 ++- + include/hw/ppc/spapr.h | 1 + + 3 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 4f61b64a21..65a647134a 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4064,7 +4064,8 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { +- if (spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { ++ if (!smc->pre_6_0_memory_unplug || ++ spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { + spapr_memory_unplug_request(hotplug_dev, dev, errp); + } else { + /* NOTE: this means there is a window after guest reset, prior to +diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c +index 1add53547e..c30123177b 100644 +--- a/hw/ppc/spapr_events.c ++++ b/hw/ppc/spapr_events.c +@@ -659,7 +659,8 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, + /* we should not be using count_indexed value unless the guest + * supports dedicated hotplug event source + */ +- g_assert(spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)); ++ 
g_assert(!SPAPR_MACHINE_GET_CLASS(spapr)->pre_6_0_memory_unplug || ++ spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)); + hp->drc_id.count_indexed.count = + cpu_to_be32(drc_id->count_indexed.count); + hp->drc_id.count_indexed.index = +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index 28bbf07f8f..4941fe9b4f 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -142,6 +142,7 @@ struct SpaprMachineClass { + hwaddr rma_limit; /* clamp the RMA to this size */ + bool pre_5_1_assoc_refpoints; + bool pre_5_2_numa_associativity; ++ bool pre_6_0_memory_unplug; + + bool has_power9_support; + void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, +-- +2.18.4 + diff --git a/kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch b/kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch new file mode 100644 index 0000000..94bad1d --- /dev/null +++ b/kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch @@ -0,0 +1,168 @@ +From cd719765bd751142c4040ee7daf615b859fb3e9d Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Tue, 19 Jan 2021 15:20:44 -0500 +Subject: [PATCH 2/5] spapr: Improve handling of memory unplug with old guests + +RH-Author: Greg Kurz +Message-id: <20210119152044.1019191-3-gkurz@redhat.com> +Patchwork-id: 100691 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/2] spapr: Improve handling of memory unplug with old guests +Bugzilla: 1914069 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Laurent Vivier +RH-Acked-by: David Gibson + +From: Greg Kurz + +Since commit 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed") +trying to unplug memory from a guest that doesn't support it (eg. rhel6) +no longer generates an error like it used to. Instead, it leaves the +memory around : only a subsequent reboot or manual use of drmgr within +the guest can complete the hot-unplug sequence. A flag was added to +SpaprMachineClass so that this new behavior only applies to the default +machine type. + +We can do better. 
CAS processes all pending hot-unplug requests. This +means that we don't really care about what the guest supports if +the hot-unplug request happens before CAS. + +All guests that we care for, even old ones, set enough bits in OV5 +that lead to a non-empty bitmap in spapr->ov5_cas. Use that as a +heuristic to decide if CAS has already occured or not. + +Always accept unplug requests that happen before CAS since CAS will +process them. Restore the previous behavior of rejecting them after +CAS when we know that the guest doesn't support memory hot-unplug. + +This behavior is suitable for all machine types : this allows to +drop the pre_6_0_memory_unplug flag. + +Fixes: 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed") +Signed-off-by: Greg Kurz +Message-Id: <161012708715.801107.11418801796987916516.stgit@bahia.lan> +Reviewed-by: Daniel Henrique Barboza +Signed-off-by: David Gibson +(cherry picked from commit 73598c75df0585e039825e642adede21912dabc7) +Signed-off-by: Greg Kurz + +Conflicts: + hw/ppc/spapr.c + +Conflict around the removal of pre_6_0_memory_unplug, which was only +partially backported from upstream 1e8b5b1aa16b. + +Signed-off-by: Eduardo Lima (Etrunko) +--- + hw/ppc/spapr.c | 21 +++++++++++++-------- + hw/ppc/spapr_events.c | 3 +-- + hw/ppc/spapr_ovec.c | 7 +++++++ + include/hw/ppc/spapr.h | 2 +- + include/hw/ppc/spapr_ovec.h | 1 + + 5 files changed, 23 insertions(+), 11 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 65a647134a..a67df8cb26 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4056,6 +4056,18 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, + } + } + ++bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr) ++{ ++ return spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT) || ++ /* ++ * CAS will process all pending unplug requests. ++ * ++ * HACK: a guest could theoretically have cleared all bits in OV5, ++ * but none of the guests we care for do. 
++ */ ++ spapr_ovec_empty(spapr->ov5_cas); ++} ++ + static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +@@ -4064,16 +4076,9 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { +- if (!smc->pre_6_0_memory_unplug || +- spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { ++ if (spapr_memory_hot_unplug_supported(sms)) { + spapr_memory_unplug_request(hotplug_dev, dev, errp); + } else { +- /* NOTE: this means there is a window after guest reset, prior to +- * CAS negotiation, where unplug requests will fail due to the +- * capability not being detected yet. This is a bit different than +- * the case with PCI unplug, where the events will be queued and +- * eventually handled by the guest after boot +- */ + error_setg(errp, "Memory hot unplug not supported for this guest"); + } + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { +diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c +index c30123177b..80b918ff5f 100644 +--- a/hw/ppc/spapr_events.c ++++ b/hw/ppc/spapr_events.c +@@ -659,8 +659,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, + /* we should not be using count_indexed value unless the guest + * supports dedicated hotplug event source + */ +- g_assert(!SPAPR_MACHINE_GET_CLASS(spapr)->pre_6_0_memory_unplug || +- spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)); ++ g_assert(spapr_memory_hot_unplug_supported(spapr)); + hp->drc_id.count_indexed.count = + cpu_to_be32(drc_id->count_indexed.count); + hp->drc_id.count_indexed.index = +diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c +index dd003f1763..b2567caa5c 100644 +--- a/hw/ppc/spapr_ovec.c ++++ b/hw/ppc/spapr_ovec.c +@@ -125,6 +125,13 @@ bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr) + return test_bit(bitnr, ov->bitmap) ? 
true : false; + } + ++bool spapr_ovec_empty(SpaprOptionVector *ov) ++{ ++ g_assert(ov); ++ ++ return bitmap_empty(ov->bitmap, OV_MAXBITS); ++} ++ + static void guest_byte_to_bitmap(uint8_t entry, unsigned long *bitmap, + long bitmap_offset) + { +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index 4941fe9b4f..5952942362 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -142,7 +142,6 @@ struct SpaprMachineClass { + hwaddr rma_limit; /* clamp the RMA to this size */ + bool pre_5_1_assoc_refpoints; + bool pre_5_2_numa_associativity; +- bool pre_6_0_memory_unplug; + + bool has_power9_support; + void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, +@@ -954,4 +953,5 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, + + void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); + hwaddr spapr_get_rtas_addr(void); ++bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr); + #endif /* HW_SPAPR_H */ +diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h +index d4dee9e06a..48b716a060 100644 +--- a/include/hw/ppc/spapr_ovec.h ++++ b/include/hw/ppc/spapr_ovec.h +@@ -71,6 +71,7 @@ void spapr_ovec_cleanup(SpaprOptionVector *ov); + void spapr_ovec_set(SpaprOptionVector *ov, long bitnr); + void spapr_ovec_clear(SpaprOptionVector *ov, long bitnr); + bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr); ++bool spapr_ovec_empty(SpaprOptionVector *ov); + SpaprOptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector); + int spapr_dt_ovec(void *fdt, int fdt_offset, + SpaprOptionVector *ov, const char *name); +-- +2.18.4 + diff --git a/kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch b/kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch new file mode 100644 index 0000000..f04d944 --- /dev/null +++ b/kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch @@ -0,0 +1,71 @@ +From 5840880e2ed3747464242e0559a6cf7ec4e55a11 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 21 Jan 
2021 17:43:53 -0500 +Subject: [PATCH 3/5] x86/cpu: Add AVX512_FP16 cpu feature + +RH-Author: plai@redhat.com +Message-id: <20210121174353.16032-1-plai@redhat.com> +Patchwork-id: 100758 +O-Subject: [RHEL8.4 AV qemu-kvm PATCH] x86/cpu: Add AVX512_FP16 cpu feature +Bugzilla: 1838738 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Bandan Das + +From: Cathy Zhang + +BZ https://bugzilla.redhat.com/show_bug.cgi?id=1838738 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=34299228 + x86 https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=34299267 +Branch rhel-av-8.4.0 + +Tested on intel-eaglestream-spr-01.khw1.lab.eng.bos.redhat.com. +All flags found as expected + avx512_vp2intersect, serialize, and avx512_fp16 +except: + tsxldtrk + +Cpuid reports CPUID.(7.0).EDX[16] isn't enabled on this CPU. +Leaf Subleaf EAX EBX ECX EDX +00000007 00000000: 00000001 .... f3bfbfef .... fa417f5e ^.A. ff8c4532 2E.. + +Already in rhel-av-8.4.0 (rebased to v5.2.0 ): + 353f98c9a x86/cpu: Enable AVX512_VP2INTERSECT cpu feature + 5dd13f2a5 target/i386: Add SERIALIZE cpu feature + b3c7344e3 target/i386: Enable TSX Suspend Load Address Tracking feature + +Signed-off-by: Eduardo Lima (Etrunko) +--- + target/i386/cpu.c | 2 +- + target/i386/cpu.h | 2 ++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index dc592e990e..f944b41573 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -977,7 +977,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "avx512-vp2intersect", NULL, "md-clear", NULL, + NULL, NULL, "serialize", NULL, + "tsx-ldtrk", NULL, NULL /* pconfig */, NULL, +- NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, "avx512-fp16", + NULL, NULL, "spec-ctrl", "stibp", + NULL, "arch-capabilities", "core-capability", "ssbd", + }, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 88e8586f8f..a3db7e3c6c 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ 
-783,6 +783,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_7_0_EDX_SERIALIZE (1U << 14) + /* TSX Suspend Load Address Tracking instruction */ + #define CPUID_7_0_EDX_TSX_LDTRK (1U << 16) ++/* AVX512_FP16 instruction */ ++#define CPUID_7_0_EDX_AVX512_FP16 (1U << 23) + /* Speculation Control */ + #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) + /* Single Thread Indirect Branch Predictors */ +-- +2.18.4 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 50db574..fa863e7 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -64,7 +64,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.2.0 -Release: 4%{?dist} +Release: 5%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -145,6 +145,16 @@ Patch0049: 0049-RHEL-Switch-pvpanic-test-to-q35.patch Patch0050: 0050-8.4-x86-machine-type.patch Patch0051: 0051-memory-clamp-cached-translation-in-case-it-points-to.patch Patch0054: 0054-Drop-bogus-IPv6-messages.patch +# For bz#1914069 - [ppc64le] have this fix for rhel8.4 av (spapr: Allow memory unplug to always succeed) +Patch55: kvm-spapr-Allow-memory-unplug-to-always-succeed.patch +# For bz#1914069 - [ppc64le] have this fix for rhel8.4 av (spapr: Allow memory unplug to always succeed) +Patch56: kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch +# For bz#1838738 - [Intel 8.4 FEAT] qemu-kvm Sapphire Rapids (SPR) New Instructions (NIs) - Fast Train +Patch57: kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch +# For bz#1904268 - [RFE] [HPEMC] qemu-kvm: support up to 710 VCPUs +Patch58: kvm-q35-Increase-max_cpus-to-710-on-pc-q35-rhel8-machine.patch +# For bz#1922170 - Enable vfio-ccw in AV +Patch59: kvm-config-enable-VFIO_CCW.patch BuildRequires: wget BuildRequires: rpm-build @@ -1325,6 +1335,21 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %changelog +* Tue Feb 02 
2021 Eduardo Lima (Etrunko) - 5.2.0-5.el8 +- kvm-spapr-Allow-memory-unplug-to-always-succeed.patch [bz#1914069] +- kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch [bz#1914069] +- kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch [bz#1838738] +- kvm-q35-Increase-max_cpus-to-710-on-pc-q35-rhel8-machine.patch [bz#1904268] +- kvm-config-enable-VFIO_CCW.patch [bz#1922170] +- Resolves: bz#1914069 + ([ppc64le] have this fix for rhel8.4 av (spapr: Allow memory unplug to always succeed)) +- Resolves: bz#1838738 + ([Intel 8.4 FEAT] qemu-kvm Sapphire Rapids (SPR) New Instructions (NIs) - Fast Train) +- Resolves: bz#1904268 + ([RFE] [HPEMC] qemu-kvm: support up to 710 VCPUs) +- Resolves: bz#1922170 + (Enable vfio-ccw in AV) + * Wed Jan 27 2021 Danilo Cesar Lemes de Paula - 5.2.0-4.el8 - kvm-Drop-bogus-IPv6-messages.patch [bz#1918061] - Resolves: bz#1918061 From c7f7ddb9f2c40867ebab2c7166f9e8996062829f Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 12 Feb 2021 09:03:43 +0100 Subject: [PATCH 110/195] Synchronization with qemu-kvm-5.2.0-5.el8 --- ...docs-set-CONFDIR-when-running-sphinx.patch | 56 ++++++++ ...x-addr_mask-for-range-based-invalida.patch | 66 ++++++++++ ...removal-race-vs-IO-restart-callback-.patch | 62 +++++++++ ...trip-l-and-ll-from-systemtap-format-.patch | 69 ++++++++++ ...MU-and-virtio-iommu-on-dev-iotlb-sup.patch | 124 ++++++++++++++++++ qemu-kvm.spec | 54 +++++++- 6 files changed, 426 insertions(+), 5 deletions(-) create mode 100644 kvm-docs-set-CONFDIR-when-running-sphinx.patch create mode 100644 kvm-hw-arm-smmuv3-Fix-addr_mask-for-range-based-invalida.patch create mode 100644 kvm-scsi-fix-device-removal-race-vs-IO-restart-callback-.patch create mode 100644 kvm-tracetool-also-strip-l-and-ll-from-systemtap-format-.patch create mode 100644 kvm-vhost-Unbreak-SMMU-and-virtio-iommu-on-dev-iotlb-sup.patch diff --git a/kvm-docs-set-CONFDIR-when-running-sphinx.patch b/kvm-docs-set-CONFDIR-when-running-sphinx.patch new file mode 100644 index 
0000000..db8c0c9 --- /dev/null +++ b/kvm-docs-set-CONFDIR-when-running-sphinx.patch @@ -0,0 +1,56 @@ +From 16130479cc03434a85111608d9d2b0e179dc8b98 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Mon, 8 Feb 2021 09:37:30 -0500 +Subject: [PATCH 7/7] docs: set CONFDIR when running sphinx +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20210208093730.1166952-1-marcandre.lureau@redhat.com> +Patchwork-id: 101004 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH] docs: set CONFDIR when running sphinx +Bugzilla: 1902537 +RH-Acked-by: Eduardo Lima (Etrunko) +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Philippe Mathieu-Daudé + +From: Marc-André Lureau + +The default configuration path /etc/qemu can be overriden with configure +options, and the generated documentation used to reflect it. + +Fixes regression introduced in commit +f8aa24ea9a82da38370470c6bc0eaa393999edfe ("meson: sphinx-build"). + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1902537 +Signed-off-by: Marc-André Lureau +Message-Id: <20201201183704.299697-1-marcandre.lureau@redhat.com> +Signed-off-by: Paolo Bonzini + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1902537 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=34816282 + +(cherry picked from commit daf07a6714b111340fe2d0234d1a5287d6ebe0ec) +Signed-off-by: Marc-André Lureau +Signed-off-by: Eduardo Lima (Etrunko) +--- + docs/meson.build | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/docs/meson.build b/docs/meson.build +index ebd85d59f9..bb8fe4c9e4 100644 +--- a/docs/meson.build ++++ b/docs/meson.build +@@ -9,7 +9,7 @@ endif + # Check if tools are available to build documentation. 
+ build_docs = false + if sphinx_build.found() +- SPHINX_ARGS = [sphinx_build] ++ SPHINX_ARGS = ['env', 'CONFDIR=' + qemu_confdir, sphinx_build] + # If we're making warnings fatal, apply this to Sphinx runs as well + if get_option('werror') + SPHINX_ARGS += [ '-W' ] +-- +2.18.4 + diff --git a/kvm-hw-arm-smmuv3-Fix-addr_mask-for-range-based-invalida.patch b/kvm-hw-arm-smmuv3-Fix-addr_mask-for-range-based-invalida.patch new file mode 100644 index 0000000..1ac03d4 --- /dev/null +++ b/kvm-hw-arm-smmuv3-Fix-addr_mask-for-range-based-invalida.patch @@ -0,0 +1,66 @@ +From 9768ea83a3f23f112514ad34d4abcd6e9590bb71 Mon Sep 17 00:00:00 2001 +From: Auger Eric +Date: Wed, 3 Feb 2021 20:31:27 -0500 +Subject: [PATCH 4/7] hw/arm/smmuv3: Fix addr_mask for range-based invalidation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Auger Eric +Message-id: <20210203203127.3613-1-eric.auger@redhat.com> +Patchwork-id: 100971 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH] hw/arm/smmuv3: Fix addr_mask for range-based invalidation +Bugzilla: 1834152 +RH-Acked-by: Gavin Shan +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Andrew Jones + +From: Zenghui Yu + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1834152 +BRANCH: rhel-av-8.4.0 +UPSTREAM: yes +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=34711554 + +When handling guest range-based IOTLB invalidation, we should decode the TG +field into the corresponding translation granule size so that we can pass +the correct invalidation range to backend. Set @granule to (tg * 2 + 10) to +properly emulate the architecture. 
+ +Fixes: d52915616c05 ("hw/arm/smmuv3: Get prepared for range invalidation") +Signed-off-by: Zenghui Yu +Acked-by: Eric Auger +Message-id: 20210130043220.1345-1-yuzenghui@huawei.com +Signed-off-by: Peter Maydell +(cherry picked from commit dcda883cd21125c699419a3fc0fe182ea989d9c4) +Signed-off-by: Eric Auger +Signed-off-by: Eduardo Lima (Etrunko) +--- + hw/arm/smmuv3.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index bbca0e9f20..98b99d4fe8 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -801,7 +801,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, + { + SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu); + IOMMUTLBEvent event; +- uint8_t granule = tg; ++ uint8_t granule; + + if (!tg) { + SMMUEventInfo event = {.inval_ste_allowed = true}; +@@ -821,6 +821,8 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, + return; + } + granule = tt->granule_sz; ++ } else { ++ granule = tg * 2 + 10; + } + + event.type = IOMMU_NOTIFIER_UNMAP; +-- +2.18.4 + diff --git a/kvm-scsi-fix-device-removal-race-vs-IO-restart-callback-.patch b/kvm-scsi-fix-device-removal-race-vs-IO-restart-callback-.patch new file mode 100644 index 0000000..75baf13 --- /dev/null +++ b/kvm-scsi-fix-device-removal-race-vs-IO-restart-callback-.patch @@ -0,0 +1,62 @@ +From 3b537ab3eb342af4222a8cf825062d17893cd18f Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Wed, 27 Jan 2021 11:47:54 -0500 +Subject: [PATCH 1/7] scsi: fix device removal race vs IO restart callback on + resume + +RH-Author: Maxim Levitsky +Message-id: <20210127114754.477582-2-mlevitsk@redhat.com> +Patchwork-id: 100795 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] scsi: fix device removal race vs IO restart callback on resume +Bugzilla: 1854811 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: John Snow + +There is (mostly theoretical) race between removal of a scsi device and +scsi_dma_restart_bh. 
+ +It used to be easier to hit this race prior to my / Paulo's patch series +that added rcu to scsi bus device handling code, but IMHO this race +should still be possible to hit, at least in theory. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1854811 + +Fix it anyway with a patch that was proposed by Paulo in the above bugzilla. + +Suggested-by: Paolo Bonzini +Signed-off-by: Maxim Levitsky +Message-Id: <20201210125929.1136390-2-mlevitsk@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit cfd4e36352d4426221aa94da44a172da1aaa741b) +Signed-off-by: Maxim Levitsky +Signed-off-by: Eduardo Lima (Etrunko) +--- + hw/scsi/scsi-bus.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index b901e701f0..edb5c3492a 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -170,6 +170,8 @@ static void scsi_dma_restart_bh(void *opaque) + scsi_req_unref(req); + } + aio_context_release(blk_get_aio_context(s->conf.blk)); ++ /* Drop the reference that was acquired in scsi_dma_restart_cb */ ++ object_unref(OBJECT(s)); + } + + void scsi_req_retry(SCSIRequest *req) +@@ -188,6 +190,8 @@ static void scsi_dma_restart_cb(void *opaque, int running, RunState state) + } + if (!s->bh) { + AioContext *ctx = blk_get_aio_context(s->conf.blk); ++ /* The reference is dropped in scsi_dma_restart_bh.*/ ++ object_ref(OBJECT(s)); + s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s); + qemu_bh_schedule(s->bh); + } +-- +2.18.4 + diff --git a/kvm-tracetool-also-strip-l-and-ll-from-systemtap-format-.patch b/kvm-tracetool-also-strip-l-and-ll-from-systemtap-format-.patch new file mode 100644 index 0000000..7d519a1 --- /dev/null +++ b/kvm-tracetool-also-strip-l-and-ll-from-systemtap-format-.patch @@ -0,0 +1,69 @@ +From b51851d9684443028c2568e70bb203481ecd533a Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Tue, 2 Feb 2021 14:03:34 -0500 +Subject: [PATCH 2/7] tracetool: also strip %l and %ll from systemtap format + strings 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +Message-id: <20210202140334.1798082-2-lvivier@redhat.com> +Patchwork-id: 100948 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] tracetool: also strip %l and %ll from systemtap format strings +Bugzilla: 1907264 +RH-Acked-by: Jon Maloy +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Daniel P. Berrange + +From: Daniel P. Berrangé + +All variables are 64-bit and so %l / %ll are not required, and the +latter is actually invalid: + + $ sudo stap -e 'probe begin{printf ("BEGIN")}' -I . + parse error: invalid or missing conversion specifier + saw: operator ',' at ./qemu-system-x86_64-log.stp:15118:101 + source: printf("%d@%d vhost_vdpa_set_log_base dev: %p base: 0x%x size: %llu +refcnt: %d fd: %d log: %p\n", pid(), gettimeofday_ns(), dev, base, size, refcnt, fd, log) + + ^ + +Signed-off-by: Daniel P. Berrangé +Reviewed-by: Laurent Vivier +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Laurent Vivier +Message-id: 20210106130239.1004729-1-berrange@redhat.com + +[Fixed "simiarly" typo found by Laurent Vivier +--Stefan] + +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 09612de7e9adbe9666a8fa4cc60bab0a29a68ed1) +Signed-off-by: Laurent Vivier +Signed-off-by: Eduardo Lima (Etrunko) +--- + scripts/tracetool/format/log_stap.py | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/scripts/tracetool/format/log_stap.py b/scripts/tracetool/format/log_stap.py +index b486beb672..2d910ced82 100644 +--- a/scripts/tracetool/format/log_stap.py ++++ b/scripts/tracetool/format/log_stap.py +@@ -77,7 +77,12 @@ def c_fmt_to_stap(fmt): + elif state == STATE_LITERAL: + bits.append(literal) + +- fmt = re.sub("%(\d*)z(x|u|d)", "%\\1\\2", "".join(bits)) ++ # All variables in systemtap are 64-bit in size ++ # The "%l" integer size qualifier is thus redundant ++ # and "%ll" is not valid at all. 
Similarly the size_t ++ # based "%z" size qualifier is not valid. We just ++ # strip all size qualifiers for sanity. ++ fmt = re.sub("%(\d*)(l+|z)(x|u|d)", "%\\1\\3", "".join(bits)) + return fmt + + def generate(events, backend, group): +-- +2.18.4 + diff --git a/kvm-vhost-Unbreak-SMMU-and-virtio-iommu-on-dev-iotlb-sup.patch b/kvm-vhost-Unbreak-SMMU-and-virtio-iommu-on-dev-iotlb-sup.patch new file mode 100644 index 0000000..2926e5b --- /dev/null +++ b/kvm-vhost-Unbreak-SMMU-and-virtio-iommu-on-dev-iotlb-sup.patch @@ -0,0 +1,124 @@ +From 26c3b9b2a5e904f2799ac097c91588cb2248a6e0 Mon Sep 17 00:00:00 2001 +From: Auger Eric +Date: Fri, 5 Feb 2021 18:58:52 -0500 +Subject: [PATCH 6/7] vhost: Unbreak SMMU and virtio-iommu on dev-iotlb support + +RH-Author: Auger Eric +Message-id: <20210205185852.12830-1-eric.auger@redhat.com> +Patchwork-id: 100996 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH] vhost: Unbreak SMMU and virtio-iommu on dev-iotlb support +Bugzilla: 1925028 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Peter Xu + +From: Peter Xu + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1925028 +BRANCH: rhel-av-8.4.0 +UPSTREAM: merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=34788078 + +Previous work on dev-iotlb message broke vhost on either SMMU or virtio-iommu +since dev-iotlb (or PCIe ATS) is not yet supported for those archs. + +An initial idea is that we can let IOMMU to export this information to vhost so +that vhost would know whether the vIOMMU would support dev-iotlb, then vhost +can conditionally register to dev-iotlb or the old iotlb way. We can work +based on some previous patch to introduce PCIIOMMUOps as Yi Liu proposed [1]. + +However it's not as easy as I thought since vhost_iommu_region_add() does not +have a PCIDevice context at all since it's completely a backend. It seems +non-trivial to pass over a PCI device to the backend during init. E.g. when +the IOMMU notifier registered hdev->vdev is still NULL. 
+ +To make the fix smaller and easier, this patch goes the other way to leverage +the flag_changed() hook of vIOMMUs so that SMMU and virtio-iommu can trap the +dev-iotlb registration and fail it. Then vhost could try the fallback solution +as using UNMAP invalidation for it's translations. + +[1] https://lore.kernel.org/qemu-devel/1599735398-6829-4-git-send-email-yi.l.liu@intel.com/ + +Reported-by: Eric Auger +Fixes: b68ba1ca57677acf870d5ab10579e6105c1f5338 +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Signed-off-by: Peter Xu +Message-Id: <20210204191228.187550-1-peterx@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 958ec334bca3fa9862289e4cfe31bf1019e55816) +Signed-off-by: Eric Auger +Signed-off-by: Eduardo Lima (Etrunko) +--- + hw/arm/smmuv3.c | 5 +++++ + hw/virtio/vhost.c | 13 +++++++++++-- + hw/virtio/virtio-iommu.c | 5 +++++ + 3 files changed, 21 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 98b99d4fe8..bd1f97000d 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -1497,6 +1497,11 @@ static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu, + SMMUv3State *s3 = sdev->smmu; + SMMUState *s = &(s3->smmu_state); + ++ if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) { ++ error_setg(errp, "SMMUv3 does not support dev-iotlb yet"); ++ return -EINVAL; ++ } ++ + if (new & IOMMU_NOTIFIER_MAP) { + error_setg(errp, + "device %02x.%02x.%x requires iommu MAP notifier which is " +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 28c7d78172..6e17d631f7 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -704,6 +704,7 @@ static void vhost_iommu_region_add(MemoryListener *listener, + Int128 end; + int iommu_idx; + IOMMUMemoryRegion *iommu_mr; ++ int ret; + + if (!memory_region_is_iommu(section->mr)) { + return; +@@ -726,8 +727,16 @@ static void vhost_iommu_region_add(MemoryListener *listener, + iommu->iommu_offset = section->offset_within_address_space - + 
section->offset_within_region; + iommu->hdev = dev; +- memory_region_register_iommu_notifier(section->mr, &iommu->n, +- &error_fatal); ++ ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL); ++ if (ret) { ++ /* ++ * Some vIOMMUs do not support dev-iotlb yet. If so, try to use the ++ * UNMAP legacy message ++ */ ++ iommu->n.notifier_flags = IOMMU_NOTIFIER_UNMAP; ++ memory_region_register_iommu_notifier(section->mr, &iommu->n, ++ &error_fatal); ++ } + QLIST_INSERT_HEAD(&dev->iommu_list, iommu, iommu_next); + /* TODO: can replay help performance here? */ + } +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index cea8811295..65184f6e43 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -893,6 +893,11 @@ static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr, + IOMMUNotifierFlag new, + Error **errp) + { ++ if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) { ++ error_setg(errp, "Virtio-iommu does not support dev-iotlb yet"); ++ return -EINVAL; ++ } ++ + if (old == IOMMU_NOTIFIER_NONE) { + trace_virtio_iommu_notify_flag_add(iommu_mr->parent_obj.name); + } else if (new == IOMMU_NOTIFIER_NONE) { +-- +2.18.4 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index fa863e7..0d479b1 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -64,7 +64,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.2.0 -Release: 5%{?dist} +Release: 6%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -155,6 +155,16 @@ Patch57: kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch Patch58: kvm-q35-Increase-max_cpus-to-710-on-pc-q35-rhel8-machine.patch # For bz#1922170 - Enable vfio-ccw in AV Patch59: kvm-config-enable-VFIO_CCW.patch +# For bz#1854811 - scsi-bus.c: use-after-free due to race between device unplug and I/O operation causes guest crash +Patch60: kvm-scsi-fix-device-removal-race-vs-IO-restart-callback-.patch +# For bz#1907264 - systemtap: invalid or missing conversion specifier at the trace event vhost_vdpa_set_log_base +Patch61: kvm-tracetool-also-strip-l-and-ll-from-systemtap-format-.patch +# For bz#1834152 - [aarch64] QEMU SMMUv3 device: Support range invalidation +Patch63: kvm-hw-arm-smmuv3-Fix-addr_mask-for-range-based-invalida.patch +# For bz#1925028 - vsmmuv3/vhost and virtio-iommu/vhost regression +Patch65: kvm-vhost-Unbreak-SMMU-and-virtio-iommu-on-dev-iotlb-sup.patch +# For bz#1902537 - The default fsfreeze-hook path from man page and qemu-ga --help command are different +Patch66: kvm-docs-set-CONFDIR-when-running-sphinx.patch BuildRequires: wget BuildRequires: rpm-build @@ -284,6 +294,7 @@ hardware for a full system such as a PC and its associated peripherals. Summary: qemu-kvm core components Requires: %{name}-common = %{epoch}:%{version}-%{release} Requires: qemu-img = %{epoch}:%{version}-%{release} +Recommends: qemu-kvm-docs %ifarch %{ix86} x86_64 Requires: edk2-ovmf %endif @@ -327,6 +338,12 @@ qemu-kiwi is a version of qemu-kvm with a restricted set of features intended for use by specific applications. It's experimental and unsupported. +%package -n qemu-kvm-docs +Summary: qemu-kvm documentation + +%description -n qemu-kvm-docs +qemu-kvm-docs provides documentation files regarding qemu-kvm. 
+ %package -n qemu-img Summary: QEMU command line tool for manipulating disk images Group: Development/Tools @@ -613,7 +630,7 @@ pushd %{qemu_kvm_build} --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \ --with-coroutine=ucontext \ --with-git=git \ - --tls-priority=NORMAL \ + --tls-priority=@QEMU,SYSTEM \ %{disable_everything} \ --enable-attr \ %ifarch %{ix86} x86_64 @@ -744,7 +761,7 @@ find ../default-configs -name "*-rh-devices.mak" \ --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \ --with-coroutine=ucontext \ --with-git=git \ - --tls-priority=NORMAL \ + --tls-priority=@QEMU,SYSTEM \ %{disable_everything} \ --enable-attr \ %ifarch %{ix86} x86_64 @@ -1158,8 +1175,7 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %files # Deliberately empty - -%files -n qemu-kvm-common +%files -n qemu-kvm-docs %defattr(-,root,root) %dir %{qemudocdir} %doc %{qemudocdir}/README.rst @@ -1173,6 +1189,9 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %doc %{qemudocdir}/system/* %doc %{qemudocdir}/tools/* %doc %{qemudocdir}/user/* + +%files -n qemu-kvm-common +%defattr(-,root,root) %{_mandir}/man7/qemu-qmp-ref.7* %{_mandir}/man7/qemu-cpu-models.7* %{_bindir}/qemu-keymap @@ -1335,6 +1354,31 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %changelog +* Fri Feb 12 2021 Miroslav Rezanina - 5.2.0-6.el8 +- kvm-scsi-fix-device-removal-race-vs-IO-restart-callback-.patch [bz#1854811] +- kvm-tracetool-also-strip-l-and-ll-from-systemtap-format-.patch [bz#1907264] +- kvm-redhat-moving-all-documentation-files-to-qemu-kvm-do.patch [bz#1881170 bz#1924766] +- kvm-hw-arm-smmuv3-Fix-addr_mask-for-range-based-invalida.patch [bz#1834152] +- kvm-redhat-makes-qemu-respect-system-s-crypto-profile.patch [bz#1902219] +- kvm-vhost-Unbreak-SMMU-and-virtio-iommu-on-dev-iotlb-sup.patch [bz#1925028] +- kvm-docs-set-CONFDIR-when-running-sphinx.patch [bz#1902537] +- Resolves: bz#1854811 + (scsi-bus.c: use-after-free due to race between device unplug and 
I/O operation causes guest crash) +- Resolves: bz#1907264 + (systemtap: invalid or missing conversion specifier at the trace event vhost_vdpa_set_log_base) +- Resolves: bz#1881170 + (split documentation from the qemu-kvm-core package to its own subpackage) +- Resolves: bz#1924766 + (split documentation from the qemu-kvm-core package to its own subpackage [av-8.4.0]) +- Resolves: bz#1834152 + ([aarch64] QEMU SMMUv3 device: Support range invalidation) +- Resolves: bz#1902219 + (QEMU doesn't honour system crypto policies) +- Resolves: bz#1925028 + (vsmmuv3/vhost and virtio-iommu/vhost regression) +- Resolves: bz#1902537 + (The default fsfreeze-hook path from man page and qemu-ga --help command are different) + * Tue Feb 02 2021 Eduardo Lima (Etrunko) - 5.2.0-5.el8 - kvm-spapr-Allow-memory-unplug-to-always-succeed.patch [bz#1914069] - kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch [bz#1914069] From cd9fb661b4502d0cd9b2d178c6ed60c22a9ecd38 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 18 Feb 2021 08:11:01 +0100 Subject: [PATCH 111/195] Synchronization with qemu-kvm-5.2.0-7.el8 --- ...essing-BDS-twice-in-bdrv_set_aio_con.patch | 96 +++++++ ...set_aio_context-context-requirements.patch | 118 +++++++++ ...ve-blk_exp_close_all-to-qemu_cleanup.patch | 101 +++++++ ...Quiesce-coroutines-on-context-switch.patch | 249 ++++++++++++++++++ ...e-daemon-Call-bdrv_close_all-on-exit.patch | 48 ++++ ...sponding-memory_listener_unregister-.patch | 234 ++++++++++++++++ qemu-kvm.spec | 34 ++- 7 files changed, 879 insertions(+), 1 deletion(-) create mode 100644 kvm-block-Avoid-processing-BDS-twice-in-bdrv_set_aio_con.patch create mode 100644 kvm-block-Honor-blk_set_aio_context-context-requirements.patch create mode 100644 kvm-block-move-blk_exp_close_all-to-qemu_cleanup.patch create mode 100644 kvm-nbd-server-Quiesce-coroutines-on-context-switch.patch create mode 100644 kvm-storage-daemon-Call-bdrv_close_all-on-exit.patch create mode 100644 
kvm-virtio-Add-corresponding-memory_listener_unregister-.patch diff --git a/kvm-block-Avoid-processing-BDS-twice-in-bdrv_set_aio_con.patch b/kvm-block-Avoid-processing-BDS-twice-in-bdrv_set_aio_con.patch new file mode 100644 index 0000000..59376b8 --- /dev/null +++ b/kvm-block-Avoid-processing-BDS-twice-in-bdrv_set_aio_con.patch @@ -0,0 +1,96 @@ +From 0db52fa2553ba83454a347e0aca4896e1b0d9b41 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Thu, 11 Feb 2021 14:42:06 -0300 +Subject: [PATCH 4/6] block: Avoid processing BDS twice in + bdrv_set_aio_context_ignore() + +RH-Author: Sergio Lopez Pascual +Message-id: <20210211144208.58930-4-slp@redhat.com> +Patchwork-id: 101050 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 3/5] block: Avoid processing BDS twice in bdrv_set_aio_context_ignore() +Bugzilla: 1918966 1918968 +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake + +Some graphs may contain an indirect reference to the first BDS in the +chain that can be reached while walking it bottom->up from one its +children. + +Doubling-processing of a BDS is especially problematic for the +aio_notifiers, as they might attempt to work on both the old and the +new AIO contexts. + +To avoid this problem, add every child and parent to the ignore list +before actually processing them. 
+ +Suggested-by: Kevin Wolf +Signed-off-by: Sergio Lopez +Message-Id: <20210201125032.44713-2-slp@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 722d8e73d65cb54f39d360ecb2147ac58f43c399) +Signed-off-by: Sergio Lopez +Signed-off-by: Eduardo Lima (Etrunko) +--- + block.c | 34 +++++++++++++++++++++++++++------- + 1 file changed, 27 insertions(+), 7 deletions(-) + +diff --git a/block.c b/block.c +index f1cedac362..8bfa446f9c 100644 +--- a/block.c ++++ b/block.c +@@ -6454,7 +6454,10 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, + AioContext *new_context, GSList **ignore) + { + AioContext *old_context = bdrv_get_aio_context(bs); +- BdrvChild *child; ++ GSList *children_to_process = NULL; ++ GSList *parents_to_process = NULL; ++ GSList *entry; ++ BdrvChild *child, *parent; + + g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + +@@ -6469,16 +6472,33 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, + continue; + } + *ignore = g_slist_prepend(*ignore, child); +- bdrv_set_aio_context_ignore(child->bs, new_context, ignore); ++ children_to_process = g_slist_prepend(children_to_process, child); + } +- QLIST_FOREACH(child, &bs->parents, next_parent) { +- if (g_slist_find(*ignore, child)) { ++ ++ QLIST_FOREACH(parent, &bs->parents, next_parent) { ++ if (g_slist_find(*ignore, parent)) { + continue; + } +- assert(child->klass->set_aio_ctx); +- *ignore = g_slist_prepend(*ignore, child); +- child->klass->set_aio_ctx(child, new_context, ignore); ++ *ignore = g_slist_prepend(*ignore, parent); ++ parents_to_process = g_slist_prepend(parents_to_process, parent); ++ } ++ ++ for (entry = children_to_process; ++ entry != NULL; ++ entry = g_slist_next(entry)) { ++ child = entry->data; ++ bdrv_set_aio_context_ignore(child->bs, new_context, ignore); ++ } ++ g_slist_free(children_to_process); ++ ++ for (entry = parents_to_process; ++ entry != NULL; ++ entry = g_slist_next(entry)) { ++ parent = entry->data; ++ 
assert(parent->klass->set_aio_ctx); ++ parent->klass->set_aio_ctx(parent, new_context, ignore); + } ++ g_slist_free(parents_to_process); + + bdrv_detach_aio_context(bs); + +-- +2.27.0 + diff --git a/kvm-block-Honor-blk_set_aio_context-context-requirements.patch b/kvm-block-Honor-blk_set_aio_context-context-requirements.patch new file mode 100644 index 0000000..de75ecc --- /dev/null +++ b/kvm-block-Honor-blk_set_aio_context-context-requirements.patch @@ -0,0 +1,118 @@ +From bc284d49a00a1a716b380c2245aa0b897a259a5d Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Thu, 11 Feb 2021 14:42:04 -0300 +Subject: [PATCH 2/6] block: Honor blk_set_aio_context() context requirements + +RH-Author: Sergio Lopez Pascual +Message-id: <20210211144208.58930-2-slp@redhat.com> +Patchwork-id: 101049 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/5] block: Honor blk_set_aio_context() context requirements +Bugzilla: 1918966 1918968 +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake + +The documentation for bdrv_set_aio_context_ignore() states this: + + * The caller must own the AioContext lock for the old AioContext of bs, but it + * must not own the AioContext lock for new_context (unless new_context is the + * same as the current context of bs). + +As blk_set_aio_context() makes use of this function, this rule also +applies to it. + +Fix all occurrences where this rule wasn't honored. 
+ +Suggested-by: Kevin Wolf +Signed-off-by: Sergio Lopez +Message-Id: <20201214170519.223781-2-slp@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Eric Blake +(cherry picked from commit c7040ff64ec93ee925a81d3547db925fe7d1f1c0) +Signed-off-by: Sergio Lopez +Signed-off-by: Eduardo Lima (Etrunko) +--- + hw/block/dataplane/virtio-blk.c | 4 ++++ + hw/block/dataplane/xen-block.c | 7 ++++++- + hw/scsi/virtio-scsi.c | 6 ++++-- + 3 files changed, 14 insertions(+), 3 deletions(-) + +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index 37499c5564..e9050c8987 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -172,6 +172,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + VirtIOBlockDataPlane *s = vblk->dataplane; + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vblk))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); ++ AioContext *old_context; + unsigned i; + unsigned nvqs = s->conf->num_queues; + Error *local_err = NULL; +@@ -214,7 +215,10 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + vblk->dataplane_started = true; + trace_virtio_blk_data_plane_start(s); + ++ old_context = blk_get_aio_context(s->conf->conf.blk); ++ aio_context_acquire(old_context); + r = blk_set_aio_context(s->conf->conf.blk, s->ctx, &local_err); ++ aio_context_release(old_context); + if (r < 0) { + error_report_err(local_err); + goto fail_guest_notifiers; +diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c +index 71c337c7b7..3675f8deaf 100644 +--- a/hw/block/dataplane/xen-block.c ++++ b/hw/block/dataplane/xen-block.c +@@ -725,6 +725,7 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, + { + ERRP_GUARD(); + XenDevice *xendev = dataplane->xendev; ++ AioContext *old_context; + unsigned int ring_size; + unsigned int i; + +@@ -808,10 +809,14 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, + goto stop; + } + +- aio_context_acquire(dataplane->ctx); ++ 
old_context = blk_get_aio_context(dataplane->blk); ++ aio_context_acquire(old_context); + /* If other users keep the BlockBackend in the iothread, that's ok */ + blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL); ++ aio_context_release(old_context); ++ + /* Only reason for failure is a NULL channel */ ++ aio_context_acquire(dataplane->ctx); + xen_device_set_event_channel_context(xendev, dataplane->event_channel, + dataplane->ctx, &error_abort); + aio_context_release(dataplane->ctx); +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 82c025146d..66bdda5473 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -821,6 +821,7 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev); + VirtIOSCSI *s = VIRTIO_SCSI(vdev); + SCSIDevice *sd = SCSI_DEVICE(dev); ++ AioContext *old_context; + int ret; + + /* XXX: Remove this check once block backend is capable of handling +@@ -836,9 +837,10 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; + } +- virtio_scsi_acquire(s); ++ old_context = blk_get_aio_context(sd->conf.blk); ++ aio_context_acquire(old_context); + ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp); +- virtio_scsi_release(s); ++ aio_context_release(old_context); + if (ret < 0) { + return; + } +-- +2.27.0 + diff --git a/kvm-block-move-blk_exp_close_all-to-qemu_cleanup.patch b/kvm-block-move-blk_exp_close_all-to-qemu_cleanup.patch new file mode 100644 index 0000000..dcda5bc --- /dev/null +++ b/kvm-block-move-blk_exp_close_all-to-qemu_cleanup.patch @@ -0,0 +1,101 @@ +From 661245e1baf416570295fad0db1fdd5ad8485e33 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Thu, 11 Feb 2021 14:42:08 -0300 +Subject: [PATCH 6/6] block: move blk_exp_close_all() to qemu_cleanup() + +RH-Author: Sergio Lopez Pascual +Message-id: 
<20210211144208.58930-6-slp@redhat.com> +Patchwork-id: 101052 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 5/5] block: move blk_exp_close_all() to qemu_cleanup() +Bugzilla: 1918966 1918968 +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake + +Move blk_exp_close_all() from bdrv_close() to qemu_cleanup(), before +bdrv_drain_all_begin(). + +Export drivers may have coroutines yielding at some point in the block +layer, so we need to shut them down before draining the block layer, +as otherwise they may get stuck in blk_wait_while_drained(). + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1900505 +Signed-off-by: Sergio Lopez +Message-Id: <20210201125032.44713-3-slp@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 1895b977f9a69419ae45cfc25805f71efae32eaf) +Signed-off-by: Sergio Lopez +Signed-off-by: Eduardo Lima (Etrunko) +--- + block.c | 1 - + qemu-nbd.c | 1 + + softmmu/vl.c | 9 +++++++++ + storage-daemon/qemu-storage-daemon.c | 1 + + 4 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/block.c b/block.c +index 8bfa446f9c..57c60efc7f 100644 +--- a/block.c ++++ b/block.c +@@ -4472,7 +4472,6 @@ static void bdrv_close(BlockDriverState *bs) + void bdrv_close_all(void) + { + assert(job_next(NULL) == NULL); +- blk_exp_close_all(); + + /* Drop references from requests still in flight, such as canceled block + * jobs whose AIO context has not been polled yet */ +diff --git a/qemu-nbd.c b/qemu-nbd.c +index a7075c5419..1d337b7504 100644 +--- a/qemu-nbd.c ++++ b/qemu-nbd.c +@@ -509,6 +509,7 @@ static const char *socket_activation_validate_opts(const char *device, + static void qemu_nbd_shutdown(void) + { + job_cancel_sync_all(); ++ blk_exp_close_all(); + bdrv_close_all(); + } + +diff --git a/softmmu/vl.c b/softmmu/vl.c +index 065d52e8dc..3244ee5e12 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -66,6 +66,7 @@ + #include "qemu/log.h" + #include "sysemu/blockdev.h" + #include "hw/block/block.h" ++#include "block/export.h" + 
#include "migration/misc.h" + #include "migration/snapshot.h" + #include "migration/global_state.h" +@@ -4526,6 +4527,14 @@ void qemu_cleanup(void) + */ + migration_shutdown(); + ++ /* ++ * Close the exports before draining the block layer. The export ++ * drivers may have coroutines yielding on it, so we need to clean ++ * them up before the drain, as otherwise they may be get stuck in ++ * blk_wait_while_drained(). ++ */ ++ blk_exp_close_all(); ++ + /* + * We must cancel all block jobs while the block layer is drained, + * or cancelling will be affected by throttling and thus may block +diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c +index e0c87edbdd..d8d172cc60 100644 +--- a/storage-daemon/qemu-storage-daemon.c ++++ b/storage-daemon/qemu-storage-daemon.c +@@ -314,6 +314,7 @@ int main(int argc, char *argv[]) + main_loop_wait(false); + } + ++ blk_exp_close_all(); + bdrv_drain_all_begin(); + bdrv_close_all(); + +-- +2.27.0 + diff --git a/kvm-nbd-server-Quiesce-coroutines-on-context-switch.patch b/kvm-nbd-server-Quiesce-coroutines-on-context-switch.patch new file mode 100644 index 0000000..d0080d2 --- /dev/null +++ b/kvm-nbd-server-Quiesce-coroutines-on-context-switch.patch @@ -0,0 +1,249 @@ +From 7cadf68c46abcd097fcbcecb11a4a04f264d0316 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Thu, 11 Feb 2021 14:42:05 -0300 +Subject: [PATCH 3/6] nbd/server: Quiesce coroutines on context switch + +RH-Author: Sergio Lopez Pascual +Message-id: <20210211144208.58930-3-slp@redhat.com> +Patchwork-id: 101051 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/5] nbd/server: Quiesce coroutines on context switch +Bugzilla: 1918966 1918968 +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake + +When switching between AIO contexts we need to make sure that both +recv_coroutine and send_coroutine are not scheduled to run. 
Otherwise, +QEMU may crash while attaching the new context with an error like +this one: + +aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule' + +To achieve this we need a local implementation of +'qio_channel_readv_all_eof' named 'nbd_read_eof' (a trick already done +by 'nbd/client.c') that allows us to interrupt the operation and to +know when recv_coroutine is yielding. + +With this in place, we delegate detaching the AIO context to the +owning context with a BH ('nbd_aio_detach_bh') scheduled using +'aio_wait_bh_oneshot'. This BH signals that we need to quiesce the +channel by setting 'client->quiescing' to 'true', and either waits for +the coroutine to finish using AIO_WAIT_WHILE or, if it's yielding in +'nbd_read_eof', actively enters the coroutine to interrupt it. + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1900326 +Signed-off-by: Sergio Lopez +Reviewed-by: Eric Blake +Message-Id: <20201214170519.223781-4-slp@redhat.com> +Signed-off-by: Eric Blake +(cherry picked from commit f148ae7d36cbb924447f4b528a94d7799836c749) +Signed-off-by: Sergio Lopez +Signed-off-by: Eduardo Lima (Etrunko) +--- + nbd/server.c | 120 +++++++++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 106 insertions(+), 14 deletions(-) + +diff --git a/nbd/server.c b/nbd/server.c +index 613ed2634a..7229f487d2 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -132,6 +132,9 @@ struct NBDClient { + CoMutex send_lock; + Coroutine *send_coroutine; + ++ bool read_yielding; ++ bool quiescing; ++ + QTAILQ_ENTRY(NBDClient) next; + int nb_requests; + bool closing; +@@ -1352,14 +1355,60 @@ static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp) + return 0; + } + +-static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request, ++/* nbd_read_eof ++ * Tries to read @size bytes from @ioc. This is a local implementation of ++ * qio_channel_readv_all_eof. 
We have it here because we need it to be ++ * interruptible and to know when the coroutine is yielding. ++ * Returns 1 on success ++ * 0 on eof, when no data was read (errp is not set) ++ * negative errno on failure (errp is set) ++ */ ++static inline int coroutine_fn ++nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp) ++{ ++ bool partial = false; ++ ++ assert(size); ++ while (size > 0) { ++ struct iovec iov = { .iov_base = buffer, .iov_len = size }; ++ ssize_t len; ++ ++ len = qio_channel_readv(client->ioc, &iov, 1, errp); ++ if (len == QIO_CHANNEL_ERR_BLOCK) { ++ client->read_yielding = true; ++ qio_channel_yield(client->ioc, G_IO_IN); ++ client->read_yielding = false; ++ if (client->quiescing) { ++ return -EAGAIN; ++ } ++ continue; ++ } else if (len < 0) { ++ return -EIO; ++ } else if (len == 0) { ++ if (partial) { ++ error_setg(errp, ++ "Unexpected end-of-file before all bytes were read"); ++ return -EIO; ++ } else { ++ return 0; ++ } ++ } ++ ++ partial = true; ++ size -= len; ++ buffer = (uint8_t *) buffer + len; ++ } ++ return 1; ++} ++ ++static int nbd_receive_request(NBDClient *client, NBDRequest *request, + Error **errp) + { + uint8_t buf[NBD_REQUEST_SIZE]; + uint32_t magic; + int ret; + +- ret = nbd_read(ioc, buf, sizeof(buf), "request", errp); ++ ret = nbd_read_eof(client, buf, sizeof(buf), errp); + if (ret < 0) { + return ret; + } +@@ -1480,11 +1529,37 @@ static void blk_aio_attached(AioContext *ctx, void *opaque) + + QTAILQ_FOREACH(client, &exp->clients, next) { + qio_channel_attach_aio_context(client->ioc, ctx); ++ ++ assert(client->recv_coroutine == NULL); ++ assert(client->send_coroutine == NULL); ++ ++ if (client->quiescing) { ++ client->quiescing = false; ++ nbd_client_receive_next_request(client); ++ } ++ } ++} ++ ++static void nbd_aio_detach_bh(void *opaque) ++{ ++ NBDExport *exp = opaque; ++ NBDClient *client; ++ ++ QTAILQ_FOREACH(client, &exp->clients, next) { ++ qio_channel_detach_aio_context(client->ioc); ++ 
client->quiescing = true; ++ + if (client->recv_coroutine) { +- aio_co_schedule(ctx, client->recv_coroutine); ++ if (client->read_yielding) { ++ qemu_aio_coroutine_enter(exp->common.ctx, ++ client->recv_coroutine); ++ } else { ++ AIO_WAIT_WHILE(exp->common.ctx, client->recv_coroutine != NULL); ++ } + } ++ + if (client->send_coroutine) { +- aio_co_schedule(ctx, client->send_coroutine); ++ AIO_WAIT_WHILE(exp->common.ctx, client->send_coroutine != NULL); + } + } + } +@@ -1492,13 +1567,10 @@ static void blk_aio_attached(AioContext *ctx, void *opaque) + static void blk_aio_detach(void *opaque) + { + NBDExport *exp = opaque; +- NBDClient *client; + + trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); + +- QTAILQ_FOREACH(client, &exp->clients, next) { +- qio_channel_detach_aio_context(client->ioc); +- } ++ aio_wait_bh_oneshot(exp->common.ctx, nbd_aio_detach_bh, exp); + + exp->common.ctx = NULL; + } +@@ -2151,20 +2223,23 @@ static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle, + + /* nbd_co_receive_request + * Collect a client request. Return 0 if request looks valid, -EIO to drop +- * connection right away, and any other negative value to report an error to +- * the client (although the caller may still need to disconnect after reporting +- * the error). ++ * connection right away, -EAGAIN to indicate we were interrupted and the ++ * channel should be quiesced, and any other negative value to report an error ++ * to the client (although the caller may still need to disconnect after ++ * reporting the error). 
+ */ + static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request, + Error **errp) + { + NBDClient *client = req->client; + int valid_flags; ++ int ret; + + g_assert(qemu_in_coroutine()); + assert(client->recv_coroutine == qemu_coroutine_self()); +- if (nbd_receive_request(client->ioc, request, errp) < 0) { +- return -EIO; ++ ret = nbd_receive_request(client, request, errp); ++ if (ret < 0) { ++ return ret; + } + + trace_nbd_co_receive_request_decode_type(request->handle, request->type, +@@ -2507,6 +2582,17 @@ static coroutine_fn void nbd_trip(void *opaque) + return; + } + ++ if (client->quiescing) { ++ /* ++ * We're switching between AIO contexts. Don't attempt to receive a new ++ * request and kick the main context which may be waiting for us. ++ */ ++ nbd_client_put(client); ++ client->recv_coroutine = NULL; ++ aio_wait_kick(); ++ return; ++ } ++ + req = nbd_request_get(client); + ret = nbd_co_receive_request(req, &request, &local_err); + client->recv_coroutine = NULL; +@@ -2519,6 +2605,11 @@ static coroutine_fn void nbd_trip(void *opaque) + goto done; + } + ++ if (ret == -EAGAIN) { ++ assert(client->quiescing); ++ goto done; ++ } ++ + nbd_client_receive_next_request(client); + if (ret == -EIO) { + goto disconnect; +@@ -2565,7 +2656,8 @@ disconnect: + + static void nbd_client_receive_next_request(NBDClient *client) + { +- if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS) { ++ if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS && ++ !client->quiescing) { + nbd_client_get(client); + client->recv_coroutine = qemu_coroutine_create(nbd_trip, client); + aio_co_schedule(client->exp->common.ctx, client->recv_coroutine); +-- +2.27.0 + diff --git a/kvm-storage-daemon-Call-bdrv_close_all-on-exit.patch b/kvm-storage-daemon-Call-bdrv_close_all-on-exit.patch new file mode 100644 index 0000000..7b0472c --- /dev/null +++ b/kvm-storage-daemon-Call-bdrv_close_all-on-exit.patch @@ -0,0 +1,48 @@ +From 
b1883ddf10c2ec31ac72866494687d8897535a82 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Thu, 11 Feb 2021 14:42:07 -0300 +Subject: [PATCH 5/6] storage-daemon: Call bdrv_close_all() on exit + +RH-Author: Sergio Lopez Pascual +Message-id: <20210211144208.58930-5-slp@redhat.com> +Patchwork-id: 101048 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 4/5] storage-daemon: Call bdrv_close_all() on exit +Bugzilla: 1918966 1918968 +RH-Acked-by: Max Reitz +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake + +From: Max Reitz + +Otherwise, exports and block devices are not properly shut down and +closed, unless the user explicitly issues blockdev-del and +block-export-del commands for each of them. + +Signed-off-by: Max Reitz +Reviewed-by: Kevin Wolf +Message-Id: <20201027190600.192171-17-mreitz@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit b55a3c8860b763b62b2cc2f4a6f55379977bbde5) +Signed-off-by: Sergio Lopez +Signed-off-by: Eduardo Lima (Etrunko) +--- + storage-daemon/qemu-storage-daemon.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c +index 7c914b0dc1..e0c87edbdd 100644 +--- a/storage-daemon/qemu-storage-daemon.c ++++ b/storage-daemon/qemu-storage-daemon.c +@@ -314,6 +314,9 @@ int main(int argc, char *argv[]) + main_loop_wait(false); + } + ++ bdrv_drain_all_begin(); ++ bdrv_close_all(); ++ + monitor_cleanup(); + qemu_chr_cleanup(); + user_creatable_cleanup(); +-- +2.27.0 + diff --git a/kvm-virtio-Add-corresponding-memory_listener_unregister-.patch b/kvm-virtio-Add-corresponding-memory_listener_unregister-.patch new file mode 100644 index 0000000..46c96b0 --- /dev/null +++ b/kvm-virtio-Add-corresponding-memory_listener_unregister-.patch @@ -0,0 +1,234 @@ +From ac9e40a75eba0019fb9930835804e8daceead981 Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Tue, 9 Feb 2021 10:38:16 -0300 +Subject: [PATCH 1/6] virtio: Add corresponding memory_listener_unregister to + 
unrealize +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210209103816.1636200-2-eperezma@redhat.com> +Patchwork-id: 101009 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] virtio: Add corresponding memory_listener_unregister to unrealize +Bugzilla: 1903521 +RH-Acked-by: Peter Xu +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Stefano Garzarella + +Address space is destroyed without proper removal of its listeners with +current code. They are expected to be removed in +virtio_device_instance_finalize [1], but qemu calls it through +object_deinit, after address_space_destroy call through +device_set_realized [2]. + +Move it to virtio_device_unrealize, called before device_set_realized +[3] and making it symmetric with memory_listener_register in +virtio_device_realize. + +v2: Delete no-op call of virtio_device_instance_finalize. + Add backtraces. + +[1] + + #0 virtio_device_instance_finalize (obj=0x555557de5120) + at /home/qemu/include/hw/virtio/virtio.h:71 + #1 0x0000555555b703c9 in object_deinit (type=0x555556639860, + obj=) at ../qom/object.c:671 + #2 object_finalize (data=0x555557de5120) at ../qom/object.c:685 + #3 object_unref (objptr=0x555557de5120) at ../qom/object.c:1184 + #4 0x0000555555b4de9d in bus_free_bus_child (kid=0x555557df0660) + at ../hw/core/qdev.c:55 + #5 0x0000555555c65003 in call_rcu_thread (opaque=opaque@entry=0x0) + at ../util/rcu.c:281 + +Queued by: + + #0 bus_remove_child (bus=0x555557de5098, + child=child@entry=0x555557de5120) at ../hw/core/qdev.c:60 + #1 0x0000555555b4ee31 in device_unparent (obj=) + at ../hw/core/qdev.c:984 + #2 0x0000555555b70465 in object_finalize_child_property ( + obj=, name=, opaque=0x555557de5120) + at ../qom/object.c:1725 + #3 0x0000555555b6fa17 in object_property_del_child ( + child=0x555557de5120, obj=0x555557ddcf90) at ../qom/object.c:645 + #4 object_unparent (obj=0x555557de5120) at ../qom/object.c:664 + #5 0x0000555555b4c071 
in bus_unparent (obj=) + at ../hw/core/bus.c:147 + #6 0x0000555555b70465 in object_finalize_child_property ( + obj=, name=, opaque=0x555557de5098) + at ../qom/object.c:1725 + #7 0x0000555555b6fa17 in object_property_del_child ( + child=0x555557de5098, obj=0x555557ddcf90) at ../qom/object.c:645 + #8 object_unparent (obj=0x555557de5098) at ../qom/object.c:664 + #9 0x0000555555b4ee19 in device_unparent (obj=) + at ../hw/core/qdev.c:981 + #10 0x0000555555b70465 in object_finalize_child_property ( + obj=, name=, opaque=0x555557ddcf90) + at ../qom/object.c:1725 + #11 0x0000555555b6fa17 in object_property_del_child ( + child=0x555557ddcf90, obj=0x55555685da10) at ../qom/object.c:645 + #12 object_unparent (obj=0x555557ddcf90) at ../qom/object.c:664 + #13 0x00005555558dc331 in pci_for_each_device_under_bus ( + opaque=, fn=, bus=) + at ../hw/pci/pci.c:1654 + +[2] + +Optimizer omits pci_qdev_unrealize, called by device_set_realized, and +do_pci_unregister_device, called by pci_qdev_unrealize and caller of +address_space_destroy. 
+ + #0 address_space_destroy (as=0x555557ddd1b8) + at ../softmmu/memory.c:2840 + #1 0x0000555555b4fc53 in device_set_realized (obj=0x555557ddcf90, + value=, errp=0x7fffeea8f1e0) + at ../hw/core/qdev.c:850 + #2 0x0000555555b6eaa6 in property_set_bool (obj=0x555557ddcf90, + v=, name=, opaque=0x555556650ba0, + errp=0x7fffeea8f1e0) at ../qom/object.c:2255 + #3 0x0000555555b70e07 in object_property_set ( + obj=obj@entry=0x555557ddcf90, + name=name@entry=0x555555db99df "realized", + v=v@entry=0x7fffe46b7500, + errp=errp@entry=0x5555565bbf38 ) + at ../qom/object.c:1400 + #4 0x0000555555b73c5f in object_property_set_qobject ( + obj=obj@entry=0x555557ddcf90, + name=name@entry=0x555555db99df "realized", + value=value@entry=0x7fffe44f6180, + errp=errp@entry=0x5555565bbf38 ) + at ../qom/qom-qobject.c:28 + #5 0x0000555555b71044 in object_property_set_bool ( + obj=0x555557ddcf90, name=0x555555db99df "realized", + value=, errp=0x5555565bbf38 ) + at ../qom/object.c:1470 + #6 0x0000555555921cb7 in pcie_unplug_device (bus=, + dev=0x555557ddcf90, + opaque=) at /home/qemu/include/hw/qdev-core.h:17 + #7 0x00005555558dc331 in pci_for_each_device_under_bus ( + opaque=, fn=, + bus=) at ../hw/pci/pci.c:1654 + +[3] + + #0 virtio_device_unrealize (dev=0x555557de5120) + at ../hw/virtio/virtio.c:3680 + #1 0x0000555555b4fc63 in device_set_realized (obj=0x555557de5120, + value=, errp=0x7fffee28df90) + at ../hw/core/qdev.c:850 + #2 0x0000555555b6eab6 in property_set_bool (obj=0x555557de5120, + v=, name=, opaque=0x555556650ba0, + errp=0x7fffee28df90) at ../qom/object.c:2255 + #3 0x0000555555b70e17 in object_property_set ( + obj=obj@entry=0x555557de5120, + name=name@entry=0x555555db99ff "realized", + v=v@entry=0x7ffdd8035040, + errp=errp@entry=0x5555565bbf38 ) + at ../qom/object.c:1400 + #4 0x0000555555b73c6f in object_property_set_qobject ( + obj=obj@entry=0x555557de5120, + name=name@entry=0x555555db99ff "realized", + value=value@entry=0x7ffdd8035020, + errp=errp@entry=0x5555565bbf38 ) + at 
../qom/qom-qobject.c:28 + #5 0x0000555555b71054 in object_property_set_bool ( + obj=0x555557de5120, name=name@entry=0x555555db99ff "realized", + value=value@entry=false, errp=0x5555565bbf38 ) + at ../qom/object.c:1470 + #6 0x0000555555b4edc5 in qdev_unrealize (dev=) + at ../hw/core/qdev.c:403 + #7 0x0000555555b4c2a9 in bus_set_realized (obj=, + value=, errp=) + at ../hw/core/bus.c:204 + #8 0x0000555555b6eab6 in property_set_bool (obj=0x555557de5098, + v=, name=, opaque=0x555557df04c0, + errp=0x7fffee28e0a0) at ../qom/object.c:2255 + #9 0x0000555555b70e17 in object_property_set ( + obj=obj@entry=0x555557de5098, + name=name@entry=0x555555db99ff "realized", + v=v@entry=0x7ffdd8034f50, + errp=errp@entry=0x5555565bbf38 ) + at ../qom/object.c:1400 + #10 0x0000555555b73c6f in object_property_set_qobject ( + obj=obj@entry=0x555557de5098, + name=name@entry=0x555555db99ff "realized", + value=value@entry=0x7ffdd8020630, + errp=errp@entry=0x5555565bbf38 ) + at ../qom/qom-qobject.c:28 + #11 0x0000555555b71054 in object_property_set_bool ( + obj=obj@entry=0x555557de5098, + name=name@entry=0x555555db99ff "realized", + value=value@entry=false, errp=0x5555565bbf38 ) + at ../qom/object.c:1470 + #12 0x0000555555b4c725 in qbus_unrealize ( + bus=bus@entry=0x555557de5098) at ../hw/core/bus.c:178 + #13 0x0000555555b4fc00 in device_set_realized (obj=0x555557ddcf90, + value=, errp=0x7fffee28e1e0) + at ../hw/core/qdev.c:844 + #14 0x0000555555b6eab6 in property_set_bool (obj=0x555557ddcf90, + v=, name=, opaque=0x555556650ba0, + errp=0x7fffee28e1e0) at ../qom/object.c:2255 + #15 0x0000555555b70e17 in object_property_set ( + obj=obj@entry=0x555557ddcf90, + name=name@entry=0x555555db99ff "realized", + v=v@entry=0x7ffdd8020560, + errp=errp@entry=0x5555565bbf38 ) + at ../qom/object.c:1400 + #16 0x0000555555b73c6f in object_property_set_qobject ( + obj=obj@entry=0x555557ddcf90, + name=name@entry=0x555555db99ff "realized", + value=value@entry=0x7ffdd8020540, + errp=errp@entry=0x5555565bbf38 ) + at 
../qom/qom-qobject.c:28 + #17 0x0000555555b71054 in object_property_set_bool ( + obj=0x555557ddcf90, name=0x555555db99ff "realized", + value=, errp=0x5555565bbf38 ) + at ../qom/object.c:1470 + #18 0x0000555555921cb7 in pcie_unplug_device (bus=, + dev=0x555557ddcf90, opaque=) + at /home/qemu/include/hw/qdev-core.h:17 + #19 0x00005555558dc331 in pci_for_each_device_under_bus ( + opaque=, fn=, bus=) + at ../hw/pci/pci.c:1654 + +Fixes: c611c76417f ("virtio: add MemoryListener to cache ring translations") +Buglink: https://bugs.launchpad.net/qemu/+bug/1912846 +Signed-off-by: Eugenio Pérez +Message-Id: <20210125192505.390554-1-eperezma@redhat.com> +Reviewed-by: Peter Xu +Acked-by: Jason Wang +Reviewed-by: Stefano Garzarella +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit f6ab64c05f8a6229bf6569d3791c23abb9f6eee4) +Signed-off-by: Eugenio Pérez +Signed-off-by: Eduardo Lima (Etrunko) +--- + hw/virtio/virtio.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index ceb58fda6c..9312e7191b 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -3677,6 +3677,7 @@ static void virtio_device_unrealize(DeviceState *dev) + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); + ++ memory_listener_unregister(&vdev->listener); + virtio_bus_device_unplugged(vdev); + + if (vdc->unrealize != NULL) { +@@ -3707,7 +3708,6 @@ static void virtio_device_instance_finalize(Object *obj) + { + VirtIODevice *vdev = VIRTIO_DEVICE(obj); + +- memory_listener_unregister(&vdev->listener); + virtio_device_free_virtqueues(vdev); + + g_free(vdev->config); +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 0d479b1..15cb799 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -64,7 +64,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.2.0 -Release: 
6%{?dist} +Release: 7%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -165,6 +165,23 @@ Patch63: kvm-hw-arm-smmuv3-Fix-addr_mask-for-range-based-invalida.patch Patch65: kvm-vhost-Unbreak-SMMU-and-virtio-iommu-on-dev-iotlb-sup.patch # For bz#1902537 - The default fsfreeze-hook path from man page and qemu-ga --help command are different Patch66: kvm-docs-set-CONFDIR-when-running-sphinx.patch +# For bz#1903521 - hot unplug vhost-user cause qemu crash: qemu-kvm: ../softmmu/memory.c:2818: do_address_space_destroy: Assertion `QTAILQ_EMPTY(&as->listeners)' failed. +Patch67: kvm-virtio-Add-corresponding-memory_listener_unregister-.patch +# For bz#1918966 - [incremental_backup] qemu aborts if guest reboot during backup when using virtio-blk: "aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule'" +# For bz#1918968 - [incremental_backup] qemu deadlock after poweroff in guest during backup in nbd_export_close_all() +Patch68: kvm-block-Honor-blk_set_aio_context-context-requirements.patch +# For bz#1918966 - [incremental_backup] qemu aborts if guest reboot during backup when using virtio-blk: "aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule'" +# For bz#1918968 - [incremental_backup] qemu deadlock after poweroff in guest during backup in nbd_export_close_all() +Patch69: kvm-nbd-server-Quiesce-coroutines-on-context-switch.patch +# For bz#1918966 - [incremental_backup] qemu aborts if guest reboot during backup when using virtio-blk: "aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule'" +# For bz#1918968 - [incremental_backup] qemu deadlock after poweroff in guest during backup in nbd_export_close_all() +Patch70: kvm-block-Avoid-processing-BDS-twice-in-bdrv_set_aio_con.patch +# For bz#1918966 - [incremental_backup] qemu aborts if guest reboot during backup when using virtio-blk: "aio_co_schedule: Co-routine was already scheduled in 
'aio_co_schedule'" +# For bz#1918968 - [incremental_backup] qemu deadlock after poweroff in guest during backup in nbd_export_close_all() +Patch71: kvm-storage-daemon-Call-bdrv_close_all-on-exit.patch +# For bz#1918966 - [incremental_backup] qemu aborts if guest reboot during backup when using virtio-blk: "aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule'" +# For bz#1918968 - [incremental_backup] qemu deadlock after poweroff in guest during backup in nbd_export_close_all() +Patch72: kvm-block-move-blk_exp_close_all-to-qemu_cleanup.patch BuildRequires: wget BuildRequires: rpm-build @@ -1354,6 +1371,21 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %changelog +* Fri Feb 12 2021 Eduardo Lima (Etrunko) - 5.2.0-7.el8 +- kvm-virtio-Add-corresponding-memory_listener_unregister-.patch [bz#1903521] +- kvm-block-Honor-blk_set_aio_context-context-requirements.patch [bz#1918966 bz#1918968] +- kvm-nbd-server-Quiesce-coroutines-on-context-switch.patch [bz#1918966 bz#1918968] +- kvm-block-Avoid-processing-BDS-twice-in-bdrv_set_aio_con.patch [bz#1918966 bz#1918968] +- kvm-storage-daemon-Call-bdrv_close_all-on-exit.patch [bz#1918966 bz#1918968] +- kvm-block-move-blk_exp_close_all-to-qemu_cleanup.patch [bz#1918966 bz#1918968] +- Resolves: bz#1903521 + (hot unplug vhost-user cause qemu crash: qemu-kvm: ../softmmu/memory.c:2818: do_address_space_destroy: Assertion `QTAILQ_EMPTY(&as->listeners)' failed.) 
+- Resolves: bz#1918966 + ([incremental_backup] qemu aborts if guest reboot during backup when using virtio-blk: "aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule'") +- Resolves: bz#1918968 + ([incremental_backup] qemu deadlock after poweroff in guest during backup in nbd_export_close_all()) + + * Fri Feb 12 2021 Miroslav Rezanina - 5.2.0-6.el8 - kvm-scsi-fix-device-removal-race-vs-IO-restart-callback-.patch [bz#1854811] - kvm-tracetool-also-strip-l-and-ll-from-systemtap-format-.patch [bz#1907264] From bf6207fc0a1a7601c20ca85109400339fbbd3ae8 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 8 Mar 2021 12:28:11 +0100 Subject: [PATCH 112/195] Synchronization with qemu-kvm-5.2.0-10.el8 --- ...etach-existing-iochannel-from-aio_co.patch | 125 +++++++ ...nter-connection-coroutine-if-it-s-pr.patch | 124 +++++++ ...efault-configs-Enable-vhost-user-blk.patch | 39 +++ ...orage-daemon-1-manpage-to-meson.buil.patch | 50 +++ ...s-add-qemu-storage-daemon-1-man-page.patch | 218 ++++++++++++ ...mu-storage-daemon-qmp-ref-7-man-page.patch | 111 +++++++ ...of-this-two-functions-already-have-p.patch | 121 +++++++ ...external-partially_hotplugged-proper.patch | 52 +++ kvm-failover-Remove-memory-leak.patch | 60 ++++ kvm-failover-Remove-primary_dev-member.patch | 158 +++++++++ kvm-failover-Remove-primary_device_dict.patch | 96 ++++++ kvm-failover-Remove-primary_device_opts.patch | 110 +++++++ kvm-failover-Remove-unused-parameter.patch | 68 ++++ ...name-bool-to-failover_primary_hidden.patch | 113 +++++++ ...lover-Rename-function-to-hide_device.patch | 127 +++++++ ...name-to-failover_find_primary_device.patch | 77 +++++ ...ays-atomics-for-primary_should_be_hi.patch | 49 +++ ...t-need-to-cache-primary_device_id-an.patch | 128 +++++++ kvm-failover-fix-indentantion.patch | 171 ++++++++++ ...r-g_strcmp0-knows-how-to-handle-NULL.patch | 48 +++ ...lover-make-sure-that-id-always-exist.patch | 68 ++++ ...-bus-is-only-used-once-and-where-it-.patch | 87 +++++ 
...ev_device_add-returns-err-or-dev-set.patch | 55 ++++ ...failover_find_primary_device-error-p.patch | 72 ++++ kvm-failover-remove-standby_id-variable.patch | 89 +++++ ...-should_be_hidden-should-take-a-bool.patch | 144 ++++++++ ...ver-simplify-failover_unplug_primary.patch | 86 +++++ ...mplify-qdev_device_add-failover-case.patch | 70 ++++ kvm-failover-simplify-qdev_device_add.patch | 89 +++++ ...ver-simplify-virtio_net_find_primary.patch | 70 ++++ ...plit-failover_find_primary_device_id.patch | 128 +++++++ ...net_connect_failover_devices-does-no.patch | 80 +++++ ...port-for-AMD-EPYC-3rd-generation-pro.patch | 213 ++++++++++++ ...bitmap-Allow-control-of-bitmap-persi.patch | 167 ++++++++++ ...bitmap-Use-struct-for-alias-map-inne.patch | 143 ++++++++ ...bd-make-nbd_read-return-EIO-on-error.patch | 72 ++++ kvm-pci-add-romsize-property.patch | 137 ++++++++ kvm-pci-reject-too-large-ROMs.patch | 89 +++++ ...ink-state-active-if-the-slot-is-empt.patch | 146 ++++++++ ...-Add-test-case-for-modifying-persist.patch | 154 +++++++++ ...-SOMAXCONN-for-socket-listen-backlog.patch | 96 ++++++ ...emu-storage-daemon-Enable-object-add.patch | 49 +++ ...devices-for-exporting-upstream-machi.patch | 135 ++++++++ ...-Adjust-firmware-path-of-PCI-devices.patch | 205 ++++++++++++ ...valid-vdev-in-vhost_backend_handle_i.patch | 75 +++++ ...disabled-flag-property-to-hw_compat_.patch | 87 +++++ kvm-virtio-net-add-missing-object_unref.patch | 66 ++++ ...ofsd-extract-lo_do_open-from-lo_open.patch | 157 +++++++++ ...ally-return-inode-pointer-from-lo_do.patch | 121 +++++++ ...t-opening-of-special-files-CVE-2020-.patch | 311 ++++++++++++++++++ ...-cpu-Populate-SVM-CPUID-feature-bits.patch | 91 +++++ qemu-kvm.spec | 196 ++++++++++- 52 files changed, 5789 insertions(+), 4 deletions(-) create mode 100644 kvm-block-nbd-only-detach-existing-iochannel-from-aio_co.patch create mode 100644 kvm-block-nbd-only-enter-connection-coroutine-if-it-s-pr.patch create mode 100644 
kvm-default-configs-Enable-vhost-user-blk.patch create mode 100644 kvm-docs-Add-qemu-storage-daemon-1-manpage-to-meson.buil.patch create mode 100644 kvm-docs-add-qemu-storage-daemon-1-man-page.patch create mode 100644 kvm-docs-generate-qemu-storage-daemon-qmp-ref-7-man-page.patch create mode 100644 kvm-failover-Caller-of-this-two-functions-already-have-p.patch create mode 100644 kvm-failover-Remove-external-partially_hotplugged-proper.patch create mode 100644 kvm-failover-Remove-memory-leak.patch create mode 100644 kvm-failover-Remove-primary_dev-member.patch create mode 100644 kvm-failover-Remove-primary_device_dict.patch create mode 100644 kvm-failover-Remove-primary_device_opts.patch create mode 100644 kvm-failover-Remove-unused-parameter.patch create mode 100644 kvm-failover-Rename-bool-to-failover_primary_hidden.patch create mode 100644 kvm-failover-Rename-function-to-hide_device.patch create mode 100644 kvm-failover-Rename-to-failover_find_primary_device.patch create mode 100644 kvm-failover-Use-always-atomics-for-primary_should_be_hi.patch create mode 100644 kvm-failover-We-don-t-need-to-cache-primary_device_id-an.patch create mode 100644 kvm-failover-fix-indentantion.patch create mode 100644 kvm-failover-g_strcmp0-knows-how-to-handle-NULL.patch create mode 100644 kvm-failover-make-sure-that-id-always-exist.patch create mode 100644 kvm-failover-primary-bus-is-only-used-once-and-where-it-.patch create mode 100644 kvm-failover-qdev_device_add-returns-err-or-dev-set.patch create mode 100644 kvm-failover-remove-failover_find_primary_device-error-p.patch create mode 100644 kvm-failover-remove-standby_id-variable.patch create mode 100644 kvm-failover-should_be_hidden-should-take-a-bool.patch create mode 100644 kvm-failover-simplify-failover_unplug_primary.patch create mode 100644 kvm-failover-simplify-qdev_device_add-failover-case.patch create mode 100644 kvm-failover-simplify-qdev_device_add.patch create mode 100644 
kvm-failover-simplify-virtio_net_find_primary.patch create mode 100644 kvm-failover-split-failover_find_primary_device_id.patch create mode 100644 kvm-failover-virtio_net_connect_failover_devices-does-no.patch create mode 100644 kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch create mode 100644 kvm-migration-dirty-bitmap-Allow-control-of-bitmap-persi.patch create mode 100644 kvm-migration-dirty-bitmap-Use-struct-for-alias-map-inne.patch create mode 100644 kvm-nbd-make-nbd_read-return-EIO-on-error.patch create mode 100644 kvm-pci-add-romsize-property.patch create mode 100644 kvm-pci-reject-too-large-ROMs.patch create mode 100644 kvm-pcie-don-t-set-link-state-active-if-the-slot-is-empt.patch create mode 100644 kvm-qemu-iotests-300-Add-test-case-for-modifying-persist.patch create mode 100644 kvm-qemu-nbd-Use-SOMAXCONN-for-socket-listen-backlog.patch create mode 100644 kvm-qemu-storage-daemon-Enable-object-add.patch create mode 100644 kvm-redhat-Add-some-devices-for-exporting-upstream-machi.patch create mode 100644 kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch create mode 100644 kvm-vhost-Check-for-valid-vdev-in-vhost_backend_handle_i.patch create mode 100644 kvm-virtio-move-use-disabled-flag-property-to-hw_compat_.patch create mode 100644 kvm-virtio-net-add-missing-object_unref.patch create mode 100644 kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch create mode 100644 kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch create mode 100644 kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch create mode 100644 kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch diff --git a/kvm-block-nbd-only-detach-existing-iochannel-from-aio_co.patch b/kvm-block-nbd-only-detach-existing-iochannel-from-aio_co.patch new file mode 100644 index 0000000..96c0d86 --- /dev/null +++ b/kvm-block-nbd-only-detach-existing-iochannel-from-aio_co.patch @@ -0,0 +1,125 @@ +From 23d161ad92d783275ad56f3acb663f7a21b809f4 Mon Sep 17 00:00:00 2001 +From: Eric 
Blake +Date: Mon, 8 Feb 2021 22:56:59 -0300 +Subject: [PATCH 01/54] block/nbd: only detach existing iochannel from + aio_context + +RH-Author: Eric Blake +Message-id: <20210208225701.110110-2-eblake@redhat.com> +Patchwork-id: 101005 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v4 1/3] block/nbd: only detach existing iochannel from aio_context +Bugzilla: 1887883 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz + +From: Roman Kagan + +When the reconnect in NBD client is in progress, the iochannel used for +NBD connection doesn't exist. Therefore an attempt to detach it from +the aio_context of the parent BlockDriverState results in a NULL pointer +dereference. + +The problem is triggerable, in particular, when an outgoing migration is +about to finish, and stopping the dataplane tries to move the +BlockDriverState from the iothread aio_context to the main loop. If the +NBD connection is lost before this point, and the NBD client has entered +the reconnect procedure, QEMU crashes: + + #0 qemu_aio_coroutine_enter (ctx=0x5618056c7580, co=0x0) + at /build/qemu-6MF7tq/qemu-5.0.1/util/qemu-coroutine.c:109 + #1 0x00005618034b1b68 in nbd_client_attach_aio_context_bh ( + opaque=0x561805ed4c00) at /build/qemu-6MF7tq/qemu-5.0.1/block/nbd.c:164 + #2 0x000056180353116b in aio_wait_bh (opaque=0x7f60e1e63700) + at /build/qemu-6MF7tq/qemu-5.0.1/util/aio-wait.c:55 + #3 0x0000561803530633 in aio_bh_call (bh=0x7f60d40a7e80) + at /build/qemu-6MF7tq/qemu-5.0.1/util/async.c:136 + #4 aio_bh_poll (ctx=ctx@entry=0x5618056c7580) + at /build/qemu-6MF7tq/qemu-5.0.1/util/async.c:164 + #5 0x0000561803533e5a in aio_poll (ctx=ctx@entry=0x5618056c7580, + blocking=blocking@entry=true) + at /build/qemu-6MF7tq/qemu-5.0.1/util/aio-posix.c:650 + #6 0x000056180353128d in aio_wait_bh_oneshot (ctx=0x5618056c7580, + cb=, opaque=) + at /build/qemu-6MF7tq/qemu-5.0.1/util/aio-wait.c:71 + #7 0x000056180345c50a in bdrv_attach_aio_context (new_context=0x5618056c7580, + 
bs=0x561805ed4c00) at /build/qemu-6MF7tq/qemu-5.0.1/block.c:6172 + #8 bdrv_set_aio_context_ignore (bs=bs@entry=0x561805ed4c00, + new_context=new_context@entry=0x5618056c7580, + ignore=ignore@entry=0x7f60e1e63780) + at /build/qemu-6MF7tq/qemu-5.0.1/block.c:6237 + #9 0x000056180345c969 in bdrv_child_try_set_aio_context ( + bs=bs@entry=0x561805ed4c00, ctx=0x5618056c7580, + ignore_child=, errp=) + at /build/qemu-6MF7tq/qemu-5.0.1/block.c:6332 + #10 0x00005618034957db in blk_do_set_aio_context (blk=0x56180695b3f0, + new_context=0x5618056c7580, update_root_node=update_root_node@entry=true, + errp=errp@entry=0x0) + at /build/qemu-6MF7tq/qemu-5.0.1/block/block-backend.c:1989 + #11 0x00005618034980bd in blk_set_aio_context (blk=, + new_context=, errp=errp@entry=0x0) + at /build/qemu-6MF7tq/qemu-5.0.1/block/block-backend.c:2010 + #12 0x0000561803197953 in virtio_blk_data_plane_stop (vdev=) + at /build/qemu-6MF7tq/qemu-5.0.1/hw/block/dataplane/virtio-blk.c:292 + #13 0x00005618033d67bf in virtio_bus_stop_ioeventfd (bus=0x5618056d9f08) + at /build/qemu-6MF7tq/qemu-5.0.1/hw/virtio/virtio-bus.c:245 + #14 0x00005618031c9b2e in virtio_vmstate_change (opaque=0x5618056d9f90, + running=0, state=) + at /build/qemu-6MF7tq/qemu-5.0.1/hw/virtio/virtio.c:3220 + #15 0x0000561803208bfd in vm_state_notify (running=running@entry=0, + state=state@entry=RUN_STATE_FINISH_MIGRATE) + at /build/qemu-6MF7tq/qemu-5.0.1/softmmu/vl.c:1275 + #16 0x0000561803155c02 in do_vm_stop (state=RUN_STATE_FINISH_MIGRATE, + send_stop=) at /build/qemu-6MF7tq/qemu-5.0.1/cpus.c:1032 + #17 0x00005618033e3765 in migration_completion (s=0x5618056e6960) + at /build/qemu-6MF7tq/qemu-5.0.1/migration/migration.c:2914 + #18 migration_iteration_run (s=0x5618056e6960) + at /build/qemu-6MF7tq/qemu-5.0.1/migration/migration.c:3275 + #19 migration_thread (opaque=opaque@entry=0x5618056e6960) + at /build/qemu-6MF7tq/qemu-5.0.1/migration/migration.c:3439 + #20 0x0000561803536ad6 in qemu_thread_start (args=) + at 
/build/qemu-6MF7tq/qemu-5.0.1/util/qemu-thread-posix.c:519 + #21 0x00007f61085d06ba in start_thread () + from /lib/x86_64-linux-gnu/libpthread.so.0 + #22 0x00007f610830641d in sysctl () from /lib/x86_64-linux-gnu/libc.so.6 + #23 0x0000000000000000 in ?? () + +Fix it by checking that the iochannel is non-null before trying to +detach it from the aio_context. If it is null, no detaching is needed, +and it will get reattached in the proper aio_context once the connection +is reestablished. + +Signed-off-by: Roman Kagan +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20210129073859.683063-2-rvkagan@yandex-team.ru> +Signed-off-by: Eric Blake +(cherry picked from commit 3b5e4db6734d30e551101c0941b2a6140862ba40) +Signed-off-by: Eric Blake +Signed-off-by: Eduardo Lima (Etrunko) +--- + block/nbd.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/block/nbd.c b/block/nbd.c +index 42536702b6..ed7b6df10b 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -234,7 +234,14 @@ static void nbd_client_detach_aio_context(BlockDriverState *bs) + + /* Timer is deleted in nbd_client_co_drain_begin() */ + assert(!s->reconnect_delay_timer); +- qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc)); ++ /* ++ * If reconnect is in progress we may have no ->ioc. It will be ++ * re-instantiated in the proper aio context once the connection is ++ * reestablished. 
++ */ ++ if (s->ioc) { ++ qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc)); ++ } + } + + static void nbd_client_attach_aio_context_bh(void *opaque) +-- +2.27.0 + diff --git a/kvm-block-nbd-only-enter-connection-coroutine-if-it-s-pr.patch b/kvm-block-nbd-only-enter-connection-coroutine-if-it-s-pr.patch new file mode 100644 index 0000000..6ab629c --- /dev/null +++ b/kvm-block-nbd-only-enter-connection-coroutine-if-it-s-pr.patch @@ -0,0 +1,124 @@ +From ed5dbeb52152217fc7fe9023327dbacfac8b2322 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Mon, 8 Feb 2021 22:57:00 -0300 +Subject: [PATCH 02/54] block/nbd: only enter connection coroutine if it's + present + +RH-Author: Eric Blake +Message-id: <20210208225701.110110-3-eblake@redhat.com> +Patchwork-id: 101008 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v4 2/3] block/nbd: only enter connection coroutine if it's present +Bugzilla: 1887883 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz + +From: Roman Kagan + +When an NBD block driver state is moved from one aio_context to another +(e.g. when doing a drain in a migration thread), +nbd_client_attach_aio_context_bh is executed that enters the connection +coroutine. + +However, the assumption that ->connection_co is always present here +appears incorrect: the connection may have encountered an error other +than -EIO in the underlying transport, and thus may have decided to quit +rather than keep trying to reconnect, and therefore it may have +terminated the connection coroutine. 
As a result an attempt to reassign +the client in this state (NBD_CLIENT_QUIT) to a different aio_context +leads to a null pointer dereference: + + #0 qio_channel_detach_aio_context (ioc=0x0) + at /build/qemu-gYtjVn/qemu-5.0.1/io/channel.c:452 + #1 0x0000562a242824b3 in bdrv_detach_aio_context (bs=0x562a268d6a00) + at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6151 + #2 bdrv_set_aio_context_ignore (bs=bs@entry=0x562a268d6a00, + new_context=new_context@entry=0x562a260c9580, + ignore=ignore@entry=0x7feeadc9b780) + at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6230 + #3 0x0000562a24282969 in bdrv_child_try_set_aio_context + (bs=bs@entry=0x562a268d6a00, ctx=0x562a260c9580, + ignore_child=, errp=) + at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6332 + #4 0x0000562a242bb7db in blk_do_set_aio_context (blk=0x562a2735d0d0, + new_context=0x562a260c9580, + update_root_node=update_root_node@entry=true, errp=errp@entry=0x0) + at /build/qemu-gYtjVn/qemu-5.0.1/block/block-backend.c:1989 + #5 0x0000562a242be0bd in blk_set_aio_context (blk=, + new_context=, errp=errp@entry=0x0) + at /build/qemu-gYtjVn/qemu-5.0.1/block/block-backend.c:2010 + #6 0x0000562a23fbd953 in virtio_blk_data_plane_stop (vdev=) + at /build/qemu-gYtjVn/qemu-5.0.1/hw/block/dataplane/virtio-blk.c:292 + #7 0x0000562a241fc7bf in virtio_bus_stop_ioeventfd (bus=0x562a260dbf08) + at /build/qemu-gYtjVn/qemu-5.0.1/hw/virtio/virtio-bus.c:245 + #8 0x0000562a23fefb2e in virtio_vmstate_change (opaque=0x562a260dbf90, + running=0, state=) + at /build/qemu-gYtjVn/qemu-5.0.1/hw/virtio/virtio.c:3220 + #9 0x0000562a2402ebfd in vm_state_notify (running=running@entry=0, + state=state@entry=RUN_STATE_FINISH_MIGRATE) + at /build/qemu-gYtjVn/qemu-5.0.1/softmmu/vl.c:1275 + #10 0x0000562a23f7bc02 in do_vm_stop (state=RUN_STATE_FINISH_MIGRATE, + send_stop=) + at /build/qemu-gYtjVn/qemu-5.0.1/cpus.c:1032 + #11 0x0000562a24209765 in migration_completion (s=0x562a260e83a0) + at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:2914 + #12 
migration_iteration_run (s=0x562a260e83a0) + at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:3275 + #13 migration_thread (opaque=opaque@entry=0x562a260e83a0) + at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:3439 + #14 0x0000562a2435ca96 in qemu_thread_start (args=) + at /build/qemu-gYtjVn/qemu-5.0.1/util/qemu-thread-posix.c:519 + #15 0x00007feed31466ba in start_thread (arg=0x7feeadc9c700) + at pthread_create.c:333 + #16 0x00007feed2e7c41d in __GI___sysctl (name=0x0, nlen=608471908, + oldval=0x562a2452b138, oldlenp=0x0, newval=0x562a2452c5e0 + <__func__.28102>, newlen=0) + at ../sysdeps/unix/sysv/linux/sysctl.c:30 + #17 0x0000000000000000 in ?? () + +Fix it by checking that the connection coroutine is non-null before +trying to enter it. If it is null, no entering is needed, as the +connection is probably going down anyway. + +Signed-off-by: Roman Kagan +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20210129073859.683063-3-rvkagan@yandex-team.ru> +Signed-off-by: Eric Blake +(cherry picked from commit ddde5ee769fcc84b96f879d7b94f35268f69ca3b) +Signed-off-by: Eric Blake +Signed-off-by: Eduardo Lima (Etrunko) +--- + block/nbd.c | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +diff --git a/block/nbd.c b/block/nbd.c +index ed7b6df10b..1bdba9fc49 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -249,13 +249,15 @@ static void nbd_client_attach_aio_context_bh(void *opaque) + BlockDriverState *bs = opaque; + BDRVNBDState *s = (BDRVNBDState *)bs->opaque; + +- /* +- * The node is still drained, so we know the coroutine has yielded in +- * nbd_read_eof(), the only place where bs->in_flight can reach 0, or it is +- * entered for the first time. Both places are safe for entering the +- * coroutine. 
+- */ +- qemu_aio_coroutine_enter(bs->aio_context, s->connection_co); ++ if (s->connection_co) { ++ /* ++ * The node is still drained, so we know the coroutine has yielded in ++ * nbd_read_eof(), the only place where bs->in_flight can reach 0, or ++ * it is entered for the first time. Both places are safe for entering ++ * the coroutine. ++ */ ++ qemu_aio_coroutine_enter(bs->aio_context, s->connection_co); ++ } + bdrv_dec_in_flight(bs); + } + +-- +2.27.0 + diff --git a/kvm-default-configs-Enable-vhost-user-blk.patch b/kvm-default-configs-Enable-vhost-user-blk.patch new file mode 100644 index 0000000..b56f834 --- /dev/null +++ b/kvm-default-configs-Enable-vhost-user-blk.patch @@ -0,0 +1,39 @@ +From 5aadfd88a3438cee837d2e7e96fa0801d885d119 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 19 Feb 2021 16:14:09 -0500 +Subject: [PATCH 18/54] default-configs: Enable vhost-user-blk + +RH-Author: Kevin Wolf +Message-id: <20210219161409.53788-2-kwolf@redhat.com> +Patchwork-id: 101166 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] default-configs: Enable vhost-user-blk +Bugzilla: 1930033 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Max Reitz + +Now that we have qemu-storage-daemon with a vhost-user-blk export, +we want to be able to use that in guests. So enable vhost-user-blk in +our build configuration. + +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + default-configs/devices/x86_64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/default-configs/devices/x86_64-rh-devices.mak b/default-configs/devices/x86_64-rh-devices.mak +index e80877d4e2..993e2c1d2f 100644 +--- a/default-configs/devices/x86_64-rh-devices.mak ++++ b/default-configs/devices/x86_64-rh-devices.mak +@@ -87,6 +87,7 @@ CONFIG_VGA=y + CONFIG_VGA_CIRRUS=y + CONFIG_VGA_PCI=y + CONFIG_VHOST_USER=y ++CONFIG_VHOST_USER_BLK=y + CONFIG_VIRTIO_PCI=y + CONFIG_VIRTIO_VGA=y + CONFIG_VMMOUSE=y +-- +2.27.0 + diff --git a/kvm-docs-Add-qemu-storage-daemon-1-manpage-to-meson.buil.patch b/kvm-docs-Add-qemu-storage-daemon-1-manpage-to-meson.buil.patch new file mode 100644 index 0000000..b70409f --- /dev/null +++ b/kvm-docs-Add-qemu-storage-daemon-1-manpage-to-meson.buil.patch @@ -0,0 +1,50 @@ +From b3dbe8179b0f73d09bb90cbf92e991a187ef3534 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 16 Feb 2021 16:19:41 -0500 +Subject: [PATCH 15/54] docs: Add qemu-storage-daemon(1) manpage to meson.build +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +Message-id: <20210216161943.126728-4-kwolf@redhat.com> +Patchwork-id: 101104 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 3/5] docs: Add qemu-storage-daemon(1) manpage to meson.build +Bugzilla: 1901323 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +From: Peter Maydell + +In commit 1982e1602d15 we added a new qemu-storage-daemon(1) manpage. +At the moment new manpages have to be listed both in the conf.py for +Sphinx and also in docs/meson.build for Meson. We forgot the second +of those -- correct the omission. 
+ +Signed-off-by: Peter Maydell +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Alex Bennée +Message-id: 20210108161416.21129-2-peter.maydell@linaro.org +(cherry picked from commit fa56cf7e86f99d5557a4fb730e375777b89d8b50) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + docs/meson.build | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/docs/meson.build b/docs/meson.build +index 71641b4fe0..fae9849b79 100644 +--- a/docs/meson.build ++++ b/docs/meson.build +@@ -62,6 +62,7 @@ if build_docs + 'qemu-img.1': (have_tools ? 'man1' : ''), + 'qemu-nbd.8': (have_tools ? 'man8' : ''), + 'qemu-pr-helper.8': (have_tools ? 'man8' : ''), ++ 'qemu-storage-daemon.1': (have_tools ? 'man1' : ''), + 'qemu-trace-stap.1': (config_host.has_key('CONFIG_TRACE_SYSTEMTAP') ? 'man1' : ''), + 'virtfs-proxy-helper.1': (have_virtfs_proxy_helper ? 'man1' : ''), + 'virtiofsd.1': (have_virtiofsd ? 'man1' : ''), +-- +2.27.0 + diff --git a/kvm-docs-add-qemu-storage-daemon-1-man-page.patch b/kvm-docs-add-qemu-storage-daemon-1-man-page.patch new file mode 100644 index 0000000..f0cbc3f --- /dev/null +++ b/kvm-docs-add-qemu-storage-daemon-1-man-page.patch @@ -0,0 +1,218 @@ +From f3831252e618e420ea24e53dbdee8eb51e8cad3e Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 16 Feb 2021 16:19:40 -0500 +Subject: [PATCH 14/54] docs: add qemu-storage-daemon(1) man page + +RH-Author: Kevin Wolf +Message-id: <20210216161943.126728-3-kwolf@redhat.com> +Patchwork-id: 101102 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/5] docs: add qemu-storage-daemon(1) man page +Bugzilla: 1901323 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +From: Stefan Hajnoczi + +Document the qemu-storage-daemon tool. Most of the command-line options +are identical to their QEMU counterparts. Perhaps Sphinx hxtool +integration could be extended to extract documentation for individual +command-line options so they can be shared. 
For now the +qemu-storage-daemon simply refers to the qemu(1) man page where the +command-line options are identical. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20201209103802.350848-3-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 1982e1602d15313cd82f225e821c37733ece3404) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + docs/tools/conf.py | 2 + + docs/tools/index.rst | 1 + + docs/tools/qemu-storage-daemon.rst | 148 +++++++++++++++++++++++++++++ + 3 files changed, 151 insertions(+) + create mode 100644 docs/tools/qemu-storage-daemon.rst + +diff --git a/docs/tools/conf.py b/docs/tools/conf.py +index 4760d36ff2..7072d99324 100644 +--- a/docs/tools/conf.py ++++ b/docs/tools/conf.py +@@ -20,6 +20,8 @@ html_theme_options['description'] = \ + man_pages = [ + ('qemu-img', 'qemu-img', u'QEMU disk image utility', + ['Fabrice Bellard'], 1), ++ ('qemu-storage-daemon', 'qemu-storage-daemon', u'QEMU storage daemon', ++ [], 1), + ('qemu-nbd', 'qemu-nbd', u'QEMU Disk Network Block Device Server', + ['Anthony Liguori '], 8), + ('qemu-pr-helper', 'qemu-pr-helper', 'QEMU persistent reservation helper', +diff --git a/docs/tools/index.rst b/docs/tools/index.rst +index b99f86c7c6..3a5829c17a 100644 +--- a/docs/tools/index.rst ++++ b/docs/tools/index.rst +@@ -11,6 +11,7 @@ Contents: + :maxdepth: 2 + + qemu-img ++ qemu-storage-daemon + qemu-nbd + qemu-pr-helper + qemu-trace-stap +diff --git a/docs/tools/qemu-storage-daemon.rst b/docs/tools/qemu-storage-daemon.rst +new file mode 100644 +index 0000000000..f63627eaf6 +--- /dev/null ++++ b/docs/tools/qemu-storage-daemon.rst +@@ -0,0 +1,148 @@ ++QEMU Storage Daemon ++=================== ++ ++Synopsis ++-------- ++ ++**qemu-storage-daemon** [options] ++ ++Description ++----------- ++ ++qemu-storage-daemon provides disk image functionality from QEMU, qemu-img, and ++qemu-nbd in a long-running process controlled via QMP commands without running ++a virtual machine. 
It can export disk images, run block job operations, and ++perform other disk-related operations. The daemon is controlled via a QMP ++monitor and initial configuration from the command-line. ++ ++The daemon offers the following subset of QEMU features: ++ ++* Block nodes ++* Block jobs ++* Block exports ++* Throttle groups ++* Character devices ++* Crypto and secrets ++* QMP ++* IOThreads ++ ++Commands can be sent over a QEMU Monitor Protocol (QMP) connection. See the ++:manpage:`qemu-storage-daemon-qmp-ref(7)` manual page for a description of the ++commands. ++ ++The daemon runs until it is stopped using the ``quit`` QMP command or ++SIGINT/SIGHUP/SIGTERM. ++ ++**Warning:** Never modify images in use by a running virtual machine or any ++other process; this may destroy the image. Also, be aware that querying an ++image that is being modified by another process may encounter inconsistent ++state. ++ ++Options ++------- ++ ++.. program:: qemu-storage-daemon ++ ++Standard options: ++ ++.. option:: -h, --help ++ ++ Display help and exit ++ ++.. option:: -V, --version ++ ++ Display version information and exit ++ ++.. option:: -T, --trace [[enable=]PATTERN][,events=FILE][,file=FILE] ++ ++ .. include:: ../qemu-option-trace.rst.inc ++ ++.. option:: --blockdev BLOCKDEVDEF ++ ++ is a block node definition. See the :manpage:`qemu(1)` manual page for a ++ description of block node properties and the :manpage:`qemu-block-drivers(7)` ++ manual page for a description of driver-specific parameters. ++ ++.. option:: --chardev CHARDEVDEF ++ ++ is a character device definition. See the :manpage:`qemu(1)` manual page for ++ a description of character device properties. A common character device ++ definition configures a UNIX domain socket:: ++ ++ --chardev socket,id=char1,path=/tmp/qmp.sock,server,nowait ++ ++.. 
option:: --export [type=]nbd,id=,node-name=[,name=][,writable=on|off][,bitmap=] ++ --export [type=]vhost-user-blk,id=,node-name=,addr.type=unix,addr.path=[,writable=on|off][,logical-block-size=][,num-queues=] ++ --export [type=]vhost-user-blk,id=,node-name=,addr.type=fd,addr.str=[,writable=on|off][,logical-block-size=][,num-queues=] ++ ++ is a block export definition. ``node-name`` is the block node that should be ++ exported. ``writable`` determines whether or not the export allows write ++ requests for modifying data (the default is off). ++ ++ The ``nbd`` export type requires ``--nbd-server`` (see below). ``name`` is ++ the NBD export name. ``bitmap`` is the name of a dirty bitmap reachable from ++ the block node, so the NBD client can use NBD_OPT_SET_META_CONTEXT with the ++ metadata context name "qemu:dirty-bitmap:BITMAP" to inspect the bitmap. ++ ++ The ``vhost-user-blk`` export type takes a vhost-user socket address on which ++ it accept incoming connections. Both ++ ``addr.type=unix,addr.path=`` for UNIX domain sockets and ++ ``addr.type=fd,addr.str=`` for file descriptor passing are supported. ++ ``logical-block-size`` sets the logical block size in bytes (the default is ++ 512). ``num-queues`` sets the number of virtqueues (the default is 1). ++ ++.. option:: --monitor MONITORDEF ++ ++ is a QMP monitor definition. See the :manpage:`qemu(1)` manual page for ++ a description of QMP monitor properties. A common QMP monitor definition ++ configures a monitor on character device ``char1``:: ++ ++ --monitor chardev=char1 ++ ++.. option:: --nbd-server addr.type=inet,addr.host=,addr.port=[,tls-creds=][,tls-authz=][,max-connections=] ++ --nbd-server addr.type=unix,addr.path=[,tls-creds=][,tls-authz=][,max-connections=] ++ ++ is a server for NBD exports. Both TCP and UNIX domain sockets are supported. ++ TLS encryption can be configured using ``--object`` tls-creds-* and authz-* ++ secrets (see below). 
++ ++ To configure an NBD server on UNIX domain socket path ``/tmp/nbd.sock``:: ++ ++ --nbd-server addr.type=unix,addr.path=/tmp/nbd.sock ++ ++.. option:: --object help ++ --object ,help ++ --object [,=...] ++ ++ is a QEMU user creatable object definition. List object types with ``help``. ++ List object properties with ``,help``. See the :manpage:`qemu(1)` ++ manual page for a description of the object properties. ++ ++Examples ++-------- ++Launch the daemon with QMP monitor socket ``qmp.sock`` so clients can execute ++QMP commands:: ++ ++ $ qemu-storage-daemon \ ++ --chardev socket,path=qmp.sock,server,nowait,id=char1 \ ++ --monitor chardev=char1 ++ ++Export raw image file ``disk.img`` over NBD UNIX domain socket ``nbd.sock``:: ++ ++ $ qemu-storage-daemon \ ++ --blockdev driver=file,node-name=disk,filename=disk.img \ ++ --nbd-server addr.type=unix,addr.path=nbd.sock \ ++ --export type=nbd,id=export,node-name=disk,writable=on ++ ++Export a qcow2 image file ``disk.qcow2`` as a vhosts-user-blk device over UNIX ++domain socket ``vhost-user-blk.sock``:: ++ ++ $ qemu-storage-daemon \ ++ --blockdev driver=file,node-name=file,filename=disk.qcow2 \ ++ --blockdev driver=qcow2,node-name=qcow2,file=file \ ++ --export type=vhost-user-blk,id=export,addr.type=unix,addr.path=vhost-user-blk.sock,node-name=qcow2 ++ ++See also ++-------- ++ ++:manpage:`qemu(1)`, :manpage:`qemu-block-drivers(7)`, :manpage:`qemu-storage-daemon-qmp-ref(7)` +-- +2.27.0 + diff --git a/kvm-docs-generate-qemu-storage-daemon-qmp-ref-7-man-page.patch b/kvm-docs-generate-qemu-storage-daemon-qmp-ref-7-man-page.patch new file mode 100644 index 0000000..fbc0235 --- /dev/null +++ b/kvm-docs-generate-qemu-storage-daemon-qmp-ref-7-man-page.patch @@ -0,0 +1,111 @@ +From 5425716387734e0a782ac633021cd85eb4d4b914 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 16 Feb 2021 16:19:39 -0500 +Subject: [PATCH 13/54] docs: generate qemu-storage-daemon-qmp-ref(7) man page + +RH-Author: Kevin Wolf +Message-id: 
<20210216161943.126728-2-kwolf@redhat.com> +Patchwork-id: 101101 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/5] docs: generate qemu-storage-daemon-qmp-ref(7) man page +Bugzilla: 1901323 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +From: Stefan Hajnoczi + +Although individual qemu-storage-daemon QMP commands are identical to +QEMU QMP commands, qemu-storage-daemon only supports a subset of QEMU's +QMP commands. Generate a manual page of just the commands supported by +qemu-storage-daemon so that users know exactly what is available in +qemu-storage-daemon. + +Add an h1 heading in storage-daemon/qapi/qapi-schema.json so that +block-core.json is at the h2 heading level. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20201209103802.350848-2-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 23c02ace3508dba5f781ed9ecfde400e462f3a37) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + docs/interop/conf.py | 2 ++ + docs/interop/index.rst | 1 + + docs/interop/qemu-storage-daemon-qmp-ref.rst | 13 +++++++++++++ + docs/meson.build | 1 + + storage-daemon/qapi/qapi-schema.json | 3 +++ + 5 files changed, 20 insertions(+) + create mode 100644 docs/interop/qemu-storage-daemon-qmp-ref.rst + +diff --git a/docs/interop/conf.py b/docs/interop/conf.py +index 2634ca3410..f4370aaa13 100644 +--- a/docs/interop/conf.py ++++ b/docs/interop/conf.py +@@ -23,4 +23,6 @@ man_pages = [ + [], 7), + ('qemu-qmp-ref', 'qemu-qmp-ref', 'QEMU QMP Reference Manual', + [], 7), ++ ('qemu-storage-daemon-qmp-ref', 'qemu-storage-daemon-qmp-ref', ++ 'QEMU Storage Daemon QMP Reference Manual', [], 7), + ] +diff --git a/docs/interop/index.rst b/docs/interop/index.rst +index cd78d679d8..95d56495f6 100644 +--- a/docs/interop/index.rst ++++ b/docs/interop/index.rst +@@ -20,6 +20,7 @@ Contents: + qemu-ga + qemu-ga-ref + qemu-qmp-ref ++ qemu-storage-daemon-qmp-ref + vhost-user + vhost-user-gpu + vhost-vdpa +diff --git 
a/docs/interop/qemu-storage-daemon-qmp-ref.rst b/docs/interop/qemu-storage-daemon-qmp-ref.rst +new file mode 100644 +index 0000000000..caf9dad23a +--- /dev/null ++++ b/docs/interop/qemu-storage-daemon-qmp-ref.rst +@@ -0,0 +1,13 @@ ++QEMU Storage Daemon QMP Reference Manual ++======================================== ++ ++.. ++ TODO: the old Texinfo manual used to note that this manual ++ is GPL-v2-or-later. We should make that reader-visible ++ both here and in our Sphinx manuals more generally. ++ ++.. ++ TODO: display the QEMU version, both here and in our Sphinx manuals ++ more generally. ++ ++.. qapi-doc:: storage-daemon/qapi/qapi-schema.json +diff --git a/docs/meson.build b/docs/meson.build +index bb8fe4c9e4..71641b4fe0 100644 +--- a/docs/meson.build ++++ b/docs/meson.build +@@ -56,6 +56,7 @@ if build_docs + 'qemu-ga.8': (have_tools ? 'man8' : ''), + 'qemu-ga-ref.7': 'man7', + 'qemu-qmp-ref.7': 'man7', ++ 'qemu-storage-daemon-qmp-ref.7': (have_tools ? 'man7' : ''), + }, + 'tools': { + 'qemu-img.1': (have_tools ? 
'man1' : ''), +diff --git a/storage-daemon/qapi/qapi-schema.json b/storage-daemon/qapi/qapi-schema.json +index c6ad5ae1e3..28117c3aac 100644 +--- a/storage-daemon/qapi/qapi-schema.json ++++ b/storage-daemon/qapi/qapi-schema.json +@@ -15,6 +15,9 @@ + + { 'include': '../../qapi/pragma.json' } + ++## ++# = Block devices ++## + { 'include': '../../qapi/block-core.json' } + { 'include': '../../qapi/block-export.json' } + { 'include': '../../qapi/char.json' } +-- +2.27.0 + diff --git a/kvm-failover-Caller-of-this-two-functions-already-have-p.patch b/kvm-failover-Caller-of-this-two-functions-already-have-p.patch new file mode 100644 index 0000000..de38731 --- /dev/null +++ b/kvm-failover-Caller-of-this-two-functions-already-have-p.patch @@ -0,0 +1,121 @@ +From 4f94bc7cc479dba60fba841608b3da74b940a26d Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:44 -0500 +Subject: [PATCH 47/54] failover: Caller of this two functions already have + primary_dev + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-25-lvivier@redhat.com> +Patchwork-id: 101246 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 24/27] failover: Caller of this two functions already have primary_dev +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +Pass it as an argument. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-26-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 0e9a65c5b168b993b845ec2acb2568328c2353da) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/net/virtio-net.c | 27 ++++++++++++++------------- + 1 file changed, 14 insertions(+), 13 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 746ed3fb71..b37e9cd1d9 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3093,17 +3093,17 @@ void virtio_net_set_netclient_name(VirtIONet *n, const char *name, + n->netclient_type = g_strdup(type); + } + +-static bool failover_unplug_primary(VirtIONet *n) ++static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev) + { + HotplugHandler *hotplug_ctrl; + PCIDevice *pci_dev; + Error *err = NULL; + +- hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev); ++ hotplug_ctrl = qdev_get_hotplug_handler(dev); + if (hotplug_ctrl) { +- pci_dev = PCI_DEVICE(n->primary_dev); ++ pci_dev = PCI_DEVICE(dev); + pci_dev->partially_hotplugged = true; +- hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err); ++ hotplug_handler_unplug_request(hotplug_ctrl, dev, &err); + if (err) { + error_report_err(err); + return false; +@@ -3114,30 +3114,31 @@ static bool failover_unplug_primary(VirtIONet *n) + return true; + } + +-static bool failover_replug_primary(VirtIONet *n, Error **errp) ++static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, ++ Error **errp) + { + Error *err = NULL; + HotplugHandler *hotplug_ctrl; +- PCIDevice *pdev = PCI_DEVICE(n->primary_dev); ++ PCIDevice *pdev = PCI_DEVICE(dev); + BusState *primary_bus; + + if (!pdev->partially_hotplugged) { + return true; + } +- primary_bus = n->primary_dev->parent_bus; ++ primary_bus = dev->parent_bus; + if (!primary_bus) { + error_setg(errp, "virtio_net: couldn't find primary bus"); + return false; + } +- qdev_set_parent_bus(n->primary_dev, primary_bus, &error_abort); ++ qdev_set_parent_bus(dev, primary_bus, &error_abort); + qatomic_set(&n->failover_primary_hidden, false); +- hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev); ++ hotplug_ctrl = qdev_get_hotplug_handler(dev); + if (hotplug_ctrl) { 
+- hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err); ++ hotplug_handler_pre_plug(hotplug_ctrl, dev, &err); + if (err) { + goto out; + } +- hotplug_handler_plug(hotplug_ctrl, n->primary_dev, &err); ++ hotplug_handler_plug(hotplug_ctrl, dev, &err); + } + + out: +@@ -3161,7 +3162,7 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, + } + + if (migration_in_setup(s) && !should_be_hidden) { +- if (failover_unplug_primary(n)) { ++ if (failover_unplug_primary(n, n->primary_dev)) { + vmstate_unregister(VMSTATE_IF(n->primary_dev), + qdev_get_vmsd(n->primary_dev), + n->primary_dev); +@@ -3172,7 +3173,7 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, + } + } else if (migration_has_failed(s)) { + /* We already unplugged the device let's plug it back */ +- if (!failover_replug_primary(n, &err)) { ++ if (!failover_replug_primary(n, n->primary_dev, &err)) { + if (err) { + error_report_err(err); + } +-- +2.27.0 + diff --git a/kvm-failover-Remove-external-partially_hotplugged-proper.patch b/kvm-failover-Remove-external-partially_hotplugged-proper.patch new file mode 100644 index 0000000..833b268 --- /dev/null +++ b/kvm-failover-Remove-external-partially_hotplugged-proper.patch @@ -0,0 +1,52 @@ +From 03ad2d1426775c5c993f59512932c4bbf62206c1 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:25 -0500 +Subject: [PATCH 28/54] failover: Remove external partially_hotplugged property + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-6-lvivier@redhat.com> +Patchwork-id: 101251 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 05/27] failover: Remove external partially_hotplugged property +Bugzilla: 1819991 +RH-Acked-by: Jens Freimann +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +It was only set "once", and with the wrong value. As far as I can see, +libvirt still don't use it. 
+ +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-7-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 594d308b9314b446ed2ccc42de7b4d57ba1b7118) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + hw/net/virtio-net.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 3f658d6246..6ca85627d8 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3135,10 +3135,6 @@ static bool failover_replug_primary(VirtIONet *n, Error **errp) + } + qdev_set_parent_bus(n->primary_dev, primary_bus, &error_abort); + qatomic_set(&n->primary_should_be_hidden, false); +- if (!qemu_opt_set_bool(n->primary_device_opts, +- "partially_hotplugged", true, errp)) { +- return false; +- } + hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev); + if (hotplug_ctrl) { + hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err); +-- +2.27.0 + diff --git a/kvm-failover-Remove-memory-leak.patch b/kvm-failover-Remove-memory-leak.patch new file mode 100644 index 0000000..c6d6701 --- /dev/null +++ b/kvm-failover-Remove-memory-leak.patch @@ -0,0 +1,60 @@ +From e9380df03375e871de088ad5aee8fd19d6ad3794 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:32 -0500 +Subject: [PATCH 35/54] failover: Remove memory leak + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-13-lvivier@redhat.com> +Patchwork-id: 101261 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 12/27] failover: Remove memory leak +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +Two things, at this point: + +* n->primary_device_id has to be set, otherwise + virtio_net_find_primary don't work. So we have a leak here. 
+ +* it has to be exactly the same that prim_dev->id because what + qdev_find_recursive() does is just compare this two values. + +So remove the unneeded assignment and leaky bits. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-14-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 7b3dc2f8c0b817bbe78ba347130b3c99fe2c4470) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + hw/net/virtio-net.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 6e5a56a230..70fa372c08 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -854,9 +854,7 @@ static DeviceState *virtio_connect_failover_devices(VirtIONet *n, Error **errp) + Error *err = NULL; + + prim_dev = virtio_net_find_primary(n, &err); +- if (prim_dev) { +- n->primary_device_id = g_strdup(prim_dev->id); +- } else { ++ if (!prim_dev) { + error_propagate(errp, err); + } + +-- +2.27.0 + diff --git a/kvm-failover-Remove-primary_dev-member.patch b/kvm-failover-Remove-primary_dev-member.patch new file mode 100644 index 0000000..aff570e --- /dev/null +++ b/kvm-failover-Remove-primary_dev-member.patch @@ -0,0 +1,158 @@ +From 52dce3568320900c79e34eb2093058e5c3f60aa9 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:46 -0500 +Subject: [PATCH 49/54] failover: Remove primary_dev member + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-27-lvivier@redhat.com> +Patchwork-id: 101250 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 26/27] failover: Remove primary_dev member +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +Only three uses remained, and we can remove them on that case. 
+ +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-28-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 21e8709b29cd981c74565e75276ed476c954cbbf) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + hw/net/virtio-net.c | 55 +++++++++++++++------------------- + include/hw/virtio/virtio-net.h | 1 - + 2 files changed, 24 insertions(+), 32 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 9203d81780..044ac95f6f 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -832,13 +832,31 @@ static char *failover_find_primary_device_id(VirtIONet *n) + return fid.id; + } + ++/** ++ * Find the primary device for this failover virtio-net ++ * ++ * @n: VirtIONet device ++ * @errp: returns an error if this function fails ++ */ ++static DeviceState *failover_find_primary_device(VirtIONet *n) ++{ ++ char *id = failover_find_primary_device_id(n); ++ ++ if (!id) { ++ return NULL; ++ } ++ ++ return qdev_find_recursive(sysbus_get_default(), id); ++} ++ + static void failover_add_primary(VirtIONet *n, Error **errp) + { + Error *err = NULL; + QemuOpts *opts; + char *id; ++ DeviceState *dev = failover_find_primary_device(n); + +- if (n->primary_dev) { ++ if (dev) { + return; + } + +@@ -848,7 +866,7 @@ static void failover_add_primary(VirtIONet *n, Error **errp) + } + opts = qemu_opts_find(qemu_find_opts("device"), id); + if (opts) { +- n->primary_dev = qdev_device_add(opts, &err); ++ dev = qdev_device_add(opts, &err); + if (err) { + qemu_opts_del(opts); + } +@@ -861,23 +879,6 @@ static void failover_add_primary(VirtIONet *n, Error **errp) + error_propagate(errp, err); + } + +-/** +- * Find the primary device for this failover virtio-net +- * +- * @n: VirtIONet device +- * @errp: returns an error if this function fails +- */ +-static DeviceState *failover_find_primary_device(VirtIONet *n) +-{ +- char *id = failover_find_primary_device_id(n); +- +- if 
(!id) { +- return NULL; +- } +- +- return qdev_find_recursive(sysbus_get_default(), id); +-} +- + static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) + { + VirtIONet *n = VIRTIO_NET(vdev); +@@ -933,19 +934,9 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) + qatomic_set(&n->failover_primary_hidden, false); + failover_add_primary(n, &err); + if (err) { +- n->primary_dev = failover_find_primary_device(n); +- failover_add_primary(n, &err); +- if (err) { +- goto out_err; +- } ++ warn_report_err(err); + } + } +- return; +- +-out_err: +- if (err) { +- warn_report_err(err); +- } + } + + static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd, +@@ -3420,13 +3411,15 @@ static int virtio_net_pre_save(void *opaque) + static bool primary_unplug_pending(void *opaque) + { + DeviceState *dev = opaque; ++ DeviceState *primary; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIONet *n = VIRTIO_NET(vdev); + + if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { + return false; + } +- return n->primary_dev ? n->primary_dev->pending_deleted_event : false; ++ primary = failover_find_primary_device(n); ++ return primary ? 
primary->pending_deleted_event : false; + } + + static bool dev_unplug_pending(void *opaque) +diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h +index efef64e02f..7e96d193aa 100644 +--- a/include/hw/virtio/virtio-net.h ++++ b/include/hw/virtio/virtio-net.h +@@ -202,7 +202,6 @@ struct VirtIONet { + AnnounceTimer announce_timer; + bool needs_vnet_hdr_swap; + bool mtu_bypass_backend; +- DeviceState *primary_dev; + /* primary failover device is hidden*/ + bool failover_primary_hidden; + bool failover; +-- +2.27.0 + diff --git a/kvm-failover-Remove-primary_device_dict.patch b/kvm-failover-Remove-primary_device_dict.patch new file mode 100644 index 0000000..74c9aa6 --- /dev/null +++ b/kvm-failover-Remove-primary_device_dict.patch @@ -0,0 +1,96 @@ +From 277c3c13377cc7f41d4121fdce918df3005fc063 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:31 -0500 +Subject: [PATCH 34/54] failover: Remove primary_device_dict + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-12-lvivier@redhat.com> +Patchwork-id: 101262 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 11/27] failover: Remove primary_device_dict +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +It was only used once. And we have there opts->id, so no need for it. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-13-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 9673a88e97d1eb428872bd261dbf56a0f3c2fd71) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/net/virtio-net.c | 17 ++++------------- + include/hw/virtio/virtio-net.h | 1 - + 2 files changed, 4 insertions(+), 14 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 953d5c2bc8..6e5a56a230 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3186,28 +3186,21 @@ static int virtio_net_primary_should_be_hidden(DeviceListener *listener, + if (!device_opts) { + return -1; + } +- n->primary_device_dict = qemu_opts_to_qdict(device_opts, +- n->primary_device_dict); + standby_id = qemu_opt_get(device_opts, "failover_pair_id"); + if (g_strcmp0(standby_id, n->netclient_name) == 0) { + match_found = true; + } else { + match_found = false; + hide = false; +- n->primary_device_dict = NULL; + goto out; + } + + /* failover_primary_hidden is set during feature negotiation */ + hide = qatomic_read(&n->failover_primary_hidden); +- +- if (n->primary_device_dict) { +- g_free(n->primary_device_id); +- n->primary_device_id = g_strdup(qdict_get_try_str( +- n->primary_device_dict, "id")); +- if (!n->primary_device_id) { +- warn_report("primary_device_id not set"); +- } ++ g_free(n->primary_device_id); ++ n->primary_device_id = g_strdup(device_opts->id); ++ if (!n->primary_device_id) { ++ warn_report("primary_device_id not set"); + } + + out: +@@ -3396,8 +3389,6 @@ static void virtio_net_device_unrealize(DeviceState *dev) + if (n->failover) { + device_listener_unregister(&n->primary_listener); + g_free(n->primary_device_id); +- qobject_unref(n->primary_device_dict); +- n->primary_device_dict = NULL; + } + + max_queues = n->multiqueue ? 
n->max_queues : 1; +diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h +index a055f39dd6..fe353d8299 100644 +--- a/include/hw/virtio/virtio-net.h ++++ b/include/hw/virtio/virtio-net.h +@@ -202,7 +202,6 @@ struct VirtIONet { + AnnounceTimer announce_timer; + bool needs_vnet_hdr_swap; + bool mtu_bypass_backend; +- QDict *primary_device_dict; + DeviceState *primary_dev; + char *primary_device_id; + /* primary failover device is hidden*/ +-- +2.27.0 + diff --git a/kvm-failover-Remove-primary_device_opts.patch b/kvm-failover-Remove-primary_device_opts.patch new file mode 100644 index 0000000..e57fb51 --- /dev/null +++ b/kvm-failover-Remove-primary_device_opts.patch @@ -0,0 +1,110 @@ +From ec36f213983c0ea89fe8db8b44d1105df0bd3dc2 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:29 -0500 +Subject: [PATCH 32/54] failover: Remove primary_device_opts + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-10-lvivier@redhat.com> +Patchwork-id: 101259 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 09/27] failover: Remove primary_device_opts +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +It was really only used once, in failover_add_primary(). Just search +for it on global opts when it is needed. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-11-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 19e49bc2e984bd065719fc3595f35368b3ae87cd) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/net/virtio-net.c | 21 +++++---------------- + include/hw/virtio/virtio-net.h | 1 - + 2 files changed, 5 insertions(+), 17 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index e334f05352..2a99b0e0f6 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -791,17 +791,17 @@ static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) + static void failover_add_primary(VirtIONet *n, Error **errp) + { + Error *err = NULL; ++ QemuOpts *opts; + + if (n->primary_dev) { + return; + } + +- n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"), +- n->primary_device_id); +- if (n->primary_device_opts) { +- n->primary_dev = qdev_device_add(n->primary_device_opts, &err); ++ opts = qemu_opts_find(qemu_find_opts("device"), n->primary_device_id); ++ if (opts) { ++ n->primary_dev = qdev_device_add(opts, &err); + if (err) { +- qemu_opts_del(n->primary_device_opts); ++ qemu_opts_del(opts); + } + } else { + error_setg(errp, "Primary device not found"); +@@ -856,7 +856,6 @@ static DeviceState *virtio_connect_failover_devices(VirtIONet *n, Error **errp) + prim_dev = virtio_net_find_primary(n, &err); + if (prim_dev) { + n->primary_device_id = g_strdup(prim_dev->id); +- n->primary_device_opts = prim_dev->opts; + } else { + error_propagate(errp, err); + } +@@ -3113,14 +3112,6 @@ static bool failover_replug_primary(VirtIONet *n, Error **errp) + if (!pdev->partially_hotplugged) { + return true; + } +- if (!n->primary_device_opts) { +- n->primary_device_opts = qemu_opts_from_qdict(qemu_find_opts("device"), +- n->primary_device_dict, +- errp); +- if (!n->primary_device_opts) { +- return false; +- } +- } + primary_bus = n->primary_dev->parent_bus; + if (!primary_bus) { + error_setg(errp, "virtio_net: couldn't find primary bus"); +@@ -3211,8 +3202,6 @@ static int virtio_net_primary_should_be_hidden(DeviceListener *listener, + goto out; + } + +- n->primary_device_opts = device_opts; +- + /* failover_primary_hidden is set 
during feature negotiation */ + hide = qatomic_read(&n->failover_primary_hidden); + +diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h +index ca68be759f..7159e6c0a0 100644 +--- a/include/hw/virtio/virtio-net.h ++++ b/include/hw/virtio/virtio-net.h +@@ -202,7 +202,6 @@ struct VirtIONet { + AnnounceTimer announce_timer; + bool needs_vnet_hdr_swap; + bool mtu_bypass_backend; +- QemuOpts *primary_device_opts; + QDict *primary_device_dict; + DeviceState *primary_dev; + char *primary_device_id; +-- +2.27.0 + diff --git a/kvm-failover-Remove-unused-parameter.patch b/kvm-failover-Remove-unused-parameter.patch new file mode 100644 index 0000000..b95c033 --- /dev/null +++ b/kvm-failover-Remove-unused-parameter.patch @@ -0,0 +1,68 @@ +From 69ba4fc743b29e9e3f595c1e96596204abc1aa0e Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:24 -0500 +Subject: [PATCH 27/54] failover: Remove unused parameter + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-5-lvivier@redhat.com> +Patchwork-id: 101243 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 04/27] failover: Remove unused parameter +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Jens Freimann +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-6-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 82ceb65799855efb0db965a6ef86d81ae1c8bcd7) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/net/virtio-net.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 786d313330..3f658d6246 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -855,9 +855,7 @@ static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp) + return dev; + } + +-static DeviceState *virtio_connect_failover_devices(VirtIONet *n, +- DeviceState *dev, +- Error **errp) ++static DeviceState *virtio_connect_failover_devices(VirtIONet *n, Error **errp) + { + DeviceState *prim_dev = NULL; + Error *err = NULL; +@@ -928,7 +926,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) + qatomic_set(&n->primary_should_be_hidden, false); + failover_add_primary(n, &err); + if (err) { +- n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err); ++ n->primary_dev = virtio_connect_failover_devices(n, &err); + if (err) { + goto out_err; + } +@@ -3164,7 +3162,7 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, + should_be_hidden = qatomic_read(&n->primary_should_be_hidden); + + if (!n->primary_dev) { +- n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err); ++ n->primary_dev = virtio_connect_failover_devices(n, &err); + if (!n->primary_dev) { + return; + } +-- +2.27.0 + diff --git a/kvm-failover-Rename-bool-to-failover_primary_hidden.patch b/kvm-failover-Rename-bool-to-failover_primary_hidden.patch new file mode 100644 index 0000000..f2c3ff1 --- /dev/null +++ b/kvm-failover-Rename-bool-to-failover_primary_hidden.patch @@ -0,0 +1,113 @@ +From 6d228bc32fa1e6c9619dc99dc10bfa3a9116bbf0 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:27 -0500 +Subject: [PATCH 30/54] failover: Rename bool to failover_primary_hidden + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-8-lvivier@redhat.com> +Patchwork-id: 101260 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 07/27] failover: Rename bool to 
failover_primary_hidden +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Jens Freimann +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +You should not use passive naming variables. +And once there, be able to search for them. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-9-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit e2bde83e23d3cfc1d90911c74500fd2e3b0b04fa) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + hw/net/virtio-net.c | 14 +++++++------- + include/hw/virtio/virtio-net.h | 3 ++- + 2 files changed, 9 insertions(+), 8 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 3e82108d42..c221671852 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -916,7 +916,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) + + if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) { + qapi_event_send_failover_negotiated(n->netclient_name); +- qatomic_set(&n->primary_should_be_hidden, false); ++ qatomic_set(&n->failover_primary_hidden, false); + failover_add_primary(n, &err); + if (err) { + n->primary_dev = virtio_connect_failover_devices(n, &err); +@@ -3127,7 +3127,7 @@ static bool failover_replug_primary(VirtIONet *n, Error **errp) + return false; + } + qdev_set_parent_bus(n->primary_dev, primary_bus, &error_abort); +- qatomic_set(&n->primary_should_be_hidden, false); ++ qatomic_set(&n->failover_primary_hidden, false); + hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev); + if (hotplug_ctrl) { + hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err); +@@ -3148,7 +3148,7 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, + bool should_be_hidden; + Error *err = NULL; + +- should_be_hidden = 
qatomic_read(&n->primary_should_be_hidden); ++ should_be_hidden = qatomic_read(&n->failover_primary_hidden); + + if (!n->primary_dev) { + n->primary_dev = virtio_connect_failover_devices(n, &err); +@@ -3163,7 +3163,7 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, + qdev_get_vmsd(n->primary_dev), + n->primary_dev); + qapi_event_send_unplug_primary(n->primary_device_id); +- qatomic_set(&n->primary_should_be_hidden, true); ++ qatomic_set(&n->failover_primary_hidden, true); + } else { + warn_report("couldn't unplug primary device"); + } +@@ -3213,8 +3213,8 @@ static int virtio_net_primary_should_be_hidden(DeviceListener *listener, + + n->primary_device_opts = device_opts; + +- /* primary_should_be_hidden is set during feature negotiation */ +- hide = qatomic_read(&n->primary_should_be_hidden); ++ /* failover_primary_hidden is set during feature negotiation */ ++ hide = qatomic_read(&n->failover_primary_hidden); + + if (n->primary_device_dict) { + g_free(n->primary_device_id); +@@ -3271,7 +3271,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) + if (n->failover) { + n->primary_listener.should_be_hidden = + virtio_net_primary_should_be_hidden; +- qatomic_set(&n->primary_should_be_hidden, true); ++ qatomic_set(&n->failover_primary_hidden, true); + device_listener_register(&n->primary_listener); + n->migration_state.notify = virtio_net_migration_state_notifier; + add_migration_state_change_notifier(&n->migration_state); +diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h +index c8da637d40..ca68be759f 100644 +--- a/include/hw/virtio/virtio-net.h ++++ b/include/hw/virtio/virtio-net.h +@@ -207,7 +207,8 @@ struct VirtIONet { + DeviceState *primary_dev; + char *primary_device_id; + char *standby_id; +- bool primary_should_be_hidden; ++ /* primary failover device is hidden*/ ++ bool failover_primary_hidden; + bool failover; + DeviceListener primary_listener; + Notifier migration_state; +-- +2.27.0 + diff --git 
a/kvm-failover-Rename-function-to-hide_device.patch b/kvm-failover-Rename-function-to-hide_device.patch new file mode 100644 index 0000000..bbc86b8 --- /dev/null +++ b/kvm-failover-Rename-function-to-hide_device.patch @@ -0,0 +1,127 @@ +From 1fbde79ae60990ff0439f3f3bb060f7d723e4910 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:35 -0500 +Subject: [PATCH 38/54] failover: Rename function to hide_device() + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-16-lvivier@redhat.com> +Patchwork-id: 101264 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 15/27] failover: Rename function to hide_device() +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +You should not use pasive. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-17-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit b91ad981b867e15171234efc3f2ab4074d377cef) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/core/qdev.c | 4 ++-- + hw/net/virtio-net.c | 7 +++---- + include/hw/qdev-core.h | 28 +++++++++++++++------------- + 3 files changed, 20 insertions(+), 19 deletions(-) + +diff --git a/hw/core/qdev.c b/hw/core/qdev.c +index 8f4b8f3cc1..cbdff0b6c6 100644 +--- a/hw/core/qdev.c ++++ b/hw/core/qdev.c +@@ -217,8 +217,8 @@ bool qdev_should_hide_device(QemuOpts *opts) + DeviceListener *listener; + + QTAILQ_FOREACH(listener, &device_listeners, link) { +- if (listener->should_be_hidden) { +- if (listener->should_be_hidden(listener, opts)) { ++ if (listener->hide_device) { ++ if (listener->hide_device(listener, opts)) { + return true; + } + } +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 9f12d33da0..747614ff2a 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3161,8 +3161,8 @@ static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) + virtio_net_handle_migration_primary(n, s); + } + +-static bool virtio_net_primary_should_be_hidden(DeviceListener *listener, +- QemuOpts *device_opts) ++static bool failover_hide_primary_device(DeviceListener *listener, ++ QemuOpts *device_opts) + { + VirtIONet *n = container_of(listener, VirtIONet, primary_listener); + bool hide; +@@ -3220,8 +3220,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) + } + + if (n->failover) { +- n->primary_listener.should_be_hidden = +- virtio_net_primary_should_be_hidden; ++ n->primary_listener.hide_device = failover_hide_primary_device; + qatomic_set(&n->failover_primary_hidden, true); + device_listener_register(&n->primary_listener); + n->migration_state.notify = virtio_net_migration_state_notifier; +diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h +index 250f4edef6..6ac86db44e 100644 +--- a/include/hw/qdev-core.h ++++ b/include/hw/qdev-core.h +@@ -81,16 +81,17 @@ typedef void (*BusUnrealize)(BusState *bus); + * + * + * # Hiding a device # +- * To hide a device, a DeviceListener function 
should_be_hidden() needs to ++ * To hide a device, a DeviceListener function hide_device() needs to + * be registered. +- * It can be used to defer adding a device and therefore hide it from the +- * guest. The handler registering to this DeviceListener can save the QOpts +- * passed to it for re-using it later and must return that it wants the device +- * to be/remain hidden or not. When the handler function decides the device +- * shall not be hidden it will be added in qdev_device_add() and +- * realized as any other device. Otherwise qdev_device_add() will return early +- * without adding the device. The guest will not see a "hidden" device +- * until it was marked don't hide and qdev_device_add called again. ++ * It can be used to defer adding a device and therefore hide it from ++ * the guest. The handler registering to this DeviceListener can save ++ * the QOpts passed to it for re-using it later. It must return if it ++ * wants the device to be hidden or visible. When the handler function ++ * decides the device shall be visible it will be added with ++ * qdev_device_add() and realized as any other device. Otherwise ++ * qdev_device_add() will return early without adding the device. The ++ * guest will not see a "hidden" device until it was marked visible ++ * and qdev_device_add called again. + * + */ + struct DeviceClass { +@@ -196,11 +197,12 @@ struct DeviceListener { + void (*realize)(DeviceListener *listener, DeviceState *dev); + void (*unrealize)(DeviceListener *listener, DeviceState *dev); + /* +- * This callback is called upon init of the DeviceState and allows to +- * inform qdev that a device should be hidden, depending on the device +- * opts, for example, to hide a standby device. ++ * This callback is called upon init of the DeviceState and ++ * informs qdev if a device should be visible or hidden. We can ++ * hide a failover device depending for example on the device ++ * opts. 
+ */ +- bool (*should_be_hidden)(DeviceListener *listener, QemuOpts *device_opts); ++ bool (*hide_device)(DeviceListener *listener, QemuOpts *device_opts); + QTAILQ_ENTRY(DeviceListener) link; + }; + +-- +2.27.0 + diff --git a/kvm-failover-Rename-to-failover_find_primary_device.patch b/kvm-failover-Rename-to-failover_find_primary_device.patch new file mode 100644 index 0000000..89e6060 --- /dev/null +++ b/kvm-failover-Rename-to-failover_find_primary_device.patch @@ -0,0 +1,77 @@ +From e1ea7c178c1762dca02e2c85f57ccfad1063c753 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:37 -0500 +Subject: [PATCH 40/54] failover: Rename to failover_find_primary_device() + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-18-lvivier@redhat.com> +Patchwork-id: 101263 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 17/27] failover: Rename to failover_find_primary_device() +Bugzilla: 1819991 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +This commit: +* Rename them to failover_find_primary_devices() so + - it starts with failover_ + - it don't connect anything, just find the primary device +* Create documentation for the function + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-19-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 85d3b93196e43c4493c118aa9e3a82fe657636b5) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/net/virtio-net.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index c6200b924e..ff82f1017d 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -826,7 +826,13 @@ static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp) + return ret; + } + +-static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp) ++/** ++ * Find the primary device for this failover virtio-net ++ * ++ * @n: VirtIONet device ++ * @errp: returns an error if this function fails ++ */ ++static DeviceState *failover_find_primary_device(VirtIONet *n, Error **errp) + { + Error *err = NULL; + +@@ -891,7 +897,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) + qatomic_set(&n->failover_primary_hidden, false); + failover_add_primary(n, &err); + if (err) { +- n->primary_dev = virtio_net_find_primary(n, &err); ++ n->primary_dev = failover_find_primary_device(n, &err); + if (err) { + goto out_err; + } +@@ -3115,7 +3121,7 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, + should_be_hidden = qatomic_read(&n->failover_primary_hidden); + + if (!n->primary_dev) { +- n->primary_dev = virtio_net_find_primary(n, &err); ++ n->primary_dev = failover_find_primary_device(n, &err); + if (!n->primary_dev) { + return; + } +-- +2.27.0 + diff --git a/kvm-failover-Use-always-atomics-for-primary_should_be_hi.patch b/kvm-failover-Use-always-atomics-for-primary_should_be_hi.patch new file mode 100644 index 0000000..41fa84a --- /dev/null +++ b/kvm-failover-Use-always-atomics-for-primary_should_be_hi.patch @@ -0,0 +1,49 @@ +From be9147ddedc35a458b976a71fd947634ab71bb44 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:22 -0500 +Subject: [PATCH 25/54] failover: Use always atomics for + primary_should_be_hidden + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-3-lvivier@redhat.com> +Patchwork-id: 101247 
+O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 02/27] failover: Use always atomics for primary_should_be_hidden +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Jens Freimann +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-4-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 587f2fcb93eddf69736e00731a2da018a0e0a726) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + hw/net/virtio-net.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 1011a524bf..a0fa63e7cb 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3136,7 +3136,7 @@ static bool failover_replug_primary(VirtIONet *n, Error **errp) + return false; + } + qdev_set_parent_bus(n->primary_dev, n->primary_bus, &error_abort); +- n->primary_should_be_hidden = false; ++ qatomic_set(&n->primary_should_be_hidden, false); + if (!qemu_opt_set_bool(n->primary_device_opts, + "partially_hotplugged", true, errp)) { + return false; +-- +2.27.0 + diff --git a/kvm-failover-We-don-t-need-to-cache-primary_device_id-an.patch b/kvm-failover-We-don-t-need-to-cache-primary_device_id-an.patch new file mode 100644 index 0000000..1012001 --- /dev/null +++ b/kvm-failover-We-don-t-need-to-cache-primary_device_id-an.patch @@ -0,0 +1,128 @@ +From 845e4811506c58b8f1f4cfcb183994f1d0f4d66b Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:43 -0500 +Subject: [PATCH 46/54] failover: We don't need to cache primary_device_id + anymore + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-24-lvivier@redhat.com> +Patchwork-id: 101258 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 23/27] failover: We don't need to cache primary_device_id 
anymore +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-25-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 3abad4a221e050d43fa8540677b285057642baaf) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + hw/net/virtio-net.c | 20 ++++++++++---------- + include/hw/virtio/virtio-net.h | 1 - + 2 files changed, 10 insertions(+), 11 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 2c502c13fd..746ed3fb71 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -824,6 +824,7 @@ static char *failover_find_primary_device_id(VirtIONet *n) + Error *err = NULL; + FailoverId fid; + ++ fid.n = n; + if (!qemu_opts_foreach(qemu_find_opts("device"), + failover_set_primary, &fid, &err)) { + return NULL; +@@ -835,12 +836,17 @@ static void failover_add_primary(VirtIONet *n, Error **errp) + { + Error *err = NULL; + QemuOpts *opts; ++ char *id; + + if (n->primary_dev) { + return; + } + +- opts = qemu_opts_find(qemu_find_opts("device"), n->primary_device_id); ++ id = failover_find_primary_device_id(n); ++ if (!id) { ++ return; ++ } ++ opts = qemu_opts_find(qemu_find_opts("device"), id); + if (opts) { + n->primary_dev = qdev_device_add(opts, &err); + if (err) { +@@ -868,9 +874,8 @@ static DeviceState *failover_find_primary_device(VirtIONet *n) + if (!id) { + return NULL; + } +- n->primary_device_id = g_strdup(id); + +- return qdev_find_recursive(sysbus_get_default(), n->primary_device_id); ++ return qdev_find_recursive(sysbus_get_default(), id); + } + + static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) +@@ -3160,7 +3165,7 @@ static void 
virtio_net_handle_migration_primary(VirtIONet *n, + vmstate_unregister(VMSTATE_IF(n->primary_dev), + qdev_get_vmsd(n->primary_dev), + n->primary_dev); +- qapi_event_send_unplug_primary(n->primary_device_id); ++ qapi_event_send_unplug_primary(n->primary_dev->id); + qatomic_set(&n->failover_primary_hidden, true); + } else { + warn_report("couldn't unplug primary device"); +@@ -3186,7 +3191,6 @@ static bool failover_hide_primary_device(DeviceListener *listener, + QemuOpts *device_opts) + { + VirtIONet *n = container_of(listener, VirtIONet, primary_listener); +- bool hide; + const char *standby_id; + + if (!device_opts) { +@@ -3198,10 +3202,7 @@ static bool failover_hide_primary_device(DeviceListener *listener, + } + + /* failover_primary_hidden is set during feature negotiation */ +- hide = qatomic_read(&n->failover_primary_hidden); +- g_free(n->primary_device_id); +- n->primary_device_id = g_strdup(device_opts->id); +- return hide; ++ return qatomic_read(&n->failover_primary_hidden); + } + + static void virtio_net_device_realize(DeviceState *dev, Error **errp) +@@ -3378,7 +3379,6 @@ static void virtio_net_device_unrealize(DeviceState *dev) + + if (n->failover) { + device_listener_unregister(&n->primary_listener); +- g_free(n->primary_device_id); + } + + max_queues = n->multiqueue ? 
n->max_queues : 1; +diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h +index fe353d8299..efef64e02f 100644 +--- a/include/hw/virtio/virtio-net.h ++++ b/include/hw/virtio/virtio-net.h +@@ -203,7 +203,6 @@ struct VirtIONet { + bool needs_vnet_hdr_swap; + bool mtu_bypass_backend; + DeviceState *primary_dev; +- char *primary_device_id; + /* primary failover device is hidden*/ + bool failover_primary_hidden; + bool failover; +-- +2.27.0 + diff --git a/kvm-failover-fix-indentantion.patch b/kvm-failover-fix-indentantion.patch new file mode 100644 index 0000000..194ae92 --- /dev/null +++ b/kvm-failover-fix-indentantion.patch @@ -0,0 +1,171 @@ +From 619e58f19e3e20c4144eb1259ce2f338d09176c1 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:21 -0500 +Subject: [PATCH 24/54] failover: fix indentantion + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-2-lvivier@redhat.com> +Patchwork-id: 101240 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 01/27] failover: fix indentantion +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Jens Freimann +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +Once there, remove not needed cast. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-3-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 1c775d65d4bff3a5a9876e398b2e689bc45aa1f7) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/net/virtio-net.c | 33 +++++++++++++++------------------ + softmmu/qdev-monitor.c | 4 ++-- + 2 files changed, 17 insertions(+), 20 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 9179013ac4..1011a524bf 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -797,7 +797,7 @@ static void failover_add_primary(VirtIONet *n, Error **errp) + } + + n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"), +- n->primary_device_id); ++ n->primary_device_id); + if (n->primary_device_opts) { + n->primary_dev = qdev_device_add(n->primary_device_opts, &err); + if (err) { +@@ -814,9 +814,9 @@ static void failover_add_primary(VirtIONet *n, Error **errp) + } else { + error_setg(errp, "Primary device not found"); + error_append_hint(errp, "Virtio-net failover will not work. Make " +- "sure primary device has parameter" +- " failover_pair_id=\n"); +-} ++ "sure primary device has parameter" ++ " failover_pair_id=\n"); ++ } + error_propagate(errp, err); + } + +@@ -824,7 +824,6 @@ static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp) + { + VirtIONet *n = opaque; + int ret = 0; +- + const char *standby_id = qemu_opt_get(opts, "failover_pair_id"); + + if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) { +@@ -841,14 +840,14 @@ static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp) + Error *err = NULL; + + if (qemu_opts_foreach(qemu_find_opts("device"), +- is_my_primary, n, &err)) { ++ is_my_primary, n, &err)) { + if (err) { + error_propagate(errp, err); + return NULL; + } + if (n->primary_device_id) { + dev = qdev_find_recursive(sysbus_get_default(), +- n->primary_device_id); ++ n->primary_device_id); + } else { + error_setg(errp, "Primary device id not found"); + return NULL; +@@ -857,8 +856,6 @@ static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp) + return dev; + } + +- +- + static DeviceState *virtio_connect_failover_devices(VirtIONet *n, + 
DeviceState *dev, + Error **errp) +@@ -3126,9 +3123,9 @@ static bool failover_replug_primary(VirtIONet *n, Error **errp) + return true; + } + if (!n->primary_device_opts) { +- n->primary_device_opts = qemu_opts_from_qdict( +- qemu_find_opts("device"), +- n->primary_device_dict, errp); ++ n->primary_device_opts = qemu_opts_from_qdict(qemu_find_opts("device"), ++ n->primary_device_dict, ++ errp); + if (!n->primary_device_opts) { + return false; + } +@@ -3176,8 +3173,8 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, + if (migration_in_setup(s) && !should_be_hidden) { + if (failover_unplug_primary(n)) { + vmstate_unregister(VMSTATE_IF(n->primary_dev), +- qdev_get_vmsd(n->primary_dev), +- n->primary_dev); ++ qdev_get_vmsd(n->primary_dev), ++ n->primary_dev); + qapi_event_send_unplug_primary(n->primary_device_id); + qatomic_set(&n->primary_should_be_hidden, true); + } else { +@@ -3201,7 +3198,7 @@ static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) + } + + static int virtio_net_primary_should_be_hidden(DeviceListener *listener, +- QemuOpts *device_opts) ++ QemuOpts *device_opts) + { + VirtIONet *n = container_of(listener, VirtIONet, primary_listener); + bool match_found = false; +@@ -3211,11 +3208,11 @@ static int virtio_net_primary_should_be_hidden(DeviceListener *listener, + return -1; + } + n->primary_device_dict = qemu_opts_to_qdict(device_opts, +- n->primary_device_dict); ++ n->primary_device_dict); + if (n->primary_device_dict) { + g_free(n->standby_id); + n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict, +- "failover_pair_id")); ++ "failover_pair_id")); + } + if (g_strcmp0(n->standby_id, n->netclient_name) == 0) { + match_found = true; +@@ -3235,7 +3232,7 @@ static int virtio_net_primary_should_be_hidden(DeviceListener *listener, + if (n->primary_device_dict) { + g_free(n->primary_device_id); + n->primary_device_id = g_strdup(qdict_get_try_str( +- n->primary_device_dict, "id")); ++ 
n->primary_device_dict, "id")); + if (!n->primary_device_id) { + warn_report("primary_device_id not set"); + } +diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c +index bf79d0bbcd..a25f5d612c 100644 +--- a/softmmu/qdev-monitor.c ++++ b/softmmu/qdev-monitor.c +@@ -573,10 +573,10 @@ void qdev_set_id(DeviceState *dev, const char *id) + } + + static int is_failover_device(void *opaque, const char *name, const char *value, +- Error **errp) ++ Error **errp) + { + if (strcmp(name, "failover_pair_id") == 0) { +- QemuOpts *opts = (QemuOpts *)opaque; ++ QemuOpts *opts = opaque; + + if (qdev_should_hide_device(opts)) { + return 1; +-- +2.27.0 + diff --git a/kvm-failover-g_strcmp0-knows-how-to-handle-NULL.patch b/kvm-failover-g_strcmp0-knows-how-to-handle-NULL.patch new file mode 100644 index 0000000..20c58c0 --- /dev/null +++ b/kvm-failover-g_strcmp0-knows-how-to-handle-NULL.patch @@ -0,0 +1,48 @@ +From 97b7137652441a3d458f3d9f7bc326047de185c3 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:28 -0500 +Subject: [PATCH 31/54] failover: g_strcmp0() knows how to handle NULL + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-9-lvivier@redhat.com> +Patchwork-id: 101249 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 08/27] failover: g_strcmp0() knows how to handle NULL +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Jens Freimann +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-10-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 518eda9fda49da910d47f5baf66a1c0d1d30cebd) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/net/virtio-net.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index c221671852..e334f05352 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -818,7 +818,7 @@ static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp) + int ret = 0; + const char *standby_id = qemu_opt_get(opts, "failover_pair_id"); + +- if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) { ++ if (g_strcmp0(standby_id, n->netclient_name) == 0) { + n->primary_device_id = g_strdup(opts->id); + ret = 1; + } +-- +2.27.0 + diff --git a/kvm-failover-make-sure-that-id-always-exist.patch b/kvm-failover-make-sure-that-id-always-exist.patch new file mode 100644 index 0000000..ce1ed0a --- /dev/null +++ b/kvm-failover-make-sure-that-id-always-exist.patch @@ -0,0 +1,68 @@ +From 06c77533c61f65886bf0a9236d8f13085b2f3e51 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:40 -0500 +Subject: [PATCH 43/54] failover: make sure that id always exist + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-21-lvivier@redhat.com> +Patchwork-id: 101257 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 20/27] failover: make sure that id always exist +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +We check that it exist at device creation time, so we don't have to +check anywhere else. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-22-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit fec037c1e2da0a7ea54eabce65cc14d461fdc5eb) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/net/virtio-net.c | 3 --- + softmmu/qdev-monitor.c | 4 ++++ + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index ff82f1017d..c708c03cf6 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3173,9 +3173,6 @@ static bool failover_hide_primary_device(DeviceListener *listener, + hide = qatomic_read(&n->failover_primary_hidden); + g_free(n->primary_device_id); + n->primary_device_id = g_strdup(device_opts->id); +- if (!n->primary_device_id) { +- warn_report("primary_device_id not set"); +- } + return hide; + } + +diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c +index 0e10f0466f..301089eaea 100644 +--- a/softmmu/qdev-monitor.c ++++ b/softmmu/qdev-monitor.c +@@ -613,6 +613,10 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) + } + + if (qemu_opt_get(opts, "failover_pair_id")) { ++ if (!opts->id) { ++ error_setg(errp, "Device with failover_pair_id don't have id"); ++ return NULL; ++ } + if (qdev_should_hide_device(opts)) { + if (bus && !qbus_is_hotpluggable(bus)) { + error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name); +-- +2.27.0 + diff --git a/kvm-failover-primary-bus-is-only-used-once-and-where-it-.patch b/kvm-failover-primary-bus-is-only-used-once-and-where-it-.patch new file mode 100644 index 0000000..b19d5d0 --- /dev/null +++ b/kvm-failover-primary-bus-is-only-used-once-and-where-it-.patch @@ -0,0 +1,87 @@ +From 0c0190ed0d933a6900230427c374e4b93faab73b Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:23 -0500 +Subject: [PATCH 26/54] failover: primary bus is only used once, and where it + is set + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-4-lvivier@redhat.com> +Patchwork-id: 101245 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 03/27] failover: primary bus is only used once, and where it is set +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Jens Freimann +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +Just remove the struct member. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-5-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 78274682b79d48e8de76c817c67c3cfbb76dc2ee) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + hw/net/virtio-net.c | 8 ++++---- + include/hw/virtio/virtio-net.h | 1 - + 2 files changed, 4 insertions(+), 5 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index a0fa63e7cb..786d313330 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -804,7 +804,6 @@ static void failover_add_primary(VirtIONet *n, Error **errp) + qemu_opts_del(n->primary_device_opts); + } + if (n->primary_dev) { +- n->primary_bus = n->primary_dev->parent_bus; + if (err) { + qdev_unplug(n->primary_dev, &err); + qdev_set_id(n->primary_dev, ""); +@@ -3118,6 +3117,7 @@ static bool failover_replug_primary(VirtIONet *n, Error **errp) + Error *err = NULL; + HotplugHandler *hotplug_ctrl; + PCIDevice *pdev = PCI_DEVICE(n->primary_dev); ++ BusState *primary_bus; + + if (!pdev->partially_hotplugged) { + return true; +@@ -3130,12 +3130,12 @@ static bool failover_replug_primary(VirtIONet *n, Error **errp) + return false; + } + } +- n->primary_bus = n->primary_dev->parent_bus; +- if (!n->primary_bus) { ++ primary_bus = n->primary_dev->parent_bus; ++ if (!primary_bus) { + error_setg(errp, "virtio_net: couldn't find primary bus"); + return false; + } +- qdev_set_parent_bus(n->primary_dev, n->primary_bus, &error_abort); ++ qdev_set_parent_bus(n->primary_dev, primary_bus, &error_abort); + qatomic_set(&n->primary_should_be_hidden, false); + if (!qemu_opt_set_bool(n->primary_device_opts, + "partially_hotplugged", true, errp)) { +diff --git a/include/hw/virtio/virtio-net.h 
b/include/hw/virtio/virtio-net.h +index f4852ac27b..c8da637d40 100644 +--- a/include/hw/virtio/virtio-net.h ++++ b/include/hw/virtio/virtio-net.h +@@ -205,7 +205,6 @@ struct VirtIONet { + QemuOpts *primary_device_opts; + QDict *primary_device_dict; + DeviceState *primary_dev; +- BusState *primary_bus; + char *primary_device_id; + char *standby_id; + bool primary_should_be_hidden; +-- +2.27.0 + diff --git a/kvm-failover-qdev_device_add-returns-err-or-dev-set.patch b/kvm-failover-qdev_device_add-returns-err-or-dev-set.patch new file mode 100644 index 0000000..7b0dbf2 --- /dev/null +++ b/kvm-failover-qdev_device_add-returns-err-or-dev-set.patch @@ -0,0 +1,55 @@ +From 63f2415d2cee7bcf24e7f3dc515c5155731071e6 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:26 -0500 +Subject: [PATCH 29/54] failover: qdev_device_add() returns err or dev set + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-7-lvivier@redhat.com> +Patchwork-id: 101252 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 06/27] failover: qdev_device_add() returns err or dev set +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Jens Freimann +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +Never both. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-8-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 3d1c7a9782d19052505aabc8f2c134ccd6f3f3fb) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/net/virtio-net.c | 7 ------- + 1 file changed, 7 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 6ca85627d8..3e82108d42 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -803,13 +803,6 @@ static void failover_add_primary(VirtIONet *n, Error **errp) + if (err) { + qemu_opts_del(n->primary_device_opts); + } +- if (n->primary_dev) { +- if (err) { +- qdev_unplug(n->primary_dev, &err); +- qdev_set_id(n->primary_dev, ""); +- +- } +- } + } else { + error_setg(errp, "Primary device not found"); + error_append_hint(errp, "Virtio-net failover will not work. Make " +-- +2.27.0 + diff --git a/kvm-failover-remove-failover_find_primary_device-error-p.patch b/kvm-failover-remove-failover_find_primary_device-error-p.patch new file mode 100644 index 0000000..aa16347 --- /dev/null +++ b/kvm-failover-remove-failover_find_primary_device-error-p.patch @@ -0,0 +1,72 @@ +From 96883a1a05766ac6c1a2a064f40aab6c0bd54861 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:41 -0500 +Subject: [PATCH 44/54] failover: remove failover_find_primary_device() error + parameter + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-22-lvivier@redhat.com> +Patchwork-id: 101265 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 21/27] failover: remove failover_find_primary_device() error parameter +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +It can never give one error. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-23-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 0a0a27d66bcb275e5b984d8758880a7eff75464e) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/net/virtio-net.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index c708c03cf6..b994796734 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -832,7 +832,7 @@ static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp) + * @n: VirtIONet device + * @errp: returns an error if this function fails + */ +-static DeviceState *failover_find_primary_device(VirtIONet *n, Error **errp) ++static DeviceState *failover_find_primary_device(VirtIONet *n) + { + Error *err = NULL; + +@@ -897,10 +897,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) + qatomic_set(&n->failover_primary_hidden, false); + failover_add_primary(n, &err); + if (err) { +- n->primary_dev = failover_find_primary_device(n, &err); +- if (err) { +- goto out_err; +- } ++ n->primary_dev = failover_find_primary_device(n); + failover_add_primary(n, &err); + if (err) { + goto out_err; +@@ -3121,7 +3118,7 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, + should_be_hidden = qatomic_read(&n->failover_primary_hidden); + + if (!n->primary_dev) { +- n->primary_dev = failover_find_primary_device(n, &err); ++ n->primary_dev = failover_find_primary_device(n); + if (!n->primary_dev) { + return; + } +-- +2.27.0 + diff --git a/kvm-failover-remove-standby_id-variable.patch b/kvm-failover-remove-standby_id-variable.patch new file mode 100644 index 0000000..c16f2ff --- /dev/null +++ b/kvm-failover-remove-standby_id-variable.patch @@ -0,0 +1,89 @@ +From cead8b9c03911360666ac3bb56d7b1db068ade36 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:30 -0500 +Subject: [PATCH 33/54] failover: remove standby_id variable + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-11-lvivier@redhat.com> +Patchwork-id: 101248 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 10/27] failover: remove standby_id variable +Bugzilla: 1819991 +RH-Acked-by: 
Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +We can calculate it, and we only use it once anyways. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-12-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 4f0303aed87f83715055e558176046a8a3d9b987) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + hw/net/virtio-net.c | 11 +++-------- + include/hw/virtio/virtio-net.h | 1 - + 2 files changed, 3 insertions(+), 9 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 2a99b0e0f6..953d5c2bc8 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3181,23 +3181,19 @@ static int virtio_net_primary_should_be_hidden(DeviceListener *listener, + VirtIONet *n = container_of(listener, VirtIONet, primary_listener); + bool match_found = false; + bool hide = false; ++ const char *standby_id; + + if (!device_opts) { + return -1; + } + n->primary_device_dict = qemu_opts_to_qdict(device_opts, + n->primary_device_dict); +- if (n->primary_device_dict) { +- g_free(n->standby_id); +- n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict, +- "failover_pair_id")); +- } +- if (g_strcmp0(n->standby_id, n->netclient_name) == 0) { ++ standby_id = qemu_opt_get(device_opts, "failover_pair_id"); ++ if (g_strcmp0(standby_id, n->netclient_name) == 0) { + match_found = true; + } else { + match_found = false; + hide = false; +- g_free(n->standby_id); + n->primary_device_dict = NULL; + goto out; + } +@@ -3400,7 +3396,6 @@ static void virtio_net_device_unrealize(DeviceState *dev) + if (n->failover) { + device_listener_unregister(&n->primary_listener); + g_free(n->primary_device_id); +- g_free(n->standby_id); + qobject_unref(n->primary_device_dict); + n->primary_device_dict = NULL; + 
} +diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h +index 7159e6c0a0..a055f39dd6 100644 +--- a/include/hw/virtio/virtio-net.h ++++ b/include/hw/virtio/virtio-net.h +@@ -205,7 +205,6 @@ struct VirtIONet { + QDict *primary_device_dict; + DeviceState *primary_dev; + char *primary_device_id; +- char *standby_id; + /* primary failover device is hidden*/ + bool failover_primary_hidden; + bool failover; +-- +2.27.0 + diff --git a/kvm-failover-should_be_hidden-should-take-a-bool.patch b/kvm-failover-should_be_hidden-should-take-a-bool.patch new file mode 100644 index 0000000..b0fb927 --- /dev/null +++ b/kvm-failover-should_be_hidden-should-take-a-bool.patch @@ -0,0 +1,144 @@ +From 8dadc3183e8e75e47b5f5e39823b9eaf950cf4fe Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:34 -0500 +Subject: [PATCH 37/54] failover: should_be_hidden() should take a bool + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-15-lvivier@redhat.com> +Patchwork-id: 101241 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 14/27] failover: should_be_hidden() should take a bool +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +We didn't use at all the -1 value, and we don't really care. It was +only used for the cases when this is not the device that we are +searching for. And in that case we should not hide the device. + +Once there, simplify virtio-Snet_primary_should_be_hidden. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-16-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 89631fed27bd76b0292d8b2a78291ea96185c87d) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/core/qdev.c | 19 +++++-------------- + hw/net/virtio-net.c | 27 +++++++-------------------- + include/hw/qdev-core.h | 2 +- + 3 files changed, 13 insertions(+), 35 deletions(-) + +diff --git a/hw/core/qdev.c b/hw/core/qdev.c +index 262bca716f..8f4b8f3cc1 100644 +--- a/hw/core/qdev.c ++++ b/hw/core/qdev.c +@@ -214,26 +214,17 @@ void device_listener_unregister(DeviceListener *listener) + + bool qdev_should_hide_device(QemuOpts *opts) + { +- int rc = -1; + DeviceListener *listener; + + QTAILQ_FOREACH(listener, &device_listeners, link) { +- if (listener->should_be_hidden) { +- /* +- * should_be_hidden_will return +- * 1 if device matches opts and it should be hidden +- * 0 if device matches opts and should not be hidden +- * -1 if device doesn't match ops +- */ +- rc = listener->should_be_hidden(listener, opts); +- } +- +- if (rc > 0) { +- break; ++ if (listener->should_be_hidden) { ++ if (listener->should_be_hidden(listener, opts)) { ++ return true; ++ } + } + } + +- return rc > 0; ++ return false; + } + + void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id, +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 881907d1bd..9f12d33da0 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3161,24 +3161,19 @@ static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) + virtio_net_handle_migration_primary(n, s); + } + +-static int virtio_net_primary_should_be_hidden(DeviceListener *listener, +- QemuOpts *device_opts) ++static bool virtio_net_primary_should_be_hidden(DeviceListener *listener, ++ QemuOpts *device_opts) + { + VirtIONet *n = container_of(listener, VirtIONet, primary_listener); +- bool match_found = false; +- bool hide = false; ++ bool hide; + const char *standby_id; + + if (!device_opts) { +- return -1; ++ return false; + } + standby_id = qemu_opt_get(device_opts, "failover_pair_id"); +- if (g_strcmp0(standby_id, n->netclient_name) == 0) { +- match_found = true; +- } else { +- 
match_found = false; +- hide = false; +- goto out; ++ if (g_strcmp0(standby_id, n->netclient_name) != 0) { ++ return false; + } + + /* failover_primary_hidden is set during feature negotiation */ +@@ -3188,15 +3183,7 @@ static int virtio_net_primary_should_be_hidden(DeviceListener *listener, + if (!n->primary_device_id) { + warn_report("primary_device_id not set"); + } +- +-out: +- if (match_found && hide) { +- return 1; +- } else if (match_found && !hide) { +- return 0; +- } else { +- return -1; +- } ++ return hide; + } + + static void virtio_net_device_realize(DeviceState *dev, Error **errp) +diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h +index 5e737195b5..250f4edef6 100644 +--- a/include/hw/qdev-core.h ++++ b/include/hw/qdev-core.h +@@ -200,7 +200,7 @@ struct DeviceListener { + * inform qdev that a device should be hidden, depending on the device + * opts, for example, to hide a standby device. + */ +- int (*should_be_hidden)(DeviceListener *listener, QemuOpts *device_opts); ++ bool (*should_be_hidden)(DeviceListener *listener, QemuOpts *device_opts); + QTAILQ_ENTRY(DeviceListener) link; + }; + +-- +2.27.0 + diff --git a/kvm-failover-simplify-failover_unplug_primary.patch b/kvm-failover-simplify-failover_unplug_primary.patch new file mode 100644 index 0000000..523b8ab --- /dev/null +++ b/kvm-failover-simplify-failover_unplug_primary.patch @@ -0,0 +1,86 @@ +From cf70ee739171e208243b5b06a57d2517df8c3d91 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:45 -0500 +Subject: [PATCH 48/54] failover: simplify failover_unplug_primary + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-26-lvivier@redhat.com> +Patchwork-id: 101242 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 25/27] failover: simplify failover_unplug_primary +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. 
Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +We can calculate device just once. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-27-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 07a5d816d50f5f876d5fcd43724a6ff17cf59a4f) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + hw/net/virtio-net.c | 25 ++++++++++--------------- + 1 file changed, 10 insertions(+), 15 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index b37e9cd1d9..9203d81780 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3146,34 +3146,29 @@ out: + return !err; + } + +-static void virtio_net_handle_migration_primary(VirtIONet *n, +- MigrationState *s) ++static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s) + { + bool should_be_hidden; + Error *err = NULL; ++ DeviceState *dev = failover_find_primary_device(n); + +- should_be_hidden = qatomic_read(&n->failover_primary_hidden); +- +- if (!n->primary_dev) { +- n->primary_dev = failover_find_primary_device(n); +- if (!n->primary_dev) { +- return; +- } ++ if (!dev) { ++ return; + } + ++ should_be_hidden = qatomic_read(&n->failover_primary_hidden); ++ + if (migration_in_setup(s) && !should_be_hidden) { +- if (failover_unplug_primary(n, n->primary_dev)) { +- vmstate_unregister(VMSTATE_IF(n->primary_dev), +- qdev_get_vmsd(n->primary_dev), +- n->primary_dev); +- qapi_event_send_unplug_primary(n->primary_dev->id); ++ if (failover_unplug_primary(n, dev)) { ++ vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev); ++ qapi_event_send_unplug_primary(dev->id); + qatomic_set(&n->failover_primary_hidden, true); + } else { + warn_report("couldn't unplug primary device"); + } + } else if (migration_has_failed(s)) { + /* We already unplugged the device let's plug it back */ +- if (!failover_replug_primary(n, 
n->primary_dev, &err)) { ++ if (!failover_replug_primary(n, dev, &err)) { + if (err) { + error_report_err(err); + } +-- +2.27.0 + diff --git a/kvm-failover-simplify-qdev_device_add-failover-case.patch b/kvm-failover-simplify-qdev_device_add-failover-case.patch new file mode 100644 index 0000000..dd04f26 --- /dev/null +++ b/kvm-failover-simplify-qdev_device_add-failover-case.patch @@ -0,0 +1,70 @@ +From 27a1972d1a5961a8218d5a52fba16b67816635fe Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:38 -0500 +Subject: [PATCH 41/54] failover: simplify qdev_device_add() failover case + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-19-lvivier@redhat.com> +Patchwork-id: 101255 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 18/27] failover: simplify qdev_device_add() failover case +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +Just put all the logic inside the same if. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-20-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 5f2ef3b0d032797b6bad9449dfece3a8111a8529) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L.
de Paula +--- + softmmu/qdev-monitor.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c +index a25f5d612c..12b7540f17 100644 +--- a/softmmu/qdev-monitor.c ++++ b/softmmu/qdev-monitor.c +@@ -600,7 +600,6 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) + const char *driver, *path; + DeviceState *dev = NULL; + BusState *bus = NULL; +- bool hide; + + driver = qemu_opt_get(opts, "driver"); + if (!driver) { +@@ -634,14 +633,16 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) + return NULL; + } + } +- hide = should_hide_device(opts); + +- if ((hide || qdev_hotplug) && bus && !qbus_is_hotpluggable(bus)) { +- error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name); ++ if (should_hide_device(opts)) { ++ if (bus && !qbus_is_hotpluggable(bus)) { ++ error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name); ++ } + return NULL; + } + +- if (hide) { ++ if (qdev_hotplug && bus && !qbus_is_hotpluggable(bus)) { ++ error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name); + return NULL; + } + +-- +2.27.0 + diff --git a/kvm-failover-simplify-qdev_device_add.patch b/kvm-failover-simplify-qdev_device_add.patch new file mode 100644 index 0000000..d69b72e --- /dev/null +++ b/kvm-failover-simplify-qdev_device_add.patch @@ -0,0 +1,89 @@ +From 7822f8042e26cca6c1307e26c6f08d5f99636d90 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:39 -0500 +Subject: [PATCH 42/54] failover: simplify qdev_device_add() + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-20-lvivier@redhat.com> +Patchwork-id: 101256 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 19/27] failover: simplify qdev_device_add() +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. 
Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +We don't need to walk the opts by hand. qemu_opt_get() already does +that. And then we can remove the functions that did that walk. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-21-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 2e28095369f4eab516852fd49dde17c3bfd782f9) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + softmmu/qdev-monitor.c | 32 ++++++-------------------------- + 1 file changed, 6 insertions(+), 26 deletions(-) + +diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c +index 12b7540f17..0e10f0466f 100644 +--- a/softmmu/qdev-monitor.c ++++ b/softmmu/qdev-monitor.c +@@ -572,28 +572,6 @@ void qdev_set_id(DeviceState *dev, const char *id) + } + } + +-static int is_failover_device(void *opaque, const char *name, const char *value, +- Error **errp) +-{ +- if (strcmp(name, "failover_pair_id") == 0) { +- QemuOpts *opts = opaque; +- +- if (qdev_should_hide_device(opts)) { +- return 1; +- } +- } +- +- return 0; +-} +- +-static bool should_hide_device(QemuOpts *opts) +-{ +- if (qemu_opt_foreach(opts, is_failover_device, opts, NULL) == 0) { +- return false; +- } +- return true; +-} +- + DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) + { + DeviceClass *dc; +@@ -634,11 +612,13 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) + } + } + +- if (should_hide_device(opts)) { +- if (bus && !qbus_is_hotpluggable(bus)) { +- error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name); ++ if (qemu_opt_get(opts, "failover_pair_id")) { ++ if (qdev_should_hide_device(opts)) { ++ if (bus && !qbus_is_hotpluggable(bus)) { ++ error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name); ++ } ++ return NULL; + } +- return NULL; + } + + if (qdev_hotplug && bus && !qbus_is_hotpluggable(bus)) { +-- +2.27.0 + diff --git
a/kvm-failover-simplify-virtio_net_find_primary.patch b/kvm-failover-simplify-virtio_net_find_primary.patch new file mode 100644 index 0000000..63e35aa --- /dev/null +++ b/kvm-failover-simplify-virtio_net_find_primary.patch @@ -0,0 +1,70 @@ +From 24bd4b43c3f59c9c28f924da8ef7a9dacc0f2f52 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:33 -0500 +Subject: [PATCH 36/54] failover: simplify virtio_net_find_primary() + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-14-lvivier@redhat.com> +Patchwork-id: 101253 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 13/27] failover: simplify virtio_net_find_primary() +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +a - is_my_primary() never sets an error +b - If we return 1, primary_device_id is always set + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-15-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 7cf05b7ed8e84e89b873701e3dfcd56aa81b2d13) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L.
de Paula +--- + hw/net/virtio-net.c | 18 +++--------------- + 1 file changed, 3 insertions(+), 15 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 70fa372c08..881907d1bd 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -828,24 +828,12 @@ static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp) + + static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp) + { +- DeviceState *dev = NULL; + Error *err = NULL; + +- if (qemu_opts_foreach(qemu_find_opts("device"), +- is_my_primary, n, &err)) { +- if (err) { +- error_propagate(errp, err); +- return NULL; +- } +- if (n->primary_device_id) { +- dev = qdev_find_recursive(sysbus_get_default(), +- n->primary_device_id); +- } else { +- error_setg(errp, "Primary device id not found"); +- return NULL; +- } ++ if (!qemu_opts_foreach(qemu_find_opts("device"), is_my_primary, n, &err)) { ++ return NULL; + } +- return dev; ++ return qdev_find_recursive(sysbus_get_default(), n->primary_device_id); + } + + static DeviceState *virtio_connect_failover_devices(VirtIONet *n, Error **errp) +-- +2.27.0 + diff --git a/kvm-failover-split-failover_find_primary_device_id.patch b/kvm-failover-split-failover_find_primary_device_id.patch new file mode 100644 index 0000000..2b7efbb --- /dev/null +++ b/kvm-failover-split-failover_find_primary_device_id.patch @@ -0,0 +1,128 @@ +From 2e3e87787776632d521ec5f08758973d42fc208e Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:42 -0500 +Subject: [PATCH 45/54] failover: split failover_find_primary_device_id() + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-23-lvivier@redhat.com> +Patchwork-id: 101244 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 22/27] failover: split failover_find_primary_device_id() +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. 
Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +So we can calculate the device id when we need it. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-24-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit f5e1847ba50a8d1adf66c0cf312e53c162e52487) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + hw/net/virtio-net.c | 63 +++++++++++++++++++++++++++++++++------------ + 1 file changed, 47 insertions(+), 16 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index b994796734..2c502c13fd 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -788,6 +788,49 @@ static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) + return virtio_net_guest_offloads_by_features(vdev->guest_features); + } + ++typedef struct { ++ VirtIONet *n; ++ char *id; ++} FailoverId; ++ ++/** ++ * Set the id of the failover primary device ++ * ++ * @opaque: FailoverId to setup ++ * @opts: opts for device we are handling ++ * @errp: returns an error if this function fails ++ */ ++static int failover_set_primary(void *opaque, QemuOpts *opts, Error **errp) ++{ ++ FailoverId *fid = opaque; ++ const char *standby_id = qemu_opt_get(opts, "failover_pair_id"); ++ ++ if (g_strcmp0(standby_id, fid->n->netclient_name) == 0) { ++ fid->id = g_strdup(opts->id); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++/** ++ * Find the primary device id for this failover virtio-net ++ * ++ * @n: VirtIONet device ++ * @errp: returns an error if this function fails ++ */ ++static char *failover_find_primary_device_id(VirtIONet *n) ++{ ++ Error *err = NULL; ++ FailoverId fid; ++ ++ if (!qemu_opts_foreach(qemu_find_opts("device"), ++ failover_set_primary, &fid, &err)) { ++ return NULL; ++ } ++ return fid.id; ++} ++ + static void failover_add_primary(VirtIONet *n, Error **errp) + { + Error *err = NULL; +@@ -812,20 
+855,6 @@ static void failover_add_primary(VirtIONet *n, Error **errp) + error_propagate(errp, err); + } + +-static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp) +-{ +- VirtIONet *n = opaque; +- int ret = 0; +- const char *standby_id = qemu_opt_get(opts, "failover_pair_id"); +- +- if (g_strcmp0(standby_id, n->netclient_name) == 0) { +- n->primary_device_id = g_strdup(opts->id); +- ret = 1; +- } +- +- return ret; +-} +- + /** + * Find the primary device for this failover virtio-net + * +@@ -834,11 +863,13 @@ static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp) + */ + static DeviceState *failover_find_primary_device(VirtIONet *n) + { +- Error *err = NULL; ++ char *id = failover_find_primary_device_id(n); + +- if (!qemu_opts_foreach(qemu_find_opts("device"), is_my_primary, n, &err)) { ++ if (!id) { + return NULL; + } ++ n->primary_device_id = g_strdup(id); ++ + return qdev_find_recursive(sysbus_get_default(), n->primary_device_id); + } + +-- +2.27.0 + diff --git a/kvm-failover-virtio_net_connect_failover_devices-does-no.patch b/kvm-failover-virtio_net_connect_failover_devices-does-no.patch new file mode 100644 index 0000000..e04a77f --- /dev/null +++ b/kvm-failover-virtio_net_connect_failover_devices-does-no.patch @@ -0,0 +1,80 @@ +From 385df1f898e08c9cf0c90e543978cc68ee0c1097 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:36 -0500 +Subject: [PATCH 39/54] failover: virtio_net_connect_failover_devices() does + nothing + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-17-lvivier@redhat.com> +Patchwork-id: 101254 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 16/27] failover: virtio_net_connect_failover_devices() does nothing +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. 
Tsirkin + +From: Juan Quintela + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +It just calls virtio_net_find_primary(), so just update the callers. + +Signed-off-by: Juan Quintela +Message-Id: <20201118083748.1328-18-quintela@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 0763db4f2df3a92336d78e8b68a665f7d1a1bc66) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. de Paula +--- + hw/net/virtio-net.c | 17 ++--------------- + 1 file changed, 2 insertions(+), 15 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 747614ff2a..c6200b924e 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -836,19 +836,6 @@ static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp) + return qdev_find_recursive(sysbus_get_default(), n->primary_device_id); + } + +-static DeviceState *virtio_connect_failover_devices(VirtIONet *n, Error **errp) +-{ +- DeviceState *prim_dev = NULL; +- Error *err = NULL; +- +- prim_dev = virtio_net_find_primary(n, &err); +- if (!prim_dev) { +- error_propagate(errp, err); +- } +- +- return prim_dev; +-} +- + static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) + { + VirtIONet *n = VIRTIO_NET(vdev); +@@ -904,7 +891,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) + qatomic_set(&n->failover_primary_hidden, false); + failover_add_primary(n, &err); + if (err) { +- n->primary_dev = virtio_connect_failover_devices(n, &err); ++ n->primary_dev = virtio_net_find_primary(n, &err); + if (err) { + goto out_err; + } +@@ -3128,7 +3115,7 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, + should_be_hidden = qatomic_read(&n->failover_primary_hidden); + + if (!n->primary_dev) { +- n->primary_dev = virtio_connect_failover_devices(n, &err); ++ n->primary_dev = virtio_net_find_primary(n, &err); + if (!n->primary_dev) { + return; + } +-- +2.27.0 + diff --git 
a/kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch b/kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch new file mode 100644 index 0000000..2e75110 --- /dev/null +++ b/kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch @@ -0,0 +1,213 @@ +From 78375038a68fee2e7b182b4f191d5ba53fbdcd72 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 23 Feb 2021 15:18:11 -0500 +Subject: [PATCH 52/54] i386: Add the support for AMD EPYC 3rd generation + processors + +RH-Author: Dr. David Alan Gilbert +Message-id: <20210223151811.27968-3-dgilbert@redhat.com> +Patchwork-id: 101198 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/2] i386: Add the support for AMD EPYC 3rd generation processors +Bugzilla: 1926785 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Peter Xu + +From: Babu Moger + +Adds the support for AMD 3rd generation processors. The model +display for the new processor will be EPYC-Milan. + +Adds the following new feature bits on top of the feature bits from +the first and second generation EPYC models. 
+ +pcid : Process context identifiers support +ibrs : Indirect Branch Restricted Speculation +ssbd : Speculative Store Bypass Disable +erms : Enhanced REP MOVSB/STOSB support +fsrm : Fast Short REP MOVSB support +invpcid : Invalidate processor context ID +pku : Protection keys support +svme-addr-chk : SVM instructions address check for #GP handling + +Depends on the following kernel commits: +14c2bf81fcd2 ("KVM: SVM: Fix #GP handling for doubly-nested virtualization") +3b9c723ed7cf ("KVM: SVM: Add support for SVM instruction address check change") +4aa2691dcbd3 ("8ce1c461188799d863398dd2865d KVM: x86: Factor out x86 instruction emulation with decoding") +4407a797e941 ("KVM: SVM: Enable INVPCID feature on AMD") +9715092f8d7e ("KVM: X86: Move handling of INVPCID types to x86") +3f3393b3ce38 ("KVM: X86: Rename and move the function vmx_handle_memory_failure to x86.c") +830bd71f2c06 ("KVM: SVM: Remove set_cr_intercept, clr_cr_intercept and is_cr_intercept") +4c44e8d6c193 ("KVM: SVM: Add new intercept word in vmcb_control_area") +c62e2e94b9d4 ("KVM: SVM: Modify 64 bit intercept field to two 32 bit vectors") +9780d51dc2af ("KVM: SVM: Modify intercept_exceptions to generic intercepts") +30abaa88382c ("KVM: SVM: Change intercept_dr to generic intercepts") +03bfeeb988a9 ("KVM: SVM: Change intercept_cr to generic intercepts") +c45ad7229d13 ("KVM: SVM: Introduce vmcb_(set_intercept/clr_intercept/_is_intercept)") +a90c1ed9f11d ("(pcid) KVM: nSVM: Remove unused field") +fa44b82eb831 ("KVM: x86: Move MPK feature detection to common code") +38f3e775e9c2 ("x86/Kconfig: Update config and kernel doc for MPK feature on AMD") +37486135d3a7 ("KVM: x86: Fix pkru save/restore when guest CR4.PKE=0, move it to x86.c") + +Signed-off-by: Babu Moger +Message-Id: <161290460478.11352.8933244555799318236.stgit@bmoger-ubuntu> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 623972ceae091b31331ae4a1dc94fe5cbb891937) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 107 +++++++++++++++++++++++++++++++++++++++++++++- + target/i386/cpu.h | 4 ++ + 2 files changed, 110 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 372cba2942..523a97c0fb 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1029,7 +1029,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "clzero", NULL, "xsaveerptr", NULL, + NULL, NULL, NULL, NULL, + NULL, "wbnoinvd", NULL, NULL, +- "ibpb", NULL, NULL, "amd-stibp", ++ "ibpb", NULL, "ibrs", "amd-stibp", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "amd-ssbd", "virt-ssbd", "amd-no-ssb", NULL, +@@ -1769,6 +1769,56 @@ static CPUCaches epyc_rome_cache_info = { + }, + }; + ++static CPUCaches epyc_milan_cache_info = { ++ .l1d_cache = &(CPUCacheInfo) { ++ .type = DATA_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l1i_cache = &(CPUCacheInfo) { ++ .type = INSTRUCTION_CACHE, ++ .level = 1, ++ .size = 32 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 64, ++ .lines_per_tag = 1, ++ .self_init = 1, ++ .no_invd_sharing = true, ++ }, ++ .l2_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 2, ++ .size = 512 * KiB, ++ .line_size = 64, ++ .associativity = 8, ++ .partitions = 1, ++ .sets = 1024, ++ .lines_per_tag = 1, ++ }, ++ .l3_cache = &(CPUCacheInfo) { ++ .type = UNIFIED_CACHE, ++ .level = 3, ++ .size = 32 * MiB, ++ .line_size = 64, ++ .associativity = 16, ++ .partitions = 1, ++ .sets = 32768, ++ .lines_per_tag = 1, ++ .self_init = true, ++ .inclusive = true, ++ .complex_indexing = true, ++ }, ++}; ++ + /* The following VMX features are not supported by KVM and are left out in the + * CPU definitions: + * +@@ -4101,6 +4151,61 @@ static X86CPUDefinition builtin_x86_defs[] = { + .model_id = "AMD EPYC-Rome Processor", + 
.cache_info = &epyc_rome_cache_info, + }, ++ { ++ .name = "EPYC-Milan", ++ .level = 0xd, ++ .vendor = CPUID_VENDOR_AMD, ++ .family = 25, ++ .model = 1, ++ .stepping = 1, ++ .features[FEAT_1_EDX] = ++ CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | ++ CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | ++ CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | ++ CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | ++ CPUID_VME | CPUID_FP87, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | ++ CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | ++ CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | ++ CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 | ++ CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | ++ CPUID_EXT_PCID, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | ++ CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | ++ CPUID_EXT2_SYSCALL, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | ++ CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | ++ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | ++ CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, ++ .features[FEAT_8000_0008_EBX] = ++ CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | ++ CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | ++ CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | ++ CPUID_8000_0008_EBX_AMD_SSBD, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | ++ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED | ++ CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | ++ CPUID_7_0_EBX_SHA_NI | CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_ERMS | ++ CPUID_7_0_EBX_INVPCID, ++ .features[FEAT_7_0_ECX] = ++ CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_PKU, ++ .features[FEAT_7_0_EDX] = ++ CPUID_7_0_EDX_FSRM, ++ .features[FEAT_XSAVE] 
= ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .features[FEAT_SVM] = ++ CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE | CPUID_SVM_SVME_ADDR_CHK, ++ .xlevel = 0x8000001E, ++ .model_id = "AMD EPYC-Milan Processor", ++ .cache_info = &epyc_milan_cache_info, ++ }, + }; + + /* KVM-specific features that are automatically added/removed +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 4fdb552f93..92ca64a21b 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -811,8 +811,12 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_8000_0008_EBX_WBNOINVD (1U << 9) + /* Indirect Branch Prediction Barrier */ + #define CPUID_8000_0008_EBX_IBPB (1U << 12) ++/* Indirect Branch Restricted Speculation */ ++#define CPUID_8000_0008_EBX_IBRS (1U << 14) + /* Single Thread Indirect Branch Predictors */ + #define CPUID_8000_0008_EBX_STIBP (1U << 15) ++/* Speculative Store Bypass Disable */ ++#define CPUID_8000_0008_EBX_AMD_SSBD (1U << 24) + + #define CPUID_XSAVE_XSAVEOPT (1U << 0) + #define CPUID_XSAVE_XSAVEC (1U << 1) +-- +2.27.0 + diff --git a/kvm-migration-dirty-bitmap-Allow-control-of-bitmap-persi.patch b/kvm-migration-dirty-bitmap-Allow-control-of-bitmap-persi.patch new file mode 100644 index 0000000..940231e --- /dev/null +++ b/kvm-migration-dirty-bitmap-Allow-control-of-bitmap-persi.patch @@ -0,0 +1,167 @@ +From 039775f93548382ec1b98f5a6004c3eee02fbd28 Mon Sep 17 00:00:00 2001 +From: Peter Krempa +Date: Mon, 22 Feb 2021 13:35:05 -0500 +Subject: [PATCH 22/54] migration: dirty-bitmap: Allow control of bitmap + persistence + +RH-Author: Peter Krempa +Message-id: +Patchwork-id: 101171 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/3] migration: dirty-bitmap: Allow control of bitmap persistence +Bugzilla: 1930757 +RH-Acked-by: John Snow +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake + +Bitmap's source persistence is transported over the migration stream and +the 
destination mirrors it. In some cases the destination might want to +persist bitmaps which are not persistent on the source (e.g. the result +of merging bitmaps from a number of layers on the source when migrating +into a squashed image) but currently it would need to create another set +of persistent bitmaps and merge them. + +This patch adds a 'transform' property to the alias map which allows +overriding the persistence of migrated bitmaps both on the source and +destination sides. + +Signed-off-by: Peter Krempa +Message-Id: +Reviewed-by: Eric Blake +[eblake: grammar tweaks, drop dead conditional] +Signed-off-by: Eric Blake +(cherry picked from commit 6e9f21a2aa8a78bc9a512a836a40c79fe50dd2b4) + +https://bugzilla.redhat.com/show_bug.cgi?id=1930757 +Signed-off-by: Danilo C. L. de Paula +--- + migration/block-dirty-bitmap.c | 29 ++++++++++++++++++++++++++--- + qapi/migration.json | 19 ++++++++++++++++++- + 2 files changed, 44 insertions(+), 4 deletions(-) + +diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c +index b39c13ce4e..975093610a 100644 +--- a/migration/block-dirty-bitmap.c ++++ b/migration/block-dirty-bitmap.c +@@ -150,6 +150,7 @@ typedef struct DBMLoadState { + BdrvDirtyBitmap *bitmap; + + bool before_vm_start_handled; /* set in dirty_bitmap_mig_before_vm_start */ ++ BitmapMigrationBitmapAlias *bmap_inner; + + /* + * cancelled +@@ -529,6 +530,7 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs, + } + + FOR_EACH_DIRTY_BITMAP(bs, bitmap) { ++ BitmapMigrationBitmapAliasTransform *bitmap_transform = NULL; + bitmap_name = bdrv_dirty_bitmap_name(bitmap); + if (!bitmap_name) { + continue; +@@ -549,6 +551,9 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs, + } + + bitmap_alias = bmap_inner->alias; ++ if (bmap_inner->has_transform) { ++ bitmap_transform = bmap_inner->transform; ++ } + } else { + if (strlen(bitmap_name) > UINT8_MAX) { + error_report("Cannot migrate bitmap '%s' on node '%s': " 
+@@ -574,8 +579,15 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs, + if (bdrv_dirty_bitmap_enabled(bitmap)) { + dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_ENABLED; + } +- if (bdrv_dirty_bitmap_get_persistence(bitmap)) { +- dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT; ++ if (bitmap_transform && ++ bitmap_transform->has_persistent) { ++ if (bitmap_transform->persistent) { ++ dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT; ++ } ++ } else { ++ if (bdrv_dirty_bitmap_get_persistence(bitmap)) { ++ dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT; ++ } + } + + QSIMPLEQ_INSERT_TAIL(&s->dbms_list, dbms, entry); +@@ -783,6 +795,7 @@ static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s) + uint32_t granularity = qemu_get_be32(f); + uint8_t flags = qemu_get_byte(f); + LoadBitmapState *b; ++ bool persistent; + + if (s->cancelled) { + return 0; +@@ -807,7 +820,15 @@ static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s) + return -EINVAL; + } + +- if (flags & DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT) { ++ if (s->bmap_inner && ++ s->bmap_inner->has_transform && ++ s->bmap_inner->transform->has_persistent) { ++ persistent = s->bmap_inner->transform->persistent; ++ } else { ++ persistent = flags & DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT; ++ } ++ ++ if (persistent) { + bdrv_dirty_bitmap_set_persistence(s->bitmap, true); + } + +@@ -1091,6 +1112,8 @@ static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s, + } else { + bitmap_name = bmap_inner->name; + } ++ ++ s->bmap_inner = bmap_inner; + } + + if (!s->cancelled) { +diff --git a/qapi/migration.json b/qapi/migration.json +index 3c75820527..19b796ab47 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -525,6 +525,19 @@ + 'data': [ 'none', 'zlib', + { 'name': 'zstd', 'if': 'defined(CONFIG_ZSTD)' } ] } + ++## ++# @BitmapMigrationBitmapAliasTransform: ++# ++# @persistent: If present, the bitmap will be made persistent ++# or transient depending on this 
parameter. ++# ++# Since: 6.0 ++## ++{ 'struct': 'BitmapMigrationBitmapAliasTransform', ++ 'data': { ++ '*persistent': 'bool' ++ } } ++ + ## + # @BitmapMigrationBitmapAlias: + # +@@ -533,12 +546,16 @@ + # @alias: An alias name for migration (for example the bitmap name on + # the opposite site). + # ++# @transform: Allows the modification of the migrated bitmap. ++# (since 6.0) ++# + # Since: 5.2 + ## + { 'struct': 'BitmapMigrationBitmapAlias', + 'data': { + 'name': 'str', +- 'alias': 'str' ++ 'alias': 'str', ++ '*transform': 'BitmapMigrationBitmapAliasTransform' + } } + + ## +-- +2.27.0 + diff --git a/kvm-migration-dirty-bitmap-Use-struct-for-alias-map-inne.patch b/kvm-migration-dirty-bitmap-Use-struct-for-alias-map-inne.patch new file mode 100644 index 0000000..156117f --- /dev/null +++ b/kvm-migration-dirty-bitmap-Use-struct-for-alias-map-inne.patch @@ -0,0 +1,143 @@ +From e49b317a80df94b769c01c2ae488a369921088d2 Mon Sep 17 00:00:00 2001 +From: Peter Krempa +Date: Mon, 22 Feb 2021 13:35:04 -0500 +Subject: [PATCH 21/54] migration: dirty-bitmap: Use struct for alias map inner + members + +RH-Author: Peter Krempa +Message-id: <943503323f3f97d576715d09736376cf07d6efab.1614000630.git.pkrempa@redhat.com> +Patchwork-id: 101170 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/3] migration: dirty-bitmap: Use struct for alias map inner members +Bugzilla: 1930757 +RH-Acked-by: John Snow +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake + +Currently the alias mapping hash stores just strings of the target +objects internally. In further patches we'll be adding another member +which will need to be stored in the map so pass a copy of the whole +BitmapMigrationBitmapAlias QAPI struct into the map. + +Signed-off-by: Peter Krempa +Message-Id: +Reviewed-by: Eric Blake +[eblake: adjust long lines] +Signed-off-by: Eric Blake +(cherry picked from commit 0d1e450c7b3117ee635a00c81d9a92666ebc7ffa) + +https://bugzilla.redhat.com/show_bug.cgi?id=1930757 +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/block-dirty-bitmap.c | 33 +++++++++++++++++++++------------ + 1 file changed, 21 insertions(+), 12 deletions(-) + +diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c +index c61d382be8..b39c13ce4e 100644 +--- a/migration/block-dirty-bitmap.c ++++ b/migration/block-dirty-bitmap.c +@@ -75,6 +75,8 @@ + #include "qemu/id.h" + #include "qapi/error.h" + #include "qapi/qapi-commands-migration.h" ++#include "qapi/qapi-visit-migration.h" ++#include "qapi/clone-visitor.h" + #include "trace.h" + + #define CHUNK_SIZE (1 << 10) +@@ -224,6 +226,7 @@ static GHashTable *construct_alias_map(const BitmapMigrationNodeAliasList *bbm, + AliasMapInnerNode *amin; + GHashTable *bitmaps_map; + const char *node_map_from, *node_map_to; ++ GDestroyNotify gdn; + + if (!id_wellformed(bmna->alias)) { + error_setg(errp, "The node alias '%s' is not well-formed", +@@ -263,8 +266,9 @@ static GHashTable *construct_alias_map(const BitmapMigrationNodeAliasList *bbm, + node_map_to = bmna->node_name; + } + +- bitmaps_map = g_hash_table_new_full(g_str_hash, g_str_equal, +- g_free, g_free); ++ gdn = (GDestroyNotify) qapi_free_BitmapMigrationBitmapAlias; ++ bitmaps_map = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, ++ gdn); + + amin = g_new(AliasMapInnerNode, 1); + *amin = (AliasMapInnerNode){ +@@ -276,7 +280,7 @@ static GHashTable *construct_alias_map(const BitmapMigrationNodeAliasList *bbm, + + for (bmbal = bmna->bitmaps; bmbal; bmbal = bmbal->next) { + const BitmapMigrationBitmapAlias *bmba = bmbal->value; +- const char *bmap_map_from, *bmap_map_to; ++ const char *bmap_map_from; + + if (strlen(bmba->alias) > UINT8_MAX) { + error_setg(errp, +@@ -293,7 +297,6 @@ static GHashTable *construct_alias_map(const BitmapMigrationNodeAliasList *bbm, + + if (name_to_alias) { + bmap_map_from = bmba->name; +- bmap_map_to = bmba->alias; + + if (g_hash_table_contains(bitmaps_map, bmba->name)) { + error_setg(errp, "The bitmap '%s'/'%s' is mapped twice", +@@ 
-302,7 +305,6 @@ static GHashTable *construct_alias_map(const BitmapMigrationNodeAliasList *bbm, + } + } else { + bmap_map_from = bmba->alias; +- bmap_map_to = bmba->name; + + if (g_hash_table_contains(bitmaps_map, bmba->alias)) { + error_setg(errp, "The bitmap alias '%s'/'%s' is used twice", +@@ -311,8 +313,8 @@ static GHashTable *construct_alias_map(const BitmapMigrationNodeAliasList *bbm, + } + } + +- g_hash_table_insert(bitmaps_map, +- g_strdup(bmap_map_from), g_strdup(bmap_map_to)); ++ g_hash_table_insert(bitmaps_map, g_strdup(bmap_map_from), ++ QAPI_CLONE(BitmapMigrationBitmapAlias, bmba)); + } + } + +@@ -538,11 +540,15 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs, + } + + if (bitmap_aliases) { +- bitmap_alias = g_hash_table_lookup(bitmap_aliases, bitmap_name); +- if (!bitmap_alias) { ++ BitmapMigrationBitmapAlias *bmap_inner; ++ ++ bmap_inner = g_hash_table_lookup(bitmap_aliases, bitmap_name); ++ if (!bmap_inner) { + /* Skip bitmaps with no alias */ + continue; + } ++ ++ bitmap_alias = bmap_inner->alias; + } else { + if (strlen(bitmap_name) > UINT8_MAX) { + error_report("Cannot migrate bitmap '%s' on node '%s': " +@@ -1074,13 +1080,16 @@ static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s, + + bitmap_name = s->bitmap_alias; + if (!s->cancelled && bitmap_alias_map) { +- bitmap_name = g_hash_table_lookup(bitmap_alias_map, +- s->bitmap_alias); +- if (!bitmap_name) { ++ BitmapMigrationBitmapAlias *bmap_inner; ++ ++ bmap_inner = g_hash_table_lookup(bitmap_alias_map, s->bitmap_alias); ++ if (!bmap_inner) { + error_report("Error: Unknown bitmap alias '%s' on node " + "'%s' (alias '%s')", s->bitmap_alias, + s->bs->node_name, s->node_alias); + cancel_incoming_locked(s); ++ } else { ++ bitmap_name = bmap_inner->name; + } + } + +-- +2.27.0 + diff --git a/kvm-nbd-make-nbd_read-return-EIO-on-error.patch b/kvm-nbd-make-nbd_read-return-EIO-on-error.patch new file mode 100644 index 0000000..9dacfa9 --- /dev/null +++ 
b/kvm-nbd-make-nbd_read-return-EIO-on-error.patch @@ -0,0 +1,72 @@ +From 7b7974468656d2ceba6a7f6dba2b35dfe28a5d1f Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Mon, 8 Feb 2021 22:57:01 -0300 +Subject: [PATCH 03/54] nbd: make nbd_read* return -EIO on error + +RH-Author: Eric Blake +Message-id: <20210208225701.110110-4-eblake@redhat.com> +Patchwork-id: 101007 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v4 3/3] nbd: make nbd_read* return -EIO on error +Bugzilla: 1887883 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz + +From: Roman Kagan + +NBD reconnect logic considers the error code from the functions that +read NBD messages to tell if reconnect should be attempted or not: it is +attempted on -EIO, otherwise the client transitions to NBD_CLIENT_QUIT +state (see nbd_channel_error). This error code is propagated from the +primitives like nbd_read. + +The problem, however, is that nbd_read itself turns every error into -1 +rather than -EIO. As a result, if the NBD server happens to die while +sending the message, the client in QEMU receives less data than it +expects, considers it as a fatal error, and wouldn't attempt +reestablishing the connection. + +Fix it by turning every negative return from qio_channel_read_all into +-EIO returned from nbd_read. Apparently that was the original behavior, +but got broken later. Also adjust nbd_readXX to follow. 
+ +Fixes: e6798f06a6 ("nbd: generalize usage of nbd_read") +Signed-off-by: Roman Kagan +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20210129073859.683063-4-rvkagan@yandex-team.ru> +Signed-off-by: Eric Blake +(cherry picked from commit 5082fc82a6bc3fc06a04be47d39777c7cff61e5b) +Signed-off-by: Eric Blake +Signed-off-by: Eduardo Lima (Etrunko) +--- + include/block/nbd.h | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/include/block/nbd.h b/include/block/nbd.h +index 4a52a43ef5..5f34d23bb0 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -364,7 +364,7 @@ static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size, + if (desc) { + error_prepend(errp, "Failed to read %s: ", desc); + } +- return -1; ++ return ret; + } + + return 0; +@@ -375,8 +375,9 @@ static inline int nbd_read##bits(QIOChannel *ioc, \ + uint##bits##_t *val, \ + const char *desc, Error **errp) \ + { \ +- if (nbd_read(ioc, val, sizeof(*val), desc, errp) < 0) { \ +- return -1; \ ++ int ret = nbd_read(ioc, val, sizeof(*val), desc, errp); \ ++ if (ret < 0) { \ ++ return ret; \ + } \ + *val = be##bits##_to_cpu(*val); \ + return 0; \ +-- +2.27.0 + diff --git a/kvm-pci-add-romsize-property.patch b/kvm-pci-add-romsize-property.patch new file mode 100644 index 0000000..961073f --- /dev/null +++ b/kvm-pci-add-romsize-property.patch @@ -0,0 +1,137 @@ +From aee681700e512679981e39928d8709eb226a4a6d Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 10 Feb 2021 17:04:45 -0300 +Subject: [PATCH 10/54] pci: add romsize property + +RH-Author: Peter Xu +Message-id: <20210210170445.128304-3-peterx@redhat.com> +Patchwork-id: 101041 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/2] pci: add romsize property +Bugzilla: 1917830 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Paolo Bonzini + +From: Paolo Bonzini + +This property can be useful for distros to set up known-good ROM sizes for +migration purposes. 
The VM will fail to start if the ROM is too large, +and migration compatibility will not be broken if the ROM is too small. + +Note that even though romsize is a uint32_t, it has to be between 1 +(because empty ROM files are not accepted, and romsize must be greater +than the file) and 2^31 (because values above are not powers of two and +are rejected). + +Signed-off-by: Paolo Bonzini +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Peter Xu +Message-Id: <20201218182736.1634344-1-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +Message-Id: <20210203131828.156467-3-pbonzini@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: David Edmondson +Acked-by: Laszlo Ersek +(cherry picked from commit 08b1df8ff463e72b0875538fb991d5393047606c) +Signed-off-by: Peter Xu +Signed-off-by: Eduardo Lima (Etrunko) +--- + hw/pci/pci.c | 19 +++++++++++++++++-- + hw/xen/xen_pt_load_rom.c | 14 ++++++++++++-- + include/hw/pci/pci.h | 1 + + 3 files changed, 30 insertions(+), 4 deletions(-) + +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index e4caad33c5..9619b8d068 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -67,6 +67,7 @@ static void pcibus_reset(BusState *qbus); + static Property pci_props[] = { + DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1), + DEFINE_PROP_STRING("romfile", PCIDevice, romfile), ++ DEFINE_PROP_UINT32("romsize", PCIDevice, romsize, -1), + DEFINE_PROP_UINT32("rombar", PCIDevice, rom_bar, 1), + DEFINE_PROP_BIT("multifunction", PCIDevice, cap_present, + QEMU_PCI_CAP_MULTIFUNCTION_BITNR, false), +@@ -2106,6 +2107,11 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) + bool is_default_rom; + uint16_t class_id; + ++ if (pci_dev->romsize != -1 && !is_power_of_2(pci_dev->romsize)) { ++ error_setg(errp, "ROM size %u is not a power of two", pci_dev->romsize); ++ return; ++ } ++ + /* initialize cap_present for pci_is_express() and pci_config_size(), + * Note that hybrid PCIs are not set automatically and need to 
manage + * QEMU_PCI_CAP_EXPRESS manually */ +@@ -2371,7 +2377,16 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, + g_free(path); + return; + } +- size = pow2ceil(size); ++ if (pdev->romsize != -1) { ++ if (size > pdev->romsize) { ++ error_setg(errp, "romfile \"%s\" (%u bytes) is too large for ROM size %u", ++ pdev->romfile, (uint32_t)size, pdev->romsize); ++ g_free(path); ++ return; ++ } ++ } else { ++ pdev->romsize = pow2ceil(size); ++ } + + vmsd = qdev_get_vmsd(DEVICE(pdev)); + +@@ -2381,7 +2396,7 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, + snprintf(name, sizeof(name), "%s.rom", object_get_typename(OBJECT(pdev))); + } + pdev->has_rom = true; +- memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, size, &error_fatal); ++ memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize, &error_fatal); + ptr = memory_region_get_ram_ptr(&pdev->rom); + if (load_image_size(path, ptr, size) < 0) { + error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile); +diff --git a/hw/xen/xen_pt_load_rom.c b/hw/xen/xen_pt_load_rom.c +index a50a80837e..03422a8a71 100644 +--- a/hw/xen/xen_pt_load_rom.c ++++ b/hw/xen/xen_pt_load_rom.c +@@ -53,10 +53,20 @@ void *pci_assign_dev_load_option_rom(PCIDevice *dev, + } + fseek(fp, 0, SEEK_SET); + ++ if (dev->romsize != -1) { ++ if (st.st_size > dev->romsize) { ++ error_report("ROM BAR \"%s\" (%ld bytes) is too large for ROM size %u", ++ rom_file, (long) st.st_size, dev->romsize); ++ goto close_rom; ++ } ++ } else { ++ dev->romsize = st.st_size; ++ } ++ + snprintf(name, sizeof(name), "%s.rom", object_get_typename(owner)); +- memory_region_init_ram(&dev->rom, owner, name, st.st_size, &error_abort); ++ memory_region_init_ram(&dev->rom, owner, name, dev->romsize, &error_abort); + ptr = memory_region_get_ram_ptr(&dev->rom); +- memset(ptr, 0xff, st.st_size); ++ memset(ptr, 0xff, dev->romsize); + + if (!fread(ptr, 1, st.st_size, fp)) { + error_report("pci-assign: Cannot read from 
host %s", rom_file); +diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h +index 72ce649eee..75a6b15757 100644 +--- a/include/hw/pci/pci.h ++++ b/include/hw/pci/pci.h +@@ -343,6 +343,7 @@ struct PCIDevice { + + /* Location of option rom */ + char *romfile; ++ uint32_t romsize; + bool has_rom; + MemoryRegion rom; + uint32_t rom_bar; +-- +2.27.0 + diff --git a/kvm-pci-reject-too-large-ROMs.patch b/kvm-pci-reject-too-large-ROMs.patch new file mode 100644 index 0000000..739b908 --- /dev/null +++ b/kvm-pci-reject-too-large-ROMs.patch @@ -0,0 +1,89 @@ +From a6e34aa76d86319d15355fd55fa6d12eb49a816f Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 10 Feb 2021 17:04:44 -0300 +Subject: [PATCH 09/54] pci: reject too large ROMs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +Message-id: <20210210170445.128304-2-peterx@redhat.com> +Patchwork-id: 101039 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/2] pci: reject too large ROMs +Bugzilla: 1917830 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Paolo Bonzini + +From: Paolo Bonzini + +get_image_size() returns an int64_t, which pci_add_option_rom() assigns +to an "int" without any range checking. A 32-bit BAR could be up to +2 GiB in size, so reject anything above it. In order to accommodate +a rounded-up size of 2 GiB, change pci_patch_ids's size argument +to unsigned. + +Conflicts: + hw/pci/pci.c: missing 2c65db5e58d ("vl: extract softmmu/datadir.c") so + there's no "#include " yet + +Reviewed-by: Peter Xu +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Laszlo Ersek +Signed-off-by: Paolo Bonzini +Message-Id: <20210203131828.156467-2-pbonzini@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S.
Tsirkin +Reviewed-by: David Edmondson +(cherry picked from commit 7c16b5bbb6c0f797945327d17e4be60f25a4427d) +Signed-off-by: Peter Xu +Signed-off-by: Eduardo Lima (Etrunko) +--- + hw/pci/pci.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index 0131d9d02c..e4caad33c5 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -24,6 +24,7 @@ + + #include "qemu/osdep.h" + #include "qemu-common.h" ++#include "qemu/units.h" + #include "hw/irq.h" + #include "hw/pci/pci.h" + #include "hw/pci/pci_bridge.h" +@@ -2256,7 +2257,7 @@ static uint8_t pci_find_capability_at_offset(PCIDevice *pdev, uint8_t offset) + + /* Patch the PCI vendor and device ids in a PCI rom image if necessary. + This is needed for an option rom which is used for more than one device. */ +-static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, int size) ++static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) + { + uint16_t vendor_id; + uint16_t device_id; +@@ -2314,7 +2315,7 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, int size) + static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, + Error **errp) + { +- int size; ++ int64_t size; + char *path; + void *ptr; + char name[32]; +@@ -2364,6 +2365,11 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, + error_setg(errp, "romfile \"%s\" is empty", pdev->romfile); + g_free(path); + return; ++ } else if (size > 2 * GiB) { ++ error_setg(errp, "romfile \"%s\" too large (size cannot exceed 2 GiB)", ++ pdev->romfile); ++ g_free(path); ++ return; + } + size = pow2ceil(size); + +-- +2.27.0 + diff --git a/kvm-pcie-don-t-set-link-state-active-if-the-slot-is-empt.patch b/kvm-pcie-don-t-set-link-state-active-if-the-slot-is-empt.patch new file mode 100644 index 0000000..6ffcc2c --- /dev/null +++ b/kvm-pcie-don-t-set-link-state-active-if-the-slot-is-empt.patch @@ -0,0 +1,146 @@ +From 20eb8dc4f6679e3325e1f1f434b17e2dc6a60eee Mon Sep 17 00:00:00 2001 
+From: Laurent Vivier +Date: Thu, 25 Feb 2021 21:42:47 -0500 +Subject: [PATCH 20/54] pcie: don't set link state active if the slot is empty + +RH-Author: Laurent Vivier +Message-id: <20210225214247.1336554-1-lvivier@redhat.com> +Patchwork-id: 101211 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH] pcie: don't set link state active if the slot is empty +Bugzilla: 1917654 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Michael S. Tsirkin +RH-Acked-by: Alex Williamson + +BZ: https://bugzilla.redhat.com/1917654 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=35163495 + +When the pcie slot is initialized, by default PCI_EXP_LNKSTA_DLLLA +(Data Link Layer Link Active) is set in PCI_EXP_LNKSTA +(Link Status) without checking if the slot is empty or not. + +This is confusing for the kernel because as it sees the link is up +it tries to read the vendor ID and fails: + +(From https://bugzilla.kernel.org/show_bug.cgi?id=211691) + +[ 1.661105] pcieport 0000:00:02.2: pciehp: Slot Capabilities : 0x0002007b +[ 1.661115] pcieport 0000:00:02.2: pciehp: Slot Status : 0x0010 +[ 1.661123] pcieport 0000:00:02.2: pciehp: Slot Control : 0x07c0 +[ 1.661138] pcieport 0000:00:02.2: pciehp: Slot #0 AttnBtn+ PwrCtrl+ MRL- AttnInd+ PwrInd+ HotPlug+ Surprise+ Interlock+ NoCompl- IbPresDis- LLActRep+ +[ 1.662581] pcieport 0000:00:02.2: pciehp: pciehp_get_power_status: SLOTCTRL 6c value read 7c0 +[ 1.662597] pcieport 0000:00:02.2: pciehp: pciehp_check_link_active: lnk_status = 2204 +[ 1.662703] pcieport 0000:00:02.2: pciehp: pending interrupts 0x0010 from Slot Status +[ 1.662706] pcieport 0000:00:02.2: pciehp: pcie_enable_notification: SLOTCTRL 6c write cmd 1031 +[ 1.662730] pcieport 0000:00:02.2: pciehp: pciehp_check_link_active: lnk_status = 2204 +[ 1.662748] pcieport 0000:00:02.2: pciehp: pciehp_check_link_active: lnk_status = 2204 +[ 1.662750] pcieport 0000:00:02.2: pciehp: Slot(0-2): Link Up +[ 2.896132] pcieport 0000:00:02.2: pciehp: 
pciehp_check_link_status: lnk_status = 2204 +[ 2.896135] pcieport 0000:00:02.2: pciehp: Slot(0-2): No device found +[ 2.896900] pcieport 0000:00:02.2: pciehp: pending interrupts 0x0010 from Slot Status +[ 2.896903] pcieport 0000:00:02.2: pciehp: pciehp_power_off_slot: SLOTCTRL 6c write cmd 400 +[ 3.656901] pcieport 0000:00:02.2: pciehp: pending interrupts 0x0009 from Slot Status + +This is really a problem with virtio-net failover that hotplugs a VFIO +card during the boot process. The kernel can shutdown the slot while +QEMU is hotplugging it, and this likely ends by an automatic unplug of +the card. At the end of the boot sequence the card has disappeared. + +To fix that, don't set the "Link Active" state in the init function, but +rely on the plug function to do it, as the mechanism has already been +introduced by 2f2b18f60bf1. + +Fixes: 2f2b18f60bf1 ("pcie: set link state inactive/active after hot unplug/plug") +Cc: zhengxiang9@huawei.com +Fixes: 3d67447fe7c2 ("pcie: Fill PCIESlot link fields to support higher speeds and widths") +Cc: alex.williamson@redhat.com +Fixes: b2101eae63ea ("pcie: Set the "link active" in the link status register") +Cc: benh@kernel.crashing.org +Signed-off-by: Laurent Vivier +Message-Id: <20210212135250.2738750-5-lvivier@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit df72184ec15829053b3bb5a0d5801773b6d9ec25) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/pci/pcie.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c +index d4010cf8f3..a733e2fb87 100644 +--- a/hw/pci/pcie.c ++++ b/hw/pci/pcie.c +@@ -75,11 +75,6 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version) + QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1) | + QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT)); + +- if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { +- pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, +- PCI_EXP_LNKSTA_DLLLA); +- } +- + /* We changed link status bits over time, and changing them across + * migrations is generally fine as hardware changes them too. + * Let's not bother checking. +@@ -125,8 +120,7 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev) + */ + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, + PCI_EXP_LNKCAP_DLLLARC); +- pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, +- PCI_EXP_LNKSTA_DLLLA); ++ /* the PCI_EXP_LNKSTA_DLLLA will be set in the hotplug function */ + + /* + * Target Link Speed defaults to the highest link speed supported by +@@ -427,6 +421,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); + uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; + PCIDevice *pci_dev = PCI_DEVICE(dev); ++ uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); + + /* Don't send event when device is enabled during qemu machine creation: + * it is present on boot, no hotplug event is necessary. 
We do send an +@@ -434,7 +429,8 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + if (!dev->hotplugged) { + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); +- if (pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { ++ if (pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || ++ (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { + pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } +@@ -448,7 +444,8 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + if (pci_get_function_0(pci_dev)) { + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); +- if (pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { ++ if (pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || ++ (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { + pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } +@@ -640,6 +637,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev, + uint32_t pos = dev->exp.exp_cap; + uint8_t *exp_cap = dev->config + pos; + uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); ++ uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); + + if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) { + /* +@@ -695,7 +693,8 @@ void pcie_cap_slot_write_config(PCIDevice *dev, + + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); +- if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { ++ if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || ++ (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } +-- +2.27.0 + diff --git a/kvm-qemu-iotests-300-Add-test-case-for-modifying-persist.patch b/kvm-qemu-iotests-300-Add-test-case-for-modifying-persist.patch new file mode 100644 index 0000000..0cf96d6 --- /dev/null +++ b/kvm-qemu-iotests-300-Add-test-case-for-modifying-persist.patch @@ -0,0 +1,154 @@ +From b76dbfedc47366039a08f68de82792b9c70a6be9 Mon Sep 17 00:00:00 2001 +From: Peter Krempa 
+Date: Mon, 22 Feb 2021 13:35:06 -0500 +Subject: [PATCH 23/54] qemu-iotests: 300: Add test case for modifying + persistence of bitmap + +RH-Author: Peter Krempa +Message-id: +Patchwork-id: 101172 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 3/3] qemu-iotests: 300: Add test case for modifying persistence of bitmap +Bugzilla: 1930757 +RH-Acked-by: John Snow +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake + +Verify that the modification of the bitmap persistence over migration +which is controlled via BitmapMigrationBitmapAliasTransform works +properly. + +Based on TestCrossAliasMigration + +Signed-off-by: Peter Krempa +Message-Id: +Reviewed-by: Eric Blake +[eblake: Adjust test for explicit read_zeroes=False] +Signed-off-by: Eric Blake +(cherry picked from commit ca4bfec41d56a1154da89b105048b3462361d0f0) + +https://bugzilla.redhat.com/show_bug.cgi?id=1930757 +Signed-off-by: Danilo C. L. de Paula +--- + tests/qemu-iotests/300 | 93 ++++++++++++++++++++++++++++++++++++++ + tests/qemu-iotests/300.out | 4 +- + 2 files changed, 95 insertions(+), 2 deletions(-) + +diff --git a/tests/qemu-iotests/300 b/tests/qemu-iotests/300 +index 5b75121b84..b25d8b04c0 100755 +--- a/tests/qemu-iotests/300 ++++ b/tests/qemu-iotests/300 +@@ -588,6 +588,99 @@ class TestCrossAliasMigration(TestDirtyBitmapMigration): + self.verify_dest_has_all_bitmaps() + self.verify_dest_error(None) + ++class TestAliasTransformMigration(TestDirtyBitmapMigration): ++ """ ++ Tests the 'transform' option which modifies bitmap persistence on migration. 
++ """ ++ ++ src_node_name = 'node-a' ++ dst_node_name = 'node-b' ++ src_bmap_name = 'bmap-a' ++ dst_bmap_name = 'bmap-b' ++ ++ def setUp(self) -> None: ++ TestDirtyBitmapMigration.setUp(self) ++ ++ # Now create another block device and let both have two bitmaps each ++ result = self.vm_a.qmp('blockdev-add', ++ node_name='node-b', driver='null-co', ++ read_zeroes=False) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.vm_b.qmp('blockdev-add', ++ node_name='node-a', driver='null-co', ++ read_zeroes=False) ++ self.assert_qmp(result, 'return', {}) ++ ++ bmaps_to_add = (('node-a', 'bmap-b'), ++ ('node-b', 'bmap-a'), ++ ('node-b', 'bmap-b')) ++ ++ for (node, bmap) in bmaps_to_add: ++ result = self.vm_a.qmp('block-dirty-bitmap-add', ++ node=node, name=bmap) ++ self.assert_qmp(result, 'return', {}) ++ ++ @staticmethod ++ def transform_mapping() -> BlockBitmapMapping: ++ return [ ++ { ++ 'node-name': 'node-a', ++ 'alias': 'node-a', ++ 'bitmaps': [ ++ { ++ 'name': 'bmap-a', ++ 'alias': 'bmap-a', ++ 'transform': ++ { ++ 'persistent': True ++ } ++ }, ++ { ++ 'name': 'bmap-b', ++ 'alias': 'bmap-b' ++ } ++ ] ++ }, ++ { ++ 'node-name': 'node-b', ++ 'alias': 'node-b', ++ 'bitmaps': [ ++ { ++ 'name': 'bmap-a', ++ 'alias': 'bmap-a' ++ }, ++ { ++ 'name': 'bmap-b', ++ 'alias': 'bmap-b' ++ } ++ ] ++ } ++ ] ++ ++ def verify_dest_bitmap_state(self) -> None: ++ bitmaps = self.vm_b.query_bitmaps() ++ ++ for node in bitmaps: ++ bitmaps[node] = sorted(((bmap['name'], bmap['persistent']) for bmap in bitmaps[node])) ++ ++ self.assertEqual(bitmaps, ++ {'node-a': [('bmap-a', True), ('bmap-b', False)], ++ 'node-b': [('bmap-a', False), ('bmap-b', False)]}) ++ ++ def test_transform_on_src(self) -> None: ++ self.set_mapping(self.vm_a, self.transform_mapping()) ++ ++ self.migrate() ++ self.verify_dest_bitmap_state() ++ self.verify_dest_error(None) ++ ++ def test_transform_on_dst(self) -> None: ++ self.set_mapping(self.vm_b, self.transform_mapping()) ++ ++ self.migrate() ++ 
self.verify_dest_bitmap_state() ++ self.verify_dest_error(None) + + if __name__ == '__main__': + iotests.main(supported_protocols=['file']) +diff --git a/tests/qemu-iotests/300.out b/tests/qemu-iotests/300.out +index cafb8161f7..12e9ab7d57 100644 +--- a/tests/qemu-iotests/300.out ++++ b/tests/qemu-iotests/300.out +@@ -1,5 +1,5 @@ +-..................................... ++....................................... + ---------------------------------------------------------------------- +-Ran 37 tests ++Ran 39 tests + + OK +-- +2.27.0 + diff --git a/kvm-qemu-nbd-Use-SOMAXCONN-for-socket-listen-backlog.patch b/kvm-qemu-nbd-Use-SOMAXCONN-for-socket-listen-backlog.patch new file mode 100644 index 0000000..573aeaf --- /dev/null +++ b/kvm-qemu-nbd-Use-SOMAXCONN-for-socket-listen-backlog.patch @@ -0,0 +1,96 @@ +From 1107799dade18adccfca5097341b6dfb4977e69e Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Mon, 22 Feb 2021 21:34:55 -0500 +Subject: [PATCH 19/54] qemu-nbd: Use SOMAXCONN for socket listen() backlog +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +Message-id: <20210222213455.320104-2-eblake@redhat.com> +Patchwork-id: 101192 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] qemu-nbd: Use SOMAXCONN for socket listen() backlog +Bugzilla: 1925345 +RH-Acked-by: Richard Jones +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Daniel P. Berrange + +Our default of a backlog of 1 connection is rather puny; it gets in +the way when we are explicitly allowing multiple clients (such as +qemu-nbd -e N [--shared], or nbd-server-start with its default +"max-connections":0 for unlimited), but is even a problem when we +stick to qemu-nbd's default of only 1 active client but use -t +[--persistent] where a second client can start using the server once +the first finishes. 
While the effects are less noticeable on TCP +sockets (since the client can poll() to learn when the server is ready +again), it is definitely observable on Unix sockets, where on Linux, a +client will fail with EAGAIN and no recourse but to sleep an arbitrary +amount of time before retrying if the server backlog is already full. + +Since QMP nbd-server-start is always persistent, it now always +requests a backlog of SOMAXCONN; meanwhile, qemu-nbd will request +SOMAXCONN if persistent, otherwise its backlog should be based on the +expected number of clients. + +See https://bugzilla.redhat.com/1925045 for a demonstration of where +our low backlog prevents libnbd from connecting as many parallel +clients as it wants. + +Reported-by: Richard W.M. Jones +Signed-off-by: Eric Blake +CC: qemu-stable@nongnu.org +Message-Id: <20210209152759.209074-2-eblake@redhat.com> +Tested-by: Richard W.M. Jones +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Eric Blake +(cherry picked from commit 582d4210eb2f2ab5baac328fe4b479cd86da1647) +Signed-off-by: Eric Blake +Signed-off-by: Danilo C. L. de Paula +--- + blockdev-nbd.c | 7 ++++++- + qemu-nbd.c | 10 +++++++++- + 2 files changed, 15 insertions(+), 2 deletions(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index d8443d235b..b264620b98 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -134,7 +134,12 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds, + qio_net_listener_set_name(nbd_server->listener, + "nbd-listener"); + +- if (qio_net_listener_open_sync(nbd_server->listener, addr, 1, errp) < 0) { ++ /* ++ * Because this server is persistent, a backlog of SOMAXCONN is ++ * better than trying to size it to max_connections. 
++ */ ++ if (qio_net_listener_open_sync(nbd_server->listener, addr, SOMAXCONN, ++ errp) < 0) { + goto error; + } + +diff --git a/qemu-nbd.c b/qemu-nbd.c +index 1d337b7504..ce1dc43d69 100644 +--- a/qemu-nbd.c ++++ b/qemu-nbd.c +@@ -970,8 +970,16 @@ int main(int argc, char **argv) + + server = qio_net_listener_new(); + if (socket_activation == 0) { ++ int backlog; ++ ++ if (persistent) { ++ backlog = SOMAXCONN; ++ } else { ++ backlog = MIN(shared, SOMAXCONN); ++ } + saddr = nbd_build_socket_address(sockpath, bindto, port); +- if (qio_net_listener_open_sync(server, saddr, 1, &local_err) < 0) { ++ if (qio_net_listener_open_sync(server, saddr, backlog, ++ &local_err) < 0) { + object_unref(OBJECT(server)); + error_report_err(local_err); + exit(EXIT_FAILURE); +-- +2.27.0 + diff --git a/kvm-qemu-storage-daemon-Enable-object-add.patch b/kvm-qemu-storage-daemon-Enable-object-add.patch new file mode 100644 index 0000000..8f48b5a --- /dev/null +++ b/kvm-qemu-storage-daemon-Enable-object-add.patch @@ -0,0 +1,49 @@ +From 6707057bc09cef526579bddb54ef7d4c3a7883ad Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 16 Feb 2021 16:19:42 -0500 +Subject: [PATCH 16/54] qemu-storage-daemon: Enable object-add +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +Message-id: <20210216161943.126728-5-kwolf@redhat.com> +Patchwork-id: 101103 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 4/5] qemu-storage-daemon: Enable object-add +Bugzilla: 1901323 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +As we don't have a fully QAPIfied version of object-add yet and it still +has 'gen': false in the schema, it needs to be registered explicitly in +init_qmp_commands() to be available for users. + +Fixes: 2af282ec51a27116d0402cab237b8970800f870c +Signed-off-by: Kevin Wolf +Message-Id: <20210204072137.19663-1-kwolf@redhat.com> +Reviewed-by: Daniel P. 
Berrangé +Signed-off-by: Kevin Wolf +(cherry picked from commit 15d40e9204eb3d89577187f117a1dde2237bdc4d) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + storage-daemon/qemu-storage-daemon.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c +index d8d172cc60..9021a46b3a 100644 +--- a/storage-daemon/qemu-storage-daemon.c ++++ b/storage-daemon/qemu-storage-daemon.c +@@ -144,6 +144,8 @@ static void init_qmp_commands(void) + qmp_init_marshal(&qmp_commands); + qmp_register_command(&qmp_commands, "query-qmp-schema", + qmp_query_qmp_schema, QCO_ALLOW_PRECONFIG); ++ qmp_register_command(&qmp_commands, "object-add", qmp_object_add, ++ QCO_NO_OPTIONS); + + QTAILQ_INIT(&qmp_cap_negotiation_commands); + qmp_register_command(&qmp_cap_negotiation_commands, "qmp_capabilities", +-- +2.27.0 + diff --git a/kvm-redhat-Add-some-devices-for-exporting-upstream-machi.patch b/kvm-redhat-Add-some-devices-for-exporting-upstream-machi.patch new file mode 100644 index 0000000..46e9ec7 --- /dev/null +++ b/kvm-redhat-Add-some-devices-for-exporting-upstream-machi.patch @@ -0,0 +1,135 @@ +From 1b6e1cc1f3d8033620bc0c04670d252180bd2c36 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 10 Feb 2021 17:10:34 -0300 +Subject: [PATCH 11/54] redhat: Add some devices for exporting upstream machine + types + +RH-Author: Peter Xu +Message-id: <20210210171034.129116-2-peterx@redhat.com> +Patchwork-id: 101043 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] redhat: Add some devices for exporting upstream machine types +Bugzilla: 1917826 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Dr. David Alan Gilbert + +Both "isa-parallel" and "hpet" will be required for the to-be-exported upstream +x86 machine types, so add them back into config. + +Since HPET was disabled for rhel machine types previously, we need to +explicitly do that for RHEL now after we add HPET back. 
+ +Meanwhile, add blockers for the two devices so that they can never be created +on RHEL machine types. That should keep the old behavior for RHEL-AV. + +Signed-off-by: Peter Xu +Signed-off-by: Eduardo Lima (Etrunko) +--- + default-configs/devices/x86_64-softmmu.mak | 6 ++++++ + default-configs/devices/x86_64-upstream-devices.mak | 4 ++++ + hw/char/parallel.c | 9 +++++++++ + hw/i386/pc_piix.c | 2 +- + hw/i386/pc_q35.c | 2 +- + hw/timer/hpet.c | 8 ++++++++ + 6 files changed, 29 insertions(+), 2 deletions(-) + create mode 100644 default-configs/devices/x86_64-upstream-devices.mak + +diff --git a/default-configs/devices/x86_64-softmmu.mak b/default-configs/devices/x86_64-softmmu.mak +index b5de7e5279..e57bcff7d9 100644 +--- a/default-configs/devices/x86_64-softmmu.mak ++++ b/default-configs/devices/x86_64-softmmu.mak +@@ -3,3 +3,9 @@ + #include i386-softmmu.mak + + include x86_64-rh-devices.mak ++ ++# ++# RHEL: this is for the limited upstream machine type support, so to export ++# some more devices than what RHEL machines have. 
++# ++include x86_64-upstream-devices.mak +diff --git a/default-configs/devices/x86_64-upstream-devices.mak b/default-configs/devices/x86_64-upstream-devices.mak +new file mode 100644 +index 0000000000..2cd20f54d2 +--- /dev/null ++++ b/default-configs/devices/x86_64-upstream-devices.mak +@@ -0,0 +1,4 @@ ++# We need "isa-parallel" ++CONFIG_PARALLEL=y ++# We need "hpet" ++CONFIG_HPET=y +diff --git a/hw/char/parallel.c b/hw/char/parallel.c +index 8b418abf71..6b3696a237 100644 +--- a/hw/char/parallel.c ++++ b/hw/char/parallel.c +@@ -29,6 +29,7 @@ + #include "chardev/char-parallel.h" + #include "chardev/char-fe.h" + #include "hw/acpi/aml-build.h" ++#include "hw/boards.h" + #include "hw/irq.h" + #include "hw/isa/isa.h" + #include "hw/qdev-properties.h" +@@ -533,6 +534,14 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp) + int base; + uint8_t dummy; + ++ /* Restricted for Red Hat Enterprise Linux */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (strstr(mc->name, "rhel")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (!qemu_chr_fe_backend_connected(&s->chr)) { + error_setg(errp, "Can't create parallel device, empty char device"); + return; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 1b1cc18ae0..6e1f1ba082 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1018,7 +1018,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + m->family = "pc_piix_Y"; +- m->default_machine_opts = "firmware=bios-256k.bin"; ++ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; + pcmc->default_nic_model = "e1000"; + m->default_display = "std"; + m->no_parallel = 1; +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 72854192a9..a8c0496c9f 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -580,7 +580,7 @@ static void pc_q35_machine_rhel_options(MachineClass 
*m) + pcmc->default_nic_model = "e1000e"; + m->family = "pc_q35_Z"; + m->units_per_default_bus = 1; +- m->default_machine_opts = "firmware=bios-256k.bin"; ++ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; + m->default_display = "std"; + m->no_floppy = 1; + m->no_parallel = 1; +diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c +index 9520471be2..202e032524 100644 +--- a/hw/timer/hpet.c ++++ b/hw/timer/hpet.c +@@ -733,6 +733,14 @@ static void hpet_realize(DeviceState *dev, Error **errp) + int i; + HPETTimer *timer; + ++ /* Restricted for Red Hat Enterprise Linux */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (strstr(mc->name, "rhel")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (!s->intcap) { + warn_report("Hpet's intcap not initialized"); + } +-- +2.27.0 + diff --git a/kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch b/kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch new file mode 100644 index 0000000..e5a4937 --- /dev/null +++ b/kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch @@ -0,0 +1,205 @@ +From 2cb473c2e1cd671da4458b58a0f760f4f1c36cbc Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Wed, 10 Feb 2021 16:54:03 -0300 +Subject: [PATCH 08/54] spapr: Adjust firmware path of PCI devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Greg Kurz +Message-id: <20210210165403.469213-2-gkurz@redhat.com> +Patchwork-id: 101036 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] spapr: Adjust firmware path of PCI devices +Bugzilla: 1920941 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: David Gibson +RH-Acked-by: Laszlo Ersek + +From: Greg Kurz + +It is currently not possible to perform a strict boot from USB storage: + +$ qemu-system-ppc64 -accel kvm -nodefaults -nographic -serial stdio \ + -boot strict=on \ + -device qemu-xhci \ + -device usb-storage,drive=disk,bootindex=0 \ 
+ -blockdev driver=file,node-name=disk,filename=fedora-ppc64le.qcow2 + +SLOF ********************************************************************** +QEMU Starting + Build Date = Jul 17 2020 11:15:24 + FW Version = git-e18ddad8516ff2cf + Press "s" to enter Open Firmware. + +Populating /vdevice methods +Populating /vdevice/vty@71000000 +Populating /vdevice/nvram@71000001 +Populating /pci@800000020000000 + 00 0000 (D) : 1b36 000d serial bus [ usb-xhci ] +No NVRAM common partition, re-initializing... +Scanning USB + XHCI: Initializing + USB Storage + SCSI: Looking for devices + 101000000000000 DISK : "QEMU QEMU HARDDISK 2.5+" +Using default console: /vdevice/vty@71000000 + + Welcome to Open Firmware + + Copyright (c) 2004, 2017 IBM Corporation All rights reserved. + This program and the accompanying materials are made available + under the terms of the BSD License available at + http://www.opensource.org/licenses/bsd-license.php + +Trying to load: from: /pci@800000020000000/usb@0/storage@1/disk@101000000000000 ... +E3405: No such device + +E3407: Load failed + + Type 'boot' and press return to continue booting the system. + Type 'reset-all' and press return to reboot the system. + +Ready! +0 > + +The device tree handed over by QEMU to SLOF indeed contains: + +qemu,boot-list = + "/pci@800000020000000/usb@0/storage@1/disk@101000000000000 HALT"; + +but the device node is named usb-xhci@0, not usb@0. + +This happens because the firmware names of PCI devices returned +by get_boot_devices_list() come from pcibus_get_fw_dev_path(), +while the sPAPR PHB code uses a different naming scheme for +device nodes. 
This inconsistency has always been there but it was +hidden for a long time because SLOF used to rename USB device +nodes, until this commit, merged in QEMU 4.2.0 : + +commit 85164ad4ed9960cac842fa4cc067c6b6699b0994 +Author: Alexey Kardashevskiy +Date: Wed Sep 11 16:24:32 2019 +1000 + + pseries: Update SLOF firmware image + + This fixes USB host bus adapter name in the device tree to match QEMU's + one. + + Signed-off-by: Alexey Kardashevskiy + Signed-off-by: David Gibson + +Fortunately, sPAPR implements the firmware path provider interface. +This provides a way to override the default firmware paths. + +Just factor out the sPAPR PHB naming logic from spapr_dt_pci_device() +to a helper, and use it in the sPAPR firmware path provider hook. + +Fixes: 85164ad4ed99 ("pseries: Update SLOF firmware image") +Signed-off-by: Greg Kurz +Message-Id: <20210122170157.246374-1-groug@kaod.org> +Reviewed-by: Daniel Henrique Barboza +Signed-off-by: David Gibson +(cherry picked from commit 040bdafce12f750816d879442014df2999a995c4) +Signed-off-by: Greg Kurz +Signed-off-by: Eduardo Lima (Etrunko) +--- + hw/ppc/spapr.c | 5 +++++ + hw/ppc/spapr_pci.c | 33 ++++++++++++++++++--------------- + include/hw/pci-host/spapr.h | 2 ++ + 3 files changed, 25 insertions(+), 15 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index a67df8cb26..c6a97e7964 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -3057,6 +3057,7 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, + SCSIDevice *d = CAST(SCSIDevice, dev, TYPE_SCSI_DEVICE); + SpaprPhbState *phb = CAST(SpaprPhbState, dev, TYPE_SPAPR_PCI_HOST_BRIDGE); + VHostSCSICommon *vsc = CAST(VHostSCSICommon, dev, TYPE_VHOST_SCSI_COMMON); ++ PCIDevice *pcidev = CAST(PCIDevice, dev, TYPE_PCI_DEVICE); + + if (d) { + void *spapr = CAST(void, bus->parent, "spapr-vscsi"); +@@ -3130,6 +3131,10 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, + return g_strdup_printf("pci@%x", PCI_SLOT(pcidev->devfn)); + } 
+ ++ if (pcidev) { ++ return spapr_pci_fw_dev_name(pcidev); ++ } ++ + return NULL; + } + +diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c +index 88ce87f130..e78e917af1 100644 +--- a/hw/ppc/spapr_pci.c ++++ b/hw/ppc/spapr_pci.c +@@ -1334,15 +1334,29 @@ static int spapr_dt_pci_bus(SpaprPhbState *sphb, PCIBus *bus, + return offset; + } + ++char *spapr_pci_fw_dev_name(PCIDevice *dev) ++{ ++ const gchar *basename; ++ int slot = PCI_SLOT(dev->devfn); ++ int func = PCI_FUNC(dev->devfn); ++ uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3); ++ ++ basename = dt_name_from_class((ccode >> 16) & 0xff, (ccode >> 8) & 0xff, ++ ccode & 0xff); ++ ++ if (func != 0) { ++ return g_strdup_printf("%s@%x,%x", basename, slot, func); ++ } else { ++ return g_strdup_printf("%s@%x", basename, slot); ++ } ++} ++ + /* create OF node for pci device and required OF DT properties */ + static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev, + void *fdt, int parent_offset) + { + int offset; +- const gchar *basename; +- gchar *nodename; +- int slot = PCI_SLOT(dev->devfn); +- int func = PCI_FUNC(dev->devfn); ++ g_autofree gchar *nodename = spapr_pci_fw_dev_name(dev); + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); + ResourceProps rp; + SpaprDrc *drc = drc_from_dev(sphb, dev); +@@ -1359,19 +1373,8 @@ static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev, + uint32_t pci_status = pci_default_read_config(dev, PCI_STATUS, 2); + gchar *loc_code; + +- basename = dt_name_from_class((ccode >> 16) & 0xff, (ccode >> 8) & 0xff, +- ccode & 0xff); +- +- if (func != 0) { +- nodename = g_strdup_printf("%s@%x,%x", basename, slot, func); +- } else { +- nodename = g_strdup_printf("%s@%x", basename, slot); +- } +- + _FDT(offset = fdt_add_subnode(fdt, parent_offset, nodename)); + +- g_free(nodename); +- + /* in accordance with PAPR+ v2.7 13.6.3, Table 181 */ + _FDT(fdt_setprop_cell(fdt, offset, "vendor-id", vendor_id)); + _FDT(fdt_setprop_cell(fdt, offset, "device-id", 
device_id)); +diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h +index bd014823a9..5b03a7b0eb 100644 +--- a/include/hw/pci-host/spapr.h ++++ b/include/hw/pci-host/spapr.h +@@ -210,4 +210,6 @@ static inline unsigned spapr_phb_windows_supported(SpaprPhbState *sphb) + return sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1; + } + ++char *spapr_pci_fw_dev_name(PCIDevice *dev); ++ + #endif /* PCI_HOST_SPAPR_H */ +-- +2.27.0 + diff --git a/kvm-vhost-Check-for-valid-vdev-in-vhost_backend_handle_i.patch b/kvm-vhost-Check-for-valid-vdev-in-vhost_backend_handle_i.patch new file mode 100644 index 0000000..ddd67b7 --- /dev/null +++ b/kvm-vhost-Check-for-valid-vdev-in-vhost_backend_handle_i.patch @@ -0,0 +1,75 @@ +From 9142072649d593acbd118e71f5d257bd9996ab36 Mon Sep 17 00:00:00 2001 +From: eperezma +Date: Fri, 19 Feb 2021 08:49:50 -0300 +Subject: [PATCH 12/54] vhost: Check for valid vdev in + vhost_backend_handle_iotlb_msg +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: eperezma +Message-id: <20210219084950.2159701-1-eperezma@redhat.com> +Patchwork-id: 101110 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH] vhost: Check for valid vdev in vhost_backend_handle_iotlb_msg +Bugzilla: 1880299 +RH-Acked-by: Peter Xu +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Xiao Wang + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1880299 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=35031170 + +Not checking this can lead to invalid dev->vdev member access in +vhost_device_iotlb_miss if the backend issues an IOTLB message at a bad +time, either maliciously or due to a bug. + +Reproduced rebooting a guest with testpmd in txonly forward mode. 
+ #0 0x0000559ffff94394 in vhost_device_iotlb_miss ( + dev=dev@entry=0x55a0012f6680, iova=10245279744, write=1) + at ../hw/virtio/vhost.c:1013 + #1 0x0000559ffff9ac31 in vhost_backend_handle_iotlb_msg ( + imsg=0x7ffddcfd32c0, dev=0x55a0012f6680) + at ../hw/virtio/vhost-backend.c:411 + #2 vhost_backend_handle_iotlb_msg (dev=dev@entry=0x55a0012f6680, + imsg=imsg@entry=0x7ffddcfd32c0) + at ../hw/virtio/vhost-backend.c:404 + #3 0x0000559fffeded7b in slave_read (opaque=0x55a0012f6680) + at ../hw/virtio/vhost-user.c:1464 + #4 0x000055a0000c541b in aio_dispatch_handler ( + ctx=ctx@entry=0x55a0010a2120, node=0x55a0012d9e00) + at ../util/aio-posix.c:329 + +Fixes: 020e571b8b ("vhost: rework IOTLB messaging") +Signed-off-by: Eugenio Pérez +Message-Id: <20210129090728.831208-1-eperezma@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 4d1ccc17f40f73313e13c84914f70ec3d40ac738) +Signed-off-by: Eugenio Pérez +Signed-off-by: Eduardo Lima (Etrunko) +--- + hw/virtio/vhost-backend.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c +index 222bbcc62d..31b33bde37 100644 +--- a/hw/virtio/vhost-backend.c ++++ b/hw/virtio/vhost-backend.c +@@ -406,6 +406,11 @@ int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, + { + int ret = 0; + ++ if (unlikely(!dev->vdev)) { ++ error_report("Unexpected IOTLB message when virtio device is stopped"); ++ return -EINVAL; ++ } ++ + switch (imsg->type) { + case VHOST_IOTLB_MISS: + ret = vhost_device_iotlb_miss(dev, imsg->iova, +-- +2.27.0 + diff --git a/kvm-virtio-move-use-disabled-flag-property-to-hw_compat_.patch b/kvm-virtio-move-use-disabled-flag-property-to-hw_compat_.patch new file mode 100644 index 0000000..1869297 --- /dev/null +++ b/kvm-virtio-move-use-disabled-flag-property-to-hw_compat_.patch @@ -0,0 +1,87 @@ +From 4a1b30af56f99b9fe7ecdd47aa9691fed5d3a0c3 Mon Sep 17 00:00:00 2001 +From: Stefano 
Garzarella +Date: Tue, 9 Feb 2021 17:15:33 -0300 +Subject: [PATCH 04/54] virtio: move 'use-disabled-flag' property to + hw_compat_4_2 + +RH-Author: Stefano Garzarella +Message-id: <20210209171533.133268-2-sgarzare@redhat.com> +Patchwork-id: 101012 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] virtio: move 'use-disabled-flag' property to hw_compat_4_2 +Bugzilla: 1907255 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Dr. David Alan Gilbert + +Commit 9d7bd0826f introduced a new 'use-disabled-flag' property +set to true by default. +To allow the migration, we set this property to false in the hw_compat, +but in the wrong place (hw_compat_4_1). + +Since commit 9d7bd0826f was released with QEMU 5.0, we move +'use-disabled-flag' property to hw_compat_4_2, so 4.2 machine types +will have the pre-patch behavior and the migration can work. + +The issue was discovered with vhost-vsock device and 4.2 machine +type without running any kernel in the VM: + $ qemu-4.2 -M pc-q35-4.2,accel=kvm \ + -device vhost-vsock-pci,guest-cid=4 \ + -monitor stdio -incoming tcp:0:3333 + + $ qemu-5.2 -M pc-q35-4.2,accel=kvm \ + -device vhost-vsock-pci,guest-cid=3 \ + -monitor stdio + (qemu) migrate -d tcp:0:3333 + + # qemu-4.2 output + qemu-system-x86_64: Failed to load virtio-vhost_vsock:virtio + qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:03.0/virtio-vhost_vsock' + qemu-system-x86_64: load of migration failed: No such file or directory + +Reported-by: Jing Zhao +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1907255 +Fixes: 9d7bd0826f ("virtio-pci: disable vring processing when bus-mastering is disabled") +Cc: mdroth@linux.vnet.ibm.com +CC: qemu-stable@nongnu.org +Signed-off-by: Stefano Garzarella +Message-Id: <20210108171252.209502-1-sgarzare@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit c126b4c57e0164549de606ca35d1512762051083) +[sgarzare: add 'use-disabled-flag' property to hw_compat_rhel_8_2] +Signed-off-by: Stefano Garzarella +Signed-off-by: Eduardo Lima (Etrunko) +--- + hw/core/machine.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 68495b9411..92398966a4 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -77,6 +77,8 @@ GlobalProperty hw_compat_rhel_8_2[] = { + { "qxl-vga", "revision", "4" }, + /* hw_compat_rhel_8_2 from hw_compat_4_2 */ + { "fw_cfg", "acpi-mr-restore", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-device", "use-disabled-flag", "false" }, + /* hw_compat_rhel_8_2 from hw_compat_5_0 */ + { "pci-host-bridge", "x-config-reg-migration-enabled", "off" }, + /* hw_compat_rhel_8_2 from hw_compat_5_0 */ +@@ -298,12 +300,12 @@ GlobalProperty hw_compat_4_2[] = { + { "qxl", "revision", "4" }, + { "qxl-vga", "revision", "4" }, + { "fw_cfg", "acpi-mr-restore", "false" }, ++ { "virtio-device", "use-disabled-flag", "false" }, + }; + const size_t hw_compat_4_2_len = G_N_ELEMENTS(hw_compat_4_2); + + GlobalProperty hw_compat_4_1[] = { + { "virtio-pci", "x-pcie-flr-init", "off" }, +- { "virtio-device", "use-disabled-flag", "false" }, + }; + const size_t hw_compat_4_1_len = G_N_ELEMENTS(hw_compat_4_1); + +-- +2.27.0 + diff --git a/kvm-virtio-net-add-missing-object_unref.patch b/kvm-virtio-net-add-missing-object_unref.patch new file mode 100644 index 0000000..efa7b44 --- /dev/null +++ b/kvm-virtio-net-add-missing-object_unref.patch @@ -0,0 +1,66 @@ +From d66b778c2f888507dedc3e4111006215dd394b95 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 25 Feb 2021 23:14:47 -0500 +Subject: [PATCH 50/54] virtio-net: add missing object_unref() + +RH-Author: Laurent Vivier +Message-id: <20210225231447.2187738-28-lvivier@redhat.com> +Patchwork-id: 101266 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 27/27] 
virtio-net: add missing object_unref() +Bugzilla: 1819991 +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Jens Freimann +RH-Acked-by: Michael S. Tsirkin + +BZ: https://bugzilla.redhat.com/1819991 +BRANCH: rhel-av-8.4.0 +UPSTREAM: Merged + +failover_add_primary() calls qdev_device_add() and doesn't unref +the device. Because of that, when the device is unplugged a reference +remains and prevents the cleanup of the object. + +This prevents the failover primary device from being plugged back in, +with errors like: + + (qemu) device_add vfio-pci,host=0000:41:00.0,id=hostdev0,bus=root.3,failover_pair_id=net0 + (qemu) device_del hostdev0 + +We can check with "info qtree" and "info pci" that the device has been removed, and then: + + (qemu) device_add vfio-pci,host=0000:41:00.0,id=hostdev1,bus=root.3,failover_pair_id=net0 + Error: vfio 0000:41:00.0: device is already attached + (qemu) device_add vfio-pci,host=0000:41:00.0,id=hostdev0,bus=root.3,failover_pair_id=net0 + qemu-kvm: Duplicate ID 'hostdev0' for device + +Fixes: 21e8709b29cd ("failover: Remove primary_dev member") +Cc: quintela@redhat.com +Signed-off-by: Laurent Vivier +Message-Id: <20210212135250.2738750-3-lvivier@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Jens Freimann +(cherry picked from commit 00e7b1299599384dfdda2a2a4570a0fb2d69eb6b) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/net/virtio-net.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 044ac95f6f..7faaa829b6 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -869,6 +869,8 @@ static void failover_add_primary(VirtIONet *n, Error **errp) + dev = qdev_device_add(opts, &err); + if (err) { + qemu_opts_del(opts); ++ } else { ++ object_unref(OBJECT(dev)); + } + } else { + error_setg(errp, "Primary device not found"); +-- +2.27.0 + diff --git a/kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch b/kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch new file mode 100644 index 0000000..eb73a1c --- /dev/null +++ b/kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch @@ -0,0 +1,157 @@ +From 745a04765f21dad1991be89e23dd97a0543d3fce Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 10 Feb 2021 11:15:16 -0300 +Subject: [PATCH 05/54] virtiofsd: extract lo_do_open() from lo_open() + +RH-Author: Stefan Hajnoczi +Message-id: <20210210111518.228148-2-stefanha@redhat.com> +Patchwork-id: 101032 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/3] virtiofsd: extract lo_do_open() from lo_open() +Bugzilla: 1920740 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Greg Kurz +RH-Acked-by: Dr. David Alan Gilbert + +Both lo_open() and lo_create() have similar code to open a file. Extract +a common lo_do_open() function from lo_open() that will be used by +lo_create() in a later commit. + +Since lo_do_open() does not otherwise need fuse_req_t req, convert +lo_add_fd_mapping() to use struct lo_data *lo instead. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20210204150208.367837-2-stefanha@redhat.com> +Reviewed-by: Greg Kurz +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 8afaaee976965b7fb90ec225a51d60f35c5f173c) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Eduardo Lima (Etrunko) +--- + tools/virtiofsd/passthrough_ll.c | 73 ++++++++++++++++++++------------ + 1 file changed, 46 insertions(+), 27 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 97485b22b4..218e20e9d7 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -471,17 +471,17 @@ static void lo_map_remove(struct lo_map *map, size_t key) + } + + /* Assumes lo->mutex is held */ +-static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) ++static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd) + { + struct lo_map_elem *elem; + +- elem = lo_map_alloc_elem(&lo_data(req)->fd_map); ++ elem = lo_map_alloc_elem(&lo->fd_map); + if (!elem) { + return -1; + } + + elem->fd = fd; +- return elem - lo_data(req)->fd_map.elems; ++ return elem - lo->fd_map.elems; + } + + /* Assumes lo->mutex is held */ +@@ -1661,6 +1661,38 @@ static void update_open_flags(int writeback, int allow_direct_io, + } + } + ++static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, ++ struct fuse_file_info *fi) ++{ ++ char buf[64]; ++ ssize_t fh; ++ int fd; ++ ++ update_open_flags(lo->writeback, lo->allow_direct_io, fi); ++ ++ sprintf(buf, "%i", inode->fd); ++ fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); ++ if (fd == -1) { ++ return errno; ++ } ++ ++ pthread_mutex_lock(&lo->mutex); ++ fh = lo_add_fd_mapping(lo, fd); ++ pthread_mutex_unlock(&lo->mutex); ++ if (fh == -1) { ++ close(fd); ++ return ENOMEM; ++ } ++ ++ fi->fh = fh; ++ if (lo->cache == CACHE_NONE) { ++ fi->direct_io = 1; ++ } else if (lo->cache == CACHE_ALWAYS) { ++ fi->keep_cache = 1; ++ } ++ return 0; ++} ++ + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, struct fuse_file_info *fi) + { +@@ -1701,7 +1733,7 @@ static void lo_create(fuse_req_t 
req, fuse_ino_t parent, const char *name, + ssize_t fh; + + pthread_mutex_lock(&lo->mutex); +- fh = lo_add_fd_mapping(req, fd); ++ fh = lo_add_fd_mapping(lo, fd); + pthread_mutex_unlock(&lo->mutex); + if (fh == -1) { + close(fd); +@@ -1892,38 +1924,25 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + + static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + { +- int fd; +- ssize_t fh; +- char buf[64]; + struct lo_data *lo = lo_data(req); ++ struct lo_inode *inode = lo_inode(req, ino); ++ int err; + + fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, + fi->flags); + +- update_open_flags(lo->writeback, lo->allow_direct_io, fi); +- +- sprintf(buf, "%i", lo_fd(req, ino)); +- fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); +- if (fd == -1) { +- return (void)fuse_reply_err(req, errno); +- } +- +- pthread_mutex_lock(&lo->mutex); +- fh = lo_add_fd_mapping(req, fd); +- pthread_mutex_unlock(&lo->mutex); +- if (fh == -1) { +- close(fd); +- fuse_reply_err(req, ENOMEM); ++ if (!inode) { ++ fuse_reply_err(req, EBADF); + return; + } + +- fi->fh = fh; +- if (lo->cache == CACHE_NONE) { +- fi->direct_io = 1; +- } else if (lo->cache == CACHE_ALWAYS) { +- fi->keep_cache = 1; ++ err = lo_do_open(lo, inode, fi); ++ lo_inode_put(lo, &inode); ++ if (err) { ++ fuse_reply_err(req, err); ++ } else { ++ fuse_reply_open(req, fi); + } +- fuse_reply_open(req, fi); + } + + static void lo_release(fuse_req_t req, fuse_ino_t ino, +-- +2.27.0 + diff --git a/kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch b/kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch new file mode 100644 index 0000000..95d8085 --- /dev/null +++ b/kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch @@ -0,0 +1,121 @@ +From 24833a2db44e39ec7652779a0fa2e70983b9cb4e Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 10 Feb 2021 11:15:17 -0300 +Subject: [PATCH 06/54] virtiofsd: optionally return inode 
pointer from + lo_do_lookup() + +RH-Author: Stefan Hajnoczi +Message-id: <20210210111518.228148-3-stefanha@redhat.com> +Patchwork-id: 101033 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/3] virtiofsd: optionally return inode pointer from lo_do_lookup() +Bugzilla: 1920740 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Greg Kurz +RH-Acked-by: Dr. David Alan Gilbert + +lo_do_lookup() finds an existing inode or allocates a new one. It +increments nlookup so that the inode stays alive until the client +releases it. + +Existing callers don't need the struct lo_inode so the function doesn't +return it. Extend the function to optionally return the inode. The next +commit will need it. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Greg Kurz +Message-Id: <20210204150208.367837-3-stefanha@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 22d2ece71e533310da31f2857ebc4a00d91968b3) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Eduardo Lima (Etrunko) +--- + tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++-------- + 1 file changed, 21 insertions(+), 8 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 218e20e9d7..2bd050b620 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -843,11 +843,13 @@ static int do_statx(struct lo_data *lo, int dirfd, const char *pathname, + } + + /* +- * Increments nlookup and caller must release refcount using +- * lo_inode_put(&parent). ++ * Increments nlookup on the inode on success. unref_inode_lolocked() must be ++ * called eventually to decrement nlookup again. If inodep is non-NULL, the ++ * inode pointer is stored and the caller must call lo_inode_put(). 
+ */ + static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, +- struct fuse_entry_param *e) ++ struct fuse_entry_param *e, ++ struct lo_inode **inodep) + { + int newfd; + int res; +@@ -857,6 +859,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + struct lo_inode *inode = NULL; + struct lo_inode *dir = lo_inode(req, parent); + ++ if (inodep) { ++ *inodep = NULL; ++ } ++ + /* + * name_to_handle_at() and open_by_handle_at() can reach here with fuse + * mount point in guest, but we don't have its inode info in the +@@ -924,7 +930,14 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + pthread_mutex_unlock(&lo->mutex); + } + e->ino = inode->fuse_ino; +- lo_inode_put(lo, &inode); ++ ++ /* Transfer ownership of inode pointer to caller or drop it */ ++ if (inodep) { ++ *inodep = inode; ++ } else { ++ lo_inode_put(lo, &inode); ++ } ++ + lo_inode_put(lo, &dir); + + fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, +@@ -959,7 +972,7 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + return; + } + +- err = lo_do_lookup(req, parent, name, &e); ++ err = lo_do_lookup(req, parent, name, &e, NULL); + if (err) { + fuse_reply_err(req, err); + } else { +@@ -1067,7 +1080,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, + goto out; + } + +- saverr = lo_do_lookup(req, parent, name, &e); ++ saverr = lo_do_lookup(req, parent, name, &e, NULL); + if (saverr) { + goto out; + } +@@ -1544,7 +1557,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + + if (plus) { + if (!is_dot_or_dotdot(name)) { +- err = lo_do_lookup(req, ino, name, &e); ++ err = lo_do_lookup(req, ino, name, &e, NULL); + if (err) { + goto error; + } +@@ -1742,7 +1755,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + } + + fi->fh = fh; +- err = lo_do_lookup(req, parent, name, &e); ++ err = lo_do_lookup(req, parent, 
name, &e, NULL); + } + if (lo->cache == CACHE_NONE) { + fi->direct_io = 1; +-- +2.27.0 + diff --git a/kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch b/kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch new file mode 100644 index 0000000..a4ded98 --- /dev/null +++ b/kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch @@ -0,0 +1,311 @@ +From 8cc13bdaa45cca3ef907cad9697683390aff2545 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 10 Feb 2021 11:15:18 -0300 +Subject: [PATCH 07/54] virtiofsd: prevent opening of special files + (CVE-2020-35517) + +RH-Author: Stefan Hajnoczi +Message-id: <20210210111518.228148-4-stefanha@redhat.com> +Patchwork-id: 101034 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 3/3] virtiofsd: prevent opening of special files (CVE-2020-35517) +Bugzilla: 1920740 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Greg Kurz +RH-Acked-by: Dr. David Alan Gilbert + +A well-behaved FUSE client does not attempt to open special files with +FUSE_OPEN because they are handled on the client side (e.g. device nodes +are handled by client-side device drivers). + +The check to prevent virtiofsd from opening special files is missing in +a few cases, most notably FUSE_OPEN. A malicious client can cause +virtiofsd to open a device node, potentially allowing the guest to +escape. This can be exploited by a modified guest device driver. It is +not exploitable from guest userspace since the guest kernel will handle +special files inside the guest instead of sending FUSE requests. + +This patch fixes this issue by introducing the lo_inode_open() function +to check the file type before opening it. This is a short-term solution +because it does not prevent a compromised virtiofsd process from opening +device nodes on the host. + +Restructure lo_create() to try O_CREAT | O_EXCL first. Note that O_CREAT +| O_EXCL does not follow symlinks, so O_NOFOLLOW masking is not +necessary here. 
If the file exists and the user did not specify O_EXCL, +open it via lo_do_open(). + +Reported-by: Alex Xu +Fixes: CVE-2020-35517 +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Vivek Goyal +Reviewed-by: Greg Kurz +Signed-off-by: Stefan Hajnoczi +Message-Id: <20210204150208.367837-4-stefanha@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit a3fdbbc7f271bff7d53d0501b29d910ece0b3789) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Eduardo Lima (Etrunko) +--- + tools/virtiofsd/passthrough_ll.c | 144 ++++++++++++++++++++----------- + 1 file changed, 92 insertions(+), 52 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 2bd050b620..03c5e0d13c 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -567,6 +567,38 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino) + return fd; + } + ++/* ++ * Open a file descriptor for an inode. Returns -EBADF if the inode is not a ++ * regular file or a directory. ++ * ++ * Use this helper function instead of raw openat(2) to prevent security issues ++ * when a malicious client opens special files such as block device nodes. ++ * Symlink inodes are also rejected since symlinks must already have been ++ * traversed on the client side. ++ */ ++static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode, ++ int open_flags) ++{ ++ g_autofree char *fd_str = g_strdup_printf("%d", inode->fd); ++ int fd; ++ ++ if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) { ++ return -EBADF; ++ } ++ ++ /* ++ * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier ++ * that the inode is not a special file but if an external process races ++ * with us then symlinks are traversed here. It is not possible to escape ++ * the shared directory since it is mounted as "/" though. 
++ */ ++ fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW); ++ if (fd < 0) { ++ return -errno; ++ } ++ return fd; ++} ++ + static void lo_init(void *userdata, struct fuse_conn_info *conn) + { + struct lo_data *lo = (struct lo_data *)userdata; +@@ -696,9 +728,9 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + if (fi) { + truncfd = fd; + } else { +- sprintf(procname, "%i", ifd); +- truncfd = openat(lo->proc_self_fd, procname, O_RDWR); ++ truncfd = lo_inode_open(lo, inode, O_RDWR); + if (truncfd < 0) { ++ errno = -truncfd; + goto out_err; + } + } +@@ -860,7 +892,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, + struct lo_inode *dir = lo_inode(req, parent); + + if (inodep) { +- *inodep = NULL; ++ *inodep = NULL; /* in case there is an error */ + } + + /* +@@ -1674,19 +1706,26 @@ static void update_open_flags(int writeback, int allow_direct_io, + } + } + ++/* ++ * Open a regular file, set up an fd mapping, and fill out the struct ++ * fuse_file_info for it. If existing_fd is not negative, use that fd instead ++ * opening a new one. Takes ownership of existing_fd. ++ * ++ * Returns 0 on success or a positive errno. 
++ */ + static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, +- struct fuse_file_info *fi) ++ int existing_fd, struct fuse_file_info *fi) + { +- char buf[64]; + ssize_t fh; +- int fd; ++ int fd = existing_fd; + + update_open_flags(lo->writeback, lo->allow_direct_io, fi); + +- sprintf(buf, "%i", inode->fd); +- fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); +- if (fd == -1) { +- return errno; ++ if (fd < 0) { ++ fd = lo_inode_open(lo, inode, fi->flags); ++ if (fd < 0) { ++ return -fd; ++ } + } + + pthread_mutex_lock(&lo->mutex); +@@ -1709,9 +1748,10 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, + static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, struct fuse_file_info *fi) + { +- int fd; ++ int fd = -1; + struct lo_data *lo = lo_data(req); + struct lo_inode *parent_inode; ++ struct lo_inode *inode = NULL; + struct fuse_entry_param e; + int err; + struct lo_cred old = {}; +@@ -1737,36 +1777,38 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, + + update_open_flags(lo->writeback, lo->allow_direct_io, fi); + +- fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, +- mode); ++ /* Try to create a new file but don't open existing files */ ++ fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | O_EXCL, mode); + err = fd == -1 ? 
errno : 0; +- lo_restore_cred(&old); + +- if (!err) { +- ssize_t fh; ++ lo_restore_cred(&old); + +- pthread_mutex_lock(&lo->mutex); +- fh = lo_add_fd_mapping(lo, fd); +- pthread_mutex_unlock(&lo->mutex); +- if (fh == -1) { +- close(fd); +- err = ENOMEM; +- goto out; +- } ++ /* Ignore the error if file exists and O_EXCL was not given */ ++ if (err && (err != EEXIST || (fi->flags & O_EXCL))) { ++ goto out; ++ } + +- fi->fh = fh; +- err = lo_do_lookup(req, parent, name, &e, NULL); ++ err = lo_do_lookup(req, parent, name, &e, &inode); ++ if (err) { ++ goto out; + } +- if (lo->cache == CACHE_NONE) { +- fi->direct_io = 1; +- } else if (lo->cache == CACHE_ALWAYS) { +- fi->keep_cache = 1; ++ ++ err = lo_do_open(lo, inode, fd, fi); ++ fd = -1; /* lo_do_open() takes ownership of fd */ ++ if (err) { ++ /* Undo lo_do_lookup() nlookup ref */ ++ unref_inode_lolocked(lo, inode, 1); + } + + out: ++ lo_inode_put(lo, &inode); + lo_inode_put(lo, &parent_inode); + + if (err) { ++ if (fd >= 0) { ++ close(fd); ++ } ++ + fuse_reply_err(req, err); + } else { + fuse_reply_create(req, &e, fi); +@@ -1780,7 +1822,6 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, + pid_t pid, int *err) + { + struct lo_inode_plock *plock; +- char procname[64]; + int fd; + + plock = +@@ -1797,12 +1838,10 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, + } + + /* Open another instance of file which can be used for ofd locks. */ +- sprintf(procname, "%i", inode->fd); +- + /* TODO: What if file is not writable? 
*/ +- fd = openat(lo->proc_self_fd, procname, O_RDWR); +- if (fd == -1) { +- *err = errno; ++ fd = lo_inode_open(lo, inode, O_RDWR); ++ if (fd < 0) { ++ *err = -fd; + free(plock); + return NULL; + } +@@ -1949,7 +1988,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + return; + } + +- err = lo_do_open(lo, inode, fi); ++ err = lo_do_open(lo, inode, -1, fi); + lo_inode_put(lo, &inode); + if (err) { + fuse_reply_err(req, err); +@@ -2005,39 +2044,40 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) + static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi) + { ++ struct lo_inode *inode = lo_inode(req, ino); ++ struct lo_data *lo = lo_data(req); + int res; + int fd; +- char *buf; + + fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, + (void *)fi); + +- if (!fi) { +- struct lo_data *lo = lo_data(req); +- +- res = asprintf(&buf, "%i", lo_fd(req, ino)); +- if (res == -1) { +- return (void)fuse_reply_err(req, errno); +- } ++ if (!inode) { ++ fuse_reply_err(req, EBADF); ++ return; ++ } + +- fd = openat(lo->proc_self_fd, buf, O_RDWR); +- free(buf); +- if (fd == -1) { +- return (void)fuse_reply_err(req, errno); ++ if (!fi) { ++ fd = lo_inode_open(lo, inode, O_RDWR); ++ if (fd < 0) { ++ res = -fd; ++ goto out; + } + } else { + fd = lo_fi_fd(req, fi); + } + + if (datasync) { +- res = fdatasync(fd); ++ res = fdatasync(fd) == -1 ? errno : 0; + } else { +- res = fsync(fd); ++ res = fsync(fd) == -1 ? errno : 0; + } + if (!fi) { + close(fd); + } +- fuse_reply_err(req, res == -1 ? 
errno : 0); ++out: ++ lo_inode_put(lo, &inode); ++ fuse_reply_err(req, res); + } + + static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, +-- +2.27.0 + diff --git a/kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch b/kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch new file mode 100644 index 0000000..68d2cd8 --- /dev/null +++ b/kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch @@ -0,0 +1,91 @@ +From ed5fe7ae48c263ff69602b55361806f896ed12fb Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 23 Feb 2021 15:18:10 -0500 +Subject: [PATCH 51/54] x86/cpu: Populate SVM CPUID feature bits + +RH-Author: Dr. David Alan Gilbert +Message-id: <20210223151811.27968-2-dgilbert@redhat.com> +Patchwork-id: 101197 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/2] x86/cpu: Populate SVM CPUID feature bits +Bugzilla: 1926785 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Peter Xu + +From: Wei Huang + +Newer AMD CPUs will add CPUID_0x8000000A_EDX[28] bit, which indicates +that SVM instructions (VMRUN/VMSAVE/VMLOAD) will trigger #VMEXIT before +CPU checking their EAX against reserved memory regions. This change will +allow the hypervisor to avoid intercepting #GP and emulating SVM +instructions. KVM turns on this CPUID bit for nested VMs. In order to +support it, let us populate this bit, along with other SVM feature bits, +in FEAT_SVM. + +Signed-off-by: Wei Huang +Message-Id: <20210126202456.589932-1-wei.huang2@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 5447089c2b3b084b51670af36fc86ee3979e04be) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 6 +++--- + target/i386/cpu.h | 24 ++++++++++++++---------- + 2 files changed, 17 insertions(+), 13 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index f944b41573..372cba2942 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -922,11 +922,11 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "npt", "lbrv", "svm-lock", "nrip-save", + "tsc-scale", "vmcb-clean", "flushbyasid", "decodeassists", + NULL, NULL, "pause-filter", NULL, +- "pfthreshold", NULL, NULL, NULL, +- NULL, NULL, NULL, NULL, +- NULL, NULL, NULL, NULL, ++ "pfthreshold", "avic", NULL, "v-vmsave-vmload", ++ "vgif", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, ++ "svme-addr-chk", NULL, NULL, NULL, + }, + .cpuid = { .eax = 0x8000000A, .reg = R_EDX, }, + .tcg_features = TCG_SVM_FEATURES, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index a3db7e3c6c..4fdb552f93 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -672,16 +672,20 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_EXT3_PERFCORE (1U << 23) + #define CPUID_EXT3_PERFNB (1U << 24) + +-#define CPUID_SVM_NPT (1U << 0) +-#define CPUID_SVM_LBRV (1U << 1) +-#define CPUID_SVM_SVMLOCK (1U << 2) +-#define CPUID_SVM_NRIPSAVE (1U << 3) +-#define CPUID_SVM_TSCSCALE (1U << 4) +-#define CPUID_SVM_VMCBCLEAN (1U << 5) +-#define CPUID_SVM_FLUSHASID (1U << 6) +-#define CPUID_SVM_DECODEASSIST (1U << 7) +-#define CPUID_SVM_PAUSEFILTER (1U << 10) +-#define CPUID_SVM_PFTHRESHOLD (1U << 12) ++#define CPUID_SVM_NPT (1U << 0) ++#define CPUID_SVM_LBRV (1U << 1) ++#define CPUID_SVM_SVMLOCK (1U << 2) ++#define CPUID_SVM_NRIPSAVE (1U << 3) ++#define CPUID_SVM_TSCSCALE (1U << 4) ++#define CPUID_SVM_VMCBCLEAN (1U << 5) ++#define CPUID_SVM_FLUSHASID (1U << 6) ++#define CPUID_SVM_DECODEASSIST (1U << 7) ++#define CPUID_SVM_PAUSEFILTER (1U << 10) ++#define CPUID_SVM_PFTHRESHOLD (1U << 12) ++#define CPUID_SVM_AVIC (1U << 13) ++#define 
CPUID_SVM_V_VMSAVE_VMLOAD (1U << 15) ++#define CPUID_SVM_VGIF (1U << 16) ++#define CPUID_SVM_SVME_ADDR_CHK (1U << 28) + + /* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ + #define CPUID_7_0_EBX_FSGSBASE (1U << 0) +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 15cb799..fb49fac 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -64,7 +64,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.2.0 -Release: 7%{?dist} +Release: 10%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -182,6 +182,108 @@ Patch71: kvm-storage-daemon-Call-bdrv_close_all-on-exit.patch # For bz#1918966 - [incremental_backup] qemu aborts if guest reboot during backup when using virtio-blk: "aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule'" # For bz#1918968 - [incremental_backup] qemu deadlock after poweroff in guest during backup in nbd_export_close_all() Patch72: kvm-block-move-blk_exp_close_all-to-qemu_cleanup.patch +# For bz#1887883 - qemu blocks client progress with various NBD actions +Patch73: kvm-block-nbd-only-detach-existing-iochannel-from-aio_co.patch +# For bz#1887883 - qemu blocks client progress with various NBD actions +Patch74: kvm-block-nbd-only-enter-connection-coroutine-if-it-s-pr.patch +# For bz#1887883 - qemu blocks client progress with various NBD actions +Patch75: kvm-nbd-make-nbd_read-return-EIO-on-error.patch +# For bz#1907255 - Migrate failed with vhost-vsock-pci from RHEL-AV 8.3.1 to RHEL-AV 8.2.1 +Patch76: kvm-virtio-move-use-disabled-flag-property-to-hw_compat_.patch +# For bz#1920740 - CVE-2020-35517 virt:8.4/qemu-kvm: QEMU: virtiofsd: potential privileged host device access from guest [rhel-av-8.4.0] +Patch77: kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch +# For bz#1920740 - CVE-2020-35517 virt:8.4/qemu-kvm: QEMU: virtiofsd: potential privileged host 
device access from guest [rhel-av-8.4.0] +Patch78: kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch +# For bz#1920740 - CVE-2020-35517 virt:8.4/qemu-kvm: QEMU: virtiofsd: potential privileged host device access from guest [rhel-av-8.4.0] +Patch79: kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch +# For bz#1920941 - [ppc64le] [AV]--disk cdimage.iso,bus=usb fails to boot +Patch80: kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch +# For bz#1917830 - Add romsize property to qemu-kvm +Patch81: kvm-pci-reject-too-large-ROMs.patch +# For bz#1917830 - Add romsize property to qemu-kvm +Patch82: kvm-pci-add-romsize-property.patch +# For bz#1917826 - Add extra device support to qemu-kvm, but not to rhel machine types +Patch83: kvm-redhat-Add-some-devices-for-exporting-upstream-machi.patch +# For bz#1880299 - vhost-user mq connection fails to restart after kill host testpmd which acts as vhost-user client +Patch84: kvm-vhost-Check-for-valid-vdev-in-vhost_backend_handle_i.patch +# For bz#1901323 - QSD (QEMU Storage Daemon): basic support - TechPreview +Patch85: kvm-docs-generate-qemu-storage-daemon-qmp-ref-7-man-page.patch +# For bz#1901323 - QSD (QEMU Storage Daemon): basic support - TechPreview +Patch86: kvm-docs-add-qemu-storage-daemon-1-man-page.patch +# For bz#1901323 - QSD (QEMU Storage Daemon): basic support - TechPreview +Patch87: kvm-docs-Add-qemu-storage-daemon-1-manpage-to-meson.buil.patch +# For bz#1901323 - QSD (QEMU Storage Daemon): basic support - TechPreview +Patch88: kvm-qemu-storage-daemon-Enable-object-add.patch +# For bz#1930033 - enable vhost-user-blk device [TechPreview] +Patch90: kvm-default-configs-Enable-vhost-user-blk.patch +# For bz#1925345 - qemu-nbd needs larger backlog for Unix socket listen() +Patch91: kvm-qemu-nbd-Use-SOMAXCONN-for-socket-listen-backlog.patch +# For bz#1917654 - [failover vf migration][RHEL84 vm] After start a vm with a failover vf + a failover virtio net device, the failvoer vf do not exist in the 
vm +Patch92: kvm-pcie-don-t-set-link-state-active-if-the-slot-is-empt.patch +# For bz#1930757 - Allow control of block-dirty-bitmap persistence via 'block-bitmap-mapping' +Patch93: kvm-migration-dirty-bitmap-Use-struct-for-alias-map-inne.patch +# For bz#1930757 - Allow control of block-dirty-bitmap persistence via 'block-bitmap-mapping' +Patch94: kvm-migration-dirty-bitmap-Allow-control-of-bitmap-persi.patch +# For bz#1930757 - Allow control of block-dirty-bitmap persistence via 'block-bitmap-mapping' +Patch95: kvm-qemu-iotests-300-Add-test-case-for-modifying-persist.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch96: kvm-failover-fix-indentantion.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch97: kvm-failover-Use-always-atomics-for-primary_should_be_hi.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch98: kvm-failover-primary-bus-is-only-used-once-and-where-it-.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch99: kvm-failover-Remove-unused-parameter.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch100: kvm-failover-Remove-external-partially_hotplugged-proper.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch101: kvm-failover-qdev_device_add-returns-err-or-dev-set.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch102: kvm-failover-Rename-bool-to-failover_primary_hidden.patch +# For 
bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch103: kvm-failover-g_strcmp0-knows-how-to-handle-NULL.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch104: kvm-failover-Remove-primary_device_opts.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch105: kvm-failover-remove-standby_id-variable.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch106: kvm-failover-Remove-primary_device_dict.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch107: kvm-failover-Remove-memory-leak.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch108: kvm-failover-simplify-virtio_net_find_primary.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch109: kvm-failover-should_be_hidden-should-take-a-bool.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch110: kvm-failover-Rename-function-to-hide_device.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch111: kvm-failover-virtio_net_connect_failover_devices-does-no.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch112: kvm-failover-Rename-to-failover_find_primary_device.patch +# For bz#1819991 - Hostdev 
type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch113: kvm-failover-simplify-qdev_device_add-failover-case.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch114: kvm-failover-simplify-qdev_device_add.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch115: kvm-failover-make-sure-that-id-always-exist.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch116: kvm-failover-remove-failover_find_primary_device-error-p.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch117: kvm-failover-split-failover_find_primary_device_id.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch118: kvm-failover-We-don-t-need-to-cache-primary_device_id-an.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch119: kvm-failover-Caller-of-this-two-functions-already-have-p.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch120: kvm-failover-simplify-failover_unplug_primary.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch121: kvm-failover-Remove-primary_dev-member.patch +# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug +Patch122: kvm-virtio-net-add-missing-object_unref.patch +# For 
bz#1926785 - [RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Fast Train +Patch123: kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch +# For bz#1926785 - [RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Fast Train +Patch124: kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch BuildRequires: wget BuildRequires: rpm-build @@ -1045,9 +1147,6 @@ rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/QEMU,cgthree.bin rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-client rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-server -# Remove qemu-storage-daemon -rm -rf ${RPM_BUILD_ROOT}%{_bindir}/qemu-storage-daemon - # Remove efi roms rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/efi*.rom @@ -1333,8 +1432,11 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %{_bindir}/qemu-img %{_bindir}/qemu-io %{_bindir}/qemu-nbd +%{_bindir}/qemu-storage-daemon %{_mandir}/man1/qemu-img.1* %{_mandir}/man8/qemu-nbd.8* +%{_mandir}/man1/qemu-storage-daemon.1* +%{_mandir}/man7/qemu-storage-daemon-qmp-ref.7* %files -n qemu-guest-agent %defattr(-,root,root,-) @@ -1371,6 +1473,92 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %changelog +* Wed Mar 03 2021 Danilo Cesar Lemes de Paula - 5.2.0-10.el8 +- kvm-migration-dirty-bitmap-Use-struct-for-alias-map-inne.patch [bz#1930757] +- kvm-migration-dirty-bitmap-Allow-control-of-bitmap-persi.patch [bz#1930757] +- kvm-qemu-iotests-300-Add-test-case-for-modifying-persist.patch [bz#1930757] +- kvm-failover-fix-indentantion.patch [bz#1819991] +- kvm-failover-Use-always-atomics-for-primary_should_be_hi.patch [bz#1819991] +- kvm-failover-primary-bus-is-only-used-once-and-where-it-.patch [bz#1819991] +- kvm-failover-Remove-unused-parameter.patch [bz#1819991] +- kvm-failover-Remove-external-partially_hotplugged-proper.patch [bz#1819991] +- kvm-failover-qdev_device_add-returns-err-or-dev-set.patch [bz#1819991] +- kvm-failover-Rename-bool-to-failover_primary_hidden.patch [bz#1819991] +- 
kvm-failover-g_strcmp0-knows-how-to-handle-NULL.patch [bz#1819991] +- kvm-failover-Remove-primary_device_opts.patch [bz#1819991] +- kvm-failover-remove-standby_id-variable.patch [bz#1819991] +- kvm-failover-Remove-primary_device_dict.patch [bz#1819991] +- kvm-failover-Remove-memory-leak.patch [bz#1819991] +- kvm-failover-simplify-virtio_net_find_primary.patch [bz#1819991] +- kvm-failover-should_be_hidden-should-take-a-bool.patch [bz#1819991] +- kvm-failover-Rename-function-to-hide_device.patch [bz#1819991] +- kvm-failover-virtio_net_connect_failover_devices-does-no.patch [bz#1819991] +- kvm-failover-Rename-to-failover_find_primary_device.patch [bz#1819991] +- kvm-failover-simplify-qdev_device_add-failover-case.patch [bz#1819991] +- kvm-failover-simplify-qdev_device_add.patch [bz#1819991] +- kvm-failover-make-sure-that-id-always-exist.patch [bz#1819991] +- kvm-failover-remove-failover_find_primary_device-error-p.patch [bz#1819991] +- kvm-failover-split-failover_find_primary_device_id.patch [bz#1819991] +- kvm-failover-We-don-t-need-to-cache-primary_device_id-an.patch [bz#1819991] +- kvm-failover-Caller-of-this-two-functions-already-have-p.patch [bz#1819991] +- kvm-failover-simplify-failover_unplug_primary.patch [bz#1819991] +- kvm-failover-Remove-primary_dev-member.patch [bz#1819991] +- kvm-virtio-net-add-missing-object_unref.patch [bz#1819991] +- kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch [bz#1926785] +- kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch [bz#1926785] +- Resolves: bz#1930757 + (Allow control of block-dirty-bitmap persistence via 'block-bitmap-mapping') +- Resolves: bz#1819991 + (Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug) +- Resolves: bz#1926785 + ([RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Fast Train) + +* Mon Mar 01 2021 Danilo Cesar Lemes de Paula - 5.2.0-9.el8 +- kvm-docs-generate-qemu-storage-daemon-qmp-ref-7-man-page.patch [bz#1901323] 
+- kvm-docs-add-qemu-storage-daemon-1-man-page.patch [bz#1901323] +- kvm-docs-Add-qemu-storage-daemon-1-manpage-to-meson.buil.patch [bz#1901323] +- kvm-qemu-storage-daemon-Enable-object-add.patch [bz#1901323] +- kvm-spec-Package-qemu-storage-daemon.patch [bz#1901323] +- kvm-default-configs-Enable-vhost-user-blk.patch [bz#1930033] +- kvm-qemu-nbd-Use-SOMAXCONN-for-socket-listen-backlog.patch [bz#1925345] +- kvm-pcie-don-t-set-link-state-active-if-the-slot-is-empt.patch [bz#1917654] +- Resolves: bz#1901323 + (QSD (QEMU Storage Daemon): basic support - TechPreview) +- Resolves: bz#1930033 + (enable vhost-user-blk device) +- Resolves: bz#1925345 + (qemu-nbd needs larger backlog for Unix socket listen()) +- Resolves: bz#1917654 + ([failover vf migration][RHEL84 vm] After start a vm with a failover vf + a failover virtio net device, the failvoer vf do not exist in the vm) + +* Fri Feb 19 2021 Eduardo Lima (Etrunko) - 5.2.0-8.el8 +- kvm-block-nbd-only-detach-existing-iochannel-from-aio_co.patch [bz#1887883] +- kvm-block-nbd-only-enter-connection-coroutine-if-it-s-pr.patch [bz#1887883] +- kvm-nbd-make-nbd_read-return-EIO-on-error.patch [bz#1887883] +- kvm-virtio-move-use-disabled-flag-property-to-hw_compat_.patch [bz#1907255] +- kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch [bz#1920740] +- kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch [bz#1920740] +- kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch [bz#1920740] +- kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch [bz#1920941] +- kvm-pci-reject-too-large-ROMs.patch [bz#1917830] +- kvm-pci-add-romsize-property.patch [bz#1917830] +- kvm-redhat-Add-some-devices-for-exporting-upstream-machi.patch [bz#1917826] +- kvm-vhost-Check-for-valid-vdev-in-vhost_backend_handle_i.patch [bz#1880299] +- Resolves: bz#1887883 + (qemu blocks client progress with various NBD actions) +- Resolves: bz#1907255 + (Migrate failed with vhost-vsock-pci from RHEL-AV 8.3.1 to RHEL-AV 8.2.1) +- Resolves: bz#1920740 + 
(CVE-2020-35517 virt:8.4/qemu-kvm: QEMU: virtiofsd: potential privileged host device access from guest [rhel-av-8.4.0]) +- Resolves: bz#1920941 + ([ppc64le] [AV]--disk cdimage.iso,bus=usb fails to boot) +- Resolves: bz#1917830 + (Add romsize property to qemu-kvm) +- Resolves: bz#1917826 + (Add extra device support to qemu-kvm, but not to rhel machine types) +- Resolves: bz#1880299 + (vhost-user mq connection fails to restart after kill host testpmd which acts as vhost-user client) + * Fri Feb 12 2021 Eduardo Lima (Etrunko) - 5.2.0-7.el8 - kvm-virtio-Add-corresponding-memory_listener_unregister-.patch [bz#1903521] - kvm-block-Honor-blk_set_aio_context-context-requirements.patch [bz#1918966 bz#1918968] From f394fc88317a677278219f6b246303766035bf03 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 15 Mar 2021 07:02:41 +0100 Subject: [PATCH 113/195] Synchronization with qemu-kvm-5.2.0-11.el8 --- ...the-rendering-is-done-when-skipping-.patch | 49 ++++ ...qxl.ssd.dcl.con-on-secondary-devices.patch | 50 ++++ ...mapped-security.capability-xattr-as-.patch | 224 ++++++++++++++++++ ...rror-code-early-at-the-failure-calls.patch | 100 ++++++++ qemu-kvm.spec | 20 +- 5 files changed, 442 insertions(+), 1 deletion(-) create mode 100644 kvm-qxl-also-notify-the-rendering-is-done-when-skipping-.patch create mode 100644 kvm-qxl-set-qxl.ssd.dcl.con-on-secondary-devices.patch create mode 100644 kvm-virtiofs-drop-remapped-security.capability-xattr-as-.patch create mode 100644 kvm-virtiofsd-Save-error-code-early-at-the-failure-calls.patch diff --git a/kvm-qxl-also-notify-the-rendering-is-done-when-skipping-.patch b/kvm-qxl-also-notify-the-rendering-is-done-when-skipping-.patch new file mode 100644 index 0000000..52532b4 --- /dev/null +++ b/kvm-qxl-also-notify-the-rendering-is-done-when-skipping-.patch @@ -0,0 +1,49 @@ +From 51c6fc79d712c73bfeec2e4ff6779da3cab649fd Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Mon, 1 Mar 2021 08:39:20 -0500 +Subject: 
[PATCH 2/4] qxl: also notify the rendering is done when skipping it +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20210301083920.895324-3-marcandre.lureau@redhat.com> +Patchwork-id: 101275 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/2] qxl: also notify the rendering is done when skipping it +Bugzilla: 1932190 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Sergio Lopez Pascual + +From: Marc-André Lureau + +Asynchronous handlers may be waiting for the graphic_hw_update_done() to +be called in this case too. + +Fixes: 4d6316218 ("console: add graphic_hw_update_done()") +Signed-off-by: Marc-André Lureau +Message-Id: <20210201201422.446552-3-marcandre.lureau@redhat.com> +Signed-off-by: Gerd Hoffmann + +(cherry picked from commit b577ab2dda3afc7d6a7befabcf226507ff06c17c) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + hw/display/qxl-render.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c +index 3ce2e57b8f..d28849b121 100644 +--- a/hw/display/qxl-render.c ++++ b/hw/display/qxl-render.c +@@ -181,6 +181,7 @@ void qxl_render_update(PCIQXLDevice *qxl) + qxl->mode == QXL_MODE_UNDEFINED) { + qxl_render_update_area_unlocked(qxl); + qemu_mutex_unlock(&qxl->ssd.lock); ++ graphic_hw_update_done(qxl->ssd.dcl.con); + return; + } + +-- +2.27.0 + diff --git a/kvm-qxl-set-qxl.ssd.dcl.con-on-secondary-devices.patch b/kvm-qxl-set-qxl.ssd.dcl.con-on-secondary-devices.patch new file mode 100644 index 0000000..55ea413 --- /dev/null +++ b/kvm-qxl-set-qxl.ssd.dcl.con-on-secondary-devices.patch @@ -0,0 +1,50 @@ +From 94dc0414a7d5dadbbfc29a19617df7facb0ea7d6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Mon, 1 Mar 2021 08:39:19 -0500 +Subject: [PATCH 1/4] qxl: set qxl.ssd.dcl.con on secondary devices +MIME-Version: 1.0 +Content-Type: text/plain; 
charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +Message-id: <20210301083920.895324-2-marcandre.lureau@redhat.com> +Patchwork-id: 101274 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/2] qxl: set qxl.ssd.dcl.con on secondary devices +Bugzilla: 1932190 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Sergio Lopez Pascual + +From: Marc-André Lureau + +On secondary QXL devices, the console is only set on qxl.vga.con. But +graphic_hw_update_done() is called with qxl.ssd.dcl.con. + +Like for primary QXL devices, set qxl.sdd.dcl.con = qxl.vga.con. + +Signed-off-by: Marc-André Lureau +Message-Id: <20210201201422.446552-2-marcandre.lureau@redhat.com> +Signed-off-by: Gerd Hoffmann + +(cherry picked from commit c502758670432195d61ff848b1b47b0f78918ae2) +Signed-off-by: Marc-André Lureau +Signed-off-by: Danilo C. L. de Paula +--- + hw/display/qxl.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/display/qxl.c b/hw/display/qxl.c +index 431c107096..50f4756b6a 100644 +--- a/hw/display/qxl.c ++++ b/hw/display/qxl.c +@@ -2266,6 +2266,7 @@ static void qxl_realize_secondary(PCIDevice *dev, Error **errp) + qxl->vga.vram_size, &error_fatal); + qxl->vga.vram_ptr = memory_region_get_ram_ptr(&qxl->vga.vram); + qxl->vga.con = graphic_console_init(DEVICE(dev), 0, &qxl_ops, qxl); ++ qxl->ssd.dcl.con = qxl->vga.con; + qxl->id = qemu_console_get_index(qxl->vga.con); /* == channel_id */ + + qxl_realize_common(qxl, errp); +-- +2.27.0 + diff --git a/kvm-virtiofs-drop-remapped-security.capability-xattr-as-.patch b/kvm-virtiofs-drop-remapped-security.capability-xattr-as-.patch new file mode 100644 index 0000000..72fe0a4 --- /dev/null +++ b/kvm-virtiofs-drop-remapped-security.capability-xattr-as-.patch @@ -0,0 +1,224 @@ +From 6a0564e81d5e329f955c4391809daf248f078481 Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Thu, 4 Mar 2021 15:49:01 -0500 +Subject: [PATCH 4/4] virtiofs: drop remapped security.capability xattr as + needed + +RH-Author: Dr. David Alan Gilbert +Message-id: <20210304154901.47930-3-dgilbert@redhat.com> +Patchwork-id: 101305 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/2] virtiofs: drop remapped security.capability xattr as needed +Bugzilla: 1935071 +RH-Acked-by: Connor Kuehl +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Stefan Hajnoczi + +From: "Dr. David Alan Gilbert" + +On Linux, the 'security.capability' xattr holds a set of +capabilities that can change when an executable is run, giving +a limited form of privilege escalation to those programs that +the writer of the file deemed worthy. + +Any write causes the 'security.capability' xattr to be dropped, +stopping anyone from gaining privilege by modifying a blessed +file. + +Fuse relies on the daemon to do this dropping, and in turn the +daemon relies on the host kernel to drop the xattr for it. However, +with the addition of -o xattrmap, the xattr that the guest +stores its capabilities in is now not the same as the one that +the host kernel automatically clears. + +Where the mapping changes 'security.capability', explicitly clear +the remapped name to preserve the same behaviour. + +This bug is assigned CVE-2021-20263. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Vivek Goyal +(cherry picked from commit e586edcb410543768ef009eaa22a2d9dd4a53846) +Signed-off-by: Dr. David Alan Gilbert + Downstream slight context difference due to missing d64907ac FUSE_HANDLE_KILLPRIV_V2 +Signed-off-by: Danilo C. L. 
de Paula +--- + docs/tools/virtiofsd.rst | 4 ++ + tools/virtiofsd/passthrough_ll.c | 77 +++++++++++++++++++++++++++++++- + 2 files changed, 80 insertions(+), 1 deletion(-) + +diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst +index 5b3be8a6d6..6e0fc94005 100644 +--- a/docs/tools/virtiofsd.rst ++++ b/docs/tools/virtiofsd.rst +@@ -228,6 +228,10 @@ The 'map' type adds a number of separate rules to add **prepend** as a prefix + to the matched **key** (or all attributes if **key** is empty). + There may be at most one 'map' rule and it must be the last rule in the set. + ++Note: When the 'security.capability' xattr is remapped, the daemon has to do ++extra work to remove it during many operations, which the host kernel normally ++does itself. ++ + xattr-mapping Examples + ---------------------- + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index f06074d81f..9c33b0344b 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -160,6 +160,7 @@ struct lo_data { + int posix_lock; + int xattr; + char *xattrmap; ++ char *xattr_security_capability; + char *source; + char *modcaps; + double timeout; +@@ -226,6 +227,8 @@ static __thread bool cap_loaded = 0; + + static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st, + uint64_t mnt_id); ++static int xattr_map_client(const struct lo_data *lo, const char *client_name, ++ char **out_name); + + static int is_dot_or_dotdot(const char *name) + { +@@ -365,6 +368,37 @@ out: + return ret; + } + ++/* ++ * The host kernel normally drops security.capability xattr's on ++ * any write, however if we're remapping xattr names we need to drop ++ * whatever the clients security.capability is actually stored as. 
++ */ ++static int drop_security_capability(const struct lo_data *lo, int fd) ++{ ++ if (!lo->xattr_security_capability) { ++ /* We didn't remap the name, let the host kernel do it */ ++ return 0; ++ } ++ if (!fremovexattr(fd, lo->xattr_security_capability)) { ++ /* All good */ ++ return 0; ++ } ++ ++ switch (errno) { ++ case ENODATA: ++ /* Attribute didn't exist, that's fine */ ++ return 0; ++ ++ case ENOTSUP: ++ /* FS didn't support attribute anyway, also fine */ ++ return 0; ++ ++ default: ++ /* Hmm other error */ ++ return errno; ++ } ++} ++ + static void lo_map_init(struct lo_map *map) + { + map->elems = NULL; +@@ -718,6 +752,11 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1; + gid_t gid = (valid & FUSE_SET_ATTR_GID) ? attr->st_gid : (gid_t)-1; + ++ saverr = drop_security_capability(lo, ifd); ++ if (saverr) { ++ goto out_err; ++ } ++ + res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + if (res == -1) { + saverr = errno; +@@ -737,6 +776,14 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + } + } + ++ saverr = drop_security_capability(lo, truncfd); ++ if (saverr) { ++ if (!fi) { ++ close(truncfd); ++ } ++ goto out_err; ++ } ++ + res = ftruncate(truncfd, attr->st_size); + saverr = res == -1 ? 
errno : 0; + if (!fi) { +@@ -1727,6 +1774,13 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, + if (fd < 0) { + return -fd; + } ++ if (fi->flags & (O_TRUNC)) { ++ int err = drop_security_capability(lo, fd); ++ if (err) { ++ close(fd); ++ return err; ++ } ++ } + } + + pthread_mutex_lock(&lo->mutex); +@@ -2115,6 +2169,12 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, + "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, + out_buf.buf[0].size, (unsigned long)off); + ++ res = drop_security_capability(lo_data(req), out_buf.buf[0].fd); ++ if (res) { ++ fuse_reply_err(req, res); ++ return; ++ } ++ + /* + * If kill_priv is set, drop CAP_FSETID which should lead to kernel + * clearing setuid/setgid on file. +@@ -2354,6 +2414,7 @@ static void parse_xattrmap(struct lo_data *lo) + { + const char *map = lo->xattrmap; + const char *tmp; ++ int ret; + + lo->xattr_map_nentries = 0; + while (*map) { +@@ -2384,7 +2445,7 @@ static void parse_xattrmap(struct lo_data *lo) + * the last entry. 
+ */ + parse_xattrmap_map(lo, map, sep); +- return; ++ break; + } else { + fuse_log(FUSE_LOG_ERR, + "%s: Unexpected type;" +@@ -2453,6 +2514,19 @@ static void parse_xattrmap(struct lo_data *lo) + fuse_log(FUSE_LOG_ERR, "Empty xattr map\n"); + exit(1); + } ++ ++ ret = xattr_map_client(lo, "security.capability", ++ &lo->xattr_security_capability); ++ if (ret) { ++ fuse_log(FUSE_LOG_ERR, "Failed to map security.capability: %s\n", ++ strerror(ret)); ++ exit(1); ++ } ++ if (!strcmp(lo->xattr_security_capability, "security.capability")) { ++ /* 1-1 mapping, don't need to do anything */ ++ free(lo->xattr_security_capability); ++ lo->xattr_security_capability = NULL; ++ } + } + + /* +@@ -3481,6 +3555,7 @@ static void fuse_lo_data_cleanup(struct lo_data *lo) + + free(lo->xattrmap); + free_xattrmap(lo); ++ free(lo->xattr_security_capability); + free(lo->source); + } + +-- +2.27.0 + diff --git a/kvm-virtiofsd-Save-error-code-early-at-the-failure-calls.patch b/kvm-virtiofsd-Save-error-code-early-at-the-failure-calls.patch new file mode 100644 index 0000000..72e42ac --- /dev/null +++ b/kvm-virtiofsd-Save-error-code-early-at-the-failure-calls.patch @@ -0,0 +1,100 @@ +From 22fe525a532619088a135c0f5f80bde12da68109 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 4 Mar 2021 15:49:00 -0500 +Subject: [PATCH 3/4] virtiofsd: Save error code early at the failure callsite + +RH-Author: Dr. David Alan Gilbert +Message-id: <20210304154901.47930-2-dgilbert@redhat.com> +Patchwork-id: 101304 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/2] virtiofsd: Save error code early at the failure callsite +Bugzilla: 1935071 +RH-Acked-by: Connor Kuehl +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Stefan Hajnoczi + +From: Vivek Goyal + +Change error code handling slightly in lo_setattr(). Right now we seem +to jump to out_err and assume that "errno" is valid and use that to +send reply. 
+ +But if caller has to do some other operations before jumping to out_err, +then it does the dance of first saving errno to saverr and then restoring +errno before jumping to out_err. This makes it more confusing. + +I am about to make more changes where caller will have to do some +work after error before jumping to out_err. I found it easier to +change the convention a bit. That is caller saves error in "saverr" +before jumping to out_err. And out_err uses "saverr" to send error +back and does not rely on "errno" having actual error. + +v3: Resolved conflicts in lo_setattr() due to lo_inode_open() changes. + +Signed-off-by: Vivek Goyal +Reviewed-by: Dr. David Alan Gilbert +Message-Id: <20210208224024.43555-2-vgoyal@redhat.com> +(cherry picked from commit 1e08f164e9fdc9528ad6990012301b9a04b0bc90) +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. de Paula +--- + tools/virtiofsd/passthrough_ll.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 03c5e0d13c..f06074d81f 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -710,6 +710,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0); + } + if (res == -1) { ++ saverr = errno; + goto out_err; + } + } +@@ -719,6 +720,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + + res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + if (res == -1) { ++ saverr = errno; + goto out_err; + } + } +@@ -730,16 +732,15 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + } else { + truncfd = lo_inode_open(lo, inode, O_RDWR); + if (truncfd < 0) { +- errno = -truncfd; ++ saverr = -truncfd; + goto out_err; + } + } + + res = ftruncate(truncfd, attr->st_size); ++ saverr = res == -1 ? 
errno : 0; + if (!fi) { +- saverr = errno; + close(truncfd); +- errno = saverr; + } + if (res == -1) { + goto out_err; +@@ -772,6 +773,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + res = utimensat(lo->proc_self_fd, procname, tv, 0); + } + if (res == -1) { ++ saverr = errno; + goto out_err; + } + } +@@ -780,7 +782,6 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + return lo_getattr(req, ino, fi); + + out_err: +- saverr = errno; + lo_inode_put(lo, &inode); + fuse_reply_err(req, saverr); + } +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index fb49fac..07c2861 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -64,7 +64,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.2.0 -Release: 10%{?dist} +Release: 11%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -284,6 +284,14 @@ Patch122: kvm-virtio-net-add-missing-object_unref.patch Patch123: kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch # For bz#1926785 - [RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Fast Train Patch124: kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch +# For bz#1932190 - Timeout when dump the screen from 2nd VGA +Patch125: kvm-qxl-set-qxl.ssd.dcl.con-on-secondary-devices.patch +# For bz#1932190 - Timeout when dump the screen from 2nd VGA +Patch126: kvm-qxl-also-notify-the-rendering-is-done-when-skipping-.patch +# For bz#1935071 - CVE-2021-20263 virt:8.4/qemu-kvm: QEMU: virtiofsd: 'security.capabilities' is not dropped with xattrmap option [rhel-av-8] +Patch127: kvm-virtiofsd-Save-error-code-early-at-the-failure-calls.patch +# For bz#1935071 - CVE-2021-20263 virt:8.4/qemu-kvm: QEMU: virtiofsd: 'security.capabilities' is not dropped with xattrmap option [rhel-av-8] +Patch128: 
kvm-virtiofs-drop-remapped-security.capability-xattr-as-.patch BuildRequires: wget BuildRequires: rpm-build @@ -1473,6 +1481,16 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %changelog +* Mon Mar 15 2021 Miroslav Rezanina - 5.2.0-11.el9 +- kvm-qxl-set-qxl.ssd.dcl.con-on-secondary-devices.patch [bz#1932190] +- kvm-qxl-also-notify-the-rendering-is-done-when-skipping-.patch [bz#1932190] +- kvm-virtiofsd-Save-error-code-early-at-the-failure-calls.patch [bz#1935071] +- kvm-virtiofs-drop-remapped-security.capability-xattr-as-.patch [bz#1935071] +- Resolves: bz#1932190 + (Timeout when dump the screen from 2nd VGA) +- Resolves: bz#1935071 + (CVE-2021-20263 virt:8.4/qemu-kvm: QEMU: virtiofsd: 'security.capabilities' is not dropped with xattrmap option [rhel-av-8]) + * Wed Mar 03 2021 Danilo Cesar Lemes de Paula - 5.2.0-10.el8 - kvm-migration-dirty-bitmap-Use-struct-for-alias-map-inne.patch [bz#1930757] - kvm-migration-dirty-bitmap-Allow-control-of-bitmap-persi.patch [bz#1930757] From 384b680e849326480c3c8739ebf63ec3141ad27a Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 22 Mar 2021 07:54:27 +0100 Subject: [PATCH 114/195] Synchronization with qemu-kvm-5.2.0-14.el8 --- ...-export-fix-blk_size-double-byteswap.patch | 44 ++++ ...-vhost-user-blk-export-sector-number.patch | 53 +++++ ...t-virtio-blk-discard-write-zeroes-in.patch | 199 ++++++++++++++++ ...rt-virtio-blk-read-write-range-check.patch | 70 ++++++ ...ck-export-use-VIRTIO_BLK_SECTOR_BITS.patch | 84 +++++++ ...Fix-interrupt-ID-in-GICD_SGIR-regist.patch | 80 +++++++ ...restore-device-paths-for-pre-5.1-vms.patch | 177 ++++++++++++++ ...-complete-requests-early-for-rerror-.patch | 50 ++++ ...sk-move-scsi_handle_rw_error-earlier.patch | 222 ++++++++++++++++++ ...-SCSI-status-to-scsi_handle_rw_error.patch | 106 +++++++++ ...uest-recoverable-errors-through-even.patch | 106 +++++++++ ...scsi-introduce-scsi_sense_from_errno.patch | 181 ++++++++++++++ 
...blk-fix-blkcfg-num_queues-endianness.patch | 68 ++++++ qemu-kvm.spec | 126 ++++++++-- 14 files changed, 1548 insertions(+), 18 deletions(-) create mode 100644 kvm-block-export-fix-blk_size-double-byteswap.patch create mode 100644 kvm-block-export-fix-vhost-user-blk-export-sector-number.patch create mode 100644 kvm-block-export-port-virtio-blk-discard-write-zeroes-in.patch create mode 100644 kvm-block-export-port-virtio-blk-read-write-range-check.patch create mode 100644 kvm-block-export-use-VIRTIO_BLK_SECTOR_BITS.patch create mode 100644 kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch create mode 100644 kvm-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch create mode 100644 kvm-scsi-disk-do-not-complete-requests-early-for-rerror-.patch create mode 100644 kvm-scsi-disk-move-scsi_handle_rw_error-earlier.patch create mode 100644 kvm-scsi-disk-pass-SCSI-status-to-scsi_handle_rw_error.patch create mode 100644 kvm-scsi-disk-pass-guest-recoverable-errors-through-even.patch create mode 100644 kvm-scsi-introduce-scsi_sense_from_errno.patch create mode 100644 kvm-vhost-user-blk-fix-blkcfg-num_queues-endianness.patch diff --git a/kvm-block-export-fix-blk_size-double-byteswap.patch b/kvm-block-export-fix-blk_size-double-byteswap.patch new file mode 100644 index 0000000..c93ab98 --- /dev/null +++ b/kvm-block-export-fix-blk_size-double-byteswap.patch @@ -0,0 +1,44 @@ +From 29c5b94ae259f21b792a611096c60b240e0c0983 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 15 Mar 2021 18:16:25 -0400 +Subject: [PATCH 09/15] block/export: fix blk_size double byteswap + +RH-Author: Stefan Hajnoczi +Message-id: <20210315181629.212884-3-stefanha@redhat.com> +Patchwork-id: 101340 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/6] block/export: fix blk_size double byteswap +Bugzilla: 1937004 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Kevin Wolf +RH-Acked-by: Max Reitz + +The config->blk_size field is little-endian. 
Use the native-endian +blk_size variable to avoid double byteswapping. + +Fixes: 11f60f7eaee2630dd6fa0c3a8c49f792e46c4cf1 ("block/export: make vhost-user-blk config space little-endian") +Signed-off-by: Stefan Hajnoczi +Message-Id: <20210223144653.811468-8-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit a4f1542af58fd6ab061e594d4e161f1c8b4a4372) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. de Paula +--- + block/export/vhost-user-blk-server.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c +index 62672d1cb9..3003cff189 100644 +--- a/block/export/vhost-user-blk-server.c ++++ b/block/export/vhost-user-blk-server.c +@@ -354,7 +354,7 @@ vu_blk_initialize_config(BlockDriverState *bs, + config->num_queues = cpu_to_le16(num_queues); + config->max_discard_sectors = cpu_to_le32(32768); + config->max_discard_seg = cpu_to_le32(1); +- config->discard_sector_alignment = cpu_to_le32(config->blk_size >> 9); ++ config->discard_sector_alignment = cpu_to_le32(blk_size >> 9); + config->max_write_zeroes_sectors = cpu_to_le32(32768); + config->max_write_zeroes_seg = cpu_to_le32(1); + } +-- +2.27.0 + diff --git a/kvm-block-export-fix-vhost-user-blk-export-sector-number.patch b/kvm-block-export-fix-vhost-user-blk-export-sector-number.patch new file mode 100644 index 0000000..dee1102 --- /dev/null +++ b/kvm-block-export-fix-vhost-user-blk-export-sector-number.patch @@ -0,0 +1,53 @@ +From e158a830fa229937fcb2ef755b50695abd64533a Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 15 Mar 2021 18:16:27 -0400 +Subject: [PATCH 11/15] block/export: fix vhost-user-blk export sector number + calculation + +RH-Author: Stefan Hajnoczi +Message-id: <20210315181629.212884-5-stefanha@redhat.com> +Patchwork-id: 101341 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 4/6] block/export: fix vhost-user-blk export sector number calculation +Bugzilla: 1937004 
+RH-Acked-by: Danilo de Paula +RH-Acked-by: Kevin Wolf +RH-Acked-by: Max Reitz + +The driver is supposed to honor the blk_size field but the protocol +still uses 512-byte sector numbers. It is incorrect to multiply +req->sector_num by blk_size. + +VIRTIO 1.1 5.2.5 Device Initialization says: + + blk_size can be read to determine the optimal sector size for the + driver to use. This does not affect the units used in the protocol + (always 512 bytes), but awareness of the correct value can affect + performance. + +Fixes: 3578389bcf76c824a5d82e6586a6f0c71e56f2aa ("block/export: vhost-user block device backend server") +Signed-off-by: Stefan Hajnoczi +Message-Id: <20210223144653.811468-10-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit e44362ce317bcc46d409ed6c4a5ed2b46804bcbf) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. de Paula +--- + block/export/vhost-user-blk-server.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c +index feb139e067..bb07f499c8 100644 +--- a/block/export/vhost-user-blk-server.c ++++ b/block/export/vhost-user-blk-server.c +@@ -144,7 +144,7 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) + break; + } + +- int64_t offset = req->sector_num * vexp->blk_size; ++ int64_t offset = req->sector_num << VIRTIO_BLK_SECTOR_BITS; + QEMUIOVector qiov; + if (is_write) { + qemu_iovec_init_external(&qiov, out_iov, out_num); +-- +2.27.0 + diff --git a/kvm-block-export-port-virtio-blk-discard-write-zeroes-in.patch b/kvm-block-export-port-virtio-blk-discard-write-zeroes-in.patch new file mode 100644 index 0000000..7af3e89 --- /dev/null +++ b/kvm-block-export-port-virtio-blk-discard-write-zeroes-in.patch @@ -0,0 +1,199 @@ +From 400ddccbcd8ddc13c85dbb7796b15fe9d6a01c1f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 15 Mar 2021 18:16:28 -0400 +Subject: [PATCH 12/15] block/export: port virtio-blk 
discard/write zeroes + input validation + +RH-Author: Stefan Hajnoczi +Message-id: <20210315181629.212884-6-stefanha@redhat.com> +Patchwork-id: 101342 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 5/6] block/export: port virtio-blk discard/write zeroes input validation +Bugzilla: 1937004 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Kevin Wolf +RH-Acked-by: Max Reitz + +Validate discard/write zeroes the same way we do for virtio-blk. Some of +these checks are mandated by the VIRTIO specification, others are +internal to QEMU. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20210223144653.811468-11-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit db4eadf9f10e19f864d70d1df3a90fbda31b8c06) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. de Paula +--- + block/export/vhost-user-blk-server.c | 116 +++++++++++++++++++++------ + 1 file changed, 93 insertions(+), 23 deletions(-) + +diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c +index bb07f499c8..937bb5e9b4 100644 +--- a/block/export/vhost-user-blk-server.c ++++ b/block/export/vhost-user-blk-server.c +@@ -29,6 +29,8 @@ + + enum { + VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1, ++ VHOST_USER_BLK_MAX_DISCARD_SECTORS = 32768, ++ VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS = 32768, + }; + struct virtio_blk_inhdr { + unsigned char status; +@@ -65,30 +67,102 @@ static void vu_blk_req_complete(VuBlkReq *req) + free(req); + } + ++static bool vu_blk_sect_range_ok(VuBlkExport *vexp, uint64_t sector, ++ size_t size) ++{ ++ uint64_t nb_sectors = size >> BDRV_SECTOR_BITS; ++ uint64_t total_sectors; ++ ++ if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) { ++ return false; ++ } ++ if ((sector << VIRTIO_BLK_SECTOR_BITS) % vexp->blk_size) { ++ return false; ++ } ++ blk_get_geometry(vexp->export.blk, &total_sectors); ++ if (sector > total_sectors || nb_sectors > total_sectors - sector) { ++ return false; ++ } ++ return true; ++} ++ + static int coroutine_fn 
+-vu_blk_discard_write_zeroes(BlockBackend *blk, struct iovec *iov, ++vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov, + uint32_t iovcnt, uint32_t type) + { ++ BlockBackend *blk = vexp->export.blk; + struct virtio_blk_discard_write_zeroes desc; +- ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc)); ++ ssize_t size; ++ uint64_t sector; ++ uint32_t num_sectors; ++ uint32_t max_sectors; ++ uint32_t flags; ++ int bytes; ++ ++ /* Only one desc is currently supported */ ++ if (unlikely(iov_size(iov, iovcnt) > sizeof(desc))) { ++ return VIRTIO_BLK_S_UNSUPP; ++ } ++ ++ size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc)); + if (unlikely(size != sizeof(desc))) { +- error_report("Invalid size %zd, expect %zu", size, sizeof(desc)); +- return -EINVAL; ++ error_report("Invalid size %zd, expected %zu", size, sizeof(desc)); ++ return VIRTIO_BLK_S_IOERR; + } + +- uint64_t range[2] = { le64_to_cpu(desc.sector) << 9, +- le32_to_cpu(desc.num_sectors) << 9 }; +- if (type == VIRTIO_BLK_T_DISCARD) { +- if (blk_co_pdiscard(blk, range[0], range[1]) == 0) { +- return 0; ++ sector = le64_to_cpu(desc.sector); ++ num_sectors = le32_to_cpu(desc.num_sectors); ++ flags = le32_to_cpu(desc.flags); ++ max_sectors = (type == VIRTIO_BLK_T_WRITE_ZEROES) ? ++ VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS : ++ VHOST_USER_BLK_MAX_DISCARD_SECTORS; ++ ++ /* This check ensures that 'bytes' fits in an int */ ++ if (unlikely(num_sectors > max_sectors)) { ++ return VIRTIO_BLK_S_IOERR; ++ } ++ ++ bytes = num_sectors << VIRTIO_BLK_SECTOR_BITS; ++ ++ if (unlikely(!vu_blk_sect_range_ok(vexp, sector, bytes))) { ++ return VIRTIO_BLK_S_IOERR; ++ } ++ ++ /* ++ * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard ++ * and write zeroes commands if any unknown flag is set. 
++ */ ++ if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) { ++ return VIRTIO_BLK_S_UNSUPP; ++ } ++ ++ if (type == VIRTIO_BLK_T_WRITE_ZEROES) { ++ int blk_flags = 0; ++ ++ if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { ++ blk_flags |= BDRV_REQ_MAY_UNMAP; ++ } ++ ++ if (blk_co_pwrite_zeroes(blk, sector << VIRTIO_BLK_SECTOR_BITS, ++ bytes, blk_flags) == 0) { ++ return VIRTIO_BLK_S_OK; + } +- } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) { +- if (blk_co_pwrite_zeroes(blk, range[0], range[1], 0) == 0) { +- return 0; ++ } else if (type == VIRTIO_BLK_T_DISCARD) { ++ /* ++ * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for ++ * discard commands if the unmap flag is set. ++ */ ++ if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) { ++ return VIRTIO_BLK_S_UNSUPP; ++ } ++ ++ if (blk_co_pdiscard(blk, sector << VIRTIO_BLK_SECTOR_BITS, ++ bytes) == 0) { ++ return VIRTIO_BLK_S_OK; + } + } + +- return -EINVAL; ++ return VIRTIO_BLK_S_IOERR; + } + + static void coroutine_fn vu_blk_virtio_process_req(void *opaque) +@@ -177,19 +251,13 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) + } + case VIRTIO_BLK_T_DISCARD: + case VIRTIO_BLK_T_WRITE_ZEROES: { +- int rc; +- + if (!vexp->writable) { + req->in->status = VIRTIO_BLK_S_IOERR; + break; + } + +- rc = vu_blk_discard_write_zeroes(blk, &elem->out_sg[1], out_num, type); +- if (rc == 0) { +- req->in->status = VIRTIO_BLK_S_OK; +- } else { +- req->in->status = VIRTIO_BLK_S_IOERR; +- } ++ req->in->status = vu_blk_discard_write_zeroes(vexp, out_iov, out_num, ++ type); + break; + } + default: +@@ -360,11 +428,13 @@ vu_blk_initialize_config(BlockDriverState *bs, + config->min_io_size = cpu_to_le16(1); + config->opt_io_size = cpu_to_le32(1); + config->num_queues = cpu_to_le16(num_queues); +- config->max_discard_sectors = cpu_to_le32(32768); ++ config->max_discard_sectors = ++ cpu_to_le32(VHOST_USER_BLK_MAX_DISCARD_SECTORS); + config->max_discard_seg = cpu_to_le32(1); + 
config->discard_sector_alignment = + cpu_to_le32(blk_size >> VIRTIO_BLK_SECTOR_BITS); +- config->max_write_zeroes_sectors = cpu_to_le32(32768); ++ config->max_write_zeroes_sectors ++ = cpu_to_le32(VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS); + config->max_write_zeroes_seg = cpu_to_le32(1); + } + +-- +2.27.0 + diff --git a/kvm-block-export-port-virtio-blk-read-write-range-check.patch b/kvm-block-export-port-virtio-blk-read-write-range-check.patch new file mode 100644 index 0000000..f35ef1f --- /dev/null +++ b/kvm-block-export-port-virtio-blk-read-write-range-check.patch @@ -0,0 +1,70 @@ +From 03aeb30096eb0d48e0b493ed4925b99b0e27979e Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 15 Mar 2021 18:16:29 -0400 +Subject: [PATCH 13/15] block/export: port virtio-blk read/write range check + +RH-Author: Stefan Hajnoczi +Message-id: <20210315181629.212884-7-stefanha@redhat.com> +Patchwork-id: 101343 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 6/6] block/export: port virtio-blk read/write range check +Bugzilla: 1937004 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Kevin Wolf +RH-Acked-by: Max Reitz + +Check that the sector number and byte count are valid. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20210223144653.811468-13-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 05ae4e674e3d47342a7660ae7bc55b393e09f4c7) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. 
de Paula +--- + block/export/vhost-user-blk-server.c | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c +index 937bb5e9b4..dbe3cfb9e8 100644 +--- a/block/export/vhost-user-blk-server.c ++++ b/block/export/vhost-user-blk-server.c +@@ -209,6 +209,8 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) + switch (type & ~VIRTIO_BLK_T_BARRIER) { + case VIRTIO_BLK_T_IN: + case VIRTIO_BLK_T_OUT: { ++ QEMUIOVector qiov; ++ int64_t offset; + ssize_t ret = 0; + bool is_write = type & VIRTIO_BLK_T_OUT; + req->sector_num = le64_to_cpu(req->out.sector); +@@ -218,13 +220,24 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) + break; + } + +- int64_t offset = req->sector_num << VIRTIO_BLK_SECTOR_BITS; +- QEMUIOVector qiov; + if (is_write) { + qemu_iovec_init_external(&qiov, out_iov, out_num); +- ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0); + } else { + qemu_iovec_init_external(&qiov, in_iov, in_num); ++ } ++ ++ if (unlikely(!vu_blk_sect_range_ok(vexp, ++ req->sector_num, ++ qiov.size))) { ++ req->in->status = VIRTIO_BLK_S_IOERR; ++ break; ++ } ++ ++ offset = req->sector_num << VIRTIO_BLK_SECTOR_BITS; ++ ++ if (is_write) { ++ ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0); ++ } else { + ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0); + } + if (ret >= 0) { +-- +2.27.0 + diff --git a/kvm-block-export-use-VIRTIO_BLK_SECTOR_BITS.patch b/kvm-block-export-use-VIRTIO_BLK_SECTOR_BITS.patch new file mode 100644 index 0000000..45b022f --- /dev/null +++ b/kvm-block-export-use-VIRTIO_BLK_SECTOR_BITS.patch @@ -0,0 +1,84 @@ +From 38097598172fa6b5b66224ee3a17dcc7d8ff6488 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 15 Mar 2021 18:16:26 -0400 +Subject: [PATCH 10/15] block/export: use VIRTIO_BLK_SECTOR_BITS + +RH-Author: Stefan Hajnoczi +Message-id: <20210315181629.212884-4-stefanha@redhat.com> +Patchwork-id: 
101339 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 3/6] block/export: use VIRTIO_BLK_SECTOR_BITS +Bugzilla: 1937004 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Kevin Wolf +RH-Acked-by: Max Reitz + +Use VIRTIO_BLK_SECTOR_BITS and VIRTIO_BLK_SECTOR_SIZE when dealing with +virtio-blk sector numbers. Although the values happen to be the same as +BDRV_SECTOR_BITS and BDRV_SECTOR_SIZE, they are conceptually different. +This makes it clearer when we are dealing with virtio-blk sector units. + +Use VIRTIO_BLK_SECTOR_BITS in vu_blk_initialize_config(). Later patches +will use the new constants in the virtqueue request processing code +path. + +Suggested-by: Max Reitz +Signed-off-by: Stefan Hajnoczi +Message-Id: <20210223144653.811468-9-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 524bac0744e5abf95856fb9e31c01fd2ef102188) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. de Paula +--- + block/export/vhost-user-blk-server.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c +index 3003cff189..feb139e067 100644 +--- a/block/export/vhost-user-blk-server.c ++++ b/block/export/vhost-user-blk-server.c +@@ -20,6 +20,13 @@ + #include "sysemu/block-backend.h" + #include "util/block-helpers.h" + ++/* ++ * Sector units are 512 bytes regardless of the ++ * virtio_blk_config->blk_size value. 
++ */ ++#define VIRTIO_BLK_SECTOR_BITS 9 ++#define VIRTIO_BLK_SECTOR_SIZE (1ull << VIRTIO_BLK_SECTOR_BITS) ++ + enum { + VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1, + }; +@@ -345,7 +352,8 @@ vu_blk_initialize_config(BlockDriverState *bs, + uint32_t blk_size, + uint16_t num_queues) + { +- config->capacity = cpu_to_le64(bdrv_getlength(bs) >> BDRV_SECTOR_BITS); ++ config->capacity = ++ cpu_to_le64(bdrv_getlength(bs) >> VIRTIO_BLK_SECTOR_BITS); + config->blk_size = cpu_to_le32(blk_size); + config->size_max = cpu_to_le32(0); + config->seg_max = cpu_to_le32(128 - 2); +@@ -354,7 +362,8 @@ vu_blk_initialize_config(BlockDriverState *bs, + config->num_queues = cpu_to_le16(num_queues); + config->max_discard_sectors = cpu_to_le32(32768); + config->max_discard_seg = cpu_to_le32(1); +- config->discard_sector_alignment = cpu_to_le32(blk_size >> 9); ++ config->discard_sector_alignment = ++ cpu_to_le32(blk_size >> VIRTIO_BLK_SECTOR_BITS); + config->max_write_zeroes_sectors = cpu_to_le32(32768); + config->max_write_zeroes_seg = cpu_to_le32(1); + } +@@ -381,7 +390,7 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, + if (vu_opts->has_logical_block_size) { + logical_block_size = vu_opts->logical_block_size; + } else { +- logical_block_size = BDRV_SECTOR_SIZE; ++ logical_block_size = VIRTIO_BLK_SECTOR_SIZE; + } + check_block_size(exp->id, "logical-block-size", logical_block_size, + &local_err); +-- +2.27.0 + diff --git a/kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch b/kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch new file mode 100644 index 0000000..a6227d9 --- /dev/null +++ b/kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch @@ -0,0 +1,80 @@ +From efdd1b8911d5ae5c0eacbc63fd4fe85f0cc4614b Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Sun, 14 Mar 2021 15:54:19 -0400 +Subject: [PATCH 06/15] hw/intc/arm_gic: Fix interrupt ID in GICD_SGIR register +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +Message-id: <20210314155419.911760-2-jmaloy@redhat.com> +Patchwork-id: 101336 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] hw/intc/arm_gic: Fix interrupt ID in GICD_SGIR register +Bugzilla: 1936948 +RH-Acked-by: Auger Eric +RH-Acked-by: Andrew Jones +RH-Acked-by: Philippe Mathieu-Daudé + +From: Philippe Mathieu-Daudé + +Per the ARM Generic Interrupt Controller Architecture specification +(document "ARM IHI 0048B.b (ID072613)"), the SGIINTID field is 4 bit, +not 10: + + - 4.3 Distributor register descriptions + - 4.3.15 Software Generated Interrupt Register, GICD_SG + + - Table 4-21 GICD_SGIR bit assignments + + The Interrupt ID of the SGI to forward to the specified CPU + interfaces. The value of this field is the Interrupt ID, in + the range 0-15, for example a value of 0b0011 specifies + Interrupt ID 3. + +Correct the irq mask to fix an undefined behavior (which eventually +lead to a heap-buffer-overflow, see [Buglink]): + + $ echo 'writel 0x8000f00 0xff4affb0' | qemu-system-aarch64 -M virt,accel=qtest -qtest stdio + [I 1612088147.116987] OPENED + [R +0.278293] writel 0x8000f00 0xff4affb0 + ../hw/intc/arm_gic.c:1498:13: runtime error: index 944 out of bounds for type 'uint8_t [16][8]' + SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior ../hw/intc/arm_gic.c:1498:13 + +This fixes a security issue when running with KVM on Arm with +kernel-irqchip=off. (The default is kernel-irqchip=on, which is +unaffected, and which is also the correct choice for performance.) 
+ +Cc: qemu-stable@nongnu.org +Fixes: CVE-2021-20221 +Fixes: 9ee6e8bb853 ("ARMv7 support.") +Buglink: https://bugs.launchpad.net/qemu/+bug/1913916 +Buglink: https://bugs.launchpad.net/qemu/+bug/1913917 +Reported-by: Alexander Bulekov +Signed-off-by: Philippe Mathieu-Daudé +Message-id: 20210131103401.217160-1-f4bug@amsat.org +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell + +(cherry picked from commit edfe2eb4360cde4ed5d95bda7777edcb3510f76a) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. de Paula +--- + hw/intc/arm_gic.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c +index c60dc6b5e6..fbde60de05 100644 +--- a/hw/intc/arm_gic.c ++++ b/hw/intc/arm_gic.c +@@ -1474,7 +1474,7 @@ static void gic_dist_writel(void *opaque, hwaddr offset, + int target_cpu; + + cpu = gic_get_current_cpu(s); +- irq = value & 0x3ff; ++ irq = value & 0xf; + switch ((value >> 24) & 3) { + case 0: + mask = (value >> 16) & ALL_CPU_MASK; +-- +2.27.0 + diff --git a/kvm-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch b/kvm-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch new file mode 100644 index 0000000..ef0f424 --- /dev/null +++ b/kvm-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch @@ -0,0 +1,177 @@ +From 1f6e36fd98ba0610a438c2352117c5b1ed4f01ba Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 8 Mar 2021 18:10:41 -0500 +Subject: [PATCH 07/15] i386/acpi: restore device paths for pre-5.1 vms +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Igor Mammedov +Message-id: <20210308181041.2427279-1-imammedo@redhat.com> +Patchwork-id: 101321 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH] i386/acpi: restore device paths for pre-5.1 vms +Bugzilla: 1934158 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Michael S. 
Tsirkin +RH-Acked-by: Philippe Mathieu-Daudé + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1934158 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=35317870 +Upstream: 0a343a5add75f9f90c65e932863d57ddbcb28f5c + + From: Vitaly Cheptsov + Date: Mon Mar 1 22:59:18 2021 +0300 + + After fixing the _UID value for the primary PCI root bridge in + af1b80ae it was discovered that this change updates Windows + configuration in an incompatible way causing network configuration + failure unless DHCP is used. More details provided on the list: + + https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html + + This change reverts the _UID update from 1 to 0 for q35 and i440fx + VMs before version 5.2 to maintain the original behaviour when + upgrading. + + Cc: qemu-stable@nongnu.org + Cc: qemu-devel@nongnu.org + Reported-by: Thomas Lamprecht + Suggested-by: Michael S. Tsirkin + Signed-off-by: Vitaly Cheptsov + Message-Id: <20210301195919.9333-1-cheptsov@ispras.ru> + Tested-by: Thomas Lamprecht + Reviewed-by: Igor Mammedov + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + Fixes: af1b80ae56c9 ("i386/acpi: fix inconsistent QEMU/OVMF device paths") + +(cherry picked from commit 0a343a5add75f9f90c65e932863d57ddbcb28f5c) +Signed-off-by: Igor Mammedov + +Notes: +clean cherrypick + +adding the same quirk to RHEL's pc(7.6)/q35(8.3) machine types +to preserve old UID. pc-q35-rhel8.4.0 will have new UID as defined +by spec (but since it's not been released yet there is no risk of +breaking [non]existing Windows deployments and new installations +should pickup new PCI device enumeration just fine) + +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/i386/acpi-build.c | 4 ++-- + hw/i386/pc_piix.c | 5 +++++ + hw/i386/pc_q35.c | 5 +++++ + include/hw/i386/pc.h | 1 + + 4 files changed, 13 insertions(+), 2 deletions(-) + +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index b1082bd412..be6a260b85 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -1516,7 +1516,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, + dev = aml_device("PCI0"); + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); + aml_append(dev, aml_name_decl("_ADR", aml_int(0))); +- aml_append(dev, aml_name_decl("_UID", aml_int(0))); ++ aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid))); + aml_append(sb_scope, dev); + aml_append(dsdt, sb_scope); + +@@ -1533,7 +1533,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); + aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); + aml_append(dev, aml_name_decl("_ADR", aml_int(0))); +- aml_append(dev, aml_name_decl("_UID", aml_int(0))); ++ aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid))); + aml_append(dev, build_q35_osc_method()); + aml_append(sb_scope, dev); + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 6e1f1ba082..819fb5fed9 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -421,6 +421,7 @@ static void pc_i440fx_machine_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pcmc->default_nic_model = "e1000"; ++ pcmc->pci_root_uid = 0; + + m->family = "pc_piix"; + m->desc = "Standard PC (i440FX + PIIX, 1996)"; +@@ -452,6 +453,7 @@ static void pc_i440fx_5_1_machine_options(MachineClass *m) + compat_props_add(m->compat_props, hw_compat_5_1, hw_compat_5_1_len); + compat_props_add(m->compat_props, pc_compat_5_1, pc_compat_5_1_len); + pcmc->kvmclock_create_always = false; ++ pcmc->pci_root_uid = 1; + } + + DEFINE_I440FX_MACHINE(v5_1, "pc-i440fx-5.1", NULL, +@@ -1020,6 +1022,7 @@ static void 
pc_machine_rhel7_options(MachineClass *m) + m->family = "pc_piix_Y"; + m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; + pcmc->default_nic_model = "e1000"; ++ pcmc->pci_root_uid = 0; + m->default_display = "std"; + m->no_parallel = 1; + m->numa_mem_supported = true; +@@ -1046,6 +1049,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->pvh_enabled = false; + pcmc->default_cpu_version = CPU_VERSION_LEGACY; + pcmc->kvmclock_create_always = false; ++ /* From pc_i440fx_5_1_machine_options() */ ++ pcmc->pci_root_uid = 1; + compat_props_add(m->compat_props, hw_compat_rhel_8_3, + hw_compat_rhel_8_3_len); + compat_props_add(m->compat_props, pc_rhel_8_3_compat, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index a8c0496c9f..f848f1484e 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -332,6 +332,7 @@ static void pc_q35_machine_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pcmc->default_nic_model = "e1000e"; ++ pcmc->pci_root_uid = 0; + + m->family = "pc_q35"; + m->desc = "Standard PC (Q35 + ICH9, 2009)"; +@@ -367,6 +368,7 @@ static void pc_q35_5_1_machine_options(MachineClass *m) + compat_props_add(m->compat_props, hw_compat_5_1, hw_compat_5_1_len); + compat_props_add(m->compat_props, pc_compat_5_1, pc_compat_5_1_len); + pcmc->kvmclock_create_always = false; ++ pcmc->pci_root_uid = 1; + } + + DEFINE_Q35_MACHINE(v5_1, "pc-q35-5.1", NULL, +@@ -578,6 +580,7 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pcmc->default_nic_model = "e1000e"; ++ pcmc->pci_root_uid = 0; + m->family = "pc_q35_Z"; + m->units_per_default_bus = 1; + m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; +@@ -630,6 +633,8 @@ static void pc_q35_machine_rhel830_options(MachineClass *m) + pc_rhel_8_3_compat_len); + /* From pc_q35_5_1_machine_options() */ + pcmc->kvmclock_create_always = false; ++ /* From pc_q35_5_1_machine_options() */ ++ pcmc->pci_root_uid = 1; + 
} + + DEFINE_PC_MACHINE(q35_rhel830, "pc-q35-rhel8.3.0", pc_q35_init_rhel830, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 68091bea98..d2efc65cec 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -99,6 +99,7 @@ struct PCMachineClass { + int legacy_acpi_table_size; + unsigned acpi_data_size; + bool do_not_add_smb_acpi; ++ int pci_root_uid; + + /* SMBIOS compat: */ + bool smbios_defaults; +-- +2.27.0 + diff --git a/kvm-scsi-disk-do-not-complete-requests-early-for-rerror-.patch b/kvm-scsi-disk-do-not-complete-requests-early-for-rerror-.patch new file mode 100644 index 0000000..1e18da6 --- /dev/null +++ b/kvm-scsi-disk-do-not-complete-requests-early-for-rerror-.patch @@ -0,0 +1,50 @@ +From 570d5034b8c6124df1830857144dc1ac08c13d06 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 8 Mar 2021 10:48:59 -0500 +Subject: [PATCH 02/15] scsi-disk: do not complete requests early for + rerror/werror=ignore +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20210308104902.149906-3-pbonzini@redhat.com> +Patchwork-id: 101309 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/5] scsi-disk: do not complete requests early for rerror/werror=ignore +Bugzilla: 1927530 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Philippe Mathieu-Daudé + +When requested to ignore errors, just do nothing and let the +request complete normally. This means that the request will +be accounted correctly. + +This is what commit 40dce4ee61 ("scsi-disk: fix rerror/werror=ignore", +2018-10-19) was supposed to do: + +Fixes: 40dce4ee61 ("scsi-disk: fix rerror/werror=ignore", 2018-10-19) +Signed-off-by: Paolo Bonzini +(cherry picked from commit 424740def9a42da88550410de9a41ef07cc4a010) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/scsi/scsi-disk.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index cecdea2640..e8de15f549 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -252,8 +252,7 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) + + blk_error_action(s->qdev.conf.blk, action, is_read, error); + if (action == BLOCK_ERROR_ACTION_IGNORE) { +- scsi_req_complete(&r->req, 0); +- return true; ++ return false; + } + + if (action == BLOCK_ERROR_ACTION_STOP) { +-- +2.27.0 + diff --git a/kvm-scsi-disk-move-scsi_handle_rw_error-earlier.patch b/kvm-scsi-disk-move-scsi_handle_rw_error-earlier.patch new file mode 100644 index 0000000..766321a --- /dev/null +++ b/kvm-scsi-disk-move-scsi_handle_rw_error-earlier.patch @@ -0,0 +1,222 @@ +From c029d041853805ba612d27886f769c0e004c35e6 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 8 Mar 2021 10:48:58 -0500 +Subject: [PATCH 01/15] scsi-disk: move scsi_handle_rw_error earlier +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20210308104902.149906-2-pbonzini@redhat.com> +Patchwork-id: 101307 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/5] scsi-disk: move scsi_handle_rw_error earlier +Bugzilla: 1927530 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Philippe Mathieu-Daudé + +Remove the forward declaration. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit f95f61c2c9618fae7d8ea4c1d63e7416884bad52) +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/scsi/scsi-disk.c | 168 ++++++++++++++++++++++---------------------- + 1 file changed, 83 insertions(+), 85 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 90841ad791..cecdea2640 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -110,8 +110,6 @@ struct SCSIDiskState { + uint16_t rotation_rate; + }; + +-static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed); +- + static void scsi_free_request(SCSIRequest *req) + { + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); +@@ -181,6 +179,89 @@ static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req) + qemu_iovec_init_external(&r->qiov, &r->iov, 1); + } + ++/* ++ * scsi_handle_rw_error has two return values. False means that the error ++ * must be ignored, true means that the error has been processed and the ++ * caller should not do anything else for this request. Note that ++ * scsi_handle_rw_error always manages its reference counts, independent ++ * of the return value. ++ */ ++static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) ++{ ++ bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV); ++ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); ++ SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); ++ BlockErrorAction action = blk_get_error_action(s->qdev.conf.blk, ++ is_read, error); ++ ++ if (action == BLOCK_ERROR_ACTION_REPORT) { ++ if (acct_failed) { ++ block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); ++ } ++ switch (error) { ++ case 0: ++ /* A passthrough command has run and has produced sense data; check ++ * whether the error has to be handled by the guest or should rather ++ * pause the host. ++ */ ++ assert(r->status && *r->status); ++ if (scsi_sense_buf_is_guest_recoverable(r->req.sense, sizeof(r->req.sense))) { ++ /* These errors are handled by guest. 
*/ ++ sdc->update_sense(&r->req); ++ scsi_req_complete(&r->req, *r->status); ++ return true; ++ } ++ error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); ++ break; ++#ifdef CONFIG_LINUX ++ /* These errno mapping are specific to Linux. For more information: ++ * - scsi_decide_disposition in drivers/scsi/scsi_error.c ++ * - scsi_result_to_blk_status in drivers/scsi/scsi_lib.c ++ * - blk_errors[] in block/blk-core.c ++ */ ++ case EBADE: ++ /* DID_NEXUS_FAILURE -> BLK_STS_NEXUS. */ ++ scsi_req_complete(&r->req, RESERVATION_CONFLICT); ++ break; ++ case ENODATA: ++ /* DID_MEDIUM_ERROR -> BLK_STS_MEDIUM. */ ++ scsi_check_condition(r, SENSE_CODE(READ_ERROR)); ++ break; ++ case EREMOTEIO: ++ /* DID_TARGET_FAILURE -> BLK_STS_TARGET. */ ++ scsi_req_complete(&r->req, HARDWARE_ERROR); ++ break; ++#endif ++ case ENOMEDIUM: ++ scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); ++ break; ++ case ENOMEM: ++ scsi_check_condition(r, SENSE_CODE(TARGET_FAILURE)); ++ break; ++ case EINVAL: ++ scsi_check_condition(r, SENSE_CODE(INVALID_FIELD)); ++ break; ++ case ENOSPC: ++ scsi_check_condition(r, SENSE_CODE(SPACE_ALLOC_FAILED)); ++ break; ++ default: ++ scsi_check_condition(r, SENSE_CODE(IO_ERROR)); ++ break; ++ } ++ } ++ ++ blk_error_action(s->qdev.conf.blk, action, is_read, error); ++ if (action == BLOCK_ERROR_ACTION_IGNORE) { ++ scsi_req_complete(&r->req, 0); ++ return true; ++ } ++ ++ if (action == BLOCK_ERROR_ACTION_STOP) { ++ scsi_req_retry(&r->req); ++ } ++ return true; ++} ++ + static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) + { + if (r->req.io_canceled) { +@@ -427,89 +508,6 @@ static void scsi_read_data(SCSIRequest *req) + } + } + +-/* +- * scsi_handle_rw_error has two return values. False means that the error +- * must be ignored, true means that the error has been processed and the +- * caller should not do anything else for this request. 
Note that +- * scsi_handle_rw_error always manages its reference counts, independent +- * of the return value. +- */ +-static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) +-{ +- bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV); +- SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); +- SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); +- BlockErrorAction action = blk_get_error_action(s->qdev.conf.blk, +- is_read, error); +- +- if (action == BLOCK_ERROR_ACTION_REPORT) { +- if (acct_failed) { +- block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); +- } +- switch (error) { +- case 0: +- /* A passthrough command has run and has produced sense data; check +- * whether the error has to be handled by the guest or should rather +- * pause the host. +- */ +- assert(r->status && *r->status); +- if (scsi_sense_buf_is_guest_recoverable(r->req.sense, sizeof(r->req.sense))) { +- /* These errors are handled by guest. */ +- sdc->update_sense(&r->req); +- scsi_req_complete(&r->req, *r->status); +- return true; +- } +- error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); +- break; +-#ifdef CONFIG_LINUX +- /* These errno mapping are specific to Linux. For more information: +- * - scsi_decide_disposition in drivers/scsi/scsi_error.c +- * - scsi_result_to_blk_status in drivers/scsi/scsi_lib.c +- * - blk_errors[] in block/blk-core.c +- */ +- case EBADE: +- /* DID_NEXUS_FAILURE -> BLK_STS_NEXUS. */ +- scsi_req_complete(&r->req, RESERVATION_CONFLICT); +- break; +- case ENODATA: +- /* DID_MEDIUM_ERROR -> BLK_STS_MEDIUM. */ +- scsi_check_condition(r, SENSE_CODE(READ_ERROR)); +- break; +- case EREMOTEIO: +- /* DID_TARGET_FAILURE -> BLK_STS_TARGET. 
*/ +- scsi_req_complete(&r->req, HARDWARE_ERROR); +- break; +-#endif +- case ENOMEDIUM: +- scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); +- break; +- case ENOMEM: +- scsi_check_condition(r, SENSE_CODE(TARGET_FAILURE)); +- break; +- case EINVAL: +- scsi_check_condition(r, SENSE_CODE(INVALID_FIELD)); +- break; +- case ENOSPC: +- scsi_check_condition(r, SENSE_CODE(SPACE_ALLOC_FAILED)); +- break; +- default: +- scsi_check_condition(r, SENSE_CODE(IO_ERROR)); +- break; +- } +- } +- +- blk_error_action(s->qdev.conf.blk, action, is_read, error); +- if (action == BLOCK_ERROR_ACTION_IGNORE) { +- scsi_req_complete(&r->req, 0); +- return true; +- } +- +- if (action == BLOCK_ERROR_ACTION_STOP) { +- scsi_req_retry(&r->req); +- } +- return true; +-} +- + static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) + { + uint32_t n; +-- +2.27.0 + diff --git a/kvm-scsi-disk-pass-SCSI-status-to-scsi_handle_rw_error.patch b/kvm-scsi-disk-pass-SCSI-status-to-scsi_handle_rw_error.patch new file mode 100644 index 0000000..eb49e97 --- /dev/null +++ b/kvm-scsi-disk-pass-SCSI-status-to-scsi_handle_rw_error.patch @@ -0,0 +1,106 @@ +From 620d646367a38ff9908de811e1f0a24a3f105529 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 8 Mar 2021 10:49:01 -0500 +Subject: [PATCH 04/15] scsi-disk: pass SCSI status to scsi_handle_rw_error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20210308104902.149906-5-pbonzini@redhat.com> +Patchwork-id: 101310 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 4/5] scsi-disk: pass SCSI status to scsi_handle_rw_error +Bugzilla: 1927530 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Philippe Mathieu-Daudé + +Instead of fishing it from *r->status, just pass the SCSI status +as a positive value of the second parameter and an errno as a +negative value. 
+ +Signed-off-by: Paolo Bonzini +(cherry picked from commit f63c68bc0f514694a958b2e84a204b7792d28b17) +Signed-off-by: Danilo C. L. de Paula +--- + hw/scsi/scsi-disk.c | 38 +++++++++++++++++++++++++++----------- + 1 file changed, 27 insertions(+), 11 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 7393f33ee2..c545f0b674 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -186,34 +186,48 @@ static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req) + * scsi_handle_rw_error always manages its reference counts, independent + * of the return value. + */ +-static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) ++static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + { + bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV); + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); +- BlockErrorAction action = blk_get_error_action(s->qdev.conf.blk, +- is_read, error); +- SCSISense sense; ++ SCSISense sense = SENSE_CODE(NO_SENSE); ++ int error = 0; ++ bool req_has_sense = false; ++ BlockErrorAction action; ++ int status; + ++ if (ret < 0) { ++ status = scsi_sense_from_errno(-ret, &sense); ++ error = -ret; ++ } else { ++ /* A passthrough command has completed with nonzero status. */ ++ status = ret; ++ if (status == CHECK_CONDITION) { ++ req_has_sense = true; ++ error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); ++ } else { ++ error = EINVAL; ++ } ++ } ++ ++ action = blk_get_error_action(s->qdev.conf.blk, is_read, error); + if (action == BLOCK_ERROR_ACTION_REPORT) { + if (acct_failed) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } +- if (error == 0) { ++ if (req_has_sense) { + /* A passthrough command has run and has produced sense data; check + * whether the error has to be handled by the guest or should rather + * pause the host. 
+ */ +- assert(r->status && *r->status); + if (scsi_sense_buf_is_guest_recoverable(r->req.sense, sizeof(r->req.sense))) { + /* These errors are handled by guest. */ + sdc->update_sense(&r->req); +- scsi_req_complete(&r->req, *r->status); ++ scsi_req_complete(&r->req, status); + return true; + } +- error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); + } else { +- int status = scsi_sense_from_errno(error, &sense); + if (status == CHECK_CONDITION) { + scsi_req_build_sense(&r->req, sense); + } +@@ -239,8 +253,10 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) + return true; + } + +- if (ret < 0 || (r->status && *r->status)) { +- return scsi_handle_rw_error(r, -ret, acct_failed); ++ if (ret < 0) { ++ return scsi_handle_rw_error(r, ret, acct_failed); ++ } else if (r->status && *r->status) { ++ return scsi_handle_rw_error(r, *r->status, acct_failed); + } + + return false; +-- +2.27.0 + diff --git a/kvm-scsi-disk-pass-guest-recoverable-errors-through-even.patch b/kvm-scsi-disk-pass-guest-recoverable-errors-through-even.patch new file mode 100644 index 0000000..58fac83 --- /dev/null +++ b/kvm-scsi-disk-pass-guest-recoverable-errors-through-even.patch @@ -0,0 +1,106 @@ +From 9cf10f41fc8a89cd80f27e3b2674dec7eead60d4 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 8 Mar 2021 10:49:02 -0500 +Subject: [PATCH 05/15] scsi-disk: pass guest recoverable errors through even + for rerror=stop +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20210308104902.149906-6-pbonzini@redhat.com> +Patchwork-id: 101311 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 5/5] scsi-disk: pass guest recoverable errors through even for rerror=stop +Bugzilla: 1927530 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Philippe Mathieu-Daudé + +Right now, recoverable sense values are only passed directly to the +guest only for rerror=report. 
However, when rerror/werror are 'stop' +we still don't want the host to be involved on every UNIT ATTENTION +(especially considered that the QMP event will not have enough information +to act on the report). + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 782a78c9e994c2be23467262f50e885a0eb0d9fc) +Signed-off-by: Danilo C. L. de Paula +--- + hw/scsi/scsi-disk.c | 51 +++++++++++++++++++++++++-------------------- + 1 file changed, 28 insertions(+), 23 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index c545f0b674..f2abbf0d87 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -211,39 +211,44 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + } + } + +- action = blk_get_error_action(s->qdev.conf.blk, is_read, error); +- if (action == BLOCK_ERROR_ACTION_REPORT) { ++ /* ++ * Check whether the error has to be handled by the guest or should ++ * rather follow the rerror=/werror= settings. Guest-handled errors ++ * are usually retried immediately, so do not post them to QMP and ++ * do not account them as failed I/O. ++ */ ++ if (req_has_sense && ++ scsi_sense_buf_is_guest_recoverable(r->req.sense, sizeof(r->req.sense))) { ++ action = BLOCK_ERROR_ACTION_REPORT; ++ acct_failed = false; ++ } else { ++ action = blk_get_error_action(s->qdev.conf.blk, is_read, error); ++ blk_error_action(s->qdev.conf.blk, action, is_read, error); ++ } ++ ++ switch (action) { ++ case BLOCK_ERROR_ACTION_REPORT: + if (acct_failed) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + if (req_has_sense) { +- /* A passthrough command has run and has produced sense data; check +- * whether the error has to be handled by the guest or should rather +- * pause the host. +- */ +- if (scsi_sense_buf_is_guest_recoverable(r->req.sense, sizeof(r->req.sense))) { +- /* These errors are handled by guest. 
*/ +- sdc->update_sense(&r->req); +- scsi_req_complete(&r->req, status); +- return true; +- } +- } else { +- if (status == CHECK_CONDITION) { +- scsi_req_build_sense(&r->req, sense); +- } +- scsi_req_complete(&r->req, status); ++ sdc->update_sense(&r->req); ++ } else if (status == CHECK_CONDITION) { ++ scsi_req_build_sense(&r->req, sense); + } +- } ++ scsi_req_complete(&r->req, status); ++ return true; + +- blk_error_action(s->qdev.conf.blk, action, is_read, error); +- if (action == BLOCK_ERROR_ACTION_IGNORE) { ++ case BLOCK_ERROR_ACTION_IGNORE: + return false; +- } + +- if (action == BLOCK_ERROR_ACTION_STOP) { ++ case BLOCK_ERROR_ACTION_STOP: + scsi_req_retry(&r->req); ++ return true; ++ ++ default: ++ g_assert_not_reached(); + } +- return true; + } + + static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) +-- +2.27.0 + diff --git a/kvm-scsi-introduce-scsi_sense_from_errno.patch b/kvm-scsi-introduce-scsi_sense_from_errno.patch new file mode 100644 index 0000000..4a13519 --- /dev/null +++ b/kvm-scsi-introduce-scsi_sense_from_errno.patch @@ -0,0 +1,181 @@ +From 38a29a168f4b377eb6381469af16887e12ebfa3d Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 8 Mar 2021 10:49:00 -0500 +Subject: [PATCH 03/15] scsi: introduce scsi_sense_from_errno() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20210308104902.149906-4-pbonzini@redhat.com> +Patchwork-id: 101308 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 3/5] scsi: introduce scsi_sense_from_errno() +Bugzilla: 1927530 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Philippe Mathieu-Daudé + +The new function is an extension of the switch statement in scsi-disk.c +which also includes the errno cases only found in sg_io_sense_from_errno. +This allows us to consolidate the errno handling. + +Extracted from a patch by Hannes Reinecke . 
+ +Signed-off-by: Paolo Bonzini +(cherry picked from commit d7a84021db8eeddcd5d24ab591a1434763caff6c) +Signed-off-by: Danilo C. L. de Paula +--- + hw/scsi/scsi-disk.c | 45 +++++++------------------------------- + include/scsi/utils.h | 2 ++ + scsi/utils.c | 51 +++++++++++++++++++++++++++++++++++--------- + 3 files changed, 51 insertions(+), 47 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index e8de15f549..7393f33ee2 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -193,13 +193,13 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) + SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); + BlockErrorAction action = blk_get_error_action(s->qdev.conf.blk, + is_read, error); ++ SCSISense sense; + + if (action == BLOCK_ERROR_ACTION_REPORT) { + if (acct_failed) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } +- switch (error) { +- case 0: ++ if (error == 0) { + /* A passthrough command has run and has produced sense data; check + * whether the error has to be handled by the guest or should rather + * pause the host. +@@ -212,41 +212,12 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) + return true; + } + error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); +- break; +-#ifdef CONFIG_LINUX +- /* These errno mapping are specific to Linux. For more information: +- * - scsi_decide_disposition in drivers/scsi/scsi_error.c +- * - scsi_result_to_blk_status in drivers/scsi/scsi_lib.c +- * - blk_errors[] in block/blk-core.c +- */ +- case EBADE: +- /* DID_NEXUS_FAILURE -> BLK_STS_NEXUS. */ +- scsi_req_complete(&r->req, RESERVATION_CONFLICT); +- break; +- case ENODATA: +- /* DID_MEDIUM_ERROR -> BLK_STS_MEDIUM. */ +- scsi_check_condition(r, SENSE_CODE(READ_ERROR)); +- break; +- case EREMOTEIO: +- /* DID_TARGET_FAILURE -> BLK_STS_TARGET. 
*/ +- scsi_req_complete(&r->req, HARDWARE_ERROR); +- break; +-#endif +- case ENOMEDIUM: +- scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); +- break; +- case ENOMEM: +- scsi_check_condition(r, SENSE_CODE(TARGET_FAILURE)); +- break; +- case EINVAL: +- scsi_check_condition(r, SENSE_CODE(INVALID_FIELD)); +- break; +- case ENOSPC: +- scsi_check_condition(r, SENSE_CODE(SPACE_ALLOC_FAILED)); +- break; +- default: +- scsi_check_condition(r, SENSE_CODE(IO_ERROR)); +- break; ++ } else { ++ int status = scsi_sense_from_errno(error, &sense); ++ if (status == CHECK_CONDITION) { ++ scsi_req_build_sense(&r->req, sense); ++ } ++ scsi_req_complete(&r->req, status); + } + } + +diff --git a/include/scsi/utils.h b/include/scsi/utils.h +index fbc5588279..878434a8f5 100644 +--- a/include/scsi/utils.h ++++ b/include/scsi/utils.h +@@ -133,4 +133,6 @@ int sg_io_sense_from_errno(int errno_value, struct sg_io_hdr *io_hdr, + SCSISense *sense); + #endif + ++int scsi_sense_from_errno(int errno_value, SCSISense *sense); ++ + #endif +diff --git a/scsi/utils.c b/scsi/utils.c +index b37c283014..c93458b80e 100644 +--- a/scsi/utils.c ++++ b/scsi/utils.c +@@ -560,21 +560,52 @@ const char *scsi_command_name(uint8_t cmd) + return names[cmd]; + } + ++int scsi_sense_from_errno(int errno_value, SCSISense *sense) ++{ ++ switch (errno_value) { ++ case 0: ++ return GOOD; ++ case EDOM: ++ return TASK_SET_FULL; ++#ifdef CONFIG_LINUX ++ /* These errno mapping are specific to Linux. 
For more information: ++ * - scsi_decide_disposition in drivers/scsi/scsi_error.c ++ * - scsi_result_to_blk_status in drivers/scsi/scsi_lib.c ++ * - blk_errors[] in block/blk-core.c ++ */ ++ case EBADE: ++ return RESERVATION_CONFLICT; ++ case ENODATA: ++ *sense = SENSE_CODE(READ_ERROR); ++ return CHECK_CONDITION; ++ case EREMOTEIO: ++ *sense = SENSE_CODE(LUN_COMM_FAILURE); ++ return CHECK_CONDITION; ++#endif ++ case ENOMEDIUM: ++ *sense = SENSE_CODE(NO_MEDIUM); ++ return CHECK_CONDITION; ++ case ENOMEM: ++ *sense = SENSE_CODE(TARGET_FAILURE); ++ return CHECK_CONDITION; ++ case EINVAL: ++ *sense = SENSE_CODE(INVALID_FIELD); ++ return CHECK_CONDITION; ++ case ENOSPC: ++ *sense = SENSE_CODE(SPACE_ALLOC_FAILED); ++ return CHECK_CONDITION; ++ default: ++ *sense = SENSE_CODE(IO_ERROR); ++ return CHECK_CONDITION; ++ } ++} ++ + #ifdef CONFIG_LINUX + int sg_io_sense_from_errno(int errno_value, struct sg_io_hdr *io_hdr, + SCSISense *sense) + { + if (errno_value != 0) { +- switch (errno_value) { +- case EDOM: +- return TASK_SET_FULL; +- case ENOMEM: +- *sense = SENSE_CODE(TARGET_FAILURE); +- return CHECK_CONDITION; +- default: +- *sense = SENSE_CODE(IO_ERROR); +- return CHECK_CONDITION; +- } ++ return scsi_sense_from_errno(errno_value, sense); + } else { + if (io_hdr->host_status == SG_ERR_DID_NO_CONNECT || + io_hdr->host_status == SG_ERR_DID_BUS_BUSY || +-- +2.27.0 + diff --git a/kvm-vhost-user-blk-fix-blkcfg-num_queues-endianness.patch b/kvm-vhost-user-blk-fix-blkcfg-num_queues-endianness.patch new file mode 100644 index 0000000..efb0b34 --- /dev/null +++ b/kvm-vhost-user-blk-fix-blkcfg-num_queues-endianness.patch @@ -0,0 +1,68 @@ +From f6ad6b772dce72042afbe8779cd9c52d5e352418 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 15 Mar 2021 18:16:24 -0400 +Subject: [PATCH 08/15] vhost-user-blk: fix blkcfg->num_queues endianness +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +Message-id: 
<20210315181629.212884-2-stefanha@redhat.com> +Patchwork-id: 101338 +O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/6] vhost-user-blk: fix blkcfg->num_queues endianness +Bugzilla: 1937004 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Kevin Wolf +RH-Acked-by: Max Reitz + +Treat the num_queues field as virtio-endian. On big-endian hosts the +vhost-user-blk num_queues field was in the wrong endianness. + +Move the blkcfg.num_queues store operation from realize to +vhost_user_blk_update_config() so feature negotiation has finished and +we know the endianness of the device. VIRTIO 1.0 devices are +little-endian, but in case someone wants to use legacy VIRTIO we support +all endianness cases. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Raphael Norwitz +Reviewed-by: Michael S. Tsirkin +Message-Id: <20210223144653.811468-2-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 535255b43898d2e96744057eb86f8497d4d7a461) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/block/vhost-user-blk.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c +index 2dd3d93ca0..d9d9dc8a89 100644 +--- a/hw/block/vhost-user-blk.c ++++ b/hw/block/vhost-user-blk.c +@@ -53,6 +53,9 @@ static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config) + { + VHostUserBlk *s = VHOST_USER_BLK(vdev); + ++ /* Our num_queues overrides the device backend */ ++ virtio_stw_p(vdev, &s->blkcfg.num_queues, s->num_queues); ++ + memcpy(config, &s->blkcfg, sizeof(struct virtio_blk_config)); + } + +@@ -490,10 +493,6 @@ reconnect: + goto reconnect; + } + +- if (s->blkcfg.num_queues != s->num_queues) { +- s->blkcfg.num_queues = s->num_queues; +- } +- + return; + + virtio_err: +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 07c2861..90082d6 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -53,6 +53,12 @@ #Versions of various parts: %global requires_all_modules \ +%if %{have_spice} \ +Requires: %{name}-ui-spice = %{epoch}:%{version}-%{release} \ +%endif \ +%if %{have_opengl} \ +Requires: %{name}-ui-opengl = %{epoch}:%{version}-%{release} \ +%endif \ Requires: %{name}-block-curl = %{epoch}:%{version}-%{release} \ %if %{have_gluster} \ Requires: %{name}-block-gluster = %{epoch}:%{version}-%{release} \ @@ -64,7 +70,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.2.0 -Release: 11%{?dist} +Release: 14%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -292,6 +298,32 @@ Patch126: kvm-qxl-also-notify-the-rendering-is-done-when-skipping-.patch Patch127: kvm-virtiofsd-Save-error-code-early-at-the-failure-calls.patch # For bz#1935071 - CVE-2021-20263 virt:8.4/qemu-kvm: QEMU: virtiofsd: 'security.capabilities' is not dropped with xattrmap option [rhel-av-8] Patch128: kvm-virtiofs-drop-remapped-security.capability-xattr-as-.patch +# For bz#1927530 - RHEL8 Hypervisor - OVIRT - Issues seen on a virtualization guest with direct passthrough LUNS pausing when a host gets a Thin threshold warning +Patch129: kvm-scsi-disk-move-scsi_handle_rw_error-earlier.patch +# For bz#1927530 - RHEL8 Hypervisor - OVIRT - Issues seen on a virtualization guest with direct passthrough LUNS pausing when a host gets a Thin threshold warning +Patch130: kvm-scsi-disk-do-not-complete-requests-early-for-rerror-.patch +# For bz#1927530 - RHEL8 Hypervisor - OVIRT - Issues seen on a virtualization guest with direct passthrough LUNS pausing when a host gets a Thin threshold warning +Patch131: kvm-scsi-introduce-scsi_sense_from_errno.patch +# For bz#1927530 - RHEL8 Hypervisor - OVIRT - Issues seen on a virtualization guest with direct passthrough LUNS pausing when a host gets a Thin threshold warning +Patch132: kvm-scsi-disk-pass-SCSI-status-to-scsi_handle_rw_error.patch +# For bz#1927530 - RHEL8 Hypervisor - OVIRT - Issues seen on a virtualization guest with direct passthrough LUNS pausing when a host gets a Thin threshold warning +Patch133: kvm-scsi-disk-pass-guest-recoverable-errors-through-even.patch +# For bz#1936948 - CVE-2021-20221 virt:av/qemu-kvm: qemu: out-of-bound heap buffer access via an interrupt ID field [rhel-av-8.4.0] +Patch134: kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch +# For bz#1934158 - Windows guest looses network connectivity when NIC was configured with static IP +Patch135: kvm-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch 
+# For bz#1937004 - vhost-user-blk server endianness and input validation fixes +Patch136: kvm-vhost-user-blk-fix-blkcfg-num_queues-endianness.patch +# For bz#1937004 - vhost-user-blk server endianness and input validation fixes +Patch137: kvm-block-export-fix-blk_size-double-byteswap.patch +# For bz#1937004 - vhost-user-blk server endianness and input validation fixes +Patch138: kvm-block-export-use-VIRTIO_BLK_SECTOR_BITS.patch +# For bz#1937004 - vhost-user-blk server endianness and input validation fixes +Patch139: kvm-block-export-fix-vhost-user-blk-export-sector-number.patch +# For bz#1937004 - vhost-user-blk server endianness and input validation fixes +Patch140: kvm-block-export-port-virtio-blk-discard-write-zeroes-in.patch +# For bz#1937004 - vhost-user-blk server endianness and input validation fixes +Patch141: kvm-block-export-port-virtio-blk-read-write-range-check.patch BuildRequires: wget BuildRequires: rpm-build @@ -397,9 +429,6 @@ BuildRequires: binutils >= 2.27-16 BuildRequires: pkgconfig(epoxy) BuildRequires: pkgconfig(libdrm) BuildRequires: pkgconfig(gbm) -Requires: mesa-libGL -Requires: mesa-libEGL -Requires: mesa-dri-drivers %endif BuildRequires: perl-Test-Harness @@ -585,6 +614,32 @@ Install this package if you want to access remote disks using the Secure Shell (SSH) protocol. +%if %{have_spice} +%package ui-spice +Summary: QEMU spice support +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%if %{have_opengl} +Requires: %{name}-ui-opengl%{?_isa} = %{epoch}:%{version}-%{release} +%endif + +%description ui-spice +This package provides spice support. +%endif + + +%if %{have_opengl} +%package ui-opengl +Summary: QEMU opengl support +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: mesa-libGL +Requires: mesa-libEGL +Requires: mesa-dri-drivers + +%description ui-opengl +This package provides opengl support. 
+%endif + + %prep %setup -n qemu-%{version}%{?rcversion} # Remove slirp content in scratchbuilds because it's being applyed as a patch @@ -1361,7 +1416,6 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %{_datadir}/%{name}/kvmvapic.bin %{_datadir}/%{name}/sgabios.bin %{_datadir}/%{name}/pvh.bin - %{_libdir}/qemu-kvm/ui-egl-headless.so %endif %ifarch s390x %{_datadir}/%{name}/s390-ccw.img @@ -1411,22 +1465,10 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %if %{have_usbredir} %{_libdir}/qemu-kvm/hw-usb-redirect.so %endif -%if 0%{have_spice} - %{_libdir}/qemu-kvm/hw-usb-smartcard.so - %{_libdir}/qemu-kvm/audio-spice.so - %{_libdir}/qemu-kvm/ui-spice-core.so - %{_libdir}/qemu-kvm/chardev-spice.so -%endif -%ifarch x86_64 - %{_libdir}/qemu-kvm/hw-display-qxl.so -%endif %{_libdir}/qemu-kvm/hw-display-virtio-gpu.so %ifnarch s390x %{_libdir}/qemu-kvm/hw-display-virtio-gpu-pci.so %endif -%if 0%{have_opengl} - %{_libdir}/qemu-kvm/ui-opengl.so -%endif %files -n qemu-kiwi %defattr(-,root,root) @@ -1479,9 +1521,57 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %files block-ssh %{_libdir}/qemu-kvm/block-ssh.so +%if 0%{have_spice} +%files ui-spice + %{_libdir}/qemu-kvm/hw-usb-smartcard.so + %{_libdir}/qemu-kvm/audio-spice.so + %{_libdir}/qemu-kvm/ui-spice-core.so + %{_libdir}/qemu-kvm/chardev-spice.so +%ifarch x86_64 + %{_libdir}/qemu-kvm/hw-display-qxl.so +%endif +%endif + +%if 0%{have_opengl} +%files ui-opengl + %{_libdir}/qemu-kvm/ui-egl-headless.so + %{_libdir}/qemu-kvm/ui-opengl.so +%endif + %changelog -* Mon Mar 15 2021 Miroslav Rezanina - 5.2.0-11.el9 +* Sat Mar 20 2021 Danilo Cesar Lemes de Paula - 5.2.0-14.el8 +- kvm-vhost-user-blk-fix-blkcfg-num_queues-endianness.patch [bz#1937004] +- kvm-block-export-fix-blk_size-double-byteswap.patch [bz#1937004] +- kvm-block-export-use-VIRTIO_BLK_SECTOR_BITS.patch [bz#1937004] +- kvm-block-export-fix-vhost-user-blk-export-sector-number.patch [bz#1937004] +- 
kvm-block-export-port-virtio-blk-discard-write-zeroes-in.patch [bz#1937004] +- kvm-block-export-port-virtio-blk-read-write-range-check.patch [bz#1937004] +- kvm-spec-ui-spice-sub-package.patch [bz#1936373] +- kvm-spec-ui-opengl-sub-package.patch [bz#1936373] +- Resolves: bz#1937004 + (vhost-user-blk server endianness and input validation fixes) +- Resolves: bz#1936373 + (move spice & opengl modules to rpm subpackages) + +* Tue Mar 16 2021 Danilo Cesar Lemes de Paula - 5.2.0-13.el8 +- kvm-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch [bz#1934158] +- Resolves: bz#1934158 + (Windows guest looses network connectivity when NIC was configured with static IP) + +* Mon Mar 15 2021 Danilo Cesar Lemes de Paula - 5.2.0-12.el8 +- kvm-scsi-disk-move-scsi_handle_rw_error-earlier.patch [bz#1927530] +- kvm-scsi-disk-do-not-complete-requests-early-for-rerror-.patch [bz#1927530] +- kvm-scsi-introduce-scsi_sense_from_errno.patch [bz#1927530] +- kvm-scsi-disk-pass-SCSI-status-to-scsi_handle_rw_error.patch [bz#1927530] +- kvm-scsi-disk-pass-guest-recoverable-errors-through-even.patch [bz#1927530] +- kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch [bz#1936948] +- Resolves: bz#1927530 + (RHEL8 Hypervisor - OVIRT - Issues seen on a virtualization guest with direct passthrough LUNS pausing when a host gets a Thin threshold warning) +- Resolves: bz#1936948 + (CVE-2021-20221 virt:av/qemu-kvm: qemu: out-of-bound heap buffer access via an interrupt ID field [rhel-av-8.4.0]) + +* Mon Mar 08 2021 Danilo Cesar Lemes de Paula - 5.2.0-11.el8 - kvm-qxl-set-qxl.ssd.dcl.con-on-secondary-devices.patch [bz#1932190] - kvm-qxl-also-notify-the-rendering-is-done-when-skipping-.patch [bz#1932190] - kvm-virtiofsd-Save-error-code-early-at-the-failure-calls.patch [bz#1935071] From e30bd9398b92b77098296c73bb38bcfa78bd107a Mon Sep 17 00:00:00 2001 From: Mohan Boddu Date: Fri, 16 Apr 2021 05:00:38 +0000 Subject: [PATCH 115/195] - Rebuilt for RHEL 9 BETA on Apr 15th 2021. 
Related: rhbz#1947937 Signed-off-by: Mohan Boddu --- qemu-kvm.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 90082d6..54b5070 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -70,7 +70,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.2.0 -Release: 14%{?dist} +Release: 15%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -1540,6 +1540,9 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %changelog +* Fri Apr 16 2021 Mohan Boddu - 15:5.2.0-15 +- Rebuilt for RHEL 9 BETA on Apr 15th 2021. Related: rhbz#1947937 + * Sat Mar 20 2021 Danilo Cesar Lemes de Paula - 5.2.0-14.el8 - kvm-vhost-user-blk-fix-blkcfg-num_queues-endianness.patch [bz#1937004] - kvm-block-export-fix-blk_size-double-byteswap.patch [bz#1937004] From 93978e9d19e858a71c37ebca1aa8204d5a00fbbf Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 22 Apr 2021 13:57:41 +0200 Subject: [PATCH 116/195] Add README file Resolves: rhbz#1948979 --- README.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 README.rst diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..4ca14c6 --- /dev/null +++ b/README.rst @@ -0,0 +1,18 @@ +=================== +qemu-kvm development +=================== + +qemu-kvm is maintained in a `source tree`_ rather than directly in dist-git +using packit service that provides way to develope using regular source code +structure and provides way to generate SRPM and build using koji service. + +Developers deliver all changes to source-git using merge request. Only maintainers +will be pushing changes sent to source-git to dist-git. + +Each release in dist-git is tagged in the source repository so you can easily +check out the source tree for a build. 
The tags are in the format +name-version-release, but note release doesn't contain the dist tag since the +source can be built in different build roots (Fedora, CentOS, etc.) + +.. _source tree: https://gitlab.com/redhat/centos-stream/src/qemu-kvm + From 3bdeb95dd5206a49831ea59cd117ae654f33ab0f Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 26 Apr 2021 14:29:58 +0200 Subject: [PATCH 117/195] * Mon Apr 26 2021 Miroslav Rezanina - 5.2.0-16 - kvm-Limit-build-on-Power-to-qemu-img-and-qemu-ga-only.patch [bz#1944056] - Resolves: bz#1944056 (Do not build qemu-kvm for Power) --- qemu-kvm.spec | 63 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 54b5070..91601d0 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -70,7 +70,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.2.0 -Release: 15%{?dist} +Release: 16%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -458,18 +458,12 @@ Requires: edk2-ovmf Requires: edk2-aarch64 %endif -%ifarch %{power64} -Requires: SLOF -%endif Requires: libseccomp >= 2.4.0 # For compressed guest memory dumps Requires: lzo snappy %if %{have_kvm_setup} Requires(post): systemd-units Requires(preun): systemd-units - %ifarch %{power64} -Requires: powerpc-utils - %endif %endif Requires: libusbx >= 1.0.23 %if %{have_usbredir} @@ -650,6 +644,7 @@ mkdir slirp %global qemu_kvm_build qemu_kvm_build %global qemu_kiwi_build qemu_kiwi_src/build +%ifnarch %{power64} # XXX: ugly hack to copy source tree into a new folder. # it allows to build qemu-kiwi without touching the original source tree. # This is required as the build isolation is not 100% as we also have to @@ -660,9 +655,9 @@ mkdir slirp cp -fpr . 
../qemu_kiwi_src mv ../qemu_kiwi_src ./qemu_kiwi_src mkdir -p %{qemu_kiwi_build} +%endif mkdir -p %{qemu_kvm_build} - %build %global buildarch %{kvm_target}-softmmu @@ -892,6 +887,21 @@ echo "===" cat config-host.mak echo "===" +%ifarch %{power64} +make V=1 %{?_smp_mflags} $buildldflags qemu-img +make V=1 %{?_smp_mflags} $buildldflags qemu-io +make V=1 %{?_smp_mflags} $buildldflags qemu-nbd +make V=1 %{?_smp_mflags} $buildldflags storage-daemon/qemu-storage-daemon + +make V=1 %{?_smp_mflags} $buildldflags docs/qemu-img.1 +make V=1 %{?_smp_mflags} $buildldflags docs/qemu-nbd.8 +make V=1 %{?_smp_mflags} $buildldflags docs/qemu-storage-daemon.1 +make V=1 %{?_smp_mflags} $buildldflags docs/qemu-storage-daemon-qmp-ref.7 + +make V=1 %{?_smp_mflags} $buildldflags qga/qemu-ga +make V=1 %{?_smp_mflags} $buildldflags docs/qemu-ga.8 +%else + make V=1 %{?_smp_mflags} $buildldflags # Setup back compat qemu-kvm binary @@ -1002,13 +1012,31 @@ make V=1 %{?_smp_mflags} $buildldflags trace/trace-events-all > qemu-kiwi-simpletrace.stp cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kiwi +%endif popd %install pushd %{qemu_kvm_build} + %define _udevdir %(pkg-config --variable=udevdir udev) %define _udevrulesdir %{_udevdir}/rules.d +%ifarch %{power64} +install -D -p -m 0755 qemu-img $RPM_BUILD_ROOT%{_bindir}/qemu-img +install -D -p -m 0755 qemu-io $RPM_BUILD_ROOT%{_bindir}/qemu-io +install -D -p -m 0755 qemu-nbd $RPM_BUILD_ROOT%{_bindir}/qemu-nbd +install -D -p -m 0755 storage-daemon/qemu-storage-daemon $RPM_BUILD_ROOT%{_bindir}/qemu-storage-daemon + +mkdir -p $RPM_BUILD_ROOT%{_mandir}/man1/ +mkdir -p $RPM_BUILD_ROOT%{_mandir}/man7/ +mkdir -p $RPM_BUILD_ROOT%{_mandir}/man8/ + +install -D -p -m 644 docs/qemu-img.1* $RPM_BUILD_ROOT%{_mandir}/man1 +install -D -p -m 644 docs/qemu-nbd.8* $RPM_BUILD_ROOT%{_mandir}/man8 +install -D -p -p -m 644 docs/qemu-storage-daemon.1* $RPM_BUILD_ROOT%{_mandir}/man1 +install -D -p -p -m 644 docs/qemu-storage-daemon-qmp-ref.7* 
$RPM_BUILD_ROOT%{_mandir}/man7 +install -D -p -m 644 docs/qemu-ga.8* $RPM_BUILD_ROOT%{_mandir}/man8 +%else install -D -p -m 0644 %{SOURCE4} $RPM_BUILD_ROOT%{_unitdir}/ksm.service install -D -p -m 0644 %{SOURCE5} $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig/ksm install -D -p -m 0755 ksmctl $RPM_BUILD_ROOT%{_libexecdir}/ksmctl @@ -1068,11 +1096,12 @@ make DESTDIR=$RPM_BUILD_ROOT \ install mkdir -p $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset +%endif # Install qemu-guest-agent service and udev rules -install -m 0644 %{_sourcedir}/qemu-guest-agent.service %{buildroot}%{_unitdir} -install -m 0644 %{_sourcedir}/qemu-ga.sysconfig %{buildroot}%{_sysconfdir}/sysconfig/qemu-ga -install -m 0644 %{_sourcedir}/99-qemu-guest-agent.rules %{buildroot}%{_udevrulesdir} +install -D -m 0644 %{_sourcedir}/qemu-guest-agent.service %{buildroot}%{_unitdir}/qemu-guest-agent.service +install -D -m 0644 %{_sourcedir}/qemu-ga.sysconfig %{buildroot}%{_sysconfdir}/sysconfig/qemu-ga +install -D -m 0644 %{_sourcedir}/99-qemu-guest-agent.rules %{buildroot}%{_udevrulesdir}/99-qemu-guest-agent.rules # - the fsfreeze hook script: install -D --preserve-timestamps \ @@ -1099,6 +1128,7 @@ mkdir -p -v $RPM_BUILD_ROOT%{_localstatedir}/log/qemu-ga/ mkdir -p $RPM_BUILD_ROOT%{_bindir} install -c -m 0755 qga/qemu-ga ${RPM_BUILD_ROOT}%{_bindir}/qemu-ga +%ifnarch %{power64} mkdir -p $RPM_BUILD_ROOT%{_mandir}/man8 install -m 0755 %{kvm_target}-softmmu/qemu-system-%{kvm_target} $RPM_BUILD_ROOT%{_libexecdir}/qemu-kvm @@ -1343,6 +1373,7 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %postun -n qemu-kvm-common %systemd_postun_with_restart ksm.service %systemd_postun_with_restart ksmtuned.service +%endif %post -n qemu-guest-agent %systemd_post qemu-guest-agent.service @@ -1351,6 +1382,7 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %postun -n qemu-guest-agent %systemd_postun_with_restart qemu-guest-agent.service +%ifnarch %{power64} %files # Deliberately empty @@ -1476,6 +1508,7 
@@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %{_datadir}/systemtap/tapset/qemu-kiwi.stp %{_datadir}/systemtap/tapset/qemu-kiwi-log.stp %{_datadir}/systemtap/tapset/qemu-kiwi-simpletrace.stp +%endif %files -n qemu-img %defattr(-,root,root) @@ -1501,6 +1534,7 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %{_datadir}/%{name}/qemu-ga %dir %{_localstatedir}/log/qemu-ga +%ifnarch %{power64} %files tests %{testsdir} @@ -1537,9 +1571,14 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %{_libdir}/qemu-kvm/ui-egl-headless.so %{_libdir}/qemu-kvm/ui-opengl.so %endif - +%endif %changelog +* Mon Apr 26 2021 Miroslav Rezanina - 5.2.0-16 +- kvm-Limit-build-on-Power-to-qemu-img-and-qemu-ga-only.patch [bz#1944056] +- Resolves: bz#1944056 + (Do not build qemu-kvm for Power) + * Fri Apr 16 2021 Mohan Boddu - 15:5.2.0-15 - Rebuilt for RHEL 9 BETA on Apr 15th 2021. Related: rhbz#1947937 From 28609d56ccbcd2cbc72aaf900acb287bcdbafb6f Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 30 Apr 2021 09:28:51 +0200 Subject: [PATCH 118/195] Rebase to qemu 6.0.0 Resolves: rhbz#1872569 --- .gitignore | 1 + ...at-Adding-slirp-to-the-exploded-tree.patch | 17001 ---------------- 0004-Initial-redhat-build.patch | 230 + ...0005-Enable-disable-devices-for-RHEL.patch | 334 +- 0005-Initial-redhat-build.patch | 307 - ...Machine-type-related-general-changes.patch | 310 +- ...ch => 0007-Add-aarch64-machine-types.patch | 190 +- ...atch => 0008-Add-ppc64-machine-types.patch | 148 +- ...atch => 0009-Add-s390x-machine-types.patch | 50 +- ...tch => 0010-Add-x86_64-machine-types.patch | 199 +- ...heck.patch => 0011-Enable-make-check.patch | 206 +- ...mber-of-devices-that-can-be-assigned.patch | 23 +- ...Add-support-statement-to-help-output.patch | 10 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 28 +- ...documentation-instead-of-qemu-system.patch | 24 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 10 +- 
...e-at-least-64kiB-pages-for-downstrea.patch | 8 +- ...x-blockdev-reopen-API-with-feature-f.patch | 24 +- 0021-redhat-Define-hw_compat_8_3.patch | 70 - ...r_machine_rhel_default_class_options.patch | 66 - ...efine-pseries-rhel8.4.0-machine-type.patch | 70 - ...-s390x-add-rhel-8.4.0-compat-machine.patch | 72 - ...pc_open-read-the-full-dynamic-header.patch | 56 - 0028-GCC-11-warnings-hacks.patch | 163 - ...-problematic-tests-for-initial-build.patch | 41 - 0030-Revert-GCC-11-warnings-hacks.patch | 166 - ...0x-Use-strpadcpy-for-copying-vm-name.patch | 84 - ..._out_op-to-arrays-of-TCG_MAX_OP_ARGS.patch | 138 - ...th-Simplify-_eth_get_rss_ex_dst_addr.patch | 52 - ...net-eth-Fix-stack-buffer-overflow-in.patch | 196 - ...me-Implement-fake-truncate-coroutine.patch | 84 - 0037-build-system-use-b_staticpic-false.patch | 77 - ...-overflow-in-spapr_numa_associativit.patch | 131 - ...ci-Fixup-capabilities-ordering-again.patch | 175 - ...ix-Send-CCW-address-on-s390x-with-th.patch | 132 - 0041-AArch64-machine-types-cleanup.patch | 188 - 0042-hw-arm-virt-Add-8.4-Machine-type.patch | 55 - ...mory_region_notify_one-to-memory_reg.patch | 146 - 0045-memory-Add-IOMMUTLBEvent.patch | 647 - ..._NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch | 88 - ...-page-walking-on-device-iotlb-invali.patch | 57 - ...range-assertion-if-notifier-is-DEVIO.patch | 69 - 0049-RHEL-Switch-pvpanic-test-to-q35.patch | 47 - 0050-8.4-x86-machine-type.patch | 144 - ...hed-translation-in-case-it-points-to.patch | 153 - 0054-Drop-bogus-IPv6-messages.patch | 51 - README.rst | 18 - ...essing-BDS-twice-in-bdrv_set_aio_con.patch | 96 - ...set_aio_context-context-requirements.patch | 118 - ...-export-fix-blk_size-double-byteswap.patch | 44 - ...-vhost-user-blk-export-sector-number.patch | 53 - ...t-virtio-blk-discard-write-zeroes-in.patch | 199 - ...rt-virtio-blk-read-write-range-check.patch | 70 - ...ck-export-use-VIRTIO_BLK_SECTOR_BITS.patch | 84 - ...ve-blk_exp_close_all-to-qemu_cleanup.patch | 101 - 
...etach-existing-iochannel-from-aio_co.patch | 125 - ...nter-connection-coroutine-if-it-s-pr.patch | 124 - kvm-config-enable-VFIO_CCW.patch | 42 - ...efault-configs-Enable-vhost-user-blk.patch | 39 - ...orage-daemon-1-manpage-to-meson.buil.patch | 50 - ...s-add-qemu-storage-daemon-1-man-page.patch | 218 - ...mu-storage-daemon-qmp-ref-7-man-page.patch | 111 - ...docs-set-CONFDIR-when-running-sphinx.patch | 56 - ...of-this-two-functions-already-have-p.patch | 121 - ...external-partially_hotplugged-proper.patch | 52 - kvm-failover-Remove-memory-leak.patch | 60 - kvm-failover-Remove-primary_dev-member.patch | 158 - kvm-failover-Remove-primary_device_dict.patch | 96 - kvm-failover-Remove-primary_device_opts.patch | 110 - kvm-failover-Remove-unused-parameter.patch | 68 - ...name-bool-to-failover_primary_hidden.patch | 113 - ...lover-Rename-function-to-hide_device.patch | 127 - ...name-to-failover_find_primary_device.patch | 77 - ...ays-atomics-for-primary_should_be_hi.patch | 49 - ...t-need-to-cache-primary_device_id-an.patch | 128 - kvm-failover-fix-indentantion.patch | 171 - ...r-g_strcmp0-knows-how-to-handle-NULL.patch | 48 - ...lover-make-sure-that-id-always-exist.patch | 68 - ...-bus-is-only-used-once-and-where-it-.patch | 87 - ...ev_device_add-returns-err-or-dev-set.patch | 55 - ...failover_find_primary_device-error-p.patch | 72 - kvm-failover-remove-standby_id-variable.patch | 89 - ...-should_be_hidden-should-take-a-bool.patch | 144 - ...ver-simplify-failover_unplug_primary.patch | 86 - ...mplify-qdev_device_add-failover-case.patch | 70 - kvm-failover-simplify-qdev_device_add.patch | 89 - ...ver-simplify-virtio_net_find_primary.patch | 70 - ...plit-failover_find_primary_device_id.patch | 128 - ...net_connect_failover_devices-does-no.patch | 80 - ...x-addr_mask-for-range-based-invalida.patch | 66 - ...Fix-interrupt-ID-in-GICD_SGIR-regist.patch | 80 - ...port-for-AMD-EPYC-3rd-generation-pro.patch | 213 - ...restore-device-paths-for-pre-5.1-vms.patch | 177 - 
...bitmap-Allow-control-of-bitmap-persi.patch | 167 - ...bitmap-Use-struct-for-alias-map-inne.patch | 143 - ...bd-make-nbd_read-return-EIO-on-error.patch | 72 - ...Quiesce-coroutines-on-context-switch.patch | 249 - kvm-pci-add-romsize-property.patch | 137 - kvm-pci-reject-too-large-ROMs.patch | 89 - ...ink-state-active-if-the-slot-is-empt.patch | 146 - ..._cpus-to-710-on-pc-q35-rhel8-machine.patch | 45 - ...-Add-test-case-for-modifying-persist.patch | 154 - ...-SOMAXCONN-for-socket-listen-backlog.patch | 96 - ...emu-storage-daemon-Enable-object-add.patch | 49 - ...the-rendering-is-done-when-skipping-.patch | 49 - ...qxl.ssd.dcl.con-on-secondary-devices.patch | 50 - ...devices-for-exporting-upstream-machi.patch | 135 - ...-complete-requests-early-for-rerror-.patch | 50 - ...sk-move-scsi_handle_rw_error-earlier.patch | 222 - ...-SCSI-status-to-scsi_handle_rw_error.patch | 106 - ...uest-recoverable-errors-through-even.patch | 106 - ...removal-race-vs-IO-restart-callback-.patch | 62 - ...scsi-introduce-scsi_sense_from_errno.patch | 181 - ...-Adjust-firmware-path-of-PCI-devices.patch | 205 - ...llow-memory-unplug-to-always-succeed.patch | 100 - ...ndling-of-memory-unplug-with-old-gue.patch | 168 - ...e-daemon-Call-bdrv_close_all-on-exit.patch | 48 - ...trip-l-and-ll-from-systemtap-format-.patch | 69 - ...valid-vdev-in-vhost_backend_handle_i.patch | 75 - ...MU-and-virtio-iommu-on-dev-iotlb-sup.patch | 124 - ...blk-fix-blkcfg-num_queues-endianness.patch | 68 - ...sponding-memory_listener_unregister-.patch | 234 - ...disabled-flag-property-to-hw_compat_.patch | 87 - kvm-virtio-net-add-missing-object_unref.patch | 66 - ...mapped-security.capability-xattr-as-.patch | 224 - ...rror-code-early-at-the-failure-calls.patch | 100 - ...ofsd-extract-lo_do_open-from-lo_open.patch | 157 - ...ally-return-inode-pointer-from-lo_do.patch | 121 - ...t-opening-of-special-files-CVE-2020-.patch | 311 - kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch | 71 - 
...-cpu-Populate-SVM-CPUID-feature-bits.patch | 91 - qemu-kvm.spec | 427 +- rpminspect.yaml | 5 + sources | 2 +- 134 files changed, 1189 insertions(+), 30893 deletions(-) delete mode 100644 0001-redhat-Adding-slirp-to-the-exploded-tree.patch create mode 100644 0004-Initial-redhat-build.patch rename 0006-Enable-disable-devices-for-RHEL.patch => 0005-Enable-disable-devices-for-RHEL.patch (73%) delete mode 100644 0005-Initial-redhat-build.patch rename 0007-Machine-type-related-general-changes.patch => 0006-Machine-type-related-general-changes.patch (78%) rename 0008-Add-aarch64-machine-types.patch => 0007-Add-aarch64-machine-types.patch (73%) rename 0009-Add-ppc64-machine-types.patch => 0008-Add-ppc64-machine-types.patch (84%) rename 0010-Add-s390x-machine-types.patch => 0009-Add-s390x-machine-types.patch (76%) rename 0011-Add-x86_64-machine-types.patch => 0010-Add-x86_64-machine-types.patch (88%) rename 0012-Enable-make-check.patch => 0011-Enable-make-check.patch (65%) rename 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch => 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch (85%) rename 0014-Add-support-statement-to-help-output.patch => 0013-Add-support-statement-to-help-output.patch (88%) rename 0015-globally-limit-the-maximum-number-of-CPUs.patch => 0014-globally-limit-the-maximum-number-of-CPUs.patch (63%) rename 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch => 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch (91%) rename 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch => 0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch (92%) rename 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch => 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch (92%) rename 0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch => 0018-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch (72%) delete mode 100644 0021-redhat-Define-hw_compat_8_3.patch delete mode 
100644 0022-redhat-Add-spapr_machine_rhel_default_class_options.patch delete mode 100644 0023-redhat-Define-pseries-rhel8.4.0-machine-type.patch delete mode 100644 0024-redhat-s390x-add-rhel-8.4.0-compat-machine.patch delete mode 100644 0027-block-vpc-Make-vpc_open-read-the-full-dynamic-header.patch delete mode 100644 0028-GCC-11-warnings-hacks.patch delete mode 100644 0029-Disable-problematic-tests-for-initial-build.patch delete mode 100644 0030-Revert-GCC-11-warnings-hacks.patch delete mode 100644 0031-s390x-Use-strpadcpy-for-copying-vm-name.patch delete mode 100644 0032-tcg-Restrict-tcg_out_op-to-arrays-of-TCG_MAX_OP_ARGS.patch delete mode 100644 0033-net-eth-Simplify-_eth_get_rss_ex_dst_addr.patch delete mode 100644 0034-net-eth-Fix-stack-buffer-overflow-in.patch delete mode 100644 0035-block-nvme-Implement-fake-truncate-coroutine.patch delete mode 100644 0037-build-system-use-b_staticpic-false.patch delete mode 100644 0038-spapr-Fix-buffer-overflow-in-spapr_numa_associativit.patch delete mode 100644 0039-usb-hcd-xhci-pci-Fixup-capabilities-ordering-again.patch delete mode 100644 0040-qga-commands-posix-Send-CCW-address-on-s390x-with-th.patch delete mode 100644 0041-AArch64-machine-types-cleanup.patch delete mode 100644 0042-hw-arm-virt-Add-8.4-Machine-type.patch delete mode 100644 0044-memory-Rename-memory_region_notify_one-to-memory_reg.patch delete mode 100644 0045-memory-Add-IOMMUTLBEvent.patch delete mode 100644 0046-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch delete mode 100644 0047-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch delete mode 100644 0048-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch delete mode 100644 0049-RHEL-Switch-pvpanic-test-to-q35.patch delete mode 100644 0050-8.4-x86-machine-type.patch delete mode 100644 0051-memory-clamp-cached-translation-in-case-it-points-to.patch delete mode 100644 0054-Drop-bogus-IPv6-messages.patch delete mode 100644 README.rst delete mode 100644 
kvm-block-Avoid-processing-BDS-twice-in-bdrv_set_aio_con.patch delete mode 100644 kvm-block-Honor-blk_set_aio_context-context-requirements.patch delete mode 100644 kvm-block-export-fix-blk_size-double-byteswap.patch delete mode 100644 kvm-block-export-fix-vhost-user-blk-export-sector-number.patch delete mode 100644 kvm-block-export-port-virtio-blk-discard-write-zeroes-in.patch delete mode 100644 kvm-block-export-port-virtio-blk-read-write-range-check.patch delete mode 100644 kvm-block-export-use-VIRTIO_BLK_SECTOR_BITS.patch delete mode 100644 kvm-block-move-blk_exp_close_all-to-qemu_cleanup.patch delete mode 100644 kvm-block-nbd-only-detach-existing-iochannel-from-aio_co.patch delete mode 100644 kvm-block-nbd-only-enter-connection-coroutine-if-it-s-pr.patch delete mode 100644 kvm-config-enable-VFIO_CCW.patch delete mode 100644 kvm-default-configs-Enable-vhost-user-blk.patch delete mode 100644 kvm-docs-Add-qemu-storage-daemon-1-manpage-to-meson.buil.patch delete mode 100644 kvm-docs-add-qemu-storage-daemon-1-man-page.patch delete mode 100644 kvm-docs-generate-qemu-storage-daemon-qmp-ref-7-man-page.patch delete mode 100644 kvm-docs-set-CONFDIR-when-running-sphinx.patch delete mode 100644 kvm-failover-Caller-of-this-two-functions-already-have-p.patch delete mode 100644 kvm-failover-Remove-external-partially_hotplugged-proper.patch delete mode 100644 kvm-failover-Remove-memory-leak.patch delete mode 100644 kvm-failover-Remove-primary_dev-member.patch delete mode 100644 kvm-failover-Remove-primary_device_dict.patch delete mode 100644 kvm-failover-Remove-primary_device_opts.patch delete mode 100644 kvm-failover-Remove-unused-parameter.patch delete mode 100644 kvm-failover-Rename-bool-to-failover_primary_hidden.patch delete mode 100644 kvm-failover-Rename-function-to-hide_device.patch delete mode 100644 kvm-failover-Rename-to-failover_find_primary_device.patch delete mode 100644 kvm-failover-Use-always-atomics-for-primary_should_be_hi.patch delete mode 100644 
kvm-failover-We-don-t-need-to-cache-primary_device_id-an.patch delete mode 100644 kvm-failover-fix-indentantion.patch delete mode 100644 kvm-failover-g_strcmp0-knows-how-to-handle-NULL.patch delete mode 100644 kvm-failover-make-sure-that-id-always-exist.patch delete mode 100644 kvm-failover-primary-bus-is-only-used-once-and-where-it-.patch delete mode 100644 kvm-failover-qdev_device_add-returns-err-or-dev-set.patch delete mode 100644 kvm-failover-remove-failover_find_primary_device-error-p.patch delete mode 100644 kvm-failover-remove-standby_id-variable.patch delete mode 100644 kvm-failover-should_be_hidden-should-take-a-bool.patch delete mode 100644 kvm-failover-simplify-failover_unplug_primary.patch delete mode 100644 kvm-failover-simplify-qdev_device_add-failover-case.patch delete mode 100644 kvm-failover-simplify-qdev_device_add.patch delete mode 100644 kvm-failover-simplify-virtio_net_find_primary.patch delete mode 100644 kvm-failover-split-failover_find_primary_device_id.patch delete mode 100644 kvm-failover-virtio_net_connect_failover_devices-does-no.patch delete mode 100644 kvm-hw-arm-smmuv3-Fix-addr_mask-for-range-based-invalida.patch delete mode 100644 kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch delete mode 100644 kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch delete mode 100644 kvm-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch delete mode 100644 kvm-migration-dirty-bitmap-Allow-control-of-bitmap-persi.patch delete mode 100644 kvm-migration-dirty-bitmap-Use-struct-for-alias-map-inne.patch delete mode 100644 kvm-nbd-make-nbd_read-return-EIO-on-error.patch delete mode 100644 kvm-nbd-server-Quiesce-coroutines-on-context-switch.patch delete mode 100644 kvm-pci-add-romsize-property.patch delete mode 100644 kvm-pci-reject-too-large-ROMs.patch delete mode 100644 kvm-pcie-don-t-set-link-state-active-if-the-slot-is-empt.patch delete mode 100644 kvm-q35-Increase-max_cpus-to-710-on-pc-q35-rhel8-machine.patch delete mode 
100644 kvm-qemu-iotests-300-Add-test-case-for-modifying-persist.patch delete mode 100644 kvm-qemu-nbd-Use-SOMAXCONN-for-socket-listen-backlog.patch delete mode 100644 kvm-qemu-storage-daemon-Enable-object-add.patch delete mode 100644 kvm-qxl-also-notify-the-rendering-is-done-when-skipping-.patch delete mode 100644 kvm-qxl-set-qxl.ssd.dcl.con-on-secondary-devices.patch delete mode 100644 kvm-redhat-Add-some-devices-for-exporting-upstream-machi.patch delete mode 100644 kvm-scsi-disk-do-not-complete-requests-early-for-rerror-.patch delete mode 100644 kvm-scsi-disk-move-scsi_handle_rw_error-earlier.patch delete mode 100644 kvm-scsi-disk-pass-SCSI-status-to-scsi_handle_rw_error.patch delete mode 100644 kvm-scsi-disk-pass-guest-recoverable-errors-through-even.patch delete mode 100644 kvm-scsi-fix-device-removal-race-vs-IO-restart-callback-.patch delete mode 100644 kvm-scsi-introduce-scsi_sense_from_errno.patch delete mode 100644 kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch delete mode 100644 kvm-spapr-Allow-memory-unplug-to-always-succeed.patch delete mode 100644 kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch delete mode 100644 kvm-storage-daemon-Call-bdrv_close_all-on-exit.patch delete mode 100644 kvm-tracetool-also-strip-l-and-ll-from-systemtap-format-.patch delete mode 100644 kvm-vhost-Check-for-valid-vdev-in-vhost_backend_handle_i.patch delete mode 100644 kvm-vhost-Unbreak-SMMU-and-virtio-iommu-on-dev-iotlb-sup.patch delete mode 100644 kvm-vhost-user-blk-fix-blkcfg-num_queues-endianness.patch delete mode 100644 kvm-virtio-Add-corresponding-memory_listener_unregister-.patch delete mode 100644 kvm-virtio-move-use-disabled-flag-property-to-hw_compat_.patch delete mode 100644 kvm-virtio-net-add-missing-object_unref.patch delete mode 100644 kvm-virtiofs-drop-remapped-security.capability-xattr-as-.patch delete mode 100644 kvm-virtiofsd-Save-error-code-early-at-the-failure-calls.patch delete mode 100644 
kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch delete mode 100644 kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch delete mode 100644 kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch delete mode 100644 kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch delete mode 100644 kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch create mode 100644 rpminspect.yaml diff --git a/.gitignore b/.gitignore index 72537aa..9bf151b 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ /qemu-5.2.0-rc1.tar.xz /qemu-5.2.0-rc3.tar.xz /qemu-5.2.0.tar.xz +/qemu-6.0.0.tar.xz diff --git a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch deleted file mode 100644 index 4895179..0000000 --- a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch +++ /dev/null @@ -1,17001 +0,0 @@ -From f04f3d3ab0bb9ffd06a16ee5157f08bcb4f5f459 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 2 Dec 2020 07:38:31 +0100 -Subject: redhat: Adding slirp to the exploded tree - -RH-Author: Danilo de Paula -Message-id: <20190907020756.8619-1-ddepaula@redhat.com> -Patchwork-id: 90309 -O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] redhat: Adding slirp to the exploded tree -Bugzilla: -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Wainer dos Santos Moschetta - -Until qemu-kvm-3.1 slirp used to live as a regular folder in qemu-kvm. -After that it got moved into its own submodule. Which means it's not -part of the qemu-kvm git tree anymore. - -This passed unoticed for RHEL-AV-8.0.1 and 8.1.0 because qemu still ships -the code in the tarball. That's why scratch builds still works (it's based in -the tarball content). - -As we're receiving some CVE's against slirp, we need a way to patch -slirp in RHEL-8.1.0 without handling as a separate package (as we do for -firmwares). - -The simplest solution is to copy the slirp folder from the tarball into the -exploded tree. 
- -To be able to do that, I had to make some changes: - -slirp needs to be removed from .gitmodules, otherwise git complains -about files on it. - -Since "make -C redhat rh-brew" uses the tarball and apply all the -patches on top of it, we need to remove the folder from the tarball before applying -the patch (because we are actually re-applying them). - -We also need to use --ignore-submodule while generating the patches for -scratch-build, otherwise it will include some weird definition of the -slirp folder in the patch, something that /usr/bin/patch gets mad with. - -After that I compared the patch list, after and before this change, and -saw no major differences. - -This is an exploded-tree-only change and shouldn't be applied to dist-git. - -Signed-off-by: Danilo C. L. de Paula - -Rebase notes (5.1.0-rc2): - - Update slirp directory to commit ce94eba2042d52a0ba3d9e252ebce86715e94275 (used upstream) - -Rebase notes (5.2.0-rc4): - - Update slirp directory to commit 8f43a99191afb47ca3f3c6972f6306209f367ece (used upstream) ---- - .gitmodules | 3 - - slirp/.clang-format | 58 ++ - slirp/.gitignore | 10 + - slirp/.gitlab-ci.yml | 27 + - slirp/CHANGELOG.md | 88 ++ - slirp/COPYRIGHT | 62 ++ - slirp/Makefile | 62 ++ - slirp/README.md | 60 ++ - slirp/build-aux/git-version-gen | 158 ++++ - slirp/build-aux/meson-dist | 16 + - slirp/meson.build | 134 +++ - slirp/src/arp_table.c | 92 ++ - slirp/src/bootp.c | 369 ++++++++ - slirp/src/bootp.h | 129 +++ - slirp/src/cksum.c | 179 ++++ - slirp/src/debug.h | 51 + - slirp/src/dhcpv6.c | 224 +++++ - slirp/src/dhcpv6.h | 68 ++ - slirp/src/dnssearch.c | 306 ++++++ - slirp/src/if.c | 213 +++++ - slirp/src/if.h | 25 + - slirp/src/ip.h | 242 +++++ - slirp/src/ip6.h | 214 +++++ - slirp/src/ip6_icmp.c | 433 +++++++++ - slirp/src/ip6_icmp.h | 219 +++++ - slirp/src/ip6_input.c | 85 ++ - slirp/src/ip6_output.c | 39 + - slirp/src/ip_icmp.c | 492 ++++++++++ - slirp/src/ip_icmp.h | 166 ++++ - slirp/src/ip_input.c | 461 +++++++++ - 
slirp/src/ip_output.c | 169 ++++ - slirp/src/libslirp-version.h.in | 24 + - slirp/src/libslirp.h | 171 ++++ - slirp/src/libslirp.map | 30 + - slirp/src/main.h | 16 + - slirp/src/mbuf.c | 224 +++++ - slirp/src/mbuf.h | 127 +++ - slirp/src/misc.c | 390 ++++++++ - slirp/src/misc.h | 72 ++ - slirp/src/ncsi-pkt.h | 445 +++++++++ - slirp/src/ncsi.c | 197 ++++ - slirp/src/ndp_table.c | 87 ++ - slirp/src/sbuf.c | 168 ++++ - slirp/src/sbuf.h | 27 + - slirp/src/slirp.c | 1189 ++++++++++++++++++++++++ - slirp/src/slirp.h | 284 ++++++ - slirp/src/socket.c | 954 +++++++++++++++++++ - slirp/src/socket.h | 164 ++++ - slirp/src/state.c | 379 ++++++++ - slirp/src/stream.c | 120 +++ - slirp/src/stream.h | 35 + - slirp/src/tcp.h | 169 ++++ - slirp/src/tcp_input.c | 1539 +++++++++++++++++++++++++++++++ - slirp/src/tcp_output.c | 516 +++++++++++ - slirp/src/tcp_subr.c | 980 ++++++++++++++++++++ - slirp/src/tcp_timer.c | 286 ++++++ - slirp/src/tcp_timer.h | 130 +++ - slirp/src/tcp_var.h | 161 ++++ - slirp/src/tcpip.h | 104 +++ - slirp/src/tftp.c | 464 ++++++++++ - slirp/src/tftp.h | 54 ++ - slirp/src/udp.c | 365 ++++++++ - slirp/src/udp.h | 90 ++ - slirp/src/udp6.c | 173 ++++ - slirp/src/util.c | 428 +++++++++ - slirp/src/util.h | 189 ++++ - slirp/src/version.c | 8 + - slirp/src/vmstate.c | 444 +++++++++ - slirp/src/vmstate.h | 391 ++++++++ - 69 files changed, 16445 insertions(+), 3 deletions(-) - create mode 100644 slirp/.clang-format - create mode 100644 slirp/.gitignore - create mode 100644 slirp/.gitlab-ci.yml - create mode 100644 slirp/CHANGELOG.md - create mode 100644 slirp/COPYRIGHT - create mode 100644 slirp/Makefile - create mode 100644 slirp/README.md - create mode 100755 slirp/build-aux/git-version-gen - create mode 100755 slirp/build-aux/meson-dist - create mode 100644 slirp/meson.build - create mode 100644 slirp/src/arp_table.c - create mode 100644 slirp/src/bootp.c - create mode 100644 slirp/src/bootp.h - create mode 100644 slirp/src/cksum.c - create mode 100644 
slirp/src/debug.h - create mode 100644 slirp/src/dhcpv6.c - create mode 100644 slirp/src/dhcpv6.h - create mode 100644 slirp/src/dnssearch.c - create mode 100644 slirp/src/if.c - create mode 100644 slirp/src/if.h - create mode 100644 slirp/src/ip.h - create mode 100644 slirp/src/ip6.h - create mode 100644 slirp/src/ip6_icmp.c - create mode 100644 slirp/src/ip6_icmp.h - create mode 100644 slirp/src/ip6_input.c - create mode 100644 slirp/src/ip6_output.c - create mode 100644 slirp/src/ip_icmp.c - create mode 100644 slirp/src/ip_icmp.h - create mode 100644 slirp/src/ip_input.c - create mode 100644 slirp/src/ip_output.c - create mode 100644 slirp/src/libslirp-version.h.in - create mode 100644 slirp/src/libslirp.h - create mode 100644 slirp/src/libslirp.map - create mode 100644 slirp/src/main.h - create mode 100644 slirp/src/mbuf.c - create mode 100644 slirp/src/mbuf.h - create mode 100644 slirp/src/misc.c - create mode 100644 slirp/src/misc.h - create mode 100644 slirp/src/ncsi-pkt.h - create mode 100644 slirp/src/ncsi.c - create mode 100644 slirp/src/ndp_table.c - create mode 100644 slirp/src/sbuf.c - create mode 100644 slirp/src/sbuf.h - create mode 100644 slirp/src/slirp.c - create mode 100644 slirp/src/slirp.h - create mode 100644 slirp/src/socket.c - create mode 100644 slirp/src/socket.h - create mode 100644 slirp/src/state.c - create mode 100644 slirp/src/stream.c - create mode 100644 slirp/src/stream.h - create mode 100644 slirp/src/tcp.h - create mode 100644 slirp/src/tcp_input.c - create mode 100644 slirp/src/tcp_output.c - create mode 100644 slirp/src/tcp_subr.c - create mode 100644 slirp/src/tcp_timer.c - create mode 100644 slirp/src/tcp_timer.h - create mode 100644 slirp/src/tcp_var.h - create mode 100644 slirp/src/tcpip.h - create mode 100644 slirp/src/tftp.c - create mode 100644 slirp/src/tftp.h - create mode 100644 slirp/src/udp.c - create mode 100644 slirp/src/udp.h - create mode 100644 slirp/src/udp6.c - create mode 100644 slirp/src/util.c - create 
mode 100644 slirp/src/util.h - create mode 100644 slirp/src/version.c - create mode 100644 slirp/src/vmstate.c - create mode 100644 slirp/src/vmstate.h - -diff --git a/slirp/.clang-format b/slirp/.clang-format -new file mode 100644 -index 0000000000..17fb49fe65 ---- /dev/null -+++ b/slirp/.clang-format -@@ -0,0 +1,58 @@ -+# https://clang.llvm.org/docs/ClangFormat.html -+# https://clang.llvm.org/docs/ClangFormatStyleOptions.html -+--- -+Language: Cpp -+AlignAfterOpenBracket: Align -+AlignConsecutiveAssignments: false # although we like it, it creates churn -+AlignConsecutiveDeclarations: false -+AlignEscapedNewlinesLeft: true -+AlignOperands: true -+AlignTrailingComments: false # churn -+AllowAllParametersOfDeclarationOnNextLine: true -+AllowShortBlocksOnASingleLine: false -+AllowShortCaseLabelsOnASingleLine: false -+AllowShortFunctionsOnASingleLine: None -+AllowShortIfStatementsOnASingleLine: false -+AllowShortLoopsOnASingleLine: false -+AlwaysBreakAfterReturnType: None # AlwaysBreakAfterDefinitionReturnType is taken into account -+AlwaysBreakBeforeMultilineStrings: false -+BinPackArguments: true -+BinPackParameters: true -+BraceWrapping: -+ AfterControlStatement: false -+ AfterEnum: false -+ AfterFunction: true -+ AfterStruct: false -+ AfterUnion: false -+ BeforeElse: false -+ IndentBraces: false -+BreakBeforeBinaryOperators: None -+BreakBeforeBraces: Custom -+BreakBeforeTernaryOperators: false -+BreakStringLiterals: true -+ColumnLimit: 80 -+ContinuationIndentWidth: 4 -+Cpp11BracedListStyle: false -+DerivePointerAlignment: false -+DisableFormat: false -+IndentCaseLabels: false -+IndentWidth: 4 -+IndentWrappedFunctionNames: false -+KeepEmptyLinesAtTheStartOfBlocks: false -+MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? 
-+MacroBlockEnd: '.*_END$' -+MaxEmptyLinesToKeep: 2 -+PointerAlignment: Right -+ReflowComments: true -+SortIncludes: false -+SpaceAfterCStyleCast: false -+SpaceBeforeAssignmentOperators: true -+SpaceBeforeParens: ControlStatements -+SpaceInEmptyParentheses: false -+SpacesBeforeTrailingComments: 1 -+SpacesInContainerLiterals: true -+SpacesInParentheses: false -+SpacesInSquareBrackets: false -+Standard: Auto -+UseTab: Never -+... -diff --git a/slirp/CHANGELOG.md b/slirp/CHANGELOG.md -new file mode 100644 -index 0000000000..67b0a74195 ---- /dev/null -+++ b/slirp/CHANGELOG.md -@@ -0,0 +1,88 @@ -+# Changelog -+ -+All notable changes to this project will be documented in this file. -+ -+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), -+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -+ -+## [Unreleased] -+ -+### Added -+ -+### Changed -+ -+### Deprecated -+ -+### Fixed -+ -+## [4.2.0] - 2020-03-17 -+ -+### Added -+ -+ - New API function `slirp_add_unix`: add a forward rule to a Unix socket. -+ - New API function `slirp_remove_guestfwd`: remove a forward rule previously -+ added by `slirp_add_exec`, `slirp_add_unix` or `slirp_add_guestfwd` -+ - New SlirpConfig.outbound_addr{,6} fields to bind output socket to a -+ specific address -+ -+### Changed -+ -+ - socket: do not fallback on host loopback if get_dns_addr() failed -+ or the address is in slirp network -+ -+### Fixed -+ -+ - ncsi: fix checksum OOB memory access -+ - `tcp_emu()`: fix OOB accesses -+ - tftp: restrict relative path access -+ - state: fix loading of guestfwd state -+ -+## [4.1.0] - 2019-12-02 -+ -+### Added -+ -+ - The `slirp_new()` API, simpler and more extensible than `slirp_init()`. -+ - Allow custom MTU configuration. -+ - Option to disable host loopback connections. -+ - CI now runs scan-build too. -+ -+### Changed -+ -+ - Disable `tcp_emu()` by default. 
`tcp_emu()` is known to have caused -+ several CVEs, and not useful today in most cases. The feature can -+ be still enabled by setting `SlirpConfig.enable_emu` to true. -+ - meson build system is now `subproject()` friendly. -+ - Replace remaining `malloc()`/`free()` with glib (which aborts on OOM) -+ - Various code cleanups. -+ -+### Deprecated -+ -+ - The `slirp_init()` API. -+ -+### Fixed -+ -+ - `getpeername()` error after `shutdown(SHUT_WR)`. -+ - Exec forward: correctly parse command lines that contain spaces. -+ - Allow 0.0.0.0 destination address. -+ - Make host receive broadcast packets. -+ - Various memory related fixes (heap overflow, leaks, NULL -+ dereference). -+ - Compilation warnings, dead code. -+ -+## [4.0.0] - 2019-05-24 -+ -+### Added -+ -+ - Installable as a shared library. -+ - meson build system -+ (& make build system for in-tree QEMU integration) -+ -+### Changed -+ -+ - Standalone project, removing any QEMU dependency. -+ - License clarifications. -+ -+[unreleased]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.2.0...master -+[4.2.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.1.0...v4.2.0 -+[4.1.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.0.0...v4.1.0 -+[4.0.0]: https://gitlab.freedesktop.org/slirp/libslirp/commits/v4.0.0 -diff --git a/slirp/COPYRIGHT b/slirp/COPYRIGHT -new file mode 100644 -index 0000000000..ed49512dbc ---- /dev/null -+++ b/slirp/COPYRIGHT -@@ -0,0 +1,62 @@ -+Slirp was written by Danny Gasparovski. -+Copyright (c), 1995,1996 All Rights Reserved. -+ -+Slirp is free software; "free" as in you don't have to pay for it, and you -+are free to do whatever you want with it. I do not accept any donations, -+monetary or otherwise, for Slirp. Instead, I would ask you to pass this -+potential donation to your favorite charity. In fact, I encourage -+*everyone* who finds Slirp useful to make a small donation to their -+favorite charity (for example, GreenPeace). 
This is not a requirement, but -+a suggestion from someone who highly values the service they provide. -+ -+The copyright terms and conditions: -+ -+---BEGIN--- -+ -+ Copyright (c) 1995,1996 Danny Gasparovski. All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ 1. Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ 2. Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ 3. Neither the name of the copyright holder nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, -+ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY -+ AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -+ DANNY GASPAROVSKI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ -+---END--- -+ -+This basically means you can do anything you want with the software, except -+1) call it your own, and 2) claim warranty on it. There is no warranty for -+this software. None. Nada. If you lose a million dollars while using -+Slirp, that's your loss not mine. So, ***USE AT YOUR OWN RISK!***. 
-+ -+If these conditions cannot be met due to legal restrictions (E.g. where it -+is against the law to give out Software without warranty), you must cease -+using the software and delete all copies you have. -+ -+Slirp uses code that is copyrighted by the following people/organizations: -+ -+Juha Pirkola. -+Gregory M. Christy. -+The Regents of the University of California. -+Carnegie Mellon University. -+The Australian National University. -+RSA Data Security, Inc. -+ -+Please read the top of each source file for the details on the various -+copyrights. -diff --git a/slirp/Makefile b/slirp/Makefile -new file mode 100644 -index 0000000000..8857b4159b ---- /dev/null -+++ b/slirp/Makefile -@@ -0,0 +1,62 @@ -+ROOT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) -+BUILD_DIR ?= . -+ -+LIBSLIRP = $(BUILD_DIR)/libslirp.a -+SLIRP_MAJOR_VERSION = 4 -+SLIRP_MINOR_VERSION = 2 -+SLIRP_MICRO_VERSION = 0 -+SLIRP_VERSION_STRING = "$(SLIRP_MAJOR_VERSION).$(SLIRP_MINOR_VERSION).$(SLIRP_MICRO_VERSION)-git" -+ -+all: $(LIBSLIRP) -+ -+SRCS := $(wildcard src/*.c) -+OBJS := $(SRCS:%.c=$(BUILD_DIR)/%.o) -+DEPS := $(OBJS:%.o=%.d) -+ -+INC_DIRS := $(BUILD_DIR)/src -+INC_FLAGS := $(addprefix -I,$(INC_DIRS)) -+ -+override CFLAGS += \ -+ -DG_LOG_DOMAIN='"Slirp"' \ -+ $(shell $(PKG_CONFIG) --cflags glib-2.0) \ -+ $(INC_FLAGS) \ -+ -MMD -MP -+override LDFLAGS += $(shell $(PKG_CONFIG) --libs glib-2.0) -+ -+$(BUILD_DIR)/src/libslirp-version.h: Makefile -+ @$(MKDIR_P) $(dir $@) -+ $(call quiet-command,cat $(ROOT_DIR)/src/libslirp-version.h.in | \ -+ sed 's/@SLIRP_MAJOR_VERSION@/$(SLIRP_MAJOR_VERSION)/' | \ -+ sed 's/@SLIRP_MINOR_VERSION@/$(SLIRP_MINOR_VERSION)/' | \ -+ sed 's/@SLIRP_MICRO_VERSION@/$(SLIRP_MICRO_VERSION)/' | \ -+ sed 's/@SLIRP_VERSION_STRING@/$(SLIRP_VERSION_STRING)/' \ -+ > $@,"GEN","$@") -+ -+$(OBJS): $(BUILD_DIR)/src/libslirp-version.h -+ -+$(LIBSLIRP): $(OBJS) -+ -+.PHONY: clean -+ -+clean: -+ rm -r $(OBJS) $(DEPS) $(LIBSLIRP) 
$(BUILD_DIR)/src/libslirp-version.h -+ -+$(BUILD_DIR)/src/%.o: $(ROOT_DIR)/src/%.c -+ @$(MKDIR_P) $(dir $@) -+ $(call quiet-command,$(CC) $(CFLAGS) -c -o $@ $<,"CC","$@") -+ -+%.a: -+ $(call quiet-command,rm -f $@ && $(AR) rcs $@ $^,"AR","$@") -+ -+PKG_CONFIG ?= pkg-config -+MKDIR_P ?= mkdir -p -+quiet-command-run = $(if $(V),,$(if $2,printf " %-7s %s\n" $2 $3 && ))$1 -+quiet-@ = $(if $(V),,@) -+quiet-command = $(quiet-@)$(call quiet-command-run,$1,$2,$3) -+ -+print-%: -+ @echo '$*=$($*)' -+ -+.SUFFIXES: -+ -+-include $(DEPS) -diff --git a/slirp/README.md b/slirp/README.md -new file mode 100644 -index 0000000000..dc11e5f18b ---- /dev/null -+++ b/slirp/README.md -@@ -0,0 +1,60 @@ -+# libslirp -+ -+libslirp is a user-mode networking library used by virtual machines, -+containers or various tools. -+ -+## Getting Started -+ -+### Prerequisites -+ -+A C compiler, make/meson and glib2 development libraries. -+ -+(see also [.gitlab-ci.yml](.gitlab-ci.yml) DEPS variable for the list -+of dependencies on Fedora) -+ -+### Building -+ -+You may build and install the shared library with meson: -+ -+``` sh -+meson build -+ninja -C build install -+``` -+And configure QEMU with --enable-slirp=system to link against it. -+ -+(QEMU may build with the submodule static library using --enable-slirp=git) -+ -+### Testing -+ -+Unfortunately, there are no automated tests available. -+ -+You may run QEMU ``-net user`` linked with your development version. -+ -+## Contributing -+ -+Feel free to open issues on the [project -+issues](https://gitlab.freedesktop.org/slirp/libslirp/issues) page. -+ -+You may clone the [gitlab -+project](https://gitlab.freedesktop.org/slirp/libslirp) and create a -+merge request. -+ -+Contributing with gitlab allows gitlab workflow, tracking issues, -+running CI etc. -+ -+Alternatively, you may send patches to slirp@lists.freedesktop.org -+mailing list. 
-+ -+## Versioning -+ -+We intend to use [libtool's -+versioning](https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html) -+for the shared libraries and use [SemVer](http://semver.org/) for -+project versions. -+ -+For the versions available, see the [tags on this -+repository](https://gitlab.freedesktop.org/slirp/libslirp/releases). -+ -+## License -+ -+See the [COPYRIGHT](COPYRIGHT) file for details. -diff --git a/slirp/build-aux/git-version-gen b/slirp/build-aux/git-version-gen -new file mode 100755 -index 0000000000..5617eb8d4e ---- /dev/null -+++ b/slirp/build-aux/git-version-gen -@@ -0,0 +1,158 @@ -+#!/bin/sh -+# Print a version string. -+scriptversion=2010-06-14.19; # UTC -+ -+# Copyright (C) 2007-2010 Free Software Foundation, Inc. -+# -+# This program is free software: you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+ -+# This script is derived from GIT-VERSION-GEN from GIT: http://git.or.cz/. -+# It may be run two ways: -+# - from a git repository in which the "git describe" command below -+# produces useful output (thus requiring at least one signed tag) -+# - from a non-git-repo directory containing a .tarball-version file, which -+# presumes this script is invoked like "./git-version-gen .tarball-version". 
-+ -+# In order to use intra-version strings in your project, you will need two -+# separate generated version string files: -+# -+# .tarball-version - present only in a distribution tarball, and not in -+# a checked-out repository. Created with contents that were learned at -+# the last time autoconf was run, and used by git-version-gen. Must not -+# be present in either $(srcdir) or $(builddir) for git-version-gen to -+# give accurate answers during normal development with a checked out tree, -+# but must be present in a tarball when there is no version control system. -+# Therefore, it cannot be used in any dependencies. GNUmakefile has -+# hooks to force a reconfigure at distribution time to get the value -+# correct, without penalizing normal development with extra reconfigures. -+# -+# .version - present in a checked-out repository and in a distribution -+# tarball. Usable in dependencies, particularly for files that don't -+# want to depend on config.h but do want to track version changes. -+# Delete this file prior to any autoconf run where you want to rebuild -+# files to pick up a version string change; and leave it stale to -+# minimize rebuild time after unrelated changes to configure sources. -+# -+# It is probably wise to add these two files to .gitignore, so that you -+# don't accidentally commit either generated file. -+# -+# Use the following line in your configure.ac, so that $(VERSION) will -+# automatically be up-to-date each time configure is run (and note that -+# since configure.ac no longer includes a version string, Makefile rules -+# should not depend on configure.ac for version updates). -+# -+# AC_INIT([GNU project], -+# m4_esyscmd([build-aux/git-version-gen .tarball-version]), -+# [bug-project@example]) -+# -+# Then use the following lines in your Makefile.am, so that .version -+# will be present for dependencies, and so that .tarball-version will -+# exist in distribution tarballs. 
-+# -+# BUILT_SOURCES = $(top_srcdir)/.version -+# $(top_srcdir)/.version: -+# echo $(VERSION) > $@-t && mv $@-t $@ -+# dist-hook: -+# echo $(VERSION) > $(distdir)/.tarball-version -+ -+case $# in -+ 1|2) ;; -+ *) echo 1>&2 "Usage: $0 \$srcdir/.tarball-version" \ -+ '[TAG-NORMALIZATION-SED-SCRIPT]' -+ exit 1;; -+esac -+ -+tarball_version_file=$1 -+tag_sed_script="${2:-s/x/x/}" -+nl=' -+' -+ -+# Avoid meddling by environment variable of the same name. -+v= -+ -+# First see if there is a tarball-only version file. -+# then try "git describe", then default. -+if test -f $tarball_version_file -+then -+ v=`cat $tarball_version_file` || exit 1 -+ case $v in -+ *$nl*) v= ;; # reject multi-line output -+ [0-9]*) ;; -+ *) v= ;; -+ esac -+ test -z "$v" \ -+ && echo "$0: WARNING: $tarball_version_file seems to be damaged" 1>&2 -+fi -+ -+if test -n "$v" -+then -+ : # use $v -+elif test -d .git \ -+ && v=`git describe --abbrev=4 --match='v*' HEAD 2>/dev/null \ -+ || git describe --abbrev=4 HEAD 2>/dev/null` \ -+ && v=`printf '%s\n' "$v" | sed "$tag_sed_script"` \ -+ && case $v in -+ v[0-9]*) ;; -+ *) (exit 1) ;; -+ esac -+then -+ # Is this a new git that lists number of commits since the last -+ # tag or the previous older version that did not? -+ # Newer: v6.10-77-g0f8faeb -+ # Older: v6.10-g0f8faeb -+ case $v in -+ *-*-*) : git describe is okay three part flavor ;; -+ *-*) -+ : git describe is older two part flavor -+ # Recreate the number of commits and rewrite such that the -+ # result is the same as if we were using the newer version -+ # of git describe. -+ vtag=`echo "$v" | sed 's/-.*//'` -+ numcommits=`git rev-list "$vtag"..HEAD | wc -l` -+ v=`echo "$v" | sed "s/\(.*\)-\(.*\)/\1-$numcommits-\2/"`; -+ ;; -+ esac -+ -+ # Change the first '-' to a '.', so version-comparing tools work properly. -+ # Remove the "g" in git describe's output string, to save a byte. 
-+ v=`echo "$v" | sed 's/-/./;s/\(.*\)-g/\1-/'`; -+else -+ v=UNKNOWN -+fi -+ -+v=`echo "$v" |sed 's/^v//'` -+ -+# Don't declare a version "dirty" merely because a time stamp has changed. -+git update-index --refresh > /dev/null 2>&1 -+ -+dirty=`sh -c 'git diff-index --name-only HEAD' 2>/dev/null` || dirty= -+case "$dirty" in -+ '') ;; -+ *) # Append the suffix only if there isn't one already. -+ case $v in -+ *-dirty) ;; -+ *) v="$v-dirty" ;; -+ esac ;; -+esac -+ -+# Omit the trailing newline, so that m4_esyscmd can use the result directly. -+echo "$v" | tr -d "$nl" -+ -+# Local variables: -+# eval: (add-hook 'write-file-hooks 'time-stamp) -+# time-stamp-start: "scriptversion=" -+# time-stamp-format: "%:y-%02m-%02d.%02H" -+# time-stamp-time-zone: "UTC" -+# time-stamp-end: "; # UTC" -+# End: -diff --git a/slirp/build-aux/meson-dist b/slirp/build-aux/meson-dist -new file mode 100755 -index 0000000000..80d534fec6 ---- /dev/null -+++ b/slirp/build-aux/meson-dist -@@ -0,0 +1,16 @@ -+#!/bin/bash -+ -+set -e -+set -o pipefail -+ -+if test "$1" = ""; then -+ echo "Version not provided" >&2 -+ exit 1 -+fi -+if ! 
test -d "$2"; then -+ echo "Source directory not provided" >&2 -+ exit 1 -+fi -+ -+# generate tarball version -+echo "$1" > "$MESON_DIST_ROOT/.tarball-version" -diff --git a/slirp/meson.build b/slirp/meson.build -new file mode 100644 -index 0000000000..3a27149373 ---- /dev/null -+++ b/slirp/meson.build -@@ -0,0 +1,134 @@ -+project('libslirp', 'c', -+ version : run_command('build-aux/git-version-gen', '@0@/.tarball-version'.format(meson.source_root()), check : true).stdout().strip(), -+ license : 'BSD-3-Clause', -+ default_options : ['warning_level=1', 'c_std=gnu99'], -+ meson_version : '>= 0.49', -+) -+ -+meson.add_dist_script('build-aux/meson-dist', meson.project_version(), meson.source_root()) -+ -+version = meson.project_version() -+varr = version.split('.') -+major_version = varr[0] -+minor_version = varr[1] -+micro_version = varr[2] -+ -+conf = configuration_data() -+conf.set('SLIRP_MAJOR_VERSION', major_version) -+conf.set('SLIRP_MINOR_VERSION', minor_version) -+conf.set('SLIRP_MICRO_VERSION', micro_version) -+conf.set_quoted('SLIRP_VERSION_STRING', version) -+ -+# libtool versioning - this applies to libslirp -+# -+# See http://sources.redhat.com/autobook/autobook/autobook_91.html#SEC91 for details -+# -+# - If interfaces have been changed or added, but binary compatibility -+# has been preserved, change: -+# CURRENT += 1 -+# REVISION = 0 -+# AGE += 1 -+# - If binary compatibility has been broken (eg removed or changed -+# interfaces), change: -+# CURRENT += 1 -+# REVISION = 0 -+# AGE = 0 -+# - If the interface is the same as the previous version, but bugs are -+# fixed, change: -+# REVISION += 1 -+lt_current = 2 -+lt_revision = 0 -+lt_age = 2 -+lt_version = '@0@.@1@.@2@'.format(lt_current - lt_age, lt_age, lt_revision) -+ -+host_system = host_machine.system() -+ -+glib_dep = dependency('glib-2.0') -+ -+cc = meson.get_compiler('c') -+ -+platform_deps = [] -+ -+if host_system == 'windows' -+ platform_deps += [ -+ cc.find_library('ws2_32'), -+ 
cc.find_library('iphlpapi') -+ ] -+endif -+ -+cargs = [ -+ '-DG_LOG_DOMAIN="Slirp"', -+] -+ -+sources = [ -+ 'src/arp_table.c', -+ 'src/bootp.c', -+ 'src/cksum.c', -+ 'src/dhcpv6.c', -+ 'src/dnssearch.c', -+ 'src/if.c', -+ 'src/ip6_icmp.c', -+ 'src/ip6_input.c', -+ 'src/ip6_output.c', -+ 'src/ip_icmp.c', -+ 'src/ip_input.c', -+ 'src/ip_output.c', -+ 'src/mbuf.c', -+ 'src/misc.c', -+ 'src/ncsi.c', -+ 'src/ndp_table.c', -+ 'src/sbuf.c', -+ 'src/slirp.c', -+ 'src/socket.c', -+ 'src/state.c', -+ 'src/stream.c', -+ 'src/tcp_input.c', -+ 'src/tcp_output.c', -+ 'src/tcp_subr.c', -+ 'src/tcp_timer.c', -+ 'src/tftp.c', -+ 'src/udp.c', -+ 'src/udp6.c', -+ 'src/util.c', -+ 'src/version.c', -+ 'src/vmstate.c', -+] -+ -+mapfile = 'src/libslirp.map' -+vflag = '-Wl,--version-script,@0@/@1@'.format(meson.current_source_dir(), mapfile) -+ -+configure_file( -+ input : 'src/libslirp-version.h.in', -+ output : 'libslirp-version.h', -+ install_dir : join_paths(get_option('includedir'), 'slirp'), -+ configuration : conf -+) -+ -+lib = library('slirp', sources, -+ version : lt_version, -+ c_args : cargs, -+ link_args : vflag, -+ link_depends : mapfile, -+ dependencies : [glib_dep, platform_deps], -+ install : true -+) -+ -+libslirp_dep = declare_dependency( -+ include_directories: include_directories('.', 'src'), -+ link_with: lib) -+ -+install_headers(['src/libslirp.h'], subdir : 'slirp') -+ -+pkg = import('pkgconfig') -+ -+pkg.generate( -+ version : version, -+ libraries : lib, -+ requires : [ -+ 'glib-2.0', -+ ], -+ name : 'slirp', -+ description : 'User-space network stack', -+ filebase : 'slirp', -+ subdirs : 'slirp', -+) -diff --git a/slirp/src/arp_table.c b/slirp/src/arp_table.c -new file mode 100644 -index 0000000000..959e5b9ec0 ---- /dev/null -+++ b/slirp/src/arp_table.c -@@ -0,0 +1,92 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * ARP table -+ * -+ * Copyright (c) 2011 AdaCore -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this 
software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. 
-+ */ -+ -+#include "slirp.h" -+ -+#include -+ -+void arp_table_add(Slirp *slirp, uint32_t ip_addr, -+ const uint8_t ethaddr[ETH_ALEN]) -+{ -+ const uint32_t broadcast_addr = -+ ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; -+ ArpTable *arptbl = &slirp->arp_table; -+ int i; -+ -+ DEBUG_CALL("arp_table_add"); -+ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); -+ DEBUG_ARG("hw addr = %02x:%02x:%02x:%02x:%02x:%02x", ethaddr[0], ethaddr[1], -+ ethaddr[2], ethaddr[3], ethaddr[4], ethaddr[5]); -+ -+ if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { -+ /* Do not register broadcast addresses */ -+ return; -+ } -+ -+ /* Search for an entry */ -+ for (i = 0; i < ARP_TABLE_SIZE; i++) { -+ if (arptbl->table[i].ar_sip == ip_addr) { -+ /* Update the entry */ -+ memcpy(arptbl->table[i].ar_sha, ethaddr, ETH_ALEN); -+ return; -+ } -+ } -+ -+ /* No entry found, create a new one */ -+ arptbl->table[arptbl->next_victim].ar_sip = ip_addr; -+ memcpy(arptbl->table[arptbl->next_victim].ar_sha, ethaddr, ETH_ALEN); -+ arptbl->next_victim = (arptbl->next_victim + 1) % ARP_TABLE_SIZE; -+} -+ -+bool arp_table_search(Slirp *slirp, uint32_t ip_addr, -+ uint8_t out_ethaddr[ETH_ALEN]) -+{ -+ const uint32_t broadcast_addr = -+ ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; -+ ArpTable *arptbl = &slirp->arp_table; -+ int i; -+ -+ DEBUG_CALL("arp_table_search"); -+ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); -+ -+ /* If broadcast address */ -+ if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { -+ /* return Ethernet broadcast address */ -+ memset(out_ethaddr, 0xff, ETH_ALEN); -+ return 1; -+ } -+ -+ for (i = 0; i < ARP_TABLE_SIZE; i++) { -+ if (arptbl->table[i].ar_sip == ip_addr) { -+ memcpy(out_ethaddr, arptbl->table[i].ar_sha, ETH_ALEN); -+ DEBUG_ARG("found hw addr = %02x:%02x:%02x:%02x:%02x:%02x", -+ out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], -+ out_ethaddr[3], 
out_ethaddr[4], out_ethaddr[5]); -+ return 1; -+ } -+ } -+ -+ return 0; -+} -diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c -new file mode 100644 -index 0000000000..46e96810ab ---- /dev/null -+++ b/slirp/src/bootp.c -@@ -0,0 +1,369 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * QEMU BOOTP/DHCP server -+ * -+ * Copyright (c) 2004 Fabrice Bellard -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. -+ */ -+#include "slirp.h" -+ -+#if defined(_WIN32) -+/* Windows ntohl() returns an u_long value. -+ * Add a type cast to match the format strings. */ -+#define ntohl(n) ((uint32_t)ntohl(n)) -+#endif -+ -+/* XXX: only DHCP is supported */ -+ -+#define LEASE_TIME (24 * 3600) -+ -+static const uint8_t rfc1533_cookie[] = { RFC1533_COOKIE }; -+ -+#define DPRINTF(fmt, ...) 
DEBUG_CALL(fmt, ##__VA_ARGS__) -+ -+static BOOTPClient *get_new_addr(Slirp *slirp, struct in_addr *paddr, -+ const uint8_t *macaddr) -+{ -+ BOOTPClient *bc; -+ int i; -+ -+ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { -+ bc = &slirp->bootp_clients[i]; -+ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) -+ goto found; -+ } -+ return NULL; -+found: -+ bc = &slirp->bootp_clients[i]; -+ bc->allocated = 1; -+ paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); -+ return bc; -+} -+ -+static BOOTPClient *request_addr(Slirp *slirp, const struct in_addr *paddr, -+ const uint8_t *macaddr) -+{ -+ uint32_t req_addr = ntohl(paddr->s_addr); -+ uint32_t dhcp_addr = ntohl(slirp->vdhcp_startaddr.s_addr); -+ BOOTPClient *bc; -+ -+ if (req_addr >= dhcp_addr && req_addr < (dhcp_addr + NB_BOOTP_CLIENTS)) { -+ bc = &slirp->bootp_clients[req_addr - dhcp_addr]; -+ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) { -+ bc->allocated = 1; -+ return bc; -+ } -+ } -+ return NULL; -+} -+ -+static BOOTPClient *find_addr(Slirp *slirp, struct in_addr *paddr, -+ const uint8_t *macaddr) -+{ -+ BOOTPClient *bc; -+ int i; -+ -+ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { -+ if (!memcmp(macaddr, slirp->bootp_clients[i].macaddr, 6)) -+ goto found; -+ } -+ return NULL; -+found: -+ bc = &slirp->bootp_clients[i]; -+ bc->allocated = 1; -+ paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); -+ return bc; -+} -+ -+static void dhcp_decode(const struct bootp_t *bp, int *pmsg_type, -+ struct in_addr *preq_addr) -+{ -+ const uint8_t *p, *p_end; -+ int len, tag; -+ -+ *pmsg_type = 0; -+ preq_addr->s_addr = htonl(0L); -+ -+ p = bp->bp_vend; -+ p_end = p + DHCP_OPT_LEN; -+ if (memcmp(p, rfc1533_cookie, 4) != 0) -+ return; -+ p += 4; -+ while (p < p_end) { -+ tag = p[0]; -+ if (tag == RFC1533_PAD) { -+ p++; -+ } else if (tag == RFC1533_END) { -+ break; -+ } else { -+ p++; -+ if (p >= p_end) -+ break; -+ len = *p++; -+ if (p + len > p_end) { -+ break; -+ } -+ DPRINTF("dhcp: tag=%d len=%d\n", tag, 
len); -+ -+ switch (tag) { -+ case RFC2132_MSG_TYPE: -+ if (len >= 1) -+ *pmsg_type = p[0]; -+ break; -+ case RFC2132_REQ_ADDR: -+ if (len >= 4) { -+ memcpy(&(preq_addr->s_addr), p, 4); -+ } -+ break; -+ default: -+ break; -+ } -+ p += len; -+ } -+ } -+ if (*pmsg_type == DHCPREQUEST && preq_addr->s_addr == htonl(0L) && -+ bp->bp_ciaddr.s_addr) { -+ memcpy(&(preq_addr->s_addr), &bp->bp_ciaddr, 4); -+ } -+} -+ -+static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) -+{ -+ BOOTPClient *bc = NULL; -+ struct mbuf *m; -+ struct bootp_t *rbp; -+ struct sockaddr_in saddr, daddr; -+ struct in_addr preq_addr; -+ int dhcp_msg_type, val; -+ uint8_t *q; -+ uint8_t *end; -+ uint8_t client_ethaddr[ETH_ALEN]; -+ -+ /* extract exact DHCP msg type */ -+ dhcp_decode(bp, &dhcp_msg_type, &preq_addr); -+ DPRINTF("bootp packet op=%d msgtype=%d", bp->bp_op, dhcp_msg_type); -+ if (preq_addr.s_addr != htonl(0L)) -+ DPRINTF(" req_addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); -+ else { -+ DPRINTF("\n"); -+ } -+ -+ if (dhcp_msg_type == 0) -+ dhcp_msg_type = DHCPREQUEST; /* Force reply for old BOOTP clients */ -+ -+ if (dhcp_msg_type != DHCPDISCOVER && dhcp_msg_type != DHCPREQUEST) -+ return; -+ -+ /* Get client's hardware address from bootp request */ -+ memcpy(client_ethaddr, bp->bp_hwaddr, ETH_ALEN); -+ -+ m = m_get(slirp); -+ if (!m) { -+ return; -+ } -+ m->m_data += IF_MAXLINKHDR; -+ rbp = (struct bootp_t *)m->m_data; -+ m->m_data += sizeof(struct udpiphdr); -+ memset(rbp, 0, sizeof(struct bootp_t)); -+ -+ if (dhcp_msg_type == DHCPDISCOVER) { -+ if (preq_addr.s_addr != htonl(0L)) { -+ bc = request_addr(slirp, &preq_addr, client_ethaddr); -+ if (bc) { -+ daddr.sin_addr = preq_addr; -+ } -+ } -+ if (!bc) { -+ new_addr: -+ bc = get_new_addr(slirp, &daddr.sin_addr, client_ethaddr); -+ if (!bc) { -+ DPRINTF("no address left\n"); -+ return; -+ } -+ } -+ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); -+ } else if (preq_addr.s_addr != htonl(0L)) { -+ bc = request_addr(slirp, 
&preq_addr, client_ethaddr); -+ if (bc) { -+ daddr.sin_addr = preq_addr; -+ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); -+ } else { -+ /* DHCPNAKs should be sent to broadcast */ -+ daddr.sin_addr.s_addr = 0xffffffff; -+ } -+ } else { -+ bc = find_addr(slirp, &daddr.sin_addr, bp->bp_hwaddr); -+ if (!bc) { -+ /* if never assigned, behaves as if it was already -+ assigned (windows fix because it remembers its address) */ -+ goto new_addr; -+ } -+ } -+ -+ /* Update ARP table for this IP address */ -+ arp_table_add(slirp, daddr.sin_addr.s_addr, client_ethaddr); -+ -+ saddr.sin_addr = slirp->vhost_addr; -+ saddr.sin_port = htons(BOOTP_SERVER); -+ -+ daddr.sin_port = htons(BOOTP_CLIENT); -+ -+ rbp->bp_op = BOOTP_REPLY; -+ rbp->bp_xid = bp->bp_xid; -+ rbp->bp_htype = 1; -+ rbp->bp_hlen = 6; -+ memcpy(rbp->bp_hwaddr, bp->bp_hwaddr, ETH_ALEN); -+ -+ rbp->bp_yiaddr = daddr.sin_addr; /* Client IP address */ -+ rbp->bp_siaddr = saddr.sin_addr; /* Server IP address */ -+ -+ q = rbp->bp_vend; -+ end = (uint8_t *)&rbp[1]; -+ memcpy(q, rfc1533_cookie, 4); -+ q += 4; -+ -+ if (bc) { -+ DPRINTF("%s addr=%08" PRIx32 "\n", -+ (dhcp_msg_type == DHCPDISCOVER) ? 
"offered" : "ack'ed", -+ ntohl(daddr.sin_addr.s_addr)); -+ -+ if (dhcp_msg_type == DHCPDISCOVER) { -+ *q++ = RFC2132_MSG_TYPE; -+ *q++ = 1; -+ *q++ = DHCPOFFER; -+ } else /* DHCPREQUEST */ { -+ *q++ = RFC2132_MSG_TYPE; -+ *q++ = 1; -+ *q++ = DHCPACK; -+ } -+ -+ if (slirp->bootp_filename) { -+ g_assert(strlen(slirp->bootp_filename) < sizeof(rbp->bp_file)); -+ strcpy(rbp->bp_file, slirp->bootp_filename); -+ } -+ -+ *q++ = RFC2132_SRV_ID; -+ *q++ = 4; -+ memcpy(q, &saddr.sin_addr, 4); -+ q += 4; -+ -+ *q++ = RFC1533_NETMASK; -+ *q++ = 4; -+ memcpy(q, &slirp->vnetwork_mask, 4); -+ q += 4; -+ -+ if (!slirp->restricted) { -+ *q++ = RFC1533_GATEWAY; -+ *q++ = 4; -+ memcpy(q, &saddr.sin_addr, 4); -+ q += 4; -+ -+ *q++ = RFC1533_DNS; -+ *q++ = 4; -+ memcpy(q, &slirp->vnameserver_addr, 4); -+ q += 4; -+ } -+ -+ *q++ = RFC2132_LEASE_TIME; -+ *q++ = 4; -+ val = htonl(LEASE_TIME); -+ memcpy(q, &val, 4); -+ q += 4; -+ -+ if (*slirp->client_hostname) { -+ val = strlen(slirp->client_hostname); -+ if (q + val + 2 >= end) { -+ g_warning("DHCP packet size exceeded, " -+ "omitting host name option."); -+ } else { -+ *q++ = RFC1533_HOSTNAME; -+ *q++ = val; -+ memcpy(q, slirp->client_hostname, val); -+ q += val; -+ } -+ } -+ -+ if (slirp->vdomainname) { -+ val = strlen(slirp->vdomainname); -+ if (q + val + 2 >= end) { -+ g_warning("DHCP packet size exceeded, " -+ "omitting domain name option."); -+ } else { -+ *q++ = RFC1533_DOMAINNAME; -+ *q++ = val; -+ memcpy(q, slirp->vdomainname, val); -+ q += val; -+ } -+ } -+ -+ if (slirp->tftp_server_name) { -+ val = strlen(slirp->tftp_server_name); -+ if (q + val + 2 >= end) { -+ g_warning("DHCP packet size exceeded, " -+ "omitting tftp-server-name option."); -+ } else { -+ *q++ = RFC2132_TFTP_SERVER_NAME; -+ *q++ = val; -+ memcpy(q, slirp->tftp_server_name, val); -+ q += val; -+ } -+ } -+ -+ if (slirp->vdnssearch) { -+ val = slirp->vdnssearch_len; -+ if (q + val >= end) { -+ g_warning("DHCP packet size exceeded, " -+ "omitting domain-search 
option."); -+ } else { -+ memcpy(q, slirp->vdnssearch, val); -+ q += val; -+ } -+ } -+ } else { -+ static const char nak_msg[] = "requested address not available"; -+ -+ DPRINTF("nak'ed addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); -+ -+ *q++ = RFC2132_MSG_TYPE; -+ *q++ = 1; -+ *q++ = DHCPNAK; -+ -+ *q++ = RFC2132_MESSAGE; -+ *q++ = sizeof(nak_msg) - 1; -+ memcpy(q, nak_msg, sizeof(nak_msg) - 1); -+ q += sizeof(nak_msg) - 1; -+ } -+ assert(q < end); -+ *q = RFC1533_END; -+ -+ daddr.sin_addr.s_addr = 0xffffffffu; -+ -+ m->m_len = sizeof(struct bootp_t) - sizeof(struct ip) - sizeof(struct udphdr); -+ udp_output(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); -+} -+ -+void bootp_input(struct mbuf *m) -+{ -+ struct bootp_t *bp = mtod(m, struct bootp_t *); -+ -+ if (bp->bp_op == BOOTP_REQUEST) { -+ bootp_reply(m->slirp, bp); -+ } -+} -diff --git a/slirp/src/bootp.h b/slirp/src/bootp.h -new file mode 100644 -index 0000000000..a57fa51bcb ---- /dev/null -+++ b/slirp/src/bootp.h -@@ -0,0 +1,129 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* bootp/dhcp defines */ -+ -+#ifndef SLIRP_BOOTP_H -+#define SLIRP_BOOTP_H -+ -+#define BOOTP_SERVER 67 -+#define BOOTP_CLIENT 68 -+ -+#define BOOTP_REQUEST 1 -+#define BOOTP_REPLY 2 -+ -+#define RFC1533_COOKIE 99, 130, 83, 99 -+#define RFC1533_PAD 0 -+#define RFC1533_NETMASK 1 -+#define RFC1533_TIMEOFFSET 2 -+#define RFC1533_GATEWAY 3 -+#define RFC1533_TIMESERVER 4 -+#define RFC1533_IEN116NS 5 -+#define RFC1533_DNS 6 -+#define RFC1533_LOGSERVER 7 -+#define RFC1533_COOKIESERVER 8 -+#define RFC1533_LPRSERVER 9 -+#define RFC1533_IMPRESSSERVER 10 -+#define RFC1533_RESOURCESERVER 11 -+#define RFC1533_HOSTNAME 12 -+#define RFC1533_BOOTFILESIZE 13 -+#define RFC1533_MERITDUMPFILE 14 -+#define RFC1533_DOMAINNAME 15 -+#define RFC1533_SWAPSERVER 16 -+#define RFC1533_ROOTPATH 17 -+#define RFC1533_EXTENSIONPATH 18 -+#define RFC1533_IPFORWARDING 19 -+#define RFC1533_IPSOURCEROUTING 20 -+#define RFC1533_IPPOLICYFILTER 21 -+#define 
RFC1533_IPMAXREASSEMBLY 22 -+#define RFC1533_IPTTL 23 -+#define RFC1533_IPMTU 24 -+#define RFC1533_IPMTUPLATEAU 25 -+#define RFC1533_INTMTU 26 -+#define RFC1533_INTLOCALSUBNETS 27 -+#define RFC1533_INTBROADCAST 28 -+#define RFC1533_INTICMPDISCOVER 29 -+#define RFC1533_INTICMPRESPOND 30 -+#define RFC1533_INTROUTEDISCOVER 31 -+#define RFC1533_INTROUTESOLICIT 32 -+#define RFC1533_INTSTATICROUTES 33 -+#define RFC1533_LLTRAILERENCAP 34 -+#define RFC1533_LLARPCACHETMO 35 -+#define RFC1533_LLETHERNETENCAP 36 -+#define RFC1533_TCPTTL 37 -+#define RFC1533_TCPKEEPALIVETMO 38 -+#define RFC1533_TCPKEEPALIVEGB 39 -+#define RFC1533_NISDOMAIN 40 -+#define RFC1533_NISSERVER 41 -+#define RFC1533_NTPSERVER 42 -+#define RFC1533_VENDOR 43 -+#define RFC1533_NBNS 44 -+#define RFC1533_NBDD 45 -+#define RFC1533_NBNT 46 -+#define RFC1533_NBSCOPE 47 -+#define RFC1533_XFS 48 -+#define RFC1533_XDM 49 -+ -+#define RFC2132_REQ_ADDR 50 -+#define RFC2132_LEASE_TIME 51 -+#define RFC2132_MSG_TYPE 53 -+#define RFC2132_SRV_ID 54 -+#define RFC2132_PARAM_LIST 55 -+#define RFC2132_MESSAGE 56 -+#define RFC2132_MAX_SIZE 57 -+#define RFC2132_RENEWAL_TIME 58 -+#define RFC2132_REBIND_TIME 59 -+#define RFC2132_TFTP_SERVER_NAME 66 -+ -+#define DHCPDISCOVER 1 -+#define DHCPOFFER 2 -+#define DHCPREQUEST 3 -+#define DHCPACK 5 -+#define DHCPNAK 6 -+ -+#define RFC1533_VENDOR_MAJOR 0 -+#define RFC1533_VENDOR_MINOR 0 -+ -+#define RFC1533_VENDOR_MAGIC 128 -+#define RFC1533_VENDOR_ADDPARM 129 -+#define RFC1533_VENDOR_ETHDEV 130 -+#define RFC1533_VENDOR_HOWTO 132 -+#define RFC1533_VENDOR_MNUOPTS 160 -+#define RFC1533_VENDOR_SELECTION 176 -+#define RFC1533_VENDOR_MOTD 184 -+#define RFC1533_VENDOR_NUMOFMOTD 8 -+#define RFC1533_VENDOR_IMG 192 -+#define RFC1533_VENDOR_NUMOFIMG 16 -+ -+#define RFC1533_END 255 -+#define BOOTP_VENDOR_LEN 64 -+#define DHCP_OPT_LEN 312 -+ -+struct bootp_t { -+ struct ip ip; -+ struct udphdr udp; -+ uint8_t bp_op; -+ uint8_t bp_htype; -+ uint8_t bp_hlen; -+ uint8_t bp_hops; -+ uint32_t bp_xid; -+ 
uint16_t bp_secs; -+ uint16_t unused; -+ struct in_addr bp_ciaddr; -+ struct in_addr bp_yiaddr; -+ struct in_addr bp_siaddr; -+ struct in_addr bp_giaddr; -+ uint8_t bp_hwaddr[16]; -+ uint8_t bp_sname[64]; -+ char bp_file[128]; -+ uint8_t bp_vend[DHCP_OPT_LEN]; -+}; -+ -+typedef struct { -+ uint16_t allocated; -+ uint8_t macaddr[6]; -+} BOOTPClient; -+ -+#define NB_BOOTP_CLIENTS 16 -+ -+void bootp_input(struct mbuf *m); -+ -+#endif -diff --git a/slirp/src/cksum.c b/slirp/src/cksum.c -new file mode 100644 -index 0000000000..4d08380a4e ---- /dev/null -+++ b/slirp/src/cksum.c -@@ -0,0 +1,179 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1988, 1992, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 -+ * in_cksum.c,v 1.2 1994/08/02 07:48:16 davidg Exp -+ */ -+ -+#include "slirp.h" -+ -+/* -+ * Checksum routine for Internet Protocol family headers (Portable Version). -+ * -+ * This routine is very heavily used in the network -+ * code and should be modified for each CPU to be as fast as possible. -+ * -+ * XXX Since we will never span more than 1 mbuf, we can optimise this -+ */ -+ -+#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) -+#define REDUCE \ -+ { \ -+ l_util.l = sum; \ -+ sum = l_util.s[0] + l_util.s[1]; \ -+ (void)ADDCARRY(sum); \ -+ } -+ -+int cksum(struct mbuf *m, int len) -+{ -+ register uint16_t *w; -+ register int sum = 0; -+ register int mlen = 0; -+ int byte_swapped = 0; -+ -+ union { -+ uint8_t c[2]; -+ uint16_t s; -+ } s_util; -+ union { -+ uint16_t s[2]; -+ uint32_t l; -+ } l_util; -+ -+ if (m->m_len == 0) -+ goto cont; -+ w = mtod(m, uint16_t *); -+ -+ mlen = m->m_len; -+ -+ if (len < mlen) -+ mlen = len; -+ len -= mlen; -+ /* -+ * Force to even boundary. -+ */ -+ if ((1 & (uintptr_t)w) && (mlen > 0)) { -+ REDUCE; -+ sum <<= 8; -+ s_util.c[0] = *(uint8_t *)w; -+ w = (uint16_t *)((int8_t *)w + 1); -+ mlen--; -+ byte_swapped = 1; -+ } -+ /* -+ * Unroll the loop to make overhead from -+ * branches &c small. 
-+ */ -+ while ((mlen -= 32) >= 0) { -+ sum += w[0]; -+ sum += w[1]; -+ sum += w[2]; -+ sum += w[3]; -+ sum += w[4]; -+ sum += w[5]; -+ sum += w[6]; -+ sum += w[7]; -+ sum += w[8]; -+ sum += w[9]; -+ sum += w[10]; -+ sum += w[11]; -+ sum += w[12]; -+ sum += w[13]; -+ sum += w[14]; -+ sum += w[15]; -+ w += 16; -+ } -+ mlen += 32; -+ while ((mlen -= 8) >= 0) { -+ sum += w[0]; -+ sum += w[1]; -+ sum += w[2]; -+ sum += w[3]; -+ w += 4; -+ } -+ mlen += 8; -+ if (mlen == 0 && byte_swapped == 0) -+ goto cont; -+ REDUCE; -+ while ((mlen -= 2) >= 0) { -+ sum += *w++; -+ } -+ -+ if (byte_swapped) { -+ REDUCE; -+ sum <<= 8; -+ if (mlen == -1) { -+ s_util.c[1] = *(uint8_t *)w; -+ sum += s_util.s; -+ mlen = 0; -+ } else -+ -+ mlen = -1; -+ } else if (mlen == -1) -+ s_util.c[0] = *(uint8_t *)w; -+ -+cont: -+ if (len) { -+ DEBUG_ERROR("cksum: out of data"); -+ DEBUG_ERROR(" len = %d", len); -+ } -+ if (mlen == -1) { -+ /* The last mbuf has odd # of bytes. Follow the -+ standard (the odd byte may be shifted left by 8 bits -+ or not as determined by endian-ness of the machine) */ -+ s_util.c[1] = 0; -+ sum += s_util.s; -+ } -+ REDUCE; -+ return (~sum & 0xffff); -+} -+ -+int ip6_cksum(struct mbuf *m) -+{ -+ /* TODO: Optimize this by being able to pass the ip6_pseudohdr to cksum -+ * separately from the mbuf */ -+ struct ip6 save_ip, *ip = mtod(m, struct ip6 *); -+ struct ip6_pseudohdr *ih = mtod(m, struct ip6_pseudohdr *); -+ int sum; -+ -+ save_ip = *ip; -+ -+ ih->ih_src = save_ip.ip_src; -+ ih->ih_dst = save_ip.ip_dst; -+ ih->ih_pl = htonl((uint32_t)ntohs(save_ip.ip_pl)); -+ ih->ih_zero_hi = 0; -+ ih->ih_zero_lo = 0; -+ ih->ih_nh = save_ip.ip_nh; -+ -+ sum = cksum(m, ((int)sizeof(struct ip6_pseudohdr)) + ntohl(ih->ih_pl)); -+ -+ *ip = save_ip; -+ -+ return sum; -+} -diff --git a/slirp/src/debug.h b/slirp/src/debug.h -new file mode 100644 -index 0000000000..47712bd78b ---- /dev/null -+++ b/slirp/src/debug.h -@@ -0,0 +1,51 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * 
Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#ifndef DEBUG_H_ -+#define DEBUG_H_ -+ -+#define DBG_CALL (1 << 0) -+#define DBG_MISC (1 << 1) -+#define DBG_ERROR (1 << 2) -+#define DBG_TFTP (1 << 3) -+ -+extern int slirp_debug; -+ -+#define DEBUG_CALL(fmt, ...) \ -+ do { \ -+ if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ -+ g_debug(fmt "...", ##__VA_ARGS__); \ -+ } \ -+ } while (0) -+ -+#define DEBUG_ARG(fmt, ...) \ -+ do { \ -+ if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ -+ g_debug(" " fmt, ##__VA_ARGS__); \ -+ } \ -+ } while (0) -+ -+#define DEBUG_MISC(fmt, ...) \ -+ do { \ -+ if (G_UNLIKELY(slirp_debug & DBG_MISC)) { \ -+ g_debug(fmt, ##__VA_ARGS__); \ -+ } \ -+ } while (0) -+ -+#define DEBUG_ERROR(fmt, ...) \ -+ do { \ -+ if (G_UNLIKELY(slirp_debug & DBG_ERROR)) { \ -+ g_debug(fmt, ##__VA_ARGS__); \ -+ } \ -+ } while (0) -+ -+#define DEBUG_TFTP(fmt, ...) \ -+ do { \ -+ if (G_UNLIKELY(slirp_debug & DBG_TFTP)) { \ -+ g_debug(fmt, ##__VA_ARGS__); \ -+ } \ -+ } while (0) -+ -+#endif /* DEBUG_H_ */ -diff --git a/slirp/src/dhcpv6.c b/slirp/src/dhcpv6.c -new file mode 100644 -index 0000000000..77b451b910 ---- /dev/null -+++ b/slirp/src/dhcpv6.c -@@ -0,0 +1,224 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * SLIRP stateless DHCPv6 -+ * -+ * We only support stateless DHCPv6, e.g. for network booting. -+ * See RFC 3315, RFC 3736, RFC 3646 and RFC 5970 for details. -+ * -+ * Copyright 2016 Thomas Huth, Red Hat Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * 1. Redistributions of source code must retain the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer. -+ * -+ * 2. Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer in the documentation and/or other materials provided -+ * with the distribution. -+ * -+ * 3. 
Neither the name of the copyright holder nor the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -+ * OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+ -+#include "slirp.h" -+#include "dhcpv6.h" -+ -+/* DHCPv6 message types */ -+#define MSGTYPE_REPLY 7 -+#define MSGTYPE_INFO_REQUEST 11 -+ -+/* DHCPv6 option types */ -+#define OPTION_CLIENTID 1 -+#define OPTION_IAADDR 5 -+#define OPTION_ORO 6 -+#define OPTION_DNS_SERVERS 23 -+#define OPTION_BOOTFILE_URL 59 -+ -+struct requested_infos { -+ uint8_t *client_id; -+ int client_id_len; -+ bool want_dns; -+ bool want_boot_url; -+}; -+ -+/** -+ * Analyze the info request message sent by the client to see what data it -+ * provided and what it wants to have. The information is gathered in the -+ * "requested_infos" struct. Note that client_id (if provided) points into -+ * the odata region, thus the caller must keep odata valid as long as it -+ * needs to access the requested_infos struct. 
-+ */ -+static int dhcpv6_parse_info_request(Slirp *slirp, uint8_t *odata, int olen, -+ struct requested_infos *ri) -+{ -+ int i, req_opt; -+ -+ while (olen > 4) { -+ /* Parse one option */ -+ int option = odata[0] << 8 | odata[1]; -+ int len = odata[2] << 8 | odata[3]; -+ -+ if (len + 4 > olen) { -+ slirp->cb->guest_error("Guest sent bad DHCPv6 packet!", -+ slirp->opaque); -+ return -E2BIG; -+ } -+ -+ switch (option) { -+ case OPTION_IAADDR: -+ /* According to RFC3315, we must discard requests with IA option */ -+ return -EINVAL; -+ case OPTION_CLIENTID: -+ if (len > 256) { -+ /* Avoid very long IDs which could cause problems later */ -+ return -E2BIG; -+ } -+ ri->client_id = odata + 4; -+ ri->client_id_len = len; -+ break; -+ case OPTION_ORO: /* Option request option */ -+ if (len & 1) { -+ return -EINVAL; -+ } -+ /* Check which options the client wants to have */ -+ for (i = 0; i < len; i += 2) { -+ req_opt = odata[4 + i] << 8 | odata[4 + i + 1]; -+ switch (req_opt) { -+ case OPTION_DNS_SERVERS: -+ ri->want_dns = true; -+ break; -+ case OPTION_BOOTFILE_URL: -+ ri->want_boot_url = true; -+ break; -+ default: -+ DEBUG_MISC("dhcpv6: Unsupported option request %d", -+ req_opt); -+ } -+ } -+ break; -+ default: -+ DEBUG_MISC("dhcpv6 info req: Unsupported option %d, len=%d", option, -+ len); -+ } -+ -+ odata += len + 4; -+ olen -= len + 4; -+ } -+ -+ return 0; -+} -+ -+ -+/** -+ * Handle information request messages -+ */ -+static void dhcpv6_info_request(Slirp *slirp, struct sockaddr_in6 *srcsas, -+ uint32_t xid, uint8_t *odata, int olen) -+{ -+ struct requested_infos ri = { NULL }; -+ struct sockaddr_in6 sa6, da6; -+ struct mbuf *m; -+ uint8_t *resp; -+ -+ if (dhcpv6_parse_info_request(slirp, odata, olen, &ri) < 0) { -+ return; -+ } -+ -+ m = m_get(slirp); -+ if (!m) { -+ return; -+ } -+ memset(m->m_data, 0, m->m_size); -+ m->m_data += IF_MAXLINKHDR; -+ resp = (uint8_t *)m->m_data + sizeof(struct ip6) + sizeof(struct udphdr); -+ -+ /* Fill in response */ -+ *resp++ = 
MSGTYPE_REPLY; -+ *resp++ = (uint8_t)(xid >> 16); -+ *resp++ = (uint8_t)(xid >> 8); -+ *resp++ = (uint8_t)xid; -+ -+ if (ri.client_id) { -+ *resp++ = OPTION_CLIENTID >> 8; /* option-code high byte */ -+ *resp++ = OPTION_CLIENTID; /* option-code low byte */ -+ *resp++ = ri.client_id_len >> 8; /* option-len high byte */ -+ *resp++ = ri.client_id_len; /* option-len low byte */ -+ memcpy(resp, ri.client_id, ri.client_id_len); -+ resp += ri.client_id_len; -+ } -+ if (ri.want_dns) { -+ *resp++ = OPTION_DNS_SERVERS >> 8; /* option-code high byte */ -+ *resp++ = OPTION_DNS_SERVERS; /* option-code low byte */ -+ *resp++ = 0; /* option-len high byte */ -+ *resp++ = 16; /* option-len low byte */ -+ memcpy(resp, &slirp->vnameserver_addr6, 16); -+ resp += 16; -+ } -+ if (ri.want_boot_url) { -+ uint8_t *sa = slirp->vhost_addr6.s6_addr; -+ int slen, smaxlen; -+ -+ *resp++ = OPTION_BOOTFILE_URL >> 8; /* option-code high byte */ -+ *resp++ = OPTION_BOOTFILE_URL; /* option-code low byte */ -+ smaxlen = (uint8_t *)m->m_data + slirp->if_mtu - (resp + 2); -+ slen = slirp_fmt((char *)resp + 2, smaxlen, -+ "tftp://[%02x%02x:%02x%02x:%02x%02x:%02x%02x:" -+ "%02x%02x:%02x%02x:%02x%02x:%02x%02x]/%s", -+ sa[0], sa[1], sa[2], sa[3], sa[4], sa[5], sa[6], sa[7], -+ sa[8], sa[9], sa[10], sa[11], sa[12], sa[13], sa[14], -+ sa[15], slirp->bootp_filename); -+ *resp++ = slen >> 8; /* option-len high byte */ -+ *resp++ = slen; /* option-len low byte */ -+ resp += slen; -+ } -+ -+ sa6.sin6_addr = slirp->vhost_addr6; -+ sa6.sin6_port = DHCPV6_SERVER_PORT; -+ da6.sin6_addr = srcsas->sin6_addr; -+ da6.sin6_port = srcsas->sin6_port; -+ m->m_data += sizeof(struct ip6) + sizeof(struct udphdr); -+ m->m_len = resp - (uint8_t *)m->m_data; -+ udp6_output(NULL, m, &sa6, &da6); -+} -+ -+/** -+ * Handle DHCPv6 messages sent by the client -+ */ -+void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m) -+{ -+ uint8_t *data = (uint8_t *)m->m_data + sizeof(struct udphdr); -+ int data_len = m->m_len - 
sizeof(struct udphdr); -+ uint32_t xid; -+ -+ if (data_len < 4) { -+ return; -+ } -+ -+ xid = ntohl(*(uint32_t *)data) & 0xffffff; -+ -+ switch (data[0]) { -+ case MSGTYPE_INFO_REQUEST: -+ dhcpv6_info_request(m->slirp, srcsas, xid, &data[4], data_len - 4); -+ break; -+ default: -+ DEBUG_MISC("dhcpv6_input: Unsupported message type 0x%x", data[0]); -+ } -+} -diff --git a/slirp/src/dhcpv6.h b/slirp/src/dhcpv6.h -new file mode 100644 -index 0000000000..d12c49b36c ---- /dev/null -+++ b/slirp/src/dhcpv6.h -@@ -0,0 +1,68 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Definitions and prototypes for SLIRP stateless DHCPv6 -+ * -+ * Copyright 2016 Thomas Huth, Red Hat Inc. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * 1. Redistributions of source code must retain the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer. -+ * -+ * 2. Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer in the documentation and/or other materials provided -+ * with the distribution. -+ * -+ * 3. Neither the name of the copyright holder nor the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE -+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -+ * OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef SLIRP_DHCPV6_H -+#define SLIRP_DHCPV6_H -+ -+#define DHCPV6_SERVER_PORT 547 -+ -+#define ALLDHCP_MULTICAST \ -+ { \ -+ .s6_addr = { \ -+ 0xff, \ -+ 0x02, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x01, \ -+ 0x00, \ -+ 0x02 \ -+ } \ -+ } -+ -+#define in6_dhcp_multicast(a) in6_equal(a, &(struct in6_addr)ALLDHCP_MULTICAST) -+ -+void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m); -+ -+#endif -diff --git a/slirp/src/dnssearch.c b/slirp/src/dnssearch.c -new file mode 100644 -index 0000000000..55497e860e ---- /dev/null -+++ b/slirp/src/dnssearch.c -@@ -0,0 +1,306 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * Domain search option for DHCP (RFC 3397) -+ * -+ * Copyright (c) 2012 Klaus Stengel -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. 
-+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. -+ */ -+ -+#include "slirp.h" -+ -+static const uint8_t RFC3397_OPT_DOMAIN_SEARCH = 119; -+static const uint8_t MAX_OPT_LEN = 255; -+static const uint8_t OPT_HEADER_LEN = 2; -+static const uint8_t REFERENCE_LEN = 2; -+ -+struct compact_domain; -+ -+typedef struct compact_domain { -+ struct compact_domain *self; -+ struct compact_domain *refdom; -+ uint8_t *labels; -+ size_t len; -+ size_t common_octets; -+} CompactDomain; -+ -+static size_t domain_suffix_diffoff(const CompactDomain *a, -+ const CompactDomain *b) -+{ -+ size_t la = a->len, lb = b->len; -+ uint8_t *da = a->labels + la, *db = b->labels + lb; -+ size_t i, lm = (la < lb) ? 
la : lb; -+ -+ for (i = 0; i < lm; i++) { -+ da--; -+ db--; -+ if (*da != *db) { -+ break; -+ } -+ } -+ return i; -+} -+ -+static int domain_suffix_ord(const void *cva, const void *cvb) -+{ -+ const CompactDomain *a = cva, *b = cvb; -+ size_t la = a->len, lb = b->len; -+ size_t doff = domain_suffix_diffoff(a, b); -+ uint8_t ca = a->labels[la - doff]; -+ uint8_t cb = b->labels[lb - doff]; -+ -+ if (ca < cb) { -+ return -1; -+ } -+ if (ca > cb) { -+ return 1; -+ } -+ if (la < lb) { -+ return -1; -+ } -+ if (la > lb) { -+ return 1; -+ } -+ return 0; -+} -+ -+static size_t domain_common_label(CompactDomain *a, CompactDomain *b) -+{ -+ size_t res, doff = domain_suffix_diffoff(a, b); -+ uint8_t *first_eq_pos = a->labels + (a->len - doff); -+ uint8_t *label = a->labels; -+ -+ while (*label && label < first_eq_pos) { -+ label += *label + 1; -+ } -+ res = a->len - (label - a->labels); -+ /* only report if it can help to reduce the packet size */ -+ return (res > REFERENCE_LEN) ? res : 0; -+} -+ -+static void domain_fixup_order(CompactDomain *cd, size_t n) -+{ -+ size_t i; -+ -+ for (i = 0; i < n; i++) { -+ CompactDomain *cur = cd + i, *next = cd[i].self; -+ -+ while (!cur->common_octets) { -+ CompactDomain *tmp = next->self; /* backup target value */ -+ -+ next->self = cur; -+ cur->common_octets++; -+ -+ cur = next; -+ next = tmp; -+ } -+ } -+} -+ -+static void domain_mklabels(CompactDomain *cd, const char *input) -+{ -+ uint8_t *len_marker = cd->labels; -+ uint8_t *output = len_marker; /* pre-incremented */ -+ const char *in = input; -+ char cur_chr; -+ size_t len = 0; -+ -+ if (cd->len == 0) { -+ goto fail; -+ } -+ cd->len++; -+ -+ do { -+ cur_chr = *in++; -+ if (cur_chr == '.' 
|| cur_chr == '\0') { -+ len = output - len_marker; -+ if ((len == 0 && cur_chr == '.') || len >= 64) { -+ goto fail; -+ } -+ *len_marker = len; -+ -+ output++; -+ len_marker = output; -+ } else { -+ output++; -+ *output = cur_chr; -+ } -+ } while (cur_chr != '\0'); -+ -+ /* ensure proper zero-termination */ -+ if (len != 0) { -+ *len_marker = 0; -+ cd->len++; -+ } -+ return; -+ -+fail: -+ g_warning("failed to parse domain name '%s'\n", input); -+ cd->len = 0; -+} -+ -+static void domain_mkxrefs(CompactDomain *doms, CompactDomain *last, -+ size_t depth) -+{ -+ CompactDomain *i = doms, *target = doms; -+ -+ do { -+ if (i->labels < target->labels) { -+ target = i; -+ } -+ } while (i++ != last); -+ -+ for (i = doms; i != last; i++) { -+ CompactDomain *group_last; -+ size_t next_depth; -+ -+ if (i->common_octets == depth) { -+ continue; -+ } -+ -+ next_depth = -1; -+ for (group_last = i; group_last != last; group_last++) { -+ size_t co = group_last->common_octets; -+ if (co <= depth) { -+ break; -+ } -+ if (co < next_depth) { -+ next_depth = co; -+ } -+ } -+ domain_mkxrefs(i, group_last, next_depth); -+ -+ i = group_last; -+ if (i == last) { -+ break; -+ } -+ } -+ -+ if (depth == 0) { -+ return; -+ } -+ -+ i = doms; -+ do { -+ if (i != target && i->refdom == NULL) { -+ i->refdom = target; -+ i->common_octets = depth; -+ } -+ } while (i++ != last); -+} -+ -+static size_t domain_compactify(CompactDomain *domains, size_t n) -+{ -+ uint8_t *start = domains->self->labels, *outptr = start; -+ size_t i; -+ -+ for (i = 0; i < n; i++) { -+ CompactDomain *cd = domains[i].self; -+ CompactDomain *rd = cd->refdom; -+ -+ if (rd != NULL) { -+ size_t moff = (rd->labels - start) + (rd->len - cd->common_octets); -+ if (moff < 0x3FFFu) { -+ cd->len -= cd->common_octets - 2; -+ cd->labels[cd->len - 1] = moff & 0xFFu; -+ cd->labels[cd->len - 2] = 0xC0u | (moff >> 8); -+ } -+ } -+ -+ if (cd->labels != outptr) { -+ memmove(outptr, cd->labels, cd->len); -+ cd->labels = outptr; -+ } -+ outptr 
+= cd->len; -+ } -+ return outptr - start; -+} -+ -+int translate_dnssearch(Slirp *s, const char **names) -+{ -+ size_t blocks, bsrc_start, bsrc_end, bdst_start; -+ size_t i, num_domains, memreq = 0; -+ uint8_t *result = NULL, *outptr; -+ CompactDomain *domains = NULL; -+ -+ num_domains = g_strv_length((GStrv)(void *)names); -+ if (num_domains == 0) { -+ return -2; -+ } -+ -+ domains = g_malloc(num_domains * sizeof(*domains)); -+ -+ for (i = 0; i < num_domains; i++) { -+ size_t nlen = strlen(names[i]); -+ memreq += nlen + 2; /* 1 zero octet + 1 label length octet */ -+ domains[i].self = domains + i; -+ domains[i].len = nlen; -+ domains[i].common_octets = 0; -+ domains[i].refdom = NULL; -+ } -+ -+ /* reserve extra 2 header bytes for each 255 bytes of output */ -+ memreq += DIV_ROUND_UP(memreq, MAX_OPT_LEN) * OPT_HEADER_LEN; -+ result = g_malloc(memreq * sizeof(*result)); -+ -+ outptr = result; -+ for (i = 0; i < num_domains; i++) { -+ domains[i].labels = outptr; -+ domain_mklabels(domains + i, names[i]); -+ outptr += domains[i].len; -+ } -+ -+ if (outptr == result) { -+ g_free(domains); -+ g_free(result); -+ return -1; -+ } -+ -+ qsort(domains, num_domains, sizeof(*domains), domain_suffix_ord); -+ domain_fixup_order(domains, num_domains); -+ -+ for (i = 1; i < num_domains; i++) { -+ size_t cl = domain_common_label(domains + i - 1, domains + i); -+ domains[i - 1].common_octets = cl; -+ } -+ -+ domain_mkxrefs(domains, domains + num_domains - 1, 0); -+ memreq = domain_compactify(domains, num_domains); -+ -+ blocks = DIV_ROUND_UP(memreq, MAX_OPT_LEN); -+ bsrc_end = memreq; -+ bsrc_start = (blocks - 1) * MAX_OPT_LEN; -+ bdst_start = bsrc_start + blocks * OPT_HEADER_LEN; -+ memreq += blocks * OPT_HEADER_LEN; -+ -+ while (blocks--) { -+ size_t len = bsrc_end - bsrc_start; -+ memmove(result + bdst_start, result + bsrc_start, len); -+ result[bdst_start - 2] = RFC3397_OPT_DOMAIN_SEARCH; -+ result[bdst_start - 1] = len; -+ bsrc_end = bsrc_start; -+ bsrc_start -= MAX_OPT_LEN; 
-+ bdst_start -= MAX_OPT_LEN + OPT_HEADER_LEN; -+ } -+ -+ g_free(domains); -+ s->vdnssearch = result; -+ s->vdnssearch_len = memreq; -+ return 0; -+} -diff --git a/slirp/src/if.c b/slirp/src/if.c -new file mode 100644 -index 0000000000..23190b5593 ---- /dev/null -+++ b/slirp/src/if.c -@@ -0,0 +1,213 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#include "slirp.h" -+ -+static void ifs_insque(struct mbuf *ifm, struct mbuf *ifmhead) -+{ -+ ifm->ifs_next = ifmhead->ifs_next; -+ ifmhead->ifs_next = ifm; -+ ifm->ifs_prev = ifmhead; -+ ifm->ifs_next->ifs_prev = ifm; -+} -+ -+static void ifs_remque(struct mbuf *ifm) -+{ -+ ifm->ifs_prev->ifs_next = ifm->ifs_next; -+ ifm->ifs_next->ifs_prev = ifm->ifs_prev; -+} -+ -+void if_init(Slirp *slirp) -+{ -+ slirp->if_fastq.qh_link = slirp->if_fastq.qh_rlink = &slirp->if_fastq; -+ slirp->if_batchq.qh_link = slirp->if_batchq.qh_rlink = &slirp->if_batchq; -+} -+ -+/* -+ * if_output: Queue packet into an output queue. -+ * There are 2 output queue's, if_fastq and if_batchq. -+ * Each output queue is a doubly linked list of double linked lists -+ * of mbufs, each list belonging to one "session" (socket). This -+ * way, we can output packets fairly by sending one packet from each -+ * session, instead of all the packets from one session, then all packets -+ * from the next session, etc. Packets on the if_fastq get absolute -+ * priority, but if one session hogs the link, it gets "downgraded" -+ * to the batchq until it runs out of packets, then it'll return -+ * to the fastq (eg. 
if the user does an ls -alR in a telnet session, -+ * it'll temporarily get downgraded to the batchq) -+ */ -+void if_output(struct socket *so, struct mbuf *ifm) -+{ -+ Slirp *slirp = ifm->slirp; -+ struct mbuf *ifq; -+ int on_fastq = 1; -+ -+ DEBUG_CALL("if_output"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("ifm = %p", ifm); -+ -+ /* -+ * First remove the mbuf from m_usedlist, -+ * since we're gonna use m_next and m_prev ourselves -+ * XXX Shouldn't need this, gotta change dtom() etc. -+ */ -+ if (ifm->m_flags & M_USEDLIST) { -+ remque(ifm); -+ ifm->m_flags &= ~M_USEDLIST; -+ } -+ -+ /* -+ * See if there's already a batchq list for this session. -+ * This can include an interactive session, which should go on fastq, -+ * but gets too greedy... hence it'll be downgraded from fastq to batchq. -+ * We mustn't put this packet back on the fastq (or we'll send it out of -+ * order) -+ * XXX add cache here? -+ */ -+ if (so) { -+ for (ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; -+ (struct quehead *)ifq != &slirp->if_batchq; ifq = ifq->ifq_prev) { -+ if (so == ifq->ifq_so) { -+ /* A match! */ -+ ifm->ifq_so = so; -+ ifs_insque(ifm, ifq->ifs_prev); -+ goto diddit; -+ } -+ } -+ } -+ -+ /* No match, check which queue to put it on */ -+ if (so && (so->so_iptos & IPTOS_LOWDELAY)) { -+ ifq = (struct mbuf *)slirp->if_fastq.qh_rlink; -+ on_fastq = 1; -+ /* -+ * Check if this packet is a part of the last -+ * packet's session -+ */ -+ if (ifq->ifq_so == so) { -+ ifm->ifq_so = so; -+ ifs_insque(ifm, ifq->ifs_prev); -+ goto diddit; -+ } -+ } else { -+ ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; -+ } -+ -+ /* Create a new doubly linked list for this session */ -+ ifm->ifq_so = so; -+ ifs_init(ifm); -+ insque(ifm, ifq); -+ -+diddit: -+ if (so) { -+ /* Update *_queued */ -+ so->so_queued++; -+ so->so_nqueued++; -+ /* -+ * Check if the interactive session should be downgraded to -+ * the batchq. 
A session is downgraded if it has queued 6 -+ * packets without pausing, and at least 3 of those packets -+ * have been sent over the link -+ * (XXX These are arbitrary numbers, probably not optimal..) -+ */ -+ if (on_fastq && -+ ((so->so_nqueued >= 6) && (so->so_nqueued - so->so_queued) >= 3)) { -+ /* Remove from current queue... */ -+ remque(ifm->ifs_next); -+ -+ /* ...And insert in the new. That'll teach ya! */ -+ insque(ifm->ifs_next, &slirp->if_batchq); -+ } -+ } -+ -+ /* -+ * This prevents us from malloc()ing too many mbufs -+ */ -+ if_start(ifm->slirp); -+} -+ -+/* -+ * Send one packet from each session. -+ * If there are packets on the fastq, they are sent FIFO, before -+ * everything else. Then we choose the first packet from each -+ * batchq session (socket) and send it. -+ * For example, if there are 3 ftp sessions fighting for bandwidth, -+ * one packet will be sent from the first session, then one packet -+ * from the second session, then one packet from the third. -+ */ -+void if_start(Slirp *slirp) -+{ -+ uint64_t now = slirp->cb->clock_get_ns(slirp->opaque); -+ bool from_batchq = false; -+ struct mbuf *ifm, *ifm_next, *ifqt; -+ -+ DEBUG_CALL("if_start"); -+ -+ if (slirp->if_start_busy) { -+ return; -+ } -+ slirp->if_start_busy = true; -+ -+ struct mbuf *batch_head = NULL; -+ if (slirp->if_batchq.qh_link != &slirp->if_batchq) { -+ batch_head = (struct mbuf *)slirp->if_batchq.qh_link; -+ } -+ -+ if (slirp->if_fastq.qh_link != &slirp->if_fastq) { -+ ifm_next = (struct mbuf *)slirp->if_fastq.qh_link; -+ } else if (batch_head) { -+ /* Nothing on fastq, pick up from batchq */ -+ ifm_next = batch_head; -+ from_batchq = true; -+ } else { -+ ifm_next = NULL; -+ } -+ -+ while (ifm_next) { -+ ifm = ifm_next; -+ -+ ifm_next = ifm->ifq_next; -+ if ((struct quehead *)ifm_next == &slirp->if_fastq) { -+ /* No more packets in fastq, switch to batchq */ -+ ifm_next = batch_head; -+ from_batchq = true; -+ } -+ if ((struct quehead *)ifm_next == &slirp->if_batchq) { -+ 
/* end of batchq */ -+ ifm_next = NULL; -+ } -+ -+ /* Try to send packet unless it already expired */ -+ if (ifm->expiration_date >= now && !if_encap(slirp, ifm)) { -+ /* Packet is delayed due to pending ARP or NDP resolution */ -+ continue; -+ } -+ -+ /* Remove it from the queue */ -+ ifqt = ifm->ifq_prev; -+ remque(ifm); -+ -+ /* If there are more packets for this session, re-queue them */ -+ if (ifm->ifs_next != ifm) { -+ struct mbuf *next = ifm->ifs_next; -+ -+ insque(next, ifqt); -+ ifs_remque(ifm); -+ if (!from_batchq) { -+ ifm_next = next; -+ } -+ } -+ -+ /* Update so_queued */ -+ if (ifm->ifq_so && --ifm->ifq_so->so_queued == 0) { -+ /* If there's no more queued, reset nqueued */ -+ ifm->ifq_so->so_nqueued = 0; -+ } -+ -+ m_free(ifm); -+ } -+ -+ slirp->if_start_busy = false; -+} -diff --git a/slirp/src/if.h b/slirp/src/if.h -new file mode 100644 -index 0000000000..7cf9d2750e ---- /dev/null -+++ b/slirp/src/if.h -@@ -0,0 +1,25 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#ifndef IF_H -+#define IF_H -+ -+#define IF_COMPRESS 0x01 /* We want compression */ -+#define IF_NOCOMPRESS 0x02 /* Do not do compression */ -+#define IF_AUTOCOMP 0x04 /* Autodetect (default) */ -+#define IF_NOCIDCOMP 0x08 /* CID compression */ -+ -+#define IF_MTU_DEFAULT 1500 -+#define IF_MTU_MIN 68 -+#define IF_MTU_MAX 65521 -+#define IF_MRU_DEFAULT 1500 -+#define IF_MRU_MIN 68 -+#define IF_MRU_MAX 65521 -+#define IF_COMP IF_AUTOCOMP /* Flags for compression */ -+ -+/* 2 for alignment, 14 for ethernet */ -+#define IF_MAXLINKHDR (2 + ETH_HLEN) -+ -+#endif -diff --git a/slirp/src/ip.h b/slirp/src/ip.h -new file mode 100644 -index 0000000000..e5d4aa8a6d ---- /dev/null -+++ b/slirp/src/ip.h -@@ -0,0 +1,242 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1993 -+ * The Regents of the University of California. All rights reserved. 
-+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. 
-+ * -+ * @(#)ip.h 8.1 (Berkeley) 6/10/93 -+ * ip.h,v 1.3 1994/08/21 05:27:30 paul Exp -+ */ -+ -+#ifndef IP_H -+#define IP_H -+ -+#include -+ -+#if G_BYTE_ORDER == G_BIG_ENDIAN -+#undef NTOHL -+#undef NTOHS -+#undef HTONL -+#undef HTONS -+#define NTOHL(d) -+#define NTOHS(d) -+#define HTONL(d) -+#define HTONS(d) -+#else -+#ifndef NTOHL -+#define NTOHL(d) ((d) = ntohl((d))) -+#endif -+#ifndef NTOHS -+#define NTOHS(d) ((d) = ntohs((uint16_t)(d))) -+#endif -+#ifndef HTONL -+#define HTONL(d) ((d) = htonl((d))) -+#endif -+#ifndef HTONS -+#define HTONS(d) ((d) = htons((uint16_t)(d))) -+#endif -+#endif -+ -+typedef uint32_t n_long; /* long as received from the net */ -+ -+/* -+ * Definitions for internet protocol version 4. -+ * Per RFC 791, September 1981. -+ */ -+#define IPVERSION 4 -+ -+/* -+ * Structure of an internet header, naked of options. -+ */ -+struct ip { -+#if G_BYTE_ORDER == G_BIG_ENDIAN -+ uint8_t ip_v : 4, /* version */ -+ ip_hl : 4; /* header length */ -+#else -+ uint8_t ip_hl : 4, /* header length */ -+ ip_v : 4; /* version */ -+#endif -+ uint8_t ip_tos; /* type of service */ -+ uint16_t ip_len; /* total length */ -+ uint16_t ip_id; /* identification */ -+ uint16_t ip_off; /* fragment offset field */ -+#define IP_DF 0x4000 /* don't fragment flag */ -+#define IP_MF 0x2000 /* more fragments flag */ -+#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ -+ uint8_t ip_ttl; /* time to live */ -+ uint8_t ip_p; /* protocol */ -+ uint16_t ip_sum; /* checksum */ -+ struct in_addr ip_src, ip_dst; /* source and dest address */ -+} SLIRP_PACKED; -+ -+#define IP_MAXPACKET 65535 /* maximum packet size */ -+ -+/* -+ * Definitions for IP type of service (ip_tos) -+ */ -+#define IPTOS_LOWDELAY 0x10 -+#define IPTOS_THROUGHPUT 0x08 -+#define IPTOS_RELIABILITY 0x04 -+ -+/* -+ * Definitions for options. 
-+ */ -+#define IPOPT_COPIED(o) ((o)&0x80) -+#define IPOPT_CLASS(o) ((o)&0x60) -+#define IPOPT_NUMBER(o) ((o)&0x1f) -+ -+#define IPOPT_CONTROL 0x00 -+#define IPOPT_RESERVED1 0x20 -+#define IPOPT_DEBMEAS 0x40 -+#define IPOPT_RESERVED2 0x60 -+ -+#define IPOPT_EOL 0 /* end of option list */ -+#define IPOPT_NOP 1 /* no operation */ -+ -+#define IPOPT_RR 7 /* record packet route */ -+#define IPOPT_TS 68 /* timestamp */ -+#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */ -+#define IPOPT_LSRR 131 /* loose source route */ -+#define IPOPT_SATID 136 /* satnet id */ -+#define IPOPT_SSRR 137 /* strict source route */ -+ -+/* -+ * Offsets to fields in options other than EOL and NOP. -+ */ -+#define IPOPT_OPTVAL 0 /* option ID */ -+#define IPOPT_OLEN 1 /* option length */ -+#define IPOPT_OFFSET 2 /* offset within option */ -+#define IPOPT_MINOFF 4 /* min value of above */ -+ -+/* -+ * Time stamp option structure. -+ */ -+struct ip_timestamp { -+ uint8_t ipt_code; /* IPOPT_TS */ -+ uint8_t ipt_len; /* size of structure (variable) */ -+ uint8_t ipt_ptr; /* index of current entry */ -+#if G_BYTE_ORDER == G_BIG_ENDIAN -+ uint8_t ipt_oflw : 4, /* overflow counter */ -+ ipt_flg : 4; /* flags, see below */ -+#else -+ uint8_t ipt_flg : 4, /* flags, see below */ -+ ipt_oflw : 4; /* overflow counter */ -+#endif -+ union ipt_timestamp { -+ n_long ipt_time[1]; -+ struct ipt_ta { -+ struct in_addr ipt_addr; -+ n_long ipt_time; -+ } ipt_ta[1]; -+ } ipt_timestamp; -+} SLIRP_PACKED; -+ -+/* flag bits for ipt_flg */ -+#define IPOPT_TS_TSONLY 0 /* timestamps only */ -+#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ -+#define IPOPT_TS_PRESPEC 3 /* specified modules only */ -+ -+/* bits for security (not byte swapped) */ -+#define IPOPT_SECUR_UNCLASS 0x0000 -+#define IPOPT_SECUR_CONFID 0xf135 -+#define IPOPT_SECUR_EFTO 0x789a -+#define IPOPT_SECUR_MMMM 0xbc4d -+#define IPOPT_SECUR_RESTR 0xaf13 -+#define IPOPT_SECUR_SECRET 0xd788 -+#define IPOPT_SECUR_TOPSECRET 0x6bc5 -+ -+/* -+ * 
Internet implementation parameters. -+ */ -+#define MAXTTL 255 /* maximum time to live (seconds) */ -+#define IPDEFTTL 64 /* default ttl, from RFC 1340 */ -+#define IPFRAGTTL 60 /* time to live for frags, slowhz */ -+#define IPTTLDEC 1 /* subtracted when forwarding */ -+ -+#define IP_MSS 576 /* default maximum segment size */ -+ -+#if GLIB_SIZEOF_VOID_P == 4 -+struct mbuf_ptr { -+ struct mbuf *mptr; -+ uint32_t dummy; -+} SLIRP_PACKED; -+#else -+struct mbuf_ptr { -+ struct mbuf *mptr; -+} SLIRP_PACKED; -+#endif -+struct qlink { -+ void *next, *prev; -+}; -+ -+/* -+ * Overlay for ip header used by other protocols (tcp, udp). -+ */ -+struct ipovly { -+ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ -+ uint8_t ih_x1; /* (unused) */ -+ uint8_t ih_pr; /* protocol */ -+ uint16_t ih_len; /* protocol length */ -+ struct in_addr ih_src; /* source internet address */ -+ struct in_addr ih_dst; /* destination internet address */ -+} SLIRP_PACKED; -+ -+/* -+ * Ip reassembly queue structure. Each fragment -+ * being reassembled is attached to one of these structures. -+ * They are timed out after ipq_ttl drops to 0, and may also -+ * be reclaimed if memory becomes tight. -+ * size 28 bytes -+ */ -+struct ipq { -+ struct qlink frag_link; /* to ip headers of fragments */ -+ struct qlink ip_link; /* to other reass headers */ -+ uint8_t ipq_ttl; /* time for reass q to live */ -+ uint8_t ipq_p; /* protocol of this fragment */ -+ uint16_t ipq_id; /* sequence id for reassembly */ -+ struct in_addr ipq_src, ipq_dst; -+}; -+ -+/* -+ * Ip header, when holding a fragment. 
-+ * -+ * Note: ipf_link must be at same offset as frag_link above -+ */ -+struct ipasfrag { -+ struct qlink ipf_link; -+ struct ip ipf_ip; -+}; -+ -+G_STATIC_ASSERT(offsetof(struct ipq, frag_link) == -+ offsetof(struct ipasfrag, ipf_link)); -+ -+#define ipf_off ipf_ip.ip_off -+#define ipf_tos ipf_ip.ip_tos -+#define ipf_len ipf_ip.ip_len -+#define ipf_next ipf_link.next -+#define ipf_prev ipf_link.prev -+ -+#endif -diff --git a/slirp/src/ip6.h b/slirp/src/ip6.h -new file mode 100644 -index 0000000000..0630309d29 ---- /dev/null -+++ b/slirp/src/ip6.h -@@ -0,0 +1,214 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 2013 -+ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. -+ */ -+ -+#ifndef SLIRP_IP6_H -+#define SLIRP_IP6_H -+ -+#include -+#include -+ -+#define ALLNODES_MULTICAST \ -+ { \ -+ .s6_addr = { \ -+ 0xff, \ -+ 0x02, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x01 \ -+ } \ -+ } -+ -+#define SOLICITED_NODE_PREFIX \ -+ { \ -+ .s6_addr = { \ -+ 0xff, \ -+ 0x02, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x01, \ -+ 0xff, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00 \ -+ } \ -+ } -+ -+#define LINKLOCAL_ADDR \ -+ { \ -+ .s6_addr = { \ -+ 0xfe, \ -+ 0x80, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x02 \ -+ } \ -+ } -+ -+#define ZERO_ADDR \ -+ { \ -+ .s6_addr = { \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00, \ -+ 0x00 \ -+ } \ -+ } -+ -+static inline bool in6_equal(const struct in6_addr *a, const struct in6_addr *b) -+{ -+ return memcmp(a, b, sizeof(*a)) == 0; -+} -+ -+static inline bool in6_equal_net(const struct in6_addr *a, -+ const struct in6_addr *b, int 
prefix_len) -+{ -+ if (memcmp(a, b, prefix_len / 8) != 0) { -+ return 0; -+ } -+ -+ if (prefix_len % 8 == 0) { -+ return 1; -+ } -+ -+ return a->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)) == -+ b->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)); -+} -+ -+static inline bool in6_equal_mach(const struct in6_addr *a, -+ const struct in6_addr *b, int prefix_len) -+{ -+ if (memcmp(&(a->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), -+ &(b->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), -+ 16 - DIV_ROUND_UP(prefix_len, 8)) != 0) { -+ return 0; -+ } -+ -+ if (prefix_len % 8 == 0) { -+ return 1; -+ } -+ -+ return (a->s6_addr[prefix_len / 8] & -+ ((1U << (8 - (prefix_len % 8))) - 1)) == -+ (b->s6_addr[prefix_len / 8] & ((1U << (8 - (prefix_len % 8))) - 1)); -+} -+ -+ -+#define in6_equal_router(a) \ -+ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ -+ in6_equal_mach(a, &slirp->vhost_addr6, slirp->vprefix_len)) || \ -+ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ -+ in6_equal_mach(a, &slirp->vhost_addr6, 64))) -+ -+#define in6_equal_dns(a) \ -+ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ -+ in6_equal_mach(a, &slirp->vnameserver_addr6, slirp->vprefix_len)) || \ -+ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ -+ in6_equal_mach(a, &slirp->vnameserver_addr6, 64))) -+ -+#define in6_equal_host(a) (in6_equal_router(a) || in6_equal_dns(a)) -+ -+#define in6_solicitednode_multicast(a) \ -+ (in6_equal_net(a, &(struct in6_addr)SOLICITED_NODE_PREFIX, 104)) -+ -+#define in6_zero(a) (in6_equal(a, &(struct in6_addr)ZERO_ADDR)) -+ -+/* Compute emulated host MAC address from its ipv6 address */ -+static inline void in6_compute_ethaddr(struct in6_addr ip, -+ uint8_t eth[ETH_ALEN]) -+{ -+ eth[0] = 0x52; -+ eth[1] = 0x56; -+ memcpy(ð[2], &ip.s6_addr[16 - (ETH_ALEN - 2)], ETH_ALEN - 2); -+} -+ -+/* -+ * Definitions for internet protocol version 6. -+ * Per RFC 2460, December 1998. 
-+ */ -+#define IP6VERSION 6 -+#define IP6_HOP_LIMIT 255 -+ -+/* -+ * Structure of an internet header, naked of options. -+ */ -+struct ip6 { -+#if G_BYTE_ORDER == G_BIG_ENDIAN -+ uint32_t ip_v : 4, /* version */ -+ ip_tc_hi : 4, /* traffic class */ -+ ip_tc_lo : 4, ip_fl_hi : 4, /* flow label */ -+ ip_fl_lo : 16; -+#else -+ uint32_t ip_tc_hi : 4, ip_v : 4, ip_fl_hi : 4, ip_tc_lo : 4, ip_fl_lo : 16; -+#endif -+ uint16_t ip_pl; /* payload length */ -+ uint8_t ip_nh; /* next header */ -+ uint8_t ip_hl; /* hop limit */ -+ struct in6_addr ip_src, ip_dst; /* source and dest address */ -+}; -+ -+/* -+ * IPv6 pseudo-header used by upper-layer protocols -+ */ -+struct ip6_pseudohdr { -+ struct in6_addr ih_src; /* source internet address */ -+ struct in6_addr ih_dst; /* destination internet address */ -+ uint32_t ih_pl; /* upper-layer packet length */ -+ uint16_t ih_zero_hi; /* zero */ -+ uint8_t ih_zero_lo; /* zero */ -+ uint8_t ih_nh; /* next header */ -+}; -+ -+/* -+ * We don't want to mark these ip6 structs as packed as they are naturally -+ * correctly aligned; instead assert that there is no stray padding. -+ * If we marked the struct as packed then we would be unable to take -+ * the address of any of the fields in it. -+ */ -+G_STATIC_ASSERT(sizeof(struct ip6) == 40); -+G_STATIC_ASSERT(sizeof(struct ip6_pseudohdr) == 40); -+ -+#endif -diff --git a/slirp/src/ip6_icmp.c b/slirp/src/ip6_icmp.c -new file mode 100644 -index 0000000000..d9c872bc97 ---- /dev/null -+++ b/slirp/src/ip6_icmp.c -@@ -0,0 +1,433 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 2013 -+ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. 
-+ */ -+ -+#include "slirp.h" -+#include "ip6_icmp.h" -+ -+#define NDP_Interval \ -+ g_rand_int_range(slirp->grand, NDP_MinRtrAdvInterval, NDP_MaxRtrAdvInterval) -+ -+static void ra_timer_handler(void *opaque) -+{ -+ Slirp *slirp = opaque; -+ -+ slirp->cb->timer_mod(slirp->ra_timer, -+ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + -+ NDP_Interval, -+ slirp->opaque); -+ ndp_send_ra(slirp); -+} -+ -+void icmp6_init(Slirp *slirp) -+{ -+ if (!slirp->in6_enabled) { -+ return; -+ } -+ -+ slirp->ra_timer = -+ slirp->cb->timer_new(ra_timer_handler, slirp, slirp->opaque); -+ slirp->cb->timer_mod(slirp->ra_timer, -+ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + -+ NDP_Interval, -+ slirp->opaque); -+} -+ -+void icmp6_cleanup(Slirp *slirp) -+{ -+ if (!slirp->in6_enabled) { -+ return; -+ } -+ -+ slirp->cb->timer_free(slirp->ra_timer, slirp->opaque); -+} -+ -+static void icmp6_send_echoreply(struct mbuf *m, Slirp *slirp, struct ip6 *ip, -+ struct icmp6 *icmp) -+{ -+ struct mbuf *t = m_get(slirp); -+ t->m_len = sizeof(struct ip6) + ntohs(ip->ip_pl); -+ memcpy(t->m_data, m->m_data, t->m_len); -+ -+ /* IPv6 Packet */ -+ struct ip6 *rip = mtod(t, struct ip6 *); -+ rip->ip_dst = ip->ip_src; -+ rip->ip_src = ip->ip_dst; -+ -+ /* ICMPv6 packet */ -+ t->m_data += sizeof(struct ip6); -+ struct icmp6 *ricmp = mtod(t, struct icmp6 *); -+ ricmp->icmp6_type = ICMP6_ECHO_REPLY; -+ ricmp->icmp6_cksum = 0; -+ -+ /* Checksum */ -+ t->m_data -= sizeof(struct ip6); -+ ricmp->icmp6_cksum = ip6_cksum(t); -+ -+ ip6_output(NULL, t, 0); -+} -+ -+void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code) -+{ -+ Slirp *slirp = m->slirp; -+ struct mbuf *t; -+ struct ip6 *ip = mtod(m, struct ip6 *); -+ char addrstr[INET6_ADDRSTRLEN]; -+ -+ DEBUG_CALL("icmp6_send_error"); -+ DEBUG_ARG("type = %d, code = %d", type, code); -+ -+ if (IN6_IS_ADDR_MULTICAST(&ip->ip_src) || in6_zero(&ip->ip_src)) { -+ /* TODO icmp error? 
*/ -+ return; -+ } -+ -+ t = m_get(slirp); -+ -+ /* IPv6 packet */ -+ struct ip6 *rip = mtod(t, struct ip6 *); -+ rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR; -+ rip->ip_dst = ip->ip_src; -+ inet_ntop(AF_INET6, &rip->ip_dst, addrstr, INET6_ADDRSTRLEN); -+ DEBUG_ARG("target = %s", addrstr); -+ -+ rip->ip_nh = IPPROTO_ICMPV6; -+ const int error_data_len = MIN( -+ m->m_len, slirp->if_mtu - (sizeof(struct ip6) + ICMP6_ERROR_MINLEN)); -+ rip->ip_pl = htons(ICMP6_ERROR_MINLEN + error_data_len); -+ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); -+ -+ /* ICMPv6 packet */ -+ t->m_data += sizeof(struct ip6); -+ struct icmp6 *ricmp = mtod(t, struct icmp6 *); -+ ricmp->icmp6_type = type; -+ ricmp->icmp6_code = code; -+ ricmp->icmp6_cksum = 0; -+ -+ switch (type) { -+ case ICMP6_UNREACH: -+ case ICMP6_TIMXCEED: -+ ricmp->icmp6_err.unused = 0; -+ break; -+ case ICMP6_TOOBIG: -+ ricmp->icmp6_err.mtu = htonl(slirp->if_mtu); -+ break; -+ case ICMP6_PARAMPROB: -+ /* TODO: Handle this case */ -+ break; -+ default: -+ g_assert_not_reached(); -+ } -+ t->m_data += ICMP6_ERROR_MINLEN; -+ memcpy(t->m_data, m->m_data, error_data_len); -+ -+ /* Checksum */ -+ t->m_data -= ICMP6_ERROR_MINLEN; -+ t->m_data -= sizeof(struct ip6); -+ ricmp->icmp6_cksum = ip6_cksum(t); -+ -+ ip6_output(NULL, t, 0); -+} -+ -+/* -+ * Send NDP Router Advertisement -+ */ -+void ndp_send_ra(Slirp *slirp) -+{ -+ DEBUG_CALL("ndp_send_ra"); -+ -+ /* Build IPv6 packet */ -+ struct mbuf *t = m_get(slirp); -+ struct ip6 *rip = mtod(t, struct ip6 *); -+ size_t pl_size = 0; -+ struct in6_addr addr; -+ uint32_t scope_id; -+ -+ rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR; -+ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; -+ rip->ip_nh = IPPROTO_ICMPV6; -+ -+ /* Build ICMPv6 packet */ -+ t->m_data += sizeof(struct ip6); -+ struct icmp6 *ricmp = mtod(t, struct icmp6 *); -+ ricmp->icmp6_type = ICMP6_NDP_RA; -+ ricmp->icmp6_code = 0; -+ ricmp->icmp6_cksum = 0; -+ -+ /* NDP */ -+ ricmp->icmp6_nra.chl = 
NDP_AdvCurHopLimit; -+ ricmp->icmp6_nra.M = NDP_AdvManagedFlag; -+ ricmp->icmp6_nra.O = NDP_AdvOtherConfigFlag; -+ ricmp->icmp6_nra.reserved = 0; -+ ricmp->icmp6_nra.lifetime = htons(NDP_AdvDefaultLifetime); -+ ricmp->icmp6_nra.reach_time = htonl(NDP_AdvReachableTime); -+ ricmp->icmp6_nra.retrans_time = htonl(NDP_AdvRetransTime); -+ t->m_data += ICMP6_NDP_RA_MINLEN; -+ pl_size += ICMP6_NDP_RA_MINLEN; -+ -+ /* Source link-layer address (NDP option) */ -+ struct ndpopt *opt = mtod(t, struct ndpopt *); -+ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; -+ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; -+ in6_compute_ethaddr(rip->ip_src, opt->ndpopt_linklayer); -+ t->m_data += NDPOPT_LINKLAYER_LEN; -+ pl_size += NDPOPT_LINKLAYER_LEN; -+ -+ /* Prefix information (NDP option) */ -+ struct ndpopt *opt2 = mtod(t, struct ndpopt *); -+ opt2->ndpopt_type = NDPOPT_PREFIX_INFO; -+ opt2->ndpopt_len = NDPOPT_PREFIXINFO_LEN / 8; -+ opt2->ndpopt_prefixinfo.prefix_length = slirp->vprefix_len; -+ opt2->ndpopt_prefixinfo.L = 1; -+ opt2->ndpopt_prefixinfo.A = 1; -+ opt2->ndpopt_prefixinfo.reserved1 = 0; -+ opt2->ndpopt_prefixinfo.valid_lt = htonl(NDP_AdvValidLifetime); -+ opt2->ndpopt_prefixinfo.pref_lt = htonl(NDP_AdvPrefLifetime); -+ opt2->ndpopt_prefixinfo.reserved2 = 0; -+ opt2->ndpopt_prefixinfo.prefix = slirp->vprefix_addr6; -+ t->m_data += NDPOPT_PREFIXINFO_LEN; -+ pl_size += NDPOPT_PREFIXINFO_LEN; -+ -+ /* Prefix information (NDP option) */ -+ if (get_dns6_addr(&addr, &scope_id) >= 0) { -+ /* Host system does have an IPv6 DNS server, announce our proxy. 
*/ -+ struct ndpopt *opt3 = mtod(t, struct ndpopt *); -+ opt3->ndpopt_type = NDPOPT_RDNSS; -+ opt3->ndpopt_len = NDPOPT_RDNSS_LEN / 8; -+ opt3->ndpopt_rdnss.reserved = 0; -+ opt3->ndpopt_rdnss.lifetime = htonl(2 * NDP_MaxRtrAdvInterval); -+ opt3->ndpopt_rdnss.addr = slirp->vnameserver_addr6; -+ t->m_data += NDPOPT_RDNSS_LEN; -+ pl_size += NDPOPT_RDNSS_LEN; -+ } -+ -+ rip->ip_pl = htons(pl_size); -+ t->m_data -= sizeof(struct ip6) + pl_size; -+ t->m_len = sizeof(struct ip6) + pl_size; -+ -+ /* ICMPv6 Checksum */ -+ ricmp->icmp6_cksum = ip6_cksum(t); -+ -+ ip6_output(NULL, t, 0); -+} -+ -+/* -+ * Send NDP Neighbor Solitication -+ */ -+void ndp_send_ns(Slirp *slirp, struct in6_addr addr) -+{ -+ char addrstr[INET6_ADDRSTRLEN]; -+ -+ inet_ntop(AF_INET6, &addr, addrstr, INET6_ADDRSTRLEN); -+ -+ DEBUG_CALL("ndp_send_ns"); -+ DEBUG_ARG("target = %s", addrstr); -+ -+ /* Build IPv6 packet */ -+ struct mbuf *t = m_get(slirp); -+ struct ip6 *rip = mtod(t, struct ip6 *); -+ rip->ip_src = slirp->vhost_addr6; -+ rip->ip_dst = (struct in6_addr)SOLICITED_NODE_PREFIX; -+ memcpy(&rip->ip_dst.s6_addr[13], &addr.s6_addr[13], 3); -+ rip->ip_nh = IPPROTO_ICMPV6; -+ rip->ip_pl = htons(ICMP6_NDP_NS_MINLEN + NDPOPT_LINKLAYER_LEN); -+ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); -+ -+ /* Build ICMPv6 packet */ -+ t->m_data += sizeof(struct ip6); -+ struct icmp6 *ricmp = mtod(t, struct icmp6 *); -+ ricmp->icmp6_type = ICMP6_NDP_NS; -+ ricmp->icmp6_code = 0; -+ ricmp->icmp6_cksum = 0; -+ -+ /* NDP */ -+ ricmp->icmp6_nns.reserved = 0; -+ ricmp->icmp6_nns.target = addr; -+ -+ /* Build NDP option */ -+ t->m_data += ICMP6_NDP_NS_MINLEN; -+ struct ndpopt *opt = mtod(t, struct ndpopt *); -+ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; -+ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; -+ in6_compute_ethaddr(slirp->vhost_addr6, opt->ndpopt_linklayer); -+ -+ /* ICMPv6 Checksum */ -+ t->m_data -= ICMP6_NDP_NA_MINLEN; -+ t->m_data -= sizeof(struct ip6); -+ ricmp->icmp6_cksum = ip6_cksum(t); -+ -+ 
ip6_output(NULL, t, 1); -+} -+ -+/* -+ * Send NDP Neighbor Advertisement -+ */ -+static void ndp_send_na(Slirp *slirp, struct ip6 *ip, struct icmp6 *icmp) -+{ -+ /* Build IPv6 packet */ -+ struct mbuf *t = m_get(slirp); -+ struct ip6 *rip = mtod(t, struct ip6 *); -+ rip->ip_src = icmp->icmp6_nns.target; -+ if (in6_zero(&ip->ip_src)) { -+ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; -+ } else { -+ rip->ip_dst = ip->ip_src; -+ } -+ rip->ip_nh = IPPROTO_ICMPV6; -+ rip->ip_pl = htons(ICMP6_NDP_NA_MINLEN + NDPOPT_LINKLAYER_LEN); -+ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); -+ -+ /* Build ICMPv6 packet */ -+ t->m_data += sizeof(struct ip6); -+ struct icmp6 *ricmp = mtod(t, struct icmp6 *); -+ ricmp->icmp6_type = ICMP6_NDP_NA; -+ ricmp->icmp6_code = 0; -+ ricmp->icmp6_cksum = 0; -+ -+ /* NDP */ -+ ricmp->icmp6_nna.R = NDP_IsRouter; -+ ricmp->icmp6_nna.S = !IN6_IS_ADDR_MULTICAST(&rip->ip_dst); -+ ricmp->icmp6_nna.O = 1; -+ ricmp->icmp6_nna.reserved_hi = 0; -+ ricmp->icmp6_nna.reserved_lo = 0; -+ ricmp->icmp6_nna.target = icmp->icmp6_nns.target; -+ -+ /* Build NDP option */ -+ t->m_data += ICMP6_NDP_NA_MINLEN; -+ struct ndpopt *opt = mtod(t, struct ndpopt *); -+ opt->ndpopt_type = NDPOPT_LINKLAYER_TARGET; -+ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; -+ in6_compute_ethaddr(ricmp->icmp6_nna.target, opt->ndpopt_linklayer); -+ -+ /* ICMPv6 Checksum */ -+ t->m_data -= ICMP6_NDP_NA_MINLEN; -+ t->m_data -= sizeof(struct ip6); -+ ricmp->icmp6_cksum = ip6_cksum(t); -+ -+ ip6_output(NULL, t, 0); -+} -+ -+/* -+ * Process a NDP message -+ */ -+static void ndp_input(struct mbuf *m, Slirp *slirp, struct ip6 *ip, -+ struct icmp6 *icmp) -+{ -+ m->m_len += ETH_HLEN; -+ m->m_data -= ETH_HLEN; -+ struct ethhdr *eth = mtod(m, struct ethhdr *); -+ m->m_len -= ETH_HLEN; -+ m->m_data += ETH_HLEN; -+ -+ switch (icmp->icmp6_type) { -+ case ICMP6_NDP_RS: -+ DEBUG_CALL(" type = Router Solicitation"); -+ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && -+ ntohs(ip->ip_pl) >= 
ICMP6_NDP_RS_MINLEN) { -+ /* Gratuitous NDP */ -+ ndp_table_add(slirp, ip->ip_src, eth->h_source); -+ -+ ndp_send_ra(slirp); -+ } -+ break; -+ -+ case ICMP6_NDP_RA: -+ DEBUG_CALL(" type = Router Advertisement"); -+ slirp->cb->guest_error("Warning: guest sent NDP RA, but shouldn't", -+ slirp->opaque); -+ break; -+ -+ case ICMP6_NDP_NS: -+ DEBUG_CALL(" type = Neighbor Solicitation"); -+ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && -+ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nns.target) && -+ ntohs(ip->ip_pl) >= ICMP6_NDP_NS_MINLEN && -+ (!in6_zero(&ip->ip_src) || -+ in6_solicitednode_multicast(&ip->ip_dst))) { -+ if (in6_equal_host(&icmp->icmp6_nns.target)) { -+ /* Gratuitous NDP */ -+ ndp_table_add(slirp, ip->ip_src, eth->h_source); -+ ndp_send_na(slirp, ip, icmp); -+ } -+ } -+ break; -+ -+ case ICMP6_NDP_NA: -+ DEBUG_CALL(" type = Neighbor Advertisement"); -+ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && -+ ntohs(ip->ip_pl) >= ICMP6_NDP_NA_MINLEN && -+ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nna.target) && -+ (!IN6_IS_ADDR_MULTICAST(&ip->ip_dst) || icmp->icmp6_nna.S == 0)) { -+ ndp_table_add(slirp, ip->ip_src, eth->h_source); -+ } -+ break; -+ -+ case ICMP6_NDP_REDIRECT: -+ DEBUG_CALL(" type = Redirect"); -+ slirp->cb->guest_error( -+ "Warning: guest sent NDP REDIRECT, but shouldn't", slirp->opaque); -+ break; -+ } -+} -+ -+/* -+ * Process a received ICMPv6 message. 
-+ */ -+void icmp6_input(struct mbuf *m) -+{ -+ struct icmp6 *icmp; -+ struct ip6 *ip = mtod(m, struct ip6 *); -+ Slirp *slirp = m->slirp; -+ int hlen = sizeof(struct ip6); -+ -+ DEBUG_CALL("icmp6_input"); -+ DEBUG_ARG("m = %p", m); -+ DEBUG_ARG("m_len = %d", m->m_len); -+ -+ if (ntohs(ip->ip_pl) < ICMP6_MINLEN) { -+ goto end; -+ } -+ -+ if (ip6_cksum(m)) { -+ goto end; -+ } -+ -+ m->m_len -= hlen; -+ m->m_data += hlen; -+ icmp = mtod(m, struct icmp6 *); -+ m->m_len += hlen; -+ m->m_data -= hlen; -+ -+ DEBUG_ARG("icmp6_type = %d", icmp->icmp6_type); -+ switch (icmp->icmp6_type) { -+ case ICMP6_ECHO_REQUEST: -+ if (in6_equal_host(&ip->ip_dst)) { -+ icmp6_send_echoreply(m, slirp, ip, icmp); -+ } else { -+ /* TODO */ -+ g_critical("external icmpv6 not supported yet"); -+ } -+ break; -+ -+ case ICMP6_NDP_RS: -+ case ICMP6_NDP_RA: -+ case ICMP6_NDP_NS: -+ case ICMP6_NDP_NA: -+ case ICMP6_NDP_REDIRECT: -+ ndp_input(m, slirp, ip, icmp); -+ break; -+ -+ case ICMP6_UNREACH: -+ case ICMP6_TOOBIG: -+ case ICMP6_TIMXCEED: -+ case ICMP6_PARAMPROB: -+ /* XXX? report error? close socket? */ -+ default: -+ break; -+ } -+ -+end: -+ m_free(m); -+} -diff --git a/slirp/src/ip6_icmp.h b/slirp/src/ip6_icmp.h -new file mode 100644 -index 0000000000..c37e60f28d ---- /dev/null -+++ b/slirp/src/ip6_icmp.h -@@ -0,0 +1,219 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 2013 -+ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. -+ */ -+ -+#ifndef SLIRP_IP6_ICMP_H -+#define SLIRP_IP6_ICMP_H -+ -+/* -+ * Interface Control Message Protocol version 6 Definitions. -+ * Per RFC 4443, March 2006. -+ * -+ * Network Discover Protocol Definitions. -+ * Per RFC 4861, September 2007. 
-+ */ -+ -+struct icmp6_echo { /* Echo Messages */ -+ uint16_t id; -+ uint16_t seq_num; -+}; -+ -+union icmp6_error_body { -+ uint32_t unused; -+ uint32_t pointer; -+ uint32_t mtu; -+}; -+ -+/* -+ * NDP Messages -+ */ -+struct ndp_rs { /* Router Solicitation Message */ -+ uint32_t reserved; -+}; -+ -+struct ndp_ra { /* Router Advertisement Message */ -+ uint8_t chl; /* Cur Hop Limit */ -+#if G_BYTE_ORDER == G_BIG_ENDIAN -+ uint8_t M : 1, O : 1, reserved : 6; -+#else -+ uint8_t reserved : 6, O : 1, M : 1; -+#endif -+ uint16_t lifetime; /* Router Lifetime */ -+ uint32_t reach_time; /* Reachable Time */ -+ uint32_t retrans_time; /* Retrans Timer */ -+}; -+ -+G_STATIC_ASSERT(sizeof(struct ndp_ra) == 12); -+ -+struct ndp_ns { /* Neighbor Solicitation Message */ -+ uint32_t reserved; -+ struct in6_addr target; /* Target Address */ -+}; -+ -+G_STATIC_ASSERT(sizeof(struct ndp_ns) == 20); -+ -+struct ndp_na { /* Neighbor Advertisement Message */ -+#if G_BYTE_ORDER == G_BIG_ENDIAN -+ uint32_t R : 1, /* Router Flag */ -+ S : 1, /* Solicited Flag */ -+ O : 1, /* Override Flag */ -+ reserved_hi : 5, reserved_lo : 24; -+#else -+ uint32_t reserved_hi : 5, O : 1, S : 1, R : 1, reserved_lo : 24; -+#endif -+ struct in6_addr target; /* Target Address */ -+}; -+ -+G_STATIC_ASSERT(sizeof(struct ndp_na) == 20); -+ -+struct ndp_redirect { -+ uint32_t reserved; -+ struct in6_addr target; /* Target Address */ -+ struct in6_addr dest; /* Destination Address */ -+}; -+ -+G_STATIC_ASSERT(sizeof(struct ndp_redirect) == 36); -+ -+/* -+ * Structure of an icmpv6 header. 
-+ */ -+struct icmp6 { -+ uint8_t icmp6_type; /* type of message, see below */ -+ uint8_t icmp6_code; /* type sub code */ -+ uint16_t icmp6_cksum; /* ones complement cksum of struct */ -+ union { -+ union icmp6_error_body error_body; -+ struct icmp6_echo echo; -+ struct ndp_rs ndp_rs; -+ struct ndp_ra ndp_ra; -+ struct ndp_ns ndp_ns; -+ struct ndp_na ndp_na; -+ struct ndp_redirect ndp_redirect; -+ } icmp6_body; -+#define icmp6_err icmp6_body.error_body -+#define icmp6_echo icmp6_body.echo -+#define icmp6_nrs icmp6_body.ndp_rs -+#define icmp6_nra icmp6_body.ndp_ra -+#define icmp6_nns icmp6_body.ndp_ns -+#define icmp6_nna icmp6_body.ndp_na -+#define icmp6_redirect icmp6_body.ndp_redirect -+}; -+ -+G_STATIC_ASSERT(sizeof(struct icmp6) == 40); -+ -+#define ICMP6_MINLEN 4 -+#define ICMP6_ERROR_MINLEN 8 -+#define ICMP6_ECHO_MINLEN 8 -+#define ICMP6_NDP_RS_MINLEN 8 -+#define ICMP6_NDP_RA_MINLEN 16 -+#define ICMP6_NDP_NS_MINLEN 24 -+#define ICMP6_NDP_NA_MINLEN 24 -+#define ICMP6_NDP_REDIRECT_MINLEN 40 -+ -+/* -+ * NDP Options -+ */ -+struct ndpopt { -+ uint8_t ndpopt_type; /* Option type */ -+ uint8_t ndpopt_len; /* /!\ In units of 8 octets */ -+ union { -+ unsigned char linklayer_addr[6]; /* Source/Target Link-layer */ -+#define ndpopt_linklayer ndpopt_body.linklayer_addr -+ struct prefixinfo { /* Prefix Information */ -+ uint8_t prefix_length; -+#if G_BYTE_ORDER == G_BIG_ENDIAN -+ uint8_t L : 1, A : 1, reserved1 : 6; -+#else -+ uint8_t reserved1 : 6, A : 1, L : 1; -+#endif -+ uint32_t valid_lt; /* Valid Lifetime */ -+ uint32_t pref_lt; /* Preferred Lifetime */ -+ uint32_t reserved2; -+ struct in6_addr prefix; -+ } SLIRP_PACKED prefixinfo; -+#define ndpopt_prefixinfo ndpopt_body.prefixinfo -+ struct rdnss { -+ uint16_t reserved; -+ uint32_t lifetime; -+ struct in6_addr addr; -+ } SLIRP_PACKED rdnss; -+#define ndpopt_rdnss ndpopt_body.rdnss -+ } ndpopt_body; -+} SLIRP_PACKED; -+ -+/* NDP options type */ -+#define NDPOPT_LINKLAYER_SOURCE 1 /* Source Link-Layer Address */ 
-+#define NDPOPT_LINKLAYER_TARGET 2 /* Target Link-Layer Address */ -+#define NDPOPT_PREFIX_INFO 3 /* Prefix Information */ -+#define NDPOPT_RDNSS 25 /* Recursive DNS Server Address */ -+ -+/* NDP options size, in octets. */ -+#define NDPOPT_LINKLAYER_LEN 8 -+#define NDPOPT_PREFIXINFO_LEN 32 -+#define NDPOPT_RDNSS_LEN 24 -+ -+/* -+ * Definition of type and code field values. -+ * Per https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xml -+ * Last Updated 2012-11-12 -+ */ -+ -+/* Errors */ -+#define ICMP6_UNREACH 1 /* Destination Unreachable */ -+#define ICMP6_UNREACH_NO_ROUTE 0 /* no route to dest */ -+#define ICMP6_UNREACH_DEST_PROHIB 1 /* com with dest prohibited */ -+#define ICMP6_UNREACH_SCOPE 2 /* beyond scope of src addr */ -+#define ICMP6_UNREACH_ADDRESS 3 /* address unreachable */ -+#define ICMP6_UNREACH_PORT 4 /* port unreachable */ -+#define ICMP6_UNREACH_SRC_FAIL 5 /* src addr failed */ -+#define ICMP6_UNREACH_REJECT_ROUTE 6 /* reject route to dest */ -+#define ICMP6_UNREACH_SRC_HDR_ERROR 7 /* error in src routing header */ -+#define ICMP6_TOOBIG 2 /* Packet Too Big */ -+#define ICMP6_TIMXCEED 3 /* Time Exceeded */ -+#define ICMP6_TIMXCEED_INTRANS 0 /* hop limit exceeded in transit */ -+#define ICMP6_TIMXCEED_REASS 1 /* ttl=0 in reass */ -+#define ICMP6_PARAMPROB 4 /* Parameter Problem */ -+#define ICMP6_PARAMPROB_HDR_FIELD 0 /* err header field */ -+#define ICMP6_PARAMPROB_NXTHDR_TYPE 1 /* unrecognized Next Header type */ -+#define ICMP6_PARAMPROB_IPV6_OPT 2 /* unrecognized IPv6 option */ -+ -+/* Informational Messages */ -+#define ICMP6_ECHO_REQUEST 128 /* Echo Request */ -+#define ICMP6_ECHO_REPLY 129 /* Echo Reply */ -+#define ICMP6_NDP_RS 133 /* Router Solicitation (NDP) */ -+#define ICMP6_NDP_RA 134 /* Router Advertisement (NDP) */ -+#define ICMP6_NDP_NS 135 /* Neighbor Solicitation (NDP) */ -+#define ICMP6_NDP_NA 136 /* Neighbor Advertisement (NDP) */ -+#define ICMP6_NDP_REDIRECT 137 /* Redirect Message (NDP) */ -+ -+/* -+ * 
Router Configuration Variables (rfc4861#section-6) -+ */ -+#define NDP_IsRouter 1 -+#define NDP_AdvSendAdvertisements 1 -+#define NDP_MaxRtrAdvInterval 600000 -+#define NDP_MinRtrAdvInterval \ -+ ((NDP_MaxRtrAdvInterval >= 9) ? NDP_MaxRtrAdvInterval / 3 : \ -+ NDP_MaxRtrAdvInterval) -+#define NDP_AdvManagedFlag 0 -+#define NDP_AdvOtherConfigFlag 0 -+#define NDP_AdvLinkMTU 0 -+#define NDP_AdvReachableTime 0 -+#define NDP_AdvRetransTime 0 -+#define NDP_AdvCurHopLimit 64 -+#define NDP_AdvDefaultLifetime ((3 * NDP_MaxRtrAdvInterval) / 1000) -+#define NDP_AdvValidLifetime 86400 -+#define NDP_AdvOnLinkFlag 1 -+#define NDP_AdvPrefLifetime 14400 -+#define NDP_AdvAutonomousFlag 1 -+ -+void icmp6_init(Slirp *slirp); -+void icmp6_cleanup(Slirp *slirp); -+void icmp6_input(struct mbuf *); -+void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code); -+void ndp_send_ra(Slirp *slirp); -+void ndp_send_ns(Slirp *slirp, struct in6_addr addr); -+ -+#endif -diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c -new file mode 100644 -index 0000000000..a83e4f8e3d ---- /dev/null -+++ b/slirp/src/ip6_input.c -@@ -0,0 +1,85 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 2013 -+ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. -+ */ -+ -+#include "slirp.h" -+#include "ip6_icmp.h" -+ -+/* -+ * IP initialization: fill in IP protocol switch table. -+ * All protocols not implemented in kernel go to raw IP protocol handler. 
-+ */ -+void ip6_init(Slirp *slirp) -+{ -+ icmp6_init(slirp); -+} -+ -+void ip6_cleanup(Slirp *slirp) -+{ -+ icmp6_cleanup(slirp); -+} -+ -+void ip6_input(struct mbuf *m) -+{ -+ struct ip6 *ip6; -+ Slirp *slirp = m->slirp; -+ -+ if (!slirp->in6_enabled) { -+ goto bad; -+ } -+ -+ DEBUG_CALL("ip6_input"); -+ DEBUG_ARG("m = %p", m); -+ DEBUG_ARG("m_len = %d", m->m_len); -+ -+ if (m->m_len < sizeof(struct ip6)) { -+ goto bad; -+ } -+ -+ ip6 = mtod(m, struct ip6 *); -+ -+ if (ip6->ip_v != IP6VERSION) { -+ goto bad; -+ } -+ -+ if (ntohs(ip6->ip_pl) + sizeof(struct ip6) > slirp->if_mtu) { -+ icmp6_send_error(m, ICMP6_TOOBIG, 0); -+ goto bad; -+ } -+ -+ // Check if the message size is big enough to hold what's -+ // set in the payload length header. If not this is an invalid -+ // packet -+ if (m->m_len < ntohs(ip6->ip_pl) + sizeof(struct ip6)) { -+ goto bad; -+ } -+ -+ /* check ip_ttl for a correct ICMP reply */ -+ if (ip6->ip_hl == 0) { -+ icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); -+ goto bad; -+ } -+ -+ /* -+ * Switch out to protocol's input routine. -+ */ -+ switch (ip6->ip_nh) { -+ case IPPROTO_TCP: -+ NTOHS(ip6->ip_pl); -+ tcp_input(m, sizeof(struct ip6), (struct socket *)NULL, AF_INET6); -+ break; -+ case IPPROTO_UDP: -+ udp6_input(m); -+ break; -+ case IPPROTO_ICMPV6: -+ icmp6_input(m); -+ break; -+ default: -+ m_free(m); -+ } -+ return; -+bad: -+ m_free(m); -+} -diff --git a/slirp/src/ip6_output.c b/slirp/src/ip6_output.c -new file mode 100644 -index 0000000000..b86110662c ---- /dev/null -+++ b/slirp/src/ip6_output.c -@@ -0,0 +1,39 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 2013 -+ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. -+ */ -+ -+#include "slirp.h" -+ -+/* Number of packets queued before we start sending -+ * (to prevent allocing too many mbufs) */ -+#define IF6_THRESH 10 -+ -+/* -+ * IPv6 output. 
The packet in mbuf chain m contains a IP header -+ */ -+int ip6_output(struct socket *so, struct mbuf *m, int fast) -+{ -+ struct ip6 *ip = mtod(m, struct ip6 *); -+ -+ DEBUG_CALL("ip6_output"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("m = %p", m); -+ -+ /* Fill IPv6 header */ -+ ip->ip_v = IP6VERSION; -+ ip->ip_hl = IP6_HOP_LIMIT; -+ ip->ip_tc_hi = 0; -+ ip->ip_tc_lo = 0; -+ ip->ip_fl_hi = 0; -+ ip->ip_fl_lo = 0; -+ -+ if (fast) { -+ if_encap(m->slirp, m); -+ } else { -+ if_output(so, m); -+ } -+ -+ return 0; -+} -diff --git a/slirp/src/ip_icmp.c b/slirp/src/ip_icmp.c -new file mode 100644 -index 0000000000..13a0e55085 ---- /dev/null -+++ b/slirp/src/ip_icmp.c -@@ -0,0 +1,492 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 -+ * ip_icmp.c,v 1.7 1995/05/30 08:09:42 rgrimes Exp -+ */ -+ -+#include "slirp.h" -+#include "ip_icmp.h" -+ -+#ifndef WITH_ICMP_ERROR_MSG -+#define WITH_ICMP_ERROR_MSG 0 -+#endif -+ -+/* The message sent when emulating PING */ -+/* Be nice and tell them it's just a pseudo-ping packet */ -+static const char icmp_ping_msg[] = -+ "This is a pseudo-PING packet used by Slirp to emulate ICMP ECHO-REQUEST " -+ "packets.\n"; -+ -+/* list of actions for icmp_send_error() on RX of an icmp message */ -+static const int icmp_flush[19] = { -+ /* ECHO REPLY (0) */ 0, -+ 1, -+ 1, -+ /* DEST UNREACH (3) */ 1, -+ /* SOURCE QUENCH (4)*/ 1, -+ /* REDIRECT (5) */ 1, -+ 1, -+ 1, -+ /* ECHO (8) */ 0, -+ /* ROUTERADVERT (9) */ 1, -+ /* ROUTERSOLICIT (10) */ 1, -+ /* TIME EXCEEDED (11) */ 1, -+ /* PARAMETER PROBLEM (12) */ 1, -+ /* TIMESTAMP (13) */ 0, -+ /* TIMESTAMP REPLY (14) */ 0, -+ /* INFO (15) */ 0, -+ /* INFO REPLY (16) */ 0, -+ /* ADDR MASK (17) */ 0, -+ /* ADDR MASK REPLY (18) */ 0 -+}; -+ -+void icmp_init(Slirp *slirp) -+{ -+ slirp->icmp.so_next = slirp->icmp.so_prev = &slirp->icmp; -+ slirp->icmp_last_so = &slirp->icmp; -+} -+ -+void icmp_cleanup(Slirp *slirp) -+{ -+ struct socket *so, *so_next; -+ -+ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { -+ so_next = so->so_next; -+ icmp_detach(so); -+ } -+} -+ -+static int icmp_send(struct socket *so, struct mbuf *m, int hlen) -+{ -+ 
struct ip *ip = mtod(m, struct ip *); -+ struct sockaddr_in addr; -+ -+ so->s = slirp_socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); -+ if (so->s == -1) { -+ return -1; -+ } -+ -+ if (slirp_bind_outbound(so, AF_INET) != 0) { -+ // bind failed - close socket -+ closesocket(so->s); -+ so->s = -1; -+ return -1; -+ } -+ -+ so->so_m = m; -+ so->so_faddr = ip->ip_dst; -+ so->so_laddr = ip->ip_src; -+ so->so_iptos = ip->ip_tos; -+ so->so_type = IPPROTO_ICMP; -+ so->so_state = SS_ISFCONNECTED; -+ so->so_expire = curtime + SO_EXPIRE; -+ -+ addr.sin_family = AF_INET; -+ addr.sin_addr = so->so_faddr; -+ -+ insque(so, &so->slirp->icmp); -+ -+ if (sendto(so->s, m->m_data + hlen, m->m_len - hlen, 0, -+ (struct sockaddr *)&addr, sizeof(addr)) == -1) { -+ DEBUG_MISC("icmp_input icmp sendto tx errno = %d-%s", errno, -+ strerror(errno)); -+ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); -+ icmp_detach(so); -+ } -+ -+ return 0; -+} -+ -+void icmp_detach(struct socket *so) -+{ -+ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); -+ closesocket(so->s); -+ sofree(so); -+} -+ -+/* -+ * Process a received ICMP message. -+ */ -+void icmp_input(struct mbuf *m, int hlen) -+{ -+ register struct icmp *icp; -+ register struct ip *ip = mtod(m, struct ip *); -+ int icmplen = ip->ip_len; -+ Slirp *slirp = m->slirp; -+ -+ DEBUG_CALL("icmp_input"); -+ DEBUG_ARG("m = %p", m); -+ DEBUG_ARG("m_len = %d", m->m_len); -+ -+ /* -+ * Locate icmp structure in mbuf, and check -+ * that its not corrupted and of at least minimum length. 
-+ */ -+ if (icmplen < ICMP_MINLEN) { /* min 8 bytes payload */ -+ freeit: -+ m_free(m); -+ goto end_error; -+ } -+ -+ m->m_len -= hlen; -+ m->m_data += hlen; -+ icp = mtod(m, struct icmp *); -+ if (cksum(m, icmplen)) { -+ goto freeit; -+ } -+ m->m_len += hlen; -+ m->m_data -= hlen; -+ -+ DEBUG_ARG("icmp_type = %d", icp->icmp_type); -+ switch (icp->icmp_type) { -+ case ICMP_ECHO: -+ ip->ip_len += hlen; /* since ip_input subtracts this */ -+ if (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || -+ ip->ip_dst.s_addr == slirp->vnameserver_addr.s_addr) { -+ icmp_reflect(m); -+ } else if (slirp->restricted) { -+ goto freeit; -+ } else { -+ struct socket *so; -+ struct sockaddr_storage addr; -+ so = socreate(slirp); -+ if (icmp_send(so, m, hlen) == 0) { -+ return; -+ } -+ if (udp_attach(so, AF_INET) == -1) { -+ DEBUG_MISC("icmp_input udp_attach errno = %d-%s", errno, -+ strerror(errno)); -+ sofree(so); -+ m_free(m); -+ goto end_error; -+ } -+ so->so_m = m; -+ so->so_ffamily = AF_INET; -+ so->so_faddr = ip->ip_dst; -+ so->so_fport = htons(7); -+ so->so_lfamily = AF_INET; -+ so->so_laddr = ip->ip_src; -+ so->so_lport = htons(9); -+ so->so_iptos = ip->ip_tos; -+ so->so_type = IPPROTO_ICMP; -+ so->so_state = SS_ISFCONNECTED; -+ -+ /* Send the packet */ -+ addr = so->fhost.ss; -+ if (sotranslate_out(so, &addr) < 0) { -+ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, -+ strerror(errno)); -+ udp_detach(so); -+ return; -+ } -+ -+ if (sendto(so->s, icmp_ping_msg, strlen(icmp_ping_msg), 0, -+ (struct sockaddr *)&addr, sockaddr_size(&addr)) == -1) { -+ DEBUG_MISC("icmp_input udp sendto tx errno = %d-%s", errno, -+ strerror(errno)); -+ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, -+ strerror(errno)); -+ udp_detach(so); -+ } -+ } /* if ip->ip_dst.s_addr == alias_addr.s_addr */ -+ break; -+ case ICMP_UNREACH: -+ /* XXX? report error? close socket? 
*/ -+ case ICMP_TIMXCEED: -+ case ICMP_PARAMPROB: -+ case ICMP_SOURCEQUENCH: -+ case ICMP_TSTAMP: -+ case ICMP_MASKREQ: -+ case ICMP_REDIRECT: -+ m_free(m); -+ break; -+ -+ default: -+ m_free(m); -+ } /* swith */ -+ -+end_error: -+ /* m is m_free()'d xor put in a socket xor or given to ip_send */ -+ return; -+} -+ -+ -+/* -+ * Send an ICMP message in response to a situation -+ * -+ * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. -+ *MAY send more (we do). MUST NOT change this header information. MUST NOT reply -+ *to a multicast/broadcast IP address. MUST NOT reply to a multicast/broadcast -+ *MAC address. MUST reply to only the first fragment. -+ */ -+/* -+ * Send ICMP_UNREACH back to the source regarding msrc. -+ * mbuf *msrc is used as a template, but is NOT m_free()'d. -+ * It is reported as the bad ip packet. The header should -+ * be fully correct and in host byte order. -+ * ICMP fragmentation is illegal. All machines must accept 576 bytes in one -+ * packet. 
The maximum payload is 576-20(ip hdr)-8(icmp hdr)=548 -+ */ -+ -+#define ICMP_MAXDATALEN (IP_MSS - 28) -+void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, -+ const char *message) -+{ -+ unsigned hlen, shlen, s_ip_len; -+ register struct ip *ip; -+ register struct icmp *icp; -+ register struct mbuf *m; -+ -+ DEBUG_CALL("icmp_send_error"); -+ DEBUG_ARG("msrc = %p", msrc); -+ DEBUG_ARG("msrc_len = %d", msrc->m_len); -+ -+ if (type != ICMP_UNREACH && type != ICMP_TIMXCEED) -+ goto end_error; -+ -+ /* check msrc */ -+ if (!msrc) -+ goto end_error; -+ ip = mtod(msrc, struct ip *); -+ if (slirp_debug & DBG_MISC) { -+ char bufa[20], bufb[20]; -+ slirp_pstrcpy(bufa, sizeof(bufa), inet_ntoa(ip->ip_src)); -+ slirp_pstrcpy(bufb, sizeof(bufb), inet_ntoa(ip->ip_dst)); -+ DEBUG_MISC(" %.16s to %.16s", bufa, bufb); -+ } -+ if (ip->ip_off & IP_OFFMASK) -+ goto end_error; /* Only reply to fragment 0 */ -+ -+ /* Do not reply to source-only IPs */ -+ if ((ip->ip_src.s_addr & htonl(~(0xf << 28))) == 0) { -+ goto end_error; -+ } -+ -+ shlen = ip->ip_hl << 2; -+ s_ip_len = ip->ip_len; -+ if (ip->ip_p == IPPROTO_ICMP) { -+ icp = (struct icmp *)((char *)ip + shlen); -+ /* -+ * Assume any unknown ICMP type is an error. This isn't -+ * specified by the RFC, but think about it.. 
-+ */ -+ if (icp->icmp_type > 18 || icmp_flush[icp->icmp_type]) -+ goto end_error; -+ } -+ -+ /* make a copy */ -+ m = m_get(msrc->slirp); -+ if (!m) { -+ goto end_error; -+ } -+ -+ { -+ int new_m_size; -+ new_m_size = -+ sizeof(struct ip) + ICMP_MINLEN + msrc->m_len + ICMP_MAXDATALEN; -+ if (new_m_size > m->m_size) -+ m_inc(m, new_m_size); -+ } -+ memcpy(m->m_data, msrc->m_data, msrc->m_len); -+ m->m_len = msrc->m_len; /* copy msrc to m */ -+ -+ /* make the header of the reply packet */ -+ ip = mtod(m, struct ip *); -+ hlen = sizeof(struct ip); /* no options in reply */ -+ -+ /* fill in icmp */ -+ m->m_data += hlen; -+ m->m_len -= hlen; -+ -+ icp = mtod(m, struct icmp *); -+ -+ if (minsize) -+ s_ip_len = shlen + ICMP_MINLEN; /* return header+8b only */ -+ else if (s_ip_len > ICMP_MAXDATALEN) /* maximum size */ -+ s_ip_len = ICMP_MAXDATALEN; -+ -+ m->m_len = ICMP_MINLEN + s_ip_len; /* 8 bytes ICMP header */ -+ -+ /* min. size = 8+sizeof(struct ip)+8 */ -+ -+ icp->icmp_type = type; -+ icp->icmp_code = code; -+ icp->icmp_id = 0; -+ icp->icmp_seq = 0; -+ -+ memcpy(&icp->icmp_ip, msrc->m_data, s_ip_len); /* report the ip packet */ -+ HTONS(icp->icmp_ip.ip_len); -+ HTONS(icp->icmp_ip.ip_id); -+ HTONS(icp->icmp_ip.ip_off); -+ -+ if (message && WITH_ICMP_ERROR_MSG) { /* append message to ICMP packet */ -+ int message_len; -+ char *cpnt; -+ message_len = strlen(message); -+ if (message_len > ICMP_MAXDATALEN) -+ message_len = ICMP_MAXDATALEN; -+ cpnt = (char *)m->m_data + m->m_len; -+ memcpy(cpnt, message, message_len); -+ m->m_len += message_len; -+ } -+ -+ icp->icmp_cksum = 0; -+ icp->icmp_cksum = cksum(m, m->m_len); -+ -+ m->m_data -= hlen; -+ m->m_len += hlen; -+ -+ /* fill in ip */ -+ ip->ip_hl = hlen >> 2; -+ ip->ip_len = m->m_len; -+ -+ ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */ -+ -+ ip->ip_ttl = MAXTTL; -+ ip->ip_p = IPPROTO_ICMP; -+ ip->ip_dst = ip->ip_src; /* ip addresses */ -+ ip->ip_src = m->slirp->vhost_addr; -+ -+ 
(void)ip_output((struct socket *)NULL, m); -+ -+end_error: -+ return; -+} -+#undef ICMP_MAXDATALEN -+ -+/* -+ * Reflect the ip packet back to the source -+ */ -+void icmp_reflect(struct mbuf *m) -+{ -+ register struct ip *ip = mtod(m, struct ip *); -+ int hlen = ip->ip_hl << 2; -+ int optlen = hlen - sizeof(struct ip); -+ register struct icmp *icp; -+ -+ /* -+ * Send an icmp packet back to the ip level, -+ * after supplying a checksum. -+ */ -+ m->m_data += hlen; -+ m->m_len -= hlen; -+ icp = mtod(m, struct icmp *); -+ -+ icp->icmp_type = ICMP_ECHOREPLY; -+ icp->icmp_cksum = 0; -+ icp->icmp_cksum = cksum(m, ip->ip_len - hlen); -+ -+ m->m_data -= hlen; -+ m->m_len += hlen; -+ -+ /* fill in ip */ -+ if (optlen > 0) { -+ /* -+ * Strip out original options by copying rest of first -+ * mbuf's data back, and adjust the IP length. -+ */ -+ memmove((char *)(ip + 1), (char *)ip + hlen, -+ (unsigned)(m->m_len - hlen)); -+ hlen -= optlen; -+ ip->ip_hl = hlen >> 2; -+ ip->ip_len -= optlen; -+ m->m_len -= optlen; -+ } -+ -+ ip->ip_ttl = MAXTTL; -+ { /* swap */ -+ struct in_addr icmp_dst; -+ icmp_dst = ip->ip_dst; -+ ip->ip_dst = ip->ip_src; -+ ip->ip_src = icmp_dst; -+ } -+ -+ (void)ip_output((struct socket *)NULL, m); -+} -+ -+void icmp_receive(struct socket *so) -+{ -+ struct mbuf *m = so->so_m; -+ struct ip *ip = mtod(m, struct ip *); -+ int hlen = ip->ip_hl << 2; -+ uint8_t error_code; -+ struct icmp *icp; -+ int id, len; -+ -+ m->m_data += hlen; -+ m->m_len -= hlen; -+ icp = mtod(m, struct icmp *); -+ -+ id = icp->icmp_id; -+ len = recv(so->s, icp, M_ROOM(m), 0); -+ /* -+ * The behavior of reading SOCK_DGRAM+IPPROTO_ICMP sockets is inconsistent -+ * between host OSes. On Linux, only the ICMP header and payload is -+ * included. On macOS/Darwin, the socket acts like a raw socket and -+ * includes the IP header as well. On other BSDs, SOCK_DGRAM+IPPROTO_ICMP -+ * sockets aren't supported at all, so we treat them like raw sockets. 
It -+ * isn't possible to detect this difference at runtime, so we must use an -+ * #ifdef to determine if we need to remove the IP header. -+ */ -+#ifdef CONFIG_BSD -+ if (len >= sizeof(struct ip)) { -+ struct ip *inner_ip = mtod(m, struct ip *); -+ int inner_hlen = inner_ip->ip_hl << 2; -+ if (inner_hlen > len) { -+ len = -1; -+ errno = -EINVAL; -+ } else { -+ len -= inner_hlen; -+ memmove(icp, (unsigned char *)icp + inner_hlen, len); -+ } -+ } else { -+ len = -1; -+ errno = -EINVAL; -+ } -+#endif -+ icp->icmp_id = id; -+ -+ m->m_data -= hlen; -+ m->m_len += hlen; -+ -+ if (len == -1 || len == 0) { -+ if (errno == ENETUNREACH) { -+ error_code = ICMP_UNREACH_NET; -+ } else { -+ error_code = ICMP_UNREACH_HOST; -+ } -+ DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno)); -+ icmp_send_error(so->so_m, ICMP_UNREACH, error_code, 0, strerror(errno)); -+ } else { -+ icmp_reflect(so->so_m); -+ so->so_m = NULL; /* Don't m_free() it again! */ -+ } -+ icmp_detach(so); -+} -diff --git a/slirp/src/ip_icmp.h b/slirp/src/ip_icmp.h -new file mode 100644 -index 0000000000..84707db247 ---- /dev/null -+++ b/slirp/src/ip_icmp.h -@@ -0,0 +1,166 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. 
Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93 -+ * ip_icmp.h,v 1.4 1995/05/30 08:09:43 rgrimes Exp -+ */ -+ -+#ifndef NETINET_IP_ICMP_H -+#define NETINET_IP_ICMP_H -+ -+/* -+ * Interface Control Message Protocol Definitions. -+ * Per RFC 792, September 1981. -+ */ -+ -+typedef uint32_t n_time; -+ -+/* -+ * Structure of an icmp header. 
-+ */ -+struct icmp { -+ uint8_t icmp_type; /* type of message, see below */ -+ uint8_t icmp_code; /* type sub code */ -+ uint16_t icmp_cksum; /* ones complement cksum of struct */ -+ union { -+ uint8_t ih_pptr; /* ICMP_PARAMPROB */ -+ struct in_addr ih_gwaddr; /* ICMP_REDIRECT */ -+ struct ih_idseq { -+ uint16_t icd_id; -+ uint16_t icd_seq; -+ } ih_idseq; -+ int ih_void; -+ -+ /* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */ -+ struct ih_pmtu { -+ uint16_t ipm_void; -+ uint16_t ipm_nextmtu; -+ } ih_pmtu; -+ } icmp_hun; -+#define icmp_pptr icmp_hun.ih_pptr -+#define icmp_gwaddr icmp_hun.ih_gwaddr -+#define icmp_id icmp_hun.ih_idseq.icd_id -+#define icmp_seq icmp_hun.ih_idseq.icd_seq -+#define icmp_void icmp_hun.ih_void -+#define icmp_pmvoid icmp_hun.ih_pmtu.ipm_void -+#define icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu -+ union { -+ struct id_ts { -+ n_time its_otime; -+ n_time its_rtime; -+ n_time its_ttime; -+ } id_ts; -+ struct id_ip { -+ struct ip idi_ip; -+ /* options and then 64 bits of data */ -+ } id_ip; -+ uint32_t id_mask; -+ char id_data[1]; -+ } icmp_dun; -+#define icmp_otime icmp_dun.id_ts.its_otime -+#define icmp_rtime icmp_dun.id_ts.its_rtime -+#define icmp_ttime icmp_dun.id_ts.its_ttime -+#define icmp_ip icmp_dun.id_ip.idi_ip -+#define icmp_mask icmp_dun.id_mask -+#define icmp_data icmp_dun.id_data -+}; -+ -+/* -+ * Lower bounds on packet lengths for various types. -+ * For the error advice packets must first ensure that the -+ * packet is large enough to contain the returned ip header. -+ * Only then can we do the check to see if 64 bits of packet -+ * data have been returned, since we need to check the returned -+ * ip header length. 
-+ */ -+#define ICMP_MINLEN 8 /* abs minimum */ -+#define ICMP_TSLEN (8 + 3 * sizeof(n_time)) /* timestamp */ -+#define ICMP_MASKLEN 12 /* address mask */ -+#define ICMP_ADVLENMIN (8 + sizeof(struct ip) + 8) /* min */ -+#define ICMP_ADVLEN(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8) -+/* N.B.: must separately check that ip_hl >= 5 */ -+ -+/* -+ * Definition of type and code field values. -+ */ -+#define ICMP_ECHOREPLY 0 /* echo reply */ -+#define ICMP_UNREACH 3 /* dest unreachable, codes: */ -+#define ICMP_UNREACH_NET 0 /* bad net */ -+#define ICMP_UNREACH_HOST 1 /* bad host */ -+#define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */ -+#define ICMP_UNREACH_PORT 3 /* bad port */ -+#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */ -+#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */ -+#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */ -+#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */ -+#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */ -+#define ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */ -+#define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */ -+#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */ -+#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */ -+#define ICMP_SOURCEQUENCH 4 /* packet lost, slow down */ -+#define ICMP_REDIRECT 5 /* shorter route, codes: */ -+#define ICMP_REDIRECT_NET 0 /* for network */ -+#define ICMP_REDIRECT_HOST 1 /* for host */ -+#define ICMP_REDIRECT_TOSNET 2 /* for tos and net */ -+#define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */ -+#define ICMP_ECHO 8 /* echo service */ -+#define ICMP_ROUTERADVERT 9 /* router advertisement */ -+#define ICMP_ROUTERSOLICIT 10 /* router solicitation */ -+#define ICMP_TIMXCEED 11 /* time exceeded, code: */ -+#define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */ -+#define ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */ -+#define ICMP_PARAMPROB 12 /* ip header bad */ -+#define ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. 
absent */ -+#define ICMP_TSTAMP 13 /* timestamp request */ -+#define ICMP_TSTAMPREPLY 14 /* timestamp reply */ -+#define ICMP_IREQ 15 /* information request */ -+#define ICMP_IREQREPLY 16 /* information reply */ -+#define ICMP_MASKREQ 17 /* address mask request */ -+#define ICMP_MASKREPLY 18 /* address mask reply */ -+ -+#define ICMP_MAXTYPE 18 -+ -+#define ICMP_INFOTYPE(type) \ -+ ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \ -+ (type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \ -+ (type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \ -+ (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \ -+ (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY) -+ -+void icmp_init(Slirp *slirp); -+void icmp_cleanup(Slirp *slirp); -+void icmp_input(struct mbuf *, int); -+void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, -+ const char *message); -+void icmp_reflect(struct mbuf *); -+void icmp_receive(struct socket *so); -+void icmp_detach(struct socket *so); -+ -+#endif -diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c -new file mode 100644 -index 0000000000..7f017a238a ---- /dev/null -+++ b/slirp/src/ip_input.c -@@ -0,0 +1,461 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. 
Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 -+ * ip_input.c,v 1.11 1994/11/16 10:17:08 jkh Exp -+ */ -+ -+/* -+ * Changes and additions relating to SLiRP are -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#include "slirp.h" -+#include "ip_icmp.h" -+ -+static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp); -+static void ip_freef(Slirp *slirp, struct ipq *fp); -+static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev); -+static void ip_deq(register struct ipasfrag *p); -+ -+/* -+ * IP initialization: fill in IP protocol switch table. -+ * All protocols not implemented in kernel go to raw IP protocol handler. -+ */ -+void ip_init(Slirp *slirp) -+{ -+ slirp->ipq.ip_link.next = slirp->ipq.ip_link.prev = &slirp->ipq.ip_link; -+ udp_init(slirp); -+ tcp_init(slirp); -+ icmp_init(slirp); -+} -+ -+void ip_cleanup(Slirp *slirp) -+{ -+ udp_cleanup(slirp); -+ tcp_cleanup(slirp); -+ icmp_cleanup(slirp); -+} -+ -+/* -+ * Ip input routine. 
Checksum and byte swap header. If fragmented -+ * try to reassemble. Process options. Pass to next level. -+ */ -+void ip_input(struct mbuf *m) -+{ -+ Slirp *slirp = m->slirp; -+ register struct ip *ip; -+ int hlen; -+ -+ if (!slirp->in_enabled) { -+ goto bad; -+ } -+ -+ DEBUG_CALL("ip_input"); -+ DEBUG_ARG("m = %p", m); -+ DEBUG_ARG("m_len = %d", m->m_len); -+ -+ if (m->m_len < sizeof(struct ip)) { -+ goto bad; -+ } -+ -+ ip = mtod(m, struct ip *); -+ -+ if (ip->ip_v != IPVERSION) { -+ goto bad; -+ } -+ -+ hlen = ip->ip_hl << 2; -+ if (hlen < sizeof(struct ip) || hlen > m->m_len) { /* min header length */ -+ goto bad; /* or packet too short */ -+ } -+ -+ /* keep ip header intact for ICMP reply -+ * ip->ip_sum = cksum(m, hlen); -+ * if (ip->ip_sum) { -+ */ -+ if (cksum(m, hlen)) { -+ goto bad; -+ } -+ -+ /* -+ * Convert fields to host representation. -+ */ -+ NTOHS(ip->ip_len); -+ if (ip->ip_len < hlen) { -+ goto bad; -+ } -+ NTOHS(ip->ip_id); -+ NTOHS(ip->ip_off); -+ -+ /* -+ * Check that the amount of data in the buffers -+ * is as at least much as the IP header would have us expect. -+ * Trim mbufs if longer than we expect. -+ * Drop packet if shorter than we expect. -+ */ -+ if (m->m_len < ip->ip_len) { -+ goto bad; -+ } -+ -+ /* Should drop packet if mbuf too long? hmmm... */ -+ if (m->m_len > ip->ip_len) -+ m_adj(m, ip->ip_len - m->m_len); -+ -+ /* check ip_ttl for a correct ICMP reply */ -+ if (ip->ip_ttl == 0) { -+ icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, "ttl"); -+ goto bad; -+ } -+ -+ /* -+ * If offset or IP_MF are set, must reassemble. -+ * Otherwise, nothing need be done. -+ * (We could look in the reassembly queue to see -+ * if the packet was previously fragmented, -+ * but it's not worth the time; just let them time out.) -+ * -+ * XXX This should fail, don't fragment yet -+ */ -+ if (ip->ip_off & ~IP_DF) { -+ register struct ipq *fp; -+ struct qlink *l; -+ /* -+ * Look for queue of fragments -+ * of this datagram. 
-+ */ -+ for (l = slirp->ipq.ip_link.next; l != &slirp->ipq.ip_link; -+ l = l->next) { -+ fp = container_of(l, struct ipq, ip_link); -+ if (ip->ip_id == fp->ipq_id && -+ ip->ip_src.s_addr == fp->ipq_src.s_addr && -+ ip->ip_dst.s_addr == fp->ipq_dst.s_addr && -+ ip->ip_p == fp->ipq_p) -+ goto found; -+ } -+ fp = NULL; -+ found: -+ -+ /* -+ * Adjust ip_len to not reflect header, -+ * set ip_mff if more fragments are expected, -+ * convert offset of this to bytes. -+ */ -+ ip->ip_len -= hlen; -+ if (ip->ip_off & IP_MF) -+ ip->ip_tos |= 1; -+ else -+ ip->ip_tos &= ~1; -+ -+ ip->ip_off <<= 3; -+ -+ /* -+ * If datagram marked as having more fragments -+ * or if this is not the first fragment, -+ * attempt reassembly; if it succeeds, proceed. -+ */ -+ if (ip->ip_tos & 1 || ip->ip_off) { -+ ip = ip_reass(slirp, ip, fp); -+ if (ip == NULL) -+ return; -+ m = dtom(slirp, ip); -+ } else if (fp) -+ ip_freef(slirp, fp); -+ -+ } else -+ ip->ip_len -= hlen; -+ -+ /* -+ * Switch out to protocol's input routine. -+ */ -+ switch (ip->ip_p) { -+ case IPPROTO_TCP: -+ tcp_input(m, hlen, (struct socket *)NULL, AF_INET); -+ break; -+ case IPPROTO_UDP: -+ udp_input(m, hlen); -+ break; -+ case IPPROTO_ICMP: -+ icmp_input(m, hlen); -+ break; -+ default: -+ m_free(m); -+ } -+ return; -+bad: -+ m_free(m); -+} -+ -+#define iptofrag(P) ((struct ipasfrag *)(((char *)(P)) - sizeof(struct qlink))) -+#define fragtoip(P) ((struct ip *)(((char *)(P)) + sizeof(struct qlink))) -+/* -+ * Take incoming datagram fragment and try to -+ * reassemble it into whole datagram. If a chain for -+ * reassembly of this datagram already exists, then it -+ * is given as fp; otherwise have to make a chain. 
-+ */ -+static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) -+{ -+ register struct mbuf *m = dtom(slirp, ip); -+ register struct ipasfrag *q; -+ int hlen = ip->ip_hl << 2; -+ int i, next; -+ -+ DEBUG_CALL("ip_reass"); -+ DEBUG_ARG("ip = %p", ip); -+ DEBUG_ARG("fp = %p", fp); -+ DEBUG_ARG("m = %p", m); -+ -+ /* -+ * Presence of header sizes in mbufs -+ * would confuse code below. -+ * Fragment m_data is concatenated. -+ */ -+ m->m_data += hlen; -+ m->m_len -= hlen; -+ -+ /* -+ * If first fragment to arrive, create a reassembly queue. -+ */ -+ if (fp == NULL) { -+ struct mbuf *t = m_get(slirp); -+ -+ if (t == NULL) { -+ goto dropfrag; -+ } -+ fp = mtod(t, struct ipq *); -+ insque(&fp->ip_link, &slirp->ipq.ip_link); -+ fp->ipq_ttl = IPFRAGTTL; -+ fp->ipq_p = ip->ip_p; -+ fp->ipq_id = ip->ip_id; -+ fp->frag_link.next = fp->frag_link.prev = &fp->frag_link; -+ fp->ipq_src = ip->ip_src; -+ fp->ipq_dst = ip->ip_dst; -+ q = (struct ipasfrag *)fp; -+ goto insert; -+ } -+ -+ /* -+ * Find a segment which begins after this one does. -+ */ -+ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; -+ q = q->ipf_next) -+ if (q->ipf_off > ip->ip_off) -+ break; -+ -+ /* -+ * If there is a preceding segment, it may provide some of -+ * our data already. If so, drop the data from the incoming -+ * segment. If it provides all of our data, drop us. -+ */ -+ if (q->ipf_prev != &fp->frag_link) { -+ struct ipasfrag *pq = q->ipf_prev; -+ i = pq->ipf_off + pq->ipf_len - ip->ip_off; -+ if (i > 0) { -+ if (i >= ip->ip_len) -+ goto dropfrag; -+ m_adj(dtom(slirp, ip), i); -+ ip->ip_off += i; -+ ip->ip_len -= i; -+ } -+ } -+ -+ /* -+ * While we overlap succeeding segments trim them or, -+ * if they are completely covered, dequeue them. 
-+ */ -+ while (q != (struct ipasfrag *)&fp->frag_link && -+ ip->ip_off + ip->ip_len > q->ipf_off) { -+ struct ipasfrag *prev; -+ i = (ip->ip_off + ip->ip_len) - q->ipf_off; -+ if (i < q->ipf_len) { -+ q->ipf_len -= i; -+ q->ipf_off += i; -+ m_adj(dtom(slirp, q), i); -+ break; -+ } -+ prev = q; -+ q = q->ipf_next; -+ ip_deq(prev); -+ m_free(dtom(slirp, prev)); -+ } -+ -+insert: -+ /* -+ * Stick new segment in its place; -+ * check for complete reassembly. -+ */ -+ ip_enq(iptofrag(ip), q->ipf_prev); -+ next = 0; -+ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; -+ q = q->ipf_next) { -+ if (q->ipf_off != next) -+ return NULL; -+ next += q->ipf_len; -+ } -+ if (((struct ipasfrag *)(q->ipf_prev))->ipf_tos & 1) -+ return NULL; -+ -+ /* -+ * Reassembly is complete; concatenate fragments. -+ */ -+ q = fp->frag_link.next; -+ m = dtom(slirp, q); -+ int delta = (char *)q - (m->m_flags & M_EXT ? m->m_ext : m->m_dat); -+ -+ q = (struct ipasfrag *)q->ipf_next; -+ while (q != (struct ipasfrag *)&fp->frag_link) { -+ struct mbuf *t = dtom(slirp, q); -+ q = (struct ipasfrag *)q->ipf_next; -+ m_cat(m, t); -+ } -+ -+ /* -+ * Create header for new ip packet by -+ * modifying header of first packet; -+ * dequeue and discard fragment reassembly header. -+ * Make header visible. -+ */ -+ q = fp->frag_link.next; -+ -+ /* -+ * If the fragments concatenated to an mbuf that's bigger than the total -+ * size of the fragment and the mbuf was not already using an m_ext buffer, -+ * then an m_ext buffer was alloced. But fp->ipq_next points to the old -+ * buffer (in the mbuf), so we must point ip into the new buffer. 
-+ */ -+ if (m->m_flags & M_EXT) { -+ q = (struct ipasfrag *)(m->m_ext + delta); -+ } -+ -+ ip = fragtoip(q); -+ ip->ip_len = next; -+ ip->ip_tos &= ~1; -+ ip->ip_src = fp->ipq_src; -+ ip->ip_dst = fp->ipq_dst; -+ remque(&fp->ip_link); -+ (void)m_free(dtom(slirp, fp)); -+ m->m_len += (ip->ip_hl << 2); -+ m->m_data -= (ip->ip_hl << 2); -+ -+ return ip; -+ -+dropfrag: -+ m_free(m); -+ return NULL; -+} -+ -+/* -+ * Free a fragment reassembly header and all -+ * associated datagrams. -+ */ -+static void ip_freef(Slirp *slirp, struct ipq *fp) -+{ -+ register struct ipasfrag *q, *p; -+ -+ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; -+ q = p) { -+ p = q->ipf_next; -+ ip_deq(q); -+ m_free(dtom(slirp, q)); -+ } -+ remque(&fp->ip_link); -+ (void)m_free(dtom(slirp, fp)); -+} -+ -+/* -+ * Put an ip fragment on a reassembly chain. -+ * Like insque, but pointers in middle of structure. -+ */ -+static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev) -+{ -+ DEBUG_CALL("ip_enq"); -+ DEBUG_ARG("prev = %p", prev); -+ p->ipf_prev = prev; -+ p->ipf_next = prev->ipf_next; -+ ((struct ipasfrag *)(prev->ipf_next))->ipf_prev = p; -+ prev->ipf_next = p; -+} -+ -+/* -+ * To ip_enq as remque is to insque. -+ */ -+static void ip_deq(register struct ipasfrag *p) -+{ -+ ((struct ipasfrag *)(p->ipf_prev))->ipf_next = p->ipf_next; -+ ((struct ipasfrag *)(p->ipf_next))->ipf_prev = p->ipf_prev; -+} -+ -+/* -+ * IP timer processing; -+ * if a timer expires on a reassembly -+ * queue, discard it. -+ */ -+void ip_slowtimo(Slirp *slirp) -+{ -+ struct qlink *l; -+ -+ DEBUG_CALL("ip_slowtimo"); -+ -+ l = slirp->ipq.ip_link.next; -+ -+ if (l == NULL) -+ return; -+ -+ while (l != &slirp->ipq.ip_link) { -+ struct ipq *fp = container_of(l, struct ipq, ip_link); -+ l = l->next; -+ if (--fp->ipq_ttl == 0) { -+ ip_freef(slirp, fp); -+ } -+ } -+} -+ -+/* -+ * Strip out IP options, at higher -+ * level protocol in the kernel. 
-+ * Second argument is buffer to which options -+ * will be moved, and return value is their length. -+ * (XXX) should be deleted; last arg currently ignored. -+ */ -+void ip_stripoptions(register struct mbuf *m, struct mbuf *mopt) -+{ -+ register int i; -+ struct ip *ip = mtod(m, struct ip *); -+ register char *opts; -+ int olen; -+ -+ olen = (ip->ip_hl << 2) - sizeof(struct ip); -+ opts = (char *)(ip + 1); -+ i = m->m_len - (sizeof(struct ip) + olen); -+ memmove(opts, opts + olen, (unsigned)i); -+ m->m_len -= olen; -+ -+ ip->ip_hl = sizeof(struct ip) >> 2; -+} -diff --git a/slirp/src/ip_output.c b/slirp/src/ip_output.c -new file mode 100644 -index 0000000000..22916a37df ---- /dev/null -+++ b/slirp/src/ip_output.c -@@ -0,0 +1,169 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1990, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 -+ * ip_output.c,v 1.9 1994/11/16 10:17:10 jkh Exp -+ */ -+ -+/* -+ * Changes and additions relating to SLiRP are -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#include "slirp.h" -+ -+/* Number of packets queued before we start sending -+ * (to prevent allocing too many mbufs) */ -+#define IF_THRESH 10 -+ -+/* -+ * IP output. The packet in mbuf chain m contains a skeletal IP -+ * header (with len, off, ttl, proto, tos, src, dst). -+ * The mbuf chain containing the packet will be freed. -+ * The mbuf opt, if present, will not be freed. -+ */ -+int ip_output(struct socket *so, struct mbuf *m0) -+{ -+ Slirp *slirp = m0->slirp; -+ register struct ip *ip; -+ register struct mbuf *m = m0; -+ register int hlen = sizeof(struct ip); -+ int len, off, error = 0; -+ -+ DEBUG_CALL("ip_output"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("m0 = %p", m0); -+ -+ ip = mtod(m, struct ip *); -+ /* -+ * Fill in IP header. -+ */ -+ ip->ip_v = IPVERSION; -+ ip->ip_off &= IP_DF; -+ ip->ip_id = htons(slirp->ip_id++); -+ ip->ip_hl = hlen >> 2; -+ -+ /* -+ * If small enough for interface, can just send directly. -+ */ -+ if ((uint16_t)ip->ip_len <= slirp->if_mtu) { -+ ip->ip_len = htons((uint16_t)ip->ip_len); -+ ip->ip_off = htons((uint16_t)ip->ip_off); -+ ip->ip_sum = 0; -+ ip->ip_sum = cksum(m, hlen); -+ -+ if_output(so, m); -+ goto done; -+ } -+ -+ /* -+ * Too large for interface; fragment if possible. 
-+ * Must be able to put at least 8 bytes per fragment. -+ */ -+ if (ip->ip_off & IP_DF) { -+ error = -1; -+ goto bad; -+ } -+ -+ len = (slirp->if_mtu - hlen) & ~7; /* ip databytes per packet */ -+ if (len < 8) { -+ error = -1; -+ goto bad; -+ } -+ -+ { -+ int mhlen, firstlen = len; -+ struct mbuf **mnext = &m->m_nextpkt; -+ -+ /* -+ * Loop through length of segment after first fragment, -+ * make new header and copy data of each part and link onto chain. -+ */ -+ m0 = m; -+ mhlen = sizeof(struct ip); -+ for (off = hlen + len; off < (uint16_t)ip->ip_len; off += len) { -+ register struct ip *mhip; -+ m = m_get(slirp); -+ if (m == NULL) { -+ error = -1; -+ goto sendorfree; -+ } -+ m->m_data += IF_MAXLINKHDR; -+ mhip = mtod(m, struct ip *); -+ *mhip = *ip; -+ -+ m->m_len = mhlen; -+ mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); -+ if (ip->ip_off & IP_MF) -+ mhip->ip_off |= IP_MF; -+ if (off + len >= (uint16_t)ip->ip_len) -+ len = (uint16_t)ip->ip_len - off; -+ else -+ mhip->ip_off |= IP_MF; -+ mhip->ip_len = htons((uint16_t)(len + mhlen)); -+ -+ if (m_copy(m, m0, off, len) < 0) { -+ error = -1; -+ goto sendorfree; -+ } -+ -+ mhip->ip_off = htons((uint16_t)mhip->ip_off); -+ mhip->ip_sum = 0; -+ mhip->ip_sum = cksum(m, mhlen); -+ *mnext = m; -+ mnext = &m->m_nextpkt; -+ } -+ /* -+ * Update first fragment by trimming what's been copied out -+ * and updating header, then send each fragment (in order). 
-+ */ -+ m = m0; -+ m_adj(m, hlen + firstlen - (uint16_t)ip->ip_len); -+ ip->ip_len = htons((uint16_t)m->m_len); -+ ip->ip_off = htons((uint16_t)(ip->ip_off | IP_MF)); -+ ip->ip_sum = 0; -+ ip->ip_sum = cksum(m, hlen); -+ sendorfree: -+ for (m = m0; m; m = m0) { -+ m0 = m->m_nextpkt; -+ m->m_nextpkt = NULL; -+ if (error == 0) -+ if_output(so, m); -+ else -+ m_free(m); -+ } -+ } -+ -+done: -+ return (error); -+ -+bad: -+ m_free(m0); -+ goto done; -+} -diff --git a/slirp/src/libslirp-version.h.in b/slirp/src/libslirp-version.h.in -new file mode 100644 -index 0000000000..faa6c85952 ---- /dev/null -+++ b/slirp/src/libslirp-version.h.in -@@ -0,0 +1,24 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+#ifndef LIBSLIRP_VERSION_H_ -+#define LIBSLIRP_VERSION_H_ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#define SLIRP_MAJOR_VERSION @SLIRP_MAJOR_VERSION@ -+#define SLIRP_MINOR_VERSION @SLIRP_MINOR_VERSION@ -+#define SLIRP_MICRO_VERSION @SLIRP_MICRO_VERSION@ -+#define SLIRP_VERSION_STRING @SLIRP_VERSION_STRING@ -+ -+#define SLIRP_CHECK_VERSION(major,minor,micro) \ -+ (SLIRP_MAJOR_VERSION > (major) || \ -+ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION > (minor)) || \ -+ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION == (minor) && \ -+ SLIRP_MICRO_VERSION >= (micro))) -+ -+#ifdef __cplusplus -+} /* extern "C" */ -+#endif -+ -+#endif /* LIBSLIRP_VERSION_H_ */ -diff --git a/slirp/src/libslirp.h b/slirp/src/libslirp.h -new file mode 100644 -index 0000000000..fb4c7e882c ---- /dev/null -+++ b/slirp/src/libslirp.h -@@ -0,0 +1,171 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+#ifndef LIBSLIRP_H -+#define LIBSLIRP_H -+ -+#include -+#include -+#include -+ -+#ifdef _WIN32 -+#include -+#include -+#else -+#include -+#include -+#endif -+ -+#include "libslirp-version.h" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+typedef struct Slirp Slirp; -+ -+enum { -+ SLIRP_POLL_IN = 1 << 0, -+ SLIRP_POLL_OUT = 1 << 1, -+ SLIRP_POLL_PRI = 1 << 2, -+ SLIRP_POLL_ERR 
= 1 << 3, -+ SLIRP_POLL_HUP = 1 << 4, -+}; -+ -+typedef ssize_t (*SlirpReadCb)(void *buf, size_t len, void *opaque); -+typedef ssize_t (*SlirpWriteCb)(const void *buf, size_t len, void *opaque); -+typedef void (*SlirpTimerCb)(void *opaque); -+typedef int (*SlirpAddPollCb)(int fd, int events, void *opaque); -+typedef int (*SlirpGetREventsCb)(int idx, void *opaque); -+ -+/* -+ * Callbacks from slirp -+ */ -+typedef struct SlirpCb { -+ /* -+ * Send an ethernet frame to the guest network. The opaque -+ * parameter is the one given to slirp_init(). The function -+ * doesn't need to send all the data and may return m_freelist.qh_link = slirp->m_freelist.qh_rlink = &slirp->m_freelist; -+ slirp->m_usedlist.qh_link = slirp->m_usedlist.qh_rlink = &slirp->m_usedlist; -+} -+ -+void m_cleanup(Slirp *slirp) -+{ -+ struct mbuf *m, *next; -+ -+ m = (struct mbuf *)slirp->m_usedlist.qh_link; -+ while ((struct quehead *)m != &slirp->m_usedlist) { -+ next = m->m_next; -+ if (m->m_flags & M_EXT) { -+ g_free(m->m_ext); -+ } -+ g_free(m); -+ m = next; -+ } -+ m = (struct mbuf *)slirp->m_freelist.qh_link; -+ while ((struct quehead *)m != &slirp->m_freelist) { -+ next = m->m_next; -+ g_free(m); -+ m = next; -+ } -+} -+ -+/* -+ * Get an mbuf from the free list, if there are none -+ * allocate one -+ * -+ * Because fragmentation can occur if we alloc new mbufs and -+ * free old mbufs, we mark all mbufs above mbuf_thresh as M_DOFREE, -+ * which tells m_free to actually g_free() it -+ */ -+struct mbuf *m_get(Slirp *slirp) -+{ -+ register struct mbuf *m; -+ int flags = 0; -+ -+ DEBUG_CALL("m_get"); -+ -+ if (slirp->m_freelist.qh_link == &slirp->m_freelist) { -+ m = g_malloc(SLIRP_MSIZE(slirp->if_mtu)); -+ slirp->mbuf_alloced++; -+ if (slirp->mbuf_alloced > MBUF_THRESH) -+ flags = M_DOFREE; -+ m->slirp = slirp; -+ } else { -+ m = (struct mbuf *)slirp->m_freelist.qh_link; -+ remque(m); -+ } -+ -+ /* Insert it in the used list */ -+ insque(m, &slirp->m_usedlist); -+ m->m_flags = (flags | 
M_USEDLIST); -+ -+ /* Initialise it */ -+ m->m_size = SLIRP_MSIZE(slirp->if_mtu) - offsetof(struct mbuf, m_dat); -+ m->m_data = m->m_dat; -+ m->m_len = 0; -+ m->m_nextpkt = NULL; -+ m->m_prevpkt = NULL; -+ m->resolution_requested = false; -+ m->expiration_date = (uint64_t)-1; -+ DEBUG_ARG("m = %p", m); -+ return m; -+} -+ -+void m_free(struct mbuf *m) -+{ -+ DEBUG_CALL("m_free"); -+ DEBUG_ARG("m = %p", m); -+ -+ if (m) { -+ /* Remove from m_usedlist */ -+ if (m->m_flags & M_USEDLIST) -+ remque(m); -+ -+ /* If it's M_EXT, free() it */ -+ if (m->m_flags & M_EXT) { -+ g_free(m->m_ext); -+ } -+ /* -+ * Either free() it or put it on the free list -+ */ -+ if (m->m_flags & M_DOFREE) { -+ m->slirp->mbuf_alloced--; -+ g_free(m); -+ } else if ((m->m_flags & M_FREELIST) == 0) { -+ insque(m, &m->slirp->m_freelist); -+ m->m_flags = M_FREELIST; /* Clobber other flags */ -+ } -+ } /* if(m) */ -+} -+ -+/* -+ * Copy data from one mbuf to the end of -+ * the other.. if result is too big for one mbuf, allocate -+ * an M_EXT data segment -+ */ -+void m_cat(struct mbuf *m, struct mbuf *n) -+{ -+ /* -+ * If there's no room, realloc -+ */ -+ if (M_FREEROOM(m) < n->m_len) -+ m_inc(m, m->m_len + n->m_len); -+ -+ memcpy(m->m_data + m->m_len, n->m_data, n->m_len); -+ m->m_len += n->m_len; -+ -+ m_free(n); -+} -+ -+ -+/* make m 'size' bytes large from m_data */ -+void m_inc(struct mbuf *m, int size) -+{ -+ int gapsize; -+ -+ /* some compilers throw up on gotos. This one we can fake. 
*/ -+ if (M_ROOM(m) > size) { -+ return; -+ } -+ -+ if (m->m_flags & M_EXT) { -+ gapsize = m->m_data - m->m_ext; -+ m->m_ext = g_realloc(m->m_ext, size + gapsize); -+ } else { -+ gapsize = m->m_data - m->m_dat; -+ m->m_ext = g_malloc(size + gapsize); -+ memcpy(m->m_ext, m->m_dat, m->m_size); -+ m->m_flags |= M_EXT; -+ } -+ -+ m->m_data = m->m_ext + gapsize; -+ m->m_size = size + gapsize; -+} -+ -+ -+void m_adj(struct mbuf *m, int len) -+{ -+ if (m == NULL) -+ return; -+ if (len >= 0) { -+ /* Trim from head */ -+ m->m_data += len; -+ m->m_len -= len; -+ } else { -+ /* Trim from tail */ -+ len = -len; -+ m->m_len -= len; -+ } -+} -+ -+ -+/* -+ * Copy len bytes from m, starting off bytes into n -+ */ -+int m_copy(struct mbuf *n, struct mbuf *m, int off, int len) -+{ -+ if (len > M_FREEROOM(n)) -+ return -1; -+ -+ memcpy((n->m_data + n->m_len), (m->m_data + off), len); -+ n->m_len += len; -+ return 0; -+} -+ -+ -+/* -+ * Given a pointer into an mbuf, return the mbuf -+ * XXX This is a kludge, I should eliminate the need for it -+ * Fortunately, it's not used often -+ */ -+struct mbuf *dtom(Slirp *slirp, void *dat) -+{ -+ struct mbuf *m; -+ -+ DEBUG_CALL("dtom"); -+ DEBUG_ARG("dat = %p", dat); -+ -+ /* bug corrected for M_EXT buffers */ -+ for (m = (struct mbuf *)slirp->m_usedlist.qh_link; -+ (struct quehead *)m != &slirp->m_usedlist; m = m->m_next) { -+ if (m->m_flags & M_EXT) { -+ if ((char *)dat >= m->m_ext && (char *)dat < (m->m_ext + m->m_size)) -+ return m; -+ } else { -+ if ((char *)dat >= m->m_dat && (char *)dat < (m->m_dat + m->m_size)) -+ return m; -+ } -+ } -+ -+ DEBUG_ERROR("dtom failed"); -+ -+ return (struct mbuf *)0; -+} -diff --git a/slirp/src/mbuf.h b/slirp/src/mbuf.h -new file mode 100644 -index 0000000000..546e7852c5 ---- /dev/null -+++ b/slirp/src/mbuf.h -@@ -0,0 +1,127 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1993 -+ * The Regents of the University of California. All rights reserved. 
-+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)mbuf.h 8.3 (Berkeley) 1/21/94 -+ * mbuf.h,v 1.9 1994/11/14 13:54:20 bde Exp -+ */ -+ -+#ifndef MBUF_H -+#define MBUF_H -+ -+/* -+ * Macros for type conversion -+ * mtod(m,t) - convert mbuf pointer to data pointer of correct type -+ */ -+#define mtod(m, t) ((t)(m)->m_data) -+ -+/* XXX About mbufs for slirp: -+ * Only one mbuf is ever used in a chain, for each "cell" of data. -+ * m_nextpkt points to the next packet, if fragmented. 
-+ * If the data is too large, the M_EXT is used, and a larger block -+ * is alloced. Therefore, m_free[m] must check for M_EXT and if set -+ * free the m_ext. This is inefficient memory-wise, but who cares. -+ */ -+ -+/* -+ * mbufs allow to have a gap between the start of the allocated buffer (m_ext if -+ * M_EXT is set, m_dat otherwise) and the in-use data: -+ * -+ * |--gapsize----->|---m_len-------> -+ * |----------m_size------------------------------> -+ * |----M_ROOM--------------------> -+ * |-M_FREEROOM--> -+ * -+ * ^ ^ ^ -+ * m_dat/m_ext m_data end of buffer -+ */ -+ -+/* -+ * How much room is in the mbuf, from m_data to the end of the mbuf -+ */ -+#define M_ROOM(m) \ -+ ((m->m_flags & M_EXT) ? (((m)->m_ext + (m)->m_size) - (m)->m_data) : \ -+ (((m)->m_dat + (m)->m_size) - (m)->m_data)) -+ -+/* -+ * How much free room there is -+ */ -+#define M_FREEROOM(m) (M_ROOM(m) - (m)->m_len) -+ -+struct mbuf { -+ /* XXX should union some of these! */ -+ /* header at beginning of each mbuf: */ -+ struct mbuf *m_next; /* Linked list of mbufs */ -+ struct mbuf *m_prev; -+ struct mbuf *m_nextpkt; /* Next packet in queue/record */ -+ struct mbuf *m_prevpkt; /* Flags aren't used in the output queue */ -+ int m_flags; /* Misc flags */ -+ -+ int m_size; /* Size of mbuf, from m_dat or m_ext */ -+ struct socket *m_so; -+ -+ char *m_data; /* Current location of data */ -+ int m_len; /* Amount of data in this mbuf, from m_data */ -+ -+ Slirp *slirp; -+ bool resolution_requested; -+ uint64_t expiration_date; -+ char *m_ext; -+ /* start of dynamic buffer area, must be last element */ -+ char m_dat[]; -+}; -+ -+#define ifq_prev m_prev -+#define ifq_next m_next -+#define ifs_prev m_prevpkt -+#define ifs_next m_nextpkt -+#define ifq_so m_so -+ -+#define M_EXT 0x01 /* m_ext points to more (malloced) data */ -+#define M_FREELIST 0x02 /* mbuf is on free list */ -+#define M_USEDLIST 0x04 /* XXX mbuf is on used list (for dtom()) */ -+#define M_DOFREE \ -+ 0x08 /* when m_free is called on 
the mbuf, free() \ -+ * it rather than putting it on the free list */ -+ -+void m_init(Slirp *); -+void m_cleanup(Slirp *slirp); -+struct mbuf *m_get(Slirp *); -+void m_free(struct mbuf *); -+void m_cat(register struct mbuf *, register struct mbuf *); -+void m_inc(struct mbuf *, int); -+void m_adj(struct mbuf *, int); -+int m_copy(struct mbuf *, struct mbuf *, int, int); -+struct mbuf *dtom(Slirp *, void *); -+ -+static inline void ifs_init(struct mbuf *ifm) -+{ -+ ifm->ifs_next = ifm->ifs_prev = ifm; -+} -+ -+#endif -diff --git a/slirp/src/misc.c b/slirp/src/misc.c -new file mode 100644 -index 0000000000..e6bc0a207d ---- /dev/null -+++ b/slirp/src/misc.c -@@ -0,0 +1,390 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#include "slirp.h" -+#ifdef G_OS_UNIX -+#include -+#endif -+ -+inline void insque(void *a, void *b) -+{ -+ register struct quehead *element = (struct quehead *)a; -+ register struct quehead *head = (struct quehead *)b; -+ element->qh_link = head->qh_link; -+ head->qh_link = (struct quehead *)element; -+ element->qh_rlink = (struct quehead *)head; -+ ((struct quehead *)(element->qh_link))->qh_rlink = -+ (struct quehead *)element; -+} -+ -+inline void remque(void *a) -+{ -+ register struct quehead *element = (struct quehead *)a; -+ ((struct quehead *)(element->qh_link))->qh_rlink = element->qh_rlink; -+ ((struct quehead *)(element->qh_rlink))->qh_link = element->qh_link; -+ element->qh_rlink = NULL; -+} -+ -+/* TODO: IPv6 */ -+struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, -+ void *opaque, struct in_addr addr, int port) -+{ -+ struct gfwd_list *f = g_new0(struct gfwd_list, 1); -+ -+ f->write_cb = write_cb; -+ f->opaque = opaque; -+ f->ex_fport = port; -+ f->ex_addr = addr; -+ f->ex_next = *ex_ptr; -+ *ex_ptr = f; -+ -+ return f; -+} -+ -+struct gfwd_list *add_exec(struct gfwd_list **ex_ptr, const char *cmdline, -+ struct in_addr addr, int port) -+{ -+ struct 
gfwd_list *f = add_guestfwd(ex_ptr, NULL, NULL, addr, port); -+ -+ f->ex_exec = g_strdup(cmdline); -+ -+ return f; -+} -+ -+struct gfwd_list *add_unix(struct gfwd_list **ex_ptr, const char *unixsock, -+ struct in_addr addr, int port) -+{ -+ struct gfwd_list *f = add_guestfwd(ex_ptr, NULL, NULL, addr, port); -+ -+ f->ex_unix = g_strdup(unixsock); -+ -+ return f; -+} -+ -+int remove_guestfwd(struct gfwd_list **ex_ptr, struct in_addr addr, int port) -+{ -+ for (; *ex_ptr != NULL; ex_ptr = &((*ex_ptr)->ex_next)) { -+ struct gfwd_list *f = *ex_ptr; -+ if (f->ex_addr.s_addr == addr.s_addr && f->ex_fport == port) { -+ *ex_ptr = f->ex_next; -+ g_free(f->ex_exec); -+ g_free(f); -+ return 0; -+ } -+ } -+ return -1; -+} -+ -+static int slirp_socketpair_with_oob(int sv[2]) -+{ -+ struct sockaddr_in addr = { -+ .sin_family = AF_INET, -+ .sin_port = 0, -+ .sin_addr.s_addr = INADDR_ANY, -+ }; -+ socklen_t addrlen = sizeof(addr); -+ int ret, s; -+ -+ sv[1] = -1; -+ s = slirp_socket(AF_INET, SOCK_STREAM, 0); -+ if (s < 0 || bind(s, (struct sockaddr *)&addr, addrlen) < 0 || -+ listen(s, 1) < 0 || -+ getsockname(s, (struct sockaddr *)&addr, &addrlen) < 0) { -+ goto err; -+ } -+ -+ sv[1] = slirp_socket(AF_INET, SOCK_STREAM, 0); -+ if (sv[1] < 0) { -+ goto err; -+ } -+ /* -+ * This connect won't block because we've already listen()ed on -+ * the server end (even though we won't accept() the connection -+ * until later on). 
-+ */ -+ do { -+ ret = connect(sv[1], (struct sockaddr *)&addr, addrlen); -+ } while (ret < 0 && errno == EINTR); -+ if (ret < 0) { -+ goto err; -+ } -+ -+ do { -+ sv[0] = accept(s, (struct sockaddr *)&addr, &addrlen); -+ } while (sv[0] < 0 && errno == EINTR); -+ if (sv[0] < 0) { -+ goto err; -+ } -+ -+ closesocket(s); -+ return 0; -+ -+err: -+ g_critical("slirp_socketpair(): %s", strerror(errno)); -+ if (s >= 0) { -+ closesocket(s); -+ } -+ if (sv[1] >= 0) { -+ closesocket(sv[1]); -+ } -+ return -1; -+} -+ -+static void fork_exec_child_setup(gpointer data) -+{ -+#ifndef _WIN32 -+ setsid(); -+#endif -+} -+ -+#pragma GCC diagnostic push -+#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -+ -+#if !GLIB_CHECK_VERSION(2, 58, 0) -+typedef struct SlirpGSpawnFds { -+ GSpawnChildSetupFunc child_setup; -+ gpointer user_data; -+ gint stdin_fd; -+ gint stdout_fd; -+ gint stderr_fd; -+} SlirpGSpawnFds; -+ -+static inline void slirp_gspawn_fds_setup(gpointer user_data) -+{ -+ SlirpGSpawnFds *q = (SlirpGSpawnFds *)user_data; -+ -+ dup2(q->stdin_fd, 0); -+ dup2(q->stdout_fd, 1); -+ dup2(q->stderr_fd, 2); -+ q->child_setup(q->user_data); -+} -+#endif -+ -+static inline gboolean -+g_spawn_async_with_fds_slirp(const gchar *working_directory, gchar **argv, -+ gchar **envp, GSpawnFlags flags, -+ GSpawnChildSetupFunc child_setup, -+ gpointer user_data, GPid *child_pid, gint stdin_fd, -+ gint stdout_fd, gint stderr_fd, GError **error) -+{ -+#if GLIB_CHECK_VERSION(2, 58, 0) -+ return g_spawn_async_with_fds(working_directory, argv, envp, flags, -+ child_setup, user_data, child_pid, stdin_fd, -+ stdout_fd, stderr_fd, error); -+#else -+ SlirpGSpawnFds setup = { -+ .child_setup = child_setup, -+ .user_data = user_data, -+ .stdin_fd = stdin_fd, -+ .stdout_fd = stdout_fd, -+ .stderr_fd = stderr_fd, -+ }; -+ -+ return g_spawn_async(working_directory, argv, envp, flags, -+ slirp_gspawn_fds_setup, &setup, child_pid, error); -+#endif -+} -+ -+#define g_spawn_async_with_fds(wd, argv, env, 
f, c, d, p, ifd, ofd, efd, err) \ -+ g_spawn_async_with_fds_slirp(wd, argv, env, f, c, d, p, ifd, ofd, efd, err) -+ -+#pragma GCC diagnostic pop -+ -+int fork_exec(struct socket *so, const char *ex) -+{ -+ GError *err = NULL; -+ gint argc = 0; -+ gchar **argv = NULL; -+ int opt, sp[2]; -+ -+ DEBUG_CALL("fork_exec"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("ex = %p", ex); -+ -+ if (slirp_socketpair_with_oob(sp) < 0) { -+ return 0; -+ } -+ -+ if (!g_shell_parse_argv(ex, &argc, &argv, &err)) { -+ g_critical("fork_exec invalid command: %s\nerror: %s", ex, err->message); -+ g_error_free(err); -+ return 0; -+ } -+ -+ g_spawn_async_with_fds(NULL /* cwd */, argv, NULL /* env */, -+ G_SPAWN_SEARCH_PATH, fork_exec_child_setup, -+ NULL /* data */, NULL /* child_pid */, sp[1], sp[1], -+ sp[1], &err); -+ g_strfreev(argv); -+ -+ if (err) { -+ g_critical("fork_exec: %s", err->message); -+ g_error_free(err); -+ closesocket(sp[0]); -+ closesocket(sp[1]); -+ return 0; -+ } -+ -+ so->s = sp[0]; -+ closesocket(sp[1]); -+ slirp_socket_set_fast_reuse(so->s); -+ opt = 1; -+ setsockopt(so->s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); -+ slirp_set_nonblock(so->s); -+ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); -+ return 1; -+} -+ -+int open_unix(struct socket *so, const char *unixpath) -+{ -+#ifdef G_OS_UNIX -+ struct sockaddr_un sa; -+ int s; -+ -+ DEBUG_CALL("open_unix"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("unixpath = %s", unixpath); -+ -+ memset(&sa, 0, sizeof(sa)); -+ sa.sun_family = AF_UNIX; -+ if (g_strlcpy(sa.sun_path, unixpath, sizeof(sa.sun_path)) >= sizeof(sa.sun_path)) { -+ g_critical("Bad unix path: %s", unixpath); -+ return 0; -+ } -+ -+ s = slirp_socket(PF_UNIX, SOCK_STREAM, 0); -+ if (s < 0) { -+ g_critical("open_unix(): %s", strerror(errno)); -+ return 0; -+ } -+ -+ if (connect(s, (struct sockaddr *)&sa, sizeof(sa)) < 0) { -+ g_critical("open_unix(): %s", strerror(errno)); -+ closesocket(s); -+ return 0; -+ } -+ -+ so->s = s; -+ 
slirp_set_nonblock(so->s); -+ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); -+ -+ return 1; -+#else -+ g_assert_not_reached(); -+#endif -+} -+ -+char *slirp_connection_info(Slirp *slirp) -+{ -+ GString *str = g_string_new(NULL); -+ const char *const tcpstates[] = { -+ [TCPS_CLOSED] = "CLOSED", [TCPS_LISTEN] = "LISTEN", -+ [TCPS_SYN_SENT] = "SYN_SENT", [TCPS_SYN_RECEIVED] = "SYN_RCVD", -+ [TCPS_ESTABLISHED] = "ESTABLISHED", [TCPS_CLOSE_WAIT] = "CLOSE_WAIT", -+ [TCPS_FIN_WAIT_1] = "FIN_WAIT_1", [TCPS_CLOSING] = "CLOSING", -+ [TCPS_LAST_ACK] = "LAST_ACK", [TCPS_FIN_WAIT_2] = "FIN_WAIT_2", -+ [TCPS_TIME_WAIT] = "TIME_WAIT", -+ }; -+ struct in_addr dst_addr; -+ struct sockaddr_in src; -+ socklen_t src_len; -+ uint16_t dst_port; -+ struct socket *so; -+ const char *state; -+ char buf[20]; -+ -+ g_string_append_printf(str, -+ " Protocol[State] FD Source Address Port " -+ "Dest. Address Port RecvQ SendQ\n"); -+ -+ /* TODO: IPv6 */ -+ -+ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { -+ if (so->so_state & SS_HOSTFWD) { -+ state = "HOST_FORWARD"; -+ } else if (so->so_tcpcb) { -+ state = tcpstates[so->so_tcpcb->t_state]; -+ } else { -+ state = "NONE"; -+ } -+ if (so->so_state & (SS_HOSTFWD | SS_INCOMING)) { -+ src_len = sizeof(src); -+ getsockname(so->s, (struct sockaddr *)&src, &src_len); -+ dst_addr = so->so_laddr; -+ dst_port = so->so_lport; -+ } else { -+ src.sin_addr = so->so_laddr; -+ src.sin_port = so->so_lport; -+ dst_addr = so->so_faddr; -+ dst_port = so->so_fport; -+ } -+ slirp_fmt0(buf, sizeof(buf), " TCP[%s]", state); -+ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, -+ src.sin_addr.s_addr ? 
inet_ntoa(src.sin_addr) : -+ "*", -+ ntohs(src.sin_port)); -+ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), -+ ntohs(dst_port), so->so_rcv.sb_cc, -+ so->so_snd.sb_cc); -+ } -+ -+ for (so = slirp->udb.so_next; so != &slirp->udb; so = so->so_next) { -+ if (so->so_state & SS_HOSTFWD) { -+ slirp_fmt0(buf, sizeof(buf), " UDP[HOST_FORWARD]"); -+ src_len = sizeof(src); -+ getsockname(so->s, (struct sockaddr *)&src, &src_len); -+ dst_addr = so->so_laddr; -+ dst_port = so->so_lport; -+ } else { -+ slirp_fmt0(buf, sizeof(buf), " UDP[%d sec]", -+ (so->so_expire - curtime) / 1000); -+ src.sin_addr = so->so_laddr; -+ src.sin_port = so->so_lport; -+ dst_addr = so->so_faddr; -+ dst_port = so->so_fport; -+ } -+ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, -+ src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : -+ "*", -+ ntohs(src.sin_port)); -+ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), -+ ntohs(dst_port), so->so_rcv.sb_cc, -+ so->so_snd.sb_cc); -+ } -+ -+ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so->so_next) { -+ slirp_fmt0(buf, sizeof(buf), " ICMP[%d sec]", -+ (so->so_expire - curtime) / 1000); -+ src.sin_addr = so->so_laddr; -+ dst_addr = so->so_faddr; -+ g_string_append_printf(str, "%-19s %3d %15s - ", buf, so->s, -+ src.sin_addr.s_addr ? 
inet_ntoa(src.sin_addr) : -+ "*"); -+ g_string_append_printf(str, "%15s - %5d %5d\n", inet_ntoa(dst_addr), -+ so->so_rcv.sb_cc, so->so_snd.sb_cc); -+ } -+ -+ return g_string_free(str, FALSE); -+} -+ -+int slirp_bind_outbound(struct socket *so, unsigned short af) -+{ -+ int ret = 0; -+ struct sockaddr *addr = NULL; -+ int addr_size = 0; -+ -+ if (af == AF_INET && so->slirp->outbound_addr != NULL) { -+ addr = (struct sockaddr *)so->slirp->outbound_addr; -+ addr_size = sizeof(struct sockaddr_in); -+ } else if (af == AF_INET6 && so->slirp->outbound_addr6 != NULL) { -+ addr = (struct sockaddr *)so->slirp->outbound_addr6; -+ addr_size = sizeof(struct sockaddr_in6); -+ } -+ -+ if (addr != NULL) { -+ ret = bind(so->s, addr, addr_size); -+ } -+ return ret; -+} -\ No newline at end of file -diff --git a/slirp/src/misc.h b/slirp/src/misc.h -new file mode 100644 -index 0000000000..81b370cfb1 ---- /dev/null -+++ b/slirp/src/misc.h -@@ -0,0 +1,72 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1995 Danny Gasparovski. 
-+ */ -+ -+#ifndef MISC_H -+#define MISC_H -+ -+#include "libslirp.h" -+ -+struct gfwd_list { -+ SlirpWriteCb write_cb; -+ void *opaque; -+ struct in_addr ex_addr; /* Server address */ -+ int ex_fport; /* Port to telnet to */ -+ char *ex_exec; /* Command line of what to exec */ -+ char *ex_unix; /* unix socket */ -+ struct gfwd_list *ex_next; -+}; -+ -+#define EMU_NONE 0x0 -+ -+/* TCP emulations */ -+#define EMU_CTL 0x1 -+#define EMU_FTP 0x2 -+#define EMU_KSH 0x3 -+#define EMU_IRC 0x4 -+#define EMU_REALAUDIO 0x5 -+#define EMU_RLOGIN 0x6 -+#define EMU_IDENT 0x7 -+ -+#define EMU_NOCONNECT 0x10 /* Don't connect */ -+ -+struct tos_t { -+ uint16_t lport; -+ uint16_t fport; -+ uint8_t tos; -+ uint8_t emu; -+}; -+ -+struct emu_t { -+ uint16_t lport; -+ uint16_t fport; -+ uint8_t tos; -+ uint8_t emu; -+ struct emu_t *next; -+}; -+ -+struct slirp_quehead { -+ struct slirp_quehead *qh_link; -+ struct slirp_quehead *qh_rlink; -+}; -+ -+void slirp_insque(void *, void *); -+void slirp_remque(void *); -+int fork_exec(struct socket *so, const char *ex); -+int open_unix(struct socket *so, const char *unixsock); -+ -+struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, -+ void *opaque, struct in_addr addr, int port); -+ -+struct gfwd_list *add_exec(struct gfwd_list **ex_ptr, const char *cmdline, -+ struct in_addr addr, int port); -+ -+struct gfwd_list *add_unix(struct gfwd_list **ex_ptr, const char *unixsock, -+ struct in_addr addr, int port); -+ -+int remove_guestfwd(struct gfwd_list **ex_ptr, struct in_addr addr, int port); -+ -+int slirp_bind_outbound(struct socket *so, unsigned short af); -+ -+#endif -diff --git a/slirp/src/ncsi-pkt.h b/slirp/src/ncsi-pkt.h -new file mode 100644 -index 0000000000..7795ad83ee ---- /dev/null -+++ b/slirp/src/ncsi-pkt.h -@@ -0,0 +1,445 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright Gavin Shan, IBM Corporation 2016. 
-+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * 1. Redistributions of source code must retain the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer. -+ * -+ * 2. Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer in the documentation and/or other materials provided -+ * with the distribution. -+ * -+ * 3. Neither the name of the copyright holder nor the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -+ * OF THE POSSIBILITY OF SUCH DAMAGE. 
-+ */ -+ -+#ifndef NCSI_PKT_H -+#define NCSI_PKT_H -+ -+/* from linux/net/ncsi/ncsi-pkt.h */ -+#define __be32 uint32_t -+#define __be16 uint16_t -+ -+struct ncsi_pkt_hdr { -+ unsigned char mc_id; /* Management controller ID */ -+ unsigned char revision; /* NCSI version - 0x01 */ -+ unsigned char reserved; /* Reserved */ -+ unsigned char id; /* Packet sequence number */ -+ unsigned char type; /* Packet type */ -+ unsigned char channel; /* Network controller ID */ -+ __be16 length; /* Payload length */ -+ __be32 reserved1[2]; /* Reserved */ -+}; -+ -+struct ncsi_cmd_pkt_hdr { -+ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ -+}; -+ -+struct ncsi_rsp_pkt_hdr { -+ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ -+ __be16 code; /* Response code */ -+ __be16 reason; /* Response reason */ -+}; -+ -+struct ncsi_aen_pkt_hdr { -+ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ -+ unsigned char reserved2[3]; /* Reserved */ -+ unsigned char type; /* AEN packet type */ -+}; -+ -+/* NCSI common command packet */ -+struct ncsi_cmd_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[26]; -+}; -+ -+struct ncsi_rsp_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; -+}; -+ -+/* Select Package */ -+struct ncsi_cmd_sp_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ unsigned char reserved[3]; /* Reserved */ -+ unsigned char hw_arbitration; /* HW arbitration */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; -+}; -+ -+/* Disable Channel */ -+struct ncsi_cmd_dc_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ unsigned char reserved[3]; /* Reserved */ -+ unsigned char ald; /* Allow link down */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; -+}; -+ -+/* Reset Channel */ -+struct ncsi_cmd_rc_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ __be32 reserved; 
/* Reserved */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; -+}; -+ -+/* AEN Enable */ -+struct ncsi_cmd_ae_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ unsigned char reserved[3]; /* Reserved */ -+ unsigned char mc_id; /* MC ID */ -+ __be32 mode; /* AEN working mode */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[18]; -+}; -+ -+/* Set Link */ -+struct ncsi_cmd_sl_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ __be32 mode; /* Link working mode */ -+ __be32 oem_mode; /* OEM link mode */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[18]; -+}; -+ -+/* Set VLAN Filter */ -+struct ncsi_cmd_svf_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ __be16 reserved; /* Reserved */ -+ __be16 vlan; /* VLAN ID */ -+ __be16 reserved1; /* Reserved */ -+ unsigned char index; /* VLAN table index */ -+ unsigned char enable; /* Enable or disable */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[14]; -+}; -+ -+/* Enable VLAN */ -+struct ncsi_cmd_ev_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ unsigned char reserved[3]; /* Reserved */ -+ unsigned char mode; /* VLAN filter mode */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; -+}; -+ -+/* Set MAC Address */ -+struct ncsi_cmd_sma_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ unsigned char mac[6]; /* MAC address */ -+ unsigned char index; /* MAC table index */ -+ unsigned char at_e; /* Addr type and operation */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[18]; -+}; -+ -+/* Enable Broadcast Filter */ -+struct ncsi_cmd_ebf_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ __be32 mode; /* Filter mode */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; -+}; -+ -+/* Enable Global Multicast Filter */ -+struct ncsi_cmd_egmf_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ __be32 mode; /* Global MC mode */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; 
-+}; -+ -+/* Set NCSI Flow Control */ -+struct ncsi_cmd_snfc_pkt { -+ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ -+ unsigned char reserved[3]; /* Reserved */ -+ unsigned char mode; /* Flow control mode */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; -+}; -+ -+/* Get Link Status */ -+struct ncsi_rsp_gls_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ __be32 status; /* Link status */ -+ __be32 other; /* Other indications */ -+ __be32 oem_status; /* OEM link status */ -+ __be32 checksum; -+ unsigned char pad[10]; -+}; -+ -+/* Get Version ID */ -+struct ncsi_rsp_gvi_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ __be32 ncsi_version; /* NCSI version */ -+ unsigned char reserved[3]; /* Reserved */ -+ unsigned char alpha2; /* NCSI version */ -+ unsigned char fw_name[12]; /* f/w name string */ -+ __be32 fw_version; /* f/w version */ -+ __be16 pci_ids[4]; /* PCI IDs */ -+ __be32 mf_id; /* Manufacture ID */ -+ __be32 checksum; -+}; -+ -+/* Get Capabilities */ -+struct ncsi_rsp_gc_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ __be32 cap; /* Capabilities */ -+ __be32 bc_cap; /* Broadcast cap */ -+ __be32 mc_cap; /* Multicast cap */ -+ __be32 buf_cap; /* Buffering cap */ -+ __be32 aen_cap; /* AEN cap */ -+ unsigned char vlan_cnt; /* VLAN filter count */ -+ unsigned char mixed_cnt; /* Mix filter count */ -+ unsigned char mc_cnt; /* MC filter count */ -+ unsigned char uc_cnt; /* UC filter count */ -+ unsigned char reserved[2]; /* Reserved */ -+ unsigned char vlan_mode; /* VLAN mode */ -+ unsigned char channel_cnt; /* Channel count */ -+ __be32 checksum; /* Checksum */ -+}; -+ -+/* Get Parameters */ -+struct ncsi_rsp_gp_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ unsigned char mac_cnt; /* Number of MAC addr */ -+ unsigned char reserved[2]; /* Reserved */ -+ unsigned char mac_enable; /* MAC addr enable flags */ -+ unsigned char vlan_cnt; /* VLAN tag count */ -+ unsigned char reserved1; /* 
Reserved */ -+ __be16 vlan_enable; /* VLAN tag enable flags */ -+ __be32 link_mode; /* Link setting */ -+ __be32 bc_mode; /* BC filter mode */ -+ __be32 valid_modes; /* Valid mode parameters */ -+ unsigned char vlan_mode; /* VLAN mode */ -+ unsigned char fc_mode; /* Flow control mode */ -+ unsigned char reserved2[2]; /* Reserved */ -+ __be32 aen_mode; /* AEN mode */ -+ unsigned char mac[6]; /* Supported MAC addr */ -+ __be16 vlan; /* Supported VLAN tags */ -+ __be32 checksum; /* Checksum */ -+}; -+ -+/* Get Controller Packet Statistics */ -+struct ncsi_rsp_gcps_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ __be32 cnt_hi; /* Counter cleared */ -+ __be32 cnt_lo; /* Counter cleared */ -+ __be32 rx_bytes; /* Rx bytes */ -+ __be32 tx_bytes; /* Tx bytes */ -+ __be32 rx_uc_pkts; /* Rx UC packets */ -+ __be32 rx_mc_pkts; /* Rx MC packets */ -+ __be32 rx_bc_pkts; /* Rx BC packets */ -+ __be32 tx_uc_pkts; /* Tx UC packets */ -+ __be32 tx_mc_pkts; /* Tx MC packets */ -+ __be32 tx_bc_pkts; /* Tx BC packets */ -+ __be32 fcs_err; /* FCS errors */ -+ __be32 align_err; /* Alignment errors */ -+ __be32 false_carrier; /* False carrier detection */ -+ __be32 runt_pkts; /* Rx runt packets */ -+ __be32 jabber_pkts; /* Rx jabber packets */ -+ __be32 rx_pause_xon; /* Rx pause XON frames */ -+ __be32 rx_pause_xoff; /* Rx XOFF frames */ -+ __be32 tx_pause_xon; /* Tx XON frames */ -+ __be32 tx_pause_xoff; /* Tx XOFF frames */ -+ __be32 tx_s_collision; /* Single collision frames */ -+ __be32 tx_m_collision; /* Multiple collision frames */ -+ __be32 l_collision; /* Late collision frames */ -+ __be32 e_collision; /* Excessive collision frames */ -+ __be32 rx_ctl_frames; /* Rx control frames */ -+ __be32 rx_64_frames; /* Rx 64-bytes frames */ -+ __be32 rx_127_frames; /* Rx 65-127 bytes frames */ -+ __be32 rx_255_frames; /* Rx 128-255 bytes frames */ -+ __be32 rx_511_frames; /* Rx 256-511 bytes frames */ -+ __be32 rx_1023_frames; /* Rx 512-1023 bytes frames */ -+ __be32 
rx_1522_frames; /* Rx 1024-1522 bytes frames */ -+ __be32 rx_9022_frames; /* Rx 1523-9022 bytes frames */ -+ __be32 tx_64_frames; /* Tx 64-bytes frames */ -+ __be32 tx_127_frames; /* Tx 65-127 bytes frames */ -+ __be32 tx_255_frames; /* Tx 128-255 bytes frames */ -+ __be32 tx_511_frames; /* Tx 256-511 bytes frames */ -+ __be32 tx_1023_frames; /* Tx 512-1023 bytes frames */ -+ __be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */ -+ __be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */ -+ __be32 rx_valid_bytes; /* Rx valid bytes */ -+ __be32 rx_runt_pkts; /* Rx error runt packets */ -+ __be32 rx_jabber_pkts; /* Rx error jabber packets */ -+ __be32 checksum; /* Checksum */ -+}; -+ -+/* Get NCSI Statistics */ -+struct ncsi_rsp_gns_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ __be32 rx_cmds; /* Rx NCSI commands */ -+ __be32 dropped_cmds; /* Dropped commands */ -+ __be32 cmd_type_errs; /* Command type errors */ -+ __be32 cmd_csum_errs; /* Command checksum errors */ -+ __be32 rx_pkts; /* Rx NCSI packets */ -+ __be32 tx_pkts; /* Tx NCSI packets */ -+ __be32 tx_aen_pkts; /* Tx AEN packets */ -+ __be32 checksum; /* Checksum */ -+}; -+ -+/* Get NCSI Pass-through Statistics */ -+struct ncsi_rsp_gnpts_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ __be32 tx_pkts; /* Tx packets */ -+ __be32 tx_dropped; /* Tx dropped packets */ -+ __be32 tx_channel_err; /* Tx channel errors */ -+ __be32 tx_us_err; /* Tx undersize errors */ -+ __be32 rx_pkts; /* Rx packets */ -+ __be32 rx_dropped; /* Rx dropped packets */ -+ __be32 rx_channel_err; /* Rx channel errors */ -+ __be32 rx_us_err; /* Rx undersize errors */ -+ __be32 rx_os_err; /* Rx oversize errors */ -+ __be32 checksum; /* Checksum */ -+}; -+ -+/* Get package status */ -+struct ncsi_rsp_gps_pkt { -+ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ __be32 status; /* Hardware arbitration status */ -+ __be32 checksum; -+}; -+ -+/* Get package UUID */ -+struct ncsi_rsp_gpuuid_pkt { -+ struct 
ncsi_rsp_pkt_hdr rsp; /* Response header */ -+ unsigned char uuid[16]; /* UUID */ -+ __be32 checksum; -+}; -+ -+/* AEN: Link State Change */ -+struct ncsi_aen_lsc_pkt { -+ struct ncsi_aen_pkt_hdr aen; /* AEN header */ -+ __be32 status; /* Link status */ -+ __be32 oem_status; /* OEM link status */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[14]; -+}; -+ -+/* AEN: Configuration Required */ -+struct ncsi_aen_cr_pkt { -+ struct ncsi_aen_pkt_hdr aen; /* AEN header */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[22]; -+}; -+ -+/* AEN: Host Network Controller Driver Status Change */ -+struct ncsi_aen_hncdsc_pkt { -+ struct ncsi_aen_pkt_hdr aen; /* AEN header */ -+ __be32 status; /* Status */ -+ __be32 checksum; /* Checksum */ -+ unsigned char pad[18]; -+}; -+ -+/* NCSI packet revision */ -+#define NCSI_PKT_REVISION 0x01 -+ -+/* NCSI packet commands */ -+#define NCSI_PKT_CMD_CIS 0x00 /* Clear Initial State */ -+#define NCSI_PKT_CMD_SP 0x01 /* Select Package */ -+#define NCSI_PKT_CMD_DP 0x02 /* Deselect Package */ -+#define NCSI_PKT_CMD_EC 0x03 /* Enable Channel */ -+#define NCSI_PKT_CMD_DC 0x04 /* Disable Channel */ -+#define NCSI_PKT_CMD_RC 0x05 /* Reset Channel */ -+#define NCSI_PKT_CMD_ECNT 0x06 /* Enable Channel Network Tx */ -+#define NCSI_PKT_CMD_DCNT 0x07 /* Disable Channel Network Tx */ -+#define NCSI_PKT_CMD_AE 0x08 /* AEN Enable */ -+#define NCSI_PKT_CMD_SL 0x09 /* Set Link */ -+#define NCSI_PKT_CMD_GLS 0x0a /* Get Link */ -+#define NCSI_PKT_CMD_SVF 0x0b /* Set VLAN Filter */ -+#define NCSI_PKT_CMD_EV 0x0c /* Enable VLAN */ -+#define NCSI_PKT_CMD_DV 0x0d /* Disable VLAN */ -+#define NCSI_PKT_CMD_SMA 0x0e /* Set MAC address */ -+#define NCSI_PKT_CMD_EBF 0x10 /* Enable Broadcast Filter */ -+#define NCSI_PKT_CMD_DBF 0x11 /* Disable Broadcast Filter */ -+#define NCSI_PKT_CMD_EGMF 0x12 /* Enable Global Multicast Filter */ -+#define NCSI_PKT_CMD_DGMF 0x13 /* Disable Global Multicast Filter */ -+#define NCSI_PKT_CMD_SNFC 0x14 /* Set NCSI Flow 
Control */ -+#define NCSI_PKT_CMD_GVI 0x15 /* Get Version ID */ -+#define NCSI_PKT_CMD_GC 0x16 /* Get Capabilities */ -+#define NCSI_PKT_CMD_GP 0x17 /* Get Parameters */ -+#define NCSI_PKT_CMD_GCPS 0x18 /* Get Controller Packet Statistics */ -+#define NCSI_PKT_CMD_GNS 0x19 /* Get NCSI Statistics */ -+#define NCSI_PKT_CMD_GNPTS 0x1a /* Get NCSI Pass-throu Statistics */ -+#define NCSI_PKT_CMD_GPS 0x1b /* Get package status */ -+#define NCSI_PKT_CMD_OEM 0x50 /* OEM */ -+#define NCSI_PKT_CMD_PLDM 0x51 /* PLDM request over NCSI over RBT */ -+#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */ -+ -+/* NCSI packet responses */ -+#define NCSI_PKT_RSP_CIS (NCSI_PKT_CMD_CIS + 0x80) -+#define NCSI_PKT_RSP_SP (NCSI_PKT_CMD_SP + 0x80) -+#define NCSI_PKT_RSP_DP (NCSI_PKT_CMD_DP + 0x80) -+#define NCSI_PKT_RSP_EC (NCSI_PKT_CMD_EC + 0x80) -+#define NCSI_PKT_RSP_DC (NCSI_PKT_CMD_DC + 0x80) -+#define NCSI_PKT_RSP_RC (NCSI_PKT_CMD_RC + 0x80) -+#define NCSI_PKT_RSP_ECNT (NCSI_PKT_CMD_ECNT + 0x80) -+#define NCSI_PKT_RSP_DCNT (NCSI_PKT_CMD_DCNT + 0x80) -+#define NCSI_PKT_RSP_AE (NCSI_PKT_CMD_AE + 0x80) -+#define NCSI_PKT_RSP_SL (NCSI_PKT_CMD_SL + 0x80) -+#define NCSI_PKT_RSP_GLS (NCSI_PKT_CMD_GLS + 0x80) -+#define NCSI_PKT_RSP_SVF (NCSI_PKT_CMD_SVF + 0x80) -+#define NCSI_PKT_RSP_EV (NCSI_PKT_CMD_EV + 0x80) -+#define NCSI_PKT_RSP_DV (NCSI_PKT_CMD_DV + 0x80) -+#define NCSI_PKT_RSP_SMA (NCSI_PKT_CMD_SMA + 0x80) -+#define NCSI_PKT_RSP_EBF (NCSI_PKT_CMD_EBF + 0x80) -+#define NCSI_PKT_RSP_DBF (NCSI_PKT_CMD_DBF + 0x80) -+#define NCSI_PKT_RSP_EGMF (NCSI_PKT_CMD_EGMF + 0x80) -+#define NCSI_PKT_RSP_DGMF (NCSI_PKT_CMD_DGMF + 0x80) -+#define NCSI_PKT_RSP_SNFC (NCSI_PKT_CMD_SNFC + 0x80) -+#define NCSI_PKT_RSP_GVI (NCSI_PKT_CMD_GVI + 0x80) -+#define NCSI_PKT_RSP_GC (NCSI_PKT_CMD_GC + 0x80) -+#define NCSI_PKT_RSP_GP (NCSI_PKT_CMD_GP + 0x80) -+#define NCSI_PKT_RSP_GCPS (NCSI_PKT_CMD_GCPS + 0x80) -+#define NCSI_PKT_RSP_GNS (NCSI_PKT_CMD_GNS + 0x80) -+#define NCSI_PKT_RSP_GNPTS (NCSI_PKT_CMD_GNPTS + 
0x80) -+#define NCSI_PKT_RSP_GPS (NCSI_PKT_CMD_GPS + 0x80) -+#define NCSI_PKT_RSP_OEM (NCSI_PKT_CMD_OEM + 0x80) -+#define NCSI_PKT_RSP_PLDM (NCSI_PKT_CMD_PLDM + 0x80) -+#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80) -+ -+/* NCSI response code/reason */ -+#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */ -+#define NCSI_PKT_RSP_C_FAILED 0x0001 /* Command Failed */ -+#define NCSI_PKT_RSP_C_UNAVAILABLE 0x0002 /* Command Unavailable */ -+#define NCSI_PKT_RSP_C_UNSUPPORTED 0x0003 /* Command Unsupported */ -+#define NCSI_PKT_RSP_R_NO_ERROR 0x0000 /* No Error */ -+#define NCSI_PKT_RSP_R_INTERFACE 0x0001 /* Interface not ready */ -+#define NCSI_PKT_RSP_R_PARAM 0x0002 /* Invalid Parameter */ -+#define NCSI_PKT_RSP_R_CHANNEL 0x0003 /* Channel not Ready */ -+#define NCSI_PKT_RSP_R_PACKAGE 0x0004 /* Package not Ready */ -+#define NCSI_PKT_RSP_R_LENGTH 0x0005 /* Invalid payload length */ -+#define NCSI_PKT_RSP_R_UNKNOWN 0x7fff /* Command type unsupported */ -+ -+/* NCSI AEN packet type */ -+#define NCSI_PKT_AEN 0xFF /* AEN Packet */ -+#define NCSI_PKT_AEN_LSC 0x00 /* Link status change */ -+#define NCSI_PKT_AEN_CR 0x01 /* Configuration required */ -+#define NCSI_PKT_AEN_HNCDSC 0x02 /* HNC driver status change */ -+ -+#endif /* NCSI_PKT_H */ -diff --git a/slirp/src/ncsi.c b/slirp/src/ncsi.c -new file mode 100644 -index 0000000000..75dcc08356 ---- /dev/null -+++ b/slirp/src/ncsi.c -@@ -0,0 +1,197 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * NC-SI (Network Controller Sideband Interface) "echo" model -+ * -+ * Copyright (C) 2016-2018 IBM Corp. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * 1. Redistributions of source code must retain the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer. -+ * -+ * 2. 
Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer in the documentation and/or other materials provided -+ * with the distribution. -+ * -+ * 3. Neither the name of the copyright holder nor the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -+ * OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+#include "slirp.h" -+ -+#include "ncsi-pkt.h" -+ -+static uint32_t ncsi_calculate_checksum(uint16_t *data, int len) -+{ -+ uint32_t checksum = 0; -+ int i; -+ -+ /* -+ * 32-bit unsigned sum of the NC-SI packet header and NC-SI packet -+ * payload interpreted as a series of 16-bit unsigned integer values. 
-+ */ -+ for (i = 0; i < len / 2; i++) { -+ checksum += htons(data[i]); -+ } -+ -+ checksum = (~checksum + 1); -+ return checksum; -+} -+ -+/* Get Capabilities */ -+static int ncsi_rsp_handler_gc(struct ncsi_rsp_pkt_hdr *rnh) -+{ -+ struct ncsi_rsp_gc_pkt *rsp = (struct ncsi_rsp_gc_pkt *)rnh; -+ -+ rsp->cap = htonl(~0); -+ rsp->bc_cap = htonl(~0); -+ rsp->mc_cap = htonl(~0); -+ rsp->buf_cap = htonl(~0); -+ rsp->aen_cap = htonl(~0); -+ rsp->vlan_mode = 0xff; -+ rsp->uc_cnt = 2; -+ return 0; -+} -+ -+/* Get Link status */ -+static int ncsi_rsp_handler_gls(struct ncsi_rsp_pkt_hdr *rnh) -+{ -+ struct ncsi_rsp_gls_pkt *rsp = (struct ncsi_rsp_gls_pkt *)rnh; -+ -+ rsp->status = htonl(0x1); -+ return 0; -+} -+ -+/* Get Parameters */ -+static int ncsi_rsp_handler_gp(struct ncsi_rsp_pkt_hdr *rnh) -+{ -+ struct ncsi_rsp_gp_pkt *rsp = (struct ncsi_rsp_gp_pkt *)rnh; -+ -+ /* no MAC address filters or VLAN filters on the channel */ -+ rsp->mac_cnt = 0; -+ rsp->mac_enable = 0; -+ rsp->vlan_cnt = 0; -+ rsp->vlan_enable = 0; -+ -+ return 0; -+} -+ -+static const struct ncsi_rsp_handler { -+ unsigned char type; -+ int payload; -+ int (*handler)(struct ncsi_rsp_pkt_hdr *rnh); -+} ncsi_rsp_handlers[] = { { NCSI_PKT_RSP_CIS, 4, NULL }, -+ { NCSI_PKT_RSP_SP, 4, NULL }, -+ { NCSI_PKT_RSP_DP, 4, NULL }, -+ { NCSI_PKT_RSP_EC, 4, NULL }, -+ { NCSI_PKT_RSP_DC, 4, NULL }, -+ { NCSI_PKT_RSP_RC, 4, NULL }, -+ { NCSI_PKT_RSP_ECNT, 4, NULL }, -+ { NCSI_PKT_RSP_DCNT, 4, NULL }, -+ { NCSI_PKT_RSP_AE, 4, NULL }, -+ { NCSI_PKT_RSP_SL, 4, NULL }, -+ { NCSI_PKT_RSP_GLS, 16, ncsi_rsp_handler_gls }, -+ { NCSI_PKT_RSP_SVF, 4, NULL }, -+ { NCSI_PKT_RSP_EV, 4, NULL }, -+ { NCSI_PKT_RSP_DV, 4, NULL }, -+ { NCSI_PKT_RSP_SMA, 4, NULL }, -+ { NCSI_PKT_RSP_EBF, 4, NULL }, -+ { NCSI_PKT_RSP_DBF, 4, NULL }, -+ { NCSI_PKT_RSP_EGMF, 4, NULL }, -+ { NCSI_PKT_RSP_DGMF, 4, NULL }, -+ { NCSI_PKT_RSP_SNFC, 4, NULL }, -+ { NCSI_PKT_RSP_GVI, 40, NULL }, -+ { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc }, -+ { NCSI_PKT_RSP_GP, 
40, ncsi_rsp_handler_gp }, -+ { NCSI_PKT_RSP_GCPS, 172, NULL }, -+ { NCSI_PKT_RSP_GNS, 172, NULL }, -+ { NCSI_PKT_RSP_GNPTS, 172, NULL }, -+ { NCSI_PKT_RSP_GPS, 8, NULL }, -+ { NCSI_PKT_RSP_OEM, 0, NULL }, -+ { NCSI_PKT_RSP_PLDM, 0, NULL }, -+ { NCSI_PKT_RSP_GPUUID, 20, NULL } }; -+ -+/* -+ * packet format : ncsi header + payload + checksum -+ */ -+#define NCSI_MAX_PAYLOAD 172 -+#define NCSI_MAX_LEN (sizeof(struct ncsi_pkt_hdr) + NCSI_MAX_PAYLOAD + 4) -+ -+void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) -+{ -+ const struct ncsi_pkt_hdr *nh = -+ (const struct ncsi_pkt_hdr *)(pkt + ETH_HLEN); -+ uint8_t ncsi_reply[ETH_HLEN + NCSI_MAX_LEN]; -+ struct ethhdr *reh = (struct ethhdr *)ncsi_reply; -+ struct ncsi_rsp_pkt_hdr *rnh = -+ (struct ncsi_rsp_pkt_hdr *)(ncsi_reply + ETH_HLEN); -+ const struct ncsi_rsp_handler *handler = NULL; -+ int i; -+ int ncsi_rsp_len = sizeof(*nh); -+ uint32_t checksum; -+ uint32_t *pchecksum; -+ -+ if (pkt_len < ETH_HLEN + sizeof(struct ncsi_pkt_hdr)) { -+ return; /* packet too short */ -+ } -+ -+ memset(ncsi_reply, 0, sizeof(ncsi_reply)); -+ -+ memset(reh->h_dest, 0xff, ETH_ALEN); -+ memset(reh->h_source, 0xff, ETH_ALEN); -+ reh->h_proto = htons(ETH_P_NCSI); -+ -+ for (i = 0; i < G_N_ELEMENTS(ncsi_rsp_handlers); i++) { -+ if (ncsi_rsp_handlers[i].type == nh->type + 0x80) { -+ handler = &ncsi_rsp_handlers[i]; -+ break; -+ } -+ } -+ -+ rnh->common.mc_id = nh->mc_id; -+ rnh->common.revision = NCSI_PKT_REVISION; -+ rnh->common.id = nh->id; -+ rnh->common.type = nh->type + 0x80; -+ rnh->common.channel = nh->channel; -+ -+ if (handler) { -+ rnh->common.length = htons(handler->payload); -+ rnh->code = htons(NCSI_PKT_RSP_C_COMPLETED); -+ rnh->reason = htons(NCSI_PKT_RSP_R_NO_ERROR); -+ -+ if (handler->handler) { -+ /* TODO: handle errors */ -+ handler->handler(rnh); -+ } -+ ncsi_rsp_len += handler->payload; -+ } else { -+ rnh->common.length = 0; -+ rnh->code = htons(NCSI_PKT_RSP_C_UNAVAILABLE); -+ rnh->reason = 
htons(NCSI_PKT_RSP_R_UNKNOWN); -+ } -+ -+ /* Add the optional checksum at the end of the frame. */ -+ checksum = ncsi_calculate_checksum((uint16_t *)rnh, ncsi_rsp_len); -+ pchecksum = (uint32_t *)((void *)rnh + ncsi_rsp_len); -+ *pchecksum = htonl(checksum); -+ ncsi_rsp_len += 4; -+ -+ slirp_send_packet_all(slirp, ncsi_reply, ETH_HLEN + ncsi_rsp_len); -+} -diff --git a/slirp/src/ndp_table.c b/slirp/src/ndp_table.c -new file mode 100644 -index 0000000000..110d6ea0e4 ---- /dev/null -+++ b/slirp/src/ndp_table.c -@@ -0,0 +1,87 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 2013 -+ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. -+ */ -+ -+#include "slirp.h" -+ -+void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, -+ uint8_t ethaddr[ETH_ALEN]) -+{ -+ char addrstr[INET6_ADDRSTRLEN]; -+ NdpTable *ndp_table = &slirp->ndp_table; -+ int i; -+ -+ inet_ntop(AF_INET6, &(ip_addr), addrstr, INET6_ADDRSTRLEN); -+ -+ DEBUG_CALL("ndp_table_add"); -+ DEBUG_ARG("ip = %s", addrstr); -+ DEBUG_ARG("hw addr = %02x:%02x:%02x:%02x:%02x:%02x", ethaddr[0], ethaddr[1], -+ ethaddr[2], ethaddr[3], ethaddr[4], ethaddr[5]); -+ -+ if (IN6_IS_ADDR_MULTICAST(&ip_addr) || in6_zero(&ip_addr)) { -+ /* Do not register multicast or unspecified addresses */ -+ DEBUG_CALL(" abort: do not register multicast or unspecified address"); -+ return; -+ } -+ -+ /* Search for an entry */ -+ for (i = 0; i < NDP_TABLE_SIZE; i++) { -+ if (in6_equal(&ndp_table->table[i].ip_addr, &ip_addr)) { -+ DEBUG_CALL(" already in table: update the entry"); -+ /* Update the entry */ -+ memcpy(ndp_table->table[i].eth_addr, ethaddr, ETH_ALEN); -+ return; -+ } -+ } -+ -+ /* No entry found, create a new one */ -+ DEBUG_CALL(" create new entry"); -+ ndp_table->table[ndp_table->next_victim].ip_addr = ip_addr; -+ memcpy(ndp_table->table[ndp_table->next_victim].eth_addr, ethaddr, -+ ETH_ALEN); -+ ndp_table->next_victim = (ndp_table->next_victim + 1) % NDP_TABLE_SIZE; -+} -+ -+bool 
ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, -+ uint8_t out_ethaddr[ETH_ALEN]) -+{ -+ char addrstr[INET6_ADDRSTRLEN]; -+ NdpTable *ndp_table = &slirp->ndp_table; -+ int i; -+ -+ inet_ntop(AF_INET6, &(ip_addr), addrstr, INET6_ADDRSTRLEN); -+ -+ DEBUG_CALL("ndp_table_search"); -+ DEBUG_ARG("ip = %s", addrstr); -+ -+ assert(!in6_zero(&ip_addr)); -+ -+ /* Multicast address: fec0::abcd:efgh/8 -> 33:33:ab:cd:ef:gh */ -+ if (IN6_IS_ADDR_MULTICAST(&ip_addr)) { -+ out_ethaddr[0] = 0x33; -+ out_ethaddr[1] = 0x33; -+ out_ethaddr[2] = ip_addr.s6_addr[12]; -+ out_ethaddr[3] = ip_addr.s6_addr[13]; -+ out_ethaddr[4] = ip_addr.s6_addr[14]; -+ out_ethaddr[5] = ip_addr.s6_addr[15]; -+ DEBUG_ARG("multicast addr = %02x:%02x:%02x:%02x:%02x:%02x", -+ out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], -+ out_ethaddr[3], out_ethaddr[4], out_ethaddr[5]); -+ return 1; -+ } -+ -+ for (i = 0; i < NDP_TABLE_SIZE; i++) { -+ if (in6_equal(&ndp_table->table[i].ip_addr, &ip_addr)) { -+ memcpy(out_ethaddr, ndp_table->table[i].eth_addr, ETH_ALEN); -+ DEBUG_ARG("found hw addr = %02x:%02x:%02x:%02x:%02x:%02x", -+ out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], -+ out_ethaddr[3], out_ethaddr[4], out_ethaddr[5]); -+ return 1; -+ } -+ } -+ -+ DEBUG_CALL(" ip not found in table"); -+ return 0; -+} -diff --git a/slirp/src/sbuf.c b/slirp/src/sbuf.c -new file mode 100644 -index 0000000000..2fb9176144 ---- /dev/null -+++ b/slirp/src/sbuf.c -@@ -0,0 +1,168 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1995 Danny Gasparovski. 
-+ */ -+ -+#include "slirp.h" -+ -+static void sbappendsb(struct sbuf *sb, struct mbuf *m); -+ -+void sbfree(struct sbuf *sb) -+{ -+ g_free(sb->sb_data); -+} -+ -+bool sbdrop(struct sbuf *sb, size_t num) -+{ -+ int limit = sb->sb_datalen / 2; -+ -+ g_warn_if_fail(num <= sb->sb_cc); -+ if (num > sb->sb_cc) -+ num = sb->sb_cc; -+ -+ sb->sb_cc -= num; -+ sb->sb_rptr += num; -+ if (sb->sb_rptr >= sb->sb_data + sb->sb_datalen) -+ sb->sb_rptr -= sb->sb_datalen; -+ -+ if (sb->sb_cc < limit && sb->sb_cc + num >= limit) { -+ return true; -+ } -+ -+ return false; -+} -+ -+void sbreserve(struct sbuf *sb, size_t size) -+{ -+ sb->sb_wptr = sb->sb_rptr = sb->sb_data = g_realloc(sb->sb_data, size); -+ sb->sb_cc = 0; -+ sb->sb_datalen = size; -+} -+ -+/* -+ * Try and write() to the socket, whatever doesn't get written -+ * append to the buffer... for a host with a fast net connection, -+ * this prevents an unnecessary copy of the data -+ * (the socket is non-blocking, so we won't hang) -+ */ -+void sbappend(struct socket *so, struct mbuf *m) -+{ -+ int ret = 0; -+ -+ DEBUG_CALL("sbappend"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("m = %p", m); -+ DEBUG_ARG("m->m_len = %d", m->m_len); -+ -+ /* Shouldn't happen, but... e.g. 
foreign host closes connection */ -+ if (m->m_len <= 0) { -+ m_free(m); -+ return; -+ } -+ -+ /* -+ * If there is urgent data, call sosendoob -+ * if not all was sent, sowrite will take care of the rest -+ * (The rest of this function is just an optimisation) -+ */ -+ if (so->so_urgc) { -+ sbappendsb(&so->so_rcv, m); -+ m_free(m); -+ (void)sosendoob(so); -+ return; -+ } -+ -+ /* -+ * We only write if there's nothing in the buffer, -+ * ottherwise it'll arrive out of order, and hence corrupt -+ */ -+ if (!so->so_rcv.sb_cc) -+ ret = slirp_send(so, m->m_data, m->m_len, 0); -+ -+ if (ret <= 0) { -+ /* -+ * Nothing was written -+ * It's possible that the socket has closed, but -+ * we don't need to check because if it has closed, -+ * it will be detected in the normal way by soread() -+ */ -+ sbappendsb(&so->so_rcv, m); -+ } else if (ret != m->m_len) { -+ /* -+ * Something was written, but not everything.. -+ * sbappendsb the rest -+ */ -+ m->m_len -= ret; -+ m->m_data += ret; -+ sbappendsb(&so->so_rcv, m); -+ } /* else */ -+ /* Whatever happened, we free the mbuf */ -+ m_free(m); -+} -+ -+/* -+ * Copy the data from m into sb -+ * The caller is responsible to make sure there's enough room -+ */ -+static void sbappendsb(struct sbuf *sb, struct mbuf *m) -+{ -+ int len, n, nn; -+ -+ len = m->m_len; -+ -+ if (sb->sb_wptr < sb->sb_rptr) { -+ n = sb->sb_rptr - sb->sb_wptr; -+ if (n > len) -+ n = len; -+ memcpy(sb->sb_wptr, m->m_data, n); -+ } else { -+ /* Do the right edge first */ -+ n = sb->sb_data + sb->sb_datalen - sb->sb_wptr; -+ if (n > len) -+ n = len; -+ memcpy(sb->sb_wptr, m->m_data, n); -+ len -= n; -+ if (len) { -+ /* Now the left edge */ -+ nn = sb->sb_rptr - sb->sb_data; -+ if (nn > len) -+ nn = len; -+ memcpy(sb->sb_data, m->m_data + n, nn); -+ n += nn; -+ } -+ } -+ -+ sb->sb_cc += n; -+ sb->sb_wptr += n; -+ if (sb->sb_wptr >= sb->sb_data + sb->sb_datalen) -+ sb->sb_wptr -= sb->sb_datalen; -+} -+ -+/* -+ * Copy data from sbuf to a normal, straight buffer -+ * 
Don't update the sbuf rptr, this will be -+ * done in sbdrop when the data is acked -+ */ -+void sbcopy(struct sbuf *sb, size_t off, size_t len, char *to) -+{ -+ char *from; -+ -+ g_assert(len + off <= sb->sb_cc); -+ -+ from = sb->sb_rptr + off; -+ if (from >= sb->sb_data + sb->sb_datalen) -+ from -= sb->sb_datalen; -+ -+ if (from < sb->sb_wptr) { -+ memcpy(to, from, len); -+ } else { -+ /* re-use off */ -+ off = (sb->sb_data + sb->sb_datalen) - from; -+ if (off > len) -+ off = len; -+ memcpy(to, from, off); -+ len -= off; -+ if (len) -+ memcpy(to + off, sb->sb_data, len); -+ } -+} -diff --git a/slirp/src/sbuf.h b/slirp/src/sbuf.h -new file mode 100644 -index 0000000000..01886fbd01 ---- /dev/null -+++ b/slirp/src/sbuf.h -@@ -0,0 +1,27 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#ifndef SBUF_H -+#define SBUF_H -+ -+#define sbspace(sb) ((sb)->sb_datalen - (sb)->sb_cc) -+ -+struct sbuf { -+ uint32_t sb_cc; /* actual chars in buffer */ -+ uint32_t sb_datalen; /* Length of data */ -+ char *sb_wptr; /* write pointer. points to where the next -+ * bytes should be written in the sbuf */ -+ char *sb_rptr; /* read pointer. 
points to where the next -+ * byte should be read from the sbuf */ -+ char *sb_data; /* Actual data */ -+}; -+ -+void sbfree(struct sbuf *sb); -+bool sbdrop(struct sbuf *sb, size_t len); -+void sbreserve(struct sbuf *sb, size_t size); -+void sbappend(struct socket *sb, struct mbuf *mb); -+void sbcopy(struct sbuf *sb, size_t off, size_t len, char *p); -+ -+#endif -diff --git a/slirp/src/slirp.c b/slirp/src/slirp.c -new file mode 100644 -index 0000000000..9be58e2add ---- /dev/null -+++ b/slirp/src/slirp.c -@@ -0,0 +1,1189 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * libslirp glue -+ * -+ * Copyright (c) 2004-2008 Fabrice Bellard -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. 
-+ */ -+#include "slirp.h" -+ -+ -+#ifndef _WIN32 -+#include -+#endif -+ -+/* https://gitlab.freedesktop.org/slirp/libslirp/issues/18 */ -+#if defined(__NetBSD__) && defined(if_mtu) -+#undef if_mtu -+#endif -+ -+int slirp_debug; -+ -+/* Define to 1 if you want KEEPALIVE timers */ -+bool slirp_do_keepalive; -+ -+/* host loopback address */ -+struct in_addr loopback_addr; -+/* host loopback network mask */ -+unsigned long loopback_mask; -+ -+/* emulated hosts use the MAC addr 52:55:IP:IP:IP:IP */ -+static const uint8_t special_ethaddr[ETH_ALEN] = { 0x52, 0x55, 0x00, -+ 0x00, 0x00, 0x00 }; -+ -+unsigned curtime; -+ -+static struct in_addr dns_addr; -+#ifndef _WIN32 -+static struct in6_addr dns6_addr; -+#endif -+static unsigned dns_addr_time; -+#ifndef _WIN32 -+static unsigned dns6_addr_time; -+#endif -+ -+#define TIMEOUT_FAST 2 /* milliseconds */ -+#define TIMEOUT_SLOW 499 /* milliseconds */ -+/* for the aging of certain requests like DNS */ -+#define TIMEOUT_DEFAULT 1000 /* milliseconds */ -+ -+#ifdef _WIN32 -+ -+int get_dns_addr(struct in_addr *pdns_addr) -+{ -+ FIXED_INFO *FixedInfo = NULL; -+ ULONG BufLen; -+ DWORD ret; -+ IP_ADDR_STRING *pIPAddr; -+ struct in_addr tmp_addr; -+ -+ if (dns_addr.s_addr != 0 && (curtime - dns_addr_time) < TIMEOUT_DEFAULT) { -+ *pdns_addr = dns_addr; -+ return 0; -+ } -+ -+ FixedInfo = (FIXED_INFO *)GlobalAlloc(GPTR, sizeof(FIXED_INFO)); -+ BufLen = sizeof(FIXED_INFO); -+ -+ if (ERROR_BUFFER_OVERFLOW == GetNetworkParams(FixedInfo, &BufLen)) { -+ if (FixedInfo) { -+ GlobalFree(FixedInfo); -+ FixedInfo = NULL; -+ } -+ FixedInfo = GlobalAlloc(GPTR, BufLen); -+ } -+ -+ if ((ret = GetNetworkParams(FixedInfo, &BufLen)) != ERROR_SUCCESS) { -+ printf("GetNetworkParams failed. 
ret = %08x\n", (unsigned)ret); -+ if (FixedInfo) { -+ GlobalFree(FixedInfo); -+ FixedInfo = NULL; -+ } -+ return -1; -+ } -+ -+ pIPAddr = &(FixedInfo->DnsServerList); -+ inet_aton(pIPAddr->IpAddress.String, &tmp_addr); -+ *pdns_addr = tmp_addr; -+ dns_addr = tmp_addr; -+ dns_addr_time = curtime; -+ if (FixedInfo) { -+ GlobalFree(FixedInfo); -+ FixedInfo = NULL; -+ } -+ return 0; -+} -+ -+int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) -+{ -+ return -1; -+} -+ -+static void winsock_cleanup(void) -+{ -+ WSACleanup(); -+} -+ -+#else -+ -+static int get_dns_addr_cached(void *pdns_addr, void *cached_addr, -+ socklen_t addrlen, struct stat *cached_stat, -+ unsigned *cached_time) -+{ -+ struct stat old_stat; -+ if (curtime - *cached_time < TIMEOUT_DEFAULT) { -+ memcpy(pdns_addr, cached_addr, addrlen); -+ return 0; -+ } -+ old_stat = *cached_stat; -+ if (stat("/etc/resolv.conf", cached_stat) != 0) { -+ return -1; -+ } -+ if (cached_stat->st_dev == old_stat.st_dev && -+ cached_stat->st_ino == old_stat.st_ino && -+ cached_stat->st_size == old_stat.st_size && -+ cached_stat->st_mtime == old_stat.st_mtime) { -+ memcpy(pdns_addr, cached_addr, addrlen); -+ return 0; -+ } -+ return 1; -+} -+ -+static int get_dns_addr_resolv_conf(int af, void *pdns_addr, void *cached_addr, -+ socklen_t addrlen, uint32_t *scope_id, -+ unsigned *cached_time) -+{ -+ char buff[512]; -+ char buff2[257]; -+ FILE *f; -+ int found = 0; -+ void *tmp_addr = alloca(addrlen); -+ unsigned if_index; -+ -+ f = fopen("/etc/resolv.conf", "r"); -+ if (!f) -+ return -1; -+ -+ DEBUG_MISC("IP address of your DNS(s):"); -+ while (fgets(buff, 512, f) != NULL) { -+ if (sscanf(buff, "nameserver%*[ \t]%256s", buff2) == 1) { -+ char *c = strchr(buff2, '%'); -+ if (c) { -+ if_index = if_nametoindex(c + 1); -+ *c = '\0'; -+ } else { -+ if_index = 0; -+ } -+ -+ if (!inet_pton(af, buff2, tmp_addr)) { -+ continue; -+ } -+ /* If it's the first one, set it to dns_addr */ -+ if (!found) { -+ memcpy(pdns_addr, 
tmp_addr, addrlen); -+ memcpy(cached_addr, tmp_addr, addrlen); -+ if (scope_id) { -+ *scope_id = if_index; -+ } -+ *cached_time = curtime; -+ } -+ -+ if (++found > 3) { -+ DEBUG_MISC(" (more)"); -+ break; -+ } else if (slirp_debug & DBG_MISC) { -+ char s[INET6_ADDRSTRLEN]; -+ const char *res = inet_ntop(af, tmp_addr, s, sizeof(s)); -+ if (!res) { -+ res = " (string conversion error)"; -+ } -+ DEBUG_MISC(" %s", res); -+ } -+ } -+ } -+ fclose(f); -+ if (!found) -+ return -1; -+ return 0; -+} -+ -+int get_dns_addr(struct in_addr *pdns_addr) -+{ -+ static struct stat dns_addr_stat; -+ -+ if (dns_addr.s_addr != 0) { -+ int ret; -+ ret = get_dns_addr_cached(pdns_addr, &dns_addr, sizeof(dns_addr), -+ &dns_addr_stat, &dns_addr_time); -+ if (ret <= 0) { -+ return ret; -+ } -+ } -+ return get_dns_addr_resolv_conf(AF_INET, pdns_addr, &dns_addr, -+ sizeof(dns_addr), NULL, &dns_addr_time); -+} -+ -+int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) -+{ -+ static struct stat dns6_addr_stat; -+ -+ if (!in6_zero(&dns6_addr)) { -+ int ret; -+ ret = get_dns_addr_cached(pdns6_addr, &dns6_addr, sizeof(dns6_addr), -+ &dns6_addr_stat, &dns6_addr_time); -+ if (ret <= 0) { -+ return ret; -+ } -+ } -+ return get_dns_addr_resolv_conf(AF_INET6, pdns6_addr, &dns6_addr, -+ sizeof(dns6_addr), scope_id, -+ &dns6_addr_time); -+} -+ -+#endif -+ -+static void slirp_init_once(void) -+{ -+ static int initialized; -+ const char *debug; -+#ifdef _WIN32 -+ WSADATA Data; -+#endif -+ -+ if (initialized) { -+ return; -+ } -+ initialized = 1; -+ -+#ifdef _WIN32 -+ WSAStartup(MAKEWORD(2, 0), &Data); -+ atexit(winsock_cleanup); -+#endif -+ -+ loopback_addr.s_addr = htonl(INADDR_LOOPBACK); -+ loopback_mask = htonl(IN_CLASSA_NET); -+ -+ debug = g_getenv("SLIRP_DEBUG"); -+ if (debug) { -+ const GDebugKey keys[] = { -+ { "call", DBG_CALL }, -+ { "misc", DBG_MISC }, -+ { "error", DBG_ERROR }, -+ { "tftp", DBG_TFTP }, -+ }; -+ slirp_debug = g_parse_debug_string(debug, keys, G_N_ELEMENTS(keys)); -+ } 
-+} -+ -+Slirp *slirp_new(const SlirpConfig *cfg, const SlirpCb *callbacks, void *opaque) -+{ -+ Slirp *slirp; -+ -+ g_return_val_if_fail(cfg != NULL, NULL); -+ g_return_val_if_fail(cfg->version >= SLIRP_CONFIG_VERSION_MIN, NULL); -+ g_return_val_if_fail(cfg->version <= SLIRP_CONFIG_VERSION_MAX, NULL); -+ g_return_val_if_fail(cfg->if_mtu >= IF_MTU_MIN || cfg->if_mtu == 0, NULL); -+ g_return_val_if_fail(cfg->if_mtu <= IF_MTU_MAX, NULL); -+ g_return_val_if_fail(cfg->if_mru >= IF_MRU_MIN || cfg->if_mru == 0, NULL); -+ g_return_val_if_fail(cfg->if_mru <= IF_MRU_MAX, NULL); -+ g_return_val_if_fail(!cfg->bootfile || -+ (strlen(cfg->bootfile) < -+ G_SIZEOF_MEMBER(struct bootp_t, bp_file)), NULL); -+ -+ slirp = g_malloc0(sizeof(Slirp)); -+ -+ slirp_init_once(); -+ -+ slirp->opaque = opaque; -+ slirp->cb = callbacks; -+ slirp->grand = g_rand_new(); -+ slirp->restricted = cfg->restricted; -+ -+ slirp->in_enabled = cfg->in_enabled; -+ slirp->in6_enabled = cfg->in6_enabled; -+ -+ if_init(slirp); -+ ip_init(slirp); -+ ip6_init(slirp); -+ -+ m_init(slirp); -+ -+ slirp->vnetwork_addr = cfg->vnetwork; -+ slirp->vnetwork_mask = cfg->vnetmask; -+ slirp->vhost_addr = cfg->vhost; -+ slirp->vprefix_addr6 = cfg->vprefix_addr6; -+ slirp->vprefix_len = cfg->vprefix_len; -+ slirp->vhost_addr6 = cfg->vhost6; -+ if (cfg->vhostname) { -+ slirp_pstrcpy(slirp->client_hostname, sizeof(slirp->client_hostname), -+ cfg->vhostname); -+ } -+ slirp->tftp_prefix = g_strdup(cfg->tftp_path); -+ slirp->bootp_filename = g_strdup(cfg->bootfile); -+ slirp->vdomainname = g_strdup(cfg->vdomainname); -+ slirp->vdhcp_startaddr = cfg->vdhcp_start; -+ slirp->vnameserver_addr = cfg->vnameserver; -+ slirp->vnameserver_addr6 = cfg->vnameserver6; -+ slirp->tftp_server_name = g_strdup(cfg->tftp_server_name); -+ -+ if (cfg->vdnssearch) { -+ translate_dnssearch(slirp, cfg->vdnssearch); -+ } -+ slirp->if_mtu = cfg->if_mtu == 0 ? IF_MTU_DEFAULT : cfg->if_mtu; -+ slirp->if_mru = cfg->if_mru == 0 ? 
IF_MRU_DEFAULT : cfg->if_mru; -+ slirp->disable_host_loopback = cfg->disable_host_loopback; -+ slirp->enable_emu = cfg->enable_emu; -+ -+ if (cfg->version >= 2) { -+ slirp->outbound_addr = cfg->outbound_addr; -+ slirp->outbound_addr6 = cfg->outbound_addr6; -+ } else { -+ slirp->outbound_addr = NULL; -+ slirp->outbound_addr6 = NULL; -+ } -+ return slirp; -+} -+ -+Slirp *slirp_init(int restricted, bool in_enabled, struct in_addr vnetwork, -+ struct in_addr vnetmask, struct in_addr vhost, -+ bool in6_enabled, struct in6_addr vprefix_addr6, -+ uint8_t vprefix_len, struct in6_addr vhost6, -+ const char *vhostname, const char *tftp_server_name, -+ const char *tftp_path, const char *bootfile, -+ struct in_addr vdhcp_start, struct in_addr vnameserver, -+ struct in6_addr vnameserver6, const char **vdnssearch, -+ const char *vdomainname, const SlirpCb *callbacks, -+ void *opaque) -+{ -+ SlirpConfig cfg; -+ memset(&cfg, 0, sizeof(cfg)); -+ cfg.version = 1; -+ cfg.restricted = restricted; -+ cfg.in_enabled = in_enabled; -+ cfg.vnetwork = vnetwork; -+ cfg.vnetmask = vnetmask; -+ cfg.vhost = vhost; -+ cfg.in6_enabled = in6_enabled; -+ cfg.vprefix_addr6 = vprefix_addr6; -+ cfg.vprefix_len = vprefix_len; -+ cfg.vhost6 = vhost6; -+ cfg.vhostname = vhostname; -+ cfg.tftp_server_name = tftp_server_name; -+ cfg.tftp_path = tftp_path; -+ cfg.bootfile = bootfile; -+ cfg.vdhcp_start = vdhcp_start; -+ cfg.vnameserver = vnameserver; -+ cfg.vnameserver6 = vnameserver6; -+ cfg.vdnssearch = vdnssearch; -+ cfg.vdomainname = vdomainname; -+ return slirp_new(&cfg, callbacks, opaque); -+} -+ -+void slirp_cleanup(Slirp *slirp) -+{ -+ struct gfwd_list *e, *next; -+ -+ for (e = slirp->guestfwd_list; e; e = next) { -+ next = e->ex_next; -+ g_free(e->ex_exec); -+ g_free(e->ex_unix); -+ g_free(e); -+ } -+ -+ ip_cleanup(slirp); -+ ip6_cleanup(slirp); -+ m_cleanup(slirp); -+ -+ g_rand_free(slirp->grand); -+ -+ g_free(slirp->vdnssearch); -+ g_free(slirp->tftp_prefix); -+ g_free(slirp->bootp_filename); -+ 
g_free(slirp->vdomainname); -+ g_free(slirp); -+} -+ -+#define CONN_CANFSEND(so) \ -+ (((so)->so_state & (SS_FCANTSENDMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED) -+#define CONN_CANFRCV(so) \ -+ (((so)->so_state & (SS_FCANTRCVMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED) -+ -+static void slirp_update_timeout(Slirp *slirp, uint32_t *timeout) -+{ -+ uint32_t t; -+ -+ if (*timeout <= TIMEOUT_FAST) { -+ return; -+ } -+ -+ t = MIN(1000, *timeout); -+ -+ /* If we have tcp timeout with slirp, then we will fill @timeout with -+ * more precise value. -+ */ -+ if (slirp->time_fasttimo) { -+ *timeout = TIMEOUT_FAST; -+ return; -+ } -+ if (slirp->do_slowtimo) { -+ t = MIN(TIMEOUT_SLOW, t); -+ } -+ *timeout = t; -+} -+ -+void slirp_pollfds_fill(Slirp *slirp, uint32_t *timeout, -+ SlirpAddPollCb add_poll, void *opaque) -+{ -+ struct socket *so, *so_next; -+ -+ /* -+ * First, TCP sockets -+ */ -+ -+ /* -+ * *_slowtimo needs calling if there are IP fragments -+ * in the fragment queue, or there are TCP connections active -+ */ -+ slirp->do_slowtimo = ((slirp->tcb.so_next != &slirp->tcb) || -+ (&slirp->ipq.ip_link != slirp->ipq.ip_link.next)); -+ -+ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) { -+ int events = 0; -+ -+ so_next = so->so_next; -+ -+ so->pollfds_idx = -1; -+ -+ /* -+ * See if we need a tcp_fasttimo -+ */ -+ if (slirp->time_fasttimo == 0 && so->so_tcpcb->t_flags & TF_DELACK) { -+ slirp->time_fasttimo = curtime; /* Flag when want a fasttimo */ -+ } -+ -+ /* -+ * NOFDREF can include still connecting to local-host, -+ * newly socreated() sockets etc. Don't want to select these. 
-+ */ -+ if (so->so_state & SS_NOFDREF || so->s == -1) { -+ continue; -+ } -+ -+ /* -+ * Set for reading sockets which are accepting -+ */ -+ if (so->so_state & SS_FACCEPTCONN) { -+ so->pollfds_idx = add_poll( -+ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); -+ continue; -+ } -+ -+ /* -+ * Set for writing sockets which are connecting -+ */ -+ if (so->so_state & SS_ISFCONNECTING) { -+ so->pollfds_idx = -+ add_poll(so->s, SLIRP_POLL_OUT | SLIRP_POLL_ERR, opaque); -+ continue; -+ } -+ -+ /* -+ * Set for writing if we are connected, can send more, and -+ * we have something to send -+ */ -+ if (CONN_CANFSEND(so) && so->so_rcv.sb_cc) { -+ events |= SLIRP_POLL_OUT | SLIRP_POLL_ERR; -+ } -+ -+ /* -+ * Set for reading (and urgent data) if we are connected, can -+ * receive more, and we have room for it XXX /2 ? -+ */ -+ if (CONN_CANFRCV(so) && -+ (so->so_snd.sb_cc < (so->so_snd.sb_datalen / 2))) { -+ events |= SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR | -+ SLIRP_POLL_PRI; -+ } -+ -+ if (events) { -+ so->pollfds_idx = add_poll(so->s, events, opaque); -+ } -+ } -+ -+ /* -+ * UDP sockets -+ */ -+ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { -+ so_next = so->so_next; -+ -+ so->pollfds_idx = -1; -+ -+ /* -+ * See if it's timed out -+ */ -+ if (so->so_expire) { -+ if (so->so_expire <= curtime) { -+ udp_detach(so); -+ continue; -+ } else { -+ slirp->do_slowtimo = true; /* Let socket expire */ -+ } -+ } -+ -+ /* -+ * When UDP packets are received from over the -+ * link, they're sendto()'d straight away, so -+ * no need for setting for writing -+ * Limit the number of packets queued by this session -+ * to 4. Note that even though we try and limit this -+ * to 4 packets, the session could have more queued -+ * if the packets needed to be fragmented -+ * (XXX <= 4 ?) 
-+ */ -+ if ((so->so_state & SS_ISFCONNECTED) && so->so_queued <= 4) { -+ so->pollfds_idx = add_poll( -+ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); -+ } -+ } -+ -+ /* -+ * ICMP sockets -+ */ -+ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { -+ so_next = so->so_next; -+ -+ so->pollfds_idx = -1; -+ -+ /* -+ * See if it's timed out -+ */ -+ if (so->so_expire) { -+ if (so->so_expire <= curtime) { -+ icmp_detach(so); -+ continue; -+ } else { -+ slirp->do_slowtimo = true; /* Let socket expire */ -+ } -+ } -+ -+ if (so->so_state & SS_ISFCONNECTED) { -+ so->pollfds_idx = add_poll( -+ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); -+ } -+ } -+ -+ slirp_update_timeout(slirp, timeout); -+} -+ -+void slirp_pollfds_poll(Slirp *slirp, int select_error, -+ SlirpGetREventsCb get_revents, void *opaque) -+{ -+ struct socket *so, *so_next; -+ int ret; -+ -+ curtime = slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS; -+ -+ /* -+ * See if anything has timed out -+ */ -+ if (slirp->time_fasttimo && -+ ((curtime - slirp->time_fasttimo) >= TIMEOUT_FAST)) { -+ tcp_fasttimo(slirp); -+ slirp->time_fasttimo = 0; -+ } -+ if (slirp->do_slowtimo && -+ ((curtime - slirp->last_slowtimo) >= TIMEOUT_SLOW)) { -+ ip_slowtimo(slirp); -+ tcp_slowtimo(slirp); -+ slirp->last_slowtimo = curtime; -+ } -+ -+ /* -+ * Check sockets -+ */ -+ if (!select_error) { -+ /* -+ * Check TCP sockets -+ */ -+ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) { -+ int revents; -+ -+ so_next = so->so_next; -+ -+ revents = 0; -+ if (so->pollfds_idx != -1) { -+ revents = get_revents(so->pollfds_idx, opaque); -+ } -+ -+ if (so->so_state & SS_NOFDREF || so->s == -1) { -+ continue; -+ } -+ -+ /* -+ * Check for URG data -+ * This will soread as well, so no need to -+ * test for SLIRP_POLL_IN below if this succeeds -+ */ -+ if (revents & SLIRP_POLL_PRI) { -+ ret = sorecvoob(so); -+ if (ret < 0) { -+ /* Socket error might have resulted in the socket being -+ 
* removed, do not try to do anything more with it. */ -+ continue; -+ } -+ } -+ /* -+ * Check sockets for reading -+ */ -+ else if (revents & -+ (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR)) { -+ /* -+ * Check for incoming connections -+ */ -+ if (so->so_state & SS_FACCEPTCONN) { -+ tcp_connect(so); -+ continue; -+ } /* else */ -+ ret = soread(so); -+ -+ /* Output it if we read something */ -+ if (ret > 0) { -+ tcp_output(sototcpcb(so)); -+ } -+ if (ret < 0) { -+ /* Socket error might have resulted in the socket being -+ * removed, do not try to do anything more with it. */ -+ continue; -+ } -+ } -+ -+ /* -+ * Check sockets for writing -+ */ -+ if (!(so->so_state & SS_NOFDREF) && -+ (revents & (SLIRP_POLL_OUT | SLIRP_POLL_ERR))) { -+ /* -+ * Check for non-blocking, still-connecting sockets -+ */ -+ if (so->so_state & SS_ISFCONNECTING) { -+ /* Connected */ -+ so->so_state &= ~SS_ISFCONNECTING; -+ -+ ret = send(so->s, (const void *)&ret, 0, 0); -+ if (ret < 0) { -+ /* XXXXX Must fix, zero bytes is a NOP */ -+ if (errno == EAGAIN || errno == EWOULDBLOCK || -+ errno == EINPROGRESS || errno == ENOTCONN) { -+ continue; -+ } -+ -+ /* else failed */ -+ so->so_state &= SS_PERSISTENT_MASK; -+ so->so_state |= SS_NOFDREF; -+ } -+ /* else so->so_state &= ~SS_ISFCONNECTING; */ -+ -+ /* -+ * Continue tcp_input -+ */ -+ tcp_input((struct mbuf *)NULL, sizeof(struct ip), so, -+ so->so_ffamily); -+ /* continue; */ -+ } else { -+ ret = sowrite(so); -+ if (ret > 0) { -+ /* Call tcp_output in case we need to send a window -+ * update to the guest, otherwise it will be stuck -+ * until it sends a window probe. */ -+ tcp_output(sototcpcb(so)); -+ } -+ } -+ } -+ } -+ -+ /* -+ * Now UDP sockets. -+ * Incoming packets are sent straight away, they're not buffered. -+ * Incoming UDP data isn't buffered either. 
-+ */ -+ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { -+ int revents; -+ -+ so_next = so->so_next; -+ -+ revents = 0; -+ if (so->pollfds_idx != -1) { -+ revents = get_revents(so->pollfds_idx, opaque); -+ } -+ -+ if (so->s != -1 && -+ (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { -+ sorecvfrom(so); -+ } -+ } -+ -+ /* -+ * Check incoming ICMP relies. -+ */ -+ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { -+ int revents; -+ -+ so_next = so->so_next; -+ -+ revents = 0; -+ if (so->pollfds_idx != -1) { -+ revents = get_revents(so->pollfds_idx, opaque); -+ } -+ -+ if (so->s != -1 && -+ (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { -+ icmp_receive(so); -+ } -+ } -+ } -+ -+ if_start(slirp); -+} -+ -+static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) -+{ -+ const struct slirp_arphdr *ah = -+ (const struct slirp_arphdr *)(pkt + ETH_HLEN); -+ uint8_t arp_reply[MAX(ETH_HLEN + sizeof(struct slirp_arphdr), 64)]; -+ struct ethhdr *reh = (struct ethhdr *)arp_reply; -+ struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_reply + ETH_HLEN); -+ int ar_op; -+ struct gfwd_list *ex_ptr; -+ -+ if (!slirp->in_enabled) { -+ return; -+ } -+ -+ if (pkt_len < ETH_HLEN + sizeof(struct slirp_arphdr)) { -+ return; /* packet too short */ -+ } -+ -+ ar_op = ntohs(ah->ar_op); -+ switch (ar_op) { -+ case ARPOP_REQUEST: -+ if (ah->ar_tip == ah->ar_sip) { -+ /* Gratuitous ARP */ -+ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); -+ return; -+ } -+ -+ if ((ah->ar_tip & slirp->vnetwork_mask.s_addr) == -+ slirp->vnetwork_addr.s_addr) { -+ if (ah->ar_tip == slirp->vnameserver_addr.s_addr || -+ ah->ar_tip == slirp->vhost_addr.s_addr) -+ goto arp_ok; -+ /* TODO: IPv6 */ -+ for (ex_ptr = slirp->guestfwd_list; ex_ptr; -+ ex_ptr = ex_ptr->ex_next) { -+ if (ex_ptr->ex_addr.s_addr == ah->ar_tip) -+ goto arp_ok; -+ } -+ return; -+ arp_ok: -+ memset(arp_reply, 0, sizeof(arp_reply)); -+ -+ arp_table_add(slirp, 
ah->ar_sip, ah->ar_sha); -+ -+ /* ARP request for alias/dns mac address */ -+ memcpy(reh->h_dest, pkt + ETH_ALEN, ETH_ALEN); -+ memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); -+ memcpy(&reh->h_source[2], &ah->ar_tip, 4); -+ reh->h_proto = htons(ETH_P_ARP); -+ -+ rah->ar_hrd = htons(1); -+ rah->ar_pro = htons(ETH_P_IP); -+ rah->ar_hln = ETH_ALEN; -+ rah->ar_pln = 4; -+ rah->ar_op = htons(ARPOP_REPLY); -+ memcpy(rah->ar_sha, reh->h_source, ETH_ALEN); -+ rah->ar_sip = ah->ar_tip; -+ memcpy(rah->ar_tha, ah->ar_sha, ETH_ALEN); -+ rah->ar_tip = ah->ar_sip; -+ slirp_send_packet_all(slirp, arp_reply, sizeof(arp_reply)); -+ } -+ break; -+ case ARPOP_REPLY: -+ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); -+ break; -+ default: -+ break; -+ } -+} -+ -+void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) -+{ -+ struct mbuf *m; -+ int proto; -+ -+ if (pkt_len < ETH_HLEN) -+ return; -+ -+ proto = (((uint16_t)pkt[12]) << 8) + pkt[13]; -+ switch (proto) { -+ case ETH_P_ARP: -+ arp_input(slirp, pkt, pkt_len); -+ break; -+ case ETH_P_IP: -+ case ETH_P_IPV6: -+ m = m_get(slirp); -+ if (!m) -+ return; -+ /* Note: we add 2 to align the IP header on 4 bytes, -+ * and add the margin for the tcpiphdr overhead */ -+ if (M_FREEROOM(m) < pkt_len + TCPIPHDR_DELTA + 2) { -+ m_inc(m, pkt_len + TCPIPHDR_DELTA + 2); -+ } -+ m->m_len = pkt_len + TCPIPHDR_DELTA + 2; -+ memcpy(m->m_data + TCPIPHDR_DELTA + 2, pkt, pkt_len); -+ -+ m->m_data += TCPIPHDR_DELTA + 2 + ETH_HLEN; -+ m->m_len -= TCPIPHDR_DELTA + 2 + ETH_HLEN; -+ -+ if (proto == ETH_P_IP) { -+ ip_input(m); -+ } else if (proto == ETH_P_IPV6) { -+ ip6_input(m); -+ } -+ break; -+ -+ case ETH_P_NCSI: -+ ncsi_input(slirp, pkt, pkt_len); -+ break; -+ -+ default: -+ break; -+ } -+} -+ -+/* Prepare the IPv4 packet to be sent to the ethernet device. Returns 1 if no -+ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet -+ * is ready to go. 
-+ */ -+static int if_encap4(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, -+ uint8_t ethaddr[ETH_ALEN]) -+{ -+ const struct ip *iph = (const struct ip *)ifm->m_data; -+ -+ if (!arp_table_search(slirp, iph->ip_dst.s_addr, ethaddr)) { -+ uint8_t arp_req[ETH_HLEN + sizeof(struct slirp_arphdr)]; -+ struct ethhdr *reh = (struct ethhdr *)arp_req; -+ struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_req + ETH_HLEN); -+ -+ if (!ifm->resolution_requested) { -+ /* If the client addr is not known, send an ARP request */ -+ memset(reh->h_dest, 0xff, ETH_ALEN); -+ memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); -+ memcpy(&reh->h_source[2], &slirp->vhost_addr, 4); -+ reh->h_proto = htons(ETH_P_ARP); -+ rah->ar_hrd = htons(1); -+ rah->ar_pro = htons(ETH_P_IP); -+ rah->ar_hln = ETH_ALEN; -+ rah->ar_pln = 4; -+ rah->ar_op = htons(ARPOP_REQUEST); -+ -+ /* source hw addr */ -+ memcpy(rah->ar_sha, special_ethaddr, ETH_ALEN - 4); -+ memcpy(&rah->ar_sha[2], &slirp->vhost_addr, 4); -+ -+ /* source IP */ -+ rah->ar_sip = slirp->vhost_addr.s_addr; -+ -+ /* target hw addr (none) */ -+ memset(rah->ar_tha, 0, ETH_ALEN); -+ -+ /* target IP */ -+ rah->ar_tip = iph->ip_dst.s_addr; -+ slirp->client_ipaddr = iph->ip_dst; -+ slirp_send_packet_all(slirp, arp_req, sizeof(arp_req)); -+ ifm->resolution_requested = true; -+ -+ /* Expire request and drop outgoing packet after 1 second */ -+ ifm->expiration_date = -+ slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; -+ } -+ return 0; -+ } else { -+ memcpy(eh->h_source, special_ethaddr, ETH_ALEN - 4); -+ /* XXX: not correct */ -+ memcpy(&eh->h_source[2], &slirp->vhost_addr, 4); -+ eh->h_proto = htons(ETH_P_IP); -+ -+ /* Send this */ -+ return 2; -+ } -+} -+ -+/* Prepare the IPv6 packet to be sent to the ethernet device. Returns 1 if no -+ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet -+ * is ready to go. 
-+ */ -+static int if_encap6(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, -+ uint8_t ethaddr[ETH_ALEN]) -+{ -+ const struct ip6 *ip6h = mtod(ifm, const struct ip6 *); -+ if (!ndp_table_search(slirp, ip6h->ip_dst, ethaddr)) { -+ if (!ifm->resolution_requested) { -+ ndp_send_ns(slirp, ip6h->ip_dst); -+ ifm->resolution_requested = true; -+ ifm->expiration_date = -+ slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; -+ } -+ return 0; -+ } else { -+ eh->h_proto = htons(ETH_P_IPV6); -+ in6_compute_ethaddr(ip6h->ip_src, eh->h_source); -+ -+ /* Send this */ -+ return 2; -+ } -+} -+ -+/* Output the IP packet to the ethernet device. Returns 0 if the packet must be -+ * re-queued. -+ */ -+int if_encap(Slirp *slirp, struct mbuf *ifm) -+{ -+ uint8_t buf[IF_MTU_MAX + 100]; -+ struct ethhdr *eh = (struct ethhdr *)buf; -+ uint8_t ethaddr[ETH_ALEN]; -+ const struct ip *iph = (const struct ip *)ifm->m_data; -+ int ret; -+ -+ if (ifm->m_len + ETH_HLEN > sizeof(buf)) { -+ return 1; -+ } -+ -+ switch (iph->ip_v) { -+ case IPVERSION: -+ ret = if_encap4(slirp, ifm, eh, ethaddr); -+ if (ret < 2) { -+ return ret; -+ } -+ break; -+ -+ case IP6VERSION: -+ ret = if_encap6(slirp, ifm, eh, ethaddr); -+ if (ret < 2) { -+ return ret; -+ } -+ break; -+ -+ default: -+ g_assert_not_reached(); -+ } -+ -+ memcpy(eh->h_dest, ethaddr, ETH_ALEN); -+ DEBUG_ARG("src = %02x:%02x:%02x:%02x:%02x:%02x", eh->h_source[0], -+ eh->h_source[1], eh->h_source[2], eh->h_source[3], -+ eh->h_source[4], eh->h_source[5]); -+ DEBUG_ARG("dst = %02x:%02x:%02x:%02x:%02x:%02x", eh->h_dest[0], -+ eh->h_dest[1], eh->h_dest[2], eh->h_dest[3], eh->h_dest[4], -+ eh->h_dest[5]); -+ memcpy(buf + sizeof(struct ethhdr), ifm->m_data, ifm->m_len); -+ slirp_send_packet_all(slirp, buf, ifm->m_len + ETH_HLEN); -+ return 1; -+} -+ -+/* Drop host forwarding rule, return 0 if found. 
*/ -+/* TODO: IPv6 */ -+int slirp_remove_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, -+ int host_port) -+{ -+ struct socket *so; -+ struct socket *head = (is_udp ? &slirp->udb : &slirp->tcb); -+ struct sockaddr_in addr; -+ int port = htons(host_port); -+ socklen_t addr_len; -+ -+ for (so = head->so_next; so != head; so = so->so_next) { -+ addr_len = sizeof(addr); -+ if ((so->so_state & SS_HOSTFWD) && -+ getsockname(so->s, (struct sockaddr *)&addr, &addr_len) == 0 && -+ addr.sin_addr.s_addr == host_addr.s_addr && addr.sin_port == port) { -+ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); -+ closesocket(so->s); -+ sofree(so); -+ return 0; -+ } -+ } -+ -+ return -1; -+} -+ -+/* TODO: IPv6 */ -+int slirp_add_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, -+ int host_port, struct in_addr guest_addr, int guest_port) -+{ -+ if (!guest_addr.s_addr) { -+ guest_addr = slirp->vdhcp_startaddr; -+ } -+ if (is_udp) { -+ if (!udp_listen(slirp, host_addr.s_addr, htons(host_port), -+ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) -+ return -1; -+ } else { -+ if (!tcp_listen(slirp, host_addr.s_addr, htons(host_port), -+ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) -+ return -1; -+ } -+ return 0; -+} -+ -+/* TODO: IPv6 */ -+static bool check_guestfwd(Slirp *slirp, struct in_addr *guest_addr, -+ int guest_port) -+{ -+ struct gfwd_list *tmp_ptr; -+ -+ if (!guest_addr->s_addr) { -+ guest_addr->s_addr = slirp->vnetwork_addr.s_addr | -+ (htonl(0x0204) & ~slirp->vnetwork_mask.s_addr); -+ } -+ if ((guest_addr->s_addr & slirp->vnetwork_mask.s_addr) != -+ slirp->vnetwork_addr.s_addr || -+ guest_addr->s_addr == slirp->vhost_addr.s_addr || -+ guest_addr->s_addr == slirp->vnameserver_addr.s_addr) { -+ return false; -+ } -+ -+ /* check if the port is "bound" */ -+ for (tmp_ptr = slirp->guestfwd_list; tmp_ptr; tmp_ptr = tmp_ptr->ex_next) { -+ if (guest_port == tmp_ptr->ex_fport && -+ guest_addr->s_addr == tmp_ptr->ex_addr.s_addr) -+ return false; 
-+ } -+ -+ return true; -+} -+ -+int slirp_add_exec(Slirp *slirp, const char *cmdline, -+ struct in_addr *guest_addr, int guest_port) -+{ -+ if (!check_guestfwd(slirp, guest_addr, guest_port)) { -+ return -1; -+ } -+ -+ add_exec(&slirp->guestfwd_list, cmdline, *guest_addr, htons(guest_port)); -+ return 0; -+} -+ -+int slirp_add_unix(Slirp *slirp, const char *unixsock, -+ struct in_addr *guest_addr, int guest_port) -+{ -+#ifdef G_OS_UNIX -+ if (!check_guestfwd(slirp, guest_addr, guest_port)) { -+ return -1; -+ } -+ -+ add_unix(&slirp->guestfwd_list, unixsock, *guest_addr, htons(guest_port)); -+ return 0; -+#else -+ g_warn_if_reached(); -+ return -1; -+#endif -+} -+ -+int slirp_add_guestfwd(Slirp *slirp, SlirpWriteCb write_cb, void *opaque, -+ struct in_addr *guest_addr, int guest_port) -+{ -+ if (!check_guestfwd(slirp, guest_addr, guest_port)) { -+ return -1; -+ } -+ -+ add_guestfwd(&slirp->guestfwd_list, write_cb, opaque, *guest_addr, -+ htons(guest_port)); -+ return 0; -+} -+ -+int slirp_remove_guestfwd(Slirp *slirp, struct in_addr guest_addr, -+ int guest_port) -+{ -+ return remove_guestfwd(&slirp->guestfwd_list, guest_addr, -+ htons(guest_port)); -+} -+ -+ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags) -+{ -+ if (so->s == -1 && so->guestfwd) { -+ /* XXX this blocks entire thread. Rewrite to use -+ * qemu_chr_fe_write and background I/O callbacks */ -+ so->guestfwd->write_cb(buf, len, so->guestfwd->opaque); -+ return len; -+ } -+ -+ if (so->s == -1) { -+ /* -+ * This should in theory not happen but it is hard to be -+ * sure because some code paths will end up with so->s == -1 -+ * on a failure but don't dispose of the struct socket. -+ * Check specifically, so we don't pass -1 to send(). 
-+ */ -+ errno = EBADF; -+ return -1; -+ } -+ -+ return send(so->s, buf, len, flags); -+} -+ -+struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, -+ int guest_port) -+{ -+ struct socket *so; -+ -+ /* TODO: IPv6 */ -+ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { -+ if (so->so_faddr.s_addr == guest_addr.s_addr && -+ htons(so->so_fport) == guest_port) { -+ return so; -+ } -+ } -+ return NULL; -+} -+ -+size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, -+ int guest_port) -+{ -+ struct iovec iov[2]; -+ struct socket *so; -+ -+ so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); -+ -+ if (!so || so->so_state & SS_NOFDREF) { -+ return 0; -+ } -+ -+ if (!CONN_CANFRCV(so) || so->so_snd.sb_cc >= (so->so_snd.sb_datalen / 2)) { -+ return 0; -+ } -+ -+ return sopreprbuf(so, iov, NULL); -+} -+ -+void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port, -+ const uint8_t *buf, int size) -+{ -+ int ret; -+ struct socket *so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); -+ -+ if (!so) -+ return; -+ -+ ret = soreadbuf(so, (const char *)buf, size); -+ -+ if (ret > 0) -+ tcp_output(sototcpcb(so)); -+} -+ -+void slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len) -+{ -+ ssize_t ret = slirp->cb->send_packet(buf, len, slirp->opaque); -+ -+ if (ret < 0) { -+ g_critical("Failed to send packet, ret: %ld", (long)ret); -+ } else if (ret < len) { -+ DEBUG_ERROR("send_packet() didn't send all data: %ld < %lu", (long)ret, -+ (unsigned long)len); -+ } -+} -diff --git a/slirp/src/slirp.h b/slirp/src/slirp.h -new file mode 100644 -index 0000000000..763a65b9ef ---- /dev/null -+++ b/slirp/src/slirp.h -@@ -0,0 +1,284 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+#ifndef SLIRP_H -+#define SLIRP_H -+ -+#ifdef _WIN32 -+ -+/* as defined in sdkddkver.h */ -+#ifndef _WIN32_WINNT -+#define _WIN32_WINNT 0x0600 /* Vista */ -+#endif -+/* reduces the number of implicitly included headers 
*/ -+#ifndef WIN32_LEAN_AND_MEAN -+#define WIN32_LEAN_AND_MEAN -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+ -+#else -+#if !defined(__HAIKU__) -+#define O_BINARY 0 -+#endif -+#endif -+ -+#ifndef _WIN32 -+#include -+#include -+#include -+#include -+#include -+#endif -+ -+#ifdef __APPLE__ -+#include -+#endif -+ -+/* Avoid conflicting with the libc insque() and remque(), which -+ have different prototypes. */ -+#define insque slirp_insque -+#define remque slirp_remque -+#define quehead slirp_quehead -+ -+#include "debug.h" -+#include "util.h" -+ -+#include "libslirp.h" -+#include "ip.h" -+#include "ip6.h" -+#include "tcp.h" -+#include "tcp_timer.h" -+#include "tcp_var.h" -+#include "tcpip.h" -+#include "udp.h" -+#include "ip_icmp.h" -+#include "ip6_icmp.h" -+#include "mbuf.h" -+#include "sbuf.h" -+#include "socket.h" -+#include "if.h" -+#include "main.h" -+#include "misc.h" -+ -+#include "bootp.h" -+#include "tftp.h" -+ -+#define ARPOP_REQUEST 1 /* ARP request */ -+#define ARPOP_REPLY 2 /* ARP reply */ -+ -+struct ethhdr { -+ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ -+ unsigned char h_source[ETH_ALEN]; /* source ether addr */ -+ unsigned short h_proto; /* packet type ID field */ -+}; -+ -+struct slirp_arphdr { -+ unsigned short ar_hrd; /* format of hardware address */ -+ unsigned short ar_pro; /* format of protocol address */ -+ unsigned char ar_hln; /* length of hardware address */ -+ unsigned char ar_pln; /* length of protocol address */ -+ unsigned short ar_op; /* ARP opcode (command) */ -+ -+ /* -+ * Ethernet looks like this : This bit is variable sized however... 
-+ */ -+ unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */ -+ uint32_t ar_sip; /* sender IP address */ -+ unsigned char ar_tha[ETH_ALEN]; /* target hardware address */ -+ uint32_t ar_tip; /* target IP address */ -+} SLIRP_PACKED; -+ -+#define ARP_TABLE_SIZE 16 -+ -+typedef struct ArpTable { -+ struct slirp_arphdr table[ARP_TABLE_SIZE]; -+ int next_victim; -+} ArpTable; -+ -+void arp_table_add(Slirp *slirp, uint32_t ip_addr, -+ const uint8_t ethaddr[ETH_ALEN]); -+ -+bool arp_table_search(Slirp *slirp, uint32_t ip_addr, -+ uint8_t out_ethaddr[ETH_ALEN]); -+ -+struct ndpentry { -+ unsigned char eth_addr[ETH_ALEN]; /* sender hardware address */ -+ struct in6_addr ip_addr; /* sender IP address */ -+}; -+ -+#define NDP_TABLE_SIZE 16 -+ -+typedef struct NdpTable { -+ struct ndpentry table[NDP_TABLE_SIZE]; -+ int next_victim; -+} NdpTable; -+ -+void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, -+ uint8_t ethaddr[ETH_ALEN]); -+bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, -+ uint8_t out_ethaddr[ETH_ALEN]); -+ -+struct Slirp { -+ unsigned time_fasttimo; -+ unsigned last_slowtimo; -+ bool do_slowtimo; -+ -+ bool in_enabled, in6_enabled; -+ -+ /* virtual network configuration */ -+ struct in_addr vnetwork_addr; -+ struct in_addr vnetwork_mask; -+ struct in_addr vhost_addr; -+ struct in6_addr vprefix_addr6; -+ uint8_t vprefix_len; -+ struct in6_addr vhost_addr6; -+ struct in_addr vdhcp_startaddr; -+ struct in_addr vnameserver_addr; -+ struct in6_addr vnameserver_addr6; -+ -+ struct in_addr client_ipaddr; -+ char client_hostname[33]; -+ -+ int restricted; -+ struct gfwd_list *guestfwd_list; -+ -+ int if_mtu; -+ int if_mru; -+ -+ bool disable_host_loopback; -+ -+ /* mbuf states */ -+ struct quehead m_freelist; -+ struct quehead m_usedlist; -+ int mbuf_alloced; -+ -+ /* if states */ -+ struct quehead if_fastq; /* fast queue (for interactive data) */ -+ struct quehead if_batchq; /* queue for non-interactive data */ -+ bool if_start_busy; /* avoid 
if_start recursion */ -+ -+ /* ip states */ -+ struct ipq ipq; /* ip reass. queue */ -+ uint16_t ip_id; /* ip packet ctr, for ids */ -+ -+ /* bootp/dhcp states */ -+ BOOTPClient bootp_clients[NB_BOOTP_CLIENTS]; -+ char *bootp_filename; -+ size_t vdnssearch_len; -+ uint8_t *vdnssearch; -+ char *vdomainname; -+ -+ /* tcp states */ -+ struct socket tcb; -+ struct socket *tcp_last_so; -+ tcp_seq tcp_iss; /* tcp initial send seq # */ -+ uint32_t tcp_now; /* for RFC 1323 timestamps */ -+ -+ /* udp states */ -+ struct socket udb; -+ struct socket *udp_last_so; -+ -+ /* icmp states */ -+ struct socket icmp; -+ struct socket *icmp_last_so; -+ -+ /* tftp states */ -+ char *tftp_prefix; -+ struct tftp_session tftp_sessions[TFTP_SESSIONS_MAX]; -+ char *tftp_server_name; -+ -+ ArpTable arp_table; -+ NdpTable ndp_table; -+ -+ GRand *grand; -+ void *ra_timer; -+ -+ bool enable_emu; -+ -+ const SlirpCb *cb; -+ void *opaque; -+ -+ struct sockaddr_in *outbound_addr; -+ struct sockaddr_in6 *outbound_addr6; -+}; -+ -+void if_start(Slirp *); -+ -+int get_dns_addr(struct in_addr *pdns_addr); -+int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id); -+ -+/* ncsi.c */ -+void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); -+ -+#ifndef _WIN32 -+#include -+#endif -+ -+ -+extern bool slirp_do_keepalive; -+ -+#define TCP_MAXIDLE (TCPTV_KEEPCNT * TCPTV_KEEPINTVL) -+ -+/* dnssearch.c */ -+int translate_dnssearch(Slirp *s, const char **names); -+ -+/* cksum.c */ -+int cksum(struct mbuf *m, int len); -+int ip6_cksum(struct mbuf *m); -+ -+/* if.c */ -+void if_init(Slirp *); -+void if_output(struct socket *, struct mbuf *); -+ -+/* ip_input.c */ -+void ip_init(Slirp *); -+void ip_cleanup(Slirp *); -+void ip_input(struct mbuf *); -+void ip_slowtimo(Slirp *); -+void ip_stripoptions(register struct mbuf *, struct mbuf *); -+ -+/* ip_output.c */ -+int ip_output(struct socket *, struct mbuf *); -+ -+/* ip6_input.c */ -+void ip6_init(Slirp *); -+void ip6_cleanup(Slirp *); -+void 
ip6_input(struct mbuf *); -+ -+/* ip6_output */ -+int ip6_output(struct socket *, struct mbuf *, int fast); -+ -+/* tcp_input.c */ -+void tcp_input(register struct mbuf *, int, struct socket *, unsigned short af); -+int tcp_mss(register struct tcpcb *, unsigned); -+ -+/* tcp_output.c */ -+int tcp_output(register struct tcpcb *); -+void tcp_setpersist(register struct tcpcb *); -+ -+/* tcp_subr.c */ -+void tcp_init(Slirp *); -+void tcp_cleanup(Slirp *); -+void tcp_template(struct tcpcb *); -+void tcp_respond(struct tcpcb *, register struct tcpiphdr *, -+ register struct mbuf *, tcp_seq, tcp_seq, int, unsigned short); -+struct tcpcb *tcp_newtcpcb(struct socket *); -+struct tcpcb *tcp_close(register struct tcpcb *); -+void tcp_sockclosed(struct tcpcb *); -+int tcp_fconnect(struct socket *, unsigned short af); -+void tcp_connect(struct socket *); -+void tcp_attach(struct socket *); -+uint8_t tcp_tos(struct socket *); -+int tcp_emu(struct socket *, struct mbuf *); -+int tcp_ctl(struct socket *); -+struct tcpcb *tcp_drop(struct tcpcb *tp, int err); -+ -+struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, -+ int guest_port); -+ -+void slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len); -+ -+#endif -diff --git a/slirp/src/socket.c b/slirp/src/socket.c -new file mode 100644 -index 0000000000..1e385df0d8 ---- /dev/null -+++ b/slirp/src/socket.c -@@ -0,0 +1,954 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1995 Danny Gasparovski. 
-+ */ -+ -+#include "slirp.h" -+#include "ip_icmp.h" -+#ifdef __sun__ -+#include -+#endif -+ -+static void sofcantrcvmore(struct socket *so); -+static void sofcantsendmore(struct socket *so); -+ -+struct socket *solookup(struct socket **last, struct socket *head, -+ struct sockaddr_storage *lhost, -+ struct sockaddr_storage *fhost) -+{ -+ struct socket *so = *last; -+ -+ /* Optimisation */ -+ if (so != head && sockaddr_equal(&(so->lhost.ss), lhost) && -+ (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { -+ return so; -+ } -+ -+ for (so = head->so_next; so != head; so = so->so_next) { -+ if (sockaddr_equal(&(so->lhost.ss), lhost) && -+ (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { -+ *last = so; -+ return so; -+ } -+ } -+ -+ return (struct socket *)NULL; -+} -+ -+/* -+ * Create a new socket, initialise the fields -+ * It is the responsibility of the caller to -+ * insque() it into the correct linked-list -+ */ -+struct socket *socreate(Slirp *slirp) -+{ -+ struct socket *so = g_new(struct socket, 1); -+ -+ memset(so, 0, sizeof(struct socket)); -+ so->so_state = SS_NOFDREF; -+ so->s = -1; -+ so->slirp = slirp; -+ so->pollfds_idx = -1; -+ -+ return so; -+} -+ -+/* -+ * Remove references to so from the given message queue. 
-+ */ -+static void soqfree(struct socket *so, struct quehead *qh) -+{ -+ struct mbuf *ifq; -+ -+ for (ifq = (struct mbuf *)qh->qh_link; (struct quehead *)ifq != qh; -+ ifq = ifq->ifq_next) { -+ if (ifq->ifq_so == so) { -+ struct mbuf *ifm; -+ ifq->ifq_so = NULL; -+ for (ifm = ifq->ifs_next; ifm != ifq; ifm = ifm->ifs_next) { -+ ifm->ifq_so = NULL; -+ } -+ } -+ } -+} -+ -+/* -+ * remque and free a socket, clobber cache -+ */ -+void sofree(struct socket *so) -+{ -+ Slirp *slirp = so->slirp; -+ -+ soqfree(so, &slirp->if_fastq); -+ soqfree(so, &slirp->if_batchq); -+ -+ if (so == slirp->tcp_last_so) { -+ slirp->tcp_last_so = &slirp->tcb; -+ } else if (so == slirp->udp_last_so) { -+ slirp->udp_last_so = &slirp->udb; -+ } else if (so == slirp->icmp_last_so) { -+ slirp->icmp_last_so = &slirp->icmp; -+ } -+ m_free(so->so_m); -+ -+ if (so->so_next && so->so_prev) -+ remque(so); /* crashes if so is not in a queue */ -+ -+ if (so->so_tcpcb) { -+ g_free(so->so_tcpcb); -+ } -+ g_free(so); -+} -+ -+size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np) -+{ -+ int n, lss, total; -+ struct sbuf *sb = &so->so_snd; -+ int len = sb->sb_datalen - sb->sb_cc; -+ int mss = so->so_tcpcb->t_maxseg; -+ -+ DEBUG_CALL("sopreprbuf"); -+ DEBUG_ARG("so = %p", so); -+ -+ if (len <= 0) -+ return 0; -+ -+ iov[0].iov_base = sb->sb_wptr; -+ iov[1].iov_base = NULL; -+ iov[1].iov_len = 0; -+ if (sb->sb_wptr < sb->sb_rptr) { -+ iov[0].iov_len = sb->sb_rptr - sb->sb_wptr; -+ /* Should never succeed, but... */ -+ if (iov[0].iov_len > len) -+ iov[0].iov_len = len; -+ if (iov[0].iov_len > mss) -+ iov[0].iov_len -= iov[0].iov_len % mss; -+ n = 1; -+ } else { -+ iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr; -+ /* Should never succeed, but... 
*/ -+ if (iov[0].iov_len > len) -+ iov[0].iov_len = len; -+ len -= iov[0].iov_len; -+ if (len) { -+ iov[1].iov_base = sb->sb_data; -+ iov[1].iov_len = sb->sb_rptr - sb->sb_data; -+ if (iov[1].iov_len > len) -+ iov[1].iov_len = len; -+ total = iov[0].iov_len + iov[1].iov_len; -+ if (total > mss) { -+ lss = total % mss; -+ if (iov[1].iov_len > lss) { -+ iov[1].iov_len -= lss; -+ n = 2; -+ } else { -+ lss -= iov[1].iov_len; -+ iov[0].iov_len -= lss; -+ n = 1; -+ } -+ } else -+ n = 2; -+ } else { -+ if (iov[0].iov_len > mss) -+ iov[0].iov_len -= iov[0].iov_len % mss; -+ n = 1; -+ } -+ } -+ if (np) -+ *np = n; -+ -+ return iov[0].iov_len + (n - 1) * iov[1].iov_len; -+} -+ -+/* -+ * Read from so's socket into sb_snd, updating all relevant sbuf fields -+ * NOTE: This will only be called if it is select()ed for reading, so -+ * a read() of 0 (or less) means it's disconnected -+ */ -+int soread(struct socket *so) -+{ -+ int n, nn; -+ size_t buf_len; -+ struct sbuf *sb = &so->so_snd; -+ struct iovec iov[2]; -+ -+ DEBUG_CALL("soread"); -+ DEBUG_ARG("so = %p", so); -+ -+ /* -+ * No need to check if there's enough room to read. 
-+ * soread wouldn't have been called if there weren't -+ */ -+ buf_len = sopreprbuf(so, iov, &n); -+ assert(buf_len != 0); -+ -+ nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, 0); -+ if (nn <= 0) { -+ if (nn < 0 && (errno == EINTR || errno == EAGAIN)) -+ return 0; -+ else { -+ int err; -+ socklen_t elen = sizeof err; -+ struct sockaddr_storage addr; -+ struct sockaddr *paddr = (struct sockaddr *)&addr; -+ socklen_t alen = sizeof addr; -+ -+ err = errno; -+ if (nn == 0) { -+ int shutdown_wr = so->so_state & SS_FCANTSENDMORE; -+ -+ if (!shutdown_wr && getpeername(so->s, paddr, &alen) < 0) { -+ err = errno; -+ } else { -+ getsockopt(so->s, SOL_SOCKET, SO_ERROR, &err, &elen); -+ } -+ } -+ -+ DEBUG_MISC(" --- soread() disconnected, nn = %d, errno = %d-%s", nn, -+ errno, strerror(errno)); -+ sofcantrcvmore(so); -+ -+ if (err == ECONNRESET || err == ECONNREFUSED || err == ENOTCONN || -+ err == EPIPE) { -+ tcp_drop(sototcpcb(so), err); -+ } else { -+ tcp_sockclosed(sototcpcb(so)); -+ } -+ return -1; -+ } -+ } -+ -+ /* -+ * If there was no error, try and read the second time round -+ * We read again if n = 2 (ie, there's another part of the buffer) -+ * and we read as much as we could in the first read -+ * We don't test for <= 0 this time, because there legitimately -+ * might not be any more data (since the socket is non-blocking), -+ * a close will be detected on next iteration. -+ * A return of -1 won't (shouldn't) happen, since it didn't happen above -+ */ -+ if (n == 2 && nn == iov[0].iov_len) { -+ int ret; -+ ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0); -+ if (ret > 0) -+ nn += ret; -+ } -+ -+ DEBUG_MISC(" ... 
read nn = %d bytes", nn); -+ -+ /* Update fields */ -+ sb->sb_cc += nn; -+ sb->sb_wptr += nn; -+ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) -+ sb->sb_wptr -= sb->sb_datalen; -+ return nn; -+} -+ -+int soreadbuf(struct socket *so, const char *buf, int size) -+{ -+ int n, nn, copy = size; -+ struct sbuf *sb = &so->so_snd; -+ struct iovec iov[2]; -+ -+ DEBUG_CALL("soreadbuf"); -+ DEBUG_ARG("so = %p", so); -+ -+ /* -+ * No need to check if there's enough room to read. -+ * soread wouldn't have been called if there weren't -+ */ -+ assert(size > 0); -+ if (sopreprbuf(so, iov, &n) < size) -+ goto err; -+ -+ nn = MIN(iov[0].iov_len, copy); -+ memcpy(iov[0].iov_base, buf, nn); -+ -+ copy -= nn; -+ buf += nn; -+ -+ if (copy == 0) -+ goto done; -+ -+ memcpy(iov[1].iov_base, buf, copy); -+ -+done: -+ /* Update fields */ -+ sb->sb_cc += size; -+ sb->sb_wptr += size; -+ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) -+ sb->sb_wptr -= sb->sb_datalen; -+ return size; -+err: -+ -+ sofcantrcvmore(so); -+ tcp_sockclosed(sototcpcb(so)); -+ g_critical("soreadbuf buffer too small"); -+ return -1; -+} -+ -+/* -+ * Get urgent data -+ * -+ * When the socket is created, we set it SO_OOBINLINE, -+ * so when OOB data arrives, we soread() it and everything -+ * in the send buffer is sent as urgent data -+ */ -+int sorecvoob(struct socket *so) -+{ -+ struct tcpcb *tp = sototcpcb(so); -+ int ret; -+ -+ DEBUG_CALL("sorecvoob"); -+ DEBUG_ARG("so = %p", so); -+ -+ /* -+ * We take a guess at how much urgent data has arrived. -+ * In most situations, when urgent data arrives, the next -+ * read() should get all the urgent data. This guess will -+ * be wrong however if more data arrives just after the -+ * urgent data, or the read() doesn't return all the -+ * urgent data. 
-+ */ -+ ret = soread(so); -+ if (ret > 0) { -+ tp->snd_up = tp->snd_una + so->so_snd.sb_cc; -+ tp->t_force = 1; -+ tcp_output(tp); -+ tp->t_force = 0; -+ } -+ -+ return ret; -+} -+ -+/* -+ * Send urgent data -+ * There's a lot duplicated code here, but... -+ */ -+int sosendoob(struct socket *so) -+{ -+ struct sbuf *sb = &so->so_rcv; -+ char buff[2048]; /* XXX Shouldn't be sending more oob data than this */ -+ -+ int n; -+ -+ DEBUG_CALL("sosendoob"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc); -+ -+ if (so->so_urgc > 2048) -+ so->so_urgc = 2048; /* XXXX */ -+ -+ if (sb->sb_rptr < sb->sb_wptr) { -+ /* We can send it directly */ -+ n = slirp_send(so, sb->sb_rptr, so->so_urgc, -+ (MSG_OOB)); /* |MSG_DONTWAIT)); */ -+ } else { -+ /* -+ * Since there's no sendv or sendtov like writev, -+ * we must copy all data to a linear buffer then -+ * send it all -+ */ -+ uint32_t urgc = so->so_urgc; -+ int len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; -+ if (len > urgc) { -+ len = urgc; -+ } -+ memcpy(buff, sb->sb_rptr, len); -+ urgc -= len; -+ if (urgc) { -+ n = sb->sb_wptr - sb->sb_data; -+ if (n > urgc) { -+ n = urgc; -+ } -+ memcpy((buff + len), sb->sb_data, n); -+ len += n; -+ } -+ n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */ -+#ifdef DEBUG -+ if (n != len) { -+ DEBUG_ERROR("Didn't send all data urgently XXXXX"); -+ } -+#endif -+ } -+ -+ if (n < 0) { -+ return n; -+ } -+ so->so_urgc -= n; -+ DEBUG_MISC(" ---2 sent %d bytes urgent data, %d urgent bytes left", n, -+ so->so_urgc); -+ -+ sb->sb_cc -= n; -+ sb->sb_rptr += n; -+ if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) -+ sb->sb_rptr -= sb->sb_datalen; -+ -+ return n; -+} -+ -+/* -+ * Write data from so_rcv to so's socket, -+ * updating all sbuf field as necessary -+ */ -+int sowrite(struct socket *so) -+{ -+ int n, nn; -+ struct sbuf *sb = &so->so_rcv; -+ int len = sb->sb_cc; -+ struct iovec iov[2]; -+ -+ DEBUG_CALL("sowrite"); -+ DEBUG_ARG("so = %p", so); -+ -+ if 
(so->so_urgc) { -+ uint32_t expected = so->so_urgc; -+ if (sosendoob(so) < expected) { -+ /* Treat a short write as a fatal error too, -+ * rather than continuing on and sending the urgent -+ * data as if it were non-urgent and leaving the -+ * so_urgc count wrong. -+ */ -+ goto err_disconnected; -+ } -+ if (sb->sb_cc == 0) -+ return 0; -+ } -+ -+ /* -+ * No need to check if there's something to write, -+ * sowrite wouldn't have been called otherwise -+ */ -+ -+ iov[0].iov_base = sb->sb_rptr; -+ iov[1].iov_base = NULL; -+ iov[1].iov_len = 0; -+ if (sb->sb_rptr < sb->sb_wptr) { -+ iov[0].iov_len = sb->sb_wptr - sb->sb_rptr; -+ /* Should never succeed, but... */ -+ if (iov[0].iov_len > len) -+ iov[0].iov_len = len; -+ n = 1; -+ } else { -+ iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; -+ if (iov[0].iov_len > len) -+ iov[0].iov_len = len; -+ len -= iov[0].iov_len; -+ if (len) { -+ iov[1].iov_base = sb->sb_data; -+ iov[1].iov_len = sb->sb_wptr - sb->sb_data; -+ if (iov[1].iov_len > len) -+ iov[1].iov_len = len; -+ n = 2; -+ } else -+ n = 1; -+ } -+ /* Check if there's urgent data to send, and if so, send it */ -+ -+ nn = slirp_send(so, iov[0].iov_base, iov[0].iov_len, 0); -+ /* This should never happen, but people tell me it does *shrug* */ -+ if (nn < 0 && (errno == EAGAIN || errno == EINTR)) -+ return 0; -+ -+ if (nn <= 0) { -+ goto err_disconnected; -+ } -+ -+ if (n == 2 && nn == iov[0].iov_len) { -+ int ret; -+ ret = slirp_send(so, iov[1].iov_base, iov[1].iov_len, 0); -+ if (ret > 0) -+ nn += ret; -+ } -+ DEBUG_MISC(" ... 
wrote nn = %d bytes", nn); -+ -+ /* Update sbuf */ -+ sb->sb_cc -= nn; -+ sb->sb_rptr += nn; -+ if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) -+ sb->sb_rptr -= sb->sb_datalen; -+ -+ /* -+ * If in DRAIN mode, and there's no more data, set -+ * it CANTSENDMORE -+ */ -+ if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0) -+ sofcantsendmore(so); -+ -+ return nn; -+ -+err_disconnected: -+ DEBUG_MISC(" --- sowrite disconnected, so->so_state = %x, errno = %d", -+ so->so_state, errno); -+ sofcantsendmore(so); -+ tcp_sockclosed(sototcpcb(so)); -+ return -1; -+} -+ -+/* -+ * recvfrom() a UDP socket -+ */ -+void sorecvfrom(struct socket *so) -+{ -+ struct sockaddr_storage addr; -+ struct sockaddr_storage saddr, daddr; -+ socklen_t addrlen = sizeof(struct sockaddr_storage); -+ -+ DEBUG_CALL("sorecvfrom"); -+ DEBUG_ARG("so = %p", so); -+ -+ if (so->so_type == IPPROTO_ICMP) { /* This is a "ping" reply */ -+ char buff[256]; -+ int len; -+ -+ len = recvfrom(so->s, buff, 256, 0, (struct sockaddr *)&addr, &addrlen); -+ /* XXX Check if reply is "correct"? */ -+ -+ if (len == -1 || len == 0) { -+ uint8_t code = ICMP_UNREACH_PORT; -+ -+ if (errno == EHOSTUNREACH) -+ code = ICMP_UNREACH_HOST; -+ else if (errno == ENETUNREACH) -+ code = ICMP_UNREACH_NET; -+ -+ DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno)); -+ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno)); -+ } else { -+ icmp_reflect(so->so_m); -+ so->so_m = NULL; /* Don't m_free() it again! 
*/ -+ } -+ /* No need for this socket anymore, udp_detach it */ -+ udp_detach(so); -+ } else { /* A "normal" UDP packet */ -+ struct mbuf *m; -+ int len; -+#ifdef _WIN32 -+ unsigned long n; -+#else -+ int n; -+#endif -+ -+ if (ioctlsocket(so->s, FIONREAD, &n) != 0) { -+ DEBUG_MISC(" ioctlsocket errno = %d-%s\n", errno, strerror(errno)); -+ return; -+ } -+ if (n == 0) { -+ return; -+ } -+ -+ m = m_get(so->slirp); -+ if (!m) { -+ return; -+ } -+ switch (so->so_ffamily) { -+ case AF_INET: -+ m->m_data += IF_MAXLINKHDR + sizeof(struct udpiphdr); -+ break; -+ case AF_INET6: -+ m->m_data += -+ IF_MAXLINKHDR + sizeof(struct ip6) + sizeof(struct udphdr); -+ break; -+ default: -+ g_assert_not_reached(); -+ } -+ -+ /* -+ * XXX Shouldn't FIONREAD packets destined for port 53, -+ * but I don't know the max packet size for DNS lookups -+ */ -+ len = M_FREEROOM(m); -+ /* if (so->so_fport != htons(53)) { */ -+ -+ if (n > len) { -+ n = (m->m_data - m->m_dat) + m->m_len + n + 1; -+ m_inc(m, n); -+ len = M_FREEROOM(m); -+ } -+ /* } */ -+ -+ m->m_len = recvfrom(so->s, m->m_data, len, 0, (struct sockaddr *)&addr, -+ &addrlen); -+ DEBUG_MISC(" did recvfrom %d, errno = %d-%s", m->m_len, errno, -+ strerror(errno)); -+ if (m->m_len < 0) { -+ /* Report error as ICMP */ -+ switch (so->so_lfamily) { -+ uint8_t code; -+ case AF_INET: -+ code = ICMP_UNREACH_PORT; -+ -+ if (errno == EHOSTUNREACH) { -+ code = ICMP_UNREACH_HOST; -+ } else if (errno == ENETUNREACH) { -+ code = ICMP_UNREACH_NET; -+ } -+ -+ DEBUG_MISC(" rx error, tx icmp ICMP_UNREACH:%i", code); -+ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, -+ strerror(errno)); -+ break; -+ case AF_INET6: -+ code = ICMP6_UNREACH_PORT; -+ -+ if (errno == EHOSTUNREACH) { -+ code = ICMP6_UNREACH_ADDRESS; -+ } else if (errno == ENETUNREACH) { -+ code = ICMP6_UNREACH_NO_ROUTE; -+ } -+ -+ DEBUG_MISC(" rx error, tx icmp6 ICMP_UNREACH:%i", code); -+ icmp6_send_error(so->so_m, ICMP6_UNREACH, code); -+ break; -+ default: -+ g_assert_not_reached(); -+ } 
-+ m_free(m); -+ } else { -+ /* -+ * Hack: domain name lookup will be used the most for UDP, -+ * and since they'll only be used once there's no need -+ * for the 4 minute (or whatever) timeout... So we time them -+ * out much quicker (10 seconds for now...) -+ */ -+ if (so->so_expire) { -+ if (so->so_fport == htons(53)) -+ so->so_expire = curtime + SO_EXPIREFAST; -+ else -+ so->so_expire = curtime + SO_EXPIRE; -+ } -+ -+ /* -+ * If this packet was destined for CTL_ADDR, -+ * make it look like that's where it came from -+ */ -+ saddr = addr; -+ sotranslate_in(so, &saddr); -+ daddr = so->lhost.ss; -+ -+ switch (so->so_ffamily) { -+ case AF_INET: -+ udp_output(so, m, (struct sockaddr_in *)&saddr, -+ (struct sockaddr_in *)&daddr, so->so_iptos); -+ break; -+ case AF_INET6: -+ udp6_output(so, m, (struct sockaddr_in6 *)&saddr, -+ (struct sockaddr_in6 *)&daddr); -+ break; -+ default: -+ g_assert_not_reached(); -+ } -+ } /* rx error */ -+ } /* if ping packet */ -+} -+ -+/* -+ * sendto() a socket -+ */ -+int sosendto(struct socket *so, struct mbuf *m) -+{ -+ int ret; -+ struct sockaddr_storage addr; -+ -+ DEBUG_CALL("sosendto"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("m = %p", m); -+ -+ addr = so->fhost.ss; -+ DEBUG_CALL(" sendto()ing)"); -+ if (sotranslate_out(so, &addr) < 0) { -+ return -1; -+ } -+ -+ /* Don't care what port we get */ -+ ret = sendto(so->s, m->m_data, m->m_len, 0, (struct sockaddr *)&addr, -+ sockaddr_size(&addr)); -+ if (ret < 0) -+ return -1; -+ -+ /* -+ * Kill the socket if there's no reply in 4 minutes, -+ * but only if it's an expirable socket -+ */ -+ if (so->so_expire) -+ so->so_expire = curtime + SO_EXPIRE; -+ so->so_state &= SS_PERSISTENT_MASK; -+ so->so_state |= SS_ISFCONNECTED; /* So that it gets select()ed */ -+ return 0; -+} -+ -+/* -+ * Listen for incoming TCP connections -+ */ -+struct socket *tcp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, -+ uint32_t laddr, unsigned lport, int flags) -+{ -+ /* TODO: IPv6 */ -+ struct 
sockaddr_in addr; -+ struct socket *so; -+ int s, opt = 1; -+ socklen_t addrlen = sizeof(addr); -+ memset(&addr, 0, addrlen); -+ -+ DEBUG_CALL("tcp_listen"); -+ DEBUG_ARG("haddr = %s", inet_ntoa((struct in_addr){ .s_addr = haddr })); -+ DEBUG_ARG("hport = %d", ntohs(hport)); -+ DEBUG_ARG("laddr = %s", inet_ntoa((struct in_addr){ .s_addr = laddr })); -+ DEBUG_ARG("lport = %d", ntohs(lport)); -+ DEBUG_ARG("flags = %x", flags); -+ -+ so = socreate(slirp); -+ -+ /* Don't tcp_attach... we don't need so_snd nor so_rcv */ -+ if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL) { -+ g_free(so); -+ return NULL; -+ } -+ insque(so, &slirp->tcb); -+ -+ /* -+ * SS_FACCEPTONCE sockets must time out. -+ */ -+ if (flags & SS_FACCEPTONCE) -+ so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT * 2; -+ -+ so->so_state &= SS_PERSISTENT_MASK; -+ so->so_state |= (SS_FACCEPTCONN | flags); -+ so->so_lfamily = AF_INET; -+ so->so_lport = lport; /* Kept in network format */ -+ so->so_laddr.s_addr = laddr; /* Ditto */ -+ -+ addr.sin_family = AF_INET; -+ addr.sin_addr.s_addr = haddr; -+ addr.sin_port = hport; -+ -+ if (((s = slirp_socket(AF_INET, SOCK_STREAM, 0)) < 0) || -+ (slirp_socket_set_fast_reuse(s) < 0) || -+ (bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0) || -+ (listen(s, 1) < 0)) { -+ int tmperrno = errno; /* Don't clobber the real reason we failed */ -+ -+ if (s >= 0) { -+ closesocket(s); -+ } -+ sofree(so); -+ /* Restore the real errno */ -+#ifdef _WIN32 -+ WSASetLastError(tmperrno); -+#else -+ errno = tmperrno; -+#endif -+ return NULL; -+ } -+ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); -+ opt = 1; -+ setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(int)); -+ -+ getsockname(s, (struct sockaddr *)&addr, &addrlen); -+ so->so_ffamily = AF_INET; -+ so->so_fport = addr.sin_port; -+ if (addr.sin_addr.s_addr == 0 || -+ addr.sin_addr.s_addr == loopback_addr.s_addr) -+ so->so_faddr = slirp->vhost_addr; -+ else -+ so->so_faddr = addr.sin_addr; -+ -+ so->s = s; -+ return 
so; -+} -+ -+/* -+ * Various session state calls -+ * XXX Should be #define's -+ * The socket state stuff needs work, these often get call 2 or 3 -+ * times each when only 1 was needed -+ */ -+void soisfconnecting(struct socket *so) -+{ -+ so->so_state &= ~(SS_NOFDREF | SS_ISFCONNECTED | SS_FCANTRCVMORE | -+ SS_FCANTSENDMORE | SS_FWDRAIN); -+ so->so_state |= SS_ISFCONNECTING; /* Clobber other states */ -+} -+ -+void soisfconnected(struct socket *so) -+{ -+ so->so_state &= ~(SS_ISFCONNECTING | SS_FWDRAIN | SS_NOFDREF); -+ so->so_state |= SS_ISFCONNECTED; /* Clobber other states */ -+} -+ -+static void sofcantrcvmore(struct socket *so) -+{ -+ if ((so->so_state & SS_NOFDREF) == 0) { -+ shutdown(so->s, 0); -+ } -+ so->so_state &= ~(SS_ISFCONNECTING); -+ if (so->so_state & SS_FCANTSENDMORE) { -+ so->so_state &= SS_PERSISTENT_MASK; -+ so->so_state |= SS_NOFDREF; /* Don't select it */ -+ } else { -+ so->so_state |= SS_FCANTRCVMORE; -+ } -+} -+ -+static void sofcantsendmore(struct socket *so) -+{ -+ if ((so->so_state & SS_NOFDREF) == 0) { -+ shutdown(so->s, 1); /* send FIN to fhost */ -+ } -+ so->so_state &= ~(SS_ISFCONNECTING); -+ if (so->so_state & SS_FCANTRCVMORE) { -+ so->so_state &= SS_PERSISTENT_MASK; -+ so->so_state |= SS_NOFDREF; /* as above */ -+ } else { -+ so->so_state |= SS_FCANTSENDMORE; -+ } -+} -+ -+/* -+ * Set write drain mode -+ * Set CANTSENDMORE once all data has been write()n -+ */ -+void sofwdrain(struct socket *so) -+{ -+ if (so->so_rcv.sb_cc) -+ so->so_state |= SS_FWDRAIN; -+ else -+ sofcantsendmore(so); -+} -+ -+static bool sotranslate_out4(Slirp *s, struct socket *so, struct sockaddr_in *sin) -+{ -+ if (so->so_faddr.s_addr == s->vnameserver_addr.s_addr) { -+ return get_dns_addr(&sin->sin_addr) >= 0; -+ } -+ -+ if (so->so_faddr.s_addr == s->vhost_addr.s_addr || -+ so->so_faddr.s_addr == 0xffffffff) { -+ if (s->disable_host_loopback) { -+ return false; -+ } -+ -+ sin->sin_addr = loopback_addr; -+ } -+ -+ return true; -+} -+ -+static bool 
sotranslate_out6(Slirp *s, struct socket *so, struct sockaddr_in6 *sin) -+{ -+ if (in6_equal(&so->so_faddr6, &s->vnameserver_addr6)) { -+ uint32_t scope_id; -+ if (get_dns6_addr(&sin->sin6_addr, &scope_id) >= 0) { -+ sin->sin6_scope_id = scope_id; -+ return true; -+ } -+ return false; -+ } -+ -+ if (in6_equal_net(&so->so_faddr6, &s->vprefix_addr6, s->vprefix_len) || -+ in6_equal(&so->so_faddr6, &(struct in6_addr)ALLNODES_MULTICAST)) { -+ if (s->disable_host_loopback) { -+ return false; -+ } -+ -+ sin->sin6_addr = in6addr_loopback; -+ } -+ -+ return true; -+} -+ -+ -+/* -+ * Translate addr in host addr when it is a virtual address -+ */ -+int sotranslate_out(struct socket *so, struct sockaddr_storage *addr) -+{ -+ bool ok = true; -+ -+ switch (addr->ss_family) { -+ case AF_INET: -+ ok = sotranslate_out4(so->slirp, so, (struct sockaddr_in *)addr); -+ break; -+ case AF_INET6: -+ ok = sotranslate_out6(so->slirp, so, (struct sockaddr_in6 *)addr); -+ break; -+ } -+ -+ if (!ok) { -+ errno = EPERM; -+ return -1; -+ } -+ -+ return 0; -+} -+ -+void sotranslate_in(struct socket *so, struct sockaddr_storage *addr) -+{ -+ Slirp *slirp = so->slirp; -+ struct sockaddr_in *sin = (struct sockaddr_in *)addr; -+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; -+ -+ switch (addr->ss_family) { -+ case AF_INET: -+ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == -+ slirp->vnetwork_addr.s_addr) { -+ uint32_t inv_mask = ~slirp->vnetwork_mask.s_addr; -+ -+ if ((so->so_faddr.s_addr & inv_mask) == inv_mask) { -+ sin->sin_addr = slirp->vhost_addr; -+ } else if (sin->sin_addr.s_addr == loopback_addr.s_addr || -+ so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { -+ sin->sin_addr = so->so_faddr; -+ } -+ } -+ break; -+ -+ case AF_INET6: -+ if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6, -+ slirp->vprefix_len)) { -+ if (in6_equal(&sin6->sin6_addr, &in6addr_loopback) || -+ !in6_equal(&so->so_faddr6, &slirp->vhost_addr6)) { -+ sin6->sin6_addr = so->so_faddr6; -+ } -+ } 
-+ break; -+ -+ default: -+ break; -+ } -+} -+ -+/* -+ * Translate connections from localhost to the real hostname -+ */ -+void sotranslate_accept(struct socket *so) -+{ -+ Slirp *slirp = so->slirp; -+ -+ switch (so->so_ffamily) { -+ case AF_INET: -+ if (so->so_faddr.s_addr == INADDR_ANY || -+ (so->so_faddr.s_addr & loopback_mask) == -+ (loopback_addr.s_addr & loopback_mask)) { -+ so->so_faddr = slirp->vhost_addr; -+ } -+ break; -+ -+ case AF_INET6: -+ if (in6_equal(&so->so_faddr6, &in6addr_any) || -+ in6_equal(&so->so_faddr6, &in6addr_loopback)) { -+ so->so_faddr6 = slirp->vhost_addr6; -+ } -+ break; -+ -+ default: -+ break; -+ } -+} -+ -+void sodrop(struct socket *s, int num) -+{ -+ if (sbdrop(&s->so_snd, num)) { -+ s->slirp->cb->notify(s->slirp->opaque); -+ } -+} -diff --git a/slirp/src/socket.h b/slirp/src/socket.h -new file mode 100644 -index 0000000000..a6a1e5e214 ---- /dev/null -+++ b/slirp/src/socket.h -@@ -0,0 +1,164 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1995 Danny Gasparovski. 
-+ */ -+ -+#ifndef SLIRP_SOCKET_H -+#define SLIRP_SOCKET_H -+ -+#include "misc.h" -+ -+#define SO_EXPIRE 240000 -+#define SO_EXPIREFAST 10000 -+ -+/* -+ * Our socket structure -+ */ -+ -+union slirp_sockaddr { -+ struct sockaddr_storage ss; -+ struct sockaddr_in sin; -+ struct sockaddr_in6 sin6; -+}; -+ -+struct socket { -+ struct socket *so_next, *so_prev; /* For a linked list of sockets */ -+ -+ int s; /* The actual socket */ -+ struct gfwd_list *guestfwd; -+ -+ int pollfds_idx; /* GPollFD GArray index */ -+ -+ Slirp *slirp; /* managing slirp instance */ -+ -+ /* XXX union these with not-yet-used sbuf params */ -+ struct mbuf *so_m; /* Pointer to the original SYN packet, -+ * for non-blocking connect()'s, and -+ * PING reply's */ -+ struct tcpiphdr *so_ti; /* Pointer to the original ti within -+ * so_mconn, for non-blocking connections */ -+ uint32_t so_urgc; -+ union slirp_sockaddr fhost; /* Foreign host */ -+#define so_faddr fhost.sin.sin_addr -+#define so_fport fhost.sin.sin_port -+#define so_faddr6 fhost.sin6.sin6_addr -+#define so_fport6 fhost.sin6.sin6_port -+#define so_ffamily fhost.ss.ss_family -+ -+ union slirp_sockaddr lhost; /* Local host */ -+#define so_laddr lhost.sin.sin_addr -+#define so_lport lhost.sin.sin_port -+#define so_laddr6 lhost.sin6.sin6_addr -+#define so_lport6 lhost.sin6.sin6_port -+#define so_lfamily lhost.ss.ss_family -+ -+ uint8_t so_iptos; /* Type of service */ -+ uint8_t so_emu; /* Is the socket emulated? 
*/ -+ -+ uint8_t so_type; /* Type of socket, UDP or TCP */ -+ int32_t so_state; /* internal state flags SS_*, below */ -+ -+ struct tcpcb *so_tcpcb; /* pointer to TCP protocol control block */ -+ unsigned so_expire; /* When the socket will expire */ -+ -+ int so_queued; /* Number of packets queued from this socket */ -+ int so_nqueued; /* Number of packets queued in a row -+ * Used to determine when to "downgrade" a session -+ * from fastq to batchq */ -+ -+ struct sbuf so_rcv; /* Receive buffer */ -+ struct sbuf so_snd; /* Send buffer */ -+}; -+ -+ -+/* -+ * Socket state bits. (peer means the host on the Internet, -+ * local host means the host on the other end of the modem) -+ */ -+#define SS_NOFDREF 0x001 /* No fd reference */ -+ -+#define SS_ISFCONNECTING \ -+ 0x002 /* Socket is connecting to peer (non-blocking connect()'s) */ -+#define SS_ISFCONNECTED 0x004 /* Socket is connected to peer */ -+#define SS_FCANTRCVMORE \ -+ 0x008 /* Socket can't receive more from peer (for half-closes) */ -+#define SS_FCANTSENDMORE \ -+ 0x010 /* Socket can't send more to peer (for half-closes) */ -+#define SS_FWDRAIN \ -+ 0x040 /* We received a FIN, drain data and set SS_FCANTSENDMORE */ -+ -+#define SS_CTL 0x080 -+#define SS_FACCEPTCONN \ -+ 0x100 /* Socket is accepting connections from a host on the internet */ -+#define SS_FACCEPTONCE \ -+ 0x200 /* If set, the SS_FACCEPTCONN socket will die after one accept */ -+ -+#define SS_PERSISTENT_MASK 0xf000 /* Unremovable state bits */ -+#define SS_HOSTFWD 0x1000 /* Socket describes host->guest forwarding */ -+#define SS_INCOMING \ -+ 0x2000 /* Connection was initiated by a host on the internet */ -+ -+static inline int sockaddr_equal(struct sockaddr_storage *a, -+ struct sockaddr_storage *b) -+{ -+ if (a->ss_family != b->ss_family) { -+ return 0; -+ } -+ -+ switch (a->ss_family) { -+ case AF_INET: { -+ struct sockaddr_in *a4 = (struct sockaddr_in *)a; -+ struct sockaddr_in *b4 = (struct sockaddr_in *)b; -+ return a4->sin_addr.s_addr 
== b4->sin_addr.s_addr && -+ a4->sin_port == b4->sin_port; -+ } -+ case AF_INET6: { -+ struct sockaddr_in6 *a6 = (struct sockaddr_in6 *)a; -+ struct sockaddr_in6 *b6 = (struct sockaddr_in6 *)b; -+ return (in6_equal(&a6->sin6_addr, &b6->sin6_addr) && -+ a6->sin6_port == b6->sin6_port); -+ } -+ default: -+ g_assert_not_reached(); -+ } -+ -+ return 0; -+} -+ -+static inline socklen_t sockaddr_size(struct sockaddr_storage *a) -+{ -+ switch (a->ss_family) { -+ case AF_INET: -+ return sizeof(struct sockaddr_in); -+ case AF_INET6: -+ return sizeof(struct sockaddr_in6); -+ default: -+ g_assert_not_reached(); -+ } -+} -+ -+struct socket *solookup(struct socket **, struct socket *, -+ struct sockaddr_storage *, struct sockaddr_storage *); -+struct socket *socreate(Slirp *); -+void sofree(struct socket *); -+int soread(struct socket *); -+int sorecvoob(struct socket *); -+int sosendoob(struct socket *); -+int sowrite(struct socket *); -+void sorecvfrom(struct socket *); -+int sosendto(struct socket *, struct mbuf *); -+struct socket *tcp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); -+void soisfconnecting(register struct socket *); -+void soisfconnected(register struct socket *); -+void sofwdrain(struct socket *); -+struct iovec; /* For win32 */ -+size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np); -+int soreadbuf(struct socket *so, const char *buf, int size); -+ -+int sotranslate_out(struct socket *, struct sockaddr_storage *); -+void sotranslate_in(struct socket *, struct sockaddr_storage *); -+void sotranslate_accept(struct socket *); -+void sodrop(struct socket *, int num); -+ -+ -+#endif /* SLIRP_SOCKET_H */ -diff --git a/slirp/src/state.c b/slirp/src/state.c -new file mode 100644 -index 0000000000..22af77b256 ---- /dev/null -+++ b/slirp/src/state.c -@@ -0,0 +1,379 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * libslirp -+ * -+ * Copyright (c) 2004-2008 Fabrice Bellard -+ * -+ * Permission is hereby granted, free of charge, to any person 
obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. 
-+ */ -+#include "slirp.h" -+#include "vmstate.h" -+#include "stream.h" -+ -+static int slirp_tcp_post_load(void *opaque, int version) -+{ -+ tcp_template((struct tcpcb *)opaque); -+ -+ return 0; -+} -+ -+static const VMStateDescription vmstate_slirp_tcp = { -+ .name = "slirp-tcp", -+ .version_id = 0, -+ .post_load = slirp_tcp_post_load, -+ .fields = (VMStateField[]){ VMSTATE_INT16(t_state, struct tcpcb), -+ VMSTATE_INT16_ARRAY(t_timer, struct tcpcb, -+ TCPT_NTIMERS), -+ VMSTATE_INT16(t_rxtshift, struct tcpcb), -+ VMSTATE_INT16(t_rxtcur, struct tcpcb), -+ VMSTATE_INT16(t_dupacks, struct tcpcb), -+ VMSTATE_UINT16(t_maxseg, struct tcpcb), -+ VMSTATE_UINT8(t_force, struct tcpcb), -+ VMSTATE_UINT16(t_flags, struct tcpcb), -+ VMSTATE_UINT32(snd_una, struct tcpcb), -+ VMSTATE_UINT32(snd_nxt, struct tcpcb), -+ VMSTATE_UINT32(snd_up, struct tcpcb), -+ VMSTATE_UINT32(snd_wl1, struct tcpcb), -+ VMSTATE_UINT32(snd_wl2, struct tcpcb), -+ VMSTATE_UINT32(iss, struct tcpcb), -+ VMSTATE_UINT32(snd_wnd, struct tcpcb), -+ VMSTATE_UINT32(rcv_wnd, struct tcpcb), -+ VMSTATE_UINT32(rcv_nxt, struct tcpcb), -+ VMSTATE_UINT32(rcv_up, struct tcpcb), -+ VMSTATE_UINT32(irs, struct tcpcb), -+ VMSTATE_UINT32(rcv_adv, struct tcpcb), -+ VMSTATE_UINT32(snd_max, struct tcpcb), -+ VMSTATE_UINT32(snd_cwnd, struct tcpcb), -+ VMSTATE_UINT32(snd_ssthresh, struct tcpcb), -+ VMSTATE_INT16(t_idle, struct tcpcb), -+ VMSTATE_INT16(t_rtt, struct tcpcb), -+ VMSTATE_UINT32(t_rtseq, struct tcpcb), -+ VMSTATE_INT16(t_srtt, struct tcpcb), -+ VMSTATE_INT16(t_rttvar, struct tcpcb), -+ VMSTATE_UINT16(t_rttmin, struct tcpcb), -+ VMSTATE_UINT32(max_sndwnd, struct tcpcb), -+ VMSTATE_UINT8(t_oobflags, struct tcpcb), -+ VMSTATE_UINT8(t_iobc, struct tcpcb), -+ VMSTATE_INT16(t_softerror, struct tcpcb), -+ VMSTATE_UINT8(snd_scale, struct tcpcb), -+ VMSTATE_UINT8(rcv_scale, struct tcpcb), -+ VMSTATE_UINT8(request_r_scale, struct tcpcb), -+ VMSTATE_UINT8(requested_s_scale, struct tcpcb), -+ VMSTATE_UINT32(ts_recent, struct 
tcpcb), -+ VMSTATE_UINT32(ts_recent_age, struct tcpcb), -+ VMSTATE_UINT32(last_ack_sent, struct tcpcb), -+ VMSTATE_END_OF_LIST() } -+}; -+ -+/* The sbuf has a pair of pointers that are migrated as offsets; -+ * we calculate the offsets and restore the pointers using -+ * pre_save/post_load on a tmp structure. -+ */ -+struct sbuf_tmp { -+ struct sbuf *parent; -+ uint32_t roff, woff; -+}; -+ -+static int sbuf_tmp_pre_save(void *opaque) -+{ -+ struct sbuf_tmp *tmp = opaque; -+ tmp->woff = tmp->parent->sb_wptr - tmp->parent->sb_data; -+ tmp->roff = tmp->parent->sb_rptr - tmp->parent->sb_data; -+ -+ return 0; -+} -+ -+static int sbuf_tmp_post_load(void *opaque, int version) -+{ -+ struct sbuf_tmp *tmp = opaque; -+ uint32_t requested_len = tmp->parent->sb_datalen; -+ -+ /* Allocate the buffer space used by the field after the tmp */ -+ sbreserve(tmp->parent, tmp->parent->sb_datalen); -+ -+ if (tmp->woff >= requested_len || tmp->roff >= requested_len) { -+ g_critical("invalid sbuf offsets r/w=%u/%u len=%u", tmp->roff, -+ tmp->woff, requested_len); -+ return -EINVAL; -+ } -+ -+ tmp->parent->sb_wptr = tmp->parent->sb_data + tmp->woff; -+ tmp->parent->sb_rptr = tmp->parent->sb_data + tmp->roff; -+ -+ return 0; -+} -+ -+ -+static const VMStateDescription vmstate_slirp_sbuf_tmp = { -+ .name = "slirp-sbuf-tmp", -+ .post_load = sbuf_tmp_post_load, -+ .pre_save = sbuf_tmp_pre_save, -+ .version_id = 0, -+ .fields = (VMStateField[]){ VMSTATE_UINT32(woff, struct sbuf_tmp), -+ VMSTATE_UINT32(roff, struct sbuf_tmp), -+ VMSTATE_END_OF_LIST() } -+}; -+ -+static const VMStateDescription vmstate_slirp_sbuf = { -+ .name = "slirp-sbuf", -+ .version_id = 0, -+ .fields = (VMStateField[]){ VMSTATE_UINT32(sb_cc, struct sbuf), -+ VMSTATE_UINT32(sb_datalen, struct sbuf), -+ VMSTATE_WITH_TMP(struct sbuf, struct sbuf_tmp, -+ vmstate_slirp_sbuf_tmp), -+ VMSTATE_VBUFFER_UINT32(sb_data, struct sbuf, 0, -+ NULL, sb_datalen), -+ VMSTATE_END_OF_LIST() } -+}; -+ -+static bool slirp_older_than_v4(void 
*opaque, int version_id) -+{ -+ return version_id < 4; -+} -+ -+static bool slirp_family_inet(void *opaque, int version_id) -+{ -+ union slirp_sockaddr *ssa = (union slirp_sockaddr *)opaque; -+ return ssa->ss.ss_family == AF_INET; -+} -+ -+static int slirp_socket_pre_load(void *opaque) -+{ -+ struct socket *so = opaque; -+ -+ tcp_attach(so); -+ /* Older versions don't load these fields */ -+ so->so_ffamily = AF_INET; -+ so->so_lfamily = AF_INET; -+ return 0; -+} -+ -+#ifndef _WIN32 -+#define VMSTATE_SIN4_ADDR(f, s, t) VMSTATE_UINT32_TEST(f, s, t) -+#else -+/* Win uses u_long rather than uint32_t - but it's still 32bits long */ -+#define VMSTATE_SIN4_ADDR(f, s, t) \ -+ VMSTATE_SINGLE_TEST(f, s, t, 0, slirp_vmstate_info_uint32, u_long) -+#endif -+ -+/* The OS provided ss_family field isn't that portable; it's size -+ * and type varies (16/8 bit, signed, unsigned) -+ * and the values it contains aren't fully portable. -+ */ -+typedef struct SS_FamilyTmpStruct { -+ union slirp_sockaddr *parent; -+ uint16_t portable_family; -+} SS_FamilyTmpStruct; -+ -+#define SS_FAMILY_MIG_IPV4 2 /* Linux, BSD, Win... 
*/ -+#define SS_FAMILY_MIG_IPV6 10 /* Linux */ -+#define SS_FAMILY_MIG_OTHER 0xffff -+ -+static int ss_family_pre_save(void *opaque) -+{ -+ SS_FamilyTmpStruct *tss = opaque; -+ -+ tss->portable_family = SS_FAMILY_MIG_OTHER; -+ -+ if (tss->parent->ss.ss_family == AF_INET) { -+ tss->portable_family = SS_FAMILY_MIG_IPV4; -+ } else if (tss->parent->ss.ss_family == AF_INET6) { -+ tss->portable_family = SS_FAMILY_MIG_IPV6; -+ } -+ -+ return 0; -+} -+ -+static int ss_family_post_load(void *opaque, int version_id) -+{ -+ SS_FamilyTmpStruct *tss = opaque; -+ -+ switch (tss->portable_family) { -+ case SS_FAMILY_MIG_IPV4: -+ tss->parent->ss.ss_family = AF_INET; -+ break; -+ case SS_FAMILY_MIG_IPV6: -+ case 23: /* compatibility: AF_INET6 from mingw */ -+ case 28: /* compatibility: AF_INET6 from FreeBSD sys/socket.h */ -+ tss->parent->ss.ss_family = AF_INET6; -+ break; -+ default: -+ g_critical("invalid ss_family type %x", tss->portable_family); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+static const VMStateDescription vmstate_slirp_ss_family = { -+ .name = "slirp-socket-addr/ss_family", -+ .pre_save = ss_family_pre_save, -+ .post_load = ss_family_post_load, -+ .fields = -+ (VMStateField[]){ VMSTATE_UINT16(portable_family, SS_FamilyTmpStruct), -+ VMSTATE_END_OF_LIST() } -+}; -+ -+static const VMStateDescription vmstate_slirp_socket_addr = { -+ .name = "slirp-socket-addr", -+ .version_id = 4, -+ .fields = -+ (VMStateField[]){ -+ VMSTATE_WITH_TMP(union slirp_sockaddr, SS_FamilyTmpStruct, -+ vmstate_slirp_ss_family), -+ VMSTATE_SIN4_ADDR(sin.sin_addr.s_addr, union slirp_sockaddr, -+ slirp_family_inet), -+ VMSTATE_UINT16_TEST(sin.sin_port, union slirp_sockaddr, -+ slirp_family_inet), -+ -+#if 0 -+ /* Untested: Needs checking by someone with IPv6 test */ -+ VMSTATE_BUFFER_TEST(sin6.sin6_addr, union slirp_sockaddr, -+ slirp_family_inet6), -+ VMSTATE_UINT16_TEST(sin6.sin6_port, union slirp_sockaddr, -+ slirp_family_inet6), -+ VMSTATE_UINT32_TEST(sin6.sin6_flowinfo, union 
slirp_sockaddr, -+ slirp_family_inet6), -+ VMSTATE_UINT32_TEST(sin6.sin6_scope_id, union slirp_sockaddr, -+ slirp_family_inet6), -+#endif -+ -+ VMSTATE_END_OF_LIST() } -+}; -+ -+static const VMStateDescription vmstate_slirp_socket = { -+ .name = "slirp-socket", -+ .version_id = 4, -+ .pre_load = slirp_socket_pre_load, -+ .fields = -+ (VMStateField[]){ -+ VMSTATE_UINT32(so_urgc, struct socket), -+ /* Pre-v4 versions */ -+ VMSTATE_SIN4_ADDR(so_faddr.s_addr, struct socket, -+ slirp_older_than_v4), -+ VMSTATE_SIN4_ADDR(so_laddr.s_addr, struct socket, -+ slirp_older_than_v4), -+ VMSTATE_UINT16_TEST(so_fport, struct socket, slirp_older_than_v4), -+ VMSTATE_UINT16_TEST(so_lport, struct socket, slirp_older_than_v4), -+ /* v4 and newer */ -+ VMSTATE_STRUCT(fhost, struct socket, 4, vmstate_slirp_socket_addr, -+ union slirp_sockaddr), -+ VMSTATE_STRUCT(lhost, struct socket, 4, vmstate_slirp_socket_addr, -+ union slirp_sockaddr), -+ -+ VMSTATE_UINT8(so_iptos, struct socket), -+ VMSTATE_UINT8(so_emu, struct socket), -+ VMSTATE_UINT8(so_type, struct socket), -+ VMSTATE_INT32(so_state, struct socket), -+ VMSTATE_STRUCT(so_rcv, struct socket, 0, vmstate_slirp_sbuf, -+ struct sbuf), -+ VMSTATE_STRUCT(so_snd, struct socket, 0, vmstate_slirp_sbuf, -+ struct sbuf), -+ VMSTATE_STRUCT_POINTER(so_tcpcb, struct socket, vmstate_slirp_tcp, -+ struct tcpcb), -+ VMSTATE_END_OF_LIST() } -+}; -+ -+static const VMStateDescription vmstate_slirp_bootp_client = { -+ .name = "slirp_bootpclient", -+ .fields = (VMStateField[]){ VMSTATE_UINT16(allocated, BOOTPClient), -+ VMSTATE_BUFFER(macaddr, BOOTPClient), -+ VMSTATE_END_OF_LIST() } -+}; -+ -+static const VMStateDescription vmstate_slirp = { -+ .name = "slirp", -+ .version_id = 4, -+ .fields = (VMStateField[]){ VMSTATE_UINT16_V(ip_id, Slirp, 2), -+ VMSTATE_STRUCT_ARRAY( -+ bootp_clients, Slirp, NB_BOOTP_CLIENTS, 3, -+ vmstate_slirp_bootp_client, BOOTPClient), -+ VMSTATE_END_OF_LIST() } -+}; -+ -+void slirp_state_save(Slirp *slirp, SlirpWriteCb 
write_cb, void *opaque) -+{ -+ struct gfwd_list *ex_ptr; -+ SlirpOStream f = { -+ .write_cb = write_cb, -+ .opaque = opaque, -+ }; -+ -+ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) -+ if (ex_ptr->write_cb) { -+ struct socket *so; -+ so = slirp_find_ctl_socket(slirp, ex_ptr->ex_addr, -+ ntohs(ex_ptr->ex_fport)); -+ if (!so) { -+ continue; -+ } -+ -+ slirp_ostream_write_u8(&f, 42); -+ slirp_vmstate_save_state(&f, &vmstate_slirp_socket, so); -+ } -+ slirp_ostream_write_u8(&f, 0); -+ -+ slirp_vmstate_save_state(&f, &vmstate_slirp, slirp); -+} -+ -+ -+int slirp_state_load(Slirp *slirp, int version_id, SlirpReadCb read_cb, -+ void *opaque) -+{ -+ struct gfwd_list *ex_ptr; -+ SlirpIStream f = { -+ .read_cb = read_cb, -+ .opaque = opaque, -+ }; -+ -+ while (slirp_istream_read_u8(&f)) { -+ int ret; -+ struct socket *so = socreate(slirp); -+ -+ ret = -+ slirp_vmstate_load_state(&f, &vmstate_slirp_socket, so, version_id); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) != -+ slirp->vnetwork_addr.s_addr) { -+ return -EINVAL; -+ } -+ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { -+ if (ex_ptr->write_cb && -+ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr && -+ so->so_fport == ex_ptr->ex_fport) { -+ break; -+ } -+ } -+ if (!ex_ptr) { -+ return -EINVAL; -+ } -+ -+ so->guestfwd = ex_ptr; -+ } -+ -+ return slirp_vmstate_load_state(&f, &vmstate_slirp, slirp, version_id); -+} -+ -+int slirp_state_version(void) -+{ -+ return 4; -+} -diff --git a/slirp/src/stream.c b/slirp/src/stream.c -new file mode 100644 -index 0000000000..6cf326f669 ---- /dev/null -+++ b/slirp/src/stream.c -@@ -0,0 +1,120 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * libslirp io streams -+ * -+ * Copyright (c) 2018 Red Hat, Inc. 
-+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. 
-+ */ -+#include "stream.h" -+#include -+ -+bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size) -+{ -+ return f->read_cb(buf, size, f->opaque) == size; -+} -+ -+bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size) -+{ -+ return f->write_cb(buf, size, f->opaque) == size; -+} -+ -+uint8_t slirp_istream_read_u8(SlirpIStream *f) -+{ -+ uint8_t b; -+ -+ if (slirp_istream_read(f, &b, sizeof(b))) { -+ return b; -+ } -+ -+ return 0; -+} -+ -+bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b) -+{ -+ return slirp_ostream_write(f, &b, sizeof(b)); -+} -+ -+uint16_t slirp_istream_read_u16(SlirpIStream *f) -+{ -+ uint16_t b; -+ -+ if (slirp_istream_read(f, &b, sizeof(b))) { -+ return GUINT16_FROM_BE(b); -+ } -+ -+ return 0; -+} -+ -+bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b) -+{ -+ b = GUINT16_TO_BE(b); -+ return slirp_ostream_write(f, &b, sizeof(b)); -+} -+ -+uint32_t slirp_istream_read_u32(SlirpIStream *f) -+{ -+ uint32_t b; -+ -+ if (slirp_istream_read(f, &b, sizeof(b))) { -+ return GUINT32_FROM_BE(b); -+ } -+ -+ return 0; -+} -+ -+bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b) -+{ -+ b = GUINT32_TO_BE(b); -+ return slirp_ostream_write(f, &b, sizeof(b)); -+} -+ -+int16_t slirp_istream_read_i16(SlirpIStream *f) -+{ -+ int16_t b; -+ -+ if (slirp_istream_read(f, &b, sizeof(b))) { -+ return GINT16_FROM_BE(b); -+ } -+ -+ return 0; -+} -+ -+bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b) -+{ -+ b = GINT16_TO_BE(b); -+ return slirp_ostream_write(f, &b, sizeof(b)); -+} -+ -+int32_t slirp_istream_read_i32(SlirpIStream *f) -+{ -+ int32_t b; -+ -+ if (slirp_istream_read(f, &b, sizeof(b))) { -+ return GINT32_FROM_BE(b); -+ } -+ -+ return 0; -+} -+ -+bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b) -+{ -+ b = GINT32_TO_BE(b); -+ return slirp_ostream_write(f, &b, sizeof(b)); -+} -diff --git a/slirp/src/stream.h b/slirp/src/stream.h -new file mode 100644 -index 0000000000..08bb5b6610 ---- /dev/null -+++ 
b/slirp/src/stream.h -@@ -0,0 +1,35 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+#ifndef STREAM_H_ -+#define STREAM_H_ -+ -+#include "libslirp.h" -+ -+typedef struct SlirpIStream { -+ SlirpReadCb read_cb; -+ void *opaque; -+} SlirpIStream; -+ -+typedef struct SlirpOStream { -+ SlirpWriteCb write_cb; -+ void *opaque; -+} SlirpOStream; -+ -+bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size); -+bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size); -+ -+uint8_t slirp_istream_read_u8(SlirpIStream *f); -+bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b); -+ -+uint16_t slirp_istream_read_u16(SlirpIStream *f); -+bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b); -+ -+uint32_t slirp_istream_read_u32(SlirpIStream *f); -+bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b); -+ -+int16_t slirp_istream_read_i16(SlirpIStream *f); -+bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b); -+ -+int32_t slirp_istream_read_i32(SlirpIStream *f); -+bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b); -+ -+#endif /* STREAM_H_ */ -diff --git a/slirp/src/tcp.h b/slirp/src/tcp.h -new file mode 100644 -index 0000000000..70a9760664 ---- /dev/null -+++ b/slirp/src/tcp.h -@@ -0,0 +1,169 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. 
Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)tcp.h 8.1 (Berkeley) 6/10/93 -+ * tcp.h,v 1.3 1994/08/21 05:27:34 paul Exp -+ */ -+ -+#ifndef TCP_H -+#define TCP_H -+ -+#include -+ -+typedef uint32_t tcp_seq; -+ -+#define PR_SLOWHZ 2 /* 2 slow timeouts per second (approx) */ -+#define PR_FASTHZ 5 /* 5 fast timeouts per second (not important) */ -+ -+#define TCP_SNDSPACE 1024 * 128 -+#define TCP_RCVSPACE 1024 * 128 -+#define TCP_MAXSEG_MAX 32768 -+ -+/* -+ * TCP header. -+ * Per RFC 793, September, 1981. 
-+ */ -+#define tcphdr slirp_tcphdr -+struct tcphdr { -+ uint16_t th_sport; /* source port */ -+ uint16_t th_dport; /* destination port */ -+ tcp_seq th_seq; /* sequence number */ -+ tcp_seq th_ack; /* acknowledgement number */ -+#if G_BYTE_ORDER == G_BIG_ENDIAN -+ uint8_t th_off : 4, /* data offset */ -+ th_x2 : 4; /* (unused) */ -+#else -+ uint8_t th_x2 : 4, /* (unused) */ -+ th_off : 4; /* data offset */ -+#endif -+ uint8_t th_flags; -+ uint16_t th_win; /* window */ -+ uint16_t th_sum; /* checksum */ -+ uint16_t th_urp; /* urgent pointer */ -+}; -+ -+#include "tcp_var.h" -+ -+#ifndef TH_FIN -+#define TH_FIN 0x01 -+#define TH_SYN 0x02 -+#define TH_RST 0x04 -+#define TH_PUSH 0x08 -+#define TH_ACK 0x10 -+#define TH_URG 0x20 -+#endif -+ -+#ifndef TCPOPT_EOL -+#define TCPOPT_EOL 0 -+#define TCPOPT_NOP 1 -+#define TCPOPT_MAXSEG 2 -+#define TCPOPT_WINDOW 3 -+#define TCPOPT_SACK_PERMITTED 4 /* Experimental */ -+#define TCPOPT_SACK 5 /* Experimental */ -+#define TCPOPT_TIMESTAMP 8 -+ -+#define TCPOPT_TSTAMP_HDR \ -+ (TCPOPT_NOP << 24 | TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 | \ -+ TCPOLEN_TIMESTAMP) -+#endif -+ -+#ifndef TCPOLEN_MAXSEG -+#define TCPOLEN_MAXSEG 4 -+#define TCPOLEN_WINDOW 3 -+#define TCPOLEN_SACK_PERMITTED 2 -+#define TCPOLEN_TIMESTAMP 10 -+#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP + 2) /* appendix A */ -+#endif -+ -+#undef TCP_MAXWIN -+#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */ -+ -+#undef TCP_MAX_WINSHIFT -+#define TCP_MAX_WINSHIFT 14 /* maximum window shift */ -+ -+/* -+ * User-settable options (used with setsockopt). -+ * -+ * We don't use the system headers on unix because we have conflicting -+ * local structures. We can't avoid the system definitions on Windows, -+ * so we undefine them. -+ */ -+#undef TCP_NODELAY -+#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ -+#undef TCP_MAXSEG -+ -+/* -+ * TCP FSM state definitions. -+ * Per RFC793, September, 1981. 
-+ */ -+ -+#define TCP_NSTATES 11 -+ -+#define TCPS_CLOSED 0 /* closed */ -+#define TCPS_LISTEN 1 /* listening for connection */ -+#define TCPS_SYN_SENT 2 /* active, have sent syn */ -+#define TCPS_SYN_RECEIVED 3 /* have send and received syn */ -+/* states < TCPS_ESTABLISHED are those where connections not established */ -+#define TCPS_ESTABLISHED 4 /* established */ -+#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */ -+/* states > TCPS_CLOSE_WAIT are those where user has closed */ -+#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */ -+#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */ -+#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */ -+/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */ -+#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */ -+#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */ -+ -+#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED) -+#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED) -+#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT) -+ -+/* -+ * TCP sequence numbers are 32 bit integers operated -+ * on with modular arithmetic. These macros can be -+ * used to compare such integers. -+ */ -+#define SEQ_LT(a, b) ((int)((a) - (b)) < 0) -+#define SEQ_LEQ(a, b) ((int)((a) - (b)) <= 0) -+#define SEQ_GT(a, b) ((int)((a) - (b)) > 0) -+#define SEQ_GEQ(a, b) ((int)((a) - (b)) >= 0) -+ -+/* -+ * Macros to initialize tcp sequence numbers for -+ * send and receive from initial send and receive -+ * sequence numbers. 
-+ */ -+#define tcp_rcvseqinit(tp) (tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1 -+ -+#define tcp_sendseqinit(tp) \ -+ (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = (tp)->iss -+ -+#define TCP_ISSINCR (125 * 1024) /* increment for tcp_iss each second */ -+ -+#endif -diff --git a/slirp/src/tcp_input.c b/slirp/src/tcp_input.c -new file mode 100644 -index 0000000000..d55b0c81dc ---- /dev/null -+++ b/slirp/src/tcp_input.c -@@ -0,0 +1,1539 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94 -+ * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp -+ */ -+ -+/* -+ * Changes and additions relating to SLiRP -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#include "slirp.h" -+#include "ip_icmp.h" -+ -+#define TCPREXMTTHRESH 3 -+ -+#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) -+ -+/* for modulo comparisons of timestamps */ -+#define TSTMP_LT(a, b) ((int)((a) - (b)) < 0) -+#define TSTMP_GEQ(a, b) ((int)((a) - (b)) >= 0) -+ -+/* -+ * Insert segment ti into reassembly queue of tcp with -+ * control block tp. Return TH_FIN if reassembly now includes -+ * a segment with FIN. The macro form does the common case inline -+ * (segment is the next to be received on an established connection, -+ * and the queue is empty), avoiding linkage into and removal -+ * from the queue and repetition of various conversions. -+ * Set DELACK for segments received in order, but ack immediately -+ * when segments are out of order (so fast retransmit can work). 
-+ */ -+#define TCP_REASS(tp, ti, m, so, flags) \ -+ { \ -+ if ((ti)->ti_seq == (tp)->rcv_nxt && tcpfrag_list_empty(tp) && \ -+ (tp)->t_state == TCPS_ESTABLISHED) { \ -+ tp->t_flags |= TF_DELACK; \ -+ (tp)->rcv_nxt += (ti)->ti_len; \ -+ flags = (ti)->ti_flags & TH_FIN; \ -+ if (so->so_emu) { \ -+ if (tcp_emu((so), (m))) \ -+ sbappend(so, (m)); \ -+ } else \ -+ sbappend((so), (m)); \ -+ } else { \ -+ (flags) = tcp_reass((tp), (ti), (m)); \ -+ tp->t_flags |= TF_ACKNOW; \ -+ } \ -+ } -+ -+static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, -+ struct tcpiphdr *ti); -+static void tcp_xmit_timer(register struct tcpcb *tp, int rtt); -+ -+static int tcp_reass(register struct tcpcb *tp, register struct tcpiphdr *ti, -+ struct mbuf *m) -+{ -+ register struct tcpiphdr *q; -+ struct socket *so = tp->t_socket; -+ int flags; -+ -+ /* -+ * Call with ti==NULL after become established to -+ * force pre-ESTABLISHED data up to user socket. -+ */ -+ if (ti == NULL) -+ goto present; -+ -+ /* -+ * Find a segment which begins after this one does. -+ */ -+ for (q = tcpfrag_list_first(tp); !tcpfrag_list_end(q, tp); -+ q = tcpiphdr_next(q)) -+ if (SEQ_GT(q->ti_seq, ti->ti_seq)) -+ break; -+ -+ /* -+ * If there is a preceding segment, it may provide some of -+ * our data already. If so, drop the data from the incoming -+ * segment. If it provides all of our data, drop us. -+ */ -+ if (!tcpfrag_list_end(tcpiphdr_prev(q), tp)) { -+ register int i; -+ q = tcpiphdr_prev(q); -+ /* conversion to int (in i) handles seq wraparound */ -+ i = q->ti_seq + q->ti_len - ti->ti_seq; -+ if (i > 0) { -+ if (i >= ti->ti_len) { -+ m_free(m); -+ /* -+ * Try to present any queued data -+ * at the left window edge to the user. -+ * This is needed after the 3-WHS -+ * completes. -+ */ -+ goto present; /* ??? 
*/ -+ } -+ m_adj(m, i); -+ ti->ti_len -= i; -+ ti->ti_seq += i; -+ } -+ q = tcpiphdr_next(q); -+ } -+ ti->ti_mbuf = m; -+ -+ /* -+ * While we overlap succeeding segments trim them or, -+ * if they are completely covered, dequeue them. -+ */ -+ while (!tcpfrag_list_end(q, tp)) { -+ register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; -+ if (i <= 0) -+ break; -+ if (i < q->ti_len) { -+ q->ti_seq += i; -+ q->ti_len -= i; -+ m_adj(q->ti_mbuf, i); -+ break; -+ } -+ q = tcpiphdr_next(q); -+ m = tcpiphdr_prev(q)->ti_mbuf; -+ remque(tcpiphdr2qlink(tcpiphdr_prev(q))); -+ m_free(m); -+ } -+ -+ /* -+ * Stick new segment in its place. -+ */ -+ insque(tcpiphdr2qlink(ti), tcpiphdr2qlink(tcpiphdr_prev(q))); -+ -+present: -+ /* -+ * Present data to user, advancing rcv_nxt through -+ * completed sequence space. -+ */ -+ if (!TCPS_HAVEESTABLISHED(tp->t_state)) -+ return (0); -+ ti = tcpfrag_list_first(tp); -+ if (tcpfrag_list_end(ti, tp) || ti->ti_seq != tp->rcv_nxt) -+ return (0); -+ if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) -+ return (0); -+ do { -+ tp->rcv_nxt += ti->ti_len; -+ flags = ti->ti_flags & TH_FIN; -+ remque(tcpiphdr2qlink(ti)); -+ m = ti->ti_mbuf; -+ ti = tcpiphdr_next(ti); -+ if (so->so_state & SS_FCANTSENDMORE) -+ m_free(m); -+ else { -+ if (so->so_emu) { -+ if (tcp_emu(so, m)) -+ sbappend(so, m); -+ } else -+ sbappend(so, m); -+ } -+ } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); -+ return (flags); -+} -+ -+/* -+ * TCP input routine, follows pages 65-76 of the -+ * protocol specification dated September, 1981 very closely. 
-+ */ -+void tcp_input(struct mbuf *m, int iphlen, struct socket *inso, -+ unsigned short af) -+{ -+ struct ip save_ip, *ip; -+ struct ip6 save_ip6, *ip6; -+ register struct tcpiphdr *ti; -+ char *optp = NULL; -+ int optlen = 0; -+ int len, tlen, off; -+ register struct tcpcb *tp = NULL; -+ register int tiflags; -+ struct socket *so = NULL; -+ int todrop, acked, ourfinisacked, needoutput = 0; -+ int iss = 0; -+ uint32_t tiwin; -+ int ret; -+ struct sockaddr_storage lhost, fhost; -+ struct sockaddr_in *lhost4, *fhost4; -+ struct sockaddr_in6 *lhost6, *fhost6; -+ struct gfwd_list *ex_ptr; -+ Slirp *slirp; -+ -+ DEBUG_CALL("tcp_input"); -+ DEBUG_ARG("m = %p iphlen = %2d inso = %p", m, iphlen, inso); -+ -+ /* -+ * If called with m == 0, then we're continuing the connect -+ */ -+ if (m == NULL) { -+ so = inso; -+ slirp = so->slirp; -+ -+ /* Re-set a few variables */ -+ tp = sototcpcb(so); -+ m = so->so_m; -+ so->so_m = NULL; -+ ti = so->so_ti; -+ tiwin = ti->ti_win; -+ tiflags = ti->ti_flags; -+ -+ goto cont_conn; -+ } -+ slirp = m->slirp; -+ -+ ip = mtod(m, struct ip *); -+ ip6 = mtod(m, struct ip6 *); -+ -+ switch (af) { -+ case AF_INET: -+ if (iphlen > sizeof(struct ip)) { -+ ip_stripoptions(m, (struct mbuf *)0); -+ iphlen = sizeof(struct ip); -+ } -+ /* XXX Check if too short */ -+ -+ -+ /* -+ * Save a copy of the IP header in case we want restore it -+ * for sending an ICMP error message in response. -+ */ -+ save_ip = *ip; -+ save_ip.ip_len += iphlen; -+ -+ /* -+ * Get IP and TCP header together in first mbuf. -+ * Note: IP leaves IP header in first mbuf. -+ */ -+ m->m_data -= -+ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); -+ m->m_len += -+ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); -+ ti = mtod(m, struct tcpiphdr *); -+ -+ /* -+ * Checksum extended TCP header and data. 
-+ */ -+ tlen = ip->ip_len; -+ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; -+ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); -+ memset(&ti->ti, 0, sizeof(ti->ti)); -+ ti->ti_x0 = 0; -+ ti->ti_src = save_ip.ip_src; -+ ti->ti_dst = save_ip.ip_dst; -+ ti->ti_pr = save_ip.ip_p; -+ ti->ti_len = htons((uint16_t)tlen); -+ break; -+ -+ case AF_INET6: -+ /* -+ * Save a copy of the IP header in case we want restore it -+ * for sending an ICMP error message in response. -+ */ -+ save_ip6 = *ip6; -+ /* -+ * Get IP and TCP header together in first mbuf. -+ * Note: IP leaves IP header in first mbuf. -+ */ -+ m->m_data -= sizeof(struct tcpiphdr) - -+ (sizeof(struct ip6) + sizeof(struct tcphdr)); -+ m->m_len += sizeof(struct tcpiphdr) - -+ (sizeof(struct ip6) + sizeof(struct tcphdr)); -+ ti = mtod(m, struct tcpiphdr *); -+ -+ tlen = ip6->ip_pl; -+ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; -+ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); -+ memset(&ti->ti, 0, sizeof(ti->ti)); -+ ti->ti_x0 = 0; -+ ti->ti_src6 = save_ip6.ip_src; -+ ti->ti_dst6 = save_ip6.ip_dst; -+ ti->ti_nh6 = save_ip6.ip_nh; -+ ti->ti_len = htons((uint16_t)tlen); -+ break; -+ -+ default: -+ g_assert_not_reached(); -+ } -+ -+ len = ((sizeof(struct tcpiphdr) - sizeof(struct tcphdr)) + tlen); -+ if (cksum(m, len)) { -+ goto drop; -+ } -+ -+ /* -+ * Check that TCP offset makes sense, -+ * pull out TCP options and adjust length. XXX -+ */ -+ off = ti->ti_off << 2; -+ if (off < sizeof(struct tcphdr) || off > tlen) { -+ goto drop; -+ } -+ tlen -= off; -+ ti->ti_len = tlen; -+ if (off > sizeof(struct tcphdr)) { -+ optlen = off - sizeof(struct tcphdr); -+ optp = mtod(m, char *) + sizeof(struct tcpiphdr); -+ } -+ tiflags = ti->ti_flags; -+ -+ /* -+ * Convert TCP protocol specific fields to host format. -+ */ -+ NTOHL(ti->ti_seq); -+ NTOHL(ti->ti_ack); -+ NTOHS(ti->ti_win); -+ NTOHS(ti->ti_urp); -+ -+ /* -+ * Drop TCP, IP headers and TCP options. 
-+ */ -+ m->m_data += sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); -+ m->m_len -= sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); -+ -+ /* -+ * Locate pcb for segment. -+ */ -+findso: -+ lhost.ss_family = af; -+ fhost.ss_family = af; -+ switch (af) { -+ case AF_INET: -+ lhost4 = (struct sockaddr_in *)&lhost; -+ lhost4->sin_addr = ti->ti_src; -+ lhost4->sin_port = ti->ti_sport; -+ fhost4 = (struct sockaddr_in *)&fhost; -+ fhost4->sin_addr = ti->ti_dst; -+ fhost4->sin_port = ti->ti_dport; -+ break; -+ case AF_INET6: -+ lhost6 = (struct sockaddr_in6 *)&lhost; -+ lhost6->sin6_addr = ti->ti_src6; -+ lhost6->sin6_port = ti->ti_sport; -+ fhost6 = (struct sockaddr_in6 *)&fhost; -+ fhost6->sin6_addr = ti->ti_dst6; -+ fhost6->sin6_port = ti->ti_dport; -+ break; -+ default: -+ g_assert_not_reached(); -+ } -+ -+ so = solookup(&slirp->tcp_last_so, &slirp->tcb, &lhost, &fhost); -+ -+ /* -+ * If the state is CLOSED (i.e., TCB does not exist) then -+ * all data in the incoming segment is discarded. -+ * If the TCB exists but is in CLOSED state, it is embryonic, -+ * but should either do a listen or a connect soon. -+ * -+ * state == CLOSED means we've done socreate() but haven't -+ * attached it to a protocol yet... -+ * -+ * XXX If a TCB does not exist, and the TH_SYN flag is -+ * the only flag set, then create a session, mark it -+ * as if it was LISTENING, and continue... -+ */ -+ if (so == NULL) { -+ /* TODO: IPv6 */ -+ if (slirp->restricted) { -+ /* Any hostfwds will have an existing socket, so we only get here -+ * for non-hostfwd connections. These should be dropped, unless it -+ * happens to be a guestfwd. 
-+ */ -+ for (ex_ptr = slirp->guestfwd_list; ex_ptr; -+ ex_ptr = ex_ptr->ex_next) { -+ if (ex_ptr->ex_fport == ti->ti_dport && -+ ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) { -+ break; -+ } -+ } -+ if (!ex_ptr) { -+ goto dropwithreset; -+ } -+ } -+ -+ if ((tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) != TH_SYN) -+ goto dropwithreset; -+ -+ so = socreate(slirp); -+ tcp_attach(so); -+ -+ sbreserve(&so->so_snd, TCP_SNDSPACE); -+ sbreserve(&so->so_rcv, TCP_RCVSPACE); -+ -+ so->lhost.ss = lhost; -+ so->fhost.ss = fhost; -+ -+ so->so_iptos = tcp_tos(so); -+ if (so->so_iptos == 0) { -+ switch (af) { -+ case AF_INET: -+ so->so_iptos = ((struct ip *)ti)->ip_tos; -+ break; -+ case AF_INET6: -+ break; -+ default: -+ g_assert_not_reached(); -+ } -+ } -+ -+ tp = sototcpcb(so); -+ tp->t_state = TCPS_LISTEN; -+ } -+ -+ /* -+ * If this is a still-connecting socket, this probably -+ * a retransmit of the SYN. Whether it's a retransmit SYN -+ * or something else, we nuke it. -+ */ -+ if (so->so_state & SS_ISFCONNECTING) -+ goto drop; -+ -+ tp = sototcpcb(so); -+ -+ /* XXX Should never fail */ -+ if (tp == NULL) -+ goto dropwithreset; -+ if (tp->t_state == TCPS_CLOSED) -+ goto drop; -+ -+ tiwin = ti->ti_win; -+ -+ /* -+ * Segment received on connection. -+ * Reset idle time and keep-alive timer. -+ */ -+ tp->t_idle = 0; -+ if (slirp_do_keepalive) -+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; -+ else -+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; -+ -+ /* -+ * Process options if not in LISTEN state, -+ * else do it below (after getting remote address). -+ */ -+ if (optp && tp->t_state != TCPS_LISTEN) -+ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); -+ -+ /* -+ * Header prediction: check for the two common cases -+ * of a uni-directional data xfer. If the packet has -+ * no control flags, is in-sequence, the window didn't -+ * change and we're not retransmitting, it's a -+ * candidate. If the length is zero and the ack moved -+ * forward, we're the sender side of the xfer. 
Just -+ * free the data acked & wake any higher level process -+ * that was blocked waiting for space. If the length -+ * is non-zero and the ack didn't move, we're the -+ * receiver side. If we're getting packets in-order -+ * (the reassembly queue is empty), add the data to -+ * the socket buffer and note that we need a delayed ack. -+ * -+ * XXX Some of these tests are not needed -+ * eg: the tiwin == tp->snd_wnd prevents many more -+ * predictions.. with no *real* advantage.. -+ */ -+ if (tp->t_state == TCPS_ESTABLISHED && -+ (tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) == TH_ACK && -+ ti->ti_seq == tp->rcv_nxt && tiwin && tiwin == tp->snd_wnd && -+ tp->snd_nxt == tp->snd_max) { -+ if (ti->ti_len == 0) { -+ if (SEQ_GT(ti->ti_ack, tp->snd_una) && -+ SEQ_LEQ(ti->ti_ack, tp->snd_max) && -+ tp->snd_cwnd >= tp->snd_wnd) { -+ /* -+ * this is a pure ack for outstanding data. -+ */ -+ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) -+ tcp_xmit_timer(tp, tp->t_rtt); -+ acked = ti->ti_ack - tp->snd_una; -+ sodrop(so, acked); -+ tp->snd_una = ti->ti_ack; -+ m_free(m); -+ -+ /* -+ * If all outstanding data are acked, stop -+ * retransmit timer, otherwise restart timer -+ * using current (possibly backed-off) value. -+ * If process is waiting for space, -+ * wakeup/selwakeup/signal. If data -+ * are ready to send, let tcp_output -+ * decide between more output or persist. -+ */ -+ if (tp->snd_una == tp->snd_max) -+ tp->t_timer[TCPT_REXMT] = 0; -+ else if (tp->t_timer[TCPT_PERSIST] == 0) -+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; -+ -+ /* -+ * This is called because sowwakeup might have -+ * put data into so_snd. Since we don't so sowwakeup, -+ * we don't need this.. XXX??? 
-+ */ -+ if (so->so_snd.sb_cc) -+ (void)tcp_output(tp); -+ -+ return; -+ } -+ } else if (ti->ti_ack == tp->snd_una && tcpfrag_list_empty(tp) && -+ ti->ti_len <= sbspace(&so->so_rcv)) { -+ /* -+ * this is a pure, in-sequence data packet -+ * with nothing on the reassembly queue and -+ * we have enough buffer space to take it. -+ */ -+ tp->rcv_nxt += ti->ti_len; -+ /* -+ * Add data to socket buffer. -+ */ -+ if (so->so_emu) { -+ if (tcp_emu(so, m)) -+ sbappend(so, m); -+ } else -+ sbappend(so, m); -+ -+ /* -+ * If this is a short packet, then ACK now - with Nagel -+ * congestion avoidance sender won't send more until -+ * he gets an ACK. -+ * -+ * It is better to not delay acks at all to maximize -+ * TCP throughput. See RFC 2581. -+ */ -+ tp->t_flags |= TF_ACKNOW; -+ tcp_output(tp); -+ return; -+ } -+ } /* header prediction */ -+ /* -+ * Calculate amount of space in receive window, -+ * and then do TCP input processing. -+ * Receive window is amount of space in rcv queue, -+ * but not less than advertised window. -+ */ -+ { -+ int win; -+ win = sbspace(&so->so_rcv); -+ if (win < 0) -+ win = 0; -+ tp->rcv_wnd = MAX(win, (int)(tp->rcv_adv - tp->rcv_nxt)); -+ } -+ -+ switch (tp->t_state) { -+ /* -+ * If the state is LISTEN then ignore segment if it contains an RST. -+ * If the segment contains an ACK then it is bad and send a RST. -+ * If it does not contain a SYN then it is not interesting; drop it. -+ * Don't bother responding if the destination was a broadcast. -+ * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial -+ * tp->iss, and send a segment: -+ * -+ * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. -+ * Fill in remote peer address fields if not previously specified. -+ * Enter SYN_RECEIVED state, and process any other fields of this -+ * segment in this state. 
-+ */ -+ case TCPS_LISTEN: { -+ if (tiflags & TH_RST) -+ goto drop; -+ if (tiflags & TH_ACK) -+ goto dropwithreset; -+ if ((tiflags & TH_SYN) == 0) -+ goto drop; -+ -+ /* -+ * This has way too many gotos... -+ * But a bit of spaghetti code never hurt anybody :) -+ */ -+ -+ /* -+ * If this is destined for the control address, then flag to -+ * tcp_ctl once connected, otherwise connect -+ */ -+ /* TODO: IPv6 */ -+ if (af == AF_INET && -+ (so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == -+ slirp->vnetwork_addr.s_addr) { -+ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr && -+ so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) { -+ /* May be an add exec */ -+ for (ex_ptr = slirp->guestfwd_list; ex_ptr; -+ ex_ptr = ex_ptr->ex_next) { -+ if (ex_ptr->ex_fport == so->so_fport && -+ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { -+ so->so_state |= SS_CTL; -+ break; -+ } -+ } -+ if (so->so_state & SS_CTL) { -+ goto cont_input; -+ } -+ } -+ /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */ -+ } -+ -+ if (so->so_emu & EMU_NOCONNECT) { -+ so->so_emu &= ~EMU_NOCONNECT; -+ goto cont_input; -+ } -+ -+ if ((tcp_fconnect(so, so->so_ffamily) == -1) && (errno != EAGAIN) && -+ (errno != EINPROGRESS) && (errno != EWOULDBLOCK)) { -+ uint8_t code; -+ DEBUG_MISC(" tcp fconnect errno = %d-%s", errno, strerror(errno)); -+ if (errno == ECONNREFUSED) { -+ /* ACK the SYN, send RST to refuse the connection */ -+ tcp_respond(tp, ti, m, ti->ti_seq + 1, (tcp_seq)0, -+ TH_RST | TH_ACK, af); -+ } else { -+ switch (af) { -+ case AF_INET: -+ code = ICMP_UNREACH_NET; -+ if (errno == EHOSTUNREACH) { -+ code = ICMP_UNREACH_HOST; -+ } -+ break; -+ case AF_INET6: -+ code = ICMP6_UNREACH_NO_ROUTE; -+ if (errno == EHOSTUNREACH) { -+ code = ICMP6_UNREACH_ADDRESS; -+ } -+ break; -+ default: -+ g_assert_not_reached(); -+ } -+ HTONL(ti->ti_seq); /* restore tcp header */ -+ HTONL(ti->ti_ack); -+ HTONS(ti->ti_win); -+ HTONS(ti->ti_urp); -+ m->m_data -= -+ sizeof(struct tcpiphdr) + off - 
sizeof(struct tcphdr); -+ m->m_len += -+ sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); -+ switch (af) { -+ case AF_INET: -+ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct ip) - -+ sizeof(struct tcphdr); -+ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct ip) - -+ sizeof(struct tcphdr); -+ *ip = save_ip; -+ icmp_send_error(m, ICMP_UNREACH, code, 0, strerror(errno)); -+ break; -+ case AF_INET6: -+ m->m_data += sizeof(struct tcpiphdr) - -+ (sizeof(struct ip6) + sizeof(struct tcphdr)); -+ m->m_len -= sizeof(struct tcpiphdr) - -+ (sizeof(struct ip6) + sizeof(struct tcphdr)); -+ *ip6 = save_ip6; -+ icmp6_send_error(m, ICMP6_UNREACH, code); -+ break; -+ default: -+ g_assert_not_reached(); -+ } -+ } -+ tcp_close(tp); -+ m_free(m); -+ } else { -+ /* -+ * Haven't connected yet, save the current mbuf -+ * and ti, and return -+ * XXX Some OS's don't tell us whether the connect() -+ * succeeded or not. So we must time it out. -+ */ -+ so->so_m = m; -+ so->so_ti = ti; -+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; -+ tp->t_state = TCPS_SYN_RECEIVED; -+ /* -+ * Initialize receive sequence numbers now so that we can send a -+ * valid RST if the remote end rejects our connection. -+ */ -+ tp->irs = ti->ti_seq; -+ tcp_rcvseqinit(tp); -+ tcp_template(tp); -+ } -+ return; -+ -+ cont_conn: -+ /* m==NULL -+ * Check if the connect succeeded -+ */ -+ if (so->so_state & SS_NOFDREF) { -+ tp = tcp_close(tp); -+ goto dropwithreset; -+ } -+ cont_input: -+ tcp_template(tp); -+ -+ if (optp) -+ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); -+ -+ if (iss) -+ tp->iss = iss; -+ else -+ tp->iss = slirp->tcp_iss; -+ slirp->tcp_iss += TCP_ISSINCR / 2; -+ tp->irs = ti->ti_seq; -+ tcp_sendseqinit(tp); -+ tcp_rcvseqinit(tp); -+ tp->t_flags |= TF_ACKNOW; -+ tp->t_state = TCPS_SYN_RECEIVED; -+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; -+ goto trimthenstep6; -+ } /* case TCPS_LISTEN */ -+ -+ /* -+ * If the state is SYN_SENT: -+ * if seg contains an ACK, but not for our SYN, drop the input. 
-+ * if seg contains a RST, then drop the connection. -+ * if seg does not contain SYN, then drop it. -+ * Otherwise this is an acceptable SYN segment -+ * initialize tp->rcv_nxt and tp->irs -+ * if seg contains ack then advance tp->snd_una -+ * if SYN has been acked change to ESTABLISHED else SYN_RCVD state -+ * arrange for segment to be acked (eventually) -+ * continue processing rest of data/controls, beginning with URG -+ */ -+ case TCPS_SYN_SENT: -+ if ((tiflags & TH_ACK) && -+ (SEQ_LEQ(ti->ti_ack, tp->iss) || SEQ_GT(ti->ti_ack, tp->snd_max))) -+ goto dropwithreset; -+ -+ if (tiflags & TH_RST) { -+ if (tiflags & TH_ACK) { -+ tcp_drop(tp, 0); /* XXX Check t_softerror! */ -+ } -+ goto drop; -+ } -+ -+ if ((tiflags & TH_SYN) == 0) -+ goto drop; -+ if (tiflags & TH_ACK) { -+ tp->snd_una = ti->ti_ack; -+ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) -+ tp->snd_nxt = tp->snd_una; -+ } -+ -+ tp->t_timer[TCPT_REXMT] = 0; -+ tp->irs = ti->ti_seq; -+ tcp_rcvseqinit(tp); -+ tp->t_flags |= TF_ACKNOW; -+ if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { -+ soisfconnected(so); -+ tp->t_state = TCPS_ESTABLISHED; -+ -+ (void)tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); -+ /* -+ * if we didn't have to retransmit the SYN, -+ * use its rtt as our initial srtt & rtt var. -+ */ -+ if (tp->t_rtt) -+ tcp_xmit_timer(tp, tp->t_rtt); -+ } else -+ tp->t_state = TCPS_SYN_RECEIVED; -+ -+ trimthenstep6: -+ /* -+ * Advance ti->ti_seq to correspond to first data byte. -+ * If data, trim to stay within window, -+ * dropping FIN if necessary. -+ */ -+ ti->ti_seq++; -+ if (ti->ti_len > tp->rcv_wnd) { -+ todrop = ti->ti_len - tp->rcv_wnd; -+ m_adj(m, -todrop); -+ ti->ti_len = tp->rcv_wnd; -+ tiflags &= ~TH_FIN; -+ } -+ tp->snd_wl1 = ti->ti_seq - 1; -+ tp->rcv_up = ti->ti_seq; -+ goto step6; -+ } /* switch tp->t_state */ -+ /* -+ * States other than LISTEN or SYN_SENT. -+ * Check that at least some bytes of segment are within -+ * receive window. 
If segment begins before rcv_nxt, -+ * drop leading data (and SYN); if nothing left, just ack. -+ */ -+ todrop = tp->rcv_nxt - ti->ti_seq; -+ if (todrop > 0) { -+ if (tiflags & TH_SYN) { -+ tiflags &= ~TH_SYN; -+ ti->ti_seq++; -+ if (ti->ti_urp > 1) -+ ti->ti_urp--; -+ else -+ tiflags &= ~TH_URG; -+ todrop--; -+ } -+ /* -+ * Following if statement from Stevens, vol. 2, p. 960. -+ */ -+ if (todrop > ti->ti_len || -+ (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) { -+ /* -+ * Any valid FIN must be to the left of the window. -+ * At this point the FIN must be a duplicate or out -+ * of sequence; drop it. -+ */ -+ tiflags &= ~TH_FIN; -+ -+ /* -+ * Send an ACK to resynchronize and drop any data. -+ * But keep on processing for RST or ACK. -+ */ -+ tp->t_flags |= TF_ACKNOW; -+ todrop = ti->ti_len; -+ } -+ m_adj(m, todrop); -+ ti->ti_seq += todrop; -+ ti->ti_len -= todrop; -+ if (ti->ti_urp > todrop) -+ ti->ti_urp -= todrop; -+ else { -+ tiflags &= ~TH_URG; -+ ti->ti_urp = 0; -+ } -+ } -+ /* -+ * If new data are received on a connection after the -+ * user processes are gone, then RST the other end. -+ */ -+ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && -+ ti->ti_len) { -+ tp = tcp_close(tp); -+ goto dropwithreset; -+ } -+ -+ /* -+ * If segment ends after window, drop trailing data -+ * (and PUSH and FIN); if nothing left, just ACK. -+ */ -+ todrop = (ti->ti_seq + ti->ti_len) - (tp->rcv_nxt + tp->rcv_wnd); -+ if (todrop > 0) { -+ if (todrop >= ti->ti_len) { -+ /* -+ * If a new connection request is received -+ * while in TIME_WAIT, drop the old connection -+ * and start over if the sequence numbers -+ * are above the previous ones. -+ */ -+ if (tiflags & TH_SYN && tp->t_state == TCPS_TIME_WAIT && -+ SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { -+ iss = tp->rcv_nxt + TCP_ISSINCR; -+ tp = tcp_close(tp); -+ goto findso; -+ } -+ /* -+ * If window is closed can only take segments at -+ * window edge, and have to drop data and PUSH from -+ * incoming segments. 
Continue processing, but -+ * remember to ack. Otherwise, drop segment -+ * and ack. -+ */ -+ if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { -+ tp->t_flags |= TF_ACKNOW; -+ } else { -+ goto dropafterack; -+ } -+ } -+ m_adj(m, -todrop); -+ ti->ti_len -= todrop; -+ tiflags &= ~(TH_PUSH | TH_FIN); -+ } -+ -+ /* -+ * If the RST bit is set examine the state: -+ * SYN_RECEIVED STATE: -+ * If passive open, return to LISTEN state. -+ * If active open, inform user that connection was refused. -+ * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: -+ * Inform user that connection was reset, and close tcb. -+ * CLOSING, LAST_ACK, TIME_WAIT STATES -+ * Close the tcb. -+ */ -+ if (tiflags & TH_RST) -+ switch (tp->t_state) { -+ case TCPS_SYN_RECEIVED: -+ case TCPS_ESTABLISHED: -+ case TCPS_FIN_WAIT_1: -+ case TCPS_FIN_WAIT_2: -+ case TCPS_CLOSE_WAIT: -+ tp->t_state = TCPS_CLOSED; -+ tcp_close(tp); -+ goto drop; -+ -+ case TCPS_CLOSING: -+ case TCPS_LAST_ACK: -+ case TCPS_TIME_WAIT: -+ tcp_close(tp); -+ goto drop; -+ } -+ -+ /* -+ * If a SYN is in the window, then this is an -+ * error and we send an RST and drop the connection. -+ */ -+ if (tiflags & TH_SYN) { -+ tp = tcp_drop(tp, 0); -+ goto dropwithreset; -+ } -+ -+ /* -+ * If the ACK bit is off we drop the segment and return. -+ */ -+ if ((tiflags & TH_ACK) == 0) -+ goto drop; -+ -+ /* -+ * Ack processing. -+ */ -+ switch (tp->t_state) { -+ /* -+ * In SYN_RECEIVED state if the ack ACKs our SYN then enter -+ * ESTABLISHED state and continue processing, otherwise -+ * send an RST. una<=ack<=max -+ */ -+ case TCPS_SYN_RECEIVED: -+ -+ if (SEQ_GT(tp->snd_una, ti->ti_ack) || SEQ_GT(ti->ti_ack, tp->snd_max)) -+ goto dropwithreset; -+ tp->t_state = TCPS_ESTABLISHED; -+ /* -+ * The sent SYN is ack'ed with our sequence number +1 -+ * The first data byte already in the buffer will get -+ * lost if no correction is made. This is only needed for -+ * SS_CTL since the buffer is empty otherwise. 
-+ * tp->snd_una++; or: -+ */ -+ tp->snd_una = ti->ti_ack; -+ if (so->so_state & SS_CTL) { -+ /* So tcp_ctl reports the right state */ -+ ret = tcp_ctl(so); -+ if (ret == 1) { -+ soisfconnected(so); -+ so->so_state &= ~SS_CTL; /* success XXX */ -+ } else if (ret == 2) { -+ so->so_state &= SS_PERSISTENT_MASK; -+ so->so_state |= SS_NOFDREF; /* CTL_CMD */ -+ } else { -+ needoutput = 1; -+ tp->t_state = TCPS_FIN_WAIT_1; -+ } -+ } else { -+ soisfconnected(so); -+ } -+ -+ (void)tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); -+ tp->snd_wl1 = ti->ti_seq - 1; -+ /* Avoid ack processing; snd_una==ti_ack => dup ack */ -+ goto synrx_to_est; -+ /* fall into ... */ -+ -+ /* -+ * In ESTABLISHED state: drop duplicate ACKs; ACK out of range -+ * ACKs. If the ack is in the range -+ * tp->snd_una < ti->ti_ack <= tp->snd_max -+ * then advance tp->snd_una to ti->ti_ack and drop -+ * data from the retransmission queue. If this ACK reflects -+ * more up to date window information we update our window information. -+ */ -+ case TCPS_ESTABLISHED: -+ case TCPS_FIN_WAIT_1: -+ case TCPS_FIN_WAIT_2: -+ case TCPS_CLOSE_WAIT: -+ case TCPS_CLOSING: -+ case TCPS_LAST_ACK: -+ case TCPS_TIME_WAIT: -+ -+ if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { -+ if (ti->ti_len == 0 && tiwin == tp->snd_wnd) { -+ DEBUG_MISC(" dup ack m = %p so = %p", m, so); -+ /* -+ * If we have outstanding data (other than -+ * a window probe), this is a completely -+ * duplicate ack (ie, window info didn't -+ * change), the ack is the biggest we've -+ * seen and we've seen exactly our rexmt -+ * threshold of them, assume a packet -+ * has been dropped and retransmit it. -+ * Kludge snd_nxt & the congestion -+ * window so we send only this one -+ * packet. -+ * -+ * We know we're losing at the current -+ * window size so do congestion avoidance -+ * (set ssthresh to half the current window -+ * and pull our congestion window back to -+ * the new ssthresh). 
-+ * -+ * Dup acks mean that packets have left the -+ * network (they're now cached at the receiver) -+ * so bump cwnd by the amount in the receiver -+ * to keep a constant cwnd packets in the -+ * network. -+ */ -+ if (tp->t_timer[TCPT_REXMT] == 0 || ti->ti_ack != tp->snd_una) -+ tp->t_dupacks = 0; -+ else if (++tp->t_dupacks == TCPREXMTTHRESH) { -+ tcp_seq onxt = tp->snd_nxt; -+ unsigned win = -+ MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; -+ -+ if (win < 2) -+ win = 2; -+ tp->snd_ssthresh = win * tp->t_maxseg; -+ tp->t_timer[TCPT_REXMT] = 0; -+ tp->t_rtt = 0; -+ tp->snd_nxt = ti->ti_ack; -+ tp->snd_cwnd = tp->t_maxseg; -+ (void)tcp_output(tp); -+ tp->snd_cwnd = -+ tp->snd_ssthresh + tp->t_maxseg * tp->t_dupacks; -+ if (SEQ_GT(onxt, tp->snd_nxt)) -+ tp->snd_nxt = onxt; -+ goto drop; -+ } else if (tp->t_dupacks > TCPREXMTTHRESH) { -+ tp->snd_cwnd += tp->t_maxseg; -+ (void)tcp_output(tp); -+ goto drop; -+ } -+ } else -+ tp->t_dupacks = 0; -+ break; -+ } -+ synrx_to_est: -+ /* -+ * If the congestion window was inflated to account -+ * for the other side's cached packets, retract it. -+ */ -+ if (tp->t_dupacks > TCPREXMTTHRESH && tp->snd_cwnd > tp->snd_ssthresh) -+ tp->snd_cwnd = tp->snd_ssthresh; -+ tp->t_dupacks = 0; -+ if (SEQ_GT(ti->ti_ack, tp->snd_max)) { -+ goto dropafterack; -+ } -+ acked = ti->ti_ack - tp->snd_una; -+ -+ /* -+ * If transmit timer is running and timed sequence -+ * number was acked, update smoothed round trip time. -+ * Since we now have an rtt measurement, cancel the -+ * timer backoff (cf., Phil Karn's retransmit alg.). -+ * Recompute the initial retransmit timer. -+ */ -+ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) -+ tcp_xmit_timer(tp, tp->t_rtt); -+ -+ /* -+ * If all outstanding data is acked, stop retransmit -+ * timer and remember to restart (more output or persist). -+ * If there is more data to be acked, restart retransmit -+ * timer, using current (possibly backed-off) value. 
-+ */ -+ if (ti->ti_ack == tp->snd_max) { -+ tp->t_timer[TCPT_REXMT] = 0; -+ needoutput = 1; -+ } else if (tp->t_timer[TCPT_PERSIST] == 0) -+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; -+ /* -+ * When new data is acked, open the congestion window. -+ * If the window gives us less than ssthresh packets -+ * in flight, open exponentially (maxseg per packet). -+ * Otherwise open linearly: maxseg per window -+ * (maxseg^2 / cwnd per packet). -+ */ -+ { -+ register unsigned cw = tp->snd_cwnd; -+ register unsigned incr = tp->t_maxseg; -+ -+ if (cw > tp->snd_ssthresh) -+ incr = incr * incr / cw; -+ tp->snd_cwnd = MIN(cw + incr, TCP_MAXWIN << tp->snd_scale); -+ } -+ if (acked > so->so_snd.sb_cc) { -+ tp->snd_wnd -= so->so_snd.sb_cc; -+ sodrop(so, (int)so->so_snd.sb_cc); -+ ourfinisacked = 1; -+ } else { -+ sodrop(so, acked); -+ tp->snd_wnd -= acked; -+ ourfinisacked = 0; -+ } -+ tp->snd_una = ti->ti_ack; -+ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) -+ tp->snd_nxt = tp->snd_una; -+ -+ switch (tp->t_state) { -+ /* -+ * In FIN_WAIT_1 STATE in addition to the processing -+ * for the ESTABLISHED state if our FIN is now acknowledged -+ * then enter FIN_WAIT_2. -+ */ -+ case TCPS_FIN_WAIT_1: -+ if (ourfinisacked) { -+ /* -+ * If we can't receive any more -+ * data, then closing user can proceed. -+ * Starting the timer is contrary to the -+ * specification, but if we don't get a FIN -+ * we'll hang forever. -+ */ -+ if (so->so_state & SS_FCANTRCVMORE) { -+ tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE; -+ } -+ tp->t_state = TCPS_FIN_WAIT_2; -+ } -+ break; -+ -+ /* -+ * In CLOSING STATE in addition to the processing for -+ * the ESTABLISHED state if the ACK acknowledges our FIN -+ * then enter the TIME-WAIT state, otherwise ignore -+ * the segment. 
-+ */ -+ case TCPS_CLOSING: -+ if (ourfinisacked) { -+ tp->t_state = TCPS_TIME_WAIT; -+ tcp_canceltimers(tp); -+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; -+ } -+ break; -+ -+ /* -+ * In LAST_ACK, we may still be waiting for data to drain -+ * and/or to be acked, as well as for the ack of our FIN. -+ * If our FIN is now acknowledged, delete the TCB, -+ * enter the closed state and return. -+ */ -+ case TCPS_LAST_ACK: -+ if (ourfinisacked) { -+ tcp_close(tp); -+ goto drop; -+ } -+ break; -+ -+ /* -+ * In TIME_WAIT state the only thing that should arrive -+ * is a retransmission of the remote FIN. Acknowledge -+ * it and restart the finack timer. -+ */ -+ case TCPS_TIME_WAIT: -+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; -+ goto dropafterack; -+ } -+ } /* switch(tp->t_state) */ -+ -+step6: -+ /* -+ * Update window information. -+ * Don't look at window if no ACK: TAC's send garbage on first SYN. -+ */ -+ if ((tiflags & TH_ACK) && -+ (SEQ_LT(tp->snd_wl1, ti->ti_seq) || -+ (tp->snd_wl1 == ti->ti_seq && -+ (SEQ_LT(tp->snd_wl2, ti->ti_ack) || -+ (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) { -+ tp->snd_wnd = tiwin; -+ tp->snd_wl1 = ti->ti_seq; -+ tp->snd_wl2 = ti->ti_ack; -+ if (tp->snd_wnd > tp->max_sndwnd) -+ tp->max_sndwnd = tp->snd_wnd; -+ needoutput = 1; -+ } -+ -+ /* -+ * Process segments with URG. -+ */ -+ if ((tiflags & TH_URG) && ti->ti_urp && -+ TCPS_HAVERCVDFIN(tp->t_state) == 0) { -+ /* -+ * This is a kludge, but if we receive and accept -+ * random urgent pointers, we'll crash in -+ * soreceive. It's hard to imagine someone -+ * actually wanting to send this much urgent data. -+ */ -+ if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) { -+ ti->ti_urp = 0; -+ tiflags &= ~TH_URG; -+ goto dodata; -+ } -+ /* -+ * If this segment advances the known urgent pointer, -+ * then mark the data stream. This should not happen -+ * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since -+ * a FIN has been received from the remote side. 
-+ * In these states we ignore the URG. -+ * -+ * According to RFC961 (Assigned Protocols), -+ * the urgent pointer points to the last octet -+ * of urgent data. We continue, however, -+ * to consider it to indicate the first octet -+ * of data past the urgent section as the original -+ * spec states (in one of two places). -+ */ -+ if (SEQ_GT(ti->ti_seq + ti->ti_urp, tp->rcv_up)) { -+ tp->rcv_up = ti->ti_seq + ti->ti_urp; -+ so->so_urgc = -+ so->so_rcv.sb_cc + (tp->rcv_up - tp->rcv_nxt); /* -1; */ -+ tp->rcv_up = ti->ti_seq + ti->ti_urp; -+ } -+ } else -+ /* -+ * If no out of band data is expected, -+ * pull receive urgent pointer along -+ * with the receive window. -+ */ -+ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) -+ tp->rcv_up = tp->rcv_nxt; -+dodata: -+ -+ /* -+ * If this is a small packet, then ACK now - with Nagel -+ * congestion avoidance sender won't send more until -+ * he gets an ACK. -+ */ -+ if (ti->ti_len && (unsigned)ti->ti_len <= 5 && -+ ((struct tcpiphdr_2 *)ti)->first_char == (char)27) { -+ tp->t_flags |= TF_ACKNOW; -+ } -+ -+ /* -+ * Process the segment text, merging it into the TCP sequencing queue, -+ * and arranging for acknowledgment of receipt if necessary. -+ * This process logically involves adjusting tp->rcv_wnd as data -+ * is presented to the user (this happens in tcp_usrreq.c, -+ * case PRU_RCVD). If a FIN has already been received on this -+ * connection then we just ignore the text. -+ */ -+ if ((ti->ti_len || (tiflags & TH_FIN)) && -+ TCPS_HAVERCVDFIN(tp->t_state) == 0) { -+ TCP_REASS(tp, ti, m, so, tiflags); -+ } else { -+ m_free(m); -+ tiflags &= ~TH_FIN; -+ } -+ -+ /* -+ * If FIN is received ACK the FIN and let the user know -+ * that the connection is closing. -+ */ -+ if (tiflags & TH_FIN) { -+ if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { -+ /* -+ * If we receive a FIN we can't send more data, -+ * set it SS_FDRAIN -+ * Shutdown the socket if there is no rx data in the -+ * buffer. 
-+ * soread() is called on completion of shutdown() and -+ * will got to TCPS_LAST_ACK, and use tcp_output() -+ * to send the FIN. -+ */ -+ sofwdrain(so); -+ -+ tp->t_flags |= TF_ACKNOW; -+ tp->rcv_nxt++; -+ } -+ switch (tp->t_state) { -+ /* -+ * In SYN_RECEIVED and ESTABLISHED STATES -+ * enter the CLOSE_WAIT state. -+ */ -+ case TCPS_SYN_RECEIVED: -+ case TCPS_ESTABLISHED: -+ if (so->so_emu == EMU_CTL) /* no shutdown on socket */ -+ tp->t_state = TCPS_LAST_ACK; -+ else -+ tp->t_state = TCPS_CLOSE_WAIT; -+ break; -+ -+ /* -+ * If still in FIN_WAIT_1 STATE FIN has not been acked so -+ * enter the CLOSING state. -+ */ -+ case TCPS_FIN_WAIT_1: -+ tp->t_state = TCPS_CLOSING; -+ break; -+ -+ /* -+ * In FIN_WAIT_2 state enter the TIME_WAIT state, -+ * starting the time-wait timer, turning off the other -+ * standard timers. -+ */ -+ case TCPS_FIN_WAIT_2: -+ tp->t_state = TCPS_TIME_WAIT; -+ tcp_canceltimers(tp); -+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; -+ break; -+ -+ /* -+ * In TIME_WAIT state restart the 2 MSL time_wait timer. -+ */ -+ case TCPS_TIME_WAIT: -+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; -+ break; -+ } -+ } -+ -+ /* -+ * Return any desired output. -+ */ -+ if (needoutput || (tp->t_flags & TF_ACKNOW)) { -+ (void)tcp_output(tp); -+ } -+ return; -+ -+dropafterack: -+ /* -+ * Generate an ACK dropping incoming segment if it occupies -+ * sequence space, where the ACK reflects our state. -+ */ -+ if (tiflags & TH_RST) -+ goto drop; -+ m_free(m); -+ tp->t_flags |= TF_ACKNOW; -+ (void)tcp_output(tp); -+ return; -+ -+dropwithreset: -+ /* reuses m if m!=NULL, m_free() unnecessary */ -+ if (tiflags & TH_ACK) -+ tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST, af); -+ else { -+ if (tiflags & TH_SYN) -+ ti->ti_len++; -+ tcp_respond(tp, ti, m, ti->ti_seq + ti->ti_len, (tcp_seq)0, -+ TH_RST | TH_ACK, af); -+ } -+ -+ return; -+ -+drop: -+ /* -+ * Drop space held by incoming segment and return. 
-+ */ -+ m_free(m); -+} -+ -+static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, -+ struct tcpiphdr *ti) -+{ -+ uint16_t mss; -+ int opt, optlen; -+ -+ DEBUG_CALL("tcp_dooptions"); -+ DEBUG_ARG("tp = %p cnt=%i", tp, cnt); -+ -+ for (; cnt > 0; cnt -= optlen, cp += optlen) { -+ opt = cp[0]; -+ if (opt == TCPOPT_EOL) -+ break; -+ if (opt == TCPOPT_NOP) -+ optlen = 1; -+ else { -+ optlen = cp[1]; -+ if (optlen <= 0) -+ break; -+ } -+ switch (opt) { -+ default: -+ continue; -+ -+ case TCPOPT_MAXSEG: -+ if (optlen != TCPOLEN_MAXSEG) -+ continue; -+ if (!(ti->ti_flags & TH_SYN)) -+ continue; -+ memcpy((char *)&mss, (char *)cp + 2, sizeof(mss)); -+ NTOHS(mss); -+ (void)tcp_mss(tp, mss); /* sets t_maxseg */ -+ break; -+ } -+ } -+} -+ -+/* -+ * Collect new round-trip time estimate -+ * and update averages and current timeout. -+ */ -+ -+static void tcp_xmit_timer(register struct tcpcb *tp, int rtt) -+{ -+ register short delta; -+ -+ DEBUG_CALL("tcp_xmit_timer"); -+ DEBUG_ARG("tp = %p", tp); -+ DEBUG_ARG("rtt = %d", rtt); -+ -+ if (tp->t_srtt != 0) { -+ /* -+ * srtt is stored as fixed point with 3 bits after the -+ * binary point (i.e., scaled by 8). The following magic -+ * is equivalent to the smoothing algorithm in rfc793 with -+ * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed -+ * point). Adjust rtt to origin 0. -+ */ -+ delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT); -+ if ((tp->t_srtt += delta) <= 0) -+ tp->t_srtt = 1; -+ /* -+ * We accumulate a smoothed rtt variance (actually, a -+ * smoothed mean difference), then set the retransmit -+ * timer to smoothed rtt + 4 times the smoothed variance. -+ * rttvar is stored as fixed point with 2 bits after the -+ * binary point (scaled by 4). The following is -+ * equivalent to rfc793 smoothing with an alpha of .75 -+ * (rttvar = rttvar*3/4 + |delta| / 4). This replaces -+ * rfc793's wired-in beta. 
-+ */ -+ if (delta < 0) -+ delta = -delta; -+ delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); -+ if ((tp->t_rttvar += delta) <= 0) -+ tp->t_rttvar = 1; -+ } else { -+ /* -+ * No rtt measurement yet - use the unsmoothed rtt. -+ * Set the variance to half the rtt (so our first -+ * retransmit happens at 3*rtt). -+ */ -+ tp->t_srtt = rtt << TCP_RTT_SHIFT; -+ tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); -+ } -+ tp->t_rtt = 0; -+ tp->t_rxtshift = 0; -+ -+ /* -+ * the retransmit should happen at rtt + 4 * rttvar. -+ * Because of the way we do the smoothing, srtt and rttvar -+ * will each average +1/2 tick of bias. When we compute -+ * the retransmit timer, we want 1/2 tick of rounding and -+ * 1 extra tick because of +-1/2 tick uncertainty in the -+ * firing of the timer. The bias will give us exactly the -+ * 1.5 tick we need. But, because the bias is -+ * statistical, we have to test that we don't drop below -+ * the minimum feasible timer (which is 2 ticks). -+ */ -+ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), (short)tp->t_rttmin, -+ TCPTV_REXMTMAX); /* XXX */ -+ -+ /* -+ * We received an ack for a packet that wasn't retransmitted; -+ * it is probably safe to discard any error indications we've -+ * received recently. This isn't quite right, but close enough -+ * for now (a route might have failed after we sent a segment, -+ * and the return path might not be symmetrical). -+ */ -+ tp->t_softerror = 0; -+} -+ -+/* -+ * Determine a reasonable value for maxseg size. -+ * If the route is known, check route for mtu. -+ * If none, use an mss that can be handled on the outgoing -+ * interface without forcing IP to fragment; if bigger than -+ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES -+ * to utilize large mbufs. 
If no route is found, route has no mtu, -+ * or the destination isn't local, use a default, hopefully conservative -+ * size (usually 512 or the default IP max size, but no more than the mtu -+ * of the interface), as we can't discover anything about intervening -+ * gateways or networks. We also initialize the congestion/slow start -+ * window to be a single segment if the destination isn't local. -+ * While looking at the routing entry, we also initialize other path-dependent -+ * parameters from pre-set or cached values in the routing entry. -+ */ -+ -+int tcp_mss(struct tcpcb *tp, unsigned offer) -+{ -+ struct socket *so = tp->t_socket; -+ int mss; -+ -+ DEBUG_CALL("tcp_mss"); -+ DEBUG_ARG("tp = %p", tp); -+ DEBUG_ARG("offer = %d", offer); -+ -+ switch (so->so_ffamily) { -+ case AF_INET: -+ mss = MIN(so->slirp->if_mtu, so->slirp->if_mru) - -+ sizeof(struct tcphdr) - sizeof(struct ip); -+ break; -+ case AF_INET6: -+ mss = MIN(so->slirp->if_mtu, so->slirp->if_mru) - -+ sizeof(struct tcphdr) - sizeof(struct ip6); -+ break; -+ default: -+ g_assert_not_reached(); -+ } -+ -+ if (offer) -+ mss = MIN(mss, offer); -+ mss = MAX(mss, 32); -+ if (mss < tp->t_maxseg || offer != 0) -+ tp->t_maxseg = MIN(mss, TCP_MAXSEG_MAX); -+ -+ tp->snd_cwnd = mss; -+ -+ sbreserve(&so->so_snd, -+ TCP_SNDSPACE + -+ ((TCP_SNDSPACE % mss) ? (mss - (TCP_SNDSPACE % mss)) : 0)); -+ sbreserve(&so->so_rcv, -+ TCP_RCVSPACE + -+ ((TCP_RCVSPACE % mss) ? (mss - (TCP_RCVSPACE % mss)) : 0)); -+ -+ DEBUG_MISC(" returning mss = %d", mss); -+ -+ return mss; -+} -diff --git a/slirp/src/tcp_output.c b/slirp/src/tcp_output.c -new file mode 100644 -index 0000000000..383fe31dcf ---- /dev/null -+++ b/slirp/src/tcp_output.c -@@ -0,0 +1,516 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1990, 1993 -+ * The Regents of the University of California. All rights reserved. 
-+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)tcp_output.c 8.3 (Berkeley) 12/30/93 -+ * tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp -+ */ -+ -+/* -+ * Changes and additions relating to SLiRP -+ * Copyright (c) 1995 Danny Gasparovski. 
-+ */ -+ -+#include "slirp.h" -+ -+static const uint8_t tcp_outflags[TCP_NSTATES] = { -+ TH_RST | TH_ACK, 0, TH_SYN, TH_SYN | TH_ACK, -+ TH_ACK, TH_ACK, TH_FIN | TH_ACK, TH_FIN | TH_ACK, -+ TH_FIN | TH_ACK, TH_ACK, TH_ACK, -+}; -+ -+ -+#undef MAX_TCPOPTLEN -+#define MAX_TCPOPTLEN 32 /* max # bytes that go in options */ -+ -+/* -+ * Tcp output routine: figure out what should be sent and send it. -+ */ -+int tcp_output(struct tcpcb *tp) -+{ -+ register struct socket *so = tp->t_socket; -+ register long len, win; -+ int off, flags, error; -+ register struct mbuf *m; -+ register struct tcpiphdr *ti, tcpiph_save; -+ struct ip *ip; -+ struct ip6 *ip6; -+ uint8_t opt[MAX_TCPOPTLEN]; -+ unsigned optlen, hdrlen; -+ int idle, sendalot; -+ -+ DEBUG_CALL("tcp_output"); -+ DEBUG_ARG("tp = %p", tp); -+ -+ /* -+ * Determine length of data that should be transmitted, -+ * and flags that will be used. -+ * If there is some data or critical controls (SYN, RST) -+ * to send, then transmit; otherwise, investigate further. -+ */ -+ idle = (tp->snd_max == tp->snd_una); -+ if (idle && tp->t_idle >= tp->t_rxtcur) -+ /* -+ * We have been idle for "a while" and no acks are -+ * expected to clock out any data we send -- -+ * slow start to get ack "clock" running again. -+ */ -+ tp->snd_cwnd = tp->t_maxseg; -+again: -+ sendalot = 0; -+ off = tp->snd_nxt - tp->snd_una; -+ win = MIN(tp->snd_wnd, tp->snd_cwnd); -+ -+ flags = tcp_outflags[tp->t_state]; -+ -+ DEBUG_MISC(" --- tcp_output flags = 0x%x", flags); -+ -+ /* -+ * If in persist timeout with window of 0, send 1 byte. -+ * Otherwise, if window is small but nonzero -+ * and timer expired, we will send what we can -+ * and go to transmit state. -+ */ -+ if (tp->t_force) { -+ if (win == 0) { -+ /* -+ * If we still have some data to send, then -+ * clear the FIN bit. Usually this would -+ * happen below when it realizes that we -+ * aren't sending all the data. 
However, -+ * if we have exactly 1 byte of unset data, -+ * then it won't clear the FIN bit below, -+ * and if we are in persist state, we wind -+ * up sending the packet without recording -+ * that we sent the FIN bit. -+ * -+ * We can't just blindly clear the FIN bit, -+ * because if we don't have any more data -+ * to send then the probe will be the FIN -+ * itself. -+ */ -+ if (off < so->so_snd.sb_cc) -+ flags &= ~TH_FIN; -+ win = 1; -+ } else { -+ tp->t_timer[TCPT_PERSIST] = 0; -+ tp->t_rxtshift = 0; -+ } -+ } -+ -+ len = MIN(so->so_snd.sb_cc, win) - off; -+ -+ if (len < 0) { -+ /* -+ * If FIN has been sent but not acked, -+ * but we haven't been called to retransmit, -+ * len will be -1. Otherwise, window shrank -+ * after we sent into it. If window shrank to 0, -+ * cancel pending retransmit and pull snd_nxt -+ * back to (closed) window. We will enter persist -+ * state below. If the window didn't close completely, -+ * just wait for an ACK. -+ */ -+ len = 0; -+ if (win == 0) { -+ tp->t_timer[TCPT_REXMT] = 0; -+ tp->snd_nxt = tp->snd_una; -+ } -+ } -+ -+ if (len > tp->t_maxseg) { -+ len = tp->t_maxseg; -+ sendalot = 1; -+ } -+ if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) -+ flags &= ~TH_FIN; -+ -+ win = sbspace(&so->so_rcv); -+ -+ /* -+ * Sender silly window avoidance. If connection is idle -+ * and can send all data, a maximum segment, -+ * at least a maximum default-size segment do it, -+ * or are forced, do it; otherwise don't bother. -+ * If peer's buffer is tiny, then send -+ * when window is at least half open. -+ * If retransmitting (possibly after persist timer forced us -+ * to send into a small window), then must resend. 
-+ */ -+ if (len) { -+ if (len == tp->t_maxseg) -+ goto send; -+ if ((1 || idle || tp->t_flags & TF_NODELAY) && -+ len + off >= so->so_snd.sb_cc) -+ goto send; -+ if (tp->t_force) -+ goto send; -+ if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) -+ goto send; -+ if (SEQ_LT(tp->snd_nxt, tp->snd_max)) -+ goto send; -+ } -+ -+ /* -+ * Compare available window to amount of window -+ * known to peer (as advertised window less -+ * next expected input). If the difference is at least two -+ * max size segments, or at least 50% of the maximum possible -+ * window, then want to send a window update to peer. -+ */ -+ if (win > 0) { -+ /* -+ * "adv" is the amount we can increase the window, -+ * taking into account that we are limited by -+ * TCP_MAXWIN << tp->rcv_scale. -+ */ -+ long adv = MIN(win, (long)TCP_MAXWIN << tp->rcv_scale) - -+ (tp->rcv_adv - tp->rcv_nxt); -+ -+ if (adv >= (long)(2 * tp->t_maxseg)) -+ goto send; -+ if (2 * adv >= (long)so->so_rcv.sb_datalen) -+ goto send; -+ } -+ -+ /* -+ * Send if we owe peer an ACK. -+ */ -+ if (tp->t_flags & TF_ACKNOW) -+ goto send; -+ if (flags & (TH_SYN | TH_RST)) -+ goto send; -+ if (SEQ_GT(tp->snd_up, tp->snd_una)) -+ goto send; -+ /* -+ * If our state indicates that FIN should be sent -+ * and we have not yet done so, or we're retransmitting the FIN, -+ * then we need to send. -+ */ -+ if (flags & TH_FIN && -+ ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) -+ goto send; -+ -+ /* -+ * TCP window updates are not reliable, rather a polling protocol -+ * using ``persist'' packets is used to insure receipt of window -+ * updates. The three ``states'' for the output side are: -+ * idle not doing retransmits or persists -+ * persisting to move a small or zero window -+ * (re)transmitting and thereby not persisting -+ * -+ * tp->t_timer[TCPT_PERSIST] -+ * is set when we are in persist state. -+ * tp->t_force -+ * is set when we are called to send a persist packet. 
-+ * tp->t_timer[TCPT_REXMT] -+ * is set when we are retransmitting -+ * The output side is idle when both timers are zero. -+ * -+ * If send window is too small, there is data to transmit, and no -+ * retransmit or persist is pending, then go to persist state. -+ * If nothing happens soon, send when timer expires: -+ * if window is nonzero, transmit what we can, -+ * otherwise force out a byte. -+ */ -+ if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && -+ tp->t_timer[TCPT_PERSIST] == 0) { -+ tp->t_rxtshift = 0; -+ tcp_setpersist(tp); -+ } -+ -+ /* -+ * No reason to send a segment, just return. -+ */ -+ return (0); -+ -+send: -+ /* -+ * Before ESTABLISHED, force sending of initial options -+ * unless TCP set not to do any options. -+ * NOTE: we assume that the IP/TCP header plus TCP options -+ * always fit in a single mbuf, leaving room for a maximum -+ * link header, i.e. -+ * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN -+ */ -+ optlen = 0; -+ hdrlen = sizeof(struct tcpiphdr); -+ if (flags & TH_SYN) { -+ tp->snd_nxt = tp->iss; -+ if ((tp->t_flags & TF_NOOPT) == 0) { -+ uint16_t mss; -+ -+ opt[0] = TCPOPT_MAXSEG; -+ opt[1] = 4; -+ mss = htons((uint16_t)tcp_mss(tp, 0)); -+ memcpy((char *)(opt + 2), (char *)&mss, sizeof(mss)); -+ optlen = 4; -+ } -+ } -+ -+ hdrlen += optlen; -+ -+ /* -+ * Adjust data length if insertion of options will -+ * bump the packet length beyond the t_maxseg length. -+ */ -+ if (len > tp->t_maxseg - optlen) { -+ len = tp->t_maxseg - optlen; -+ sendalot = 1; -+ } -+ -+ /* -+ * Grab a header mbuf, attaching a copy of data to -+ * be transmitted, and initialize the header from -+ * the template for sends on this connection. -+ */ -+ if (len) { -+ m = m_get(so->slirp); -+ if (m == NULL) { -+ error = 1; -+ goto out; -+ } -+ m->m_data += IF_MAXLINKHDR; -+ m->m_len = hdrlen; -+ -+ sbcopy(&so->so_snd, off, (int)len, mtod(m, char *) + hdrlen); -+ m->m_len += len; -+ -+ /* -+ * If we're sending everything we've got, set PUSH. 
-+ * (This will keep happy those implementations which only -+ * give data to the user when a buffer fills or -+ * a PUSH comes in.) -+ */ -+ if (off + len == so->so_snd.sb_cc) -+ flags |= TH_PUSH; -+ } else { -+ m = m_get(so->slirp); -+ if (m == NULL) { -+ error = 1; -+ goto out; -+ } -+ m->m_data += IF_MAXLINKHDR; -+ m->m_len = hdrlen; -+ } -+ -+ ti = mtod(m, struct tcpiphdr *); -+ -+ memcpy((char *)ti, &tp->t_template, sizeof(struct tcpiphdr)); -+ -+ /* -+ * Fill in fields, remembering maximum advertised -+ * window for use in delaying messages about window sizes. -+ * If resending a FIN, be sure not to use a new sequence number. -+ */ -+ if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && -+ tp->snd_nxt == tp->snd_max) -+ tp->snd_nxt--; -+ /* -+ * If we are doing retransmissions, then snd_nxt will -+ * not reflect the first unsent octet. For ACK only -+ * packets, we do not want the sequence number of the -+ * retransmitted packet, we want the sequence number -+ * of the next unsent octet. So, if there is no data -+ * (and no SYN or FIN), use snd_max instead of snd_nxt -+ * when filling in ti_seq. But if we are in persist -+ * state, snd_max might reflect one byte beyond the -+ * right edge of the window, so use snd_nxt in that -+ * case, since we know we aren't doing a retransmission. -+ * (retransmit and persist are mutually exclusive...) -+ */ -+ if (len || (flags & (TH_SYN | TH_FIN)) || tp->t_timer[TCPT_PERSIST]) -+ ti->ti_seq = htonl(tp->snd_nxt); -+ else -+ ti->ti_seq = htonl(tp->snd_max); -+ ti->ti_ack = htonl(tp->rcv_nxt); -+ if (optlen) { -+ memcpy((char *)(ti + 1), (char *)opt, optlen); -+ ti->ti_off = (sizeof(struct tcphdr) + optlen) >> 2; -+ } -+ ti->ti_flags = flags; -+ /* -+ * Calculate receive window. Don't shrink window, -+ * but avoid silly window syndrome. 
-+ */ -+ if (win < (long)(so->so_rcv.sb_datalen / 4) && win < (long)tp->t_maxseg) -+ win = 0; -+ if (win > (long)TCP_MAXWIN << tp->rcv_scale) -+ win = (long)TCP_MAXWIN << tp->rcv_scale; -+ if (win < (long)(tp->rcv_adv - tp->rcv_nxt)) -+ win = (long)(tp->rcv_adv - tp->rcv_nxt); -+ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); -+ -+ if (SEQ_GT(tp->snd_up, tp->snd_una)) { -+ ti->ti_urp = htons((uint16_t)(tp->snd_up - ntohl(ti->ti_seq))); -+ ti->ti_flags |= TH_URG; -+ } else -+ /* -+ * If no urgent pointer to send, then we pull -+ * the urgent pointer to the left edge of the send window -+ * so that it doesn't drift into the send window on sequence -+ * number wraparound. -+ */ -+ tp->snd_up = tp->snd_una; /* drag it along */ -+ -+ /* -+ * Put TCP length in extended header, and then -+ * checksum extended header and data. -+ */ -+ if (len + optlen) -+ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + optlen + len)); -+ ti->ti_sum = cksum(m, (int)(hdrlen + len)); -+ -+ /* -+ * In transmit state, time the transmission and arrange for -+ * the retransmit. In persist state, just set snd_max. -+ */ -+ if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { -+ tcp_seq startseq = tp->snd_nxt; -+ -+ /* -+ * Advance snd_nxt over sequence space of this segment. -+ */ -+ if (flags & (TH_SYN | TH_FIN)) { -+ if (flags & TH_SYN) -+ tp->snd_nxt++; -+ if (flags & TH_FIN) { -+ tp->snd_nxt++; -+ tp->t_flags |= TF_SENTFIN; -+ } -+ } -+ tp->snd_nxt += len; -+ if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { -+ tp->snd_max = tp->snd_nxt; -+ /* -+ * Time this transmission if not a retransmission and -+ * not currently timing anything. -+ */ -+ if (tp->t_rtt == 0) { -+ tp->t_rtt = 1; -+ tp->t_rtseq = startseq; -+ } -+ } -+ -+ /* -+ * Set retransmit timer if not currently set, -+ * and not doing an ack or a keep-alive probe. -+ * Initial value for retransmit timer is smoothed -+ * round-trip time + 2 * round-trip time variance. 
-+ * Initialize shift counter which is used for backoff -+ * of retransmit time. -+ */ -+ if (tp->t_timer[TCPT_REXMT] == 0 && tp->snd_nxt != tp->snd_una) { -+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; -+ if (tp->t_timer[TCPT_PERSIST]) { -+ tp->t_timer[TCPT_PERSIST] = 0; -+ tp->t_rxtshift = 0; -+ } -+ } -+ } else if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) -+ tp->snd_max = tp->snd_nxt + len; -+ -+ /* -+ * Fill in IP length and desired time to live and -+ * send to IP level. There should be a better way -+ * to handle ttl and tos; we could keep them in -+ * the template, but need a way to checksum without them. -+ */ -+ m->m_len = hdrlen + len; /* XXX Needed? m_len should be correct */ -+ tcpiph_save = *mtod(m, struct tcpiphdr *); -+ -+ switch (so->so_ffamily) { -+ case AF_INET: -+ m->m_data += -+ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); -+ m->m_len -= -+ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); -+ ip = mtod(m, struct ip *); -+ -+ ip->ip_len = m->m_len; -+ ip->ip_dst = tcpiph_save.ti_dst; -+ ip->ip_src = tcpiph_save.ti_src; -+ ip->ip_p = tcpiph_save.ti_pr; -+ -+ ip->ip_ttl = IPDEFTTL; -+ ip->ip_tos = so->so_iptos; -+ error = ip_output(so, m); -+ break; -+ -+ case AF_INET6: -+ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - -+ sizeof(struct ip6); -+ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - -+ sizeof(struct ip6); -+ ip6 = mtod(m, struct ip6 *); -+ -+ ip6->ip_pl = tcpiph_save.ti_len; -+ ip6->ip_dst = tcpiph_save.ti_dst6; -+ ip6->ip_src = tcpiph_save.ti_src6; -+ ip6->ip_nh = tcpiph_save.ti_nh6; -+ -+ error = ip6_output(so, m, 0); -+ break; -+ -+ default: -+ g_assert_not_reached(); -+ } -+ -+ if (error) { -+ out: -+ return (error); -+ } -+ -+ /* -+ * Data sent (as far as we can tell). -+ * If this advertises a larger window than any other segment, -+ * then remember the size of the advertised window. -+ * Any pending ACK has now been sent. 
-+ */ -+ if (win > 0 && SEQ_GT(tp->rcv_nxt + win, tp->rcv_adv)) -+ tp->rcv_adv = tp->rcv_nxt + win; -+ tp->last_ack_sent = tp->rcv_nxt; -+ tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); -+ if (sendalot) -+ goto again; -+ -+ return (0); -+} -+ -+void tcp_setpersist(struct tcpcb *tp) -+{ -+ int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; -+ -+ /* -+ * Start/restart persistence timer. -+ */ -+ TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], t * tcp_backoff[tp->t_rxtshift], -+ TCPTV_PERSMIN, TCPTV_PERSMAX); -+ if (tp->t_rxtshift < TCP_MAXRXTSHIFT) -+ tp->t_rxtshift++; -+} -diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -new file mode 100644 -index 0000000000..a1016d90df ---- /dev/null -+++ b/slirp/src/tcp_subr.c -@@ -0,0 +1,980 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1990, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93 -+ * tcp_subr.c,v 1.5 1994/10/08 22:39:58 phk Exp -+ */ -+ -+/* -+ * Changes and additions relating to SLiRP -+ * Copyright (c) 1995 Danny Gasparovski. -+ */ -+ -+#include "slirp.h" -+ -+/* patchable/settable parameters for tcp */ -+/* Don't do rfc1323 performance enhancements */ -+#define TCP_DO_RFC1323 0 -+ -+/* -+ * Tcp initialization -+ */ -+void tcp_init(Slirp *slirp) -+{ -+ slirp->tcp_iss = 1; /* wrong */ -+ slirp->tcb.so_next = slirp->tcb.so_prev = &slirp->tcb; -+ slirp->tcp_last_so = &slirp->tcb; -+} -+ -+void tcp_cleanup(Slirp *slirp) -+{ -+ while (slirp->tcb.so_next != &slirp->tcb) { -+ tcp_close(sototcpcb(slirp->tcb.so_next)); -+ } -+} -+ -+/* -+ * Create template to be used to send tcp packets on a connection. -+ * Call after host entry created, fills -+ * in a skeletal tcp/ip header, minimizing the amount of work -+ * necessary when the connection is used. 
-+ */ -+void tcp_template(struct tcpcb *tp) -+{ -+ struct socket *so = tp->t_socket; -+ register struct tcpiphdr *n = &tp->t_template; -+ -+ n->ti_mbuf = NULL; -+ memset(&n->ti, 0, sizeof(n->ti)); -+ n->ti_x0 = 0; -+ switch (so->so_ffamily) { -+ case AF_INET: -+ n->ti_pr = IPPROTO_TCP; -+ n->ti_len = htons(sizeof(struct tcphdr)); -+ n->ti_src = so->so_faddr; -+ n->ti_dst = so->so_laddr; -+ n->ti_sport = so->so_fport; -+ n->ti_dport = so->so_lport; -+ break; -+ -+ case AF_INET6: -+ n->ti_nh6 = IPPROTO_TCP; -+ n->ti_len = htons(sizeof(struct tcphdr)); -+ n->ti_src6 = so->so_faddr6; -+ n->ti_dst6 = so->so_laddr6; -+ n->ti_sport = so->so_fport6; -+ n->ti_dport = so->so_lport6; -+ break; -+ -+ default: -+ g_assert_not_reached(); -+ } -+ -+ n->ti_seq = 0; -+ n->ti_ack = 0; -+ n->ti_x2 = 0; -+ n->ti_off = 5; -+ n->ti_flags = 0; -+ n->ti_win = 0; -+ n->ti_sum = 0; -+ n->ti_urp = 0; -+} -+ -+/* -+ * Send a single message to the TCP at address specified by -+ * the given TCP/IP header. If m == 0, then we make a copy -+ * of the tcpiphdr at ti and send directly to the addressed host. -+ * This is used to force keep alive messages out using the TCP -+ * template for a connection tp->t_template. If flags are given -+ * then we send a message back to the TCP which originated the -+ * segment ti, and discard the mbuf containing it and any other -+ * attached mbufs. -+ * -+ * In any case the ack and sequence number of the transmitted -+ * segment are as specified by the parameters. 
-+ */ -+void tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, -+ tcp_seq ack, tcp_seq seq, int flags, unsigned short af) -+{ -+ register int tlen; -+ int win = 0; -+ -+ DEBUG_CALL("tcp_respond"); -+ DEBUG_ARG("tp = %p", tp); -+ DEBUG_ARG("ti = %p", ti); -+ DEBUG_ARG("m = %p", m); -+ DEBUG_ARG("ack = %u", ack); -+ DEBUG_ARG("seq = %u", seq); -+ DEBUG_ARG("flags = %x", flags); -+ -+ if (tp) -+ win = sbspace(&tp->t_socket->so_rcv); -+ if (m == NULL) { -+ if (!tp || (m = m_get(tp->t_socket->slirp)) == NULL) -+ return; -+ tlen = 0; -+ m->m_data += IF_MAXLINKHDR; -+ *mtod(m, struct tcpiphdr *) = *ti; -+ ti = mtod(m, struct tcpiphdr *); -+ switch (af) { -+ case AF_INET: -+ ti->ti.ti_i4.ih_x1 = 0; -+ break; -+ case AF_INET6: -+ ti->ti.ti_i6.ih_x1 = 0; -+ break; -+ default: -+ g_assert_not_reached(); -+ } -+ flags = TH_ACK; -+ } else { -+ /* -+ * ti points into m so the next line is just making -+ * the mbuf point to ti -+ */ -+ m->m_data = (char *)ti; -+ -+ m->m_len = sizeof(struct tcpiphdr); -+ tlen = 0; -+#define xchg(a, b, type) \ -+ { \ -+ type t; \ -+ t = a; \ -+ a = b; \ -+ b = t; \ -+ } -+ switch (af) { -+ case AF_INET: -+ xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, uint32_t); -+ xchg(ti->ti_dport, ti->ti_sport, uint16_t); -+ break; -+ case AF_INET6: -+ xchg(ti->ti_dst6, ti->ti_src6, struct in6_addr); -+ xchg(ti->ti_dport, ti->ti_sport, uint16_t); -+ break; -+ default: -+ g_assert_not_reached(); -+ } -+#undef xchg -+ } -+ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + tlen)); -+ tlen += sizeof(struct tcpiphdr); -+ m->m_len = tlen; -+ -+ ti->ti_mbuf = NULL; -+ ti->ti_x0 = 0; -+ ti->ti_seq = htonl(seq); -+ ti->ti_ack = htonl(ack); -+ ti->ti_x2 = 0; -+ ti->ti_off = sizeof(struct tcphdr) >> 2; -+ ti->ti_flags = flags; -+ if (tp) -+ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); -+ else -+ ti->ti_win = htons((uint16_t)win); -+ ti->ti_urp = 0; -+ ti->ti_sum = 0; -+ ti->ti_sum = cksum(m, tlen); -+ -+ struct tcpiphdr tcpiph_save = *(mtod(m, 
struct tcpiphdr *)); -+ struct ip *ip; -+ struct ip6 *ip6; -+ -+ switch (af) { -+ case AF_INET: -+ m->m_data += -+ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); -+ m->m_len -= -+ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); -+ ip = mtod(m, struct ip *); -+ ip->ip_len = m->m_len; -+ ip->ip_dst = tcpiph_save.ti_dst; -+ ip->ip_src = tcpiph_save.ti_src; -+ ip->ip_p = tcpiph_save.ti_pr; -+ -+ if (flags & TH_RST) { -+ ip->ip_ttl = MAXTTL; -+ } else { -+ ip->ip_ttl = IPDEFTTL; -+ } -+ -+ ip_output(NULL, m); -+ break; -+ -+ case AF_INET6: -+ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - -+ sizeof(struct ip6); -+ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - -+ sizeof(struct ip6); -+ ip6 = mtod(m, struct ip6 *); -+ ip6->ip_pl = tcpiph_save.ti_len; -+ ip6->ip_dst = tcpiph_save.ti_dst6; -+ ip6->ip_src = tcpiph_save.ti_src6; -+ ip6->ip_nh = tcpiph_save.ti_nh6; -+ -+ ip6_output(NULL, m, 0); -+ break; -+ -+ default: -+ g_assert_not_reached(); -+ } -+} -+ -+/* -+ * Create a new TCP control block, making an -+ * empty reassembly queue and hooking it to the argument -+ * protocol control block. -+ */ -+struct tcpcb *tcp_newtcpcb(struct socket *so) -+{ -+ register struct tcpcb *tp; -+ -+ tp = g_new0(struct tcpcb, 1); -+ tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; -+ /* -+ * 40: length of IPv4 header (20) + TCP header (20) -+ * 60: length of IPv6 header (40) + TCP header (20) -+ */ -+ tp->t_maxseg = -+ MIN(so->slirp->if_mtu - ((so->so_ffamily == AF_INET) ? 40 : 60), -+ TCP_MAXSEG_MAX); -+ -+ tp->t_flags = TCP_DO_RFC1323 ? (TF_REQ_SCALE | TF_REQ_TSTMP) : 0; -+ tp->t_socket = so; -+ -+ /* -+ * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no -+ * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives -+ * reasonable initial retransmit time. 
-+ */ -+ tp->t_srtt = TCPTV_SRTTBASE; -+ tp->t_rttvar = TCPTV_SRTTDFLT << 2; -+ tp->t_rttmin = TCPTV_MIN; -+ -+ TCPT_RANGESET(tp->t_rxtcur, -+ ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1, -+ TCPTV_MIN, TCPTV_REXMTMAX); -+ -+ tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; -+ tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; -+ tp->t_state = TCPS_CLOSED; -+ -+ so->so_tcpcb = tp; -+ -+ return (tp); -+} -+ -+/* -+ * Drop a TCP connection, reporting -+ * the specified error. If connection is synchronized, -+ * then send a RST to peer. -+ */ -+struct tcpcb *tcp_drop(struct tcpcb *tp, int err) -+{ -+ DEBUG_CALL("tcp_drop"); -+ DEBUG_ARG("tp = %p", tp); -+ DEBUG_ARG("errno = %d", errno); -+ -+ if (TCPS_HAVERCVDSYN(tp->t_state)) { -+ tp->t_state = TCPS_CLOSED; -+ (void)tcp_output(tp); -+ } -+ return (tcp_close(tp)); -+} -+ -+/* -+ * Close a TCP control block: -+ * discard all space held by the tcp -+ * discard internet protocol block -+ * wake up any sleepers -+ */ -+struct tcpcb *tcp_close(struct tcpcb *tp) -+{ -+ register struct tcpiphdr *t; -+ struct socket *so = tp->t_socket; -+ Slirp *slirp = so->slirp; -+ register struct mbuf *m; -+ -+ DEBUG_CALL("tcp_close"); -+ DEBUG_ARG("tp = %p", tp); -+ -+ /* free the reassembly queue, if any */ -+ t = tcpfrag_list_first(tp); -+ while (!tcpfrag_list_end(t, tp)) { -+ t = tcpiphdr_next(t); -+ m = tcpiphdr_prev(t)->ti_mbuf; -+ remque(tcpiphdr2qlink(tcpiphdr_prev(t))); -+ m_free(m); -+ } -+ g_free(tp); -+ so->so_tcpcb = NULL; -+ /* clobber input socket cache if we're closing the cached connection */ -+ if (so == slirp->tcp_last_so) -+ slirp->tcp_last_so = &slirp->tcb; -+ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); -+ closesocket(so->s); -+ sbfree(&so->so_rcv); -+ sbfree(&so->so_snd); -+ sofree(so); -+ return ((struct tcpcb *)0); -+} -+ -+/* -+ * TCP protocol interface to socket abstraction. 
-+ */ -+ -+/* -+ * User issued close, and wish to trail through shutdown states: -+ * if never received SYN, just forget it. If got a SYN from peer, -+ * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. -+ * If already got a FIN from peer, then almost done; go to LAST_ACK -+ * state. In all other cases, have already sent FIN to peer (e.g. -+ * after PRU_SHUTDOWN), and just have to play tedious game waiting -+ * for peer to send FIN or not respond to keep-alives, etc. -+ * We can let the user exit from the close as soon as the FIN is acked. -+ */ -+void tcp_sockclosed(struct tcpcb *tp) -+{ -+ DEBUG_CALL("tcp_sockclosed"); -+ DEBUG_ARG("tp = %p", tp); -+ -+ if (!tp) { -+ return; -+ } -+ -+ switch (tp->t_state) { -+ case TCPS_CLOSED: -+ case TCPS_LISTEN: -+ case TCPS_SYN_SENT: -+ tp->t_state = TCPS_CLOSED; -+ tcp_close(tp); -+ return; -+ -+ case TCPS_SYN_RECEIVED: -+ case TCPS_ESTABLISHED: -+ tp->t_state = TCPS_FIN_WAIT_1; -+ break; -+ -+ case TCPS_CLOSE_WAIT: -+ tp->t_state = TCPS_LAST_ACK; -+ break; -+ } -+ tcp_output(tp); -+} -+ -+/* -+ * Connect to a host on the Internet -+ * Called by tcp_input -+ * Only do a connect, the tcp fields will be set in tcp_input -+ * return 0 if there's a result of the connect, -+ * else return -1 means we're still connecting -+ * The return value is almost always -1 since the socket is -+ * nonblocking. Connect returns after the SYN is sent, and does -+ * not wait for ACK+SYN. 
-+ */ -+int tcp_fconnect(struct socket *so, unsigned short af) -+{ -+ int ret = 0; -+ -+ DEBUG_CALL("tcp_fconnect"); -+ DEBUG_ARG("so = %p", so); -+ -+ ret = so->s = slirp_socket(af, SOCK_STREAM, 0); -+ if (ret >= 0) { -+ ret = slirp_bind_outbound(so, af); -+ if (ret < 0) { -+ // bind failed - close socket -+ closesocket(so->s); -+ so->s = -1; -+ return (ret); -+ } -+ } -+ -+ if (ret >= 0) { -+ int opt, s = so->s; -+ struct sockaddr_storage addr; -+ -+ slirp_set_nonblock(s); -+ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); -+ slirp_socket_set_fast_reuse(s); -+ opt = 1; -+ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(opt)); -+ opt = 1; -+ setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(opt)); -+ -+ addr = so->fhost.ss; -+ DEBUG_CALL(" connect()ing"); -+ if (sotranslate_out(so, &addr) < 0) { -+ return -1; -+ } -+ -+ /* We don't care what port we get */ -+ ret = connect(s, (struct sockaddr *)&addr, sockaddr_size(&addr)); -+ -+ /* -+ * If it's not in progress, it failed, so we just return 0, -+ * without clearing SS_NOFDREF -+ */ -+ soisfconnecting(so); -+ } -+ -+ return (ret); -+} -+ -+/* -+ * Accept the socket and connect to the local-host -+ * -+ * We have a problem. The correct thing to do would be -+ * to first connect to the local-host, and only if the -+ * connection is accepted, then do an accept() here. -+ * But, a) we need to know who's trying to connect -+ * to the socket to be able to SYN the local-host, and -+ * b) we are already connected to the foreign host by -+ * the time it gets to accept(), so... We simply accept -+ * here and SYN the local-host. 
-+ */ -+void tcp_connect(struct socket *inso) -+{ -+ Slirp *slirp = inso->slirp; -+ struct socket *so; -+ struct sockaddr_storage addr; -+ socklen_t addrlen = sizeof(struct sockaddr_storage); -+ struct tcpcb *tp; -+ int s, opt; -+ -+ DEBUG_CALL("tcp_connect"); -+ DEBUG_ARG("inso = %p", inso); -+ -+ /* -+ * If it's an SS_ACCEPTONCE socket, no need to socreate() -+ * another socket, just use the accept() socket. -+ */ -+ if (inso->so_state & SS_FACCEPTONCE) { -+ /* FACCEPTONCE already have a tcpcb */ -+ so = inso; -+ } else { -+ so = socreate(slirp); -+ tcp_attach(so); -+ so->lhost = inso->lhost; -+ so->so_ffamily = inso->so_ffamily; -+ } -+ -+ tcp_mss(sototcpcb(so), 0); -+ -+ s = accept(inso->s, (struct sockaddr *)&addr, &addrlen); -+ if (s < 0) { -+ tcp_close(sototcpcb(so)); /* This will sofree() as well */ -+ return; -+ } -+ slirp_set_nonblock(s); -+ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); -+ slirp_socket_set_fast_reuse(s); -+ opt = 1; -+ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); -+ slirp_socket_set_nodelay(s); -+ -+ so->fhost.ss = addr; -+ sotranslate_accept(so); -+ -+ /* Close the accept() socket, set right state */ -+ if (inso->so_state & SS_FACCEPTONCE) { -+ /* If we only accept once, close the accept() socket */ -+ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); -+ closesocket(so->s); -+ -+ /* Don't select it yet, even though we have an FD */ -+ /* if it's not FACCEPTONCE, it's already NOFDREF */ -+ so->so_state = SS_NOFDREF; -+ } -+ so->s = s; -+ so->so_state |= SS_INCOMING; -+ -+ so->so_iptos = tcp_tos(so); -+ tp = sototcpcb(so); -+ -+ tcp_template(tp); -+ -+ tp->t_state = TCPS_SYN_SENT; -+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; -+ tp->iss = slirp->tcp_iss; -+ slirp->tcp_iss += TCP_ISSINCR / 2; -+ tcp_sendseqinit(tp); -+ tcp_output(tp); -+} -+ -+/* -+ * Attach a TCPCB to a socket. 
-+ */ -+void tcp_attach(struct socket *so) -+{ -+ so->so_tcpcb = tcp_newtcpcb(so); -+ insque(so, &so->slirp->tcb); -+} -+ -+/* -+ * Set the socket's type of service field -+ */ -+static const struct tos_t tcptos[] = { -+ { 0, 20, IPTOS_THROUGHPUT, 0 }, /* ftp data */ -+ { 21, 21, IPTOS_LOWDELAY, EMU_FTP }, /* ftp control */ -+ { 0, 23, IPTOS_LOWDELAY, 0 }, /* telnet */ -+ { 0, 80, IPTOS_THROUGHPUT, 0 }, /* WWW */ -+ { 0, 513, IPTOS_LOWDELAY, EMU_RLOGIN | EMU_NOCONNECT }, /* rlogin */ -+ { 0, 544, IPTOS_LOWDELAY, EMU_KSH }, /* kshell */ -+ { 0, 543, IPTOS_LOWDELAY, 0 }, /* klogin */ -+ { 0, 6667, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC */ -+ { 0, 6668, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC undernet */ -+ { 0, 7070, IPTOS_LOWDELAY, EMU_REALAUDIO }, /* RealAudio control */ -+ { 0, 113, IPTOS_LOWDELAY, EMU_IDENT }, /* identd protocol */ -+ { 0, 0, 0, 0 } -+}; -+ -+/* -+ * Return TOS according to the above table -+ */ -+uint8_t tcp_tos(struct socket *so) -+{ -+ int i = 0; -+ -+ while (tcptos[i].tos) { -+ if ((tcptos[i].fport && (ntohs(so->so_fport) == tcptos[i].fport)) || -+ (tcptos[i].lport && (ntohs(so->so_lport) == tcptos[i].lport))) { -+ if (so->slirp->enable_emu) -+ so->so_emu = tcptos[i].emu; -+ return tcptos[i].tos; -+ } -+ i++; -+ } -+ return 0; -+} -+ -+/* -+ * Emulate programs that try and connect to us -+ * This includes ftp (the data connection is -+ * initiated by the server) and IRC (DCC CHAT and -+ * DCC SEND) for now -+ * -+ * NOTE: It's possible to crash SLiRP by sending it -+ * unstandard strings to emulate... if this is a problem, -+ * more checks are needed here -+ * -+ * XXX Assumes the whole command came in one packet -+ * XXX If there is more than one command in the packet, the others may -+ * be truncated. -+ * XXX If the command is too long, it may be truncated. -+ * -+ * XXX Some ftp clients will have their TOS set to -+ * LOWDELAY and so Nagel will kick in. 
Because of this, -+ * we'll get the first letter, followed by the rest, so -+ * we simply scan for ORT instead of PORT... -+ * DCC doesn't have this problem because there's other stuff -+ * in the packet before the DCC command. -+ * -+ * Return 1 if the mbuf m is still valid and should be -+ * sbappend()ed -+ * -+ * NOTE: if you return 0 you MUST m_free() the mbuf! -+ */ -+int tcp_emu(struct socket *so, struct mbuf *m) -+{ -+ Slirp *slirp = so->slirp; -+ unsigned n1, n2, n3, n4, n5, n6; -+ char buff[257]; -+ uint32_t laddr; -+ unsigned lport; -+ char *bptr; -+ -+ DEBUG_CALL("tcp_emu"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("m = %p", m); -+ -+ switch (so->so_emu) { -+ int x, i; -+ -+ /* TODO: IPv6 */ -+ case EMU_IDENT: -+ /* -+ * Identification protocol as per rfc-1413 -+ */ -+ -+ { -+ struct socket *tmpso; -+ struct sockaddr_in addr; -+ socklen_t addrlen = sizeof(struct sockaddr_in); -+ char *eol = g_strstr_len(m->m_data, m->m_len, "\r\n"); -+ -+ if (!eol) { -+ return 1; -+ } -+ -+ *eol = '\0'; -+ if (sscanf(m->m_data, "%u%*[ ,]%u", &n1, &n2) == 2) { -+ HTONS(n1); -+ HTONS(n2); -+ /* n2 is the one on our host */ -+ for (tmpso = slirp->tcb.so_next; tmpso != &slirp->tcb; -+ tmpso = tmpso->so_next) { -+ if (tmpso->so_laddr.s_addr == so->so_laddr.s_addr && -+ tmpso->so_lport == n2 && -+ tmpso->so_faddr.s_addr == so->so_faddr.s_addr && -+ tmpso->so_fport == n1) { -+ if (getsockname(tmpso->s, (struct sockaddr *)&addr, -+ &addrlen) == 0) -+ n2 = addr.sin_port; -+ break; -+ } -+ } -+ NTOHS(n1); -+ NTOHS(n2); -+ m_inc(m, g_snprintf(NULL, 0, "%d,%d\r\n", n1, n2) + 1); -+ m->m_len = slirp_fmt(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); -+ } else { -+ *eol = '\r'; -+ } -+ -+ return 1; -+ } -+ -+ case EMU_FTP: /* ftp */ -+ m_inc(m, m->m_len + 1); -+ *(m->m_data + m->m_len) = 0; /* NUL terminate for strstr */ -+ if ((bptr = (char *)strstr(m->m_data, "ORT")) != NULL) { -+ /* -+ * Need to emulate the PORT command -+ */ -+ x = sscanf(bptr, "ORT 
%u,%u,%u,%u,%u,%u\r\n%256[^\177]", &n1, &n2, -+ &n3, &n4, &n5, &n6, buff); -+ if (x < 6) -+ return 1; -+ -+ laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4)); -+ lport = htons((n5 << 8) | (n6)); -+ -+ if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport, -+ SS_FACCEPTONCE)) == NULL) { -+ return 1; -+ } -+ n6 = ntohs(so->so_fport); -+ -+ n5 = (n6 >> 8) & 0xff; -+ n6 &= 0xff; -+ -+ laddr = ntohl(so->so_faddr.s_addr); -+ -+ n1 = ((laddr >> 24) & 0xff); -+ n2 = ((laddr >> 16) & 0xff); -+ n3 = ((laddr >> 8) & 0xff); -+ n4 = (laddr & 0xff); -+ -+ m->m_len = bptr - m->m_data; /* Adjust length */ -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "ORT %d,%d,%d,%d,%d,%d\r\n%s", -+ n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); -+ return 1; -+ } else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) { -+ /* -+ * Need to emulate the PASV response -+ */ -+ x = sscanf( -+ bptr, -+ "27 Entering Passive Mode (%u,%u,%u,%u,%u,%u)\r\n%256[^\177]", -+ &n1, &n2, &n3, &n4, &n5, &n6, buff); -+ if (x < 6) -+ return 1; -+ -+ laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4)); -+ lport = htons((n5 << 8) | (n6)); -+ -+ if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport, -+ SS_FACCEPTONCE)) == NULL) { -+ return 1; -+ } -+ n6 = ntohs(so->so_fport); -+ -+ n5 = (n6 >> 8) & 0xff; -+ n6 &= 0xff; -+ -+ laddr = ntohl(so->so_faddr.s_addr); -+ -+ n1 = ((laddr >> 24) & 0xff); -+ n2 = ((laddr >> 16) & 0xff); -+ n3 = ((laddr >> 8) & 0xff); -+ n4 = (laddr & 0xff); -+ -+ m->m_len = bptr - m->m_data; /* Adjust length */ -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", -+ n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); -+ return 1; -+ } -+ -+ return 1; -+ -+ case EMU_KSH: -+ /* -+ * The kshell (Kerberos rsh) and shell services both pass -+ * a local port port number to carry signals to the server -+ * and stderr to the client. It is passed at the beginning -+ * of the connection as a NUL-terminated decimal ASCII string. 
-+ */ -+ so->so_emu = 0; -+ for (lport = 0, i = 0; i < m->m_len - 1; ++i) { -+ if (m->m_data[i] < '0' || m->m_data[i] > '9') -+ return 1; /* invalid number */ -+ lport *= 10; -+ lport += m->m_data[i] - '0'; -+ } -+ if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && -+ (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, -+ htons(lport), SS_FACCEPTONCE)) != NULL) -+ m->m_len = slirp_fmt0(m->m_data, M_ROOM(m), -+ "%d", ntohs(so->so_fport)); -+ return 1; -+ -+ case EMU_IRC: -+ /* -+ * Need to emulate DCC CHAT, DCC SEND and DCC MOVE -+ */ -+ m_inc(m, m->m_len + 1); -+ *(m->m_data + m->m_len) = 0; /* NULL terminate the string for strstr */ -+ if ((bptr = (char *)strstr(m->m_data, "DCC")) == NULL) -+ return 1; -+ -+ /* The %256s is for the broken mIRC */ -+ if (sscanf(bptr, "DCC CHAT %256s %u %u", buff, &laddr, &lport) == 3) { -+ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), -+ htons(lport), SS_FACCEPTONCE)) == NULL) { -+ return 1; -+ } -+ m->m_len = bptr - m->m_data; /* Adjust length */ -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "DCC CHAT chat %lu %u%c\n", -+ (unsigned long)ntohl(so->so_faddr.s_addr), -+ ntohs(so->so_fport), 1); -+ } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, -+ &n1) == 4) { -+ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), -+ htons(lport), SS_FACCEPTONCE)) == NULL) { -+ return 1; -+ } -+ m->m_len = bptr - m->m_data; /* Adjust length */ -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "DCC SEND %s %lu %u %u%c\n", buff, -+ (unsigned long)ntohl(so->so_faddr.s_addr), -+ ntohs(so->so_fport), n1, 1); -+ } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, -+ &n1) == 4) { -+ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), -+ htons(lport), SS_FACCEPTONCE)) == NULL) { -+ return 1; -+ } -+ m->m_len = bptr - m->m_data; /* Adjust length */ -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), -+ "DCC MOVE %s %lu %u %u%c\n", buff, -+ (unsigned long)ntohl(so->so_faddr.s_addr), -+ 
ntohs(so->so_fport), n1, 1); -+ } -+ return 1; -+ -+ case EMU_REALAUDIO: -+ /* -+ * RealAudio emulation - JP. We must try to parse the incoming -+ * data and try to find the two characters that contain the -+ * port number. Then we redirect an udp port and replace the -+ * number with the real port we got. -+ * -+ * The 1.0 beta versions of the player are not supported -+ * any more. -+ * -+ * A typical packet for player version 1.0 (release version): -+ * -+ * 0000:50 4E 41 00 05 -+ * 0000:00 01 00 02 1B D7 00 00 67 E6 6C DC 63 00 12 50 ........g.l.c..P -+ * 0010:4E 43 4C 49 45 4E 54 20 31 30 31 20 41 4C 50 48 NCLIENT 101 ALPH -+ * 0020:41 6C 00 00 52 00 17 72 61 66 69 6C 65 73 2F 76 Al..R..rafiles/v -+ * 0030:6F 61 2F 65 6E 67 6C 69 73 68 5F 2E 72 61 79 42 oa/english_.rayB -+ * -+ * Now the port number 0x1BD7 is found at offset 0x04 of the -+ * Now the port number 0x1BD7 is found at offset 0x04 of the -+ * second packet. This time we received five bytes first and -+ * then the rest. You never know how many bytes you get. -+ * -+ * A typical packet for player version 2.0 (beta): -+ * -+ * 0000:50 4E 41 00 06 00 02 00 00 00 01 00 02 1B C1 00 PNA............. -+ * 0010:00 67 75 78 F5 63 00 0A 57 69 6E 32 2E 30 2E 30 .gux.c..Win2.0.0 -+ * 0020:2E 35 6C 00 00 52 00 1C 72 61 66 69 6C 65 73 2F .5l..R..rafiles/ -+ * 0030:77 65 62 73 69 74 65 2F 32 30 72 65 6C 65 61 73 website/20releas -+ * 0040:65 2E 72 61 79 53 00 00 06 36 42 e.rayS...6B -+ * -+ * Port number 0x1BC1 is found at offset 0x0d. -+ * -+ * This is just a horrible switch statement. Variable ra tells -+ * us where we're going. 
-+ */ -+ -+ bptr = m->m_data; -+ while (bptr < m->m_data + m->m_len) { -+ uint16_t p; -+ static int ra = 0; -+ char ra_tbl[4]; -+ -+ ra_tbl[0] = 0x50; -+ ra_tbl[1] = 0x4e; -+ ra_tbl[2] = 0x41; -+ ra_tbl[3] = 0; -+ -+ switch (ra) { -+ case 0: -+ case 2: -+ case 3: -+ if (*bptr++ != ra_tbl[ra]) { -+ ra = 0; -+ continue; -+ } -+ break; -+ -+ case 1: -+ /* -+ * We may get 0x50 several times, ignore them -+ */ -+ if (*bptr == 0x50) { -+ ra = 1; -+ bptr++; -+ continue; -+ } else if (*bptr++ != ra_tbl[ra]) { -+ ra = 0; -+ continue; -+ } -+ break; -+ -+ case 4: -+ /* -+ * skip version number -+ */ -+ bptr++; -+ break; -+ -+ case 5: -+ if (bptr == m->m_data + m->m_len - 1) -+ return 1; /* We need two bytes */ -+ -+ /* -+ * The difference between versions 1.0 and -+ * 2.0 is here. For future versions of -+ * the player this may need to be modified. -+ */ -+ if (*(bptr + 1) == 0x02) -+ bptr += 8; -+ else -+ bptr += 4; -+ break; -+ -+ case 6: -+ /* This is the field containing the port -+ * number that RA-player is listening to. -+ */ -+ -+ if (bptr == m->m_data + m->m_len - 1) -+ return 1; /* We need two bytes */ -+ -+ lport = (((uint8_t *)bptr)[0] << 8) + ((uint8_t *)bptr)[1]; -+ if (lport < 6970) -+ lport += 256; /* don't know why */ -+ if (lport < 6970 || lport > 7170) -+ return 1; /* failed */ -+ -+ /* try to get udp port between 6970 - 7170 */ -+ for (p = 6970; p < 7071; p++) { -+ if (udp_listen(slirp, INADDR_ANY, htons(p), -+ so->so_laddr.s_addr, htons(lport), -+ SS_FACCEPTONCE)) { -+ break; -+ } -+ } -+ if (p == 7071) -+ p = 0; -+ *(uint8_t *)bptr++ = (p >> 8) & 0xff; -+ *(uint8_t *)bptr = p & 0xff; -+ ra = 0; -+ return 1; /* port redirected, we're done */ -+ break; -+ -+ default: -+ ra = 0; -+ } -+ ra++; -+ } -+ return 1; -+ -+ default: -+ /* Ooops, not emulated, won't call tcp_emu again */ -+ so->so_emu = 0; -+ return 1; -+ } -+} -+ -+/* -+ * Do misc. config of SLiRP while its running. 
-+ * Return 0 if this connections is to be closed, 1 otherwise, -+ * return 2 if this is a command-line connection -+ */ -+int tcp_ctl(struct socket *so) -+{ -+ Slirp *slirp = so->slirp; -+ struct sbuf *sb = &so->so_snd; -+ struct gfwd_list *ex_ptr; -+ -+ DEBUG_CALL("tcp_ctl"); -+ DEBUG_ARG("so = %p", so); -+ -+ /* TODO: IPv6 */ -+ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { -+ /* Check if it's pty_exec */ -+ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { -+ if (ex_ptr->ex_fport == so->so_fport && -+ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { -+ if (ex_ptr->write_cb) { -+ so->s = -1; -+ so->guestfwd = ex_ptr; -+ return 1; -+ } -+ DEBUG_MISC(" executing %s", ex_ptr->ex_exec); -+ if (ex_ptr->ex_unix) -+ return open_unix(so, ex_ptr->ex_unix); -+ else -+ return fork_exec(so, ex_ptr->ex_exec); -+ } -+ } -+ } -+ sb->sb_cc = slirp_fmt(sb->sb_wptr, sb->sb_datalen - (sb->sb_wptr - sb->sb_data), -+ "Error: No application configured.\r\n"); -+ sb->sb_wptr += sb->sb_cc; -+ return 0; -+} -diff --git a/slirp/src/tcp_timer.c b/slirp/src/tcp_timer.c -new file mode 100644 -index 0000000000..102023e7cd ---- /dev/null -+++ b/slirp/src/tcp_timer.c -@@ -0,0 +1,286 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1990, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. 
Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93 -+ * tcp_timer.c,v 1.2 1994/08/02 07:49:10 davidg Exp -+ */ -+ -+#include "slirp.h" -+ -+static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer); -+ -+/* -+ * Fast timeout routine for processing delayed acks -+ */ -+void tcp_fasttimo(Slirp *slirp) -+{ -+ register struct socket *so; -+ register struct tcpcb *tp; -+ -+ DEBUG_CALL("tcp_fasttimo"); -+ -+ so = slirp->tcb.so_next; -+ if (so) -+ for (; so != &slirp->tcb; so = so->so_next) -+ if ((tp = (struct tcpcb *)so->so_tcpcb) && -+ (tp->t_flags & TF_DELACK)) { -+ tp->t_flags &= ~TF_DELACK; -+ tp->t_flags |= TF_ACKNOW; -+ (void)tcp_output(tp); -+ } -+} -+ -+/* -+ * Tcp protocol timeout routine called every 500 ms. -+ * Updates the timers in all active tcb's and -+ * causes finite state machine actions if timers expire. 
-+ */ -+void tcp_slowtimo(Slirp *slirp) -+{ -+ register struct socket *ip, *ipnxt; -+ register struct tcpcb *tp; -+ register int i; -+ -+ DEBUG_CALL("tcp_slowtimo"); -+ -+ /* -+ * Search through tcb's and update active timers. -+ */ -+ ip = slirp->tcb.so_next; -+ if (ip == NULL) { -+ return; -+ } -+ for (; ip != &slirp->tcb; ip = ipnxt) { -+ ipnxt = ip->so_next; -+ tp = sototcpcb(ip); -+ if (tp == NULL) { -+ continue; -+ } -+ for (i = 0; i < TCPT_NTIMERS; i++) { -+ if (tp->t_timer[i] && --tp->t_timer[i] == 0) { -+ tcp_timers(tp, i); -+ if (ipnxt->so_prev != ip) -+ goto tpgone; -+ } -+ } -+ tp->t_idle++; -+ if (tp->t_rtt) -+ tp->t_rtt++; -+ tpgone:; -+ } -+ slirp->tcp_iss += TCP_ISSINCR / PR_SLOWHZ; /* increment iss */ -+ slirp->tcp_now++; /* for timestamps */ -+} -+ -+/* -+ * Cancel all timers for TCP tp. -+ */ -+void tcp_canceltimers(struct tcpcb *tp) -+{ -+ register int i; -+ -+ for (i = 0; i < TCPT_NTIMERS; i++) -+ tp->t_timer[i] = 0; -+} -+ -+const int tcp_backoff[TCP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, -+ 64, 64, 64, 64, 64, 64 }; -+ -+/* -+ * TCP timer processing. -+ */ -+static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer) -+{ -+ register int rexmt; -+ -+ DEBUG_CALL("tcp_timers"); -+ -+ switch (timer) { -+ /* -+ * 2 MSL timeout in shutdown went off. If we're closed but -+ * still waiting for peer to close and connection has been idle -+ * too long, or if 2MSL time is up from TIME_WAIT, delete connection -+ * control block. Otherwise, check again in a bit. -+ */ -+ case TCPT_2MSL: -+ if (tp->t_state != TCPS_TIME_WAIT && tp->t_idle <= TCP_MAXIDLE) -+ tp->t_timer[TCPT_2MSL] = TCPTV_KEEPINTVL; -+ else -+ tp = tcp_close(tp); -+ break; -+ -+ /* -+ * Retransmission timer went off. Message has not -+ * been acked within retransmit interval. Back off -+ * to a longer retransmit interval and retransmit one segment. 
-+ */ -+ case TCPT_REXMT: -+ -+ /* -+ * XXXXX If a packet has timed out, then remove all the queued -+ * packets for that session. -+ */ -+ -+ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { -+ /* -+ * This is a hack to suit our terminal server here at the uni of -+ * canberra since they have trouble with zeroes... It usually lets -+ * them through unharmed, but under some conditions, it'll eat the -+ * zeros. If we keep retransmitting it, it'll keep eating the -+ * zeroes, so we keep retransmitting, and eventually the connection -+ * dies... (this only happens on incoming data) -+ * -+ * So, if we were gonna drop the connection from too many -+ * retransmits, don't... instead halve the t_maxseg, which might -+ * break up the NULLs and let them through -+ * -+ * *sigh* -+ */ -+ -+ tp->t_maxseg >>= 1; -+ if (tp->t_maxseg < 32) { -+ /* -+ * We tried our best, now the connection must die! -+ */ -+ tp->t_rxtshift = TCP_MAXRXTSHIFT; -+ tp = tcp_drop(tp, tp->t_softerror); -+ /* tp->t_softerror : ETIMEDOUT); */ /* XXX */ -+ return (tp); /* XXX */ -+ } -+ -+ /* -+ * Set rxtshift to 6, which is still at the maximum -+ * backoff time -+ */ -+ tp->t_rxtshift = 6; -+ } -+ rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; -+ TCPT_RANGESET(tp->t_rxtcur, rexmt, (short)tp->t_rttmin, -+ TCPTV_REXMTMAX); /* XXX */ -+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; -+ /* -+ * If losing, let the lower level know and try for -+ * a better route. Also, if we backed off this far, -+ * our srtt estimate is probably bogus. Clobber it -+ * so we'll take the next rtt measurement as our srtt; -+ * move the current srtt into rttvar to keep the current -+ * retransmit times until then. -+ */ -+ if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { -+ tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); -+ tp->t_srtt = 0; -+ } -+ tp->snd_nxt = tp->snd_una; -+ /* -+ * If timing a segment in this window, stop the timer. 
-+ */ -+ tp->t_rtt = 0; -+ /* -+ * Close the congestion window down to one segment -+ * (we'll open it by one segment for each ack we get). -+ * Since we probably have a window's worth of unacked -+ * data accumulated, this "slow start" keeps us from -+ * dumping all that data as back-to-back packets (which -+ * might overwhelm an intermediate gateway). -+ * -+ * There are two phases to the opening: Initially we -+ * open by one mss on each ack. This makes the window -+ * size increase exponentially with time. If the -+ * window is larger than the path can handle, this -+ * exponential growth results in dropped packet(s) -+ * almost immediately. To get more time between -+ * drops but still "push" the network to take advantage -+ * of improving conditions, we switch from exponential -+ * to linear window opening at some threshold size. -+ * For a threshold, we use half the current window -+ * size, truncated to a multiple of the mss. -+ * -+ * (the minimum cwnd that will give us exponential -+ * growth is 2 mss. We don't allow the threshold -+ * to go below this.) -+ */ -+ { -+ unsigned win = MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; -+ if (win < 2) -+ win = 2; -+ tp->snd_cwnd = tp->t_maxseg; -+ tp->snd_ssthresh = win * tp->t_maxseg; -+ tp->t_dupacks = 0; -+ } -+ (void)tcp_output(tp); -+ break; -+ -+ /* -+ * Persistence timer into zero window. -+ * Force a byte to be output, if possible. -+ */ -+ case TCPT_PERSIST: -+ tcp_setpersist(tp); -+ tp->t_force = 1; -+ (void)tcp_output(tp); -+ tp->t_force = 0; -+ break; -+ -+ /* -+ * Keep-alive timer went off; send something -+ * or drop connection if idle for too long. 
-+ */ -+ case TCPT_KEEP: -+ if (tp->t_state < TCPS_ESTABLISHED) -+ goto dropit; -+ -+ if (slirp_do_keepalive && tp->t_state <= TCPS_CLOSE_WAIT) { -+ if (tp->t_idle >= TCPTV_KEEP_IDLE + TCP_MAXIDLE) -+ goto dropit; -+ /* -+ * Send a packet designed to force a response -+ * if the peer is up and reachable: -+ * either an ACK if the connection is still alive, -+ * or an RST if the peer has closed the connection -+ * due to timeout or reboot. -+ * Using sequence number tp->snd_una-1 -+ * causes the transmitted zero-length segment -+ * to lie outside the receive window; -+ * by the protocol spec, this requires the -+ * correspondent TCP to respond. -+ */ -+ tcp_respond(tp, &tp->t_template, (struct mbuf *)NULL, tp->rcv_nxt, -+ tp->snd_una - 1, 0, tp->t_socket->so_ffamily); -+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; -+ } else -+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; -+ break; -+ -+ dropit: -+ tp = tcp_drop(tp, 0); -+ break; -+ } -+ -+ return (tp); -+} -diff --git a/slirp/src/tcp_timer.h b/slirp/src/tcp_timer.h -new file mode 100644 -index 0000000000..584a5594e4 ---- /dev/null -+++ b/slirp/src/tcp_timer.h -@@ -0,0 +1,130 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. 
Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93 -+ * tcp_timer.h,v 1.4 1994/08/21 05:27:38 paul Exp -+ */ -+ -+#ifndef TCP_TIMER_H -+#define TCP_TIMER_H -+ -+/* -+ * Definitions of the TCP timers. These timers are counted -+ * down PR_SLOWHZ times a second. -+ */ -+#define TCPT_NTIMERS 4 -+ -+#define TCPT_REXMT 0 /* retransmit */ -+#define TCPT_PERSIST 1 /* retransmit persistence */ -+#define TCPT_KEEP 2 /* keep alive */ -+#define TCPT_2MSL 3 /* 2*msl quiet time timer */ -+ -+/* -+ * The TCPT_REXMT timer is used to force retransmissions. -+ * The TCP has the TCPT_REXMT timer set whenever segments -+ * have been sent for which ACKs are expected but not yet -+ * received. If an ACK is received which advances tp->snd_una, -+ * then the retransmit timer is cleared (if there are no more -+ * outstanding segments) or reset to the base value (if there -+ * are more ACKs expected). 
Whenever the retransmit timer goes off, -+ * we retransmit one unacknowledged segment, and do a backoff -+ * on the retransmit timer. -+ * -+ * The TCPT_PERSIST timer is used to keep window size information -+ * flowing even if the window goes shut. If all previous transmissions -+ * have been acknowledged (so that there are no retransmissions in progress), -+ * and the window is too small to bother sending anything, then we start -+ * the TCPT_PERSIST timer. When it expires, if the window is nonzero, -+ * we go to transmit state. Otherwise, at intervals send a single byte -+ * into the peer's window to force him to update our window information. -+ * We do this at most as often as TCPT_PERSMIN time intervals, -+ * but no more frequently than the current estimate of round-trip -+ * packet time. The TCPT_PERSIST timer is cleared whenever we receive -+ * a window update from the peer. -+ * -+ * The TCPT_KEEP timer is used to keep connections alive. If an -+ * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time, -+ * but not yet established, then we drop the connection. Once the connection -+ * is established, if the connection is idle for TCPTV_KEEP_IDLE time -+ * (and keepalives have been enabled on the socket), we begin to probe -+ * the connection. We force the peer to send us a segment by sending: -+ * -+ * This segment is (deliberately) outside the window, and should elicit -+ * an ack segment in response from the peer. If, despite the TCPT_KEEP -+ * initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE -+ * amount of time probing, then we drop the connection. -+ */ -+ -+/* -+ * Time constants. -+ */ -+#define TCPTV_MSL (5 * PR_SLOWHZ) /* max seg lifetime (hah!) 
*/ -+ -+#define TCPTV_SRTTBASE \ -+ 0 /* base roundtrip time; \ -+ if 0, no idea yet */ -+#define TCPTV_SRTTDFLT (3 * PR_SLOWHZ) /* assumed RTT if no info */ -+ -+#define TCPTV_PERSMIN (5 * PR_SLOWHZ) /* retransmit persistence */ -+#define TCPTV_PERSMAX (60 * PR_SLOWHZ) /* maximum persist interval */ -+ -+#define TCPTV_KEEP_INIT (75 * PR_SLOWHZ) /* initial connect keep alive */ -+#define TCPTV_KEEP_IDLE (120 * 60 * PR_SLOWHZ) /* dflt time before probing */ -+#define TCPTV_KEEPINTVL (75 * PR_SLOWHZ) /* default probe interval */ -+#define TCPTV_KEEPCNT 8 /* max probes before drop */ -+ -+#define TCPTV_MIN (1 * PR_SLOWHZ) /* minimum allowable value */ -+#define TCPTV_REXMTMAX (12 * PR_SLOWHZ) /* max allowable REXMT value */ -+ -+#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ -+ -+#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ -+ -+ -+/* -+ * Force a time value to be in a certain range. -+ */ -+#define TCPT_RANGESET(tv, value, tvmin, tvmax) \ -+ { \ -+ (tv) = (value); \ -+ if ((tv) < (tvmin)) \ -+ (tv) = (tvmin); \ -+ else if ((tv) > (tvmax)) \ -+ (tv) = (tvmax); \ -+ } -+ -+extern const int tcp_backoff[]; -+ -+struct tcpcb; -+ -+void tcp_fasttimo(Slirp *); -+void tcp_slowtimo(Slirp *); -+void tcp_canceltimers(struct tcpcb *); -+ -+#endif -diff --git a/slirp/src/tcp_var.h b/slirp/src/tcp_var.h -new file mode 100644 -index 0000000000..c8da8cbd16 ---- /dev/null -+++ b/slirp/src/tcp_var.h -@@ -0,0 +1,161 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1993, 1994 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. 
Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)tcp_var.h 8.3 (Berkeley) 4/10/94 -+ * tcp_var.h,v 1.3 1994/08/21 05:27:39 paul Exp -+ */ -+ -+#ifndef TCP_VAR_H -+#define TCP_VAR_H -+ -+#include "tcpip.h" -+#include "tcp_timer.h" -+ -+/* -+ * Tcp control block, one per tcp; fields: -+ */ -+struct tcpcb { -+ struct tcpiphdr *seg_next; /* sequencing queue */ -+ struct tcpiphdr *seg_prev; -+ short t_state; /* state of this connection */ -+ short t_timer[TCPT_NTIMERS]; /* tcp timers */ -+ short t_rxtshift; /* log(2) of rexmt exp. 
backoff */ -+ short t_rxtcur; /* current retransmit value */ -+ short t_dupacks; /* consecutive dup acks recd */ -+ uint16_t t_maxseg; /* maximum segment size */ -+ uint8_t t_force; /* 1 if forcing out a byte */ -+ uint16_t t_flags; -+#define TF_ACKNOW 0x0001 /* ack peer immediately */ -+#define TF_DELACK 0x0002 /* ack, but try to delay it */ -+#define TF_NODELAY 0x0004 /* don't delay packets to coalesce */ -+#define TF_NOOPT 0x0008 /* don't use tcp options */ -+#define TF_SENTFIN 0x0010 /* have sent FIN */ -+#define TF_REQ_SCALE 0x0020 /* have/will request window scaling */ -+#define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */ -+#define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */ -+#define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */ -+#define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */ -+ -+ struct tcpiphdr t_template; /* static skeletal packet for xmit */ -+ -+ struct socket *t_socket; /* back pointer to socket */ -+ /* -+ * The following fields are used as in the protocol specification. -+ * See RFC783, Dec. 1981, page 21. -+ */ -+ /* send sequence variables */ -+ tcp_seq snd_una; /* send unacknowledged */ -+ tcp_seq snd_nxt; /* send next */ -+ tcp_seq snd_up; /* send urgent pointer */ -+ tcp_seq snd_wl1; /* window update seg seq number */ -+ tcp_seq snd_wl2; /* window update seg ack number */ -+ tcp_seq iss; /* initial send sequence number */ -+ uint32_t snd_wnd; /* send window */ -+ /* receive sequence variables */ -+ uint32_t rcv_wnd; /* receive window */ -+ tcp_seq rcv_nxt; /* receive next */ -+ tcp_seq rcv_up; /* receive urgent pointer */ -+ tcp_seq irs; /* initial receive sequence number */ -+ /* -+ * Additional variables for this implementation. 
-+ */ -+ /* receive variables */ -+ tcp_seq rcv_adv; /* advertised window */ -+ /* retransmit variables */ -+ tcp_seq snd_max; /* highest sequence number sent; -+ * used to recognize retransmits -+ */ -+ /* congestion control (for slow start, source quench, retransmit after loss) -+ */ -+ uint32_t snd_cwnd; /* congestion-controlled window */ -+ uint32_t snd_ssthresh; /* snd_cwnd size threshold for -+ * for slow start exponential to -+ * linear switch -+ */ -+ /* -+ * transmit timing stuff. See below for scale of srtt and rttvar. -+ * "Variance" is actually smoothed difference. -+ */ -+ short t_idle; /* inactivity time */ -+ short t_rtt; /* round trip time */ -+ tcp_seq t_rtseq; /* sequence number being timed */ -+ short t_srtt; /* smoothed round-trip time */ -+ short t_rttvar; /* variance in round-trip time */ -+ uint16_t t_rttmin; /* minimum rtt allowed */ -+ uint32_t max_sndwnd; /* largest window peer has offered */ -+ -+ /* out-of-band data */ -+ uint8_t t_oobflags; /* have some */ -+ uint8_t t_iobc; /* input character */ -+#define TCPOOB_HAVEDATA 0x01 -+#define TCPOOB_HADDATA 0x02 -+ short t_softerror; /* possible error not yet reported */ -+ -+ /* RFC 1323 variables */ -+ uint8_t snd_scale; /* window scaling for send window */ -+ uint8_t rcv_scale; /* window scaling for recv window */ -+ uint8_t request_r_scale; /* pending window scaling */ -+ uint8_t requested_s_scale; -+ uint32_t ts_recent; /* timestamp echo data */ -+ uint32_t ts_recent_age; /* when last updated */ -+ tcp_seq last_ack_sent; -+}; -+ -+#define sototcpcb(so) ((so)->so_tcpcb) -+ -+/* -+ * The smoothed round-trip time and estimated variance -+ * are stored as fixed point numbers scaled by the values below. -+ * For convenience, these scales are also used in smoothing the average -+ * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). -+ * With these scales, srtt has 3 bits to the right of the binary point, -+ * and thus an "ALPHA" of 0.875. 
rttvar has 2 bits to the right of the -+ * binary point, and is smoothed with an ALPHA of 0.75. -+ */ -+#define TCP_RTT_SCALE 8 /* multiplier for srtt; 3 bits frac. */ -+#define TCP_RTT_SHIFT 3 /* shift for srtt; 3 bits frac. */ -+#define TCP_RTTVAR_SCALE 4 /* multiplier for rttvar; 2 bits */ -+#define TCP_RTTVAR_SHIFT 2 /* multiplier for rttvar; 2 bits */ -+ -+/* -+ * The initial retransmission should happen at rtt + 4 * rttvar. -+ * Because of the way we do the smoothing, srtt and rttvar -+ * will each average +1/2 tick of bias. When we compute -+ * the retransmit timer, we want 1/2 tick of rounding and -+ * 1 extra tick because of +-1/2 tick uncertainty in the -+ * firing of the timer. The bias will give us exactly the -+ * 1.5 tick we need. But, because the bias is -+ * statistical, we have to test that we don't drop below -+ * the minimum feasible timer (which is 2 ticks). -+ * This macro assumes that the value of TCP_RTTVAR_SCALE -+ * is the same as the multiplier for rttvar. -+ */ -+#define TCP_REXMTVAL(tp) (((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar) -+ -+#endif -diff --git a/slirp/src/tcpip.h b/slirp/src/tcpip.h -new file mode 100644 -index 0000000000..d3df021493 ---- /dev/null -+++ b/slirp/src/tcpip.h -@@ -0,0 +1,104 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. 
Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)tcpip.h 8.1 (Berkeley) 6/10/93 -+ * tcpip.h,v 1.3 1994/08/21 05:27:40 paul Exp -+ */ -+ -+#ifndef TCPIP_H -+#define TCPIP_H -+ -+/* -+ * Tcp+ip header, after ip options removed. 
-+ */ -+struct tcpiphdr { -+ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ -+ union { -+ struct { -+ struct in_addr ih_src; /* source internet address */ -+ struct in_addr ih_dst; /* destination internet address */ -+ uint8_t ih_x1; /* (unused) */ -+ uint8_t ih_pr; /* protocol */ -+ } ti_i4; -+ struct { -+ struct in6_addr ih_src; -+ struct in6_addr ih_dst; -+ uint8_t ih_x1; -+ uint8_t ih_nh; -+ } ti_i6; -+ } ti; -+ uint16_t ti_x0; -+ uint16_t ti_len; /* protocol length */ -+ struct tcphdr ti_t; /* tcp header */ -+}; -+#define ti_mbuf ih_mbuf.mptr -+#define ti_pr ti.ti_i4.ih_pr -+#define ti_src ti.ti_i4.ih_src -+#define ti_dst ti.ti_i4.ih_dst -+#define ti_src6 ti.ti_i6.ih_src -+#define ti_dst6 ti.ti_i6.ih_dst -+#define ti_nh6 ti.ti_i6.ih_nh -+#define ti_sport ti_t.th_sport -+#define ti_dport ti_t.th_dport -+#define ti_seq ti_t.th_seq -+#define ti_ack ti_t.th_ack -+#define ti_x2 ti_t.th_x2 -+#define ti_off ti_t.th_off -+#define ti_flags ti_t.th_flags -+#define ti_win ti_t.th_win -+#define ti_sum ti_t.th_sum -+#define ti_urp ti_t.th_urp -+ -+#define tcpiphdr2qlink(T) \ -+ ((struct qlink *)(((char *)(T)) - sizeof(struct qlink))) -+#define qlink2tcpiphdr(Q) \ -+ ((struct tcpiphdr *)(((char *)(Q)) + sizeof(struct qlink))) -+#define tcpiphdr_next(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->next) -+#define tcpiphdr_prev(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->prev) -+#define tcpfrag_list_first(T) qlink2tcpiphdr((T)->seg_next) -+#define tcpfrag_list_end(F, T) (tcpiphdr2qlink(F) == (struct qlink *)(T)) -+#define tcpfrag_list_empty(T) ((T)->seg_next == (struct tcpiphdr *)(T)) -+ -+/* This is the difference between the size of a tcpiphdr structure, and the -+ * size of actual ip+tcp headers, rounded up since we need to align data. 
*/ -+#define TCPIPHDR_DELTA \ -+ (MAX(0, (sizeof(struct tcpiphdr) - sizeof(struct ip) - \ -+ sizeof(struct tcphdr) + 3) & \ -+ ~3)) -+ -+/* -+ * Just a clean way to get to the first byte -+ * of the packet -+ */ -+struct tcpiphdr_2 { -+ struct tcpiphdr dummy; -+ char first_char; -+}; -+ -+#endif -diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c -new file mode 100644 -index 0000000000..c6950ee10f ---- /dev/null -+++ b/slirp/src/tftp.c -@@ -0,0 +1,464 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * tftp.c - a simple, read-only tftp server for qemu -+ * -+ * Copyright (c) 2004 Magnus Damm -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. 
-+ */ -+ -+#include "slirp.h" -+ -+#include -+#include -+#include -+ -+static inline int tftp_session_in_use(struct tftp_session *spt) -+{ -+ return (spt->slirp != NULL); -+} -+ -+static inline void tftp_session_update(struct tftp_session *spt) -+{ -+ spt->timestamp = curtime; -+} -+ -+static void tftp_session_terminate(struct tftp_session *spt) -+{ -+ if (spt->fd >= 0) { -+ close(spt->fd); -+ spt->fd = -1; -+ } -+ g_free(spt->filename); -+ spt->slirp = NULL; -+} -+ -+static int tftp_session_allocate(Slirp *slirp, struct sockaddr_storage *srcsas, -+ struct tftp_t *tp) -+{ -+ struct tftp_session *spt; -+ int k; -+ -+ for (k = 0; k < TFTP_SESSIONS_MAX; k++) { -+ spt = &slirp->tftp_sessions[k]; -+ -+ if (!tftp_session_in_use(spt)) -+ goto found; -+ -+ /* sessions time out after 5 inactive seconds */ -+ if ((int)(curtime - spt->timestamp) > 5000) { -+ tftp_session_terminate(spt); -+ goto found; -+ } -+ } -+ -+ return -1; -+ -+found: -+ memset(spt, 0, sizeof(*spt)); -+ memcpy(&spt->client_addr, srcsas, sockaddr_size(srcsas)); -+ spt->fd = -1; -+ spt->block_size = 512; -+ spt->client_port = tp->udp.uh_sport; -+ spt->slirp = slirp; -+ -+ tftp_session_update(spt); -+ -+ return k; -+} -+ -+static int tftp_session_find(Slirp *slirp, struct sockaddr_storage *srcsas, -+ struct tftp_t *tp) -+{ -+ struct tftp_session *spt; -+ int k; -+ -+ for (k = 0; k < TFTP_SESSIONS_MAX; k++) { -+ spt = &slirp->tftp_sessions[k]; -+ -+ if (tftp_session_in_use(spt)) { -+ if (sockaddr_equal(&spt->client_addr, srcsas)) { -+ if (spt->client_port == tp->udp.uh_sport) { -+ return k; -+ } -+ } -+ } -+ } -+ -+ return -1; -+} -+ -+static int tftp_read_data(struct tftp_session *spt, uint32_t block_nr, -+ uint8_t *buf, int len) -+{ -+ int bytes_read = 0; -+ -+ if (spt->fd < 0) { -+ spt->fd = open(spt->filename, O_RDONLY | O_BINARY); -+ } -+ -+ if (spt->fd < 0) { -+ return -1; -+ } -+ -+ if (len) { -+ if (lseek(spt->fd, block_nr * spt->block_size, SEEK_SET) == (off_t)-1) { -+ return -1; -+ } -+ -+ 
bytes_read = read(spt->fd, buf, len); -+ } -+ -+ return bytes_read; -+} -+ -+static struct tftp_t *tftp_prep_mbuf_data(struct tftp_session *spt, -+ struct mbuf *m) -+{ -+ struct tftp_t *tp; -+ -+ memset(m->m_data, 0, m->m_size); -+ -+ m->m_data += IF_MAXLINKHDR; -+ if (spt->client_addr.ss_family == AF_INET6) { -+ m->m_data += sizeof(struct ip6); -+ } else { -+ m->m_data += sizeof(struct ip); -+ } -+ tp = (void *)m->m_data; -+ m->m_data += sizeof(struct udphdr); -+ -+ return tp; -+} -+ -+static void tftp_udp_output(struct tftp_session *spt, struct mbuf *m, -+ struct tftp_t *recv_tp) -+{ -+ if (spt->client_addr.ss_family == AF_INET6) { -+ struct sockaddr_in6 sa6, da6; -+ -+ sa6.sin6_addr = spt->slirp->vhost_addr6; -+ sa6.sin6_port = recv_tp->udp.uh_dport; -+ da6.sin6_addr = ((struct sockaddr_in6 *)&spt->client_addr)->sin6_addr; -+ da6.sin6_port = spt->client_port; -+ -+ udp6_output(NULL, m, &sa6, &da6); -+ } else { -+ struct sockaddr_in sa4, da4; -+ -+ sa4.sin_addr = spt->slirp->vhost_addr; -+ sa4.sin_port = recv_tp->udp.uh_dport; -+ da4.sin_addr = ((struct sockaddr_in *)&spt->client_addr)->sin_addr; -+ da4.sin_port = spt->client_port; -+ -+ udp_output(NULL, m, &sa4, &da4, IPTOS_LOWDELAY); -+ } -+} -+ -+static int tftp_send_oack(struct tftp_session *spt, const char *keys[], -+ uint32_t values[], int nb, struct tftp_t *recv_tp) -+{ -+ struct mbuf *m; -+ struct tftp_t *tp; -+ int i, n = 0; -+ -+ m = m_get(spt->slirp); -+ -+ if (!m) -+ return -1; -+ -+ tp = tftp_prep_mbuf_data(spt, m); -+ -+ tp->tp_op = htons(TFTP_OACK); -+ for (i = 0; i < nb; i++) { -+ n += slirp_fmt0(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%s", keys[i]); -+ n += slirp_fmt0(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%u", values[i]); -+ } -+ -+ m->m_len = G_SIZEOF_MEMBER(struct tftp_t, tp_op) + n; -+ tftp_udp_output(spt, m, recv_tp); -+ -+ return 0; -+} -+ -+static void tftp_send_error(struct tftp_session *spt, uint16_t errorcode, -+ const char *msg, struct tftp_t *recv_tp) -+{ -+ struct mbuf *m; 
-+ struct tftp_t *tp; -+ -+ DEBUG_TFTP("tftp error msg: %s", msg); -+ -+ m = m_get(spt->slirp); -+ -+ if (!m) { -+ goto out; -+ } -+ -+ tp = tftp_prep_mbuf_data(spt, m); -+ -+ tp->tp_op = htons(TFTP_ERROR); -+ tp->x.tp_error.tp_error_code = htons(errorcode); -+ slirp_pstrcpy((char *)tp->x.tp_error.tp_msg, sizeof(tp->x.tp_error.tp_msg), -+ msg); -+ -+ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX + 2) + 3 + -+ strlen(msg) - sizeof(struct udphdr); -+ tftp_udp_output(spt, m, recv_tp); -+ -+out: -+ tftp_session_terminate(spt); -+} -+ -+static void tftp_send_next_block(struct tftp_session *spt, -+ struct tftp_t *recv_tp) -+{ -+ struct mbuf *m; -+ struct tftp_t *tp; -+ int nobytes; -+ -+ m = m_get(spt->slirp); -+ -+ if (!m) { -+ return; -+ } -+ -+ tp = tftp_prep_mbuf_data(spt, m); -+ -+ tp->tp_op = htons(TFTP_DATA); -+ tp->x.tp_data.tp_block_nr = htons((spt->block_nr + 1) & 0xffff); -+ -+ nobytes = tftp_read_data(spt, spt->block_nr, tp->x.tp_data.tp_buf, -+ spt->block_size); -+ -+ if (nobytes < 0) { -+ m_free(m); -+ -+ /* send "file not found" error back */ -+ -+ tftp_send_error(spt, 1, "File not found", tp); -+ -+ return; -+ } -+ -+ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX - nobytes) - -+ sizeof(struct udphdr); -+ tftp_udp_output(spt, m, recv_tp); -+ -+ if (nobytes == spt->block_size) { -+ tftp_session_update(spt); -+ } else { -+ tftp_session_terminate(spt); -+ } -+ -+ spt->block_nr++; -+} -+ -+static void tftp_handle_rrq(Slirp *slirp, struct sockaddr_storage *srcsas, -+ struct tftp_t *tp, int pktlen) -+{ -+ struct tftp_session *spt; -+ int s, k; -+ size_t prefix_len; -+ char *req_fname; -+ const char *option_name[2]; -+ uint32_t option_value[2]; -+ int nb_options = 0; -+ -+ /* check if a session already exists and if so terminate it */ -+ s = tftp_session_find(slirp, srcsas, tp); -+ if (s >= 0) { -+ tftp_session_terminate(&slirp->tftp_sessions[s]); -+ } -+ -+ s = tftp_session_allocate(slirp, srcsas, tp); -+ -+ if (s < 0) { -+ return; -+ } -+ -+ 
spt = &slirp->tftp_sessions[s]; -+ -+ /* unspecified prefix means service disabled */ -+ if (!slirp->tftp_prefix) { -+ tftp_send_error(spt, 2, "Access violation", tp); -+ return; -+ } -+ -+ /* skip header fields */ -+ k = 0; -+ pktlen -= offsetof(struct tftp_t, x.tp_buf); -+ -+ /* prepend tftp_prefix */ -+ prefix_len = strlen(slirp->tftp_prefix); -+ spt->filename = g_malloc(prefix_len + TFTP_FILENAME_MAX + 2); -+ memcpy(spt->filename, slirp->tftp_prefix, prefix_len); -+ spt->filename[prefix_len] = '/'; -+ -+ /* get name */ -+ req_fname = spt->filename + prefix_len + 1; -+ -+ while (1) { -+ if (k >= TFTP_FILENAME_MAX || k >= pktlen) { -+ tftp_send_error(spt, 2, "Access violation", tp); -+ return; -+ } -+ req_fname[k] = tp->x.tp_buf[k]; -+ if (req_fname[k++] == '\0') { -+ break; -+ } -+ } -+ -+ DEBUG_TFTP("tftp rrq file: %s", req_fname); -+ -+ /* check mode */ -+ if ((pktlen - k) < 6) { -+ tftp_send_error(spt, 2, "Access violation", tp); -+ return; -+ } -+ -+ if (strcasecmp(&tp->x.tp_buf[k], "octet") != 0) { -+ tftp_send_error(spt, 4, "Unsupported transfer mode", tp); -+ return; -+ } -+ -+ k += 6; /* skipping octet */ -+ -+ /* do sanity checks on the filename */ -+ if ( -+#ifdef G_OS_WIN32 -+ strstr(req_fname, "..\\") || -+ req_fname[strlen(req_fname) - 1] == '\\' || -+#endif -+ strstr(req_fname, "../") || -+ req_fname[strlen(req_fname) - 1] == '/') { -+ tftp_send_error(spt, 2, "Access violation", tp); -+ return; -+ } -+ -+ /* check if the file exists */ -+ if (tftp_read_data(spt, 0, NULL, 0) < 0) { -+ tftp_send_error(spt, 1, "File not found", tp); -+ return; -+ } -+ -+ if (tp->x.tp_buf[pktlen - 1] != 0) { -+ tftp_send_error(spt, 2, "Access violation", tp); -+ return; -+ } -+ -+ while (k < pktlen && nb_options < G_N_ELEMENTS(option_name)) { -+ const char *key, *value; -+ -+ key = &tp->x.tp_buf[k]; -+ k += strlen(key) + 1; -+ -+ if (k >= pktlen) { -+ tftp_send_error(spt, 2, "Access violation", tp); -+ return; -+ } -+ -+ value = &tp->x.tp_buf[k]; -+ k += strlen(value) 
+ 1; -+ -+ if (strcasecmp(key, "tsize") == 0) { -+ int tsize = atoi(value); -+ struct stat stat_p; -+ -+ if (tsize == 0) { -+ if (stat(spt->filename, &stat_p) == 0) -+ tsize = stat_p.st_size; -+ else { -+ tftp_send_error(spt, 1, "File not found", tp); -+ return; -+ } -+ } -+ -+ option_name[nb_options] = "tsize"; -+ option_value[nb_options] = tsize; -+ nb_options++; -+ } else if (strcasecmp(key, "blksize") == 0) { -+ int blksize = atoi(value); -+ -+ /* Accept blksize up to our maximum size */ -+ if (blksize > 0) { -+ spt->block_size = MIN(blksize, TFTP_BLOCKSIZE_MAX); -+ option_name[nb_options] = "blksize"; -+ option_value[nb_options] = spt->block_size; -+ nb_options++; -+ } -+ } -+ } -+ -+ if (nb_options > 0) { -+ assert(nb_options <= G_N_ELEMENTS(option_name)); -+ tftp_send_oack(spt, option_name, option_value, nb_options, tp); -+ return; -+ } -+ -+ spt->block_nr = 0; -+ tftp_send_next_block(spt, tp); -+} -+ -+static void tftp_handle_ack(Slirp *slirp, struct sockaddr_storage *srcsas, -+ struct tftp_t *tp, int pktlen) -+{ -+ int s; -+ -+ s = tftp_session_find(slirp, srcsas, tp); -+ -+ if (s < 0) { -+ return; -+ } -+ -+ tftp_send_next_block(&slirp->tftp_sessions[s], tp); -+} -+ -+static void tftp_handle_error(Slirp *slirp, struct sockaddr_storage *srcsas, -+ struct tftp_t *tp, int pktlen) -+{ -+ int s; -+ -+ s = tftp_session_find(slirp, srcsas, tp); -+ -+ if (s < 0) { -+ return; -+ } -+ -+ tftp_session_terminate(&slirp->tftp_sessions[s]); -+} -+ -+void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m) -+{ -+ struct tftp_t *tp = (struct tftp_t *)m->m_data; -+ -+ switch (ntohs(tp->tp_op)) { -+ case TFTP_RRQ: -+ tftp_handle_rrq(m->slirp, srcsas, tp, m->m_len); -+ break; -+ -+ case TFTP_ACK: -+ tftp_handle_ack(m->slirp, srcsas, tp, m->m_len); -+ break; -+ -+ case TFTP_ERROR: -+ tftp_handle_error(m->slirp, srcsas, tp, m->m_len); -+ break; -+ } -+} -diff --git a/slirp/src/tftp.h b/slirp/src/tftp.h -new file mode 100644 -index 0000000000..6d75478e83 ---- /dev/null 
-+++ b/slirp/src/tftp.h -@@ -0,0 +1,54 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* tftp defines */ -+ -+#ifndef SLIRP_TFTP_H -+#define SLIRP_TFTP_H -+ -+#include "util.h" -+ -+#define TFTP_SESSIONS_MAX 20 -+ -+#define TFTP_SERVER 69 -+ -+#define TFTP_RRQ 1 -+#define TFTP_WRQ 2 -+#define TFTP_DATA 3 -+#define TFTP_ACK 4 -+#define TFTP_ERROR 5 -+#define TFTP_OACK 6 -+ -+#define TFTP_FILENAME_MAX 512 -+#define TFTP_BLOCKSIZE_MAX 1428 -+ -+struct tftp_t { -+ struct udphdr udp; -+ uint16_t tp_op; -+ union { -+ struct { -+ uint16_t tp_block_nr; -+ uint8_t tp_buf[TFTP_BLOCKSIZE_MAX]; -+ } tp_data; -+ struct { -+ uint16_t tp_error_code; -+ uint8_t tp_msg[TFTP_BLOCKSIZE_MAX]; -+ } tp_error; -+ char tp_buf[TFTP_BLOCKSIZE_MAX + 2]; -+ } x; -+} SLIRP_PACKED; -+ -+struct tftp_session { -+ Slirp *slirp; -+ char *filename; -+ int fd; -+ uint16_t block_size; -+ -+ struct sockaddr_storage client_addr; -+ uint16_t client_port; -+ uint32_t block_nr; -+ -+ int timestamp; -+}; -+ -+void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m); -+ -+#endif -diff --git a/slirp/src/udp.c b/slirp/src/udp.c -new file mode 100644 -index 0000000000..0ad44d7c03 ---- /dev/null -+++ b/slirp/src/udp.c -@@ -0,0 +1,365 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1988, 1990, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. 
Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)udp_usrreq.c 8.4 (Berkeley) 1/21/94 -+ * udp_usrreq.c,v 1.4 1994/10/02 17:48:45 phk Exp -+ */ -+ -+/* -+ * Changes and additions relating to SLiRP -+ * Copyright (c) 1995 Danny Gasparovski. -+ * -+ * Please read the file COPYRIGHT for the -+ * terms and conditions of the copyright. 
-+ */ -+ -+#include "slirp.h" -+#include "ip_icmp.h" -+ -+static uint8_t udp_tos(struct socket *so); -+ -+void udp_init(Slirp *slirp) -+{ -+ slirp->udb.so_next = slirp->udb.so_prev = &slirp->udb; -+ slirp->udp_last_so = &slirp->udb; -+} -+ -+void udp_cleanup(Slirp *slirp) -+{ -+ struct socket *so, *so_next; -+ -+ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { -+ so_next = so->so_next; -+ udp_detach(slirp->udb.so_next); -+ } -+} -+ -+/* m->m_data points at ip packet header -+ * m->m_len length ip packet -+ * ip->ip_len length data (IPDU) -+ */ -+void udp_input(register struct mbuf *m, int iphlen) -+{ -+ Slirp *slirp = m->slirp; -+ register struct ip *ip; -+ register struct udphdr *uh; -+ int len; -+ struct ip save_ip; -+ struct socket *so; -+ struct sockaddr_storage lhost; -+ struct sockaddr_in *lhost4; -+ -+ DEBUG_CALL("udp_input"); -+ DEBUG_ARG("m = %p", m); -+ DEBUG_ARG("iphlen = %d", iphlen); -+ -+ /* -+ * Strip IP options, if any; should skip this, -+ * make available to user, and use on returned packets, -+ * but we don't yet have a way to check the checksum -+ * with options still present. -+ */ -+ if (iphlen > sizeof(struct ip)) { -+ ip_stripoptions(m, (struct mbuf *)0); -+ iphlen = sizeof(struct ip); -+ } -+ -+ /* -+ * Get IP and UDP header together in first mbuf. -+ */ -+ ip = mtod(m, struct ip *); -+ uh = (struct udphdr *)((char *)ip + iphlen); -+ -+ /* -+ * Make mbuf data length reflect UDP length. -+ * If not enough data to reflect UDP length, drop. -+ */ -+ len = ntohs((uint16_t)uh->uh_ulen); -+ -+ if (ip->ip_len != len) { -+ if (len > ip->ip_len) { -+ goto bad; -+ } -+ m_adj(m, len - ip->ip_len); -+ ip->ip_len = len; -+ } -+ -+ /* -+ * Save a copy of the IP header in case we want restore it -+ * for sending an ICMP error message in response. -+ */ -+ save_ip = *ip; -+ save_ip.ip_len += iphlen; /* tcp_input subtracts this */ -+ -+ /* -+ * Checksum extended UDP header and data. 
-+ */ -+ if (uh->uh_sum) { -+ memset(&((struct ipovly *)ip)->ih_mbuf, 0, sizeof(struct mbuf_ptr)); -+ ((struct ipovly *)ip)->ih_x1 = 0; -+ ((struct ipovly *)ip)->ih_len = uh->uh_ulen; -+ if (cksum(m, len + sizeof(struct ip))) { -+ goto bad; -+ } -+ } -+ -+ lhost.ss_family = AF_INET; -+ lhost4 = (struct sockaddr_in *)&lhost; -+ lhost4->sin_addr = ip->ip_src; -+ lhost4->sin_port = uh->uh_sport; -+ -+ /* -+ * handle DHCP/BOOTP -+ */ -+ if (ntohs(uh->uh_dport) == BOOTP_SERVER && -+ (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || -+ ip->ip_dst.s_addr == 0xffffffff)) { -+ bootp_input(m); -+ goto bad; -+ } -+ -+ /* -+ * handle TFTP -+ */ -+ if (ntohs(uh->uh_dport) == TFTP_SERVER && -+ ip->ip_dst.s_addr == slirp->vhost_addr.s_addr) { -+ m->m_data += iphlen; -+ m->m_len -= iphlen; -+ tftp_input(&lhost, m); -+ m->m_data -= iphlen; -+ m->m_len += iphlen; -+ goto bad; -+ } -+ -+ if (slirp->restricted) { -+ goto bad; -+ } -+ -+ /* -+ * Locate pcb for datagram. -+ */ -+ so = solookup(&slirp->udp_last_so, &slirp->udb, &lhost, NULL); -+ -+ if (so == NULL) { -+ /* -+ * If there's no socket for this packet, -+ * create one -+ */ -+ so = socreate(slirp); -+ if (udp_attach(so, AF_INET) == -1) { -+ DEBUG_MISC(" udp_attach errno = %d-%s", errno, strerror(errno)); -+ sofree(so); -+ goto bad; -+ } -+ -+ /* -+ * Setup fields -+ */ -+ so->so_lfamily = AF_INET; -+ so->so_laddr = ip->ip_src; -+ so->so_lport = uh->uh_sport; -+ -+ if ((so->so_iptos = udp_tos(so)) == 0) -+ so->so_iptos = ip->ip_tos; -+ -+ /* -+ * XXXXX Here, check if it's in udpexec_list, -+ * and if it is, do the fork_exec() etc. -+ */ -+ } -+ -+ so->so_ffamily = AF_INET; -+ so->so_faddr = ip->ip_dst; /* XXX */ -+ so->so_fport = uh->uh_dport; /* XXX */ -+ -+ iphlen += sizeof(struct udphdr); -+ m->m_len -= iphlen; -+ m->m_data += iphlen; -+ -+ /* -+ * Now we sendto() the packet. 
-+ */ -+ if (sosendto(so, m) == -1) { -+ m->m_len += iphlen; -+ m->m_data -= iphlen; -+ *ip = save_ip; -+ DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); -+ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); -+ goto bad; -+ } -+ -+ m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ -+ -+ /* restore the orig mbuf packet */ -+ m->m_len += iphlen; -+ m->m_data -= iphlen; -+ *ip = save_ip; -+ so->so_m = m; /* ICMP backup */ -+ -+ return; -+bad: -+ m_free(m); -+} -+ -+int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr, -+ struct sockaddr_in *daddr, int iptos) -+{ -+ register struct udpiphdr *ui; -+ int error = 0; -+ -+ DEBUG_CALL("udp_output"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("m = %p", m); -+ DEBUG_ARG("saddr = %s", inet_ntoa(saddr->sin_addr)); -+ DEBUG_ARG("daddr = %s", inet_ntoa(daddr->sin_addr)); -+ -+ /* -+ * Adjust for header -+ */ -+ m->m_data -= sizeof(struct udpiphdr); -+ m->m_len += sizeof(struct udpiphdr); -+ -+ /* -+ * Fill in mbuf with extended UDP header -+ * and addresses and length put into network format. -+ */ -+ ui = mtod(m, struct udpiphdr *); -+ memset(&ui->ui_i.ih_mbuf, 0, sizeof(struct mbuf_ptr)); -+ ui->ui_x1 = 0; -+ ui->ui_pr = IPPROTO_UDP; -+ ui->ui_len = htons(m->m_len - sizeof(struct ip)); -+ /* XXXXX Check for from-one-location sockets, or from-any-location sockets -+ */ -+ ui->ui_src = saddr->sin_addr; -+ ui->ui_dst = daddr->sin_addr; -+ ui->ui_sport = saddr->sin_port; -+ ui->ui_dport = daddr->sin_port; -+ ui->ui_ulen = ui->ui_len; -+ -+ /* -+ * Stuff checksum and output datagram. 
-+ */ -+ ui->ui_sum = 0; -+ if ((ui->ui_sum = cksum(m, m->m_len)) == 0) -+ ui->ui_sum = 0xffff; -+ ((struct ip *)ui)->ip_len = m->m_len; -+ -+ ((struct ip *)ui)->ip_ttl = IPDEFTTL; -+ ((struct ip *)ui)->ip_tos = iptos; -+ -+ error = ip_output(so, m); -+ -+ return (error); -+} -+ -+int udp_attach(struct socket *so, unsigned short af) -+{ -+ so->s = slirp_socket(af, SOCK_DGRAM, 0); -+ if (so->s != -1) { -+ if (slirp_bind_outbound(so, af) != 0) { -+ // bind failed - close socket -+ closesocket(so->s); -+ so->s = -1; -+ return -1; -+ } -+ so->so_expire = curtime + SO_EXPIRE; -+ insque(so, &so->slirp->udb); -+ } -+ return (so->s); -+} -+ -+void udp_detach(struct socket *so) -+{ -+ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); -+ closesocket(so->s); -+ sofree(so); -+} -+ -+static const struct tos_t udptos[] = { { 0, 53, IPTOS_LOWDELAY, 0 }, /* DNS */ -+ { 0, 0, 0, 0 } }; -+ -+static uint8_t udp_tos(struct socket *so) -+{ -+ int i = 0; -+ -+ while (udptos[i].tos) { -+ if ((udptos[i].fport && ntohs(so->so_fport) == udptos[i].fport) || -+ (udptos[i].lport && ntohs(so->so_lport) == udptos[i].lport)) { -+ if (so->slirp->enable_emu) -+ so->so_emu = udptos[i].emu; -+ return udptos[i].tos; -+ } -+ i++; -+ } -+ -+ return 0; -+} -+ -+struct socket *udp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, -+ uint32_t laddr, unsigned lport, int flags) -+{ -+ /* TODO: IPv6 */ -+ struct sockaddr_in addr; -+ struct socket *so; -+ socklen_t addrlen = sizeof(struct sockaddr_in); -+ -+ memset(&addr, 0, sizeof(addr)); -+ so = socreate(slirp); -+ so->s = slirp_socket(AF_INET, SOCK_DGRAM, 0); -+ if (so->s < 0) { -+ sofree(so); -+ return NULL; -+ } -+ so->so_expire = curtime + SO_EXPIRE; -+ insque(so, &slirp->udb); -+ -+ addr.sin_family = AF_INET; -+ addr.sin_addr.s_addr = haddr; -+ addr.sin_port = hport; -+ -+ if (bind(so->s, (struct sockaddr *)&addr, addrlen) < 0) { -+ udp_detach(so); -+ return NULL; -+ } -+ slirp_socket_set_fast_reuse(so->s); -+ -+ getsockname(so->s, (struct 
sockaddr *)&addr, &addrlen); -+ so->fhost.sin = addr; -+ sotranslate_accept(so); -+ so->so_lfamily = AF_INET; -+ so->so_lport = lport; -+ so->so_laddr.s_addr = laddr; -+ if (flags != SS_FACCEPTONCE) -+ so->so_expire = 0; -+ -+ so->so_state &= SS_PERSISTENT_MASK; -+ so->so_state |= SS_ISFCONNECTED | flags; -+ -+ return so; -+} -diff --git a/slirp/src/udp.h b/slirp/src/udp.h -new file mode 100644 -index 0000000000..c3b83fdc56 ---- /dev/null -+++ b/slirp/src/udp.h -@@ -0,0 +1,90 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 1982, 1986, 1993 -+ * The Regents of the University of California. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * 3. Neither the name of the University nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -+ * SUCH DAMAGE. -+ * -+ * @(#)udp.h 8.1 (Berkeley) 6/10/93 -+ * udp.h,v 1.3 1994/08/21 05:27:41 paul Exp -+ */ -+ -+#ifndef UDP_H -+#define UDP_H -+ -+#define UDP_TTL 0x60 -+#define UDP_UDPDATALEN 16192 -+ -+/* -+ * Udp protocol header. -+ * Per RFC 768, September, 1981. -+ */ -+struct udphdr { -+ uint16_t uh_sport; /* source port */ -+ uint16_t uh_dport; /* destination port */ -+ int16_t uh_ulen; /* udp length */ -+ uint16_t uh_sum; /* udp checksum */ -+}; -+ -+/* -+ * UDP kernel structures and variables. 
-+ */ -+struct udpiphdr { -+ struct ipovly ui_i; /* overlaid ip structure */ -+ struct udphdr ui_u; /* udp header */ -+}; -+#define ui_mbuf ui_i.ih_mbuf.mptr -+#define ui_x1 ui_i.ih_x1 -+#define ui_pr ui_i.ih_pr -+#define ui_len ui_i.ih_len -+#define ui_src ui_i.ih_src -+#define ui_dst ui_i.ih_dst -+#define ui_sport ui_u.uh_sport -+#define ui_dport ui_u.uh_dport -+#define ui_ulen ui_u.uh_ulen -+#define ui_sum ui_u.uh_sum -+ -+/* -+ * Names for UDP sysctl objects -+ */ -+#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */ -+#define UDPCTL_MAXID 2 -+ -+struct mbuf; -+ -+void udp_init(Slirp *); -+void udp_cleanup(Slirp *); -+void udp_input(register struct mbuf *, int); -+int udp_attach(struct socket *, unsigned short af); -+void udp_detach(struct socket *); -+struct socket *udp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); -+int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr, -+ struct sockaddr_in *daddr, int iptos); -+ -+void udp6_input(register struct mbuf *); -+int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, -+ struct sockaddr_in6 *daddr); -+ -+#endif -diff --git a/slirp/src/udp6.c b/slirp/src/udp6.c -new file mode 100644 -index 0000000000..6f9486bbca ---- /dev/null -+++ b/slirp/src/udp6.c -@@ -0,0 +1,173 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (c) 2013 -+ * Guillaume Subiron -+ */ -+ -+#include "slirp.h" -+#include "udp.h" -+#include "dhcpv6.h" -+ -+void udp6_input(struct mbuf *m) -+{ -+ Slirp *slirp = m->slirp; -+ struct ip6 *ip, save_ip; -+ struct udphdr *uh; -+ int iphlen = sizeof(struct ip6); -+ int len; -+ struct socket *so; -+ struct sockaddr_in6 lhost; -+ -+ DEBUG_CALL("udp6_input"); -+ DEBUG_ARG("m = %p", m); -+ -+ if (slirp->restricted) { -+ goto bad; -+ } -+ -+ ip = mtod(m, struct ip6 *); -+ m->m_len -= iphlen; -+ m->m_data += iphlen; -+ uh = mtod(m, struct udphdr *); -+ m->m_len += iphlen; -+ m->m_data -= iphlen; -+ -+ if (ip6_cksum(m)) { -+ goto bad; 
-+ } -+ -+ len = ntohs((uint16_t)uh->uh_ulen); -+ -+ /* -+ * Make mbuf data length reflect UDP length. -+ * If not enough data to reflect UDP length, drop. -+ */ -+ if (ntohs(ip->ip_pl) != len) { -+ if (len > ntohs(ip->ip_pl)) { -+ goto bad; -+ } -+ m_adj(m, len - ntohs(ip->ip_pl)); -+ ip->ip_pl = htons(len); -+ } -+ -+ /* -+ * Save a copy of the IP header in case we want restore it -+ * for sending an ICMP error message in response. -+ */ -+ save_ip = *ip; -+ -+ /* Locate pcb for datagram. */ -+ lhost.sin6_family = AF_INET6; -+ lhost.sin6_addr = ip->ip_src; -+ lhost.sin6_port = uh->uh_sport; -+ -+ /* handle DHCPv6 */ -+ if (ntohs(uh->uh_dport) == DHCPV6_SERVER_PORT && -+ (in6_equal(&ip->ip_dst, &slirp->vhost_addr6) || -+ in6_dhcp_multicast(&ip->ip_dst))) { -+ m->m_data += iphlen; -+ m->m_len -= iphlen; -+ dhcpv6_input(&lhost, m); -+ m->m_data -= iphlen; -+ m->m_len += iphlen; -+ goto bad; -+ } -+ -+ /* handle TFTP */ -+ if (ntohs(uh->uh_dport) == TFTP_SERVER && -+ !memcmp(ip->ip_dst.s6_addr, slirp->vhost_addr6.s6_addr, 16)) { -+ m->m_data += iphlen; -+ m->m_len -= iphlen; -+ tftp_input((struct sockaddr_storage *)&lhost, m); -+ m->m_data -= iphlen; -+ m->m_len += iphlen; -+ goto bad; -+ } -+ -+ so = solookup(&slirp->udp_last_so, &slirp->udb, -+ (struct sockaddr_storage *)&lhost, NULL); -+ -+ if (so == NULL) { -+ /* If there's no socket for this packet, create one. */ -+ so = socreate(slirp); -+ if (udp_attach(so, AF_INET6) == -1) { -+ DEBUG_MISC(" udp6_attach errno = %d-%s", errno, strerror(errno)); -+ sofree(so); -+ goto bad; -+ } -+ -+ /* Setup fields */ -+ so->so_lfamily = AF_INET6; -+ so->so_laddr6 = ip->ip_src; -+ so->so_lport6 = uh->uh_sport; -+ } -+ -+ so->so_ffamily = AF_INET6; -+ so->so_faddr6 = ip->ip_dst; /* XXX */ -+ so->so_fport6 = uh->uh_dport; /* XXX */ -+ -+ iphlen += sizeof(struct udphdr); -+ m->m_len -= iphlen; -+ m->m_data += iphlen; -+ -+ /* -+ * Now we sendto() the packet. 
-+ */ -+ if (sosendto(so, m) == -1) { -+ m->m_len += iphlen; -+ m->m_data -= iphlen; -+ *ip = save_ip; -+ DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); -+ icmp6_send_error(m, ICMP6_UNREACH, ICMP6_UNREACH_NO_ROUTE); -+ goto bad; -+ } -+ -+ m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ -+ -+ /* restore the orig mbuf packet */ -+ m->m_len += iphlen; -+ m->m_data -= iphlen; -+ *ip = save_ip; -+ so->so_m = m; -+ -+ return; -+bad: -+ m_free(m); -+} -+ -+int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, -+ struct sockaddr_in6 *daddr) -+{ -+ struct ip6 *ip; -+ struct udphdr *uh; -+ -+ DEBUG_CALL("udp6_output"); -+ DEBUG_ARG("so = %p", so); -+ DEBUG_ARG("m = %p", m); -+ -+ /* adjust for header */ -+ m->m_data -= sizeof(struct udphdr); -+ m->m_len += sizeof(struct udphdr); -+ uh = mtod(m, struct udphdr *); -+ m->m_data -= sizeof(struct ip6); -+ m->m_len += sizeof(struct ip6); -+ ip = mtod(m, struct ip6 *); -+ -+ /* Build IP header */ -+ ip->ip_pl = htons(m->m_len - sizeof(struct ip6)); -+ ip->ip_nh = IPPROTO_UDP; -+ ip->ip_src = saddr->sin6_addr; -+ ip->ip_dst = daddr->sin6_addr; -+ -+ /* Build UDP header */ -+ uh->uh_sport = saddr->sin6_port; -+ uh->uh_dport = daddr->sin6_port; -+ uh->uh_ulen = ip->ip_pl; -+ uh->uh_sum = 0; -+ uh->uh_sum = ip6_cksum(m); -+ if (uh->uh_sum == 0) { -+ uh->uh_sum = 0xffff; -+ } -+ -+ return ip6_output(so, m, 0); -+} -diff --git a/slirp/src/util.c b/slirp/src/util.c -new file mode 100644 -index 0000000000..d3ed5faf8b ---- /dev/null -+++ b/slirp/src/util.c -@@ -0,0 +1,428 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * util.c (mostly based on QEMU os-win32.c) -+ * -+ * Copyright (c) 2003-2008 Fabrice Bellard -+ * Copyright (c) 2010-2016 Red Hat, Inc. -+ * -+ * QEMU library functions for win32 which are shared between QEMU and -+ * the QEMU tools. 
-+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. 
-+ */ -+#include "util.h" -+ -+#include -+#include -+#include -+ -+#if defined(_WIN32) -+int slirp_inet_aton(const char *cp, struct in_addr *ia) -+{ -+ uint32_t addr = inet_addr(cp); -+ if (addr == 0xffffffff) { -+ return 0; -+ } -+ ia->s_addr = addr; -+ return 1; -+} -+#endif -+ -+void slirp_set_nonblock(int fd) -+{ -+#ifndef _WIN32 -+ int f; -+ f = fcntl(fd, F_GETFL); -+ assert(f != -1); -+ f = fcntl(fd, F_SETFL, f | O_NONBLOCK); -+ assert(f != -1); -+#else -+ unsigned long opt = 1; -+ ioctlsocket(fd, FIONBIO, &opt); -+#endif -+} -+ -+static void slirp_set_cloexec(int fd) -+{ -+#ifndef _WIN32 -+ int f; -+ f = fcntl(fd, F_GETFD); -+ assert(f != -1); -+ f = fcntl(fd, F_SETFD, f | FD_CLOEXEC); -+ assert(f != -1); -+#endif -+} -+ -+/* -+ * Opens a socket with FD_CLOEXEC set -+ */ -+int slirp_socket(int domain, int type, int protocol) -+{ -+ int ret; -+ -+#ifdef SOCK_CLOEXEC -+ ret = socket(domain, type | SOCK_CLOEXEC, protocol); -+ if (ret != -1 || errno != EINVAL) { -+ return ret; -+ } -+#endif -+ ret = socket(domain, type, protocol); -+ if (ret >= 0) { -+ slirp_set_cloexec(ret); -+ } -+ -+ return ret; -+} -+ -+#ifdef _WIN32 -+static int socket_error(void) -+{ -+ switch (WSAGetLastError()) { -+ case 0: -+ return 0; -+ case WSAEINTR: -+ return EINTR; -+ case WSAEINVAL: -+ return EINVAL; -+ case WSA_INVALID_HANDLE: -+ return EBADF; -+ case WSA_NOT_ENOUGH_MEMORY: -+ return ENOMEM; -+ case WSA_INVALID_PARAMETER: -+ return EINVAL; -+ case WSAENAMETOOLONG: -+ return ENAMETOOLONG; -+ case WSAENOTEMPTY: -+ return ENOTEMPTY; -+ case WSAEWOULDBLOCK: -+ /* not using EWOULDBLOCK as we don't want code to have -+ * to check both EWOULDBLOCK and EAGAIN */ -+ return EAGAIN; -+ case WSAEINPROGRESS: -+ return EINPROGRESS; -+ case WSAEALREADY: -+ return EALREADY; -+ case WSAENOTSOCK: -+ return ENOTSOCK; -+ case WSAEDESTADDRREQ: -+ return EDESTADDRREQ; -+ case WSAEMSGSIZE: -+ return EMSGSIZE; -+ case WSAEPROTOTYPE: -+ return EPROTOTYPE; -+ case WSAENOPROTOOPT: -+ return ENOPROTOOPT; -+ 
case WSAEPROTONOSUPPORT: -+ return EPROTONOSUPPORT; -+ case WSAEOPNOTSUPP: -+ return EOPNOTSUPP; -+ case WSAEAFNOSUPPORT: -+ return EAFNOSUPPORT; -+ case WSAEADDRINUSE: -+ return EADDRINUSE; -+ case WSAEADDRNOTAVAIL: -+ return EADDRNOTAVAIL; -+ case WSAENETDOWN: -+ return ENETDOWN; -+ case WSAENETUNREACH: -+ return ENETUNREACH; -+ case WSAENETRESET: -+ return ENETRESET; -+ case WSAECONNABORTED: -+ return ECONNABORTED; -+ case WSAECONNRESET: -+ return ECONNRESET; -+ case WSAENOBUFS: -+ return ENOBUFS; -+ case WSAEISCONN: -+ return EISCONN; -+ case WSAENOTCONN: -+ return ENOTCONN; -+ case WSAETIMEDOUT: -+ return ETIMEDOUT; -+ case WSAECONNREFUSED: -+ return ECONNREFUSED; -+ case WSAELOOP: -+ return ELOOP; -+ case WSAEHOSTUNREACH: -+ return EHOSTUNREACH; -+ default: -+ return EIO; -+ } -+} -+ -+#undef ioctlsocket -+int slirp_ioctlsocket_wrap(int fd, int req, void *val) -+{ -+ int ret; -+ ret = ioctlsocket(fd, req, val); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef closesocket -+int slirp_closesocket_wrap(int fd) -+{ -+ int ret; -+ ret = closesocket(fd); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef connect -+int slirp_connect_wrap(int sockfd, const struct sockaddr *addr, int addrlen) -+{ -+ int ret; -+ ret = connect(sockfd, addr, addrlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef listen -+int slirp_listen_wrap(int sockfd, int backlog) -+{ -+ int ret; -+ ret = listen(sockfd, backlog); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef bind -+int slirp_bind_wrap(int sockfd, const struct sockaddr *addr, int addrlen) -+{ -+ int ret; -+ ret = bind(sockfd, addr, addrlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef socket -+int slirp_socket_wrap(int domain, int type, int protocol) -+{ -+ int ret; -+ ret = socket(domain, type, protocol); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return 
ret; -+} -+ -+#undef accept -+int slirp_accept_wrap(int sockfd, struct sockaddr *addr, int *addrlen) -+{ -+ int ret; -+ ret = accept(sockfd, addr, addrlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef shutdown -+int slirp_shutdown_wrap(int sockfd, int how) -+{ -+ int ret; -+ ret = shutdown(sockfd, how); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef getsockopt -+int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, -+ int *optlen) -+{ -+ int ret; -+ ret = getsockopt(sockfd, level, optname, optval, optlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef setsockopt -+int slirp_setsockopt_wrap(int sockfd, int level, int optname, -+ const void *optval, int optlen) -+{ -+ int ret; -+ ret = setsockopt(sockfd, level, optname, optval, optlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef getpeername -+int slirp_getpeername_wrap(int sockfd, struct sockaddr *addr, int *addrlen) -+{ -+ int ret; -+ ret = getpeername(sockfd, addr, addrlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef getsockname -+int slirp_getsockname_wrap(int sockfd, struct sockaddr *addr, int *addrlen) -+{ -+ int ret; -+ ret = getsockname(sockfd, addr, addrlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef send -+ssize_t slirp_send_wrap(int sockfd, const void *buf, size_t len, int flags) -+{ -+ int ret; -+ ret = send(sockfd, buf, len, flags); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef sendto -+ssize_t slirp_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, -+ const struct sockaddr *addr, int addrlen) -+{ -+ int ret; -+ ret = sendto(sockfd, buf, len, flags, addr, addrlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef recv -+ssize_t slirp_recv_wrap(int sockfd, void *buf, size_t len, int 
flags) -+{ -+ int ret; -+ ret = recv(sockfd, buf, len, flags); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+ -+#undef recvfrom -+ssize_t slirp_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, -+ struct sockaddr *addr, int *addrlen) -+{ -+ int ret; -+ ret = recvfrom(sockfd, buf, len, flags, addr, addrlen); -+ if (ret < 0) { -+ errno = socket_error(); -+ } -+ return ret; -+} -+#endif /* WIN32 */ -+ -+void slirp_pstrcpy(char *buf, int buf_size, const char *str) -+{ -+ int c; -+ char *q = buf; -+ -+ if (buf_size <= 0) -+ return; -+ -+ for (;;) { -+ c = *str++; -+ if (c == 0 || q >= buf + buf_size - 1) -+ break; -+ *q++ = c; -+ } -+ *q = '\0'; -+} -+ -+static int slirp_vsnprintf(char *str, size_t size, -+ const char *format, va_list args) -+{ -+ int rv = g_vsnprintf(str, size, format, args); -+ -+ if (rv < 0) { -+ g_error("g_vsnprintf() failed: %s", g_strerror(errno)); -+ } -+ -+ return rv; -+} -+ -+/* -+ * A snprintf()-like function that: -+ * - returns the number of bytes written (excluding optional \0-ending) -+ * - dies on error -+ * - warn on truncation -+ */ -+int slirp_fmt(char *str, size_t size, const char *format, ...) -+{ -+ va_list args; -+ int rv; -+ -+ va_start(args, format); -+ rv = slirp_vsnprintf(str, size, format, args); -+ va_end(args); -+ -+ if (rv >= size) { -+ g_critical("slirp_fmt() truncation"); -+ } -+ -+ return MIN(rv, size); -+} -+ -+/* -+ * A snprintf()-like function that: -+ * - always \0-end (unless size == 0) -+ * - returns the number of bytes actually written, including \0 ending -+ * - dies on error -+ * - warn on truncation -+ */ -+int slirp_fmt0(char *str, size_t size, const char *format, ...) 
-+{ -+ va_list args; -+ int rv; -+ -+ va_start(args, format); -+ rv = slirp_vsnprintf(str, size, format, args); -+ va_end(args); -+ -+ if (rv >= size) { -+ g_critical("slirp_fmt0() truncation"); -+ if (size > 0) -+ str[size - 1] = '\0'; -+ rv = size; -+ } else { -+ rv += 1; /* include \0 */ -+ } -+ -+ return rv; -+} -diff --git a/slirp/src/util.h b/slirp/src/util.h -new file mode 100644 -index 0000000000..d67b3d0de9 ---- /dev/null -+++ b/slirp/src/util.h -@@ -0,0 +1,189 @@ -+/* SPDX-License-Identifier: MIT */ -+/* -+ * Copyright (c) 2003-2008 Fabrice Bellard -+ * Copyright (c) 2010-2019 Red Hat, Inc. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. 
-+ */ -+#ifndef UTIL_H_ -+#define UTIL_H_ -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef _WIN32 -+#include -+#include -+#else -+#include -+#include -+#include -+#endif -+ -+#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) -+#define SLIRP_PACKED __attribute__((gcc_struct, packed)) -+#else -+#define SLIRP_PACKED __attribute__((packed)) -+#endif -+ -+#ifndef DIV_ROUND_UP -+#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) -+#endif -+ -+#ifndef container_of -+#define container_of(ptr, type, member) \ -+ __extension__({ \ -+ void *__mptr = (void *)(ptr); \ -+ ((type *)(__mptr - offsetof(type, member))); \ -+ }) -+#endif -+ -+#ifndef G_SIZEOF_MEMBER -+#define G_SIZEOF_MEMBER(type, member) sizeof(((type *)0)->member) -+#endif -+ -+#if defined(_WIN32) /* CONFIG_IOVEC */ -+#if !defined(IOV_MAX) /* XXX: to avoid duplicate with QEMU osdep.h */ -+struct iovec { -+ void *iov_base; -+ size_t iov_len; -+}; -+#endif -+#else -+#include -+#endif -+ -+#define stringify(s) tostring(s) -+#define tostring(s) #s -+ -+#define SCALE_MS 1000000 -+ -+#define ETH_ALEN 6 -+#define ETH_HLEN 14 -+#define ETH_P_IP (0x0800) /* Internet Protocol packet */ -+#define ETH_P_ARP (0x0806) /* Address Resolution packet */ -+#define ETH_P_IPV6 (0x86dd) -+#define ETH_P_VLAN (0x8100) -+#define ETH_P_DVLAN (0x88a8) -+#define ETH_P_NCSI (0x88f8) -+#define ETH_P_UNKNOWN (0xffff) -+ -+/* FIXME: remove me when made standalone */ -+#ifdef _WIN32 -+#undef accept -+#undef bind -+#undef closesocket -+#undef connect -+#undef getpeername -+#undef getsockname -+#undef getsockopt -+#undef ioctlsocket -+#undef listen -+#undef recv -+#undef recvfrom -+#undef send -+#undef sendto -+#undef setsockopt -+#undef shutdown -+#undef socket -+#endif -+ -+#ifdef _WIN32 -+#define connect slirp_connect_wrap -+int slirp_connect_wrap(int fd, const struct sockaddr *addr, int addrlen); -+#define listen slirp_listen_wrap -+int 
slirp_listen_wrap(int fd, int backlog); -+#define bind slirp_bind_wrap -+int slirp_bind_wrap(int fd, const struct sockaddr *addr, int addrlen); -+#define socket slirp_socket_wrap -+int slirp_socket_wrap(int domain, int type, int protocol); -+#define accept slirp_accept_wrap -+int slirp_accept_wrap(int fd, struct sockaddr *addr, int *addrlen); -+#define shutdown slirp_shutdown_wrap -+int slirp_shutdown_wrap(int fd, int how); -+#define getpeername slirp_getpeername_wrap -+int slirp_getpeername_wrap(int fd, struct sockaddr *addr, int *addrlen); -+#define getsockname slirp_getsockname_wrap -+int slirp_getsockname_wrap(int fd, struct sockaddr *addr, int *addrlen); -+#define send slirp_send_wrap -+ssize_t slirp_send_wrap(int fd, const void *buf, size_t len, int flags); -+#define sendto slirp_sendto_wrap -+ssize_t slirp_sendto_wrap(int fd, const void *buf, size_t len, int flags, -+ const struct sockaddr *dest_addr, int addrlen); -+#define recv slirp_recv_wrap -+ssize_t slirp_recv_wrap(int fd, void *buf, size_t len, int flags); -+#define recvfrom slirp_recvfrom_wrap -+ssize_t slirp_recvfrom_wrap(int fd, void *buf, size_t len, int flags, -+ struct sockaddr *src_addr, int *addrlen); -+#define closesocket slirp_closesocket_wrap -+int slirp_closesocket_wrap(int fd); -+#define ioctlsocket slirp_ioctlsocket_wrap -+int slirp_ioctlsocket_wrap(int fd, int req, void *val); -+#define getsockopt slirp_getsockopt_wrap -+int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, -+ int *optlen); -+#define setsockopt slirp_setsockopt_wrap -+int slirp_setsockopt_wrap(int sockfd, int level, int optname, -+ const void *optval, int optlen); -+#define inet_aton slirp_inet_aton -+int slirp_inet_aton(const char *cp, struct in_addr *ia); -+#else -+#define closesocket(s) close(s) -+#define ioctlsocket(s, r, v) ioctl(s, r, v) -+#endif -+ -+int slirp_socket(int domain, int type, int protocol); -+void slirp_set_nonblock(int fd); -+ -+static inline int slirp_socket_set_nodelay(int 
fd) -+{ -+ int v = 1; -+ return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v)); -+} -+ -+static inline int slirp_socket_set_fast_reuse(int fd) -+{ -+#ifndef _WIN32 -+ int v = 1; -+ return setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &v, sizeof(v)); -+#else -+ /* Enabling the reuse of an endpoint that was used by a socket still in -+ * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows -+ * fast reuse is the default and SO_REUSEADDR does strange things. So we -+ * don't have to do anything here. More info can be found at: -+ * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */ -+ return 0; -+#endif -+} -+ -+void slirp_pstrcpy(char *buf, int buf_size, const char *str); -+ -+int slirp_fmt(char *str, size_t size, const char *format, ...) G_GNUC_PRINTF(3, 4); -+int slirp_fmt0(char *str, size_t size, const char *format, ...) G_GNUC_PRINTF(3, 4); -+ -+#endif -diff --git a/slirp/src/version.c b/slirp/src/version.c -new file mode 100644 -index 0000000000..93e0be9c24 ---- /dev/null -+++ b/slirp/src/version.c -@@ -0,0 +1,8 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+#include "libslirp.h" -+ -+const char * -+slirp_version_string(void) -+{ -+ return SLIRP_VERSION_STRING; -+} -diff --git a/slirp/src/vmstate.c b/slirp/src/vmstate.c -new file mode 100644 -index 0000000000..68cc1729c5 ---- /dev/null -+++ b/slirp/src/vmstate.c -@@ -0,0 +1,444 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * VMState interpreter -+ * -+ * Copyright (c) 2009-2018 Red Hat Inc -+ * -+ * Authors: -+ * Juan Quintela -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * 1. Redistributions of source code must retain the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer. -+ * -+ * 2. 
Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer in the documentation and/or other materials provided -+ * with the distribution. -+ * -+ * 3. Neither the name of the copyright holder nor the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -+ * OF THE POSSIBILITY OF SUCH DAMAGE. 
-+ */ -+#include -+#include -+#include -+#include -+ -+#include "stream.h" -+#include "vmstate.h" -+ -+static int get_nullptr(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ if (slirp_istream_read_u8(f) == VMS_NULLPTR_MARKER) { -+ return 0; -+ } -+ g_warning("vmstate: get_nullptr expected VMS_NULLPTR_MARKER"); -+ return -EINVAL; -+} -+ -+static int put_nullptr(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field) -+ -+{ -+ if (pv == NULL) { -+ slirp_ostream_write_u8(f, VMS_NULLPTR_MARKER); -+ return 0; -+ } -+ g_warning("vmstate: put_nullptr must be called with pv == NULL"); -+ return -EINVAL; -+} -+ -+const VMStateInfo slirp_vmstate_info_nullptr = { -+ .name = "uint64", -+ .get = get_nullptr, -+ .put = put_nullptr, -+}; -+ -+/* 8 bit unsigned int */ -+ -+static int get_uint8(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ uint8_t *v = pv; -+ *v = slirp_istream_read_u8(f); -+ return 0; -+} -+ -+static int put_uint8(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ uint8_t *v = pv; -+ slirp_ostream_write_u8(f, *v); -+ return 0; -+} -+ -+const VMStateInfo slirp_vmstate_info_uint8 = { -+ .name = "uint8", -+ .get = get_uint8, -+ .put = put_uint8, -+}; -+ -+/* 16 bit unsigned int */ -+ -+static int get_uint16(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ uint16_t *v = pv; -+ *v = slirp_istream_read_u16(f); -+ return 0; -+} -+ -+static int put_uint16(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ uint16_t *v = pv; -+ slirp_ostream_write_u16(f, *v); -+ return 0; -+} -+ -+const VMStateInfo slirp_vmstate_info_uint16 = { -+ .name = "uint16", -+ .get = get_uint16, -+ .put = put_uint16, -+}; -+ -+/* 32 bit unsigned int */ -+ -+static int get_uint32(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ uint32_t *v = pv; -+ *v = slirp_istream_read_u32(f); -+ return 0; -+} -+ -+static int 
put_uint32(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ uint32_t *v = pv; -+ slirp_ostream_write_u32(f, *v); -+ return 0; -+} -+ -+const VMStateInfo slirp_vmstate_info_uint32 = { -+ .name = "uint32", -+ .get = get_uint32, -+ .put = put_uint32, -+}; -+ -+/* 16 bit int */ -+ -+static int get_int16(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ int16_t *v = pv; -+ *v = slirp_istream_read_i16(f); -+ return 0; -+} -+ -+static int put_int16(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ int16_t *v = pv; -+ slirp_ostream_write_i16(f, *v); -+ return 0; -+} -+ -+const VMStateInfo slirp_vmstate_info_int16 = { -+ .name = "int16", -+ .get = get_int16, -+ .put = put_int16, -+}; -+ -+/* 32 bit int */ -+ -+static int get_int32(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ int32_t *v = pv; -+ *v = slirp_istream_read_i32(f); -+ return 0; -+} -+ -+static int put_int32(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ int32_t *v = pv; -+ slirp_ostream_write_i32(f, *v); -+ return 0; -+} -+ -+const VMStateInfo slirp_vmstate_info_int32 = { -+ .name = "int32", -+ .get = get_int32, -+ .put = put_int32, -+}; -+ -+/* vmstate_info_tmp, see VMSTATE_WITH_TMP, the idea is that we allocate -+ * a temporary buffer and the pre_load/pre_save methods in the child vmsd -+ * copy stuff from the parent into the child and do calculations to fill -+ * in fields that don't really exist in the parent but need to be in the -+ * stream. 
-+ */ -+static int get_tmp(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ int ret; -+ const VMStateDescription *vmsd = field->vmsd; -+ int version_id = field->version_id; -+ void *tmp = g_malloc(size); -+ -+ /* Writes the parent field which is at the start of the tmp */ -+ *(void **)tmp = pv; -+ ret = slirp_vmstate_load_state(f, vmsd, tmp, version_id); -+ g_free(tmp); -+ return ret; -+} -+ -+static int put_tmp(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ const VMStateDescription *vmsd = field->vmsd; -+ void *tmp = g_malloc(size); -+ int ret; -+ -+ /* Writes the parent field which is at the start of the tmp */ -+ *(void **)tmp = pv; -+ ret = slirp_vmstate_save_state(f, vmsd, tmp); -+ g_free(tmp); -+ -+ return ret; -+} -+ -+const VMStateInfo slirp_vmstate_info_tmp = { -+ .name = "tmp", -+ .get = get_tmp, -+ .put = put_tmp, -+}; -+ -+/* uint8_t buffers */ -+ -+static int get_buffer(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ slirp_istream_read(f, pv, size); -+ return 0; -+} -+ -+static int put_buffer(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field) -+{ -+ slirp_ostream_write(f, pv, size); -+ return 0; -+} -+ -+const VMStateInfo slirp_vmstate_info_buffer = { -+ .name = "buffer", -+ .get = get_buffer, -+ .put = put_buffer, -+}; -+ -+static int vmstate_n_elems(void *opaque, const VMStateField *field) -+{ -+ int n_elems = 1; -+ -+ if (field->flags & VMS_ARRAY) { -+ n_elems = field->num; -+ } else if (field->flags & VMS_VARRAY_INT32) { -+ n_elems = *(int32_t *)(opaque + field->num_offset); -+ } else if (field->flags & VMS_VARRAY_UINT32) { -+ n_elems = *(uint32_t *)(opaque + field->num_offset); -+ } else if (field->flags & VMS_VARRAY_UINT16) { -+ n_elems = *(uint16_t *)(opaque + field->num_offset); -+ } else if (field->flags & VMS_VARRAY_UINT8) { -+ n_elems = *(uint8_t *)(opaque + field->num_offset); -+ } -+ -+ if (field->flags & VMS_MULTIPLY_ELEMENTS) { -+ 
n_elems *= field->num; -+ } -+ -+ return n_elems; -+} -+ -+static int vmstate_size(void *opaque, const VMStateField *field) -+{ -+ int size = field->size; -+ -+ if (field->flags & VMS_VBUFFER) { -+ size = *(int32_t *)(opaque + field->size_offset); -+ if (field->flags & VMS_MULTIPLY) { -+ size *= field->size; -+ } -+ } -+ -+ return size; -+} -+ -+static int vmstate_save_state_v(SlirpOStream *f, const VMStateDescription *vmsd, -+ void *opaque, int version_id) -+{ -+ int ret = 0; -+ const VMStateField *field = vmsd->fields; -+ -+ if (vmsd->pre_save) { -+ ret = vmsd->pre_save(opaque); -+ if (ret) { -+ g_warning("pre-save failed: %s", vmsd->name); -+ return ret; -+ } -+ } -+ -+ while (field->name) { -+ if ((field->field_exists && field->field_exists(opaque, version_id)) || -+ (!field->field_exists && field->version_id <= version_id)) { -+ void *first_elem = opaque + field->offset; -+ int i, n_elems = vmstate_n_elems(opaque, field); -+ int size = vmstate_size(opaque, field); -+ -+ if (field->flags & VMS_POINTER) { -+ first_elem = *(void **)first_elem; -+ assert(first_elem || !n_elems || !size); -+ } -+ for (i = 0; i < n_elems; i++) { -+ void *curr_elem = first_elem + size * i; -+ -+ if (field->flags & VMS_ARRAY_OF_POINTER) { -+ assert(curr_elem); -+ curr_elem = *(void **)curr_elem; -+ } -+ if (!curr_elem && size) { -+ /* if null pointer write placeholder and do not follow */ -+ assert(field->flags & VMS_ARRAY_OF_POINTER); -+ ret = slirp_vmstate_info_nullptr.put(f, curr_elem, size, -+ NULL); -+ } else if (field->flags & VMS_STRUCT) { -+ ret = slirp_vmstate_save_state(f, field->vmsd, curr_elem); -+ } else if (field->flags & VMS_VSTRUCT) { -+ ret = vmstate_save_state_v(f, field->vmsd, curr_elem, -+ field->struct_version_id); -+ } else { -+ ret = field->info->put(f, curr_elem, size, field); -+ } -+ if (ret) { -+ g_warning("Save of field %s/%s failed", vmsd->name, -+ field->name); -+ return ret; -+ } -+ } -+ } else { -+ if (field->flags & VMS_MUST_EXIST) { -+ 
g_warning("Output state validation failed: %s/%s", vmsd->name, -+ field->name); -+ assert(!(field->flags & VMS_MUST_EXIST)); -+ } -+ } -+ field++; -+ } -+ -+ return 0; -+} -+ -+int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, -+ void *opaque) -+{ -+ return vmstate_save_state_v(f, vmsd, opaque, vmsd->version_id); -+} -+ -+static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque) -+{ -+ if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) { -+ size_t size = vmstate_size(opaque, field); -+ size *= vmstate_n_elems(opaque, field); -+ if (size) { -+ *(void **)ptr = g_malloc(size); -+ } -+ } -+} -+ -+int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, -+ void *opaque, int version_id) -+{ -+ VMStateField *field = vmsd->fields; -+ int ret = 0; -+ -+ if (version_id > vmsd->version_id) { -+ g_warning("%s: incoming version_id %d is too new " -+ "for local version_id %d", -+ vmsd->name, version_id, vmsd->version_id); -+ return -EINVAL; -+ } -+ if (vmsd->pre_load) { -+ int ret = vmsd->pre_load(opaque); -+ if (ret) { -+ return ret; -+ } -+ } -+ while (field->name) { -+ if ((field->field_exists && field->field_exists(opaque, version_id)) || -+ (!field->field_exists && field->version_id <= version_id)) { -+ void *first_elem = opaque + field->offset; -+ int i, n_elems = vmstate_n_elems(opaque, field); -+ int size = vmstate_size(opaque, field); -+ -+ vmstate_handle_alloc(first_elem, field, opaque); -+ if (field->flags & VMS_POINTER) { -+ first_elem = *(void **)first_elem; -+ assert(first_elem || !n_elems || !size); -+ } -+ for (i = 0; i < n_elems; i++) { -+ void *curr_elem = first_elem + size * i; -+ -+ if (field->flags & VMS_ARRAY_OF_POINTER) { -+ curr_elem = *(void **)curr_elem; -+ } -+ if (!curr_elem && size) { -+ /* if null pointer check placeholder and do not follow */ -+ assert(field->flags & VMS_ARRAY_OF_POINTER); -+ ret = slirp_vmstate_info_nullptr.get(f, curr_elem, size, -+ NULL); -+ } else if 
(field->flags & VMS_STRUCT) { -+ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, -+ field->vmsd->version_id); -+ } else if (field->flags & VMS_VSTRUCT) { -+ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, -+ field->struct_version_id); -+ } else { -+ ret = field->info->get(f, curr_elem, size, field); -+ } -+ if (ret < 0) { -+ g_warning("Failed to load %s:%s", vmsd->name, field->name); -+ return ret; -+ } -+ } -+ } else if (field->flags & VMS_MUST_EXIST) { -+ g_warning("Input validation failed: %s/%s", vmsd->name, -+ field->name); -+ return -1; -+ } -+ field++; -+ } -+ if (vmsd->post_load) { -+ ret = vmsd->post_load(opaque, version_id); -+ } -+ return ret; -+} -diff --git a/slirp/src/vmstate.h b/slirp/src/vmstate.h -new file mode 100644 -index 0000000000..94c6a4bc7b ---- /dev/null -+++ b/slirp/src/vmstate.h -@@ -0,0 +1,391 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * QEMU migration/snapshot declarations -+ * -+ * Copyright (c) 2009-2011 Red Hat, Inc. -+ * -+ * Original author: Juan Quintela -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * 1. Redistributions of source code must retain the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer. -+ * -+ * 2. Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer in the documentation and/or other materials provided -+ * with the distribution. -+ * -+ * 3. Neither the name of the copyright holder nor the names of its -+ * contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. 
-+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -+ * OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+#ifndef VMSTATE_H_ -+#define VMSTATE_H_ -+ -+#include -+#include -+#include -+#include "slirp.h" -+#include "stream.h" -+ -+#define stringify(s) tostring(s) -+#define tostring(s) #s -+ -+typedef struct VMStateInfo VMStateInfo; -+typedef struct VMStateDescription VMStateDescription; -+typedef struct VMStateField VMStateField; -+ -+int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, -+ void *opaque); -+int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, -+ void *opaque, int version_id); -+ -+/* VMStateInfo allows customized migration of objects that don't fit in -+ * any category in VMStateFlags. Additional information is always passed -+ * into get and put in terms of field and vmdesc parameters. However -+ * these two parameters should only be used in cases when customized -+ * handling is needed, such as QTAILQ. For primitive data types such as -+ * integer, field and vmdesc parameters should be ignored inside get/put. 
-+ */ -+struct VMStateInfo { -+ const char *name; -+ int (*get)(SlirpIStream *f, void *pv, size_t size, -+ const VMStateField *field); -+ int (*put)(SlirpOStream *f, void *pv, size_t size, -+ const VMStateField *field); -+}; -+ -+enum VMStateFlags { -+ /* Ignored */ -+ VMS_SINGLE = 0x001, -+ -+ /* The struct member at opaque + VMStateField.offset is a pointer -+ * to the actual field (e.g. struct a { uint8_t *b; -+ * }). Dereference the pointer before using it as basis for -+ * further pointer arithmetic (see e.g. VMS_ARRAY). Does not -+ * affect the meaning of VMStateField.num_offset or -+ * VMStateField.size_offset; see VMS_VARRAY* and VMS_VBUFFER for -+ * those. */ -+ VMS_POINTER = 0x002, -+ -+ /* The field is an array of fixed size. VMStateField.num contains -+ * the number of entries in the array. The size of each entry is -+ * given by VMStateField.size and / or opaque + -+ * VMStateField.size_offset; see VMS_VBUFFER and -+ * VMS_MULTIPLY. Each array entry will be processed individually -+ * (VMStateField.info.get()/put() if VMS_STRUCT is not set, -+ * recursion into VMStateField.vmsd if VMS_STRUCT is set). May not -+ * be combined with VMS_VARRAY*. */ -+ VMS_ARRAY = 0x004, -+ -+ /* The field is itself a struct, containing one or more -+ * fields. Recurse into VMStateField.vmsd. Most useful in -+ * combination with VMS_ARRAY / VMS_VARRAY*, recursing into each -+ * array entry. */ -+ VMS_STRUCT = 0x008, -+ -+ /* The field is an array of variable size. The int32_t at opaque + -+ * VMStateField.num_offset contains the number of entries in the -+ * array. See the VMS_ARRAY description regarding array handling -+ * in general. May not be combined with VMS_ARRAY or any other -+ * VMS_VARRAY*. */ -+ VMS_VARRAY_INT32 = 0x010, -+ -+ /* Ignored */ -+ VMS_BUFFER = 0x020, -+ -+ /* The field is a (fixed-size or variable-size) array of pointers -+ * (e.g. struct a { uint8_t *b[]; }). Dereference each array entry -+ * before using it. 
Note: Does not imply any one of VMS_ARRAY / -+ * VMS_VARRAY*; these need to be set explicitly. */ -+ VMS_ARRAY_OF_POINTER = 0x040, -+ -+ /* The field is an array of variable size. The uint16_t at opaque -+ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) -+ * contains the number of entries in the array. See the VMS_ARRAY -+ * description regarding array handling in general. May not be -+ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ -+ VMS_VARRAY_UINT16 = 0x080, -+ -+ /* The size of the individual entries (a single array entry if -+ * VMS_ARRAY or any of VMS_VARRAY* are set, or the field itself if -+ * neither is set) is variable (i.e. not known at compile-time), -+ * but the same for all entries. Use the int32_t at opaque + -+ * VMStateField.size_offset (subject to VMS_MULTIPLY) to determine -+ * the size of each (and every) entry. */ -+ VMS_VBUFFER = 0x100, -+ -+ /* Multiply the entry size given by the int32_t at opaque + -+ * VMStateField.size_offset (see VMS_VBUFFER description) with -+ * VMStateField.size to determine the number of bytes to be -+ * allocated. Only valid in combination with VMS_VBUFFER. */ -+ VMS_MULTIPLY = 0x200, -+ -+ /* The field is an array of variable size. The uint8_t at opaque + -+ * VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) -+ * contains the number of entries in the array. See the VMS_ARRAY -+ * description regarding array handling in general. May not be -+ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ -+ VMS_VARRAY_UINT8 = 0x400, -+ -+ /* The field is an array of variable size. The uint32_t at opaque -+ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) -+ * contains the number of entries in the array. See the VMS_ARRAY -+ * description regarding array handling in general. May not be -+ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ -+ VMS_VARRAY_UINT32 = 0x800, -+ -+ /* Fail loading the serialised VM state if this field is missing -+ * from the input. 
*/ -+ VMS_MUST_EXIST = 0x1000, -+ -+ /* When loading serialised VM state, allocate memory for the -+ * (entire) field. Only valid in combination with -+ * VMS_POINTER. Note: Not all combinations with other flags are -+ * currently supported, e.g. VMS_ALLOC|VMS_ARRAY_OF_POINTER won't -+ * cause the individual entries to be allocated. */ -+ VMS_ALLOC = 0x2000, -+ -+ /* Multiply the number of entries given by the integer at opaque + -+ * VMStateField.num_offset (see VMS_VARRAY*) with VMStateField.num -+ * to determine the number of entries in the array. Only valid in -+ * combination with one of VMS_VARRAY*. */ -+ VMS_MULTIPLY_ELEMENTS = 0x4000, -+ -+ /* A structure field that is like VMS_STRUCT, but uses -+ * VMStateField.struct_version_id to tell which version of the -+ * structure we are referencing to use. */ -+ VMS_VSTRUCT = 0x8000, -+}; -+ -+struct VMStateField { -+ const char *name; -+ size_t offset; -+ size_t size; -+ size_t start; -+ int num; -+ size_t num_offset; -+ size_t size_offset; -+ const VMStateInfo *info; -+ enum VMStateFlags flags; -+ const VMStateDescription *vmsd; -+ int version_id; -+ int struct_version_id; -+ bool (*field_exists)(void *opaque, int version_id); -+}; -+ -+struct VMStateDescription { -+ const char *name; -+ int version_id; -+ int (*pre_load)(void *opaque); -+ int (*post_load)(void *opaque, int version_id); -+ int (*pre_save)(void *opaque); -+ VMStateField *fields; -+}; -+ -+ -+extern const VMStateInfo slirp_vmstate_info_int16; -+extern const VMStateInfo slirp_vmstate_info_int32; -+extern const VMStateInfo slirp_vmstate_info_uint8; -+extern const VMStateInfo slirp_vmstate_info_uint16; -+extern const VMStateInfo slirp_vmstate_info_uint32; -+ -+/** Put this in the stream when migrating a null pointer.*/ -+#define VMS_NULLPTR_MARKER (0x30U) /* '0' */ -+extern const VMStateInfo slirp_vmstate_info_nullptr; -+ -+extern const VMStateInfo slirp_vmstate_info_buffer; -+extern const VMStateInfo slirp_vmstate_info_tmp; -+ -+#define 
type_check_array(t1, t2, n) ((t1(*)[n])0 - (t2 *)0) -+#define type_check_pointer(t1, t2) ((t1 **)0 - (t2 *)0) -+#define typeof_field(type, field) typeof(((type *)0)->field) -+#define type_check(t1, t2) ((t1 *)0 - (t2 *)0) -+ -+#define vmstate_offset_value(_state, _field, _type) \ -+ (offsetof(_state, _field) + type_check(_type, typeof_field(_state, _field))) -+ -+#define vmstate_offset_pointer(_state, _field, _type) \ -+ (offsetof(_state, _field) + \ -+ type_check_pointer(_type, typeof_field(_state, _field))) -+ -+#define vmstate_offset_array(_state, _field, _type, _num) \ -+ (offsetof(_state, _field) + \ -+ type_check_array(_type, typeof_field(_state, _field), _num)) -+ -+#define vmstate_offset_buffer(_state, _field) \ -+ vmstate_offset_array(_state, _field, uint8_t, \ -+ sizeof(typeof_field(_state, _field))) -+ -+/* In the macros below, if there is a _version, that means the macro's -+ * field will be processed only if the version being received is >= -+ * the _version specified. In general, if you add a new field, you -+ * would increment the structure's version and put that version -+ * number into the new field so it would only be processed with the -+ * new version. -+ * -+ * In particular, for VMSTATE_STRUCT() and friends the _version does -+ * *NOT* pick the version of the sub-structure. It works just as -+ * specified above. The version of the top-level structure received -+ * is passed down to all sub-structures. This means that the -+ * sub-structures must have version that are compatible with all the -+ * structures that use them. -+ * -+ * If you want to specify the version of the sub-structure, use -+ * VMSTATE_VSTRUCT(), which allows the specific sub-structure version -+ * to be directly specified. 
-+ */ -+ -+#define VMSTATE_SINGLE_TEST(_field, _state, _test, _version, _info, _type) \ -+ { \ -+ .name = (stringify(_field)), .version_id = (_version), \ -+ .field_exists = (_test), .size = sizeof(_type), .info = &(_info), \ -+ .flags = VMS_SINGLE, \ -+ .offset = vmstate_offset_value(_state, _field, _type), \ -+ } -+ -+#define VMSTATE_ARRAY(_field, _state, _num, _version, _info, _type) \ -+ { \ -+ .name = (stringify(_field)), .version_id = (_version), .num = (_num), \ -+ .info = &(_info), .size = sizeof(_type), .flags = VMS_ARRAY, \ -+ .offset = vmstate_offset_array(_state, _field, _type, _num), \ -+ } -+ -+#define VMSTATE_STRUCT_TEST(_field, _state, _test, _version, _vmsd, _type) \ -+ { \ -+ .name = (stringify(_field)), .version_id = (_version), \ -+ .field_exists = (_test), .vmsd = &(_vmsd), .size = sizeof(_type), \ -+ .flags = VMS_STRUCT, \ -+ .offset = vmstate_offset_value(_state, _field, _type), \ -+ } -+ -+#define VMSTATE_STRUCT_POINTER_V(_field, _state, _version, _vmsd, _type) \ -+ { \ -+ .name = (stringify(_field)), .version_id = (_version), \ -+ .vmsd = &(_vmsd), .size = sizeof(_type *), \ -+ .flags = VMS_STRUCT | VMS_POINTER, \ -+ .offset = vmstate_offset_pointer(_state, _field, _type), \ -+ } -+ -+#define VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, _test, _version, \ -+ _vmsd, _type) \ -+ { \ -+ .name = (stringify(_field)), .num = (_num), .field_exists = (_test), \ -+ .version_id = (_version), .vmsd = &(_vmsd), .size = sizeof(_type), \ -+ .flags = VMS_STRUCT | VMS_ARRAY, \ -+ .offset = vmstate_offset_array(_state, _field, _type, _num), \ -+ } -+ -+#define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) \ -+ { \ -+ .name = (stringify(_field)), .version_id = (_version), \ -+ .field_exists = (_test), .size = (_size - _start), \ -+ .info = &slirp_vmstate_info_buffer, .flags = VMS_BUFFER, \ -+ .offset = vmstate_offset_buffer(_state, _field) + _start, \ -+ } -+ -+#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, 
_field_size) \ -+ { \ -+ .name = (stringify(_field)), .version_id = (_version), \ -+ .field_exists = (_test), \ -+ .size_offset = vmstate_offset_value(_state, _field_size, uint32_t), \ -+ .info = &slirp_vmstate_info_buffer, \ -+ .flags = VMS_VBUFFER | VMS_POINTER, \ -+ .offset = offsetof(_state, _field), \ -+ } -+ -+#define QEMU_BUILD_BUG_ON_STRUCT(x) \ -+ struct { \ -+ int : (x) ? -1 : 1; \ -+ } -+ -+#define QEMU_BUILD_BUG_ON_ZERO(x) \ -+ (sizeof(QEMU_BUILD_BUG_ON_STRUCT(x)) - sizeof(QEMU_BUILD_BUG_ON_STRUCT(x))) -+ -+/* Allocate a temporary of type 'tmp_type', set tmp->parent to _state -+ * and execute the vmsd on the temporary. Note that we're working with -+ * the whole of _state here, not a field within it. -+ * We compile time check that: -+ * That _tmp_type contains a 'parent' member that's a pointer to the -+ * '_state' type -+ * That the pointer is right at the start of _tmp_type. -+ */ -+#define VMSTATE_WITH_TMP(_state, _tmp_type, _vmsd) \ -+ { \ -+ .name = "tmp", \ -+ .size = sizeof(_tmp_type) + \ -+ QEMU_BUILD_BUG_ON_ZERO(offsetof(_tmp_type, parent) != 0) + \ -+ type_check_pointer(_state, typeof_field(_tmp_type, parent)), \ -+ .vmsd = &(_vmsd), .info = &slirp_vmstate_info_tmp, \ -+ } -+ -+#define VMSTATE_SINGLE(_field, _state, _version, _info, _type) \ -+ VMSTATE_SINGLE_TEST(_field, _state, NULL, _version, _info, _type) -+ -+#define VMSTATE_STRUCT(_field, _state, _version, _vmsd, _type) \ -+ VMSTATE_STRUCT_TEST(_field, _state, NULL, _version, _vmsd, _type) -+ -+#define VMSTATE_STRUCT_POINTER(_field, _state, _vmsd, _type) \ -+ VMSTATE_STRUCT_POINTER_V(_field, _state, 0, _vmsd, _type) -+ -+#define VMSTATE_STRUCT_ARRAY(_field, _state, _num, _version, _vmsd, _type) \ -+ VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, NULL, _version, _vmsd, \ -+ _type) -+ -+#define VMSTATE_INT16_V(_f, _s, _v) \ -+ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int16, int16_t) -+#define VMSTATE_INT32_V(_f, _s, _v) \ -+ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int32, 
int32_t) -+ -+#define VMSTATE_UINT8_V(_f, _s, _v) \ -+ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint8, uint8_t) -+#define VMSTATE_UINT16_V(_f, _s, _v) \ -+ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint16, uint16_t) -+#define VMSTATE_UINT32_V(_f, _s, _v) \ -+ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint32, uint32_t) -+ -+#define VMSTATE_INT16(_f, _s) VMSTATE_INT16_V(_f, _s, 0) -+#define VMSTATE_INT32(_f, _s) VMSTATE_INT32_V(_f, _s, 0) -+ -+#define VMSTATE_UINT8(_f, _s) VMSTATE_UINT8_V(_f, _s, 0) -+#define VMSTATE_UINT16(_f, _s) VMSTATE_UINT16_V(_f, _s, 0) -+#define VMSTATE_UINT32(_f, _s) VMSTATE_UINT32_V(_f, _s, 0) -+ -+#define VMSTATE_UINT16_TEST(_f, _s, _t) \ -+ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint16, uint16_t) -+ -+#define VMSTATE_UINT32_TEST(_f, _s, _t) \ -+ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint32, uint32_t) -+ -+#define VMSTATE_INT16_ARRAY_V(_f, _s, _n, _v) \ -+ VMSTATE_ARRAY(_f, _s, _n, _v, slirp_vmstate_info_int16, int16_t) -+ -+#define VMSTATE_INT16_ARRAY(_f, _s, _n) VMSTATE_INT16_ARRAY_V(_f, _s, _n, 0) -+ -+#define VMSTATE_BUFFER_V(_f, _s, _v) \ -+ VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, 0, sizeof(typeof_field(_s, _f))) -+ -+#define VMSTATE_BUFFER(_f, _s) VMSTATE_BUFFER_V(_f, _s, 0) -+ -+#define VMSTATE_END_OF_LIST() \ -+ { \ -+ } -+ -+#endif --- -2.18.4 - diff --git a/0004-Initial-redhat-build.patch b/0004-Initial-redhat-build.patch new file mode 100644 index 0000000..a697dd3 --- /dev/null +++ b/0004-Initial-redhat-build.patch @@ -0,0 +1,230 @@ +From 431955e872aa010376b1f94665908c2ba8194b44 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 12 Oct 2018 07:31:11 +0200 +Subject: Initial redhat build + +This patch introduces redhat build structure in redhat subdirectory. 
In addition, +several issues are fixed in QEMU tree: + + - Change of app name for sasl_server_init in VNC code from qemu to qemu-kvm + - As we use qemu-kvm as name in all places, this is updated to be consistent + - Man page renamed from qemu to qemu-kvm + - man page is installed using make install so we have to fix it in qemu tree + +We disable make check due to issues with some of the tests. + +This rebase is based on qemu-kvm-5.2.0-16.el9 + +Signed-off-by: Miroslav Rezanina + +Merged patches (6.0.0): +- 605758c902 Limit build on Power to qemu-img and qemu-ga only +--- + .gitignore | 1 + + README.systemtap | 43 + + configure | 5 - + hw/remote/memory.c | 2 +- + hw/remote/proxy.c | 2 +- + meson.build | 8 +- + redhat/Makefile | 90 + + redhat/Makefile.common | 50 + + redhat/README.tests | 39 + + redhat/qemu-kvm.spec.template | 3609 +++++++++++++++++++++++ + redhat/scripts/extract_build_cmd.py | 5 +- + redhat/scripts/process-patches.sh | 20 +- + redhat/udev-kvm-check.c | 19 +- + scripts/qemu-guest-agent/fsfreeze-hook | 2 +- + scripts/systemtap/conf.d/qemu_kvm.conf | 4 + + scripts/systemtap/script.d/qemu_kvm.stp | 1 + + tests/check-block.sh | 2 + + ui/vnc.c | 2 +- + 18 files changed, 3860 insertions(+), 44 deletions(-) + create mode 100644 README.systemtap + create mode 100644 redhat/Makefile + create mode 100644 redhat/Makefile.common + create mode 100644 redhat/README.tests + create mode 100644 redhat/qemu-kvm.spec.template + create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf + create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp + +diff --git a/README.systemtap b/README.systemtap +new file mode 100644 +index 0000000000..ad913fc990 +--- /dev/null ++++ b/README.systemtap +@@ -0,0 +1,43 @@ ++QEMU tracing using systemtap-initscript ++--------------------------------------- ++ ++You can capture QEMU trace data all the time using systemtap-initscript. 
This ++uses SystemTap's flight recorder mode to trace all running guests to a ++fixed-size buffer on the host. Old trace entries are overwritten by new ++entries when the buffer size wraps. ++ ++1. Install the systemtap-initscript package: ++ # yum install systemtap-initscript ++ ++2. Install the systemtap scripts and the conf file: ++ # cp /usr/share/qemu-kvm/systemtap/script.d/qemu_kvm.stp /etc/systemtap/script.d/ ++ # cp /usr/share/qemu-kvm/systemtap/conf.d/qemu_kvm.conf /etc/systemtap/conf.d/ ++ ++The set of trace events to enable is given in qemu_kvm.stp. This SystemTap ++script can be customized to add or remove trace events provided in ++/usr/share/systemtap/tapset/qemu-kvm-simpletrace.stp. ++ ++SystemTap customizations can be made to qemu_kvm.conf to control the flight ++recorder buffer size and whether to store traces in memory only or disk too. ++See stap(1) for option documentation. ++ ++3. Start the systemtap service. ++ # service systemtap start qemu_kvm ++ ++4. Make the service start at boot time. ++ # chkconfig systemtap on ++ ++5. Confirm that the service works. ++ # service systemtap status qemu_kvm ++ qemu_kvm is running... ++ ++When you want to inspect the trace buffer, perform the following steps: ++ ++1. Dump the trace buffer. ++ # staprun -A qemu_kvm >/tmp/trace.log ++ ++2. Start the systemtap service because the preceding step stops the service. ++ # service systemtap start qemu_kvm ++ ++3. Translate the trace record to readable format. ++ # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log +diff --git a/configure b/configure +index 4f374b4889..53b2fa583a 100755 +--- a/configure ++++ b/configure +@@ -6383,10 +6383,6 @@ fi + mv $cross config-meson.cross + + rm -rf meson-private meson-info meson-logs +-unset staticpic +-if ! 
version_ge "$($meson --version)" 0.56.0; then +- staticpic=$(if test "$pie" = yes; then echo true; else echo false; fi) +-fi + NINJA=$ninja $meson setup \ + --prefix "$prefix" \ + --libdir "$libdir" \ +@@ -6406,7 +6402,6 @@ NINJA=$ninja $meson setup \ + -Dwerror=$(if test "$werror" = yes; then echo true; else echo false; fi) \ + -Dstrip=$(if test "$strip_opt" = yes; then echo true; else echo false; fi) \ + -Db_pie=$(if test "$pie" = yes; then echo true; else echo false; fi) \ +- ${staticpic:+-Db_staticpic=$staticpic} \ + -Db_coverage=$(if test "$gcov" = yes; then echo true; else echo false; fi) \ + -Db_lto=$lto -Dcfi=$cfi -Dcfi_debug=$cfi_debug \ + -Dmalloc=$malloc -Dmalloc_trim=$malloc_trim -Dsparse=$sparse \ +diff --git a/hw/remote/memory.c b/hw/remote/memory.c +index 32085b1e05..bf0047a81b 100644 +--- a/hw/remote/memory.c ++++ b/hw/remote/memory.c +@@ -43,7 +43,7 @@ void remote_sysmem_reconfig(MPQemuMsg *msg, Error **errp) + remote_sysmem_reset(); + + for (region = 0; region < msg->num_fds; region++) { +- g_autofree char *name; ++ g_autofree char *name = NULL; + subregion = g_new(MemoryRegion, 1); + name = g_strdup_printf("remote-mem-%u", suffix++); + memory_region_init_ram_from_fd(subregion, NULL, +diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c +index 4fa4be079d..253c1aa7ec 100644 +--- a/hw/remote/proxy.c ++++ b/hw/remote/proxy.c +@@ -347,7 +347,7 @@ static void probe_pci_info(PCIDevice *dev, Error **errp) + PCI_BASE_ADDRESS_SPACE_IO : PCI_BASE_ADDRESS_SPACE_MEMORY; + + if (size) { +- g_autofree char *name; ++ g_autofree char *name = NULL; + pdev->region[i].dev = pdev; + pdev->region[i].present = true; + if (type == PCI_BASE_ADDRESS_SPACE_MEMORY) { +diff --git a/meson.build b/meson.build +index c6f4b0cf5e..06c15bd6d2 100644 +--- a/meson.build ++++ b/meson.build +@@ -1,6 +1,6 @@ + project('qemu', ['c'], meson_version: '>=0.55.0', +- default_options: ['warning_level=1', 'c_std=gnu99', 'cpp_std=gnu++11', 'b_colorout=auto'] + +- 
(meson.version().version_compare('>=0.56.0') ? [ 'b_staticpic=false' ] : []), ++ default_options: ['warning_level=1', 'c_std=gnu99', 'cpp_std=gnu++11', 'b_colorout=auto', ++ 'b_staticpic=false' ], + version: run_command('head', meson.source_root() / 'VERSION').stdout().strip()) + + not_found = dependency('', required: false) +@@ -1482,7 +1482,9 @@ if capstone_opt == 'internal' + # Include all configuration defines via a header file, which will wind up + # as a dependency on the object file, and thus changes here will result + # in a rebuild. +- '-include', 'capstone-defs.h' ++ '-include', 'capstone-defs.h', ++ ++ '-Wp,-D_GLIBCXX_ASSERTIONS', + ] + + libcapstone = static_library('capstone', +diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook +index 13aafd4845..e9b84ec028 100755 +--- a/scripts/qemu-guest-agent/fsfreeze-hook ++++ b/scripts/qemu-guest-agent/fsfreeze-hook +@@ -8,7 +8,7 @@ + # request, it is issued with "thaw" argument after filesystem is thawed. 
+ + LOGFILE=/var/log/qga-fsfreeze-hook.log +-FSFREEZE_D=$(dirname -- "$0")/fsfreeze-hook.d ++FSFREEZE_D=$(dirname -- "$(realpath $0)")/fsfreeze-hook.d + + # Check whether file $1 is a backup or rpm-generated file and should be ignored + is_ignored_file() { +diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf +new file mode 100644 +index 0000000000..372d8160a4 +--- /dev/null ++++ b/scripts/systemtap/conf.d/qemu_kvm.conf +@@ -0,0 +1,4 @@ ++# Force load uprobes (see BZ#1118352) ++stap -e 'probe process("/usr/libexec/qemu-kvm").function("main") { printf("") }' -c true ++ ++qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes +diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp +new file mode 100644 +index 0000000000..c04abf9449 +--- /dev/null ++++ b/scripts/systemtap/script.d/qemu_kvm.stp +@@ -0,0 +1 @@ ++probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} +diff --git a/tests/check-block.sh b/tests/check-block.sh +index f86cb863de..6d38340d49 100755 +--- a/tests/check-block.sh ++++ b/tests/check-block.sh +@@ -69,6 +69,8 @@ else + fi + fi + ++exit 0 ++ + cd tests/qemu-iotests + + # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests +diff --git a/ui/vnc.c b/ui/vnc.c +index 456db47d71..97ae92b181 100644 +--- a/ui/vnc.c ++++ b/ui/vnc.c +@@ -4146,7 +4146,7 @@ void vnc_display_open(const char *id, Error **errp) + + #ifdef CONFIG_VNC_SASL + if (sasl) { +- int saslErr = sasl_server_init(NULL, "qemu"); ++ int saslErr = sasl_server_init(NULL, "qemu-kvm"); + + if (saslErr != SASL_OK) { + error_setg(errp, "Failed to initialize SASL auth: %s", +-- +2.27.0 + diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0005-Enable-disable-devices-for-RHEL.patch similarity index 73% rename from 0006-Enable-disable-devices-for-RHEL.patch rename to 0005-Enable-disable-devices-for-RHEL.patch index 0827288..d80a533 
100644 --- a/0006-Enable-disable-devices-for-RHEL.patch +++ b/0005-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 0ad3e82af785512a5a77373d2ad95c63dfedeaba Mon Sep 17 00:00:00 2001 +From f46ca4c7e719e0a70f8e0ffe3de882c017c216e7 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:11:07 +0200 Subject: Enable/disable devices for RHEL @@ -6,125 +6,49 @@ Subject: Enable/disable devices for RHEL This commit adds all changes related to changes in supported devices. Signed-off-by: Miroslav Rezanina - -Rebase notes (qemu 3.1.0) -- spapr_rng disabled in default_config -- new hyperv.mak in default configs -- Move changes from x86_64-softmmu.mak to i386-softmmu.mak -- Added CONFIG_VIRTIO_MMIO to aarch64-softmmu.mak -- Removed config_vga_isa.c changes as no longer needed -- Removed new devices - -Rebase notes (4.0.0): -- Added CONFIG_PCI_EXPRESS_GENERIC_BRIDGE for aarch64-softmmu.mak -- Added CONFIG_ARM_VIRT for aarch64-softmmu.mak -- Switch to KConfig (upstream) - - Using device whitelist + without-defualt-devices option - -Rebase notes (4.1.0): -- Added CONFIG_USB_OHCI_PCI for ppc64 -- Added CONFIG_XIVE_KVM for ppc64 -- Added CONFIG_ACPI_PCI for x86_64 -- Added CONFIG_SEMIHOSTING for aarch64 -- Cleanup aarch64 devices -- Do not build a15mpcore.c -- Removed ide-isa.c stub file -- Use CONFIG_USB_EHCI_PCI on x86_64 (new upstream) - -Rebase notes (4.2.0-rc0): -- Use conditional build for isa-superio.c (upstream change) -- Rename PCI_PIIX to PCI_I440FX (upstream change) - -Rebase notes (4.2.0-rc3): -- Disabled ccid-card-emulated (patch 92566) -- Disabled vfio-pci-igd-lpc-bridge (patch 92565) - -Rebase notes (5.1.0): -- added CONFIG_PCI_EXPRESS on ppc64 (due to upstream dependency) -- Added CONFIG_NVDIMM -- updated cortex-15 disabling to upstream code -- Add CONFIG_ACPI_APEI for aarch64 -- removed obsolete hw/bt/Makefile.objs chunk -- removed unnecessary changes in target/i386/cpu.c - -Rebase notes (5.2.0 rc0): -- Added CONFIG_USB_XHCI_PCI on 
aarch64 ppc64 and x86_64 -- remove vl.c hack for no hpet -- Enable CONFIG_PTIMER for aarch64 -- Do not package hw-display-virtio-gpu.so on s390x - -Rebase notes (5.2.0 rc1): -- Added CONFIG_ARM_GIC for aarch64 (required for build) - -Merged patches (qemu 3.1.0): -- d51e082 Re-enable CONFIG_HYPERV_TESTDEV -- 4b889f3 Declare cirrus-vga as deprecated -- b579d32 Do not build bluetooth support -- 3eef52a Disable CONFIG_IPMI and CONFIG_I2C for ppc64 -- 9caf292 Disable CONFIG_CAN_BUS and CONFIG_CAN_SJA1000 - -Merged patches (4.1.0): -- 20a51f6 fdc: Revert downstream disablement of device "floppy" -- f869cc0 fdc: Restrict floppy controllers to RHEL-7 machine types -- 5909721 aarch64: Compile out IOH3420 -- 27b7c44 rh: set CONFIG_BOCHS_DISPLAY=y for x86 (partial) -- 495a27d x86_64-rh-devices: add missing TPM passthrough -- e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) - -Merged patches (4.2.0): -- f7587dd RHEL: disable hostmem-memfd - -Merged patches (5.1.0): -- 4543a3c i386: Remove cpu64-rhel6 CPU model -- 96533 aarch64: Remove tcg cpu types (pjw commit) -- 559d589 Revert "RHEL: disable hostmem-memfd" -- 441128e enable ramfb - -Merged patches (5.2.0 rc0): -- f70eb50 RHEL-only: Enable vTPM for POWER in downstream configs -- 69d8ae7 redhat: fix 5.0 rebase missing ISA TPM TIS -- 8310f89 RHEL-only: Enable vTPM for ARM in downstream configs -- 4a8ccfd Disable TPM passthrough backend on ARM --- - .../devices/aarch64-rh-devices.mak | 27 +++++ + .../devices/aarch64-rh-devices.mak | 28 +++++ default-configs/devices/aarch64-softmmu.mak | 10 +- - default-configs/devices/ppc64-rh-devices.mak | 38 +++++++ + default-configs/devices/ppc64-rh-devices.mak | 36 ++++++ default-configs/devices/ppc64-softmmu.mak | 10 +- default-configs/devices/rh-virtio.mak | 10 ++ - default-configs/devices/s390x-rh-devices.mak | 15 +++ + default-configs/devices/s390x-rh-devices.mak | 16 +++ default-configs/devices/s390x-softmmu.mak | 4 +- - default-configs/devices/x86_64-rh-devices.mak | 101 
++++++++++++++++++ - default-configs/devices/x86_64-softmmu.mak | 4 +- + default-configs/devices/x86_64-rh-devices.mak | 104 ++++++++++++++++++ + default-configs/devices/x86_64-softmmu.mak | 10 +- + .../devices/x86_64-upstream-devices.mak | 4 + hw/acpi/ich9.c | 4 +- hw/arm/meson.build | 2 +- hw/block/fdc.c | 10 ++ + hw/char/parallel.c | 9 ++ hw/cpu/meson.build | 5 +- hw/display/cirrus_vga.c | 3 + hw/ide/piix.c | 5 +- hw/input/pckbd.c | 2 + hw/net/e1000.c | 2 + hw/ppc/spapr_cpu_core.c | 2 + + hw/timer/hpet.c | 8 ++ hw/usb/meson.build | 2 +- qemu-options.hx | 4 - - redhat/qemu-kvm.spec.template | 10 +- - target/arm/cpu.c | 4 +- - target/arm/cpu_tcg.c | 3 + + redhat/qemu-kvm.spec.template | 5 +- + target/arm/cpu_tcg.c | 10 ++ target/ppc/cpu-models.c | 10 ++ target/s390x/cpu_models.c | 3 + target/s390x/kvm.c | 8 ++ - 26 files changed, 273 insertions(+), 25 deletions(-) + 28 files changed, 304 insertions(+), 22 deletions(-) create mode 100644 default-configs/devices/aarch64-rh-devices.mak create mode 100644 default-configs/devices/ppc64-rh-devices.mak create mode 100644 default-configs/devices/rh-virtio.mak create mode 100644 default-configs/devices/s390x-rh-devices.mak create mode 100644 default-configs/devices/x86_64-rh-devices.mak + create mode 100644 default-configs/devices/x86_64-upstream-devices.mak diff --git a/default-configs/devices/aarch64-rh-devices.mak b/default-configs/devices/aarch64-rh-devices.mak new file mode 100644 -index 0000000000..98319407de +index 0000000000..a4d67274c0 --- /dev/null +++ b/default-configs/devices/aarch64-rh-devices.mak -@@ -0,0 +1,27 @@ +@@ -0,0 +1,28 @@ +include rh-virtio.mak + +CONFIG_ARM_GIC_KVM=y @@ -152,6 +76,7 @@ index 0000000000..98319407de +CONFIG_TPM_EMULATOR=y +CONFIG_TPM_TIS_SYSBUS=y +CONFIG_PTIMER=y ++CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y diff --git a/default-configs/devices/aarch64-softmmu.mak b/default-configs/devices/aarch64-softmmu.mak index 958b1e08e4..8f6867d48a 100644 --- 
a/default-configs/devices/aarch64-softmmu.mak @@ -173,10 +98,10 @@ index 958b1e08e4..8f6867d48a 100644 +include aarch64-rh-devices.mak diff --git a/default-configs/devices/ppc64-rh-devices.mak b/default-configs/devices/ppc64-rh-devices.mak new file mode 100644 -index 0000000000..467a16bdc2 +index 0000000000..5b01b7fac0 --- /dev/null +++ b/default-configs/devices/ppc64-rh-devices.mak -@@ -0,0 +1,38 @@ +@@ -0,0 +1,36 @@ +include rh-virtio.mak + +CONFIG_DIMM=y @@ -195,6 +120,8 @@ index 0000000000..467a16bdc2 +CONFIG_USB_OHCI_PCI=y +CONFIG_USB_SMARTCARD=y +CONFIG_USB_STORAGE_BOT=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y +CONFIG_USB_XHCI=y +CONFIG_USB_XHCI_NEC=y +CONFIG_USB_XHCI_PCI=y @@ -207,11 +134,7 @@ index 0000000000..467a16bdc2 +CONFIG_VIRTIO_VGA=y +CONFIG_WDT_IB6300ESB=y +CONFIG_XICS=y -+CONFIG_XICS_KVM=y -+CONFIG_XICS_SPAPR=y +CONFIG_XIVE=y -+CONFIG_XIVE_SPAPR=y -+CONFIG_XIVE_KVM=y +CONFIG_TPM_SPAPR=y +CONFIG_TPM_EMULATOR=y +CONFIG_TPM_PASSTHROUGH=y @@ -255,10 +178,10 @@ index 0000000000..94ede1b5f6 +CONFIG_VIRTIO_SERIAL=y diff --git a/default-configs/devices/s390x-rh-devices.mak b/default-configs/devices/s390x-rh-devices.mak new file mode 100644 -index 0000000000..c3c73fe752 +index 0000000000..08a15f3e01 --- /dev/null +++ b/default-configs/devices/s390x-rh-devices.mak -@@ -0,0 +1,15 @@ +@@ -0,0 +1,16 @@ +include rh-virtio.mak + +CONFIG_PCI=y @@ -270,6 +193,7 @@ index 0000000000..c3c73fe752 +CONFIG_TERMINAL3270=y +CONFIG_VFIO=y +CONFIG_VFIO_AP=y ++CONFIG_VFIO_CCW=y +CONFIG_VFIO_PCI=y +CONFIG_VHOST_USER=y +CONFIG_VIRTIO_CCW=y @@ -288,10 +212,10 @@ index f2287a133f..3e2e388e91 100644 +include s390x-rh-devices.mak diff --git a/default-configs/devices/x86_64-rh-devices.mak b/default-configs/devices/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..e80877d4e2 +index 0000000000..9f41400530 --- /dev/null +++ b/default-configs/devices/x86_64-rh-devices.mak -@@ -0,0 +1,101 @@ +@@ -0,0 +1,104 @@ +include rh-virtio.mak + +CONFIG_AC97=y @@ 
-351,7 +275,7 @@ index 0000000000..e80877d4e2 +CONFIG_PC_ACPI=y +CONFIG_PC_PCI=y +CONFIG_PFLASH_CFI01=y -+CONFIG_PVPANIC=y ++CONFIG_PVPANIC_ISA=y +CONFIG_PXB=y +CONFIG_Q35=y +CONFIG_QXL=y @@ -371,6 +295,8 @@ index 0000000000..e80877d4e2 +CONFIG_USB_EHCI_PCI=y +CONFIG_USB_SMARTCARD=y +CONFIG_USB_STORAGE_BOT=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y +CONFIG_USB_UHCI=y +CONFIG_USB_XHCI=y +CONFIG_USB_XHCI_NEC=y @@ -381,6 +307,7 @@ index 0000000000..e80877d4e2 +CONFIG_VGA_CIRRUS=y +CONFIG_VGA_PCI=y +CONFIG_VHOST_USER=y ++CONFIG_VHOST_USER_BLK=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VGA=y +CONFIG_VMMOUSE=y @@ -394,18 +321,34 @@ index 0000000000..e80877d4e2 +CONFIG_TPM_EMULATOR=y +CONFIG_TPM_PASSTHROUGH=y diff --git a/default-configs/devices/x86_64-softmmu.mak b/default-configs/devices/x86_64-softmmu.mak -index 64b2ee2960..b5de7e5279 100644 +index 64b2ee2960..e57bcff7d9 100644 --- a/default-configs/devices/x86_64-softmmu.mak +++ b/default-configs/devices/x86_64-softmmu.mak -@@ -1,3 +1,5 @@ +@@ -1,3 +1,11 @@ # Default configuration for x86_64-softmmu -include i386-softmmu.mak +#include i386-softmmu.mak + +include x86_64-rh-devices.mak ++ ++# ++# RHEL: this is for the limited upstream machine type support, so to export ++# some more devices than what RHEL machines have. 
++# ++include x86_64-upstream-devices.mak +diff --git a/default-configs/devices/x86_64-upstream-devices.mak b/default-configs/devices/x86_64-upstream-devices.mak +new file mode 100644 +index 0000000000..2cd20f54d2 +--- /dev/null ++++ b/default-configs/devices/x86_64-upstream-devices.mak +@@ -0,0 +1,4 @@ ++# We need "isa-parallel" ++CONFIG_PARALLEL=y ++# We need "hpet" ++CONFIG_HPET=y diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 5ff4e01c36..ac45ca4acb 100644 +index 853447cf9d..7f01fad64c 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -374,8 +374,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) @@ -433,10 +376,10 @@ index be39117b9b..6fcc5ede50 100644 arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 4c2c35e223..e9eb7b8279 100644 +index a825c2acba..c62927bb3a 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c -@@ -48,6 +48,8 @@ +@@ -49,6 +49,8 @@ #include "trace.h" #include "qom/object.h" @@ -445,8 +388,8 @@ index 4c2c35e223..e9eb7b8279 100644 /********************************************************/ /* debug Floppy devices */ -@@ -2621,6 +2623,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, - int i, j; +@@ -2554,6 +2556,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, + FDrive *drive; static int command_tables_inited = 0; + /* Restricted for Red Hat Enterprise Linux: */ @@ -460,6 +403,33 @@ index 4c2c35e223..e9eb7b8279 100644 if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); return; +diff --git a/hw/char/parallel.c b/hw/char/parallel.c +index b45e67bfbb..e5f108211b 100644 +--- a/hw/char/parallel.c ++++ b/hw/char/parallel.c +@@ -29,6 +29,7 @@ + #include "chardev/char-parallel.h" + #include "chardev/char-fe.h" + #include "hw/acpi/aml-build.h" ++#include "hw/boards.h" + 
#include "hw/irq.h" + #include "hw/isa/isa.h" + #include "hw/qdev-properties.h" +@@ -534,6 +535,14 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp) + int base; + uint8_t dummy; + ++ /* Restricted for Red Hat Enterprise Linux */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (strstr(mc->name, "rhel")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (!qemu_chr_fe_backend_connected(&s->chr)) { + error_setg(errp, "Can't create parallel device, empty char device"); + return; diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build index 9e52fee9e7..bb71c9f3e7 100644 --- a/hw/cpu/meson.build @@ -525,10 +495,10 @@ index dde85ba6c6..62cf60c9c9 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index d7d05ae30a..aaea06d29c 100644 +index 4f75b44cfc..6f075fe235 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1796,6 +1796,7 @@ static const E1000Info e1000_devices[] = { +@@ -1797,6 +1797,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -536,7 +506,7 @@ index d7d05ae30a..aaea06d29c 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1808,6 +1809,7 @@ static const E1000Info e1000_devices[] = { +@@ -1809,6 +1810,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -545,10 +515,10 @@ index d7d05ae30a..aaea06d29c 100644 static void e1000_register_types(void) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 2f7dc3c23d..55d36e0069 100644 +index 4f316a6f9d..64178f0f9a 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -376,10 +376,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { +@@ -370,10 +370,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { .instance_size = sizeof(SpaprCpuCore), .class_size = 
sizeof(SpaprCpuCoreClass), }, @@ -561,11 +531,30 @@ index 2f7dc3c23d..55d36e0069 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), +diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c +index 9520471be2..202e032524 100644 +--- a/hw/timer/hpet.c ++++ b/hw/timer/hpet.c +@@ -733,6 +733,14 @@ static void hpet_realize(DeviceState *dev, Error **errp) + int i; + HPETTimer *timer; + ++ /* Restricted for Red Hat Enterprise Linux */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (strstr(mc->name, "rhel")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (!s->intcap) { + warn_report("Hpet's intcap not initialized"); + } diff --git a/hw/usb/meson.build b/hw/usb/meson.build -index 934e4fa675..e3abba548a 100644 +index fb7a74e73a..674993aa4f 100644 --- a/hw/usb/meson.build +++ b/hw/usb/meson.build -@@ -48,7 +48,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade +@@ -55,7 +55,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade if config_host.has_key('CONFIG_SMARTCARD') usbsmartcard_ss = ss.source_set() usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD', @@ -575,10 +564,10 @@ index 934e4fa675..e3abba548a 100644 endif diff --git a/qemu-options.hx b/qemu-options.hx -index 104632ea34..363a15b4e8 100644 +index fd21002bd6..0d4fb61bf7 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -2251,10 +2251,6 @@ ERST +@@ -2327,10 +2327,6 @@ ERST DEF("no-hpet", 0, QEMU_OPTION_no_hpet, "-no-hpet disable HPET\n", QEMU_ARCH_I386) @@ -589,35 +578,84 @@ index 104632ea34..363a15b4e8 100644 DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" -diff --git a/target/arm/cpu.c b/target/arm/cpu.c 
-index 07492e9f9a..a0487148e8 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -2358,7 +2358,9 @@ static void arm_cpu_register_types(void) - - type_register_static(&idau_interface_type_info); - for (i = 0; i < cpu_count; ++i) { -- arm_cpu_register(&arm_cpus[i]); -+ /* RHEL specific: Filter out unsupported cpu models */ -+ if (!strcmp(arm_cpus[i].name, "cortex-a15")) -+ arm_cpu_register(&arm_cpus[i]); - } - } - } diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index 0013e25412..6540046128 100644 +index 046e476f65..c3cd0ca039 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c -@@ -679,6 +679,9 @@ static void arm_tcg_cpu_register_types(void) - { - size_t i; +@@ -22,6 +22,7 @@ + /* CPU models. These are not needed for the AArch64 linux-user build. */ + #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) -+ /* Disable TCG cpu types for Red Hat Enterprise Linux */ -+ return; -+ - for (i = 0; i < ARRAY_SIZE(arm_tcg_cpus); ++i) { - arm_cpu_register(&arm_tcg_cpus[i]); - } ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + #ifdef CONFIG_TCG + static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) + { +@@ -375,6 +376,7 @@ static void cortex_a9_initfn(Object *obj) + cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. 
*/ + define_arm_cp_regs(cpu, cortexa9_cp_reginfo); + } ++#endif /* disabled for RHEL */ + + #ifndef CONFIG_USER_ONLY + static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) +@@ -400,6 +402,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { + REGINFO_SENTINEL + }; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_a7_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -445,6 +448,7 @@ static void cortex_a7_initfn(Object *obj) + cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ + } ++#endif /* disabled for RHEL */ + + static void cortex_a15_initfn(Object *obj) + { +@@ -488,6 +492,7 @@ static void cortex_a15_initfn(Object *obj) + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_m0_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -927,6 +932,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) + + cc->gdb_core_xml_file = "arm-m-profile.xml"; + } ++#endif /* disabled for RHEL */ + + #ifndef TARGET_AARCH64 + /* +@@ -1004,6 +1010,7 @@ static void arm_max_initfn(Object *obj) + #endif /* !TARGET_AARCH64 */ + + static const ARMCPUInfo arm_tcg_cpus[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "arm926", .initfn = arm926_initfn }, + { .name = "arm946", .initfn = arm946_initfn }, + { .name = "arm1026", .initfn = arm1026_initfn }, +@@ -1019,7 +1026,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "cortex-a7", .initfn = cortex_a7_initfn }, + { .name = "cortex-a8", .initfn = cortex_a8_initfn }, + { .name = "cortex-a9", .initfn = cortex_a9_initfn }, ++#endif /* disabled for RHEL */ + { .name = "cortex-a15", .initfn = cortex_a15_initfn }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-m0", .initfn = cortex_m0_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m3", .initfn = 
cortex_m3_initfn, +@@ -1050,6 +1059,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, + { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, + { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, ++#endif /* disabled for RHEL */ + #ifndef TARGET_AARCH64 + { .name = "max", .initfn = arm_max_initfn }, + #endif diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c index 87e4228614..6eaa65efff 100644 --- a/target/ppc/cpu-models.c @@ -689,10 +727,10 @@ index 87e4228614..6eaa65efff 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index b5abff8bef..abe09d73c2 100644 +index 050dcf2d42..9254ff46bf 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c -@@ -408,6 +408,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, +@@ -430,6 +430,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, (max_model->def->gen == model->def->gen && max_model->def->ec_ga < model->def->ec_ga)) { list_add_feat("type", unavailable); @@ -703,10 +741,10 @@ index b5abff8bef..abe09d73c2 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index b8385e6b95..1839cc6648 100644 +index 4fb3bbfef5..6c69d84b84 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c -@@ -2552,6 +2552,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2516,6 +2516,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } @@ -722,5 +760,5 @@ index b8385e6b95..1839cc6648 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ -- -2.18.4 +2.27.0 diff --git a/0005-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch deleted file mode 100644 index fb98b03..0000000 --- a/0005-Initial-redhat-build.patch +++ /dev/null @@ -1,307 +0,0 @@ -From 
e4cd78dda8017f181fa94bbad1f0f015a99271db Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Fri, 12 Oct 2018 07:31:11 +0200 -Subject: Initial redhat build - -This patch introduces redhat build structure in redhat subdirectory. In addition, -several issues are fixed in QEMU tree: - - - Change of app name for sasl_server_init in VNC code from qemu to qemu-kvm - - As we use qemu-kvm as name in all places, this is updated to be consistent - - Man page renamed from qemu to qemu-kvm - - man page is installed using make install so we have to fix it in qemu tree - -This rebase includes changes up to qemu-kvm-5.1.0-16.el8 - -Rebase notes (3.1.0): -- added new configure options - -Rebase notes (4.0.0): -- Added dependency to perl-Test-Harness (upstream) -- Added dependency to python3-sphinx (upstream) -- Change location of icons (upstream) -- Remove .desktop file (added upstream) -- Added qemu-trace-stap (added upstream) -- Removed elf2dmp (added upstream) -- Remove .buildinfo -- Added pvh.bin rom (added upstream) -- Added interop documentation files -- Use python module instead of qemu.py (upstream) - -Rebase notes (4.1.0): -- Remove edk2 files generated by build -- Switch to rhel-8.1-candidate build target -- Remove specs documentation -- Switched from libssh2 to libssh -- Add rc0 tarball usage hacks -- Added BuildRequires for wget, rpm-build and python3-sphinx -- Removed new unpacked files -- Update configure line to use new options - -Rebase notes (4.2.0): -- Disable iotest run during make check -- README renamed to README.rst (upstream) -- Removed ui-spice-app.so -- Added relevant changes from "505f7f4 redhat: Adding slirp to the exploded tree" -- Removed qemu-ga.8 install from spec file - installed by make -- Removed spapr-rtas.bin (upstream) -- Require newer SLOF (20191022) - -Rebase notes (5.1.0): -- Use python3 for virtio_seg_max_adjust.py test -- Removed qemu-trace-stap shebang from spec file -- Added virtiofsd.1 (upstream) -- Use out-of-tree build -- New 
documentation structure (upstream) -- Update local build -- Removing installed qemu-storage-daemon (added upstream) -- Removing opensbi-riscv32-sifive_u-fw_jump.bin (added upstream) -- Disable iotests (moved from Enable make check commit) -- Added missing configure options -- Reorder configure options -- qemu-pr-helper moved to /usr/libexec/ (upstream) -- Added submodules for usb-redir, smartcard-reader and qxl display (upstream) -- Added setting rc version in Makefile for build -- removed --disable-vxhs configure option (removed upstream) -- bumped required libusbx-devel version to 1.0.23 -- bumped libfdt version to 1.6.0 - -Rebase notes (5.2.0 rc0): -- Move libfdt dependency to qemu-kvm-core -- Move manpage rename from Makefile to spec file -- rename with-confsuffix configure option to with-suffix (upstream) -- Bump libusbx Requires version to 1.0.234 -- Manual copy of keymaps in spec file (BZ 1875217) -- Removed /usr/share/qemu-kvm/npcm7xx_bootrom.bin, considering it - unpackaged for now. -- Removed /usr/share/qemu-kvm/qboot.rom, considering unpackaged. -- Added build dependency for meson and ninja-build -- hw/s390/s390-pci-vfio.c hack - set NULL for g_autofree variables -- Removed Chanelog (upstream) -- Fix in directory used for docs (upstream add %name so we do not pass it in configure) -- Package various .so as part of qemu-kvm-core package. 
- -Rebase notes (5.2.0 rc2): -- Added fix for dtrace build on RHEL 8.4.0 - -Rebase notes (5.2.0 rc3): -- Added man page for qemu-pr-helper -- Added new configure options -- Update qemu-kiwi patches to v4 - -Merged patches (3.1.0): -- 01f0c9f RHEL8: Add disable configure options to qemu spec file -- Spec file cleanups - -Merged patches (4.0.0): -- aa4297c Add edk2 Requires to qemu-kvm -- d124ff5779 Fixing brew build target -- eb204b5 Introduce the qemu-kvm-tests rpm -- 223cf0c Load kvm module during boot (partial) - -Merged patches (4.1.0): -- ebb6e97 redhat: Fix LOCALVERSION creation -- b0ab0cc redhat: enable tpmdev passthrough (not disabling tests) -- 7cb3c4a Enable libpmem to support nvdimm -- 8943607 qemu-kvm.spec: bump libseccomp >= 2.4.0 -- 27b7c44 rh: set CONFIG_BOCHS_DISPLAY=y for x86 (partial) -- e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) - -Merged patches (4.2.0): -- 69e1fb2 enable virgla -- d4f6115 enable virgl, for real this time ... - -Merged patches (5.1.0): -- 5edf6bd Add support for rh-brew-module -- f77d52d redhat: ship virtiofsd vhost-user device backend -- 63f12d4 redhat: Always use module build target for rh-brew (modified) -- 9b1e140 redhat: updating the modular target -- 44b8bd0 spec: Fix python shenigans for tests - -Merged patches (5.2.0 rc0): -- 9238ce7 Add support for simpletrace -- 5797cff Remove explicit glusterfs-api dependency -- fd62478 disable virgl -- 0205018 redhat: link /etc/qemu-ga/fsfreeze-hook to /etc/qemu-kvm/ -- 3645097 redhat: Make all generated so files executable (not only block-*) - -Merged patches (5.2.0 rc2): -- pjw 99657 redhat: introduces disable_everything macro into the configure call -- pjw 99659 redhat: scripts/extract_build_cmd.py - Avoid listing empty lines -- pjw 99658 redhat: Fixing rh-local build -- pjw 99660 redhat: Add qemu-kiwi subpackage -- d2e59ce redhat: add (un/pre)install systemd hooks for qemu-ga - -Merged patches (5.2.0 rc3): -- pjw 99887 - redhat: allow Makefile rh-prep builddep to 
fail -- pjw 99885 - redhat: adding rh-rpm target ---- - .gitignore | 1 + - README.systemtap | 43 + - hw/s390x/s390-pci-vfio.c | 4 +- - meson.build | 4 +- - redhat/Makefile | 90 + - redhat/Makefile.common | 53 + - redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 3402 +++++++++++++++++++++++ - redhat/scripts/extract_build_cmd.py | 5 +- - redhat/scripts/process-patches.sh | 17 +- - scripts/qemu-guest-agent/fsfreeze-hook | 2 +- - scripts/systemtap/conf.d/qemu_kvm.conf | 4 + - scripts/systemtap/script.d/qemu_kvm.stp | 1 + - tests/check-block.sh | 2 + - ui/vnc.c | 2 +- - 15 files changed, 3653 insertions(+), 16 deletions(-) - create mode 100644 README.systemtap - create mode 100644 redhat/Makefile - create mode 100644 redhat/Makefile.common - create mode 100644 redhat/README.tests - create mode 100644 redhat/qemu-kvm.spec.template - create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf - create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp - -diff --git a/README.systemtap b/README.systemtap -new file mode 100644 -index 0000000000..ad913fc990 ---- /dev/null -+++ b/README.systemtap -@@ -0,0 +1,43 @@ -+QEMU tracing using systemtap-initscript -+--------------------------------------- -+ -+You can capture QEMU trace data all the time using systemtap-initscript. This -+uses SystemTap's flight recorder mode to trace all running guests to a -+fixed-size buffer on the host. Old trace entries are overwritten by new -+entries when the buffer size wraps. -+ -+1. Install the systemtap-initscript package: -+ # yum install systemtap-initscript -+ -+2. Install the systemtap scripts and the conf file: -+ # cp /usr/share/qemu-kvm/systemtap/script.d/qemu_kvm.stp /etc/systemtap/script.d/ -+ # cp /usr/share/qemu-kvm/systemtap/conf.d/qemu_kvm.conf /etc/systemtap/conf.d/ -+ -+The set of trace events to enable is given in qemu_kvm.stp. This SystemTap -+script can be customized to add or remove trace events provided in -+/usr/share/systemtap/tapset/qemu-kvm-simpletrace.stp. 
-+ -+SystemTap customizations can be made to qemu_kvm.conf to control the flight -+recorder buffer size and whether to store traces in memory only or disk too. -+See stap(1) for option documentation. -+ -+3. Start the systemtap service. -+ # service systemtap start qemu_kvm -+ -+4. Make the service start at boot time. -+ # chkconfig systemtap on -+ -+5. Confirm that the service works. -+ # service systemtap status qemu_kvm -+ qemu_kvm is running... -+ -+When you want to inspect the trace buffer, perform the following steps: -+ -+1. Dump the trace buffer. -+ # staprun -A qemu_kvm >/tmp/trace.log -+ -+2. Start the systemtap service because the preceding step stops the service. -+ # service systemtap start qemu_kvm -+ -+3. Translate the trace record to readable format. -+ # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log -diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c -index 9296e1bb6e..f70c5a8946 100644 ---- a/hw/s390x/s390-pci-vfio.c -+++ b/hw/s390x/s390-pci-vfio.c -@@ -28,7 +28,7 @@ - */ - bool s390_pci_update_dma_avail(int fd, unsigned int *avail) - { -- g_autofree struct vfio_iommu_type1_info *info; -+ g_autofree struct vfio_iommu_type1_info *info = NULL; - uint32_t argsz; - - assert(avail); -@@ -229,7 +229,7 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev, - */ - void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) - { -- g_autofree struct vfio_device_info *info; -+ g_autofree struct vfio_device_info *info = NULL; - VFIOPCIDevice *vfio_pci; - uint32_t argsz; - int fd; -diff --git a/meson.build b/meson.build -index e3386196ba..8c38b2ea36 100644 ---- a/meson.build -+++ b/meson.build -@@ -1148,7 +1148,9 @@ if capstone_opt == 'internal' - # Include all configuration defines via a header file, which will wind up - # as a dependency on the object file, and thus changes here will result - # in a rebuild. 
-- '-include', 'capstone-defs.h' -+ '-include', 'capstone-defs.h', -+ -+ '-Wp,-D_GLIBCXX_ASSERTIONS', - ] - - libcapstone = static_library('capstone', -diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook -index 13aafd4845..e9b84ec028 100755 ---- a/scripts/qemu-guest-agent/fsfreeze-hook -+++ b/scripts/qemu-guest-agent/fsfreeze-hook -@@ -8,7 +8,7 @@ - # request, it is issued with "thaw" argument after filesystem is thawed. - - LOGFILE=/var/log/qga-fsfreeze-hook.log --FSFREEZE_D=$(dirname -- "$0")/fsfreeze-hook.d -+FSFREEZE_D=$(dirname -- "$(realpath $0)")/fsfreeze-hook.d - - # Check whether file $1 is a backup or rpm-generated file and should be ignored - is_ignored_file() { -diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf -new file mode 100644 -index 0000000000..372d8160a4 ---- /dev/null -+++ b/scripts/systemtap/conf.d/qemu_kvm.conf -@@ -0,0 +1,4 @@ -+# Force load uprobes (see BZ#1118352) -+stap -e 'probe process("/usr/libexec/qemu-kvm").function("main") { printf("") }' -c true -+ -+qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes -diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp -new file mode 100644 -index 0000000000..c04abf9449 ---- /dev/null -+++ b/scripts/systemtap/script.d/qemu_kvm.stp -@@ -0,0 +1 @@ -+probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} -diff --git a/tests/check-block.sh b/tests/check-block.sh -index f6b1bda7b9..645b550af8 100755 ---- a/tests/check-block.sh -+++ b/tests/check-block.sh -@@ -58,6 +58,8 @@ if ! 
(sed --version | grep 'GNU sed') > /dev/null 2>&1 ; then - fi - fi - -+exit 0 -+ - cd tests/qemu-iotests - - # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests -diff --git a/ui/vnc.c b/ui/vnc.c -index 49235056f7..eb5520ed73 100644 ---- a/ui/vnc.c -+++ b/ui/vnc.c -@@ -3982,7 +3982,7 @@ void vnc_display_open(const char *id, Error **errp) - - #ifdef CONFIG_VNC_SASL - if (sasl) { -- int saslErr = sasl_server_init(NULL, "qemu"); -+ int saslErr = sasl_server_init(NULL, "qemu-kvm"); - - if (saslErr != SASL_OK) { - error_setg(errp, "Failed to initialize SASL auth: %s", --- -2.18.4 - diff --git a/0007-Machine-type-related-general-changes.patch b/0006-Machine-type-related-general-changes.patch similarity index 78% rename from 0007-Machine-type-related-general-changes.patch rename to 0006-Machine-type-related-general-changes.patch index a6f8696..5c503bc 100644 --- a/0007-Machine-type-related-general-changes.patch +++ b/0006-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From b97fdd8e425f1c9a156ebdfbdce986d9351c0d19 Mon Sep 17 00:00:00 2001 +From 80e9b92048e6fe7c7aef0e64cbc0f855bd3a6272 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -8,68 +8,36 @@ split to allow easier review. It contains changes not related to any architecture. 
Signed-off-by: Miroslav Rezanina - -Rebase changes (4.0.0): -- Remove e1000 device duplication changes to reflect upstream solution -- Rewrite machine compat properties to upstream solution - -Rebase changes (4.1.0): -- Removed optional flag for machine compat properties (upstream) -- Remove c3e002cb chunk from hw/net/e1000.c -- Reorder compat structures -- Use one format for compat scructures -- Added compat for virtio-balloon-pci.any_layout for rhel71 - -Merged patches (4.0.0): -- d4c0957 compat: Generic HW_COMPAT_RHEL7_6 -- cbac773 virtio: Make disable-legacy/disable-modern compat properties optional - -Merged patches (4.1.0): -- 479ad30 redhat: fix cut'n'paste garbage in hw_compat comments -- f19738e compat: Generic hw_compat_rhel_8_0 - -Merged patches (4.2.0): -- 9f2bfaa machine types: Update hw_compat_rhel_8_0 from hw_compat_4_0 -- ca4a5e8 virtio: Make disable-legacy/disable-modern compat properties optional -- compat: Generic hw_compat_rhel_8_1 (patch 93040/92956) - -Merged patches (5.1.0): -- e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) -- 8f9f4d8 compat: disable 'edid' for virtio-gpu-ccw - -Merged patches (5.2.0 rc0): -- 8348642 redhat: define hw_compat_8_2 -- 45b8402 redhat: define hw_compat_8_2 -- 4effa71 redhat: Update hw_compat_8_2 -- 0e84dff virtio: skip legacy support check on machine types less than 5.1 (partialy) --- hw/acpi/ich9.c | 15 +++ hw/acpi/piix4.c | 5 +- hw/arm/virt.c | 2 +- hw/char/serial.c | 16 +++ - hw/core/machine.c | 213 +++++++++++++++++++++++++++++++++++ + hw/core/machine.c | 251 +++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- hw/i386/pc_piix.c | 2 + hw/i386/pc_q35.c | 2 + - hw/net/e1000e.c | 21 ++++ + hw/net/e1000e.c | 21 +++ hw/net/rtl8139.c | 4 +- hw/rtc/mc146818rtc.c | 6 + - hw/smbios/smbios.c | 46 +++++++- + hw/smbios/smbios.c | 46 ++++++- hw/timer/i8254_common.c | 2 +- hw/usb/hcd-uhci.c | 4 +- - hw/usb/hcd-xhci.c | 20 ++++ + hw/usb/hcd-xhci-pci.c | 59 ++++++-- + 
hw/usb/hcd-xhci-pci.h | 1 + + hw/usb/hcd-xhci.c | 20 +++ hw/usb/hcd-xhci.h | 2 + include/hw/acpi/ich9.h | 3 + - include/hw/boards.h | 27 +++++ + include/hw/boards.h | 33 +++++ include/hw/firmware/smbios.h | 5 +- include/hw/i386/pc.h | 3 + - include/hw/usb.h | 4 + + include/hw/usb.h | 3 + migration/migration.c | 2 + migration/migration.h | 5 + - 23 files changed, 400 insertions(+), 11 deletions(-) + 25 files changed, 489 insertions(+), 25 deletions(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index ac45ca4acb..0b35b35b28 100644 +index 7f01fad64c..33b0c6e33c 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -369,6 +369,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) @@ -102,10 +70,10 @@ index ac45ca4acb..0b35b35b28 100644 &pm->disable_s3, OBJ_PROP_FLAG_READWRITE); object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S4_DISABLED, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 669be5bbf6..2063131bcc 100644 +index 8f8b0e95e5..9865d1a349 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -277,6 +277,7 @@ static const VMStateDescription vmstate_acpi = { +@@ -278,6 +278,7 @@ static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, .minimum_version_id = 3, @@ -113,7 +81,7 @@ index 669be5bbf6..2063131bcc 100644 .post_load = vmstate_acpi_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), -@@ -633,8 +634,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) +@@ -643,8 +644,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) static Property piix4_pm_properties[] = { DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), @@ -125,10 +93,10 @@ index 669be5bbf6..2063131bcc 100644 DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_hotplug_bridge, true), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 27dbeb549e..c908b5fcf4 100644 +index 9f01d9041b..f904d3e98e 100644 --- a/hw/arm/virt.c +++ 
b/hw/arm/virt.c -@@ -1441,7 +1441,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1522,7 +1522,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, @@ -138,18 +106,18 @@ index 27dbeb549e..c908b5fcf4 100644 smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, &smbios_anchor, &smbios_anchor_len); diff --git a/hw/char/serial.c b/hw/char/serial.c -index 97f71879ff..aeb207ef73 100644 +index bc2e322970..cc378142a3 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c -@@ -35,6 +35,7 @@ - #include "qemu/error-report.h" +@@ -37,6 +37,7 @@ #include "trace.h" #include "hw/qdev-properties.h" + #include "hw/qdev-properties-system.h" +#include "migration/migration.h" #define UART_LCR_DLAB 0x80 /* Divisor latch access bit */ -@@ -691,6 +692,9 @@ static int serial_post_load(void *opaque, int version_id) +@@ -689,6 +690,9 @@ static int serial_post_load(void *opaque, int version_id) static bool serial_thr_ipending_needed(void *opaque) { SerialState *s = opaque; @@ -159,7 +127,7 @@ index 97f71879ff..aeb207ef73 100644 if (s->ier & UART_IER_THRI) { bool expected_value = ((s->iir & UART_IIR_ID) == UART_IIR_THRI); -@@ -772,6 +776,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { +@@ -770,6 +774,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { static bool serial_fifo_timeout_timer_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -170,7 +138,7 @@ index 97f71879ff..aeb207ef73 100644 return timer_pending(s->fifo_timeout_timer); } -@@ -789,6 +797,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { +@@ -787,6 +795,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { static bool serial_timeout_ipending_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -181,7 +149,7 @@ index 97f71879ff..aeb207ef73 100644 return s->timeout_ipending != 0; } -@@ -806,6 
+818,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { +@@ -804,6 +816,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { static bool serial_poll_needed(void *opaque) { SerialState *s = (SerialState *)opaque; @@ -193,13 +161,49 @@ index 97f71879ff..aeb207ef73 100644 } diff --git a/hw/core/machine.c b/hw/core/machine.c -index d0408049b5..19d50dde45 100644 +index 40def78183..848e7fdff6 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -28,6 +28,219 @@ - #include "hw/mem/nvdimm.h" - #include "migration/vmstate.h" +@@ -36,6 +36,257 @@ + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-pci.h" ++/* ++ * Mostly the same as hw_compat_5_2 ++ */ ++GlobalProperty hw_compat_rhel_8_4[] = { ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "ICH9-LPC", "smm-compat", "on"}, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "PIIX4_PM", "smm-compat", "on"}, ++}; ++const size_t hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4); ++ ++/* ++ * Mostly the same as hw_compat_5_1 ++ */ ++GlobalProperty hw_compat_rhel_8_3[] = { ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-scsi", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-user-blk", "num-queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-user-scsi", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-blk-device", "num-queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-scsi-device", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "nvme", "use-intel-id", "on"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ ++ /* hw_compat_rhel_8_3 bz 1912846 */ ++ { "pci-xhci", "x-rh-late-msi-cap", "off" }, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-pci", "x-ats-page-aligned", "off"}, ++}; ++const size_t hw_compat_rhel_8_3_len = G_N_ELEMENTS(hw_compat_rhel_8_3); ++ 
+/* + * The same as hw_compat_4_2 + hw_compat_5_0 + */ @@ -226,6 +230,8 @@ index d0408049b5..19d50dde45 100644 + { "qxl-vga", "revision", "4" }, + /* hw_compat_rhel_8_2 from hw_compat_4_2 */ + { "fw_cfg", "acpi-mr-restore", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-device", "use-disabled-flag", "false" }, + /* hw_compat_rhel_8_2 from hw_compat_5_0 */ + { "pci-host-bridge", "x-config-reg-migration-enabled", "off" }, + /* hw_compat_rhel_8_2 from hw_compat_5_0 */ @@ -413,9 +419,9 @@ index d0408049b5..19d50dde45 100644 +}; +const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); + - GlobalProperty hw_compat_5_1[] = { - { "vhost-scsi", "num_queues", "1"}, - { "vhost-user-blk", "num-queues", "1"}, + GlobalProperty hw_compat_5_2[] = { + { "ICH9-LPC", "smm-compat", "on"}, + { "PIIX4_PM", "smm-compat", "on"}, diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c index 90851e730b..a91c5d7467 100644 --- a/hw/display/vga-isa.c @@ -430,7 +436,7 @@ index 90851e730b..a91c5d7467 100644 }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 13d1628f13..9fcc5aaf69 100644 +index 46cc951073..62433d8022 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -179,6 +179,8 @@ static void pc_init1(MachineState *machine, @@ -443,7 +449,7 @@ index 13d1628f13..9fcc5aaf69 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index a3f4959c43..f6c2ef4e43 100644 +index 53450190f5..fce52ca70b 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -198,6 +198,8 @@ static void pc_q35_init(MachineState *machine) @@ -530,7 +536,7 @@ index a8a77eca95..6d39c1f1c4 100644 e1000e_prop_disable_vnet, bool), DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index ba5ace1ab7..a2e6e83522 100644 +index 90b4fc63ce..3ffb9dd22c 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3179,7 +3179,7 @@ static int rtl8139_pre_save(void *opaque) @@ -553,10 +559,10 @@ index ba5ace1ab7..a2e6e83522 
100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c -index 7a38540cb9..377d861913 100644 +index 5d0fcacd0c..4a2e52031b 100644 --- a/hw/rtc/mc146818rtc.c +++ b/hw/rtc/mc146818rtc.c -@@ -43,6 +43,7 @@ +@@ -44,6 +44,7 @@ #include "qapi/visitor.h" #include "exec/address-spaces.h" #include "hw/rtc/mc146818rtc_regs.h" @@ -564,7 +570,7 @@ index 7a38540cb9..377d861913 100644 #ifdef TARGET_I386 #include "qapi/qapi-commands-misc-target.h" -@@ -821,6 +822,11 @@ static int rtc_post_load(void *opaque, int version_id) +@@ -822,6 +823,11 @@ static int rtc_post_load(void *opaque, int version_id) static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) { RTCState *s = (RTCState *)opaque; @@ -577,7 +583,7 @@ index 7a38540cb9..377d861913 100644 } diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 6a3d39793b..232fd61bf8 100644 +index f22c4f5b73..a305a4bcea 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -56,6 +56,9 @@ static bool smbios_legacy = true; @@ -672,10 +678,10 @@ index 050875b497..32935da46c 100644 vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 27ca237d71..eb24e39b81 100644 +index 0cb02a6432..962a9622e5 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c -@@ -1221,12 +1221,14 @@ static void usb_uhci_common_realize(PCIDevice *dev, Error **errp) +@@ -1167,12 +1167,14 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) UHCIState *s = UHCI(dev); uint8_t *pci_conf = s->dev.config; int i; @@ -691,11 +697,122 @@ index 27ca237d71..eb24e39b81 100644 if (s->masterbus) { USBPort *ports[NB_PORTS]; +diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c +index 9421734d0f..9bfe236a7d 100644 +--- a/hw/usb/hcd-xhci-pci.c ++++ b/hw/usb/hcd-xhci-pci.c +@@ -101,6 +101,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id) + return 0; + } + ++/* RH bz 1912846 */ ++static 
bool usb_xhci_pci_add_msi(struct PCIDevice *dev, Error **errp) ++{ ++ int ret; ++ Error *err = NULL; ++ XHCIPciState *s = XHCI_PCI(dev); ++ ++ ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); ++ /* ++ * Any error other than -ENOTSUP(board's MSI support is broken) ++ * is a programming error ++ */ ++ assert(!ret || ret == -ENOTSUP); ++ if (ret && s->msi == ON_OFF_AUTO_ON) { ++ /* Can't satisfy user's explicit msi=on request, fail */ ++ error_append_hint(&err, "You have to use msi=auto (default) or " ++ "msi=off with this machine type.\n"); ++ error_propagate(errp, err); ++ return true; ++ } ++ assert(!err || s->msi == ON_OFF_AUTO_AUTO); ++ /* With msi=auto, we fall back to MSI off silently */ ++ error_free(err); ++ ++ return false; ++} ++ + static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + { + int ret; +@@ -122,23 +149,12 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + s->xhci.nec_quirks = true; + } + +- if (s->msi != ON_OFF_AUTO_OFF) { +- ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); +- /* +- * Any error other than -ENOTSUP(board's MSI support is broken) +- * is a programming error +- */ +- assert(!ret || ret == -ENOTSUP); +- if (ret && s->msi == ON_OFF_AUTO_ON) { +- /* Can't satisfy user's explicit msi=on request, fail */ +- error_append_hint(&err, "You have to use msi=auto (default) or " +- "msi=off with this machine type.\n"); ++ if (s->msi != ON_OFF_AUTO_OFF && s->rh_late_msi_cap) { ++ /* This gives the behaviour from 5.2.0 onwards, lspci shows 90,a0,70 */ ++ if (usb_xhci_pci_add_msi(dev, &err)) { + error_propagate(errp, err); + return; + } +- assert(!err || s->msi == ON_OFF_AUTO_AUTO); +- /* With msi=auto, we fall back to MSI off silently */ +- error_free(err); + } + pci_register_bar(dev, 0, + PCI_BASE_ADDRESS_SPACE_MEMORY | +@@ -151,6 +167,14 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + assert(ret > 0); + } + ++ /* RH bz 1912846 */ ++ if (s->msi != 
ON_OFF_AUTO_OFF && !s->rh_late_msi_cap) { ++ /* This gives the older RH machine behaviour, lspci shows 90,70,a0 */ ++ if (usb_xhci_pci_add_msi(dev, &err)) { ++ error_propagate(errp, err); ++ return; ++ } ++ } + if (s->msix != ON_OFF_AUTO_OFF) { + /* TODO check for errors, and should fail when msix=on */ + msix_init(dev, s->xhci.numintrs, +@@ -195,11 +219,18 @@ static void xhci_instance_init(Object *obj) + qdev_alias_all_properties(DEVICE(&s->xhci), obj); + } + ++static Property xhci_pci_properties[] = { ++ /* RH bz 1912846 */ ++ DEFINE_PROP_BOOL("x-rh-late-msi-cap", XHCIPciState, rh_late_msi_cap, true), ++ DEFINE_PROP_END_OF_LIST() ++}; ++ + static void xhci_class_init(ObjectClass *klass, void *data) + { + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + ++ device_class_set_props(dc, xhci_pci_properties); + dc->reset = xhci_pci_reset; + dc->vmsd = &vmstate_xhci_pci; + set_bit(DEVICE_CATEGORY_USB, dc->categories); +diff --git a/hw/usb/hcd-xhci-pci.h b/hw/usb/hcd-xhci-pci.h +index c193f79443..086a1feb1e 100644 +--- a/hw/usb/hcd-xhci-pci.h ++++ b/hw/usb/hcd-xhci-pci.h +@@ -39,6 +39,7 @@ typedef struct XHCIPciState { + XHCIState xhci; + OnOffAuto msi; + OnOffAuto msix; ++ bool rh_late_msi_cap; /* bz 1912846 */ + } XHCIPciState; + + #endif diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 9ce7ca706e..0af661ce1d 100644 +index 46212b1e69..6d1f278aad 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c -@@ -3491,9 +3491,27 @@ static const VMStateDescription vmstate_xhci_slot = { +@@ -3490,9 +3490,27 @@ static const VMStateDescription vmstate_xhci_slot = { } }; @@ -723,7 +840,7 @@ index 9ce7ca706e..0af661ce1d 100644 .fields = (VMStateField[]) { VMSTATE_UINT32(type, XHCIEvent), VMSTATE_UINT32(ccode, XHCIEvent), -@@ -3502,6 +3520,8 @@ static const VMStateDescription vmstate_xhci_event = { +@@ -3501,6 +3519,8 @@ static const VMStateDescription vmstate_xhci_event = { VMSTATE_UINT32(flags, XHCIEvent), VMSTATE_UINT8(slotid, 
XHCIEvent), VMSTATE_UINT8(epid, XHCIEvent), @@ -733,7 +850,7 @@ index 9ce7ca706e..0af661ce1d 100644 } }; diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h -index 02ebd76450..dfda04b125 100644 +index 7bba361f3b..f450ffd13b 100644 --- a/hw/usb/hcd-xhci.h +++ b/hw/usb/hcd-xhci.h @@ -149,6 +149,8 @@ typedef struct XHCIEvent { @@ -746,11 +863,11 @@ index 02ebd76450..dfda04b125 100644 typedef struct XHCIInterrupter { diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h -index 54571c77e0..b3369dab9e 100644 +index df519e40b5..e1ecfbaf1f 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h -@@ -61,6 +61,9 @@ typedef struct ICH9LPCPMRegs { - uint8_t smm_enabled; +@@ -62,6 +62,9 @@ typedef struct ICH9LPCPMRegs { + bool smm_compat; bool enable_tco; TCOIORegs tco_regs; + @@ -760,13 +877,19 @@ index 54571c77e0..b3369dab9e 100644 #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" diff --git a/include/hw/boards.h b/include/hw/boards.h -index a49e3a6b44..dd18c9e94d 100644 +index ad6c8fd537..2d7a65724a 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -367,4 +367,31 @@ extern const size_t hw_compat_2_2_len; +@@ -413,4 +413,37 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; ++extern GlobalProperty hw_compat_rhel_8_4[]; ++extern const size_t hw_compat_rhel_8_4_len; ++ ++extern GlobalProperty hw_compat_rhel_8_3[]; ++extern const size_t hw_compat_rhel_8_3_len; ++ +extern GlobalProperty hw_compat_rhel_8_2[]; +extern const size_t hw_compat_rhel_8_2_len; + @@ -812,10 +935,10 @@ index 02a0ced0a0..67e38a1b13 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 911e460097..ae6bf1d209 100644 +index dcf060b791..93c012ac95 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -104,6 +104,9 @@ struct PCMachineClass { +@@ -107,6 +107,9 @@ struct PCMachineClass { bool smbios_defaults; 
bool smbios_legacy_mode; bool smbios_uuid_encoded; @@ -826,25 +949,24 @@ index 911e460097..ae6bf1d209 100644 /* RAM / address space compat: */ bool gigabyte_align; diff --git a/include/hw/usb.h b/include/hw/usb.h -index a70a72e917..78b90436c9 100644 +index 436e07b304..edb2cd94b6 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h -@@ -570,4 +570,8 @@ int usb_get_quirks(uint16_t vendor_id, uint16_t product_id, - uint8_t interface_class, uint8_t interface_subclass, - uint8_t interface_protocol); +@@ -577,4 +577,7 @@ void usb_pcap_init(FILE *fp); + void usb_pcap_ctrl(USBPacket *p, bool setup); + void usb_pcap_data(USBPacket *p, bool setup); -+ +/* hcd-xhci.c -- rhel7.0.0 machine type compatibility */ +extern bool migrate_cve_2014_5263_xhci_fields; + #endif diff --git a/migration/migration.c b/migration/migration.c -index 87a9b59f83..1bb8d012e6 100644 +index 8ca034136b..4afc6069b6 100644 --- a/migration/migration.c +++ b/migration/migration.c -@@ -134,6 +134,8 @@ enum mig_rp_message_type { - MIG_RP_MSG_MAX - }; +@@ -167,6 +167,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, + MIGRATION_CAPABILITY_X_COLO, + MIGRATION_CAPABILITY_VALIDATE_UUID); +bool migrate_pre_2_2; + @@ -852,10 +974,10 @@ index 87a9b59f83..1bb8d012e6 100644 migrations at once. 
For now we don't need to add dynamic creation of migration */ diff --git a/migration/migration.h b/migration/migration.h -index d096b77f74..6134a534b3 100644 +index db6708326b..1b6c69751c 100644 --- a/migration/migration.h +++ b/migration/migration.h -@@ -364,6 +364,11 @@ bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm, +@@ -368,6 +368,11 @@ bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm, void migrate_add_address(SocketAddress *address); int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); @@ -868,5 +990,5 @@ index d096b77f74..6134a534b3 100644 #define qemu_ram_foreach_block \ #warning "Use foreach_not_ignored_block in migration code" -- -2.18.4 +2.27.0 diff --git a/0008-Add-aarch64-machine-types.patch b/0007-Add-aarch64-machine-types.patch similarity index 73% rename from 0008-Add-aarch64-machine-types.patch rename to 0007-Add-aarch64-machine-types.patch index e252a3c..ae95071 100644 --- a/0008-Add-aarch64-machine-types.patch +++ b/0007-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From fcf44f2334a6d82709b9c64d45fa2ab1aec595b9 Mon Sep 17 00:00:00 2001 +From ee8aeb6b79bde21b581090c479faf10e716a7e6d Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -6,59 +6,16 @@ Subject: Add aarch64 machine types Adding changes to add RHEL machine types for aarch64 architecture. 
Signed-off-by: Miroslav Rezanina - -Rebase notes (4.0.0): -- Use upstream compat handling - -Rebase notes (4.1.0-rc0): -- Removed a15memmap (upstream) -- Use virt_flash_create in rhel800_virt_instance_init - -Rebase notes (4.2.0-rc0): -- Set numa_mem_supported - -Rebase notes (4.2.0-rc3): -- aarch64: Add virt-rhel8.2.0 machine type for ARM (patch 92246) -- aarch64: virt: Allow more than 1TB of RAM (patch 92249) -- aarch64: virt: Allow PCDIMM instantiation (patch 92247) -- aarch64: virt: Enhance the comment related to gic-version (patch 92248) - -Rebase notes (5.0.0): -- Set default_ram_id in rhel_machine_class_init -- Added setting acpi properties - -Rebase notes (5.1.0): -- Added ras property -- Added to virt_machine_device_unplug_cb to machine type (upstream) -- added mte property (upstream) - -Merged patches (4.0.0): -- 7bfdb4c aarch64: Add virt-rhel8.0.0 machine type for ARM -- 3433e69 aarch64: Set virt-rhel8.0.0 max_cpus to 512 -- 4d20863 aarch64: Use 256MB ECAM region by default - -Merged patches (4.1.0): -- c3e39ef aarch64: Add virt-rhel8.1.0 machine type for ARM -- 59a46d1 aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine - -Merged patches (5.2.0 rc0): -- 12990ad hw/arm: Changes to rhel820 machine -- 46d5a79 hw/arm: Introduce rhel_virt_instance_init() helper -- 098954a hw/arm: Add rhel830 machine type -- ee8e99d arm: Set correct max_cpus value on virt-rhel* machine types -- e5edd38 RHEL-only: arm/virt: Allow the TPM_TIS_SYSBUS device dynamic allocation in machvirt -- 6d7ba66 machine types/numa: set numa_mem_supported on old machine types (partialy) -- 25c5644 machine_types/numa: compatibility for auto_enable_numa_with_memdev (partialy) --- - hw/arm/virt.c | 191 +++++++++++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 211 +++++++++++++++++++++++++++++++++++++++++- include/hw/arm/virt.h | 8 ++ - 2 files changed, 196 insertions(+), 3 deletions(-) + 2 files changed, 218 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 
c908b5fcf4..21e0485ac5 100644 +index f904d3e98e..080cf54ef1 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -79,6 +79,7 @@ +@@ -80,6 +80,7 @@ #include "hw/char/pl011.h" #include "qemu/guest-random.h" @@ -66,7 +23,7 @@ index c908b5fcf4..21e0485ac5 100644 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -105,7 +106,49 @@ +@@ -106,7 +107,48 @@ DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) @@ -88,7 +45,6 @@ index c908b5fcf4..21e0485ac5 100644 + static const TypeInfo rhel##m##n##s##_machvirt_info = { \ + .name = MACHINE_TYPE_NAME("virt-rhel" # m "." # n "." # s), \ + .parent = TYPE_RHEL_MACHINE, \ -+ .instance_init = rhel##m##n##s##_virt_instance_init, \ + .class_init = rhel##m##n##s##_virt_class_init, \ + }; \ + static void rhel##m##n##s##_machvirt_init(void) \ @@ -117,7 +73,7 @@ index c908b5fcf4..21e0485ac5 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -2027,6 +2070,7 @@ static void machvirt_init(MachineState *machine) +@@ -2113,6 +2155,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -125,15 +81,15 @@ index c908b5fcf4..21e0485ac5 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2055,6 +2099,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2140,6 +2183,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) + vms->virt = value; } - +#endif /* disabled for RHEL */ + static bool virt_get_highmem(Object *obj, Error **errp) { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2108,6 +2153,7 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, +@@ -2237,6 +2281,7 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, 
visit_type_OnOffAuto(v, name, &vms->acpi, errp); } @@ -141,40 +97,23 @@ index c908b5fcf4..21e0485ac5 100644 static bool virt_get_ras(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2121,13 +2167,14 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) - - vms->ras = value; - } -- -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static bool virt_get_mte(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); - - return vms->mte; - } -+#endif /* disabled for RHEL */ - - static void virt_set_mte(Object *obj, bool value, Error **errp) - { -@@ -2135,7 +2182,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) +@@ -2264,6 +2309,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) vms->mte = value; } -- -+#endif ++#endif /* disabled for RHEL */ + static char *virt_get_gic_version(Object *obj, Error **errp) { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2442,6 +2489,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) - return requested_pa_size > 40 ? requested_pa_size : 0; +@@ -2584,6 +2630,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) + return fixed_ipa ? 
0 : requested_pa_size; } +#if 0 /* Disabled for Red Hat Enterprise Linux */ static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -2730,3 +2778,140 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -2910,3 +2957,165 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -216,26 +155,24 @@ index c908b5fcf4..21e0485ac5 100644 + NULL, NULL); + object_class_property_set_description(oc, "acpi", + "Enable ACPI"); -+} + -+static const TypeInfo rhel_machine_info = { -+ .name = TYPE_RHEL_MACHINE, -+ .parent = TYPE_MACHINE, -+ .abstract = true, -+ .instance_size = sizeof(VirtMachineState), -+ .class_size = sizeof(VirtMachineClass), -+ .class_init = rhel_machine_class_init, -+ .interfaces = (InterfaceInfo[]) { -+ { TYPE_HOTPLUG_HANDLER }, -+ { } -+ }, -+}; ++ object_class_property_add_str(oc, "x-oem-id", ++ virt_get_oem_id, ++ virt_set_oem_id); ++ object_class_property_set_description(oc, "x-oem-id", ++ "Override the default value of field OEMID " ++ "in ACPI table header." ++ "The string may be up to 6 bytes in size"); + -+static void rhel_machine_init(void) -+{ -+ type_register_static(&rhel_machine_info); ++ ++ object_class_property_add_str(oc, "x-oem-table-id", ++ virt_get_oem_table_id, ++ virt_set_oem_table_id); ++ object_class_property_set_description(oc, "x-oem-table-id", ++ "Override the default value of field OEM Table ID " ++ "in ACPI table header." 
++ "The string may be up to 8 bytes in size"); +} -+type_init(rhel_machine_init); + +static void rhel_virt_instance_init(Object *obj) +{ @@ -244,22 +181,23 @@ index c908b5fcf4..21e0485ac5 100644 + + /* EL3 is disabled by default and non-configurable for RHEL */ + vms->secure = false; ++ + /* EL2 is disabled by default and non-configurable for RHEL */ + vms->virt = false; -+ /* High memory is enabled by default for RHEL */ ++ ++ /* High memory is enabled by default */ + vms->highmem = true; + object_property_add_bool(obj, "highmem", virt_get_highmem, + virt_set_highmem); + object_property_set_description(obj, "highmem", + "Set on/off to enable/disable using " + "physical address space above 32 bits"); -+ + vms->gic_version = VIRT_GIC_VERSION_NOSEL; + object_property_add_str(obj, "gic-version", virt_get_gic_version, + virt_set_gic_version); + object_property_set_description(obj, "gic-version", + "Set GIC version. " -+ "Valid values are 2, 3 and host"); ++ "Valid values are 2, 3, host and max"); + + vms->highmem_ecam = !vmc->no_highmem_ecam; + @@ -282,44 +220,70 @@ index c908b5fcf4..21e0485ac5 100644 + "Set the IOMMU type. " + "Valid values are none and smmuv3"); + ++ /* Default disallows RAS instantiation and is non-configurable for RHEL */ + vms->ras = false; -+ /* MTE is disabled by default. 
*/ ++ ++ /* MTE is disabled by default and non-configurable for RHEL */ + vms->mte = false; + -+ vms->irqmap=a15irqmap; ++ vms->irqmap = a15irqmap; ++ + virt_flash_create(vms); ++ vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); ++ vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); ++ +} + -+static void rhel830_virt_instance_init(Object *obj) ++static const TypeInfo rhel_machine_info = { ++ .name = TYPE_RHEL_MACHINE, ++ .parent = TYPE_MACHINE, ++ .abstract = true, ++ .instance_size = sizeof(VirtMachineState), ++ .class_size = sizeof(VirtMachineClass), ++ .class_init = rhel_machine_class_init, ++ .instance_init = rhel_virt_instance_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_HOTPLUG_HANDLER }, ++ { } ++ }, ++}; ++ ++static void rhel_machine_init(void) +{ -+ rhel_virt_instance_init(obj); ++ type_register_static(&rhel_machine_info); +} ++type_init(rhel_machine_init); ++ ++static void rhel840_virt_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 4, 0) + +static void rhel830_virt_options(MachineClass *mc) +{ -+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); -+} -+DEFINE_RHEL_MACHINE_AS_LATEST(8, 3, 0) ++ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + -+static void rhel820_virt_instance_init(Object *obj) -+{ -+ rhel_virt_instance_init(obj); ++ rhel840_virt_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); ++ vmc->no_kvm_steal_time = true; +} ++DEFINE_RHEL_MACHINE(8, 3, 0) + +static void rhel820_virt_options(MachineClass *mc) +{ + rhel830_virt_options(mc); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, -+ hw_compat_rhel_8_2_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, hw_compat_rhel_8_2_len); + mc->numa_mem_supported = true; + mc->auto_enable_numa_with_memdev = 
false; +} +DEFINE_RHEL_MACHINE(8, 2, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index aad6d69841..745b76b186 100644 +index 921416f918..6c34864a0a 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -167,9 +167,17 @@ struct VirtMachineState { +@@ -170,9 +170,17 @@ struct VirtMachineState { #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -338,5 +302,5 @@ index aad6d69841..745b76b186 100644 bool virt_is_acpi_enabled(VirtMachineState *vms); -- -2.18.4 +2.27.0 diff --git a/0009-Add-ppc64-machine-types.patch b/0008-Add-ppc64-machine-types.patch similarity index 84% rename from 0009-Add-ppc64-machine-types.patch rename to 0008-Add-ppc64-machine-types.patch index bee2ba9..4504703 100644 --- a/0009-Add-ppc64-machine-types.patch +++ b/0008-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 06a8855e3b36996d4478219c008986877a253674 Mon Sep 17 00:00:00 2001 +From d70214aa1d8bf7aae9ef3a6bbc04f01735722e3c Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -6,56 +6,23 @@ Subject: Add ppc64 machine types Adding changes to add RHEL machine types for ppc64 architecture. 
Signed-off-by: Miroslav Rezanina - -Rebase changes (4.0.0): -- remove instance options and use upstream solution -- Use upstream compat handling -- Replace SPAPR_PCI_2_7_MMIO_WIN_SIZE with value (changed upstream) -- re-add handling of instance_options (removed upstream) -- Use p8 as default for rhel machine types (p9 default upstream) -- sPAPRMachineClass renamed to SpaprMachineClass (upstream) - -Rebase changes (4.1.0): -- Update format for compat structures - -Merged patches (4.0.0): -- 467d59a redhat: define pseries-rhel8.0.0 machine type - -Merged patches (4.1.0): -- f21757edc target/ppc/spapr: Enable mitigations by default for pseries-4.0 machine type -- 2511c63 redhat: sync pseries-rhel7.6.0 with rhel-av-8.0.1 -- 89f01da redhat: define pseries-rhel8.1.0 machine type - -Merged patches (4.2.0): -- bcba728 redhat: update pseries-rhel8.1.0 machine type -- redhat: update pseries-rhel-7.6.0 machine type (patch 93039) -- redhat: define pseries-rhel8.2.0 machine type (patch 93041) - -Merged patches (5.1.0): -- eb121ff spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine (partial) - -Merged patches (5.2.0 rc0): -- 311a20f redhat: define pseries-rhel8.3.0 machine type -- 1284167 ppc: Set correct max_cpus value on spapr-rhel* machine types -- 1ab8783 redhat: update pseries-rhel8.2.0 machine type -- b162af531a target/ppc: Add experimental option for enabling secure guests --- - hw/ppc/spapr.c | 337 ++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr.c | 368 ++++++++++++++++++++++++++++++++++++++++ hw/ppc/spapr_cpu_core.c | 13 ++ include/hw/ppc/spapr.h | 4 + target/ppc/compat.c | 13 +- target/ppc/cpu.h | 1 + - target/ppc/kvm.c | 27 ++++ + target/ppc/kvm.c | 27 +++ target/ppc/kvm_ppc.h | 13 ++ - 7 files changed, 407 insertions(+), 1 deletion(-) + 7 files changed, 438 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 12a012d9dd..4a838cc955 100644 +index e4be00b732..f9e8dfdfc9 100644 --- a/hw/ppc/spapr.c +++ 
b/hw/ppc/spapr.c -@@ -1585,6 +1585,9 @@ static void spapr_machine_reset(MachineState *machine) +@@ -1568,6 +1568,9 @@ static void spapr_machine_reset(MachineState *machine) - kvmppc_svm_off(&error_fatal); + pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); + if (spapr->svm_allowed) { + kvmppc_svm_allow(&error_fatal); @@ -63,7 +30,7 @@ index 12a012d9dd..4a838cc955 100644 first_ppc_cpu = POWERPC_CPU(first_cpu); if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && -@@ -3266,6 +3269,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +@@ -3254,6 +3257,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) spapr->host_serial = g_strdup(value); } @@ -84,7 +51,7 @@ index 12a012d9dd..4a838cc955 100644 static void spapr_instance_init(Object *obj) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); -@@ -3321,6 +3338,12 @@ static void spapr_instance_init(Object *obj) +@@ -3327,6 +3344,12 @@ static void spapr_instance_init(Object *obj) spapr_get_host_serial, spapr_set_host_serial); object_property_set_description(obj, "host-serial", "Host serial number to advertise in guest device tree"); @@ -97,7 +64,7 @@ index 12a012d9dd..4a838cc955 100644 } static void spapr_machine_finalizefn(Object *obj) -@@ -4459,6 +4482,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4554,6 +4577,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->smp_threads_vsmt = true; smc->nr_xirqs = SPAPR_NR_XIRQS; xfc->match_nvt = spapr_match_nvt; @@ -105,15 +72,15 @@ index 12a012d9dd..4a838cc955 100644 } static const TypeInfo spapr_machine_info = { -@@ -4509,6 +4533,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4604,6 +4628,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-5.2 + * pseries-6.0 */ -@@ -4588,6 +4613,7 @@ static 
void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4694,6 +4719,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -121,36 +88,63 @@ index 12a012d9dd..4a838cc955 100644 /* * pseries-4.0 -@@ -4604,6 +4630,7 @@ static void phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -4713,6 +4739,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; + return true; } - ++ +#if 0 /* Disabled for Red Hat Enterprise Linux */ static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4762,6 +4789,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); +@@ -4871,6 +4899,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); /* * pseries-2.7 */ +#endif - static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, + static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, -@@ -4816,6 +4844,7 @@ static void phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, - *nv2atsd = 0; +@@ -4926,6 +4955,7 @@ static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, + return true; } +#if 0 /* Disabled for Red Hat Enterprise Linux */ static void spapr_machine_2_7_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4930,6 +4959,314 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -5040,6 +5070,344 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); +#endif + ++static void spapr_machine_rhel_default_class_options(MachineClass *mc) ++{ ++ /* ++ * Defaults for the latest behaviour inherited from the base class ++ * can be overriden here for all pseries-rhel* machines. 
++ */ ++ ++ /* Maximum supported VCPU count */ ++ mc->max_cpus = 384; ++} ++ ++/* ++ * pseries-rhel8.4.0 ++ * like pseries-5.2 ++ */ ++ ++static void spapr_machine_rhel840_class_options(MachineClass *mc) ++{ ++ /* The default machine type must apply the RHEL specific defaults */ ++ spapr_machine_rhel_default_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel840, "rhel8.4.0", true); ++ +/* + * pseries-rhel8.3.0 + * like pseries-5.1 @@ -158,13 +152,17 @@ index 12a012d9dd..4a838cc955 100644 + +static void spapr_machine_rhel830_class_options(MachineClass *mc) +{ -+ /* Defaults for the latest behaviour inherited from the base class */ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + -+ /* Maximum supported VCPU count for all pseries-rhel* machines */ -+ mc->max_cpus = 384; ++ spapr_machine_rhel840_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ ++ /* from pseries-5.1 */ ++ smc->pre_5_2_numa_associativity = true; +} + -+DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", true); ++DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", false); + +/* + * pseries-rhel8.2.0 @@ -461,7 +459,7 @@ index 12a012d9dd..4a838cc955 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 55d36e0069..008074bae0 100644 +index 64178f0f9a..2bff13a6ab 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -24,6 +24,7 @@ @@ -499,18 +497,18 @@ index 55d36e0069..008074bae0 100644 qdev_unrealize(DEVICE(cpu)); return false; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 2e89e36cfb..ba2d81404b 100644 +index bf7cab7a2c..54cdde8980 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -140,6 +140,7 @@ struct SpaprMachineClass { +@@ -143,6 +143,7 @@ struct SpaprMachineClass { bool pre_5_1_assoc_refpoints; bool pre_5_2_numa_associativity; + bool 
has_power9_support; - void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, - uint64_t *buid, hwaddr *pio, + bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, -@@ -220,6 +221,9 @@ struct SpaprMachineState { +@@ -223,6 +224,9 @@ struct SpaprMachineState { int fwnmi_machine_check_interlock; QemuCond fwnmi_machine_check_interlock_cond; @@ -546,7 +544,7 @@ index 7949a24f5a..f207a9ba01 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index 2609e4082e..21c63b5360 100644 +index e73416da68..4eb427a601 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1347,6 +1347,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) @@ -558,7 +556,7 @@ index 2609e4082e..21c63b5360 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index daf690a678..9bf3449adb 100644 +index 104a308abb..cb0fb67383 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -89,6 +89,7 @@ static int cap_ppc_count_cache_flush_assist; @@ -577,7 +575,7 @@ index daf690a678..9bf3449adb 100644 cap_large_decr = kvmppc_get_dec_bits(); cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); /* -@@ -2538,6 +2540,16 @@ int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) +@@ -2551,6 +2553,16 @@ int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) return 0; } @@ -594,9 +592,9 @@ index daf690a678..9bf3449adb 100644 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) { uint32_t host_pvr = mfpvr(); -@@ -2947,3 +2959,18 @@ void kvmppc_svm_off(Error **errp) - error_setg_errno(errp, -rc, "KVM_PPC_SVM_OFF ioctl failed"); - } +@@ -2947,3 +2959,18 @@ bool kvm_arch_cpu_check_are_resettable(void) + { + return true; } + +void kvmppc_svm_allow(Error **errp) @@ -614,18 +612,18 
@@ index daf690a678..9bf3449adb 100644 + } +} diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h -index 73ce2bc951..1239b841fd 100644 +index 989f61ace0..2e7a5d3fc1 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h -@@ -40,6 +40,7 @@ target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, +@@ -39,6 +39,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); + target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, bool radix, bool gtse, uint64_t proc_tbl); - void kvmppc_svm_off(Error **errp); +void kvmppc_svm_allow(Error **errp); #ifndef CONFIG_USER_ONLY bool kvmppc_spapr_use_multitce(void); int kvmppc_spapr_enable_inkernel_multitce(void); -@@ -73,6 +74,8 @@ int kvmppc_set_cap_nested_kvm_hv(int enable); +@@ -72,6 +73,8 @@ int kvmppc_set_cap_nested_kvm_hv(int enable); int kvmppc_get_cap_large_decr(void); int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); int kvmppc_enable_hwrng(void); @@ -634,7 +632,7 @@ index 73ce2bc951..1239b841fd 100644 int kvmppc_put_books_sregs(PowerPCCPU *cpu); PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); void kvmppc_check_papr_resize_hpt(Error **errp); -@@ -387,6 +390,16 @@ static inline int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) +@@ -381,6 +384,16 @@ static inline int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) return -1; } @@ -652,5 +650,5 @@ index 73ce2bc951..1239b841fd 100644 { return -1; -- -2.18.4 +2.27.0 diff --git a/0010-Add-s390x-machine-types.patch b/0009-Add-s390x-machine-types.patch similarity index 76% rename from 0010-Add-s390x-machine-types.patch rename to 0009-Add-s390x-machine-types.patch index 606a004..c768dd7 100644 --- a/0010-Add-s390x-machine-types.patch +++ b/0009-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 36540969ad3b08f1964c71406f1fc14c0e5b47de Mon Sep 17 00:00:00 2001 +From 09eba380295aef0a27d3fbcdda43019ab2898e08 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -6,29 +6,15 
@@ Subject: Add s390x machine types Adding changes to add RHEL machine types for s390x architecture. Signed-off-by: Miroslav Rezanina - -Rebase changes (weekly-4.1.0): -- Use upstream compat handling - -Merged patches (3.1.0): -- 29df663 s390x/cpumodel: default enable bpb and ppa15 for z196 and later - -Merged patches (4.1.0): -- 6c200d665b hw/s390x/s390-virtio-ccw: Add machine types for RHEL8.0.0 - -Merged patches (4.2.0): -- fb192e5 redhat: s390x: Rename s390-ccw-virtio-rhel8.0.0 to s390-ccw-virtio-rhel8.1.0 -- a9b22e8 redhat: s390x: Add proper compatibility options for the -rhel7.6.0 machine -- hw/s390x: Add the s390-ccw-virtio-rhel8.2.0 machine types (patch 92954) --- - hw/s390x/s390-virtio-ccw.c | 71 +++++++++++++++++++++++++++++++++++++- - 1 file changed, 70 insertions(+), 1 deletion(-) + hw/s390x/s390-virtio-ccw.c | 87 +++++++++++++++++++++++++++++++++++++- + 1 file changed, 86 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 4e140bbead..b8dde7e4e1 100644 +index 2972b607f3..8df6dd1c71 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -765,7 +765,7 @@ bool css_migration_enabled(void) +@@ -771,7 +771,7 @@ bool css_migration_enabled(void) { \ MachineClass *mc = MACHINE_CLASS(oc); \ ccw_machine_##suffix##_class_options(mc); \ @@ -37,29 +23,44 @@ index 4e140bbead..b8dde7e4e1 100644 if (latest) { \ mc->alias = "s390-ccw-virtio"; \ mc->is_default = true; \ -@@ -789,6 +789,7 @@ bool css_migration_enabled(void) +@@ -795,6 +795,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_5_2_instance_options(MachineState *machine) + static void ccw_machine_6_0_instance_options(MachineState *machine) { } -@@ -1053,6 +1054,74 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1071,6 +1072,90 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) 
compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); +#endif + ++static void ccw_machine_rhel840_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel840_class_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); ++} ++DEFINE_CCW_MACHINE(rhel840, "rhel8.4.0", true); ++ +static void ccw_machine_rhel820_instance_options(MachineState *machine) +{ ++ ccw_machine_rhel840_instance_options(machine); +} + +static void ccw_machine_rhel820_class_options(MachineClass *mc) +{ ++ ccw_machine_rhel840_class_options(mc); + mc->fixup_ram_size = s390_fixup_ram_size; ++ /* we did not publish a rhel8.3.0 machine */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, hw_compat_rhel_8_2_len); +} -+DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", true); ++DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", false); + +static void ccw_machine_rhel760_instance_options(MachineState *machine) +{ @@ -81,6 +82,7 @@ index 4e140bbead..b8dde7e4e1 100644 +{ + ccw_machine_rhel820_class_options(mc); + /* We never published the s390x version of RHEL-AV 8.0 and 8.1, so add this here */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); + compat_props_add(mc->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); + compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); +} @@ -121,5 +123,5 @@ index 4e140bbead..b8dde7e4e1 100644 static void ccw_machine_register_types(void) { -- -2.18.4 +2.27.0 diff --git a/0011-Add-x86_64-machine-types.patch b/0010-Add-x86_64-machine-types.patch similarity index 88% rename from 0011-Add-x86_64-machine-types.patch rename to 0010-Add-x86_64-machine-types.patch index 63656ab..28de463 100644 --- a/0011-Add-x86_64-machine-types.patch +++ b/0010-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 
004d31cf0e8bb83374a85ecab59eb22683a1e361 Mon Sep 17 00:00:00 2001 +From a082c53cc14afcd2ad77262575af50e164e75649 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -6,63 +6,23 @@ Subject: Add x86_64 machine types Adding changes to add RHEL machine types for x86_64 architecture. Signed-off-by: Miroslav Rezanina - -Rebase changes (qemu-4.0.0): -- Use upstream compat handling - -Rebase notes (3.1.0): -- Removed xsave changes - -Rebase notes (4.1.0): -- Updated format for compat structures - -Rebase notes (4.2.0-rc2): -- Use X86MachineClass for save_tsc_khz (upstream change) - -Merged patches (4.1.0): -- f4dc802 pc: 7.5 compat entries -- 456ed3e pc: PC_RHEL7_6_COMPAT -- 04119ee pc: Add compat for pc-i440fx-rhel7.6.0 machine type -- b3b3687 pc: Add pc-q35-8.0.0 machine type -- 8d46fc6 pc: Add x-migrate-smi-count=off to PC_RHEL7_6_COMPAT -- 1de7949 kvm: clear out KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT for older machine types -- 18cf0d7 target/i386: Disable MPX support on named CPU models (partialy) -- 2660667 rhel: Set host-phys-bits-limit=48 on rhel machine-types - -Merged patches (4.2.0): -- 7d5c2ef pc: Don't make die-id mandatory unless necessary -- e42808c x86 machine types: pc_rhel_8_0_compat -- 9de83a8 x86 machine types: q35: Fixup units_per_default_bus -- 6df1559 x86 machine types: Fixup dynamic sysbus entries -- 0784125 x86 machine types: add pc-q35-rhel8.1.0 -- machines/x86: Add rhel 8.2 machine type (patch 92959) - -Merged patches (5.1.0): -- 481357e RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR support -- e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) - -Merged patches (5.2.0 rc0): -- b02c9f5 x86: Add 8.3.0 x86_64 machine type -- f2edc4f q35: Set max_cpus to 512 -- 6d7ba66 machine types/numa: set numa_mem_supported on old machine types (partialy) -- 25c5644 machine_types/numa: compatibility for auto_enable_numa_with_memdev (partialy) -- e2d3209 
x86: lpc9: let firmware negotiate 'CPU hotplug with SMI' features (partialy) --- - hw/i386/acpi-build.c | 3 + - hw/i386/pc.c | 273 ++++++++++++++++++++++++++++++++++++++++++- - hw/i386/pc_piix.c | 215 +++++++++++++++++++++++++++++++++- - hw/i386/pc_q35.c | 185 ++++++++++++++++++++++++++++- - include/hw/boards.h | 2 + - include/hw/i386/pc.h | 36 ++++++ - target/i386/cpu.c | 3 +- - target/i386/kvm.c | 4 + - 8 files changed, 714 insertions(+), 7 deletions(-) + hw/i386/acpi-build.c | 3 + + hw/i386/pc.c | 277 ++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 225 +++++++++++++++++++++++++++++- + hw/i386/pc_q35.c | 214 +++++++++++++++++++++++++++- + include/hw/boards.h | 2 + + include/hw/i386/pc.h | 39 ++++++ + target/i386/cpu.c | 3 +- + target/i386/kvm/kvm.c | 4 + + tests/qtest/pvpanic-test.c | 5 +- + 9 files changed, 763 insertions(+), 9 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index 1f5c211245..b1082bd412 100644 +index de98750aef..7bd67f7877 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c -@@ -217,6 +217,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) +@@ -231,6 +231,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) pm->fadt.reset_reg = r; pm->fadt.reset_val = 0xf; pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; @@ -73,10 +33,10 @@ index 1f5c211245..b1082bd412 100644 pm->smi_on_cpuhp = !!(smi_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT)); diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 17b514d1da..f3fc695fe2 100644 +index 8a84b25a03..edc02a68ca 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -352,6 +352,271 @@ GlobalProperty pc_compat_1_4[] = { +@@ -355,6 +355,275 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -91,11 +51,15 @@ index 17b514d1da..f3fc695fe2 100644 + { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, + /* bz 1508330 */ + { "vfio-pci", "x-no-geforce-quirks", "on" }, -+ /* BZ 1846886 
*/ -+ { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, +}; +const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_8_3_compat[] = { ++ /* pc_rhel_8_3_compat from pc_compat_5_1 */ ++ { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, ++}; ++const size_t pc_rhel_8_3_compat_len = G_N_ELEMENTS(pc_rhel_8_3_compat); ++ +GlobalProperty pc_rhel_8_2_compat[] = { + /* pc_rhel_8_2_compat from pc_compat_4_2 */ + { "mch", "smbase-smram", "off" }, @@ -348,7 +312,7 @@ index 17b514d1da..f3fc695fe2 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -970,7 +1235,8 @@ void pc_memory_init(PCMachineState *pcms, +@@ -952,7 +1221,8 @@ void pc_memory_init(PCMachineState *pcms, option_rom_mr = g_malloc(sizeof(*option_rom_mr)); memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, &error_fatal); @@ -358,7 +322,7 @@ index 17b514d1da..f3fc695fe2 100644 memory_region_set_readonly(option_rom_mr, true); } memory_region_add_subregion_overlap(rom_memory, -@@ -1674,6 +1940,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1702,6 +1972,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->pvh_enabled = true; pcmc->kvmclock_create_always = true; assert(!mc->get_hotplug_handler); @@ -367,7 +331,7 @@ index 17b514d1da..f3fc695fe2 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1685,7 +1953,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1713,7 +1985,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->default_boot_order = "cad"; mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; @@ -378,7 +342,7 @@ index 17b514d1da..f3fc695fe2 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 9fcc5aaf69..815da79108 100644 +index 
62433d8022..d9c5df16d8 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -54,6 +54,7 @@ @@ -408,7 +372,7 @@ index 9fcc5aaf69..815da79108 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -1007,3 +1009,212 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -927,3 +929,222 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -421,8 +385,9 @@ index 9fcc5aaf69..815da79108 100644 +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + m->family = "pc_piix_Y"; -+ m->default_machine_opts = "firmware=bios-256k.bin"; ++ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; + pcmc->default_nic_model = "e1000"; ++ pcmc->pci_root_uid = 0; + m->default_display = "std"; + m->no_parallel = 1; + m->numa_mem_supported = true; @@ -448,6 +413,15 @@ index 9fcc5aaf69..815da79108 100644 + m->smbus_no_migration_support = true; + pcmc->pvh_enabled = false; + pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ pcmc->kvmclock_create_always = false; ++ /* From pc_i440fx_5_1_machine_options() */ ++ pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ compat_props_add(m->compat_props, pc_rhel_8_3_compat, ++ pc_rhel_8_3_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_2, + hw_compat_rhel_8_2_len); + compat_props_add(m->compat_props, pc_rhel_8_2_compat, @@ -622,7 +596,7 @@ index 9fcc5aaf69..815da79108 100644 +DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, + pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index f6c2ef4e43..3340008c00 100644 +index fce52ca70b..44109e4876 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -195,8 +195,8 @@ static void pc_q35_init(MachineState *machine) @@ -644,7 +618,7 @@ 
index f6c2ef4e43..3340008c00 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -568,3 +569,183 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -581,3 +582,212 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -657,9 +631,10 @@ index f6c2ef4e43..3340008c00 100644 +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pcmc->default_nic_model = "e1000e"; ++ pcmc->pci_root_uid = 0; + m->family = "pc_q35_Z"; + m->units_per_default_bus = 1; -+ m->default_machine_opts = "firmware=bios-256k.bin"; ++ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; + m->default_display = "std"; + m->no_floppy = 1; + m->no_parallel = 1; @@ -668,10 +643,30 @@ index f6c2ef4e43..3340008c00 100644 + machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); + machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); + m->alias = "q35"; -+ m->max_cpus = 512; ++ m->max_cpus = 710; + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); +} + ++static void pc_q35_init_rhel840(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel840_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.4.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.4.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel840, "pc-q35-rhel8.4.0", pc_q35_init_rhel840, ++ pc_q35_machine_rhel840_options); ++ ++ +static void pc_q35_init_rhel830(MachineState *machine) +{ + pc_q35_init(machine); @@ -680,10 +675,19 @@ index f6c2ef4e43..3340008c00 100644 +static void pc_q35_machine_rhel830_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); ++ 
pc_q35_machine_rhel840_options(m); + m->desc = "RHEL-8.3.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.3.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ compat_props_add(m->compat_props, pc_rhel_8_3_compat, ++ pc_rhel_8_3_compat_len); ++ /* From pc_q35_5_1_machine_options() */ ++ pcmc->kvmclock_create_always = false; ++ /* From pc_q35_5_1_machine_options() */ ++ pcmc->pci_root_uid = 1; +} + +DEFINE_PC_MACHINE(q35_rhel830, "pc-q35-rhel8.3.0", pc_q35_init_rhel830, @@ -697,9 +701,8 @@ index f6c2ef4e43..3340008c00 100644 +static void pc_q35_machine_rhel820_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel830_options(m); + m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; -+ m->alias = NULL; + m->numa_mem_supported = true; + m->auto_enable_numa_with_memdev = false; + pcmc->smbios_stream_product = "RHEL-AV"; @@ -829,10 +832,10 @@ index f6c2ef4e43..3340008c00 100644 +DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, + pc_q35_machine_rhel730_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index dd18c9e94d..4e4a54b313 100644 +index 2d7a65724a..90ae100bfc 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -202,6 +202,8 @@ struct MachineClass { +@@ -243,6 +243,8 @@ struct MachineClass { strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; bool auto_enable_numa_with_memdev; @@ -842,10 +845,10 @@ index dd18c9e94d..4e4a54b313 100644 bool smbus_no_migration_support; bool nvdimm_supported; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index ae6bf1d209..e2ba9a4b58 100644 +index 93c012ac95..79a7803a2f 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -125,6 +125,9 @@ struct PCMachineClass { +@@ -128,6 +128,9 @@ struct PCMachineClass { /* create kvmclock device even when KVM PV features 
are not exposed */ bool kvmclock_create_always; @@ -855,13 +858,16 @@ index ae6bf1d209..e2ba9a4b58 100644 }; #define TYPE_PC_MACHINE "generic-pc-machine" -@@ -266,6 +269,39 @@ extern const size_t pc_compat_1_5_len; +@@ -275,6 +278,42 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_8_3_compat[]; ++extern const size_t pc_rhel_8_3_compat_len; ++ +extern GlobalProperty pc_rhel_8_2_compat[]; +extern const size_t pc_rhel_8_2_compat_len; + @@ -896,10 +902,10 @@ index ae6bf1d209..e2ba9a4b58 100644 * depending on QEMU versions up to QEMU 2.4. */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 5a8c96072e..dc592e990e 100644 +index ad99cad0e7..c30bb2a6b0 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1803,7 +1803,7 @@ static X86CPUDefinition builtin_x86_defs[] = { +@@ -1882,7 +1882,7 @@ static X86CPUDefinition builtin_x86_defs[] = { .level = 0xd, .vendor = CPUID_VENDOR_AMD, .family = 6, @@ -908,7 +914,7 @@ index 5a8c96072e..dc592e990e 100644 .stepping = 3, .features[FEAT_1_EDX] = PPRO_FEATURES | -@@ -4117,6 +4117,7 @@ static PropValue kvm_default_props[] = { +@@ -4264,6 +4264,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -916,11 +922,11 @@ index 5a8c96072e..dc592e990e 100644 { NULL, NULL }, }; -diff --git a/target/i386/kvm.c b/target/i386/kvm.c -index a2934dda02..19bc39b9e3 100644 ---- a/target/i386/kvm.c -+++ b/target/i386/kvm.c -@@ -3126,6 +3126,7 @@ static int kvm_get_msrs(X86CPU *cpu) +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 7fe9f52710..4c69c2cb4b 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -3181,6 +3181,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -928,7 +934,7 @@ 
index a2934dda02..19bc39b9e3 100644 kvm_msr_buf_reset(cpu); -@@ -3438,6 +3439,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3499,6 +3500,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -938,6 +944,29 @@ index a2934dda02..19bc39b9e3 100644 break; case MSR_KVM_ASYNC_PF_INT: env->async_pf_int_msr = msrs[i].data; +diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c +index 6dcad2db49..580c2c43d2 100644 +--- a/tests/qtest/pvpanic-test.c ++++ b/tests/qtest/pvpanic-test.c +@@ -17,7 +17,7 @@ static void test_panic_nopause(void) + QDict *response, *data; + QTestState *qts; + +- qts = qtest_init("-device pvpanic -action panic=none"); ++ qts = qtest_init("-M q35 -device pvpanic -action panic=none"); + + val = qtest_inb(qts, 0x505); + g_assert_cmpuint(val, ==, 3); +@@ -40,7 +40,8 @@ static void test_panic(void) + QDict *response, *data; + QTestState *qts; + +- qts = qtest_init("-device pvpanic -action panic=pause"); ++ /* RHEL: Use q35 */ ++ qts = qtest_init("-M q35 -device pvpanic -action panic=pause"); + + val = qtest_inb(qts, 0x505); + g_assert_cmpuint(val, ==, 3); -- -2.18.4 +2.27.0 diff --git a/0012-Enable-make-check.patch b/0011-Enable-make-check.patch similarity index 65% rename from 0012-Enable-make-check.patch rename to 0011-Enable-make-check.patch index 906bb4e..b3af9a8 100644 --- a/0012-Enable-make-check.patch +++ b/0011-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 7b8ca8c1cbd3763900e3e472556116c9832e06f8 Mon Sep 17 00:00:00 2001 +From 5f6a55a218029af944a8d02ab9264647315890d3 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -7,88 +7,87 @@ Fixing tests after device disabling and machine types changes and enabling make check run during build. 
Signed-off-by: Miroslav Rezanina - -Rebase changes (4.0.0): -- Remove testing for pseries-2.7 in endianess test -- Disable device-plug-test on s390x as it use disabled device -- Do not run cpu-plug-tests on 7.3 and older machine types - -Rebase changes (4.1.0-rc0): -- removed iotests 068 - -Rebase changes (4.1.0-rc1): -- remove all 205 tests (unstable) - -Rebase changes (4.2.0-rc0): -- partially disable hd-geo-test (requires lsi53c895a) - -Rebase changes (5.1.0-rc1): -- Disable qtest/q35-test (uses upstream machine types) -- Do not run iotests on make checka -- Enabled iotests 071 and 099 - -Rebase changes (5.2.0 rc0): -- Disable cdrom tests (unsupported devices) on x86_64 -- disable fuzz test - -Merged patches (4.0.0): -- f7ffd13 Remove 7 qcow2 and luks iotests that are taking > 25 sec to run during the fast train build proce - -Merged patches (4.1.0-rc0): -- 41288ff redhat: Remove raw iotest 205 - -Conflicts: - redhat/qemu-kvm.spec.template --- - redhat/qemu-kvm.spec.template | 4 ++-- - tests/qemu-iotests/051 | 12 ++++++------ + redhat/qemu-kvm.spec.template | 6 ++---- + tests/qemu-iotests/051 | 8 ++++---- + tests/qtest/bios-tables-test.c | 6 +++--- tests/qtest/boot-serial-test.c | 6 +++++- - tests/qtest/cdrom-test.c | 2 ++ + tests/qtest/cdrom-test.c | 4 ++++ tests/qtest/cpu-plug-test.c | 4 ++-- tests/qtest/e1000-test.c | 2 ++ + tests/qtest/fuzz-e1000e-test.c | 2 +- + tests/qtest/fuzz-virtio-scsi-test.c | 2 +- tests/qtest/hd-geo-test.c | 4 ++++ - tests/qtest/meson.build | 10 ++-------- + tests/qtest/libqos/meson.build | 2 +- + tests/qtest/lpc-ich9-test.c | 2 +- + tests/qtest/meson.build | 11 +++-------- tests/qtest/prom-env-test.c | 4 ++++ tests/qtest/test-x86-cpuid-compat.c | 2 ++ tests/qtest/usb-hcd-xhci-test.c | 4 ++++ - 11 files changed, 35 insertions(+), 19 deletions(-) + tests/unit/meson.build | 2 +- + 17 files changed, 44 insertions(+), 27 deletions(-) diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 -index bee26075b2..61d25c4ed7 100755 +index 
7bf29343d7..fd63402d78 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 -@@ -183,11 +183,11 @@ run_qemu -drive if=virtio +@@ -174,9 +174,9 @@ run_qemu -drive if=virtio case "$QEMU_DEFAULT_MACHINE" in pc) run_qemu -drive if=none,id=disk -device ide-cd,drive=disk - run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk +# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk - run_qemu -drive if=none,id=disk -device ide-drive,drive=disk run_qemu -drive if=none,id=disk -device ide-hd,drive=disk -- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk - run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk -+# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk +# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk ;; *) ;; -@@ -236,11 +236,11 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on +@@ -225,9 +225,9 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on case "$QEMU_DEFAULT_MACHINE" in pc) run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-cd,drive=disk - run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk +# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk - run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-drive,drive=disk run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-hd,drive=disk -- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk - run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk -+# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk +# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device 
scsi-hd,drive=disk ;; *) ;; +diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c +index 156d4174aa..b4a1074b77 100644 +--- a/tests/qtest/bios-tables-test.c ++++ b/tests/qtest/bios-tables-test.c +@@ -1299,7 +1299,7 @@ static void test_acpi_virt_tcg_numamem(void) + free_test_data(&data); + + } +- ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void test_acpi_virt_tcg_pxb(void) + { + test_data data = { +@@ -1331,7 +1331,7 @@ static void test_acpi_virt_tcg_pxb(void) + + free_test_data(&data); + } +- ++#endif + static void test_acpi_tcg_acpi_hmat(const char *machine) + { + test_data data; +@@ -1561,7 +1561,7 @@ int main(int argc, char *argv[]) + qtest_add_func("acpi/virt", test_acpi_virt_tcg); + qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem); + qtest_add_func("acpi/virt/memhp", test_acpi_virt_tcg_memhp); +- qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb); ++/* qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb); */ + qtest_add_func("acpi/virt/oem-fields", test_acpi_oem_fields_virt); + } + ret = g_test_run(); diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c -index b6b1c23cd0..cefa1b38b7 100644 +index d74509b1c5..a64c55e384 100644 --- a/tests/qtest/boot-serial-test.c +++ b/tests/qtest/boot-serial-test.c @@ -120,19 +120,23 @@ static testdef_t tests[] = { @@ -117,7 +116,7 @@ index b6b1c23cd0..cefa1b38b7 100644 { "sparc", "LX", "", "TMS390S10" }, { "sparc", "SS-4", "", "MB86904" }, diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c -index 5af944a5fb..cd5b8e0f16 100644 +index 5af944a5fb..69d9bac38a 100644 --- a/tests/qtest/cdrom-test.c +++ b/tests/qtest/cdrom-test.c @@ -140,6 +140,7 @@ static void add_x86_tests(void) @@ -136,6 +135,22 @@ index 5af944a5fb..cd5b8e0f16 100644 } static void add_s390x_tests(void) +@@ -220,6 +222,7 @@ int main(int argc, char **argv) + "magnum", "malta", "pica61", NULL + }; + add_cdrom_param_tests(mips64machines); ++#if 0 /* Disabled for Red 
Hat Enterprise Linux */ + } else if (g_str_equal(arch, "arm") || g_str_equal(arch, "aarch64")) { + const char *armmachines[] = { + "realview-eb", "realview-eb-mpcore", "realview-pb-a8", +@@ -227,6 +230,7 @@ int main(int argc, char **argv) + "vexpress-a9", "virt", NULL + }; + add_cdrom_param_tests(armmachines); ++#endif + } else { + const char *nonemachine[] = { "none", NULL }; + add_cdrom_param_tests(nonemachine); diff --git a/tests/qtest/cpu-plug-test.c b/tests/qtest/cpu-plug-test.c index a1c689414b..a8f076711c 100644 --- a/tests/qtest/cpu-plug-test.c @@ -167,6 +182,32 @@ index ea286d1793..a1847ac8ed 100644 }; static void *e1000_get_driver(void *obj, const char *interface) +diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c +index 66229e6096..947fba73b7 100644 +--- a/tests/qtest/fuzz-e1000e-test.c ++++ b/tests/qtest/fuzz-e1000e-test.c +@@ -17,7 +17,7 @@ static void test_lp1879531_eth_get_rss_ex_dst_addr(void) + { + QTestState *s; + +- s = qtest_init("-nographic -monitor none -serial none -M pc-q35-5.0"); ++ s = qtest_init("-nographic -monitor none -serial none -M pc-q35-rhel8.4.0"); + + qtest_outl(s, 0xcf8, 0x80001010); + qtest_outl(s, 0xcfc, 0xe1020000); +diff --git a/tests/qtest/fuzz-virtio-scsi-test.c b/tests/qtest/fuzz-virtio-scsi-test.c +index aaf6d10e18..43727d62ac 100644 +--- a/tests/qtest/fuzz-virtio-scsi-test.c ++++ b/tests/qtest/fuzz-virtio-scsi-test.c +@@ -19,7 +19,7 @@ static void test_mmio_oob_from_memory_region_cache(void) + { + QTestState *s; + +- s = qtest_init("-M pc-q35-5.2 -display none -m 512M " ++ s = qtest_init("-M pc-q35-rhel8.4.0 -display none -m 512M " + "-device virtio-scsi,num_queues=8,addr=03.0 "); + + qtest_outl(s, 0xcf8, 0x80001811); diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c index f7b7cfbc2d..99cccf8638 100644 --- a/tests/qtest/hd-geo-test.c @@ -199,20 +240,45 @@ index f7b7cfbc2d..99cccf8638 100644 qtest_add_func("hd-geo/override/virtio_blk", test_override_virtio_blk); 
qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); qtest_add_func("hd-geo/override/scsi_hot_unplug", +diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build +index 1cddf5bdaa..2f4a564105 100644 +--- a/tests/qtest/libqos/meson.build ++++ b/tests/qtest/libqos/meson.build +@@ -41,7 +41,7 @@ libqos_srcs = files('../libqtest.c', + 'virtio-serial.c', + + # qgraph machines: +- 'aarch64-xlnx-zcu102-machine.c', ++# 'aarch64-xlnx-zcu102-machine.c', + 'arm-imx25-pdk-machine.c', + 'arm-n800-machine.c', + 'arm-raspi2-machine.c', +diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c +index fe0bef9980..7a9d51579b 100644 +--- a/tests/qtest/lpc-ich9-test.c ++++ b/tests/qtest/lpc-ich9-test.c +@@ -15,7 +15,7 @@ static void test_lp1878642_pci_bus_get_irq_level_assert(void) + { + QTestState *s; + +- s = qtest_init("-M pc-q35-5.0 " ++ s = qtest_init("-M pc-q35-rhel8.4.0 " + "-nographic -monitor none -serial none"); + + qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index c19f1c8503..15ed460ff0 100644 +index 0c76738921..b9a7426a7b 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build -@@ -51,16 +51,13 @@ qtests_i386 = \ +@@ -71,7 +71,6 @@ qtests_i386 = \ 'ide-test', 'hd-geo-test', 'boot-order-test', - 'bios-tables-test', 'rtc-test', 'i440fx-test', -- 'fuzz-test', 'fw_cfg-test', - 'device-plug-test', +@@ -79,7 +78,6 @@ qtests_i386 = \ 'drive_del-test', 'tco-test', 'cpu-plug-test', @@ -220,7 +286,7 @@ index c19f1c8503..15ed460ff0 100644 'vmgenid-test', 'migration-test', 'test-x86-cpuid-compat', -@@ -111,17 +108,15 @@ qtests_moxie = [ 'boot-serial-test' ] +@@ -130,17 +128,15 @@ qtests_moxie = [ 'boot-serial-test' ] qtests_ppc = \ (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? 
['endianness-test'] : []) + \ @@ -240,7 +306,16 @@ index c19f1c8503..15ed460ff0 100644 qtests_pci + ['migration-test', 'numa-test', 'cpu-plug-test', 'drive_del-test'] qtests_sh4 = (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) -@@ -164,7 +159,6 @@ qtests_s390x = \ +@@ -183,7 +179,7 @@ qtests_aarch64 = \ + ['arm-cpu-features', + 'numa-test', + 'boot-serial-test', +- 'xlnx-can-test', ++# 'xlnx-can-test', + 'migration-test'] + + qtests_s390x = \ +@@ -192,7 +188,6 @@ qtests_s390x = \ (config_host.has_key('CONFIG_POSIX') ? ['test-filter-redirector'] : []) + \ ['boot-serial-test', 'drive_del-test', @@ -268,11 +343,11 @@ index f41d80154a..f8dc478ce8 100644 add_tests(sparc_machines); } else if (!strcmp(arch, "sparc64")) { diff --git a/tests/qtest/test-x86-cpuid-compat.c b/tests/qtest/test-x86-cpuid-compat.c -index 7ca1883a29..983aa0719a 100644 +index f28848e06e..6b2fd398a2 100644 --- a/tests/qtest/test-x86-cpuid-compat.c +++ b/tests/qtest/test-x86-cpuid-compat.c @@ -300,6 +300,7 @@ int main(int argc, char **argv) - "-cpu 486,xlevel2=0xC0000002,+xstore", + "-cpu 486,xlevel2=0xC0000002,xstore=on", "xlevel2", 0xC0000002); +#if 0 /* Disabled in Red Hat Enterprise Linux */ @@ -281,7 +356,7 @@ index 7ca1883a29..983aa0719a 100644 @@ -350,6 +351,7 @@ int main(int argc, char **argv) add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", - "-machine pc-i440fx-2.4 -cpu SandyBridge,+svm,+npt", + "-machine pc-i440fx-2.4 -cpu SandyBridge,svm=on,npt=on", "xlevel", 0x80000008); +#endif @@ -317,6 +392,19 @@ index 10ef9d2a91..3855873050 100644 qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); qtest_start("-device nec-usb-xhci,id=xhci" +diff --git a/tests/unit/meson.build b/tests/unit/meson.build +index b3bc2109da..244d35f5d4 100644 +--- a/tests/unit/meson.build ++++ b/tests/unit/meson.build +@@ -65,7 +65,7 @@ if have_block + 'test-blockjob': [testblock], + 'test-blockjob-txn': [testblock], + 'test-block-backend': [testblock], +- 
'test-block-iothread': [testblock], ++# 'test-block-iothread': [testblock], + 'test-write-threshold': [testblock], + 'test-crypto-hash': [crypto], + 'test-crypto-hmac': [crypto], -- -2.18.4 +2.27.0 diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch similarity index 85% rename from 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch rename to 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch index 9575257..45abe27 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From da70823afbdbb904950068fe5f0323ff75b0d4fc Mon Sep 17 00:00:00 2001 +From 22c0f47f02c5db63f3857dabc6cc7cb6bfc78158 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -23,25 +23,16 @@ matches the number of slots on a PCI bus and is also a nice power of two. 
Signed-off-by: Bandan Das - -Rebase notes (2.8.0): -- removed return value for vfio_realize (commit 1a22aca) - -Merged patches (2.9.0): -- 17eb774 vfio: Use error_setg when reporting max assigned device overshoot - - Merged patches (4.1.0-rc3): -- 2b89558 vfio: increase the cap on number of assigned devices to 64 --- hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- hw/vfio/pci.h | 1 + 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 51dc373695..06ce2a39aa 100644 +index 5c65aa0a98..327b86703a 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -45,6 +45,9 @@ +@@ -46,6 +46,9 @@ #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" @@ -51,7 +42,7 @@ index 51dc373695..06ce2a39aa 100644 static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); -@@ -2768,9 +2771,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) +@@ -2783,9 +2786,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) ssize_t len; struct stat st; int groupid; @@ -83,7 +74,7 @@ index 51dc373695..06ce2a39aa 100644 if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3207,6 +3231,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3222,6 +3246,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -94,7 +85,7 @@ index 51dc373695..06ce2a39aa 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 1574ef983f..fef907c112 100644 +index 64777516d1..e0fe6ca97e 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -139,6 +139,7 @@ struct VFIOPCIDevice { @@ -106,5 +97,5 @@ index 1574ef983f..fef907c112 100644 uint32_t device_id; uint32_t sub_vendor_id; -- -2.18.4 +2.27.0 diff 
--git a/0014-Add-support-statement-to-help-output.patch b/0013-Add-support-statement-to-help-output.patch similarity index 88% rename from 0014-Add-support-statement-to-help-output.patch rename to 0013-Add-support-statement-to-help-output.patch index 04d89d8..8739e82 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0013-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From f69c3b855ec419b4afe240bbd039141a59aad808 Mon Sep 17 00:00:00 2001 +From ffd8eff2ce1d7eda81d425324593924c098f6c39 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -21,10 +21,10 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c -index e6e0ad5a92..065d52e8dc 100644 +index aadb526138..6c8498022b 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c -@@ -1688,9 +1688,17 @@ static void version(void) +@@ -848,9 +848,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -42,7 +42,7 @@ index e6e0ad5a92..065d52e8dc 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", error_get_progname()); -@@ -1707,6 +1715,7 @@ static void help(int exitcode) +@@ -867,6 +875,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); @@ -51,5 +51,5 @@ index e6e0ad5a92..065d52e8dc 100644 } -- -2.18.4 +2.27.0 diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0014-globally-limit-the-maximum-number-of-CPUs.patch similarity index 63% rename from 0015-globally-limit-the-maximum-number-of-CPUs.patch rename to 0014-globally-limit-the-maximum-number-of-CPUs.patch index 4a65df5..b44ad7c 100644 --- a/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0014-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From 9585c8927744d8b07b317063ef788e1f01773f0e Mon Sep 17 00:00:00 2001 +From b5dab6e678d9b53359b3a915421114258e803cad Mon Sep 17 00:00:00 2001 From: Andrew 
Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -13,35 +13,15 @@ default and minimize the ppc hack in kvm-all.c. Signed-off-by: David Hildenbrand Signed-off-by: Miroslav Rezanina Signed-off-by: Danilo Cesar Lemes de Paula - -Rebase notes (2.11.0): -- Removed CONFIG_RHV reference -- Update commit log - -Merged patches (2.11.0): -- 92fef14623 redhat: remove manual max_cpus limitations for ppc -- bb722e9eff redhat: globally limit the maximum number of CPUs -- fdeef3c1c7 RHEL: Set vcpus hard limit to 240 for Power -- 0584216921 Match POWER max cpus to x86 - -Signed-off-by: Andrew Jones - -Merged patches (5.1.0): -- redhat: globally limit the maximum number of CPUs -- redhat: remove manual max_cpus limitations for ppc -- use recommended max vcpu count - -Merged patches (5.2.0 rc0): -- f8a4123 vl: Remove downstream-only MAX_RHEL_CPUS code --- accel/kvm/kvm-all.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index baaa54249d..a1fbda0945 100644 +index b6d9f92f15..70a94ba76d 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c -@@ -2108,6 +2108,18 @@ static int kvm_init(MachineState *ms) +@@ -2095,6 +2095,18 @@ static int kvm_init(MachineState *ms) soft_vcpus_limit = kvm_recommended_vcpus(s); hard_vcpus_limit = kvm_max_vcpus(s); @@ -61,5 +41,5 @@ index baaa54249d..a1fbda0945 100644 if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " -- -2.18.4 +2.27.0 diff --git a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch similarity index 91% rename from 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch rename to 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 67f920a..599e101 100644 --- a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ 
b/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From 091f9e47dc4609bfded5474cfe2797777cdd56f1 Mon Sep 17 00:00:00 2001 +From 55fde02ee1a9aa0e812af8534a9adf553accc522 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -16,12 +16,6 @@ We change the name and location of qemu-kvm binaries. Update documentation to reflect this change. Only architectures available in RHEL are updated. Signed-off-by: Miroslav Rezanina - -Rebase notes (5.1.0 rc0): - - qemu-block-drivers.texi converted to qemu-block-drivers.rst (upstream) - -Rebase notes (5.2.0 rc0): - - rewrite patch to new docs structure --- docs/defs.rst.inc | 4 ++-- docs/interop/live-block-operations.rst | 4 ++-- @@ -31,7 +25,7 @@ Rebase notes (5.2.0 rc0): 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/defs.rst.inc b/docs/defs.rst.inc -index 48d05aaf33..d74dbdeca9 100644 +index 52d6454b93..d74dbdeca9 100644 --- a/docs/defs.rst.inc +++ b/docs/defs.rst.inc @@ -9,7 +9,7 @@ @@ -39,13 +33,13 @@ index 48d05aaf33..d74dbdeca9 100644 incorrectly in boldface. -.. |qemu_system| replace:: qemu-system-x86_64 --.. |qemu_system_x86| replace:: qemu_system-x86_64 +-.. |qemu_system_x86| replace:: qemu-system-x86_64 +.. |qemu_system| replace:: qemu-kvm +.. |qemu_system_x86| replace:: qemu-kvm .. |I2C| replace:: I\ :sup:`2`\ C .. 
|I2S| replace:: I\ :sup:`2`\ S diff --git a/docs/interop/live-block-operations.rst b/docs/interop/live-block-operations.rst -index e13f5a21f8..6650b2c975 100644 +index 1073b930dc..881432253f 100644 --- a/docs/interop/live-block-operations.rst +++ b/docs/interop/live-block-operations.rst @@ -129,7 +129,7 @@ To show some example invocations of command-line, we will use the @@ -125,10 +119,10 @@ index fb70445c75..0d9a783112 100644 See also -------- diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst -index 866b7db3ee..5b3be8a6d6 100644 +index 00554c75bd..6e0fc94005 100644 --- a/docs/tools/virtiofsd.rst +++ b/docs/tools/virtiofsd.rst -@@ -297,7 +297,7 @@ Export ``/var/lib/fs/vm001/`` on vhost-user UNIX domain socket +@@ -301,7 +301,7 @@ Export ``/var/lib/fs/vm001/`` on vhost-user UNIX domain socket :: host# virtiofsd --socket-path=/var/run/vm001-vhost-fs.sock -o source=/var/lib/fs/vm001 @@ -138,10 +132,10 @@ index 866b7db3ee..5b3be8a6d6 100644 -device vhost-user-fs-pci,chardev=char0,tag=myfs \ -object memory-backend-memfd,id=mem,size=4G,share=on \ diff --git a/qemu-options.hx b/qemu-options.hx -index 363a15b4e8..5e5e265331 100644 +index 0d4fb61bf7..79ca09feac 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -2935,11 +2935,11 @@ SRST +@@ -3011,11 +3011,11 @@ SRST :: @@ -159,5 +153,5 @@ index 363a15b4e8..5e5e265331 100644 ``-netdev vhost-vdpa,vhostdev=/path/to/dev`` Establish a vhost-vdpa netdev. 
-- -2.18.4 +2.27.0 diff --git a/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch similarity index 92% rename from 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch rename to 0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index 7ab1831..cd00b4f 100644 --- a/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 4d69dc90e66deec6bc6b46074ee44ef8c902266b Mon Sep 17 00:00:00 2001 +From 2ab1a61510036bd409532f24ea14fa693ec0362c Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] @@ -42,11 +42,11 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 3db9a8aae9..82c025146d 100644 +index 6d80730287..bba3d75707 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c -@@ -823,6 +823,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, - SCSIDevice *sd = SCSI_DEVICE(dev); +@@ -896,6 +896,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + AioContext *old_context; int ret; + /* XXX: Remove this check once block backend is capable of handling @@ -62,5 +62,5 @@ index 3db9a8aae9..82c025146d 100644 if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { return; -- -2.18.4 +2.27.0 diff --git a/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch similarity index 92% rename from 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch rename to 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index 6c16c93..2e670a2 100644 --- a/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ 
b/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From 18c5a8c24e22b7c2ba9f7e26cac190cefc7ecf26 Mon Sep 17 00:00:00 2001 +From 7b3d9142f3b296b127dce35336765dc16265d155 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts @@ -32,10 +32,10 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index 9341e9782a..f11428eae9 100644 +index 9ea7ddd1e9..1338b677d2 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c -@@ -333,12 +333,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, +@@ -332,12 +332,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { @@ -56,5 +56,5 @@ index 9341e9782a..f11428eae9 100644 spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); } -- -2.18.4 +2.27.0 diff --git a/0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch b/0018-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch similarity index 72% rename from 0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch rename to 0018-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch index c644891..b5e8f92 100644 --- a/0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +++ b/0018-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch @@ -1,4 +1,4 @@ -From 989cfded8fdd5df3b6b1f1a304ca16c128d7561b Mon Sep 17 00:00:00 2001 +From acdc84c1077be7d347414f781014ea785ce41d7b Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 13 Mar 2020 12:34:32 +0000 Subject: block: Versioned x-blockdev-reopen API with feature flag @@ -26,13 +26,14 @@ Signed-off-by: Kevin Wolf Signed-off-by: Danilo C. L. 
de Paula --- qapi/block-core.json | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) + scripts/qapi/expr.py | 2 +- + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/qapi/block-core.json b/qapi/block-core.json -index 04ad80bc1e..2a7dca299f 100644 +index 6d227924d0..15ad8cee05 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json -@@ -4143,10 +4143,17 @@ +@@ -4166,10 +4166,17 @@ # image does not have a default backing file name as part of its # metadata. # @@ -51,6 +52,19 @@ index 04ad80bc1e..2a7dca299f 100644 ## # @blockdev-del: +diff --git a/scripts/qapi/expr.py b/scripts/qapi/expr.py +index 540b3982b1..884874d205 100644 +--- a/scripts/qapi/expr.py ++++ b/scripts/qapi/expr.py +@@ -215,7 +215,7 @@ def check_features(features, info): + check_keys(f, info, source, ['name'], ['if']) + check_name_is_str(f['name'], info, source) + source = "%s '%s'" % (source, f['name']) +- check_name_lower(f['name'], info, source) ++ check_name_lower(f['name'], info, source, permit_underscore=True) + check_if(f, info, source) + + -- -2.18.4 +2.27.0 diff --git a/0021-redhat-Define-hw_compat_8_3.patch b/0021-redhat-Define-hw_compat_8_3.patch deleted file mode 100644 index a5ca2c6..0000000 --- a/0021-redhat-Define-hw_compat_8_3.patch +++ /dev/null @@ -1,70 +0,0 @@ -From fa0063ba67071384d8c749cee8f4f4e5bbc8ef91 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Fri, 20 Nov 2020 14:00:31 -0500 -Subject: redhat: Define hw_compat_8_3 - -RH-Author: Greg Kurz -Message-id: <20201120140033.578472-2-gkurz@redhat.com> -Patchwork-id: 99790 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 1/3] redhat: Define hw_compat_8_3 -Bugzilla: 1893935 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Laurent Vivier -RH-Acked-by: David Gibson - -Signed-off-by: Greg Kurz -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/core/machine.c | 21 +++++++++++++++++++++ - include/hw/boards.h | 3 +++ - 2 files changed, 24 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 19d50dde45..aba05ad676 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -28,6 +28,27 @@ - #include "hw/mem/nvdimm.h" - #include "migration/vmstate.h" - -+/* -+ * The same as hw_compat_5_1 -+ */ -+GlobalProperty hw_compat_rhel_8_3[] = { -+ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ -+ { "vhost-scsi", "num_queues", "1"}, -+ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ -+ { "vhost-user-blk", "num-queues", "1"}, -+ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ -+ { "vhost-user-scsi", "num_queues", "1"}, -+ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ -+ { "virtio-blk-device", "num-queues", "1"}, -+ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ -+ { "virtio-scsi-device", "num_queues", "1"}, -+ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ -+ { "nvme", "use-intel-id", "on"}, -+ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ -+ { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ -+}; -+const size_t hw_compat_rhel_8_3_len = G_N_ELEMENTS(hw_compat_rhel_8_3); -+ - /* - * The same as hw_compat_4_2 + hw_compat_5_0 - */ -diff --git a/include/hw/boards.h b/include/hw/boards.h -index 4e4a54b313..526e5aea04 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -369,6 +369,9 @@ extern const size_t hw_compat_2_2_len; - extern GlobalProperty hw_compat_2_1[]; - extern const size_t hw_compat_2_1_len; - -+extern GlobalProperty hw_compat_rhel_8_3[]; -+extern const size_t hw_compat_rhel_8_3_len; -+ - extern GlobalProperty hw_compat_rhel_8_2[]; - extern const size_t hw_compat_rhel_8_2_len; - --- -2.18.4 - diff --git a/0022-redhat-Add-spapr_machine_rhel_default_class_options.patch b/0022-redhat-Add-spapr_machine_rhel_default_class_options.patch deleted file mode 100644 index f77916f..0000000 --- a/0022-redhat-Add-spapr_machine_rhel_default_class_options.patch +++ /dev/null @@ -1,66 +0,0 @@ 
-From 943c936df3b6b5c3197ad727f2105e61778e749a Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Fri, 20 Nov 2020 14:00:32 -0500 -Subject: redhat: Add spapr_machine_rhel_default_class_options() - -RH-Author: Greg Kurz -Message-id: <20201120140033.578472-3-gkurz@redhat.com> -Patchwork-id: 99791 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 2/3] redhat: Add spapr_machine_rhel_default_class_options() -Bugzilla: 1893935 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Laurent Vivier -RH-Acked-by: David Gibson - -RHEL may need to override some default property inherited from upstream. -This is currently handled in the class_options() function of the latest -machine type, and thus the defaults need to be carried around each time -we add a new RHEL machine. - -Override the defaults in a dedicated function to be called by the -latest RHEL machine type. - -Signed-off-by: Greg Kurz -Signed-off-by: Danilo C. L. de Paula ---- - hw/ppc/spapr.c | 17 +++++++++++++---- - 1 file changed, 13 insertions(+), 4 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 4a838cc955..1d7482b2fb 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -4961,6 +4961,17 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) - DEFINE_SPAPR_MACHINE(2_1, "2.1", false); - #endif - -+static void spapr_machine_rhel_default_class_options(MachineClass *mc) -+{ -+ /* -+ * Defaults for the latest behaviour inherited from the base class -+ * can be overriden here for all pseries-rhel* machines. 
-+ */ -+ -+ /* Maximum supported VCPU count */ -+ mc->max_cpus = 384; -+} -+ - /* - * pseries-rhel8.3.0 - * like pseries-5.1 -@@ -4968,10 +4979,8 @@ DEFINE_SPAPR_MACHINE(2_1, "2.1", false); - - static void spapr_machine_rhel830_class_options(MachineClass *mc) - { -- /* Defaults for the latest behaviour inherited from the base class */ -- -- /* Maximum supported VCPU count for all pseries-rhel* machines */ -- mc->max_cpus = 384; -+ /* The default machine type must apply the RHEL specific defaults */ -+ spapr_machine_rhel_default_class_options(mc); - } - - DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", true); --- -2.18.4 - diff --git a/0023-redhat-Define-pseries-rhel8.4.0-machine-type.patch b/0023-redhat-Define-pseries-rhel8.4.0-machine-type.patch deleted file mode 100644 index 406c7e1..0000000 --- a/0023-redhat-Define-pseries-rhel8.4.0-machine-type.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 030b5e6fba510b8b9f8c8690ef6ea63f71628d25 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Fri, 20 Nov 2020 14:00:33 -0500 -Subject: redhat: Define pseries-rhel8.4.0 machine type - -RH-Author: Greg Kurz -Message-id: <20201120140033.578472-4-gkurz@redhat.com> -Patchwork-id: 99792 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 3/3] redhat: Define pseries-rhel8.4.0 machine type -Bugzilla: 1893935 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Laurent Vivier -RH-Acked-by: David Gibson - -From: Greg Kurz - -Signed-off-by: Greg Kurz -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/spapr.c | 25 ++++++++++++++++++++++--- - 1 file changed, 22 insertions(+), 3 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 1d7482b2fb..4f61b64a21 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -4972,6 +4972,19 @@ static void spapr_machine_rhel_default_class_options(MachineClass *mc) - mc->max_cpus = 384; - } - -+/* -+ * pseries-rhel8.4.0 -+ * like pseries-5.2 -+ */ -+ -+static void spapr_machine_rhel840_class_options(MachineClass *mc) -+{ -+ /* The default machine type must apply the RHEL specific defaults */ -+ spapr_machine_rhel_default_class_options(mc); -+} -+ -+DEFINE_SPAPR_MACHINE(rhel840, "rhel8.4.0", true); -+ - /* - * pseries-rhel8.3.0 - * like pseries-5.1 -@@ -4979,11 +4992,17 @@ static void spapr_machine_rhel_default_class_options(MachineClass *mc) - - static void spapr_machine_rhel830_class_options(MachineClass *mc) - { -- /* The default machine type must apply the RHEL specific defaults */ -- spapr_machine_rhel_default_class_options(mc); -+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel840_class_options(mc); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, -+ hw_compat_rhel_8_3_len); -+ -+ /* from pseries-5.1 */ -+ smc->pre_5_2_numa_associativity = true; - } - --DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", true); -+DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", false); - - /* - * pseries-rhel8.2.0 --- -2.18.4 - diff --git a/0024-redhat-s390x-add-rhel-8.4.0-compat-machine.patch b/0024-redhat-s390x-add-rhel-8.4.0-compat-machine.patch deleted file mode 100644 index bedb835..0000000 --- a/0024-redhat-s390x-add-rhel-8.4.0-compat-machine.patch +++ /dev/null @@ -1,72 +0,0 @@ -From a6ae745cceee1acc3667f5ba5e007ca6c083f8a8 Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Tue, 1 Dec 2020 17:53:41 -0500 -Subject: redhat: s390x: add rhel-8.4.0 compat machine - -RH-Author: Cornelia Huck -Message-id: <20201201175341.37537-3-cohuck@redhat.com> -Patchwork-id: 100195 -O-Subject: 
[RHEL-AV-8.4.0 qemu-kvm PATCH 2/2] redhat: s390x: add rhel-8.4.0 compat machine -Bugzilla: 1836282 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: David Hildenbrand - -Note that we did not publish a rhel-8.3.0 machine on s390x, so we -need to add the respective hw_compat entry in the rhel-8.2.0 machine. - -Also, the hw_compat entry for 8.1 was missing; however, the contents -there are not relevant for s390x. - -Signed-off-by: Cornelia Huck -Signed-off-by: Danilo C. L. de Paula ---- - hw/s390x/s390-virtio-ccw.c | 17 ++++++++++++++++- - 1 file changed, 16 insertions(+), 1 deletion(-) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index b8dde7e4e1..c7b5bcb06b 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1056,15 +1056,29 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) - DEFINE_CCW_MACHINE(2_4, "2.4", false); - #endif - -+static void ccw_machine_rhel840_instance_options(MachineState *machine) -+{ -+} -+ -+static void ccw_machine_rhel840_class_options(MachineClass *mc) -+{ -+} -+DEFINE_CCW_MACHINE(rhel840, "rhel8.4.0", true); -+ - static void ccw_machine_rhel820_instance_options(MachineState *machine) - { -+ ccw_machine_rhel840_instance_options(machine); - } - - static void ccw_machine_rhel820_class_options(MachineClass *mc) - { -+ ccw_machine_rhel840_class_options(mc); - mc->fixup_ram_size = s390_fixup_ram_size; -+ /* we did not publish a rhel8.3.0 machine */ -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, hw_compat_rhel_8_2_len); - } --DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", true); -+DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", false); - - static void ccw_machine_rhel760_instance_options(MachineState *machine) - { -@@ -1086,6 +1100,7 @@ static void ccw_machine_rhel760_class_options(MachineClass *mc) - { - ccw_machine_rhel820_class_options(mc); - /* We never published the s390x 
version of RHEL-AV 8.0 and 8.1, so add this here */ -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); - compat_props_add(mc->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); - compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); - } --- -2.18.4 - diff --git a/0027-block-vpc-Make-vpc_open-read-the-full-dynamic-header.patch b/0027-block-vpc-Make-vpc_open-read-the-full-dynamic-header.patch deleted file mode 100644 index 7af0b8d..0000000 --- a/0027-block-vpc-Make-vpc_open-read-the-full-dynamic-header.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 974af930d4e5cae5611bb2e3a5ac18d3bda15a68 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Thu, 17 Dec 2020 17:58:43 +0100 -Subject: block/vpc: Make vpc_open() read the full dynamic header - -The dynamic header's size is 1024 bytes. - -vpc_open() reads only the 512 bytes of the dynamic header into buf[]. -Works, because it doesn't actually access the second half. However, a -colleague told me that GCC 11 warns: - - ../block/vpc.c:358:51: error: array subscript 'struct VHDDynDiskHeader[0]' is partly outside array bounds of 'uint8_t[512]' [-Werror=array-bounds] - -Clean up to read the full header. - -Rename buf[] to dyndisk_header_buf[] while there. 
- -Signed-off-by: Markus Armbruster ---- - block/vpc.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/block/vpc.c b/block/vpc.c -index 1ab55f9287..2fcf3f6283 100644 ---- a/block/vpc.c -+++ b/block/vpc.c -@@ -220,7 +220,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, - QemuOpts *opts = NULL; - Error *local_err = NULL; - bool use_chs; -- uint8_t buf[HEADER_SIZE]; -+ uint8_t dyndisk_header_buf[1024]; - uint32_t checksum; - uint64_t computed_size; - uint64_t pagetable_size; -@@ -340,14 +340,14 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, - } - - if (disk_type == VHD_DYNAMIC) { -- ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf, -- HEADER_SIZE); -+ ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), -+ dyndisk_header_buf, 1024); - if (ret < 0) { - error_setg(errp, "Error reading dynamic VHD header"); - goto fail; - } - -- dyndisk_header = (VHDDynDiskHeader *) buf; -+ dyndisk_header = (VHDDynDiskHeader *)dyndisk_header_buf; - - if (strncmp(dyndisk_header->magic, "cxsparse", 8)) { - error_setg(errp, "Invalid header magic"); --- -2.18.4 - diff --git a/0028-GCC-11-warnings-hacks.patch b/0028-GCC-11-warnings-hacks.patch deleted file mode 100644 index 86ae8c2..0000000 --- a/0028-GCC-11-warnings-hacks.patch +++ /dev/null @@ -1,163 +0,0 @@ -From 6e9564986a00456c6748cf888d9ba9f7f0db01bf Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Mon, 4 Jan 2021 07:47:03 +0100 -Subject: GCC 11 warnings hacks - ---- - hw/scsi/scsi-disk.c | 13 +++++++------ - net/eth.c | 4 +++- - target/s390x/kvm.c | 2 +- - target/s390x/misc_helper.c | 2 +- - tcg/aarch64/tcg-target.c.inc | 3 +-- - tests/test-block-iothread.c | 12 ++++++------ - 6 files changed, 19 insertions(+), 17 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index 90841ad791..8ce77777d3 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -2578,14 +2578,15 @@ static void 
scsi_disk_new_request_dump(uint32_t lun, uint32_t tag, uint8_t *buf) - int len = scsi_cdb_length(buf); - char *line_buffer, *p; - -- line_buffer = g_malloc(len * 5 + 1); -+ if (len > 0) { -+ line_buffer = g_malloc(len * 5 + 1); -+ for (i = 0, p = line_buffer; i < len; i++) { -+ p += sprintf(p, " 0x%02x", buf[i]); -+ } -+ trace_scsi_disk_new_request(lun, tag, line_buffer); - -- for (i = 0, p = line_buffer; i < len; i++) { -- p += sprintf(p, " 0x%02x", buf[i]); -+ g_free(line_buffer); - } -- trace_scsi_disk_new_request(lun, tag, line_buffer); -- -- g_free(line_buffer); - } - - static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun, -diff --git a/net/eth.c b/net/eth.c -index 1e0821c5f8..041ac4865a 100644 ---- a/net/eth.c -+++ b/net/eth.c -@@ -405,6 +405,8 @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags, - struct ip6_ext_hdr *ext_hdr, - struct in6_address *dst_addr) - { -+#pragma GCC diagnostic push -+#pragma GCC diagnostic ignored "-Warray-bounds" - struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr; - - if ((rthdr->rtype == 2) && -@@ -424,7 +426,7 @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags, - - return bytes_read == sizeof(*dst_addr); - } -- -+#pragma GCC diagnostic pop - return false; - } - -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 1839cc6648..ab1ca6b1bf 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -1918,7 +1918,7 @@ static void insert_stsi_3_2_2(S390CPU *cpu, __u64 addr, uint8_t ar) - */ - if (qemu_name) { - strncpy((char *)sysib.ext_names[0], qemu_name, -- sizeof(sysib.ext_names[0])); -+ sizeof(sysib.ext_names[0])-1); - } else { - strcpy((char *)sysib.ext_names[0], "KVMguest"); - } -diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c -index 58dbc023eb..adaf4145e6 100644 ---- a/target/s390x/misc_helper.c -+++ b/target/s390x/misc_helper.c -@@ -370,7 +370,7 @@ uint32_t HELPER(stsi)(CPUS390XState *env, uint64_t a0, uint64_t 
r0, uint64_t r1) - MIN(sizeof(sysib.sysib_322.vm[0].name), - strlen(qemu_name))); - strncpy((char *)sysib.sysib_322.ext_names[0], qemu_name, -- sizeof(sysib.sysib_322.ext_names[0])); -+ sizeof(sysib.sysib_322.ext_names[0])-1); - } else { - ebcdic_put(sysib.sysib_322.vm[0].name, "TCGguest", 8); - strcpy((char *)sysib.sysib_322.ext_names[0], "TCGguest"); -diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc -index 26f71cb599..fe6bdbf721 100644 ---- a/tcg/aarch64/tcg-target.c.inc -+++ b/tcg/aarch64/tcg-target.c.inc -@@ -1852,8 +1852,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - static tcg_insn_unit *tb_ret_addr; - - static void tcg_out_op(TCGContext *s, TCGOpcode opc, -- const TCGArg args[TCG_MAX_OP_ARGS], -- const int const_args[TCG_MAX_OP_ARGS]) -+ const TCGArg *args, const int *const_args) - { - /* 99% of the time, we can signal the use of extension registers - by looking to see if the opcode handles 64-bit data. */ -diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c -index 3f866a35c6..bc64b50e66 100644 ---- a/tests/test-block-iothread.c -+++ b/tests/test-block-iothread.c -@@ -75,7 +75,7 @@ static BlockDriver bdrv_test = { - - static void test_sync_op_pread(BdrvChild *c) - { -- uint8_t buf[512]; -+ uint8_t buf[512] = {0}; - int ret; - - /* Success */ -@@ -89,7 +89,7 @@ static void test_sync_op_pread(BdrvChild *c) - - static void test_sync_op_pwrite(BdrvChild *c) - { -- uint8_t buf[512]; -+ uint8_t buf[512] = {0}; - int ret; - - /* Success */ -@@ -103,7 +103,7 @@ static void test_sync_op_pwrite(BdrvChild *c) - - static void test_sync_op_blk_pread(BlockBackend *blk) - { -- uint8_t buf[512]; -+ uint8_t buf[512] = {0}; - int ret; - - /* Success */ -@@ -117,7 +117,7 @@ static void test_sync_op_blk_pread(BlockBackend *blk) - - static void test_sync_op_blk_pwrite(BlockBackend *blk) - { -- uint8_t buf[512]; -+ uint8_t buf[512] = {0}; - int ret; - - /* Success */ -@@ -131,7 +131,7 @@ static void 
test_sync_op_blk_pwrite(BlockBackend *blk) - - static void test_sync_op_load_vmstate(BdrvChild *c) - { -- uint8_t buf[512]; -+ uint8_t buf[512] = {0}; - int ret; - - /* Error: Driver does not support snapshots */ -@@ -141,7 +141,7 @@ static void test_sync_op_load_vmstate(BdrvChild *c) - - static void test_sync_op_save_vmstate(BdrvChild *c) - { -- uint8_t buf[512]; -+ uint8_t buf[512] = {0}; - int ret; - - /* Error: Driver does not support snapshots */ --- -2.18.4 - diff --git a/0029-Disable-problematic-tests-for-initial-build.patch b/0029-Disable-problematic-tests-for-initial-build.patch deleted file mode 100644 index 84743b8..0000000 --- a/0029-Disable-problematic-tests-for-initial-build.patch +++ /dev/null @@ -1,41 +0,0 @@ -From bb42f8a495aa0da2410109de14aca901b8c4ac4f Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Tue, 5 Jan 2021 07:40:08 +0100 -Subject: Disable problematic tests for initial build - ---- - tests/meson.build | 2 +- - tests/qtest/meson.build | 4 ++-- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/tests/meson.build b/tests/meson.build -index afeb6be689..e562a0499e 100644 ---- a/tests/meson.build -+++ b/tests/meson.build -@@ -136,7 +136,7 @@ if have_block - 'test-blockjob': [testblock], - 'test-blockjob-txn': [testblock], - 'test-block-backend': [testblock], -- 'test-block-iothread': [testblock], -+# 'test-block-iothread': [testblock], - 'test-write-threshold': [testblock], - 'test-crypto-hash': [crypto], - 'test-crypto-hmac': [crypto], -diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index 15ed460ff0..70ef8c236c 100644 ---- a/tests/qtest/meson.build -+++ b/tests/qtest/meson.build -@@ -150,8 +150,8 @@ qtests_aarch64 = \ - (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-swtpm-test'] : []) + \ - ['arm-cpu-features', - 'numa-test', -- 'boot-serial-test', -- 'migration-test'] -+ 'boot-serial-test'] -+# 'migration-test'] - - qtests_s390x = \ - (slirp.found() ? 
['pxe-test', 'test-netfilter'] : []) + \ --- -2.18.4 - diff --git a/0030-Revert-GCC-11-warnings-hacks.patch b/0030-Revert-GCC-11-warnings-hacks.patch deleted file mode 100644 index 6f13efa..0000000 --- a/0030-Revert-GCC-11-warnings-hacks.patch +++ /dev/null @@ -1,166 +0,0 @@ -From f488becdbb12c6001a2524d049371196a05f5256 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Fri, 15 Jan 2021 09:27:40 +0100 -Subject: Revert "GCC 11 warnings hacks" - -This reverts commit 6e9564986a00456c6748cf888d9ba9f7f0db01bf. - -Hacks solved upstream. Going to import upstream solutions. ---- - hw/scsi/scsi-disk.c | 13 ++++++------- - net/eth.c | 4 +--- - target/s390x/kvm.c | 2 +- - target/s390x/misc_helper.c | 2 +- - tcg/aarch64/tcg-target.c.inc | 3 ++- - tests/test-block-iothread.c | 12 ++++++------ - 6 files changed, 17 insertions(+), 19 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index 8ce77777d3..90841ad791 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -2578,15 +2578,14 @@ static void scsi_disk_new_request_dump(uint32_t lun, uint32_t tag, uint8_t *buf) - int len = scsi_cdb_length(buf); - char *line_buffer, *p; - -- if (len > 0) { -- line_buffer = g_malloc(len * 5 + 1); -- for (i = 0, p = line_buffer; i < len; i++) { -- p += sprintf(p, " 0x%02x", buf[i]); -- } -- trace_scsi_disk_new_request(lun, tag, line_buffer); -+ line_buffer = g_malloc(len * 5 + 1); - -- g_free(line_buffer); -+ for (i = 0, p = line_buffer; i < len; i++) { -+ p += sprintf(p, " 0x%02x", buf[i]); - } -+ trace_scsi_disk_new_request(lun, tag, line_buffer); -+ -+ g_free(line_buffer); - } - - static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun, -diff --git a/net/eth.c b/net/eth.c -index 041ac4865a..1e0821c5f8 100644 ---- a/net/eth.c -+++ b/net/eth.c -@@ -405,8 +405,6 @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags, - struct ip6_ext_hdr *ext_hdr, - struct in6_address *dst_addr) - { --#pragma GCC diagnostic push --#pragma 
GCC diagnostic ignored "-Warray-bounds" - struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr; - - if ((rthdr->rtype == 2) && -@@ -426,7 +424,7 @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags, - - return bytes_read == sizeof(*dst_addr); - } --#pragma GCC diagnostic pop -+ - return false; - } - -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index ab1ca6b1bf..1839cc6648 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -1918,7 +1918,7 @@ static void insert_stsi_3_2_2(S390CPU *cpu, __u64 addr, uint8_t ar) - */ - if (qemu_name) { - strncpy((char *)sysib.ext_names[0], qemu_name, -- sizeof(sysib.ext_names[0])-1); -+ sizeof(sysib.ext_names[0])); - } else { - strcpy((char *)sysib.ext_names[0], "KVMguest"); - } -diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c -index adaf4145e6..58dbc023eb 100644 ---- a/target/s390x/misc_helper.c -+++ b/target/s390x/misc_helper.c -@@ -370,7 +370,7 @@ uint32_t HELPER(stsi)(CPUS390XState *env, uint64_t a0, uint64_t r0, uint64_t r1) - MIN(sizeof(sysib.sysib_322.vm[0].name), - strlen(qemu_name))); - strncpy((char *)sysib.sysib_322.ext_names[0], qemu_name, -- sizeof(sysib.sysib_322.ext_names[0])-1); -+ sizeof(sysib.sysib_322.ext_names[0])); - } else { - ebcdic_put(sysib.sysib_322.vm[0].name, "TCGguest", 8); - strcpy((char *)sysib.sysib_322.ext_names[0], "TCGguest"); -diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc -index fe6bdbf721..26f71cb599 100644 ---- a/tcg/aarch64/tcg-target.c.inc -+++ b/tcg/aarch64/tcg-target.c.inc -@@ -1852,7 +1852,8 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, - static tcg_insn_unit *tb_ret_addr; - - static void tcg_out_op(TCGContext *s, TCGOpcode opc, -- const TCGArg *args, const int *const_args) -+ const TCGArg args[TCG_MAX_OP_ARGS], -+ const int const_args[TCG_MAX_OP_ARGS]) - { - /* 99% of the time, we can signal the use of extension registers - by looking to see if the opcode 
handles 64-bit data. */ -diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c -index bc64b50e66..3f866a35c6 100644 ---- a/tests/test-block-iothread.c -+++ b/tests/test-block-iothread.c -@@ -75,7 +75,7 @@ static BlockDriver bdrv_test = { - - static void test_sync_op_pread(BdrvChild *c) - { -- uint8_t buf[512] = {0}; -+ uint8_t buf[512]; - int ret; - - /* Success */ -@@ -89,7 +89,7 @@ static void test_sync_op_pread(BdrvChild *c) - - static void test_sync_op_pwrite(BdrvChild *c) - { -- uint8_t buf[512] = {0}; -+ uint8_t buf[512]; - int ret; - - /* Success */ -@@ -103,7 +103,7 @@ static void test_sync_op_pwrite(BdrvChild *c) - - static void test_sync_op_blk_pread(BlockBackend *blk) - { -- uint8_t buf[512] = {0}; -+ uint8_t buf[512]; - int ret; - - /* Success */ -@@ -117,7 +117,7 @@ static void test_sync_op_blk_pread(BlockBackend *blk) - - static void test_sync_op_blk_pwrite(BlockBackend *blk) - { -- uint8_t buf[512] = {0}; -+ uint8_t buf[512]; - int ret; - - /* Success */ -@@ -131,7 +131,7 @@ static void test_sync_op_blk_pwrite(BlockBackend *blk) - - static void test_sync_op_load_vmstate(BdrvChild *c) - { -- uint8_t buf[512] = {0}; -+ uint8_t buf[512]; - int ret; - - /* Error: Driver does not support snapshots */ -@@ -141,7 +141,7 @@ static void test_sync_op_load_vmstate(BdrvChild *c) - - static void test_sync_op_save_vmstate(BdrvChild *c) - { -- uint8_t buf[512] = {0}; -+ uint8_t buf[512]; - int ret; - - /* Error: Driver does not support snapshots */ --- -2.18.4 - diff --git a/0031-s390x-Use-strpadcpy-for-copying-vm-name.patch b/0031-s390x-Use-strpadcpy-for-copying-vm-name.patch deleted file mode 100644 index a7827ee..0000000 --- a/0031-s390x-Use-strpadcpy-for-copying-vm-name.patch +++ /dev/null @@ -1,84 +0,0 @@ -From adbabd33e81f46c6b29c4b940c053e562e4f55fd Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Fri, 15 Jan 2021 09:28:59 +0100 -Subject: s390x: Use strpadcpy for copying vm name - -Using strncpy with length equal to the size of 
target array, GCC 11 -reports following warning: - - warning: '__builtin_strncpy' specified bound 256 equals destination size [-Wstringop-truncation] - -We can prevent this warning by using strpadcpy that copies string -up to specified length, zeroes target array after copied string -and does not raise warning when length is equal to target array -size (and ending '\0' is discarded). - -Signed-off-by: Miroslav Rezanina ---- - target/s390x/kvm.c | 12 +++++------- - target/s390x/misc_helper.c | 7 +++++-- - 2 files changed, 10 insertions(+), 9 deletions(-) - -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 1839cc6648..c08b5bc2de 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -29,6 +29,7 @@ - #include "internal.h" - #include "kvm_s390x.h" - #include "sysemu/kvm_int.h" -+#include "qemu/cutils.h" - #include "qapi/error.h" - #include "qemu/error-report.h" - #include "qemu/timer.h" -@@ -1910,18 +1911,15 @@ static void insert_stsi_3_2_2(S390CPU *cpu, __u64 addr, uint8_t ar) - strlen(qemu_name))); - } - sysib.vm[0].ext_name_encoding = 2; /* 2 = UTF-8 */ -- memset(sysib.ext_names[0], 0, sizeof(sysib.ext_names[0])); - /* If hypervisor specifies zero Extended Name in STSI322 SYSIB, it's - * considered by s390 as not capable of providing any Extended Name. - * Therefore if no name was specified on qemu invocation, we go with the - * same "KVMguest" default, which KVM has filled into short name field. 
- */ -- if (qemu_name) { -- strncpy((char *)sysib.ext_names[0], qemu_name, -- sizeof(sysib.ext_names[0])); -- } else { -- strcpy((char *)sysib.ext_names[0], "KVMguest"); -- } -+ strpadcpy((char *)sysib.ext_names[0], -+ sizeof(sysib.ext_names[0]), -+ qemu_name ?: "KVMguest", '\0'); -+ - /* Insert UUID */ - memcpy(sysib.vm[0].uuid, &qemu_uuid, sizeof(sysib.vm[0].uuid)); - -diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c -index 58dbc023eb..7ea90d414a 100644 ---- a/target/s390x/misc_helper.c -+++ b/target/s390x/misc_helper.c -@@ -19,6 +19,7 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/cutils.h" - #include "qemu/main-loop.h" - #include "cpu.h" - #include "internal.h" -@@ -369,8 +370,10 @@ uint32_t HELPER(stsi)(CPUS390XState *env, uint64_t a0, uint64_t r0, uint64_t r1) - ebcdic_put(sysib.sysib_322.vm[0].name, qemu_name, - MIN(sizeof(sysib.sysib_322.vm[0].name), - strlen(qemu_name))); -- strncpy((char *)sysib.sysib_322.ext_names[0], qemu_name, -- sizeof(sysib.sysib_322.ext_names[0])); -+ strpadcpy((char *)sysib.sysib_322.ext_names[0], -+ sizeof(sysib.sysib_322.ext_names[0]), -+ qemu_name, '\0'); -+ - } else { - ebcdic_put(sysib.sysib_322.vm[0].name, "TCGguest", 8); - strcpy((char *)sysib.sysib_322.ext_names[0], "TCGguest"); --- -2.18.4 - diff --git a/0032-tcg-Restrict-tcg_out_op-to-arrays-of-TCG_MAX_OP_ARGS.patch b/0032-tcg-Restrict-tcg_out_op-to-arrays-of-TCG_MAX_OP_ARGS.patch deleted file mode 100644 index 6cd75ce..0000000 --- a/0032-tcg-Restrict-tcg_out_op-to-arrays-of-TCG_MAX_OP_ARGS.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 8773f3688ca87e5e7da2e1a5170d0bde9a54eae0 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Fri, 15 Jan 2021 09:38:53 +0100 -Subject: tcg: Restrict tcg_out_op() to arrays of TCG_MAX_OP_ARGS elements - ---- - tcg/aarch64/tcg-target.c.inc | 3 ++- - tcg/i386/tcg-target.c.inc | 6 ++++-- - tcg/ppc/tcg-target.c.inc | 8 +++++--- - tcg/s390/tcg-target.c.inc | 3 ++- - tcg/tcg.c | 19 +++++++++++-------- - 5 files 
changed, 24 insertions(+), 15 deletions(-) - -diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc -index 26f71cb599..ce8689e889 100644 ---- a/tcg/aarch64/tcg-target.c.inc -+++ b/tcg/aarch64/tcg-target.c.inc -@@ -2271,7 +2271,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, - - static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, - unsigned vecl, unsigned vece, -- const TCGArg *args, const int *const_args) -+ const TCGArg args[TCG_MAX_OP_ARGS], -+ const int const_args[TCG_MAX_OP_ARGS]) - { - static const AArch64Insn cmp_insn[16] = { - [TCG_COND_EQ] = I3616_CMEQ, -diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc -index d8797ed398..0e557d177a 100644 ---- a/tcg/i386/tcg-target.c.inc -+++ b/tcg/i386/tcg-target.c.inc -@@ -2242,7 +2242,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) - } - - static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, -- const TCGArg *args, const int *const_args) -+ const TCGArg args[TCG_MAX_OP_ARGS], -+ const int const_args[TCG_MAX_OP_ARGS]) - { - TCGArg a0, a1, a2; - int c, const_a2, vexop, rexw = 0; -@@ -2679,7 +2680,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, - - static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, - unsigned vecl, unsigned vece, -- const TCGArg *args, const int *const_args) -+ const TCGArg args[TCG_MAX_OP_ARGS], -+ const int const_args[TCG_MAX_OP_ARGS]) - { - static int const add_insn[4] = { - OPC_PADDB, OPC_PADDW, OPC_PADDD, OPC_PADDQ -diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc -index 18ee989f95..b2bc1fc0c4 100644 ---- a/tcg/ppc/tcg-target.c.inc -+++ b/tcg/ppc/tcg-target.c.inc -@@ -2353,8 +2353,9 @@ static void tcg_target_qemu_prologue(TCGContext *s) - tcg_out32(s, BCLR | BO_ALWAYS); - } - --static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, -- const int *const_args) -+static void tcg_out_op(TCGContext *s, TCGOpcode opc, -+ const TCGArg args[TCG_MAX_OP_ARGS], -+ const 
int const_args[TCG_MAX_OP_ARGS]) - { - TCGArg a0, a1, a2; - int c; -@@ -3151,7 +3152,8 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, - - static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, - unsigned vecl, unsigned vece, -- const TCGArg *args, const int *const_args) -+ const TCGArg args[TCG_MAX_OP_ARGS], -+ const int const_args[TCG_MAX_OP_ARGS]) - { - static const uint32_t - add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM }, -diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc -index c5e096449b..79753c8af7 100644 ---- a/tcg/s390/tcg-target.c.inc -+++ b/tcg/s390/tcg-target.c.inc -@@ -1746,7 +1746,8 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, - case glue(glue(INDEX_op_,x),_i64) - - static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, -- const TCGArg *args, const int *const_args) -+ const TCGArg args[TCG_MAX_OP_ARGS], -+ const int const_args[TCG_MAX_OP_ARGS]) - { - S390Opcode op, op2; - TCGArg a0, a1, a2; -diff --git a/tcg/tcg.c b/tcg/tcg.c -index 43c6cf8f52..2d0116d29f 100644 ---- a/tcg/tcg.c -+++ b/tcg/tcg.c -@@ -109,8 +109,9 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, - static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); - static void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg ret, tcg_target_long arg); --static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, -- const int *const_args); -+static void tcg_out_op(TCGContext *s, TCGOpcode opc, -+ const TCGArg args[TCG_MAX_OP_ARGS], -+ const int const_args[TCG_MAX_OP_ARGS]); - #if TCG_TARGET_MAYBE_vec - static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, - TCGReg dst, TCGReg src); -@@ -118,9 +119,10 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece, - TCGReg dst, TCGReg base, intptr_t offset); - static void tcg_out_dupi_vec(TCGContext *s, TCGType type, - TCGReg dst, tcg_target_long arg); --static 
void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, -- unsigned vece, const TCGArg *args, -- const int *const_args); -+static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, -+ unsigned vecl, unsigned vece, -+ const TCGArg args[TCG_MAX_OP_ARGS], -+ const int const_args[TCG_MAX_OP_ARGS]); - #else - static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece, - TCGReg dst, TCGReg src) -@@ -137,9 +139,10 @@ static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, - { - g_assert_not_reached(); - } --static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl, -- unsigned vece, const TCGArg *args, -- const int *const_args) -+static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, -+ unsigned vecl, unsigned vece, -+ const TCGArg args[TCG_MAX_OP_ARGS], -+ const int const_args[TCG_MAX_OP_ARGS]) - { - g_assert_not_reached(); - } --- -2.18.4 - diff --git a/0033-net-eth-Simplify-_eth_get_rss_ex_dst_addr.patch b/0033-net-eth-Simplify-_eth_get_rss_ex_dst_addr.patch deleted file mode 100644 index 29a1b7e..0000000 --- a/0033-net-eth-Simplify-_eth_get_rss_ex_dst_addr.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 76ed390a52769c5ca64db5496a2adcb43df72035 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Fri, 15 Jan 2021 09:42:33 +0100 -Subject: net/eth: Simplify _eth_get_rss_ex_dst_addr() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The length field is already contained in the ip6_ext_hdr structure. -Check it direcly in eth_parse_ipv6_hdr() before calling -_eth_get_rss_ex_dst_addr(), which gets a bit simplified. 
- -Signed-off-by: Philippe Mathieu-Daudé ---- - net/eth.c | 14 +++++++------- - 1 file changed, 7 insertions(+), 7 deletions(-) - -diff --git a/net/eth.c b/net/eth.c -index 1e0821c5f8..7d4dd48c1f 100644 ---- a/net/eth.c -+++ b/net/eth.c -@@ -407,9 +407,7 @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags, - { - struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr; - -- if ((rthdr->rtype == 2) && -- (rthdr->len == sizeof(struct in6_address) / 8) && -- (rthdr->segleft == 1)) { -+ if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) { - - size_t input_size = iov_size(pkt, pkt_frags); - size_t bytes_read; -@@ -528,10 +526,12 @@ bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags, - } - - if (curr_ext_hdr_type == IP6_ROUTING) { -- info->rss_ex_dst_valid = -- _eth_get_rss_ex_dst_addr(pkt, pkt_frags, -- ip6hdr_off + info->full_hdr_len, -- &ext_hdr, &info->rss_ex_dst); -+ if (ext_hdr.ip6r_len == sizeof(struct in6_address) / 8) { -+ info->rss_ex_dst_valid = -+ _eth_get_rss_ex_dst_addr(pkt, pkt_frags, -+ ip6hdr_off + info->full_hdr_len, -+ &ext_hdr, &info->rss_ex_dst); -+ } - } else if (curr_ext_hdr_type == IP6_DESTINATON) { - info->rss_ex_src_valid = - _eth_get_rss_ex_src_addr(pkt, pkt_frags, --- -2.18.4 - diff --git a/0034-net-eth-Fix-stack-buffer-overflow-in.patch b/0034-net-eth-Fix-stack-buffer-overflow-in.patch deleted file mode 100644 index 5be9d9e..0000000 --- a/0034-net-eth-Fix-stack-buffer-overflow-in.patch +++ /dev/null @@ -1,196 +0,0 @@ -From 9abf30d739cfe5a7808f1e30ec85c0cfd73b67cb Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Fri, 15 Jan 2021 09:43:31 +0100 -Subject: net/eth: Fix stack-buffer-overflow in -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -QEMU fuzzer reported a buffer overflow in _eth_get_rss_ex_dst_addr() -reproducible as: - - $ cat << EOF | ./qemu-system-i386 -M pc-q35-5.0 \ - -accel qtest -monitor none \ - -serial none 
-nographic -qtest stdio - outl 0xcf8 0x80001010 - outl 0xcfc 0xe1020000 - outl 0xcf8 0x80001004 - outw 0xcfc 0x7 - write 0x25 0x1 0x86 - write 0x26 0x1 0xdd - write 0x4f 0x1 0x2b - write 0xe1020030 0x4 0x190002e1 - write 0xe102003a 0x2 0x0807 - write 0xe1020048 0x4 0x12077cdd - write 0xe1020400 0x4 0xba077cdd - write 0xe1020420 0x4 0x190002e1 - write 0xe1020428 0x4 0x3509d807 - write 0xe1020438 0x1 0xe2 - EOF - ================================================================= - ==2859770==ERROR: AddressSanitizer: stack-buffer-overflow on address 0x7ffdef904902 at pc 0x561ceefa78de bp 0x7ffdef904820 sp 0x7ffdef904818 - READ of size 1 at 0x7ffdef904902 thread T0 - #0 0x561ceefa78dd in _eth_get_rss_ex_dst_addr net/eth.c:410:17 - #1 0x561ceefa41fb in eth_parse_ipv6_hdr net/eth.c:532:17 - #2 0x561cef7de639 in net_tx_pkt_parse_headers hw/net/net_tx_pkt.c:228:14 - #3 0x561cef7dbef4 in net_tx_pkt_parse hw/net/net_tx_pkt.c:273:9 - #4 0x561ceec29f22 in e1000e_process_tx_desc hw/net/e1000e_core.c:730:29 - #5 0x561ceec28eac in e1000e_start_xmit hw/net/e1000e_core.c:927:9 - #6 0x561ceec1baab in e1000e_set_tdt hw/net/e1000e_core.c:2444:9 - #7 0x561ceebf300e in e1000e_core_write hw/net/e1000e_core.c:3256:9 - #8 0x561cef3cd4cd in e1000e_mmio_write hw/net/e1000e.c:110:5 - - Address 0x7ffdef904902 is located in stack of thread T0 at offset 34 in frame - #0 0x561ceefa320f in eth_parse_ipv6_hdr net/eth.c:486 - - This frame has 1 object(s): - [32, 34) 'ext_hdr' (line 487) <== Memory access at offset 34 overflows this variable - HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork - (longjmp and C++ exceptions *are* supported) - SUMMARY: AddressSanitizer: stack-buffer-overflow net/eth.c:410:17 in _eth_get_rss_ex_dst_addr - Shadow bytes around the buggy address: - 0x10003df188d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - 0x10003df188e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - 0x10003df188f0: 00 00 00 00 00 00 
00 00 00 00 00 00 00 00 00 00 - 0x10003df18900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - 0x10003df18910: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 - =>0x10003df18920:[02]f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 - 0x10003df18930: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - 0x10003df18940: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - 0x10003df18950: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - 0x10003df18960: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - 0x10003df18970: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - Shadow byte legend (one shadow byte represents 8 application bytes): - Addressable: 00 - Partially addressable: 01 02 03 04 05 06 07 - Stack left redzone: f1 - Stack right redzone: f3 - ==2859770==ABORTING - -Similarly GCC 11 reports: - - net/eth.c: In function 'eth_parse_ipv6_hdr': - net/eth.c:410:15: error: array subscript 'struct ip6_ext_hdr_routing[0]' is partly outside array bounds of 'struct ip6_ext_hdr[1]' [-Werror=array-bounds] - 410 | if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) { - | ~~~~~^~~~~~~ - net/eth.c:485:24: note: while referencing 'ext_hdr' - 485 | struct ip6_ext_hdr ext_hdr; - | ^~~~~~~ - net/eth.c:410:38: error: array subscript 'struct ip6_ext_hdr_routing[0]' is partly outside array bounds of 'struct ip6_ext_hdr[1]' [-Werror=array-bounds] - 410 | if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) { - | ~~~~~^~~~~~~~~ - net/eth.c:485:24: note: while referencing 'ext_hdr' - 485 | struct ip6_ext_hdr ext_hdr; - | ^~~~~~~ - -In eth_parse_ipv6_hdr() we called iov_to_buf() to fill the 2 bytes of -the 'ext_hdr' buffer, then _eth_get_rss_ex_dst_addr() tries to access -beside the 2 filled bytes. - -Fix by reworking the function, filling the full rt_hdr buffer on the -stack calling iov_to_buf() again. 
- -Cc: qemu-stable@nongnu.org -Buglink: https://bugs.launchpad.net/qemu/+bug/1879531 -Reported-by: Alexander Bulekov -Reported-by: Miroslav Rezanina -Fixes: eb700029c78 ("net_pkt: Extend packet abstraction as required by e1000e functionality") -Signed-off-by: Philippe Mathieu-Daudé -Signed-off-by: Miroslav Rezanina ---- - net/eth.c | 25 +++++++++++-------------- - tests/qtest/fuzz-test.c | 29 +++++++++++++++++++++++++++++ - 2 files changed, 40 insertions(+), 14 deletions(-) - -diff --git a/net/eth.c b/net/eth.c -index 7d4dd48c1f..ae4db37888 100644 ---- a/net/eth.c -+++ b/net/eth.c -@@ -401,26 +401,23 @@ eth_is_ip6_extension_header_type(uint8_t hdr_type) - - static bool - _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags, -- size_t rthdr_offset, -+ size_t ext_hdr_offset, - struct ip6_ext_hdr *ext_hdr, - struct in6_address *dst_addr) - { -- struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr; -- -- if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) { -- -- size_t input_size = iov_size(pkt, pkt_frags); -- size_t bytes_read; -+ struct ip6_ext_hdr_routing rt_hdr; -+ size_t input_size = iov_size(pkt, pkt_frags); -+ size_t bytes_read; - -- if (input_size < rthdr_offset + sizeof(*ext_hdr)) { -- return false; -- } -+ if (input_size < ext_hdr_offset + sizeof(rt_hdr)) { -+ return false; -+ } - -- bytes_read = iov_to_buf(pkt, pkt_frags, -- rthdr_offset + sizeof(*ext_hdr), -- dst_addr, sizeof(*dst_addr)); -+ bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset, -+ &rt_hdr, sizeof(rt_hdr)); - -- return bytes_read == sizeof(*dst_addr); -+ if ((rt_hdr.rtype == 2) && (rt_hdr.segleft == 1)) { -+ return bytes_read == sizeof(*ext_hdr) + sizeof(*dst_addr); - } - - return false; -diff --git a/tests/qtest/fuzz-test.c b/tests/qtest/fuzz-test.c -index 9cb4c42bde..2692d556d9 100644 ---- a/tests/qtest/fuzz-test.c -+++ b/tests/qtest/fuzz-test.c -@@ -47,6 +47,32 @@ static void test_lp1878642_pci_bus_get_irq_level_assert(void) - qtest_outl(s, 0x5d02, 
0xebed205d); - } - -+/* -+ * https://bugs.launchpad.net/qemu/+bug/1879531 -+ */ -+static void test_lp1879531_eth_get_rss_ex_dst_addr(void) -+{ -+ QTestState *s; -+ -+ s = qtest_init("-nographic -monitor none -serial none -M pc-q35-5.0"); -+ -+ qtest_outl(s, 0xcf8 0x80001010); -+ qtest_outl(s, 0xcfc 0xe1020000); -+ qtest_outl(s, 0xcf8 0x80001004); -+ qtest_outw(s, 0xcfc 0x7); -+ qtest_writeb(s, 0x25 0x1 0x86); -+ qtest_writeb(s, 0x26 0x1 0xdd); -+ qtest_writeb(s, 0x4f 0x1 0x2b); -+ qtest_writel(s, 0xe1020030, 0x190002e1); -+ qtest_writew(s, 0xe102003a, 0x0807); -+ qtest_writel(s, 0xe1020048, 0x12077cdd); -+ qtest_writel(s, 0xe1020400, 0xba077cdd); -+ qtest_writel(s, 0xe1020420, 0x190002e1); -+ qtest_writel(s, 0xe1020428, 0x3509d807); -+ qtest_writeb(s, 0xe1020438, 0xe2); -+ qtest_quit(s); -+} -+ - int main(int argc, char **argv) - { - const char *arch = qtest_get_arch(); -@@ -58,6 +84,9 @@ int main(int argc, char **argv) - test_lp1878263_megasas_zero_iov_cnt); - qtest_add_func("fuzz/test_lp1878642_pci_bus_get_irq_level_assert", - test_lp1878642_pci_bus_get_irq_level_assert); -+ qtest_add_func("fuzz/test_lp1879531_eth_get_rss_ex_dst_addr", -+ test_lp1879531_eth_get_rss_ex_dst_addr); -+ - } - - return g_test_run(); --- -2.18.4 - diff --git a/0035-block-nvme-Implement-fake-truncate-coroutine.patch b/0035-block-nvme-Implement-fake-truncate-coroutine.patch deleted file mode 100644 index 237e9e1..0000000 --- a/0035-block-nvme-Implement-fake-truncate-coroutine.patch +++ /dev/null @@ -1,84 +0,0 @@ -From f4c65e14055e208e331a83b9340998ecbe796b5f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Fri, 1 Jan 2021 17:18:13 -0500 -Subject: block/nvme: Implement fake truncate() coroutine -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Philippe Mathieu-Daudé -Message-id: <20210101171813.1734014-2-philmd@redhat.com> -Patchwork-id: 100503 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] block/nvme: 
Implement fake truncate() coroutine -Bugzilla: 1848834 -RH-Acked-by: Thomas Huth -RH-Acked-by: Max Reitz -RH-Acked-by: Stefan Hajnoczi - -NVMe drive cannot be shrunk. - -Since commit c80d8b06cfa we can use the @exact parameter (set -to false) to return success if the block device is larger than -the requested offset (even if we can not be shrunk). - -Use this parameter to implement the NVMe truncate() coroutine, -similarly how it is done for the iscsi and file-posix drivers -(see commit 82325ae5f2f "Evaluate @exact in protocol drivers"). - -Reported-by: Xueqiang Wei -Suggested-by: Max Reitz -Signed-off-by: Philippe Mathieu-Daudé -Message-Id: <20201210125202.858656-1-philmd@redhat.com> -Signed-off-by: Max Reitz -(cherry picked from commit c8807c5edcc8bd8917a5b7531d47ef6a99e07bd8) -Signed-off-by: Philippe Mathieu-Daudé -Signed-off-by: Danilo C. L. de Paula ---- - block/nvme.c | 24 ++++++++++++++++++++++++ - 1 file changed, 24 insertions(+) - -diff --git a/block/nvme.c b/block/nvme.c -index a06a188d53..5a6fbacf4a 100644 ---- a/block/nvme.c -+++ b/block/nvme.c -@@ -1389,6 +1389,29 @@ out: - - } - -+static int coroutine_fn nvme_co_truncate(BlockDriverState *bs, int64_t offset, -+ bool exact, PreallocMode prealloc, -+ BdrvRequestFlags flags, Error **errp) -+{ -+ int64_t cur_length; -+ -+ if (prealloc != PREALLOC_MODE_OFF) { -+ error_setg(errp, "Unsupported preallocation mode '%s'", -+ PreallocMode_str(prealloc)); -+ return -ENOTSUP; -+ } -+ -+ cur_length = nvme_getlength(bs); -+ if (offset != cur_length && exact) { -+ error_setg(errp, "Cannot resize NVMe devices"); -+ return -ENOTSUP; -+ } else if (offset > cur_length) { -+ error_setg(errp, "Cannot grow NVMe devices"); -+ return -EINVAL; -+ } -+ -+ return 0; -+} - - static int nvme_reopen_prepare(BDRVReopenState *reopen_state, - BlockReopenQueue *queue, Error **errp) -@@ -1523,6 +1546,7 @@ static BlockDriver bdrv_nvme = { - .bdrv_close = nvme_close, - .bdrv_getlength = nvme_getlength, - .bdrv_probe_blocksizes = 
nvme_probe_blocksizes, -+ .bdrv_co_truncate = nvme_co_truncate, - - .bdrv_co_preadv = nvme_co_preadv, - .bdrv_co_pwritev = nvme_co_pwritev, --- -2.18.4 - diff --git a/0037-build-system-use-b_staticpic-false.patch b/0037-build-system-use-b_staticpic-false.patch deleted file mode 100644 index b47336b..0000000 --- a/0037-build-system-use-b_staticpic-false.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 50b575b27b9daa331da08d10dbe6524de0580833 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Wed, 16 Dec 2020 17:53:08 -0500 -Subject: build-system: use b_staticpic=false -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20201216175308.1463822-3-pbonzini@redhat.com> -Patchwork-id: 100484 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/2] build-system: use b_staticpic=false -Bugzilla: 1899619 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Danilo de Paula - -Meson 0.56.0 correctly builds non-PIC static libraries with -fPIE if -b_pie=true, while Meson 0.55.3 has a bug that causes the library -to use non-PIE objects and fail to link. Therefore, upstream -QEMU looks at the meson version in order to decide between -b_staticpic=false and b_staticpic=$pie. - -Unfortunately, b_staticpic=$pie still has a negative effect -on performance when you QEMU is compiled with --enable-pie -like RHEL does. Therefore, we have backported the fix -to Meson 0.55.3-3.el8. We can require it and unconditionally -use b_staticpic=false. - -The patch is RHEL-specific, but a similar change is included -in the larger patch for "meson: switch minimum meson version to -0.56.0". - -Signed-off-by: Paolo Bonzini -Signed-off-by: Danilo C. L. 
de Paula ---- - configure | 5 ----- - meson.build | 4 ++-- - redhat/qemu-kvm.spec.template | 2 +- - 3 files changed, 3 insertions(+), 8 deletions(-) - -diff --git a/configure b/configure -index 18c26e0389..d60097c0d4 100755 ---- a/configure -+++ b/configure -@@ -6979,10 +6979,6 @@ fi - mv $cross config-meson.cross - - rm -rf meson-private meson-info meson-logs --unset staticpic --if ! version_ge "$($meson --version)" 0.56.0; then -- staticpic=$(if test "$pie" = yes; then echo true; else echo false; fi) --fi - NINJA=$ninja $meson setup \ - --prefix "$prefix" \ - --libdir "$libdir" \ -@@ -7002,7 +6998,6 @@ NINJA=$ninja $meson setup \ - -Dwerror=$(if test "$werror" = yes; then echo true; else echo false; fi) \ - -Dstrip=$(if test "$strip_opt" = yes; then echo true; else echo false; fi) \ - -Db_pie=$(if test "$pie" = yes; then echo true; else echo false; fi) \ -- ${staticpic:+-Db_staticpic=$staticpic} \ - -Db_coverage=$(if test "$gcov" = yes; then echo true; else echo false; fi) \ - -Dmalloc=$malloc -Dmalloc_trim=$malloc_trim -Dsparse=$sparse \ - -Dkvm=$kvm -Dhax=$hax -Dwhpx=$whpx -Dhvf=$hvf \ -diff --git a/meson.build b/meson.build -index 8c38b2ea36..c482d075d5 100644 ---- a/meson.build -+++ b/meson.build -@@ -1,6 +1,6 @@ - project('qemu', ['c'], meson_version: '>=0.55.0', -- default_options: ['warning_level=1', 'c_std=gnu99', 'cpp_std=gnu++11', 'b_colorout=auto'] + -- (meson.version().version_compare('>=0.56.0') ? 
[ 'b_staticpic=false' ] : []), -+ default_options: ['warning_level=1', 'c_std=gnu99', 'cpp_std=gnu++11', 'b_colorout=auto', -+ 'b_staticpic=false' ], - version: run_command('head', meson.source_root() / 'VERSION').stdout().strip()) - - not_found = dependency('', required: false) diff --git a/0038-spapr-Fix-buffer-overflow-in-spapr_numa_associativit.patch b/0038-spapr-Fix-buffer-overflow-in-spapr_numa_associativit.patch deleted file mode 100644 index 5c7f88d..0000000 --- a/0038-spapr-Fix-buffer-overflow-in-spapr_numa_associativit.patch +++ /dev/null @@ -1,131 +0,0 @@ -From d66ae008007853df7d3a24bd2d5e7494f53f007c Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Thu, 7 Jan 2021 10:10:20 -0500 -Subject: spapr: Fix buffer overflow in spapr_numa_associativity_init() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Greg Kurz -Message-id: <20210107101020.579456-2-gkurz@redhat.com> -Patchwork-id: 100515 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] spapr: Fix buffer overflow in spapr_numa_associativity_init() -Bugzilla: 1908693 -RH-Acked-by: David Gibson -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laurent Vivier - -From: Greg Kurz - -Running a guest with 128 NUMA nodes crashes QEMU: - -../../util/error.c:59: error_setv: Assertion `*errp == NULL' failed. - -The crash happens when setting the FWNMI migration blocker: - -2861 if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI) == SPAPR_CAP_ON) { -2862 /* Create the error string for live migration blocker */ -2863 error_setg(&spapr->fwnmi_migration_blocker, -2864 "A machine check is being handled during migration. The handler" -2865 "may run and log hardware error on the destination"); -2866 } - -Inspection reveals that papr->fwnmi_migration_blocker isn't NULL: - -(gdb) p spapr->fwnmi_migration_blocker -$1 = (Error *) 0x8000000004000000 - -Since this is the only place where papr->fwnmi_migration_blocker is -set, this means someone wrote there in our back. 
Further analysis -points to spapr_numa_associativity_init(), especially the part -that initializes the associative arrays for NVLink GPUs: - - max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM; - -ie. max_nodes_with_gpus = 128 + 6, but the array isn't sized to -accommodate the 6 extra nodes: - -struct SpaprMachineState { - . - . - . - uint32_t numa_assoc_array[MAX_NODES][NUMA_ASSOC_SIZE]; - - Error *fwnmi_migration_blocker; -}; - -and the following loops happily overwrite spapr->fwnmi_migration_blocker, -and probably more: - - for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) { - spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); - - for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) { - uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ? - SPAPR_GPU_NUMA_ID : cpu_to_be32(i); - spapr->numa_assoc_array[i][j] = gpu_assoc; - } - - spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i); - } - -Fix the size of the array. This requires "hw/ppc/spapr.h" to see -NVGPU_MAX_NUM. Including "hw/pci-host/spapr.h" introduces a -circular dependency that breaks the build, so this moves the -definition of NVGPU_MAX_NUM to "hw/ppc/spapr.h" instead. - -Reported-by: Min Deng -BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1908693 -Fixes: dd7e1d7ae431 ("spapr_numa: move NVLink2 associativity handling to spapr_numa.c") -Cc: danielhb413@gmail.com -Signed-off-by: Greg Kurz -Message-Id: <160829960428.734871.12634150161215429514.stgit@bahia.lan> -Reviewed-by: Daniel Henrique Barboza -Signed-off-by: David Gibson -(cherry picked from commit 30499fdd9883026e106d74e8199e2f1311fd4011) -Signed-off-by: Greg Kurz -Signed-off-by: Danilo C. L. 
de Paula ---- - include/hw/pci-host/spapr.h | 2 -- - include/hw/ppc/spapr.h | 5 ++++- - 2 files changed, 4 insertions(+), 3 deletions(-) - -diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h -index 4f58f0223b..bd014823a9 100644 ---- a/include/hw/pci-host/spapr.h -+++ b/include/hw/pci-host/spapr.h -@@ -115,8 +115,6 @@ struct SpaprPhbState { - #define SPAPR_PCI_NV2RAM64_WIN_BASE SPAPR_PCI_LIMIT - #define SPAPR_PCI_NV2RAM64_WIN_SIZE (2 * TiB) /* For up to 6 GPUs 256GB each */ - --/* Max number of these GPUsper a physical box */ --#define NVGPU_MAX_NUM 6 - /* Max number of NVLinks per GPU in any physical box */ - #define NVGPU_MAX_LINKS 3 - -diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index ba2d81404b..28bbf07f8f 100644 ---- a/include/hw/ppc/spapr.h -+++ b/include/hw/ppc/spapr.h -@@ -112,6 +112,9 @@ typedef enum { - #define NUMA_ASSOC_SIZE (MAX_DISTANCE_REF_POINTS + 1) - #define VCPU_ASSOC_SIZE (NUMA_ASSOC_SIZE + 1) - -+/* Max number of these GPUsper a physical box */ -+#define NVGPU_MAX_NUM 6 -+ - typedef struct SpaprCapabilities SpaprCapabilities; - struct SpaprCapabilities { - uint8_t caps[SPAPR_CAP_NUM]; -@@ -243,7 +246,7 @@ struct SpaprMachineState { - unsigned gpu_numa_id; - SpaprTpmProxy *tpm_proxy; - -- uint32_t numa_assoc_array[MAX_NODES][NUMA_ASSOC_SIZE]; -+ uint32_t numa_assoc_array[MAX_NODES + NVGPU_MAX_NUM][NUMA_ASSOC_SIZE]; - - Error *fwnmi_migration_blocker; - }; --- -2.18.4 - diff --git a/0039-usb-hcd-xhci-pci-Fixup-capabilities-ordering-again.patch b/0039-usb-hcd-xhci-pci-Fixup-capabilities-ordering-again.patch deleted file mode 100644 index 01edcf3..0000000 --- a/0039-usb-hcd-xhci-pci-Fixup-capabilities-ordering-again.patch +++ /dev/null @@ -1,175 +0,0 @@ -From e85ee5f0196b85ad6f9faa02571325831b612c37 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Thu, 7 Jan 2021 14:12:25 -0500 -Subject: usb/hcd-xhci-pci: Fixup capabilities ordering (again) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20210107141225.19709-2-dgilbert@redhat.com> -Patchwork-id: 100518 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] usb/hcd-xhci-pci: Fixup capabilities ordering (again) -Bugzilla: 1912846 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Gerd Hoffmann - -From: "Dr. David Alan Gilbert" - -Allow the reordering of the PCIe capabilities for MSI around the PCIe -capability. -This changed incompatibly way back in QEMU 2.7 and in RHEL we fixed -it up in bz 1447874 unconditionally putting it back. - -The xhci code got reorganised between 5.0 and 5.2; and we lost this -fixup on rebase. - -This time, add it as a property, and enable the property for old -machine types; this will allow us to drop this patch once the -old machine types go. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/core/machine.c | 4 ++- - hw/usb/hcd-xhci-pci.c | 59 +++++++++++++++++++++++++++++++++---------- - hw/usb/hcd-xhci-pci.h | 1 + - 3 files changed, 49 insertions(+), 15 deletions(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index aba05ad676..68495b9411 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -29,7 +29,7 @@ - #include "migration/vmstate.h" - - /* -- * The same as hw_compat_5_1 -+ * Mostly the same as hw_compat_5_1 - */ - GlobalProperty hw_compat_rhel_8_3[] = { - /* hw_compat_rhel_8_3 from hw_compat_5_1 */ -@@ -46,6 +46,8 @@ GlobalProperty hw_compat_rhel_8_3[] = { - { "nvme", "use-intel-id", "on"}, - /* hw_compat_rhel_8_3 from hw_compat_5_1 */ - { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ -+ /* hw_compat_rhel_8_3 bz 1912846 */ -+ { "pci-xhci", "x-rh-late-msi-cap", "off" }, - }; - const size_t hw_compat_rhel_8_3_len = G_N_ELEMENTS(hw_compat_rhel_8_3); - -diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c -index bba628d3d2..d045a2a8be 100644 ---- a/hw/usb/hcd-xhci-pci.c -+++ b/hw/usb/hcd-xhci-pci.c -@@ -101,6 +101,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id) - return 0; - } - -+/* RH bz 1912846 */ -+static bool usb_xhci_pci_add_msi(struct PCIDevice *dev, Error **errp) -+{ -+ int ret; -+ Error *err = NULL; -+ XHCIPciState *s = XHCI_PCI(dev); -+ -+ ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); -+ /* -+ * Any error other than -ENOTSUP(board's MSI support is broken) -+ * is a programming error -+ */ -+ assert(!ret || ret == -ENOTSUP); -+ if (ret && s->msi == ON_OFF_AUTO_ON) { -+ /* Can't satisfy user's explicit msi=on request, fail */ -+ error_append_hint(&err, "You have to use msi=auto (default) or " -+ "msi=off with this machine type.\n"); -+ error_propagate(errp, err); -+ return true; -+ } -+ assert(!err || s->msi == ON_OFF_AUTO_AUTO); -+ /* With msi=auto, we fall back to MSI off silently */ -+ error_free(err); -+ -+ return false; -+} -+ - static void 
usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) - { - int ret; -@@ -124,23 +151,12 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) - s->xhci.nec_quirks = true; - } - -- if (s->msi != ON_OFF_AUTO_OFF) { -- ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); -- /* -- * Any error other than -ENOTSUP(board's MSI support is broken) -- * is a programming error -- */ -- assert(!ret || ret == -ENOTSUP); -- if (ret && s->msi == ON_OFF_AUTO_ON) { -- /* Can't satisfy user's explicit msi=on request, fail */ -- error_append_hint(&err, "You have to use msi=auto (default) or " -- "msi=off with this machine type.\n"); -+ if (s->msi != ON_OFF_AUTO_OFF && s->rh_late_msi_cap) { -+ /* This gives the behaviour from 5.2.0 onwards, lspci shows 90,a0,70 */ -+ if (usb_xhci_pci_add_msi(dev, &err)) { - error_propagate(errp, err); - return; - } -- assert(!err || s->msi == ON_OFF_AUTO_AUTO); -- /* With msi=auto, we fall back to MSI off silently */ -- error_free(err); - } - pci_register_bar(dev, 0, - PCI_BASE_ADDRESS_SPACE_MEMORY | -@@ -153,6 +169,14 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) - assert(ret > 0); - } - -+ /* RH bz 1912846 */ -+ if (s->msi != ON_OFF_AUTO_OFF && !s->rh_late_msi_cap) { -+ /* This gives the older RH machine behaviour, lspci shows 90,70,a0 */ -+ if (usb_xhci_pci_add_msi(dev, &err)) { -+ error_propagate(errp, err); -+ return; -+ } -+ } - if (s->msix != ON_OFF_AUTO_OFF) { - /* TODO check for errors, and should fail when msix=on */ - msix_init(dev, s->xhci.numintrs, -@@ -197,11 +221,18 @@ static void xhci_instance_init(Object *obj) - qdev_alias_all_properties(DEVICE(&s->xhci), obj); - } - -+static Property xhci_pci_properties[] = { -+ /* RH bz 1912846 */ -+ DEFINE_PROP_BOOL("x-rh-late-msi-cap", XHCIPciState, rh_late_msi_cap, true), -+ DEFINE_PROP_END_OF_LIST() -+}; -+ - static void xhci_class_init(ObjectClass *klass, void *data) - { - PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - DeviceClass 
*dc = DEVICE_CLASS(klass); - -+ device_class_set_props(dc, xhci_pci_properties); - dc->reset = xhci_pci_reset; - dc->vmsd = &vmstate_xhci_pci; - set_bit(DEVICE_CATEGORY_USB, dc->categories); -diff --git a/hw/usb/hcd-xhci-pci.h b/hw/usb/hcd-xhci-pci.h -index c193f79443..086a1feb1e 100644 ---- a/hw/usb/hcd-xhci-pci.h -+++ b/hw/usb/hcd-xhci-pci.h -@@ -39,6 +39,7 @@ typedef struct XHCIPciState { - XHCIState xhci; - OnOffAuto msi; - OnOffAuto msix; -+ bool rh_late_msi_cap; /* bz 1912846 */ - } XHCIPciState; - - #endif --- -2.18.4 - diff --git a/0040-qga-commands-posix-Send-CCW-address-on-s390x-with-th.patch b/0040-qga-commands-posix-Send-CCW-address-on-s390x-with-th.patch deleted file mode 100644 index cd881aa..0000000 --- a/0040-qga-commands-posix-Send-CCW-address-on-s390x-with-th.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 0e1bc444240fb2d8d3ee65533baaa72a7267c53a Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jan 2021 12:27:19 -0500 -Subject: qga/commands-posix: Send CCW address on s390x with the fsinfo data -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -Message-id: <20210108122719.73201-2-thuth@redhat.com> -Patchwork-id: 100532 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 1/1] qga/commands-posix: Send CCW address on s390x with the fsinfo data -Bugzilla: 1755075 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -We need the CCW address on the libvirt side to correctly identify -the disk, so add this information to the GuestDiskAddress on s390x. - -Signed-off-by: Thomas Huth -Reviewed-by: Cornelia Huck -Reviewed-by: Michael Roth -Message-Id: <20201127082353.448251-1-thuth@redhat.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 5b723a5d8df44b69b8ba350e643059c8fd889315) -Signed-off-by: Danilo C. L. 
de Paula ---- - qga/commands-posix.c | 34 ++++++++++++++++++++++++++++++++++ - qga/qapi-schema.json | 20 +++++++++++++++++++- - 2 files changed, 53 insertions(+), 1 deletion(-) - -diff --git a/qga/commands-posix.c b/qga/commands-posix.c -index c089e38120..5aa5eff84f 100644 ---- a/qga/commands-posix.c -+++ b/qga/commands-posix.c -@@ -1029,6 +1029,38 @@ static bool build_guest_fsinfo_for_nonpci_virtio(char const *syspath, - return true; - } - -+/* -+ * Store disk device info for CCW devices (s390x channel I/O devices). -+ * Returns true if information has been stored, or false for failure. -+ */ -+static bool build_guest_fsinfo_for_ccw_dev(char const *syspath, -+ GuestDiskAddress *disk, -+ Error **errp) -+{ -+ unsigned int cssid, ssid, subchno, devno; -+ char *p; -+ -+ p = strstr(syspath, "/devices/css"); -+ if (!p || sscanf(p + 12, "%*x/%x.%x.%x/%*x.%*x.%x/", -+ &cssid, &ssid, &subchno, &devno) < 4) { -+ g_debug("could not parse ccw device sysfs path: %s", syspath); -+ return false; -+ } -+ -+ disk->has_ccw_address = true; -+ disk->ccw_address = g_new0(GuestCCWAddress, 1); -+ disk->ccw_address->cssid = cssid; -+ disk->ccw_address->ssid = ssid; -+ disk->ccw_address->subchno = subchno; -+ disk->ccw_address->devno = devno; -+ -+ if (strstr(p, "/virtio")) { -+ build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp); -+ } -+ -+ return true; -+} -+ - /* Store disk device info specified by @sysfs into @fs */ - static void build_guest_fsinfo_for_real_device(char const *syspath, - GuestFilesystemInfo *fs, -@@ -1081,6 +1113,8 @@ static void build_guest_fsinfo_for_real_device(char const *syspath, - - if (strstr(syspath, "/devices/pci")) { - has_hwinf = build_guest_fsinfo_for_pci_dev(syspath, disk, errp); -+ } else if (strstr(syspath, "/devices/css")) { -+ has_hwinf = build_guest_fsinfo_for_ccw_dev(syspath, disk, errp); - } else if (strstr(syspath, "/virtio")) { - has_hwinf = build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp); - } else { -diff --git 
a/qga/qapi-schema.json b/qga/qapi-schema.json -index 3b3d1d0bd9..9a82b7e952 100644 ---- a/qga/qapi-schema.json -+++ b/qga/qapi-schema.json -@@ -846,6 +846,22 @@ - 'data': {'domain': 'int', 'bus': 'int', - 'slot': 'int', 'function': 'int'} } - -+## -+# @GuestCCWAddress: -+# -+# @cssid: channel subsystem image id -+# @ssid: subchannel set id -+# @subchno: subchannel number -+# @devno: device number -+# -+# Since: 6.0 -+## -+{ 'struct': 'GuestCCWAddress', -+ 'data': {'cssid': 'int', -+ 'ssid': 'int', -+ 'subchno': 'int', -+ 'devno': 'int'} } -+ - ## - # @GuestDiskAddress: - # -@@ -856,6 +872,7 @@ - # @unit: unit id - # @serial: serial number (since: 3.1) - # @dev: device node (POSIX) or device UNC (Windows) (since: 3.1) -+# @ccw-address: CCW address on s390x (since: 6.0) - # - # Since: 2.2 - ## -@@ -863,7 +880,8 @@ - 'data': {'pci-controller': 'GuestPCIAddress', - 'bus-type': 'GuestDiskBusType', - 'bus': 'int', 'target': 'int', 'unit': 'int', -- '*serial': 'str', '*dev': 'str'} } -+ '*serial': 'str', '*dev': 'str', -+ '*ccw-address': 'GuestCCWAddress'} } - - ## - # @GuestDiskInfo: --- -2.18.4 - diff --git a/0041-AArch64-machine-types-cleanup.patch b/0041-AArch64-machine-types-cleanup.patch deleted file mode 100644 index 0ac8f70..0000000 --- a/0041-AArch64-machine-types-cleanup.patch +++ /dev/null @@ -1,188 +0,0 @@ -From bfa3dc6e290c7b4f7f8825e4d4320ba062ed445a Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Sat, 9 Jan 2021 22:19:27 -0500 -Subject: AArch64 machine types cleanup - -RH-Author: Andrew Jones -Message-id: <20210109221928.31407-2-drjones@redhat.com> -Patchwork-id: 100547 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 1/2] AArch64 machine types cleanup -Bugzilla: 1895276 -RH-Acked-by: Gavin Shan -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Auger Eric -RH-Acked-by: Thomas Huth - -No functional change here, just a reduction of downstream-only -changes and whitespace differences. Also the removal of a nested -'#if 0 /* disabled for RHEL */' block. 
- -Signed-off-by: Andrew Jones -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/virt.c | 69 +++++++++++++++++++++++---------------------------- - 1 file changed, 31 insertions(+), 38 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 21e0485ac5..530072fce0 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -123,7 +123,6 @@ - static const TypeInfo rhel##m##n##s##_machvirt_info = { \ - .name = MACHINE_TYPE_NAME("virt-rhel" # m "." # n "." # s), \ - .parent = TYPE_RHEL_MACHINE, \ -- .instance_init = rhel##m##n##s##_virt_instance_init, \ - .class_init = rhel##m##n##s##_virt_class_init, \ - }; \ - static void rhel##m##n##s##_machvirt_init(void) \ -@@ -2098,8 +2097,8 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) - - vms->virt = value; - } -- - #endif /* disabled for RHEL */ -+ - static bool virt_get_highmem(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2167,14 +2166,13 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) - - vms->ras = value; - } --#if 0 /* Disabled for Red Hat Enterprise Linux */ -+ - static bool virt_get_mte(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); - - return vms->mte; - } --#endif /* disabled for RHEL */ - - static void virt_set_mte(Object *obj, bool value, Error **errp) - { -@@ -2182,7 +2180,8 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) - - vms->mte = value; - } --#endif -+#endif /* disabled for RHEL */ -+ - static char *virt_get_gic_version(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2818,25 +2817,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - "Enable ACPI"); - } - --static const TypeInfo rhel_machine_info = { -- .name = TYPE_RHEL_MACHINE, -- .parent = TYPE_MACHINE, -- .abstract = true, -- .instance_size = sizeof(VirtMachineState), -- .class_size = sizeof(VirtMachineClass), -- .class_init = rhel_machine_class_init, -- .interfaces = 
(InterfaceInfo[]) { -- { TYPE_HOTPLUG_HANDLER }, -- { } -- }, --}; -- --static void rhel_machine_init(void) --{ -- type_register_static(&rhel_machine_info); --} --type_init(rhel_machine_init); -- - static void rhel_virt_instance_init(Object *obj) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2844,22 +2824,23 @@ static void rhel_virt_instance_init(Object *obj) - - /* EL3 is disabled by default and non-configurable for RHEL */ - vms->secure = false; -+ - /* EL2 is disabled by default and non-configurable for RHEL */ - vms->virt = false; -- /* High memory is enabled by default for RHEL */ -+ -+ /* High memory is enabled by default */ - vms->highmem = true; - object_property_add_bool(obj, "highmem", virt_get_highmem, - virt_set_highmem); - object_property_set_description(obj, "highmem", - "Set on/off to enable/disable using " - "physical address space above 32 bits"); -- - vms->gic_version = VIRT_GIC_VERSION_NOSEL; - object_property_add_str(obj, "gic-version", virt_get_gic_version, - virt_set_gic_version); - object_property_set_description(obj, "gic-version", - "Set GIC version. " -- "Valid values are 2, 3 and host"); -+ "Valid values are 2, 3, host and max"); - - vms->highmem_ecam = !vmc->no_highmem_ecam; - -@@ -2882,18 +2863,36 @@ static void rhel_virt_instance_init(Object *obj) - "Set the IOMMU type. " - "Valid values are none and smmuv3"); - -+ /* Default disallows RAS instantiation and is non-configurable for RHEL */ - vms->ras = false; -- /* MTE is disabled by default. 
*/ -+ -+ /* MTE is disabled by default and non-configurable for RHEL */ - vms->mte = false; - -- vms->irqmap=a15irqmap; -+ vms->irqmap = a15irqmap; -+ - virt_flash_create(vms); - } - --static void rhel830_virt_instance_init(Object *obj) -+static const TypeInfo rhel_machine_info = { -+ .name = TYPE_RHEL_MACHINE, -+ .parent = TYPE_MACHINE, -+ .abstract = true, -+ .instance_size = sizeof(VirtMachineState), -+ .class_size = sizeof(VirtMachineClass), -+ .class_init = rhel_machine_class_init, -+ .instance_init = rhel_virt_instance_init, -+ .interfaces = (InterfaceInfo[]) { -+ { TYPE_HOTPLUG_HANDLER }, -+ { } -+ }, -+}; -+ -+static void rhel_machine_init(void) - { -- rhel_virt_instance_init(obj); -+ type_register_static(&rhel_machine_info); - } -+type_init(rhel_machine_init); - - static void rhel830_virt_options(MachineClass *mc) - { -@@ -2901,16 +2900,10 @@ static void rhel830_virt_options(MachineClass *mc) - } - DEFINE_RHEL_MACHINE_AS_LATEST(8, 3, 0) - --static void rhel820_virt_instance_init(Object *obj) --{ -- rhel_virt_instance_init(obj); --} -- - static void rhel820_virt_options(MachineClass *mc) - { - rhel830_virt_options(mc); -- compat_props_add(mc->compat_props, hw_compat_rhel_8_2, -- hw_compat_rhel_8_2_len); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, hw_compat_rhel_8_2_len); - mc->numa_mem_supported = true; - mc->auto_enable_numa_with_memdev = false; - } --- -2.18.4 - diff --git a/0042-hw-arm-virt-Add-8.4-Machine-type.patch b/0042-hw-arm-virt-Add-8.4-Machine-type.patch deleted file mode 100644 index 89753c9..0000000 --- a/0042-hw-arm-virt-Add-8.4-Machine-type.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 1bc68127d1531ed519cb839844febaecb2a3f6d0 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Sat, 9 Jan 2021 22:19:28 -0500 -Subject: hw/arm/virt: Add 8.4 Machine type - -RH-Author: Andrew Jones -Message-id: <20210109221928.31407-3-drjones@redhat.com> -Patchwork-id: 100548 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 2/2] hw/arm/virt: Add 8.4 Machine 
type -Bugzilla: 1895276 -RH-Acked-by: Gavin Shan -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Auger Eric -RH-Acked-by: Thomas Huth - -8.4 isn't much different than 8.3, except it adds the steal-time -feature and enables it by default. - -Signed-off-by: Andrew Jones -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/virt.c | 14 ++++++++++++-- - 1 file changed, 12 insertions(+), 2 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 530072fce0..208c360342 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2894,11 +2894,21 @@ static void rhel_machine_init(void) - } - type_init(rhel_machine_init); - --static void rhel830_virt_options(MachineClass *mc) -+static void rhel840_virt_options(MachineClass *mc) - { - compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); - } --DEFINE_RHEL_MACHINE_AS_LATEST(8, 3, 0) -+DEFINE_RHEL_MACHINE_AS_LATEST(8, 4, 0) -+ -+static void rhel830_virt_options(MachineClass *mc) -+{ -+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); -+ -+ rhel840_virt_options(mc); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); -+ vmc->no_kvm_steal_time = true; -+} -+DEFINE_RHEL_MACHINE(8, 3, 0) - - static void rhel820_virt_options(MachineClass *mc) - { --- -2.18.4 - diff --git a/0044-memory-Rename-memory_region_notify_one-to-memory_reg.patch b/0044-memory-Rename-memory_region_notify_one-to-memory_reg.patch deleted file mode 100644 index 419535a..0000000 --- a/0044-memory-Rename-memory_region_notify_one-to-memory_reg.patch +++ /dev/null @@ -1,146 +0,0 @@ -From 256180b78107813b8e8c292bc799f5d7c7676cd2 Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Mon, 11 Jan 2021 14:36:11 -0500 -Subject: memory: Rename memory_region_notify_one to - memory_region_notify_iommu_one -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210111143615.303645-2-eperezma@redhat.com> -Patchwork-id: 100570 -O-Subject: 
[RHEL-AV-8.4.0 qemu-kvm PATCH 1/5] memory: Rename memory_region_notify_one to memory_region_notify_iommu_one -Bugzilla: 1845758 -RH-Acked-by: Xiao Wang -RH-Acked-by: David Hildenbrand -RH-Acked-by: Peter Xu - -Previous name didn't reflect the iommu operation. - -Signed-off-by: Eugenio Pérez -Reviewed-by: Peter Xu -Reviewed-by: David Gibson -Reviewed-by: Juan Quintela -Reviewed-by: Eric Auger -Acked-by: Jason Wang -Message-Id: <20201116165506.31315-2-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 3b5ebf8532afdc1518bd8b0961ed802bc3f5f07c) -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/smmu-common.c | 2 +- - hw/arm/smmuv3.c | 2 +- - hw/i386/intel_iommu.c | 4 ++-- - include/exec/memory.h | 6 +++--- - softmmu/memory.c | 6 +++--- - 5 files changed, 10 insertions(+), 10 deletions(-) - -diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c -index 3838db1395..88d2c454f0 100644 ---- a/hw/arm/smmu-common.c -+++ b/hw/arm/smmu-common.c -@@ -472,7 +472,7 @@ static void smmu_unmap_notifier_range(IOMMUNotifier *n) - entry.perm = IOMMU_NONE; - entry.addr_mask = n->end - n->start; - -- memory_region_notify_one(n, &entry); -+ memory_region_notify_iommu_one(n, &entry); - } - - /* Unmap all notifiers attached to @mr */ -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index 22607c3784..273f5f7dce 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -828,7 +828,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, - entry.addr_mask = num_pages * (1 << granule) - 1; - entry.perm = IOMMU_NONE; - -- memory_region_notify_one(n, &entry); -+ memory_region_notify_iommu_one(n, &entry); - } - - /* invalidate an asid/iova range tuple in all mr's */ -diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c -index 70ac837733..067593b9e4 100644 ---- a/hw/i386/intel_iommu.c -+++ b/hw/i386/intel_iommu.c -@@ -3497,7 +3497,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, 
IOMMUNotifier *n) - /* This field is meaningless for unmap */ - entry.translated_addr = 0; - -- memory_region_notify_one(n, &entry); -+ memory_region_notify_iommu_one(n, &entry); - - start += mask; - remain -= mask; -@@ -3535,7 +3535,7 @@ static void vtd_address_space_refresh_all(IntelIOMMUState *s) - - static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private) - { -- memory_region_notify_one((IOMMUNotifier *)private, entry); -+ memory_region_notify_iommu_one((IOMMUNotifier *)private, entry); - return 0; - } - -diff --git a/include/exec/memory.h b/include/exec/memory.h -index 0f3e6bcd5e..d8456ccf52 100644 ---- a/include/exec/memory.h -+++ b/include/exec/memory.h -@@ -236,7 +236,7 @@ enum IOMMUMemoryRegionAttr { - * The IOMMU implementation must use the IOMMU notifier infrastructure - * to report whenever mappings are changed, by calling - * memory_region_notify_iommu() (or, if necessary, by calling -- * memory_region_notify_one() for each registered notifier). -+ * memory_region_notify_iommu_one() for each registered notifier). - * - * Conceptually an IOMMU provides a mapping from input address - * to an output TLB entry. If the IOMMU is aware of memory transaction -@@ -1346,7 +1346,7 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, - IOMMUTLBEntry entry); - - /** -- * memory_region_notify_one: notify a change in an IOMMU translation -+ * memory_region_notify_iommu_one: notify a change in an IOMMU translation - * entry to a single notifier - * - * This works just like memory_region_notify_iommu(), but it only -@@ -1357,7 +1357,7 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, - * replaces all old entries for the same virtual I/O address range. - * Deleted entries have .@perm == 0. 
- */ --void memory_region_notify_one(IOMMUNotifier *notifier, -+void memory_region_notify_iommu_one(IOMMUNotifier *notifier, - IOMMUTLBEntry *entry); - - /** -diff --git a/softmmu/memory.c b/softmmu/memory.c -index 11ca94d037..44de610c72 100644 ---- a/softmmu/memory.c -+++ b/softmmu/memory.c -@@ -1942,8 +1942,8 @@ void memory_region_unregister_iommu_notifier(MemoryRegion *mr, - memory_region_update_iommu_notify_flags(iommu_mr, NULL); - } - --void memory_region_notify_one(IOMMUNotifier *notifier, -- IOMMUTLBEntry *entry) -+void memory_region_notify_iommu_one(IOMMUNotifier *notifier, -+ IOMMUTLBEntry *entry) - { - IOMMUNotifierFlag request_flags; - hwaddr entry_end = entry->iova + entry->addr_mask; -@@ -1979,7 +1979,7 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, - - IOMMU_NOTIFIER_FOREACH(iommu_notifier, iommu_mr) { - if (iommu_notifier->iommu_idx == iommu_idx) { -- memory_region_notify_one(iommu_notifier, &entry); -+ memory_region_notify_iommu_one(iommu_notifier, &entry); - } - } - } --- -2.18.4 - diff --git a/0045-memory-Add-IOMMUTLBEvent.patch b/0045-memory-Add-IOMMUTLBEvent.patch deleted file mode 100644 index 0cc568b..0000000 --- a/0045-memory-Add-IOMMUTLBEvent.patch +++ /dev/null @@ -1,647 +0,0 @@ -From d282fdd88e60aa081365d8e0903ceb18743ccc9d Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Mon, 11 Jan 2021 14:36:12 -0500 -Subject: memory: Add IOMMUTLBEvent -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210111143615.303645-3-eperezma@redhat.com> -Patchwork-id: 100568 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/5] memory: Add IOMMUTLBEvent -Bugzilla: 1845758 -RH-Acked-by: Xiao Wang -RH-Acked-by: David Hildenbrand -RH-Acked-by: Peter Xu - -This way we can tell between regular IOMMUTLBEntry (entry of IOMMU -hardware) and notifications. 
- -In the notifications, we set explicitly if it is a MAPs or an UNMAP, -instead of trusting in entry permissions to differentiate them. - -Signed-off-by: Eugenio Pérez -Reviewed-by: Peter Xu -Reviewed-by: Juan Quintela -Acked-by: Jason Wang -Message-Id: <20201116165506.31315-3-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Matthew Rosato -Acked-by: David Gibson -(cherry picked from commit 5039caf3c449c49e625d34e134463260cf8e00e0) -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. de Paula ---- - hw/arm/smmu-common.c | 13 +++--- - hw/arm/smmuv3.c | 13 +++--- - hw/i386/intel_iommu.c | 88 ++++++++++++++++++++++------------------ - hw/misc/tz-mpc.c | 32 ++++++++------- - hw/ppc/spapr_iommu.c | 15 +++---- - hw/s390x/s390-pci-inst.c | 27 +++++++----- - hw/virtio/virtio-iommu.c | 30 +++++++------- - include/exec/memory.h | 27 ++++++------ - softmmu/memory.c | 20 ++++----- - 9 files changed, 143 insertions(+), 122 deletions(-) - -diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c -index 88d2c454f0..405d5c5325 100644 ---- a/hw/arm/smmu-common.c -+++ b/hw/arm/smmu-common.c -@@ -465,14 +465,15 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid) - /* Unmap the whole notifier's range */ - static void smmu_unmap_notifier_range(IOMMUNotifier *n) - { -- IOMMUTLBEntry entry; -+ IOMMUTLBEvent event; - -- entry.target_as = &address_space_memory; -- entry.iova = n->start; -- entry.perm = IOMMU_NONE; -- entry.addr_mask = n->end - n->start; -+ event.type = IOMMU_NOTIFIER_UNMAP; -+ event.entry.target_as = &address_space_memory; -+ event.entry.iova = n->start; -+ event.entry.perm = IOMMU_NONE; -+ event.entry.addr_mask = n->end - n->start; - -- memory_region_notify_iommu_one(n, &entry); -+ memory_region_notify_iommu_one(n, &event); - } - - /* Unmap all notifiers attached to @mr */ -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index 273f5f7dce..bbca0e9f20 100644 ---- a/hw/arm/smmuv3.c -+++ 
b/hw/arm/smmuv3.c -@@ -800,7 +800,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, - uint8_t tg, uint64_t num_pages) - { - SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu); -- IOMMUTLBEntry entry; -+ IOMMUTLBEvent event; - uint8_t granule = tg; - - if (!tg) { -@@ -823,12 +823,13 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, - granule = tt->granule_sz; - } - -- entry.target_as = &address_space_memory; -- entry.iova = iova; -- entry.addr_mask = num_pages * (1 << granule) - 1; -- entry.perm = IOMMU_NONE; -+ event.type = IOMMU_NOTIFIER_UNMAP; -+ event.entry.target_as = &address_space_memory; -+ event.entry.iova = iova; -+ event.entry.addr_mask = num_pages * (1 << granule) - 1; -+ event.entry.perm = IOMMU_NONE; - -- memory_region_notify_iommu_one(n, &entry); -+ memory_region_notify_iommu_one(n, &event); - } - - /* invalidate an asid/iova range tuple in all mr's */ -diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c -index 067593b9e4..56180b1c43 100644 ---- a/hw/i386/intel_iommu.c -+++ b/hw/i386/intel_iommu.c -@@ -1073,7 +1073,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, - } - } - --typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private); -+typedef int (*vtd_page_walk_hook)(IOMMUTLBEvent *event, void *private); - - /** - * Constant information used during page walking -@@ -1094,11 +1094,12 @@ typedef struct { - uint16_t domain_id; - } vtd_page_walk_info; - --static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) -+static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) - { - VTDAddressSpace *as = info->as; - vtd_page_walk_hook hook_fn = info->hook_fn; - void *private = info->private; -+ IOMMUTLBEntry *entry = &event->entry; - DMAMap target = { - .iova = entry->iova, - .size = entry->addr_mask, -@@ -1107,7 +1108,7 @@ static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) - }; - DMAMap *mapped = iova_tree_find(as->iova_tree, 
&target); - -- if (entry->perm == IOMMU_NONE && !info->notify_unmap) { -+ if (event->type == IOMMU_NOTIFIER_UNMAP && !info->notify_unmap) { - trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask); - return 0; - } -@@ -1115,7 +1116,7 @@ static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) - assert(hook_fn); - - /* Update local IOVA mapped ranges */ -- if (entry->perm) { -+ if (event->type == IOMMU_NOTIFIER_MAP) { - if (mapped) { - /* If it's exactly the same translation, skip */ - if (!memcmp(mapped, &target, sizeof(target))) { -@@ -1141,19 +1142,21 @@ static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) - int ret; - - /* Emulate an UNMAP */ -+ event->type = IOMMU_NOTIFIER_UNMAP; - entry->perm = IOMMU_NONE; - trace_vtd_page_walk_one(info->domain_id, - entry->iova, - entry->translated_addr, - entry->addr_mask, - entry->perm); -- ret = hook_fn(entry, private); -+ ret = hook_fn(event, private); - if (ret) { - return ret; - } - /* Drop any existing mapping */ - iova_tree_remove(as->iova_tree, &target); -- /* Recover the correct permission */ -+ /* Recover the correct type */ -+ event->type = IOMMU_NOTIFIER_MAP; - entry->perm = cache_perm; - } - } -@@ -1170,7 +1173,7 @@ static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info) - trace_vtd_page_walk_one(info->domain_id, entry->iova, - entry->translated_addr, entry->addr_mask, - entry->perm); -- return hook_fn(entry, private); -+ return hook_fn(event, private); - } - - /** -@@ -1191,7 +1194,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, - uint32_t offset; - uint64_t slpte; - uint64_t subpage_size, subpage_mask; -- IOMMUTLBEntry entry; -+ IOMMUTLBEvent event; - uint64_t iova = start; - uint64_t iova_next; - int ret = 0; -@@ -1245,13 +1248,15 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, - * - * In either case, we send an IOTLB notification down. 
- */ -- entry.target_as = &address_space_memory; -- entry.iova = iova & subpage_mask; -- entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur); -- entry.addr_mask = ~subpage_mask; -+ event.entry.target_as = &address_space_memory; -+ event.entry.iova = iova & subpage_mask; -+ event.entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur); -+ event.entry.addr_mask = ~subpage_mask; - /* NOTE: this is only meaningful if entry_valid == true */ -- entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw); -- ret = vtd_page_walk_one(&entry, info); -+ event.entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw); -+ event.type = event.entry.perm ? IOMMU_NOTIFIER_MAP : -+ IOMMU_NOTIFIER_UNMAP; -+ ret = vtd_page_walk_one(&event, info); - } - - if (ret < 0) { -@@ -1430,10 +1435,10 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, - return 0; - } - --static int vtd_sync_shadow_page_hook(IOMMUTLBEntry *entry, -+static int vtd_sync_shadow_page_hook(IOMMUTLBEvent *event, - void *private) - { -- memory_region_notify_iommu((IOMMUMemoryRegion *)private, 0, *entry); -+ memory_region_notify_iommu(private, 0, *event); - return 0; - } - -@@ -1993,14 +1998,17 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, - * page tables. We just deliver the PSI down to - * invalidate caches. 
- */ -- IOMMUTLBEntry entry = { -- .target_as = &address_space_memory, -- .iova = addr, -- .translated_addr = 0, -- .addr_mask = size - 1, -- .perm = IOMMU_NONE, -+ IOMMUTLBEvent event = { -+ .type = IOMMU_NOTIFIER_UNMAP, -+ .entry = { -+ .target_as = &address_space_memory, -+ .iova = addr, -+ .translated_addr = 0, -+ .addr_mask = size - 1, -+ .perm = IOMMU_NONE, -+ }, - }; -- memory_region_notify_iommu(&vtd_as->iommu, 0, entry); -+ memory_region_notify_iommu(&vtd_as->iommu, 0, event); - } - } - } -@@ -2412,7 +2420,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, - VTDInvDesc *inv_desc) - { - VTDAddressSpace *vtd_dev_as; -- IOMMUTLBEntry entry; -+ IOMMUTLBEvent event; - struct VTDBus *vtd_bus; - hwaddr addr; - uint64_t sz; -@@ -2460,12 +2468,13 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, - sz = VTD_PAGE_SIZE; - } - -- entry.target_as = &vtd_dev_as->as; -- entry.addr_mask = sz - 1; -- entry.iova = addr; -- entry.perm = IOMMU_NONE; -- entry.translated_addr = 0; -- memory_region_notify_iommu(&vtd_dev_as->iommu, 0, entry); -+ event.type = IOMMU_NOTIFIER_UNMAP; -+ event.entry.target_as = &vtd_dev_as->as; -+ event.entry.addr_mask = sz - 1; -+ event.entry.iova = addr; -+ event.entry.perm = IOMMU_NONE; -+ event.entry.translated_addr = 0; -+ memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event); - - done: - return true; -@@ -3485,19 +3494,20 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) - size = remain = end - start + 1; - - while (remain >= VTD_PAGE_SIZE) { -- IOMMUTLBEntry entry; -+ IOMMUTLBEvent event; - uint64_t mask = get_naturally_aligned_size(start, remain, s->aw_bits); - - assert(mask); - -- entry.iova = start; -- entry.addr_mask = mask - 1; -- entry.target_as = &address_space_memory; -- entry.perm = IOMMU_NONE; -+ event.type = IOMMU_NOTIFIER_UNMAP; -+ event.entry.iova = start; -+ event.entry.addr_mask = mask - 1; -+ event.entry.target_as = &address_space_memory; -+ event.entry.perm = 
IOMMU_NONE; - /* This field is meaningless for unmap */ -- entry.translated_addr = 0; -+ event.entry.translated_addr = 0; - -- memory_region_notify_iommu_one(n, &entry); -+ memory_region_notify_iommu_one(n, &event); - - start += mask; - remain -= mask; -@@ -3533,9 +3543,9 @@ static void vtd_address_space_refresh_all(IntelIOMMUState *s) - vtd_switch_address_space_all(s); - } - --static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private) -+static int vtd_replay_hook(IOMMUTLBEvent *event, void *private) - { -- memory_region_notify_iommu_one((IOMMUNotifier *)private, entry); -+ memory_region_notify_iommu_one(private, event); - return 0; - } - -diff --git a/hw/misc/tz-mpc.c b/hw/misc/tz-mpc.c -index 98f151237f..30481e1c90 100644 ---- a/hw/misc/tz-mpc.c -+++ b/hw/misc/tz-mpc.c -@@ -82,8 +82,10 @@ static void tz_mpc_iommu_notify(TZMPC *s, uint32_t lutidx, - /* Called when the LUT word at lutidx has changed from oldlut to newlut; - * must call the IOMMU notifiers for the changed blocks. - */ -- IOMMUTLBEntry entry = { -- .addr_mask = s->blocksize - 1, -+ IOMMUTLBEvent event = { -+ .entry = { -+ .addr_mask = s->blocksize - 1, -+ } - }; - hwaddr addr = lutidx * s->blocksize * 32; - int i; -@@ -100,26 +102,28 @@ static void tz_mpc_iommu_notify(TZMPC *s, uint32_t lutidx, - block_is_ns = newlut & (1 << i); - - trace_tz_mpc_iommu_notify(addr); -- entry.iova = addr; -- entry.translated_addr = addr; -+ event.entry.iova = addr; -+ event.entry.translated_addr = addr; - -- entry.perm = IOMMU_NONE; -- memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, entry); -- memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, entry); -+ event.type = IOMMU_NOTIFIER_UNMAP; -+ event.entry.perm = IOMMU_NONE; -+ memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, event); -+ memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, event); - -- entry.perm = IOMMU_RW; -+ event.type = IOMMU_NOTIFIER_MAP; -+ event.entry.perm = IOMMU_RW; - if (block_is_ns) { -- entry.target_as = &s->blocked_io_as; 
-+ event.entry.target_as = &s->blocked_io_as; - } else { -- entry.target_as = &s->downstream_as; -+ event.entry.target_as = &s->downstream_as; - } -- memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, entry); -+ memory_region_notify_iommu(&s->upstream, IOMMU_IDX_S, event); - if (block_is_ns) { -- entry.target_as = &s->downstream_as; -+ event.entry.target_as = &s->downstream_as; - } else { -- entry.target_as = &s->blocked_io_as; -+ event.entry.target_as = &s->blocked_io_as; - } -- memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, entry); -+ memory_region_notify_iommu(&s->upstream, IOMMU_IDX_NS, event); - } - } - -diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c -index 0790239ba5..30352df00e 100644 ---- a/hw/ppc/spapr_iommu.c -+++ b/hw/ppc/spapr_iommu.c -@@ -445,7 +445,7 @@ static void spapr_tce_reset(DeviceState *dev) - static target_ulong put_tce_emu(SpaprTceTable *tcet, target_ulong ioba, - target_ulong tce) - { -- IOMMUTLBEntry entry; -+ IOMMUTLBEvent event; - hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); - unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift; - -@@ -457,12 +457,13 @@ static target_ulong put_tce_emu(SpaprTceTable *tcet, target_ulong ioba, - - tcet->table[index] = tce; - -- entry.target_as = &address_space_memory, -- entry.iova = (ioba - tcet->bus_offset) & page_mask; -- entry.translated_addr = tce & page_mask; -- entry.addr_mask = ~page_mask; -- entry.perm = spapr_tce_iommu_access_flags(tce); -- memory_region_notify_iommu(&tcet->iommu, 0, entry); -+ event.entry.target_as = &address_space_memory, -+ event.entry.iova = (ioba - tcet->bus_offset) & page_mask; -+ event.entry.translated_addr = tce & page_mask; -+ event.entry.addr_mask = ~page_mask; -+ event.entry.perm = spapr_tce_iommu_access_flags(tce); -+ event.type = event.entry.perm ? 
IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP; -+ memory_region_notify_iommu(&tcet->iommu, 0, event); - - return H_SUCCESS; - } -diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c -index 70bfd91bf7..d9e1e29f1e 100644 ---- a/hw/s390x/s390-pci-inst.c -+++ b/hw/s390x/s390-pci-inst.c -@@ -602,15 +602,18 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, - S390IOTLBEntry *entry) - { - S390IOTLBEntry *cache = g_hash_table_lookup(iommu->iotlb, &entry->iova); -- IOMMUTLBEntry notify = { -- .target_as = &address_space_memory, -- .iova = entry->iova, -- .translated_addr = entry->translated_addr, -- .perm = entry->perm, -- .addr_mask = ~PAGE_MASK, -+ IOMMUTLBEvent event = { -+ .type = entry->perm ? IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP, -+ .entry = { -+ .target_as = &address_space_memory, -+ .iova = entry->iova, -+ .translated_addr = entry->translated_addr, -+ .perm = entry->perm, -+ .addr_mask = ~PAGE_MASK, -+ }, - }; - -- if (entry->perm == IOMMU_NONE) { -+ if (event.type == IOMMU_NOTIFIER_UNMAP) { - if (!cache) { - goto out; - } -@@ -623,9 +626,11 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, - goto out; - } - -- notify.perm = IOMMU_NONE; -- memory_region_notify_iommu(&iommu->iommu_mr, 0, notify); -- notify.perm = entry->perm; -+ event.type = IOMMU_NOTIFIER_UNMAP; -+ event.entry.perm = IOMMU_NONE; -+ memory_region_notify_iommu(&iommu->iommu_mr, 0, event); -+ event.type = IOMMU_NOTIFIER_MAP; -+ event.entry.perm = entry->perm; - } - - cache = g_new(S390IOTLBEntry, 1); -@@ -637,7 +642,7 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, - dec_dma_avail(iommu); - } - -- memory_region_notify_iommu(&iommu->iommu_mr, 0, notify); -+ memory_region_notify_iommu(&iommu->iommu_mr, 0, event); - - out: - return iommu->dma_limit ? 
iommu->dma_limit->avail : 1; -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index fc5c75d693..cea8811295 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -129,7 +129,7 @@ static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start, - hwaddr virt_end, hwaddr paddr, - uint32_t flags) - { -- IOMMUTLBEntry entry; -+ IOMMUTLBEvent event; - IOMMUAccessFlags perm = IOMMU_ACCESS_FLAG(flags & VIRTIO_IOMMU_MAP_F_READ, - flags & VIRTIO_IOMMU_MAP_F_WRITE); - -@@ -141,19 +141,20 @@ static void virtio_iommu_notify_map(IOMMUMemoryRegion *mr, hwaddr virt_start, - trace_virtio_iommu_notify_map(mr->parent_obj.name, virt_start, virt_end, - paddr, perm); - -- entry.target_as = &address_space_memory; -- entry.addr_mask = virt_end - virt_start; -- entry.iova = virt_start; -- entry.perm = perm; -- entry.translated_addr = paddr; -+ event.type = IOMMU_NOTIFIER_MAP; -+ event.entry.target_as = &address_space_memory; -+ event.entry.addr_mask = virt_end - virt_start; -+ event.entry.iova = virt_start; -+ event.entry.perm = perm; -+ event.entry.translated_addr = paddr; - -- memory_region_notify_iommu(mr, 0, entry); -+ memory_region_notify_iommu(mr, 0, event); - } - - static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start, - hwaddr virt_end) - { -- IOMMUTLBEntry entry; -+ IOMMUTLBEvent event; - - if (!(mr->iommu_notify_flags & IOMMU_NOTIFIER_UNMAP)) { - return; -@@ -161,13 +162,14 @@ static void virtio_iommu_notify_unmap(IOMMUMemoryRegion *mr, hwaddr virt_start, - - trace_virtio_iommu_notify_unmap(mr->parent_obj.name, virt_start, virt_end); - -- entry.target_as = &address_space_memory; -- entry.addr_mask = virt_end - virt_start; -- entry.iova = virt_start; -- entry.perm = IOMMU_NONE; -- entry.translated_addr = 0; -+ event.type = IOMMU_NOTIFIER_UNMAP; -+ event.entry.target_as = &address_space_memory; -+ event.entry.addr_mask = virt_end - virt_start; -+ event.entry.iova = virt_start; -+ event.entry.perm = 
IOMMU_NONE; -+ event.entry.translated_addr = 0; - -- memory_region_notify_iommu(mr, 0, entry); -+ memory_region_notify_iommu(mr, 0, event); - } - - static gboolean virtio_iommu_notify_unmap_cb(gpointer key, gpointer value, -diff --git a/include/exec/memory.h b/include/exec/memory.h -index d8456ccf52..e86b5e92da 100644 ---- a/include/exec/memory.h -+++ b/include/exec/memory.h -@@ -116,6 +116,11 @@ struct IOMMUNotifier { - }; - typedef struct IOMMUNotifier IOMMUNotifier; - -+typedef struct IOMMUTLBEvent { -+ IOMMUNotifierFlag type; -+ IOMMUTLBEntry entry; -+} IOMMUTLBEvent; -+ - /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ - #define RAM_PREALLOC (1 << 0) - -@@ -1326,24 +1331,18 @@ uint64_t memory_region_iommu_get_min_page_size(IOMMUMemoryRegion *iommu_mr); - /** - * memory_region_notify_iommu: notify a change in an IOMMU translation entry. - * -- * The notification type will be decided by entry.perm bits: -- * -- * - For UNMAP (cache invalidation) notifies: set entry.perm to IOMMU_NONE. -- * - For MAP (newly added entry) notifies: set entry.perm to the -- * permission of the page (which is definitely !IOMMU_NONE). -- * - * Note: for any IOMMU implementation, an in-place mapping change - * should be notified with an UNMAP followed by a MAP. - * - * @iommu_mr: the memory region that was changed - * @iommu_idx: the IOMMU index for the translation table which has changed -- * @entry: the new entry in the IOMMU translation table. The entry -- * replaces all old entries for the same virtual I/O address range. -- * Deleted entries have .@perm == 0. -+ * @event: TLB event with the new entry in the IOMMU translation table. -+ * The entry replaces all old entries for the same virtual I/O address -+ * range. 
- */ - void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, - int iommu_idx, -- IOMMUTLBEntry entry); -+ IOMMUTLBEvent event); - - /** - * memory_region_notify_iommu_one: notify a change in an IOMMU translation -@@ -1353,12 +1352,12 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, - * notifies a specific notifier, not all of them. - * - * @notifier: the notifier to be notified -- * @entry: the new entry in the IOMMU translation table. The entry -- * replaces all old entries for the same virtual I/O address range. -- * Deleted entries have .@perm == 0. -+ * @event: TLB event with the new entry in the IOMMU translation table. -+ * The entry replaces all old entries for the same virtual I/O address -+ * range. - */ - void memory_region_notify_iommu_one(IOMMUNotifier *notifier, -- IOMMUTLBEntry *entry); -+ IOMMUTLBEvent *event); - - /** - * memory_region_register_iommu_notifier: register a notifier for changes to -diff --git a/softmmu/memory.c b/softmmu/memory.c -index 44de610c72..6ca87e8d73 100644 ---- a/softmmu/memory.c -+++ b/softmmu/memory.c -@@ -1943,11 +1943,15 @@ void memory_region_unregister_iommu_notifier(MemoryRegion *mr, - } - - void memory_region_notify_iommu_one(IOMMUNotifier *notifier, -- IOMMUTLBEntry *entry) -+ IOMMUTLBEvent *event) - { -- IOMMUNotifierFlag request_flags; -+ IOMMUTLBEntry *entry = &event->entry; - hwaddr entry_end = entry->iova + entry->addr_mask; - -+ if (event->type == IOMMU_NOTIFIER_UNMAP) { -+ assert(entry->perm == IOMMU_NONE); -+ } -+ - /* - * Skip the notification if the notification does not overlap - * with registered range. 
-@@ -1958,20 +1962,14 @@ void memory_region_notify_iommu_one(IOMMUNotifier *notifier, - - assert(entry->iova >= notifier->start && entry_end <= notifier->end); - -- if (entry->perm & IOMMU_RW) { -- request_flags = IOMMU_NOTIFIER_MAP; -- } else { -- request_flags = IOMMU_NOTIFIER_UNMAP; -- } -- -- if (notifier->notifier_flags & request_flags) { -+ if (event->type & notifier->notifier_flags) { - notifier->notify(notifier, entry); - } - } - - void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, - int iommu_idx, -- IOMMUTLBEntry entry) -+ IOMMUTLBEvent event) - { - IOMMUNotifier *iommu_notifier; - -@@ -1979,7 +1977,7 @@ void memory_region_notify_iommu(IOMMUMemoryRegion *iommu_mr, - - IOMMU_NOTIFIER_FOREACH(iommu_notifier, iommu_mr) { - if (iommu_notifier->iommu_idx == iommu_idx) { -- memory_region_notify_iommu_one(iommu_notifier, &entry); -+ memory_region_notify_iommu_one(iommu_notifier, &event); - } - } - } --- -2.18.4 - diff --git a/0046-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch b/0046-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch deleted file mode 100644 index 6201e2f..0000000 --- a/0046-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 6eb76ae169aaf695a5fb6ef052859828e3ea91bc Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Mon, 11 Jan 2021 14:36:13 -0500 -Subject: memory: Add IOMMU_NOTIFIER_DEVIOTLB_UNMAP IOMMUTLBNotificationType -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210111143615.303645-4-eperezma@redhat.com> -Patchwork-id: 100571 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 3/5] memory: Add IOMMU_NOTIFIER_DEVIOTLB_UNMAP IOMMUTLBNotificationType -Bugzilla: 1845758 -RH-Acked-by: Xiao Wang -RH-Acked-by: David Hildenbrand -RH-Acked-by: Peter Xu - -This allows us to differentiate between regular IOMMU map/unmap events -and DEVIOTLB unmap. 
Doing so, notifiers that only need device IOTLB -invalidations will not receive regular IOMMU unmappings. - -Adapt intel and vhost to use it. - -Signed-off-by: Eugenio Pérez -Reviewed-by: Peter Xu -Reviewed-by: Juan Quintela -Acked-by: Jason Wang -Message-Id: <20201116165506.31315-4-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit b68ba1ca57677acf870d5ab10579e6105c1f5338) -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. de Paula ---- - hw/i386/intel_iommu.c | 2 +- - hw/virtio/vhost.c | 2 +- - include/exec/memory.h | 7 ++++++- - 3 files changed, 8 insertions(+), 3 deletions(-) - -diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c -index 56180b1c43..edc3090f91 100644 ---- a/hw/i386/intel_iommu.c -+++ b/hw/i386/intel_iommu.c -@@ -2468,7 +2468,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, - sz = VTD_PAGE_SIZE; - } - -- event.type = IOMMU_NOTIFIER_UNMAP; -+ event.type = IOMMU_NOTIFIER_DEVIOTLB_UNMAP; - event.entry.target_as = &vtd_dev_as->as; - event.entry.addr_mask = sz - 1; - event.entry.iova = addr; -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 614ccc2bcb..28c7d78172 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -718,7 +718,7 @@ static void vhost_iommu_region_add(MemoryListener *listener, - iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr, - MEMTXATTRS_UNSPECIFIED); - iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify, -- IOMMU_NOTIFIER_UNMAP, -+ IOMMU_NOTIFIER_DEVIOTLB_UNMAP, - section->offset_within_region, - int128_get64(end), - iommu_idx); -diff --git a/include/exec/memory.h b/include/exec/memory.h -index e86b5e92da..521d9901d7 100644 ---- a/include/exec/memory.h -+++ b/include/exec/memory.h -@@ -97,9 +97,14 @@ typedef enum { - IOMMU_NOTIFIER_UNMAP = 0x1, - /* Notify entry changes (newly created entries) */ - IOMMU_NOTIFIER_MAP = 0x2, -+ /* Notify changes on device IOTLB entries */ -+ IOMMU_NOTIFIER_DEVIOTLB_UNMAP = 
0x04, - } IOMMUNotifierFlag; - --#define IOMMU_NOTIFIER_ALL (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP) -+#define IOMMU_NOTIFIER_IOTLB_EVENTS (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP) -+#define IOMMU_NOTIFIER_DEVIOTLB_EVENTS IOMMU_NOTIFIER_DEVIOTLB_UNMAP -+#define IOMMU_NOTIFIER_ALL (IOMMU_NOTIFIER_IOTLB_EVENTS | \ -+ IOMMU_NOTIFIER_DEVIOTLB_EVENTS) - - struct IOMMUNotifier; - typedef void (*IOMMUNotify)(struct IOMMUNotifier *notifier, --- -2.18.4 - diff --git a/0047-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch b/0047-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch deleted file mode 100644 index e5fd578..0000000 --- a/0047-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch +++ /dev/null @@ -1,57 +0,0 @@ -From add80ba59a85aca4c5e2619dee95557d2ec14169 Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Mon, 11 Jan 2021 14:36:14 -0500 -Subject: intel_iommu: Skip page walking on device iotlb invalidations -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210111143615.303645-5-eperezma@redhat.com> -Patchwork-id: 100572 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 4/5] intel_iommu: Skip page walking on device iotlb invalidations -Bugzilla: 1845758 -RH-Acked-by: Xiao Wang -RH-Acked-by: David Hildenbrand -RH-Acked-by: Peter Xu - -Although they didn't reach the notifier because of the filtering in -memory_region_notify_iommu_one, the vt-d was still splitting huge -memory invalidations in chunks. Skipping it. - -This improves performance in case of netperf with vhost-net: -* TCP_STREAM: From 1923.6Mbit/s to 2175.13Mbit/s (13%) -* TCP_RR: From 8464.73 trans/s to 8932.703333 trans/s (5.5%) -* UDP_RR: From 8562.08 trans/s to 9005.62/s (5.1%) -* UDP_STREAM: No change observed (insignificant 0.1% improvement) - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20201116165506.31315-5-eperezma@redhat.com> -Reviewed-by: Michael S. 
Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit f7701e2c7983b680790af47117577b285b6a1aed) -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. de Paula ---- - hw/i386/intel_iommu.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c -index edc3090f91..0cc71e4057 100644 ---- a/hw/i386/intel_iommu.c -+++ b/hw/i386/intel_iommu.c -@@ -1478,6 +1478,10 @@ static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as) - VTDContextEntry ce; - IOMMUNotifier *n; - -+ if (!(vtd_as->iommu.iommu_notify_flags & IOMMU_NOTIFIER_IOTLB_EVENTS)) { -+ return 0; -+ } -+ - ret = vtd_dev_to_context_entry(vtd_as->iommu_state, - pci_bus_num(vtd_as->bus), - vtd_as->devfn, &ce); --- -2.18.4 - diff --git a/0048-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch b/0048-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch deleted file mode 100644 index 25fb623..0000000 --- a/0048-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch +++ /dev/null @@ -1,69 +0,0 @@ -From ce5295813c0f1c94964cbd126f37a3202c360b92 Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Mon, 11 Jan 2021 14:36:15 -0500 -Subject: memory: Skip bad range assertion if notifier is DEVIOTLB_UNMAP type -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210111143615.303645-6-eperezma@redhat.com> -Patchwork-id: 100573 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 5/5] memory: Skip bad range assertion if notifier is DEVIOTLB_UNMAP type -Bugzilla: 1845758 -RH-Acked-by: Xiao Wang -RH-Acked-by: David Hildenbrand -RH-Acked-by: Peter Xu - -Device IOTLB invalidations can unmap arbitrary ranges, eiter outside of -the memory region or even [0, ~0ULL] for all the space. The assertion -could be hit by a guest, and rhel7 guest effectively hit it. 
- -Signed-off-by: Eugenio Pérez -Reviewed-by: Peter Xu -Reviewed-by: Juan Quintela -Acked-by: Jason Wang -Message-Id: <20201116165506.31315-6-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 1804857f19f612f6907832e35599cdb51d4ec764) -Signed-off-by: Eugenio Pérez -Signed-off-by: Danilo C. L. de Paula ---- - softmmu/memory.c | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/softmmu/memory.c b/softmmu/memory.c -index 6ca87e8d73..22bacbbc78 100644 ---- a/softmmu/memory.c -+++ b/softmmu/memory.c -@@ -1947,6 +1947,7 @@ void memory_region_notify_iommu_one(IOMMUNotifier *notifier, - { - IOMMUTLBEntry *entry = &event->entry; - hwaddr entry_end = entry->iova + entry->addr_mask; -+ IOMMUTLBEntry tmp = *entry; - - if (event->type == IOMMU_NOTIFIER_UNMAP) { - assert(entry->perm == IOMMU_NONE); -@@ -1960,10 +1961,16 @@ void memory_region_notify_iommu_one(IOMMUNotifier *notifier, - return; - } - -- assert(entry->iova >= notifier->start && entry_end <= notifier->end); -+ if (notifier->notifier_flags & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) { -+ /* Crop (iova, addr_mask) to range */ -+ tmp.iova = MAX(tmp.iova, notifier->start); -+ tmp.addr_mask = MIN(entry_end, notifier->end) - tmp.iova; -+ } else { -+ assert(entry->iova >= notifier->start && entry_end <= notifier->end); -+ } - - if (event->type & notifier->notifier_flags) { -- notifier->notify(notifier, entry); -+ notifier->notify(notifier, &tmp); - } - } - --- -2.18.4 - diff --git a/0049-RHEL-Switch-pvpanic-test-to-q35.patch b/0049-RHEL-Switch-pvpanic-test-to-q35.patch deleted file mode 100644 index 7b6f4bf..0000000 --- a/0049-RHEL-Switch-pvpanic-test-to-q35.patch +++ /dev/null @@ -1,47 +0,0 @@ -From c489d2cd175e879071a3c5504a17d7f656dd7b06 Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Tue, 8 Dec 2020 16:27:15 -0500 -Subject: RHEL: Switch pvpanic test to q35 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -Message-id: <20201208162716.30836-3-dgilbert@redhat.com> -Patchwork-id: 100360 -O-Subject: [RHEL-av-8.4.0 qemu-kvm PATCH v2 2/3] RHEL: Switch pvpanic test to q35 -Bugzilla: 1885555 -RH-Acked-by: Thomas Huth -RH-Acked-by: Juan Quintela -RH-Acked-by: Philippe Mathieu-Daudé - -From: "Dr. David Alan Gilbert" - -Since b1b0393c3c5 the pvpanic test checks for a different -result (3) expecting it to get that on new machine types. -But, downstream, our 'pc' machine type is old, so switch the -test to q35, so it gets the new behaviour it's expecting. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. de Paula ---- - tests/qtest/pvpanic-test.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c -index 016b32ebee..f0a7282b47 100644 ---- a/tests/qtest/pvpanic-test.c -+++ b/tests/qtest/pvpanic-test.c -@@ -17,7 +17,8 @@ static void test_panic(void) - QDict *response, *data; - QTestState *qts; - -- qts = qtest_init("-device pvpanic"); -+ /* RHEL: Use q35 */ -+ qts = qtest_init("-M q35 -device pvpanic"); - - val = qtest_inb(qts, 0x505); - g_assert_cmpuint(val, ==, 3); --- -2.18.4 - diff --git a/0050-8.4-x86-machine-type.patch b/0050-8.4-x86-machine-type.patch deleted file mode 100644 index 70d0554..0000000 --- a/0050-8.4-x86-machine-type.patch +++ /dev/null @@ -1,144 +0,0 @@ -From cb95a2dd9f549a4b7fcfac97b9a83c46a232d41e Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 8 Dec 2020 16:27:16 -0500 -Subject: 8.4 x86 machine type - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20201208162716.30836-4-dgilbert@redhat.com> -Patchwork-id: 100362 -O-Subject: [RHEL-av-8.4.0 qemu-kvm PATCH v2 3/3] 8.4 x86 machine type -Bugzilla: 1885555 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Igor Mammedov -RH-Acked-by: Juan Quintela - -From: "Dr. David Alan Gilbert" - -Add pc-q35-rhel8.4.0 and fix all the compatiiblity glue up. - -Note the moving of x-smi-cpu-hotplug follows bz 1846886 comment 18 -part 2. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. de Paula ---- - hw/i386/pc.c | 8 ++++++-- - hw/i386/pc_piix.c | 5 +++++ - hw/i386/pc_q35.c | 30 +++++++++++++++++++++++++++--- - include/hw/i386/pc.h | 3 +++ - 4 files changed, 41 insertions(+), 5 deletions(-) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index f3fc695fe2..d5ea5b634c 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -363,11 +363,15 @@ GlobalProperty pc_rhel_compat[] = { - { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, - /* bz 1508330 */ - { "vfio-pci", "x-no-geforce-quirks", "on" }, -- /* BZ 1846886 */ -- { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, - }; - const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); - -+GlobalProperty pc_rhel_8_3_compat[] = { -+ /* pc_rhel_8_3_compat from pc_compat_5_1 */ -+ { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, -+}; -+const size_t pc_rhel_8_3_compat_len = G_N_ELEMENTS(pc_rhel_8_3_compat); -+ - GlobalProperty pc_rhel_8_2_compat[] = { - /* pc_rhel_8_2_compat from pc_compat_4_2 */ - { "mch", "smbase-smram", "off" }, -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 815da79108..1b1cc18ae0 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1045,6 +1045,11 @@ static void pc_machine_rhel760_options(MachineClass *m) - m->smbus_no_migration_support = true; - pcmc->pvh_enabled = false; - pcmc->default_cpu_version = CPU_VERSION_LEGACY; -+ pcmc->kvmclock_create_always = false; -+ compat_props_add(m->compat_props, hw_compat_rhel_8_3, -+ hw_compat_rhel_8_3_len); -+ 
compat_props_add(m->compat_props, pc_rhel_8_3_compat, -+ pc_rhel_8_3_compat_len); - compat_props_add(m->compat_props, hw_compat_rhel_8_2, - hw_compat_rhel_8_2_len); - compat_props_add(m->compat_props, pc_rhel_8_2_compat, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 3340008c00..5acb47afcf 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -593,6 +593,24 @@ static void pc_q35_machine_rhel_options(MachineClass *m) - compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); - } - -+static void pc_q35_init_rhel840(MachineState *machine) -+{ -+ pc_q35_init(machine); -+} -+ -+static void pc_q35_machine_rhel840_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); -+ m->desc = "RHEL-8.4.0 PC (Q35 + ICH9, 2009)"; -+ pcmc->smbios_stream_product = "RHEL-AV"; -+ pcmc->smbios_stream_version = "8.4.0"; -+} -+ -+DEFINE_PC_MACHINE(q35_rhel840, "pc-q35-rhel8.4.0", pc_q35_init_rhel840, -+ pc_q35_machine_rhel840_options); -+ -+ - static void pc_q35_init_rhel830(MachineState *machine) - { - pc_q35_init(machine); -@@ -601,10 +619,17 @@ static void pc_q35_init_rhel830(MachineState *machine) - static void pc_q35_machine_rhel830_options(MachineClass *m) - { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -- pc_q35_machine_rhel_options(m); -+ pc_q35_machine_rhel840_options(m); - m->desc = "RHEL-8.3.0 PC (Q35 + ICH9, 2009)"; -+ m->alias = NULL; - pcmc->smbios_stream_product = "RHEL-AV"; - pcmc->smbios_stream_version = "8.3.0"; -+ compat_props_add(m->compat_props, hw_compat_rhel_8_3, -+ hw_compat_rhel_8_3_len); -+ compat_props_add(m->compat_props, pc_rhel_8_3_compat, -+ pc_rhel_8_3_compat_len); -+ /* From pc_q35_5_1_machine_options() */ -+ pcmc->kvmclock_create_always = false; - } - - DEFINE_PC_MACHINE(q35_rhel830, "pc-q35-rhel8.3.0", pc_q35_init_rhel830, -@@ -618,9 +643,8 @@ static void pc_q35_init_rhel820(MachineState *machine) - static void pc_q35_machine_rhel820_options(MachineClass *m) - { - 
PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -- pc_q35_machine_rhel_options(m); -+ pc_q35_machine_rhel830_options(m); - m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; -- m->alias = NULL; - m->numa_mem_supported = true; - m->auto_enable_numa_with_memdev = false; - pcmc->smbios_stream_product = "RHEL-AV"; -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index e2ba9a4b58..68091bea98 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -272,6 +272,9 @@ extern const size_t pc_compat_1_4_len; - extern GlobalProperty pc_rhel_compat[]; - extern const size_t pc_rhel_compat_len; - -+extern GlobalProperty pc_rhel_8_3_compat[]; -+extern const size_t pc_rhel_8_3_compat_len; -+ - extern GlobalProperty pc_rhel_8_2_compat[]; - extern const size_t pc_rhel_8_2_compat_len; - --- -2.18.4 - diff --git a/0051-memory-clamp-cached-translation-in-case-it-points-to.patch b/0051-memory-clamp-cached-translation-in-case-it-points-to.patch deleted file mode 100644 index 7700dcf..0000000 --- a/0051-memory-clamp-cached-translation-in-case-it-points-to.patch +++ /dev/null @@ -1,153 +0,0 @@ -From cf7723d08da5b371ef8b89a6e4edfaa21f88f03f Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 12 Jan 2021 21:01:25 -0500 -Subject: memory: clamp cached translation in case it points to an MMIO region - -RH-Author: Jon Maloy -Message-id: <20210112210125.851866-2-jmaloy@redhat.com> -Patchwork-id: 100614 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] memory: clamp cached translation in case it points to an MMIO region -Bugzilla: 1904392 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Dr. 
David Alan Gilbert -RH-Acked-by: Thomas Huth - -From: Paolo Bonzini - -In using the address_space_translate_internal API, address_space_cache_init -forgot one piece of advice that can be found in the code for -address_space_translate_internal: - - /* MMIO registers can be expected to perform full-width accesses based only - * on their address, without considering adjacent registers that could - * decode to completely different MemoryRegions. When such registers - * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO - * regions overlap wildly. For this reason we cannot clamp the accesses - * here. - * - * If the length is small (as is the case for address_space_ldl/stl), - * everything works fine. If the incoming length is large, however, - * the caller really has to do the clamping through memory_access_size. - */ - -address_space_cache_init is exactly one such case where "the incoming length -is large", therefore we need to clamp the resulting length---not to -memory_access_size though, since we are not doing an access yet, but to -the size of the resulting section. This ensures that subsequent accesses -to the cached MemoryRegionSection will be in range. - -With this patch, the enclosed testcase notices that the used ring does -not fit into the MSI-X table and prints a "qemu-system-x86_64: Cannot map used" -error. - -Signed-off-by: Paolo Bonzini - -(cherry picked from 4bfb024bc76973d40a359476dc0291f46e435442) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - softmmu/physmem.c | 10 ++++++++ - tests/qtest/fuzz-test.c | 52 ++++++++++++++++++++++++++++++++++++++++- - 2 files changed, 61 insertions(+), 1 deletion(-) - -diff --git a/softmmu/physmem.c b/softmmu/physmem.c -index 3027747c03..fb3f276844 100644 ---- a/softmmu/physmem.c -+++ b/softmmu/physmem.c -@@ -3255,6 +3255,7 @@ int64_t address_space_cache_init(MemoryRegionCache *cache, - AddressSpaceDispatch *d; - hwaddr l; - MemoryRegion *mr; -+ Int128 diff; - - assert(len > 0); - -@@ -3263,6 +3264,15 @@ int64_t address_space_cache_init(MemoryRegionCache *cache, - d = flatview_to_dispatch(cache->fv); - cache->mrs = *address_space_translate_internal(d, addr, &cache->xlat, &l, true); - -+ /* -+ * cache->xlat is now relative to cache->mrs.mr, not to the section itself. -+ * Take that into account to compute how many bytes are there between -+ * cache->xlat and the end of the section. -+ */ -+ diff = int128_sub(cache->mrs.size, -+ int128_make64(cache->xlat - cache->mrs.offset_within_region)); -+ l = int128_get64(int128_min(diff, int128_make64(l))); -+ - mr = cache->mrs.mr; - memory_region_ref(mr); - if (memory_access_is_direct(mr, is_write)) { -diff --git a/tests/qtest/fuzz-test.c b/tests/qtest/fuzz-test.c -index 2692d556d9..99d1a3ee12 100644 ---- a/tests/qtest/fuzz-test.c -+++ b/tests/qtest/fuzz-test.c -@@ -73,6 +73,55 @@ static void test_lp1879531_eth_get_rss_ex_dst_addr(void) - qtest_quit(s); - } - -+ /* -+ * Here a MemoryRegionCache pointed to an MMIO region but had a -+ * larger size than the underlying region. 
-+ */ -+static void test_mmio_oob_from_memory_region_cache(void) -+{ -+ QTestState *s; -+ -+ s = qtest_init("-M pc-q35-5.2 -display none -m 512M " -+ "-device virtio-scsi,num_queues=8,addr=03.0 "); -+ -+ qtest_outl(s, 0xcf8, 0x80001811); -+ qtest_outb(s, 0xcfc, 0x6e); -+ qtest_outl(s, 0xcf8, 0x80001824); -+ qtest_outl(s, 0xcf8, 0x80001813); -+ qtest_outl(s, 0xcfc, 0xa080000); -+ qtest_outl(s, 0xcf8, 0x80001802); -+ qtest_outl(s, 0xcfc, 0x5a175a63); -+ qtest_outb(s, 0x6e08, 0x9e); -+ qtest_writeb(s, 0x9f003, 0xff); -+ qtest_writeb(s, 0x9f004, 0x01); -+ qtest_writeb(s, 0x9e012, 0x0e); -+ qtest_writeb(s, 0x9e01b, 0x0e); -+ qtest_writeb(s, 0x9f006, 0x01); -+ qtest_writeb(s, 0x9f008, 0x01); -+ qtest_writeb(s, 0x9f00a, 0x01); -+ qtest_writeb(s, 0x9f00c, 0x01); -+ qtest_writeb(s, 0x9f00e, 0x01); -+ qtest_writeb(s, 0x9f010, 0x01); -+ qtest_writeb(s, 0x9f012, 0x01); -+ qtest_writeb(s, 0x9f014, 0x01); -+ qtest_writeb(s, 0x9f016, 0x01); -+ qtest_writeb(s, 0x9f018, 0x01); -+ qtest_writeb(s, 0x9f01a, 0x01); -+ qtest_writeb(s, 0x9f01c, 0x01); -+ qtest_writeb(s, 0x9f01e, 0x01); -+ qtest_writeb(s, 0x9f020, 0x01); -+ qtest_writeb(s, 0x9f022, 0x01); -+ qtest_writeb(s, 0x9f024, 0x01); -+ qtest_writeb(s, 0x9f026, 0x01); -+ qtest_writeb(s, 0x9f028, 0x01); -+ qtest_writeb(s, 0x9f02a, 0x01); -+ qtest_writeb(s, 0x9f02c, 0x01); -+ qtest_writeb(s, 0x9f02e, 0x01); -+ qtest_writeb(s, 0x9f030, 0x01); -+ qtest_outb(s, 0x6e10, 0x00); -+ qtest_quit(s); -+} -+ - int main(int argc, char **argv) - { - const char *arch = qtest_get_arch(); -@@ -86,7 +135,8 @@ int main(int argc, char **argv) - test_lp1878642_pci_bus_get_irq_level_assert); - qtest_add_func("fuzz/test_lp1879531_eth_get_rss_ex_dst_addr", - test_lp1879531_eth_get_rss_ex_dst_addr); -- -+ qtest_add_func("fuzz/test_mmio_oob_from_memory_region_cache", -+ test_mmio_oob_from_memory_region_cache); - } - - return g_test_run(); --- -2.18.4 - diff --git a/0054-Drop-bogus-IPv6-messages.patch b/0054-Drop-bogus-IPv6-messages.patch deleted file mode 
100644 index 1ba8fd9..0000000 --- a/0054-Drop-bogus-IPv6-messages.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 1b118c53c70d9fa4ba3dcdf172039d29335bed73 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Wed, 20 Jan 2021 00:13:11 -0500 -Subject: Drop bogus IPv6 messages -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210120001311.1356511-2-jmaloy@redhat.com> -Patchwork-id: 100699 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] Drop bogus IPv6 messages -Bugzilla: 1918061 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Thomas Huth - -From: Ralf Haferkamp - -Drop IPv6 message shorter than what's mentioned in the payload -length header (+ the size of the IPv6 header). They're invalid an could -lead to data leakage in icmp6_send_echoreply(). - -(cherry picked from libslirp commit c7ede54cbd2e2b25385325600958ba0124e31cc0) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. de Paula ---- - slirp/src/ip6_input.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c -index a83e4f8e3d..f7ef354ee4 100644 ---- a/slirp/src/ip6_input.c -+++ b/slirp/src/ip6_input.c -@@ -56,6 +56,13 @@ void ip6_input(struct mbuf *m) - goto bad; - } - -+ // Check if the message size is big enough to hold what's -+ // set in the payload length header. 
If not this is an invalid -+ // packet -+ if (m->m_len < ntohs(ip6->ip_pl) + sizeof(struct ip6)) { -+ goto bad; -+ } -+ - /* check ip_ttl for a correct ICMP reply */ - if (ip6->ip_hl == 0) { - icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); --- -2.18.4 - diff --git a/README.rst b/README.rst deleted file mode 100644 index 4ca14c6..0000000 --- a/README.rst +++ /dev/null @@ -1,18 +0,0 @@ -=================== -qemu-kvm development -=================== - -qemu-kvm is maintained in a `source tree`_ rather than directly in dist-git -using packit service that provides way to develope using regular source code -structure and provides way to generate SRPM and build using koji service. - -Developers deliver all changes to source-git using merge request. Only maintainers -will be pushing changes sent to source-git to dist-git. - -Each release in dist-git is tagged in the source repository so you can easily -check out the source tree for a build. The tags are in the format -name-version-release, but note release doesn't contain the dist tag since the -source can be built in different build roots (Fedora, CentOS, etc.) - -.. 
_source tree: https://gitlab.com/redhat/centos-stream/src/qemu-kvm - diff --git a/kvm-block-Avoid-processing-BDS-twice-in-bdrv_set_aio_con.patch b/kvm-block-Avoid-processing-BDS-twice-in-bdrv_set_aio_con.patch deleted file mode 100644 index 59376b8..0000000 --- a/kvm-block-Avoid-processing-BDS-twice-in-bdrv_set_aio_con.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 0db52fa2553ba83454a347e0aca4896e1b0d9b41 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Thu, 11 Feb 2021 14:42:06 -0300 -Subject: [PATCH 4/6] block: Avoid processing BDS twice in - bdrv_set_aio_context_ignore() - -RH-Author: Sergio Lopez Pascual -Message-id: <20210211144208.58930-4-slp@redhat.com> -Patchwork-id: 101050 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 3/5] block: Avoid processing BDS twice in bdrv_set_aio_context_ignore() -Bugzilla: 1918966 1918968 -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf -RH-Acked-by: Eric Blake - -Some graphs may contain an indirect reference to the first BDS in the -chain that can be reached while walking it bottom->up from one its -children. - -Doubling-processing of a BDS is especially problematic for the -aio_notifiers, as they might attempt to work on both the old and the -new AIO contexts. - -To avoid this problem, add every child and parent to the ignore list -before actually processing them. 
- -Suggested-by: Kevin Wolf -Signed-off-by: Sergio Lopez -Message-Id: <20210201125032.44713-2-slp@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 722d8e73d65cb54f39d360ecb2147ac58f43c399) -Signed-off-by: Sergio Lopez -Signed-off-by: Eduardo Lima (Etrunko) ---- - block.c | 34 +++++++++++++++++++++++++++------- - 1 file changed, 27 insertions(+), 7 deletions(-) - -diff --git a/block.c b/block.c -index f1cedac362..8bfa446f9c 100644 ---- a/block.c -+++ b/block.c -@@ -6454,7 +6454,10 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, - AioContext *new_context, GSList **ignore) - { - AioContext *old_context = bdrv_get_aio_context(bs); -- BdrvChild *child; -+ GSList *children_to_process = NULL; -+ GSList *parents_to_process = NULL; -+ GSList *entry; -+ BdrvChild *child, *parent; - - g_assert(qemu_get_current_aio_context() == qemu_get_aio_context()); - -@@ -6469,16 +6472,33 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, - continue; - } - *ignore = g_slist_prepend(*ignore, child); -- bdrv_set_aio_context_ignore(child->bs, new_context, ignore); -+ children_to_process = g_slist_prepend(children_to_process, child); - } -- QLIST_FOREACH(child, &bs->parents, next_parent) { -- if (g_slist_find(*ignore, child)) { -+ -+ QLIST_FOREACH(parent, &bs->parents, next_parent) { -+ if (g_slist_find(*ignore, parent)) { - continue; - } -- assert(child->klass->set_aio_ctx); -- *ignore = g_slist_prepend(*ignore, child); -- child->klass->set_aio_ctx(child, new_context, ignore); -+ *ignore = g_slist_prepend(*ignore, parent); -+ parents_to_process = g_slist_prepend(parents_to_process, parent); -+ } -+ -+ for (entry = children_to_process; -+ entry != NULL; -+ entry = g_slist_next(entry)) { -+ child = entry->data; -+ bdrv_set_aio_context_ignore(child->bs, new_context, ignore); -+ } -+ g_slist_free(children_to_process); -+ -+ for (entry = parents_to_process; -+ entry != NULL; -+ entry = g_slist_next(entry)) { -+ parent = entry->data; -+ 
assert(parent->klass->set_aio_ctx); -+ parent->klass->set_aio_ctx(parent, new_context, ignore); - } -+ g_slist_free(parents_to_process); - - bdrv_detach_aio_context(bs); - --- -2.27.0 - diff --git a/kvm-block-Honor-blk_set_aio_context-context-requirements.patch b/kvm-block-Honor-blk_set_aio_context-context-requirements.patch deleted file mode 100644 index de75ecc..0000000 --- a/kvm-block-Honor-blk_set_aio_context-context-requirements.patch +++ /dev/null @@ -1,118 +0,0 @@ -From bc284d49a00a1a716b380c2245aa0b897a259a5d Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Thu, 11 Feb 2021 14:42:04 -0300 -Subject: [PATCH 2/6] block: Honor blk_set_aio_context() context requirements - -RH-Author: Sergio Lopez Pascual -Message-id: <20210211144208.58930-2-slp@redhat.com> -Patchwork-id: 101049 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/5] block: Honor blk_set_aio_context() context requirements -Bugzilla: 1918966 1918968 -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf -RH-Acked-by: Eric Blake - -The documentation for bdrv_set_aio_context_ignore() states this: - - * The caller must own the AioContext lock for the old AioContext of bs, but it - * must not own the AioContext lock for new_context (unless new_context is the - * same as the current context of bs). - -As blk_set_aio_context() makes use of this function, this rule also -applies to it. - -Fix all occurrences where this rule wasn't honored. 
- -Suggested-by: Kevin Wolf -Signed-off-by: Sergio Lopez -Message-Id: <20201214170519.223781-2-slp@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Eric Blake -(cherry picked from commit c7040ff64ec93ee925a81d3547db925fe7d1f1c0) -Signed-off-by: Sergio Lopez -Signed-off-by: Eduardo Lima (Etrunko) ---- - hw/block/dataplane/virtio-blk.c | 4 ++++ - hw/block/dataplane/xen-block.c | 7 ++++++- - hw/scsi/virtio-scsi.c | 6 ++++-- - 3 files changed, 14 insertions(+), 3 deletions(-) - -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index 37499c5564..e9050c8987 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -172,6 +172,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - VirtIOBlockDataPlane *s = vblk->dataplane; - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vblk))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); -+ AioContext *old_context; - unsigned i; - unsigned nvqs = s->conf->num_queues; - Error *local_err = NULL; -@@ -214,7 +215,10 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - vblk->dataplane_started = true; - trace_virtio_blk_data_plane_start(s); - -+ old_context = blk_get_aio_context(s->conf->conf.blk); -+ aio_context_acquire(old_context); - r = blk_set_aio_context(s->conf->conf.blk, s->ctx, &local_err); -+ aio_context_release(old_context); - if (r < 0) { - error_report_err(local_err); - goto fail_guest_notifiers; -diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c -index 71c337c7b7..3675f8deaf 100644 ---- a/hw/block/dataplane/xen-block.c -+++ b/hw/block/dataplane/xen-block.c -@@ -725,6 +725,7 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, - { - ERRP_GUARD(); - XenDevice *xendev = dataplane->xendev; -+ AioContext *old_context; - unsigned int ring_size; - unsigned int i; - -@@ -808,10 +809,14 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, - goto stop; - } - -- aio_context_acquire(dataplane->ctx); -+ 
old_context = blk_get_aio_context(dataplane->blk); -+ aio_context_acquire(old_context); - /* If other users keep the BlockBackend in the iothread, that's ok */ - blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL); -+ aio_context_release(old_context); -+ - /* Only reason for failure is a NULL channel */ -+ aio_context_acquire(dataplane->ctx); - xen_device_set_event_channel_context(xendev, dataplane->event_channel, - dataplane->ctx, &error_abort); - aio_context_release(dataplane->ctx); -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 82c025146d..66bdda5473 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -821,6 +821,7 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, - VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev); - VirtIOSCSI *s = VIRTIO_SCSI(vdev); - SCSIDevice *sd = SCSI_DEVICE(dev); -+ AioContext *old_context; - int ret; - - /* XXX: Remove this check once block backend is capable of handling -@@ -836,9 +837,10 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, - if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { - return; - } -- virtio_scsi_acquire(s); -+ old_context = blk_get_aio_context(sd->conf.blk); -+ aio_context_acquire(old_context); - ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp); -- virtio_scsi_release(s); -+ aio_context_release(old_context); - if (ret < 0) { - return; - } --- -2.27.0 - diff --git a/kvm-block-export-fix-blk_size-double-byteswap.patch b/kvm-block-export-fix-blk_size-double-byteswap.patch deleted file mode 100644 index c93ab98..0000000 --- a/kvm-block-export-fix-blk_size-double-byteswap.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 29c5b94ae259f21b792a611096c60b240e0c0983 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 15 Mar 2021 18:16:25 -0400 -Subject: [PATCH 09/15] block/export: fix blk_size double byteswap - -RH-Author: Stefan Hajnoczi -Message-id: 
<20210315181629.212884-3-stefanha@redhat.com> -Patchwork-id: 101340 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/6] block/export: fix blk_size double byteswap -Bugzilla: 1937004 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Kevin Wolf -RH-Acked-by: Max Reitz - -The config->blk_size field is little-endian. Use the native-endian -blk_size variable to avoid double byteswapping. - -Fixes: 11f60f7eaee2630dd6fa0c3a8c49f792e46c4cf1 ("block/export: make vhost-user-blk config space little-endian") -Signed-off-by: Stefan Hajnoczi -Message-Id: <20210223144653.811468-8-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit a4f1542af58fd6ab061e594d4e161f1c8b4a4372) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. de Paula ---- - block/export/vhost-user-blk-server.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c -index 62672d1cb9..3003cff189 100644 ---- a/block/export/vhost-user-blk-server.c -+++ b/block/export/vhost-user-blk-server.c -@@ -354,7 +354,7 @@ vu_blk_initialize_config(BlockDriverState *bs, - config->num_queues = cpu_to_le16(num_queues); - config->max_discard_sectors = cpu_to_le32(32768); - config->max_discard_seg = cpu_to_le32(1); -- config->discard_sector_alignment = cpu_to_le32(config->blk_size >> 9); -+ config->discard_sector_alignment = cpu_to_le32(blk_size >> 9); - config->max_write_zeroes_sectors = cpu_to_le32(32768); - config->max_write_zeroes_seg = cpu_to_le32(1); - } --- -2.27.0 - diff --git a/kvm-block-export-fix-vhost-user-blk-export-sector-number.patch b/kvm-block-export-fix-vhost-user-blk-export-sector-number.patch deleted file mode 100644 index dee1102..0000000 --- a/kvm-block-export-fix-vhost-user-blk-export-sector-number.patch +++ /dev/null @@ -1,53 +0,0 @@ -From e158a830fa229937fcb2ef755b50695abd64533a Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 15 Mar 2021 18:16:27 -0400 -Subject: [PATCH 11/15] 
block/export: fix vhost-user-blk export sector number - calculation - -RH-Author: Stefan Hajnoczi -Message-id: <20210315181629.212884-5-stefanha@redhat.com> -Patchwork-id: 101341 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 4/6] block/export: fix vhost-user-blk export sector number calculation -Bugzilla: 1937004 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Kevin Wolf -RH-Acked-by: Max Reitz - -The driver is supposed to honor the blk_size field but the protocol -still uses 512-byte sector numbers. It is incorrect to multiply -req->sector_num by blk_size. - -VIRTIO 1.1 5.2.5 Device Initialization says: - - blk_size can be read to determine the optimal sector size for the - driver to use. This does not affect the units used in the protocol - (always 512 bytes), but awareness of the correct value can affect - performance. - -Fixes: 3578389bcf76c824a5d82e6586a6f0c71e56f2aa ("block/export: vhost-user block device backend server") -Signed-off-by: Stefan Hajnoczi -Message-Id: <20210223144653.811468-10-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit e44362ce317bcc46d409ed6c4a5ed2b46804bcbf) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. 
de Paula ---- - block/export/vhost-user-blk-server.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c -index feb139e067..bb07f499c8 100644 ---- a/block/export/vhost-user-blk-server.c -+++ b/block/export/vhost-user-blk-server.c -@@ -144,7 +144,7 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) - break; - } - -- int64_t offset = req->sector_num * vexp->blk_size; -+ int64_t offset = req->sector_num << VIRTIO_BLK_SECTOR_BITS; - QEMUIOVector qiov; - if (is_write) { - qemu_iovec_init_external(&qiov, out_iov, out_num); --- -2.27.0 - diff --git a/kvm-block-export-port-virtio-blk-discard-write-zeroes-in.patch b/kvm-block-export-port-virtio-blk-discard-write-zeroes-in.patch deleted file mode 100644 index 7af3e89..0000000 --- a/kvm-block-export-port-virtio-blk-discard-write-zeroes-in.patch +++ /dev/null @@ -1,199 +0,0 @@ -From 400ddccbcd8ddc13c85dbb7796b15fe9d6a01c1f Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 15 Mar 2021 18:16:28 -0400 -Subject: [PATCH 12/15] block/export: port virtio-blk discard/write zeroes - input validation - -RH-Author: Stefan Hajnoczi -Message-id: <20210315181629.212884-6-stefanha@redhat.com> -Patchwork-id: 101342 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 5/6] block/export: port virtio-blk discard/write zeroes input validation -Bugzilla: 1937004 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Kevin Wolf -RH-Acked-by: Max Reitz - -Validate discard/write zeroes the same way we do for virtio-blk. Some of -these checks are mandated by the VIRTIO specification, others are -internal to QEMU. - -Signed-off-by: Stefan Hajnoczi -Message-Id: <20210223144653.811468-11-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit db4eadf9f10e19f864d70d1df3a90fbda31b8c06) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. 
de Paula ---- - block/export/vhost-user-blk-server.c | 116 +++++++++++++++++++++------ - 1 file changed, 93 insertions(+), 23 deletions(-) - -diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c -index bb07f499c8..937bb5e9b4 100644 ---- a/block/export/vhost-user-blk-server.c -+++ b/block/export/vhost-user-blk-server.c -@@ -29,6 +29,8 @@ - - enum { - VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1, -+ VHOST_USER_BLK_MAX_DISCARD_SECTORS = 32768, -+ VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS = 32768, - }; - struct virtio_blk_inhdr { - unsigned char status; -@@ -65,30 +67,102 @@ static void vu_blk_req_complete(VuBlkReq *req) - free(req); - } - -+static bool vu_blk_sect_range_ok(VuBlkExport *vexp, uint64_t sector, -+ size_t size) -+{ -+ uint64_t nb_sectors = size >> BDRV_SECTOR_BITS; -+ uint64_t total_sectors; -+ -+ if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) { -+ return false; -+ } -+ if ((sector << VIRTIO_BLK_SECTOR_BITS) % vexp->blk_size) { -+ return false; -+ } -+ blk_get_geometry(vexp->export.blk, &total_sectors); -+ if (sector > total_sectors || nb_sectors > total_sectors - sector) { -+ return false; -+ } -+ return true; -+} -+ - static int coroutine_fn --vu_blk_discard_write_zeroes(BlockBackend *blk, struct iovec *iov, -+vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov, - uint32_t iovcnt, uint32_t type) - { -+ BlockBackend *blk = vexp->export.blk; - struct virtio_blk_discard_write_zeroes desc; -- ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc)); -+ ssize_t size; -+ uint64_t sector; -+ uint32_t num_sectors; -+ uint32_t max_sectors; -+ uint32_t flags; -+ int bytes; -+ -+ /* Only one desc is currently supported */ -+ if (unlikely(iov_size(iov, iovcnt) > sizeof(desc))) { -+ return VIRTIO_BLK_S_UNSUPP; -+ } -+ -+ size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc)); - if (unlikely(size != sizeof(desc))) { -- error_report("Invalid size %zd, expect %zu", size, sizeof(desc)); -- return -EINVAL; -+ 
error_report("Invalid size %zd, expected %zu", size, sizeof(desc)); -+ return VIRTIO_BLK_S_IOERR; - } - -- uint64_t range[2] = { le64_to_cpu(desc.sector) << 9, -- le32_to_cpu(desc.num_sectors) << 9 }; -- if (type == VIRTIO_BLK_T_DISCARD) { -- if (blk_co_pdiscard(blk, range[0], range[1]) == 0) { -- return 0; -+ sector = le64_to_cpu(desc.sector); -+ num_sectors = le32_to_cpu(desc.num_sectors); -+ flags = le32_to_cpu(desc.flags); -+ max_sectors = (type == VIRTIO_BLK_T_WRITE_ZEROES) ? -+ VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS : -+ VHOST_USER_BLK_MAX_DISCARD_SECTORS; -+ -+ /* This check ensures that 'bytes' fits in an int */ -+ if (unlikely(num_sectors > max_sectors)) { -+ return VIRTIO_BLK_S_IOERR; -+ } -+ -+ bytes = num_sectors << VIRTIO_BLK_SECTOR_BITS; -+ -+ if (unlikely(!vu_blk_sect_range_ok(vexp, sector, bytes))) { -+ return VIRTIO_BLK_S_IOERR; -+ } -+ -+ /* -+ * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard -+ * and write zeroes commands if any unknown flag is set. -+ */ -+ if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) { -+ return VIRTIO_BLK_S_UNSUPP; -+ } -+ -+ if (type == VIRTIO_BLK_T_WRITE_ZEROES) { -+ int blk_flags = 0; -+ -+ if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { -+ blk_flags |= BDRV_REQ_MAY_UNMAP; -+ } -+ -+ if (blk_co_pwrite_zeroes(blk, sector << VIRTIO_BLK_SECTOR_BITS, -+ bytes, blk_flags) == 0) { -+ return VIRTIO_BLK_S_OK; - } -- } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) { -- if (blk_co_pwrite_zeroes(blk, range[0], range[1], 0) == 0) { -- return 0; -+ } else if (type == VIRTIO_BLK_T_DISCARD) { -+ /* -+ * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for -+ * discard commands if the unmap flag is set. 
-+ */ -+ if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) { -+ return VIRTIO_BLK_S_UNSUPP; -+ } -+ -+ if (blk_co_pdiscard(blk, sector << VIRTIO_BLK_SECTOR_BITS, -+ bytes) == 0) { -+ return VIRTIO_BLK_S_OK; - } - } - -- return -EINVAL; -+ return VIRTIO_BLK_S_IOERR; - } - - static void coroutine_fn vu_blk_virtio_process_req(void *opaque) -@@ -177,19 +251,13 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) - } - case VIRTIO_BLK_T_DISCARD: - case VIRTIO_BLK_T_WRITE_ZEROES: { -- int rc; -- - if (!vexp->writable) { - req->in->status = VIRTIO_BLK_S_IOERR; - break; - } - -- rc = vu_blk_discard_write_zeroes(blk, &elem->out_sg[1], out_num, type); -- if (rc == 0) { -- req->in->status = VIRTIO_BLK_S_OK; -- } else { -- req->in->status = VIRTIO_BLK_S_IOERR; -- } -+ req->in->status = vu_blk_discard_write_zeroes(vexp, out_iov, out_num, -+ type); - break; - } - default: -@@ -360,11 +428,13 @@ vu_blk_initialize_config(BlockDriverState *bs, - config->min_io_size = cpu_to_le16(1); - config->opt_io_size = cpu_to_le32(1); - config->num_queues = cpu_to_le16(num_queues); -- config->max_discard_sectors = cpu_to_le32(32768); -+ config->max_discard_sectors = -+ cpu_to_le32(VHOST_USER_BLK_MAX_DISCARD_SECTORS); - config->max_discard_seg = cpu_to_le32(1); - config->discard_sector_alignment = - cpu_to_le32(blk_size >> VIRTIO_BLK_SECTOR_BITS); -- config->max_write_zeroes_sectors = cpu_to_le32(32768); -+ config->max_write_zeroes_sectors -+ = cpu_to_le32(VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS); - config->max_write_zeroes_seg = cpu_to_le32(1); - } - --- -2.27.0 - diff --git a/kvm-block-export-port-virtio-blk-read-write-range-check.patch b/kvm-block-export-port-virtio-blk-read-write-range-check.patch deleted file mode 100644 index f35ef1f..0000000 --- a/kvm-block-export-port-virtio-blk-read-write-range-check.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 03aeb30096eb0d48e0b493ed4925b99b0e27979e Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 15 Mar 2021 18:16:29 
-0400 -Subject: [PATCH 13/15] block/export: port virtio-blk read/write range check - -RH-Author: Stefan Hajnoczi -Message-id: <20210315181629.212884-7-stefanha@redhat.com> -Patchwork-id: 101343 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 6/6] block/export: port virtio-blk read/write range check -Bugzilla: 1937004 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Kevin Wolf -RH-Acked-by: Max Reitz - -Check that the sector number and byte count are valid. - -Signed-off-by: Stefan Hajnoczi -Message-Id: <20210223144653.811468-13-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 05ae4e674e3d47342a7660ae7bc55b393e09f4c7) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. de Paula ---- - block/export/vhost-user-blk-server.c | 19 ++++++++++++++++--- - 1 file changed, 16 insertions(+), 3 deletions(-) - -diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c -index 937bb5e9b4..dbe3cfb9e8 100644 ---- a/block/export/vhost-user-blk-server.c -+++ b/block/export/vhost-user-blk-server.c -@@ -209,6 +209,8 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) - switch (type & ~VIRTIO_BLK_T_BARRIER) { - case VIRTIO_BLK_T_IN: - case VIRTIO_BLK_T_OUT: { -+ QEMUIOVector qiov; -+ int64_t offset; - ssize_t ret = 0; - bool is_write = type & VIRTIO_BLK_T_OUT; - req->sector_num = le64_to_cpu(req->out.sector); -@@ -218,13 +220,24 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) - break; - } - -- int64_t offset = req->sector_num << VIRTIO_BLK_SECTOR_BITS; -- QEMUIOVector qiov; - if (is_write) { - qemu_iovec_init_external(&qiov, out_iov, out_num); -- ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0); - } else { - qemu_iovec_init_external(&qiov, in_iov, in_num); -+ } -+ -+ if (unlikely(!vu_blk_sect_range_ok(vexp, -+ req->sector_num, -+ qiov.size))) { -+ req->in->status = VIRTIO_BLK_S_IOERR; -+ break; -+ } -+ -+ offset = req->sector_num << VIRTIO_BLK_SECTOR_BITS; -+ -+ if (is_write) { -+ ret = 
blk_co_pwritev(blk, offset, qiov.size, &qiov, 0); -+ } else { - ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0); - } - if (ret >= 0) { --- -2.27.0 - diff --git a/kvm-block-export-use-VIRTIO_BLK_SECTOR_BITS.patch b/kvm-block-export-use-VIRTIO_BLK_SECTOR_BITS.patch deleted file mode 100644 index 45b022f..0000000 --- a/kvm-block-export-use-VIRTIO_BLK_SECTOR_BITS.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 38097598172fa6b5b66224ee3a17dcc7d8ff6488 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 15 Mar 2021 18:16:26 -0400 -Subject: [PATCH 10/15] block/export: use VIRTIO_BLK_SECTOR_BITS - -RH-Author: Stefan Hajnoczi -Message-id: <20210315181629.212884-4-stefanha@redhat.com> -Patchwork-id: 101339 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 3/6] block/export: use VIRTIO_BLK_SECTOR_BITS -Bugzilla: 1937004 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Kevin Wolf -RH-Acked-by: Max Reitz - -Use VIRTIO_BLK_SECTOR_BITS and VIRTIO_BLK_SECTOR_SIZE when dealing with -virtio-blk sector numbers. Although the values happen to be the same as -BDRV_SECTOR_BITS and BDRV_SECTOR_SIZE, they are conceptually different. -This makes it clearer when we are dealing with virtio-blk sector units. - -Use VIRTIO_BLK_SECTOR_BITS in vu_blk_initialize_config(). Later patches -will use it the new constants the virtqueue request processing code -path. - -Suggested-by: Max Reitz -Signed-off-by: Stefan Hajnoczi -Message-Id: <20210223144653.811468-9-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 524bac0744e5abf95856fb9e31c01fd2ef102188) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. 
de Paula ---- - block/export/vhost-user-blk-server.c | 15 ++++++++++++--- - 1 file changed, 12 insertions(+), 3 deletions(-) - -diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c -index 3003cff189..feb139e067 100644 ---- a/block/export/vhost-user-blk-server.c -+++ b/block/export/vhost-user-blk-server.c -@@ -20,6 +20,13 @@ - #include "sysemu/block-backend.h" - #include "util/block-helpers.h" - -+/* -+ * Sector units are 512 bytes regardless of the -+ * virtio_blk_config->blk_size value. -+ */ -+#define VIRTIO_BLK_SECTOR_BITS 9 -+#define VIRTIO_BLK_SECTOR_SIZE (1ull << VIRTIO_BLK_SECTOR_BITS) -+ - enum { - VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1, - }; -@@ -345,7 +352,8 @@ vu_blk_initialize_config(BlockDriverState *bs, - uint32_t blk_size, - uint16_t num_queues) - { -- config->capacity = cpu_to_le64(bdrv_getlength(bs) >> BDRV_SECTOR_BITS); -+ config->capacity = -+ cpu_to_le64(bdrv_getlength(bs) >> VIRTIO_BLK_SECTOR_BITS); - config->blk_size = cpu_to_le32(blk_size); - config->size_max = cpu_to_le32(0); - config->seg_max = cpu_to_le32(128 - 2); -@@ -354,7 +362,8 @@ vu_blk_initialize_config(BlockDriverState *bs, - config->num_queues = cpu_to_le16(num_queues); - config->max_discard_sectors = cpu_to_le32(32768); - config->max_discard_seg = cpu_to_le32(1); -- config->discard_sector_alignment = cpu_to_le32(blk_size >> 9); -+ config->discard_sector_alignment = -+ cpu_to_le32(blk_size >> VIRTIO_BLK_SECTOR_BITS); - config->max_write_zeroes_sectors = cpu_to_le32(32768); - config->max_write_zeroes_seg = cpu_to_le32(1); - } -@@ -381,7 +390,7 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, - if (vu_opts->has_logical_block_size) { - logical_block_size = vu_opts->logical_block_size; - } else { -- logical_block_size = BDRV_SECTOR_SIZE; -+ logical_block_size = VIRTIO_BLK_SECTOR_SIZE; - } - check_block_size(exp->id, "logical-block-size", logical_block_size, - &local_err); --- -2.27.0 - diff --git 
a/kvm-block-move-blk_exp_close_all-to-qemu_cleanup.patch b/kvm-block-move-blk_exp_close_all-to-qemu_cleanup.patch deleted file mode 100644 index dcda5bc..0000000 --- a/kvm-block-move-blk_exp_close_all-to-qemu_cleanup.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 661245e1baf416570295fad0db1fdd5ad8485e33 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Thu, 11 Feb 2021 14:42:08 -0300 -Subject: [PATCH 6/6] block: move blk_exp_close_all() to qemu_cleanup() - -RH-Author: Sergio Lopez Pascual -Message-id: <20210211144208.58930-6-slp@redhat.com> -Patchwork-id: 101052 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 5/5] block: move blk_exp_close_all() to qemu_cleanup() -Bugzilla: 1918966 1918968 -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf -RH-Acked-by: Eric Blake - -Move blk_exp_close_all() from bdrv_close() to qemu_cleanup(), before -bdrv_drain_all_begin(). - -Export drivers may have coroutines yielding at some point in the block -layer, so we need to shut them down before draining the block layer, -as otherwise they may get stuck blk_wait_while_drained(). 
- -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1900505 -Signed-off-by: Sergio Lopez -Message-Id: <20210201125032.44713-3-slp@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 1895b977f9a69419ae45cfc25805f71efae32eaf) -Signed-off-by: Sergio Lopez -Signed-off-by: Eduardo Lima (Etrunko) ---- - block.c | 1 - - qemu-nbd.c | 1 + - softmmu/vl.c | 9 +++++++++ - storage-daemon/qemu-storage-daemon.c | 1 + - 4 files changed, 11 insertions(+), 1 deletion(-) - -diff --git a/block.c b/block.c -index 8bfa446f9c..57c60efc7f 100644 ---- a/block.c -+++ b/block.c -@@ -4472,7 +4472,6 @@ static void bdrv_close(BlockDriverState *bs) - void bdrv_close_all(void) - { - assert(job_next(NULL) == NULL); -- blk_exp_close_all(); - - /* Drop references from requests still in flight, such as canceled block - * jobs whose AIO context has not been polled yet */ -diff --git a/qemu-nbd.c b/qemu-nbd.c -index a7075c5419..1d337b7504 100644 ---- a/qemu-nbd.c -+++ b/qemu-nbd.c -@@ -509,6 +509,7 @@ static const char *socket_activation_validate_opts(const char *device, - static void qemu_nbd_shutdown(void) - { - job_cancel_sync_all(); -+ blk_exp_close_all(); - bdrv_close_all(); - } - -diff --git a/softmmu/vl.c b/softmmu/vl.c -index 065d52e8dc..3244ee5e12 100644 ---- a/softmmu/vl.c -+++ b/softmmu/vl.c -@@ -66,6 +66,7 @@ - #include "qemu/log.h" - #include "sysemu/blockdev.h" - #include "hw/block/block.h" -+#include "block/export.h" - #include "migration/misc.h" - #include "migration/snapshot.h" - #include "migration/global_state.h" -@@ -4526,6 +4527,14 @@ void qemu_cleanup(void) - */ - migration_shutdown(); - -+ /* -+ * Close the exports before draining the block layer. The export -+ * drivers may have coroutines yielding on it, so we need to clean -+ * them up before the drain, as otherwise they may be get stuck in -+ * blk_wait_while_drained(). 
-+ */ -+ blk_exp_close_all(); -+ - /* - * We must cancel all block jobs while the block layer is drained, - * or cancelling will be affected by throttling and thus may block -diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c -index e0c87edbdd..d8d172cc60 100644 ---- a/storage-daemon/qemu-storage-daemon.c -+++ b/storage-daemon/qemu-storage-daemon.c -@@ -314,6 +314,7 @@ int main(int argc, char *argv[]) - main_loop_wait(false); - } - -+ blk_exp_close_all(); - bdrv_drain_all_begin(); - bdrv_close_all(); - --- -2.27.0 - diff --git a/kvm-block-nbd-only-detach-existing-iochannel-from-aio_co.patch b/kvm-block-nbd-only-detach-existing-iochannel-from-aio_co.patch deleted file mode 100644 index 96c0d86..0000000 --- a/kvm-block-nbd-only-detach-existing-iochannel-from-aio_co.patch +++ /dev/null @@ -1,125 +0,0 @@ -From 23d161ad92d783275ad56f3acb663f7a21b809f4 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Mon, 8 Feb 2021 22:56:59 -0300 -Subject: [PATCH 01/54] block/nbd: only detach existing iochannel from - aio_context - -RH-Author: Eric Blake -Message-id: <20210208225701.110110-2-eblake@redhat.com> -Patchwork-id: 101005 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v4 1/3] block/nbd: only detach existing iochannel from aio_context -Bugzilla: 1887883 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz - -From: Roman Kagan - -When the reconnect in NBD client is in progress, the iochannel used for -NBD connection doesn't exist. Therefore an attempt to detach it from -the aio_context of the parent BlockDriverState results in a NULL pointer -dereference. - -The problem is triggerable, in particular, when an outgoing migration is -about to finish, and stopping the dataplane tries to move the -BlockDriverState from the iothread aio_context to the main loop. 
If the -NBD connection is lost before this point, and the NBD client has entered -the reconnect procedure, QEMU crashes: - - #0 qemu_aio_coroutine_enter (ctx=0x5618056c7580, co=0x0) - at /build/qemu-6MF7tq/qemu-5.0.1/util/qemu-coroutine.c:109 - #1 0x00005618034b1b68 in nbd_client_attach_aio_context_bh ( - opaque=0x561805ed4c00) at /build/qemu-6MF7tq/qemu-5.0.1/block/nbd.c:164 - #2 0x000056180353116b in aio_wait_bh (opaque=0x7f60e1e63700) - at /build/qemu-6MF7tq/qemu-5.0.1/util/aio-wait.c:55 - #3 0x0000561803530633 in aio_bh_call (bh=0x7f60d40a7e80) - at /build/qemu-6MF7tq/qemu-5.0.1/util/async.c:136 - #4 aio_bh_poll (ctx=ctx@entry=0x5618056c7580) - at /build/qemu-6MF7tq/qemu-5.0.1/util/async.c:164 - #5 0x0000561803533e5a in aio_poll (ctx=ctx@entry=0x5618056c7580, - blocking=blocking@entry=true) - at /build/qemu-6MF7tq/qemu-5.0.1/util/aio-posix.c:650 - #6 0x000056180353128d in aio_wait_bh_oneshot (ctx=0x5618056c7580, - cb=, opaque=) - at /build/qemu-6MF7tq/qemu-5.0.1/util/aio-wait.c:71 - #7 0x000056180345c50a in bdrv_attach_aio_context (new_context=0x5618056c7580, - bs=0x561805ed4c00) at /build/qemu-6MF7tq/qemu-5.0.1/block.c:6172 - #8 bdrv_set_aio_context_ignore (bs=bs@entry=0x561805ed4c00, - new_context=new_context@entry=0x5618056c7580, - ignore=ignore@entry=0x7f60e1e63780) - at /build/qemu-6MF7tq/qemu-5.0.1/block.c:6237 - #9 0x000056180345c969 in bdrv_child_try_set_aio_context ( - bs=bs@entry=0x561805ed4c00, ctx=0x5618056c7580, - ignore_child=, errp=) - at /build/qemu-6MF7tq/qemu-5.0.1/block.c:6332 - #10 0x00005618034957db in blk_do_set_aio_context (blk=0x56180695b3f0, - new_context=0x5618056c7580, update_root_node=update_root_node@entry=true, - errp=errp@entry=0x0) - at /build/qemu-6MF7tq/qemu-5.0.1/block/block-backend.c:1989 - #11 0x00005618034980bd in blk_set_aio_context (blk=, - new_context=, errp=errp@entry=0x0) - at /build/qemu-6MF7tq/qemu-5.0.1/block/block-backend.c:2010 - #12 0x0000561803197953 in virtio_blk_data_plane_stop (vdev=) - at 
/build/qemu-6MF7tq/qemu-5.0.1/hw/block/dataplane/virtio-blk.c:292 - #13 0x00005618033d67bf in virtio_bus_stop_ioeventfd (bus=0x5618056d9f08) - at /build/qemu-6MF7tq/qemu-5.0.1/hw/virtio/virtio-bus.c:245 - #14 0x00005618031c9b2e in virtio_vmstate_change (opaque=0x5618056d9f90, - running=0, state=) - at /build/qemu-6MF7tq/qemu-5.0.1/hw/virtio/virtio.c:3220 - #15 0x0000561803208bfd in vm_state_notify (running=running@entry=0, - state=state@entry=RUN_STATE_FINISH_MIGRATE) - at /build/qemu-6MF7tq/qemu-5.0.1/softmmu/vl.c:1275 - #16 0x0000561803155c02 in do_vm_stop (state=RUN_STATE_FINISH_MIGRATE, - send_stop=) at /build/qemu-6MF7tq/qemu-5.0.1/cpus.c:1032 - #17 0x00005618033e3765 in migration_completion (s=0x5618056e6960) - at /build/qemu-6MF7tq/qemu-5.0.1/migration/migration.c:2914 - #18 migration_iteration_run (s=0x5618056e6960) - at /build/qemu-6MF7tq/qemu-5.0.1/migration/migration.c:3275 - #19 migration_thread (opaque=opaque@entry=0x5618056e6960) - at /build/qemu-6MF7tq/qemu-5.0.1/migration/migration.c:3439 - #20 0x0000561803536ad6 in qemu_thread_start (args=) - at /build/qemu-6MF7tq/qemu-5.0.1/util/qemu-thread-posix.c:519 - #21 0x00007f61085d06ba in start_thread () - from /lib/x86_64-linux-gnu/libpthread.so.0 - #22 0x00007f610830641d in sysctl () from /lib/x86_64-linux-gnu/libc.so.6 - #23 0x0000000000000000 in ?? () - -Fix it by checking that the iochannel is non-null before trying to -detach it from the aio_context. If it is null, no detaching is needed, -and it will get reattached in the proper aio_context once the connection -is reestablished. 
- -Signed-off-by: Roman Kagan -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20210129073859.683063-2-rvkagan@yandex-team.ru> -Signed-off-by: Eric Blake -(cherry picked from commit 3b5e4db6734d30e551101c0941b2a6140862ba40) -Signed-off-by: Eric Blake -Signed-off-by: Eduardo Lima (Etrunko) ---- - block/nbd.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/block/nbd.c b/block/nbd.c -index 42536702b6..ed7b6df10b 100644 ---- a/block/nbd.c -+++ b/block/nbd.c -@@ -234,7 +234,14 @@ static void nbd_client_detach_aio_context(BlockDriverState *bs) - - /* Timer is deleted in nbd_client_co_drain_begin() */ - assert(!s->reconnect_delay_timer); -- qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc)); -+ /* -+ * If reconnect is in progress we may have no ->ioc. It will be -+ * re-instantiated in the proper aio context once the connection is -+ * reestablished. -+ */ -+ if (s->ioc) { -+ qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc)); -+ } - } - - static void nbd_client_attach_aio_context_bh(void *opaque) --- -2.27.0 - diff --git a/kvm-block-nbd-only-enter-connection-coroutine-if-it-s-pr.patch b/kvm-block-nbd-only-enter-connection-coroutine-if-it-s-pr.patch deleted file mode 100644 index 6ab629c..0000000 --- a/kvm-block-nbd-only-enter-connection-coroutine-if-it-s-pr.patch +++ /dev/null @@ -1,124 +0,0 @@ -From ed5dbeb52152217fc7fe9023327dbacfac8b2322 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Mon, 8 Feb 2021 22:57:00 -0300 -Subject: [PATCH 02/54] block/nbd: only enter connection coroutine if it's - present - -RH-Author: Eric Blake -Message-id: <20210208225701.110110-3-eblake@redhat.com> -Patchwork-id: 101008 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v4 2/3] block/nbd: only enter connection coroutine if it's present -Bugzilla: 1887883 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz - -From: Roman Kagan - -When an NBD block driver state is moved from one aio_context to another -(e.g. 
when doing a drain in a migration thread), -nbd_client_attach_aio_context_bh is executed that enters the connection -coroutine. - -However, the assumption that ->connection_co is always present here -appears incorrect: the connection may have encountered an error other -than -EIO in the underlying transport, and thus may have decided to quit -rather than keep trying to reconnect, and therefore it may have -terminated the connection coroutine. As a result an attempt to reassign -the client in this state (NBD_CLIENT_QUIT) to a different aio_context -leads to a null pointer dereference: - - #0 qio_channel_detach_aio_context (ioc=0x0) - at /build/qemu-gYtjVn/qemu-5.0.1/io/channel.c:452 - #1 0x0000562a242824b3 in bdrv_detach_aio_context (bs=0x562a268d6a00) - at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6151 - #2 bdrv_set_aio_context_ignore (bs=bs@entry=0x562a268d6a00, - new_context=new_context@entry=0x562a260c9580, - ignore=ignore@entry=0x7feeadc9b780) - at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6230 - #3 0x0000562a24282969 in bdrv_child_try_set_aio_context - (bs=bs@entry=0x562a268d6a00, ctx=0x562a260c9580, - ignore_child=, errp=) - at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6332 - #4 0x0000562a242bb7db in blk_do_set_aio_context (blk=0x562a2735d0d0, - new_context=0x562a260c9580, - update_root_node=update_root_node@entry=true, errp=errp@entry=0x0) - at /build/qemu-gYtjVn/qemu-5.0.1/block/block-backend.c:1989 - #5 0x0000562a242be0bd in blk_set_aio_context (blk=, - new_context=, errp=errp@entry=0x0) - at /build/qemu-gYtjVn/qemu-5.0.1/block/block-backend.c:2010 - #6 0x0000562a23fbd953 in virtio_blk_data_plane_stop (vdev=) - at /build/qemu-gYtjVn/qemu-5.0.1/hw/block/dataplane/virtio-blk.c:292 - #7 0x0000562a241fc7bf in virtio_bus_stop_ioeventfd (bus=0x562a260dbf08) - at /build/qemu-gYtjVn/qemu-5.0.1/hw/virtio/virtio-bus.c:245 - #8 0x0000562a23fefb2e in virtio_vmstate_change (opaque=0x562a260dbf90, - running=0, state=) - at /build/qemu-gYtjVn/qemu-5.0.1/hw/virtio/virtio.c:3220 - #9 
0x0000562a2402ebfd in vm_state_notify (running=running@entry=0, - state=state@entry=RUN_STATE_FINISH_MIGRATE) - at /build/qemu-gYtjVn/qemu-5.0.1/softmmu/vl.c:1275 - #10 0x0000562a23f7bc02 in do_vm_stop (state=RUN_STATE_FINISH_MIGRATE, - send_stop=) - at /build/qemu-gYtjVn/qemu-5.0.1/cpus.c:1032 - #11 0x0000562a24209765 in migration_completion (s=0x562a260e83a0) - at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:2914 - #12 migration_iteration_run (s=0x562a260e83a0) - at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:3275 - #13 migration_thread (opaque=opaque@entry=0x562a260e83a0) - at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:3439 - #14 0x0000562a2435ca96 in qemu_thread_start (args=) - at /build/qemu-gYtjVn/qemu-5.0.1/util/qemu-thread-posix.c:519 - #15 0x00007feed31466ba in start_thread (arg=0x7feeadc9c700) - at pthread_create.c:333 - #16 0x00007feed2e7c41d in __GI___sysctl (name=0x0, nlen=608471908, - oldval=0x562a2452b138, oldlenp=0x0, newval=0x562a2452c5e0 - <__func__.28102>, newlen=0) - at ../sysdeps/unix/sysv/linux/sysctl.c:30 - #17 0x0000000000000000 in ?? () - -Fix it by checking that the connection coroutine is non-null before -trying to enter it. If it is null, no entering is needed, as the -connection is probably going down anyway. 
- -Signed-off-by: Roman Kagan -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20210129073859.683063-3-rvkagan@yandex-team.ru> -Signed-off-by: Eric Blake -(cherry picked from commit ddde5ee769fcc84b96f879d7b94f35268f69ca3b) -Signed-off-by: Eric Blake -Signed-off-by: Eduardo Lima (Etrunko) ---- - block/nbd.c | 16 +++++++++------- - 1 file changed, 9 insertions(+), 7 deletions(-) - -diff --git a/block/nbd.c b/block/nbd.c -index ed7b6df10b..1bdba9fc49 100644 ---- a/block/nbd.c -+++ b/block/nbd.c -@@ -249,13 +249,15 @@ static void nbd_client_attach_aio_context_bh(void *opaque) - BlockDriverState *bs = opaque; - BDRVNBDState *s = (BDRVNBDState *)bs->opaque; - -- /* -- * The node is still drained, so we know the coroutine has yielded in -- * nbd_read_eof(), the only place where bs->in_flight can reach 0, or it is -- * entered for the first time. Both places are safe for entering the -- * coroutine. -- */ -- qemu_aio_coroutine_enter(bs->aio_context, s->connection_co); -+ if (s->connection_co) { -+ /* -+ * The node is still drained, so we know the coroutine has yielded in -+ * nbd_read_eof(), the only place where bs->in_flight can reach 0, or -+ * it is entered for the first time. Both places are safe for entering -+ * the coroutine. 
-+ */ -+ qemu_aio_coroutine_enter(bs->aio_context, s->connection_co); -+ } - bdrv_dec_in_flight(bs); - } - --- -2.27.0 - diff --git a/kvm-config-enable-VFIO_CCW.patch b/kvm-config-enable-VFIO_CCW.patch deleted file mode 100644 index 50f6fc2..0000000 --- a/kvm-config-enable-VFIO_CCW.patch +++ /dev/null @@ -1,42 +0,0 @@ -From f6e6416e8267d302ba5ec40c2a26bc25cc0d1d55 Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Fri, 29 Jan 2021 14:40:05 -0500 -Subject: [PATCH 5/5] config: enable VFIO_CCW - -RH-Author: Cornelia Huck -Message-id: <20210129144005.698097-1-cohuck@redhat.com> -Patchwork-id: 100941 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH] config: enable VFIO_CCW -Bugzilla: 1922170 -RH-Acked-by: Alex Williamson -RH-Acked-by: David Hildenbrand -RH-Acked-by: Thomas Huth - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1922170 -BRANCH: rhel-av-8.4.0 -UPSTREAM: n/a -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=34609010 - -Enable vfio-ccw in RHEL AV builds. - -Signed-off-by: Cornelia Huck -Signed-off-by: Eduardo Lima (Etrunko) ---- - default-configs/devices/s390x-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/default-configs/devices/s390x-rh-devices.mak b/default-configs/devices/s390x-rh-devices.mak -index c3c73fe752..08a15f3e01 100644 ---- a/default-configs/devices/s390x-rh-devices.mak -+++ b/default-configs/devices/s390x-rh-devices.mak -@@ -9,6 +9,7 @@ CONFIG_SCSI=y - CONFIG_TERMINAL3270=y - CONFIG_VFIO=y - CONFIG_VFIO_AP=y -+CONFIG_VFIO_CCW=y - CONFIG_VFIO_PCI=y - CONFIG_VHOST_USER=y - CONFIG_VIRTIO_CCW=y --- -2.18.4 - diff --git a/kvm-default-configs-Enable-vhost-user-blk.patch b/kvm-default-configs-Enable-vhost-user-blk.patch deleted file mode 100644 index b56f834..0000000 --- a/kvm-default-configs-Enable-vhost-user-blk.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 5aadfd88a3438cee837d2e7e96fa0801d885d119 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 19 Feb 2021 16:14:09 -0500 -Subject: [PATCH 18/54] 
default-configs: Enable vhost-user-blk - -RH-Author: Kevin Wolf -Message-id: <20210219161409.53788-2-kwolf@redhat.com> -Patchwork-id: 101166 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] default-configs: Enable vhost-user-blk -Bugzilla: 1930033 -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Max Reitz - -Now that we have qemu-storage-daemon with a vhost-user-blk export, -we want to be able to use that in guests. So enable vhost-user-blk in -our build configuration. - -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - default-configs/devices/x86_64-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/default-configs/devices/x86_64-rh-devices.mak b/default-configs/devices/x86_64-rh-devices.mak -index e80877d4e2..993e2c1d2f 100644 ---- a/default-configs/devices/x86_64-rh-devices.mak -+++ b/default-configs/devices/x86_64-rh-devices.mak -@@ -87,6 +87,7 @@ CONFIG_VGA=y - CONFIG_VGA_CIRRUS=y - CONFIG_VGA_PCI=y - CONFIG_VHOST_USER=y -+CONFIG_VHOST_USER_BLK=y - CONFIG_VIRTIO_PCI=y - CONFIG_VIRTIO_VGA=y - CONFIG_VMMOUSE=y --- -2.27.0 - diff --git a/kvm-docs-Add-qemu-storage-daemon-1-manpage-to-meson.buil.patch b/kvm-docs-Add-qemu-storage-daemon-1-manpage-to-meson.buil.patch deleted file mode 100644 index b70409f..0000000 --- a/kvm-docs-Add-qemu-storage-daemon-1-manpage-to-meson.buil.patch +++ /dev/null @@ -1,50 +0,0 @@ -From b3dbe8179b0f73d09bb90cbf92e991a187ef3534 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 16 Feb 2021 16:19:41 -0500 -Subject: [PATCH 15/54] docs: Add qemu-storage-daemon(1) manpage to meson.build -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Kevin Wolf -Message-id: <20210216161943.126728-4-kwolf@redhat.com> -Patchwork-id: 101104 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 3/5] docs: Add qemu-storage-daemon(1) manpage to meson.build -Bugzilla: 1901323 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: 
Max Reitz - -From: Peter Maydell - -In commit 1982e1602d15 we added a new qemu-storage-daemon(1) manpage. -At the moment new manpages have to be listed both in the conf.py for -Sphinx and also in docs/meson.build for Meson. We forgot the second -of those -- correct the omission. - -Signed-off-by: Peter Maydell -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Alex Bennée -Message-id: 20210108161416.21129-2-peter.maydell@linaro.org -(cherry picked from commit fa56cf7e86f99d5557a4fb730e375777b89d8b50) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - docs/meson.build | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/docs/meson.build b/docs/meson.build -index 71641b4fe0..fae9849b79 100644 ---- a/docs/meson.build -+++ b/docs/meson.build -@@ -62,6 +62,7 @@ if build_docs - 'qemu-img.1': (have_tools ? 'man1' : ''), - 'qemu-nbd.8': (have_tools ? 'man8' : ''), - 'qemu-pr-helper.8': (have_tools ? 'man8' : ''), -+ 'qemu-storage-daemon.1': (have_tools ? 'man1' : ''), - 'qemu-trace-stap.1': (config_host.has_key('CONFIG_TRACE_SYSTEMTAP') ? 'man1' : ''), - 'virtfs-proxy-helper.1': (have_virtfs_proxy_helper ? 'man1' : ''), - 'virtiofsd.1': (have_virtiofsd ? 
'man1' : ''), --- -2.27.0 - diff --git a/kvm-docs-add-qemu-storage-daemon-1-man-page.patch b/kvm-docs-add-qemu-storage-daemon-1-man-page.patch deleted file mode 100644 index f0cbc3f..0000000 --- a/kvm-docs-add-qemu-storage-daemon-1-man-page.patch +++ /dev/null @@ -1,218 +0,0 @@ -From f3831252e618e420ea24e53dbdee8eb51e8cad3e Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 16 Feb 2021 16:19:40 -0500 -Subject: [PATCH 14/54] docs: add qemu-storage-daemon(1) man page - -RH-Author: Kevin Wolf -Message-id: <20210216161943.126728-3-kwolf@redhat.com> -Patchwork-id: 101102 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/5] docs: add qemu-storage-daemon(1) man page -Bugzilla: 1901323 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -From: Stefan Hajnoczi - -Document the qemu-storage-daemon tool. Most of the command-line options -are identical to their QEMU counterparts. Perhaps Sphinx hxtool -integration could be extended to extract documentation for individual -command-line options so they can be shared. For now the -qemu-storage-daemon simply refers to the qemu(1) man page where the -command-line options are identical. - -Signed-off-by: Stefan Hajnoczi -Message-Id: <20201209103802.350848-3-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 1982e1602d15313cd82f225e821c37733ece3404) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula ---- - docs/tools/conf.py | 2 + - docs/tools/index.rst | 1 + - docs/tools/qemu-storage-daemon.rst | 148 +++++++++++++++++++++++++++++ - 3 files changed, 151 insertions(+) - create mode 100644 docs/tools/qemu-storage-daemon.rst - -diff --git a/docs/tools/conf.py b/docs/tools/conf.py -index 4760d36ff2..7072d99324 100644 ---- a/docs/tools/conf.py -+++ b/docs/tools/conf.py -@@ -20,6 +20,8 @@ html_theme_options['description'] = \ - man_pages = [ - ('qemu-img', 'qemu-img', u'QEMU disk image utility', - ['Fabrice Bellard'], 1), -+ ('qemu-storage-daemon', 'qemu-storage-daemon', u'QEMU storage daemon', -+ [], 1), - ('qemu-nbd', 'qemu-nbd', u'QEMU Disk Network Block Device Server', - ['Anthony Liguori '], 8), - ('qemu-pr-helper', 'qemu-pr-helper', 'QEMU persistent reservation helper', -diff --git a/docs/tools/index.rst b/docs/tools/index.rst -index b99f86c7c6..3a5829c17a 100644 ---- a/docs/tools/index.rst -+++ b/docs/tools/index.rst -@@ -11,6 +11,7 @@ Contents: - :maxdepth: 2 - - qemu-img -+ qemu-storage-daemon - qemu-nbd - qemu-pr-helper - qemu-trace-stap -diff --git a/docs/tools/qemu-storage-daemon.rst b/docs/tools/qemu-storage-daemon.rst -new file mode 100644 -index 0000000000..f63627eaf6 ---- /dev/null -+++ b/docs/tools/qemu-storage-daemon.rst -@@ -0,0 +1,148 @@ -+QEMU Storage Daemon -+=================== -+ -+Synopsis -+-------- -+ -+**qemu-storage-daemon** [options] -+ -+Description -+----------- -+ -+qemu-storage-daemon provides disk image functionality from QEMU, qemu-img, and -+qemu-nbd in a long-running process controlled via QMP commands without running -+a virtual machine. It can export disk images, run block job operations, and -+perform other disk-related operations. The daemon is controlled via a QMP -+monitor and initial configuration from the command-line. 
-+ -+The daemon offers the following subset of QEMU features: -+ -+* Block nodes -+* Block jobs -+* Block exports -+* Throttle groups -+* Character devices -+* Crypto and secrets -+* QMP -+* IOThreads -+ -+Commands can be sent over a QEMU Monitor Protocol (QMP) connection. See the -+:manpage:`qemu-storage-daemon-qmp-ref(7)` manual page for a description of the -+commands. -+ -+The daemon runs until it is stopped using the ``quit`` QMP command or -+SIGINT/SIGHUP/SIGTERM. -+ -+**Warning:** Never modify images in use by a running virtual machine or any -+other process; this may destroy the image. Also, be aware that querying an -+image that is being modified by another process may encounter inconsistent -+state. -+ -+Options -+------- -+ -+.. program:: qemu-storage-daemon -+ -+Standard options: -+ -+.. option:: -h, --help -+ -+ Display help and exit -+ -+.. option:: -V, --version -+ -+ Display version information and exit -+ -+.. option:: -T, --trace [[enable=]PATTERN][,events=FILE][,file=FILE] -+ -+ .. include:: ../qemu-option-trace.rst.inc -+ -+.. option:: --blockdev BLOCKDEVDEF -+ -+ is a block node definition. See the :manpage:`qemu(1)` manual page for a -+ description of block node properties and the :manpage:`qemu-block-drivers(7)` -+ manual page for a description of driver-specific parameters. -+ -+.. option:: --chardev CHARDEVDEF -+ -+ is a character device definition. See the :manpage:`qemu(1)` manual page for -+ a description of character device properties. A common character device -+ definition configures a UNIX domain socket:: -+ -+ --chardev socket,id=char1,path=/tmp/qmp.sock,server,nowait -+ -+.. 
option:: --export [type=]nbd,id=,node-name=[,name=][,writable=on|off][,bitmap=] -+ --export [type=]vhost-user-blk,id=,node-name=,addr.type=unix,addr.path=[,writable=on|off][,logical-block-size=][,num-queues=] -+ --export [type=]vhost-user-blk,id=,node-name=,addr.type=fd,addr.str=[,writable=on|off][,logical-block-size=][,num-queues=] -+ -+ is a block export definition. ``node-name`` is the block node that should be -+ exported. ``writable`` determines whether or not the export allows write -+ requests for modifying data (the default is off). -+ -+ The ``nbd`` export type requires ``--nbd-server`` (see below). ``name`` is -+ the NBD export name. ``bitmap`` is the name of a dirty bitmap reachable from -+ the block node, so the NBD client can use NBD_OPT_SET_META_CONTEXT with the -+ metadata context name "qemu:dirty-bitmap:BITMAP" to inspect the bitmap. -+ -+ The ``vhost-user-blk`` export type takes a vhost-user socket address on which -+ it accept incoming connections. Both -+ ``addr.type=unix,addr.path=`` for UNIX domain sockets and -+ ``addr.type=fd,addr.str=`` for file descriptor passing are supported. -+ ``logical-block-size`` sets the logical block size in bytes (the default is -+ 512). ``num-queues`` sets the number of virtqueues (the default is 1). -+ -+.. option:: --monitor MONITORDEF -+ -+ is a QMP monitor definition. See the :manpage:`qemu(1)` manual page for -+ a description of QMP monitor properties. A common QMP monitor definition -+ configures a monitor on character device ``char1``:: -+ -+ --monitor chardev=char1 -+ -+.. option:: --nbd-server addr.type=inet,addr.host=,addr.port=[,tls-creds=][,tls-authz=][,max-connections=] -+ --nbd-server addr.type=unix,addr.path=[,tls-creds=][,tls-authz=][,max-connections=] -+ -+ is a server for NBD exports. Both TCP and UNIX domain sockets are supported. -+ TLS encryption can be configured using ``--object`` tls-creds-* and authz-* -+ secrets (see below). 
-+ -+ To configure an NBD server on UNIX domain socket path ``/tmp/nbd.sock``:: -+ -+ --nbd-server addr.type=unix,addr.path=/tmp/nbd.sock -+ -+.. option:: --object help -+ --object ,help -+ --object [,=...] -+ -+ is a QEMU user creatable object definition. List object types with ``help``. -+ List object properties with ``,help``. See the :manpage:`qemu(1)` -+ manual page for a description of the object properties. -+ -+Examples -+-------- -+Launch the daemon with QMP monitor socket ``qmp.sock`` so clients can execute -+QMP commands:: -+ -+ $ qemu-storage-daemon \ -+ --chardev socket,path=qmp.sock,server,nowait,id=char1 \ -+ --monitor chardev=char1 -+ -+Export raw image file ``disk.img`` over NBD UNIX domain socket ``nbd.sock``:: -+ -+ $ qemu-storage-daemon \ -+ --blockdev driver=file,node-name=disk,filename=disk.img \ -+ --nbd-server addr.type=unix,addr.path=nbd.sock \ -+ --export type=nbd,id=export,node-name=disk,writable=on -+ -+Export a qcow2 image file ``disk.qcow2`` as a vhosts-user-blk device over UNIX -+domain socket ``vhost-user-blk.sock``:: -+ -+ $ qemu-storage-daemon \ -+ --blockdev driver=file,node-name=file,filename=disk.qcow2 \ -+ --blockdev driver=qcow2,node-name=qcow2,file=file \ -+ --export type=vhost-user-blk,id=export,addr.type=unix,addr.path=vhost-user-blk.sock,node-name=qcow2 -+ -+See also -+-------- -+ -+:manpage:`qemu(1)`, :manpage:`qemu-block-drivers(7)`, :manpage:`qemu-storage-daemon-qmp-ref(7)` --- -2.27.0 - diff --git a/kvm-docs-generate-qemu-storage-daemon-qmp-ref-7-man-page.patch b/kvm-docs-generate-qemu-storage-daemon-qmp-ref-7-man-page.patch deleted file mode 100644 index fbc0235..0000000 --- a/kvm-docs-generate-qemu-storage-daemon-qmp-ref-7-man-page.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 5425716387734e0a782ac633021cd85eb4d4b914 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 16 Feb 2021 16:19:39 -0500 -Subject: [PATCH 13/54] docs: generate qemu-storage-daemon-qmp-ref(7) man page - -RH-Author: Kevin Wolf -Message-id: 
<20210216161943.126728-2-kwolf@redhat.com> -Patchwork-id: 101101 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/5] docs: generate qemu-storage-daemon-qmp-ref(7) man page -Bugzilla: 1901323 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -From: Stefan Hajnoczi - -Although individual qemu-storage-daemon QMP commands are identical to -QEMU QMP commands, qemu-storage-daemon only supports a subset of QEMU's -QMP commands. Generate a manual page of just the commands supported by -qemu-storage-daemon so that users know exactly what is available in -qemu-storage-daemon. - -Add an h1 heading in storage-daemon/qapi/qapi-schema.json so that -block-core.json is at the h2 heading level. - -Signed-off-by: Stefan Hajnoczi -Message-Id: <20201209103802.350848-2-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 23c02ace3508dba5f781ed9ecfde400e462f3a37) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - docs/interop/conf.py | 2 ++ - docs/interop/index.rst | 1 + - docs/interop/qemu-storage-daemon-qmp-ref.rst | 13 +++++++++++++ - docs/meson.build | 1 + - storage-daemon/qapi/qapi-schema.json | 3 +++ - 5 files changed, 20 insertions(+) - create mode 100644 docs/interop/qemu-storage-daemon-qmp-ref.rst - -diff --git a/docs/interop/conf.py b/docs/interop/conf.py -index 2634ca3410..f4370aaa13 100644 ---- a/docs/interop/conf.py -+++ b/docs/interop/conf.py -@@ -23,4 +23,6 @@ man_pages = [ - [], 7), - ('qemu-qmp-ref', 'qemu-qmp-ref', 'QEMU QMP Reference Manual', - [], 7), -+ ('qemu-storage-daemon-qmp-ref', 'qemu-storage-daemon-qmp-ref', -+ 'QEMU Storage Daemon QMP Reference Manual', [], 7), - ] -diff --git a/docs/interop/index.rst b/docs/interop/index.rst -index cd78d679d8..95d56495f6 100644 ---- a/docs/interop/index.rst -+++ b/docs/interop/index.rst -@@ -20,6 +20,7 @@ Contents: - qemu-ga - qemu-ga-ref - qemu-qmp-ref -+ qemu-storage-daemon-qmp-ref - vhost-user - vhost-user-gpu - vhost-vdpa -diff --git 
a/docs/interop/qemu-storage-daemon-qmp-ref.rst b/docs/interop/qemu-storage-daemon-qmp-ref.rst -new file mode 100644 -index 0000000000..caf9dad23a ---- /dev/null -+++ b/docs/interop/qemu-storage-daemon-qmp-ref.rst -@@ -0,0 +1,13 @@ -+QEMU Storage Daemon QMP Reference Manual -+======================================== -+ -+.. -+ TODO: the old Texinfo manual used to note that this manual -+ is GPL-v2-or-later. We should make that reader-visible -+ both here and in our Sphinx manuals more generally. -+ -+.. -+ TODO: display the QEMU version, both here and in our Sphinx manuals -+ more generally. -+ -+.. qapi-doc:: storage-daemon/qapi/qapi-schema.json -diff --git a/docs/meson.build b/docs/meson.build -index bb8fe4c9e4..71641b4fe0 100644 ---- a/docs/meson.build -+++ b/docs/meson.build -@@ -56,6 +56,7 @@ if build_docs - 'qemu-ga.8': (have_tools ? 'man8' : ''), - 'qemu-ga-ref.7': 'man7', - 'qemu-qmp-ref.7': 'man7', -+ 'qemu-storage-daemon-qmp-ref.7': (have_tools ? 'man7' : ''), - }, - 'tools': { - 'qemu-img.1': (have_tools ? 
'man1' : ''), -diff --git a/storage-daemon/qapi/qapi-schema.json b/storage-daemon/qapi/qapi-schema.json -index c6ad5ae1e3..28117c3aac 100644 ---- a/storage-daemon/qapi/qapi-schema.json -+++ b/storage-daemon/qapi/qapi-schema.json -@@ -15,6 +15,9 @@ - - { 'include': '../../qapi/pragma.json' } - -+## -+# = Block devices -+## - { 'include': '../../qapi/block-core.json' } - { 'include': '../../qapi/block-export.json' } - { 'include': '../../qapi/char.json' } --- -2.27.0 - diff --git a/kvm-docs-set-CONFDIR-when-running-sphinx.patch b/kvm-docs-set-CONFDIR-when-running-sphinx.patch deleted file mode 100644 index db8c0c9..0000000 --- a/kvm-docs-set-CONFDIR-when-running-sphinx.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 16130479cc03434a85111608d9d2b0e179dc8b98 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Mon, 8 Feb 2021 09:37:30 -0500 -Subject: [PATCH 7/7] docs: set CONFDIR when running sphinx -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20210208093730.1166952-1-marcandre.lureau@redhat.com> -Patchwork-id: 101004 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH] docs: set CONFDIR when running sphinx -Bugzilla: 1902537 -RH-Acked-by: Eduardo Lima (Etrunko) -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Philippe Mathieu-Daudé - -From: Marc-André Lureau - -The default configuration path /etc/qemu can be overriden with configure -options, and the generated documentation used to reflect it. - -Fixes regression introduced in commit -f8aa24ea9a82da38370470c6bc0eaa393999edfe ("meson: sphinx-build"). 
- -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1902537 -Signed-off-by: Marc-André Lureau -Message-Id: <20201201183704.299697-1-marcandre.lureau@redhat.com> -Signed-off-by: Paolo Bonzini - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1902537 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=34816282 - -(cherry picked from commit daf07a6714b111340fe2d0234d1a5287d6ebe0ec) -Signed-off-by: Marc-André Lureau -Signed-off-by: Eduardo Lima (Etrunko) ---- - docs/meson.build | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/docs/meson.build b/docs/meson.build -index ebd85d59f9..bb8fe4c9e4 100644 ---- a/docs/meson.build -+++ b/docs/meson.build -@@ -9,7 +9,7 @@ endif - # Check if tools are available to build documentation. - build_docs = false - if sphinx_build.found() -- SPHINX_ARGS = [sphinx_build] -+ SPHINX_ARGS = ['env', 'CONFDIR=' + qemu_confdir, sphinx_build] - # If we're making warnings fatal, apply this to Sphinx runs as well - if get_option('werror') - SPHINX_ARGS += [ '-W' ] --- -2.18.4 - diff --git a/kvm-failover-Caller-of-this-two-functions-already-have-p.patch b/kvm-failover-Caller-of-this-two-functions-already-have-p.patch deleted file mode 100644 index de38731..0000000 --- a/kvm-failover-Caller-of-this-two-functions-already-have-p.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 4f94bc7cc479dba60fba841608b3da74b940a26d Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:44 -0500 -Subject: [PATCH 47/54] failover: Caller of this two functions already have - primary_dev - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-25-lvivier@redhat.com> -Patchwork-id: 101246 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 24/27] failover: Caller of this two functions already have primary_dev -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. 
Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -Pass it as an argument. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-26-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 0e9a65c5b168b993b845ec2acb2568328c2353da) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/net/virtio-net.c | 27 ++++++++++++++------------- - 1 file changed, 14 insertions(+), 13 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 746ed3fb71..b37e9cd1d9 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3093,17 +3093,17 @@ void virtio_net_set_netclient_name(VirtIONet *n, const char *name, - n->netclient_type = g_strdup(type); - } - --static bool failover_unplug_primary(VirtIONet *n) -+static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev) - { - HotplugHandler *hotplug_ctrl; - PCIDevice *pci_dev; - Error *err = NULL; - -- hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev); -+ hotplug_ctrl = qdev_get_hotplug_handler(dev); - if (hotplug_ctrl) { -- pci_dev = PCI_DEVICE(n->primary_dev); -+ pci_dev = PCI_DEVICE(dev); - pci_dev->partially_hotplugged = true; -- hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err); -+ hotplug_handler_unplug_request(hotplug_ctrl, dev, &err); - if (err) { - error_report_err(err); - return false; -@@ -3114,30 +3114,31 @@ static bool failover_unplug_primary(VirtIONet *n) - return true; - } - --static bool failover_replug_primary(VirtIONet *n, Error **errp) -+static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, -+ Error **errp) - { - Error *err = NULL; - HotplugHandler *hotplug_ctrl; -- PCIDevice *pdev = PCI_DEVICE(n->primary_dev); -+ PCIDevice *pdev = PCI_DEVICE(dev); - BusState *primary_bus; - - if (!pdev->partially_hotplugged) { - return true; - } -- primary_bus = n->primary_dev->parent_bus; -+ 
primary_bus = dev->parent_bus; - if (!primary_bus) { - error_setg(errp, "virtio_net: couldn't find primary bus"); - return false; - } -- qdev_set_parent_bus(n->primary_dev, primary_bus, &error_abort); -+ qdev_set_parent_bus(dev, primary_bus, &error_abort); - qatomic_set(&n->failover_primary_hidden, false); -- hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev); -+ hotplug_ctrl = qdev_get_hotplug_handler(dev); - if (hotplug_ctrl) { -- hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err); -+ hotplug_handler_pre_plug(hotplug_ctrl, dev, &err); - if (err) { - goto out; - } -- hotplug_handler_plug(hotplug_ctrl, n->primary_dev, &err); -+ hotplug_handler_plug(hotplug_ctrl, dev, &err); - } - - out: -@@ -3161,7 +3162,7 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, - } - - if (migration_in_setup(s) && !should_be_hidden) { -- if (failover_unplug_primary(n)) { -+ if (failover_unplug_primary(n, n->primary_dev)) { - vmstate_unregister(VMSTATE_IF(n->primary_dev), - qdev_get_vmsd(n->primary_dev), - n->primary_dev); -@@ -3172,7 +3173,7 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, - } - } else if (migration_has_failed(s)) { - /* We already unplugged the device let's plug it back */ -- if (!failover_replug_primary(n, &err)) { -+ if (!failover_replug_primary(n, n->primary_dev, &err)) { - if (err) { - error_report_err(err); - } --- -2.27.0 - diff --git a/kvm-failover-Remove-external-partially_hotplugged-proper.patch b/kvm-failover-Remove-external-partially_hotplugged-proper.patch deleted file mode 100644 index 833b268..0000000 --- a/kvm-failover-Remove-external-partially_hotplugged-proper.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 03ad2d1426775c5c993f59512932c4bbf62206c1 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:25 -0500 -Subject: [PATCH 28/54] failover: Remove external partially_hotplugged property - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-6-lvivier@redhat.com> -Patchwork-id: 
101251 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 05/27] failover: Remove external partially_hotplugged property -Bugzilla: 1819991 -RH-Acked-by: Jens Freimann -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -It was only set "once", and with the wrong value. As far as I can see, -libvirt still don't use it. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-7-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 594d308b9314b446ed2ccc42de7b4d57ba1b7118) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/net/virtio-net.c | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 3f658d6246..6ca85627d8 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3135,10 +3135,6 @@ static bool failover_replug_primary(VirtIONet *n, Error **errp) - } - qdev_set_parent_bus(n->primary_dev, primary_bus, &error_abort); - qatomic_set(&n->primary_should_be_hidden, false); -- if (!qemu_opt_set_bool(n->primary_device_opts, -- "partially_hotplugged", true, errp)) { -- return false; -- } - hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev); - if (hotplug_ctrl) { - hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err); --- -2.27.0 - diff --git a/kvm-failover-Remove-memory-leak.patch b/kvm-failover-Remove-memory-leak.patch deleted file mode 100644 index c6d6701..0000000 --- a/kvm-failover-Remove-memory-leak.patch +++ /dev/null @@ -1,60 +0,0 @@ -From e9380df03375e871de088ad5aee8fd19d6ad3794 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:32 -0500 -Subject: [PATCH 35/54] failover: Remove memory leak - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-13-lvivier@redhat.com> -Patchwork-id: 101261 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 
12/27] failover: Remove memory leak -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -Two things, at this point: - -* n->primary_device_id has to be set, otherwise - virtio_net_find_primary don't work. So we have a leak here. - -* it has to be exactly the same that prim_dev->id because what - qdev_find_recursive() does is just compare this two values. - -So remove the unneeded assignment and leaky bits. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-14-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 7b3dc2f8c0b817bbe78ba347130b3c99fe2c4470) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/net/virtio-net.c | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 6e5a56a230..70fa372c08 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -854,9 +854,7 @@ static DeviceState *virtio_connect_failover_devices(VirtIONet *n, Error **errp) - Error *err = NULL; - - prim_dev = virtio_net_find_primary(n, &err); -- if (prim_dev) { -- n->primary_device_id = g_strdup(prim_dev->id); -- } else { -+ if (!prim_dev) { - error_propagate(errp, err); - } - --- -2.27.0 - diff --git a/kvm-failover-Remove-primary_dev-member.patch b/kvm-failover-Remove-primary_dev-member.patch deleted file mode 100644 index aff570e..0000000 --- a/kvm-failover-Remove-primary_dev-member.patch +++ /dev/null @@ -1,158 +0,0 @@ -From 52dce3568320900c79e34eb2093058e5c3f60aa9 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:46 -0500 -Subject: [PATCH 49/54] failover: Remove primary_dev member - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-27-lvivier@redhat.com> -Patchwork-id: 
101250 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 26/27] failover: Remove primary_dev member -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -Only three uses remained, and we can remove them on that case. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-28-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 21e8709b29cd981c74565e75276ed476c954cbbf) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/net/virtio-net.c | 55 +++++++++++++++------------------- - include/hw/virtio/virtio-net.h | 1 - - 2 files changed, 24 insertions(+), 32 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 9203d81780..044ac95f6f 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -832,13 +832,31 @@ static char *failover_find_primary_device_id(VirtIONet *n) - return fid.id; - } - -+/** -+ * Find the primary device for this failover virtio-net -+ * -+ * @n: VirtIONet device -+ * @errp: returns an error if this function fails -+ */ -+static DeviceState *failover_find_primary_device(VirtIONet *n) -+{ -+ char *id = failover_find_primary_device_id(n); -+ -+ if (!id) { -+ return NULL; -+ } -+ -+ return qdev_find_recursive(sysbus_get_default(), id); -+} -+ - static void failover_add_primary(VirtIONet *n, Error **errp) - { - Error *err = NULL; - QemuOpts *opts; - char *id; -+ DeviceState *dev = failover_find_primary_device(n); - -- if (n->primary_dev) { -+ if (dev) { - return; - } - -@@ -848,7 +866,7 @@ static void failover_add_primary(VirtIONet *n, Error **errp) - } - opts = qemu_opts_find(qemu_find_opts("device"), id); - if (opts) { -- n->primary_dev = qdev_device_add(opts, &err); -+ dev = qdev_device_add(opts, &err); - if (err) { - 
qemu_opts_del(opts); - } -@@ -861,23 +879,6 @@ static void failover_add_primary(VirtIONet *n, Error **errp) - error_propagate(errp, err); - } - --/** -- * Find the primary device for this failover virtio-net -- * -- * @n: VirtIONet device -- * @errp: returns an error if this function fails -- */ --static DeviceState *failover_find_primary_device(VirtIONet *n) --{ -- char *id = failover_find_primary_device_id(n); -- -- if (!id) { -- return NULL; -- } -- -- return qdev_find_recursive(sysbus_get_default(), id); --} -- - static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) - { - VirtIONet *n = VIRTIO_NET(vdev); -@@ -933,19 +934,9 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) - qatomic_set(&n->failover_primary_hidden, false); - failover_add_primary(n, &err); - if (err) { -- n->primary_dev = failover_find_primary_device(n); -- failover_add_primary(n, &err); -- if (err) { -- goto out_err; -- } -+ warn_report_err(err); - } - } -- return; -- --out_err: -- if (err) { -- warn_report_err(err); -- } - } - - static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd, -@@ -3420,13 +3411,15 @@ static int virtio_net_pre_save(void *opaque) - static bool primary_unplug_pending(void *opaque) - { - DeviceState *dev = opaque; -+ DeviceState *primary; - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VirtIONet *n = VIRTIO_NET(vdev); - - if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { - return false; - } -- return n->primary_dev ? n->primary_dev->pending_deleted_event : false; -+ primary = failover_find_primary_device(n); -+ return primary ? 
primary->pending_deleted_event : false; - } - - static bool dev_unplug_pending(void *opaque) -diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h -index efef64e02f..7e96d193aa 100644 ---- a/include/hw/virtio/virtio-net.h -+++ b/include/hw/virtio/virtio-net.h -@@ -202,7 +202,6 @@ struct VirtIONet { - AnnounceTimer announce_timer; - bool needs_vnet_hdr_swap; - bool mtu_bypass_backend; -- DeviceState *primary_dev; - /* primary failover device is hidden*/ - bool failover_primary_hidden; - bool failover; --- -2.27.0 - diff --git a/kvm-failover-Remove-primary_device_dict.patch b/kvm-failover-Remove-primary_device_dict.patch deleted file mode 100644 index 74c9aa6..0000000 --- a/kvm-failover-Remove-primary_device_dict.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 277c3c13377cc7f41d4121fdce918df3005fc063 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:31 -0500 -Subject: [PATCH 34/54] failover: Remove primary_device_dict - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-12-lvivier@redhat.com> -Patchwork-id: 101262 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 11/27] failover: Remove primary_device_dict -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -It was only used once. And we have there opts->id, so no need for it. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-13-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 9673a88e97d1eb428872bd261dbf56a0f3c2fd71) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/virtio-net.c | 17 ++++------------- - include/hw/virtio/virtio-net.h | 1 - - 2 files changed, 4 insertions(+), 14 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 953d5c2bc8..6e5a56a230 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3186,28 +3186,21 @@ static int virtio_net_primary_should_be_hidden(DeviceListener *listener, - if (!device_opts) { - return -1; - } -- n->primary_device_dict = qemu_opts_to_qdict(device_opts, -- n->primary_device_dict); - standby_id = qemu_opt_get(device_opts, "failover_pair_id"); - if (g_strcmp0(standby_id, n->netclient_name) == 0) { - match_found = true; - } else { - match_found = false; - hide = false; -- n->primary_device_dict = NULL; - goto out; - } - - /* failover_primary_hidden is set during feature negotiation */ - hide = qatomic_read(&n->failover_primary_hidden); -- -- if (n->primary_device_dict) { -- g_free(n->primary_device_id); -- n->primary_device_id = g_strdup(qdict_get_try_str( -- n->primary_device_dict, "id")); -- if (!n->primary_device_id) { -- warn_report("primary_device_id not set"); -- } -+ g_free(n->primary_device_id); -+ n->primary_device_id = g_strdup(device_opts->id); -+ if (!n->primary_device_id) { -+ warn_report("primary_device_id not set"); - } - - out: -@@ -3396,8 +3389,6 @@ static void virtio_net_device_unrealize(DeviceState *dev) - if (n->failover) { - device_listener_unregister(&n->primary_listener); - g_free(n->primary_device_id); -- qobject_unref(n->primary_device_dict); -- n->primary_device_dict = NULL; - } - - max_queues = n->multiqueue ? 
n->max_queues : 1; -diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h -index a055f39dd6..fe353d8299 100644 ---- a/include/hw/virtio/virtio-net.h -+++ b/include/hw/virtio/virtio-net.h -@@ -202,7 +202,6 @@ struct VirtIONet { - AnnounceTimer announce_timer; - bool needs_vnet_hdr_swap; - bool mtu_bypass_backend; -- QDict *primary_device_dict; - DeviceState *primary_dev; - char *primary_device_id; - /* primary failover device is hidden*/ --- -2.27.0 - diff --git a/kvm-failover-Remove-primary_device_opts.patch b/kvm-failover-Remove-primary_device_opts.patch deleted file mode 100644 index e57fb51..0000000 --- a/kvm-failover-Remove-primary_device_opts.patch +++ /dev/null @@ -1,110 +0,0 @@ -From ec36f213983c0ea89fe8db8b44d1105df0bd3dc2 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:29 -0500 -Subject: [PATCH 32/54] failover: Remove primary_device_opts - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-10-lvivier@redhat.com> -Patchwork-id: 101259 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 09/27] failover: Remove primary_device_opts -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -It was really only used once, in failover_add_primary(). Just search -for it on global opts when it is needed. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-11-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 19e49bc2e984bd065719fc3595f35368b3ae87cd) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/virtio-net.c | 21 +++++---------------- - include/hw/virtio/virtio-net.h | 1 - - 2 files changed, 5 insertions(+), 17 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index e334f05352..2a99b0e0f6 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -791,17 +791,17 @@ static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) - static void failover_add_primary(VirtIONet *n, Error **errp) - { - Error *err = NULL; -+ QemuOpts *opts; - - if (n->primary_dev) { - return; - } - -- n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"), -- n->primary_device_id); -- if (n->primary_device_opts) { -- n->primary_dev = qdev_device_add(n->primary_device_opts, &err); -+ opts = qemu_opts_find(qemu_find_opts("device"), n->primary_device_id); -+ if (opts) { -+ n->primary_dev = qdev_device_add(opts, &err); - if (err) { -- qemu_opts_del(n->primary_device_opts); -+ qemu_opts_del(opts); - } - } else { - error_setg(errp, "Primary device not found"); -@@ -856,7 +856,6 @@ static DeviceState *virtio_connect_failover_devices(VirtIONet *n, Error **errp) - prim_dev = virtio_net_find_primary(n, &err); - if (prim_dev) { - n->primary_device_id = g_strdup(prim_dev->id); -- n->primary_device_opts = prim_dev->opts; - } else { - error_propagate(errp, err); - } -@@ -3113,14 +3112,6 @@ static bool failover_replug_primary(VirtIONet *n, Error **errp) - if (!pdev->partially_hotplugged) { - return true; - } -- if (!n->primary_device_opts) { -- n->primary_device_opts = qemu_opts_from_qdict(qemu_find_opts("device"), -- n->primary_device_dict, -- errp); -- if (!n->primary_device_opts) { -- return false; -- } -- } - primary_bus = n->primary_dev->parent_bus; - if (!primary_bus) { - error_setg(errp, "virtio_net: couldn't find primary bus"); -@@ -3211,8 +3202,6 @@ static int virtio_net_primary_should_be_hidden(DeviceListener *listener, - goto out; - } - -- n->primary_device_opts = device_opts; -- - /* failover_primary_hidden is set 
during feature negotiation */ - hide = qatomic_read(&n->failover_primary_hidden); - -diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h -index ca68be759f..7159e6c0a0 100644 ---- a/include/hw/virtio/virtio-net.h -+++ b/include/hw/virtio/virtio-net.h -@@ -202,7 +202,6 @@ struct VirtIONet { - AnnounceTimer announce_timer; - bool needs_vnet_hdr_swap; - bool mtu_bypass_backend; -- QemuOpts *primary_device_opts; - QDict *primary_device_dict; - DeviceState *primary_dev; - char *primary_device_id; --- -2.27.0 - diff --git a/kvm-failover-Remove-unused-parameter.patch b/kvm-failover-Remove-unused-parameter.patch deleted file mode 100644 index b95c033..0000000 --- a/kvm-failover-Remove-unused-parameter.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 69ba4fc743b29e9e3f595c1e96596204abc1aa0e Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:24 -0500 -Subject: [PATCH 27/54] failover: Remove unused parameter - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-5-lvivier@redhat.com> -Patchwork-id: 101243 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 04/27] failover: Remove unused parameter -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Jens Freimann -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-6-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 82ceb65799855efb0db965a6ef86d81ae1c8bcd7) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/virtio-net.c | 8 +++----- - 1 file changed, 3 insertions(+), 5 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 786d313330..3f658d6246 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -855,9 +855,7 @@ static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp) - return dev; - } - --static DeviceState *virtio_connect_failover_devices(VirtIONet *n, -- DeviceState *dev, -- Error **errp) -+static DeviceState *virtio_connect_failover_devices(VirtIONet *n, Error **errp) - { - DeviceState *prim_dev = NULL; - Error *err = NULL; -@@ -928,7 +926,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) - qatomic_set(&n->primary_should_be_hidden, false); - failover_add_primary(n, &err); - if (err) { -- n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err); -+ n->primary_dev = virtio_connect_failover_devices(n, &err); - if (err) { - goto out_err; - } -@@ -3164,7 +3162,7 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, - should_be_hidden = qatomic_read(&n->primary_should_be_hidden); - - if (!n->primary_dev) { -- n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err); -+ n->primary_dev = virtio_connect_failover_devices(n, &err); - if (!n->primary_dev) { - return; - } --- -2.27.0 - diff --git a/kvm-failover-Rename-bool-to-failover_primary_hidden.patch b/kvm-failover-Rename-bool-to-failover_primary_hidden.patch deleted file mode 100644 index f2c3ff1..0000000 --- a/kvm-failover-Rename-bool-to-failover_primary_hidden.patch +++ /dev/null @@ -1,113 +0,0 @@ -From 6d228bc32fa1e6c9619dc99dc10bfa3a9116bbf0 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:27 -0500 -Subject: [PATCH 30/54] failover: Rename bool to failover_primary_hidden - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-8-lvivier@redhat.com> -Patchwork-id: 101260 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 07/27] failover: Rename bool to 
failover_primary_hidden -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Jens Freimann -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -You should not use passive naming variables. -And once there, be able to search for them. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-9-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit e2bde83e23d3cfc1d90911c74500fd2e3b0b04fa) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/net/virtio-net.c | 14 +++++++------- - include/hw/virtio/virtio-net.h | 3 ++- - 2 files changed, 9 insertions(+), 8 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 3e82108d42..c221671852 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -916,7 +916,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) - - if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) { - qapi_event_send_failover_negotiated(n->netclient_name); -- qatomic_set(&n->primary_should_be_hidden, false); -+ qatomic_set(&n->failover_primary_hidden, false); - failover_add_primary(n, &err); - if (err) { - n->primary_dev = virtio_connect_failover_devices(n, &err); -@@ -3127,7 +3127,7 @@ static bool failover_replug_primary(VirtIONet *n, Error **errp) - return false; - } - qdev_set_parent_bus(n->primary_dev, primary_bus, &error_abort); -- qatomic_set(&n->primary_should_be_hidden, false); -+ qatomic_set(&n->failover_primary_hidden, false); - hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev); - if (hotplug_ctrl) { - hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err); -@@ -3148,7 +3148,7 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, - bool should_be_hidden; - Error *err = NULL; - -- should_be_hidden = 
qatomic_read(&n->primary_should_be_hidden); -+ should_be_hidden = qatomic_read(&n->failover_primary_hidden); - - if (!n->primary_dev) { - n->primary_dev = virtio_connect_failover_devices(n, &err); -@@ -3163,7 +3163,7 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, - qdev_get_vmsd(n->primary_dev), - n->primary_dev); - qapi_event_send_unplug_primary(n->primary_device_id); -- qatomic_set(&n->primary_should_be_hidden, true); -+ qatomic_set(&n->failover_primary_hidden, true); - } else { - warn_report("couldn't unplug primary device"); - } -@@ -3213,8 +3213,8 @@ static int virtio_net_primary_should_be_hidden(DeviceListener *listener, - - n->primary_device_opts = device_opts; - -- /* primary_should_be_hidden is set during feature negotiation */ -- hide = qatomic_read(&n->primary_should_be_hidden); -+ /* failover_primary_hidden is set during feature negotiation */ -+ hide = qatomic_read(&n->failover_primary_hidden); - - if (n->primary_device_dict) { - g_free(n->primary_device_id); -@@ -3271,7 +3271,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) - if (n->failover) { - n->primary_listener.should_be_hidden = - virtio_net_primary_should_be_hidden; -- qatomic_set(&n->primary_should_be_hidden, true); -+ qatomic_set(&n->failover_primary_hidden, true); - device_listener_register(&n->primary_listener); - n->migration_state.notify = virtio_net_migration_state_notifier; - add_migration_state_change_notifier(&n->migration_state); -diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h -index c8da637d40..ca68be759f 100644 ---- a/include/hw/virtio/virtio-net.h -+++ b/include/hw/virtio/virtio-net.h -@@ -207,7 +207,8 @@ struct VirtIONet { - DeviceState *primary_dev; - char *primary_device_id; - char *standby_id; -- bool primary_should_be_hidden; -+ /* primary failover device is hidden*/ -+ bool failover_primary_hidden; - bool failover; - DeviceListener primary_listener; - Notifier migration_state; --- -2.27.0 - diff --git 
a/kvm-failover-Rename-function-to-hide_device.patch b/kvm-failover-Rename-function-to-hide_device.patch deleted file mode 100644 index bbc86b8..0000000 --- a/kvm-failover-Rename-function-to-hide_device.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 1fbde79ae60990ff0439f3f3bb060f7d723e4910 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:35 -0500 -Subject: [PATCH 38/54] failover: Rename function to hide_device() - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-16-lvivier@redhat.com> -Patchwork-id: 101264 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 15/27] failover: Rename function to hide_device() -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -You should not use pasive. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-17-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit b91ad981b867e15171234efc3f2ab4074d377cef) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/core/qdev.c | 4 ++-- - hw/net/virtio-net.c | 7 +++---- - include/hw/qdev-core.h | 28 +++++++++++++++------------- - 3 files changed, 20 insertions(+), 19 deletions(-) - -diff --git a/hw/core/qdev.c b/hw/core/qdev.c -index 8f4b8f3cc1..cbdff0b6c6 100644 ---- a/hw/core/qdev.c -+++ b/hw/core/qdev.c -@@ -217,8 +217,8 @@ bool qdev_should_hide_device(QemuOpts *opts) - DeviceListener *listener; - - QTAILQ_FOREACH(listener, &device_listeners, link) { -- if (listener->should_be_hidden) { -- if (listener->should_be_hidden(listener, opts)) { -+ if (listener->hide_device) { -+ if (listener->hide_device(listener, opts)) { - return true; - } - } -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 9f12d33da0..747614ff2a 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3161,8 +3161,8 @@ static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) - virtio_net_handle_migration_primary(n, s); - } - --static bool virtio_net_primary_should_be_hidden(DeviceListener *listener, -- QemuOpts *device_opts) -+static bool failover_hide_primary_device(DeviceListener *listener, -+ QemuOpts *device_opts) - { - VirtIONet *n = container_of(listener, VirtIONet, primary_listener); - bool hide; -@@ -3220,8 +3220,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) - } - - if (n->failover) { -- n->primary_listener.should_be_hidden = -- virtio_net_primary_should_be_hidden; -+ n->primary_listener.hide_device = failover_hide_primary_device; - qatomic_set(&n->failover_primary_hidden, true); - device_listener_register(&n->primary_listener); - n->migration_state.notify = virtio_net_migration_state_notifier; -diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h -index 250f4edef6..6ac86db44e 100644 ---- a/include/hw/qdev-core.h -+++ b/include/hw/qdev-core.h -@@ -81,16 +81,17 @@ typedef void (*BusUnrealize)(BusState *bus); - * - * - * # Hiding a device # -- * To hide a device, a DeviceListener function 
should_be_hidden() needs to -+ * To hide a device, a DeviceListener function hide_device() needs to - * be registered. -- * It can be used to defer adding a device and therefore hide it from the -- * guest. The handler registering to this DeviceListener can save the QOpts -- * passed to it for re-using it later and must return that it wants the device -- * to be/remain hidden or not. When the handler function decides the device -- * shall not be hidden it will be added in qdev_device_add() and -- * realized as any other device. Otherwise qdev_device_add() will return early -- * without adding the device. The guest will not see a "hidden" device -- * until it was marked don't hide and qdev_device_add called again. -+ * It can be used to defer adding a device and therefore hide it from -+ * the guest. The handler registering to this DeviceListener can save -+ * the QOpts passed to it for re-using it later. It must return if it -+ * wants the device to be hidden or visible. When the handler function -+ * decides the device shall be visible it will be added with -+ * qdev_device_add() and realized as any other device. Otherwise -+ * qdev_device_add() will return early without adding the device. The -+ * guest will not see a "hidden" device until it was marked visible -+ * and qdev_device_add called again. - * - */ - struct DeviceClass { -@@ -196,11 +197,12 @@ struct DeviceListener { - void (*realize)(DeviceListener *listener, DeviceState *dev); - void (*unrealize)(DeviceListener *listener, DeviceState *dev); - /* -- * This callback is called upon init of the DeviceState and allows to -- * inform qdev that a device should be hidden, depending on the device -- * opts, for example, to hide a standby device. -+ * This callback is called upon init of the DeviceState and -+ * informs qdev if a device should be visible or hidden. We can -+ * hide a failover device depending for example on the device -+ * opts. 
- */ -- bool (*should_be_hidden)(DeviceListener *listener, QemuOpts *device_opts); -+ bool (*hide_device)(DeviceListener *listener, QemuOpts *device_opts); - QTAILQ_ENTRY(DeviceListener) link; - }; - --- -2.27.0 - diff --git a/kvm-failover-Rename-to-failover_find_primary_device.patch b/kvm-failover-Rename-to-failover_find_primary_device.patch deleted file mode 100644 index 89e6060..0000000 --- a/kvm-failover-Rename-to-failover_find_primary_device.patch +++ /dev/null @@ -1,77 +0,0 @@ -From e1ea7c178c1762dca02e2c85f57ccfad1063c753 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:37 -0500 -Subject: [PATCH 40/54] failover: Rename to failover_find_primary_device() - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-18-lvivier@redhat.com> -Patchwork-id: 101263 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 17/27] failover: Rename to failover_find_primary_device() -Bugzilla: 1819991 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -This commit: -* Rename them to failover_find_primary_devices() so - - it starts with failover_ - - it don't connect anything, just find the primary device -* Create documentation for the function - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-19-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 85d3b93196e43c4493c118aa9e3a82fe657636b5) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/virtio-net.c | 12 +++++++++--- - 1 file changed, 9 insertions(+), 3 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index c6200b924e..ff82f1017d 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -826,7 +826,13 @@ static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp) - return ret; - } - --static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp) -+/** -+ * Find the primary device for this failover virtio-net -+ * -+ * @n: VirtIONet device -+ * @errp: returns an error if this function fails -+ */ -+static DeviceState *failover_find_primary_device(VirtIONet *n, Error **errp) - { - Error *err = NULL; - -@@ -891,7 +897,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) - qatomic_set(&n->failover_primary_hidden, false); - failover_add_primary(n, &err); - if (err) { -- n->primary_dev = virtio_net_find_primary(n, &err); -+ n->primary_dev = failover_find_primary_device(n, &err); - if (err) { - goto out_err; - } -@@ -3115,7 +3121,7 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, - should_be_hidden = qatomic_read(&n->failover_primary_hidden); - - if (!n->primary_dev) { -- n->primary_dev = virtio_net_find_primary(n, &err); -+ n->primary_dev = failover_find_primary_device(n, &err); - if (!n->primary_dev) { - return; - } --- -2.27.0 - diff --git a/kvm-failover-Use-always-atomics-for-primary_should_be_hi.patch b/kvm-failover-Use-always-atomics-for-primary_should_be_hi.patch deleted file mode 100644 index 41fa84a..0000000 --- a/kvm-failover-Use-always-atomics-for-primary_should_be_hi.patch +++ /dev/null @@ -1,49 +0,0 @@ -From be9147ddedc35a458b976a71fd947634ab71bb44 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:22 -0500 -Subject: [PATCH 25/54] failover: Use always atomics for - primary_should_be_hidden - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-3-lvivier@redhat.com> -Patchwork-id: 101247 
-O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 02/27] failover: Use always atomics for primary_should_be_hidden -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Jens Freimann -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-4-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 587f2fcb93eddf69736e00731a2da018a0e0a726) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/net/virtio-net.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 1011a524bf..a0fa63e7cb 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3136,7 +3136,7 @@ static bool failover_replug_primary(VirtIONet *n, Error **errp) - return false; - } - qdev_set_parent_bus(n->primary_dev, n->primary_bus, &error_abort); -- n->primary_should_be_hidden = false; -+ qatomic_set(&n->primary_should_be_hidden, false); - if (!qemu_opt_set_bool(n->primary_device_opts, - "partially_hotplugged", true, errp)) { - return false; --- -2.27.0 - diff --git a/kvm-failover-We-don-t-need-to-cache-primary_device_id-an.patch b/kvm-failover-We-don-t-need-to-cache-primary_device_id-an.patch deleted file mode 100644 index 1012001..0000000 --- a/kvm-failover-We-don-t-need-to-cache-primary_device_id-an.patch +++ /dev/null @@ -1,128 +0,0 @@ -From 845e4811506c58b8f1f4cfcb183994f1d0f4d66b Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:43 -0500 -Subject: [PATCH 46/54] failover: We don't need to cache primary_device_id - anymore - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-24-lvivier@redhat.com> -Patchwork-id: 101258 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 23/27] failover: We don't need to cache primary_device_id 
anymore -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-25-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 3abad4a221e050d43fa8540677b285057642baaf) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/net/virtio-net.c | 20 ++++++++++---------- - include/hw/virtio/virtio-net.h | 1 - - 2 files changed, 10 insertions(+), 11 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 2c502c13fd..746ed3fb71 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -824,6 +824,7 @@ static char *failover_find_primary_device_id(VirtIONet *n) - Error *err = NULL; - FailoverId fid; - -+ fid.n = n; - if (!qemu_opts_foreach(qemu_find_opts("device"), - failover_set_primary, &fid, &err)) { - return NULL; -@@ -835,12 +836,17 @@ static void failover_add_primary(VirtIONet *n, Error **errp) - { - Error *err = NULL; - QemuOpts *opts; -+ char *id; - - if (n->primary_dev) { - return; - } - -- opts = qemu_opts_find(qemu_find_opts("device"), n->primary_device_id); -+ id = failover_find_primary_device_id(n); -+ if (!id) { -+ return; -+ } -+ opts = qemu_opts_find(qemu_find_opts("device"), id); - if (opts) { - n->primary_dev = qdev_device_add(opts, &err); - if (err) { -@@ -868,9 +874,8 @@ static DeviceState *failover_find_primary_device(VirtIONet *n) - if (!id) { - return NULL; - } -- n->primary_device_id = g_strdup(id); - -- return qdev_find_recursive(sysbus_get_default(), n->primary_device_id); -+ return qdev_find_recursive(sysbus_get_default(), id); - } - - static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) -@@ -3160,7 +3165,7 @@ static void 
virtio_net_handle_migration_primary(VirtIONet *n, - vmstate_unregister(VMSTATE_IF(n->primary_dev), - qdev_get_vmsd(n->primary_dev), - n->primary_dev); -- qapi_event_send_unplug_primary(n->primary_device_id); -+ qapi_event_send_unplug_primary(n->primary_dev->id); - qatomic_set(&n->failover_primary_hidden, true); - } else { - warn_report("couldn't unplug primary device"); -@@ -3186,7 +3191,6 @@ static bool failover_hide_primary_device(DeviceListener *listener, - QemuOpts *device_opts) - { - VirtIONet *n = container_of(listener, VirtIONet, primary_listener); -- bool hide; - const char *standby_id; - - if (!device_opts) { -@@ -3198,10 +3202,7 @@ static bool failover_hide_primary_device(DeviceListener *listener, - } - - /* failover_primary_hidden is set during feature negotiation */ -- hide = qatomic_read(&n->failover_primary_hidden); -- g_free(n->primary_device_id); -- n->primary_device_id = g_strdup(device_opts->id); -- return hide; -+ return qatomic_read(&n->failover_primary_hidden); - } - - static void virtio_net_device_realize(DeviceState *dev, Error **errp) -@@ -3378,7 +3379,6 @@ static void virtio_net_device_unrealize(DeviceState *dev) - - if (n->failover) { - device_listener_unregister(&n->primary_listener); -- g_free(n->primary_device_id); - } - - max_queues = n->multiqueue ? 
n->max_queues : 1; -diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h -index fe353d8299..efef64e02f 100644 ---- a/include/hw/virtio/virtio-net.h -+++ b/include/hw/virtio/virtio-net.h -@@ -203,7 +203,6 @@ struct VirtIONet { - bool needs_vnet_hdr_swap; - bool mtu_bypass_backend; - DeviceState *primary_dev; -- char *primary_device_id; - /* primary failover device is hidden*/ - bool failover_primary_hidden; - bool failover; --- -2.27.0 - diff --git a/kvm-failover-fix-indentantion.patch b/kvm-failover-fix-indentantion.patch deleted file mode 100644 index 194ae92..0000000 --- a/kvm-failover-fix-indentantion.patch +++ /dev/null @@ -1,171 +0,0 @@ -From 619e58f19e3e20c4144eb1259ce2f338d09176c1 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:21 -0500 -Subject: [PATCH 24/54] failover: fix indentantion - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-2-lvivier@redhat.com> -Patchwork-id: 101240 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 01/27] failover: fix indentantion -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Jens Freimann -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -Once there, remove not needed cast. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-3-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 1c775d65d4bff3a5a9876e398b2e689bc45aa1f7) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/virtio-net.c | 33 +++++++++++++++------------------ - softmmu/qdev-monitor.c | 4 ++-- - 2 files changed, 17 insertions(+), 20 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 9179013ac4..1011a524bf 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -797,7 +797,7 @@ static void failover_add_primary(VirtIONet *n, Error **errp) - } - - n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"), -- n->primary_device_id); -+ n->primary_device_id); - if (n->primary_device_opts) { - n->primary_dev = qdev_device_add(n->primary_device_opts, &err); - if (err) { -@@ -814,9 +814,9 @@ static void failover_add_primary(VirtIONet *n, Error **errp) - } else { - error_setg(errp, "Primary device not found"); - error_append_hint(errp, "Virtio-net failover will not work. Make " -- "sure primary device has parameter" -- " failover_pair_id=\n"); --} -+ "sure primary device has parameter" -+ " failover_pair_id=\n"); -+ } - error_propagate(errp, err); - } - -@@ -824,7 +824,6 @@ static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp) - { - VirtIONet *n = opaque; - int ret = 0; -- - const char *standby_id = qemu_opt_get(opts, "failover_pair_id"); - - if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) { -@@ -841,14 +840,14 @@ static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp) - Error *err = NULL; - - if (qemu_opts_foreach(qemu_find_opts("device"), -- is_my_primary, n, &err)) { -+ is_my_primary, n, &err)) { - if (err) { - error_propagate(errp, err); - return NULL; - } - if (n->primary_device_id) { - dev = qdev_find_recursive(sysbus_get_default(), -- n->primary_device_id); -+ n->primary_device_id); - } else { - error_setg(errp, "Primary device id not found"); - return NULL; -@@ -857,8 +856,6 @@ static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp) - return dev; - } - -- -- - static DeviceState *virtio_connect_failover_devices(VirtIONet *n, - 
DeviceState *dev, - Error **errp) -@@ -3126,9 +3123,9 @@ static bool failover_replug_primary(VirtIONet *n, Error **errp) - return true; - } - if (!n->primary_device_opts) { -- n->primary_device_opts = qemu_opts_from_qdict( -- qemu_find_opts("device"), -- n->primary_device_dict, errp); -+ n->primary_device_opts = qemu_opts_from_qdict(qemu_find_opts("device"), -+ n->primary_device_dict, -+ errp); - if (!n->primary_device_opts) { - return false; - } -@@ -3176,8 +3173,8 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, - if (migration_in_setup(s) && !should_be_hidden) { - if (failover_unplug_primary(n)) { - vmstate_unregister(VMSTATE_IF(n->primary_dev), -- qdev_get_vmsd(n->primary_dev), -- n->primary_dev); -+ qdev_get_vmsd(n->primary_dev), -+ n->primary_dev); - qapi_event_send_unplug_primary(n->primary_device_id); - qatomic_set(&n->primary_should_be_hidden, true); - } else { -@@ -3201,7 +3198,7 @@ static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) - } - - static int virtio_net_primary_should_be_hidden(DeviceListener *listener, -- QemuOpts *device_opts) -+ QemuOpts *device_opts) - { - VirtIONet *n = container_of(listener, VirtIONet, primary_listener); - bool match_found = false; -@@ -3211,11 +3208,11 @@ static int virtio_net_primary_should_be_hidden(DeviceListener *listener, - return -1; - } - n->primary_device_dict = qemu_opts_to_qdict(device_opts, -- n->primary_device_dict); -+ n->primary_device_dict); - if (n->primary_device_dict) { - g_free(n->standby_id); - n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict, -- "failover_pair_id")); -+ "failover_pair_id")); - } - if (g_strcmp0(n->standby_id, n->netclient_name) == 0) { - match_found = true; -@@ -3235,7 +3232,7 @@ static int virtio_net_primary_should_be_hidden(DeviceListener *listener, - if (n->primary_device_dict) { - g_free(n->primary_device_id); - n->primary_device_id = g_strdup(qdict_get_try_str( -- n->primary_device_dict, "id")); -+ 
n->primary_device_dict, "id")); - if (!n->primary_device_id) { - warn_report("primary_device_id not set"); - } -diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c -index bf79d0bbcd..a25f5d612c 100644 ---- a/softmmu/qdev-monitor.c -+++ b/softmmu/qdev-monitor.c -@@ -573,10 +573,10 @@ void qdev_set_id(DeviceState *dev, const char *id) - } - - static int is_failover_device(void *opaque, const char *name, const char *value, -- Error **errp) -+ Error **errp) - { - if (strcmp(name, "failover_pair_id") == 0) { -- QemuOpts *opts = (QemuOpts *)opaque; -+ QemuOpts *opts = opaque; - - if (qdev_should_hide_device(opts)) { - return 1; --- -2.27.0 - diff --git a/kvm-failover-g_strcmp0-knows-how-to-handle-NULL.patch b/kvm-failover-g_strcmp0-knows-how-to-handle-NULL.patch deleted file mode 100644 index 20c58c0..0000000 --- a/kvm-failover-g_strcmp0-knows-how-to-handle-NULL.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 97b7137652441a3d458f3d9f7bc326047de185c3 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:28 -0500 -Subject: [PATCH 31/54] failover: g_strcmp0() knows how to handle NULL - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-9-lvivier@redhat.com> -Patchwork-id: 101249 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 08/27] failover: g_strcmp0() knows how to handle NULL -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Jens Freimann -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-10-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 518eda9fda49da910d47f5baf66a1c0d1d30cebd) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/virtio-net.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index c221671852..e334f05352 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -818,7 +818,7 @@ static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp) - int ret = 0; - const char *standby_id = qemu_opt_get(opts, "failover_pair_id"); - -- if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) { -+ if (g_strcmp0(standby_id, n->netclient_name) == 0) { - n->primary_device_id = g_strdup(opts->id); - ret = 1; - } --- -2.27.0 - diff --git a/kvm-failover-make-sure-that-id-always-exist.patch b/kvm-failover-make-sure-that-id-always-exist.patch deleted file mode 100644 index ce1ed0a..0000000 --- a/kvm-failover-make-sure-that-id-always-exist.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 06c77533c61f65886bf0a9236d8f13085b2f3e51 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:40 -0500 -Subject: [PATCH 43/54] failover: make sure that id always exist - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-21-lvivier@redhat.com> -Patchwork-id: 101257 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 20/27] failover: make sure that id always exist -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -We check that it exist at device creation time, so we don't have to -check anywhere else. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-22-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit fec037c1e2da0a7ea54eabce65cc14d461fdc5eb) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/virtio-net.c | 3 --- - softmmu/qdev-monitor.c | 4 ++++ - 2 files changed, 4 insertions(+), 3 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index ff82f1017d..c708c03cf6 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3173,9 +3173,6 @@ static bool failover_hide_primary_device(DeviceListener *listener, - hide = qatomic_read(&n->failover_primary_hidden); - g_free(n->primary_device_id); - n->primary_device_id = g_strdup(device_opts->id); -- if (!n->primary_device_id) { -- warn_report("primary_device_id not set"); -- } - return hide; - } - -diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c -index 0e10f0466f..301089eaea 100644 ---- a/softmmu/qdev-monitor.c -+++ b/softmmu/qdev-monitor.c -@@ -613,6 +613,10 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) - } - - if (qemu_opt_get(opts, "failover_pair_id")) { -+ if (!opts->id) { -+ error_setg(errp, "Device with failover_pair_id don't have id"); -+ return NULL; -+ } - if (qdev_should_hide_device(opts)) { - if (bus && !qbus_is_hotpluggable(bus)) { - error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name); --- -2.27.0 - diff --git a/kvm-failover-primary-bus-is-only-used-once-and-where-it-.patch b/kvm-failover-primary-bus-is-only-used-once-and-where-it-.patch deleted file mode 100644 index b19d5d0..0000000 --- a/kvm-failover-primary-bus-is-only-used-once-and-where-it-.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 0c0190ed0d933a6900230427c374e4b93faab73b Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:23 -0500 -Subject: [PATCH 26/54] failover: primary bus is only used once, and where it - is set - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-4-lvivier@redhat.com> -Patchwork-id: 101245 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 03/27] failover: primary bus is only used once, and where it is set -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Jens Freimann -RH-Acked-by: Dr. 
David Alan Gilbert -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -Just remove the struct member. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-5-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 78274682b79d48e8de76c817c67c3cfbb76dc2ee) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/net/virtio-net.c | 8 ++++---- - include/hw/virtio/virtio-net.h | 1 - - 2 files changed, 4 insertions(+), 5 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index a0fa63e7cb..786d313330 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -804,7 +804,6 @@ static void failover_add_primary(VirtIONet *n, Error **errp) - qemu_opts_del(n->primary_device_opts); - } - if (n->primary_dev) { -- n->primary_bus = n->primary_dev->parent_bus; - if (err) { - qdev_unplug(n->primary_dev, &err); - qdev_set_id(n->primary_dev, ""); -@@ -3118,6 +3117,7 @@ static bool failover_replug_primary(VirtIONet *n, Error **errp) - Error *err = NULL; - HotplugHandler *hotplug_ctrl; - PCIDevice *pdev = PCI_DEVICE(n->primary_dev); -+ BusState *primary_bus; - - if (!pdev->partially_hotplugged) { - return true; -@@ -3130,12 +3130,12 @@ static bool failover_replug_primary(VirtIONet *n, Error **errp) - return false; - } - } -- n->primary_bus = n->primary_dev->parent_bus; -- if (!n->primary_bus) { -+ primary_bus = n->primary_dev->parent_bus; -+ if (!primary_bus) { - error_setg(errp, "virtio_net: couldn't find primary bus"); - return false; - } -- qdev_set_parent_bus(n->primary_dev, n->primary_bus, &error_abort); -+ qdev_set_parent_bus(n->primary_dev, primary_bus, &error_abort); - qatomic_set(&n->primary_should_be_hidden, false); - if (!qemu_opt_set_bool(n->primary_device_opts, - "partially_hotplugged", true, errp)) { -diff --git a/include/hw/virtio/virtio-net.h 
b/include/hw/virtio/virtio-net.h -index f4852ac27b..c8da637d40 100644 ---- a/include/hw/virtio/virtio-net.h -+++ b/include/hw/virtio/virtio-net.h -@@ -205,7 +205,6 @@ struct VirtIONet { - QemuOpts *primary_device_opts; - QDict *primary_device_dict; - DeviceState *primary_dev; -- BusState *primary_bus; - char *primary_device_id; - char *standby_id; - bool primary_should_be_hidden; --- -2.27.0 - diff --git a/kvm-failover-qdev_device_add-returns-err-or-dev-set.patch b/kvm-failover-qdev_device_add-returns-err-or-dev-set.patch deleted file mode 100644 index 7b0dbf2..0000000 --- a/kvm-failover-qdev_device_add-returns-err-or-dev-set.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 63f2415d2cee7bcf24e7f3dc515c5155731071e6 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:26 -0500 -Subject: [PATCH 29/54] failover: qdev_device_add() returns err or dev set - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-7-lvivier@redhat.com> -Patchwork-id: 101252 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 06/27] failover: qdev_device_add() returns err or dev set -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Jens Freimann -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -Never both. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-8-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 3d1c7a9782d19052505aabc8f2c134ccd6f3f3fb) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/virtio-net.c | 7 ------- - 1 file changed, 7 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 6ca85627d8..3e82108d42 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -803,13 +803,6 @@ static void failover_add_primary(VirtIONet *n, Error **errp) - if (err) { - qemu_opts_del(n->primary_device_opts); - } -- if (n->primary_dev) { -- if (err) { -- qdev_unplug(n->primary_dev, &err); -- qdev_set_id(n->primary_dev, ""); -- -- } -- } - } else { - error_setg(errp, "Primary device not found"); - error_append_hint(errp, "Virtio-net failover will not work. Make " --- -2.27.0 - diff --git a/kvm-failover-remove-failover_find_primary_device-error-p.patch b/kvm-failover-remove-failover_find_primary_device-error-p.patch deleted file mode 100644 index aa16347..0000000 --- a/kvm-failover-remove-failover_find_primary_device-error-p.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 96883a1a05766ac6c1a2a064f40aab6c0bd54861 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:41 -0500 -Subject: [PATCH 44/54] failover: remove failover_find_primary_device() error - parameter - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-22-lvivier@redhat.com> -Patchwork-id: 101265 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 21/27] failover: remove failover_find_primary_device() error parameter -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -It can never give one error. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-23-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 0a0a27d66bcb275e5b984d8758880a7eff75464e) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/virtio-net.c | 9 +++------ - 1 file changed, 3 insertions(+), 6 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index c708c03cf6..b994796734 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -832,7 +832,7 @@ static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp) - * @n: VirtIONet device - * @errp: returns an error if this function fails - */ --static DeviceState *failover_find_primary_device(VirtIONet *n, Error **errp) -+static DeviceState *failover_find_primary_device(VirtIONet *n) - { - Error *err = NULL; - -@@ -897,10 +897,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) - qatomic_set(&n->failover_primary_hidden, false); - failover_add_primary(n, &err); - if (err) { -- n->primary_dev = failover_find_primary_device(n, &err); -- if (err) { -- goto out_err; -- } -+ n->primary_dev = failover_find_primary_device(n); - failover_add_primary(n, &err); - if (err) { - goto out_err; -@@ -3121,7 +3118,7 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, - should_be_hidden = qatomic_read(&n->failover_primary_hidden); - - if (!n->primary_dev) { -- n->primary_dev = failover_find_primary_device(n, &err); -+ n->primary_dev = failover_find_primary_device(n); - if (!n->primary_dev) { - return; - } --- -2.27.0 - diff --git a/kvm-failover-remove-standby_id-variable.patch b/kvm-failover-remove-standby_id-variable.patch deleted file mode 100644 index c16f2ff..0000000 --- a/kvm-failover-remove-standby_id-variable.patch +++ /dev/null @@ -1,89 +0,0 @@ -From cead8b9c03911360666ac3bb56d7b1db068ade36 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:30 -0500 -Subject: [PATCH 33/54] failover: remove standby_id variable - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-11-lvivier@redhat.com> -Patchwork-id: 101248 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 10/27] failover: remove standby_id variable -Bugzilla: 1819991 
-RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -We can calculate it, and we only use it once anyways. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-12-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 4f0303aed87f83715055e558176046a8a3d9b987) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/net/virtio-net.c | 11 +++-------- - include/hw/virtio/virtio-net.h | 1 - - 2 files changed, 3 insertions(+), 9 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 2a99b0e0f6..953d5c2bc8 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3181,23 +3181,19 @@ static int virtio_net_primary_should_be_hidden(DeviceListener *listener, - VirtIONet *n = container_of(listener, VirtIONet, primary_listener); - bool match_found = false; - bool hide = false; -+ const char *standby_id; - - if (!device_opts) { - return -1; - } - n->primary_device_dict = qemu_opts_to_qdict(device_opts, - n->primary_device_dict); -- if (n->primary_device_dict) { -- g_free(n->standby_id); -- n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict, -- "failover_pair_id")); -- } -- if (g_strcmp0(n->standby_id, n->netclient_name) == 0) { -+ standby_id = qemu_opt_get(device_opts, "failover_pair_id"); -+ if (g_strcmp0(standby_id, n->netclient_name) == 0) { - match_found = true; - } else { - match_found = false; - hide = false; -- g_free(n->standby_id); - n->primary_device_dict = NULL; - goto out; - } -@@ -3400,7 +3396,6 @@ static void virtio_net_device_unrealize(DeviceState *dev) - if (n->failover) { - device_listener_unregister(&n->primary_listener); - g_free(n->primary_device_id); -- g_free(n->standby_id); - qobject_unref(n->primary_device_dict); - 
n->primary_device_dict = NULL; - } -diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h -index 7159e6c0a0..a055f39dd6 100644 ---- a/include/hw/virtio/virtio-net.h -+++ b/include/hw/virtio/virtio-net.h -@@ -205,7 +205,6 @@ struct VirtIONet { - QDict *primary_device_dict; - DeviceState *primary_dev; - char *primary_device_id; -- char *standby_id; - /* primary failover device is hidden*/ - bool failover_primary_hidden; - bool failover; --- -2.27.0 - diff --git a/kvm-failover-should_be_hidden-should-take-a-bool.patch b/kvm-failover-should_be_hidden-should-take-a-bool.patch deleted file mode 100644 index b0fb927..0000000 --- a/kvm-failover-should_be_hidden-should-take-a-bool.patch +++ /dev/null @@ -1,144 +0,0 @@ -From 8dadc3183e8e75e47b5f5e39823b9eaf950cf4fe Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:34 -0500 -Subject: [PATCH 37/54] failover: should_be_hidden() should take a bool - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-15-lvivier@redhat.com> -Patchwork-id: 101241 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 14/27] failover: should_be_hidden() should take a bool -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -We didn't use at all the -1 value, and we don't really care. It was -only used for the cases when this is not the device that we are -searching for. And in that case we should not hide the device. - -Once there, simplify virtio-Snet_primary_should_be_hidden. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-16-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 89631fed27bd76b0292d8b2a78291ea96185c87d) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/core/qdev.c | 19 +++++-------------- - hw/net/virtio-net.c | 27 +++++++-------------------- - include/hw/qdev-core.h | 2 +- - 3 files changed, 13 insertions(+), 35 deletions(-) - -diff --git a/hw/core/qdev.c b/hw/core/qdev.c -index 262bca716f..8f4b8f3cc1 100644 ---- a/hw/core/qdev.c -+++ b/hw/core/qdev.c -@@ -214,26 +214,17 @@ void device_listener_unregister(DeviceListener *listener) - - bool qdev_should_hide_device(QemuOpts *opts) - { -- int rc = -1; - DeviceListener *listener; - - QTAILQ_FOREACH(listener, &device_listeners, link) { -- if (listener->should_be_hidden) { -- /* -- * should_be_hidden_will return -- * 1 if device matches opts and it should be hidden -- * 0 if device matches opts and should not be hidden -- * -1 if device doesn't match ops -- */ -- rc = listener->should_be_hidden(listener, opts); -- } -- -- if (rc > 0) { -- break; -+ if (listener->should_be_hidden) { -+ if (listener->should_be_hidden(listener, opts)) { -+ return true; -+ } - } - } - -- return rc > 0; -+ return false; - } - - void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id, -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 881907d1bd..9f12d33da0 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3161,24 +3161,19 @@ static void virtio_net_migration_state_notifier(Notifier *notifier, void *data) - virtio_net_handle_migration_primary(n, s); - } - --static int virtio_net_primary_should_be_hidden(DeviceListener *listener, -- QemuOpts *device_opts) -+static bool virtio_net_primary_should_be_hidden(DeviceListener *listener, -+ QemuOpts *device_opts) - { - VirtIONet *n = container_of(listener, VirtIONet, primary_listener); -- bool match_found = false; -- bool hide = false; -+ bool hide; - const char *standby_id; - - if (!device_opts) { -- return -1; -+ return false; - } - standby_id = qemu_opt_get(device_opts, "failover_pair_id"); -- if (g_strcmp0(standby_id, n->netclient_name) == 0) { -- match_found = true; -- } else { -- 
match_found = false; -- hide = false; -- goto out; -+ if (g_strcmp0(standby_id, n->netclient_name) != 0) { -+ return false; - } - - /* failover_primary_hidden is set during feature negotiation */ -@@ -3188,15 +3183,7 @@ static int virtio_net_primary_should_be_hidden(DeviceListener *listener, - if (!n->primary_device_id) { - warn_report("primary_device_id not set"); - } -- --out: -- if (match_found && hide) { -- return 1; -- } else if (match_found && !hide) { -- return 0; -- } else { -- return -1; -- } -+ return hide; - } - - static void virtio_net_device_realize(DeviceState *dev, Error **errp) -diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h -index 5e737195b5..250f4edef6 100644 ---- a/include/hw/qdev-core.h -+++ b/include/hw/qdev-core.h -@@ -200,7 +200,7 @@ struct DeviceListener { - * inform qdev that a device should be hidden, depending on the device - * opts, for example, to hide a standby device. - */ -- int (*should_be_hidden)(DeviceListener *listener, QemuOpts *device_opts); -+ bool (*should_be_hidden)(DeviceListener *listener, QemuOpts *device_opts); - QTAILQ_ENTRY(DeviceListener) link; - }; - --- -2.27.0 - diff --git a/kvm-failover-simplify-failover_unplug_primary.patch b/kvm-failover-simplify-failover_unplug_primary.patch deleted file mode 100644 index 523b8ab..0000000 --- a/kvm-failover-simplify-failover_unplug_primary.patch +++ /dev/null @@ -1,86 +0,0 @@ -From cf70ee739171e208243b5b06a57d2517df8c3d91 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:45 -0500 -Subject: [PATCH 48/54] failover: simplify failover_unplug_primary - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-26-lvivier@redhat.com> -Patchwork-id: 101242 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 25/27] failover: simplify failover_unplug_primary -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. 
Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -We can calculate device just once. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-27-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 07a5d816d50f5f876d5fcd43724a6ff17cf59a4f) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/net/virtio-net.c | 25 ++++++++++--------------- - 1 file changed, 10 insertions(+), 15 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index b37e9cd1d9..9203d81780 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3146,34 +3146,29 @@ out: - return !err; - } - --static void virtio_net_handle_migration_primary(VirtIONet *n, -- MigrationState *s) -+static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s) - { - bool should_be_hidden; - Error *err = NULL; -+ DeviceState *dev = failover_find_primary_device(n); - -- should_be_hidden = qatomic_read(&n->failover_primary_hidden); -- -- if (!n->primary_dev) { -- n->primary_dev = failover_find_primary_device(n); -- if (!n->primary_dev) { -- return; -- } -+ if (!dev) { -+ return; - } - -+ should_be_hidden = qatomic_read(&n->failover_primary_hidden); -+ - if (migration_in_setup(s) && !should_be_hidden) { -- if (failover_unplug_primary(n, n->primary_dev)) { -- vmstate_unregister(VMSTATE_IF(n->primary_dev), -- qdev_get_vmsd(n->primary_dev), -- n->primary_dev); -- qapi_event_send_unplug_primary(n->primary_dev->id); -+ if (failover_unplug_primary(n, dev)) { -+ vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev); -+ qapi_event_send_unplug_primary(dev->id); - qatomic_set(&n->failover_primary_hidden, true); - } else { - warn_report("couldn't unplug primary device"); - } - } else if (migration_has_failed(s)) { - /* We already unplugged the device let's plug it back */ -- if (!failover_replug_primary(n, 
n->primary_dev, &err)) { -+ if (!failover_replug_primary(n, dev, &err)) { - if (err) { - error_report_err(err); - } --- -2.27.0 - diff --git a/kvm-failover-simplify-qdev_device_add-failover-case.patch b/kvm-failover-simplify-qdev_device_add-failover-case.patch deleted file mode 100644 index dd04f26..0000000 --- a/kvm-failover-simplify-qdev_device_add-failover-case.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 27a1972d1a5961a8218d5a52fba16b67816635fe Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:38 -0500 -Subject: [PATCH 41/54] failover: simplify qdev_device_add() failover case - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-19-lvivier@redhat.com> -Patchwork-id: 101255 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 18/27] failover: simplify qdev_device_add() failover case -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -Just put allthe logic inside the same if. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-20-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 5f2ef3b0d032797b6bad9449dfece3a8111a8529) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - softmmu/qdev-monitor.c | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c -index a25f5d612c..12b7540f17 100644 ---- a/softmmu/qdev-monitor.c -+++ b/softmmu/qdev-monitor.c -@@ -600,7 +600,6 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) - const char *driver, *path; - DeviceState *dev = NULL; - BusState *bus = NULL; -- bool hide; - - driver = qemu_opt_get(opts, "driver"); - if (!driver) { -@@ -634,14 +633,16 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) - return NULL; - } - } -- hide = should_hide_device(opts); - -- if ((hide || qdev_hotplug) && bus && !qbus_is_hotpluggable(bus)) { -- error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name); -+ if (should_hide_device(opts)) { -+ if (bus && !qbus_is_hotpluggable(bus)) { -+ error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name); -+ } - return NULL; - } - -- if (hide) { -+ if (qdev_hotplug && bus && !qbus_is_hotpluggable(bus)) { -+ error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name); - return NULL; - } - --- -2.27.0 - diff --git a/kvm-failover-simplify-qdev_device_add.patch b/kvm-failover-simplify-qdev_device_add.patch deleted file mode 100644 index d69b72e..0000000 --- a/kvm-failover-simplify-qdev_device_add.patch +++ /dev/null @@ -1,89 +0,0 @@ -From 7822f8042e26cca6c1307e26c6f08d5f99636d90 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:39 -0500 -Subject: [PATCH 42/54] failover: simplify qdev_device_add() - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-20-lvivier@redhat.com> -Patchwork-id: 101256 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 19/27] failover: simplify qdev_device_add() -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. 
Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -We don't need to walk the opts by hand. qmp_opt_get() already does -that. And then we can remove the functions that did that walk. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-21-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 2e28095369f4eab516852fd49dde17c3bfd782f9) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - softmmu/qdev-monitor.c | 32 ++++++-------------------------- - 1 file changed, 6 insertions(+), 26 deletions(-) - -diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c -index 12b7540f17..0e10f0466f 100644 ---- a/softmmu/qdev-monitor.c -+++ b/softmmu/qdev-monitor.c -@@ -572,28 +572,6 @@ void qdev_set_id(DeviceState *dev, const char *id) - } - } - --static int is_failover_device(void *opaque, const char *name, const char *value, -- Error **errp) --{ -- if (strcmp(name, "failover_pair_id") == 0) { -- QemuOpts *opts = opaque; -- -- if (qdev_should_hide_device(opts)) { -- return 1; -- } -- } -- -- return 0; --} -- --static bool should_hide_device(QemuOpts *opts) --{ -- if (qemu_opt_foreach(opts, is_failover_device, opts, NULL) == 0) { -- return false; -- } -- return true; --} -- - DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) - { - DeviceClass *dc; -@@ -634,11 +612,13 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) - } - } - -- if (should_hide_device(opts)) { -- if (bus && !qbus_is_hotpluggable(bus)) { -- error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name); -+ if (qemu_opt_get(opts, "failover_pair_id")) { -+ if (qdev_should_hide_device(opts)) { -+ if (bus && !qbus_is_hotpluggable(bus)) { -+ error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name); -+ } -+ return NULL; - } -- return NULL; - } - - if (qdev_hotplug && bus && !qbus_is_hotpluggable(bus)) { --- -2.27.0 - diff --git 
a/kvm-failover-simplify-virtio_net_find_primary.patch b/kvm-failover-simplify-virtio_net_find_primary.patch deleted file mode 100644 index 63e35aa..0000000 --- a/kvm-failover-simplify-virtio_net_find_primary.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 24bd4b43c3f59c9c28f924da8ef7a9dacc0f2f52 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:33 -0500 -Subject: [PATCH 36/54] failover: simplify virtio_net_find_primary() - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-14-lvivier@redhat.com> -Patchwork-id: 101253 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 13/27] failover: simplify virtio_net_find_primary() -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -a - is_my_primary() never sets one error -b - If we return 1, primary_device_id is always set - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-15-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 7cf05b7ed8e84e89b873701e3dfcd56aa81b2d13) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/virtio-net.c | 18 +++--------------- - 1 file changed, 3 insertions(+), 15 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 70fa372c08..881907d1bd 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -828,24 +828,12 @@ static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp) - - static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp) - { -- DeviceState *dev = NULL; - Error *err = NULL; - -- if (qemu_opts_foreach(qemu_find_opts("device"), -- is_my_primary, n, &err)) { -- if (err) { -- error_propagate(errp, err); -- return NULL; -- } -- if (n->primary_device_id) { -- dev = qdev_find_recursive(sysbus_get_default(), -- n->primary_device_id); -- } else { -- error_setg(errp, "Primary device id not found"); -- return NULL; -- } -+ if (!qemu_opts_foreach(qemu_find_opts("device"), is_my_primary, n, &err)) { -+ return NULL; - } -- return dev; -+ return qdev_find_recursive(sysbus_get_default(), n->primary_device_id); - } - - static DeviceState *virtio_connect_failover_devices(VirtIONet *n, Error **errp) --- -2.27.0 - diff --git a/kvm-failover-split-failover_find_primary_device_id.patch b/kvm-failover-split-failover_find_primary_device_id.patch deleted file mode 100644 index 2b7efbb..0000000 --- a/kvm-failover-split-failover_find_primary_device_id.patch +++ /dev/null @@ -1,128 +0,0 @@ -From 2e3e87787776632d521ec5f08758973d42fc208e Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:42 -0500 -Subject: [PATCH 45/54] failover: split failover_find_primary_device_id() - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-23-lvivier@redhat.com> -Patchwork-id: 101244 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 22/27] failover: split failover_find_primary_device_id() -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. 
Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -So we can calculate the device id when we need it. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-24-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit f5e1847ba50a8d1adf66c0cf312e53c162e52487) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/net/virtio-net.c | 63 +++++++++++++++++++++++++++++++++------------ - 1 file changed, 47 insertions(+), 16 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index b994796734..2c502c13fd 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -788,6 +788,49 @@ static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) - return virtio_net_guest_offloads_by_features(vdev->guest_features); - } - -+typedef struct { -+ VirtIONet *n; -+ char *id; -+} FailoverId; -+ -+/** -+ * Set the id of the failover primary device -+ * -+ * @opaque: FailoverId to setup -+ * @opts: opts for device we are handling -+ * @errp: returns an error if this function fails -+ */ -+static int failover_set_primary(void *opaque, QemuOpts *opts, Error **errp) -+{ -+ FailoverId *fid = opaque; -+ const char *standby_id = qemu_opt_get(opts, "failover_pair_id"); -+ -+ if (g_strcmp0(standby_id, fid->n->netclient_name) == 0) { -+ fid->id = g_strdup(opts->id); -+ return 1; -+ } -+ -+ return 0; -+} -+ -+/** -+ * Find the primary device id for this failover virtio-net -+ * -+ * @n: VirtIONet device -+ * @errp: returns an error if this function fails -+ */ -+static char *failover_find_primary_device_id(VirtIONet *n) -+{ -+ Error *err = NULL; -+ FailoverId fid; -+ -+ if (!qemu_opts_foreach(qemu_find_opts("device"), -+ failover_set_primary, &fid, &err)) { -+ return NULL; -+ } -+ return fid.id; -+} -+ - static void failover_add_primary(VirtIONet *n, Error **errp) - { - Error *err = NULL; -@@ -812,20 
+855,6 @@ static void failover_add_primary(VirtIONet *n, Error **errp) - error_propagate(errp, err); - } - --static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp) --{ -- VirtIONet *n = opaque; -- int ret = 0; -- const char *standby_id = qemu_opt_get(opts, "failover_pair_id"); -- -- if (g_strcmp0(standby_id, n->netclient_name) == 0) { -- n->primary_device_id = g_strdup(opts->id); -- ret = 1; -- } -- -- return ret; --} -- - /** - * Find the primary device for this failover virtio-net - * -@@ -834,11 +863,13 @@ static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp) - */ - static DeviceState *failover_find_primary_device(VirtIONet *n) - { -- Error *err = NULL; -+ char *id = failover_find_primary_device_id(n); - -- if (!qemu_opts_foreach(qemu_find_opts("device"), is_my_primary, n, &err)) { -+ if (!id) { - return NULL; - } -+ n->primary_device_id = g_strdup(id); -+ - return qdev_find_recursive(sysbus_get_default(), n->primary_device_id); - } - --- -2.27.0 - diff --git a/kvm-failover-virtio_net_connect_failover_devices-does-no.patch b/kvm-failover-virtio_net_connect_failover_devices-does-no.patch deleted file mode 100644 index e04a77f..0000000 --- a/kvm-failover-virtio_net_connect_failover_devices-does-no.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 385df1f898e08c9cf0c90e543978cc68ee0c1097 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:36 -0500 -Subject: [PATCH 39/54] failover: virtio_net_connect_failover_devices() does - nothing - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-17-lvivier@redhat.com> -Patchwork-id: 101254 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 16/27] failover: virtio_net_connect_failover_devices() does nothing -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. 
Tsirkin - -From: Juan Quintela - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -It just calls virtio_net_find_primary(), so just update the callers. - -Signed-off-by: Juan Quintela -Message-Id: <20201118083748.1328-18-quintela@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 0763db4f2df3a92336d78e8b68a665f7d1a1bc66) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/net/virtio-net.c | 17 ++--------------- - 1 file changed, 2 insertions(+), 15 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 747614ff2a..c6200b924e 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -836,19 +836,6 @@ static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp) - return qdev_find_recursive(sysbus_get_default(), n->primary_device_id); - } - --static DeviceState *virtio_connect_failover_devices(VirtIONet *n, Error **errp) --{ -- DeviceState *prim_dev = NULL; -- Error *err = NULL; -- -- prim_dev = virtio_net_find_primary(n, &err); -- if (!prim_dev) { -- error_propagate(errp, err); -- } -- -- return prim_dev; --} -- - static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) - { - VirtIONet *n = VIRTIO_NET(vdev); -@@ -904,7 +891,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) - qatomic_set(&n->failover_primary_hidden, false); - failover_add_primary(n, &err); - if (err) { -- n->primary_dev = virtio_connect_failover_devices(n, &err); -+ n->primary_dev = virtio_net_find_primary(n, &err); - if (err) { - goto out_err; - } -@@ -3128,7 +3115,7 @@ static void virtio_net_handle_migration_primary(VirtIONet *n, - should_be_hidden = qatomic_read(&n->failover_primary_hidden); - - if (!n->primary_dev) { -- n->primary_dev = virtio_connect_failover_devices(n, &err); -+ n->primary_dev = virtio_net_find_primary(n, &err); - if (!n->primary_dev) { - return; - } --- -2.27.0 - diff --git 
a/kvm-hw-arm-smmuv3-Fix-addr_mask-for-range-based-invalida.patch b/kvm-hw-arm-smmuv3-Fix-addr_mask-for-range-based-invalida.patch deleted file mode 100644 index 1ac03d4..0000000 --- a/kvm-hw-arm-smmuv3-Fix-addr_mask-for-range-based-invalida.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 9768ea83a3f23f112514ad34d4abcd6e9590bb71 Mon Sep 17 00:00:00 2001 -From: Auger Eric -Date: Wed, 3 Feb 2021 20:31:27 -0500 -Subject: [PATCH 4/7] hw/arm/smmuv3: Fix addr_mask for range-based invalidation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Auger Eric -Message-id: <20210203203127.3613-1-eric.auger@redhat.com> -Patchwork-id: 100971 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH] hw/arm/smmuv3: Fix addr_mask for range-based invalidation -Bugzilla: 1834152 -RH-Acked-by: Gavin Shan -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Andrew Jones - -From: Zenghui Yu - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1834152 -BRANCH: rhel-av-8.4.0 -UPSTREAM: yes -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=34711554 - -When handling guest range-based IOTLB invalidation, we should decode the TG -field into the corresponding translation granule size so that we can pass -the correct invalidation range to backend. Set @granule to (tg * 2 + 10) to -properly emulate the architecture. 
- -Fixes: d52915616c05 ("hw/arm/smmuv3: Get prepared for range invalidation") -Signed-off-by: Zenghui Yu -Acked-by: Eric Auger -Message-id: 20210130043220.1345-1-yuzenghui@huawei.com -Signed-off-by: Peter Maydell -(cherry picked from commit dcda883cd21125c699419a3fc0fe182ea989d9c4) -Signed-off-by: Eric Auger -Signed-off-by: Eduardo Lima (Etrunko) ---- - hw/arm/smmuv3.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index bbca0e9f20..98b99d4fe8 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -801,7 +801,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, - { - SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu); - IOMMUTLBEvent event; -- uint8_t granule = tg; -+ uint8_t granule; - - if (!tg) { - SMMUEventInfo event = {.inval_ste_allowed = true}; -@@ -821,6 +821,8 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, - return; - } - granule = tt->granule_sz; -+ } else { -+ granule = tg * 2 + 10; - } - - event.type = IOMMU_NOTIFIER_UNMAP; --- -2.18.4 - diff --git a/kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch b/kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch deleted file mode 100644 index a6227d9..0000000 --- a/kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch +++ /dev/null @@ -1,80 +0,0 @@ -From efdd1b8911d5ae5c0eacbc63fd4fe85f0cc4614b Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Sun, 14 Mar 2021 15:54:19 -0400 -Subject: [PATCH 06/15] hw/intc/arm_gic: Fix interrupt ID in GICD_SGIR register -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -Message-id: <20210314155419.911760-2-jmaloy@redhat.com> -Patchwork-id: 101336 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] hw/intc/arm_gic: Fix interrupt ID in GICD_SGIR register -Bugzilla: 1936948 -RH-Acked-by: Auger Eric -RH-Acked-by: Andrew Jones -RH-Acked-by: Philippe Mathieu-Daudé - -From: Philippe Mathieu-Daudé - -Per the ARM 
Generic Interrupt Controller Architecture specification -(document "ARM IHI 0048B.b (ID072613)"), the SGIINTID field is 4 bit, -not 10: - - - 4.3 Distributor register descriptions - - 4.3.15 Software Generated Interrupt Register, GICD_SG - - - Table 4-21 GICD_SGIR bit assignments - - The Interrupt ID of the SGI to forward to the specified CPU - interfaces. The value of this field is the Interrupt ID, in - the range 0-15, for example a value of 0b0011 specifies - Interrupt ID 3. - -Correct the irq mask to fix an undefined behavior (which eventually -lead to a heap-buffer-overflow, see [Buglink]): - - $ echo 'writel 0x8000f00 0xff4affb0' | qemu-system-aarch64 -M virt,accel=qtest -qtest stdio - [I 1612088147.116987] OPENED - [R +0.278293] writel 0x8000f00 0xff4affb0 - ../hw/intc/arm_gic.c:1498:13: runtime error: index 944 out of bounds for type 'uint8_t [16][8]' - SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior ../hw/intc/arm_gic.c:1498:13 - -This fixes a security issue when running with KVM on Arm with -kernel-irqchip=off. (The default is kernel-irqchip=on, which is -unaffected, and which is also the correct choice for performance.) - -Cc: qemu-stable@nongnu.org -Fixes: CVE-2021-20221 -Fixes: 9ee6e8bb853 ("ARMv7 support.") -Buglink: https://bugs.launchpad.net/qemu/+bug/1913916 -Buglink: https://bugs.launchpad.net/qemu/+bug/1913917 -Reported-by: Alexander Bulekov -Signed-off-by: Philippe Mathieu-Daudé -Message-id: 20210131103401.217160-1-f4bug@amsat.org -Reviewed-by: Peter Maydell -Signed-off-by: Peter Maydell - -(cherry picked from commit edfe2eb4360cde4ed5d95bda7777edcb3510f76a) -Signed-off-by: Jon Maloy -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/intc/arm_gic.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c -index c60dc6b5e6..fbde60de05 100644 ---- a/hw/intc/arm_gic.c -+++ b/hw/intc/arm_gic.c -@@ -1474,7 +1474,7 @@ static void gic_dist_writel(void *opaque, hwaddr offset, - int target_cpu; - - cpu = gic_get_current_cpu(s); -- irq = value & 0x3ff; -+ irq = value & 0xf; - switch ((value >> 24) & 3) { - case 0: - mask = (value >> 16) & ALL_CPU_MASK; --- -2.27.0 - diff --git a/kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch b/kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch deleted file mode 100644 index 2e75110..0000000 --- a/kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch +++ /dev/null @@ -1,213 +0,0 @@ -From 78375038a68fee2e7b182b4f191d5ba53fbdcd72 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 23 Feb 2021 15:18:11 -0500 -Subject: [PATCH 52/54] i386: Add the support for AMD EPYC 3rd generation - processors - -RH-Author: Dr. David Alan Gilbert -Message-id: <20210223151811.27968-3-dgilbert@redhat.com> -Patchwork-id: 101198 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/2] i386: Add the support for AMD EPYC 3rd generation processors -Bugzilla: 1926785 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Peter Xu - -From: Babu Moger - -Adds the support for AMD 3rd generation processors. The model -display for the new processor will be EPYC-Milan. - -Adds the following new feature bits on top of the feature bits from -the first and second generation EPYC models. 
- -pcid : Process context identifiers support -ibrs : Indirect Branch Restricted Speculation -ssbd : Speculative Store Bypass Disable -erms : Enhanced REP MOVSB/STOSB support -fsrm : Fast Short REP MOVSB support -invpcid : Invalidate processor context ID -pku : Protection keys support -svme-addr-chk : SVM instructions address check for #GP handling - -Depends on the following kernel commits: -14c2bf81fcd2 ("KVM: SVM: Fix #GP handling for doubly-nested virtualization") -3b9c723ed7cf ("KVM: SVM: Add support for SVM instruction address check change") -4aa2691dcbd3 ("8ce1c461188799d863398dd2865d KVM: x86: Factor out x86 instruction emulation with decoding") -4407a797e941 ("KVM: SVM: Enable INVPCID feature on AMD") -9715092f8d7e ("KVM: X86: Move handling of INVPCID types to x86") -3f3393b3ce38 ("KVM: X86: Rename and move the function vmx_handle_memory_failure to x86.c") -830bd71f2c06 ("KVM: SVM: Remove set_cr_intercept, clr_cr_intercept and is_cr_intercept") -4c44e8d6c193 ("KVM: SVM: Add new intercept word in vmcb_control_area") -c62e2e94b9d4 ("KVM: SVM: Modify 64 bit intercept field to two 32 bit vectors") -9780d51dc2af ("KVM: SVM: Modify intercept_exceptions to generic intercepts") -30abaa88382c ("KVM: SVM: Change intercept_dr to generic intercepts") -03bfeeb988a9 ("KVM: SVM: Change intercept_cr to generic intercepts") -c45ad7229d13 ("KVM: SVM: Introduce vmcb_(set_intercept/clr_intercept/_is_intercept)") -a90c1ed9f11d ("(pcid) KVM: nSVM: Remove unused field") -fa44b82eb831 ("KVM: x86: Move MPK feature detection to common code") -38f3e775e9c2 ("x86/Kconfig: Update config and kernel doc for MPK feature on AMD") -37486135d3a7 ("KVM: x86: Fix pkru save/restore when guest CR4.PKE=0, move it to x86.c") - -Signed-off-by: Babu Moger -Message-Id: <161290460478.11352.8933244555799318236.stgit@bmoger-ubuntu> -Signed-off-by: Eduardo Habkost -(cherry picked from commit 623972ceae091b31331ae4a1dc94fe5cbb891937) -Signed-off-by: Danilo C. L. 
de Paula ---- - target/i386/cpu.c | 107 +++++++++++++++++++++++++++++++++++++++++++++- - target/i386/cpu.h | 4 ++ - 2 files changed, 110 insertions(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 372cba2942..523a97c0fb 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1029,7 +1029,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - "clzero", NULL, "xsaveerptr", NULL, - NULL, NULL, NULL, NULL, - NULL, "wbnoinvd", NULL, NULL, -- "ibpb", NULL, NULL, "amd-stibp", -+ "ibpb", NULL, "ibrs", "amd-stibp", - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - "amd-ssbd", "virt-ssbd", "amd-no-ssb", NULL, -@@ -1769,6 +1769,56 @@ static CPUCaches epyc_rome_cache_info = { - }, - }; - -+static CPUCaches epyc_milan_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 512 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 1024, -+ .lines_per_tag = 1, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 32 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 32768, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .complex_indexing = true, -+ }, -+}; -+ - /* The following VMX features are not supported by KVM and are left out in the - * CPU definitions: - * -@@ -4101,6 +4151,61 @@ static X86CPUDefinition builtin_x86_defs[] = { - .model_id = "AMD EPYC-Rome Processor", - 
.cache_info = &epyc_rome_cache_info, - }, -+ { -+ .name = "EPYC-Milan", -+ .level = 0xd, -+ .vendor = CPUID_VENDOR_AMD, -+ .family = 25, -+ .model = 1, -+ .stepping = 1, -+ .features[FEAT_1_EDX] = -+ CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | -+ CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | -+ CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | -+ CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | -+ CPUID_VME | CPUID_FP87, -+ .features[FEAT_1_ECX] = -+ CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | -+ CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | -+ CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | -+ CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 | -+ CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | -+ CPUID_EXT_PCID, -+ .features[FEAT_8000_0001_EDX] = -+ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | -+ CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | -+ CPUID_EXT2_SYSCALL, -+ .features[FEAT_8000_0001_ECX] = -+ CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | -+ CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | -+ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | -+ CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, -+ .features[FEAT_8000_0008_EBX] = -+ CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | -+ CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | -+ CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | -+ CPUID_8000_0008_EBX_AMD_SSBD, -+ .features[FEAT_7_0_EBX] = -+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | -+ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED | -+ CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | -+ CPUID_7_0_EBX_SHA_NI | CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_ERMS | -+ CPUID_7_0_EBX_INVPCID, -+ .features[FEAT_7_0_ECX] = -+ CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_PKU, -+ .features[FEAT_7_0_EDX] = -+ CPUID_7_0_EDX_FSRM, -+ .features[FEAT_XSAVE] 
= -+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | -+ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, -+ .features[FEAT_6_EAX] = -+ CPUID_6_EAX_ARAT, -+ .features[FEAT_SVM] = -+ CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE | CPUID_SVM_SVME_ADDR_CHK, -+ .xlevel = 0x8000001E, -+ .model_id = "AMD EPYC-Milan Processor", -+ .cache_info = &epyc_milan_cache_info, -+ }, - }; - - /* KVM-specific features that are automatically added/removed -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 4fdb552f93..92ca64a21b 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -811,8 +811,12 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; - #define CPUID_8000_0008_EBX_WBNOINVD (1U << 9) - /* Indirect Branch Prediction Barrier */ - #define CPUID_8000_0008_EBX_IBPB (1U << 12) -+/* Indirect Branch Restricted Speculation */ -+#define CPUID_8000_0008_EBX_IBRS (1U << 14) - /* Single Thread Indirect Branch Predictors */ - #define CPUID_8000_0008_EBX_STIBP (1U << 15) -+/* Speculative Store Bypass Disable */ -+#define CPUID_8000_0008_EBX_AMD_SSBD (1U << 24) - - #define CPUID_XSAVE_XSAVEOPT (1U << 0) - #define CPUID_XSAVE_XSAVEC (1U << 1) --- -2.27.0 - diff --git a/kvm-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch b/kvm-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch deleted file mode 100644 index ef0f424..0000000 --- a/kvm-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch +++ /dev/null @@ -1,177 +0,0 @@ -From 1f6e36fd98ba0610a438c2352117c5b1ed4f01ba Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Mon, 8 Mar 2021 18:10:41 -0500 -Subject: [PATCH 07/15] i386/acpi: restore device paths for pre-5.1 vms -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Igor Mammedov -Message-id: <20210308181041.2427279-1-imammedo@redhat.com> -Patchwork-id: 101321 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH] i386/acpi: restore device paths for pre-5.1 vms -Bugzilla: 1934158 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Michael S. 
Tsirkin -RH-Acked-by: Philippe Mathieu-Daudé - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1934158 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=35317870 -Upstream: 0a343a5add75f9f90c65e932863d57ddbcb28f5c - - From: Vitaly Cheptsov - Date: Mon Mar 1 22:59:18 2021 +0300 - - After fixing the _UID value for the primary PCI root bridge in - af1b80ae it was discovered that this change updates Windows - configuration in an incompatible way causing network configuration - failure unless DHCP is used. More details provided on the list: - - https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html - - This change reverts the _UID update from 1 to 0 for q35 and i440fx - VMs before version 5.2 to maintain the original behaviour when - upgrading. - - Cc: qemu-stable@nongnu.org - Cc: qemu-devel@nongnu.org - Reported-by: Thomas Lamprecht - Suggested-by: Michael S. Tsirkin - Signed-off-by: Vitaly Cheptsov - Message-Id: <20210301195919.9333-1-cheptsov@ispras.ru> - Tested-by: Thomas Lamprecht - Reviewed-by: Igor Mammedov - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Michael S. Tsirkin - Fixes: af1b80ae56c9 ("i386/acpi: fix inconsistent QEMU/OVMF device paths") - -(cherry picked from commit 0a343a5add75f9f90c65e932863d57ddbcb28f5c) -Signed-off-by: Igor Mammedov - -Notes: -clean cherrypick + -adding the same quirk to RHEL's pc(7.6)/q35(8.3) machine types -to preserve old UID. pc-q35-rhel8.4.0 will have new UID as defined -by spec (but since it's not been released yet there is no risk of -breaking [non]existing Windows deployments and new installations -should pickup new PCI device enumeration just fine) - -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/i386/acpi-build.c | 4 ++-- - hw/i386/pc_piix.c | 5 +++++ - hw/i386/pc_q35.c | 5 +++++ - include/hw/i386/pc.h | 1 + - 4 files changed, 13 insertions(+), 2 deletions(-) - -diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index b1082bd412..be6a260b85 100644 ---- a/hw/i386/acpi-build.c -+++ b/hw/i386/acpi-build.c -@@ -1516,7 +1516,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, - dev = aml_device("PCI0"); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); - aml_append(dev, aml_name_decl("_ADR", aml_int(0))); -- aml_append(dev, aml_name_decl("_UID", aml_int(0))); -+ aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid))); - aml_append(sb_scope, dev); - aml_append(dsdt, sb_scope); - -@@ -1533,7 +1533,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); - aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); - aml_append(dev, aml_name_decl("_ADR", aml_int(0))); -- aml_append(dev, aml_name_decl("_UID", aml_int(0))); -+ aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid))); - aml_append(dev, build_q35_osc_method()); - aml_append(sb_scope, dev); - -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 6e1f1ba082..819fb5fed9 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -421,6 +421,7 @@ static void pc_i440fx_machine_options(MachineClass *m) - { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pcmc->default_nic_model = "e1000"; -+ pcmc->pci_root_uid = 0; - - m->family = "pc_piix"; - m->desc = "Standard PC (i440FX + PIIX, 1996)"; -@@ -452,6 +453,7 @@ static void pc_i440fx_5_1_machine_options(MachineClass *m) - compat_props_add(m->compat_props, hw_compat_5_1, hw_compat_5_1_len); - compat_props_add(m->compat_props, pc_compat_5_1, pc_compat_5_1_len); - pcmc->kvmclock_create_always = false; -+ pcmc->pci_root_uid = 1; - } - - DEFINE_I440FX_MACHINE(v5_1, "pc-i440fx-5.1", NULL, -@@ -1020,6 +1022,7 @@ static void 
pc_machine_rhel7_options(MachineClass *m) - m->family = "pc_piix_Y"; - m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; - pcmc->default_nic_model = "e1000"; -+ pcmc->pci_root_uid = 0; - m->default_display = "std"; - m->no_parallel = 1; - m->numa_mem_supported = true; -@@ -1046,6 +1049,8 @@ static void pc_machine_rhel760_options(MachineClass *m) - pcmc->pvh_enabled = false; - pcmc->default_cpu_version = CPU_VERSION_LEGACY; - pcmc->kvmclock_create_always = false; -+ /* From pc_i440fx_5_1_machine_options() */ -+ pcmc->pci_root_uid = 1; - compat_props_add(m->compat_props, hw_compat_rhel_8_3, - hw_compat_rhel_8_3_len); - compat_props_add(m->compat_props, pc_rhel_8_3_compat, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index a8c0496c9f..f848f1484e 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -332,6 +332,7 @@ static void pc_q35_machine_options(MachineClass *m) - { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pcmc->default_nic_model = "e1000e"; -+ pcmc->pci_root_uid = 0; - - m->family = "pc_q35"; - m->desc = "Standard PC (Q35 + ICH9, 2009)"; -@@ -367,6 +368,7 @@ static void pc_q35_5_1_machine_options(MachineClass *m) - compat_props_add(m->compat_props, hw_compat_5_1, hw_compat_5_1_len); - compat_props_add(m->compat_props, pc_compat_5_1, pc_compat_5_1_len); - pcmc->kvmclock_create_always = false; -+ pcmc->pci_root_uid = 1; - } - - DEFINE_Q35_MACHINE(v5_1, "pc-q35-5.1", NULL, -@@ -578,6 +580,7 @@ static void pc_q35_machine_rhel_options(MachineClass *m) - { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pcmc->default_nic_model = "e1000e"; -+ pcmc->pci_root_uid = 0; - m->family = "pc_q35_Z"; - m->units_per_default_bus = 1; - m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; -@@ -630,6 +633,8 @@ static void pc_q35_machine_rhel830_options(MachineClass *m) - pc_rhel_8_3_compat_len); - /* From pc_q35_5_1_machine_options() */ - pcmc->kvmclock_create_always = false; -+ /* From pc_q35_5_1_machine_options() */ -+ pcmc->pci_root_uid = 1; - 
} - - DEFINE_PC_MACHINE(q35_rhel830, "pc-q35-rhel8.3.0", pc_q35_init_rhel830, -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 68091bea98..d2efc65cec 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -99,6 +99,7 @@ struct PCMachineClass { - int legacy_acpi_table_size; - unsigned acpi_data_size; - bool do_not_add_smb_acpi; -+ int pci_root_uid; - - /* SMBIOS compat: */ - bool smbios_defaults; --- -2.27.0 - diff --git a/kvm-migration-dirty-bitmap-Allow-control-of-bitmap-persi.patch b/kvm-migration-dirty-bitmap-Allow-control-of-bitmap-persi.patch deleted file mode 100644 index 940231e..0000000 --- a/kvm-migration-dirty-bitmap-Allow-control-of-bitmap-persi.patch +++ /dev/null @@ -1,167 +0,0 @@ -From 039775f93548382ec1b98f5a6004c3eee02fbd28 Mon Sep 17 00:00:00 2001 -From: Peter Krempa -Date: Mon, 22 Feb 2021 13:35:05 -0500 -Subject: [PATCH 22/54] migration: dirty-bitmap: Allow control of bitmap - persistence - -RH-Author: Peter Krempa -Message-id: -Patchwork-id: 101171 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/3] migration: dirty-bitmap: Allow control of bitmap persistence -Bugzilla: 1930757 -RH-Acked-by: John Snow -RH-Acked-by: Kevin Wolf -RH-Acked-by: Eric Blake - -Bitmap's source persistence is transported over the migration stream and -the destination mirrors it. In some cases the destination might want to -persist bitmaps which are not persistent on the source (e.g. the result -of merging bitmaps from a number of layers on the source when migrating -into a squashed image) but currently it would need to create another set -of persistent bitmaps and merge them. - -This patch adds a 'transform' property to the alias map which allows -overriding the persistence of migrated bitmaps both on the source and -destination sides. 
- -Signed-off-by: Peter Krempa -Message-Id: -Reviewed-by: Eric Blake -[eblake: grammar tweaks, drop dead conditional] -Signed-off-by: Eric Blake -(cherry picked from commit 6e9f21a2aa8a78bc9a512a836a40c79fe50dd2b4) - -https://bugzilla.redhat.com/show_bug.cgi?id=1930757 -Signed-off-by: Danilo C. L. de Paula ---- - migration/block-dirty-bitmap.c | 29 ++++++++++++++++++++++++++--- - qapi/migration.json | 19 ++++++++++++++++++- - 2 files changed, 44 insertions(+), 4 deletions(-) - -diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c -index b39c13ce4e..975093610a 100644 ---- a/migration/block-dirty-bitmap.c -+++ b/migration/block-dirty-bitmap.c -@@ -150,6 +150,7 @@ typedef struct DBMLoadState { - BdrvDirtyBitmap *bitmap; - - bool before_vm_start_handled; /* set in dirty_bitmap_mig_before_vm_start */ -+ BitmapMigrationBitmapAlias *bmap_inner; - - /* - * cancelled -@@ -529,6 +530,7 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs, - } - - FOR_EACH_DIRTY_BITMAP(bs, bitmap) { -+ BitmapMigrationBitmapAliasTransform *bitmap_transform = NULL; - bitmap_name = bdrv_dirty_bitmap_name(bitmap); - if (!bitmap_name) { - continue; -@@ -549,6 +551,9 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs, - } - - bitmap_alias = bmap_inner->alias; -+ if (bmap_inner->has_transform) { -+ bitmap_transform = bmap_inner->transform; -+ } - } else { - if (strlen(bitmap_name) > UINT8_MAX) { - error_report("Cannot migrate bitmap '%s' on node '%s': " -@@ -574,8 +579,15 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs, - if (bdrv_dirty_bitmap_enabled(bitmap)) { - dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_ENABLED; - } -- if (bdrv_dirty_bitmap_get_persistence(bitmap)) { -- dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT; -+ if (bitmap_transform && -+ bitmap_transform->has_persistent) { -+ if (bitmap_transform->persistent) { -+ dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT; -+ } -+ } else { -+ if 
(bdrv_dirty_bitmap_get_persistence(bitmap)) { -+ dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT; -+ } - } - - QSIMPLEQ_INSERT_TAIL(&s->dbms_list, dbms, entry); -@@ -783,6 +795,7 @@ static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s) - uint32_t granularity = qemu_get_be32(f); - uint8_t flags = qemu_get_byte(f); - LoadBitmapState *b; -+ bool persistent; - - if (s->cancelled) { - return 0; -@@ -807,7 +820,15 @@ static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s) - return -EINVAL; - } - -- if (flags & DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT) { -+ if (s->bmap_inner && -+ s->bmap_inner->has_transform && -+ s->bmap_inner->transform->has_persistent) { -+ persistent = s->bmap_inner->transform->persistent; -+ } else { -+ persistent = flags & DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT; -+ } -+ -+ if (persistent) { - bdrv_dirty_bitmap_set_persistence(s->bitmap, true); - } - -@@ -1091,6 +1112,8 @@ static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s, - } else { - bitmap_name = bmap_inner->name; - } -+ -+ s->bmap_inner = bmap_inner; - } - - if (!s->cancelled) { -diff --git a/qapi/migration.json b/qapi/migration.json -index 3c75820527..19b796ab47 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -525,6 +525,19 @@ - 'data': [ 'none', 'zlib', - { 'name': 'zstd', 'if': 'defined(CONFIG_ZSTD)' } ] } - -+## -+# @BitmapMigrationBitmapAliasTransform: -+# -+# @persistent: If present, the bitmap will be made persistent -+# or transient depending on this parameter. -+# -+# Since: 6.0 -+## -+{ 'struct': 'BitmapMigrationBitmapAliasTransform', -+ 'data': { -+ '*persistent': 'bool' -+ } } -+ - ## - # @BitmapMigrationBitmapAlias: - # -@@ -533,12 +546,16 @@ - # @alias: An alias name for migration (for example the bitmap name on - # the opposite site). - # -+# @transform: Allows the modification of the migrated bitmap. 
-+# (since 6.0) -+# - # Since: 5.2 - ## - { 'struct': 'BitmapMigrationBitmapAlias', - 'data': { - 'name': 'str', -- 'alias': 'str' -+ 'alias': 'str', -+ '*transform': 'BitmapMigrationBitmapAliasTransform' - } } - - ## --- -2.27.0 - diff --git a/kvm-migration-dirty-bitmap-Use-struct-for-alias-map-inne.patch b/kvm-migration-dirty-bitmap-Use-struct-for-alias-map-inne.patch deleted file mode 100644 index 156117f..0000000 --- a/kvm-migration-dirty-bitmap-Use-struct-for-alias-map-inne.patch +++ /dev/null @@ -1,143 +0,0 @@ -From e49b317a80df94b769c01c2ae488a369921088d2 Mon Sep 17 00:00:00 2001 -From: Peter Krempa -Date: Mon, 22 Feb 2021 13:35:04 -0500 -Subject: [PATCH 21/54] migration: dirty-bitmap: Use struct for alias map inner - members - -RH-Author: Peter Krempa -Message-id: <943503323f3f97d576715d09736376cf07d6efab.1614000630.git.pkrempa@redhat.com> -Patchwork-id: 101170 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/3] migration: dirty-bitmap: Use struct for alias map inner members -Bugzilla: 1930757 -RH-Acked-by: John Snow -RH-Acked-by: Kevin Wolf -RH-Acked-by: Eric Blake - -Currently the alias mapping hash stores just strings of the target -objects internally. In further patches we'll be adding another member -which will need to be stored in the map so pass a copy of the whole -BitmapMigrationBitmapAlias QAPI struct into the map. - -Signed-off-by: Peter Krempa -Message-Id: -Reviewed-by: Eric Blake -[eblake: adjust long lines] -Signed-off-by: Eric Blake -(cherry picked from commit 0d1e450c7b3117ee635a00c81d9a92666ebc7ffa) - -https://bugzilla.redhat.com/show_bug.cgi?id=1930757 -Signed-off-by: Danilo C. L. 
de Paula ---- - migration/block-dirty-bitmap.c | 33 +++++++++++++++++++++------------ - 1 file changed, 21 insertions(+), 12 deletions(-) - -diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c -index c61d382be8..b39c13ce4e 100644 ---- a/migration/block-dirty-bitmap.c -+++ b/migration/block-dirty-bitmap.c -@@ -75,6 +75,8 @@ - #include "qemu/id.h" - #include "qapi/error.h" - #include "qapi/qapi-commands-migration.h" -+#include "qapi/qapi-visit-migration.h" -+#include "qapi/clone-visitor.h" - #include "trace.h" - - #define CHUNK_SIZE (1 << 10) -@@ -224,6 +226,7 @@ static GHashTable *construct_alias_map(const BitmapMigrationNodeAliasList *bbm, - AliasMapInnerNode *amin; - GHashTable *bitmaps_map; - const char *node_map_from, *node_map_to; -+ GDestroyNotify gdn; - - if (!id_wellformed(bmna->alias)) { - error_setg(errp, "The node alias '%s' is not well-formed", -@@ -263,8 +266,9 @@ static GHashTable *construct_alias_map(const BitmapMigrationNodeAliasList *bbm, - node_map_to = bmna->node_name; - } - -- bitmaps_map = g_hash_table_new_full(g_str_hash, g_str_equal, -- g_free, g_free); -+ gdn = (GDestroyNotify) qapi_free_BitmapMigrationBitmapAlias; -+ bitmaps_map = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, -+ gdn); - - amin = g_new(AliasMapInnerNode, 1); - *amin = (AliasMapInnerNode){ -@@ -276,7 +280,7 @@ static GHashTable *construct_alias_map(const BitmapMigrationNodeAliasList *bbm, - - for (bmbal = bmna->bitmaps; bmbal; bmbal = bmbal->next) { - const BitmapMigrationBitmapAlias *bmba = bmbal->value; -- const char *bmap_map_from, *bmap_map_to; -+ const char *bmap_map_from; - - if (strlen(bmba->alias) > UINT8_MAX) { - error_setg(errp, -@@ -293,7 +297,6 @@ static GHashTable *construct_alias_map(const BitmapMigrationNodeAliasList *bbm, - - if (name_to_alias) { - bmap_map_from = bmba->name; -- bmap_map_to = bmba->alias; - - if (g_hash_table_contains(bitmaps_map, bmba->name)) { - error_setg(errp, "The bitmap '%s'/'%s' is mapped twice", -@@ 
-302,7 +305,6 @@ static GHashTable *construct_alias_map(const BitmapMigrationNodeAliasList *bbm, - } - } else { - bmap_map_from = bmba->alias; -- bmap_map_to = bmba->name; - - if (g_hash_table_contains(bitmaps_map, bmba->alias)) { - error_setg(errp, "The bitmap alias '%s'/'%s' is used twice", -@@ -311,8 +313,8 @@ static GHashTable *construct_alias_map(const BitmapMigrationNodeAliasList *bbm, - } - } - -- g_hash_table_insert(bitmaps_map, -- g_strdup(bmap_map_from), g_strdup(bmap_map_to)); -+ g_hash_table_insert(bitmaps_map, g_strdup(bmap_map_from), -+ QAPI_CLONE(BitmapMigrationBitmapAlias, bmba)); - } - } - -@@ -538,11 +540,15 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs, - } - - if (bitmap_aliases) { -- bitmap_alias = g_hash_table_lookup(bitmap_aliases, bitmap_name); -- if (!bitmap_alias) { -+ BitmapMigrationBitmapAlias *bmap_inner; -+ -+ bmap_inner = g_hash_table_lookup(bitmap_aliases, bitmap_name); -+ if (!bmap_inner) { - /* Skip bitmaps with no alias */ - continue; - } -+ -+ bitmap_alias = bmap_inner->alias; - } else { - if (strlen(bitmap_name) > UINT8_MAX) { - error_report("Cannot migrate bitmap '%s' on node '%s': " -@@ -1074,13 +1080,16 @@ static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s, - - bitmap_name = s->bitmap_alias; - if (!s->cancelled && bitmap_alias_map) { -- bitmap_name = g_hash_table_lookup(bitmap_alias_map, -- s->bitmap_alias); -- if (!bitmap_name) { -+ BitmapMigrationBitmapAlias *bmap_inner; -+ -+ bmap_inner = g_hash_table_lookup(bitmap_alias_map, s->bitmap_alias); -+ if (!bmap_inner) { - error_report("Error: Unknown bitmap alias '%s' on node " - "'%s' (alias '%s')", s->bitmap_alias, - s->bs->node_name, s->node_alias); - cancel_incoming_locked(s); -+ } else { -+ bitmap_name = bmap_inner->name; - } - } - --- -2.27.0 - diff --git a/kvm-nbd-make-nbd_read-return-EIO-on-error.patch b/kvm-nbd-make-nbd_read-return-EIO-on-error.patch deleted file mode 100644 index 9dacfa9..0000000 --- 
a/kvm-nbd-make-nbd_read-return-EIO-on-error.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 7b7974468656d2ceba6a7f6dba2b35dfe28a5d1f Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Mon, 8 Feb 2021 22:57:01 -0300 -Subject: [PATCH 03/54] nbd: make nbd_read* return -EIO on error - -RH-Author: Eric Blake -Message-id: <20210208225701.110110-4-eblake@redhat.com> -Patchwork-id: 101007 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v4 3/3] nbd: make nbd_read* return -EIO on error -Bugzilla: 1887883 -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Max Reitz - -From: Roman Kagan - -NBD reconnect logic considers the error code from the functions that -read NBD messages to tell if reconnect should be attempted or not: it is -attempted on -EIO, otherwise the client transitions to NBD_CLIENT_QUIT -state (see nbd_channel_error). This error code is propagated from the -primitives like nbd_read. - -The problem, however, is that nbd_read itself turns every error into -1 -rather than -EIO. As a result, if the NBD server happens to die while -sending the message, the client in QEMU receives less data than it -expects, considers it as a fatal error, and wouldn't attempt -reestablishing the connection. - -Fix it by turning every negative return from qio_channel_read_all into --EIO returned from nbd_read. Apparently that was the original behavior, -but got broken later. Also adjust nbd_readXX to follow. 
- -Fixes: e6798f06a6 ("nbd: generalize usage of nbd_read") -Signed-off-by: Roman Kagan -Reviewed-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20210129073859.683063-4-rvkagan@yandex-team.ru> -Signed-off-by: Eric Blake -(cherry picked from commit 5082fc82a6bc3fc06a04be47d39777c7cff61e5b) -Signed-off-by: Eric Blake -Signed-off-by: Eduardo Lima (Etrunko) ---- - include/block/nbd.h | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/include/block/nbd.h b/include/block/nbd.h -index 4a52a43ef5..5f34d23bb0 100644 ---- a/include/block/nbd.h -+++ b/include/block/nbd.h -@@ -364,7 +364,7 @@ static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size, - if (desc) { - error_prepend(errp, "Failed to read %s: ", desc); - } -- return -1; -+ return ret; - } - - return 0; -@@ -375,8 +375,9 @@ static inline int nbd_read##bits(QIOChannel *ioc, \ - uint##bits##_t *val, \ - const char *desc, Error **errp) \ - { \ -- if (nbd_read(ioc, val, sizeof(*val), desc, errp) < 0) { \ -- return -1; \ -+ int ret = nbd_read(ioc, val, sizeof(*val), desc, errp); \ -+ if (ret < 0) { \ -+ return ret; \ - } \ - *val = be##bits##_to_cpu(*val); \ - return 0; \ --- -2.27.0 - diff --git a/kvm-nbd-server-Quiesce-coroutines-on-context-switch.patch b/kvm-nbd-server-Quiesce-coroutines-on-context-switch.patch deleted file mode 100644 index d0080d2..0000000 --- a/kvm-nbd-server-Quiesce-coroutines-on-context-switch.patch +++ /dev/null @@ -1,249 +0,0 @@ -From 7cadf68c46abcd097fcbcecb11a4a04f264d0316 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Thu, 11 Feb 2021 14:42:05 -0300 -Subject: [PATCH 3/6] nbd/server: Quiesce coroutines on context switch - -RH-Author: Sergio Lopez Pascual -Message-id: <20210211144208.58930-3-slp@redhat.com> -Patchwork-id: 101051 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/5] nbd/server: Quiesce coroutines on context switch -Bugzilla: 1918966 1918968 -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf -RH-Acked-by: Eric Blake - -When switching 
between AIO contexts we need to me make sure that both -recv_coroutine and send_coroutine are not scheduled to run. Otherwise, -QEMU may crash while attaching the new context with an error like -this one: - -aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule' - -To achieve this we need a local implementation of -'qio_channel_readv_all_eof' named 'nbd_read_eof' (a trick already done -by 'nbd/client.c') that allows us to interrupt the operation and to -know when recv_coroutine is yielding. - -With this in place, we delegate detaching the AIO context to the -owning context with a BH ('nbd_aio_detach_bh') scheduled using -'aio_wait_bh_oneshot'. This BH signals that we need to quiesce the -channel by setting 'client->quiescing' to 'true', and either waits for -the coroutine to finish using AIO_WAIT_WHILE or, if it's yielding in -'nbd_read_eof', actively enters the coroutine to interrupt it. - -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1900326 -Signed-off-by: Sergio Lopez -Reviewed-by: Eric Blake -Message-Id: <20201214170519.223781-4-slp@redhat.com> -Signed-off-by: Eric Blake -(cherry picked from commit f148ae7d36cbb924447f4b528a94d7799836c749) -Signed-off-by: Sergio Lopez -Signed-off-by: Eduardo Lima (Etrunko) ---- - nbd/server.c | 120 +++++++++++++++++++++++++++++++++++++++++++++------ - 1 file changed, 106 insertions(+), 14 deletions(-) - -diff --git a/nbd/server.c b/nbd/server.c -index 613ed2634a..7229f487d2 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -132,6 +132,9 @@ struct NBDClient { - CoMutex send_lock; - Coroutine *send_coroutine; - -+ bool read_yielding; -+ bool quiescing; -+ - QTAILQ_ENTRY(NBDClient) next; - int nb_requests; - bool closing; -@@ -1352,14 +1355,60 @@ static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp) - return 0; - } - --static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request, -+/* nbd_read_eof -+ * Tries to read @size bytes from @ioc. 
This is a local implementation of -+ * qio_channel_readv_all_eof. We have it here because we need it to be -+ * interruptible and to know when the coroutine is yielding. -+ * Returns 1 on success -+ * 0 on eof, when no data was read (errp is not set) -+ * negative errno on failure (errp is set) -+ */ -+static inline int coroutine_fn -+nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp) -+{ -+ bool partial = false; -+ -+ assert(size); -+ while (size > 0) { -+ struct iovec iov = { .iov_base = buffer, .iov_len = size }; -+ ssize_t len; -+ -+ len = qio_channel_readv(client->ioc, &iov, 1, errp); -+ if (len == QIO_CHANNEL_ERR_BLOCK) { -+ client->read_yielding = true; -+ qio_channel_yield(client->ioc, G_IO_IN); -+ client->read_yielding = false; -+ if (client->quiescing) { -+ return -EAGAIN; -+ } -+ continue; -+ } else if (len < 0) { -+ return -EIO; -+ } else if (len == 0) { -+ if (partial) { -+ error_setg(errp, -+ "Unexpected end-of-file before all bytes were read"); -+ return -EIO; -+ } else { -+ return 0; -+ } -+ } -+ -+ partial = true; -+ size -= len; -+ buffer = (uint8_t *) buffer + len; -+ } -+ return 1; -+} -+ -+static int nbd_receive_request(NBDClient *client, NBDRequest *request, - Error **errp) - { - uint8_t buf[NBD_REQUEST_SIZE]; - uint32_t magic; - int ret; - -- ret = nbd_read(ioc, buf, sizeof(buf), "request", errp); -+ ret = nbd_read_eof(client, buf, sizeof(buf), errp); - if (ret < 0) { - return ret; - } -@@ -1480,11 +1529,37 @@ static void blk_aio_attached(AioContext *ctx, void *opaque) - - QTAILQ_FOREACH(client, &exp->clients, next) { - qio_channel_attach_aio_context(client->ioc, ctx); -+ -+ assert(client->recv_coroutine == NULL); -+ assert(client->send_coroutine == NULL); -+ -+ if (client->quiescing) { -+ client->quiescing = false; -+ nbd_client_receive_next_request(client); -+ } -+ } -+} -+ -+static void nbd_aio_detach_bh(void *opaque) -+{ -+ NBDExport *exp = opaque; -+ NBDClient *client; -+ -+ QTAILQ_FOREACH(client, &exp->clients, 
next) { -+ qio_channel_detach_aio_context(client->ioc); -+ client->quiescing = true; -+ - if (client->recv_coroutine) { -- aio_co_schedule(ctx, client->recv_coroutine); -+ if (client->read_yielding) { -+ qemu_aio_coroutine_enter(exp->common.ctx, -+ client->recv_coroutine); -+ } else { -+ AIO_WAIT_WHILE(exp->common.ctx, client->recv_coroutine != NULL); -+ } - } -+ - if (client->send_coroutine) { -- aio_co_schedule(ctx, client->send_coroutine); -+ AIO_WAIT_WHILE(exp->common.ctx, client->send_coroutine != NULL); - } - } - } -@@ -1492,13 +1567,10 @@ static void blk_aio_attached(AioContext *ctx, void *opaque) - static void blk_aio_detach(void *opaque) - { - NBDExport *exp = opaque; -- NBDClient *client; - - trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); - -- QTAILQ_FOREACH(client, &exp->clients, next) { -- qio_channel_detach_aio_context(client->ioc); -- } -+ aio_wait_bh_oneshot(exp->common.ctx, nbd_aio_detach_bh, exp); - - exp->common.ctx = NULL; - } -@@ -2151,20 +2223,23 @@ static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle, - - /* nbd_co_receive_request - * Collect a client request. Return 0 if request looks valid, -EIO to drop -- * connection right away, and any other negative value to report an error to -- * the client (although the caller may still need to disconnect after reporting -- * the error). -+ * connection right away, -EAGAIN to indicate we were interrupted and the -+ * channel should be quiesced, and any other negative value to report an error -+ * to the client (although the caller may still need to disconnect after -+ * reporting the error). 
- */ - static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request, - Error **errp) - { - NBDClient *client = req->client; - int valid_flags; -+ int ret; - - g_assert(qemu_in_coroutine()); - assert(client->recv_coroutine == qemu_coroutine_self()); -- if (nbd_receive_request(client->ioc, request, errp) < 0) { -- return -EIO; -+ ret = nbd_receive_request(client, request, errp); -+ if (ret < 0) { -+ return ret; - } - - trace_nbd_co_receive_request_decode_type(request->handle, request->type, -@@ -2507,6 +2582,17 @@ static coroutine_fn void nbd_trip(void *opaque) - return; - } - -+ if (client->quiescing) { -+ /* -+ * We're switching between AIO contexts. Don't attempt to receive a new -+ * request and kick the main context which may be waiting for us. -+ */ -+ nbd_client_put(client); -+ client->recv_coroutine = NULL; -+ aio_wait_kick(); -+ return; -+ } -+ - req = nbd_request_get(client); - ret = nbd_co_receive_request(req, &request, &local_err); - client->recv_coroutine = NULL; -@@ -2519,6 +2605,11 @@ static coroutine_fn void nbd_trip(void *opaque) - goto done; - } - -+ if (ret == -EAGAIN) { -+ assert(client->quiescing); -+ goto done; -+ } -+ - nbd_client_receive_next_request(client); - if (ret == -EIO) { - goto disconnect; -@@ -2565,7 +2656,8 @@ disconnect: - - static void nbd_client_receive_next_request(NBDClient *client) - { -- if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS) { -+ if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS && -+ !client->quiescing) { - nbd_client_get(client); - client->recv_coroutine = qemu_coroutine_create(nbd_trip, client); - aio_co_schedule(client->exp->common.ctx, client->recv_coroutine); --- -2.27.0 - diff --git a/kvm-pci-add-romsize-property.patch b/kvm-pci-add-romsize-property.patch deleted file mode 100644 index 961073f..0000000 --- a/kvm-pci-add-romsize-property.patch +++ /dev/null @@ -1,137 +0,0 @@ -From aee681700e512679981e39928d8709eb226a4a6d Mon Sep 17 00:00:00 2001 -From: 
Peter Xu -Date: Wed, 10 Feb 2021 17:04:45 -0300 -Subject: [PATCH 10/54] pci: add romsize property - -RH-Author: Peter Xu -Message-id: <20210210170445.128304-3-peterx@redhat.com> -Patchwork-id: 101041 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/2] pci: add romsize property -Bugzilla: 1917830 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Paolo Bonzini - -From: Paolo Bonzini - -This property can be useful for distros to set up known-good ROM sizes for -migration purposes. The VM will fail to start if the ROM is too large, -and migration compatibility will not be broken if the ROM is too small. - -Note that even though romsize is a uint32_t, it has to be between 1 -(because empty ROM files are not accepted, and romsize must be greater -than the file) and 2^31 (because values above are not powers of two and -are rejected). - -Signed-off-by: Paolo Bonzini -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Peter Xu -Message-Id: <20201218182736.1634344-1-pbonzini@redhat.com> -Signed-off-by: Paolo Bonzini -Message-Id: <20210203131828.156467-3-pbonzini@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Reviewed-by: David Edmondson -Acked-by: Laszlo Ersek -(cherry picked from commit 08b1df8ff463e72b0875538fb991d5393047606c) -Signed-off-by: Peter Xu -Signed-off-by: Eduardo Lima (Etrunko) ---- - hw/pci/pci.c | 19 +++++++++++++++++-- - hw/xen/xen_pt_load_rom.c | 14 ++++++++++++-- - include/hw/pci/pci.h | 1 + - 3 files changed, 30 insertions(+), 4 deletions(-) - -diff --git a/hw/pci/pci.c b/hw/pci/pci.c -index e4caad33c5..9619b8d068 100644 ---- a/hw/pci/pci.c -+++ b/hw/pci/pci.c -@@ -67,6 +67,7 @@ static void pcibus_reset(BusState *qbus); - static Property pci_props[] = { - DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1), - DEFINE_PROP_STRING("romfile", PCIDevice, romfile), -+ DEFINE_PROP_UINT32("romsize", PCIDevice, romsize, -1), - DEFINE_PROP_UINT32("rombar", PCIDevice, rom_bar, 1), - DEFINE_PROP_BIT("multifunction", PCIDevice, cap_present, - QEMU_PCI_CAP_MULTIFUNCTION_BITNR, false), -@@ -2106,6 +2107,11 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) - bool is_default_rom; - uint16_t class_id; - -+ if (pci_dev->romsize != -1 && !is_power_of_2(pci_dev->romsize)) { -+ error_setg(errp, "ROM size %u is not a power of two", pci_dev->romsize); -+ return; -+ } -+ - /* initialize cap_present for pci_is_express() and pci_config_size(), - * Note that hybrid PCIs are not set automatically and need to manage - * QEMU_PCI_CAP_EXPRESS manually */ -@@ -2371,7 +2377,16 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, - g_free(path); - return; - } -- size = pow2ceil(size); -+ if (pdev->romsize != -1) { -+ if (size > pdev->romsize) { -+ error_setg(errp, "romfile \"%s\" (%u bytes) is too large for ROM size %u", -+ pdev->romfile, (uint32_t)size, pdev->romsize); -+ g_free(path); -+ return; -+ } -+ } else { -+ pdev->romsize = pow2ceil(size); -+ } - - vmsd = qdev_get_vmsd(DEVICE(pdev)); - -@@ -2381,7 +2396,7 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, - snprintf(name, sizeof(name), "%s.rom", 
object_get_typename(OBJECT(pdev))); - } - pdev->has_rom = true; -- memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, size, &error_fatal); -+ memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize, &error_fatal); - ptr = memory_region_get_ram_ptr(&pdev->rom); - if (load_image_size(path, ptr, size) < 0) { - error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile); -diff --git a/hw/xen/xen_pt_load_rom.c b/hw/xen/xen_pt_load_rom.c -index a50a80837e..03422a8a71 100644 ---- a/hw/xen/xen_pt_load_rom.c -+++ b/hw/xen/xen_pt_load_rom.c -@@ -53,10 +53,20 @@ void *pci_assign_dev_load_option_rom(PCIDevice *dev, - } - fseek(fp, 0, SEEK_SET); - -+ if (dev->romsize != -1) { -+ if (st.st_size > dev->romsize) { -+ error_report("ROM BAR \"%s\" (%ld bytes) is too large for ROM size %u", -+ rom_file, (long) st.st_size, dev->romsize); -+ goto close_rom; -+ } -+ } else { -+ dev->romsize = st.st_size; -+ } -+ - snprintf(name, sizeof(name), "%s.rom", object_get_typename(owner)); -- memory_region_init_ram(&dev->rom, owner, name, st.st_size, &error_abort); -+ memory_region_init_ram(&dev->rom, owner, name, dev->romsize, &error_abort); - ptr = memory_region_get_ram_ptr(&dev->rom); -- memset(ptr, 0xff, st.st_size); -+ memset(ptr, 0xff, dev->romsize); - - if (!fread(ptr, 1, st.st_size, fp)) { - error_report("pci-assign: Cannot read from host %s", rom_file); -diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h -index 72ce649eee..75a6b15757 100644 ---- a/include/hw/pci/pci.h -+++ b/include/hw/pci/pci.h -@@ -343,6 +343,7 @@ struct PCIDevice { - - /* Location of option rom */ - char *romfile; -+ uint32_t romsize; - bool has_rom; - MemoryRegion rom; - uint32_t rom_bar; --- -2.27.0 - diff --git a/kvm-pci-reject-too-large-ROMs.patch b/kvm-pci-reject-too-large-ROMs.patch deleted file mode 100644 index 739b908..0000000 --- a/kvm-pci-reject-too-large-ROMs.patch +++ /dev/null @@ -1,89 +0,0 @@ -From a6e34aa76d86319d15355fd55fa6d12eb49a816f Mon Sep 17 00:00:00 2001 -From: 
Peter Xu -Date: Wed, 10 Feb 2021 17:04:44 -0300 -Subject: [PATCH 09/54] pci: reject too large ROMs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -Message-id: <20210210170445.128304-2-peterx@redhat.com> -Patchwork-id: 101039 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/2] pci: reject too large ROMs -Bugzilla: 1917830 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Paolo Bonzini - -From: Paolo Bonzini - -get_image_size() returns an int64_t, which pci_add_option_rom() assigns -to an "int" without any range checking. A 32-bit BAR could be up to -2 GiB in size, so reject anything above it. In order to accomodate -a rounded-up size of 2 GiB, change pci_patch_ids's size argument -to unsigned. - -Conflicts: - hw/pci/pci.c: missing 2c65db5e58d ("vl: extract softmmu/datadir.c") so - there's no "#include " yet - -Reviewed-by: Peter Xu -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Laszlo Ersek -Signed-off-by: Paolo Bonzini -Message-Id: <20210203131828.156467-2-pbonzini@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: David Edmondson -(cherry picked from commit 7c16b5bbb6c0f797945327d17e4be60f25a4427d) -Signed-off-by: Peter Xu -Signed-off-by: Eduardo Lima (Etrunko) ---- - hw/pci/pci.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/hw/pci/pci.c b/hw/pci/pci.c -index 0131d9d02c..e4caad33c5 100644 ---- a/hw/pci/pci.c -+++ b/hw/pci/pci.c -@@ -24,6 +24,7 @@ - - #include "qemu/osdep.h" - #include "qemu-common.h" -+#include "qemu/units.h" - #include "hw/irq.h" - #include "hw/pci/pci.h" - #include "hw/pci/pci_bridge.h" -@@ -2256,7 +2257,7 @@ static uint8_t pci_find_capability_at_offset(PCIDevice *pdev, uint8_t offset) - - /* Patch the PCI vendor and device ids in a PCI rom image if necessary. - This is needed for an option rom which is used for more than one device. 
*/ --static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, int size) -+static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) - { - uint16_t vendor_id; - uint16_t device_id; -@@ -2314,7 +2315,7 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, int size) - static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, - Error **errp) - { -- int size; -+ int64_t size; - char *path; - void *ptr; - char name[32]; -@@ -2364,6 +2365,11 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, - error_setg(errp, "romfile \"%s\" is empty", pdev->romfile); - g_free(path); - return; -+ } else if (size > 2 * GiB) { -+ error_setg(errp, "romfile \"%s\" too large (size cannot exceed 2 GiB)", -+ pdev->romfile); -+ g_free(path); -+ return; - } - size = pow2ceil(size); - --- -2.27.0 - diff --git a/kvm-pcie-don-t-set-link-state-active-if-the-slot-is-empt.patch b/kvm-pcie-don-t-set-link-state-active-if-the-slot-is-empt.patch deleted file mode 100644 index 6ffcc2c..0000000 --- a/kvm-pcie-don-t-set-link-state-active-if-the-slot-is-empt.patch +++ /dev/null @@ -1,146 +0,0 @@ -From 20eb8dc4f6679e3325e1f1f434b17e2dc6a60eee Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 21:42:47 -0500 -Subject: [PATCH 20/54] pcie: don't set link state active if the slot is empty - -RH-Author: Laurent Vivier -Message-id: <20210225214247.1336554-1-lvivier@redhat.com> -Patchwork-id: 101211 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH] pcie: don't set link state active if the slot is empty -Bugzilla: 1917654 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Alex Williamson - -BZ: https://bugzilla.redhat.com/1917654 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=35163495 - -When the pcie slot is initialized, by default PCI_EXP_LNKSTA_DLLLA -(Data Link Layer Link Active) is set in PCI_EXP_LNKSTA -(Link Status) without checking if the slot is empty or not. 
- -This is confusing for the kernel because as it sees the link is up -it tries to read the vendor ID and fails: - -(From https://bugzilla.kernel.org/show_bug.cgi?id=211691) - -[ 1.661105] pcieport 0000:00:02.2: pciehp: Slot Capabilities : 0x0002007b -[ 1.661115] pcieport 0000:00:02.2: pciehp: Slot Status : 0x0010 -[ 1.661123] pcieport 0000:00:02.2: pciehp: Slot Control : 0x07c0 -[ 1.661138] pcieport 0000:00:02.2: pciehp: Slot #0 AttnBtn+ PwrCtrl+ MRL- AttnInd+ PwrInd+ HotPlug+ Surprise+ Interlock+ NoCompl- IbPresDis- LLActRep+ -[ 1.662581] pcieport 0000:00:02.2: pciehp: pciehp_get_power_status: SLOTCTRL 6c value read 7c0 -[ 1.662597] pcieport 0000:00:02.2: pciehp: pciehp_check_link_active: lnk_status = 2204 -[ 1.662703] pcieport 0000:00:02.2: pciehp: pending interrupts 0x0010 from Slot Status -[ 1.662706] pcieport 0000:00:02.2: pciehp: pcie_enable_notification: SLOTCTRL 6c write cmd 1031 -[ 1.662730] pcieport 0000:00:02.2: pciehp: pciehp_check_link_active: lnk_status = 2204 -[ 1.662748] pcieport 0000:00:02.2: pciehp: pciehp_check_link_active: lnk_status = 2204 -[ 1.662750] pcieport 0000:00:02.2: pciehp: Slot(0-2): Link Up -[ 2.896132] pcieport 0000:00:02.2: pciehp: pciehp_check_link_status: lnk_status = 2204 -[ 2.896135] pcieport 0000:00:02.2: pciehp: Slot(0-2): No device found -[ 2.896900] pcieport 0000:00:02.2: pciehp: pending interrupts 0x0010 from Slot Status -[ 2.896903] pcieport 0000:00:02.2: pciehp: pciehp_power_off_slot: SLOTCTRL 6c write cmd 400 -[ 3.656901] pcieport 0000:00:02.2: pciehp: pending interrupts 0x0009 from Slot Status - -This is really a problem with virtio-net failover that hotplugs a VFIO -card during the boot process. The kernel can shutdown the slot while -QEMU is hotplugging it, and this likely ends by an automatic unplug of -the card. At the end of the boot sequence the card has disappeared. 
- -To fix that, don't set the "Link Active" state in the init function, but -rely on the plug function to do it, as the mechanism has already been -introduced by 2f2b18f60bf1. - -Fixes: 2f2b18f60bf1 ("pcie: set link state inactive/active after hot unplug/plug") -Cc: zhengxiang9@huawei.com -Fixes: 3d67447fe7c2 ("pcie: Fill PCIESlot link fields to support higher speeds and widths") -Cc: alex.williamson@redhat.com -Fixes: b2101eae63ea ("pcie: Set the "link active" in the link status register") -Cc: benh@kernel.crashing.org -Signed-off-by: Laurent Vivier -Message-Id: <20210212135250.2738750-5-lvivier@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit df72184ec15829053b3bb5a0d5801773b6d9ec25) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula ---- - hw/pci/pcie.c | 19 +++++++++---------- - 1 file changed, 9 insertions(+), 10 deletions(-) - -diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c -index d4010cf8f3..a733e2fb87 100644 ---- a/hw/pci/pcie.c -+++ b/hw/pci/pcie.c -@@ -75,11 +75,6 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version) - QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1) | - QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT)); - -- if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { -- pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, -- PCI_EXP_LNKSTA_DLLLA); -- } -- - /* We changed link status bits over time, and changing them across - * migrations is generally fine as hardware changes them too. - * Let's not bother checking. 
-@@ -125,8 +120,7 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev) - */ - pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, - PCI_EXP_LNKCAP_DLLLARC); -- pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, -- PCI_EXP_LNKSTA_DLLLA); -+ /* the PCI_EXP_LNKSTA_DLLLA will be set in the hotplug function */ - - /* - * Target Link Speed defaults to the highest link speed supported by -@@ -427,6 +421,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, - PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); - uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; - PCIDevice *pci_dev = PCI_DEVICE(dev); -+ uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); - - /* Don't send event when device is enabled during qemu machine creation: - * it is present on boot, no hotplug event is necessary. We do send an -@@ -434,7 +429,8 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, - if (!dev->hotplugged) { - pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDS); -- if (pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { -+ if (pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || -+ (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { - pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, - PCI_EXP_LNKSTA_DLLLA); - } -@@ -448,7 +444,8 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, - if (pci_get_function_0(pci_dev)) { - pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDS); -- if (pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { -+ if (pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || -+ (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { - pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, - PCI_EXP_LNKSTA_DLLLA); - } -@@ -640,6 +637,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev, - uint32_t pos = dev->exp.exp_cap; - uint8_t *exp_cap = dev->config + pos; - uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); -+ uint32_t lnkcap = pci_get_long(exp_cap + 
PCI_EXP_LNKCAP); - - if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) { - /* -@@ -695,7 +693,8 @@ void pcie_cap_slot_write_config(PCIDevice *dev, - - pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDS); -- if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { -+ if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || -+ (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { - pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, - PCI_EXP_LNKSTA_DLLLA); - } --- -2.27.0 - diff --git a/kvm-q35-Increase-max_cpus-to-710-on-pc-q35-rhel8-machine.patch b/kvm-q35-Increase-max_cpus-to-710-on-pc-q35-rhel8-machine.patch deleted file mode 100644 index ac1341e..0000000 --- a/kvm-q35-Increase-max_cpus-to-710-on-pc-q35-rhel8-machine.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 8fa6654712c7cba73fd1c8d93b094d90c1757000 Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Tue, 26 Jan 2021 23:46:44 -0500 -Subject: [PATCH 4/5] q35: Increase max_cpus to 710 on pc-q35-rhel8* machine - types - -RH-Author: Eduardo Habkost -Message-id: <20210126234644.3091529-1-ehabkost@redhat.com> -Patchwork-id: 100791 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH] q35: Increase max_cpus to 710 on pc-q35-rhel8* machine types -Bugzilla: 1904268 -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Thomas Huth -RH-Acked-by: Paolo Bonzini - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1904268 -Upstream: not applicable -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=34536802 - -The original goal was to support 1024 VCPUs, but 710 VCPUs is the -maximum number we can reach before hitting SMBIOS table size -limits. 
- -Signed-off-by: Eduardo Habkost -Signed-off-by: Eduardo Lima (Etrunko) ---- - hw/i386/pc_q35.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 5acb47afcf..72854192a9 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -589,7 +589,7 @@ static void pc_q35_machine_rhel_options(MachineClass *m) - machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); - machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); - m->alias = "q35"; -- m->max_cpus = 512; -+ m->max_cpus = 710; - compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); - } - --- -2.18.4 - diff --git a/kvm-qemu-iotests-300-Add-test-case-for-modifying-persist.patch b/kvm-qemu-iotests-300-Add-test-case-for-modifying-persist.patch deleted file mode 100644 index 0cf96d6..0000000 --- a/kvm-qemu-iotests-300-Add-test-case-for-modifying-persist.patch +++ /dev/null @@ -1,154 +0,0 @@ -From b76dbfedc47366039a08f68de82792b9c70a6be9 Mon Sep 17 00:00:00 2001 -From: Peter Krempa -Date: Mon, 22 Feb 2021 13:35:06 -0500 -Subject: [PATCH 23/54] qemu-iotests: 300: Add test case for modifying - persistence of bitmap - -RH-Author: Peter Krempa -Message-id: -Patchwork-id: 101172 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 3/3] qemu-iotests: 300: Add test case for modifying persistence of bitmap -Bugzilla: 1930757 -RH-Acked-by: John Snow -RH-Acked-by: Kevin Wolf -RH-Acked-by: Eric Blake - -Verify that the modification of the bitmap persistence over migration -which is controlled via BitmapMigrationBitmapAliasTransform works -properly. - -Based on TestCrossAliasMigration - -Signed-off-by: Peter Krempa -Message-Id: -Reviewed-by: Eric Blake -[eblake: Adjust test for explicit read_zeroes=False] -Signed-off-by: Eric Blake -(cherry picked from commit ca4bfec41d56a1154da89b105048b3462361d0f0) - -https://bugzilla.redhat.com/show_bug.cgi?id=1930757 -Signed-off-by: Danilo C. L. 
de Paula ---- - tests/qemu-iotests/300 | 93 ++++++++++++++++++++++++++++++++++++++ - tests/qemu-iotests/300.out | 4 +- - 2 files changed, 95 insertions(+), 2 deletions(-) - -diff --git a/tests/qemu-iotests/300 b/tests/qemu-iotests/300 -index 5b75121b84..b25d8b04c0 100755 ---- a/tests/qemu-iotests/300 -+++ b/tests/qemu-iotests/300 -@@ -588,6 +588,99 @@ class TestCrossAliasMigration(TestDirtyBitmapMigration): - self.verify_dest_has_all_bitmaps() - self.verify_dest_error(None) - -+class TestAliasTransformMigration(TestDirtyBitmapMigration): -+ """ -+ Tests the 'transform' option which modifies bitmap persistence on migration. -+ """ -+ -+ src_node_name = 'node-a' -+ dst_node_name = 'node-b' -+ src_bmap_name = 'bmap-a' -+ dst_bmap_name = 'bmap-b' -+ -+ def setUp(self) -> None: -+ TestDirtyBitmapMigration.setUp(self) -+ -+ # Now create another block device and let both have two bitmaps each -+ result = self.vm_a.qmp('blockdev-add', -+ node_name='node-b', driver='null-co', -+ read_zeroes=False) -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.vm_b.qmp('blockdev-add', -+ node_name='node-a', driver='null-co', -+ read_zeroes=False) -+ self.assert_qmp(result, 'return', {}) -+ -+ bmaps_to_add = (('node-a', 'bmap-b'), -+ ('node-b', 'bmap-a'), -+ ('node-b', 'bmap-b')) -+ -+ for (node, bmap) in bmaps_to_add: -+ result = self.vm_a.qmp('block-dirty-bitmap-add', -+ node=node, name=bmap) -+ self.assert_qmp(result, 'return', {}) -+ -+ @staticmethod -+ def transform_mapping() -> BlockBitmapMapping: -+ return [ -+ { -+ 'node-name': 'node-a', -+ 'alias': 'node-a', -+ 'bitmaps': [ -+ { -+ 'name': 'bmap-a', -+ 'alias': 'bmap-a', -+ 'transform': -+ { -+ 'persistent': True -+ } -+ }, -+ { -+ 'name': 'bmap-b', -+ 'alias': 'bmap-b' -+ } -+ ] -+ }, -+ { -+ 'node-name': 'node-b', -+ 'alias': 'node-b', -+ 'bitmaps': [ -+ { -+ 'name': 'bmap-a', -+ 'alias': 'bmap-a' -+ }, -+ { -+ 'name': 'bmap-b', -+ 'alias': 'bmap-b' -+ } -+ ] -+ } -+ ] -+ -+ def verify_dest_bitmap_state(self) -> 
None: -+ bitmaps = self.vm_b.query_bitmaps() -+ -+ for node in bitmaps: -+ bitmaps[node] = sorted(((bmap['name'], bmap['persistent']) for bmap in bitmaps[node])) -+ -+ self.assertEqual(bitmaps, -+ {'node-a': [('bmap-a', True), ('bmap-b', False)], -+ 'node-b': [('bmap-a', False), ('bmap-b', False)]}) -+ -+ def test_transform_on_src(self) -> None: -+ self.set_mapping(self.vm_a, self.transform_mapping()) -+ -+ self.migrate() -+ self.verify_dest_bitmap_state() -+ self.verify_dest_error(None) -+ -+ def test_transform_on_dst(self) -> None: -+ self.set_mapping(self.vm_b, self.transform_mapping()) -+ -+ self.migrate() -+ self.verify_dest_bitmap_state() -+ self.verify_dest_error(None) - - if __name__ == '__main__': - iotests.main(supported_protocols=['file']) -diff --git a/tests/qemu-iotests/300.out b/tests/qemu-iotests/300.out -index cafb8161f7..12e9ab7d57 100644 ---- a/tests/qemu-iotests/300.out -+++ b/tests/qemu-iotests/300.out -@@ -1,5 +1,5 @@ --..................................... -+....................................... 
- ---------------------------------------------------------------------- --Ran 37 tests -+Ran 39 tests - - OK --- -2.27.0 - diff --git a/kvm-qemu-nbd-Use-SOMAXCONN-for-socket-listen-backlog.patch b/kvm-qemu-nbd-Use-SOMAXCONN-for-socket-listen-backlog.patch deleted file mode 100644 index 573aeaf..0000000 --- a/kvm-qemu-nbd-Use-SOMAXCONN-for-socket-listen-backlog.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 1107799dade18adccfca5097341b6dfb4977e69e Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Mon, 22 Feb 2021 21:34:55 -0500 -Subject: [PATCH 19/54] qemu-nbd: Use SOMAXCONN for socket listen() backlog -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Blake -Message-id: <20210222213455.320104-2-eblake@redhat.com> -Patchwork-id: 101192 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] qemu-nbd: Use SOMAXCONN for socket listen() backlog -Bugzilla: 1925345 -RH-Acked-by: Richard Jones -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Daniel P. Berrange - -Our default of a backlog of 1 connection is rather puny; it gets in -the way when we are explicitly allowing multiple clients (such as -qemu-nbd -e N [--shared], or nbd-server-start with its default -"max-connections":0 for unlimited), but is even a problem when we -stick to qemu-nbd's default of only 1 active client but use -t -[--persistent] where a second client can start using the server once -the first finishes. While the effects are less noticeable on TCP -sockets (since the client can poll() to learn when the server is ready -again), it is definitely observable on Unix sockets, where on Linux, a -client will fail with EAGAIN and no recourse but to sleep an arbitrary -amount of time before retrying if the server backlog is already full. 
- -Since QMP nbd-server-start is always persistent, it now always -requests a backlog of SOMAXCONN; meanwhile, qemu-nbd will request -SOMAXCONN if persistent, otherwise its backlog should be based on the -expected number of clients. - -See https://bugzilla.redhat.com/1925045 for a demonstration of where -our low backlog prevents libnbd from connecting as many parallel -clients as it wants. - -Reported-by: Richard W.M. Jones -Signed-off-by: Eric Blake -CC: qemu-stable@nongnu.org -Message-Id: <20210209152759.209074-2-eblake@redhat.com> -Tested-by: Richard W.M. Jones -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Eric Blake -(cherry picked from commit 582d4210eb2f2ab5baac328fe4b479cd86da1647) -Signed-off-by: Eric Blake -Signed-off-by: Danilo C. L. de Paula ---- - blockdev-nbd.c | 7 ++++++- - qemu-nbd.c | 10 +++++++++- - 2 files changed, 15 insertions(+), 2 deletions(-) - -diff --git a/blockdev-nbd.c b/blockdev-nbd.c -index d8443d235b..b264620b98 100644 ---- a/blockdev-nbd.c -+++ b/blockdev-nbd.c -@@ -134,7 +134,12 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds, - qio_net_listener_set_name(nbd_server->listener, - "nbd-listener"); - -- if (qio_net_listener_open_sync(nbd_server->listener, addr, 1, errp) < 0) { -+ /* -+ * Because this server is persistent, a backlog of SOMAXCONN is -+ * better than trying to size it to max_connections. 
-+ */ -+ if (qio_net_listener_open_sync(nbd_server->listener, addr, SOMAXCONN, -+ errp) < 0) { - goto error; - } - -diff --git a/qemu-nbd.c b/qemu-nbd.c -index 1d337b7504..ce1dc43d69 100644 ---- a/qemu-nbd.c -+++ b/qemu-nbd.c -@@ -970,8 +970,16 @@ int main(int argc, char **argv) - - server = qio_net_listener_new(); - if (socket_activation == 0) { -+ int backlog; -+ -+ if (persistent) { -+ backlog = SOMAXCONN; -+ } else { -+ backlog = MIN(shared, SOMAXCONN); -+ } - saddr = nbd_build_socket_address(sockpath, bindto, port); -- if (qio_net_listener_open_sync(server, saddr, 1, &local_err) < 0) { -+ if (qio_net_listener_open_sync(server, saddr, backlog, -+ &local_err) < 0) { - object_unref(OBJECT(server)); - error_report_err(local_err); - exit(EXIT_FAILURE); --- -2.27.0 - diff --git a/kvm-qemu-storage-daemon-Enable-object-add.patch b/kvm-qemu-storage-daemon-Enable-object-add.patch deleted file mode 100644 index 8f48b5a..0000000 --- a/kvm-qemu-storage-daemon-Enable-object-add.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 6707057bc09cef526579bddb54ef7d4c3a7883ad Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 16 Feb 2021 16:19:42 -0500 -Subject: [PATCH 16/54] qemu-storage-daemon: Enable object-add -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Kevin Wolf -Message-id: <20210216161943.126728-5-kwolf@redhat.com> -Patchwork-id: 101103 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 4/5] qemu-storage-daemon: Enable object-add -Bugzilla: 1901323 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -As we don't have a fully QAPIfied version of object-add yet and it still -has 'gen': false in the schema, it needs to be registered explicitly in -init_qmp_commands() to be available for users. - -Fixes: 2af282ec51a27116d0402cab237b8970800f870c -Signed-off-by: Kevin Wolf -Message-Id: <20210204072137.19663-1-kwolf@redhat.com> -Reviewed-by: Daniel P. 
Berrangé -Signed-off-by: Kevin Wolf -(cherry picked from commit 15d40e9204eb3d89577187f117a1dde2237bdc4d) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - storage-daemon/qemu-storage-daemon.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c -index d8d172cc60..9021a46b3a 100644 ---- a/storage-daemon/qemu-storage-daemon.c -+++ b/storage-daemon/qemu-storage-daemon.c -@@ -144,6 +144,8 @@ static void init_qmp_commands(void) - qmp_init_marshal(&qmp_commands); - qmp_register_command(&qmp_commands, "query-qmp-schema", - qmp_query_qmp_schema, QCO_ALLOW_PRECONFIG); -+ qmp_register_command(&qmp_commands, "object-add", qmp_object_add, -+ QCO_NO_OPTIONS); - - QTAILQ_INIT(&qmp_cap_negotiation_commands); - qmp_register_command(&qmp_cap_negotiation_commands, "qmp_capabilities", --- -2.27.0 - diff --git a/kvm-qxl-also-notify-the-rendering-is-done-when-skipping-.patch b/kvm-qxl-also-notify-the-rendering-is-done-when-skipping-.patch deleted file mode 100644 index 52532b4..0000000 --- a/kvm-qxl-also-notify-the-rendering-is-done-when-skipping-.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 51c6fc79d712c73bfeec2e4ff6779da3cab649fd Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Mon, 1 Mar 2021 08:39:20 -0500 -Subject: [PATCH 2/4] qxl: also notify the rendering is done when skipping it -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20210301083920.895324-3-marcandre.lureau@redhat.com> -Patchwork-id: 101275 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/2] qxl: also notify the rendering is done when skipping it -Bugzilla: 1932190 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Gerd Hoffmann -RH-Acked-by: Sergio Lopez Pascual - -From: Marc-André Lureau - -Asynchronous handlers may be waiting for the graphic_hw_update_done() to -be called in this case too. 
- -Fixes: 4d6316218 ("console: add graphic_hw_update_done()") -Signed-off-by: Marc-André Lureau -Message-Id: <20210201201422.446552-3-marcandre.lureau@redhat.com> -Signed-off-by: Gerd Hoffmann - -(cherry picked from commit b577ab2dda3afc7d6a7befabcf226507ff06c17c) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. de Paula ---- - hw/display/qxl-render.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c -index 3ce2e57b8f..d28849b121 100644 ---- a/hw/display/qxl-render.c -+++ b/hw/display/qxl-render.c -@@ -181,6 +181,7 @@ void qxl_render_update(PCIQXLDevice *qxl) - qxl->mode == QXL_MODE_UNDEFINED) { - qxl_render_update_area_unlocked(qxl); - qemu_mutex_unlock(&qxl->ssd.lock); -+ graphic_hw_update_done(qxl->ssd.dcl.con); - return; - } - --- -2.27.0 - diff --git a/kvm-qxl-set-qxl.ssd.dcl.con-on-secondary-devices.patch b/kvm-qxl-set-qxl.ssd.dcl.con-on-secondary-devices.patch deleted file mode 100644 index 55ea413..0000000 --- a/kvm-qxl-set-qxl.ssd.dcl.con-on-secondary-devices.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 94dc0414a7d5dadbbfc29a19617df7facb0ea7d6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Mon, 1 Mar 2021 08:39:19 -0500 -Subject: [PATCH 1/4] qxl: set qxl.ssd.dcl.con on secondary devices -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -Message-id: <20210301083920.895324-2-marcandre.lureau@redhat.com> -Patchwork-id: 101274 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/2] qxl: set qxl.ssd.dcl.con on secondary devices -Bugzilla: 1932190 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Gerd Hoffmann -RH-Acked-by: Sergio Lopez Pascual - -From: Marc-André Lureau - -On secondary QXL devices, the console is only set on qxl.vga.con. But -graphic_hw_update_done() is called with qxl.ssd.dcl.con. - -Like for primary QXL devices, set qxl.sdd.dcl.con = qxl.vga.con. 
- -Signed-off-by: Marc-André Lureau -Message-Id: <20210201201422.446552-2-marcandre.lureau@redhat.com> -Signed-off-by: Gerd Hoffmann - -(cherry picked from commit c502758670432195d61ff848b1b47b0f78918ae2) -Signed-off-by: Marc-André Lureau -Signed-off-by: Danilo C. L. de Paula ---- - hw/display/qxl.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/display/qxl.c b/hw/display/qxl.c -index 431c107096..50f4756b6a 100644 ---- a/hw/display/qxl.c -+++ b/hw/display/qxl.c -@@ -2266,6 +2266,7 @@ static void qxl_realize_secondary(PCIDevice *dev, Error **errp) - qxl->vga.vram_size, &error_fatal); - qxl->vga.vram_ptr = memory_region_get_ram_ptr(&qxl->vga.vram); - qxl->vga.con = graphic_console_init(DEVICE(dev), 0, &qxl_ops, qxl); -+ qxl->ssd.dcl.con = qxl->vga.con; - qxl->id = qemu_console_get_index(qxl->vga.con); /* == channel_id */ - - qxl_realize_common(qxl, errp); --- -2.27.0 - diff --git a/kvm-redhat-Add-some-devices-for-exporting-upstream-machi.patch b/kvm-redhat-Add-some-devices-for-exporting-upstream-machi.patch deleted file mode 100644 index 46e9ec7..0000000 --- a/kvm-redhat-Add-some-devices-for-exporting-upstream-machi.patch +++ /dev/null @@ -1,135 +0,0 @@ -From 1b6e1cc1f3d8033620bc0c04670d252180bd2c36 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 10 Feb 2021 17:10:34 -0300 -Subject: [PATCH 11/54] redhat: Add some devices for exporting upstream machine - types - -RH-Author: Peter Xu -Message-id: <20210210171034.129116-2-peterx@redhat.com> -Patchwork-id: 101043 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] redhat: Add some devices for exporting upstream machine types -Bugzilla: 1917826 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Dr. David Alan Gilbert - -Both "isa-parallel" and "hpet" will be required for the to-be-exported upstream -x86 machine types, so add them back into config. - -Since HPET was disabled for rhel machine types previously, we need to -explicitly do that for RHEL now after we add HPET back. 
- -Meanwhile, add blockers for the two devices so that they can never be created -on RHEL machine types. That should keep the old behavior for RHEL-AV. - -Signed-off-by: Peter Xu -Signed-off-by: Eduardo Lima (Etrunko) ---- - default-configs/devices/x86_64-softmmu.mak | 6 ++++++ - default-configs/devices/x86_64-upstream-devices.mak | 4 ++++ - hw/char/parallel.c | 9 +++++++++ - hw/i386/pc_piix.c | 2 +- - hw/i386/pc_q35.c | 2 +- - hw/timer/hpet.c | 8 ++++++++ - 6 files changed, 29 insertions(+), 2 deletions(-) - create mode 100644 default-configs/devices/x86_64-upstream-devices.mak - -diff --git a/default-configs/devices/x86_64-softmmu.mak b/default-configs/devices/x86_64-softmmu.mak -index b5de7e5279..e57bcff7d9 100644 ---- a/default-configs/devices/x86_64-softmmu.mak -+++ b/default-configs/devices/x86_64-softmmu.mak -@@ -3,3 +3,9 @@ - #include i386-softmmu.mak - - include x86_64-rh-devices.mak -+ -+# -+# RHEL: this is for the limited upstream machine type support, so to export -+# some more devices than what RHEL machines have. 
-+# -+include x86_64-upstream-devices.mak -diff --git a/default-configs/devices/x86_64-upstream-devices.mak b/default-configs/devices/x86_64-upstream-devices.mak -new file mode 100644 -index 0000000000..2cd20f54d2 ---- /dev/null -+++ b/default-configs/devices/x86_64-upstream-devices.mak -@@ -0,0 +1,4 @@ -+# We need "isa-parallel" -+CONFIG_PARALLEL=y -+# We need "hpet" -+CONFIG_HPET=y -diff --git a/hw/char/parallel.c b/hw/char/parallel.c -index 8b418abf71..6b3696a237 100644 ---- a/hw/char/parallel.c -+++ b/hw/char/parallel.c -@@ -29,6 +29,7 @@ - #include "chardev/char-parallel.h" - #include "chardev/char-fe.h" - #include "hw/acpi/aml-build.h" -+#include "hw/boards.h" - #include "hw/irq.h" - #include "hw/isa/isa.h" - #include "hw/qdev-properties.h" -@@ -533,6 +534,14 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp) - int base; - uint8_t dummy; - -+ /* Restricted for Red Hat Enterprise Linux */ -+ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); -+ if (strstr(mc->name, "rhel")) { -+ error_setg(errp, "Device %s is not supported with machine type %s", -+ object_get_typename(OBJECT(dev)), mc->name); -+ return; -+ } -+ - if (!qemu_chr_fe_backend_connected(&s->chr)) { - error_setg(errp, "Can't create parallel device, empty char device"); - return; -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 1b1cc18ae0..6e1f1ba082 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1018,7 +1018,7 @@ static void pc_machine_rhel7_options(MachineClass *m) - { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - m->family = "pc_piix_Y"; -- m->default_machine_opts = "firmware=bios-256k.bin"; -+ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; - pcmc->default_nic_model = "e1000"; - m->default_display = "std"; - m->no_parallel = 1; -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 72854192a9..a8c0496c9f 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -580,7 +580,7 @@ static void pc_q35_machine_rhel_options(MachineClass 
*m) - pcmc->default_nic_model = "e1000e"; - m->family = "pc_q35_Z"; - m->units_per_default_bus = 1; -- m->default_machine_opts = "firmware=bios-256k.bin"; -+ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; - m->default_display = "std"; - m->no_floppy = 1; - m->no_parallel = 1; -diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c -index 9520471be2..202e032524 100644 ---- a/hw/timer/hpet.c -+++ b/hw/timer/hpet.c -@@ -733,6 +733,14 @@ static void hpet_realize(DeviceState *dev, Error **errp) - int i; - HPETTimer *timer; - -+ /* Restricted for Red Hat Enterprise Linux */ -+ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); -+ if (strstr(mc->name, "rhel")) { -+ error_setg(errp, "Device %s is not supported with machine type %s", -+ object_get_typename(OBJECT(dev)), mc->name); -+ return; -+ } -+ - if (!s->intcap) { - warn_report("Hpet's intcap not initialized"); - } --- -2.27.0 - diff --git a/kvm-scsi-disk-do-not-complete-requests-early-for-rerror-.patch b/kvm-scsi-disk-do-not-complete-requests-early-for-rerror-.patch deleted file mode 100644 index 1e18da6..0000000 --- a/kvm-scsi-disk-do-not-complete-requests-early-for-rerror-.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 570d5034b8c6124df1830857144dc1ac08c13d06 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 8 Mar 2021 10:48:59 -0500 -Subject: [PATCH 02/15] scsi-disk: do not complete requests early for - rerror/werror=ignore -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20210308104902.149906-3-pbonzini@redhat.com> -Patchwork-id: 101309 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/5] scsi-disk: do not complete requests early for rerror/werror=ignore -Bugzilla: 1927530 -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Philippe Mathieu-Daudé - -When requested to ignore errors, just do nothing and let the -request complete normally. This means that the request will -be accounted correctly. 
- -This is what commit 40dce4ee61 ("scsi-disk: fix rerror/werror=ignore", -2018-10-19) was supposed to do: - -Fixes: 40dce4ee61 ("scsi-disk: fix rerror/werror=ignore", 2018-10-19) -Signed-off-by: Paolo Bonzini -(cherry picked from commit 424740def9a42da88550410de9a41ef07cc4a010) -Signed-off-by: Danilo C. L. de Paula ---- - hw/scsi/scsi-disk.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index cecdea2640..e8de15f549 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -252,8 +252,7 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) - - blk_error_action(s->qdev.conf.blk, action, is_read, error); - if (action == BLOCK_ERROR_ACTION_IGNORE) { -- scsi_req_complete(&r->req, 0); -- return true; -+ return false; - } - - if (action == BLOCK_ERROR_ACTION_STOP) { --- -2.27.0 - diff --git a/kvm-scsi-disk-move-scsi_handle_rw_error-earlier.patch b/kvm-scsi-disk-move-scsi_handle_rw_error-earlier.patch deleted file mode 100644 index 766321a..0000000 --- a/kvm-scsi-disk-move-scsi_handle_rw_error-earlier.patch +++ /dev/null @@ -1,222 +0,0 @@ -From c029d041853805ba612d27886f769c0e004c35e6 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 8 Mar 2021 10:48:58 -0500 -Subject: [PATCH 01/15] scsi-disk: move scsi_handle_rw_error earlier -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20210308104902.149906-2-pbonzini@redhat.com> -Patchwork-id: 101307 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/5] scsi-disk: move scsi_handle_rw_error earlier -Bugzilla: 1927530 -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Philippe Mathieu-Daudé - -Remove the forward declaration. - -Signed-off-by: Paolo Bonzini -(cherry picked from commit f95f61c2c9618fae7d8ea4c1d63e7416884bad52) -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/scsi/scsi-disk.c | 168 ++++++++++++++++++++++---------------------- - 1 file changed, 83 insertions(+), 85 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index 90841ad791..cecdea2640 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -110,8 +110,6 @@ struct SCSIDiskState { - uint16_t rotation_rate; - }; - --static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed); -- - static void scsi_free_request(SCSIRequest *req) - { - SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); -@@ -181,6 +179,89 @@ static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req) - qemu_iovec_init_external(&r->qiov, &r->iov, 1); - } - -+/* -+ * scsi_handle_rw_error has two return values. False means that the error -+ * must be ignored, true means that the error has been processed and the -+ * caller should not do anything else for this request. Note that -+ * scsi_handle_rw_error always manages its reference counts, independent -+ * of the return value. -+ */ -+static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) -+{ -+ bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV); -+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); -+ SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); -+ BlockErrorAction action = blk_get_error_action(s->qdev.conf.blk, -+ is_read, error); -+ -+ if (action == BLOCK_ERROR_ACTION_REPORT) { -+ if (acct_failed) { -+ block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); -+ } -+ switch (error) { -+ case 0: -+ /* A passthrough command has run and has produced sense data; check -+ * whether the error has to be handled by the guest or should rather -+ * pause the host. -+ */ -+ assert(r->status && *r->status); -+ if (scsi_sense_buf_is_guest_recoverable(r->req.sense, sizeof(r->req.sense))) { -+ /* These errors are handled by guest. 
*/ -+ sdc->update_sense(&r->req); -+ scsi_req_complete(&r->req, *r->status); -+ return true; -+ } -+ error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); -+ break; -+#ifdef CONFIG_LINUX -+ /* These errno mapping are specific to Linux. For more information: -+ * - scsi_decide_disposition in drivers/scsi/scsi_error.c -+ * - scsi_result_to_blk_status in drivers/scsi/scsi_lib.c -+ * - blk_errors[] in block/blk-core.c -+ */ -+ case EBADE: -+ /* DID_NEXUS_FAILURE -> BLK_STS_NEXUS. */ -+ scsi_req_complete(&r->req, RESERVATION_CONFLICT); -+ break; -+ case ENODATA: -+ /* DID_MEDIUM_ERROR -> BLK_STS_MEDIUM. */ -+ scsi_check_condition(r, SENSE_CODE(READ_ERROR)); -+ break; -+ case EREMOTEIO: -+ /* DID_TARGET_FAILURE -> BLK_STS_TARGET. */ -+ scsi_req_complete(&r->req, HARDWARE_ERROR); -+ break; -+#endif -+ case ENOMEDIUM: -+ scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); -+ break; -+ case ENOMEM: -+ scsi_check_condition(r, SENSE_CODE(TARGET_FAILURE)); -+ break; -+ case EINVAL: -+ scsi_check_condition(r, SENSE_CODE(INVALID_FIELD)); -+ break; -+ case ENOSPC: -+ scsi_check_condition(r, SENSE_CODE(SPACE_ALLOC_FAILED)); -+ break; -+ default: -+ scsi_check_condition(r, SENSE_CODE(IO_ERROR)); -+ break; -+ } -+ } -+ -+ blk_error_action(s->qdev.conf.blk, action, is_read, error); -+ if (action == BLOCK_ERROR_ACTION_IGNORE) { -+ scsi_req_complete(&r->req, 0); -+ return true; -+ } -+ -+ if (action == BLOCK_ERROR_ACTION_STOP) { -+ scsi_req_retry(&r->req); -+ } -+ return true; -+} -+ - static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) - { - if (r->req.io_canceled) { -@@ -427,89 +508,6 @@ static void scsi_read_data(SCSIRequest *req) - } - } - --/* -- * scsi_handle_rw_error has two return values. False means that the error -- * must be ignored, true means that the error has been processed and the -- * caller should not do anything else for this request. 
Note that -- * scsi_handle_rw_error always manages its reference counts, independent -- * of the return value. -- */ --static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) --{ -- bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV); -- SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); -- SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); -- BlockErrorAction action = blk_get_error_action(s->qdev.conf.blk, -- is_read, error); -- -- if (action == BLOCK_ERROR_ACTION_REPORT) { -- if (acct_failed) { -- block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); -- } -- switch (error) { -- case 0: -- /* A passthrough command has run and has produced sense data; check -- * whether the error has to be handled by the guest or should rather -- * pause the host. -- */ -- assert(r->status && *r->status); -- if (scsi_sense_buf_is_guest_recoverable(r->req.sense, sizeof(r->req.sense))) { -- /* These errors are handled by guest. */ -- sdc->update_sense(&r->req); -- scsi_req_complete(&r->req, *r->status); -- return true; -- } -- error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); -- break; --#ifdef CONFIG_LINUX -- /* These errno mapping are specific to Linux. For more information: -- * - scsi_decide_disposition in drivers/scsi/scsi_error.c -- * - scsi_result_to_blk_status in drivers/scsi/scsi_lib.c -- * - blk_errors[] in block/blk-core.c -- */ -- case EBADE: -- /* DID_NEXUS_FAILURE -> BLK_STS_NEXUS. */ -- scsi_req_complete(&r->req, RESERVATION_CONFLICT); -- break; -- case ENODATA: -- /* DID_MEDIUM_ERROR -> BLK_STS_MEDIUM. */ -- scsi_check_condition(r, SENSE_CODE(READ_ERROR)); -- break; -- case EREMOTEIO: -- /* DID_TARGET_FAILURE -> BLK_STS_TARGET. 
*/ -- scsi_req_complete(&r->req, HARDWARE_ERROR); -- break; --#endif -- case ENOMEDIUM: -- scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); -- break; -- case ENOMEM: -- scsi_check_condition(r, SENSE_CODE(TARGET_FAILURE)); -- break; -- case EINVAL: -- scsi_check_condition(r, SENSE_CODE(INVALID_FIELD)); -- break; -- case ENOSPC: -- scsi_check_condition(r, SENSE_CODE(SPACE_ALLOC_FAILED)); -- break; -- default: -- scsi_check_condition(r, SENSE_CODE(IO_ERROR)); -- break; -- } -- } -- -- blk_error_action(s->qdev.conf.blk, action, is_read, error); -- if (action == BLOCK_ERROR_ACTION_IGNORE) { -- scsi_req_complete(&r->req, 0); -- return true; -- } -- -- if (action == BLOCK_ERROR_ACTION_STOP) { -- scsi_req_retry(&r->req); -- } -- return true; --} -- - static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) - { - uint32_t n; --- -2.27.0 - diff --git a/kvm-scsi-disk-pass-SCSI-status-to-scsi_handle_rw_error.patch b/kvm-scsi-disk-pass-SCSI-status-to-scsi_handle_rw_error.patch deleted file mode 100644 index eb49e97..0000000 --- a/kvm-scsi-disk-pass-SCSI-status-to-scsi_handle_rw_error.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 620d646367a38ff9908de811e1f0a24a3f105529 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 8 Mar 2021 10:49:01 -0500 -Subject: [PATCH 04/15] scsi-disk: pass SCSI status to scsi_handle_rw_error -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20210308104902.149906-5-pbonzini@redhat.com> -Patchwork-id: 101310 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 4/5] scsi-disk: pass SCSI status to scsi_handle_rw_error -Bugzilla: 1927530 -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Philippe Mathieu-Daudé - -Instead of fishing it from *r->status, just pass the SCSI status -as a positive value of the second parameter and an errno as a -negative value. 
- -Signed-off-by: Paolo Bonzini -(cherry picked from commit f63c68bc0f514694a958b2e84a204b7792d28b17) -Signed-off-by: Danilo C. L. de Paula ---- - hw/scsi/scsi-disk.c | 38 +++++++++++++++++++++++++++----------- - 1 file changed, 27 insertions(+), 11 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index 7393f33ee2..c545f0b674 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -186,34 +186,48 @@ static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req) - * scsi_handle_rw_error always manages its reference counts, independent - * of the return value. - */ --static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) -+static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) - { - bool is_read = (r->req.cmd.mode == SCSI_XFER_FROM_DEV); - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); -- BlockErrorAction action = blk_get_error_action(s->qdev.conf.blk, -- is_read, error); -- SCSISense sense; -+ SCSISense sense = SENSE_CODE(NO_SENSE); -+ int error = 0; -+ bool req_has_sense = false; -+ BlockErrorAction action; -+ int status; - -+ if (ret < 0) { -+ status = scsi_sense_from_errno(-ret, &sense); -+ error = -ret; -+ } else { -+ /* A passthrough command has completed with nonzero status. */ -+ status = ret; -+ if (status == CHECK_CONDITION) { -+ req_has_sense = true; -+ error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); -+ } else { -+ error = EINVAL; -+ } -+ } -+ -+ action = blk_get_error_action(s->qdev.conf.blk, is_read, error); - if (action == BLOCK_ERROR_ACTION_REPORT) { - if (acct_failed) { - block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); - } -- if (error == 0) { -+ if (req_has_sense) { - /* A passthrough command has run and has produced sense data; check - * whether the error has to be handled by the guest or should rather - * pause the host. 
- */ -- assert(r->status && *r->status); - if (scsi_sense_buf_is_guest_recoverable(r->req.sense, sizeof(r->req.sense))) { - /* These errors are handled by guest. */ - sdc->update_sense(&r->req); -- scsi_req_complete(&r->req, *r->status); -+ scsi_req_complete(&r->req, status); - return true; - } -- error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); - } else { -- int status = scsi_sense_from_errno(error, &sense); - if (status == CHECK_CONDITION) { - scsi_req_build_sense(&r->req, sense); - } -@@ -239,8 +253,10 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) - return true; - } - -- if (ret < 0 || (r->status && *r->status)) { -- return scsi_handle_rw_error(r, -ret, acct_failed); -+ if (ret < 0) { -+ return scsi_handle_rw_error(r, ret, acct_failed); -+ } else if (r->status && *r->status) { -+ return scsi_handle_rw_error(r, *r->status, acct_failed); - } - - return false; --- -2.27.0 - diff --git a/kvm-scsi-disk-pass-guest-recoverable-errors-through-even.patch b/kvm-scsi-disk-pass-guest-recoverable-errors-through-even.patch deleted file mode 100644 index 58fac83..0000000 --- a/kvm-scsi-disk-pass-guest-recoverable-errors-through-even.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 9cf10f41fc8a89cd80f27e3b2674dec7eead60d4 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 8 Mar 2021 10:49:02 -0500 -Subject: [PATCH 05/15] scsi-disk: pass guest recoverable errors through even - for rerror=stop -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20210308104902.149906-6-pbonzini@redhat.com> -Patchwork-id: 101311 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 5/5] scsi-disk: pass guest recoverable errors through even for rerror=stop -Bugzilla: 1927530 -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Philippe Mathieu-Daudé - -Right now, recoverable sense values are only passed directly to the -guest only for rerror=report. 
However, when rerror/werror are 'stop' -we still don't want the host to be involved on every UNIT ATTENTION -(especially considered that the QMP event will not have enough information -to act on the report). - -Signed-off-by: Paolo Bonzini -(cherry picked from commit 782a78c9e994c2be23467262f50e885a0eb0d9fc) -Signed-off-by: Danilo C. L. de Paula ---- - hw/scsi/scsi-disk.c | 51 +++++++++++++++++++++++++-------------------- - 1 file changed, 28 insertions(+), 23 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index c545f0b674..f2abbf0d87 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -211,39 +211,44 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) - } - } - -- action = blk_get_error_action(s->qdev.conf.blk, is_read, error); -- if (action == BLOCK_ERROR_ACTION_REPORT) { -+ /* -+ * Check whether the error has to be handled by the guest or should -+ * rather follow the rerror=/werror= settings. Guest-handled errors -+ * are usually retried immediately, so do not post them to QMP and -+ * do not account them as failed I/O. -+ */ -+ if (req_has_sense && -+ scsi_sense_buf_is_guest_recoverable(r->req.sense, sizeof(r->req.sense))) { -+ action = BLOCK_ERROR_ACTION_REPORT; -+ acct_failed = false; -+ } else { -+ action = blk_get_error_action(s->qdev.conf.blk, is_read, error); -+ blk_error_action(s->qdev.conf.blk, action, is_read, error); -+ } -+ -+ switch (action) { -+ case BLOCK_ERROR_ACTION_REPORT: - if (acct_failed) { - block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); - } - if (req_has_sense) { -- /* A passthrough command has run and has produced sense data; check -- * whether the error has to be handled by the guest or should rather -- * pause the host. -- */ -- if (scsi_sense_buf_is_guest_recoverable(r->req.sense, sizeof(r->req.sense))) { -- /* These errors are handled by guest. 
*/ -- sdc->update_sense(&r->req); -- scsi_req_complete(&r->req, status); -- return true; -- } -- } else { -- if (status == CHECK_CONDITION) { -- scsi_req_build_sense(&r->req, sense); -- } -- scsi_req_complete(&r->req, status); -+ sdc->update_sense(&r->req); -+ } else if (status == CHECK_CONDITION) { -+ scsi_req_build_sense(&r->req, sense); - } -- } -+ scsi_req_complete(&r->req, status); -+ return true; - -- blk_error_action(s->qdev.conf.blk, action, is_read, error); -- if (action == BLOCK_ERROR_ACTION_IGNORE) { -+ case BLOCK_ERROR_ACTION_IGNORE: - return false; -- } - -- if (action == BLOCK_ERROR_ACTION_STOP) { -+ case BLOCK_ERROR_ACTION_STOP: - scsi_req_retry(&r->req); -+ return true; -+ -+ default: -+ g_assert_not_reached(); - } -- return true; - } - - static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) --- -2.27.0 - diff --git a/kvm-scsi-fix-device-removal-race-vs-IO-restart-callback-.patch b/kvm-scsi-fix-device-removal-race-vs-IO-restart-callback-.patch deleted file mode 100644 index 75baf13..0000000 --- a/kvm-scsi-fix-device-removal-race-vs-IO-restart-callback-.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 3b537ab3eb342af4222a8cf825062d17893cd18f Mon Sep 17 00:00:00 2001 -From: Maxim Levitsky -Date: Wed, 27 Jan 2021 11:47:54 -0500 -Subject: [PATCH 1/7] scsi: fix device removal race vs IO restart callback on - resume - -RH-Author: Maxim Levitsky -Message-id: <20210127114754.477582-2-mlevitsk@redhat.com> -Patchwork-id: 100795 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] scsi: fix device removal race vs IO restart callback on resume -Bugzilla: 1854811 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: John Snow - -There is (mostly theoretical) race between removal of a scsi device and -scsi_dma_restart_bh. - -It used to be easier to hit this race prior to my / Paulo's patch series -that added rcu to scsi bus device handling code, but IMHO this race -should still be possible to hit, at least in theory. 
- -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1854811 - -Fix it anyway with a patch that was proposed by Paulo in the above bugzilla. - -Suggested-by: Paolo Bonzini -Signed-off-by: Maxim Levitsky -Message-Id: <20201210125929.1136390-2-mlevitsk@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit cfd4e36352d4426221aa94da44a172da1aaa741b) -Signed-off-by: Maxim Levitsky -Signed-off-by: Eduardo Lima (Etrunko) ---- - hw/scsi/scsi-bus.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index b901e701f0..edb5c3492a 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -170,6 +170,8 @@ static void scsi_dma_restart_bh(void *opaque) - scsi_req_unref(req); - } - aio_context_release(blk_get_aio_context(s->conf.blk)); -+ /* Drop the reference that was acquired in scsi_dma_restart_cb */ -+ object_unref(OBJECT(s)); - } - - void scsi_req_retry(SCSIRequest *req) -@@ -188,6 +190,8 @@ static void scsi_dma_restart_cb(void *opaque, int running, RunState state) - } - if (!s->bh) { - AioContext *ctx = blk_get_aio_context(s->conf.blk); -+ /* The reference is dropped in scsi_dma_restart_bh.*/ -+ object_ref(OBJECT(s)); - s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s); - qemu_bh_schedule(s->bh); - } --- -2.18.4 - diff --git a/kvm-scsi-introduce-scsi_sense_from_errno.patch b/kvm-scsi-introduce-scsi_sense_from_errno.patch deleted file mode 100644 index 4a13519..0000000 --- a/kvm-scsi-introduce-scsi_sense_from_errno.patch +++ /dev/null @@ -1,181 +0,0 @@ -From 38a29a168f4b377eb6381469af16887e12ebfa3d Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Mon, 8 Mar 2021 10:49:00 -0500 -Subject: [PATCH 03/15] scsi: introduce scsi_sense_from_errno() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -Message-id: <20210308104902.149906-4-pbonzini@redhat.com> -Patchwork-id: 101308 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 3/5] scsi: introduce 
scsi_sense_from_errno() -Bugzilla: 1927530 -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Philippe Mathieu-Daudé - -The new function is an extension of the switch statement in scsi-disk.c -which also includes the errno cases only found in sg_io_sense_from_errno. -This allows us to consolidate the errno handling. - -Extracted from a patch by Hannes Reinecke . - -Signed-off-by: Paolo Bonzini -(cherry picked from commit d7a84021db8eeddcd5d24ab591a1434763caff6c) -Signed-off-by: Danilo C. L. de Paula ---- - hw/scsi/scsi-disk.c | 45 +++++++------------------------------- - include/scsi/utils.h | 2 ++ - scsi/utils.c | 51 +++++++++++++++++++++++++++++++++++--------- - 3 files changed, 51 insertions(+), 47 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index e8de15f549..7393f33ee2 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -193,13 +193,13 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) - SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); - BlockErrorAction action = blk_get_error_action(s->qdev.conf.blk, - is_read, error); -+ SCSISense sense; - - if (action == BLOCK_ERROR_ACTION_REPORT) { - if (acct_failed) { - block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); - } -- switch (error) { -- case 0: -+ if (error == 0) { - /* A passthrough command has run and has produced sense data; check - * whether the error has to be handled by the guest or should rather - * pause the host. -@@ -212,41 +212,12 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) - return true; - } - error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); -- break; --#ifdef CONFIG_LINUX -- /* These errno mapping are specific to Linux. 
For more information: -- * - scsi_decide_disposition in drivers/scsi/scsi_error.c -- * - scsi_result_to_blk_status in drivers/scsi/scsi_lib.c -- * - blk_errors[] in block/blk-core.c -- */ -- case EBADE: -- /* DID_NEXUS_FAILURE -> BLK_STS_NEXUS. */ -- scsi_req_complete(&r->req, RESERVATION_CONFLICT); -- break; -- case ENODATA: -- /* DID_MEDIUM_ERROR -> BLK_STS_MEDIUM. */ -- scsi_check_condition(r, SENSE_CODE(READ_ERROR)); -- break; -- case EREMOTEIO: -- /* DID_TARGET_FAILURE -> BLK_STS_TARGET. */ -- scsi_req_complete(&r->req, HARDWARE_ERROR); -- break; --#endif -- case ENOMEDIUM: -- scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); -- break; -- case ENOMEM: -- scsi_check_condition(r, SENSE_CODE(TARGET_FAILURE)); -- break; -- case EINVAL: -- scsi_check_condition(r, SENSE_CODE(INVALID_FIELD)); -- break; -- case ENOSPC: -- scsi_check_condition(r, SENSE_CODE(SPACE_ALLOC_FAILED)); -- break; -- default: -- scsi_check_condition(r, SENSE_CODE(IO_ERROR)); -- break; -+ } else { -+ int status = scsi_sense_from_errno(error, &sense); -+ if (status == CHECK_CONDITION) { -+ scsi_req_build_sense(&r->req, sense); -+ } -+ scsi_req_complete(&r->req, status); - } - } - -diff --git a/include/scsi/utils.h b/include/scsi/utils.h -index fbc5588279..878434a8f5 100644 ---- a/include/scsi/utils.h -+++ b/include/scsi/utils.h -@@ -133,4 +133,6 @@ int sg_io_sense_from_errno(int errno_value, struct sg_io_hdr *io_hdr, - SCSISense *sense); - #endif - -+int scsi_sense_from_errno(int errno_value, SCSISense *sense); -+ - #endif -diff --git a/scsi/utils.c b/scsi/utils.c -index b37c283014..c93458b80e 100644 ---- a/scsi/utils.c -+++ b/scsi/utils.c -@@ -560,21 +560,52 @@ const char *scsi_command_name(uint8_t cmd) - return names[cmd]; - } - -+int scsi_sense_from_errno(int errno_value, SCSISense *sense) -+{ -+ switch (errno_value) { -+ case 0: -+ return GOOD; -+ case EDOM: -+ return TASK_SET_FULL; -+#ifdef CONFIG_LINUX -+ /* These errno mapping are specific to Linux. 
For more information: -+ * - scsi_decide_disposition in drivers/scsi/scsi_error.c -+ * - scsi_result_to_blk_status in drivers/scsi/scsi_lib.c -+ * - blk_errors[] in block/blk-core.c -+ */ -+ case EBADE: -+ return RESERVATION_CONFLICT; -+ case ENODATA: -+ *sense = SENSE_CODE(READ_ERROR); -+ return CHECK_CONDITION; -+ case EREMOTEIO: -+ *sense = SENSE_CODE(LUN_COMM_FAILURE); -+ return CHECK_CONDITION; -+#endif -+ case ENOMEDIUM: -+ *sense = SENSE_CODE(NO_MEDIUM); -+ return CHECK_CONDITION; -+ case ENOMEM: -+ *sense = SENSE_CODE(TARGET_FAILURE); -+ return CHECK_CONDITION; -+ case EINVAL: -+ *sense = SENSE_CODE(INVALID_FIELD); -+ return CHECK_CONDITION; -+ case ENOSPC: -+ *sense = SENSE_CODE(SPACE_ALLOC_FAILED); -+ return CHECK_CONDITION; -+ default: -+ *sense = SENSE_CODE(IO_ERROR); -+ return CHECK_CONDITION; -+ } -+} -+ - #ifdef CONFIG_LINUX - int sg_io_sense_from_errno(int errno_value, struct sg_io_hdr *io_hdr, - SCSISense *sense) - { - if (errno_value != 0) { -- switch (errno_value) { -- case EDOM: -- return TASK_SET_FULL; -- case ENOMEM: -- *sense = SENSE_CODE(TARGET_FAILURE); -- return CHECK_CONDITION; -- default: -- *sense = SENSE_CODE(IO_ERROR); -- return CHECK_CONDITION; -- } -+ return scsi_sense_from_errno(errno_value, sense); - } else { - if (io_hdr->host_status == SG_ERR_DID_NO_CONNECT || - io_hdr->host_status == SG_ERR_DID_BUS_BUSY || --- -2.27.0 - diff --git a/kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch b/kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch deleted file mode 100644 index e5a4937..0000000 --- a/kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch +++ /dev/null @@ -1,205 +0,0 @@ -From 2cb473c2e1cd671da4458b58a0f760f4f1c36cbc Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Wed, 10 Feb 2021 16:54:03 -0300 -Subject: [PATCH 08/54] spapr: Adjust firmware path of PCI devices -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Greg Kurz -Message-id: 
<20210210165403.469213-2-gkurz@redhat.com> -Patchwork-id: 101036 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] spapr: Adjust firmware path of PCI devices -Bugzilla: 1920941 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: David Gibson -RH-Acked-by: Laszlo Ersek - -From: Greg Kurz - -It is currently not possible to perform a strict boot from USB storage: - -$ qemu-system-ppc64 -accel kvm -nodefaults -nographic -serial stdio \ - -boot strict=on \ - -device qemu-xhci \ - -device usb-storage,drive=disk,bootindex=0 \ - -blockdev driver=file,node-name=disk,filename=fedora-ppc64le.qcow2 - -SLOF ********************************************************************** -QEMU Starting - Build Date = Jul 17 2020 11:15:24 - FW Version = git-e18ddad8516ff2cf - Press "s" to enter Open Firmware. - -Populating /vdevice methods -Populating /vdevice/vty@71000000 -Populating /vdevice/nvram@71000001 -Populating /pci@800000020000000 - 00 0000 (D) : 1b36 000d serial bus [ usb-xhci ] -No NVRAM common partition, re-initializing... -Scanning USB - XHCI: Initializing - USB Storage - SCSI: Looking for devices - 101000000000000 DISK : "QEMU QEMU HARDDISK 2.5+" -Using default console: /vdevice/vty@71000000 - - Welcome to Open Firmware - - Copyright (c) 2004, 2017 IBM Corporation All rights reserved. - This program and the accompanying materials are made available - under the terms of the BSD License available at - http://www.opensource.org/licenses/bsd-license.php - -Trying to load: from: /pci@800000020000000/usb@0/storage@1/disk@101000000000000 ... -E3405: No such device - -E3407: Load failed - - Type 'boot' and press return to continue booting the system. - Type 'reset-all' and press return to reboot the system. - -Ready! -0 > - -The device tree handed over by QEMU to SLOF indeed contains: - -qemu,boot-list = - "/pci@800000020000000/usb@0/storage@1/disk@101000000000000 HALT"; - -but the device node is named usb-xhci@0, not usb@0. 
- -This happens because the firmware names of PCI devices returned -by get_boot_devices_list() come from pcibus_get_fw_dev_path(), -while the sPAPR PHB code uses a different naming scheme for -device nodes. This inconsistency has always been there but it was -hidden for a long time because SLOF used to rename USB device -nodes, until this commit, merged in QEMU 4.2.0 : - -commit 85164ad4ed9960cac842fa4cc067c6b6699b0994 -Author: Alexey Kardashevskiy -Date: Wed Sep 11 16:24:32 2019 +1000 - - pseries: Update SLOF firmware image - - This fixes USB host bus adapter name in the device tree to match QEMU's - one. - - Signed-off-by: Alexey Kardashevskiy - Signed-off-by: David Gibson - -Fortunately, sPAPR implements the firmware path provider interface. -This provides a way to override the default firmware paths. - -Just factor out the sPAPR PHB naming logic from spapr_dt_pci_device() -to a helper, and use it in the sPAPR firmware path provider hook. - -Fixes: 85164ad4ed99 ("pseries: Update SLOF firmware image") -Signed-off-by: Greg Kurz -Message-Id: <20210122170157.246374-1-groug@kaod.org> -Reviewed-by: Daniel Henrique Barboza -Signed-off-by: David Gibson -(cherry picked from commit 040bdafce12f750816d879442014df2999a995c4) -Signed-off-by: Greg Kurz -Signed-off-by: Eduardo Lima (Etrunko) ---- - hw/ppc/spapr.c | 5 +++++ - hw/ppc/spapr_pci.c | 33 ++++++++++++++++++--------------- - include/hw/pci-host/spapr.h | 2 ++ - 3 files changed, 25 insertions(+), 15 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index a67df8cb26..c6a97e7964 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -3057,6 +3057,7 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, - SCSIDevice *d = CAST(SCSIDevice, dev, TYPE_SCSI_DEVICE); - SpaprPhbState *phb = CAST(SpaprPhbState, dev, TYPE_SPAPR_PCI_HOST_BRIDGE); - VHostSCSICommon *vsc = CAST(VHostSCSICommon, dev, TYPE_VHOST_SCSI_COMMON); -+ PCIDevice *pcidev = CAST(PCIDevice, dev, TYPE_PCI_DEVICE); - - if (d) { - void 
*spapr = CAST(void, bus->parent, "spapr-vscsi"); -@@ -3130,6 +3131,10 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, - return g_strdup_printf("pci@%x", PCI_SLOT(pcidev->devfn)); - } - -+ if (pcidev) { -+ return spapr_pci_fw_dev_name(pcidev); -+ } -+ - return NULL; - } - -diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c -index 88ce87f130..e78e917af1 100644 ---- a/hw/ppc/spapr_pci.c -+++ b/hw/ppc/spapr_pci.c -@@ -1334,15 +1334,29 @@ static int spapr_dt_pci_bus(SpaprPhbState *sphb, PCIBus *bus, - return offset; - } - -+char *spapr_pci_fw_dev_name(PCIDevice *dev) -+{ -+ const gchar *basename; -+ int slot = PCI_SLOT(dev->devfn); -+ int func = PCI_FUNC(dev->devfn); -+ uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3); -+ -+ basename = dt_name_from_class((ccode >> 16) & 0xff, (ccode >> 8) & 0xff, -+ ccode & 0xff); -+ -+ if (func != 0) { -+ return g_strdup_printf("%s@%x,%x", basename, slot, func); -+ } else { -+ return g_strdup_printf("%s@%x", basename, slot); -+ } -+} -+ - /* create OF node for pci device and required OF DT properties */ - static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev, - void *fdt, int parent_offset) - { - int offset; -- const gchar *basename; -- gchar *nodename; -- int slot = PCI_SLOT(dev->devfn); -- int func = PCI_FUNC(dev->devfn); -+ g_autofree gchar *nodename = spapr_pci_fw_dev_name(dev); - PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); - ResourceProps rp; - SpaprDrc *drc = drc_from_dev(sphb, dev); -@@ -1359,19 +1373,8 @@ static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev, - uint32_t pci_status = pci_default_read_config(dev, PCI_STATUS, 2); - gchar *loc_code; - -- basename = dt_name_from_class((ccode >> 16) & 0xff, (ccode >> 8) & 0xff, -- ccode & 0xff); -- -- if (func != 0) { -- nodename = g_strdup_printf("%s@%x,%x", basename, slot, func); -- } else { -- nodename = g_strdup_printf("%s@%x", basename, slot); -- } -- - _FDT(offset = fdt_add_subnode(fdt, parent_offset, 
nodename)); - -- g_free(nodename); -- - /* in accordance with PAPR+ v2.7 13.6.3, Table 181 */ - _FDT(fdt_setprop_cell(fdt, offset, "vendor-id", vendor_id)); - _FDT(fdt_setprop_cell(fdt, offset, "device-id", device_id)); -diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h -index bd014823a9..5b03a7b0eb 100644 ---- a/include/hw/pci-host/spapr.h -+++ b/include/hw/pci-host/spapr.h -@@ -210,4 +210,6 @@ static inline unsigned spapr_phb_windows_supported(SpaprPhbState *sphb) - return sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1; - } - -+char *spapr_pci_fw_dev_name(PCIDevice *dev); -+ - #endif /* PCI_HOST_SPAPR_H */ --- -2.27.0 - diff --git a/kvm-spapr-Allow-memory-unplug-to-always-succeed.patch b/kvm-spapr-Allow-memory-unplug-to-always-succeed.patch deleted file mode 100644 index f7a6b9a..0000000 --- a/kvm-spapr-Allow-memory-unplug-to-always-succeed.patch +++ /dev/null @@ -1,100 +0,0 @@ -From 1365bf10ad49fd7c0a3b4e2eabeaacd1abf60d18 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Tue, 19 Jan 2021 15:20:43 -0500 -Subject: [PATCH 1/5] spapr: Allow memory unplug to always succeed - -RH-Author: Greg Kurz -Message-id: <20210119152044.1019191-2-gkurz@redhat.com> -Patchwork-id: 100690 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/2] spapr: Allow memory unplug to always succeed -Bugzilla: 1914069 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Laurent Vivier -RH-Acked-by: David Gibson - -From: Greg Kurz - -It is currently impossible to hot-unplug a memory device between -machine reset and CAS. - -(qemu) device_del dimm1 -Error: Memory hot unplug not supported for this guest - -This limitation was introduced in order to provide an explicit -error path for older guests that didn't support hot-plug event -sources (and thus memory hot-unplug). - -The linux kernel has been supporting these since 4.11. All recent -enough guests are thus capable of handling the removal of a memory -device at all time, including during early boot. 
- -Lift the limitation for the latest machine type. This means that -trying to unplug memory from a guest that doesn't support it will -likely just do nothing and the memory will only get removed at -next reboot. Such older guests can still get the existing behavior -by using an older machine type. - -Signed-off-by: Greg Kurz -Message-Id: <160794035064.23292.17560963281911312439.stgit@bahia.lan> -Signed-off-by: David Gibson -(cherry picked from commit 1e8b5b1aa16b7d73ba8ba52c95d0b52329d5c9d0) -Signed-off-by: Greg Kurz - -Conflicts: - hw/ppc/spapr.c - -Conflict because RHEL-AV doesn't have upstream 576a00bdeb5b ("hw: add -compat machines for 6.0"). Just ignore the change that sets -pre_6_0_memory_unplug for older machine types since the next patch -removes the flag. - -Signed-off-by: Eduardo Lima (Etrunko) ---- - hw/ppc/spapr.c | 3 ++- - hw/ppc/spapr_events.c | 3 ++- - include/hw/ppc/spapr.h | 1 + - 3 files changed, 5 insertions(+), 2 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 4f61b64a21..65a647134a 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -4064,7 +4064,8 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, - SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); - - if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { -- if (spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { -+ if (!smc->pre_6_0_memory_unplug || -+ spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { - spapr_memory_unplug_request(hotplug_dev, dev, errp); - } else { - /* NOTE: this means there is a window after guest reset, prior to -diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c -index 1add53547e..c30123177b 100644 ---- a/hw/ppc/spapr_events.c -+++ b/hw/ppc/spapr_events.c -@@ -659,7 +659,8 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, - /* we should not be using count_indexed value unless the guest - * supports dedicated hotplug event source - */ -- g_assert(spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)); -+ 
g_assert(!SPAPR_MACHINE_GET_CLASS(spapr)->pre_6_0_memory_unplug || -+ spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)); - hp->drc_id.count_indexed.count = - cpu_to_be32(drc_id->count_indexed.count); - hp->drc_id.count_indexed.index = -diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 28bbf07f8f..4941fe9b4f 100644 ---- a/include/hw/ppc/spapr.h -+++ b/include/hw/ppc/spapr.h -@@ -142,6 +142,7 @@ struct SpaprMachineClass { - hwaddr rma_limit; /* clamp the RMA to this size */ - bool pre_5_1_assoc_refpoints; - bool pre_5_2_numa_associativity; -+ bool pre_6_0_memory_unplug; - - bool has_power9_support; - void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, --- -2.18.4 - diff --git a/kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch b/kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch deleted file mode 100644 index 94bad1d..0000000 --- a/kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch +++ /dev/null @@ -1,168 +0,0 @@ -From cd719765bd751142c4040ee7daf615b859fb3e9d Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Tue, 19 Jan 2021 15:20:44 -0500 -Subject: [PATCH 2/5] spapr: Improve handling of memory unplug with old guests - -RH-Author: Greg Kurz -Message-id: <20210119152044.1019191-3-gkurz@redhat.com> -Patchwork-id: 100691 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/2] spapr: Improve handling of memory unplug with old guests -Bugzilla: 1914069 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Laurent Vivier -RH-Acked-by: David Gibson - -From: Greg Kurz - -Since commit 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed") -trying to unplug memory from a guest that doesn't support it (eg. rhel6) -no longer generates an error like it used to. Instead, it leaves the -memory around : only a subsequent reboot or manual use of drmgr within -the guest can complete the hot-unplug sequence. A flag was added to -SpaprMachineClass so that this new behavior only applies to the default -machine type. - -We can do better. 
CAS processes all pending hot-unplug requests. This -means that we don't really care about what the guest supports if -the hot-unplug request happens before CAS. - -All guests that we care for, even old ones, set enough bits in OV5 -that lead to a non-empty bitmap in spapr->ov5_cas. Use that as a -heuristic to decide if CAS has already occured or not. - -Always accept unplug requests that happen before CAS since CAS will -process them. Restore the previous behavior of rejecting them after -CAS when we know that the guest doesn't support memory hot-unplug. - -This behavior is suitable for all machine types : this allows to -drop the pre_6_0_memory_unplug flag. - -Fixes: 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed") -Signed-off-by: Greg Kurz -Message-Id: <161012708715.801107.11418801796987916516.stgit@bahia.lan> -Reviewed-by: Daniel Henrique Barboza -Signed-off-by: David Gibson -(cherry picked from commit 73598c75df0585e039825e642adede21912dabc7) -Signed-off-by: Greg Kurz - -Conflicts: - hw/ppc/spapr.c - -Conflict around the removal of pre_6_0_memory_unplug, which was only -partially backported from upstream 1e8b5b1aa16b. - -Signed-off-by: Eduardo Lima (Etrunko) ---- - hw/ppc/spapr.c | 21 +++++++++++++-------- - hw/ppc/spapr_events.c | 3 +-- - hw/ppc/spapr_ovec.c | 7 +++++++ - include/hw/ppc/spapr.h | 2 +- - include/hw/ppc/spapr_ovec.h | 1 + - 5 files changed, 23 insertions(+), 11 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 65a647134a..a67df8cb26 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -4056,6 +4056,18 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, - } - } - -+bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr) -+{ -+ return spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT) || -+ /* -+ * CAS will process all pending unplug requests. -+ * -+ * HACK: a guest could theoretically have cleared all bits in OV5, -+ * but none of the guests we care for do. 
-+ */ -+ spapr_ovec_empty(spapr->ov5_cas); -+} -+ - static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) - { -@@ -4064,16 +4076,9 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, - SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); - - if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { -- if (!smc->pre_6_0_memory_unplug || -- spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { -+ if (spapr_memory_hot_unplug_supported(sms)) { - spapr_memory_unplug_request(hotplug_dev, dev, errp); - } else { -- /* NOTE: this means there is a window after guest reset, prior to -- * CAS negotiation, where unplug requests will fail due to the -- * capability not being detected yet. This is a bit different than -- * the case with PCI unplug, where the events will be queued and -- * eventually handled by the guest after boot -- */ - error_setg(errp, "Memory hot unplug not supported for this guest"); - } - } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { -diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c -index c30123177b..80b918ff5f 100644 ---- a/hw/ppc/spapr_events.c -+++ b/hw/ppc/spapr_events.c -@@ -659,8 +659,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, - /* we should not be using count_indexed value unless the guest - * supports dedicated hotplug event source - */ -- g_assert(!SPAPR_MACHINE_GET_CLASS(spapr)->pre_6_0_memory_unplug || -- spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)); -+ g_assert(spapr_memory_hot_unplug_supported(spapr)); - hp->drc_id.count_indexed.count = - cpu_to_be32(drc_id->count_indexed.count); - hp->drc_id.count_indexed.index = -diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c -index dd003f1763..b2567caa5c 100644 ---- a/hw/ppc/spapr_ovec.c -+++ b/hw/ppc/spapr_ovec.c -@@ -125,6 +125,13 @@ bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr) - return test_bit(bitnr, ov->bitmap) ? 
true : false; - } - -+bool spapr_ovec_empty(SpaprOptionVector *ov) -+{ -+ g_assert(ov); -+ -+ return bitmap_empty(ov->bitmap, OV_MAXBITS); -+} -+ - static void guest_byte_to_bitmap(uint8_t entry, unsigned long *bitmap, - long bitmap_offset) - { -diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 4941fe9b4f..5952942362 100644 ---- a/include/hw/ppc/spapr.h -+++ b/include/hw/ppc/spapr.h -@@ -142,7 +142,6 @@ struct SpaprMachineClass { - hwaddr rma_limit; /* clamp the RMA to this size */ - bool pre_5_1_assoc_refpoints; - bool pre_5_2_numa_associativity; -- bool pre_6_0_memory_unplug; - - bool has_power9_support; - void (*phb_placement)(SpaprMachineState *spapr, uint32_t index, -@@ -954,4 +953,5 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, - - void spapr_set_all_lpcrs(target_ulong value, target_ulong mask); - hwaddr spapr_get_rtas_addr(void); -+bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr); - #endif /* HW_SPAPR_H */ -diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h -index d4dee9e06a..48b716a060 100644 ---- a/include/hw/ppc/spapr_ovec.h -+++ b/include/hw/ppc/spapr_ovec.h -@@ -71,6 +71,7 @@ void spapr_ovec_cleanup(SpaprOptionVector *ov); - void spapr_ovec_set(SpaprOptionVector *ov, long bitnr); - void spapr_ovec_clear(SpaprOptionVector *ov, long bitnr); - bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr); -+bool spapr_ovec_empty(SpaprOptionVector *ov); - SpaprOptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector); - int spapr_dt_ovec(void *fdt, int fdt_offset, - SpaprOptionVector *ov, const char *name); --- -2.18.4 - diff --git a/kvm-storage-daemon-Call-bdrv_close_all-on-exit.patch b/kvm-storage-daemon-Call-bdrv_close_all-on-exit.patch deleted file mode 100644 index 7b0472c..0000000 --- a/kvm-storage-daemon-Call-bdrv_close_all-on-exit.patch +++ /dev/null @@ -1,48 +0,0 @@ -From b1883ddf10c2ec31ac72866494687d8897535a82 Mon Sep 17 00:00:00 2001 -From: Sergio Lopez 
Pascual -Date: Thu, 11 Feb 2021 14:42:07 -0300 -Subject: [PATCH 5/6] storage-daemon: Call bdrv_close_all() on exit - -RH-Author: Sergio Lopez Pascual -Message-id: <20210211144208.58930-5-slp@redhat.com> -Patchwork-id: 101048 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 4/5] storage-daemon: Call bdrv_close_all() on exit -Bugzilla: 1918966 1918968 -RH-Acked-by: Max Reitz -RH-Acked-by: Kevin Wolf -RH-Acked-by: Eric Blake - -From: Max Reitz - -Otherwise, exports and block devices are not properly shut down and -closed, unless the users explicitly issues blockdev-del and -block-export-del commands for each of them. - -Signed-off-by: Max Reitz -Reviewed-by: Kevin Wolf -Message-Id: <20201027190600.192171-17-mreitz@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit b55a3c8860b763b62b2cc2f4a6f55379977bbde5) -Signed-off-by: Sergio Lopez -Signed-off-by: Eduardo Lima (Etrunko) ---- - storage-daemon/qemu-storage-daemon.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c -index 7c914b0dc1..e0c87edbdd 100644 ---- a/storage-daemon/qemu-storage-daemon.c -+++ b/storage-daemon/qemu-storage-daemon.c -@@ -314,6 +314,9 @@ int main(int argc, char *argv[]) - main_loop_wait(false); - } - -+ bdrv_drain_all_begin(); -+ bdrv_close_all(); -+ - monitor_cleanup(); - qemu_chr_cleanup(); - user_creatable_cleanup(); --- -2.27.0 - diff --git a/kvm-tracetool-also-strip-l-and-ll-from-systemtap-format-.patch b/kvm-tracetool-also-strip-l-and-ll-from-systemtap-format-.patch deleted file mode 100644 index 7d519a1..0000000 --- a/kvm-tracetool-also-strip-l-and-ll-from-systemtap-format-.patch +++ /dev/null @@ -1,69 +0,0 @@ -From b51851d9684443028c2568e70bb203481ecd533a Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Tue, 2 Feb 2021 14:03:34 -0500 -Subject: [PATCH 2/7] tracetool: also strip %l and %ll from systemtap format - strings -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -Message-id: <20210202140334.1798082-2-lvivier@redhat.com> -Patchwork-id: 100948 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] tracetool: also strip %l and %ll from systemtap format strings -Bugzilla: 1907264 -RH-Acked-by: Jon Maloy -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Daniel P. Berrange - -From: Daniel P. Berrangé - -All variables are 64-bit and so %l / %ll are not required, and the -latter is actually invalid: - - $ sudo stap -e 'probe begin{printf ("BEGIN")}' -I . - parse error: invalid or missing conversion specifier - saw: operator ',' at ./qemu-system-x86_64-log.stp:15118:101 - source: printf("%d@%d vhost_vdpa_set_log_base dev: %p base: 0x%x size: %llu -refcnt: %d fd: %d log: %p\n", pid(), gettimeofday_ns(), dev, base, size, refcnt, fd, log) - - ^ - -Signed-off-by: Daniel P. Berrangé -Reviewed-by: Laurent Vivier -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Laurent Vivier -Message-id: 20210106130239.1004729-1-berrange@redhat.com - -[Fixed "simiarly" typo found by Laurent Vivier ---Stefan] - -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 09612de7e9adbe9666a8fa4cc60bab0a29a68ed1) -Signed-off-by: Laurent Vivier -Signed-off-by: Eduardo Lima (Etrunko) ---- - scripts/tracetool/format/log_stap.py | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/scripts/tracetool/format/log_stap.py b/scripts/tracetool/format/log_stap.py -index b486beb672..2d910ced82 100644 ---- a/scripts/tracetool/format/log_stap.py -+++ b/scripts/tracetool/format/log_stap.py -@@ -77,7 +77,12 @@ def c_fmt_to_stap(fmt): - elif state == STATE_LITERAL: - bits.append(literal) - -- fmt = re.sub("%(\d*)z(x|u|d)", "%\\1\\2", "".join(bits)) -+ # All variables in systemtap are 64-bit in size -+ # The "%l" integer size qualifier is thus redundant -+ # and "%ll" is not valid at all. Similarly the size_t -+ # based "%z" size qualifier is not valid. 
We just -+ # strip all size qualifiers for sanity. -+ fmt = re.sub("%(\d*)(l+|z)(x|u|d)", "%\\1\\3", "".join(bits)) - return fmt - - def generate(events, backend, group): --- -2.18.4 - diff --git a/kvm-vhost-Check-for-valid-vdev-in-vhost_backend_handle_i.patch b/kvm-vhost-Check-for-valid-vdev-in-vhost_backend_handle_i.patch deleted file mode 100644 index ddd67b7..0000000 --- a/kvm-vhost-Check-for-valid-vdev-in-vhost_backend_handle_i.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 9142072649d593acbd118e71f5d257bd9996ab36 Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Fri, 19 Feb 2021 08:49:50 -0300 -Subject: [PATCH 12/54] vhost: Check for valid vdev in - vhost_backend_handle_iotlb_msg -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210219084950.2159701-1-eperezma@redhat.com> -Patchwork-id: 101110 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH] vhost: Check for valid vdev in vhost_backend_handle_iotlb_msg -Bugzilla: 1880299 -RH-Acked-by: Peter Xu -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Xiao Wang - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1880299 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=35031170 - -Not checking this can lead to invalid dev->vdev member access in -vhost_device_iotlb_miss if backend issue an iotlb message in a bad -timing, either maliciously or by a bug. - -Reproduced rebooting a guest with testpmd in txonly forward mode. 
- #0 0x0000559ffff94394 in vhost_device_iotlb_miss ( - dev=dev@entry=0x55a0012f6680, iova=10245279744, write=1) - at ../hw/virtio/vhost.c:1013 - #1 0x0000559ffff9ac31 in vhost_backend_handle_iotlb_msg ( - imsg=0x7ffddcfd32c0, dev=0x55a0012f6680) - at ../hw/virtio/vhost-backend.c:411 - #2 vhost_backend_handle_iotlb_msg (dev=dev@entry=0x55a0012f6680, - imsg=imsg@entry=0x7ffddcfd32c0) - at ../hw/virtio/vhost-backend.c:404 - #3 0x0000559fffeded7b in slave_read (opaque=0x55a0012f6680) - at ../hw/virtio/vhost-user.c:1464 - #4 0x000055a0000c541b in aio_dispatch_handler ( - ctx=ctx@entry=0x55a0010a2120, node=0x55a0012d9e00) - at ../util/aio-posix.c:329 - -Fixes: 020e571b8b ("vhost: rework IOTLB messaging") -Signed-off-by: Eugenio Pérez -Message-Id: <20210129090728.831208-1-eperezma@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 4d1ccc17f40f73313e13c84914f70ec3d40ac738) -Signed-off-by: Eugenio Pérez -Signed-off-by: Eduardo Lima (Etrunko) ---- - hw/virtio/vhost-backend.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c -index 222bbcc62d..31b33bde37 100644 ---- a/hw/virtio/vhost-backend.c -+++ b/hw/virtio/vhost-backend.c -@@ -406,6 +406,11 @@ int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, - { - int ret = 0; - -+ if (unlikely(!dev->vdev)) { -+ error_report("Unexpected IOTLB message when virtio device is stopped"); -+ return -EINVAL; -+ } -+ - switch (imsg->type) { - case VHOST_IOTLB_MISS: - ret = vhost_device_iotlb_miss(dev, imsg->iova, --- -2.27.0 - diff --git a/kvm-vhost-Unbreak-SMMU-and-virtio-iommu-on-dev-iotlb-sup.patch b/kvm-vhost-Unbreak-SMMU-and-virtio-iommu-on-dev-iotlb-sup.patch deleted file mode 100644 index 2926e5b..0000000 --- a/kvm-vhost-Unbreak-SMMU-and-virtio-iommu-on-dev-iotlb-sup.patch +++ /dev/null @@ -1,124 +0,0 @@ -From 26c3b9b2a5e904f2799ac097c91588cb2248a6e0 Mon Sep 17 00:00:00 2001 -From: Auger 
Eric -Date: Fri, 5 Feb 2021 18:58:52 -0500 -Subject: [PATCH 6/7] vhost: Unbreak SMMU and virtio-iommu on dev-iotlb support - -RH-Author: Auger Eric -Message-id: <20210205185852.12830-1-eric.auger@redhat.com> -Patchwork-id: 100996 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH] vhost: Unbreak SMMU and virtio-iommu on dev-iotlb support -Bugzilla: 1925028 -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones -RH-Acked-by: Peter Xu - -From: Peter Xu - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1925028 -BRANCH: rhel-av-8.4.0 -UPSTREAM: merged -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=34788078 - -Previous work on dev-iotlb message broke vhost on either SMMU or virtio-iommu -since dev-iotlb (or PCIe ATS) is not yet supported for those archs. - -An initial idea is that we can let IOMMU to export this information to vhost so -that vhost would know whether the vIOMMU would support dev-iotlb, then vhost -can conditionally register to dev-iotlb or the old iotlb way. We can work -based on some previous patch to introduce PCIIOMMUOps as Yi Liu proposed [1]. - -However it's not as easy as I thought since vhost_iommu_region_add() does not -have a PCIDevice context at all since it's completely a backend. It seems -non-trivial to pass over a PCI device to the backend during init. E.g. when -the IOMMU notifier registered hdev->vdev is still NULL. - -To make the fix smaller and easier, this patch goes the other way to leverage -the flag_changed() hook of vIOMMUs so that SMMU and virtio-iommu can trap the -dev-iotlb registration and fail it. Then vhost could try the fallback solution -as using UNMAP invalidation for it's translations. - -[1] https://lore.kernel.org/qemu-devel/1599735398-6829-4-git-send-email-yi.l.liu@intel.com/ - -Reported-by: Eric Auger -Fixes: b68ba1ca57677acf870d5ab10579e6105c1f5338 -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Signed-off-by: Peter Xu -Message-Id: <20210204191228.187550-1-peterx@redhat.com> -Reviewed-by: Michael S. 
Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 958ec334bca3fa9862289e4cfe31bf1019e55816) -Signed-off-by: Eric Auger -Signed-off-by: Eduardo Lima (Etrunko) ---- - hw/arm/smmuv3.c | 5 +++++ - hw/virtio/vhost.c | 13 +++++++++++-- - hw/virtio/virtio-iommu.c | 5 +++++ - 3 files changed, 21 insertions(+), 2 deletions(-) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index 98b99d4fe8..bd1f97000d 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -1497,6 +1497,11 @@ static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu, - SMMUv3State *s3 = sdev->smmu; - SMMUState *s = &(s3->smmu_state); - -+ if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) { -+ error_setg(errp, "SMMUv3 does not support dev-iotlb yet"); -+ return -EINVAL; -+ } -+ - if (new & IOMMU_NOTIFIER_MAP) { - error_setg(errp, - "device %02x.%02x.%x requires iommu MAP notifier which is " -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 28c7d78172..6e17d631f7 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -704,6 +704,7 @@ static void vhost_iommu_region_add(MemoryListener *listener, - Int128 end; - int iommu_idx; - IOMMUMemoryRegion *iommu_mr; -+ int ret; - - if (!memory_region_is_iommu(section->mr)) { - return; -@@ -726,8 +727,16 @@ static void vhost_iommu_region_add(MemoryListener *listener, - iommu->iommu_offset = section->offset_within_address_space - - section->offset_within_region; - iommu->hdev = dev; -- memory_region_register_iommu_notifier(section->mr, &iommu->n, -- &error_fatal); -+ ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL); -+ if (ret) { -+ /* -+ * Some vIOMMUs do not support dev-iotlb yet. If so, try to use the -+ * UNMAP legacy message -+ */ -+ iommu->n.notifier_flags = IOMMU_NOTIFIER_UNMAP; -+ memory_region_register_iommu_notifier(section->mr, &iommu->n, -+ &error_fatal); -+ } - QLIST_INSERT_HEAD(&dev->iommu_list, iommu, iommu_next); - /* TODO: can replay help performance here? 
*/ - } -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index cea8811295..65184f6e43 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -893,6 +893,11 @@ static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr, - IOMMUNotifierFlag new, - Error **errp) - { -+ if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) { -+ error_setg(errp, "Virtio-iommu does not support dev-iotlb yet"); -+ return -EINVAL; -+ } -+ - if (old == IOMMU_NOTIFIER_NONE) { - trace_virtio_iommu_notify_flag_add(iommu_mr->parent_obj.name); - } else if (new == IOMMU_NOTIFIER_NONE) { --- -2.18.4 - diff --git a/kvm-vhost-user-blk-fix-blkcfg-num_queues-endianness.patch b/kvm-vhost-user-blk-fix-blkcfg-num_queues-endianness.patch deleted file mode 100644 index efb0b34..0000000 --- a/kvm-vhost-user-blk-fix-blkcfg-num_queues-endianness.patch +++ /dev/null @@ -1,68 +0,0 @@ -From f6ad6b772dce72042afbe8779cd9c52d5e352418 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 15 Mar 2021 18:16:24 -0400 -Subject: [PATCH 08/15] vhost-user-blk: fix blkcfg->num_queues endianness -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -Message-id: <20210315181629.212884-2-stefanha@redhat.com> -Patchwork-id: 101338 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/6] vhost-user-blk: fix blkcfg->num_queues endianness -Bugzilla: 1937004 -RH-Acked-by: Danilo de Paula -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Kevin Wolf -RH-Acked-by: Max Reitz - -Treat the num_queues field as virtio-endian. On big-endian hosts the -vhost-user-blk num_queues field was in the wrong endianness. - -Move the blkcfg.num_queues store operation from realize to -vhost_user_blk_update_config() so feature negotiation has finished and -we know the endianness of the device. VIRTIO 1.0 devices are -little-endian, but in case someone wants to use legacy VIRTIO we support -all endianness cases. 
- -Cc: qemu-stable@nongnu.org -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Raphael Norwitz -Reviewed-by: Michael S. Tsirkin -Message-Id: <20210223144653.811468-2-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 535255b43898d2e96744057eb86f8497d4d7a461) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. de Paula ---- - hw/block/vhost-user-blk.c | 7 +++---- - 1 file changed, 3 insertions(+), 4 deletions(-) - -diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c -index 2dd3d93ca0..d9d9dc8a89 100644 ---- a/hw/block/vhost-user-blk.c -+++ b/hw/block/vhost-user-blk.c -@@ -53,6 +53,9 @@ static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config) - { - VHostUserBlk *s = VHOST_USER_BLK(vdev); - -+ /* Our num_queues overrides the device backend */ -+ virtio_stw_p(vdev, &s->blkcfg.num_queues, s->num_queues); -+ - memcpy(config, &s->blkcfg, sizeof(struct virtio_blk_config)); - } - -@@ -490,10 +493,6 @@ reconnect: - goto reconnect; - } - -- if (s->blkcfg.num_queues != s->num_queues) { -- s->blkcfg.num_queues = s->num_queues; -- } -- - return; - - virtio_err: --- -2.27.0 - diff --git a/kvm-virtio-Add-corresponding-memory_listener_unregister-.patch b/kvm-virtio-Add-corresponding-memory_listener_unregister-.patch deleted file mode 100644 index 46c96b0..0000000 --- a/kvm-virtio-Add-corresponding-memory_listener_unregister-.patch +++ /dev/null @@ -1,234 +0,0 @@ -From ac9e40a75eba0019fb9930835804e8daceead981 Mon Sep 17 00:00:00 2001 -From: eperezma -Date: Tue, 9 Feb 2021 10:38:16 -0300 -Subject: [PATCH 1/6] virtio: Add corresponding memory_listener_unregister to - unrealize -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: eperezma -Message-id: <20210209103816.1636200-2-eperezma@redhat.com> -Patchwork-id: 101009 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] virtio: Add corresponding memory_listener_unregister to unrealize -Bugzilla: 1903521 -RH-Acked-by: 
Peter Xu -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Stefano Garzarella - -Address space is destroyed without proper removal of its listeners with -current code. They are expected to be removed in -virtio_device_instance_finalize [1], but qemu calls it through -object_deinit, after address_space_destroy call through -device_set_realized [2]. - -Move it to virtio_device_unrealize, called before device_set_realized -[3] and making it symmetric with memory_listener_register in -virtio_device_realize. - -v2: Delete no-op call of virtio_device_instance_finalize. - Add backtraces. - -[1] - - #0 virtio_device_instance_finalize (obj=0x555557de5120) - at /home/qemu/include/hw/virtio/virtio.h:71 - #1 0x0000555555b703c9 in object_deinit (type=0x555556639860, - obj=) at ../qom/object.c:671 - #2 object_finalize (data=0x555557de5120) at ../qom/object.c:685 - #3 object_unref (objptr=0x555557de5120) at ../qom/object.c:1184 - #4 0x0000555555b4de9d in bus_free_bus_child (kid=0x555557df0660) - at ../hw/core/qdev.c:55 - #5 0x0000555555c65003 in call_rcu_thread (opaque=opaque@entry=0x0) - at ../util/rcu.c:281 - -Queued by: - - #0 bus_remove_child (bus=0x555557de5098, - child=child@entry=0x555557de5120) at ../hw/core/qdev.c:60 - #1 0x0000555555b4ee31 in device_unparent (obj=) - at ../hw/core/qdev.c:984 - #2 0x0000555555b70465 in object_finalize_child_property ( - obj=, name=, opaque=0x555557de5120) - at ../qom/object.c:1725 - #3 0x0000555555b6fa17 in object_property_del_child ( - child=0x555557de5120, obj=0x555557ddcf90) at ../qom/object.c:645 - #4 object_unparent (obj=0x555557de5120) at ../qom/object.c:664 - #5 0x0000555555b4c071 in bus_unparent (obj=) - at ../hw/core/bus.c:147 - #6 0x0000555555b70465 in object_finalize_child_property ( - obj=, name=, opaque=0x555557de5098) - at ../qom/object.c:1725 - #7 0x0000555555b6fa17 in object_property_del_child ( - child=0x555557de5098, obj=0x555557ddcf90) at ../qom/object.c:645 - #8 object_unparent (obj=0x555557de5098) at 
../qom/object.c:664 - #9 0x0000555555b4ee19 in device_unparent (obj=) - at ../hw/core/qdev.c:981 - #10 0x0000555555b70465 in object_finalize_child_property ( - obj=, name=, opaque=0x555557ddcf90) - at ../qom/object.c:1725 - #11 0x0000555555b6fa17 in object_property_del_child ( - child=0x555557ddcf90, obj=0x55555685da10) at ../qom/object.c:645 - #12 object_unparent (obj=0x555557ddcf90) at ../qom/object.c:664 - #13 0x00005555558dc331 in pci_for_each_device_under_bus ( - opaque=, fn=, bus=) - at ../hw/pci/pci.c:1654 - -[2] - -Optimizer omits pci_qdev_unrealize, called by device_set_realized, and -do_pci_unregister_device, called by pci_qdev_unrealize and caller of -address_space_destroy. - - #0 address_space_destroy (as=0x555557ddd1b8) - at ../softmmu/memory.c:2840 - #1 0x0000555555b4fc53 in device_set_realized (obj=0x555557ddcf90, - value=, errp=0x7fffeea8f1e0) - at ../hw/core/qdev.c:850 - #2 0x0000555555b6eaa6 in property_set_bool (obj=0x555557ddcf90, - v=, name=, opaque=0x555556650ba0, - errp=0x7fffeea8f1e0) at ../qom/object.c:2255 - #3 0x0000555555b70e07 in object_property_set ( - obj=obj@entry=0x555557ddcf90, - name=name@entry=0x555555db99df "realized", - v=v@entry=0x7fffe46b7500, - errp=errp@entry=0x5555565bbf38 ) - at ../qom/object.c:1400 - #4 0x0000555555b73c5f in object_property_set_qobject ( - obj=obj@entry=0x555557ddcf90, - name=name@entry=0x555555db99df "realized", - value=value@entry=0x7fffe44f6180, - errp=errp@entry=0x5555565bbf38 ) - at ../qom/qom-qobject.c:28 - #5 0x0000555555b71044 in object_property_set_bool ( - obj=0x555557ddcf90, name=0x555555db99df "realized", - value=, errp=0x5555565bbf38 ) - at ../qom/object.c:1470 - #6 0x0000555555921cb7 in pcie_unplug_device (bus=, - dev=0x555557ddcf90, - opaque=) at /home/qemu/include/hw/qdev-core.h:17 - #7 0x00005555558dc331 in pci_for_each_device_under_bus ( - opaque=, fn=, - bus=) at ../hw/pci/pci.c:1654 - -[3] - - #0 virtio_device_unrealize (dev=0x555557de5120) - at ../hw/virtio/virtio.c:3680 - #1 
0x0000555555b4fc63 in device_set_realized (obj=0x555557de5120, - value=, errp=0x7fffee28df90) - at ../hw/core/qdev.c:850 - #2 0x0000555555b6eab6 in property_set_bool (obj=0x555557de5120, - v=, name=, opaque=0x555556650ba0, - errp=0x7fffee28df90) at ../qom/object.c:2255 - #3 0x0000555555b70e17 in object_property_set ( - obj=obj@entry=0x555557de5120, - name=name@entry=0x555555db99ff "realized", - v=v@entry=0x7ffdd8035040, - errp=errp@entry=0x5555565bbf38 ) - at ../qom/object.c:1400 - #4 0x0000555555b73c6f in object_property_set_qobject ( - obj=obj@entry=0x555557de5120, - name=name@entry=0x555555db99ff "realized", - value=value@entry=0x7ffdd8035020, - errp=errp@entry=0x5555565bbf38 ) - at ../qom/qom-qobject.c:28 - #5 0x0000555555b71054 in object_property_set_bool ( - obj=0x555557de5120, name=name@entry=0x555555db99ff "realized", - value=value@entry=false, errp=0x5555565bbf38 ) - at ../qom/object.c:1470 - #6 0x0000555555b4edc5 in qdev_unrealize (dev=) - at ../hw/core/qdev.c:403 - #7 0x0000555555b4c2a9 in bus_set_realized (obj=, - value=, errp=) - at ../hw/core/bus.c:204 - #8 0x0000555555b6eab6 in property_set_bool (obj=0x555557de5098, - v=, name=, opaque=0x555557df04c0, - errp=0x7fffee28e0a0) at ../qom/object.c:2255 - #9 0x0000555555b70e17 in object_property_set ( - obj=obj@entry=0x555557de5098, - name=name@entry=0x555555db99ff "realized", - v=v@entry=0x7ffdd8034f50, - errp=errp@entry=0x5555565bbf38 ) - at ../qom/object.c:1400 - #10 0x0000555555b73c6f in object_property_set_qobject ( - obj=obj@entry=0x555557de5098, - name=name@entry=0x555555db99ff "realized", - value=value@entry=0x7ffdd8020630, - errp=errp@entry=0x5555565bbf38 ) - at ../qom/qom-qobject.c:28 - #11 0x0000555555b71054 in object_property_set_bool ( - obj=obj@entry=0x555557de5098, - name=name@entry=0x555555db99ff "realized", - value=value@entry=false, errp=0x5555565bbf38 ) - at ../qom/object.c:1470 - #12 0x0000555555b4c725 in qbus_unrealize ( - bus=bus@entry=0x555557de5098) at ../hw/core/bus.c:178 - #13 
0x0000555555b4fc00 in device_set_realized (obj=0x555557ddcf90, - value=, errp=0x7fffee28e1e0) - at ../hw/core/qdev.c:844 - #14 0x0000555555b6eab6 in property_set_bool (obj=0x555557ddcf90, - v=, name=, opaque=0x555556650ba0, - errp=0x7fffee28e1e0) at ../qom/object.c:2255 - #15 0x0000555555b70e17 in object_property_set ( - obj=obj@entry=0x555557ddcf90, - name=name@entry=0x555555db99ff "realized", - v=v@entry=0x7ffdd8020560, - errp=errp@entry=0x5555565bbf38 ) - at ../qom/object.c:1400 - #16 0x0000555555b73c6f in object_property_set_qobject ( - obj=obj@entry=0x555557ddcf90, - name=name@entry=0x555555db99ff "realized", - value=value@entry=0x7ffdd8020540, - errp=errp@entry=0x5555565bbf38 ) - at ../qom/qom-qobject.c:28 - #17 0x0000555555b71054 in object_property_set_bool ( - obj=0x555557ddcf90, name=0x555555db99ff "realized", - value=, errp=0x5555565bbf38 ) - at ../qom/object.c:1470 - #18 0x0000555555921cb7 in pcie_unplug_device (bus=, - dev=0x555557ddcf90, opaque=) - at /home/qemu/include/hw/qdev-core.h:17 - #19 0x00005555558dc331 in pci_for_each_device_under_bus ( - opaque=, fn=, bus=) - at ../hw/pci/pci.c:1654 - -Fixes: c611c76417f ("virtio: add MemoryListener to cache ring translations") -Buglink: https://bugs.launchpad.net/qemu/+bug/1912846 -Signed-off-by: Eugenio Pérez -Message-Id: <20210125192505.390554-1-eperezma@redhat.com> -Reviewed-by: Peter Xu -Acked-by: Jason Wang -Reviewed-by: Stefano Garzarella -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit f6ab64c05f8a6229bf6569d3791c23abb9f6eee4) -Signed-off-by: Eugenio Pérez -Signed-off-by: Eduardo Lima (Etrunko) ---- - hw/virtio/virtio.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index ceb58fda6c..9312e7191b 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -3677,6 +3677,7 @@ static void virtio_device_unrealize(DeviceState *dev) - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); - -+ memory_listener_unregister(&vdev->listener); - virtio_bus_device_unplugged(vdev); - - if (vdc->unrealize != NULL) { -@@ -3707,7 +3708,6 @@ static void virtio_device_instance_finalize(Object *obj) - { - VirtIODevice *vdev = VIRTIO_DEVICE(obj); - -- memory_listener_unregister(&vdev->listener); - virtio_device_free_virtqueues(vdev); - - g_free(vdev->config); --- -2.27.0 - diff --git a/kvm-virtio-move-use-disabled-flag-property-to-hw_compat_.patch b/kvm-virtio-move-use-disabled-flag-property-to-hw_compat_.patch deleted file mode 100644 index 1869297..0000000 --- a/kvm-virtio-move-use-disabled-flag-property-to-hw_compat_.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 4a1b30af56f99b9fe7ecdd47aa9691fed5d3a0c3 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 9 Feb 2021 17:15:33 -0300 -Subject: [PATCH 04/54] virtio: move 'use-disabled-flag' property to - hw_compat_4_2 - -RH-Author: Stefano Garzarella -Message-id: <20210209171533.133268-2-sgarzare@redhat.com> -Patchwork-id: 101012 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/1] virtio: move 'use-disabled-flag' property to hw_compat_4_2 -Bugzilla: 1907255 -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Dr. David Alan Gilbert - -Commit 9d7bd0826f introduced a new 'use-disabled-flag' property -set to true by default. -To allow the migration, we set this property to false in the hw_compat, -but in the wrong place (hw_compat_4_1). 
- -Since commit 9d7bd0826f was released with QEMU 5.0, we move -'use-disabled-flag' property to hw_compat_4_2, so 4.2 machine types -will have the pre-patch behavior and the migration can work. - -The issue was discovered with vhost-vsock device and 4.2 machine -type without running any kernel in the VM: - $ qemu-4.2 -M pc-q35-4.2,accel=kvm \ - -device vhost-vsock-pci,guest-cid=4 \ - -monitor stdio -incoming tcp:0:3333 - - $ qemu-5.2 -M pc-q35-4.2,accel=kvm \ - -device vhost-vsock-pci,guest-cid=3 \ - -monitor stdio - (qemu) migrate -d tcp:0:3333 - - # qemu-4.2 output - qemu-system-x86_64: Failed to load virtio-vhost_vsock:virtio - qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:03.0/virtio-vhost_vsock' - qemu-system-x86_64: load of migration failed: No such file or directory - -Reported-by: Jing Zhao -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1907255 -Fixes: 9d7bd0826f ("virtio-pci: disable vring processing when bus-mastering is disabled") -Cc: mdroth@linux.vnet.ibm.com -CC: qemu-stable@nongnu.org -Signed-off-by: Stefano Garzarella -Message-Id: <20210108171252.209502-1-sgarzare@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit c126b4c57e0164549de606ca35d1512762051083) -[sgarzare: add 'use-disabled-flag' property to hw_compat_rhel_8_2] -Signed-off-by: Stefano Garzarella -Signed-off-by: Eduardo Lima (Etrunko) ---- - hw/core/machine.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 68495b9411..92398966a4 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -77,6 +77,8 @@ GlobalProperty hw_compat_rhel_8_2[] = { - { "qxl-vga", "revision", "4" }, - /* hw_compat_rhel_8_2 from hw_compat_4_2 */ - { "fw_cfg", "acpi-mr-restore", "false" }, -+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ -+ { "virtio-device", "use-disabled-flag", "false" }, - /* hw_compat_rhel_8_2 from hw_compat_5_0 */ - { "pci-host-bridge", "x-config-reg-migration-enabled", "off" }, - /* hw_compat_rhel_8_2 from hw_compat_5_0 */ -@@ -298,12 +300,12 @@ GlobalProperty hw_compat_4_2[] = { - { "qxl", "revision", "4" }, - { "qxl-vga", "revision", "4" }, - { "fw_cfg", "acpi-mr-restore", "false" }, -+ { "virtio-device", "use-disabled-flag", "false" }, - }; - const size_t hw_compat_4_2_len = G_N_ELEMENTS(hw_compat_4_2); - - GlobalProperty hw_compat_4_1[] = { - { "virtio-pci", "x-pcie-flr-init", "off" }, -- { "virtio-device", "use-disabled-flag", "false" }, - }; - const size_t hw_compat_4_1_len = G_N_ELEMENTS(hw_compat_4_1); - --- -2.27.0 - diff --git a/kvm-virtio-net-add-missing-object_unref.patch b/kvm-virtio-net-add-missing-object_unref.patch deleted file mode 100644 index efa7b44..0000000 --- a/kvm-virtio-net-add-missing-object_unref.patch +++ /dev/null @@ -1,66 +0,0 @@ -From d66b778c2f888507dedc3e4111006215dd394b95 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 25 Feb 2021 23:14:47 -0500 -Subject: [PATCH 50/54] virtio-net: add missing object_unref() - -RH-Author: Laurent Vivier -Message-id: <20210225231447.2187738-28-lvivier@redhat.com> -Patchwork-id: 101266 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH v2 27/27] 
virtio-net: add missing object_unref() -Bugzilla: 1819991 -RH-Acked-by: Juan Quintela -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Jens Freimann -RH-Acked-by: Michael S. Tsirkin - -BZ: https://bugzilla.redhat.com/1819991 -BRANCH: rhel-av-8.4.0 -UPSTREAM: Merged - -failover_add_primary() calls qdev_device_add() and doesn't unref -the device. Because of that, when the device is unplugged a reference -is remaining and prevents the cleanup of the object. - -This prevents to be able to plugin back the failover primary device, -with errors like: - - (qemu) device_add vfio-pci,host=0000:41:00.0,id=hostdev0,bus=root.3,failover_pair_id=net0 - (qemu) device_del hostdev0 - -We can check with "info qtree" and "info pci" that the device has been removed, and then: - - (qemu) device_add vfio-pci,host=0000:41:00.0,id=hostdev1,bus=root.3,failover_pair_id=net0 - Error: vfio 0000:41:00.0: device is already attached - (qemu) device_add vfio-pci,host=0000:41:00.0,id=hostdev0,bus=root.3,failover_pair_id=net0 - qemu-kvm: Duplicate ID 'hostdev0' for device - -Fixes: 21e8709b29cd ("failover: Remove primary_dev member") -Cc: quintela@redhat.com -Signed-off-by: Laurent Vivier -Message-Id: <20210212135250.2738750-3-lvivier@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Jens Freimann -(cherry picked from commit 00e7b1299599384dfdda2a2a4570a0fb2d69eb6b) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/net/virtio-net.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 044ac95f6f..7faaa829b6 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -869,6 +869,8 @@ static void failover_add_primary(VirtIONet *n, Error **errp) - dev = qdev_device_add(opts, &err); - if (err) { - qemu_opts_del(opts); -+ } else { -+ object_unref(OBJECT(dev)); - } - } else { - error_setg(errp, "Primary device not found"); --- -2.27.0 - diff --git a/kvm-virtiofs-drop-remapped-security.capability-xattr-as-.patch b/kvm-virtiofs-drop-remapped-security.capability-xattr-as-.patch deleted file mode 100644 index 72fe0a4..0000000 --- a/kvm-virtiofs-drop-remapped-security.capability-xattr-as-.patch +++ /dev/null @@ -1,224 +0,0 @@ -From 6a0564e81d5e329f955c4391809daf248f078481 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 4 Mar 2021 15:49:01 -0500 -Subject: [PATCH 4/4] virtiofs: drop remapped security.capability xattr as - needed - -RH-Author: Dr. David Alan Gilbert -Message-id: <20210304154901.47930-3-dgilbert@redhat.com> -Patchwork-id: 101305 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/2] virtiofs: drop remapped security.capability xattr as needed -Bugzilla: 1935071 -RH-Acked-by: Connor Kuehl -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Stefan Hajnoczi - -From: "Dr. David Alan Gilbert" - -On Linux, the 'security.capability' xattr holds a set of -capabilities that can change when an executable is run, giving -a limited form of privilege escalation to those programs that -the writer of the file deemed worthy. - -Any write causes the 'security.capability' xattr to be dropped, -stopping anyone from gaining privilege by modifying a blessed -file. - -Fuse relies on the daemon to do this dropping, and in turn the -daemon relies on the host kernel to drop the xattr for it. 
However, -with the addition of -o xattrmap, the xattr that the guest -stores its capabilities in is now not the same as the one that -the host kernel automatically clears. - -Where the mapping changes 'security.capability', explicitly clear -the remapped name to preserve the same behaviour. - -This bug is assigned CVE-2021-20263. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Vivek Goyal -(cherry picked from commit e586edcb410543768ef009eaa22a2d9dd4a53846) -Signed-off-by: Dr. David Alan Gilbert - Downstream slight context difference due to missing d64907ac FUSE_HANDLE_KILLPRIV_V2 -Signed-off-by: Danilo C. L. de Paula ---- - docs/tools/virtiofsd.rst | 4 ++ - tools/virtiofsd/passthrough_ll.c | 77 +++++++++++++++++++++++++++++++- - 2 files changed, 80 insertions(+), 1 deletion(-) - -diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst -index 5b3be8a6d6..6e0fc94005 100644 ---- a/docs/tools/virtiofsd.rst -+++ b/docs/tools/virtiofsd.rst -@@ -228,6 +228,10 @@ The 'map' type adds a number of separate rules to add **prepend** as a prefix - to the matched **key** (or all attributes if **key** is empty). - There may be at most one 'map' rule and it must be the last rule in the set. - -+Note: When the 'security.capability' xattr is remapped, the daemon has to do -+extra work to remove it during many operations, which the host kernel normally -+does itself. 
-+ - xattr-mapping Examples - ---------------------- - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index f06074d81f..9c33b0344b 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -160,6 +160,7 @@ struct lo_data { - int posix_lock; - int xattr; - char *xattrmap; -+ char *xattr_security_capability; - char *source; - char *modcaps; - double timeout; -@@ -226,6 +227,8 @@ static __thread bool cap_loaded = 0; - - static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st, - uint64_t mnt_id); -+static int xattr_map_client(const struct lo_data *lo, const char *client_name, -+ char **out_name); - - static int is_dot_or_dotdot(const char *name) - { -@@ -365,6 +368,37 @@ out: - return ret; - } - -+/* -+ * The host kernel normally drops security.capability xattr's on -+ * any write, however if we're remapping xattr names we need to drop -+ * whatever the clients security.capability is actually stored as. -+ */ -+static int drop_security_capability(const struct lo_data *lo, int fd) -+{ -+ if (!lo->xattr_security_capability) { -+ /* We didn't remap the name, let the host kernel do it */ -+ return 0; -+ } -+ if (!fremovexattr(fd, lo->xattr_security_capability)) { -+ /* All good */ -+ return 0; -+ } -+ -+ switch (errno) { -+ case ENODATA: -+ /* Attribute didn't exist, that's fine */ -+ return 0; -+ -+ case ENOTSUP: -+ /* FS didn't support attribute anyway, also fine */ -+ return 0; -+ -+ default: -+ /* Hmm other error */ -+ return errno; -+ } -+} -+ - static void lo_map_init(struct lo_map *map) - { - map->elems = NULL; -@@ -718,6 +752,11 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1; - gid_t gid = (valid & FUSE_SET_ATTR_GID) ? 
attr->st_gid : (gid_t)-1; - -+ saverr = drop_security_capability(lo, ifd); -+ if (saverr) { -+ goto out_err; -+ } -+ - res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); - if (res == -1) { - saverr = errno; -@@ -737,6 +776,14 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - } - } - -+ saverr = drop_security_capability(lo, truncfd); -+ if (saverr) { -+ if (!fi) { -+ close(truncfd); -+ } -+ goto out_err; -+ } -+ - res = ftruncate(truncfd, attr->st_size); - saverr = res == -1 ? errno : 0; - if (!fi) { -@@ -1727,6 +1774,13 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, - if (fd < 0) { - return -fd; - } -+ if (fi->flags & (O_TRUNC)) { -+ int err = drop_security_capability(lo, fd); -+ if (err) { -+ close(fd); -+ return err; -+ } -+ } - } - - pthread_mutex_lock(&lo->mutex); -@@ -2115,6 +2169,12 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, - "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, - out_buf.buf[0].size, (unsigned long)off); - -+ res = drop_security_capability(lo_data(req), out_buf.buf[0].fd); -+ if (res) { -+ fuse_reply_err(req, res); -+ return; -+ } -+ - /* - * If kill_priv is set, drop CAP_FSETID which should lead to kernel - * clearing setuid/setgid on file. -@@ -2354,6 +2414,7 @@ static void parse_xattrmap(struct lo_data *lo) - { - const char *map = lo->xattrmap; - const char *tmp; -+ int ret; - - lo->xattr_map_nentries = 0; - while (*map) { -@@ -2384,7 +2445,7 @@ static void parse_xattrmap(struct lo_data *lo) - * the last entry. 
- */ - parse_xattrmap_map(lo, map, sep); -- return; -+ break; - } else { - fuse_log(FUSE_LOG_ERR, - "%s: Unexpected type;" -@@ -2453,6 +2514,19 @@ static void parse_xattrmap(struct lo_data *lo) - fuse_log(FUSE_LOG_ERR, "Empty xattr map\n"); - exit(1); - } -+ -+ ret = xattr_map_client(lo, "security.capability", -+ &lo->xattr_security_capability); -+ if (ret) { -+ fuse_log(FUSE_LOG_ERR, "Failed to map security.capability: %s\n", -+ strerror(ret)); -+ exit(1); -+ } -+ if (!strcmp(lo->xattr_security_capability, "security.capability")) { -+ /* 1-1 mapping, don't need to do anything */ -+ free(lo->xattr_security_capability); -+ lo->xattr_security_capability = NULL; -+ } - } - - /* -@@ -3481,6 +3555,7 @@ static void fuse_lo_data_cleanup(struct lo_data *lo) - - free(lo->xattrmap); - free_xattrmap(lo); -+ free(lo->xattr_security_capability); - free(lo->source); - } - --- -2.27.0 - diff --git a/kvm-virtiofsd-Save-error-code-early-at-the-failure-calls.patch b/kvm-virtiofsd-Save-error-code-early-at-the-failure-calls.patch deleted file mode 100644 index 72e42ac..0000000 --- a/kvm-virtiofsd-Save-error-code-early-at-the-failure-calls.patch +++ /dev/null @@ -1,100 +0,0 @@ -From 22fe525a532619088a135c0f5f80bde12da68109 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 4 Mar 2021 15:49:00 -0500 -Subject: [PATCH 3/4] virtiofsd: Save error code early at the failure callsite - -RH-Author: Dr. David Alan Gilbert -Message-id: <20210304154901.47930-2-dgilbert@redhat.com> -Patchwork-id: 101304 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/2] virtiofsd: Save error code early at the failure callsite -Bugzilla: 1935071 -RH-Acked-by: Connor Kuehl -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Stefan Hajnoczi - -From: Vivek Goyal - -Change error code handling slightly in lo_setattr(). Right now we seem -to jump to out_err and assume that "errno" is valid and use that to -send reply. 
- -But if caller has to do some other operations before jumping to out_err, -then it does the dance of first saving errno to saverr and the restore -errno before jumping to out_err. This makes it more confusing. - -I am about to make more changes where caller will have to do some -work after error before jumping to out_err. I found it easier to -change the convention a bit. That is caller saves error in "saverr" -before jumping to out_err. And out_err uses "saverr" to send error -back and does not rely on "errno" having actual error. - -v3: Resolved conflicts in lo_setattr() due to lo_inode_open() changes. - -Signed-off-by: Vivek Goyal -Reviewed-by: Dr. David Alan Gilbert -Message-Id: <20210208224024.43555-2-vgoyal@redhat.com> -(cherry picked from commit 1e08f164e9fdc9528ad6990012301b9a04b0bc90) -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. de Paula ---- - tools/virtiofsd/passthrough_ll.c | 9 +++++---- - 1 file changed, 5 insertions(+), 4 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 03c5e0d13c..f06074d81f 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -710,6 +710,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0); - } - if (res == -1) { -+ saverr = errno; - goto out_err; - } - } -@@ -719,6 +720,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - - res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); - if (res == -1) { -+ saverr = errno; - goto out_err; - } - } -@@ -730,16 +732,15 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - } else { - truncfd = lo_inode_open(lo, inode, O_RDWR); - if (truncfd < 0) { -- errno = -truncfd; -+ saverr = -truncfd; - goto out_err; - } - } - - res = ftruncate(truncfd, attr->st_size); -+ saverr = res == -1 ? 
errno : 0; - if (!fi) { -- saverr = errno; - close(truncfd); -- errno = saverr; - } - if (res == -1) { - goto out_err; -@@ -772,6 +773,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - res = utimensat(lo->proc_self_fd, procname, tv, 0); - } - if (res == -1) { -+ saverr = errno; - goto out_err; - } - } -@@ -780,7 +782,6 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - return lo_getattr(req, ino, fi); - - out_err: -- saverr = errno; - lo_inode_put(lo, &inode); - fuse_reply_err(req, saverr); - } --- -2.27.0 - diff --git a/kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch b/kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch deleted file mode 100644 index eb73a1c..0000000 --- a/kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch +++ /dev/null @@ -1,157 +0,0 @@ -From 745a04765f21dad1991be89e23dd97a0543d3fce Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Wed, 10 Feb 2021 11:15:16 -0300 -Subject: [PATCH 05/54] virtiofsd: extract lo_do_open() from lo_open() - -RH-Author: Stefan Hajnoczi -Message-id: <20210210111518.228148-2-stefanha@redhat.com> -Patchwork-id: 101032 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/3] virtiofsd: extract lo_do_open() from lo_open() -Bugzilla: 1920740 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Greg Kurz -RH-Acked-by: Dr. David Alan Gilbert - -Both lo_open() and lo_create() have similar code to open a file. Extract -a common lo_do_open() function from lo_open() that will be used by -lo_create() in a later commit. - -Since lo_do_open() does not otherwise need fuse_req_t req, convert -lo_add_fd_mapping() to use struct lo_data *lo instead. - -Signed-off-by: Stefan Hajnoczi -Message-Id: <20210204150208.367837-2-stefanha@redhat.com> -Reviewed-by: Greg Kurz -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 8afaaee976965b7fb90ec225a51d60f35c5f173c) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Eduardo Lima (Etrunko) ---- - tools/virtiofsd/passthrough_ll.c | 73 ++++++++++++++++++++------------ - 1 file changed, 46 insertions(+), 27 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 97485b22b4..218e20e9d7 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -471,17 +471,17 @@ static void lo_map_remove(struct lo_map *map, size_t key) - } - - /* Assumes lo->mutex is held */ --static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) -+static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd) - { - struct lo_map_elem *elem; - -- elem = lo_map_alloc_elem(&lo_data(req)->fd_map); -+ elem = lo_map_alloc_elem(&lo->fd_map); - if (!elem) { - return -1; - } - - elem->fd = fd; -- return elem - lo_data(req)->fd_map.elems; -+ return elem - lo->fd_map.elems; - } - - /* Assumes lo->mutex is held */ -@@ -1661,6 +1661,38 @@ static void update_open_flags(int writeback, int allow_direct_io, - } - } - -+static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, -+ struct fuse_file_info *fi) -+{ -+ char buf[64]; -+ ssize_t fh; -+ int fd; -+ -+ update_open_flags(lo->writeback, lo->allow_direct_io, fi); -+ -+ sprintf(buf, "%i", inode->fd); -+ fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); -+ if (fd == -1) { -+ return errno; -+ } -+ -+ pthread_mutex_lock(&lo->mutex); -+ fh = lo_add_fd_mapping(lo, fd); -+ pthread_mutex_unlock(&lo->mutex); -+ if (fh == -1) { -+ close(fd); -+ return ENOMEM; -+ } -+ -+ fi->fh = fh; -+ if (lo->cache == CACHE_NONE) { -+ fi->direct_io = 1; -+ } else if (lo->cache == CACHE_ALWAYS) { -+ fi->keep_cache = 1; -+ } -+ return 0; -+} -+ - static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - mode_t mode, struct fuse_file_info *fi) - { -@@ -1701,7 +1733,7 @@ static void lo_create(fuse_req_t 
req, fuse_ino_t parent, const char *name, - ssize_t fh; - - pthread_mutex_lock(&lo->mutex); -- fh = lo_add_fd_mapping(req, fd); -+ fh = lo_add_fd_mapping(lo, fd); - pthread_mutex_unlock(&lo->mutex); - if (fh == -1) { - close(fd); -@@ -1892,38 +1924,25 @@ static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, - - static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - { -- int fd; -- ssize_t fh; -- char buf[64]; - struct lo_data *lo = lo_data(req); -+ struct lo_inode *inode = lo_inode(req, ino); -+ int err; - - fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, - fi->flags); - -- update_open_flags(lo->writeback, lo->allow_direct_io, fi); -- -- sprintf(buf, "%i", lo_fd(req, ino)); -- fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); -- if (fd == -1) { -- return (void)fuse_reply_err(req, errno); -- } -- -- pthread_mutex_lock(&lo->mutex); -- fh = lo_add_fd_mapping(req, fd); -- pthread_mutex_unlock(&lo->mutex); -- if (fh == -1) { -- close(fd); -- fuse_reply_err(req, ENOMEM); -+ if (!inode) { -+ fuse_reply_err(req, EBADF); - return; - } - -- fi->fh = fh; -- if (lo->cache == CACHE_NONE) { -- fi->direct_io = 1; -- } else if (lo->cache == CACHE_ALWAYS) { -- fi->keep_cache = 1; -+ err = lo_do_open(lo, inode, fi); -+ lo_inode_put(lo, &inode); -+ if (err) { -+ fuse_reply_err(req, err); -+ } else { -+ fuse_reply_open(req, fi); - } -- fuse_reply_open(req, fi); - } - - static void lo_release(fuse_req_t req, fuse_ino_t ino, --- -2.27.0 - diff --git a/kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch b/kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch deleted file mode 100644 index 95d8085..0000000 --- a/kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 24833a2db44e39ec7652779a0fa2e70983b9cb4e Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Wed, 10 Feb 2021 11:15:17 -0300 -Subject: [PATCH 06/54] virtiofsd: optionally return inode 
pointer from - lo_do_lookup() - -RH-Author: Stefan Hajnoczi -Message-id: <20210210111518.228148-3-stefanha@redhat.com> -Patchwork-id: 101033 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 2/3] virtiofsd: optionally return inode pointer from lo_do_lookup() -Bugzilla: 1920740 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Greg Kurz -RH-Acked-by: Dr. David Alan Gilbert - -lo_do_lookup() finds an existing inode or allocates a new one. It -increments nlookup so that the inode stays alive until the client -releases it. - -Existing callers don't need the struct lo_inode so the function doesn't -return it. Extend the function to optionally return the inode. The next -commit will need it. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Greg Kurz -Message-Id: <20210204150208.367837-3-stefanha@redhat.com> -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 22d2ece71e533310da31f2857ebc4a00d91968b3) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Eduardo Lima (Etrunko) ---- - tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++-------- - 1 file changed, 21 insertions(+), 8 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 218e20e9d7..2bd050b620 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -843,11 +843,13 @@ static int do_statx(struct lo_data *lo, int dirfd, const char *pathname, - } - - /* -- * Increments nlookup and caller must release refcount using -- * lo_inode_put(&parent). -+ * Increments nlookup on the inode on success. unref_inode_lolocked() must be -+ * called eventually to decrement nlookup again. If inodep is non-NULL, the -+ * inode pointer is stored and the caller must call lo_inode_put(). 
- */ - static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, -- struct fuse_entry_param *e) -+ struct fuse_entry_param *e, -+ struct lo_inode **inodep) - { - int newfd; - int res; -@@ -857,6 +859,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - struct lo_inode *inode = NULL; - struct lo_inode *dir = lo_inode(req, parent); - -+ if (inodep) { -+ *inodep = NULL; -+ } -+ - /* - * name_to_handle_at() and open_by_handle_at() can reach here with fuse - * mount point in guest, but we don't have its inode info in the -@@ -924,7 +930,14 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - pthread_mutex_unlock(&lo->mutex); - } - e->ino = inode->fuse_ino; -- lo_inode_put(lo, &inode); -+ -+ /* Transfer ownership of inode pointer to caller or drop it */ -+ if (inodep) { -+ *inodep = inode; -+ } else { -+ lo_inode_put(lo, &inode); -+ } -+ - lo_inode_put(lo, &dir); - - fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, -@@ -959,7 +972,7 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) - return; - } - -- err = lo_do_lookup(req, parent, name, &e); -+ err = lo_do_lookup(req, parent, name, &e, NULL); - if (err) { - fuse_reply_err(req, err); - } else { -@@ -1067,7 +1080,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, - goto out; - } - -- saverr = lo_do_lookup(req, parent, name, &e); -+ saverr = lo_do_lookup(req, parent, name, &e, NULL); - if (saverr) { - goto out; - } -@@ -1544,7 +1557,7 @@ static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, - - if (plus) { - if (!is_dot_or_dotdot(name)) { -- err = lo_do_lookup(req, ino, name, &e); -+ err = lo_do_lookup(req, ino, name, &e, NULL); - if (err) { - goto error; - } -@@ -1742,7 +1755,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - } - - fi->fh = fh; -- err = lo_do_lookup(req, parent, name, &e); -+ err = lo_do_lookup(req, parent, 
name, &e, NULL); - } - if (lo->cache == CACHE_NONE) { - fi->direct_io = 1; --- -2.27.0 - diff --git a/kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch b/kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch deleted file mode 100644 index a4ded98..0000000 --- a/kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch +++ /dev/null @@ -1,311 +0,0 @@ -From 8cc13bdaa45cca3ef907cad9697683390aff2545 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Wed, 10 Feb 2021 11:15:18 -0300 -Subject: [PATCH 07/54] virtiofsd: prevent opening of special files - (CVE-2020-35517) - -RH-Author: Stefan Hajnoczi -Message-id: <20210210111518.228148-4-stefanha@redhat.com> -Patchwork-id: 101034 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 3/3] virtiofsd: prevent opening of special files (CVE-2020-35517) -Bugzilla: 1920740 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Greg Kurz -RH-Acked-by: Dr. David Alan Gilbert - -A well-behaved FUSE client does not attempt to open special files with -FUSE_OPEN because they are handled on the client side (e.g. device nodes -are handled by client-side device drivers). - -The check to prevent virtiofsd from opening special files is missing in -a few cases, most notably FUSE_OPEN. A malicious client can cause -virtiofsd to open a device node, potentially allowing the guest to -escape. This can be exploited by a modified guest device driver. It is -not exploitable from guest userspace since the guest kernel will handle -special files inside the guest instead of sending FUSE requests. - -This patch fixes this issue by introducing the lo_inode_open() function -to check the file type before opening it. This is a short-term solution -because it does not prevent a compromised virtiofsd process from opening -device nodes on the host. - -Restructure lo_create() to try O_CREAT | O_EXCL first. Note that O_CREAT -| O_EXCL does not follow symlinks, so O_NOFOLLOW masking is not -necessary here. 
If the file exists and the user did not specify O_EXCL, -open it via lo_do_open(). - -Reported-by: Alex Xu -Fixes: CVE-2020-35517 -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Vivek Goyal -Reviewed-by: Greg Kurz -Signed-off-by: Stefan Hajnoczi -Message-Id: <20210204150208.367837-4-stefanha@redhat.com> -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit a3fdbbc7f271bff7d53d0501b29d910ece0b3789) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Eduardo Lima (Etrunko) ---- - tools/virtiofsd/passthrough_ll.c | 144 ++++++++++++++++++++----------- - 1 file changed, 92 insertions(+), 52 deletions(-) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 2bd050b620..03c5e0d13c 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -567,6 +567,38 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino) - return fd; - } - -+/* -+ * Open a file descriptor for an inode. Returns -EBADF if the inode is not a -+ * regular file or a directory. -+ * -+ * Use this helper function instead of raw openat(2) to prevent security issues -+ * when a malicious client opens special files such as block device nodes. -+ * Symlink inodes are also rejected since symlinks must already have been -+ * traversed on the client side. -+ */ -+static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode, -+ int open_flags) -+{ -+ g_autofree char *fd_str = g_strdup_printf("%d", inode->fd); -+ int fd; -+ -+ if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) { -+ return -EBADF; -+ } -+ -+ /* -+ * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier -+ * that the inode is not a special file but if an external process races -+ * with us then symlinks are traversed here. It is not possible to escape -+ * the shared directory since it is mounted as "/" though. 
-+ */ -+ fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW); -+ if (fd < 0) { -+ return -errno; -+ } -+ return fd; -+} -+ - static void lo_init(void *userdata, struct fuse_conn_info *conn) - { - struct lo_data *lo = (struct lo_data *)userdata; -@@ -696,9 +728,9 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, - if (fi) { - truncfd = fd; - } else { -- sprintf(procname, "%i", ifd); -- truncfd = openat(lo->proc_self_fd, procname, O_RDWR); -+ truncfd = lo_inode_open(lo, inode, O_RDWR); - if (truncfd < 0) { -+ errno = -truncfd; - goto out_err; - } - } -@@ -860,7 +892,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, - struct lo_inode *dir = lo_inode(req, parent); - - if (inodep) { -- *inodep = NULL; -+ *inodep = NULL; /* in case there is an error */ - } - - /* -@@ -1674,19 +1706,26 @@ static void update_open_flags(int writeback, int allow_direct_io, - } - } - -+/* -+ * Open a regular file, set up an fd mapping, and fill out the struct -+ * fuse_file_info for it. If existing_fd is not negative, use that fd instead -+ * opening a new one. Takes ownership of existing_fd. -+ * -+ * Returns 0 on success or a positive errno. 
-+ */ - static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, -- struct fuse_file_info *fi) -+ int existing_fd, struct fuse_file_info *fi) - { -- char buf[64]; - ssize_t fh; -- int fd; -+ int fd = existing_fd; - - update_open_flags(lo->writeback, lo->allow_direct_io, fi); - -- sprintf(buf, "%i", inode->fd); -- fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); -- if (fd == -1) { -- return errno; -+ if (fd < 0) { -+ fd = lo_inode_open(lo, inode, fi->flags); -+ if (fd < 0) { -+ return -fd; -+ } - } - - pthread_mutex_lock(&lo->mutex); -@@ -1709,9 +1748,10 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, - static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - mode_t mode, struct fuse_file_info *fi) - { -- int fd; -+ int fd = -1; - struct lo_data *lo = lo_data(req); - struct lo_inode *parent_inode; -+ struct lo_inode *inode = NULL; - struct fuse_entry_param e; - int err; - struct lo_cred old = {}; -@@ -1737,36 +1777,38 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, - - update_open_flags(lo->writeback, lo->allow_direct_io, fi); - -- fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, -- mode); -+ /* Try to create a new file but don't open existing files */ -+ fd = openat(parent_inode->fd, name, fi->flags | O_CREAT | O_EXCL, mode); - err = fd == -1 ? 
errno : 0; -- lo_restore_cred(&old); - -- if (!err) { -- ssize_t fh; -+ lo_restore_cred(&old); - -- pthread_mutex_lock(&lo->mutex); -- fh = lo_add_fd_mapping(lo, fd); -- pthread_mutex_unlock(&lo->mutex); -- if (fh == -1) { -- close(fd); -- err = ENOMEM; -- goto out; -- } -+ /* Ignore the error if file exists and O_EXCL was not given */ -+ if (err && (err != EEXIST || (fi->flags & O_EXCL))) { -+ goto out; -+ } - -- fi->fh = fh; -- err = lo_do_lookup(req, parent, name, &e, NULL); -+ err = lo_do_lookup(req, parent, name, &e, &inode); -+ if (err) { -+ goto out; - } -- if (lo->cache == CACHE_NONE) { -- fi->direct_io = 1; -- } else if (lo->cache == CACHE_ALWAYS) { -- fi->keep_cache = 1; -+ -+ err = lo_do_open(lo, inode, fd, fi); -+ fd = -1; /* lo_do_open() takes ownership of fd */ -+ if (err) { -+ /* Undo lo_do_lookup() nlookup ref */ -+ unref_inode_lolocked(lo, inode, 1); - } - - out: -+ lo_inode_put(lo, &inode); - lo_inode_put(lo, &parent_inode); - - if (err) { -+ if (fd >= 0) { -+ close(fd); -+ } -+ - fuse_reply_err(req, err); - } else { - fuse_reply_create(req, &e, fi); -@@ -1780,7 +1822,6 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, - pid_t pid, int *err) - { - struct lo_inode_plock *plock; -- char procname[64]; - int fd; - - plock = -@@ -1797,12 +1838,10 @@ static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, - } - - /* Open another instance of file which can be used for ofd locks. */ -- sprintf(procname, "%i", inode->fd); -- - /* TODO: What if file is not writable? 
*/ -- fd = openat(lo->proc_self_fd, procname, O_RDWR); -- if (fd == -1) { -- *err = errno; -+ fd = lo_inode_open(lo, inode, O_RDWR); -+ if (fd < 0) { -+ *err = -fd; - free(plock); - return NULL; - } -@@ -1949,7 +1988,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - return; - } - -- err = lo_do_open(lo, inode, fi); -+ err = lo_do_open(lo, inode, -1, fi); - lo_inode_put(lo, &inode); - if (err) { - fuse_reply_err(req, err); -@@ -2005,39 +2044,40 @@ static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) - static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, - struct fuse_file_info *fi) - { -+ struct lo_inode *inode = lo_inode(req, ino); -+ struct lo_data *lo = lo_data(req); - int res; - int fd; -- char *buf; - - fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, - (void *)fi); - -- if (!fi) { -- struct lo_data *lo = lo_data(req); -- -- res = asprintf(&buf, "%i", lo_fd(req, ino)); -- if (res == -1) { -- return (void)fuse_reply_err(req, errno); -- } -+ if (!inode) { -+ fuse_reply_err(req, EBADF); -+ return; -+ } - -- fd = openat(lo->proc_self_fd, buf, O_RDWR); -- free(buf); -- if (fd == -1) { -- return (void)fuse_reply_err(req, errno); -+ if (!fi) { -+ fd = lo_inode_open(lo, inode, O_RDWR); -+ if (fd < 0) { -+ res = -fd; -+ goto out; - } - } else { - fd = lo_fi_fd(req, fi); - } - - if (datasync) { -- res = fdatasync(fd); -+ res = fdatasync(fd) == -1 ? errno : 0; - } else { -- res = fsync(fd); -+ res = fsync(fd) == -1 ? errno : 0; - } - if (!fi) { - close(fd); - } -- fuse_reply_err(req, res == -1 ? 
errno : 0); -+out: -+ lo_inode_put(lo, &inode); -+ fuse_reply_err(req, res); - } - - static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, --- -2.27.0 - diff --git a/kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch b/kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch deleted file mode 100644 index f04d944..0000000 --- a/kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 5840880e2ed3747464242e0559a6cf7ec4e55a11 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 21 Jan 2021 17:43:53 -0500 -Subject: [PATCH 3/5] x86/cpu: Add AVX512_FP16 cpu feature - -RH-Author: plai@redhat.com -Message-id: <20210121174353.16032-1-plai@redhat.com> -Patchwork-id: 100758 -O-Subject: [RHEL8.4 AV qemu-kvm PATCH] x86/cpu: Add AVX512_FP16 cpu feature -Bugzilla: 1838738 -RH-Acked-by: Eduardo Habkost -RH-Acked-by: Michael S. Tsirkin -RH-Acked-by: Bandan Das - -From: Cathy Zhang - -BZ https://bugzilla.redhat.com/show_bug.cgi?id=1838738 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=34299228 - x86 https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=34299267 -Branch rhel-av-8.4.0 - -Tested on intel-eaglestream-spr-01.khw1.lab.eng.bos.redhat.com. -All flags found as expected - avx512_vp2intersect, serialize, and avx512_fp16 -except: - tsxldtrk - -Cpuid reports CPUID.(7.0).EDX[16] isn't enabled on this CPU. -Leaf Subleaf EAX EBX ECX EDX -00000007 00000000: 00000001 .... f3bfbfef .... fa417f5e ^.A. ff8c4532 2E.. 
- -Already in rhel-av-8.4.0 (rebased to v5.2.0 ): - 353f98c9a x86/cpu: Enable AVX512_VP2INTERSECT cpu feature - 5dd13f2a5 target/i386: Add SERIALIZE cpu feature - b3c7344e3 target/i386: Enable TSX Suspend Load Address Tracking feature - -Signed-off-by: Eduardo Lima (Etrunko) ---- - target/i386/cpu.c | 2 +- - target/i386/cpu.h | 2 ++ - 2 files changed, 3 insertions(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index dc592e990e..f944b41573 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -977,7 +977,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - "avx512-vp2intersect", NULL, "md-clear", NULL, - NULL, NULL, "serialize", NULL, - "tsx-ldtrk", NULL, NULL /* pconfig */, NULL, -- NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, "avx512-fp16", - NULL, NULL, "spec-ctrl", "stibp", - NULL, "arch-capabilities", "core-capability", "ssbd", - }, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 88e8586f8f..a3db7e3c6c 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -783,6 +783,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; - #define CPUID_7_0_EDX_SERIALIZE (1U << 14) - /* TSX Suspend Load Address Tracking instruction */ - #define CPUID_7_0_EDX_TSX_LDTRK (1U << 16) -+/* AVX512_FP16 instruction */ -+#define CPUID_7_0_EDX_AVX512_FP16 (1U << 23) - /* Speculation Control */ - #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) - /* Single Thread Indirect Branch Predictors */ --- -2.18.4 - diff --git a/kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch b/kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch deleted file mode 100644 index 68d2cd8..0000000 --- a/kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch +++ /dev/null @@ -1,91 +0,0 @@ -From ed5fe7ae48c263ff69602b55361806f896ed12fb Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 23 Feb 2021 15:18:10 -0500 -Subject: [PATCH 51/54] x86/cpu: Populate SVM CPUID feature bits - -RH-Author: Dr. 
David Alan Gilbert -Message-id: <20210223151811.27968-2-dgilbert@redhat.com> -Patchwork-id: 101197 -O-Subject: [RHEL-AV-8.4.0 qemu-kvm PATCH 1/2] x86/cpu: Populate SVM CPUID feature bits -Bugzilla: 1926785 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Sergio Lopez Pascual -RH-Acked-by: Peter Xu - -From: Wei Huang - -Newer AMD CPUs will add CPUID_0x8000000A_EDX[28] bit, which indicates -that SVM instructions (VMRUN/VMSAVE/VMLOAD) will trigger #VMEXIT before -CPU checking their EAX against reserved memory regions. This change will -allow the hypervisor to avoid intercepting #GP and emulating SVM -instructions. KVM turns on this CPUID bit for nested VMs. In order to -support it, let us populate this bit, along with other SVM feature bits, -in FEAT_SVM. - -Signed-off-by: Wei Huang -Message-Id: <20210126202456.589932-1-wei.huang2@amd.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 5447089c2b3b084b51670af36fc86ee3979e04be) -Signed-off-by: Danilo C. L. de Paula ---- - target/i386/cpu.c | 6 +++--- - target/i386/cpu.h | 24 ++++++++++++++---------- - 2 files changed, 17 insertions(+), 13 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index f944b41573..372cba2942 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -922,11 +922,11 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - "npt", "lbrv", "svm-lock", "nrip-save", - "tsc-scale", "vmcb-clean", "flushbyasid", "decodeassists", - NULL, NULL, "pause-filter", NULL, -- "pfthreshold", NULL, NULL, NULL, -- NULL, NULL, NULL, NULL, -- NULL, NULL, NULL, NULL, -+ "pfthreshold", "avic", NULL, "v-vmsave-vmload", -+ "vgif", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -+ "svme-addr-chk", NULL, NULL, NULL, - }, - .cpuid = { .eax = 0x8000000A, .reg = R_EDX, }, - .tcg_features = TCG_SVM_FEATURES, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index a3db7e3c6c..4fdb552f93 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -672,16 +672,20 @@ 
typedef uint64_t FeatureWordArray[FEATURE_WORDS]; - #define CPUID_EXT3_PERFCORE (1U << 23) - #define CPUID_EXT3_PERFNB (1U << 24) - --#define CPUID_SVM_NPT (1U << 0) --#define CPUID_SVM_LBRV (1U << 1) --#define CPUID_SVM_SVMLOCK (1U << 2) --#define CPUID_SVM_NRIPSAVE (1U << 3) --#define CPUID_SVM_TSCSCALE (1U << 4) --#define CPUID_SVM_VMCBCLEAN (1U << 5) --#define CPUID_SVM_FLUSHASID (1U << 6) --#define CPUID_SVM_DECODEASSIST (1U << 7) --#define CPUID_SVM_PAUSEFILTER (1U << 10) --#define CPUID_SVM_PFTHRESHOLD (1U << 12) -+#define CPUID_SVM_NPT (1U << 0) -+#define CPUID_SVM_LBRV (1U << 1) -+#define CPUID_SVM_SVMLOCK (1U << 2) -+#define CPUID_SVM_NRIPSAVE (1U << 3) -+#define CPUID_SVM_TSCSCALE (1U << 4) -+#define CPUID_SVM_VMCBCLEAN (1U << 5) -+#define CPUID_SVM_FLUSHASID (1U << 6) -+#define CPUID_SVM_DECODEASSIST (1U << 7) -+#define CPUID_SVM_PAUSEFILTER (1U << 10) -+#define CPUID_SVM_PFTHRESHOLD (1U << 12) -+#define CPUID_SVM_AVIC (1U << 13) -+#define CPUID_SVM_V_VMSAVE_VMLOAD (1U << 15) -+#define CPUID_SVM_VGIF (1U << 16) -+#define CPUID_SVM_SVME_ADDR_CHK (1U << 28) - - /* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ - #define CPUID_7_0_EBX_FSGSBASE (1U << 0) --- -2.27.0 - diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 91601d0..fa7f3bb 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,8 +69,8 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 5.2.0 -Release: 16%{?dist} +Version: 6.0.0 +Release: 1%{?rcversion}%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -79,7 +79,7 @@ URL: http://www.qemu.org/ ExclusiveArch: x86_64 %{power64} aarch64 s390x -Source0: http://wiki.qemu.org/download/qemu-5.2.0.tar.xz +Source0: http://wiki.qemu.org/download/qemu-6.0.0.tar.xz # KSM control scripts Source4: ksm.service @@ -107,223 +107,21 @@ Source35: udev-kvm-check.c Source36: README.tests -Patch0001: 0001-redhat-Adding-slirp-to-the-exploded-tree.patch -Patch0005: 0005-Initial-redhat-build.patch -Patch0006: 0006-Enable-disable-devices-for-RHEL.patch -Patch0007: 0007-Machine-type-related-general-changes.patch -Patch0008: 0008-Add-aarch64-machine-types.patch -Patch0009: 0009-Add-ppc64-machine-types.patch -Patch0010: 0010-Add-s390x-machine-types.patch -Patch0011: 0011-Add-x86_64-machine-types.patch -Patch0012: 0012-Enable-make-check.patch -Patch0013: 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch -Patch0014: 0014-Add-support-statement-to-help-output.patch -Patch0015: 0015-globally-limit-the-maximum-number-of-CPUs.patch -Patch0016: 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch -Patch0017: 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch -Patch0018: 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -Patch0019: 0019-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch -Patch0021: 0021-redhat-Define-hw_compat_8_3.patch -Patch0022: 0022-redhat-Add-spapr_machine_rhel_default_class_options.patch -Patch0023: 0023-redhat-Define-pseries-rhel8.4.0-machine-type.patch -Patch0024: 0024-redhat-s390x-add-rhel-8.4.0-compat-machine.patch -Patch0027: 0027-block-vpc-Make-vpc_open-read-the-full-dynamic-header.patch -Patch0028: 0028-GCC-11-warnings-hacks.patch -Patch0029: 0029-Disable-problematic-tests-for-initial-build.patch -Patch0030: 0030-Revert-GCC-11-warnings-hacks.patch -Patch0031: 0031-s390x-Use-strpadcpy-for-copying-vm-name.patch -Patch0032: 0032-tcg-Restrict-tcg_out_op-to-arrays-of-TCG_MAX_OP_ARGS.patch 
-Patch0033: 0033-net-eth-Simplify-_eth_get_rss_ex_dst_addr.patch -Patch0034: 0034-net-eth-Fix-stack-buffer-overflow-in.patch -Patch0035: 0035-block-nvme-Implement-fake-truncate-coroutine.patch -Patch0037: 0037-build-system-use-b_staticpic-false.patch -Patch0038: 0038-spapr-Fix-buffer-overflow-in-spapr_numa_associativit.patch -Patch0039: 0039-usb-hcd-xhci-pci-Fixup-capabilities-ordering-again.patch -Patch0040: 0040-qga-commands-posix-Send-CCW-address-on-s390x-with-th.patch -Patch0041: 0041-AArch64-machine-types-cleanup.patch -Patch0042: 0042-hw-arm-virt-Add-8.4-Machine-type.patch -Patch0044: 0044-memory-Rename-memory_region_notify_one-to-memory_reg.patch -Patch0045: 0045-memory-Add-IOMMUTLBEvent.patch -Patch0046: 0046-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch -Patch0047: 0047-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch -Patch0048: 0048-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch -Patch0049: 0049-RHEL-Switch-pvpanic-test-to-q35.patch -Patch0050: 0050-8.4-x86-machine-type.patch -Patch0051: 0051-memory-clamp-cached-translation-in-case-it-points-to.patch -Patch0054: 0054-Drop-bogus-IPv6-messages.patch -# For bz#1914069 - [ppc64le] have this fix for rhel8.4 av (spapr: Allow memory unplug to always succeed) -Patch55: kvm-spapr-Allow-memory-unplug-to-always-succeed.patch -# For bz#1914069 - [ppc64le] have this fix for rhel8.4 av (spapr: Allow memory unplug to always succeed) -Patch56: kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch -# For bz#1838738 - [Intel 8.4 FEAT] qemu-kvm Sapphire Rapids (SPR) New Instructions (NIs) - Fast Train -Patch57: kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch -# For bz#1904268 - [RFE] [HPEMC] qemu-kvm: support up to 710 VCPUs -Patch58: kvm-q35-Increase-max_cpus-to-710-on-pc-q35-rhel8-machine.patch -# For bz#1922170 - Enable vfio-ccw in AV -Patch59: kvm-config-enable-VFIO_CCW.patch -# For bz#1854811 - scsi-bus.c: use-after-free due to race between device unplug and I/O operation 
causes guest crash -Patch60: kvm-scsi-fix-device-removal-race-vs-IO-restart-callback-.patch -# For bz#1907264 - systemtap: invalid or missing conversion specifier at the trace event vhost_vdpa_set_log_base -Patch61: kvm-tracetool-also-strip-l-and-ll-from-systemtap-format-.patch -# For bz#1834152 - [aarch64] QEMU SMMUv3 device: Support range invalidation -Patch63: kvm-hw-arm-smmuv3-Fix-addr_mask-for-range-based-invalida.patch -# For bz#1925028 - vsmmuv3/vhost and virtio-iommu/vhost regression -Patch65: kvm-vhost-Unbreak-SMMU-and-virtio-iommu-on-dev-iotlb-sup.patch -# For bz#1902537 - The default fsfreeze-hook path from man page and qemu-ga --help command are different -Patch66: kvm-docs-set-CONFDIR-when-running-sphinx.patch -# For bz#1903521 - hot unplug vhost-user cause qemu crash: qemu-kvm: ../softmmu/memory.c:2818: do_address_space_destroy: Assertion `QTAILQ_EMPTY(&as->listeners)' failed. -Patch67: kvm-virtio-Add-corresponding-memory_listener_unregister-.patch -# For bz#1918966 - [incremental_backup] qemu aborts if guest reboot during backup when using virtio-blk: "aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule'" -# For bz#1918968 - [incremental_backup] qemu deadlock after poweroff in guest during backup in nbd_export_close_all() -Patch68: kvm-block-Honor-blk_set_aio_context-context-requirements.patch -# For bz#1918966 - [incremental_backup] qemu aborts if guest reboot during backup when using virtio-blk: "aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule'" -# For bz#1918968 - [incremental_backup] qemu deadlock after poweroff in guest during backup in nbd_export_close_all() -Patch69: kvm-nbd-server-Quiesce-coroutines-on-context-switch.patch -# For bz#1918966 - [incremental_backup] qemu aborts if guest reboot during backup when using virtio-blk: "aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule'" -# For bz#1918968 - [incremental_backup] qemu deadlock after poweroff in guest during backup in 
nbd_export_close_all() -Patch70: kvm-block-Avoid-processing-BDS-twice-in-bdrv_set_aio_con.patch -# For bz#1918966 - [incremental_backup] qemu aborts if guest reboot during backup when using virtio-blk: "aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule'" -# For bz#1918968 - [incremental_backup] qemu deadlock after poweroff in guest during backup in nbd_export_close_all() -Patch71: kvm-storage-daemon-Call-bdrv_close_all-on-exit.patch -# For bz#1918966 - [incremental_backup] qemu aborts if guest reboot during backup when using virtio-blk: "aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule'" -# For bz#1918968 - [incremental_backup] qemu deadlock after poweroff in guest during backup in nbd_export_close_all() -Patch72: kvm-block-move-blk_exp_close_all-to-qemu_cleanup.patch -# For bz#1887883 - qemu blocks client progress with various NBD actions -Patch73: kvm-block-nbd-only-detach-existing-iochannel-from-aio_co.patch -# For bz#1887883 - qemu blocks client progress with various NBD actions -Patch74: kvm-block-nbd-only-enter-connection-coroutine-if-it-s-pr.patch -# For bz#1887883 - qemu blocks client progress with various NBD actions -Patch75: kvm-nbd-make-nbd_read-return-EIO-on-error.patch -# For bz#1907255 - Migrate failed with vhost-vsock-pci from RHEL-AV 8.3.1 to RHEL-AV 8.2.1 -Patch76: kvm-virtio-move-use-disabled-flag-property-to-hw_compat_.patch -# For bz#1920740 - CVE-2020-35517 virt:8.4/qemu-kvm: QEMU: virtiofsd: potential privileged host device access from guest [rhel-av-8.4.0] -Patch77: kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch -# For bz#1920740 - CVE-2020-35517 virt:8.4/qemu-kvm: QEMU: virtiofsd: potential privileged host device access from guest [rhel-av-8.4.0] -Patch78: kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch -# For bz#1920740 - CVE-2020-35517 virt:8.4/qemu-kvm: QEMU: virtiofsd: potential privileged host device access from guest [rhel-av-8.4.0] -Patch79: 
kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch -# For bz#1920941 - [ppc64le] [AV]--disk cdimage.iso,bus=usb fails to boot -Patch80: kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch -# For bz#1917830 - Add romsize property to qemu-kvm -Patch81: kvm-pci-reject-too-large-ROMs.patch -# For bz#1917830 - Add romsize property to qemu-kvm -Patch82: kvm-pci-add-romsize-property.patch -# For bz#1917826 - Add extra device support to qemu-kvm, but not to rhel machine types -Patch83: kvm-redhat-Add-some-devices-for-exporting-upstream-machi.patch -# For bz#1880299 - vhost-user mq connection fails to restart after kill host testpmd which acts as vhost-user client -Patch84: kvm-vhost-Check-for-valid-vdev-in-vhost_backend_handle_i.patch -# For bz#1901323 - QSD (QEMU Storage Daemon): basic support - TechPreview -Patch85: kvm-docs-generate-qemu-storage-daemon-qmp-ref-7-man-page.patch -# For bz#1901323 - QSD (QEMU Storage Daemon): basic support - TechPreview -Patch86: kvm-docs-add-qemu-storage-daemon-1-man-page.patch -# For bz#1901323 - QSD (QEMU Storage Daemon): basic support - TechPreview -Patch87: kvm-docs-Add-qemu-storage-daemon-1-manpage-to-meson.buil.patch -# For bz#1901323 - QSD (QEMU Storage Daemon): basic support - TechPreview -Patch88: kvm-qemu-storage-daemon-Enable-object-add.patch -# For bz#1930033 - enable vhost-user-blk device [TechPreview] -Patch90: kvm-default-configs-Enable-vhost-user-blk.patch -# For bz#1925345 - qemu-nbd needs larger backlog for Unix socket listen() -Patch91: kvm-qemu-nbd-Use-SOMAXCONN-for-socket-listen-backlog.patch -# For bz#1917654 - [failover vf migration][RHEL84 vm] After start a vm with a failover vf + a failover virtio net device, the failvoer vf do not exist in the vm -Patch92: kvm-pcie-don-t-set-link-state-active-if-the-slot-is-empt.patch -# For bz#1930757 - Allow control of block-dirty-bitmap persistence via 'block-bitmap-mapping' -Patch93: kvm-migration-dirty-bitmap-Use-struct-for-alias-map-inne.patch -# For bz#1930757 
- Allow control of block-dirty-bitmap persistence via 'block-bitmap-mapping' -Patch94: kvm-migration-dirty-bitmap-Allow-control-of-bitmap-persi.patch -# For bz#1930757 - Allow control of block-dirty-bitmap persistence via 'block-bitmap-mapping' -Patch95: kvm-qemu-iotests-300-Add-test-case-for-modifying-persist.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch96: kvm-failover-fix-indentantion.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch97: kvm-failover-Use-always-atomics-for-primary_should_be_hi.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch98: kvm-failover-primary-bus-is-only-used-once-and-where-it-.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch99: kvm-failover-Remove-unused-parameter.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch100: kvm-failover-Remove-external-partially_hotplugged-proper.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch101: kvm-failover-qdev_device_add-returns-err-or-dev-set.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch102: kvm-failover-Rename-bool-to-failover_primary_hidden.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch103: kvm-failover-g_strcmp0-knows-how-to-handle-NULL.patch -# For bz#1819991 - Hostdev type interface with net failover enabled 
exists in domain xml and doesn't reattach to host after hot-unplug -Patch104: kvm-failover-Remove-primary_device_opts.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch105: kvm-failover-remove-standby_id-variable.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch106: kvm-failover-Remove-primary_device_dict.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch107: kvm-failover-Remove-memory-leak.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch108: kvm-failover-simplify-virtio_net_find_primary.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch109: kvm-failover-should_be_hidden-should-take-a-bool.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch110: kvm-failover-Rename-function-to-hide_device.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch111: kvm-failover-virtio_net_connect_failover_devices-does-no.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch112: kvm-failover-Rename-to-failover_find_primary_device.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch113: kvm-failover-simplify-qdev_device_add-failover-case.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain 
xml and doesn't reattach to host after hot-unplug -Patch114: kvm-failover-simplify-qdev_device_add.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch115: kvm-failover-make-sure-that-id-always-exist.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch116: kvm-failover-remove-failover_find_primary_device-error-p.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch117: kvm-failover-split-failover_find_primary_device_id.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch118: kvm-failover-We-don-t-need-to-cache-primary_device_id-an.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch119: kvm-failover-Caller-of-this-two-functions-already-have-p.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch120: kvm-failover-simplify-failover_unplug_primary.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch121: kvm-failover-Remove-primary_dev-member.patch -# For bz#1819991 - Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug -Patch122: kvm-virtio-net-add-missing-object_unref.patch -# For bz#1926785 - [RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Fast Train -Patch123: kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch -# For bz#1926785 - [RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Fast Train -Patch124: 
kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch -# For bz#1932190 - Timeout when dump the screen from 2nd VGA -Patch125: kvm-qxl-set-qxl.ssd.dcl.con-on-secondary-devices.patch -# For bz#1932190 - Timeout when dump the screen from 2nd VGA -Patch126: kvm-qxl-also-notify-the-rendering-is-done-when-skipping-.patch -# For bz#1935071 - CVE-2021-20263 virt:8.4/qemu-kvm: QEMU: virtiofsd: 'security.capabilities' is not dropped with xattrmap option [rhel-av-8] -Patch127: kvm-virtiofsd-Save-error-code-early-at-the-failure-calls.patch -# For bz#1935071 - CVE-2021-20263 virt:8.4/qemu-kvm: QEMU: virtiofsd: 'security.capabilities' is not dropped with xattrmap option [rhel-av-8] -Patch128: kvm-virtiofs-drop-remapped-security.capability-xattr-as-.patch -# For bz#1927530 - RHEL8 Hypervisor - OVIRT - Issues seen on a virtualization guest with direct passthrough LUNS pausing when a host gets a Thin threshold warning -Patch129: kvm-scsi-disk-move-scsi_handle_rw_error-earlier.patch -# For bz#1927530 - RHEL8 Hypervisor - OVIRT - Issues seen on a virtualization guest with direct passthrough LUNS pausing when a host gets a Thin threshold warning -Patch130: kvm-scsi-disk-do-not-complete-requests-early-for-rerror-.patch -# For bz#1927530 - RHEL8 Hypervisor - OVIRT - Issues seen on a virtualization guest with direct passthrough LUNS pausing when a host gets a Thin threshold warning -Patch131: kvm-scsi-introduce-scsi_sense_from_errno.patch -# For bz#1927530 - RHEL8 Hypervisor - OVIRT - Issues seen on a virtualization guest with direct passthrough LUNS pausing when a host gets a Thin threshold warning -Patch132: kvm-scsi-disk-pass-SCSI-status-to-scsi_handle_rw_error.patch -# For bz#1927530 - RHEL8 Hypervisor - OVIRT - Issues seen on a virtualization guest with direct passthrough LUNS pausing when a host gets a Thin threshold warning -Patch133: kvm-scsi-disk-pass-guest-recoverable-errors-through-even.patch -# For bz#1936948 - CVE-2021-20221 virt:av/qemu-kvm: qemu: out-of-bound 
heap buffer access via an interrupt ID field [rhel-av-8.4.0] -Patch134: kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch -# For bz#1934158 - Windows guest looses network connectivity when NIC was configured with static IP -Patch135: kvm-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch -# For bz#1937004 - vhost-user-blk server endianness and input validation fixes -Patch136: kvm-vhost-user-blk-fix-blkcfg-num_queues-endianness.patch -# For bz#1937004 - vhost-user-blk server endianness and input validation fixes -Patch137: kvm-block-export-fix-blk_size-double-byteswap.patch -# For bz#1937004 - vhost-user-blk server endianness and input validation fixes -Patch138: kvm-block-export-use-VIRTIO_BLK_SECTOR_BITS.patch -# For bz#1937004 - vhost-user-blk server endianness and input validation fixes -Patch139: kvm-block-export-fix-vhost-user-blk-export-sector-number.patch -# For bz#1937004 - vhost-user-blk server endianness and input validation fixes -Patch140: kvm-block-export-port-virtio-blk-discard-write-zeroes-in.patch -# For bz#1937004 - vhost-user-blk server endianness and input validation fixes -Patch141: kvm-block-export-port-virtio-blk-read-write-range-check.patch +Patch0004: 0004-Initial-redhat-build.patch +Patch0005: 0005-Enable-disable-devices-for-RHEL.patch +Patch0006: 0006-Machine-type-related-general-changes.patch +Patch0007: 0007-Add-aarch64-machine-types.patch +Patch0008: 0008-Add-ppc64-machine-types.patch +Patch0009: 0009-Add-s390x-machine-types.patch +Patch0010: 0010-Add-x86_64-machine-types.patch +Patch0011: 0011-Enable-make-check.patch +Patch0012: 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +Patch0013: 0013-Add-support-statement-to-help-output.patch +Patch0014: 0014-globally-limit-the-maximum-number-of-CPUs.patch +Patch0015: 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0016: 0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +Patch0017: 
0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +Patch0018: 0018-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch BuildRequires: wget BuildRequires: rpm-build @@ -432,6 +230,7 @@ BuildRequires: pkgconfig(gbm) %endif BuildRequires: perl-Test-Harness +BuildRequires: libslirp-devel Requires: qemu-kvm-core = %{epoch}:%{version}-%{release} @@ -479,15 +278,6 @@ emulation for the KVM hypervisor. qemu-kvm acts as a virtual machine monitor together with the KVM kernel modules, and emulates the hardware for a full system such as a PC and its associated peripherals. -%package -n qemu-kiwi -Summary: qemu-kiwi components -Requires: qemu-kvm-common = %{epoch}:%{version}-%{release} - -%description -n qemu-kiwi -qemu-kiwi is a version of qemu-kvm with a restricted set of features -intended for use by specific applications. -It's experimental and unsupported. - %package -n qemu-kvm-docs Summary: qemu-kvm documentation @@ -635,29 +425,17 @@ This package provides opengl support. %prep -%setup -n qemu-%{version}%{?rcversion} -# Remove slirp content in scratchbuilds because it's being applyed as a patch -rm -fr slirp -mkdir slirp +%if 0%{?rcversion} +%setup -n qemu-%{version}-%{?rcversion} +%else +%setup -n qemu-%{version} +%endif %autopatch -p1 %global qemu_kvm_build qemu_kvm_build -%global qemu_kiwi_build qemu_kiwi_src/build - -%ifnarch %{power64} -# XXX: ugly hack to copy source tree into a new folder. -# it allows to build qemu-kiwi without touching the original source tree. -# This is required as the build isolation is not 100% as we also have to -# change the source tree when building qemu-kiwi. And, when we do that, -# calling "make check" on qemu-kvm see that change and behaves baddly. -# Newer version of qemu allow us to create a better sollution, and this -# hack can be dropped. -cp -fpr . 
../qemu_kiwi_src -mv ../qemu_kiwi_src ./qemu_kiwi_src -mkdir -p %{qemu_kiwi_build} -%endif mkdir -p %{qemu_kvm_build} + %build %global buildarch %{kvm_target}-softmmu @@ -682,6 +460,8 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --disable-bzip2 \\\ --disable-cap-ng \\\ --disable-capstone \\\ + --disable-cfi \\\ + --disable-cfi-debug \\\ --disable-cloop \\\ --disable-cocoa \\\ --disable-coroutine-pool \\\ @@ -694,8 +474,10 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --disable-dmg \\\ --disable-docs \\\ --disable-fdt \\\ + --disable-fuse \\\ + --disable-fuse-lseek \\\ --disable-gcrypt \\\ - --disable-git-update \\\ + --disable-gio \\\ --disable-glusterfs \\\ --disable-gnutls \\\ --disable-gtk \\\ @@ -718,6 +500,7 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --disable-linux-io-uring \\\ --disable-linux-user \\\ --disable-live-block-migration \\\ + --disable-lto \\\ --disable-lzfse \\\ --disable-lzo \\\ --disable-malloc-trim \\\ @@ -725,6 +508,7 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --disable-modules \\\ --disable-module-upgrades \\\ --disable-mpath \\\ + --disable-multiprocess \\\ --disable-netmap \\\ --disable-nettle \\\ --disable-numa \\\ @@ -745,6 +529,7 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --disable-sdl-image \\\ --disable-seccomp \\\ --disable-sheepdog \\\ + --disable-slirp \\\ --disable-smartcard \\\ --disable-snappy \\\ --disable-sparse \\\ @@ -783,7 +568,8 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --disable-xen-pci-passthrough \\\ --disable-xfsctl \\\ --disable-xkbcommon \\\ - --disable-zstd + --disable-zstd \\\ + --with-git-submodules=ignore pushd %{qemu_kvm_build} ../configure \ @@ -854,6 +640,7 @@ pushd %{qemu_kvm_build} --enable-rdma \ %endif --enable-seccomp \ + --enable-slirp=system \ --enable-snappy \ %if 0%{have_spice} --enable-smartcard \ @@ -907,117 +694,25 @@ make V=1 %{?_smp_mflags} $buildldflags # Setup back compat qemu-kvm binary %{__python3} scripts/tracetool.py --backend dtrace --format stap \ --group=all --binary 
%{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ - trace/trace-events-all > qemu-kvm.stp + trace/trace-events-all qemu-kvm.stp %{__python3} scripts/tracetool.py --backends=dtrace --format=log-stap \ --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ - trace/trace-events-all > qemu-kvm-log.stp + trace/trace-events-all qemu-kvm-log.stp %{__python3} scripts/tracetool.py --backend dtrace --format simpletrace-stap \ --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ - trace/trace-events-all > qemu-kvm-simpletrace.stp + trace/trace-events-all qemu-kvm-simpletrace.stp cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm gcc %{SOURCE6} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o ksmctl gcc %{SOURCE35} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o udev-kvm-check - -popd -echo "Starting qemu-kiwi build" - -pushd %{qemu_kiwi_build} -# XXX: removing QXL and CONFIG_TPM.* mak configuration, -# which causes problem with the config options used by qemu-kiwi. -# Ideally we should be able to do this at configure time. 
-find ../default-configs -name "*-rh-devices.mak" \ - -exec sed -i '/CONFIG_QXL=/d' {} \; -find ../default-configs -name "*-rh-devices.mak" \ - -exec sed -i '/CONFIG_TPM.*=/d' {} \; - -../configure \ - --prefix="%{_prefix}" \ - --libdir="%{_libdir}" \ - --sysconfdir="%{_sysconfdir}" \ - --interp-prefix=%{_prefix}/qemu-%M \ - --localstatedir="%{_localstatedir}" \ - --libexecdir="%{_libexecdir}" \ - --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ - --extra-cflags="%{optflags}" \ - --with-pkgversion="%{name}-%{version}-%{release}" \ - --with-suffix="%{name}" \ - --firmwarepath=%{_prefix}/share/qemu-firmware \ - --meson="%{__meson}" \ - --target-list="%{buildarch}" \ - --block-drv-rw-whitelist=%{block_drivers_list} \ - --audio-drv-list= \ - --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \ - --with-coroutine=ucontext \ - --with-git=git \ - --tls-priority=@QEMU,SYSTEM \ - %{disable_everything} \ - --enable-attr \ -%ifarch %{ix86} x86_64 - --enable-avx2 \ -%endif - --enable-cap-ng \ - --enable-coroutine-pool \ - --enable-debug-info \ -%if 0%{have_fdt} - --enable-fdt \ -%endif - --enable-kvm \ -%ifarch x86_64 - --enable-libpmem \ -%endif - --enable-linux-aio \ - --enable-libudev \ - --enable-malloc-trim \ - --enable-mpath \ -%ifnarch s390x - --enable-numa \ -%endif - --enable-pie \ - --enable-seccomp \ - --enable-system \ - --enable-tcg \ - --enable-trace-backend=dtrace \ - --enable-vhost-kernel \ - --enable-vhost-net \ - --enable-vhost-user \ - --enable-vhost-user-blk-server \ - --enable-vhost-vdpa \ - --enable-vhost-vsock \ - --enable-werror \ - --enable-xkbcommon \ - --without-default-devices - - -echo "qemu-kiki config-host.mak contents:" -echo "===" -cat config-host.mak -echo "===" - -make V=1 %{?_smp_mflags} $buildldflags - -%{__python3} scripts/tracetool.py --backend dtrace --format stap \ - --group=all --binary %{_libexecdir}/qemu-kiwi --probe-prefix qemu.kvm \ - trace/trace-events-all > qemu-kiwi.stp - -%{__python3} scripts/tracetool.py 
--backends=dtrace --format=log-stap \ - --group=all --binary %{_libexecdir}/qemu-kiwi --probe-prefix qemu.kvm \ - trace/trace-events-all > qemu-kiwi-log.stp - -%{__python3} scripts/tracetool.py --backend dtrace --format simpletrace-stap \ - --group=all --binary %{_libexecdir}/qemu-kiwi --probe-prefix qemu.kvm \ - trace/trace-events-all > qemu-kiwi-simpletrace.stp - -cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kiwi %endif popd %install pushd %{qemu_kvm_build} - %define _udevdir %(pkg-config --variable=udevdir udev) %define _udevrulesdir %{_udevdir}/rules.d @@ -1218,11 +913,16 @@ rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/npcm7xx_bootrom.bin rm -rf ${RPM_BUILD_ROOT}%{_libdir}/qemu-kvm/ui-spice-app.so +# Remove virtfs-proxy-helper files +rm -rf ${RPM_BUILD_ROOT}%{_libexecdir}/virtfs-proxy-helper +rm -rf ${RPM_BUILD_ROOT}%{_mandir}/man1/virtfs-proxy-helper* + %ifarch s390x # Use the s390-ccw.img that we've just built, not the pre-built one install -m 0644 pc-bios/s390-ccw/s390-ccw.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ %else rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-netboot.img + rm -rf ${RPM_BUILD_ROOT}%{_libdir}/qemu-kvm/hw-s390x-virtio-gpu-ccw.so %endif %ifnarch x86_64 @@ -1316,17 +1016,11 @@ rm -rf $RPM_BUILD_ROOT%{qemudocdir}/interop/.buildinfo rm -rf $RPM_BUILD_ROOT%{qemudocdir}/system/.buildinfo rm -rf $RPM_BUILD_ROOT%{qemudocdir}/tools/.buildinfo rm -rf $RPM_BUILD_ROOT%{qemudocdir}/user/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/devel/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/.buildinfo # Remove spec rm -rf $RPM_BUILD_ROOT%{qemudocdir}/specs - -popd - -pushd %{qemu_kiwi_build} -install -m 0755 %{kvm_target}-softmmu/qemu-system-%{kvm_target} $RPM_BUILD_ROOT%{_libexecdir}/qemu-kiwi -install -m 0644 qemu-kiwi.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ -install -m 0644 qemu-kiwi-log.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ -install -m 0644 qemu-kiwi-simpletrace.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ popd 
%check @@ -1335,11 +1029,6 @@ echo "Testing qemu-kvm-build" export DIFF=diff; make check V=1 popd -echo "Testing qemu-kiwi" -pushd %{qemu_kiwi_build} -export DIFF=diff; make check V=1 -popd - %post -n qemu-kvm-common %systemd_post ksm.service %systemd_post ksmtuned.service @@ -1389,6 +1078,10 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %files -n qemu-kvm-docs %defattr(-,root,root) %dir %{qemudocdir} +%doc %{qemudocdir}/genindex.html +%doc %{qemudocdir}/search.html +%doc %{qemudocdir}/objects.inv +%doc %{qemudocdir}/searchindex.js %doc %{qemudocdir}/README.rst %doc %{qemudocdir}/COPYING %doc %{qemudocdir}/COPYING.LIB @@ -1400,6 +1093,8 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %doc %{qemudocdir}/system/* %doc %{qemudocdir}/tools/* %doc %{qemudocdir}/user/* +%doc %{qemudocdir}/devel/* +%doc %{qemudocdir}/_static/* %files -n qemu-kvm-common %defattr(-,root,root) @@ -1498,16 +1193,11 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %{_libdir}/qemu-kvm/hw-usb-redirect.so %endif %{_libdir}/qemu-kvm/hw-display-virtio-gpu.so -%ifnarch s390x +%ifarch s390x + %{_libdir}/qemu-kvm/hw-s390x-virtio-gpu-ccw.so +%else %{_libdir}/qemu-kvm/hw-display-virtio-gpu-pci.so %endif - -%files -n qemu-kiwi -%defattr(-,root,root) -%{_libexecdir}/qemu-kiwi -%{_datadir}/systemtap/tapset/qemu-kiwi.stp -%{_datadir}/systemtap/tapset/qemu-kiwi-log.stp -%{_datadir}/systemtap/tapset/qemu-kiwi-simpletrace.stp %endif %files -n qemu-img @@ -1574,6 +1264,10 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %endif %changelog +* Fri Apr 30 2021 Miroslav Rezanina - 6.0.0-1 +- Rebase to QEMU 6.0 +- Resolves: bz#1872569 + * Mon Apr 26 2021 Miroslav Rezanina - 5.2.0-16 - kvm-Limit-build-on-Power-to-qemu-img-and-qemu-ga-only.patch [bz#1944056] - Resolves: bz#1944056 @@ -1723,8 +1417,7 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : - Resolves: bz#1918968 ([incremental_backup] qemu deadlock after 
poweroff in guest during backup in nbd_export_close_all()) - -* Fri Feb 12 2021 Miroslav Rezanina - 5.2.0-6.el8 +* Tue Feb 09 2021 Eduardo Lima (Etrunko) - 5.2.0-6.el8 - kvm-scsi-fix-device-removal-race-vs-IO-restart-callback-.patch [bz#1854811] - kvm-tracetool-also-strip-l-and-ll-from-systemtap-format-.patch [bz#1907264] - kvm-redhat-moving-all-documentation-files-to-qemu-kvm-do.patch [bz#1881170 bz#1924766] diff --git a/rpminspect.yaml b/rpminspect.yaml new file mode 100644 index 0000000..3b74418 --- /dev/null +++ b/rpminspect.yaml @@ -0,0 +1,5 @@ +--- +elf: + exclude_path:(^/usr/share/qemu-kvm/s390-ccw.img$)|(^/usr/share/qemu-kvm/s390-netboot.img$) +inspections: + badfuncs: off diff --git a/sources b/sources index 6a86af7..c713614 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-5.2.0.tar.xz) = bddd633ce111471ebc651e03080251515178808556b49a308a724909e55dac0be0cc0c79c536ac12d239678ae94c60100dc124be9b9d9538340c03a2f27177f3 +SHA512 (qemu-6.0.0.tar.xz) = ee3ff00aebec4d8891d2ff6dabe4e667e510b2a4fe3f6190aa34673a91ea32dcd2db2e9bf94c2f1bf05aa79788f17cfbbedc6027c0988ea08a92587b79ee05e4 From 93c38bfc163d3efac9f32834894610cb2c5893dd Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 13 May 2021 07:49:49 +0200 Subject: [PATCH 119/195] * Thu May 13 2021 Miroslav Rezanina - 6.0.0-2 - kvm-Remove-message-with-running-VM-count.patch [bz#1914461] - kvm-Remove-SPICE-and-QXL-from-x86_64-rh-devices.mak.patch [bz#1906168] - kvm-spec-file-build-qemu-kvm-without-SPICE-and-QXL.patch [bz#1906168] - kvm-spec-file-Obsolete-qemu-kvm-ui-spice.patch [bz#1906168] - Resolves: bz#1914461 (Remove KVM guest count and limit info message) - Resolves: bz#1906168 ([RHEL-9] qemu-kvm spec-file: Do not BuildRequire spice) --- 81-kvm-rhel.rules | 1 - ...E-and-QXL-from-x86_64-rh-devices.mak.patch | 43 +++++ qemu-kvm.spec | 69 +++----- rpminspect.yaml | 3 +- udev-kvm-check.c | 155 ------------------ 5 files changed, 64 insertions(+), 207 deletions(-) delete mode 100644 81-kvm-rhel.rules 
create mode 100644 kvm-Remove-SPICE-and-QXL-from-x86_64-rh-devices.mak.patch delete mode 100644 udev-kvm-check.c diff --git a/81-kvm-rhel.rules b/81-kvm-rhel.rules deleted file mode 100644 index 787cad6..0000000 --- a/81-kvm-rhel.rules +++ /dev/null @@ -1 +0,0 @@ -DEVPATH=="*/kvm", ACTION=="change", RUN+="/lib/udev/udev-kvm-check $env{COUNT} $env{EVENT}" diff --git a/kvm-Remove-SPICE-and-QXL-from-x86_64-rh-devices.mak.patch b/kvm-Remove-SPICE-and-QXL-from-x86_64-rh-devices.mak.patch new file mode 100644 index 0000000..7fdb392 --- /dev/null +++ b/kvm-Remove-SPICE-and-QXL-from-x86_64-rh-devices.mak.patch @@ -0,0 +1,43 @@ +From c51bf45304812b4da010bdd3db1b1d59c44af4ee Mon Sep 17 00:00:00 2001 +From: Uri Lublin +Date: Tue, 27 Apr 2021 18:37:09 +0300 +Subject: [PATCH 2/4] Remove SPICE and QXL from x86_64-rh-devices.mak +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Uri Lublin (uril) +RH-MergeRequest: 3: Build qemu-kvm without SPICE and QXL +RH-Commit: [1/3] 8f6186049d22c3a6e1bfb1cf2bfe88a8fc8c2271 +RH-Bugzilla: 1906168 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Danilo de Paula +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Daniel P. 
Berrangé +--- + default-configs/devices/x86_64-rh-devices.mak | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/default-configs/devices/x86_64-rh-devices.mak b/default-configs/devices/x86_64-rh-devices.mak +index 9f41400530..d09c138fc6 100644 +--- a/default-configs/devices/x86_64-rh-devices.mak ++++ b/default-configs/devices/x86_64-rh-devices.mak +@@ -60,7 +60,6 @@ CONFIG_PFLASH_CFI01=y + CONFIG_PVPANIC_ISA=y + CONFIG_PXB=y + CONFIG_Q35=y +-CONFIG_QXL=y + CONFIG_RTL8139_PCI=y + CONFIG_SCSI=y + CONFIG_SERIAL=y +@@ -70,7 +69,6 @@ CONFIG_SEV=y + CONFIG_SGA=y + CONFIG_SMBIOS=y + CONFIG_SMBUS_EEPROM=y +-CONFIG_SPICE=y + CONFIG_TEST_DEVICES=y + CONFIG_USB=y + CONFIG_USB_EHCI=y +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index fa7f3bb..3f01cf8 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -2,7 +2,6 @@ %global SLOF_gittagcommit 899d9883 %global have_usbredir 1 -%global have_spice 1 %global have_opengl 1 %global have_fdt 0 %global have_gluster 1 @@ -27,7 +26,6 @@ %ifarch x86_64 %global kvm_target x86_64 %else - %global have_spice 0 %global have_opengl 0 %global have_gluster 0 %endif @@ -53,9 +51,6 @@ #Versions of various parts: %global requires_all_modules \ -%if %{have_spice} \ -Requires: %{name}-ui-spice = %{epoch}:%{version}-%{release} \ -%endif \ %if %{have_opengl} \ Requires: %{name}-ui-opengl = %{epoch}:%{version}-%{release} \ %endif \ @@ -70,7 +65,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.0.0 -Release: 1%{?rcversion}%{?dist} +Release: 2%{?rcversion}%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -102,8 +97,6 @@ Source30: kvm-s390x.conf Source31: kvm-x86.conf Source32: qemu-pr-helper.service Source33: qemu-pr-helper.socket -Source34: 81-kvm-rhel.rules -Source35: udev-kvm-check.c Source36: README.tests @@ -122,6 +115,8 @@ Patch0015: 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0016: 0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0017: 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch Patch0018: 0018-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +# For bz#1906168 - [RHEL-9] qemu-kvm spec-file: Do not BuildRequire spice +Patch19: kvm-Remove-SPICE-and-QXL-from-x86_64-rh-devices.mak.patch BuildRequires: wget BuildRequires: rpm-build @@ -146,13 +141,6 @@ BuildRequires: usbredir-devel >= 0.7.1 %endif BuildRequires: texinfo BuildRequires: python3-sphinx -%if %{have_spice} -BuildRequires: spice-protocol >= 0.12.12 -BuildRequires: spice-server-devel >= 0.12.8 -BuildRequires: libcacard-devel -# For smartcard NSS support -BuildRequires: nss-devel -%endif BuildRequires: libseccomp-devel >= 2.4.0 # For network block driver BuildRequires: libcurl-devel @@ -272,6 +260,12 @@ Requires: usbredir >= 0.7.1 Requires: libfdt >= 1.6.0 %endif +# Since SPICE is removed from RHEL-9, the following Obsoletes: +# removes qemu-kvm-ui-spice for upgrades from RHEL-8 +# The "<= {version}" assumes RHEL-9 version >= RHEL-8 version (in +# other words RHEL-9 rebases are done together/before RHEL-8 ones) +Obsoletes: qemu-kvm-ui-spice <= %{version} + %description -n qemu-kvm-core qemu-kvm is an open source virtualizer that provides hardware emulation for the KVM hypervisor. qemu-kvm acts as a virtual @@ -398,19 +392,6 @@ Install this package if you want to access remote disks using the Secure Shell (SSH) protocol. 
-%if %{have_spice} -%package ui-spice -Summary: QEMU spice support -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} -%if %{have_opengl} -Requires: %{name}-ui-opengl%{?_isa} = %{epoch}:%{version}-%{release} -%endif - -%description ui-spice -This package provides spice support. -%endif - - %if %{have_opengl} %package ui-opengl Summary: QEMU opengl support @@ -642,10 +623,6 @@ pushd %{qemu_kvm_build} --enable-seccomp \ --enable-slirp=system \ --enable-snappy \ -%if 0%{have_spice} - --enable-smartcard \ - --enable-spice \ -%endif --enable-system \ --enable-tcg \ --enable-tools \ @@ -707,7 +684,6 @@ make V=1 %{?_smp_mflags} $buildldflags cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm gcc %{SOURCE6} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o ksmctl -gcc %{SOURCE35} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o udev-kvm-check %endif popd @@ -761,8 +737,6 @@ mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/acceptance mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests mkdir -p $RPM_BUILD_ROOT%{testsdir}/scripts/qmp -install -p -m 0755 udev-kvm-check $RPM_BUILD_ROOT%{_udevdir} -install -p -m 0644 %{SOURCE34} $RPM_BUILD_ROOT%{_udevrulesdir} install -m 0644 scripts/dump-guest-memory.py \ $RPM_BUILD_ROOT%{_datadir}/%{name} @@ -911,8 +885,6 @@ rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-generic-fw_dynamic.* rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu-nsis.bmp rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/npcm7xx_bootrom.bin -rm -rf ${RPM_BUILD_ROOT}%{_libdir}/qemu-kvm/ui-spice-app.so - # Remove virtfs-proxy-helper files rm -rf ${RPM_BUILD_ROOT}%{_libexecdir}/virtfs-proxy-helper rm -rf ${RPM_BUILD_ROOT}%{_mandir}/man1/virtfs-proxy-helper* @@ -1122,8 +1094,6 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %config(noreplace) %{_sysconfdir}/sysconfig/ksm %{_unitdir}/ksmtuned.service %{_sbindir}/ksmtuned -%{_udevdir}/udev-kvm-check -%{_udevrulesdir}/81-kvm-rhel.rules %ghost %{_sysconfdir}/kvm %config(noreplace) 
%{_sysconfdir}/ksmtuned.conf %dir %{_sysconfdir}/%{name} @@ -1245,17 +1215,6 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %files block-ssh %{_libdir}/qemu-kvm/block-ssh.so -%if 0%{have_spice} -%files ui-spice - %{_libdir}/qemu-kvm/hw-usb-smartcard.so - %{_libdir}/qemu-kvm/audio-spice.so - %{_libdir}/qemu-kvm/ui-spice-core.so - %{_libdir}/qemu-kvm/chardev-spice.so -%ifarch x86_64 - %{_libdir}/qemu-kvm/hw-display-qxl.so -%endif -%endif - %if 0%{have_opengl} %files ui-opengl %{_libdir}/qemu-kvm/ui-egl-headless.so @@ -1264,6 +1223,16 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %endif %changelog +* Thu May 13 2021 Miroslav Rezanina - 6.0.0-2 +- kvm-Remove-message-with-running-VM-count.patch [bz#1914461] +- kvm-Remove-SPICE-and-QXL-from-x86_64-rh-devices.mak.patch [bz#1906168] +- kvm-spec-file-build-qemu-kvm-without-SPICE-and-QXL.patch [bz#1906168] +- kvm-spec-file-Obsolete-qemu-kvm-ui-spice.patch [bz#1906168] +- Resolves: bz#1914461 + (Remove KVM guest count and limit info message) +- Resolves: bz#1906168 + ([RHEL-9] qemu-kvm spec-file: Do not BuildRequire spice) + * Fri Apr 30 2021 Miroslav Rezanina - 6.0.0-1 - Rebase to QEMU 6.0 - Resolves: bz#1872569 diff --git a/rpminspect.yaml b/rpminspect.yaml index 3b74418..51de3b4 100644 --- a/rpminspect.yaml +++ b/rpminspect.yaml @@ -1,5 +1,6 @@ --- elf: - exclude_path:(^/usr/share/qemu-kvm/s390-ccw.img$)|(^/usr/share/qemu-kvm/s390-netboot.img$) + exclude_path:(.*s390-ccw.img.*)|(.*s390-netboot.img.*) inspections: badfuncs: off + diff --git a/udev-kvm-check.c b/udev-kvm-check.c deleted file mode 100644 index 928b9de..0000000 --- a/udev-kvm-check.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * udev-kvm-check.c - * - * Copyright 2018 Red Hat, Inc. 
- * - * This is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - */ - -#include -#include -#include -#include -#include - -#define DEFAULT 0 -#define FACILITY "kvm" -#define SYSCONFIG_KVM "/etc/sysconfig/kvm" - -#define COUNT_MSG \ - "%d %s now active" - -int get_threshold_from_file(FILE *fp) -{ - static const char key[] = "THRESHOLD="; - int pos = 0; - int thres; - int ch; - -start: - /* State START - at beginning of line, search for beginning of "THRESHOLD=" - * string. - */ - ch = getc(fp); - if (ch == EOF) { - return DEFAULT; - } - if (isspace(ch)) { - goto start; - } - if (ch == 'T') { - pos = 1; - goto key; - } - goto eol; - -eol: - /* State EOL - loop until end of line */ - ch = getc(fp); - if (ch == EOF) { - return DEFAULT; - } - if (ch == '\n') { - goto start; - } - goto eol; - -key: - /* State KEY - match "THRESHOLD=" string, go to THRESHOLD if found */ - ch = getc(fp); - if (ch == EOF) { - return DEFAULT; - } - if (ch == key[pos]) { - pos++; - if (key[pos] == 0) { - goto threshold; - } else { - goto key; - } - } - goto eol; - -threshold: - /* State THRESHOLD - parse number using fscanf, expect comment or space - * or EOL. 
- */ - ch = getc(fp); - if (ch == EOF) { - return DEFAULT; - } - if (!isdigit(ch)) { - goto eol; - } - ungetc(ch, fp); - if (fscanf(fp, "%d", &thres) != 1) { - return DEFAULT; - } - ch = getc(fp); - if (ch == '#' || ch == EOF || ch == '\n' || isspace(ch)) { - return thres; - } - goto eol; -} - -int get_threshold() -{ - FILE *fp = fopen(SYSCONFIG_KVM, "r"); - int val; - - if (!fp) { - return DEFAULT; - } - - val = get_threshold_from_file(fp); - fclose (fp); - return val; -} - -const char *guest(int count) -{ - return (count == 1 ? "guest" : "guests"); -} - -void emit_count_message(int count) -{ - openlog(FACILITY, LOG_CONS, LOG_USER); - syslog(LOG_INFO, COUNT_MSG, count, guest(count)); - closelog(); -} - -int main(int argc, char **argv) -{ - int count, threshold; - - if (argc < 3) - exit(1); - - count = atoi(argv[1]); - threshold = get_threshold(); - - if (!strcmp(argv[2], "create")) { - if (threshold == 0 || count > threshold) { - emit_count_message(count); - } - } else { - if (count >= threshold) { - emit_count_message(count); - } - } - - return 0; -} From 7abe74f2acd9b04aba0dcd101271f4a0874123c0 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 22 Apr 2021 13:57:41 +0200 Subject: [PATCH 120/195] Add README file Resolves: rhbz#1948979 --- README.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 README.rst diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..4ca14c6 --- /dev/null +++ b/README.rst @@ -0,0 +1,18 @@ +=================== +qemu-kvm development +=================== + +qemu-kvm is maintained in a `source tree`_ rather than directly in dist-git +using packit service that provides way to develope using regular source code +structure and provides way to generate SRPM and build using koji service. + +Developers deliver all changes to source-git using merge request. Only maintainers +will be pushing changes sent to source-git to dist-git. 
+ +Each release in dist-git is tagged in the source repository so you can easily +check out the source tree for a build. The tags are in the format +name-version-release, but note release doesn't contain the dist tag since the +source can be built in different build roots (Fedora, CentOS, etc.) + +.. _source tree: https://gitlab.com/redhat/centos-stream/src/qemu-kvm + From bcf0166539acf5a9f715ff1a42438a84ea3399e6 Mon Sep 17 00:00:00 2001 From: Yanan Fu Date: Tue, 18 May 2021 15:38:31 +0800 Subject: [PATCH 121/195] Initial gating.yaml for RHEL9 OSCI gating BZ:https://bugzilla.redhat.com/show_bug.cgi?id=1950605 Signed-off-by: Yanan Fu --- gating.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 gating.yaml diff --git a/gating.yaml b/gating.yaml new file mode 100644 index 0000000..8d17eb8 --- /dev/null +++ b/gating.yaml @@ -0,0 +1,9 @@ +# recipients: kvmqe-ci, yfu +--- !Policy +product_versions: + - rhel-9 +decision_context: osci_compose_gate +subject_type: brew-build +rules: + - !PassingTestCaseRule {test_case_name: kvm-ci.qemu-kvm.x86_64-intel.brew-build.gating.tier1.functional} + - !PassingTestCaseRule {test_case_name: kvm-ci.qemu-kvm.x86_64-amd.brew-build.gating.tier1.functional} From 013897ee56e02701dd56ecf11a3efdc343cd1c7c Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Tue, 25 May 2021 09:43:37 +0200 Subject: [PATCH 122/195] * Tue May 25 2021 Miroslav Reznaina - 6.0.0-3 - kvm-hw-s390x-Remove-the-RHEL7-only-machine-type.patch [bz#1944730] - Resolves: bz#1944730 (Remove RHEL7 machine type (s390-ccw-virtio-rhel7.5.0)) --- ...x-Remove-the-RHEL7-only-machine-type.patch | 67 +++++++++++++++++++ qemu-kvm.spec | 9 ++- rpminspect.yaml | 2 +- 3 files changed, 76 insertions(+), 2 deletions(-) create mode 100644 kvm-hw-s390x-Remove-the-RHEL7-only-machine-type.patch diff --git a/kvm-hw-s390x-Remove-the-RHEL7-only-machine-type.patch b/kvm-hw-s390x-Remove-the-RHEL7-only-machine-type.patch new file mode 100644 index 0000000..cf4e6bb --- /dev/null +++ 
b/kvm-hw-s390x-Remove-the-RHEL7-only-machine-type.patch @@ -0,0 +1,67 @@ +From 64a9a5c971c424ff2d8074f52d48dd6233dc97ac Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 18 May 2021 18:27:54 +0200 +Subject: [PATCH] hw/s390x: Remove the RHEL7-only machine type + +RH-Author: Thomas Huth +RH-MergeRequest: 7: hw/s390x: Remove the RHEL7-only machine type +RH-Commit: [1/1] 8c53d4ae81 (thuth/qemu-kvm) +RH-Bugzilla: 1944730 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +We only support live migration from RHEL8 in RHEL9, so we can remove +the RHEL7 machine type "s390-ccw-virtio-rhel7.5.0". + +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 32 -------------------------------- + 1 file changed, 32 deletions(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 8df6dd1c71..432f36bce5 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1125,38 +1125,6 @@ static void ccw_machine_rhel760_class_options(MachineClass *mc) + } + DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", false); + +-static void ccw_machine_rhel750_instance_options(MachineState *machine) +-{ +- static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V2_11 }; +- ccw_machine_rhel760_instance_options(machine); +- +- /* before 2.12 we emulated the very first z900, and RHEL 7.5 is +- based on 2.10 */ +- s390_set_qemu_cpu_model(0x2064, 7, 1, qemu_cpu_feat); +- +- /* bpb and ppa15 were only in the full model in RHEL 7.5 */ +- s390_cpudef_featoff_greater(11, 1, S390_FEAT_PPA15); +- s390_cpudef_featoff_greater(11, 1, S390_FEAT_BPB); +-} +- +-GlobalProperty ccw_compat_rhel_7_5[] = { +- { +- .driver = TYPE_SCLP_EVENT_FACILITY, +- .property = "allow_all_mask_sizes", +- .value = "off", +- }, +-}; +-const size_t ccw_compat_rhel_7_5_len = G_N_ELEMENTS(ccw_compat_rhel_7_5); +- +-static void ccw_machine_rhel750_class_options(MachineClass *mc) +-{ +- ccw_machine_rhel760_class_options(mc); +- 
compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); +- compat_props_add(mc->compat_props, ccw_compat_rhel_7_5, ccw_compat_rhel_7_5_len); +- S390_CCW_MACHINE_CLASS(mc)->hpage_1m_allowed = false; +-} +-DEFINE_CCW_MACHINE(rhel750, "rhel7.5.0", false); +- + static void ccw_machine_register_types(void) + { + type_register_static(&ccw_machine_info); +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 3f01cf8..2a6e82a 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -65,7 +65,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.0.0 -Release: 2%{?rcversion}%{?dist} +Release: 3%{?rcversion}%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -117,6 +117,8 @@ Patch0017: 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch Patch0018: 0018-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch # For bz#1906168 - [RHEL-9] qemu-kvm spec-file: Do not BuildRequire spice Patch19: kvm-Remove-SPICE-and-QXL-from-x86_64-rh-devices.mak.patch +# For bz#1944730 - Remove RHEL7 machine type (s390-ccw-virtio-rhel7.5.0) +Patch20: kvm-hw-s390x-Remove-the-RHEL7-only-machine-type.patch BuildRequires: wget BuildRequires: rpm-build @@ -1223,6 +1225,11 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %endif %changelog +* Tue May 25 2021 Miroslav Reznaina - 6.0.0-3 +- kvm-hw-s390x-Remove-the-RHEL7-only-machine-type.patch [bz#1944730] +- Resolves: bz#1944730 + (Remove RHEL7 machine type (s390-ccw-virtio-rhel7.5.0)) + * Thu May 13 2021 Miroslav Rezanina - 6.0.0-2 - kvm-Remove-message-with-running-VM-count.patch [bz#1914461] - kvm-Remove-SPICE-and-QXL-from-x86_64-rh-devices.mak.patch [bz#1906168] diff --git a/rpminspect.yaml b/rpminspect.yaml index 51de3b4..53ce59a 100644 --- a/rpminspect.yaml +++ b/rpminspect.yaml @@ -1,6 +1,6 @@ --- elf: - 
exclude_path:(.*s390-ccw.img.*)|(.*s390-netboot.img.*) + exclude_path: (.*s390-ccw.img.*)|(.*s390-netboot.img.*) inspections: badfuncs: off From 261bc7bc3b647f38f110cc375dcb4b0e0ccfcdb4 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 31 May 2021 09:54:40 +0200 Subject: [PATCH 123/195] * Mon May 31 2021 Miroslav Rezanina - 6.0.0-4 - kvm-s390x-redhat-disable-experimental-3270-device.patch - Resolves: bz#1962479 (Disable the 'x-terminal3270' device in qemu-kvm on s390x) --- ...hat-disable-experimental-3270-device.patch | 40 +++++++++++++++++++ qemu-kvm.spec | 9 ++++- 2 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 kvm-s390x-redhat-disable-experimental-3270-device.patch diff --git a/kvm-s390x-redhat-disable-experimental-3270-device.patch b/kvm-s390x-redhat-disable-experimental-3270-device.patch new file mode 100644 index 0000000..232da25 --- /dev/null +++ b/kvm-s390x-redhat-disable-experimental-3270-device.patch @@ -0,0 +1,40 @@ +From 5101527fb425c2d17ef04b0ed87b3810cf6db7bc Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Wed, 26 May 2021 11:01:00 +0200 +Subject: [PATCH] s390x/redhat: disable experimental 3270 device + +RH-Author: Cornelia Huck +RH-MergeRequest: 9: Disable the 'x-terminal3270' device in qemu-kvm on s390x +RH-Commit: [1/1] 1df70e856fba434d06587a05c44c4dc5a367f1f7 (cohuck/qemu-kvm) +RH-Bugzilla: 1962479 +RH-Acked-by: Thomas Huth +RH-Acked-by: David Hildenbrand +RH-Acked-by: Miroslav Rezanina + +The "x-terminal3270" device has been experimental for years, +has known limitations, has no known users anymore, and is not +supported by libvirt. + +Therefore, disable it in our downstream builds, as it is not +really supported. 
+ +Signed-off-by: Cornelia Huck +--- + default-configs/devices/s390x-rh-devices.mak | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/default-configs/devices/s390x-rh-devices.mak b/default-configs/devices/s390x-rh-devices.mak +index 08a15f3e01..fe8613b8f6 100644 +--- a/default-configs/devices/s390x-rh-devices.mak ++++ b/default-configs/devices/s390x-rh-devices.mak +@@ -6,7 +6,6 @@ CONFIG_S390_FLIC=y + CONFIG_S390_FLIC_KVM=y + CONFIG_SCLPCONSOLE=y + CONFIG_SCSI=y +-CONFIG_TERMINAL3270=y + CONFIG_VFIO=y + CONFIG_VFIO_AP=y + CONFIG_VFIO_CCW=y +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 2a6e82a..3eb1eaa 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -65,7 +65,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.0.0 -Release: 3%{?rcversion}%{?dist} +Release: 4%{?rcversion}%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -119,6 +119,8 @@ Patch0018: 0018-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch Patch19: kvm-Remove-SPICE-and-QXL-from-x86_64-rh-devices.mak.patch # For bz#1944730 - Remove RHEL7 machine type (s390-ccw-virtio-rhel7.5.0) Patch20: kvm-hw-s390x-Remove-the-RHEL7-only-machine-type.patch +# For bz#1962479 - Disable the 'x-terminal3270' device in qemu-kvm on s390x +Patch21: kvm-s390x-redhat-disable-experimental-3270-device.patch BuildRequires: wget BuildRequires: rpm-build @@ -1225,6 +1227,11 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %endif %changelog +* Mon May 31 2021 Miroslav Rezanina - 6.0.0-4 +- kvm-s390x-redhat-disable-experimental-3270-device.patch +- Resolves: bz#1962479 + (Disable the 'x-terminal3270' device in qemu-kvm on s390x) + * Tue May 25 2021 Miroslav Reznaina - 6.0.0-3 - kvm-hw-s390x-Remove-the-RHEL7-only-machine-type.patch [bz#1944730] - Resolves: bz#1944730 From 7b74d99404be1082c47902dd5c147285b9db9f98 
Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Tue, 8 Jun 2021 02:39:22 -0400 Subject: [PATCH 124/195] * Tue Jun 08 2021 Miroslav Rezanina - 6.0.0-5 - kvm-arm-virt-Register-highmem-and-gic-version-as-class-p.patch [bz#1952449] - kvm-hw-arm-virt-Add-8.5-and-9.0-machine-types-and-remove.patch [bz#1952449] - kvm-aarch64-rh-devices-add-CONFIG_PVPANIC_PCI.patch [bz#1747467] - kvm-spec-Do-not-build-qemu-kvm-block-gluster.patch [bz#1964795] - kvm-spec-Do-not-link-pcnet-and-ne2k_pci-roms.patch [bz#1965961] - kvm-redhat-s390x-add-rhel-8.5.0-compat-machine.patch [bz#1957194] - kvm-redhat-add-missing-entries-in-hw_compat_rhel_8_4.patch [bz#1957194] - kvm-redhat-Define-pseries-rhel8.5.0-machine-type.patch [bz#1957194] - kvm-virtio-net-failover-add-missing-remove_migration_sta.patch [bz#1957194] - kvm-hw-arm-virt-Disable-PL011-clock-migration-through-hw.patch [bz#1957194] - kvm-virtio-blk-Fix-rollback-path-in-virtio_blk_data_plan.patch [bz#1957194] - kvm-virtio-blk-Configure-all-host-notifiers-in-a-single-.patch [bz#1957194] - kvm-virtio-scsi-Set-host-notifiers-and-callbacks-separat.patch [bz#1957194] - kvm-virtio-scsi-Configure-all-host-notifiers-in-a-single.patch [bz#1957194] - kvm-hw-arm-smmuv3-Another-range-invalidation-fix.patch [bz#1957194] - Resolves: bz#1952449 ([aarch64] define RHEL9 machine types) - Resolves: bz#1747467 ([aarch64] [qemu] PVPANIC support) - Resolves: bz#1964795 (Remove qemu-kvm-block-gluster package) - Resolves: bz#1965961 (Remove links to not build roms) - Resolves: bz#1957194 (Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta) --- .gitignore | 3 + ...64-rh-devices-add-CONFIG_PVPANIC_PCI.patch | 38 ++++++ ...r-highmem-and-gic-version-as-class-p.patch | 77 +++++++++++ ...mmuv3-Another-range-invalidation-fix.patch | 111 ++++++++++++++++ ...8.5-and-9.0-machine-types-and-remove.patch | 63 +++++++++ ...ble-PL011-clock-migration-through-hw.patch | 51 +++++++ ...efine-pseries-rhel8.5.0-machine-type.patch | 67 ++++++++++ 
...issing-entries-in-hw_compat_rhel_8_4.patch | 42 ++++++ ...-s390x-add-rhel-8.5.0-compat-machine.patch | 59 +++++++++ ...gure-all-host-notifiers-in-a-single-.patch | 107 +++++++++++++++ ...ollback-path-in-virtio_blk_data_plan.patch | 83 ++++++++++++ ...ver-add-missing-remove_migration_sta.patch | 77 +++++++++++ ...igure-all-host-notifiers-in-a-single.patch | 91 +++++++++++++ ...host-notifiers-and-callbacks-separat.patch | 125 ++++++++++++++++++ qemu-kvm.spec | 98 ++++++++------ 15 files changed, 1052 insertions(+), 40 deletions(-) create mode 100644 kvm-aarch64-rh-devices-add-CONFIG_PVPANIC_PCI.patch create mode 100644 kvm-arm-virt-Register-highmem-and-gic-version-as-class-p.patch create mode 100644 kvm-hw-arm-smmuv3-Another-range-invalidation-fix.patch create mode 100644 kvm-hw-arm-virt-Add-8.5-and-9.0-machine-types-and-remove.patch create mode 100644 kvm-hw-arm-virt-Disable-PL011-clock-migration-through-hw.patch create mode 100644 kvm-redhat-Define-pseries-rhel8.5.0-machine-type.patch create mode 100644 kvm-redhat-add-missing-entries-in-hw_compat_rhel_8_4.patch create mode 100644 kvm-redhat-s390x-add-rhel-8.5.0-compat-machine.patch create mode 100644 kvm-virtio-blk-Configure-all-host-notifiers-in-a-single-.patch create mode 100644 kvm-virtio-blk-Fix-rollback-path-in-virtio_blk_data_plan.patch create mode 100644 kvm-virtio-net-failover-add-missing-remove_migration_sta.patch create mode 100644 kvm-virtio-scsi-Configure-all-host-notifiers-in-a-single.patch create mode 100644 kvm-virtio-scsi-Set-host-notifiers-and-callbacks-separat.patch diff --git a/.gitignore b/.gitignore index 9bf151b..b919b12 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,6 @@ /qemu-5.2.0-rc3.tar.xz /qemu-5.2.0.tar.xz /qemu-6.0.0.tar.xz +/qemu-kvm*.src.rpm +/patches.* +/*.orig diff --git a/kvm-aarch64-rh-devices-add-CONFIG_PVPANIC_PCI.patch b/kvm-aarch64-rh-devices-add-CONFIG_PVPANIC_PCI.patch new file mode 100644 index 0000000..800b567 --- /dev/null +++ 
b/kvm-aarch64-rh-devices-add-CONFIG_PVPANIC_PCI.patch @@ -0,0 +1,38 @@ +From 5cc3aacc241c0d26f63d51efd1b6fa35490d37c9 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 26 May 2021 10:33:20 -0400 +Subject: [PATCH 03/15] aarch64-rh-devices: add CONFIG_PVPANIC_PCI +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 2: aarch64-rh-devices: add CONFIG_PVPANIC_PCI +RH-Commit: [1/1] f3d0a94a91ea1b3fff925f32affce1b77469e206 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 1747467 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones + +We want to enable the PVPANIC facility on ARM. On aarch64, +the PVPANIC PCI device is used (on x86_64 the ISA device is used). +so let's set the PVPANIC_PCI config. + +Signed-off-by: Eric Auger +Signed-off-by: Miroslav Rezanina +--- + default-configs/devices/aarch64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/default-configs/devices/aarch64-rh-devices.mak b/default-configs/devices/aarch64-rh-devices.mak +index a4d67274c0..4220469178 100644 +--- a/default-configs/devices/aarch64-rh-devices.mak ++++ b/default-configs/devices/aarch64-rh-devices.mak +@@ -26,3 +26,4 @@ CONFIG_TPM_EMULATOR=y + CONFIG_TPM_TIS_SYSBUS=y + CONFIG_PTIMER=y + CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y ++CONFIG_PVPANIC_PCI=y +-- +2.27.0 + diff --git a/kvm-arm-virt-Register-highmem-and-gic-version-as-class-p.patch b/kvm-arm-virt-Register-highmem-and-gic-version-as-class-p.patch new file mode 100644 index 0000000..52641bd --- /dev/null +++ b/kvm-arm-virt-Register-highmem-and-gic-version-as-class-p.patch @@ -0,0 +1,77 @@ +From e808acaa50effe471c56a48b80d5e0d2d196b495 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 25 May 2021 09:22:23 +0200 +Subject: [PATCH 01/15] arm/virt: Register highmem and gic-version as class + properties + +RH-Author: Eric Auger +RH-MergeRequest: 1: Add 9.0.0 and 8.5.0 arm-virt machine types +RH-Commit: [1/2] 
1ff3970773e09f2efb194430511928ae852c02ba (eauger1/centos-qemu-kvm) +RH-Bugzilla: 1952449 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones + +This mirrors changes made in commit +b91def7b8382 ("arm/virt: Register most properties as class properties") +for the highmem and gic-version properties. This makes the +code easier to diff against upstream. + +Signed-off-by: Eric Auger +Signed-off-by: Miroslav Rezanina +--- + hw/arm/virt.c | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 080cf54ef1..51a415570c 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2996,6 +2996,18 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + object_class_property_set_description(oc, "acpi", + "Enable ACPI"); + ++ object_class_property_add_bool(oc, "highmem", virt_get_highmem, ++ virt_set_highmem); ++ object_class_property_set_description(oc, "highmem", ++ "Set on/off to enable/disable using " ++ "physical address space above 32 bits"); ++ ++ object_class_property_add_str(oc, "gic-version", virt_get_gic_version, ++ virt_set_gic_version); ++ object_class_property_set_description(oc, "gic-version", ++ "Set GIC version. " ++ "Valid values are 2, 3, host and max"); ++ + object_class_property_add_str(oc, "x-oem-id", + virt_get_oem_id, + virt_set_oem_id); +@@ -3004,7 +3016,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "in ACPI table header." 
+ "The string may be up to 6 bytes in size"); + +- + object_class_property_add_str(oc, "x-oem-table-id", + virt_get_oem_table_id, + virt_set_oem_table_id); +@@ -3027,17 +3038,7 @@ static void rhel_virt_instance_init(Object *obj) + + /* High memory is enabled by default */ + vms->highmem = true; +- object_property_add_bool(obj, "highmem", virt_get_highmem, +- virt_set_highmem); +- object_property_set_description(obj, "highmem", +- "Set on/off to enable/disable using " +- "physical address space above 32 bits"); + vms->gic_version = VIRT_GIC_VERSION_NOSEL; +- object_property_add_str(obj, "gic-version", virt_get_gic_version, +- virt_set_gic_version); +- object_property_set_description(obj, "gic-version", +- "Set GIC version. " +- "Valid values are 2, 3, host and max"); + + vms->highmem_ecam = !vmc->no_highmem_ecam; + +-- +2.27.0 + diff --git a/kvm-hw-arm-smmuv3-Another-range-invalidation-fix.patch b/kvm-hw-arm-smmuv3-Another-range-invalidation-fix.patch new file mode 100644 index 0000000..7fb1c3a --- /dev/null +++ b/kvm-hw-arm-smmuv3-Another-range-invalidation-fix.patch @@ -0,0 +1,111 @@ +From e9abef24fae799febf81cd4ac02efe8987a698e8 Mon Sep 17 00:00:00 2001 +From: Auger Eric +Date: Wed, 26 May 2021 16:07:40 -0400 +Subject: [PATCH 15/15] hw/arm/smmuv3: Another range invalidation fix + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta +RH-Commit: [12/12] dc064684e5f3f11d955565b05d37b0f2d9f79b91 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier + +6d9cd115b9 ("hw/arm/smmuv3: Enforce invalidation on a power of two range") +failed to completely fix misalignment issues with range +invalidation. 
For instance invalidations patterns like "invalidate 32 +4kB pages starting from 0xff395000 are not correctly handled" due +to the fact the previous fix only made sure the number of invalidated +pages were a power of 2 but did not properly handle the start +address was not aligned with the range. This can be noticed when +booting a fedora 33 with protected virtio-blk-pci. + +Signed-off-by: Eric Auger +Fixes: 6d9cd115b9 ("hw/arm/smmuv3: Enforce invalidation on a power of two range") +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +(cherry picked from commit 219729cfbf9e979020bffedac6a790144173ec62) +Signed-off-by: Eric Auger +Signed-off-by: Miroslav Rezanina +--- + hw/arm/smmuv3.c | 50 +++++++++++++++++++++++++------------------------ + 1 file changed, 26 insertions(+), 24 deletions(-) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 8705612535..e1979282e4 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -856,43 +856,45 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova, + + static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd) + { +- uint8_t scale = 0, num = 0, ttl = 0; +- dma_addr_t addr = CMD_ADDR(cmd); ++ dma_addr_t end, addr = CMD_ADDR(cmd); + uint8_t type = CMD_TYPE(cmd); + uint16_t vmid = CMD_VMID(cmd); ++ uint8_t scale = CMD_SCALE(cmd); ++ uint8_t num = CMD_NUM(cmd); ++ uint8_t ttl = CMD_TTL(cmd); + bool leaf = CMD_LEAF(cmd); + uint8_t tg = CMD_TG(cmd); +- uint64_t first_page = 0, last_page; +- uint64_t num_pages = 1; ++ uint64_t num_pages; ++ uint8_t granule; + int asid = -1; + +- if (tg) { +- scale = CMD_SCALE(cmd); +- num = CMD_NUM(cmd); +- ttl = CMD_TTL(cmd); +- num_pages = (num + 1) * BIT_ULL(scale); +- } +- + if (type == SMMU_CMD_TLBI_NH_VA) { + asid = CMD_ASID(cmd); + } + +- /* Split invalidations into ^2 range invalidations */ +- last_page = num_pages - 1; +- while (num_pages) { +- uint8_t granule = tg * 2 + 10; +- uint64_t mask, count; ++ if (!tg) { ++ trace_smmuv3_s1_range_inval(vmid, 
asid, addr, tg, 1, ttl, leaf); ++ smmuv3_inv_notifiers_iova(s, asid, addr, tg, 1); ++ smmu_iotlb_inv_iova(s, asid, addr, tg, 1, ttl); ++ return; ++ } ++ ++ /* RIL in use */ + +- mask = dma_aligned_pow2_mask(first_page, last_page, 64 - granule); +- count = mask + 1; ++ num_pages = (num + 1) * BIT_ULL(scale); ++ granule = tg * 2 + 10; ++ ++ /* Split invalidations into ^2 range invalidations */ ++ end = addr + (num_pages << granule) - 1; + +- trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, count, ttl, leaf); +- smmuv3_inv_notifiers_iova(s, asid, addr, tg, count); +- smmu_iotlb_inv_iova(s, asid, addr, tg, count, ttl); ++ while (addr != end + 1) { ++ uint64_t mask = dma_aligned_pow2_mask(addr, end, 64); + +- num_pages -= count; +- first_page += count; +- addr += count * BIT_ULL(granule); ++ num_pages = (mask + 1) >> granule; ++ trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf); ++ smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages); ++ smmu_iotlb_inv_iova(s, asid, addr, tg, num_pages, ttl); ++ addr += mask + 1; + } + } + +-- +2.27.0 + diff --git a/kvm-hw-arm-virt-Add-8.5-and-9.0-machine-types-and-remove.patch b/kvm-hw-arm-virt-Add-8.5-and-9.0-machine-types-and-remove.patch new file mode 100644 index 0000000..6415284 --- /dev/null +++ b/kvm-hw-arm-virt-Add-8.5-and-9.0-machine-types-and-remove.patch @@ -0,0 +1,63 @@ +From ee0be09f3598596e41b3fc2dbefef3382c5b0541 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 25 May 2021 09:22:24 +0200 +Subject: [PATCH 02/15] hw/arm/virt: Add 8.5 and 9.0 machine types and remove + older ones + +RH-Author: Eric Auger +RH-MergeRequest: 1: Add 9.0.0 and 8.5.0 arm-virt machine types +RH-Commit: [2/2] ace4619a1d505a3b552a236260b259bd6ddabc00 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 1952449 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones + +Add 8.5 and 9.0 machine types and remove older ones. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Miroslav Rezanina +--- + hw/arm/virt.c | 24 +++++------------------- + 1 file changed, 5 insertions(+), 19 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 51a415570c..e4aa794f83 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3095,28 +3095,14 @@ static void rhel_machine_init(void) + } + type_init(rhel_machine_init); + +-static void rhel840_virt_options(MachineClass *mc) ++static void rhel900_virt_options(MachineClass *mc) + { + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); +- compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); + } +-DEFINE_RHEL_MACHINE_AS_LATEST(8, 4, 0) ++DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) + +-static void rhel830_virt_options(MachineClass *mc) ++static void rhel850_virt_options(MachineClass *mc) + { +- VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); +- +- rhel840_virt_options(mc); +- compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); +- vmc->no_kvm_steal_time = true; +-} +-DEFINE_RHEL_MACHINE(8, 3, 0) +- +-static void rhel820_virt_options(MachineClass *mc) +-{ +- rhel830_virt_options(mc); +- compat_props_add(mc->compat_props, hw_compat_rhel_8_2, hw_compat_rhel_8_2_len); +- mc->numa_mem_supported = true; +- mc->auto_enable_numa_with_memdev = false; ++ rhel900_virt_options(mc); + } +-DEFINE_RHEL_MACHINE(8, 2, 0) ++DEFINE_RHEL_MACHINE(8, 5, 0) +-- +2.27.0 + diff --git a/kvm-hw-arm-virt-Disable-PL011-clock-migration-through-hw.patch b/kvm-hw-arm-virt-Disable-PL011-clock-migration-through-hw.patch new file mode 100644 index 0000000..6ce6dd0 --- /dev/null +++ b/kvm-hw-arm-virt-Disable-PL011-clock-migration-through-hw.patch @@ -0,0 +1,51 @@ +From 7f76c347f17c5fc60f3bcb99ad65e26f9da4ed9f Mon Sep 17 00:00:00 2001 +From: Auger Eric +Date: Thu, 20 May 2021 19:23:26 -0400 +Subject: [PATCH 10/15] hw/arm/virt: Disable PL011 clock migration through + hw_compat_rhel_8_3 + +RH-Author: Miroslav 
Rezanina +RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta +RH-Commit: [6/12] 5f52975350b2497ee82cc5c9b8ba930e3a9b8c3d (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier + +Disable PL011 clock migration for machine types before +virt-rhel8.4.0. + +The regression was introduced by aac63e0e6ea3 +("hw/char/pl011: add a clock input"), in 8.4, +causing failure of migration between qemu 8.4 towards +older ones. + +The fix was taken in 8.5 as part of the rebase, +e6fa978d8343 ("hw/arm/virt: Disable pl011 clock migration +if needed"). But the compat needs to be applied in +hw_compat_rhel_8_3[]. + +Signed-off-by: Eric Auger +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/core/machine.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index c665e869de..6c534e14fa 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -69,6 +69,8 @@ GlobalProperty hw_compat_rhel_8_3[] = { + { "nvme", "use-intel-id", "on"}, + /* hw_compat_rhel_8_3 from hw_compat_5_1 */ + { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "pl011", "migrate-clk", "off" }, + /* hw_compat_rhel_8_3 bz 1912846 */ + { "pci-xhci", "x-rh-late-msi-cap", "off" }, + /* hw_compat_rhel_8_3 from hw_compat_5_1 */ +-- +2.27.0 + diff --git a/kvm-redhat-Define-pseries-rhel8.5.0-machine-type.patch b/kvm-redhat-Define-pseries-rhel8.5.0-machine-type.patch new file mode 100644 index 0000000..eb9f32a --- /dev/null +++ b/kvm-redhat-Define-pseries-rhel8.5.0-machine-type.patch @@ -0,0 +1,67 @@ +From 1194549a01a472b9ce21819cd32fe253d6263cd6 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Mon, 10 May 2021 15:37:40 -0400 +Subject: [PATCH 08/15] redhat: Define pseries-rhel8.5.0 machine type + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 5: Synchronize 
RHEL-AV 8.5 release 18 to RHEL 9 Beta +RH-Commit: [3/12] accc2ed549b94360bc6ab180c4266466816f122e (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier + +Note that the compat entries for 8.4.0 were already wired up +in the rhel-8.4.0 machine type. + +Signed-off-by: Greg Kurz +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/ppc/spapr.c | 18 +++++++++++++++--- + 1 file changed, 15 insertions(+), 3 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index f9e8dfdfc9..653574ba91 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -5083,6 +5083,19 @@ static void spapr_machine_rhel_default_class_options(MachineClass *mc) + mc->max_cpus = 384; + } + ++/* ++ * pseries-rhel8.5.0 ++ * like pseries-6.0 ++ */ ++ ++static void spapr_machine_rhel850_class_options(MachineClass *mc) ++{ ++ /* The default machine type must apply the RHEL specific defaults */ ++ spapr_machine_rhel_default_class_options(mc); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); ++ + /* + * pseries-rhel8.4.0 + * like pseries-5.2 +@@ -5090,13 +5103,12 @@ static void spapr_machine_rhel_default_class_options(MachineClass *mc) + + static void spapr_machine_rhel840_class_options(MachineClass *mc) + { +- /* The default machine type must apply the RHEL specific defaults */ +- spapr_machine_rhel_default_class_options(mc); ++ spapr_machine_rhel850_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_4, + hw_compat_rhel_8_4_len); + } + +-DEFINE_SPAPR_MACHINE(rhel840, "rhel8.4.0", true); ++DEFINE_SPAPR_MACHINE(rhel840, "rhel8.4.0", false); + + /* + * pseries-rhel8.3.0 +-- +2.27.0 + diff --git a/kvm-redhat-add-missing-entries-in-hw_compat_rhel_8_4.patch b/kvm-redhat-add-missing-entries-in-hw_compat_rhel_8_4.patch new file mode 100644 index 0000000..b70c6de --- /dev/null +++ 
b/kvm-redhat-add-missing-entries-in-hw_compat_rhel_8_4.patch @@ -0,0 +1,42 @@ +From 8be260b07df50891463e6efbd45f84b1b8323983 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Mon, 10 May 2021 15:37:39 -0400 +Subject: [PATCH 07/15] redhat: add missing entries in hw_compat_rhel_8_4 + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta +RH-Commit: [2/12] 69b6e4dff872478b6d0b09b2587f55967e1c9740 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier + +Some entries from hw_compat_5_2 were missing. + +Signed-off-by: Cornelia Huck +Signed-off-by: Greg Kurz +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/core/machine.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 848e7fdff6..c665e869de 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -44,6 +44,10 @@ GlobalProperty hw_compat_rhel_8_4[] = { + { "ICH9-LPC", "smm-compat", "on"}, + /* hw_compat_rhel_8_4 from hw_compat_5_2 */ + { "PIIX4_PM", "smm-compat", "on"}, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "virtio-blk-device", "report-discard-granularity", "off" }, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "virtio-net-pci", "vectors", "3"}, + }; + const size_t hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4); + +-- +2.27.0 + diff --git a/kvm-redhat-s390x-add-rhel-8.5.0-compat-machine.patch b/kvm-redhat-s390x-add-rhel-8.5.0-compat-machine.patch new file mode 100644 index 0000000..1be8506 --- /dev/null +++ b/kvm-redhat-s390x-add-rhel-8.5.0-compat-machine.patch @@ -0,0 +1,59 @@ +From a5e149c13279386c4fc3fae130289ac4ac53bd3e Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Mon, 10 May 2021 14:41:31 -0400 +Subject: [PATCH 06/15] redhat: s390x: add rhel-8.5.0 compat machine + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 5: 
Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta +RH-Commit: [1/12] 3560ef3f773425f1479a131df7a351df2cbb502c (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier + +Note that the compat entries for 8.4.0 were already wired up +in the rhel-8.4.0 machine type. + +Signed-off-by: Cornelia Huck +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/s390x/s390-virtio-ccw.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 432f36bce5..667a99f336 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1074,15 +1074,26 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + DEFINE_CCW_MACHINE(2_4, "2.4", false); + #endif + ++static void ccw_machine_rhel850_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel850_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); ++ + static void ccw_machine_rhel840_instance_options(MachineState *machine) + { ++ ccw_machine_rhel850_instance_options(machine); + } + + static void ccw_machine_rhel840_class_options(MachineClass *mc) + { ++ ccw_machine_rhel850_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); + } +-DEFINE_CCW_MACHINE(rhel840, "rhel8.4.0", true); ++DEFINE_CCW_MACHINE(rhel840, "rhel8.4.0", false); + + static void ccw_machine_rhel820_instance_options(MachineState *machine) + { +-- +2.27.0 + diff --git a/kvm-virtio-blk-Configure-all-host-notifiers-in-a-single-.patch b/kvm-virtio-blk-Configure-all-host-notifiers-in-a-single-.patch new file mode 100644 index 0000000..e670a6c --- /dev/null +++ b/kvm-virtio-blk-Configure-all-host-notifiers-in-a-single-.patch @@ -0,0 +1,107 @@ +From f02134f067150d02d1a74ff5aea151096679492e Mon Sep 17 00:00:00 
2001 +From: Greg Kurz +Date: Wed, 26 May 2021 09:03:52 -0400 +Subject: [PATCH 12/15] virtio-blk: Configure all host notifiers in a single MR + transaction + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta +RH-Commit: [9/12] 609631835874ea62dad1ffbd469e83744299ee07 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier + +This allows the virtio-blk-pci device to batch the setup of all its +host notifiers. This significantly improves boot time of VMs with a +high number of vCPUs, e.g. from 3m26.186s down to 0m58.023s for a +pseries machine with 384 vCPUs. + +Note that memory_region_transaction_commit() must be called before +virtio_bus_cleanup_host_notifier() because the latter might close +ioeventfds that the transaction still assumes to be around when it +commits. + +Signed-off-by: Greg Kurz +Message-Id: <20210407143501.244343-3-groug@kaod.org> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit d0267da614890b8f817364ae25850cdbb580a569) +Signed-off-by: Greg Kurz +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/block/dataplane/virtio-blk.c | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index d7b5c95d26..cd81893d1d 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -198,19 +198,30 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + goto fail_guest_notifiers; + } + ++ memory_region_transaction_begin(); ++ + /* Set up virtqueue notify */ + for (i = 0; i < nvqs; i++) { + r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true); + if (r != 0) { ++ int j = i; ++ + fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); + while (i--) { + virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); ++ } ++ ++ memory_region_transaction_commit(); ++ ++ while (j--) { + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); + } + goto fail_host_notifiers; + } + } + ++ memory_region_transaction_commit(); ++ + s->starting = false; + vblk->dataplane_started = true; + trace_virtio_blk_data_plane_start(s); +@@ -246,8 +257,15 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + return 0; + + fail_aio_context: ++ memory_region_transaction_begin(); ++ + for (i = 0; i < nvqs; i++) { + virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); ++ } ++ ++ memory_region_transaction_commit(); ++ ++ for (i = 0; i < nvqs; i++) { + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); + } + fail_host_notifiers: +@@ -312,8 +330,15 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) + + aio_context_release(s->ctx); + ++ memory_region_transaction_begin(); ++ + for (i = 0; i < nvqs; i++) { + virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); ++ } ++ ++ memory_region_transaction_commit(); ++ ++ for (i = 0; i < nvqs; i++) { + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); + } + +-- +2.27.0 + diff --git a/kvm-virtio-blk-Fix-rollback-path-in-virtio_blk_data_plan.patch 
b/kvm-virtio-blk-Fix-rollback-path-in-virtio_blk_data_plan.patch new file mode 100644 index 0000000..d08e1aa --- /dev/null +++ b/kvm-virtio-blk-Fix-rollback-path-in-virtio_blk_data_plan.patch @@ -0,0 +1,83 @@ +From b8febd2f080de1f12e68cc233c68c7a39835a3c4 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Wed, 26 May 2021 09:03:51 -0400 +Subject: [PATCH 11/15] virtio-blk: Fix rollback path in + virtio_blk_data_plane_start() + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta +RH-Commit: [8/12] 73d05400a537ce1847605dca58aff99e0a905919 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier + +When dataplane multiqueue support was added in QEMU 2.7, the path +that would rollback guest notifiers assignment in case of error +simply got dropped. + +Later on, when Error was added to blk_set_aio_context() in QEMU 4.1, +another error path was introduced, but it omits to rollback both +host and guest notifiers. + +It seems cleaner to fix the rollback path in one go. The patch is +simple enough that it can be adjusted if backported to a pre-4.1 +QEMU. + +Fixes: 51b04ac5c6a6 ("virtio-blk: dataplane multiqueue support") +Cc: stefanha@redhat.com +Fixes: 97896a4887a0 ("block: Add Error to blk_set_aio_context()") +Cc: kwolf@redhat.com +Signed-off-by: Greg Kurz +Reviewed-by: Stefan Hajnoczi +Message-Id: <20210407143501.244343-2-groug@kaod.org> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 570fe439e5d1b8626cf344c6bc97d90cfcaf0c79) +Signed-off-by: Greg Kurz +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/block/dataplane/virtio-blk.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index e9050c8987..d7b5c95d26 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -207,7 +207,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); + } +- goto fail_guest_notifiers; ++ goto fail_host_notifiers; + } + } + +@@ -221,7 +221,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + aio_context_release(old_context); + if (r < 0) { + error_report_err(local_err); +- goto fail_guest_notifiers; ++ goto fail_aio_context; + } + + /* Process queued requests before the ones in vring */ +@@ -245,6 +245,13 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + aio_context_release(s->ctx); + return 0; + ++ fail_aio_context: ++ for (i = 0; i < nvqs; i++) { ++ virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); ++ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); ++ } ++ fail_host_notifiers: ++ k->set_guest_notifiers(qbus->parent, nvqs, false); + fail_guest_notifiers: + /* + * If we failed to set up the guest notifiers queued requests will be +-- +2.27.0 + diff --git a/kvm-virtio-net-failover-add-missing-remove_migration_sta.patch b/kvm-virtio-net-failover-add-missing-remove_migration_sta.patch new file mode 100644 index 0000000..acd5a66 --- /dev/null +++ b/kvm-virtio-net-failover-add-missing-remove_migration_sta.patch @@ -0,0 +1,77 @@ +From 21027e308bf410293a745d4358a848e9aa037df1 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Mon, 10 May 2021 13:08:20 -0400 +Subject: [PATCH 09/15] virtio-net: failover: add missing + remove_migration_state_change_notifier() + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta 
+RH-Commit: [4/12] 884f0f99fbbba4312663ec6232b1d8c9576df84e (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier + +In the failover case configuration, virtio_net_device_realize() uses an +add_migration_state_change_notifier() to add a state notifier, but this +notifier is not removed by the unrealize function when the virtio-net +card is unplugged. + +If the card is unplugged and a migration is started, the notifier is +called and as it is not valid anymore QEMU crashes. + +This patch fixes the problem by adding the +remove_migration_state_change_notifier() in virtio_net_device_unrealize(). + +The problem can be reproduced with: + + $ qemu-system-x86_64 -enable-kvm -m 1g -M q35 \ + -device pcie-root-port,slot=4,id=root1 \ + -device pcie-root-port,slot=5,id=root2 \ + -device virtio-net-pci,id=net1,mac=52:54:00:6f:55:cc,failover=on,bus=root1 \ + -monitor stdio disk.qcow2 + (qemu) device_del net1 + (qemu) migrate "exec:gzip -c > STATEFILE.gz" + + Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault. + 0x0000000000000000 in ?? () + (gdb) bt + #0 0x0000000000000000 in () + #1 0x0000555555d726d7 in notifier_list_notify (...) + at .../util/notify.c:39 + #2 0x0000555555842c1a in migrate_fd_connect (...) + at .../migration/migration.c:3975 + #3 0x0000555555950f7d in migration_channel_connect (...) + error@entry=0x0) at .../migration/channel.c:107 + #4 0x0000555555910922 in exec_start_outgoing_migration (...) + at .../migration/exec.c:42 + +Reported-by: Igor Mammedov +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Laurent Vivier +Signed-off-by: Jason Wang +(cherry picked from commit a7eca58380f9589bb1bb6333ccfb58869734edb6) +Signed-off-by: Laurent Vivier +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/net/virtio-net.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 66b9ff4511..914051feb7 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3373,6 +3373,7 @@ static void virtio_net_device_unrealize(DeviceState *dev) + + if (n->failover) { + device_listener_unregister(&n->primary_listener); ++ remove_migration_state_change_notifier(&n->migration_state); + } + + max_queues = n->multiqueue ? n->max_queues : 1; +-- +2.27.0 + diff --git a/kvm-virtio-scsi-Configure-all-host-notifiers-in-a-single.patch b/kvm-virtio-scsi-Configure-all-host-notifiers-in-a-single.patch new file mode 100644 index 0000000..a58b251 --- /dev/null +++ b/kvm-virtio-scsi-Configure-all-host-notifiers-in-a-single.patch @@ -0,0 +1,91 @@ +From 5e7855d3c9d5a59d2c0ac05444428b058ca9abb8 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Wed, 26 May 2021 09:03:54 -0400 +Subject: [PATCH 14/15] virtio-scsi: Configure all host notifiers in a single + MR transaction + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta +RH-Commit: [11/12] f3c785c4b63aeb3a7b153f2fc17e86c983f98c23 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier + +This allows the virtio-scsi-pci device to batch the setup of all its +host notifiers. This significantly improves boot time of VMs with a +high number of vCPUs, e.g. from 6m5.563s down to 1m2.884s for a +pseries machine with 384 vCPUs. + +Note that memory_region_transaction_commit() must be called before +virtio_bus_cleanup_host_notifier() because the latter might close +ioeventfds that the transaction still assumes to be around when it +commits. + +Signed-off-by: Greg Kurz +Message-Id: <20210407143501.244343-5-groug@kaod.org> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Michael S. 
Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit c4f5dcc4360a02085a633fd7a90b7ac395ca1ba4) +Signed-off-by: Greg Kurz +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/scsi/virtio-scsi-dataplane.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c +index b2cb3d9dcc..28e003250a 100644 +--- a/hw/scsi/virtio-scsi-dataplane.c ++++ b/hw/scsi/virtio-scsi-dataplane.c +@@ -152,6 +152,8 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) + goto fail_guest_notifiers; + } + ++ memory_region_transaction_begin(); ++ + rc = virtio_scsi_set_host_notifier(s, vs->ctrl_vq, 0); + if (rc != 0) { + goto fail_host_notifiers; +@@ -173,6 +175,8 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) + vq_init_count++; + } + ++ memory_region_transaction_commit(); ++ + aio_context_acquire(s->ctx); + virtio_queue_aio_set_host_notifier_handler(vs->ctrl_vq, s->ctx, + virtio_scsi_data_plane_handle_ctrl); +@@ -192,6 +196,11 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) + fail_host_notifiers: + for (i = 0; i < vq_init_count; i++) { + virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); ++ } ++ ++ memory_region_transaction_commit(); ++ ++ for (i = 0; i < vq_init_count; i++) { + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); + } + k->set_guest_notifiers(qbus->parent, vs->conf.num_queues + 2, false); +@@ -229,8 +238,15 @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev) + + blk_drain_all(); /* ensure there are no in-flight requests */ + ++ memory_region_transaction_begin(); ++ + for (i = 0; i < vs->conf.num_queues + 2; i++) { + virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); ++ } ++ ++ memory_region_transaction_commit(); ++ ++ for (i = 0; i < vs->conf.num_queues + 2; i++) { + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); + } + +-- +2.27.0 + diff --git a/kvm-virtio-scsi-Set-host-notifiers-and-callbacks-separat.patch 
b/kvm-virtio-scsi-Set-host-notifiers-and-callbacks-separat.patch new file mode 100644 index 0000000..ab10168 --- /dev/null +++ b/kvm-virtio-scsi-Set-host-notifiers-and-callbacks-separat.patch @@ -0,0 +1,125 @@ +From 0ca53acb7ee0a3b3b72685f47df1fb2466989d6c Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Wed, 26 May 2021 09:03:53 -0400 +Subject: [PATCH 13/15] virtio-scsi: Set host notifiers and callbacks + separately + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta +RH-Commit: [10/12] 61f873b494c52dc34eb60a705046bfead08532da (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier + +Host notifiers are guaranteed to be idle until the callbacks are +hooked up with virtio_queue_aio_set_host_notifier_handler(). They +thus don't need to be set or unset with the AioContext lock held. + +Do this outside the critical section, like virtio-blk already +does: basically downgrading virtio_scsi_vring_init() to only +setup the host notifier and set the callback in the caller. + +This will allow to batch addition/deletion of ioeventfds in +a single memory transaction, which is expected to greatly +improve initialization time. + +Signed-off-by: Greg Kurz +Message-Id: <20210407143501.244343-4-groug@kaod.org> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 61fc57bfc464c3584bd7ab810c86833661f0188c) +Signed-off-by: Greg Kurz +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/scsi/virtio-scsi-dataplane.c | 40 ++++++++++++++++++--------------- + 1 file changed, 22 insertions(+), 18 deletions(-) + +diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c +index 4ad8793406..b2cb3d9dcc 100644 +--- a/hw/scsi/virtio-scsi-dataplane.c ++++ b/hw/scsi/virtio-scsi-dataplane.c +@@ -94,8 +94,7 @@ static bool virtio_scsi_data_plane_handle_event(VirtIODevice *vdev, + return progress; + } + +-static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n, +- VirtIOHandleAIOOutput fn) ++static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n) + { + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); + int rc; +@@ -109,7 +108,6 @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n, + return rc; + } + +- virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, fn); + return 0; + } + +@@ -154,38 +152,44 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) + goto fail_guest_notifiers; + } + +- aio_context_acquire(s->ctx); +- rc = virtio_scsi_vring_init(s, vs->ctrl_vq, 0, +- virtio_scsi_data_plane_handle_ctrl); +- if (rc) { +- goto fail_vrings; ++ rc = virtio_scsi_set_host_notifier(s, vs->ctrl_vq, 0); ++ if (rc != 0) { ++ goto fail_host_notifiers; + } + + vq_init_count++; +- rc = virtio_scsi_vring_init(s, vs->event_vq, 1, +- virtio_scsi_data_plane_handle_event); +- if (rc) { +- goto fail_vrings; ++ rc = virtio_scsi_set_host_notifier(s, vs->event_vq, 1); ++ if (rc != 0) { ++ goto fail_host_notifiers; + } + + vq_init_count++; ++ + for (i = 0; i < vs->conf.num_queues; i++) { +- rc = virtio_scsi_vring_init(s, vs->cmd_vqs[i], i + 2, +- virtio_scsi_data_plane_handle_cmd); ++ rc = virtio_scsi_set_host_notifier(s, vs->cmd_vqs[i], i + 2); + if (rc) { +- goto fail_vrings; ++ goto fail_host_notifiers; + } + vq_init_count++; + } + ++ aio_context_acquire(s->ctx); ++ virtio_queue_aio_set_host_notifier_handler(vs->ctrl_vq, s->ctx, ++ 
virtio_scsi_data_plane_handle_ctrl); ++ virtio_queue_aio_set_host_notifier_handler(vs->event_vq, s->ctx, ++ virtio_scsi_data_plane_handle_event); ++ ++ for (i = 0; i < vs->conf.num_queues; i++) { ++ virtio_queue_aio_set_host_notifier_handler(vs->cmd_vqs[i], s->ctx, ++ virtio_scsi_data_plane_handle_cmd); ++ } ++ + s->dataplane_starting = false; + s->dataplane_started = true; + aio_context_release(s->ctx); + return 0; + +-fail_vrings: +- aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s); +- aio_context_release(s->ctx); ++fail_host_notifiers: + for (i = 0; i < vq_init_count; i++) { + virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 3eb1eaa..ae2bcd2 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -4,7 +4,6 @@ %global have_usbredir 1 %global have_opengl 1 %global have_fdt 0 -%global have_gluster 1 %global have_kvm_setup 0 %global have_memlock_limits 0 @@ -27,7 +26,6 @@ %global kvm_target x86_64 %else %global have_opengl 0 - %global have_gluster 0 %endif %ifarch %{power64} %global kvm_target ppc64 @@ -55,9 +53,6 @@ Requires: %{name}-ui-opengl = %{epoch}:%{version}-%{release} \ %endif \ Requires: %{name}-block-curl = %{epoch}:%{version}-%{release} \ -%if %{have_gluster} \ -Requires: %{name}-block-gluster = %{epoch}:%{version}-%{release} \ -%endif \ Requires: %{name}-block-iscsi = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} @@ -65,9 +60,11 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.0.0 -Release: 4%{?rcversion}%{?dist} +Release: 5%{?rcversion}%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped -Epoch: 15 +# Epoch 15 used for RHEL 8 +# Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) +Epoch: 17 License: GPLv2 and GPLv2+ and CC-BY Group: Development/Tools URL: http://www.qemu.org/ @@ -121,6 +118,32 @@ Patch19: kvm-Remove-SPICE-and-QXL-from-x86_64-rh-devices.mak.patch Patch20: kvm-hw-s390x-Remove-the-RHEL7-only-machine-type.patch # For bz#1962479 - Disable the 'x-terminal3270' device in qemu-kvm on s390x Patch21: kvm-s390x-redhat-disable-experimental-3270-device.patch +# For bz#1952449 - [aarch64] define RHEL9 machine types +Patch22: kvm-arm-virt-Register-highmem-and-gic-version-as-class-p.patch +# For bz#1952449 - [aarch64] define RHEL9 machine types +Patch23: kvm-hw-arm-virt-Add-8.5-and-9.0-machine-types-and-remove.patch +# For bz#1747467 - [aarch64] [qemu] PVPANIC support +Patch24: kvm-aarch64-rh-devices-add-CONFIG_PVPANIC_PCI.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch25: kvm-redhat-s390x-add-rhel-8.5.0-compat-machine.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch26: kvm-redhat-add-missing-entries-in-hw_compat_rhel_8_4.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch27: kvm-redhat-Define-pseries-rhel8.5.0-machine-type.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch28: kvm-virtio-net-failover-add-missing-remove_migration_sta.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch29: kvm-hw-arm-virt-Disable-PL011-clock-migration-through-hw.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch30: kvm-virtio-blk-Fix-rollback-path-in-virtio_blk_data_plan.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch31: kvm-virtio-blk-Configure-all-host-notifiers-in-a-single-.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch32: 
kvm-virtio-scsi-Set-host-notifiers-and-callbacks-separat.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch33: kvm-virtio-scsi-Configure-all-host-notifiers-in-a-single.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch34: kvm-hw-arm-smmuv3-Another-range-invalidation-fix.patch BuildRequires: wget BuildRequires: rpm-build @@ -151,11 +174,6 @@ BuildRequires: libcurl-devel BuildRequires: libssh-devel BuildRequires: librados-devel BuildRequires: librbd-devel -%if %{have_gluster} -# For gluster block driver -BuildRequires: glusterfs-api-devel -BuildRequires: glusterfs-devel -%endif # We need both because the 'stap' binary is probed for by configure BuildRequires: systemtap BuildRequires: systemtap-sdt-devel @@ -269,6 +287,7 @@ Requires: libfdt >= 1.6.0 # The "<= {version}" assumes RHEL-9 version >= RHEL-8 version (in # other words RHEL-9 rebases are done together/before RHEL-8 ones) Obsoletes: qemu-kvm-ui-spice <= %{version} +Obsoletes: qemu-kvm-block-gluster <= %{version} %description -n qemu-kvm-core qemu-kvm is an open source virtualizer that provides hardware @@ -353,17 +372,6 @@ Install this package if you want to access remote disks over http, https, ftp and other transports provided by the CURL library. -%if %{have_gluster} -%package block-gluster -Summary: QEMU Gluster block driver -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} -%description block-gluster -This package provides the additional Gluster block driver for QEMU. - -Install this package if you want to access remote Gluster storage. 
-%endif - - %package block-iscsi Summary: QEMU iSCSI block driver Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} @@ -429,11 +437,6 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" %global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle -%if 0%{have_gluster} - %global block_drivers_list %{block_drivers_list},gluster -%endif - - %define disable_everything \\\ --disable-attr \\\ --disable-auth-pam \\\ @@ -594,9 +597,6 @@ pushd %{qemu_kvm_build} --enable-fdt \ %endif --enable-gcrypt \ -%if 0%{have_gluster} - --enable-glusterfs \ -%endif --enable-gnutls \ --enable-guest-agent \ --enable-iconv \ @@ -937,8 +937,6 @@ pxe_link() { %ifnarch aarch64 s390x pxe_link e1000 8086100e -pxe_link ne2k_pci 10ec8029 -pxe_link pcnet 10222000 pxe_link rtl8139 10ec8139 pxe_link virtio 1af41000 pxe_link e1000e 808610d3 @@ -1134,9 +1132,7 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %{_datadir}/%{name}/efi-e1000.rom %{_datadir}/%{name}/efi-e1000e.rom %{_datadir}/%{name}/efi-virtio.rom - %{_datadir}/%{name}/efi-pcnet.rom %{_datadir}/%{name}/efi-rtl8139.rom - %{_datadir}/%{name}/efi-ne2k_pci.rom %{_libdir}/qemu-kvm/hw-display-virtio-vga.so %endif %{_datadir}/icons/* @@ -1205,11 +1201,6 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %files block-curl %{_libdir}/qemu-kvm/block-curl.so -%if %{have_gluster} -%files block-gluster -%{_libdir}/qemu-kvm/block-gluster.so -%endif - %files block-iscsi %{_libdir}/qemu-kvm/block-iscsi.so @@ -1227,6 +1218,33 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %endif %changelog +* Tue Jun 08 2021 Miroslav Rezanina - 6.0.0-5 +- kvm-arm-virt-Register-highmem-and-gic-version-as-class-p.patch [bz#1952449] +- kvm-hw-arm-virt-Add-8.5-and-9.0-machine-types-and-remove.patch [bz#1952449] +- kvm-aarch64-rh-devices-add-CONFIG_PVPANIC_PCI.patch [bz#1747467] +- kvm-spec-Do-not-build-qemu-kvm-block-gluster.patch [bz#1964795] +- 
kvm-spec-Do-not-link-pcnet-and-ne2k_pci-roms.patch [bz#1965961] +- kvm-redhat-s390x-add-rhel-8.5.0-compat-machine.patch [bz#1957194] +- kvm-redhat-add-missing-entries-in-hw_compat_rhel_8_4.patch [bz#1957194] +- kvm-redhat-Define-pseries-rhel8.5.0-machine-type.patch [bz#1957194] +- kvm-virtio-net-failover-add-missing-remove_migration_sta.patch [bz#1957194] +- kvm-hw-arm-virt-Disable-PL011-clock-migration-through-hw.patch [bz#1957194] +- kvm-virtio-blk-Fix-rollback-path-in-virtio_blk_data_plan.patch [bz#1957194] +- kvm-virtio-blk-Configure-all-host-notifiers-in-a-single-.patch [bz#1957194] +- kvm-virtio-scsi-Set-host-notifiers-and-callbacks-separat.patch [bz#1957194] +- kvm-virtio-scsi-Configure-all-host-notifiers-in-a-single.patch [bz#1957194] +- kvm-hw-arm-smmuv3-Another-range-invalidation-fix.patch [bz#1957194] +- Resolves: bz#1952449 + ([aarch64] define RHEL9 machine types) +- Resolves: bz#1747467 + ([aarch64] [qemu] PVPANIC support) +- Resolves: bz#1964795 + (Remove qemu-kvm-block-gluster package) +- Resolves: bz#1965961 + (Remove links to not build roms) +- Resolves: bz#1957194 + (Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta) + * Mon May 31 2021 Miroslav Rezanina - 6.0.0-4 - kvm-s390x-redhat-disable-experimental-3270-device.patch - Resolves: bz#1962479 From 73d2f941dba897e69d8c173299529c9519580858 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 21 Jun 2021 05:04:48 -0400 Subject: [PATCH 125/195] * Mon Jun 21 2021 Miroslav Rezanina - 6.0.0-6 - kvm-yank-Unregister-function-when-using-TLS-migration.patch [bz#1972462] - kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch [bz#1957194] - kvm-redhat-Install-the-s390-netboot.img-that-we-ve-built.patch [bz#1957194] - kvm-sockets-update-SOCKET_ADDRESS_TYPE_FD-listen-2-backl.patch [bz#1957194] - kvm-target-i386-sev-add-support-to-query-the-attestation.patch [bz#1957194] - kvm-spapr-Don-t-hijack-current_machine-boot_order.patch [bz#1957194] - 
kvm-target-i386-Add-CPU-model-versions-supporting-xsaves.patch [bz#1957194] - kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch [bz#1957194] - kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch [bz#1957194] - Specfile cleanup [bz#1973029] - Resolves: bz#1972462 (QEMU core dump when doing TLS migration via TCP) - Resolves: bz#1957194 (Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta) - Resolves: bz#1973029 (Spec file cleanups) --- ...-don-t-try-to-read-the-next-block-if.patch | 51 +++ ...OCKET_ADDRESS_TYPE_FD-listen-2-backl.patch | 115 +++++++ ...-t-hijack-current_machine-boot_order.patch | 115 +++++++ ...le-comment-about-power-saving-LPCR-b.patch | 52 +++ ...o-current-AIL-mode-when-starting-a-n.patch | 90 +++++ ...CPU-model-versions-supporting-xsaves.patch | 322 ++++++++++++++++++ ...add-support-to-query-the-attestation.patch | 262 ++++++++++++++ ...er-function-when-using-TLS-migration.patch | 146 ++++++++ qemu-kvm.spec | 167 ++++----- 9 files changed, 1243 insertions(+), 77 deletions(-) create mode 100644 kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch create mode 100644 kvm-sockets-update-SOCKET_ADDRESS_TYPE_FD-listen-2-backl.patch create mode 100644 kvm-spapr-Don-t-hijack-current_machine-boot_order.patch create mode 100644 kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch create mode 100644 kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch create mode 100644 kvm-target-i386-Add-CPU-model-versions-supporting-xsaves.patch create mode 100644 kvm-target-i386-sev-add-support-to-query-the-attestation.patch create mode 100644 kvm-yank-Unregister-function-when-using-TLS-migration.patch diff --git a/kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch b/kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch new file mode 100644 index 0000000..77db467 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch @@ -0,0 +1,51 @@ +From 
bd1d37e3536136130df41ac8162ce5bb4f361f87 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 1 Jun 2021 08:52:10 -0400 +Subject: [PATCH 02/21] pc-bios/s390-ccw: don't try to read the next block if + end of chunk is reached +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 8: Synchronize with RHEL-AV 8.5 release 19 to RHEL 9 +RH-Commit: [1/8] 69a43520a9e7f0ab92bdfdc47281c7606f5159e7 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier +RH-Acked-by: Vitaly Kuznetsov + +From: Marc Hartmayer + +Don't read the block if a null block number is reached, because this means that +the end of chunk is reached. + +Reviewed-by: Collin Walling +Signed-off-by: Marc Hartmayer +Message-Id: <20210416074736.17409-1-mhartmay@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit a6625d38cce3901a7c1cba069f0abcf743a293f1) +Signed-off-by: Thomas Huth +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + pc-bios/s390-ccw/bootmap.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c +index 44df7d16af..b46997c0b7 100644 +--- a/pc-bios/s390-ccw/bootmap.c ++++ b/pc-bios/s390-ccw/bootmap.c +@@ -213,7 +213,7 @@ static int eckd_get_boot_menu_index(block_number_t s1b_block_nr) + next_block_nr = eckd_block_num(&s1b->seek[i + 1].chs); + } + +- if (next_block_nr) { ++ if (next_block_nr && !is_null_block_number(next_block_nr)) { + read_block(next_block_nr, s2_next_blk, + "Cannot read stage2 boot loader"); + } +-- +2.27.0 + diff --git a/kvm-sockets-update-SOCKET_ADDRESS_TYPE_FD-listen-2-backl.patch b/kvm-sockets-update-SOCKET_ADDRESS_TYPE_FD-listen-2-backl.patch new file mode 100644 index 0000000..75d505c --- /dev/null +++ b/kvm-sockets-update-SOCKET_ADDRESS_TYPE_FD-listen-2-backl.patch @@ -0,0 +1,115 @@ +From 415a6a1ced90cc8b8691eb7ab027bba4611fc236 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 2 Jun 2021 15:51:02 -0400 +Subject: [PATCH 04/21] sockets: update SOCKET_ADDRESS_TYPE_FD listen(2) + backlog +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 8: Synchronize with RHEL-AV 8.5 release 19 to RHEL 9 +RH-Commit: [3/8] a8fd97eb477ad51fca75c1cc344185e1de59caf1 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier +RH-Acked-by: Vitaly Kuznetsov + +socket_get_fd() fails with the error "socket_get_fd: too many +connections" if the given listen backlog value is not 1. + +Not all callers set the backlog to 1. For example, commit +582d4210eb2f2ab5baac328fe4b479cd86da1647 ("qemu-nbd: Use SOMAXCONN for +socket listen() backlog") uses SOMAXCONN. This will always fail in +socket_get_fd(). + +This patch calls listen(2) on the fd to update the backlog value. 
The +socket may already be in the listen state. I have tested that this works +on Linux 5.10 and macOS Catalina. + +As a bonus this allows us to detect when the fd cannot listen. Now we'll +be able to catch unbound or connected fds in socket_listen(). + +Drop the num argument from socket_get_fd() since this function is also +called by socket_connect() where a listen backlog value does not make +sense. + +Fixes: e5b6353cf25c99c3f08bf51e29933352f7140e8f ("socket: Add backlog parameter to socket_listen") +Reported-by: Richard W.M. Jones +Cc: Juan Quintela +Cc: Eric Blake +Signed-off-by: Stefan Hajnoczi +Message-Id: <20210310173004.420190-1-stefanha@redhat.com> +Tested-by: Richard W.M. Jones +Reviewed-by: Eric Blake +Reviewed-by: Stefano Garzarella +Signed-off-by: Eric Blake +(cherry picked from commit 37179e9ea45d6428b29ae789209c119ac18c1d39) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + util/qemu-sockets.c | 29 ++++++++++++++++++++++------- + 1 file changed, 22 insertions(+), 7 deletions(-) + +diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c +index 8af0278f15..2463c49773 100644 +--- a/util/qemu-sockets.c ++++ b/util/qemu-sockets.c +@@ -1116,14 +1116,10 @@ fail: + return NULL; + } + +-static int socket_get_fd(const char *fdstr, int num, Error **errp) ++static int socket_get_fd(const char *fdstr, Error **errp) + { + Monitor *cur_mon = monitor_cur(); + int fd; +- if (num != 1) { +- error_setg_errno(errp, EINVAL, "socket_get_fd: too many connections"); +- return -1; +- } + if (cur_mon) { + fd = monitor_get_fd(cur_mon, fdstr, errp); + if (fd < 0) { +@@ -1159,7 +1155,7 @@ int socket_connect(SocketAddress *addr, Error **errp) + break; + + case SOCKET_ADDRESS_TYPE_FD: +- fd = socket_get_fd(addr->u.fd.str, 1, errp); ++ fd = socket_get_fd(addr->u.fd.str, errp); + break; + + case SOCKET_ADDRESS_TYPE_VSOCK: +@@ -1187,7 +1183,26 @@ int socket_listen(SocketAddress *addr, int num, Error **errp) + break; + + case 
SOCKET_ADDRESS_TYPE_FD: +- fd = socket_get_fd(addr->u.fd.str, num, errp); ++ fd = socket_get_fd(addr->u.fd.str, errp); ++ if (fd < 0) { ++ return -1; ++ } ++ ++ /* ++ * If the socket is not yet in the listen state, then transition it to ++ * the listen state now. ++ * ++ * If it's already listening then this updates the backlog value as ++ * requested. ++ * ++ * If this socket cannot listen because it's already in another state ++ * (e.g. unbound or connected) then we'll catch the error here. ++ */ ++ if (listen(fd, num) != 0) { ++ error_setg_errno(errp, errno, "Failed to listen on fd socket"); ++ closesocket(fd); ++ return -1; ++ } + break; + + case SOCKET_ADDRESS_TYPE_VSOCK: +-- +2.27.0 + diff --git a/kvm-spapr-Don-t-hijack-current_machine-boot_order.patch b/kvm-spapr-Don-t-hijack-current_machine-boot_order.patch new file mode 100644 index 0000000..3cc304d --- /dev/null +++ b/kvm-spapr-Don-t-hijack-current_machine-boot_order.patch @@ -0,0 +1,115 @@ +From b859b919acc83ea12c5c5b2991afac47e9532660 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Thu, 3 Jun 2021 13:29:40 -0400 +Subject: [PATCH 06/21] spapr: Don't hijack current_machine->boot_order +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 8: Synchronize with RHEL-AV 8.5 release 19 to RHEL 9 +RH-Commit: [5/8] 04822ea86e438f013915cd46e09a33627a640a47 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier +RH-Acked-by: Vitaly Kuznetsov + +From: Greg Kurz + +QEMU 6.0 moved all the -boot variables to the machine. 
Especially, the +removal of the boot_order static changed the handling of '-boot once' +from: + + if (boot_once) { + qemu_boot_set(boot_once, &error_fatal); + qemu_register_reset(restore_boot_order, g_strdup(boot_order)); + } + +to + + if (current_machine->boot_once) { + qemu_boot_set(current_machine->boot_once, &error_fatal); + qemu_register_reset(restore_boot_order, + g_strdup(current_machine->boot_order)); + } + +This means that we now register as subsequent boot order a copy +of current_machine->boot_once that was just set with the previous +call to qemu_boot_set(), i.e. we never transition away from the +once boot order. + +It is certainly fragile^Wwrong for the spapr code to hijack a +field of the base machine type object like that. The boot order +rework simply turned this software boundary violation into an +actual bug. + +Have the spapr code handle that with its own field in +SpaprMachineState. Also g_free() the initial boot device +string when "once" was used. + +Fixes: 4b7acd2ac821 ("vl: clean up -boot variables") +Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1960119 +Cc: pbonzini@redhat.com +Signed-off-by: Greg Kurz +Message-Id: <20210521160735.1901914-1-groug@kaod.org> +Signed-off-by: David Gibson +(cherry picked from commit 3bf0844f3be77b24cc8f56fc8df9ff199f8324cb) +Signed-off-by: Greg Kurz + +Conflicts: + include/hw/ppc/spapr.h + +Trivial context conflict because downstream has experimental support +for secure guests (f23e4b5090ba). + +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/ppc/spapr.c | 8 +++++--- + include/hw/ppc/spapr.h | 3 +++ + 2 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 653574ba91..11db32c537 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -1006,7 +1006,7 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset) + _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen")); + + if (reset) { +- const char *boot_device = machine->boot_order; ++ const char *boot_device = spapr->boot_device; + char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); + size_t cb = 0; + char *bootlist = get_boot_devices_list(&cb); +@@ -2364,8 +2364,10 @@ static SaveVMHandlers savevm_htab_handlers = { + static void spapr_boot_set(void *opaque, const char *boot_device, + Error **errp) + { +- MachineState *machine = MACHINE(opaque); +- machine->boot_order = g_strdup(boot_device); ++ SpaprMachineState *spapr = SPAPR_MACHINE(opaque); ++ ++ g_free(spapr->boot_device); ++ spapr->boot_device = g_strdup(boot_device); + } + + static void spapr_create_lmb_dr_connectors(SpaprMachineState *spapr) +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index 54cdde8980..6d15066bc3 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -227,6 +227,9 @@ struct SpaprMachineState { + /* Secure Guest support via x-svm-allowed */ + bool svm_allowed; + ++ /* Set by -boot */ ++ char *boot_device; ++ + /*< public >*/ + char *kvm_type; + char *host_model; +-- +2.27.0 + diff --git a/kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch b/kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch new file mode 100644 index 0000000..4c7c9a1 --- /dev/null +++ b/kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch @@ -0,0 +1,52 @@ +From 63933b51e447d4acb08d3900ff64d150a3f2bdf8 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Wed, 9 Jun 2021 05:58:34 -0400 +Subject: [PATCH 08/21] spapr: Remove stale 
comment about power-saving LPCR + bits +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 8: Synchronize with RHEL-AV 8.5 release 19 to RHEL 9 +RH-Commit: [7/8] 12872a0193d22915de5d71d8055094f9f15e63b0 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier +RH-Acked-by: Vitaly Kuznetsov + +From: Nicholas Piggin + +Commit 47a9b551547 ("spapr: Clean up handling of LPCR power-saving exit +bits") moved this logic but did not remove the comment from the +previous location. + +Signed-off-by: Nicholas Piggin +Message-Id: <20210526091626.3388262-2-npiggin@gmail.com> +Reviewed-by: Cédric Le Goater +Reviewed-by: Greg Kurz +Signed-off-by: David Gibson +Signed-off-by: Laurent Vivier +(cherry picked from commit 7be3bf6c8429969f97728bb712d9a99997835607) +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/ppc/spapr_rtas.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c +index 8a79f9c628..91c71d1c94 100644 +--- a/hw/ppc/spapr_rtas.c ++++ b/hw/ppc/spapr_rtas.c +@@ -164,7 +164,6 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, + + env->msr = (1ULL << MSR_SF) | (1ULL << MSR_ME); + +- /* Enable Power-saving mode Exit Cause exceptions for the new CPU */ + lpcr = env->spr[SPR_LPCR]; + if (!pcc->interrupts_big_endian(callcpu)) { + lpcr |= LPCR_ILE; +-- +2.27.0 + diff --git a/kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch b/kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch new file mode 100644 index 0000000..c30b63e --- /dev/null +++ b/kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch @@ -0,0 +1,90 @@ +From 8bb294ea3f26a8ce01ad76c19a6de359dce0c113 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Wed, 9 Jun 2021 05:58:35 -0400 +Subject: [PATCH 09/21] spapr: Set LPCR 
to current AIL mode when starting a new + CPU +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 8: Synchronize with RHEL-AV 8.5 release 19 to RHEL 9 +RH-Commit: [8/8] 7a1cb27881f93c245ab9e8b8540cbd06d4f8c14f (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier +RH-Acked-by: Vitaly Kuznetsov + +From: Nicholas Piggin + +TCG does not keep track of AIL mode in a central place, it's based on +the current LPCR[AIL] bits. Synchronize the new CPU's LPCR to the +current LPCR in rtas_start_cpu(), similarly to the way the ILE bit is +synchronized. + +Open-code the ILE setting as well now that the caller's LPCR is +available directly, there is no need for the indirection. + +Without this, under both TCG and KVM, adding a POWER8/9/10 class CPU +with a new core ID after a modern Linux has booted results in the new +CPU's LPCR missing the LPCR[AIL]=0b11 setting that the other CPUs have. +This can cause crashes and unexpected behaviour. + +Signed-off-by: Nicholas Piggin +Message-Id: <20210526091626.3388262-3-npiggin@gmail.com> +Reviewed-by: Cédric Le Goater +Reviewed-by: Greg Kurz +Signed-off-by: David Gibson +Signed-off-by: Laurent Vivier +(cherry picked from commit ac559ecbea2649819e7b3fdd09f4e0243e0128db) +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/ppc/spapr_rtas.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c +index 91c71d1c94..27ab339b0c 100644 +--- a/hw/ppc/spapr_rtas.c ++++ b/hw/ppc/spapr_rtas.c +@@ -133,8 +133,8 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, + target_ulong id, start, r3; + PowerPCCPU *newcpu; + CPUPPCState *env; +- PowerPCCPUClass *pcc; + target_ulong lpcr; ++ target_ulong caller_lpcr; + + if (nargs != 3 || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +@@ -153,7 +153,6 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, + } + + env = &newcpu->env; +- pcc = POWERPC_CPU_GET_CLASS(newcpu); + + if (!CPU(newcpu)->halted) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); +@@ -164,10 +163,15 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, + + env->msr = (1ULL << MSR_SF) | (1ULL << MSR_ME); + ++ caller_lpcr = callcpu->env.spr[SPR_LPCR]; + lpcr = env->spr[SPR_LPCR]; +- if (!pcc->interrupts_big_endian(callcpu)) { +- lpcr |= LPCR_ILE; +- } ++ ++ /* Set ILE the same way */ ++ lpcr = (lpcr & ~LPCR_ILE) | (caller_lpcr & LPCR_ILE); ++ ++ /* Set AIL the same way */ ++ lpcr = (lpcr & ~LPCR_AIL) | (caller_lpcr & LPCR_AIL); ++ + if (env->mmu_model == POWERPC_MMU_3_00) { + /* + * New cpus are expected to start in the same radix/hash mode +-- +2.27.0 + diff --git a/kvm-target-i386-Add-CPU-model-versions-supporting-xsaves.patch b/kvm-target-i386-Add-CPU-model-versions-supporting-xsaves.patch new file mode 100644 index 0000000..bdf810c --- /dev/null +++ b/kvm-target-i386-Add-CPU-model-versions-supporting-xsaves.patch @@ -0,0 +1,322 @@ +From a7752067b45bc05f1127a62e39c38a3361bb1840 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Thu, 3 Jun 2021 14:04:18 -0400 +Subject: [PATCH 07/21] target/i386: Add CPU model versions supporting 'xsaves' +MIME-Version: 1.0 +Content-Type: text/plain; 
charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 8: Synchronize with RHEL-AV 8.5 release 19 to RHEL 9 +RH-Commit: [6/8] 814973113f19a21d10a90fcbbcd881eef354933d (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier +RH-Acked-by: Vitaly Kuznetsov + +Hyper-V 2016 refuses to boot on Skylake+ CPU models because they lack +'xsaves'/'vmx-xsaves' features and this diverges from real hardware. The +same issue emerges with AMD "EPYC" CPU model prior to version 3 which got +'xsaves' added. EPYC-Rome/EPYC-Milan CPU models have 'xsaves' enabled from +the very beginning so the comment blaming KVM to explain why other CPUs +lack 'xsaves' is likely outdated. + +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20210412073952.860944-1-vkuznets@redhat.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 7bde6b18575dd79c26ce1616e0c33151e83d9d7e) + +Conflicts: + target/i386/cpu.c (context, skipping c1826ea6a0520) + +Signed-off-by: Vitaly Kuznetsov +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + target/i386/cpu.c | 150 +++++++++++++++++++++++++++++----------------- + 1 file changed, 94 insertions(+), 56 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index c30bb2a6b0..da47c3e50e 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -2881,12 +2881,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | + CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | + CPUID_7_0_EBX_SMAP, +- /* Missing: XSAVES (not supported by some Linux versions, +- * including v4.1 to v4.12). +- * KVM doesn't yet expose any XSAVES state save component, +- * and the only one defined in Skylake (processor tracing) +- * probably will block migration anyway. 
+- */ ++ /* XSAVES is added in version 4 */ + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1, +@@ -2962,6 +2957,15 @@ static X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + } + }, ++ { ++ .version = 4, ++ .note = "IBRS, XSAVES, no TSX", ++ .props = (PropValue[]) { ++ { "xsaves", "on" }, ++ { "vmx-xsaves", "on" }, ++ { /* end of list */ } ++ } ++ }, + { /* end of list */ } + } + }, +@@ -3001,12 +3005,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_PKU, +- /* Missing: XSAVES (not supported by some Linux versions, +- * including v4.1 to v4.12). +- * KVM doesn't yet expose any XSAVES state save component, +- * and the only one defined in Skylake (processor tracing) +- * probably will block migration anyway. +- */ ++ /* XSAVES is added in version 5 */ + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1, +@@ -3094,6 +3093,15 @@ static X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + } + }, ++ { ++ .version = 5, ++ .note = "IBRS, XSAVES, EPT switching, no TSX", ++ .props = (PropValue[]) { ++ { "xsaves", "on" }, ++ { "vmx-xsaves", "on" }, ++ { /* end of list */ } ++ } ++ }, + { /* end of list */ } + } + }, +@@ -3136,12 +3144,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_ECX_AVX512VNNI, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_SPEC_CTRL_SSBD, +- /* Missing: XSAVES (not supported by some Linux versions, +- * including v4.1 to v4.12). +- * KVM doesn't yet expose any XSAVES state save component, +- * and the only one defined in Skylake (processor tracing) +- * probably will block migration anyway. 
+- */ ++ /* XSAVES is added in version 5 */ + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1, +@@ -3225,6 +3228,14 @@ static X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + }, + }, ++ { .version = 5, ++ .note = "ARCH_CAPABILITIES, EPT switching, XSAVES, no TSX", ++ .props = (PropValue[]) { ++ { "xsaves", "on" }, ++ { "vmx-xsaves", "on" }, ++ { /* end of list */ } ++ }, ++ }, + { /* end of list */ } + } + }, +@@ -3274,13 +3285,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX512_BF16, +- /* +- * Missing: XSAVES (not supported by some Linux versions, +- * including v4.1 to v4.12). +- * KVM doesn't yet expose any XSAVES state save component, +- * and the only one defined in Skylake (processor tracing) +- * probably will block migration anyway. +- */ ++ /* XSAVES is added in version 2 */ + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1, +@@ -3336,6 +3341,18 @@ static X86CPUDefinition builtin_x86_defs[] = { + .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, + .xlevel = 0x80000008, + .model_id = "Intel Xeon Processor (Cooperlake)", ++ .versions = (X86CPUVersionDefinition[]) { ++ { .version = 1 }, ++ { .version = 2, ++ .note = "XSAVES", ++ .props = (PropValue[]) { ++ { "xsaves", "on" }, ++ { "vmx-xsaves", "on" }, ++ { /* end of list */ } ++ }, ++ }, ++ { /* end of list */ } ++ } + }, + { + .name = "Icelake-Client", +@@ -3378,12 +3395,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_ECX_AVX512_VPOPCNTDQ, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_SPEC_CTRL_SSBD, +- /* Missing: XSAVES (not supported by some Linux versions, +- * including v4.1 to v4.12). +- * KVM doesn't yet expose any XSAVES state save component, +- * and the only one defined in Skylake (processor tracing) +- * probably will block migration anyway. 
+- */ ++ /* XSAVES is added in version 3 */ + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1, +@@ -3451,6 +3463,15 @@ static X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + }, + }, ++ { ++ .version = 3, ++ .note = "no TSX, XSAVES, deprecated", ++ .props = (PropValue[]) { ++ { "xsaves", "on" }, ++ { "vmx-xsaves", "on" }, ++ { /* end of list */ } ++ }, ++ }, + { /* end of list */ } + }, + .deprecation_note = "use Icelake-Server instead" +@@ -3499,12 +3520,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_SPEC_CTRL_SSBD, +- /* Missing: XSAVES (not supported by some Linux versions, +- * including v4.1 to v4.12). +- * KVM doesn't yet expose any XSAVES state save component, +- * and the only one defined in Skylake (processor tracing) +- * probably will block migration anyway. +- */ ++ /* XSAVES is added in version 5 */ + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1, +@@ -3597,6 +3613,15 @@ static X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + }, + }, ++ { ++ .version = 5, ++ .note = "XSAVES", ++ .props = (PropValue[]) { ++ { "xsaves", "on" }, ++ { "vmx-xsaves", "on" }, ++ { /* end of list */ } ++ }, ++ }, + { /* end of list */ } + } + }, +@@ -3631,13 +3656,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_ARCH_CAPABILITIES | + CPUID_7_0_EDX_SPEC_CTRL_SSBD, +- /* +- * Missing: XSAVES (not supported by some Linux versions, +- * including v4.1 to v4.12). +- * KVM doesn't yet expose any XSAVES state save component, +- * and the only one defined in Skylake (processor tracing) +- * probably will block migration anyway. 
+- */ ++ /* XSAVES is added in version 3 */ + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | CPUID_XSAVE_XGETBV1, + .features[FEAT_6_EAX] = +@@ -3704,6 +3723,15 @@ static X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ }, + }, + }, ++ { ++ .version = 3, ++ .note = "XSAVES, no MPX, no MONITOR", ++ .props = (PropValue[]) { ++ { "xsaves", "on" }, ++ { "vmx-xsaves", "on" }, ++ { /* end of list */ }, ++ }, ++ }, + { /* end of list */ }, + }, + }, +@@ -3762,13 +3790,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EDX_CORE_CAPABILITY, + .features[FEAT_CORE_CAPABILITY] = + MSR_CORE_CAP_SPLIT_LOCK_DETECT, +- /* +- * Missing: XSAVES (not supported by some Linux versions, +- * including v4.1 to v4.12). +- * KVM doesn't yet expose any XSAVES state save component, +- * and the only one defined in Skylake (processor tracing) +- * probably will block migration anyway. +- */ ++ /* XSAVES is is added in version 3 */ + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1, +@@ -3833,6 +3855,15 @@ static X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ }, + }, + }, ++ { ++ .version = 3, ++ .note = "XSAVES, no MPX", ++ .props = (PropValue[]) { ++ { "xsaves", "on" }, ++ { "vmx-xsaves", "on" }, ++ { /* end of list */ }, ++ }, ++ }, + { /* end of list */ }, + }, + }, +@@ -4114,11 +4145,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED | + CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT, +- /* +- * Missing: XSAVES (not supported by some Linux versions, +- * including v4.1 to v4.12). +- * KVM doesn't yet expose any XSAVES state save component. 
+- */ ++ /* XSAVES is added in version 2 */ + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1, +@@ -4129,6 +4156,17 @@ static X86CPUDefinition builtin_x86_defs[] = { + .xlevel = 0x8000001E, + .model_id = "Hygon Dhyana Processor", + .cache_info = &epyc_cache_info, ++ .versions = (X86CPUVersionDefinition[]) { ++ { .version = 1 }, ++ { .version = 2, ++ .note = "XSAVES", ++ .props = (PropValue[]) { ++ { "xsaves", "on" }, ++ { /* end of list */ } ++ }, ++ }, ++ { /* end of list */ } ++ } + }, + { + .name = "EPYC-Rome", +-- +2.27.0 + diff --git a/kvm-target-i386-sev-add-support-to-query-the-attestation.patch b/kvm-target-i386-sev-add-support-to-query-the-attestation.patch new file mode 100644 index 0000000..fd08786 --- /dev/null +++ b/kvm-target-i386-sev-add-support-to-query-the-attestation.patch @@ -0,0 +1,262 @@ +From ba750c8ed71bc73c79fecefa895192793ef6b7db Mon Sep 17 00:00:00 2001 +From: Connor Kuehl +Date: Wed, 2 Jun 2021 19:39:20 -0400 +Subject: [PATCH 05/21] target/i386/sev: add support to query the attestation + report +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 8: Synchronize with RHEL-AV 8.5 release 19 to RHEL 9 +RH-Commit: [4/8] de6088cb0cd1db779b85a50be87846e967f8c92c (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Greg Kurz +RH-Acked-by: Laurent Vivier +RH-Acked-by: Vitaly Kuznetsov + +From: Brijesh Singh + +The SEV FW >= 0.23 added a new command that can be used to query the +attestation report containing the SHA-256 digest of the guest memory +and VMSA encrypted with the LAUNCH_UPDATE and sign it with the PEK. + +Note, we already have a command (LAUNCH_MEASURE) that can be used to +query the SHA-256 digest of the guest memory encrypted through the +LAUNCH_UPDATE. 
The main difference between previous and this command +is that the report is signed with the PEK and unlike the LAUNCH_MEASURE +command the ATTESATION_REPORT command can be called while the guest +is running. + +Add a QMP interface "query-sev-attestation-report" that can be used +to get the report encoded in base64. + +Cc: James Bottomley +Cc: Tom Lendacky +Cc: Eric Blake +Cc: Paolo Bonzini +Cc: kvm@vger.kernel.org +Reviewed-by: James Bottomley +Tested-by: James Bottomley +Signed-off-by: Brijesh Singh +Reviewed-by: Connor Kuehl +Message-Id: <20210429170728.24322-1-brijesh.singh@amd.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 3ea1a80243d5b5ba23d8c2b7d3a86034ea0ade22) +Signed-off-by: Connor Kuehl +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + linux-headers/linux/kvm.h | 8 +++++ + qapi/misc-target.json | 38 ++++++++++++++++++++++ + target/i386/monitor.c | 6 ++++ + target/i386/sev-stub.c | 7 ++++ + target/i386/sev.c | 67 +++++++++++++++++++++++++++++++++++++++ + target/i386/sev_i386.h | 2 ++ + target/i386/trace-events | 1 + + 7 files changed, 129 insertions(+) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 020b62a619..897f831374 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1591,6 +1591,8 @@ enum sev_cmd_id { + KVM_SEV_DBG_ENCRYPT, + /* Guest certificates commands */ + KVM_SEV_CERT_EXPORT, ++ /* Attestation report */ ++ KVM_SEV_GET_ATTESTATION_REPORT, + + KVM_SEV_NR_MAX, + }; +@@ -1643,6 +1645,12 @@ struct kvm_sev_dbg { + __u32 len; + }; + ++struct kvm_sev_attestation_report { ++ __u8 mnonce[16]; ++ __u64 uaddr; ++ __u32 len; ++}; ++ + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) + #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index 0c7491cd82..4b62f0ac05 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -285,3 +285,41 @@ + ## + { 
'command': 'query-gic-capabilities', 'returns': ['GICCapability'], + 'if': 'defined(TARGET_ARM)' } ++ ++ ++## ++# @SevAttestationReport: ++# ++# The struct describes attestation report for a Secure Encrypted Virtualization ++# feature. ++# ++# @data: guest attestation report (base64 encoded) ++# ++# ++# Since: 6.1 ++## ++{ 'struct': 'SevAttestationReport', ++ 'data': { 'data': 'str'}, ++ 'if': 'defined(TARGET_I386)' } ++ ++## ++# @query-sev-attestation-report: ++# ++# This command is used to get the SEV attestation report, and is supported on AMD ++# X86 platforms only. ++# ++# @mnonce: a random 16 bytes value encoded in base64 (it will be included in report) ++# ++# Returns: SevAttestationReport objects. ++# ++# Since: 6.1 ++# ++# Example: ++# ++# -> { "execute" : "query-sev-attestation-report", "arguments": { "mnonce": "aaaaaaa" } } ++# <- { "return" : { "data": "aaaaaaaabbbddddd"} } ++# ++## ++{ 'command': 'query-sev-attestation-report', 'data': { 'mnonce': 'str' }, ++ 'returns': 'SevAttestationReport', ++ 'if': 'defined(TARGET_I386)' } +diff --git a/target/i386/monitor.c b/target/i386/monitor.c +index 5994408bee..119211f0b0 100644 +--- a/target/i386/monitor.c ++++ b/target/i386/monitor.c +@@ -757,3 +757,9 @@ void qmp_sev_inject_launch_secret(const char *packet_hdr, + + sev_inject_launch_secret(packet_hdr, secret, gpa, errp); + } ++ ++SevAttestationReport * ++qmp_query_sev_attestation_report(const char *mnonce, Error **errp) ++{ ++ return sev_get_attestation_report(mnonce, errp); ++} +diff --git a/target/i386/sev-stub.c b/target/i386/sev-stub.c +index 0207f1c5aa..0227cb5177 100644 +--- a/target/i386/sev-stub.c ++++ b/target/i386/sev-stub.c +@@ -74,3 +74,10 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) + { + abort(); + } ++ ++SevAttestationReport * ++sev_get_attestation_report(const char *mnonce, Error **errp) ++{ ++ error_setg(errp, "SEV is not available in this QEMU"); ++ return NULL; ++} +diff --git a/target/i386/sev.c 
b/target/i386/sev.c +index 72b9e2ab40..740548f213 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -491,6 +491,73 @@ out: + return cap; + } + ++SevAttestationReport * ++sev_get_attestation_report(const char *mnonce, Error **errp) ++{ ++ struct kvm_sev_attestation_report input = {}; ++ SevAttestationReport *report = NULL; ++ SevGuestState *sev = sev_guest; ++ guchar *data; ++ guchar *buf; ++ gsize len; ++ int err = 0, ret; ++ ++ if (!sev_enabled()) { ++ error_setg(errp, "SEV is not enabled"); ++ return NULL; ++ } ++ ++ /* lets decode the mnonce string */ ++ buf = g_base64_decode(mnonce, &len); ++ if (!buf) { ++ error_setg(errp, "SEV: failed to decode mnonce input"); ++ return NULL; ++ } ++ ++ /* verify the input mnonce length */ ++ if (len != sizeof(input.mnonce)) { ++ error_setg(errp, "SEV: mnonce must be %zu bytes (got %" G_GSIZE_FORMAT ")", ++ sizeof(input.mnonce), len); ++ g_free(buf); ++ return NULL; ++ } ++ ++ /* Query the report length */ ++ ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, ++ &input, &err); ++ if (ret < 0) { ++ if (err != SEV_RET_INVALID_LEN) { ++ error_setg(errp, "failed to query the attestation report length " ++ "ret=%d fw_err=%d (%s)", ret, err, fw_error_to_str(err)); ++ g_free(buf); ++ return NULL; ++ } ++ } ++ ++ data = g_malloc(input.len); ++ input.uaddr = (unsigned long)data; ++ memcpy(input.mnonce, buf, sizeof(input.mnonce)); ++ ++ /* Query the report */ ++ ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, ++ &input, &err); ++ if (ret) { ++ error_setg_errno(errp, errno, "Failed to get attestation report" ++ " ret=%d fw_err=%d (%s)", ret, err, fw_error_to_str(err)); ++ goto e_free_data; ++ } ++ ++ report = g_new0(SevAttestationReport, 1); ++ report->data = g_base64_encode(data, input.len); ++ ++ trace_kvm_sev_attestation_report(mnonce, report->data); ++ ++e_free_data: ++ g_free(data); ++ g_free(buf); ++ return report; ++} ++ + static int + sev_read_file_base64(const char *filename, guchar **data, 
gsize *len) + { +diff --git a/target/i386/sev_i386.h b/target/i386/sev_i386.h +index ae221d4c72..ae6d840478 100644 +--- a/target/i386/sev_i386.h ++++ b/target/i386/sev_i386.h +@@ -35,5 +35,7 @@ extern uint32_t sev_get_cbit_position(void); + extern uint32_t sev_get_reduced_phys_bits(void); + extern char *sev_get_launch_measurement(void); + extern SevCapability *sev_get_capabilities(Error **errp); ++extern SevAttestationReport * ++sev_get_attestation_report(const char *mnonce, Error **errp); + + #endif +diff --git a/target/i386/trace-events b/target/i386/trace-events +index a22ab24e21..8d6437404d 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -10,3 +10,4 @@ kvm_sev_launch_update_data(void *addr, uint64_t len) "addr %p len 0x%" PRIx64 + kvm_sev_launch_measurement(const char *value) "data %s" + kvm_sev_launch_finish(void) "" + kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" ++kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" +-- +2.27.0 + diff --git a/kvm-yank-Unregister-function-when-using-TLS-migration.patch b/kvm-yank-Unregister-function-when-using-TLS-migration.patch new file mode 100644 index 0000000..1691c50 --- /dev/null +++ b/kvm-yank-Unregister-function-when-using-TLS-migration.patch @@ -0,0 +1,146 @@ +From a722ec5614bbaa36992653250add7dabd39b12ad Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Tue, 1 Jun 2021 02:40:31 -0300 +Subject: [PATCH 01/21] yank: Unregister function when using TLS migration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 10: yank: Unregister function when using TLS migration +RH-Commit: [1/1] b39e19a3a92dc7f881527198824ce7f402aa9289 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1972462 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu > +RH-Acked-by: Dr. 
David Alan Gilbert + +After yank feature was introduced in migration, whenever migration +is started using TLS, the following error happens in both source and +destination hosts: + +(qemu) qemu-kvm: ../util/yank.c:107: yank_unregister_instance: +Assertion `QLIST_EMPTY(&entry->yankfns)' failed. + +This happens because of a missing yank_unregister_function() when using +qio-channel-tls. + +Fix this by also allowing TYPE_QIO_CHANNEL_TLS object type to perform +yank_unregister_function() in channel_close() and multifd_load_cleanup(). + +Also, inside migration_channel_connect() and +migration_channel_process_incoming() move yank_register_function() so +it only runs once on a TLS migration. + +Fixes: b5eea99ec2f ("migration: Add yank feature", 2021-01-13) +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1964326 +Signed-off-by: Leonardo Bras +Reviewed-by: Lukas Straub +Reviewed-by: Peter Xu +Message-Id: <20210601054030.1153249-1-leobras.c@gmail.com> + +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 7de2e8565335c13fb3516cddbe2e40e366cce273) +Signed-off-by: Leonardo Bras +Signed-off-by: Miroslav Rezanina +--- + migration/channel.c | 26 ++++++++++++++------------ + migration/multifd.c | 3 ++- + migration/qemu-file-channel.c | 4 +++- + 3 files changed, 19 insertions(+), 14 deletions(-) + +diff --git a/migration/channel.c b/migration/channel.c +index c9ee902021..01275a9162 100644 +--- a/migration/channel.c ++++ b/migration/channel.c +@@ -38,18 +38,19 @@ void migration_channel_process_incoming(QIOChannel *ioc) + trace_migration_set_incoming_channel( + ioc, object_get_typename(OBJECT(ioc))); + +- if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET)) { +- yank_register_function(MIGRATION_YANK_INSTANCE, +- migration_yank_iochannel, +- QIO_CHANNEL(ioc)); +- } +- + if (s->parameters.tls_creds && + *s->parameters.tls_creds && + !object_dynamic_cast(OBJECT(ioc), + TYPE_QIO_CHANNEL_TLS)) { + migration_tls_channel_process_incoming(s, ioc, &local_err); + 
} else { ++ if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || ++ object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) { ++ yank_register_function(MIGRATION_YANK_INSTANCE, ++ migration_yank_iochannel, ++ QIO_CHANNEL(ioc)); ++ } ++ + migration_ioc_process_incoming(ioc, &local_err); + } + +@@ -76,12 +77,6 @@ void migration_channel_connect(MigrationState *s, + ioc, object_get_typename(OBJECT(ioc)), hostname, error); + + if (!error) { +- if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET)) { +- yank_register_function(MIGRATION_YANK_INSTANCE, +- migration_yank_iochannel, +- QIO_CHANNEL(ioc)); +- } +- + if (s->parameters.tls_creds && + *s->parameters.tls_creds && + !object_dynamic_cast(OBJECT(ioc), +@@ -99,6 +94,13 @@ void migration_channel_connect(MigrationState *s, + } else { + QEMUFile *f = qemu_fopen_channel_output(ioc); + ++ if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || ++ object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) { ++ yank_register_function(MIGRATION_YANK_INSTANCE, ++ migration_yank_iochannel, ++ QIO_CHANNEL(ioc)); ++ } ++ + qemu_mutex_lock(&s->qemu_file_lock); + s->to_dst_file = f; + qemu_mutex_unlock(&s->qemu_file_lock); +diff --git a/migration/multifd.c b/migration/multifd.c +index a6677c45c8..a8dedcf822 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -987,7 +987,8 @@ int multifd_load_cleanup(Error **errp) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + +- if (object_dynamic_cast(OBJECT(p->c), TYPE_QIO_CHANNEL_SOCKET) ++ if ((object_dynamic_cast(OBJECT(p->c), TYPE_QIO_CHANNEL_SOCKET) || ++ object_dynamic_cast(OBJECT(p->c), TYPE_QIO_CHANNEL_TLS)) + && OBJECT(p->c)->ref == 1) { + yank_unregister_function(MIGRATION_YANK_INSTANCE, + migration_yank_iochannel, +diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c +index 876d05a540..fad340ea7a 100644 +--- a/migration/qemu-file-channel.c ++++ 
b/migration/qemu-file-channel.c +@@ -26,6 +26,7 @@ + #include "qemu-file-channel.h" + #include "qemu-file.h" + #include "io/channel-socket.h" ++#include "io/channel-tls.h" + #include "qemu/iov.h" + #include "qemu/yank.h" + #include "yank_functions.h" +@@ -106,7 +107,8 @@ static int channel_close(void *opaque, Error **errp) + int ret; + QIOChannel *ioc = QIO_CHANNEL(opaque); + ret = qio_channel_close(ioc, errp); +- if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) ++ if ((object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || ++ object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) + && OBJECT(ioc)->ref == 1) { + yank_unregister_function(MIGRATION_YANK_INSTANCE, + migration_yank_iochannel, +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index ae2bcd2..e812783 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -1,13 +1,27 @@ -%global SLOF_gittagdate 20191022 -%global SLOF_gittagcommit 899d9883 +%global libfdt_version 1.6.0 +%global libseccomp_version 2.4.0 +%global libusbx_version 1.0.23 +%global meson_version 0.55.3-3 +%global usbredir_version 0.7.1 %global have_usbredir 1 %global have_opengl 1 %global have_fdt 0 %global have_kvm_setup 0 %global have_memlock_limits 0 +# have_block_rbd is not relevant for RHEL but makes it +# easier to sync spec dependency list with Fedora +%global have_block_rbd 1 +%global have_pmem 1 +%ifnarch x86_64 + %global have_pmem 0 +%endif +%global have_numactl 1 +%ifarch s390x + %global have_numactl 0 +%endif %ifnarch %{ix86} x86_64 %global have_usbredir 0 @@ -60,13 +74,12 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.0.0 -Release: 5%{?rcversion}%{?dist} +Release: 6%{?rcversion}%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) Epoch: 17 License: GPLv2 and GPLv2+ and CC-BY -Group: Development/Tools URL: http://www.qemu.org/ ExclusiveArch: x86_64 %{power64} aarch64 s390x @@ -144,101 +157,83 @@ Patch32: kvm-virtio-scsi-Set-host-notifiers-and-callbacks-separat.patch Patch33: kvm-virtio-scsi-Configure-all-host-notifiers-in-a-single.patch # For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta Patch34: kvm-hw-arm-smmuv3-Another-range-invalidation-fix.patch +# For bz#1972462 - QEMU core dump when doing TLS migration via TCP +Patch35: kvm-yank-Unregister-function-when-using-TLS-migration.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch36: kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch37: kvm-sockets-update-SOCKET_ADDRESS_TYPE_FD-listen-2-backl.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch38: kvm-target-i386-sev-add-support-to-query-the-attestation.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch39: kvm-spapr-Don-t-hijack-current_machine-boot_order.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch40: kvm-target-i386-Add-CPU-model-versions-supporting-xsaves.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch41: kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch42: kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch -BuildRequires: wget -BuildRequires: rpm-build -BuildRequires: ninja-build -BuildRequires: meson >= 0.55.3-3 +# Source-git patches + +BuildRequires: meson >= %{meson_version} BuildRequires: zlib-devel BuildRequires: glib2-devel -BuildRequires: which BuildRequires: 
gnutls-devel BuildRequires: cyrus-sasl-devel -BuildRequires: libtool BuildRequires: libaio-devel -BuildRequires: rsync BuildRequires: python3-devel -BuildRequires: pciutils-devel BuildRequires: libiscsi-devel -BuildRequires: ncurses-devel BuildRequires: libattr-devel -BuildRequires: libusbx-devel >= 1.0.23 +BuildRequires: libusbx-devel >= %{libusbx_version} %if %{have_usbredir} -BuildRequires: usbredir-devel >= 0.7.1 +BuildRequires: usbredir-devel >= %{usbredir_version} %endif BuildRequires: texinfo BuildRequires: python3-sphinx -BuildRequires: libseccomp-devel >= 2.4.0 +BuildRequires: libseccomp-devel >= %{libseccomp_version} # For network block driver BuildRequires: libcurl-devel BuildRequires: libssh-devel -BuildRequires: librados-devel +%if %{have_block_rbd} BuildRequires: librbd-devel +%endif # We need both because the 'stap' binary is probed for by configure BuildRequires: systemtap BuildRequires: systemtap-sdt-devel # For VNC PNG support BuildRequires: libpng-devel -# For uuid generation -BuildRequires: libuuid-devel -# For Braille device support -BuildRequires: brlapi-devel -# For test suite -BuildRequires: check-devel # For virtiofs BuildRequires: libcap-ng-devel # Hard requirement for version >= 1.3 BuildRequires: pixman-devel -# Documentation requirement -BuildRequires: perl-podlators -BuildRequires: texinfo -BuildRequires: python3-sphinx # For rdma -%if 0%{?have_librdma} +%if %{have_librdma} BuildRequires: rdma-core-devel %endif %if %{have_fdt} -BuildRequires: libfdt-devel >= 1.6.0 -%endif -# iasl and cpp for acpi generation (not a hard requirement as we can use -# pre-compiled files, but it's better to use this) -%ifarch %{ix86} x86_64 -BuildRequires: iasl -BuildRequires: cpp +BuildRequires: libfdt-devel >= %{libfdt_version} %endif # For compressed guest memory dumps BuildRequires: lzo-devel snappy-devel # For NUMA memory binding -%ifnarch s390x +%if %{have_numactl} BuildRequires: numactl-devel %endif BuildRequires: libgcrypt-devel # qemu-pr-helper 
multipath support (requires libudev too) BuildRequires: device-mapper-multipath-devel BuildRequires: systemd-devel -# used by qemu-bridge-helper and qemu-pr-helper -BuildRequires: libcap-ng-devel - -BuildRequires: diffutils -%ifarch x86_64 +%if %{have_pmem} BuildRequires: libpmem-devel -Requires: libpmem %endif - # qemu-keymap BuildRequires: pkgconfig(xkbcommon) - -# For s390-pgste flag -%ifarch s390x -BuildRequires: binutils >= 2.27-16 -%endif - %if %{have_opengl} BuildRequires: pkgconfig(epoxy) BuildRequires: pkgconfig(libdrm) BuildRequires: pkgconfig(gbm) %endif - BuildRequires: perl-Test-Harness BuildRequires: libslirp-devel @@ -267,19 +262,17 @@ Requires: edk2-ovmf Requires: edk2-aarch64 %endif -Requires: libseccomp >= 2.4.0 -# For compressed guest memory dumps -Requires: lzo snappy +Requires: libseccomp >= %{libseccomp_version} %if %{have_kvm_setup} Requires(post): systemd-units Requires(preun): systemd-units %endif -Requires: libusbx >= 1.0.23 +Requires: libusbx >= %{libusbx_version} %if %{have_usbredir} -Requires: usbredir >= 0.7.1 +Requires: usbredir >= %{usbredir_version} %endif %if %{have_fdt} -Requires: libfdt >= 1.6.0 +Requires: libfdt >= %{libfdt_version} %endif # Since SPICE is removed from RHEL-9, the following Obsoletes: @@ -303,14 +296,12 @@ qemu-kvm-docs provides documentation files regarding qemu-kvm. %package -n qemu-img Summary: QEMU command line tool for manipulating disk images -Group: Development/Tools %description -n qemu-img This package provides a command line tool for manipulating disk images. 
%package -n qemu-kvm-common Summary: QEMU common files needed by all QEMU targets -Group: Development/Tools Requires(post): /usr/bin/getent Requires(post): /usr/sbin/groupadd Requires(post): /usr/sbin/useradd @@ -593,7 +584,7 @@ pushd %{qemu_kvm_build} --enable-curl \ --enable-debug-info \ --enable-docs \ -%if 0%{have_fdt} +%if %{have_fdt} --enable-fdt \ %endif --enable-gcrypt \ @@ -602,7 +593,7 @@ pushd %{qemu_kvm_build} --enable-iconv \ --enable-kvm \ --enable-libiscsi \ -%ifarch x86_64 +%if %{have_pmem} --enable-libpmem \ %endif --enable-libssh \ @@ -613,15 +604,17 @@ pushd %{qemu_kvm_build} --enable-malloc-trim \ --enable-modules \ --enable-mpath \ -%ifnarch s390x +%if %{have_numactl} --enable-numa \ %endif -%if 0%{have_opengl} +%if %{have_opengl} --enable-opengl \ %endif --enable-pie \ +%if %{have_block_rbd} --enable-rbd \ -%if 0%{have_librdma} +%endif +%if %{have_librdma} --enable-rdma \ %endif --enable-seccomp \ @@ -632,7 +625,7 @@ pushd %{qemu_kvm_build} --enable-tools \ --enable-tpm \ --enable-trace-backend=dtrace \ -%if 0%{have_usbredir} +%if %{have_usbredir} --enable-usb-redir \ %endif --enable-virtiofsd \ @@ -687,6 +680,12 @@ make V=1 %{?_smp_mflags} $buildldflags cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm +%ifarch s390x + # Copy the built new images into place for "make check": + cp pc-bios/s390-ccw/s390-ccw.img pc-bios/s390-ccw/s390-netboot.img pc-bios/ +%endif + + gcc %{SOURCE6} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o ksmctl %endif popd @@ -872,6 +871,7 @@ rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/skiboot.lid rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qboot.rom rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-ccw.img +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-netboot.img rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/hppa-firmware.img rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/canyonlands.dtb rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot-sam460-20100605.bin @@ -894,10 +894,10 @@ rm -rf 
${RPM_BUILD_ROOT}%{_libexecdir}/virtfs-proxy-helper rm -rf ${RPM_BUILD_ROOT}%{_mandir}/man1/virtfs-proxy-helper* %ifarch s390x - # Use the s390-ccw.img that we've just built, not the pre-built one + # Use the s390-*.img that we've just built, not the pre-built ones install -m 0644 pc-bios/s390-ccw/s390-ccw.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ + install -m 0644 pc-bios/s390-ccw/s390-netboot.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ %else - rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-netboot.img rm -rf ${RPM_BUILD_ROOT}%{_libdir}/qemu-kvm/hw-s390x-virtio-gpu-ccw.so %endif @@ -962,13 +962,13 @@ rom_link() { rom_link ../sgabios/sgabios.bin sgabios.bin %endif -%if 0%{have_kvm_setup} +%if %{have_kvm_setup} install -D -p -m 755 %{SOURCE21} $RPM_BUILD_ROOT%{_prefix}/lib/systemd/kvm-setup install -D -p -m 644 %{SOURCE22} $RPM_BUILD_ROOT%{_unitdir}/kvm-setup.service install -D -p -m 644 %{SOURCE23} $RPM_BUILD_ROOT%{_presetdir}/85-kvm.preset %endif -%if 0%{have_memlock_limits} +%if %{have_memlock_limits} install -D -p -m 644 %{SOURCE28} $RPM_BUILD_ROOT%{_sysconfdir}/security/limits.d/95-kvm-memlock.conf %endif @@ -1000,7 +1000,7 @@ popd %check pushd %{qemu_kvm_build} echo "Testing qemu-kvm-build" -export DIFF=diff; make check V=1 +make check V=1 popd %post -n qemu-kvm-common @@ -1050,7 +1050,6 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : # Deliberately empty %files -n qemu-kvm-docs -%defattr(-,root,root) %dir %{qemudocdir} %doc %{qemudocdir}/genindex.html %doc %{qemudocdir}/search.html @@ -1071,7 +1070,6 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %doc %{qemudocdir}/_static/* %files -n qemu-kvm-common -%defattr(-,root,root) %{_mandir}/man7/qemu-qmp-ref.7* %{_mandir}/man7/qemu-cpu-models.7* %{_bindir}/qemu-keymap @@ -1139,19 +1137,18 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %{_datadir}/%{name}/linuxboot_dma.bin %{_datadir}/%{name}/dump-guest-memory.py* %{_datadir}/%{name}/trace-events-all -%if 
0%{have_kvm_setup} +%if %{have_kvm_setup} %{_prefix}/lib/systemd/kvm-setup %{_unitdir}/kvm-setup.service %{_presetdir}/85-kvm.preset %endif -%if 0%{have_memlock_limits} +%if %{have_memlock_limits} %{_sysconfdir}/security/limits.d/95-kvm-memlock.conf %endif %{_libexecdir}/virtiofsd %{_datadir}/%{name}/vhost-user/50-qemu-virtiofsd.json %files -n qemu-kvm-core -%defattr(-,root,root) %{_libexecdir}/qemu-kvm %{_datadir}/systemtap/tapset/qemu-kvm.stp %{_datadir}/systemtap/tapset/qemu-kvm-log.stp @@ -1171,7 +1168,6 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %endif %files -n qemu-img -%defattr(-,root,root) %{_bindir}/qemu-img %{_bindir}/qemu-io %{_bindir}/qemu-nbd @@ -1182,7 +1178,6 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %{_mandir}/man7/qemu-storage-daemon-qmp-ref.7* %files -n qemu-guest-agent -%defattr(-,root,root,-) %doc COPYING README.rst %{_bindir}/qemu-ga %{_mandir}/man8/qemu-ga.8* @@ -1210,7 +1205,7 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %files block-ssh %{_libdir}/qemu-kvm/block-ssh.so -%if 0%{have_opengl} +%if %{have_opengl} %files ui-opengl %{_libdir}/qemu-kvm/ui-egl-headless.so %{_libdir}/qemu-kvm/ui-opengl.so @@ -1218,6 +1213,24 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %endif %changelog +* Mon Jun 21 2021 Miroslav Rezanina - 6.0.0-6 +- kvm-yank-Unregister-function-when-using-TLS-migration.patch [bz#1972462] +- kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch [bz#1957194] +- kvm-redhat-Install-the-s390-netboot.img-that-we-ve-built.patch [bz#1957194] +- kvm-sockets-update-SOCKET_ADDRESS_TYPE_FD-listen-2-backl.patch [bz#1957194] +- kvm-target-i386-sev-add-support-to-query-the-attestation.patch [bz#1957194] +- kvm-spapr-Don-t-hijack-current_machine-boot_order.patch [bz#1957194] +- kvm-target-i386-Add-CPU-model-versions-supporting-xsaves.patch [bz#1957194] +- kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch [bz#1957194] +- 
kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch [bz#1957194] +- Specfile cleanup [bz#1973029] +- Resolves: bz#1972462 + (QEMU core dump when doing TLS migration via TCP) +- Resolves: bz#1957194 + (Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta) +- Resolves: bz#1973029 + (Spec file cleanups) + * Tue Jun 08 2021 Miroslav Rezanina - 6.0.0-5 - kvm-arm-virt-Register-highmem-and-gic-version-as-class-p.patch [bz#1952449] - kvm-hw-arm-virt-Add-8.5-and-9.0-machine-types-and-remove.patch [bz#1952449] From 0253757df89621ab32fff3c84616f062b278a66b Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 28 Jun 2021 03:28:59 -0400 Subject: [PATCH 126/195] * Mon Jun 28 2021 Miroslav Rezanina - 6.0.0-7 - kvm-aarch64-rh-devices-add-CONFIG_PXB.patch [bz#1967502] - kvm-virtio-gpu-handle-partial-maps-properly.patch [bz#1974795] - kvm-x86-Add-x86-rhel8.5-machine-types.patch [bz#1957194] - kvm-redhat-x86-Enable-kvm-asyncpf-int-by-default.patch [bz#1957194] - kvm-block-backend-add-drained_poll.patch [bz#1957194] - kvm-nbd-server-Use-drained-block-ops-to-quiesce-the-serv.patch [bz#1957194] - kvm-disable-CONFIG_USB_STORAGE_BOT.patch [bz#1957194] - kvm-doc-Fix-some-mistakes-in-the-SEV-documentation.patch [bz#1957194] - kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch [bz#1957194] - kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch [bz#1957194] - kvm-qga-drop-StandardError-syslog.patch [bz#1947977] - kvm-Remove-iscsi-support.patch [bz#1967133] - Resolves: bz#1967502 ([aarch64] [qemu] Compile the PCIe expander bridge) - Resolves: bz#1974795 ([RHEL9-beta] [aarch64] Launch guest with virtio-gpu-pci and virtual smmu causes "virtio_gpu_dequeue_ctrl_func" ERROR) - Resolves: bz#1957194 (Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta) - Resolves: bz#1947977 (remove StandardError=syslog from qemu-guest-agent.service) - Resolves: bz#1967133 (QEMU: disable libiscsi in RHEL-9) --- kvm-aarch64-rh-devices-add-CONFIG_PXB.patch | 37 ++++ 
kvm-block-backend-add-drained_poll.patch | 74 +++++++ kvm-disable-CONFIG_USB_STORAGE_BOT.patch | 49 +++++ ...me-mistakes-in-the-SEV-documentation.patch | 151 +++++++++++++ ...documentation-to-amd-memory-encrypti.patch | 141 ++++++++++++ ...rop-firmware.json-Add-SEV-ES-support.patch | 110 ++++++++++ ...rained-block-ops-to-quiesce-the-serv.patch | 191 +++++++++++++++++ ...86-Enable-kvm-asyncpf-int-by-default.patch | 49 +++++ ...tio-gpu-handle-partial-maps-properly.patch | 201 ++++++++++++++++++ kvm-x86-Add-x86-rhel8.5-machine-types.patch | 130 +++++++++++ qemu-guest-agent.service | 1 - qemu-kvm.spec | 63 ++++-- 12 files changed, 1179 insertions(+), 18 deletions(-) create mode 100644 kvm-aarch64-rh-devices-add-CONFIG_PXB.patch create mode 100644 kvm-block-backend-add-drained_poll.patch create mode 100644 kvm-disable-CONFIG_USB_STORAGE_BOT.patch create mode 100644 kvm-doc-Fix-some-mistakes-in-the-SEV-documentation.patch create mode 100644 kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch create mode 100644 kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch create mode 100644 kvm-nbd-server-Use-drained-block-ops-to-quiesce-the-serv.patch create mode 100644 kvm-redhat-x86-Enable-kvm-asyncpf-int-by-default.patch create mode 100644 kvm-virtio-gpu-handle-partial-maps-properly.patch create mode 100644 kvm-x86-Add-x86-rhel8.5-machine-types.patch diff --git a/kvm-aarch64-rh-devices-add-CONFIG_PXB.patch b/kvm-aarch64-rh-devices-add-CONFIG_PXB.patch new file mode 100644 index 0000000..37b85fb --- /dev/null +++ b/kvm-aarch64-rh-devices-add-CONFIG_PXB.patch @@ -0,0 +1,37 @@ +From d05ba1e2208cb17b8cf7dac050d95137a67dd988 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 24 Jun 2021 10:32:08 +0200 +Subject: [PATCH 01/12] aarch64-rh-devices: add CONFIG_PXB +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 14: aarch64-rh-devices: add CONFIG_PXB +RH-Commit: [1/1] 
6a9e6a96ea6ba1bee220a60e5a691a174a0a044b (eauger1/centos-qemu-kvm) +RH-Bugzilla: 1967502 +RH-Acked-by: Gavin Shan +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Andrew Jones + +We want to enable the PCIe expander bridge on aarch64. So let's +set CONFIG_PXB. + +Signed-off-by: Eric Auger +Signed-off-by: Miroslav Rezanina +--- + default-configs/devices/aarch64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/default-configs/devices/aarch64-rh-devices.mak b/default-configs/devices/aarch64-rh-devices.mak +index 4220469178..d8ce902720 100644 +--- a/default-configs/devices/aarch64-rh-devices.mak ++++ b/default-configs/devices/aarch64-rh-devices.mak +@@ -27,3 +27,4 @@ CONFIG_TPM_TIS_SYSBUS=y + CONFIG_PTIMER=y + CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y + CONFIG_PVPANIC_PCI=y ++CONFIG_PXB=y +-- +2.27.0 + diff --git a/kvm-block-backend-add-drained_poll.patch b/kvm-block-backend-add-drained_poll.patch new file mode 100644 index 0000000..b3cbc54 --- /dev/null +++ b/kvm-block-backend-add-drained_poll.patch @@ -0,0 +1,74 @@ +From e23a2be8c57666e091d9192e113a30ea06cd83ef Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Thu, 17 Jun 2021 09:13:20 -0400 +Subject: [PATCH 05/12] block-backend: add drained_poll +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 16: Synchronize with RHEL-AV 8.5 release 21 to RHEL 9 +RH-Commit: [3/8] 4ad1f536b00a762a1b094d76383b74826228892a (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Daniel P. Berrangé + +Allow block backends to poll their devices/users to check if they have +been quiesced when entering a drained section. + +This will be used in the next patch to wait for the NBD server to be +completely quiesced. 
+ +Suggested-by: Kevin Wolf +Reviewed-by: Kevin Wolf +Reviewed-by: Eric Blake +Signed-off-by: Sergio Lopez +Message-Id: <20210602060552.17433-2-slp@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 095cc4d0f62513d75e9bc1da37f08d9e97f267c4) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + block/block-backend.c | 7 ++++++- + include/sysemu/block-backend.h | 4 ++++ + 2 files changed, 10 insertions(+), 1 deletion(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 413af51f3b..05d8e5fb5d 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -2378,8 +2378,13 @@ static void blk_root_drained_begin(BdrvChild *child) + static bool blk_root_drained_poll(BdrvChild *child) + { + BlockBackend *blk = child->opaque; ++ bool busy = false; + assert(blk->quiesce_counter); +- return !!blk->in_flight; ++ ++ if (blk->dev_ops && blk->dev_ops->drained_poll) { ++ busy = blk->dev_ops->drained_poll(blk->dev_opaque); ++ } ++ return busy || !!blk->in_flight; + } + + static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) +diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h +index 880e903293..5423e3d9c6 100644 +--- a/include/sysemu/block-backend.h ++++ b/include/sysemu/block-backend.h +@@ -66,6 +66,10 @@ typedef struct BlockDevOps { + * Runs when the backend's last drain request ends. + */ + void (*drained_end)(void *opaque); ++ /* ++ * Is the device still busy? 
++ */ ++ bool (*drained_poll)(void *opaque); + } BlockDevOps; + + /* This struct is embedded in (the private) BlockBackend struct and contains +-- +2.27.0 + diff --git a/kvm-disable-CONFIG_USB_STORAGE_BOT.patch b/kvm-disable-CONFIG_USB_STORAGE_BOT.patch new file mode 100644 index 0000000..017b5ae --- /dev/null +++ b/kvm-disable-CONFIG_USB_STORAGE_BOT.patch @@ -0,0 +1,49 @@ +From 64ec0505fccf6f277430f3be1829a9e44cd00dbb Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Fri, 18 Jun 2021 12:04:24 -0400 +Subject: [PATCH 07/12] disable CONFIG_USB_STORAGE_BOT +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 16: Synchronize with RHEL-AV 8.5 release 21 to RHEL 9 +RH-Commit: [5/8] 73d3ee0a17590c8cddf6bd812e6a758951c36ea4 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Daniel P. Berrangé + +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + default-configs/devices/ppc64-rh-devices.mak | 1 - + default-configs/devices/x86_64-rh-devices.mak | 1 - + 2 files changed, 2 deletions(-) + +diff --git a/default-configs/devices/ppc64-rh-devices.mak b/default-configs/devices/ppc64-rh-devices.mak +index 5b01b7fac0..3ec5603ace 100644 +--- a/default-configs/devices/ppc64-rh-devices.mak ++++ b/default-configs/devices/ppc64-rh-devices.mak +@@ -15,7 +15,6 @@ CONFIG_USB=y + CONFIG_USB_OHCI=y + CONFIG_USB_OHCI_PCI=y + CONFIG_USB_SMARTCARD=y +-CONFIG_USB_STORAGE_BOT=y + CONFIG_USB_STORAGE_CORE=y + CONFIG_USB_STORAGE_CLASSIC=y + CONFIG_USB_XHCI=y +diff --git a/default-configs/devices/x86_64-rh-devices.mak b/default-configs/devices/x86_64-rh-devices.mak +index d09c138fc6..81bda09f4c 100644 +--- a/default-configs/devices/x86_64-rh-devices.mak ++++ b/default-configs/devices/x86_64-rh-devices.mak +@@ -74,7 +74,6 @@ CONFIG_USB=y + CONFIG_USB_EHCI=y + CONFIG_USB_EHCI_PCI=y + CONFIG_USB_SMARTCARD=y +-CONFIG_USB_STORAGE_BOT=y + 
CONFIG_USB_STORAGE_CORE=y + CONFIG_USB_STORAGE_CLASSIC=y + CONFIG_USB_UHCI=y +-- +2.27.0 + diff --git a/kvm-doc-Fix-some-mistakes-in-the-SEV-documentation.patch b/kvm-doc-Fix-some-mistakes-in-the-SEV-documentation.patch new file mode 100644 index 0000000..7439afd --- /dev/null +++ b/kvm-doc-Fix-some-mistakes-in-the-SEV-documentation.patch @@ -0,0 +1,151 @@ +From 17c1559139d6a58794944901f84dd4e8cd1f5335 Mon Sep 17 00:00:00 2001 +From: Connor Kuehl +Date: Tue, 22 Jun 2021 20:00:20 -0400 +Subject: [PATCH 08/12] doc: Fix some mistakes in the SEV documentation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 16: Synchronize with RHEL-AV 8.5 release 21 to RHEL 9 +RH-Commit: [6/8] ce828f81de1320a1833241700cb13dfdcf7d82e7 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Daniel P. Berrangé + +From: Tom Lendacky + +Fix some spelling and grammar mistakes in the amd-memory-encryption.txt +file. No new information added. + +Signed-off-by: Tom Lendacky +Reviewed-by: Laszlo Ersek +Reviewed-by: Connor Kuehl +Message-Id: +Signed-off-by: Eduardo Habkost +(cherry picked from commit f538adeccf4554e6402fe661a0a51bcc8d6bd227) +Signed-off-by: Connor Kuehl +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + docs/amd-memory-encryption.txt | 59 +++++++++++++++++----------------- + 1 file changed, 29 insertions(+), 30 deletions(-) + +diff --git a/docs/amd-memory-encryption.txt b/docs/amd-memory-encryption.txt +index 145896aec7..ed85159ea7 100644 +--- a/docs/amd-memory-encryption.txt ++++ b/docs/amd-memory-encryption.txt +@@ -1,38 +1,38 @@ + Secure Encrypted Virtualization (SEV) is a feature found on AMD processors. + + SEV is an extension to the AMD-V architecture which supports running encrypted +-virtual machine (VMs) under the control of KVM. 
Encrypted VMs have their pages ++virtual machines (VMs) under the control of KVM. Encrypted VMs have their pages + (code and data) secured such that only the guest itself has access to the + unencrypted version. Each encrypted VM is associated with a unique encryption +-key; if its data is accessed to a different entity using a different key the ++key; if its data is accessed by a different entity using a different key the + encrypted guests data will be incorrectly decrypted, leading to unintelligible + data. + +-The key management of this feature is handled by separate processor known as +-AMD secure processor (AMD-SP) which is present in AMD SOCs. Firmware running +-inside the AMD-SP provide commands to support common VM lifecycle. This ++Key management for this feature is handled by a separate processor known as the ++AMD secure processor (AMD-SP), which is present in AMD SOCs. Firmware running ++inside the AMD-SP provides commands to support a common VM lifecycle. This + includes commands for launching, snapshotting, migrating and debugging the +-encrypted guest. Those SEV command can be issued via KVM_MEMORY_ENCRYPT_OP ++encrypted guest. These SEV commands can be issued via KVM_MEMORY_ENCRYPT_OP + ioctls. + + Launching + --------- +-Boot images (such as bios) must be encrypted before guest can be booted. +-MEMORY_ENCRYPT_OP ioctl provides commands to encrypt the images :LAUNCH_START, ++Boot images (such as bios) must be encrypted before a guest can be booted. The ++MEMORY_ENCRYPT_OP ioctl provides commands to encrypt the images: LAUNCH_START, + LAUNCH_UPDATE_DATA, LAUNCH_MEASURE and LAUNCH_FINISH. These four commands + together generate a fresh memory encryption key for the VM, encrypt the boot +-images and provide a measurement than can be used as an attestation of the ++images and provide a measurement than can be used as an attestation of a + successful launch. + + LAUNCH_START is called first to create a cryptographic launch context within +-the firmware. 
To create this context, guest owner must provides guest policy, ++the firmware. To create this context, guest owner must provide a guest policy, + its public Diffie-Hellman key (PDH) and session parameters. These inputs +-should be treated as binary blob and must be passed as-is to the SEV firmware. ++should be treated as a binary blob and must be passed as-is to the SEV firmware. + +-The guest policy is passed as plaintext and hypervisor may able to read it ++The guest policy is passed as plaintext. A hypervisor may choose to read it, + but should not modify it (any modification of the policy bits will result + in bad measurement). The guest policy is a 4-byte data structure containing +-several flags that restricts what can be done on running SEV guest. ++several flags that restricts what can be done on a running SEV guest. + See KM Spec section 3 and 6.2 for more details. + + The guest policy can be provided via the 'policy' property (see below) +@@ -40,31 +40,30 @@ The guest policy can be provided via the 'policy' property (see below) + # ${QEMU} \ + sev-guest,id=sev0,policy=0x1...\ + +-Guest owners provided DH certificate and session parameters will be used to ++The guest owner provided DH certificate and session parameters will be used to + establish a cryptographic session with the guest owner to negotiate keys used + for the attestation. + +-The DH certificate and session blob can be provided via 'dh-cert-file' and +-'session-file' property (see below ++The DH certificate and session blob can be provided via the 'dh-cert-file' and ++'session-file' properties (see below) + + # ${QEMU} \ + sev-guest,id=sev0,dh-cert-file=,session-file= + + LAUNCH_UPDATE_DATA encrypts the memory region using the cryptographic context +-created via LAUNCH_START command. If required, this command can be called ++created via the LAUNCH_START command. If required, this command can be called + multiple times to encrypt different memory regions. 
The command also calculates + the measurement of the memory contents as it encrypts. + +-LAUNCH_MEASURE command can be used to retrieve the measurement of encrypted +-memory. This measurement is a signature of the memory contents that can be +-sent to the guest owner as an attestation that the memory was encrypted +-correctly by the firmware. The guest owner may wait to provide the guest +-confidential information until it can verify the attestation measurement. +-Since the guest owner knows the initial contents of the guest at boot, the +-attestation measurement can be verified by comparing it to what the guest owner +-expects. ++LAUNCH_MEASURE can be used to retrieve the measurement of encrypted memory. ++This measurement is a signature of the memory contents that can be sent to the ++guest owner as an attestation that the memory was encrypted correctly by the ++firmware. The guest owner may wait to provide the guest confidential information ++until it can verify the attestation measurement. Since the guest owner knows the ++initial contents of the guest at boot, the attestation measurement can be ++verified by comparing it to what the guest owner expects. + +-LAUNCH_FINISH command finalizes the guest launch and destroy's the cryptographic ++LAUNCH_FINISH finalizes the guest launch and destroys the cryptographic + context. + + See SEV KM API Spec [1] 'Launching a guest' usage flow (Appendix A) for the +@@ -78,10 +77,10 @@ To launch a SEV guest + + Debugging + ----------- +-Since memory contents of SEV guest is encrypted hence hypervisor access to the +-guest memory will get a cipher text. If guest policy allows debugging, then +-hypervisor can use DEBUG_DECRYPT and DEBUG_ENCRYPT commands access the guest +-memory region for debug purposes. This is not supported in QEMU yet. ++Since the memory contents of a SEV guest are encrypted, hypervisor access to ++the guest memory will return cipher text. 
If the guest policy allows debugging, ++then a hypervisor can use the DEBUG_DECRYPT and DEBUG_ENCRYPT commands to access ++the guest memory region for debug purposes. This is not supported in QEMU yet. + + Snapshot/Restore + ----------------- +-- +2.27.0 + diff --git a/kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch b/kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch new file mode 100644 index 0000000..2aabcbd --- /dev/null +++ b/kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch @@ -0,0 +1,141 @@ +From 1bd5660666d2a1f704ebabeed8a2bbfa02410f41 Mon Sep 17 00:00:00 2001 +From: Connor Kuehl +Date: Tue, 22 Jun 2021 20:00:21 -0400 +Subject: [PATCH 09/12] docs: Add SEV-ES documentation to + amd-memory-encryption.txt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 16: Synchronize with RHEL-AV 8.5 release 21 to RHEL 9 +RH-Commit: [7/8] 36e49577484813866132b90c64cf99779326db74 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Daniel P. Berrangé + +From: Tom Lendacky + +Update the amd-memory-encryption.txt file with information about SEV-ES, +including how to launch an SEV-ES guest and some of the differences +between SEV and SEV-ES guests in regards to launching and measuring the +guest. + +Signed-off-by: Tom Lendacky +Acked-by: Laszlo Ersek +Reviewed-by: Connor Kuehl +Message-Id: +Signed-off-by: Eduardo Habkost +(cherry picked from commit 61b7d7098cd53dd386939610d534f8bd79240881) +Signed-off-by: Connor Kuehl +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + docs/amd-memory-encryption.txt | 54 +++++++++++++++++++++++++++++----- + 1 file changed, 47 insertions(+), 7 deletions(-) + +diff --git a/docs/amd-memory-encryption.txt b/docs/amd-memory-encryption.txt +index ed85159ea7..ffca382b5f 100644 +--- a/docs/amd-memory-encryption.txt ++++ b/docs/amd-memory-encryption.txt +@@ -15,6 +15,13 @@ includes commands for launching, snapshotting, migrating and debugging the + encrypted guest. These SEV commands can be issued via KVM_MEMORY_ENCRYPT_OP + ioctls. + ++Secure Encrypted Virtualization - Encrypted State (SEV-ES) builds on the SEV ++support to additionally protect the guest register state. In order to allow a ++hypervisor to perform functions on behalf of a guest, there is architectural ++support for notifying a guest's operating system when certain types of VMEXITs ++are about to occur. This allows the guest to selectively share information with ++the hypervisor to satisfy the requested function. ++ + Launching + --------- + Boot images (such as bios) must be encrypted before a guest can be booted. The +@@ -24,6 +31,9 @@ together generate a fresh memory encryption key for the VM, encrypt the boot + images and provide a measurement than can be used as an attestation of a + successful launch. + ++For a SEV-ES guest, the LAUNCH_UPDATE_VMSA command is also used to encrypt the ++guest register state, or VM save area (VMSA), for all of the guest vCPUs. ++ + LAUNCH_START is called first to create a cryptographic launch context within + the firmware. To create this context, guest owner must provide a guest policy, + its public Diffie-Hellman key (PDH) and session parameters. 
These inputs +@@ -40,6 +50,12 @@ The guest policy can be provided via the 'policy' property (see below) + # ${QEMU} \ + sev-guest,id=sev0,policy=0x1...\ + ++Setting the "SEV-ES required" policy bit (bit 2) will launch the guest as a ++SEV-ES guest (see below) ++ ++# ${QEMU} \ ++ sev-guest,id=sev0,policy=0x5...\ ++ + The guest owner provided DH certificate and session parameters will be used to + establish a cryptographic session with the guest owner to negotiate keys used + for the attestation. +@@ -55,13 +71,19 @@ created via the LAUNCH_START command. If required, this command can be called + multiple times to encrypt different memory regions. The command also calculates + the measurement of the memory contents as it encrypts. + +-LAUNCH_MEASURE can be used to retrieve the measurement of encrypted memory. +-This measurement is a signature of the memory contents that can be sent to the +-guest owner as an attestation that the memory was encrypted correctly by the +-firmware. The guest owner may wait to provide the guest confidential information +-until it can verify the attestation measurement. Since the guest owner knows the +-initial contents of the guest at boot, the attestation measurement can be +-verified by comparing it to what the guest owner expects. ++LAUNCH_UPDATE_VMSA encrypts all the vCPU VMSAs for a SEV-ES guest using the ++cryptographic context created via the LAUNCH_START command. The command also ++calculates the measurement of the VMSAs as it encrypts them. ++ ++LAUNCH_MEASURE can be used to retrieve the measurement of encrypted memory and, ++for a SEV-ES guest, encrypted VMSAs. This measurement is a signature of the ++memory contents and, for a SEV-ES guest, the VMSA contents, that can be sent ++to the guest owner as an attestation that the memory and VMSAs were encrypted ++correctly by the firmware. The guest owner may wait to provide the guest ++confidential information until it can verify the attestation measurement. 
++Since the guest owner knows the initial contents of the guest at boot, the ++attestation measurement can be verified by comparing it to what the guest owner ++expects. + + LAUNCH_FINISH finalizes the guest launch and destroys the cryptographic + context. +@@ -75,6 +97,22 @@ To launch a SEV guest + -machine ...,confidential-guest-support=sev0 \ + -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1 + ++To launch a SEV-ES guest ++ ++# ${QEMU} \ ++ -machine ...,confidential-guest-support=sev0 \ ++ -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1,policy=0x5 ++ ++An SEV-ES guest has some restrictions as compared to a SEV guest. Because the ++guest register state is encrypted and cannot be updated by the VMM/hypervisor, ++a SEV-ES guest: ++ - Does not support SMM - SMM support requires updating the guest register ++ state. ++ - Does not support reboot - a system reset requires updating the guest register ++ state. ++ - Requires in-kernel irqchip - the burden is placed on the hypervisor to ++ manage booting APs. 
++ + Debugging + ----------- + Since the memory contents of a SEV guest are encrypted, hypervisor access to +@@ -101,8 +139,10 @@ Secure Encrypted Virtualization Key Management: + + KVM Forum slides: + http://www.linux-kvm.org/images/7/74/02x08A-Thomas_Lendacky-AMDs_Virtualizatoin_Memory_Encryption_Technology.pdf ++https://www.linux-kvm.org/images/9/94/Extending-Secure-Encrypted-Virtualization-with-SEV-ES-Thomas-Lendacky-AMD.pdf + + AMD64 Architecture Programmer's Manual: + http://support.amd.com/TechDocs/24593.pdf + SME is section 7.10 + SEV is section 15.34 ++ SEV-ES is section 15.35 +-- +2.27.0 + diff --git a/kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch b/kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch new file mode 100644 index 0000000..e900ba7 --- /dev/null +++ b/kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch @@ -0,0 +1,110 @@ +From e408203bab17e32f8d42ae9ad61e94a73bfaec67 Mon Sep 17 00:00:00 2001 +From: Connor Kuehl +Date: Tue, 22 Jun 2021 20:00:22 -0400 +Subject: [PATCH 10/12] docs/interop/firmware.json: Add SEV-ES support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 16: Synchronize with RHEL-AV 8.5 release 21 to RHEL 9 +RH-Commit: [8/8] b49ebbaf40b56d95c67475a0373d6906a3e4f0e3 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Daniel P. Berrangé + +From: Tom Lendacky + +Create an enum definition, '@amd-sev-es', for SEV-ES and add documention +for the new enum. Add an example that shows some of the requirements for +SEV-ES, including not having SMM support and the requirement for an +X64-only build. + +Signed-off-by: Tom Lendacky +Reviewed-by: Laszlo Ersek +Reviewed-by: Connor Kuehl +Message-Id: +Signed-off-by: Eduardo Habkost +(cherry picked from commit d44df1d73ce04d7f4b8f94cba5f715e2dadc998b) +Signed-off-by: Connor Kuehl +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + docs/interop/firmware.json | 47 +++++++++++++++++++++++++++++++++++++- + 1 file changed, 46 insertions(+), 1 deletion(-) + +diff --git a/docs/interop/firmware.json b/docs/interop/firmware.json +index 9d94ccafa9..8d8b0be030 100644 +--- a/docs/interop/firmware.json ++++ b/docs/interop/firmware.json +@@ -115,6 +115,12 @@ + # this feature are documented in + # "docs/amd-memory-encryption.txt". + # ++# @amd-sev-es: The firmware supports running under AMD Secure Encrypted ++# Virtualization - Encrypted State, as specified in the AMD64 ++# Architecture Programmer's Manual. QEMU command line options ++# related to this feature are documented in ++# "docs/amd-memory-encryption.txt". ++# + # @enrolled-keys: The variable store (NVRAM) template associated with + # the firmware binary has the UEFI Secure Boot + # operational mode turned on, with certificates +@@ -179,7 +185,7 @@ + # Since: 3.0 + ## + { 'enum' : 'FirmwareFeature', +- 'data' : [ 'acpi-s3', 'acpi-s4', 'amd-sev', 'enrolled-keys', ++ 'data' : [ 'acpi-s3', 'acpi-s4', 'amd-sev', 'amd-sev-es', 'enrolled-keys', + 'requires-smm', 'secure-boot', 'verbose-dynamic', + 'verbose-static' ] } + +@@ -504,6 +510,45 @@ + # } + # + # { ++# "description": "OVMF with SEV-ES support", ++# "interface-types": [ ++# "uefi" ++# ], ++# "mapping": { ++# "device": "flash", ++# "executable": { ++# "filename": "/usr/share/OVMF/OVMF_CODE.fd", ++# "format": "raw" ++# }, ++# "nvram-template": { ++# "filename": "/usr/share/OVMF/OVMF_VARS.fd", ++# "format": "raw" ++# } ++# }, ++# "targets": [ ++# { ++# "architecture": "x86_64", ++# "machines": [ ++# "pc-q35-*" ++# ] ++# } ++# ], ++# "features": [ ++# "acpi-s3", ++# "amd-sev", ++# "amd-sev-es", ++# "verbose-dynamic" ++# ], ++# "tags": [ ++# "-a X64", ++# "-p OvmfPkg/OvmfPkgX64.dsc", ++# "-t GCC48", ++# "-b DEBUG", ++# "-D FD_SIZE_4MB" ++# ] ++# } ++# ++# { + # "description": "UEFI firmware for ARM64 virtual machines", + # "interface-types": [ + # 
"uefi" +-- +2.27.0 + diff --git a/kvm-nbd-server-Use-drained-block-ops-to-quiesce-the-serv.patch b/kvm-nbd-server-Use-drained-block-ops-to-quiesce-the-serv.patch new file mode 100644 index 0000000..af8a82c --- /dev/null +++ b/kvm-nbd-server-Use-drained-block-ops-to-quiesce-the-serv.patch @@ -0,0 +1,191 @@ +From 9182af6a819e60a079349fd6d8b28a28adea90b1 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez Pascual +Date: Thu, 17 Jun 2021 09:13:21 -0400 +Subject: [PATCH 06/12] nbd/server: Use drained block ops to quiesce the server +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 16: Synchronize with RHEL-AV 8.5 release 21 to RHEL 9 +RH-Commit: [4/8] ca32c99563254a8a31104948e41fa691453d0399 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Daniel P. Berrangé + +Before switching between AioContexts we need to make sure that we're +fully quiesced ("nb_requests == 0" for every client) when entering the +drained section. + +To do this, we set "quiescing = true" for every client on +".drained_begin" to prevent new coroutines from being created, and +check if "nb_requests == 0" on ".drained_poll". Finally, once we're +exiting the drained section, on ".drained_end" we set "quiescing = +false" and call "nbd_client_receive_next_request()" to resume the +processing of new requests. + +With these changes, "blk_aio_attach()" and "blk_aio_detach()" can be +reverted to be as simple as they were before f148ae7d36. + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1960137 +Suggested-by: Kevin Wolf +Signed-off-by: Sergio Lopez +Message-Id: <20210602060552.17433-3-slp@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit fd6afc501a019682d1b8468b562355a2887087bd) +Signed-off-by: Sergio Lopez +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + nbd/server.c | 82 ++++++++++++++++++++++++++++++++++++++-------------- + 1 file changed, 61 insertions(+), 21 deletions(-) + +diff --git a/nbd/server.c b/nbd/server.c +index 86a44a9b41..b60ebc3ab6 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -1513,6 +1513,11 @@ static void nbd_request_put(NBDRequestData *req) + g_free(req); + + client->nb_requests--; ++ ++ if (client->quiescing && client->nb_requests == 0) { ++ aio_wait_kick(); ++ } ++ + nbd_client_receive_next_request(client); + + nbd_client_put(client); +@@ -1530,49 +1535,68 @@ static void blk_aio_attached(AioContext *ctx, void *opaque) + QTAILQ_FOREACH(client, &exp->clients, next) { + qio_channel_attach_aio_context(client->ioc, ctx); + ++ assert(client->nb_requests == 0); + assert(client->recv_coroutine == NULL); + assert(client->send_coroutine == NULL); +- +- if (client->quiescing) { +- client->quiescing = false; +- nbd_client_receive_next_request(client); +- } + } + } + +-static void nbd_aio_detach_bh(void *opaque) ++static void blk_aio_detach(void *opaque) + { + NBDExport *exp = opaque; + NBDClient *client; + ++ trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); ++ + QTAILQ_FOREACH(client, &exp->clients, next) { + qio_channel_detach_aio_context(client->ioc); ++ } ++ ++ exp->common.ctx = NULL; ++} ++ ++static void nbd_drained_begin(void *opaque) ++{ ++ NBDExport *exp = opaque; ++ NBDClient *client; ++ ++ QTAILQ_FOREACH(client, &exp->clients, next) { + client->quiescing = true; ++ } ++} + +- if (client->recv_coroutine) { +- if (client->read_yielding) { +- qemu_aio_coroutine_enter(exp->common.ctx, +- client->recv_coroutine); +- } else { +- AIO_WAIT_WHILE(exp->common.ctx, client->recv_coroutine != NULL); +- } +- } ++static void nbd_drained_end(void *opaque) ++{ ++ NBDExport *exp = opaque; ++ NBDClient *client; + +- if (client->send_coroutine) { +- AIO_WAIT_WHILE(exp->common.ctx, client->send_coroutine != NULL); +- } ++ QTAILQ_FOREACH(client, 
&exp->clients, next) { ++ client->quiescing = false; ++ nbd_client_receive_next_request(client); + } + } + +-static void blk_aio_detach(void *opaque) ++static bool nbd_drained_poll(void *opaque) + { + NBDExport *exp = opaque; ++ NBDClient *client; + +- trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); ++ QTAILQ_FOREACH(client, &exp->clients, next) { ++ if (client->nb_requests != 0) { ++ /* ++ * If there's a coroutine waiting for a request on nbd_read_eof() ++ * enter it here so we don't depend on the client to wake it up. ++ */ ++ if (client->recv_coroutine != NULL && client->read_yielding) { ++ qemu_aio_coroutine_enter(exp->common.ctx, ++ client->recv_coroutine); ++ } + +- aio_wait_bh_oneshot(exp->common.ctx, nbd_aio_detach_bh, exp); ++ return true; ++ } ++ } + +- exp->common.ctx = NULL; ++ return false; + } + + static void nbd_eject_notifier(Notifier *n, void *data) +@@ -1594,6 +1618,12 @@ void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk) + blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier); + } + ++static const BlockDevOps nbd_block_ops = { ++ .drained_begin = nbd_drained_begin, ++ .drained_end = nbd_drained_end, ++ .drained_poll = nbd_drained_poll, ++}; ++ + static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, + Error **errp) + { +@@ -1715,8 +1745,17 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, + + exp->allocation_depth = arg->allocation_depth; + ++ /* ++ * We need to inhibit request queuing in the block layer to ensure we can ++ * be properly quiesced when entering a drained section, as our coroutines ++ * servicing pending requests might enter blk_pread(). 
++ */ ++ blk_set_disable_request_queuing(blk, true); ++ + blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); + ++ blk_set_dev_ops(blk, &nbd_block_ops, exp); ++ + QTAILQ_INSERT_TAIL(&exports, exp, next); + + return 0; +@@ -1788,6 +1827,7 @@ static void nbd_export_delete(BlockExport *blk_exp) + } + blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached, + blk_aio_detach, exp); ++ blk_set_disable_request_queuing(exp->common.blk, false); + } + + for (i = 0; i < exp->nr_export_bitmaps; i++) { +-- +2.27.0 + diff --git a/kvm-redhat-x86-Enable-kvm-asyncpf-int-by-default.patch b/kvm-redhat-x86-Enable-kvm-asyncpf-int-by-default.patch new file mode 100644 index 0000000..7fd2a56 --- /dev/null +++ b/kvm-redhat-x86-Enable-kvm-asyncpf-int-by-default.patch @@ -0,0 +1,49 @@ +From a9546384e1fe8b4dad9ab00c52f45dac3a8fbc00 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Tue, 8 Jun 2021 10:29:07 -0400 +Subject: [PATCH 04/12] redhat: x86: Enable 'kvm-asyncpf-int' by default +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 16: Synchronize with RHEL-AV 8.5 release 21 to RHEL 9 +RH-Commit: [2/8] 2ea940445291df74dfed2d2f9f2b1f88a3eca31b (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Daniel P. Berrangé + +'kvm-asyncpf-int' feature is supported by KVM starting with RHEL-8.4 +kernel, enable the feature by default starting with RHEL-8.5 machine +type. + +Signed-off-by: Vitaly Kuznetsov +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/i386/pc.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 0a374dec39..cdbfa84d2e 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -366,12 +366,15 @@ GlobalProperty pc_rhel_compat[] = { + { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, + /* bz 1508330 */ + { "vfio-pci", "x-no-geforce-quirks", "on" }, ++ /* bz 1941397 */ ++ { TYPE_X86_CPU, "kvm-asyncpf-int", "on" }, + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + + GlobalProperty pc_rhel_8_4_compat[] = { + /* pc_rhel_8_4_compat from pc_compat_5_2 */ + { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, ++ { TYPE_X86_CPU, "kvm-asyncpf-int", "off" }, + }; + const size_t pc_rhel_8_4_compat_len = G_N_ELEMENTS(pc_rhel_8_4_compat); + +-- +2.27.0 + diff --git a/kvm-virtio-gpu-handle-partial-maps-properly.patch b/kvm-virtio-gpu-handle-partial-maps-properly.patch new file mode 100644 index 0000000..4028f08 --- /dev/null +++ b/kvm-virtio-gpu-handle-partial-maps-properly.patch @@ -0,0 +1,201 @@ +From cdc537ada9528e09f8c70219f5a9a1ce8a4efa7e Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Thu, 6 May 2021 11:10:01 +0200 +Subject: [PATCH 02/12] virtio-gpu: handle partial maps properly + +RH-Author: Eric Auger +RH-MergeRequest: 15: virtio-gpu: handle partial maps properly +RH-Commit: [1/1] f2b0fd9758251d1f3a5ff9563911c8bdb4b191f0 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 1974795 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Peter Xu + +dma_memory_map() may map only a part of the request. Happens if the +request can't be mapped in one go, for example due to a iommu creating +a linear dma mapping for scattered physical pages. Should that be the +case virtio-gpu must call dma_memory_map() again with the remaining +range instead of simply throwing an error. + +Note that this change implies the number of iov entries may differ from +the number of mapping entries sent by the guest. 
Therefore the iov_len +bookkeeping needs some updates too, we have to explicitly pass around +the iov length now. + +Reported-by: Auger Eric +Signed-off-by: Gerd Hoffmann +Message-id: 20210506091001.1301250-1-kraxel@redhat.com +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Message-Id: <20210506091001.1301250-1-kraxel@redhat.com> +(cherry picked from commit 9049f8bc445d50c0b5fe5500c0ec51fcc821c2ef) +Signed-off-by: Eric Auger +Signed-off-by: Miroslav Rezanina +--- + hw/display/virtio-gpu-3d.c | 7 ++-- + hw/display/virtio-gpu.c | 76 ++++++++++++++++++++-------------- + include/hw/virtio/virtio-gpu.h | 3 +- + 3 files changed, 52 insertions(+), 34 deletions(-) + +diff --git a/hw/display/virtio-gpu-3d.c b/hw/display/virtio-gpu-3d.c +index d98964858e..72c14d9132 100644 +--- a/hw/display/virtio-gpu-3d.c ++++ b/hw/display/virtio-gpu-3d.c +@@ -283,22 +283,23 @@ static void virgl_resource_attach_backing(VirtIOGPU *g, + { + struct virtio_gpu_resource_attach_backing att_rb; + struct iovec *res_iovs; ++ uint32_t res_niov; + int ret; + + VIRTIO_GPU_FILL_CMD(att_rb); + trace_virtio_gpu_cmd_res_back_attach(att_rb.resource_id); + +- ret = virtio_gpu_create_mapping_iov(g, &att_rb, cmd, NULL, &res_iovs); ++ ret = virtio_gpu_create_mapping_iov(g, &att_rb, cmd, NULL, &res_iovs, &res_niov); + if (ret != 0) { + cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; + return; + } + + ret = virgl_renderer_resource_attach_iov(att_rb.resource_id, +- res_iovs, att_rb.nr_entries); ++ res_iovs, res_niov); + + if (ret != 0) +- virtio_gpu_cleanup_mapping_iov(g, res_iovs, att_rb.nr_entries); ++ virtio_gpu_cleanup_mapping_iov(g, res_iovs, res_niov); + } + + static void virgl_resource_detach_backing(VirtIOGPU *g, +diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c +index c9f5e36fd0..6f3791deb3 100644 +--- a/hw/display/virtio-gpu.c ++++ b/hw/display/virtio-gpu.c +@@ -608,11 +608,12 @@ static void virtio_gpu_set_scanout(VirtIOGPU *g, + int virtio_gpu_create_mapping_iov(VirtIOGPU *g, + struct 
virtio_gpu_resource_attach_backing *ab, + struct virtio_gpu_ctrl_command *cmd, +- uint64_t **addr, struct iovec **iov) ++ uint64_t **addr, struct iovec **iov, ++ uint32_t *niov) + { + struct virtio_gpu_mem_entry *ents; + size_t esize, s; +- int i; ++ int e, v; + + if (ab->nr_entries > 16384) { + qemu_log_mask(LOG_GUEST_ERROR, +@@ -633,37 +634,53 @@ int virtio_gpu_create_mapping_iov(VirtIOGPU *g, + return -1; + } + +- *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries); ++ *iov = NULL; + if (addr) { +- *addr = g_malloc0(sizeof(uint64_t) * ab->nr_entries); ++ *addr = NULL; + } +- for (i = 0; i < ab->nr_entries; i++) { +- uint64_t a = le64_to_cpu(ents[i].addr); +- uint32_t l = le32_to_cpu(ents[i].length); +- hwaddr len = l; +- (*iov)[i].iov_base = dma_memory_map(VIRTIO_DEVICE(g)->dma_as, +- a, &len, DMA_DIRECTION_TO_DEVICE); +- (*iov)[i].iov_len = len; +- if (addr) { +- (*addr)[i] = a; +- } +- if (!(*iov)[i].iov_base || len != l) { +- qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to map MMIO memory for" +- " resource %d element %d\n", +- __func__, ab->resource_id, i); +- if ((*iov)[i].iov_base) { +- i++; /* cleanup the 'i'th map */ ++ for (e = 0, v = 0; e < ab->nr_entries; e++) { ++ uint64_t a = le64_to_cpu(ents[e].addr); ++ uint32_t l = le32_to_cpu(ents[e].length); ++ hwaddr len; ++ void *map; ++ ++ do { ++ len = l; ++ map = dma_memory_map(VIRTIO_DEVICE(g)->dma_as, ++ a, &len, DMA_DIRECTION_TO_DEVICE); ++ if (!map) { ++ qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to map MMIO memory for" ++ " resource %d element %d\n", ++ __func__, ab->resource_id, e); ++ virtio_gpu_cleanup_mapping_iov(g, *iov, v); ++ g_free(ents); ++ *iov = NULL; ++ if (addr) { ++ g_free(*addr); ++ *addr = NULL; ++ } ++ return -1; ++ } ++ ++ if (!(v % 16)) { ++ *iov = g_realloc(*iov, sizeof(struct iovec) * (v + 16)); ++ if (addr) { ++ *addr = g_realloc(*addr, sizeof(uint64_t) * (v + 16)); ++ } + } +- virtio_gpu_cleanup_mapping_iov(g, *iov, i); +- g_free(ents); +- *iov = NULL; ++ (*iov)[v].iov_base = 
map; ++ (*iov)[v].iov_len = len; + if (addr) { +- g_free(*addr); +- *addr = NULL; ++ (*addr)[v] = a; + } +- return -1; +- } ++ ++ a += len; ++ l -= len; ++ v += 1; ++ } while (l > 0); + } ++ *niov = v; ++ + g_free(ents); + return 0; + } +@@ -717,13 +734,12 @@ virtio_gpu_resource_attach_backing(VirtIOGPU *g, + return; + } + +- ret = virtio_gpu_create_mapping_iov(g, &ab, cmd, &res->addrs, &res->iov); ++ ret = virtio_gpu_create_mapping_iov(g, &ab, cmd, &res->addrs, ++ &res->iov, &res->iov_cnt); + if (ret != 0) { + cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; + return; + } +- +- res->iov_cnt = ab.nr_entries; + } + + static void +diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h +index fae149235c..0d15af41d9 100644 +--- a/include/hw/virtio/virtio-gpu.h ++++ b/include/hw/virtio/virtio-gpu.h +@@ -209,7 +209,8 @@ void virtio_gpu_get_edid(VirtIOGPU *g, + int virtio_gpu_create_mapping_iov(VirtIOGPU *g, + struct virtio_gpu_resource_attach_backing *ab, + struct virtio_gpu_ctrl_command *cmd, +- uint64_t **addr, struct iovec **iov); ++ uint64_t **addr, struct iovec **iov, ++ uint32_t *niov); + void virtio_gpu_cleanup_mapping_iov(VirtIOGPU *g, + struct iovec *iov, uint32_t count); + void virtio_gpu_process_cmdq(VirtIOGPU *g); +-- +2.27.0 + diff --git a/kvm-x86-Add-x86-rhel8.5-machine-types.patch b/kvm-x86-Add-x86-rhel8.5-machine-types.patch new file mode 100644 index 0000000..9100f47 --- /dev/null +++ b/kvm-x86-Add-x86-rhel8.5-machine-types.patch @@ -0,0 +1,130 @@ +From 1497b5d371a63dd20d3b14ca2f8cce99845a1c2c Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Wed, 19 May 2021 15:46:27 -0400 +Subject: [PATCH 03/12] x86: Add x86 rhel8.5 machine types +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 16: Synchronize with RHEL-AV 8.5 release 21 to RHEL 9 +RH-Commit: [1/8] db81806d99b545abe4dcba576fb33c02ec283dd7 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Daniel P. Berrangé + +From: "Dr. David Alan Gilbert" + +Add the 8.5 machine type and the compat entries. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/i386/pc.c | 6 ++++++ + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 24 ++++++++++++++++++++++-- + include/hw/i386/pc.h | 3 +++ + 4 files changed, 33 insertions(+), 2 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index edc02a68ca..0a374dec39 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -369,6 +369,12 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_8_4_compat[] = { ++ /* pc_rhel_8_4_compat from pc_compat_5_2 */ ++ { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, ++}; ++const size_t pc_rhel_8_4_compat_len = G_N_ELEMENTS(pc_rhel_8_4_compat); ++ + GlobalProperty pc_rhel_8_3_compat[] = { + /* pc_rhel_8_3_compat from pc_compat_5_1 */ + { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index d9c5df16d8..5d61c9b833 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -971,6 +971,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->pci_root_uid = 1; + compat_props_add(m->compat_props, hw_compat_rhel_8_4, + hw_compat_rhel_8_4_len); ++ compat_props_add(m->compat_props, pc_rhel_8_4_compat, ++ pc_rhel_8_4_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_3, + hw_compat_rhel_8_3_len); + 
compat_props_add(m->compat_props, pc_rhel_8_3_compat, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 44109e4876..01ff3e0544 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -607,6 +607,24 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + } + ++static void pc_q35_init_rhel850(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel850_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.5.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, ++ pc_q35_machine_rhel850_options); ++ ++ + static void pc_q35_init_rhel840(MachineState *machine) + { + pc_q35_init(machine); +@@ -615,12 +633,15 @@ static void pc_q35_init_rhel840(MachineState *machine) + static void pc_q35_machine_rhel840_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +- pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel850_options(m); + m->desc = "RHEL-8.4.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.4.0"; + compat_props_add(m->compat_props, hw_compat_rhel_8_4, + hw_compat_rhel_8_4_len); ++ compat_props_add(m->compat_props, pc_rhel_8_4_compat, ++ pc_rhel_8_4_compat_len); + } + + DEFINE_PC_MACHINE(q35_rhel840, "pc-q35-rhel8.4.0", pc_q35_init_rhel840, +@@ -637,7 +658,6 @@ static void pc_q35_machine_rhel830_options(MachineClass *m) + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel840_options(m); + m->desc = "RHEL-8.3.0 PC (Q35 + ICH9, 2009)"; +- m->alias = NULL; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.3.0"; + compat_props_add(m->compat_props, hw_compat_rhel_8_3, +diff --git 
a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 79a7803a2f..1980c93f41 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -281,6 +281,9 @@ extern const size_t pc_compat_1_4_len; + extern GlobalProperty pc_rhel_compat[]; + extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_8_4_compat[]; ++extern const size_t pc_rhel_8_4_compat_len; ++ + extern GlobalProperty pc_rhel_8_3_compat[]; + extern const size_t pc_rhel_8_3_compat_len; + +-- +2.27.0 + diff --git a/qemu-guest-agent.service b/qemu-guest-agent.service index b33e951..b3157d5 100644 --- a/qemu-guest-agent.service +++ b/qemu-guest-agent.service @@ -12,7 +12,6 @@ ExecStart=/usr/bin/qemu-ga \ --path=/dev/virtio-ports/org.qemu.guest_agent.0 \ --blacklist=${BLACKLIST_RPC} \ -F${FSFREEZE_HOOK_PATHNAME} -StandardError=syslog Restart=always RestartSec=0 diff --git a/qemu-kvm.spec b/qemu-kvm.spec index e812783..65b0566 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -67,14 +67,13 @@ Requires: %{name}-ui-opengl = %{epoch}:%{version}-%{release} \ %endif \ Requires: %{name}-block-curl = %{epoch}:%{version}-%{release} \ -Requires: %{name}-block-iscsi = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.0.0 -Release: 6%{?rcversion}%{?dist} +Release: 7%{?rcversion}%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -173,6 +172,26 @@ Patch40: kvm-target-i386-Add-CPU-model-versions-supporting-xsaves.patch Patch41: kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch # For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta Patch42: kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch +# For bz#1967502 - [aarch64] [qemu] Compile the PCIe expander bridge +Patch43: kvm-aarch64-rh-devices-add-CONFIG_PXB.patch +# For bz#1974795 - [RHEL9-beta] [aarch64] Launch guest with virtio-gpu-pci and virtual smmu causes "virtio_gpu_dequeue_ctrl_func" ERROR +Patch44: kvm-virtio-gpu-handle-partial-maps-properly.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch45: kvm-x86-Add-x86-rhel8.5-machine-types.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch46: kvm-redhat-x86-Enable-kvm-asyncpf-int-by-default.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch47: kvm-block-backend-add-drained_poll.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch48: kvm-nbd-server-Use-drained-block-ops-to-quiesce-the-serv.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch49: kvm-disable-CONFIG_USB_STORAGE_BOT.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch50: kvm-doc-Fix-some-mistakes-in-the-SEV-documentation.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch51: kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch52: kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch # Source-git patches @@ -183,7 +202,6 @@ BuildRequires: gnutls-devel BuildRequires: cyrus-sasl-devel BuildRequires: libaio-devel BuildRequires: 
python3-devel -BuildRequires: libiscsi-devel BuildRequires: libattr-devel BuildRequires: libusbx-devel >= %{libusbx_version} %if %{have_usbredir} @@ -281,6 +299,7 @@ Requires: libfdt >= %{libfdt_version} # other words RHEL-9 rebases are done together/before RHEL-8 ones) Obsoletes: qemu-kvm-ui-spice <= %{version} Obsoletes: qemu-kvm-block-gluster <= %{version} +Obsoletes: %{name}-block-iscsi <= %{version} %description -n qemu-kvm-core qemu-kvm is an open source virtualizer that provides hardware @@ -363,16 +382,6 @@ Install this package if you want to access remote disks over http, https, ftp and other transports provided by the CURL library. -%package block-iscsi -Summary: QEMU iSCSI block driver -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} - -%description block-iscsi -This package provides the additional iSCSI block driver for QEMU. - -Install this package if you want to access iSCSI volumes. - - %package block-rbd Summary: QEMU Ceph/RBD block driver Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} @@ -592,7 +601,6 @@ pushd %{qemu_kvm_build} --enable-guest-agent \ --enable-iconv \ --enable-kvm \ - --enable-libiscsi \ %if %{have_pmem} --enable-libpmem \ %endif @@ -1196,9 +1204,6 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %files block-curl %{_libdir}/qemu-kvm/block-curl.so -%files block-iscsi -%{_libdir}/qemu-kvm/block-iscsi.so - %files block-rbd %{_libdir}/qemu-kvm/block-rbd.so @@ -1213,6 +1218,30 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %endif %changelog +* Mon Jun 28 2021 Miroslav Rezanina - 6.0.0-7 +- kvm-aarch64-rh-devices-add-CONFIG_PXB.patch [bz#1967502] +- kvm-virtio-gpu-handle-partial-maps-properly.patch [bz#1974795] +- kvm-x86-Add-x86-rhel8.5-machine-types.patch [bz#1957194] +- kvm-redhat-x86-Enable-kvm-asyncpf-int-by-default.patch [bz#1957194] +- kvm-block-backend-add-drained_poll.patch [bz#1957194] +- kvm-nbd-server-Use-drained-block-ops-to-quiesce-the-serv.patch 
[bz#1957194] +- kvm-disable-CONFIG_USB_STORAGE_BOT.patch [bz#1957194] +- kvm-doc-Fix-some-mistakes-in-the-SEV-documentation.patch [bz#1957194] +- kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch [bz#1957194] +- kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch [bz#1957194] +- kvm-qga-drop-StandardError-syslog.patch [bz#1947977] +- kvm-Remove-iscsi-support.patch [bz#1967133] +- Resolves: bz#1967502 + ([aarch64] [qemu] Compile the PCIe expander bridge) +- Resolves: bz#1974795 + ([RHEL9-beta] [aarch64] Launch guest with virtio-gpu-pci and virtual smmu causes "virtio_gpu_dequeue_ctrl_func" ERROR) +- Resolves: bz#1957194 + (Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta) +- Resolves: bz#1947977 + (remove StandardError=syslog from qemu-guest-agent.service) +- Resolves: bz#1967133 + (QEMU: disable libiscsi in RHEL-9) + * Mon Jun 21 2021 Miroslav Rezanina - 6.0.0-6 - kvm-yank-Unregister-function-when-using-TLS-migration.patch [bz#1972462] - kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch [bz#1957194] From 0cede3b3b7196d0fcc95e638d7e5c74a39619e46 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 12 Jul 2021 03:27:57 -0400 Subject: [PATCH 127/195] * Mon Jul 12 2021 Miroslav Rezanina - 6.0.0-8 - kvm-Disable-TPM-passthrough.patch [bz#1978911] - kvm-redhat-Replace-the-kvm-setup.service-with-a-etc-modu.patch [bz#1978837] - Resolves: bz#1978911 (Remove TPM Passthrough option from RHEL 9) - Resolves: bz#1978837 (Remove/replace kvm-setup.service) --- 85-kvm.preset | 5 --- kvm-Disable-TPM-passthrough.patch | 44 ++++++++++++++++++++++++ kvm-setup | 49 -------------------------- kvm-setup.service | 14 -------- modules-load.conf | 4 +++ qemu-kvm.spec | 57 ++++++++++++++----------------- 6 files changed, 73 insertions(+), 100 deletions(-) delete mode 100644 85-kvm.preset create mode 100644 kvm-Disable-TPM-passthrough.patch delete mode 100644 kvm-setup delete mode 100644 kvm-setup.service create mode 100644 modules-load.conf diff --git 
a/85-kvm.preset b/85-kvm.preset deleted file mode 100644 index 8024052..0000000 --- a/85-kvm.preset +++ /dev/null @@ -1,5 +0,0 @@ -# Enable kvm-setup by default. This can have odd side effects on -# PowerNV systems that aren't intended as KVM hosts, but at present we -# only support RHEL on PowerNV for the purpose of being a RHEV host. - -enable kvm-setup.service diff --git a/kvm-Disable-TPM-passthrough.patch b/kvm-Disable-TPM-passthrough.patch new file mode 100644 index 0000000..b0cc0b7 --- /dev/null +++ b/kvm-Disable-TPM-passthrough.patch @@ -0,0 +1,44 @@ +From 651798d03827dda7eb8dc33fb3482f872ec81d16 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Mon, 5 Jul 2021 15:23:48 +0400 +Subject: [PATCH 1/2] Disable TPM passthrough +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +RH-MergeRequest: 17: Disable TPM passthrough +RH-Commit: [1/1] 735b79065149b968350b3f14d763030d5ef66457 +RH-Bugzilla: 1978911 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Eric Auger +RH-Acked-by: Daniel P. 
Berrangé + +Signed-off-by: Marc-André Lureau +Signed-off-by: Miroslav Rezanina +--- + default-configs/devices/ppc64-rh-devices.mak | 1 - + default-configs/devices/x86_64-rh-devices.mak | 1 - + 2 files changed, 2 deletions(-) + +diff --git a/default-configs/devices/ppc64-rh-devices.mak b/default-configs/devices/ppc64-rh-devices.mak +index 3ec5603ace..d6e424540e 100644 +--- a/default-configs/devices/ppc64-rh-devices.mak ++++ b/default-configs/devices/ppc64-rh-devices.mak +@@ -32,4 +32,3 @@ CONFIG_XICS=y + CONFIG_XIVE=y + CONFIG_TPM_SPAPR=y + CONFIG_TPM_EMULATOR=y +-CONFIG_TPM_PASSTHROUGH=y +diff --git a/default-configs/devices/x86_64-rh-devices.mak b/default-configs/devices/x86_64-rh-devices.mak +index 81bda09f4c..c2dd112f81 100644 +--- a/default-configs/devices/x86_64-rh-devices.mak ++++ b/default-configs/devices/x86_64-rh-devices.mak +@@ -98,4 +98,3 @@ CONFIG_XIO3130=y + CONFIG_TPM_CRB=y + CONFIG_TPM_TIS_ISA=y + CONFIG_TPM_EMULATOR=y +-CONFIG_TPM_PASSTHROUGH=y +-- +2.27.0 + diff --git a/kvm-setup b/kvm-setup deleted file mode 100644 index 3bfedf6..0000000 --- a/kvm-setup +++ /dev/null @@ -1,49 +0,0 @@ -#! /bin/bash - -kvm_setup_powerpc () { - if grep '^platform[[:space:]]*:[[:space:]]*PowerNV' /proc/cpuinfo > /dev/null; then - # PowerNV platform, which is KVM HV capable - - if [ -z "$SUBCORES" ]; then - SUBCORES=1 - fi - - # Step 1. Load the KVM HVmodule - if ! modprobe -b kvm_hv; then - return - fi - - # On POWER8 a host core can only run threads of a single - # guest, meaning that SMT must be disabled on the host in - # order to run KVM guests. (Also applieds to POWER7, but we - # don't support that). - # - # POWER9 doesn't have this limitation (though it will for hash - # guests on radix host when that's implemented). So, only set - # up subcores and disable SMT for POWER*. - if grep '^cpu[[:space:]]*:[[:space:]]*POWER8' /proc/cpuinfo > /dev/null; then - # Step 2. Configure subcore mode - /usr/sbin/ppc64_cpu --subcores-per-core=$SUBCORES - - # Step 3. 
Disable SMT (multithreading) - /usr/sbin/ppc64_cpu --smt=off - fi - fi -} - -kvm_setup_s390x () { - if grep -q "^features.*sie" /proc/cpuinfo; then - modprobe kvm - fi -} - -case $(uname -m) in - ppc64|ppc64le) - kvm_setup_powerpc - ;; - s390x) - kvm_setup_s390x - ;; -esac - -exit 0 diff --git a/kvm-setup.service b/kvm-setup.service deleted file mode 100644 index 9c4bf97..0000000 --- a/kvm-setup.service +++ /dev/null @@ -1,14 +0,0 @@ -[Unit] -Description=Perform system configuration to prepare system to run KVM guests -# Offlining CPUs can cause irqbalance to throw warnings if it's running -Before=irqbalance.service -# libvirtd reads CPU topology at startup, so change it before -Before=libvirtd.service - -[Service] -Type=oneshot -EnvironmentFile=-/etc/sysconfig/kvm -ExecStart=/usr/lib/systemd/kvm-setup - -[Install] -WantedBy=multi-user.target diff --git a/modules-load.conf b/modules-load.conf new file mode 100644 index 0000000..45b477d --- /dev/null +++ b/modules-load.conf @@ -0,0 +1,4 @@ +# When using SELinux in libvirt, automatic loading of the kvm.ko kernel +# module might not work when qemu-kvm tries to access /dev/kvm - thus we +# simply always load this module during the boot process already. 
+kvm diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 65b0566..f43a170 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -7,7 +7,7 @@ %global have_usbredir 1 %global have_opengl 1 %global have_fdt 0 -%global have_kvm_setup 0 +%global have_modules_load 0 %global have_memlock_limits 0 # have_block_rbd is not relevant for RHEL but makes it # easier to sync spec dependency list with Fedora @@ -44,12 +44,11 @@ %ifarch %{power64} %global kvm_target ppc64 %global have_fdt 1 - %global have_kvm_setup 1 %global have_memlock_limits 1 %endif %ifarch s390x %global kvm_target s390x - %global have_kvm_setup 1 + %global have_modules_load 1 %endif %ifarch ppc %global kvm_target ppc @@ -73,7 +72,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.0.0 -Release: 7%{?rcversion}%{?dist} +Release: 8%{?rcversion}%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -96,9 +95,7 @@ Source10: qemu-guest-agent.service Source11: 99-qemu-guest-agent.rules Source12: bridge.conf Source13: qemu-ga.sysconfig -Source21: kvm-setup -Source22: kvm-setup.service -Source23: 85-kvm.preset +Source21: modules-load.conf Source26: vhost.conf Source27: kvm.conf Source28: 95-kvm-memlock.conf @@ -192,6 +189,8 @@ Patch50: kvm-doc-Fix-some-mistakes-in-the-SEV-documentation.patch Patch51: kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch # For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta Patch52: kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch +# For bz#1978911 - Remove TPM Passthrough option from RHEL 9 +Patch53: kvm-Disable-TPM-passthrough.patch # Source-git patches @@ -281,10 +280,6 @@ Requires: edk2-aarch64 %endif Requires: libseccomp >= %{libseccomp_version} -%if %{have_kvm_setup} -Requires(post): systemd-units -Requires(preun): systemd-units 
-%endif Requires: libusbx >= %{libusbx_version} %if %{have_usbredir} Requires: usbredir >= %{usbredir_version} @@ -970,10 +965,8 @@ rom_link() { rom_link ../sgabios/sgabios.bin sgabios.bin %endif -%if %{have_kvm_setup} - install -D -p -m 755 %{SOURCE21} $RPM_BUILD_ROOT%{_prefix}/lib/systemd/kvm-setup - install -D -p -m 644 %{SOURCE22} $RPM_BUILD_ROOT%{_unitdir}/kvm-setup.service - install -D -p -m 644 %{SOURCE23} $RPM_BUILD_ROOT%{_presetdir}/85-kvm.preset +%if %{have_modules_load} + install -D -p -m 644 %{SOURCE21} $RPM_BUILD_ROOT%{_sysconfdir}/modules-load.d/kvm.conf %endif %if %{have_memlock_limits} @@ -1021,25 +1014,19 @@ getent passwd qemu >/dev/null || \ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ -c "qemu user" qemu -# load kvm modules now, so we can make sure no reboot is needed. -# If there's already a kvm module installed, we don't mess with it -%udev_rules_update -sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : - udevadm trigger --subsystem-match=misc --sysname-match=kvm --action=add || : -%if %{have_kvm_setup} - systemctl daemon-reload # Make sure it sees the new presets and unitfile - %systemd_post kvm-setup.service - if systemctl is-enabled kvm-setup.service > /dev/null; then - systemctl start kvm-setup.service +# If this is a new installation, then load kvm modules now, so we can make +# sure that the user gets a system where KVM is ready to use. In case of +# an upgrade, don't try to modprobe again in case the user unloaded the +# kvm module on purpose. 
+%if %{have_modules_load} + if [ "$1" = "1" ]; then + modprobe -b kvm &> /dev/null || : fi %endif %preun -n qemu-kvm-common %systemd_preun ksm.service %systemd_preun ksmtuned.service -%if %{have_kvm_setup} -%systemd_preun kvm-setup.service -%endif %postun -n qemu-kvm-common %systemd_postun_with_restart ksm.service @@ -1145,10 +1132,8 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %{_datadir}/%{name}/linuxboot_dma.bin %{_datadir}/%{name}/dump-guest-memory.py* %{_datadir}/%{name}/trace-events-all -%if %{have_kvm_setup} - %{_prefix}/lib/systemd/kvm-setup - %{_unitdir}/kvm-setup.service - %{_presetdir}/85-kvm.preset +%if %{have_modules_load} + %{_sysconfdir}/modules-load.d/kvm.conf %endif %if %{have_memlock_limits} %{_sysconfdir}/security/limits.d/95-kvm-memlock.conf @@ -1218,6 +1203,14 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %endif %changelog +* Mon Jul 12 2021 Miroslav Rezanina - 6.0.0-8 +- kvm-Disable-TPM-passthrough.patch [bz#1978911] +- kvm-redhat-Replace-the-kvm-setup.service-with-a-etc-modu.patch [bz#1978837] +- Resolves: bz#1978911 + (Remove TPM Passthrough option from RHEL 9) +- Resolves: bz#1978837 + (Remove/replace kvm-setup.service) + * Mon Jun 28 2021 Miroslav Rezanina - 6.0.0-7 - kvm-aarch64-rh-devices-add-CONFIG_PXB.patch [bz#1967502] - kvm-virtio-gpu-handle-partial-maps-properly.patch [bz#1974795] From 0e98626e1b87a90dfbba1d48deb07422139e31e7 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 19 Jul 2021 03:04:28 -0400 Subject: [PATCH 128/195] * Mon Jul 19 2021 Miroslav Rezanina - 6.0.0-9 - kvm-s390x-cpumodel-add-3931-and-3932.patch [bz#1932191] - kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch [bz#1957194] - kvm-ppc-pef.c-initialize-cgs-ready-in-kvmppc_svm_init.patch [bz#1957194] - kvm-redhat-Move-qemu-kvm-docs-dependency-to-qemu-kvm.patch [bz#1957194] - kvm-redhat-introducting-qemu-kvm-hw-usbredir.patch [bz#1957194] - kvm-redhat-use-the-standard-vhost-user-JSON-path.patch 
[bz#1957194] - Resolves: bz#1932191 ([IBM 9.0 FEAT] CPU Model for new IBM Z Hardware - qemu part (kvm)) - Resolves: bz#1957194 (Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta) --- ...tialize-cgs-ready-in-kvmppc_svm_init.patch | 69 ++++++++ kvm-s390x-cpumodel-add-3931-and-3932.patch | 134 +++++++++++++++ ...pability-issue-on-KVM-guest-for-PCI-.patch | 160 ++++++++++++++++++ qemu-kvm.spec | 55 ++++-- 4 files changed, 408 insertions(+), 10 deletions(-) create mode 100644 kvm-ppc-pef.c-initialize-cgs-ready-in-kvmppc_svm_init.patch create mode 100644 kvm-s390x-cpumodel-add-3931-and-3932.patch create mode 100644 kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch diff --git a/kvm-ppc-pef.c-initialize-cgs-ready-in-kvmppc_svm_init.patch b/kvm-ppc-pef.c-initialize-cgs-ready-in-kvmppc_svm_init.patch new file mode 100644 index 0000000..dd982e3 --- /dev/null +++ b/kvm-ppc-pef.c-initialize-cgs-ready-in-kvmppc_svm_init.patch @@ -0,0 +1,69 @@ +From e496360f627cdc4202f185b63175ced08c8b1f07 Mon Sep 17 00:00:00 2001 +From: Daniel Henrique Barboza +Date: Wed, 23 Jun 2021 19:39:32 -0400 +Subject: [PATCH 3/6] ppc/pef.c: initialize cgs->ready in kvmppc_svm_init() + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 22: Synchronize with RHEL-AV 8.5 release 23 to RHEL 9 +RH-Commit: [2/5] b204f898d2333686e30b14c050ac7a9289670f23 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 + +QEMU is failing to launch a CGS pSeries guest in a host that has PEF +support: + +qemu-system-ppc64: ../softmmu/vl.c:2585: qemu_machine_creation_done: Assertion `machine->cgs->ready' failed. +Aborted + +This is happening because we're not setting the cgs->ready flag that is +asserted in qemu_machine_creation_done() during machine start. + +cgs->ready is set in s390_pv_kvm_init() and sev_kvm_init(). Let's set it +in kvmppc_svm_init() as well. 
+ +Reported-by: Ram Pai +Signed-off-by: Daniel Henrique Barboza +Message-Id: <20210528201619.52363-1-danielhb413@gmail.com> +Acked-by: Ram Pai +Signed-off-by: David Gibson +(cherry picked from commit b873ed83311d96644b544b10f6869a430660585a) +Signed-off-by: Daniel Henrique Barboza +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/ppc/pef.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c +index 573be3ed79..cc44d5e339 100644 +--- a/hw/ppc/pef.c ++++ b/hw/ppc/pef.c +@@ -41,7 +41,7 @@ struct PefGuest { + ConfidentialGuestSupport parent_obj; + }; + +-static int kvmppc_svm_init(Error **errp) ++static int kvmppc_svm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + #ifdef CONFIG_KVM + static Error *pef_mig_blocker; +@@ -65,6 +65,8 @@ static int kvmppc_svm_init(Error **errp) + /* NB: This can fail if --only-migratable is used */ + migrate_add_blocker(pef_mig_blocker, &error_fatal); + ++ cgs->ready = true; ++ + return 0; + #else + g_assert_not_reached(); +@@ -102,7 +104,7 @@ int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + return -1; + } + +- return kvmppc_svm_init(errp); ++ return kvmppc_svm_init(cgs, errp); + } + + int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) +-- +2.27.0 + diff --git a/kvm-s390x-cpumodel-add-3931-and-3932.patch b/kvm-s390x-cpumodel-add-3931-and-3932.patch new file mode 100644 index 0000000..70ad80b --- /dev/null +++ b/kvm-s390x-cpumodel-add-3931-and-3932.patch @@ -0,0 +1,134 @@ +From 5536da8458b7825b084bfc94256bfdc1ca0127a3 Mon Sep 17 00:00:00 2001 +From: Christian Borntraeger +Date: Tue, 22 Jun 2021 22:19:23 +0200 +Subject: [PATCH 1/6] s390x/cpumodel: add 3931 and 3932 + +RH-Author: Cornelia Huck +RH-MergeRequest: 21: s390x/cpumodel: add 3931 and 3932 +RH-Commit: [1/1] b678fdf9364407c615678980330e496676e04f9e (cohuck/qemu-kvm-c9s) +RH-Bugzilla: 1932191 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: 
Thomas Huth + +This defines 5 new facilities and the new 3931 and 3932 machines. +As before the name is not yet known and we do use gen16a and gen16b. +The new features are part of the full model. + +The default model is still empty (same as z15) and will be added +in a separate patch at a later point in time. + +Also add the dependencies of new facilities and as a fix for z15 add +a dependency from S390_FEAT_VECTOR_PACKED_DECIMAL_ENH to +S390_VECTOR_PACKED_DECIMAL. + +[merged <20210701084348.26556-1-borntraeger@de.ibm.com>] +Signed-off-by: Christian Borntraeger +Message-Id: <20210622201923.150205-2-borntraeger@de.ibm.com> +Reviewed-by: David Hildenbrand +Signed-off-by: Cornelia Huck +(cherry picked from commit fb4a08121695a88acefcbcd86f1376df079eefee) +Signed-off-by: Cornelia Huck +Signed-off-by: Miroslav Rezanina +--- + target/s390x/cpu_features_def.h.inc | 5 +++++ + target/s390x/cpu_models.c | 6 ++++++ + target/s390x/gen-features.c | 14 ++++++++++++++ + 3 files changed, 25 insertions(+) + +diff --git a/target/s390x/cpu_features_def.h.inc b/target/s390x/cpu_features_def.h.inc +index 7db3449e04..e86662bb3b 100644 +--- a/target/s390x/cpu_features_def.h.inc ++++ b/target/s390x/cpu_features_def.h.inc +@@ -109,6 +109,11 @@ DEF_FEAT(VECTOR_PACKED_DECIMAL_ENH, "vxpdeh", STFL, 152, "Vector-Packed-Decimal- + DEF_FEAT(MSA_EXT_9, "msa9-base", STFL, 155, "Message-security-assist-extension-9 facility (excluding subfunctions)") + DEF_FEAT(ETOKEN, "etoken", STFL, 156, "Etoken facility") + DEF_FEAT(UNPACK, "unpack", STFL, 161, "Unpack facility") ++DEF_FEAT(NNPA, "nnpa", STFL, 165, "NNPA facility") ++DEF_FEAT(VECTOR_PACKED_DECIMAL_ENH2, "vxpdeh2", STFL, 192, "Vector-Packed-Decimal-Enhancement facility 2") ++DEF_FEAT(BEAR_ENH, "beareh", STFL, 193, "BEAR-enhancement facility") ++DEF_FEAT(RDP, "rdp", STFL, 194, "Reset-DAT-protection facility") ++DEF_FEAT(PAI, "pai", STFL, 196, "Processor-Activity-Instrumentation facility") + + /* Features exposed via SCLP SCCB Byte 80 - 98 (bit 
numbers relative to byte-80) */ + DEF_FEAT(SIE_GSLS, "gsls", SCLP_CONF_CHAR, 40, "SIE: Guest-storage-limit-suppression facility") +diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c +index 9254ff46bf..3cb4d25a10 100644 +--- a/target/s390x/cpu_models.c ++++ b/target/s390x/cpu_models.c +@@ -88,6 +88,8 @@ static S390CPUDef s390_cpu_defs[] = { + CPUDEF_INIT(0x3907, 14, 1, 47, 0x08000000U, "z14ZR1", "IBM z14 Model ZR1 GA1"), + CPUDEF_INIT(0x8561, 15, 1, 47, 0x08000000U, "gen15a", "IBM z15 T01 GA1"), + CPUDEF_INIT(0x8562, 15, 1, 47, 0x08000000U, "gen15b", "IBM z15 T02 GA1"), ++ CPUDEF_INIT(0x3931, 16, 1, 47, 0x08000000U, "gen16a", "IBM 3931 GA1"), ++ CPUDEF_INIT(0x3932, 16, 1, 47, 0x08000000U, "gen16b", "IBM 3932 GA1"), + }; + + #define QEMU_MAX_CPU_TYPE 0x2964 +@@ -815,6 +817,8 @@ static void check_consistency(const S390CPUModel *model) + { S390_FEAT_MSA_EXT_9, S390_FEAT_MSA_EXT_4 }, + { S390_FEAT_MULTIPLE_EPOCH, S390_FEAT_TOD_CLOCK_STEERING }, + { S390_FEAT_VECTOR_PACKED_DECIMAL, S390_FEAT_VECTOR }, ++ { S390_FEAT_VECTOR_PACKED_DECIMAL_ENH, S390_FEAT_VECTOR_PACKED_DECIMAL }, ++ { S390_FEAT_VECTOR_PACKED_DECIMAL_ENH2, S390_FEAT_VECTOR_PACKED_DECIMAL_ENH }, + { S390_FEAT_VECTOR_ENH, S390_FEAT_VECTOR }, + { S390_FEAT_INSTRUCTION_EXEC_PROT, S390_FEAT_SIDE_EFFECT_ACCESS_ESOP2 }, + { S390_FEAT_SIDE_EFFECT_ACCESS_ESOP2, S390_FEAT_ESOP }, +@@ -846,6 +850,8 @@ static void check_consistency(const S390CPUModel *model) + { S390_FEAT_PTFF_STOUE, S390_FEAT_MULTIPLE_EPOCH }, + { S390_FEAT_AP_QUEUE_INTERRUPT_CONTROL, S390_FEAT_AP }, + { S390_FEAT_DIAG_318, S390_FEAT_EXTENDED_LENGTH_SCCB }, ++ { S390_FEAT_NNPA, S390_FEAT_VECTOR }, ++ { S390_FEAT_RDP, S390_FEAT_LOCAL_TLB_CLEARING }, + }; + int i; + +diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c +index a6ec918e90..8f99cea665 100644 +--- a/target/s390x/gen-features.c ++++ b/target/s390x/gen-features.c +@@ -424,6 +424,8 @@ static uint16_t base_GEN15_GA1[] = { + S390_FEAT_MISC_INSTRUCTION_EXT3, + }; 
+ ++#define base_GEN16_GA1 EmptyFeat ++ + /* Full features (in order of release) + * Automatically includes corresponding base features. + * Full features are all features this hardware supports even if kvm/QEMU do not +@@ -567,6 +569,15 @@ static uint16_t full_GEN15_GA1[] = { + S390_FEAT_UNPACK, + }; + ++static uint16_t full_GEN16_GA1[] = { ++ S390_FEAT_NNPA, ++ S390_FEAT_VECTOR_PACKED_DECIMAL_ENH2, ++ S390_FEAT_BEAR_ENH, ++ S390_FEAT_RDP, ++ S390_FEAT_PAI, ++}; ++ ++ + /* Default features (in order of release) + * Automatically includes corresponding base features. + * Default features are all features this version of QEMU supports for this +@@ -652,6 +663,8 @@ static uint16_t default_GEN15_GA1[] = { + S390_FEAT_ETOKEN, + }; + ++#define default_GEN16_GA1 EmptyFeat ++ + /* QEMU (CPU model) features */ + + static uint16_t qemu_V2_11[] = { +@@ -782,6 +795,7 @@ static CpuFeatDefSpec CpuFeatDef[] = { + CPU_FEAT_INITIALIZER(GEN14_GA1), + CPU_FEAT_INITIALIZER(GEN14_GA2), + CPU_FEAT_INITIALIZER(GEN15_GA1), ++ CPU_FEAT_INITIALIZER(GEN16_GA1), + }; + + #define FEAT_GROUP_INITIALIZER(_name) \ +-- +2.27.0 + diff --git a/kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch b/kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch new file mode 100644 index 0000000..e556d49 --- /dev/null +++ b/kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch @@ -0,0 +1,160 @@ +From 389b2a01f9b75575996eaab195a9697840ae5f29 Mon Sep 17 00:00:00 2001 +From: Mahesh Salgaonkar +Date: Wed, 30 Jun 2021 13:27:47 -0400 +Subject: [PATCH 2/6] spapr: Fix EEH capability issue on KVM guest for PCI + passthru + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 22: Synchronize with RHEL-AV 8.5 release 23 to RHEL 9 +RH-Commit: [1/5] 86642761bad229c080e180ea9ebd0a4f67d2a4f7 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 + +With upstream kernel, especially after commit 98ba956f6a389 +("powerpc/pseries/eeh: Rework device EEH PE determination") we see that KVM +guest isn't able to 
enable EEH option for PCI pass-through devices anymore. + +[root@atest-guest ~]# dmesg | grep EEH +[ 0.032337] EEH: pSeries platform initialized +[ 0.298207] EEH: No capable adapters found: recovery disabled. +[root@atest-guest ~]# + +So far the linux kernel was assuming pe_config_addr equal to device's +config_addr and using it to enable EEH on the PE through ibm,set-eeh-option +RTAS call. Which wasn't the correct way as per PAPR. The linux kernel +commit 98ba956f6a389 fixed this flow. With that fixed, linux now uses PE +config address returned by ibm,get-config-addr-info2 RTAS call to enable +EEH option per-PE basis instead of per-device basis. However this has +uncovered a bug in qemu where ibm,set-eeh-option is treating PE config +address as per-device config address. + +Hence in qemu guest with recent kernel the ibm,set-eeh-option RTAS call +fails with -3 return value indicating that there is no PCI device exist for +the specified PE config address. The rtas_ibm_set_eeh_option call uses +pci_find_device() to get the PC device that matches specific bus and devfn +extracted from PE config address passed as argument. Thus it tries to map +the PE config address to a single specific PCI device 'bus->devices[devfn]' +which always results into checking device on slot 0 'bus->devices[0]'. +This succeeds when there is a pass-through device (vfio-pci) present on +slot 0. But in cases where there is no pass-through device present in slot +0, but present in non-zero slots, ibm,set-eeh-option call fails to enable +the EEH capability. + +hw/ppc/spapr_pci_vfio.c: spapr_phb_vfio_eeh_set_option() + case RTAS_EEH_ENABLE: { + PCIHostState *phb; + PCIDevice *pdev; + + /* + * The EEH functionality is enabled on basis of PCI device, + * instead of PE. We need check the validity of the PCI + * device address. 
+ */ + phb = PCI_HOST_BRIDGE(sphb); + pdev = pci_find_device(phb->bus, + (addr >> 16) & 0xFF, (addr >> 8) & 0xFF); + if (!pdev || !object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + return RTAS_OUT_PARAM_ERROR; + } + +hw/pci/pci.c:pci_find_device() + +PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn) +{ + bus = pci_find_bus_nr(bus, bus_num); + + if (!bus) + return NULL; + + return bus->devices[devfn]; +} + +This patch fixes ibm,set-eeh-option to check for presence of any PCI device +(vfio-pci) under specified bus and enable the EEH if found. The current +code already makes sure that all the devices on that bus are from same +iommu group (within same PE) and fail very early if it does not. + +After this fix guest is able to find EEH capable devices and enable EEH +recovery on it. + +[root@atest-guest ~]# dmesg | grep EEH +[ 0.048139] EEH: pSeries platform initialized +[ 0.405115] EEH: Capable adapter found: recovery enabled. +[root@atest-guest ~]# + +Reviewed-by: Daniel Henrique Barboza +Signed-off-by: Mahesh Salgaonkar +Message-Id: <162158429107.145117.5843504911924013125.stgit@jupiter> +Signed-off-by: David Gibson +(cherry picked from commit ac9ef668321ebb6eb871a0c4dd380fa7d7891b4e) +Signed-off-by: Daniel Henrique Barboza +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/ppc/spapr_pci_vfio.c | 40 +++++++++++++++++++++++++++++++++------- + 1 file changed, 33 insertions(+), 7 deletions(-) + +diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c +index e0547b1740..6587c8cb5b 100644 +--- a/hw/ppc/spapr_pci_vfio.c ++++ b/hw/ppc/spapr_pci_vfio.c +@@ -47,6 +47,16 @@ void spapr_phb_vfio_reset(DeviceState *qdev) + spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev)); + } + ++static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev, ++ void *opaque) ++{ ++ bool *found = opaque; ++ ++ if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { ++ *found = true; ++ } ++} ++ + int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, + unsigned int addr, int option) + { +@@ -59,17 +69,33 @@ int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, + break; + case RTAS_EEH_ENABLE: { + PCIHostState *phb; +- PCIDevice *pdev; ++ bool found = false; + + /* +- * The EEH functionality is enabled on basis of PCI device, +- * instead of PE. We need check the validity of the PCI +- * device address. ++ * The EEH functionality is enabled per sphb level instead of ++ * per PCI device. We have already identified this specific sphb ++ * based on buid passed as argument to ibm,set-eeh-option rtas ++ * call. Now we just need to check the validity of the PCI ++ * pass-through devices (vfio-pci) under this sphb bus. ++ * We have already validated that all the devices under this sphb ++ * are from same iommu group (within same PE) before comming here. ++ * ++ * Prior to linux commit 98ba956f6a389 ("powerpc/pseries/eeh: ++ * Rework device EEH PE determination") kernel would call ++ * eeh-set-option for each device in the PE using the device's ++ * config_address as the argument rather than the PE address. ++ * Hence if we check validity of supplied config_addr whether ++ * it matches to this PHB will cause issues with older kernel ++ * versions v5.9 and older. 
If we return an error from ++ * eeh-set-option when the argument isn't a valid PE address ++ * then older kernels (v5.9 and older) will interpret that as ++ * EEH not being supported. + */ + phb = PCI_HOST_BRIDGE(sphb); +- pdev = pci_find_device(phb->bus, +- (addr >> 16) & 0xFF, (addr >> 8) & 0xFF); +- if (!pdev || !object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { ++ pci_for_each_device(phb->bus, (addr >> 16) & 0xFF, ++ spapr_eeh_pci_find_device, &found); ++ ++ if (!found) { + return RTAS_OUT_PARAM_ERROR; + } + +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index f43a170..9bc16fe 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -65,6 +65,9 @@ %if %{have_opengl} \ Requires: %{name}-ui-opengl = %{epoch}:%{version}-%{release} \ %endif \ +%if %{have_usbredir} \ +Requires: %{name}-hw-usbredir = %{epoch}:%{version}-%{release} \ +%endif \ Requires: %{name}-block-curl = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} @@ -72,7 +75,7 @@ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.0.0 -Release: 8%{?rcversion}%{?dist} +Release: 9%{?rcversion}%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -191,6 +194,12 @@ Patch51: kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch Patch52: kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch # For bz#1978911 - Remove TPM Passthrough option from RHEL 9 Patch53: kvm-Disable-TPM-passthrough.patch +# For bz#1932191 - [IBM 9.0 FEAT] CPU Model for new IBM Z Hardware - qemu part (kvm) +Patch54: kvm-s390x-cpumodel-add-3931-and-3932.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch55: kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch56: kvm-ppc-pef.c-initialize-cgs-ready-in-kvmppc_svm_init.patch # Source-git patches @@ -255,7 +264,7 @@ BuildRequires: perl-Test-Harness BuildRequires: libslirp-devel Requires: qemu-kvm-core = %{epoch}:%{version}-%{release} - +Requires: %{name}-docs = %{epoch}:%{version}-%{release} %{requires_all_modules} %define qemudocdir %{_docdir}/%{name} @@ -271,7 +280,6 @@ hardware for a full system such as a PC and its associated peripherals. Summary: qemu-kvm core components Requires: %{name}-common = %{epoch}:%{version}-%{release} Requires: qemu-img = %{epoch}:%{version}-%{release} -Recommends: qemu-kvm-docs %ifarch %{ix86} x86_64 Requires: edk2-ovmf %endif @@ -281,9 +289,6 @@ Requires: edk2-aarch64 Requires: libseccomp >= %{libseccomp_version} Requires: libusbx >= %{libusbx_version} -%if %{have_usbredir} -Requires: usbredir >= %{usbredir_version} -%endif %if %{have_fdt} Requires: libfdt >= %{libfdt_version} %endif @@ -411,6 +416,15 @@ Requires: mesa-dri-drivers This package provides opengl support. 
%endif +%if %{have_usbredir} +%package hw-usbredir +Summary: QEMU usbredir support +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: usbredir >= 0.7.1 + +%description hw-usbredir +This package provides usbredir support. +%endif %prep %if 0%{?rcversion} @@ -771,6 +785,10 @@ make DESTDIR=$RPM_BUILD_ROOT \ install mkdir -p $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset + +# Move vhost-user JSON files to the standard "qemu" directory +mkdir -p $RPM_BUILD_ROOT%{_datadir}/qemu +mv $RPM_BUILD_ROOT%{_datadir}/%{name}/vhost-user $RPM_BUILD_ROOT%{_datadir}/qemu/ %endif # Install qemu-guest-agent service and udev rules @@ -1139,7 +1157,10 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_sysconfdir}/security/limits.d/95-kvm-memlock.conf %endif %{_libexecdir}/virtiofsd -%{_datadir}/%{name}/vhost-user/50-qemu-virtiofsd.json +# This is the standard location for vhost-user JSON files defined in the +# vhost-user specification for interoperability with other software. Unlike +# most other paths we use it's "qemu" instead of "qemu-kvm". 
+%{_datadir}/qemu/vhost-user/50-qemu-virtiofsd.json %files -n qemu-kvm-core %{_libexecdir}/qemu-kvm @@ -1149,9 +1170,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp %{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf -%if %{have_usbredir} - %{_libdir}/qemu-kvm/hw-usb-redirect.so -%endif %{_libdir}/qemu-kvm/hw-display-virtio-gpu.so %ifarch s390x %{_libdir}/qemu-kvm/hw-s390x-virtio-gpu-ccw.so @@ -1200,9 +1218,26 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_libdir}/qemu-kvm/ui-egl-headless.so %{_libdir}/qemu-kvm/ui-opengl.so %endif + +%if %{have_usbredir} +%files hw-usbredir + %{_libdir}/qemu-kvm/hw-usb-redirect.so +%endif %endif %changelog +* Mon Jul 19 2021 Miroslav Rezanina - 6.0.0-9 +- kvm-s390x-cpumodel-add-3931-and-3932.patch [bz#1932191] +- kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch [bz#1957194] +- kvm-ppc-pef.c-initialize-cgs-ready-in-kvmppc_svm_init.patch [bz#1957194] +- kvm-redhat-Move-qemu-kvm-docs-dependency-to-qemu-kvm.patch [bz#1957194] +- kvm-redhat-introducting-qemu-kvm-hw-usbredir.patch [bz#1957194] +- kvm-redhat-use-the-standard-vhost-user-JSON-path.patch [bz#1957194] +- Resolves: bz#1932191 + ([IBM 9.0 FEAT] CPU Model for new IBM Z Hardware - qemu part (kvm)) +- Resolves: bz#1957194 + (Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta) + * Mon Jul 12 2021 Miroslav Rezanina - 6.0.0-8 - kvm-Disable-TPM-passthrough.patch [bz#1978911] - kvm-redhat-Replace-the-kvm-setup.service-with-a-etc-modu.patch [bz#1978837] From 88586bd4807a8e3e588d36a520f1191f507fed1c Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Sun, 25 Jul 2021 22:54:48 -0400 Subject: [PATCH 129/195] * Sun Jul 25 2021 Miroslav Rezanina - 6.0.0-10 - kvm-s390x-css-Introduce-an-ESW-struct.patch [bz#1957194] - kvm-s390x-css-Split-out-the-IRB-sense-data.patch [bz#1957194] - kvm-s390x-css-Refactor-IRB-construction.patch [bz#1957194] - kvm-s390x-css-Add-passthrough-IRB.patch [bz#1957194] - 
kvm-vhost-user-blk-Fail-gracefully-on-too-large-queue-si.patch [bz#1957194] - kvm-vhost-user-blk-Make-sure-to-set-Error-on-realize-fai.patch [bz#1957194] - kvm-vhost-user-blk-Don-t-reconnect-during-initialisation.patch [bz#1957194] - kvm-vhost-user-blk-Improve-error-reporting-in-realize.patch [bz#1957194] - kvm-vhost-user-blk-Get-more-feature-flags-from-vhost-dev.patch [bz#1957194] - kvm-virtio-Fail-if-iommu_platform-is-requested-but-unsup.patch [bz#1957194] - kvm-vhost-user-blk-Check-that-num-queues-is-supported-by.patch [bz#1957194] - kvm-vhost-user-Fix-backends-without-multiqueue-support.patch [bz#1957194] - kvm-file-posix-fix-max_iov-for-dev-sg-devices.patch [bz#1957194] - kvm-scsi-generic-pass-max_segments-via-max_iov-field-in-.patch [bz#1957194] - kvm-osdep-provide-ROUND_DOWN-macro.patch [bz#1957194] - kvm-block-backend-align-max_transfer-to-request-alignmen.patch [bz#1957194] - kvm-block-add-max_hw_transfer-to-BlockLimits.patch [bz#1957194] - kvm-file-posix-try-BLKSECTGET-on-block-devices-too-do-no.patch [bz#1957194] - kvm-block-Add-option-to-use-driver-whitelist-even-in-too.patch [bz#1957782] - kvm-spec-Restrict-block-drivers-in-tools.patch [bz#1957782] - kvm-Move-tools-to-separate-package.patch [bz#1972285] - kvm-Split-qemu-pr-helper-to-separate-package.patch [bz#1972300] - kvm-spec-RPM_BUILD_ROOT-buildroot.patch [bz#1973029] - kvm-spec-More-use-of-name-instead-of-qemu-kvm.patch [bz#1973029] - kvm-spec-Use-qemu-pr-helper.service-from-qemu.git.patch [bz#1973029] - kvm-spec-Use-_sourcedir-for-referencing-sources.patch [bz#1973029] - kvm-spec-Add-tools_only.patch [bz#1973029] - kvm-spec-build-Add-run_configure-helper.patch [bz#1973029] - kvm-spec-build-Disable-more-bits-with-disable_everything.patch [bz#1973029] - kvm-spec-build-Add-macros-for-some-configure-parameters.patch [bz#1973029] - kvm-spec-files-Move-qemu-guest-agent-and-qemu-img-earlie.patch [bz#1973029] - kvm-spec-install-Remove-redundant-bits.patch [bz#1973029] - 
kvm-spec-install-Add-modprobe_kvm_conf-macro.patch [bz#1973029] - kvm-spec-install-Remove-qemu-guest-agent-etc-qemu-kvm-us.patch [bz#1973029] - kvm-spec-install-clean-up-qemu-ga-section.patch [bz#1973029] - kvm-spec-install-Use-a-single-tools_only-section.patch [bz#1973029] - kvm-spec-Make-tools_only-not-cross-spec-sections.patch [bz#1973029] - kvm-spec-install-Limit-time-spent-in-qemu_kvm_build.patch [bz#1973029] - kvm-spec-misc-syntactic-merges-with-Fedora.patch [bz#1973029] - kvm-spec-Use-Fedora-s-pattern-for-specifying-rc-version.patch [bz#1973029] - kvm-spec-files-don-t-use-fine-grained-docs-file-list.patch [bz#1973029] - kvm-spec-files-Add-licenses-to-qemu-common-too.patch [bz#1973029] - kvm-spec-install-Drop-python3-shebang-fixup.patch [bz#1973029] - Resolves: bz#1957194 (Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta) - Resolves: bz#1957782 (VMDK support should be read-only) - Resolves: bz#1972285 (Split out a qemu-kvm-tools subpackage) - Resolves: bz#1972300 (Split out a qemu-pr-helper subpackage) - Resolves: bz#1973029 (Spec file cleanups) --- ...-to-use-driver-whitelist-even-in-too.patch | 121 +++ ...k-add-max_hw_transfer-to-BlockLimits.patch | 131 +++ ...ign-max_transfer-to-request-alignmen.patch | 47 + ...posix-fix-max_iov-for-dev-sg-devices.patch | 50 + ...LKSECTGET-on-block-devices-too-do-no.patch | 140 +++ kvm-osdep-provide-ROUND_DOWN-macro.patch | 75 ++ kvm-s390x-css-Add-passthrough-IRB.patch | 127 +++ kvm-s390x-css-Introduce-an-ESW-struct.patch | 111 +++ kvm-s390x-css-Refactor-IRB-construction.patch | 144 +++ ...90x-css-Split-out-the-IRB-sense-data.patch | 63 ++ ...s-max_segments-via-max_iov-field-in-.patch | 64 ++ ...-backends-without-multiqueue-support.patch | 46 + ...heck-that-num-queues-is-supported-by.patch | 83 ++ ...on-t-reconnect-during-initialisation.patch | 179 ++++ ...ail-gracefully-on-too-large-queue-si.patch | 55 ++ ...et-more-feature-flags-from-vhost-dev.patch | 44 + ...k-Improve-error-reporting-in-realize.patch | 120 +++ 
...ake-sure-to-set-Error-on-realize-fai.patch | 53 ++ ...ommu_platform-is-requested-but-unsup.patch | 53 ++ qemu-kvm.spec | 876 ++++++++++-------- qemu-pr-helper.service | 15 - qemu-pr-helper.socket | 9 - 22 files changed, 2197 insertions(+), 409 deletions(-) create mode 100644 kvm-block-Add-option-to-use-driver-whitelist-even-in-too.patch create mode 100644 kvm-block-add-max_hw_transfer-to-BlockLimits.patch create mode 100644 kvm-block-backend-align-max_transfer-to-request-alignmen.patch create mode 100644 kvm-file-posix-fix-max_iov-for-dev-sg-devices.patch create mode 100644 kvm-file-posix-try-BLKSECTGET-on-block-devices-too-do-no.patch create mode 100644 kvm-osdep-provide-ROUND_DOWN-macro.patch create mode 100644 kvm-s390x-css-Add-passthrough-IRB.patch create mode 100644 kvm-s390x-css-Introduce-an-ESW-struct.patch create mode 100644 kvm-s390x-css-Refactor-IRB-construction.patch create mode 100644 kvm-s390x-css-Split-out-the-IRB-sense-data.patch create mode 100644 kvm-scsi-generic-pass-max_segments-via-max_iov-field-in-.patch create mode 100644 kvm-vhost-user-Fix-backends-without-multiqueue-support.patch create mode 100644 kvm-vhost-user-blk-Check-that-num-queues-is-supported-by.patch create mode 100644 kvm-vhost-user-blk-Don-t-reconnect-during-initialisation.patch create mode 100644 kvm-vhost-user-blk-Fail-gracefully-on-too-large-queue-si.patch create mode 100644 kvm-vhost-user-blk-Get-more-feature-flags-from-vhost-dev.patch create mode 100644 kvm-vhost-user-blk-Improve-error-reporting-in-realize.patch create mode 100644 kvm-vhost-user-blk-Make-sure-to-set-Error-on-realize-fai.patch create mode 100644 kvm-virtio-Fail-if-iommu_platform-is-requested-but-unsup.patch delete mode 100644 qemu-pr-helper.service delete mode 100644 qemu-pr-helper.socket diff --git a/kvm-block-Add-option-to-use-driver-whitelist-even-in-too.patch b/kvm-block-Add-option-to-use-driver-whitelist-even-in-too.patch new file mode 100644 index 0000000..4934ffe --- /dev/null +++ 
b/kvm-block-Add-option-to-use-driver-whitelist-even-in-too.patch @@ -0,0 +1,121 @@ +From 0739f735f99a6f1760a422023c262c1aa542a2e5 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 9 Jul 2021 18:41:41 +0200 +Subject: [PATCH 19/43] block: Add option to use driver whitelist even in tools +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 26: block: Disable unsupported/read-only block drivers even in tools +RH-Commit: [1/2] 6755d5ff4ef43f275ae530de2b2a568ffd2d3497 (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 1957782 +RH-Acked-by: Max Reitz +RH-Acked-by: Richard W.M. Jones +RH-Acked-by: Philippe Mathieu-Daudé + +Currently, the block driver whitelists are only applied for the system +emulator. All other binaries still give unrestricted access to all block +drivers. There are use cases where this made sense because the main +concern was avoiding customers running VMs on less optimised block +drivers and getting bad performance. Allowing the same image format e.g. +as a target for 'qemu-img convert' is not a problem then. + +However, if the concern is the supportability of the driver in general, +either in full or when used read-write, not applying the driver +whitelist in tools doesn't help - especially since qemu-nbd and +qemu-storage-daemon now give access to more or less the same operations +in block drivers as running a system emulator. + +In order to address this, introduce a new configure option that enforces +the driver whitelist in all binaries. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20210709164141.254097-1-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit e5f05f8c375157211c7da625a0d3f3ccdb4957d5) +Signed-off-by: Kevin Wolf +--- + block.c | 3 +++ + configure | 14 ++++++++++++-- + meson.build | 1 + + 3 files changed, 16 insertions(+), 2 deletions(-) + +diff --git a/block.c b/block.c +index c5b887cec1..76ecede5af 100644 +--- a/block.c ++++ b/block.c +@@ -5817,6 +5817,9 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, + + void bdrv_init(void) + { ++#ifdef CONFIG_BDRV_WHITELIST_TOOLS ++ use_bdrv_whitelist = 1; ++#endif + module_call_init(MODULE_INIT_BLOCK); + } + +diff --git a/configure b/configure +index 53b2fa583a..7edc08afb3 100755 +--- a/configure ++++ b/configure +@@ -243,6 +243,7 @@ cross_prefix="" + audio_drv_list="" + block_drv_rw_whitelist="" + block_drv_ro_whitelist="" ++block_drv_whitelist_tools="no" + host_cc="cc" + audio_win_int="" + libs_qga="" +@@ -1029,6 +1030,10 @@ for opt do + ;; + --block-drv-ro-whitelist=*) block_drv_ro_whitelist=$(echo "$optarg" | sed -e 's/,/ /g') + ;; ++ --enable-block-drv-whitelist-in-tools) block_drv_whitelist_tools="yes" ++ ;; ++ --disable-block-drv-whitelist-in-tools) block_drv_whitelist_tools="no" ++ ;; + --enable-debug-tcg) debug_tcg="yes" + ;; + --disable-debug-tcg) debug_tcg="no" +@@ -1764,10 +1769,12 @@ Advanced options (experts only): + --block-drv-whitelist=L Same as --block-drv-rw-whitelist=L + --block-drv-rw-whitelist=L + set block driver read-write whitelist +- (affects only QEMU, not qemu-img) ++ (by default affects only QEMU, not tools like qemu-img) + --block-drv-ro-whitelist=L + set block driver read-only whitelist +- (affects only QEMU, not qemu-img) ++ (by default affects only QEMU, not tools like qemu-img) ++ --enable-block-drv-whitelist-in-tools ++ use block whitelist also in tools instead of only QEMU + --enable-trace-backends=B Set trace backend + Available backends: 
$trace_backend_list + --with-trace-file=NAME Full PATH,NAME of file to store traces +@@ -5571,6 +5578,9 @@ if test "$audio_win_int" = "yes" ; then + fi + echo "CONFIG_BDRV_RW_WHITELIST=$block_drv_rw_whitelist" >> $config_host_mak + echo "CONFIG_BDRV_RO_WHITELIST=$block_drv_ro_whitelist" >> $config_host_mak ++if test "$block_drv_whitelist_tools" = "yes" ; then ++ echo "CONFIG_BDRV_WHITELIST_TOOLS=y" >> $config_host_mak ++fi + if test "$xfs" = "yes" ; then + echo "CONFIG_XFS=y" >> $config_host_mak + fi +diff --git a/meson.build b/meson.build +index 06c15bd6d2..49b8164ade 100644 +--- a/meson.build ++++ b/meson.build +@@ -2606,6 +2606,7 @@ summary_info += {'coroutine pool': config_host['CONFIG_COROUTINE_POOL'] == '1 + if have_block + summary_info += {'Block whitelist (rw)': config_host['CONFIG_BDRV_RW_WHITELIST']} + summary_info += {'Block whitelist (ro)': config_host['CONFIG_BDRV_RO_WHITELIST']} ++ summary_info += {'Use block whitelist in tools': config_host.has_key('CONFIG_BDRV_WHITELIST_TOOLS')} + summary_info += {'VirtFS support': have_virtfs} + summary_info += {'build virtiofs daemon': have_virtiofsd} + summary_info += {'Live block migration': config_host.has_key('CONFIG_LIVE_BLOCK_MIGRATION')} +-- +2.27.0 + diff --git a/kvm-block-add-max_hw_transfer-to-BlockLimits.patch b/kvm-block-add-max_hw_transfer-to-BlockLimits.patch new file mode 100644 index 0000000..75f1b5d --- /dev/null +++ b/kvm-block-add-max_hw_transfer-to-BlockLimits.patch @@ -0,0 +1,131 @@ +From 6773549977d94c504ec76aed67506ae85adff973 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 16 Jul 2021 16:51:33 -0400 +Subject: [PATCH 17/43] block: add max_hw_transfer to BlockLimits + +RH-Author: Miroslav Rezanina +RH-Bugzilla: 1957194 + +For block host devices, I/O can happen through either the kernel file +descriptor I/O system calls (preadv/pwritev, io_submit, io_uring) +or the SCSI passthrough ioctl SG_IO. 
+ +In the latter case, the size of each transfer can be limited by the +HBA, while for file descriptor I/O the kernel is able to split and +merge I/O in smaller pieces as needed. Applying the HBA limits to +file descriptor I/O results in more system calls and suboptimal +performance, so this patch splits the max_transfer limit in two: +max_transfer remains valid and is used in general, while max_hw_transfer +is limited to the maximum hardware size. max_hw_transfer can then be +included by the scsi-generic driver in the block limits page, to ensure +that the stricter hardware limit is used. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 24b36e9813ec15da7db62e3b3621730710c5f020) +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + block/block-backend.c | 13 +++++++++++++ + block/file-posix.c | 2 +- + block/io.c | 2 ++ + hw/scsi/scsi-generic.c | 2 +- + include/block/block_int.h | 7 +++++++ + include/sysemu/block-backend.h | 1 + + 6 files changed, 25 insertions(+), 2 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 136cc602c5..b5f5b4b048 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1939,6 +1939,19 @@ uint32_t blk_get_request_alignment(BlockBackend *blk) + return bs ? 
bs->bl.request_alignment : BDRV_SECTOR_SIZE; + } + ++/* Returns the maximum hardware transfer length, in bytes; guaranteed nonzero */ ++uint64_t blk_get_max_hw_transfer(BlockBackend *blk) ++{ ++ BlockDriverState *bs = blk_bs(blk); ++ uint64_t max = INT_MAX; ++ ++ if (bs) { ++ max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer); ++ max = MIN_NON_ZERO(max, bs->bl.max_transfer); ++ } ++ return ROUND_DOWN(max, blk_get_request_alignment(blk)); ++} ++ + /* Returns the maximum transfer length, in bytes; guaranteed nonzero */ + uint32_t blk_get_max_transfer(BlockBackend *blk) + { +diff --git a/block/file-posix.c b/block/file-posix.c +index 1a6c799e19..44325a635d 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -1258,7 +1258,7 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) + int ret = sg_get_max_transfer_length(s->fd); + + if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) { +- bs->bl.max_transfer = pow2floor(ret); ++ bs->bl.max_hw_transfer = pow2floor(ret); + } + + ret = sg_get_max_segments(s->fd); +diff --git a/block/io.c b/block/io.c +index ca2dca3007..a4b2e3adf1 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -126,6 +126,8 @@ static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) + { + dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer); + dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer); ++ dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer, ++ src->max_hw_transfer); + dst->opt_mem_alignment = MAX(dst->opt_mem_alignment, + src->opt_mem_alignment); + dst->min_mem_alignment = MAX(dst->min_mem_alignment, +diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c +index 82e1e2ee79..3762dce749 100644 +--- a/hw/scsi/scsi-generic.c ++++ b/hw/scsi/scsi-generic.c +@@ -179,7 +179,7 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s) + (r->req.cmd.buf[1] & 0x01)) { + page = r->req.cmd.buf[2]; + if (page == 0xb0) { +- uint32_t max_transfer = 
blk_get_max_transfer(s->conf.blk); ++ uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk); + uint32_t max_iov = blk_get_max_iov(s->conf.blk); + + assert(max_transfer); +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 88e4111939..09d8630ec4 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -695,6 +695,13 @@ typedef struct BlockLimits { + * clamped down. */ + uint32_t max_transfer; + ++ /* Maximal hardware transfer length in bytes. Applies whenever ++ * transfers to the device bypass the kernel I/O scheduler, for ++ * example with SG_IO. If larger than max_transfer or if zero, ++ * blk_get_max_hw_transfer will fall back to max_transfer. ++ */ ++ uint64_t max_hw_transfer; ++ + /* memory alignment, in bytes so that no bounce buffer is needed */ + size_t min_mem_alignment; + +diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h +index 5423e3d9c6..9ac5f7bbd3 100644 +--- a/include/sysemu/block-backend.h ++++ b/include/sysemu/block-backend.h +@@ -208,6 +208,7 @@ void blk_eject(BlockBackend *blk, bool eject_flag); + int blk_get_flags(BlockBackend *blk); + uint32_t blk_get_request_alignment(BlockBackend *blk); + uint32_t blk_get_max_transfer(BlockBackend *blk); ++uint64_t blk_get_max_hw_transfer(BlockBackend *blk); + int blk_get_max_iov(BlockBackend *blk); + void blk_set_guest_block_size(BlockBackend *blk, int align); + void *blk_try_blockalign(BlockBackend *blk, size_t size); +-- +2.27.0 + diff --git a/kvm-block-backend-align-max_transfer-to-request-alignmen.patch b/kvm-block-backend-align-max_transfer-to-request-alignmen.patch new file mode 100644 index 0000000..c788c86 --- /dev/null +++ b/kvm-block-backend-align-max_transfer-to-request-alignmen.patch @@ -0,0 +1,47 @@ +From 643c979c2bfa0fc3c45ec8ec5f05a77e0b075356 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 16 Jul 2021 16:51:32 -0400 +Subject: [PATCH 16/43] block-backend: align max_transfer to request alignment + +RH-Author: 
Miroslav Rezanina +RH-Bugzilla: 1957194 + +Block device requests must be aligned to bs->bl.request_alignment. +It makes sense for drivers to align bs->bl.max_transfer the same +way; however when there is no specified limit, blk_get_max_transfer +just returns INT_MAX. Since the contract of the function does not +specify that INT_MAX means "no maximum", just align the outcome +of the function (whether INT_MAX or bs->bl.max_transfer) before +returning it. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit b99f7fa08a3df8b8a6a907642e5851cdcf43fa9f) +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + block/block-backend.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 05d8e5fb5d..136cc602c5 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1943,12 +1943,12 @@ uint32_t blk_get_request_alignment(BlockBackend *blk) + uint32_t blk_get_max_transfer(BlockBackend *blk) + { + BlockDriverState *bs = blk_bs(blk); +- uint32_t max = 0; ++ uint32_t max = INT_MAX; + + if (bs) { +- max = bs->bl.max_transfer; ++ max = MIN_NON_ZERO(max, bs->bl.max_transfer); + } +- return MIN_NON_ZERO(max, INT_MAX); ++ return ROUND_DOWN(max, blk_get_request_alignment(blk)); + } + + int blk_get_max_iov(BlockBackend *blk) +-- +2.27.0 + diff --git a/kvm-file-posix-fix-max_iov-for-dev-sg-devices.patch b/kvm-file-posix-fix-max_iov-for-dev-sg-devices.patch new file mode 100644 index 0000000..3027bec --- /dev/null +++ b/kvm-file-posix-fix-max_iov-for-dev-sg-devices.patch @@ -0,0 +1,50 @@ +From 0111d01afe82c46656a40269bf21eb7702c02a09 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 16 Jul 2021 16:51:29 -0400 +Subject: [PATCH 13/43] file-posix: fix max_iov for /dev/sg devices + +RH-Author: Miroslav Rezanina +RH-Bugzilla: 1957194 + +Even though it was only called for devices that have bs->sg set (which +must be character devices), sg_get_max_segments looked at 
/sys/dev/block +which only works for block devices. + +On Linux the sg driver has its own way to provide the maximum number of +iovecs in a scatter/gather list, so add support for it. The block device +path is kept because it will be reinstated in the next patches. + +Signed-off-by: Paolo Bonzini +Reviewed-by: Max Reitz +(cherry picked from commit 8ad5ab6148dca8aad297c134c09c84b0b92d45ed) +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + block/file-posix.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 20e14f8e96..74d4903dc1 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -1204,6 +1204,17 @@ static int sg_get_max_segments(int fd) + goto out; + } + ++ if (S_ISCHR(st.st_mode)) { ++ if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) { ++ return ret; ++ } ++ return -ENOTSUP; ++ } ++ ++ if (!S_ISBLK(st.st_mode)) { ++ return -ENOTSUP; ++ } ++ + sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments", + major(st.st_rdev), minor(st.st_rdev)); + sysfd = open(sysfspath, O_RDONLY); +-- +2.27.0 + diff --git a/kvm-file-posix-try-BLKSECTGET-on-block-devices-too-do-no.patch b/kvm-file-posix-try-BLKSECTGET-on-block-devices-too-do-no.patch new file mode 100644 index 0000000..c80576b --- /dev/null +++ b/kvm-file-posix-try-BLKSECTGET-on-block-devices-too-do-no.patch @@ -0,0 +1,140 @@ +From 9c8493d3a6d2e4d879d1ef67ff1abebd532c87a0 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 16 Jul 2021 16:51:34 -0400 +Subject: [PATCH 18/43] file-posix: try BLKSECTGET on block devices too, do not + round to power of 2 + +RH-Author: Miroslav Rezanina +RH-Bugzilla: 1957194 + +bs->sg is only true for character devices, but block devices can also +be used with scsi-block and scsi-generic. Unfortunately BLKSECTGET +returns bytes in an int for /dev/sgN devices, and sectors in a short +for block devices, so account for that in the code. 
+ +The maximum transfer also need not be a power of 2 (for example I have +seen disks with 1280 KiB maximum transfer) so there's no need to pass +the result through pow2floor. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 18473467d55a20d643b6c9b3a52de42f705b4d35) +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + block/file-posix.c | 57 +++++++++++++++++++++++++++------------------- + 1 file changed, 33 insertions(+), 24 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 44325a635d..7b4ebf65d5 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -1173,22 +1173,27 @@ static void raw_reopen_abort(BDRVReopenState *state) + s->reopen_state = NULL; + } + +-static int sg_get_max_transfer_length(int fd) ++static int hdev_get_max_hw_transfer(int fd, struct stat *st) + { + #ifdef BLKSECTGET +- int max_bytes = 0; +- +- if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) { +- return max_bytes; ++ if (S_ISBLK(st->st_mode)) { ++ unsigned short max_sectors = 0; ++ if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) { ++ return max_sectors * 512; ++ } + } else { +- return -errno; ++ int max_bytes = 0; ++ if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) { ++ return max_bytes; ++ } + } ++ return -errno; + #else + return -ENOSYS; + #endif + } + +-static int sg_get_max_segments(int fd) ++static int hdev_get_max_segments(int fd, struct stat *st) + { + #ifdef CONFIG_LINUX + char buf[32]; +@@ -1197,26 +1202,20 @@ static int sg_get_max_segments(int fd) + int ret; + int sysfd = -1; + long max_segments; +- struct stat st; +- +- if (fstat(fd, &st)) { +- ret = -errno; +- goto out; +- } + +- if (S_ISCHR(st.st_mode)) { ++ if (S_ISCHR(st->st_mode)) { + if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) { + return ret; + } + return -ENOTSUP; + } + +- if (!S_ISBLK(st.st_mode)) { ++ if (!S_ISBLK(st->st_mode)) { + return -ENOTSUP; + } + + sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments", +- major(st.st_rdev), 
minor(st.st_rdev)); ++ major(st->st_rdev), minor(st->st_rdev)); + sysfd = open(sysfspath, O_RDONLY); + if (sysfd == -1) { + ret = -errno; +@@ -1253,23 +1252,33 @@ out: + static void raw_refresh_limits(BlockDriverState *bs, Error **errp) + { + BDRVRawState *s = bs->opaque; ++ struct stat st; + +- if (bs->sg) { +- int ret = sg_get_max_transfer_length(s->fd); ++ raw_probe_alignment(bs, s->fd, errp); ++ bs->bl.min_mem_alignment = s->buf_align; ++ bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size); ++ ++ /* ++ * Maximum transfers are best effort, so it is okay to ignore any ++ * errors. That said, based on the man page errors in fstat would be ++ * very much unexpected; the only possible case seems to be ENOMEM. ++ */ ++ if (fstat(s->fd, &st)) { ++ return; ++ } ++ ++ if (bs->sg || S_ISBLK(st.st_mode)) { ++ int ret = hdev_get_max_hw_transfer(s->fd, &st); + + if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) { +- bs->bl.max_hw_transfer = pow2floor(ret); ++ bs->bl.max_hw_transfer = ret; + } + +- ret = sg_get_max_segments(s->fd); ++ ret = hdev_get_max_segments(s->fd, &st); + if (ret > 0) { + bs->bl.max_iov = ret; + } + } +- +- raw_probe_alignment(bs, s->fd, errp); +- bs->bl.min_mem_alignment = s->buf_align; +- bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size); + } + + static int check_for_dasd(int fd) +-- +2.27.0 + diff --git a/kvm-osdep-provide-ROUND_DOWN-macro.patch b/kvm-osdep-provide-ROUND_DOWN-macro.patch new file mode 100644 index 0000000..cf8a1a3 --- /dev/null +++ b/kvm-osdep-provide-ROUND_DOWN-macro.patch @@ -0,0 +1,75 @@ +From d9fa07a04ee19ad713b053f6a649178361d822a8 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 16 Jul 2021 16:51:31 -0400 +Subject: [PATCH 15/43] osdep: provide ROUND_DOWN macro + +RH-Author: Miroslav Rezanina +RH-Bugzilla: 1957194 + +osdep.h provides a ROUND_UP macro to hide bitwise operations for the +purpose of rounding a number up to a power of two; add a ROUND_DOWN +macro that does the same with 
truncation towards zero. + +While at it, change the formatting of some comments. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit c9797456f64ce72c03eb2969d97ac1dd4698d91e) +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + include/qemu/osdep.h | 28 ++++++++++++++++++++++------ + 1 file changed, 22 insertions(+), 6 deletions(-) + +diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h +index cb2a07e472..e327220992 100644 +--- a/include/qemu/osdep.h ++++ b/include/qemu/osdep.h +@@ -316,11 +316,16 @@ extern "C" { + }) + #endif + +-/* Round number down to multiple */ ++/* ++ * Round number down to multiple. Safe when m is not a power of 2 (see ++ * ROUND_DOWN for a faster version when a power of 2 is guaranteed). ++ */ + #define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m)) + +-/* Round number up to multiple. Safe when m is not a power of 2 (see +- * ROUND_UP for a faster version when a power of 2 is guaranteed) */ ++/* ++ * Round number up to multiple. Safe when m is not a power of 2 (see ++ * ROUND_UP for a faster version when a power of 2 is guaranteed). ++ */ + #define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m)) + + /* Check if n is a multiple of m */ +@@ -337,11 +342,22 @@ extern "C" { + /* Check if pointer p is n-bytes aligned */ + #define QEMU_PTR_IS_ALIGNED(p, n) QEMU_IS_ALIGNED((uintptr_t)(p), (n)) + +-/* Round number up to multiple. Requires that d be a power of 2 (see ++/* ++ * Round number down to multiple. Requires that d be a power of 2 (see + * QEMU_ALIGN_UP for a safer but slower version on arbitrary +- * numbers); works even if d is a smaller type than n. */ ++ * numbers); works even if d is a smaller type than n. ++ */ ++#ifndef ROUND_DOWN ++#define ROUND_DOWN(n, d) ((n) & -(0 ? (n) : (d))) ++#endif ++ ++/* ++ * Round number up to multiple. Requires that d be a power of 2 (see ++ * QEMU_ALIGN_UP for a safer but slower version on arbitrary ++ * numbers); works even if d is a smaller type than n. 
++ */ + #ifndef ROUND_UP +-#define ROUND_UP(n, d) (((n) + (d) - 1) & -(0 ? (n) : (d))) ++#define ROUND_UP(n, d) ROUND_DOWN((n) + (d) - 1, (d)) + #endif + + #ifndef DIV_ROUND_UP +-- +2.27.0 + diff --git a/kvm-s390x-css-Add-passthrough-IRB.patch b/kvm-s390x-css-Add-passthrough-IRB.patch new file mode 100644 index 0000000..339a45c --- /dev/null +++ b/kvm-s390x-css-Add-passthrough-IRB.patch @@ -0,0 +1,127 @@ +From 4eb1f0936bfc921cad9af37f1573075148843b1d Mon Sep 17 00:00:00 2001 +From: Eric Farman +Date: Thu, 24 Jun 2021 14:15:16 -0400 +Subject: [PATCH 04/43] s390x/css: Add passthrough IRB + +RH-Author: Miroslav Rezanina +RH-Bugzilla: 1957194 + +Wire in the subchannel callback for building the IRB +ESW and ECW space for passthrough devices, and copy +the hardware's ESW into the IRB we are building. + +If the hardware presented concurrent sense, then copy +that sense data into the IRB's ECW space. + +Signed-off-by: Eric Farman +Message-Id: <20210617232537.1337506-5-farman@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit c626710fc755628d0d6b88aab0514c9238a84522) +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/s390x/css.c | 16 +++++++++++++++- + hw/s390x/s390-ccw.c | 1 + + hw/vfio/ccw.c | 4 ++++ + include/hw/s390x/css.h | 3 +++ + 4 files changed, 23 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/css.c b/hw/s390x/css.c +index e77a0e523d..c3150da4f7 100644 +--- a/hw/s390x/css.c ++++ b/hw/s390x/css.c +@@ -1336,7 +1336,7 @@ static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src) + } + } + +-static void copy_esw_to_guest(ESW *dest, const ESW *src) ++void copy_esw_to_guest(ESW *dest, const ESW *src) + { + dest->word0 = cpu_to_be32(src->word0); + dest->erw = cpu_to_be32(src->erw); +@@ -1651,6 +1651,20 @@ static void build_irb_sense_data(SubchDev *sch, IRB *irb) + } + } + ++void build_irb_passthrough(SubchDev *sch, IRB *irb) ++{ ++ /* Copy ESW from hardware */ ++ irb->esw = sch->esw; ++ ++ /* ++ * If (irb->esw.erw & ESW_ERW_SENSE) is true, then the contents ++ * of the ECW is sense data. If false, then it is model-dependent ++ * information. Either way, copy it into the IRB for the guest to ++ * read/decide what to do with. 
++ */ ++ build_irb_sense_data(sch, irb); ++} ++ + void build_irb_virtual(SubchDev *sch, IRB *irb) + { + SCHIB *schib = &sch->curr_status; +diff --git a/hw/s390x/s390-ccw.c b/hw/s390x/s390-ccw.c +index b497571863..39cbea615b 100644 +--- a/hw/s390x/s390-ccw.c ++++ b/hw/s390x/s390-ccw.c +@@ -125,6 +125,7 @@ static void s390_ccw_realize(S390CCWDevice *cdev, char *sysfsdev, Error **errp) + } + sch->driver_data = cdev; + sch->do_subchannel_work = do_subchannel_work_passthrough; ++ sch->irb_cb = build_irb_passthrough; + + ccw_dev->sch = sch; + ret = css_sch_build_schib(sch, &cdev->hostid); +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index b2df708e4b..5f141d44a4 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -322,6 +322,7 @@ static void vfio_ccw_io_notifier_handler(void *opaque) + SCHIB *schib = &sch->curr_status; + SCSW s; + IRB irb; ++ ESW esw; + int size; + + if (!event_notifier_test_and_clear(&vcdev->io_notifier)) { +@@ -372,6 +373,9 @@ static void vfio_ccw_io_notifier_handler(void *opaque) + copy_scsw_to_guest(&s, &irb.scsw); + schib->scsw = s; + ++ copy_esw_to_guest(&esw, &irb.esw); ++ sch->esw = esw; ++ + /* If a uint check is pending, copy sense data. 
*/ + if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && + (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { +diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h +index 7c23a13f3d..10ed1df1bb 100644 +--- a/include/hw/s390x/css.h ++++ b/include/hw/s390x/css.h +@@ -141,6 +141,7 @@ struct SubchDev { + void (*irb_cb)(SubchDev *, IRB *); + SenseId id; + void *driver_data; ++ ESW esw; + }; + + static inline void sch_gen_unit_exception(SubchDev *sch) +@@ -202,6 +203,7 @@ int css_sch_build_schib(SubchDev *sch, CssDevId *dev_id); + unsigned int css_find_free_chpid(uint8_t cssid); + uint16_t css_build_subchannel_id(SubchDev *sch); + void copy_scsw_to_guest(SCSW *dest, const SCSW *src); ++void copy_esw_to_guest(ESW *dest, const ESW *src); + void css_inject_io_interrupt(SubchDev *sch); + void css_reset(void); + void css_reset_sch(SubchDev *sch); +@@ -216,6 +218,7 @@ void css_clear_sei_pending(void); + IOInstEnding s390_ccw_cmd_request(SubchDev *sch); + IOInstEnding do_subchannel_work_virtual(SubchDev *sub); + IOInstEnding do_subchannel_work_passthrough(SubchDev *sub); ++void build_irb_passthrough(SubchDev *sch, IRB *irb); + void build_irb_virtual(SubchDev *sch, IRB *irb); + + int s390_ccw_halt(SubchDev *sch); +-- +2.27.0 + diff --git a/kvm-s390x-css-Introduce-an-ESW-struct.patch b/kvm-s390x-css-Introduce-an-ESW-struct.patch new file mode 100644 index 0000000..de99f03 --- /dev/null +++ b/kvm-s390x-css-Introduce-an-ESW-struct.patch @@ -0,0 +1,111 @@ +From 9a12329325d94ab56dbab976b4423fe7db0e8d0b Mon Sep 17 00:00:00 2001 +From: Eric Farman +Date: Thu, 24 Jun 2021 14:15:13 -0400 +Subject: [PATCH 01/43] s390x/css: Introduce an ESW struct + +RH-Author: Miroslav Rezanina +RH-Bugzilla: 1957194 + +The Interrupt Response Block is comprised of several other +structures concatenated together, but only the 12-byte +Subchannel-Status Word (SCSW) is defined as a proper struct. +Everything else is a simple array of 32-bit words. 
+ +Let's define a proper struct for the 20-byte Extended-Status +Word (ESW) so that we can make good decisions about the sense +data that would go into the ECW area for virtual vs +passthrough devices. + +[CH: adapted ESW definition to build with mingw, as discussed] +Signed-off-by: Eric Farman +Message-Id: <20210617232537.1337506-2-farman@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 3fdc622ad79636f3d7f8bed50a53bc28af1850e1) +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/s390x/css.c | 19 +++++++++++++------ + include/hw/s390x/ioinst.h | 12 +++++++++++- + 2 files changed, 24 insertions(+), 7 deletions(-) + +diff --git a/hw/s390x/css.c b/hw/s390x/css.c +index 4149b8e5a7..bd3172a688 100644 +--- a/hw/s390x/css.c ++++ b/hw/s390x/css.c +@@ -1336,6 +1336,14 @@ static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src) + } + } + ++static void copy_esw_to_guest(ESW *dest, const ESW *src) ++{ ++ dest->word0 = cpu_to_be32(src->word0); ++ dest->erw = cpu_to_be32(src->erw); ++ dest->word2 = cpu_to_be64(src->word2); ++ dest->word4 = cpu_to_be32(src->word4); ++} ++ + IOInstEnding css_do_stsch(SubchDev *sch, SCHIB *schib) + { + int ret; +@@ -1605,9 +1613,8 @@ static void copy_irb_to_guest(IRB *dest, const IRB *src, const PMCW *pmcw, + + copy_scsw_to_guest(&dest->scsw, &src->scsw); + +- for (i = 0; i < ARRAY_SIZE(dest->esw); i++) { +- dest->esw[i] = cpu_to_be32(src->esw[i]); +- } ++ copy_esw_to_guest(&dest->esw, &src->esw); ++ + for (i = 0; i < ARRAY_SIZE(dest->ecw); i++) { + dest->ecw[i] = cpu_to_be32(src->ecw[i]); + } +@@ -1656,9 +1663,9 @@ int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) + SCSW_CSTAT_CHN_CTRL_CHK | + SCSW_CSTAT_INTF_CTRL_CHK)) { + irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF; +- irb.esw[0] = 0x04804000; ++ irb.esw.word0 = 0x04804000; + } else { +- irb.esw[0] = 0x00800000; ++ irb.esw.word0 = 0x00800000; + } + /* If a unit check is pending, copy sense data. 
*/ + if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && +@@ -1671,7 +1678,7 @@ int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) + for (i = 0; i < ARRAY_SIZE(irb.ecw); i++) { + irb.ecw[i] = be32_to_cpu(irb.ecw[i]); + } +- irb.esw[1] = 0x01000000 | (sizeof(sch->sense_data) << 8); ++ irb.esw.erw = ESW_ERW_SENSE | (sizeof(sch->sense_data) << 8); + } + } + /* Store the irb to the guest. */ +diff --git a/include/hw/s390x/ioinst.h b/include/hw/s390x/ioinst.h +index c6737a30d4..3771fff9d4 100644 +--- a/include/hw/s390x/ioinst.h ++++ b/include/hw/s390x/ioinst.h +@@ -123,10 +123,20 @@ typedef struct SCHIB { + uint8_t mda[4]; + } QEMU_PACKED SCHIB; + ++/* format-0 extended-status word */ ++typedef struct ESW { ++ uint32_t word0; /* subchannel logout for format 0 */ ++ uint32_t erw; ++ uint64_t word2; /* failing-storage address for format 0 */ ++ uint32_t word4; /* secondary-CCW address for format 0 */ ++} QEMU_PACKED ESW; ++ ++#define ESW_ERW_SENSE 0x01000000 ++ + /* interruption response block */ + typedef struct IRB { + SCSW scsw; +- uint32_t esw[5]; ++ ESW esw; + uint32_t ecw[8]; + uint32_t emw[8]; + } IRB; +-- +2.27.0 + diff --git a/kvm-s390x-css-Refactor-IRB-construction.patch b/kvm-s390x-css-Refactor-IRB-construction.patch new file mode 100644 index 0000000..f55bf79 --- /dev/null +++ b/kvm-s390x-css-Refactor-IRB-construction.patch @@ -0,0 +1,144 @@ +From 0f4d8c51b51a23a87f1e3e9e764151352f652f3b Mon Sep 17 00:00:00 2001 +From: Eric Farman +Date: Thu, 24 Jun 2021 14:15:15 -0400 +Subject: [PATCH 03/43] s390x/css: Refactor IRB construction + +RH-Author: Miroslav Rezanina +RH-Bugzilla: 1957194 + +Currently, all subchannel types have "sense data" copied into +the IRB.ECW space, and a couple flags enabled in the IRB.SCSW +and IRB.ESW. But for passthrough (vfio-ccw) subchannels, +this data isn't populated in the first place, so enabling +those flags leads to unexpected behavior if the guest tries to +process the sense data (zeros) in the IRB.ECW. 
+ +Let's add a subchannel callback that builds these portions of +the IRB, and move the existing code into a routine for those +virtual subchannels. The passthrough subchannels will be able +to piggy-back onto this later. + +Signed-off-by: Eric Farman +Message-Id: <20210617232537.1337506-4-farman@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 0599a046acf1b625e97cef0aa702b5d86528c642) +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/s390x/3270-ccw.c | 1 + + hw/s390x/css.c | 45 +++++++++++++++++++++++++++--------------- + hw/s390x/virtio-ccw.c | 1 + + include/hw/s390x/css.h | 2 ++ + 4 files changed, 33 insertions(+), 16 deletions(-) + +diff --git a/hw/s390x/3270-ccw.c b/hw/s390x/3270-ccw.c +index f3e7342b1e..9efee591f9 100644 +--- a/hw/s390x/3270-ccw.c ++++ b/hw/s390x/3270-ccw.c +@@ -130,6 +130,7 @@ static void emulated_ccw_3270_realize(DeviceState *ds, Error **errp) + EMULATED_CCW_3270_CHPID_TYPE); + sch->do_subchannel_work = do_subchannel_work_virtual; + sch->ccw_cb = emulated_ccw_3270_cb; ++ sch->irb_cb = build_irb_virtual; + + ck->init(dev, &err); + if (err) { +diff --git a/hw/s390x/css.c b/hw/s390x/css.c +index fac7d5b39d..e77a0e523d 100644 +--- a/hw/s390x/css.c ++++ b/hw/s390x/css.c +@@ -1651,6 +1651,30 @@ static void build_irb_sense_data(SubchDev *sch, IRB *irb) + } + } + ++void build_irb_virtual(SubchDev *sch, IRB *irb) ++{ ++ SCHIB *schib = &sch->curr_status; ++ uint16_t stctl = schib->scsw.ctrl & SCSW_CTRL_MASK_STCTL; ++ ++ if (stctl & SCSW_STCTL_STATUS_PEND) { ++ if (schib->scsw.cstat & (SCSW_CSTAT_DATA_CHECK | ++ SCSW_CSTAT_CHN_CTRL_CHK | ++ SCSW_CSTAT_INTF_CTRL_CHK)) { ++ irb->scsw.flags |= SCSW_FLAGS_MASK_ESWF; ++ irb->esw.word0 = 0x04804000; ++ } else { ++ irb->esw.word0 = 0x00800000; ++ } ++ /* If a unit check is pending, copy sense data. 
*/ ++ if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && ++ (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { ++ irb->scsw.flags |= SCSW_FLAGS_MASK_ESWF | SCSW_FLAGS_MASK_ECTL; ++ build_irb_sense_data(sch, irb); ++ irb->esw.erw = ESW_ERW_SENSE | (sizeof(sch->sense_data) << 8); ++ } ++ } ++} ++ + int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) + { + SCHIB *schib = &sch->curr_status; +@@ -1669,23 +1693,12 @@ int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) + + /* Copy scsw from current status. */ + irb.scsw = schib->scsw; +- if (stctl & SCSW_STCTL_STATUS_PEND) { +- if (schib->scsw.cstat & (SCSW_CSTAT_DATA_CHECK | +- SCSW_CSTAT_CHN_CTRL_CHK | +- SCSW_CSTAT_INTF_CTRL_CHK)) { +- irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF; +- irb.esw.word0 = 0x04804000; +- } else { +- irb.esw.word0 = 0x00800000; +- } +- /* If a unit check is pending, copy sense data. */ +- if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && +- (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { +- irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF | SCSW_FLAGS_MASK_ECTL; +- build_irb_sense_data(sch, &irb); +- irb.esw.erw = ESW_ERW_SENSE | (sizeof(sch->sense_data) << 8); +- } ++ ++ /* Build other IRB data, if necessary */ ++ if (sch->irb_cb) { ++ sch->irb_cb(sch, &irb); + } ++ + /* Store the irb to the guest. 
*/ + p = schib->pmcw; + copy_irb_to_guest(target_irb, &irb, &p, irb_len); +diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c +index 8195f3546e..5a1eb39325 100644 +--- a/hw/s390x/virtio-ccw.c ++++ b/hw/s390x/virtio-ccw.c +@@ -754,6 +754,7 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) + sch->id.reserved = 0xff; + sch->id.cu_type = VIRTIO_CCW_CU_TYPE; + sch->do_subchannel_work = do_subchannel_work_virtual; ++ sch->irb_cb = build_irb_virtual; + ccw_dev->sch = sch; + dev->indicators = NULL; + dev->revision = -1; +diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h +index bba7593d2e..7c23a13f3d 100644 +--- a/include/hw/s390x/css.h ++++ b/include/hw/s390x/css.h +@@ -138,6 +138,7 @@ struct SubchDev { + int (*ccw_cb) (SubchDev *, CCW1); + void (*disable_cb)(SubchDev *); + IOInstEnding (*do_subchannel_work) (SubchDev *); ++ void (*irb_cb)(SubchDev *, IRB *); + SenseId id; + void *driver_data; + }; +@@ -215,6 +216,7 @@ void css_clear_sei_pending(void); + IOInstEnding s390_ccw_cmd_request(SubchDev *sch); + IOInstEnding do_subchannel_work_virtual(SubchDev *sub); + IOInstEnding do_subchannel_work_passthrough(SubchDev *sub); ++void build_irb_virtual(SubchDev *sch, IRB *irb); + + int s390_ccw_halt(SubchDev *sch); + int s390_ccw_clear(SubchDev *sch); +-- +2.27.0 + diff --git a/kvm-s390x-css-Split-out-the-IRB-sense-data.patch b/kvm-s390x-css-Split-out-the-IRB-sense-data.patch new file mode 100644 index 0000000..2c8a3f8 --- /dev/null +++ b/kvm-s390x-css-Split-out-the-IRB-sense-data.patch @@ -0,0 +1,63 @@ +From a987dfced200adf1e4c2d3c39f0b5da0fb7e6ead Mon Sep 17 00:00:00 2001 +From: Eric Farman +Date: Thu, 24 Jun 2021 14:15:14 -0400 +Subject: [PATCH 02/43] s390x/css: Split out the IRB sense data + +RH-Author: Miroslav Rezanina +RH-Bugzilla: 1957194 + +Let's move this logic into its own routine, +so it can be reused later. 
+ +Signed-off-by: Eric Farman +Reviewed-by: Thomas Huth +Message-Id: <20210617232537.1337506-3-farman@linux.ibm.com> +Signed-off-by: Cornelia Huck +(cherry picked from commit 1b01dedaed41c2ca6129475c22b7b778b109fae8) +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/s390x/css.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +diff --git a/hw/s390x/css.c b/hw/s390x/css.c +index bd3172a688..fac7d5b39d 100644 +--- a/hw/s390x/css.c ++++ b/hw/s390x/css.c +@@ -1640,6 +1640,17 @@ static void copy_irb_to_guest(IRB *dest, const IRB *src, const PMCW *pmcw, + *irb_len = sizeof(*dest); + } + ++static void build_irb_sense_data(SubchDev *sch, IRB *irb) ++{ ++ int i; ++ ++ /* Attention: sense_data is already BE! */ ++ memcpy(irb->ecw, sch->sense_data, sizeof(sch->sense_data)); ++ for (i = 0; i < ARRAY_SIZE(irb->ecw); i++) { ++ irb->ecw[i] = be32_to_cpu(irb->ecw[i]); ++ } ++} ++ + int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) + { + SCHIB *schib = &sch->curr_status; +@@ -1670,14 +1681,8 @@ int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) + /* If a unit check is pending, copy sense data. */ + if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && + (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { +- int i; +- + irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF | SCSW_FLAGS_MASK_ECTL; +- /* Attention: sense_data is already BE! 
*/ +- memcpy(irb.ecw, sch->sense_data, sizeof(sch->sense_data)); +- for (i = 0; i < ARRAY_SIZE(irb.ecw); i++) { +- irb.ecw[i] = be32_to_cpu(irb.ecw[i]); +- } ++ build_irb_sense_data(sch, &irb); + irb.esw.erw = ESW_ERW_SENSE | (sizeof(sch->sense_data) << 8); + } + } +-- +2.27.0 + diff --git a/kvm-scsi-generic-pass-max_segments-via-max_iov-field-in-.patch b/kvm-scsi-generic-pass-max_segments-via-max_iov-field-in-.patch new file mode 100644 index 0000000..612fb9a --- /dev/null +++ b/kvm-scsi-generic-pass-max_segments-via-max_iov-field-in-.patch @@ -0,0 +1,64 @@ +From 05038edf628c6bca7cef061b7b1fac2b0b2163af Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 16 Jul 2021 16:51:30 -0400 +Subject: [PATCH 14/43] scsi-generic: pass max_segments via max_iov field in + BlockLimits + +RH-Author: Miroslav Rezanina +RH-Bugzilla: 1957194 + +I/O to a disk via read/write is not limited by the number of segments allowed +by the host adapter; the kernel can split requests if needed, and the limit +imposed by the host adapter can be very low (256k or so) to avoid that SG_IO +returns EINVAL if memory is heavily fragmented. + +Since this value is only interesting for SG_IO-based I/O, do not include +it in the max_transfer and only take it into account when patching the +block limits VPD page in the scsi-generic device. + +Signed-off-by: Paolo Bonzini +Reviewed-by: Max Reitz +(cherry picked from commit 01ef8185b809af9d287e1a03a3f9d8ea8231118a) +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + block/file-posix.c | 3 +-- + hw/scsi/scsi-generic.c | 6 ++++-- + 2 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 74d4903dc1..1a6c799e19 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -1263,8 +1263,7 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) + + ret = sg_get_max_segments(s->fd); + if (ret > 0) { +- bs->bl.max_transfer = MIN(bs->bl.max_transfer, +- ret * qemu_real_host_page_size); ++ bs->bl.max_iov = ret; + } + } + +diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c +index 98c30c5d5c..82e1e2ee79 100644 +--- a/hw/scsi/scsi-generic.c ++++ b/hw/scsi/scsi-generic.c +@@ -179,10 +179,12 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s) + (r->req.cmd.buf[1] & 0x01)) { + page = r->req.cmd.buf[2]; + if (page == 0xb0) { +- uint32_t max_transfer = +- blk_get_max_transfer(s->conf.blk) / s->blocksize; ++ uint32_t max_transfer = blk_get_max_transfer(s->conf.blk); ++ uint32_t max_iov = blk_get_max_iov(s->conf.blk); + + assert(max_transfer); ++ max_transfer = MIN_NON_ZERO(max_transfer, max_iov * qemu_real_host_page_size) ++ / s->blocksize; + stl_be_p(&r->buf[8], max_transfer); + /* Also take care of the opt xfer len. 
*/ + stl_be_p(&r->buf[12], +-- +2.27.0 + diff --git a/kvm-vhost-user-Fix-backends-without-multiqueue-support.patch b/kvm-vhost-user-Fix-backends-without-multiqueue-support.patch new file mode 100644 index 0000000..eb08089 --- /dev/null +++ b/kvm-vhost-user-Fix-backends-without-multiqueue-support.patch @@ -0,0 +1,46 @@ +From de25a5f05b76ca99299e09dabe04e7d59b9bed79 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 12 Jul 2021 10:22:32 -0400 +Subject: [PATCH 12/43] vhost-user: Fix backends without multiqueue support + +RH-Author: Miroslav Rezanina +RH-Bugzilla: 1957194 + +dev->max_queues was never initialised for backends that don't support +VHOST_USER_PROTOCOL_F_MQ, so it would use 0 as the maximum number of +queues to check against and consequently fail for any such backend. + +Set it to 1 if the backend doesn't have multiqueue support. + +Fixes: c90bd505a3e8210c23d69fecab9ee6f56ec4a161 +Signed-off-by: Kevin Wolf +Message-Id: <20210705171429.29286-1-kwolf@redhat.com> +Reviewed-by: Cornelia Huck +Reviewed-by: Raphael Norwitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 84affad1fd4c5251d7cccf4df43b29e9157983a9) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/vhost-user.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index ee57abe045..53f50adcba 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -1908,7 +1908,10 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) + if (err < 0) { + return err; + } ++ } else { ++ dev->max_queues = 1; + } ++ + if (dev->num_queues && dev->max_queues < dev->num_queues) { + error_report("The maximum number of queues supported by the " + "backend is %" PRIu64, dev->max_queues); +-- +2.27.0 + diff --git a/kvm-vhost-user-blk-Check-that-num-queues-is-supported-by.patch b/kvm-vhost-user-blk-Check-that-num-queues-is-supported-by.patch new file mode 100644 index 0000000..8b466dd --- /dev/null +++ b/kvm-vhost-user-blk-Check-that-num-queues-is-supported-by.patch @@ -0,0 +1,83 @@ +From f3cec652012b0b5ab1d881f6377719b0984bce63 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 12 Jul 2021 10:22:31 -0400 +Subject: [PATCH 11/43] vhost-user-blk: Check that num-queues is supported by + backend + +RH-Author: Miroslav Rezanina +RH-Bugzilla: 1957194 + +Creating a device with a number of queues that isn't supported by the +backend is pointless, the device won't work properly and the error +messages are rather confusing. + +Just fail to create the device if num-queues is higher than what the +backend supports. + +Since the relationship between num-queues and the number of virtqueues +depends on the specific device, this is an additional value that needs +to be initialised by the device. For convenience, allow leaving it 0 if +the check should be skipped. This makes sense for vhost-user-net where +separate vhost devices are used for the queues and custom initialisation +code is needed to perform the check. 
+ +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1935031 +Signed-off-by: Kevin Wolf +Reviewed-by: Raphael Norwitz +Message-Id: <20210429171316.162022-7-kwolf@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Kevin Wolf +(cherry picked from commit c90bd505a3e8210c23d69fecab9ee6f56ec4a161) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/block/vhost-user-blk.c | 1 + + hw/virtio/vhost-user.c | 5 +++++ + include/hw/virtio/vhost.h | 2 ++ + 3 files changed, 8 insertions(+) + +diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c +index c7e502f4c7..c6210fad0c 100644 +--- a/hw/block/vhost-user-blk.c ++++ b/hw/block/vhost-user-blk.c +@@ -324,6 +324,7 @@ static int vhost_user_blk_connect(DeviceState *dev, Error **errp) + } + s->connected = true; + ++ s->dev.num_queues = s->num_queues; + s->dev.nvqs = s->num_queues; + s->dev.vqs = s->vhost_vqs; + s->dev.vq_index = 0; +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index ded0c10453..ee57abe045 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -1909,6 +1909,11 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) + return err; + } + } ++ if (dev->num_queues && dev->max_queues < dev->num_queues) { ++ error_report("The maximum number of queues supported by the " ++ "backend is %" PRIu64, dev->max_queues); ++ return -EINVAL; ++ } + + if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && + !(virtio_has_feature(dev->protocol_features, +diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h +index 4a8bc75415..21a9a52088 100644 +--- a/include/hw/virtio/vhost.h ++++ b/include/hw/virtio/vhost.h +@@ -74,6 +74,8 @@ struct vhost_dev { + int nvqs; + /* the first virtqueue which would be used by this vhost dev */ + int vq_index; ++ /* if non-zero, minimum required value for max_queues */ ++ int num_queues; + uint64_t features; + uint64_t acked_features; + uint64_t 
backend_features; +-- +2.27.0 + diff --git a/kvm-vhost-user-blk-Don-t-reconnect-during-initialisation.patch b/kvm-vhost-user-blk-Don-t-reconnect-during-initialisation.patch new file mode 100644 index 0000000..c1b98d8 --- /dev/null +++ b/kvm-vhost-user-blk-Don-t-reconnect-during-initialisation.patch @@ -0,0 +1,179 @@ +From 5d39cb265db6ea2159662a2d071d340712940d33 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 12 Jul 2021 10:22:27 -0400 +Subject: [PATCH 07/43] vhost-user-blk: Don't reconnect during initialisation + +RH-Author: Miroslav Rezanina +RH-Bugzilla: 1957194 + +This is a partial revert of commits 77542d43149 and bc79c87bcde. + +Usually, an error during initialisation means that the configuration was +wrong. Reconnecting won't make the error go away, but just turn the +error condition into an endless loop. Avoid this and return errors +again. + +Additionally, calling vhost_user_blk_disconnect() from the chardev event +handler could result in use-after-free because none of the +initialisation code expects that the device could just go away in the +middle. So removing the call fixes crashes in several places. 
+ +For example, using a num-queues setting that is incompatible with the +backend would result in a crash like this (dereferencing dev->opaque, +which is already NULL): + + #0 0x0000555555d0a4bd in vhost_user_read_cb (source=0x5555568f4690, condition=(G_IO_IN | G_IO_HUP), opaque=0x7fffffffcbf0) at ../hw/virtio/vhost-user.c:313 + #1 0x0000555555d950d3 in qio_channel_fd_source_dispatch (source=0x555557c3f750, callback=0x555555d0a478 , user_data=0x7fffffffcbf0) at ../io/channel-watch.c:84 + #2 0x00007ffff7b32a9f in g_main_context_dispatch () at /lib64/libglib-2.0.so.0 + #3 0x00007ffff7b84a98 in g_main_context_iterate.constprop () at /lib64/libglib-2.0.so.0 + #4 0x00007ffff7b32163 in g_main_loop_run () at /lib64/libglib-2.0.so.0 + #5 0x0000555555d0a724 in vhost_user_read (dev=0x555557bc62f8, msg=0x7fffffffcc50) at ../hw/virtio/vhost-user.c:402 + #6 0x0000555555d0ee6b in vhost_user_get_config (dev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost-user.c:2133 + #7 0x0000555555d56d46 in vhost_dev_get_config (hdev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost.c:1566 + #8 0x0000555555cdd150 in vhost_user_blk_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcf90) at ../hw/block/vhost-user-blk.c:510 + #9 0x0000555555d08f6d in virtio_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcff0) at ../hw/virtio/virtio.c:3660 + +Note that this removes the ability to reconnect during initialisation +(but not during operation) when there is no permanent error, but the +backend restarts, as the implementation was buggy. This feature can be +added back in a follow-up series after changing error paths to +distinguish cases where retrying could help from cases with permanent +errors. + +Signed-off-by: Kevin Wolf +Message-Id: <20210429171316.162022-3-kwolf@redhat.com> +Reviewed-by: Michael S. 
Tsirkin +Signed-off-by: Kevin Wolf +(cherry picked from commit dabefdd6abcbc7d858e9413e4734aab2e0b5c8d9) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/block/vhost-user-blk.c | 59 +++++++++++---------------------------- + 1 file changed, 17 insertions(+), 42 deletions(-) + +diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c +index 7c85248a7b..c0b9958da1 100644 +--- a/hw/block/vhost-user-blk.c ++++ b/hw/block/vhost-user-blk.c +@@ -50,6 +50,8 @@ static const int user_feature_bits[] = { + VHOST_INVALID_FEATURE_BIT + }; + ++static void vhost_user_blk_event(void *opaque, QEMUChrEvent event); ++ + static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config) + { + VHostUserBlk *s = VHOST_USER_BLK(vdev); +@@ -362,19 +364,6 @@ static void vhost_user_blk_disconnect(DeviceState *dev) + vhost_dev_cleanup(&s->dev); + } + +-static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, +- bool realized); +- +-static void vhost_user_blk_event_realize(void *opaque, QEMUChrEvent event) +-{ +- vhost_user_blk_event(opaque, event, false); +-} +- +-static void vhost_user_blk_event_oper(void *opaque, QEMUChrEvent event) +-{ +- vhost_user_blk_event(opaque, event, true); +-} +- + static void vhost_user_blk_chr_closed_bh(void *opaque) + { + DeviceState *dev = opaque; +@@ -382,12 +371,11 @@ static void vhost_user_blk_chr_closed_bh(void *opaque) + VHostUserBlk *s = VHOST_USER_BLK(vdev); + + vhost_user_blk_disconnect(dev); +- qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, +- vhost_user_blk_event_oper, NULL, opaque, NULL, true); ++ qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, ++ NULL, opaque, NULL, true); + } + +-static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, +- bool realized) ++static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) + { + DeviceState *dev = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); +@@ -401,17 +389,7 @@ static 
void vhost_user_blk_event(void *opaque, QEMUChrEvent event, + } + break; + case CHR_EVENT_CLOSED: +- /* +- * Closing the connection should happen differently on device +- * initialization and operation stages. +- * On initalization, we want to re-start vhost_dev initialization +- * from the very beginning right away when the connection is closed, +- * so we clean up vhost_dev on each connection closing. +- * On operation, we want to postpone vhost_dev cleanup to let the +- * other code perform its own cleanup sequence using vhost_dev data +- * (e.g. vhost_dev_set_log). +- */ +- if (realized && !runstate_check(RUN_STATE_SHUTDOWN)) { ++ if (!runstate_check(RUN_STATE_SHUTDOWN)) { + /* + * A close event may happen during a read/write, but vhost + * code assumes the vhost_dev remains setup, so delay the +@@ -431,8 +409,6 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, + * knowing its type (in this case vhost-user). + */ + s->dev.started = false; +- } else { +- vhost_user_blk_disconnect(dev); + } + break; + case CHR_EVENT_BREAK: +@@ -489,33 +465,32 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) + s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues); + s->connected = false; + +- qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, +- vhost_user_blk_event_realize, NULL, (void *)dev, +- NULL, true); +- +-reconnect: + if (qemu_chr_fe_wait_connected(&s->chardev, errp) < 0) { + goto virtio_err; + } + +- /* check whether vhost_user_blk_connect() failed or not */ +- if (!s->connected) { +- goto reconnect; ++ if (vhost_user_blk_connect(dev) < 0) { ++ error_setg(errp, "vhost-user-blk: could not connect"); ++ qemu_chr_fe_disconnect(&s->chardev); ++ goto virtio_err; + } ++ assert(s->connected); + + ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, + sizeof(struct virtio_blk_config)); + if (ret < 0) { +- error_report("vhost-user-blk: get block config failed"); +- goto reconnect; ++ error_setg(errp, "vhost-user-blk: get 
block config failed"); ++ goto vhost_err; + } + +- /* we're fully initialized, now we can operate, so change the handler */ ++ /* we're fully initialized, now we can operate, so add the handler */ + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, +- vhost_user_blk_event_oper, NULL, (void *)dev, ++ vhost_user_blk_event, NULL, (void *)dev, + NULL, true); + return; + ++vhost_err: ++ vhost_dev_cleanup(&s->dev); + virtio_err: + g_free(s->vhost_vqs); + s->vhost_vqs = NULL; +-- +2.27.0 + diff --git a/kvm-vhost-user-blk-Fail-gracefully-on-too-large-queue-si.patch b/kvm-vhost-user-blk-Fail-gracefully-on-too-large-queue-si.patch new file mode 100644 index 0000000..29d6bd7 --- /dev/null +++ b/kvm-vhost-user-blk-Fail-gracefully-on-too-large-queue-si.patch @@ -0,0 +1,55 @@ +From bb551b71851c8d5a37b29aae373a99e97885a4df Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 12 Jul 2021 10:22:25 -0400 +Subject: [PATCH 05/43] vhost-user-blk: Fail gracefully on too large queue size +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-Bugzilla: 1957194 + +virtio_add_queue() aborts when queue_size > VIRTQUEUE_MAX_SIZE, so +vhost_user_blk_device_realize() should check this before calling it. + +Simple reproducer: + +qemu-system-x86_64 \ + -chardev null,id=foo \ + -device vhost-user-blk-pci,queue-size=4096,chardev=foo + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1935014 +Signed-off-by: Kevin Wolf +Message-Id: <20210413165654.50810-1-kwolf@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Raphael Norwitz +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit 68bf7336533faa6aa90fdd4558edddbf5d8ef814) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/block/vhost-user-blk.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c +index 0b5b9d44cd..f5e9682703 100644 +--- a/hw/block/vhost-user-blk.c ++++ b/hw/block/vhost-user-blk.c +@@ -467,6 +467,11 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) + error_setg(errp, "vhost-user-blk: queue size must be non-zero"); + return; + } ++ if (s->queue_size > VIRTQUEUE_MAX_SIZE) { ++ error_setg(errp, "vhost-user-blk: queue size must not exceed %d", ++ VIRTQUEUE_MAX_SIZE); ++ return; ++ } + + if (!vhost_user_init(&s->vhost_user, &s->chardev, errp)) { + return; +-- +2.27.0 + diff --git a/kvm-vhost-user-blk-Get-more-feature-flags-from-vhost-dev.patch b/kvm-vhost-user-blk-Get-more-feature-flags-from-vhost-dev.patch new file mode 100644 index 0000000..310985d --- /dev/null +++ b/kvm-vhost-user-blk-Get-more-feature-flags-from-vhost-dev.patch @@ -0,0 +1,44 @@ +From 28ab6c187224be79fe02fb1b5037d1c0b300a778 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 12 Jul 2021 10:22:29 -0400 +Subject: [PATCH 09/43] vhost-user-blk: Get more feature flags from vhost + device + +RH-Author: Miroslav Rezanina +RH-Bugzilla: 1957194 + +VIRTIO_F_RING_PACKED and VIRTIO_F_IOMMU_PLATFORM need to be supported by +the vhost device, otherwise advertising it to the guest doesn't result +in a working configuration. They are currently not supported by the +vhost-user-blk export in QEMU. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1935020 +Signed-off-by: Kevin Wolf +Acked-by: Raphael Norwitz +Message-Id: <20210429171316.162022-5-kwolf@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Kevin Wolf +(cherry picked from commit 7556a320c98812ca6648b707393f4513387faf73) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/block/vhost-user-blk.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c +index f3a45af97c..c7e502f4c7 100644 +--- a/hw/block/vhost-user-blk.c ++++ b/hw/block/vhost-user-blk.c +@@ -47,6 +47,8 @@ static const int user_feature_bits[] = { + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_F_NOTIFY_ON_EMPTY, ++ VIRTIO_F_RING_PACKED, ++ VIRTIO_F_IOMMU_PLATFORM, + VHOST_INVALID_FEATURE_BIT + }; + +-- +2.27.0 + diff --git a/kvm-vhost-user-blk-Improve-error-reporting-in-realize.patch b/kvm-vhost-user-blk-Improve-error-reporting-in-realize.patch new file mode 100644 index 0000000..e690e2c --- /dev/null +++ b/kvm-vhost-user-blk-Improve-error-reporting-in-realize.patch @@ -0,0 +1,120 @@ +From a0fcc5faf35fb266dbe45259b79a57ba057e3144 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 12 Jul 2021 10:22:28 -0400 +Subject: [PATCH 08/43] vhost-user-blk: Improve error reporting in realize + +RH-Author: Miroslav Rezanina +RH-Bugzilla: 1957194 + +Now that vhost_user_blk_connect() is not called from an event handler +any more, but directly from vhost_user_blk_device_realize(), we can +actually make use of Error again instead of calling error_report() in +the inner function and setting a more generic and therefore less useful +error message in realize() itself. + +With Error, the callers are responsible for adding context if necessary +(such as the "-device" option the error refers to). Additional prefixes +are redundant and better omitted. + +Signed-off-by: Kevin Wolf +Acked-by: Raphael Norwitz +Message-Id: <20210429171316.162022-4-kwolf@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Kevin Wolf +(cherry picked from commit 5b9243d2654adc58ce472d0536a7a177b4fe0f90) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/block/vhost-user-blk.c | 23 +++++++++++------------ + 1 file changed, 11 insertions(+), 12 deletions(-) + +diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c +index c0b9958da1..f3a45af97c 100644 +--- a/hw/block/vhost-user-blk.c ++++ b/hw/block/vhost-user-blk.c +@@ -311,7 +311,7 @@ static void vhost_user_blk_reset(VirtIODevice *vdev) + vhost_dev_free_inflight(s->inflight); + } + +-static int vhost_user_blk_connect(DeviceState *dev) ++static int vhost_user_blk_connect(DeviceState *dev, Error **errp) + { + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(vdev); +@@ -331,8 +331,7 @@ static int vhost_user_blk_connect(DeviceState *dev) + + ret = vhost_dev_init(&s->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); + if (ret < 0) { +- error_report("vhost-user-blk: vhost initialization failed: %s", +- strerror(-ret)); ++ error_setg_errno(errp, -ret, "vhost initialization failed"); + return ret; + } + +@@ -340,8 +339,7 @@ static int vhost_user_blk_connect(DeviceState *dev) + if (virtio_device_started(vdev, vdev->status)) { + ret = vhost_user_blk_start(vdev); + if (ret < 0) { +- error_report("vhost-user-blk: vhost start failed: %s", +- strerror(-ret)); ++ error_setg_errno(errp, -ret, "vhost start failed"); + return ret; + } + } +@@ -380,10 +378,12 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) + DeviceState *dev = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(vdev); ++ Error *local_err = NULL; + + switch (event) { + case CHR_EVENT_OPENED: +- if (vhost_user_blk_connect(dev) < 0) { ++ if (vhost_user_blk_connect(dev, &local_err) < 0) { ++ error_report_err(local_err); + qemu_chr_fe_disconnect(&s->chardev); + return; + } +@@ -426,7 +426,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) + int i, ret; + + if (!s->chardev.chr) { +- error_setg(errp, "vhost-user-blk: chardev is mandatory"); ++ 
error_setg(errp, "chardev is mandatory"); + return; + } + +@@ -434,16 +434,16 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) + s->num_queues = 1; + } + if (!s->num_queues || s->num_queues > VIRTIO_QUEUE_MAX) { +- error_setg(errp, "vhost-user-blk: invalid number of IO queues"); ++ error_setg(errp, "invalid number of IO queues"); + return; + } + + if (!s->queue_size) { +- error_setg(errp, "vhost-user-blk: queue size must be non-zero"); ++ error_setg(errp, "queue size must be non-zero"); + return; + } + if (s->queue_size > VIRTQUEUE_MAX_SIZE) { +- error_setg(errp, "vhost-user-blk: queue size must not exceed %d", ++ error_setg(errp, "queue size must not exceed %d", + VIRTQUEUE_MAX_SIZE); + return; + } +@@ -469,8 +469,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) + goto virtio_err; + } + +- if (vhost_user_blk_connect(dev) < 0) { +- error_setg(errp, "vhost-user-blk: could not connect"); ++ if (vhost_user_blk_connect(dev, errp) < 0) { + qemu_chr_fe_disconnect(&s->chardev); + goto virtio_err; + } +-- +2.27.0 + diff --git a/kvm-vhost-user-blk-Make-sure-to-set-Error-on-realize-fai.patch b/kvm-vhost-user-blk-Make-sure-to-set-Error-on-realize-fai.patch new file mode 100644 index 0000000..0bef5dc --- /dev/null +++ b/kvm-vhost-user-blk-Make-sure-to-set-Error-on-realize-fai.patch @@ -0,0 +1,53 @@ +From f90c21b271d88733e604b3645986f8362048b0f0 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 12 Jul 2021 10:22:26 -0400 +Subject: [PATCH 06/43] vhost-user-blk: Make sure to set Error on realize + failure + +RH-Author: Miroslav Rezanina +RH-Bugzilla: 1957194 + +We have to set errp before jumping to virtio_err, otherwise the caller +(virtio_device_realize()) will take this as success and crash when it +later tries to access things that we've already freed in the error path. 
+ +Fixes: 77542d431491788d1e8e79d93ce10172ef207775 +Signed-off-by: Kevin Wolf +Message-Id: <20210429171316.162022-2-kwolf@redhat.com> +Reviewed-by: Michael S. Tsirkin +Reviewed-by: Eric Blake +Acked-by: Raphael Norwitz +Signed-off-by: Kevin Wolf +(cherry picked from commit f26729715ef21325f972f693607580a829ad1cbb) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/block/vhost-user-blk.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c +index f5e9682703..7c85248a7b 100644 +--- a/hw/block/vhost-user-blk.c ++++ b/hw/block/vhost-user-blk.c +@@ -447,7 +447,6 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) + { + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(vdev); +- Error *err = NULL; + int i, ret; + + if (!s->chardev.chr) { +@@ -495,8 +494,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) + NULL, true); + + reconnect: +- if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { +- error_report_err(err); ++ if (qemu_chr_fe_wait_connected(&s->chardev, errp) < 0) { + goto virtio_err; + } + +-- +2.27.0 + diff --git a/kvm-virtio-Fail-if-iommu_platform-is-requested-but-unsup.patch b/kvm-virtio-Fail-if-iommu_platform-is-requested-but-unsup.patch new file mode 100644 index 0000000..475feb0 --- /dev/null +++ b/kvm-virtio-Fail-if-iommu_platform-is-requested-but-unsup.patch @@ -0,0 +1,53 @@ +From 0834f460b52a1a6b2bc5575ff2e05458d7036257 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 12 Jul 2021 10:22:30 -0400 +Subject: [PATCH 10/43] virtio: Fail if iommu_platform is requested, but + unsupported + +RH-Author: Miroslav Rezanina +RH-Bugzilla: 1957194 + +Commit 2943b53f6 (' virtio: force VIRTIO_F_IOMMU_PLATFORM') made sure +that vhost can't just reject VIRTIO_F_IOMMU_PLATFORM when it was +requested. 
However, just adding it back to the negotiated flags isn't +right either because it promises support to the guest that the device +actually doesn't support. One example of a vhost-user device that +doesn't have support for the flag is the vhost-user-blk export of QEMU. + +Instead of successfully creating a device that doesn't work, just fail +to plug the device when it doesn't support the feature, but it was +requested. This results in much clearer error messages. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1935019 +Signed-off-by: Kevin Wolf +Reviewed-by: Raphael Norwitz +Message-Id: <20210429171316.162022-6-kwolf@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Kevin Wolf +(cherry picked from commit 04ceb61a4075fadbf374ef89662c41999da83489) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + hw/virtio/virtio-bus.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c +index d6332d45c3..859978d248 100644 +--- a/hw/virtio/virtio-bus.c ++++ b/hw/virtio/virtio-bus.c +@@ -69,6 +69,11 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) + return; + } + ++ if (has_iommu && !virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) { ++ error_setg(errp, "iommu_platform=true is not supported by the device"); ++ return; ++ } ++ + if (klass->device_plugged != NULL) { + klass->device_plugged(qbus->parent, &local_err); + } +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 9bc16fe..7154a71 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -9,9 +9,10 @@ %global have_fdt 0 %global have_modules_load 0 %global have_memlock_limits 0 -# have_block_rbd is not relevant for RHEL but makes it -# easier to sync spec dependency list with Fedora +# Some of these are not relevant for RHEL, but defining them +# makes it easier to sync the dependency list with Fedora %global have_block_rbd 1 +%global enable_werror 1 %global have_pmem 1 
%ifnarch x86_64 @@ -23,6 +24,11 @@ %global have_numactl 0 %endif +%global tools_only 0 +%ifarch %{power64} + %global tools_only 1 +%endif + %ifnarch %{ix86} x86_64 %global have_usbredir 0 %endif @@ -33,6 +39,14 @@ %global have_librdma 0 %endif +%global modprobe_kvm_conf %{_sourcedir}/kvm.conf +%ifarch s390x + %global modprobe_kvm_conf %{_sourcedir}/kvm-s390x.conf +%endif +%ifarch %{ix86} x86_64 + %global modprobe_kvm_conf %{_sourcedir}/kvm-x86.conf +%endif + %ifarch %{ix86} %global kvm_target i386 %endif @@ -59,6 +73,12 @@ %global have_fdt 1 %endif +%global target_list %{kvm_target}-softmmu +%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle +%global block_drivers_ro_list vmdk,vhdx,vpc,https,ssh +%define qemudocdir %{_docdir}/%{name} + + #Versions of various parts: %global requires_all_modules \ @@ -72,10 +92,27 @@ Requires: %{name}-block-curl = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} +# Since SPICE is removed from RHEL-9, the following Obsoletes: +# removes {name}-ui-spice for upgrades from RHEL-8 +# The "<= {version}" assumes RHEL-9 version >= RHEL-8 version (in +# other words RHEL-9 rebases are done together/before RHEL-8 ones) +%global obsoletes_some_modules \ +Obsoletes: %{name}-ui-spice <= %{version} \ +Obsoletes: %{name}-block-gluster <= %{version} \ +Obsoletes: %{name}-block-iscsi <= %{version} \ + +# Release candidate version tracking +# global rcver rc4 +%if 0%{?rcver:1} +%global rcrel .%{rcver} +%global rcstr -%{rcver} +%endif + + Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.0.0 -Release: 9%{?rcversion}%{?dist} +Release: 10%{?rcrel}%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -85,7 +122,7 @@ URL: http://www.qemu.org/ ExclusiveArch: x86_64 %{power64} aarch64 s390x -Source0: http://wiki.qemu.org/download/qemu-6.0.0.tar.xz +Source0: http://wiki.qemu.org/download/qemu-%{version}%{?rcstr}.tar.xz # KSM control scripts Source4: ksm.service @@ -104,8 +141,6 @@ Source27: kvm.conf Source28: 95-kvm-memlock.conf Source30: kvm-s390x.conf Source31: kvm-x86.conf -Source32: qemu-pr-helper.service -Source33: qemu-pr-helper.socket Source36: README.tests @@ -200,6 +235,44 @@ Patch54: kvm-s390x-cpumodel-add-3931-and-3932.patch Patch55: kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch # For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta Patch56: kvm-ppc-pef.c-initialize-cgs-ready-in-kvmppc_svm_init.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch57: kvm-s390x-css-Introduce-an-ESW-struct.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch58: kvm-s390x-css-Split-out-the-IRB-sense-data.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch59: kvm-s390x-css-Refactor-IRB-construction.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch60: kvm-s390x-css-Add-passthrough-IRB.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch61: kvm-vhost-user-blk-Fail-gracefully-on-too-large-queue-si.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch62: kvm-vhost-user-blk-Make-sure-to-set-Error-on-realize-fai.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch63: kvm-vhost-user-blk-Don-t-reconnect-during-initialisation.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch64: kvm-vhost-user-blk-Improve-error-reporting-in-realize.patch +# For 
bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch65: kvm-vhost-user-blk-Get-more-feature-flags-from-vhost-dev.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch66: kvm-virtio-Fail-if-iommu_platform-is-requested-but-unsup.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch67: kvm-vhost-user-blk-Check-that-num-queues-is-supported-by.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch68: kvm-vhost-user-Fix-backends-without-multiqueue-support.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch69: kvm-file-posix-fix-max_iov-for-dev-sg-devices.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch70: kvm-scsi-generic-pass-max_segments-via-max_iov-field-in-.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch71: kvm-osdep-provide-ROUND_DOWN-macro.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch72: kvm-block-backend-align-max_transfer-to-request-alignmen.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch73: kvm-block-add-max_hw_transfer-to-BlockLimits.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch74: kvm-file-posix-try-BLKSECTGET-on-block-devices-too-do-no.patch +# For bz#1957782 - VMDK support should be read-only +Patch75: kvm-block-Add-option-to-use-driver-whitelist-even-in-too.patch # Source-git patches @@ -263,21 +336,24 @@ BuildRequires: pkgconfig(gbm) BuildRequires: perl-Test-Harness BuildRequires: libslirp-devel -Requires: qemu-kvm-core = %{epoch}:%{version}-%{release} + +# Requires for qemu-kvm package +Requires: %{name}-core = %{epoch}:%{version}-%{release} Requires: %{name}-docs = %{epoch}:%{version}-%{release} +Requires: %{name}-tools = %{epoch}:%{version}-%{release} +Requires: qemu-pr-helper = %{epoch}:%{version}-%{release} 
%{requires_all_modules} -%define qemudocdir %{_docdir}/%{name} - %description -qemu-kvm is an open source virtualizer that provides hardware -emulation for the KVM hypervisor. qemu-kvm acts as a virtual +%{name} is an open source virtualizer that provides hardware +emulation for the KVM hypervisor. %{name} acts as a virtual machine monitor together with the KVM kernel modules, and emulates the hardware for a full system such as a PC and its associated peripherals. -%package -n qemu-kvm-core -Summary: qemu-kvm core components +%package core +Summary: %{name} core components +%{obsoletes_some_modules} Requires: %{name}-common = %{epoch}:%{version}-%{release} Requires: qemu-img = %{epoch}:%{version}-%{release} %ifarch %{ix86} x86_64 @@ -293,33 +369,14 @@ Requires: libusbx >= %{libusbx_version} Requires: libfdt >= %{libfdt_version} %endif -# Since SPICE is removed from RHEL-9, the following Obsoletes: -# removes qemu-kvm-ui-spice for upgrades from RHEL-8 -# The "<= {version}" assumes RHEL-9 version >= RHEL-8 version (in -# other words RHEL-9 rebases are done together/before RHEL-8 ones) -Obsoletes: qemu-kvm-ui-spice <= %{version} -Obsoletes: qemu-kvm-block-gluster <= %{version} -Obsoletes: %{name}-block-iscsi <= %{version} - -%description -n qemu-kvm-core -qemu-kvm is an open source virtualizer that provides hardware -emulation for the KVM hypervisor. qemu-kvm acts as a virtual +%description core +%{name} is an open source virtualizer that provides hardware +emulation for the KVM hypervisor. %{name} acts as a virtual machine monitor together with the KVM kernel modules, and emulates the hardware for a full system such as a PC and its associated peripherals. -%package -n qemu-kvm-docs -Summary: qemu-kvm documentation -%description -n qemu-kvm-docs -qemu-kvm-docs provides documentation files regarding qemu-kvm. 
- -%package -n qemu-img -Summary: QEMU command line tool for manipulating disk images - -%description -n qemu-img -This package provides a command line tool for manipulating disk images. - -%package -n qemu-kvm-common +%package common Summary: QEMU common files needed by all QEMU targets Requires(post): /usr/bin/getent Requires(post): /usr/sbin/groupadd @@ -336,11 +393,36 @@ Requires: seavgabios-bin >= 1.12.0-3 Requires: ipxe-roms-qemu >= 20170123-1 %endif -%description -n qemu-kvm-common -qemu-kvm is an open source virtualizer that provides hardware emulation for +%description common +%{name} is an open source virtualizer that provides hardware emulation for the KVM hypervisor. -This package provides documentation and auxiliary programs used with qemu-kvm. +This package provides documentation and auxiliary programs used with %{name}. + + +%package tools +Summary: %{name} support tools +%description tools +%{name}-tools provides various tools related to %{name} usage. + + +%package docs +Summary: %{name} documentation +%description docs +%{name}-docs provides documentation files regarding %{name}. + + +%package -n qemu-pr-helper +Summary: qemu-pr-helper utility for %{name} +%description -n qemu-pr-helper +This package provides the qemu-pr-helper utility that is required for certain +SCSI features. + + +%package -n qemu-img +Summary: QEMU command line tool for manipulating disk images +%description -n qemu-img +This package provides a command line tool for manipulating disk images. %package -n qemu-guest-agent @@ -348,9 +430,8 @@ Summary: QEMU guest agent Requires(post): systemd-units Requires(preun): systemd-units Requires(postun): systemd-units - %description -n qemu-guest-agent -qemu-kvm is an open source virtualizer that provides hardware emulation for +%{name} is an open source virtualizer that provides hardware emulation for the KVM hypervisor. 
This package provides an agent to run inside guests, which communicates @@ -358,23 +439,24 @@ with the host over a virtio-serial channel named "org.qemu.guest_agent.0" This package does not need to be installed on the host OS. + %package tests -Summary: tests for the qemu-kvm package +Summary: tests for the %{name} package Requires: %{name} = %{epoch}:%{version}-%{release} %define testsdir %{_libdir}/%{name}/tests-src %description tests -The qemu-kvm-tests rpm contains tests that can be used to verify -the functionality of the installed qemu-kvm package +The %{name}-tests rpm contains tests that can be used to verify +the functionality of the installed %{name} package Install this package if you want access to the avocado_qemu tests, or qemu-iotests. + %package block-curl Summary: QEMU CURL block driver Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} - %description block-curl This package provides the additional CURL block driver for QEMU. @@ -382,21 +464,21 @@ Install this package if you want to access remote disks over http, https, ftp and other transports provided by the CURL library. +%if %{have_block_rbd} %package block-rbd Summary: QEMU Ceph/RBD block driver Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} - %description block-rbd This package provides the additional Ceph/RBD block driver for QEMU. Install this package if you want to access remote Ceph volumes using the rbd protocol. +%endif %package block-ssh Summary: QEMU SSH block driver Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} - %description block-ssh This package provides the additional SSH block driver for QEMU. @@ -411,7 +493,6 @@ Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} Requires: mesa-libGL Requires: mesa-libEGL Requires: mesa-dri-drivers - %description ui-opengl This package provides opengl support. %endif @@ -427,11 +508,7 @@ This package provides usbredir support. 
%endif %prep -%if 0%{?rcversion} -%setup -n qemu-%{version}-%{?rcversion} -%else -%setup -n qemu-%{version} -%endif +%setup -q -n qemu-%{version}%{?rcstr} %autopatch -p1 %global qemu_kvm_build qemu_kvm_build @@ -439,14 +516,11 @@ mkdir -p %{qemu_kvm_build} %build -%global buildarch %{kvm_target}-softmmu - # --build-id option is used for giving info to the debug packages. buildldflags="VL_LDFLAGS=-Wl,--build-id" -%global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle - %define disable_everything \\\ + --audio-drv-list= \\\ --disable-attr \\\ --disable-auth-pam \\\ --disable-avx2 \\\ @@ -566,32 +640,54 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --disable-xfsctl \\\ --disable-xkbcommon \\\ --disable-zstd \\\ - --with-git-submodules=ignore + --with-git-submodules=ignore \\\ + --without-default-devices + + +run_configure() { + ../configure \ + --cc=%{__cc} \ + --cxx=%{__cxx} \ + --prefix="%{_prefix}" \ + --libdir="%{_libdir}" \ + --datadir="%{_datadir}" \ + --sysconfdir="%{_sysconfdir}" \ + --interp-prefix=%{_prefix}/qemu-%M \ + --localstatedir="%{_localstatedir}" \ + --docdir="%{_docdir}" \ + --libexecdir="%{_libexecdir}" \ + --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ + --extra-cflags="%{optflags}" \ + --with-pkgversion="%{name}-%{version}-%{release}" \ + --with-suffix="%{name}" \ + --firmwarepath=%{_prefix}/share/qemu-firmware \ + --meson="%{__meson}" \ + --enable-trace-backend=dtrace \ + --with-coroutine=ucontext \ + --with-git=git \ + --tls-priority=@QEMU,SYSTEM \ + %{disable_everything} \ + "$@" + + echo "config-host.mak contents:" + echo "===" + cat config-host.mak + echo "===" +} + pushd %{qemu_kvm_build} -../configure \ - --prefix="%{_prefix}" \ - --libdir="%{_libdir}" \ - --datadir="%{_datadir}" \ - --sysconfdir="%{_sysconfdir}" \ - --interp-prefix=%{_prefix}/qemu-%M \ - --localstatedir="%{_localstatedir}" \ - --docdir="%{_docdir}" \ - --libexecdir="%{_libexecdir}" \ - 
--extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ - --extra-cflags="%{optflags}" \ - --with-pkgversion="%{name}-%{version}-%{release}" \ - --with-suffix="%{name}" \ - --firmwarepath=%{_prefix}/share/qemu-firmware \ - --meson="%{__meson}" \ - --target-list="%{buildarch}" \ - --block-drv-rw-whitelist=%{block_drivers_list} \ - --audio-drv-list= \ - --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \ - --with-coroutine=ucontext \ - --with-git=git \ - --tls-priority=@QEMU,SYSTEM \ - %{disable_everything} \ +run_configure \ +%if %{defined target_list} + --target-list="%{target_list}" \ +%endif +%if %{defined block_drivers_rw_list} + --block-drv-rw-whitelist=%{block_drivers_rw_list} \ +%endif +%if %{defined block_drivers_ro_list} + --block-drv-ro-whitelist=%{block_drivers_ro_list} \ +%endif + --enable-block-drv-whitelist-in-tools \ --enable-attr \ %ifarch %{ix86} x86_64 --enable-avx2 \ @@ -641,7 +737,6 @@ pushd %{qemu_kvm_build} --enable-tcg \ --enable-tools \ --enable-tpm \ - --enable-trace-backend=dtrace \ %if %{have_usbredir} --enable-usb-redir \ %endif @@ -655,17 +750,13 @@ pushd %{qemu_kvm_build} --enable-vnc \ --enable-vnc-png \ --enable-vnc-sasl \ +%if %{enable_werror} --enable-werror \ +%endif --enable-xkbcommon \ - --without-default-devices -echo "qemu-kvm config-host.mak contents:" -echo "===" -cat config-host.mak -echo "===" - -%ifarch %{power64} +%if %{tools_only} make V=1 %{?_smp_mflags} $buildldflags qemu-img make V=1 %{?_smp_mflags} $buildldflags qemu-io make V=1 %{?_smp_mflags} $buildldflags qemu-nbd @@ -678,8 +769,11 @@ make V=1 %{?_smp_mflags} $buildldflags docs/qemu-storage-daemon-qmp-ref.7 make V=1 %{?_smp_mflags} $buildldflags qga/qemu-ga make V=1 %{?_smp_mflags} $buildldflags docs/qemu-ga.8 -%else +# endif tools_only +%endif + +%if !%{tools_only} make V=1 %{?_smp_mflags} $buildldflags # Setup back compat qemu-kvm binary @@ -702,252 +796,213 @@ cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm cp pc-bios/s390-ccw/s390-ccw.img 
pc-bios/s390-ccw/s390-netboot.img pc-bios/ %endif - -gcc %{SOURCE6} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o ksmctl -%endif +gcc %{_sourcedir}/ksmctl.c $RPM_OPT_FLAGS $RPM_LD_FLAGS -o ksmctl popd +# endif !tools_only +%endif + + %install -pushd %{qemu_kvm_build} -%define _udevdir %(pkg-config --variable=udevdir udev) -%define _udevrulesdir %{_udevdir}/rules.d - -%ifarch %{power64} -install -D -p -m 0755 qemu-img $RPM_BUILD_ROOT%{_bindir}/qemu-img -install -D -p -m 0755 qemu-io $RPM_BUILD_ROOT%{_bindir}/qemu-io -install -D -p -m 0755 qemu-nbd $RPM_BUILD_ROOT%{_bindir}/qemu-nbd -install -D -p -m 0755 storage-daemon/qemu-storage-daemon $RPM_BUILD_ROOT%{_bindir}/qemu-storage-daemon - -mkdir -p $RPM_BUILD_ROOT%{_mandir}/man1/ -mkdir -p $RPM_BUILD_ROOT%{_mandir}/man7/ -mkdir -p $RPM_BUILD_ROOT%{_mandir}/man8/ - -install -D -p -m 644 docs/qemu-img.1* $RPM_BUILD_ROOT%{_mandir}/man1 -install -D -p -m 644 docs/qemu-nbd.8* $RPM_BUILD_ROOT%{_mandir}/man8 -install -D -p -p -m 644 docs/qemu-storage-daemon.1* $RPM_BUILD_ROOT%{_mandir}/man1 -install -D -p -p -m 644 docs/qemu-storage-daemon-qmp-ref.7* $RPM_BUILD_ROOT%{_mandir}/man7 -install -D -p -m 644 docs/qemu-ga.8* $RPM_BUILD_ROOT%{_mandir}/man8 -%else -install -D -p -m 0644 %{SOURCE4} $RPM_BUILD_ROOT%{_unitdir}/ksm.service -install -D -p -m 0644 %{SOURCE5} $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig/ksm -install -D -p -m 0755 ksmctl $RPM_BUILD_ROOT%{_libexecdir}/ksmctl - -install -D -p -m 0644 %{SOURCE7} $RPM_BUILD_ROOT%{_unitdir}/ksmtuned.service -install -D -p -m 0755 %{SOURCE8} $RPM_BUILD_ROOT%{_sbindir}/ksmtuned -install -D -p -m 0644 %{SOURCE9} $RPM_BUILD_ROOT%{_sysconfdir}/ksmtuned.conf -install -D -p -m 0644 %{SOURCE26} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/vhost.conf -%ifarch s390x - install -D -p -m 0644 %{SOURCE30} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf -%else -%ifarch %{ix86} x86_64 - install -D -p -m 0644 %{SOURCE31} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf -%else - install -D -p -m 0644 %{SOURCE27} 
$RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf -%endif -%endif - -mkdir -p $RPM_BUILD_ROOT%{_bindir}/ -mkdir -p $RPM_BUILD_ROOT%{_udevrulesdir}/ -mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name} - -# Create new directories and put them all under tests-src -mkdir -p $RPM_BUILD_ROOT%{testsdir}/python -mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests -mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/acceptance -mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests -mkdir -p $RPM_BUILD_ROOT%{testsdir}/scripts/qmp - - -install -m 0644 scripts/dump-guest-memory.py \ - $RPM_BUILD_ROOT%{_datadir}/%{name} - -# Install avocado_qemu tests -cp -R tests/acceptance/* $RPM_BUILD_ROOT%{testsdir}/tests/acceptance/ - -# Install qemu.py and qmp/ scripts required to run avocado_qemu tests -cp -R python/qemu $RPM_BUILD_ROOT%{testsdir}/python -cp -R scripts/qmp/* $RPM_BUILD_ROOT%{testsdir}/scripts/qmp -install -p -m 0755 ../tests/Makefile.include $RPM_BUILD_ROOT%{testsdir}/tests/ - -# Install qemu-iotests -cp -R ../tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ -cp -ur tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ -# Avoid ambiguous 'python' interpreter name -find $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env \(python\|python3\)+%{__python3}+' {} \; -find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env \(python\|python3\)+%{__python3}+' {} \; -find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/\(python\|python3\)+%{__python3}+' {} \; - -install -p -m 0644 %{SOURCE36} $RPM_BUILD_ROOT%{testsdir}/README - -make DESTDIR=$RPM_BUILD_ROOT \ - sharedir="%{_datadir}/%{name}" \ - datadir="%{_datadir}/%{name}" \ - install - -mkdir -p $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset - -# Move vhost-user JSON files to the standard "qemu" directory -mkdir -p $RPM_BUILD_ROOT%{_datadir}/qemu -mv 
$RPM_BUILD_ROOT%{_datadir}/%{name}/vhost-user $RPM_BUILD_ROOT%{_datadir}/qemu/ -%endif - # Install qemu-guest-agent service and udev rules install -D -m 0644 %{_sourcedir}/qemu-guest-agent.service %{buildroot}%{_unitdir}/qemu-guest-agent.service install -D -m 0644 %{_sourcedir}/qemu-ga.sysconfig %{buildroot}%{_sysconfdir}/sysconfig/qemu-ga install -D -m 0644 %{_sourcedir}/99-qemu-guest-agent.rules %{buildroot}%{_udevrulesdir}/99-qemu-guest-agent.rules -# - the fsfreeze hook script: -install -D --preserve-timestamps \ - scripts/qemu-guest-agent/fsfreeze-hook \ - $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook -# Workaround for the missing /etc/qemu-kvm/fsfreeze-hook -# Please, do not carry this over to RHEL-9 -mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/qemu-kvm/ -ln -s %{_sysconfdir}/qemu-ga/fsfreeze-hook \ - $RPM_BUILD_ROOT%{_sysconfdir}/qemu-kvm/fsfreeze-hook -# - the directory for user scripts: -mkdir $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook.d - -# - and the fsfreeze script samples: -mkdir --parents $RPM_BUILD_ROOT%{_datadir}/%{name}/qemu-ga/fsfreeze-hook.d/ -install --preserve-timestamps --mode=0644 \ - scripts/qemu-guest-agent/fsfreeze-hook.d/*.sample \ - $RPM_BUILD_ROOT%{_datadir}/%{name}/qemu-ga/fsfreeze-hook.d/ - -# - Install dedicated log directory: -mkdir -p -v $RPM_BUILD_ROOT%{_localstatedir}/log/qemu-ga/ - -mkdir -p $RPM_BUILD_ROOT%{_bindir} -install -c -m 0755 qga/qemu-ga ${RPM_BUILD_ROOT}%{_bindir}/qemu-ga - -%ifnarch %{power64} -mkdir -p $RPM_BUILD_ROOT%{_mandir}/man8 - -install -m 0755 %{kvm_target}-softmmu/qemu-system-%{kvm_target} $RPM_BUILD_ROOT%{_libexecdir}/qemu-kvm -install -m 0644 qemu-kvm.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ -install -m 0644 qemu-kvm-log.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ -install -m 0644 qemu-kvm-simpletrace.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ -install -d -m 0755 "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/script.d" -install -c -m 0644 
scripts/systemtap/script.d/qemu_kvm.stp "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/script.d/" -install -d -m 0755 "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/conf.d" -install -c -m 0644 scripts/systemtap/conf.d/qemu_kvm.conf "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/conf.d/" +# Install qemu-ga fsfreeze bits +mkdir -p %{buildroot}%{_sysconfdir}/qemu-ga/fsfreeze-hook.d +install -p scripts/qemu-guest-agent/fsfreeze-hook %{buildroot}%{_sysconfdir}/qemu-ga/fsfreeze-hook +mkdir -p %{buildroot}%{_datadir}/%{name}/qemu-ga/fsfreeze-hook.d/ +install -p -m 0644 scripts/qemu-guest-agent/fsfreeze-hook.d/*.sample %{buildroot}%{_datadir}/%{name}/qemu-ga/fsfreeze-hook.d/ +mkdir -p -v %{buildroot}%{_localstatedir}/log/qemu-ga/ -rm $RPM_BUILD_ROOT/%{_datadir}/applications/qemu.desktop -rm $RPM_BUILD_ROOT%{_bindir}/qemu-system-%{kvm_target} -rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}.stp -rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-simpletrace.stp -rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-log.stp -rm $RPM_BUILD_ROOT%{_bindir}/elf2dmp +%if %{tools_only} +pushd %{qemu_kvm_build} +install -D -p -m 0755 qga/qemu-ga %{buildroot}%{_bindir}/qemu-ga +install -D -p -m 0755 qemu-img %{buildroot}%{_bindir}/qemu-img +install -D -p -m 0755 qemu-io %{buildroot}%{_bindir}/qemu-io +install -D -p -m 0755 qemu-nbd %{buildroot}%{_bindir}/qemu-nbd +install -D -p -m 0755 storage-daemon/qemu-storage-daemon %{buildroot}%{_bindir}/qemu-storage-daemon + +mkdir -p %{buildroot}%{_mandir}/man1/ +mkdir -p %{buildroot}%{_mandir}/man7/ +mkdir -p %{buildroot}%{_mandir}/man8/ + +install -D -p -m 644 docs/qemu-img.1* %{buildroot}%{_mandir}/man1 +install -D -p -m 644 docs/qemu-nbd.8* %{buildroot}%{_mandir}/man8 +install -D -p -m 644 docs/qemu-storage-daemon.1* %{buildroot}%{_mandir}/man1 +install -D -p -m 644 docs/qemu-storage-daemon-qmp-ref.7* %{buildroot}%{_mandir}/man7 +install -D -p -m 644 docs/qemu-ga.8* 
%{buildroot}%{_mandir}/man8 +popd +# endif tools_only +%endif + +%if !%{tools_only} +install -D -p -m 0644 %{_sourcedir}/ksm.service %{buildroot}%{_unitdir}/ksm.service +install -D -p -m 0644 %{_sourcedir}/ksm.sysconfig %{buildroot}%{_sysconfdir}/sysconfig/ksm +install -D -p -m 0755 %{qemu_kvm_build}/ksmctl %{buildroot}%{_libexecdir}/ksmctl + +install -D -p -m 0644 %{_sourcedir}/ksmtuned.service %{buildroot}%{_unitdir}/ksmtuned.service +install -D -p -m 0755 %{_sourcedir}/ksmtuned %{buildroot}%{_sbindir}/ksmtuned +install -D -p -m 0644 %{_sourcedir}/ksmtuned.conf %{buildroot}%{_sysconfdir}/ksmtuned.conf +install -D -p -m 0644 %{_sourcedir}/vhost.conf %{buildroot}%{_sysconfdir}/modprobe.d/vhost.conf +install -D -p -m 0644 %{modprobe_kvm_conf} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf + +# Create new directories and put them all under tests-src +mkdir -p %{buildroot}%{testsdir}/python +mkdir -p %{buildroot}%{testsdir}/tests +mkdir -p %{buildroot}%{testsdir}/tests/acceptance +mkdir -p %{buildroot}%{testsdir}/tests/qemu-iotests +mkdir -p %{buildroot}%{testsdir}/scripts/qmp + + +install -m 0644 scripts/dump-guest-memory.py \ + %{buildroot}%{_datadir}/%{name} + +# Install avocado_qemu tests +cp -R %{qemu_kvm_build}/tests/acceptance/* %{buildroot}%{testsdir}/tests/acceptance/ + +# Install qemu.py and qmp/ scripts required to run avocado_qemu tests +cp -R %{qemu_kvm_build}/python/qemu %{buildroot}%{testsdir}/python +cp -R %{qemu_kvm_build}/scripts/qmp/* %{buildroot}%{testsdir}/scripts/qmp +install -p -m 0755 tests/Makefile.include %{buildroot}%{testsdir}/tests/ + +# Install qemu-iotests +cp -R tests/qemu-iotests/* %{buildroot}%{testsdir}/tests/qemu-iotests/ +cp -ur %{qemu_kvm_build}/tests/qemu-iotests/* %{buildroot}%{testsdir}/tests/qemu-iotests/ + +install -p -m 0644 %{_sourcedir}/README.tests %{buildroot}%{testsdir}/README + +# Do the actual qemu tree install +pushd %{qemu_kvm_build} +make DESTDIR=%{buildroot} \ + sharedir="%{_datadir}/%{name}" \ + 
datadir="%{_datadir}/%{name}" \ + install +popd + +mkdir -p %{buildroot}%{_datadir}/systemtap/tapset + +# Move vhost-user JSON files to the standard "qemu" directory +mkdir -p %{buildroot}%{_datadir}/qemu +mv %{buildroot}%{_datadir}/%{name}/vhost-user %{buildroot}%{_datadir}/qemu/ + +install -m 0755 %{qemu_kvm_build}/%{kvm_target}-softmmu/qemu-system-%{kvm_target} %{buildroot}%{_libexecdir}/qemu-kvm +install -m 0644 %{qemu_kvm_build}/qemu-kvm.stp %{buildroot}%{_datadir}/systemtap/tapset/ +install -m 0644 %{qemu_kvm_build}/qemu-kvm-log.stp %{buildroot}%{_datadir}/systemtap/tapset/ +install -m 0644 %{qemu_kvm_build}/qemu-kvm-simpletrace.stp %{buildroot}%{_datadir}/systemtap/tapset/ +install -d -m 0755 "%{buildroot}%{_datadir}/%{name}/systemtap/script.d" +install -c -m 0644 %{qemu_kvm_build}/scripts/systemtap/script.d/qemu_kvm.stp "%{buildroot}%{_datadir}/%{name}/systemtap/script.d/" +install -d -m 0755 "%{buildroot}%{_datadir}/%{name}/systemtap/conf.d" +install -c -m 0644 %{qemu_kvm_build}/scripts/systemtap/conf.d/qemu_kvm.conf "%{buildroot}%{_datadir}/%{name}/systemtap/conf.d/" + + +rm %{buildroot}/%{_datadir}/applications/qemu.desktop +rm %{buildroot}%{_bindir}/qemu-system-%{kvm_target} +rm %{buildroot}%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}.stp +rm %{buildroot}%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-simpletrace.stp +rm %{buildroot}%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-log.stp +rm %{buildroot}%{_bindir}/elf2dmp # Install simpletrace -install -m 0755 scripts/simpletrace.py $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py +install -m 0755 scripts/simpletrace.py %{buildroot}%{_datadir}/%{name}/simpletrace.py # Avoid ambiguous 'python' interpreter name -mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool -install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool scripts/tracetool/*.py -mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/backend -install -m 0644 -t 
$RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/backend scripts/tracetool/backend/*.py -mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format -install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format scripts/tracetool/format/*.py +mkdir -p %{buildroot}%{_datadir}/%{name}/tracetool +install -m 0644 -t %{buildroot}%{_datadir}/%{name}/tracetool scripts/tracetool/*.py +mkdir -p %{buildroot}%{_datadir}/%{name}/tracetool/backend +install -m 0644 -t %{buildroot}%{_datadir}/%{name}/tracetool/backend scripts/tracetool/backend/*.py +mkdir -p %{buildroot}%{_datadir}/%{name}/tracetool/format +install -m 0644 -t %{buildroot}%{_datadir}/%{name}/tracetool/format scripts/tracetool/format/*.py -mkdir -p $RPM_BUILD_ROOT%{qemudocdir} -install -p -m 0644 -t ${RPM_BUILD_ROOT}%{qemudocdir} ../README.rst ../README.systemtap ../COPYING ../COPYING.LIB ../LICENSE ../docs/interop/qmp-spec.txt +mkdir -p %{buildroot}%{qemudocdir} +install -p -m 0644 -t %{buildroot}%{qemudocdir} README.rst README.systemtap COPYING COPYING.LIB LICENSE docs/interop/qmp-spec.txt # Rename man page -pushd ${RPM_BUILD_ROOT}%{_mandir}/man1/ +pushd %{buildroot}%{_mandir}/man1/ for fn in qemu.1*; do mv $fn "qemu-kvm${fn#qemu}" done popd -chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man1/* -chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man8/* -install -D -p -m 0644 ../qemu.sasl $RPM_BUILD_ROOT%{_sysconfdir}/sasl2/%{name}.conf - -# Install keymaps -pushd pc-bios/keymaps -for kmp in *; do - install $kmp ${RPM_BUILD_ROOT}%{_datadir}/%{name}/keymaps/ -done -rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/keymaps/*.stamp -popd +install -D -p -m 0644 qemu.sasl %{buildroot}%{_sysconfdir}/sasl2/%{name}.conf # Provided by package openbios -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-ppc -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-sparc32 -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-sparc64 +rm -rf %{buildroot}%{_datadir}/%{name}/openbios-ppc +rm -rf %{buildroot}%{_datadir}/%{name}/openbios-sparc32 +rm -rf 
%{buildroot}%{_datadir}/%{name}/openbios-sparc64 # Provided by package SLOF -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/slof.bin +rm -rf %{buildroot}%{_datadir}/%{name}/slof.bin # Remove unpackaged files. -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/palcode-clipper -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/petalogix*.dtb -rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/bamboo.dtb -rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/ppc_rom.bin -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-zipl.rom -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot.e500 -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu_vga.ndrv -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/skiboot.lid -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qboot.rom +rm -rf %{buildroot}%{_datadir}/%{name}/palcode-clipper +rm -rf %{buildroot}%{_datadir}/%{name}/petalogix*.dtb +rm -f %{buildroot}%{_datadir}/%{name}/bamboo.dtb +rm -f %{buildroot}%{_datadir}/%{name}/ppc_rom.bin +rm -rf %{buildroot}%{_datadir}/%{name}/s390-zipl.rom +rm -rf %{buildroot}%{_datadir}/%{name}/u-boot.e500 +rm -rf %{buildroot}%{_datadir}/%{name}/qemu_vga.ndrv +rm -rf %{buildroot}%{_datadir}/%{name}/skiboot.lid +rm -rf %{buildroot}%{_datadir}/%{name}/qboot.rom -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-ccw.img -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-netboot.img -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/hppa-firmware.img -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/canyonlands.dtb -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot-sam460-20100605.bin +rm -rf %{buildroot}%{_datadir}/%{name}/s390-ccw.img +rm -rf %{buildroot}%{_datadir}/%{name}/s390-netboot.img +rm -rf %{buildroot}%{_datadir}/%{name}/hppa-firmware.img +rm -rf %{buildroot}%{_datadir}/%{name}/canyonlands.dtb +rm -rf %{buildroot}%{_datadir}/%{name}/u-boot-sam460-20100605.bin -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/firmware -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-*.fd -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-licenses.txt +rm -rf 
%{buildroot}%{_datadir}/%{name}/firmware +rm -rf %{buildroot}%{_datadir}/%{name}/edk2-*.fd +rm -rf %{buildroot}%{_datadir}/%{name}/edk2-licenses.txt -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-sifive_u-fw_jump.bin -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-virt-fw_jump.bin -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-generic-fw_dynamic.* -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-sifive_u-fw_jump.bin -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-virt-fw_jump.bin -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-generic-fw_dynamic.* -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu-nsis.bmp -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/npcm7xx_bootrom.bin +rm -rf %{buildroot}%{_datadir}/%{name}/opensbi-riscv32-sifive_u-fw_jump.bin +rm -rf %{buildroot}%{_datadir}/%{name}/opensbi-riscv32-virt-fw_jump.bin +rm -rf %{buildroot}%{_datadir}/%{name}/opensbi-riscv32-generic-fw_dynamic.* +rm -rf %{buildroot}%{_datadir}/%{name}/opensbi-riscv64-sifive_u-fw_jump.bin +rm -rf %{buildroot}%{_datadir}/%{name}/opensbi-riscv64-virt-fw_jump.bin +rm -rf %{buildroot}%{_datadir}/%{name}/opensbi-riscv64-generic-fw_dynamic.* +rm -rf %{buildroot}%{_datadir}/%{name}/qemu-nsis.bmp +rm -rf %{buildroot}%{_datadir}/%{name}/npcm7xx_bootrom.bin # Remove virtfs-proxy-helper files -rm -rf ${RPM_BUILD_ROOT}%{_libexecdir}/virtfs-proxy-helper -rm -rf ${RPM_BUILD_ROOT}%{_mandir}/man1/virtfs-proxy-helper* +rm -rf %{buildroot}%{_libexecdir}/virtfs-proxy-helper +rm -rf %{buildroot}%{_mandir}/man1/virtfs-proxy-helper* %ifarch s390x # Use the s390-*.img that we've just built, not the pre-built ones - install -m 0644 pc-bios/s390-ccw/s390-ccw.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ - install -m 0644 pc-bios/s390-ccw/s390-netboot.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ + install -m 0644 %{qemu_kvm_build}/pc-bios/s390-ccw/s390-ccw.img %{buildroot}%{_datadir}/%{name}/ + install -m 0644 
%{qemu_kvm_build}/pc-bios/s390-ccw/s390-netboot.img %{buildroot}%{_datadir}/%{name}/ %else - rm -rf ${RPM_BUILD_ROOT}%{_libdir}/qemu-kvm/hw-s390x-virtio-gpu-ccw.so + rm -rf %{buildroot}%{_libdir}/%{name}/hw-s390x-virtio-gpu-ccw.so %endif %ifnarch x86_64 - rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/kvmvapic.bin - rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/linuxboot.bin - rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/multiboot.bin - rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/pvh.bin + rm -rf %{buildroot}%{_datadir}/%{name}/kvmvapic.bin + rm -rf %{buildroot}%{_datadir}/%{name}/linuxboot.bin + rm -rf %{buildroot}%{_datadir}/%{name}/multiboot.bin + rm -rf %{buildroot}%{_datadir}/%{name}/pvh.bin %endif # Remove sparc files -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/QEMU,tcx.bin -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/QEMU,cgthree.bin +rm -rf %{buildroot}%{_datadir}/%{name}/QEMU,tcx.bin +rm -rf %{buildroot}%{_datadir}/%{name}/QEMU,cgthree.bin # Remove ivshmem example programs -rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-client -rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-server +rm -rf %{buildroot}%{_bindir}/ivshmem-client +rm -rf %{buildroot}%{_bindir}/ivshmem-server # Remove efi roms -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/efi*.rom +rm -rf %{buildroot}%{_datadir}/%{name}/efi*.rom # Provided by package ipxe -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/pxe*rom +rm -rf %{buildroot}%{_datadir}/%{name}/pxe*rom # Provided by package vgabios -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/vgabios*bin +rm -rf %{buildroot}%{_datadir}/%{name}/vgabios*bin # Provided by package seabios -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/bios*.bin +rm -rf %{buildroot}%{_datadir}/%{name}/bios*.bin # Provided by package sgabios -rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/sgabios.bin +rm -rf %{buildroot}%{_datadir}/%{name}/sgabios.bin # the pxe gpxe images will be symlinks to the images on # /usr/share/ipxe, as QEMU doesn't know how to look @@ -984,48 +1039,56 @@ rom_link() { %endif 
%if %{have_modules_load} - install -D -p -m 644 %{SOURCE21} $RPM_BUILD_ROOT%{_sysconfdir}/modules-load.d/kvm.conf + install -D -p -m 644 %{_sourcedir}/modules-load.conf %{buildroot}%{_sysconfdir}/modules-load.d/kvm.conf %endif %if %{have_memlock_limits} - install -D -p -m 644 %{SOURCE28} $RPM_BUILD_ROOT%{_sysconfdir}/security/limits.d/95-kvm-memlock.conf + install -D -p -m 644 %{_sourcedir}/95-kvm-memlock.conf %{buildroot}%{_sysconfdir}/security/limits.d/95-kvm-memlock.conf %endif # Install rules to use the bridge helper with libvirt's virbr0 -install -D -m 0644 %{SOURCE12} $RPM_BUILD_ROOT%{_sysconfdir}/%{name}/bridge.conf +install -D -m 0644 %{_sourcedir}/bridge.conf %{buildroot}%{_sysconfdir}/%{name}/bridge.conf # Install qemu-pr-helper service -install -m 0644 %{_sourcedir}/qemu-pr-helper.service %{buildroot}%{_unitdir} -install -m 0644 %{_sourcedir}/qemu-pr-helper.socket %{buildroot}%{_unitdir} - -find $RPM_BUILD_ROOT -name '*.la' -or -name '*.a' | xargs rm -f +install -m 0644 contrib/systemd/qemu-pr-helper.service %{buildroot}%{_unitdir} +install -m 0644 contrib/systemd/qemu-pr-helper.socket %{buildroot}%{_unitdir} # We need to make the block device modules and other qemu SO files executable # otherwise RPM won't pick up their dependencies. 
-chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/*.so +chmod +x %{buildroot}%{_libdir}/%{name}/*.so + +# Remove docs we don't care about +find %{buildroot}%{qemudocdir} -name .buildinfo -delete +rm -rf %{buildroot}%{qemudocdir}/specs + +# endif !tools_only +%endif -# Remove buildinfo -rm -rf $RPM_BUILD_ROOT%{qemudocdir}/interop/.buildinfo -rm -rf $RPM_BUILD_ROOT%{qemudocdir}/system/.buildinfo -rm -rf $RPM_BUILD_ROOT%{qemudocdir}/tools/.buildinfo -rm -rf $RPM_BUILD_ROOT%{qemudocdir}/user/.buildinfo -rm -rf $RPM_BUILD_ROOT%{qemudocdir}/devel/.buildinfo -rm -rf $RPM_BUILD_ROOT%{qemudocdir}/.buildinfo -# Remove spec -rm -rf $RPM_BUILD_ROOT%{qemudocdir}/specs -popd %check +%if !%{tools_only} + pushd %{qemu_kvm_build} -echo "Testing qemu-kvm-build" +echo "Testing %{name}-build" make check V=1 popd -%post -n qemu-kvm-common -%systemd_post ksm.service -%systemd_post ksmtuned.service +# endif !tools_only +%endif + + +%post -n qemu-guest-agent +%systemd_post qemu-guest-agent.service +%preun -n qemu-guest-agent +%systemd_preun qemu-guest-agent.service +%postun -n qemu-guest-agent +%systemd_postun_with_restart qemu-guest-agent.service + + +%if !%{tools_only} +%post common getent group kvm >/dev/null || groupadd -g 36 -r kvm getent group qemu >/dev/null || groupadd -g 107 -r qemu getent passwd qemu >/dev/null || \ @@ -1042,63 +1105,79 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ fi %endif -%preun -n qemu-kvm-common +%systemd_post ksm.service +%systemd_post ksmtuned.service + +%preun common %systemd_preun ksm.service %systemd_preun ksmtuned.service -%postun -n qemu-kvm-common +%postun common %systemd_postun_with_restart ksm.service %systemd_postun_with_restart ksmtuned.service +# endif !tools_only %endif -%post -n qemu-guest-agent -%systemd_post qemu-guest-agent.service -%preun -n qemu-guest-agent -%systemd_preun qemu-guest-agent.service -%postun -n qemu-guest-agent -%systemd_postun_with_restart qemu-guest-agent.service -%ifnarch %{power64} + +%files -n qemu-img 
+%{_bindir}/qemu-img +%{_bindir}/qemu-io +%{_bindir}/qemu-nbd +%{_bindir}/qemu-storage-daemon +%{_mandir}/man1/qemu-img.1* +%{_mandir}/man8/qemu-nbd.8* +%{_mandir}/man1/qemu-storage-daemon.1* +%{_mandir}/man7/qemu-storage-daemon-qmp-ref.7* + + +%files -n qemu-guest-agent +%doc COPYING README.rst +%{_bindir}/qemu-ga +%{_mandir}/man8/qemu-ga.8* +%{_unitdir}/qemu-guest-agent.service +%{_udevrulesdir}/99-qemu-guest-agent.rules +%config(noreplace) %{_sysconfdir}/sysconfig/qemu-ga +%{_sysconfdir}/qemu-ga +%{_datadir}/%{name}/qemu-ga +%dir %{_localstatedir}/log/qemu-ga + + +%if !%{tools_only} %files # Deliberately empty -%files -n qemu-kvm-docs -%dir %{qemudocdir} -%doc %{qemudocdir}/genindex.html -%doc %{qemudocdir}/search.html -%doc %{qemudocdir}/objects.inv -%doc %{qemudocdir}/searchindex.js -%doc %{qemudocdir}/README.rst -%doc %{qemudocdir}/COPYING -%doc %{qemudocdir}/COPYING.LIB -%doc %{qemudocdir}/LICENSE -%doc %{qemudocdir}/README.systemtap -%doc %{qemudocdir}/qmp-spec.txt -%doc %{qemudocdir}/interop/* -%doc %{qemudocdir}/index.html -%doc %{qemudocdir}/system/* -%doc %{qemudocdir}/tools/* -%doc %{qemudocdir}/user/* -%doc %{qemudocdir}/devel/* -%doc %{qemudocdir}/_static/* - -%files -n qemu-kvm-common -%{_mandir}/man7/qemu-qmp-ref.7* -%{_mandir}/man7/qemu-cpu-models.7* +%files tools %{_bindir}/qemu-keymap -%{_bindir}/qemu-pr-helper %{_bindir}/qemu-edid %{_bindir}/qemu-trace-stap +%{_datadir}/%{name}/simpletrace.py* +%{_datadir}/%{name}/tracetool/*.py* +%{_datadir}/%{name}/tracetool/backend/*.py* +%{_datadir}/%{name}/tracetool/format/*.py* +%{_datadir}/%{name}/dump-guest-memory.py* +%{_datadir}/%{name}/trace-events-all +%{_mandir}/man1/qemu-trace-stap.1* + +%files -n qemu-pr-helper +%{_bindir}/qemu-pr-helper %{_unitdir}/qemu-pr-helper.service %{_unitdir}/qemu-pr-helper.socket -%{_mandir}/man7/qemu-ga-ref.7* %{_mandir}/man8/qemu-pr-helper.8* + +%files docs +%doc %{qemudocdir} + +%files common +%license COPYING COPYING.LIB LICENSE +%{_mandir}/man7/qemu-qmp-ref.7* 
+%{_mandir}/man7/qemu-cpu-models.7* +%{_mandir}/man7/qemu-ga-ref.7* %{_mandir}/man1/virtiofsd.1* %dir %{_datadir}/%{name}/ %{_datadir}/%{name}/keymaps/ %{_mandir}/man1/%{name}.1* -%{_mandir}/man1/qemu-trace-stap.1* %{_mandir}/man7/qemu-block-drivers.7* %attr(4755, -, -) %{_libexecdir}/qemu-bridge-helper %config(noreplace) %{_sysconfdir}/sasl2/%{name}.conf @@ -1113,10 +1192,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %config(noreplace) %{_sysconfdir}/%{name}/bridge.conf %config(noreplace) %{_sysconfdir}/modprobe.d/vhost.conf %config(noreplace) %{_sysconfdir}/modprobe.d/kvm.conf -%{_datadir}/%{name}/simpletrace.py* -%{_datadir}/%{name}/tracetool/*.py* -%{_datadir}/%{name}/tracetool/backend/*.py* -%{_datadir}/%{name}/tracetool/format/*.py* %ifarch x86_64 %{_datadir}/%{name}/bios.bin @@ -1144,12 +1219,10 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/%{name}/efi-e1000e.rom %{_datadir}/%{name}/efi-virtio.rom %{_datadir}/%{name}/efi-rtl8139.rom - %{_libdir}/qemu-kvm/hw-display-virtio-vga.so + %{_libdir}/%{name}/hw-display-virtio-vga.so %endif %{_datadir}/icons/* %{_datadir}/%{name}/linuxboot_dma.bin -%{_datadir}/%{name}/dump-guest-memory.py* -%{_datadir}/%{name}/trace-events-all %if %{have_modules_load} %{_sysconfdir}/modules-load.d/kvm.conf %endif @@ -1162,7 +1235,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ # most other paths we use it's "qemu" instead of "qemu-kvm". 
%{_datadir}/qemu/vhost-user/50-qemu-virtiofsd.json -%files -n qemu-kvm-core +%files core %{_libexecdir}/qemu-kvm %{_datadir}/systemtap/tapset/qemu-kvm.stp %{_datadir}/systemtap/tapset/qemu-kvm-log.stp @@ -1170,62 +1243,95 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp %{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf -%{_libdir}/qemu-kvm/hw-display-virtio-gpu.so +%{_libdir}/%{name}/hw-display-virtio-gpu.so %ifarch s390x - %{_libdir}/qemu-kvm/hw-s390x-virtio-gpu-ccw.so + %{_libdir}/%{name}/hw-s390x-virtio-gpu-ccw.so %else - %{_libdir}/qemu-kvm/hw-display-virtio-gpu-pci.so -%endif + %{_libdir}/%{name}/hw-display-virtio-gpu-pci.so %endif -%files -n qemu-img -%{_bindir}/qemu-img -%{_bindir}/qemu-io -%{_bindir}/qemu-nbd -%{_bindir}/qemu-storage-daemon -%{_mandir}/man1/qemu-img.1* -%{_mandir}/man8/qemu-nbd.8* -%{_mandir}/man1/qemu-storage-daemon.1* -%{_mandir}/man7/qemu-storage-daemon-qmp-ref.7* - -%files -n qemu-guest-agent -%doc COPYING README.rst -%{_bindir}/qemu-ga -%{_mandir}/man8/qemu-ga.8* -%{_unitdir}/qemu-guest-agent.service -%{_udevrulesdir}/99-qemu-guest-agent.rules -%config(noreplace) %{_sysconfdir}/sysconfig/qemu-ga -%{_sysconfdir}/qemu-ga -%{_sysconfdir}/qemu-kvm/fsfreeze-hook -%{_datadir}/%{name}/qemu-ga -%dir %{_localstatedir}/log/qemu-ga - -%ifnarch %{power64} %files tests %{testsdir} %files block-curl -%{_libdir}/qemu-kvm/block-curl.so - +%{_libdir}/%{name}/block-curl.so +%if %{have_block_rbd} %files block-rbd -%{_libdir}/qemu-kvm/block-rbd.so - +%{_libdir}/%{name}/block-rbd.so +%endif %files block-ssh -%{_libdir}/qemu-kvm/block-ssh.so +%{_libdir}/%{name}/block-ssh.so %if %{have_opengl} %files ui-opengl - %{_libdir}/qemu-kvm/ui-egl-headless.so - %{_libdir}/qemu-kvm/ui-opengl.so +%{_libdir}/%{name}/ui-egl-headless.so +%{_libdir}/%{name}/ui-opengl.so %endif %if %{have_usbredir} %files hw-usbredir - %{_libdir}/qemu-kvm/hw-usb-redirect.so + %{_libdir}/%{name}/hw-usb-redirect.so %endif + +# 
endif !tools_only %endif %changelog +* Sun Jul 25 2021 Miroslav Rezanina - 6.0.0-10 +- kvm-s390x-css-Introduce-an-ESW-struct.patch [bz#1957194] +- kvm-s390x-css-Split-out-the-IRB-sense-data.patch [bz#1957194] +- kvm-s390x-css-Refactor-IRB-construction.patch [bz#1957194] +- kvm-s390x-css-Add-passthrough-IRB.patch [bz#1957194] +- kvm-vhost-user-blk-Fail-gracefully-on-too-large-queue-si.patch [bz#1957194] +- kvm-vhost-user-blk-Make-sure-to-set-Error-on-realize-fai.patch [bz#1957194] +- kvm-vhost-user-blk-Don-t-reconnect-during-initialisation.patch [bz#1957194] +- kvm-vhost-user-blk-Improve-error-reporting-in-realize.patch [bz#1957194] +- kvm-vhost-user-blk-Get-more-feature-flags-from-vhost-dev.patch [bz#1957194] +- kvm-virtio-Fail-if-iommu_platform-is-requested-but-unsup.patch [bz#1957194] +- kvm-vhost-user-blk-Check-that-num-queues-is-supported-by.patch [bz#1957194] +- kvm-vhost-user-Fix-backends-without-multiqueue-support.patch [bz#1957194] +- kvm-file-posix-fix-max_iov-for-dev-sg-devices.patch [bz#1957194] +- kvm-scsi-generic-pass-max_segments-via-max_iov-field-in-.patch [bz#1957194] +- kvm-osdep-provide-ROUND_DOWN-macro.patch [bz#1957194] +- kvm-block-backend-align-max_transfer-to-request-alignmen.patch [bz#1957194] +- kvm-block-add-max_hw_transfer-to-BlockLimits.patch [bz#1957194] +- kvm-file-posix-try-BLKSECTGET-on-block-devices-too-do-no.patch [bz#1957194] +- kvm-block-Add-option-to-use-driver-whitelist-even-in-too.patch [bz#1957782] +- kvm-spec-Restrict-block-drivers-in-tools.patch [bz#1957782] +- kvm-Move-tools-to-separate-package.patch [bz#1972285] +- kvm-Split-qemu-pr-helper-to-separate-package.patch [bz#1972300] +- kvm-spec-RPM_BUILD_ROOT-buildroot.patch [bz#1973029] +- kvm-spec-More-use-of-name-instead-of-qemu-kvm.patch [bz#1973029] +- kvm-spec-Use-qemu-pr-helper.service-from-qemu.git.patch [bz#1973029] +- kvm-spec-Use-_sourcedir-for-referencing-sources.patch [bz#1973029] +- kvm-spec-Add-tools_only.patch [bz#1973029] +- 
kvm-spec-build-Add-run_configure-helper.patch [bz#1973029] +- kvm-spec-build-Disable-more-bits-with-disable_everything.patch [bz#1973029] +- kvm-spec-build-Add-macros-for-some-configure-parameters.patch [bz#1973029] +- kvm-spec-files-Move-qemu-guest-agent-and-qemu-img-earlie.patch [bz#1973029] +- kvm-spec-install-Remove-redundant-bits.patch [bz#1973029] +- kvm-spec-install-Add-modprobe_kvm_conf-macro.patch [bz#1973029] +- kvm-spec-install-Remove-qemu-guest-agent-etc-qemu-kvm-us.patch [bz#1973029] +- kvm-spec-install-clean-up-qemu-ga-section.patch [bz#1973029] +- kvm-spec-install-Use-a-single-tools_only-section.patch [bz#1973029] +- kvm-spec-Make-tools_only-not-cross-spec-sections.patch [bz#1973029] +- kvm-spec-install-Limit-time-spent-in-qemu_kvm_build.patch [bz#1973029] +- kvm-spec-misc-syntactic-merges-with-Fedora.patch [bz#1973029] +- kvm-spec-Use-Fedora-s-pattern-for-specifying-rc-version.patch [bz#1973029] +- kvm-spec-files-don-t-use-fine-grained-docs-file-list.patch [bz#1973029] +- kvm-spec-files-Add-licenses-to-qemu-common-too.patch [bz#1973029] +- kvm-spec-install-Drop-python3-shebang-fixup.patch [bz#1973029] +- Resolves: bz#1957194 + (Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta) +- Resolves: bz#1957782 + (VMDK support should be read-only) +- Resolves: bz#1972285 + (Split out a qemu-kvm-tools subpackage) +- Resolves: bz#1972300 + (Split out a qemu-pr-helper subpackage) +- Resolves: bz#1973029 + (Spec file cleanups) + * Mon Jul 19 2021 Miroslav Rezanina - 6.0.0-9 - kvm-s390x-cpumodel-add-3931-and-3932.patch [bz#1932191] - kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch [bz#1957194] diff --git a/qemu-pr-helper.service b/qemu-pr-helper.service deleted file mode 100644 index a1d27b0..0000000 --- a/qemu-pr-helper.service +++ /dev/null @@ -1,15 +0,0 @@ -[Unit] -Description=Persistent Reservation Daemon for QEMU - -[Service] -WorkingDirectory=/tmp -Type=simple -ExecStart=/usr/bin/qemu-pr-helper -PrivateTmp=yes -ProtectSystem=strict 
-ReadWritePaths=/var/run -RestrictAddressFamilies=AF_UNIX -Restart=always -RestartSec=0 - -[Install] diff --git a/qemu-pr-helper.socket b/qemu-pr-helper.socket deleted file mode 100644 index 9d7c3e5..0000000 --- a/qemu-pr-helper.socket +++ /dev/null @@ -1,9 +0,0 @@ -[Unit] -Description=Persistent Reservation Daemon for QEMU - -[Socket] -ListenStream=/run/qemu-pr-helper.sock -SocketMode=0600 - -[Install] -WantedBy=multi-user.target From 457e3fe8c2e29b8fc43bb20bf5dc63b58311d873 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Sat, 7 Aug 2021 04:46:16 -0400 Subject: [PATCH 130/195] * Sat Aug 07 2021 Miroslav Rezanina - 6.0.0-11 - kvm-arm-virt-Register-iommu-as-a-class-property.patch [bz#1838608] - kvm-arm-virt-Register-its-as-a-class-property.patch [bz#1838608] - kvm-arm-virt-Enable-ARM-RAS-support.patch [bz#1838608] - kvm-block-Fix-in_flight-leak-in-request-padding-error-pa.patch [bz#1972079] - kvm-spec-Remove-buildldflags.patch [bz#1973029] - kvm-spec-Use-make_build-macro.patch [bz#1973029] - kvm-spec-Drop-make-install-sharedir-and-datadir-usage.patch [bz#1973029] - kvm-spec-use-make_install-macro.patch [bz#1973029] - kvm-spec-parallelize-make-check.patch [bz#1973029] - kvm-spec-Drop-explicit-build-id.patch [bz#1973029] - kvm-spec-use-build_ldflags.patch [bz#1973029] - kvm-Move-virtiofsd-to-separate-package.patch [bz#1979728] - kvm-Utilize-firmware-configure-option.patch [bz#1980139] - Resolves: bz#1838608 (aarch64: Enable ARMv8 RAS virtualization support) - Resolves: bz#1972079 (Windows Installation blocked on 4k disk when using blk+raw+iothread) - Resolves: bz#1973029 (Spec file cleanups) - Resolves: bz#1979728 (Split out virtiofsd subpackage) - Resolves: bz#1980139 (Use configure --firmwarepath more) --- kvm-arm-virt-Enable-ARM-RAS-support.patch | 68 ++++++++ ...t-Register-iommu-as-a-class-property.patch | 54 +++++++ ...irt-Register-its-as-a-class-property.patch | 56 +++++++ ...ght-leak-in-request-padding-error-pa.patch | 71 +++++++++ qemu-kvm.spec | 148 
+++++++++--------- 5 files changed, 319 insertions(+), 78 deletions(-) create mode 100644 kvm-arm-virt-Enable-ARM-RAS-support.patch create mode 100644 kvm-arm-virt-Register-iommu-as-a-class-property.patch create mode 100644 kvm-arm-virt-Register-its-as-a-class-property.patch create mode 100644 kvm-block-Fix-in_flight-leak-in-request-padding-error-pa.patch diff --git a/kvm-arm-virt-Enable-ARM-RAS-support.patch b/kvm-arm-virt-Enable-ARM-RAS-support.patch new file mode 100644 index 0000000..a3881b8 --- /dev/null +++ b/kvm-arm-virt-Enable-ARM-RAS-support.patch @@ -0,0 +1,68 @@ +From 170a9c7dc044a0094b48c658e0d57c97c4b854e0 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 30 Jun 2021 13:38:03 +0200 +Subject: [PATCH 03/13] arm/virt: Enable ARM RAS support + +RH-Author: Eric Auger +RH-MergeRequest: 19: arm/virt: Support RAS +RH-Commit: [3/3] 1572368bb0f47463a1f6ffa3f5baa97242440c98 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 1838608 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Igor Mammedov + +We want to support ARM RAS (Reliability, Availability & Serviceability). +So let's register the RAS property as a class property. It is +unset by default. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Miroslav Rezanina +--- + hw/arm/virt.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index fe1111d527..0084935ec8 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2281,7 +2281,6 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, + visit_type_OnOffAuto(v, name, &vms->acpi, errp); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_ras(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2296,6 +2295,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) + vms->ras = value; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_mte(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -3013,6 +3013,12 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set the IOMMU type. " + "Valid values are none and smmuv3"); + ++ object_class_property_add_bool(oc, "ras", virt_get_ras, ++ virt_set_ras); ++ object_class_property_set_description(oc, "ras", ++ "Set on/off to enable/disable reporting host memory errors " ++ "to a KVM guest using ACPI and guest external abort exceptions"); ++ + object_class_property_add_bool(oc, "its", virt_get_its, + virt_set_its); + object_class_property_set_description(oc, "its", +@@ -3063,7 +3069,7 @@ static void rhel_virt_instance_init(Object *obj) + /* Default disallows iommu instantiation */ + vms->iommu = VIRT_IOMMU_NONE; + +- /* Default disallows RAS instantiation and is non-configurable for RHEL */ ++ /* Default disallows RAS instantiation */ + vms->ras = false; + + /* MTE is disabled by default and non-configurable for RHEL */ +-- +2.27.0 + diff --git a/kvm-arm-virt-Register-iommu-as-a-class-property.patch b/kvm-arm-virt-Register-iommu-as-a-class-property.patch new file mode 100644 index 0000000..5235851 --- /dev/null +++ 
b/kvm-arm-virt-Register-iommu-as-a-class-property.patch @@ -0,0 +1,54 @@ +From 42e530c513914b83273ec8c6a29100eb3c737604 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 30 Jun 2021 07:28:38 -0400 +Subject: [PATCH 01/13] arm/virt: Register iommu as a class property + +RH-Author: Eric Auger +RH-MergeRequest: 19: arm/virt: Support RAS +RH-Commit: [1/3] 48428de4deb5af94891aa5552fb9f4ea6d69acef (eauger1/centos-qemu-kvm) +RH-Bugzilla: 1838608 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Igor Mammedov + +As done for highmem and gic-version, let's register the iommu property +as a class property. This is closer to the upstream code. This +change was originally made by upstream commit: +b91def7b8382 ("arm/virt: Register most properties as class properties") + +Signed-off-by: Eric Auger +Signed-off-by: Miroslav Rezanina +--- + hw/arm/virt.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e4aa794f83..bdae24a753 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3008,6 +3008,11 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set GIC version. " + "Valid values are 2, 3, host and max"); + ++ object_class_property_add_str(oc, "iommu", virt_get_iommu, virt_set_iommu); ++ object_class_property_set_description(oc, "iommu", ++ "Set the IOMMU type. " ++ "Valid values are none and smmuv3"); ++ + object_class_property_add_str(oc, "x-oem-id", + virt_get_oem_id, + virt_set_oem_id); +@@ -3056,10 +3061,6 @@ static void rhel_virt_instance_init(Object *obj) + + /* Default disallows iommu instantiation */ + vms->iommu = VIRT_IOMMU_NONE; +- object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu); +- object_property_set_description(obj, "iommu", +- "Set the IOMMU type. 
" +- "Valid values are none and smmuv3"); + + /* Default disallows RAS instantiation and is non-configurable for RHEL */ + vms->ras = false; +-- +2.27.0 + diff --git a/kvm-arm-virt-Register-its-as-a-class-property.patch b/kvm-arm-virt-Register-its-as-a-class-property.patch new file mode 100644 index 0000000..a0f9ff7 --- /dev/null +++ b/kvm-arm-virt-Register-its-as-a-class-property.patch @@ -0,0 +1,56 @@ +From 29d18bdaf3dad52a052b2b058cd8f74652a72f76 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 8 Jul 2021 05:14:47 -0400 +Subject: [PATCH 02/13] arm/virt: Register its as a class property + +RH-Author: Eric Auger +RH-MergeRequest: 19: arm/virt: Support RAS +RH-Commit: [2/3] 95debacfd59d06ed2dcaeb120b20a280ff515434 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 1838608 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Igor Mammedov + +As done for highmem, gic-version, iommu, let's register the its property +as a class property. This is closer to the upstream code. This +change was originally made by upstream commit: +b91def7b8382 ("arm/virt: Register most properties as class properties") + +Signed-off-by: Eric Auger +Signed-off-by: Miroslav Rezanina +--- + hw/arm/virt.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index bdae24a753..fe1111d527 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3013,6 +3013,12 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set the IOMMU type. 
" + "Valid values are none and smmuv3"); + ++ object_class_property_add_bool(oc, "its", virt_get_its, ++ virt_set_its); ++ object_class_property_set_description(oc, "its", ++ "Set on/off to enable/disable " ++ "ITS instantiation"); ++ + object_class_property_add_str(oc, "x-oem-id", + virt_get_oem_id, + virt_set_oem_id); +@@ -3052,11 +3058,6 @@ static void rhel_virt_instance_init(Object *obj) + } else { + /* Default allows ITS instantiation */ + vms->its = true; +- object_property_add_bool(obj, "its", virt_get_its, +- virt_set_its); +- object_property_set_description(obj, "its", +- "Set on/off to enable/disable " +- "ITS instantiation"); + } + + /* Default disallows iommu instantiation */ +-- +2.27.0 + diff --git a/kvm-block-Fix-in_flight-leak-in-request-padding-error-pa.patch b/kvm-block-Fix-in_flight-leak-in-request-padding-error-pa.patch new file mode 100644 index 0000000..6545209 --- /dev/null +++ b/kvm-block-Fix-in_flight-leak-in-request-padding-error-pa.patch @@ -0,0 +1,71 @@ +From 872e82621b1341e8b96bda47f7f43dfffd356249 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 27 Jul 2021 17:49:23 +0200 +Subject: [PATCH 04/13] block: Fix in_flight leak in request padding error path + +RH-Author: Kevin Wolf +RH-MergeRequest: 31: block: Fix in_flight leak in request padding error path +RH-Commit: [1/1] a0d1bf38d9a69818cd6cefc3779f2988b484605a (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 1972079 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz + +When bdrv_pad_request() fails in bdrv_co_preadv_part(), bs->in_flight +has been increased, but is never decreased again. This leads to a hang +when trying to drain the block node. + +This bug was observed with Windows guests which issue a request that +fully uses IOV_MAX during installation, so that when padding is +necessary (O_DIRECT with a 4k sector size block device on the host), +adding another entry causes failure. + +Call bdrv_dec_in_flight() to fix this. 
There is a larger problem to +solve here because this request shouldn't even fail, but Windows doesn't +seem to care and with this minimal fix the installation succeeds. So +given that we're already in freeze, let's take this minimal fix for 6.1. + +Fixes: 98ca45494fcd6bf0336ecd559e440b6de6ea4cd3 +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1972079 +Reported-by: Qing Wang +Signed-off-by: Kevin Wolf +Message-Id: <20210727154923.91067-1-kwolf@redhat.com> +Reviewed-by: Max Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 87ab88025247b893aad5071fd38301b67be76d1a) +Signed-off-by: Kevin Wolf +Signed-off-by: Miroslav Rezanina +--- + block/io.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/block/io.c b/block/io.c +index a4b2e3adf1..5033d51334 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -1811,7 +1811,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, + ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, + NULL); + if (ret < 0) { +- return ret; ++ goto fail; + } + + tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); +@@ -1819,10 +1819,11 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, + bs->bl.request_alignment, + qiov, qiov_offset, flags); + tracked_request_end(&req); +- bdrv_dec_in_flight(bs); +- + bdrv_padding_destroy(&pad); + ++fail: ++ bdrv_dec_in_flight(bs); ++ + return ret; + } + +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 7154a71..4ee3077 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -3,6 +3,7 @@ %global libusbx_version 1.0.23 %global meson_version 0.55.3-3 %global usbredir_version 0.7.1 +%global ipxe_version 20200823-5.git4bd064de %global have_usbredir 1 %global have_opengl 1 @@ -77,7 +78,7 @@ %global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle %global block_drivers_ro_list vmdk,vhdx,vpc,https,ssh %define qemudocdir %{_docdir}/%{name} - +%global firmwaredirs 
"%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios:%{_datadir}/sgabios" #Versions of various parts: @@ -112,7 +113,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.0.0 -Release: 10%{?rcrel}%{?dist} +Release: 11%{?rcrel}%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -273,6 +274,14 @@ Patch73: kvm-block-add-max_hw_transfer-to-BlockLimits.patch Patch74: kvm-file-posix-try-BLKSECTGET-on-block-devices-too-do-no.patch # For bz#1957782 - VMDK support should be read-only Patch75: kvm-block-Add-option-to-use-driver-whitelist-even-in-too.patch +# For bz#1838608 - aarch64: Enable ARMv8 RAS virtualization support +Patch76: kvm-arm-virt-Register-iommu-as-a-class-property.patch +# For bz#1838608 - aarch64: Enable ARMv8 RAS virtualization support +Patch77: kvm-arm-virt-Register-its-as-a-class-property.patch +# For bz#1838608 - aarch64: Enable ARMv8 RAS virtualization support +Patch78: kvm-arm-virt-Enable-ARM-RAS-support.patch +# For bz#1972079 - Windows Installation blocked on 4k disk when using blk+raw+iothread +Patch79: kvm-block-Fix-in_flight-leak-in-request-padding-error-pa.patch # Source-git patches @@ -342,6 +351,7 @@ Requires: %{name}-core = %{epoch}:%{version}-%{release} Requires: %{name}-docs = %{epoch}:%{version}-%{release} Requires: %{name}-tools = %{epoch}:%{version}-%{release} Requires: qemu-pr-helper = %{epoch}:%{version}-%{release} +Requires: virtiofsd = %{epoch}:%{version}-%{release} %{requires_all_modules} %description @@ -390,7 +400,7 @@ Requires: sgabios-bin %endif %ifnarch aarch64 s390x Requires: seavgabios-bin >= 1.12.0-3 -Requires: ipxe-roms-qemu >= 20170123-1 +Requires: ipxe-roms-qemu >= %{ipxe_version} %endif %description common @@ -419,6 +429,15 @@ This package provides the qemu-pr-helper 
utility that is required for certain SCSI features. +%package -n qemu-virtiofsd +Summary: QEMU virtio-fs shared file system daemon +Provides: virtiofsd +%description -n qemu-virtiofsd +This package provides virtiofsd daemon. This program is a vhost-user backend +that implements the virtio-fs device that is used for sharing a host directory +tree with a guest. + + %package -n qemu-img Summary: QEMU command line tool for manipulating disk images %description -n qemu-img @@ -516,9 +535,6 @@ mkdir -p %{qemu_kvm_build} %build -# --build-id option is used for giving info to the debug packages. -buildldflags="VL_LDFLAGS=-Wl,--build-id" - %define disable_everything \\\ --audio-drv-list= \\\ --disable-attr \\\ @@ -656,11 +672,11 @@ run_configure() { --localstatedir="%{_localstatedir}" \ --docdir="%{_docdir}" \ --libexecdir="%{_libexecdir}" \ - --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ + --extra-ldflags="%{build_ldflags}" \ --extra-cflags="%{optflags}" \ --with-pkgversion="%{name}-%{version}-%{release}" \ --with-suffix="%{name}" \ - --firmwarepath=%{_prefix}/share/qemu-firmware \ + --firmwarepath=%{firmwaredirs} \ --meson="%{__meson}" \ --enable-trace-backend=dtrace \ --with-coroutine=ucontext \ @@ -757,24 +773,24 @@ run_configure \ %if %{tools_only} -make V=1 %{?_smp_mflags} $buildldflags qemu-img -make V=1 %{?_smp_mflags} $buildldflags qemu-io -make V=1 %{?_smp_mflags} $buildldflags qemu-nbd -make V=1 %{?_smp_mflags} $buildldflags storage-daemon/qemu-storage-daemon +%make_build qemu-img +%make_build qemu-io +%make_build qemu-nbd +%make_build storage-daemon/qemu-storage-daemon -make V=1 %{?_smp_mflags} $buildldflags docs/qemu-img.1 -make V=1 %{?_smp_mflags} $buildldflags docs/qemu-nbd.8 -make V=1 %{?_smp_mflags} $buildldflags docs/qemu-storage-daemon.1 -make V=1 %{?_smp_mflags} $buildldflags docs/qemu-storage-daemon-qmp-ref.7 +%make_build docs/qemu-img.1 +%make_build docs/qemu-nbd.8 +%make_build docs/qemu-storage-daemon.1 +%make_build 
docs/qemu-storage-daemon-qmp-ref.7 -make V=1 %{?_smp_mflags} $buildldflags qga/qemu-ga -make V=1 %{?_smp_mflags} $buildldflags docs/qemu-ga.8 +%make_build qga/qemu-ga +%make_build docs/qemu-ga.8 # endif tools_only %endif %if !%{tools_only} -make V=1 %{?_smp_mflags} $buildldflags +%make_build # Setup back compat qemu-kvm binary %{__python3} scripts/tracetool.py --backend dtrace --format stap \ @@ -877,10 +893,7 @@ install -p -m 0644 %{_sourcedir}/README.tests %{buildroot}%{testsdir}/README # Do the actual qemu tree install pushd %{qemu_kvm_build} -make DESTDIR=%{buildroot} \ - sharedir="%{_datadir}/%{name}" \ - datadir="%{_datadir}/%{name}" \ - install +%make_install popd mkdir -p %{buildroot}%{_datadir}/systemtap/tapset @@ -1004,39 +1017,6 @@ rm -rf %{buildroot}%{_datadir}/%{name}/bios*.bin # Provided by package sgabios rm -rf %{buildroot}%{_datadir}/%{name}/sgabios.bin -# the pxe gpxe images will be symlinks to the images on -# /usr/share/ipxe, as QEMU doesn't know how to look -# for other paths, yet. 
-pxe_link() { - ln -s ../ipxe.efi/$2.rom %{buildroot}%{_datadir}/%{name}/efi-$1.rom -} - -%ifnarch aarch64 s390x -pxe_link e1000 8086100e -pxe_link rtl8139 10ec8139 -pxe_link virtio 1af41000 -pxe_link e1000e 808610d3 -%endif - -rom_link() { - ln -s $1 %{buildroot}%{_datadir}/%{name}/$2 -} - -%ifnarch aarch64 s390x - rom_link ../seavgabios/vgabios-isavga.bin vgabios.bin - rom_link ../seavgabios/vgabios-cirrus.bin vgabios-cirrus.bin - rom_link ../seavgabios/vgabios-qxl.bin vgabios-qxl.bin - rom_link ../seavgabios/vgabios-stdvga.bin vgabios-stdvga.bin - rom_link ../seavgabios/vgabios-vmware.bin vgabios-vmware.bin - rom_link ../seavgabios/vgabios-virtio.bin vgabios-virtio.bin - rom_link ../seavgabios/vgabios-ramfb.bin vgabios-ramfb.bin - rom_link ../seavgabios/vgabios-bochs-display.bin vgabios-bochs-display.bin -%endif -%ifarch x86_64 - rom_link ../seabios/bios.bin bios.bin - rom_link ../seabios/bios-256k.bin bios-256k.bin - rom_link ../sgabios/sgabios.bin sgabios.bin -%endif %if %{have_modules_load} install -D -p -m 644 %{_sourcedir}/modules-load.conf %{buildroot}%{_sysconfdir}/modules-load.d/kvm.conf @@ -1071,7 +1051,7 @@ rm -rf %{buildroot}%{qemudocdir}/specs pushd %{qemu_kvm_build} echo "Testing %{name}-build" -make check V=1 +%make_build check popd # endif !tools_only @@ -1165,6 +1145,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_unitdir}/qemu-pr-helper.socket %{_mandir}/man8/qemu-pr-helper.8* +%files -n qemu-virtiofsd +%{_mandir}/man1/virtiofsd.1* +%{_libexecdir}/virtiofsd +# This is the standard location for vhost-user JSON files defined in the +# vhost-user specification for interoperability with other software. Unlike +# most other paths we use it's "qemu" instead of "qemu-kvm". 
+%{_datadir}/qemu/vhost-user/50-qemu-virtiofsd.json + %files docs %doc %{qemudocdir} @@ -1173,7 +1161,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_mandir}/man7/qemu-qmp-ref.7* %{_mandir}/man7/qemu-cpu-models.7* %{_mandir}/man7/qemu-ga-ref.7* -%{_mandir}/man1/virtiofsd.1* %dir %{_datadir}/%{name}/ %{_datadir}/%{name}/keymaps/ @@ -1194,12 +1181,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %config(noreplace) %{_sysconfdir}/modprobe.d/kvm.conf %ifarch x86_64 - %{_datadir}/%{name}/bios.bin - %{_datadir}/%{name}/bios-256k.bin %{_datadir}/%{name}/linuxboot.bin %{_datadir}/%{name}/multiboot.bin %{_datadir}/%{name}/kvmvapic.bin - %{_datadir}/%{name}/sgabios.bin %{_datadir}/%{name}/pvh.bin %endif %ifarch s390x @@ -1207,18 +1191,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/%{name}/s390-netboot.img %endif %ifnarch aarch64 s390x - %{_datadir}/%{name}/vgabios.bin - %{_datadir}/%{name}/vgabios-cirrus.bin - %{_datadir}/%{name}/vgabios-qxl.bin - %{_datadir}/%{name}/vgabios-stdvga.bin - %{_datadir}/%{name}/vgabios-vmware.bin - %{_datadir}/%{name}/vgabios-virtio.bin - %{_datadir}/%{name}/vgabios-ramfb.bin - %{_datadir}/%{name}/vgabios-bochs-display.bin - %{_datadir}/%{name}/efi-e1000.rom - %{_datadir}/%{name}/efi-e1000e.rom - %{_datadir}/%{name}/efi-virtio.rom - %{_datadir}/%{name}/efi-rtl8139.rom %{_libdir}/%{name}/hw-display-virtio-vga.so %endif %{_datadir}/icons/* @@ -1229,11 +1201,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %if %{have_memlock_limits} %{_sysconfdir}/security/limits.d/95-kvm-memlock.conf %endif -%{_libexecdir}/virtiofsd -# This is the standard location for vhost-user JSON files defined in the -# vhost-user specification for interoperability with other software. Unlike -# most other paths we use it's "qemu" instead of "qemu-kvm". 
-%{_datadir}/qemu/vhost-user/50-qemu-virtiofsd.json %files core %{_libexecdir}/qemu-kvm @@ -1277,6 +1244,31 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Sat Aug 07 2021 Miroslav Rezanina - 6.0.0-11 +- kvm-arm-virt-Register-iommu-as-a-class-property.patch [bz#1838608] +- kvm-arm-virt-Register-its-as-a-class-property.patch [bz#1838608] +- kvm-arm-virt-Enable-ARM-RAS-support.patch [bz#1838608] +- kvm-block-Fix-in_flight-leak-in-request-padding-error-pa.patch [bz#1972079] +- kvm-spec-Remove-buildldflags.patch [bz#1973029] +- kvm-spec-Use-make_build-macro.patch [bz#1973029] +- kvm-spec-Drop-make-install-sharedir-and-datadir-usage.patch [bz#1973029] +- kvm-spec-use-make_install-macro.patch [bz#1973029] +- kvm-spec-parallelize-make-check.patch [bz#1973029] +- kvm-spec-Drop-explicit-build-id.patch [bz#1973029] +- kvm-spec-use-build_ldflags.patch [bz#1973029] +- kvm-Move-virtiofsd-to-separate-package.patch [bz#1979728] +- kvm-Utilize-firmware-configure-option.patch [bz#1980139] +- Resolves: bz#1838608 + (aarch64: Enable ARMv8 RAS virtualization support) +- Resolves: bz#1972079 + (Windows Installation blocked on 4k disk when using blk+raw+iothread) +- Resolves: bz#1973029 + (Spec file cleanups) +- Resolves: bz#1979728 + (Split out virtiofsd subpackage) +- Resolves: bz#1980139 + (Use configure --firmwarepath more) + * Sun Jul 25 2021 Miroslav Rezanina - 6.0.0-10 - kvm-s390x-css-Introduce-an-ESW-struct.patch [bz#1957194] - kvm-s390x-css-Split-out-the-IRB-sense-data.patch [bz#1957194] From f691be9771f2bf5fae0e423758d5703692514996 Mon Sep 17 00:00:00 2001 From: Mohan Boddu Date: Tue, 10 Aug 2021 00:29:15 +0000 Subject: [PATCH 131/195] Rebuilt for IMA sigs, glibc 2.34, aarch64 flags Related: rhbz#1991688 Signed-off-by: Mohan Boddu --- qemu-kvm.spec | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 4ee3077..e271b0b 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -113,7 +113,7 @@ 
Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.0.0 -Release: 11%{?rcrel}%{?dist} +Release: 11%{?rcrel}%{?dist}.1 # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -1244,6 +1244,10 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Tue Aug 10 2021 Mohan Boddu - 17:6.0.0-11.1 +- Rebuilt for IMA sigs, glibc 2.34, aarch64 flags + Related: rhbz#1991688 + * Sat Aug 07 2021 Miroslav Rezanina - 6.0.0-11 - kvm-arm-virt-Register-iommu-as-a-class-property.patch [bz#1838608] - kvm-arm-virt-Register-its-as-a-class-property.patch [bz#1838608] From b7fbd568b2fe898170b2a4345d428230c52c68ce Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 20 Aug 2021 06:50:35 -0400 Subject: [PATCH 132/195] * Fri Aug 20 2021 Miroslav Rezanina - 6.0.0-12.el9 - kvm-migration-Move-yank-outside-qemu_start_incoming_migr.patch [bz#1974683] - kvm-migration-Allow-reset-of-postcopy_recover_triggered-.patch [bz#1974683] - kvm-Remove-RHEL-7.0.0-machine-type.patch [bz#1968519] - kvm-Remove-RHEL-7.1.0-machine-type.patch [bz#1968519] - kvm-Remove-RHEL-7.2.0-machine-type.patch [bz#1968519] - kvm-Remove-RHEL-7.3.0-machine-types.patch [bz#1968519] - kvm-Remove-RHEL-7.4.0-machine-types.patch [bz#1968519] - kvm-Remove-RHEL-7.5.0-machine-types.patch [bz#1968519] - kvm-acpi-pc-revert-back-to-v5.2-PCI-slot-enumeration.patch [bz#1957194] - kvm-migration-failover-reset-partially_hotplugged.patch [bz#1957194] - kvm-hmp-Fix-loadvm-to-resume-the-VM-on-success-instead-o.patch [bz#1957194] - kvm-migration-Move-bitmap_mutex-out-of-migration_bitmap_.patch [bz#1957194] - kvm-i386-cpu-Expose-AVX_VNNI-instruction-to-guest.patch [bz#1957194] - kvm-ratelimit-protect-with-a-mutex.patch [bz#1957194] - kvm-Update-Linux-headers-to-5.13-rc4.patch [bz#1957194] - 
kvm-i386-Add-ratelimit-for-bus-locks-acquired-in-guest.patch [bz#1957194] - kvm-iothread-generalize-iothread_set_param-iothread_get_.patch [bz#1957194] - kvm-iothread-add-aio-max-batch-parameter.patch [bz#1957194] - kvm-linux-aio-limit-the-batch-size-using-aio-max-batch-p.patch [bz#1957194] - kvm-block-nvme-Fix-VFIO_MAP_DMA-failed-No-space-left-on-.patch [bz#1957194] - kvm-migration-move-wait-unplug-loop-to-its-own-function.patch [bz#1957194] - kvm-migration-failover-continue-to-wait-card-unplug-on-e.patch [bz#1957194] - kvm-aarch64-Add-USB-storage-devices.patch [bz#1957194] - kvm-iotests-Improve-and-rename-test-291-to-qemu-img-bitm.patch [bz#1957194] - kvm-qemu-img-Fail-fast-on-convert-bitmaps-with-inconsist.patch [bz#1957194] - kvm-qemu-img-Add-skip-broken-bitmaps-for-convert-bitmaps.patch [bz#1957194] - kvm-audio-Never-send-migration-section.patch [bz#1957194] - kvm-pc-bios-s390-ccw-bootmap-Silence-compiler-warning-fr.patch [bz#1939509 bz#1940132] - kvm-pc-bios-s390-ccw-Use-reset_psw-pointer-instead-of-ha.patch [bz#1939509 bz#1940132] - kvm-pc-bios-s390-ccw-netboot-Use-Wl-prefix-to-pass-param.patch [bz#1939509 bz#1940132] - kvm-pc-bios-s390-ccw-Silence-warning-from-Clang-by-marki.patch [bz#1939509 bz#1940132] - kvm-pc-bios-s390-ccw-Fix-the-cc-option-macro-in-the-Make.patch [bz#1939509 bz#1940132] - kvm-pc-bios-s390-ccw-Silence-GCC-11-stringop-overflow-wa.patch [bz#1939509 bz#1940132] - kvm-pc-bios-s390-ccw-Allow-building-with-Clang-too.patch [bz#1939509 bz#1940132] - kvm-pc-bios-s390-ccw-Fix-inline-assembly-for-older-versi.patch [bz#1939509 bz#1940132] - kvm-configure-Fix-endianess-test-with-LTO.patch [bz#1939509 bz#1940132] - kvm-spec-Switch-toolchain-to-Clang-LLVM.patch [bz#1939509 bz#1940132] - kvm-spec-Use-safe-stack-for-x86_64.patch [bz#1939509 bz#1940132] - kvm-spec-Reenable-write-support-for-VMDK-etc.-in-tools.patch [bz#1989841] - Resolves: bz#1974683 (Fail to set migrate incoming for 2nd time after the first time failed) - Resolves: bz#1968519 (Remove 
all the old 7.0-7.5 machine types) - Resolves: bz#1957194 (Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta) - Resolves: bz#1939509 (QEMU: enable SafeStack) - Resolves: bz#1940132 (QEMU: switch build toolchain to Clang/LLVM) - Resolves: bz#1989841 (RFE: qemu-img cannot convert images into vmdk and vpc formats) --- kvm-Remove-RHEL-7.0.0-machine-type.patch | 197 + kvm-Remove-RHEL-7.1.0-machine-type.patch | 292 + kvm-Remove-RHEL-7.2.0-machine-type.patch | 192 + kvm-Remove-RHEL-7.3.0-machine-types.patch | 315 + kvm-Remove-RHEL-7.4.0-machine-types.patch | 301 + kvm-Remove-RHEL-7.5.0-machine-types.patch | 210 + kvm-Update-Linux-headers-to-5.13-rc4.patch | 5465 +++++++++++++++++ kvm-aarch64-Add-USB-storage-devices.patch | 41 + ...rt-back-to-v5.2-PCI-slot-enumeration.patch | 111 + kvm-audio-Never-send-migration-section.patch | 63 + ...FIO_MAP_DMA-failed-No-space-left-on-.patch | 106 + ...onfigure-Fix-endianess-test-with-LTO.patch | 82 + ...o-resume-the-VM-on-success-instead-o.patch | 51 + ...imit-for-bus-locks-acquired-in-guest.patch | 219 + ...Expose-AVX_VNNI-instruction-to-guest.patch | 82 + ...and-rename-test-291-to-qemu-img-bitm.patch | 178 + ...iothread-add-aio-max-batch-parameter.patch | 324 + ...ize-iothread_set_param-iothread_get_.patch | 96 + ...the-batch-size-using-aio-max-batch-p.patch | 84 + ...reset-of-postcopy_recover_triggered-.patch | 66 + ...itmap_mutex-out-of-migration_bitmap_.patch | 111 + ...ank-outside-qemu_start_incoming_migr.patch | 94 + ...er-continue-to-wait-card-unplug-on-e.patch | 59 + ...-failover-reset-partially_hotplugged.patch | 44 + ...wait-unplug-loop-to-its-own-function.patch | 118 + ...90-ccw-Allow-building-with-Clang-too.patch | 77 + ...-Fix-inline-assembly-for-older-versi.patch | 106 + ...-Fix-the-cc-option-macro-in-the-Make.patch | 48 + ...-Silence-GCC-11-stringop-overflow-wa.patch | 75 + ...-Silence-warning-from-Clang-by-marki.patch | 59 + ...-Use-reset_psw-pointer-instead-of-ha.patch | 60 + 
...-bootmap-Silence-compiler-warning-fr.patch | 56 + ...-netboot-Use-Wl-prefix-to-pass-param.patch | 44 + ...p-broken-bitmaps-for-convert-bitmaps.patch | 265 + ...st-on-convert-bitmaps-with-inconsist.patch | 145 + kvm-ratelimit-protect-with-a-mutex.patch | 133 + qemu-kvm.spec | 165 +- 37 files changed, 10130 insertions(+), 4 deletions(-) create mode 100644 kvm-Remove-RHEL-7.0.0-machine-type.patch create mode 100644 kvm-Remove-RHEL-7.1.0-machine-type.patch create mode 100644 kvm-Remove-RHEL-7.2.0-machine-type.patch create mode 100644 kvm-Remove-RHEL-7.3.0-machine-types.patch create mode 100644 kvm-Remove-RHEL-7.4.0-machine-types.patch create mode 100644 kvm-Remove-RHEL-7.5.0-machine-types.patch create mode 100644 kvm-Update-Linux-headers-to-5.13-rc4.patch create mode 100644 kvm-aarch64-Add-USB-storage-devices.patch create mode 100644 kvm-acpi-pc-revert-back-to-v5.2-PCI-slot-enumeration.patch create mode 100644 kvm-audio-Never-send-migration-section.patch create mode 100644 kvm-block-nvme-Fix-VFIO_MAP_DMA-failed-No-space-left-on-.patch create mode 100644 kvm-configure-Fix-endianess-test-with-LTO.patch create mode 100644 kvm-hmp-Fix-loadvm-to-resume-the-VM-on-success-instead-o.patch create mode 100644 kvm-i386-Add-ratelimit-for-bus-locks-acquired-in-guest.patch create mode 100644 kvm-i386-cpu-Expose-AVX_VNNI-instruction-to-guest.patch create mode 100644 kvm-iotests-Improve-and-rename-test-291-to-qemu-img-bitm.patch create mode 100644 kvm-iothread-add-aio-max-batch-parameter.patch create mode 100644 kvm-iothread-generalize-iothread_set_param-iothread_get_.patch create mode 100644 kvm-linux-aio-limit-the-batch-size-using-aio-max-batch-p.patch create mode 100644 kvm-migration-Allow-reset-of-postcopy_recover_triggered-.patch create mode 100644 kvm-migration-Move-bitmap_mutex-out-of-migration_bitmap_.patch create mode 100644 kvm-migration-Move-yank-outside-qemu_start_incoming_migr.patch create mode 100644 kvm-migration-failover-continue-to-wait-card-unplug-on-e.patch 
create mode 100644 kvm-migration-failover-reset-partially_hotplugged.patch create mode 100644 kvm-migration-move-wait-unplug-loop-to-its-own-function.patch create mode 100644 kvm-pc-bios-s390-ccw-Allow-building-with-Clang-too.patch create mode 100644 kvm-pc-bios-s390-ccw-Fix-inline-assembly-for-older-versi.patch create mode 100644 kvm-pc-bios-s390-ccw-Fix-the-cc-option-macro-in-the-Make.patch create mode 100644 kvm-pc-bios-s390-ccw-Silence-GCC-11-stringop-overflow-wa.patch create mode 100644 kvm-pc-bios-s390-ccw-Silence-warning-from-Clang-by-marki.patch create mode 100644 kvm-pc-bios-s390-ccw-Use-reset_psw-pointer-instead-of-ha.patch create mode 100644 kvm-pc-bios-s390-ccw-bootmap-Silence-compiler-warning-fr.patch create mode 100644 kvm-pc-bios-s390-ccw-netboot-Use-Wl-prefix-to-pass-param.patch create mode 100644 kvm-qemu-img-Add-skip-broken-bitmaps-for-convert-bitmaps.patch create mode 100644 kvm-qemu-img-Fail-fast-on-convert-bitmaps-with-inconsist.patch create mode 100644 kvm-ratelimit-protect-with-a-mutex.patch diff --git a/kvm-Remove-RHEL-7.0.0-machine-type.patch b/kvm-Remove-RHEL-7.0.0-machine-type.patch new file mode 100644 index 0000000..0e0ca56 --- /dev/null +++ b/kvm-Remove-RHEL-7.0.0-machine-type.patch @@ -0,0 +1,197 @@ +From 4fa7a78aa900988816d07034943be384d57117f0 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Aug 2021 10:24:56 +0200 +Subject: [PATCH 03/39] Remove RHEL 7.0.0 machine type + +RH-Author: quintela1 +RH-MergeRequest: 30: Remove RHEL 7.x.0 machine types +RH-Commit: [1/6] f182af372d3c38c3c1960af0d5cd37aba7205848 (juan.quintela/qemu-kvm) +RH-Bugzilla: 1968519 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina + +Everything is used elsewhere except for cve_2014_5263 fields. Remove those. 
+ +Signed-off-by: Juan Quintela +Signed-off-by: Miroslav Rezanina +--- + hw/i386/pc.c | 35 ----------------------------------- + hw/i386/pc_piix.c | 34 ---------------------------------- + hw/usb/hcd-xhci.c | 20 -------------------- + hw/usb/hcd-xhci.h | 2 -- + include/hw/i386/pc.h | 3 --- + include/hw/usb.h | 3 --- + 6 files changed, 97 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index cdbfa84d2e..516ca50353 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -598,41 +598,6 @@ const size_t pc_rhel_7_1_compat_len = G_N_ELEMENTS(pc_rhel_7_1_compat); + * PC_RHEL_7_*_COMPAT apply both to i440fx and q35 types. + */ + +-/* +- * RHEL-7 is based on QEMU 1.5.3, so this needs the PC_COMPAT_* +- * between our base and 1.5, less stuff backported to RHEL-7.0 +- * (usb-device.msos-desc), less stuff for devices we changed +- * (qemu64-x86_64-cpu) or don't support (hpet, pci-serial-2x, +- * pci-serial-4x) in 7.0. +- */ +-GlobalProperty pc_rhel_7_0_compat[] = { +- { "virtio-scsi-pci", "any_layout", "off" }, +- { "PIIX4_PM", "memory-hotplug-support", "off" }, +- { "apic", "version", stringify(0x11) }, +- { "nec-usb-xhci", "superspeed-ports-first", "off" }, +- { "nec-usb-xhci", "force-pcie-endcap", "on" }, +- { "pci-serial", "prog_if", stringify(0) }, +- { "virtio-net-pci", "guest_announce", "off" }, +- { "ICH9-LPC", "memory-hotplug-support", "off" }, +- { "xio3130-downstream", COMPAT_PROP_PCP, "off" }, +- { "ioh3420", COMPAT_PROP_PCP, "off" }, +- { "PIIX4_PM", "acpi-pci-hotplug-with-bridge-support", "off" }, +- { "e1000", "mitigation", "off" }, +- { "virtio-net-pci", "ctrl_guest_offloads", "off" }, +- { "Conroe" "-" TYPE_X86_CPU, "x2apic", "on" }, +- { "Penryn" "-" TYPE_X86_CPU, "x2apic", "on" }, +- { "Nehalem" "-" TYPE_X86_CPU, "x2apic", "on" }, +- { "Nehalem-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, +- { "Westmere" "-" TYPE_X86_CPU, "x2apic", "on" }, +- { "Westmere-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, +- { "Opteron_G1" "-" TYPE_X86_CPU, "x2apic", "on" }, +- { 
"Opteron_G2" "-" TYPE_X86_CPU, "x2apic", "on" }, +- { "Opteron_G3" "-" TYPE_X86_CPU, "x2apic", "on" }, +- { "Opteron_G4" "-" TYPE_X86_CPU, "x2apic", "on" }, +- { "Opteron_G5" "-" TYPE_X86_CPU, "x2apic", "on" }, +-}; +-const size_t pc_rhel_7_0_compat_len = G_N_ELEMENTS(pc_rhel_7_0_compat); +- + GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) + { + GSIState *s; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 5d61c9b833..b4e8034671 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1116,37 +1116,3 @@ static void pc_machine_rhel710_options(MachineClass *m) + + DEFINE_PC_MACHINE(rhel710, "pc-i440fx-rhel7.1.0", pc_init_rhel710, + pc_machine_rhel710_options); +- +-static void pc_compat_rhel700(MachineState *machine) +-{ +- PCMachineState *pcms = PC_MACHINE(machine); +- PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); +- +- pc_compat_rhel710(machine); +- +- /* Upstream enables it for everyone, we're a little more selective */ +- x86_cpu_change_kvm_default("x2apic", NULL); +- x86_cpu_change_kvm_default("svm", NULL); +- pcmc->legacy_acpi_table_size = 6418; /* see pc_compat_2_0() */ +- pcmc->smbios_legacy_mode = true; +- pcmc->has_reserved_memory = false; +- migrate_cve_2014_5263_xhci_fields = true; +-} +- +-static void pc_init_rhel700(MachineState *machine) +-{ +- pc_compat_rhel700(machine); +- pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ +- TYPE_I440FX_PCI_DEVICE); +-} +- +-static void pc_machine_rhel700_options(MachineClass *m) +-{ +- pc_machine_rhel710_options(m); +- m->family = "pc_piix_Y"; +- m->desc = "RHEL 7.0.0 PC (i440FX + PIIX, 1996)"; +- compat_props_add(m->compat_props, pc_rhel_7_0_compat, pc_rhel_7_0_compat_len); +-} +- +-DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, +- pc_machine_rhel700_options); +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index 6d1f278aad..46212b1e69 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c +@@ -3490,27 +3490,9 @@ static const VMStateDescription 
vmstate_xhci_slot = { + } + }; + +-static int xhci_event_pre_save(void *opaque) +-{ +- XHCIEvent *s = opaque; +- +- s->cve_2014_5263_a = ((uint8_t *)&s->type)[0]; +- s->cve_2014_5263_b = ((uint8_t *)&s->type)[1]; +- +- return 0; +-} +- +-bool migrate_cve_2014_5263_xhci_fields; +- +-static bool xhci_event_cve_2014_5263(void *opaque, int version_id) +-{ +- return migrate_cve_2014_5263_xhci_fields; +-} +- + static const VMStateDescription vmstate_xhci_event = { + .name = "xhci-event", + .version_id = 1, +- .pre_save = xhci_event_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT32(type, XHCIEvent), + VMSTATE_UINT32(ccode, XHCIEvent), +@@ -3519,8 +3501,6 @@ static const VMStateDescription vmstate_xhci_event = { + VMSTATE_UINT32(flags, XHCIEvent), + VMSTATE_UINT8(slotid, XHCIEvent), + VMSTATE_UINT8(epid, XHCIEvent), +- VMSTATE_UINT8_TEST(cve_2014_5263_a, XHCIEvent, xhci_event_cve_2014_5263), +- VMSTATE_UINT8_TEST(cve_2014_5263_b, XHCIEvent, xhci_event_cve_2014_5263), + VMSTATE_END_OF_LIST() + } + }; +diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h +index f450ffd13b..7bba361f3b 100644 +--- a/hw/usb/hcd-xhci.h ++++ b/hw/usb/hcd-xhci.h +@@ -149,8 +149,6 @@ typedef struct XHCIEvent { + uint32_t flags; + uint8_t slotid; + uint8_t epid; +- uint8_t cve_2014_5263_a; +- uint8_t cve_2014_5263_b; + } XHCIEvent; + + typedef struct XHCIInterrupter { +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 1980c93f41..2d17daf71f 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -314,9 +314,6 @@ extern const size_t pc_rhel_7_2_compat_len; + extern GlobalProperty pc_rhel_7_1_compat[]; + extern const size_t pc_rhel_7_1_compat_len; + +-extern GlobalProperty pc_rhel_7_0_compat[]; +-extern const size_t pc_rhel_7_0_compat_len; +- + /* Helper for setting model-id for CPU models that changed model-id + * depending on QEMU versions up to QEMU 2.4. 
+ */ +diff --git a/include/hw/usb.h b/include/hw/usb.h +index edb2cd94b6..436e07b304 100644 +--- a/include/hw/usb.h ++++ b/include/hw/usb.h +@@ -577,7 +577,4 @@ void usb_pcap_init(FILE *fp); + void usb_pcap_ctrl(USBPacket *p, bool setup); + void usb_pcap_data(USBPacket *p, bool setup); + +-/* hcd-xhci.c -- rhel7.0.0 machine type compatibility */ +-extern bool migrate_cve_2014_5263_xhci_fields; +- + #endif +-- +2.27.0 + diff --git a/kvm-Remove-RHEL-7.1.0-machine-type.patch b/kvm-Remove-RHEL-7.1.0-machine-type.patch new file mode 100644 index 0000000..d3552f5 --- /dev/null +++ b/kvm-Remove-RHEL-7.1.0-machine-type.patch @@ -0,0 +1,292 @@ +From 224b28d337f36a937b34685519d7b657974a95d0 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Aug 2021 10:46:56 +0200 +Subject: [PATCH 04/39] Remove RHEL 7.1.0 machine type + +RH-Author: quintela1 +RH-MergeRequest: 30: Remove RHEL 7.x.0 machine types +RH-Commit: [2/6] af1d8896d63756d1acc5af8658465808c4a13ca3 (juan.quintela/qemu-kvm) +RH-Bugzilla: 1968519 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina + +migrate_pre_2_2 was a redhat artifact, removed. +broken_reserved_end is already fixed, nothing to do. 
+ +Signed-off-by: Juan Quintela +Signed-off-by: Miroslav Rezanina +--- + hw/char/serial.c | 12 ---------- + hw/core/machine.c | 23 ------------------- + hw/i386/pc.c | 51 ------------------------------------------- + hw/i386/pc_piix.c | 41 ---------------------------------- + hw/rtc/mc146818rtc.c | 4 ---- + include/hw/boards.h | 3 --- + include/hw/i386/pc.h | 3 --- + migration/migration.c | 2 -- + migration/migration.h | 5 ----- + 9 files changed, 144 deletions(-) + +diff --git a/hw/char/serial.c b/hw/char/serial.c +index cc378142a3..3e4344cb2a 100644 +--- a/hw/char/serial.c ++++ b/hw/char/serial.c +@@ -690,9 +690,6 @@ static int serial_post_load(void *opaque, int version_id) + static bool serial_thr_ipending_needed(void *opaque) + { + SerialState *s = opaque; +- if (migrate_pre_2_2) { +- return false; +- } + + if (s->ier & UART_IER_THRI) { + bool expected_value = ((s->iir & UART_IIR_ID) == UART_IIR_THRI); +@@ -774,9 +771,6 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { + static bool serial_fifo_timeout_timer_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; +- if (migrate_pre_2_2) { +- return false; +- } + + return timer_pending(s->fifo_timeout_timer); + } +@@ -795,9 +789,6 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { + static bool serial_timeout_ipending_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; +- if (migrate_pre_2_2) { +- return false; +- } + + return s->timeout_ipending != 0; + } +@@ -816,9 +807,6 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { + static bool serial_poll_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; +- if (migrate_pre_2_2) { +- return false; +- } + + return s->poll_msl >= 0; + } +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 6c534e14fa..cc7c826593 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -270,29 +270,6 @@ GlobalProperty hw_compat_rhel_7_2[] = { + }; + const size_t 
hw_compat_rhel_7_2_len = G_N_ELEMENTS(hw_compat_rhel_7_2); + +-/* Mostly like hw_compat_2_1 but: +- * we don't need virtio-scsi-pci since 7.0 already had that on +- * +- * RH: Note, qemu-extended-regs should have been enabled in the 7.1 +- * machine type, but was accidentally turned off in 7.2 onwards. +- */ +-GlobalProperty hw_compat_rhel_7_1[] = { +- { "intel-hda-generic", "old_msi_addr", "on" }, +- { "VGA", "qemu-extended-regs", "off" }, +- { "secondary-vga", "qemu-extended-regs", "off" }, +- { "usb-mouse", "usb_version", stringify(1) }, +- { "usb-kbd", "usb_version", stringify(1) }, +- { "virtio-pci", "virtio-pci-bus-master-bug-migration", "on" }, +- { "virtio-blk-pci", "any_layout", "off" }, +- { "virtio-balloon-pci", "any_layout", "off" }, +- { "virtio-serial-pci", "any_layout", "off" }, +- { "virtio-9p-pci", "any_layout", "off" }, +- { "virtio-rng-pci", "any_layout", "off" }, +- /* HW_COMPAT_RHEL7_1 - introduced with 2.10.0 */ +- { "migration", "send-configuration", "off" }, +-}; +-const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); +- + GlobalProperty hw_compat_5_2[] = { + { "ICH9-LPC", "smm-compat", "on"}, + { "PIIX4_PM", "smm-compat", "on"}, +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 516ca50353..3c1f5be4fa 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -541,57 +541,6 @@ GlobalProperty pc_rhel_7_2_compat[] = { + }; + const size_t pc_rhel_7_2_compat_len = G_N_ELEMENTS(pc_rhel_7_2_compat); + +-GlobalProperty pc_rhel_7_1_compat[] = { +- { "kvm64" "-" TYPE_X86_CPU, "vme", "off" }, +- { "kvm32" "-" TYPE_X86_CPU, "vme", "off" }, +- { "Conroe" "-" TYPE_X86_CPU, "vme", "off" }, +- { "Penryn" "-" TYPE_X86_CPU, "vme", "off" }, +- { "Nehalem" "-" TYPE_X86_CPU, "vme", "off" }, +- { "Nehalem-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, +- { "Westmere" "-" TYPE_X86_CPU, "vme", "off" }, +- { "Westmere-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, +- { "SandyBridge" "-" TYPE_X86_CPU, "vme", "off" }, +- { "SandyBridge-IBRS" "-" TYPE_X86_CPU, "vme", "off" 
}, +- { "Haswell" "-" TYPE_X86_CPU, "vme", "off" }, +- { "Haswell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, +- { "Broadwell" "-" TYPE_X86_CPU, "vme", "off" }, +- { "Broadwell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, +- { "Opteron_G1" "-" TYPE_X86_CPU, "vme", "off" }, +- { "Opteron_G2" "-" TYPE_X86_CPU, "vme", "off" }, +- { "Opteron_G3" "-" TYPE_X86_CPU, "vme", "off" }, +- { "Opteron_G4" "-" TYPE_X86_CPU, "vme", "off" }, +- { "Opteron_G5" "-" TYPE_X86_CPU, "vme", "off" }, +- { "Haswell" "-" TYPE_X86_CPU, "f16c", "off" }, +- { "Haswell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, +- { "Haswell" "-" TYPE_X86_CPU, "rdrand", "off" }, +- { "Haswell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, +- { "Broadwell" "-" TYPE_X86_CPU, "f16c", "off" }, +- { "Broadwell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, +- { "Broadwell" "-" TYPE_X86_CPU, "rdrand", "off" }, +- { "Broadwell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, +- { "coreduo" "-" TYPE_X86_CPU, "vmx", "on" }, +- { "core2duo" "-" TYPE_X86_CPU, "vmx", "on" }, +- { "qemu64" "-" TYPE_X86_CPU, "min-level", stringify(4) }, +- { "kvm64" "-" TYPE_X86_CPU, "min-level", stringify(5) }, +- { "pentium3" "-" TYPE_X86_CPU, "min-level", stringify(2) }, +- { "n270" "-" TYPE_X86_CPU, "min-level", stringify(5) }, +- { "Conroe" "-" TYPE_X86_CPU, "min-level", stringify(4) }, +- { "Penryn" "-" TYPE_X86_CPU, "min-level", stringify(4) }, +- { "Nehalem" "-" TYPE_X86_CPU, "min-level", stringify(4) }, +- { "n270" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, +- { "Penryn" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, +- { "Conroe" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, +- { "Nehalem" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, +- { "Westmere" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, +- { "SandyBridge" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, +- { "IvyBridge" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, +- { "Haswell" "-" TYPE_X86_CPU, "min-xlevel", 
stringify(0x8000000a) }, +- { "Haswell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, +- { "Broadwell" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, +- { "Broadwell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, +-}; +-const size_t pc_rhel_7_1_compat_len = G_N_ELEMENTS(pc_rhel_7_1_compat); +- + /* + * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine + * types as the PC_COMPAT_* do for upstream types. +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index b4e8034671..2a6a28f1e5 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1075,44 +1075,3 @@ static void pc_machine_rhel720_options(MachineClass *m) + + DEFINE_PC_MACHINE(rhel720, "pc-i440fx-rhel7.2.0", pc_init_rhel720, + pc_machine_rhel720_options); +- +-static void pc_compat_rhel710(MachineState *machine) +-{ +- PCMachineState *pcms = PC_MACHINE(machine); +- PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); +- +- /* From pc_compat_2_2 */ +- pcmc->rsdp_in_ram = false; +- machine->suppress_vmdesc = true; +- +- /* From pc_compat_2_1 */ +- pcmc->smbios_uuid_encoded = false; +- x86_cpu_change_kvm_default("svm", NULL); +- pcmc->enforce_aligned_dimm = false; +- +- /* Disable all the extra subsections that were added in 2.2 */ +- migrate_pre_2_2 = true; +- +- /* From pc_i440fx_2_4_machine_options */ +- pcmc->broken_reserved_end = true; +-} +- +-static void pc_init_rhel710(MachineState *machine) +-{ +- pc_compat_rhel710(machine); +- pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ +- TYPE_I440FX_PCI_DEVICE); +-} +- +-static void pc_machine_rhel710_options(MachineClass *m) +-{ +- pc_machine_rhel720_options(m); +- m->family = "pc_piix_Y"; +- m->desc = "RHEL 7.1.0 PC (i440FX + PIIX, 1996)"; +- m->default_display = "cirrus"; +- compat_props_add(m->compat_props, hw_compat_rhel_7_1, hw_compat_rhel_7_1_len); +- compat_props_add(m->compat_props, pc_rhel_7_1_compat, pc_rhel_7_1_compat_len); +-} +- +-DEFINE_PC_MACHINE(rhel710, "pc-i440fx-rhel7.1.0", 
pc_init_rhel710, +- pc_machine_rhel710_options); +diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c +index 4a2e52031b..3a70a988e1 100644 +--- a/hw/rtc/mc146818rtc.c ++++ b/hw/rtc/mc146818rtc.c +@@ -824,10 +824,6 @@ static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) + { + RTCState *s = (RTCState *)opaque; + +- if (migrate_pre_2_2) { +- return false; +- } +- + return s->irq_reinject_on_ack_count != 0; + } + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 90ae100bfc..070db0b9b1 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -445,7 +445,4 @@ extern const size_t hw_compat_rhel_7_3_len; + extern GlobalProperty hw_compat_rhel_7_2[]; + extern const size_t hw_compat_rhel_7_2_len; + +-extern GlobalProperty hw_compat_rhel_7_1[]; +-extern const size_t hw_compat_rhel_7_1_len; +- + #endif +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 2d17daf71f..84d985f57c 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -311,9 +311,6 @@ extern const size_t pc_rhel_7_3_compat_len; + extern GlobalProperty pc_rhel_7_2_compat[]; + extern const size_t pc_rhel_7_2_compat_len; + +-extern GlobalProperty pc_rhel_7_1_compat[]; +-extern const size_t pc_rhel_7_1_compat_len; +- + /* Helper for setting model-id for CPU models that changed model-id + * depending on QEMU versions up to QEMU 2.4. + */ +diff --git a/migration/migration.c b/migration/migration.c +index 9d185f0e28..9cf1cde39d 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -167,8 +167,6 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, + MIGRATION_CAPABILITY_X_COLO, + MIGRATION_CAPABILITY_VALIDATE_UUID); + +-bool migrate_pre_2_2; +- + /* When we add fault tolerance, we could have several + migrations at once. 
For now we don't need to add + dynamic creation of migration */ +diff --git a/migration/migration.h b/migration/migration.h +index 1b6c69751c..db6708326b 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -368,11 +368,6 @@ bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm, + void migrate_add_address(SocketAddress *address); + + int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); +-/* +- * Disables a load of subsections that were added in 2.2/rh7.2 for backwards +- * migration compatibility. +- */ +-extern bool migrate_pre_2_2; + + #define qemu_ram_foreach_block \ + #warning "Use foreach_not_ignored_block in migration code" +-- +2.27.0 + diff --git a/kvm-Remove-RHEL-7.2.0-machine-type.patch b/kvm-Remove-RHEL-7.2.0-machine-type.patch new file mode 100644 index 0000000..d014be6 --- /dev/null +++ b/kvm-Remove-RHEL-7.2.0-machine-type.patch @@ -0,0 +1,192 @@ +From 999b28683394c2939ca8bd6b692ed2169860ced9 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Aug 2021 11:02:24 +0200 +Subject: [PATCH 05/39] Remove RHEL 7.2.0 machine type + +RH-Author: quintela1 +RH-MergeRequest: 30: Remove RHEL 7.x.0 machine types +RH-Commit: [3/6] 575f0fe16c1928a41628f1f704a4d5d370679a82 (juan.quintela/qemu-kvm) +RH-Bugzilla: 1968519 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina + +Signed-off-by: Juan Quintela +Signed-off-by: Miroslav Rezanina +--- + hw/core/machine.c | 24 ------------------------ + hw/i386/pc.c | 22 ---------------------- + hw/i386/pc_piix.c | 26 -------------------------- + hw/ppc/spapr.c | 26 -------------------------- + include/hw/boards.h | 3 --- + include/hw/i386/pc.h | 3 --- + 6 files changed, 104 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index cc7c826593..54eb8376a7 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -246,30 +246,6 @@ GlobalProperty hw_compat_rhel_7_3[] = { + }; + const size_t hw_compat_rhel_7_3_len = 
G_N_ELEMENTS(hw_compat_rhel_7_3); + +-/* Mostly like hw_compat_2_4 + 2_3 but: +- * we don't need "any_layout" as it has been backported to 7.2 +- */ +-GlobalProperty hw_compat_rhel_7_2[] = { +- { "virtio-blk-device", "scsi", "true" }, +- { "e1000-82540em", "extra_mac_registers", "off" }, +- { "virtio-pci", "x-disable-pcie", "on" }, +- { "virtio-pci", "migrate-extra", "off" }, +- { "fw_cfg_mem", "dma_enabled", "off" }, +- { "fw_cfg_io", "dma_enabled", "off" }, +- { "isa-fdc", "fallback", "144" }, +- /* Optional because not all virtio-pci devices support legacy mode */ +- { "virtio-pci", "disable-modern", "on", .optional = true }, +- { "virtio-pci", "disable-legacy", "off", .optional = true }, +- { TYPE_PCI_DEVICE, "x-pcie-lnksta-dllla", "off" }, +- { "virtio-pci", "page-per-vq", "on" }, +- /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ +- { "migration", "send-section-footer", "off" }, +- /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ +- { "migration", "store-global-state", "off", +- }, +-}; +-const size_t hw_compat_rhel_7_2_len = G_N_ELEMENTS(hw_compat_rhel_7_2); +- + GlobalProperty hw_compat_5_2[] = { + { "ICH9-LPC", "smm-compat", "on"}, + { "PIIX4_PM", "smm-compat", "on"}, +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 3c1f5be4fa..938cb82818 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -519,28 +519,6 @@ GlobalProperty pc_rhel_7_3_compat[] = { + }; + const size_t pc_rhel_7_3_compat_len = G_N_ELEMENTS(pc_rhel_7_3_compat); + +-GlobalProperty pc_rhel_7_2_compat[] = { +- { "phenom" "-" TYPE_X86_CPU, "rdtscp", "off"}, +- { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" }, +- { "qemu64" "-" TYPE_X86_CPU, "abm", "on" }, +- { "Haswell-" TYPE_X86_CPU, "abm", "off" }, +- { "Haswell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, +- { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" }, +- { "Haswell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, +- { "Broadwell-" TYPE_X86_CPU, "abm", "off" }, +- { "Broadwell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, +- { "Broadwell-noTSX-" 
TYPE_X86_CPU, "abm", "off" }, +- { "Broadwell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, +- { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" }, +- { TYPE_X86_CPU, "check", "off" }, +- { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" }, +- { TYPE_X86_CPU, "arat", "off" }, +- { "usb-redir", "streams", "off" }, +- { TYPE_X86_CPU, "fill-mtrr-mask", "off" }, +- { "apic-common", "legacy-instance-id", "on" }, +-}; +-const size_t pc_rhel_7_2_compat_len = G_N_ELEMENTS(pc_rhel_7_2_compat); +- + /* + * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine + * types as the PC_COMPAT_* do for upstream types. +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 2a6a28f1e5..201cbbdb01 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1049,29 +1049,3 @@ static void pc_machine_rhel730_options(MachineClass *m) + + DEFINE_PC_MACHINE(rhel730, "pc-i440fx-rhel7.3.0", pc_init_rhel730, + pc_machine_rhel730_options); +- +- +-static void pc_init_rhel720(MachineState *machine) +-{ +- pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ +- TYPE_I440FX_PCI_DEVICE); +-} +- +-static void pc_machine_rhel720_options(MachineClass *m) +-{ +- PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +- X86MachineClass *x86mc = X86_MACHINE_CLASS(m); +- pc_machine_rhel730_options(m); +- m->desc = "RHEL 7.2.0 PC (i440FX + PIIX, 1996)"; +- /* From pc_i440fx_2_5_machine_options */ +- x86mc->save_tsc_khz = false; +- m->legacy_fw_cfg_order = 1; +- /* Note: broken_reserved_end was already in 7.2 */ +- /* From pc_i440fx_2_6_machine_options */ +- pcmc->legacy_cpu_hotplug = true; +- compat_props_add(m->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); +- compat_props_add(m->compat_props, pc_rhel_7_2_compat, pc_rhel_7_2_compat_len); +-} +- +-DEFINE_PC_MACHINE(rhel720, "pc-i440fx-rhel7.2.0", pc_init_rhel720, +- pc_machine_rhel720_options); +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 11db32c537..0bc558e0b7 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -5397,32 +5397,6 @@ static 
void spapr_machine_rhel730sxxm_class_options(MachineClass *mc) + + DEFINE_SPAPR_MACHINE(rhel730sxxm, "rhel7.3.0-sxxm", false); + +-/* +- * pseries-rhel7.2.0 +- */ +-/* Should be like spapr_compat_2_5 + 2_4 + 2_3, but "dynamic-reconfiguration" +- * has been backported to RHEL7_2 so we don't need it here. +- */ +- +-GlobalProperty spapr_compat_rhel7_2[] = { +- { "spapr-vlan", "use-rx-buffer-pools", "off" }, +- { TYPE_SPAPR_PCI_HOST_BRIDGE, "ddw", "off" }, +-}; +-const size_t spapr_compat_rhel7_2_len = G_N_ELEMENTS(spapr_compat_rhel7_2); +- +-static void spapr_machine_rhel720_class_options(MachineClass *mc) +-{ +- SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +- +- spapr_machine_rhel730_class_options(mc); +- smc->use_ohci_by_default = true; +- mc->has_hotpluggable_cpus = NULL; +- compat_props_add(mc->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); +- compat_props_add(mc->compat_props, spapr_compat_rhel7_2, spapr_compat_rhel7_2_len); +-} +- +-DEFINE_SPAPR_MACHINE(rhel720, "rhel7.2.0", false); +- + static void spapr_machine_register_types(void) + { + type_register_static(&spapr_machine_info); +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 070db0b9b1..43eb868ceb 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -442,7 +442,4 @@ extern const size_t hw_compat_rhel_7_4_len; + extern GlobalProperty hw_compat_rhel_7_3[]; + extern const size_t hw_compat_rhel_7_3_len; + +-extern GlobalProperty hw_compat_rhel_7_2[]; +-extern const size_t hw_compat_rhel_7_2_len; +- + #endif +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 84d985f57c..c26c6dcc72 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -308,9 +308,6 @@ extern const size_t pc_rhel_7_4_compat_len; + extern GlobalProperty pc_rhel_7_3_compat[]; + extern const size_t pc_rhel_7_3_compat_len; + +-extern GlobalProperty pc_rhel_7_2_compat[]; +-extern const size_t pc_rhel_7_2_compat_len; +- + /* Helper for setting model-id for CPU models that changed 
model-id + * depending on QEMU versions up to QEMU 2.4. + */ +-- +2.27.0 + diff --git a/kvm-Remove-RHEL-7.3.0-machine-types.patch b/kvm-Remove-RHEL-7.3.0-machine-types.patch new file mode 100644 index 0000000..dbf9b49 --- /dev/null +++ b/kvm-Remove-RHEL-7.3.0-machine-types.patch @@ -0,0 +1,315 @@ +From 1610bd2ce98e3d93296fd3bc4d2c24e905428a4a Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Aug 2021 11:24:26 +0200 +Subject: [PATCH 06/39] Remove RHEL 7.3.0 machine types + +RH-Author: quintela1 +RH-MergeRequest: 30: Remove RHEL 7.x.0 machine types +RH-Commit: [4/6] 7a674496dd29a7a87843770fd0459b85831cc866 (juan.quintela/qemu-kvm) +RH-Bugzilla: 1968519 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina + +Only thing to remove with care was __redhat_e1000e_7_3_intr_state. + +Signed-off-by: Juan Quintela +Signed-off-by: Miroslav Rezanina +--- + hw/core/machine.c | 22 --------------------- + hw/i386/pc.c | 28 --------------------------- + hw/i386/pc_piix.c | 19 ------------------ + hw/i386/pc_q35.c | 19 ------------------ + hw/net/e1000e.c | 22 --------------------- + hw/ppc/spapr.c | 46 -------------------------------------------- + include/hw/boards.h | 3 --- + include/hw/i386/pc.h | 3 --- + 8 files changed, 162 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 54eb8376a7..0bcaabd8a9 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -224,28 +224,6 @@ GlobalProperty hw_compat_rhel_7_4[] = { + }; + + const size_t hw_compat_rhel_7_4_len = G_N_ELEMENTS(hw_compat_rhel_7_4); +-/* Mostly like HW_COMPAT_2_6 + HW_COMPAT_2_7 + HW_COMPAT_2_8 except +- * disable-modern, disable-legacy, page-per-vq have already been +- * backported to RHEL7.3 +- */ +-GlobalProperty hw_compat_rhel_7_3[] = { +- { "virtio-mmio", "format_transport_address", "off" }, +- { "virtio-serial-device", "emergency-write", "off" }, +- { "ioapic", "version", "0x11" }, +- { "intel-iommu", "x-buggy-eim", "true" }, +- { "virtio-pci", 
"x-ignore-backend-features", "on" }, +- { "fw_cfg_mem", "x-file-slots", stringify(0x10) }, +- { "fw_cfg_io", "x-file-slots", stringify(0x10) }, +- { "pflash_cfi01", "old-multiple-chip-handling", "on" }, +- { TYPE_PCI_DEVICE, "x-pcie-extcap-init", "off" }, +- { "virtio-pci", "x-pcie-deverr-init", "off" }, +- { "virtio-pci", "x-pcie-lnkctl-init", "off" }, +- { "virtio-pci", "x-pcie-pm-init", "off" }, +- { "virtio-net-device", "x-mtu-bypass-backend", "off" }, +- { "e1000e", "__redhat_e1000e_7_3_intr_state", "on" }, +-}; +-const size_t hw_compat_rhel_7_3_len = G_N_ELEMENTS(hw_compat_rhel_7_3); +- + GlobalProperty hw_compat_5_2[] = { + { "ICH9-LPC", "smm-compat", "on"}, + { "PIIX4_PM", "smm-compat", "on"}, +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 938cb82818..75abe0acc2 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -491,34 +491,6 @@ GlobalProperty pc_rhel_7_4_compat[] = { + }; + const size_t pc_rhel_7_4_compat_len = G_N_ELEMENTS(pc_rhel_7_4_compat); + +-GlobalProperty pc_rhel_7_3_compat[] = { +- /* pc_rhel_7_3_compat from pc_compat_2_8 */ +- { "kvmclock", "x-mach-use-reliable-get-clock", "off" }, +- /* pc_rhel_7_3_compat from pc_compat_2_7 */ +- { TYPE_X86_CPU, "l3-cache", "off" }, +- /* pc_rhel_7_3_compat from pc_compat_2_7 */ +- { TYPE_X86_CPU, "full-cpuid-auto-level", "off" }, +- /* pc_rhel_7_3_compat from pc_compat_2_7 */ +- { "Opteron_G3" "-" TYPE_X86_CPU, "family", "15" }, +- /* pc_rhel_7_3_compat from pc_compat_2_7 */ +- { "Opteron_G3" "-" TYPE_X86_CPU, "model", "6" }, +- /* pc_rhel_7_3_compat from pc_compat_2_7 */ +- { "Opteron_G3" "-" TYPE_X86_CPU, "stepping", "1" }, +- /* pc_rhel_7_3_compat from pc_compat_2_7 */ +- { "isa-pcspk", "migrate", "off" }, +- /* pc_rhel_7_3_compat from pc_compat_2_6 */ +- { TYPE_X86_CPU, "cpuid-0xb", "off" }, +- /* pc_rhel_7_3_compat from pc_compat_2_8 */ +- { "ICH9-LPC", "x-smi-broadcast", "off" }, +- /* pc_rhel_7_3_compat from pc_compat_2_8 */ +- { TYPE_X86_CPU, "vmware-cpuid-freq", "off" }, +- /* pc_rhel_7_3_compat 
from pc_compat_2_8 */ +- { "Haswell-" TYPE_X86_CPU, "stepping", "1" }, +- /* pc_rhel_7_3_compat from pc_compat_2_3 added in 2.9*/ +- { TYPE_X86_CPU, "kvm-no-smi-migration", "on" }, +-}; +-const size_t pc_rhel_7_3_compat_len = G_N_ELEMENTS(pc_rhel_7_3_compat); +- + /* + * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine + * types as the PC_COMPAT_* do for upstream types. +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 201cbbdb01..64662cc3d5 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1030,22 +1030,3 @@ static void pc_machine_rhel740_options(MachineClass *m) + + DEFINE_PC_MACHINE(rhel740, "pc-i440fx-rhel7.4.0", pc_init_rhel740, + pc_machine_rhel740_options); +- +-static void pc_init_rhel730(MachineState *machine) +-{ +- pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ +- TYPE_I440FX_PCI_DEVICE); +-} +- +-static void pc_machine_rhel730_options(MachineClass *m) +-{ +- PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +- pc_machine_rhel740_options(m); +- m->desc = "RHEL 7.3.0 PC (i440FX + PIIX, 1996)"; +- pcmc->linuxboot_dma_enabled = false; +- compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); +- compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); +-} +- +-DEFINE_PC_MACHINE(rhel730, "pc-i440fx-rhel7.3.0", pc_init_rhel730, +- pc_machine_rhel730_options); +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 01ff3e0544..bf49a943dc 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -792,22 +792,3 @@ static void pc_q35_machine_rhel740_options(MachineClass *m) + + DEFINE_PC_MACHINE(q35_rhel740, "pc-q35-rhel7.4.0", pc_q35_init_rhel740, + pc_q35_machine_rhel740_options); +- +-static void pc_q35_init_rhel730(MachineState *machine) +-{ +- pc_q35_init(machine); +-} +- +-static void pc_q35_machine_rhel730_options(MachineClass *m) +-{ +- PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +- pc_q35_machine_rhel740_options(m); +- m->desc = "RHEL-7.3.0 PC (Q35 + ICH9, 2009)"; +- 
m->max_cpus = 255; +- pcmc->linuxboot_dma_enabled = false; +- compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); +- compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); +-} +- +-DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, +- pc_q35_machine_rhel730_options); +diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c +index 6d39c1f1c4..b22f232863 100644 +--- a/hw/net/e1000e.c ++++ b/hw/net/e1000e.c +@@ -79,12 +79,6 @@ struct E1000EState { + bool disable_vnet; + + E1000ECore core; +- +- /* 7.3 had the intr_state field that was in the original e1000e code +- * but that was removed prior to 2.7's release +- */ +- bool redhat_7_3_intr_state_enable; +- uint32_t redhat_7_3_intr_state; + }; + + #define E1000E_MMIO_IDX 0 +@@ -100,10 +94,6 @@ struct E1000EState { + #define E1000E_MSIX_TABLE (0x0000) + #define E1000E_MSIX_PBA (0x2000) + +-/* Values as in RHEL 7.3 build and original upstream */ +-#define RH_E1000E_USE_MSI BIT(0) +-#define RH_E1000E_USE_MSIX BIT(1) +- + static uint64_t + e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) + { +@@ -315,8 +305,6 @@ e1000e_init_msix(E1000EState *s) + } else { + if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { + msix_uninit(d, &s->msix, &s->msix); +- } else { +- s->redhat_7_3_intr_state |= RH_E1000E_USE_MSIX; + } + } + } +@@ -488,8 +476,6 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) + ret = msi_init(PCI_DEVICE(s), 0xD0, 1, true, false, NULL); + if (ret) { + trace_e1000e_msi_init_fail(ret); +- } else { +- s->redhat_7_3_intr_state |= RH_E1000E_USE_MSI; + } + + if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, +@@ -613,11 +599,6 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { + VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ + e1000e_vmstate_intr_timer, E1000IntrDelayTimer) + +-static bool rhel_7_3_check(void *opaque, int version_id) +-{ +- return ((E1000EState *)opaque)->redhat_7_3_intr_state_enable; +-} +- + 
static const VMStateDescription e1000e_vmstate = { + .name = "e1000e", + .version_id = 1, +@@ -629,7 +610,6 @@ static const VMStateDescription e1000e_vmstate = { + VMSTATE_MSIX(parent_obj, E1000EState), + + VMSTATE_UINT32(ioaddr, E1000EState), +- VMSTATE_UINT32_TEST(redhat_7_3_intr_state, E1000EState, rhel_7_3_check), + VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), + VMSTATE_UINT8(core.rx_desc_len, E1000EState), + VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, +@@ -678,8 +658,6 @@ static PropertyInfo e1000e_prop_disable_vnet, + + static Property e1000e_properties[] = { + DEFINE_NIC_PROPERTIES(E1000EState, conf), +- DEFINE_PROP_BOOL("__redhat_e1000e_7_3_intr_state", E1000EState, +- redhat_7_3_intr_state_enable, false), + DEFINE_PROP_SIGNED("disable_vnet_hdr", E1000EState, disable_vnet, false, + e1000e_prop_disable_vnet, bool), + DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 0bc558e0b7..ca0b99403e 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -5351,52 +5351,6 @@ static void spapr_machine_rhel740sxxm_class_options(MachineClass *mc) + + DEFINE_SPAPR_MACHINE(rhel740sxxm, "rhel7.4.0-sxxm", false); + +-/* +- * pseries-rhel7.3.0 +- * like spapr_compat_2_6/_2_7/_2_8 but "ddw" has been backported to RHEL7_3 +- */ +-GlobalProperty spapr_compat_rhel7_3[] = { +- { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem_win_size", "0xf80000000" }, +- { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem64_win_size", "0" }, +- { TYPE_POWERPC_CPU, "pre-2.8-migration", "on" }, +- { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-2.8-migration", "on" }, +- { TYPE_SPAPR_PCI_HOST_BRIDGE, "pcie-extended-configuration-space", "off" }, +-}; +-const size_t spapr_compat_rhel7_3_len = G_N_ELEMENTS(spapr_compat_rhel7_3); +- +-static void spapr_machine_rhel730_class_options(MachineClass *mc) +-{ +- SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +- +- spapr_machine_rhel740_class_options(mc); +- mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power7_v2.3"); +- 
mc->default_machine_opts = "modern-hotplug-events=off"; +- compat_props_add(mc->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); +- compat_props_add(mc->compat_props, spapr_compat_rhel7_3, spapr_compat_rhel7_3_len); +- +- smc->phb_placement = phb_placement_2_7; +-} +- +-DEFINE_SPAPR_MACHINE(rhel730, "rhel7.3.0", false); +- +-/* +- * pseries-rhel7.3.0-sxxm +- * +- * pseries-rhel7.3.0 with speculative execution exploit mitigations enabled by default +- */ +- +-static void spapr_machine_rhel730sxxm_class_options(MachineClass *mc) +-{ +- SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +- +- spapr_machine_rhel730_class_options(mc); +- smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; +- smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; +- smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; +-} +- +-DEFINE_SPAPR_MACHINE(rhel730sxxm, "rhel7.3.0-sxxm", false); +- + static void spapr_machine_register_types(void) + { + type_register_static(&spapr_machine_info); +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 43eb868ceb..24d4d726d8 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -439,7 +439,4 @@ extern const size_t hw_compat_rhel_7_5_len; + extern GlobalProperty hw_compat_rhel_7_4[]; + extern const size_t hw_compat_rhel_7_4_len; + +-extern GlobalProperty hw_compat_rhel_7_3[]; +-extern const size_t hw_compat_rhel_7_3_len; +- + #endif +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index c26c6dcc72..820fb3f577 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -305,9 +305,6 @@ extern const size_t pc_rhel_7_5_compat_len; + extern GlobalProperty pc_rhel_7_4_compat[]; + extern const size_t pc_rhel_7_4_compat_len; + +-extern GlobalProperty pc_rhel_7_3_compat[]; +-extern const size_t pc_rhel_7_3_compat_len; +- + /* Helper for setting model-id for CPU models that changed model-id + * depending on QEMU versions up to QEMU 2.4. 
+ */ +-- +2.27.0 + diff --git a/kvm-Remove-RHEL-7.4.0-machine-types.patch b/kvm-Remove-RHEL-7.4.0-machine-types.patch new file mode 100644 index 0000000..9484fc1 --- /dev/null +++ b/kvm-Remove-RHEL-7.4.0-machine-types.patch @@ -0,0 +1,301 @@ +From b64f8848efe610c52791587ec41301b73ec9165a Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Aug 2021 11:53:07 +0200 +Subject: [PATCH 07/39] Remove RHEL 7.4.0 machine types + +RH-Author: quintela1 +RH-MergeRequest: 30: Remove RHEL 7.x.0 machine types +RH-Commit: [5/6] a1940ac35591cebff52379f85656caf6a03328dd (juan.quintela/qemu-kvm) +RH-Bugzilla: 1968519 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina + +Revert pc_rom_ro hack. +Remove force_rev1_fadt hack, it is not needed anymore. + +Signed-off-by: Juan Quintela +Signed-off-by: Miroslav Rezanina +--- + hw/acpi/ich9.c | 15 --------------- + hw/core/machine.c | 9 --------- + hw/i386/acpi-build.c | 3 --- + hw/i386/pc.c | 18 +----------------- + hw/i386/pc_piix.c | 19 ------------------- + hw/i386/pc_q35.c | 18 ------------------ + hw/ppc/spapr.c | 43 ------------------------------------------ + include/hw/acpi/ich9.h | 3 --- + include/hw/boards.h | 3 --- + include/hw/i386/pc.h | 6 ------ + 10 files changed, 1 insertion(+), 136 deletions(-) + +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index 33b0c6e33c..7f01fad64c 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -369,18 +369,6 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) + s->pm.enable_tco = value; + } + +-static bool ich9_pm_get_force_rev1_fadt(Object *obj, Error **errp) +-{ +- ICH9LPCState *s = ICH9_LPC_DEVICE(obj); +- return s->pm.force_rev1_fadt; +-} +- +-static void ich9_pm_set_force_rev1_fadt(Object *obj, bool value, Error **errp) +-{ +- ICH9LPCState *s = ICH9_LPC_DEVICE(obj); +- s->pm.force_rev1_fadt = value; +-} +- + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + { + static const uint32_t gpe0_len = 
ICH9_PMIO_GPE0_LEN; +@@ -403,9 +391,6 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + object_property_add_bool(obj, "cpu-hotplug-legacy", + ich9_pm_get_cpu_hotplug_legacy, + ich9_pm_set_cpu_hotplug_legacy); +- object_property_add_bool(obj, "__com.redhat_force-rev1-fadt", +- ich9_pm_get_force_rev1_fadt, +- ich9_pm_set_force_rev1_fadt); + object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S3_DISABLED, + &pm->disable_s3, OBJ_PROP_FLAG_READWRITE); + object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S4_DISABLED, +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 0bcaabd8a9..6dc2651d73 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -215,15 +215,6 @@ GlobalProperty hw_compat_rhel_7_5[] = { + }; + const size_t hw_compat_rhel_7_5_len = G_N_ELEMENTS(hw_compat_rhel_7_5); + +-/* Mostly like hw_compat_2_9 except +- * x-mtu-bypass-backend, x-migrate-msix has already been +- * backported to RHEL7.4. shpc was already on in 7.4. +- */ +-GlobalProperty hw_compat_rhel_7_4[] = { +- { "intel-iommu", "pt", "off" }, +-}; +- +-const size_t hw_compat_rhel_7_4_len = G_N_ELEMENTS(hw_compat_rhel_7_4); + GlobalProperty hw_compat_5_2[] = { + { "ICH9-LPC", "smm-compat", "on"}, + { "PIIX4_PM", "smm-compat", "on"}, +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index 7bd67f7877..de98750aef 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -231,9 +231,6 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) + pm->fadt.reset_reg = r; + pm->fadt.reset_val = 0xf; + pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; +- if (object_property_get_bool(lpc, +- "__com.redhat_force-rev1-fadt", NULL)) +- pm->fadt.rev = 1; + pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; + pm->smi_on_cpuhp = + !!(smi_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT)); +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 75abe0acc2..6b39abce02 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -477,20 +477,6 @@ GlobalProperty pc_rhel_7_5_compat[] = { + }; 
+ const size_t pc_rhel_7_5_compat_len = G_N_ELEMENTS(pc_rhel_7_5_compat); + +-GlobalProperty pc_rhel_7_4_compat[] = { +- /* pc_rhel_7_4_compat from pc_compat_2_9 */ +- { "mch", "extended-tseg-mbytes", stringify(0) }, +- /* bz 1489800 */ +- { "ICH9-LPC", "__com.redhat_force-rev1-fadt", "on" }, +- /* pc_rhel_7_4_compat from pc_compat_2_10 */ +- { "i440FX-pcihost", "x-pci-hole64-fix", "off" }, +- /* pc_rhel_7_4_compat from pc_compat_2_10 */ +- { "q35-pcihost", "x-pci-hole64-fix", "off" }, +- /* pc_rhel_7_4_compat from pc_compat_2_10 */ +- { TYPE_X86_CPU, "x-hv-max-vps", "0x40" }, +-}; +-const size_t pc_rhel_7_4_compat_len = G_N_ELEMENTS(pc_rhel_7_4_compat); +- + /* + * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine + * types as the PC_COMPAT_* do for upstream types. +@@ -1094,8 +1080,7 @@ void pc_memory_init(PCMachineState *pcms, + option_rom_mr = g_malloc(sizeof(*option_rom_mr)); + memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, + &error_fatal); +- /* RH difference: See bz 1489800, explicitly make ROM ro */ +- if (pcmc->pc_rom_ro) { ++ if (pcmc->pci_enabled) { + memory_region_set_readonly(option_rom_mr, true); + } + memory_region_add_subregion_overlap(rom_memory, +@@ -1845,7 +1830,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->pvh_enabled = true; + pcmc->kvmclock_create_always = true; + assert(!mc->get_hotplug_handler); +- pcmc->pc_rom_ro = true; + mc->async_pf_vmexit_disable = false; + mc->get_hotplug_handler = pc_get_hotplug_handler; + mc->hotplug_allowed = pc_hotplug_allowed; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 64662cc3d5..fe2ac7593a 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1011,22 +1011,3 @@ static void pc_machine_rhel750_options(MachineClass *m) + + DEFINE_PC_MACHINE(rhel750, "pc-i440fx-rhel7.5.0", pc_init_rhel750, + pc_machine_rhel750_options); +- +-static void pc_init_rhel740(MachineState *machine) +-{ +- pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ 
+- TYPE_I440FX_PCI_DEVICE); +-} +- +-static void pc_machine_rhel740_options(MachineClass *m) +-{ +- PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +- pc_machine_rhel750_options(m); +- m->desc = "RHEL 7.4.0 PC (i440FX + PIIX, 1996)"; +- pcmc->pc_rom_ro = false; +- compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); +- compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); +-} +- +-DEFINE_PC_MACHINE(rhel740, "pc-i440fx-rhel7.4.0", pc_init_rhel740, +- pc_machine_rhel740_options); +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index bf49a943dc..9958ed9184 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -774,21 +774,3 @@ static void pc_q35_machine_rhel750_options(MachineClass *m) + + DEFINE_PC_MACHINE(q35_rhel750, "pc-q35-rhel7.5.0", pc_q35_init_rhel750, + pc_q35_machine_rhel750_options); +- +-static void pc_q35_init_rhel740(MachineState *machine) +-{ +- pc_q35_init(machine); +-} +- +-static void pc_q35_machine_rhel740_options(MachineClass *m) +-{ +- PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +- pc_q35_machine_rhel750_options(m); +- m->desc = "RHEL-7.4.0 PC (Q35 + ICH9, 2009)"; +- pcmc->pc_rom_ro = false; +- compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); +- compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); +-} +- +-DEFINE_PC_MACHINE(q35_rhel740, "pc-q35-rhel7.4.0", pc_q35_init_rhel740, +- pc_q35_machine_rhel740_options); +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index ca0b99403e..bdabbf8b81 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -5308,49 +5308,6 @@ static void spapr_machine_rhel750sxxm_class_options(MachineClass *mc) + } + + DEFINE_SPAPR_MACHINE(rhel750sxxm, "rhel7.5.0-sxxm", false); +- +-/* +- * pseries-rhel7.4.0 +- * like spapr_compat_2_9 +- */ +-GlobalProperty spapr_compat_rhel7_4[] = { +- { TYPE_POWERPC_CPU, "pre-2.10-migration", "on" }, +-}; +-const size_t spapr_compat_rhel7_4_len = G_N_ELEMENTS(spapr_compat_rhel7_4); +- 
+-static void spapr_machine_rhel740_class_options(MachineClass *mc) +-{ +- SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +- +- spapr_machine_rhel750_class_options(mc); +- compat_props_add(mc->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); +- compat_props_add(mc->compat_props, spapr_compat_rhel7_4, spapr_compat_rhel7_4_len); +- smc->has_power9_support = false; +- smc->pre_2_10_has_unused_icps = true; +- smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED; +- smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_ON; +-} +- +-DEFINE_SPAPR_MACHINE(rhel740, "rhel7.4.0", false); +- +-/* +- * pseries-rhel7.4.0-sxxm +- * +- * pseries-rhel7.4.0 with speculative execution exploit mitigations enabled by default +- */ +- +-static void spapr_machine_rhel740sxxm_class_options(MachineClass *mc) +-{ +- SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +- +- spapr_machine_rhel740_class_options(mc); +- smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; +- smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; +- smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; +-} +- +-DEFINE_SPAPR_MACHINE(rhel740sxxm, "rhel7.4.0-sxxm", false); +- + static void spapr_machine_register_types(void) + { + type_register_static(&spapr_machine_info); +diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h +index e1ecfbaf1f..df519e40b5 100644 +--- a/include/hw/acpi/ich9.h ++++ b/include/hw/acpi/ich9.h +@@ -62,9 +62,6 @@ typedef struct ICH9LPCPMRegs { + bool smm_compat; + bool enable_tco; + TCOIORegs tco_regs; +- +- /* RH addition, see bz 1489800 */ +- bool force_rev1_fadt; + } ICH9LPCPMRegs; + + #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 24d4d726d8..f27170b6fb 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -436,7 +436,4 @@ extern const size_t hw_compat_rhel_7_6_len; + extern GlobalProperty hw_compat_rhel_7_5[]; + extern const size_t hw_compat_rhel_7_5_len; + +-extern 
GlobalProperty hw_compat_rhel_7_4[]; +-extern const size_t hw_compat_rhel_7_4_len; +- + #endif +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 820fb3f577..ff93dfb372 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -128,9 +128,6 @@ struct PCMachineClass { + + /* create kvmclock device even when KVM PV features are not exposed */ + bool kvmclock_create_always; +- +- /* RH only, see bz 1489800 */ +- bool pc_rom_ro; + }; + + #define TYPE_PC_MACHINE "generic-pc-machine" +@@ -302,9 +299,6 @@ extern const size_t pc_rhel_7_6_compat_len; + extern GlobalProperty pc_rhel_7_5_compat[]; + extern const size_t pc_rhel_7_5_compat_len; + +-extern GlobalProperty pc_rhel_7_4_compat[]; +-extern const size_t pc_rhel_7_4_compat_len; +- + /* Helper for setting model-id for CPU models that changed model-id + * depending on QEMU versions up to QEMU 2.4. + */ +-- +2.27.0 + diff --git a/kvm-Remove-RHEL-7.5.0-machine-types.patch b/kvm-Remove-RHEL-7.5.0-machine-types.patch new file mode 100644 index 0000000..ed51c11 --- /dev/null +++ b/kvm-Remove-RHEL-7.5.0-machine-types.patch @@ -0,0 +1,210 @@ +From 6de2502ef6f2ee68842bed7d09b0a2c7ac57b11b Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Aug 2021 12:24:36 +0200 +Subject: [PATCH 08/39] Remove RHEL 7.5.0 machine types + +RH-Author: quintela1 +RH-MergeRequest: 30: Remove RHEL 7.x.0 machine types +RH-Commit: [6/6] a4a72853fda905fe5036520f6095032e308cb51f (juan.quintela/qemu-kvm) +RH-Bugzilla: 1968519 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina + +Signed-off-by: Juan Quintela +Signed-off-by: Miroslav Rezanina +--- + hw/core/machine.c | 20 -------------------- + hw/i386/pc.c | 18 ------------------ + hw/i386/pc_piix.c | 20 -------------------- + hw/i386/pc_q35.c | 20 -------------------- + hw/ppc/spapr.c | 31 ------------------------------- + include/hw/boards.h | 3 --- + include/hw/i386/pc.h | 3 --- + 7 files changed, 115 deletions(-) + +diff --git 
a/hw/core/machine.c b/hw/core/machine.c +index 6dc2651d73..b271389681 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -195,26 +195,6 @@ GlobalProperty hw_compat_rhel_7_6[] = { + }; + const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); + +-/* The same as hw_compat_2_11 + hw_compat_2_10 */ +-GlobalProperty hw_compat_rhel_7_5[] = { +- /* hw_compat_rhel_7_5 from hw_compat_2_11 */ +- { "hpet", "hpet-offset-saved", "false" }, +- /* hw_compat_rhel_7_5 from hw_compat_2_11 */ +- { "virtio-blk-pci", "vectors", "2" }, +- /* hw_compat_rhel_7_5 from hw_compat_2_11 */ +- { "vhost-user-blk-pci", "vectors", "2" }, +- /* hw_compat_rhel_7_5 from hw_compat_2_11 +- bz 1608778 modified for our naming */ +- { "e1000-82540em", "migrate_tso_props", "off" }, +- /* hw_compat_rhel_7_5 from hw_compat_2_10 */ +- { "virtio-mouse-device", "wheel-axis", "false" }, +- /* hw_compat_rhel_7_5 from hw_compat_2_10 */ +- { "virtio-tablet-device", "wheel-axis", "false" }, +- { "cirrus-vga", "vgamem_mb", "16" }, +- { "migration", "decompress-error-check", "off" }, +-}; +-const size_t hw_compat_rhel_7_5_len = G_N_ELEMENTS(hw_compat_rhel_7_5); +- + GlobalProperty hw_compat_5_2[] = { + { "ICH9-LPC", "smm-compat", "on"}, + { "PIIX4_PM", "smm-compat", "on"}, +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 6b39abce02..88972dc3e6 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -459,24 +459,6 @@ GlobalProperty pc_rhel_7_6_compat[] = { + }; + const size_t pc_rhel_7_6_compat_len = G_N_ELEMENTS(pc_rhel_7_6_compat); + +-/* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: +- * - x-hv-max-vps was backported to 7.5 +- * - x-pci-hole64-fix was backported to 7.5 +- */ +-GlobalProperty pc_rhel_7_5_compat[] = { +- /* pc_rhel_7_5_compat from pc_compat_2_11 */ +- { "Skylake-Server" "-" TYPE_X86_CPU, "clflushopt", "off" }, +- /* pc_rhel_7_5_compat from pc_compat_2_12 */ +- { TYPE_X86_CPU, "legacy-cache", "on" }, +- /* pc_rhel_7_5_compat from pc_compat_2_12 */ +- { TYPE_X86_CPU, "topoext", 
"off" }, +- /* pc_rhel_7_5_compat from pc_compat_2_12 */ +- { "EPYC-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, +- /* pc_rhel_7_5_compat from pc_compat_2_12 */ +- { "EPYC-IBPB-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, +-}; +-const size_t pc_rhel_7_5_compat_len = G_N_ELEMENTS(pc_rhel_7_5_compat); +- + /* + * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine + * types as the PC_COMPAT_* do for upstream types. +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index fe2ac7593a..ba7926cfae 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -991,23 +991,3 @@ static void pc_machine_rhel760_options(MachineClass *m) + + DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, + pc_machine_rhel760_options); +- +-static void pc_init_rhel750(MachineState *machine) +-{ +- pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ +- TYPE_I440FX_PCI_DEVICE); +-} +- +-static void pc_machine_rhel750_options(MachineClass *m) +-{ +- pc_machine_rhel760_options(m); +- m->alias = NULL; +- m->is_default = 0; +- m->desc = "RHEL 7.5.0 PC (i440FX + PIIX, 1996)"; +- m->auto_enable_numa_with_memhp = false; +- compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); +- compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); +-} +- +-DEFINE_PC_MACHINE(rhel750, "pc-i440fx-rhel7.5.0", pc_init_rhel750, +- pc_machine_rhel750_options); +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 9958ed9184..f93825d603 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -754,23 +754,3 @@ static void pc_q35_machine_rhel760_options(MachineClass *m) + + DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, + pc_q35_machine_rhel760_options); +- +-static void pc_q35_init_rhel750(MachineState *machine) +-{ +- pc_q35_init(machine); +-} +- +-static void pc_q35_machine_rhel750_options(MachineClass *m) +-{ +- PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +- pc_q35_machine_rhel760_options(m); +- m->alias = 
NULL; +- m->desc = "RHEL-7.5.0 PC (Q35 + ICH9, 2009)"; +- m->auto_enable_numa_with_memhp = false; +- pcmc->default_nic_model = "e1000"; +- compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); +- compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); +-} +- +-DEFINE_PC_MACHINE(q35_rhel750, "pc-q35-rhel7.5.0", pc_q35_init_rhel750, +- pc_q35_machine_rhel750_options); +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index bdabbf8b81..cda75a8cfe 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -5282,35 +5282,4 @@ static void spapr_machine_rhel760sxxm_class_options(MachineClass *mc) + + DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false); + +-static void spapr_machine_rhel750_class_options(MachineClass *mc) +-{ +- spapr_machine_rhel760_class_options(mc); +- compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); +- +-} +- +-DEFINE_SPAPR_MACHINE(rhel750, "rhel7.5.0", false); +- +-/* +- * pseries-rhel7.5.0-sxxm +- * +- * pseries-rhel7.5.0 with speculative execution exploit mitigations enabled by default +- */ +- +-static void spapr_machine_rhel750sxxm_class_options(MachineClass *mc) +-{ +- SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +- +- spapr_machine_rhel750_class_options(mc); +- smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; +- smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; +- smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; +-} +- +-DEFINE_SPAPR_MACHINE(rhel750sxxm, "rhel7.5.0-sxxm", false); +-static void spapr_machine_register_types(void) +-{ +- type_register_static(&spapr_machine_info); +-} +- + type_init(spapr_machine_register_types) +diff --git a/include/hw/boards.h b/include/hw/boards.h +index f27170b6fb..73f7dcdb35 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -433,7 +433,4 @@ extern const size_t hw_compat_rhel_8_0_len; + extern GlobalProperty hw_compat_rhel_7_6[]; + extern const size_t hw_compat_rhel_7_6_len; + 
+-extern GlobalProperty hw_compat_rhel_7_5[]; +-extern const size_t hw_compat_rhel_7_5_len; +- + #endif +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index ff93dfb372..549e581a98 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -296,9 +296,6 @@ extern const size_t pc_rhel_8_0_compat_len; + extern GlobalProperty pc_rhel_7_6_compat[]; + extern const size_t pc_rhel_7_6_compat_len; + +-extern GlobalProperty pc_rhel_7_5_compat[]; +-extern const size_t pc_rhel_7_5_compat_len; +- + /* Helper for setting model-id for CPU models that changed model-id + * depending on QEMU versions up to QEMU 2.4. + */ +-- +2.27.0 + diff --git a/kvm-Update-Linux-headers-to-5.13-rc4.patch b/kvm-Update-Linux-headers-to-5.13-rc4.patch new file mode 100644 index 0000000..a06d102 --- /dev/null +++ b/kvm-Update-Linux-headers-to-5.13-rc4.patch @@ -0,0 +1,5465 @@ +From 5df5b2094167f0bc6c728933b990982fe012e33d Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 29 Jul 2021 07:42:25 -0400 +Subject: [PATCH 15/39] Update Linux headers to 5.13-rc4 + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 +RH-Commit: [7/15] 2e8a71b8e0d02d2a16cd6dd2234895a9db59fa0d (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Acked-by: Igor Mammedov +RH-Acked-by: Andrew Jones + +Signed-off-by: Eduardo Habkost +Message-Id: <20210603191541.2862286-1-ehabkost@redhat.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 278f064e452468d66ee15c3f453826e697ec6832) +Signed-off-by: Paul Lai +Signed-off-by: Miroslav Rezanina +--- + .../infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 35 - + include/standard-headers/drm/drm_fourcc.h | 23 +- + include/standard-headers/linux/ethtool.h | 109 ++- + include/standard-headers/linux/fuse.h | 17 +- + include/standard-headers/linux/input.h | 2 +- + include/standard-headers/linux/virtio_bt.h | 31 + + 
include/standard-headers/linux/virtio_ids.h | 2 + + include/standard-headers/linux/virtio_snd.h | 334 +++++++ + .../standard-headers/rdma/vmw_pvrdma-abi.h | 7 + + linux-headers/asm-generic/unistd.h | 13 +- + linux-headers/asm-mips/unistd_n32.h | 751 +++++++-------- + linux-headers/asm-mips/unistd_n64.h | 703 +++++++------- + linux-headers/asm-mips/unistd_o32.h | 843 ++++++++--------- + linux-headers/asm-powerpc/kvm.h | 2 + + linux-headers/asm-powerpc/unistd_32.h | 856 +++++++++--------- + linux-headers/asm-powerpc/unistd_64.h | 800 ++++++++-------- + linux-headers/asm-s390/unistd_32.h | 4 + + linux-headers/asm-s390/unistd_64.h | 4 + + linux-headers/asm-x86/kvm.h | 3 + + linux-headers/asm-x86/unistd_32.h | 4 + + linux-headers/asm-x86/unistd_64.h | 4 + + linux-headers/asm-x86/unistd_x32.h | 4 + + linux-headers/linux/kvm.h | 131 ++- + linux-headers/linux/userfaultfd.h | 36 +- + linux-headers/linux/vfio.h | 35 + + 25 files changed, 2707 insertions(+), 2046 deletions(-) + create mode 100644 include/standard-headers/linux/virtio_bt.h + create mode 100644 include/standard-headers/linux/virtio_snd.h + +diff --git a/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +index 1677208a41..94d41b202c 100644 +--- a/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h ++++ b/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +@@ -70,30 +70,6 @@ enum pvrdma_mtu { + PVRDMA_MTU_4096 = 5, + }; + +-static inline int pvrdma_mtu_enum_to_int(enum pvrdma_mtu mtu) +-{ +- switch (mtu) { +- case PVRDMA_MTU_256: return 256; +- case PVRDMA_MTU_512: return 512; +- case PVRDMA_MTU_1024: return 1024; +- case PVRDMA_MTU_2048: return 2048; +- case PVRDMA_MTU_4096: return 4096; +- default: return -1; +- } +-} +- +-static inline enum pvrdma_mtu pvrdma_mtu_int_to_enum(int mtu) +-{ +- switch (mtu) { +- case 256: return PVRDMA_MTU_256; +- case 512: return 
PVRDMA_MTU_512; +- case 1024: return PVRDMA_MTU_1024; +- case 2048: return PVRDMA_MTU_2048; +- case 4096: +- default: return PVRDMA_MTU_4096; +- } +-} +- + enum pvrdma_port_state { + PVRDMA_PORT_NOP = 0, + PVRDMA_PORT_DOWN = 1, +@@ -138,17 +114,6 @@ enum pvrdma_port_width { + PVRDMA_WIDTH_12X = 8, + }; + +-static inline int pvrdma_width_enum_to_int(enum pvrdma_port_width width) +-{ +- switch (width) { +- case PVRDMA_WIDTH_1X: return 1; +- case PVRDMA_WIDTH_4X: return 4; +- case PVRDMA_WIDTH_8X: return 8; +- case PVRDMA_WIDTH_12X: return 12; +- default: return -1; +- } +-} +- + enum pvrdma_port_speed { + PVRDMA_SPEED_SDR = 1, + PVRDMA_SPEED_DDR = 2, +diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h +index c47e19810c..a61ae520c2 100644 +--- a/include/standard-headers/drm/drm_fourcc.h ++++ b/include/standard-headers/drm/drm_fourcc.h +@@ -526,6 +526,25 @@ extern "C" { + */ + #define I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS fourcc_mod_code(INTEL, 7) + ++/* ++ * Intel Color Control Surface with Clear Color (CCS) for Gen-12 render ++ * compression. ++ * ++ * The main surface is Y-tiled and is at plane index 0 whereas CCS is linear ++ * and at index 1. The clear color is stored at index 2, and the pitch should ++ * be ignored. The clear color structure is 256 bits. The first 128 bits ++ * represents Raw Clear Color Red, Green, Blue and Alpha color each represented ++ * by 32 bits. The raw clear color is consumed by the 3d engine and generates ++ * the converted clear color of size 64 bits. The first 32 bits store the Lower ++ * Converted Clear Color value and the next 32 bits store the Higher Converted ++ * Clear Color value when applicable. The Converted Clear Color values are ++ * consumed by the DE. The last 64 bits are used to store Color Discard Enable ++ * and Depth Clear Value Valid which are ignored by the DE. A CCS cache line ++ * corresponds to an area of 4x1 tiles in the main surface. 
The main surface ++ * pitch is required to be a multiple of 4 tile widths. ++ */ ++#define I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC fourcc_mod_code(INTEL, 8) ++ + /* + * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks + * +@@ -1035,9 +1054,9 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) + * Not all combinations are valid, and different SoCs may support different + * combinations of layout and options. + */ +-#define __fourcc_mod_amlogic_layout_mask 0xf ++#define __fourcc_mod_amlogic_layout_mask 0xff + #define __fourcc_mod_amlogic_options_shift 8 +-#define __fourcc_mod_amlogic_options_mask 0xf ++#define __fourcc_mod_amlogic_options_mask 0xff + + #define DRM_FORMAT_MOD_AMLOGIC_FBC(__layout, __options) \ + fourcc_mod_code(AMLOGIC, \ +diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h +index 8bfd01d230..218d944a17 100644 +--- a/include/standard-headers/linux/ethtool.h ++++ b/include/standard-headers/linux/ethtool.h +@@ -26,6 +26,14 @@ + * have the same layout for 32-bit and 64-bit userland. + */ + ++/* Note on reserved space. ++ * Reserved fields must not be accessed directly by user space because ++ * they may be replaced by a different field in the future. They must ++ * be initialized to zero before making the request, e.g. via memset ++ * of the entire structure or implicitly by not being set in a structure ++ * initializer. ++ */ ++ + /** + * struct ethtool_cmd - DEPRECATED, link control and status + * This structure is DEPRECATED, please use struct ethtool_link_settings. +@@ -67,6 +75,7 @@ + * and other link features that the link partner advertised + * through autonegotiation; 0 if unknown or not applicable. + * Read-only. ++ * @reserved: Reserved for future use; see the note on reserved space. + * + * The link speed in Mbps is split between @speed and @speed_hi. 
Use + * the ethtool_cmd_speed() and ethtool_cmd_speed_set() functions to +@@ -155,6 +164,7 @@ static inline uint32_t ethtool_cmd_speed(const struct ethtool_cmd *ep) + * @bus_info: Device bus address. This should match the dev_name() + * string for the underlying bus device, if there is one. May be + * an empty string. ++ * @reserved2: Reserved for future use; see the note on reserved space. + * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and + * %ETHTOOL_SPFLAGS commands; also the number of strings in the + * %ETH_SS_PRIV_FLAGS set +@@ -356,6 +366,7 @@ struct ethtool_eeprom { + * @tx_lpi_timer: Time in microseconds the interface delays prior to asserting + * its tx lpi (after reaching 'idle' state). Effective only when eee + * was negotiated and tx_lpi_enabled was set. ++ * @reserved: Reserved for future use; see the note on reserved space. + */ + struct ethtool_eee { + uint32_t cmd; +@@ -374,6 +385,7 @@ struct ethtool_eee { + * @cmd: %ETHTOOL_GMODULEINFO + * @type: Standard the module information conforms to %ETH_MODULE_SFF_xxxx + * @eeprom_len: Length of the eeprom ++ * @reserved: Reserved for future use; see the note on reserved space. + * + * This structure is used to return the information to + * properly size memory for a subsequent call to %ETHTOOL_GMODULEEEPROM. +@@ -579,9 +591,7 @@ struct ethtool_pauseparam { + uint32_t tx_pause; + }; + +-/** +- * enum ethtool_link_ext_state - link extended state +- */ ++/* Link extended state */ + enum ethtool_link_ext_state { + ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, +@@ -595,10 +605,7 @@ enum ethtool_link_ext_state { + ETHTOOL_LINK_EXT_STATE_OVERHEAT, + }; + +-/** +- * enum ethtool_link_ext_substate_autoneg - more information in addition to +- * ETHTOOL_LINK_EXT_STATE_AUTONEG. +- */ ++/* More information in addition to ETHTOOL_LINK_EXT_STATE_AUTONEG. 
*/ + enum ethtool_link_ext_substate_autoneg { + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED = 1, + ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED, +@@ -608,9 +615,7 @@ enum ethtool_link_ext_substate_autoneg { + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD, + }; + +-/** +- * enum ethtool_link_ext_substate_link_training - more information in addition to +- * ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE. ++/* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE. + */ + enum ethtool_link_ext_substate_link_training { + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED = 1, +@@ -619,9 +624,7 @@ enum ethtool_link_ext_substate_link_training { + ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT, + }; + +-/** +- * enum ethtool_link_ext_substate_logical_mismatch - more information in addition +- * to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH. ++/* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH. + */ + enum ethtool_link_ext_substate_link_logical_mismatch { + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK = 1, +@@ -631,19 +634,14 @@ enum ethtool_link_ext_substate_link_logical_mismatch { + ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED, + }; + +-/** +- * enum ethtool_link_ext_substate_bad_signal_integrity - more information in +- * addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY. ++/* More information in addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY. + */ + enum ethtool_link_ext_substate_bad_signal_integrity { + ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE, + }; + +-/** +- * enum ethtool_link_ext_substate_cable_issue - more information in +- * addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. +- */ ++/* More information in addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. 
*/ + enum ethtool_link_ext_substate_cable_issue { + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE = 1, + ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE, +@@ -661,6 +659,7 @@ enum ethtool_link_ext_substate_cable_issue { + * now deprecated + * @ETH_SS_FEATURES: Device feature names + * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names ++ * @ETH_SS_TUNABLES: tunable names + * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS + * @ETH_SS_PHY_TUNABLES: PHY tunable names + * @ETH_SS_LINK_MODES: link mode names +@@ -670,6 +669,13 @@ enum ethtool_link_ext_substate_cable_issue { + * @ETH_SS_TS_TX_TYPES: timestamping Tx types + * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters + * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types ++ * @ETH_SS_STATS_STD: standardized stats ++ * @ETH_SS_STATS_ETH_PHY: names of IEEE 802.3 PHY statistics ++ * @ETH_SS_STATS_ETH_MAC: names of IEEE 802.3 MAC statistics ++ * @ETH_SS_STATS_ETH_CTRL: names of IEEE 802.3 MAC Control statistics ++ * @ETH_SS_STATS_RMON: names of RMON statistics ++ * ++ * @ETH_SS_COUNT: number of defined string sets + */ + enum ethtool_stringset { + ETH_SS_TEST = 0, +@@ -688,6 +694,11 @@ enum ethtool_stringset { + ETH_SS_TS_TX_TYPES, + ETH_SS_TS_RX_FILTERS, + ETH_SS_UDP_TUNNEL_TYPES, ++ ETH_SS_STATS_STD, ++ ETH_SS_STATS_ETH_PHY, ++ ETH_SS_STATS_ETH_MAC, ++ ETH_SS_STATS_ETH_CTRL, ++ ETH_SS_STATS_RMON, + + /* add new constants above here */ + ETH_SS_COUNT +@@ -715,6 +726,7 @@ struct ethtool_gstrings { + /** + * struct ethtool_sset_info - string set information + * @cmd: Command number = %ETHTOOL_GSSET_INFO ++ * @reserved: Reserved for future use; see the note on reserved space. + * @sset_mask: On entry, a bitmask of string sets to query, with bits + * numbered according to &enum ethtool_stringset. On return, a + * bitmask of those string sets queried that are supported. +@@ -759,6 +771,7 @@ enum ethtool_test_flags { + * @flags: A bitmask of flags from &enum ethtool_test_flags. 
Some + * flags may be set by the user on entry; others may be set by + * the driver on return. ++ * @reserved: Reserved for future use; see the note on reserved space. + * @len: On return, the number of test results + * @data: Array of test results + * +@@ -959,6 +972,7 @@ union ethtool_flow_union { + * @vlan_etype: VLAN EtherType + * @vlan_tci: VLAN tag control information + * @data: user defined data ++ * @padding: Reserved for future use; see the note on reserved space. + * + * Note, @vlan_etype, @vlan_tci, and @data are only valid if %FLOW_EXT + * is set in &struct ethtool_rx_flow_spec @flow_type. +@@ -1134,7 +1148,8 @@ struct ethtool_rxfh_indir { + * hardware hash key. + * @hfunc: Defines the current RSS hash function used by HW (or to be set to). + * Valid values are one of the %ETH_RSS_HASH_*. +- * @rsvd: Reserved for future extensions. ++ * @rsvd8: Reserved for future use; see the note on reserved space. ++ * @rsvd32: Reserved for future use; see the note on reserved space. + * @rss_config: RX ring/queue index for each hash value i.e., indirection table + * of @indir_size uint32_t elements, followed by hash key of @key_size + * bytes. +@@ -1302,7 +1317,9 @@ struct ethtool_sfeatures { + * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags + * @phc_index: device index of the associated PHC, or -1 if there is none + * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values ++ * @tx_reserved: Reserved for future use; see the note on reserved space. + * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values ++ * @rx_reserved: Reserved for future use; see the note on reserved space. 
+ * + * The bits in the 'tx_types' and 'rx_filters' fields correspond to + * the 'hwtstamp_tx_types' and 'hwtstamp_rx_filters' enumeration values, +@@ -1376,15 +1393,33 @@ struct ethtool_per_queue_op { + }; + + /** +- * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters ++ * struct ethtool_fecparam - Ethernet Forward Error Correction parameters + * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM +- * @active_fec: FEC mode which is active on porte +- * @fec: Bitmask of supported/configured FEC modes +- * @rsvd: Reserved for future extensions. i.e FEC bypass feature. ++ * @active_fec: FEC mode which is active on the port, single bit set, GET only. ++ * @fec: Bitmask of configured FEC modes. ++ * @reserved: Reserved for future extensions, ignore on GET, write 0 for SET. + * +- * Drivers should reject a non-zero setting of @autoneg when +- * autoneogotiation is disabled (or not supported) for the link. ++ * Note that @reserved was never validated on input and ethtool user space ++ * left it uninitialized when calling SET. Hence going forward it can only be ++ * used to return a value to userspace with GET. ++ * ++ * FEC modes supported by the device can be read via %ETHTOOL_GLINKSETTINGS. ++ * FEC settings are configured by link autonegotiation whenever it's enabled. ++ * With autoneg on %ETHTOOL_GFECPARAM can be used to read the current mode. ++ * ++ * When autoneg is disabled %ETHTOOL_SFECPARAM controls the FEC settings. ++ * It is recommended that drivers only accept a single bit set in @fec. ++ * When multiple bits are set in @fec drivers may pick mode in an implementation ++ * dependent way. Drivers should reject mixing %ETHTOOL_FEC_AUTO_BIT with other ++ * FEC modes, because it's unclear whether in this case other modes constrain ++ * AUTO or are independent choices. ++ * Drivers must reject SET requests if they support none of the requested modes. 
++ * ++ * If device does not support FEC drivers may use %ETHTOOL_FEC_NONE instead ++ * of returning %EOPNOTSUPP from %ETHTOOL_GFECPARAM. + * ++ * See enum ethtool_fec_config_bits for definition of valid bits for both ++ * @fec and @active_fec. + */ + struct ethtool_fecparam { + uint32_t cmd; +@@ -1396,11 +1431,16 @@ struct ethtool_fecparam { + + /** + * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration +- * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported +- * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver +- * @ETHTOOL_FEC_OFF: No FEC Mode +- * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode +- * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode ++ * @ETHTOOL_FEC_NONE_BIT: FEC mode configuration is not supported. Should not ++ * be used together with other bits. GET only. ++ * @ETHTOOL_FEC_AUTO_BIT: Select default/best FEC mode automatically, usually ++ * based link mode and SFP parameters read from module's ++ * EEPROM. This bit does _not_ mean autonegotiation. ++ * @ETHTOOL_FEC_OFF_BIT: No FEC Mode ++ * @ETHTOOL_FEC_RS_BIT: Reed-Solomon FEC Mode ++ * @ETHTOOL_FEC_BASER_BIT: Base-R/Reed-Solomon FEC Mode ++ * @ETHTOOL_FEC_LLRS_BIT: Low Latency Reed Solomon FEC Mode (25G/50G Ethernet ++ * Consortium) + */ + enum ethtool_fec_config_bits { + ETHTOOL_FEC_NONE_BIT, +@@ -1958,6 +1998,11 @@ enum ethtool_reset_flags { + * autonegotiation; 0 if unknown or not applicable. Read-only. + * @transceiver: Used to distinguish different possible PHY types, + * reported consistently by PHYLIB. Read-only. ++ * @master_slave_cfg: Master/slave port mode. ++ * @master_slave_state: Master/slave port state. ++ * @reserved: Reserved for future use; see the note on reserved space. ++ * @reserved1: Reserved for future use; see the note on reserved space. ++ * @link_mode_masks: Variable length bitmaps. 
+ * + * If autonegotiation is disabled, the speed and @duplex represent the + * fixed link mode and are writable if the driver supports multiple +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +index 950d7edb7e..cce105bfba 100644 +--- a/include/standard-headers/linux/fuse.h ++++ b/include/standard-headers/linux/fuse.h +@@ -179,6 +179,8 @@ + * 7.33 + * - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID, FATTR_KILL_SUIDGID + * - add FUSE_OPEN_KILL_SUIDGID ++ * - extend fuse_setxattr_in, add FUSE_SETXATTR_EXT ++ * - add FUSE_SETXATTR_ACL_KILL_SGID + */ + + #ifndef _LINUX_FUSE_H +@@ -326,6 +328,7 @@ struct fuse_file_lock { + * does not have CAP_FSETID. Additionally upon + * write/truncate sgid is killed only if file has group + * execute permission. (Same as Linux VFS behavior). ++ * FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in + */ + #define FUSE_ASYNC_READ (1 << 0) + #define FUSE_POSIX_LOCKS (1 << 1) +@@ -356,6 +359,7 @@ struct fuse_file_lock { + #define FUSE_MAP_ALIGNMENT (1 << 26) + #define FUSE_SUBMOUNTS (1 << 27) + #define FUSE_HANDLE_KILLPRIV_V2 (1 << 28) ++#define FUSE_SETXATTR_EXT (1 << 29) + + /** + * CUSE INIT request/reply flags +@@ -447,6 +451,12 @@ struct fuse_file_lock { + */ + #define FUSE_OPEN_KILL_SUIDGID (1 << 0) + ++/** ++ * setxattr flags ++ * FUSE_SETXATTR_ACL_KILL_SGID: Clear SGID when system.posix_acl_access is set ++ */ ++#define FUSE_SETXATTR_ACL_KILL_SGID (1 << 0) ++ + enum fuse_opcode { + FUSE_LOOKUP = 1, + FUSE_FORGET = 2, /* no reply */ +@@ -677,9 +687,13 @@ struct fuse_fsync_in { + uint32_t padding; + }; + ++#define FUSE_COMPAT_SETXATTR_IN_SIZE 8 ++ + struct fuse_setxattr_in { + uint32_t size; + uint32_t flags; ++ uint32_t setxattr_flags; ++ uint32_t padding; + }; + + struct fuse_getxattr_in { +@@ -899,7 +913,8 @@ struct fuse_notify_retrieve_in { + }; + + /* Device ioctls: */ +-#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) ++#define FUSE_DEV_IOC_MAGIC 229 ++#define 
FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t) + + struct fuse_lseek_in { + uint64_t fh; +diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h +index f89c986190..7822c24178 100644 +--- a/include/standard-headers/linux/input.h ++++ b/include/standard-headers/linux/input.h +@@ -81,7 +81,7 @@ struct input_id { + * in units per radian. + * When INPUT_PROP_ACCELEROMETER is set the resolution changes. + * The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in +- * in units per g (units/g) and in units per degree per second ++ * units per g (units/g) and in units per degree per second + * (units/deg/s) for rotational axes (ABS_RX, ABS_RY, ABS_RZ). + */ + struct input_absinfo { +diff --git a/include/standard-headers/linux/virtio_bt.h b/include/standard-headers/linux/virtio_bt.h +new file mode 100644 +index 0000000000..245e1eff4b +--- /dev/null ++++ b/include/standard-headers/linux/virtio_bt.h +@@ -0,0 +1,31 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++ ++#ifndef _LINUX_VIRTIO_BT_H ++#define _LINUX_VIRTIO_BT_H ++ ++#include "standard-headers/linux/virtio_types.h" ++ ++/* Feature bits */ ++#define VIRTIO_BT_F_VND_HCI 0 /* Indicates vendor command support */ ++#define VIRTIO_BT_F_MSFT_EXT 1 /* Indicates MSFT vendor support */ ++#define VIRTIO_BT_F_AOSP_EXT 2 /* Indicates AOSP vendor support */ ++ ++enum virtio_bt_config_type { ++ VIRTIO_BT_CONFIG_TYPE_PRIMARY = 0, ++ VIRTIO_BT_CONFIG_TYPE_AMP = 1, ++}; ++ ++enum virtio_bt_config_vendor { ++ VIRTIO_BT_CONFIG_VENDOR_NONE = 0, ++ VIRTIO_BT_CONFIG_VENDOR_ZEPHYR = 1, ++ VIRTIO_BT_CONFIG_VENDOR_INTEL = 2, ++ VIRTIO_BT_CONFIG_VENDOR_REALTEK = 3, ++}; ++ ++struct virtio_bt_config { ++ uint8_t type; ++ uint16_t vendor; ++ uint16_t msft_opcode; ++} QEMU_PACKED; ++ ++#endif /* _LINUX_VIRTIO_BT_H */ +diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h +index bc1c0621f5..f0c35ce862 100644 +--- 
a/include/standard-headers/linux/virtio_ids.h ++++ b/include/standard-headers/linux/virtio_ids.h +@@ -51,8 +51,10 @@ + #define VIRTIO_ID_PSTORE 22 /* virtio pstore device */ + #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ + #define VIRTIO_ID_MEM 24 /* virtio mem */ ++#define VIRTIO_ID_SOUND 25 /* virtio sound */ + #define VIRTIO_ID_FS 26 /* virtio filesystem */ + #define VIRTIO_ID_PMEM 27 /* virtio pmem */ ++#define VIRTIO_ID_BT 28 /* virtio bluetooth */ + #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ + + #endif /* _LINUX_VIRTIO_IDS_H */ +diff --git a/include/standard-headers/linux/virtio_snd.h b/include/standard-headers/linux/virtio_snd.h +new file mode 100644 +index 0000000000..1af96b9fc6 +--- /dev/null ++++ b/include/standard-headers/linux/virtio_snd.h +@@ -0,0 +1,334 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (C) 2021 OpenSynergy GmbH ++ */ ++#ifndef VIRTIO_SND_IF_H ++#define VIRTIO_SND_IF_H ++ ++#include "standard-headers/linux/virtio_types.h" ++ ++/******************************************************************************* ++ * CONFIGURATION SPACE ++ */ ++struct virtio_snd_config { ++ /* # of available physical jacks */ ++ uint32_t jacks; ++ /* # of available PCM streams */ ++ uint32_t streams; ++ /* # of available channel maps */ ++ uint32_t chmaps; ++}; ++ ++enum { ++ /* device virtqueue indexes */ ++ VIRTIO_SND_VQ_CONTROL = 0, ++ VIRTIO_SND_VQ_EVENT, ++ VIRTIO_SND_VQ_TX, ++ VIRTIO_SND_VQ_RX, ++ /* # of device virtqueues */ ++ VIRTIO_SND_VQ_MAX ++}; ++ ++/******************************************************************************* ++ * COMMON DEFINITIONS ++ */ ++ ++/* supported dataflow directions */ ++enum { ++ VIRTIO_SND_D_OUTPUT = 0, ++ VIRTIO_SND_D_INPUT ++}; ++ ++enum { ++ /* jack control request types */ ++ VIRTIO_SND_R_JACK_INFO = 1, ++ VIRTIO_SND_R_JACK_REMAP, ++ ++ /* PCM control request types */ ++ VIRTIO_SND_R_PCM_INFO = 0x0100, ++ VIRTIO_SND_R_PCM_SET_PARAMS, ++ VIRTIO_SND_R_PCM_PREPARE, ++ 
VIRTIO_SND_R_PCM_RELEASE, ++ VIRTIO_SND_R_PCM_START, ++ VIRTIO_SND_R_PCM_STOP, ++ ++ /* channel map control request types */ ++ VIRTIO_SND_R_CHMAP_INFO = 0x0200, ++ ++ /* jack event types */ ++ VIRTIO_SND_EVT_JACK_CONNECTED = 0x1000, ++ VIRTIO_SND_EVT_JACK_DISCONNECTED, ++ ++ /* PCM event types */ ++ VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED = 0x1100, ++ VIRTIO_SND_EVT_PCM_XRUN, ++ ++ /* common status codes */ ++ VIRTIO_SND_S_OK = 0x8000, ++ VIRTIO_SND_S_BAD_MSG, ++ VIRTIO_SND_S_NOT_SUPP, ++ VIRTIO_SND_S_IO_ERR ++}; ++ ++/* common header */ ++struct virtio_snd_hdr { ++ uint32_t code; ++}; ++ ++/* event notification */ ++struct virtio_snd_event { ++ /* VIRTIO_SND_EVT_XXX */ ++ struct virtio_snd_hdr hdr; ++ /* optional event data */ ++ uint32_t data; ++}; ++ ++/* common control request to query an item information */ ++struct virtio_snd_query_info { ++ /* VIRTIO_SND_R_XXX_INFO */ ++ struct virtio_snd_hdr hdr; ++ /* item start identifier */ ++ uint32_t start_id; ++ /* item count to query */ ++ uint32_t count; ++ /* item information size in bytes */ ++ uint32_t size; ++}; ++ ++/* common item information header */ ++struct virtio_snd_info { ++ /* function group node id (High Definition Audio Specification 7.1.2) */ ++ uint32_t hda_fn_nid; ++}; ++ ++/******************************************************************************* ++ * JACK CONTROL MESSAGES ++ */ ++struct virtio_snd_jack_hdr { ++ /* VIRTIO_SND_R_JACK_XXX */ ++ struct virtio_snd_hdr hdr; ++ /* 0 ... 
virtio_snd_config::jacks - 1 */ ++ uint32_t jack_id; ++}; ++ ++/* supported jack features */ ++enum { ++ VIRTIO_SND_JACK_F_REMAP = 0 ++}; ++ ++struct virtio_snd_jack_info { ++ /* common header */ ++ struct virtio_snd_info hdr; ++ /* supported feature bit map (1 << VIRTIO_SND_JACK_F_XXX) */ ++ uint32_t features; ++ /* pin configuration (High Definition Audio Specification 7.3.3.31) */ ++ uint32_t hda_reg_defconf; ++ /* pin capabilities (High Definition Audio Specification 7.3.4.9) */ ++ uint32_t hda_reg_caps; ++ /* current jack connection status (0: disconnected, 1: connected) */ ++ uint8_t connected; ++ ++ uint8_t padding[7]; ++}; ++ ++/* jack remapping control request */ ++struct virtio_snd_jack_remap { ++ /* .code = VIRTIO_SND_R_JACK_REMAP */ ++ struct virtio_snd_jack_hdr hdr; ++ /* selected association number */ ++ uint32_t association; ++ /* selected sequence number */ ++ uint32_t sequence; ++}; ++ ++/******************************************************************************* ++ * PCM CONTROL MESSAGES ++ */ ++struct virtio_snd_pcm_hdr { ++ /* VIRTIO_SND_R_PCM_XXX */ ++ struct virtio_snd_hdr hdr; ++ /* 0 ... 
virtio_snd_config::streams - 1 */ ++ uint32_t stream_id; ++}; ++ ++/* supported PCM stream features */ ++enum { ++ VIRTIO_SND_PCM_F_SHMEM_HOST = 0, ++ VIRTIO_SND_PCM_F_SHMEM_GUEST, ++ VIRTIO_SND_PCM_F_MSG_POLLING, ++ VIRTIO_SND_PCM_F_EVT_SHMEM_PERIODS, ++ VIRTIO_SND_PCM_F_EVT_XRUNS ++}; ++ ++/* supported PCM sample formats */ ++enum { ++ /* analog formats (width / physical width) */ ++ VIRTIO_SND_PCM_FMT_IMA_ADPCM = 0, /* 4 / 4 bits */ ++ VIRTIO_SND_PCM_FMT_MU_LAW, /* 8 / 8 bits */ ++ VIRTIO_SND_PCM_FMT_A_LAW, /* 8 / 8 bits */ ++ VIRTIO_SND_PCM_FMT_S8, /* 8 / 8 bits */ ++ VIRTIO_SND_PCM_FMT_U8, /* 8 / 8 bits */ ++ VIRTIO_SND_PCM_FMT_S16, /* 16 / 16 bits */ ++ VIRTIO_SND_PCM_FMT_U16, /* 16 / 16 bits */ ++ VIRTIO_SND_PCM_FMT_S18_3, /* 18 / 24 bits */ ++ VIRTIO_SND_PCM_FMT_U18_3, /* 18 / 24 bits */ ++ VIRTIO_SND_PCM_FMT_S20_3, /* 20 / 24 bits */ ++ VIRTIO_SND_PCM_FMT_U20_3, /* 20 / 24 bits */ ++ VIRTIO_SND_PCM_FMT_S24_3, /* 24 / 24 bits */ ++ VIRTIO_SND_PCM_FMT_U24_3, /* 24 / 24 bits */ ++ VIRTIO_SND_PCM_FMT_S20, /* 20 / 32 bits */ ++ VIRTIO_SND_PCM_FMT_U20, /* 20 / 32 bits */ ++ VIRTIO_SND_PCM_FMT_S24, /* 24 / 32 bits */ ++ VIRTIO_SND_PCM_FMT_U24, /* 24 / 32 bits */ ++ VIRTIO_SND_PCM_FMT_S32, /* 32 / 32 bits */ ++ VIRTIO_SND_PCM_FMT_U32, /* 32 / 32 bits */ ++ VIRTIO_SND_PCM_FMT_FLOAT, /* 32 / 32 bits */ ++ VIRTIO_SND_PCM_FMT_FLOAT64, /* 64 / 64 bits */ ++ /* digital formats (width / physical width) */ ++ VIRTIO_SND_PCM_FMT_DSD_U8, /* 8 / 8 bits */ ++ VIRTIO_SND_PCM_FMT_DSD_U16, /* 16 / 16 bits */ ++ VIRTIO_SND_PCM_FMT_DSD_U32, /* 32 / 32 bits */ ++ VIRTIO_SND_PCM_FMT_IEC958_SUBFRAME /* 32 / 32 bits */ ++}; ++ ++/* supported PCM frame rates */ ++enum { ++ VIRTIO_SND_PCM_RATE_5512 = 0, ++ VIRTIO_SND_PCM_RATE_8000, ++ VIRTIO_SND_PCM_RATE_11025, ++ VIRTIO_SND_PCM_RATE_16000, ++ VIRTIO_SND_PCM_RATE_22050, ++ VIRTIO_SND_PCM_RATE_32000, ++ VIRTIO_SND_PCM_RATE_44100, ++ VIRTIO_SND_PCM_RATE_48000, ++ VIRTIO_SND_PCM_RATE_64000, ++ VIRTIO_SND_PCM_RATE_88200, ++ 
VIRTIO_SND_PCM_RATE_96000, ++ VIRTIO_SND_PCM_RATE_176400, ++ VIRTIO_SND_PCM_RATE_192000, ++ VIRTIO_SND_PCM_RATE_384000 ++}; ++ ++struct virtio_snd_pcm_info { ++ /* common header */ ++ struct virtio_snd_info hdr; ++ /* supported feature bit map (1 << VIRTIO_SND_PCM_F_XXX) */ ++ uint32_t features; ++ /* supported sample format bit map (1 << VIRTIO_SND_PCM_FMT_XXX) */ ++ uint64_t formats; ++ /* supported frame rate bit map (1 << VIRTIO_SND_PCM_RATE_XXX) */ ++ uint64_t rates; ++ /* dataflow direction (VIRTIO_SND_D_XXX) */ ++ uint8_t direction; ++ /* minimum # of supported channels */ ++ uint8_t channels_min; ++ /* maximum # of supported channels */ ++ uint8_t channels_max; ++ ++ uint8_t padding[5]; ++}; ++ ++/* set PCM stream format */ ++struct virtio_snd_pcm_set_params { ++ /* .code = VIRTIO_SND_R_PCM_SET_PARAMS */ ++ struct virtio_snd_pcm_hdr hdr; ++ /* size of the hardware buffer */ ++ uint32_t buffer_bytes; ++ /* size of the hardware period */ ++ uint32_t period_bytes; ++ /* selected feature bit map (1 << VIRTIO_SND_PCM_F_XXX) */ ++ uint32_t features; ++ /* selected # of channels */ ++ uint8_t channels; ++ /* selected sample format (VIRTIO_SND_PCM_FMT_XXX) */ ++ uint8_t format; ++ /* selected frame rate (VIRTIO_SND_PCM_RATE_XXX) */ ++ uint8_t rate; ++ ++ uint8_t padding; ++}; ++ ++/******************************************************************************* ++ * PCM I/O MESSAGES ++ */ ++ ++/* I/O request header */ ++struct virtio_snd_pcm_xfer { ++ /* 0 ... virtio_snd_config::streams - 1 */ ++ uint32_t stream_id; ++}; ++ ++/* I/O request status */ ++struct virtio_snd_pcm_status { ++ /* VIRTIO_SND_S_XXX */ ++ uint32_t status; ++ /* current device latency */ ++ uint32_t latency_bytes; ++}; ++ ++/******************************************************************************* ++ * CHANNEL MAP CONTROL MESSAGES ++ */ ++struct virtio_snd_chmap_hdr { ++ /* VIRTIO_SND_R_CHMAP_XXX */ ++ struct virtio_snd_hdr hdr; ++ /* 0 ... 
virtio_snd_config::chmaps - 1 */ ++ uint32_t chmap_id; ++}; ++ ++/* standard channel position definition */ ++enum { ++ VIRTIO_SND_CHMAP_NONE = 0, /* undefined */ ++ VIRTIO_SND_CHMAP_NA, /* silent */ ++ VIRTIO_SND_CHMAP_MONO, /* mono stream */ ++ VIRTIO_SND_CHMAP_FL, /* front left */ ++ VIRTIO_SND_CHMAP_FR, /* front right */ ++ VIRTIO_SND_CHMAP_RL, /* rear left */ ++ VIRTIO_SND_CHMAP_RR, /* rear right */ ++ VIRTIO_SND_CHMAP_FC, /* front center */ ++ VIRTIO_SND_CHMAP_LFE, /* low frequency (LFE) */ ++ VIRTIO_SND_CHMAP_SL, /* side left */ ++ VIRTIO_SND_CHMAP_SR, /* side right */ ++ VIRTIO_SND_CHMAP_RC, /* rear center */ ++ VIRTIO_SND_CHMAP_FLC, /* front left center */ ++ VIRTIO_SND_CHMAP_FRC, /* front right center */ ++ VIRTIO_SND_CHMAP_RLC, /* rear left center */ ++ VIRTIO_SND_CHMAP_RRC, /* rear right center */ ++ VIRTIO_SND_CHMAP_FLW, /* front left wide */ ++ VIRTIO_SND_CHMAP_FRW, /* front right wide */ ++ VIRTIO_SND_CHMAP_FLH, /* front left high */ ++ VIRTIO_SND_CHMAP_FCH, /* front center high */ ++ VIRTIO_SND_CHMAP_FRH, /* front right high */ ++ VIRTIO_SND_CHMAP_TC, /* top center */ ++ VIRTIO_SND_CHMAP_TFL, /* top front left */ ++ VIRTIO_SND_CHMAP_TFR, /* top front right */ ++ VIRTIO_SND_CHMAP_TFC, /* top front center */ ++ VIRTIO_SND_CHMAP_TRL, /* top rear left */ ++ VIRTIO_SND_CHMAP_TRR, /* top rear right */ ++ VIRTIO_SND_CHMAP_TRC, /* top rear center */ ++ VIRTIO_SND_CHMAP_TFLC, /* top front left center */ ++ VIRTIO_SND_CHMAP_TFRC, /* top front right center */ ++ VIRTIO_SND_CHMAP_TSL, /* top side left */ ++ VIRTIO_SND_CHMAP_TSR, /* top side right */ ++ VIRTIO_SND_CHMAP_LLFE, /* left LFE */ ++ VIRTIO_SND_CHMAP_RLFE, /* right LFE */ ++ VIRTIO_SND_CHMAP_BC, /* bottom center */ ++ VIRTIO_SND_CHMAP_BLC, /* bottom left center */ ++ VIRTIO_SND_CHMAP_BRC /* bottom right center */ ++}; ++ ++/* maximum possible number of channels */ ++#define VIRTIO_SND_CHMAP_MAX_SIZE 18 ++ ++struct virtio_snd_chmap_info { ++ /* common header */ ++ struct virtio_snd_info hdr; ++ /* 
dataflow direction (VIRTIO_SND_D_XXX) */ ++ uint8_t direction; ++ /* # of valid channel position values */ ++ uint8_t channels; ++ /* channel position values (VIRTIO_SND_CHMAP_XXX) */ ++ uint8_t positions[VIRTIO_SND_CHMAP_MAX_SIZE]; ++}; ++ ++#endif /* VIRTIO_SND_IF_H */ +diff --git a/include/standard-headers/rdma/vmw_pvrdma-abi.h b/include/standard-headers/rdma/vmw_pvrdma-abi.h +index 0989426a3f..c30182a7ae 100644 +--- a/include/standard-headers/rdma/vmw_pvrdma-abi.h ++++ b/include/standard-headers/rdma/vmw_pvrdma-abi.h +@@ -133,6 +133,13 @@ enum pvrdma_wc_flags { + PVRDMA_WC_FLAGS_MAX = PVRDMA_WC_WITH_NETWORK_HDR_TYPE, + }; + ++enum pvrdma_network_type { ++ PVRDMA_NETWORK_IB, ++ PVRDMA_NETWORK_ROCE_V1 = PVRDMA_NETWORK_IB, ++ PVRDMA_NETWORK_IPV4, ++ PVRDMA_NETWORK_IPV6 ++}; ++ + struct pvrdma_alloc_ucontext_resp { + uint32_t qp_tab_size; + uint32_t reserved; +diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h +index 7287529177..6de5a7fc06 100644 +--- a/linux-headers/asm-generic/unistd.h ++++ b/linux-headers/asm-generic/unistd.h +@@ -861,9 +861,20 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2) + __SYSCALL(__NR_process_madvise, sys_process_madvise) + #define __NR_epoll_pwait2 441 + __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) ++#define __NR_mount_setattr 442 ++__SYSCALL(__NR_mount_setattr, sys_mount_setattr) ++#define __NR_quotactl_path 443 ++__SYSCALL(__NR_quotactl_path, sys_quotactl_path) ++ ++#define __NR_landlock_create_ruleset 444 ++__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) ++#define __NR_landlock_add_rule 445 ++__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) ++#define __NR_landlock_restrict_self 446 ++__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) + + #undef __NR_syscalls +-#define __NR_syscalls 442 ++#define __NR_syscalls 447 + + /* + * 32 bit systems traditionally used different +diff --git a/linux-headers/asm-mips/unistd_n32.h 
b/linux-headers/asm-mips/unistd_n32.h +index 59e53b6e07..fce51fee09 100644 +--- a/linux-headers/asm-mips/unistd_n32.h ++++ b/linux-headers/asm-mips/unistd_n32.h +@@ -1,376 +1,379 @@ +-#ifndef _ASM_MIPS_UNISTD_N32_H +-#define _ASM_MIPS_UNISTD_N32_H ++#ifndef _ASM_UNISTD_N32_H ++#define _ASM_UNISTD_N32_H + +-#define __NR_read (__NR_Linux + 0) +-#define __NR_write (__NR_Linux + 1) +-#define __NR_open (__NR_Linux + 2) +-#define __NR_close (__NR_Linux + 3) +-#define __NR_stat (__NR_Linux + 4) +-#define __NR_fstat (__NR_Linux + 5) +-#define __NR_lstat (__NR_Linux + 6) +-#define __NR_poll (__NR_Linux + 7) +-#define __NR_lseek (__NR_Linux + 8) +-#define __NR_mmap (__NR_Linux + 9) +-#define __NR_mprotect (__NR_Linux + 10) +-#define __NR_munmap (__NR_Linux + 11) +-#define __NR_brk (__NR_Linux + 12) +-#define __NR_rt_sigaction (__NR_Linux + 13) +-#define __NR_rt_sigprocmask (__NR_Linux + 14) +-#define __NR_ioctl (__NR_Linux + 15) +-#define __NR_pread64 (__NR_Linux + 16) +-#define __NR_pwrite64 (__NR_Linux + 17) +-#define __NR_readv (__NR_Linux + 18) +-#define __NR_writev (__NR_Linux + 19) +-#define __NR_access (__NR_Linux + 20) +-#define __NR_pipe (__NR_Linux + 21) +-#define __NR__newselect (__NR_Linux + 22) +-#define __NR_sched_yield (__NR_Linux + 23) +-#define __NR_mremap (__NR_Linux + 24) +-#define __NR_msync (__NR_Linux + 25) +-#define __NR_mincore (__NR_Linux + 26) +-#define __NR_madvise (__NR_Linux + 27) +-#define __NR_shmget (__NR_Linux + 28) +-#define __NR_shmat (__NR_Linux + 29) +-#define __NR_shmctl (__NR_Linux + 30) +-#define __NR_dup (__NR_Linux + 31) +-#define __NR_dup2 (__NR_Linux + 32) +-#define __NR_pause (__NR_Linux + 33) +-#define __NR_nanosleep (__NR_Linux + 34) +-#define __NR_getitimer (__NR_Linux + 35) +-#define __NR_setitimer (__NR_Linux + 36) +-#define __NR_alarm (__NR_Linux + 37) +-#define __NR_getpid (__NR_Linux + 38) +-#define __NR_sendfile (__NR_Linux + 39) +-#define __NR_socket (__NR_Linux + 40) +-#define __NR_connect (__NR_Linux + 41) +-#define 
__NR_accept (__NR_Linux + 42) +-#define __NR_sendto (__NR_Linux + 43) +-#define __NR_recvfrom (__NR_Linux + 44) +-#define __NR_sendmsg (__NR_Linux + 45) +-#define __NR_recvmsg (__NR_Linux + 46) +-#define __NR_shutdown (__NR_Linux + 47) +-#define __NR_bind (__NR_Linux + 48) +-#define __NR_listen (__NR_Linux + 49) +-#define __NR_getsockname (__NR_Linux + 50) +-#define __NR_getpeername (__NR_Linux + 51) +-#define __NR_socketpair (__NR_Linux + 52) +-#define __NR_setsockopt (__NR_Linux + 53) +-#define __NR_getsockopt (__NR_Linux + 54) +-#define __NR_clone (__NR_Linux + 55) +-#define __NR_fork (__NR_Linux + 56) +-#define __NR_execve (__NR_Linux + 57) +-#define __NR_exit (__NR_Linux + 58) +-#define __NR_wait4 (__NR_Linux + 59) +-#define __NR_kill (__NR_Linux + 60) +-#define __NR_uname (__NR_Linux + 61) +-#define __NR_semget (__NR_Linux + 62) +-#define __NR_semop (__NR_Linux + 63) +-#define __NR_semctl (__NR_Linux + 64) +-#define __NR_shmdt (__NR_Linux + 65) +-#define __NR_msgget (__NR_Linux + 66) +-#define __NR_msgsnd (__NR_Linux + 67) +-#define __NR_msgrcv (__NR_Linux + 68) +-#define __NR_msgctl (__NR_Linux + 69) +-#define __NR_fcntl (__NR_Linux + 70) +-#define __NR_flock (__NR_Linux + 71) +-#define __NR_fsync (__NR_Linux + 72) +-#define __NR_fdatasync (__NR_Linux + 73) +-#define __NR_truncate (__NR_Linux + 74) +-#define __NR_ftruncate (__NR_Linux + 75) +-#define __NR_getdents (__NR_Linux + 76) +-#define __NR_getcwd (__NR_Linux + 77) +-#define __NR_chdir (__NR_Linux + 78) +-#define __NR_fchdir (__NR_Linux + 79) +-#define __NR_rename (__NR_Linux + 80) +-#define __NR_mkdir (__NR_Linux + 81) +-#define __NR_rmdir (__NR_Linux + 82) +-#define __NR_creat (__NR_Linux + 83) +-#define __NR_link (__NR_Linux + 84) +-#define __NR_unlink (__NR_Linux + 85) +-#define __NR_symlink (__NR_Linux + 86) +-#define __NR_readlink (__NR_Linux + 87) +-#define __NR_chmod (__NR_Linux + 88) +-#define __NR_fchmod (__NR_Linux + 89) +-#define __NR_chown (__NR_Linux + 90) +-#define __NR_fchown 
(__NR_Linux + 91) +-#define __NR_lchown (__NR_Linux + 92) +-#define __NR_umask (__NR_Linux + 93) +-#define __NR_gettimeofday (__NR_Linux + 94) +-#define __NR_getrlimit (__NR_Linux + 95) +-#define __NR_getrusage (__NR_Linux + 96) +-#define __NR_sysinfo (__NR_Linux + 97) +-#define __NR_times (__NR_Linux + 98) +-#define __NR_ptrace (__NR_Linux + 99) +-#define __NR_getuid (__NR_Linux + 100) +-#define __NR_syslog (__NR_Linux + 101) +-#define __NR_getgid (__NR_Linux + 102) +-#define __NR_setuid (__NR_Linux + 103) +-#define __NR_setgid (__NR_Linux + 104) +-#define __NR_geteuid (__NR_Linux + 105) +-#define __NR_getegid (__NR_Linux + 106) +-#define __NR_setpgid (__NR_Linux + 107) +-#define __NR_getppid (__NR_Linux + 108) +-#define __NR_getpgrp (__NR_Linux + 109) +-#define __NR_setsid (__NR_Linux + 110) +-#define __NR_setreuid (__NR_Linux + 111) +-#define __NR_setregid (__NR_Linux + 112) +-#define __NR_getgroups (__NR_Linux + 113) +-#define __NR_setgroups (__NR_Linux + 114) +-#define __NR_setresuid (__NR_Linux + 115) +-#define __NR_getresuid (__NR_Linux + 116) +-#define __NR_setresgid (__NR_Linux + 117) +-#define __NR_getresgid (__NR_Linux + 118) +-#define __NR_getpgid (__NR_Linux + 119) +-#define __NR_setfsuid (__NR_Linux + 120) +-#define __NR_setfsgid (__NR_Linux + 121) +-#define __NR_getsid (__NR_Linux + 122) +-#define __NR_capget (__NR_Linux + 123) +-#define __NR_capset (__NR_Linux + 124) +-#define __NR_rt_sigpending (__NR_Linux + 125) +-#define __NR_rt_sigtimedwait (__NR_Linux + 126) +-#define __NR_rt_sigqueueinfo (__NR_Linux + 127) +-#define __NR_rt_sigsuspend (__NR_Linux + 128) +-#define __NR_sigaltstack (__NR_Linux + 129) +-#define __NR_utime (__NR_Linux + 130) +-#define __NR_mknod (__NR_Linux + 131) +-#define __NR_personality (__NR_Linux + 132) +-#define __NR_ustat (__NR_Linux + 133) +-#define __NR_statfs (__NR_Linux + 134) +-#define __NR_fstatfs (__NR_Linux + 135) +-#define __NR_sysfs (__NR_Linux + 136) +-#define __NR_getpriority (__NR_Linux + 137) +-#define 
__NR_setpriority (__NR_Linux + 138) +-#define __NR_sched_setparam (__NR_Linux + 139) +-#define __NR_sched_getparam (__NR_Linux + 140) +-#define __NR_sched_setscheduler (__NR_Linux + 141) +-#define __NR_sched_getscheduler (__NR_Linux + 142) +-#define __NR_sched_get_priority_max (__NR_Linux + 143) +-#define __NR_sched_get_priority_min (__NR_Linux + 144) +-#define __NR_sched_rr_get_interval (__NR_Linux + 145) +-#define __NR_mlock (__NR_Linux + 146) +-#define __NR_munlock (__NR_Linux + 147) +-#define __NR_mlockall (__NR_Linux + 148) +-#define __NR_munlockall (__NR_Linux + 149) +-#define __NR_vhangup (__NR_Linux + 150) +-#define __NR_pivot_root (__NR_Linux + 151) +-#define __NR__sysctl (__NR_Linux + 152) +-#define __NR_prctl (__NR_Linux + 153) +-#define __NR_adjtimex (__NR_Linux + 154) +-#define __NR_setrlimit (__NR_Linux + 155) +-#define __NR_chroot (__NR_Linux + 156) +-#define __NR_sync (__NR_Linux + 157) +-#define __NR_acct (__NR_Linux + 158) +-#define __NR_settimeofday (__NR_Linux + 159) +-#define __NR_mount (__NR_Linux + 160) +-#define __NR_umount2 (__NR_Linux + 161) +-#define __NR_swapon (__NR_Linux + 162) +-#define __NR_swapoff (__NR_Linux + 163) +-#define __NR_reboot (__NR_Linux + 164) +-#define __NR_sethostname (__NR_Linux + 165) +-#define __NR_setdomainname (__NR_Linux + 166) +-#define __NR_create_module (__NR_Linux + 167) +-#define __NR_init_module (__NR_Linux + 168) +-#define __NR_delete_module (__NR_Linux + 169) +-#define __NR_get_kernel_syms (__NR_Linux + 170) +-#define __NR_query_module (__NR_Linux + 171) +-#define __NR_quotactl (__NR_Linux + 172) +-#define __NR_nfsservctl (__NR_Linux + 173) +-#define __NR_getpmsg (__NR_Linux + 174) +-#define __NR_putpmsg (__NR_Linux + 175) +-#define __NR_afs_syscall (__NR_Linux + 176) +-#define __NR_reserved177 (__NR_Linux + 177) +-#define __NR_gettid (__NR_Linux + 178) +-#define __NR_readahead (__NR_Linux + 179) +-#define __NR_setxattr (__NR_Linux + 180) +-#define __NR_lsetxattr (__NR_Linux + 181) +-#define 
__NR_fsetxattr (__NR_Linux + 182) +-#define __NR_getxattr (__NR_Linux + 183) +-#define __NR_lgetxattr (__NR_Linux + 184) +-#define __NR_fgetxattr (__NR_Linux + 185) +-#define __NR_listxattr (__NR_Linux + 186) +-#define __NR_llistxattr (__NR_Linux + 187) +-#define __NR_flistxattr (__NR_Linux + 188) +-#define __NR_removexattr (__NR_Linux + 189) +-#define __NR_lremovexattr (__NR_Linux + 190) +-#define __NR_fremovexattr (__NR_Linux + 191) +-#define __NR_tkill (__NR_Linux + 192) +-#define __NR_reserved193 (__NR_Linux + 193) +-#define __NR_futex (__NR_Linux + 194) +-#define __NR_sched_setaffinity (__NR_Linux + 195) +-#define __NR_sched_getaffinity (__NR_Linux + 196) +-#define __NR_cacheflush (__NR_Linux + 197) +-#define __NR_cachectl (__NR_Linux + 198) +-#define __NR_sysmips (__NR_Linux + 199) +-#define __NR_io_setup (__NR_Linux + 200) +-#define __NR_io_destroy (__NR_Linux + 201) +-#define __NR_io_getevents (__NR_Linux + 202) +-#define __NR_io_submit (__NR_Linux + 203) +-#define __NR_io_cancel (__NR_Linux + 204) +-#define __NR_exit_group (__NR_Linux + 205) +-#define __NR_lookup_dcookie (__NR_Linux + 206) +-#define __NR_epoll_create (__NR_Linux + 207) +-#define __NR_epoll_ctl (__NR_Linux + 208) +-#define __NR_epoll_wait (__NR_Linux + 209) +-#define __NR_remap_file_pages (__NR_Linux + 210) +-#define __NR_rt_sigreturn (__NR_Linux + 211) +-#define __NR_fcntl64 (__NR_Linux + 212) +-#define __NR_set_tid_address (__NR_Linux + 213) +-#define __NR_restart_syscall (__NR_Linux + 214) +-#define __NR_semtimedop (__NR_Linux + 215) +-#define __NR_fadvise64 (__NR_Linux + 216) +-#define __NR_statfs64 (__NR_Linux + 217) +-#define __NR_fstatfs64 (__NR_Linux + 218) +-#define __NR_sendfile64 (__NR_Linux + 219) +-#define __NR_timer_create (__NR_Linux + 220) +-#define __NR_timer_settime (__NR_Linux + 221) +-#define __NR_timer_gettime (__NR_Linux + 222) +-#define __NR_timer_getoverrun (__NR_Linux + 223) +-#define __NR_timer_delete (__NR_Linux + 224) +-#define __NR_clock_settime (__NR_Linux + 
225) +-#define __NR_clock_gettime (__NR_Linux + 226) +-#define __NR_clock_getres (__NR_Linux + 227) +-#define __NR_clock_nanosleep (__NR_Linux + 228) +-#define __NR_tgkill (__NR_Linux + 229) +-#define __NR_utimes (__NR_Linux + 230) +-#define __NR_mbind (__NR_Linux + 231) +-#define __NR_get_mempolicy (__NR_Linux + 232) +-#define __NR_set_mempolicy (__NR_Linux + 233) +-#define __NR_mq_open (__NR_Linux + 234) +-#define __NR_mq_unlink (__NR_Linux + 235) +-#define __NR_mq_timedsend (__NR_Linux + 236) +-#define __NR_mq_timedreceive (__NR_Linux + 237) +-#define __NR_mq_notify (__NR_Linux + 238) +-#define __NR_mq_getsetattr (__NR_Linux + 239) +-#define __NR_vserver (__NR_Linux + 240) +-#define __NR_waitid (__NR_Linux + 241) +-#define __NR_add_key (__NR_Linux + 243) +-#define __NR_request_key (__NR_Linux + 244) +-#define __NR_keyctl (__NR_Linux + 245) +-#define __NR_set_thread_area (__NR_Linux + 246) +-#define __NR_inotify_init (__NR_Linux + 247) +-#define __NR_inotify_add_watch (__NR_Linux + 248) +-#define __NR_inotify_rm_watch (__NR_Linux + 249) +-#define __NR_migrate_pages (__NR_Linux + 250) +-#define __NR_openat (__NR_Linux + 251) +-#define __NR_mkdirat (__NR_Linux + 252) +-#define __NR_mknodat (__NR_Linux + 253) +-#define __NR_fchownat (__NR_Linux + 254) +-#define __NR_futimesat (__NR_Linux + 255) +-#define __NR_newfstatat (__NR_Linux + 256) +-#define __NR_unlinkat (__NR_Linux + 257) +-#define __NR_renameat (__NR_Linux + 258) +-#define __NR_linkat (__NR_Linux + 259) +-#define __NR_symlinkat (__NR_Linux + 260) +-#define __NR_readlinkat (__NR_Linux + 261) +-#define __NR_fchmodat (__NR_Linux + 262) +-#define __NR_faccessat (__NR_Linux + 263) +-#define __NR_pselect6 (__NR_Linux + 264) +-#define __NR_ppoll (__NR_Linux + 265) +-#define __NR_unshare (__NR_Linux + 266) +-#define __NR_splice (__NR_Linux + 267) +-#define __NR_sync_file_range (__NR_Linux + 268) +-#define __NR_tee (__NR_Linux + 269) +-#define __NR_vmsplice (__NR_Linux + 270) +-#define __NR_move_pages (__NR_Linux + 
271) +-#define __NR_set_robust_list (__NR_Linux + 272) +-#define __NR_get_robust_list (__NR_Linux + 273) +-#define __NR_kexec_load (__NR_Linux + 274) +-#define __NR_getcpu (__NR_Linux + 275) +-#define __NR_epoll_pwait (__NR_Linux + 276) +-#define __NR_ioprio_set (__NR_Linux + 277) +-#define __NR_ioprio_get (__NR_Linux + 278) +-#define __NR_utimensat (__NR_Linux + 279) +-#define __NR_signalfd (__NR_Linux + 280) +-#define __NR_timerfd (__NR_Linux + 281) +-#define __NR_eventfd (__NR_Linux + 282) +-#define __NR_fallocate (__NR_Linux + 283) +-#define __NR_timerfd_create (__NR_Linux + 284) +-#define __NR_timerfd_gettime (__NR_Linux + 285) +-#define __NR_timerfd_settime (__NR_Linux + 286) +-#define __NR_signalfd4 (__NR_Linux + 287) +-#define __NR_eventfd2 (__NR_Linux + 288) +-#define __NR_epoll_create1 (__NR_Linux + 289) +-#define __NR_dup3 (__NR_Linux + 290) +-#define __NR_pipe2 (__NR_Linux + 291) +-#define __NR_inotify_init1 (__NR_Linux + 292) +-#define __NR_preadv (__NR_Linux + 293) +-#define __NR_pwritev (__NR_Linux + 294) +-#define __NR_rt_tgsigqueueinfo (__NR_Linux + 295) +-#define __NR_perf_event_open (__NR_Linux + 296) +-#define __NR_accept4 (__NR_Linux + 297) +-#define __NR_recvmmsg (__NR_Linux + 298) +-#define __NR_getdents64 (__NR_Linux + 299) +-#define __NR_fanotify_init (__NR_Linux + 300) +-#define __NR_fanotify_mark (__NR_Linux + 301) +-#define __NR_prlimit64 (__NR_Linux + 302) +-#define __NR_name_to_handle_at (__NR_Linux + 303) +-#define __NR_open_by_handle_at (__NR_Linux + 304) +-#define __NR_clock_adjtime (__NR_Linux + 305) +-#define __NR_syncfs (__NR_Linux + 306) +-#define __NR_sendmmsg (__NR_Linux + 307) +-#define __NR_setns (__NR_Linux + 308) +-#define __NR_process_vm_readv (__NR_Linux + 309) +-#define __NR_process_vm_writev (__NR_Linux + 310) +-#define __NR_kcmp (__NR_Linux + 311) +-#define __NR_finit_module (__NR_Linux + 312) +-#define __NR_sched_setattr (__NR_Linux + 313) +-#define __NR_sched_getattr (__NR_Linux + 314) +-#define __NR_renameat2 
(__NR_Linux + 315) +-#define __NR_seccomp (__NR_Linux + 316) +-#define __NR_getrandom (__NR_Linux + 317) +-#define __NR_memfd_create (__NR_Linux + 318) +-#define __NR_bpf (__NR_Linux + 319) +-#define __NR_execveat (__NR_Linux + 320) +-#define __NR_userfaultfd (__NR_Linux + 321) +-#define __NR_membarrier (__NR_Linux + 322) +-#define __NR_mlock2 (__NR_Linux + 323) +-#define __NR_copy_file_range (__NR_Linux + 324) +-#define __NR_preadv2 (__NR_Linux + 325) +-#define __NR_pwritev2 (__NR_Linux + 326) +-#define __NR_pkey_mprotect (__NR_Linux + 327) +-#define __NR_pkey_alloc (__NR_Linux + 328) +-#define __NR_pkey_free (__NR_Linux + 329) +-#define __NR_statx (__NR_Linux + 330) +-#define __NR_rseq (__NR_Linux + 331) +-#define __NR_io_pgetevents (__NR_Linux + 332) +-#define __NR_clock_gettime64 (__NR_Linux + 403) +-#define __NR_clock_settime64 (__NR_Linux + 404) +-#define __NR_clock_adjtime64 (__NR_Linux + 405) +-#define __NR_clock_getres_time64 (__NR_Linux + 406) +-#define __NR_clock_nanosleep_time64 (__NR_Linux + 407) +-#define __NR_timer_gettime64 (__NR_Linux + 408) +-#define __NR_timer_settime64 (__NR_Linux + 409) +-#define __NR_timerfd_gettime64 (__NR_Linux + 410) +-#define __NR_timerfd_settime64 (__NR_Linux + 411) +-#define __NR_utimensat_time64 (__NR_Linux + 412) +-#define __NR_pselect6_time64 (__NR_Linux + 413) +-#define __NR_ppoll_time64 (__NR_Linux + 414) +-#define __NR_io_pgetevents_time64 (__NR_Linux + 416) +-#define __NR_recvmmsg_time64 (__NR_Linux + 417) +-#define __NR_mq_timedsend_time64 (__NR_Linux + 418) +-#define __NR_mq_timedreceive_time64 (__NR_Linux + 419) +-#define __NR_semtimedop_time64 (__NR_Linux + 420) +-#define __NR_rt_sigtimedwait_time64 (__NR_Linux + 421) +-#define __NR_futex_time64 (__NR_Linux + 422) +-#define __NR_sched_rr_get_interval_time64 (__NR_Linux + 423) +-#define __NR_pidfd_send_signal (__NR_Linux + 424) +-#define __NR_io_uring_setup (__NR_Linux + 425) +-#define __NR_io_uring_enter (__NR_Linux + 426) +-#define __NR_io_uring_register 
(__NR_Linux + 427) +-#define __NR_open_tree (__NR_Linux + 428) +-#define __NR_move_mount (__NR_Linux + 429) +-#define __NR_fsopen (__NR_Linux + 430) +-#define __NR_fsconfig (__NR_Linux + 431) +-#define __NR_fsmount (__NR_Linux + 432) +-#define __NR_fspick (__NR_Linux + 433) +-#define __NR_pidfd_open (__NR_Linux + 434) +-#define __NR_clone3 (__NR_Linux + 435) +-#define __NR_close_range (__NR_Linux + 436) +-#define __NR_openat2 (__NR_Linux + 437) +-#define __NR_pidfd_getfd (__NR_Linux + 438) +-#define __NR_faccessat2 (__NR_Linux + 439) +-#define __NR_process_madvise (__NR_Linux + 440) +-#define __NR_epoll_pwait2 (__NR_Linux + 441) ++#define __NR_read (__NR_Linux + 0) ++#define __NR_write (__NR_Linux + 1) ++#define __NR_open (__NR_Linux + 2) ++#define __NR_close (__NR_Linux + 3) ++#define __NR_stat (__NR_Linux + 4) ++#define __NR_fstat (__NR_Linux + 5) ++#define __NR_lstat (__NR_Linux + 6) ++#define __NR_poll (__NR_Linux + 7) ++#define __NR_lseek (__NR_Linux + 8) ++#define __NR_mmap (__NR_Linux + 9) ++#define __NR_mprotect (__NR_Linux + 10) ++#define __NR_munmap (__NR_Linux + 11) ++#define __NR_brk (__NR_Linux + 12) ++#define __NR_rt_sigaction (__NR_Linux + 13) ++#define __NR_rt_sigprocmask (__NR_Linux + 14) ++#define __NR_ioctl (__NR_Linux + 15) ++#define __NR_pread64 (__NR_Linux + 16) ++#define __NR_pwrite64 (__NR_Linux + 17) ++#define __NR_readv (__NR_Linux + 18) ++#define __NR_writev (__NR_Linux + 19) ++#define __NR_access (__NR_Linux + 20) ++#define __NR_pipe (__NR_Linux + 21) ++#define __NR__newselect (__NR_Linux + 22) ++#define __NR_sched_yield (__NR_Linux + 23) ++#define __NR_mremap (__NR_Linux + 24) ++#define __NR_msync (__NR_Linux + 25) ++#define __NR_mincore (__NR_Linux + 26) ++#define __NR_madvise (__NR_Linux + 27) ++#define __NR_shmget (__NR_Linux + 28) ++#define __NR_shmat (__NR_Linux + 29) ++#define __NR_shmctl (__NR_Linux + 30) ++#define __NR_dup (__NR_Linux + 31) ++#define __NR_dup2 (__NR_Linux + 32) ++#define __NR_pause (__NR_Linux + 33) ++#define 
__NR_nanosleep (__NR_Linux + 34) ++#define __NR_getitimer (__NR_Linux + 35) ++#define __NR_setitimer (__NR_Linux + 36) ++#define __NR_alarm (__NR_Linux + 37) ++#define __NR_getpid (__NR_Linux + 38) ++#define __NR_sendfile (__NR_Linux + 39) ++#define __NR_socket (__NR_Linux + 40) ++#define __NR_connect (__NR_Linux + 41) ++#define __NR_accept (__NR_Linux + 42) ++#define __NR_sendto (__NR_Linux + 43) ++#define __NR_recvfrom (__NR_Linux + 44) ++#define __NR_sendmsg (__NR_Linux + 45) ++#define __NR_recvmsg (__NR_Linux + 46) ++#define __NR_shutdown (__NR_Linux + 47) ++#define __NR_bind (__NR_Linux + 48) ++#define __NR_listen (__NR_Linux + 49) ++#define __NR_getsockname (__NR_Linux + 50) ++#define __NR_getpeername (__NR_Linux + 51) ++#define __NR_socketpair (__NR_Linux + 52) ++#define __NR_setsockopt (__NR_Linux + 53) ++#define __NR_getsockopt (__NR_Linux + 54) ++#define __NR_clone (__NR_Linux + 55) ++#define __NR_fork (__NR_Linux + 56) ++#define __NR_execve (__NR_Linux + 57) ++#define __NR_exit (__NR_Linux + 58) ++#define __NR_wait4 (__NR_Linux + 59) ++#define __NR_kill (__NR_Linux + 60) ++#define __NR_uname (__NR_Linux + 61) ++#define __NR_semget (__NR_Linux + 62) ++#define __NR_semop (__NR_Linux + 63) ++#define __NR_semctl (__NR_Linux + 64) ++#define __NR_shmdt (__NR_Linux + 65) ++#define __NR_msgget (__NR_Linux + 66) ++#define __NR_msgsnd (__NR_Linux + 67) ++#define __NR_msgrcv (__NR_Linux + 68) ++#define __NR_msgctl (__NR_Linux + 69) ++#define __NR_fcntl (__NR_Linux + 70) ++#define __NR_flock (__NR_Linux + 71) ++#define __NR_fsync (__NR_Linux + 72) ++#define __NR_fdatasync (__NR_Linux + 73) ++#define __NR_truncate (__NR_Linux + 74) ++#define __NR_ftruncate (__NR_Linux + 75) ++#define __NR_getdents (__NR_Linux + 76) ++#define __NR_getcwd (__NR_Linux + 77) ++#define __NR_chdir (__NR_Linux + 78) ++#define __NR_fchdir (__NR_Linux + 79) ++#define __NR_rename (__NR_Linux + 80) ++#define __NR_mkdir (__NR_Linux + 81) ++#define __NR_rmdir (__NR_Linux + 82) ++#define 
__NR_creat (__NR_Linux + 83) ++#define __NR_link (__NR_Linux + 84) ++#define __NR_unlink (__NR_Linux + 85) ++#define __NR_symlink (__NR_Linux + 86) ++#define __NR_readlink (__NR_Linux + 87) ++#define __NR_chmod (__NR_Linux + 88) ++#define __NR_fchmod (__NR_Linux + 89) ++#define __NR_chown (__NR_Linux + 90) ++#define __NR_fchown (__NR_Linux + 91) ++#define __NR_lchown (__NR_Linux + 92) ++#define __NR_umask (__NR_Linux + 93) ++#define __NR_gettimeofday (__NR_Linux + 94) ++#define __NR_getrlimit (__NR_Linux + 95) ++#define __NR_getrusage (__NR_Linux + 96) ++#define __NR_sysinfo (__NR_Linux + 97) ++#define __NR_times (__NR_Linux + 98) ++#define __NR_ptrace (__NR_Linux + 99) ++#define __NR_getuid (__NR_Linux + 100) ++#define __NR_syslog (__NR_Linux + 101) ++#define __NR_getgid (__NR_Linux + 102) ++#define __NR_setuid (__NR_Linux + 103) ++#define __NR_setgid (__NR_Linux + 104) ++#define __NR_geteuid (__NR_Linux + 105) ++#define __NR_getegid (__NR_Linux + 106) ++#define __NR_setpgid (__NR_Linux + 107) ++#define __NR_getppid (__NR_Linux + 108) ++#define __NR_getpgrp (__NR_Linux + 109) ++#define __NR_setsid (__NR_Linux + 110) ++#define __NR_setreuid (__NR_Linux + 111) ++#define __NR_setregid (__NR_Linux + 112) ++#define __NR_getgroups (__NR_Linux + 113) ++#define __NR_setgroups (__NR_Linux + 114) ++#define __NR_setresuid (__NR_Linux + 115) ++#define __NR_getresuid (__NR_Linux + 116) ++#define __NR_setresgid (__NR_Linux + 117) ++#define __NR_getresgid (__NR_Linux + 118) ++#define __NR_getpgid (__NR_Linux + 119) ++#define __NR_setfsuid (__NR_Linux + 120) ++#define __NR_setfsgid (__NR_Linux + 121) ++#define __NR_getsid (__NR_Linux + 122) ++#define __NR_capget (__NR_Linux + 123) ++#define __NR_capset (__NR_Linux + 124) ++#define __NR_rt_sigpending (__NR_Linux + 125) ++#define __NR_rt_sigtimedwait (__NR_Linux + 126) ++#define __NR_rt_sigqueueinfo (__NR_Linux + 127) ++#define __NR_rt_sigsuspend (__NR_Linux + 128) ++#define __NR_sigaltstack (__NR_Linux + 129) ++#define __NR_utime 
(__NR_Linux + 130) ++#define __NR_mknod (__NR_Linux + 131) ++#define __NR_personality (__NR_Linux + 132) ++#define __NR_ustat (__NR_Linux + 133) ++#define __NR_statfs (__NR_Linux + 134) ++#define __NR_fstatfs (__NR_Linux + 135) ++#define __NR_sysfs (__NR_Linux + 136) ++#define __NR_getpriority (__NR_Linux + 137) ++#define __NR_setpriority (__NR_Linux + 138) ++#define __NR_sched_setparam (__NR_Linux + 139) ++#define __NR_sched_getparam (__NR_Linux + 140) ++#define __NR_sched_setscheduler (__NR_Linux + 141) ++#define __NR_sched_getscheduler (__NR_Linux + 142) ++#define __NR_sched_get_priority_max (__NR_Linux + 143) ++#define __NR_sched_get_priority_min (__NR_Linux + 144) ++#define __NR_sched_rr_get_interval (__NR_Linux + 145) ++#define __NR_mlock (__NR_Linux + 146) ++#define __NR_munlock (__NR_Linux + 147) ++#define __NR_mlockall (__NR_Linux + 148) ++#define __NR_munlockall (__NR_Linux + 149) ++#define __NR_vhangup (__NR_Linux + 150) ++#define __NR_pivot_root (__NR_Linux + 151) ++#define __NR__sysctl (__NR_Linux + 152) ++#define __NR_prctl (__NR_Linux + 153) ++#define __NR_adjtimex (__NR_Linux + 154) ++#define __NR_setrlimit (__NR_Linux + 155) ++#define __NR_chroot (__NR_Linux + 156) ++#define __NR_sync (__NR_Linux + 157) ++#define __NR_acct (__NR_Linux + 158) ++#define __NR_settimeofday (__NR_Linux + 159) ++#define __NR_mount (__NR_Linux + 160) ++#define __NR_umount2 (__NR_Linux + 161) ++#define __NR_swapon (__NR_Linux + 162) ++#define __NR_swapoff (__NR_Linux + 163) ++#define __NR_reboot (__NR_Linux + 164) ++#define __NR_sethostname (__NR_Linux + 165) ++#define __NR_setdomainname (__NR_Linux + 166) ++#define __NR_create_module (__NR_Linux + 167) ++#define __NR_init_module (__NR_Linux + 168) ++#define __NR_delete_module (__NR_Linux + 169) ++#define __NR_get_kernel_syms (__NR_Linux + 170) ++#define __NR_query_module (__NR_Linux + 171) ++#define __NR_quotactl (__NR_Linux + 172) ++#define __NR_nfsservctl (__NR_Linux + 173) ++#define __NR_getpmsg (__NR_Linux + 174) 
++#define __NR_putpmsg (__NR_Linux + 175) ++#define __NR_afs_syscall (__NR_Linux + 176) ++#define __NR_reserved177 (__NR_Linux + 177) ++#define __NR_gettid (__NR_Linux + 178) ++#define __NR_readahead (__NR_Linux + 179) ++#define __NR_setxattr (__NR_Linux + 180) ++#define __NR_lsetxattr (__NR_Linux + 181) ++#define __NR_fsetxattr (__NR_Linux + 182) ++#define __NR_getxattr (__NR_Linux + 183) ++#define __NR_lgetxattr (__NR_Linux + 184) ++#define __NR_fgetxattr (__NR_Linux + 185) ++#define __NR_listxattr (__NR_Linux + 186) ++#define __NR_llistxattr (__NR_Linux + 187) ++#define __NR_flistxattr (__NR_Linux + 188) ++#define __NR_removexattr (__NR_Linux + 189) ++#define __NR_lremovexattr (__NR_Linux + 190) ++#define __NR_fremovexattr (__NR_Linux + 191) ++#define __NR_tkill (__NR_Linux + 192) ++#define __NR_reserved193 (__NR_Linux + 193) ++#define __NR_futex (__NR_Linux + 194) ++#define __NR_sched_setaffinity (__NR_Linux + 195) ++#define __NR_sched_getaffinity (__NR_Linux + 196) ++#define __NR_cacheflush (__NR_Linux + 197) ++#define __NR_cachectl (__NR_Linux + 198) ++#define __NR_sysmips (__NR_Linux + 199) ++#define __NR_io_setup (__NR_Linux + 200) ++#define __NR_io_destroy (__NR_Linux + 201) ++#define __NR_io_getevents (__NR_Linux + 202) ++#define __NR_io_submit (__NR_Linux + 203) ++#define __NR_io_cancel (__NR_Linux + 204) ++#define __NR_exit_group (__NR_Linux + 205) ++#define __NR_lookup_dcookie (__NR_Linux + 206) ++#define __NR_epoll_create (__NR_Linux + 207) ++#define __NR_epoll_ctl (__NR_Linux + 208) ++#define __NR_epoll_wait (__NR_Linux + 209) ++#define __NR_remap_file_pages (__NR_Linux + 210) ++#define __NR_rt_sigreturn (__NR_Linux + 211) ++#define __NR_fcntl64 (__NR_Linux + 212) ++#define __NR_set_tid_address (__NR_Linux + 213) ++#define __NR_restart_syscall (__NR_Linux + 214) ++#define __NR_semtimedop (__NR_Linux + 215) ++#define __NR_fadvise64 (__NR_Linux + 216) ++#define __NR_statfs64 (__NR_Linux + 217) ++#define __NR_fstatfs64 (__NR_Linux + 218) ++#define 
__NR_sendfile64 (__NR_Linux + 219) ++#define __NR_timer_create (__NR_Linux + 220) ++#define __NR_timer_settime (__NR_Linux + 221) ++#define __NR_timer_gettime (__NR_Linux + 222) ++#define __NR_timer_getoverrun (__NR_Linux + 223) ++#define __NR_timer_delete (__NR_Linux + 224) ++#define __NR_clock_settime (__NR_Linux + 225) ++#define __NR_clock_gettime (__NR_Linux + 226) ++#define __NR_clock_getres (__NR_Linux + 227) ++#define __NR_clock_nanosleep (__NR_Linux + 228) ++#define __NR_tgkill (__NR_Linux + 229) ++#define __NR_utimes (__NR_Linux + 230) ++#define __NR_mbind (__NR_Linux + 231) ++#define __NR_get_mempolicy (__NR_Linux + 232) ++#define __NR_set_mempolicy (__NR_Linux + 233) ++#define __NR_mq_open (__NR_Linux + 234) ++#define __NR_mq_unlink (__NR_Linux + 235) ++#define __NR_mq_timedsend (__NR_Linux + 236) ++#define __NR_mq_timedreceive (__NR_Linux + 237) ++#define __NR_mq_notify (__NR_Linux + 238) ++#define __NR_mq_getsetattr (__NR_Linux + 239) ++#define __NR_vserver (__NR_Linux + 240) ++#define __NR_waitid (__NR_Linux + 241) ++#define __NR_add_key (__NR_Linux + 243) ++#define __NR_request_key (__NR_Linux + 244) ++#define __NR_keyctl (__NR_Linux + 245) ++#define __NR_set_thread_area (__NR_Linux + 246) ++#define __NR_inotify_init (__NR_Linux + 247) ++#define __NR_inotify_add_watch (__NR_Linux + 248) ++#define __NR_inotify_rm_watch (__NR_Linux + 249) ++#define __NR_migrate_pages (__NR_Linux + 250) ++#define __NR_openat (__NR_Linux + 251) ++#define __NR_mkdirat (__NR_Linux + 252) ++#define __NR_mknodat (__NR_Linux + 253) ++#define __NR_fchownat (__NR_Linux + 254) ++#define __NR_futimesat (__NR_Linux + 255) ++#define __NR_newfstatat (__NR_Linux + 256) ++#define __NR_unlinkat (__NR_Linux + 257) ++#define __NR_renameat (__NR_Linux + 258) ++#define __NR_linkat (__NR_Linux + 259) ++#define __NR_symlinkat (__NR_Linux + 260) ++#define __NR_readlinkat (__NR_Linux + 261) ++#define __NR_fchmodat (__NR_Linux + 262) ++#define __NR_faccessat (__NR_Linux + 263) ++#define 
__NR_pselect6 (__NR_Linux + 264) ++#define __NR_ppoll (__NR_Linux + 265) ++#define __NR_unshare (__NR_Linux + 266) ++#define __NR_splice (__NR_Linux + 267) ++#define __NR_sync_file_range (__NR_Linux + 268) ++#define __NR_tee (__NR_Linux + 269) ++#define __NR_vmsplice (__NR_Linux + 270) ++#define __NR_move_pages (__NR_Linux + 271) ++#define __NR_set_robust_list (__NR_Linux + 272) ++#define __NR_get_robust_list (__NR_Linux + 273) ++#define __NR_kexec_load (__NR_Linux + 274) ++#define __NR_getcpu (__NR_Linux + 275) ++#define __NR_epoll_pwait (__NR_Linux + 276) ++#define __NR_ioprio_set (__NR_Linux + 277) ++#define __NR_ioprio_get (__NR_Linux + 278) ++#define __NR_utimensat (__NR_Linux + 279) ++#define __NR_signalfd (__NR_Linux + 280) ++#define __NR_timerfd (__NR_Linux + 281) ++#define __NR_eventfd (__NR_Linux + 282) ++#define __NR_fallocate (__NR_Linux + 283) ++#define __NR_timerfd_create (__NR_Linux + 284) ++#define __NR_timerfd_gettime (__NR_Linux + 285) ++#define __NR_timerfd_settime (__NR_Linux + 286) ++#define __NR_signalfd4 (__NR_Linux + 287) ++#define __NR_eventfd2 (__NR_Linux + 288) ++#define __NR_epoll_create1 (__NR_Linux + 289) ++#define __NR_dup3 (__NR_Linux + 290) ++#define __NR_pipe2 (__NR_Linux + 291) ++#define __NR_inotify_init1 (__NR_Linux + 292) ++#define __NR_preadv (__NR_Linux + 293) ++#define __NR_pwritev (__NR_Linux + 294) ++#define __NR_rt_tgsigqueueinfo (__NR_Linux + 295) ++#define __NR_perf_event_open (__NR_Linux + 296) ++#define __NR_accept4 (__NR_Linux + 297) ++#define __NR_recvmmsg (__NR_Linux + 298) ++#define __NR_getdents64 (__NR_Linux + 299) ++#define __NR_fanotify_init (__NR_Linux + 300) ++#define __NR_fanotify_mark (__NR_Linux + 301) ++#define __NR_prlimit64 (__NR_Linux + 302) ++#define __NR_name_to_handle_at (__NR_Linux + 303) ++#define __NR_open_by_handle_at (__NR_Linux + 304) ++#define __NR_clock_adjtime (__NR_Linux + 305) ++#define __NR_syncfs (__NR_Linux + 306) ++#define __NR_sendmmsg (__NR_Linux + 307) ++#define __NR_setns 
(__NR_Linux + 308) ++#define __NR_process_vm_readv (__NR_Linux + 309) ++#define __NR_process_vm_writev (__NR_Linux + 310) ++#define __NR_kcmp (__NR_Linux + 311) ++#define __NR_finit_module (__NR_Linux + 312) ++#define __NR_sched_setattr (__NR_Linux + 313) ++#define __NR_sched_getattr (__NR_Linux + 314) ++#define __NR_renameat2 (__NR_Linux + 315) ++#define __NR_seccomp (__NR_Linux + 316) ++#define __NR_getrandom (__NR_Linux + 317) ++#define __NR_memfd_create (__NR_Linux + 318) ++#define __NR_bpf (__NR_Linux + 319) ++#define __NR_execveat (__NR_Linux + 320) ++#define __NR_userfaultfd (__NR_Linux + 321) ++#define __NR_membarrier (__NR_Linux + 322) ++#define __NR_mlock2 (__NR_Linux + 323) ++#define __NR_copy_file_range (__NR_Linux + 324) ++#define __NR_preadv2 (__NR_Linux + 325) ++#define __NR_pwritev2 (__NR_Linux + 326) ++#define __NR_pkey_mprotect (__NR_Linux + 327) ++#define __NR_pkey_alloc (__NR_Linux + 328) ++#define __NR_pkey_free (__NR_Linux + 329) ++#define __NR_statx (__NR_Linux + 330) ++#define __NR_rseq (__NR_Linux + 331) ++#define __NR_io_pgetevents (__NR_Linux + 332) ++#define __NR_clock_gettime64 (__NR_Linux + 403) ++#define __NR_clock_settime64 (__NR_Linux + 404) ++#define __NR_clock_adjtime64 (__NR_Linux + 405) ++#define __NR_clock_getres_time64 (__NR_Linux + 406) ++#define __NR_clock_nanosleep_time64 (__NR_Linux + 407) ++#define __NR_timer_gettime64 (__NR_Linux + 408) ++#define __NR_timer_settime64 (__NR_Linux + 409) ++#define __NR_timerfd_gettime64 (__NR_Linux + 410) ++#define __NR_timerfd_settime64 (__NR_Linux + 411) ++#define __NR_utimensat_time64 (__NR_Linux + 412) ++#define __NR_pselect6_time64 (__NR_Linux + 413) ++#define __NR_ppoll_time64 (__NR_Linux + 414) ++#define __NR_io_pgetevents_time64 (__NR_Linux + 416) ++#define __NR_recvmmsg_time64 (__NR_Linux + 417) ++#define __NR_mq_timedsend_time64 (__NR_Linux + 418) ++#define __NR_mq_timedreceive_time64 (__NR_Linux + 419) ++#define __NR_semtimedop_time64 (__NR_Linux + 420) ++#define 
__NR_rt_sigtimedwait_time64 (__NR_Linux + 421) ++#define __NR_futex_time64 (__NR_Linux + 422) ++#define __NR_sched_rr_get_interval_time64 (__NR_Linux + 423) ++#define __NR_pidfd_send_signal (__NR_Linux + 424) ++#define __NR_io_uring_setup (__NR_Linux + 425) ++#define __NR_io_uring_enter (__NR_Linux + 426) ++#define __NR_io_uring_register (__NR_Linux + 427) ++#define __NR_open_tree (__NR_Linux + 428) ++#define __NR_move_mount (__NR_Linux + 429) ++#define __NR_fsopen (__NR_Linux + 430) ++#define __NR_fsconfig (__NR_Linux + 431) ++#define __NR_fsmount (__NR_Linux + 432) ++#define __NR_fspick (__NR_Linux + 433) ++#define __NR_pidfd_open (__NR_Linux + 434) ++#define __NR_clone3 (__NR_Linux + 435) ++#define __NR_close_range (__NR_Linux + 436) ++#define __NR_openat2 (__NR_Linux + 437) ++#define __NR_pidfd_getfd (__NR_Linux + 438) ++#define __NR_faccessat2 (__NR_Linux + 439) ++#define __NR_process_madvise (__NR_Linux + 440) ++#define __NR_epoll_pwait2 (__NR_Linux + 441) ++#define __NR_mount_setattr (__NR_Linux + 442) ++#define __NR_landlock_create_ruleset (__NR_Linux + 444) ++#define __NR_landlock_add_rule (__NR_Linux + 445) ++#define __NR_landlock_restrict_self (__NR_Linux + 446) + +- +-#endif /* _ASM_MIPS_UNISTD_N32_H */ ++#endif /* _ASM_UNISTD_N32_H */ +diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h +index 683558a7f8..0996001802 100644 +--- a/linux-headers/asm-mips/unistd_n64.h ++++ b/linux-headers/asm-mips/unistd_n64.h +@@ -1,352 +1,355 @@ +-#ifndef _ASM_MIPS_UNISTD_N64_H +-#define _ASM_MIPS_UNISTD_N64_H ++#ifndef _ASM_UNISTD_N64_H ++#define _ASM_UNISTD_N64_H + +-#define __NR_read (__NR_Linux + 0) +-#define __NR_write (__NR_Linux + 1) +-#define __NR_open (__NR_Linux + 2) +-#define __NR_close (__NR_Linux + 3) +-#define __NR_stat (__NR_Linux + 4) +-#define __NR_fstat (__NR_Linux + 5) +-#define __NR_lstat (__NR_Linux + 6) +-#define __NR_poll (__NR_Linux + 7) +-#define __NR_lseek (__NR_Linux + 8) +-#define __NR_mmap (__NR_Linux + 9) 
+-#define __NR_mprotect (__NR_Linux + 10) +-#define __NR_munmap (__NR_Linux + 11) +-#define __NR_brk (__NR_Linux + 12) +-#define __NR_rt_sigaction (__NR_Linux + 13) +-#define __NR_rt_sigprocmask (__NR_Linux + 14) +-#define __NR_ioctl (__NR_Linux + 15) +-#define __NR_pread64 (__NR_Linux + 16) +-#define __NR_pwrite64 (__NR_Linux + 17) +-#define __NR_readv (__NR_Linux + 18) +-#define __NR_writev (__NR_Linux + 19) +-#define __NR_access (__NR_Linux + 20) +-#define __NR_pipe (__NR_Linux + 21) +-#define __NR__newselect (__NR_Linux + 22) +-#define __NR_sched_yield (__NR_Linux + 23) +-#define __NR_mremap (__NR_Linux + 24) +-#define __NR_msync (__NR_Linux + 25) +-#define __NR_mincore (__NR_Linux + 26) +-#define __NR_madvise (__NR_Linux + 27) +-#define __NR_shmget (__NR_Linux + 28) +-#define __NR_shmat (__NR_Linux + 29) +-#define __NR_shmctl (__NR_Linux + 30) +-#define __NR_dup (__NR_Linux + 31) +-#define __NR_dup2 (__NR_Linux + 32) +-#define __NR_pause (__NR_Linux + 33) +-#define __NR_nanosleep (__NR_Linux + 34) +-#define __NR_getitimer (__NR_Linux + 35) +-#define __NR_setitimer (__NR_Linux + 36) +-#define __NR_alarm (__NR_Linux + 37) +-#define __NR_getpid (__NR_Linux + 38) +-#define __NR_sendfile (__NR_Linux + 39) +-#define __NR_socket (__NR_Linux + 40) +-#define __NR_connect (__NR_Linux + 41) +-#define __NR_accept (__NR_Linux + 42) +-#define __NR_sendto (__NR_Linux + 43) +-#define __NR_recvfrom (__NR_Linux + 44) +-#define __NR_sendmsg (__NR_Linux + 45) +-#define __NR_recvmsg (__NR_Linux + 46) +-#define __NR_shutdown (__NR_Linux + 47) +-#define __NR_bind (__NR_Linux + 48) +-#define __NR_listen (__NR_Linux + 49) +-#define __NR_getsockname (__NR_Linux + 50) +-#define __NR_getpeername (__NR_Linux + 51) +-#define __NR_socketpair (__NR_Linux + 52) +-#define __NR_setsockopt (__NR_Linux + 53) +-#define __NR_getsockopt (__NR_Linux + 54) +-#define __NR_clone (__NR_Linux + 55) +-#define __NR_fork (__NR_Linux + 56) +-#define __NR_execve (__NR_Linux + 57) +-#define __NR_exit 
(__NR_Linux + 58) +-#define __NR_wait4 (__NR_Linux + 59) +-#define __NR_kill (__NR_Linux + 60) +-#define __NR_uname (__NR_Linux + 61) +-#define __NR_semget (__NR_Linux + 62) +-#define __NR_semop (__NR_Linux + 63) +-#define __NR_semctl (__NR_Linux + 64) +-#define __NR_shmdt (__NR_Linux + 65) +-#define __NR_msgget (__NR_Linux + 66) +-#define __NR_msgsnd (__NR_Linux + 67) +-#define __NR_msgrcv (__NR_Linux + 68) +-#define __NR_msgctl (__NR_Linux + 69) +-#define __NR_fcntl (__NR_Linux + 70) +-#define __NR_flock (__NR_Linux + 71) +-#define __NR_fsync (__NR_Linux + 72) +-#define __NR_fdatasync (__NR_Linux + 73) +-#define __NR_truncate (__NR_Linux + 74) +-#define __NR_ftruncate (__NR_Linux + 75) +-#define __NR_getdents (__NR_Linux + 76) +-#define __NR_getcwd (__NR_Linux + 77) +-#define __NR_chdir (__NR_Linux + 78) +-#define __NR_fchdir (__NR_Linux + 79) +-#define __NR_rename (__NR_Linux + 80) +-#define __NR_mkdir (__NR_Linux + 81) +-#define __NR_rmdir (__NR_Linux + 82) +-#define __NR_creat (__NR_Linux + 83) +-#define __NR_link (__NR_Linux + 84) +-#define __NR_unlink (__NR_Linux + 85) +-#define __NR_symlink (__NR_Linux + 86) +-#define __NR_readlink (__NR_Linux + 87) +-#define __NR_chmod (__NR_Linux + 88) +-#define __NR_fchmod (__NR_Linux + 89) +-#define __NR_chown (__NR_Linux + 90) +-#define __NR_fchown (__NR_Linux + 91) +-#define __NR_lchown (__NR_Linux + 92) +-#define __NR_umask (__NR_Linux + 93) +-#define __NR_gettimeofday (__NR_Linux + 94) +-#define __NR_getrlimit (__NR_Linux + 95) +-#define __NR_getrusage (__NR_Linux + 96) +-#define __NR_sysinfo (__NR_Linux + 97) +-#define __NR_times (__NR_Linux + 98) +-#define __NR_ptrace (__NR_Linux + 99) +-#define __NR_getuid (__NR_Linux + 100) +-#define __NR_syslog (__NR_Linux + 101) +-#define __NR_getgid (__NR_Linux + 102) +-#define __NR_setuid (__NR_Linux + 103) +-#define __NR_setgid (__NR_Linux + 104) +-#define __NR_geteuid (__NR_Linux + 105) +-#define __NR_getegid (__NR_Linux + 106) +-#define __NR_setpgid (__NR_Linux + 107) 
+-#define __NR_getppid (__NR_Linux + 108) +-#define __NR_getpgrp (__NR_Linux + 109) +-#define __NR_setsid (__NR_Linux + 110) +-#define __NR_setreuid (__NR_Linux + 111) +-#define __NR_setregid (__NR_Linux + 112) +-#define __NR_getgroups (__NR_Linux + 113) +-#define __NR_setgroups (__NR_Linux + 114) +-#define __NR_setresuid (__NR_Linux + 115) +-#define __NR_getresuid (__NR_Linux + 116) +-#define __NR_setresgid (__NR_Linux + 117) +-#define __NR_getresgid (__NR_Linux + 118) +-#define __NR_getpgid (__NR_Linux + 119) +-#define __NR_setfsuid (__NR_Linux + 120) +-#define __NR_setfsgid (__NR_Linux + 121) +-#define __NR_getsid (__NR_Linux + 122) +-#define __NR_capget (__NR_Linux + 123) +-#define __NR_capset (__NR_Linux + 124) +-#define __NR_rt_sigpending (__NR_Linux + 125) +-#define __NR_rt_sigtimedwait (__NR_Linux + 126) +-#define __NR_rt_sigqueueinfo (__NR_Linux + 127) +-#define __NR_rt_sigsuspend (__NR_Linux + 128) +-#define __NR_sigaltstack (__NR_Linux + 129) +-#define __NR_utime (__NR_Linux + 130) +-#define __NR_mknod (__NR_Linux + 131) +-#define __NR_personality (__NR_Linux + 132) +-#define __NR_ustat (__NR_Linux + 133) +-#define __NR_statfs (__NR_Linux + 134) +-#define __NR_fstatfs (__NR_Linux + 135) +-#define __NR_sysfs (__NR_Linux + 136) +-#define __NR_getpriority (__NR_Linux + 137) +-#define __NR_setpriority (__NR_Linux + 138) +-#define __NR_sched_setparam (__NR_Linux + 139) +-#define __NR_sched_getparam (__NR_Linux + 140) +-#define __NR_sched_setscheduler (__NR_Linux + 141) +-#define __NR_sched_getscheduler (__NR_Linux + 142) +-#define __NR_sched_get_priority_max (__NR_Linux + 143) +-#define __NR_sched_get_priority_min (__NR_Linux + 144) +-#define __NR_sched_rr_get_interval (__NR_Linux + 145) +-#define __NR_mlock (__NR_Linux + 146) +-#define __NR_munlock (__NR_Linux + 147) +-#define __NR_mlockall (__NR_Linux + 148) +-#define __NR_munlockall (__NR_Linux + 149) +-#define __NR_vhangup (__NR_Linux + 150) +-#define __NR_pivot_root (__NR_Linux + 151) +-#define 
__NR__sysctl (__NR_Linux + 152) +-#define __NR_prctl (__NR_Linux + 153) +-#define __NR_adjtimex (__NR_Linux + 154) +-#define __NR_setrlimit (__NR_Linux + 155) +-#define __NR_chroot (__NR_Linux + 156) +-#define __NR_sync (__NR_Linux + 157) +-#define __NR_acct (__NR_Linux + 158) +-#define __NR_settimeofday (__NR_Linux + 159) +-#define __NR_mount (__NR_Linux + 160) +-#define __NR_umount2 (__NR_Linux + 161) +-#define __NR_swapon (__NR_Linux + 162) +-#define __NR_swapoff (__NR_Linux + 163) +-#define __NR_reboot (__NR_Linux + 164) +-#define __NR_sethostname (__NR_Linux + 165) +-#define __NR_setdomainname (__NR_Linux + 166) +-#define __NR_create_module (__NR_Linux + 167) +-#define __NR_init_module (__NR_Linux + 168) +-#define __NR_delete_module (__NR_Linux + 169) +-#define __NR_get_kernel_syms (__NR_Linux + 170) +-#define __NR_query_module (__NR_Linux + 171) +-#define __NR_quotactl (__NR_Linux + 172) +-#define __NR_nfsservctl (__NR_Linux + 173) +-#define __NR_getpmsg (__NR_Linux + 174) +-#define __NR_putpmsg (__NR_Linux + 175) +-#define __NR_afs_syscall (__NR_Linux + 176) +-#define __NR_reserved177 (__NR_Linux + 177) +-#define __NR_gettid (__NR_Linux + 178) +-#define __NR_readahead (__NR_Linux + 179) +-#define __NR_setxattr (__NR_Linux + 180) +-#define __NR_lsetxattr (__NR_Linux + 181) +-#define __NR_fsetxattr (__NR_Linux + 182) +-#define __NR_getxattr (__NR_Linux + 183) +-#define __NR_lgetxattr (__NR_Linux + 184) +-#define __NR_fgetxattr (__NR_Linux + 185) +-#define __NR_listxattr (__NR_Linux + 186) +-#define __NR_llistxattr (__NR_Linux + 187) +-#define __NR_flistxattr (__NR_Linux + 188) +-#define __NR_removexattr (__NR_Linux + 189) +-#define __NR_lremovexattr (__NR_Linux + 190) +-#define __NR_fremovexattr (__NR_Linux + 191) +-#define __NR_tkill (__NR_Linux + 192) +-#define __NR_reserved193 (__NR_Linux + 193) +-#define __NR_futex (__NR_Linux + 194) +-#define __NR_sched_setaffinity (__NR_Linux + 195) +-#define __NR_sched_getaffinity (__NR_Linux + 196) +-#define 
__NR_cacheflush (__NR_Linux + 197) +-#define __NR_cachectl (__NR_Linux + 198) +-#define __NR_sysmips (__NR_Linux + 199) +-#define __NR_io_setup (__NR_Linux + 200) +-#define __NR_io_destroy (__NR_Linux + 201) +-#define __NR_io_getevents (__NR_Linux + 202) +-#define __NR_io_submit (__NR_Linux + 203) +-#define __NR_io_cancel (__NR_Linux + 204) +-#define __NR_exit_group (__NR_Linux + 205) +-#define __NR_lookup_dcookie (__NR_Linux + 206) +-#define __NR_epoll_create (__NR_Linux + 207) +-#define __NR_epoll_ctl (__NR_Linux + 208) +-#define __NR_epoll_wait (__NR_Linux + 209) +-#define __NR_remap_file_pages (__NR_Linux + 210) +-#define __NR_rt_sigreturn (__NR_Linux + 211) +-#define __NR_set_tid_address (__NR_Linux + 212) +-#define __NR_restart_syscall (__NR_Linux + 213) +-#define __NR_semtimedop (__NR_Linux + 214) +-#define __NR_fadvise64 (__NR_Linux + 215) +-#define __NR_timer_create (__NR_Linux + 216) +-#define __NR_timer_settime (__NR_Linux + 217) +-#define __NR_timer_gettime (__NR_Linux + 218) +-#define __NR_timer_getoverrun (__NR_Linux + 219) +-#define __NR_timer_delete (__NR_Linux + 220) +-#define __NR_clock_settime (__NR_Linux + 221) +-#define __NR_clock_gettime (__NR_Linux + 222) +-#define __NR_clock_getres (__NR_Linux + 223) +-#define __NR_clock_nanosleep (__NR_Linux + 224) +-#define __NR_tgkill (__NR_Linux + 225) +-#define __NR_utimes (__NR_Linux + 226) +-#define __NR_mbind (__NR_Linux + 227) +-#define __NR_get_mempolicy (__NR_Linux + 228) +-#define __NR_set_mempolicy (__NR_Linux + 229) +-#define __NR_mq_open (__NR_Linux + 230) +-#define __NR_mq_unlink (__NR_Linux + 231) +-#define __NR_mq_timedsend (__NR_Linux + 232) +-#define __NR_mq_timedreceive (__NR_Linux + 233) +-#define __NR_mq_notify (__NR_Linux + 234) +-#define __NR_mq_getsetattr (__NR_Linux + 235) +-#define __NR_vserver (__NR_Linux + 236) +-#define __NR_waitid (__NR_Linux + 237) +-#define __NR_add_key (__NR_Linux + 239) +-#define __NR_request_key (__NR_Linux + 240) +-#define __NR_keyctl (__NR_Linux + 241) 
+-#define __NR_set_thread_area (__NR_Linux + 242) +-#define __NR_inotify_init (__NR_Linux + 243) +-#define __NR_inotify_add_watch (__NR_Linux + 244) +-#define __NR_inotify_rm_watch (__NR_Linux + 245) +-#define __NR_migrate_pages (__NR_Linux + 246) +-#define __NR_openat (__NR_Linux + 247) +-#define __NR_mkdirat (__NR_Linux + 248) +-#define __NR_mknodat (__NR_Linux + 249) +-#define __NR_fchownat (__NR_Linux + 250) +-#define __NR_futimesat (__NR_Linux + 251) +-#define __NR_newfstatat (__NR_Linux + 252) +-#define __NR_unlinkat (__NR_Linux + 253) +-#define __NR_renameat (__NR_Linux + 254) +-#define __NR_linkat (__NR_Linux + 255) +-#define __NR_symlinkat (__NR_Linux + 256) +-#define __NR_readlinkat (__NR_Linux + 257) +-#define __NR_fchmodat (__NR_Linux + 258) +-#define __NR_faccessat (__NR_Linux + 259) +-#define __NR_pselect6 (__NR_Linux + 260) +-#define __NR_ppoll (__NR_Linux + 261) +-#define __NR_unshare (__NR_Linux + 262) +-#define __NR_splice (__NR_Linux + 263) +-#define __NR_sync_file_range (__NR_Linux + 264) +-#define __NR_tee (__NR_Linux + 265) +-#define __NR_vmsplice (__NR_Linux + 266) +-#define __NR_move_pages (__NR_Linux + 267) +-#define __NR_set_robust_list (__NR_Linux + 268) +-#define __NR_get_robust_list (__NR_Linux + 269) +-#define __NR_kexec_load (__NR_Linux + 270) +-#define __NR_getcpu (__NR_Linux + 271) +-#define __NR_epoll_pwait (__NR_Linux + 272) +-#define __NR_ioprio_set (__NR_Linux + 273) +-#define __NR_ioprio_get (__NR_Linux + 274) +-#define __NR_utimensat (__NR_Linux + 275) +-#define __NR_signalfd (__NR_Linux + 276) +-#define __NR_timerfd (__NR_Linux + 277) +-#define __NR_eventfd (__NR_Linux + 278) +-#define __NR_fallocate (__NR_Linux + 279) +-#define __NR_timerfd_create (__NR_Linux + 280) +-#define __NR_timerfd_gettime (__NR_Linux + 281) +-#define __NR_timerfd_settime (__NR_Linux + 282) +-#define __NR_signalfd4 (__NR_Linux + 283) +-#define __NR_eventfd2 (__NR_Linux + 284) +-#define __NR_epoll_create1 (__NR_Linux + 285) +-#define __NR_dup3 
(__NR_Linux + 286) +-#define __NR_pipe2 (__NR_Linux + 287) +-#define __NR_inotify_init1 (__NR_Linux + 288) +-#define __NR_preadv (__NR_Linux + 289) +-#define __NR_pwritev (__NR_Linux + 290) +-#define __NR_rt_tgsigqueueinfo (__NR_Linux + 291) +-#define __NR_perf_event_open (__NR_Linux + 292) +-#define __NR_accept4 (__NR_Linux + 293) +-#define __NR_recvmmsg (__NR_Linux + 294) +-#define __NR_fanotify_init (__NR_Linux + 295) +-#define __NR_fanotify_mark (__NR_Linux + 296) +-#define __NR_prlimit64 (__NR_Linux + 297) +-#define __NR_name_to_handle_at (__NR_Linux + 298) +-#define __NR_open_by_handle_at (__NR_Linux + 299) +-#define __NR_clock_adjtime (__NR_Linux + 300) +-#define __NR_syncfs (__NR_Linux + 301) +-#define __NR_sendmmsg (__NR_Linux + 302) +-#define __NR_setns (__NR_Linux + 303) +-#define __NR_process_vm_readv (__NR_Linux + 304) +-#define __NR_process_vm_writev (__NR_Linux + 305) +-#define __NR_kcmp (__NR_Linux + 306) +-#define __NR_finit_module (__NR_Linux + 307) +-#define __NR_getdents64 (__NR_Linux + 308) +-#define __NR_sched_setattr (__NR_Linux + 309) +-#define __NR_sched_getattr (__NR_Linux + 310) +-#define __NR_renameat2 (__NR_Linux + 311) +-#define __NR_seccomp (__NR_Linux + 312) +-#define __NR_getrandom (__NR_Linux + 313) +-#define __NR_memfd_create (__NR_Linux + 314) +-#define __NR_bpf (__NR_Linux + 315) +-#define __NR_execveat (__NR_Linux + 316) +-#define __NR_userfaultfd (__NR_Linux + 317) +-#define __NR_membarrier (__NR_Linux + 318) +-#define __NR_mlock2 (__NR_Linux + 319) +-#define __NR_copy_file_range (__NR_Linux + 320) +-#define __NR_preadv2 (__NR_Linux + 321) +-#define __NR_pwritev2 (__NR_Linux + 322) +-#define __NR_pkey_mprotect (__NR_Linux + 323) +-#define __NR_pkey_alloc (__NR_Linux + 324) +-#define __NR_pkey_free (__NR_Linux + 325) +-#define __NR_statx (__NR_Linux + 326) +-#define __NR_rseq (__NR_Linux + 327) +-#define __NR_io_pgetevents (__NR_Linux + 328) +-#define __NR_pidfd_send_signal (__NR_Linux + 424) +-#define __NR_io_uring_setup 
(__NR_Linux + 425) +-#define __NR_io_uring_enter (__NR_Linux + 426) +-#define __NR_io_uring_register (__NR_Linux + 427) +-#define __NR_open_tree (__NR_Linux + 428) +-#define __NR_move_mount (__NR_Linux + 429) +-#define __NR_fsopen (__NR_Linux + 430) +-#define __NR_fsconfig (__NR_Linux + 431) +-#define __NR_fsmount (__NR_Linux + 432) +-#define __NR_fspick (__NR_Linux + 433) +-#define __NR_pidfd_open (__NR_Linux + 434) +-#define __NR_clone3 (__NR_Linux + 435) +-#define __NR_close_range (__NR_Linux + 436) +-#define __NR_openat2 (__NR_Linux + 437) +-#define __NR_pidfd_getfd (__NR_Linux + 438) +-#define __NR_faccessat2 (__NR_Linux + 439) +-#define __NR_process_madvise (__NR_Linux + 440) +-#define __NR_epoll_pwait2 (__NR_Linux + 441) ++#define __NR_read (__NR_Linux + 0) ++#define __NR_write (__NR_Linux + 1) ++#define __NR_open (__NR_Linux + 2) ++#define __NR_close (__NR_Linux + 3) ++#define __NR_stat (__NR_Linux + 4) ++#define __NR_fstat (__NR_Linux + 5) ++#define __NR_lstat (__NR_Linux + 6) ++#define __NR_poll (__NR_Linux + 7) ++#define __NR_lseek (__NR_Linux + 8) ++#define __NR_mmap (__NR_Linux + 9) ++#define __NR_mprotect (__NR_Linux + 10) ++#define __NR_munmap (__NR_Linux + 11) ++#define __NR_brk (__NR_Linux + 12) ++#define __NR_rt_sigaction (__NR_Linux + 13) ++#define __NR_rt_sigprocmask (__NR_Linux + 14) ++#define __NR_ioctl (__NR_Linux + 15) ++#define __NR_pread64 (__NR_Linux + 16) ++#define __NR_pwrite64 (__NR_Linux + 17) ++#define __NR_readv (__NR_Linux + 18) ++#define __NR_writev (__NR_Linux + 19) ++#define __NR_access (__NR_Linux + 20) ++#define __NR_pipe (__NR_Linux + 21) ++#define __NR__newselect (__NR_Linux + 22) ++#define __NR_sched_yield (__NR_Linux + 23) ++#define __NR_mremap (__NR_Linux + 24) ++#define __NR_msync (__NR_Linux + 25) ++#define __NR_mincore (__NR_Linux + 26) ++#define __NR_madvise (__NR_Linux + 27) ++#define __NR_shmget (__NR_Linux + 28) ++#define __NR_shmat (__NR_Linux + 29) ++#define __NR_shmctl (__NR_Linux + 30) ++#define __NR_dup 
(__NR_Linux + 31) ++#define __NR_dup2 (__NR_Linux + 32) ++#define __NR_pause (__NR_Linux + 33) ++#define __NR_nanosleep (__NR_Linux + 34) ++#define __NR_getitimer (__NR_Linux + 35) ++#define __NR_setitimer (__NR_Linux + 36) ++#define __NR_alarm (__NR_Linux + 37) ++#define __NR_getpid (__NR_Linux + 38) ++#define __NR_sendfile (__NR_Linux + 39) ++#define __NR_socket (__NR_Linux + 40) ++#define __NR_connect (__NR_Linux + 41) ++#define __NR_accept (__NR_Linux + 42) ++#define __NR_sendto (__NR_Linux + 43) ++#define __NR_recvfrom (__NR_Linux + 44) ++#define __NR_sendmsg (__NR_Linux + 45) ++#define __NR_recvmsg (__NR_Linux + 46) ++#define __NR_shutdown (__NR_Linux + 47) ++#define __NR_bind (__NR_Linux + 48) ++#define __NR_listen (__NR_Linux + 49) ++#define __NR_getsockname (__NR_Linux + 50) ++#define __NR_getpeername (__NR_Linux + 51) ++#define __NR_socketpair (__NR_Linux + 52) ++#define __NR_setsockopt (__NR_Linux + 53) ++#define __NR_getsockopt (__NR_Linux + 54) ++#define __NR_clone (__NR_Linux + 55) ++#define __NR_fork (__NR_Linux + 56) ++#define __NR_execve (__NR_Linux + 57) ++#define __NR_exit (__NR_Linux + 58) ++#define __NR_wait4 (__NR_Linux + 59) ++#define __NR_kill (__NR_Linux + 60) ++#define __NR_uname (__NR_Linux + 61) ++#define __NR_semget (__NR_Linux + 62) ++#define __NR_semop (__NR_Linux + 63) ++#define __NR_semctl (__NR_Linux + 64) ++#define __NR_shmdt (__NR_Linux + 65) ++#define __NR_msgget (__NR_Linux + 66) ++#define __NR_msgsnd (__NR_Linux + 67) ++#define __NR_msgrcv (__NR_Linux + 68) ++#define __NR_msgctl (__NR_Linux + 69) ++#define __NR_fcntl (__NR_Linux + 70) ++#define __NR_flock (__NR_Linux + 71) ++#define __NR_fsync (__NR_Linux + 72) ++#define __NR_fdatasync (__NR_Linux + 73) ++#define __NR_truncate (__NR_Linux + 74) ++#define __NR_ftruncate (__NR_Linux + 75) ++#define __NR_getdents (__NR_Linux + 76) ++#define __NR_getcwd (__NR_Linux + 77) ++#define __NR_chdir (__NR_Linux + 78) ++#define __NR_fchdir (__NR_Linux + 79) ++#define __NR_rename 
(__NR_Linux + 80) ++#define __NR_mkdir (__NR_Linux + 81) ++#define __NR_rmdir (__NR_Linux + 82) ++#define __NR_creat (__NR_Linux + 83) ++#define __NR_link (__NR_Linux + 84) ++#define __NR_unlink (__NR_Linux + 85) ++#define __NR_symlink (__NR_Linux + 86) ++#define __NR_readlink (__NR_Linux + 87) ++#define __NR_chmod (__NR_Linux + 88) ++#define __NR_fchmod (__NR_Linux + 89) ++#define __NR_chown (__NR_Linux + 90) ++#define __NR_fchown (__NR_Linux + 91) ++#define __NR_lchown (__NR_Linux + 92) ++#define __NR_umask (__NR_Linux + 93) ++#define __NR_gettimeofday (__NR_Linux + 94) ++#define __NR_getrlimit (__NR_Linux + 95) ++#define __NR_getrusage (__NR_Linux + 96) ++#define __NR_sysinfo (__NR_Linux + 97) ++#define __NR_times (__NR_Linux + 98) ++#define __NR_ptrace (__NR_Linux + 99) ++#define __NR_getuid (__NR_Linux + 100) ++#define __NR_syslog (__NR_Linux + 101) ++#define __NR_getgid (__NR_Linux + 102) ++#define __NR_setuid (__NR_Linux + 103) ++#define __NR_setgid (__NR_Linux + 104) ++#define __NR_geteuid (__NR_Linux + 105) ++#define __NR_getegid (__NR_Linux + 106) ++#define __NR_setpgid (__NR_Linux + 107) ++#define __NR_getppid (__NR_Linux + 108) ++#define __NR_getpgrp (__NR_Linux + 109) ++#define __NR_setsid (__NR_Linux + 110) ++#define __NR_setreuid (__NR_Linux + 111) ++#define __NR_setregid (__NR_Linux + 112) ++#define __NR_getgroups (__NR_Linux + 113) ++#define __NR_setgroups (__NR_Linux + 114) ++#define __NR_setresuid (__NR_Linux + 115) ++#define __NR_getresuid (__NR_Linux + 116) ++#define __NR_setresgid (__NR_Linux + 117) ++#define __NR_getresgid (__NR_Linux + 118) ++#define __NR_getpgid (__NR_Linux + 119) ++#define __NR_setfsuid (__NR_Linux + 120) ++#define __NR_setfsgid (__NR_Linux + 121) ++#define __NR_getsid (__NR_Linux + 122) ++#define __NR_capget (__NR_Linux + 123) ++#define __NR_capset (__NR_Linux + 124) ++#define __NR_rt_sigpending (__NR_Linux + 125) ++#define __NR_rt_sigtimedwait (__NR_Linux + 126) ++#define __NR_rt_sigqueueinfo (__NR_Linux + 127) ++#define 
__NR_rt_sigsuspend (__NR_Linux + 128) ++#define __NR_sigaltstack (__NR_Linux + 129) ++#define __NR_utime (__NR_Linux + 130) ++#define __NR_mknod (__NR_Linux + 131) ++#define __NR_personality (__NR_Linux + 132) ++#define __NR_ustat (__NR_Linux + 133) ++#define __NR_statfs (__NR_Linux + 134) ++#define __NR_fstatfs (__NR_Linux + 135) ++#define __NR_sysfs (__NR_Linux + 136) ++#define __NR_getpriority (__NR_Linux + 137) ++#define __NR_setpriority (__NR_Linux + 138) ++#define __NR_sched_setparam (__NR_Linux + 139) ++#define __NR_sched_getparam (__NR_Linux + 140) ++#define __NR_sched_setscheduler (__NR_Linux + 141) ++#define __NR_sched_getscheduler (__NR_Linux + 142) ++#define __NR_sched_get_priority_max (__NR_Linux + 143) ++#define __NR_sched_get_priority_min (__NR_Linux + 144) ++#define __NR_sched_rr_get_interval (__NR_Linux + 145) ++#define __NR_mlock (__NR_Linux + 146) ++#define __NR_munlock (__NR_Linux + 147) ++#define __NR_mlockall (__NR_Linux + 148) ++#define __NR_munlockall (__NR_Linux + 149) ++#define __NR_vhangup (__NR_Linux + 150) ++#define __NR_pivot_root (__NR_Linux + 151) ++#define __NR__sysctl (__NR_Linux + 152) ++#define __NR_prctl (__NR_Linux + 153) ++#define __NR_adjtimex (__NR_Linux + 154) ++#define __NR_setrlimit (__NR_Linux + 155) ++#define __NR_chroot (__NR_Linux + 156) ++#define __NR_sync (__NR_Linux + 157) ++#define __NR_acct (__NR_Linux + 158) ++#define __NR_settimeofday (__NR_Linux + 159) ++#define __NR_mount (__NR_Linux + 160) ++#define __NR_umount2 (__NR_Linux + 161) ++#define __NR_swapon (__NR_Linux + 162) ++#define __NR_swapoff (__NR_Linux + 163) ++#define __NR_reboot (__NR_Linux + 164) ++#define __NR_sethostname (__NR_Linux + 165) ++#define __NR_setdomainname (__NR_Linux + 166) ++#define __NR_create_module (__NR_Linux + 167) ++#define __NR_init_module (__NR_Linux + 168) ++#define __NR_delete_module (__NR_Linux + 169) ++#define __NR_get_kernel_syms (__NR_Linux + 170) ++#define __NR_query_module (__NR_Linux + 171) ++#define __NR_quotactl 
(__NR_Linux + 172) ++#define __NR_nfsservctl (__NR_Linux + 173) ++#define __NR_getpmsg (__NR_Linux + 174) ++#define __NR_putpmsg (__NR_Linux + 175) ++#define __NR_afs_syscall (__NR_Linux + 176) ++#define __NR_reserved177 (__NR_Linux + 177) ++#define __NR_gettid (__NR_Linux + 178) ++#define __NR_readahead (__NR_Linux + 179) ++#define __NR_setxattr (__NR_Linux + 180) ++#define __NR_lsetxattr (__NR_Linux + 181) ++#define __NR_fsetxattr (__NR_Linux + 182) ++#define __NR_getxattr (__NR_Linux + 183) ++#define __NR_lgetxattr (__NR_Linux + 184) ++#define __NR_fgetxattr (__NR_Linux + 185) ++#define __NR_listxattr (__NR_Linux + 186) ++#define __NR_llistxattr (__NR_Linux + 187) ++#define __NR_flistxattr (__NR_Linux + 188) ++#define __NR_removexattr (__NR_Linux + 189) ++#define __NR_lremovexattr (__NR_Linux + 190) ++#define __NR_fremovexattr (__NR_Linux + 191) ++#define __NR_tkill (__NR_Linux + 192) ++#define __NR_reserved193 (__NR_Linux + 193) ++#define __NR_futex (__NR_Linux + 194) ++#define __NR_sched_setaffinity (__NR_Linux + 195) ++#define __NR_sched_getaffinity (__NR_Linux + 196) ++#define __NR_cacheflush (__NR_Linux + 197) ++#define __NR_cachectl (__NR_Linux + 198) ++#define __NR_sysmips (__NR_Linux + 199) ++#define __NR_io_setup (__NR_Linux + 200) ++#define __NR_io_destroy (__NR_Linux + 201) ++#define __NR_io_getevents (__NR_Linux + 202) ++#define __NR_io_submit (__NR_Linux + 203) ++#define __NR_io_cancel (__NR_Linux + 204) ++#define __NR_exit_group (__NR_Linux + 205) ++#define __NR_lookup_dcookie (__NR_Linux + 206) ++#define __NR_epoll_create (__NR_Linux + 207) ++#define __NR_epoll_ctl (__NR_Linux + 208) ++#define __NR_epoll_wait (__NR_Linux + 209) ++#define __NR_remap_file_pages (__NR_Linux + 210) ++#define __NR_rt_sigreturn (__NR_Linux + 211) ++#define __NR_set_tid_address (__NR_Linux + 212) ++#define __NR_restart_syscall (__NR_Linux + 213) ++#define __NR_semtimedop (__NR_Linux + 214) ++#define __NR_fadvise64 (__NR_Linux + 215) ++#define __NR_timer_create 
(__NR_Linux + 216) ++#define __NR_timer_settime (__NR_Linux + 217) ++#define __NR_timer_gettime (__NR_Linux + 218) ++#define __NR_timer_getoverrun (__NR_Linux + 219) ++#define __NR_timer_delete (__NR_Linux + 220) ++#define __NR_clock_settime (__NR_Linux + 221) ++#define __NR_clock_gettime (__NR_Linux + 222) ++#define __NR_clock_getres (__NR_Linux + 223) ++#define __NR_clock_nanosleep (__NR_Linux + 224) ++#define __NR_tgkill (__NR_Linux + 225) ++#define __NR_utimes (__NR_Linux + 226) ++#define __NR_mbind (__NR_Linux + 227) ++#define __NR_get_mempolicy (__NR_Linux + 228) ++#define __NR_set_mempolicy (__NR_Linux + 229) ++#define __NR_mq_open (__NR_Linux + 230) ++#define __NR_mq_unlink (__NR_Linux + 231) ++#define __NR_mq_timedsend (__NR_Linux + 232) ++#define __NR_mq_timedreceive (__NR_Linux + 233) ++#define __NR_mq_notify (__NR_Linux + 234) ++#define __NR_mq_getsetattr (__NR_Linux + 235) ++#define __NR_vserver (__NR_Linux + 236) ++#define __NR_waitid (__NR_Linux + 237) ++#define __NR_add_key (__NR_Linux + 239) ++#define __NR_request_key (__NR_Linux + 240) ++#define __NR_keyctl (__NR_Linux + 241) ++#define __NR_set_thread_area (__NR_Linux + 242) ++#define __NR_inotify_init (__NR_Linux + 243) ++#define __NR_inotify_add_watch (__NR_Linux + 244) ++#define __NR_inotify_rm_watch (__NR_Linux + 245) ++#define __NR_migrate_pages (__NR_Linux + 246) ++#define __NR_openat (__NR_Linux + 247) ++#define __NR_mkdirat (__NR_Linux + 248) ++#define __NR_mknodat (__NR_Linux + 249) ++#define __NR_fchownat (__NR_Linux + 250) ++#define __NR_futimesat (__NR_Linux + 251) ++#define __NR_newfstatat (__NR_Linux + 252) ++#define __NR_unlinkat (__NR_Linux + 253) ++#define __NR_renameat (__NR_Linux + 254) ++#define __NR_linkat (__NR_Linux + 255) ++#define __NR_symlinkat (__NR_Linux + 256) ++#define __NR_readlinkat (__NR_Linux + 257) ++#define __NR_fchmodat (__NR_Linux + 258) ++#define __NR_faccessat (__NR_Linux + 259) ++#define __NR_pselect6 (__NR_Linux + 260) ++#define __NR_ppoll (__NR_Linux + 
261) ++#define __NR_unshare (__NR_Linux + 262) ++#define __NR_splice (__NR_Linux + 263) ++#define __NR_sync_file_range (__NR_Linux + 264) ++#define __NR_tee (__NR_Linux + 265) ++#define __NR_vmsplice (__NR_Linux + 266) ++#define __NR_move_pages (__NR_Linux + 267) ++#define __NR_set_robust_list (__NR_Linux + 268) ++#define __NR_get_robust_list (__NR_Linux + 269) ++#define __NR_kexec_load (__NR_Linux + 270) ++#define __NR_getcpu (__NR_Linux + 271) ++#define __NR_epoll_pwait (__NR_Linux + 272) ++#define __NR_ioprio_set (__NR_Linux + 273) ++#define __NR_ioprio_get (__NR_Linux + 274) ++#define __NR_utimensat (__NR_Linux + 275) ++#define __NR_signalfd (__NR_Linux + 276) ++#define __NR_timerfd (__NR_Linux + 277) ++#define __NR_eventfd (__NR_Linux + 278) ++#define __NR_fallocate (__NR_Linux + 279) ++#define __NR_timerfd_create (__NR_Linux + 280) ++#define __NR_timerfd_gettime (__NR_Linux + 281) ++#define __NR_timerfd_settime (__NR_Linux + 282) ++#define __NR_signalfd4 (__NR_Linux + 283) ++#define __NR_eventfd2 (__NR_Linux + 284) ++#define __NR_epoll_create1 (__NR_Linux + 285) ++#define __NR_dup3 (__NR_Linux + 286) ++#define __NR_pipe2 (__NR_Linux + 287) ++#define __NR_inotify_init1 (__NR_Linux + 288) ++#define __NR_preadv (__NR_Linux + 289) ++#define __NR_pwritev (__NR_Linux + 290) ++#define __NR_rt_tgsigqueueinfo (__NR_Linux + 291) ++#define __NR_perf_event_open (__NR_Linux + 292) ++#define __NR_accept4 (__NR_Linux + 293) ++#define __NR_recvmmsg (__NR_Linux + 294) ++#define __NR_fanotify_init (__NR_Linux + 295) ++#define __NR_fanotify_mark (__NR_Linux + 296) ++#define __NR_prlimit64 (__NR_Linux + 297) ++#define __NR_name_to_handle_at (__NR_Linux + 298) ++#define __NR_open_by_handle_at (__NR_Linux + 299) ++#define __NR_clock_adjtime (__NR_Linux + 300) ++#define __NR_syncfs (__NR_Linux + 301) ++#define __NR_sendmmsg (__NR_Linux + 302) ++#define __NR_setns (__NR_Linux + 303) ++#define __NR_process_vm_readv (__NR_Linux + 304) ++#define __NR_process_vm_writev (__NR_Linux + 
305) ++#define __NR_kcmp (__NR_Linux + 306) ++#define __NR_finit_module (__NR_Linux + 307) ++#define __NR_getdents64 (__NR_Linux + 308) ++#define __NR_sched_setattr (__NR_Linux + 309) ++#define __NR_sched_getattr (__NR_Linux + 310) ++#define __NR_renameat2 (__NR_Linux + 311) ++#define __NR_seccomp (__NR_Linux + 312) ++#define __NR_getrandom (__NR_Linux + 313) ++#define __NR_memfd_create (__NR_Linux + 314) ++#define __NR_bpf (__NR_Linux + 315) ++#define __NR_execveat (__NR_Linux + 316) ++#define __NR_userfaultfd (__NR_Linux + 317) ++#define __NR_membarrier (__NR_Linux + 318) ++#define __NR_mlock2 (__NR_Linux + 319) ++#define __NR_copy_file_range (__NR_Linux + 320) ++#define __NR_preadv2 (__NR_Linux + 321) ++#define __NR_pwritev2 (__NR_Linux + 322) ++#define __NR_pkey_mprotect (__NR_Linux + 323) ++#define __NR_pkey_alloc (__NR_Linux + 324) ++#define __NR_pkey_free (__NR_Linux + 325) ++#define __NR_statx (__NR_Linux + 326) ++#define __NR_rseq (__NR_Linux + 327) ++#define __NR_io_pgetevents (__NR_Linux + 328) ++#define __NR_pidfd_send_signal (__NR_Linux + 424) ++#define __NR_io_uring_setup (__NR_Linux + 425) ++#define __NR_io_uring_enter (__NR_Linux + 426) ++#define __NR_io_uring_register (__NR_Linux + 427) ++#define __NR_open_tree (__NR_Linux + 428) ++#define __NR_move_mount (__NR_Linux + 429) ++#define __NR_fsopen (__NR_Linux + 430) ++#define __NR_fsconfig (__NR_Linux + 431) ++#define __NR_fsmount (__NR_Linux + 432) ++#define __NR_fspick (__NR_Linux + 433) ++#define __NR_pidfd_open (__NR_Linux + 434) ++#define __NR_clone3 (__NR_Linux + 435) ++#define __NR_close_range (__NR_Linux + 436) ++#define __NR_openat2 (__NR_Linux + 437) ++#define __NR_pidfd_getfd (__NR_Linux + 438) ++#define __NR_faccessat2 (__NR_Linux + 439) ++#define __NR_process_madvise (__NR_Linux + 440) ++#define __NR_epoll_pwait2 (__NR_Linux + 441) ++#define __NR_mount_setattr (__NR_Linux + 442) ++#define __NR_landlock_create_ruleset (__NR_Linux + 444) ++#define __NR_landlock_add_rule (__NR_Linux + 445) 
++#define __NR_landlock_restrict_self (__NR_Linux + 446) + +- +-#endif /* _ASM_MIPS_UNISTD_N64_H */ ++#endif /* _ASM_UNISTD_N64_H */ +diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h +index ca6a7e5c0b..954303ad69 100644 +--- a/linux-headers/asm-mips/unistd_o32.h ++++ b/linux-headers/asm-mips/unistd_o32.h +@@ -1,422 +1,425 @@ +-#ifndef _ASM_MIPS_UNISTD_O32_H +-#define _ASM_MIPS_UNISTD_O32_H ++#ifndef _ASM_UNISTD_O32_H ++#define _ASM_UNISTD_O32_H + +-#define __NR_syscall (__NR_Linux + 0) +-#define __NR_exit (__NR_Linux + 1) +-#define __NR_fork (__NR_Linux + 2) +-#define __NR_read (__NR_Linux + 3) +-#define __NR_write (__NR_Linux + 4) +-#define __NR_open (__NR_Linux + 5) +-#define __NR_close (__NR_Linux + 6) +-#define __NR_waitpid (__NR_Linux + 7) +-#define __NR_creat (__NR_Linux + 8) +-#define __NR_link (__NR_Linux + 9) +-#define __NR_unlink (__NR_Linux + 10) +-#define __NR_execve (__NR_Linux + 11) +-#define __NR_chdir (__NR_Linux + 12) +-#define __NR_time (__NR_Linux + 13) +-#define __NR_mknod (__NR_Linux + 14) +-#define __NR_chmod (__NR_Linux + 15) +-#define __NR_lchown (__NR_Linux + 16) +-#define __NR_break (__NR_Linux + 17) +-#define __NR_unused18 (__NR_Linux + 18) +-#define __NR_lseek (__NR_Linux + 19) +-#define __NR_getpid (__NR_Linux + 20) +-#define __NR_mount (__NR_Linux + 21) +-#define __NR_umount (__NR_Linux + 22) +-#define __NR_setuid (__NR_Linux + 23) +-#define __NR_getuid (__NR_Linux + 24) +-#define __NR_stime (__NR_Linux + 25) +-#define __NR_ptrace (__NR_Linux + 26) +-#define __NR_alarm (__NR_Linux + 27) +-#define __NR_unused28 (__NR_Linux + 28) +-#define __NR_pause (__NR_Linux + 29) +-#define __NR_utime (__NR_Linux + 30) +-#define __NR_stty (__NR_Linux + 31) +-#define __NR_gtty (__NR_Linux + 32) +-#define __NR_access (__NR_Linux + 33) +-#define __NR_nice (__NR_Linux + 34) +-#define __NR_ftime (__NR_Linux + 35) +-#define __NR_sync (__NR_Linux + 36) +-#define __NR_kill (__NR_Linux + 37) +-#define __NR_rename 
(__NR_Linux + 38) +-#define __NR_mkdir (__NR_Linux + 39) +-#define __NR_rmdir (__NR_Linux + 40) +-#define __NR_dup (__NR_Linux + 41) +-#define __NR_pipe (__NR_Linux + 42) +-#define __NR_times (__NR_Linux + 43) +-#define __NR_prof (__NR_Linux + 44) +-#define __NR_brk (__NR_Linux + 45) +-#define __NR_setgid (__NR_Linux + 46) +-#define __NR_getgid (__NR_Linux + 47) +-#define __NR_signal (__NR_Linux + 48) +-#define __NR_geteuid (__NR_Linux + 49) +-#define __NR_getegid (__NR_Linux + 50) +-#define __NR_acct (__NR_Linux + 51) +-#define __NR_umount2 (__NR_Linux + 52) +-#define __NR_lock (__NR_Linux + 53) +-#define __NR_ioctl (__NR_Linux + 54) +-#define __NR_fcntl (__NR_Linux + 55) +-#define __NR_mpx (__NR_Linux + 56) +-#define __NR_setpgid (__NR_Linux + 57) +-#define __NR_ulimit (__NR_Linux + 58) +-#define __NR_unused59 (__NR_Linux + 59) +-#define __NR_umask (__NR_Linux + 60) +-#define __NR_chroot (__NR_Linux + 61) +-#define __NR_ustat (__NR_Linux + 62) +-#define __NR_dup2 (__NR_Linux + 63) +-#define __NR_getppid (__NR_Linux + 64) +-#define __NR_getpgrp (__NR_Linux + 65) +-#define __NR_setsid (__NR_Linux + 66) +-#define __NR_sigaction (__NR_Linux + 67) +-#define __NR_sgetmask (__NR_Linux + 68) +-#define __NR_ssetmask (__NR_Linux + 69) +-#define __NR_setreuid (__NR_Linux + 70) +-#define __NR_setregid (__NR_Linux + 71) +-#define __NR_sigsuspend (__NR_Linux + 72) +-#define __NR_sigpending (__NR_Linux + 73) +-#define __NR_sethostname (__NR_Linux + 74) +-#define __NR_setrlimit (__NR_Linux + 75) +-#define __NR_getrlimit (__NR_Linux + 76) +-#define __NR_getrusage (__NR_Linux + 77) +-#define __NR_gettimeofday (__NR_Linux + 78) +-#define __NR_settimeofday (__NR_Linux + 79) +-#define __NR_getgroups (__NR_Linux + 80) +-#define __NR_setgroups (__NR_Linux + 81) +-#define __NR_reserved82 (__NR_Linux + 82) +-#define __NR_symlink (__NR_Linux + 83) +-#define __NR_unused84 (__NR_Linux + 84) +-#define __NR_readlink (__NR_Linux + 85) +-#define __NR_uselib (__NR_Linux + 86) +-#define 
__NR_swapon (__NR_Linux + 87) +-#define __NR_reboot (__NR_Linux + 88) +-#define __NR_readdir (__NR_Linux + 89) +-#define __NR_mmap (__NR_Linux + 90) +-#define __NR_munmap (__NR_Linux + 91) +-#define __NR_truncate (__NR_Linux + 92) +-#define __NR_ftruncate (__NR_Linux + 93) +-#define __NR_fchmod (__NR_Linux + 94) +-#define __NR_fchown (__NR_Linux + 95) +-#define __NR_getpriority (__NR_Linux + 96) +-#define __NR_setpriority (__NR_Linux + 97) +-#define __NR_profil (__NR_Linux + 98) +-#define __NR_statfs (__NR_Linux + 99) +-#define __NR_fstatfs (__NR_Linux + 100) +-#define __NR_ioperm (__NR_Linux + 101) +-#define __NR_socketcall (__NR_Linux + 102) +-#define __NR_syslog (__NR_Linux + 103) +-#define __NR_setitimer (__NR_Linux + 104) +-#define __NR_getitimer (__NR_Linux + 105) +-#define __NR_stat (__NR_Linux + 106) +-#define __NR_lstat (__NR_Linux + 107) +-#define __NR_fstat (__NR_Linux + 108) +-#define __NR_unused109 (__NR_Linux + 109) +-#define __NR_iopl (__NR_Linux + 110) +-#define __NR_vhangup (__NR_Linux + 111) +-#define __NR_idle (__NR_Linux + 112) +-#define __NR_vm86 (__NR_Linux + 113) +-#define __NR_wait4 (__NR_Linux + 114) +-#define __NR_swapoff (__NR_Linux + 115) +-#define __NR_sysinfo (__NR_Linux + 116) +-#define __NR_ipc (__NR_Linux + 117) +-#define __NR_fsync (__NR_Linux + 118) +-#define __NR_sigreturn (__NR_Linux + 119) +-#define __NR_clone (__NR_Linux + 120) +-#define __NR_setdomainname (__NR_Linux + 121) +-#define __NR_uname (__NR_Linux + 122) +-#define __NR_modify_ldt (__NR_Linux + 123) +-#define __NR_adjtimex (__NR_Linux + 124) +-#define __NR_mprotect (__NR_Linux + 125) +-#define __NR_sigprocmask (__NR_Linux + 126) +-#define __NR_create_module (__NR_Linux + 127) +-#define __NR_init_module (__NR_Linux + 128) +-#define __NR_delete_module (__NR_Linux + 129) +-#define __NR_get_kernel_syms (__NR_Linux + 130) +-#define __NR_quotactl (__NR_Linux + 131) +-#define __NR_getpgid (__NR_Linux + 132) +-#define __NR_fchdir (__NR_Linux + 133) +-#define __NR_bdflush 
(__NR_Linux + 134) +-#define __NR_sysfs (__NR_Linux + 135) +-#define __NR_personality (__NR_Linux + 136) +-#define __NR_afs_syscall (__NR_Linux + 137) +-#define __NR_setfsuid (__NR_Linux + 138) +-#define __NR_setfsgid (__NR_Linux + 139) +-#define __NR__llseek (__NR_Linux + 140) +-#define __NR_getdents (__NR_Linux + 141) +-#define __NR__newselect (__NR_Linux + 142) +-#define __NR_flock (__NR_Linux + 143) +-#define __NR_msync (__NR_Linux + 144) +-#define __NR_readv (__NR_Linux + 145) +-#define __NR_writev (__NR_Linux + 146) +-#define __NR_cacheflush (__NR_Linux + 147) +-#define __NR_cachectl (__NR_Linux + 148) +-#define __NR_sysmips (__NR_Linux + 149) +-#define __NR_unused150 (__NR_Linux + 150) +-#define __NR_getsid (__NR_Linux + 151) +-#define __NR_fdatasync (__NR_Linux + 152) +-#define __NR__sysctl (__NR_Linux + 153) +-#define __NR_mlock (__NR_Linux + 154) +-#define __NR_munlock (__NR_Linux + 155) +-#define __NR_mlockall (__NR_Linux + 156) +-#define __NR_munlockall (__NR_Linux + 157) +-#define __NR_sched_setparam (__NR_Linux + 158) +-#define __NR_sched_getparam (__NR_Linux + 159) +-#define __NR_sched_setscheduler (__NR_Linux + 160) +-#define __NR_sched_getscheduler (__NR_Linux + 161) +-#define __NR_sched_yield (__NR_Linux + 162) +-#define __NR_sched_get_priority_max (__NR_Linux + 163) +-#define __NR_sched_get_priority_min (__NR_Linux + 164) +-#define __NR_sched_rr_get_interval (__NR_Linux + 165) +-#define __NR_nanosleep (__NR_Linux + 166) +-#define __NR_mremap (__NR_Linux + 167) +-#define __NR_accept (__NR_Linux + 168) +-#define __NR_bind (__NR_Linux + 169) +-#define __NR_connect (__NR_Linux + 170) +-#define __NR_getpeername (__NR_Linux + 171) +-#define __NR_getsockname (__NR_Linux + 172) +-#define __NR_getsockopt (__NR_Linux + 173) +-#define __NR_listen (__NR_Linux + 174) +-#define __NR_recv (__NR_Linux + 175) +-#define __NR_recvfrom (__NR_Linux + 176) +-#define __NR_recvmsg (__NR_Linux + 177) +-#define __NR_send (__NR_Linux + 178) +-#define __NR_sendmsg 
(__NR_Linux + 179) +-#define __NR_sendto (__NR_Linux + 180) +-#define __NR_setsockopt (__NR_Linux + 181) +-#define __NR_shutdown (__NR_Linux + 182) +-#define __NR_socket (__NR_Linux + 183) +-#define __NR_socketpair (__NR_Linux + 184) +-#define __NR_setresuid (__NR_Linux + 185) +-#define __NR_getresuid (__NR_Linux + 186) +-#define __NR_query_module (__NR_Linux + 187) +-#define __NR_poll (__NR_Linux + 188) +-#define __NR_nfsservctl (__NR_Linux + 189) +-#define __NR_setresgid (__NR_Linux + 190) +-#define __NR_getresgid (__NR_Linux + 191) +-#define __NR_prctl (__NR_Linux + 192) +-#define __NR_rt_sigreturn (__NR_Linux + 193) +-#define __NR_rt_sigaction (__NR_Linux + 194) +-#define __NR_rt_sigprocmask (__NR_Linux + 195) +-#define __NR_rt_sigpending (__NR_Linux + 196) +-#define __NR_rt_sigtimedwait (__NR_Linux + 197) +-#define __NR_rt_sigqueueinfo (__NR_Linux + 198) +-#define __NR_rt_sigsuspend (__NR_Linux + 199) +-#define __NR_pread64 (__NR_Linux + 200) +-#define __NR_pwrite64 (__NR_Linux + 201) +-#define __NR_chown (__NR_Linux + 202) +-#define __NR_getcwd (__NR_Linux + 203) +-#define __NR_capget (__NR_Linux + 204) +-#define __NR_capset (__NR_Linux + 205) +-#define __NR_sigaltstack (__NR_Linux + 206) +-#define __NR_sendfile (__NR_Linux + 207) +-#define __NR_getpmsg (__NR_Linux + 208) +-#define __NR_putpmsg (__NR_Linux + 209) +-#define __NR_mmap2 (__NR_Linux + 210) +-#define __NR_truncate64 (__NR_Linux + 211) +-#define __NR_ftruncate64 (__NR_Linux + 212) +-#define __NR_stat64 (__NR_Linux + 213) +-#define __NR_lstat64 (__NR_Linux + 214) +-#define __NR_fstat64 (__NR_Linux + 215) +-#define __NR_pivot_root (__NR_Linux + 216) +-#define __NR_mincore (__NR_Linux + 217) +-#define __NR_madvise (__NR_Linux + 218) +-#define __NR_getdents64 (__NR_Linux + 219) +-#define __NR_fcntl64 (__NR_Linux + 220) +-#define __NR_reserved221 (__NR_Linux + 221) +-#define __NR_gettid (__NR_Linux + 222) +-#define __NR_readahead (__NR_Linux + 223) +-#define __NR_setxattr (__NR_Linux + 224) +-#define 
__NR_lsetxattr (__NR_Linux + 225) +-#define __NR_fsetxattr (__NR_Linux + 226) +-#define __NR_getxattr (__NR_Linux + 227) +-#define __NR_lgetxattr (__NR_Linux + 228) +-#define __NR_fgetxattr (__NR_Linux + 229) +-#define __NR_listxattr (__NR_Linux + 230) +-#define __NR_llistxattr (__NR_Linux + 231) +-#define __NR_flistxattr (__NR_Linux + 232) +-#define __NR_removexattr (__NR_Linux + 233) +-#define __NR_lremovexattr (__NR_Linux + 234) +-#define __NR_fremovexattr (__NR_Linux + 235) +-#define __NR_tkill (__NR_Linux + 236) +-#define __NR_sendfile64 (__NR_Linux + 237) +-#define __NR_futex (__NR_Linux + 238) +-#define __NR_sched_setaffinity (__NR_Linux + 239) +-#define __NR_sched_getaffinity (__NR_Linux + 240) +-#define __NR_io_setup (__NR_Linux + 241) +-#define __NR_io_destroy (__NR_Linux + 242) +-#define __NR_io_getevents (__NR_Linux + 243) +-#define __NR_io_submit (__NR_Linux + 244) +-#define __NR_io_cancel (__NR_Linux + 245) +-#define __NR_exit_group (__NR_Linux + 246) +-#define __NR_lookup_dcookie (__NR_Linux + 247) +-#define __NR_epoll_create (__NR_Linux + 248) +-#define __NR_epoll_ctl (__NR_Linux + 249) +-#define __NR_epoll_wait (__NR_Linux + 250) +-#define __NR_remap_file_pages (__NR_Linux + 251) +-#define __NR_set_tid_address (__NR_Linux + 252) +-#define __NR_restart_syscall (__NR_Linux + 253) +-#define __NR_fadvise64 (__NR_Linux + 254) +-#define __NR_statfs64 (__NR_Linux + 255) +-#define __NR_fstatfs64 (__NR_Linux + 256) +-#define __NR_timer_create (__NR_Linux + 257) +-#define __NR_timer_settime (__NR_Linux + 258) +-#define __NR_timer_gettime (__NR_Linux + 259) +-#define __NR_timer_getoverrun (__NR_Linux + 260) +-#define __NR_timer_delete (__NR_Linux + 261) +-#define __NR_clock_settime (__NR_Linux + 262) +-#define __NR_clock_gettime (__NR_Linux + 263) +-#define __NR_clock_getres (__NR_Linux + 264) +-#define __NR_clock_nanosleep (__NR_Linux + 265) +-#define __NR_tgkill (__NR_Linux + 266) +-#define __NR_utimes (__NR_Linux + 267) +-#define __NR_mbind (__NR_Linux + 
268) +-#define __NR_get_mempolicy (__NR_Linux + 269) +-#define __NR_set_mempolicy (__NR_Linux + 270) +-#define __NR_mq_open (__NR_Linux + 271) +-#define __NR_mq_unlink (__NR_Linux + 272) +-#define __NR_mq_timedsend (__NR_Linux + 273) +-#define __NR_mq_timedreceive (__NR_Linux + 274) +-#define __NR_mq_notify (__NR_Linux + 275) +-#define __NR_mq_getsetattr (__NR_Linux + 276) +-#define __NR_vserver (__NR_Linux + 277) +-#define __NR_waitid (__NR_Linux + 278) +-#define __NR_add_key (__NR_Linux + 280) +-#define __NR_request_key (__NR_Linux + 281) +-#define __NR_keyctl (__NR_Linux + 282) +-#define __NR_set_thread_area (__NR_Linux + 283) +-#define __NR_inotify_init (__NR_Linux + 284) +-#define __NR_inotify_add_watch (__NR_Linux + 285) +-#define __NR_inotify_rm_watch (__NR_Linux + 286) +-#define __NR_migrate_pages (__NR_Linux + 287) +-#define __NR_openat (__NR_Linux + 288) +-#define __NR_mkdirat (__NR_Linux + 289) +-#define __NR_mknodat (__NR_Linux + 290) +-#define __NR_fchownat (__NR_Linux + 291) +-#define __NR_futimesat (__NR_Linux + 292) +-#define __NR_fstatat64 (__NR_Linux + 293) +-#define __NR_unlinkat (__NR_Linux + 294) +-#define __NR_renameat (__NR_Linux + 295) +-#define __NR_linkat (__NR_Linux + 296) +-#define __NR_symlinkat (__NR_Linux + 297) +-#define __NR_readlinkat (__NR_Linux + 298) +-#define __NR_fchmodat (__NR_Linux + 299) +-#define __NR_faccessat (__NR_Linux + 300) +-#define __NR_pselect6 (__NR_Linux + 301) +-#define __NR_ppoll (__NR_Linux + 302) +-#define __NR_unshare (__NR_Linux + 303) +-#define __NR_splice (__NR_Linux + 304) +-#define __NR_sync_file_range (__NR_Linux + 305) +-#define __NR_tee (__NR_Linux + 306) +-#define __NR_vmsplice (__NR_Linux + 307) +-#define __NR_move_pages (__NR_Linux + 308) +-#define __NR_set_robust_list (__NR_Linux + 309) +-#define __NR_get_robust_list (__NR_Linux + 310) +-#define __NR_kexec_load (__NR_Linux + 311) +-#define __NR_getcpu (__NR_Linux + 312) +-#define __NR_epoll_pwait (__NR_Linux + 313) +-#define __NR_ioprio_set 
(__NR_Linux + 314) +-#define __NR_ioprio_get (__NR_Linux + 315) +-#define __NR_utimensat (__NR_Linux + 316) +-#define __NR_signalfd (__NR_Linux + 317) +-#define __NR_timerfd (__NR_Linux + 318) +-#define __NR_eventfd (__NR_Linux + 319) +-#define __NR_fallocate (__NR_Linux + 320) +-#define __NR_timerfd_create (__NR_Linux + 321) +-#define __NR_timerfd_gettime (__NR_Linux + 322) +-#define __NR_timerfd_settime (__NR_Linux + 323) +-#define __NR_signalfd4 (__NR_Linux + 324) +-#define __NR_eventfd2 (__NR_Linux + 325) +-#define __NR_epoll_create1 (__NR_Linux + 326) +-#define __NR_dup3 (__NR_Linux + 327) +-#define __NR_pipe2 (__NR_Linux + 328) +-#define __NR_inotify_init1 (__NR_Linux + 329) +-#define __NR_preadv (__NR_Linux + 330) +-#define __NR_pwritev (__NR_Linux + 331) +-#define __NR_rt_tgsigqueueinfo (__NR_Linux + 332) +-#define __NR_perf_event_open (__NR_Linux + 333) +-#define __NR_accept4 (__NR_Linux + 334) +-#define __NR_recvmmsg (__NR_Linux + 335) +-#define __NR_fanotify_init (__NR_Linux + 336) +-#define __NR_fanotify_mark (__NR_Linux + 337) +-#define __NR_prlimit64 (__NR_Linux + 338) +-#define __NR_name_to_handle_at (__NR_Linux + 339) +-#define __NR_open_by_handle_at (__NR_Linux + 340) +-#define __NR_clock_adjtime (__NR_Linux + 341) +-#define __NR_syncfs (__NR_Linux + 342) +-#define __NR_sendmmsg (__NR_Linux + 343) +-#define __NR_setns (__NR_Linux + 344) +-#define __NR_process_vm_readv (__NR_Linux + 345) +-#define __NR_process_vm_writev (__NR_Linux + 346) +-#define __NR_kcmp (__NR_Linux + 347) +-#define __NR_finit_module (__NR_Linux + 348) +-#define __NR_sched_setattr (__NR_Linux + 349) +-#define __NR_sched_getattr (__NR_Linux + 350) +-#define __NR_renameat2 (__NR_Linux + 351) +-#define __NR_seccomp (__NR_Linux + 352) +-#define __NR_getrandom (__NR_Linux + 353) +-#define __NR_memfd_create (__NR_Linux + 354) +-#define __NR_bpf (__NR_Linux + 355) +-#define __NR_execveat (__NR_Linux + 356) +-#define __NR_userfaultfd (__NR_Linux + 357) +-#define __NR_membarrier 
(__NR_Linux + 358) +-#define __NR_mlock2 (__NR_Linux + 359) +-#define __NR_copy_file_range (__NR_Linux + 360) +-#define __NR_preadv2 (__NR_Linux + 361) +-#define __NR_pwritev2 (__NR_Linux + 362) +-#define __NR_pkey_mprotect (__NR_Linux + 363) +-#define __NR_pkey_alloc (__NR_Linux + 364) +-#define __NR_pkey_free (__NR_Linux + 365) +-#define __NR_statx (__NR_Linux + 366) +-#define __NR_rseq (__NR_Linux + 367) +-#define __NR_io_pgetevents (__NR_Linux + 368) +-#define __NR_semget (__NR_Linux + 393) +-#define __NR_semctl (__NR_Linux + 394) +-#define __NR_shmget (__NR_Linux + 395) +-#define __NR_shmctl (__NR_Linux + 396) +-#define __NR_shmat (__NR_Linux + 397) +-#define __NR_shmdt (__NR_Linux + 398) +-#define __NR_msgget (__NR_Linux + 399) +-#define __NR_msgsnd (__NR_Linux + 400) +-#define __NR_msgrcv (__NR_Linux + 401) +-#define __NR_msgctl (__NR_Linux + 402) +-#define __NR_clock_gettime64 (__NR_Linux + 403) +-#define __NR_clock_settime64 (__NR_Linux + 404) +-#define __NR_clock_adjtime64 (__NR_Linux + 405) +-#define __NR_clock_getres_time64 (__NR_Linux + 406) +-#define __NR_clock_nanosleep_time64 (__NR_Linux + 407) +-#define __NR_timer_gettime64 (__NR_Linux + 408) +-#define __NR_timer_settime64 (__NR_Linux + 409) +-#define __NR_timerfd_gettime64 (__NR_Linux + 410) +-#define __NR_timerfd_settime64 (__NR_Linux + 411) +-#define __NR_utimensat_time64 (__NR_Linux + 412) +-#define __NR_pselect6_time64 (__NR_Linux + 413) +-#define __NR_ppoll_time64 (__NR_Linux + 414) +-#define __NR_io_pgetevents_time64 (__NR_Linux + 416) +-#define __NR_recvmmsg_time64 (__NR_Linux + 417) +-#define __NR_mq_timedsend_time64 (__NR_Linux + 418) +-#define __NR_mq_timedreceive_time64 (__NR_Linux + 419) +-#define __NR_semtimedop_time64 (__NR_Linux + 420) +-#define __NR_rt_sigtimedwait_time64 (__NR_Linux + 421) +-#define __NR_futex_time64 (__NR_Linux + 422) +-#define __NR_sched_rr_get_interval_time64 (__NR_Linux + 423) +-#define __NR_pidfd_send_signal (__NR_Linux + 424) +-#define __NR_io_uring_setup 
(__NR_Linux + 425) +-#define __NR_io_uring_enter (__NR_Linux + 426) +-#define __NR_io_uring_register (__NR_Linux + 427) +-#define __NR_open_tree (__NR_Linux + 428) +-#define __NR_move_mount (__NR_Linux + 429) +-#define __NR_fsopen (__NR_Linux + 430) +-#define __NR_fsconfig (__NR_Linux + 431) +-#define __NR_fsmount (__NR_Linux + 432) +-#define __NR_fspick (__NR_Linux + 433) +-#define __NR_pidfd_open (__NR_Linux + 434) +-#define __NR_clone3 (__NR_Linux + 435) +-#define __NR_close_range (__NR_Linux + 436) +-#define __NR_openat2 (__NR_Linux + 437) +-#define __NR_pidfd_getfd (__NR_Linux + 438) +-#define __NR_faccessat2 (__NR_Linux + 439) +-#define __NR_process_madvise (__NR_Linux + 440) +-#define __NR_epoll_pwait2 (__NR_Linux + 441) ++#define __NR_syscall (__NR_Linux + 0) ++#define __NR_exit (__NR_Linux + 1) ++#define __NR_fork (__NR_Linux + 2) ++#define __NR_read (__NR_Linux + 3) ++#define __NR_write (__NR_Linux + 4) ++#define __NR_open (__NR_Linux + 5) ++#define __NR_close (__NR_Linux + 6) ++#define __NR_waitpid (__NR_Linux + 7) ++#define __NR_creat (__NR_Linux + 8) ++#define __NR_link (__NR_Linux + 9) ++#define __NR_unlink (__NR_Linux + 10) ++#define __NR_execve (__NR_Linux + 11) ++#define __NR_chdir (__NR_Linux + 12) ++#define __NR_time (__NR_Linux + 13) ++#define __NR_mknod (__NR_Linux + 14) ++#define __NR_chmod (__NR_Linux + 15) ++#define __NR_lchown (__NR_Linux + 16) ++#define __NR_break (__NR_Linux + 17) ++#define __NR_unused18 (__NR_Linux + 18) ++#define __NR_lseek (__NR_Linux + 19) ++#define __NR_getpid (__NR_Linux + 20) ++#define __NR_mount (__NR_Linux + 21) ++#define __NR_umount (__NR_Linux + 22) ++#define __NR_setuid (__NR_Linux + 23) ++#define __NR_getuid (__NR_Linux + 24) ++#define __NR_stime (__NR_Linux + 25) ++#define __NR_ptrace (__NR_Linux + 26) ++#define __NR_alarm (__NR_Linux + 27) ++#define __NR_unused28 (__NR_Linux + 28) ++#define __NR_pause (__NR_Linux + 29) ++#define __NR_utime (__NR_Linux + 30) ++#define __NR_stty (__NR_Linux + 31) ++#define 
__NR_gtty (__NR_Linux + 32) ++#define __NR_access (__NR_Linux + 33) ++#define __NR_nice (__NR_Linux + 34) ++#define __NR_ftime (__NR_Linux + 35) ++#define __NR_sync (__NR_Linux + 36) ++#define __NR_kill (__NR_Linux + 37) ++#define __NR_rename (__NR_Linux + 38) ++#define __NR_mkdir (__NR_Linux + 39) ++#define __NR_rmdir (__NR_Linux + 40) ++#define __NR_dup (__NR_Linux + 41) ++#define __NR_pipe (__NR_Linux + 42) ++#define __NR_times (__NR_Linux + 43) ++#define __NR_prof (__NR_Linux + 44) ++#define __NR_brk (__NR_Linux + 45) ++#define __NR_setgid (__NR_Linux + 46) ++#define __NR_getgid (__NR_Linux + 47) ++#define __NR_signal (__NR_Linux + 48) ++#define __NR_geteuid (__NR_Linux + 49) ++#define __NR_getegid (__NR_Linux + 50) ++#define __NR_acct (__NR_Linux + 51) ++#define __NR_umount2 (__NR_Linux + 52) ++#define __NR_lock (__NR_Linux + 53) ++#define __NR_ioctl (__NR_Linux + 54) ++#define __NR_fcntl (__NR_Linux + 55) ++#define __NR_mpx (__NR_Linux + 56) ++#define __NR_setpgid (__NR_Linux + 57) ++#define __NR_ulimit (__NR_Linux + 58) ++#define __NR_unused59 (__NR_Linux + 59) ++#define __NR_umask (__NR_Linux + 60) ++#define __NR_chroot (__NR_Linux + 61) ++#define __NR_ustat (__NR_Linux + 62) ++#define __NR_dup2 (__NR_Linux + 63) ++#define __NR_getppid (__NR_Linux + 64) ++#define __NR_getpgrp (__NR_Linux + 65) ++#define __NR_setsid (__NR_Linux + 66) ++#define __NR_sigaction (__NR_Linux + 67) ++#define __NR_sgetmask (__NR_Linux + 68) ++#define __NR_ssetmask (__NR_Linux + 69) ++#define __NR_setreuid (__NR_Linux + 70) ++#define __NR_setregid (__NR_Linux + 71) ++#define __NR_sigsuspend (__NR_Linux + 72) ++#define __NR_sigpending (__NR_Linux + 73) ++#define __NR_sethostname (__NR_Linux + 74) ++#define __NR_setrlimit (__NR_Linux + 75) ++#define __NR_getrlimit (__NR_Linux + 76) ++#define __NR_getrusage (__NR_Linux + 77) ++#define __NR_gettimeofday (__NR_Linux + 78) ++#define __NR_settimeofday (__NR_Linux + 79) ++#define __NR_getgroups (__NR_Linux + 80) ++#define __NR_setgroups 
(__NR_Linux + 81) ++#define __NR_reserved82 (__NR_Linux + 82) ++#define __NR_symlink (__NR_Linux + 83) ++#define __NR_unused84 (__NR_Linux + 84) ++#define __NR_readlink (__NR_Linux + 85) ++#define __NR_uselib (__NR_Linux + 86) ++#define __NR_swapon (__NR_Linux + 87) ++#define __NR_reboot (__NR_Linux + 88) ++#define __NR_readdir (__NR_Linux + 89) ++#define __NR_mmap (__NR_Linux + 90) ++#define __NR_munmap (__NR_Linux + 91) ++#define __NR_truncate (__NR_Linux + 92) ++#define __NR_ftruncate (__NR_Linux + 93) ++#define __NR_fchmod (__NR_Linux + 94) ++#define __NR_fchown (__NR_Linux + 95) ++#define __NR_getpriority (__NR_Linux + 96) ++#define __NR_setpriority (__NR_Linux + 97) ++#define __NR_profil (__NR_Linux + 98) ++#define __NR_statfs (__NR_Linux + 99) ++#define __NR_fstatfs (__NR_Linux + 100) ++#define __NR_ioperm (__NR_Linux + 101) ++#define __NR_socketcall (__NR_Linux + 102) ++#define __NR_syslog (__NR_Linux + 103) ++#define __NR_setitimer (__NR_Linux + 104) ++#define __NR_getitimer (__NR_Linux + 105) ++#define __NR_stat (__NR_Linux + 106) ++#define __NR_lstat (__NR_Linux + 107) ++#define __NR_fstat (__NR_Linux + 108) ++#define __NR_unused109 (__NR_Linux + 109) ++#define __NR_iopl (__NR_Linux + 110) ++#define __NR_vhangup (__NR_Linux + 111) ++#define __NR_idle (__NR_Linux + 112) ++#define __NR_vm86 (__NR_Linux + 113) ++#define __NR_wait4 (__NR_Linux + 114) ++#define __NR_swapoff (__NR_Linux + 115) ++#define __NR_sysinfo (__NR_Linux + 116) ++#define __NR_ipc (__NR_Linux + 117) ++#define __NR_fsync (__NR_Linux + 118) ++#define __NR_sigreturn (__NR_Linux + 119) ++#define __NR_clone (__NR_Linux + 120) ++#define __NR_setdomainname (__NR_Linux + 121) ++#define __NR_uname (__NR_Linux + 122) ++#define __NR_modify_ldt (__NR_Linux + 123) ++#define __NR_adjtimex (__NR_Linux + 124) ++#define __NR_mprotect (__NR_Linux + 125) ++#define __NR_sigprocmask (__NR_Linux + 126) ++#define __NR_create_module (__NR_Linux + 127) ++#define __NR_init_module (__NR_Linux + 128) ++#define 
__NR_delete_module (__NR_Linux + 129) ++#define __NR_get_kernel_syms (__NR_Linux + 130) ++#define __NR_quotactl (__NR_Linux + 131) ++#define __NR_getpgid (__NR_Linux + 132) ++#define __NR_fchdir (__NR_Linux + 133) ++#define __NR_bdflush (__NR_Linux + 134) ++#define __NR_sysfs (__NR_Linux + 135) ++#define __NR_personality (__NR_Linux + 136) ++#define __NR_afs_syscall (__NR_Linux + 137) ++#define __NR_setfsuid (__NR_Linux + 138) ++#define __NR_setfsgid (__NR_Linux + 139) ++#define __NR__llseek (__NR_Linux + 140) ++#define __NR_getdents (__NR_Linux + 141) ++#define __NR__newselect (__NR_Linux + 142) ++#define __NR_flock (__NR_Linux + 143) ++#define __NR_msync (__NR_Linux + 144) ++#define __NR_readv (__NR_Linux + 145) ++#define __NR_writev (__NR_Linux + 146) ++#define __NR_cacheflush (__NR_Linux + 147) ++#define __NR_cachectl (__NR_Linux + 148) ++#define __NR_sysmips (__NR_Linux + 149) ++#define __NR_unused150 (__NR_Linux + 150) ++#define __NR_getsid (__NR_Linux + 151) ++#define __NR_fdatasync (__NR_Linux + 152) ++#define __NR__sysctl (__NR_Linux + 153) ++#define __NR_mlock (__NR_Linux + 154) ++#define __NR_munlock (__NR_Linux + 155) ++#define __NR_mlockall (__NR_Linux + 156) ++#define __NR_munlockall (__NR_Linux + 157) ++#define __NR_sched_setparam (__NR_Linux + 158) ++#define __NR_sched_getparam (__NR_Linux + 159) ++#define __NR_sched_setscheduler (__NR_Linux + 160) ++#define __NR_sched_getscheduler (__NR_Linux + 161) ++#define __NR_sched_yield (__NR_Linux + 162) ++#define __NR_sched_get_priority_max (__NR_Linux + 163) ++#define __NR_sched_get_priority_min (__NR_Linux + 164) ++#define __NR_sched_rr_get_interval (__NR_Linux + 165) ++#define __NR_nanosleep (__NR_Linux + 166) ++#define __NR_mremap (__NR_Linux + 167) ++#define __NR_accept (__NR_Linux + 168) ++#define __NR_bind (__NR_Linux + 169) ++#define __NR_connect (__NR_Linux + 170) ++#define __NR_getpeername (__NR_Linux + 171) ++#define __NR_getsockname (__NR_Linux + 172) ++#define __NR_getsockopt (__NR_Linux + 173) 
++#define __NR_listen (__NR_Linux + 174) ++#define __NR_recv (__NR_Linux + 175) ++#define __NR_recvfrom (__NR_Linux + 176) ++#define __NR_recvmsg (__NR_Linux + 177) ++#define __NR_send (__NR_Linux + 178) ++#define __NR_sendmsg (__NR_Linux + 179) ++#define __NR_sendto (__NR_Linux + 180) ++#define __NR_setsockopt (__NR_Linux + 181) ++#define __NR_shutdown (__NR_Linux + 182) ++#define __NR_socket (__NR_Linux + 183) ++#define __NR_socketpair (__NR_Linux + 184) ++#define __NR_setresuid (__NR_Linux + 185) ++#define __NR_getresuid (__NR_Linux + 186) ++#define __NR_query_module (__NR_Linux + 187) ++#define __NR_poll (__NR_Linux + 188) ++#define __NR_nfsservctl (__NR_Linux + 189) ++#define __NR_setresgid (__NR_Linux + 190) ++#define __NR_getresgid (__NR_Linux + 191) ++#define __NR_prctl (__NR_Linux + 192) ++#define __NR_rt_sigreturn (__NR_Linux + 193) ++#define __NR_rt_sigaction (__NR_Linux + 194) ++#define __NR_rt_sigprocmask (__NR_Linux + 195) ++#define __NR_rt_sigpending (__NR_Linux + 196) ++#define __NR_rt_sigtimedwait (__NR_Linux + 197) ++#define __NR_rt_sigqueueinfo (__NR_Linux + 198) ++#define __NR_rt_sigsuspend (__NR_Linux + 199) ++#define __NR_pread64 (__NR_Linux + 200) ++#define __NR_pwrite64 (__NR_Linux + 201) ++#define __NR_chown (__NR_Linux + 202) ++#define __NR_getcwd (__NR_Linux + 203) ++#define __NR_capget (__NR_Linux + 204) ++#define __NR_capset (__NR_Linux + 205) ++#define __NR_sigaltstack (__NR_Linux + 206) ++#define __NR_sendfile (__NR_Linux + 207) ++#define __NR_getpmsg (__NR_Linux + 208) ++#define __NR_putpmsg (__NR_Linux + 209) ++#define __NR_mmap2 (__NR_Linux + 210) ++#define __NR_truncate64 (__NR_Linux + 211) ++#define __NR_ftruncate64 (__NR_Linux + 212) ++#define __NR_stat64 (__NR_Linux + 213) ++#define __NR_lstat64 (__NR_Linux + 214) ++#define __NR_fstat64 (__NR_Linux + 215) ++#define __NR_pivot_root (__NR_Linux + 216) ++#define __NR_mincore (__NR_Linux + 217) ++#define __NR_madvise (__NR_Linux + 218) ++#define __NR_getdents64 (__NR_Linux + 219) 
++#define __NR_fcntl64 (__NR_Linux + 220) ++#define __NR_reserved221 (__NR_Linux + 221) ++#define __NR_gettid (__NR_Linux + 222) ++#define __NR_readahead (__NR_Linux + 223) ++#define __NR_setxattr (__NR_Linux + 224) ++#define __NR_lsetxattr (__NR_Linux + 225) ++#define __NR_fsetxattr (__NR_Linux + 226) ++#define __NR_getxattr (__NR_Linux + 227) ++#define __NR_lgetxattr (__NR_Linux + 228) ++#define __NR_fgetxattr (__NR_Linux + 229) ++#define __NR_listxattr (__NR_Linux + 230) ++#define __NR_llistxattr (__NR_Linux + 231) ++#define __NR_flistxattr (__NR_Linux + 232) ++#define __NR_removexattr (__NR_Linux + 233) ++#define __NR_lremovexattr (__NR_Linux + 234) ++#define __NR_fremovexattr (__NR_Linux + 235) ++#define __NR_tkill (__NR_Linux + 236) ++#define __NR_sendfile64 (__NR_Linux + 237) ++#define __NR_futex (__NR_Linux + 238) ++#define __NR_sched_setaffinity (__NR_Linux + 239) ++#define __NR_sched_getaffinity (__NR_Linux + 240) ++#define __NR_io_setup (__NR_Linux + 241) ++#define __NR_io_destroy (__NR_Linux + 242) ++#define __NR_io_getevents (__NR_Linux + 243) ++#define __NR_io_submit (__NR_Linux + 244) ++#define __NR_io_cancel (__NR_Linux + 245) ++#define __NR_exit_group (__NR_Linux + 246) ++#define __NR_lookup_dcookie (__NR_Linux + 247) ++#define __NR_epoll_create (__NR_Linux + 248) ++#define __NR_epoll_ctl (__NR_Linux + 249) ++#define __NR_epoll_wait (__NR_Linux + 250) ++#define __NR_remap_file_pages (__NR_Linux + 251) ++#define __NR_set_tid_address (__NR_Linux + 252) ++#define __NR_restart_syscall (__NR_Linux + 253) ++#define __NR_fadvise64 (__NR_Linux + 254) ++#define __NR_statfs64 (__NR_Linux + 255) ++#define __NR_fstatfs64 (__NR_Linux + 256) ++#define __NR_timer_create (__NR_Linux + 257) ++#define __NR_timer_settime (__NR_Linux + 258) ++#define __NR_timer_gettime (__NR_Linux + 259) ++#define __NR_timer_getoverrun (__NR_Linux + 260) ++#define __NR_timer_delete (__NR_Linux + 261) ++#define __NR_clock_settime (__NR_Linux + 262) ++#define __NR_clock_gettime 
(__NR_Linux + 263) ++#define __NR_clock_getres (__NR_Linux + 264) ++#define __NR_clock_nanosleep (__NR_Linux + 265) ++#define __NR_tgkill (__NR_Linux + 266) ++#define __NR_utimes (__NR_Linux + 267) ++#define __NR_mbind (__NR_Linux + 268) ++#define __NR_get_mempolicy (__NR_Linux + 269) ++#define __NR_set_mempolicy (__NR_Linux + 270) ++#define __NR_mq_open (__NR_Linux + 271) ++#define __NR_mq_unlink (__NR_Linux + 272) ++#define __NR_mq_timedsend (__NR_Linux + 273) ++#define __NR_mq_timedreceive (__NR_Linux + 274) ++#define __NR_mq_notify (__NR_Linux + 275) ++#define __NR_mq_getsetattr (__NR_Linux + 276) ++#define __NR_vserver (__NR_Linux + 277) ++#define __NR_waitid (__NR_Linux + 278) ++#define __NR_add_key (__NR_Linux + 280) ++#define __NR_request_key (__NR_Linux + 281) ++#define __NR_keyctl (__NR_Linux + 282) ++#define __NR_set_thread_area (__NR_Linux + 283) ++#define __NR_inotify_init (__NR_Linux + 284) ++#define __NR_inotify_add_watch (__NR_Linux + 285) ++#define __NR_inotify_rm_watch (__NR_Linux + 286) ++#define __NR_migrate_pages (__NR_Linux + 287) ++#define __NR_openat (__NR_Linux + 288) ++#define __NR_mkdirat (__NR_Linux + 289) ++#define __NR_mknodat (__NR_Linux + 290) ++#define __NR_fchownat (__NR_Linux + 291) ++#define __NR_futimesat (__NR_Linux + 292) ++#define __NR_fstatat64 (__NR_Linux + 293) ++#define __NR_unlinkat (__NR_Linux + 294) ++#define __NR_renameat (__NR_Linux + 295) ++#define __NR_linkat (__NR_Linux + 296) ++#define __NR_symlinkat (__NR_Linux + 297) ++#define __NR_readlinkat (__NR_Linux + 298) ++#define __NR_fchmodat (__NR_Linux + 299) ++#define __NR_faccessat (__NR_Linux + 300) ++#define __NR_pselect6 (__NR_Linux + 301) ++#define __NR_ppoll (__NR_Linux + 302) ++#define __NR_unshare (__NR_Linux + 303) ++#define __NR_splice (__NR_Linux + 304) ++#define __NR_sync_file_range (__NR_Linux + 305) ++#define __NR_tee (__NR_Linux + 306) ++#define __NR_vmsplice (__NR_Linux + 307) ++#define __NR_move_pages (__NR_Linux + 308) ++#define 
__NR_set_robust_list (__NR_Linux + 309) ++#define __NR_get_robust_list (__NR_Linux + 310) ++#define __NR_kexec_load (__NR_Linux + 311) ++#define __NR_getcpu (__NR_Linux + 312) ++#define __NR_epoll_pwait (__NR_Linux + 313) ++#define __NR_ioprio_set (__NR_Linux + 314) ++#define __NR_ioprio_get (__NR_Linux + 315) ++#define __NR_utimensat (__NR_Linux + 316) ++#define __NR_signalfd (__NR_Linux + 317) ++#define __NR_timerfd (__NR_Linux + 318) ++#define __NR_eventfd (__NR_Linux + 319) ++#define __NR_fallocate (__NR_Linux + 320) ++#define __NR_timerfd_create (__NR_Linux + 321) ++#define __NR_timerfd_gettime (__NR_Linux + 322) ++#define __NR_timerfd_settime (__NR_Linux + 323) ++#define __NR_signalfd4 (__NR_Linux + 324) ++#define __NR_eventfd2 (__NR_Linux + 325) ++#define __NR_epoll_create1 (__NR_Linux + 326) ++#define __NR_dup3 (__NR_Linux + 327) ++#define __NR_pipe2 (__NR_Linux + 328) ++#define __NR_inotify_init1 (__NR_Linux + 329) ++#define __NR_preadv (__NR_Linux + 330) ++#define __NR_pwritev (__NR_Linux + 331) ++#define __NR_rt_tgsigqueueinfo (__NR_Linux + 332) ++#define __NR_perf_event_open (__NR_Linux + 333) ++#define __NR_accept4 (__NR_Linux + 334) ++#define __NR_recvmmsg (__NR_Linux + 335) ++#define __NR_fanotify_init (__NR_Linux + 336) ++#define __NR_fanotify_mark (__NR_Linux + 337) ++#define __NR_prlimit64 (__NR_Linux + 338) ++#define __NR_name_to_handle_at (__NR_Linux + 339) ++#define __NR_open_by_handle_at (__NR_Linux + 340) ++#define __NR_clock_adjtime (__NR_Linux + 341) ++#define __NR_syncfs (__NR_Linux + 342) ++#define __NR_sendmmsg (__NR_Linux + 343) ++#define __NR_setns (__NR_Linux + 344) ++#define __NR_process_vm_readv (__NR_Linux + 345) ++#define __NR_process_vm_writev (__NR_Linux + 346) ++#define __NR_kcmp (__NR_Linux + 347) ++#define __NR_finit_module (__NR_Linux + 348) ++#define __NR_sched_setattr (__NR_Linux + 349) ++#define __NR_sched_getattr (__NR_Linux + 350) ++#define __NR_renameat2 (__NR_Linux + 351) ++#define __NR_seccomp (__NR_Linux + 352) 
++#define __NR_getrandom (__NR_Linux + 353) ++#define __NR_memfd_create (__NR_Linux + 354) ++#define __NR_bpf (__NR_Linux + 355) ++#define __NR_execveat (__NR_Linux + 356) ++#define __NR_userfaultfd (__NR_Linux + 357) ++#define __NR_membarrier (__NR_Linux + 358) ++#define __NR_mlock2 (__NR_Linux + 359) ++#define __NR_copy_file_range (__NR_Linux + 360) ++#define __NR_preadv2 (__NR_Linux + 361) ++#define __NR_pwritev2 (__NR_Linux + 362) ++#define __NR_pkey_mprotect (__NR_Linux + 363) ++#define __NR_pkey_alloc (__NR_Linux + 364) ++#define __NR_pkey_free (__NR_Linux + 365) ++#define __NR_statx (__NR_Linux + 366) ++#define __NR_rseq (__NR_Linux + 367) ++#define __NR_io_pgetevents (__NR_Linux + 368) ++#define __NR_semget (__NR_Linux + 393) ++#define __NR_semctl (__NR_Linux + 394) ++#define __NR_shmget (__NR_Linux + 395) ++#define __NR_shmctl (__NR_Linux + 396) ++#define __NR_shmat (__NR_Linux + 397) ++#define __NR_shmdt (__NR_Linux + 398) ++#define __NR_msgget (__NR_Linux + 399) ++#define __NR_msgsnd (__NR_Linux + 400) ++#define __NR_msgrcv (__NR_Linux + 401) ++#define __NR_msgctl (__NR_Linux + 402) ++#define __NR_clock_gettime64 (__NR_Linux + 403) ++#define __NR_clock_settime64 (__NR_Linux + 404) ++#define __NR_clock_adjtime64 (__NR_Linux + 405) ++#define __NR_clock_getres_time64 (__NR_Linux + 406) ++#define __NR_clock_nanosleep_time64 (__NR_Linux + 407) ++#define __NR_timer_gettime64 (__NR_Linux + 408) ++#define __NR_timer_settime64 (__NR_Linux + 409) ++#define __NR_timerfd_gettime64 (__NR_Linux + 410) ++#define __NR_timerfd_settime64 (__NR_Linux + 411) ++#define __NR_utimensat_time64 (__NR_Linux + 412) ++#define __NR_pselect6_time64 (__NR_Linux + 413) ++#define __NR_ppoll_time64 (__NR_Linux + 414) ++#define __NR_io_pgetevents_time64 (__NR_Linux + 416) ++#define __NR_recvmmsg_time64 (__NR_Linux + 417) ++#define __NR_mq_timedsend_time64 (__NR_Linux + 418) ++#define __NR_mq_timedreceive_time64 (__NR_Linux + 419) ++#define __NR_semtimedop_time64 (__NR_Linux + 420) 
++#define __NR_rt_sigtimedwait_time64 (__NR_Linux + 421) ++#define __NR_futex_time64 (__NR_Linux + 422) ++#define __NR_sched_rr_get_interval_time64 (__NR_Linux + 423) ++#define __NR_pidfd_send_signal (__NR_Linux + 424) ++#define __NR_io_uring_setup (__NR_Linux + 425) ++#define __NR_io_uring_enter (__NR_Linux + 426) ++#define __NR_io_uring_register (__NR_Linux + 427) ++#define __NR_open_tree (__NR_Linux + 428) ++#define __NR_move_mount (__NR_Linux + 429) ++#define __NR_fsopen (__NR_Linux + 430) ++#define __NR_fsconfig (__NR_Linux + 431) ++#define __NR_fsmount (__NR_Linux + 432) ++#define __NR_fspick (__NR_Linux + 433) ++#define __NR_pidfd_open (__NR_Linux + 434) ++#define __NR_clone3 (__NR_Linux + 435) ++#define __NR_close_range (__NR_Linux + 436) ++#define __NR_openat2 (__NR_Linux + 437) ++#define __NR_pidfd_getfd (__NR_Linux + 438) ++#define __NR_faccessat2 (__NR_Linux + 439) ++#define __NR_process_madvise (__NR_Linux + 440) ++#define __NR_epoll_pwait2 (__NR_Linux + 441) ++#define __NR_mount_setattr (__NR_Linux + 442) ++#define __NR_landlock_create_ruleset (__NR_Linux + 444) ++#define __NR_landlock_add_rule (__NR_Linux + 445) ++#define __NR_landlock_restrict_self (__NR_Linux + 446) + +- +-#endif /* _ASM_MIPS_UNISTD_O32_H */ ++#endif /* _ASM_UNISTD_O32_H */ +diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h +index c3af3f324c..9f18fa090f 100644 +--- a/linux-headers/asm-powerpc/kvm.h ++++ b/linux-headers/asm-powerpc/kvm.h +@@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char { + #define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1) + #define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2) + #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) ++#define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) ++#define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) + + /* Transactional Memory checkpointed state: + * This is all GPRs, all VSX regs and a subset of SPRs +diff --git 
a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h +index 4624c90043..9155778c19 100644 +--- a/linux-headers/asm-powerpc/unistd_32.h ++++ b/linux-headers/asm-powerpc/unistd_32.h +@@ -1,429 +1,433 @@ +-#ifndef _ASM_POWERPC_UNISTD_32_H +-#define _ASM_POWERPC_UNISTD_32_H ++#ifndef _ASM_UNISTD_32_H ++#define _ASM_UNISTD_32_H + +-#define __NR_restart_syscall 0 +-#define __NR_exit 1 +-#define __NR_fork 2 +-#define __NR_read 3 +-#define __NR_write 4 +-#define __NR_open 5 +-#define __NR_close 6 +-#define __NR_waitpid 7 +-#define __NR_creat 8 +-#define __NR_link 9 +-#define __NR_unlink 10 +-#define __NR_execve 11 +-#define __NR_chdir 12 +-#define __NR_time 13 +-#define __NR_mknod 14 +-#define __NR_chmod 15 +-#define __NR_lchown 16 +-#define __NR_break 17 +-#define __NR_oldstat 18 +-#define __NR_lseek 19 +-#define __NR_getpid 20 +-#define __NR_mount 21 +-#define __NR_umount 22 +-#define __NR_setuid 23 +-#define __NR_getuid 24 +-#define __NR_stime 25 +-#define __NR_ptrace 26 +-#define __NR_alarm 27 +-#define __NR_oldfstat 28 +-#define __NR_pause 29 +-#define __NR_utime 30 +-#define __NR_stty 31 +-#define __NR_gtty 32 +-#define __NR_access 33 +-#define __NR_nice 34 +-#define __NR_ftime 35 +-#define __NR_sync 36 +-#define __NR_kill 37 +-#define __NR_rename 38 +-#define __NR_mkdir 39 +-#define __NR_rmdir 40 +-#define __NR_dup 41 +-#define __NR_pipe 42 +-#define __NR_times 43 +-#define __NR_prof 44 +-#define __NR_brk 45 +-#define __NR_setgid 46 +-#define __NR_getgid 47 +-#define __NR_signal 48 +-#define __NR_geteuid 49 +-#define __NR_getegid 50 +-#define __NR_acct 51 +-#define __NR_umount2 52 +-#define __NR_lock 53 +-#define __NR_ioctl 54 +-#define __NR_fcntl 55 +-#define __NR_mpx 56 +-#define __NR_setpgid 57 +-#define __NR_ulimit 58 +-#define __NR_oldolduname 59 +-#define __NR_umask 60 +-#define __NR_chroot 61 +-#define __NR_ustat 62 +-#define __NR_dup2 63 +-#define __NR_getppid 64 +-#define __NR_getpgrp 65 +-#define __NR_setsid 66 +-#define 
__NR_sigaction 67 +-#define __NR_sgetmask 68 +-#define __NR_ssetmask 69 +-#define __NR_setreuid 70 +-#define __NR_setregid 71 +-#define __NR_sigsuspend 72 +-#define __NR_sigpending 73 +-#define __NR_sethostname 74 +-#define __NR_setrlimit 75 +-#define __NR_getrlimit 76 +-#define __NR_getrusage 77 +-#define __NR_gettimeofday 78 +-#define __NR_settimeofday 79 +-#define __NR_getgroups 80 +-#define __NR_setgroups 81 +-#define __NR_select 82 +-#define __NR_symlink 83 +-#define __NR_oldlstat 84 +-#define __NR_readlink 85 +-#define __NR_uselib 86 +-#define __NR_swapon 87 +-#define __NR_reboot 88 +-#define __NR_readdir 89 +-#define __NR_mmap 90 +-#define __NR_munmap 91 +-#define __NR_truncate 92 +-#define __NR_ftruncate 93 +-#define __NR_fchmod 94 +-#define __NR_fchown 95 +-#define __NR_getpriority 96 +-#define __NR_setpriority 97 +-#define __NR_profil 98 +-#define __NR_statfs 99 +-#define __NR_fstatfs 100 +-#define __NR_ioperm 101 +-#define __NR_socketcall 102 +-#define __NR_syslog 103 +-#define __NR_setitimer 104 +-#define __NR_getitimer 105 +-#define __NR_stat 106 +-#define __NR_lstat 107 +-#define __NR_fstat 108 +-#define __NR_olduname 109 +-#define __NR_iopl 110 +-#define __NR_vhangup 111 +-#define __NR_idle 112 +-#define __NR_vm86 113 +-#define __NR_wait4 114 +-#define __NR_swapoff 115 +-#define __NR_sysinfo 116 +-#define __NR_ipc 117 +-#define __NR_fsync 118 +-#define __NR_sigreturn 119 +-#define __NR_clone 120 +-#define __NR_setdomainname 121 +-#define __NR_uname 122 +-#define __NR_modify_ldt 123 +-#define __NR_adjtimex 124 +-#define __NR_mprotect 125 +-#define __NR_sigprocmask 126 +-#define __NR_create_module 127 +-#define __NR_init_module 128 +-#define __NR_delete_module 129 +-#define __NR_get_kernel_syms 130 +-#define __NR_quotactl 131 +-#define __NR_getpgid 132 +-#define __NR_fchdir 133 +-#define __NR_bdflush 134 +-#define __NR_sysfs 135 +-#define __NR_personality 136 +-#define __NR_afs_syscall 137 +-#define __NR_setfsuid 138 +-#define __NR_setfsgid 139 
+-#define __NR__llseek 140 +-#define __NR_getdents 141 +-#define __NR__newselect 142 +-#define __NR_flock 143 +-#define __NR_msync 144 +-#define __NR_readv 145 +-#define __NR_writev 146 +-#define __NR_getsid 147 +-#define __NR_fdatasync 148 +-#define __NR__sysctl 149 +-#define __NR_mlock 150 +-#define __NR_munlock 151 +-#define __NR_mlockall 152 +-#define __NR_munlockall 153 +-#define __NR_sched_setparam 154 +-#define __NR_sched_getparam 155 +-#define __NR_sched_setscheduler 156 +-#define __NR_sched_getscheduler 157 +-#define __NR_sched_yield 158 +-#define __NR_sched_get_priority_max 159 +-#define __NR_sched_get_priority_min 160 +-#define __NR_sched_rr_get_interval 161 +-#define __NR_nanosleep 162 +-#define __NR_mremap 163 +-#define __NR_setresuid 164 +-#define __NR_getresuid 165 +-#define __NR_query_module 166 +-#define __NR_poll 167 +-#define __NR_nfsservctl 168 +-#define __NR_setresgid 169 +-#define __NR_getresgid 170 +-#define __NR_prctl 171 +-#define __NR_rt_sigreturn 172 +-#define __NR_rt_sigaction 173 +-#define __NR_rt_sigprocmask 174 +-#define __NR_rt_sigpending 175 +-#define __NR_rt_sigtimedwait 176 +-#define __NR_rt_sigqueueinfo 177 +-#define __NR_rt_sigsuspend 178 +-#define __NR_pread64 179 +-#define __NR_pwrite64 180 +-#define __NR_chown 181 +-#define __NR_getcwd 182 +-#define __NR_capget 183 +-#define __NR_capset 184 +-#define __NR_sigaltstack 185 +-#define __NR_sendfile 186 +-#define __NR_getpmsg 187 +-#define __NR_putpmsg 188 +-#define __NR_vfork 189 +-#define __NR_ugetrlimit 190 +-#define __NR_readahead 191 +-#define __NR_mmap2 192 +-#define __NR_truncate64 193 +-#define __NR_ftruncate64 194 +-#define __NR_stat64 195 +-#define __NR_lstat64 196 +-#define __NR_fstat64 197 +-#define __NR_pciconfig_read 198 +-#define __NR_pciconfig_write 199 +-#define __NR_pciconfig_iobase 200 +-#define __NR_multiplexer 201 +-#define __NR_getdents64 202 +-#define __NR_pivot_root 203 +-#define __NR_fcntl64 204 +-#define __NR_madvise 205 +-#define __NR_mincore 206 
+-#define __NR_gettid 207 +-#define __NR_tkill 208 +-#define __NR_setxattr 209 +-#define __NR_lsetxattr 210 +-#define __NR_fsetxattr 211 +-#define __NR_getxattr 212 +-#define __NR_lgetxattr 213 +-#define __NR_fgetxattr 214 +-#define __NR_listxattr 215 +-#define __NR_llistxattr 216 +-#define __NR_flistxattr 217 +-#define __NR_removexattr 218 +-#define __NR_lremovexattr 219 +-#define __NR_fremovexattr 220 +-#define __NR_futex 221 +-#define __NR_sched_setaffinity 222 +-#define __NR_sched_getaffinity 223 +-#define __NR_tuxcall 225 +-#define __NR_sendfile64 226 +-#define __NR_io_setup 227 +-#define __NR_io_destroy 228 +-#define __NR_io_getevents 229 +-#define __NR_io_submit 230 +-#define __NR_io_cancel 231 +-#define __NR_set_tid_address 232 +-#define __NR_fadvise64 233 +-#define __NR_exit_group 234 +-#define __NR_lookup_dcookie 235 +-#define __NR_epoll_create 236 +-#define __NR_epoll_ctl 237 +-#define __NR_epoll_wait 238 +-#define __NR_remap_file_pages 239 +-#define __NR_timer_create 240 +-#define __NR_timer_settime 241 +-#define __NR_timer_gettime 242 +-#define __NR_timer_getoverrun 243 +-#define __NR_timer_delete 244 +-#define __NR_clock_settime 245 +-#define __NR_clock_gettime 246 +-#define __NR_clock_getres 247 +-#define __NR_clock_nanosleep 248 +-#define __NR_swapcontext 249 +-#define __NR_tgkill 250 +-#define __NR_utimes 251 +-#define __NR_statfs64 252 +-#define __NR_fstatfs64 253 +-#define __NR_fadvise64_64 254 +-#define __NR_rtas 255 +-#define __NR_sys_debug_setcontext 256 +-#define __NR_migrate_pages 258 +-#define __NR_mbind 259 +-#define __NR_get_mempolicy 260 +-#define __NR_set_mempolicy 261 +-#define __NR_mq_open 262 +-#define __NR_mq_unlink 263 +-#define __NR_mq_timedsend 264 +-#define __NR_mq_timedreceive 265 +-#define __NR_mq_notify 266 +-#define __NR_mq_getsetattr 267 +-#define __NR_kexec_load 268 +-#define __NR_add_key 269 +-#define __NR_request_key 270 +-#define __NR_keyctl 271 +-#define __NR_waitid 272 +-#define __NR_ioprio_set 273 +-#define 
__NR_ioprio_get 274 +-#define __NR_inotify_init 275 +-#define __NR_inotify_add_watch 276 +-#define __NR_inotify_rm_watch 277 +-#define __NR_spu_run 278 +-#define __NR_spu_create 279 +-#define __NR_pselect6 280 +-#define __NR_ppoll 281 +-#define __NR_unshare 282 +-#define __NR_splice 283 +-#define __NR_tee 284 +-#define __NR_vmsplice 285 +-#define __NR_openat 286 +-#define __NR_mkdirat 287 +-#define __NR_mknodat 288 +-#define __NR_fchownat 289 +-#define __NR_futimesat 290 +-#define __NR_fstatat64 291 +-#define __NR_unlinkat 292 +-#define __NR_renameat 293 +-#define __NR_linkat 294 +-#define __NR_symlinkat 295 +-#define __NR_readlinkat 296 +-#define __NR_fchmodat 297 +-#define __NR_faccessat 298 +-#define __NR_get_robust_list 299 +-#define __NR_set_robust_list 300 +-#define __NR_move_pages 301 +-#define __NR_getcpu 302 +-#define __NR_epoll_pwait 303 +-#define __NR_utimensat 304 +-#define __NR_signalfd 305 +-#define __NR_timerfd_create 306 +-#define __NR_eventfd 307 +-#define __NR_sync_file_range2 308 +-#define __NR_fallocate 309 +-#define __NR_subpage_prot 310 +-#define __NR_timerfd_settime 311 +-#define __NR_timerfd_gettime 312 +-#define __NR_signalfd4 313 +-#define __NR_eventfd2 314 +-#define __NR_epoll_create1 315 +-#define __NR_dup3 316 +-#define __NR_pipe2 317 +-#define __NR_inotify_init1 318 +-#define __NR_perf_event_open 319 +-#define __NR_preadv 320 +-#define __NR_pwritev 321 +-#define __NR_rt_tgsigqueueinfo 322 +-#define __NR_fanotify_init 323 +-#define __NR_fanotify_mark 324 +-#define __NR_prlimit64 325 +-#define __NR_socket 326 +-#define __NR_bind 327 +-#define __NR_connect 328 +-#define __NR_listen 329 +-#define __NR_accept 330 +-#define __NR_getsockname 331 +-#define __NR_getpeername 332 +-#define __NR_socketpair 333 +-#define __NR_send 334 +-#define __NR_sendto 335 +-#define __NR_recv 336 +-#define __NR_recvfrom 337 +-#define __NR_shutdown 338 +-#define __NR_setsockopt 339 +-#define __NR_getsockopt 340 +-#define __NR_sendmsg 341 +-#define __NR_recvmsg 
342 +-#define __NR_recvmmsg 343 +-#define __NR_accept4 344 +-#define __NR_name_to_handle_at 345 +-#define __NR_open_by_handle_at 346 +-#define __NR_clock_adjtime 347 +-#define __NR_syncfs 348 +-#define __NR_sendmmsg 349 +-#define __NR_setns 350 +-#define __NR_process_vm_readv 351 +-#define __NR_process_vm_writev 352 +-#define __NR_finit_module 353 +-#define __NR_kcmp 354 +-#define __NR_sched_setattr 355 +-#define __NR_sched_getattr 356 +-#define __NR_renameat2 357 +-#define __NR_seccomp 358 +-#define __NR_getrandom 359 +-#define __NR_memfd_create 360 +-#define __NR_bpf 361 +-#define __NR_execveat 362 +-#define __NR_switch_endian 363 +-#define __NR_userfaultfd 364 +-#define __NR_membarrier 365 +-#define __NR_mlock2 378 +-#define __NR_copy_file_range 379 +-#define __NR_preadv2 380 +-#define __NR_pwritev2 381 +-#define __NR_kexec_file_load 382 +-#define __NR_statx 383 +-#define __NR_pkey_alloc 384 +-#define __NR_pkey_free 385 +-#define __NR_pkey_mprotect 386 +-#define __NR_rseq 387 +-#define __NR_io_pgetevents 388 +-#define __NR_semget 393 +-#define __NR_semctl 394 +-#define __NR_shmget 395 +-#define __NR_shmctl 396 +-#define __NR_shmat 397 +-#define __NR_shmdt 398 +-#define __NR_msgget 399 +-#define __NR_msgsnd 400 +-#define __NR_msgrcv 401 +-#define __NR_msgctl 402 +-#define __NR_clock_gettime64 403 +-#define __NR_clock_settime64 404 +-#define __NR_clock_adjtime64 405 +-#define __NR_clock_getres_time64 406 +-#define __NR_clock_nanosleep_time64 407 +-#define __NR_timer_gettime64 408 +-#define __NR_timer_settime64 409 +-#define __NR_timerfd_gettime64 410 +-#define __NR_timerfd_settime64 411 +-#define __NR_utimensat_time64 412 +-#define __NR_pselect6_time64 413 +-#define __NR_ppoll_time64 414 +-#define __NR_io_pgetevents_time64 416 +-#define __NR_recvmmsg_time64 417 +-#define __NR_mq_timedsend_time64 418 +-#define __NR_mq_timedreceive_time64 419 +-#define __NR_semtimedop_time64 420 +-#define __NR_rt_sigtimedwait_time64 421 +-#define __NR_futex_time64 422 +-#define 
__NR_sched_rr_get_interval_time64 423 +-#define __NR_pidfd_send_signal 424 +-#define __NR_io_uring_setup 425 +-#define __NR_io_uring_enter 426 +-#define __NR_io_uring_register 427 +-#define __NR_open_tree 428 +-#define __NR_move_mount 429 +-#define __NR_fsopen 430 +-#define __NR_fsconfig 431 +-#define __NR_fsmount 432 +-#define __NR_fspick 433 +-#define __NR_pidfd_open 434 +-#define __NR_clone3 435 +-#define __NR_close_range 436 +-#define __NR_openat2 437 +-#define __NR_pidfd_getfd 438 +-#define __NR_faccessat2 439 +-#define __NR_process_madvise 440 +-#define __NR_epoll_pwait2 441 ++#define __NR_restart_syscall 0 ++#define __NR_exit 1 ++#define __NR_fork 2 ++#define __NR_read 3 ++#define __NR_write 4 ++#define __NR_open 5 ++#define __NR_close 6 ++#define __NR_waitpid 7 ++#define __NR_creat 8 ++#define __NR_link 9 ++#define __NR_unlink 10 ++#define __NR_execve 11 ++#define __NR_chdir 12 ++#define __NR_time 13 ++#define __NR_mknod 14 ++#define __NR_chmod 15 ++#define __NR_lchown 16 ++#define __NR_break 17 ++#define __NR_oldstat 18 ++#define __NR_lseek 19 ++#define __NR_getpid 20 ++#define __NR_mount 21 ++#define __NR_umount 22 ++#define __NR_setuid 23 ++#define __NR_getuid 24 ++#define __NR_stime 25 ++#define __NR_ptrace 26 ++#define __NR_alarm 27 ++#define __NR_oldfstat 28 ++#define __NR_pause 29 ++#define __NR_utime 30 ++#define __NR_stty 31 ++#define __NR_gtty 32 ++#define __NR_access 33 ++#define __NR_nice 34 ++#define __NR_ftime 35 ++#define __NR_sync 36 ++#define __NR_kill 37 ++#define __NR_rename 38 ++#define __NR_mkdir 39 ++#define __NR_rmdir 40 ++#define __NR_dup 41 ++#define __NR_pipe 42 ++#define __NR_times 43 ++#define __NR_prof 44 ++#define __NR_brk 45 ++#define __NR_setgid 46 ++#define __NR_getgid 47 ++#define __NR_signal 48 ++#define __NR_geteuid 49 ++#define __NR_getegid 50 ++#define __NR_acct 51 ++#define __NR_umount2 52 ++#define __NR_lock 53 ++#define __NR_ioctl 54 ++#define __NR_fcntl 55 ++#define __NR_mpx 56 ++#define __NR_setpgid 57 ++#define 
__NR_ulimit 58 ++#define __NR_oldolduname 59 ++#define __NR_umask 60 ++#define __NR_chroot 61 ++#define __NR_ustat 62 ++#define __NR_dup2 63 ++#define __NR_getppid 64 ++#define __NR_getpgrp 65 ++#define __NR_setsid 66 ++#define __NR_sigaction 67 ++#define __NR_sgetmask 68 ++#define __NR_ssetmask 69 ++#define __NR_setreuid 70 ++#define __NR_setregid 71 ++#define __NR_sigsuspend 72 ++#define __NR_sigpending 73 ++#define __NR_sethostname 74 ++#define __NR_setrlimit 75 ++#define __NR_getrlimit 76 ++#define __NR_getrusage 77 ++#define __NR_gettimeofday 78 ++#define __NR_settimeofday 79 ++#define __NR_getgroups 80 ++#define __NR_setgroups 81 ++#define __NR_select 82 ++#define __NR_symlink 83 ++#define __NR_oldlstat 84 ++#define __NR_readlink 85 ++#define __NR_uselib 86 ++#define __NR_swapon 87 ++#define __NR_reboot 88 ++#define __NR_readdir 89 ++#define __NR_mmap 90 ++#define __NR_munmap 91 ++#define __NR_truncate 92 ++#define __NR_ftruncate 93 ++#define __NR_fchmod 94 ++#define __NR_fchown 95 ++#define __NR_getpriority 96 ++#define __NR_setpriority 97 ++#define __NR_profil 98 ++#define __NR_statfs 99 ++#define __NR_fstatfs 100 ++#define __NR_ioperm 101 ++#define __NR_socketcall 102 ++#define __NR_syslog 103 ++#define __NR_setitimer 104 ++#define __NR_getitimer 105 ++#define __NR_stat 106 ++#define __NR_lstat 107 ++#define __NR_fstat 108 ++#define __NR_olduname 109 ++#define __NR_iopl 110 ++#define __NR_vhangup 111 ++#define __NR_idle 112 ++#define __NR_vm86 113 ++#define __NR_wait4 114 ++#define __NR_swapoff 115 ++#define __NR_sysinfo 116 ++#define __NR_ipc 117 ++#define __NR_fsync 118 ++#define __NR_sigreturn 119 ++#define __NR_clone 120 ++#define __NR_setdomainname 121 ++#define __NR_uname 122 ++#define __NR_modify_ldt 123 ++#define __NR_adjtimex 124 ++#define __NR_mprotect 125 ++#define __NR_sigprocmask 126 ++#define __NR_create_module 127 ++#define __NR_init_module 128 ++#define __NR_delete_module 129 ++#define __NR_get_kernel_syms 130 ++#define __NR_quotactl 131 
++#define __NR_getpgid 132 ++#define __NR_fchdir 133 ++#define __NR_bdflush 134 ++#define __NR_sysfs 135 ++#define __NR_personality 136 ++#define __NR_afs_syscall 137 ++#define __NR_setfsuid 138 ++#define __NR_setfsgid 139 ++#define __NR__llseek 140 ++#define __NR_getdents 141 ++#define __NR__newselect 142 ++#define __NR_flock 143 ++#define __NR_msync 144 ++#define __NR_readv 145 ++#define __NR_writev 146 ++#define __NR_getsid 147 ++#define __NR_fdatasync 148 ++#define __NR__sysctl 149 ++#define __NR_mlock 150 ++#define __NR_munlock 151 ++#define __NR_mlockall 152 ++#define __NR_munlockall 153 ++#define __NR_sched_setparam 154 ++#define __NR_sched_getparam 155 ++#define __NR_sched_setscheduler 156 ++#define __NR_sched_getscheduler 157 ++#define __NR_sched_yield 158 ++#define __NR_sched_get_priority_max 159 ++#define __NR_sched_get_priority_min 160 ++#define __NR_sched_rr_get_interval 161 ++#define __NR_nanosleep 162 ++#define __NR_mremap 163 ++#define __NR_setresuid 164 ++#define __NR_getresuid 165 ++#define __NR_query_module 166 ++#define __NR_poll 167 ++#define __NR_nfsservctl 168 ++#define __NR_setresgid 169 ++#define __NR_getresgid 170 ++#define __NR_prctl 171 ++#define __NR_rt_sigreturn 172 ++#define __NR_rt_sigaction 173 ++#define __NR_rt_sigprocmask 174 ++#define __NR_rt_sigpending 175 ++#define __NR_rt_sigtimedwait 176 ++#define __NR_rt_sigqueueinfo 177 ++#define __NR_rt_sigsuspend 178 ++#define __NR_pread64 179 ++#define __NR_pwrite64 180 ++#define __NR_chown 181 ++#define __NR_getcwd 182 ++#define __NR_capget 183 ++#define __NR_capset 184 ++#define __NR_sigaltstack 185 ++#define __NR_sendfile 186 ++#define __NR_getpmsg 187 ++#define __NR_putpmsg 188 ++#define __NR_vfork 189 ++#define __NR_ugetrlimit 190 ++#define __NR_readahead 191 ++#define __NR_mmap2 192 ++#define __NR_truncate64 193 ++#define __NR_ftruncate64 194 ++#define __NR_stat64 195 ++#define __NR_lstat64 196 ++#define __NR_fstat64 197 ++#define __NR_pciconfig_read 198 ++#define 
__NR_pciconfig_write 199 ++#define __NR_pciconfig_iobase 200 ++#define __NR_multiplexer 201 ++#define __NR_getdents64 202 ++#define __NR_pivot_root 203 ++#define __NR_fcntl64 204 ++#define __NR_madvise 205 ++#define __NR_mincore 206 ++#define __NR_gettid 207 ++#define __NR_tkill 208 ++#define __NR_setxattr 209 ++#define __NR_lsetxattr 210 ++#define __NR_fsetxattr 211 ++#define __NR_getxattr 212 ++#define __NR_lgetxattr 213 ++#define __NR_fgetxattr 214 ++#define __NR_listxattr 215 ++#define __NR_llistxattr 216 ++#define __NR_flistxattr 217 ++#define __NR_removexattr 218 ++#define __NR_lremovexattr 219 ++#define __NR_fremovexattr 220 ++#define __NR_futex 221 ++#define __NR_sched_setaffinity 222 ++#define __NR_sched_getaffinity 223 ++#define __NR_tuxcall 225 ++#define __NR_sendfile64 226 ++#define __NR_io_setup 227 ++#define __NR_io_destroy 228 ++#define __NR_io_getevents 229 ++#define __NR_io_submit 230 ++#define __NR_io_cancel 231 ++#define __NR_set_tid_address 232 ++#define __NR_fadvise64 233 ++#define __NR_exit_group 234 ++#define __NR_lookup_dcookie 235 ++#define __NR_epoll_create 236 ++#define __NR_epoll_ctl 237 ++#define __NR_epoll_wait 238 ++#define __NR_remap_file_pages 239 ++#define __NR_timer_create 240 ++#define __NR_timer_settime 241 ++#define __NR_timer_gettime 242 ++#define __NR_timer_getoverrun 243 ++#define __NR_timer_delete 244 ++#define __NR_clock_settime 245 ++#define __NR_clock_gettime 246 ++#define __NR_clock_getres 247 ++#define __NR_clock_nanosleep 248 ++#define __NR_swapcontext 249 ++#define __NR_tgkill 250 ++#define __NR_utimes 251 ++#define __NR_statfs64 252 ++#define __NR_fstatfs64 253 ++#define __NR_fadvise64_64 254 ++#define __NR_rtas 255 ++#define __NR_sys_debug_setcontext 256 ++#define __NR_migrate_pages 258 ++#define __NR_mbind 259 ++#define __NR_get_mempolicy 260 ++#define __NR_set_mempolicy 261 ++#define __NR_mq_open 262 ++#define __NR_mq_unlink 263 ++#define __NR_mq_timedsend 264 ++#define __NR_mq_timedreceive 265 ++#define 
__NR_mq_notify 266 ++#define __NR_mq_getsetattr 267 ++#define __NR_kexec_load 268 ++#define __NR_add_key 269 ++#define __NR_request_key 270 ++#define __NR_keyctl 271 ++#define __NR_waitid 272 ++#define __NR_ioprio_set 273 ++#define __NR_ioprio_get 274 ++#define __NR_inotify_init 275 ++#define __NR_inotify_add_watch 276 ++#define __NR_inotify_rm_watch 277 ++#define __NR_spu_run 278 ++#define __NR_spu_create 279 ++#define __NR_pselect6 280 ++#define __NR_ppoll 281 ++#define __NR_unshare 282 ++#define __NR_splice 283 ++#define __NR_tee 284 ++#define __NR_vmsplice 285 ++#define __NR_openat 286 ++#define __NR_mkdirat 287 ++#define __NR_mknodat 288 ++#define __NR_fchownat 289 ++#define __NR_futimesat 290 ++#define __NR_fstatat64 291 ++#define __NR_unlinkat 292 ++#define __NR_renameat 293 ++#define __NR_linkat 294 ++#define __NR_symlinkat 295 ++#define __NR_readlinkat 296 ++#define __NR_fchmodat 297 ++#define __NR_faccessat 298 ++#define __NR_get_robust_list 299 ++#define __NR_set_robust_list 300 ++#define __NR_move_pages 301 ++#define __NR_getcpu 302 ++#define __NR_epoll_pwait 303 ++#define __NR_utimensat 304 ++#define __NR_signalfd 305 ++#define __NR_timerfd_create 306 ++#define __NR_eventfd 307 ++#define __NR_sync_file_range2 308 ++#define __NR_fallocate 309 ++#define __NR_subpage_prot 310 ++#define __NR_timerfd_settime 311 ++#define __NR_timerfd_gettime 312 ++#define __NR_signalfd4 313 ++#define __NR_eventfd2 314 ++#define __NR_epoll_create1 315 ++#define __NR_dup3 316 ++#define __NR_pipe2 317 ++#define __NR_inotify_init1 318 ++#define __NR_perf_event_open 319 ++#define __NR_preadv 320 ++#define __NR_pwritev 321 ++#define __NR_rt_tgsigqueueinfo 322 ++#define __NR_fanotify_init 323 ++#define __NR_fanotify_mark 324 ++#define __NR_prlimit64 325 ++#define __NR_socket 326 ++#define __NR_bind 327 ++#define __NR_connect 328 ++#define __NR_listen 329 ++#define __NR_accept 330 ++#define __NR_getsockname 331 ++#define __NR_getpeername 332 ++#define __NR_socketpair 333 ++#define 
__NR_send 334 ++#define __NR_sendto 335 ++#define __NR_recv 336 ++#define __NR_recvfrom 337 ++#define __NR_shutdown 338 ++#define __NR_setsockopt 339 ++#define __NR_getsockopt 340 ++#define __NR_sendmsg 341 ++#define __NR_recvmsg 342 ++#define __NR_recvmmsg 343 ++#define __NR_accept4 344 ++#define __NR_name_to_handle_at 345 ++#define __NR_open_by_handle_at 346 ++#define __NR_clock_adjtime 347 ++#define __NR_syncfs 348 ++#define __NR_sendmmsg 349 ++#define __NR_setns 350 ++#define __NR_process_vm_readv 351 ++#define __NR_process_vm_writev 352 ++#define __NR_finit_module 353 ++#define __NR_kcmp 354 ++#define __NR_sched_setattr 355 ++#define __NR_sched_getattr 356 ++#define __NR_renameat2 357 ++#define __NR_seccomp 358 ++#define __NR_getrandom 359 ++#define __NR_memfd_create 360 ++#define __NR_bpf 361 ++#define __NR_execveat 362 ++#define __NR_switch_endian 363 ++#define __NR_userfaultfd 364 ++#define __NR_membarrier 365 ++#define __NR_mlock2 378 ++#define __NR_copy_file_range 379 ++#define __NR_preadv2 380 ++#define __NR_pwritev2 381 ++#define __NR_kexec_file_load 382 ++#define __NR_statx 383 ++#define __NR_pkey_alloc 384 ++#define __NR_pkey_free 385 ++#define __NR_pkey_mprotect 386 ++#define __NR_rseq 387 ++#define __NR_io_pgetevents 388 ++#define __NR_semget 393 ++#define __NR_semctl 394 ++#define __NR_shmget 395 ++#define __NR_shmctl 396 ++#define __NR_shmat 397 ++#define __NR_shmdt 398 ++#define __NR_msgget 399 ++#define __NR_msgsnd 400 ++#define __NR_msgrcv 401 ++#define __NR_msgctl 402 ++#define __NR_clock_gettime64 403 ++#define __NR_clock_settime64 404 ++#define __NR_clock_adjtime64 405 ++#define __NR_clock_getres_time64 406 ++#define __NR_clock_nanosleep_time64 407 ++#define __NR_timer_gettime64 408 ++#define __NR_timer_settime64 409 ++#define __NR_timerfd_gettime64 410 ++#define __NR_timerfd_settime64 411 ++#define __NR_utimensat_time64 412 ++#define __NR_pselect6_time64 413 ++#define __NR_ppoll_time64 414 ++#define __NR_io_pgetevents_time64 416 ++#define 
__NR_recvmmsg_time64 417 ++#define __NR_mq_timedsend_time64 418 ++#define __NR_mq_timedreceive_time64 419 ++#define __NR_semtimedop_time64 420 ++#define __NR_rt_sigtimedwait_time64 421 ++#define __NR_futex_time64 422 ++#define __NR_sched_rr_get_interval_time64 423 ++#define __NR_pidfd_send_signal 424 ++#define __NR_io_uring_setup 425 ++#define __NR_io_uring_enter 426 ++#define __NR_io_uring_register 427 ++#define __NR_open_tree 428 ++#define __NR_move_mount 429 ++#define __NR_fsopen 430 ++#define __NR_fsconfig 431 ++#define __NR_fsmount 432 ++#define __NR_fspick 433 ++#define __NR_pidfd_open 434 ++#define __NR_clone3 435 ++#define __NR_close_range 436 ++#define __NR_openat2 437 ++#define __NR_pidfd_getfd 438 ++#define __NR_faccessat2 439 ++#define __NR_process_madvise 440 ++#define __NR_epoll_pwait2 441 ++#define __NR_mount_setattr 442 ++#define __NR_landlock_create_ruleset 444 ++#define __NR_landlock_add_rule 445 ++#define __NR_landlock_restrict_self 446 + + +-#endif /* _ASM_POWERPC_UNISTD_32_H */ ++#endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h +index 7e851b30bb..3cefa88932 100644 +--- a/linux-headers/asm-powerpc/unistd_64.h ++++ b/linux-headers/asm-powerpc/unistd_64.h +@@ -1,401 +1,405 @@ +-#ifndef _ASM_POWERPC_UNISTD_64_H +-#define _ASM_POWERPC_UNISTD_64_H ++#ifndef _ASM_UNISTD_64_H ++#define _ASM_UNISTD_64_H + +-#define __NR_restart_syscall 0 +-#define __NR_exit 1 +-#define __NR_fork 2 +-#define __NR_read 3 +-#define __NR_write 4 +-#define __NR_open 5 +-#define __NR_close 6 +-#define __NR_waitpid 7 +-#define __NR_creat 8 +-#define __NR_link 9 +-#define __NR_unlink 10 +-#define __NR_execve 11 +-#define __NR_chdir 12 +-#define __NR_time 13 +-#define __NR_mknod 14 +-#define __NR_chmod 15 +-#define __NR_lchown 16 +-#define __NR_break 17 +-#define __NR_oldstat 18 +-#define __NR_lseek 19 +-#define __NR_getpid 20 +-#define __NR_mount 21 +-#define __NR_umount 22 +-#define __NR_setuid 23 
+-#define __NR_getuid 24 +-#define __NR_stime 25 +-#define __NR_ptrace 26 +-#define __NR_alarm 27 +-#define __NR_oldfstat 28 +-#define __NR_pause 29 +-#define __NR_utime 30 +-#define __NR_stty 31 +-#define __NR_gtty 32 +-#define __NR_access 33 +-#define __NR_nice 34 +-#define __NR_ftime 35 +-#define __NR_sync 36 +-#define __NR_kill 37 +-#define __NR_rename 38 +-#define __NR_mkdir 39 +-#define __NR_rmdir 40 +-#define __NR_dup 41 +-#define __NR_pipe 42 +-#define __NR_times 43 +-#define __NR_prof 44 +-#define __NR_brk 45 +-#define __NR_setgid 46 +-#define __NR_getgid 47 +-#define __NR_signal 48 +-#define __NR_geteuid 49 +-#define __NR_getegid 50 +-#define __NR_acct 51 +-#define __NR_umount2 52 +-#define __NR_lock 53 +-#define __NR_ioctl 54 +-#define __NR_fcntl 55 +-#define __NR_mpx 56 +-#define __NR_setpgid 57 +-#define __NR_ulimit 58 +-#define __NR_oldolduname 59 +-#define __NR_umask 60 +-#define __NR_chroot 61 +-#define __NR_ustat 62 +-#define __NR_dup2 63 +-#define __NR_getppid 64 +-#define __NR_getpgrp 65 +-#define __NR_setsid 66 +-#define __NR_sigaction 67 +-#define __NR_sgetmask 68 +-#define __NR_ssetmask 69 +-#define __NR_setreuid 70 +-#define __NR_setregid 71 +-#define __NR_sigsuspend 72 +-#define __NR_sigpending 73 +-#define __NR_sethostname 74 +-#define __NR_setrlimit 75 +-#define __NR_getrlimit 76 +-#define __NR_getrusage 77 +-#define __NR_gettimeofday 78 +-#define __NR_settimeofday 79 +-#define __NR_getgroups 80 +-#define __NR_setgroups 81 +-#define __NR_select 82 +-#define __NR_symlink 83 +-#define __NR_oldlstat 84 +-#define __NR_readlink 85 +-#define __NR_uselib 86 +-#define __NR_swapon 87 +-#define __NR_reboot 88 +-#define __NR_readdir 89 +-#define __NR_mmap 90 +-#define __NR_munmap 91 +-#define __NR_truncate 92 +-#define __NR_ftruncate 93 +-#define __NR_fchmod 94 +-#define __NR_fchown 95 +-#define __NR_getpriority 96 +-#define __NR_setpriority 97 +-#define __NR_profil 98 +-#define __NR_statfs 99 +-#define __NR_fstatfs 100 +-#define __NR_ioperm 101 
+-#define __NR_socketcall 102 +-#define __NR_syslog 103 +-#define __NR_setitimer 104 +-#define __NR_getitimer 105 +-#define __NR_stat 106 +-#define __NR_lstat 107 +-#define __NR_fstat 108 +-#define __NR_olduname 109 +-#define __NR_iopl 110 +-#define __NR_vhangup 111 +-#define __NR_idle 112 +-#define __NR_vm86 113 +-#define __NR_wait4 114 +-#define __NR_swapoff 115 +-#define __NR_sysinfo 116 +-#define __NR_ipc 117 +-#define __NR_fsync 118 +-#define __NR_sigreturn 119 +-#define __NR_clone 120 +-#define __NR_setdomainname 121 +-#define __NR_uname 122 +-#define __NR_modify_ldt 123 +-#define __NR_adjtimex 124 +-#define __NR_mprotect 125 +-#define __NR_sigprocmask 126 +-#define __NR_create_module 127 +-#define __NR_init_module 128 +-#define __NR_delete_module 129 +-#define __NR_get_kernel_syms 130 +-#define __NR_quotactl 131 +-#define __NR_getpgid 132 +-#define __NR_fchdir 133 +-#define __NR_bdflush 134 +-#define __NR_sysfs 135 +-#define __NR_personality 136 +-#define __NR_afs_syscall 137 +-#define __NR_setfsuid 138 +-#define __NR_setfsgid 139 +-#define __NR__llseek 140 +-#define __NR_getdents 141 +-#define __NR__newselect 142 +-#define __NR_flock 143 +-#define __NR_msync 144 +-#define __NR_readv 145 +-#define __NR_writev 146 +-#define __NR_getsid 147 +-#define __NR_fdatasync 148 +-#define __NR__sysctl 149 +-#define __NR_mlock 150 +-#define __NR_munlock 151 +-#define __NR_mlockall 152 +-#define __NR_munlockall 153 +-#define __NR_sched_setparam 154 +-#define __NR_sched_getparam 155 +-#define __NR_sched_setscheduler 156 +-#define __NR_sched_getscheduler 157 +-#define __NR_sched_yield 158 +-#define __NR_sched_get_priority_max 159 +-#define __NR_sched_get_priority_min 160 +-#define __NR_sched_rr_get_interval 161 +-#define __NR_nanosleep 162 +-#define __NR_mremap 163 +-#define __NR_setresuid 164 +-#define __NR_getresuid 165 +-#define __NR_query_module 166 +-#define __NR_poll 167 +-#define __NR_nfsservctl 168 +-#define __NR_setresgid 169 +-#define __NR_getresgid 170 +-#define 
__NR_prctl 171 +-#define __NR_rt_sigreturn 172 +-#define __NR_rt_sigaction 173 +-#define __NR_rt_sigprocmask 174 +-#define __NR_rt_sigpending 175 +-#define __NR_rt_sigtimedwait 176 +-#define __NR_rt_sigqueueinfo 177 +-#define __NR_rt_sigsuspend 178 +-#define __NR_pread64 179 +-#define __NR_pwrite64 180 +-#define __NR_chown 181 +-#define __NR_getcwd 182 +-#define __NR_capget 183 +-#define __NR_capset 184 +-#define __NR_sigaltstack 185 +-#define __NR_sendfile 186 +-#define __NR_getpmsg 187 +-#define __NR_putpmsg 188 +-#define __NR_vfork 189 +-#define __NR_ugetrlimit 190 +-#define __NR_readahead 191 +-#define __NR_pciconfig_read 198 +-#define __NR_pciconfig_write 199 +-#define __NR_pciconfig_iobase 200 +-#define __NR_multiplexer 201 +-#define __NR_getdents64 202 +-#define __NR_pivot_root 203 +-#define __NR_madvise 205 +-#define __NR_mincore 206 +-#define __NR_gettid 207 +-#define __NR_tkill 208 +-#define __NR_setxattr 209 +-#define __NR_lsetxattr 210 +-#define __NR_fsetxattr 211 +-#define __NR_getxattr 212 +-#define __NR_lgetxattr 213 +-#define __NR_fgetxattr 214 +-#define __NR_listxattr 215 +-#define __NR_llistxattr 216 +-#define __NR_flistxattr 217 +-#define __NR_removexattr 218 +-#define __NR_lremovexattr 219 +-#define __NR_fremovexattr 220 +-#define __NR_futex 221 +-#define __NR_sched_setaffinity 222 +-#define __NR_sched_getaffinity 223 +-#define __NR_tuxcall 225 +-#define __NR_io_setup 227 +-#define __NR_io_destroy 228 +-#define __NR_io_getevents 229 +-#define __NR_io_submit 230 +-#define __NR_io_cancel 231 +-#define __NR_set_tid_address 232 +-#define __NR_fadvise64 233 +-#define __NR_exit_group 234 +-#define __NR_lookup_dcookie 235 +-#define __NR_epoll_create 236 +-#define __NR_epoll_ctl 237 +-#define __NR_epoll_wait 238 +-#define __NR_remap_file_pages 239 +-#define __NR_timer_create 240 +-#define __NR_timer_settime 241 +-#define __NR_timer_gettime 242 +-#define __NR_timer_getoverrun 243 +-#define __NR_timer_delete 244 +-#define __NR_clock_settime 245 +-#define 
__NR_clock_gettime 246 +-#define __NR_clock_getres 247 +-#define __NR_clock_nanosleep 248 +-#define __NR_swapcontext 249 +-#define __NR_tgkill 250 +-#define __NR_utimes 251 +-#define __NR_statfs64 252 +-#define __NR_fstatfs64 253 +-#define __NR_rtas 255 +-#define __NR_sys_debug_setcontext 256 +-#define __NR_migrate_pages 258 +-#define __NR_mbind 259 +-#define __NR_get_mempolicy 260 +-#define __NR_set_mempolicy 261 +-#define __NR_mq_open 262 +-#define __NR_mq_unlink 263 +-#define __NR_mq_timedsend 264 +-#define __NR_mq_timedreceive 265 +-#define __NR_mq_notify 266 +-#define __NR_mq_getsetattr 267 +-#define __NR_kexec_load 268 +-#define __NR_add_key 269 +-#define __NR_request_key 270 +-#define __NR_keyctl 271 +-#define __NR_waitid 272 +-#define __NR_ioprio_set 273 +-#define __NR_ioprio_get 274 +-#define __NR_inotify_init 275 +-#define __NR_inotify_add_watch 276 +-#define __NR_inotify_rm_watch 277 +-#define __NR_spu_run 278 +-#define __NR_spu_create 279 +-#define __NR_pselect6 280 +-#define __NR_ppoll 281 +-#define __NR_unshare 282 +-#define __NR_splice 283 +-#define __NR_tee 284 +-#define __NR_vmsplice 285 +-#define __NR_openat 286 +-#define __NR_mkdirat 287 +-#define __NR_mknodat 288 +-#define __NR_fchownat 289 +-#define __NR_futimesat 290 +-#define __NR_newfstatat 291 +-#define __NR_unlinkat 292 +-#define __NR_renameat 293 +-#define __NR_linkat 294 +-#define __NR_symlinkat 295 +-#define __NR_readlinkat 296 +-#define __NR_fchmodat 297 +-#define __NR_faccessat 298 +-#define __NR_get_robust_list 299 +-#define __NR_set_robust_list 300 +-#define __NR_move_pages 301 +-#define __NR_getcpu 302 +-#define __NR_epoll_pwait 303 +-#define __NR_utimensat 304 +-#define __NR_signalfd 305 +-#define __NR_timerfd_create 306 +-#define __NR_eventfd 307 +-#define __NR_sync_file_range2 308 +-#define __NR_fallocate 309 +-#define __NR_subpage_prot 310 +-#define __NR_timerfd_settime 311 +-#define __NR_timerfd_gettime 312 +-#define __NR_signalfd4 313 +-#define __NR_eventfd2 314 +-#define 
__NR_epoll_create1 315 +-#define __NR_dup3 316 +-#define __NR_pipe2 317 +-#define __NR_inotify_init1 318 +-#define __NR_perf_event_open 319 +-#define __NR_preadv 320 +-#define __NR_pwritev 321 +-#define __NR_rt_tgsigqueueinfo 322 +-#define __NR_fanotify_init 323 +-#define __NR_fanotify_mark 324 +-#define __NR_prlimit64 325 +-#define __NR_socket 326 +-#define __NR_bind 327 +-#define __NR_connect 328 +-#define __NR_listen 329 +-#define __NR_accept 330 +-#define __NR_getsockname 331 +-#define __NR_getpeername 332 +-#define __NR_socketpair 333 +-#define __NR_send 334 +-#define __NR_sendto 335 +-#define __NR_recv 336 +-#define __NR_recvfrom 337 +-#define __NR_shutdown 338 +-#define __NR_setsockopt 339 +-#define __NR_getsockopt 340 +-#define __NR_sendmsg 341 +-#define __NR_recvmsg 342 +-#define __NR_recvmmsg 343 +-#define __NR_accept4 344 +-#define __NR_name_to_handle_at 345 +-#define __NR_open_by_handle_at 346 +-#define __NR_clock_adjtime 347 +-#define __NR_syncfs 348 +-#define __NR_sendmmsg 349 +-#define __NR_setns 350 +-#define __NR_process_vm_readv 351 +-#define __NR_process_vm_writev 352 +-#define __NR_finit_module 353 +-#define __NR_kcmp 354 +-#define __NR_sched_setattr 355 +-#define __NR_sched_getattr 356 +-#define __NR_renameat2 357 +-#define __NR_seccomp 358 +-#define __NR_getrandom 359 +-#define __NR_memfd_create 360 +-#define __NR_bpf 361 +-#define __NR_execveat 362 +-#define __NR_switch_endian 363 +-#define __NR_userfaultfd 364 +-#define __NR_membarrier 365 +-#define __NR_mlock2 378 +-#define __NR_copy_file_range 379 +-#define __NR_preadv2 380 +-#define __NR_pwritev2 381 +-#define __NR_kexec_file_load 382 +-#define __NR_statx 383 +-#define __NR_pkey_alloc 384 +-#define __NR_pkey_free 385 +-#define __NR_pkey_mprotect 386 +-#define __NR_rseq 387 +-#define __NR_io_pgetevents 388 +-#define __NR_semtimedop 392 +-#define __NR_semget 393 +-#define __NR_semctl 394 +-#define __NR_shmget 395 +-#define __NR_shmctl 396 +-#define __NR_shmat 397 +-#define __NR_shmdt 398 
+-#define __NR_msgget 399 +-#define __NR_msgsnd 400 +-#define __NR_msgrcv 401 +-#define __NR_msgctl 402 +-#define __NR_pidfd_send_signal 424 +-#define __NR_io_uring_setup 425 +-#define __NR_io_uring_enter 426 +-#define __NR_io_uring_register 427 +-#define __NR_open_tree 428 +-#define __NR_move_mount 429 +-#define __NR_fsopen 430 +-#define __NR_fsconfig 431 +-#define __NR_fsmount 432 +-#define __NR_fspick 433 +-#define __NR_pidfd_open 434 +-#define __NR_clone3 435 +-#define __NR_close_range 436 +-#define __NR_openat2 437 +-#define __NR_pidfd_getfd 438 +-#define __NR_faccessat2 439 +-#define __NR_process_madvise 440 +-#define __NR_epoll_pwait2 441 ++#define __NR_restart_syscall 0 ++#define __NR_exit 1 ++#define __NR_fork 2 ++#define __NR_read 3 ++#define __NR_write 4 ++#define __NR_open 5 ++#define __NR_close 6 ++#define __NR_waitpid 7 ++#define __NR_creat 8 ++#define __NR_link 9 ++#define __NR_unlink 10 ++#define __NR_execve 11 ++#define __NR_chdir 12 ++#define __NR_time 13 ++#define __NR_mknod 14 ++#define __NR_chmod 15 ++#define __NR_lchown 16 ++#define __NR_break 17 ++#define __NR_oldstat 18 ++#define __NR_lseek 19 ++#define __NR_getpid 20 ++#define __NR_mount 21 ++#define __NR_umount 22 ++#define __NR_setuid 23 ++#define __NR_getuid 24 ++#define __NR_stime 25 ++#define __NR_ptrace 26 ++#define __NR_alarm 27 ++#define __NR_oldfstat 28 ++#define __NR_pause 29 ++#define __NR_utime 30 ++#define __NR_stty 31 ++#define __NR_gtty 32 ++#define __NR_access 33 ++#define __NR_nice 34 ++#define __NR_ftime 35 ++#define __NR_sync 36 ++#define __NR_kill 37 ++#define __NR_rename 38 ++#define __NR_mkdir 39 ++#define __NR_rmdir 40 ++#define __NR_dup 41 ++#define __NR_pipe 42 ++#define __NR_times 43 ++#define __NR_prof 44 ++#define __NR_brk 45 ++#define __NR_setgid 46 ++#define __NR_getgid 47 ++#define __NR_signal 48 ++#define __NR_geteuid 49 ++#define __NR_getegid 50 ++#define __NR_acct 51 ++#define __NR_umount2 52 ++#define __NR_lock 53 ++#define __NR_ioctl 54 ++#define 
__NR_fcntl 55 ++#define __NR_mpx 56 ++#define __NR_setpgid 57 ++#define __NR_ulimit 58 ++#define __NR_oldolduname 59 ++#define __NR_umask 60 ++#define __NR_chroot 61 ++#define __NR_ustat 62 ++#define __NR_dup2 63 ++#define __NR_getppid 64 ++#define __NR_getpgrp 65 ++#define __NR_setsid 66 ++#define __NR_sigaction 67 ++#define __NR_sgetmask 68 ++#define __NR_ssetmask 69 ++#define __NR_setreuid 70 ++#define __NR_setregid 71 ++#define __NR_sigsuspend 72 ++#define __NR_sigpending 73 ++#define __NR_sethostname 74 ++#define __NR_setrlimit 75 ++#define __NR_getrlimit 76 ++#define __NR_getrusage 77 ++#define __NR_gettimeofday 78 ++#define __NR_settimeofday 79 ++#define __NR_getgroups 80 ++#define __NR_setgroups 81 ++#define __NR_select 82 ++#define __NR_symlink 83 ++#define __NR_oldlstat 84 ++#define __NR_readlink 85 ++#define __NR_uselib 86 ++#define __NR_swapon 87 ++#define __NR_reboot 88 ++#define __NR_readdir 89 ++#define __NR_mmap 90 ++#define __NR_munmap 91 ++#define __NR_truncate 92 ++#define __NR_ftruncate 93 ++#define __NR_fchmod 94 ++#define __NR_fchown 95 ++#define __NR_getpriority 96 ++#define __NR_setpriority 97 ++#define __NR_profil 98 ++#define __NR_statfs 99 ++#define __NR_fstatfs 100 ++#define __NR_ioperm 101 ++#define __NR_socketcall 102 ++#define __NR_syslog 103 ++#define __NR_setitimer 104 ++#define __NR_getitimer 105 ++#define __NR_stat 106 ++#define __NR_lstat 107 ++#define __NR_fstat 108 ++#define __NR_olduname 109 ++#define __NR_iopl 110 ++#define __NR_vhangup 111 ++#define __NR_idle 112 ++#define __NR_vm86 113 ++#define __NR_wait4 114 ++#define __NR_swapoff 115 ++#define __NR_sysinfo 116 ++#define __NR_ipc 117 ++#define __NR_fsync 118 ++#define __NR_sigreturn 119 ++#define __NR_clone 120 ++#define __NR_setdomainname 121 ++#define __NR_uname 122 ++#define __NR_modify_ldt 123 ++#define __NR_adjtimex 124 ++#define __NR_mprotect 125 ++#define __NR_sigprocmask 126 ++#define __NR_create_module 127 ++#define __NR_init_module 128 ++#define 
__NR_delete_module 129 ++#define __NR_get_kernel_syms 130 ++#define __NR_quotactl 131 ++#define __NR_getpgid 132 ++#define __NR_fchdir 133 ++#define __NR_bdflush 134 ++#define __NR_sysfs 135 ++#define __NR_personality 136 ++#define __NR_afs_syscall 137 ++#define __NR_setfsuid 138 ++#define __NR_setfsgid 139 ++#define __NR__llseek 140 ++#define __NR_getdents 141 ++#define __NR__newselect 142 ++#define __NR_flock 143 ++#define __NR_msync 144 ++#define __NR_readv 145 ++#define __NR_writev 146 ++#define __NR_getsid 147 ++#define __NR_fdatasync 148 ++#define __NR__sysctl 149 ++#define __NR_mlock 150 ++#define __NR_munlock 151 ++#define __NR_mlockall 152 ++#define __NR_munlockall 153 ++#define __NR_sched_setparam 154 ++#define __NR_sched_getparam 155 ++#define __NR_sched_setscheduler 156 ++#define __NR_sched_getscheduler 157 ++#define __NR_sched_yield 158 ++#define __NR_sched_get_priority_max 159 ++#define __NR_sched_get_priority_min 160 ++#define __NR_sched_rr_get_interval 161 ++#define __NR_nanosleep 162 ++#define __NR_mremap 163 ++#define __NR_setresuid 164 ++#define __NR_getresuid 165 ++#define __NR_query_module 166 ++#define __NR_poll 167 ++#define __NR_nfsservctl 168 ++#define __NR_setresgid 169 ++#define __NR_getresgid 170 ++#define __NR_prctl 171 ++#define __NR_rt_sigreturn 172 ++#define __NR_rt_sigaction 173 ++#define __NR_rt_sigprocmask 174 ++#define __NR_rt_sigpending 175 ++#define __NR_rt_sigtimedwait 176 ++#define __NR_rt_sigqueueinfo 177 ++#define __NR_rt_sigsuspend 178 ++#define __NR_pread64 179 ++#define __NR_pwrite64 180 ++#define __NR_chown 181 ++#define __NR_getcwd 182 ++#define __NR_capget 183 ++#define __NR_capset 184 ++#define __NR_sigaltstack 185 ++#define __NR_sendfile 186 ++#define __NR_getpmsg 187 ++#define __NR_putpmsg 188 ++#define __NR_vfork 189 ++#define __NR_ugetrlimit 190 ++#define __NR_readahead 191 ++#define __NR_pciconfig_read 198 ++#define __NR_pciconfig_write 199 ++#define __NR_pciconfig_iobase 200 ++#define __NR_multiplexer 201 
++#define __NR_getdents64 202 ++#define __NR_pivot_root 203 ++#define __NR_madvise 205 ++#define __NR_mincore 206 ++#define __NR_gettid 207 ++#define __NR_tkill 208 ++#define __NR_setxattr 209 ++#define __NR_lsetxattr 210 ++#define __NR_fsetxattr 211 ++#define __NR_getxattr 212 ++#define __NR_lgetxattr 213 ++#define __NR_fgetxattr 214 ++#define __NR_listxattr 215 ++#define __NR_llistxattr 216 ++#define __NR_flistxattr 217 ++#define __NR_removexattr 218 ++#define __NR_lremovexattr 219 ++#define __NR_fremovexattr 220 ++#define __NR_futex 221 ++#define __NR_sched_setaffinity 222 ++#define __NR_sched_getaffinity 223 ++#define __NR_tuxcall 225 ++#define __NR_io_setup 227 ++#define __NR_io_destroy 228 ++#define __NR_io_getevents 229 ++#define __NR_io_submit 230 ++#define __NR_io_cancel 231 ++#define __NR_set_tid_address 232 ++#define __NR_fadvise64 233 ++#define __NR_exit_group 234 ++#define __NR_lookup_dcookie 235 ++#define __NR_epoll_create 236 ++#define __NR_epoll_ctl 237 ++#define __NR_epoll_wait 238 ++#define __NR_remap_file_pages 239 ++#define __NR_timer_create 240 ++#define __NR_timer_settime 241 ++#define __NR_timer_gettime 242 ++#define __NR_timer_getoverrun 243 ++#define __NR_timer_delete 244 ++#define __NR_clock_settime 245 ++#define __NR_clock_gettime 246 ++#define __NR_clock_getres 247 ++#define __NR_clock_nanosleep 248 ++#define __NR_swapcontext 249 ++#define __NR_tgkill 250 ++#define __NR_utimes 251 ++#define __NR_statfs64 252 ++#define __NR_fstatfs64 253 ++#define __NR_rtas 255 ++#define __NR_sys_debug_setcontext 256 ++#define __NR_migrate_pages 258 ++#define __NR_mbind 259 ++#define __NR_get_mempolicy 260 ++#define __NR_set_mempolicy 261 ++#define __NR_mq_open 262 ++#define __NR_mq_unlink 263 ++#define __NR_mq_timedsend 264 ++#define __NR_mq_timedreceive 265 ++#define __NR_mq_notify 266 ++#define __NR_mq_getsetattr 267 ++#define __NR_kexec_load 268 ++#define __NR_add_key 269 ++#define __NR_request_key 270 ++#define __NR_keyctl 271 ++#define __NR_waitid 
272 ++#define __NR_ioprio_set 273 ++#define __NR_ioprio_get 274 ++#define __NR_inotify_init 275 ++#define __NR_inotify_add_watch 276 ++#define __NR_inotify_rm_watch 277 ++#define __NR_spu_run 278 ++#define __NR_spu_create 279 ++#define __NR_pselect6 280 ++#define __NR_ppoll 281 ++#define __NR_unshare 282 ++#define __NR_splice 283 ++#define __NR_tee 284 ++#define __NR_vmsplice 285 ++#define __NR_openat 286 ++#define __NR_mkdirat 287 ++#define __NR_mknodat 288 ++#define __NR_fchownat 289 ++#define __NR_futimesat 290 ++#define __NR_newfstatat 291 ++#define __NR_unlinkat 292 ++#define __NR_renameat 293 ++#define __NR_linkat 294 ++#define __NR_symlinkat 295 ++#define __NR_readlinkat 296 ++#define __NR_fchmodat 297 ++#define __NR_faccessat 298 ++#define __NR_get_robust_list 299 ++#define __NR_set_robust_list 300 ++#define __NR_move_pages 301 ++#define __NR_getcpu 302 ++#define __NR_epoll_pwait 303 ++#define __NR_utimensat 304 ++#define __NR_signalfd 305 ++#define __NR_timerfd_create 306 ++#define __NR_eventfd 307 ++#define __NR_sync_file_range2 308 ++#define __NR_fallocate 309 ++#define __NR_subpage_prot 310 ++#define __NR_timerfd_settime 311 ++#define __NR_timerfd_gettime 312 ++#define __NR_signalfd4 313 ++#define __NR_eventfd2 314 ++#define __NR_epoll_create1 315 ++#define __NR_dup3 316 ++#define __NR_pipe2 317 ++#define __NR_inotify_init1 318 ++#define __NR_perf_event_open 319 ++#define __NR_preadv 320 ++#define __NR_pwritev 321 ++#define __NR_rt_tgsigqueueinfo 322 ++#define __NR_fanotify_init 323 ++#define __NR_fanotify_mark 324 ++#define __NR_prlimit64 325 ++#define __NR_socket 326 ++#define __NR_bind 327 ++#define __NR_connect 328 ++#define __NR_listen 329 ++#define __NR_accept 330 ++#define __NR_getsockname 331 ++#define __NR_getpeername 332 ++#define __NR_socketpair 333 ++#define __NR_send 334 ++#define __NR_sendto 335 ++#define __NR_recv 336 ++#define __NR_recvfrom 337 ++#define __NR_shutdown 338 ++#define __NR_setsockopt 339 ++#define __NR_getsockopt 340 
++#define __NR_sendmsg 341 ++#define __NR_recvmsg 342 ++#define __NR_recvmmsg 343 ++#define __NR_accept4 344 ++#define __NR_name_to_handle_at 345 ++#define __NR_open_by_handle_at 346 ++#define __NR_clock_adjtime 347 ++#define __NR_syncfs 348 ++#define __NR_sendmmsg 349 ++#define __NR_setns 350 ++#define __NR_process_vm_readv 351 ++#define __NR_process_vm_writev 352 ++#define __NR_finit_module 353 ++#define __NR_kcmp 354 ++#define __NR_sched_setattr 355 ++#define __NR_sched_getattr 356 ++#define __NR_renameat2 357 ++#define __NR_seccomp 358 ++#define __NR_getrandom 359 ++#define __NR_memfd_create 360 ++#define __NR_bpf 361 ++#define __NR_execveat 362 ++#define __NR_switch_endian 363 ++#define __NR_userfaultfd 364 ++#define __NR_membarrier 365 ++#define __NR_mlock2 378 ++#define __NR_copy_file_range 379 ++#define __NR_preadv2 380 ++#define __NR_pwritev2 381 ++#define __NR_kexec_file_load 382 ++#define __NR_statx 383 ++#define __NR_pkey_alloc 384 ++#define __NR_pkey_free 385 ++#define __NR_pkey_mprotect 386 ++#define __NR_rseq 387 ++#define __NR_io_pgetevents 388 ++#define __NR_semtimedop 392 ++#define __NR_semget 393 ++#define __NR_semctl 394 ++#define __NR_shmget 395 ++#define __NR_shmctl 396 ++#define __NR_shmat 397 ++#define __NR_shmdt 398 ++#define __NR_msgget 399 ++#define __NR_msgsnd 400 ++#define __NR_msgrcv 401 ++#define __NR_msgctl 402 ++#define __NR_pidfd_send_signal 424 ++#define __NR_io_uring_setup 425 ++#define __NR_io_uring_enter 426 ++#define __NR_io_uring_register 427 ++#define __NR_open_tree 428 ++#define __NR_move_mount 429 ++#define __NR_fsopen 430 ++#define __NR_fsconfig 431 ++#define __NR_fsmount 432 ++#define __NR_fspick 433 ++#define __NR_pidfd_open 434 ++#define __NR_clone3 435 ++#define __NR_close_range 436 ++#define __NR_openat2 437 ++#define __NR_pidfd_getfd 438 ++#define __NR_faccessat2 439 ++#define __NR_process_madvise 440 ++#define __NR_epoll_pwait2 441 ++#define __NR_mount_setattr 442 ++#define __NR_landlock_create_ruleset 444 
++#define __NR_landlock_add_rule 445 ++#define __NR_landlock_restrict_self 446 + + +-#endif /* _ASM_POWERPC_UNISTD_64_H */ ++#endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h +index c94d2c3a22..e8cd34334f 100644 +--- a/linux-headers/asm-s390/unistd_32.h ++++ b/linux-headers/asm-s390/unistd_32.h +@@ -414,5 +414,9 @@ + #define __NR_faccessat2 439 + #define __NR_process_madvise 440 + #define __NR_epoll_pwait2 441 ++#define __NR_mount_setattr 442 ++#define __NR_landlock_create_ruleset 444 ++#define __NR_landlock_add_rule 445 ++#define __NR_landlock_restrict_self 446 + + #endif /* _ASM_S390_UNISTD_32_H */ +diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h +index 984a06b7eb..86830e1e83 100644 +--- a/linux-headers/asm-s390/unistd_64.h ++++ b/linux-headers/asm-s390/unistd_64.h +@@ -362,5 +362,9 @@ + #define __NR_faccessat2 439 + #define __NR_process_madvise 440 + #define __NR_epoll_pwait2 441 ++#define __NR_mount_setattr 442 ++#define __NR_landlock_create_ruleset 444 ++#define __NR_landlock_add_rule 445 ++#define __NR_landlock_restrict_self 446 + + #endif /* _ASM_S390_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h +index 8e76d3701d..0662f644aa 100644 +--- a/linux-headers/asm-x86/kvm.h ++++ b/linux-headers/asm-x86/kvm.h +@@ -112,6 +112,7 @@ struct kvm_ioapic_state { + #define KVM_NR_IRQCHIPS 3 + + #define KVM_RUN_X86_SMM (1 << 0) ++#define KVM_RUN_X86_BUS_LOCK (1 << 1) + + /* for KVM_GET_REGS and KVM_SET_REGS */ + struct kvm_regs { +@@ -436,6 +437,8 @@ struct kvm_vmx_nested_state_hdr { + __u16 flags; + } smm; + ++ __u16 pad; ++ + __u32 flags; + __u64 preemption_timer_deadline; + }; +diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h +index 18fb99dfa2..8f6ac8c19f 100644 +--- a/linux-headers/asm-x86/unistd_32.h ++++ b/linux-headers/asm-x86/unistd_32.h +@@ -432,6 +432,10 @@ + #define __NR_faccessat2 439 + 
#define __NR_process_madvise 440 + #define __NR_epoll_pwait2 441 ++#define __NR_mount_setattr 442 ++#define __NR_landlock_create_ruleset 444 ++#define __NR_landlock_add_rule 445 ++#define __NR_landlock_restrict_self 446 + + + #endif /* _ASM_X86_UNISTD_32_H */ +diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h +index bde959328d..bb187a9268 100644 +--- a/linux-headers/asm-x86/unistd_64.h ++++ b/linux-headers/asm-x86/unistd_64.h +@@ -354,6 +354,10 @@ + #define __NR_faccessat2 439 + #define __NR_process_madvise 440 + #define __NR_epoll_pwait2 441 ++#define __NR_mount_setattr 442 ++#define __NR_landlock_create_ruleset 444 ++#define __NR_landlock_add_rule 445 ++#define __NR_landlock_restrict_self 446 + + + #endif /* _ASM_X86_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h +index 4ff6b17d3b..4edd0103ac 100644 +--- a/linux-headers/asm-x86/unistd_x32.h ++++ b/linux-headers/asm-x86/unistd_x32.h +@@ -307,6 +307,10 @@ + #define __NR_faccessat2 (__X32_SYSCALL_BIT + 439) + #define __NR_process_madvise (__X32_SYSCALL_BIT + 440) + #define __NR_epoll_pwait2 (__X32_SYSCALL_BIT + 441) ++#define __NR_mount_setattr (__X32_SYSCALL_BIT + 442) ++#define __NR_landlock_create_ruleset (__X32_SYSCALL_BIT + 444) ++#define __NR_landlock_add_rule (__X32_SYSCALL_BIT + 445) ++#define __NR_landlock_restrict_self (__X32_SYSCALL_BIT + 446) + #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) + #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) + #define __NR_ioctl (__X32_SYSCALL_BIT + 514) +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 897f831374..20d6a263bb 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -8,6 +8,7 @@ + * Note: you must update KVM_API_VERSION if you change this interface. 
+ */ + ++#include + #include + + #include +@@ -216,6 +217,20 @@ struct kvm_hyperv_exit { + } u; + }; + ++struct kvm_xen_exit { ++#define KVM_EXIT_XEN_HCALL 1 ++ __u32 type; ++ union { ++ struct { ++ __u32 longmode; ++ __u32 cpl; ++ __u64 input; ++ __u64 result; ++ __u64 params[6]; ++ } hcall; ++ } u; ++}; ++ + #define KVM_S390_GET_SKEYS_NONE 1 + #define KVM_S390_SKEYS_MAX 1048576 + +@@ -251,6 +266,9 @@ struct kvm_hyperv_exit { + #define KVM_EXIT_X86_RDMSR 29 + #define KVM_EXIT_X86_WRMSR 30 + #define KVM_EXIT_DIRTY_RING_FULL 31 ++#define KVM_EXIT_AP_RESET_HOLD 32 ++#define KVM_EXIT_X86_BUS_LOCK 33 ++#define KVM_EXIT_XEN 34 + + /* For KVM_EXIT_INTERNAL_ERROR */ + /* Emulate instruction failed. */ +@@ -427,6 +445,8 @@ struct kvm_run { + __u32 index; /* kernel -> user */ + __u64 data; /* kernel <-> user */ + } msr; ++ /* KVM_EXIT_XEN */ ++ struct kvm_xen_exit xen; + /* Fix the size of the union. */ + char padding[256]; + }; +@@ -573,6 +593,7 @@ struct kvm_vapic_addr { + #define KVM_MP_STATE_CHECK_STOP 6 + #define KVM_MP_STATE_OPERATING 7 + #define KVM_MP_STATE_LOAD 8 ++#define KVM_MP_STATE_AP_RESET_HOLD 9 + + struct kvm_mp_state { + __u32 mp_state; +@@ -1056,6 +1077,12 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 + #define KVM_CAP_SYS_HYPERV_CPUID 191 + #define KVM_CAP_DIRTY_LOG_RING 192 ++#define KVM_CAP_X86_BUS_LOCK_EXIT 193 ++#define KVM_CAP_PPC_DAWR1 194 ++#define KVM_CAP_SET_GUEST_DEBUG2 195 ++#define KVM_CAP_SGX_ATTRIBUTE 196 ++#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 ++#define KVM_CAP_PTP_KVM 198 + + #ifdef KVM_CAP_IRQ_ROUTING + +@@ -1129,6 +1156,11 @@ struct kvm_x86_mce { + #endif + + #ifdef KVM_CAP_XEN_HVM ++#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) ++#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) ++#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) ++#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) ++ + struct kvm_xen_hvm_config { + __u32 flags; + __u32 msr; +@@ -1563,6 +1595,57 @@ struct kvm_pv_cmd { + /* Available 
with KVM_CAP_DIRTY_LOG_RING */ + #define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) + ++/* Per-VM Xen attributes */ ++#define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) ++#define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) ++ ++struct kvm_xen_hvm_attr { ++ __u16 type; ++ __u16 pad[3]; ++ union { ++ __u8 long_mode; ++ __u8 vector; ++ struct { ++ __u64 gfn; ++ } shared_info; ++ __u64 pad[8]; ++ } u; ++}; ++ ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ ++#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 ++#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 ++#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 ++ ++/* Per-vCPU Xen attributes */ ++#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) ++#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) ++ ++struct kvm_xen_vcpu_attr { ++ __u16 type; ++ __u16 pad[3]; ++ union { ++ __u64 gpa; ++ __u64 pad[8]; ++ struct { ++ __u64 state; ++ __u64 state_entry_time; ++ __u64 time_running; ++ __u64 time_runnable; ++ __u64 time_blocked; ++ __u64 time_offline; ++ } runstate; ++ } u; ++}; ++ ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 ++#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 ++#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 ++#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 ++#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 ++ + /* Secure Encrypted Virtualization command */ + enum sev_cmd_id { + /* Guest initialization commands */ +@@ -1593,6 +1676,8 @@ enum sev_cmd_id { + KVM_SEV_CERT_EXPORT, + /* Attestation report */ + KVM_SEV_GET_ATTESTATION_REPORT, ++ /* Guest Migration Extension */ ++ KVM_SEV_SEND_CANCEL, + + KVM_SEV_NR_MAX, + }; +@@ -1651,6 +1736,45 @@ struct kvm_sev_attestation_report { + __u32 len; + }; + ++struct kvm_sev_send_start { ++ __u32 policy; ++ __u64 pdh_cert_uaddr; ++ __u32 pdh_cert_len; ++ 
__u64 plat_certs_uaddr; ++ __u32 plat_certs_len; ++ __u64 amd_certs_uaddr; ++ __u32 amd_certs_len; ++ __u64 session_uaddr; ++ __u32 session_len; ++}; ++ ++struct kvm_sev_send_update_data { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u64 guest_uaddr; ++ __u32 guest_len; ++ __u64 trans_uaddr; ++ __u32 trans_len; ++}; ++ ++struct kvm_sev_receive_start { ++ __u32 handle; ++ __u32 policy; ++ __u64 pdh_uaddr; ++ __u32 pdh_len; ++ __u64 session_uaddr; ++ __u32 session_len; ++}; ++ ++struct kvm_sev_receive_update_data { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u64 guest_uaddr; ++ __u32 guest_len; ++ __u64 trans_uaddr; ++ __u32 trans_len; ++}; ++ + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) + #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) +@@ -1756,8 +1880,8 @@ struct kvm_hyperv_eventfd { + * conversion after harvesting an entry. Also, it must not skip any + * dirty bits, so that dirty bits are always harvested in sequence. + */ +-#define KVM_DIRTY_GFN_F_DIRTY BIT(0) +-#define KVM_DIRTY_GFN_F_RESET BIT(1) ++#define KVM_DIRTY_GFN_F_DIRTY _BITUL(0) ++#define KVM_DIRTY_GFN_F_RESET _BITUL(1) + #define KVM_DIRTY_GFN_F_MASK 0x3 + + /* +@@ -1772,4 +1896,7 @@ struct kvm_dirty_gfn { + __u64 offset; + }; + ++#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) ++#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) ++ + #endif /* __LINUX_KVM_H */ +diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h +index 1ba9a9feeb..b9ac97b70f 100644 +--- a/linux-headers/linux/userfaultfd.h ++++ b/linux-headers/linux/userfaultfd.h +@@ -19,15 +19,19 @@ + * means the userland is reading). 
+ */ + #define UFFD_API ((__u64)0xAA) ++#define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING | \ ++ UFFDIO_REGISTER_MODE_WP | \ ++ UFFDIO_REGISTER_MODE_MINOR) + #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ + UFFD_FEATURE_EVENT_FORK | \ + UFFD_FEATURE_EVENT_REMAP | \ +- UFFD_FEATURE_EVENT_REMOVE | \ ++ UFFD_FEATURE_EVENT_REMOVE | \ + UFFD_FEATURE_EVENT_UNMAP | \ + UFFD_FEATURE_MISSING_HUGETLBFS | \ + UFFD_FEATURE_MISSING_SHMEM | \ + UFFD_FEATURE_SIGBUS | \ +- UFFD_FEATURE_THREAD_ID) ++ UFFD_FEATURE_THREAD_ID | \ ++ UFFD_FEATURE_MINOR_HUGETLBFS) + #define UFFD_API_IOCTLS \ + ((__u64)1 << _UFFDIO_REGISTER | \ + (__u64)1 << _UFFDIO_UNREGISTER | \ +@@ -36,10 +40,12 @@ + ((__u64)1 << _UFFDIO_WAKE | \ + (__u64)1 << _UFFDIO_COPY | \ + (__u64)1 << _UFFDIO_ZEROPAGE | \ +- (__u64)1 << _UFFDIO_WRITEPROTECT) ++ (__u64)1 << _UFFDIO_WRITEPROTECT | \ ++ (__u64)1 << _UFFDIO_CONTINUE) + #define UFFD_API_RANGE_IOCTLS_BASIC \ + ((__u64)1 << _UFFDIO_WAKE | \ +- (__u64)1 << _UFFDIO_COPY) ++ (__u64)1 << _UFFDIO_COPY | \ ++ (__u64)1 << _UFFDIO_CONTINUE) + + /* + * Valid ioctl command number range with this API is from 0x00 to +@@ -55,6 +61,7 @@ + #define _UFFDIO_COPY (0x03) + #define _UFFDIO_ZEROPAGE (0x04) + #define _UFFDIO_WRITEPROTECT (0x06) ++#define _UFFDIO_CONTINUE (0x07) + #define _UFFDIO_API (0x3F) + + /* userfaultfd ioctl ids */ +@@ -73,6 +80,8 @@ + struct uffdio_zeropage) + #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ + struct uffdio_writeprotect) ++#define UFFDIO_CONTINUE _IOR(UFFDIO, _UFFDIO_CONTINUE, \ ++ struct uffdio_continue) + + /* read() structure */ + struct uffd_msg { +@@ -127,6 +136,7 @@ struct uffd_msg { + /* flags for UFFD_EVENT_PAGEFAULT */ + #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ + #define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */ ++#define UFFD_PAGEFAULT_FLAG_MINOR (1<<2) /* If reason is VM_UFFD_MINOR */ + + struct uffdio_api { + /* userland asks for an API number 
and the features to enable */ +@@ -171,6 +181,10 @@ struct uffdio_api { + * + * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will + * be returned, if feature is not requested 0 will be returned. ++ * ++ * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults ++ * can be intercepted (via REGISTER_MODE_MINOR) for ++ * hugetlbfs-backed pages. + */ + #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) + #define UFFD_FEATURE_EVENT_FORK (1<<1) +@@ -181,6 +195,7 @@ struct uffdio_api { + #define UFFD_FEATURE_EVENT_UNMAP (1<<6) + #define UFFD_FEATURE_SIGBUS (1<<7) + #define UFFD_FEATURE_THREAD_ID (1<<8) ++#define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) + __u64 features; + + __u64 ioctls; +@@ -195,6 +210,7 @@ struct uffdio_register { + struct uffdio_range range; + #define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0) + #define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1) ++#define UFFDIO_REGISTER_MODE_MINOR ((__u64)1<<2) + __u64 mode; + + /* +@@ -257,6 +273,18 @@ struct uffdio_writeprotect { + __u64 mode; + }; + ++struct uffdio_continue { ++ struct uffdio_range range; ++#define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0) ++ __u64 mode; ++ ++ /* ++ * Fields below here are written by the ioctl and must be at the end: ++ * the copy_from_user will not read past here. ++ */ ++ __s64 mapped; ++}; ++ + /* + * Flags for the userfaultfd(2) system call itself. 
+ */ +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index 609099e455..e680594f27 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -46,6 +46,12 @@ + */ + #define VFIO_NOIOMMU_IOMMU 8 + ++/* Supports VFIO_DMA_UNMAP_FLAG_ALL */ ++#define VFIO_UNMAP_ALL 9 ++ ++/* Supports the vaddr flag for DMA map and unmap */ ++#define VFIO_UPDATE_VADDR 10 ++ + /* + * The IOCTL interface is designed for extensibility by embedding the + * structure length (argsz) and flags into structures passed between +@@ -329,6 +335,8 @@ struct vfio_region_info_cap_type { + /* 10de vendor PCI sub-types */ + /* + * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space. ++ * ++ * Deprecated, region no longer provided + */ + #define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1) + +@@ -336,6 +344,8 @@ struct vfio_region_info_cap_type { + /* + * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU + * to do TLB invalidation on a GPU. ++ * ++ * Deprecated, region no longer provided + */ + #define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1) + +@@ -635,6 +645,8 @@ struct vfio_device_migration_info { + * Capability with compressed real address (aka SSA - small system address) + * where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing + * and by the userspace to associate a NVLink bridge with a GPU. ++ * ++ * Deprecated, capability no longer provided + */ + #define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT 4 + +@@ -649,6 +661,8 @@ struct vfio_region_info_cap_nvlink2_ssatgt { + * property in the device tree. The value is fixed in the hardware + * and failing to provide the correct value results in the link + * not working with no indication from the driver why. 
++ * ++ * Deprecated, capability no longer provided + */ + #define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD 5 + +@@ -1074,12 +1088,22 @@ struct vfio_iommu_type1_info_dma_avail { + * + * Map process virtual addresses to IO virtual addresses using the + * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required. ++ * ++ * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and ++ * unblock translation of host virtual addresses in the iova range. The vaddr ++ * must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR. To ++ * maintain memory consistency within the user application, the updated vaddr ++ * must address the same memory object as originally mapped. Failure to do so ++ * will result in user memory corruption and/or device misbehavior. iova and ++ * size must match those in the original MAP_DMA call. Protection is not ++ * changed, and the READ & WRITE flags must be 0. + */ + struct vfio_iommu_type1_dma_map { + __u32 argsz; + __u32 flags; + #define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */ + #define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */ ++#define VFIO_DMA_MAP_FLAG_VADDR (1 << 2) + __u64 vaddr; /* Process virtual address */ + __u64 iova; /* IO virtual address */ + __u64 size; /* Size of mapping (bytes) */ +@@ -1102,6 +1126,7 @@ struct vfio_bitmap { + * field. No guarantee is made to the user that arbitrary unmaps of iova + * or size different from those used in the original mapping call will + * succeed. ++ * + * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap + * before unmapping IO virtual addresses. When this flag is set, the user must + * provide a struct vfio_bitmap in data[]. User must provide zero-allocated +@@ -1111,11 +1136,21 @@ struct vfio_bitmap { + * indicates that the page at that offset from iova is dirty. A Bitmap of the + * pages in the range of unmapped size is returned in the user-provided + * vfio_bitmap.data. 
++ * ++ * If flags & VFIO_DMA_UNMAP_FLAG_ALL, unmap all addresses. iova and size ++ * must be 0. This cannot be combined with the get-dirty-bitmap flag. ++ * ++ * If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host ++ * virtual addresses in the iova range. Tasks that attempt to translate an ++ * iova's vaddr will block. DMA to already-mapped pages continues. This ++ * cannot be combined with the get-dirty-bitmap flag. + */ + struct vfio_iommu_type1_dma_unmap { + __u32 argsz; + __u32 flags; + #define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0) ++#define VFIO_DMA_UNMAP_FLAG_ALL (1 << 1) ++#define VFIO_DMA_UNMAP_FLAG_VADDR (1 << 2) + __u64 iova; /* IO virtual address */ + __u64 size; /* Size of mapping (bytes) */ + __u8 data[]; +-- +2.27.0 + diff --git a/kvm-aarch64-Add-USB-storage-devices.patch b/kvm-aarch64-Add-USB-storage-devices.patch new file mode 100644 index 0000000..4059db2 --- /dev/null +++ b/kvm-aarch64-Add-USB-storage-devices.patch @@ -0,0 +1,41 @@ +From 7fd7892a21cf930f3d44dc3205bed9fb9128c11c Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Wed, 4 Aug 2021 07:10:15 -0400 +Subject: [PATCH 23/39] aarch64: Add USB storage devices + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 +RH-Commit: [15/15] 7706801381d8dfc97231fa87f6a7c8de7e3c8e84 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Acked-by: Igor Mammedov +RH-Acked-by: Andrew Jones + +There's no reason not to support these devices and doing so allows us +to match x86 and ppc. Also to match the other architectures we do not +enable CONFIG_USB_STORAGE_BOT, as that was disabled for the other +architectures for BZ1866133. 
+ +Signed-off-by: Andrew Jones +Signed-off-by: Miroslav Rezanina +--- + default-configs/devices/aarch64-rh-devices.mak | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/default-configs/devices/aarch64-rh-devices.mak b/default-configs/devices/aarch64-rh-devices.mak +index d8ce902720..a5bab23925 100644 +--- a/default-configs/devices/aarch64-rh-devices.mak ++++ b/default-configs/devices/aarch64-rh-devices.mak +@@ -15,6 +15,8 @@ CONFIG_SEMIHOSTING=y + CONFIG_USB=y + CONFIG_USB_XHCI=y + CONFIG_USB_XHCI_PCI=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y + CONFIG_VFIO=y + CONFIG_VFIO_PCI=y + CONFIG_VIRTIO_MMIO=y +-- +2.27.0 + diff --git a/kvm-acpi-pc-revert-back-to-v5.2-PCI-slot-enumeration.patch b/kvm-acpi-pc-revert-back-to-v5.2-PCI-slot-enumeration.patch new file mode 100644 index 0000000..bff1686 --- /dev/null +++ b/kvm-acpi-pc-revert-back-to-v5.2-PCI-slot-enumeration.patch @@ -0,0 +1,111 @@ +From 456bb6cb658b9d332fa0b5b91946916b48ed449e Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Thu, 29 Jul 2021 07:42:10 -0400 +Subject: [PATCH 09/39] acpi: pc: revert back to v5.2 PCI slot enumeration + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 +RH-Commit: [1/15] 57222343ccae17b99b4e166798d4d0eecca2e22b (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Acked-by: Igor Mammedov +RH-Acked-by: Andrew Jones + +Commit [1] moved _SUN variable from only hot-pluggable to +all devices. This made linux kernel enumerate extra slots +that weren't present before. If extra slot happens to be +enumerated first and there is a device in the same slot +but on other bridge, linux kernel will add -N suffix to +slot name of the latter, thus changing NIC name compared to +QEMU 5.2. This in some case confuses systemd, if it is +using SLOT NIC naming scheme and interface name becomes +not the same as it was under QEMU-5.2. 
+ +Reproducer QEMU CLI: + -M pc-i440fx-5.2 -nodefaults \ + -device pci-bridge,chassis_nr=1,id=pci.1,bus=pci.0,addr=0x3 \ + -device virtio-net-pci,id=nic1,bus=pci.1,addr=0x1 \ + -device virtio-net-pci,id=nic2,bus=pci.1,addr=0x2 \ + -device virtio-net-pci,id=nic3,bus=pci.1,addr=0x3 + +with RHEL8 guest produces following results: + v5.2: + kernel: virtio_net virtio0 ens1: renamed from eth0 + kernel: virtio_net virtio2 ens3: renamed from eth2 + kernel: virtio_net virtio1 enp1s2: renamed from eth1 + (slot 2 is assigned to empty bus 0 slot and virtio1 + is assigned to 2-2 slot, and renaming falls back, + for some reason, to path based naming scheme) + + v6.0: + kernel: virtio_net virtio0 ens1: renamed from eth0 + kernel: virtio_net virtio2 ens3: renamed from eth2 + systemd-udevd[299]: Error changing net interface name 'eth1' to 'ens3': File exists + systemd-udevd[299]: could not rename interface '3' from 'eth1' to 'ens3': File exists + (with commit [1] kernel assigns virtio2 to 3-2 slot + since bridge advertises _SUN=0x3 and kernel assigns + slot 3 to bridge. Still it manages to rename virtio2 + correctly to ens3, however systemd gets confused with virtio1 + where slot allocation exactly the same (2-2) as in 5.2 case + and tries to rename it to ens3 which is rightfully taken by + virtio2) + +I'm not sure what breaks in systemd interface renaming (it probably +should be investigated), but on QEMU side we can safely revert +_SUN to 5.2 behavior (i.e. avoid cold-plugged bridges and non +hot-pluggable device classes), without breaking acpi-index, which uses +slot numbers but it doesn't have to use _SUN, it could use an arbitrary +variable name that has the same slot value). +It will help existing VMs to keep networking with non trivial +configs in working order since systemd will do its interface +renaming magic as it used to do. 
+ +1) +Fixes: b7f23f62e40 (pci: acpi: add _DSM method to PCI devices) +Signed-off-by: Igor Mammedov +Message-Id: <20210624204229.998824-3-imammedo@redhat.com> +Reviewed-by: Stefan Hajnoczi +Tested-by: John Sucaet +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 7193d7cdd93e50f0e5f09803b98d27d3f9b147ac) +Signed-off-by: Igor Mammedov +Signed-off-by: Miroslav Rezanina +--- + hw/i386/acpi-build.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index de98750aef..dbee0cd3bc 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -432,11 +432,15 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, + aml_append(dev, aml_name_decl("_ADR", aml_int(slot << 16))); + + if (bsel) { +- aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); ++ /* ++ * Can't declare _SUN here for every device as it changes 'slot' ++ * enumeration order in linux kernel, so use another variable for it ++ */ ++ aml_append(dev, aml_name_decl("ASUN", aml_int(slot))); + method = aml_method("_DSM", 4, AML_SERIALIZED); + aml_append(method, aml_return( + aml_call6("PDSM", aml_arg(0), aml_arg(1), aml_arg(2), +- aml_arg(3), aml_name("BSEL"), aml_name("_SUN")) ++ aml_arg(3), aml_name("BSEL"), aml_name("ASUN")) + )); + aml_append(dev, method); + } +@@ -463,6 +467,7 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, + aml_append(method, aml_return(aml_int(s3d))); + aml_append(dev, method); + } else if (hotplug_enabled_dev) { ++ aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); + /* add _EJ0 to make slot hotpluggable */ + method = aml_method("_EJ0", 1, AML_NOTSERIALIZED); + aml_append(method, +-- +2.27.0 + diff --git a/kvm-audio-Never-send-migration-section.patch b/kvm-audio-Never-send-migration-section.patch new file mode 100644 index 0000000..474612d --- /dev/null +++ b/kvm-audio-Never-send-migration-section.patch @@ -0,0 
+1,63 @@ +From 1e69dbe01e1cad1680723e1bc086cc52a1772c17 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 11 Aug 2021 08:40:38 -0400 +Subject: [PATCH 27/39] audio: Never send migration section +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 35: Synchronize with RHEL-AV 8.5 release 28 to RHEL 9 +RH-Commit: [4/4] 0b6ec114a3573ac7efcbe5ab3094d8020899c82d (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Philippe Mathieu-Daudé + +The audio migration vmstate is empty, and always has been; we can't +just remove it though because an old qemu might send it us. +Changes with -audiodev now mean it's sometimes created when it didn't +used to be, and can confuse migration to old qemu. + +Change it so that vmstate_audio is never sent; if it's received it +should still be accepted, and old qemu's shouldn't be too upset if it's +missing. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Tested-by: Daniel P. Berrangé +Message-Id: <20210809170956.78536-1-dgilbert@redhat.com> +Signed-off-by: Gerd Hoffmann +(cherry picked from commit da77adbaf619c4d170cb42d769145ad1803fbad9) +Signed-off-by: Danilo C. L. de Paula +Signed-off-by: Miroslav Rezanina +--- + audio/audio.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/audio/audio.c b/audio/audio.c +index 534278edfe..fa724ea8e0 100644 +--- a/audio/audio.c ++++ b/audio/audio.c +@@ -1621,10 +1621,20 @@ void audio_cleanup(void) + } + } + ++static bool vmstate_audio_needed(void *opaque) ++{ ++ /* ++ * Never needed, this vmstate only exists in case ++ * an old qemu sends it to us. 
++ */ ++ return false; ++} ++ + static const VMStateDescription vmstate_audio = { + .name = "audio", + .version_id = 1, + .minimum_version_id = 1, ++ .needed = vmstate_audio_needed, + .fields = (VMStateField[]) { + VMSTATE_END_OF_LIST() + } +-- +2.27.0 + diff --git a/kvm-block-nvme-Fix-VFIO_MAP_DMA-failed-No-space-left-on-.patch b/kvm-block-nvme-Fix-VFIO_MAP_DMA-failed-No-space-left-on-.patch new file mode 100644 index 0000000..df36313 --- /dev/null +++ b/kvm-block-nvme-Fix-VFIO_MAP_DMA-failed-No-space-left-on-.patch @@ -0,0 +1,106 @@ +From 1d85424fe5208986fc07fe9baa1e9b33d77b185a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 29 Jul 2021 07:42:35 -0400 +Subject: [PATCH 20/39] block/nvme: Fix VFIO_MAP_DMA failed: No space left on + device +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 +RH-Commit: [12/15] f4b3456e4ce1a876a64f9fb92c56f8f981076953 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Acked-by: Igor Mammedov +RH-Acked-by: Andrew Jones + +When the NVMe block driver was introduced (see commit bdd6a90a9e5, +January 2018), Linux VFIO_IOMMU_MAP_DMA ioctl was only returning +-ENOMEM in case of error. The driver was correctly handling the +error path to recycle its volatile IOVA mappings. + +To fix CVE-2019-3882, Linux commit 492855939bdb ("vfio/type1: Limit +DMA mappings per container", April 2019) added the -ENOSPC error to +signal the user exhausted the DMA mappings available for a container. 
+ +The block driver started to mis-behave: + + qemu-system-x86_64: VFIO_MAP_DMA failed: No space left on device + (qemu) + (qemu) info status + VM status: paused (io-error) + (qemu) c + VFIO_MAP_DMA failed: No space left on device + (qemu) c + VFIO_MAP_DMA failed: No space left on device + +(The VM is not resumable from here, hence stuck.) + +Fix by handling the new -ENOSPC error (when DMA mappings are +exhausted) without any distinction to the current -ENOMEM error, +so we don't change the behavior on old kernels where the CVE-2019-3882 +fix is not present. + +An easy way to reproduce this bug is to restrict the DMA mapping +limit (65535 by default) when loading the VFIO IOMMU module: + + # modprobe vfio_iommu_type1 dma_entry_limit=666 + +Cc: qemu-stable@nongnu.org +Cc: Fam Zheng +Cc: Maxim Levitsky +Cc: Alex Williamson +Reported-by: Michal Prívozník +Signed-off-by: Philippe Mathieu-Daudé +Message-id: 20210723195843.1032825-1-philmd@redhat.com +Fixes: bdd6a90a9e5 ("block: Add VFIO based NVMe driver") +Buglink: https://bugs.launchpad.net/qemu/+bug/1863333 +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/65 +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 15a730e7a3aaac180df72cd5730e0617bcf44a5a) +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Miroslav Rezanina +--- + block/nvme.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/block/nvme.c b/block/nvme.c +index 2b5421e7aa..e8dbbc2317 100644 +--- a/block/nvme.c ++++ b/block/nvme.c +@@ -1030,7 +1030,29 @@ try_map: + r = qemu_vfio_dma_map(s->vfio, + qiov->iov[i].iov_base, + len, true, &iova); ++ if (r == -ENOSPC) { ++ /* ++ * In addition to the -ENOMEM error, the VFIO_IOMMU_MAP_DMA ++ * ioctl returns -ENOSPC to signal the user exhausted the DMA ++ * mappings available for a container since Linux kernel commit ++ * 492855939bdb ("vfio/type1: Limit DMA mappings per container", ++ * April 2019, see CVE-2019-3882). 
++ * ++ * This block driver already handles this error path by checking ++ * for the -ENOMEM error, so we directly replace -ENOSPC by ++ * -ENOMEM. Beside, -ENOSPC has a specific meaning for blockdev ++ * coroutines: it triggers BLOCKDEV_ON_ERROR_ENOSPC and ++ * BLOCK_ERROR_ACTION_STOP which stops the VM, asking the operator ++ * to add more storage to the blockdev. Not something we can do ++ * easily with an IOMMU :) ++ */ ++ r = -ENOMEM; ++ } + if (r == -ENOMEM && retry) { ++ /* ++ * We exhausted the DMA mappings available for our container: ++ * recycle the volatile IOVA mappings. ++ */ + retry = false; + trace_nvme_dma_flush_queue_wait(s); + if (s->dma_map_count) { +-- +2.27.0 + diff --git a/kvm-configure-Fix-endianess-test-with-LTO.patch b/kvm-configure-Fix-endianess-test-with-LTO.patch new file mode 100644 index 0000000..9dd8745 --- /dev/null +++ b/kvm-configure-Fix-endianess-test-with-LTO.patch @@ -0,0 +1,82 @@ +From 40e5138fb1e615c927a21d0b3f2e24eca885ede4 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Thu, 15 Jul 2021 10:39:28 +0200 +Subject: [PATCH 36/39] configure: Fix endianess test with LTO + +RH-Author: Jon Maloy +RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack +RH-Commit: [9/11] c4be415076356fe74efab6f74d7b347064bbdb40 (jmaloy/qemu-kvm-centos-jon) +RH-Bugzilla: 1939509 1940132 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck + +If a user is trying to compile QEMU with link-time optimization +enabled by running the configure script like this: + + .../configure --extra-cflags="-flto" + +then the endianess test is failing since the magic values do not +show up in the intermediate object files there. If the host is +a big endian machine (like s390x), the QEMU binary is then unusable +since the corresponding variable "bigendian" is pre-initialized +with "no". + +To fix this issue, we should rather create a full binary and look +for the magic strings there instead. 
+And we really should not continue the build if the endianess check +failed, to make it clear right from the start that something went +wrong here, thus let's also add some "exit 1" statements here +after emitting the error message. + +Message-Id: <20210715083928.933806-1-thuth@redhat.com> +Reviewed-by: Richard Henderson +Signed-off-by: Thomas Huth +(cherry picked from commit 659eb157a55666bf379f5362238a86d855e262e2) +Signed-off-by: Jon Maloy +Signed-off-by: Miroslav Rezanina +--- + configure | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +diff --git a/configure b/configure +index 83d8af7fe4..dcd9520bed 100755 +--- a/configure ++++ b/configure +@@ -2323,24 +2323,27 @@ feature_not_found() { + # --- + # big/little endian test + cat > $TMPC << EOF ++#include + short big_endian[] = { 0x4269, 0x4765, 0x4e64, 0x4961, 0x4e00, 0, }; + short little_endian[] = { 0x694c, 0x7454, 0x654c, 0x6e45, 0x6944, 0x6e41, 0, }; +-extern int foo(short *, short *); +-int main(int argc, char *argv[]) { +- return foo(big_endian, little_endian); ++int main(int argc, char *argv[]) ++{ ++ return printf("%s %s\n", (char *)big_endian, (char *)little_endian); + } + EOF + +-if compile_object ; then +- if strings -a $TMPO | grep -q BiGeNdIaN ; then ++if compile_prog ; then ++ if strings -a $TMPE | grep -q BiGeNdIaN ; then + bigendian="yes" +- elif strings -a $TMPO | grep -q LiTtLeEnDiAn ; then ++ elif strings -a $TMPE | grep -q LiTtLeEnDiAn ; then + bigendian="no" + else + echo big/little test failed ++ exit 1 + fi + else + echo big/little test failed ++ exit 1 + fi + + ########################################## +-- +2.27.0 + diff --git a/kvm-hmp-Fix-loadvm-to-resume-the-VM-on-success-instead-o.patch b/kvm-hmp-Fix-loadvm-to-resume-the-VM-on-success-instead-o.patch new file mode 100644 index 0000000..6ed6a6b --- /dev/null +++ b/kvm-hmp-Fix-loadvm-to-resume-the-VM-on-success-instead-o.patch @@ -0,0 +1,51 @@ +From 3347d61ff783d05f41f03097551460dc5825b301 Mon Sep 17 00:00:00 
2001 +From: Kevin Wolf +Date: Thu, 29 Jul 2021 07:42:14 -0400 +Subject: [PATCH 11/39] hmp: Fix loadvm to resume the VM on success instead of + failure + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 +RH-Commit: [3/15] 492cfb8ef252805b988a256abe73628605f630e9 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Acked-by: Igor Mammedov +RH-Acked-by: Andrew Jones + +Commit f61fe11aa6f broke hmp_loadvm() by adding an incorrect negation +when converting from 0/-errno return values to a bool value. The result +is that loadvm resumes the VM now if it failed and keeps it stopped if +it succeeded. Fix it to restore the old behaviour and do it the other way +around. + +Fixes: f61fe11aa6f7f8f0ffe4ddaa56a8108f3ab57854 +Cc: qemu-stable@nongnu.org +Reported-by: Yanhui Ma +Signed-off-by: Kevin Wolf +Message-Id: <20210511163151.45167-1-kwolf@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit c53cd04e70641fdf9410aac40c617d074047b3e1) +Signed-off-by: Kevin Wolf +Signed-off-by: Miroslav Rezanina +--- + monitor/hmp-cmds.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 0ad5b77477..cc15d9b6ee 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -1133,7 +1133,7 @@ void hmp_loadvm(Monitor *mon, const QDict *qdict) + + vm_stop(RUN_STATE_RESTORE_VM); + +- if (!load_snapshot(name, NULL, false, NULL, &err) && saved_vm_running) { ++ if (load_snapshot(name, NULL, false, NULL, &err) && saved_vm_running) { + vm_start(); + } + hmp_handle_error(mon, err); +-- +2.27.0 + diff --git a/kvm-i386-Add-ratelimit-for-bus-locks-acquired-in-guest.patch b/kvm-i386-Add-ratelimit-for-bus-locks-acquired-in-guest.patch new file mode 100644 index 0000000..71f143a --- /dev/null +++ b/kvm-i386-Add-ratelimit-for-bus-locks-acquired-in-guest.patch @@ -0,0 +1,219 @@ +From e92a6c64cb4b1437c5b75f25a638dbb6eb041383 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 29 Jul 2021 07:42:27 -0400 +Subject: [PATCH 16/39] i386: Add ratelimit for bus locks acquired in guest + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 +RH-Commit: [8/15] 2b8f01e05e44388c2f90d5281a9fe5537ab2433d (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Acked-by: Igor Mammedov +RH-Acked-by: Andrew Jones + +A bus lock is acquired through either split locked access to writeback +(WB) memory or any locked access to non-WB memory. It is typically >1000 +cycles slower than an atomic operation within a cache and can also +disrupt performance on other cores. + +Virtual Machines can exploit bus locks to degrade the performance of +system. 
To address this kind of performance DOS attack coming from the +VMs, bus lock VM exit is introduced in KVM and it can report the bus +locks detected in guest. If enabled in KVM, it would exit to the +userspace to let the user enforce throttling policies once bus locks +acquired in VMs. + +The availability of bus lock VM exit can be detected through the +KVM_CAP_X86_BUS_LOCK_EXIT. The returned bitmap contains the potential +policies supported by KVM. The field KVM_BUS_LOCK_DETECTION_EXIT in +bitmap is the only supported strategy at present. It indicates that KVM +will exit to userspace to handle the bus locks. + +This patch adds a ratelimit on the bus locks acquired in guest as a +mitigation policy. + +Introduce a new field "bus_lock_ratelimit" to record the limited speed +of bus locks in the target VM. The user can specify it through the +"bus-lock-ratelimit" as a machine property. In current implementation, +the default value of the speed is 0 per second, which means no +restrictions on the bus locks. + +As for ratelimit on detected bus locks, simply set the ratelimit +interval to 1s and restrict the quota of bus lock occurrence to the value +of "bus_lock_ratelimit". A potential alternative is to introduce the +time slice as a property which can help the user achieve more precise +control. 
+ +The detail of bus lock VM exit can be found in spec: +https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html + +Signed-off-by: Chenyi Qiang +Message-Id: <20210521043820.29678-1-chenyi.qiang@intel.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit 035d1ef26565f8f8eae058c37f5731a9ae304b96) +Signed-off-by: Paul Lai +Signed-off-by: Miroslav Rezanina +--- + hw/i386/x86.c | 24 ++++++++++++++++++++++++ + include/hw/i386/x86.h | 8 ++++++++ + target/i386/kvm/kvm.c | 41 +++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 73 insertions(+) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index ed796fe6ba..d30cf27e29 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1246,6 +1246,23 @@ static void x86_machine_set_oem_table_id(Object *obj, const char *value, + strncpy(x86ms->oem_table_id, value, 8); + } + ++static void x86_machine_get_bus_lock_ratelimit(Object *obj, Visitor *v, ++ const char *name, void *opaque, Error **errp) ++{ ++ X86MachineState *x86ms = X86_MACHINE(obj); ++ uint64_t bus_lock_ratelimit = x86ms->bus_lock_ratelimit; ++ ++ visit_type_uint64(v, name, &bus_lock_ratelimit, errp); ++} ++ ++static void x86_machine_set_bus_lock_ratelimit(Object *obj, Visitor *v, ++ const char *name, void *opaque, Error **errp) ++{ ++ X86MachineState *x86ms = X86_MACHINE(obj); ++ ++ visit_type_uint64(v, name, &x86ms->bus_lock_ratelimit, errp); ++} ++ + static void x86_machine_initfn(Object *obj) + { + X86MachineState *x86ms = X86_MACHINE(obj); +@@ -1256,6 +1273,7 @@ static void x86_machine_initfn(Object *obj) + x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS; + x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); + x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); ++ x86ms->bus_lock_ratelimit = 0; + } + + static void x86_machine_class_init(ObjectClass *oc, void *data) +@@ -1299,6 +1317,12 @@ static void x86_machine_class_init(ObjectClass *oc, void *data) + "Override the default 
value of field OEM Table ID " + "in ACPI table header." + "The string may be up to 8 bytes in size"); ++ ++ object_class_property_add(oc, X86_MACHINE_BUS_LOCK_RATELIMIT, "uint64_t", ++ x86_machine_get_bus_lock_ratelimit, ++ x86_machine_set_bus_lock_ratelimit, NULL, NULL); ++ object_class_property_set_description(oc, X86_MACHINE_BUS_LOCK_RATELIMIT, ++ "Set the ratelimit for the bus locks acquired in VMs"); + } + + static const TypeInfo x86_machine_info = { +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index c09b648dff..25a1f16f01 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -74,12 +74,20 @@ struct X86MachineState { + * will be translated to MSI messages in the address space. + */ + AddressSpace *ioapic_as; ++ ++ /* ++ * Ratelimit enforced on detected bus locks in guest. ++ * The default value of the bus_lock_ratelimit is 0 per second, ++ * which means no limitation on the guest's bus locks. ++ */ ++ uint64_t bus_lock_ratelimit; + }; + + #define X86_MACHINE_SMM "smm" + #define X86_MACHINE_ACPI "acpi" + #define X86_MACHINE_OEM_ID "x-oem-id" + #define X86_MACHINE_OEM_TABLE_ID "x-oem-table-id" ++#define X86_MACHINE_BUS_LOCK_RATELIMIT "bus-lock-ratelimit" + + #define TYPE_X86_MACHINE MACHINE_TYPE_NAME("x86") + OBJECT_DECLARE_TYPE(X86MachineState, X86MachineClass, X86_MACHINE) +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 4c69c2cb4b..af030af116 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -130,6 +130,9 @@ static bool has_msr_mcg_ext_ctl; + static struct kvm_cpuid2 *cpuid_cache; + static struct kvm_msr_list *kvm_feature_msrs; + ++#define BUS_LOCK_SLICE_TIME 1000000000ULL /* ns */ ++static RateLimit bus_lock_ratelimit_ctrl; ++ + int kvm_has_pit_state2(void) + { + return has_pit_state2; +@@ -2267,6 +2270,28 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + } + } + ++ if (object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE)) { ++ X86MachineState *x86ms = X86_MACHINE(ms); ++ ++ if 
(x86ms->bus_lock_ratelimit > 0) { ++ ret = kvm_check_extension(s, KVM_CAP_X86_BUS_LOCK_EXIT); ++ if (!(ret & KVM_BUS_LOCK_DETECTION_EXIT)) { ++ error_report("kvm: bus lock detection unsupported"); ++ return -ENOTSUP; ++ } ++ ret = kvm_vm_enable_cap(s, KVM_CAP_X86_BUS_LOCK_EXIT, 0, ++ KVM_BUS_LOCK_DETECTION_EXIT); ++ if (ret < 0) { ++ error_report("kvm: Failed to enable bus lock detection cap: %s", ++ strerror(-ret)); ++ return ret; ++ } ++ ratelimit_init(&bus_lock_ratelimit_ctrl); ++ ratelimit_set_speed(&bus_lock_ratelimit_ctrl, ++ x86ms->bus_lock_ratelimit, BUS_LOCK_SLICE_TIME); ++ } ++ } ++ + return 0; + } + +@@ -4225,6 +4250,15 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) + } + } + ++static void kvm_rate_limit_on_bus_lock(void) ++{ ++ uint64_t delay_ns = ratelimit_calculate_delay(&bus_lock_ratelimit_ctrl, 1); ++ ++ if (delay_ns) { ++ g_usleep(delay_ns / SCALE_US); ++ } ++} ++ + MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run) + { + X86CPU *x86_cpu = X86_CPU(cpu); +@@ -4240,6 +4274,9 @@ MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run) + } else { + env->eflags &= ~IF_MASK; + } ++ if (run->flags & KVM_RUN_X86_BUS_LOCK) { ++ kvm_rate_limit_on_bus_lock(); ++ } + + /* We need to protect the apic state against concurrent accesses from + * different threads in case the userspace irqchip is used. 
*/ +@@ -4598,6 +4635,10 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + ioapic_eoi_broadcast(run->eoi.vector); + ret = 0; + break; ++ case KVM_EXIT_X86_BUS_LOCK: ++ /* already handled in kvm_arch_post_run */ ++ ret = 0; ++ break; + default: + fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); + ret = -1; +-- +2.27.0 + diff --git a/kvm-i386-cpu-Expose-AVX_VNNI-instruction-to-guest.patch b/kvm-i386-cpu-Expose-AVX_VNNI-instruction-to-guest.patch new file mode 100644 index 0000000..9f007d8 --- /dev/null +++ b/kvm-i386-cpu-Expose-AVX_VNNI-instruction-to-guest.patch @@ -0,0 +1,82 @@ +From c24fcdf0712ef81ec25ca3a4a1144cca18303fbe Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 29 Jul 2021 07:42:19 -0400 +Subject: [PATCH 13/39] i386/cpu: Expose AVX_VNNI instruction to guest + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 +RH-Commit: [5/15] 56381e35a1dc06af7d457d1fe61b1c108dd25d06 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Acked-by: Igor Mammedov +RH-Acked-by: Andrew Jones + +Expose AVX (VEX-encoded) versions of the Vector Neural Network +Instructions to guest. + +The bit definition: +CPUID.(EAX=7,ECX=1):EAX[bit 4] AVX_VNNI + +The following instructions are available when this feature is +present in the guest. + 1. VPDPBUSD: Multiply and Add Unsigned and Signed Bytes + 2. VPDPBUSDS: Multiply and Add Unsigned and Signed Bytes with Saturation + 3. VPDPWSSD: Multiply and Add Signed Word Integers + 4. VPDPWSSDS: Multiply and Add Signed Word Integers with Saturation + +As for the kvm related code, please reference Linux commit id 1085a6b585d7. 
+ +The release document ref below link: +https://software.intel.com/content/www/us/en/develop/download/\ +intel-architecture-instruction-set-extensions-programming-reference.html + +Signed-off-by: Yang Zhong +Message-Id: <20210407015609.22936-1-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c1826ea6a052084f2e6a0bae9dd5932a727df039) +Signed-off-by: Paul Lai +Signed-off-by: Miroslav Rezanina +--- + target/i386/cpu.c | 4 ++-- + target/i386/cpu.h | 2 ++ + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index da47c3e50e..0de2932c79 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -996,7 +996,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, +- NULL, "avx512-bf16", NULL, NULL, ++ "avx-vnni", "avx512-bf16", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +@@ -3284,7 +3284,7 @@ static X86CPUDefinition builtin_x86_defs[] = { + MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | + MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO, + .features[FEAT_7_1_EAX] = +- CPUID_7_1_EAX_AVX512_BF16, ++ CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16, + /* XSAVES is added in version 2 */ + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 570f916878..edc8984448 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -804,6 +804,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + /* Speculative Store Bypass Disable */ + #define CPUID_7_0_EDX_SPEC_CTRL_SSBD (1U << 31) + ++/* AVX VNNI Instruction */ ++#define CPUID_7_1_EAX_AVX_VNNI (1U << 4) + /* AVX512 BFloat16 Instruction */ + #define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) + +-- +2.27.0 + diff --git a/kvm-iotests-Improve-and-rename-test-291-to-qemu-img-bitm.patch 
b/kvm-iotests-Improve-and-rename-test-291-to-qemu-img-bitm.patch new file mode 100644 index 0000000..1d1ebf0 --- /dev/null +++ b/kvm-iotests-Improve-and-rename-test-291-to-qemu-img-bitm.patch @@ -0,0 +1,178 @@ +From a6ab9f3d290c2ff3c2fc0187c69cf8cf69feff40 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Fri, 6 Aug 2021 15:07:47 -0400 +Subject: [PATCH 24/39] iotests: Improve and rename test 291 to qemu-img-bitmap +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 35: Synchronize with RHEL-AV 8.5 release 28 to RHEL 9 +RH-Commit: [1/4] bf400ceb9ef48b81c5f7cade97bc1cbf7bc4842c (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Philippe Mathieu-Daudé + +Enhance the test to demonstrate existing less-than-stellar behavior of +qemu-img with a qcow2 image containing an inconsistent bitmap: we +don't diagnose the problem until after copying the entire image (a +potentially long time), and when we do diagnose the failure, we still +end up leaving an empty bitmap in the destination. This mess will be +cleaned up in the next patch. + +While at it, rename the test now that we support useful iotest names, +and fix a missing newline in the error message thus exposed. + +Signed-off-by: Eric Blake +Message-Id: <20210709153951.2801666-2-eblake@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Nir Soffer + +(cherry picked from commit 94075c28eea0755173939dfaf1eae688b224a74e) +Conflicts: + tests/qemu-iotests/tests/qemu-img-bitmaps.out - commit 8417e1378c not backported +Signed-off-by: Eric Blake +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + block/dirty-bitmap.c | 2 +- + .../{291 => tests/qemu-img-bitmaps} | 21 +++++++- + .../{291.out => tests/qemu-img-bitmaps.out} | 49 ++++++++++++++++++- + 3 files changed, 69 insertions(+), 3 deletions(-) + rename tests/qemu-iotests/{291 => tests/qemu-img-bitmaps} (87%) + rename tests/qemu-iotests/{291.out => tests/qemu-img-bitmaps.out} (75%) + +diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c +index 68d295d6e3..0ef46163e3 100644 +--- a/block/dirty-bitmap.c ++++ b/block/dirty-bitmap.c +@@ -193,7 +193,7 @@ int bdrv_dirty_bitmap_check(const BdrvDirtyBitmap *bitmap, uint32_t flags, + error_setg(errp, "Bitmap '%s' is inconsistent and cannot be used", + bitmap->name); + error_append_hint(errp, "Try block-dirty-bitmap-remove to delete" +- " this bitmap from disk"); ++ " this bitmap from disk\n"); + return -1; + } + +diff --git a/tests/qemu-iotests/291 b/tests/qemu-iotests/tests/qemu-img-bitmaps +similarity index 87% +rename from tests/qemu-iotests/291 +rename to tests/qemu-iotests/tests/qemu-img-bitmaps +index 20efb080a6..409c4497a3 100755 +--- a/tests/qemu-iotests/291 ++++ b/tests/qemu-iotests/tests/qemu-img-bitmaps +@@ -3,7 +3,7 @@ + # + # Test qemu-img bitmap handling + # +-# Copyright (C) 2018-2020 Red Hat, Inc. ++# Copyright (C) 2018-2021 Red Hat, Inc. + # + # This program is free software; you can redistribute it and/or modify + # it under the terms of the GNU General Public License as published by +@@ -27,11 +27,13 @@ status=1 # failure is the default! + _cleanup() + { + _cleanup_test_img ++ _rm_test_img "$TEST_IMG.copy" + nbd_server_stop + } + trap "_cleanup; exit \$status" 0 1 2 3 15 + + # get standard environment, filters and checks ++cd .. + . ./common.rc + . ./common.filter + . 
./common.nbd +@@ -129,6 +131,23 @@ $QEMU_IMG map --output=json --image-opts \ + + nbd_server_stop + ++echo ++echo "=== Check handling of inconsistent bitmap ===" ++echo ++ ++# Prepare image with corrupted bitmap ++$QEMU_IO -c abort "$TEST_IMG" 2>/dev/null ++$QEMU_IMG bitmap --add "$TEST_IMG" b4 ++$QEMU_IMG bitmap --remove "$TEST_IMG" b1 ++_img_info --format-specific | _filter_irrelevant_img_info ++echo ++$QEMU_IMG convert --bitmaps -O qcow2 "$TEST_IMG" "$TEST_IMG.copy" && ++ echo "unexpected success" ++# Bug - even though we failed at conversion, we left a file around with ++# a bitmap marked as not corrupt ++TEST_IMG=$TEST_IMG.copy _img_info --format-specific \ ++ | _filter_irrelevant_img_info ++ + # success, all done + echo '*** done' + rm -f $seq.full +diff --git a/tests/qemu-iotests/291.out b/tests/qemu-iotests/tests/qemu-img-bitmaps.out +similarity index 75% +rename from tests/qemu-iotests/291.out +rename to tests/qemu-iotests/tests/qemu-img-bitmaps.out +index 23411c0ff4..543b028da6 100644 +--- a/tests/qemu-iotests/291.out ++++ b/tests/qemu-iotests/tests/qemu-img-bitmaps.out +@@ -1,4 +1,4 @@ +-QA output created by 291 ++QA output created by qemu-img-bitmaps + + === Initial image setup === + +@@ -115,4 +115,51 @@ Format specific information: + [{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, + { "start": 2097152, "length": 1048576, "depth": 0, "zero": false, "data": false}, + { "start": 3145728, "length": 7340032, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] ++ ++=== Check handling of inconsistent bitmap === ++ ++image: TEST_DIR/t.IMGFMT ++file format: IMGFMT ++virtual size: 10 MiB (10485760 bytes) ++cluster_size: 65536 ++backing file: TEST_DIR/t.IMGFMT.base ++backing file format: IMGFMT ++Format specific information: ++ bitmaps: ++ [0]: ++ flags: ++ [0]: in-use ++ [1]: auto ++ name: b2 ++ granularity: 65536 ++ [1]: ++ flags: ++ [0]: in-use ++ name: b0 ++ granularity: 65536 ++ [2]: ++ flags: ++ [0]: 
auto ++ name: b4 ++ granularity: 65536 ++ corrupt: false ++ ++qemu-img: Failed to populate bitmap b0: Bitmap 'b0' is inconsistent and cannot be used ++Try block-dirty-bitmap-remove to delete this bitmap from disk ++image: TEST_DIR/t.IMGFMT.copy ++file format: IMGFMT ++virtual size: 10 MiB (10485760 bytes) ++cluster_size: 65536 ++Format specific information: ++ bitmaps: ++ [0]: ++ flags: ++ name: b0 ++ granularity: 65536 ++ [1]: ++ flags: ++ [0]: auto ++ name: b4 ++ granularity: 65536 ++ corrupt: false + *** done +-- +2.27.0 + diff --git a/kvm-iothread-add-aio-max-batch-parameter.patch b/kvm-iothread-add-aio-max-batch-parameter.patch new file mode 100644 index 0000000..78a6c23 --- /dev/null +++ b/kvm-iothread-add-aio-max-batch-parameter.patch @@ -0,0 +1,324 @@ +From d8682ef60e3a658d776473fee2299015dd5105d7 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Thu, 29 Jul 2021 07:42:31 -0400 +Subject: [PATCH 18/39] iothread: add aio-max-batch parameter + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 +RH-Commit: [10/15] d033b3c8ddd71bae799103832039d6daa6dfad52 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Acked-by: Igor Mammedov +RH-Acked-by: Andrew Jones + +The `aio-max-batch` parameter will be propagated to AIO engines +and it will be used to control the maximum number of queued requests. + +When there are in queue a number of requests equal to `aio-max-batch`, +the engine invokes the system call to forward the requests to the kernel. + +This parameter allows us to control the maximum batch size to reduce +the latency that requests might accumulate while queued in the AIO +engine queue. + +If `aio-max-batch` is equal to 0 (default value), the AIO engine will +use its default maximum batch size value. 
+ +Signed-off-by: Stefano Garzarella +Message-id: 20210721094211.69853-3-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 1793ad0247cad35db1ebbc04fbea0446c30a27ca) +Signed-off-by: Stefano Garzarella +Signed-off-by: Miroslav Rezanina +--- + include/block/aio.h | 12 +++++++++ + include/sysemu/iothread.h | 3 +++ + iothread.c | 55 +++++++++++++++++++++++++++++++++++---- + monitor/hmp-cmds.c | 2 ++ + qapi/misc.json | 6 ++++- + qapi/qom.json | 7 ++++- + qemu-options.hx | 8 ++++-- + util/aio-posix.c | 12 +++++++++ + util/aio-win32.c | 5 ++++ + util/async.c | 2 ++ + 10 files changed, 103 insertions(+), 9 deletions(-) + +diff --git a/include/block/aio.h b/include/block/aio.h +index 5f342267d5..ea68a139c8 100644 +--- a/include/block/aio.h ++++ b/include/block/aio.h +@@ -232,6 +232,9 @@ struct AioContext { + int64_t poll_grow; /* polling time growth factor */ + int64_t poll_shrink; /* polling time shrink factor */ + ++ /* AIO engine parameters */ ++ int64_t aio_max_batch; /* maximum number of requests in a batch */ ++ + /* + * List of handlers participating in userspace polling. Protected by + * ctx->list_lock. 
Iterated and modified mostly by the event loop thread +@@ -727,4 +730,13 @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, + int64_t grow, int64_t shrink, + Error **errp); + ++/** ++ * aio_context_set_aio_params: ++ * @ctx: the aio context ++ * @max_batch: maximum number of requests in a batch, 0 means that the ++ * engine will use its default ++ */ ++void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch, ++ Error **errp); ++ + #endif +diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h +index f177142f16..7f714bd136 100644 +--- a/include/sysemu/iothread.h ++++ b/include/sysemu/iothread.h +@@ -37,6 +37,9 @@ struct IOThread { + int64_t poll_max_ns; + int64_t poll_grow; + int64_t poll_shrink; ++ ++ /* AioContext AIO engine parameters */ ++ int64_t aio_max_batch; + }; + typedef struct IOThread IOThread; + +diff --git a/iothread.c b/iothread.c +index a12de6e455..272be5e146 100644 +--- a/iothread.c ++++ b/iothread.c +@@ -159,6 +159,24 @@ static void iothread_init_gcontext(IOThread *iothread) + iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE); + } + ++static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) ++{ ++ ERRP_GUARD(); ++ ++ aio_context_set_poll_params(iothread->ctx, ++ iothread->poll_max_ns, ++ iothread->poll_grow, ++ iothread->poll_shrink, ++ errp); ++ if (*errp) { ++ return; ++ } ++ ++ aio_context_set_aio_params(iothread->ctx, ++ iothread->aio_max_batch, ++ errp); ++} ++ + static void iothread_complete(UserCreatable *obj, Error **errp) + { + Error *local_error = NULL; +@@ -178,11 +196,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp) + */ + iothread_init_gcontext(iothread); + +- aio_context_set_poll_params(iothread->ctx, +- iothread->poll_max_ns, +- iothread->poll_grow, +- iothread->poll_shrink, +- &local_error); ++ iothread_set_aio_context_params(iothread, &local_error); + if (local_error) { + error_propagate(errp, local_error); + 
aio_context_unref(iothread->ctx); +@@ -219,6 +233,9 @@ static PollParamInfo poll_grow_info = { + static PollParamInfo poll_shrink_info = { + "poll-shrink", offsetof(IOThread, poll_shrink), + }; ++static PollParamInfo aio_max_batch_info = { ++ "aio-max-batch", offsetof(IOThread, aio_max_batch), ++}; + + static void iothread_get_param(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +@@ -278,6 +295,29 @@ static void iothread_set_poll_param(Object *obj, Visitor *v, + } + } + ++static void iothread_get_aio_param(Object *obj, Visitor *v, ++ const char *name, void *opaque, Error **errp) ++{ ++ ++ iothread_get_param(obj, v, name, opaque, errp); ++} ++ ++static void iothread_set_aio_param(Object *obj, Visitor *v, ++ const char *name, void *opaque, Error **errp) ++{ ++ IOThread *iothread = IOTHREAD(obj); ++ ++ if (!iothread_set_param(obj, v, name, opaque, errp)) { ++ return; ++ } ++ ++ if (iothread->ctx) { ++ aio_context_set_aio_params(iothread->ctx, ++ iothread->aio_max_batch, ++ errp); ++ } ++} ++ + static void iothread_class_init(ObjectClass *klass, void *class_data) + { + UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); +@@ -295,6 +335,10 @@ static void iothread_class_init(ObjectClass *klass, void *class_data) + iothread_get_poll_param, + iothread_set_poll_param, + NULL, &poll_shrink_info); ++ object_class_property_add(klass, "aio-max-batch", "int", ++ iothread_get_aio_param, ++ iothread_set_aio_param, ++ NULL, &aio_max_batch_info); + } + + static const TypeInfo iothread_info = { +@@ -344,6 +388,7 @@ static int query_one_iothread(Object *object, void *opaque) + info->poll_max_ns = iothread->poll_max_ns; + info->poll_grow = iothread->poll_grow; + info->poll_shrink = iothread->poll_shrink; ++ info->aio_max_batch = iothread->aio_max_batch; + + QAPI_LIST_APPEND(*tail, info); + return 0; +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index cc15d9b6ee..2905bc1594 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -1889,6 
+1889,8 @@ void hmp_info_iothreads(Monitor *mon, const QDict *qdict) + monitor_printf(mon, " poll-max-ns=%" PRId64 "\n", value->poll_max_ns); + monitor_printf(mon, " poll-grow=%" PRId64 "\n", value->poll_grow); + monitor_printf(mon, " poll-shrink=%" PRId64 "\n", value->poll_shrink); ++ monitor_printf(mon, " aio-max-batch=%" PRId64 "\n", ++ value->aio_max_batch); + } + + qapi_free_IOThreadInfoList(info_list); +diff --git a/qapi/misc.json b/qapi/misc.json +index 156f98203e..5c2ca3b556 100644 +--- a/qapi/misc.json ++++ b/qapi/misc.json +@@ -86,6 +86,9 @@ + # @poll-shrink: how many ns will be removed from polling time, 0 means that + # it's not configured (since 2.9) + # ++# @aio-max-batch: maximum number of requests in a batch for the AIO engine, ++# 0 means that the engine will use its default (since 6.1) ++# + # Since: 2.0 + ## + { 'struct': 'IOThreadInfo', +@@ -93,7 +96,8 @@ + 'thread-id': 'int', + 'poll-max-ns': 'int', + 'poll-grow': 'int', +- 'poll-shrink': 'int' } } ++ 'poll-shrink': 'int', ++ 'aio-max-batch': 'int' } } + + ## + # @query-iothreads: +diff --git a/qapi/qom.json b/qapi/qom.json +index cd0e76d564..f361157903 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -516,12 +516,17 @@ + # algorithm detects it is spending too long polling without + # encountering events. 
0 selects a default behaviour (default: 0) + # ++# @aio-max-batch: maximum number of requests in a batch for the AIO engine, ++# 0 means that the engine will use its default ++# (default:0, since 6.1) ++# + # Since: 2.0 + ## + { 'struct': 'IothreadProperties', + 'data': { '*poll-max-ns': 'int', + '*poll-grow': 'int', +- '*poll-shrink': 'int' } } ++ '*poll-shrink': 'int', ++ '*aio-max-batch': 'int' } } + + ## + # @MemoryBackendProperties: +diff --git a/qemu-options.hx b/qemu-options.hx +index 79ca09feac..d5f1ec27c5 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -5185,7 +5185,7 @@ SRST + + CN=laptop.example.com,O=Example Home,L=London,ST=London,C=GB + +- ``-object iothread,id=id,poll-max-ns=poll-max-ns,poll-grow=poll-grow,poll-shrink=poll-shrink`` ++ ``-object iothread,id=id,poll-max-ns=poll-max-ns,poll-grow=poll-grow,poll-shrink=poll-shrink,aio-max-batch=aio-max-batch`` + Creates a dedicated event loop thread that devices can be + assigned to. This is known as an IOThread. By default device + emulation happens in vCPU threads or the main event loop thread. +@@ -5221,7 +5221,11 @@ SRST + the polling time when the algorithm detects it is spending too + long polling without encountering events. + +- The polling parameters can be modified at run-time using the ++ The ``aio-max-batch`` parameter is the maximum number of requests ++ in a batch for the AIO engine, 0 means that the engine will use ++ its default. 
++ ++ The IOThread parameters can be modified at run-time using the + ``qom-set`` command (where ``iothread1`` is the IOThread's + ``id``): + +diff --git a/util/aio-posix.c b/util/aio-posix.c +index 30f5354b1e..2b86777e91 100644 +--- a/util/aio-posix.c ++++ b/util/aio-posix.c +@@ -716,3 +716,15 @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, + + aio_notify(ctx); + } ++ ++void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch, ++ Error **errp) ++{ ++ /* ++ * No thread synchronization here, it doesn't matter if an incorrect value ++ * is used once. ++ */ ++ ctx->aio_max_batch = max_batch; ++ ++ aio_notify(ctx); ++} +diff --git a/util/aio-win32.c b/util/aio-win32.c +index 168717b51b..d5b09a1193 100644 +--- a/util/aio-win32.c ++++ b/util/aio-win32.c +@@ -440,3 +440,8 @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, + error_setg(errp, "AioContext polling is not implemented on Windows"); + } + } ++ ++void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch, ++ Error **errp) ++{ ++} +diff --git a/util/async.c b/util/async.c +index 674dbefb7c..6a9588d86b 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -537,6 +537,8 @@ AioContext *aio_context_new(Error **errp) + ctx->poll_grow = 0; + ctx->poll_shrink = 0; + ++ ctx->aio_max_batch = 0; ++ + return ctx; + fail: + g_source_destroy(&ctx->source); +-- +2.27.0 + diff --git a/kvm-iothread-generalize-iothread_set_param-iothread_get_.patch b/kvm-iothread-generalize-iothread_set_param-iothread_get_.patch new file mode 100644 index 0000000..8910da2 --- /dev/null +++ b/kvm-iothread-generalize-iothread_set_param-iothread_get_.patch @@ -0,0 +1,96 @@ +From 6f827f890e68c3b8bda80822edc09369e93da01f Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Thu, 29 Jul 2021 07:42:29 -0400 +Subject: [PATCH 17/39] iothread: generalize + iothread_set_param/iothread_get_param + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 
+RH-Commit: [9/15] 7c624847cfc636bdfa0d4f35062500a7f9e8437f (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Acked-by: Igor Mammedov +RH-Acked-by: Andrew Jones + +Changes in preparation for next patches where we add a new +parameter not related to the poll mechanism. + +Let's add two new generic functions (iothread_set_param and +iothread_get_param) that we use to set and get IOThread +parameters. + +Signed-off-by: Stefano Garzarella +Message-id: 20210721094211.69853-2-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 0445409d7497bededa1047f0d8298b0d4bb3b1a3) +Signed-off-by: Stefano Garzarella +Signed-off-by: Miroslav Rezanina +--- + iothread.c | 27 +++++++++++++++++++++++---- + 1 file changed, 23 insertions(+), 4 deletions(-) + +diff --git a/iothread.c b/iothread.c +index 7f086387be..a12de6e455 100644 +--- a/iothread.c ++++ b/iothread.c +@@ -220,7 +220,7 @@ static PollParamInfo poll_shrink_info = { + "poll-shrink", offsetof(IOThread, poll_shrink), + }; + +-static void iothread_get_poll_param(Object *obj, Visitor *v, ++static void iothread_get_param(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) + { + IOThread *iothread = IOTHREAD(obj); +@@ -230,7 +230,7 @@ static void iothread_get_poll_param(Object *obj, Visitor *v, + visit_type_int64(v, name, field, errp); + } + +-static void iothread_set_poll_param(Object *obj, Visitor *v, ++static bool iothread_set_param(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) + { + IOThread *iothread = IOTHREAD(obj); +@@ -239,17 +239,36 @@ static void iothread_set_poll_param(Object *obj, Visitor *v, + int64_t value; + + if (!visit_type_int64(v, name, &value, errp)) { +- return; ++ return false; + } + + if (value < 0) { + error_setg(errp, "%s value must be in range [0, %" PRId64 "]", + info->name, INT64_MAX); +- return; ++ return false; + } + + *field = value; + ++ return true; ++} ++ 
++static void iothread_get_poll_param(Object *obj, Visitor *v, ++ const char *name, void *opaque, Error **errp) ++{ ++ ++ iothread_get_param(obj, v, name, opaque, errp); ++} ++ ++static void iothread_set_poll_param(Object *obj, Visitor *v, ++ const char *name, void *opaque, Error **errp) ++{ ++ IOThread *iothread = IOTHREAD(obj); ++ ++ if (!iothread_set_param(obj, v, name, opaque, errp)) { ++ return; ++ } ++ + if (iothread->ctx) { + aio_context_set_poll_params(iothread->ctx, + iothread->poll_max_ns, +-- +2.27.0 + diff --git a/kvm-linux-aio-limit-the-batch-size-using-aio-max-batch-p.patch b/kvm-linux-aio-limit-the-batch-size-using-aio-max-batch-p.patch new file mode 100644 index 0000000..93fcc9b --- /dev/null +++ b/kvm-linux-aio-limit-the-batch-size-using-aio-max-batch-p.patch @@ -0,0 +1,84 @@ +From 6f4cb3e1e5d718356f16645e806d47cb2159ae98 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Thu, 29 Jul 2021 07:42:33 -0400 +Subject: [PATCH 19/39] linux-aio: limit the batch size using `aio-max-batch` + parameter + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 +RH-Commit: [11/15] 44e2f2d294d8ed1d13fb29c5c1599543b86c67e5 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Acked-by: Igor Mammedov +RH-Acked-by: Andrew Jones + +When there are multiple queues attached to the same AIO context, +some requests may experience high latency, since in the worst case +the AIO engine queue is only flushed when it is full (MAX_EVENTS) or +there are no more queues plugged. + +Commit 2558cb8dd4 ("linux-aio: increasing MAX_EVENTS to a larger +hardcoded value") changed MAX_EVENTS from 128 to 1024, to increase +the number of in-flight requests. But this change also increased +the potential maximum batch to 1024 elements. 
+ +When there is a single queue attached to the AIO context, the issue +is mitigated by laio_io_unplug(), which will flush the queue every +time it is invoked since there can't be other queues plugged. + +Let's use the new `aio-max-batch` IOThread parameter to mitigate +this issue, limiting the number of requests in a batch. + +We also define a default value (32): this value was obtained by running +some benchmarks and it represents a good tradeoff between the latency +increase while a request is queued and the cost of the io_submit(2) +system call. + +Signed-off-by: Stefano Garzarella +Message-id: 20210721094211.69853-4-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit d7ddd0a1618a75b31dc308bb37365ce1da972154) +Signed-off-by: Stefano Garzarella +Signed-off-by: Miroslav Rezanina +--- + block/linux-aio.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/block/linux-aio.c b/block/linux-aio.c +index 3c0527c2bf..0dab507b71 100644 +--- a/block/linux-aio.c ++++ b/block/linux-aio.c +@@ -28,6 +28,9 @@ + */ + #define MAX_EVENTS 1024 + ++/* Maximum number of requests in a batch. 
(default value) */ ++#define DEFAULT_MAX_BATCH 32 ++ + struct qemu_laiocb { + Coroutine *co; + LinuxAioState *ctx; +@@ -351,6 +354,10 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, + LinuxAioState *s = laiocb->ctx; + struct iocb *iocbs = &laiocb->iocb; + QEMUIOVector *qiov = laiocb->qiov; ++ int64_t max_batch = s->aio_context->aio_max_batch ?: DEFAULT_MAX_BATCH; ++ ++ /* limit the batch with the number of available events */ ++ max_batch = MIN_NON_ZERO(MAX_EVENTS - s->io_q.in_flight, max_batch); + + switch (type) { + case QEMU_AIO_WRITE: +@@ -371,7 +378,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, + s->io_q.in_queue++; + if (!s->io_q.blocked && + (!s->io_q.plugged || +- s->io_q.in_flight + s->io_q.in_queue >= MAX_EVENTS)) { ++ s->io_q.in_queue >= max_batch)) { + ioq_submit(s); + } + +-- +2.27.0 + diff --git a/kvm-migration-Allow-reset-of-postcopy_recover_triggered-.patch b/kvm-migration-Allow-reset-of-postcopy_recover_triggered-.patch new file mode 100644 index 0000000..980466c --- /dev/null +++ b/kvm-migration-Allow-reset-of-postcopy_recover_triggered-.patch @@ -0,0 +1,66 @@ +From 5e1535771bba299aae4de2d810100fa7fedfeca8 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 29 Jun 2021 14:13:56 -0400 +Subject: [PATCH 02/39] migration: Allow reset of postcopy_recover_triggered + when failed +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 25: migration: Move yank outside qemu_start_incoming_migration() +RH-Commit: [2/2] b766a7f36df4f889d74a2e8d518e1100e52ab726 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1974683 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu > + +It's possible qemu_start_incoming_migration() failed at any point, when it +happens we should reset postcopy_recover_triggered to false so that the user +can still retry with a saner incoming port. 
+ +Signed-off-by: Peter Xu +Message-Id: <20210629181356.217312-3-peterx@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit b7f9afd48e7bc5c341e55348f2c2eed08314be7d) +Fixes: b5eea99e ("migration: Add yank feature", 2021-01-13) +Signed-off-by: Leonardo Bras +Signed-off-by: Miroslav Rezanina +--- + migration/migration.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/migration/migration.c b/migration/migration.c +index f077640df2..9d185f0e28 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2109,6 +2109,13 @@ void qmp_migrate_recover(const char *uri, Error **errp) + { + MigrationIncomingState *mis = migration_incoming_get_current(); + ++ /* ++ * Don't even bother to use ERRP_GUARD() as it _must_ always be set by ++ * callers (no one should ignore a recover failure); if there is, it's a ++ * programming error. ++ */ ++ assert(errp); ++ + if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) { + error_setg(errp, "Migrate recover can only be run " + "when postcopy is paused."); +@@ -2127,6 +2134,12 @@ void qmp_migrate_recover(const char *uri, Error **errp) + * to continue using that newly established channel. 
+ */ + qemu_start_incoming_migration(uri, errp); ++ ++ /* Safe to dereference with the assert above */ ++ if (*errp) { ++ /* Reset the flag so user could still retry */ ++ qatomic_set(&mis->postcopy_recover_triggered, false); ++ } + } + + void qmp_migrate_pause(Error **errp) +-- +2.27.0 + diff --git a/kvm-migration-Move-bitmap_mutex-out-of-migration_bitmap_.patch b/kvm-migration-Move-bitmap_mutex-out-of-migration_bitmap_.patch new file mode 100644 index 0000000..d163571 --- /dev/null +++ b/kvm-migration-Move-bitmap_mutex-out-of-migration_bitmap_.patch @@ -0,0 +1,111 @@ +From 7726f6461eebf2c4a4b129f1c98add25c0b1bee2 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 29 Jul 2021 07:42:16 -0400 +Subject: [PATCH 12/39] migration: Move bitmap_mutex out of + migration_bitmap_clear_dirty() + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 +RH-Commit: [4/15] cc207372dab253a4db3b6d351fa2fb2f442437ad (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Acked-by: Igor Mammedov +RH-Acked-by: Andrew Jones + +Taking the mutex every time for each dirty bit to clear is too slow, especially +we'll take/release even if the dirty bit is cleared. So far it's only used to +sync with special cases with qemu_guest_free_page_hint() against migration +thread, nothing really that serious yet. Let's move the lock to be upper. + +There're two callers of migration_bitmap_clear_dirty(). + +For migration, move it into ram_save_iterate(). With the help of MAX_WAIT +logic, we'll only run ram_save_iterate() for no more than 50ms-ish time, so +taking the lock once there at the entry. It also means any call sites to +qemu_guest_free_page_hint() can be delayed; but it should be very rare, only +during migration, and I don't see a problem with it. + +For COLO, move it up to colo_flush_ram_cache(). 
I think COLO forgot to take +that lock even when calling ramblock_sync_dirty_bitmap(), where another example +is migration_bitmap_sync() who took it right. So let the mutex cover both the +ramblock_sync_dirty_bitmap() and migration_bitmap_clear_dirty() calls. + +It's even possible to drop the lock so we use atomic operations upon rb->bmap +and the variable migration_dirty_pages. I didn't do it just to still be safe, +also not predictable whether the frequent atomic ops could bring overhead too +e.g. on huge vms when it happens very often. When that really comes, we can +keep a local counter and periodically call atomic ops. Keep it simple for now. + +Cc: Wei Wang +Cc: David Hildenbrand +Cc: Hailiang Zhang +Cc: Dr. David Alan Gilbert +Cc: Juan Quintela +Cc: Leonardo Bras Soares Passos +Signed-off-by: Peter Xu +Message-Id: <20210630200805.280905-1-peterx@redhat.com> +Reviewed-by: Wei Wang +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 63268c4970a5f126cc9af75f3ccb8057abef5ec0) +Signed-off-by: Peter Xu +Signed-off-by: Miroslav Rezanina +--- + migration/ram.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 4682f3625c..5d64917dce 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -819,8 +819,6 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, + { + bool ret; + +- QEMU_LOCK_GUARD(&rs->bitmap_mutex); +- + /* + * Clear dirty bitmap if needed. This _must_ be called before we + * send any of the page in the chunk because we need to make sure +@@ -2869,6 +2867,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + goto out; + } + ++ /* ++ * We'll take this lock a little bit long, but it's okay for two reasons. 
++ * Firstly, the only possible other thread to take it is who calls ++ * qemu_guest_free_page_hint(), which should be rare; secondly, see ++ * MAX_WAIT (if curious, further see commit 4508bd9ed8053ce) below, which ++ * guarantees that we'll at least released it in a regular basis. ++ */ ++ qemu_mutex_lock(&rs->bitmap_mutex); + WITH_RCU_READ_LOCK_GUARD() { + if (ram_list.version != rs->last_version) { + ram_state_reset(rs); +@@ -2928,6 +2934,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + i++; + } + } ++ qemu_mutex_unlock(&rs->bitmap_mutex); + + /* + * Must occur before EOS (or any QEMUFile operation) +@@ -3710,6 +3717,7 @@ void colo_flush_ram_cache(void) + unsigned long offset = 0; + + memory_global_dirty_log_sync(); ++ qemu_mutex_lock(&ram_state->bitmap_mutex); + WITH_RCU_READ_LOCK_GUARD() { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { + ramblock_sync_dirty_bitmap(ram_state, block); +@@ -3738,6 +3746,7 @@ void colo_flush_ram_cache(void) + } + } + trace_colo_flush_ram_cache_end(); ++ qemu_mutex_unlock(&ram_state->bitmap_mutex); + } + + /** +-- +2.27.0 + diff --git a/kvm-migration-Move-yank-outside-qemu_start_incoming_migr.patch b/kvm-migration-Move-yank-outside-qemu_start_incoming_migr.patch new file mode 100644 index 0000000..9822ab6 --- /dev/null +++ b/kvm-migration-Move-yank-outside-qemu_start_incoming_migr.patch @@ -0,0 +1,94 @@ +From 4c11e06222ca5a88f48f2d47adc3a7da306bb345 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 29 Jun 2021 14:13:55 -0400 +Subject: [PATCH 01/39] migration: Move yank outside + qemu_start_incoming_migration() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 25: migration: Move yank outside qemu_start_incoming_migration() +RH-Commit: [1/2] e5694b0ae9a55f6b147c336e86fce6f4f2163db6 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1974683 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu > + +Starting from commit b5eea99ec2f5c, 
qmp_migrate_recover() calls unregister +before calling qemu_start_incoming_migration(). I believe it wanted to mitigate +the next call to yank_register_instance(), but I think that's wrong. + +Firstly, during recovery we should keep the yank instance there, not +"quickly remove it and add it back". + +Meanwhile, calling qmp_migrate_recover() twice with b5eea99ec2f5c will directly +crash the dest qemu (right now it can't; but it'll start to work right after +the next patch) because the 1st call of qmp_migrate_recover() will unregister +permanently when the channel failed to establish, then the 2nd call of +qmp_migrate_recover() crashes at yank_unregister_instance(). + +This patch fixes it by moving yank ops out of qemu_start_incoming_migration() +into qmp_migrate_incoming(). For qmp_migrate_recover(), drop the unregister of +the yank instance too since we keep it there during the recovery phase. + +Signed-off-by: Peter Xu +Reviewed-by: Dr. David Alan Gilbert +Message-Id: <20210629181356.217312-2-peterx@redhat.com> +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit cc48c587d25ff5dd7dddb4e5072de9ca8464c832) +Fixes: b5eea99e ("migration: Add yank feature", 2021-01-13) +Signed-off-by: Leonardo Bras +Signed-off-by: Miroslav Rezanina +--- + migration/migration.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 4afc6069b6..f077640df2 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -454,10 +454,6 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp) + { + const char *p = NULL; + +- if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { +- return; +- } +- + qapi_event_send_migration(MIGRATION_STATUS_SETUP); + if (strstart(uri, "tcp:", &p) || + strstart(uri, "unix:", NULL) || +@@ -472,7 +468,6 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp) + } else if (strstart(uri, "fd:", &p)) { + fd_start_incoming_migration(p, errp); + } else { +- yank_unregister_instance(MIGRATION_YANK_INSTANCE); + error_setg(errp, "unknown migration protocol: %s", uri); + } + } +@@ -2095,9 +2090,14 @@ void qmp_migrate_incoming(const char *uri, Error **errp) + return; + } + ++ if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { ++ return; ++ } ++ + qemu_start_incoming_migration(uri, &local_err); + + if (local_err) { ++ yank_unregister_instance(MIGRATION_YANK_INSTANCE); + error_propagate(errp, local_err); + return; + } +@@ -2126,7 +2126,6 @@ void qmp_migrate_recover(const char *uri, Error **errp) + * only re-setup the migration stream and poke existing migration + * to continue using that newly established channel. 
+ */ +- yank_unregister_instance(MIGRATION_YANK_INSTANCE); + qemu_start_incoming_migration(uri, errp); + } + +-- +2.27.0 + diff --git a/kvm-migration-failover-continue-to-wait-card-unplug-on-e.patch b/kvm-migration-failover-continue-to-wait-card-unplug-on-e.patch new file mode 100644 index 0000000..c0cf7e6 --- /dev/null +++ b/kvm-migration-failover-continue-to-wait-card-unplug-on-e.patch @@ -0,0 +1,59 @@ +From 0f41423f2bb57e77400cfec0683b7514c9798c55 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Wed, 4 Aug 2021 07:10:13 -0400 +Subject: [PATCH 22/39] migration: failover: continue to wait card unplug on + error + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 +RH-Commit: [14/15] e3bfdeee808b2ecd5b141464193b6ee97a4bca3a (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Acked-by: Igor Mammedov +RH-Acked-by: Andrew Jones + +If the user cancels the migration in the unplug-wait state, +QEMU will try to plug back the card and this fails because the card +is partially unplugged. +To avoid the problem, continue to wait for the card unplug, but use a +timeout so that the migration can still be canceled if the card never +finishes unplugging. + +Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1976852 +Signed-off-by: Laurent Vivier +Reviewed-by: Dr. David Alan Gilbert +Message-Id: <20210629155007.629086-3-lvivier@redhat.com> +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 944bc528421aa848ca218ee535ea923a4147a525) +Signed-off-by: Laurent Vivier +Signed-off-by: Miroslav Rezanina +--- + migration/migration.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/migration/migration.c b/migration/migration.c +index 58df1dac05..fc1d0db04a 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3696,6 +3696,17 @@ static void qemu_savevm_wait_unplug(MigrationState *s, int old_state, + qemu_savevm_state_guest_unplug_pending()) { + qemu_sem_timedwait(&s->wait_unplug_sem, 250); + } ++ if (s->state != MIGRATION_STATUS_WAIT_UNPLUG) { ++ int timeout = 120; /* 30 seconds */ ++ /* ++ * migration has been canceled ++ * but as we have started an unplug we must wait the end ++ * to be able to plug back the card ++ */ ++ while (timeout-- && qemu_savevm_state_guest_unplug_pending()) { ++ qemu_sem_timedwait(&s->wait_unplug_sem, 250); ++ } ++ } + + migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state); + } else { +-- +2.27.0 + diff --git a/kvm-migration-failover-reset-partially_hotplugged.patch b/kvm-migration-failover-reset-partially_hotplugged.patch new file mode 100644 index 0000000..97fb427 --- /dev/null +++ b/kvm-migration-failover-reset-partially_hotplugged.patch @@ -0,0 +1,44 @@ +From 2e07c4c66a1199ef33fb2e89164e03ca2acdcf10 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 29 Jul 2021 07:42:12 -0400 +Subject: [PATCH 10/39] migration: failover: reset partially_hotplugged + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 +RH-Commit: [2/15] b01f5640ce93192b2239ad4ef15ff53d977f2341 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Acked-by: Igor Mammedov +RH-Acked-by: Andrew Jones + +When the card is plugged back, reset the partially_hotplugged flag to false + +Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1787194 +Signed-off-by: 
Laurent Vivier +Message-Id: <20210629152937.619193-1-lvivier@redhat.com> +Reviewed-by: Juan Quintela +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 109c20ea28cc0d82fa353e692345b172cb5721cc) +Signed-off-by: Laurent Vivier +Signed-off-by: Miroslav Rezanina +--- + hw/net/virtio-net.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 914051feb7..c2e32fedbf 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3134,6 +3134,7 @@ static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, + } + hotplug_handler_plug(hotplug_ctrl, dev, &err); + } ++ pdev->partially_hotplugged = false; + + out: + error_propagate(errp, err); +-- +2.27.0 + diff --git a/kvm-migration-move-wait-unplug-loop-to-its-own-function.patch b/kvm-migration-move-wait-unplug-loop-to-its-own-function.patch new file mode 100644 index 0000000..524cda3 --- /dev/null +++ b/kvm-migration-move-wait-unplug-loop-to-its-own-function.patch @@ -0,0 +1,118 @@ +From e9848f4a4f45960bff1a2a7bc4a4670670de37a5 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Wed, 4 Aug 2021 07:10:12 -0400 +Subject: [PATCH 21/39] migration: move wait-unplug loop to its own function + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 +RH-Commit: [13/15] ab3f26a86e4ea955678323608512e54af2a7c83b (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Acked-by: Igor Mammedov +RH-Acked-by: Andrew Jones + +The loop is used in migration_thread() and bg_migration_thread(), +so we can move it to its own function and call it from these both places. + +Moreover, in migration_thread() we have a wrong state transition from +SETUP to ACTIVE while state could be WAIT_UNPLUG. This is correctly +managed in bg_migration_thread() so use this code instead. 
+ +Signed-off-by: Laurent Vivier +Message-Id: <20210629155007.629086-2-lvivier@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Juan Quintela +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit fde93d99d9c208c07e2dcc24cd04e824d2b65b35) +Signed-off-by: Laurent Vivier +Signed-off-by: Miroslav Rezanina +--- + migration/migration.c | 54 +++++++++++++++++++++---------------------- + 1 file changed, 26 insertions(+), 28 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 9cf1cde39d..58df1dac05 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3681,6 +3681,28 @@ bool migration_rate_limit(void) + return urgent; + } + ++/* ++ * if failover devices are present, wait they are completely ++ * unplugged ++ */ ++ ++static void qemu_savevm_wait_unplug(MigrationState *s, int old_state, ++ int new_state) ++{ ++ if (qemu_savevm_state_guest_unplug_pending()) { ++ migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG); ++ ++ while (s->state == MIGRATION_STATUS_WAIT_UNPLUG && ++ qemu_savevm_state_guest_unplug_pending()) { ++ qemu_sem_timedwait(&s->wait_unplug_sem, 250); ++ } ++ ++ migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state); ++ } else { ++ migrate_set_state(&s->state, old_state, new_state); ++ } ++} ++ + /* + * Master migration thread on the source VM. + * It drives the migration and pumps the data down the outgoing channel. 
+@@ -3727,22 +3749,10 @@ static void *migration_thread(void *opaque) + + qemu_savevm_state_setup(s->to_dst_file); + +- if (qemu_savevm_state_guest_unplug_pending()) { +- migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, +- MIGRATION_STATUS_WAIT_UNPLUG); +- +- while (s->state == MIGRATION_STATUS_WAIT_UNPLUG && +- qemu_savevm_state_guest_unplug_pending()) { +- qemu_sem_timedwait(&s->wait_unplug_sem, 250); +- } +- +- migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, +- MIGRATION_STATUS_ACTIVE); +- } ++ qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP, ++ MIGRATION_STATUS_ACTIVE); + + s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; +- migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, +- MIGRATION_STATUS_ACTIVE); + + trace_migration_thread_setup_complete(); + +@@ -3850,21 +3860,9 @@ static void *bg_migration_thread(void *opaque) + qemu_savevm_state_header(s->to_dst_file); + qemu_savevm_state_setup(s->to_dst_file); + +- if (qemu_savevm_state_guest_unplug_pending()) { +- migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, +- MIGRATION_STATUS_WAIT_UNPLUG); +- +- while (s->state == MIGRATION_STATUS_WAIT_UNPLUG && +- qemu_savevm_state_guest_unplug_pending()) { +- qemu_sem_timedwait(&s->wait_unplug_sem, 250); +- } ++ qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP, ++ MIGRATION_STATUS_ACTIVE); + +- migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, +- MIGRATION_STATUS_ACTIVE); +- } else { +- migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, +- MIGRATION_STATUS_ACTIVE); +- } + s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; + + trace_migration_thread_setup_complete(); +-- +2.27.0 + diff --git a/kvm-pc-bios-s390-ccw-Allow-building-with-Clang-too.patch b/kvm-pc-bios-s390-ccw-Allow-building-with-Clang-too.patch new file mode 100644 index 0000000..e443376 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-Allow-building-with-Clang-too.patch @@ -0,0 +1,77 @@ +From 9da71839fdc4e8b9d034998b0ed8a7ee9dfe7645 Mon Sep 17 00:00:00 
2001 +From: Thomas Huth +Date: Sun, 2 May 2021 13:22:21 +0200 +Subject: [PATCH 34/39] pc-bios/s390-ccw: Allow building with Clang, too +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack +RH-Commit: [7/11] d7c510f1e5f6434f6b3e4bab5b5f75403cbc7e1b (jmaloy/qemu-kvm-centos-jon) +RH-Bugzilla: 1939509 1940132 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck + +Clang unfortunately does not support generating code for the z900 +architecture level and starts with the z10 instead. Thus to be able +to support compiling with Clang, we have to check for the supported +compiler flags. The disadvantage is of course that the bios image +will only run with z10 guest CPUs upwards (which is what most people +use anyway), so just in case let's also emit a warning in that case +(we will continue to ship firmware images that have been pre-built +with GCC in future releases, so this should not impact normal users, +too). 
+ +Message-Id: <20210502174836.838816-5-thuth@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Cornelia Huck +Signed-off-by: Thomas Huth +(cherry picked from commit a5b2afd522dde375c38cf94b7c696ffa3faba2fb) +Signed-off-by: Jon Maloy +Signed-off-by: Miroslav Rezanina +--- + configure | 9 ++++++++- + pc-bios/s390-ccw/Makefile | 3 ++- + 2 files changed, 10 insertions(+), 2 deletions(-) + +diff --git a/configure b/configure +index 7edc08afb3..83d8af7fe4 100755 +--- a/configure ++++ b/configure +@@ -5424,9 +5424,16 @@ if { test "$cpu" = "i386" || test "$cpu" = "x86_64"; } && \ + fi + + # Only build s390-ccw bios if we're on s390x and the compiler has -march=z900 ++# or -march=z10 (which is the lowest architecture level that Clang supports) + if test "$cpu" = "s390x" ; then + write_c_skeleton +- if compile_prog "-march=z900" ""; then ++ compile_prog "-march=z900" "" ++ has_z900=$? ++ if [ $has_z900 = 0 ] || compile_prog "-march=z10" ""; then ++ if [ $has_z900 != 0 ]; then ++ echo "WARNING: Your compiler does not support the z900!" ++ echo " The s390-ccw bios will only work with guest CPUs >= z10." ++ fi + roms="$roms s390-ccw" + # SLOF is required for building the s390-ccw firmware on s390x, + # since it is using the libnet code from SLOF for network booting. 
+diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile +index 83fb1afb73..cee9d2c63b 100644 +--- a/pc-bios/s390-ccw/Makefile ++++ b/pc-bios/s390-ccw/Makefile +@@ -34,7 +34,8 @@ QEMU_CFLAGS += $(call cc-option,-Werror $(QEMU_CFLAGS),-Wno-stringop-overflow) + QEMU_CFLAGS += -ffreestanding -fno-delete-null-pointer-checks -fno-common -fPIE + QEMU_CFLAGS += -fwrapv -fno-strict-aliasing -fno-asynchronous-unwind-tables + QEMU_CFLAGS += $(call cc-option, $(QEMU_CFLAGS), -fno-stack-protector) +-QEMU_CFLAGS += -msoft-float -march=z900 ++QEMU_CFLAGS += -msoft-float ++QEMU_CFLAGS += $(call cc-option, $(QEMU_CFLAGS),-march=z900,-march=z10) + QEMU_CFLAGS += -std=gnu99 + LDFLAGS += -Wl,-pie -nostdlib + +-- +2.27.0 + diff --git a/kvm-pc-bios-s390-ccw-Fix-inline-assembly-for-older-versi.patch b/kvm-pc-bios-s390-ccw-Fix-inline-assembly-for-older-versi.patch new file mode 100644 index 0000000..aeca11f --- /dev/null +++ b/kvm-pc-bios-s390-ccw-Fix-inline-assembly-for-older-versi.patch @@ -0,0 +1,106 @@ +From c783eab8a3770703a39bbbd7edd23af7b9cb8f14 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 12 May 2021 19:15:48 +0200 +Subject: [PATCH 35/39] pc-bios/s390-ccw: Fix inline assembly for older + versions of Clang +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack +RH-Commit: [8/11] 9a69c7705b8128b4098f818c6b672d484e459c83 (jmaloy/qemu-kvm-centos-jon) +RH-Bugzilla: 1939509 1940132 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck + +Clang versions before v11.0 insist on having the %rX or %cX register +names instead of just a number. Since our Travis-CI is currently +still using Clang v6.0, we have to fix this to avoid failing jobs. 
+ +Message-Id: <20210512171550.476130-2-thuth@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Thomas Huth +(cherry picked from commit 052b66e7211af64964e005126eaa3c944b296b0e) +Signed-off-by: Jon Maloy +Signed-off-by: Miroslav Rezanina +--- + pc-bios/s390-ccw/helper.h | 2 +- + pc-bios/s390-ccw/jump2ipl.c | 4 ++-- + pc-bios/s390-ccw/menu.c | 8 ++++---- + pc-bios/s390-ccw/virtio.c | 2 +- + 4 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/pc-bios/s390-ccw/helper.h b/pc-bios/s390-ccw/helper.h +index dfcfea0ff0..3d0731c4c6 100644 +--- a/pc-bios/s390-ccw/helper.h ++++ b/pc-bios/s390-ccw/helper.h +@@ -31,7 +31,7 @@ static inline void *u32toptr(uint32_t n) + + static inline void yield(void) + { +- asm volatile ("diag 0,0,0x44" ++ asm volatile ("diag %%r0,%%r0,0x44" + : : + : "memory", "cc"); + } +diff --git a/pc-bios/s390-ccw/jump2ipl.c b/pc-bios/s390-ccw/jump2ipl.c +index 73e4367e09..78f5f46533 100644 +--- a/pc-bios/s390-ccw/jump2ipl.c ++++ b/pc-bios/s390-ccw/jump2ipl.c +@@ -64,8 +64,8 @@ void jump_to_IPL_code(uint64_t address) + * We use the load normal reset to keep r15 unchanged. jump_to_IPL_2 + * can then use r15 as its stack pointer. + */ +- asm volatile("lghi 1,1\n\t" +- "diag 1,1,0x308\n\t" ++ asm volatile("lghi %%r1,1\n\t" ++ "diag %%r1,%%r1,0x308\n\t" + : : : "1", "memory"); + panic("\n! 
IPL returns !\n"); + } +diff --git a/pc-bios/s390-ccw/menu.c b/pc-bios/s390-ccw/menu.c +index de8260a5d6..d601952d3e 100644 +--- a/pc-bios/s390-ccw/menu.c ++++ b/pc-bios/s390-ccw/menu.c +@@ -36,9 +36,9 @@ static inline void enable_clock_int(void) + uint64_t tmp = 0; + + asm volatile( +- "stctg 0,0,%0\n" ++ "stctg %%c0,%%c0,%0\n" + "oi 6+%0, 0x8\n" +- "lctlg 0,0,%0" ++ "lctlg %%c0,%%c0,%0" + : : "Q" (tmp) : "memory" + ); + } +@@ -48,9 +48,9 @@ static inline void disable_clock_int(void) + uint64_t tmp = 0; + + asm volatile( +- "stctg 0,0,%0\n" ++ "stctg %%c0,%%c0,%0\n" + "ni 6+%0, 0xf7\n" +- "lctlg 0,0,%0" ++ "lctlg %%c0,%%c0,%0" + : : "Q" (tmp) : "memory" + ); + } +diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c +index ab49840db8..5d2c6e3381 100644 +--- a/pc-bios/s390-ccw/virtio.c ++++ b/pc-bios/s390-ccw/virtio.c +@@ -54,7 +54,7 @@ static long kvm_hypercall(unsigned long nr, unsigned long param1, + register ulong r_param3 asm("4") = param3; + register long retval asm("2"); + +- asm volatile ("diag 2,4,0x500" ++ asm volatile ("diag %%r2,%%r4,0x500" + : "=d" (retval) + : "d" (r_nr), "0" (r_param1), "r"(r_param2), "d"(r_param3) + : "memory", "cc"); +-- +2.27.0 + diff --git a/kvm-pc-bios-s390-ccw-Fix-the-cc-option-macro-in-the-Make.patch b/kvm-pc-bios-s390-ccw-Fix-the-cc-option-macro-in-the-Make.patch new file mode 100644 index 0000000..06d0d4d --- /dev/null +++ b/kvm-pc-bios-s390-ccw-Fix-the-cc-option-macro-in-the-Make.patch @@ -0,0 +1,48 @@ +From f2d40216872a40bc5f5089de760c7ba0e3a710bc Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Sun, 2 May 2021 13:07:46 +0200 +Subject: [PATCH 32/39] pc-bios/s390-ccw: Fix the cc-option macro in the + Makefile + +RH-Author: Jon Maloy +RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack +RH-Commit: [5/11] 75379671567451e12ca32a3ea35d1ad2aa04bf5f (jmaloy/qemu-kvm-centos-jon) +RH-Bugzilla: 1939509 1940132 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Thomas Huth 
+RH-Acked-by: Cornelia Huck + +The cc-option macro is not doing what it should - compared with the +original from the rules.mak file that got removed with commit +660f793093 ("Makefile: inline the relevant parts of rules.mak"), +the arguments got changed and thus the macro is rather doubling +the QEMU_CFLAGS than adding the flag that should be tested. + +Message-Id: <20210502174836.838816-3-thuth@redhat.com> +Fixes: 22fb2ab096 ("pc-bios/s390-ccw: do not use rules.mak") +Signed-off-by: Thomas Huth +(cherry picked from commit 3462ff35512e925df5ee8c079ed46d4c93b633a7) +Signed-off-by: Jon Maloy +Signed-off-by: Miroslav Rezanina +--- + pc-bios/s390-ccw/Makefile | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile +index 29fd9019b8..f0fe84c9eb 100644 +--- a/pc-bios/s390-ccw/Makefile ++++ b/pc-bios/s390-ccw/Makefile +@@ -6,8 +6,8 @@ include ../../config-host.mak + CFLAGS = -O2 -g + + quiet-command = $(if $(V),$1,$(if $(2),@printf " %-7s %s\n" $2 $3 && $1, @$1)) +-cc-option = $(if $(shell $(CC) $1 -S -o /dev/null -xc /dev/null > /dev/null \ +- 2>&1 && echo OK), $1, $2) ++cc-option = $(if $(shell $(CC) $1 $2 -S -o /dev/null -xc /dev/null \ ++ >/dev/null 2>&1 && echo OK),$2,$3) + + VPATH_SUFFIXES = %.c %.h %.S %.m %.mak %.sh %.rc Kconfig% %.json.in + set-vpath = $(if $1,$(foreach PATTERN,$(VPATH_SUFFIXES),$(eval vpath $(PATTERN) $1))) +-- +2.27.0 + diff --git a/kvm-pc-bios-s390-ccw-Silence-GCC-11-stringop-overflow-wa.patch b/kvm-pc-bios-s390-ccw-Silence-GCC-11-stringop-overflow-wa.patch new file mode 100644 index 0000000..ee9f702 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-Silence-GCC-11-stringop-overflow-wa.patch @@ -0,0 +1,75 @@ +From c5b348e6d0334333295332c55fc4be51ce2668b8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 22 Apr 2021 16:59:11 +0200 +Subject: [PATCH 33/39] pc-bios/s390-ccw: Silence GCC 11 stringop-overflow + warning +MIME-Version: 1.0 
+Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack +RH-Commit: [6/11] 92851a154f2425363aa1f5ed2bb12740f589229e (jmaloy/qemu-kvm-centos-jon) +RH-Bugzilla: 1939509 1940132 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck + +When building on Fedora 34 (gcc version 11.0.0 20210210) we get: + + In file included from pc-bios/s390-ccw/main.c:11: + In function ‘memset’, + inlined from ‘boot_setup’ at pc-bios/s390-ccw/main.c:185:5, + inlined from ‘main’ at pc-bios/s390-ccw/main.c:288:5: + pc-bios/s390-ccw/libc.h:28:14: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=] + 28 | p[i] = c; + | ~~~~~^~~ + +The offending code is: + + memset((char *)S390EP, 0, 6); + +where S390EP is a const address: + + #define S390EP 0x10008 + +The compiler doesn't know how big that pointed area is, so it assume that +its length is zero. This has been reported as BZ#99578 to GCC: +"gcc-11 -Warray-bounds or -Wstringop-overread warning when accessing a +pointer from integer literal" +https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578 + +As this warning does us more harm than good in the BIOS code (where +lot of direct accesses to low memory are done), silence this warning +for all BIOS objects. 
+ +Suggested-by: Thomas Huth +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20210422145911.2513980-1-philmd@redhat.com> +Acked-by: Christian Borntraeger +Message-Id: <20210502174836.838816-4-thuth@redhat.com> +[thuth: Use the pre-existing cc-option macro instead of adding a new one] +Reviewed-by: Cornelia Huck +Signed-off-by: Thomas Huth +(cherry picked from commit da231910d33084ccf63f07de210b145e0fa31d98) +Signed-off-by: Jon Maloy +Signed-off-by: Miroslav Rezanina +--- + pc-bios/s390-ccw/Makefile | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile +index f0fe84c9eb..83fb1afb73 100644 +--- a/pc-bios/s390-ccw/Makefile ++++ b/pc-bios/s390-ccw/Makefile +@@ -30,6 +30,7 @@ OBJECTS = start.o main.o bootmap.o jump2ipl.o sclp.o menu.o \ + virtio.o virtio-scsi.o virtio-blkdev.o libc.o cio.o dasd-ipl.o + + QEMU_CFLAGS := -Wall $(filter -W%, $(QEMU_CFLAGS)) ++QEMU_CFLAGS += $(call cc-option,-Werror $(QEMU_CFLAGS),-Wno-stringop-overflow) + QEMU_CFLAGS += -ffreestanding -fno-delete-null-pointer-checks -fno-common -fPIE + QEMU_CFLAGS += -fwrapv -fno-strict-aliasing -fno-asynchronous-unwind-tables + QEMU_CFLAGS += $(call cc-option, $(QEMU_CFLAGS), -fno-stack-protector) +-- +2.27.0 + diff --git a/kvm-pc-bios-s390-ccw-Silence-warning-from-Clang-by-marki.patch b/kvm-pc-bios-s390-ccw-Silence-warning-from-Clang-by-marki.patch new file mode 100644 index 0000000..e361980 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-Silence-warning-from-Clang-by-marki.patch @@ -0,0 +1,59 @@ +From 12acb42f2e6317a530fa01b5cf55a199231bfdce Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Sun, 2 May 2021 13:49:20 +0200 +Subject: [PATCH 31/39] pc-bios/s390-ccw: Silence warning from Clang by marking + panic() as noreturn +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack +RH-Commit: [4/11] 
806b776bf01b733e04664534641cf89d1cb48f1b (jmaloy/qemu-kvm-centos-jon) +RH-Bugzilla: 1939509 1940132 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck + +When compiling the s390-ccw bios with Clang, the compiler emits a warning: + + pc-bios/s390-ccw/main.c:210:5: warning: variable 'found' is used uninitialized + whenever switch default is taken [-Wsometimes-uninitialized] + default: + ^~~~~~~ + pc-bios/s390-ccw/main.c:214:16: note: uninitialized use occurs here + IPL_assert(found, "Boot device not found\n"); + ^~~~~ + +It's a false positive, it only happens because Clang is not smart enough +to see that the panic() function in the "default:" case can never return. + +Anyway, let's explicitely mark panic() with "noreturn" to shut up the +warning. + +Message-Id: <20210502174836.838816-2-thuth@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Cornelia Huck +Signed-off-by: Thomas Huth +(cherry picked from commit 679196a646c91b8ce9a97b0aa81ffb3776cf8046) +Signed-off-by: Jon Maloy +Signed-off-by: Miroslav Rezanina +--- + pc-bios/s390-ccw/s390-ccw.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h +index 6cd92669e9..79db69ff54 100644 +--- a/pc-bios/s390-ccw/s390-ccw.h ++++ b/pc-bios/s390-ccw/s390-ccw.h +@@ -89,6 +89,7 @@ bool menu_is_enabled_enum(void); + + #define MAX_BOOT_ENTRIES 31 + ++__attribute__ ((__noreturn__)) + static inline void panic(const char *string) + { + sclp_print(string); +-- +2.27.0 + diff --git a/kvm-pc-bios-s390-ccw-Use-reset_psw-pointer-instead-of-ha.patch b/kvm-pc-bios-s390-ccw-Use-reset_psw-pointer-instead-of-ha.patch new file mode 100644 index 0000000..c98c1c2 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-Use-reset_psw-pointer-instead-of-ha.patch @@ -0,0 +1,60 @@ +From 609d8661171760c7ead04f64359d47a77c31d474 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 23 Apr 2021 10:30:51 +0200 +Subject: [PATCH 29/39] pc-bios/s390-ccw: 
Use reset_psw pointer instead of + hard-coded null pointer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack +RH-Commit: [2/11] c65a986104a1830847e772879ca6eaf76c86b2f3 (jmaloy/qemu-kvm-centos-jon) +RH-Bugzilla: 1939509 1940132 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck + +When compiling the s390-ccw bios with clang, it emits a warning like this: + + pc-bios/s390-ccw/jump2ipl.c:86:9: warning: indirection of non-volatile null + pointer will be deleted, not trap [-Wnull-dereference] + if (*((uint64_t *)0) & RESET_PSW_MASK) { + ^~~~~~~~~~~~~~~~ + pc-bios/s390-ccw/jump2ipl.c:86:9: note: consider using __builtin_trap() or + qualifying pointer with 'volatile' + +We could add a "volatile" here to shut it up, but on the other hand, +we also have a pointer variable called "reset_psw" in this file already +that points to the PSW at address 0, so we can simply use that pointer +variable instead. 
+ +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20210423142440.582188-1-thuth@redhat.com> +Reviewed-by: Janosch Frank +Signed-off-by: Thomas Huth +(cherry picked from commit ff77712a8a2e15e5901fad35b9a6bb65974b2e4a) +Signed-off-by: Jon Maloy +Signed-off-by: Miroslav Rezanina +--- + pc-bios/s390-ccw/jump2ipl.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/pc-bios/s390-ccw/jump2ipl.c b/pc-bios/s390-ccw/jump2ipl.c +index b9c70d64a5..73e4367e09 100644 +--- a/pc-bios/s390-ccw/jump2ipl.c ++++ b/pc-bios/s390-ccw/jump2ipl.c +@@ -82,8 +82,8 @@ void jump_to_low_kernel(void) + jump_to_IPL_code(KERN_IMAGE_START); + } + +- /* Trying to get PSW at zero address */ +- if (*((uint64_t *)0) & RESET_PSW_MASK) { ++ /* Trying to get PSW at zero address (pointed to by reset_psw) */ ++ if (*reset_psw & RESET_PSW_MASK) { + /* + * Surely nobody will try running directly from lowcore, so + * let's use 0 as an indication that we want to load the reset +-- +2.27.0 + diff --git a/kvm-pc-bios-s390-ccw-bootmap-Silence-compiler-warning-fr.patch b/kvm-pc-bios-s390-ccw-bootmap-Silence-compiler-warning-fr.patch new file mode 100644 index 0000000..bace1cc --- /dev/null +++ b/kvm-pc-bios-s390-ccw-bootmap-Silence-compiler-warning-fr.patch @@ -0,0 +1,56 @@ +From c00df86dd570d78767c5435f97bbe1d06407e470 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 21 Apr 2021 17:48:48 +0200 +Subject: [PATCH 28/39] pc-bios/s390-ccw/bootmap: Silence compiler warning from + Clang +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack +RH-Commit: [1/11] 85e41a04a0f47afe23e62f70397a5f79b2703499 (jmaloy/qemu-kvm-centos-jon) +RH-Bugzilla: 1939509 1940132 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck + +When compiling the s390-ccw bios with Clang, the compiler complains: + + 
pc-bios/s390-ccw/bootmap.c:302:9: warning: logical not is only applied + to the left hand side of this comparison [-Wlogical-not-parentheses] + if (!mbr->dev_type == DEV_TYPE_ECKD) { + ^ ~~ + +The code works (more or less by accident), since dev_type can only be +0 or 1, but it's better of course to use the intended != operator here +instead. + +Fixes: 5dc739f343 ("Allow booting in case the first virtio-blk disk is bad") +Message-Id: <20210421163331.358178-1-thuth@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Christian Borntraeger +Signed-off-by: Thomas Huth +(cherry picked from commit d08a64940452060ab7ad5eb49cd5801131c2b9ec) +Signed-off-by: Jon Maloy +Signed-off-by: Miroslav Rezanina +--- + pc-bios/s390-ccw/bootmap.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c +index b46997c0b7..56411ab3b6 100644 +--- a/pc-bios/s390-ccw/bootmap.c ++++ b/pc-bios/s390-ccw/bootmap.c +@@ -299,7 +299,7 @@ static void ipl_eckd_cdl(void) + sclp_print("Bad block size in zIPL section of IPL2 record.\n"); + return; + } +- if (!mbr->dev_type == DEV_TYPE_ECKD) { ++ if (mbr->dev_type != DEV_TYPE_ECKD) { + sclp_print("Non-ECKD device type in zIPL section of IPL2 record.\n"); + return; + } +-- +2.27.0 + diff --git a/kvm-pc-bios-s390-ccw-netboot-Use-Wl-prefix-to-pass-param.patch b/kvm-pc-bios-s390-ccw-netboot-Use-Wl-prefix-to-pass-param.patch new file mode 100644 index 0000000..95c276d --- /dev/null +++ b/kvm-pc-bios-s390-ccw-netboot-Use-Wl-prefix-to-pass-param.patch @@ -0,0 +1,44 @@ +From 354026a79551358a5be4ed561e080ff550738e92 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 23 Apr 2021 17:20:46 +0200 +Subject: [PATCH 30/39] pc-bios/s390-ccw/netboot: Use "-Wl," prefix to pass + parameter to the linker + +RH-Author: Jon Maloy +RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack +RH-Commit: [3/11] 6a22a1705fbeb5fb2eab6c0e149a433286f98e5f 
(jmaloy/qemu-kvm-centos-jon) +RH-Bugzilla: 1939509 1940132 +RH-Acked-by: Danilo Cesar Lemes de Paula +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck + +We are using the compiler to do the linking of the bios files. GCC still +accepts the "-Ttext=..." linker flag directly and is smart enough to +pass it to the linker, but in case we are compiling with Clang, we have +to use the official way with the "-Wl," prefix instead. + +Message-Id: <20210423153646.593153-1-thuth@redhat.com> +Signed-off-by: Thomas Huth +(cherry picked from commit b460a220872c28a8da95cbc7e9369d26aa268848) +Signed-off-by: Jon Maloy +Signed-off-by: Miroslav Rezanina +--- + pc-bios/s390-ccw/netboot.mak | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/pc-bios/s390-ccw/netboot.mak b/pc-bios/s390-ccw/netboot.mak +index 577c023afe..68b4d7edcb 100644 +--- a/pc-bios/s390-ccw/netboot.mak ++++ b/pc-bios/s390-ccw/netboot.mak +@@ -6,7 +6,7 @@ NETOBJS := start.o sclp.o cio.o virtio.o virtio-net.o jump2ipl.o netmain.o + LIBC_INC := -nostdinc -I$(SLOF_DIR)/lib/libc/include + LIBNET_INC := -I$(SLOF_DIR)/lib/libnet + +-NETLDFLAGS := $(LDFLAGS) -Ttext=0x7800000 ++NETLDFLAGS := $(LDFLAGS) -Wl,-Ttext=0x7800000 + + $(NETOBJS): QEMU_CFLAGS += $(LIBC_INC) $(LIBNET_INC) + +-- +2.27.0 + diff --git a/kvm-qemu-img-Add-skip-broken-bitmaps-for-convert-bitmaps.patch b/kvm-qemu-img-Add-skip-broken-bitmaps-for-convert-bitmaps.patch new file mode 100644 index 0000000..aa106c9 --- /dev/null +++ b/kvm-qemu-img-Add-skip-broken-bitmaps-for-convert-bitmaps.patch @@ -0,0 +1,265 @@ +From c5a2313ba173568087d78f76cc0258e7a353830b Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Fri, 6 Aug 2021 15:07:49 -0400 +Subject: [PATCH 26/39] qemu-img: Add --skip-broken-bitmaps for 'convert + --bitmaps' +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 35: Synchronize with RHEL-AV 8.5 release 28 to RHEL 9 +RH-Commit: [3/4] 
4b7203c66367c601f9710bbcd91bdbdd56f0f8bd (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Philippe Mathieu-Daudé + +The point of 'qemu-img convert --bitmaps' is to be a convenience for +actions that are already possible through a string of smaller +'qemu-img bitmap' sub-commands. One situation not accounted for +already is that if a source image contains an inconsistent bitmap (for +example, because a qemu process died abruptly before flushing bitmap +state), the user MUST delete those inconsistent bitmaps before +anything else useful can be done with the image. + +We don't want to delete inconsistent bitmaps by default: although a +corrupt bitmap is only a loss of optimization rather than a corruption +of user-visible data, it is still nice to require the user to opt in +to the fact that they are aware of the loss of the bitmap. Still, +requiring the user to check 'qemu-img info' to see whether bitmaps are +consistent, then use 'qemu-img bitmap --remove' to remove offenders, +all before using 'qemu-img convert', is a lot more work than just +adding a knob 'qemu-img convert --bitmaps --skip-broken-bitmaps' which +opts in to skipping the broken bitmaps. + +After testing the new option, also demonstrate the way to manually fix +things (either deleting bad bitmaps, or re-creating them as empty) so +that it is possible to convert without the option. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1946084 +Signed-off-by: Eric Blake +Message-Id: <20210709153951.2801666-4-eblake@redhat.com> +[eblake: warning message tweak, test enhancements] +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 955171e4417bf39edb5503e694501e082a757731) +Signed-off-by: Eric Blake +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + docs/tools/qemu-img.rst | 8 ++++- + qemu-img.c | 29 +++++++++++---- + tests/qemu-iotests/tests/qemu-img-bitmaps | 16 ++++++++- + tests/qemu-iotests/tests/qemu-img-bitmaps.out | 35 ++++++++++++++++++- + 4 files changed, 79 insertions(+), 9 deletions(-) + +diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst +index c9efcfaefc..3df6277d6a 100644 +--- a/docs/tools/qemu-img.rst ++++ b/docs/tools/qemu-img.rst +@@ -414,7 +414,7 @@ Command description: + 4 + Error on reading data + +-.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME ++.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps [--skip-broken-bitmaps]] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME + + Convert the disk image *FILENAME* or a snapshot *SNAPSHOT_PARAM* + to disk image *OUTPUT_FILENAME* using format *OUTPUT_FMT*. It can +@@ -456,6 +456,12 @@ Command description: + *NUM_COROUTINES* specifies how many coroutines work in parallel during + the convert process (defaults to 8). + ++ Use of ``--bitmaps`` requests that any persistent bitmaps present in ++ the original are also copied to the destination. If any bitmap is ++ inconsistent in the source, the conversion will fail unless ++ ``--skip-broken-bitmaps`` is also specified to copy only the ++ consistent bitmaps. ++ + .. 
option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE] [-F BACKING_FMT] [-u] [-o OPTIONS] FILENAME [SIZE] + + Create the new disk image *FILENAME* of size *SIZE* and format +diff --git a/qemu-img.c b/qemu-img.c +index 7684684bfa..75bab32416 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -82,6 +82,7 @@ enum { + OPTION_MERGE = 274, + OPTION_BITMAPS = 275, + OPTION_FORCE = 276, ++ OPTION_SKIP_BROKEN = 277, + }; + + typedef enum OutputFormat { +@@ -2099,7 +2100,7 @@ static int convert_do_copy(ImgConvertState *s) + } + + /* Check that bitmaps can be copied, or output an error */ +-static int convert_check_bitmaps(BlockDriverState *src) ++static int convert_check_bitmaps(BlockDriverState *src, bool skip_broken) + { + BdrvDirtyBitmap *bm; + +@@ -2111,17 +2112,19 @@ static int convert_check_bitmaps(BlockDriverState *src) + if (!bdrv_dirty_bitmap_get_persistence(bm)) { + continue; + } +- if (bdrv_dirty_bitmap_inconsistent(bm)) { ++ if (!skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) { + error_report("Cannot copy inconsistent bitmap '%s'", + bdrv_dirty_bitmap_name(bm)); +- error_printf("Try 'qemu-img bitmap --remove' to delete it\n"); ++ error_printf("Try --skip-broken-bitmaps, or " ++ "use 'qemu-img bitmap --remove' to delete it\n"); + return -1; + } + } + return 0; + } + +-static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst) ++static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst, ++ bool skip_broken) + { + BdrvDirtyBitmap *bm; + Error *err = NULL; +@@ -2133,6 +2136,10 @@ static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst) + continue; + } + name = bdrv_dirty_bitmap_name(bm); ++ if (skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) { ++ warn_report("Skipping inconsistent bitmap '%s'", name); ++ continue; ++ } + qmp_block_dirty_bitmap_add(dst->node_name, name, + true, bdrv_dirty_bitmap_granularity(bm), + true, true, +@@ -2188,6 +2195,7 @@ static int img_convert(int argc, char **argv) 
+ bool force_share = false; + bool explict_min_sparse = false; + bool bitmaps = false; ++ bool skip_broken = false; + int64_t rate_limit = 0; + + ImgConvertState s = (ImgConvertState) { +@@ -2209,6 +2217,7 @@ static int img_convert(int argc, char **argv) + {"salvage", no_argument, 0, OPTION_SALVAGE}, + {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO}, + {"bitmaps", no_argument, 0, OPTION_BITMAPS}, ++ {"skip-broken-bitmaps", no_argument, 0, OPTION_SKIP_BROKEN}, + {0, 0, 0, 0} + }; + c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WUr:", +@@ -2337,6 +2346,9 @@ static int img_convert(int argc, char **argv) + case OPTION_BITMAPS: + bitmaps = true; + break; ++ case OPTION_SKIP_BROKEN: ++ skip_broken = true; ++ break; + } + } + +@@ -2344,6 +2356,11 @@ static int img_convert(int argc, char **argv) + out_fmt = "raw"; + } + ++ if (skip_broken && !bitmaps) { ++ error_report("Use of --skip-broken-bitmaps requires --bitmaps"); ++ goto fail_getopt; ++ } ++ + if (s.compressed && s.copy_range) { + error_report("Cannot enable copy offloading when -c is used"); + goto fail_getopt; +@@ -2573,7 +2590,7 @@ static int img_convert(int argc, char **argv) + ret = -1; + goto out; + } +- ret = convert_check_bitmaps(blk_bs(s.src[0])); ++ ret = convert_check_bitmaps(blk_bs(s.src[0]), skip_broken); + if (ret < 0) { + goto out; + } +@@ -2698,7 +2715,7 @@ static int img_convert(int argc, char **argv) + + /* Now copy the bitmaps */ + if (bitmaps && ret == 0) { +- ret = convert_copy_bitmaps(blk_bs(s.src[0]), out_bs); ++ ret = convert_copy_bitmaps(blk_bs(s.src[0]), out_bs, skip_broken); + } + + out: +diff --git a/tests/qemu-iotests/tests/qemu-img-bitmaps b/tests/qemu-iotests/tests/qemu-img-bitmaps +index 09c3d395d1..7a3fe8c3d3 100755 +--- a/tests/qemu-iotests/tests/qemu-img-bitmaps ++++ b/tests/qemu-iotests/tests/qemu-img-bitmaps +@@ -144,7 +144,21 @@ _img_info --format-specific | _filter_irrelevant_img_info + echo + $QEMU_IMG convert --bitmaps -O qcow2 "$TEST_IMG" "$TEST_IMG.copy" 
&& + echo "unexpected success" +-TEST_IMG=$TEST_IMG.copy _img_info --format-specific \ ++TEST_IMG="$TEST_IMG.copy" _img_info --format-specific \ ++ | _filter_irrelevant_img_info ++# Skipping the broken bitmaps works,... ++echo ++$QEMU_IMG convert --bitmaps --skip-broken-bitmaps \ ++ -O qcow2 "$TEST_IMG" "$TEST_IMG.copy" ++TEST_IMG="$TEST_IMG.copy" _img_info --format-specific \ ++ | _filter_irrelevant_img_info ++# ...as does removing them ++echo ++_rm_test_img "$TEST_IMG.copy" ++$QEMU_IMG bitmap --remove "$TEST_IMG" b0 ++$QEMU_IMG bitmap --remove --add "$TEST_IMG" b2 ++$QEMU_IMG convert --bitmaps -O qcow2 "$TEST_IMG" "$TEST_IMG.copy" ++TEST_IMG="$TEST_IMG.copy" _img_info --format-specific \ + | _filter_irrelevant_img_info + + # success, all done +diff --git a/tests/qemu-iotests/tests/qemu-img-bitmaps.out b/tests/qemu-iotests/tests/qemu-img-bitmaps.out +index 1e32833bf1..7a7429e320 100644 +--- a/tests/qemu-iotests/tests/qemu-img-bitmaps.out ++++ b/tests/qemu-iotests/tests/qemu-img-bitmaps.out +@@ -145,6 +145,39 @@ Format specific information: + corrupt: false + + qemu-img: Cannot copy inconsistent bitmap 'b0' +-Try 'qemu-img bitmap --remove' to delete it ++Try --skip-broken-bitmaps, or use 'qemu-img bitmap --remove' to delete it + qemu-img: Could not open 'TEST_DIR/t.IMGFMT.copy': Could not open 'TEST_DIR/t.IMGFMT.copy': No such file or directory ++ ++qemu-img: warning: Skipping inconsistent bitmap 'b0' ++qemu-img: warning: Skipping inconsistent bitmap 'b2' ++image: TEST_DIR/t.IMGFMT.copy ++file format: IMGFMT ++virtual size: 10 MiB (10485760 bytes) ++cluster_size: 65536 ++Format specific information: ++ bitmaps: ++ [0]: ++ flags: ++ [0]: auto ++ name: b4 ++ granularity: 65536 ++ corrupt: false ++ ++image: TEST_DIR/t.IMGFMT.copy ++file format: IMGFMT ++virtual size: 10 MiB (10485760 bytes) ++cluster_size: 65536 ++Format specific information: ++ bitmaps: ++ [0]: ++ flags: ++ [0]: auto ++ name: b4 ++ granularity: 65536 ++ [1]: ++ flags: ++ [0]: auto ++ name: b2 ++ 
granularity: 65536 ++ corrupt: false + *** done +-- +2.27.0 + diff --git a/kvm-qemu-img-Fail-fast-on-convert-bitmaps-with-inconsist.patch b/kvm-qemu-img-Fail-fast-on-convert-bitmaps-with-inconsist.patch new file mode 100644 index 0000000..bdd40b9 --- /dev/null +++ b/kvm-qemu-img-Fail-fast-on-convert-bitmaps-with-inconsist.patch @@ -0,0 +1,145 @@ +From 5e8f99ea87409e1423c2e1c5e445003cf4a032a9 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Fri, 6 Aug 2021 15:07:48 -0400 +Subject: [PATCH 25/39] qemu-img: Fail fast on convert --bitmaps with + inconsistent bitmap +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 35: Synchronize with RHEL-AV 8.5 release 28 to RHEL 9 +RH-Commit: [2/4] 3fd8d357c3a365d4bc142b3d339745e5b15c5894 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Philippe Mathieu-Daudé + +Waiting until the end of the convert operation (a potentially +time-consuming task) to finally detect that we can't copy a bitmap is +bad, comparing to failing fast up front. Furthermore, this prevents +us from leaving a file behind with a bitmap that is not marked as +inconsistent even though it does not have sane contents. + +This fixes the problems exposed in the previous patch to the iotest: +it adds a fast failure up front, and even if we don't fail early, it +ensures that any bitmap we add but do not properly populate is removed +again rather than left behind incomplete. + +Signed-off-by: Eric Blake +Message-Id: <20210709153951.2801666-3-eblake@redhat.com> +[eblake: add a hint to the warning message, simplify name computation] +Reviewed-by: Nir Soffer +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 74a4320f30632fa539507861b3835698282e462e) +Signed-off-by: Eric Blake +Signed-off-by: Danilo C. L. 
de Paula +Signed-off-by: Miroslav Rezanina +--- + qemu-img.c | 29 +++++++++++++++++-- + tests/qemu-iotests/tests/qemu-img-bitmaps | 3 +- + tests/qemu-iotests/tests/qemu-img-bitmaps.out | 21 ++------------ + 3 files changed, 30 insertions(+), 23 deletions(-) + +diff --git a/qemu-img.c b/qemu-img.c +index babb5573ab..7684684bfa 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -2098,6 +2098,29 @@ static int convert_do_copy(ImgConvertState *s) + return s->ret; + } + ++/* Check that bitmaps can be copied, or output an error */ ++static int convert_check_bitmaps(BlockDriverState *src) ++{ ++ BdrvDirtyBitmap *bm; ++ ++ if (!bdrv_supports_persistent_dirty_bitmap(src)) { ++ error_report("Source lacks bitmap support"); ++ return -1; ++ } ++ FOR_EACH_DIRTY_BITMAP(src, bm) { ++ if (!bdrv_dirty_bitmap_get_persistence(bm)) { ++ continue; ++ } ++ if (bdrv_dirty_bitmap_inconsistent(bm)) { ++ error_report("Cannot copy inconsistent bitmap '%s'", ++ bdrv_dirty_bitmap_name(bm)); ++ error_printf("Try 'qemu-img bitmap --remove' to delete it\n"); ++ return -1; ++ } ++ } ++ return 0; ++} ++ + static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst) + { + BdrvDirtyBitmap *bm; +@@ -2124,6 +2147,7 @@ static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst) + &err); + if (err) { + error_reportf_err(err, "Failed to populate bitmap %s: ", name); ++ qmp_block_dirty_bitmap_remove(dst->node_name, name, NULL); + return -1; + } + } +@@ -2549,9 +2573,8 @@ static int img_convert(int argc, char **argv) + ret = -1; + goto out; + } +- if (!bdrv_supports_persistent_dirty_bitmap(blk_bs(s.src[0]))) { +- error_report("Source lacks bitmap support"); +- ret = -1; ++ ret = convert_check_bitmaps(blk_bs(s.src[0])); ++ if (ret < 0) { + goto out; + } + } +diff --git a/tests/qemu-iotests/tests/qemu-img-bitmaps b/tests/qemu-iotests/tests/qemu-img-bitmaps +index 409c4497a3..09c3d395d1 100755 +--- a/tests/qemu-iotests/tests/qemu-img-bitmaps ++++ 
b/tests/qemu-iotests/tests/qemu-img-bitmaps +@@ -140,11 +140,10 @@ $QEMU_IO -c abort "$TEST_IMG" 2>/dev/null + $QEMU_IMG bitmap --add "$TEST_IMG" b4 + $QEMU_IMG bitmap --remove "$TEST_IMG" b1 + _img_info --format-specific | _filter_irrelevant_img_info ++# Proof that we fail fast if bitmaps can't be copied + echo + $QEMU_IMG convert --bitmaps -O qcow2 "$TEST_IMG" "$TEST_IMG.copy" && + echo "unexpected success" +-# Bug - even though we failed at conversion, we left a file around with +-# a bitmap marked as not corrupt + TEST_IMG=$TEST_IMG.copy _img_info --format-specific \ + | _filter_irrelevant_img_info + +diff --git a/tests/qemu-iotests/tests/qemu-img-bitmaps.out b/tests/qemu-iotests/tests/qemu-img-bitmaps.out +index 543b028da6..1e32833bf1 100644 +--- a/tests/qemu-iotests/tests/qemu-img-bitmaps.out ++++ b/tests/qemu-iotests/tests/qemu-img-bitmaps.out +@@ -144,22 +144,7 @@ Format specific information: + granularity: 65536 + corrupt: false + +-qemu-img: Failed to populate bitmap b0: Bitmap 'b0' is inconsistent and cannot be used +-Try block-dirty-bitmap-remove to delete this bitmap from disk +-image: TEST_DIR/t.IMGFMT.copy +-file format: IMGFMT +-virtual size: 10 MiB (10485760 bytes) +-cluster_size: 65536 +-Format specific information: +- bitmaps: +- [0]: +- flags: +- name: b0 +- granularity: 65536 +- [1]: +- flags: +- [0]: auto +- name: b4 +- granularity: 65536 +- corrupt: false ++qemu-img: Cannot copy inconsistent bitmap 'b0' ++Try 'qemu-img bitmap --remove' to delete it ++qemu-img: Could not open 'TEST_DIR/t.IMGFMT.copy': Could not open 'TEST_DIR/t.IMGFMT.copy': No such file or directory + *** done +-- +2.27.0 + diff --git a/kvm-ratelimit-protect-with-a-mutex.patch b/kvm-ratelimit-protect-with-a-mutex.patch new file mode 100644 index 0000000..6d18a15 --- /dev/null +++ b/kvm-ratelimit-protect-with-a-mutex.patch @@ -0,0 +1,133 @@ +From 05efd4675b1241f2cc68fd54c92a8a834699f212 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 29 Jul 2021 07:42:21 -0400 
+Subject: [PATCH 14/39] ratelimit: protect with a mutex + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 +RH-Commit: [6/15] 48586bf5d422cb676a6f2d2cd0abb0822cfc17b2 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1957194 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Acked-by: Igor Mammedov +RH-Acked-by: Andrew Jones + +Right now, rate limiting is protected by the AioContext mutex, which is +taken for example both by the block jobs and by qmp_block_job_set_speed +(via find_block_job). + +We would like to remove the dependency of block layer code on the +AioContext mutex, since most drivers and the core I/O code are already +not relying on it. However, there is no existing lock that can easily +be taken by both ratelimit_set_speed and ratelimit_calculate_delay, +especially because the latter might run in coroutine context (and +therefore under a CoMutex) but the former will not. + +Since concurrent calls to ratelimit_calculate_delay are not possible, +one idea could be to use a seqlock to get a snapshot of slice_ns and +slice_quota. But for now keep it simple, and just add a mutex to the +RateLimit struct; block jobs are generally not performance critical to +the point of optimizing the clock cycles spent in synchronization. + +This also requires the introduction of init/destroy functions, so +add them to the two users of ratelimit.h. 
+ +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4951967d84a0acbf47895add9158e2d4c6056ea0) +Signed-off-by: Paul Lai +Signed-off-by: Miroslav Rezanina +--- + block/block-copy.c | 2 ++ + blockjob.c | 3 +++ + include/qemu/ratelimit.h | 14 ++++++++++++++ + 3 files changed, 19 insertions(+) + +diff --git a/block/block-copy.c b/block/block-copy.c +index 39ae481c8b..9b4af00614 100644 +--- a/block/block-copy.c ++++ b/block/block-copy.c +@@ -230,6 +230,7 @@ void block_copy_state_free(BlockCopyState *s) + return; + } + ++ ratelimit_destroy(&s->rate_limit); + bdrv_release_dirty_bitmap(s->copy_bitmap); + shres_destroy(s->mem); + g_free(s); +@@ -289,6 +290,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, + s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER); + } + ++ ratelimit_init(&s->rate_limit); + QLIST_INIT(&s->tasks); + QLIST_INIT(&s->calls); + +diff --git a/blockjob.c b/blockjob.c +index 207e8c7fd9..46f15befe8 100644 +--- a/blockjob.c ++++ b/blockjob.c +@@ -87,6 +87,7 @@ void block_job_free(Job *job) + + block_job_remove_all_bdrv(bjob); + blk_unref(bjob->blk); ++ ratelimit_destroy(&bjob->limit); + error_free(bjob->blocker); + } + +@@ -435,6 +436,8 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, + assert(job->job.driver->free == &block_job_free); + assert(job->job.driver->user_resume == &block_job_user_resume); + ++ ratelimit_init(&job->limit); ++ + job->blk = blk; + + job->finalize_cancelled_notifier.notify = block_job_event_cancelled; +diff --git a/include/qemu/ratelimit.h b/include/qemu/ratelimit.h +index 01da8d63f1..003ea6d5a3 100644 +--- a/include/qemu/ratelimit.h ++++ b/include/qemu/ratelimit.h +@@ -14,9 +14,11 @@ + #ifndef QEMU_RATELIMIT_H + #define QEMU_RATELIMIT_H + ++#include "qemu/lockable.h" + #include "qemu/timer.h" + + typedef struct { ++ QemuMutex lock; + int64_t slice_start_time; + int64_t slice_end_time; + uint64_t slice_quota; +@@ -40,6 +42,7 @@ static inline int64_t 
ratelimit_calculate_delay(RateLimit *limit, uint64_t n) + int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + double delay_slices; + ++ QEMU_LOCK_GUARD(&limit->lock); + assert(limit->slice_quota && limit->slice_ns); + + if (limit->slice_end_time < now) { +@@ -65,9 +68,20 @@ static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n) + return limit->slice_end_time - now; + } + ++static inline void ratelimit_init(RateLimit *limit) ++{ ++ qemu_mutex_init(&limit->lock); ++} ++ ++static inline void ratelimit_destroy(RateLimit *limit) ++{ ++ qemu_mutex_destroy(&limit->lock); ++} ++ + static inline void ratelimit_set_speed(RateLimit *limit, uint64_t speed, + uint64_t slice_ns) + { ++ QEMU_LOCK_GUARD(&limit->lock); + limit->slice_ns = slice_ns; + limit->slice_quota = MAX(((double)speed * slice_ns) / 1000000000ULL, 1); + } +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index e271b0b..b92bd72 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -14,6 +14,18 @@ # makes it easier to sync the dependency list with Fedora %global have_block_rbd 1 %global enable_werror 1 +%global have_clang 1 +%global have_safe_stack 0 + +%if %{have_clang} +%global toolchain clang +%ifarch x86_64 +%global have_safe_stack 1 +%endif +%else +%global toolchain gcc +%global cc_suffix .gcc +%endif %global have_pmem 1 %ifnarch x86_64 @@ -113,7 +125,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.0.0 -Release: 11%{?rcrel}%{?dist}.1 +Release: 12%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -282,9 +294,98 @@ Patch77: kvm-arm-virt-Register-its-as-a-class-property.patch Patch78: kvm-arm-virt-Enable-ARM-RAS-support.patch # For bz#1972079 - Windows Installation blocked on 4k disk when using blk+raw+iothread Patch79: kvm-block-Fix-in_flight-leak-in-request-padding-error-pa.patch +# For bz#1974683 - Fail to set migrate incoming for 2nd time after the first time failed +Patch80: kvm-migration-Move-yank-outside-qemu_start_incoming_migr.patch +# For bz#1974683 - Fail to set migrate incoming for 2nd time after the first time failed +Patch81: kvm-migration-Allow-reset-of-postcopy_recover_triggered-.patch +# For bz#1968519 - Remove all the old 7.0-7.5 machine types +Patch82: kvm-Remove-RHEL-7.0.0-machine-type.patch +# For bz#1968519 - Remove all the old 7.0-7.5 machine types +Patch83: kvm-Remove-RHEL-7.1.0-machine-type.patch +# For bz#1968519 - Remove all the old 7.0-7.5 machine types +Patch84: kvm-Remove-RHEL-7.2.0-machine-type.patch +# For bz#1968519 - Remove all the old 7.0-7.5 machine types +Patch85: kvm-Remove-RHEL-7.3.0-machine-types.patch +# For bz#1968519 - Remove all the old 7.0-7.5 machine types +Patch86: kvm-Remove-RHEL-7.4.0-machine-types.patch +# For bz#1968519 - Remove all the old 7.0-7.5 machine types +Patch87: kvm-Remove-RHEL-7.5.0-machine-types.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch88: kvm-acpi-pc-revert-back-to-v5.2-PCI-slot-enumeration.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch89: kvm-migration-failover-reset-partially_hotplugged.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch90: kvm-hmp-Fix-loadvm-to-resume-the-VM-on-success-instead-o.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch91: kvm-migration-Move-bitmap_mutex-out-of-migration_bitmap_.patch 
+# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch92: kvm-i386-cpu-Expose-AVX_VNNI-instruction-to-guest.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch93: kvm-ratelimit-protect-with-a-mutex.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch94: kvm-Update-Linux-headers-to-5.13-rc4.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch95: kvm-i386-Add-ratelimit-for-bus-locks-acquired-in-guest.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch96: kvm-iothread-generalize-iothread_set_param-iothread_get_.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch97: kvm-iothread-add-aio-max-batch-parameter.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch98: kvm-linux-aio-limit-the-batch-size-using-aio-max-batch-p.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch99: kvm-block-nvme-Fix-VFIO_MAP_DMA-failed-No-space-left-on-.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch100: kvm-migration-move-wait-unplug-loop-to-its-own-function.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch101: kvm-migration-failover-continue-to-wait-card-unplug-on-e.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch102: kvm-aarch64-Add-USB-storage-devices.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch103: kvm-iotests-Improve-and-rename-test-291-to-qemu-img-bitm.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch104: kvm-qemu-img-Fail-fast-on-convert-bitmaps-with-inconsist.patch +# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch105: kvm-qemu-img-Add-skip-broken-bitmaps-for-convert-bitmaps.patch +# For bz#1957194 - Synchronize 
RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta +Patch106: kvm-audio-Never-send-migration-section.patch +# For bz#1939509 - QEMU: enable SafeStack +# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM +Patch107: kvm-pc-bios-s390-ccw-bootmap-Silence-compiler-warning-fr.patch +# For bz#1939509 - QEMU: enable SafeStack +# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM +Patch108: kvm-pc-bios-s390-ccw-Use-reset_psw-pointer-instead-of-ha.patch +# For bz#1939509 - QEMU: enable SafeStack +# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM +Patch109: kvm-pc-bios-s390-ccw-netboot-Use-Wl-prefix-to-pass-param.patch +# For bz#1939509 - QEMU: enable SafeStack +# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM +Patch110: kvm-pc-bios-s390-ccw-Silence-warning-from-Clang-by-marki.patch +# For bz#1939509 - QEMU: enable SafeStack +# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM +Patch111: kvm-pc-bios-s390-ccw-Fix-the-cc-option-macro-in-the-Make.patch +# For bz#1939509 - QEMU: enable SafeStack +# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM +Patch112: kvm-pc-bios-s390-ccw-Silence-GCC-11-stringop-overflow-wa.patch +# For bz#1939509 - QEMU: enable SafeStack +# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM +Patch113: kvm-pc-bios-s390-ccw-Allow-building-with-Clang-too.patch +# For bz#1939509 - QEMU: enable SafeStack +# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM +Patch114: kvm-pc-bios-s390-ccw-Fix-inline-assembly-for-older-versi.patch +# For bz#1939509 - QEMU: enable SafeStack +# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM +Patch115: kvm-configure-Fix-endianess-test-with-LTO.patch # Source-git patches +%if %{have_clang} +BuildRequires: clang +%if %{have_safe_stack} +BuildRequires: compiler-rt +%endif +%else +BuildRequires: gcc +%endif BuildRequires: meson >= %{meson_version} BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -541,6 +642,7 @@ mkdir -p %{qemu_kvm_build} 
--disable-auth-pam \\\ --disable-avx2 \\\ --disable-avx512f \\\ + --disable-block-drv-whitelist-in-tools \\\ --disable-bochs \\\ --disable-brlapi \\\ --disable-bsd-user \\\ @@ -663,7 +765,7 @@ mkdir -p %{qemu_kvm_build} run_configure() { ../configure \ --cc=%{__cc} \ - --cxx=%{__cxx} \ + --cxx=/bin/false \ --prefix="%{_prefix}" \ --libdir="%{_libdir}" \ --datadir="%{_datadir}" \ @@ -703,7 +805,6 @@ run_configure \ %if %{defined block_drivers_ro_list} --block-drv-ro-whitelist=%{block_drivers_ro_list} \ %endif - --enable-block-drv-whitelist-in-tools \ --enable-attr \ %ifarch %{ix86} x86_64 --enable-avx2 \ @@ -770,6 +871,9 @@ run_configure \ --enable-werror \ %endif --enable-xkbcommon \ +%if %{have_safe_stack} + --enable-safe-stack \ +%endif %if %{tools_only} @@ -812,7 +916,7 @@ cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm cp pc-bios/s390-ccw/s390-ccw.img pc-bios/s390-ccw/s390-netboot.img pc-bios/ %endif -gcc %{_sourcedir}/ksmctl.c $RPM_OPT_FLAGS $RPM_LD_FLAGS -o ksmctl +%{__cc} %{_sourcedir}/ksmctl.c %{optflags} %{?build_ldflags} -o ksmctl popd # endif !tools_only %endif @@ -1244,6 +1348,59 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Fri Aug 20 2021 Miroslav Rezanina - 6.0.0-12.el9 +- kvm-migration-Move-yank-outside-qemu_start_incoming_migr.patch [bz#1974683] +- kvm-migration-Allow-reset-of-postcopy_recover_triggered-.patch [bz#1974683] +- kvm-Remove-RHEL-7.0.0-machine-type.patch [bz#1968519] +- kvm-Remove-RHEL-7.1.0-machine-type.patch [bz#1968519] +- kvm-Remove-RHEL-7.2.0-machine-type.patch [bz#1968519] +- kvm-Remove-RHEL-7.3.0-machine-types.patch [bz#1968519] +- kvm-Remove-RHEL-7.4.0-machine-types.patch [bz#1968519] +- kvm-Remove-RHEL-7.5.0-machine-types.patch [bz#1968519] +- kvm-acpi-pc-revert-back-to-v5.2-PCI-slot-enumeration.patch [bz#1957194] +- kvm-migration-failover-reset-partially_hotplugged.patch [bz#1957194] +- kvm-hmp-Fix-loadvm-to-resume-the-VM-on-success-instead-o.patch [bz#1957194] +- 
kvm-migration-Move-bitmap_mutex-out-of-migration_bitmap_.patch [bz#1957194] +- kvm-i386-cpu-Expose-AVX_VNNI-instruction-to-guest.patch [bz#1957194] +- kvm-ratelimit-protect-with-a-mutex.patch [bz#1957194] +- kvm-Update-Linux-headers-to-5.13-rc4.patch [bz#1957194] +- kvm-i386-Add-ratelimit-for-bus-locks-acquired-in-guest.patch [bz#1957194] +- kvm-iothread-generalize-iothread_set_param-iothread_get_.patch [bz#1957194] +- kvm-iothread-add-aio-max-batch-parameter.patch [bz#1957194] +- kvm-linux-aio-limit-the-batch-size-using-aio-max-batch-p.patch [bz#1957194] +- kvm-block-nvme-Fix-VFIO_MAP_DMA-failed-No-space-left-on-.patch [bz#1957194] +- kvm-migration-move-wait-unplug-loop-to-its-own-function.patch [bz#1957194] +- kvm-migration-failover-continue-to-wait-card-unplug-on-e.patch [bz#1957194] +- kvm-aarch64-Add-USB-storage-devices.patch [bz#1957194] +- kvm-iotests-Improve-and-rename-test-291-to-qemu-img-bitm.patch [bz#1957194] +- kvm-qemu-img-Fail-fast-on-convert-bitmaps-with-inconsist.patch [bz#1957194] +- kvm-qemu-img-Add-skip-broken-bitmaps-for-convert-bitmaps.patch [bz#1957194] +- kvm-audio-Never-send-migration-section.patch [bz#1957194] +- kvm-pc-bios-s390-ccw-bootmap-Silence-compiler-warning-fr.patch [bz#1939509 bz#1940132] +- kvm-pc-bios-s390-ccw-Use-reset_psw-pointer-instead-of-ha.patch [bz#1939509 bz#1940132] +- kvm-pc-bios-s390-ccw-netboot-Use-Wl-prefix-to-pass-param.patch [bz#1939509 bz#1940132] +- kvm-pc-bios-s390-ccw-Silence-warning-from-Clang-by-marki.patch [bz#1939509 bz#1940132] +- kvm-pc-bios-s390-ccw-Fix-the-cc-option-macro-in-the-Make.patch [bz#1939509 bz#1940132] +- kvm-pc-bios-s390-ccw-Silence-GCC-11-stringop-overflow-wa.patch [bz#1939509 bz#1940132] +- kvm-pc-bios-s390-ccw-Allow-building-with-Clang-too.patch [bz#1939509 bz#1940132] +- kvm-pc-bios-s390-ccw-Fix-inline-assembly-for-older-versi.patch [bz#1939509 bz#1940132] +- kvm-configure-Fix-endianess-test-with-LTO.patch [bz#1939509 bz#1940132] +- kvm-spec-Switch-toolchain-to-Clang-LLVM.patch 
[bz#1939509 bz#1940132] +- kvm-spec-Use-safe-stack-for-x86_64.patch [bz#1939509 bz#1940132] +- kvm-spec-Reenable-write-support-for-VMDK-etc.-in-tools.patch [bz#1989841] +- Resolves: bz#1974683 + (Fail to set migrate incoming for 2nd time after the first time failed) +- Resolves: bz#1968519 + (Remove all the old 7.0-7.5 machine types) +- Resolves: bz#1957194 + (Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta) +- Resolves: bz#1939509 + (QEMU: enable SafeStack) +- Resolves: bz#1940132 + (QEMU: switch build toolchain to Clang/LLVM) +- Resolves: bz#1989841 + (RFE: qemu-img cannot convert images into vmdk and vpc formats) + * Tue Aug 10 2021 Mohan Boddu - 17:6.0.0-11.1 - Rebuilt for IMA sigs, glibc 2.34, aarch64 flags Related: rhbz#1991688 From 9b74567af7c9f9944071a1091a0393fe07ff5a68 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 27 Aug 2021 02:31:07 -0400 Subject: [PATCH 133/195] * Fri Aug 27 2021 Miroslav Rezanina - 6.0.0-13 - kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch [bz#1951814] - kvm-disable-ac97-audio.patch [bz#1995819] - kvm-redhat-Disable-LTO-on-non-x86-architectures.patch [bz#1950192] - kvm-redhat-Enable-the-test-block-iothread-test-again.patch [bz#1950192] - Resolves: bz#1951814 (RFE: Warning when using qcow2-v2 (compat=0.10)) - Resolves: bz#1995819 (RFE: Remove ac97 audio support from QEMU) - Resolves: bz#1950192 (RHEL9: when ioeventfd=off and 8.4guest, (qemu) qemu-kvm: ../util/qemu-coroutine-lock.c:57: qemu_co_queue_wait_impl: Assertion `qemu_in_coroutine()' failed.) 
--- kvm-disable-ac97-audio.patch | 37 ++++++++++ ...on-warning-when-opening-v2-images-rw.patch | 70 +++++++++++++++++++ ...e-the-test-block-iothread-test-again.patch | 43 ++++++++++++ qemu-kvm.spec | 26 ++++++- 4 files changed, 175 insertions(+), 1 deletion(-) create mode 100644 kvm-disable-ac97-audio.patch create mode 100644 kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch create mode 100644 kvm-redhat-Enable-the-test-block-iothread-test-again.patch diff --git a/kvm-disable-ac97-audio.patch b/kvm-disable-ac97-audio.patch new file mode 100644 index 0000000..469c6c4 --- /dev/null +++ b/kvm-disable-ac97-audio.patch @@ -0,0 +1,37 @@ +From e2bb4b752d68856c4c307640ae310f47f680aed6 Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Fri, 20 Aug 2021 10:21:07 +0200 +Subject: [PATCH 2/4] disable ac97 audio +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Gerd Hoffmann +RH-MergeRequest: 39: disable ac97 audio +RH-Commit: [1/1] 5d1bd969d20f960cb0a023f0d6cd1ae5adda22e6 (kraxel/centos-qemu-kvm) +RH-Bugzilla: 1995819 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Thomas Huth + +RH-Bugzilla: 1995819 + +Signed-off-by: Gerd Hoffmann +--- + default-configs/devices/x86_64-rh-devices.mak | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/default-configs/devices/x86_64-rh-devices.mak b/default-configs/devices/x86_64-rh-devices.mak +index c2dd112f81..8ae2747bf6 100644 +--- a/default-configs/devices/x86_64-rh-devices.mak ++++ b/default-configs/devices/x86_64-rh-devices.mak +@@ -1,6 +1,5 @@ + include rh-virtio.mak + +-CONFIG_AC97=y + CONFIG_ACPI=y + CONFIG_ACPI_PCI=y + CONFIG_ACPI_CPU_HOTPLUG=y +-- +2.27.0 + diff --git a/kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch new file mode 100644 index 0000000..8b618c0 --- /dev/null +++ b/kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -0,0 
+1,70 @@ +From 9d2700ed836cdfabbd031da74cd5a3d01c9127dd Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 20 Aug 2021 18:25:12 +0200 +Subject: [PATCH 1/4] qcow2: Deprecation warning when opening v2 images rw +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 37: qcow2: Deprecation warning when opening v2 images rw +RH-Commit: [1/1] f450d0ae32d35063b28c72c4f2d2ebb9e6d8db3e (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 1951814 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Reitz +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé + +qcow2 v3 has been around for a long time (since QEMU 1.1/RHEL 7), so +there is no real reason any more to use it. People still using it might +do so unintentionally. Warn about it and suggest upgrading during the +RHEL 9 timeframe so that the code can possibly be disabled in RHEL 10. + +The warning is restricted to read-write mode and the system emulator. +The primary motivation for not having it in qemu-img is that 'qemu-img +amend' for upgrades would warn otherwise. It also avoids having to make +too many changes to the test suite. + +bdrv_uses_whitelist() is used as a proxy for deciding whether we are +running in a tool or the system emulator. This is not entirely clean, +but it's what is available and the same function qcow2_do_open() already +uses it this way for another warning. 
+ +Signed-off-by: Kevin Wolf +--- + block/qcow2.c | 6 ++++++ + tests/qemu-iotests/common.filter | 1 + + 2 files changed, 7 insertions(+) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 9727ae8fe3..7c061bf315 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -1336,6 +1336,12 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, + ret = -ENOTSUP; + goto fail; + } ++ if (header.version < 3 && !bs->read_only && bdrv_uses_whitelist()) { ++ warn_report_once("qcow2 v2 images are deprecated and may not be " ++ "supported in future versions. Please consider " ++ "upgrading the image with 'qemu-img amend " ++ "-o compat=v3'."); ++ } + + s->qcow_version = header.version; + +diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter +index 268b749e2f..1f243a8bc1 100644 +--- a/tests/qemu-iotests/common.filter ++++ b/tests/qemu-iotests/common.filter +@@ -82,6 +82,7 @@ _filter_qemu() + { + $SED -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \ + -e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \ ++ -e "/qcow2 v2 images are deprecated/d" \ + -e $'s#\r##' # QEMU monitor uses \r\n line endings + } + +-- +2.27.0 + diff --git a/kvm-redhat-Enable-the-test-block-iothread-test-again.patch b/kvm-redhat-Enable-the-test-block-iothread-test-again.patch new file mode 100644 index 0000000..28dce0f --- /dev/null +++ b/kvm-redhat-Enable-the-test-block-iothread-test-again.patch @@ -0,0 +1,43 @@ +From 4231cac75289acd643c2daaa2dece485b958bef1 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 20 Aug 2021 10:27:12 +0200 +Subject: [PATCH 4/4] redhat: Enable the 'test-block-iothread' test again +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 36: Disable LTO on non-x86 architectures +RH-Commit: [2/2] e010396f3ee0f3f39ff8bcd4749f8ae2e8624980 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 1950192 +RH-Acked-by: Andrew Jones +RH-Acked-by: 
Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Acked-by: Philippe Mathieu-Daudé + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1950192 + +Now that we disabled LTO to fix the coroutines/iothreads on s390x and +aarch64, we can also enable the 'test-block-iothread' test again. + +Signed-off-by: Thomas Huth +--- + tests/unit/meson.build | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tests/unit/meson.build b/tests/unit/meson.build +index 244d35f5d4..b3bc2109da 100644 +--- a/tests/unit/meson.build ++++ b/tests/unit/meson.build +@@ -65,7 +65,7 @@ if have_block + 'test-blockjob': [testblock], + 'test-blockjob-txn': [testblock], + 'test-block-backend': [testblock], +-# 'test-block-iothread': [testblock], ++ 'test-block-iothread': [testblock], + 'test-write-threshold': [testblock], + 'test-crypto-hash': [crypto], + 'test-crypto-hmac': [crypto], +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index b92bd72..d33f3c0 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -5,6 +5,12 @@ %global usbredir_version 0.7.1 %global ipxe_version 20200823-5.git4bd064de +# LTO does not work with the coroutines of QEMU on non-x86 architectures +# (see BZ 1952483 and 1950192 for more information) +%ifnarch x86_64 + %global _lto_cflags %%{nil} +%endif + %global have_usbredir 1 %global have_opengl 1 %global have_fdt 0 @@ -125,7 +131,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.0.0 -Release: 12%{?rcrel}%{?dist}%{?cc_suffix} +Release: 13%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -375,6 +381,12 @@ Patch114: kvm-pc-bios-s390-ccw-Fix-inline-assembly-for-older-versi.patch # For bz#1939509 - QEMU: enable SafeStack # For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM Patch115: kvm-configure-Fix-endianess-test-with-LTO.patch +# For bz#1951814 - RFE: Warning when using qcow2-v2 (compat=0.10) +Patch116: kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +# For bz#1995819 - RFE: Remove ac97 audio support from QEMU +Patch117: kvm-disable-ac97-audio.patch +# For bz#1950192 - RHEL9: when ioeventfd=off and 8.4guest, (qemu) qemu-kvm: ../util/qemu-coroutine-lock.c:57: qemu_co_queue_wait_impl: Assertion `qemu_in_coroutine()' failed. +Patch118: kvm-redhat-Enable-the-test-block-iothread-test-again.patch # Source-git patches @@ -1348,6 +1360,18 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Fri Aug 27 2021 Miroslav Rezanina - 6.0.0-13 +- kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch [bz#1951814] +- kvm-disable-ac97-audio.patch [bz#1995819] +- kvm-redhat-Disable-LTO-on-non-x86-architectures.patch [bz#1950192] +- kvm-redhat-Enable-the-test-block-iothread-test-again.patch [bz#1950192] +- Resolves: bz#1951814 + (RFE: Warning when using qcow2-v2 (compat=0.10)) +- Resolves: bz#1995819 + (RFE: Remove ac97 audio support from QEMU) +- Resolves: bz#1950192 + (RHEL9: when ioeventfd=off and 8.4guest, (qemu) qemu-kvm: ../util/qemu-coroutine-lock.c:57: qemu_co_queue_wait_impl: Assertion `qemu_in_coroutine()' failed.) 
+ * Fri Aug 20 2021 Miroslav Rezanina - 6.0.0-12.el9 - kvm-migration-Move-yank-outside-qemu_start_incoming_migr.patch [bz#1974683] - kvm-migration-Allow-reset-of-postcopy_recover_triggered-.patch [bz#1974683] From f788c1346a0dd98e82c699c484f3d9574df32cda Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 2 Sep 2021 02:44:51 -0400 Subject: [PATCH 134/195] * Thu Sep 02 2021 Miroslav Rezanina - 6.1.0-1 - Rebase to QEMU 6.1.0 [bz#1997408] - Resolves: #bz#1997408 (Rebase to QEMU 6.1.0) --- .gitignore | 1 + 0004-Initial-redhat-build.patch | 222 +- 0005-Enable-disable-devices-for-RHEL.patch | 271 +- ...Machine-type-related-general-changes.patch | 472 +- 0007-Add-aarch64-machine-types.patch | 97 +- 0008-Add-ppc64-machine-types.patch | 277 +- 0009-Add-s390x-machine-types.patch | 65 +- 0010-Add-x86_64-machine-types.patch | 564 +- 0011-Enable-make-check.patch | 83 +- ...mber-of-devices-that-can-be-assigned.patch | 13 +- ...Add-support-statement-to-help-output.patch | 8 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 6 +- ...documentation-instead-of-qemu-system.patch | 51 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 2 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 4 +- ...x-blockdev-reopen-API-with-feature-f.patch | 70 - ...on-warning-when-opening-v2-images-rw.patch | 17 +- README.rst | 18 - kvm-Disable-TPM-passthrough.patch | 44 - kvm-Remove-RHEL-7.0.0-machine-type.patch | 197 - kvm-Remove-RHEL-7.1.0-machine-type.patch | 292 - kvm-Remove-RHEL-7.2.0-machine-type.patch | 192 - kvm-Remove-RHEL-7.3.0-machine-types.patch | 315 - kvm-Remove-RHEL-7.4.0-machine-types.patch | 301 - kvm-Remove-RHEL-7.5.0-machine-types.patch | 210 - ...E-and-QXL-from-x86_64-rh-devices.mak.patch | 43 - kvm-Update-Linux-headers-to-5.13-rc4.patch | 5465 ----------------- kvm-aarch64-Add-USB-storage-devices.patch | 41 - ...64-rh-devices-add-CONFIG_PVPANIC_PCI.patch | 38 - kvm-aarch64-rh-devices-add-CONFIG_PXB.patch | 37 - ...rt-back-to-v5.2-PCI-slot-enumeration.patch | 111 - 
kvm-arm-virt-Enable-ARM-RAS-support.patch | 68 - ...r-highmem-and-gic-version-as-class-p.patch | 77 - ...t-Register-iommu-as-a-class-property.patch | 54 - ...irt-Register-its-as-a-class-property.patch | 56 - kvm-audio-Never-send-migration-section.patch | 63 - ...-to-use-driver-whitelist-even-in-too.patch | 121 - ...ght-leak-in-request-padding-error-pa.patch | 71 - ...k-add-max_hw_transfer-to-BlockLimits.patch | 131 - kvm-block-backend-add-drained_poll.patch | 74 - ...ign-max_transfer-to-request-alignmen.patch | 47 - ...FIO_MAP_DMA-failed-No-space-left-on-.patch | 106 - ...onfigure-Fix-endianess-test-with-LTO.patch | 82 - kvm-disable-CONFIG_USB_STORAGE_BOT.patch | 49 - kvm-disable-ac97-audio.patch | 37 - ...me-mistakes-in-the-SEV-documentation.patch | 151 - ...documentation-to-amd-memory-encrypti.patch | 141 - ...rop-firmware.json-Add-SEV-ES-support.patch | 110 - ...posix-fix-max_iov-for-dev-sg-devices.patch | 50 - ...LKSECTGET-on-block-devices-too-do-no.patch | 140 - ...o-resume-the-VM-on-success-instead-o.patch | 51 - ...mmuv3-Another-range-invalidation-fix.patch | 111 - ...8.5-and-9.0-machine-types-and-remove.patch | 63 - ...ble-PL011-clock-migration-through-hw.patch | 51 - ...x-Remove-the-RHEL7-only-machine-type.patch | 67 - ...imit-for-bus-locks-acquired-in-guest.patch | 219 - ...Expose-AVX_VNNI-instruction-to-guest.patch | 82 - ...and-rename-test-291-to-qemu-img-bitm.patch | 178 - ...iothread-add-aio-max-batch-parameter.patch | 324 - ...ize-iothread_set_param-iothread_get_.patch | 96 - ...the-batch-size-using-aio-max-batch-p.patch | 84 - ...reset-of-postcopy_recover_triggered-.patch | 66 - ...itmap_mutex-out-of-migration_bitmap_.patch | 111 - ...ank-outside-qemu_start_incoming_migr.patch | 94 - ...er-continue-to-wait-card-unplug-on-e.patch | 59 - ...-failover-reset-partially_hotplugged.patch | 44 - ...wait-unplug-loop-to-its-own-function.patch | 118 - ...rained-block-ops-to-quiesce-the-serv.patch | 191 - kvm-osdep-provide-ROUND_DOWN-macro.patch | 75 - 
...90-ccw-Allow-building-with-Clang-too.patch | 77 - ...-Fix-inline-assembly-for-older-versi.patch | 106 - ...-Fix-the-cc-option-macro-in-the-Make.patch | 48 - ...-Silence-GCC-11-stringop-overflow-wa.patch | 75 - ...-Silence-warning-from-Clang-by-marki.patch | 59 - ...-Use-reset_psw-pointer-instead-of-ha.patch | 60 - ...-bootmap-Silence-compiler-warning-fr.patch | 56 - ...-don-t-try-to-read-the-next-block-if.patch | 51 - ...-netboot-Use-Wl-prefix-to-pass-param.patch | 44 - ...tialize-cgs-ready-in-kvmppc_svm_init.patch | 69 - ...p-broken-bitmaps-for-convert-bitmaps.patch | 265 - ...st-on-convert-bitmaps-with-inconsist.patch | 145 - kvm-ratelimit-protect-with-a-mutex.patch | 133 - ...efine-pseries-rhel8.5.0-machine-type.patch | 67 - ...e-the-test-block-iothread-test-again.patch | 43 - ...issing-entries-in-hw_compat_rhel_8_4.patch | 42 - ...-s390x-add-rhel-8.5.0-compat-machine.patch | 59 - ...86-Enable-kvm-asyncpf-int-by-default.patch | 49 - kvm-s390x-cpumodel-add-3931-and-3932.patch | 134 - kvm-s390x-css-Add-passthrough-IRB.patch | 127 - kvm-s390x-css-Introduce-an-ESW-struct.patch | 111 - kvm-s390x-css-Refactor-IRB-construction.patch | 144 - ...90x-css-Split-out-the-IRB-sense-data.patch | 63 - ...hat-disable-experimental-3270-device.patch | 40 - ...s-max_segments-via-max_iov-field-in-.patch | 64 - ...OCKET_ADDRESS_TYPE_FD-listen-2-backl.patch | 115 - ...-t-hijack-current_machine-boot_order.patch | 115 - ...pability-issue-on-KVM-guest-for-PCI-.patch | 160 - ...le-comment-about-power-saving-LPCR-b.patch | 52 - ...o-current-AIL-mode-when-starting-a-n.patch | 90 - ...CPU-model-versions-supporting-xsaves.patch | 322 - ...add-support-to-query-the-attestation.patch | 262 - ...-backends-without-multiqueue-support.patch | 46 - ...heck-that-num-queues-is-supported-by.patch | 83 - ...on-t-reconnect-during-initialisation.patch | 179 - ...ail-gracefully-on-too-large-queue-si.patch | 55 - ...et-more-feature-flags-from-vhost-dev.patch | 44 - 
...k-Improve-error-reporting-in-realize.patch | 120 - ...ake-sure-to-set-Error-on-realize-fai.patch | 53 - ...ommu_platform-is-requested-but-unsup.patch | 53 - ...gure-all-host-notifiers-in-a-single-.patch | 107 - ...ollback-path-in-virtio_blk_data_plan.patch | 83 - ...tio-gpu-handle-partial-maps-properly.patch | 201 - ...ver-add-missing-remove_migration_sta.patch | 77 - ...igure-all-host-notifiers-in-a-single.patch | 91 - ...host-notifiers-and-callbacks-separat.patch | 125 - kvm-x86-Add-x86-rhel8.5-machine-types.patch | 130 - ...er-function-when-using-TLS-migration.patch | 146 - qemu-kvm.spec | 265 +- rpminspect.yaml | 6 - sources | 2 +- 120 files changed, 656 insertions(+), 17696 deletions(-) delete mode 100644 0018-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch rename kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch => 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch (84%) delete mode 100644 README.rst delete mode 100644 kvm-Disable-TPM-passthrough.patch delete mode 100644 kvm-Remove-RHEL-7.0.0-machine-type.patch delete mode 100644 kvm-Remove-RHEL-7.1.0-machine-type.patch delete mode 100644 kvm-Remove-RHEL-7.2.0-machine-type.patch delete mode 100644 kvm-Remove-RHEL-7.3.0-machine-types.patch delete mode 100644 kvm-Remove-RHEL-7.4.0-machine-types.patch delete mode 100644 kvm-Remove-RHEL-7.5.0-machine-types.patch delete mode 100644 kvm-Remove-SPICE-and-QXL-from-x86_64-rh-devices.mak.patch delete mode 100644 kvm-Update-Linux-headers-to-5.13-rc4.patch delete mode 100644 kvm-aarch64-Add-USB-storage-devices.patch delete mode 100644 kvm-aarch64-rh-devices-add-CONFIG_PVPANIC_PCI.patch delete mode 100644 kvm-aarch64-rh-devices-add-CONFIG_PXB.patch delete mode 100644 kvm-acpi-pc-revert-back-to-v5.2-PCI-slot-enumeration.patch delete mode 100644 kvm-arm-virt-Enable-ARM-RAS-support.patch delete mode 100644 kvm-arm-virt-Register-highmem-and-gic-version-as-class-p.patch delete mode 100644 
kvm-arm-virt-Register-iommu-as-a-class-property.patch delete mode 100644 kvm-arm-virt-Register-its-as-a-class-property.patch delete mode 100644 kvm-audio-Never-send-migration-section.patch delete mode 100644 kvm-block-Add-option-to-use-driver-whitelist-even-in-too.patch delete mode 100644 kvm-block-Fix-in_flight-leak-in-request-padding-error-pa.patch delete mode 100644 kvm-block-add-max_hw_transfer-to-BlockLimits.patch delete mode 100644 kvm-block-backend-add-drained_poll.patch delete mode 100644 kvm-block-backend-align-max_transfer-to-request-alignmen.patch delete mode 100644 kvm-block-nvme-Fix-VFIO_MAP_DMA-failed-No-space-left-on-.patch delete mode 100644 kvm-configure-Fix-endianess-test-with-LTO.patch delete mode 100644 kvm-disable-CONFIG_USB_STORAGE_BOT.patch delete mode 100644 kvm-disable-ac97-audio.patch delete mode 100644 kvm-doc-Fix-some-mistakes-in-the-SEV-documentation.patch delete mode 100644 kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch delete mode 100644 kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch delete mode 100644 kvm-file-posix-fix-max_iov-for-dev-sg-devices.patch delete mode 100644 kvm-file-posix-try-BLKSECTGET-on-block-devices-too-do-no.patch delete mode 100644 kvm-hmp-Fix-loadvm-to-resume-the-VM-on-success-instead-o.patch delete mode 100644 kvm-hw-arm-smmuv3-Another-range-invalidation-fix.patch delete mode 100644 kvm-hw-arm-virt-Add-8.5-and-9.0-machine-types-and-remove.patch delete mode 100644 kvm-hw-arm-virt-Disable-PL011-clock-migration-through-hw.patch delete mode 100644 kvm-hw-s390x-Remove-the-RHEL7-only-machine-type.patch delete mode 100644 kvm-i386-Add-ratelimit-for-bus-locks-acquired-in-guest.patch delete mode 100644 kvm-i386-cpu-Expose-AVX_VNNI-instruction-to-guest.patch delete mode 100644 kvm-iotests-Improve-and-rename-test-291-to-qemu-img-bitm.patch delete mode 100644 kvm-iothread-add-aio-max-batch-parameter.patch delete mode 100644 kvm-iothread-generalize-iothread_set_param-iothread_get_.patch delete mode 
100644 kvm-linux-aio-limit-the-batch-size-using-aio-max-batch-p.patch delete mode 100644 kvm-migration-Allow-reset-of-postcopy_recover_triggered-.patch delete mode 100644 kvm-migration-Move-bitmap_mutex-out-of-migration_bitmap_.patch delete mode 100644 kvm-migration-Move-yank-outside-qemu_start_incoming_migr.patch delete mode 100644 kvm-migration-failover-continue-to-wait-card-unplug-on-e.patch delete mode 100644 kvm-migration-failover-reset-partially_hotplugged.patch delete mode 100644 kvm-migration-move-wait-unplug-loop-to-its-own-function.patch delete mode 100644 kvm-nbd-server-Use-drained-block-ops-to-quiesce-the-serv.patch delete mode 100644 kvm-osdep-provide-ROUND_DOWN-macro.patch delete mode 100644 kvm-pc-bios-s390-ccw-Allow-building-with-Clang-too.patch delete mode 100644 kvm-pc-bios-s390-ccw-Fix-inline-assembly-for-older-versi.patch delete mode 100644 kvm-pc-bios-s390-ccw-Fix-the-cc-option-macro-in-the-Make.patch delete mode 100644 kvm-pc-bios-s390-ccw-Silence-GCC-11-stringop-overflow-wa.patch delete mode 100644 kvm-pc-bios-s390-ccw-Silence-warning-from-Clang-by-marki.patch delete mode 100644 kvm-pc-bios-s390-ccw-Use-reset_psw-pointer-instead-of-ha.patch delete mode 100644 kvm-pc-bios-s390-ccw-bootmap-Silence-compiler-warning-fr.patch delete mode 100644 kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch delete mode 100644 kvm-pc-bios-s390-ccw-netboot-Use-Wl-prefix-to-pass-param.patch delete mode 100644 kvm-ppc-pef.c-initialize-cgs-ready-in-kvmppc_svm_init.patch delete mode 100644 kvm-qemu-img-Add-skip-broken-bitmaps-for-convert-bitmaps.patch delete mode 100644 kvm-qemu-img-Fail-fast-on-convert-bitmaps-with-inconsist.patch delete mode 100644 kvm-ratelimit-protect-with-a-mutex.patch delete mode 100644 kvm-redhat-Define-pseries-rhel8.5.0-machine-type.patch delete mode 100644 kvm-redhat-Enable-the-test-block-iothread-test-again.patch delete mode 100644 kvm-redhat-add-missing-entries-in-hw_compat_rhel_8_4.patch delete mode 100644 
kvm-redhat-s390x-add-rhel-8.5.0-compat-machine.patch delete mode 100644 kvm-redhat-x86-Enable-kvm-asyncpf-int-by-default.patch delete mode 100644 kvm-s390x-cpumodel-add-3931-and-3932.patch delete mode 100644 kvm-s390x-css-Add-passthrough-IRB.patch delete mode 100644 kvm-s390x-css-Introduce-an-ESW-struct.patch delete mode 100644 kvm-s390x-css-Refactor-IRB-construction.patch delete mode 100644 kvm-s390x-css-Split-out-the-IRB-sense-data.patch delete mode 100644 kvm-s390x-redhat-disable-experimental-3270-device.patch delete mode 100644 kvm-scsi-generic-pass-max_segments-via-max_iov-field-in-.patch delete mode 100644 kvm-sockets-update-SOCKET_ADDRESS_TYPE_FD-listen-2-backl.patch delete mode 100644 kvm-spapr-Don-t-hijack-current_machine-boot_order.patch delete mode 100644 kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch delete mode 100644 kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch delete mode 100644 kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch delete mode 100644 kvm-target-i386-Add-CPU-model-versions-supporting-xsaves.patch delete mode 100644 kvm-target-i386-sev-add-support-to-query-the-attestation.patch delete mode 100644 kvm-vhost-user-Fix-backends-without-multiqueue-support.patch delete mode 100644 kvm-vhost-user-blk-Check-that-num-queues-is-supported-by.patch delete mode 100644 kvm-vhost-user-blk-Don-t-reconnect-during-initialisation.patch delete mode 100644 kvm-vhost-user-blk-Fail-gracefully-on-too-large-queue-si.patch delete mode 100644 kvm-vhost-user-blk-Get-more-feature-flags-from-vhost-dev.patch delete mode 100644 kvm-vhost-user-blk-Improve-error-reporting-in-realize.patch delete mode 100644 kvm-vhost-user-blk-Make-sure-to-set-Error-on-realize-fai.patch delete mode 100644 kvm-virtio-Fail-if-iommu_platform-is-requested-but-unsup.patch delete mode 100644 kvm-virtio-blk-Configure-all-host-notifiers-in-a-single-.patch delete mode 100644 kvm-virtio-blk-Fix-rollback-path-in-virtio_blk_data_plan.patch delete mode 
100644 kvm-virtio-gpu-handle-partial-maps-properly.patch delete mode 100644 kvm-virtio-net-failover-add-missing-remove_migration_sta.patch delete mode 100644 kvm-virtio-scsi-Configure-all-host-notifiers-in-a-single.patch delete mode 100644 kvm-virtio-scsi-Set-host-notifiers-and-callbacks-separat.patch delete mode 100644 kvm-x86-Add-x86-rhel8.5-machine-types.patch delete mode 100644 kvm-yank-Unregister-function-when-using-TLS-migration.patch delete mode 100644 rpminspect.yaml diff --git a/.gitignore b/.gitignore index b919b12..13f0595 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ /qemu-kvm*.src.rpm /patches.* /*.orig +/qemu-6.1.0.tar.xz diff --git a/0004-Initial-redhat-build.patch b/0004-Initial-redhat-build.patch index a697dd3..94653b6 100644 --- a/0004-Initial-redhat-build.patch +++ b/0004-Initial-redhat-build.patch @@ -1,49 +1,149 @@ -From 431955e872aa010376b1f94665908c2ba8194b44 Mon Sep 17 00:00:00 2001 +From 4a0a5d5019938a8b4b5526d33e1bf3d7dcfc56a5 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Fri, 12 Oct 2018 07:31:11 +0200 +Date: Wed, 26 May 2021 10:56:02 +0200 Subject: Initial redhat build This patch introduces redhat build structure in redhat subdirectory. In addition, several issues are fixed in QEMU tree: - - Change of app name for sasl_server_init in VNC code from qemu to qemu-kvm - - As we use qemu-kvm as name in all places, this is updated to be consistent - - Man page renamed from qemu to qemu-kvm - - man page is installed using make install so we have to fix it in qemu tree +- Change of app name for sasl_server_init in VNC code from qemu to qemu-kvm + - As we use qemu-kvm as name in all places, this is updated to be consistent +- Man page renamed from qemu to qemu-kvm + - man page is installed using make install so we have to fix it in qemu tree We disable make check due to issues with some of the tests. 
-This rebase is based on qemu-kvm-5.2.0-16.el9 +This rebase is based on qemu-kvm-5.2.0-13.el9 Signed-off-by: Miroslav Rezanina +-- +Rebase changes (210526): +- Move build to .distro +- Move changes for support file to related commit +- Added dependency for python3-sphinx-rtd_theme +- Removed --disable-sheepdog configure option +- Added new hw-display modules + +Rebase changes (210623): +- SASL initialization moved to ui/vnc-auth-sasl.c + +Rebase changes (210714): +- Add accel-qtest- and accel-tcg-x86_64 libraries + +Rebase changes (6.1.0 rc1): +- Added hw-usb-host module + +Rebase changes (6.1.0 rc4): +- Disable new configure options (bpf, nvmm, slirp-smbd) + +Rebase changes (6.1.0): +- Use -pie for ksmctl build (annocheck complain fix) Merged patches (6.0.0): -- 605758c902 Limit build on Power to qemu-img and qemu-ga only + - 605758c902 Limit build on Power to qemu-img and qemu-ga only + +Merged patches (6.1.0 RC1): +- f04f91751f Use cached tarballs +- 6581165c65 Remove message with running VM count +- 03c3cac9fc spec-file: build qemu-kvm without SPICE and QXL +- e0ae6c1f6c spec-file: Obsolete qemu-kvm-ui-spice +- 9d2e9f9ecf spec: Do not build qemu-kvm-block-gluster +- cf470b4234 spec: Do not link pcnet and ne2k_pci roms +- e981284a6b redhat: Install the s390-netboot.img that we've built +- 24ef557f33 spec: Remove usage of Group: tag +- c40d69b4f4 spec: Drop %defattr usage +- f8e98798ce spec: Clean up BuildRequires +- 47246b43ee spec: Remove iasl BuildRequires +- 170dc1cbe0 spec: Remove redundant 0 in conditionals +- 8718f6fa11 spec: Add more have_XXX conditionals +- a001269ce9 spec: Remove binutils versioned Requires +- 34545ee641 spec: Remove diffutils BuildRequires +- c2c82beac9 spec: Remove redundant Requires: +- 9314c231f4 spec: Add XXX_version macros +- c43db0bf0f spec: Add have_block_rbd +- 3ecb0c0319 qga: drop StandardError=syslog +- 018049dc80 Remove iscsi support +- a2edf18777 redhat: Replace the kvm-setup.service with a /etc/modules-load.d config file
+- 387b5fbcfe redhat: Move qemu-kvm-docs dependency to qemu-kvm +- 4ead693178 redhat: introducting qemu-kvm-hw-usbredir +- 4dc6fc3035 redhat: use the standard vhost-user JSON path +- 84757178b4 Fix local build +- 8c394227dd spec: Restrict block drivers in tools +- b6aa7c1fae Move tools to separate package +- eafd82e509 Split qemu-pr-helper to separate package +- 2c0182e2aa spec: RPM_BUILD_ROOT -> %{buildroot} +- 91bd55ca13 spec: More use of %{name} instead of 'qemu-kvm' +- 50ba299c61 spec: Use qemu-pr-helper.service from qemu.git (partial) +- ee08d4e0a3 spec: Use %{_sourcedir} for referencing sources +- 039e7f7d02 spec: Add tools_only +- 884ba71617 spec: %build: Add run_configure helper +- 8ebd864d65 spec: %build: Disable more bits with %{disable_everything} (partial) +- f23fdb53f5 spec: %build: Add macros for some 'configure' parameters +- fe951a8bd8 spec: %files: Move qemu-guest-agent and qemu-img earlier +- 353b632e37 spec: %install: Remove redundant bits +- 9d2015b752 spec: %install: Add %{modprobe_kvm_conf} macro +- 6d05134e8c spec: %install: Remove qemu-guest-agent /etc/qemu-kvm usage +- 985b226467 spec: %install: clean up qemu-ga section +- dfaf9c600d spec: %install: Use a single %{tools_only} section +- f6978ddb46 spec: Make tools_only not cross spec sections +- 071c211098 spec: %install: Limit time spent in %{qemu_kvm_build} +- 1b65c674be spec: misc syntactic merges with Fedora +- 4da16294cf spec: Use Fedora's pattern for specifying rc version +- d7ee259a79 spec: %files: don't use fine grained -docs file list +- 64cad0c60f spec: %files: Add licenses to qemu-common too +- c3de4f080a spec: %install: Drop python3 shebang fixup + +Merged patches (6.1.0 RC2): +- 46fc216115 Update local build to work with spec file improvements + +Merged patches (6.1.0 RC3): +- bab9531548 spec: Remove buildldflags +- c8360ab6a9 spec: Use %make_build macro +- f6966c66e9 spec: Drop make install sharedir and datadir usage +- 86982421bc spec: use %make_install macro +- 191c405d22 
spec: parallelize `make check` +- 251a1fb958 spec: Drop explicit --build-id +- 44c7dda6c3 spec: use %{build_ldflags} +- 0009a34354 Move virtiofsd to separate package +- 34d1b200b3 Utilize --firmware configure option + +Merged patches (6.1.0): +- 2800e1dd03 spec: Switch toolchain to Clang/LLVM (except process-patches.sh) +- e8a70f500f spec: Use safe-stack for x86_64 +- e29445d50d spec: Reenable write support for VMDK etc. in tools +- a4fe2a3e16 redhat: Disable LTO on non-x86 architectures + +fixes --- + .distro/85-kvm.preset | 5 - + .distro/Makefile | 100 + + .distro/Makefile.common | 45 + + .distro/README.tests | 39 + + .distro/kvm-setup | 49 - + .distro/kvm-setup.service | 14 - + .distro/modules-load.conf | 4 + + .distro/qemu-guest-agent.service | 1 - + .distro/qemu-kvm.spec.template | 3767 +++++++++++++++++++++++ + .distro/scripts/extract_build_cmd.py | 12 + .gitignore | 1 + README.systemtap | 43 + - configure | 5 - - hw/remote/memory.c | 2 +- - hw/remote/proxy.c | 2 +- - meson.build | 8 +- - redhat/Makefile | 90 + - redhat/Makefile.common | 50 + - redhat/README.tests | 39 + - redhat/qemu-kvm.spec.template | 3609 +++++++++++++++++++++++ - redhat/scripts/extract_build_cmd.py | 5 +- - redhat/scripts/process-patches.sh | 20 +- - redhat/udev-kvm-check.c | 19 +- + configure | 1 - + meson.build | 6 +- scripts/qemu-guest-agent/fsfreeze-hook | 2 +- scripts/systemtap/conf.d/qemu_kvm.conf | 4 + scripts/systemtap/script.d/qemu_kvm.stp | 1 + tests/check-block.sh | 2 + - ui/vnc.c | 2 +- - 18 files changed, 3860 insertions(+), 44 deletions(-) + ui/vnc-auth-sasl.c | 2 +- + 19 files changed, 4024 insertions(+), 74 deletions(-) + delete mode 100644 .distro/85-kvm.preset + create mode 100644 .distro/Makefile + create mode 100644 .distro/Makefile.common + create mode 100644 .distro/README.tests + delete mode 100644 .distro/kvm-setup + delete mode 100644 .distro/kvm-setup.service + create mode 100644 .distro/modules-load.conf + create mode 100644 .distro/qemu-kvm.spec.template 
create mode 100644 README.systemtap - create mode 100644 redhat/Makefile - create mode 100644 redhat/Makefile.common - create mode 100644 redhat/README.tests - create mode 100644 redhat/qemu-kvm.spec.template create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp @@ -97,21 +197,10 @@ index 0000000000..ad913fc990 +3. Translate the trace record to readable format. + # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log diff --git a/configure b/configure -index 4f374b4889..53b2fa583a 100755 +index 9a79a004d7..b0dbdded06 100755 --- a/configure +++ b/configure -@@ -6383,10 +6383,6 @@ fi - mv $cross config-meson.cross - - rm -rf meson-private meson-info meson-logs --unset staticpic --if ! version_ge "$($meson --version)" 0.56.0; then -- staticpic=$(if test "$pie" = yes; then echo true; else echo false; fi) --fi - NINJA=$ninja $meson setup \ - --prefix "$prefix" \ - --libdir "$libdir" \ -@@ -6406,7 +6402,6 @@ NINJA=$ninja $meson setup \ +@@ -5189,7 +5189,6 @@ if test "$skip_meson" = no; then -Dwerror=$(if test "$werror" = yes; then echo true; else echo false; fi) \ -Dstrip=$(if test "$strip_opt" = yes; then echo true; else echo false; fi) \ -Db_pie=$(if test "$pie" = yes; then echo true; else echo false; fi) \ @@ -119,46 +208,19 @@ index 4f374b4889..53b2fa583a 100755 -Db_coverage=$(if test "$gcov" = yes; then echo true; else echo false; fi) \ -Db_lto=$lto -Dcfi=$cfi -Dcfi_debug=$cfi_debug \ -Dmalloc=$malloc -Dmalloc_trim=$malloc_trim -Dsparse=$sparse \ -diff --git a/hw/remote/memory.c b/hw/remote/memory.c -index 32085b1e05..bf0047a81b 100644 ---- a/hw/remote/memory.c -+++ b/hw/remote/memory.c -@@ -43,7 +43,7 @@ void remote_sysmem_reconfig(MPQemuMsg *msg, Error **errp) - remote_sysmem_reset(); - - for (region = 0; region < msg->num_fds; region++) { -- g_autofree char *name; -+ g_autofree char *name = NULL; - subregion = g_new(MemoryRegion, 1); - name = 
g_strdup_printf("remote-mem-%u", suffix++); - memory_region_init_ram_from_fd(subregion, NULL, -diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c -index 4fa4be079d..253c1aa7ec 100644 ---- a/hw/remote/proxy.c -+++ b/hw/remote/proxy.c -@@ -347,7 +347,7 @@ static void probe_pci_info(PCIDevice *dev, Error **errp) - PCI_BASE_ADDRESS_SPACE_IO : PCI_BASE_ADDRESS_SPACE_MEMORY; - - if (size) { -- g_autofree char *name; -+ g_autofree char *name = NULL; - pdev->region[i].dev = pdev; - pdev->region[i].present = true; - if (type == PCI_BASE_ADDRESS_SPACE_MEMORY) { diff --git a/meson.build b/meson.build -index c6f4b0cf5e..06c15bd6d2 100644 +index b3e7ec0e92..17707c8748 100644 --- a/meson.build +++ b/meson.build @@ -1,6 +1,6 @@ project('qemu', ['c'], meson_version: '>=0.55.0', -- default_options: ['warning_level=1', 'c_std=gnu99', 'cpp_std=gnu++11', 'b_colorout=auto'] + + default_options: ['warning_level=1', 'c_std=gnu11', 'cpp_std=gnu++11', 'b_colorout=auto'] + - (meson.version().version_compare('>=0.56.0') ? [ 'b_staticpic=false' ] : []), -+ default_options: ['warning_level=1', 'c_std=gnu99', 'cpp_std=gnu++11', 'b_colorout=auto', -+ 'b_staticpic=false' ], ++ [ 'b_staticpic=false' ], version: run_command('head', meson.source_root() / 'VERSION').stdout().strip()) not_found = dependency('', required: false) -@@ -1482,7 +1482,9 @@ if capstone_opt == 'internal' +@@ -1790,7 +1790,9 @@ if capstone_opt == 'internal' # Include all configuration defines via a header file, which will wind up # as a dependency on the object file, and thus changes here will result # in a rebuild. 
@@ -212,19 +274,19 @@ index f86cb863de..6d38340d49 100755 cd tests/qemu-iotests # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests -diff --git a/ui/vnc.c b/ui/vnc.c -index 456db47d71..97ae92b181 100644 ---- a/ui/vnc.c -+++ b/ui/vnc.c -@@ -4146,7 +4146,7 @@ void vnc_display_open(const char *id, Error **errp) +diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c +index 47fdae5b21..2a950caa2a 100644 +--- a/ui/vnc-auth-sasl.c ++++ b/ui/vnc-auth-sasl.c +@@ -42,7 +42,7 @@ - #ifdef CONFIG_VNC_SASL - if (sasl) { -- int saslErr = sasl_server_init(NULL, "qemu"); -+ int saslErr = sasl_server_init(NULL, "qemu-kvm"); + bool vnc_sasl_server_init(Error **errp) + { +- int saslErr = sasl_server_init(NULL, "qemu"); ++ int saslErr = sasl_server_init(NULL, "qemu-kvm"); - if (saslErr != SASL_OK) { - error_setg(errp, "Failed to initialize SASL auth: %s", + if (saslErr != SASL_OK) { + error_setg(errp, "Failed to initialize SASL auth: %s", -- 2.27.0 diff --git a/0005-Enable-disable-devices-for-RHEL.patch b/0005-Enable-disable-devices-for-RHEL.patch index d80a533..6ef5467 100644 --- a/0005-Enable-disable-devices-for-RHEL.patch +++ b/0005-Enable-disable-devices-for-RHEL.patch @@ -1,22 +1,41 @@ -From f46ca4c7e719e0a70f8e0ffe3de882c017c216e7 Mon Sep 17 00:00:00 2001 +From 0818ec19b1626de85d061b240e0c369a6fb524fb Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Wed, 2 Sep 2020 09:11:07 +0200 +Date: Thu, 15 Jul 2021 03:22:36 -0400 Subject: Enable/disable devices for RHEL This commit adds all changes related to changes in supported devices. 
Signed-off-by: Miroslav Rezanina +-- +Rebase notes (210623): +- Added CONFIG_TPM (except s390x) + +Rebase notes (210714): +- default-configs moved to configs + +Rebase notes (6.1.0 RC2): +- Use --with-device- configure option to use rhel configs + +Merged patches (210526): +- c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak + +Merged patches (6.1.0 RC1): +- 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI +- f2fe835153 aarch64-rh-devices: add CONFIG_PXB +- b5431733ad disable CONFIG_USB_STORAGE_BOT +- 478ba0cdf6 Disable TPM passthrough + +Merged patches (6.1.0): +- 2504d68a7c aarch64: Add USB storage devices +- 51c2a3253c disable ac97 audio --- - .../devices/aarch64-rh-devices.mak | 28 +++++ - default-configs/devices/aarch64-softmmu.mak | 10 +- - default-configs/devices/ppc64-rh-devices.mak | 36 ++++++ - default-configs/devices/ppc64-softmmu.mak | 10 +- - default-configs/devices/rh-virtio.mak | 10 ++ - default-configs/devices/s390x-rh-devices.mak | 16 +++ - default-configs/devices/s390x-softmmu.mak | 4 +- - default-configs/devices/x86_64-rh-devices.mak | 104 ++++++++++++++++++ - default-configs/devices/x86_64-softmmu.mak | 10 +- - .../devices/x86_64-upstream-devices.mak | 4 + + .distro/qemu-kvm.spec.template | 12 ++- + .../aarch64-softmmu/aarch64-rh-devices.mak | 33 ++++++ + .../ppc64-softmmu/ppc64-rh-devices.mak | 35 ++++++ + configs/devices/rh-virtio.mak | 10 ++ + .../s390x-softmmu/s390x-rh-devices.mak | 15 +++ + .../x86_64-softmmu/x86_64-rh-devices.mak | 102 ++++++++++++++++++ + .../x86_64-upstream-devices.mak | 4 + hw/acpi/ich9.c | 4 +- hw/arm/meson.build | 2 +- hw/block/fdc.c | 10 ++ @@ -30,26 +49,25 @@ Signed-off-by: Miroslav Rezanina hw/timer/hpet.c | 8 ++ hw/usb/meson.build | 2 +- qemu-options.hx | 4 - - redhat/qemu-kvm.spec.template | 5 +- target/arm/cpu_tcg.c | 10 ++ target/ppc/cpu-models.c | 10 ++ - target/s390x/cpu_models.c | 3 + - target/s390x/kvm.c | 8 ++ - 28 files changed, 304 insertions(+), 22 deletions(-) - create mode 100644
default-configs/devices/aarch64-rh-devices.mak - create mode 100644 default-configs/devices/ppc64-rh-devices.mak - create mode 100644 default-configs/devices/rh-virtio.mak - create mode 100644 default-configs/devices/s390x-rh-devices.mak - create mode 100644 default-configs/devices/x86_64-rh-devices.mak - create mode 100644 default-configs/devices/x86_64-upstream-devices.mak + target/s390x/cpu_models_sysemu.c | 3 + + target/s390x/kvm/kvm.c | 8 ++ + 24 files changed, 285 insertions(+), 15 deletions(-) + create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak + create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak + create mode 100644 configs/devices/rh-virtio.mak + create mode 100644 configs/devices/s390x-softmmu/s390x-rh-devices.mak + create mode 100644 configs/devices/x86_64-softmmu/x86_64-rh-devices.mak + create mode 100644 configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak -diff --git a/default-configs/devices/aarch64-rh-devices.mak b/default-configs/devices/aarch64-rh-devices.mak +diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak new file mode 100644 -index 0000000000..a4d67274c0 +index 0000000000..cd9c7c5127 --- /dev/null -+++ b/default-configs/devices/aarch64-rh-devices.mak -@@ -0,0 +1,28 @@ -+include rh-virtio.mak ++++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +@@ -0,0 +1,33 @@ ++include ../rh-virtio.mak + +CONFIG_ARM_GIC_KVM=y +CONFIG_ARM_GIC=y @@ -66,6 +84,8 @@ index 0000000000..a4d67274c0 +CONFIG_USB=y +CONFIG_USB_XHCI=y +CONFIG_USB_XHCI_PCI=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y +CONFIG_VFIO=y +CONFIG_VFIO_PCI=y +CONFIG_VIRTIO_MMIO=y @@ -73,36 +93,20 @@ index 0000000000..a4d67274c0 +CONFIG_XIO3130=y +CONFIG_NVDIMM=y +CONFIG_ACPI_APEI=y ++CONFIG_TPM=y +CONFIG_TPM_EMULATOR=y +CONFIG_TPM_TIS_SYSBUS=y +CONFIG_PTIMER=y +CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y -diff --git 
a/default-configs/devices/aarch64-softmmu.mak b/default-configs/devices/aarch64-softmmu.mak -index 958b1e08e4..8f6867d48a 100644 ---- a/default-configs/devices/aarch64-softmmu.mak -+++ b/default-configs/devices/aarch64-softmmu.mak -@@ -1,8 +1,10 @@ - # Default configuration for aarch64-softmmu - - # We support all the 32 bit boards so need all their config --include arm-softmmu.mak -+#include arm-softmmu.mak - --CONFIG_XLNX_ZYNQMP_ARM=y --CONFIG_XLNX_VERSAL=y --CONFIG_SBSA_REF=y -+#CONFIG_XLNX_ZYNQMP_ARM=y -+#CONFIG_XLNX_VERSAL=y -+#CONFIG_SBSA_REF=y -+ -+include aarch64-rh-devices.mak -diff --git a/default-configs/devices/ppc64-rh-devices.mak b/default-configs/devices/ppc64-rh-devices.mak ++CONFIG_PVPANIC_PCI=y ++CONFIG_PXB=y +diff --git a/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak new file mode 100644 -index 0000000000..5b01b7fac0 +index 0000000000..6a3e3f0227 --- /dev/null -+++ b/default-configs/devices/ppc64-rh-devices.mak -@@ -0,0 +1,36 @@ -+include rh-virtio.mak ++++ b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak +@@ -0,0 +1,35 @@ ++include ../rh-virtio.mak + +CONFIG_DIMM=y +CONFIG_MEM_DEVICE=y @@ -119,7 +123,6 @@ index 0000000000..5b01b7fac0 +CONFIG_USB_OHCI=y +CONFIG_USB_OHCI_PCI=y +CONFIG_USB_SMARTCARD=y -+CONFIG_USB_STORAGE_BOT=y +CONFIG_USB_STORAGE_CORE=y +CONFIG_USB_STORAGE_CLASSIC=y +CONFIG_USB_XHCI=y @@ -135,36 +138,14 @@ index 0000000000..5b01b7fac0 +CONFIG_WDT_IB6300ESB=y +CONFIG_XICS=y +CONFIG_XIVE=y ++CONFIG_TPM=y +CONFIG_TPM_SPAPR=y +CONFIG_TPM_EMULATOR=y -+CONFIG_TPM_PASSTHROUGH=y -diff --git a/default-configs/devices/ppc64-softmmu.mak b/default-configs/devices/ppc64-softmmu.mak -index ae0841fa3a..040e5575e7 100644 ---- a/default-configs/devices/ppc64-softmmu.mak -+++ b/default-configs/devices/ppc64-softmmu.mak -@@ -1,11 +1,13 @@ - # Default configuration for ppc64-softmmu - - # Include all 32-bit boards --include ppc-softmmu.mak -+#include ppc-softmmu.mak - - # For PowerNV 
--CONFIG_POWERNV=y -+#CONFIG_POWERNV=y - - # For pSeries --CONFIG_PSERIES=y --CONFIG_NVDIMM=y -+#CONFIG_PSERIES=y -+#CONFIG_NVDIMM=y -+ -+include ppc64-rh-devices.mak -diff --git a/default-configs/devices/rh-virtio.mak b/default-configs/devices/rh-virtio.mak +diff --git a/configs/devices/rh-virtio.mak b/configs/devices/rh-virtio.mak new file mode 100644 index 0000000000..94ede1b5f6 --- /dev/null -+++ b/default-configs/devices/rh-virtio.mak ++++ b/configs/devices/rh-virtio.mak @@ -0,0 +1,10 @@ +CONFIG_VIRTIO=y +CONFIG_VIRTIO_BALLOON=y @@ -176,13 +157,13 @@ index 0000000000..94ede1b5f6 +CONFIG_VIRTIO_RNG=y +CONFIG_VIRTIO_SCSI=y +CONFIG_VIRTIO_SERIAL=y -diff --git a/default-configs/devices/s390x-rh-devices.mak b/default-configs/devices/s390x-rh-devices.mak +diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak new file mode 100644 -index 0000000000..08a15f3e01 +index 0000000000..d3b38312e1 --- /dev/null -+++ b/default-configs/devices/s390x-rh-devices.mak -@@ -0,0 +1,16 @@ -+include rh-virtio.mak ++++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak +@@ -0,0 +1,15 @@ ++include ../rh-virtio.mak + +CONFIG_PCI=y +CONFIG_S390_CCW_VIRTIO=y @@ -190,7 +171,6 @@ index 0000000000..08a15f3e01 +CONFIG_S390_FLIC_KVM=y +CONFIG_SCLPCONSOLE=y +CONFIG_SCSI=y -+CONFIG_TERMINAL3270=y +CONFIG_VFIO=y +CONFIG_VFIO_AP=y +CONFIG_VFIO_CCW=y @@ -198,27 +178,15 @@ index 0000000000..08a15f3e01 +CONFIG_VHOST_USER=y +CONFIG_VIRTIO_CCW=y +CONFIG_WDT_DIAG288=y -diff --git a/default-configs/devices/s390x-softmmu.mak b/default-configs/devices/s390x-softmmu.mak -index f2287a133f..3e2e388e91 100644 ---- a/default-configs/devices/s390x-softmmu.mak -+++ b/default-configs/devices/s390x-softmmu.mak -@@ -10,4 +10,6 @@ - - # Boards: - # --CONFIG_S390_CCW_VIRTIO=y -+#CONFIG_S390_CCW_VIRTIO=y -+ -+include s390x-rh-devices.mak -diff --git a/default-configs/devices/x86_64-rh-devices.mak b/default-configs/devices/x86_64-rh-devices.mak +diff --git 
a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..9f41400530 +index 0000000000..24b96ba0c4 --- /dev/null -+++ b/default-configs/devices/x86_64-rh-devices.mak -@@ -0,0 +1,104 @@ -+include rh-virtio.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -0,0 +1,102 @@ ++include ../rh-virtio.mak ++include x86_64-upstream-devices.mak + -+CONFIG_AC97=y +CONFIG_ACPI=y +CONFIG_ACPI_PCI=y +CONFIG_ACPI_CPU_HOTPLUG=y @@ -237,6 +205,7 @@ index 0000000000..9f41400530 +CONFIG_E1000_PCI=y +CONFIG_EDU=y +CONFIG_FDC=y ++CONFIG_FDC_SYSBUS=y +CONFIG_FW_CFG_DMA=y +CONFIG_HDA=y +CONFIG_HYPERV=y @@ -278,7 +247,6 @@ index 0000000000..9f41400530 +CONFIG_PVPANIC_ISA=y +CONFIG_PXB=y +CONFIG_Q35=y -+CONFIG_QXL=y +CONFIG_RTL8139_PCI=y +CONFIG_SCSI=y +CONFIG_SERIAL=y @@ -288,13 +256,11 @@ index 0000000000..9f41400530 +CONFIG_SGA=y +CONFIG_SMBIOS=y +CONFIG_SMBUS_EEPROM=y -+CONFIG_SPICE=y +CONFIG_TEST_DEVICES=y +CONFIG_USB=y +CONFIG_USB_EHCI=y +CONFIG_USB_EHCI_PCI=y +CONFIG_USB_SMARTCARD=y -+CONFIG_USB_STORAGE_BOT=y +CONFIG_USB_STORAGE_CORE=y +CONFIG_USB_STORAGE_CLASSIC=y +CONFIG_USB_UHCI=y @@ -316,42 +282,25 @@ index 0000000000..9f41400530 +CONFIG_WDT_IB6300ESB=y +CONFIG_WDT_IB700=y +CONFIG_XIO3130=y ++CONFIG_TPM=y +CONFIG_TPM_CRB=y +CONFIG_TPM_TIS_ISA=y +CONFIG_TPM_EMULATOR=y -+CONFIG_TPM_PASSTHROUGH=y -diff --git a/default-configs/devices/x86_64-softmmu.mak b/default-configs/devices/x86_64-softmmu.mak -index 64b2ee2960..e57bcff7d9 100644 ---- a/default-configs/devices/x86_64-softmmu.mak -+++ b/default-configs/devices/x86_64-softmmu.mak -@@ -1,3 +1,11 @@ - # Default configuration for x86_64-softmmu - --include i386-softmmu.mak -+#include i386-softmmu.mak -+ -+include x86_64-rh-devices.mak -+ -+# -+# RHEL: this is for the limited upstream machine type support, so to export -+# some more devices than what RHEL machines have. 
-+# -+include x86_64-upstream-devices.mak -diff --git a/default-configs/devices/x86_64-upstream-devices.mak b/default-configs/devices/x86_64-upstream-devices.mak +diff --git a/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak b/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak new file mode 100644 index 0000000000..2cd20f54d2 --- /dev/null -+++ b/default-configs/devices/x86_64-upstream-devices.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak @@ -0,0 +1,4 @@ +# We need "isa-parallel" +CONFIG_PARALLEL=y +# We need "hpet" +CONFIG_HPET=y diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 853447cf9d..7f01fad64c 100644 +index 778e27b659..802501881f 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c -@@ -374,8 +374,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) +@@ -424,8 +424,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; pm->acpi_memory_hotplug.is_enabled = true; pm->cpu_hotplug_legacy = true; @@ -360,13 +309,13 @@ index 853447cf9d..7f01fad64c 100644 + pm->disable_s3 = 1; + pm->disable_s4 = 1; pm->s4_val = 2; + pm->use_acpi_hotplug_bridge = true; - object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, diff --git a/hw/arm/meson.build b/hw/arm/meson.build -index be39117b9b..6fcc5ede50 100644 +index 721a8eb8be..87ed4dd914 100644 --- a/hw/arm/meson.build +++ b/hw/arm/meson.build -@@ -30,7 +30,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) +@@ -31,7 +31,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c')) arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c')) @@ -376,19 +325,19 @@ index be39117b9b..6fcc5ede50 100644 arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 
a825c2acba..c62927bb3a 100644 +index 9014cd30b3..46cee8c544 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c -@@ -49,6 +49,8 @@ - #include "trace.h" +@@ -48,6 +48,8 @@ #include "qom/object.h" + #include "fdc-internal.h" +#include "hw/boards.h" + /********************************************************/ /* debug Floppy devices */ -@@ -2554,6 +2556,14 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, +@@ -2320,6 +2322,14 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) FDrive *drive; static int command_tables_inited = 0; @@ -459,10 +408,10 @@ index fdca6ca659..fa1a7eee51 100644 Also accept 8 MB/16 MB for backward compatibility. */ if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index b9860e35a5..beb1ea6c46 100644 +index d3e738320b..7c77312463 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -220,7 +220,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -232,7 +232,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -472,7 +421,7 @@ index b9860e35a5..beb1ea6c46 100644 } static const TypeInfo piix3_ide_info = { -@@ -249,6 +250,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -261,6 +262,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -482,10 +431,10 @@ index b9860e35a5..beb1ea6c46 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index dde85ba6c6..62cf60c9c9 100644 +index baba62f357..bc360347ea 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c -@@ -597,6 +597,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) +@@ -796,6 +796,8 @@ static void 
i8042_class_initfn(ObjectClass *klass, void *data) dc->vmsd = &vmstate_kbd_isa; isa->build_aml = i8042_build_aml; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); @@ -495,10 +444,10 @@ index dde85ba6c6..62cf60c9c9 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index 4f75b44cfc..6f075fe235 100644 +index a30546c5d5..c2877978d9 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1797,6 +1797,7 @@ static const E1000Info e1000_devices[] = { +@@ -1814,6 +1814,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -506,7 +455,7 @@ index 4f75b44cfc..6f075fe235 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1809,6 +1810,7 @@ static const E1000Info e1000_devices[] = { +@@ -1826,6 +1827,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -551,11 +500,11 @@ index 9520471be2..202e032524 100644 warn_report("Hpet's intcap not initialized"); } diff --git a/hw/usb/meson.build b/hw/usb/meson.build -index fb7a74e73a..674993aa4f 100644 +index de853d780d..0776ae6a20 100644 --- a/hw/usb/meson.build +++ b/hw/usb/meson.build -@@ -55,7 +55,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade - if config_host.has_key('CONFIG_SMARTCARD') +@@ -52,7 +52,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade + if cacard.found() usbsmartcard_ss = ss.source_set() usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD', - if_true: [cacard, files('ccid-card-emulated.c', 'ccid-card-passthru.c')]) @@ -564,10 +513,10 @@ index fb7a74e73a..674993aa4f 100644 endif diff --git a/qemu-options.hx b/qemu-options.hx -index fd21002bd6..0d4fb61bf7 100644 +index 83aa59a920..ac596e01a1 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -2327,10 +2327,6 @@ ERST +@@ -2412,10 +2412,6 @@ ERST DEF("no-hpet", 0, QEMU_OPTION_no_hpet, "-no-hpet disable HPET\n", 
QEMU_ARCH_I386) @@ -579,7 +528,7 @@ index fd21002bd6..0d4fb61bf7 100644 DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index 046e476f65..c3cd0ca039 100644 +index ed444bf436..fafd019539 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c @@ -22,6 +22,7 @@ @@ -622,7 +571,7 @@ index 046e476f65..c3cd0ca039 100644 static void cortex_m0_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -927,6 +932,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) +@@ -928,6 +933,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) cc->gdb_core_xml_file = "arm-m-profile.xml"; } @@ -630,7 +579,7 @@ index 046e476f65..c3cd0ca039 100644 #ifndef TARGET_AARCH64 /* -@@ -1004,6 +1010,7 @@ static void arm_max_initfn(Object *obj) +@@ -1007,6 +1013,7 @@ static void arm_max_initfn(Object *obj) #endif /* !TARGET_AARCH64 */ static const ARMCPUInfo arm_tcg_cpus[] = { @@ -638,7 +587,7 @@ index 046e476f65..c3cd0ca039 100644 { .name = "arm926", .initfn = arm926_initfn }, { .name = "arm946", .initfn = arm946_initfn }, { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1019,7 +1026,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1022,7 +1029,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "cortex-a7", .initfn = cortex_a7_initfn }, { .name = "cortex-a8", .initfn = cortex_a8_initfn }, { .name = "cortex-a9", .initfn = cortex_a9_initfn }, @@ -648,7 +597,7 @@ index 046e476f65..c3cd0ca039 100644 { .name = "cortex-m0", .initfn = cortex_m0_initfn, .class_init = arm_v7m_class_init }, { .name = "cortex-m3", .initfn = cortex_m3_initfn, -@@ -1050,6 +1059,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1053,6 +1062,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, { .name = "pxa270-c0", .initfn = 
pxa270c0_initfn }, { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, @@ -726,11 +675,11 @@ index 87e4228614..6eaa65efff 100644 +#endif { NULL, NULL } }; -diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 050dcf2d42..9254ff46bf 100644 ---- a/target/s390x/cpu_models.c -+++ b/target/s390x/cpu_models.c -@@ -430,6 +430,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, +diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c +index 05c3ccaaff..6a04ccab1b 100644 +--- a/target/s390x/cpu_models_sysemu.c ++++ b/target/s390x/cpu_models_sysemu.c +@@ -36,6 +36,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, (max_model->def->gen == model->def->gen && max_model->def->ec_ga < model->def->ec_ga)) { list_add_feat("type", unavailable); @@ -740,11 +689,11 @@ index 050dcf2d42..9254ff46bf 100644 } /* detect missing features if any to properly report them */ -diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c -index 4fb3bbfef5..6c69d84b84 100644 ---- a/target/s390x/kvm.c -+++ b/target/s390x/kvm.c -@@ -2516,6 +2516,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index 5b1fdb55c4..c52434985b 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -2508,6 +2508,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } diff --git a/0006-Machine-type-related-general-changes.patch b/0006-Machine-type-related-general-changes.patch index 5c503bc..3c5c4df 100644 --- a/0006-Machine-type-related-general-changes.patch +++ b/0006-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From 80e9b92048e6fe7c7aef0e64cbc0f855bd3a6272 Mon Sep 17 00:00:00 2001 +From fd2a04cf2221d8c541a70a66021c12a9b9c93aaa Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type 
related general changes @@ -8,72 +8,43 @@ split to allow easier review. It contains changes not related to any architecture. Signed-off-by: Miroslav Rezanina + +Merged patches (6.1.0 RC1): +- f2fb42a3c6 redhat: add missing entries in hw_compat_rhel_8_4 +- 1949ec258e hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3 + +Merged patches (6.1.0): +- a3995e2eff Remove RHEL 7.0.0 machine type (only generic changes) +- ad3190a79b Remove RHEL 7.1.0 machine type (only generic changes) +- 84bbe15d4e Remove RHEL 7.2.0 machine type (only generic changes) +- 0215eb3356 Remove RHEL 7.3.0 machine types (only generic changes) +- af69d1ca6e Remove RHEL 7.4.0 machine types (only generic changes) +- 8f7a74ab78 Remove RHEL 7.5.0 machine types (only generic changes) --- - hw/acpi/ich9.c | 15 +++ hw/acpi/piix4.c | 5 +- hw/arm/virt.c | 2 +- - hw/char/serial.c | 16 +++ - hw/core/machine.c | 251 +++++++++++++++++++++++++++++++++++ + hw/char/serial.c | 4 + + hw/core/machine.c | 159 +++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- hw/i386/pc_piix.c | 2 + hw/i386/pc_q35.c | 2 + - hw/net/e1000e.c | 21 +++ hw/net/rtl8139.c | 4 +- - hw/rtc/mc146818rtc.c | 6 + - hw/smbios/smbios.c | 46 ++++++- + hw/rtc/mc146818rtc.c | 2 + + hw/smbios/smbios.c | 46 +++++++++- hw/timer/i8254_common.c | 2 +- hw/usb/hcd-uhci.c | 4 +- - hw/usb/hcd-xhci-pci.c | 59 ++++++-- + hw/usb/hcd-xhci-pci.c | 59 ++++++++++--- hw/usb/hcd-xhci-pci.h | 1 + - hw/usb/hcd-xhci.c | 20 +++ - hw/usb/hcd-xhci.h | 2 + - include/hw/acpi/ich9.h | 3 + - include/hw/boards.h | 33 +++++ + include/hw/boards.h | 18 ++++ include/hw/firmware/smbios.h | 5 +- include/hw/i386/pc.h | 3 + - include/hw/usb.h | 3 + - migration/migration.c | 2 + - migration/migration.h | 5 + - 25 files changed, 489 insertions(+), 25 deletions(-) + 17 files changed, 295 insertions(+), 25 deletions(-) -diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 7f01fad64c..33b0c6e33c 100644 ---- a/hw/acpi/ich9.c -+++ b/hw/acpi/ich9.c -@@ -369,6 
+369,18 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) - s->pm.enable_tco = value; - } - -+static bool ich9_pm_get_force_rev1_fadt(Object *obj, Error **errp) -+{ -+ ICH9LPCState *s = ICH9_LPC_DEVICE(obj); -+ return s->pm.force_rev1_fadt; -+} -+ -+static void ich9_pm_set_force_rev1_fadt(Object *obj, bool value, Error **errp) -+{ -+ ICH9LPCState *s = ICH9_LPC_DEVICE(obj); -+ s->pm.force_rev1_fadt = value; -+} -+ - void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) - { - static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; -@@ -391,6 +403,9 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) - object_property_add_bool(obj, "cpu-hotplug-legacy", - ich9_pm_get_cpu_hotplug_legacy, - ich9_pm_set_cpu_hotplug_legacy); -+ object_property_add_bool(obj, "__com.redhat_force-rev1-fadt", -+ ich9_pm_get_force_rev1_fadt, -+ ich9_pm_set_force_rev1_fadt); - object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S3_DISABLED, - &pm->disable_s3, OBJ_PROP_FLAG_READWRITE); - object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S4_DISABLED, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 8f8b0e95e5..9865d1a349 100644 +index 48f7a1edbc..af21cb4ac3 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -278,6 +278,7 @@ static const VMStateDescription vmstate_acpi = { +@@ -279,6 +279,7 @@ static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, .minimum_version_id = 3, @@ -81,7 +52,7 @@ index 8f8b0e95e5..9865d1a349 100644 .post_load = vmstate_acpi_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), -@@ -643,8 +644,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) +@@ -644,8 +645,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) static Property piix4_pm_properties[] = { DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), @@ -93,20 +64,20 @@ index 8f8b0e95e5..9865d1a349 100644 
DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, use_acpi_hotplug_bridge, true), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 9f01d9041b..f904d3e98e 100644 +index 81eda46b0b..cd1a2d985d 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1522,7 +1522,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1524,7 +1524,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, - true, SMBIOS_ENTRY_POINT_30); + true, NULL, NULL, SMBIOS_ENTRY_POINT_30); - smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, - &smbios_anchor, &smbios_anchor_len); + smbios_get_tables(MACHINE(vms), NULL, 0, + &smbios_tables, &smbios_tables_len, diff --git a/hw/char/serial.c b/hw/char/serial.c -index bc2e322970..cc378142a3 100644 +index 7061aacbce..8fa5ab1e8e 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -37,6 +37,7 @@ @@ -117,54 +88,35 @@ index bc2e322970..cc378142a3 100644 #define UART_LCR_DLAB 0x80 /* Divisor latch access bit */ -@@ -689,6 +690,9 @@ static int serial_post_load(void *opaque, int version_id) - static bool serial_thr_ipending_needed(void *opaque) - { - SerialState *s = opaque; -+ if (migrate_pre_2_2) { -+ return false; -+ } - - if (s->ier & UART_IER_THRI) { - bool expected_value = ((s->iir & UART_IIR_ID) == UART_IIR_THRI); -@@ -770,6 +774,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { +@@ -770,6 +771,7 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { static bool serial_fifo_timeout_timer_needed(void *opaque) { SerialState *s = (SerialState *)opaque; -+ if (migrate_pre_2_2) { -+ return false; -+ } + return timer_pending(s->fifo_timeout_timer); } -@@ -787,6 +795,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { +@@ -787,6 +789,7 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { static bool serial_timeout_ipending_needed(void *opaque) { 
SerialState *s = (SerialState *)opaque; -+ if (migrate_pre_2_2) { -+ return false; -+ } + return s->timeout_ipending != 0; } -@@ -804,6 +816,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { +@@ -804,6 +807,7 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { static bool serial_poll_needed(void *opaque) { SerialState *s = (SerialState *)opaque; -+ if (migrate_pre_2_2) { -+ return false; -+ } + return s->poll_msl >= 0; } diff --git a/hw/core/machine.c b/hw/core/machine.c -index 40def78183..848e7fdff6 100644 +index 54e040587d..d681a06a47 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -36,6 +36,257 @@ +@@ -37,6 +37,165 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-pci.h" @@ -176,6 +128,10 @@ index 40def78183..848e7fdff6 100644 + { "ICH9-LPC", "smm-compat", "on"}, + /* hw_compat_rhel_8_4 from hw_compat_5_2 */ + { "PIIX4_PM", "smm-compat", "on"}, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "virtio-blk-device", "report-discard-granularity", "off" }, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "virtio-net-pci", "vectors", "3"}, +}; +const size_t hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4); + @@ -197,6 +153,8 @@ index 40def78183..848e7fdff6 100644 + { "nvme", "use-intel-id", "on"}, + /* hw_compat_rhel_8_3 from hw_compat_5_1 */ + { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "pl011", "migrate-clk", "off" }, + /* hw_compat_rhel_8_3 bz 1912846 */ + { "pci-xhci", "x-rh-late-msi-cap", "off" }, + /* hw_compat_rhel_8_3 from hw_compat_5_1 */ @@ -321,107 +279,9 @@ index 40def78183..848e7fdff6 100644 +}; +const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); + -+/* The same as hw_compat_2_11 + hw_compat_2_10 */ -+GlobalProperty hw_compat_rhel_7_5[] = { -+ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ -+ { "hpet", "hpet-offset-saved", "false" }, -+ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ -+ { 
"virtio-blk-pci", "vectors", "2" }, -+ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ -+ { "vhost-user-blk-pci", "vectors", "2" }, -+ /* hw_compat_rhel_7_5 from hw_compat_2_11 -+ bz 1608778 modified for our naming */ -+ { "e1000-82540em", "migrate_tso_props", "off" }, -+ /* hw_compat_rhel_7_5 from hw_compat_2_10 */ -+ { "virtio-mouse-device", "wheel-axis", "false" }, -+ /* hw_compat_rhel_7_5 from hw_compat_2_10 */ -+ { "virtio-tablet-device", "wheel-axis", "false" }, -+ { "cirrus-vga", "vgamem_mb", "16" }, -+ { "migration", "decompress-error-check", "off" }, -+}; -+const size_t hw_compat_rhel_7_5_len = G_N_ELEMENTS(hw_compat_rhel_7_5); -+ -+/* Mostly like hw_compat_2_9 except -+ * x-mtu-bypass-backend, x-migrate-msix has already been -+ * backported to RHEL7.4. shpc was already on in 7.4. -+ */ -+GlobalProperty hw_compat_rhel_7_4[] = { -+ { "intel-iommu", "pt", "off" }, -+}; -+ -+const size_t hw_compat_rhel_7_4_len = G_N_ELEMENTS(hw_compat_rhel_7_4); -+/* Mostly like HW_COMPAT_2_6 + HW_COMPAT_2_7 + HW_COMPAT_2_8 except -+ * disable-modern, disable-legacy, page-per-vq have already been -+ * backported to RHEL7.3 -+ */ -+GlobalProperty hw_compat_rhel_7_3[] = { -+ { "virtio-mmio", "format_transport_address", "off" }, -+ { "virtio-serial-device", "emergency-write", "off" }, -+ { "ioapic", "version", "0x11" }, -+ { "intel-iommu", "x-buggy-eim", "true" }, -+ { "virtio-pci", "x-ignore-backend-features", "on" }, -+ { "fw_cfg_mem", "x-file-slots", stringify(0x10) }, -+ { "fw_cfg_io", "x-file-slots", stringify(0x10) }, -+ { "pflash_cfi01", "old-multiple-chip-handling", "on" }, -+ { TYPE_PCI_DEVICE, "x-pcie-extcap-init", "off" }, -+ { "virtio-pci", "x-pcie-deverr-init", "off" }, -+ { "virtio-pci", "x-pcie-lnkctl-init", "off" }, -+ { "virtio-pci", "x-pcie-pm-init", "off" }, -+ { "virtio-net-device", "x-mtu-bypass-backend", "off" }, -+ { "e1000e", "__redhat_e1000e_7_3_intr_state", "on" }, -+}; -+const size_t hw_compat_rhel_7_3_len = G_N_ELEMENTS(hw_compat_rhel_7_3); -+ -+/* 
Mostly like hw_compat_2_4 + 2_3 but: -+ * we don't need "any_layout" as it has been backported to 7.2 -+ */ -+GlobalProperty hw_compat_rhel_7_2[] = { -+ { "virtio-blk-device", "scsi", "true" }, -+ { "e1000-82540em", "extra_mac_registers", "off" }, -+ { "virtio-pci", "x-disable-pcie", "on" }, -+ { "virtio-pci", "migrate-extra", "off" }, -+ { "fw_cfg_mem", "dma_enabled", "off" }, -+ { "fw_cfg_io", "dma_enabled", "off" }, -+ { "isa-fdc", "fallback", "144" }, -+ /* Optional because not all virtio-pci devices support legacy mode */ -+ { "virtio-pci", "disable-modern", "on", .optional = true }, -+ { "virtio-pci", "disable-legacy", "off", .optional = true }, -+ { TYPE_PCI_DEVICE, "x-pcie-lnksta-dllla", "off" }, -+ { "virtio-pci", "page-per-vq", "on" }, -+ /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ -+ { "migration", "send-section-footer", "off" }, -+ /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ -+ { "migration", "store-global-state", "off", -+ }, -+}; -+const size_t hw_compat_rhel_7_2_len = G_N_ELEMENTS(hw_compat_rhel_7_2); -+ -+/* Mostly like hw_compat_2_1 but: -+ * we don't need virtio-scsi-pci since 7.0 already had that on -+ * -+ * RH: Note, qemu-extended-regs should have been enabled in the 7.1 -+ * machine type, but was accidentally turned off in 7.2 onwards. 
-+ */ -+GlobalProperty hw_compat_rhel_7_1[] = { -+ { "intel-hda-generic", "old_msi_addr", "on" }, -+ { "VGA", "qemu-extended-regs", "off" }, -+ { "secondary-vga", "qemu-extended-regs", "off" }, -+ { "usb-mouse", "usb_version", stringify(1) }, -+ { "usb-kbd", "usb_version", stringify(1) }, -+ { "virtio-pci", "virtio-pci-bus-master-bug-migration", "on" }, -+ { "virtio-blk-pci", "any_layout", "off" }, -+ { "virtio-balloon-pci", "any_layout", "off" }, -+ { "virtio-serial-pci", "any_layout", "off" }, -+ { "virtio-9p-pci", "any_layout", "off" }, -+ { "virtio-rng-pci", "any_layout", "off" }, -+ /* HW_COMPAT_RHEL7_1 - introduced with 2.10.0 */ -+ { "migration", "send-configuration", "off" }, -+}; -+const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); -+ - GlobalProperty hw_compat_5_2[] = { - { "ICH9-LPC", "smm-compat", "on"}, - { "PIIX4_PM", "smm-compat", "on"}, + GlobalProperty hw_compat_6_0[] = { + { "gpex-pcihost", "allow-unmapped-accesses", "false" }, + { "i8042", "extended-state", "false"}, diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c index 90851e730b..a91c5d7467 100644 --- a/hw/display/vga-isa.c @@ -436,10 +296,10 @@ index 90851e730b..a91c5d7467 100644 }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 46cc951073..62433d8022 100644 +index 30b8bd6ea9..eebb4f3141 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -179,6 +179,8 @@ static void pc_init1(MachineState *machine, +@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -449,10 +309,10 @@ index 46cc951073..62433d8022 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 53450190f5..fce52ca70b 100644 +index 04b4a4788d..e7724fd02c 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -198,6 +198,8 @@ static void pc_q35_init(MachineState *machine) +@@ -199,6 +199,8 @@ static void pc_q35_init(MachineState *machine) 
smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -461,80 +321,6 @@ index 53450190f5..fce52ca70b 100644 SMBIOS_ENTRY_POINT_21); } -diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index a8a77eca95..6d39c1f1c4 100644 ---- a/hw/net/e1000e.c -+++ b/hw/net/e1000e.c -@@ -80,6 +80,11 @@ struct E1000EState { - - E1000ECore core; - -+ /* 7.3 had the intr_state field that was in the original e1000e code -+ * but that was removed prior to 2.7's release -+ */ -+ bool redhat_7_3_intr_state_enable; -+ uint32_t redhat_7_3_intr_state; - }; - - #define E1000E_MMIO_IDX 0 -@@ -95,6 +100,10 @@ struct E1000EState { - #define E1000E_MSIX_TABLE (0x0000) - #define E1000E_MSIX_PBA (0x2000) - -+/* Values as in RHEL 7.3 build and original upstream */ -+#define RH_E1000E_USE_MSI BIT(0) -+#define RH_E1000E_USE_MSIX BIT(1) -+ - static uint64_t - e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) - { -@@ -306,6 +315,8 @@ e1000e_init_msix(E1000EState *s) - } else { - if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { - msix_uninit(d, &s->msix, &s->msix); -+ } else { -+ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSIX; - } - } - } -@@ -477,6 +488,8 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) - ret = msi_init(PCI_DEVICE(s), 0xD0, 1, true, false, NULL); - if (ret) { - trace_e1000e_msi_init_fail(ret); -+ } else { -+ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSI; - } - - if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, -@@ -600,6 +613,11 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { - VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ - e1000e_vmstate_intr_timer, E1000IntrDelayTimer) - -+static bool rhel_7_3_check(void *opaque, int version_id) -+{ -+ return ((E1000EState *)opaque)->redhat_7_3_intr_state_enable; -+} -+ - static const VMStateDescription e1000e_vmstate = { - .name = "e1000e", - .version_id = 1, -@@ -611,6 +629,7 @@ static const VMStateDescription 
e1000e_vmstate = { - VMSTATE_MSIX(parent_obj, E1000EState), - - VMSTATE_UINT32(ioaddr, E1000EState), -+ VMSTATE_UINT32_TEST(redhat_7_3_intr_state, E1000EState, rhel_7_3_check), - VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), - VMSTATE_UINT8(core.rx_desc_len, E1000EState), - VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, -@@ -659,6 +678,8 @@ static PropertyInfo e1000e_prop_disable_vnet, - - static Property e1000e_properties[] = { - DEFINE_NIC_PROPERTIES(E1000EState, conf), -+ DEFINE_PROP_BOOL("__redhat_e1000e_7_3_intr_state", E1000EState, -+ redhat_7_3_intr_state_enable, false), - DEFINE_PROP_SIGNED("disable_vnet_hdr", E1000EState, disable_vnet, false, - e1000e_prop_disable_vnet, bool), - DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c index 90b4fc63ce..3ffb9dd22c 100644 --- a/hw/net/rtl8139.c @@ -559,34 +345,30 @@ index 90b4fc63ce..3ffb9dd22c 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c -index 5d0fcacd0c..4a2e52031b 100644 +index 4fbafddb22..6c42cc22cd 100644 --- a/hw/rtc/mc146818rtc.c +++ b/hw/rtc/mc146818rtc.c -@@ -44,6 +44,7 @@ +@@ -43,6 +43,7 @@ + #include "qapi/qapi-events-misc-target.h" #include "qapi/visitor.h" - #include "exec/address-spaces.h" #include "hw/rtc/mc146818rtc_regs.h" +#include "migration/migration.h" #ifdef TARGET_I386 #include "qapi/qapi-commands-misc-target.h" -@@ -822,6 +823,11 @@ static int rtc_post_load(void *opaque, int version_id) +@@ -821,6 +822,7 @@ static int rtc_post_load(void *opaque, int version_id) static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) { RTCState *s = (RTCState *)opaque; -+ -+ if (migrate_pre_2_2) { -+ return false; -+ } + return s->irq_reinject_on_ack_count != 0; } diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index f22c4f5b73..a305a4bcea 100644 +index 7397e56737..3a4bb894ba 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c -@@ -56,6 +56,9 @@ static 
bool smbios_legacy = true; +@@ -57,6 +57,9 @@ static bool smbios_legacy = true; static bool smbios_uuid_encoded = true; /* end: legacy structures & constants for <= 2.0 machines */ @@ -596,7 +378,7 @@ index f22c4f5b73..a305a4bcea 100644 uint8_t *smbios_tables; size_t smbios_tables_len; -@@ -570,7 +573,7 @@ static void smbios_build_type_1_table(void) +@@ -619,7 +622,7 @@ static void smbios_build_type_1_table(void) static void smbios_build_type_2_table(void) { @@ -605,7 +387,7 @@ index f22c4f5b73..a305a4bcea 100644 SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); SMBIOS_TABLE_SET_STR(2, product_str, type2.product); -@@ -792,7 +795,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) +@@ -888,7 +891,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) void smbios_set_defaults(const char *manufacturer, const char *product, const char *version, bool legacy_mode, @@ -617,7 +399,7 @@ index f22c4f5b73..a305a4bcea 100644 { smbios_have_defaults = true; smbios_legacy = legacy_mode; -@@ -813,11 +819,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, +@@ -909,11 +915,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, g_free(smbios_entries); } @@ -698,10 +480,10 @@ index 0cb02a6432..962a9622e5 100644 if (s->masterbus) { USBPort *ports[NB_PORTS]; diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c -index 9421734d0f..9bfe236a7d 100644 +index e934b1a5b1..e18b05e528 100644 --- a/hw/usb/hcd-xhci-pci.c +++ b/hw/usb/hcd-xhci-pci.c -@@ -101,6 +101,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id) +@@ -104,6 +104,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id) return 0; } @@ -735,7 +517,7 @@ index 9421734d0f..9bfe236a7d 100644 static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) { int ret; -@@ -122,23 +149,12 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) +@@ -125,23 +152,12 @@ static void 
usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) s->xhci.nec_quirks = true; } @@ -762,7 +544,7 @@ index 9421734d0f..9bfe236a7d 100644 } pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | -@@ -151,6 +167,14 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) +@@ -154,6 +170,14 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) assert(ret > 0); } @@ -777,7 +559,7 @@ index 9421734d0f..9bfe236a7d 100644 if (s->msix != ON_OFF_AUTO_OFF) { /* TODO check for errors, and should fail when msix=on */ msix_init(dev, s->xhci.numintrs, -@@ -195,11 +219,18 @@ static void xhci_instance_init(Object *obj) +@@ -198,11 +222,18 @@ static void xhci_instance_init(Object *obj) qdev_alias_all_properties(DEVICE(&s->xhci), obj); } @@ -808,79 +590,11 @@ index c193f79443..086a1feb1e 100644 } XHCIPciState; #endif -diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 46212b1e69..6d1f278aad 100644 ---- a/hw/usb/hcd-xhci.c -+++ b/hw/usb/hcd-xhci.c -@@ -3490,9 +3490,27 @@ static const VMStateDescription vmstate_xhci_slot = { - } - }; - -+static int xhci_event_pre_save(void *opaque) -+{ -+ XHCIEvent *s = opaque; -+ -+ s->cve_2014_5263_a = ((uint8_t *)&s->type)[0]; -+ s->cve_2014_5263_b = ((uint8_t *)&s->type)[1]; -+ -+ return 0; -+} -+ -+bool migrate_cve_2014_5263_xhci_fields; -+ -+static bool xhci_event_cve_2014_5263(void *opaque, int version_id) -+{ -+ return migrate_cve_2014_5263_xhci_fields; -+} -+ - static const VMStateDescription vmstate_xhci_event = { - .name = "xhci-event", - .version_id = 1, -+ .pre_save = xhci_event_pre_save, - .fields = (VMStateField[]) { - VMSTATE_UINT32(type, XHCIEvent), - VMSTATE_UINT32(ccode, XHCIEvent), -@@ -3501,6 +3519,8 @@ static const VMStateDescription vmstate_xhci_event = { - VMSTATE_UINT32(flags, XHCIEvent), - VMSTATE_UINT8(slotid, XHCIEvent), - VMSTATE_UINT8(epid, XHCIEvent), -+ VMSTATE_UINT8_TEST(cve_2014_5263_a, XHCIEvent, xhci_event_cve_2014_5263), -+ VMSTATE_UINT8_TEST(cve_2014_5263_b, 
XHCIEvent, xhci_event_cve_2014_5263), - VMSTATE_END_OF_LIST() - } - }; -diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h -index 7bba361f3b..f450ffd13b 100644 ---- a/hw/usb/hcd-xhci.h -+++ b/hw/usb/hcd-xhci.h -@@ -149,6 +149,8 @@ typedef struct XHCIEvent { - uint32_t flags; - uint8_t slotid; - uint8_t epid; -+ uint8_t cve_2014_5263_a; -+ uint8_t cve_2014_5263_b; - } XHCIEvent; - - typedef struct XHCIInterrupter { -diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h -index df519e40b5..e1ecfbaf1f 100644 ---- a/include/hw/acpi/ich9.h -+++ b/include/hw/acpi/ich9.h -@@ -62,6 +62,9 @@ typedef struct ICH9LPCPMRegs { - bool smm_compat; - bool enable_tco; - TCOIORegs tco_regs; -+ -+ /* RH addition, see bz 1489800 */ -+ bool force_rev1_fadt; - } ICH9LPCPMRegs; - - #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" diff --git a/include/hw/boards.h b/include/hw/boards.h -index ad6c8fd537..2d7a65724a 100644 +index accd6eff35..f5423f351a 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -413,4 +413,37 @@ extern const size_t hw_compat_2_2_len; +@@ -416,4 +416,22 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; @@ -901,28 +615,13 @@ index ad6c8fd537..2d7a65724a 100644 + +extern GlobalProperty hw_compat_rhel_7_6[]; +extern const size_t hw_compat_rhel_7_6_len; -+ -+extern GlobalProperty hw_compat_rhel_7_5[]; -+extern const size_t hw_compat_rhel_7_5_len; -+ -+extern GlobalProperty hw_compat_rhel_7_4[]; -+extern const size_t hw_compat_rhel_7_4_len; -+ -+extern GlobalProperty hw_compat_rhel_7_3[]; -+extern const size_t hw_compat_rhel_7_3_len; -+ -+extern GlobalProperty hw_compat_rhel_7_2[]; -+extern const size_t hw_compat_rhel_7_2_len; -+ -+extern GlobalProperty hw_compat_rhel_7_1[]; -+extern const size_t hw_compat_rhel_7_1_len; + #endif diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 02a0ced0a0..67e38a1b13 100644 +index 5a0dd0c8cf..2cb1ec2bab 100644 --- 
a/include/hw/firmware/smbios.h +++ b/include/hw/firmware/smbios.h -@@ -267,7 +267,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); +@@ -278,7 +278,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); void smbios_set_cpuid(uint32_t version, uint32_t features); void smbios_set_defaults(const char *manufacturer, const char *product, const char *version, bool legacy_mode, @@ -935,7 +634,7 @@ index 02a0ced0a0..67e38a1b13 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index dcf060b791..93c012ac95 100644 +index 88dffe7517..17ad7bfc3a 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -107,6 +107,9 @@ struct PCMachineClass { @@ -948,47 +647,6 @@ index dcf060b791..93c012ac95 100644 /* RAM / address space compat: */ bool gigabyte_align; -diff --git a/include/hw/usb.h b/include/hw/usb.h -index 436e07b304..edb2cd94b6 100644 ---- a/include/hw/usb.h -+++ b/include/hw/usb.h -@@ -577,4 +577,7 @@ void usb_pcap_init(FILE *fp); - void usb_pcap_ctrl(USBPacket *p, bool setup); - void usb_pcap_data(USBPacket *p, bool setup); - -+/* hcd-xhci.c -- rhel7.0.0 machine type compatibility */ -+extern bool migrate_cve_2014_5263_xhci_fields; -+ - #endif -diff --git a/migration/migration.c b/migration/migration.c -index 8ca034136b..4afc6069b6 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -167,6 +167,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, - MIGRATION_CAPABILITY_X_COLO, - MIGRATION_CAPABILITY_VALIDATE_UUID); - -+bool migrate_pre_2_2; -+ - /* When we add fault tolerance, we could have several - migrations at once. 
For now we don't need to add - dynamic creation of migration */ -diff --git a/migration/migration.h b/migration/migration.h -index db6708326b..1b6c69751c 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -368,6 +368,11 @@ bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm, - void migrate_add_address(SocketAddress *address); - - int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); -+/* -+ * Disables a load of subsections that were added in 2.2/rh7.2 for backwards -+ * migration compatibility. -+ */ -+extern bool migrate_pre_2_2; - - #define qemu_ram_foreach_block \ - #warning "Use foreach_not_ignored_block in migration code" -- 2.27.0 diff --git a/0007-Add-aarch64-machine-types.patch b/0007-Add-aarch64-machine-types.patch index ae95071..eb79d4f 100644 --- a/0007-Add-aarch64-machine-types.patch +++ b/0007-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From ee8aeb6b79bde21b581090c479faf10e716a7e6d Mon Sep 17 00:00:00 2001 +From 38c87d1469539d2b5224d298db85b0fd43d8bb0c Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -7,15 +7,25 @@ Adding changes to add RHEL machine types for aarch64 architecture. 
Signed-off-by: Miroslav Rezanina --- - hw/arm/virt.c | 211 +++++++++++++++++++++++++++++++++++++++++- +Rebase notes (210623): +- Use CONFIG_TPM check when using TPM structures + +Rebase notes (6.1.0 rc0): +- Add support for default_bus_bypass_iommu + +Merged patches (6.1.0 rc1): +- ea4c0b32d9 arm/virt: Register highmem and gic-version as class properties +- 895e1fa86a hw/arm/virt: Add 8.5 and 9.0 machine types and remove older ones +--- + hw/arm/virt.c | 210 +++++++++++++++++++++++++++++++++++++++++- include/hw/arm/virt.h | 8 ++ - 2 files changed, 218 insertions(+), 1 deletion(-) + 2 files changed, 217 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index f904d3e98e..080cf54ef1 100644 +index cd1a2d985d..3c8e6de36d 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -80,6 +80,7 @@ +@@ -78,6 +78,7 @@ #include "hw/char/pl011.h" #include "qemu/guest-random.h" @@ -23,7 +33,7 @@ index f904d3e98e..080cf54ef1 100644 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -106,7 +107,48 @@ +@@ -104,7 +105,48 @@ DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) @@ -73,7 +83,7 @@ index f904d3e98e..080cf54ef1 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -2113,6 +2155,7 @@ static void machvirt_init(MachineState *machine) +@@ -2117,6 +2159,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -81,7 +91,7 @@ index f904d3e98e..080cf54ef1 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2140,6 +2183,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2144,6 +2187,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -89,7 +99,7 @@ index 
f904d3e98e..080cf54ef1 100644 static bool virt_get_highmem(Object *obj, Error **errp) { -@@ -2237,6 +2281,7 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, +@@ -2241,6 +2285,7 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, visit_type_OnOffAuto(v, name, &vms->acpi, errp); } @@ -97,7 +107,7 @@ index f904d3e98e..080cf54ef1 100644 static bool virt_get_ras(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2264,6 +2309,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) +@@ -2268,6 +2313,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) vms->mte = value; } @@ -105,7 +115,7 @@ index f904d3e98e..080cf54ef1 100644 static char *virt_get_gic_version(Object *obj, Error **errp) { -@@ -2584,6 +2630,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2603,6 +2649,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return fixed_ipa ? 0 : requested_pa_size; } @@ -113,7 +123,7 @@ index f904d3e98e..080cf54ef1 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -2910,3 +2957,165 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -2948,3 +2995,164 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -128,7 +138,9 @@ index f904d3e98e..080cf54ef1 100644 + mc->init = machvirt_init; + /* Maximum supported VCPU count for all virt-rhel* machines */ + mc->max_cpus = 384; ++#ifdef CONFIG_TPM + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); ++#endif + mc->block_default_type = IF_VIRTIO; + mc->no_cdrom = 1; + mc->pci_allow_0_address = true; @@ -156,6 +168,18 @@ index f904d3e98e..080cf54ef1 100644 + object_class_property_set_description(oc, "acpi", + "Enable ACPI"); + ++ object_class_property_add_bool(oc, "highmem", virt_get_highmem, ++ virt_set_highmem); ++ object_class_property_set_description(oc, 
"highmem", ++ "Set on/off to enable/disable using " ++ "physical address space above 32 bits"); ++ ++ object_class_property_add_str(oc, "gic-version", virt_get_gic_version, ++ virt_set_gic_version); ++ object_class_property_set_description(oc, "gic-version", ++ "Set GIC version. " ++ "Valid values are 2, 3, host and max"); ++ + object_class_property_add_str(oc, "x-oem-id", + virt_get_oem_id, + virt_set_oem_id); @@ -164,7 +188,6 @@ index f904d3e98e..080cf54ef1 100644 + "in ACPI table header." + "The string may be up to 6 bytes in size"); + -+ + object_class_property_add_str(oc, "x-oem-table-id", + virt_get_oem_table_id, + virt_set_oem_table_id); @@ -172,6 +195,13 @@ index f904d3e98e..080cf54ef1 100644 + "Override the default value of field OEM Table ID " + "in ACPI table header." + "The string may be up to 8 bytes in size"); ++ object_class_property_add_bool(oc, "default_bus_bypass_iommu", ++ virt_get_default_bus_bypass_iommu, ++ virt_set_default_bus_bypass_iommu); ++ object_class_property_set_description(oc, "default_bus_bypass_iommu", ++ "Set on/off to enable/disable " ++ "bypass_iommu for default root bus"); ++ +} + +static void rhel_virt_instance_init(Object *obj) @@ -187,17 +217,7 @@ index f904d3e98e..080cf54ef1 100644 + + /* High memory is enabled by default */ + vms->highmem = true; -+ object_property_add_bool(obj, "highmem", virt_get_highmem, -+ virt_set_highmem); -+ object_property_set_description(obj, "highmem", -+ "Set on/off to enable/disable using " -+ "physical address space above 32 bits"); + vms->gic_version = VIRT_GIC_VERSION_NOSEL; -+ object_property_add_str(obj, "gic-version", virt_get_gic_version, -+ virt_set_gic_version); -+ object_property_set_description(obj, "gic-version", -+ "Set GIC version. 
" -+ "Valid values are 2, 3, host and max"); + + vms->highmem_ecam = !vmc->no_highmem_ecam; + @@ -226,6 +246,9 @@ index f904d3e98e..080cf54ef1 100644 + /* MTE is disabled by default and non-configurable for RHEL */ + vms->mte = false; + ++ /* The default root bus is attached to iommu by default */ ++ vms->default_bus_bypass_iommu = false; ++ + vms->irqmap = a15irqmap; + + virt_flash_create(vms); @@ -254,36 +277,22 @@ index f904d3e98e..080cf54ef1 100644 +} +type_init(rhel_machine_init); + -+static void rhel840_virt_options(MachineClass *mc) ++static void rhel900_virt_options(MachineClass *mc) +{ + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); +} -+DEFINE_RHEL_MACHINE_AS_LATEST(8, 4, 0) ++DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) + -+static void rhel830_virt_options(MachineClass *mc) ++static void rhel850_virt_options(MachineClass *mc) +{ -+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); -+ -+ rhel840_virt_options(mc); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); -+ vmc->no_kvm_steal_time = true; ++ rhel900_virt_options(mc); +} -+DEFINE_RHEL_MACHINE(8, 3, 0) -+ -+static void rhel820_virt_options(MachineClass *mc) -+{ -+ rhel830_virt_options(mc); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, hw_compat_rhel_8_2_len); -+ mc->numa_mem_supported = true; -+ mc->auto_enable_numa_with_memdev = false; -+} -+DEFINE_RHEL_MACHINE(8, 2, 0) ++DEFINE_RHEL_MACHINE(8, 5, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 921416f918..6c34864a0a 100644 +index 9661c46699..ac21ca712a 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -170,9 +170,17 @@ struct VirtMachineState { +@@ -171,9 +171,17 @@ struct VirtMachineState { #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) diff --git a/0008-Add-ppc64-machine-types.patch b/0008-Add-ppc64-machine-types.patch index 4504703..932a2a8 100644 --- a/0008-Add-ppc64-machine-types.patch +++ b/0008-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From d70214aa1d8bf7aae9ef3a6bbc04f01735722e3c Mon Sep 17 00:00:00 2001 +From 01fe9632c4dcc7a6ce9c0100db6256e815e6bf87 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -6,21 +6,32 @@ Subject: Add ppc64 machine types Adding changes to add RHEL machine types for ppc64 architecture. Signed-off-by: Miroslav Rezanina + +Merged patches (6.1.0 RC1): +- c438c25ac3 redhat: Define pseries-rhel8.5.0 machine type + +Merged patches (6.1.0): +- a3995e2eff Remove RHEL 7.0.0 machine type (only ppc64 changes) +- ad3190a79b Remove RHEL 7.1.0 machine type (only ppc64 changes) +- 84bbe15d4e Remove RHEL 7.2.0 machine type (only ppc64 changes) +- 0215eb3356 Remove RHEL 7.3.0 machine types (only ppc64 changes) +- af69d1ca6e Remove RHEL 7.4.0 machine types (only ppc64 changes) +- 8f7a74ab78 Remove RHEL 7.5.0 machine types (only ppc64 changes) --- - hw/ppc/spapr.c | 368 ++++++++++++++++++++++++++++++++++++++++ - hw/ppc/spapr_cpu_core.c | 13 ++ + hw/ppc/spapr.c | 238 +++++++++++++++++++++++++++++++++++++++- + hw/ppc/spapr_cpu_core.c | 13 +++ include/hw/ppc/spapr.h | 4 + - target/ppc/compat.c | 13 +- + target/ppc/compat.c | 13 ++- target/ppc/cpu.h | 1 + - target/ppc/kvm.c | 27 +++ - target/ppc/kvm_ppc.h | 13 ++ - 7 files changed, 438 insertions(+), 1 deletion(-) + target/ppc/kvm.c | 28 +++++ + target/ppc/kvm_ppc.h | 13 +++ + 7 files changed, 307 insertions(+), 3 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index e4be00b732..f9e8dfdfc9 100644 +index 81699d4f8b..1386e45e70 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -1568,6 +1568,9 @@ static void spapr_machine_reset(MachineState *machine) +@@ -1592,6 +1592,9 @@ static void 
spapr_machine_reset(MachineState *machine) pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); @@ -30,7 +41,7 @@ index e4be00b732..f9e8dfdfc9 100644 first_ppc_cpu = POWERPC_CPU(first_cpu); if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && -@@ -3254,6 +3257,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +@@ -3315,6 +3318,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) spapr->host_serial = g_strdup(value); } @@ -51,7 +62,7 @@ index e4be00b732..f9e8dfdfc9 100644 static void spapr_instance_init(Object *obj) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); -@@ -3327,6 +3344,12 @@ static void spapr_instance_init(Object *obj) +@@ -3393,6 +3410,12 @@ static void spapr_instance_init(Object *obj) spapr_get_host_serial, spapr_set_host_serial); object_property_set_description(obj, "host-serial", "Host serial number to advertise in guest device tree"); @@ -64,23 +75,23 @@ index e4be00b732..f9e8dfdfc9 100644 } static void spapr_machine_finalizefn(Object *obj) -@@ -4554,6 +4577,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) - smc->smp_threads_vsmt = true; - smc->nr_xirqs = SPAPR_NR_XIRQS; - xfc->match_nvt = spapr_match_nvt; +@@ -4634,6 +4657,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) + vmc->client_architecture_support = spapr_vof_client_architecture_support; + vmc->quiesce = spapr_vof_quiesce; + vmc->setprop = spapr_vof_setprop; + smc->has_power9_support = true; } static const TypeInfo spapr_machine_info = { -@@ -4604,6 +4628,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4685,6 +4709,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-6.0 + * pseries-6.1 */ -@@ -4694,6 +4719,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4786,6 +4811,7 @@ static void 
spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -88,7 +99,7 @@ index e4be00b732..f9e8dfdfc9 100644 /* * pseries-4.0 -@@ -4713,6 +4739,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -4805,6 +4831,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; return true; } @@ -97,7 +108,7 @@ index e4be00b732..f9e8dfdfc9 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4871,6 +4899,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); +@@ -4963,6 +4991,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); /* * pseries-2.7 */ @@ -105,7 +116,7 @@ index e4be00b732..f9e8dfdfc9 100644 static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, -@@ -4926,6 +4955,7 @@ static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, +@@ -5018,6 +5047,7 @@ static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, return true; } @@ -113,7 +124,7 @@ index e4be00b732..f9e8dfdfc9 100644 static void spapr_machine_2_7_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -5040,6 +5070,344 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -5132,10 +5162,214 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -131,19 +142,31 @@ index e4be00b732..f9e8dfdfc9 100644 +} + +/* ++ * pseries-rhel8.5.0 ++ * like pseries-6.0 ++ */ ++ ++static void spapr_machine_rhel850_class_options(MachineClass *mc) ++{ ++ /* The default machine type must apply the RHEL specific defaults */ ++ spapr_machine_rhel_default_class_options(mc); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); ++ ++/* + * pseries-rhel8.4.0 + * like pseries-5.2 + */ + +static void 
spapr_machine_rhel840_class_options(MachineClass *mc) +{ -+ /* The default machine type must apply the RHEL specific defaults */ -+ spapr_machine_rhel_default_class_options(mc); ++ spapr_machine_rhel850_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_4, + hw_compat_rhel_8_4_len); +} + -+DEFINE_SPAPR_MACHINE(rhel840, "rhel8.4.0", true); ++DEFINE_SPAPR_MACHINE(rhel840, "rhel8.4.0", false); + +/* + * pseries-rhel8.3.0 @@ -216,7 +239,8 @@ index e4be00b732..f9e8dfdfc9 100644 + compat_props_add(mc->compat_props, hw_compat_rhel_8_1, + hw_compat_rhel_8_1_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); -+ + +-static void spapr_machine_register_types(void) + /* from pseries-4.2 */ + smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; +} @@ -231,7 +255,8 @@ index e4be00b732..f9e8dfdfc9 100644 + */ + +static void spapr_machine_rhel800_class_options(MachineClass *mc) -+{ + { +- type_register_static(&spapr_machine_info); + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_rhel810_class_options(mc); @@ -249,8 +274,8 @@ index e4be00b732..f9e8dfdfc9 100644 + smc->dr_phb_enabled = false; + smc->broken_host_serial_model = true; + smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; -+} -+ + } + +DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", false); + +/* @@ -315,149 +340,7 @@ index e4be00b732..f9e8dfdfc9 100644 + +DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false); + -+static void spapr_machine_rhel750_class_options(MachineClass *mc) -+{ -+ spapr_machine_rhel760_class_options(mc); -+ compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); -+ -+} -+ -+DEFINE_SPAPR_MACHINE(rhel750, "rhel7.5.0", false); -+ -+/* -+ * pseries-rhel7.5.0-sxxm -+ * -+ * pseries-rhel7.5.0 with speculative execution exploit mitigations enabled by default -+ */ -+ -+static void spapr_machine_rhel750sxxm_class_options(MachineClass *mc) -+{ -+ SpaprMachineClass *smc = 
SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel750_class_options(mc); -+ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; -+ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; -+ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; -+} -+ -+DEFINE_SPAPR_MACHINE(rhel750sxxm, "rhel7.5.0-sxxm", false); -+ -+/* -+ * pseries-rhel7.4.0 -+ * like spapr_compat_2_9 -+ */ -+GlobalProperty spapr_compat_rhel7_4[] = { -+ { TYPE_POWERPC_CPU, "pre-2.10-migration", "on" }, -+}; -+const size_t spapr_compat_rhel7_4_len = G_N_ELEMENTS(spapr_compat_rhel7_4); -+ -+static void spapr_machine_rhel740_class_options(MachineClass *mc) -+{ -+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel750_class_options(mc); -+ compat_props_add(mc->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); -+ compat_props_add(mc->compat_props, spapr_compat_rhel7_4, spapr_compat_rhel7_4_len); -+ smc->has_power9_support = false; -+ smc->pre_2_10_has_unused_icps = true; -+ smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED; -+ smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_ON; -+} -+ -+DEFINE_SPAPR_MACHINE(rhel740, "rhel7.4.0", false); -+ -+/* -+ * pseries-rhel7.4.0-sxxm -+ * -+ * pseries-rhel7.4.0 with speculative execution exploit mitigations enabled by default -+ */ -+ -+static void spapr_machine_rhel740sxxm_class_options(MachineClass *mc) -+{ -+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel740_class_options(mc); -+ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; -+ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; -+ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; -+} -+ -+DEFINE_SPAPR_MACHINE(rhel740sxxm, "rhel7.4.0-sxxm", false); -+ -+/* -+ * pseries-rhel7.3.0 -+ * like spapr_compat_2_6/_2_7/_2_8 but "ddw" has been backported to RHEL7_3 -+ */ -+GlobalProperty spapr_compat_rhel7_3[] = { -+ { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem_win_size", "0xf80000000" }, -+ { 
TYPE_SPAPR_PCI_HOST_BRIDGE, "mem64_win_size", "0" }, -+ { TYPE_POWERPC_CPU, "pre-2.8-migration", "on" }, -+ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-2.8-migration", "on" }, -+ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pcie-extended-configuration-space", "off" }, -+}; -+const size_t spapr_compat_rhel7_3_len = G_N_ELEMENTS(spapr_compat_rhel7_3); -+ -+static void spapr_machine_rhel730_class_options(MachineClass *mc) -+{ -+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel740_class_options(mc); -+ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power7_v2.3"); -+ mc->default_machine_opts = "modern-hotplug-events=off"; -+ compat_props_add(mc->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); -+ compat_props_add(mc->compat_props, spapr_compat_rhel7_3, spapr_compat_rhel7_3_len); -+ -+ smc->phb_placement = phb_placement_2_7; -+} -+ -+DEFINE_SPAPR_MACHINE(rhel730, "rhel7.3.0", false); -+ -+/* -+ * pseries-rhel7.3.0-sxxm -+ * -+ * pseries-rhel7.3.0 with speculative execution exploit mitigations enabled by default -+ */ -+ -+static void spapr_machine_rhel730sxxm_class_options(MachineClass *mc) -+{ -+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel730_class_options(mc); -+ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; -+ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; -+ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; -+} -+ -+DEFINE_SPAPR_MACHINE(rhel730sxxm, "rhel7.3.0-sxxm", false); -+ -+/* -+ * pseries-rhel7.2.0 -+ */ -+/* Should be like spapr_compat_2_5 + 2_4 + 2_3, but "dynamic-reconfiguration" -+ * has been backported to RHEL7_2 so we don't need it here. 
-+ */ -+ -+GlobalProperty spapr_compat_rhel7_2[] = { -+ { "spapr-vlan", "use-rx-buffer-pools", "off" }, -+ { TYPE_SPAPR_PCI_HOST_BRIDGE, "ddw", "off" }, -+}; -+const size_t spapr_compat_rhel7_2_len = G_N_ELEMENTS(spapr_compat_rhel7_2); -+ -+static void spapr_machine_rhel720_class_options(MachineClass *mc) -+{ -+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -+ -+ spapr_machine_rhel730_class_options(mc); -+ smc->use_ohci_by_default = true; -+ mc->has_hotpluggable_cpus = NULL; -+ compat_props_add(mc->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); -+ compat_props_add(mc->compat_props, spapr_compat_rhel7_2, spapr_compat_rhel7_2_len); -+} -+ -+DEFINE_SPAPR_MACHINE(rhel720, "rhel7.2.0", false); - - static void spapr_machine_register_types(void) - { + type_init(spapr_machine_register_types) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 64178f0f9a..2bff13a6ab 100644 --- a/hw/ppc/spapr_cpu_core.c @@ -497,10 +380,10 @@ index 64178f0f9a..2bff13a6ab 100644 qdev_unrealize(DEVICE(cpu)); return false; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index bf7cab7a2c..54cdde8980 100644 +index 637652ad16..589d1a262c 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -143,6 +143,7 @@ struct SpaprMachineClass { +@@ -146,6 +146,7 @@ struct SpaprMachineClass { bool pre_5_1_assoc_refpoints; bool pre_5_2_numa_associativity; @@ -508,9 +391,9 @@ index bf7cab7a2c..54cdde8980 100644 bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, -@@ -223,6 +224,9 @@ struct SpaprMachineState { - int fwnmi_machine_check_interlock; - QemuCond fwnmi_machine_check_interlock_cond; +@@ -230,6 +231,9 @@ struct SpaprMachineState { + /* Set by -boot */ + char *boot_device; + /* Secure Guest support via x-svm-allowed */ + bool svm_allowed; @@ -544,10 +427,10 @@ index 7949a24f5a..f207a9ba01 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = 
compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index e73416da68..4eb427a601 100644 +index 93d308ac8f..dca9bdf846 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1347,6 +1347,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1378,6 +1378,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -556,18 +439,18 @@ index e73416da68..4eb427a601 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index 104a308abb..cb0fb67383 100644 +index dc93b99189..0f4f072fbd 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c -@@ -89,6 +89,7 @@ static int cap_ppc_count_cache_flush_assist; - static int cap_ppc_nested_kvm_hv; +@@ -90,6 +90,7 @@ static int cap_ppc_nested_kvm_hv; static int cap_large_decr; static int cap_fwnmi; + static int cap_rpt_invalidate; +static int cap_ppc_secure_guest; static uint32_t debug_inst_opcode; -@@ -136,6 +137,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) +@@ -137,6 +138,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); kvmppc_get_cpu_characteristics(s); cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); @@ -575,10 +458,15 @@ index 104a308abb..cb0fb67383 100644 cap_large_decr = kvmppc_get_dec_bits(); cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); /* -@@ -2551,6 +2553,16 @@ int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) +@@ -2558,9 +2560,20 @@ int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) return 0; } ++<<<<<<< HEAD + int kvmppc_has_cap_rpt_invalidate(void) + { + return cap_rpt_invalidate; ++======= +bool kvmppc_has_cap_secure_guest(void) +{ + return !!cap_ppc_secure_guest; @@ -587,12 +475,11 @@ index 104a308abb..cb0fb67383 100644 +int 
kvmppc_enable_cap_secure_guest(void) +{ + return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1); -+} -+ ++>>>>>>> 89c02f0e37... Add ppc64 machine types + } + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) - { - uint32_t host_pvr = mfpvr(); -@@ -2947,3 +2959,18 @@ bool kvm_arch_cpu_check_are_resettable(void) +@@ -2959,3 +2972,18 @@ bool kvm_arch_cpu_check_are_resettable(void) { return true; } @@ -612,10 +499,10 @@ index 104a308abb..cb0fb67383 100644 + } +} diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h -index 989f61ace0..2e7a5d3fc1 100644 +index ee9325bf9a..20dbb95989 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h -@@ -39,6 +39,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); +@@ -40,6 +40,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, bool radix, bool gtse, uint64_t proc_tbl); @@ -623,17 +510,17 @@ index 989f61ace0..2e7a5d3fc1 100644 #ifndef CONFIG_USER_ONLY bool kvmppc_spapr_use_multitce(void); int kvmppc_spapr_enable_inkernel_multitce(void); -@@ -72,6 +73,8 @@ int kvmppc_set_cap_nested_kvm_hv(int enable); - int kvmppc_get_cap_large_decr(void); +@@ -74,6 +75,8 @@ int kvmppc_get_cap_large_decr(void); int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); + int kvmppc_has_cap_rpt_invalidate(void); int kvmppc_enable_hwrng(void); +bool kvmppc_has_cap_secure_guest(void); +int kvmppc_enable_cap_secure_guest(void); int kvmppc_put_books_sregs(PowerPCCPU *cpu); PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); void kvmppc_check_papr_resize_hpt(Error **errp); -@@ -381,6 +384,16 @@ static inline int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) - return -1; +@@ -393,6 +396,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void) + return false; } +static inline bool kvmppc_has_cap_secure_guest(void) diff --git a/0009-Add-s390x-machine-types.patch b/0009-Add-s390x-machine-types.patch index c768dd7..1dda855 100644 --- 
a/0009-Add-s390x-machine-types.patch +++ b/0009-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 09eba380295aef0a27d3fbcdda43019ab2898e08 Mon Sep 17 00:00:00 2001 +From 8ae9e40c13f4beb1eedaef46e6b00f562de692e2 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -6,15 +6,19 @@ Subject: Add s390x machine types Adding changes to add RHEL machine types for s390x architecture. Signed-off-by: Miroslav Rezanina +-- +Merged patches (6.1.0 RC1): +- 64a9a5c971 hw/s390x: Remove the RHEL7-only machine type +- 395516d62b redhat: s390x: add rhel-8.5.0 compat machine --- - hw/s390x/s390-virtio-ccw.c | 87 +++++++++++++++++++++++++++++++++++++- - 1 file changed, 86 insertions(+), 1 deletion(-) + hw/s390x/s390-virtio-ccw.c | 66 +++++++++++++++++++++++++++++++++++++- + 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 2972b607f3..8df6dd1c71 100644 +index e4b18aef49..9a51dd8de2 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -771,7 +771,7 @@ bool css_migration_enabled(void) +@@ -767,7 +767,7 @@ bool css_migration_enabled(void) { \ MachineClass *mc = MACHINE_CLASS(oc); \ ccw_machine_##suffix##_class_options(mc); \ @@ -23,29 +27,40 @@ index 2972b607f3..8df6dd1c71 100644 if (latest) { \ mc->alias = "s390-ccw-virtio"; \ mc->is_default = true; \ -@@ -795,6 +795,7 @@ bool css_migration_enabled(void) +@@ -791,6 +791,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_6_0_instance_options(MachineState *machine) + static void ccw_machine_6_1_instance_options(MachineState *machine) { } -@@ -1071,6 +1072,90 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1082,6 +1083,69 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); 
} DEFINE_CCW_MACHINE(2_4, "2.4", false); +#endif + ++static void ccw_machine_rhel850_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel850_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); ++ +static void ccw_machine_rhel840_instance_options(MachineState *machine) +{ ++ ccw_machine_rhel850_instance_options(machine); +} + +static void ccw_machine_rhel840_class_options(MachineClass *mc) +{ ++ ccw_machine_rhel850_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); +} -+DEFINE_CCW_MACHINE(rhel840, "rhel8.4.0", true); ++DEFINE_CCW_MACHINE(rhel840, "rhel8.4.0", false); + +static void ccw_machine_rhel820_instance_options(MachineState *machine) +{ @@ -87,38 +102,6 @@ index 2972b607f3..8df6dd1c71 100644 + compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); +} +DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", false); -+ -+static void ccw_machine_rhel750_instance_options(MachineState *machine) -+{ -+ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V2_11 }; -+ ccw_machine_rhel760_instance_options(machine); -+ -+ /* before 2.12 we emulated the very first z900, and RHEL 7.5 is -+ based on 2.10 */ -+ s390_set_qemu_cpu_model(0x2064, 7, 1, qemu_cpu_feat); -+ -+ /* bpb and ppa15 were only in the full model in RHEL 7.5 */ -+ s390_cpudef_featoff_greater(11, 1, S390_FEAT_PPA15); -+ s390_cpudef_featoff_greater(11, 1, S390_FEAT_BPB); -+} -+ -+GlobalProperty ccw_compat_rhel_7_5[] = { -+ { -+ .driver = TYPE_SCLP_EVENT_FACILITY, -+ .property = "allow_all_mask_sizes", -+ .value = "off", -+ }, -+}; -+const size_t ccw_compat_rhel_7_5_len = G_N_ELEMENTS(ccw_compat_rhel_7_5); -+ -+static void ccw_machine_rhel750_class_options(MachineClass *mc) -+{ -+ ccw_machine_rhel760_class_options(mc); -+ compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); -+ compat_props_add(mc->compat_props, ccw_compat_rhel_7_5, 
ccw_compat_rhel_7_5_len); -+ S390_CCW_MACHINE_CLASS(mc)->hpage_1m_allowed = false; -+} -+DEFINE_CCW_MACHINE(rhel750, "rhel7.5.0", false); static void ccw_machine_register_types(void) { diff --git a/0010-Add-x86_64-machine-types.patch b/0010-Add-x86_64-machine-types.patch index 28de463..59c7c14 100644 --- a/0010-Add-x86_64-machine-types.patch +++ b/0010-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From a082c53cc14afcd2ad77262575af50e164e75649 Mon Sep 17 00:00:00 2001 +From afc727ec3b2ad5a5c4033f25a63c877500b0194d Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -6,37 +6,37 @@ Subject: Add x86_64 machine types Adding changes to add RHEL machine types for x86_64 architecture. Signed-off-by: Miroslav Rezanina + +Rebase notes (210609): +- Update qemu64 cpu spec + +Merged patches (6.1.0 RC1): +- 59c284ad3b x86: Add x86 rhel8.5 machine types +- a8868b42fe redhat: x86: Enable 'kvm-asyncpf-int' by default + +Merged patches (6.1.0): +- a3995e2eff Remove RHEL 7.0.0 machine type (only x86_64 changes) +- ad3190a79b Remove RHEL 7.1.0 machine type (only x86_64 changes) +- 84bbe15d4e Remove RHEL 7.2.0 machine type (only x86_64 changes) +- 0215eb3356 Remove RHEL 7.3.0 machine types (only x86_64 changes) +- af69d1ca6e Remove RHEL 7.4.0 machine types (only x86_64 changes) +- 8f7a74ab78 Remove RHEL 7.5.0 machine types (only x86_64 changes) --- - hw/i386/acpi-build.c | 3 + - hw/i386/pc.c | 277 ++++++++++++++++++++++++++++++++++++- - hw/i386/pc_piix.c | 225 +++++++++++++++++++++++++++++- - hw/i386/pc_q35.c | 214 +++++++++++++++++++++++++++- + hw/i386/pc.c | 114 +++++++++++++++++++++++- + hw/i386/pc_piix.c | 68 +++++++++++++- + hw/i386/pc_q35.c | 177 ++++++++++++++++++++++++++++++++++++- include/hw/boards.h | 2 + - include/hw/i386/pc.h | 39 ++++++ - target/i386/cpu.c | 3 +- + include/hw/i386/pc.h | 21 +++++ + target/i386/kvm/kvm-cpu.c | 1 + target/i386/kvm/kvm.c | 4 + tests/qtest/pvpanic-test.c | 5 +- - 9 files 
changed, 763 insertions(+), 9 deletions(-) + 8 files changed, 385 insertions(+), 7 deletions(-) -diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index de98750aef..7bd67f7877 100644 ---- a/hw/i386/acpi-build.c -+++ b/hw/i386/acpi-build.c -@@ -231,6 +231,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) - pm->fadt.reset_reg = r; - pm->fadt.reset_val = 0xf; - pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; -+ if (object_property_get_bool(lpc, -+ "__com.redhat_force-rev1-fadt", NULL)) -+ pm->fadt.rev = 1; - pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; - pm->smi_on_cpuhp = - !!(smi_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT)); diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 8a84b25a03..edc02a68ca 100644 +index c2b9d62a35..719cedaa4b 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -355,6 +355,275 @@ GlobalProperty pc_compat_1_4[] = { +@@ -362,6 +362,116 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -51,9 +51,18 @@ index 8a84b25a03..edc02a68ca 100644 + { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, + /* bz 1508330 */ + { "vfio-pci", "x-no-geforce-quirks", "on" }, ++ /* bz 1941397 */ ++ { TYPE_X86_CPU, "kvm-asyncpf-int", "on" }, +}; +const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_8_4_compat[] = { ++ /* pc_rhel_8_4_compat from pc_compat_5_2 */ ++ { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, ++ { TYPE_X86_CPU, "kvm-asyncpf-int", "off" }, ++}; ++const size_t pc_rhel_8_4_compat_len = G_N_ELEMENTS(pc_rhel_8_4_compat); ++ +GlobalProperty pc_rhel_8_3_compat[] = { + /* pc_rhel_8_3_compat from pc_compat_5_1 */ + { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, @@ -135,203 +144,24 @@ index 8a84b25a03..edc02a68ca 100644 +}; +const size_t pc_rhel_7_6_compat_len = G_N_ELEMENTS(pc_rhel_7_6_compat); + -+/* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: -+ * - x-hv-max-vps was backported to 7.5 -+ * - x-pci-hole64-fix was backported 
to 7.5 -+ */ -+GlobalProperty pc_rhel_7_5_compat[] = { -+ /* pc_rhel_7_5_compat from pc_compat_2_11 */ -+ { "Skylake-Server" "-" TYPE_X86_CPU, "clflushopt", "off" }, -+ /* pc_rhel_7_5_compat from pc_compat_2_12 */ -+ { TYPE_X86_CPU, "legacy-cache", "on" }, -+ /* pc_rhel_7_5_compat from pc_compat_2_12 */ -+ { TYPE_X86_CPU, "topoext", "off" }, -+ /* pc_rhel_7_5_compat from pc_compat_2_12 */ -+ { "EPYC-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, -+ /* pc_rhel_7_5_compat from pc_compat_2_12 */ -+ { "EPYC-IBPB-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, -+}; -+const size_t pc_rhel_7_5_compat_len = G_N_ELEMENTS(pc_rhel_7_5_compat); -+ -+GlobalProperty pc_rhel_7_4_compat[] = { -+ /* pc_rhel_7_4_compat from pc_compat_2_9 */ -+ { "mch", "extended-tseg-mbytes", stringify(0) }, -+ /* bz 1489800 */ -+ { "ICH9-LPC", "__com.redhat_force-rev1-fadt", "on" }, -+ /* pc_rhel_7_4_compat from pc_compat_2_10 */ -+ { "i440FX-pcihost", "x-pci-hole64-fix", "off" }, -+ /* pc_rhel_7_4_compat from pc_compat_2_10 */ -+ { "q35-pcihost", "x-pci-hole64-fix", "off" }, -+ /* pc_rhel_7_4_compat from pc_compat_2_10 */ -+ { TYPE_X86_CPU, "x-hv-max-vps", "0x40" }, -+}; -+const size_t pc_rhel_7_4_compat_len = G_N_ELEMENTS(pc_rhel_7_4_compat); -+ -+GlobalProperty pc_rhel_7_3_compat[] = { -+ /* pc_rhel_7_3_compat from pc_compat_2_8 */ -+ { "kvmclock", "x-mach-use-reliable-get-clock", "off" }, -+ /* pc_rhel_7_3_compat from pc_compat_2_7 */ -+ { TYPE_X86_CPU, "l3-cache", "off" }, -+ /* pc_rhel_7_3_compat from pc_compat_2_7 */ -+ { TYPE_X86_CPU, "full-cpuid-auto-level", "off" }, -+ /* pc_rhel_7_3_compat from pc_compat_2_7 */ -+ { "Opteron_G3" "-" TYPE_X86_CPU, "family", "15" }, -+ /* pc_rhel_7_3_compat from pc_compat_2_7 */ -+ { "Opteron_G3" "-" TYPE_X86_CPU, "model", "6" }, -+ /* pc_rhel_7_3_compat from pc_compat_2_7 */ -+ { "Opteron_G3" "-" TYPE_X86_CPU, "stepping", "1" }, -+ /* pc_rhel_7_3_compat from pc_compat_2_7 */ -+ { "isa-pcspk", "migrate", "off" }, -+ /* pc_rhel_7_3_compat from 
pc_compat_2_6 */ -+ { TYPE_X86_CPU, "cpuid-0xb", "off" }, -+ /* pc_rhel_7_3_compat from pc_compat_2_8 */ -+ { "ICH9-LPC", "x-smi-broadcast", "off" }, -+ /* pc_rhel_7_3_compat from pc_compat_2_8 */ -+ { TYPE_X86_CPU, "vmware-cpuid-freq", "off" }, -+ /* pc_rhel_7_3_compat from pc_compat_2_8 */ -+ { "Haswell-" TYPE_X86_CPU, "stepping", "1" }, -+ /* pc_rhel_7_3_compat from pc_compat_2_3 added in 2.9*/ -+ { TYPE_X86_CPU, "kvm-no-smi-migration", "on" }, -+}; -+const size_t pc_rhel_7_3_compat_len = G_N_ELEMENTS(pc_rhel_7_3_compat); -+ -+GlobalProperty pc_rhel_7_2_compat[] = { -+ { "phenom" "-" TYPE_X86_CPU, "rdtscp", "off"}, -+ { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" }, -+ { "qemu64" "-" TYPE_X86_CPU, "abm", "on" }, -+ { "Haswell-" TYPE_X86_CPU, "abm", "off" }, -+ { "Haswell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, -+ { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" }, -+ { "Haswell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, -+ { "Broadwell-" TYPE_X86_CPU, "abm", "off" }, -+ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, -+ { "Broadwell-noTSX-" TYPE_X86_CPU, "abm", "off" }, -+ { "Broadwell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, -+ { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" }, -+ { TYPE_X86_CPU, "check", "off" }, -+ { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" }, -+ { TYPE_X86_CPU, "arat", "off" }, -+ { "usb-redir", "streams", "off" }, -+ { TYPE_X86_CPU, "fill-mtrr-mask", "off" }, -+ { "apic-common", "legacy-instance-id", "on" }, -+}; -+const size_t pc_rhel_7_2_compat_len = G_N_ELEMENTS(pc_rhel_7_2_compat); -+ -+GlobalProperty pc_rhel_7_1_compat[] = { -+ { "kvm64" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "kvm32" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "Conroe" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "Penryn" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "Nehalem" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "Nehalem-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "Westmere" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "Westmere-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, -+ { 
"SandyBridge" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "SandyBridge-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "Haswell" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "Haswell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "Broadwell" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "Opteron_G1" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "Opteron_G2" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "Opteron_G3" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "Opteron_G4" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "Opteron_G5" "-" TYPE_X86_CPU, "vme", "off" }, -+ { "Haswell" "-" TYPE_X86_CPU, "f16c", "off" }, -+ { "Haswell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, -+ { "Haswell" "-" TYPE_X86_CPU, "rdrand", "off" }, -+ { "Haswell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, -+ { "Broadwell" "-" TYPE_X86_CPU, "f16c", "off" }, -+ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, -+ { "Broadwell" "-" TYPE_X86_CPU, "rdrand", "off" }, -+ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, -+ { "coreduo" "-" TYPE_X86_CPU, "vmx", "on" }, -+ { "core2duo" "-" TYPE_X86_CPU, "vmx", "on" }, -+ { "qemu64" "-" TYPE_X86_CPU, "min-level", stringify(4) }, -+ { "kvm64" "-" TYPE_X86_CPU, "min-level", stringify(5) }, -+ { "pentium3" "-" TYPE_X86_CPU, "min-level", stringify(2) }, -+ { "n270" "-" TYPE_X86_CPU, "min-level", stringify(5) }, -+ { "Conroe" "-" TYPE_X86_CPU, "min-level", stringify(4) }, -+ { "Penryn" "-" TYPE_X86_CPU, "min-level", stringify(4) }, -+ { "Nehalem" "-" TYPE_X86_CPU, "min-level", stringify(4) }, -+ { "n270" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -+ { "Penryn" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -+ { "Conroe" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -+ { "Nehalem" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -+ { "Westmere" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -+ { "SandyBridge" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -+ { "IvyBridge" "-" 
TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -+ { "Haswell" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -+ { "Haswell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -+ { "Broadwell" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -+ { "Broadwell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -+}; -+const size_t pc_rhel_7_1_compat_len = G_N_ELEMENTS(pc_rhel_7_1_compat); -+ +/* + * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine + * types as the PC_COMPAT_* do for upstream types. + * PC_RHEL_7_*_COMPAT apply both to i440fx and q35 types. + */ -+ -+/* -+ * RHEL-7 is based on QEMU 1.5.3, so this needs the PC_COMPAT_* -+ * between our base and 1.5, less stuff backported to RHEL-7.0 -+ * (usb-device.msos-desc), less stuff for devices we changed -+ * (qemu64-x86_64-cpu) or don't support (hpet, pci-serial-2x, -+ * pci-serial-4x) in 7.0. -+ */ -+GlobalProperty pc_rhel_7_0_compat[] = { -+ { "virtio-scsi-pci", "any_layout", "off" }, -+ { "PIIX4_PM", "memory-hotplug-support", "off" }, -+ { "apic", "version", stringify(0x11) }, -+ { "nec-usb-xhci", "superspeed-ports-first", "off" }, -+ { "nec-usb-xhci", "force-pcie-endcap", "on" }, -+ { "pci-serial", "prog_if", stringify(0) }, -+ { "virtio-net-pci", "guest_announce", "off" }, -+ { "ICH9-LPC", "memory-hotplug-support", "off" }, -+ { "xio3130-downstream", COMPAT_PROP_PCP, "off" }, -+ { "ioh3420", COMPAT_PROP_PCP, "off" }, -+ { "PIIX4_PM", "acpi-pci-hotplug-with-bridge-support", "off" }, -+ { "e1000", "mitigation", "off" }, -+ { "virtio-net-pci", "ctrl_guest_offloads", "off" }, -+ { "Conroe" "-" TYPE_X86_CPU, "x2apic", "on" }, -+ { "Penryn" "-" TYPE_X86_CPU, "x2apic", "on" }, -+ { "Nehalem" "-" TYPE_X86_CPU, "x2apic", "on" }, -+ { "Nehalem-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, -+ { "Westmere" "-" TYPE_X86_CPU, "x2apic", "on" }, -+ { "Westmere-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, -+ { "Opteron_G1" "-" TYPE_X86_CPU, "x2apic", "on" }, -+ { 
"Opteron_G2" "-" TYPE_X86_CPU, "x2apic", "on" }, -+ { "Opteron_G3" "-" TYPE_X86_CPU, "x2apic", "on" }, -+ { "Opteron_G4" "-" TYPE_X86_CPU, "x2apic", "on" }, -+ { "Opteron_G5" "-" TYPE_X86_CPU, "x2apic", "on" }, -+}; -+const size_t pc_rhel_7_0_compat_len = G_N_ELEMENTS(pc_rhel_7_0_compat); + GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -952,7 +1221,8 @@ void pc_memory_init(PCMachineState *pcms, - option_rom_mr = g_malloc(sizeof(*option_rom_mr)); - memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, - &error_fatal); -- if (pcmc->pci_enabled) { -+ /* RH difference: See bz 1489800, explicitly make ROM ro */ -+ if (pcmc->pc_rom_ro) { - memory_region_set_readonly(option_rom_mr, true); - } - memory_region_add_subregion_overlap(rom_memory, -@@ -1702,6 +1972,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1713,6 +1823,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->pvh_enabled = true; pcmc->kvmclock_create_always = true; assert(!mc->get_hotplug_handler); -+ pcmc->pc_rom_ro = true; + mc->async_pf_vmexit_disable = false; mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1713,7 +1985,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1724,7 +1835,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->default_boot_order = "cad"; mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; @@ -342,10 +172,10 @@ index 8a84b25a03..edc02a68ca 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 62433d8022..d9c5df16d8 100644 +index eebb4f3141..19d6721f3d 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -54,6 +54,7 @@ +@@ -51,6 +51,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/xen.h" @@ -353,7 +183,7 @@ index 
62433d8022..d9c5df16d8 100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -176,8 +177,8 @@ static void pc_init1(MachineState *machine, +@@ -174,8 +175,8 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ @@ -364,7 +194,7 @@ index 62433d8022..d9c5df16d8 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -316,6 +317,7 @@ static void pc_init1(MachineState *machine, +@@ -314,6 +315,7 @@ static void pc_init1(MachineState *machine, * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). */ @@ -372,7 +202,7 @@ index 62433d8022..d9c5df16d8 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -927,3 +929,222 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -937,3 +939,65 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -418,6 +248,8 @@ index 62433d8022..d9c5df16d8 100644 + pcmc->pci_root_uid = 1; + compat_props_add(m->compat_props, hw_compat_rhel_8_4, + hw_compat_rhel_8_4_len); ++ compat_props_add(m->compat_props, pc_rhel_8_4_compat, ++ pc_rhel_8_4_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_3, + hw_compat_rhel_8_3_len); + compat_props_add(m->compat_props, pc_rhel_8_3_compat, @@ -436,170 +268,11 @@ index 62433d8022..d9c5df16d8 100644 + +DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, + pc_machine_rhel760_options); -+ -+static void pc_init_rhel750(MachineState *machine) -+{ -+ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -+ TYPE_I440FX_PCI_DEVICE); -+} -+ -+static void pc_machine_rhel750_options(MachineClass *m) -+{ -+ pc_machine_rhel760_options(m); -+ m->alias = NULL; -+ m->is_default = 0; -+ m->desc = "RHEL 7.5.0 PC (i440FX + PIIX, 1996)"; -+ m->auto_enable_numa_with_memhp = false; 
-+ compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); -+ compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); -+} -+ -+DEFINE_PC_MACHINE(rhel750, "pc-i440fx-rhel7.5.0", pc_init_rhel750, -+ pc_machine_rhel750_options); -+ -+static void pc_init_rhel740(MachineState *machine) -+{ -+ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -+ TYPE_I440FX_PCI_DEVICE); -+} -+ -+static void pc_machine_rhel740_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_machine_rhel750_options(m); -+ m->desc = "RHEL 7.4.0 PC (i440FX + PIIX, 1996)"; -+ pcmc->pc_rom_ro = false; -+ compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); -+ compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); -+} -+ -+DEFINE_PC_MACHINE(rhel740, "pc-i440fx-rhel7.4.0", pc_init_rhel740, -+ pc_machine_rhel740_options); -+ -+static void pc_init_rhel730(MachineState *machine) -+{ -+ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -+ TYPE_I440FX_PCI_DEVICE); -+} -+ -+static void pc_machine_rhel730_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_machine_rhel740_options(m); -+ m->desc = "RHEL 7.3.0 PC (i440FX + PIIX, 1996)"; -+ pcmc->linuxboot_dma_enabled = false; -+ compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); -+ compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); -+} -+ -+DEFINE_PC_MACHINE(rhel730, "pc-i440fx-rhel7.3.0", pc_init_rhel730, -+ pc_machine_rhel730_options); -+ -+ -+static void pc_init_rhel720(MachineState *machine) -+{ -+ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -+ TYPE_I440FX_PCI_DEVICE); -+} -+ -+static void pc_machine_rhel720_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ X86MachineClass *x86mc = X86_MACHINE_CLASS(m); -+ pc_machine_rhel730_options(m); -+ m->desc = "RHEL 7.2.0 PC (i440FX + PIIX, 1996)"; -+ /* From 
pc_i440fx_2_5_machine_options */ -+ x86mc->save_tsc_khz = false; -+ m->legacy_fw_cfg_order = 1; -+ /* Note: broken_reserved_end was already in 7.2 */ -+ /* From pc_i440fx_2_6_machine_options */ -+ pcmc->legacy_cpu_hotplug = true; -+ compat_props_add(m->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); -+ compat_props_add(m->compat_props, pc_rhel_7_2_compat, pc_rhel_7_2_compat_len); -+} -+ -+DEFINE_PC_MACHINE(rhel720, "pc-i440fx-rhel7.2.0", pc_init_rhel720, -+ pc_machine_rhel720_options); -+ -+static void pc_compat_rhel710(MachineState *machine) -+{ -+ PCMachineState *pcms = PC_MACHINE(machine); -+ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); -+ -+ /* From pc_compat_2_2 */ -+ pcmc->rsdp_in_ram = false; -+ machine->suppress_vmdesc = true; -+ -+ /* From pc_compat_2_1 */ -+ pcmc->smbios_uuid_encoded = false; -+ x86_cpu_change_kvm_default("svm", NULL); -+ pcmc->enforce_aligned_dimm = false; -+ -+ /* Disable all the extra subsections that were added in 2.2 */ -+ migrate_pre_2_2 = true; -+ -+ /* From pc_i440fx_2_4_machine_options */ -+ pcmc->broken_reserved_end = true; -+} -+ -+static void pc_init_rhel710(MachineState *machine) -+{ -+ pc_compat_rhel710(machine); -+ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -+ TYPE_I440FX_PCI_DEVICE); -+} -+ -+static void pc_machine_rhel710_options(MachineClass *m) -+{ -+ pc_machine_rhel720_options(m); -+ m->family = "pc_piix_Y"; -+ m->desc = "RHEL 7.1.0 PC (i440FX + PIIX, 1996)"; -+ m->default_display = "cirrus"; -+ compat_props_add(m->compat_props, hw_compat_rhel_7_1, hw_compat_rhel_7_1_len); -+ compat_props_add(m->compat_props, pc_rhel_7_1_compat, pc_rhel_7_1_compat_len); -+} -+ -+DEFINE_PC_MACHINE(rhel710, "pc-i440fx-rhel7.1.0", pc_init_rhel710, -+ pc_machine_rhel710_options); -+ -+static void pc_compat_rhel700(MachineState *machine) -+{ -+ PCMachineState *pcms = PC_MACHINE(machine); -+ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); -+ -+ pc_compat_rhel710(machine); -+ -+ /* Upstream enables it for everyone, 
we're a little more selective */ -+ x86_cpu_change_kvm_default("x2apic", NULL); -+ x86_cpu_change_kvm_default("svm", NULL); -+ pcmc->legacy_acpi_table_size = 6418; /* see pc_compat_2_0() */ -+ pcmc->smbios_legacy_mode = true; -+ pcmc->has_reserved_memory = false; -+ migrate_cve_2014_5263_xhci_fields = true; -+} -+ -+static void pc_init_rhel700(MachineState *machine) -+{ -+ pc_compat_rhel700(machine); -+ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -+ TYPE_I440FX_PCI_DEVICE); -+} -+ -+static void pc_machine_rhel700_options(MachineClass *m) -+{ -+ pc_machine_rhel710_options(m); -+ m->family = "pc_piix_Y"; -+ m->desc = "RHEL 7.0.0 PC (i440FX + PIIX, 1996)"; -+ compat_props_add(m->compat_props, pc_rhel_7_0_compat, pc_rhel_7_0_compat_len); -+} -+ -+DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, -+ pc_machine_rhel700_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index fce52ca70b..44109e4876 100644 +index e7724fd02c..8032babda5 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -195,8 +195,8 @@ static void pc_q35_init(MachineState *machine) +@@ -196,8 +196,8 @@ static void pc_q35_init(MachineState *machine) if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ @@ -610,7 +283,7 @@ index fce52ca70b..44109e4876 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -327,6 +327,7 @@ static void pc_q35_init(MachineState *machine) +@@ -337,6 +337,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -618,7 +291,7 @@ index fce52ca70b..44109e4876 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -581,3 +582,212 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -602,3 +603,175 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -647,6 +320,24 @@ index 
fce52ca70b..44109e4876 100644 + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); +} + ++static void pc_q35_init_rhel850(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel850_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.5.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, ++ pc_q35_machine_rhel850_options); ++ ++ +static void pc_q35_init_rhel840(MachineState *machine) +{ + pc_q35_init(machine); @@ -655,12 +346,15 @@ index fce52ca70b..44109e4876 100644 +static void pc_q35_machine_rhel840_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel850_options(m); + m->desc = "RHEL-8.4.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.4.0"; + compat_props_add(m->compat_props, hw_compat_rhel_8_4, + hw_compat_rhel_8_4_len); ++ compat_props_add(m->compat_props, pc_rhel_8_4_compat, ++ pc_rhel_8_4_compat_len); +} + +DEFINE_PC_MACHINE(q35_rhel840, "pc-q35-rhel8.4.0", pc_q35_init_rhel840, @@ -677,7 +371,6 @@ index fce52ca70b..44109e4876 100644 + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel840_options(m); + m->desc = "RHEL-8.3.0 PC (Q35 + ICH9, 2009)"; -+ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.3.0"; + compat_props_add(m->compat_props, hw_compat_rhel_8_3, @@ -774,68 +467,11 @@ index fce52ca70b..44109e4876 100644 + +DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, + pc_q35_machine_rhel760_options); -+ -+static void pc_q35_init_rhel750(MachineState *machine) -+{ -+ pc_q35_init(machine); -+} -+ -+static void pc_q35_machine_rhel750_options(MachineClass 
*m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel760_options(m); -+ m->alias = NULL; -+ m->desc = "RHEL-7.5.0 PC (Q35 + ICH9, 2009)"; -+ m->auto_enable_numa_with_memhp = false; -+ pcmc->default_nic_model = "e1000"; -+ compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); -+ compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); -+} -+ -+DEFINE_PC_MACHINE(q35_rhel750, "pc-q35-rhel7.5.0", pc_q35_init_rhel750, -+ pc_q35_machine_rhel750_options); -+ -+static void pc_q35_init_rhel740(MachineState *machine) -+{ -+ pc_q35_init(machine); -+} -+ -+static void pc_q35_machine_rhel740_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel750_options(m); -+ m->desc = "RHEL-7.4.0 PC (Q35 + ICH9, 2009)"; -+ pcmc->pc_rom_ro = false; -+ compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); -+ compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); -+} -+ -+DEFINE_PC_MACHINE(q35_rhel740, "pc-q35-rhel7.4.0", pc_q35_init_rhel740, -+ pc_q35_machine_rhel740_options); -+ -+static void pc_q35_init_rhel730(MachineState *machine) -+{ -+ pc_q35_init(machine); -+} -+ -+static void pc_q35_machine_rhel730_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel740_options(m); -+ m->desc = "RHEL-7.3.0 PC (Q35 + ICH9, 2009)"; -+ m->max_cpus = 255; -+ pcmc->linuxboot_dma_enabled = false; -+ compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); -+ compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); -+} -+ -+DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, -+ pc_q35_machine_rhel730_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index 2d7a65724a..90ae100bfc 100644 +index f5423f351a..a2b1681027 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -243,6 +243,8 @@ struct MachineClass { +@@ 
-242,6 +242,8 @@ struct MachineClass { strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; bool auto_enable_numa_with_memdev; @@ -845,26 +481,19 @@ index 2d7a65724a..90ae100bfc 100644 bool smbus_no_migration_support; bool nvdimm_supported; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 93c012ac95..79a7803a2f 100644 +index 17ad7bfc3a..75b338d66d 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -128,6 +128,9 @@ struct PCMachineClass { - - /* create kvmclock device even when KVM PV features are not exposed */ - bool kvmclock_create_always; -+ -+ /* RH only, see bz 1489800 */ -+ bool pc_rom_ro; - }; - - #define TYPE_PC_MACHINE "generic-pc-machine" -@@ -275,6 +278,42 @@ extern const size_t pc_compat_1_5_len; +@@ -277,6 +277,27 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_8_4_compat[]; ++extern const size_t pc_rhel_8_4_compat_len; ++ +extern GlobalProperty pc_rhel_8_3_compat[]; +extern const size_t pc_rhel_8_3_compat_len; + @@ -879,42 +508,15 @@ index 93c012ac95..79a7803a2f 100644 + +extern GlobalProperty pc_rhel_7_6_compat[]; +extern const size_t pc_rhel_7_6_compat_len; -+ -+extern GlobalProperty pc_rhel_7_5_compat[]; -+extern const size_t pc_rhel_7_5_compat_len; -+ -+extern GlobalProperty pc_rhel_7_4_compat[]; -+extern const size_t pc_rhel_7_4_compat_len; -+ -+extern GlobalProperty pc_rhel_7_3_compat[]; -+extern const size_t pc_rhel_7_3_compat_len; -+ -+extern GlobalProperty pc_rhel_7_2_compat[]; -+extern const size_t pc_rhel_7_2_compat_len; -+ -+extern GlobalProperty pc_rhel_7_1_compat[]; -+extern const size_t pc_rhel_7_1_compat_len; -+ -+extern GlobalProperty pc_rhel_7_0_compat[]; -+extern const size_t pc_rhel_7_0_compat_len; + /* Helper for setting model-id for CPU models that changed model-id * depending on QEMU versions 
up to QEMU 2.4. */ -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index ad99cad0e7..c30bb2a6b0 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1882,7 +1882,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - .level = 0xd, - .vendor = CPUID_VENDOR_AMD, - .family = 6, -- .model = 6, -+ .model = 13, - .stepping = 3, - .features[FEAT_1_EDX] = - PPRO_FEATURES | -@@ -4264,6 +4264,7 @@ static PropValue kvm_default_props[] = { +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index d95028018e..7b004065ae 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -131,6 +131,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -923,10 +525,10 @@ index ad99cad0e7..c30bb2a6b0 100644 }; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index 7fe9f52710..4c69c2cb4b 100644 +index e69abe48e3..4a3077b3f0 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c -@@ -3181,6 +3181,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3261,6 +3261,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -934,7 +536,7 @@ index 7fe9f52710..4c69c2cb4b 100644 kvm_msr_buf_reset(cpu); -@@ -3499,6 +3500,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3579,6 +3580,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; diff --git a/0011-Enable-make-check.patch b/0011-Enable-make-check.patch index b3af9a8..ce8ef90 100644 --- a/0011-Enable-make-check.patch +++ b/0011-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 5f6a55a218029af944a8d02ab9264647315890d3 Mon Sep 17 00:00:00 2001 +From e05f30c4899bfec4084cda3223e7141ddc97be6f Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -8,24 +8,27 @@ make check run during build. 
Signed-off-by: Miroslav Rezanina --- - redhat/qemu-kvm.spec.template | 6 ++---- +Rebase changes (6.1.0): +- removed unnecessary test changes + +Merged patches (6.1.0): +- 2f129df7d3 redhat: Enable the 'test-block-iothread' test again +--- + .distro/qemu-kvm.spec.template | 5 ++--- tests/qemu-iotests/051 | 8 ++++---- tests/qtest/bios-tables-test.c | 6 +++--- tests/qtest/boot-serial-test.c | 6 +++++- tests/qtest/cdrom-test.c | 4 ++++ tests/qtest/cpu-plug-test.c | 4 ++-- - tests/qtest/e1000-test.c | 2 ++ tests/qtest/fuzz-e1000e-test.c | 2 +- tests/qtest/fuzz-virtio-scsi-test.c | 2 +- tests/qtest/hd-geo-test.c | 4 ++++ - tests/qtest/libqos/meson.build | 2 +- tests/qtest/lpc-ich9-test.c | 2 +- tests/qtest/meson.build | 11 +++-------- tests/qtest/prom-env-test.c | 4 ++++ tests/qtest/test-x86-cpuid-compat.c | 2 ++ tests/qtest/usb-hcd-xhci-test.c | 4 ++++ - tests/unit/meson.build | 2 +- - 17 files changed, 44 insertions(+), 27 deletions(-) + 14 files changed, 40 insertions(+), 24 deletions(-) diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index 7bf29343d7..fd63402d78 100755 @@ -56,10 +59,10 @@ index 7bf29343d7..fd63402d78 100755 *) ;; diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c -index 156d4174aa..b4a1074b77 100644 +index 51d3a4e239..fe01fd8b7d 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c -@@ -1299,7 +1299,7 @@ static void test_acpi_virt_tcg_numamem(void) +@@ -1303,7 +1303,7 @@ static void test_acpi_virt_tcg_numamem(void) free_test_data(&data); } @@ -68,7 +71,7 @@ index 156d4174aa..b4a1074b77 100644 static void test_acpi_virt_tcg_pxb(void) { test_data data = { -@@ -1331,7 +1331,7 @@ static void test_acpi_virt_tcg_pxb(void) +@@ -1335,7 +1335,7 @@ static void test_acpi_virt_tcg_pxb(void) free_test_data(&data); } @@ -77,7 +80,7 @@ index 156d4174aa..b4a1074b77 100644 static void test_acpi_tcg_acpi_hmat(const char *machine) { test_data data; -@@ -1561,7 +1561,7 @@ int main(int argc, char *argv[]) 
+@@ -1565,7 +1565,7 @@ int main(int argc, char *argv[]) qtest_add_func("acpi/virt", test_acpi_virt_tcg); qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem); qtest_add_func("acpi/virt/memhp", test_acpi_virt_tcg_memhp); @@ -87,10 +90,10 @@ index 156d4174aa..b4a1074b77 100644 } ret = g_test_run(); diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c -index d74509b1c5..a64c55e384 100644 +index 96849cec91..81df62e01a 100644 --- a/tests/qtest/boot-serial-test.c +++ b/tests/qtest/boot-serial-test.c -@@ -120,19 +120,23 @@ static testdef_t tests[] = { +@@ -148,19 +148,23 @@ static testdef_t tests[] = { { "ppc", "g3beige", "", "PowerPC,750" }, { "ppc", "mac99", "", "PowerPC,G4" }, { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, @@ -166,22 +169,6 @@ index a1c689414b..a8f076711c 100644 return; } data = g_new(PlugTestData, 1); -diff --git a/tests/qtest/e1000-test.c b/tests/qtest/e1000-test.c -index ea286d1793..a1847ac8ed 100644 ---- a/tests/qtest/e1000-test.c -+++ b/tests/qtest/e1000-test.c -@@ -22,9 +22,11 @@ struct QE1000 { - - static const char *models[] = { - "e1000", -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - "e1000-82540em", - "e1000-82544gc", - "e1000-82545em", -+#endif - }; - - static void *e1000_get_driver(void *obj, const char *interface) diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c index 66229e6096..947fba73b7 100644 --- a/tests/qtest/fuzz-e1000e-test.c @@ -209,7 +196,7 @@ index aaf6d10e18..43727d62ac 100644 qtest_outl(s, 0xcf8, 0x80001811); diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c -index f7b7cfbc2d..99cccf8638 100644 +index 113126ae06..999ef2aace 100644 --- a/tests/qtest/hd-geo-test.c +++ b/tests/qtest/hd-geo-test.c @@ -737,6 +737,7 @@ static void test_override_ide(void) @@ -240,19 +227,6 @@ index f7b7cfbc2d..99cccf8638 100644 qtest_add_func("hd-geo/override/virtio_blk", test_override_virtio_blk); qtest_add_func("hd-geo/override/zero_chs", 
test_override_zero_chs); qtest_add_func("hd-geo/override/scsi_hot_unplug", -diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build -index 1cddf5bdaa..2f4a564105 100644 ---- a/tests/qtest/libqos/meson.build -+++ b/tests/qtest/libqos/meson.build -@@ -41,7 +41,7 @@ libqos_srcs = files('../libqtest.c', - 'virtio-serial.c', - - # qgraph machines: -- 'aarch64-xlnx-zcu102-machine.c', -+# 'aarch64-xlnx-zcu102-machine.c', - 'arm-imx25-pdk-machine.c', - 'arm-n800-machine.c', - 'arm-raspi2-machine.c', diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c index fe0bef9980..7a9d51579b 100644 --- a/tests/qtest/lpc-ich9-test.c @@ -267,10 +241,10 @@ index fe0bef9980..7a9d51579b 100644 qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index 0c76738921..b9a7426a7b 100644 +index 2bc3efd49f..53ce4b6416 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build -@@ -71,7 +71,6 @@ qtests_i386 = \ +@@ -73,7 +73,6 @@ qtests_i386 = \ 'ide-test', 'hd-geo-test', 'boot-order-test', @@ -278,7 +252,7 @@ index 0c76738921..b9a7426a7b 100644 'rtc-test', 'i440fx-test', 'fw_cfg-test', -@@ -79,7 +78,6 @@ qtests_i386 = \ +@@ -81,7 +80,6 @@ qtests_i386 = \ 'drive_del-test', 'tco-test', 'cpu-plug-test', @@ -286,7 +260,7 @@ index 0c76738921..b9a7426a7b 100644 'vmgenid-test', 'migration-test', 'test-x86-cpuid-compat', -@@ -130,17 +128,15 @@ qtests_moxie = [ 'boot-serial-test' ] +@@ -130,17 +128,15 @@ qtests_mips64el = \ qtests_ppc = \ (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) + \ @@ -306,7 +280,7 @@ index 0c76738921..b9a7426a7b 100644 qtests_pci + ['migration-test', 'numa-test', 'cpu-plug-test', 'drive_del-test'] qtests_sh4 = (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? 
['endianness-test'] : []) -@@ -183,7 +179,7 @@ qtests_aarch64 = \ +@@ -186,7 +182,7 @@ qtests_aarch64 = \ ['arm-cpu-features', 'numa-test', 'boot-serial-test', @@ -315,7 +289,7 @@ index 0c76738921..b9a7426a7b 100644 'migration-test'] qtests_s390x = \ -@@ -192,7 +188,6 @@ qtests_s390x = \ +@@ -195,7 +191,6 @@ qtests_s390x = \ (config_host.has_key('CONFIG_POSIX') ? ['test-filter-redirector'] : []) + \ ['boot-serial-test', 'drive_del-test', @@ -392,19 +366,6 @@ index 10ef9d2a91..3855873050 100644 qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); qtest_start("-device nec-usb-xhci,id=xhci" -diff --git a/tests/unit/meson.build b/tests/unit/meson.build -index b3bc2109da..244d35f5d4 100644 ---- a/tests/unit/meson.build -+++ b/tests/unit/meson.build -@@ -65,7 +65,7 @@ if have_block - 'test-blockjob': [testblock], - 'test-blockjob-txn': [testblock], - 'test-block-backend': [testblock], -- 'test-block-iothread': [testblock], -+# 'test-block-iothread': [testblock], - 'test-write-threshold': [testblock], - 'test-crypto-hash': [crypto], - 'test-crypto-hmac': [crypto], -- 2.27.0 diff --git a/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch index 45abe27..0186f8a 100644 --- a/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 22c0f47f02c5db63f3857dabc6cc7cb6bfc78158 Mon Sep 17 00:00:00 2001 +From 79697eeae5ecc81c6e334b4a5164c9a664541a22 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -22,6 +22,9 @@ for other uses. Stepping down a bit, the number 32 arbitrarily matches the number of slots on a PCI bus and is also a nice power of two. +Count of slots increased to 509 later so we could increase limit +to 64 as some usecases require more than 32 devices. 
+ Signed-off-by: Bandan Das --- hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- @@ -29,10 +32,10 @@ Signed-off-by: Bandan Das 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 5c65aa0a98..327b86703a 100644 +index e1ea1d8a23..717af74b9a 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -46,6 +46,9 @@ +@@ -45,6 +45,9 @@ #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" @@ -42,7 +45,7 @@ index 5c65aa0a98..327b86703a 100644 static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); -@@ -2783,9 +2786,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) +@@ -2790,9 +2793,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) ssize_t len; struct stat st; int groupid; @@ -74,7 +77,7 @@ index 5c65aa0a98..327b86703a 100644 if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3222,6 +3246,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3229,6 +3253,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), diff --git a/0013-Add-support-statement-to-help-output.patch b/0013-Add-support-statement-to-help-output.patch index 8739e82..e7ff04f 100644 --- a/0013-Add-support-statement-to-help-output.patch +++ b/0013-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From ffd8eff2ce1d7eda81d425324593924c098f6c39 Mon Sep 17 00:00:00 2001 +From 5209e0f91395c9240142f75123edd55a6fcce8d7 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -21,10 +21,10 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c -index aadb526138..6c8498022b 100644 +index 5ca11e7469..55673d7302 100644 --- 
a/softmmu/vl.c +++ b/softmmu/vl.c -@@ -848,9 +848,17 @@ static void version(void) +@@ -802,9 +802,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -42,7 +42,7 @@ index aadb526138..6c8498022b 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", error_get_progname()); -@@ -867,6 +875,7 @@ static void help(int exitcode) +@@ -830,6 +838,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); diff --git a/0014-globally-limit-the-maximum-number-of-CPUs.patch b/0014-globally-limit-the-maximum-number-of-CPUs.patch index b44ad7c..0eba4ee 100644 --- a/0014-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0014-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From b5dab6e678d9b53359b3a915421114258e803cad Mon Sep 17 00:00:00 2001 +From 9ca40e0d1c7644ab781dc382effa8eb32f589322 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -18,10 +18,10 @@ Signed-off-by: Danilo Cesar Lemes de Paula 1 file changed, 12 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index b6d9f92f15..70a94ba76d 100644 +index 0125c17edb..b5d488a027 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c -@@ -2095,6 +2095,18 @@ static int kvm_init(MachineState *ms) +@@ -2413,6 +2413,18 @@ static int kvm_init(MachineState *ms) soft_vcpus_limit = kvm_recommended_vcpus(s); hard_vcpus_limit = kvm_max_vcpus(s); diff --git a/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 599e101..dd00655 100644 --- a/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From 55fde02ee1a9aa0e812af8534a9adf553accc522 Mon Sep 17 00:00:00 2001 +From 67b5eb11440c75e0fea275eccea43266760b831e Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: 
Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -17,12 +17,10 @@ to reflect this change. Only architectures available in RHEL are updated. Signed-off-by: Miroslav Rezanina --- - docs/defs.rst.inc | 4 ++-- - docs/interop/live-block-operations.rst | 4 ++-- - docs/tools/qemu-trace-stap.rst | 14 +++++++------- - docs/tools/virtiofsd.rst | 2 +- - qemu-options.hx | 10 +++++----- - 5 files changed, 17 insertions(+), 17 deletions(-) + docs/defs.rst.inc | 4 ++-- + docs/tools/qemu-trace-stap.rst | 14 +++++++------- + qemu-options.hx | 10 +++++----- + 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/defs.rst.inc b/docs/defs.rst.inc index 52d6454b93..d74dbdeca9 100644 @@ -38,28 +36,6 @@ index 52d6454b93..d74dbdeca9 100644 +.. |qemu_system_x86| replace:: qemu-kvm .. |I2C| replace:: I\ :sup:`2`\ C .. |I2S| replace:: I\ :sup:`2`\ S -diff --git a/docs/interop/live-block-operations.rst b/docs/interop/live-block-operations.rst -index 1073b930dc..881432253f 100644 ---- a/docs/interop/live-block-operations.rst -+++ b/docs/interop/live-block-operations.rst -@@ -129,7 +129,7 @@ To show some example invocations of command-line, we will use the - following invocation of QEMU, with a QMP server running over UNIX - socket:: - -- $ ./qemu-system-x86_64 -display none -no-user-config \ -+ $ qemu-kvm -display none -no-user-config \ - -M q35 -nodefaults -m 512 \ - -blockdev node-name=node-A,driver=qcow2,file.driver=file,file.node-name=file,file.filename=./a.qcow2 \ - -device virtio-blk,drive=node-A,id=virtio0 \ -@@ -694,7 +694,7 @@ instance, with the following invocation. 
(As noted earlier, for - simplicity's sake, the destination QEMU is started on the same host, but - it could be located elsewhere):: - -- $ ./qemu-system-x86_64 -display none -no-user-config \ -+ $ qemu-kvm -display none -no-user-config \ - -M q35 -nodefaults -m 512 \ - -blockdev node-name=node-TargetDisk,driver=qcow2,file.driver=file,file.node-name=file,file.filename=./target-disk.qcow2 \ - -device virtio-blk,drive=node-TargetDisk,id=virtio0 \ diff --git a/docs/tools/qemu-trace-stap.rst b/docs/tools/qemu-trace-stap.rst index fb70445c75..0d9a783112 100644 --- a/docs/tools/qemu-trace-stap.rst @@ -118,24 +94,11 @@ index fb70445c75..0d9a783112 100644 See also -------- -diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst -index 00554c75bd..6e0fc94005 100644 ---- a/docs/tools/virtiofsd.rst -+++ b/docs/tools/virtiofsd.rst -@@ -301,7 +301,7 @@ Export ``/var/lib/fs/vm001/`` on vhost-user UNIX domain socket - :: - - host# virtiofsd --socket-path=/var/run/vm001-vhost-fs.sock -o source=/var/lib/fs/vm001 -- host# qemu-system-x86_64 \ -+ host# qemu-kvm \ - -chardev socket,id=char0,path=/var/run/vm001-vhost-fs.sock \ - -device vhost-user-fs-pci,chardev=char0,tag=myfs \ - -object memory-backend-memfd,id=mem,size=4G,share=on \ diff --git a/qemu-options.hx b/qemu-options.hx -index 0d4fb61bf7..79ca09feac 100644 +index ac596e01a1..eea5fe9f68 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -3011,11 +3011,11 @@ SRST +@@ -3124,11 +3124,11 @@ SRST :: diff --git a/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index cd00b4f..c6e26e6 100644 --- a/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 2ab1a61510036bd409532f24ea14fa693ec0362c Mon Sep 17 00:00:00 2001 +From 121a8bb81575086f84563f1dffc4d045cdc65a64 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 
Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] diff --git a/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index 2e670a2..75ebb16 100644 --- a/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From 7b3d9142f3b296b127dce35336765dc16265d155 Mon Sep 17 00:00:00 2001 +From 6e87ea18ca021e10fd99698bb3f744ed3dc06552 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts @@ -32,7 +32,7 @@ Signed-off-by: Danilo C. L. de Paula 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index 9ea7ddd1e9..1338b677d2 100644 +index ed7c077a0d..48a8efe678 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -332,12 +332,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, diff --git a/0018-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch b/0018-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch deleted file mode 100644 index b5e8f92..0000000 --- a/0018-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch +++ /dev/null @@ -1,70 +0,0 @@ -From acdc84c1077be7d347414f781014ea785ce41d7b Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 13 Mar 2020 12:34:32 +0000 -Subject: block: Versioned x-blockdev-reopen API with feature flag - -RH-Author: Kevin Wolf -Message-id: <20200313123439.10548-7-kwolf@redhat.com> -Patchwork-id: 94283 -O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 06/13] block: Versioned x-blockdev-reopen API with feature flag -Bugzilla: 1790482 1805143 -RH-Acked-by: Eric Blake -RH-Acked-by: John Snow -RH-Acked-by: Daniel P. Berrange -RH-Acked-by: Peter Krempa - -x-blockdev-reopen is still considered unstable upstream. 
libvirt needs -(a small subset of) it for incremental backups, though. - -Add a downstream-only feature flag that effectively makes this a -versioned interface. As long as the feature is present, we promise that -we won't change the interface incompatibly. Incompatible changes to the -command will require us to drop the feature flag (and possibly introduce -a new one if the new version is still not stable upstream). - -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula ---- - qapi/block-core.json | 9 ++++++++- - scripts/qapi/expr.py | 2 +- - 2 files changed, 9 insertions(+), 2 deletions(-) - -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 6d227924d0..15ad8cee05 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -4166,10 +4166,17 @@ - # image does not have a default backing file name as part of its - # metadata. - # -+# Features: -+# @__com.redhat_rhel-av-8_2_0-api: Versioning the downstream interface while -+# it's still unstable upstream. As long as -+# this flag is present, this command will not -+# change incompatibly. 
-+# - # Since: 4.0 - ## - { 'command': 'x-blockdev-reopen', -- 'data': 'BlockdevOptions', 'boxed': true } -+ 'data': 'BlockdevOptions', 'boxed': true, -+ 'features': [ '__com.redhat_rhel-av-8_2_0-api' ] } - - ## - # @blockdev-del: -diff --git a/scripts/qapi/expr.py b/scripts/qapi/expr.py -index 540b3982b1..884874d205 100644 ---- a/scripts/qapi/expr.py -+++ b/scripts/qapi/expr.py -@@ -215,7 +215,7 @@ def check_features(features, info): - check_keys(f, info, source, ['name'], ['if']) - check_name_is_str(f['name'], info, source) - source = "%s '%s'" % (source, f['name']) -- check_name_lower(f['name'], info, source) -+ check_name_lower(f['name'], info, source, permit_underscore=True) - check_if(f, info, source) - - --- -2.27.0 - diff --git a/kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch similarity index 84% rename from kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch rename to 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch index 8b618c0..3fc1c9c 100644 --- a/kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +++ b/0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -1,7 +1,7 @@ -From 9d2700ed836cdfabbd031da74cd5a3d01c9127dd Mon Sep 17 00:00:00 2001 +From 0486a5468aee70d74e8ad65f3b2e5fa16cacba56 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 20 Aug 2021 18:25:12 +0200 -Subject: [PATCH 1/4] qcow2: Deprecation warning when opening v2 images rw +Subject: qcow2: Deprecation warning when opening v2 images rw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @@ -31,20 +31,27 @@ but it's what is available and the same function qcow2_do_open() already uses it this way for another warning. 
Signed-off-by: Kevin Wolf + +patch_name: kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +present_in_specfile: true +location_in_specfile: 116 +--- +Rebase notes (6.1.0): +- Replace bs->read_only with bdrv_is_read_only --- block/qcow2.c | 6 ++++++ tests/qemu-iotests/common.filter | 1 + 2 files changed, 7 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c -index 9727ae8fe3..7c061bf315 100644 +index 9f1b6461c8..4c303e6666 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1336,6 +1336,12 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, ret = -ENOTSUP; goto fail; } -+ if (header.version < 3 && !bs->read_only && bdrv_uses_whitelist()) { ++ if (header.version < 3 && !bdrv_is_read_only(bs) && bdrv_uses_whitelist()) { + warn_report_once("qcow2 v2 images are deprecated and may not be " + "supported in future versions. Please consider " + "upgrading the image with 'qemu-img amend " @@ -54,7 +61,7 @@ index 9727ae8fe3..7c061bf315 100644 s->qcow_version = header.version; diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter -index 268b749e2f..1f243a8bc1 100644 +index 2b2b53946c..c5c2dc39bd 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -82,6 +82,7 @@ _filter_qemu() diff --git a/README.rst b/README.rst deleted file mode 100644 index 4ca14c6..0000000 --- a/README.rst +++ /dev/null @@ -1,18 +0,0 @@ -=================== -qemu-kvm development -=================== - -qemu-kvm is maintained in a `source tree`_ rather than directly in dist-git -using packit service that provides way to develope using regular source code -structure and provides way to generate SRPM and build using koji service. - -Developers deliver all changes to source-git using merge request. Only maintainers -will be pushing changes sent to source-git to dist-git. - -Each release in dist-git is tagged in the source repository so you can easily -check out the source tree for a build. 
The tags are in the format -name-version-release, but note release doesn't contain the dist tag since the -source can be built in different build roots (Fedora, CentOS, etc.) - -.. _source tree: https://gitlab.com/redhat/centos-stream/src/qemu-kvm - diff --git a/kvm-Disable-TPM-passthrough.patch b/kvm-Disable-TPM-passthrough.patch deleted file mode 100644 index b0cc0b7..0000000 --- a/kvm-Disable-TPM-passthrough.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 651798d03827dda7eb8dc33fb3482f872ec81d16 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Mon, 5 Jul 2021 15:23:48 +0400 -Subject: [PATCH 1/2] Disable TPM passthrough -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -RH-MergeRequest: 17: Disable TPM passthrough -RH-Commit: [1/1] 735b79065149b968350b3f14d763030d5ef66457 -RH-Bugzilla: 1978911 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Eric Auger -RH-Acked-by: Daniel P. Berrangé - -Signed-off-by: Marc-André Lureau -Signed-off-by: Miroslav Rezanina ---- - default-configs/devices/ppc64-rh-devices.mak | 1 - - default-configs/devices/x86_64-rh-devices.mak | 1 - - 2 files changed, 2 deletions(-) - -diff --git a/default-configs/devices/ppc64-rh-devices.mak b/default-configs/devices/ppc64-rh-devices.mak -index 3ec5603ace..d6e424540e 100644 ---- a/default-configs/devices/ppc64-rh-devices.mak -+++ b/default-configs/devices/ppc64-rh-devices.mak -@@ -32,4 +32,3 @@ CONFIG_XICS=y - CONFIG_XIVE=y - CONFIG_TPM_SPAPR=y - CONFIG_TPM_EMULATOR=y --CONFIG_TPM_PASSTHROUGH=y -diff --git a/default-configs/devices/x86_64-rh-devices.mak b/default-configs/devices/x86_64-rh-devices.mak -index 81bda09f4c..c2dd112f81 100644 ---- a/default-configs/devices/x86_64-rh-devices.mak -+++ b/default-configs/devices/x86_64-rh-devices.mak -@@ -98,4 +98,3 @@ CONFIG_XIO3130=y - CONFIG_TPM_CRB=y - CONFIG_TPM_TIS_ISA=y - CONFIG_TPM_EMULATOR=y --CONFIG_TPM_PASSTHROUGH=y --- -2.27.0 - diff --git 
a/kvm-Remove-RHEL-7.0.0-machine-type.patch b/kvm-Remove-RHEL-7.0.0-machine-type.patch deleted file mode 100644 index 0e0ca56..0000000 --- a/kvm-Remove-RHEL-7.0.0-machine-type.patch +++ /dev/null @@ -1,197 +0,0 @@ -From 4fa7a78aa900988816d07034943be384d57117f0 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Aug 2021 10:24:56 +0200 -Subject: [PATCH 03/39] Remove RHEL 7.0.0 machine type - -RH-Author: quintela1 -RH-MergeRequest: 30: Remove RHEL 7.x.0 machine types -RH-Commit: [1/6] f182af372d3c38c3c1960af0d5cd37aba7205848 (juan.quintela/qemu-kvm) -RH-Bugzilla: 1968519 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina - -Everything is used elsewhere except for cve_2014_5263 fields. Remove those. - -Signed-off-by: Juan Quintela -Signed-off-by: Miroslav Rezanina ---- - hw/i386/pc.c | 35 ----------------------------------- - hw/i386/pc_piix.c | 34 ---------------------------------- - hw/usb/hcd-xhci.c | 20 -------------------- - hw/usb/hcd-xhci.h | 2 -- - include/hw/i386/pc.h | 3 --- - include/hw/usb.h | 3 --- - 6 files changed, 97 deletions(-) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index cdbfa84d2e..516ca50353 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -598,41 +598,6 @@ const size_t pc_rhel_7_1_compat_len = G_N_ELEMENTS(pc_rhel_7_1_compat); - * PC_RHEL_7_*_COMPAT apply both to i440fx and q35 types. - */ - --/* -- * RHEL-7 is based on QEMU 1.5.3, so this needs the PC_COMPAT_* -- * between our base and 1.5, less stuff backported to RHEL-7.0 -- * (usb-device.msos-desc), less stuff for devices we changed -- * (qemu64-x86_64-cpu) or don't support (hpet, pci-serial-2x, -- * pci-serial-4x) in 7.0. 
-- */ --GlobalProperty pc_rhel_7_0_compat[] = { -- { "virtio-scsi-pci", "any_layout", "off" }, -- { "PIIX4_PM", "memory-hotplug-support", "off" }, -- { "apic", "version", stringify(0x11) }, -- { "nec-usb-xhci", "superspeed-ports-first", "off" }, -- { "nec-usb-xhci", "force-pcie-endcap", "on" }, -- { "pci-serial", "prog_if", stringify(0) }, -- { "virtio-net-pci", "guest_announce", "off" }, -- { "ICH9-LPC", "memory-hotplug-support", "off" }, -- { "xio3130-downstream", COMPAT_PROP_PCP, "off" }, -- { "ioh3420", COMPAT_PROP_PCP, "off" }, -- { "PIIX4_PM", "acpi-pci-hotplug-with-bridge-support", "off" }, -- { "e1000", "mitigation", "off" }, -- { "virtio-net-pci", "ctrl_guest_offloads", "off" }, -- { "Conroe" "-" TYPE_X86_CPU, "x2apic", "on" }, -- { "Penryn" "-" TYPE_X86_CPU, "x2apic", "on" }, -- { "Nehalem" "-" TYPE_X86_CPU, "x2apic", "on" }, -- { "Nehalem-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, -- { "Westmere" "-" TYPE_X86_CPU, "x2apic", "on" }, -- { "Westmere-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, -- { "Opteron_G1" "-" TYPE_X86_CPU, "x2apic", "on" }, -- { "Opteron_G2" "-" TYPE_X86_CPU, "x2apic", "on" }, -- { "Opteron_G3" "-" TYPE_X86_CPU, "x2apic", "on" }, -- { "Opteron_G4" "-" TYPE_X86_CPU, "x2apic", "on" }, -- { "Opteron_G5" "-" TYPE_X86_CPU, "x2apic", "on" }, --}; --const size_t pc_rhel_7_0_compat_len = G_N_ELEMENTS(pc_rhel_7_0_compat); -- - GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) - { - GSIState *s; -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 5d61c9b833..b4e8034671 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1116,37 +1116,3 @@ static void pc_machine_rhel710_options(MachineClass *m) - - DEFINE_PC_MACHINE(rhel710, "pc-i440fx-rhel7.1.0", pc_init_rhel710, - pc_machine_rhel710_options); -- --static void pc_compat_rhel700(MachineState *machine) --{ -- PCMachineState *pcms = PC_MACHINE(machine); -- PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); -- -- pc_compat_rhel710(machine); -- -- /* Upstream enables it for 
everyone, we're a little more selective */ -- x86_cpu_change_kvm_default("x2apic", NULL); -- x86_cpu_change_kvm_default("svm", NULL); -- pcmc->legacy_acpi_table_size = 6418; /* see pc_compat_2_0() */ -- pcmc->smbios_legacy_mode = true; -- pcmc->has_reserved_memory = false; -- migrate_cve_2014_5263_xhci_fields = true; --} -- --static void pc_init_rhel700(MachineState *machine) --{ -- pc_compat_rhel700(machine); -- pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -- TYPE_I440FX_PCI_DEVICE); --} -- --static void pc_machine_rhel700_options(MachineClass *m) --{ -- pc_machine_rhel710_options(m); -- m->family = "pc_piix_Y"; -- m->desc = "RHEL 7.0.0 PC (i440FX + PIIX, 1996)"; -- compat_props_add(m->compat_props, pc_rhel_7_0_compat, pc_rhel_7_0_compat_len); --} -- --DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, -- pc_machine_rhel700_options); -diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c -index 6d1f278aad..46212b1e69 100644 ---- a/hw/usb/hcd-xhci.c -+++ b/hw/usb/hcd-xhci.c -@@ -3490,27 +3490,9 @@ static const VMStateDescription vmstate_xhci_slot = { - } - }; - --static int xhci_event_pre_save(void *opaque) --{ -- XHCIEvent *s = opaque; -- -- s->cve_2014_5263_a = ((uint8_t *)&s->type)[0]; -- s->cve_2014_5263_b = ((uint8_t *)&s->type)[1]; -- -- return 0; --} -- --bool migrate_cve_2014_5263_xhci_fields; -- --static bool xhci_event_cve_2014_5263(void *opaque, int version_id) --{ -- return migrate_cve_2014_5263_xhci_fields; --} -- - static const VMStateDescription vmstate_xhci_event = { - .name = "xhci-event", - .version_id = 1, -- .pre_save = xhci_event_pre_save, - .fields = (VMStateField[]) { - VMSTATE_UINT32(type, XHCIEvent), - VMSTATE_UINT32(ccode, XHCIEvent), -@@ -3519,8 +3501,6 @@ static const VMStateDescription vmstate_xhci_event = { - VMSTATE_UINT32(flags, XHCIEvent), - VMSTATE_UINT8(slotid, XHCIEvent), - VMSTATE_UINT8(epid, XHCIEvent), -- VMSTATE_UINT8_TEST(cve_2014_5263_a, XHCIEvent, xhci_event_cve_2014_5263), -- 
VMSTATE_UINT8_TEST(cve_2014_5263_b, XHCIEvent, xhci_event_cve_2014_5263), - VMSTATE_END_OF_LIST() - } - }; -diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h -index f450ffd13b..7bba361f3b 100644 ---- a/hw/usb/hcd-xhci.h -+++ b/hw/usb/hcd-xhci.h -@@ -149,8 +149,6 @@ typedef struct XHCIEvent { - uint32_t flags; - uint8_t slotid; - uint8_t epid; -- uint8_t cve_2014_5263_a; -- uint8_t cve_2014_5263_b; - } XHCIEvent; - - typedef struct XHCIInterrupter { -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 1980c93f41..2d17daf71f 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -314,9 +314,6 @@ extern const size_t pc_rhel_7_2_compat_len; - extern GlobalProperty pc_rhel_7_1_compat[]; - extern const size_t pc_rhel_7_1_compat_len; - --extern GlobalProperty pc_rhel_7_0_compat[]; --extern const size_t pc_rhel_7_0_compat_len; -- - /* Helper for setting model-id for CPU models that changed model-id - * depending on QEMU versions up to QEMU 2.4. - */ -diff --git a/include/hw/usb.h b/include/hw/usb.h -index edb2cd94b6..436e07b304 100644 ---- a/include/hw/usb.h -+++ b/include/hw/usb.h -@@ -577,7 +577,4 @@ void usb_pcap_init(FILE *fp); - void usb_pcap_ctrl(USBPacket *p, bool setup); - void usb_pcap_data(USBPacket *p, bool setup); - --/* hcd-xhci.c -- rhel7.0.0 machine type compatibility */ --extern bool migrate_cve_2014_5263_xhci_fields; -- - #endif --- -2.27.0 - diff --git a/kvm-Remove-RHEL-7.1.0-machine-type.patch b/kvm-Remove-RHEL-7.1.0-machine-type.patch deleted file mode 100644 index d3552f5..0000000 --- a/kvm-Remove-RHEL-7.1.0-machine-type.patch +++ /dev/null @@ -1,292 +0,0 @@ -From 224b28d337f36a937b34685519d7b657974a95d0 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Aug 2021 10:46:56 +0200 -Subject: [PATCH 04/39] Remove RHEL 7.1.0 machine type - -RH-Author: quintela1 -RH-MergeRequest: 30: Remove RHEL 7.x.0 machine types -RH-Commit: [2/6] af1d8896d63756d1acc5af8658465808c4a13ca3 (juan.quintela/qemu-kvm) -RH-Bugzilla: 1968519 
-RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina - -migrate_pre_2_2 was a redhat artifact, removed. -broken_reserved_end is already fixed, nothing to do. - -Signed-off-by: Juan Quintela -Signed-off-by: Miroslav Rezanina ---- - hw/char/serial.c | 12 ---------- - hw/core/machine.c | 23 ------------------- - hw/i386/pc.c | 51 ------------------------------------------- - hw/i386/pc_piix.c | 41 ---------------------------------- - hw/rtc/mc146818rtc.c | 4 ---- - include/hw/boards.h | 3 --- - include/hw/i386/pc.h | 3 --- - migration/migration.c | 2 -- - migration/migration.h | 5 ----- - 9 files changed, 144 deletions(-) - -diff --git a/hw/char/serial.c b/hw/char/serial.c -index cc378142a3..3e4344cb2a 100644 ---- a/hw/char/serial.c -+++ b/hw/char/serial.c -@@ -690,9 +690,6 @@ static int serial_post_load(void *opaque, int version_id) - static bool serial_thr_ipending_needed(void *opaque) - { - SerialState *s = opaque; -- if (migrate_pre_2_2) { -- return false; -- } - - if (s->ier & UART_IER_THRI) { - bool expected_value = ((s->iir & UART_IIR_ID) == UART_IIR_THRI); -@@ -774,9 +771,6 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { - static bool serial_fifo_timeout_timer_needed(void *opaque) - { - SerialState *s = (SerialState *)opaque; -- if (migrate_pre_2_2) { -- return false; -- } - - return timer_pending(s->fifo_timeout_timer); - } -@@ -795,9 +789,6 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { - static bool serial_timeout_ipending_needed(void *opaque) - { - SerialState *s = (SerialState *)opaque; -- if (migrate_pre_2_2) { -- return false; -- } - - return s->timeout_ipending != 0; - } -@@ -816,9 +807,6 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { - static bool serial_poll_needed(void *opaque) - { - SerialState *s = (SerialState *)opaque; -- if (migrate_pre_2_2) { -- return false; -- } - - return s->poll_msl >= 0; - } -diff --git a/hw/core/machine.c 
b/hw/core/machine.c -index 6c534e14fa..cc7c826593 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -270,29 +270,6 @@ GlobalProperty hw_compat_rhel_7_2[] = { - }; - const size_t hw_compat_rhel_7_2_len = G_N_ELEMENTS(hw_compat_rhel_7_2); - --/* Mostly like hw_compat_2_1 but: -- * we don't need virtio-scsi-pci since 7.0 already had that on -- * -- * RH: Note, qemu-extended-regs should have been enabled in the 7.1 -- * machine type, but was accidentally turned off in 7.2 onwards. -- */ --GlobalProperty hw_compat_rhel_7_1[] = { -- { "intel-hda-generic", "old_msi_addr", "on" }, -- { "VGA", "qemu-extended-regs", "off" }, -- { "secondary-vga", "qemu-extended-regs", "off" }, -- { "usb-mouse", "usb_version", stringify(1) }, -- { "usb-kbd", "usb_version", stringify(1) }, -- { "virtio-pci", "virtio-pci-bus-master-bug-migration", "on" }, -- { "virtio-blk-pci", "any_layout", "off" }, -- { "virtio-balloon-pci", "any_layout", "off" }, -- { "virtio-serial-pci", "any_layout", "off" }, -- { "virtio-9p-pci", "any_layout", "off" }, -- { "virtio-rng-pci", "any_layout", "off" }, -- /* HW_COMPAT_RHEL7_1 - introduced with 2.10.0 */ -- { "migration", "send-configuration", "off" }, --}; --const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); -- - GlobalProperty hw_compat_5_2[] = { - { "ICH9-LPC", "smm-compat", "on"}, - { "PIIX4_PM", "smm-compat", "on"}, -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 516ca50353..3c1f5be4fa 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -541,57 +541,6 @@ GlobalProperty pc_rhel_7_2_compat[] = { - }; - const size_t pc_rhel_7_2_compat_len = G_N_ELEMENTS(pc_rhel_7_2_compat); - --GlobalProperty pc_rhel_7_1_compat[] = { -- { "kvm64" "-" TYPE_X86_CPU, "vme", "off" }, -- { "kvm32" "-" TYPE_X86_CPU, "vme", "off" }, -- { "Conroe" "-" TYPE_X86_CPU, "vme", "off" }, -- { "Penryn" "-" TYPE_X86_CPU, "vme", "off" }, -- { "Nehalem" "-" TYPE_X86_CPU, "vme", "off" }, -- { "Nehalem-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, -- { "Westmere" "-" 
TYPE_X86_CPU, "vme", "off" }, -- { "Westmere-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, -- { "SandyBridge" "-" TYPE_X86_CPU, "vme", "off" }, -- { "SandyBridge-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, -- { "Haswell" "-" TYPE_X86_CPU, "vme", "off" }, -- { "Haswell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, -- { "Broadwell" "-" TYPE_X86_CPU, "vme", "off" }, -- { "Broadwell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, -- { "Opteron_G1" "-" TYPE_X86_CPU, "vme", "off" }, -- { "Opteron_G2" "-" TYPE_X86_CPU, "vme", "off" }, -- { "Opteron_G3" "-" TYPE_X86_CPU, "vme", "off" }, -- { "Opteron_G4" "-" TYPE_X86_CPU, "vme", "off" }, -- { "Opteron_G5" "-" TYPE_X86_CPU, "vme", "off" }, -- { "Haswell" "-" TYPE_X86_CPU, "f16c", "off" }, -- { "Haswell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, -- { "Haswell" "-" TYPE_X86_CPU, "rdrand", "off" }, -- { "Haswell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, -- { "Broadwell" "-" TYPE_X86_CPU, "f16c", "off" }, -- { "Broadwell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, -- { "Broadwell" "-" TYPE_X86_CPU, "rdrand", "off" }, -- { "Broadwell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, -- { "coreduo" "-" TYPE_X86_CPU, "vmx", "on" }, -- { "core2duo" "-" TYPE_X86_CPU, "vmx", "on" }, -- { "qemu64" "-" TYPE_X86_CPU, "min-level", stringify(4) }, -- { "kvm64" "-" TYPE_X86_CPU, "min-level", stringify(5) }, -- { "pentium3" "-" TYPE_X86_CPU, "min-level", stringify(2) }, -- { "n270" "-" TYPE_X86_CPU, "min-level", stringify(5) }, -- { "Conroe" "-" TYPE_X86_CPU, "min-level", stringify(4) }, -- { "Penryn" "-" TYPE_X86_CPU, "min-level", stringify(4) }, -- { "Nehalem" "-" TYPE_X86_CPU, "min-level", stringify(4) }, -- { "n270" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -- { "Penryn" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -- { "Conroe" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -- { "Nehalem" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -- { "Westmere" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -- { 
"SandyBridge" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -- { "IvyBridge" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -- { "Haswell" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -- { "Haswell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -- { "Broadwell" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, -- { "Broadwell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, --}; --const size_t pc_rhel_7_1_compat_len = G_N_ELEMENTS(pc_rhel_7_1_compat); -- - /* - * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine - * types as the PC_COMPAT_* do for upstream types. -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index b4e8034671..2a6a28f1e5 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1075,44 +1075,3 @@ static void pc_machine_rhel720_options(MachineClass *m) - - DEFINE_PC_MACHINE(rhel720, "pc-i440fx-rhel7.2.0", pc_init_rhel720, - pc_machine_rhel720_options); -- --static void pc_compat_rhel710(MachineState *machine) --{ -- PCMachineState *pcms = PC_MACHINE(machine); -- PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); -- -- /* From pc_compat_2_2 */ -- pcmc->rsdp_in_ram = false; -- machine->suppress_vmdesc = true; -- -- /* From pc_compat_2_1 */ -- pcmc->smbios_uuid_encoded = false; -- x86_cpu_change_kvm_default("svm", NULL); -- pcmc->enforce_aligned_dimm = false; -- -- /* Disable all the extra subsections that were added in 2.2 */ -- migrate_pre_2_2 = true; -- -- /* From pc_i440fx_2_4_machine_options */ -- pcmc->broken_reserved_end = true; --} -- --static void pc_init_rhel710(MachineState *machine) --{ -- pc_compat_rhel710(machine); -- pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -- TYPE_I440FX_PCI_DEVICE); --} -- --static void pc_machine_rhel710_options(MachineClass *m) --{ -- pc_machine_rhel720_options(m); -- m->family = "pc_piix_Y"; -- m->desc = "RHEL 7.1.0 PC (i440FX + PIIX, 1996)"; -- m->default_display = "cirrus"; -- compat_props_add(m->compat_props, 
hw_compat_rhel_7_1, hw_compat_rhel_7_1_len); -- compat_props_add(m->compat_props, pc_rhel_7_1_compat, pc_rhel_7_1_compat_len); --} -- --DEFINE_PC_MACHINE(rhel710, "pc-i440fx-rhel7.1.0", pc_init_rhel710, -- pc_machine_rhel710_options); -diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c -index 4a2e52031b..3a70a988e1 100644 ---- a/hw/rtc/mc146818rtc.c -+++ b/hw/rtc/mc146818rtc.c -@@ -824,10 +824,6 @@ static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) - { - RTCState *s = (RTCState *)opaque; - -- if (migrate_pre_2_2) { -- return false; -- } -- - return s->irq_reinject_on_ack_count != 0; - } - -diff --git a/include/hw/boards.h b/include/hw/boards.h -index 90ae100bfc..070db0b9b1 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -445,7 +445,4 @@ extern const size_t hw_compat_rhel_7_3_len; - extern GlobalProperty hw_compat_rhel_7_2[]; - extern const size_t hw_compat_rhel_7_2_len; - --extern GlobalProperty hw_compat_rhel_7_1[]; --extern const size_t hw_compat_rhel_7_1_len; -- - #endif -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 2d17daf71f..84d985f57c 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -311,9 +311,6 @@ extern const size_t pc_rhel_7_3_compat_len; - extern GlobalProperty pc_rhel_7_2_compat[]; - extern const size_t pc_rhel_7_2_compat_len; - --extern GlobalProperty pc_rhel_7_1_compat[]; --extern const size_t pc_rhel_7_1_compat_len; -- - /* Helper for setting model-id for CPU models that changed model-id - * depending on QEMU versions up to QEMU 2.4. - */ -diff --git a/migration/migration.c b/migration/migration.c -index 9d185f0e28..9cf1cde39d 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -167,8 +167,6 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, - MIGRATION_CAPABILITY_X_COLO, - MIGRATION_CAPABILITY_VALIDATE_UUID); - --bool migrate_pre_2_2; -- - /* When we add fault tolerance, we could have several - migrations at once. 
For now we don't need to add - dynamic creation of migration */ -diff --git a/migration/migration.h b/migration/migration.h -index 1b6c69751c..db6708326b 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -368,11 +368,6 @@ bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm, - void migrate_add_address(SocketAddress *address); - - int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); --/* -- * Disables a load of subsections that were added in 2.2/rh7.2 for backwards -- * migration compatibility. -- */ --extern bool migrate_pre_2_2; - - #define qemu_ram_foreach_block \ - #warning "Use foreach_not_ignored_block in migration code" --- -2.27.0 - diff --git a/kvm-Remove-RHEL-7.2.0-machine-type.patch b/kvm-Remove-RHEL-7.2.0-machine-type.patch deleted file mode 100644 index d014be6..0000000 --- a/kvm-Remove-RHEL-7.2.0-machine-type.patch +++ /dev/null @@ -1,192 +0,0 @@ -From 999b28683394c2939ca8bd6b692ed2169860ced9 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Aug 2021 11:02:24 +0200 -Subject: [PATCH 05/39] Remove RHEL 7.2.0 machine type - -RH-Author: quintela1 -RH-MergeRequest: 30: Remove RHEL 7.x.0 machine types -RH-Commit: [3/6] 575f0fe16c1928a41628f1f704a4d5d370679a82 (juan.quintela/qemu-kvm) -RH-Bugzilla: 1968519 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina - -Signed-off-by: Juan Quintela -Signed-off-by: Miroslav Rezanina ---- - hw/core/machine.c | 24 ------------------------ - hw/i386/pc.c | 22 ---------------------- - hw/i386/pc_piix.c | 26 -------------------------- - hw/ppc/spapr.c | 26 -------------------------- - include/hw/boards.h | 3 --- - include/hw/i386/pc.h | 3 --- - 6 files changed, 104 deletions(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index cc7c826593..54eb8376a7 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -246,30 +246,6 @@ GlobalProperty hw_compat_rhel_7_3[] = { - }; - const size_t hw_compat_rhel_7_3_len = 
G_N_ELEMENTS(hw_compat_rhel_7_3); - --/* Mostly like hw_compat_2_4 + 2_3 but: -- * we don't need "any_layout" as it has been backported to 7.2 -- */ --GlobalProperty hw_compat_rhel_7_2[] = { -- { "virtio-blk-device", "scsi", "true" }, -- { "e1000-82540em", "extra_mac_registers", "off" }, -- { "virtio-pci", "x-disable-pcie", "on" }, -- { "virtio-pci", "migrate-extra", "off" }, -- { "fw_cfg_mem", "dma_enabled", "off" }, -- { "fw_cfg_io", "dma_enabled", "off" }, -- { "isa-fdc", "fallback", "144" }, -- /* Optional because not all virtio-pci devices support legacy mode */ -- { "virtio-pci", "disable-modern", "on", .optional = true }, -- { "virtio-pci", "disable-legacy", "off", .optional = true }, -- { TYPE_PCI_DEVICE, "x-pcie-lnksta-dllla", "off" }, -- { "virtio-pci", "page-per-vq", "on" }, -- /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ -- { "migration", "send-section-footer", "off" }, -- /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ -- { "migration", "store-global-state", "off", -- }, --}; --const size_t hw_compat_rhel_7_2_len = G_N_ELEMENTS(hw_compat_rhel_7_2); -- - GlobalProperty hw_compat_5_2[] = { - { "ICH9-LPC", "smm-compat", "on"}, - { "PIIX4_PM", "smm-compat", "on"}, -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 3c1f5be4fa..938cb82818 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -519,28 +519,6 @@ GlobalProperty pc_rhel_7_3_compat[] = { - }; - const size_t pc_rhel_7_3_compat_len = G_N_ELEMENTS(pc_rhel_7_3_compat); - --GlobalProperty pc_rhel_7_2_compat[] = { -- { "phenom" "-" TYPE_X86_CPU, "rdtscp", "off"}, -- { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" }, -- { "qemu64" "-" TYPE_X86_CPU, "abm", "on" }, -- { "Haswell-" TYPE_X86_CPU, "abm", "off" }, -- { "Haswell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, -- { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" }, -- { "Haswell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, -- { "Broadwell-" TYPE_X86_CPU, "abm", "off" }, -- { "Broadwell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, -- { "Broadwell-noTSX-" 
TYPE_X86_CPU, "abm", "off" }, -- { "Broadwell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, -- { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" }, -- { TYPE_X86_CPU, "check", "off" }, -- { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" }, -- { TYPE_X86_CPU, "arat", "off" }, -- { "usb-redir", "streams", "off" }, -- { TYPE_X86_CPU, "fill-mtrr-mask", "off" }, -- { "apic-common", "legacy-instance-id", "on" }, --}; --const size_t pc_rhel_7_2_compat_len = G_N_ELEMENTS(pc_rhel_7_2_compat); -- - /* - * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine - * types as the PC_COMPAT_* do for upstream types. -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 2a6a28f1e5..201cbbdb01 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1049,29 +1049,3 @@ static void pc_machine_rhel730_options(MachineClass *m) - - DEFINE_PC_MACHINE(rhel730, "pc-i440fx-rhel7.3.0", pc_init_rhel730, - pc_machine_rhel730_options); -- -- --static void pc_init_rhel720(MachineState *machine) --{ -- pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -- TYPE_I440FX_PCI_DEVICE); --} -- --static void pc_machine_rhel720_options(MachineClass *m) --{ -- PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -- X86MachineClass *x86mc = X86_MACHINE_CLASS(m); -- pc_machine_rhel730_options(m); -- m->desc = "RHEL 7.2.0 PC (i440FX + PIIX, 1996)"; -- /* From pc_i440fx_2_5_machine_options */ -- x86mc->save_tsc_khz = false; -- m->legacy_fw_cfg_order = 1; -- /* Note: broken_reserved_end was already in 7.2 */ -- /* From pc_i440fx_2_6_machine_options */ -- pcmc->legacy_cpu_hotplug = true; -- compat_props_add(m->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); -- compat_props_add(m->compat_props, pc_rhel_7_2_compat, pc_rhel_7_2_compat_len); --} -- --DEFINE_PC_MACHINE(rhel720, "pc-i440fx-rhel7.2.0", pc_init_rhel720, -- pc_machine_rhel720_options); -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 11db32c537..0bc558e0b7 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -5397,32 +5397,6 @@ static 
void spapr_machine_rhel730sxxm_class_options(MachineClass *mc) - - DEFINE_SPAPR_MACHINE(rhel730sxxm, "rhel7.3.0-sxxm", false); - --/* -- * pseries-rhel7.2.0 -- */ --/* Should be like spapr_compat_2_5 + 2_4 + 2_3, but "dynamic-reconfiguration" -- * has been backported to RHEL7_2 so we don't need it here. -- */ -- --GlobalProperty spapr_compat_rhel7_2[] = { -- { "spapr-vlan", "use-rx-buffer-pools", "off" }, -- { TYPE_SPAPR_PCI_HOST_BRIDGE, "ddw", "off" }, --}; --const size_t spapr_compat_rhel7_2_len = G_N_ELEMENTS(spapr_compat_rhel7_2); -- --static void spapr_machine_rhel720_class_options(MachineClass *mc) --{ -- SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -- -- spapr_machine_rhel730_class_options(mc); -- smc->use_ohci_by_default = true; -- mc->has_hotpluggable_cpus = NULL; -- compat_props_add(mc->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); -- compat_props_add(mc->compat_props, spapr_compat_rhel7_2, spapr_compat_rhel7_2_len); --} -- --DEFINE_SPAPR_MACHINE(rhel720, "rhel7.2.0", false); -- - static void spapr_machine_register_types(void) - { - type_register_static(&spapr_machine_info); -diff --git a/include/hw/boards.h b/include/hw/boards.h -index 070db0b9b1..43eb868ceb 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -442,7 +442,4 @@ extern const size_t hw_compat_rhel_7_4_len; - extern GlobalProperty hw_compat_rhel_7_3[]; - extern const size_t hw_compat_rhel_7_3_len; - --extern GlobalProperty hw_compat_rhel_7_2[]; --extern const size_t hw_compat_rhel_7_2_len; -- - #endif -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 84d985f57c..c26c6dcc72 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -308,9 +308,6 @@ extern const size_t pc_rhel_7_4_compat_len; - extern GlobalProperty pc_rhel_7_3_compat[]; - extern const size_t pc_rhel_7_3_compat_len; - --extern GlobalProperty pc_rhel_7_2_compat[]; --extern const size_t pc_rhel_7_2_compat_len; -- - /* Helper for setting model-id for CPU models that changed 
model-id - * depending on QEMU versions up to QEMU 2.4. - */ --- -2.27.0 - diff --git a/kvm-Remove-RHEL-7.3.0-machine-types.patch b/kvm-Remove-RHEL-7.3.0-machine-types.patch deleted file mode 100644 index dbf9b49..0000000 --- a/kvm-Remove-RHEL-7.3.0-machine-types.patch +++ /dev/null @@ -1,315 +0,0 @@ -From 1610bd2ce98e3d93296fd3bc4d2c24e905428a4a Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Aug 2021 11:24:26 +0200 -Subject: [PATCH 06/39] Remove RHEL 7.3.0 machine types - -RH-Author: quintela1 -RH-MergeRequest: 30: Remove RHEL 7.x.0 machine types -RH-Commit: [4/6] 7a674496dd29a7a87843770fd0459b85831cc866 (juan.quintela/qemu-kvm) -RH-Bugzilla: 1968519 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina - -Only thing to remove with care was __redhat_e1000e_7_3_intr_state. - -Signed-off-by: Juan Quintela -Signed-off-by: Miroslav Rezanina ---- - hw/core/machine.c | 22 --------------------- - hw/i386/pc.c | 28 --------------------------- - hw/i386/pc_piix.c | 19 ------------------ - hw/i386/pc_q35.c | 19 ------------------ - hw/net/e1000e.c | 22 --------------------- - hw/ppc/spapr.c | 46 -------------------------------------------- - include/hw/boards.h | 3 --- - include/hw/i386/pc.h | 3 --- - 8 files changed, 162 deletions(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 54eb8376a7..0bcaabd8a9 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -224,28 +224,6 @@ GlobalProperty hw_compat_rhel_7_4[] = { - }; - - const size_t hw_compat_rhel_7_4_len = G_N_ELEMENTS(hw_compat_rhel_7_4); --/* Mostly like HW_COMPAT_2_6 + HW_COMPAT_2_7 + HW_COMPAT_2_8 except -- * disable-modern, disable-legacy, page-per-vq have already been -- * backported to RHEL7.3 -- */ --GlobalProperty hw_compat_rhel_7_3[] = { -- { "virtio-mmio", "format_transport_address", "off" }, -- { "virtio-serial-device", "emergency-write", "off" }, -- { "ioapic", "version", "0x11" }, -- { "intel-iommu", "x-buggy-eim", "true" }, -- { 
"virtio-pci", "x-ignore-backend-features", "on" }, -- { "fw_cfg_mem", "x-file-slots", stringify(0x10) }, -- { "fw_cfg_io", "x-file-slots", stringify(0x10) }, -- { "pflash_cfi01", "old-multiple-chip-handling", "on" }, -- { TYPE_PCI_DEVICE, "x-pcie-extcap-init", "off" }, -- { "virtio-pci", "x-pcie-deverr-init", "off" }, -- { "virtio-pci", "x-pcie-lnkctl-init", "off" }, -- { "virtio-pci", "x-pcie-pm-init", "off" }, -- { "virtio-net-device", "x-mtu-bypass-backend", "off" }, -- { "e1000e", "__redhat_e1000e_7_3_intr_state", "on" }, --}; --const size_t hw_compat_rhel_7_3_len = G_N_ELEMENTS(hw_compat_rhel_7_3); -- - GlobalProperty hw_compat_5_2[] = { - { "ICH9-LPC", "smm-compat", "on"}, - { "PIIX4_PM", "smm-compat", "on"}, -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 938cb82818..75abe0acc2 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -491,34 +491,6 @@ GlobalProperty pc_rhel_7_4_compat[] = { - }; - const size_t pc_rhel_7_4_compat_len = G_N_ELEMENTS(pc_rhel_7_4_compat); - --GlobalProperty pc_rhel_7_3_compat[] = { -- /* pc_rhel_7_3_compat from pc_compat_2_8 */ -- { "kvmclock", "x-mach-use-reliable-get-clock", "off" }, -- /* pc_rhel_7_3_compat from pc_compat_2_7 */ -- { TYPE_X86_CPU, "l3-cache", "off" }, -- /* pc_rhel_7_3_compat from pc_compat_2_7 */ -- { TYPE_X86_CPU, "full-cpuid-auto-level", "off" }, -- /* pc_rhel_7_3_compat from pc_compat_2_7 */ -- { "Opteron_G3" "-" TYPE_X86_CPU, "family", "15" }, -- /* pc_rhel_7_3_compat from pc_compat_2_7 */ -- { "Opteron_G3" "-" TYPE_X86_CPU, "model", "6" }, -- /* pc_rhel_7_3_compat from pc_compat_2_7 */ -- { "Opteron_G3" "-" TYPE_X86_CPU, "stepping", "1" }, -- /* pc_rhel_7_3_compat from pc_compat_2_7 */ -- { "isa-pcspk", "migrate", "off" }, -- /* pc_rhel_7_3_compat from pc_compat_2_6 */ -- { TYPE_X86_CPU, "cpuid-0xb", "off" }, -- /* pc_rhel_7_3_compat from pc_compat_2_8 */ -- { "ICH9-LPC", "x-smi-broadcast", "off" }, -- /* pc_rhel_7_3_compat from pc_compat_2_8 */ -- { TYPE_X86_CPU, "vmware-cpuid-freq", "off" }, -- /* 
pc_rhel_7_3_compat from pc_compat_2_8 */ -- { "Haswell-" TYPE_X86_CPU, "stepping", "1" }, -- /* pc_rhel_7_3_compat from pc_compat_2_3 added in 2.9*/ -- { TYPE_X86_CPU, "kvm-no-smi-migration", "on" }, --}; --const size_t pc_rhel_7_3_compat_len = G_N_ELEMENTS(pc_rhel_7_3_compat); -- - /* - * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine - * types as the PC_COMPAT_* do for upstream types. -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 201cbbdb01..64662cc3d5 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1030,22 +1030,3 @@ static void pc_machine_rhel740_options(MachineClass *m) - - DEFINE_PC_MACHINE(rhel740, "pc-i440fx-rhel7.4.0", pc_init_rhel740, - pc_machine_rhel740_options); -- --static void pc_init_rhel730(MachineState *machine) --{ -- pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -- TYPE_I440FX_PCI_DEVICE); --} -- --static void pc_machine_rhel730_options(MachineClass *m) --{ -- PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -- pc_machine_rhel740_options(m); -- m->desc = "RHEL 7.3.0 PC (i440FX + PIIX, 1996)"; -- pcmc->linuxboot_dma_enabled = false; -- compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); -- compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); --} -- --DEFINE_PC_MACHINE(rhel730, "pc-i440fx-rhel7.3.0", pc_init_rhel730, -- pc_machine_rhel730_options); -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 01ff3e0544..bf49a943dc 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -792,22 +792,3 @@ static void pc_q35_machine_rhel740_options(MachineClass *m) - - DEFINE_PC_MACHINE(q35_rhel740, "pc-q35-rhel7.4.0", pc_q35_init_rhel740, - pc_q35_machine_rhel740_options); -- --static void pc_q35_init_rhel730(MachineState *machine) --{ -- pc_q35_init(machine); --} -- --static void pc_q35_machine_rhel730_options(MachineClass *m) --{ -- PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -- pc_q35_machine_rhel740_options(m); -- m->desc = "RHEL-7.3.0 PC (Q35 + ICH9, 
2009)"; -- m->max_cpus = 255; -- pcmc->linuxboot_dma_enabled = false; -- compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); -- compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); --} -- --DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, -- pc_q35_machine_rhel730_options); -diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c -index 6d39c1f1c4..b22f232863 100644 ---- a/hw/net/e1000e.c -+++ b/hw/net/e1000e.c -@@ -79,12 +79,6 @@ struct E1000EState { - bool disable_vnet; - - E1000ECore core; -- -- /* 7.3 had the intr_state field that was in the original e1000e code -- * but that was removed prior to 2.7's release -- */ -- bool redhat_7_3_intr_state_enable; -- uint32_t redhat_7_3_intr_state; - }; - - #define E1000E_MMIO_IDX 0 -@@ -100,10 +94,6 @@ struct E1000EState { - #define E1000E_MSIX_TABLE (0x0000) - #define E1000E_MSIX_PBA (0x2000) - --/* Values as in RHEL 7.3 build and original upstream */ --#define RH_E1000E_USE_MSI BIT(0) --#define RH_E1000E_USE_MSIX BIT(1) -- - static uint64_t - e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) - { -@@ -315,8 +305,6 @@ e1000e_init_msix(E1000EState *s) - } else { - if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { - msix_uninit(d, &s->msix, &s->msix); -- } else { -- s->redhat_7_3_intr_state |= RH_E1000E_USE_MSIX; - } - } - } -@@ -488,8 +476,6 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) - ret = msi_init(PCI_DEVICE(s), 0xD0, 1, true, false, NULL); - if (ret) { - trace_e1000e_msi_init_fail(ret); -- } else { -- s->redhat_7_3_intr_state |= RH_E1000E_USE_MSI; - } - - if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, -@@ -613,11 +599,6 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { - VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ - e1000e_vmstate_intr_timer, E1000IntrDelayTimer) - --static bool rhel_7_3_check(void *opaque, int version_id) --{ -- return ((E1000EState *)opaque)->redhat_7_3_intr_state_enable; 
--} -- - static const VMStateDescription e1000e_vmstate = { - .name = "e1000e", - .version_id = 1, -@@ -629,7 +610,6 @@ static const VMStateDescription e1000e_vmstate = { - VMSTATE_MSIX(parent_obj, E1000EState), - - VMSTATE_UINT32(ioaddr, E1000EState), -- VMSTATE_UINT32_TEST(redhat_7_3_intr_state, E1000EState, rhel_7_3_check), - VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), - VMSTATE_UINT8(core.rx_desc_len, E1000EState), - VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, -@@ -678,8 +658,6 @@ static PropertyInfo e1000e_prop_disable_vnet, - - static Property e1000e_properties[] = { - DEFINE_NIC_PROPERTIES(E1000EState, conf), -- DEFINE_PROP_BOOL("__redhat_e1000e_7_3_intr_state", E1000EState, -- redhat_7_3_intr_state_enable, false), - DEFINE_PROP_SIGNED("disable_vnet_hdr", E1000EState, disable_vnet, false, - e1000e_prop_disable_vnet, bool), - DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 0bc558e0b7..ca0b99403e 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -5351,52 +5351,6 @@ static void spapr_machine_rhel740sxxm_class_options(MachineClass *mc) - - DEFINE_SPAPR_MACHINE(rhel740sxxm, "rhel7.4.0-sxxm", false); - --/* -- * pseries-rhel7.3.0 -- * like spapr_compat_2_6/_2_7/_2_8 but "ddw" has been backported to RHEL7_3 -- */ --GlobalProperty spapr_compat_rhel7_3[] = { -- { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem_win_size", "0xf80000000" }, -- { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem64_win_size", "0" }, -- { TYPE_POWERPC_CPU, "pre-2.8-migration", "on" }, -- { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-2.8-migration", "on" }, -- { TYPE_SPAPR_PCI_HOST_BRIDGE, "pcie-extended-configuration-space", "off" }, --}; --const size_t spapr_compat_rhel7_3_len = G_N_ELEMENTS(spapr_compat_rhel7_3); -- --static void spapr_machine_rhel730_class_options(MachineClass *mc) --{ -- SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -- -- spapr_machine_rhel740_class_options(mc); -- mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power7_v2.3"); 
-- mc->default_machine_opts = "modern-hotplug-events=off"; -- compat_props_add(mc->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); -- compat_props_add(mc->compat_props, spapr_compat_rhel7_3, spapr_compat_rhel7_3_len); -- -- smc->phb_placement = phb_placement_2_7; --} -- --DEFINE_SPAPR_MACHINE(rhel730, "rhel7.3.0", false); -- --/* -- * pseries-rhel7.3.0-sxxm -- * -- * pseries-rhel7.3.0 with speculative execution exploit mitigations enabled by default -- */ -- --static void spapr_machine_rhel730sxxm_class_options(MachineClass *mc) --{ -- SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -- -- spapr_machine_rhel730_class_options(mc); -- smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; -- smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; -- smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; --} -- --DEFINE_SPAPR_MACHINE(rhel730sxxm, "rhel7.3.0-sxxm", false); -- - static void spapr_machine_register_types(void) - { - type_register_static(&spapr_machine_info); -diff --git a/include/hw/boards.h b/include/hw/boards.h -index 43eb868ceb..24d4d726d8 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -439,7 +439,4 @@ extern const size_t hw_compat_rhel_7_5_len; - extern GlobalProperty hw_compat_rhel_7_4[]; - extern const size_t hw_compat_rhel_7_4_len; - --extern GlobalProperty hw_compat_rhel_7_3[]; --extern const size_t hw_compat_rhel_7_3_len; -- - #endif -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index c26c6dcc72..820fb3f577 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -305,9 +305,6 @@ extern const size_t pc_rhel_7_5_compat_len; - extern GlobalProperty pc_rhel_7_4_compat[]; - extern const size_t pc_rhel_7_4_compat_len; - --extern GlobalProperty pc_rhel_7_3_compat[]; --extern const size_t pc_rhel_7_3_compat_len; -- - /* Helper for setting model-id for CPU models that changed model-id - * depending on QEMU versions up to QEMU 2.4. 
- */ --- -2.27.0 - diff --git a/kvm-Remove-RHEL-7.4.0-machine-types.patch b/kvm-Remove-RHEL-7.4.0-machine-types.patch deleted file mode 100644 index 9484fc1..0000000 --- a/kvm-Remove-RHEL-7.4.0-machine-types.patch +++ /dev/null @@ -1,301 +0,0 @@ -From b64f8848efe610c52791587ec41301b73ec9165a Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Aug 2021 11:53:07 +0200 -Subject: [PATCH 07/39] Remove RHEL 7.4.0 machine types - -RH-Author: quintela1 -RH-MergeRequest: 30: Remove RHEL 7.x.0 machine types -RH-Commit: [5/6] a1940ac35591cebff52379f85656caf6a03328dd (juan.quintela/qemu-kvm) -RH-Bugzilla: 1968519 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina - -Revert pc_rom_ro hack. -Remove force_rev1_fadt hack, it is not needed anymore. - -Signed-off-by: Juan Quintela -Signed-off-by: Miroslav Rezanina ---- - hw/acpi/ich9.c | 15 --------------- - hw/core/machine.c | 9 --------- - hw/i386/acpi-build.c | 3 --- - hw/i386/pc.c | 18 +----------------- - hw/i386/pc_piix.c | 19 ------------------- - hw/i386/pc_q35.c | 18 ------------------ - hw/ppc/spapr.c | 43 ------------------------------------------ - include/hw/acpi/ich9.h | 3 --- - include/hw/boards.h | 3 --- - include/hw/i386/pc.h | 6 ------ - 10 files changed, 1 insertion(+), 136 deletions(-) - -diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 33b0c6e33c..7f01fad64c 100644 ---- a/hw/acpi/ich9.c -+++ b/hw/acpi/ich9.c -@@ -369,18 +369,6 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) - s->pm.enable_tco = value; - } - --static bool ich9_pm_get_force_rev1_fadt(Object *obj, Error **errp) --{ -- ICH9LPCState *s = ICH9_LPC_DEVICE(obj); -- return s->pm.force_rev1_fadt; --} -- --static void ich9_pm_set_force_rev1_fadt(Object *obj, bool value, Error **errp) --{ -- ICH9LPCState *s = ICH9_LPC_DEVICE(obj); -- s->pm.force_rev1_fadt = value; --} -- - void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) - { - static const uint32_t gpe0_len = 
ICH9_PMIO_GPE0_LEN; -@@ -403,9 +391,6 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) - object_property_add_bool(obj, "cpu-hotplug-legacy", - ich9_pm_get_cpu_hotplug_legacy, - ich9_pm_set_cpu_hotplug_legacy); -- object_property_add_bool(obj, "__com.redhat_force-rev1-fadt", -- ich9_pm_get_force_rev1_fadt, -- ich9_pm_set_force_rev1_fadt); - object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S3_DISABLED, - &pm->disable_s3, OBJ_PROP_FLAG_READWRITE); - object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S4_DISABLED, -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 0bcaabd8a9..6dc2651d73 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -215,15 +215,6 @@ GlobalProperty hw_compat_rhel_7_5[] = { - }; - const size_t hw_compat_rhel_7_5_len = G_N_ELEMENTS(hw_compat_rhel_7_5); - --/* Mostly like hw_compat_2_9 except -- * x-mtu-bypass-backend, x-migrate-msix has already been -- * backported to RHEL7.4. shpc was already on in 7.4. -- */ --GlobalProperty hw_compat_rhel_7_4[] = { -- { "intel-iommu", "pt", "off" }, --}; -- --const size_t hw_compat_rhel_7_4_len = G_N_ELEMENTS(hw_compat_rhel_7_4); - GlobalProperty hw_compat_5_2[] = { - { "ICH9-LPC", "smm-compat", "on"}, - { "PIIX4_PM", "smm-compat", "on"}, -diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index 7bd67f7877..de98750aef 100644 ---- a/hw/i386/acpi-build.c -+++ b/hw/i386/acpi-build.c -@@ -231,9 +231,6 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) - pm->fadt.reset_reg = r; - pm->fadt.reset_val = 0xf; - pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; -- if (object_property_get_bool(lpc, -- "__com.redhat_force-rev1-fadt", NULL)) -- pm->fadt.rev = 1; - pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; - pm->smi_on_cpuhp = - !!(smi_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT)); -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 75abe0acc2..6b39abce02 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -477,20 +477,6 @@ GlobalProperty pc_rhel_7_5_compat[] = { - }; 
- const size_t pc_rhel_7_5_compat_len = G_N_ELEMENTS(pc_rhel_7_5_compat); - --GlobalProperty pc_rhel_7_4_compat[] = { -- /* pc_rhel_7_4_compat from pc_compat_2_9 */ -- { "mch", "extended-tseg-mbytes", stringify(0) }, -- /* bz 1489800 */ -- { "ICH9-LPC", "__com.redhat_force-rev1-fadt", "on" }, -- /* pc_rhel_7_4_compat from pc_compat_2_10 */ -- { "i440FX-pcihost", "x-pci-hole64-fix", "off" }, -- /* pc_rhel_7_4_compat from pc_compat_2_10 */ -- { "q35-pcihost", "x-pci-hole64-fix", "off" }, -- /* pc_rhel_7_4_compat from pc_compat_2_10 */ -- { TYPE_X86_CPU, "x-hv-max-vps", "0x40" }, --}; --const size_t pc_rhel_7_4_compat_len = G_N_ELEMENTS(pc_rhel_7_4_compat); -- - /* - * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine - * types as the PC_COMPAT_* do for upstream types. -@@ -1094,8 +1080,7 @@ void pc_memory_init(PCMachineState *pcms, - option_rom_mr = g_malloc(sizeof(*option_rom_mr)); - memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, - &error_fatal); -- /* RH difference: See bz 1489800, explicitly make ROM ro */ -- if (pcmc->pc_rom_ro) { -+ if (pcmc->pci_enabled) { - memory_region_set_readonly(option_rom_mr, true); - } - memory_region_add_subregion_overlap(rom_memory, -@@ -1845,7 +1830,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - pcmc->pvh_enabled = true; - pcmc->kvmclock_create_always = true; - assert(!mc->get_hotplug_handler); -- pcmc->pc_rom_ro = true; - mc->async_pf_vmexit_disable = false; - mc->get_hotplug_handler = pc_get_hotplug_handler; - mc->hotplug_allowed = pc_hotplug_allowed; -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 64662cc3d5..fe2ac7593a 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1011,22 +1011,3 @@ static void pc_machine_rhel750_options(MachineClass *m) - - DEFINE_PC_MACHINE(rhel750, "pc-i440fx-rhel7.5.0", pc_init_rhel750, - pc_machine_rhel750_options); -- --static void pc_init_rhel740(MachineState *machine) --{ -- pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ 
-- TYPE_I440FX_PCI_DEVICE); --} -- --static void pc_machine_rhel740_options(MachineClass *m) --{ -- PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -- pc_machine_rhel750_options(m); -- m->desc = "RHEL 7.4.0 PC (i440FX + PIIX, 1996)"; -- pcmc->pc_rom_ro = false; -- compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); -- compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); --} -- --DEFINE_PC_MACHINE(rhel740, "pc-i440fx-rhel7.4.0", pc_init_rhel740, -- pc_machine_rhel740_options); -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index bf49a943dc..9958ed9184 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -774,21 +774,3 @@ static void pc_q35_machine_rhel750_options(MachineClass *m) - - DEFINE_PC_MACHINE(q35_rhel750, "pc-q35-rhel7.5.0", pc_q35_init_rhel750, - pc_q35_machine_rhel750_options); -- --static void pc_q35_init_rhel740(MachineState *machine) --{ -- pc_q35_init(machine); --} -- --static void pc_q35_machine_rhel740_options(MachineClass *m) --{ -- PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -- pc_q35_machine_rhel750_options(m); -- m->desc = "RHEL-7.4.0 PC (Q35 + ICH9, 2009)"; -- pcmc->pc_rom_ro = false; -- compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); -- compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); --} -- --DEFINE_PC_MACHINE(q35_rhel740, "pc-q35-rhel7.4.0", pc_q35_init_rhel740, -- pc_q35_machine_rhel740_options); -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index ca0b99403e..bdabbf8b81 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -5308,49 +5308,6 @@ static void spapr_machine_rhel750sxxm_class_options(MachineClass *mc) - } - - DEFINE_SPAPR_MACHINE(rhel750sxxm, "rhel7.5.0-sxxm", false); -- --/* -- * pseries-rhel7.4.0 -- * like spapr_compat_2_9 -- */ --GlobalProperty spapr_compat_rhel7_4[] = { -- { TYPE_POWERPC_CPU, "pre-2.10-migration", "on" }, --}; --const size_t spapr_compat_rhel7_4_len = G_N_ELEMENTS(spapr_compat_rhel7_4); -- 
--static void spapr_machine_rhel740_class_options(MachineClass *mc) --{ -- SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -- -- spapr_machine_rhel750_class_options(mc); -- compat_props_add(mc->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); -- compat_props_add(mc->compat_props, spapr_compat_rhel7_4, spapr_compat_rhel7_4_len); -- smc->has_power9_support = false; -- smc->pre_2_10_has_unused_icps = true; -- smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED; -- smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_ON; --} -- --DEFINE_SPAPR_MACHINE(rhel740, "rhel7.4.0", false); -- --/* -- * pseries-rhel7.4.0-sxxm -- * -- * pseries-rhel7.4.0 with speculative execution exploit mitigations enabled by default -- */ -- --static void spapr_machine_rhel740sxxm_class_options(MachineClass *mc) --{ -- SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -- -- spapr_machine_rhel740_class_options(mc); -- smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; -- smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; -- smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; --} -- --DEFINE_SPAPR_MACHINE(rhel740sxxm, "rhel7.4.0-sxxm", false); -- - static void spapr_machine_register_types(void) - { - type_register_static(&spapr_machine_info); -diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h -index e1ecfbaf1f..df519e40b5 100644 ---- a/include/hw/acpi/ich9.h -+++ b/include/hw/acpi/ich9.h -@@ -62,9 +62,6 @@ typedef struct ICH9LPCPMRegs { - bool smm_compat; - bool enable_tco; - TCOIORegs tco_regs; -- -- /* RH addition, see bz 1489800 */ -- bool force_rev1_fadt; - } ICH9LPCPMRegs; - - #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" -diff --git a/include/hw/boards.h b/include/hw/boards.h -index 24d4d726d8..f27170b6fb 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -436,7 +436,4 @@ extern const size_t hw_compat_rhel_7_6_len; - extern GlobalProperty hw_compat_rhel_7_5[]; - extern const size_t hw_compat_rhel_7_5_len; - --extern 
GlobalProperty hw_compat_rhel_7_4[]; --extern const size_t hw_compat_rhel_7_4_len; -- - #endif -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 820fb3f577..ff93dfb372 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -128,9 +128,6 @@ struct PCMachineClass { - - /* create kvmclock device even when KVM PV features are not exposed */ - bool kvmclock_create_always; -- -- /* RH only, see bz 1489800 */ -- bool pc_rom_ro; - }; - - #define TYPE_PC_MACHINE "generic-pc-machine" -@@ -302,9 +299,6 @@ extern const size_t pc_rhel_7_6_compat_len; - extern GlobalProperty pc_rhel_7_5_compat[]; - extern const size_t pc_rhel_7_5_compat_len; - --extern GlobalProperty pc_rhel_7_4_compat[]; --extern const size_t pc_rhel_7_4_compat_len; -- - /* Helper for setting model-id for CPU models that changed model-id - * depending on QEMU versions up to QEMU 2.4. - */ --- -2.27.0 - diff --git a/kvm-Remove-RHEL-7.5.0-machine-types.patch b/kvm-Remove-RHEL-7.5.0-machine-types.patch deleted file mode 100644 index ed51c11..0000000 --- a/kvm-Remove-RHEL-7.5.0-machine-types.patch +++ /dev/null @@ -1,210 +0,0 @@ -From 6de2502ef6f2ee68842bed7d09b0a2c7ac57b11b Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 3 Aug 2021 12:24:36 +0200 -Subject: [PATCH 08/39] Remove RHEL 7.5.0 machine types - -RH-Author: quintela1 -RH-MergeRequest: 30: Remove RHEL 7.x.0 machine types -RH-Commit: [6/6] a4a72853fda905fe5036520f6095032e308cb51f (juan.quintela/qemu-kvm) -RH-Bugzilla: 1968519 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina - -Signed-off-by: Juan Quintela -Signed-off-by: Miroslav Rezanina ---- - hw/core/machine.c | 20 -------------------- - hw/i386/pc.c | 18 ------------------ - hw/i386/pc_piix.c | 20 -------------------- - hw/i386/pc_q35.c | 20 -------------------- - hw/ppc/spapr.c | 31 ------------------------------- - include/hw/boards.h | 3 --- - include/hw/i386/pc.h | 3 --- - 7 files changed, 115 deletions(-) - -diff --git 
a/hw/core/machine.c b/hw/core/machine.c -index 6dc2651d73..b271389681 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -195,26 +195,6 @@ GlobalProperty hw_compat_rhel_7_6[] = { - }; - const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); - --/* The same as hw_compat_2_11 + hw_compat_2_10 */ --GlobalProperty hw_compat_rhel_7_5[] = { -- /* hw_compat_rhel_7_5 from hw_compat_2_11 */ -- { "hpet", "hpet-offset-saved", "false" }, -- /* hw_compat_rhel_7_5 from hw_compat_2_11 */ -- { "virtio-blk-pci", "vectors", "2" }, -- /* hw_compat_rhel_7_5 from hw_compat_2_11 */ -- { "vhost-user-blk-pci", "vectors", "2" }, -- /* hw_compat_rhel_7_5 from hw_compat_2_11 -- bz 1608778 modified for our naming */ -- { "e1000-82540em", "migrate_tso_props", "off" }, -- /* hw_compat_rhel_7_5 from hw_compat_2_10 */ -- { "virtio-mouse-device", "wheel-axis", "false" }, -- /* hw_compat_rhel_7_5 from hw_compat_2_10 */ -- { "virtio-tablet-device", "wheel-axis", "false" }, -- { "cirrus-vga", "vgamem_mb", "16" }, -- { "migration", "decompress-error-check", "off" }, --}; --const size_t hw_compat_rhel_7_5_len = G_N_ELEMENTS(hw_compat_rhel_7_5); -- - GlobalProperty hw_compat_5_2[] = { - { "ICH9-LPC", "smm-compat", "on"}, - { "PIIX4_PM", "smm-compat", "on"}, -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 6b39abce02..88972dc3e6 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -459,24 +459,6 @@ GlobalProperty pc_rhel_7_6_compat[] = { - }; - const size_t pc_rhel_7_6_compat_len = G_N_ELEMENTS(pc_rhel_7_6_compat); - --/* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: -- * - x-hv-max-vps was backported to 7.5 -- * - x-pci-hole64-fix was backported to 7.5 -- */ --GlobalProperty pc_rhel_7_5_compat[] = { -- /* pc_rhel_7_5_compat from pc_compat_2_11 */ -- { "Skylake-Server" "-" TYPE_X86_CPU, "clflushopt", "off" }, -- /* pc_rhel_7_5_compat from pc_compat_2_12 */ -- { TYPE_X86_CPU, "legacy-cache", "on" }, -- /* pc_rhel_7_5_compat from pc_compat_2_12 */ -- { TYPE_X86_CPU, "topoext", 
"off" }, -- /* pc_rhel_7_5_compat from pc_compat_2_12 */ -- { "EPYC-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, -- /* pc_rhel_7_5_compat from pc_compat_2_12 */ -- { "EPYC-IBPB-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, --}; --const size_t pc_rhel_7_5_compat_len = G_N_ELEMENTS(pc_rhel_7_5_compat); -- - /* - * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine - * types as the PC_COMPAT_* do for upstream types. -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index fe2ac7593a..ba7926cfae 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -991,23 +991,3 @@ static void pc_machine_rhel760_options(MachineClass *m) - - DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, - pc_machine_rhel760_options); -- --static void pc_init_rhel750(MachineState *machine) --{ -- pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ -- TYPE_I440FX_PCI_DEVICE); --} -- --static void pc_machine_rhel750_options(MachineClass *m) --{ -- pc_machine_rhel760_options(m); -- m->alias = NULL; -- m->is_default = 0; -- m->desc = "RHEL 7.5.0 PC (i440FX + PIIX, 1996)"; -- m->auto_enable_numa_with_memhp = false; -- compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); -- compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); --} -- --DEFINE_PC_MACHINE(rhel750, "pc-i440fx-rhel7.5.0", pc_init_rhel750, -- pc_machine_rhel750_options); -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 9958ed9184..f93825d603 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -754,23 +754,3 @@ static void pc_q35_machine_rhel760_options(MachineClass *m) - - DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, - pc_q35_machine_rhel760_options); -- --static void pc_q35_init_rhel750(MachineState *machine) --{ -- pc_q35_init(machine); --} -- --static void pc_q35_machine_rhel750_options(MachineClass *m) --{ -- PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -- pc_q35_machine_rhel760_options(m); -- m->alias = 
NULL; -- m->desc = "RHEL-7.5.0 PC (Q35 + ICH9, 2009)"; -- m->auto_enable_numa_with_memhp = false; -- pcmc->default_nic_model = "e1000"; -- compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); -- compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); --} -- --DEFINE_PC_MACHINE(q35_rhel750, "pc-q35-rhel7.5.0", pc_q35_init_rhel750, -- pc_q35_machine_rhel750_options); -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index bdabbf8b81..cda75a8cfe 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -5282,35 +5282,4 @@ static void spapr_machine_rhel760sxxm_class_options(MachineClass *mc) - - DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false); - --static void spapr_machine_rhel750_class_options(MachineClass *mc) --{ -- spapr_machine_rhel760_class_options(mc); -- compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); -- --} -- --DEFINE_SPAPR_MACHINE(rhel750, "rhel7.5.0", false); -- --/* -- * pseries-rhel7.5.0-sxxm -- * -- * pseries-rhel7.5.0 with speculative execution exploit mitigations enabled by default -- */ -- --static void spapr_machine_rhel750sxxm_class_options(MachineClass *mc) --{ -- SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -- -- spapr_machine_rhel750_class_options(mc); -- smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; -- smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; -- smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; --} -- --DEFINE_SPAPR_MACHINE(rhel750sxxm, "rhel7.5.0-sxxm", false); --static void spapr_machine_register_types(void) --{ -- type_register_static(&spapr_machine_info); --} -- - type_init(spapr_machine_register_types) -diff --git a/include/hw/boards.h b/include/hw/boards.h -index f27170b6fb..73f7dcdb35 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -433,7 +433,4 @@ extern const size_t hw_compat_rhel_8_0_len; - extern GlobalProperty hw_compat_rhel_7_6[]; - extern const size_t hw_compat_rhel_7_6_len; - 
--extern GlobalProperty hw_compat_rhel_7_5[]; --extern const size_t hw_compat_rhel_7_5_len; -- - #endif -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index ff93dfb372..549e581a98 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -296,9 +296,6 @@ extern const size_t pc_rhel_8_0_compat_len; - extern GlobalProperty pc_rhel_7_6_compat[]; - extern const size_t pc_rhel_7_6_compat_len; - --extern GlobalProperty pc_rhel_7_5_compat[]; --extern const size_t pc_rhel_7_5_compat_len; -- - /* Helper for setting model-id for CPU models that changed model-id - * depending on QEMU versions up to QEMU 2.4. - */ --- -2.27.0 - diff --git a/kvm-Remove-SPICE-and-QXL-from-x86_64-rh-devices.mak.patch b/kvm-Remove-SPICE-and-QXL-from-x86_64-rh-devices.mak.patch deleted file mode 100644 index 7fdb392..0000000 --- a/kvm-Remove-SPICE-and-QXL-from-x86_64-rh-devices.mak.patch +++ /dev/null @@ -1,43 +0,0 @@ -From c51bf45304812b4da010bdd3db1b1d59c44af4ee Mon Sep 17 00:00:00 2001 -From: Uri Lublin -Date: Tue, 27 Apr 2021 18:37:09 +0300 -Subject: [PATCH 2/4] Remove SPICE and QXL from x86_64-rh-devices.mak -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Uri Lublin (uril) -RH-MergeRequest: 3: Build qemu-kvm without SPICE and QXL -RH-Commit: [1/3] 8f6186049d22c3a6e1bfb1cf2bfe88a8fc8c2271 -RH-Bugzilla: 1906168 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Danilo de Paula -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Daniel P. 
Berrangé ---- - default-configs/devices/x86_64-rh-devices.mak | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/default-configs/devices/x86_64-rh-devices.mak b/default-configs/devices/x86_64-rh-devices.mak -index 9f41400530..d09c138fc6 100644 ---- a/default-configs/devices/x86_64-rh-devices.mak -+++ b/default-configs/devices/x86_64-rh-devices.mak -@@ -60,7 +60,6 @@ CONFIG_PFLASH_CFI01=y - CONFIG_PVPANIC_ISA=y - CONFIG_PXB=y - CONFIG_Q35=y --CONFIG_QXL=y - CONFIG_RTL8139_PCI=y - CONFIG_SCSI=y - CONFIG_SERIAL=y -@@ -70,7 +69,6 @@ CONFIG_SEV=y - CONFIG_SGA=y - CONFIG_SMBIOS=y - CONFIG_SMBUS_EEPROM=y --CONFIG_SPICE=y - CONFIG_TEST_DEVICES=y - CONFIG_USB=y - CONFIG_USB_EHCI=y --- -2.27.0 - diff --git a/kvm-Update-Linux-headers-to-5.13-rc4.patch b/kvm-Update-Linux-headers-to-5.13-rc4.patch deleted file mode 100644 index a06d102..0000000 --- a/kvm-Update-Linux-headers-to-5.13-rc4.patch +++ /dev/null @@ -1,5465 +0,0 @@ -From 5df5b2094167f0bc6c728933b990982fe012e33d Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 29 Jul 2021 07:42:25 -0400 -Subject: [PATCH 15/39] Update Linux headers to 5.13-rc4 - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 -RH-Commit: [7/15] 2e8a71b8e0d02d2a16cd6dd2234895a9db59fa0d (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Acked-by: Igor Mammedov -RH-Acked-by: Andrew Jones - -Signed-off-by: Eduardo Habkost -Message-Id: <20210603191541.2862286-1-ehabkost@redhat.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit 278f064e452468d66ee15c3f453826e697ec6832) -Signed-off-by: Paul Lai -Signed-off-by: Miroslav Rezanina ---- - .../infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 35 - - include/standard-headers/drm/drm_fourcc.h | 23 +- - include/standard-headers/linux/ethtool.h | 109 ++- - include/standard-headers/linux/fuse.h | 17 +- - include/standard-headers/linux/input.h | 2 +- - 
include/standard-headers/linux/virtio_bt.h | 31 + - include/standard-headers/linux/virtio_ids.h | 2 + - include/standard-headers/linux/virtio_snd.h | 334 +++++++ - .../standard-headers/rdma/vmw_pvrdma-abi.h | 7 + - linux-headers/asm-generic/unistd.h | 13 +- - linux-headers/asm-mips/unistd_n32.h | 751 +++++++-------- - linux-headers/asm-mips/unistd_n64.h | 703 +++++++------- - linux-headers/asm-mips/unistd_o32.h | 843 ++++++++--------- - linux-headers/asm-powerpc/kvm.h | 2 + - linux-headers/asm-powerpc/unistd_32.h | 856 +++++++++--------- - linux-headers/asm-powerpc/unistd_64.h | 800 ++++++++-------- - linux-headers/asm-s390/unistd_32.h | 4 + - linux-headers/asm-s390/unistd_64.h | 4 + - linux-headers/asm-x86/kvm.h | 3 + - linux-headers/asm-x86/unistd_32.h | 4 + - linux-headers/asm-x86/unistd_64.h | 4 + - linux-headers/asm-x86/unistd_x32.h | 4 + - linux-headers/linux/kvm.h | 131 ++- - linux-headers/linux/userfaultfd.h | 36 +- - linux-headers/linux/vfio.h | 35 + - 25 files changed, 2707 insertions(+), 2046 deletions(-) - create mode 100644 include/standard-headers/linux/virtio_bt.h - create mode 100644 include/standard-headers/linux/virtio_snd.h - -diff --git a/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h -index 1677208a41..94d41b202c 100644 ---- a/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h -+++ b/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h -@@ -70,30 +70,6 @@ enum pvrdma_mtu { - PVRDMA_MTU_4096 = 5, - }; - --static inline int pvrdma_mtu_enum_to_int(enum pvrdma_mtu mtu) --{ -- switch (mtu) { -- case PVRDMA_MTU_256: return 256; -- case PVRDMA_MTU_512: return 512; -- case PVRDMA_MTU_1024: return 1024; -- case PVRDMA_MTU_2048: return 2048; -- case PVRDMA_MTU_4096: return 4096; -- default: return -1; -- } --} -- --static inline enum pvrdma_mtu pvrdma_mtu_int_to_enum(int mtu) --{ -- switch (mtu) { -- case 256: 
return PVRDMA_MTU_256; -- case 512: return PVRDMA_MTU_512; -- case 1024: return PVRDMA_MTU_1024; -- case 2048: return PVRDMA_MTU_2048; -- case 4096: -- default: return PVRDMA_MTU_4096; -- } --} -- - enum pvrdma_port_state { - PVRDMA_PORT_NOP = 0, - PVRDMA_PORT_DOWN = 1, -@@ -138,17 +114,6 @@ enum pvrdma_port_width { - PVRDMA_WIDTH_12X = 8, - }; - --static inline int pvrdma_width_enum_to_int(enum pvrdma_port_width width) --{ -- switch (width) { -- case PVRDMA_WIDTH_1X: return 1; -- case PVRDMA_WIDTH_4X: return 4; -- case PVRDMA_WIDTH_8X: return 8; -- case PVRDMA_WIDTH_12X: return 12; -- default: return -1; -- } --} -- - enum pvrdma_port_speed { - PVRDMA_SPEED_SDR = 1, - PVRDMA_SPEED_DDR = 2, -diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h -index c47e19810c..a61ae520c2 100644 ---- a/include/standard-headers/drm/drm_fourcc.h -+++ b/include/standard-headers/drm/drm_fourcc.h -@@ -526,6 +526,25 @@ extern "C" { - */ - #define I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS fourcc_mod_code(INTEL, 7) - -+/* -+ * Intel Color Control Surface with Clear Color (CCS) for Gen-12 render -+ * compression. -+ * -+ * The main surface is Y-tiled and is at plane index 0 whereas CCS is linear -+ * and at index 1. The clear color is stored at index 2, and the pitch should -+ * be ignored. The clear color structure is 256 bits. The first 128 bits -+ * represents Raw Clear Color Red, Green, Blue and Alpha color each represented -+ * by 32 bits. The raw clear color is consumed by the 3d engine and generates -+ * the converted clear color of size 64 bits. The first 32 bits store the Lower -+ * Converted Clear Color value and the next 32 bits store the Higher Converted -+ * Clear Color value when applicable. The Converted Clear Color values are -+ * consumed by the DE. The last 64 bits are used to store Color Discard Enable -+ * and Depth Clear Value Valid which are ignored by the DE. 
A CCS cache line -+ * corresponds to an area of 4x1 tiles in the main surface. The main surface -+ * pitch is required to be a multiple of 4 tile widths. -+ */ -+#define I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC fourcc_mod_code(INTEL, 8) -+ - /* - * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks - * -@@ -1035,9 +1054,9 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) - * Not all combinations are valid, and different SoCs may support different - * combinations of layout and options. - */ --#define __fourcc_mod_amlogic_layout_mask 0xf -+#define __fourcc_mod_amlogic_layout_mask 0xff - #define __fourcc_mod_amlogic_options_shift 8 --#define __fourcc_mod_amlogic_options_mask 0xf -+#define __fourcc_mod_amlogic_options_mask 0xff - - #define DRM_FORMAT_MOD_AMLOGIC_FBC(__layout, __options) \ - fourcc_mod_code(AMLOGIC, \ -diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h -index 8bfd01d230..218d944a17 100644 ---- a/include/standard-headers/linux/ethtool.h -+++ b/include/standard-headers/linux/ethtool.h -@@ -26,6 +26,14 @@ - * have the same layout for 32-bit and 64-bit userland. - */ - -+/* Note on reserved space. -+ * Reserved fields must not be accessed directly by user space because -+ * they may be replaced by a different field in the future. They must -+ * be initialized to zero before making the request, e.g. via memset -+ * of the entire structure or implicitly by not being set in a structure -+ * initializer. -+ */ -+ - /** - * struct ethtool_cmd - DEPRECATED, link control and status - * This structure is DEPRECATED, please use struct ethtool_link_settings. -@@ -67,6 +75,7 @@ - * and other link features that the link partner advertised - * through autonegotiation; 0 if unknown or not applicable. - * Read-only. -+ * @reserved: Reserved for future use; see the note on reserved space. - * - * The link speed in Mbps is split between @speed and @speed_hi. 
Use - * the ethtool_cmd_speed() and ethtool_cmd_speed_set() functions to -@@ -155,6 +164,7 @@ static inline uint32_t ethtool_cmd_speed(const struct ethtool_cmd *ep) - * @bus_info: Device bus address. This should match the dev_name() - * string for the underlying bus device, if there is one. May be - * an empty string. -+ * @reserved2: Reserved for future use; see the note on reserved space. - * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and - * %ETHTOOL_SPFLAGS commands; also the number of strings in the - * %ETH_SS_PRIV_FLAGS set -@@ -356,6 +366,7 @@ struct ethtool_eeprom { - * @tx_lpi_timer: Time in microseconds the interface delays prior to asserting - * its tx lpi (after reaching 'idle' state). Effective only when eee - * was negotiated and tx_lpi_enabled was set. -+ * @reserved: Reserved for future use; see the note on reserved space. - */ - struct ethtool_eee { - uint32_t cmd; -@@ -374,6 +385,7 @@ struct ethtool_eee { - * @cmd: %ETHTOOL_GMODULEINFO - * @type: Standard the module information conforms to %ETH_MODULE_SFF_xxxx - * @eeprom_len: Length of the eeprom -+ * @reserved: Reserved for future use; see the note on reserved space. - * - * This structure is used to return the information to - * properly size memory for a subsequent call to %ETHTOOL_GMODULEEEPROM. -@@ -579,9 +591,7 @@ struct ethtool_pauseparam { - uint32_t tx_pause; - }; - --/** -- * enum ethtool_link_ext_state - link extended state -- */ -+/* Link extended state */ - enum ethtool_link_ext_state { - ETHTOOL_LINK_EXT_STATE_AUTONEG, - ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, -@@ -595,10 +605,7 @@ enum ethtool_link_ext_state { - ETHTOOL_LINK_EXT_STATE_OVERHEAT, - }; - --/** -- * enum ethtool_link_ext_substate_autoneg - more information in addition to -- * ETHTOOL_LINK_EXT_STATE_AUTONEG. -- */ -+/* More information in addition to ETHTOOL_LINK_EXT_STATE_AUTONEG. 
*/ - enum ethtool_link_ext_substate_autoneg { - ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED = 1, - ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED, -@@ -608,9 +615,7 @@ enum ethtool_link_ext_substate_autoneg { - ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD, - }; - --/** -- * enum ethtool_link_ext_substate_link_training - more information in addition to -- * ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE. -+/* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE. - */ - enum ethtool_link_ext_substate_link_training { - ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED = 1, -@@ -619,9 +624,7 @@ enum ethtool_link_ext_substate_link_training { - ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT, - }; - --/** -- * enum ethtool_link_ext_substate_logical_mismatch - more information in addition -- * to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH. -+/* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH. - */ - enum ethtool_link_ext_substate_link_logical_mismatch { - ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK = 1, -@@ -631,19 +634,14 @@ enum ethtool_link_ext_substate_link_logical_mismatch { - ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED, - }; - --/** -- * enum ethtool_link_ext_substate_bad_signal_integrity - more information in -- * addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY. -+/* More information in addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY. - */ - enum ethtool_link_ext_substate_bad_signal_integrity { - ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1, - ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE, - }; - --/** -- * enum ethtool_link_ext_substate_cable_issue - more information in -- * addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. -- */ -+/* More information in addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. 
*/ - enum ethtool_link_ext_substate_cable_issue { - ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE = 1, - ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE, -@@ -661,6 +659,7 @@ enum ethtool_link_ext_substate_cable_issue { - * now deprecated - * @ETH_SS_FEATURES: Device feature names - * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names -+ * @ETH_SS_TUNABLES: tunable names - * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS - * @ETH_SS_PHY_TUNABLES: PHY tunable names - * @ETH_SS_LINK_MODES: link mode names -@@ -670,6 +669,13 @@ enum ethtool_link_ext_substate_cable_issue { - * @ETH_SS_TS_TX_TYPES: timestamping Tx types - * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters - * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types -+ * @ETH_SS_STATS_STD: standardized stats -+ * @ETH_SS_STATS_ETH_PHY: names of IEEE 802.3 PHY statistics -+ * @ETH_SS_STATS_ETH_MAC: names of IEEE 802.3 MAC statistics -+ * @ETH_SS_STATS_ETH_CTRL: names of IEEE 802.3 MAC Control statistics -+ * @ETH_SS_STATS_RMON: names of RMON statistics -+ * -+ * @ETH_SS_COUNT: number of defined string sets - */ - enum ethtool_stringset { - ETH_SS_TEST = 0, -@@ -688,6 +694,11 @@ enum ethtool_stringset { - ETH_SS_TS_TX_TYPES, - ETH_SS_TS_RX_FILTERS, - ETH_SS_UDP_TUNNEL_TYPES, -+ ETH_SS_STATS_STD, -+ ETH_SS_STATS_ETH_PHY, -+ ETH_SS_STATS_ETH_MAC, -+ ETH_SS_STATS_ETH_CTRL, -+ ETH_SS_STATS_RMON, - - /* add new constants above here */ - ETH_SS_COUNT -@@ -715,6 +726,7 @@ struct ethtool_gstrings { - /** - * struct ethtool_sset_info - string set information - * @cmd: Command number = %ETHTOOL_GSSET_INFO -+ * @reserved: Reserved for future use; see the note on reserved space. - * @sset_mask: On entry, a bitmask of string sets to query, with bits - * numbered according to &enum ethtool_stringset. On return, a - * bitmask of those string sets queried that are supported. -@@ -759,6 +771,7 @@ enum ethtool_test_flags { - * @flags: A bitmask of flags from &enum ethtool_test_flags. 
Some - * flags may be set by the user on entry; others may be set by - * the driver on return. -+ * @reserved: Reserved for future use; see the note on reserved space. - * @len: On return, the number of test results - * @data: Array of test results - * -@@ -959,6 +972,7 @@ union ethtool_flow_union { - * @vlan_etype: VLAN EtherType - * @vlan_tci: VLAN tag control information - * @data: user defined data -+ * @padding: Reserved for future use; see the note on reserved space. - * - * Note, @vlan_etype, @vlan_tci, and @data are only valid if %FLOW_EXT - * is set in &struct ethtool_rx_flow_spec @flow_type. -@@ -1134,7 +1148,8 @@ struct ethtool_rxfh_indir { - * hardware hash key. - * @hfunc: Defines the current RSS hash function used by HW (or to be set to). - * Valid values are one of the %ETH_RSS_HASH_*. -- * @rsvd: Reserved for future extensions. -+ * @rsvd8: Reserved for future use; see the note on reserved space. -+ * @rsvd32: Reserved for future use; see the note on reserved space. - * @rss_config: RX ring/queue index for each hash value i.e., indirection table - * of @indir_size uint32_t elements, followed by hash key of @key_size - * bytes. -@@ -1302,7 +1317,9 @@ struct ethtool_sfeatures { - * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags - * @phc_index: device index of the associated PHC, or -1 if there is none - * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values -+ * @tx_reserved: Reserved for future use; see the note on reserved space. - * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values -+ * @rx_reserved: Reserved for future use; see the note on reserved space. 
- * - * The bits in the 'tx_types' and 'rx_filters' fields correspond to - * the 'hwtstamp_tx_types' and 'hwtstamp_rx_filters' enumeration values, -@@ -1376,15 +1393,33 @@ struct ethtool_per_queue_op { - }; - - /** -- * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters -+ * struct ethtool_fecparam - Ethernet Forward Error Correction parameters - * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM -- * @active_fec: FEC mode which is active on porte -- * @fec: Bitmask of supported/configured FEC modes -- * @rsvd: Reserved for future extensions. i.e FEC bypass feature. -+ * @active_fec: FEC mode which is active on the port, single bit set, GET only. -+ * @fec: Bitmask of configured FEC modes. -+ * @reserved: Reserved for future extensions, ignore on GET, write 0 for SET. - * -- * Drivers should reject a non-zero setting of @autoneg when -- * autoneogotiation is disabled (or not supported) for the link. -+ * Note that @reserved was never validated on input and ethtool user space -+ * left it uninitialized when calling SET. Hence going forward it can only be -+ * used to return a value to userspace with GET. -+ * -+ * FEC modes supported by the device can be read via %ETHTOOL_GLINKSETTINGS. -+ * FEC settings are configured by link autonegotiation whenever it's enabled. -+ * With autoneg on %ETHTOOL_GFECPARAM can be used to read the current mode. -+ * -+ * When autoneg is disabled %ETHTOOL_SFECPARAM controls the FEC settings. -+ * It is recommended that drivers only accept a single bit set in @fec. -+ * When multiple bits are set in @fec drivers may pick mode in an implementation -+ * dependent way. Drivers should reject mixing %ETHTOOL_FEC_AUTO_BIT with other -+ * FEC modes, because it's unclear whether in this case other modes constrain -+ * AUTO or are independent choices. -+ * Drivers must reject SET requests if they support none of the requested modes. 
-+ * -+ * If device does not support FEC drivers may use %ETHTOOL_FEC_NONE instead -+ * of returning %EOPNOTSUPP from %ETHTOOL_GFECPARAM. - * -+ * See enum ethtool_fec_config_bits for definition of valid bits for both -+ * @fec and @active_fec. - */ - struct ethtool_fecparam { - uint32_t cmd; -@@ -1396,11 +1431,16 @@ struct ethtool_fecparam { - - /** - * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration -- * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported -- * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver -- * @ETHTOOL_FEC_OFF: No FEC Mode -- * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode -- * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode -+ * @ETHTOOL_FEC_NONE_BIT: FEC mode configuration is not supported. Should not -+ * be used together with other bits. GET only. -+ * @ETHTOOL_FEC_AUTO_BIT: Select default/best FEC mode automatically, usually -+ * based link mode and SFP parameters read from module's -+ * EEPROM. This bit does _not_ mean autonegotiation. -+ * @ETHTOOL_FEC_OFF_BIT: No FEC Mode -+ * @ETHTOOL_FEC_RS_BIT: Reed-Solomon FEC Mode -+ * @ETHTOOL_FEC_BASER_BIT: Base-R/Reed-Solomon FEC Mode -+ * @ETHTOOL_FEC_LLRS_BIT: Low Latency Reed Solomon FEC Mode (25G/50G Ethernet -+ * Consortium) - */ - enum ethtool_fec_config_bits { - ETHTOOL_FEC_NONE_BIT, -@@ -1958,6 +1998,11 @@ enum ethtool_reset_flags { - * autonegotiation; 0 if unknown or not applicable. Read-only. - * @transceiver: Used to distinguish different possible PHY types, - * reported consistently by PHYLIB. Read-only. -+ * @master_slave_cfg: Master/slave port mode. -+ * @master_slave_state: Master/slave port state. -+ * @reserved: Reserved for future use; see the note on reserved space. -+ * @reserved1: Reserved for future use; see the note on reserved space. -+ * @link_mode_masks: Variable length bitmaps. 
- * - * If autonegotiation is disabled, the speed and @duplex represent the - * fixed link mode and are writable if the driver supports multiple -diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h -index 950d7edb7e..cce105bfba 100644 ---- a/include/standard-headers/linux/fuse.h -+++ b/include/standard-headers/linux/fuse.h -@@ -179,6 +179,8 @@ - * 7.33 - * - add FUSE_HANDLE_KILLPRIV_V2, FUSE_WRITE_KILL_SUIDGID, FATTR_KILL_SUIDGID - * - add FUSE_OPEN_KILL_SUIDGID -+ * - extend fuse_setxattr_in, add FUSE_SETXATTR_EXT -+ * - add FUSE_SETXATTR_ACL_KILL_SGID - */ - - #ifndef _LINUX_FUSE_H -@@ -326,6 +328,7 @@ struct fuse_file_lock { - * does not have CAP_FSETID. Additionally upon - * write/truncate sgid is killed only if file has group - * execute permission. (Same as Linux VFS behavior). -+ * FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in - */ - #define FUSE_ASYNC_READ (1 << 0) - #define FUSE_POSIX_LOCKS (1 << 1) -@@ -356,6 +359,7 @@ struct fuse_file_lock { - #define FUSE_MAP_ALIGNMENT (1 << 26) - #define FUSE_SUBMOUNTS (1 << 27) - #define FUSE_HANDLE_KILLPRIV_V2 (1 << 28) -+#define FUSE_SETXATTR_EXT (1 << 29) - - /** - * CUSE INIT request/reply flags -@@ -447,6 +451,12 @@ struct fuse_file_lock { - */ - #define FUSE_OPEN_KILL_SUIDGID (1 << 0) - -+/** -+ * setxattr flags -+ * FUSE_SETXATTR_ACL_KILL_SGID: Clear SGID when system.posix_acl_access is set -+ */ -+#define FUSE_SETXATTR_ACL_KILL_SGID (1 << 0) -+ - enum fuse_opcode { - FUSE_LOOKUP = 1, - FUSE_FORGET = 2, /* no reply */ -@@ -677,9 +687,13 @@ struct fuse_fsync_in { - uint32_t padding; - }; - -+#define FUSE_COMPAT_SETXATTR_IN_SIZE 8 -+ - struct fuse_setxattr_in { - uint32_t size; - uint32_t flags; -+ uint32_t setxattr_flags; -+ uint32_t padding; - }; - - struct fuse_getxattr_in { -@@ -899,7 +913,8 @@ struct fuse_notify_retrieve_in { - }; - - /* Device ioctls: */ --#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) -+#define FUSE_DEV_IOC_MAGIC 229 -+#define 
FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t) - - struct fuse_lseek_in { - uint64_t fh; -diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h -index f89c986190..7822c24178 100644 ---- a/include/standard-headers/linux/input.h -+++ b/include/standard-headers/linux/input.h -@@ -81,7 +81,7 @@ struct input_id { - * in units per radian. - * When INPUT_PROP_ACCELEROMETER is set the resolution changes. - * The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in -- * in units per g (units/g) and in units per degree per second -+ * units per g (units/g) and in units per degree per second - * (units/deg/s) for rotational axes (ABS_RX, ABS_RY, ABS_RZ). - */ - struct input_absinfo { -diff --git a/include/standard-headers/linux/virtio_bt.h b/include/standard-headers/linux/virtio_bt.h -new file mode 100644 -index 0000000000..245e1eff4b ---- /dev/null -+++ b/include/standard-headers/linux/virtio_bt.h -@@ -0,0 +1,31 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+ -+#ifndef _LINUX_VIRTIO_BT_H -+#define _LINUX_VIRTIO_BT_H -+ -+#include "standard-headers/linux/virtio_types.h" -+ -+/* Feature bits */ -+#define VIRTIO_BT_F_VND_HCI 0 /* Indicates vendor command support */ -+#define VIRTIO_BT_F_MSFT_EXT 1 /* Indicates MSFT vendor support */ -+#define VIRTIO_BT_F_AOSP_EXT 2 /* Indicates AOSP vendor support */ -+ -+enum virtio_bt_config_type { -+ VIRTIO_BT_CONFIG_TYPE_PRIMARY = 0, -+ VIRTIO_BT_CONFIG_TYPE_AMP = 1, -+}; -+ -+enum virtio_bt_config_vendor { -+ VIRTIO_BT_CONFIG_VENDOR_NONE = 0, -+ VIRTIO_BT_CONFIG_VENDOR_ZEPHYR = 1, -+ VIRTIO_BT_CONFIG_VENDOR_INTEL = 2, -+ VIRTIO_BT_CONFIG_VENDOR_REALTEK = 3, -+}; -+ -+struct virtio_bt_config { -+ uint8_t type; -+ uint16_t vendor; -+ uint16_t msft_opcode; -+} QEMU_PACKED; -+ -+#endif /* _LINUX_VIRTIO_BT_H */ -diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h -index bc1c0621f5..f0c35ce862 100644 ---- 
a/include/standard-headers/linux/virtio_ids.h -+++ b/include/standard-headers/linux/virtio_ids.h -@@ -51,8 +51,10 @@ - #define VIRTIO_ID_PSTORE 22 /* virtio pstore device */ - #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ - #define VIRTIO_ID_MEM 24 /* virtio mem */ -+#define VIRTIO_ID_SOUND 25 /* virtio sound */ - #define VIRTIO_ID_FS 26 /* virtio filesystem */ - #define VIRTIO_ID_PMEM 27 /* virtio pmem */ -+#define VIRTIO_ID_BT 28 /* virtio bluetooth */ - #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ - - #endif /* _LINUX_VIRTIO_IDS_H */ -diff --git a/include/standard-headers/linux/virtio_snd.h b/include/standard-headers/linux/virtio_snd.h -new file mode 100644 -index 0000000000..1af96b9fc6 ---- /dev/null -+++ b/include/standard-headers/linux/virtio_snd.h -@@ -0,0 +1,334 @@ -+/* SPDX-License-Identifier: BSD-3-Clause */ -+/* -+ * Copyright (C) 2021 OpenSynergy GmbH -+ */ -+#ifndef VIRTIO_SND_IF_H -+#define VIRTIO_SND_IF_H -+ -+#include "standard-headers/linux/virtio_types.h" -+ -+/******************************************************************************* -+ * CONFIGURATION SPACE -+ */ -+struct virtio_snd_config { -+ /* # of available physical jacks */ -+ uint32_t jacks; -+ /* # of available PCM streams */ -+ uint32_t streams; -+ /* # of available channel maps */ -+ uint32_t chmaps; -+}; -+ -+enum { -+ /* device virtqueue indexes */ -+ VIRTIO_SND_VQ_CONTROL = 0, -+ VIRTIO_SND_VQ_EVENT, -+ VIRTIO_SND_VQ_TX, -+ VIRTIO_SND_VQ_RX, -+ /* # of device virtqueues */ -+ VIRTIO_SND_VQ_MAX -+}; -+ -+/******************************************************************************* -+ * COMMON DEFINITIONS -+ */ -+ -+/* supported dataflow directions */ -+enum { -+ VIRTIO_SND_D_OUTPUT = 0, -+ VIRTIO_SND_D_INPUT -+}; -+ -+enum { -+ /* jack control request types */ -+ VIRTIO_SND_R_JACK_INFO = 1, -+ VIRTIO_SND_R_JACK_REMAP, -+ -+ /* PCM control request types */ -+ VIRTIO_SND_R_PCM_INFO = 0x0100, -+ VIRTIO_SND_R_PCM_SET_PARAMS, -+ VIRTIO_SND_R_PCM_PREPARE, -+ 
VIRTIO_SND_R_PCM_RELEASE, -+ VIRTIO_SND_R_PCM_START, -+ VIRTIO_SND_R_PCM_STOP, -+ -+ /* channel map control request types */ -+ VIRTIO_SND_R_CHMAP_INFO = 0x0200, -+ -+ /* jack event types */ -+ VIRTIO_SND_EVT_JACK_CONNECTED = 0x1000, -+ VIRTIO_SND_EVT_JACK_DISCONNECTED, -+ -+ /* PCM event types */ -+ VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED = 0x1100, -+ VIRTIO_SND_EVT_PCM_XRUN, -+ -+ /* common status codes */ -+ VIRTIO_SND_S_OK = 0x8000, -+ VIRTIO_SND_S_BAD_MSG, -+ VIRTIO_SND_S_NOT_SUPP, -+ VIRTIO_SND_S_IO_ERR -+}; -+ -+/* common header */ -+struct virtio_snd_hdr { -+ uint32_t code; -+}; -+ -+/* event notification */ -+struct virtio_snd_event { -+ /* VIRTIO_SND_EVT_XXX */ -+ struct virtio_snd_hdr hdr; -+ /* optional event data */ -+ uint32_t data; -+}; -+ -+/* common control request to query an item information */ -+struct virtio_snd_query_info { -+ /* VIRTIO_SND_R_XXX_INFO */ -+ struct virtio_snd_hdr hdr; -+ /* item start identifier */ -+ uint32_t start_id; -+ /* item count to query */ -+ uint32_t count; -+ /* item information size in bytes */ -+ uint32_t size; -+}; -+ -+/* common item information header */ -+struct virtio_snd_info { -+ /* function group node id (High Definition Audio Specification 7.1.2) */ -+ uint32_t hda_fn_nid; -+}; -+ -+/******************************************************************************* -+ * JACK CONTROL MESSAGES -+ */ -+struct virtio_snd_jack_hdr { -+ /* VIRTIO_SND_R_JACK_XXX */ -+ struct virtio_snd_hdr hdr; -+ /* 0 ... 
virtio_snd_config::jacks - 1 */ -+ uint32_t jack_id; -+}; -+ -+/* supported jack features */ -+enum { -+ VIRTIO_SND_JACK_F_REMAP = 0 -+}; -+ -+struct virtio_snd_jack_info { -+ /* common header */ -+ struct virtio_snd_info hdr; -+ /* supported feature bit map (1 << VIRTIO_SND_JACK_F_XXX) */ -+ uint32_t features; -+ /* pin configuration (High Definition Audio Specification 7.3.3.31) */ -+ uint32_t hda_reg_defconf; -+ /* pin capabilities (High Definition Audio Specification 7.3.4.9) */ -+ uint32_t hda_reg_caps; -+ /* current jack connection status (0: disconnected, 1: connected) */ -+ uint8_t connected; -+ -+ uint8_t padding[7]; -+}; -+ -+/* jack remapping control request */ -+struct virtio_snd_jack_remap { -+ /* .code = VIRTIO_SND_R_JACK_REMAP */ -+ struct virtio_snd_jack_hdr hdr; -+ /* selected association number */ -+ uint32_t association; -+ /* selected sequence number */ -+ uint32_t sequence; -+}; -+ -+/******************************************************************************* -+ * PCM CONTROL MESSAGES -+ */ -+struct virtio_snd_pcm_hdr { -+ /* VIRTIO_SND_R_PCM_XXX */ -+ struct virtio_snd_hdr hdr; -+ /* 0 ... 
virtio_snd_config::streams - 1 */ -+ uint32_t stream_id; -+}; -+ -+/* supported PCM stream features */ -+enum { -+ VIRTIO_SND_PCM_F_SHMEM_HOST = 0, -+ VIRTIO_SND_PCM_F_SHMEM_GUEST, -+ VIRTIO_SND_PCM_F_MSG_POLLING, -+ VIRTIO_SND_PCM_F_EVT_SHMEM_PERIODS, -+ VIRTIO_SND_PCM_F_EVT_XRUNS -+}; -+ -+/* supported PCM sample formats */ -+enum { -+ /* analog formats (width / physical width) */ -+ VIRTIO_SND_PCM_FMT_IMA_ADPCM = 0, /* 4 / 4 bits */ -+ VIRTIO_SND_PCM_FMT_MU_LAW, /* 8 / 8 bits */ -+ VIRTIO_SND_PCM_FMT_A_LAW, /* 8 / 8 bits */ -+ VIRTIO_SND_PCM_FMT_S8, /* 8 / 8 bits */ -+ VIRTIO_SND_PCM_FMT_U8, /* 8 / 8 bits */ -+ VIRTIO_SND_PCM_FMT_S16, /* 16 / 16 bits */ -+ VIRTIO_SND_PCM_FMT_U16, /* 16 / 16 bits */ -+ VIRTIO_SND_PCM_FMT_S18_3, /* 18 / 24 bits */ -+ VIRTIO_SND_PCM_FMT_U18_3, /* 18 / 24 bits */ -+ VIRTIO_SND_PCM_FMT_S20_3, /* 20 / 24 bits */ -+ VIRTIO_SND_PCM_FMT_U20_3, /* 20 / 24 bits */ -+ VIRTIO_SND_PCM_FMT_S24_3, /* 24 / 24 bits */ -+ VIRTIO_SND_PCM_FMT_U24_3, /* 24 / 24 bits */ -+ VIRTIO_SND_PCM_FMT_S20, /* 20 / 32 bits */ -+ VIRTIO_SND_PCM_FMT_U20, /* 20 / 32 bits */ -+ VIRTIO_SND_PCM_FMT_S24, /* 24 / 32 bits */ -+ VIRTIO_SND_PCM_FMT_U24, /* 24 / 32 bits */ -+ VIRTIO_SND_PCM_FMT_S32, /* 32 / 32 bits */ -+ VIRTIO_SND_PCM_FMT_U32, /* 32 / 32 bits */ -+ VIRTIO_SND_PCM_FMT_FLOAT, /* 32 / 32 bits */ -+ VIRTIO_SND_PCM_FMT_FLOAT64, /* 64 / 64 bits */ -+ /* digital formats (width / physical width) */ -+ VIRTIO_SND_PCM_FMT_DSD_U8, /* 8 / 8 bits */ -+ VIRTIO_SND_PCM_FMT_DSD_U16, /* 16 / 16 bits */ -+ VIRTIO_SND_PCM_FMT_DSD_U32, /* 32 / 32 bits */ -+ VIRTIO_SND_PCM_FMT_IEC958_SUBFRAME /* 32 / 32 bits */ -+}; -+ -+/* supported PCM frame rates */ -+enum { -+ VIRTIO_SND_PCM_RATE_5512 = 0, -+ VIRTIO_SND_PCM_RATE_8000, -+ VIRTIO_SND_PCM_RATE_11025, -+ VIRTIO_SND_PCM_RATE_16000, -+ VIRTIO_SND_PCM_RATE_22050, -+ VIRTIO_SND_PCM_RATE_32000, -+ VIRTIO_SND_PCM_RATE_44100, -+ VIRTIO_SND_PCM_RATE_48000, -+ VIRTIO_SND_PCM_RATE_64000, -+ VIRTIO_SND_PCM_RATE_88200, -+ 
VIRTIO_SND_PCM_RATE_96000, -+ VIRTIO_SND_PCM_RATE_176400, -+ VIRTIO_SND_PCM_RATE_192000, -+ VIRTIO_SND_PCM_RATE_384000 -+}; -+ -+struct virtio_snd_pcm_info { -+ /* common header */ -+ struct virtio_snd_info hdr; -+ /* supported feature bit map (1 << VIRTIO_SND_PCM_F_XXX) */ -+ uint32_t features; -+ /* supported sample format bit map (1 << VIRTIO_SND_PCM_FMT_XXX) */ -+ uint64_t formats; -+ /* supported frame rate bit map (1 << VIRTIO_SND_PCM_RATE_XXX) */ -+ uint64_t rates; -+ /* dataflow direction (VIRTIO_SND_D_XXX) */ -+ uint8_t direction; -+ /* minimum # of supported channels */ -+ uint8_t channels_min; -+ /* maximum # of supported channels */ -+ uint8_t channels_max; -+ -+ uint8_t padding[5]; -+}; -+ -+/* set PCM stream format */ -+struct virtio_snd_pcm_set_params { -+ /* .code = VIRTIO_SND_R_PCM_SET_PARAMS */ -+ struct virtio_snd_pcm_hdr hdr; -+ /* size of the hardware buffer */ -+ uint32_t buffer_bytes; -+ /* size of the hardware period */ -+ uint32_t period_bytes; -+ /* selected feature bit map (1 << VIRTIO_SND_PCM_F_XXX) */ -+ uint32_t features; -+ /* selected # of channels */ -+ uint8_t channels; -+ /* selected sample format (VIRTIO_SND_PCM_FMT_XXX) */ -+ uint8_t format; -+ /* selected frame rate (VIRTIO_SND_PCM_RATE_XXX) */ -+ uint8_t rate; -+ -+ uint8_t padding; -+}; -+ -+/******************************************************************************* -+ * PCM I/O MESSAGES -+ */ -+ -+/* I/O request header */ -+struct virtio_snd_pcm_xfer { -+ /* 0 ... virtio_snd_config::streams - 1 */ -+ uint32_t stream_id; -+}; -+ -+/* I/O request status */ -+struct virtio_snd_pcm_status { -+ /* VIRTIO_SND_S_XXX */ -+ uint32_t status; -+ /* current device latency */ -+ uint32_t latency_bytes; -+}; -+ -+/******************************************************************************* -+ * CHANNEL MAP CONTROL MESSAGES -+ */ -+struct virtio_snd_chmap_hdr { -+ /* VIRTIO_SND_R_CHMAP_XXX */ -+ struct virtio_snd_hdr hdr; -+ /* 0 ... 
virtio_snd_config::chmaps - 1 */ -+ uint32_t chmap_id; -+}; -+ -+/* standard channel position definition */ -+enum { -+ VIRTIO_SND_CHMAP_NONE = 0, /* undefined */ -+ VIRTIO_SND_CHMAP_NA, /* silent */ -+ VIRTIO_SND_CHMAP_MONO, /* mono stream */ -+ VIRTIO_SND_CHMAP_FL, /* front left */ -+ VIRTIO_SND_CHMAP_FR, /* front right */ -+ VIRTIO_SND_CHMAP_RL, /* rear left */ -+ VIRTIO_SND_CHMAP_RR, /* rear right */ -+ VIRTIO_SND_CHMAP_FC, /* front center */ -+ VIRTIO_SND_CHMAP_LFE, /* low frequency (LFE) */ -+ VIRTIO_SND_CHMAP_SL, /* side left */ -+ VIRTIO_SND_CHMAP_SR, /* side right */ -+ VIRTIO_SND_CHMAP_RC, /* rear center */ -+ VIRTIO_SND_CHMAP_FLC, /* front left center */ -+ VIRTIO_SND_CHMAP_FRC, /* front right center */ -+ VIRTIO_SND_CHMAP_RLC, /* rear left center */ -+ VIRTIO_SND_CHMAP_RRC, /* rear right center */ -+ VIRTIO_SND_CHMAP_FLW, /* front left wide */ -+ VIRTIO_SND_CHMAP_FRW, /* front right wide */ -+ VIRTIO_SND_CHMAP_FLH, /* front left high */ -+ VIRTIO_SND_CHMAP_FCH, /* front center high */ -+ VIRTIO_SND_CHMAP_FRH, /* front right high */ -+ VIRTIO_SND_CHMAP_TC, /* top center */ -+ VIRTIO_SND_CHMAP_TFL, /* top front left */ -+ VIRTIO_SND_CHMAP_TFR, /* top front right */ -+ VIRTIO_SND_CHMAP_TFC, /* top front center */ -+ VIRTIO_SND_CHMAP_TRL, /* top rear left */ -+ VIRTIO_SND_CHMAP_TRR, /* top rear right */ -+ VIRTIO_SND_CHMAP_TRC, /* top rear center */ -+ VIRTIO_SND_CHMAP_TFLC, /* top front left center */ -+ VIRTIO_SND_CHMAP_TFRC, /* top front right center */ -+ VIRTIO_SND_CHMAP_TSL, /* top side left */ -+ VIRTIO_SND_CHMAP_TSR, /* top side right */ -+ VIRTIO_SND_CHMAP_LLFE, /* left LFE */ -+ VIRTIO_SND_CHMAP_RLFE, /* right LFE */ -+ VIRTIO_SND_CHMAP_BC, /* bottom center */ -+ VIRTIO_SND_CHMAP_BLC, /* bottom left center */ -+ VIRTIO_SND_CHMAP_BRC /* bottom right center */ -+}; -+ -+/* maximum possible number of channels */ -+#define VIRTIO_SND_CHMAP_MAX_SIZE 18 -+ -+struct virtio_snd_chmap_info { -+ /* common header */ -+ struct virtio_snd_info hdr; -+ /* 
dataflow direction (VIRTIO_SND_D_XXX) */ -+ uint8_t direction; -+ /* # of valid channel position values */ -+ uint8_t channels; -+ /* channel position values (VIRTIO_SND_CHMAP_XXX) */ -+ uint8_t positions[VIRTIO_SND_CHMAP_MAX_SIZE]; -+}; -+ -+#endif /* VIRTIO_SND_IF_H */ -diff --git a/include/standard-headers/rdma/vmw_pvrdma-abi.h b/include/standard-headers/rdma/vmw_pvrdma-abi.h -index 0989426a3f..c30182a7ae 100644 ---- a/include/standard-headers/rdma/vmw_pvrdma-abi.h -+++ b/include/standard-headers/rdma/vmw_pvrdma-abi.h -@@ -133,6 +133,13 @@ enum pvrdma_wc_flags { - PVRDMA_WC_FLAGS_MAX = PVRDMA_WC_WITH_NETWORK_HDR_TYPE, - }; - -+enum pvrdma_network_type { -+ PVRDMA_NETWORK_IB, -+ PVRDMA_NETWORK_ROCE_V1 = PVRDMA_NETWORK_IB, -+ PVRDMA_NETWORK_IPV4, -+ PVRDMA_NETWORK_IPV6 -+}; -+ - struct pvrdma_alloc_ucontext_resp { - uint32_t qp_tab_size; - uint32_t reserved; -diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h -index 7287529177..6de5a7fc06 100644 ---- a/linux-headers/asm-generic/unistd.h -+++ b/linux-headers/asm-generic/unistd.h -@@ -861,9 +861,20 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2) - __SYSCALL(__NR_process_madvise, sys_process_madvise) - #define __NR_epoll_pwait2 441 - __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) -+#define __NR_mount_setattr 442 -+__SYSCALL(__NR_mount_setattr, sys_mount_setattr) -+#define __NR_quotactl_path 443 -+__SYSCALL(__NR_quotactl_path, sys_quotactl_path) -+ -+#define __NR_landlock_create_ruleset 444 -+__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) -+#define __NR_landlock_add_rule 445 -+__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) -+#define __NR_landlock_restrict_self 446 -+__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) - - #undef __NR_syscalls --#define __NR_syscalls 442 -+#define __NR_syscalls 447 - - /* - * 32 bit systems traditionally used different -diff --git a/linux-headers/asm-mips/unistd_n32.h 
b/linux-headers/asm-mips/unistd_n32.h -index 59e53b6e07..fce51fee09 100644 ---- a/linux-headers/asm-mips/unistd_n32.h -+++ b/linux-headers/asm-mips/unistd_n32.h -@@ -1,376 +1,379 @@ --#ifndef _ASM_MIPS_UNISTD_N32_H --#define _ASM_MIPS_UNISTD_N32_H -+#ifndef _ASM_UNISTD_N32_H -+#define _ASM_UNISTD_N32_H - --#define __NR_read (__NR_Linux + 0) --#define __NR_write (__NR_Linux + 1) --#define __NR_open (__NR_Linux + 2) --#define __NR_close (__NR_Linux + 3) --#define __NR_stat (__NR_Linux + 4) --#define __NR_fstat (__NR_Linux + 5) --#define __NR_lstat (__NR_Linux + 6) --#define __NR_poll (__NR_Linux + 7) --#define __NR_lseek (__NR_Linux + 8) --#define __NR_mmap (__NR_Linux + 9) --#define __NR_mprotect (__NR_Linux + 10) --#define __NR_munmap (__NR_Linux + 11) --#define __NR_brk (__NR_Linux + 12) --#define __NR_rt_sigaction (__NR_Linux + 13) --#define __NR_rt_sigprocmask (__NR_Linux + 14) --#define __NR_ioctl (__NR_Linux + 15) --#define __NR_pread64 (__NR_Linux + 16) --#define __NR_pwrite64 (__NR_Linux + 17) --#define __NR_readv (__NR_Linux + 18) --#define __NR_writev (__NR_Linux + 19) --#define __NR_access (__NR_Linux + 20) --#define __NR_pipe (__NR_Linux + 21) --#define __NR__newselect (__NR_Linux + 22) --#define __NR_sched_yield (__NR_Linux + 23) --#define __NR_mremap (__NR_Linux + 24) --#define __NR_msync (__NR_Linux + 25) --#define __NR_mincore (__NR_Linux + 26) --#define __NR_madvise (__NR_Linux + 27) --#define __NR_shmget (__NR_Linux + 28) --#define __NR_shmat (__NR_Linux + 29) --#define __NR_shmctl (__NR_Linux + 30) --#define __NR_dup (__NR_Linux + 31) --#define __NR_dup2 (__NR_Linux + 32) --#define __NR_pause (__NR_Linux + 33) --#define __NR_nanosleep (__NR_Linux + 34) --#define __NR_getitimer (__NR_Linux + 35) --#define __NR_setitimer (__NR_Linux + 36) --#define __NR_alarm (__NR_Linux + 37) --#define __NR_getpid (__NR_Linux + 38) --#define __NR_sendfile (__NR_Linux + 39) --#define __NR_socket (__NR_Linux + 40) --#define __NR_connect (__NR_Linux + 41) --#define 
__NR_accept (__NR_Linux + 42) --#define __NR_sendto (__NR_Linux + 43) --#define __NR_recvfrom (__NR_Linux + 44) --#define __NR_sendmsg (__NR_Linux + 45) --#define __NR_recvmsg (__NR_Linux + 46) --#define __NR_shutdown (__NR_Linux + 47) --#define __NR_bind (__NR_Linux + 48) --#define __NR_listen (__NR_Linux + 49) --#define __NR_getsockname (__NR_Linux + 50) --#define __NR_getpeername (__NR_Linux + 51) --#define __NR_socketpair (__NR_Linux + 52) --#define __NR_setsockopt (__NR_Linux + 53) --#define __NR_getsockopt (__NR_Linux + 54) --#define __NR_clone (__NR_Linux + 55) --#define __NR_fork (__NR_Linux + 56) --#define __NR_execve (__NR_Linux + 57) --#define __NR_exit (__NR_Linux + 58) --#define __NR_wait4 (__NR_Linux + 59) --#define __NR_kill (__NR_Linux + 60) --#define __NR_uname (__NR_Linux + 61) --#define __NR_semget (__NR_Linux + 62) --#define __NR_semop (__NR_Linux + 63) --#define __NR_semctl (__NR_Linux + 64) --#define __NR_shmdt (__NR_Linux + 65) --#define __NR_msgget (__NR_Linux + 66) --#define __NR_msgsnd (__NR_Linux + 67) --#define __NR_msgrcv (__NR_Linux + 68) --#define __NR_msgctl (__NR_Linux + 69) --#define __NR_fcntl (__NR_Linux + 70) --#define __NR_flock (__NR_Linux + 71) --#define __NR_fsync (__NR_Linux + 72) --#define __NR_fdatasync (__NR_Linux + 73) --#define __NR_truncate (__NR_Linux + 74) --#define __NR_ftruncate (__NR_Linux + 75) --#define __NR_getdents (__NR_Linux + 76) --#define __NR_getcwd (__NR_Linux + 77) --#define __NR_chdir (__NR_Linux + 78) --#define __NR_fchdir (__NR_Linux + 79) --#define __NR_rename (__NR_Linux + 80) --#define __NR_mkdir (__NR_Linux + 81) --#define __NR_rmdir (__NR_Linux + 82) --#define __NR_creat (__NR_Linux + 83) --#define __NR_link (__NR_Linux + 84) --#define __NR_unlink (__NR_Linux + 85) --#define __NR_symlink (__NR_Linux + 86) --#define __NR_readlink (__NR_Linux + 87) --#define __NR_chmod (__NR_Linux + 88) --#define __NR_fchmod (__NR_Linux + 89) --#define __NR_chown (__NR_Linux + 90) --#define __NR_fchown 
(__NR_Linux + 91) --#define __NR_lchown (__NR_Linux + 92) --#define __NR_umask (__NR_Linux + 93) --#define __NR_gettimeofday (__NR_Linux + 94) --#define __NR_getrlimit (__NR_Linux + 95) --#define __NR_getrusage (__NR_Linux + 96) --#define __NR_sysinfo (__NR_Linux + 97) --#define __NR_times (__NR_Linux + 98) --#define __NR_ptrace (__NR_Linux + 99) --#define __NR_getuid (__NR_Linux + 100) --#define __NR_syslog (__NR_Linux + 101) --#define __NR_getgid (__NR_Linux + 102) --#define __NR_setuid (__NR_Linux + 103) --#define __NR_setgid (__NR_Linux + 104) --#define __NR_geteuid (__NR_Linux + 105) --#define __NR_getegid (__NR_Linux + 106) --#define __NR_setpgid (__NR_Linux + 107) --#define __NR_getppid (__NR_Linux + 108) --#define __NR_getpgrp (__NR_Linux + 109) --#define __NR_setsid (__NR_Linux + 110) --#define __NR_setreuid (__NR_Linux + 111) --#define __NR_setregid (__NR_Linux + 112) --#define __NR_getgroups (__NR_Linux + 113) --#define __NR_setgroups (__NR_Linux + 114) --#define __NR_setresuid (__NR_Linux + 115) --#define __NR_getresuid (__NR_Linux + 116) --#define __NR_setresgid (__NR_Linux + 117) --#define __NR_getresgid (__NR_Linux + 118) --#define __NR_getpgid (__NR_Linux + 119) --#define __NR_setfsuid (__NR_Linux + 120) --#define __NR_setfsgid (__NR_Linux + 121) --#define __NR_getsid (__NR_Linux + 122) --#define __NR_capget (__NR_Linux + 123) --#define __NR_capset (__NR_Linux + 124) --#define __NR_rt_sigpending (__NR_Linux + 125) --#define __NR_rt_sigtimedwait (__NR_Linux + 126) --#define __NR_rt_sigqueueinfo (__NR_Linux + 127) --#define __NR_rt_sigsuspend (__NR_Linux + 128) --#define __NR_sigaltstack (__NR_Linux + 129) --#define __NR_utime (__NR_Linux + 130) --#define __NR_mknod (__NR_Linux + 131) --#define __NR_personality (__NR_Linux + 132) --#define __NR_ustat (__NR_Linux + 133) --#define __NR_statfs (__NR_Linux + 134) --#define __NR_fstatfs (__NR_Linux + 135) --#define __NR_sysfs (__NR_Linux + 136) --#define __NR_getpriority (__NR_Linux + 137) --#define 
__NR_setpriority (__NR_Linux + 138) --#define __NR_sched_setparam (__NR_Linux + 139) --#define __NR_sched_getparam (__NR_Linux + 140) --#define __NR_sched_setscheduler (__NR_Linux + 141) --#define __NR_sched_getscheduler (__NR_Linux + 142) --#define __NR_sched_get_priority_max (__NR_Linux + 143) --#define __NR_sched_get_priority_min (__NR_Linux + 144) --#define __NR_sched_rr_get_interval (__NR_Linux + 145) --#define __NR_mlock (__NR_Linux + 146) --#define __NR_munlock (__NR_Linux + 147) --#define __NR_mlockall (__NR_Linux + 148) --#define __NR_munlockall (__NR_Linux + 149) --#define __NR_vhangup (__NR_Linux + 150) --#define __NR_pivot_root (__NR_Linux + 151) --#define __NR__sysctl (__NR_Linux + 152) --#define __NR_prctl (__NR_Linux + 153) --#define __NR_adjtimex (__NR_Linux + 154) --#define __NR_setrlimit (__NR_Linux + 155) --#define __NR_chroot (__NR_Linux + 156) --#define __NR_sync (__NR_Linux + 157) --#define __NR_acct (__NR_Linux + 158) --#define __NR_settimeofday (__NR_Linux + 159) --#define __NR_mount (__NR_Linux + 160) --#define __NR_umount2 (__NR_Linux + 161) --#define __NR_swapon (__NR_Linux + 162) --#define __NR_swapoff (__NR_Linux + 163) --#define __NR_reboot (__NR_Linux + 164) --#define __NR_sethostname (__NR_Linux + 165) --#define __NR_setdomainname (__NR_Linux + 166) --#define __NR_create_module (__NR_Linux + 167) --#define __NR_init_module (__NR_Linux + 168) --#define __NR_delete_module (__NR_Linux + 169) --#define __NR_get_kernel_syms (__NR_Linux + 170) --#define __NR_query_module (__NR_Linux + 171) --#define __NR_quotactl (__NR_Linux + 172) --#define __NR_nfsservctl (__NR_Linux + 173) --#define __NR_getpmsg (__NR_Linux + 174) --#define __NR_putpmsg (__NR_Linux + 175) --#define __NR_afs_syscall (__NR_Linux + 176) --#define __NR_reserved177 (__NR_Linux + 177) --#define __NR_gettid (__NR_Linux + 178) --#define __NR_readahead (__NR_Linux + 179) --#define __NR_setxattr (__NR_Linux + 180) --#define __NR_lsetxattr (__NR_Linux + 181) --#define 
__NR_fsetxattr (__NR_Linux + 182) --#define __NR_getxattr (__NR_Linux + 183) --#define __NR_lgetxattr (__NR_Linux + 184) --#define __NR_fgetxattr (__NR_Linux + 185) --#define __NR_listxattr (__NR_Linux + 186) --#define __NR_llistxattr (__NR_Linux + 187) --#define __NR_flistxattr (__NR_Linux + 188) --#define __NR_removexattr (__NR_Linux + 189) --#define __NR_lremovexattr (__NR_Linux + 190) --#define __NR_fremovexattr (__NR_Linux + 191) --#define __NR_tkill (__NR_Linux + 192) --#define __NR_reserved193 (__NR_Linux + 193) --#define __NR_futex (__NR_Linux + 194) --#define __NR_sched_setaffinity (__NR_Linux + 195) --#define __NR_sched_getaffinity (__NR_Linux + 196) --#define __NR_cacheflush (__NR_Linux + 197) --#define __NR_cachectl (__NR_Linux + 198) --#define __NR_sysmips (__NR_Linux + 199) --#define __NR_io_setup (__NR_Linux + 200) --#define __NR_io_destroy (__NR_Linux + 201) --#define __NR_io_getevents (__NR_Linux + 202) --#define __NR_io_submit (__NR_Linux + 203) --#define __NR_io_cancel (__NR_Linux + 204) --#define __NR_exit_group (__NR_Linux + 205) --#define __NR_lookup_dcookie (__NR_Linux + 206) --#define __NR_epoll_create (__NR_Linux + 207) --#define __NR_epoll_ctl (__NR_Linux + 208) --#define __NR_epoll_wait (__NR_Linux + 209) --#define __NR_remap_file_pages (__NR_Linux + 210) --#define __NR_rt_sigreturn (__NR_Linux + 211) --#define __NR_fcntl64 (__NR_Linux + 212) --#define __NR_set_tid_address (__NR_Linux + 213) --#define __NR_restart_syscall (__NR_Linux + 214) --#define __NR_semtimedop (__NR_Linux + 215) --#define __NR_fadvise64 (__NR_Linux + 216) --#define __NR_statfs64 (__NR_Linux + 217) --#define __NR_fstatfs64 (__NR_Linux + 218) --#define __NR_sendfile64 (__NR_Linux + 219) --#define __NR_timer_create (__NR_Linux + 220) --#define __NR_timer_settime (__NR_Linux + 221) --#define __NR_timer_gettime (__NR_Linux + 222) --#define __NR_timer_getoverrun (__NR_Linux + 223) --#define __NR_timer_delete (__NR_Linux + 224) --#define __NR_clock_settime (__NR_Linux + 
225) --#define __NR_clock_gettime (__NR_Linux + 226) --#define __NR_clock_getres (__NR_Linux + 227) --#define __NR_clock_nanosleep (__NR_Linux + 228) --#define __NR_tgkill (__NR_Linux + 229) --#define __NR_utimes (__NR_Linux + 230) --#define __NR_mbind (__NR_Linux + 231) --#define __NR_get_mempolicy (__NR_Linux + 232) --#define __NR_set_mempolicy (__NR_Linux + 233) --#define __NR_mq_open (__NR_Linux + 234) --#define __NR_mq_unlink (__NR_Linux + 235) --#define __NR_mq_timedsend (__NR_Linux + 236) --#define __NR_mq_timedreceive (__NR_Linux + 237) --#define __NR_mq_notify (__NR_Linux + 238) --#define __NR_mq_getsetattr (__NR_Linux + 239) --#define __NR_vserver (__NR_Linux + 240) --#define __NR_waitid (__NR_Linux + 241) --#define __NR_add_key (__NR_Linux + 243) --#define __NR_request_key (__NR_Linux + 244) --#define __NR_keyctl (__NR_Linux + 245) --#define __NR_set_thread_area (__NR_Linux + 246) --#define __NR_inotify_init (__NR_Linux + 247) --#define __NR_inotify_add_watch (__NR_Linux + 248) --#define __NR_inotify_rm_watch (__NR_Linux + 249) --#define __NR_migrate_pages (__NR_Linux + 250) --#define __NR_openat (__NR_Linux + 251) --#define __NR_mkdirat (__NR_Linux + 252) --#define __NR_mknodat (__NR_Linux + 253) --#define __NR_fchownat (__NR_Linux + 254) --#define __NR_futimesat (__NR_Linux + 255) --#define __NR_newfstatat (__NR_Linux + 256) --#define __NR_unlinkat (__NR_Linux + 257) --#define __NR_renameat (__NR_Linux + 258) --#define __NR_linkat (__NR_Linux + 259) --#define __NR_symlinkat (__NR_Linux + 260) --#define __NR_readlinkat (__NR_Linux + 261) --#define __NR_fchmodat (__NR_Linux + 262) --#define __NR_faccessat (__NR_Linux + 263) --#define __NR_pselect6 (__NR_Linux + 264) --#define __NR_ppoll (__NR_Linux + 265) --#define __NR_unshare (__NR_Linux + 266) --#define __NR_splice (__NR_Linux + 267) --#define __NR_sync_file_range (__NR_Linux + 268) --#define __NR_tee (__NR_Linux + 269) --#define __NR_vmsplice (__NR_Linux + 270) --#define __NR_move_pages (__NR_Linux + 
271) --#define __NR_set_robust_list (__NR_Linux + 272) --#define __NR_get_robust_list (__NR_Linux + 273) --#define __NR_kexec_load (__NR_Linux + 274) --#define __NR_getcpu (__NR_Linux + 275) --#define __NR_epoll_pwait (__NR_Linux + 276) --#define __NR_ioprio_set (__NR_Linux + 277) --#define __NR_ioprio_get (__NR_Linux + 278) --#define __NR_utimensat (__NR_Linux + 279) --#define __NR_signalfd (__NR_Linux + 280) --#define __NR_timerfd (__NR_Linux + 281) --#define __NR_eventfd (__NR_Linux + 282) --#define __NR_fallocate (__NR_Linux + 283) --#define __NR_timerfd_create (__NR_Linux + 284) --#define __NR_timerfd_gettime (__NR_Linux + 285) --#define __NR_timerfd_settime (__NR_Linux + 286) --#define __NR_signalfd4 (__NR_Linux + 287) --#define __NR_eventfd2 (__NR_Linux + 288) --#define __NR_epoll_create1 (__NR_Linux + 289) --#define __NR_dup3 (__NR_Linux + 290) --#define __NR_pipe2 (__NR_Linux + 291) --#define __NR_inotify_init1 (__NR_Linux + 292) --#define __NR_preadv (__NR_Linux + 293) --#define __NR_pwritev (__NR_Linux + 294) --#define __NR_rt_tgsigqueueinfo (__NR_Linux + 295) --#define __NR_perf_event_open (__NR_Linux + 296) --#define __NR_accept4 (__NR_Linux + 297) --#define __NR_recvmmsg (__NR_Linux + 298) --#define __NR_getdents64 (__NR_Linux + 299) --#define __NR_fanotify_init (__NR_Linux + 300) --#define __NR_fanotify_mark (__NR_Linux + 301) --#define __NR_prlimit64 (__NR_Linux + 302) --#define __NR_name_to_handle_at (__NR_Linux + 303) --#define __NR_open_by_handle_at (__NR_Linux + 304) --#define __NR_clock_adjtime (__NR_Linux + 305) --#define __NR_syncfs (__NR_Linux + 306) --#define __NR_sendmmsg (__NR_Linux + 307) --#define __NR_setns (__NR_Linux + 308) --#define __NR_process_vm_readv (__NR_Linux + 309) --#define __NR_process_vm_writev (__NR_Linux + 310) --#define __NR_kcmp (__NR_Linux + 311) --#define __NR_finit_module (__NR_Linux + 312) --#define __NR_sched_setattr (__NR_Linux + 313) --#define __NR_sched_getattr (__NR_Linux + 314) --#define __NR_renameat2 
(__NR_Linux + 315) --#define __NR_seccomp (__NR_Linux + 316) --#define __NR_getrandom (__NR_Linux + 317) --#define __NR_memfd_create (__NR_Linux + 318) --#define __NR_bpf (__NR_Linux + 319) --#define __NR_execveat (__NR_Linux + 320) --#define __NR_userfaultfd (__NR_Linux + 321) --#define __NR_membarrier (__NR_Linux + 322) --#define __NR_mlock2 (__NR_Linux + 323) --#define __NR_copy_file_range (__NR_Linux + 324) --#define __NR_preadv2 (__NR_Linux + 325) --#define __NR_pwritev2 (__NR_Linux + 326) --#define __NR_pkey_mprotect (__NR_Linux + 327) --#define __NR_pkey_alloc (__NR_Linux + 328) --#define __NR_pkey_free (__NR_Linux + 329) --#define __NR_statx (__NR_Linux + 330) --#define __NR_rseq (__NR_Linux + 331) --#define __NR_io_pgetevents (__NR_Linux + 332) --#define __NR_clock_gettime64 (__NR_Linux + 403) --#define __NR_clock_settime64 (__NR_Linux + 404) --#define __NR_clock_adjtime64 (__NR_Linux + 405) --#define __NR_clock_getres_time64 (__NR_Linux + 406) --#define __NR_clock_nanosleep_time64 (__NR_Linux + 407) --#define __NR_timer_gettime64 (__NR_Linux + 408) --#define __NR_timer_settime64 (__NR_Linux + 409) --#define __NR_timerfd_gettime64 (__NR_Linux + 410) --#define __NR_timerfd_settime64 (__NR_Linux + 411) --#define __NR_utimensat_time64 (__NR_Linux + 412) --#define __NR_pselect6_time64 (__NR_Linux + 413) --#define __NR_ppoll_time64 (__NR_Linux + 414) --#define __NR_io_pgetevents_time64 (__NR_Linux + 416) --#define __NR_recvmmsg_time64 (__NR_Linux + 417) --#define __NR_mq_timedsend_time64 (__NR_Linux + 418) --#define __NR_mq_timedreceive_time64 (__NR_Linux + 419) --#define __NR_semtimedop_time64 (__NR_Linux + 420) --#define __NR_rt_sigtimedwait_time64 (__NR_Linux + 421) --#define __NR_futex_time64 (__NR_Linux + 422) --#define __NR_sched_rr_get_interval_time64 (__NR_Linux + 423) --#define __NR_pidfd_send_signal (__NR_Linux + 424) --#define __NR_io_uring_setup (__NR_Linux + 425) --#define __NR_io_uring_enter (__NR_Linux + 426) --#define __NR_io_uring_register 
(__NR_Linux + 427) --#define __NR_open_tree (__NR_Linux + 428) --#define __NR_move_mount (__NR_Linux + 429) --#define __NR_fsopen (__NR_Linux + 430) --#define __NR_fsconfig (__NR_Linux + 431) --#define __NR_fsmount (__NR_Linux + 432) --#define __NR_fspick (__NR_Linux + 433) --#define __NR_pidfd_open (__NR_Linux + 434) --#define __NR_clone3 (__NR_Linux + 435) --#define __NR_close_range (__NR_Linux + 436) --#define __NR_openat2 (__NR_Linux + 437) --#define __NR_pidfd_getfd (__NR_Linux + 438) --#define __NR_faccessat2 (__NR_Linux + 439) --#define __NR_process_madvise (__NR_Linux + 440) --#define __NR_epoll_pwait2 (__NR_Linux + 441) -+#define __NR_read (__NR_Linux + 0) -+#define __NR_write (__NR_Linux + 1) -+#define __NR_open (__NR_Linux + 2) -+#define __NR_close (__NR_Linux + 3) -+#define __NR_stat (__NR_Linux + 4) -+#define __NR_fstat (__NR_Linux + 5) -+#define __NR_lstat (__NR_Linux + 6) -+#define __NR_poll (__NR_Linux + 7) -+#define __NR_lseek (__NR_Linux + 8) -+#define __NR_mmap (__NR_Linux + 9) -+#define __NR_mprotect (__NR_Linux + 10) -+#define __NR_munmap (__NR_Linux + 11) -+#define __NR_brk (__NR_Linux + 12) -+#define __NR_rt_sigaction (__NR_Linux + 13) -+#define __NR_rt_sigprocmask (__NR_Linux + 14) -+#define __NR_ioctl (__NR_Linux + 15) -+#define __NR_pread64 (__NR_Linux + 16) -+#define __NR_pwrite64 (__NR_Linux + 17) -+#define __NR_readv (__NR_Linux + 18) -+#define __NR_writev (__NR_Linux + 19) -+#define __NR_access (__NR_Linux + 20) -+#define __NR_pipe (__NR_Linux + 21) -+#define __NR__newselect (__NR_Linux + 22) -+#define __NR_sched_yield (__NR_Linux + 23) -+#define __NR_mremap (__NR_Linux + 24) -+#define __NR_msync (__NR_Linux + 25) -+#define __NR_mincore (__NR_Linux + 26) -+#define __NR_madvise (__NR_Linux + 27) -+#define __NR_shmget (__NR_Linux + 28) -+#define __NR_shmat (__NR_Linux + 29) -+#define __NR_shmctl (__NR_Linux + 30) -+#define __NR_dup (__NR_Linux + 31) -+#define __NR_dup2 (__NR_Linux + 32) -+#define __NR_pause (__NR_Linux + 33) -+#define 
__NR_nanosleep (__NR_Linux + 34) -+#define __NR_getitimer (__NR_Linux + 35) -+#define __NR_setitimer (__NR_Linux + 36) -+#define __NR_alarm (__NR_Linux + 37) -+#define __NR_getpid (__NR_Linux + 38) -+#define __NR_sendfile (__NR_Linux + 39) -+#define __NR_socket (__NR_Linux + 40) -+#define __NR_connect (__NR_Linux + 41) -+#define __NR_accept (__NR_Linux + 42) -+#define __NR_sendto (__NR_Linux + 43) -+#define __NR_recvfrom (__NR_Linux + 44) -+#define __NR_sendmsg (__NR_Linux + 45) -+#define __NR_recvmsg (__NR_Linux + 46) -+#define __NR_shutdown (__NR_Linux + 47) -+#define __NR_bind (__NR_Linux + 48) -+#define __NR_listen (__NR_Linux + 49) -+#define __NR_getsockname (__NR_Linux + 50) -+#define __NR_getpeername (__NR_Linux + 51) -+#define __NR_socketpair (__NR_Linux + 52) -+#define __NR_setsockopt (__NR_Linux + 53) -+#define __NR_getsockopt (__NR_Linux + 54) -+#define __NR_clone (__NR_Linux + 55) -+#define __NR_fork (__NR_Linux + 56) -+#define __NR_execve (__NR_Linux + 57) -+#define __NR_exit (__NR_Linux + 58) -+#define __NR_wait4 (__NR_Linux + 59) -+#define __NR_kill (__NR_Linux + 60) -+#define __NR_uname (__NR_Linux + 61) -+#define __NR_semget (__NR_Linux + 62) -+#define __NR_semop (__NR_Linux + 63) -+#define __NR_semctl (__NR_Linux + 64) -+#define __NR_shmdt (__NR_Linux + 65) -+#define __NR_msgget (__NR_Linux + 66) -+#define __NR_msgsnd (__NR_Linux + 67) -+#define __NR_msgrcv (__NR_Linux + 68) -+#define __NR_msgctl (__NR_Linux + 69) -+#define __NR_fcntl (__NR_Linux + 70) -+#define __NR_flock (__NR_Linux + 71) -+#define __NR_fsync (__NR_Linux + 72) -+#define __NR_fdatasync (__NR_Linux + 73) -+#define __NR_truncate (__NR_Linux + 74) -+#define __NR_ftruncate (__NR_Linux + 75) -+#define __NR_getdents (__NR_Linux + 76) -+#define __NR_getcwd (__NR_Linux + 77) -+#define __NR_chdir (__NR_Linux + 78) -+#define __NR_fchdir (__NR_Linux + 79) -+#define __NR_rename (__NR_Linux + 80) -+#define __NR_mkdir (__NR_Linux + 81) -+#define __NR_rmdir (__NR_Linux + 82) -+#define 
__NR_creat (__NR_Linux + 83) -+#define __NR_link (__NR_Linux + 84) -+#define __NR_unlink (__NR_Linux + 85) -+#define __NR_symlink (__NR_Linux + 86) -+#define __NR_readlink (__NR_Linux + 87) -+#define __NR_chmod (__NR_Linux + 88) -+#define __NR_fchmod (__NR_Linux + 89) -+#define __NR_chown (__NR_Linux + 90) -+#define __NR_fchown (__NR_Linux + 91) -+#define __NR_lchown (__NR_Linux + 92) -+#define __NR_umask (__NR_Linux + 93) -+#define __NR_gettimeofday (__NR_Linux + 94) -+#define __NR_getrlimit (__NR_Linux + 95) -+#define __NR_getrusage (__NR_Linux + 96) -+#define __NR_sysinfo (__NR_Linux + 97) -+#define __NR_times (__NR_Linux + 98) -+#define __NR_ptrace (__NR_Linux + 99) -+#define __NR_getuid (__NR_Linux + 100) -+#define __NR_syslog (__NR_Linux + 101) -+#define __NR_getgid (__NR_Linux + 102) -+#define __NR_setuid (__NR_Linux + 103) -+#define __NR_setgid (__NR_Linux + 104) -+#define __NR_geteuid (__NR_Linux + 105) -+#define __NR_getegid (__NR_Linux + 106) -+#define __NR_setpgid (__NR_Linux + 107) -+#define __NR_getppid (__NR_Linux + 108) -+#define __NR_getpgrp (__NR_Linux + 109) -+#define __NR_setsid (__NR_Linux + 110) -+#define __NR_setreuid (__NR_Linux + 111) -+#define __NR_setregid (__NR_Linux + 112) -+#define __NR_getgroups (__NR_Linux + 113) -+#define __NR_setgroups (__NR_Linux + 114) -+#define __NR_setresuid (__NR_Linux + 115) -+#define __NR_getresuid (__NR_Linux + 116) -+#define __NR_setresgid (__NR_Linux + 117) -+#define __NR_getresgid (__NR_Linux + 118) -+#define __NR_getpgid (__NR_Linux + 119) -+#define __NR_setfsuid (__NR_Linux + 120) -+#define __NR_setfsgid (__NR_Linux + 121) -+#define __NR_getsid (__NR_Linux + 122) -+#define __NR_capget (__NR_Linux + 123) -+#define __NR_capset (__NR_Linux + 124) -+#define __NR_rt_sigpending (__NR_Linux + 125) -+#define __NR_rt_sigtimedwait (__NR_Linux + 126) -+#define __NR_rt_sigqueueinfo (__NR_Linux + 127) -+#define __NR_rt_sigsuspend (__NR_Linux + 128) -+#define __NR_sigaltstack (__NR_Linux + 129) -+#define __NR_utime 
(__NR_Linux + 130) -+#define __NR_mknod (__NR_Linux + 131) -+#define __NR_personality (__NR_Linux + 132) -+#define __NR_ustat (__NR_Linux + 133) -+#define __NR_statfs (__NR_Linux + 134) -+#define __NR_fstatfs (__NR_Linux + 135) -+#define __NR_sysfs (__NR_Linux + 136) -+#define __NR_getpriority (__NR_Linux + 137) -+#define __NR_setpriority (__NR_Linux + 138) -+#define __NR_sched_setparam (__NR_Linux + 139) -+#define __NR_sched_getparam (__NR_Linux + 140) -+#define __NR_sched_setscheduler (__NR_Linux + 141) -+#define __NR_sched_getscheduler (__NR_Linux + 142) -+#define __NR_sched_get_priority_max (__NR_Linux + 143) -+#define __NR_sched_get_priority_min (__NR_Linux + 144) -+#define __NR_sched_rr_get_interval (__NR_Linux + 145) -+#define __NR_mlock (__NR_Linux + 146) -+#define __NR_munlock (__NR_Linux + 147) -+#define __NR_mlockall (__NR_Linux + 148) -+#define __NR_munlockall (__NR_Linux + 149) -+#define __NR_vhangup (__NR_Linux + 150) -+#define __NR_pivot_root (__NR_Linux + 151) -+#define __NR__sysctl (__NR_Linux + 152) -+#define __NR_prctl (__NR_Linux + 153) -+#define __NR_adjtimex (__NR_Linux + 154) -+#define __NR_setrlimit (__NR_Linux + 155) -+#define __NR_chroot (__NR_Linux + 156) -+#define __NR_sync (__NR_Linux + 157) -+#define __NR_acct (__NR_Linux + 158) -+#define __NR_settimeofday (__NR_Linux + 159) -+#define __NR_mount (__NR_Linux + 160) -+#define __NR_umount2 (__NR_Linux + 161) -+#define __NR_swapon (__NR_Linux + 162) -+#define __NR_swapoff (__NR_Linux + 163) -+#define __NR_reboot (__NR_Linux + 164) -+#define __NR_sethostname (__NR_Linux + 165) -+#define __NR_setdomainname (__NR_Linux + 166) -+#define __NR_create_module (__NR_Linux + 167) -+#define __NR_init_module (__NR_Linux + 168) -+#define __NR_delete_module (__NR_Linux + 169) -+#define __NR_get_kernel_syms (__NR_Linux + 170) -+#define __NR_query_module (__NR_Linux + 171) -+#define __NR_quotactl (__NR_Linux + 172) -+#define __NR_nfsservctl (__NR_Linux + 173) -+#define __NR_getpmsg (__NR_Linux + 174) 
-+#define __NR_putpmsg (__NR_Linux + 175) -+#define __NR_afs_syscall (__NR_Linux + 176) -+#define __NR_reserved177 (__NR_Linux + 177) -+#define __NR_gettid (__NR_Linux + 178) -+#define __NR_readahead (__NR_Linux + 179) -+#define __NR_setxattr (__NR_Linux + 180) -+#define __NR_lsetxattr (__NR_Linux + 181) -+#define __NR_fsetxattr (__NR_Linux + 182) -+#define __NR_getxattr (__NR_Linux + 183) -+#define __NR_lgetxattr (__NR_Linux + 184) -+#define __NR_fgetxattr (__NR_Linux + 185) -+#define __NR_listxattr (__NR_Linux + 186) -+#define __NR_llistxattr (__NR_Linux + 187) -+#define __NR_flistxattr (__NR_Linux + 188) -+#define __NR_removexattr (__NR_Linux + 189) -+#define __NR_lremovexattr (__NR_Linux + 190) -+#define __NR_fremovexattr (__NR_Linux + 191) -+#define __NR_tkill (__NR_Linux + 192) -+#define __NR_reserved193 (__NR_Linux + 193) -+#define __NR_futex (__NR_Linux + 194) -+#define __NR_sched_setaffinity (__NR_Linux + 195) -+#define __NR_sched_getaffinity (__NR_Linux + 196) -+#define __NR_cacheflush (__NR_Linux + 197) -+#define __NR_cachectl (__NR_Linux + 198) -+#define __NR_sysmips (__NR_Linux + 199) -+#define __NR_io_setup (__NR_Linux + 200) -+#define __NR_io_destroy (__NR_Linux + 201) -+#define __NR_io_getevents (__NR_Linux + 202) -+#define __NR_io_submit (__NR_Linux + 203) -+#define __NR_io_cancel (__NR_Linux + 204) -+#define __NR_exit_group (__NR_Linux + 205) -+#define __NR_lookup_dcookie (__NR_Linux + 206) -+#define __NR_epoll_create (__NR_Linux + 207) -+#define __NR_epoll_ctl (__NR_Linux + 208) -+#define __NR_epoll_wait (__NR_Linux + 209) -+#define __NR_remap_file_pages (__NR_Linux + 210) -+#define __NR_rt_sigreturn (__NR_Linux + 211) -+#define __NR_fcntl64 (__NR_Linux + 212) -+#define __NR_set_tid_address (__NR_Linux + 213) -+#define __NR_restart_syscall (__NR_Linux + 214) -+#define __NR_semtimedop (__NR_Linux + 215) -+#define __NR_fadvise64 (__NR_Linux + 216) -+#define __NR_statfs64 (__NR_Linux + 217) -+#define __NR_fstatfs64 (__NR_Linux + 218) -+#define 
__NR_sendfile64 (__NR_Linux + 219) -+#define __NR_timer_create (__NR_Linux + 220) -+#define __NR_timer_settime (__NR_Linux + 221) -+#define __NR_timer_gettime (__NR_Linux + 222) -+#define __NR_timer_getoverrun (__NR_Linux + 223) -+#define __NR_timer_delete (__NR_Linux + 224) -+#define __NR_clock_settime (__NR_Linux + 225) -+#define __NR_clock_gettime (__NR_Linux + 226) -+#define __NR_clock_getres (__NR_Linux + 227) -+#define __NR_clock_nanosleep (__NR_Linux + 228) -+#define __NR_tgkill (__NR_Linux + 229) -+#define __NR_utimes (__NR_Linux + 230) -+#define __NR_mbind (__NR_Linux + 231) -+#define __NR_get_mempolicy (__NR_Linux + 232) -+#define __NR_set_mempolicy (__NR_Linux + 233) -+#define __NR_mq_open (__NR_Linux + 234) -+#define __NR_mq_unlink (__NR_Linux + 235) -+#define __NR_mq_timedsend (__NR_Linux + 236) -+#define __NR_mq_timedreceive (__NR_Linux + 237) -+#define __NR_mq_notify (__NR_Linux + 238) -+#define __NR_mq_getsetattr (__NR_Linux + 239) -+#define __NR_vserver (__NR_Linux + 240) -+#define __NR_waitid (__NR_Linux + 241) -+#define __NR_add_key (__NR_Linux + 243) -+#define __NR_request_key (__NR_Linux + 244) -+#define __NR_keyctl (__NR_Linux + 245) -+#define __NR_set_thread_area (__NR_Linux + 246) -+#define __NR_inotify_init (__NR_Linux + 247) -+#define __NR_inotify_add_watch (__NR_Linux + 248) -+#define __NR_inotify_rm_watch (__NR_Linux + 249) -+#define __NR_migrate_pages (__NR_Linux + 250) -+#define __NR_openat (__NR_Linux + 251) -+#define __NR_mkdirat (__NR_Linux + 252) -+#define __NR_mknodat (__NR_Linux + 253) -+#define __NR_fchownat (__NR_Linux + 254) -+#define __NR_futimesat (__NR_Linux + 255) -+#define __NR_newfstatat (__NR_Linux + 256) -+#define __NR_unlinkat (__NR_Linux + 257) -+#define __NR_renameat (__NR_Linux + 258) -+#define __NR_linkat (__NR_Linux + 259) -+#define __NR_symlinkat (__NR_Linux + 260) -+#define __NR_readlinkat (__NR_Linux + 261) -+#define __NR_fchmodat (__NR_Linux + 262) -+#define __NR_faccessat (__NR_Linux + 263) -+#define 
__NR_pselect6 (__NR_Linux + 264) -+#define __NR_ppoll (__NR_Linux + 265) -+#define __NR_unshare (__NR_Linux + 266) -+#define __NR_splice (__NR_Linux + 267) -+#define __NR_sync_file_range (__NR_Linux + 268) -+#define __NR_tee (__NR_Linux + 269) -+#define __NR_vmsplice (__NR_Linux + 270) -+#define __NR_move_pages (__NR_Linux + 271) -+#define __NR_set_robust_list (__NR_Linux + 272) -+#define __NR_get_robust_list (__NR_Linux + 273) -+#define __NR_kexec_load (__NR_Linux + 274) -+#define __NR_getcpu (__NR_Linux + 275) -+#define __NR_epoll_pwait (__NR_Linux + 276) -+#define __NR_ioprio_set (__NR_Linux + 277) -+#define __NR_ioprio_get (__NR_Linux + 278) -+#define __NR_utimensat (__NR_Linux + 279) -+#define __NR_signalfd (__NR_Linux + 280) -+#define __NR_timerfd (__NR_Linux + 281) -+#define __NR_eventfd (__NR_Linux + 282) -+#define __NR_fallocate (__NR_Linux + 283) -+#define __NR_timerfd_create (__NR_Linux + 284) -+#define __NR_timerfd_gettime (__NR_Linux + 285) -+#define __NR_timerfd_settime (__NR_Linux + 286) -+#define __NR_signalfd4 (__NR_Linux + 287) -+#define __NR_eventfd2 (__NR_Linux + 288) -+#define __NR_epoll_create1 (__NR_Linux + 289) -+#define __NR_dup3 (__NR_Linux + 290) -+#define __NR_pipe2 (__NR_Linux + 291) -+#define __NR_inotify_init1 (__NR_Linux + 292) -+#define __NR_preadv (__NR_Linux + 293) -+#define __NR_pwritev (__NR_Linux + 294) -+#define __NR_rt_tgsigqueueinfo (__NR_Linux + 295) -+#define __NR_perf_event_open (__NR_Linux + 296) -+#define __NR_accept4 (__NR_Linux + 297) -+#define __NR_recvmmsg (__NR_Linux + 298) -+#define __NR_getdents64 (__NR_Linux + 299) -+#define __NR_fanotify_init (__NR_Linux + 300) -+#define __NR_fanotify_mark (__NR_Linux + 301) -+#define __NR_prlimit64 (__NR_Linux + 302) -+#define __NR_name_to_handle_at (__NR_Linux + 303) -+#define __NR_open_by_handle_at (__NR_Linux + 304) -+#define __NR_clock_adjtime (__NR_Linux + 305) -+#define __NR_syncfs (__NR_Linux + 306) -+#define __NR_sendmmsg (__NR_Linux + 307) -+#define __NR_setns 
(__NR_Linux + 308) -+#define __NR_process_vm_readv (__NR_Linux + 309) -+#define __NR_process_vm_writev (__NR_Linux + 310) -+#define __NR_kcmp (__NR_Linux + 311) -+#define __NR_finit_module (__NR_Linux + 312) -+#define __NR_sched_setattr (__NR_Linux + 313) -+#define __NR_sched_getattr (__NR_Linux + 314) -+#define __NR_renameat2 (__NR_Linux + 315) -+#define __NR_seccomp (__NR_Linux + 316) -+#define __NR_getrandom (__NR_Linux + 317) -+#define __NR_memfd_create (__NR_Linux + 318) -+#define __NR_bpf (__NR_Linux + 319) -+#define __NR_execveat (__NR_Linux + 320) -+#define __NR_userfaultfd (__NR_Linux + 321) -+#define __NR_membarrier (__NR_Linux + 322) -+#define __NR_mlock2 (__NR_Linux + 323) -+#define __NR_copy_file_range (__NR_Linux + 324) -+#define __NR_preadv2 (__NR_Linux + 325) -+#define __NR_pwritev2 (__NR_Linux + 326) -+#define __NR_pkey_mprotect (__NR_Linux + 327) -+#define __NR_pkey_alloc (__NR_Linux + 328) -+#define __NR_pkey_free (__NR_Linux + 329) -+#define __NR_statx (__NR_Linux + 330) -+#define __NR_rseq (__NR_Linux + 331) -+#define __NR_io_pgetevents (__NR_Linux + 332) -+#define __NR_clock_gettime64 (__NR_Linux + 403) -+#define __NR_clock_settime64 (__NR_Linux + 404) -+#define __NR_clock_adjtime64 (__NR_Linux + 405) -+#define __NR_clock_getres_time64 (__NR_Linux + 406) -+#define __NR_clock_nanosleep_time64 (__NR_Linux + 407) -+#define __NR_timer_gettime64 (__NR_Linux + 408) -+#define __NR_timer_settime64 (__NR_Linux + 409) -+#define __NR_timerfd_gettime64 (__NR_Linux + 410) -+#define __NR_timerfd_settime64 (__NR_Linux + 411) -+#define __NR_utimensat_time64 (__NR_Linux + 412) -+#define __NR_pselect6_time64 (__NR_Linux + 413) -+#define __NR_ppoll_time64 (__NR_Linux + 414) -+#define __NR_io_pgetevents_time64 (__NR_Linux + 416) -+#define __NR_recvmmsg_time64 (__NR_Linux + 417) -+#define __NR_mq_timedsend_time64 (__NR_Linux + 418) -+#define __NR_mq_timedreceive_time64 (__NR_Linux + 419) -+#define __NR_semtimedop_time64 (__NR_Linux + 420) -+#define 
__NR_rt_sigtimedwait_time64 (__NR_Linux + 421) -+#define __NR_futex_time64 (__NR_Linux + 422) -+#define __NR_sched_rr_get_interval_time64 (__NR_Linux + 423) -+#define __NR_pidfd_send_signal (__NR_Linux + 424) -+#define __NR_io_uring_setup (__NR_Linux + 425) -+#define __NR_io_uring_enter (__NR_Linux + 426) -+#define __NR_io_uring_register (__NR_Linux + 427) -+#define __NR_open_tree (__NR_Linux + 428) -+#define __NR_move_mount (__NR_Linux + 429) -+#define __NR_fsopen (__NR_Linux + 430) -+#define __NR_fsconfig (__NR_Linux + 431) -+#define __NR_fsmount (__NR_Linux + 432) -+#define __NR_fspick (__NR_Linux + 433) -+#define __NR_pidfd_open (__NR_Linux + 434) -+#define __NR_clone3 (__NR_Linux + 435) -+#define __NR_close_range (__NR_Linux + 436) -+#define __NR_openat2 (__NR_Linux + 437) -+#define __NR_pidfd_getfd (__NR_Linux + 438) -+#define __NR_faccessat2 (__NR_Linux + 439) -+#define __NR_process_madvise (__NR_Linux + 440) -+#define __NR_epoll_pwait2 (__NR_Linux + 441) -+#define __NR_mount_setattr (__NR_Linux + 442) -+#define __NR_landlock_create_ruleset (__NR_Linux + 444) -+#define __NR_landlock_add_rule (__NR_Linux + 445) -+#define __NR_landlock_restrict_self (__NR_Linux + 446) - -- --#endif /* _ASM_MIPS_UNISTD_N32_H */ -+#endif /* _ASM_UNISTD_N32_H */ -diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h -index 683558a7f8..0996001802 100644 ---- a/linux-headers/asm-mips/unistd_n64.h -+++ b/linux-headers/asm-mips/unistd_n64.h -@@ -1,352 +1,355 @@ --#ifndef _ASM_MIPS_UNISTD_N64_H --#define _ASM_MIPS_UNISTD_N64_H -+#ifndef _ASM_UNISTD_N64_H -+#define _ASM_UNISTD_N64_H - --#define __NR_read (__NR_Linux + 0) --#define __NR_write (__NR_Linux + 1) --#define __NR_open (__NR_Linux + 2) --#define __NR_close (__NR_Linux + 3) --#define __NR_stat (__NR_Linux + 4) --#define __NR_fstat (__NR_Linux + 5) --#define __NR_lstat (__NR_Linux + 6) --#define __NR_poll (__NR_Linux + 7) --#define __NR_lseek (__NR_Linux + 8) --#define __NR_mmap (__NR_Linux + 9) 
--#define __NR_mprotect (__NR_Linux + 10) --#define __NR_munmap (__NR_Linux + 11) --#define __NR_brk (__NR_Linux + 12) --#define __NR_rt_sigaction (__NR_Linux + 13) --#define __NR_rt_sigprocmask (__NR_Linux + 14) --#define __NR_ioctl (__NR_Linux + 15) --#define __NR_pread64 (__NR_Linux + 16) --#define __NR_pwrite64 (__NR_Linux + 17) --#define __NR_readv (__NR_Linux + 18) --#define __NR_writev (__NR_Linux + 19) --#define __NR_access (__NR_Linux + 20) --#define __NR_pipe (__NR_Linux + 21) --#define __NR__newselect (__NR_Linux + 22) --#define __NR_sched_yield (__NR_Linux + 23) --#define __NR_mremap (__NR_Linux + 24) --#define __NR_msync (__NR_Linux + 25) --#define __NR_mincore (__NR_Linux + 26) --#define __NR_madvise (__NR_Linux + 27) --#define __NR_shmget (__NR_Linux + 28) --#define __NR_shmat (__NR_Linux + 29) --#define __NR_shmctl (__NR_Linux + 30) --#define __NR_dup (__NR_Linux + 31) --#define __NR_dup2 (__NR_Linux + 32) --#define __NR_pause (__NR_Linux + 33) --#define __NR_nanosleep (__NR_Linux + 34) --#define __NR_getitimer (__NR_Linux + 35) --#define __NR_setitimer (__NR_Linux + 36) --#define __NR_alarm (__NR_Linux + 37) --#define __NR_getpid (__NR_Linux + 38) --#define __NR_sendfile (__NR_Linux + 39) --#define __NR_socket (__NR_Linux + 40) --#define __NR_connect (__NR_Linux + 41) --#define __NR_accept (__NR_Linux + 42) --#define __NR_sendto (__NR_Linux + 43) --#define __NR_recvfrom (__NR_Linux + 44) --#define __NR_sendmsg (__NR_Linux + 45) --#define __NR_recvmsg (__NR_Linux + 46) --#define __NR_shutdown (__NR_Linux + 47) --#define __NR_bind (__NR_Linux + 48) --#define __NR_listen (__NR_Linux + 49) --#define __NR_getsockname (__NR_Linux + 50) --#define __NR_getpeername (__NR_Linux + 51) --#define __NR_socketpair (__NR_Linux + 52) --#define __NR_setsockopt (__NR_Linux + 53) --#define __NR_getsockopt (__NR_Linux + 54) --#define __NR_clone (__NR_Linux + 55) --#define __NR_fork (__NR_Linux + 56) --#define __NR_execve (__NR_Linux + 57) --#define __NR_exit 
(__NR_Linux + 58) --#define __NR_wait4 (__NR_Linux + 59) --#define __NR_kill (__NR_Linux + 60) --#define __NR_uname (__NR_Linux + 61) --#define __NR_semget (__NR_Linux + 62) --#define __NR_semop (__NR_Linux + 63) --#define __NR_semctl (__NR_Linux + 64) --#define __NR_shmdt (__NR_Linux + 65) --#define __NR_msgget (__NR_Linux + 66) --#define __NR_msgsnd (__NR_Linux + 67) --#define __NR_msgrcv (__NR_Linux + 68) --#define __NR_msgctl (__NR_Linux + 69) --#define __NR_fcntl (__NR_Linux + 70) --#define __NR_flock (__NR_Linux + 71) --#define __NR_fsync (__NR_Linux + 72) --#define __NR_fdatasync (__NR_Linux + 73) --#define __NR_truncate (__NR_Linux + 74) --#define __NR_ftruncate (__NR_Linux + 75) --#define __NR_getdents (__NR_Linux + 76) --#define __NR_getcwd (__NR_Linux + 77) --#define __NR_chdir (__NR_Linux + 78) --#define __NR_fchdir (__NR_Linux + 79) --#define __NR_rename (__NR_Linux + 80) --#define __NR_mkdir (__NR_Linux + 81) --#define __NR_rmdir (__NR_Linux + 82) --#define __NR_creat (__NR_Linux + 83) --#define __NR_link (__NR_Linux + 84) --#define __NR_unlink (__NR_Linux + 85) --#define __NR_symlink (__NR_Linux + 86) --#define __NR_readlink (__NR_Linux + 87) --#define __NR_chmod (__NR_Linux + 88) --#define __NR_fchmod (__NR_Linux + 89) --#define __NR_chown (__NR_Linux + 90) --#define __NR_fchown (__NR_Linux + 91) --#define __NR_lchown (__NR_Linux + 92) --#define __NR_umask (__NR_Linux + 93) --#define __NR_gettimeofday (__NR_Linux + 94) --#define __NR_getrlimit (__NR_Linux + 95) --#define __NR_getrusage (__NR_Linux + 96) --#define __NR_sysinfo (__NR_Linux + 97) --#define __NR_times (__NR_Linux + 98) --#define __NR_ptrace (__NR_Linux + 99) --#define __NR_getuid (__NR_Linux + 100) --#define __NR_syslog (__NR_Linux + 101) --#define __NR_getgid (__NR_Linux + 102) --#define __NR_setuid (__NR_Linux + 103) --#define __NR_setgid (__NR_Linux + 104) --#define __NR_geteuid (__NR_Linux + 105) --#define __NR_getegid (__NR_Linux + 106) --#define __NR_setpgid (__NR_Linux + 107) 
--#define __NR_getppid (__NR_Linux + 108) --#define __NR_getpgrp (__NR_Linux + 109) --#define __NR_setsid (__NR_Linux + 110) --#define __NR_setreuid (__NR_Linux + 111) --#define __NR_setregid (__NR_Linux + 112) --#define __NR_getgroups (__NR_Linux + 113) --#define __NR_setgroups (__NR_Linux + 114) --#define __NR_setresuid (__NR_Linux + 115) --#define __NR_getresuid (__NR_Linux + 116) --#define __NR_setresgid (__NR_Linux + 117) --#define __NR_getresgid (__NR_Linux + 118) --#define __NR_getpgid (__NR_Linux + 119) --#define __NR_setfsuid (__NR_Linux + 120) --#define __NR_setfsgid (__NR_Linux + 121) --#define __NR_getsid (__NR_Linux + 122) --#define __NR_capget (__NR_Linux + 123) --#define __NR_capset (__NR_Linux + 124) --#define __NR_rt_sigpending (__NR_Linux + 125) --#define __NR_rt_sigtimedwait (__NR_Linux + 126) --#define __NR_rt_sigqueueinfo (__NR_Linux + 127) --#define __NR_rt_sigsuspend (__NR_Linux + 128) --#define __NR_sigaltstack (__NR_Linux + 129) --#define __NR_utime (__NR_Linux + 130) --#define __NR_mknod (__NR_Linux + 131) --#define __NR_personality (__NR_Linux + 132) --#define __NR_ustat (__NR_Linux + 133) --#define __NR_statfs (__NR_Linux + 134) --#define __NR_fstatfs (__NR_Linux + 135) --#define __NR_sysfs (__NR_Linux + 136) --#define __NR_getpriority (__NR_Linux + 137) --#define __NR_setpriority (__NR_Linux + 138) --#define __NR_sched_setparam (__NR_Linux + 139) --#define __NR_sched_getparam (__NR_Linux + 140) --#define __NR_sched_setscheduler (__NR_Linux + 141) --#define __NR_sched_getscheduler (__NR_Linux + 142) --#define __NR_sched_get_priority_max (__NR_Linux + 143) --#define __NR_sched_get_priority_min (__NR_Linux + 144) --#define __NR_sched_rr_get_interval (__NR_Linux + 145) --#define __NR_mlock (__NR_Linux + 146) --#define __NR_munlock (__NR_Linux + 147) --#define __NR_mlockall (__NR_Linux + 148) --#define __NR_munlockall (__NR_Linux + 149) --#define __NR_vhangup (__NR_Linux + 150) --#define __NR_pivot_root (__NR_Linux + 151) --#define 
__NR__sysctl (__NR_Linux + 152) --#define __NR_prctl (__NR_Linux + 153) --#define __NR_adjtimex (__NR_Linux + 154) --#define __NR_setrlimit (__NR_Linux + 155) --#define __NR_chroot (__NR_Linux + 156) --#define __NR_sync (__NR_Linux + 157) --#define __NR_acct (__NR_Linux + 158) --#define __NR_settimeofday (__NR_Linux + 159) --#define __NR_mount (__NR_Linux + 160) --#define __NR_umount2 (__NR_Linux + 161) --#define __NR_swapon (__NR_Linux + 162) --#define __NR_swapoff (__NR_Linux + 163) --#define __NR_reboot (__NR_Linux + 164) --#define __NR_sethostname (__NR_Linux + 165) --#define __NR_setdomainname (__NR_Linux + 166) --#define __NR_create_module (__NR_Linux + 167) --#define __NR_init_module (__NR_Linux + 168) --#define __NR_delete_module (__NR_Linux + 169) --#define __NR_get_kernel_syms (__NR_Linux + 170) --#define __NR_query_module (__NR_Linux + 171) --#define __NR_quotactl (__NR_Linux + 172) --#define __NR_nfsservctl (__NR_Linux + 173) --#define __NR_getpmsg (__NR_Linux + 174) --#define __NR_putpmsg (__NR_Linux + 175) --#define __NR_afs_syscall (__NR_Linux + 176) --#define __NR_reserved177 (__NR_Linux + 177) --#define __NR_gettid (__NR_Linux + 178) --#define __NR_readahead (__NR_Linux + 179) --#define __NR_setxattr (__NR_Linux + 180) --#define __NR_lsetxattr (__NR_Linux + 181) --#define __NR_fsetxattr (__NR_Linux + 182) --#define __NR_getxattr (__NR_Linux + 183) --#define __NR_lgetxattr (__NR_Linux + 184) --#define __NR_fgetxattr (__NR_Linux + 185) --#define __NR_listxattr (__NR_Linux + 186) --#define __NR_llistxattr (__NR_Linux + 187) --#define __NR_flistxattr (__NR_Linux + 188) --#define __NR_removexattr (__NR_Linux + 189) --#define __NR_lremovexattr (__NR_Linux + 190) --#define __NR_fremovexattr (__NR_Linux + 191) --#define __NR_tkill (__NR_Linux + 192) --#define __NR_reserved193 (__NR_Linux + 193) --#define __NR_futex (__NR_Linux + 194) --#define __NR_sched_setaffinity (__NR_Linux + 195) --#define __NR_sched_getaffinity (__NR_Linux + 196) --#define 
__NR_cacheflush (__NR_Linux + 197) --#define __NR_cachectl (__NR_Linux + 198) --#define __NR_sysmips (__NR_Linux + 199) --#define __NR_io_setup (__NR_Linux + 200) --#define __NR_io_destroy (__NR_Linux + 201) --#define __NR_io_getevents (__NR_Linux + 202) --#define __NR_io_submit (__NR_Linux + 203) --#define __NR_io_cancel (__NR_Linux + 204) --#define __NR_exit_group (__NR_Linux + 205) --#define __NR_lookup_dcookie (__NR_Linux + 206) --#define __NR_epoll_create (__NR_Linux + 207) --#define __NR_epoll_ctl (__NR_Linux + 208) --#define __NR_epoll_wait (__NR_Linux + 209) --#define __NR_remap_file_pages (__NR_Linux + 210) --#define __NR_rt_sigreturn (__NR_Linux + 211) --#define __NR_set_tid_address (__NR_Linux + 212) --#define __NR_restart_syscall (__NR_Linux + 213) --#define __NR_semtimedop (__NR_Linux + 214) --#define __NR_fadvise64 (__NR_Linux + 215) --#define __NR_timer_create (__NR_Linux + 216) --#define __NR_timer_settime (__NR_Linux + 217) --#define __NR_timer_gettime (__NR_Linux + 218) --#define __NR_timer_getoverrun (__NR_Linux + 219) --#define __NR_timer_delete (__NR_Linux + 220) --#define __NR_clock_settime (__NR_Linux + 221) --#define __NR_clock_gettime (__NR_Linux + 222) --#define __NR_clock_getres (__NR_Linux + 223) --#define __NR_clock_nanosleep (__NR_Linux + 224) --#define __NR_tgkill (__NR_Linux + 225) --#define __NR_utimes (__NR_Linux + 226) --#define __NR_mbind (__NR_Linux + 227) --#define __NR_get_mempolicy (__NR_Linux + 228) --#define __NR_set_mempolicy (__NR_Linux + 229) --#define __NR_mq_open (__NR_Linux + 230) --#define __NR_mq_unlink (__NR_Linux + 231) --#define __NR_mq_timedsend (__NR_Linux + 232) --#define __NR_mq_timedreceive (__NR_Linux + 233) --#define __NR_mq_notify (__NR_Linux + 234) --#define __NR_mq_getsetattr (__NR_Linux + 235) --#define __NR_vserver (__NR_Linux + 236) --#define __NR_waitid (__NR_Linux + 237) --#define __NR_add_key (__NR_Linux + 239) --#define __NR_request_key (__NR_Linux + 240) --#define __NR_keyctl (__NR_Linux + 241) 
--#define __NR_set_thread_area (__NR_Linux + 242) --#define __NR_inotify_init (__NR_Linux + 243) --#define __NR_inotify_add_watch (__NR_Linux + 244) --#define __NR_inotify_rm_watch (__NR_Linux + 245) --#define __NR_migrate_pages (__NR_Linux + 246) --#define __NR_openat (__NR_Linux + 247) --#define __NR_mkdirat (__NR_Linux + 248) --#define __NR_mknodat (__NR_Linux + 249) --#define __NR_fchownat (__NR_Linux + 250) --#define __NR_futimesat (__NR_Linux + 251) --#define __NR_newfstatat (__NR_Linux + 252) --#define __NR_unlinkat (__NR_Linux + 253) --#define __NR_renameat (__NR_Linux + 254) --#define __NR_linkat (__NR_Linux + 255) --#define __NR_symlinkat (__NR_Linux + 256) --#define __NR_readlinkat (__NR_Linux + 257) --#define __NR_fchmodat (__NR_Linux + 258) --#define __NR_faccessat (__NR_Linux + 259) --#define __NR_pselect6 (__NR_Linux + 260) --#define __NR_ppoll (__NR_Linux + 261) --#define __NR_unshare (__NR_Linux + 262) --#define __NR_splice (__NR_Linux + 263) --#define __NR_sync_file_range (__NR_Linux + 264) --#define __NR_tee (__NR_Linux + 265) --#define __NR_vmsplice (__NR_Linux + 266) --#define __NR_move_pages (__NR_Linux + 267) --#define __NR_set_robust_list (__NR_Linux + 268) --#define __NR_get_robust_list (__NR_Linux + 269) --#define __NR_kexec_load (__NR_Linux + 270) --#define __NR_getcpu (__NR_Linux + 271) --#define __NR_epoll_pwait (__NR_Linux + 272) --#define __NR_ioprio_set (__NR_Linux + 273) --#define __NR_ioprio_get (__NR_Linux + 274) --#define __NR_utimensat (__NR_Linux + 275) --#define __NR_signalfd (__NR_Linux + 276) --#define __NR_timerfd (__NR_Linux + 277) --#define __NR_eventfd (__NR_Linux + 278) --#define __NR_fallocate (__NR_Linux + 279) --#define __NR_timerfd_create (__NR_Linux + 280) --#define __NR_timerfd_gettime (__NR_Linux + 281) --#define __NR_timerfd_settime (__NR_Linux + 282) --#define __NR_signalfd4 (__NR_Linux + 283) --#define __NR_eventfd2 (__NR_Linux + 284) --#define __NR_epoll_create1 (__NR_Linux + 285) --#define __NR_dup3 
(__NR_Linux + 286) --#define __NR_pipe2 (__NR_Linux + 287) --#define __NR_inotify_init1 (__NR_Linux + 288) --#define __NR_preadv (__NR_Linux + 289) --#define __NR_pwritev (__NR_Linux + 290) --#define __NR_rt_tgsigqueueinfo (__NR_Linux + 291) --#define __NR_perf_event_open (__NR_Linux + 292) --#define __NR_accept4 (__NR_Linux + 293) --#define __NR_recvmmsg (__NR_Linux + 294) --#define __NR_fanotify_init (__NR_Linux + 295) --#define __NR_fanotify_mark (__NR_Linux + 296) --#define __NR_prlimit64 (__NR_Linux + 297) --#define __NR_name_to_handle_at (__NR_Linux + 298) --#define __NR_open_by_handle_at (__NR_Linux + 299) --#define __NR_clock_adjtime (__NR_Linux + 300) --#define __NR_syncfs (__NR_Linux + 301) --#define __NR_sendmmsg (__NR_Linux + 302) --#define __NR_setns (__NR_Linux + 303) --#define __NR_process_vm_readv (__NR_Linux + 304) --#define __NR_process_vm_writev (__NR_Linux + 305) --#define __NR_kcmp (__NR_Linux + 306) --#define __NR_finit_module (__NR_Linux + 307) --#define __NR_getdents64 (__NR_Linux + 308) --#define __NR_sched_setattr (__NR_Linux + 309) --#define __NR_sched_getattr (__NR_Linux + 310) --#define __NR_renameat2 (__NR_Linux + 311) --#define __NR_seccomp (__NR_Linux + 312) --#define __NR_getrandom (__NR_Linux + 313) --#define __NR_memfd_create (__NR_Linux + 314) --#define __NR_bpf (__NR_Linux + 315) --#define __NR_execveat (__NR_Linux + 316) --#define __NR_userfaultfd (__NR_Linux + 317) --#define __NR_membarrier (__NR_Linux + 318) --#define __NR_mlock2 (__NR_Linux + 319) --#define __NR_copy_file_range (__NR_Linux + 320) --#define __NR_preadv2 (__NR_Linux + 321) --#define __NR_pwritev2 (__NR_Linux + 322) --#define __NR_pkey_mprotect (__NR_Linux + 323) --#define __NR_pkey_alloc (__NR_Linux + 324) --#define __NR_pkey_free (__NR_Linux + 325) --#define __NR_statx (__NR_Linux + 326) --#define __NR_rseq (__NR_Linux + 327) --#define __NR_io_pgetevents (__NR_Linux + 328) --#define __NR_pidfd_send_signal (__NR_Linux + 424) --#define __NR_io_uring_setup 
(__NR_Linux + 425) --#define __NR_io_uring_enter (__NR_Linux + 426) --#define __NR_io_uring_register (__NR_Linux + 427) --#define __NR_open_tree (__NR_Linux + 428) --#define __NR_move_mount (__NR_Linux + 429) --#define __NR_fsopen (__NR_Linux + 430) --#define __NR_fsconfig (__NR_Linux + 431) --#define __NR_fsmount (__NR_Linux + 432) --#define __NR_fspick (__NR_Linux + 433) --#define __NR_pidfd_open (__NR_Linux + 434) --#define __NR_clone3 (__NR_Linux + 435) --#define __NR_close_range (__NR_Linux + 436) --#define __NR_openat2 (__NR_Linux + 437) --#define __NR_pidfd_getfd (__NR_Linux + 438) --#define __NR_faccessat2 (__NR_Linux + 439) --#define __NR_process_madvise (__NR_Linux + 440) --#define __NR_epoll_pwait2 (__NR_Linux + 441) -+#define __NR_read (__NR_Linux + 0) -+#define __NR_write (__NR_Linux + 1) -+#define __NR_open (__NR_Linux + 2) -+#define __NR_close (__NR_Linux + 3) -+#define __NR_stat (__NR_Linux + 4) -+#define __NR_fstat (__NR_Linux + 5) -+#define __NR_lstat (__NR_Linux + 6) -+#define __NR_poll (__NR_Linux + 7) -+#define __NR_lseek (__NR_Linux + 8) -+#define __NR_mmap (__NR_Linux + 9) -+#define __NR_mprotect (__NR_Linux + 10) -+#define __NR_munmap (__NR_Linux + 11) -+#define __NR_brk (__NR_Linux + 12) -+#define __NR_rt_sigaction (__NR_Linux + 13) -+#define __NR_rt_sigprocmask (__NR_Linux + 14) -+#define __NR_ioctl (__NR_Linux + 15) -+#define __NR_pread64 (__NR_Linux + 16) -+#define __NR_pwrite64 (__NR_Linux + 17) -+#define __NR_readv (__NR_Linux + 18) -+#define __NR_writev (__NR_Linux + 19) -+#define __NR_access (__NR_Linux + 20) -+#define __NR_pipe (__NR_Linux + 21) -+#define __NR__newselect (__NR_Linux + 22) -+#define __NR_sched_yield (__NR_Linux + 23) -+#define __NR_mremap (__NR_Linux + 24) -+#define __NR_msync (__NR_Linux + 25) -+#define __NR_mincore (__NR_Linux + 26) -+#define __NR_madvise (__NR_Linux + 27) -+#define __NR_shmget (__NR_Linux + 28) -+#define __NR_shmat (__NR_Linux + 29) -+#define __NR_shmctl (__NR_Linux + 30) -+#define __NR_dup 
(__NR_Linux + 31) -+#define __NR_dup2 (__NR_Linux + 32) -+#define __NR_pause (__NR_Linux + 33) -+#define __NR_nanosleep (__NR_Linux + 34) -+#define __NR_getitimer (__NR_Linux + 35) -+#define __NR_setitimer (__NR_Linux + 36) -+#define __NR_alarm (__NR_Linux + 37) -+#define __NR_getpid (__NR_Linux + 38) -+#define __NR_sendfile (__NR_Linux + 39) -+#define __NR_socket (__NR_Linux + 40) -+#define __NR_connect (__NR_Linux + 41) -+#define __NR_accept (__NR_Linux + 42) -+#define __NR_sendto (__NR_Linux + 43) -+#define __NR_recvfrom (__NR_Linux + 44) -+#define __NR_sendmsg (__NR_Linux + 45) -+#define __NR_recvmsg (__NR_Linux + 46) -+#define __NR_shutdown (__NR_Linux + 47) -+#define __NR_bind (__NR_Linux + 48) -+#define __NR_listen (__NR_Linux + 49) -+#define __NR_getsockname (__NR_Linux + 50) -+#define __NR_getpeername (__NR_Linux + 51) -+#define __NR_socketpair (__NR_Linux + 52) -+#define __NR_setsockopt (__NR_Linux + 53) -+#define __NR_getsockopt (__NR_Linux + 54) -+#define __NR_clone (__NR_Linux + 55) -+#define __NR_fork (__NR_Linux + 56) -+#define __NR_execve (__NR_Linux + 57) -+#define __NR_exit (__NR_Linux + 58) -+#define __NR_wait4 (__NR_Linux + 59) -+#define __NR_kill (__NR_Linux + 60) -+#define __NR_uname (__NR_Linux + 61) -+#define __NR_semget (__NR_Linux + 62) -+#define __NR_semop (__NR_Linux + 63) -+#define __NR_semctl (__NR_Linux + 64) -+#define __NR_shmdt (__NR_Linux + 65) -+#define __NR_msgget (__NR_Linux + 66) -+#define __NR_msgsnd (__NR_Linux + 67) -+#define __NR_msgrcv (__NR_Linux + 68) -+#define __NR_msgctl (__NR_Linux + 69) -+#define __NR_fcntl (__NR_Linux + 70) -+#define __NR_flock (__NR_Linux + 71) -+#define __NR_fsync (__NR_Linux + 72) -+#define __NR_fdatasync (__NR_Linux + 73) -+#define __NR_truncate (__NR_Linux + 74) -+#define __NR_ftruncate (__NR_Linux + 75) -+#define __NR_getdents (__NR_Linux + 76) -+#define __NR_getcwd (__NR_Linux + 77) -+#define __NR_chdir (__NR_Linux + 78) -+#define __NR_fchdir (__NR_Linux + 79) -+#define __NR_rename 
(__NR_Linux + 80) -+#define __NR_mkdir (__NR_Linux + 81) -+#define __NR_rmdir (__NR_Linux + 82) -+#define __NR_creat (__NR_Linux + 83) -+#define __NR_link (__NR_Linux + 84) -+#define __NR_unlink (__NR_Linux + 85) -+#define __NR_symlink (__NR_Linux + 86) -+#define __NR_readlink (__NR_Linux + 87) -+#define __NR_chmod (__NR_Linux + 88) -+#define __NR_fchmod (__NR_Linux + 89) -+#define __NR_chown (__NR_Linux + 90) -+#define __NR_fchown (__NR_Linux + 91) -+#define __NR_lchown (__NR_Linux + 92) -+#define __NR_umask (__NR_Linux + 93) -+#define __NR_gettimeofday (__NR_Linux + 94) -+#define __NR_getrlimit (__NR_Linux + 95) -+#define __NR_getrusage (__NR_Linux + 96) -+#define __NR_sysinfo (__NR_Linux + 97) -+#define __NR_times (__NR_Linux + 98) -+#define __NR_ptrace (__NR_Linux + 99) -+#define __NR_getuid (__NR_Linux + 100) -+#define __NR_syslog (__NR_Linux + 101) -+#define __NR_getgid (__NR_Linux + 102) -+#define __NR_setuid (__NR_Linux + 103) -+#define __NR_setgid (__NR_Linux + 104) -+#define __NR_geteuid (__NR_Linux + 105) -+#define __NR_getegid (__NR_Linux + 106) -+#define __NR_setpgid (__NR_Linux + 107) -+#define __NR_getppid (__NR_Linux + 108) -+#define __NR_getpgrp (__NR_Linux + 109) -+#define __NR_setsid (__NR_Linux + 110) -+#define __NR_setreuid (__NR_Linux + 111) -+#define __NR_setregid (__NR_Linux + 112) -+#define __NR_getgroups (__NR_Linux + 113) -+#define __NR_setgroups (__NR_Linux + 114) -+#define __NR_setresuid (__NR_Linux + 115) -+#define __NR_getresuid (__NR_Linux + 116) -+#define __NR_setresgid (__NR_Linux + 117) -+#define __NR_getresgid (__NR_Linux + 118) -+#define __NR_getpgid (__NR_Linux + 119) -+#define __NR_setfsuid (__NR_Linux + 120) -+#define __NR_setfsgid (__NR_Linux + 121) -+#define __NR_getsid (__NR_Linux + 122) -+#define __NR_capget (__NR_Linux + 123) -+#define __NR_capset (__NR_Linux + 124) -+#define __NR_rt_sigpending (__NR_Linux + 125) -+#define __NR_rt_sigtimedwait (__NR_Linux + 126) -+#define __NR_rt_sigqueueinfo (__NR_Linux + 127) -+#define 
__NR_rt_sigsuspend (__NR_Linux + 128) -+#define __NR_sigaltstack (__NR_Linux + 129) -+#define __NR_utime (__NR_Linux + 130) -+#define __NR_mknod (__NR_Linux + 131) -+#define __NR_personality (__NR_Linux + 132) -+#define __NR_ustat (__NR_Linux + 133) -+#define __NR_statfs (__NR_Linux + 134) -+#define __NR_fstatfs (__NR_Linux + 135) -+#define __NR_sysfs (__NR_Linux + 136) -+#define __NR_getpriority (__NR_Linux + 137) -+#define __NR_setpriority (__NR_Linux + 138) -+#define __NR_sched_setparam (__NR_Linux + 139) -+#define __NR_sched_getparam (__NR_Linux + 140) -+#define __NR_sched_setscheduler (__NR_Linux + 141) -+#define __NR_sched_getscheduler (__NR_Linux + 142) -+#define __NR_sched_get_priority_max (__NR_Linux + 143) -+#define __NR_sched_get_priority_min (__NR_Linux + 144) -+#define __NR_sched_rr_get_interval (__NR_Linux + 145) -+#define __NR_mlock (__NR_Linux + 146) -+#define __NR_munlock (__NR_Linux + 147) -+#define __NR_mlockall (__NR_Linux + 148) -+#define __NR_munlockall (__NR_Linux + 149) -+#define __NR_vhangup (__NR_Linux + 150) -+#define __NR_pivot_root (__NR_Linux + 151) -+#define __NR__sysctl (__NR_Linux + 152) -+#define __NR_prctl (__NR_Linux + 153) -+#define __NR_adjtimex (__NR_Linux + 154) -+#define __NR_setrlimit (__NR_Linux + 155) -+#define __NR_chroot (__NR_Linux + 156) -+#define __NR_sync (__NR_Linux + 157) -+#define __NR_acct (__NR_Linux + 158) -+#define __NR_settimeofday (__NR_Linux + 159) -+#define __NR_mount (__NR_Linux + 160) -+#define __NR_umount2 (__NR_Linux + 161) -+#define __NR_swapon (__NR_Linux + 162) -+#define __NR_swapoff (__NR_Linux + 163) -+#define __NR_reboot (__NR_Linux + 164) -+#define __NR_sethostname (__NR_Linux + 165) -+#define __NR_setdomainname (__NR_Linux + 166) -+#define __NR_create_module (__NR_Linux + 167) -+#define __NR_init_module (__NR_Linux + 168) -+#define __NR_delete_module (__NR_Linux + 169) -+#define __NR_get_kernel_syms (__NR_Linux + 170) -+#define __NR_query_module (__NR_Linux + 171) -+#define __NR_quotactl 
(__NR_Linux + 172) -+#define __NR_nfsservctl (__NR_Linux + 173) -+#define __NR_getpmsg (__NR_Linux + 174) -+#define __NR_putpmsg (__NR_Linux + 175) -+#define __NR_afs_syscall (__NR_Linux + 176) -+#define __NR_reserved177 (__NR_Linux + 177) -+#define __NR_gettid (__NR_Linux + 178) -+#define __NR_readahead (__NR_Linux + 179) -+#define __NR_setxattr (__NR_Linux + 180) -+#define __NR_lsetxattr (__NR_Linux + 181) -+#define __NR_fsetxattr (__NR_Linux + 182) -+#define __NR_getxattr (__NR_Linux + 183) -+#define __NR_lgetxattr (__NR_Linux + 184) -+#define __NR_fgetxattr (__NR_Linux + 185) -+#define __NR_listxattr (__NR_Linux + 186) -+#define __NR_llistxattr (__NR_Linux + 187) -+#define __NR_flistxattr (__NR_Linux + 188) -+#define __NR_removexattr (__NR_Linux + 189) -+#define __NR_lremovexattr (__NR_Linux + 190) -+#define __NR_fremovexattr (__NR_Linux + 191) -+#define __NR_tkill (__NR_Linux + 192) -+#define __NR_reserved193 (__NR_Linux + 193) -+#define __NR_futex (__NR_Linux + 194) -+#define __NR_sched_setaffinity (__NR_Linux + 195) -+#define __NR_sched_getaffinity (__NR_Linux + 196) -+#define __NR_cacheflush (__NR_Linux + 197) -+#define __NR_cachectl (__NR_Linux + 198) -+#define __NR_sysmips (__NR_Linux + 199) -+#define __NR_io_setup (__NR_Linux + 200) -+#define __NR_io_destroy (__NR_Linux + 201) -+#define __NR_io_getevents (__NR_Linux + 202) -+#define __NR_io_submit (__NR_Linux + 203) -+#define __NR_io_cancel (__NR_Linux + 204) -+#define __NR_exit_group (__NR_Linux + 205) -+#define __NR_lookup_dcookie (__NR_Linux + 206) -+#define __NR_epoll_create (__NR_Linux + 207) -+#define __NR_epoll_ctl (__NR_Linux + 208) -+#define __NR_epoll_wait (__NR_Linux + 209) -+#define __NR_remap_file_pages (__NR_Linux + 210) -+#define __NR_rt_sigreturn (__NR_Linux + 211) -+#define __NR_set_tid_address (__NR_Linux + 212) -+#define __NR_restart_syscall (__NR_Linux + 213) -+#define __NR_semtimedop (__NR_Linux + 214) -+#define __NR_fadvise64 (__NR_Linux + 215) -+#define __NR_timer_create 
(__NR_Linux + 216) -+#define __NR_timer_settime (__NR_Linux + 217) -+#define __NR_timer_gettime (__NR_Linux + 218) -+#define __NR_timer_getoverrun (__NR_Linux + 219) -+#define __NR_timer_delete (__NR_Linux + 220) -+#define __NR_clock_settime (__NR_Linux + 221) -+#define __NR_clock_gettime (__NR_Linux + 222) -+#define __NR_clock_getres (__NR_Linux + 223) -+#define __NR_clock_nanosleep (__NR_Linux + 224) -+#define __NR_tgkill (__NR_Linux + 225) -+#define __NR_utimes (__NR_Linux + 226) -+#define __NR_mbind (__NR_Linux + 227) -+#define __NR_get_mempolicy (__NR_Linux + 228) -+#define __NR_set_mempolicy (__NR_Linux + 229) -+#define __NR_mq_open (__NR_Linux + 230) -+#define __NR_mq_unlink (__NR_Linux + 231) -+#define __NR_mq_timedsend (__NR_Linux + 232) -+#define __NR_mq_timedreceive (__NR_Linux + 233) -+#define __NR_mq_notify (__NR_Linux + 234) -+#define __NR_mq_getsetattr (__NR_Linux + 235) -+#define __NR_vserver (__NR_Linux + 236) -+#define __NR_waitid (__NR_Linux + 237) -+#define __NR_add_key (__NR_Linux + 239) -+#define __NR_request_key (__NR_Linux + 240) -+#define __NR_keyctl (__NR_Linux + 241) -+#define __NR_set_thread_area (__NR_Linux + 242) -+#define __NR_inotify_init (__NR_Linux + 243) -+#define __NR_inotify_add_watch (__NR_Linux + 244) -+#define __NR_inotify_rm_watch (__NR_Linux + 245) -+#define __NR_migrate_pages (__NR_Linux + 246) -+#define __NR_openat (__NR_Linux + 247) -+#define __NR_mkdirat (__NR_Linux + 248) -+#define __NR_mknodat (__NR_Linux + 249) -+#define __NR_fchownat (__NR_Linux + 250) -+#define __NR_futimesat (__NR_Linux + 251) -+#define __NR_newfstatat (__NR_Linux + 252) -+#define __NR_unlinkat (__NR_Linux + 253) -+#define __NR_renameat (__NR_Linux + 254) -+#define __NR_linkat (__NR_Linux + 255) -+#define __NR_symlinkat (__NR_Linux + 256) -+#define __NR_readlinkat (__NR_Linux + 257) -+#define __NR_fchmodat (__NR_Linux + 258) -+#define __NR_faccessat (__NR_Linux + 259) -+#define __NR_pselect6 (__NR_Linux + 260) -+#define __NR_ppoll (__NR_Linux + 
261) -+#define __NR_unshare (__NR_Linux + 262) -+#define __NR_splice (__NR_Linux + 263) -+#define __NR_sync_file_range (__NR_Linux + 264) -+#define __NR_tee (__NR_Linux + 265) -+#define __NR_vmsplice (__NR_Linux + 266) -+#define __NR_move_pages (__NR_Linux + 267) -+#define __NR_set_robust_list (__NR_Linux + 268) -+#define __NR_get_robust_list (__NR_Linux + 269) -+#define __NR_kexec_load (__NR_Linux + 270) -+#define __NR_getcpu (__NR_Linux + 271) -+#define __NR_epoll_pwait (__NR_Linux + 272) -+#define __NR_ioprio_set (__NR_Linux + 273) -+#define __NR_ioprio_get (__NR_Linux + 274) -+#define __NR_utimensat (__NR_Linux + 275) -+#define __NR_signalfd (__NR_Linux + 276) -+#define __NR_timerfd (__NR_Linux + 277) -+#define __NR_eventfd (__NR_Linux + 278) -+#define __NR_fallocate (__NR_Linux + 279) -+#define __NR_timerfd_create (__NR_Linux + 280) -+#define __NR_timerfd_gettime (__NR_Linux + 281) -+#define __NR_timerfd_settime (__NR_Linux + 282) -+#define __NR_signalfd4 (__NR_Linux + 283) -+#define __NR_eventfd2 (__NR_Linux + 284) -+#define __NR_epoll_create1 (__NR_Linux + 285) -+#define __NR_dup3 (__NR_Linux + 286) -+#define __NR_pipe2 (__NR_Linux + 287) -+#define __NR_inotify_init1 (__NR_Linux + 288) -+#define __NR_preadv (__NR_Linux + 289) -+#define __NR_pwritev (__NR_Linux + 290) -+#define __NR_rt_tgsigqueueinfo (__NR_Linux + 291) -+#define __NR_perf_event_open (__NR_Linux + 292) -+#define __NR_accept4 (__NR_Linux + 293) -+#define __NR_recvmmsg (__NR_Linux + 294) -+#define __NR_fanotify_init (__NR_Linux + 295) -+#define __NR_fanotify_mark (__NR_Linux + 296) -+#define __NR_prlimit64 (__NR_Linux + 297) -+#define __NR_name_to_handle_at (__NR_Linux + 298) -+#define __NR_open_by_handle_at (__NR_Linux + 299) -+#define __NR_clock_adjtime (__NR_Linux + 300) -+#define __NR_syncfs (__NR_Linux + 301) -+#define __NR_sendmmsg (__NR_Linux + 302) -+#define __NR_setns (__NR_Linux + 303) -+#define __NR_process_vm_readv (__NR_Linux + 304) -+#define __NR_process_vm_writev (__NR_Linux + 
305) -+#define __NR_kcmp (__NR_Linux + 306) -+#define __NR_finit_module (__NR_Linux + 307) -+#define __NR_getdents64 (__NR_Linux + 308) -+#define __NR_sched_setattr (__NR_Linux + 309) -+#define __NR_sched_getattr (__NR_Linux + 310) -+#define __NR_renameat2 (__NR_Linux + 311) -+#define __NR_seccomp (__NR_Linux + 312) -+#define __NR_getrandom (__NR_Linux + 313) -+#define __NR_memfd_create (__NR_Linux + 314) -+#define __NR_bpf (__NR_Linux + 315) -+#define __NR_execveat (__NR_Linux + 316) -+#define __NR_userfaultfd (__NR_Linux + 317) -+#define __NR_membarrier (__NR_Linux + 318) -+#define __NR_mlock2 (__NR_Linux + 319) -+#define __NR_copy_file_range (__NR_Linux + 320) -+#define __NR_preadv2 (__NR_Linux + 321) -+#define __NR_pwritev2 (__NR_Linux + 322) -+#define __NR_pkey_mprotect (__NR_Linux + 323) -+#define __NR_pkey_alloc (__NR_Linux + 324) -+#define __NR_pkey_free (__NR_Linux + 325) -+#define __NR_statx (__NR_Linux + 326) -+#define __NR_rseq (__NR_Linux + 327) -+#define __NR_io_pgetevents (__NR_Linux + 328) -+#define __NR_pidfd_send_signal (__NR_Linux + 424) -+#define __NR_io_uring_setup (__NR_Linux + 425) -+#define __NR_io_uring_enter (__NR_Linux + 426) -+#define __NR_io_uring_register (__NR_Linux + 427) -+#define __NR_open_tree (__NR_Linux + 428) -+#define __NR_move_mount (__NR_Linux + 429) -+#define __NR_fsopen (__NR_Linux + 430) -+#define __NR_fsconfig (__NR_Linux + 431) -+#define __NR_fsmount (__NR_Linux + 432) -+#define __NR_fspick (__NR_Linux + 433) -+#define __NR_pidfd_open (__NR_Linux + 434) -+#define __NR_clone3 (__NR_Linux + 435) -+#define __NR_close_range (__NR_Linux + 436) -+#define __NR_openat2 (__NR_Linux + 437) -+#define __NR_pidfd_getfd (__NR_Linux + 438) -+#define __NR_faccessat2 (__NR_Linux + 439) -+#define __NR_process_madvise (__NR_Linux + 440) -+#define __NR_epoll_pwait2 (__NR_Linux + 441) -+#define __NR_mount_setattr (__NR_Linux + 442) -+#define __NR_landlock_create_ruleset (__NR_Linux + 444) -+#define __NR_landlock_add_rule (__NR_Linux + 445) 
-+#define __NR_landlock_restrict_self (__NR_Linux + 446) - -- --#endif /* _ASM_MIPS_UNISTD_N64_H */ -+#endif /* _ASM_UNISTD_N64_H */ -diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h -index ca6a7e5c0b..954303ad69 100644 ---- a/linux-headers/asm-mips/unistd_o32.h -+++ b/linux-headers/asm-mips/unistd_o32.h -@@ -1,422 +1,425 @@ --#ifndef _ASM_MIPS_UNISTD_O32_H --#define _ASM_MIPS_UNISTD_O32_H -+#ifndef _ASM_UNISTD_O32_H -+#define _ASM_UNISTD_O32_H - --#define __NR_syscall (__NR_Linux + 0) --#define __NR_exit (__NR_Linux + 1) --#define __NR_fork (__NR_Linux + 2) --#define __NR_read (__NR_Linux + 3) --#define __NR_write (__NR_Linux + 4) --#define __NR_open (__NR_Linux + 5) --#define __NR_close (__NR_Linux + 6) --#define __NR_waitpid (__NR_Linux + 7) --#define __NR_creat (__NR_Linux + 8) --#define __NR_link (__NR_Linux + 9) --#define __NR_unlink (__NR_Linux + 10) --#define __NR_execve (__NR_Linux + 11) --#define __NR_chdir (__NR_Linux + 12) --#define __NR_time (__NR_Linux + 13) --#define __NR_mknod (__NR_Linux + 14) --#define __NR_chmod (__NR_Linux + 15) --#define __NR_lchown (__NR_Linux + 16) --#define __NR_break (__NR_Linux + 17) --#define __NR_unused18 (__NR_Linux + 18) --#define __NR_lseek (__NR_Linux + 19) --#define __NR_getpid (__NR_Linux + 20) --#define __NR_mount (__NR_Linux + 21) --#define __NR_umount (__NR_Linux + 22) --#define __NR_setuid (__NR_Linux + 23) --#define __NR_getuid (__NR_Linux + 24) --#define __NR_stime (__NR_Linux + 25) --#define __NR_ptrace (__NR_Linux + 26) --#define __NR_alarm (__NR_Linux + 27) --#define __NR_unused28 (__NR_Linux + 28) --#define __NR_pause (__NR_Linux + 29) --#define __NR_utime (__NR_Linux + 30) --#define __NR_stty (__NR_Linux + 31) --#define __NR_gtty (__NR_Linux + 32) --#define __NR_access (__NR_Linux + 33) --#define __NR_nice (__NR_Linux + 34) --#define __NR_ftime (__NR_Linux + 35) --#define __NR_sync (__NR_Linux + 36) --#define __NR_kill (__NR_Linux + 37) --#define __NR_rename 
(__NR_Linux + 38) --#define __NR_mkdir (__NR_Linux + 39) --#define __NR_rmdir (__NR_Linux + 40) --#define __NR_dup (__NR_Linux + 41) --#define __NR_pipe (__NR_Linux + 42) --#define __NR_times (__NR_Linux + 43) --#define __NR_prof (__NR_Linux + 44) --#define __NR_brk (__NR_Linux + 45) --#define __NR_setgid (__NR_Linux + 46) --#define __NR_getgid (__NR_Linux + 47) --#define __NR_signal (__NR_Linux + 48) --#define __NR_geteuid (__NR_Linux + 49) --#define __NR_getegid (__NR_Linux + 50) --#define __NR_acct (__NR_Linux + 51) --#define __NR_umount2 (__NR_Linux + 52) --#define __NR_lock (__NR_Linux + 53) --#define __NR_ioctl (__NR_Linux + 54) --#define __NR_fcntl (__NR_Linux + 55) --#define __NR_mpx (__NR_Linux + 56) --#define __NR_setpgid (__NR_Linux + 57) --#define __NR_ulimit (__NR_Linux + 58) --#define __NR_unused59 (__NR_Linux + 59) --#define __NR_umask (__NR_Linux + 60) --#define __NR_chroot (__NR_Linux + 61) --#define __NR_ustat (__NR_Linux + 62) --#define __NR_dup2 (__NR_Linux + 63) --#define __NR_getppid (__NR_Linux + 64) --#define __NR_getpgrp (__NR_Linux + 65) --#define __NR_setsid (__NR_Linux + 66) --#define __NR_sigaction (__NR_Linux + 67) --#define __NR_sgetmask (__NR_Linux + 68) --#define __NR_ssetmask (__NR_Linux + 69) --#define __NR_setreuid (__NR_Linux + 70) --#define __NR_setregid (__NR_Linux + 71) --#define __NR_sigsuspend (__NR_Linux + 72) --#define __NR_sigpending (__NR_Linux + 73) --#define __NR_sethostname (__NR_Linux + 74) --#define __NR_setrlimit (__NR_Linux + 75) --#define __NR_getrlimit (__NR_Linux + 76) --#define __NR_getrusage (__NR_Linux + 77) --#define __NR_gettimeofday (__NR_Linux + 78) --#define __NR_settimeofday (__NR_Linux + 79) --#define __NR_getgroups (__NR_Linux + 80) --#define __NR_setgroups (__NR_Linux + 81) --#define __NR_reserved82 (__NR_Linux + 82) --#define __NR_symlink (__NR_Linux + 83) --#define __NR_unused84 (__NR_Linux + 84) --#define __NR_readlink (__NR_Linux + 85) --#define __NR_uselib (__NR_Linux + 86) --#define 
__NR_swapon (__NR_Linux + 87) --#define __NR_reboot (__NR_Linux + 88) --#define __NR_readdir (__NR_Linux + 89) --#define __NR_mmap (__NR_Linux + 90) --#define __NR_munmap (__NR_Linux + 91) --#define __NR_truncate (__NR_Linux + 92) --#define __NR_ftruncate (__NR_Linux + 93) --#define __NR_fchmod (__NR_Linux + 94) --#define __NR_fchown (__NR_Linux + 95) --#define __NR_getpriority (__NR_Linux + 96) --#define __NR_setpriority (__NR_Linux + 97) --#define __NR_profil (__NR_Linux + 98) --#define __NR_statfs (__NR_Linux + 99) --#define __NR_fstatfs (__NR_Linux + 100) --#define __NR_ioperm (__NR_Linux + 101) --#define __NR_socketcall (__NR_Linux + 102) --#define __NR_syslog (__NR_Linux + 103) --#define __NR_setitimer (__NR_Linux + 104) --#define __NR_getitimer (__NR_Linux + 105) --#define __NR_stat (__NR_Linux + 106) --#define __NR_lstat (__NR_Linux + 107) --#define __NR_fstat (__NR_Linux + 108) --#define __NR_unused109 (__NR_Linux + 109) --#define __NR_iopl (__NR_Linux + 110) --#define __NR_vhangup (__NR_Linux + 111) --#define __NR_idle (__NR_Linux + 112) --#define __NR_vm86 (__NR_Linux + 113) --#define __NR_wait4 (__NR_Linux + 114) --#define __NR_swapoff (__NR_Linux + 115) --#define __NR_sysinfo (__NR_Linux + 116) --#define __NR_ipc (__NR_Linux + 117) --#define __NR_fsync (__NR_Linux + 118) --#define __NR_sigreturn (__NR_Linux + 119) --#define __NR_clone (__NR_Linux + 120) --#define __NR_setdomainname (__NR_Linux + 121) --#define __NR_uname (__NR_Linux + 122) --#define __NR_modify_ldt (__NR_Linux + 123) --#define __NR_adjtimex (__NR_Linux + 124) --#define __NR_mprotect (__NR_Linux + 125) --#define __NR_sigprocmask (__NR_Linux + 126) --#define __NR_create_module (__NR_Linux + 127) --#define __NR_init_module (__NR_Linux + 128) --#define __NR_delete_module (__NR_Linux + 129) --#define __NR_get_kernel_syms (__NR_Linux + 130) --#define __NR_quotactl (__NR_Linux + 131) --#define __NR_getpgid (__NR_Linux + 132) --#define __NR_fchdir (__NR_Linux + 133) --#define __NR_bdflush 
(__NR_Linux + 134) --#define __NR_sysfs (__NR_Linux + 135) --#define __NR_personality (__NR_Linux + 136) --#define __NR_afs_syscall (__NR_Linux + 137) --#define __NR_setfsuid (__NR_Linux + 138) --#define __NR_setfsgid (__NR_Linux + 139) --#define __NR__llseek (__NR_Linux + 140) --#define __NR_getdents (__NR_Linux + 141) --#define __NR__newselect (__NR_Linux + 142) --#define __NR_flock (__NR_Linux + 143) --#define __NR_msync (__NR_Linux + 144) --#define __NR_readv (__NR_Linux + 145) --#define __NR_writev (__NR_Linux + 146) --#define __NR_cacheflush (__NR_Linux + 147) --#define __NR_cachectl (__NR_Linux + 148) --#define __NR_sysmips (__NR_Linux + 149) --#define __NR_unused150 (__NR_Linux + 150) --#define __NR_getsid (__NR_Linux + 151) --#define __NR_fdatasync (__NR_Linux + 152) --#define __NR__sysctl (__NR_Linux + 153) --#define __NR_mlock (__NR_Linux + 154) --#define __NR_munlock (__NR_Linux + 155) --#define __NR_mlockall (__NR_Linux + 156) --#define __NR_munlockall (__NR_Linux + 157) --#define __NR_sched_setparam (__NR_Linux + 158) --#define __NR_sched_getparam (__NR_Linux + 159) --#define __NR_sched_setscheduler (__NR_Linux + 160) --#define __NR_sched_getscheduler (__NR_Linux + 161) --#define __NR_sched_yield (__NR_Linux + 162) --#define __NR_sched_get_priority_max (__NR_Linux + 163) --#define __NR_sched_get_priority_min (__NR_Linux + 164) --#define __NR_sched_rr_get_interval (__NR_Linux + 165) --#define __NR_nanosleep (__NR_Linux + 166) --#define __NR_mremap (__NR_Linux + 167) --#define __NR_accept (__NR_Linux + 168) --#define __NR_bind (__NR_Linux + 169) --#define __NR_connect (__NR_Linux + 170) --#define __NR_getpeername (__NR_Linux + 171) --#define __NR_getsockname (__NR_Linux + 172) --#define __NR_getsockopt (__NR_Linux + 173) --#define __NR_listen (__NR_Linux + 174) --#define __NR_recv (__NR_Linux + 175) --#define __NR_recvfrom (__NR_Linux + 176) --#define __NR_recvmsg (__NR_Linux + 177) --#define __NR_send (__NR_Linux + 178) --#define __NR_sendmsg 
(__NR_Linux + 179) --#define __NR_sendto (__NR_Linux + 180) --#define __NR_setsockopt (__NR_Linux + 181) --#define __NR_shutdown (__NR_Linux + 182) --#define __NR_socket (__NR_Linux + 183) --#define __NR_socketpair (__NR_Linux + 184) --#define __NR_setresuid (__NR_Linux + 185) --#define __NR_getresuid (__NR_Linux + 186) --#define __NR_query_module (__NR_Linux + 187) --#define __NR_poll (__NR_Linux + 188) --#define __NR_nfsservctl (__NR_Linux + 189) --#define __NR_setresgid (__NR_Linux + 190) --#define __NR_getresgid (__NR_Linux + 191) --#define __NR_prctl (__NR_Linux + 192) --#define __NR_rt_sigreturn (__NR_Linux + 193) --#define __NR_rt_sigaction (__NR_Linux + 194) --#define __NR_rt_sigprocmask (__NR_Linux + 195) --#define __NR_rt_sigpending (__NR_Linux + 196) --#define __NR_rt_sigtimedwait (__NR_Linux + 197) --#define __NR_rt_sigqueueinfo (__NR_Linux + 198) --#define __NR_rt_sigsuspend (__NR_Linux + 199) --#define __NR_pread64 (__NR_Linux + 200) --#define __NR_pwrite64 (__NR_Linux + 201) --#define __NR_chown (__NR_Linux + 202) --#define __NR_getcwd (__NR_Linux + 203) --#define __NR_capget (__NR_Linux + 204) --#define __NR_capset (__NR_Linux + 205) --#define __NR_sigaltstack (__NR_Linux + 206) --#define __NR_sendfile (__NR_Linux + 207) --#define __NR_getpmsg (__NR_Linux + 208) --#define __NR_putpmsg (__NR_Linux + 209) --#define __NR_mmap2 (__NR_Linux + 210) --#define __NR_truncate64 (__NR_Linux + 211) --#define __NR_ftruncate64 (__NR_Linux + 212) --#define __NR_stat64 (__NR_Linux + 213) --#define __NR_lstat64 (__NR_Linux + 214) --#define __NR_fstat64 (__NR_Linux + 215) --#define __NR_pivot_root (__NR_Linux + 216) --#define __NR_mincore (__NR_Linux + 217) --#define __NR_madvise (__NR_Linux + 218) --#define __NR_getdents64 (__NR_Linux + 219) --#define __NR_fcntl64 (__NR_Linux + 220) --#define __NR_reserved221 (__NR_Linux + 221) --#define __NR_gettid (__NR_Linux + 222) --#define __NR_readahead (__NR_Linux + 223) --#define __NR_setxattr (__NR_Linux + 224) --#define 
__NR_lsetxattr (__NR_Linux + 225) --#define __NR_fsetxattr (__NR_Linux + 226) --#define __NR_getxattr (__NR_Linux + 227) --#define __NR_lgetxattr (__NR_Linux + 228) --#define __NR_fgetxattr (__NR_Linux + 229) --#define __NR_listxattr (__NR_Linux + 230) --#define __NR_llistxattr (__NR_Linux + 231) --#define __NR_flistxattr (__NR_Linux + 232) --#define __NR_removexattr (__NR_Linux + 233) --#define __NR_lremovexattr (__NR_Linux + 234) --#define __NR_fremovexattr (__NR_Linux + 235) --#define __NR_tkill (__NR_Linux + 236) --#define __NR_sendfile64 (__NR_Linux + 237) --#define __NR_futex (__NR_Linux + 238) --#define __NR_sched_setaffinity (__NR_Linux + 239) --#define __NR_sched_getaffinity (__NR_Linux + 240) --#define __NR_io_setup (__NR_Linux + 241) --#define __NR_io_destroy (__NR_Linux + 242) --#define __NR_io_getevents (__NR_Linux + 243) --#define __NR_io_submit (__NR_Linux + 244) --#define __NR_io_cancel (__NR_Linux + 245) --#define __NR_exit_group (__NR_Linux + 246) --#define __NR_lookup_dcookie (__NR_Linux + 247) --#define __NR_epoll_create (__NR_Linux + 248) --#define __NR_epoll_ctl (__NR_Linux + 249) --#define __NR_epoll_wait (__NR_Linux + 250) --#define __NR_remap_file_pages (__NR_Linux + 251) --#define __NR_set_tid_address (__NR_Linux + 252) --#define __NR_restart_syscall (__NR_Linux + 253) --#define __NR_fadvise64 (__NR_Linux + 254) --#define __NR_statfs64 (__NR_Linux + 255) --#define __NR_fstatfs64 (__NR_Linux + 256) --#define __NR_timer_create (__NR_Linux + 257) --#define __NR_timer_settime (__NR_Linux + 258) --#define __NR_timer_gettime (__NR_Linux + 259) --#define __NR_timer_getoverrun (__NR_Linux + 260) --#define __NR_timer_delete (__NR_Linux + 261) --#define __NR_clock_settime (__NR_Linux + 262) --#define __NR_clock_gettime (__NR_Linux + 263) --#define __NR_clock_getres (__NR_Linux + 264) --#define __NR_clock_nanosleep (__NR_Linux + 265) --#define __NR_tgkill (__NR_Linux + 266) --#define __NR_utimes (__NR_Linux + 267) --#define __NR_mbind (__NR_Linux + 
268) --#define __NR_get_mempolicy (__NR_Linux + 269) --#define __NR_set_mempolicy (__NR_Linux + 270) --#define __NR_mq_open (__NR_Linux + 271) --#define __NR_mq_unlink (__NR_Linux + 272) --#define __NR_mq_timedsend (__NR_Linux + 273) --#define __NR_mq_timedreceive (__NR_Linux + 274) --#define __NR_mq_notify (__NR_Linux + 275) --#define __NR_mq_getsetattr (__NR_Linux + 276) --#define __NR_vserver (__NR_Linux + 277) --#define __NR_waitid (__NR_Linux + 278) --#define __NR_add_key (__NR_Linux + 280) --#define __NR_request_key (__NR_Linux + 281) --#define __NR_keyctl (__NR_Linux + 282) --#define __NR_set_thread_area (__NR_Linux + 283) --#define __NR_inotify_init (__NR_Linux + 284) --#define __NR_inotify_add_watch (__NR_Linux + 285) --#define __NR_inotify_rm_watch (__NR_Linux + 286) --#define __NR_migrate_pages (__NR_Linux + 287) --#define __NR_openat (__NR_Linux + 288) --#define __NR_mkdirat (__NR_Linux + 289) --#define __NR_mknodat (__NR_Linux + 290) --#define __NR_fchownat (__NR_Linux + 291) --#define __NR_futimesat (__NR_Linux + 292) --#define __NR_fstatat64 (__NR_Linux + 293) --#define __NR_unlinkat (__NR_Linux + 294) --#define __NR_renameat (__NR_Linux + 295) --#define __NR_linkat (__NR_Linux + 296) --#define __NR_symlinkat (__NR_Linux + 297) --#define __NR_readlinkat (__NR_Linux + 298) --#define __NR_fchmodat (__NR_Linux + 299) --#define __NR_faccessat (__NR_Linux + 300) --#define __NR_pselect6 (__NR_Linux + 301) --#define __NR_ppoll (__NR_Linux + 302) --#define __NR_unshare (__NR_Linux + 303) --#define __NR_splice (__NR_Linux + 304) --#define __NR_sync_file_range (__NR_Linux + 305) --#define __NR_tee (__NR_Linux + 306) --#define __NR_vmsplice (__NR_Linux + 307) --#define __NR_move_pages (__NR_Linux + 308) --#define __NR_set_robust_list (__NR_Linux + 309) --#define __NR_get_robust_list (__NR_Linux + 310) --#define __NR_kexec_load (__NR_Linux + 311) --#define __NR_getcpu (__NR_Linux + 312) --#define __NR_epoll_pwait (__NR_Linux + 313) --#define __NR_ioprio_set 
(__NR_Linux + 314) --#define __NR_ioprio_get (__NR_Linux + 315) --#define __NR_utimensat (__NR_Linux + 316) --#define __NR_signalfd (__NR_Linux + 317) --#define __NR_timerfd (__NR_Linux + 318) --#define __NR_eventfd (__NR_Linux + 319) --#define __NR_fallocate (__NR_Linux + 320) --#define __NR_timerfd_create (__NR_Linux + 321) --#define __NR_timerfd_gettime (__NR_Linux + 322) --#define __NR_timerfd_settime (__NR_Linux + 323) --#define __NR_signalfd4 (__NR_Linux + 324) --#define __NR_eventfd2 (__NR_Linux + 325) --#define __NR_epoll_create1 (__NR_Linux + 326) --#define __NR_dup3 (__NR_Linux + 327) --#define __NR_pipe2 (__NR_Linux + 328) --#define __NR_inotify_init1 (__NR_Linux + 329) --#define __NR_preadv (__NR_Linux + 330) --#define __NR_pwritev (__NR_Linux + 331) --#define __NR_rt_tgsigqueueinfo (__NR_Linux + 332) --#define __NR_perf_event_open (__NR_Linux + 333) --#define __NR_accept4 (__NR_Linux + 334) --#define __NR_recvmmsg (__NR_Linux + 335) --#define __NR_fanotify_init (__NR_Linux + 336) --#define __NR_fanotify_mark (__NR_Linux + 337) --#define __NR_prlimit64 (__NR_Linux + 338) --#define __NR_name_to_handle_at (__NR_Linux + 339) --#define __NR_open_by_handle_at (__NR_Linux + 340) --#define __NR_clock_adjtime (__NR_Linux + 341) --#define __NR_syncfs (__NR_Linux + 342) --#define __NR_sendmmsg (__NR_Linux + 343) --#define __NR_setns (__NR_Linux + 344) --#define __NR_process_vm_readv (__NR_Linux + 345) --#define __NR_process_vm_writev (__NR_Linux + 346) --#define __NR_kcmp (__NR_Linux + 347) --#define __NR_finit_module (__NR_Linux + 348) --#define __NR_sched_setattr (__NR_Linux + 349) --#define __NR_sched_getattr (__NR_Linux + 350) --#define __NR_renameat2 (__NR_Linux + 351) --#define __NR_seccomp (__NR_Linux + 352) --#define __NR_getrandom (__NR_Linux + 353) --#define __NR_memfd_create (__NR_Linux + 354) --#define __NR_bpf (__NR_Linux + 355) --#define __NR_execveat (__NR_Linux + 356) --#define __NR_userfaultfd (__NR_Linux + 357) --#define __NR_membarrier 
(__NR_Linux + 358) --#define __NR_mlock2 (__NR_Linux + 359) --#define __NR_copy_file_range (__NR_Linux + 360) --#define __NR_preadv2 (__NR_Linux + 361) --#define __NR_pwritev2 (__NR_Linux + 362) --#define __NR_pkey_mprotect (__NR_Linux + 363) --#define __NR_pkey_alloc (__NR_Linux + 364) --#define __NR_pkey_free (__NR_Linux + 365) --#define __NR_statx (__NR_Linux + 366) --#define __NR_rseq (__NR_Linux + 367) --#define __NR_io_pgetevents (__NR_Linux + 368) --#define __NR_semget (__NR_Linux + 393) --#define __NR_semctl (__NR_Linux + 394) --#define __NR_shmget (__NR_Linux + 395) --#define __NR_shmctl (__NR_Linux + 396) --#define __NR_shmat (__NR_Linux + 397) --#define __NR_shmdt (__NR_Linux + 398) --#define __NR_msgget (__NR_Linux + 399) --#define __NR_msgsnd (__NR_Linux + 400) --#define __NR_msgrcv (__NR_Linux + 401) --#define __NR_msgctl (__NR_Linux + 402) --#define __NR_clock_gettime64 (__NR_Linux + 403) --#define __NR_clock_settime64 (__NR_Linux + 404) --#define __NR_clock_adjtime64 (__NR_Linux + 405) --#define __NR_clock_getres_time64 (__NR_Linux + 406) --#define __NR_clock_nanosleep_time64 (__NR_Linux + 407) --#define __NR_timer_gettime64 (__NR_Linux + 408) --#define __NR_timer_settime64 (__NR_Linux + 409) --#define __NR_timerfd_gettime64 (__NR_Linux + 410) --#define __NR_timerfd_settime64 (__NR_Linux + 411) --#define __NR_utimensat_time64 (__NR_Linux + 412) --#define __NR_pselect6_time64 (__NR_Linux + 413) --#define __NR_ppoll_time64 (__NR_Linux + 414) --#define __NR_io_pgetevents_time64 (__NR_Linux + 416) --#define __NR_recvmmsg_time64 (__NR_Linux + 417) --#define __NR_mq_timedsend_time64 (__NR_Linux + 418) --#define __NR_mq_timedreceive_time64 (__NR_Linux + 419) --#define __NR_semtimedop_time64 (__NR_Linux + 420) --#define __NR_rt_sigtimedwait_time64 (__NR_Linux + 421) --#define __NR_futex_time64 (__NR_Linux + 422) --#define __NR_sched_rr_get_interval_time64 (__NR_Linux + 423) --#define __NR_pidfd_send_signal (__NR_Linux + 424) --#define __NR_io_uring_setup 
(__NR_Linux + 425) --#define __NR_io_uring_enter (__NR_Linux + 426) --#define __NR_io_uring_register (__NR_Linux + 427) --#define __NR_open_tree (__NR_Linux + 428) --#define __NR_move_mount (__NR_Linux + 429) --#define __NR_fsopen (__NR_Linux + 430) --#define __NR_fsconfig (__NR_Linux + 431) --#define __NR_fsmount (__NR_Linux + 432) --#define __NR_fspick (__NR_Linux + 433) --#define __NR_pidfd_open (__NR_Linux + 434) --#define __NR_clone3 (__NR_Linux + 435) --#define __NR_close_range (__NR_Linux + 436) --#define __NR_openat2 (__NR_Linux + 437) --#define __NR_pidfd_getfd (__NR_Linux + 438) --#define __NR_faccessat2 (__NR_Linux + 439) --#define __NR_process_madvise (__NR_Linux + 440) --#define __NR_epoll_pwait2 (__NR_Linux + 441) -+#define __NR_syscall (__NR_Linux + 0) -+#define __NR_exit (__NR_Linux + 1) -+#define __NR_fork (__NR_Linux + 2) -+#define __NR_read (__NR_Linux + 3) -+#define __NR_write (__NR_Linux + 4) -+#define __NR_open (__NR_Linux + 5) -+#define __NR_close (__NR_Linux + 6) -+#define __NR_waitpid (__NR_Linux + 7) -+#define __NR_creat (__NR_Linux + 8) -+#define __NR_link (__NR_Linux + 9) -+#define __NR_unlink (__NR_Linux + 10) -+#define __NR_execve (__NR_Linux + 11) -+#define __NR_chdir (__NR_Linux + 12) -+#define __NR_time (__NR_Linux + 13) -+#define __NR_mknod (__NR_Linux + 14) -+#define __NR_chmod (__NR_Linux + 15) -+#define __NR_lchown (__NR_Linux + 16) -+#define __NR_break (__NR_Linux + 17) -+#define __NR_unused18 (__NR_Linux + 18) -+#define __NR_lseek (__NR_Linux + 19) -+#define __NR_getpid (__NR_Linux + 20) -+#define __NR_mount (__NR_Linux + 21) -+#define __NR_umount (__NR_Linux + 22) -+#define __NR_setuid (__NR_Linux + 23) -+#define __NR_getuid (__NR_Linux + 24) -+#define __NR_stime (__NR_Linux + 25) -+#define __NR_ptrace (__NR_Linux + 26) -+#define __NR_alarm (__NR_Linux + 27) -+#define __NR_unused28 (__NR_Linux + 28) -+#define __NR_pause (__NR_Linux + 29) -+#define __NR_utime (__NR_Linux + 30) -+#define __NR_stty (__NR_Linux + 31) -+#define 
__NR_gtty (__NR_Linux + 32) -+#define __NR_access (__NR_Linux + 33) -+#define __NR_nice (__NR_Linux + 34) -+#define __NR_ftime (__NR_Linux + 35) -+#define __NR_sync (__NR_Linux + 36) -+#define __NR_kill (__NR_Linux + 37) -+#define __NR_rename (__NR_Linux + 38) -+#define __NR_mkdir (__NR_Linux + 39) -+#define __NR_rmdir (__NR_Linux + 40) -+#define __NR_dup (__NR_Linux + 41) -+#define __NR_pipe (__NR_Linux + 42) -+#define __NR_times (__NR_Linux + 43) -+#define __NR_prof (__NR_Linux + 44) -+#define __NR_brk (__NR_Linux + 45) -+#define __NR_setgid (__NR_Linux + 46) -+#define __NR_getgid (__NR_Linux + 47) -+#define __NR_signal (__NR_Linux + 48) -+#define __NR_geteuid (__NR_Linux + 49) -+#define __NR_getegid (__NR_Linux + 50) -+#define __NR_acct (__NR_Linux + 51) -+#define __NR_umount2 (__NR_Linux + 52) -+#define __NR_lock (__NR_Linux + 53) -+#define __NR_ioctl (__NR_Linux + 54) -+#define __NR_fcntl (__NR_Linux + 55) -+#define __NR_mpx (__NR_Linux + 56) -+#define __NR_setpgid (__NR_Linux + 57) -+#define __NR_ulimit (__NR_Linux + 58) -+#define __NR_unused59 (__NR_Linux + 59) -+#define __NR_umask (__NR_Linux + 60) -+#define __NR_chroot (__NR_Linux + 61) -+#define __NR_ustat (__NR_Linux + 62) -+#define __NR_dup2 (__NR_Linux + 63) -+#define __NR_getppid (__NR_Linux + 64) -+#define __NR_getpgrp (__NR_Linux + 65) -+#define __NR_setsid (__NR_Linux + 66) -+#define __NR_sigaction (__NR_Linux + 67) -+#define __NR_sgetmask (__NR_Linux + 68) -+#define __NR_ssetmask (__NR_Linux + 69) -+#define __NR_setreuid (__NR_Linux + 70) -+#define __NR_setregid (__NR_Linux + 71) -+#define __NR_sigsuspend (__NR_Linux + 72) -+#define __NR_sigpending (__NR_Linux + 73) -+#define __NR_sethostname (__NR_Linux + 74) -+#define __NR_setrlimit (__NR_Linux + 75) -+#define __NR_getrlimit (__NR_Linux + 76) -+#define __NR_getrusage (__NR_Linux + 77) -+#define __NR_gettimeofday (__NR_Linux + 78) -+#define __NR_settimeofday (__NR_Linux + 79) -+#define __NR_getgroups (__NR_Linux + 80) -+#define __NR_setgroups 
(__NR_Linux + 81) -+#define __NR_reserved82 (__NR_Linux + 82) -+#define __NR_symlink (__NR_Linux + 83) -+#define __NR_unused84 (__NR_Linux + 84) -+#define __NR_readlink (__NR_Linux + 85) -+#define __NR_uselib (__NR_Linux + 86) -+#define __NR_swapon (__NR_Linux + 87) -+#define __NR_reboot (__NR_Linux + 88) -+#define __NR_readdir (__NR_Linux + 89) -+#define __NR_mmap (__NR_Linux + 90) -+#define __NR_munmap (__NR_Linux + 91) -+#define __NR_truncate (__NR_Linux + 92) -+#define __NR_ftruncate (__NR_Linux + 93) -+#define __NR_fchmod (__NR_Linux + 94) -+#define __NR_fchown (__NR_Linux + 95) -+#define __NR_getpriority (__NR_Linux + 96) -+#define __NR_setpriority (__NR_Linux + 97) -+#define __NR_profil (__NR_Linux + 98) -+#define __NR_statfs (__NR_Linux + 99) -+#define __NR_fstatfs (__NR_Linux + 100) -+#define __NR_ioperm (__NR_Linux + 101) -+#define __NR_socketcall (__NR_Linux + 102) -+#define __NR_syslog (__NR_Linux + 103) -+#define __NR_setitimer (__NR_Linux + 104) -+#define __NR_getitimer (__NR_Linux + 105) -+#define __NR_stat (__NR_Linux + 106) -+#define __NR_lstat (__NR_Linux + 107) -+#define __NR_fstat (__NR_Linux + 108) -+#define __NR_unused109 (__NR_Linux + 109) -+#define __NR_iopl (__NR_Linux + 110) -+#define __NR_vhangup (__NR_Linux + 111) -+#define __NR_idle (__NR_Linux + 112) -+#define __NR_vm86 (__NR_Linux + 113) -+#define __NR_wait4 (__NR_Linux + 114) -+#define __NR_swapoff (__NR_Linux + 115) -+#define __NR_sysinfo (__NR_Linux + 116) -+#define __NR_ipc (__NR_Linux + 117) -+#define __NR_fsync (__NR_Linux + 118) -+#define __NR_sigreturn (__NR_Linux + 119) -+#define __NR_clone (__NR_Linux + 120) -+#define __NR_setdomainname (__NR_Linux + 121) -+#define __NR_uname (__NR_Linux + 122) -+#define __NR_modify_ldt (__NR_Linux + 123) -+#define __NR_adjtimex (__NR_Linux + 124) -+#define __NR_mprotect (__NR_Linux + 125) -+#define __NR_sigprocmask (__NR_Linux + 126) -+#define __NR_create_module (__NR_Linux + 127) -+#define __NR_init_module (__NR_Linux + 128) -+#define 
__NR_delete_module (__NR_Linux + 129) -+#define __NR_get_kernel_syms (__NR_Linux + 130) -+#define __NR_quotactl (__NR_Linux + 131) -+#define __NR_getpgid (__NR_Linux + 132) -+#define __NR_fchdir (__NR_Linux + 133) -+#define __NR_bdflush (__NR_Linux + 134) -+#define __NR_sysfs (__NR_Linux + 135) -+#define __NR_personality (__NR_Linux + 136) -+#define __NR_afs_syscall (__NR_Linux + 137) -+#define __NR_setfsuid (__NR_Linux + 138) -+#define __NR_setfsgid (__NR_Linux + 139) -+#define __NR__llseek (__NR_Linux + 140) -+#define __NR_getdents (__NR_Linux + 141) -+#define __NR__newselect (__NR_Linux + 142) -+#define __NR_flock (__NR_Linux + 143) -+#define __NR_msync (__NR_Linux + 144) -+#define __NR_readv (__NR_Linux + 145) -+#define __NR_writev (__NR_Linux + 146) -+#define __NR_cacheflush (__NR_Linux + 147) -+#define __NR_cachectl (__NR_Linux + 148) -+#define __NR_sysmips (__NR_Linux + 149) -+#define __NR_unused150 (__NR_Linux + 150) -+#define __NR_getsid (__NR_Linux + 151) -+#define __NR_fdatasync (__NR_Linux + 152) -+#define __NR__sysctl (__NR_Linux + 153) -+#define __NR_mlock (__NR_Linux + 154) -+#define __NR_munlock (__NR_Linux + 155) -+#define __NR_mlockall (__NR_Linux + 156) -+#define __NR_munlockall (__NR_Linux + 157) -+#define __NR_sched_setparam (__NR_Linux + 158) -+#define __NR_sched_getparam (__NR_Linux + 159) -+#define __NR_sched_setscheduler (__NR_Linux + 160) -+#define __NR_sched_getscheduler (__NR_Linux + 161) -+#define __NR_sched_yield (__NR_Linux + 162) -+#define __NR_sched_get_priority_max (__NR_Linux + 163) -+#define __NR_sched_get_priority_min (__NR_Linux + 164) -+#define __NR_sched_rr_get_interval (__NR_Linux + 165) -+#define __NR_nanosleep (__NR_Linux + 166) -+#define __NR_mremap (__NR_Linux + 167) -+#define __NR_accept (__NR_Linux + 168) -+#define __NR_bind (__NR_Linux + 169) -+#define __NR_connect (__NR_Linux + 170) -+#define __NR_getpeername (__NR_Linux + 171) -+#define __NR_getsockname (__NR_Linux + 172) -+#define __NR_getsockopt (__NR_Linux + 173) 
-+#define __NR_listen (__NR_Linux + 174) -+#define __NR_recv (__NR_Linux + 175) -+#define __NR_recvfrom (__NR_Linux + 176) -+#define __NR_recvmsg (__NR_Linux + 177) -+#define __NR_send (__NR_Linux + 178) -+#define __NR_sendmsg (__NR_Linux + 179) -+#define __NR_sendto (__NR_Linux + 180) -+#define __NR_setsockopt (__NR_Linux + 181) -+#define __NR_shutdown (__NR_Linux + 182) -+#define __NR_socket (__NR_Linux + 183) -+#define __NR_socketpair (__NR_Linux + 184) -+#define __NR_setresuid (__NR_Linux + 185) -+#define __NR_getresuid (__NR_Linux + 186) -+#define __NR_query_module (__NR_Linux + 187) -+#define __NR_poll (__NR_Linux + 188) -+#define __NR_nfsservctl (__NR_Linux + 189) -+#define __NR_setresgid (__NR_Linux + 190) -+#define __NR_getresgid (__NR_Linux + 191) -+#define __NR_prctl (__NR_Linux + 192) -+#define __NR_rt_sigreturn (__NR_Linux + 193) -+#define __NR_rt_sigaction (__NR_Linux + 194) -+#define __NR_rt_sigprocmask (__NR_Linux + 195) -+#define __NR_rt_sigpending (__NR_Linux + 196) -+#define __NR_rt_sigtimedwait (__NR_Linux + 197) -+#define __NR_rt_sigqueueinfo (__NR_Linux + 198) -+#define __NR_rt_sigsuspend (__NR_Linux + 199) -+#define __NR_pread64 (__NR_Linux + 200) -+#define __NR_pwrite64 (__NR_Linux + 201) -+#define __NR_chown (__NR_Linux + 202) -+#define __NR_getcwd (__NR_Linux + 203) -+#define __NR_capget (__NR_Linux + 204) -+#define __NR_capset (__NR_Linux + 205) -+#define __NR_sigaltstack (__NR_Linux + 206) -+#define __NR_sendfile (__NR_Linux + 207) -+#define __NR_getpmsg (__NR_Linux + 208) -+#define __NR_putpmsg (__NR_Linux + 209) -+#define __NR_mmap2 (__NR_Linux + 210) -+#define __NR_truncate64 (__NR_Linux + 211) -+#define __NR_ftruncate64 (__NR_Linux + 212) -+#define __NR_stat64 (__NR_Linux + 213) -+#define __NR_lstat64 (__NR_Linux + 214) -+#define __NR_fstat64 (__NR_Linux + 215) -+#define __NR_pivot_root (__NR_Linux + 216) -+#define __NR_mincore (__NR_Linux + 217) -+#define __NR_madvise (__NR_Linux + 218) -+#define __NR_getdents64 (__NR_Linux + 219) 
-+#define __NR_fcntl64 (__NR_Linux + 220) -+#define __NR_reserved221 (__NR_Linux + 221) -+#define __NR_gettid (__NR_Linux + 222) -+#define __NR_readahead (__NR_Linux + 223) -+#define __NR_setxattr (__NR_Linux + 224) -+#define __NR_lsetxattr (__NR_Linux + 225) -+#define __NR_fsetxattr (__NR_Linux + 226) -+#define __NR_getxattr (__NR_Linux + 227) -+#define __NR_lgetxattr (__NR_Linux + 228) -+#define __NR_fgetxattr (__NR_Linux + 229) -+#define __NR_listxattr (__NR_Linux + 230) -+#define __NR_llistxattr (__NR_Linux + 231) -+#define __NR_flistxattr (__NR_Linux + 232) -+#define __NR_removexattr (__NR_Linux + 233) -+#define __NR_lremovexattr (__NR_Linux + 234) -+#define __NR_fremovexattr (__NR_Linux + 235) -+#define __NR_tkill (__NR_Linux + 236) -+#define __NR_sendfile64 (__NR_Linux + 237) -+#define __NR_futex (__NR_Linux + 238) -+#define __NR_sched_setaffinity (__NR_Linux + 239) -+#define __NR_sched_getaffinity (__NR_Linux + 240) -+#define __NR_io_setup (__NR_Linux + 241) -+#define __NR_io_destroy (__NR_Linux + 242) -+#define __NR_io_getevents (__NR_Linux + 243) -+#define __NR_io_submit (__NR_Linux + 244) -+#define __NR_io_cancel (__NR_Linux + 245) -+#define __NR_exit_group (__NR_Linux + 246) -+#define __NR_lookup_dcookie (__NR_Linux + 247) -+#define __NR_epoll_create (__NR_Linux + 248) -+#define __NR_epoll_ctl (__NR_Linux + 249) -+#define __NR_epoll_wait (__NR_Linux + 250) -+#define __NR_remap_file_pages (__NR_Linux + 251) -+#define __NR_set_tid_address (__NR_Linux + 252) -+#define __NR_restart_syscall (__NR_Linux + 253) -+#define __NR_fadvise64 (__NR_Linux + 254) -+#define __NR_statfs64 (__NR_Linux + 255) -+#define __NR_fstatfs64 (__NR_Linux + 256) -+#define __NR_timer_create (__NR_Linux + 257) -+#define __NR_timer_settime (__NR_Linux + 258) -+#define __NR_timer_gettime (__NR_Linux + 259) -+#define __NR_timer_getoverrun (__NR_Linux + 260) -+#define __NR_timer_delete (__NR_Linux + 261) -+#define __NR_clock_settime (__NR_Linux + 262) -+#define __NR_clock_gettime 
(__NR_Linux + 263) -+#define __NR_clock_getres (__NR_Linux + 264) -+#define __NR_clock_nanosleep (__NR_Linux + 265) -+#define __NR_tgkill (__NR_Linux + 266) -+#define __NR_utimes (__NR_Linux + 267) -+#define __NR_mbind (__NR_Linux + 268) -+#define __NR_get_mempolicy (__NR_Linux + 269) -+#define __NR_set_mempolicy (__NR_Linux + 270) -+#define __NR_mq_open (__NR_Linux + 271) -+#define __NR_mq_unlink (__NR_Linux + 272) -+#define __NR_mq_timedsend (__NR_Linux + 273) -+#define __NR_mq_timedreceive (__NR_Linux + 274) -+#define __NR_mq_notify (__NR_Linux + 275) -+#define __NR_mq_getsetattr (__NR_Linux + 276) -+#define __NR_vserver (__NR_Linux + 277) -+#define __NR_waitid (__NR_Linux + 278) -+#define __NR_add_key (__NR_Linux + 280) -+#define __NR_request_key (__NR_Linux + 281) -+#define __NR_keyctl (__NR_Linux + 282) -+#define __NR_set_thread_area (__NR_Linux + 283) -+#define __NR_inotify_init (__NR_Linux + 284) -+#define __NR_inotify_add_watch (__NR_Linux + 285) -+#define __NR_inotify_rm_watch (__NR_Linux + 286) -+#define __NR_migrate_pages (__NR_Linux + 287) -+#define __NR_openat (__NR_Linux + 288) -+#define __NR_mkdirat (__NR_Linux + 289) -+#define __NR_mknodat (__NR_Linux + 290) -+#define __NR_fchownat (__NR_Linux + 291) -+#define __NR_futimesat (__NR_Linux + 292) -+#define __NR_fstatat64 (__NR_Linux + 293) -+#define __NR_unlinkat (__NR_Linux + 294) -+#define __NR_renameat (__NR_Linux + 295) -+#define __NR_linkat (__NR_Linux + 296) -+#define __NR_symlinkat (__NR_Linux + 297) -+#define __NR_readlinkat (__NR_Linux + 298) -+#define __NR_fchmodat (__NR_Linux + 299) -+#define __NR_faccessat (__NR_Linux + 300) -+#define __NR_pselect6 (__NR_Linux + 301) -+#define __NR_ppoll (__NR_Linux + 302) -+#define __NR_unshare (__NR_Linux + 303) -+#define __NR_splice (__NR_Linux + 304) -+#define __NR_sync_file_range (__NR_Linux + 305) -+#define __NR_tee (__NR_Linux + 306) -+#define __NR_vmsplice (__NR_Linux + 307) -+#define __NR_move_pages (__NR_Linux + 308) -+#define 
__NR_set_robust_list (__NR_Linux + 309) -+#define __NR_get_robust_list (__NR_Linux + 310) -+#define __NR_kexec_load (__NR_Linux + 311) -+#define __NR_getcpu (__NR_Linux + 312) -+#define __NR_epoll_pwait (__NR_Linux + 313) -+#define __NR_ioprio_set (__NR_Linux + 314) -+#define __NR_ioprio_get (__NR_Linux + 315) -+#define __NR_utimensat (__NR_Linux + 316) -+#define __NR_signalfd (__NR_Linux + 317) -+#define __NR_timerfd (__NR_Linux + 318) -+#define __NR_eventfd (__NR_Linux + 319) -+#define __NR_fallocate (__NR_Linux + 320) -+#define __NR_timerfd_create (__NR_Linux + 321) -+#define __NR_timerfd_gettime (__NR_Linux + 322) -+#define __NR_timerfd_settime (__NR_Linux + 323) -+#define __NR_signalfd4 (__NR_Linux + 324) -+#define __NR_eventfd2 (__NR_Linux + 325) -+#define __NR_epoll_create1 (__NR_Linux + 326) -+#define __NR_dup3 (__NR_Linux + 327) -+#define __NR_pipe2 (__NR_Linux + 328) -+#define __NR_inotify_init1 (__NR_Linux + 329) -+#define __NR_preadv (__NR_Linux + 330) -+#define __NR_pwritev (__NR_Linux + 331) -+#define __NR_rt_tgsigqueueinfo (__NR_Linux + 332) -+#define __NR_perf_event_open (__NR_Linux + 333) -+#define __NR_accept4 (__NR_Linux + 334) -+#define __NR_recvmmsg (__NR_Linux + 335) -+#define __NR_fanotify_init (__NR_Linux + 336) -+#define __NR_fanotify_mark (__NR_Linux + 337) -+#define __NR_prlimit64 (__NR_Linux + 338) -+#define __NR_name_to_handle_at (__NR_Linux + 339) -+#define __NR_open_by_handle_at (__NR_Linux + 340) -+#define __NR_clock_adjtime (__NR_Linux + 341) -+#define __NR_syncfs (__NR_Linux + 342) -+#define __NR_sendmmsg (__NR_Linux + 343) -+#define __NR_setns (__NR_Linux + 344) -+#define __NR_process_vm_readv (__NR_Linux + 345) -+#define __NR_process_vm_writev (__NR_Linux + 346) -+#define __NR_kcmp (__NR_Linux + 347) -+#define __NR_finit_module (__NR_Linux + 348) -+#define __NR_sched_setattr (__NR_Linux + 349) -+#define __NR_sched_getattr (__NR_Linux + 350) -+#define __NR_renameat2 (__NR_Linux + 351) -+#define __NR_seccomp (__NR_Linux + 352) 
-+#define __NR_getrandom (__NR_Linux + 353) -+#define __NR_memfd_create (__NR_Linux + 354) -+#define __NR_bpf (__NR_Linux + 355) -+#define __NR_execveat (__NR_Linux + 356) -+#define __NR_userfaultfd (__NR_Linux + 357) -+#define __NR_membarrier (__NR_Linux + 358) -+#define __NR_mlock2 (__NR_Linux + 359) -+#define __NR_copy_file_range (__NR_Linux + 360) -+#define __NR_preadv2 (__NR_Linux + 361) -+#define __NR_pwritev2 (__NR_Linux + 362) -+#define __NR_pkey_mprotect (__NR_Linux + 363) -+#define __NR_pkey_alloc (__NR_Linux + 364) -+#define __NR_pkey_free (__NR_Linux + 365) -+#define __NR_statx (__NR_Linux + 366) -+#define __NR_rseq (__NR_Linux + 367) -+#define __NR_io_pgetevents (__NR_Linux + 368) -+#define __NR_semget (__NR_Linux + 393) -+#define __NR_semctl (__NR_Linux + 394) -+#define __NR_shmget (__NR_Linux + 395) -+#define __NR_shmctl (__NR_Linux + 396) -+#define __NR_shmat (__NR_Linux + 397) -+#define __NR_shmdt (__NR_Linux + 398) -+#define __NR_msgget (__NR_Linux + 399) -+#define __NR_msgsnd (__NR_Linux + 400) -+#define __NR_msgrcv (__NR_Linux + 401) -+#define __NR_msgctl (__NR_Linux + 402) -+#define __NR_clock_gettime64 (__NR_Linux + 403) -+#define __NR_clock_settime64 (__NR_Linux + 404) -+#define __NR_clock_adjtime64 (__NR_Linux + 405) -+#define __NR_clock_getres_time64 (__NR_Linux + 406) -+#define __NR_clock_nanosleep_time64 (__NR_Linux + 407) -+#define __NR_timer_gettime64 (__NR_Linux + 408) -+#define __NR_timer_settime64 (__NR_Linux + 409) -+#define __NR_timerfd_gettime64 (__NR_Linux + 410) -+#define __NR_timerfd_settime64 (__NR_Linux + 411) -+#define __NR_utimensat_time64 (__NR_Linux + 412) -+#define __NR_pselect6_time64 (__NR_Linux + 413) -+#define __NR_ppoll_time64 (__NR_Linux + 414) -+#define __NR_io_pgetevents_time64 (__NR_Linux + 416) -+#define __NR_recvmmsg_time64 (__NR_Linux + 417) -+#define __NR_mq_timedsend_time64 (__NR_Linux + 418) -+#define __NR_mq_timedreceive_time64 (__NR_Linux + 419) -+#define __NR_semtimedop_time64 (__NR_Linux + 420) 
-+#define __NR_rt_sigtimedwait_time64 (__NR_Linux + 421) -+#define __NR_futex_time64 (__NR_Linux + 422) -+#define __NR_sched_rr_get_interval_time64 (__NR_Linux + 423) -+#define __NR_pidfd_send_signal (__NR_Linux + 424) -+#define __NR_io_uring_setup (__NR_Linux + 425) -+#define __NR_io_uring_enter (__NR_Linux + 426) -+#define __NR_io_uring_register (__NR_Linux + 427) -+#define __NR_open_tree (__NR_Linux + 428) -+#define __NR_move_mount (__NR_Linux + 429) -+#define __NR_fsopen (__NR_Linux + 430) -+#define __NR_fsconfig (__NR_Linux + 431) -+#define __NR_fsmount (__NR_Linux + 432) -+#define __NR_fspick (__NR_Linux + 433) -+#define __NR_pidfd_open (__NR_Linux + 434) -+#define __NR_clone3 (__NR_Linux + 435) -+#define __NR_close_range (__NR_Linux + 436) -+#define __NR_openat2 (__NR_Linux + 437) -+#define __NR_pidfd_getfd (__NR_Linux + 438) -+#define __NR_faccessat2 (__NR_Linux + 439) -+#define __NR_process_madvise (__NR_Linux + 440) -+#define __NR_epoll_pwait2 (__NR_Linux + 441) -+#define __NR_mount_setattr (__NR_Linux + 442) -+#define __NR_landlock_create_ruleset (__NR_Linux + 444) -+#define __NR_landlock_add_rule (__NR_Linux + 445) -+#define __NR_landlock_restrict_self (__NR_Linux + 446) - -- --#endif /* _ASM_MIPS_UNISTD_O32_H */ -+#endif /* _ASM_UNISTD_O32_H */ -diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h -index c3af3f324c..9f18fa090f 100644 ---- a/linux-headers/asm-powerpc/kvm.h -+++ b/linux-headers/asm-powerpc/kvm.h -@@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char { - #define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1) - #define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2) - #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) -+#define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) -+#define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) - - /* Transactional Memory checkpointed state: - * This is all GPRs, all VSX regs and a subset of SPRs -diff --git 
a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h -index 4624c90043..9155778c19 100644 ---- a/linux-headers/asm-powerpc/unistd_32.h -+++ b/linux-headers/asm-powerpc/unistd_32.h -@@ -1,429 +1,433 @@ --#ifndef _ASM_POWERPC_UNISTD_32_H --#define _ASM_POWERPC_UNISTD_32_H -+#ifndef _ASM_UNISTD_32_H -+#define _ASM_UNISTD_32_H - --#define __NR_restart_syscall 0 --#define __NR_exit 1 --#define __NR_fork 2 --#define __NR_read 3 --#define __NR_write 4 --#define __NR_open 5 --#define __NR_close 6 --#define __NR_waitpid 7 --#define __NR_creat 8 --#define __NR_link 9 --#define __NR_unlink 10 --#define __NR_execve 11 --#define __NR_chdir 12 --#define __NR_time 13 --#define __NR_mknod 14 --#define __NR_chmod 15 --#define __NR_lchown 16 --#define __NR_break 17 --#define __NR_oldstat 18 --#define __NR_lseek 19 --#define __NR_getpid 20 --#define __NR_mount 21 --#define __NR_umount 22 --#define __NR_setuid 23 --#define __NR_getuid 24 --#define __NR_stime 25 --#define __NR_ptrace 26 --#define __NR_alarm 27 --#define __NR_oldfstat 28 --#define __NR_pause 29 --#define __NR_utime 30 --#define __NR_stty 31 --#define __NR_gtty 32 --#define __NR_access 33 --#define __NR_nice 34 --#define __NR_ftime 35 --#define __NR_sync 36 --#define __NR_kill 37 --#define __NR_rename 38 --#define __NR_mkdir 39 --#define __NR_rmdir 40 --#define __NR_dup 41 --#define __NR_pipe 42 --#define __NR_times 43 --#define __NR_prof 44 --#define __NR_brk 45 --#define __NR_setgid 46 --#define __NR_getgid 47 --#define __NR_signal 48 --#define __NR_geteuid 49 --#define __NR_getegid 50 --#define __NR_acct 51 --#define __NR_umount2 52 --#define __NR_lock 53 --#define __NR_ioctl 54 --#define __NR_fcntl 55 --#define __NR_mpx 56 --#define __NR_setpgid 57 --#define __NR_ulimit 58 --#define __NR_oldolduname 59 --#define __NR_umask 60 --#define __NR_chroot 61 --#define __NR_ustat 62 --#define __NR_dup2 63 --#define __NR_getppid 64 --#define __NR_getpgrp 65 --#define __NR_setsid 66 --#define 
__NR_sigaction 67 --#define __NR_sgetmask 68 --#define __NR_ssetmask 69 --#define __NR_setreuid 70 --#define __NR_setregid 71 --#define __NR_sigsuspend 72 --#define __NR_sigpending 73 --#define __NR_sethostname 74 --#define __NR_setrlimit 75 --#define __NR_getrlimit 76 --#define __NR_getrusage 77 --#define __NR_gettimeofday 78 --#define __NR_settimeofday 79 --#define __NR_getgroups 80 --#define __NR_setgroups 81 --#define __NR_select 82 --#define __NR_symlink 83 --#define __NR_oldlstat 84 --#define __NR_readlink 85 --#define __NR_uselib 86 --#define __NR_swapon 87 --#define __NR_reboot 88 --#define __NR_readdir 89 --#define __NR_mmap 90 --#define __NR_munmap 91 --#define __NR_truncate 92 --#define __NR_ftruncate 93 --#define __NR_fchmod 94 --#define __NR_fchown 95 --#define __NR_getpriority 96 --#define __NR_setpriority 97 --#define __NR_profil 98 --#define __NR_statfs 99 --#define __NR_fstatfs 100 --#define __NR_ioperm 101 --#define __NR_socketcall 102 --#define __NR_syslog 103 --#define __NR_setitimer 104 --#define __NR_getitimer 105 --#define __NR_stat 106 --#define __NR_lstat 107 --#define __NR_fstat 108 --#define __NR_olduname 109 --#define __NR_iopl 110 --#define __NR_vhangup 111 --#define __NR_idle 112 --#define __NR_vm86 113 --#define __NR_wait4 114 --#define __NR_swapoff 115 --#define __NR_sysinfo 116 --#define __NR_ipc 117 --#define __NR_fsync 118 --#define __NR_sigreturn 119 --#define __NR_clone 120 --#define __NR_setdomainname 121 --#define __NR_uname 122 --#define __NR_modify_ldt 123 --#define __NR_adjtimex 124 --#define __NR_mprotect 125 --#define __NR_sigprocmask 126 --#define __NR_create_module 127 --#define __NR_init_module 128 --#define __NR_delete_module 129 --#define __NR_get_kernel_syms 130 --#define __NR_quotactl 131 --#define __NR_getpgid 132 --#define __NR_fchdir 133 --#define __NR_bdflush 134 --#define __NR_sysfs 135 --#define __NR_personality 136 --#define __NR_afs_syscall 137 --#define __NR_setfsuid 138 --#define __NR_setfsgid 139 
--#define __NR__llseek 140 --#define __NR_getdents 141 --#define __NR__newselect 142 --#define __NR_flock 143 --#define __NR_msync 144 --#define __NR_readv 145 --#define __NR_writev 146 --#define __NR_getsid 147 --#define __NR_fdatasync 148 --#define __NR__sysctl 149 --#define __NR_mlock 150 --#define __NR_munlock 151 --#define __NR_mlockall 152 --#define __NR_munlockall 153 --#define __NR_sched_setparam 154 --#define __NR_sched_getparam 155 --#define __NR_sched_setscheduler 156 --#define __NR_sched_getscheduler 157 --#define __NR_sched_yield 158 --#define __NR_sched_get_priority_max 159 --#define __NR_sched_get_priority_min 160 --#define __NR_sched_rr_get_interval 161 --#define __NR_nanosleep 162 --#define __NR_mremap 163 --#define __NR_setresuid 164 --#define __NR_getresuid 165 --#define __NR_query_module 166 --#define __NR_poll 167 --#define __NR_nfsservctl 168 --#define __NR_setresgid 169 --#define __NR_getresgid 170 --#define __NR_prctl 171 --#define __NR_rt_sigreturn 172 --#define __NR_rt_sigaction 173 --#define __NR_rt_sigprocmask 174 --#define __NR_rt_sigpending 175 --#define __NR_rt_sigtimedwait 176 --#define __NR_rt_sigqueueinfo 177 --#define __NR_rt_sigsuspend 178 --#define __NR_pread64 179 --#define __NR_pwrite64 180 --#define __NR_chown 181 --#define __NR_getcwd 182 --#define __NR_capget 183 --#define __NR_capset 184 --#define __NR_sigaltstack 185 --#define __NR_sendfile 186 --#define __NR_getpmsg 187 --#define __NR_putpmsg 188 --#define __NR_vfork 189 --#define __NR_ugetrlimit 190 --#define __NR_readahead 191 --#define __NR_mmap2 192 --#define __NR_truncate64 193 --#define __NR_ftruncate64 194 --#define __NR_stat64 195 --#define __NR_lstat64 196 --#define __NR_fstat64 197 --#define __NR_pciconfig_read 198 --#define __NR_pciconfig_write 199 --#define __NR_pciconfig_iobase 200 --#define __NR_multiplexer 201 --#define __NR_getdents64 202 --#define __NR_pivot_root 203 --#define __NR_fcntl64 204 --#define __NR_madvise 205 --#define __NR_mincore 206 
--#define __NR_gettid 207 --#define __NR_tkill 208 --#define __NR_setxattr 209 --#define __NR_lsetxattr 210 --#define __NR_fsetxattr 211 --#define __NR_getxattr 212 --#define __NR_lgetxattr 213 --#define __NR_fgetxattr 214 --#define __NR_listxattr 215 --#define __NR_llistxattr 216 --#define __NR_flistxattr 217 --#define __NR_removexattr 218 --#define __NR_lremovexattr 219 --#define __NR_fremovexattr 220 --#define __NR_futex 221 --#define __NR_sched_setaffinity 222 --#define __NR_sched_getaffinity 223 --#define __NR_tuxcall 225 --#define __NR_sendfile64 226 --#define __NR_io_setup 227 --#define __NR_io_destroy 228 --#define __NR_io_getevents 229 --#define __NR_io_submit 230 --#define __NR_io_cancel 231 --#define __NR_set_tid_address 232 --#define __NR_fadvise64 233 --#define __NR_exit_group 234 --#define __NR_lookup_dcookie 235 --#define __NR_epoll_create 236 --#define __NR_epoll_ctl 237 --#define __NR_epoll_wait 238 --#define __NR_remap_file_pages 239 --#define __NR_timer_create 240 --#define __NR_timer_settime 241 --#define __NR_timer_gettime 242 --#define __NR_timer_getoverrun 243 --#define __NR_timer_delete 244 --#define __NR_clock_settime 245 --#define __NR_clock_gettime 246 --#define __NR_clock_getres 247 --#define __NR_clock_nanosleep 248 --#define __NR_swapcontext 249 --#define __NR_tgkill 250 --#define __NR_utimes 251 --#define __NR_statfs64 252 --#define __NR_fstatfs64 253 --#define __NR_fadvise64_64 254 --#define __NR_rtas 255 --#define __NR_sys_debug_setcontext 256 --#define __NR_migrate_pages 258 --#define __NR_mbind 259 --#define __NR_get_mempolicy 260 --#define __NR_set_mempolicy 261 --#define __NR_mq_open 262 --#define __NR_mq_unlink 263 --#define __NR_mq_timedsend 264 --#define __NR_mq_timedreceive 265 --#define __NR_mq_notify 266 --#define __NR_mq_getsetattr 267 --#define __NR_kexec_load 268 --#define __NR_add_key 269 --#define __NR_request_key 270 --#define __NR_keyctl 271 --#define __NR_waitid 272 --#define __NR_ioprio_set 273 --#define 
__NR_ioprio_get 274 --#define __NR_inotify_init 275 --#define __NR_inotify_add_watch 276 --#define __NR_inotify_rm_watch 277 --#define __NR_spu_run 278 --#define __NR_spu_create 279 --#define __NR_pselect6 280 --#define __NR_ppoll 281 --#define __NR_unshare 282 --#define __NR_splice 283 --#define __NR_tee 284 --#define __NR_vmsplice 285 --#define __NR_openat 286 --#define __NR_mkdirat 287 --#define __NR_mknodat 288 --#define __NR_fchownat 289 --#define __NR_futimesat 290 --#define __NR_fstatat64 291 --#define __NR_unlinkat 292 --#define __NR_renameat 293 --#define __NR_linkat 294 --#define __NR_symlinkat 295 --#define __NR_readlinkat 296 --#define __NR_fchmodat 297 --#define __NR_faccessat 298 --#define __NR_get_robust_list 299 --#define __NR_set_robust_list 300 --#define __NR_move_pages 301 --#define __NR_getcpu 302 --#define __NR_epoll_pwait 303 --#define __NR_utimensat 304 --#define __NR_signalfd 305 --#define __NR_timerfd_create 306 --#define __NR_eventfd 307 --#define __NR_sync_file_range2 308 --#define __NR_fallocate 309 --#define __NR_subpage_prot 310 --#define __NR_timerfd_settime 311 --#define __NR_timerfd_gettime 312 --#define __NR_signalfd4 313 --#define __NR_eventfd2 314 --#define __NR_epoll_create1 315 --#define __NR_dup3 316 --#define __NR_pipe2 317 --#define __NR_inotify_init1 318 --#define __NR_perf_event_open 319 --#define __NR_preadv 320 --#define __NR_pwritev 321 --#define __NR_rt_tgsigqueueinfo 322 --#define __NR_fanotify_init 323 --#define __NR_fanotify_mark 324 --#define __NR_prlimit64 325 --#define __NR_socket 326 --#define __NR_bind 327 --#define __NR_connect 328 --#define __NR_listen 329 --#define __NR_accept 330 --#define __NR_getsockname 331 --#define __NR_getpeername 332 --#define __NR_socketpair 333 --#define __NR_send 334 --#define __NR_sendto 335 --#define __NR_recv 336 --#define __NR_recvfrom 337 --#define __NR_shutdown 338 --#define __NR_setsockopt 339 --#define __NR_getsockopt 340 --#define __NR_sendmsg 341 --#define __NR_recvmsg 
342 --#define __NR_recvmmsg 343 --#define __NR_accept4 344 --#define __NR_name_to_handle_at 345 --#define __NR_open_by_handle_at 346 --#define __NR_clock_adjtime 347 --#define __NR_syncfs 348 --#define __NR_sendmmsg 349 --#define __NR_setns 350 --#define __NR_process_vm_readv 351 --#define __NR_process_vm_writev 352 --#define __NR_finit_module 353 --#define __NR_kcmp 354 --#define __NR_sched_setattr 355 --#define __NR_sched_getattr 356 --#define __NR_renameat2 357 --#define __NR_seccomp 358 --#define __NR_getrandom 359 --#define __NR_memfd_create 360 --#define __NR_bpf 361 --#define __NR_execveat 362 --#define __NR_switch_endian 363 --#define __NR_userfaultfd 364 --#define __NR_membarrier 365 --#define __NR_mlock2 378 --#define __NR_copy_file_range 379 --#define __NR_preadv2 380 --#define __NR_pwritev2 381 --#define __NR_kexec_file_load 382 --#define __NR_statx 383 --#define __NR_pkey_alloc 384 --#define __NR_pkey_free 385 --#define __NR_pkey_mprotect 386 --#define __NR_rseq 387 --#define __NR_io_pgetevents 388 --#define __NR_semget 393 --#define __NR_semctl 394 --#define __NR_shmget 395 --#define __NR_shmctl 396 --#define __NR_shmat 397 --#define __NR_shmdt 398 --#define __NR_msgget 399 --#define __NR_msgsnd 400 --#define __NR_msgrcv 401 --#define __NR_msgctl 402 --#define __NR_clock_gettime64 403 --#define __NR_clock_settime64 404 --#define __NR_clock_adjtime64 405 --#define __NR_clock_getres_time64 406 --#define __NR_clock_nanosleep_time64 407 --#define __NR_timer_gettime64 408 --#define __NR_timer_settime64 409 --#define __NR_timerfd_gettime64 410 --#define __NR_timerfd_settime64 411 --#define __NR_utimensat_time64 412 --#define __NR_pselect6_time64 413 --#define __NR_ppoll_time64 414 --#define __NR_io_pgetevents_time64 416 --#define __NR_recvmmsg_time64 417 --#define __NR_mq_timedsend_time64 418 --#define __NR_mq_timedreceive_time64 419 --#define __NR_semtimedop_time64 420 --#define __NR_rt_sigtimedwait_time64 421 --#define __NR_futex_time64 422 --#define 
__NR_sched_rr_get_interval_time64 423 --#define __NR_pidfd_send_signal 424 --#define __NR_io_uring_setup 425 --#define __NR_io_uring_enter 426 --#define __NR_io_uring_register 427 --#define __NR_open_tree 428 --#define __NR_move_mount 429 --#define __NR_fsopen 430 --#define __NR_fsconfig 431 --#define __NR_fsmount 432 --#define __NR_fspick 433 --#define __NR_pidfd_open 434 --#define __NR_clone3 435 --#define __NR_close_range 436 --#define __NR_openat2 437 --#define __NR_pidfd_getfd 438 --#define __NR_faccessat2 439 --#define __NR_process_madvise 440 --#define __NR_epoll_pwait2 441 -+#define __NR_restart_syscall 0 -+#define __NR_exit 1 -+#define __NR_fork 2 -+#define __NR_read 3 -+#define __NR_write 4 -+#define __NR_open 5 -+#define __NR_close 6 -+#define __NR_waitpid 7 -+#define __NR_creat 8 -+#define __NR_link 9 -+#define __NR_unlink 10 -+#define __NR_execve 11 -+#define __NR_chdir 12 -+#define __NR_time 13 -+#define __NR_mknod 14 -+#define __NR_chmod 15 -+#define __NR_lchown 16 -+#define __NR_break 17 -+#define __NR_oldstat 18 -+#define __NR_lseek 19 -+#define __NR_getpid 20 -+#define __NR_mount 21 -+#define __NR_umount 22 -+#define __NR_setuid 23 -+#define __NR_getuid 24 -+#define __NR_stime 25 -+#define __NR_ptrace 26 -+#define __NR_alarm 27 -+#define __NR_oldfstat 28 -+#define __NR_pause 29 -+#define __NR_utime 30 -+#define __NR_stty 31 -+#define __NR_gtty 32 -+#define __NR_access 33 -+#define __NR_nice 34 -+#define __NR_ftime 35 -+#define __NR_sync 36 -+#define __NR_kill 37 -+#define __NR_rename 38 -+#define __NR_mkdir 39 -+#define __NR_rmdir 40 -+#define __NR_dup 41 -+#define __NR_pipe 42 -+#define __NR_times 43 -+#define __NR_prof 44 -+#define __NR_brk 45 -+#define __NR_setgid 46 -+#define __NR_getgid 47 -+#define __NR_signal 48 -+#define __NR_geteuid 49 -+#define __NR_getegid 50 -+#define __NR_acct 51 -+#define __NR_umount2 52 -+#define __NR_lock 53 -+#define __NR_ioctl 54 -+#define __NR_fcntl 55 -+#define __NR_mpx 56 -+#define __NR_setpgid 57 -+#define 
__NR_ulimit 58 -+#define __NR_oldolduname 59 -+#define __NR_umask 60 -+#define __NR_chroot 61 -+#define __NR_ustat 62 -+#define __NR_dup2 63 -+#define __NR_getppid 64 -+#define __NR_getpgrp 65 -+#define __NR_setsid 66 -+#define __NR_sigaction 67 -+#define __NR_sgetmask 68 -+#define __NR_ssetmask 69 -+#define __NR_setreuid 70 -+#define __NR_setregid 71 -+#define __NR_sigsuspend 72 -+#define __NR_sigpending 73 -+#define __NR_sethostname 74 -+#define __NR_setrlimit 75 -+#define __NR_getrlimit 76 -+#define __NR_getrusage 77 -+#define __NR_gettimeofday 78 -+#define __NR_settimeofday 79 -+#define __NR_getgroups 80 -+#define __NR_setgroups 81 -+#define __NR_select 82 -+#define __NR_symlink 83 -+#define __NR_oldlstat 84 -+#define __NR_readlink 85 -+#define __NR_uselib 86 -+#define __NR_swapon 87 -+#define __NR_reboot 88 -+#define __NR_readdir 89 -+#define __NR_mmap 90 -+#define __NR_munmap 91 -+#define __NR_truncate 92 -+#define __NR_ftruncate 93 -+#define __NR_fchmod 94 -+#define __NR_fchown 95 -+#define __NR_getpriority 96 -+#define __NR_setpriority 97 -+#define __NR_profil 98 -+#define __NR_statfs 99 -+#define __NR_fstatfs 100 -+#define __NR_ioperm 101 -+#define __NR_socketcall 102 -+#define __NR_syslog 103 -+#define __NR_setitimer 104 -+#define __NR_getitimer 105 -+#define __NR_stat 106 -+#define __NR_lstat 107 -+#define __NR_fstat 108 -+#define __NR_olduname 109 -+#define __NR_iopl 110 -+#define __NR_vhangup 111 -+#define __NR_idle 112 -+#define __NR_vm86 113 -+#define __NR_wait4 114 -+#define __NR_swapoff 115 -+#define __NR_sysinfo 116 -+#define __NR_ipc 117 -+#define __NR_fsync 118 -+#define __NR_sigreturn 119 -+#define __NR_clone 120 -+#define __NR_setdomainname 121 -+#define __NR_uname 122 -+#define __NR_modify_ldt 123 -+#define __NR_adjtimex 124 -+#define __NR_mprotect 125 -+#define __NR_sigprocmask 126 -+#define __NR_create_module 127 -+#define __NR_init_module 128 -+#define __NR_delete_module 129 -+#define __NR_get_kernel_syms 130 -+#define __NR_quotactl 131 
-+#define __NR_getpgid 132 -+#define __NR_fchdir 133 -+#define __NR_bdflush 134 -+#define __NR_sysfs 135 -+#define __NR_personality 136 -+#define __NR_afs_syscall 137 -+#define __NR_setfsuid 138 -+#define __NR_setfsgid 139 -+#define __NR__llseek 140 -+#define __NR_getdents 141 -+#define __NR__newselect 142 -+#define __NR_flock 143 -+#define __NR_msync 144 -+#define __NR_readv 145 -+#define __NR_writev 146 -+#define __NR_getsid 147 -+#define __NR_fdatasync 148 -+#define __NR__sysctl 149 -+#define __NR_mlock 150 -+#define __NR_munlock 151 -+#define __NR_mlockall 152 -+#define __NR_munlockall 153 -+#define __NR_sched_setparam 154 -+#define __NR_sched_getparam 155 -+#define __NR_sched_setscheduler 156 -+#define __NR_sched_getscheduler 157 -+#define __NR_sched_yield 158 -+#define __NR_sched_get_priority_max 159 -+#define __NR_sched_get_priority_min 160 -+#define __NR_sched_rr_get_interval 161 -+#define __NR_nanosleep 162 -+#define __NR_mremap 163 -+#define __NR_setresuid 164 -+#define __NR_getresuid 165 -+#define __NR_query_module 166 -+#define __NR_poll 167 -+#define __NR_nfsservctl 168 -+#define __NR_setresgid 169 -+#define __NR_getresgid 170 -+#define __NR_prctl 171 -+#define __NR_rt_sigreturn 172 -+#define __NR_rt_sigaction 173 -+#define __NR_rt_sigprocmask 174 -+#define __NR_rt_sigpending 175 -+#define __NR_rt_sigtimedwait 176 -+#define __NR_rt_sigqueueinfo 177 -+#define __NR_rt_sigsuspend 178 -+#define __NR_pread64 179 -+#define __NR_pwrite64 180 -+#define __NR_chown 181 -+#define __NR_getcwd 182 -+#define __NR_capget 183 -+#define __NR_capset 184 -+#define __NR_sigaltstack 185 -+#define __NR_sendfile 186 -+#define __NR_getpmsg 187 -+#define __NR_putpmsg 188 -+#define __NR_vfork 189 -+#define __NR_ugetrlimit 190 -+#define __NR_readahead 191 -+#define __NR_mmap2 192 -+#define __NR_truncate64 193 -+#define __NR_ftruncate64 194 -+#define __NR_stat64 195 -+#define __NR_lstat64 196 -+#define __NR_fstat64 197 -+#define __NR_pciconfig_read 198 -+#define 
__NR_pciconfig_write 199 -+#define __NR_pciconfig_iobase 200 -+#define __NR_multiplexer 201 -+#define __NR_getdents64 202 -+#define __NR_pivot_root 203 -+#define __NR_fcntl64 204 -+#define __NR_madvise 205 -+#define __NR_mincore 206 -+#define __NR_gettid 207 -+#define __NR_tkill 208 -+#define __NR_setxattr 209 -+#define __NR_lsetxattr 210 -+#define __NR_fsetxattr 211 -+#define __NR_getxattr 212 -+#define __NR_lgetxattr 213 -+#define __NR_fgetxattr 214 -+#define __NR_listxattr 215 -+#define __NR_llistxattr 216 -+#define __NR_flistxattr 217 -+#define __NR_removexattr 218 -+#define __NR_lremovexattr 219 -+#define __NR_fremovexattr 220 -+#define __NR_futex 221 -+#define __NR_sched_setaffinity 222 -+#define __NR_sched_getaffinity 223 -+#define __NR_tuxcall 225 -+#define __NR_sendfile64 226 -+#define __NR_io_setup 227 -+#define __NR_io_destroy 228 -+#define __NR_io_getevents 229 -+#define __NR_io_submit 230 -+#define __NR_io_cancel 231 -+#define __NR_set_tid_address 232 -+#define __NR_fadvise64 233 -+#define __NR_exit_group 234 -+#define __NR_lookup_dcookie 235 -+#define __NR_epoll_create 236 -+#define __NR_epoll_ctl 237 -+#define __NR_epoll_wait 238 -+#define __NR_remap_file_pages 239 -+#define __NR_timer_create 240 -+#define __NR_timer_settime 241 -+#define __NR_timer_gettime 242 -+#define __NR_timer_getoverrun 243 -+#define __NR_timer_delete 244 -+#define __NR_clock_settime 245 -+#define __NR_clock_gettime 246 -+#define __NR_clock_getres 247 -+#define __NR_clock_nanosleep 248 -+#define __NR_swapcontext 249 -+#define __NR_tgkill 250 -+#define __NR_utimes 251 -+#define __NR_statfs64 252 -+#define __NR_fstatfs64 253 -+#define __NR_fadvise64_64 254 -+#define __NR_rtas 255 -+#define __NR_sys_debug_setcontext 256 -+#define __NR_migrate_pages 258 -+#define __NR_mbind 259 -+#define __NR_get_mempolicy 260 -+#define __NR_set_mempolicy 261 -+#define __NR_mq_open 262 -+#define __NR_mq_unlink 263 -+#define __NR_mq_timedsend 264 -+#define __NR_mq_timedreceive 265 -+#define 
__NR_mq_notify 266 -+#define __NR_mq_getsetattr 267 -+#define __NR_kexec_load 268 -+#define __NR_add_key 269 -+#define __NR_request_key 270 -+#define __NR_keyctl 271 -+#define __NR_waitid 272 -+#define __NR_ioprio_set 273 -+#define __NR_ioprio_get 274 -+#define __NR_inotify_init 275 -+#define __NR_inotify_add_watch 276 -+#define __NR_inotify_rm_watch 277 -+#define __NR_spu_run 278 -+#define __NR_spu_create 279 -+#define __NR_pselect6 280 -+#define __NR_ppoll 281 -+#define __NR_unshare 282 -+#define __NR_splice 283 -+#define __NR_tee 284 -+#define __NR_vmsplice 285 -+#define __NR_openat 286 -+#define __NR_mkdirat 287 -+#define __NR_mknodat 288 -+#define __NR_fchownat 289 -+#define __NR_futimesat 290 -+#define __NR_fstatat64 291 -+#define __NR_unlinkat 292 -+#define __NR_renameat 293 -+#define __NR_linkat 294 -+#define __NR_symlinkat 295 -+#define __NR_readlinkat 296 -+#define __NR_fchmodat 297 -+#define __NR_faccessat 298 -+#define __NR_get_robust_list 299 -+#define __NR_set_robust_list 300 -+#define __NR_move_pages 301 -+#define __NR_getcpu 302 -+#define __NR_epoll_pwait 303 -+#define __NR_utimensat 304 -+#define __NR_signalfd 305 -+#define __NR_timerfd_create 306 -+#define __NR_eventfd 307 -+#define __NR_sync_file_range2 308 -+#define __NR_fallocate 309 -+#define __NR_subpage_prot 310 -+#define __NR_timerfd_settime 311 -+#define __NR_timerfd_gettime 312 -+#define __NR_signalfd4 313 -+#define __NR_eventfd2 314 -+#define __NR_epoll_create1 315 -+#define __NR_dup3 316 -+#define __NR_pipe2 317 -+#define __NR_inotify_init1 318 -+#define __NR_perf_event_open 319 -+#define __NR_preadv 320 -+#define __NR_pwritev 321 -+#define __NR_rt_tgsigqueueinfo 322 -+#define __NR_fanotify_init 323 -+#define __NR_fanotify_mark 324 -+#define __NR_prlimit64 325 -+#define __NR_socket 326 -+#define __NR_bind 327 -+#define __NR_connect 328 -+#define __NR_listen 329 -+#define __NR_accept 330 -+#define __NR_getsockname 331 -+#define __NR_getpeername 332 -+#define __NR_socketpair 333 -+#define 
__NR_send 334 -+#define __NR_sendto 335 -+#define __NR_recv 336 -+#define __NR_recvfrom 337 -+#define __NR_shutdown 338 -+#define __NR_setsockopt 339 -+#define __NR_getsockopt 340 -+#define __NR_sendmsg 341 -+#define __NR_recvmsg 342 -+#define __NR_recvmmsg 343 -+#define __NR_accept4 344 -+#define __NR_name_to_handle_at 345 -+#define __NR_open_by_handle_at 346 -+#define __NR_clock_adjtime 347 -+#define __NR_syncfs 348 -+#define __NR_sendmmsg 349 -+#define __NR_setns 350 -+#define __NR_process_vm_readv 351 -+#define __NR_process_vm_writev 352 -+#define __NR_finit_module 353 -+#define __NR_kcmp 354 -+#define __NR_sched_setattr 355 -+#define __NR_sched_getattr 356 -+#define __NR_renameat2 357 -+#define __NR_seccomp 358 -+#define __NR_getrandom 359 -+#define __NR_memfd_create 360 -+#define __NR_bpf 361 -+#define __NR_execveat 362 -+#define __NR_switch_endian 363 -+#define __NR_userfaultfd 364 -+#define __NR_membarrier 365 -+#define __NR_mlock2 378 -+#define __NR_copy_file_range 379 -+#define __NR_preadv2 380 -+#define __NR_pwritev2 381 -+#define __NR_kexec_file_load 382 -+#define __NR_statx 383 -+#define __NR_pkey_alloc 384 -+#define __NR_pkey_free 385 -+#define __NR_pkey_mprotect 386 -+#define __NR_rseq 387 -+#define __NR_io_pgetevents 388 -+#define __NR_semget 393 -+#define __NR_semctl 394 -+#define __NR_shmget 395 -+#define __NR_shmctl 396 -+#define __NR_shmat 397 -+#define __NR_shmdt 398 -+#define __NR_msgget 399 -+#define __NR_msgsnd 400 -+#define __NR_msgrcv 401 -+#define __NR_msgctl 402 -+#define __NR_clock_gettime64 403 -+#define __NR_clock_settime64 404 -+#define __NR_clock_adjtime64 405 -+#define __NR_clock_getres_time64 406 -+#define __NR_clock_nanosleep_time64 407 -+#define __NR_timer_gettime64 408 -+#define __NR_timer_settime64 409 -+#define __NR_timerfd_gettime64 410 -+#define __NR_timerfd_settime64 411 -+#define __NR_utimensat_time64 412 -+#define __NR_pselect6_time64 413 -+#define __NR_ppoll_time64 414 -+#define __NR_io_pgetevents_time64 416 -+#define 
__NR_recvmmsg_time64 417 -+#define __NR_mq_timedsend_time64 418 -+#define __NR_mq_timedreceive_time64 419 -+#define __NR_semtimedop_time64 420 -+#define __NR_rt_sigtimedwait_time64 421 -+#define __NR_futex_time64 422 -+#define __NR_sched_rr_get_interval_time64 423 -+#define __NR_pidfd_send_signal 424 -+#define __NR_io_uring_setup 425 -+#define __NR_io_uring_enter 426 -+#define __NR_io_uring_register 427 -+#define __NR_open_tree 428 -+#define __NR_move_mount 429 -+#define __NR_fsopen 430 -+#define __NR_fsconfig 431 -+#define __NR_fsmount 432 -+#define __NR_fspick 433 -+#define __NR_pidfd_open 434 -+#define __NR_clone3 435 -+#define __NR_close_range 436 -+#define __NR_openat2 437 -+#define __NR_pidfd_getfd 438 -+#define __NR_faccessat2 439 -+#define __NR_process_madvise 440 -+#define __NR_epoll_pwait2 441 -+#define __NR_mount_setattr 442 -+#define __NR_landlock_create_ruleset 444 -+#define __NR_landlock_add_rule 445 -+#define __NR_landlock_restrict_self 446 - - --#endif /* _ASM_POWERPC_UNISTD_32_H */ -+#endif /* _ASM_UNISTD_32_H */ -diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h -index 7e851b30bb..3cefa88932 100644 ---- a/linux-headers/asm-powerpc/unistd_64.h -+++ b/linux-headers/asm-powerpc/unistd_64.h -@@ -1,401 +1,405 @@ --#ifndef _ASM_POWERPC_UNISTD_64_H --#define _ASM_POWERPC_UNISTD_64_H -+#ifndef _ASM_UNISTD_64_H -+#define _ASM_UNISTD_64_H - --#define __NR_restart_syscall 0 --#define __NR_exit 1 --#define __NR_fork 2 --#define __NR_read 3 --#define __NR_write 4 --#define __NR_open 5 --#define __NR_close 6 --#define __NR_waitpid 7 --#define __NR_creat 8 --#define __NR_link 9 --#define __NR_unlink 10 --#define __NR_execve 11 --#define __NR_chdir 12 --#define __NR_time 13 --#define __NR_mknod 14 --#define __NR_chmod 15 --#define __NR_lchown 16 --#define __NR_break 17 --#define __NR_oldstat 18 --#define __NR_lseek 19 --#define __NR_getpid 20 --#define __NR_mount 21 --#define __NR_umount 22 --#define __NR_setuid 23 
--#define __NR_getuid 24 --#define __NR_stime 25 --#define __NR_ptrace 26 --#define __NR_alarm 27 --#define __NR_oldfstat 28 --#define __NR_pause 29 --#define __NR_utime 30 --#define __NR_stty 31 --#define __NR_gtty 32 --#define __NR_access 33 --#define __NR_nice 34 --#define __NR_ftime 35 --#define __NR_sync 36 --#define __NR_kill 37 --#define __NR_rename 38 --#define __NR_mkdir 39 --#define __NR_rmdir 40 --#define __NR_dup 41 --#define __NR_pipe 42 --#define __NR_times 43 --#define __NR_prof 44 --#define __NR_brk 45 --#define __NR_setgid 46 --#define __NR_getgid 47 --#define __NR_signal 48 --#define __NR_geteuid 49 --#define __NR_getegid 50 --#define __NR_acct 51 --#define __NR_umount2 52 --#define __NR_lock 53 --#define __NR_ioctl 54 --#define __NR_fcntl 55 --#define __NR_mpx 56 --#define __NR_setpgid 57 --#define __NR_ulimit 58 --#define __NR_oldolduname 59 --#define __NR_umask 60 --#define __NR_chroot 61 --#define __NR_ustat 62 --#define __NR_dup2 63 --#define __NR_getppid 64 --#define __NR_getpgrp 65 --#define __NR_setsid 66 --#define __NR_sigaction 67 --#define __NR_sgetmask 68 --#define __NR_ssetmask 69 --#define __NR_setreuid 70 --#define __NR_setregid 71 --#define __NR_sigsuspend 72 --#define __NR_sigpending 73 --#define __NR_sethostname 74 --#define __NR_setrlimit 75 --#define __NR_getrlimit 76 --#define __NR_getrusage 77 --#define __NR_gettimeofday 78 --#define __NR_settimeofday 79 --#define __NR_getgroups 80 --#define __NR_setgroups 81 --#define __NR_select 82 --#define __NR_symlink 83 --#define __NR_oldlstat 84 --#define __NR_readlink 85 --#define __NR_uselib 86 --#define __NR_swapon 87 --#define __NR_reboot 88 --#define __NR_readdir 89 --#define __NR_mmap 90 --#define __NR_munmap 91 --#define __NR_truncate 92 --#define __NR_ftruncate 93 --#define __NR_fchmod 94 --#define __NR_fchown 95 --#define __NR_getpriority 96 --#define __NR_setpriority 97 --#define __NR_profil 98 --#define __NR_statfs 99 --#define __NR_fstatfs 100 --#define __NR_ioperm 101 
--#define __NR_socketcall 102 --#define __NR_syslog 103 --#define __NR_setitimer 104 --#define __NR_getitimer 105 --#define __NR_stat 106 --#define __NR_lstat 107 --#define __NR_fstat 108 --#define __NR_olduname 109 --#define __NR_iopl 110 --#define __NR_vhangup 111 --#define __NR_idle 112 --#define __NR_vm86 113 --#define __NR_wait4 114 --#define __NR_swapoff 115 --#define __NR_sysinfo 116 --#define __NR_ipc 117 --#define __NR_fsync 118 --#define __NR_sigreturn 119 --#define __NR_clone 120 --#define __NR_setdomainname 121 --#define __NR_uname 122 --#define __NR_modify_ldt 123 --#define __NR_adjtimex 124 --#define __NR_mprotect 125 --#define __NR_sigprocmask 126 --#define __NR_create_module 127 --#define __NR_init_module 128 --#define __NR_delete_module 129 --#define __NR_get_kernel_syms 130 --#define __NR_quotactl 131 --#define __NR_getpgid 132 --#define __NR_fchdir 133 --#define __NR_bdflush 134 --#define __NR_sysfs 135 --#define __NR_personality 136 --#define __NR_afs_syscall 137 --#define __NR_setfsuid 138 --#define __NR_setfsgid 139 --#define __NR__llseek 140 --#define __NR_getdents 141 --#define __NR__newselect 142 --#define __NR_flock 143 --#define __NR_msync 144 --#define __NR_readv 145 --#define __NR_writev 146 --#define __NR_getsid 147 --#define __NR_fdatasync 148 --#define __NR__sysctl 149 --#define __NR_mlock 150 --#define __NR_munlock 151 --#define __NR_mlockall 152 --#define __NR_munlockall 153 --#define __NR_sched_setparam 154 --#define __NR_sched_getparam 155 --#define __NR_sched_setscheduler 156 --#define __NR_sched_getscheduler 157 --#define __NR_sched_yield 158 --#define __NR_sched_get_priority_max 159 --#define __NR_sched_get_priority_min 160 --#define __NR_sched_rr_get_interval 161 --#define __NR_nanosleep 162 --#define __NR_mremap 163 --#define __NR_setresuid 164 --#define __NR_getresuid 165 --#define __NR_query_module 166 --#define __NR_poll 167 --#define __NR_nfsservctl 168 --#define __NR_setresgid 169 --#define __NR_getresgid 170 --#define 
__NR_prctl 171 --#define __NR_rt_sigreturn 172 --#define __NR_rt_sigaction 173 --#define __NR_rt_sigprocmask 174 --#define __NR_rt_sigpending 175 --#define __NR_rt_sigtimedwait 176 --#define __NR_rt_sigqueueinfo 177 --#define __NR_rt_sigsuspend 178 --#define __NR_pread64 179 --#define __NR_pwrite64 180 --#define __NR_chown 181 --#define __NR_getcwd 182 --#define __NR_capget 183 --#define __NR_capset 184 --#define __NR_sigaltstack 185 --#define __NR_sendfile 186 --#define __NR_getpmsg 187 --#define __NR_putpmsg 188 --#define __NR_vfork 189 --#define __NR_ugetrlimit 190 --#define __NR_readahead 191 --#define __NR_pciconfig_read 198 --#define __NR_pciconfig_write 199 --#define __NR_pciconfig_iobase 200 --#define __NR_multiplexer 201 --#define __NR_getdents64 202 --#define __NR_pivot_root 203 --#define __NR_madvise 205 --#define __NR_mincore 206 --#define __NR_gettid 207 --#define __NR_tkill 208 --#define __NR_setxattr 209 --#define __NR_lsetxattr 210 --#define __NR_fsetxattr 211 --#define __NR_getxattr 212 --#define __NR_lgetxattr 213 --#define __NR_fgetxattr 214 --#define __NR_listxattr 215 --#define __NR_llistxattr 216 --#define __NR_flistxattr 217 --#define __NR_removexattr 218 --#define __NR_lremovexattr 219 --#define __NR_fremovexattr 220 --#define __NR_futex 221 --#define __NR_sched_setaffinity 222 --#define __NR_sched_getaffinity 223 --#define __NR_tuxcall 225 --#define __NR_io_setup 227 --#define __NR_io_destroy 228 --#define __NR_io_getevents 229 --#define __NR_io_submit 230 --#define __NR_io_cancel 231 --#define __NR_set_tid_address 232 --#define __NR_fadvise64 233 --#define __NR_exit_group 234 --#define __NR_lookup_dcookie 235 --#define __NR_epoll_create 236 --#define __NR_epoll_ctl 237 --#define __NR_epoll_wait 238 --#define __NR_remap_file_pages 239 --#define __NR_timer_create 240 --#define __NR_timer_settime 241 --#define __NR_timer_gettime 242 --#define __NR_timer_getoverrun 243 --#define __NR_timer_delete 244 --#define __NR_clock_settime 245 --#define 
__NR_clock_gettime 246 --#define __NR_clock_getres 247 --#define __NR_clock_nanosleep 248 --#define __NR_swapcontext 249 --#define __NR_tgkill 250 --#define __NR_utimes 251 --#define __NR_statfs64 252 --#define __NR_fstatfs64 253 --#define __NR_rtas 255 --#define __NR_sys_debug_setcontext 256 --#define __NR_migrate_pages 258 --#define __NR_mbind 259 --#define __NR_get_mempolicy 260 --#define __NR_set_mempolicy 261 --#define __NR_mq_open 262 --#define __NR_mq_unlink 263 --#define __NR_mq_timedsend 264 --#define __NR_mq_timedreceive 265 --#define __NR_mq_notify 266 --#define __NR_mq_getsetattr 267 --#define __NR_kexec_load 268 --#define __NR_add_key 269 --#define __NR_request_key 270 --#define __NR_keyctl 271 --#define __NR_waitid 272 --#define __NR_ioprio_set 273 --#define __NR_ioprio_get 274 --#define __NR_inotify_init 275 --#define __NR_inotify_add_watch 276 --#define __NR_inotify_rm_watch 277 --#define __NR_spu_run 278 --#define __NR_spu_create 279 --#define __NR_pselect6 280 --#define __NR_ppoll 281 --#define __NR_unshare 282 --#define __NR_splice 283 --#define __NR_tee 284 --#define __NR_vmsplice 285 --#define __NR_openat 286 --#define __NR_mkdirat 287 --#define __NR_mknodat 288 --#define __NR_fchownat 289 --#define __NR_futimesat 290 --#define __NR_newfstatat 291 --#define __NR_unlinkat 292 --#define __NR_renameat 293 --#define __NR_linkat 294 --#define __NR_symlinkat 295 --#define __NR_readlinkat 296 --#define __NR_fchmodat 297 --#define __NR_faccessat 298 --#define __NR_get_robust_list 299 --#define __NR_set_robust_list 300 --#define __NR_move_pages 301 --#define __NR_getcpu 302 --#define __NR_epoll_pwait 303 --#define __NR_utimensat 304 --#define __NR_signalfd 305 --#define __NR_timerfd_create 306 --#define __NR_eventfd 307 --#define __NR_sync_file_range2 308 --#define __NR_fallocate 309 --#define __NR_subpage_prot 310 --#define __NR_timerfd_settime 311 --#define __NR_timerfd_gettime 312 --#define __NR_signalfd4 313 --#define __NR_eventfd2 314 --#define 
__NR_epoll_create1 315 --#define __NR_dup3 316 --#define __NR_pipe2 317 --#define __NR_inotify_init1 318 --#define __NR_perf_event_open 319 --#define __NR_preadv 320 --#define __NR_pwritev 321 --#define __NR_rt_tgsigqueueinfo 322 --#define __NR_fanotify_init 323 --#define __NR_fanotify_mark 324 --#define __NR_prlimit64 325 --#define __NR_socket 326 --#define __NR_bind 327 --#define __NR_connect 328 --#define __NR_listen 329 --#define __NR_accept 330 --#define __NR_getsockname 331 --#define __NR_getpeername 332 --#define __NR_socketpair 333 --#define __NR_send 334 --#define __NR_sendto 335 --#define __NR_recv 336 --#define __NR_recvfrom 337 --#define __NR_shutdown 338 --#define __NR_setsockopt 339 --#define __NR_getsockopt 340 --#define __NR_sendmsg 341 --#define __NR_recvmsg 342 --#define __NR_recvmmsg 343 --#define __NR_accept4 344 --#define __NR_name_to_handle_at 345 --#define __NR_open_by_handle_at 346 --#define __NR_clock_adjtime 347 --#define __NR_syncfs 348 --#define __NR_sendmmsg 349 --#define __NR_setns 350 --#define __NR_process_vm_readv 351 --#define __NR_process_vm_writev 352 --#define __NR_finit_module 353 --#define __NR_kcmp 354 --#define __NR_sched_setattr 355 --#define __NR_sched_getattr 356 --#define __NR_renameat2 357 --#define __NR_seccomp 358 --#define __NR_getrandom 359 --#define __NR_memfd_create 360 --#define __NR_bpf 361 --#define __NR_execveat 362 --#define __NR_switch_endian 363 --#define __NR_userfaultfd 364 --#define __NR_membarrier 365 --#define __NR_mlock2 378 --#define __NR_copy_file_range 379 --#define __NR_preadv2 380 --#define __NR_pwritev2 381 --#define __NR_kexec_file_load 382 --#define __NR_statx 383 --#define __NR_pkey_alloc 384 --#define __NR_pkey_free 385 --#define __NR_pkey_mprotect 386 --#define __NR_rseq 387 --#define __NR_io_pgetevents 388 --#define __NR_semtimedop 392 --#define __NR_semget 393 --#define __NR_semctl 394 --#define __NR_shmget 395 --#define __NR_shmctl 396 --#define __NR_shmat 397 --#define __NR_shmdt 398 
--#define __NR_msgget 399 --#define __NR_msgsnd 400 --#define __NR_msgrcv 401 --#define __NR_msgctl 402 --#define __NR_pidfd_send_signal 424 --#define __NR_io_uring_setup 425 --#define __NR_io_uring_enter 426 --#define __NR_io_uring_register 427 --#define __NR_open_tree 428 --#define __NR_move_mount 429 --#define __NR_fsopen 430 --#define __NR_fsconfig 431 --#define __NR_fsmount 432 --#define __NR_fspick 433 --#define __NR_pidfd_open 434 --#define __NR_clone3 435 --#define __NR_close_range 436 --#define __NR_openat2 437 --#define __NR_pidfd_getfd 438 --#define __NR_faccessat2 439 --#define __NR_process_madvise 440 --#define __NR_epoll_pwait2 441 -+#define __NR_restart_syscall 0 -+#define __NR_exit 1 -+#define __NR_fork 2 -+#define __NR_read 3 -+#define __NR_write 4 -+#define __NR_open 5 -+#define __NR_close 6 -+#define __NR_waitpid 7 -+#define __NR_creat 8 -+#define __NR_link 9 -+#define __NR_unlink 10 -+#define __NR_execve 11 -+#define __NR_chdir 12 -+#define __NR_time 13 -+#define __NR_mknod 14 -+#define __NR_chmod 15 -+#define __NR_lchown 16 -+#define __NR_break 17 -+#define __NR_oldstat 18 -+#define __NR_lseek 19 -+#define __NR_getpid 20 -+#define __NR_mount 21 -+#define __NR_umount 22 -+#define __NR_setuid 23 -+#define __NR_getuid 24 -+#define __NR_stime 25 -+#define __NR_ptrace 26 -+#define __NR_alarm 27 -+#define __NR_oldfstat 28 -+#define __NR_pause 29 -+#define __NR_utime 30 -+#define __NR_stty 31 -+#define __NR_gtty 32 -+#define __NR_access 33 -+#define __NR_nice 34 -+#define __NR_ftime 35 -+#define __NR_sync 36 -+#define __NR_kill 37 -+#define __NR_rename 38 -+#define __NR_mkdir 39 -+#define __NR_rmdir 40 -+#define __NR_dup 41 -+#define __NR_pipe 42 -+#define __NR_times 43 -+#define __NR_prof 44 -+#define __NR_brk 45 -+#define __NR_setgid 46 -+#define __NR_getgid 47 -+#define __NR_signal 48 -+#define __NR_geteuid 49 -+#define __NR_getegid 50 -+#define __NR_acct 51 -+#define __NR_umount2 52 -+#define __NR_lock 53 -+#define __NR_ioctl 54 -+#define 
__NR_fcntl 55 -+#define __NR_mpx 56 -+#define __NR_setpgid 57 -+#define __NR_ulimit 58 -+#define __NR_oldolduname 59 -+#define __NR_umask 60 -+#define __NR_chroot 61 -+#define __NR_ustat 62 -+#define __NR_dup2 63 -+#define __NR_getppid 64 -+#define __NR_getpgrp 65 -+#define __NR_setsid 66 -+#define __NR_sigaction 67 -+#define __NR_sgetmask 68 -+#define __NR_ssetmask 69 -+#define __NR_setreuid 70 -+#define __NR_setregid 71 -+#define __NR_sigsuspend 72 -+#define __NR_sigpending 73 -+#define __NR_sethostname 74 -+#define __NR_setrlimit 75 -+#define __NR_getrlimit 76 -+#define __NR_getrusage 77 -+#define __NR_gettimeofday 78 -+#define __NR_settimeofday 79 -+#define __NR_getgroups 80 -+#define __NR_setgroups 81 -+#define __NR_select 82 -+#define __NR_symlink 83 -+#define __NR_oldlstat 84 -+#define __NR_readlink 85 -+#define __NR_uselib 86 -+#define __NR_swapon 87 -+#define __NR_reboot 88 -+#define __NR_readdir 89 -+#define __NR_mmap 90 -+#define __NR_munmap 91 -+#define __NR_truncate 92 -+#define __NR_ftruncate 93 -+#define __NR_fchmod 94 -+#define __NR_fchown 95 -+#define __NR_getpriority 96 -+#define __NR_setpriority 97 -+#define __NR_profil 98 -+#define __NR_statfs 99 -+#define __NR_fstatfs 100 -+#define __NR_ioperm 101 -+#define __NR_socketcall 102 -+#define __NR_syslog 103 -+#define __NR_setitimer 104 -+#define __NR_getitimer 105 -+#define __NR_stat 106 -+#define __NR_lstat 107 -+#define __NR_fstat 108 -+#define __NR_olduname 109 -+#define __NR_iopl 110 -+#define __NR_vhangup 111 -+#define __NR_idle 112 -+#define __NR_vm86 113 -+#define __NR_wait4 114 -+#define __NR_swapoff 115 -+#define __NR_sysinfo 116 -+#define __NR_ipc 117 -+#define __NR_fsync 118 -+#define __NR_sigreturn 119 -+#define __NR_clone 120 -+#define __NR_setdomainname 121 -+#define __NR_uname 122 -+#define __NR_modify_ldt 123 -+#define __NR_adjtimex 124 -+#define __NR_mprotect 125 -+#define __NR_sigprocmask 126 -+#define __NR_create_module 127 -+#define __NR_init_module 128 -+#define 
__NR_delete_module 129 -+#define __NR_get_kernel_syms 130 -+#define __NR_quotactl 131 -+#define __NR_getpgid 132 -+#define __NR_fchdir 133 -+#define __NR_bdflush 134 -+#define __NR_sysfs 135 -+#define __NR_personality 136 -+#define __NR_afs_syscall 137 -+#define __NR_setfsuid 138 -+#define __NR_setfsgid 139 -+#define __NR__llseek 140 -+#define __NR_getdents 141 -+#define __NR__newselect 142 -+#define __NR_flock 143 -+#define __NR_msync 144 -+#define __NR_readv 145 -+#define __NR_writev 146 -+#define __NR_getsid 147 -+#define __NR_fdatasync 148 -+#define __NR__sysctl 149 -+#define __NR_mlock 150 -+#define __NR_munlock 151 -+#define __NR_mlockall 152 -+#define __NR_munlockall 153 -+#define __NR_sched_setparam 154 -+#define __NR_sched_getparam 155 -+#define __NR_sched_setscheduler 156 -+#define __NR_sched_getscheduler 157 -+#define __NR_sched_yield 158 -+#define __NR_sched_get_priority_max 159 -+#define __NR_sched_get_priority_min 160 -+#define __NR_sched_rr_get_interval 161 -+#define __NR_nanosleep 162 -+#define __NR_mremap 163 -+#define __NR_setresuid 164 -+#define __NR_getresuid 165 -+#define __NR_query_module 166 -+#define __NR_poll 167 -+#define __NR_nfsservctl 168 -+#define __NR_setresgid 169 -+#define __NR_getresgid 170 -+#define __NR_prctl 171 -+#define __NR_rt_sigreturn 172 -+#define __NR_rt_sigaction 173 -+#define __NR_rt_sigprocmask 174 -+#define __NR_rt_sigpending 175 -+#define __NR_rt_sigtimedwait 176 -+#define __NR_rt_sigqueueinfo 177 -+#define __NR_rt_sigsuspend 178 -+#define __NR_pread64 179 -+#define __NR_pwrite64 180 -+#define __NR_chown 181 -+#define __NR_getcwd 182 -+#define __NR_capget 183 -+#define __NR_capset 184 -+#define __NR_sigaltstack 185 -+#define __NR_sendfile 186 -+#define __NR_getpmsg 187 -+#define __NR_putpmsg 188 -+#define __NR_vfork 189 -+#define __NR_ugetrlimit 190 -+#define __NR_readahead 191 -+#define __NR_pciconfig_read 198 -+#define __NR_pciconfig_write 199 -+#define __NR_pciconfig_iobase 200 -+#define __NR_multiplexer 201 
-+#define __NR_getdents64 202 -+#define __NR_pivot_root 203 -+#define __NR_madvise 205 -+#define __NR_mincore 206 -+#define __NR_gettid 207 -+#define __NR_tkill 208 -+#define __NR_setxattr 209 -+#define __NR_lsetxattr 210 -+#define __NR_fsetxattr 211 -+#define __NR_getxattr 212 -+#define __NR_lgetxattr 213 -+#define __NR_fgetxattr 214 -+#define __NR_listxattr 215 -+#define __NR_llistxattr 216 -+#define __NR_flistxattr 217 -+#define __NR_removexattr 218 -+#define __NR_lremovexattr 219 -+#define __NR_fremovexattr 220 -+#define __NR_futex 221 -+#define __NR_sched_setaffinity 222 -+#define __NR_sched_getaffinity 223 -+#define __NR_tuxcall 225 -+#define __NR_io_setup 227 -+#define __NR_io_destroy 228 -+#define __NR_io_getevents 229 -+#define __NR_io_submit 230 -+#define __NR_io_cancel 231 -+#define __NR_set_tid_address 232 -+#define __NR_fadvise64 233 -+#define __NR_exit_group 234 -+#define __NR_lookup_dcookie 235 -+#define __NR_epoll_create 236 -+#define __NR_epoll_ctl 237 -+#define __NR_epoll_wait 238 -+#define __NR_remap_file_pages 239 -+#define __NR_timer_create 240 -+#define __NR_timer_settime 241 -+#define __NR_timer_gettime 242 -+#define __NR_timer_getoverrun 243 -+#define __NR_timer_delete 244 -+#define __NR_clock_settime 245 -+#define __NR_clock_gettime 246 -+#define __NR_clock_getres 247 -+#define __NR_clock_nanosleep 248 -+#define __NR_swapcontext 249 -+#define __NR_tgkill 250 -+#define __NR_utimes 251 -+#define __NR_statfs64 252 -+#define __NR_fstatfs64 253 -+#define __NR_rtas 255 -+#define __NR_sys_debug_setcontext 256 -+#define __NR_migrate_pages 258 -+#define __NR_mbind 259 -+#define __NR_get_mempolicy 260 -+#define __NR_set_mempolicy 261 -+#define __NR_mq_open 262 -+#define __NR_mq_unlink 263 -+#define __NR_mq_timedsend 264 -+#define __NR_mq_timedreceive 265 -+#define __NR_mq_notify 266 -+#define __NR_mq_getsetattr 267 -+#define __NR_kexec_load 268 -+#define __NR_add_key 269 -+#define __NR_request_key 270 -+#define __NR_keyctl 271 -+#define __NR_waitid 
272 -+#define __NR_ioprio_set 273 -+#define __NR_ioprio_get 274 -+#define __NR_inotify_init 275 -+#define __NR_inotify_add_watch 276 -+#define __NR_inotify_rm_watch 277 -+#define __NR_spu_run 278 -+#define __NR_spu_create 279 -+#define __NR_pselect6 280 -+#define __NR_ppoll 281 -+#define __NR_unshare 282 -+#define __NR_splice 283 -+#define __NR_tee 284 -+#define __NR_vmsplice 285 -+#define __NR_openat 286 -+#define __NR_mkdirat 287 -+#define __NR_mknodat 288 -+#define __NR_fchownat 289 -+#define __NR_futimesat 290 -+#define __NR_newfstatat 291 -+#define __NR_unlinkat 292 -+#define __NR_renameat 293 -+#define __NR_linkat 294 -+#define __NR_symlinkat 295 -+#define __NR_readlinkat 296 -+#define __NR_fchmodat 297 -+#define __NR_faccessat 298 -+#define __NR_get_robust_list 299 -+#define __NR_set_robust_list 300 -+#define __NR_move_pages 301 -+#define __NR_getcpu 302 -+#define __NR_epoll_pwait 303 -+#define __NR_utimensat 304 -+#define __NR_signalfd 305 -+#define __NR_timerfd_create 306 -+#define __NR_eventfd 307 -+#define __NR_sync_file_range2 308 -+#define __NR_fallocate 309 -+#define __NR_subpage_prot 310 -+#define __NR_timerfd_settime 311 -+#define __NR_timerfd_gettime 312 -+#define __NR_signalfd4 313 -+#define __NR_eventfd2 314 -+#define __NR_epoll_create1 315 -+#define __NR_dup3 316 -+#define __NR_pipe2 317 -+#define __NR_inotify_init1 318 -+#define __NR_perf_event_open 319 -+#define __NR_preadv 320 -+#define __NR_pwritev 321 -+#define __NR_rt_tgsigqueueinfo 322 -+#define __NR_fanotify_init 323 -+#define __NR_fanotify_mark 324 -+#define __NR_prlimit64 325 -+#define __NR_socket 326 -+#define __NR_bind 327 -+#define __NR_connect 328 -+#define __NR_listen 329 -+#define __NR_accept 330 -+#define __NR_getsockname 331 -+#define __NR_getpeername 332 -+#define __NR_socketpair 333 -+#define __NR_send 334 -+#define __NR_sendto 335 -+#define __NR_recv 336 -+#define __NR_recvfrom 337 -+#define __NR_shutdown 338 -+#define __NR_setsockopt 339 -+#define __NR_getsockopt 340 
-+#define __NR_sendmsg 341 -+#define __NR_recvmsg 342 -+#define __NR_recvmmsg 343 -+#define __NR_accept4 344 -+#define __NR_name_to_handle_at 345 -+#define __NR_open_by_handle_at 346 -+#define __NR_clock_adjtime 347 -+#define __NR_syncfs 348 -+#define __NR_sendmmsg 349 -+#define __NR_setns 350 -+#define __NR_process_vm_readv 351 -+#define __NR_process_vm_writev 352 -+#define __NR_finit_module 353 -+#define __NR_kcmp 354 -+#define __NR_sched_setattr 355 -+#define __NR_sched_getattr 356 -+#define __NR_renameat2 357 -+#define __NR_seccomp 358 -+#define __NR_getrandom 359 -+#define __NR_memfd_create 360 -+#define __NR_bpf 361 -+#define __NR_execveat 362 -+#define __NR_switch_endian 363 -+#define __NR_userfaultfd 364 -+#define __NR_membarrier 365 -+#define __NR_mlock2 378 -+#define __NR_copy_file_range 379 -+#define __NR_preadv2 380 -+#define __NR_pwritev2 381 -+#define __NR_kexec_file_load 382 -+#define __NR_statx 383 -+#define __NR_pkey_alloc 384 -+#define __NR_pkey_free 385 -+#define __NR_pkey_mprotect 386 -+#define __NR_rseq 387 -+#define __NR_io_pgetevents 388 -+#define __NR_semtimedop 392 -+#define __NR_semget 393 -+#define __NR_semctl 394 -+#define __NR_shmget 395 -+#define __NR_shmctl 396 -+#define __NR_shmat 397 -+#define __NR_shmdt 398 -+#define __NR_msgget 399 -+#define __NR_msgsnd 400 -+#define __NR_msgrcv 401 -+#define __NR_msgctl 402 -+#define __NR_pidfd_send_signal 424 -+#define __NR_io_uring_setup 425 -+#define __NR_io_uring_enter 426 -+#define __NR_io_uring_register 427 -+#define __NR_open_tree 428 -+#define __NR_move_mount 429 -+#define __NR_fsopen 430 -+#define __NR_fsconfig 431 -+#define __NR_fsmount 432 -+#define __NR_fspick 433 -+#define __NR_pidfd_open 434 -+#define __NR_clone3 435 -+#define __NR_close_range 436 -+#define __NR_openat2 437 -+#define __NR_pidfd_getfd 438 -+#define __NR_faccessat2 439 -+#define __NR_process_madvise 440 -+#define __NR_epoll_pwait2 441 -+#define __NR_mount_setattr 442 -+#define __NR_landlock_create_ruleset 444 
-+#define __NR_landlock_add_rule 445 -+#define __NR_landlock_restrict_self 446 - - --#endif /* _ASM_POWERPC_UNISTD_64_H */ -+#endif /* _ASM_UNISTD_64_H */ -diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h -index c94d2c3a22..e8cd34334f 100644 ---- a/linux-headers/asm-s390/unistd_32.h -+++ b/linux-headers/asm-s390/unistd_32.h -@@ -414,5 +414,9 @@ - #define __NR_faccessat2 439 - #define __NR_process_madvise 440 - #define __NR_epoll_pwait2 441 -+#define __NR_mount_setattr 442 -+#define __NR_landlock_create_ruleset 444 -+#define __NR_landlock_add_rule 445 -+#define __NR_landlock_restrict_self 446 - - #endif /* _ASM_S390_UNISTD_32_H */ -diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h -index 984a06b7eb..86830e1e83 100644 ---- a/linux-headers/asm-s390/unistd_64.h -+++ b/linux-headers/asm-s390/unistd_64.h -@@ -362,5 +362,9 @@ - #define __NR_faccessat2 439 - #define __NR_process_madvise 440 - #define __NR_epoll_pwait2 441 -+#define __NR_mount_setattr 442 -+#define __NR_landlock_create_ruleset 444 -+#define __NR_landlock_add_rule 445 -+#define __NR_landlock_restrict_self 446 - - #endif /* _ASM_S390_UNISTD_64_H */ -diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h -index 8e76d3701d..0662f644aa 100644 ---- a/linux-headers/asm-x86/kvm.h -+++ b/linux-headers/asm-x86/kvm.h -@@ -112,6 +112,7 @@ struct kvm_ioapic_state { - #define KVM_NR_IRQCHIPS 3 - - #define KVM_RUN_X86_SMM (1 << 0) -+#define KVM_RUN_X86_BUS_LOCK (1 << 1) - - /* for KVM_GET_REGS and KVM_SET_REGS */ - struct kvm_regs { -@@ -436,6 +437,8 @@ struct kvm_vmx_nested_state_hdr { - __u16 flags; - } smm; - -+ __u16 pad; -+ - __u32 flags; - __u64 preemption_timer_deadline; - }; -diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h -index 18fb99dfa2..8f6ac8c19f 100644 ---- a/linux-headers/asm-x86/unistd_32.h -+++ b/linux-headers/asm-x86/unistd_32.h -@@ -432,6 +432,10 @@ - #define __NR_faccessat2 439 - 
#define __NR_process_madvise 440 - #define __NR_epoll_pwait2 441 -+#define __NR_mount_setattr 442 -+#define __NR_landlock_create_ruleset 444 -+#define __NR_landlock_add_rule 445 -+#define __NR_landlock_restrict_self 446 - - - #endif /* _ASM_X86_UNISTD_32_H */ -diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h -index bde959328d..bb187a9268 100644 ---- a/linux-headers/asm-x86/unistd_64.h -+++ b/linux-headers/asm-x86/unistd_64.h -@@ -354,6 +354,10 @@ - #define __NR_faccessat2 439 - #define __NR_process_madvise 440 - #define __NR_epoll_pwait2 441 -+#define __NR_mount_setattr 442 -+#define __NR_landlock_create_ruleset 444 -+#define __NR_landlock_add_rule 445 -+#define __NR_landlock_restrict_self 446 - - - #endif /* _ASM_X86_UNISTD_64_H */ -diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h -index 4ff6b17d3b..4edd0103ac 100644 ---- a/linux-headers/asm-x86/unistd_x32.h -+++ b/linux-headers/asm-x86/unistd_x32.h -@@ -307,6 +307,10 @@ - #define __NR_faccessat2 (__X32_SYSCALL_BIT + 439) - #define __NR_process_madvise (__X32_SYSCALL_BIT + 440) - #define __NR_epoll_pwait2 (__X32_SYSCALL_BIT + 441) -+#define __NR_mount_setattr (__X32_SYSCALL_BIT + 442) -+#define __NR_landlock_create_ruleset (__X32_SYSCALL_BIT + 444) -+#define __NR_landlock_add_rule (__X32_SYSCALL_BIT + 445) -+#define __NR_landlock_restrict_self (__X32_SYSCALL_BIT + 446) - #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) - #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) - #define __NR_ioctl (__X32_SYSCALL_BIT + 514) -diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h -index 897f831374..20d6a263bb 100644 ---- a/linux-headers/linux/kvm.h -+++ b/linux-headers/linux/kvm.h -@@ -8,6 +8,7 @@ - * Note: you must update KVM_API_VERSION if you change this interface. 
- */ - -+#include - #include - - #include -@@ -216,6 +217,20 @@ struct kvm_hyperv_exit { - } u; - }; - -+struct kvm_xen_exit { -+#define KVM_EXIT_XEN_HCALL 1 -+ __u32 type; -+ union { -+ struct { -+ __u32 longmode; -+ __u32 cpl; -+ __u64 input; -+ __u64 result; -+ __u64 params[6]; -+ } hcall; -+ } u; -+}; -+ - #define KVM_S390_GET_SKEYS_NONE 1 - #define KVM_S390_SKEYS_MAX 1048576 - -@@ -251,6 +266,9 @@ struct kvm_hyperv_exit { - #define KVM_EXIT_X86_RDMSR 29 - #define KVM_EXIT_X86_WRMSR 30 - #define KVM_EXIT_DIRTY_RING_FULL 31 -+#define KVM_EXIT_AP_RESET_HOLD 32 -+#define KVM_EXIT_X86_BUS_LOCK 33 -+#define KVM_EXIT_XEN 34 - - /* For KVM_EXIT_INTERNAL_ERROR */ - /* Emulate instruction failed. */ -@@ -427,6 +445,8 @@ struct kvm_run { - __u32 index; /* kernel -> user */ - __u64 data; /* kernel <-> user */ - } msr; -+ /* KVM_EXIT_XEN */ -+ struct kvm_xen_exit xen; - /* Fix the size of the union. */ - char padding[256]; - }; -@@ -573,6 +593,7 @@ struct kvm_vapic_addr { - #define KVM_MP_STATE_CHECK_STOP 6 - #define KVM_MP_STATE_OPERATING 7 - #define KVM_MP_STATE_LOAD 8 -+#define KVM_MP_STATE_AP_RESET_HOLD 9 - - struct kvm_mp_state { - __u32 mp_state; -@@ -1056,6 +1077,12 @@ struct kvm_ppc_resize_hpt { - #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 - #define KVM_CAP_SYS_HYPERV_CPUID 191 - #define KVM_CAP_DIRTY_LOG_RING 192 -+#define KVM_CAP_X86_BUS_LOCK_EXIT 193 -+#define KVM_CAP_PPC_DAWR1 194 -+#define KVM_CAP_SET_GUEST_DEBUG2 195 -+#define KVM_CAP_SGX_ATTRIBUTE 196 -+#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 -+#define KVM_CAP_PTP_KVM 198 - - #ifdef KVM_CAP_IRQ_ROUTING - -@@ -1129,6 +1156,11 @@ struct kvm_x86_mce { - #endif - - #ifdef KVM_CAP_XEN_HVM -+#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) -+#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) -+#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) -+#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) -+ - struct kvm_xen_hvm_config { - __u32 flags; - __u32 msr; -@@ -1563,6 +1595,57 @@ struct kvm_pv_cmd { - /* Available 
with KVM_CAP_DIRTY_LOG_RING */ - #define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) - -+/* Per-VM Xen attributes */ -+#define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) -+#define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) -+ -+struct kvm_xen_hvm_attr { -+ __u16 type; -+ __u16 pad[3]; -+ union { -+ __u8 long_mode; -+ __u8 vector; -+ struct { -+ __u64 gfn; -+ } shared_info; -+ __u64 pad[8]; -+ } u; -+}; -+ -+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ -+#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 -+#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 -+#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 -+ -+/* Per-vCPU Xen attributes */ -+#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) -+#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) -+ -+struct kvm_xen_vcpu_attr { -+ __u16 type; -+ __u16 pad[3]; -+ union { -+ __u64 gpa; -+ __u64 pad[8]; -+ struct { -+ __u64 state; -+ __u64 state_entry_time; -+ __u64 time_running; -+ __u64 time_runnable; -+ __u64 time_blocked; -+ __u64 time_offline; -+ } runstate; -+ } u; -+}; -+ -+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ -+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 -+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 -+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 -+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 -+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 -+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 -+ - /* Secure Encrypted Virtualization command */ - enum sev_cmd_id { - /* Guest initialization commands */ -@@ -1593,6 +1676,8 @@ enum sev_cmd_id { - KVM_SEV_CERT_EXPORT, - /* Attestation report */ - KVM_SEV_GET_ATTESTATION_REPORT, -+ /* Guest Migration Extension */ -+ KVM_SEV_SEND_CANCEL, - - KVM_SEV_NR_MAX, - }; -@@ -1651,6 +1736,45 @@ struct kvm_sev_attestation_report { - __u32 len; - }; - -+struct kvm_sev_send_start { -+ __u32 policy; -+ __u64 pdh_cert_uaddr; -+ __u32 pdh_cert_len; -+ 
__u64 plat_certs_uaddr; -+ __u32 plat_certs_len; -+ __u64 amd_certs_uaddr; -+ __u32 amd_certs_len; -+ __u64 session_uaddr; -+ __u32 session_len; -+}; -+ -+struct kvm_sev_send_update_data { -+ __u64 hdr_uaddr; -+ __u32 hdr_len; -+ __u64 guest_uaddr; -+ __u32 guest_len; -+ __u64 trans_uaddr; -+ __u32 trans_len; -+}; -+ -+struct kvm_sev_receive_start { -+ __u32 handle; -+ __u32 policy; -+ __u64 pdh_uaddr; -+ __u32 pdh_len; -+ __u64 session_uaddr; -+ __u32 session_len; -+}; -+ -+struct kvm_sev_receive_update_data { -+ __u64 hdr_uaddr; -+ __u32 hdr_len; -+ __u64 guest_uaddr; -+ __u32 guest_len; -+ __u64 trans_uaddr; -+ __u32 trans_len; -+}; -+ - #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) - #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) - #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) -@@ -1756,8 +1880,8 @@ struct kvm_hyperv_eventfd { - * conversion after harvesting an entry. Also, it must not skip any - * dirty bits, so that dirty bits are always harvested in sequence. - */ --#define KVM_DIRTY_GFN_F_DIRTY BIT(0) --#define KVM_DIRTY_GFN_F_RESET BIT(1) -+#define KVM_DIRTY_GFN_F_DIRTY _BITUL(0) -+#define KVM_DIRTY_GFN_F_RESET _BITUL(1) - #define KVM_DIRTY_GFN_F_MASK 0x3 - - /* -@@ -1772,4 +1896,7 @@ struct kvm_dirty_gfn { - __u64 offset; - }; - -+#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) -+#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) -+ - #endif /* __LINUX_KVM_H */ -diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h -index 1ba9a9feeb..b9ac97b70f 100644 ---- a/linux-headers/linux/userfaultfd.h -+++ b/linux-headers/linux/userfaultfd.h -@@ -19,15 +19,19 @@ - * means the userland is reading). 
- */ - #define UFFD_API ((__u64)0xAA) -+#define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING | \ -+ UFFDIO_REGISTER_MODE_WP | \ -+ UFFDIO_REGISTER_MODE_MINOR) - #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ - UFFD_FEATURE_EVENT_FORK | \ - UFFD_FEATURE_EVENT_REMAP | \ -- UFFD_FEATURE_EVENT_REMOVE | \ -+ UFFD_FEATURE_EVENT_REMOVE | \ - UFFD_FEATURE_EVENT_UNMAP | \ - UFFD_FEATURE_MISSING_HUGETLBFS | \ - UFFD_FEATURE_MISSING_SHMEM | \ - UFFD_FEATURE_SIGBUS | \ -- UFFD_FEATURE_THREAD_ID) -+ UFFD_FEATURE_THREAD_ID | \ -+ UFFD_FEATURE_MINOR_HUGETLBFS) - #define UFFD_API_IOCTLS \ - ((__u64)1 << _UFFDIO_REGISTER | \ - (__u64)1 << _UFFDIO_UNREGISTER | \ -@@ -36,10 +40,12 @@ - ((__u64)1 << _UFFDIO_WAKE | \ - (__u64)1 << _UFFDIO_COPY | \ - (__u64)1 << _UFFDIO_ZEROPAGE | \ -- (__u64)1 << _UFFDIO_WRITEPROTECT) -+ (__u64)1 << _UFFDIO_WRITEPROTECT | \ -+ (__u64)1 << _UFFDIO_CONTINUE) - #define UFFD_API_RANGE_IOCTLS_BASIC \ - ((__u64)1 << _UFFDIO_WAKE | \ -- (__u64)1 << _UFFDIO_COPY) -+ (__u64)1 << _UFFDIO_COPY | \ -+ (__u64)1 << _UFFDIO_CONTINUE) - - /* - * Valid ioctl command number range with this API is from 0x00 to -@@ -55,6 +61,7 @@ - #define _UFFDIO_COPY (0x03) - #define _UFFDIO_ZEROPAGE (0x04) - #define _UFFDIO_WRITEPROTECT (0x06) -+#define _UFFDIO_CONTINUE (0x07) - #define _UFFDIO_API (0x3F) - - /* userfaultfd ioctl ids */ -@@ -73,6 +80,8 @@ - struct uffdio_zeropage) - #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ - struct uffdio_writeprotect) -+#define UFFDIO_CONTINUE _IOR(UFFDIO, _UFFDIO_CONTINUE, \ -+ struct uffdio_continue) - - /* read() structure */ - struct uffd_msg { -@@ -127,6 +136,7 @@ struct uffd_msg { - /* flags for UFFD_EVENT_PAGEFAULT */ - #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ - #define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */ -+#define UFFD_PAGEFAULT_FLAG_MINOR (1<<2) /* If reason is VM_UFFD_MINOR */ - - struct uffdio_api { - /* userland asks for an API number 
and the features to enable */ -@@ -171,6 +181,10 @@ struct uffdio_api { - * - * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will - * be returned, if feature is not requested 0 will be returned. -+ * -+ * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults -+ * can be intercepted (via REGISTER_MODE_MINOR) for -+ * hugetlbfs-backed pages. - */ - #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) - #define UFFD_FEATURE_EVENT_FORK (1<<1) -@@ -181,6 +195,7 @@ struct uffdio_api { - #define UFFD_FEATURE_EVENT_UNMAP (1<<6) - #define UFFD_FEATURE_SIGBUS (1<<7) - #define UFFD_FEATURE_THREAD_ID (1<<8) -+#define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) - __u64 features; - - __u64 ioctls; -@@ -195,6 +210,7 @@ struct uffdio_register { - struct uffdio_range range; - #define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0) - #define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1) -+#define UFFDIO_REGISTER_MODE_MINOR ((__u64)1<<2) - __u64 mode; - - /* -@@ -257,6 +273,18 @@ struct uffdio_writeprotect { - __u64 mode; - }; - -+struct uffdio_continue { -+ struct uffdio_range range; -+#define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0) -+ __u64 mode; -+ -+ /* -+ * Fields below here are written by the ioctl and must be at the end: -+ * the copy_from_user will not read past here. -+ */ -+ __s64 mapped; -+}; -+ - /* - * Flags for the userfaultfd(2) system call itself. 
- */ -diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h -index 609099e455..e680594f27 100644 ---- a/linux-headers/linux/vfio.h -+++ b/linux-headers/linux/vfio.h -@@ -46,6 +46,12 @@ - */ - #define VFIO_NOIOMMU_IOMMU 8 - -+/* Supports VFIO_DMA_UNMAP_FLAG_ALL */ -+#define VFIO_UNMAP_ALL 9 -+ -+/* Supports the vaddr flag for DMA map and unmap */ -+#define VFIO_UPDATE_VADDR 10 -+ - /* - * The IOCTL interface is designed for extensibility by embedding the - * structure length (argsz) and flags into structures passed between -@@ -329,6 +335,8 @@ struct vfio_region_info_cap_type { - /* 10de vendor PCI sub-types */ - /* - * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space. -+ * -+ * Deprecated, region no longer provided - */ - #define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1) - -@@ -336,6 +344,8 @@ struct vfio_region_info_cap_type { - /* - * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU - * to do TLB invalidation on a GPU. -+ * -+ * Deprecated, region no longer provided - */ - #define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1) - -@@ -635,6 +645,8 @@ struct vfio_device_migration_info { - * Capability with compressed real address (aka SSA - small system address) - * where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing - * and by the userspace to associate a NVLink bridge with a GPU. -+ * -+ * Deprecated, capability no longer provided - */ - #define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT 4 - -@@ -649,6 +661,8 @@ struct vfio_region_info_cap_nvlink2_ssatgt { - * property in the device tree. The value is fixed in the hardware - * and failing to provide the correct value results in the link - * not working with no indication from the driver why. 
-+ * -+ * Deprecated, capability no longer provided - */ - #define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD 5 - -@@ -1074,12 +1088,22 @@ struct vfio_iommu_type1_info_dma_avail { - * - * Map process virtual addresses to IO virtual addresses using the - * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required. -+ * -+ * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and -+ * unblock translation of host virtual addresses in the iova range. The vaddr -+ * must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR. To -+ * maintain memory consistency within the user application, the updated vaddr -+ * must address the same memory object as originally mapped. Failure to do so -+ * will result in user memory corruption and/or device misbehavior. iova and -+ * size must match those in the original MAP_DMA call. Protection is not -+ * changed, and the READ & WRITE flags must be 0. - */ - struct vfio_iommu_type1_dma_map { - __u32 argsz; - __u32 flags; - #define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */ - #define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */ -+#define VFIO_DMA_MAP_FLAG_VADDR (1 << 2) - __u64 vaddr; /* Process virtual address */ - __u64 iova; /* IO virtual address */ - __u64 size; /* Size of mapping (bytes) */ -@@ -1102,6 +1126,7 @@ struct vfio_bitmap { - * field. No guarantee is made to the user that arbitrary unmaps of iova - * or size different from those used in the original mapping call will - * succeed. -+ * - * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap - * before unmapping IO virtual addresses. When this flag is set, the user must - * provide a struct vfio_bitmap in data[]. User must provide zero-allocated -@@ -1111,11 +1136,21 @@ struct vfio_bitmap { - * indicates that the page at that offset from iova is dirty. A Bitmap of the - * pages in the range of unmapped size is returned in the user-provided - * vfio_bitmap.data. 
-+ * -+ * If flags & VFIO_DMA_UNMAP_FLAG_ALL, unmap all addresses. iova and size -+ * must be 0. This cannot be combined with the get-dirty-bitmap flag. -+ * -+ * If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host -+ * virtual addresses in the iova range. Tasks that attempt to translate an -+ * iova's vaddr will block. DMA to already-mapped pages continues. This -+ * cannot be combined with the get-dirty-bitmap flag. - */ - struct vfio_iommu_type1_dma_unmap { - __u32 argsz; - __u32 flags; - #define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0) -+#define VFIO_DMA_UNMAP_FLAG_ALL (1 << 1) -+#define VFIO_DMA_UNMAP_FLAG_VADDR (1 << 2) - __u64 iova; /* IO virtual address */ - __u64 size; /* Size of mapping (bytes) */ - __u8 data[]; --- -2.27.0 - diff --git a/kvm-aarch64-Add-USB-storage-devices.patch b/kvm-aarch64-Add-USB-storage-devices.patch deleted file mode 100644 index 4059db2..0000000 --- a/kvm-aarch64-Add-USB-storage-devices.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 7fd7892a21cf930f3d44dc3205bed9fb9128c11c Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Wed, 4 Aug 2021 07:10:15 -0400 -Subject: [PATCH 23/39] aarch64: Add USB storage devices - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 -RH-Commit: [15/15] 7706801381d8dfc97231fa87f6a7c8de7e3c8e84 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Acked-by: Igor Mammedov -RH-Acked-by: Andrew Jones - -There's no reason not to support these devices and doing so allows us -to match x86 and ppc. Also to match the other architectures we do not -enable CONFIG_USB_STORAGE_BOT, as that was disabled for the other -architectures for BZ1866133. 
- -Signed-off-by: Andrew Jones -Signed-off-by: Miroslav Rezanina ---- - default-configs/devices/aarch64-rh-devices.mak | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/default-configs/devices/aarch64-rh-devices.mak b/default-configs/devices/aarch64-rh-devices.mak -index d8ce902720..a5bab23925 100644 ---- a/default-configs/devices/aarch64-rh-devices.mak -+++ b/default-configs/devices/aarch64-rh-devices.mak -@@ -15,6 +15,8 @@ CONFIG_SEMIHOSTING=y - CONFIG_USB=y - CONFIG_USB_XHCI=y - CONFIG_USB_XHCI_PCI=y -+CONFIG_USB_STORAGE_CORE=y -+CONFIG_USB_STORAGE_CLASSIC=y - CONFIG_VFIO=y - CONFIG_VFIO_PCI=y - CONFIG_VIRTIO_MMIO=y --- -2.27.0 - diff --git a/kvm-aarch64-rh-devices-add-CONFIG_PVPANIC_PCI.patch b/kvm-aarch64-rh-devices-add-CONFIG_PVPANIC_PCI.patch deleted file mode 100644 index 800b567..0000000 --- a/kvm-aarch64-rh-devices-add-CONFIG_PVPANIC_PCI.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 5cc3aacc241c0d26f63d51efd1b6fa35490d37c9 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 26 May 2021 10:33:20 -0400 -Subject: [PATCH 03/15] aarch64-rh-devices: add CONFIG_PVPANIC_PCI -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 2: aarch64-rh-devices: add CONFIG_PVPANIC_PCI -RH-Commit: [1/1] f3d0a94a91ea1b3fff925f32affce1b77469e206 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 1747467 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones - -We want to enable the PVPANIC facility on ARM. On aarch64, -the PVPANIC PCI device is used (on x86_64 the ISA device is used). -so let's set the PVPANIC_PCI config. 
- -Signed-off-by: Eric Auger -Signed-off-by: Miroslav Rezanina ---- - default-configs/devices/aarch64-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/default-configs/devices/aarch64-rh-devices.mak b/default-configs/devices/aarch64-rh-devices.mak -index a4d67274c0..4220469178 100644 ---- a/default-configs/devices/aarch64-rh-devices.mak -+++ b/default-configs/devices/aarch64-rh-devices.mak -@@ -26,3 +26,4 @@ CONFIG_TPM_EMULATOR=y - CONFIG_TPM_TIS_SYSBUS=y - CONFIG_PTIMER=y - CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y -+CONFIG_PVPANIC_PCI=y --- -2.27.0 - diff --git a/kvm-aarch64-rh-devices-add-CONFIG_PXB.patch b/kvm-aarch64-rh-devices-add-CONFIG_PXB.patch deleted file mode 100644 index 37b85fb..0000000 --- a/kvm-aarch64-rh-devices-add-CONFIG_PXB.patch +++ /dev/null @@ -1,37 +0,0 @@ -From d05ba1e2208cb17b8cf7dac050d95137a67dd988 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 24 Jun 2021 10:32:08 +0200 -Subject: [PATCH 01/12] aarch64-rh-devices: add CONFIG_PXB -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 14: aarch64-rh-devices: add CONFIG_PXB -RH-Commit: [1/1] 6a9e6a96ea6ba1bee220a60e5a691a174a0a044b (eauger1/centos-qemu-kvm) -RH-Bugzilla: 1967502 -RH-Acked-by: Gavin Shan -RH-Acked-by: Daniel P. Berrangé -RH-Acked-by: Andrew Jones - -We want to enable the PCIe expander bridge on aarch64. So let's -set CONFIG_PXB. 
- -Signed-off-by: Eric Auger -Signed-off-by: Miroslav Rezanina ---- - default-configs/devices/aarch64-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/default-configs/devices/aarch64-rh-devices.mak b/default-configs/devices/aarch64-rh-devices.mak -index 4220469178..d8ce902720 100644 ---- a/default-configs/devices/aarch64-rh-devices.mak -+++ b/default-configs/devices/aarch64-rh-devices.mak -@@ -27,3 +27,4 @@ CONFIG_TPM_TIS_SYSBUS=y - CONFIG_PTIMER=y - CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y - CONFIG_PVPANIC_PCI=y -+CONFIG_PXB=y --- -2.27.0 - diff --git a/kvm-acpi-pc-revert-back-to-v5.2-PCI-slot-enumeration.patch b/kvm-acpi-pc-revert-back-to-v5.2-PCI-slot-enumeration.patch deleted file mode 100644 index bff1686..0000000 --- a/kvm-acpi-pc-revert-back-to-v5.2-PCI-slot-enumeration.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 456bb6cb658b9d332fa0b5b91946916b48ed449e Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Thu, 29 Jul 2021 07:42:10 -0400 -Subject: [PATCH 09/39] acpi: pc: revert back to v5.2 PCI slot enumeration - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 -RH-Commit: [1/15] 57222343ccae17b99b4e166798d4d0eecca2e22b (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Acked-by: Igor Mammedov -RH-Acked-by: Andrew Jones - -Commit [1] moved _SUN variable from only hot-pluggable to -all devices. This made linux kernel enumerate extra slots -that weren't present before. If extra slot happens to be -be enumerated first and there is a device in th same slot -but on other bridge, linux kernel will add -N suffix to -slot name of the later, thus changing NIC name compared to -QEMU 5.2. This in some case confuses systemd, if it is -using SLOT NIC naming scheme and interface name becomes -not the same as it was under QEMU-5.2. 
- -Reproducer QEMU CLI: - -M pc-i440fx-5.2 -nodefaults \ - -device pci-bridge,chassis_nr=1,id=pci.1,bus=pci.0,addr=0x3 \ - -device virtio-net-pci,id=nic1,bus=pci.1,addr=0x1 \ - -device virtio-net-pci,id=nic2,bus=pci.1,addr=0x2 \ - -device virtio-net-pci,id=nic3,bus=pci.1,addr=0x3 - -with RHEL8 guest produces following results: - v5.2: - kernel: virtio_net virtio0 ens1: renamed from eth0 - kernel: virtio_net virtio2 ens3: renamed from eth2 - kernel: virtio_net virtio1 enp1s2: renamed from eth1 - (slot 2 is assigned to empty bus 0 slot and virtio1 - is assigned to 2-2 slot, and renaming falls back, - for some reason, to path based naming scheme) - - v6.0: - kernel: virtio_net virtio0 ens1: renamed from eth0 - kernel: virtio_net virtio2 ens3: renamed from eth2 - systemd-udevd[299]: Error changing net interface name 'eth1' to 'ens3': File exists - systemd-udevd[299]: could not rename interface '3' from 'eth1' to 'ens3': File exists - (with commit [1] kernel assigns virtio2 to 3-2 slot - since bridge advertises _SUN=0x3 and kernel assigns - slot 3 to bridge. Still it manages to rename virtio2 - correctly to ens3, however systemd gets confused with virtio1 - where slot allocation exactly the same (2-2) as in 5.2 case - and tries to rename it to ens3 which is rightfully taken by - virtio2) - -I'm not sure what breaks in systemd interface renaming (it probably -should be investigated), but on QEMU side we can safely revert -_SUN to 5.2 behavior (i.e. avoid cold-plugged bridges and non -hot-pluggable device classes), without breaking acpi-index, which uses -slot numbers but it doesn't have to use _SUN, it could use an arbitrary -variable name that has the same slot value). -It will help existing VMs to keep networking with non trivial -configs in working order since systemd will do its interface -renaming magic as it used to do. 
- -1) -Fixes: b7f23f62e40 (pci: acpi: add _DSM method to PCI devices) -Signed-off-by: Igor Mammedov -Message-Id: <20210624204229.998824-3-imammedo@redhat.com> -Reviewed-by: Stefan Hajnoczi -Tested-by: John Sucaet -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 7193d7cdd93e50f0e5f09803b98d27d3f9b147ac) -Signed-off-by: Igor Mammedov -Signed-off-by: Miroslav Rezanina ---- - hw/i386/acpi-build.c | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - -diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index de98750aef..dbee0cd3bc 100644 ---- a/hw/i386/acpi-build.c -+++ b/hw/i386/acpi-build.c -@@ -432,11 +432,15 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, - aml_append(dev, aml_name_decl("_ADR", aml_int(slot << 16))); - - if (bsel) { -- aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); -+ /* -+ * Can't declare _SUN here for every device as it changes 'slot' -+ * enumeration order in linux kernel, so use another variable for it -+ */ -+ aml_append(dev, aml_name_decl("ASUN", aml_int(slot))); - method = aml_method("_DSM", 4, AML_SERIALIZED); - aml_append(method, aml_return( - aml_call6("PDSM", aml_arg(0), aml_arg(1), aml_arg(2), -- aml_arg(3), aml_name("BSEL"), aml_name("_SUN")) -+ aml_arg(3), aml_name("BSEL"), aml_name("ASUN")) - )); - aml_append(dev, method); - } -@@ -463,6 +467,7 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, - aml_append(method, aml_return(aml_int(s3d))); - aml_append(dev, method); - } else if (hotplug_enabled_dev) { -+ aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); - /* add _EJ0 to make slot hotpluggable */ - method = aml_method("_EJ0", 1, AML_NOTSERIALIZED); - aml_append(method, --- -2.27.0 - diff --git a/kvm-arm-virt-Enable-ARM-RAS-support.patch b/kvm-arm-virt-Enable-ARM-RAS-support.patch deleted file mode 100644 index a3881b8..0000000 --- a/kvm-arm-virt-Enable-ARM-RAS-support.patch +++ /dev/null @@ -1,68 +0,0 @@ 
-From 170a9c7dc044a0094b48c658e0d57c97c4b854e0 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 30 Jun 2021 13:38:03 +0200 -Subject: [PATCH 03/13] arm/virt: Enable ARM RAS support - -RH-Author: Eric Auger -RH-MergeRequest: 19: arm/virt: Support RAS -RH-Commit: [3/3] 1572368bb0f47463a1f6ffa3f5baa97242440c98 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 1838608 -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones -RH-Acked-by: Igor Mammedov - -We want to support ARM RAS (Reliability, Availability & Serviceability). -So let's register the RAS property as a class property. It is -unset by default. - -Signed-off-by: Eric Auger -Signed-off-by: Miroslav Rezanina ---- - hw/arm/virt.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index fe1111d527..0084935ec8 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2281,7 +2281,6 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, - visit_type_OnOffAuto(v, name, &vms->acpi, errp); - } - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - static bool virt_get_ras(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2296,6 +2295,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) - vms->ras = value; - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static bool virt_get_mte(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -3013,6 +3013,12 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - "Set the IOMMU type. 
" - "Valid values are none and smmuv3"); - -+ object_class_property_add_bool(oc, "ras", virt_get_ras, -+ virt_set_ras); -+ object_class_property_set_description(oc, "ras", -+ "Set on/off to enable/disable reporting host memory errors " -+ "to a KVM guest using ACPI and guest external abort exceptions"); -+ - object_class_property_add_bool(oc, "its", virt_get_its, - virt_set_its); - object_class_property_set_description(oc, "its", -@@ -3063,7 +3069,7 @@ static void rhel_virt_instance_init(Object *obj) - /* Default disallows iommu instantiation */ - vms->iommu = VIRT_IOMMU_NONE; - -- /* Default disallows RAS instantiation and is non-configurable for RHEL */ -+ /* Default disallows RAS instantiation */ - vms->ras = false; - - /* MTE is disabled by default and non-configurable for RHEL */ --- -2.27.0 - diff --git a/kvm-arm-virt-Register-highmem-and-gic-version-as-class-p.patch b/kvm-arm-virt-Register-highmem-and-gic-version-as-class-p.patch deleted file mode 100644 index 52641bd..0000000 --- a/kvm-arm-virt-Register-highmem-and-gic-version-as-class-p.patch +++ /dev/null @@ -1,77 +0,0 @@ -From e808acaa50effe471c56a48b80d5e0d2d196b495 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 25 May 2021 09:22:23 +0200 -Subject: [PATCH 01/15] arm/virt: Register highmem and gic-version as class - properties - -RH-Author: Eric Auger -RH-MergeRequest: 1: Add 9.0.0 and 8.5.0 arm-virt machine types -RH-Commit: [1/2] 1ff3970773e09f2efb194430511928ae852c02ba (eauger1/centos-qemu-kvm) -RH-Bugzilla: 1952449 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones - -This mirrors changes made in commit -b91def7b8382 ("rm/virt: Register most properties as class properties") -for the highmem and gic-version properties. This makes the -code easier to diff against upstream. 
- -Signed-off-by: Eric Auger -Signed-off-by: Miroslav Rezanina ---- - hw/arm/virt.c | 23 ++++++++++++----------- - 1 file changed, 12 insertions(+), 11 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 080cf54ef1..51a415570c 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2996,6 +2996,18 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - object_class_property_set_description(oc, "acpi", - "Enable ACPI"); - -+ object_class_property_add_bool(oc, "highmem", virt_get_highmem, -+ virt_set_highmem); -+ object_class_property_set_description(oc, "highmem", -+ "Set on/off to enable/disable using " -+ "physical address space above 32 bits"); -+ -+ object_class_property_add_str(oc, "gic-version", virt_get_gic_version, -+ virt_set_gic_version); -+ object_class_property_set_description(oc, "gic-version", -+ "Set GIC version. " -+ "Valid values are 2, 3, host and max"); -+ - object_class_property_add_str(oc, "x-oem-id", - virt_get_oem_id, - virt_set_oem_id); -@@ -3004,7 +3016,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - "in ACPI table header." - "The string may be up to 6 bytes in size"); - -- - object_class_property_add_str(oc, "x-oem-table-id", - virt_get_oem_table_id, - virt_set_oem_table_id); -@@ -3027,17 +3038,7 @@ static void rhel_virt_instance_init(Object *obj) - - /* High memory is enabled by default */ - vms->highmem = true; -- object_property_add_bool(obj, "highmem", virt_get_highmem, -- virt_set_highmem); -- object_property_set_description(obj, "highmem", -- "Set on/off to enable/disable using " -- "physical address space above 32 bits"); - vms->gic_version = VIRT_GIC_VERSION_NOSEL; -- object_property_add_str(obj, "gic-version", virt_get_gic_version, -- virt_set_gic_version); -- object_property_set_description(obj, "gic-version", -- "Set GIC version. 
" -- "Valid values are 2, 3, host and max"); - - vms->highmem_ecam = !vmc->no_highmem_ecam; - --- -2.27.0 - diff --git a/kvm-arm-virt-Register-iommu-as-a-class-property.patch b/kvm-arm-virt-Register-iommu-as-a-class-property.patch deleted file mode 100644 index 5235851..0000000 --- a/kvm-arm-virt-Register-iommu-as-a-class-property.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 42e530c513914b83273ec8c6a29100eb3c737604 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 30 Jun 2021 07:28:38 -0400 -Subject: [PATCH 01/13] arm/virt: Register iommu as a class property - -RH-Author: Eric Auger -RH-MergeRequest: 19: arm/virt: Support RAS -RH-Commit: [1/3] 48428de4deb5af94891aa5552fb9f4ea6d69acef (eauger1/centos-qemu-kvm) -RH-Bugzilla: 1838608 -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones -RH-Acked-by: Igor Mammedov - -As done for highmem and gic-version, let's register the iommu property -as a class property. This is closer to the upstream code. This -change was originally made by upstream commit: -b91def7b8382 ("arm/virt: Register most properties as class properties") - -Signed-off-by: Eric Auger -Signed-off-by: Miroslav Rezanina ---- - hw/arm/virt.c | 9 +++++---- - 1 file changed, 5 insertions(+), 4 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index e4aa794f83..bdae24a753 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3008,6 +3008,11 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - "Set GIC version. " - "Valid values are 2, 3, host and max"); - -+ object_class_property_add_str(oc, "iommu", virt_get_iommu, virt_set_iommu); -+ object_class_property_set_description(oc, "iommu", -+ "Set the IOMMU type. 
" -+ "Valid values are none and smmuv3"); -+ - object_class_property_add_str(oc, "x-oem-id", - virt_get_oem_id, - virt_set_oem_id); -@@ -3056,10 +3061,6 @@ static void rhel_virt_instance_init(Object *obj) - - /* Default disallows iommu instantiation */ - vms->iommu = VIRT_IOMMU_NONE; -- object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu); -- object_property_set_description(obj, "iommu", -- "Set the IOMMU type. " -- "Valid values are none and smmuv3"); - - /* Default disallows RAS instantiation and is non-configurable for RHEL */ - vms->ras = false; --- -2.27.0 - diff --git a/kvm-arm-virt-Register-its-as-a-class-property.patch b/kvm-arm-virt-Register-its-as-a-class-property.patch deleted file mode 100644 index a0f9ff7..0000000 --- a/kvm-arm-virt-Register-its-as-a-class-property.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 29d18bdaf3dad52a052b2b058cd8f74652a72f76 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 8 Jul 2021 05:14:47 -0400 -Subject: [PATCH 02/13] arm/virt: Register its as a class property - -RH-Author: Eric Auger -RH-MergeRequest: 19: arm/virt: Support RAS -RH-Commit: [2/3] 95debacfd59d06ed2dcaeb120b20a280ff515434 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 1838608 -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones -RH-Acked-by: Igor Mammedov - -As done for highmem, gic-version, iommu, let's register the its property -as a class property. This is closer to the upstream code. This -change was originally made by upstream commit: -b91def7b8382 ("arm/virt: Register most properties as class properties") - -Signed-off-by: Eric Auger -Signed-off-by: Miroslav Rezanina ---- - hw/arm/virt.c | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index bdae24a753..fe1111d527 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3013,6 +3013,12 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - "Set the IOMMU type. 
" - "Valid values are none and smmuv3"); - -+ object_class_property_add_bool(oc, "its", virt_get_its, -+ virt_set_its); -+ object_class_property_set_description(oc, "its", -+ "Set on/off to enable/disable " -+ "ITS instantiation"); -+ - object_class_property_add_str(oc, "x-oem-id", - virt_get_oem_id, - virt_set_oem_id); -@@ -3052,11 +3058,6 @@ static void rhel_virt_instance_init(Object *obj) - } else { - /* Default allows ITS instantiation */ - vms->its = true; -- object_property_add_bool(obj, "its", virt_get_its, -- virt_set_its); -- object_property_set_description(obj, "its", -- "Set on/off to enable/disable " -- "ITS instantiation"); - } - - /* Default disallows iommu instantiation */ --- -2.27.0 - diff --git a/kvm-audio-Never-send-migration-section.patch b/kvm-audio-Never-send-migration-section.patch deleted file mode 100644 index 474612d..0000000 --- a/kvm-audio-Never-send-migration-section.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 1e69dbe01e1cad1680723e1bc086cc52a1772c17 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Wed, 11 Aug 2021 08:40:38 -0400 -Subject: [PATCH 27/39] audio: Never send migration section -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 35: Synchronize with RHEL-AV 8.5 release 28 to RHEL 9 -RH-Commit: [4/4] 0b6ec114a3573ac7efcbe5ab3094d8020899c82d (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Philippe Mathieu-Daudé - -The audio migration vmstate is empty, and always has been; we can't -just remove it though because an old qemu might send it us. -Changes with -audiodev now mean it's sometimes created when it didn't -used to be, and can confuse migration to old qemu. - -Change it so that vmstate_audio is never sent; if it's received it -should still be accepted, and old qemu's shouldn't be too upset if it's -missing. - -Signed-off-by: Dr. David Alan Gilbert -Reviewed-by: Daniel P. Berrangé -Tested-by: Daniel P. 
Berrangé -Message-Id: <20210809170956.78536-1-dgilbert@redhat.com> -Signed-off-by: Gerd Hoffmann -(cherry picked from commit da77adbaf619c4d170cb42d769145ad1803fbad9) -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - audio/audio.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/audio/audio.c b/audio/audio.c -index 534278edfe..fa724ea8e0 100644 ---- a/audio/audio.c -+++ b/audio/audio.c -@@ -1621,10 +1621,20 @@ void audio_cleanup(void) - } - } - -+static bool vmstate_audio_needed(void *opaque) -+{ -+ /* -+ * Never needed, this vmstate only exists in case -+ * an old qemu sends it to us. -+ */ -+ return false; -+} -+ - static const VMStateDescription vmstate_audio = { - .name = "audio", - .version_id = 1, - .minimum_version_id = 1, -+ .needed = vmstate_audio_needed, - .fields = (VMStateField[]) { - VMSTATE_END_OF_LIST() - } --- -2.27.0 - diff --git a/kvm-block-Add-option-to-use-driver-whitelist-even-in-too.patch b/kvm-block-Add-option-to-use-driver-whitelist-even-in-too.patch deleted file mode 100644 index 4934ffe..0000000 --- a/kvm-block-Add-option-to-use-driver-whitelist-even-in-too.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 0739f735f99a6f1760a422023c262c1aa542a2e5 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 9 Jul 2021 18:41:41 +0200 -Subject: [PATCH 19/43] block: Add option to use driver whitelist even in tools -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Kevin Wolf -RH-MergeRequest: 26: block: Disable unsupported/read-only block drivers even in tools -RH-Commit: [1/2] 6755d5ff4ef43f275ae530de2b2a568ffd2d3497 (kmwolf/centos-qemu-kvm) -RH-Bugzilla: 1957782 -RH-Acked-by: Max Reitz -RH-Acked-by: Richard W.M. Jones -RH-Acked-by: Philippe Mathieu-Daudé - -Currently, the block driver whitelists are only applied for the system -emulator. All other binaries still give unrestricted access to all block -drivers. 
There are use cases where this made sense because the main -concern was avoiding customers running VMs on less optimised block -drivers and getting bad performance. Allowing the same image format e.g. -as a target for 'qemu-img convert' is not a problem then. - -However, if the concern is the supportability of the driver in general, -either in full or when used read-write, not applying the list driver -whitelist in tools doesn't help - especially since qemu-nbd and -qemu-storage-daemon now give access to more or less the same operations -in block drivers as running a system emulator. - -In order to address this, introduce a new configure option that enforces -the driver whitelist in all binaries. - -Signed-off-by: Kevin Wolf -Message-Id: <20210709164141.254097-1-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit e5f05f8c375157211c7da625a0d3f3ccdb4957d5) -Signed-off-by: Kevin Wolf ---- - block.c | 3 +++ - configure | 14 ++++++++++++-- - meson.build | 1 + - 3 files changed, 16 insertions(+), 2 deletions(-) - -diff --git a/block.c b/block.c -index c5b887cec1..76ecede5af 100644 ---- a/block.c -+++ b/block.c -@@ -5817,6 +5817,9 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, - - void bdrv_init(void) - { -+#ifdef CONFIG_BDRV_WHITELIST_TOOLS -+ use_bdrv_whitelist = 1; -+#endif - module_call_init(MODULE_INIT_BLOCK); - } - -diff --git a/configure b/configure -index 53b2fa583a..7edc08afb3 100755 ---- a/configure -+++ b/configure -@@ -243,6 +243,7 @@ cross_prefix="" - audio_drv_list="" - block_drv_rw_whitelist="" - block_drv_ro_whitelist="" -+block_drv_whitelist_tools="no" - host_cc="cc" - audio_win_int="" - libs_qga="" -@@ -1029,6 +1030,10 @@ for opt do - ;; - --block-drv-ro-whitelist=*) block_drv_ro_whitelist=$(echo "$optarg" | sed -e 's/,/ /g') - ;; -+ --enable-block-drv-whitelist-in-tools) block_drv_whitelist_tools="yes" -+ ;; -+ --disable-block-drv-whitelist-in-tools) block_drv_whitelist_tools="no" -+ ;; - 
--enable-debug-tcg) debug_tcg="yes" - ;; - --disable-debug-tcg) debug_tcg="no" -@@ -1764,10 +1769,12 @@ Advanced options (experts only): - --block-drv-whitelist=L Same as --block-drv-rw-whitelist=L - --block-drv-rw-whitelist=L - set block driver read-write whitelist -- (affects only QEMU, not qemu-img) -+ (by default affects only QEMU, not tools like qemu-img) - --block-drv-ro-whitelist=L - set block driver read-only whitelist -- (affects only QEMU, not qemu-img) -+ (by default affects only QEMU, not tools like qemu-img) -+ --enable-block-drv-whitelist-in-tools -+ use block whitelist also in tools instead of only QEMU - --enable-trace-backends=B Set trace backend - Available backends: $trace_backend_list - --with-trace-file=NAME Full PATH,NAME of file to store traces -@@ -5571,6 +5578,9 @@ if test "$audio_win_int" = "yes" ; then - fi - echo "CONFIG_BDRV_RW_WHITELIST=$block_drv_rw_whitelist" >> $config_host_mak - echo "CONFIG_BDRV_RO_WHITELIST=$block_drv_ro_whitelist" >> $config_host_mak -+if test "$block_drv_whitelist_tools" = "yes" ; then -+ echo "CONFIG_BDRV_WHITELIST_TOOLS=y" >> $config_host_mak -+fi - if test "$xfs" = "yes" ; then - echo "CONFIG_XFS=y" >> $config_host_mak - fi -diff --git a/meson.build b/meson.build -index 06c15bd6d2..49b8164ade 100644 ---- a/meson.build -+++ b/meson.build -@@ -2606,6 +2606,7 @@ summary_info += {'coroutine pool': config_host['CONFIG_COROUTINE_POOL'] == '1 - if have_block - summary_info += {'Block whitelist (rw)': config_host['CONFIG_BDRV_RW_WHITELIST']} - summary_info += {'Block whitelist (ro)': config_host['CONFIG_BDRV_RO_WHITELIST']} -+ summary_info += {'Use block whitelist in tools': config_host.has_key('CONFIG_BDRV_WHITELIST_TOOLS')} - summary_info += {'VirtFS support': have_virtfs} - summary_info += {'build virtiofs daemon': have_virtiofsd} - summary_info += {'Live block migration': config_host.has_key('CONFIG_LIVE_BLOCK_MIGRATION')} --- -2.27.0 - diff --git a/kvm-block-Fix-in_flight-leak-in-request-padding-error-pa.patch 
b/kvm-block-Fix-in_flight-leak-in-request-padding-error-pa.patch deleted file mode 100644 index 6545209..0000000 --- a/kvm-block-Fix-in_flight-leak-in-request-padding-error-pa.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 872e82621b1341e8b96bda47f7f43dfffd356249 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 27 Jul 2021 17:49:23 +0200 -Subject: [PATCH 04/13] block: Fix in_flight leak in request padding error path - -RH-Author: Kevin Wolf -RH-MergeRequest: 31: block: Fix in_flight leak in request padding error path -RH-Commit: [1/1] a0d1bf38d9a69818cd6cefc3779f2988b484605a (kmwolf/centos-qemu-kvm) -RH-Bugzilla: 1972079 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake -RH-Acked-by: Max Reitz - -When bdrv_pad_request() fails in bdrv_co_preadv_part(), bs->in_flight -has been increased, but is never decreased again. This leads to a hang -when trying to drain the block node. - -This bug was observed with Windows guests which issue a request that -fully uses IOV_MAX during installation, so that when padding is -necessary (O_DIRECT with a 4k sector size block device on the host), -adding another entry causes failure. - -Call bdrv_dec_in_flight() to fix this. There is a larger problem to -solve here because this request shouldn't even fail, but Windows doesn't -seem to care and with this minimal fix the installation succeeds. So -given that we're already in freeze, let's take this minimal fix for 6.1. 
- -Fixes: 98ca45494fcd6bf0336ecd559e440b6de6ea4cd3 -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1972079 -Reported-by: Qing Wang -Signed-off-by: Kevin Wolf -Message-Id: <20210727154923.91067-1-kwolf@redhat.com> -Reviewed-by: Max Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit 87ab88025247b893aad5071fd38301b67be76d1a) -Signed-off-by: Kevin Wolf -Signed-off-by: Miroslav Rezanina ---- - block/io.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/block/io.c b/block/io.c -index a4b2e3adf1..5033d51334 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -1811,7 +1811,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, - ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, - NULL); - if (ret < 0) { -- return ret; -+ goto fail; - } - - tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); -@@ -1819,10 +1819,11 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, - bs->bl.request_alignment, - qiov, qiov_offset, flags); - tracked_request_end(&req); -- bdrv_dec_in_flight(bs); -- - bdrv_padding_destroy(&pad); - -+fail: -+ bdrv_dec_in_flight(bs); -+ - return ret; - } - --- -2.27.0 - diff --git a/kvm-block-add-max_hw_transfer-to-BlockLimits.patch b/kvm-block-add-max_hw_transfer-to-BlockLimits.patch deleted file mode 100644 index 75f1b5d..0000000 --- a/kvm-block-add-max_hw_transfer-to-BlockLimits.patch +++ /dev/null @@ -1,131 +0,0 @@ -From 6773549977d94c504ec76aed67506ae85adff973 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 16 Jul 2021 16:51:33 -0400 -Subject: [PATCH 17/43] block: add max_hw_transfer to BlockLimits - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -For block host devices, I/O can happen through either the kernel file -descriptor I/O system calls (preadv/pwritev, io_submit, io_uring) -or the SCSI passthrough ioctl SG_IO. 
- -In the latter case, the size of each transfer can be limited by the -HBA, while for file descriptor I/O the kernel is able to split and -merge I/O in smaller pieces as needed. Applying the HBA limits to -file descriptor I/O results in more system calls and suboptimal -performance, so this patch splits the max_transfer limit in two: -max_transfer remains valid and is used in general, while max_hw_transfer -is limited to the maximum hardware size. max_hw_transfer can then be -included by the scsi-generic driver in the block limits page, to ensure -that the stricter hardware limit is used. - -Signed-off-by: Paolo Bonzini -(cherry picked from commit 24b36e9813ec15da7db62e3b3621730710c5f020) -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - block/block-backend.c | 13 +++++++++++++ - block/file-posix.c | 2 +- - block/io.c | 2 ++ - hw/scsi/scsi-generic.c | 2 +- - include/block/block_int.h | 7 +++++++ - include/sysemu/block-backend.h | 1 + - 6 files changed, 25 insertions(+), 2 deletions(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 136cc602c5..b5f5b4b048 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -1939,6 +1939,19 @@ uint32_t blk_get_request_alignment(BlockBackend *blk) - return bs ? 
bs->bl.request_alignment : BDRV_SECTOR_SIZE; - } - -+/* Returns the maximum hardware transfer length, in bytes; guaranteed nonzero */ -+uint64_t blk_get_max_hw_transfer(BlockBackend *blk) -+{ -+ BlockDriverState *bs = blk_bs(blk); -+ uint64_t max = INT_MAX; -+ -+ if (bs) { -+ max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer); -+ max = MIN_NON_ZERO(max, bs->bl.max_transfer); -+ } -+ return ROUND_DOWN(max, blk_get_request_alignment(blk)); -+} -+ - /* Returns the maximum transfer length, in bytes; guaranteed nonzero */ - uint32_t blk_get_max_transfer(BlockBackend *blk) - { -diff --git a/block/file-posix.c b/block/file-posix.c -index 1a6c799e19..44325a635d 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -1258,7 +1258,7 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) - int ret = sg_get_max_transfer_length(s->fd); - - if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) { -- bs->bl.max_transfer = pow2floor(ret); -+ bs->bl.max_hw_transfer = pow2floor(ret); - } - - ret = sg_get_max_segments(s->fd); -diff --git a/block/io.c b/block/io.c -index ca2dca3007..a4b2e3adf1 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -126,6 +126,8 @@ static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) - { - dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer); - dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer); -+ dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer, -+ src->max_hw_transfer); - dst->opt_mem_alignment = MAX(dst->opt_mem_alignment, - src->opt_mem_alignment); - dst->min_mem_alignment = MAX(dst->min_mem_alignment, -diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c -index 82e1e2ee79..3762dce749 100644 ---- a/hw/scsi/scsi-generic.c -+++ b/hw/scsi/scsi-generic.c -@@ -179,7 +179,7 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s) - (r->req.cmd.buf[1] & 0x01)) { - page = r->req.cmd.buf[2]; - if (page == 0xb0) { -- uint32_t max_transfer = 
blk_get_max_transfer(s->conf.blk); -+ uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk); - uint32_t max_iov = blk_get_max_iov(s->conf.blk); - - assert(max_transfer); -diff --git a/include/block/block_int.h b/include/block/block_int.h -index 88e4111939..09d8630ec4 100644 ---- a/include/block/block_int.h -+++ b/include/block/block_int.h -@@ -695,6 +695,13 @@ typedef struct BlockLimits { - * clamped down. */ - uint32_t max_transfer; - -+ /* Maximal hardware transfer length in bytes. Applies whenever -+ * transfers to the device bypass the kernel I/O scheduler, for -+ * example with SG_IO. If larger than max_transfer or if zero, -+ * blk_get_max_hw_transfer will fall back to max_transfer. -+ */ -+ uint64_t max_hw_transfer; -+ - /* memory alignment, in bytes so that no bounce buffer is needed */ - size_t min_mem_alignment; - -diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h -index 5423e3d9c6..9ac5f7bbd3 100644 ---- a/include/sysemu/block-backend.h -+++ b/include/sysemu/block-backend.h -@@ -208,6 +208,7 @@ void blk_eject(BlockBackend *blk, bool eject_flag); - int blk_get_flags(BlockBackend *blk); - uint32_t blk_get_request_alignment(BlockBackend *blk); - uint32_t blk_get_max_transfer(BlockBackend *blk); -+uint64_t blk_get_max_hw_transfer(BlockBackend *blk); - int blk_get_max_iov(BlockBackend *blk); - void blk_set_guest_block_size(BlockBackend *blk, int align); - void *blk_try_blockalign(BlockBackend *blk, size_t size); --- -2.27.0 - diff --git a/kvm-block-backend-add-drained_poll.patch b/kvm-block-backend-add-drained_poll.patch deleted file mode 100644 index b3cbc54..0000000 --- a/kvm-block-backend-add-drained_poll.patch +++ /dev/null @@ -1,74 +0,0 @@ -From e23a2be8c57666e091d9192e113a30ea06cd83ef Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Thu, 17 Jun 2021 09:13:20 -0400 -Subject: [PATCH 05/12] block-backend: add drained_poll -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 
8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 16: Synchronize with RHEL-AV 8.5 release 21 to RHEL 9 -RH-Commit: [3/8] 4ad1f536b00a762a1b094d76383b74826228892a (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Daniel P. Berrangé - -Allow block backends to poll their devices/users to check if they have -been quiesced when entering a drained section. - -This will be used in the next patch to wait for the NBD server to be -completely quiesced. - -Suggested-by: Kevin Wolf -Reviewed-by: Kevin Wolf -Reviewed-by: Eric Blake -Signed-off-by: Sergio Lopez -Message-Id: <20210602060552.17433-2-slp@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 095cc4d0f62513d75e9bc1da37f08d9e97f267c4) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - block/block-backend.c | 7 ++++++- - include/sysemu/block-backend.h | 4 ++++ - 2 files changed, 10 insertions(+), 1 deletion(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 413af51f3b..05d8e5fb5d 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -2378,8 +2378,13 @@ static void blk_root_drained_begin(BdrvChild *child) - static bool blk_root_drained_poll(BdrvChild *child) - { - BlockBackend *blk = child->opaque; -+ bool busy = false; - assert(blk->quiesce_counter); -- return !!blk->in_flight; -+ -+ if (blk->dev_ops && blk->dev_ops->drained_poll) { -+ busy = blk->dev_ops->drained_poll(blk->dev_opaque); -+ } -+ return busy || !!blk->in_flight; - } - - static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) -diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h -index 880e903293..5423e3d9c6 100644 ---- a/include/sysemu/block-backend.h -+++ b/include/sysemu/block-backend.h -@@ -66,6 +66,10 @@ typedef struct BlockDevOps { - * Runs when the backend's last drain request ends. 
- */ - void (*drained_end)(void *opaque); -+ /* -+ * Is the device still busy? -+ */ -+ bool (*drained_poll)(void *opaque); - } BlockDevOps; - - /* This struct is embedded in (the private) BlockBackend struct and contains --- -2.27.0 - diff --git a/kvm-block-backend-align-max_transfer-to-request-alignmen.patch b/kvm-block-backend-align-max_transfer-to-request-alignmen.patch deleted file mode 100644 index c788c86..0000000 --- a/kvm-block-backend-align-max_transfer-to-request-alignmen.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 643c979c2bfa0fc3c45ec8ec5f05a77e0b075356 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 16 Jul 2021 16:51:32 -0400 -Subject: [PATCH 16/43] block-backend: align max_transfer to request alignment - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -Block device requests must be aligned to bs->bl.request_alignment. -It makes sense for drivers to align bs->bl.max_transfer the same -way; however when there is no specified limit, blk_get_max_transfer -just returns INT_MAX. Since the contract of the function does not -specify that INT_MAX means "no maximum", just align the outcome -of the function (whether INT_MAX or bs->bl.max_transfer) before -returning it. - -Signed-off-by: Paolo Bonzini -(cherry picked from commit b99f7fa08a3df8b8a6a907642e5851cdcf43fa9f) -Signed-off-by: Danilo C. L. 
de Paula -Signed-off-by: Miroslav Rezanina ---- - block/block-backend.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 05d8e5fb5d..136cc602c5 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -1943,12 +1943,12 @@ uint32_t blk_get_request_alignment(BlockBackend *blk) - uint32_t blk_get_max_transfer(BlockBackend *blk) - { - BlockDriverState *bs = blk_bs(blk); -- uint32_t max = 0; -+ uint32_t max = INT_MAX; - - if (bs) { -- max = bs->bl.max_transfer; -+ max = MIN_NON_ZERO(max, bs->bl.max_transfer); - } -- return MIN_NON_ZERO(max, INT_MAX); -+ return ROUND_DOWN(max, blk_get_request_alignment(blk)); - } - - int blk_get_max_iov(BlockBackend *blk) --- -2.27.0 - diff --git a/kvm-block-nvme-Fix-VFIO_MAP_DMA-failed-No-space-left-on-.patch b/kvm-block-nvme-Fix-VFIO_MAP_DMA-failed-No-space-left-on-.patch deleted file mode 100644 index df36313..0000000 --- a/kvm-block-nvme-Fix-VFIO_MAP_DMA-failed-No-space-left-on-.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 1d85424fe5208986fc07fe9baa1e9b33d77b185a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Thu, 29 Jul 2021 07:42:35 -0400 -Subject: [PATCH 20/39] block/nvme: Fix VFIO_MAP_DMA failed: No space left on - device -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 -RH-Commit: [12/15] f4b3456e4ce1a876a64f9fb92c56f8f981076953 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Acked-by: Igor Mammedov -RH-Acked-by: Andrew Jones - -When the NVMe block driver was introduced (see commit bdd6a90a9e5, -January 2018), Linux VFIO_IOMMU_MAP_DMA ioctl was only returning --ENOMEM in case of error. The driver was correctly handling the -error path to recycle its volatile IOVA mappings. 
- -To fix CVE-2019-3882, Linux commit 492855939bdb ("vfio/type1: Limit -DMA mappings per container", April 2019) added the -ENOSPC error to -signal the user exhausted the DMA mappings available for a container. - -The block driver started to mis-behave: - - qemu-system-x86_64: VFIO_MAP_DMA failed: No space left on device - (qemu) - (qemu) info status - VM status: paused (io-error) - (qemu) c - VFIO_MAP_DMA failed: No space left on device - (qemu) c - VFIO_MAP_DMA failed: No space left on device - -(The VM is not resumable from here, hence stuck.) - -Fix by handling the new -ENOSPC error (when DMA mappings are -exhausted) without any distinction to the current -ENOMEM error, -so we don't change the behavior on old kernels where the CVE-2019-3882 -fix is not present. - -An easy way to reproduce this bug is to restrict the DMA mapping -limit (65535 by default) when loading the VFIO IOMMU module: - - # modprobe vfio_iommu_type1 dma_entry_limit=666 - -Cc: qemu-stable@nongnu.org -Cc: Fam Zheng -Cc: Maxim Levitsky -Cc: Alex Williamson -Reported-by: Michal Prívozník -Signed-off-by: Philippe Mathieu-Daudé -Message-id: 20210723195843.1032825-1-philmd@redhat.com -Fixes: bdd6a90a9e5 ("block: Add VFIO based NVMe driver") -Buglink: https://bugs.launchpad.net/qemu/+bug/1863333 -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/65 -Signed-off-by: Philippe Mathieu-Daudé -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 15a730e7a3aaac180df72cd5730e0617bcf44a5a) -Signed-off-by: Philippe Mathieu-Daudé -Signed-off-by: Miroslav Rezanina ---- - block/nvme.c | 22 ++++++++++++++++++++++ - 1 file changed, 22 insertions(+) - -diff --git a/block/nvme.c b/block/nvme.c -index 2b5421e7aa..e8dbbc2317 100644 ---- a/block/nvme.c -+++ b/block/nvme.c -@@ -1030,7 +1030,29 @@ try_map: - r = qemu_vfio_dma_map(s->vfio, - qiov->iov[i].iov_base, - len, true, &iova); -+ if (r == -ENOSPC) { -+ /* -+ * In addition to the -ENOMEM error, the VFIO_IOMMU_MAP_DMA -+ * ioctl returns -ENOSPC to 
signal the user exhausted the DMA -+ * mappings available for a container since Linux kernel commit -+ * 492855939bdb ("vfio/type1: Limit DMA mappings per container", -+ * April 2019, see CVE-2019-3882). -+ * -+ * This block driver already handles this error path by checking -+ * for the -ENOMEM error, so we directly replace -ENOSPC by -+ * -ENOMEM. Beside, -ENOSPC has a specific meaning for blockdev -+ * coroutines: it triggers BLOCKDEV_ON_ERROR_ENOSPC and -+ * BLOCK_ERROR_ACTION_STOP which stops the VM, asking the operator -+ * to add more storage to the blockdev. Not something we can do -+ * easily with an IOMMU :) -+ */ -+ r = -ENOMEM; -+ } - if (r == -ENOMEM && retry) { -+ /* -+ * We exhausted the DMA mappings available for our container: -+ * recycle the volatile IOVA mappings. -+ */ - retry = false; - trace_nvme_dma_flush_queue_wait(s); - if (s->dma_map_count) { --- -2.27.0 - diff --git a/kvm-configure-Fix-endianess-test-with-LTO.patch b/kvm-configure-Fix-endianess-test-with-LTO.patch deleted file mode 100644 index 9dd8745..0000000 --- a/kvm-configure-Fix-endianess-test-with-LTO.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 40e5138fb1e615c927a21d0b3f2e24eca885ede4 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 15 Jul 2021 10:39:28 +0200 -Subject: [PATCH 36/39] configure: Fix endianess test with LTO - -RH-Author: Jon Maloy -RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack -RH-Commit: [9/11] c4be415076356fe74efab6f74d7b347064bbdb40 (jmaloy/qemu-kvm-centos-jon) -RH-Bugzilla: 1939509 1940132 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck - -If a user is trying to compile QEMU with link-time optimization -enabled by running the configure script like this: - - .../configure --extra-cflags="-flto" - -then the endianess test is failing since the magic values do not -show up in the intermediate object files there. 
If the host is -a big endian machine (like s390x), the QEMU binary is then unusable -since the corresponding variable "bigendian" is pre-initialized -with "no". - -To fix this issue, we should rather create a full binary and look -for the magic strings there instead. -And we really should not continue the build if the endianess check -failed, to make it clear right from the start that something went -wrong here, thus let's also add some "exit 1" statements here -after emitting the error message. - -Message-Id: <20210715083928.933806-1-thuth@redhat.com> -Reviewed-by: Richard Henderson -Signed-off-by: Thomas Huth -(cherry picked from commit 659eb157a55666bf379f5362238a86d855e262e2) -Signed-off-by: Jon Maloy -Signed-off-by: Miroslav Rezanina ---- - configure | 15 +++++++++------ - 1 file changed, 9 insertions(+), 6 deletions(-) - -diff --git a/configure b/configure -index 83d8af7fe4..dcd9520bed 100755 ---- a/configure -+++ b/configure -@@ -2323,24 +2323,27 @@ feature_not_found() { - # --- - # big/little endian test - cat > $TMPC << EOF -+#include - short big_endian[] = { 0x4269, 0x4765, 0x4e64, 0x4961, 0x4e00, 0, }; - short little_endian[] = { 0x694c, 0x7454, 0x654c, 0x6e45, 0x6944, 0x6e41, 0, }; --extern int foo(short *, short *); --int main(int argc, char *argv[]) { -- return foo(big_endian, little_endian); -+int main(int argc, char *argv[]) -+{ -+ return printf("%s %s\n", (char *)big_endian, (char *)little_endian); - } - EOF - --if compile_object ; then -- if strings -a $TMPO | grep -q BiGeNdIaN ; then -+if compile_prog ; then -+ if strings -a $TMPE | grep -q BiGeNdIaN ; then - bigendian="yes" -- elif strings -a $TMPO | grep -q LiTtLeEnDiAn ; then -+ elif strings -a $TMPE | grep -q LiTtLeEnDiAn ; then - bigendian="no" - else - echo big/little test failed -+ exit 1 - fi - else - echo big/little test failed -+ exit 1 - fi - - ########################################## --- -2.27.0 - diff --git a/kvm-disable-CONFIG_USB_STORAGE_BOT.patch 
b/kvm-disable-CONFIG_USB_STORAGE_BOT.patch deleted file mode 100644 index 017b5ae..0000000 --- a/kvm-disable-CONFIG_USB_STORAGE_BOT.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 64ec0505fccf6f277430f3be1829a9e44cd00dbb Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Fri, 18 Jun 2021 12:04:24 -0400 -Subject: [PATCH 07/12] disable CONFIG_USB_STORAGE_BOT -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 16: Synchronize with RHEL-AV 8.5 release 21 to RHEL 9 -RH-Commit: [5/8] 73d3ee0a17590c8cddf6bd812e6a758951c36ea4 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Daniel P. Berrangé - -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - default-configs/devices/ppc64-rh-devices.mak | 1 - - default-configs/devices/x86_64-rh-devices.mak | 1 - - 2 files changed, 2 deletions(-) - -diff --git a/default-configs/devices/ppc64-rh-devices.mak b/default-configs/devices/ppc64-rh-devices.mak -index 5b01b7fac0..3ec5603ace 100644 ---- a/default-configs/devices/ppc64-rh-devices.mak -+++ b/default-configs/devices/ppc64-rh-devices.mak -@@ -15,7 +15,6 @@ CONFIG_USB=y - CONFIG_USB_OHCI=y - CONFIG_USB_OHCI_PCI=y - CONFIG_USB_SMARTCARD=y --CONFIG_USB_STORAGE_BOT=y - CONFIG_USB_STORAGE_CORE=y - CONFIG_USB_STORAGE_CLASSIC=y - CONFIG_USB_XHCI=y -diff --git a/default-configs/devices/x86_64-rh-devices.mak b/default-configs/devices/x86_64-rh-devices.mak -index d09c138fc6..81bda09f4c 100644 ---- a/default-configs/devices/x86_64-rh-devices.mak -+++ b/default-configs/devices/x86_64-rh-devices.mak -@@ -74,7 +74,6 @@ CONFIG_USB=y - CONFIG_USB_EHCI=y - CONFIG_USB_EHCI_PCI=y - CONFIG_USB_SMARTCARD=y --CONFIG_USB_STORAGE_BOT=y - CONFIG_USB_STORAGE_CORE=y - CONFIG_USB_STORAGE_CLASSIC=y - CONFIG_USB_UHCI=y --- -2.27.0 - diff --git a/kvm-disable-ac97-audio.patch b/kvm-disable-ac97-audio.patch deleted file mode 100644 index 469c6c4..0000000 
--- a/kvm-disable-ac97-audio.patch +++ /dev/null @@ -1,37 +0,0 @@ -From e2bb4b752d68856c4c307640ae310f47f680aed6 Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Fri, 20 Aug 2021 10:21:07 +0200 -Subject: [PATCH 2/4] disable ac97 audio -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Gerd Hoffmann -RH-MergeRequest: 39: disable ac97 audio -RH-Commit: [1/1] 5d1bd969d20f960cb0a023f0d6cd1ae5adda22e6 (kraxel/centos-qemu-kvm) -RH-Bugzilla: 1995819 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Thomas Huth - -RH-Bugzilla: 1995819 - -Signed-off-by: Gerd Hoffmann ---- - default-configs/devices/x86_64-rh-devices.mak | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/default-configs/devices/x86_64-rh-devices.mak b/default-configs/devices/x86_64-rh-devices.mak -index c2dd112f81..8ae2747bf6 100644 ---- a/default-configs/devices/x86_64-rh-devices.mak -+++ b/default-configs/devices/x86_64-rh-devices.mak -@@ -1,6 +1,5 @@ - include rh-virtio.mak - --CONFIG_AC97=y - CONFIG_ACPI=y - CONFIG_ACPI_PCI=y - CONFIG_ACPI_CPU_HOTPLUG=y --- -2.27.0 - diff --git a/kvm-doc-Fix-some-mistakes-in-the-SEV-documentation.patch b/kvm-doc-Fix-some-mistakes-in-the-SEV-documentation.patch deleted file mode 100644 index 7439afd..0000000 --- a/kvm-doc-Fix-some-mistakes-in-the-SEV-documentation.patch +++ /dev/null @@ -1,151 +0,0 @@ -From 17c1559139d6a58794944901f84dd4e8cd1f5335 Mon Sep 17 00:00:00 2001 -From: Connor Kuehl -Date: Tue, 22 Jun 2021 20:00:20 -0400 -Subject: [PATCH 08/12] doc: Fix some mistakes in the SEV documentation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 16: Synchronize with RHEL-AV 8.5 release 21 to RHEL 9 -RH-Commit: [6/8] ce828f81de1320a1833241700cb13dfdcf7d82e7 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Daniel P. 
Berrangé - -From: Tom Lendacky - -Fix some spelling and grammar mistakes in the amd-memory-encryption.txt -file. No new information added. - -Signed-off-by: Tom Lendacky -Reviewed-by: Laszlo Ersek -Reviewed-by: Connor Kuehl -Message-Id: -Signed-off-by: Eduardo Habkost -(cherry picked from commit f538adeccf4554e6402fe661a0a51bcc8d6bd227) -Signed-off-by: Connor Kuehl -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - docs/amd-memory-encryption.txt | 59 +++++++++++++++++----------------- - 1 file changed, 29 insertions(+), 30 deletions(-) - -diff --git a/docs/amd-memory-encryption.txt b/docs/amd-memory-encryption.txt -index 145896aec7..ed85159ea7 100644 ---- a/docs/amd-memory-encryption.txt -+++ b/docs/amd-memory-encryption.txt -@@ -1,38 +1,38 @@ - Secure Encrypted Virtualization (SEV) is a feature found on AMD processors. - - SEV is an extension to the AMD-V architecture which supports running encrypted --virtual machine (VMs) under the control of KVM. Encrypted VMs have their pages -+virtual machines (VMs) under the control of KVM. Encrypted VMs have their pages - (code and data) secured such that only the guest itself has access to the - unencrypted version. Each encrypted VM is associated with a unique encryption --key; if its data is accessed to a different entity using a different key the -+key; if its data is accessed by a different entity using a different key the - encrypted guests data will be incorrectly decrypted, leading to unintelligible - data. - --The key management of this feature is handled by separate processor known as --AMD secure processor (AMD-SP) which is present in AMD SOCs. Firmware running --inside the AMD-SP provide commands to support common VM lifecycle. This -+Key management for this feature is handled by a separate processor known as the -+AMD secure processor (AMD-SP), which is present in AMD SOCs. Firmware running -+inside the AMD-SP provides commands to support a common VM lifecycle. 
This - includes commands for launching, snapshotting, migrating and debugging the --encrypted guest. Those SEV command can be issued via KVM_MEMORY_ENCRYPT_OP -+encrypted guest. These SEV commands can be issued via KVM_MEMORY_ENCRYPT_OP - ioctls. - - Launching - --------- --Boot images (such as bios) must be encrypted before guest can be booted. --MEMORY_ENCRYPT_OP ioctl provides commands to encrypt the images :LAUNCH_START, -+Boot images (such as bios) must be encrypted before a guest can be booted. The -+MEMORY_ENCRYPT_OP ioctl provides commands to encrypt the images: LAUNCH_START, - LAUNCH_UPDATE_DATA, LAUNCH_MEASURE and LAUNCH_FINISH. These four commands - together generate a fresh memory encryption key for the VM, encrypt the boot --images and provide a measurement than can be used as an attestation of the -+images and provide a measurement than can be used as an attestation of a - successful launch. - - LAUNCH_START is called first to create a cryptographic launch context within --the firmware. To create this context, guest owner must provides guest policy, -+the firmware. To create this context, guest owner must provide a guest policy, - its public Diffie-Hellman key (PDH) and session parameters. These inputs --should be treated as binary blob and must be passed as-is to the SEV firmware. -+should be treated as a binary blob and must be passed as-is to the SEV firmware. - --The guest policy is passed as plaintext and hypervisor may able to read it -+The guest policy is passed as plaintext. A hypervisor may choose to read it, - but should not modify it (any modification of the policy bits will result - in bad measurement). The guest policy is a 4-byte data structure containing --several flags that restricts what can be done on running SEV guest. -+several flags that restricts what can be done on a running SEV guest. - See KM Spec section 3 and 6.2 for more details. 
- - The guest policy can be provided via the 'policy' property (see below) -@@ -40,31 +40,30 @@ The guest policy can be provided via the 'policy' property (see below) - # ${QEMU} \ - sev-guest,id=sev0,policy=0x1...\ - --Guest owners provided DH certificate and session parameters will be used to -+The guest owner provided DH certificate and session parameters will be used to - establish a cryptographic session with the guest owner to negotiate keys used - for the attestation. - --The DH certificate and session blob can be provided via 'dh-cert-file' and --'session-file' property (see below -+The DH certificate and session blob can be provided via the 'dh-cert-file' and -+'session-file' properties (see below) - - # ${QEMU} \ - sev-guest,id=sev0,dh-cert-file=,session-file= - - LAUNCH_UPDATE_DATA encrypts the memory region using the cryptographic context --created via LAUNCH_START command. If required, this command can be called -+created via the LAUNCH_START command. If required, this command can be called - multiple times to encrypt different memory regions. The command also calculates - the measurement of the memory contents as it encrypts. - --LAUNCH_MEASURE command can be used to retrieve the measurement of encrypted --memory. This measurement is a signature of the memory contents that can be --sent to the guest owner as an attestation that the memory was encrypted --correctly by the firmware. The guest owner may wait to provide the guest --confidential information until it can verify the attestation measurement. --Since the guest owner knows the initial contents of the guest at boot, the --attestation measurement can be verified by comparing it to what the guest owner --expects. -+LAUNCH_MEASURE can be used to retrieve the measurement of encrypted memory. -+This measurement is a signature of the memory contents that can be sent to the -+guest owner as an attestation that the memory was encrypted correctly by the -+firmware. 
The guest owner may wait to provide the guest confidential information -+until it can verify the attestation measurement. Since the guest owner knows the -+initial contents of the guest at boot, the attestation measurement can be -+verified by comparing it to what the guest owner expects. - --LAUNCH_FINISH command finalizes the guest launch and destroy's the cryptographic -+LAUNCH_FINISH finalizes the guest launch and destroys the cryptographic - context. - - See SEV KM API Spec [1] 'Launching a guest' usage flow (Appendix A) for the -@@ -78,10 +77,10 @@ To launch a SEV guest - - Debugging - ----------- --Since memory contents of SEV guest is encrypted hence hypervisor access to the --guest memory will get a cipher text. If guest policy allows debugging, then --hypervisor can use DEBUG_DECRYPT and DEBUG_ENCRYPT commands access the guest --memory region for debug purposes. This is not supported in QEMU yet. -+Since the memory contents of a SEV guest are encrypted, hypervisor access to -+the guest memory will return cipher text. If the guest policy allows debugging, -+then a hypervisor can use the DEBUG_DECRYPT and DEBUG_ENCRYPT commands to access -+the guest memory region for debug purposes. This is not supported in QEMU yet. 
- - Snapshot/Restore - ----------------- --- -2.27.0 - diff --git a/kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch b/kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch deleted file mode 100644 index 2aabcbd..0000000 --- a/kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch +++ /dev/null @@ -1,141 +0,0 @@ -From 1bd5660666d2a1f704ebabeed8a2bbfa02410f41 Mon Sep 17 00:00:00 2001 -From: Connor Kuehl -Date: Tue, 22 Jun 2021 20:00:21 -0400 -Subject: [PATCH 09/12] docs: Add SEV-ES documentation to - amd-memory-encryption.txt -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 16: Synchronize with RHEL-AV 8.5 release 21 to RHEL 9 -RH-Commit: [7/8] 36e49577484813866132b90c64cf99779326db74 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Daniel P. Berrangé - -From: Tom Lendacky - -Update the amd-memory-encryption.txt file with information about SEV-ES, -including how to launch an SEV-ES guest and some of the differences -between SEV and SEV-ES guests in regards to launching and measuring the -guest. - -Signed-off-by: Tom Lendacky -Acked-by: Laszlo Ersek -Reviewed-by: Connor Kuehl -Message-Id: -Signed-off-by: Eduardo Habkost -(cherry picked from commit 61b7d7098cd53dd386939610d534f8bd79240881) -Signed-off-by: Connor Kuehl -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - docs/amd-memory-encryption.txt | 54 +++++++++++++++++++++++++++++----- - 1 file changed, 47 insertions(+), 7 deletions(-) - -diff --git a/docs/amd-memory-encryption.txt b/docs/amd-memory-encryption.txt -index ed85159ea7..ffca382b5f 100644 ---- a/docs/amd-memory-encryption.txt -+++ b/docs/amd-memory-encryption.txt -@@ -15,6 +15,13 @@ includes commands for launching, snapshotting, migrating and debugging the - encrypted guest. These SEV commands can be issued via KVM_MEMORY_ENCRYPT_OP - ioctls. 
- -+Secure Encrypted Virtualization - Encrypted State (SEV-ES) builds on the SEV -+support to additionally protect the guest register state. In order to allow a -+hypervisor to perform functions on behalf of a guest, there is architectural -+support for notifying a guest's operating system when certain types of VMEXITs -+are about to occur. This allows the guest to selectively share information with -+the hypervisor to satisfy the requested function. -+ - Launching - --------- - Boot images (such as bios) must be encrypted before a guest can be booted. The -@@ -24,6 +31,9 @@ together generate a fresh memory encryption key for the VM, encrypt the boot - images and provide a measurement than can be used as an attestation of a - successful launch. - -+For a SEV-ES guest, the LAUNCH_UPDATE_VMSA command is also used to encrypt the -+guest register state, or VM save area (VMSA), for all of the guest vCPUs. -+ - LAUNCH_START is called first to create a cryptographic launch context within - the firmware. To create this context, guest owner must provide a guest policy, - its public Diffie-Hellman key (PDH) and session parameters. These inputs -@@ -40,6 +50,12 @@ The guest policy can be provided via the 'policy' property (see below) - # ${QEMU} \ - sev-guest,id=sev0,policy=0x1...\ - -+Setting the "SEV-ES required" policy bit (bit 2) will launch the guest as a -+SEV-ES guest (see below) -+ -+# ${QEMU} \ -+ sev-guest,id=sev0,policy=0x5...\ -+ - The guest owner provided DH certificate and session parameters will be used to - establish a cryptographic session with the guest owner to negotiate keys used - for the attestation. -@@ -55,13 +71,19 @@ created via the LAUNCH_START command. If required, this command can be called - multiple times to encrypt different memory regions. The command also calculates - the measurement of the memory contents as it encrypts. - --LAUNCH_MEASURE can be used to retrieve the measurement of encrypted memory. 
--This measurement is a signature of the memory contents that can be sent to the --guest owner as an attestation that the memory was encrypted correctly by the --firmware. The guest owner may wait to provide the guest confidential information --until it can verify the attestation measurement. Since the guest owner knows the --initial contents of the guest at boot, the attestation measurement can be --verified by comparing it to what the guest owner expects. -+LAUNCH_UPDATE_VMSA encrypts all the vCPU VMSAs for a SEV-ES guest using the -+cryptographic context created via the LAUNCH_START command. The command also -+calculates the measurement of the VMSAs as it encrypts them. -+ -+LAUNCH_MEASURE can be used to retrieve the measurement of encrypted memory and, -+for a SEV-ES guest, encrypted VMSAs. This measurement is a signature of the -+memory contents and, for a SEV-ES guest, the VMSA contents, that can be sent -+to the guest owner as an attestation that the memory and VMSAs were encrypted -+correctly by the firmware. The guest owner may wait to provide the guest -+confidential information until it can verify the attestation measurement. -+Since the guest owner knows the initial contents of the guest at boot, the -+attestation measurement can be verified by comparing it to what the guest owner -+expects. - - LAUNCH_FINISH finalizes the guest launch and destroys the cryptographic - context. -@@ -75,6 +97,22 @@ To launch a SEV guest - -machine ...,confidential-guest-support=sev0 \ - -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1 - -+To launch a SEV-ES guest -+ -+# ${QEMU} \ -+ -machine ...,confidential-guest-support=sev0 \ -+ -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1,policy=0x5 -+ -+An SEV-ES guest has some restrictions as compared to a SEV guest. Because the -+guest register state is encrypted and cannot be updated by the VMM/hypervisor, -+a SEV-ES guest: -+ - Does not support SMM - SMM support requires updating the guest register -+ state. 
-+ - Does not support reboot - a system reset requires updating the guest register -+ state. -+ - Requires in-kernel irqchip - the burden is placed on the hypervisor to -+ manage booting APs. -+ - Debugging - ----------- - Since the memory contents of a SEV guest are encrypted, hypervisor access to -@@ -101,8 +139,10 @@ Secure Encrypted Virtualization Key Management: - - KVM Forum slides: - http://www.linux-kvm.org/images/7/74/02x08A-Thomas_Lendacky-AMDs_Virtualizatoin_Memory_Encryption_Technology.pdf -+https://www.linux-kvm.org/images/9/94/Extending-Secure-Encrypted-Virtualization-with-SEV-ES-Thomas-Lendacky-AMD.pdf - - AMD64 Architecture Programmer's Manual: - http://support.amd.com/TechDocs/24593.pdf - SME is section 7.10 - SEV is section 15.34 -+ SEV-ES is section 15.35 --- -2.27.0 - diff --git a/kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch b/kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch deleted file mode 100644 index e900ba7..0000000 --- a/kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch +++ /dev/null @@ -1,110 +0,0 @@ -From e408203bab17e32f8d42ae9ad61e94a73bfaec67 Mon Sep 17 00:00:00 2001 -From: Connor Kuehl -Date: Tue, 22 Jun 2021 20:00:22 -0400 -Subject: [PATCH 10/12] docs/interop/firmware.json: Add SEV-ES support -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 16: Synchronize with RHEL-AV 8.5 release 21 to RHEL 9 -RH-Commit: [8/8] b49ebbaf40b56d95c67475a0373d6906a3e4f0e3 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Daniel P. Berrangé - -From: Tom Lendacky - -Create an enum definition, '@amd-sev-es', for SEV-ES and add documention -for the new enum. Add an example that shows some of the requirements for -SEV-ES, including not having SMM support and the requirement for an -X64-only build. 
- -Signed-off-by: Tom Lendacky -Reviewed-by: Laszlo Ersek -Reviewed-by: Connor Kuehl -Message-Id: -Signed-off-by: Eduardo Habkost -(cherry picked from commit d44df1d73ce04d7f4b8f94cba5f715e2dadc998b) -Signed-off-by: Connor Kuehl -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - docs/interop/firmware.json | 47 +++++++++++++++++++++++++++++++++++++- - 1 file changed, 46 insertions(+), 1 deletion(-) - -diff --git a/docs/interop/firmware.json b/docs/interop/firmware.json -index 9d94ccafa9..8d8b0be030 100644 ---- a/docs/interop/firmware.json -+++ b/docs/interop/firmware.json -@@ -115,6 +115,12 @@ - # this feature are documented in - # "docs/amd-memory-encryption.txt". - # -+# @amd-sev-es: The firmware supports running under AMD Secure Encrypted -+# Virtualization - Encrypted State, as specified in the AMD64 -+# Architecture Programmer's Manual. QEMU command line options -+# related to this feature are documented in -+# "docs/amd-memory-encryption.txt". -+# - # @enrolled-keys: The variable store (NVRAM) template associated with - # the firmware binary has the UEFI Secure Boot - # operational mode turned on, with certificates -@@ -179,7 +185,7 @@ - # Since: 3.0 - ## - { 'enum' : 'FirmwareFeature', -- 'data' : [ 'acpi-s3', 'acpi-s4', 'amd-sev', 'enrolled-keys', -+ 'data' : [ 'acpi-s3', 'acpi-s4', 'amd-sev', 'amd-sev-es', 'enrolled-keys', - 'requires-smm', 'secure-boot', 'verbose-dynamic', - 'verbose-static' ] } - -@@ -504,6 +510,45 @@ - # } - # - # { -+# "description": "OVMF with SEV-ES support", -+# "interface-types": [ -+# "uefi" -+# ], -+# "mapping": { -+# "device": "flash", -+# "executable": { -+# "filename": "/usr/share/OVMF/OVMF_CODE.fd", -+# "format": "raw" -+# }, -+# "nvram-template": { -+# "filename": "/usr/share/OVMF/OVMF_VARS.fd", -+# "format": "raw" -+# } -+# }, -+# "targets": [ -+# { -+# "architecture": "x86_64", -+# "machines": [ -+# "pc-q35-*" -+# ] -+# } -+# ], -+# "features": [ -+# "acpi-s3", -+# "amd-sev", -+# "amd-sev-es", 
-+# "verbose-dynamic" -+# ], -+# "tags": [ -+# "-a X64", -+# "-p OvmfPkg/OvmfPkgX64.dsc", -+# "-t GCC48", -+# "-b DEBUG", -+# "-D FD_SIZE_4MB" -+# ] -+# } -+# -+# { - # "description": "UEFI firmware for ARM64 virtual machines", - # "interface-types": [ - # "uefi" --- -2.27.0 - diff --git a/kvm-file-posix-fix-max_iov-for-dev-sg-devices.patch b/kvm-file-posix-fix-max_iov-for-dev-sg-devices.patch deleted file mode 100644 index 3027bec..0000000 --- a/kvm-file-posix-fix-max_iov-for-dev-sg-devices.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 0111d01afe82c46656a40269bf21eb7702c02a09 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 16 Jul 2021 16:51:29 -0400 -Subject: [PATCH 13/43] file-posix: fix max_iov for /dev/sg devices - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -Even though it was only called for devices that have bs->sg set (which -must be character devices), sg_get_max_segments looked at /sys/dev/block -which only works for block devices. - -On Linux the sg driver has its own way to provide the maximum number of -iovecs in a scatter/gather list, so add support for it. The block device -path is kept because it will be reinstated in the next patches. - -Signed-off-by: Paolo Bonzini -Reviewed-by: Max Reitz -(cherry picked from commit 8ad5ab6148dca8aad297c134c09c84b0b92d45ed) -Signed-off-by: Danilo C. L. 
de Paula -Signed-off-by: Miroslav Rezanina ---- - block/file-posix.c | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 20e14f8e96..74d4903dc1 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -1204,6 +1204,17 @@ static int sg_get_max_segments(int fd) - goto out; - } - -+ if (S_ISCHR(st.st_mode)) { -+ if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) { -+ return ret; -+ } -+ return -ENOTSUP; -+ } -+ -+ if (!S_ISBLK(st.st_mode)) { -+ return -ENOTSUP; -+ } -+ - sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments", - major(st.st_rdev), minor(st.st_rdev)); - sysfd = open(sysfspath, O_RDONLY); --- -2.27.0 - diff --git a/kvm-file-posix-try-BLKSECTGET-on-block-devices-too-do-no.patch b/kvm-file-posix-try-BLKSECTGET-on-block-devices-too-do-no.patch deleted file mode 100644 index c80576b..0000000 --- a/kvm-file-posix-try-BLKSECTGET-on-block-devices-too-do-no.patch +++ /dev/null @@ -1,140 +0,0 @@ -From 9c8493d3a6d2e4d879d1ef67ff1abebd532c87a0 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 16 Jul 2021 16:51:34 -0400 -Subject: [PATCH 18/43] file-posix: try BLKSECTGET on block devices too, do not - round to power of 2 - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -bs->sg is only true for character devices, but block devices can also -be used with scsi-block and scsi-generic. Unfortunately BLKSECTGET -returns bytes in an int for /dev/sgN devices, and sectors in a short -for block devices, so account for that in the code. - -The maximum transfer also need not be a power of 2 (for example I have -seen disks with 1280 KiB maximum transfer) so there's no need to pass -the result through pow2floor. - -Signed-off-by: Paolo Bonzini -(cherry picked from commit 18473467d55a20d643b6c9b3a52de42f705b4d35) -Signed-off-by: Danilo C. L. 
de Paula -Signed-off-by: Miroslav Rezanina ---- - block/file-posix.c | 57 +++++++++++++++++++++++++++------------------- - 1 file changed, 33 insertions(+), 24 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 44325a635d..7b4ebf65d5 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -1173,22 +1173,27 @@ static void raw_reopen_abort(BDRVReopenState *state) - s->reopen_state = NULL; - } - --static int sg_get_max_transfer_length(int fd) -+static int hdev_get_max_hw_transfer(int fd, struct stat *st) - { - #ifdef BLKSECTGET -- int max_bytes = 0; -- -- if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) { -- return max_bytes; -+ if (S_ISBLK(st->st_mode)) { -+ unsigned short max_sectors = 0; -+ if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) { -+ return max_sectors * 512; -+ } - } else { -- return -errno; -+ int max_bytes = 0; -+ if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) { -+ return max_bytes; -+ } - } -+ return -errno; - #else - return -ENOSYS; - #endif - } - --static int sg_get_max_segments(int fd) -+static int hdev_get_max_segments(int fd, struct stat *st) - { - #ifdef CONFIG_LINUX - char buf[32]; -@@ -1197,26 +1202,20 @@ static int sg_get_max_segments(int fd) - int ret; - int sysfd = -1; - long max_segments; -- struct stat st; -- -- if (fstat(fd, &st)) { -- ret = -errno; -- goto out; -- } - -- if (S_ISCHR(st.st_mode)) { -+ if (S_ISCHR(st->st_mode)) { - if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) { - return ret; - } - return -ENOTSUP; - } - -- if (!S_ISBLK(st.st_mode)) { -+ if (!S_ISBLK(st->st_mode)) { - return -ENOTSUP; - } - - sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments", -- major(st.st_rdev), minor(st.st_rdev)); -+ major(st->st_rdev), minor(st->st_rdev)); - sysfd = open(sysfspath, O_RDONLY); - if (sysfd == -1) { - ret = -errno; -@@ -1253,23 +1252,33 @@ out: - static void raw_refresh_limits(BlockDriverState *bs, Error **errp) - { - BDRVRawState *s = bs->opaque; -+ struct stat st; - -- if (bs->sg) { -- int ret 
= sg_get_max_transfer_length(s->fd); -+ raw_probe_alignment(bs, s->fd, errp); -+ bs->bl.min_mem_alignment = s->buf_align; -+ bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size); -+ -+ /* -+ * Maximum transfers are best effort, so it is okay to ignore any -+ * errors. That said, based on the man page errors in fstat would be -+ * very much unexpected; the only possible case seems to be ENOMEM. -+ */ -+ if (fstat(s->fd, &st)) { -+ return; -+ } -+ -+ if (bs->sg || S_ISBLK(st.st_mode)) { -+ int ret = hdev_get_max_hw_transfer(s->fd, &st); - - if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) { -- bs->bl.max_hw_transfer = pow2floor(ret); -+ bs->bl.max_hw_transfer = ret; - } - -- ret = sg_get_max_segments(s->fd); -+ ret = hdev_get_max_segments(s->fd, &st); - if (ret > 0) { - bs->bl.max_iov = ret; - } - } -- -- raw_probe_alignment(bs, s->fd, errp); -- bs->bl.min_mem_alignment = s->buf_align; -- bs->bl.opt_mem_alignment = MAX(s->buf_align, qemu_real_host_page_size); - } - - static int check_for_dasd(int fd) --- -2.27.0 - diff --git a/kvm-hmp-Fix-loadvm-to-resume-the-VM-on-success-instead-o.patch b/kvm-hmp-Fix-loadvm-to-resume-the-VM-on-success-instead-o.patch deleted file mode 100644 index 6ed6a6b..0000000 --- a/kvm-hmp-Fix-loadvm-to-resume-the-VM-on-success-instead-o.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 3347d61ff783d05f41f03097551460dc5825b301 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 29 Jul 2021 07:42:14 -0400 -Subject: [PATCH 11/39] hmp: Fix loadvm to resume the VM on success instead of - failure - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 -RH-Commit: [3/15] 492cfb8ef252805b988a256abe73628605f630e9 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Acked-by: Igor Mammedov -RH-Acked-by: Andrew Jones - -Commit f61fe11aa6f broke hmp_loadvm() by adding an incorrect negation -when converting from 0/-errno return values 
to a bool value. The result -is that loadvm resumes the VM now if it failed and keeps it stopped if -it failed. Fix it to restore the old behaviour and do it the other way -around. - -Fixes: f61fe11aa6f7f8f0ffe4ddaa56a8108f3ab57854 -Cc: qemu-stable@nongnu.org -Reported-by: Yanhui Ma -Signed-off-by: Kevin Wolf -Message-Id: <20210511163151.45167-1-kwolf@redhat.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit c53cd04e70641fdf9410aac40c617d074047b3e1) -Signed-off-by: Kevin Wolf -Signed-off-by: Miroslav Rezanina ---- - monitor/hmp-cmds.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index 0ad5b77477..cc15d9b6ee 100644 ---- a/monitor/hmp-cmds.c -+++ b/monitor/hmp-cmds.c -@@ -1133,7 +1133,7 @@ void hmp_loadvm(Monitor *mon, const QDict *qdict) - - vm_stop(RUN_STATE_RESTORE_VM); - -- if (!load_snapshot(name, NULL, false, NULL, &err) && saved_vm_running) { -+ if (load_snapshot(name, NULL, false, NULL, &err) && saved_vm_running) { - vm_start(); - } - hmp_handle_error(mon, err); --- -2.27.0 - diff --git a/kvm-hw-arm-smmuv3-Another-range-invalidation-fix.patch b/kvm-hw-arm-smmuv3-Another-range-invalidation-fix.patch deleted file mode 100644 index 7fb1c3a..0000000 --- a/kvm-hw-arm-smmuv3-Another-range-invalidation-fix.patch +++ /dev/null @@ -1,111 +0,0 @@ -From e9abef24fae799febf81cd4ac02efe8987a698e8 Mon Sep 17 00:00:00 2001 -From: Auger Eric -Date: Wed, 26 May 2021 16:07:40 -0400 -Subject: [PATCH 15/15] hw/arm/smmuv3: Another range invalidation fix - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta -RH-Commit: [12/12] dc064684e5f3f11d955565b05d37b0f2d9f79b91 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Cornelia Huck -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier - -6d9cd115b9 ("hw/arm/smmuv3: Enforce invalidation on a power of two 
range") -failed to completely fix misalignment issues with range -invalidation. For instance invalidations patterns like "invalidate 32 -4kB pages starting from 0xff395000 are not correctly handled" due -to the fact the previous fix only made sure the number of invalidated -pages were a power of 2 but did not properly handle the start -address was not aligned with the range. This can be noticed when -boothing a fedora 33 with protected virtio-blk-pci. - -Signed-off-by: Eric Auger -Fixes: 6d9cd115b9 ("hw/arm/smmuv3: Enforce invalidation on a power of two range") -Reviewed-by: Peter Maydell -Signed-off-by: Peter Maydell -(cherry picked from commit 219729cfbf9e979020bffedac6a790144173ec62) -Signed-off-by: Eric Auger -Signed-off-by: Miroslav Rezanina ---- - hw/arm/smmuv3.c | 50 +++++++++++++++++++++++++------------------------ - 1 file changed, 26 insertions(+), 24 deletions(-) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index 8705612535..e1979282e4 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -856,43 +856,45 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova, - - static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd) - { -- uint8_t scale = 0, num = 0, ttl = 0; -- dma_addr_t addr = CMD_ADDR(cmd); -+ dma_addr_t end, addr = CMD_ADDR(cmd); - uint8_t type = CMD_TYPE(cmd); - uint16_t vmid = CMD_VMID(cmd); -+ uint8_t scale = CMD_SCALE(cmd); -+ uint8_t num = CMD_NUM(cmd); -+ uint8_t ttl = CMD_TTL(cmd); - bool leaf = CMD_LEAF(cmd); - uint8_t tg = CMD_TG(cmd); -- uint64_t first_page = 0, last_page; -- uint64_t num_pages = 1; -+ uint64_t num_pages; -+ uint8_t granule; - int asid = -1; - -- if (tg) { -- scale = CMD_SCALE(cmd); -- num = CMD_NUM(cmd); -- ttl = CMD_TTL(cmd); -- num_pages = (num + 1) * BIT_ULL(scale); -- } -- - if (type == SMMU_CMD_TLBI_NH_VA) { - asid = CMD_ASID(cmd); - } - -- /* Split invalidations into ^2 range invalidations */ -- last_page = num_pages - 1; -- while (num_pages) { -- uint8_t granule = tg * 2 + 10; 
-- uint64_t mask, count; -+ if (!tg) { -+ trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, 1, ttl, leaf); -+ smmuv3_inv_notifiers_iova(s, asid, addr, tg, 1); -+ smmu_iotlb_inv_iova(s, asid, addr, tg, 1, ttl); -+ return; -+ } -+ -+ /* RIL in use */ - -- mask = dma_aligned_pow2_mask(first_page, last_page, 64 - granule); -- count = mask + 1; -+ num_pages = (num + 1) * BIT_ULL(scale); -+ granule = tg * 2 + 10; -+ -+ /* Split invalidations into ^2 range invalidations */ -+ end = addr + (num_pages << granule) - 1; - -- trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, count, ttl, leaf); -- smmuv3_inv_notifiers_iova(s, asid, addr, tg, count); -- smmu_iotlb_inv_iova(s, asid, addr, tg, count, ttl); -+ while (addr != end + 1) { -+ uint64_t mask = dma_aligned_pow2_mask(addr, end, 64); - -- num_pages -= count; -- first_page += count; -- addr += count * BIT_ULL(granule); -+ num_pages = (mask + 1) >> granule; -+ trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf); -+ smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages); -+ smmu_iotlb_inv_iova(s, asid, addr, tg, num_pages, ttl); -+ addr += mask + 1; - } - } - --- -2.27.0 - diff --git a/kvm-hw-arm-virt-Add-8.5-and-9.0-machine-types-and-remove.patch b/kvm-hw-arm-virt-Add-8.5-and-9.0-machine-types-and-remove.patch deleted file mode 100644 index 6415284..0000000 --- a/kvm-hw-arm-virt-Add-8.5-and-9.0-machine-types-and-remove.patch +++ /dev/null @@ -1,63 +0,0 @@ -From ee0be09f3598596e41b3fc2dbefef3382c5b0541 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 25 May 2021 09:22:24 +0200 -Subject: [PATCH 02/15] hw/arm/virt: Add 8.5 and 9.0 machine types and remove - older ones - -RH-Author: Eric Auger -RH-MergeRequest: 1: Add 9.0.0 and 8.5.0 arm-virt machine types -RH-Commit: [2/2] ace4619a1d505a3b552a236260b259bd6ddabc00 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 1952449 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones - -Add 8.5 and 9.0 machine types and remove older ones. 
- -Signed-off-by: Eric Auger -Signed-off-by: Miroslav Rezanina ---- - hw/arm/virt.c | 24 +++++------------------- - 1 file changed, 5 insertions(+), 19 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 51a415570c..e4aa794f83 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3095,28 +3095,14 @@ static void rhel_machine_init(void) - } - type_init(rhel_machine_init); - --static void rhel840_virt_options(MachineClass *mc) -+static void rhel900_virt_options(MachineClass *mc) - { - compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); -- compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); - } --DEFINE_RHEL_MACHINE_AS_LATEST(8, 4, 0) -+DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) - --static void rhel830_virt_options(MachineClass *mc) -+static void rhel850_virt_options(MachineClass *mc) - { -- VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); -- -- rhel840_virt_options(mc); -- compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); -- vmc->no_kvm_steal_time = true; --} --DEFINE_RHEL_MACHINE(8, 3, 0) -- --static void rhel820_virt_options(MachineClass *mc) --{ -- rhel830_virt_options(mc); -- compat_props_add(mc->compat_props, hw_compat_rhel_8_2, hw_compat_rhel_8_2_len); -- mc->numa_mem_supported = true; -- mc->auto_enable_numa_with_memdev = false; -+ rhel900_virt_options(mc); - } --DEFINE_RHEL_MACHINE(8, 2, 0) -+DEFINE_RHEL_MACHINE(8, 5, 0) --- -2.27.0 - diff --git a/kvm-hw-arm-virt-Disable-PL011-clock-migration-through-hw.patch b/kvm-hw-arm-virt-Disable-PL011-clock-migration-through-hw.patch deleted file mode 100644 index 6ce6dd0..0000000 --- a/kvm-hw-arm-virt-Disable-PL011-clock-migration-through-hw.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 7f76c347f17c5fc60f3bcb99ad65e26f9da4ed9f Mon Sep 17 00:00:00 2001 -From: Auger Eric -Date: Thu, 20 May 2021 19:23:26 -0400 -Subject: [PATCH 10/15] hw/arm/virt: Disable PL011 clock migration through - hw_compat_rhel_8_3 - -RH-Author: 
Miroslav Rezanina -RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta -RH-Commit: [6/12] 5f52975350b2497ee82cc5c9b8ba930e3a9b8c3d (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Cornelia Huck -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier - -Disable PL011 clock migration for machine types before -virt-rhel8.4.0. - -The regression was introduced by aac63e0e6ea3 -("hw/char/pl011: add a clock input"), in 8.4, -causing failure of migration between qemu 8.4 towards -older ones. - -The fix was taken in 8.5 as part of the rebase, -e6fa978d8343 ("hw/arm/virt: Disable pl011 clock migration -if needed"). But the compat needs to be applied in -hw_compat_rhel_8_3[]. - -Signed-off-by: Eric Auger -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/core/machine.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index c665e869de..6c534e14fa 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -69,6 +69,8 @@ GlobalProperty hw_compat_rhel_8_3[] = { - { "nvme", "use-intel-id", "on"}, - /* hw_compat_rhel_8_3 from hw_compat_5_1 */ - { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ -+ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ -+ { "pl011", "migrate-clk", "off" }, - /* hw_compat_rhel_8_3 bz 1912846 */ - { "pci-xhci", "x-rh-late-msi-cap", "off" }, - /* hw_compat_rhel_8_3 from hw_compat_5_1 */ --- -2.27.0 - diff --git a/kvm-hw-s390x-Remove-the-RHEL7-only-machine-type.patch b/kvm-hw-s390x-Remove-the-RHEL7-only-machine-type.patch deleted file mode 100644 index cf4e6bb..0000000 --- a/kvm-hw-s390x-Remove-the-RHEL7-only-machine-type.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 64a9a5c971c424ff2d8074f52d48dd6233dc97ac Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 18 May 2021 18:27:54 +0200 -Subject: [PATCH] hw/s390x: Remove the RHEL7-only machine type - -RH-Author: Thomas Huth -RH-MergeRequest: 7: hw/s390x: Remove 
the RHEL7-only machine type -RH-Commit: [1/1] 8c53d4ae81 (thuth/qemu-kvm) -RH-Bugzilla: 1944730 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -We only support live migration from RHEL8 in RHEL9, so we can remove -the RHEL7 machine type "s390-ccw-virtio-rhel7.5.0". - -Signed-off-by: Thomas Huth ---- - hw/s390x/s390-virtio-ccw.c | 32 -------------------------------- - 1 file changed, 32 deletions(-) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 8df6dd1c71..432f36bce5 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1125,38 +1125,6 @@ static void ccw_machine_rhel760_class_options(MachineClass *mc) - } - DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", false); - --static void ccw_machine_rhel750_instance_options(MachineState *machine) --{ -- static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V2_11 }; -- ccw_machine_rhel760_instance_options(machine); -- -- /* before 2.12 we emulated the very first z900, and RHEL 7.5 is -- based on 2.10 */ -- s390_set_qemu_cpu_model(0x2064, 7, 1, qemu_cpu_feat); -- -- /* bpb and ppa15 were only in the full model in RHEL 7.5 */ -- s390_cpudef_featoff_greater(11, 1, S390_FEAT_PPA15); -- s390_cpudef_featoff_greater(11, 1, S390_FEAT_BPB); --} -- --GlobalProperty ccw_compat_rhel_7_5[] = { -- { -- .driver = TYPE_SCLP_EVENT_FACILITY, -- .property = "allow_all_mask_sizes", -- .value = "off", -- }, --}; --const size_t ccw_compat_rhel_7_5_len = G_N_ELEMENTS(ccw_compat_rhel_7_5); -- --static void ccw_machine_rhel750_class_options(MachineClass *mc) --{ -- ccw_machine_rhel760_class_options(mc); -- compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); -- compat_props_add(mc->compat_props, ccw_compat_rhel_7_5, ccw_compat_rhel_7_5_len); -- S390_CCW_MACHINE_CLASS(mc)->hpage_1m_allowed = false; --} --DEFINE_CCW_MACHINE(rhel750, "rhel7.5.0", false); -- - static void ccw_machine_register_types(void) - { - 
type_register_static(&ccw_machine_info); --- -2.27.0 - diff --git a/kvm-i386-Add-ratelimit-for-bus-locks-acquired-in-guest.patch b/kvm-i386-Add-ratelimit-for-bus-locks-acquired-in-guest.patch deleted file mode 100644 index 71f143a..0000000 --- a/kvm-i386-Add-ratelimit-for-bus-locks-acquired-in-guest.patch +++ /dev/null @@ -1,219 +0,0 @@ -From e92a6c64cb4b1437c5b75f25a638dbb6eb041383 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 29 Jul 2021 07:42:27 -0400 -Subject: [PATCH 16/39] i386: Add ratelimit for bus locks acquired in guest - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 -RH-Commit: [8/15] 2b8f01e05e44388c2f90d5281a9fe5537ab2433d (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Acked-by: Igor Mammedov -RH-Acked-by: Andrew Jones - -A bus lock is acquired through either split locked access to writeback -(WB) memory or any locked access to non-WB memory. It is typically >1000 -cycles slower than an atomic operation within a cache and can also -disrupts performance on other cores. - -Virtual Machines can exploit bus locks to degrade the performance of -system. To address this kind of performance DOS attack coming from the -VMs, bus lock VM exit is introduced in KVM and it can report the bus -locks detected in guest. If enabled in KVM, it would exit to the -userspace to let the user enforce throttling policies once bus locks -acquired in VMs. - -The availability of bus lock VM exit can be detected through the -KVM_CAP_X86_BUS_LOCK_EXIT. The returned bitmap contains the potential -policies supported by KVM. The field KVM_BUS_LOCK_DETECTION_EXIT in -bitmap is the only supported strategy at present. It indicates that KVM -will exit to userspace to handle the bus locks. - -This patch adds a ratelimit on the bus locks acquired in guest as a -mitigation policy. 
- -Introduce a new field "bus_lock_ratelimit" to record the limited speed -of bus locks in the target VM. The user can specify it through the -"bus-lock-ratelimit" as a machine property. In current implementation, -the default value of the speed is 0 per second, which means no -restrictions on the bus locks. - -As for ratelimit on detected bus locks, simply set the ratelimit -interval to 1s and restrict the quota of bus lock occurence to the value -of "bus_lock_ratelimit". A potential alternative is to introduce the -time slice as a property which can help the user achieve more precise -control. - -The detail of bus lock VM exit can be found in spec: -https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html - -Signed-off-by: Chenyi Qiang -Message-Id: <20210521043820.29678-1-chenyi.qiang@intel.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit 035d1ef26565f8f8eae058c37f5731a9ae304b96) -Signed-off-by: Paul Lai -Signed-off-by: Miroslav Rezanina ---- - hw/i386/x86.c | 24 ++++++++++++++++++++++++ - include/hw/i386/x86.h | 8 ++++++++ - target/i386/kvm/kvm.c | 41 +++++++++++++++++++++++++++++++++++++++++ - 3 files changed, 73 insertions(+) - -diff --git a/hw/i386/x86.c b/hw/i386/x86.c -index ed796fe6ba..d30cf27e29 100644 ---- a/hw/i386/x86.c -+++ b/hw/i386/x86.c -@@ -1246,6 +1246,23 @@ static void x86_machine_set_oem_table_id(Object *obj, const char *value, - strncpy(x86ms->oem_table_id, value, 8); - } - -+static void x86_machine_get_bus_lock_ratelimit(Object *obj, Visitor *v, -+ const char *name, void *opaque, Error **errp) -+{ -+ X86MachineState *x86ms = X86_MACHINE(obj); -+ uint64_t bus_lock_ratelimit = x86ms->bus_lock_ratelimit; -+ -+ visit_type_uint64(v, name, &bus_lock_ratelimit, errp); -+} -+ -+static void x86_machine_set_bus_lock_ratelimit(Object *obj, Visitor *v, -+ const char *name, void *opaque, Error **errp) -+{ -+ X86MachineState *x86ms = X86_MACHINE(obj); -+ -+ 
visit_type_uint64(v, name, &x86ms->bus_lock_ratelimit, errp); -+} -+ - static void x86_machine_initfn(Object *obj) - { - X86MachineState *x86ms = X86_MACHINE(obj); -@@ -1256,6 +1273,7 @@ static void x86_machine_initfn(Object *obj) - x86ms->pci_irq_mask = ACPI_BUILD_PCI_IRQS; - x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); - x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); -+ x86ms->bus_lock_ratelimit = 0; - } - - static void x86_machine_class_init(ObjectClass *oc, void *data) -@@ -1299,6 +1317,12 @@ static void x86_machine_class_init(ObjectClass *oc, void *data) - "Override the default value of field OEM Table ID " - "in ACPI table header." - "The string may be up to 8 bytes in size"); -+ -+ object_class_property_add(oc, X86_MACHINE_BUS_LOCK_RATELIMIT, "uint64_t", -+ x86_machine_get_bus_lock_ratelimit, -+ x86_machine_set_bus_lock_ratelimit, NULL, NULL); -+ object_class_property_set_description(oc, X86_MACHINE_BUS_LOCK_RATELIMIT, -+ "Set the ratelimit for the bus locks acquired in VMs"); - } - - static const TypeInfo x86_machine_info = { -diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h -index c09b648dff..25a1f16f01 100644 ---- a/include/hw/i386/x86.h -+++ b/include/hw/i386/x86.h -@@ -74,12 +74,20 @@ struct X86MachineState { - * will be translated to MSI messages in the address space. - */ - AddressSpace *ioapic_as; -+ -+ /* -+ * Ratelimit enforced on detected bus locks in guest. -+ * The default value of the bus_lock_ratelimit is 0 per second, -+ * which means no limitation on the guest's bus locks. 
-+ */ -+ uint64_t bus_lock_ratelimit; - }; - - #define X86_MACHINE_SMM "smm" - #define X86_MACHINE_ACPI "acpi" - #define X86_MACHINE_OEM_ID "x-oem-id" - #define X86_MACHINE_OEM_TABLE_ID "x-oem-table-id" -+#define X86_MACHINE_BUS_LOCK_RATELIMIT "bus-lock-ratelimit" - - #define TYPE_X86_MACHINE MACHINE_TYPE_NAME("x86") - OBJECT_DECLARE_TYPE(X86MachineState, X86MachineClass, X86_MACHINE) -diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index 4c69c2cb4b..af030af116 100644 ---- a/target/i386/kvm/kvm.c -+++ b/target/i386/kvm/kvm.c -@@ -130,6 +130,9 @@ static bool has_msr_mcg_ext_ctl; - static struct kvm_cpuid2 *cpuid_cache; - static struct kvm_msr_list *kvm_feature_msrs; - -+#define BUS_LOCK_SLICE_TIME 1000000000ULL /* ns */ -+static RateLimit bus_lock_ratelimit_ctrl; -+ - int kvm_has_pit_state2(void) - { - return has_pit_state2; -@@ -2267,6 +2270,28 @@ int kvm_arch_init(MachineState *ms, KVMState *s) - } - } - -+ if (object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE)) { -+ X86MachineState *x86ms = X86_MACHINE(ms); -+ -+ if (x86ms->bus_lock_ratelimit > 0) { -+ ret = kvm_check_extension(s, KVM_CAP_X86_BUS_LOCK_EXIT); -+ if (!(ret & KVM_BUS_LOCK_DETECTION_EXIT)) { -+ error_report("kvm: bus lock detection unsupported"); -+ return -ENOTSUP; -+ } -+ ret = kvm_vm_enable_cap(s, KVM_CAP_X86_BUS_LOCK_EXIT, 0, -+ KVM_BUS_LOCK_DETECTION_EXIT); -+ if (ret < 0) { -+ error_report("kvm: Failed to enable bus lock detection cap: %s", -+ strerror(-ret)); -+ return ret; -+ } -+ ratelimit_init(&bus_lock_ratelimit_ctrl); -+ ratelimit_set_speed(&bus_lock_ratelimit_ctrl, -+ x86ms->bus_lock_ratelimit, BUS_LOCK_SLICE_TIME); -+ } -+ } -+ - return 0; - } - -@@ -4225,6 +4250,15 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) - } - } - -+static void kvm_rate_limit_on_bus_lock(void) -+{ -+ uint64_t delay_ns = ratelimit_calculate_delay(&bus_lock_ratelimit_ctrl, 1); -+ -+ if (delay_ns) { -+ g_usleep(delay_ns / SCALE_US); -+ } -+} -+ - MemTxAttrs kvm_arch_post_run(CPUState *cpu, 
struct kvm_run *run) - { - X86CPU *x86_cpu = X86_CPU(cpu); -@@ -4240,6 +4274,9 @@ MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run) - } else { - env->eflags &= ~IF_MASK; - } -+ if (run->flags & KVM_RUN_X86_BUS_LOCK) { -+ kvm_rate_limit_on_bus_lock(); -+ } - - /* We need to protect the apic state against concurrent accesses from - * different threads in case the userspace irqchip is used. */ -@@ -4598,6 +4635,10 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) - ioapic_eoi_broadcast(run->eoi.vector); - ret = 0; - break; -+ case KVM_EXIT_X86_BUS_LOCK: -+ /* already handled in kvm_arch_post_run */ -+ ret = 0; -+ break; - default: - fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); - ret = -1; --- -2.27.0 - diff --git a/kvm-i386-cpu-Expose-AVX_VNNI-instruction-to-guest.patch b/kvm-i386-cpu-Expose-AVX_VNNI-instruction-to-guest.patch deleted file mode 100644 index 9f007d8..0000000 --- a/kvm-i386-cpu-Expose-AVX_VNNI-instruction-to-guest.patch +++ /dev/null @@ -1,82 +0,0 @@ -From c24fcdf0712ef81ec25ca3a4a1144cca18303fbe Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 29 Jul 2021 07:42:19 -0400 -Subject: [PATCH 13/39] i386/cpu: Expose AVX_VNNI instruction to guest - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 -RH-Commit: [5/15] 56381e35a1dc06af7d457d1fe61b1c108dd25d06 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Acked-by: Igor Mammedov -RH-Acked-by: Andrew Jones - -Expose AVX (VEX-encoded) versions of the Vector Neural Network -Instructions to guest. - -The bit definition: -CPUID.(EAX=7,ECX=1):EAX[bit 4] AVX_VNNI - -The following instructions are available when this feature is -present in the guest. - 1. VPDPBUS: Multiply and Add Unsigned and Signed Bytes - 2. VPDPBUSDS: Multiply and Add Unsigned and Signed Bytes with Saturation - 3. VPDPWSSD: Multiply and Add Signed Word Integers - 4. 
VPDPWSSDS: Multiply and Add Signed Integers with Saturation - -As for the kvm related code, please reference Linux commit id 1085a6b585d7. - -The release document ref below link: -https://software.intel.com/content/www/us/en/develop/download/\ -intel-architecture-instruction-set-extensions-programming-reference.html - -Signed-off-by: Yang Zhong -Message-Id: <20210407015609.22936-1-yang.zhong@intel.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit c1826ea6a052084f2e6a0bae9dd5932a727df039) -Signed-off-by: Paul Lai -Signed-off-by: Miroslav Rezanina ---- - target/i386/cpu.c | 4 ++-- - target/i386/cpu.h | 2 ++ - 2 files changed, 4 insertions(+), 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index da47c3e50e..0de2932c79 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -996,7 +996,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - .type = CPUID_FEATURE_WORD, - .feat_names = { - NULL, NULL, NULL, NULL, -- NULL, "avx512-bf16", NULL, NULL, -+ "avx-vnni", "avx512-bf16", NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -@@ -3284,7 +3284,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | - MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO, - .features[FEAT_7_1_EAX] = -- CPUID_7_1_EAX_AVX512_BF16, -+ CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16, - /* XSAVES is added in version 2 */ - .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 570f916878..edc8984448 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -804,6 +804,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; - /* Speculative Store Bypass Disable */ - #define CPUID_7_0_EDX_SPEC_CTRL_SSBD (1U << 31) - -+/* AVX VNNI Instruction */ -+#define CPUID_7_1_EAX_AVX_VNNI (1U << 4) - /* AVX512 BFloat16 Instruction */ - #define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) - --- 
-2.27.0 - diff --git a/kvm-iotests-Improve-and-rename-test-291-to-qemu-img-bitm.patch b/kvm-iotests-Improve-and-rename-test-291-to-qemu-img-bitm.patch deleted file mode 100644 index 1d1ebf0..0000000 --- a/kvm-iotests-Improve-and-rename-test-291-to-qemu-img-bitm.patch +++ /dev/null @@ -1,178 +0,0 @@ -From a6ab9f3d290c2ff3c2fc0187c69cf8cf69feff40 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 6 Aug 2021 15:07:47 -0400 -Subject: [PATCH 24/39] iotests: Improve and rename test 291 to qemu-img-bitmap -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 35: Synchronize with RHEL-AV 8.5 release 28 to RHEL 9 -RH-Commit: [1/4] bf400ceb9ef48b81c5f7cade97bc1cbf7bc4842c (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Philippe Mathieu-Daudé - -Enhance the test to demonstrate existing less-than-stellar behavior of -qemu-img with a qcow2 image containing an inconsistent bitmap: we -don't diagnose the problem until after copying the entire image (a -potentially long time), and when we do diagnose the failure, we still -end up leaving an empty bitmap in the destination. This mess will be -cleaned up in the next patch. - -While at it, rename the test now that we support useful iotest names, -and fix a missing newline in the error message thus exposed. - -Signed-off-by: Eric Blake -Message-Id: <20210709153951.2801666-2-eblake@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Nir Soffer - -(cherry picked from commit 94075c28eea0755173939dfaf1eae688b224a74e) -Conflicts: - tests/qemu-iotests/tests/qemu-img-bitmaps.out - commit 8417e1378c not backported -Signed-off-by: Eric Blake -Signed-off-by: Danilo C. L. 
de Paula -Signed-off-by: Miroslav Rezanina ---- - block/dirty-bitmap.c | 2 +- - .../{291 => tests/qemu-img-bitmaps} | 21 +++++++- - .../{291.out => tests/qemu-img-bitmaps.out} | 49 ++++++++++++++++++- - 3 files changed, 69 insertions(+), 3 deletions(-) - rename tests/qemu-iotests/{291 => tests/qemu-img-bitmaps} (87%) - rename tests/qemu-iotests/{291.out => tests/qemu-img-bitmaps.out} (75%) - -diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c -index 68d295d6e3..0ef46163e3 100644 ---- a/block/dirty-bitmap.c -+++ b/block/dirty-bitmap.c -@@ -193,7 +193,7 @@ int bdrv_dirty_bitmap_check(const BdrvDirtyBitmap *bitmap, uint32_t flags, - error_setg(errp, "Bitmap '%s' is inconsistent and cannot be used", - bitmap->name); - error_append_hint(errp, "Try block-dirty-bitmap-remove to delete" -- " this bitmap from disk"); -+ " this bitmap from disk\n"); - return -1; - } - -diff --git a/tests/qemu-iotests/291 b/tests/qemu-iotests/tests/qemu-img-bitmaps -similarity index 87% -rename from tests/qemu-iotests/291 -rename to tests/qemu-iotests/tests/qemu-img-bitmaps -index 20efb080a6..409c4497a3 100755 ---- a/tests/qemu-iotests/291 -+++ b/tests/qemu-iotests/tests/qemu-img-bitmaps -@@ -3,7 +3,7 @@ - # - # Test qemu-img bitmap handling - # --# Copyright (C) 2018-2020 Red Hat, Inc. -+# Copyright (C) 2018-2021 Red Hat, Inc. - # - # This program is free software; you can redistribute it and/or modify - # it under the terms of the GNU General Public License as published by -@@ -27,11 +27,13 @@ status=1 # failure is the default! - _cleanup() - { - _cleanup_test_img -+ _rm_test_img "$TEST_IMG.copy" - nbd_server_stop - } - trap "_cleanup; exit \$status" 0 1 2 3 15 - - # get standard environment, filters and checks -+cd .. - . ./common.rc - . ./common.filter - . 
./common.nbd -@@ -129,6 +131,23 @@ $QEMU_IMG map --output=json --image-opts \ - - nbd_server_stop - -+echo -+echo "=== Check handling of inconsistent bitmap ===" -+echo -+ -+# Prepare image with corrupted bitmap -+$QEMU_IO -c abort "$TEST_IMG" 2>/dev/null -+$QEMU_IMG bitmap --add "$TEST_IMG" b4 -+$QEMU_IMG bitmap --remove "$TEST_IMG" b1 -+_img_info --format-specific | _filter_irrelevant_img_info -+echo -+$QEMU_IMG convert --bitmaps -O qcow2 "$TEST_IMG" "$TEST_IMG.copy" && -+ echo "unexpected success" -+# Bug - even though we failed at conversion, we left a file around with -+# a bitmap marked as not corrupt -+TEST_IMG=$TEST_IMG.copy _img_info --format-specific \ -+ | _filter_irrelevant_img_info -+ - # success, all done - echo '*** done' - rm -f $seq.full -diff --git a/tests/qemu-iotests/291.out b/tests/qemu-iotests/tests/qemu-img-bitmaps.out -similarity index 75% -rename from tests/qemu-iotests/291.out -rename to tests/qemu-iotests/tests/qemu-img-bitmaps.out -index 23411c0ff4..543b028da6 100644 ---- a/tests/qemu-iotests/291.out -+++ b/tests/qemu-iotests/tests/qemu-img-bitmaps.out -@@ -1,4 +1,4 @@ --QA output created by 291 -+QA output created by qemu-img-bitmaps - - === Initial image setup === - -@@ -115,4 +115,51 @@ Format specific information: - [{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, - { "start": 2097152, "length": 1048576, "depth": 0, "zero": false, "data": false}, - { "start": 3145728, "length": 7340032, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] -+ -+=== Check handling of inconsistent bitmap === -+ -+image: TEST_DIR/t.IMGFMT -+file format: IMGFMT -+virtual size: 10 MiB (10485760 bytes) -+cluster_size: 65536 -+backing file: TEST_DIR/t.IMGFMT.base -+backing file format: IMGFMT -+Format specific information: -+ bitmaps: -+ [0]: -+ flags: -+ [0]: in-use -+ [1]: auto -+ name: b2 -+ granularity: 65536 -+ [1]: -+ flags: -+ [0]: in-use -+ name: b0 -+ granularity: 65536 -+ [2]: -+ flags: -+ [0]: 
auto -+ name: b4 -+ granularity: 65536 -+ corrupt: false -+ -+qemu-img: Failed to populate bitmap b0: Bitmap 'b0' is inconsistent and cannot be used -+Try block-dirty-bitmap-remove to delete this bitmap from disk -+image: TEST_DIR/t.IMGFMT.copy -+file format: IMGFMT -+virtual size: 10 MiB (10485760 bytes) -+cluster_size: 65536 -+Format specific information: -+ bitmaps: -+ [0]: -+ flags: -+ name: b0 -+ granularity: 65536 -+ [1]: -+ flags: -+ [0]: auto -+ name: b4 -+ granularity: 65536 -+ corrupt: false - *** done --- -2.27.0 - diff --git a/kvm-iothread-add-aio-max-batch-parameter.patch b/kvm-iothread-add-aio-max-batch-parameter.patch deleted file mode 100644 index 78a6c23..0000000 --- a/kvm-iothread-add-aio-max-batch-parameter.patch +++ /dev/null @@ -1,324 +0,0 @@ -From d8682ef60e3a658d776473fee2299015dd5105d7 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Thu, 29 Jul 2021 07:42:31 -0400 -Subject: [PATCH 18/39] iothread: add aio-max-batch parameter - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 -RH-Commit: [10/15] d033b3c8ddd71bae799103832039d6daa6dfad52 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Acked-by: Igor Mammedov -RH-Acked-by: Andrew Jones - -The `aio-max-batch` parameter will be propagated to AIO engines -and it will be used to control the maximum number of queued requests. - -When there are in queue a number of requests equal to `aio-max-batch`, -the engine invokes the system call to forward the requests to the kernel. - -This parameter allows us to control the maximum batch size to reduce -the latency that requests might accumulate while queued in the AIO -engine queue. - -If `aio-max-batch` is equal to 0 (default value), the AIO engine will -use its default maximum batch size value. 
- -Signed-off-by: Stefano Garzarella -Message-id: 20210721094211.69853-3-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 1793ad0247cad35db1ebbc04fbea0446c30a27ca) -Signed-off-by: Stefano Garzarella -Signed-off-by: Miroslav Rezanina ---- - include/block/aio.h | 12 +++++++++ - include/sysemu/iothread.h | 3 +++ - iothread.c | 55 +++++++++++++++++++++++++++++++++++---- - monitor/hmp-cmds.c | 2 ++ - qapi/misc.json | 6 ++++- - qapi/qom.json | 7 ++++- - qemu-options.hx | 8 ++++-- - util/aio-posix.c | 12 +++++++++ - util/aio-win32.c | 5 ++++ - util/async.c | 2 ++ - 10 files changed, 103 insertions(+), 9 deletions(-) - -diff --git a/include/block/aio.h b/include/block/aio.h -index 5f342267d5..ea68a139c8 100644 ---- a/include/block/aio.h -+++ b/include/block/aio.h -@@ -232,6 +232,9 @@ struct AioContext { - int64_t poll_grow; /* polling time growth factor */ - int64_t poll_shrink; /* polling time shrink factor */ - -+ /* AIO engine parameters */ -+ int64_t aio_max_batch; /* maximum number of requests in a batch */ -+ - /* - * List of handlers participating in userspace polling. Protected by - * ctx->list_lock. 
Iterated and modified mostly by the event loop thread -@@ -727,4 +730,13 @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, - int64_t grow, int64_t shrink, - Error **errp); - -+/** -+ * aio_context_set_aio_params: -+ * @ctx: the aio context -+ * @max_batch: maximum number of requests in a batch, 0 means that the -+ * engine will use its default -+ */ -+void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch, -+ Error **errp); -+ - #endif -diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h -index f177142f16..7f714bd136 100644 ---- a/include/sysemu/iothread.h -+++ b/include/sysemu/iothread.h -@@ -37,6 +37,9 @@ struct IOThread { - int64_t poll_max_ns; - int64_t poll_grow; - int64_t poll_shrink; -+ -+ /* AioContext AIO engine parameters */ -+ int64_t aio_max_batch; - }; - typedef struct IOThread IOThread; - -diff --git a/iothread.c b/iothread.c -index a12de6e455..272be5e146 100644 ---- a/iothread.c -+++ b/iothread.c -@@ -159,6 +159,24 @@ static void iothread_init_gcontext(IOThread *iothread) - iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE); - } - -+static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) -+{ -+ ERRP_GUARD(); -+ -+ aio_context_set_poll_params(iothread->ctx, -+ iothread->poll_max_ns, -+ iothread->poll_grow, -+ iothread->poll_shrink, -+ errp); -+ if (*errp) { -+ return; -+ } -+ -+ aio_context_set_aio_params(iothread->ctx, -+ iothread->aio_max_batch, -+ errp); -+} -+ - static void iothread_complete(UserCreatable *obj, Error **errp) - { - Error *local_error = NULL; -@@ -178,11 +196,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp) - */ - iothread_init_gcontext(iothread); - -- aio_context_set_poll_params(iothread->ctx, -- iothread->poll_max_ns, -- iothread->poll_grow, -- iothread->poll_shrink, -- &local_error); -+ iothread_set_aio_context_params(iothread, &local_error); - if (local_error) { - error_propagate(errp, local_error); - 
aio_context_unref(iothread->ctx); -@@ -219,6 +233,9 @@ static PollParamInfo poll_grow_info = { - static PollParamInfo poll_shrink_info = { - "poll-shrink", offsetof(IOThread, poll_shrink), - }; -+static PollParamInfo aio_max_batch_info = { -+ "aio-max-batch", offsetof(IOThread, aio_max_batch), -+}; - - static void iothread_get_param(Object *obj, Visitor *v, - const char *name, void *opaque, Error **errp) -@@ -278,6 +295,29 @@ static void iothread_set_poll_param(Object *obj, Visitor *v, - } - } - -+static void iothread_get_aio_param(Object *obj, Visitor *v, -+ const char *name, void *opaque, Error **errp) -+{ -+ -+ iothread_get_param(obj, v, name, opaque, errp); -+} -+ -+static void iothread_set_aio_param(Object *obj, Visitor *v, -+ const char *name, void *opaque, Error **errp) -+{ -+ IOThread *iothread = IOTHREAD(obj); -+ -+ if (!iothread_set_param(obj, v, name, opaque, errp)) { -+ return; -+ } -+ -+ if (iothread->ctx) { -+ aio_context_set_aio_params(iothread->ctx, -+ iothread->aio_max_batch, -+ errp); -+ } -+} -+ - static void iothread_class_init(ObjectClass *klass, void *class_data) - { - UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); -@@ -295,6 +335,10 @@ static void iothread_class_init(ObjectClass *klass, void *class_data) - iothread_get_poll_param, - iothread_set_poll_param, - NULL, &poll_shrink_info); -+ object_class_property_add(klass, "aio-max-batch", "int", -+ iothread_get_aio_param, -+ iothread_set_aio_param, -+ NULL, &aio_max_batch_info); - } - - static const TypeInfo iothread_info = { -@@ -344,6 +388,7 @@ static int query_one_iothread(Object *object, void *opaque) - info->poll_max_ns = iothread->poll_max_ns; - info->poll_grow = iothread->poll_grow; - info->poll_shrink = iothread->poll_shrink; -+ info->aio_max_batch = iothread->aio_max_batch; - - QAPI_LIST_APPEND(*tail, info); - return 0; -diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index cc15d9b6ee..2905bc1594 100644 ---- a/monitor/hmp-cmds.c -+++ b/monitor/hmp-cmds.c -@@ -1889,6 
+1889,8 @@ void hmp_info_iothreads(Monitor *mon, const QDict *qdict) - monitor_printf(mon, " poll-max-ns=%" PRId64 "\n", value->poll_max_ns); - monitor_printf(mon, " poll-grow=%" PRId64 "\n", value->poll_grow); - monitor_printf(mon, " poll-shrink=%" PRId64 "\n", value->poll_shrink); -+ monitor_printf(mon, " aio-max-batch=%" PRId64 "\n", -+ value->aio_max_batch); - } - - qapi_free_IOThreadInfoList(info_list); -diff --git a/qapi/misc.json b/qapi/misc.json -index 156f98203e..5c2ca3b556 100644 ---- a/qapi/misc.json -+++ b/qapi/misc.json -@@ -86,6 +86,9 @@ - # @poll-shrink: how many ns will be removed from polling time, 0 means that - # it's not configured (since 2.9) - # -+# @aio-max-batch: maximum number of requests in a batch for the AIO engine, -+# 0 means that the engine will use its default (since 6.1) -+# - # Since: 2.0 - ## - { 'struct': 'IOThreadInfo', -@@ -93,7 +96,8 @@ - 'thread-id': 'int', - 'poll-max-ns': 'int', - 'poll-grow': 'int', -- 'poll-shrink': 'int' } } -+ 'poll-shrink': 'int', -+ 'aio-max-batch': 'int' } } - - ## - # @query-iothreads: -diff --git a/qapi/qom.json b/qapi/qom.json -index cd0e76d564..f361157903 100644 ---- a/qapi/qom.json -+++ b/qapi/qom.json -@@ -516,12 +516,17 @@ - # algorithm detects it is spending too long polling without - # encountering events. 
0 selects a default behaviour (default: 0) - # -+# @aio-max-batch: maximum number of requests in a batch for the AIO engine, -+# 0 means that the engine will use its default -+# (default:0, since 6.1) -+# - # Since: 2.0 - ## - { 'struct': 'IothreadProperties', - 'data': { '*poll-max-ns': 'int', - '*poll-grow': 'int', -- '*poll-shrink': 'int' } } -+ '*poll-shrink': 'int', -+ '*aio-max-batch': 'int' } } - - ## - # @MemoryBackendProperties: -diff --git a/qemu-options.hx b/qemu-options.hx -index 79ca09feac..d5f1ec27c5 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -5185,7 +5185,7 @@ SRST - - CN=laptop.example.com,O=Example Home,L=London,ST=London,C=GB - -- ``-object iothread,id=id,poll-max-ns=poll-max-ns,poll-grow=poll-grow,poll-shrink=poll-shrink`` -+ ``-object iothread,id=id,poll-max-ns=poll-max-ns,poll-grow=poll-grow,poll-shrink=poll-shrink,aio-max-batch=aio-max-batch`` - Creates a dedicated event loop thread that devices can be - assigned to. This is known as an IOThread. By default device - emulation happens in vCPU threads or the main event loop thread. -@@ -5221,7 +5221,11 @@ SRST - the polling time when the algorithm detects it is spending too - long polling without encountering events. - -- The polling parameters can be modified at run-time using the -+ The ``aio-max-batch`` parameter is the maximum number of requests -+ in a batch for the AIO engine, 0 means that the engine will use -+ its default. 
-+ -+ The IOThread parameters can be modified at run-time using the - ``qom-set`` command (where ``iothread1`` is the IOThread's - ``id``): - -diff --git a/util/aio-posix.c b/util/aio-posix.c -index 30f5354b1e..2b86777e91 100644 ---- a/util/aio-posix.c -+++ b/util/aio-posix.c -@@ -716,3 +716,15 @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, - - aio_notify(ctx); - } -+ -+void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch, -+ Error **errp) -+{ -+ /* -+ * No thread synchronization here, it doesn't matter if an incorrect value -+ * is used once. -+ */ -+ ctx->aio_max_batch = max_batch; -+ -+ aio_notify(ctx); -+} -diff --git a/util/aio-win32.c b/util/aio-win32.c -index 168717b51b..d5b09a1193 100644 ---- a/util/aio-win32.c -+++ b/util/aio-win32.c -@@ -440,3 +440,8 @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, - error_setg(errp, "AioContext polling is not implemented on Windows"); - } - } -+ -+void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch, -+ Error **errp) -+{ -+} -diff --git a/util/async.c b/util/async.c -index 674dbefb7c..6a9588d86b 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -537,6 +537,8 @@ AioContext *aio_context_new(Error **errp) - ctx->poll_grow = 0; - ctx->poll_shrink = 0; - -+ ctx->aio_max_batch = 0; -+ - return ctx; - fail: - g_source_destroy(&ctx->source); --- -2.27.0 - diff --git a/kvm-iothread-generalize-iothread_set_param-iothread_get_.patch b/kvm-iothread-generalize-iothread_set_param-iothread_get_.patch deleted file mode 100644 index 8910da2..0000000 --- a/kvm-iothread-generalize-iothread_set_param-iothread_get_.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 6f827f890e68c3b8bda80822edc09369e93da01f Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Thu, 29 Jul 2021 07:42:29 -0400 -Subject: [PATCH 17/39] iothread: generalize - iothread_set_param/iothread_get_param - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 
-RH-Commit: [9/15] 7c624847cfc636bdfa0d4f35062500a7f9e8437f (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Acked-by: Igor Mammedov -RH-Acked-by: Andrew Jones - -Changes in preparation for next patches where we add a new -parameter not related to the poll mechanism. - -Let's add two new generic functions (iothread_set_param and -iothread_get_param) that we use to set and get IOThread -parameters. - -Signed-off-by: Stefano Garzarella -Message-id: 20210721094211.69853-2-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 0445409d7497bededa1047f0d8298b0d4bb3b1a3) -Signed-off-by: Stefano Garzarella -Signed-off-by: Miroslav Rezanina ---- - iothread.c | 27 +++++++++++++++++++++++---- - 1 file changed, 23 insertions(+), 4 deletions(-) - -diff --git a/iothread.c b/iothread.c -index 7f086387be..a12de6e455 100644 ---- a/iothread.c -+++ b/iothread.c -@@ -220,7 +220,7 @@ static PollParamInfo poll_shrink_info = { - "poll-shrink", offsetof(IOThread, poll_shrink), - }; - --static void iothread_get_poll_param(Object *obj, Visitor *v, -+static void iothread_get_param(Object *obj, Visitor *v, - const char *name, void *opaque, Error **errp) - { - IOThread *iothread = IOTHREAD(obj); -@@ -230,7 +230,7 @@ static void iothread_get_poll_param(Object *obj, Visitor *v, - visit_type_int64(v, name, field, errp); - } - --static void iothread_set_poll_param(Object *obj, Visitor *v, -+static bool iothread_set_param(Object *obj, Visitor *v, - const char *name, void *opaque, Error **errp) - { - IOThread *iothread = IOTHREAD(obj); -@@ -239,17 +239,36 @@ static void iothread_set_poll_param(Object *obj, Visitor *v, - int64_t value; - - if (!visit_type_int64(v, name, &value, errp)) { -- return; -+ return false; - } - - if (value < 0) { - error_setg(errp, "%s value must be in range [0, %" PRId64 "]", - info->name, INT64_MAX); -- return; -+ return false; - } - - *field = value; - -+ return true; -+} -+ 
-+static void iothread_get_poll_param(Object *obj, Visitor *v, -+ const char *name, void *opaque, Error **errp) -+{ -+ -+ iothread_get_param(obj, v, name, opaque, errp); -+} -+ -+static void iothread_set_poll_param(Object *obj, Visitor *v, -+ const char *name, void *opaque, Error **errp) -+{ -+ IOThread *iothread = IOTHREAD(obj); -+ -+ if (!iothread_set_param(obj, v, name, opaque, errp)) { -+ return; -+ } -+ - if (iothread->ctx) { - aio_context_set_poll_params(iothread->ctx, - iothread->poll_max_ns, --- -2.27.0 - diff --git a/kvm-linux-aio-limit-the-batch-size-using-aio-max-batch-p.patch b/kvm-linux-aio-limit-the-batch-size-using-aio-max-batch-p.patch deleted file mode 100644 index 93fcc9b..0000000 --- a/kvm-linux-aio-limit-the-batch-size-using-aio-max-batch-p.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 6f4cb3e1e5d718356f16645e806d47cb2159ae98 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Thu, 29 Jul 2021 07:42:33 -0400 -Subject: [PATCH 19/39] linux-aio: limit the batch size using `aio-max-batch` - parameter - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 -RH-Commit: [11/15] 44e2f2d294d8ed1d13fb29c5c1599543b86c67e5 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Acked-by: Igor Mammedov -RH-Acked-by: Andrew Jones - -When there are multiple queues attached to the same AIO context, -some requests may experience high latency, since in the worst case -the AIO engine queue is only flushed when it is full (MAX_EVENTS) or -there are no more queues plugged. - -Commit 2558cb8dd4 ("linux-aio: increasing MAX_EVENTS to a larger -hardcoded value") changed MAX_EVENTS from 128 to 1024, to increase -the number of in-flight requests. But this change also increased -the potential maximum batch to 1024 elements. 
- -When there is a single queue attached to the AIO context, the issue -is mitigated from laio_io_unplug() that will flush the queue every -time is invoked since there can't be others queue plugged. - -Let's use the new `aio-max-batch` IOThread parameter to mitigate -this issue, limiting the number of requests in a batch. - -We also define a default value (32): this value is obtained running -some benchmarks and it represents a good tradeoff between the latency -increase while a request is queued and the cost of the io_submit(2) -system call. - -Signed-off-by: Stefano Garzarella -Message-id: 20210721094211.69853-4-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit d7ddd0a1618a75b31dc308bb37365ce1da972154) -Signed-off-by: Stefano Garzarella -Signed-off-by: Miroslav Rezanina ---- - block/linux-aio.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/block/linux-aio.c b/block/linux-aio.c -index 3c0527c2bf..0dab507b71 100644 ---- a/block/linux-aio.c -+++ b/block/linux-aio.c -@@ -28,6 +28,9 @@ - */ - #define MAX_EVENTS 1024 - -+/* Maximum number of requests in a batch. 
(default value) */ -+#define DEFAULT_MAX_BATCH 32 -+ - struct qemu_laiocb { - Coroutine *co; - LinuxAioState *ctx; -@@ -351,6 +354,10 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, - LinuxAioState *s = laiocb->ctx; - struct iocb *iocbs = &laiocb->iocb; - QEMUIOVector *qiov = laiocb->qiov; -+ int64_t max_batch = s->aio_context->aio_max_batch ?: DEFAULT_MAX_BATCH; -+ -+ /* limit the batch with the number of available events */ -+ max_batch = MIN_NON_ZERO(MAX_EVENTS - s->io_q.in_flight, max_batch); - - switch (type) { - case QEMU_AIO_WRITE: -@@ -371,7 +378,7 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, - s->io_q.in_queue++; - if (!s->io_q.blocked && - (!s->io_q.plugged || -- s->io_q.in_flight + s->io_q.in_queue >= MAX_EVENTS)) { -+ s->io_q.in_queue >= max_batch)) { - ioq_submit(s); - } - --- -2.27.0 - diff --git a/kvm-migration-Allow-reset-of-postcopy_recover_triggered-.patch b/kvm-migration-Allow-reset-of-postcopy_recover_triggered-.patch deleted file mode 100644 index 980466c..0000000 --- a/kvm-migration-Allow-reset-of-postcopy_recover_triggered-.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 5e1535771bba299aae4de2d810100fa7fedfeca8 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Tue, 29 Jun 2021 14:13:56 -0400 -Subject: [PATCH 02/39] migration: Allow reset of postcopy_recover_triggered - when failed -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 25: migration: Move yank outside qemu_start_incoming_migration() -RH-Commit: [2/2] b766a7f36df4f889d74a2e8d518e1100e52ab726 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1974683 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu > - -It's possible qemu_start_incoming_migration() failed at any point, when it -happens we should reset postcopy_recover_triggered to false so that the user -can still retry with a saner incoming port. 
- -Signed-off-by: Peter Xu -Message-Id: <20210629181356.217312-3-peterx@redhat.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit b7f9afd48e7bc5c341e55348f2c2eed08314be7d) -Fixes: b5eea99e ("migration: Add yank feature", 2021-01-13) -Signed-off-by: Leonardo Bras -Signed-off-by: Miroslav Rezanina ---- - migration/migration.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/migration/migration.c b/migration/migration.c -index f077640df2..9d185f0e28 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2109,6 +2109,13 @@ void qmp_migrate_recover(const char *uri, Error **errp) - { - MigrationIncomingState *mis = migration_incoming_get_current(); - -+ /* -+ * Don't even bother to use ERRP_GUARD() as it _must_ always be set by -+ * callers (no one should ignore a recover failure); if there is, it's a -+ * programming error. -+ */ -+ assert(errp); -+ - if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) { - error_setg(errp, "Migrate recover can only be run " - "when postcopy is paused."); -@@ -2127,6 +2134,12 @@ void qmp_migrate_recover(const char *uri, Error **errp) - * to continue using that newly established channel. 
- */ - qemu_start_incoming_migration(uri, errp); -+ -+ /* Safe to dereference with the assert above */ -+ if (*errp) { -+ /* Reset the flag so user could still retry */ -+ qatomic_set(&mis->postcopy_recover_triggered, false); -+ } - } - - void qmp_migrate_pause(Error **errp) --- -2.27.0 - diff --git a/kvm-migration-Move-bitmap_mutex-out-of-migration_bitmap_.patch b/kvm-migration-Move-bitmap_mutex-out-of-migration_bitmap_.patch deleted file mode 100644 index d163571..0000000 --- a/kvm-migration-Move-bitmap_mutex-out-of-migration_bitmap_.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 7726f6461eebf2c4a4b129f1c98add25c0b1bee2 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Thu, 29 Jul 2021 07:42:16 -0400 -Subject: [PATCH 12/39] migration: Move bitmap_mutex out of - migration_bitmap_clear_dirty() - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 -RH-Commit: [4/15] cc207372dab253a4db3b6d351fa2fb2f442437ad (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Acked-by: Igor Mammedov -RH-Acked-by: Andrew Jones - -Taking the mutex every time for each dirty bit to clear is too slow, especially -we'll take/release even if the dirty bit is cleared. So far it's only used to -sync with special cases with qemu_guest_free_page_hint() against migration -thread, nothing really that serious yet. Let's move the lock to be upper. - -There're two callers of migration_bitmap_clear_dirty(). - -For migration, move it into ram_save_iterate(). With the help of MAX_WAIT -logic, we'll only run ram_save_iterate() for no more than 50ms-ish time, so -taking the lock once there at the entry. It also means any call sites to -qemu_guest_free_page_hint() can be delayed; but it should be very rare, only -during migration, and I don't see a problem with it. - -For COLO, move it up to colo_flush_ram_cache(). 
I think COLO forgot to take -that lock even when calling ramblock_sync_dirty_bitmap(), where another example -is migration_bitmap_sync() who took it right. So let the mutex cover both the -ramblock_sync_dirty_bitmap() and migration_bitmap_clear_dirty() calls. - -It's even possible to drop the lock so we use atomic operations upon rb->bmap -and the variable migration_dirty_pages. I didn't do it just to still be safe, -also not predictable whether the frequent atomic ops could bring overhead too -e.g. on huge vms when it happens very often. When that really comes, we can -keep a local counter and periodically call atomic ops. Keep it simple for now. - -Cc: Wei Wang -Cc: David Hildenbrand -Cc: Hailiang Zhang -Cc: Dr. David Alan Gilbert -Cc: Juan Quintela -Cc: Leonardo Bras Soares Passos -Signed-off-by: Peter Xu -Message-Id: <20210630200805.280905-1-peterx@redhat.com> -Reviewed-by: Wei Wang -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 63268c4970a5f126cc9af75f3ccb8057abef5ec0) -Signed-off-by: Peter Xu -Signed-off-by: Miroslav Rezanina ---- - migration/ram.c | 13 +++++++++++-- - 1 file changed, 11 insertions(+), 2 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 4682f3625c..5d64917dce 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -819,8 +819,6 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, - { - bool ret; - -- QEMU_LOCK_GUARD(&rs->bitmap_mutex); -- - /* - * Clear dirty bitmap if needed. This _must_ be called before we - * send any of the page in the chunk because we need to make sure -@@ -2869,6 +2867,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - goto out; - } - -+ /* -+ * We'll take this lock a little bit long, but it's okay for two reasons. 
-+ * Firstly, the only possible other thread to take it is who calls -+ * qemu_guest_free_page_hint(), which should be rare; secondly, see -+ * MAX_WAIT (if curious, further see commit 4508bd9ed8053ce) below, which -+ * guarantees that we'll at least released it in a regular basis. -+ */ -+ qemu_mutex_lock(&rs->bitmap_mutex); - WITH_RCU_READ_LOCK_GUARD() { - if (ram_list.version != rs->last_version) { - ram_state_reset(rs); -@@ -2928,6 +2934,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - i++; - } - } -+ qemu_mutex_unlock(&rs->bitmap_mutex); - - /* - * Must occur before EOS (or any QEMUFile operation) -@@ -3710,6 +3717,7 @@ void colo_flush_ram_cache(void) - unsigned long offset = 0; - - memory_global_dirty_log_sync(); -+ qemu_mutex_lock(&ram_state->bitmap_mutex); - WITH_RCU_READ_LOCK_GUARD() { - RAMBLOCK_FOREACH_NOT_IGNORED(block) { - ramblock_sync_dirty_bitmap(ram_state, block); -@@ -3738,6 +3746,7 @@ void colo_flush_ram_cache(void) - } - } - trace_colo_flush_ram_cache_end(); -+ qemu_mutex_unlock(&ram_state->bitmap_mutex); - } - - /** --- -2.27.0 - diff --git a/kvm-migration-Move-yank-outside-qemu_start_incoming_migr.patch b/kvm-migration-Move-yank-outside-qemu_start_incoming_migr.patch deleted file mode 100644 index 9822ab6..0000000 --- a/kvm-migration-Move-yank-outside-qemu_start_incoming_migr.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 4c11e06222ca5a88f48f2d47adc3a7da306bb345 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Tue, 29 Jun 2021 14:13:55 -0400 -Subject: [PATCH 01/39] migration: Move yank outside - qemu_start_incoming_migration() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 25: migration: Move yank outside qemu_start_incoming_migration() -RH-Commit: [1/2] e5694b0ae9a55f6b147c336e86fce6f4f2163db6 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1974683 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu > - -Starting from commit b5eea99ec2f5c, 
qmp_migrate_recover() calls unregister -before calling qemu_start_incoming_migration(). I believe it wanted to mitigate -the next call to yank_register_instance(), but I think that's wrong. - -Firstly, if during recover, we should keep the yank instance there, not -"quickly removing and adding it back". - -Meanwhile, calling qmp_migrate_recover() twice with b5eea99ec2f5c will directly -crash the dest qemu (right now it can't; but it'll start to work right after -the next patch) because the 1st call of qmp_migrate_recover() will unregister -permanently when the channel failed to establish, then the 2nd call of -qmp_migrate_recover() crashes at yank_unregister_instance(). - -This patch fixes it by moving yank ops out of qemu_start_incoming_migration() -into qmp_migrate_incoming. For qmp_migrate_recover(), drop the unregister of -yank instance too since we keep it there during the recovery phase. - -Signed-off-by: Peter Xu -Reviewed-by: Dr. David Alan Gilbert -Message-Id: <20210629181356.217312-2-peterx@redhat.com> -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit cc48c587d25ff5dd7dddb4e5072de9ca8464c832) -Fixes: b5eea99e ("migration: Add yank feature", 2021-01-13) -Signed-off-by: Leonardo Bras -Signed-off-by: Miroslav Rezanina ---- - migration/migration.c | 11 +++++------ - 1 file changed, 5 insertions(+), 6 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 4afc6069b6..f077640df2 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -454,10 +454,6 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp) - { - const char *p = NULL; - -- if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { -- return; -- } -- - qapi_event_send_migration(MIGRATION_STATUS_SETUP); - if (strstart(uri, "tcp:", &p) || - strstart(uri, "unix:", NULL) || -@@ -472,7 +468,6 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp) - } else if (strstart(uri, "fd:", &p)) { - fd_start_incoming_migration(p, errp); - } else { -- yank_unregister_instance(MIGRATION_YANK_INSTANCE); - error_setg(errp, "unknown migration protocol: %s", uri); - } - } -@@ -2095,9 +2090,14 @@ void qmp_migrate_incoming(const char *uri, Error **errp) - return; - } - -+ if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) { -+ return; -+ } -+ - qemu_start_incoming_migration(uri, &local_err); - - if (local_err) { -+ yank_unregister_instance(MIGRATION_YANK_INSTANCE); - error_propagate(errp, local_err); - return; - } -@@ -2126,7 +2126,6 @@ void qmp_migrate_recover(const char *uri, Error **errp) - * only re-setup the migration stream and poke existing migration - * to continue using that newly established channel. 
- */ -- yank_unregister_instance(MIGRATION_YANK_INSTANCE); - qemu_start_incoming_migration(uri, errp); - } - --- -2.27.0 - diff --git a/kvm-migration-failover-continue-to-wait-card-unplug-on-e.patch b/kvm-migration-failover-continue-to-wait-card-unplug-on-e.patch deleted file mode 100644 index c0cf7e6..0000000 --- a/kvm-migration-failover-continue-to-wait-card-unplug-on-e.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 0f41423f2bb57e77400cfec0683b7514c9798c55 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Wed, 4 Aug 2021 07:10:13 -0400 -Subject: [PATCH 22/39] migration: failover: continue to wait card unplug on - error - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 -RH-Commit: [14/15] e3bfdeee808b2ecd5b141464193b6ee97a4bca3a (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Acked-by: Igor Mammedov -RH-Acked-by: Andrew Jones - -If the user cancels the migration in the unplug-wait state, -QEMU will try to plug back the card and this fails because the card -is partially unplugged. -To avoid the problem, continue to wait the card unplug, but to -allow the migration to be canceled if the card never finishes to unplug -use a timeout. - -Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1976852 -Signed-off-by: Laurent Vivier -Reviewed-by: Dr. David Alan Gilbert -Message-Id: <20210629155007.629086-3-lvivier@redhat.com> -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 944bc528421aa848ca218ee535ea923a4147a525) -Signed-off-by: Laurent Vivier -Signed-off-by: Miroslav Rezanina ---- - migration/migration.c | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/migration/migration.c b/migration/migration.c -index 58df1dac05..fc1d0db04a 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3696,6 +3696,17 @@ static void qemu_savevm_wait_unplug(MigrationState *s, int old_state, - qemu_savevm_state_guest_unplug_pending()) { - qemu_sem_timedwait(&s->wait_unplug_sem, 250); - } -+ if (s->state != MIGRATION_STATUS_WAIT_UNPLUG) { -+ int timeout = 120; /* 30 seconds */ -+ /* -+ * migration has been canceled -+ * but as we have started an unplug we must wait the end -+ * to be able to plug back the card -+ */ -+ while (timeout-- && qemu_savevm_state_guest_unplug_pending()) { -+ qemu_sem_timedwait(&s->wait_unplug_sem, 250); -+ } -+ } - - migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state); - } else { --- -2.27.0 - diff --git a/kvm-migration-failover-reset-partially_hotplugged.patch b/kvm-migration-failover-reset-partially_hotplugged.patch deleted file mode 100644 index 97fb427..0000000 --- a/kvm-migration-failover-reset-partially_hotplugged.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 2e07c4c66a1199ef33fb2e89164e03ca2acdcf10 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 29 Jul 2021 07:42:12 -0400 -Subject: [PATCH 10/39] migration: failover: reset partially_hotplugged - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 -RH-Commit: [2/15] b01f5640ce93192b2239ad4ef15ff53d977f2341 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Acked-by: Igor Mammedov -RH-Acked-by: Andrew Jones - -When the card is plugged back, reset the partially_hotplugged flag to false - -Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1787194 -Signed-off-by: 
Laurent Vivier -Message-Id: <20210629152937.619193-1-lvivier@redhat.com> -Reviewed-by: Juan Quintela -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 109c20ea28cc0d82fa353e692345b172cb5721cc) -Signed-off-by: Laurent Vivier -Signed-off-by: Miroslav Rezanina ---- - hw/net/virtio-net.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 914051feb7..c2e32fedbf 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3134,6 +3134,7 @@ static bool failover_replug_primary(VirtIONet *n, DeviceState *dev, - } - hotplug_handler_plug(hotplug_ctrl, dev, &err); - } -+ pdev->partially_hotplugged = false; - - out: - error_propagate(errp, err); --- -2.27.0 - diff --git a/kvm-migration-move-wait-unplug-loop-to-its-own-function.patch b/kvm-migration-move-wait-unplug-loop-to-its-own-function.patch deleted file mode 100644 index 524cda3..0000000 --- a/kvm-migration-move-wait-unplug-loop-to-its-own-function.patch +++ /dev/null @@ -1,118 +0,0 @@ -From e9848f4a4f45960bff1a2a7bc4a4670670de37a5 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Wed, 4 Aug 2021 07:10:12 -0400 -Subject: [PATCH 21/39] migration: move wait-unplug loop to its own function - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 -RH-Commit: [13/15] ab3f26a86e4ea955678323608512e54af2a7c83b (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Acked-by: Igor Mammedov -RH-Acked-by: Andrew Jones - -The loop is used in migration_thread() and bg_migration_thread(), -so we can move it to its own function and call it from these both places. - -Moreover, in migration_thread() we have a wrong state transition from -SETUP to ACTIVE while state could be WAIT_UNPLUG. This is correctly -managed in bg_migration_thread() so use this code instead. 
- -Signed-off-by: Laurent Vivier -Message-Id: <20210629155007.629086-2-lvivier@redhat.com> -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Juan Quintela -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit fde93d99d9c208c07e2dcc24cd04e824d2b65b35) -Signed-off-by: Laurent Vivier -Signed-off-by: Miroslav Rezanina ---- - migration/migration.c | 54 +++++++++++++++++++++---------------------- - 1 file changed, 26 insertions(+), 28 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 9cf1cde39d..58df1dac05 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3681,6 +3681,28 @@ bool migration_rate_limit(void) - return urgent; - } - -+/* -+ * if failover devices are present, wait they are completely -+ * unplugged -+ */ -+ -+static void qemu_savevm_wait_unplug(MigrationState *s, int old_state, -+ int new_state) -+{ -+ if (qemu_savevm_state_guest_unplug_pending()) { -+ migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG); -+ -+ while (s->state == MIGRATION_STATUS_WAIT_UNPLUG && -+ qemu_savevm_state_guest_unplug_pending()) { -+ qemu_sem_timedwait(&s->wait_unplug_sem, 250); -+ } -+ -+ migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state); -+ } else { -+ migrate_set_state(&s->state, old_state, new_state); -+ } -+} -+ - /* - * Master migration thread on the source VM. - * It drives the migration and pumps the data down the outgoing channel. 
-@@ -3727,22 +3749,10 @@ static void *migration_thread(void *opaque) - - qemu_savevm_state_setup(s->to_dst_file); - -- if (qemu_savevm_state_guest_unplug_pending()) { -- migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, -- MIGRATION_STATUS_WAIT_UNPLUG); -- -- while (s->state == MIGRATION_STATUS_WAIT_UNPLUG && -- qemu_savevm_state_guest_unplug_pending()) { -- qemu_sem_timedwait(&s->wait_unplug_sem, 250); -- } -- -- migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, -- MIGRATION_STATUS_ACTIVE); -- } -+ qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP, -+ MIGRATION_STATUS_ACTIVE); - - s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; -- migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, -- MIGRATION_STATUS_ACTIVE); - - trace_migration_thread_setup_complete(); - -@@ -3850,21 +3860,9 @@ static void *bg_migration_thread(void *opaque) - qemu_savevm_state_header(s->to_dst_file); - qemu_savevm_state_setup(s->to_dst_file); - -- if (qemu_savevm_state_guest_unplug_pending()) { -- migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, -- MIGRATION_STATUS_WAIT_UNPLUG); -- -- while (s->state == MIGRATION_STATUS_WAIT_UNPLUG && -- qemu_savevm_state_guest_unplug_pending()) { -- qemu_sem_timedwait(&s->wait_unplug_sem, 250); -- } -+ qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP, -+ MIGRATION_STATUS_ACTIVE); - -- migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, -- MIGRATION_STATUS_ACTIVE); -- } else { -- migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, -- MIGRATION_STATUS_ACTIVE); -- } - s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; - - trace_migration_thread_setup_complete(); --- -2.27.0 - diff --git a/kvm-nbd-server-Use-drained-block-ops-to-quiesce-the-serv.patch b/kvm-nbd-server-Use-drained-block-ops-to-quiesce-the-serv.patch deleted file mode 100644 index af8a82c..0000000 --- a/kvm-nbd-server-Use-drained-block-ops-to-quiesce-the-serv.patch +++ /dev/null @@ -1,191 +0,0 @@ -From 9182af6a819e60a079349fd6d8b28a28adea90b1 
Mon Sep 17 00:00:00 2001 -From: Sergio Lopez Pascual -Date: Thu, 17 Jun 2021 09:13:21 -0400 -Subject: [PATCH 06/12] nbd/server: Use drained block ops to quiesce the server -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 16: Synchronize with RHEL-AV 8.5 release 21 to RHEL 9 -RH-Commit: [4/8] ca32c99563254a8a31104948e41fa691453d0399 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Daniel P. Berrangé - -Before switching between AioContexts we need to make sure that we're -fully quiesced ("nb_requests == 0" for every client) when entering the -drained section. - -To do this, we set "quiescing = true" for every client on -".drained_begin" to prevent new coroutines from being created, and -check if "nb_requests == 0" on ".drained_poll". Finally, once we're -exiting the drained section, on ".drained_end" we set "quiescing = -false" and call "nbd_client_receive_next_request()" to resume the -processing of new requests. - -With these changes, "blk_aio_attach()" and "blk_aio_detach()" can be -reverted to be as simple as they were before f148ae7d36. - -RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1960137 -Suggested-by: Kevin Wolf -Signed-off-by: Sergio Lopez -Message-Id: <20210602060552.17433-3-slp@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit fd6afc501a019682d1b8468b562355a2887087bd) -Signed-off-by: Sergio Lopez -Signed-off-by: Danilo C. L. 
de Paula -Signed-off-by: Miroslav Rezanina ---- - nbd/server.c | 82 ++++++++++++++++++++++++++++++++++++++-------------- - 1 file changed, 61 insertions(+), 21 deletions(-) - -diff --git a/nbd/server.c b/nbd/server.c -index 86a44a9b41..b60ebc3ab6 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -1513,6 +1513,11 @@ static void nbd_request_put(NBDRequestData *req) - g_free(req); - - client->nb_requests--; -+ -+ if (client->quiescing && client->nb_requests == 0) { -+ aio_wait_kick(); -+ } -+ - nbd_client_receive_next_request(client); - - nbd_client_put(client); -@@ -1530,49 +1535,68 @@ static void blk_aio_attached(AioContext *ctx, void *opaque) - QTAILQ_FOREACH(client, &exp->clients, next) { - qio_channel_attach_aio_context(client->ioc, ctx); - -+ assert(client->nb_requests == 0); - assert(client->recv_coroutine == NULL); - assert(client->send_coroutine == NULL); -- -- if (client->quiescing) { -- client->quiescing = false; -- nbd_client_receive_next_request(client); -- } - } - } - --static void nbd_aio_detach_bh(void *opaque) -+static void blk_aio_detach(void *opaque) - { - NBDExport *exp = opaque; - NBDClient *client; - -+ trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); -+ - QTAILQ_FOREACH(client, &exp->clients, next) { - qio_channel_detach_aio_context(client->ioc); -+ } -+ -+ exp->common.ctx = NULL; -+} -+ -+static void nbd_drained_begin(void *opaque) -+{ -+ NBDExport *exp = opaque; -+ NBDClient *client; -+ -+ QTAILQ_FOREACH(client, &exp->clients, next) { - client->quiescing = true; -+ } -+} - -- if (client->recv_coroutine) { -- if (client->read_yielding) { -- qemu_aio_coroutine_enter(exp->common.ctx, -- client->recv_coroutine); -- } else { -- AIO_WAIT_WHILE(exp->common.ctx, client->recv_coroutine != NULL); -- } -- } -+static void nbd_drained_end(void *opaque) -+{ -+ NBDExport *exp = opaque; -+ NBDClient *client; - -- if (client->send_coroutine) { -- AIO_WAIT_WHILE(exp->common.ctx, client->send_coroutine != NULL); -- } -+ QTAILQ_FOREACH(client, 
&exp->clients, next) { -+ client->quiescing = false; -+ nbd_client_receive_next_request(client); - } - } - --static void blk_aio_detach(void *opaque) -+static bool nbd_drained_poll(void *opaque) - { - NBDExport *exp = opaque; -+ NBDClient *client; - -- trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); -+ QTAILQ_FOREACH(client, &exp->clients, next) { -+ if (client->nb_requests != 0) { -+ /* -+ * If there's a coroutine waiting for a request on nbd_read_eof() -+ * enter it here so we don't depend on the client to wake it up. -+ */ -+ if (client->recv_coroutine != NULL && client->read_yielding) { -+ qemu_aio_coroutine_enter(exp->common.ctx, -+ client->recv_coroutine); -+ } - -- aio_wait_bh_oneshot(exp->common.ctx, nbd_aio_detach_bh, exp); -+ return true; -+ } -+ } - -- exp->common.ctx = NULL; -+ return false; - } - - static void nbd_eject_notifier(Notifier *n, void *data) -@@ -1594,6 +1618,12 @@ void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk) - blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier); - } - -+static const BlockDevOps nbd_block_ops = { -+ .drained_begin = nbd_drained_begin, -+ .drained_end = nbd_drained_end, -+ .drained_poll = nbd_drained_poll, -+}; -+ - static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, - Error **errp) - { -@@ -1715,8 +1745,17 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, - - exp->allocation_depth = arg->allocation_depth; - -+ /* -+ * We need to inhibit request queuing in the block layer to ensure we can -+ * be properly quiesced when entering a drained section, as our coroutines -+ * servicing pending requests might enter blk_pread(). 
-+ */ -+ blk_set_disable_request_queuing(blk, true); -+ - blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); - -+ blk_set_dev_ops(blk, &nbd_block_ops, exp); -+ - QTAILQ_INSERT_TAIL(&exports, exp, next); - - return 0; -@@ -1788,6 +1827,7 @@ static void nbd_export_delete(BlockExport *blk_exp) - } - blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached, - blk_aio_detach, exp); -+ blk_set_disable_request_queuing(exp->common.blk, false); - } - - for (i = 0; i < exp->nr_export_bitmaps; i++) { --- -2.27.0 - diff --git a/kvm-osdep-provide-ROUND_DOWN-macro.patch b/kvm-osdep-provide-ROUND_DOWN-macro.patch deleted file mode 100644 index cf8a1a3..0000000 --- a/kvm-osdep-provide-ROUND_DOWN-macro.patch +++ /dev/null @@ -1,75 +0,0 @@ -From d9fa07a04ee19ad713b053f6a649178361d822a8 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 16 Jul 2021 16:51:31 -0400 -Subject: [PATCH 15/43] osdep: provide ROUND_DOWN macro - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -osdep.h provides a ROUND_UP macro to hide bitwise operations for the -purpose of rounding a number up to a power of two; add a ROUND_DOWN -macro that does the same with truncation towards zero. - -While at it, change the formatting of some comments. - -Signed-off-by: Paolo Bonzini -(cherry picked from commit c9797456f64ce72c03eb2969d97ac1dd4698d91e) -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - include/qemu/osdep.h | 28 ++++++++++++++++++++++------ - 1 file changed, 22 insertions(+), 6 deletions(-) - -diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h -index cb2a07e472..e327220992 100644 ---- a/include/qemu/osdep.h -+++ b/include/qemu/osdep.h -@@ -316,11 +316,16 @@ extern "C" { - }) - #endif - --/* Round number down to multiple */ -+/* -+ * Round number down to multiple. Safe when m is not a power of 2 (see -+ * ROUND_DOWN for a faster version when a power of 2 is guaranteed). 
-+ */ - #define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m)) - --/* Round number up to multiple. Safe when m is not a power of 2 (see -- * ROUND_UP for a faster version when a power of 2 is guaranteed) */ -+/* -+ * Round number up to multiple. Safe when m is not a power of 2 (see -+ * ROUND_UP for a faster version when a power of 2 is guaranteed). -+ */ - #define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m)) - - /* Check if n is a multiple of m */ -@@ -337,11 +342,22 @@ extern "C" { - /* Check if pointer p is n-bytes aligned */ - #define QEMU_PTR_IS_ALIGNED(p, n) QEMU_IS_ALIGNED((uintptr_t)(p), (n)) - --/* Round number up to multiple. Requires that d be a power of 2 (see -+/* -+ * Round number down to multiple. Requires that d be a power of 2 (see - * QEMU_ALIGN_UP for a safer but slower version on arbitrary -- * numbers); works even if d is a smaller type than n. */ -+ * numbers); works even if d is a smaller type than n. -+ */ -+#ifndef ROUND_DOWN -+#define ROUND_DOWN(n, d) ((n) & -(0 ? (n) : (d))) -+#endif -+ -+/* -+ * Round number up to multiple. Requires that d be a power of 2 (see -+ * QEMU_ALIGN_UP for a safer but slower version on arbitrary -+ * numbers); works even if d is a smaller type than n. -+ */ - #ifndef ROUND_UP --#define ROUND_UP(n, d) (((n) + (d) - 1) & -(0 ? 
(n) : (d))) -+#define ROUND_UP(n, d) ROUND_DOWN((n) + (d) - 1, (d)) - #endif - - #ifndef DIV_ROUND_UP --- -2.27.0 - diff --git a/kvm-pc-bios-s390-ccw-Allow-building-with-Clang-too.patch b/kvm-pc-bios-s390-ccw-Allow-building-with-Clang-too.patch deleted file mode 100644 index e443376..0000000 --- a/kvm-pc-bios-s390-ccw-Allow-building-with-Clang-too.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 9da71839fdc4e8b9d034998b0ed8a7ee9dfe7645 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Sun, 2 May 2021 13:22:21 +0200 -Subject: [PATCH 34/39] pc-bios/s390-ccw: Allow building with Clang, too -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack -RH-Commit: [7/11] d7c510f1e5f6434f6b3e4bab5b5f75403cbc7e1b (jmaloy/qemu-kvm-centos-jon) -RH-Bugzilla: 1939509 1940132 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck - -Clang unfortunately does not support generating code for the z900 -architecture level and starts with the z10 instead. Thus to be able -to support compiling with Clang, we have to check for the supported -compiler flags. The disadvantage is of course that the bios image -will only run with z10 guest CPUs upwards (which is what most people -use anyway), so just in case let's also emit a warning in that case -(we will continue to ship firmware images that have been pre-built -with GCC in future releases, so this should not impact normal users, -too). 
- -Message-Id: <20210502174836.838816-5-thuth@redhat.com> -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Cornelia Huck -Signed-off-by: Thomas Huth -(cherry picked from commit a5b2afd522dde375c38cf94b7c696ffa3faba2fb) -Signed-off-by: Jon Maloy -Signed-off-by: Miroslav Rezanina ---- - configure | 9 ++++++++- - pc-bios/s390-ccw/Makefile | 3 ++- - 2 files changed, 10 insertions(+), 2 deletions(-) - -diff --git a/configure b/configure -index 7edc08afb3..83d8af7fe4 100755 ---- a/configure -+++ b/configure -@@ -5424,9 +5424,16 @@ if { test "$cpu" = "i386" || test "$cpu" = "x86_64"; } && \ - fi - - # Only build s390-ccw bios if we're on s390x and the compiler has -march=z900 -+# or -march=z10 (which is the lowest architecture level that Clang supports) - if test "$cpu" = "s390x" ; then - write_c_skeleton -- if compile_prog "-march=z900" ""; then -+ compile_prog "-march=z900" "" -+ has_z900=$? -+ if [ $has_z900 = 0 ] || compile_prog "-march=z10" ""; then -+ if [ $has_z900 != 0 ]; then -+ echo "WARNING: Your compiler does not support the z900!" -+ echo " The s390-ccw bios will only work with guest CPUs >= z10." -+ fi - roms="$roms s390-ccw" - # SLOF is required for building the s390-ccw firmware on s390x, - # since it is using the libnet code from SLOF for network booting. 
-diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile -index 83fb1afb73..cee9d2c63b 100644 ---- a/pc-bios/s390-ccw/Makefile -+++ b/pc-bios/s390-ccw/Makefile -@@ -34,7 +34,8 @@ QEMU_CFLAGS += $(call cc-option,-Werror $(QEMU_CFLAGS),-Wno-stringop-overflow) - QEMU_CFLAGS += -ffreestanding -fno-delete-null-pointer-checks -fno-common -fPIE - QEMU_CFLAGS += -fwrapv -fno-strict-aliasing -fno-asynchronous-unwind-tables - QEMU_CFLAGS += $(call cc-option, $(QEMU_CFLAGS), -fno-stack-protector) --QEMU_CFLAGS += -msoft-float -march=z900 -+QEMU_CFLAGS += -msoft-float -+QEMU_CFLAGS += $(call cc-option, $(QEMU_CFLAGS),-march=z900,-march=z10) - QEMU_CFLAGS += -std=gnu99 - LDFLAGS += -Wl,-pie -nostdlib - --- -2.27.0 - diff --git a/kvm-pc-bios-s390-ccw-Fix-inline-assembly-for-older-versi.patch b/kvm-pc-bios-s390-ccw-Fix-inline-assembly-for-older-versi.patch deleted file mode 100644 index aeca11f..0000000 --- a/kvm-pc-bios-s390-ccw-Fix-inline-assembly-for-older-versi.patch +++ /dev/null @@ -1,106 +0,0 @@ -From c783eab8a3770703a39bbbd7edd23af7b9cb8f14 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 12 May 2021 19:15:48 +0200 -Subject: [PATCH 35/39] pc-bios/s390-ccw: Fix inline assembly for older - versions of Clang -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack -RH-Commit: [8/11] 9a69c7705b8128b4098f818c6b672d484e459c83 (jmaloy/qemu-kvm-centos-jon) -RH-Bugzilla: 1939509 1940132 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck - -Clang versions before v11.0 insist on having the %rX or %cX register -names instead of just a number. Since our Travis-CI is currently -still using Clang v6.0, we have to fix this to avoid failing jobs. 
- -Message-Id: <20210512171550.476130-2-thuth@redhat.com> -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Thomas Huth -(cherry picked from commit 052b66e7211af64964e005126eaa3c944b296b0e) -Signed-off-by: Jon Maloy -Signed-off-by: Miroslav Rezanina ---- - pc-bios/s390-ccw/helper.h | 2 +- - pc-bios/s390-ccw/jump2ipl.c | 4 ++-- - pc-bios/s390-ccw/menu.c | 8 ++++---- - pc-bios/s390-ccw/virtio.c | 2 +- - 4 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/pc-bios/s390-ccw/helper.h b/pc-bios/s390-ccw/helper.h -index dfcfea0ff0..3d0731c4c6 100644 ---- a/pc-bios/s390-ccw/helper.h -+++ b/pc-bios/s390-ccw/helper.h -@@ -31,7 +31,7 @@ static inline void *u32toptr(uint32_t n) - - static inline void yield(void) - { -- asm volatile ("diag 0,0,0x44" -+ asm volatile ("diag %%r0,%%r0,0x44" - : : - : "memory", "cc"); - } -diff --git a/pc-bios/s390-ccw/jump2ipl.c b/pc-bios/s390-ccw/jump2ipl.c -index 73e4367e09..78f5f46533 100644 ---- a/pc-bios/s390-ccw/jump2ipl.c -+++ b/pc-bios/s390-ccw/jump2ipl.c -@@ -64,8 +64,8 @@ void jump_to_IPL_code(uint64_t address) - * We use the load normal reset to keep r15 unchanged. jump_to_IPL_2 - * can then use r15 as its stack pointer. - */ -- asm volatile("lghi 1,1\n\t" -- "diag 1,1,0x308\n\t" -+ asm volatile("lghi %%r1,1\n\t" -+ "diag %%r1,%%r1,0x308\n\t" - : : : "1", "memory"); - panic("\n! 
IPL returns !\n"); - } -diff --git a/pc-bios/s390-ccw/menu.c b/pc-bios/s390-ccw/menu.c -index de8260a5d6..d601952d3e 100644 ---- a/pc-bios/s390-ccw/menu.c -+++ b/pc-bios/s390-ccw/menu.c -@@ -36,9 +36,9 @@ static inline void enable_clock_int(void) - uint64_t tmp = 0; - - asm volatile( -- "stctg 0,0,%0\n" -+ "stctg %%c0,%%c0,%0\n" - "oi 6+%0, 0x8\n" -- "lctlg 0,0,%0" -+ "lctlg %%c0,%%c0,%0" - : : "Q" (tmp) : "memory" - ); - } -@@ -48,9 +48,9 @@ static inline void disable_clock_int(void) - uint64_t tmp = 0; - - asm volatile( -- "stctg 0,0,%0\n" -+ "stctg %%c0,%%c0,%0\n" - "ni 6+%0, 0xf7\n" -- "lctlg 0,0,%0" -+ "lctlg %%c0,%%c0,%0" - : : "Q" (tmp) : "memory" - ); - } -diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c -index ab49840db8..5d2c6e3381 100644 ---- a/pc-bios/s390-ccw/virtio.c -+++ b/pc-bios/s390-ccw/virtio.c -@@ -54,7 +54,7 @@ static long kvm_hypercall(unsigned long nr, unsigned long param1, - register ulong r_param3 asm("4") = param3; - register long retval asm("2"); - -- asm volatile ("diag 2,4,0x500" -+ asm volatile ("diag %%r2,%%r4,0x500" - : "=d" (retval) - : "d" (r_nr), "0" (r_param1), "r"(r_param2), "d"(r_param3) - : "memory", "cc"); --- -2.27.0 - diff --git a/kvm-pc-bios-s390-ccw-Fix-the-cc-option-macro-in-the-Make.patch b/kvm-pc-bios-s390-ccw-Fix-the-cc-option-macro-in-the-Make.patch deleted file mode 100644 index 06d0d4d..0000000 --- a/kvm-pc-bios-s390-ccw-Fix-the-cc-option-macro-in-the-Make.patch +++ /dev/null @@ -1,48 +0,0 @@ -From f2d40216872a40bc5f5089de760c7ba0e3a710bc Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Sun, 2 May 2021 13:07:46 +0200 -Subject: [PATCH 32/39] pc-bios/s390-ccw: Fix the cc-option macro in the - Makefile - -RH-Author: Jon Maloy -RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack -RH-Commit: [5/11] 75379671567451e12ca32a3ea35d1ad2aa04bf5f (jmaloy/qemu-kvm-centos-jon) -RH-Bugzilla: 1939509 1940132 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Thomas Huth 
-RH-Acked-by: Cornelia Huck - -The cc-option macro is not doing what it should - compared with the -original from the rules.mak file that got removed with commit -660f793093 ("Makefile: inline the relevant parts of rules.mak"), -the arguments got changed and thus the macro is rather doubling -the QEMU_CFLAGS than adding the flag that should be tested. - -Message-Id: <20210502174836.838816-3-thuth@redhat.com> -Fixes: 22fb2ab096 ("pc-bios/s390-ccw: do not use rules.mak") -Signed-off-by: Thomas Huth -(cherry picked from commit 3462ff35512e925df5ee8c079ed46d4c93b633a7) -Signed-off-by: Jon Maloy -Signed-off-by: Miroslav Rezanina ---- - pc-bios/s390-ccw/Makefile | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile -index 29fd9019b8..f0fe84c9eb 100644 ---- a/pc-bios/s390-ccw/Makefile -+++ b/pc-bios/s390-ccw/Makefile -@@ -6,8 +6,8 @@ include ../../config-host.mak - CFLAGS = -O2 -g - - quiet-command = $(if $(V),$1,$(if $(2),@printf " %-7s %s\n" $2 $3 && $1, @$1)) --cc-option = $(if $(shell $(CC) $1 -S -o /dev/null -xc /dev/null > /dev/null \ -- 2>&1 && echo OK), $1, $2) -+cc-option = $(if $(shell $(CC) $1 $2 -S -o /dev/null -xc /dev/null \ -+ >/dev/null 2>&1 && echo OK),$2,$3) - - VPATH_SUFFIXES = %.c %.h %.S %.m %.mak %.sh %.rc Kconfig% %.json.in - set-vpath = $(if $1,$(foreach PATTERN,$(VPATH_SUFFIXES),$(eval vpath $(PATTERN) $1))) --- -2.27.0 - diff --git a/kvm-pc-bios-s390-ccw-Silence-GCC-11-stringop-overflow-wa.patch b/kvm-pc-bios-s390-ccw-Silence-GCC-11-stringop-overflow-wa.patch deleted file mode 100644 index ee9f702..0000000 --- a/kvm-pc-bios-s390-ccw-Silence-GCC-11-stringop-overflow-wa.patch +++ /dev/null @@ -1,75 +0,0 @@ -From c5b348e6d0334333295332c55fc4be51ce2668b8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Thu, 22 Apr 2021 16:59:11 +0200 -Subject: [PATCH 33/39] pc-bios/s390-ccw: Silence GCC 11 stringop-overflow - warning -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack -RH-Commit: [6/11] 92851a154f2425363aa1f5ed2bb12740f589229e (jmaloy/qemu-kvm-centos-jon) -RH-Bugzilla: 1939509 1940132 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck - -When building on Fedora 34 (gcc version 11.0.0 20210210) we get: - - In file included from pc-bios/s390-ccw/main.c:11: - In function ‘memset’, - inlined from ‘boot_setup’ at pc-bios/s390-ccw/main.c:185:5, - inlined from ‘main’ at pc-bios/s390-ccw/main.c:288:5: - pc-bios/s390-ccw/libc.h:28:14: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=] - 28 | p[i] = c; - | ~~~~~^~~ - -The offending code is: - - memset((char *)S390EP, 0, 6); - -where S390EP is a const address: - - #define S390EP 0x10008 - -The compiler doesn't know how big that pointed area is, so it assume that -its length is zero. This has been reported as BZ#99578 to GCC: -"gcc-11 -Warray-bounds or -Wstringop-overread warning when accessing a -pointer from integer literal" -https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578 - -As this warning does us more harm than good in the BIOS code (where -lot of direct accesses to low memory are done), silence this warning -for all BIOS objects. 
- -Suggested-by: Thomas Huth -Signed-off-by: Philippe Mathieu-Daudé -Message-Id: <20210422145911.2513980-1-philmd@redhat.com> -Acked-by: Christian Borntraeger -Message-Id: <20210502174836.838816-4-thuth@redhat.com> -[thuth: Use the pre-existing cc-option macro instead of adding a new one] -Reviewed-by: Cornelia Huck -Signed-off-by: Thomas Huth -(cherry picked from commit da231910d33084ccf63f07de210b145e0fa31d98) -Signed-off-by: Jon Maloy -Signed-off-by: Miroslav Rezanina ---- - pc-bios/s390-ccw/Makefile | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile -index f0fe84c9eb..83fb1afb73 100644 ---- a/pc-bios/s390-ccw/Makefile -+++ b/pc-bios/s390-ccw/Makefile -@@ -30,6 +30,7 @@ OBJECTS = start.o main.o bootmap.o jump2ipl.o sclp.o menu.o \ - virtio.o virtio-scsi.o virtio-blkdev.o libc.o cio.o dasd-ipl.o - - QEMU_CFLAGS := -Wall $(filter -W%, $(QEMU_CFLAGS)) -+QEMU_CFLAGS += $(call cc-option,-Werror $(QEMU_CFLAGS),-Wno-stringop-overflow) - QEMU_CFLAGS += -ffreestanding -fno-delete-null-pointer-checks -fno-common -fPIE - QEMU_CFLAGS += -fwrapv -fno-strict-aliasing -fno-asynchronous-unwind-tables - QEMU_CFLAGS += $(call cc-option, $(QEMU_CFLAGS), -fno-stack-protector) --- -2.27.0 - diff --git a/kvm-pc-bios-s390-ccw-Silence-warning-from-Clang-by-marki.patch b/kvm-pc-bios-s390-ccw-Silence-warning-from-Clang-by-marki.patch deleted file mode 100644 index e361980..0000000 --- a/kvm-pc-bios-s390-ccw-Silence-warning-from-Clang-by-marki.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 12acb42f2e6317a530fa01b5cf55a199231bfdce Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Sun, 2 May 2021 13:49:20 +0200 -Subject: [PATCH 31/39] pc-bios/s390-ccw: Silence warning from Clang by marking - panic() as noreturn -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack -RH-Commit: [4/11] 
806b776bf01b733e04664534641cf89d1cb48f1b (jmaloy/qemu-kvm-centos-jon) -RH-Bugzilla: 1939509 1940132 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck - -When compiling the s390-ccw bios with Clang, the compiler emits a warning: - - pc-bios/s390-ccw/main.c:210:5: warning: variable 'found' is used uninitialized - whenever switch default is taken [-Wsometimes-uninitialized] - default: - ^~~~~~~ - pc-bios/s390-ccw/main.c:214:16: note: uninitialized use occurs here - IPL_assert(found, "Boot device not found\n"); - ^~~~~ - -It's a false positive, it only happens because Clang is not smart enough -to see that the panic() function in the "default:" case can never return. - -Anyway, let's explicitely mark panic() with "noreturn" to shut up the -warning. - -Message-Id: <20210502174836.838816-2-thuth@redhat.com> -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Cornelia Huck -Signed-off-by: Thomas Huth -(cherry picked from commit 679196a646c91b8ce9a97b0aa81ffb3776cf8046) -Signed-off-by: Jon Maloy -Signed-off-by: Miroslav Rezanina ---- - pc-bios/s390-ccw/s390-ccw.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h -index 6cd92669e9..79db69ff54 100644 ---- a/pc-bios/s390-ccw/s390-ccw.h -+++ b/pc-bios/s390-ccw/s390-ccw.h -@@ -89,6 +89,7 @@ bool menu_is_enabled_enum(void); - - #define MAX_BOOT_ENTRIES 31 - -+__attribute__ ((__noreturn__)) - static inline void panic(const char *string) - { - sclp_print(string); --- -2.27.0 - diff --git a/kvm-pc-bios-s390-ccw-Use-reset_psw-pointer-instead-of-ha.patch b/kvm-pc-bios-s390-ccw-Use-reset_psw-pointer-instead-of-ha.patch deleted file mode 100644 index c98c1c2..0000000 --- a/kvm-pc-bios-s390-ccw-Use-reset_psw-pointer-instead-of-ha.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 609d8661171760c7ead04f64359d47a77c31d474 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 23 Apr 2021 10:30:51 +0200 -Subject: [PATCH 29/39] 
pc-bios/s390-ccw: Use reset_psw pointer instead of - hard-coded null pointer -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack -RH-Commit: [2/11] c65a986104a1830847e772879ca6eaf76c86b2f3 (jmaloy/qemu-kvm-centos-jon) -RH-Bugzilla: 1939509 1940132 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck - -When compiling the s390-ccw bios with clang, it emits a warning like this: - - pc-bios/s390-ccw/jump2ipl.c:86:9: warning: indirection of non-volatile null - pointer will be deleted, not trap [-Wnull-dereference] - if (*((uint64_t *)0) & RESET_PSW_MASK) { - ^~~~~~~~~~~~~~~~ - pc-bios/s390-ccw/jump2ipl.c:86:9: note: consider using __builtin_trap() or - qualifying pointer with 'volatile' - -We could add a "volatile" here to shut it up, but on the other hand, -we also have a pointer variable called "reset_psw" in this file already -that points to the PSW at address 0, so we can simply use that pointer -variable instead. 
- -Reviewed-by: Philippe Mathieu-Daudé -Message-Id: <20210423142440.582188-1-thuth@redhat.com> -Reviewed-by: Janosch Frank -Signed-off-by: Thomas Huth -(cherry picked from commit ff77712a8a2e15e5901fad35b9a6bb65974b2e4a) -Signed-off-by: Jon Maloy -Signed-off-by: Miroslav Rezanina ---- - pc-bios/s390-ccw/jump2ipl.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/pc-bios/s390-ccw/jump2ipl.c b/pc-bios/s390-ccw/jump2ipl.c -index b9c70d64a5..73e4367e09 100644 ---- a/pc-bios/s390-ccw/jump2ipl.c -+++ b/pc-bios/s390-ccw/jump2ipl.c -@@ -82,8 +82,8 @@ void jump_to_low_kernel(void) - jump_to_IPL_code(KERN_IMAGE_START); - } - -- /* Trying to get PSW at zero address */ -- if (*((uint64_t *)0) & RESET_PSW_MASK) { -+ /* Trying to get PSW at zero address (pointed to by reset_psw) */ -+ if (*reset_psw & RESET_PSW_MASK) { - /* - * Surely nobody will try running directly from lowcore, so - * let's use 0 as an indication that we want to load the reset --- -2.27.0 - diff --git a/kvm-pc-bios-s390-ccw-bootmap-Silence-compiler-warning-fr.patch b/kvm-pc-bios-s390-ccw-bootmap-Silence-compiler-warning-fr.patch deleted file mode 100644 index bace1cc..0000000 --- a/kvm-pc-bios-s390-ccw-bootmap-Silence-compiler-warning-fr.patch +++ /dev/null @@ -1,56 +0,0 @@ -From c00df86dd570d78767c5435f97bbe1d06407e470 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Wed, 21 Apr 2021 17:48:48 +0200 -Subject: [PATCH 28/39] pc-bios/s390-ccw/bootmap: Silence compiler warning from - Clang -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack -RH-Commit: [1/11] 85e41a04a0f47afe23e62f70397a5f79b2703499 (jmaloy/qemu-kvm-centos-jon) -RH-Bugzilla: 1939509 1940132 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck - -When compiling the s390-ccw bios with Clang, the compiler complains: - - 
pc-bios/s390-ccw/bootmap.c:302:9: warning: logical not is only applied - to the left hand side of this comparison [-Wlogical-not-parentheses] - if (!mbr->dev_type == DEV_TYPE_ECKD) { - ^ ~~ - -The code works (more or less by accident), since dev_type can only be -0 or 1, but it's better of course to use the intended != operator here -instead. - -Fixes: 5dc739f343 ("Allow booting in case the first virtio-blk disk is bad") -Message-Id: <20210421163331.358178-1-thuth@redhat.com> -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Christian Borntraeger -Signed-off-by: Thomas Huth -(cherry picked from commit d08a64940452060ab7ad5eb49cd5801131c2b9ec) -Signed-off-by: Jon Maloy -Signed-off-by: Miroslav Rezanina ---- - pc-bios/s390-ccw/bootmap.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c -index b46997c0b7..56411ab3b6 100644 ---- a/pc-bios/s390-ccw/bootmap.c -+++ b/pc-bios/s390-ccw/bootmap.c -@@ -299,7 +299,7 @@ static void ipl_eckd_cdl(void) - sclp_print("Bad block size in zIPL section of IPL2 record.\n"); - return; - } -- if (!mbr->dev_type == DEV_TYPE_ECKD) { -+ if (mbr->dev_type != DEV_TYPE_ECKD) { - sclp_print("Non-ECKD device type in zIPL section of IPL2 record.\n"); - return; - } --- -2.27.0 - diff --git a/kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch b/kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch deleted file mode 100644 index 77db467..0000000 --- a/kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch +++ /dev/null @@ -1,51 +0,0 @@ -From bd1d37e3536136130df41ac8162ce5bb4f361f87 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 1 Jun 2021 08:52:10 -0400 -Subject: [PATCH 02/21] pc-bios/s390-ccw: don't try to read the next block if - end of chunk is reached -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 8: Synchronize with RHEL-AV 8.5 release 
19 to RHEL 9 -RH-Commit: [1/8] 69a43520a9e7f0ab92bdfdc47281c7606f5159e7 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Daniel P. Berrangé -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier -RH-Acked-by: Vitaly Kuznetsov - -From: Marc Hartmayer - -Don't read the block if a null block number is reached, because this means that -the end of chunk is reached. - -Reviewed-by: Collin Walling -Signed-off-by: Marc Hartmayer -Message-Id: <20210416074736.17409-1-mhartmay@linux.ibm.com> -Signed-off-by: Thomas Huth -(cherry picked from commit a6625d38cce3901a7c1cba069f0abcf743a293f1) -Signed-off-by: Thomas Huth -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - pc-bios/s390-ccw/bootmap.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c -index 44df7d16af..b46997c0b7 100644 ---- a/pc-bios/s390-ccw/bootmap.c -+++ b/pc-bios/s390-ccw/bootmap.c -@@ -213,7 +213,7 @@ static int eckd_get_boot_menu_index(block_number_t s1b_block_nr) - next_block_nr = eckd_block_num(&s1b->seek[i + 1].chs); - } - -- if (next_block_nr) { -+ if (next_block_nr && !is_null_block_number(next_block_nr)) { - read_block(next_block_nr, s2_next_blk, - "Cannot read stage2 boot loader"); - } --- -2.27.0 - diff --git a/kvm-pc-bios-s390-ccw-netboot-Use-Wl-prefix-to-pass-param.patch b/kvm-pc-bios-s390-ccw-netboot-Use-Wl-prefix-to-pass-param.patch deleted file mode 100644 index 95c276d..0000000 --- a/kvm-pc-bios-s390-ccw-netboot-Use-Wl-prefix-to-pass-param.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 354026a79551358a5be4ed561e080ff550738e92 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 23 Apr 2021 17:20:46 +0200 -Subject: [PATCH 30/39] pc-bios/s390-ccw/netboot: Use "-Wl," prefix to pass - parameter to the linker - -RH-Author: Jon Maloy -RH-MergeRequest: 24: v7: Add support for building qemu-kvm with clang and safe-stack -RH-Commit: [3/11] 6a22a1705fbeb5fb2eab6c0e149a433286f98e5f 
(jmaloy/qemu-kvm-centos-jon) -RH-Bugzilla: 1939509 1940132 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck - -We are using the compiler to do the linking of the bios files. GCC still -accepts the "-Ttext=..." linker flag directly and is smart enough to -pass it to the linker, but in case we are compiling with Clang, we have -to use the official way with the "-Wl," prefix instead. - -Message-Id: <20210423153646.593153-1-thuth@redhat.com> -Signed-off-by: Thomas Huth -(cherry picked from commit b460a220872c28a8da95cbc7e9369d26aa268848) -Signed-off-by: Jon Maloy -Signed-off-by: Miroslav Rezanina ---- - pc-bios/s390-ccw/netboot.mak | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/netboot.mak b/pc-bios/s390-ccw/netboot.mak -index 577c023afe..68b4d7edcb 100644 ---- a/pc-bios/s390-ccw/netboot.mak -+++ b/pc-bios/s390-ccw/netboot.mak -@@ -6,7 +6,7 @@ NETOBJS := start.o sclp.o cio.o virtio.o virtio-net.o jump2ipl.o netmain.o - LIBC_INC := -nostdinc -I$(SLOF_DIR)/lib/libc/include - LIBNET_INC := -I$(SLOF_DIR)/lib/libnet - --NETLDFLAGS := $(LDFLAGS) -Ttext=0x7800000 -+NETLDFLAGS := $(LDFLAGS) -Wl,-Ttext=0x7800000 - - $(NETOBJS): QEMU_CFLAGS += $(LIBC_INC) $(LIBNET_INC) - --- -2.27.0 - diff --git a/kvm-ppc-pef.c-initialize-cgs-ready-in-kvmppc_svm_init.patch b/kvm-ppc-pef.c-initialize-cgs-ready-in-kvmppc_svm_init.patch deleted file mode 100644 index dd982e3..0000000 --- a/kvm-ppc-pef.c-initialize-cgs-ready-in-kvmppc_svm_init.patch +++ /dev/null @@ -1,69 +0,0 @@ -From e496360f627cdc4202f185b63175ced08c8b1f07 Mon Sep 17 00:00:00 2001 -From: Daniel Henrique Barboza -Date: Wed, 23 Jun 2021 19:39:32 -0400 -Subject: [PATCH 3/6] ppc/pef.c: initialize cgs->ready in kvmppc_svm_init() - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 22: Synchronize with RHEL-AV 8.5 release 23 to RHEL 9 -RH-Commit: [2/5] b204f898d2333686e30b14c050ac7a9289670f23 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 - -QEMU 
is failing to launch a CGS pSeries guest in a host that has PEF -support: - -qemu-system-ppc64: ../softmmu/vl.c:2585: qemu_machine_creation_done: Assertion `machine->cgs->ready' failed. -Aborted - -This is happening because we're not setting the cgs->ready flag that is -asserted in qemu_machine_creation_done() during machine start. - -cgs->ready is set in s390_pv_kvm_init() and sev_kvm_init(). Let's set it -in kvmppc_svm_init() as well. - -Reported-by: Ram Pai -Signed-off-by: Daniel Henrique Barboza -Message-Id: <20210528201619.52363-1-danielhb413@gmail.com> -Acked-by: Ram Pai -Signed-off-by: David Gibson -(cherry picked from commit b873ed83311d96644b544b10f6869a430660585a) -Signed-off-by: Daniel Henrique Barboza -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/ppc/pef.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c -index 573be3ed79..cc44d5e339 100644 ---- a/hw/ppc/pef.c -+++ b/hw/ppc/pef.c -@@ -41,7 +41,7 @@ struct PefGuest { - ConfidentialGuestSupport parent_obj; - }; - --static int kvmppc_svm_init(Error **errp) -+static int kvmppc_svm_init(ConfidentialGuestSupport *cgs, Error **errp) - { - #ifdef CONFIG_KVM - static Error *pef_mig_blocker; -@@ -65,6 +65,8 @@ static int kvmppc_svm_init(Error **errp) - /* NB: This can fail if --only-migratable is used */ - migrate_add_blocker(pef_mig_blocker, &error_fatal); - -+ cgs->ready = true; -+ - return 0; - #else - g_assert_not_reached(); -@@ -102,7 +104,7 @@ int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) - return -1; - } - -- return kvmppc_svm_init(errp); -+ return kvmppc_svm_init(cgs, errp); - } - - int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) --- -2.27.0 - diff --git a/kvm-qemu-img-Add-skip-broken-bitmaps-for-convert-bitmaps.patch b/kvm-qemu-img-Add-skip-broken-bitmaps-for-convert-bitmaps.patch deleted file mode 100644 index aa106c9..0000000 --- 
a/kvm-qemu-img-Add-skip-broken-bitmaps-for-convert-bitmaps.patch +++ /dev/null @@ -1,265 +0,0 @@ -From c5a2313ba173568087d78f76cc0258e7a353830b Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 6 Aug 2021 15:07:49 -0400 -Subject: [PATCH 26/39] qemu-img: Add --skip-broken-bitmaps for 'convert - --bitmaps' -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 35: Synchronize with RHEL-AV 8.5 release 28 to RHEL 9 -RH-Commit: [3/4] 4b7203c66367c601f9710bbcd91bdbdd56f0f8bd (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Philippe Mathieu-Daudé - -The point of 'qemu-img convert --bitmaps' is to be a convenience for -actions that are already possible through a string of smaller -'qemu-img bitmap' sub-commands. One situation not accounted for -already is that if a source image contains an inconsistent bitmap (for -example, because a qemu process died abruptly before flushing bitmap -state), the user MUST delete those inconsistent bitmaps before -anything else useful can be done with the image. - -We don't want to delete inconsistent bitmaps by default: although a -corrupt bitmap is only a loss of optimization rather than a corruption -of user-visible data, it is still nice to require the user to opt in -to the fact that they are aware of the loss of the bitmap. Still, -requiring the user to check 'qemu-img info' to see whether bitmaps are -consistent, then use 'qemu-img bitmap --remove' to remove offenders, -all before using 'qemu-img convert', is a lot more work than just -adding a knob 'qemu-img convert --bitmaps --skip-broken-bitmaps' which -opts in to skipping the broken bitmaps. - -After testing the new option, also demonstrate the way to manually fix -things (either deleting bad bitmaps, or re-creating them as empty) so -that it is possible to convert without the option. 
- -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1946084 -Signed-off-by: Eric Blake -Message-Id: <20210709153951.2801666-4-eblake@redhat.com> -[eblake: warning message tweak, test enhancements] -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 955171e4417bf39edb5503e694501e082a757731) -Signed-off-by: Eric Blake -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - docs/tools/qemu-img.rst | 8 ++++- - qemu-img.c | 29 +++++++++++---- - tests/qemu-iotests/tests/qemu-img-bitmaps | 16 ++++++++- - tests/qemu-iotests/tests/qemu-img-bitmaps.out | 35 ++++++++++++++++++- - 4 files changed, 79 insertions(+), 9 deletions(-) - -diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst -index c9efcfaefc..3df6277d6a 100644 ---- a/docs/tools/qemu-img.rst -+++ b/docs/tools/qemu-img.rst -@@ -414,7 +414,7 @@ Command description: - 4 - Error on reading data - --.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME -+.. option:: convert [--object OBJECTDEF] [--image-opts] [--target-image-opts] [--target-is-zero] [--bitmaps [--skip-broken-bitmaps]] [-U] [-C] [-c] [-p] [-q] [-n] [-f FMT] [-t CACHE] [-T SRC_CACHE] [-O OUTPUT_FMT] [-B BACKING_FILE] [-o OPTIONS] [-l SNAPSHOT_PARAM] [-S SPARSE_SIZE] [-r RATE_LIMIT] [-m NUM_COROUTINES] [-W] FILENAME [FILENAME2 [...]] OUTPUT_FILENAME - - Convert the disk image *FILENAME* or a snapshot *SNAPSHOT_PARAM* - to disk image *OUTPUT_FILENAME* using format *OUTPUT_FMT*. It can -@@ -456,6 +456,12 @@ Command description: - *NUM_COROUTINES* specifies how many coroutines work in parallel during - the convert process (defaults to 8). 
- -+ Use of ``--bitmaps`` requests that any persistent bitmaps present in -+ the original are also copied to the destination. If any bitmap is -+ inconsistent in the source, the conversion will fail unless -+ ``--skip-broken-bitmaps`` is also specified to copy only the -+ consistent bitmaps. -+ - .. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE] [-F BACKING_FMT] [-u] [-o OPTIONS] FILENAME [SIZE] - - Create the new disk image *FILENAME* of size *SIZE* and format -diff --git a/qemu-img.c b/qemu-img.c -index 7684684bfa..75bab32416 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -82,6 +82,7 @@ enum { - OPTION_MERGE = 274, - OPTION_BITMAPS = 275, - OPTION_FORCE = 276, -+ OPTION_SKIP_BROKEN = 277, - }; - - typedef enum OutputFormat { -@@ -2099,7 +2100,7 @@ static int convert_do_copy(ImgConvertState *s) - } - - /* Check that bitmaps can be copied, or output an error */ --static int convert_check_bitmaps(BlockDriverState *src) -+static int convert_check_bitmaps(BlockDriverState *src, bool skip_broken) - { - BdrvDirtyBitmap *bm; - -@@ -2111,17 +2112,19 @@ static int convert_check_bitmaps(BlockDriverState *src) - if (!bdrv_dirty_bitmap_get_persistence(bm)) { - continue; - } -- if (bdrv_dirty_bitmap_inconsistent(bm)) { -+ if (!skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) { - error_report("Cannot copy inconsistent bitmap '%s'", - bdrv_dirty_bitmap_name(bm)); -- error_printf("Try 'qemu-img bitmap --remove' to delete it\n"); -+ error_printf("Try --skip-broken-bitmaps, or " -+ "use 'qemu-img bitmap --remove' to delete it\n"); - return -1; - } - } - return 0; - } - --static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst) -+static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst, -+ bool skip_broken) - { - BdrvDirtyBitmap *bm; - Error *err = NULL; -@@ -2133,6 +2136,10 @@ static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst) - continue; - } - name = bdrv_dirty_bitmap_name(bm); -+ if 
(skip_broken && bdrv_dirty_bitmap_inconsistent(bm)) { -+ warn_report("Skipping inconsistent bitmap '%s'", name); -+ continue; -+ } - qmp_block_dirty_bitmap_add(dst->node_name, name, - true, bdrv_dirty_bitmap_granularity(bm), - true, true, -@@ -2188,6 +2195,7 @@ static int img_convert(int argc, char **argv) - bool force_share = false; - bool explict_min_sparse = false; - bool bitmaps = false; -+ bool skip_broken = false; - int64_t rate_limit = 0; - - ImgConvertState s = (ImgConvertState) { -@@ -2209,6 +2217,7 @@ static int img_convert(int argc, char **argv) - {"salvage", no_argument, 0, OPTION_SALVAGE}, - {"target-is-zero", no_argument, 0, OPTION_TARGET_IS_ZERO}, - {"bitmaps", no_argument, 0, OPTION_BITMAPS}, -+ {"skip-broken-bitmaps", no_argument, 0, OPTION_SKIP_BROKEN}, - {0, 0, 0, 0} - }; - c = getopt_long(argc, argv, ":hf:O:B:Cco:l:S:pt:T:qnm:WUr:", -@@ -2337,6 +2346,9 @@ static int img_convert(int argc, char **argv) - case OPTION_BITMAPS: - bitmaps = true; - break; -+ case OPTION_SKIP_BROKEN: -+ skip_broken = true; -+ break; - } - } - -@@ -2344,6 +2356,11 @@ static int img_convert(int argc, char **argv) - out_fmt = "raw"; - } - -+ if (skip_broken && !bitmaps) { -+ error_report("Use of --skip-broken-bitmaps requires --bitmaps"); -+ goto fail_getopt; -+ } -+ - if (s.compressed && s.copy_range) { - error_report("Cannot enable copy offloading when -c is used"); - goto fail_getopt; -@@ -2573,7 +2590,7 @@ static int img_convert(int argc, char **argv) - ret = -1; - goto out; - } -- ret = convert_check_bitmaps(blk_bs(s.src[0])); -+ ret = convert_check_bitmaps(blk_bs(s.src[0]), skip_broken); - if (ret < 0) { - goto out; - } -@@ -2698,7 +2715,7 @@ static int img_convert(int argc, char **argv) - - /* Now copy the bitmaps */ - if (bitmaps && ret == 0) { -- ret = convert_copy_bitmaps(blk_bs(s.src[0]), out_bs); -+ ret = convert_copy_bitmaps(blk_bs(s.src[0]), out_bs, skip_broken); - } - - out: -diff --git a/tests/qemu-iotests/tests/qemu-img-bitmaps 
b/tests/qemu-iotests/tests/qemu-img-bitmaps -index 09c3d395d1..7a3fe8c3d3 100755 ---- a/tests/qemu-iotests/tests/qemu-img-bitmaps -+++ b/tests/qemu-iotests/tests/qemu-img-bitmaps -@@ -144,7 +144,21 @@ _img_info --format-specific | _filter_irrelevant_img_info - echo - $QEMU_IMG convert --bitmaps -O qcow2 "$TEST_IMG" "$TEST_IMG.copy" && - echo "unexpected success" --TEST_IMG=$TEST_IMG.copy _img_info --format-specific \ -+TEST_IMG="$TEST_IMG.copy" _img_info --format-specific \ -+ | _filter_irrelevant_img_info -+# Skipping the broken bitmaps works,... -+echo -+$QEMU_IMG convert --bitmaps --skip-broken-bitmaps \ -+ -O qcow2 "$TEST_IMG" "$TEST_IMG.copy" -+TEST_IMG="$TEST_IMG.copy" _img_info --format-specific \ -+ | _filter_irrelevant_img_info -+# ...as does removing them -+echo -+_rm_test_img "$TEST_IMG.copy" -+$QEMU_IMG bitmap --remove "$TEST_IMG" b0 -+$QEMU_IMG bitmap --remove --add "$TEST_IMG" b2 -+$QEMU_IMG convert --bitmaps -O qcow2 "$TEST_IMG" "$TEST_IMG.copy" -+TEST_IMG="$TEST_IMG.copy" _img_info --format-specific \ - | _filter_irrelevant_img_info - - # success, all done -diff --git a/tests/qemu-iotests/tests/qemu-img-bitmaps.out b/tests/qemu-iotests/tests/qemu-img-bitmaps.out -index 1e32833bf1..7a7429e320 100644 ---- a/tests/qemu-iotests/tests/qemu-img-bitmaps.out -+++ b/tests/qemu-iotests/tests/qemu-img-bitmaps.out -@@ -145,6 +145,39 @@ Format specific information: - corrupt: false - - qemu-img: Cannot copy inconsistent bitmap 'b0' --Try 'qemu-img bitmap --remove' to delete it -+Try --skip-broken-bitmaps, or use 'qemu-img bitmap --remove' to delete it - qemu-img: Could not open 'TEST_DIR/t.IMGFMT.copy': Could not open 'TEST_DIR/t.IMGFMT.copy': No such file or directory -+ -+qemu-img: warning: Skipping inconsistent bitmap 'b0' -+qemu-img: warning: Skipping inconsistent bitmap 'b2' -+image: TEST_DIR/t.IMGFMT.copy -+file format: IMGFMT -+virtual size: 10 MiB (10485760 bytes) -+cluster_size: 65536 -+Format specific information: -+ bitmaps: -+ [0]: -+ flags: -+ [0]: 
auto -+ name: b4 -+ granularity: 65536 -+ corrupt: false -+ -+image: TEST_DIR/t.IMGFMT.copy -+file format: IMGFMT -+virtual size: 10 MiB (10485760 bytes) -+cluster_size: 65536 -+Format specific information: -+ bitmaps: -+ [0]: -+ flags: -+ [0]: auto -+ name: b4 -+ granularity: 65536 -+ [1]: -+ flags: -+ [0]: auto -+ name: b2 -+ granularity: 65536 -+ corrupt: false - *** done --- -2.27.0 - diff --git a/kvm-qemu-img-Fail-fast-on-convert-bitmaps-with-inconsist.patch b/kvm-qemu-img-Fail-fast-on-convert-bitmaps-with-inconsist.patch deleted file mode 100644 index bdd40b9..0000000 --- a/kvm-qemu-img-Fail-fast-on-convert-bitmaps-with-inconsist.patch +++ /dev/null @@ -1,145 +0,0 @@ -From 5e8f99ea87409e1423c2e1c5e445003cf4a032a9 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 6 Aug 2021 15:07:48 -0400 -Subject: [PATCH 25/39] qemu-img: Fail fast on convert --bitmaps with - inconsistent bitmap -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 35: Synchronize with RHEL-AV 8.5 release 28 to RHEL 9 -RH-Commit: [2/4] 3fd8d357c3a365d4bc142b3d339745e5b15c5894 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Philippe Mathieu-Daudé - -Waiting until the end of the convert operation (a potentially -time-consuming task) to finally detect that we can't copy a bitmap is -bad, comparing to failing fast up front. Furthermore, this prevents -us from leaving a file behind with a bitmap that is not marked as -inconsistent even though it does not have sane contents. - -This fixes the problems exposed in the previous patch to the iotest: -it adds a fast failure up front, and even if we don't fail early, it -ensures that any bitmap we add but do not properly populate is removed -again rather than left behind incomplete. 
- -Signed-off-by: Eric Blake -Message-Id: <20210709153951.2801666-3-eblake@redhat.com> -[eblake: add a hint to the warning message, simplify name computation] -Reviewed-by: Nir Soffer -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 74a4320f30632fa539507861b3835698282e462e) -Signed-off-by: Eric Blake -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - qemu-img.c | 29 +++++++++++++++++-- - tests/qemu-iotests/tests/qemu-img-bitmaps | 3 +- - tests/qemu-iotests/tests/qemu-img-bitmaps.out | 21 ++------------ - 3 files changed, 30 insertions(+), 23 deletions(-) - -diff --git a/qemu-img.c b/qemu-img.c -index babb5573ab..7684684bfa 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -2098,6 +2098,29 @@ static int convert_do_copy(ImgConvertState *s) - return s->ret; - } - -+/* Check that bitmaps can be copied, or output an error */ -+static int convert_check_bitmaps(BlockDriverState *src) -+{ -+ BdrvDirtyBitmap *bm; -+ -+ if (!bdrv_supports_persistent_dirty_bitmap(src)) { -+ error_report("Source lacks bitmap support"); -+ return -1; -+ } -+ FOR_EACH_DIRTY_BITMAP(src, bm) { -+ if (!bdrv_dirty_bitmap_get_persistence(bm)) { -+ continue; -+ } -+ if (bdrv_dirty_bitmap_inconsistent(bm)) { -+ error_report("Cannot copy inconsistent bitmap '%s'", -+ bdrv_dirty_bitmap_name(bm)); -+ error_printf("Try 'qemu-img bitmap --remove' to delete it\n"); -+ return -1; -+ } -+ } -+ return 0; -+} -+ - static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst) - { - BdrvDirtyBitmap *bm; -@@ -2124,6 +2147,7 @@ static int convert_copy_bitmaps(BlockDriverState *src, BlockDriverState *dst) - &err); - if (err) { - error_reportf_err(err, "Failed to populate bitmap %s: ", name); -+ qmp_block_dirty_bitmap_remove(dst->node_name, name, NULL); - return -1; - } - } -@@ -2549,9 +2573,8 @@ static int img_convert(int argc, char **argv) - ret = -1; - goto out; - } -- if (!bdrv_supports_persistent_dirty_bitmap(blk_bs(s.src[0]))) { -- 
error_report("Source lacks bitmap support"); -- ret = -1; -+ ret = convert_check_bitmaps(blk_bs(s.src[0])); -+ if (ret < 0) { - goto out; - } - } -diff --git a/tests/qemu-iotests/tests/qemu-img-bitmaps b/tests/qemu-iotests/tests/qemu-img-bitmaps -index 409c4497a3..09c3d395d1 100755 ---- a/tests/qemu-iotests/tests/qemu-img-bitmaps -+++ b/tests/qemu-iotests/tests/qemu-img-bitmaps -@@ -140,11 +140,10 @@ $QEMU_IO -c abort "$TEST_IMG" 2>/dev/null - $QEMU_IMG bitmap --add "$TEST_IMG" b4 - $QEMU_IMG bitmap --remove "$TEST_IMG" b1 - _img_info --format-specific | _filter_irrelevant_img_info -+# Proof that we fail fast if bitmaps can't be copied - echo - $QEMU_IMG convert --bitmaps -O qcow2 "$TEST_IMG" "$TEST_IMG.copy" && - echo "unexpected success" --# Bug - even though we failed at conversion, we left a file around with --# a bitmap marked as not corrupt - TEST_IMG=$TEST_IMG.copy _img_info --format-specific \ - | _filter_irrelevant_img_info - -diff --git a/tests/qemu-iotests/tests/qemu-img-bitmaps.out b/tests/qemu-iotests/tests/qemu-img-bitmaps.out -index 543b028da6..1e32833bf1 100644 ---- a/tests/qemu-iotests/tests/qemu-img-bitmaps.out -+++ b/tests/qemu-iotests/tests/qemu-img-bitmaps.out -@@ -144,22 +144,7 @@ Format specific information: - granularity: 65536 - corrupt: false - --qemu-img: Failed to populate bitmap b0: Bitmap 'b0' is inconsistent and cannot be used --Try block-dirty-bitmap-remove to delete this bitmap from disk --image: TEST_DIR/t.IMGFMT.copy --file format: IMGFMT --virtual size: 10 MiB (10485760 bytes) --cluster_size: 65536 --Format specific information: -- bitmaps: -- [0]: -- flags: -- name: b0 -- granularity: 65536 -- [1]: -- flags: -- [0]: auto -- name: b4 -- granularity: 65536 -- corrupt: false -+qemu-img: Cannot copy inconsistent bitmap 'b0' -+Try 'qemu-img bitmap --remove' to delete it -+qemu-img: Could not open 'TEST_DIR/t.IMGFMT.copy': Could not open 'TEST_DIR/t.IMGFMT.copy': No such file or directory - *** done --- -2.27.0 - diff --git 
a/kvm-ratelimit-protect-with-a-mutex.patch b/kvm-ratelimit-protect-with-a-mutex.patch deleted file mode 100644 index 6d18a15..0000000 --- a/kvm-ratelimit-protect-with-a-mutex.patch +++ /dev/null @@ -1,133 +0,0 @@ -From 05efd4675b1241f2cc68fd54c92a8a834699f212 Mon Sep 17 00:00:00 2001 -From: "plai@redhat.com" -Date: Thu, 29 Jul 2021 07:42:21 -0400 -Subject: [PATCH 14/39] ratelimit: protect with a mutex - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 32: Synchronize with RHEL-AV 8.5 release 27 to RHEL 9 -RH-Commit: [6/15] 48586bf5d422cb676a6f2d2cd0abb0822cfc17b2 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Acked-by: Igor Mammedov -RH-Acked-by: Andrew Jones - -Right now, rate limiting is protected by the AioContext mutex, which is -taken for example both by the block jobs and by qmp_block_job_set_speed -(via find_block_job). - -We would like to remove the dependency of block layer code on the -AioContext mutex, since most drivers and the core I/O code are already -not relying on it. However, there is no existing lock that can easily -be taken by both ratelimit_set_speed and ratelimit_calculate_delay, -especially because the latter might run in coroutine context (and -therefore under a CoMutex) but the former will not. - -Since concurrent calls to ratelimit_calculate_delay are not possible, -one idea could be to use a seqlock to get a snapshot of slice_ns and -slice_quota. But for now keep it simple, and just add a mutex to the -RateLimit struct; block jobs are generally not performance critical to -the point of optimizing the clock cycles spent in synchronization. - -This also requires the introduction of init/destroy functions, so -add them to the two users of ratelimit.h. 
- -Signed-off-by: Paolo Bonzini -(cherry picked from commit 4951967d84a0acbf47895add9158e2d4c6056ea0) -Signed-off-by: Paul Lai -Signed-off-by: Miroslav Rezanina ---- - block/block-copy.c | 2 ++ - blockjob.c | 3 +++ - include/qemu/ratelimit.h | 14 ++++++++++++++ - 3 files changed, 19 insertions(+) - -diff --git a/block/block-copy.c b/block/block-copy.c -index 39ae481c8b..9b4af00614 100644 ---- a/block/block-copy.c -+++ b/block/block-copy.c -@@ -230,6 +230,7 @@ void block_copy_state_free(BlockCopyState *s) - return; - } - -+ ratelimit_destroy(&s->rate_limit); - bdrv_release_dirty_bitmap(s->copy_bitmap); - shres_destroy(s->mem); - g_free(s); -@@ -289,6 +290,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, - s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER); - } - -+ ratelimit_init(&s->rate_limit); - QLIST_INIT(&s->tasks); - QLIST_INIT(&s->calls); - -diff --git a/blockjob.c b/blockjob.c -index 207e8c7fd9..46f15befe8 100644 ---- a/blockjob.c -+++ b/blockjob.c -@@ -87,6 +87,7 @@ void block_job_free(Job *job) - - block_job_remove_all_bdrv(bjob); - blk_unref(bjob->blk); -+ ratelimit_destroy(&bjob->limit); - error_free(bjob->blocker); - } - -@@ -435,6 +436,8 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, - assert(job->job.driver->free == &block_job_free); - assert(job->job.driver->user_resume == &block_job_user_resume); - -+ ratelimit_init(&job->limit); -+ - job->blk = blk; - - job->finalize_cancelled_notifier.notify = block_job_event_cancelled; -diff --git a/include/qemu/ratelimit.h b/include/qemu/ratelimit.h -index 01da8d63f1..003ea6d5a3 100644 ---- a/include/qemu/ratelimit.h -+++ b/include/qemu/ratelimit.h -@@ -14,9 +14,11 @@ - #ifndef QEMU_RATELIMIT_H - #define QEMU_RATELIMIT_H - -+#include "qemu/lockable.h" - #include "qemu/timer.h" - - typedef struct { -+ QemuMutex lock; - int64_t slice_start_time; - int64_t slice_end_time; - uint64_t slice_quota; -@@ -40,6 +42,7 @@ static inline int64_t 
ratelimit_calculate_delay(RateLimit *limit, uint64_t n) - int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - double delay_slices; - -+ QEMU_LOCK_GUARD(&limit->lock); - assert(limit->slice_quota && limit->slice_ns); - - if (limit->slice_end_time < now) { -@@ -65,9 +68,20 @@ static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n) - return limit->slice_end_time - now; - } - -+static inline void ratelimit_init(RateLimit *limit) -+{ -+ qemu_mutex_init(&limit->lock); -+} -+ -+static inline void ratelimit_destroy(RateLimit *limit) -+{ -+ qemu_mutex_destroy(&limit->lock); -+} -+ - static inline void ratelimit_set_speed(RateLimit *limit, uint64_t speed, - uint64_t slice_ns) - { -+ QEMU_LOCK_GUARD(&limit->lock); - limit->slice_ns = slice_ns; - limit->slice_quota = MAX(((double)speed * slice_ns) / 1000000000ULL, 1); - } --- -2.27.0 - diff --git a/kvm-redhat-Define-pseries-rhel8.5.0-machine-type.patch b/kvm-redhat-Define-pseries-rhel8.5.0-machine-type.patch deleted file mode 100644 index eb9f32a..0000000 --- a/kvm-redhat-Define-pseries-rhel8.5.0-machine-type.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 1194549a01a472b9ce21819cd32fe253d6263cd6 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Mon, 10 May 2021 15:37:40 -0400 -Subject: [PATCH 08/15] redhat: Define pseries-rhel8.5.0 machine type - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta -RH-Commit: [3/12] accc2ed549b94360bc6ab180c4266466816f122e (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Cornelia Huck -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier - -Note that the compat entries for 8.4.0 were already wired up -in the rhel-8.4.0 machine type. - -Signed-off-by: Greg Kurz -Signed-off-by: Danilo C. L. 
de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/ppc/spapr.c | 18 +++++++++++++++--- - 1 file changed, 15 insertions(+), 3 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index f9e8dfdfc9..653574ba91 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -5083,6 +5083,19 @@ static void spapr_machine_rhel_default_class_options(MachineClass *mc) - mc->max_cpus = 384; - } - -+/* -+ * pseries-rhel8.5.0 -+ * like pseries-6.0 -+ */ -+ -+static void spapr_machine_rhel850_class_options(MachineClass *mc) -+{ -+ /* The default machine type must apply the RHEL specific defaults */ -+ spapr_machine_rhel_default_class_options(mc); -+} -+ -+DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); -+ - /* - * pseries-rhel8.4.0 - * like pseries-5.2 -@@ -5090,13 +5103,12 @@ static void spapr_machine_rhel_default_class_options(MachineClass *mc) - - static void spapr_machine_rhel840_class_options(MachineClass *mc) - { -- /* The default machine type must apply the RHEL specific defaults */ -- spapr_machine_rhel_default_class_options(mc); -+ spapr_machine_rhel850_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_rhel_8_4, - hw_compat_rhel_8_4_len); - } - --DEFINE_SPAPR_MACHINE(rhel840, "rhel8.4.0", true); -+DEFINE_SPAPR_MACHINE(rhel840, "rhel8.4.0", false); - - /* - * pseries-rhel8.3.0 --- -2.27.0 - diff --git a/kvm-redhat-Enable-the-test-block-iothread-test-again.patch b/kvm-redhat-Enable-the-test-block-iothread-test-again.patch deleted file mode 100644 index 28dce0f..0000000 --- a/kvm-redhat-Enable-the-test-block-iothread-test-again.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 4231cac75289acd643c2daaa2dece485b958bef1 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 20 Aug 2021 10:27:12 +0200 -Subject: [PATCH 4/4] redhat: Enable the 'test-block-iothread' test again -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 36: Disable LTO on non-x86 architectures -RH-Commit: 
[2/2] e010396f3ee0f3f39ff8bcd4749f8ae2e8624980 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 1950192 -RH-Acked-by: Andrew Jones -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck -RH-Acked-by: Philippe Mathieu-Daudé - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1950192 - -Now that we disabled LTO to fix the coroutines/iothreads on s390x and -aarch64, we can also enable the 'test-block-iothread' test again. - -Signed-off-by: Thomas Huth ---- - tests/unit/meson.build | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tests/unit/meson.build b/tests/unit/meson.build -index 244d35f5d4..b3bc2109da 100644 ---- a/tests/unit/meson.build -+++ b/tests/unit/meson.build -@@ -65,7 +65,7 @@ if have_block - 'test-blockjob': [testblock], - 'test-blockjob-txn': [testblock], - 'test-block-backend': [testblock], --# 'test-block-iothread': [testblock], -+ 'test-block-iothread': [testblock], - 'test-write-threshold': [testblock], - 'test-crypto-hash': [crypto], - 'test-crypto-hmac': [crypto], --- -2.27.0 - diff --git a/kvm-redhat-add-missing-entries-in-hw_compat_rhel_8_4.patch b/kvm-redhat-add-missing-entries-in-hw_compat_rhel_8_4.patch deleted file mode 100644 index b70c6de..0000000 --- a/kvm-redhat-add-missing-entries-in-hw_compat_rhel_8_4.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 8be260b07df50891463e6efbd45f84b1b8323983 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Mon, 10 May 2021 15:37:39 -0400 -Subject: [PATCH 07/15] redhat: add missing entries in hw_compat_rhel_8_4 - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta -RH-Commit: [2/12] 69b6e4dff872478b6d0b09b2587f55967e1c9740 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Cornelia Huck -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier - -Some entries from hw_compat_5_2 were missing. - -Signed-off-by: Cornelia Huck -Signed-off-by: Greg Kurz -Signed-off-by: Danilo C. L. 
de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/core/machine.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 848e7fdff6..c665e869de 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -44,6 +44,10 @@ GlobalProperty hw_compat_rhel_8_4[] = { - { "ICH9-LPC", "smm-compat", "on"}, - /* hw_compat_rhel_8_4 from hw_compat_5_2 */ - { "PIIX4_PM", "smm-compat", "on"}, -+ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ -+ { "virtio-blk-device", "report-discard-granularity", "off" }, -+ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ -+ { "virtio-net-pci", "vectors", "3"}, - }; - const size_t hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4); - --- -2.27.0 - diff --git a/kvm-redhat-s390x-add-rhel-8.5.0-compat-machine.patch b/kvm-redhat-s390x-add-rhel-8.5.0-compat-machine.patch deleted file mode 100644 index 1be8506..0000000 --- a/kvm-redhat-s390x-add-rhel-8.5.0-compat-machine.patch +++ /dev/null @@ -1,59 +0,0 @@ -From a5e149c13279386c4fc3fae130289ac4ac53bd3e Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Mon, 10 May 2021 14:41:31 -0400 -Subject: [PATCH 06/15] redhat: s390x: add rhel-8.5.0 compat machine - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta -RH-Commit: [1/12] 3560ef3f773425f1479a131df7a351df2cbb502c (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Cornelia Huck -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier - -Note that the compat entries for 8.4.0 were already wired up -in the rhel-8.4.0 machine type. - -Signed-off-by: Cornelia Huck -Signed-off-by: Danilo C. L. 
de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/s390x/s390-virtio-ccw.c | 13 ++++++++++++- - 1 file changed, 12 insertions(+), 1 deletion(-) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 432f36bce5..667a99f336 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1074,15 +1074,26 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) - DEFINE_CCW_MACHINE(2_4, "2.4", false); - #endif - -+static void ccw_machine_rhel850_instance_options(MachineState *machine) -+{ -+} -+ -+static void ccw_machine_rhel850_class_options(MachineClass *mc) -+{ -+} -+DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); -+ - static void ccw_machine_rhel840_instance_options(MachineState *machine) - { -+ ccw_machine_rhel850_instance_options(machine); - } - - static void ccw_machine_rhel840_class_options(MachineClass *mc) - { -+ ccw_machine_rhel850_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); - } --DEFINE_CCW_MACHINE(rhel840, "rhel8.4.0", true); -+DEFINE_CCW_MACHINE(rhel840, "rhel8.4.0", false); - - static void ccw_machine_rhel820_instance_options(MachineState *machine) - { --- -2.27.0 - diff --git a/kvm-redhat-x86-Enable-kvm-asyncpf-int-by-default.patch b/kvm-redhat-x86-Enable-kvm-asyncpf-int-by-default.patch deleted file mode 100644 index 7fd2a56..0000000 --- a/kvm-redhat-x86-Enable-kvm-asyncpf-int-by-default.patch +++ /dev/null @@ -1,49 +0,0 @@ -From a9546384e1fe8b4dad9ab00c52f45dac3a8fbc00 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Tue, 8 Jun 2021 10:29:07 -0400 -Subject: [PATCH 04/12] redhat: x86: Enable 'kvm-asyncpf-int' by default -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 16: Synchronize with RHEL-AV 8.5 release 21 to RHEL 9 -RH-Commit: [2/8] 2ea940445291df74dfed2d2f9f2b1f88a3eca31b (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: 
Vitaly Kuznetsov -RH-Acked-by: Daniel P. Berrangé - -'kvm-asyncpf-int' feature is supported by KVM starting with RHEL-8.4 -kernel, enable the feature by default starting with RHEL-8.5 machine -type. - -Signed-off-by: Vitaly Kuznetsov -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/i386/pc.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 0a374dec39..cdbfa84d2e 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -366,12 +366,15 @@ GlobalProperty pc_rhel_compat[] = { - { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, - /* bz 1508330 */ - { "vfio-pci", "x-no-geforce-quirks", "on" }, -+ /* bz 1941397 */ -+ { TYPE_X86_CPU, "kvm-asyncpf-int", "on" }, - }; - const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); - - GlobalProperty pc_rhel_8_4_compat[] = { - /* pc_rhel_8_4_compat from pc_compat_5_2 */ - { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, -+ { TYPE_X86_CPU, "kvm-asyncpf-int", "off" }, - }; - const size_t pc_rhel_8_4_compat_len = G_N_ELEMENTS(pc_rhel_8_4_compat); - --- -2.27.0 - diff --git a/kvm-s390x-cpumodel-add-3931-and-3932.patch b/kvm-s390x-cpumodel-add-3931-and-3932.patch deleted file mode 100644 index 70ad80b..0000000 --- a/kvm-s390x-cpumodel-add-3931-and-3932.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 5536da8458b7825b084bfc94256bfdc1ca0127a3 Mon Sep 17 00:00:00 2001 -From: Christian Borntraeger -Date: Tue, 22 Jun 2021 22:19:23 +0200 -Subject: [PATCH 1/6] s390x/cpumodel: add 3931 and 3932 - -RH-Author: Cornelia Huck -RH-MergeRequest: 21: s390x/cpumodel: add 3931 and 3932 -RH-Commit: [1/1] b678fdf9364407c615678980330e496676e04f9e (cohuck/qemu-kvm-c9s) -RH-Bugzilla: 1932191 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Thomas Huth - -This defines 5 new facilities and the new 3931 and 3932 machines. -As before the name is not yet known and we do use gen16a and gen16b. -The new features are part of the full model. 
- -The default model is still empty (same as z15) and will be added -in a separate patch at a later point in time. - -Also add the dependencies of new facilities and as a fix for z15 add -a dependency from S390_FEAT_VECTOR_PACKED_DECIMAL_ENH to -S390_VECTOR_PACKED_DECIMAL. - -[merged <20210701084348.26556-1-borntraeger@de.ibm.com>] -Signed-off-by: Christian Borntraeger -Message-Id: <20210622201923.150205-2-borntraeger@de.ibm.com> -Reviewed-by: David Hildenbrand -Signed-off-by: Cornelia Huck -(cherry picked from commit fb4a08121695a88acefcbcd86f1376df079eefee) -Signed-off-by: Cornelia Huck -Signed-off-by: Miroslav Rezanina ---- - target/s390x/cpu_features_def.h.inc | 5 +++++ - target/s390x/cpu_models.c | 6 ++++++ - target/s390x/gen-features.c | 14 ++++++++++++++ - 3 files changed, 25 insertions(+) - -diff --git a/target/s390x/cpu_features_def.h.inc b/target/s390x/cpu_features_def.h.inc -index 7db3449e04..e86662bb3b 100644 ---- a/target/s390x/cpu_features_def.h.inc -+++ b/target/s390x/cpu_features_def.h.inc -@@ -109,6 +109,11 @@ DEF_FEAT(VECTOR_PACKED_DECIMAL_ENH, "vxpdeh", STFL, 152, "Vector-Packed-Decimal- - DEF_FEAT(MSA_EXT_9, "msa9-base", STFL, 155, "Message-security-assist-extension-9 facility (excluding subfunctions)") - DEF_FEAT(ETOKEN, "etoken", STFL, 156, "Etoken facility") - DEF_FEAT(UNPACK, "unpack", STFL, 161, "Unpack facility") -+DEF_FEAT(NNPA, "nnpa", STFL, 165, "NNPA facility") -+DEF_FEAT(VECTOR_PACKED_DECIMAL_ENH2, "vxpdeh2", STFL, 192, "Vector-Packed-Decimal-Enhancement facility 2") -+DEF_FEAT(BEAR_ENH, "beareh", STFL, 193, "BEAR-enhancement facility") -+DEF_FEAT(RDP, "rdp", STFL, 194, "Reset-DAT-protection facility") -+DEF_FEAT(PAI, "pai", STFL, 196, "Processor-Activity-Instrumentation facility") - - /* Features exposed via SCLP SCCB Byte 80 - 98 (bit numbers relative to byte-80) */ - DEF_FEAT(SIE_GSLS, "gsls", SCLP_CONF_CHAR, 40, "SIE: Guest-storage-limit-suppression facility") -diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c 
-index 9254ff46bf..3cb4d25a10 100644 ---- a/target/s390x/cpu_models.c -+++ b/target/s390x/cpu_models.c -@@ -88,6 +88,8 @@ static S390CPUDef s390_cpu_defs[] = { - CPUDEF_INIT(0x3907, 14, 1, 47, 0x08000000U, "z14ZR1", "IBM z14 Model ZR1 GA1"), - CPUDEF_INIT(0x8561, 15, 1, 47, 0x08000000U, "gen15a", "IBM z15 T01 GA1"), - CPUDEF_INIT(0x8562, 15, 1, 47, 0x08000000U, "gen15b", "IBM z15 T02 GA1"), -+ CPUDEF_INIT(0x3931, 16, 1, 47, 0x08000000U, "gen16a", "IBM 3931 GA1"), -+ CPUDEF_INIT(0x3932, 16, 1, 47, 0x08000000U, "gen16b", "IBM 3932 GA1"), - }; - - #define QEMU_MAX_CPU_TYPE 0x2964 -@@ -815,6 +817,8 @@ static void check_consistency(const S390CPUModel *model) - { S390_FEAT_MSA_EXT_9, S390_FEAT_MSA_EXT_4 }, - { S390_FEAT_MULTIPLE_EPOCH, S390_FEAT_TOD_CLOCK_STEERING }, - { S390_FEAT_VECTOR_PACKED_DECIMAL, S390_FEAT_VECTOR }, -+ { S390_FEAT_VECTOR_PACKED_DECIMAL_ENH, S390_FEAT_VECTOR_PACKED_DECIMAL }, -+ { S390_FEAT_VECTOR_PACKED_DECIMAL_ENH2, S390_FEAT_VECTOR_PACKED_DECIMAL_ENH }, - { S390_FEAT_VECTOR_ENH, S390_FEAT_VECTOR }, - { S390_FEAT_INSTRUCTION_EXEC_PROT, S390_FEAT_SIDE_EFFECT_ACCESS_ESOP2 }, - { S390_FEAT_SIDE_EFFECT_ACCESS_ESOP2, S390_FEAT_ESOP }, -@@ -846,6 +850,8 @@ static void check_consistency(const S390CPUModel *model) - { S390_FEAT_PTFF_STOUE, S390_FEAT_MULTIPLE_EPOCH }, - { S390_FEAT_AP_QUEUE_INTERRUPT_CONTROL, S390_FEAT_AP }, - { S390_FEAT_DIAG_318, S390_FEAT_EXTENDED_LENGTH_SCCB }, -+ { S390_FEAT_NNPA, S390_FEAT_VECTOR }, -+ { S390_FEAT_RDP, S390_FEAT_LOCAL_TLB_CLEARING }, - }; - int i; - -diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c -index a6ec918e90..8f99cea665 100644 ---- a/target/s390x/gen-features.c -+++ b/target/s390x/gen-features.c -@@ -424,6 +424,8 @@ static uint16_t base_GEN15_GA1[] = { - S390_FEAT_MISC_INSTRUCTION_EXT3, - }; - -+#define base_GEN16_GA1 EmptyFeat -+ - /* Full features (in order of release) - * Automatically includes corresponding base features. 
- * Full features are all features this hardware supports even if kvm/QEMU do not -@@ -567,6 +569,15 @@ static uint16_t full_GEN15_GA1[] = { - S390_FEAT_UNPACK, - }; - -+static uint16_t full_GEN16_GA1[] = { -+ S390_FEAT_NNPA, -+ S390_FEAT_VECTOR_PACKED_DECIMAL_ENH2, -+ S390_FEAT_BEAR_ENH, -+ S390_FEAT_RDP, -+ S390_FEAT_PAI, -+}; -+ -+ - /* Default features (in order of release) - * Automatically includes corresponding base features. - * Default features are all features this version of QEMU supports for this -@@ -652,6 +663,8 @@ static uint16_t default_GEN15_GA1[] = { - S390_FEAT_ETOKEN, - }; - -+#define default_GEN16_GA1 EmptyFeat -+ - /* QEMU (CPU model) features */ - - static uint16_t qemu_V2_11[] = { -@@ -782,6 +795,7 @@ static CpuFeatDefSpec CpuFeatDef[] = { - CPU_FEAT_INITIALIZER(GEN14_GA1), - CPU_FEAT_INITIALIZER(GEN14_GA2), - CPU_FEAT_INITIALIZER(GEN15_GA1), -+ CPU_FEAT_INITIALIZER(GEN16_GA1), - }; - - #define FEAT_GROUP_INITIALIZER(_name) \ --- -2.27.0 - diff --git a/kvm-s390x-css-Add-passthrough-IRB.patch b/kvm-s390x-css-Add-passthrough-IRB.patch deleted file mode 100644 index 339a45c..0000000 --- a/kvm-s390x-css-Add-passthrough-IRB.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 4eb1f0936bfc921cad9af37f1573075148843b1d Mon Sep 17 00:00:00 2001 -From: Eric Farman -Date: Thu, 24 Jun 2021 14:15:16 -0400 -Subject: [PATCH 04/43] s390x/css: Add passthrough IRB - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -Wire in the subchannel callback for building the IRB -ESW and ECW space for passthrough devices, and copy -the hardware's ESW into the IRB we are building. - -If the hardware presented concurrent sense, then copy -that sense data into the IRB's ECW space. - -Signed-off-by: Eric Farman -Message-Id: <20210617232537.1337506-5-farman@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit c626710fc755628d0d6b88aab0514c9238a84522) -Signed-off-by: Danilo C. L. 
de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/s390x/css.c | 16 +++++++++++++++- - hw/s390x/s390-ccw.c | 1 + - hw/vfio/ccw.c | 4 ++++ - include/hw/s390x/css.h | 3 +++ - 4 files changed, 23 insertions(+), 1 deletion(-) - -diff --git a/hw/s390x/css.c b/hw/s390x/css.c -index e77a0e523d..c3150da4f7 100644 ---- a/hw/s390x/css.c -+++ b/hw/s390x/css.c -@@ -1336,7 +1336,7 @@ static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src) - } - } - --static void copy_esw_to_guest(ESW *dest, const ESW *src) -+void copy_esw_to_guest(ESW *dest, const ESW *src) - { - dest->word0 = cpu_to_be32(src->word0); - dest->erw = cpu_to_be32(src->erw); -@@ -1651,6 +1651,20 @@ static void build_irb_sense_data(SubchDev *sch, IRB *irb) - } - } - -+void build_irb_passthrough(SubchDev *sch, IRB *irb) -+{ -+ /* Copy ESW from hardware */ -+ irb->esw = sch->esw; -+ -+ /* -+ * If (irb->esw.erw & ESW_ERW_SENSE) is true, then the contents -+ * of the ECW is sense data. If false, then it is model-dependent -+ * information. Either way, copy it into the IRB for the guest to -+ * read/decide what to do with. 
-+ */ -+ build_irb_sense_data(sch, irb); -+} -+ - void build_irb_virtual(SubchDev *sch, IRB *irb) - { - SCHIB *schib = &sch->curr_status; -diff --git a/hw/s390x/s390-ccw.c b/hw/s390x/s390-ccw.c -index b497571863..39cbea615b 100644 ---- a/hw/s390x/s390-ccw.c -+++ b/hw/s390x/s390-ccw.c -@@ -125,6 +125,7 @@ static void s390_ccw_realize(S390CCWDevice *cdev, char *sysfsdev, Error **errp) - } - sch->driver_data = cdev; - sch->do_subchannel_work = do_subchannel_work_passthrough; -+ sch->irb_cb = build_irb_passthrough; - - ccw_dev->sch = sch; - ret = css_sch_build_schib(sch, &cdev->hostid); -diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c -index b2df708e4b..5f141d44a4 100644 ---- a/hw/vfio/ccw.c -+++ b/hw/vfio/ccw.c -@@ -322,6 +322,7 @@ static void vfio_ccw_io_notifier_handler(void *opaque) - SCHIB *schib = &sch->curr_status; - SCSW s; - IRB irb; -+ ESW esw; - int size; - - if (!event_notifier_test_and_clear(&vcdev->io_notifier)) { -@@ -372,6 +373,9 @@ static void vfio_ccw_io_notifier_handler(void *opaque) - copy_scsw_to_guest(&s, &irb.scsw); - schib->scsw = s; - -+ copy_esw_to_guest(&esw, &irb.esw); -+ sch->esw = esw; -+ - /* If a uint check is pending, copy sense data. 
*/ - if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && - (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { -diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h -index 7c23a13f3d..10ed1df1bb 100644 ---- a/include/hw/s390x/css.h -+++ b/include/hw/s390x/css.h -@@ -141,6 +141,7 @@ struct SubchDev { - void (*irb_cb)(SubchDev *, IRB *); - SenseId id; - void *driver_data; -+ ESW esw; - }; - - static inline void sch_gen_unit_exception(SubchDev *sch) -@@ -202,6 +203,7 @@ int css_sch_build_schib(SubchDev *sch, CssDevId *dev_id); - unsigned int css_find_free_chpid(uint8_t cssid); - uint16_t css_build_subchannel_id(SubchDev *sch); - void copy_scsw_to_guest(SCSW *dest, const SCSW *src); -+void copy_esw_to_guest(ESW *dest, const ESW *src); - void css_inject_io_interrupt(SubchDev *sch); - void css_reset(void); - void css_reset_sch(SubchDev *sch); -@@ -216,6 +218,7 @@ void css_clear_sei_pending(void); - IOInstEnding s390_ccw_cmd_request(SubchDev *sch); - IOInstEnding do_subchannel_work_virtual(SubchDev *sub); - IOInstEnding do_subchannel_work_passthrough(SubchDev *sub); -+void build_irb_passthrough(SubchDev *sch, IRB *irb); - void build_irb_virtual(SubchDev *sch, IRB *irb); - - int s390_ccw_halt(SubchDev *sch); --- -2.27.0 - diff --git a/kvm-s390x-css-Introduce-an-ESW-struct.patch b/kvm-s390x-css-Introduce-an-ESW-struct.patch deleted file mode 100644 index de99f03..0000000 --- a/kvm-s390x-css-Introduce-an-ESW-struct.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 9a12329325d94ab56dbab976b4423fe7db0e8d0b Mon Sep 17 00:00:00 2001 -From: Eric Farman -Date: Thu, 24 Jun 2021 14:15:13 -0400 -Subject: [PATCH 01/43] s390x/css: Introduce an ESW struct - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -The Interrupt Response Block is comprised of several other -structures concatenated together, but only the 12-byte -Subchannel-Status Word (SCSW) is defined as a proper struct. -Everything else is a simple array of 32-bit words. 
- -Let's define a proper struct for the 20-byte Extended-Status -Word (ESW) so that we can make good decisions about the sense -data that would go into the ECW area for virtual vs -passthrough devices. - -[CH: adapted ESW definition to build with mingw, as discussed] -Signed-off-by: Eric Farman -Message-Id: <20210617232537.1337506-2-farman@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 3fdc622ad79636f3d7f8bed50a53bc28af1850e1) -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/s390x/css.c | 19 +++++++++++++------ - include/hw/s390x/ioinst.h | 12 +++++++++++- - 2 files changed, 24 insertions(+), 7 deletions(-) - -diff --git a/hw/s390x/css.c b/hw/s390x/css.c -index 4149b8e5a7..bd3172a688 100644 ---- a/hw/s390x/css.c -+++ b/hw/s390x/css.c -@@ -1336,6 +1336,14 @@ static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src) - } - } - -+static void copy_esw_to_guest(ESW *dest, const ESW *src) -+{ -+ dest->word0 = cpu_to_be32(src->word0); -+ dest->erw = cpu_to_be32(src->erw); -+ dest->word2 = cpu_to_be64(src->word2); -+ dest->word4 = cpu_to_be32(src->word4); -+} -+ - IOInstEnding css_do_stsch(SubchDev *sch, SCHIB *schib) - { - int ret; -@@ -1605,9 +1613,8 @@ static void copy_irb_to_guest(IRB *dest, const IRB *src, const PMCW *pmcw, - - copy_scsw_to_guest(&dest->scsw, &src->scsw); - -- for (i = 0; i < ARRAY_SIZE(dest->esw); i++) { -- dest->esw[i] = cpu_to_be32(src->esw[i]); -- } -+ copy_esw_to_guest(&dest->esw, &src->esw); -+ - for (i = 0; i < ARRAY_SIZE(dest->ecw); i++) { - dest->ecw[i] = cpu_to_be32(src->ecw[i]); - } -@@ -1656,9 +1663,9 @@ int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) - SCSW_CSTAT_CHN_CTRL_CHK | - SCSW_CSTAT_INTF_CTRL_CHK)) { - irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF; -- irb.esw[0] = 0x04804000; -+ irb.esw.word0 = 0x04804000; - } else { -- irb.esw[0] = 0x00800000; -+ irb.esw.word0 = 0x00800000; - } - /* If a unit check is pending, copy sense data. 
*/ - if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && -@@ -1671,7 +1678,7 @@ int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) - for (i = 0; i < ARRAY_SIZE(irb.ecw); i++) { - irb.ecw[i] = be32_to_cpu(irb.ecw[i]); - } -- irb.esw[1] = 0x01000000 | (sizeof(sch->sense_data) << 8); -+ irb.esw.erw = ESW_ERW_SENSE | (sizeof(sch->sense_data) << 8); - } - } - /* Store the irb to the guest. */ -diff --git a/include/hw/s390x/ioinst.h b/include/hw/s390x/ioinst.h -index c6737a30d4..3771fff9d4 100644 ---- a/include/hw/s390x/ioinst.h -+++ b/include/hw/s390x/ioinst.h -@@ -123,10 +123,20 @@ typedef struct SCHIB { - uint8_t mda[4]; - } QEMU_PACKED SCHIB; - -+/* format-0 extended-status word */ -+typedef struct ESW { -+ uint32_t word0; /* subchannel logout for format 0 */ -+ uint32_t erw; -+ uint64_t word2; /* failing-storage address for format 0 */ -+ uint32_t word4; /* secondary-CCW address for format 0 */ -+} QEMU_PACKED ESW; -+ -+#define ESW_ERW_SENSE 0x01000000 -+ - /* interruption response block */ - typedef struct IRB { - SCSW scsw; -- uint32_t esw[5]; -+ ESW esw; - uint32_t ecw[8]; - uint32_t emw[8]; - } IRB; --- -2.27.0 - diff --git a/kvm-s390x-css-Refactor-IRB-construction.patch b/kvm-s390x-css-Refactor-IRB-construction.patch deleted file mode 100644 index f55bf79..0000000 --- a/kvm-s390x-css-Refactor-IRB-construction.patch +++ /dev/null @@ -1,144 +0,0 @@ -From 0f4d8c51b51a23a87f1e3e9e764151352f652f3b Mon Sep 17 00:00:00 2001 -From: Eric Farman -Date: Thu, 24 Jun 2021 14:15:15 -0400 -Subject: [PATCH 03/43] s390x/css: Refactor IRB construction - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -Currently, all subchannel types have "sense data" copied into -the IRB.ECW space, and a couple flags enabled in the IRB.SCSW -and IRB.ESW. But for passthrough (vfio-ccw) subchannels, -this data isn't populated in the first place, so enabling -those flags leads to unexpected behavior if the guest tries to -process the sense data (zeros) in the IRB.ECW. 
- -Let's add a subchannel callback that builds these portions of -the IRB, and move the existing code into a routine for those -virtual subchannels. The passthrough subchannels will be able -to piggy-back onto this later. - -Signed-off-by: Eric Farman -Message-Id: <20210617232537.1337506-4-farman@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 0599a046acf1b625e97cef0aa702b5d86528c642) -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/s390x/3270-ccw.c | 1 + - hw/s390x/css.c | 45 +++++++++++++++++++++++++++--------------- - hw/s390x/virtio-ccw.c | 1 + - include/hw/s390x/css.h | 2 ++ - 4 files changed, 33 insertions(+), 16 deletions(-) - -diff --git a/hw/s390x/3270-ccw.c b/hw/s390x/3270-ccw.c -index f3e7342b1e..9efee591f9 100644 ---- a/hw/s390x/3270-ccw.c -+++ b/hw/s390x/3270-ccw.c -@@ -130,6 +130,7 @@ static void emulated_ccw_3270_realize(DeviceState *ds, Error **errp) - EMULATED_CCW_3270_CHPID_TYPE); - sch->do_subchannel_work = do_subchannel_work_virtual; - sch->ccw_cb = emulated_ccw_3270_cb; -+ sch->irb_cb = build_irb_virtual; - - ck->init(dev, &err); - if (err) { -diff --git a/hw/s390x/css.c b/hw/s390x/css.c -index fac7d5b39d..e77a0e523d 100644 ---- a/hw/s390x/css.c -+++ b/hw/s390x/css.c -@@ -1651,6 +1651,30 @@ static void build_irb_sense_data(SubchDev *sch, IRB *irb) - } - } - -+void build_irb_virtual(SubchDev *sch, IRB *irb) -+{ -+ SCHIB *schib = &sch->curr_status; -+ uint16_t stctl = schib->scsw.ctrl & SCSW_CTRL_MASK_STCTL; -+ -+ if (stctl & SCSW_STCTL_STATUS_PEND) { -+ if (schib->scsw.cstat & (SCSW_CSTAT_DATA_CHECK | -+ SCSW_CSTAT_CHN_CTRL_CHK | -+ SCSW_CSTAT_INTF_CTRL_CHK)) { -+ irb->scsw.flags |= SCSW_FLAGS_MASK_ESWF; -+ irb->esw.word0 = 0x04804000; -+ } else { -+ irb->esw.word0 = 0x00800000; -+ } -+ /* If a unit check is pending, copy sense data. 
*/ -+ if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && -+ (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { -+ irb->scsw.flags |= SCSW_FLAGS_MASK_ESWF | SCSW_FLAGS_MASK_ECTL; -+ build_irb_sense_data(sch, irb); -+ irb->esw.erw = ESW_ERW_SENSE | (sizeof(sch->sense_data) << 8); -+ } -+ } -+} -+ - int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) - { - SCHIB *schib = &sch->curr_status; -@@ -1669,23 +1693,12 @@ int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) - - /* Copy scsw from current status. */ - irb.scsw = schib->scsw; -- if (stctl & SCSW_STCTL_STATUS_PEND) { -- if (schib->scsw.cstat & (SCSW_CSTAT_DATA_CHECK | -- SCSW_CSTAT_CHN_CTRL_CHK | -- SCSW_CSTAT_INTF_CTRL_CHK)) { -- irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF; -- irb.esw.word0 = 0x04804000; -- } else { -- irb.esw.word0 = 0x00800000; -- } -- /* If a unit check is pending, copy sense data. */ -- if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && -- (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { -- irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF | SCSW_FLAGS_MASK_ECTL; -- build_irb_sense_data(sch, &irb); -- irb.esw.erw = ESW_ERW_SENSE | (sizeof(sch->sense_data) << 8); -- } -+ -+ /* Build other IRB data, if necessary */ -+ if (sch->irb_cb) { -+ sch->irb_cb(sch, &irb); - } -+ - /* Store the irb to the guest. 
*/ - p = schib->pmcw; - copy_irb_to_guest(target_irb, &irb, &p, irb_len); -diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c -index 8195f3546e..5a1eb39325 100644 ---- a/hw/s390x/virtio-ccw.c -+++ b/hw/s390x/virtio-ccw.c -@@ -754,6 +754,7 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) - sch->id.reserved = 0xff; - sch->id.cu_type = VIRTIO_CCW_CU_TYPE; - sch->do_subchannel_work = do_subchannel_work_virtual; -+ sch->irb_cb = build_irb_virtual; - ccw_dev->sch = sch; - dev->indicators = NULL; - dev->revision = -1; -diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h -index bba7593d2e..7c23a13f3d 100644 ---- a/include/hw/s390x/css.h -+++ b/include/hw/s390x/css.h -@@ -138,6 +138,7 @@ struct SubchDev { - int (*ccw_cb) (SubchDev *, CCW1); - void (*disable_cb)(SubchDev *); - IOInstEnding (*do_subchannel_work) (SubchDev *); -+ void (*irb_cb)(SubchDev *, IRB *); - SenseId id; - void *driver_data; - }; -@@ -215,6 +216,7 @@ void css_clear_sei_pending(void); - IOInstEnding s390_ccw_cmd_request(SubchDev *sch); - IOInstEnding do_subchannel_work_virtual(SubchDev *sub); - IOInstEnding do_subchannel_work_passthrough(SubchDev *sub); -+void build_irb_virtual(SubchDev *sch, IRB *irb); - - int s390_ccw_halt(SubchDev *sch); - int s390_ccw_clear(SubchDev *sch); --- -2.27.0 - diff --git a/kvm-s390x-css-Split-out-the-IRB-sense-data.patch b/kvm-s390x-css-Split-out-the-IRB-sense-data.patch deleted file mode 100644 index 2c8a3f8..0000000 --- a/kvm-s390x-css-Split-out-the-IRB-sense-data.patch +++ /dev/null @@ -1,63 +0,0 @@ -From a987dfced200adf1e4c2d3c39f0b5da0fb7e6ead Mon Sep 17 00:00:00 2001 -From: Eric Farman -Date: Thu, 24 Jun 2021 14:15:14 -0400 -Subject: [PATCH 02/43] s390x/css: Split out the IRB sense data - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -Let's move this logic into its own routine, -so it can be reused later. 
- -Signed-off-by: Eric Farman -Reviewed-by: Thomas Huth -Message-Id: <20210617232537.1337506-3-farman@linux.ibm.com> -Signed-off-by: Cornelia Huck -(cherry picked from commit 1b01dedaed41c2ca6129475c22b7b778b109fae8) -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/s390x/css.c | 19 ++++++++++++------- - 1 file changed, 12 insertions(+), 7 deletions(-) - -diff --git a/hw/s390x/css.c b/hw/s390x/css.c -index bd3172a688..fac7d5b39d 100644 ---- a/hw/s390x/css.c -+++ b/hw/s390x/css.c -@@ -1640,6 +1640,17 @@ static void copy_irb_to_guest(IRB *dest, const IRB *src, const PMCW *pmcw, - *irb_len = sizeof(*dest); - } - -+static void build_irb_sense_data(SubchDev *sch, IRB *irb) -+{ -+ int i; -+ -+ /* Attention: sense_data is already BE! */ -+ memcpy(irb->ecw, sch->sense_data, sizeof(sch->sense_data)); -+ for (i = 0; i < ARRAY_SIZE(irb->ecw); i++) { -+ irb->ecw[i] = be32_to_cpu(irb->ecw[i]); -+ } -+} -+ - int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) - { - SCHIB *schib = &sch->curr_status; -@@ -1670,14 +1681,8 @@ int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) - /* If a unit check is pending, copy sense data. */ - if ((schib->scsw.dstat & SCSW_DSTAT_UNIT_CHECK) && - (schib->pmcw.chars & PMCW_CHARS_MASK_CSENSE)) { -- int i; -- - irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF | SCSW_FLAGS_MASK_ECTL; -- /* Attention: sense_data is already BE! 
*/ -- memcpy(irb.ecw, sch->sense_data, sizeof(sch->sense_data)); -- for (i = 0; i < ARRAY_SIZE(irb.ecw); i++) { -- irb.ecw[i] = be32_to_cpu(irb.ecw[i]); -- } -+ build_irb_sense_data(sch, &irb); - irb.esw.erw = ESW_ERW_SENSE | (sizeof(sch->sense_data) << 8); - } - } --- -2.27.0 - diff --git a/kvm-s390x-redhat-disable-experimental-3270-device.patch b/kvm-s390x-redhat-disable-experimental-3270-device.patch deleted file mode 100644 index 232da25..0000000 --- a/kvm-s390x-redhat-disable-experimental-3270-device.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 5101527fb425c2d17ef04b0ed87b3810cf6db7bc Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Wed, 26 May 2021 11:01:00 +0200 -Subject: [PATCH] s390x/redhat: disable experimental 3270 device - -RH-Author: Cornelia Huck -RH-MergeRequest: 9: Disable the 'x-terminal3270' device in qemu-kvm on s390x -RH-Commit: [1/1] 1df70e856fba434d06587a05c44c4dc5a367f1f7 (cohuck/qemu-kvm) -RH-Bugzilla: 1962479 -RH-Acked-by: Thomas Huth -RH-Acked-by: David Hildenbrand -RH-Acked-by: Miroslav Rezanina - -The "x-terminal3270" device has been experimental for years, -has known limitations, has no known users anymore, and is not -supported by libvirt. - -Therefore, disable it in our downstream builds, as it is not -really supported. 
- -Signed-off-by: Cornelia Huck ---- - default-configs/devices/s390x-rh-devices.mak | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/default-configs/devices/s390x-rh-devices.mak b/default-configs/devices/s390x-rh-devices.mak -index 08a15f3e01..fe8613b8f6 100644 ---- a/default-configs/devices/s390x-rh-devices.mak -+++ b/default-configs/devices/s390x-rh-devices.mak -@@ -6,7 +6,6 @@ CONFIG_S390_FLIC=y - CONFIG_S390_FLIC_KVM=y - CONFIG_SCLPCONSOLE=y - CONFIG_SCSI=y --CONFIG_TERMINAL3270=y - CONFIG_VFIO=y - CONFIG_VFIO_AP=y - CONFIG_VFIO_CCW=y --- -2.27.0 - diff --git a/kvm-scsi-generic-pass-max_segments-via-max_iov-field-in-.patch b/kvm-scsi-generic-pass-max_segments-via-max_iov-field-in-.patch deleted file mode 100644 index 612fb9a..0000000 --- a/kvm-scsi-generic-pass-max_segments-via-max_iov-field-in-.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 05038edf628c6bca7cef061b7b1fac2b0b2163af Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 16 Jul 2021 16:51:30 -0400 -Subject: [PATCH 14/43] scsi-generic: pass max_segments via max_iov field in - BlockLimits - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -I/O to a disk via read/write is not limited by the number of segments allowed -by the host adapter; the kernel can split requests if needed, and the limit -imposed by the host adapter can be very low (256k or so) to avoid that SG_IO -returns EINVAL if memory is heavily fragmented. - -Since this value is only interesting for SG_IO-based I/O, do not include -it in the max_transfer and only take it into account when patching the -block limits VPD page in the scsi-generic device. - -Signed-off-by: Paolo Bonzini -Reviewed-by: Max Reitz -(cherry picked from commit 01ef8185b809af9d287e1a03a3f9d8ea8231118a) -Signed-off-by: Danilo C. L. 
de Paula -Signed-off-by: Miroslav Rezanina ---- - block/file-posix.c | 3 +-- - hw/scsi/scsi-generic.c | 6 ++++-- - 2 files changed, 5 insertions(+), 4 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 74d4903dc1..1a6c799e19 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -1263,8 +1263,7 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) - - ret = sg_get_max_segments(s->fd); - if (ret > 0) { -- bs->bl.max_transfer = MIN(bs->bl.max_transfer, -- ret * qemu_real_host_page_size); -+ bs->bl.max_iov = ret; - } - } - -diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c -index 98c30c5d5c..82e1e2ee79 100644 ---- a/hw/scsi/scsi-generic.c -+++ b/hw/scsi/scsi-generic.c -@@ -179,10 +179,12 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s) - (r->req.cmd.buf[1] & 0x01)) { - page = r->req.cmd.buf[2]; - if (page == 0xb0) { -- uint32_t max_transfer = -- blk_get_max_transfer(s->conf.blk) / s->blocksize; -+ uint32_t max_transfer = blk_get_max_transfer(s->conf.blk); -+ uint32_t max_iov = blk_get_max_iov(s->conf.blk); - - assert(max_transfer); -+ max_transfer = MIN_NON_ZERO(max_transfer, max_iov * qemu_real_host_page_size) -+ / s->blocksize; - stl_be_p(&r->buf[8], max_transfer); - /* Also take care of the opt xfer len. 
*/ - stl_be_p(&r->buf[12], --- -2.27.0 - diff --git a/kvm-sockets-update-SOCKET_ADDRESS_TYPE_FD-listen-2-backl.patch b/kvm-sockets-update-SOCKET_ADDRESS_TYPE_FD-listen-2-backl.patch deleted file mode 100644 index 75d505c..0000000 --- a/kvm-sockets-update-SOCKET_ADDRESS_TYPE_FD-listen-2-backl.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 415a6a1ced90cc8b8691eb7ab027bba4611fc236 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Wed, 2 Jun 2021 15:51:02 -0400 -Subject: [PATCH 04/21] sockets: update SOCKET_ADDRESS_TYPE_FD listen(2) - backlog -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 8: Synchronize with RHEL-AV 8.5 release 19 to RHEL 9 -RH-Commit: [3/8] a8fd97eb477ad51fca75c1cc344185e1de59caf1 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Daniel P. Berrangé -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier -RH-Acked-by: Vitaly Kuznetsov - -socket_get_fd() fails with the error "socket_get_fd: too many -connections" if the given listen backlog value is not 1. - -Not all callers set the backlog to 1. For example, commit -582d4210eb2f2ab5baac328fe4b479cd86da1647 ("qemu-nbd: Use SOMAXCONN for -socket listen() backlog") uses SOMAXCONN. This will always fail with in -socket_get_fd(). - -This patch calls listen(2) on the fd to update the backlog value. The -socket may already be in the listen state. I have tested that this works -on Linux 5.10 and macOS Catalina. - -As a bonus this allows us to detect when the fd cannot listen. Now we'll -be able to catch unbound or connected fds in socket_listen(). - -Drop the num argument from socket_get_fd() since this function is also -called by socket_connect() where a listen backlog value does not make -sense. - -Fixes: e5b6353cf25c99c3f08bf51e29933352f7140e8f ("socket: Add backlog parameter to socket_listen") -Reported-by: Richard W.M. 
Jones -Cc: Juan Quintela -Cc: Eric Blake -Signed-off-by: Stefan Hajnoczi -Message-Id: <20210310173004.420190-1-stefanha@redhat.com> -Tested-by: Richard W.M. Jones -Reviewed-by: Eric Blake -Reviewed-by: Stefano Garzarella -Signed-off-by: Eric Blake -(cherry picked from commit 37179e9ea45d6428b29ae789209c119ac18c1d39) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - util/qemu-sockets.c | 29 ++++++++++++++++++++++------- - 1 file changed, 22 insertions(+), 7 deletions(-) - -diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c -index 8af0278f15..2463c49773 100644 ---- a/util/qemu-sockets.c -+++ b/util/qemu-sockets.c -@@ -1116,14 +1116,10 @@ fail: - return NULL; - } - --static int socket_get_fd(const char *fdstr, int num, Error **errp) -+static int socket_get_fd(const char *fdstr, Error **errp) - { - Monitor *cur_mon = monitor_cur(); - int fd; -- if (num != 1) { -- error_setg_errno(errp, EINVAL, "socket_get_fd: too many connections"); -- return -1; -- } - if (cur_mon) { - fd = monitor_get_fd(cur_mon, fdstr, errp); - if (fd < 0) { -@@ -1159,7 +1155,7 @@ int socket_connect(SocketAddress *addr, Error **errp) - break; - - case SOCKET_ADDRESS_TYPE_FD: -- fd = socket_get_fd(addr->u.fd.str, 1, errp); -+ fd = socket_get_fd(addr->u.fd.str, errp); - break; - - case SOCKET_ADDRESS_TYPE_VSOCK: -@@ -1187,7 +1183,26 @@ int socket_listen(SocketAddress *addr, int num, Error **errp) - break; - - case SOCKET_ADDRESS_TYPE_FD: -- fd = socket_get_fd(addr->u.fd.str, num, errp); -+ fd = socket_get_fd(addr->u.fd.str, errp); -+ if (fd < 0) { -+ return -1; -+ } -+ -+ /* -+ * If the socket is not yet in the listen state, then transition it to -+ * the listen state now. -+ * -+ * If it's already listening then this updates the backlog value as -+ * requested. -+ * -+ * If this socket cannot listen because it's already in another state -+ * (e.g. unbound or connected) then we'll catch the error here. 
-+ */ -+ if (listen(fd, num) != 0) { -+ error_setg_errno(errp, errno, "Failed to listen on fd socket"); -+ closesocket(fd); -+ return -1; -+ } - break; - - case SOCKET_ADDRESS_TYPE_VSOCK: --- -2.27.0 - diff --git a/kvm-spapr-Don-t-hijack-current_machine-boot_order.patch b/kvm-spapr-Don-t-hijack-current_machine-boot_order.patch deleted file mode 100644 index 3cc304d..0000000 --- a/kvm-spapr-Don-t-hijack-current_machine-boot_order.patch +++ /dev/null @@ -1,115 +0,0 @@ -From b859b919acc83ea12c5c5b2991afac47e9532660 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Thu, 3 Jun 2021 13:29:40 -0400 -Subject: [PATCH 06/21] spapr: Don't hijack current_machine->boot_order -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 8: Synchronize with RHEL-AV 8.5 release 19 to RHEL 9 -RH-Commit: [5/8] 04822ea86e438f013915cd46e09a33627a640a47 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Daniel P. Berrangé -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier -RH-Acked-by: Vitaly Kuznetsov - -From: Greg Kurz - -QEMU 6.0 moved all the -boot variables to the machine. Especially, the -removal of the boot_order static changed the handling of '-boot once' -from: - - if (boot_once) { - qemu_boot_set(boot_once, &error_fatal); - qemu_register_reset(restore_boot_order, g_strdup(boot_order)); - } - -to - - if (current_machine->boot_once) { - qemu_boot_set(current_machine->boot_once, &error_fatal); - qemu_register_reset(restore_boot_order, - g_strdup(current_machine->boot_order)); - } - -This means that we now register as subsequent boot order a copy -of current_machine->boot_once that was just set with the previous -call to qemu_boot_set(), i.e. we never transition away from the -once boot order. - -It is certainly fragile^Wwrong for the spapr code to hijack a -field of the base machine type object like that. 
The boot order -rework simply turned this software boundary violation into an -actual bug. - -Have the spapr code to handle that with its own field in -SpaprMachineState. Also kfree() the initial boot device -string when "once" was used. - -Fixes: 4b7acd2ac821 ("vl: clean up -boot variables") -Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1960119 -Cc: pbonzini@redhat.com -Signed-off-by: Greg Kurz -Message-Id: <20210521160735.1901914-1-groug@kaod.org> -Signed-off-by: David Gibson -(cherry picked from commit 3bf0844f3be77b24cc8f56fc8df9ff199f8324cb) -Signed-off-by: Greg Kurz - -Conflicts: - include/hw/ppc/spapr.h - -Trivial context conflict because downstream has experimental support -for secure guests (f23e4b5090ba). - -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/ppc/spapr.c | 8 +++++--- - include/hw/ppc/spapr.h | 3 +++ - 2 files changed, 8 insertions(+), 3 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 653574ba91..11db32c537 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -1006,7 +1006,7 @@ static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset) - _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen")); - - if (reset) { -- const char *boot_device = machine->boot_order; -+ const char *boot_device = spapr->boot_device; - char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); - size_t cb = 0; - char *bootlist = get_boot_devices_list(&cb); -@@ -2364,8 +2364,10 @@ static SaveVMHandlers savevm_htab_handlers = { - static void spapr_boot_set(void *opaque, const char *boot_device, - Error **errp) - { -- MachineState *machine = MACHINE(opaque); -- machine->boot_order = g_strdup(boot_device); -+ SpaprMachineState *spapr = SPAPR_MACHINE(opaque); -+ -+ g_free(spapr->boot_device); -+ spapr->boot_device = g_strdup(boot_device); - } - - static void spapr_create_lmb_dr_connectors(SpaprMachineState *spapr) -diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 
54cdde8980..6d15066bc3 100644 ---- a/include/hw/ppc/spapr.h -+++ b/include/hw/ppc/spapr.h -@@ -227,6 +227,9 @@ struct SpaprMachineState { - /* Secure Guest support via x-svm-allowed */ - bool svm_allowed; - -+ /* Set by -boot */ -+ char *boot_device; -+ - /*< public >*/ - char *kvm_type; - char *host_model; --- -2.27.0 - diff --git a/kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch b/kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch deleted file mode 100644 index e556d49..0000000 --- a/kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch +++ /dev/null @@ -1,160 +0,0 @@ -From 389b2a01f9b75575996eaab195a9697840ae5f29 Mon Sep 17 00:00:00 2001 -From: Mahesh Salgaonkar -Date: Wed, 30 Jun 2021 13:27:47 -0400 -Subject: [PATCH 2/6] spapr: Fix EEH capability issue on KVM guest for PCI - passthru - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 22: Synchronize with RHEL-AV 8.5 release 23 to RHEL 9 -RH-Commit: [1/5] 86642761bad229c080e180ea9ebd0a4f67d2a4f7 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 - -With upstream kernel, especially after commit 98ba956f6a389 -("powerpc/pseries/eeh: Rework device EEH PE determination") we see that KVM -guest isn't able to enable EEH option for PCI pass-through devices anymore. - -[root@atest-guest ~]# dmesg | grep EEH -[ 0.032337] EEH: pSeries platform initialized -[ 0.298207] EEH: No capable adapters found: recovery disabled. -[root@atest-guest ~]# - -So far the linux kernel was assuming pe_config_addr equal to device's -config_addr and using it to enable EEH on the PE through ibm,set-eeh-option -RTAS call. Which wasn't the correct way as per PAPR. The linux kernel -commit 98ba956f6a389 fixed this flow. With that fixed, linux now uses PE -config address returned by ibm,get-config-addr-info2 RTAS call to enable -EEH option per-PE basis instead of per-device basis. However this has -uncovered a bug in qemu where ibm,set-eeh-option is treating PE config -address as per-device config address. 
- -Hence in qemu guest with recent kernel the ibm,set-eeh-option RTAS call -fails with -3 return value indicating that there is no PCI device exist for -the specified PE config address. The rtas_ibm_set_eeh_option call uses -pci_find_device() to get the PC device that matches specific bus and devfn -extracted from PE config address passed as argument. Thus it tries to map -the PE config address to a single specific PCI device 'bus->devices[devfn]' -which always results into checking device on slot 0 'bus->devices[0]'. -This succeeds when there is a pass-through device (vfio-pci) present on -slot 0. But in cases where there is no pass-through device present in slot -0, but present in non-zero slots, ibm,set-eeh-option call fails to enable -the EEH capability. - -hw/ppc/spapr_pci_vfio.c: spapr_phb_vfio_eeh_set_option() - case RTAS_EEH_ENABLE: { - PCIHostState *phb; - PCIDevice *pdev; - - /* - * The EEH functionality is enabled on basis of PCI device, - * instead of PE. We need check the validity of the PCI - * device address. - */ - phb = PCI_HOST_BRIDGE(sphb); - pdev = pci_find_device(phb->bus, - (addr >> 16) & 0xFF, (addr >> 8) & 0xFF); - if (!pdev || !object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { - return RTAS_OUT_PARAM_ERROR; - } - -hw/pci/pci.c:pci_find_device() - -PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn) -{ - bus = pci_find_bus_nr(bus, bus_num); - - if (!bus) - return NULL; - - return bus->devices[devfn]; -} - -This patch fixes ibm,set-eeh-option to check for presence of any PCI device -(vfio-pci) under specified bus and enable the EEH if found. The current -code already makes sure that all the devices on that bus are from same -iommu group (within same PE) and fail very early if it does not. - -After this fix guest is able to find EEH capable devices and enable EEH -recovery on it. - -[root@atest-guest ~]# dmesg | grep EEH -[ 0.048139] EEH: pSeries platform initialized -[ 0.405115] EEH: Capable adapter found: recovery enabled. 
-[root@atest-guest ~]# - -Reviewed-by: Daniel Henrique Barboza -Signed-off-by: Mahesh Salgaonkar -Message-Id: <162158429107.145117.5843504911924013125.stgit@jupiter> -Signed-off-by: David Gibson -(cherry picked from commit ac9ef668321ebb6eb871a0c4dd380fa7d7891b4e) -Signed-off-by: Daniel Henrique Barboza -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/ppc/spapr_pci_vfio.c | 40 +++++++++++++++++++++++++++++++++------- - 1 file changed, 33 insertions(+), 7 deletions(-) - -diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c -index e0547b1740..6587c8cb5b 100644 ---- a/hw/ppc/spapr_pci_vfio.c -+++ b/hw/ppc/spapr_pci_vfio.c -@@ -47,6 +47,16 @@ void spapr_phb_vfio_reset(DeviceState *qdev) - spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev)); - } - -+static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev, -+ void *opaque) -+{ -+ bool *found = opaque; -+ -+ if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { -+ *found = true; -+ } -+} -+ - int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, - unsigned int addr, int option) - { -@@ -59,17 +69,33 @@ int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, - break; - case RTAS_EEH_ENABLE: { - PCIHostState *phb; -- PCIDevice *pdev; -+ bool found = false; - - /* -- * The EEH functionality is enabled on basis of PCI device, -- * instead of PE. We need check the validity of the PCI -- * device address. -+ * The EEH functionality is enabled per sphb level instead of -+ * per PCI device. We have already identified this specific sphb -+ * based on buid passed as argument to ibm,set-eeh-option rtas -+ * call. Now we just need to check the validity of the PCI -+ * pass-through devices (vfio-pci) under this sphb bus. -+ * We have already validated that all the devices under this sphb -+ * are from same iommu group (within same PE) before comming here. 
-+ * -+ * Prior to linux commit 98ba956f6a389 ("powerpc/pseries/eeh: -+ * Rework device EEH PE determination") kernel would call -+ * eeh-set-option for each device in the PE using the device's -+ * config_address as the argument rather than the PE address. -+ * Hence if we check validity of supplied config_addr whether -+ * it matches to this PHB will cause issues with older kernel -+ * versions v5.9 and older. If we return an error from -+ * eeh-set-option when the argument isn't a valid PE address -+ * then older kernels (v5.9 and older) will interpret that as -+ * EEH not being supported. - */ - phb = PCI_HOST_BRIDGE(sphb); -- pdev = pci_find_device(phb->bus, -- (addr >> 16) & 0xFF, (addr >> 8) & 0xFF); -- if (!pdev || !object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { -+ pci_for_each_device(phb->bus, (addr >> 16) & 0xFF, -+ spapr_eeh_pci_find_device, &found); -+ -+ if (!found) { - return RTAS_OUT_PARAM_ERROR; - } - --- -2.27.0 - diff --git a/kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch b/kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch deleted file mode 100644 index 4c7c9a1..0000000 --- a/kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 63933b51e447d4acb08d3900ff64d150a3f2bdf8 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Wed, 9 Jun 2021 05:58:34 -0400 -Subject: [PATCH 08/21] spapr: Remove stale comment about power-saving LPCR - bits -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 8: Synchronize with RHEL-AV 8.5 release 19 to RHEL 9 -RH-Commit: [7/8] 12872a0193d22915de5d71d8055094f9f15e63b0 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Daniel P. 
Berrangé -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier -RH-Acked-by: Vitaly Kuznetsov - -From: Nicholas Piggin - -Commit 47a9b551547 ("spapr: Clean up handling of LPCR power-saving exit -bits") moved this logic but did not remove the comment from the -previous location. - -Signed-off-by: Nicholas Piggin -Message-Id: <20210526091626.3388262-2-npiggin@gmail.com> -Reviewed-by: Cédric Le Goater -Reviewed-by: Greg Kurz -Signed-off-by: David Gibson -Signed-off-by: Laurent Vivier -(cherry picked from commit 7be3bf6c8429969f97728bb712d9a99997835607) -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/ppc/spapr_rtas.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c -index 8a79f9c628..91c71d1c94 100644 ---- a/hw/ppc/spapr_rtas.c -+++ b/hw/ppc/spapr_rtas.c -@@ -164,7 +164,6 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, - - env->msr = (1ULL << MSR_SF) | (1ULL << MSR_ME); - -- /* Enable Power-saving mode Exit Cause exceptions for the new CPU */ - lpcr = env->spr[SPR_LPCR]; - if (!pcc->interrupts_big_endian(callcpu)) { - lpcr |= LPCR_ILE; --- -2.27.0 - diff --git a/kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch b/kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch deleted file mode 100644 index c30b63e..0000000 --- a/kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 8bb294ea3f26a8ce01ad76c19a6de359dce0c113 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Wed, 9 Jun 2021 05:58:35 -0400 -Subject: [PATCH 09/21] spapr: Set LPCR to current AIL mode when starting a new - CPU -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 8: Synchronize with RHEL-AV 8.5 release 19 to RHEL 9 -RH-Commit: [8/8] 7a1cb27881f93c245ab9e8b8540cbd06d4f8c14f (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: 
Daniel P. Berrangé -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier -RH-Acked-by: Vitaly Kuznetsov - -From: Nicholas Piggin - -TCG does not keep track of AIL mode in a central place, it's based on -the current LPCR[AIL] bits. Synchronize the new CPU's LPCR to the -current LPCR in rtas_start_cpu(), similarly to the way the ILE bit is -synchronized. - -Open-code the ILE setting as well now that the caller's LPCR is -available directly, there is no need for the indirection. - -Without this, under both TCG and KVM, adding a POWER8/9/10 class CPU -with a new core ID after a modern Linux has booted results in the new -CPU's LPCR missing the LPCR[AIL]=0b11 setting that the other CPUs have. -This can cause crashes and unexpected behaviour. - -Signed-off-by: Nicholas Piggin -Message-Id: <20210526091626.3388262-3-npiggin@gmail.com> -Reviewed-by: Cédric Le Goater -Reviewed-by: Greg Kurz -Signed-off-by: David Gibson -Signed-off-by: Laurent Vivier -(cherry picked from commit ac559ecbea2649819e7b3fdd09f4e0243e0128db) -Signed-off-by: Danilo C. L. 
de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/ppc/spapr_rtas.c | 14 +++++++++----- - 1 file changed, 9 insertions(+), 5 deletions(-) - -diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c -index 91c71d1c94..27ab339b0c 100644 ---- a/hw/ppc/spapr_rtas.c -+++ b/hw/ppc/spapr_rtas.c -@@ -133,8 +133,8 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, - target_ulong id, start, r3; - PowerPCCPU *newcpu; - CPUPPCState *env; -- PowerPCCPUClass *pcc; - target_ulong lpcr; -+ target_ulong caller_lpcr; - - if (nargs != 3 || nret != 1) { - rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); -@@ -153,7 +153,6 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, - } - - env = &newcpu->env; -- pcc = POWERPC_CPU_GET_CLASS(newcpu); - - if (!CPU(newcpu)->halted) { - rtas_st(rets, 0, RTAS_OUT_HW_ERROR); -@@ -164,10 +163,15 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, - - env->msr = (1ULL << MSR_SF) | (1ULL << MSR_ME); - -+ caller_lpcr = callcpu->env.spr[SPR_LPCR]; - lpcr = env->spr[SPR_LPCR]; -- if (!pcc->interrupts_big_endian(callcpu)) { -- lpcr |= LPCR_ILE; -- } -+ -+ /* Set ILE the same way */ -+ lpcr = (lpcr & ~LPCR_ILE) | (caller_lpcr & LPCR_ILE); -+ -+ /* Set AIL the same way */ -+ lpcr = (lpcr & ~LPCR_AIL) | (caller_lpcr & LPCR_AIL); -+ - if (env->mmu_model == POWERPC_MMU_3_00) { - /* - * New cpus are expected to start in the same radix/hash mode --- -2.27.0 - diff --git a/kvm-target-i386-Add-CPU-model-versions-supporting-xsaves.patch b/kvm-target-i386-Add-CPU-model-versions-supporting-xsaves.patch deleted file mode 100644 index bdf810c..0000000 --- a/kvm-target-i386-Add-CPU-model-versions-supporting-xsaves.patch +++ /dev/null @@ -1,322 +0,0 @@ -From a7752067b45bc05f1127a62e39c38a3361bb1840 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Thu, 3 Jun 2021 14:04:18 -0400 -Subject: [PATCH 07/21] target/i386: Add CPU model versions supporting 'xsaves' -MIME-Version: 1.0 -Content-Type: text/plain; 
charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 8: Synchronize with RHEL-AV 8.5 release 19 to RHEL 9 -RH-Commit: [6/8] 814973113f19a21d10a90fcbbcd881eef354933d (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Daniel P. Berrangé -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier -RH-Acked-by: Vitaly Kuznetsov - -Hyper-V 2016 refuses to boot on Skylake+ CPU models because they lack -'xsaves'/'vmx-xsaves' features and this diverges from real hardware. The -same issue emerges with AMD "EPYC" CPU model prior to version 3 which got -'xsaves' added. EPYC-Rome/EPYC-Milan CPU models have 'xsaves' enabled from -the very beginning so the comment blaming KVM to explain why other CPUs -lack 'xsaves' is likely outdated. - -Signed-off-by: Vitaly Kuznetsov -Message-Id: <20210412073952.860944-1-vkuznets@redhat.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit 7bde6b18575dd79c26ce1616e0c33151e83d9d7e) - -Conflicts: - target/i386/cpu.c (context, skipping c1826ea6a0520) - -Signed-off-by: Vitaly Kuznetsov -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - target/i386/cpu.c | 150 +++++++++++++++++++++++++++++----------------- - 1 file changed, 94 insertions(+), 56 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index c30bb2a6b0..da47c3e50e 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -2881,12 +2881,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | - CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP, -- /* Missing: XSAVES (not supported by some Linux versions, -- * including v4.1 to v4.12). -- * KVM doesn't yet expose any XSAVES state save component, -- * and the only one defined in Skylake (processor tracing) -- * probably will block migration anyway. 
-- */ -+ /* XSAVES is added in version 4 */ - .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | - CPUID_XSAVE_XGETBV1, -@@ -2962,6 +2957,15 @@ static X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ } - } - }, -+ { -+ .version = 4, -+ .note = "IBRS, XSAVES, no TSX", -+ .props = (PropValue[]) { -+ { "xsaves", "on" }, -+ { "vmx-xsaves", "on" }, -+ { /* end of list */ } -+ } -+ }, - { /* end of list */ } - } - }, -@@ -3001,12 +3005,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, - .features[FEAT_7_0_ECX] = - CPUID_7_0_ECX_PKU, -- /* Missing: XSAVES (not supported by some Linux versions, -- * including v4.1 to v4.12). -- * KVM doesn't yet expose any XSAVES state save component, -- * and the only one defined in Skylake (processor tracing) -- * probably will block migration anyway. -- */ -+ /* XSAVES is added in version 5 */ - .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | - CPUID_XSAVE_XGETBV1, -@@ -3094,6 +3093,15 @@ static X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ } - } - }, -+ { -+ .version = 5, -+ .note = "IBRS, XSAVES, EPT switching, no TSX", -+ .props = (PropValue[]) { -+ { "xsaves", "on" }, -+ { "vmx-xsaves", "on" }, -+ { /* end of list */ } -+ } -+ }, - { /* end of list */ } - } - }, -@@ -3136,12 +3144,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_ECX_AVX512VNNI, - .features[FEAT_7_0_EDX] = - CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_SPEC_CTRL_SSBD, -- /* Missing: XSAVES (not supported by some Linux versions, -- * including v4.1 to v4.12). -- * KVM doesn't yet expose any XSAVES state save component, -- * and the only one defined in Skylake (processor tracing) -- * probably will block migration anyway. 
-- */ -+ /* XSAVES is added in version 5 */ - .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | - CPUID_XSAVE_XGETBV1, -@@ -3225,6 +3228,14 @@ static X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ } - }, - }, -+ { .version = 5, -+ .note = "ARCH_CAPABILITIES, EPT switching, XSAVES, no TSX", -+ .props = (PropValue[]) { -+ { "xsaves", "on" }, -+ { "vmx-xsaves", "on" }, -+ { /* end of list */ } -+ }, -+ }, - { /* end of list */ } - } - }, -@@ -3274,13 +3285,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO, - .features[FEAT_7_1_EAX] = - CPUID_7_1_EAX_AVX512_BF16, -- /* -- * Missing: XSAVES (not supported by some Linux versions, -- * including v4.1 to v4.12). -- * KVM doesn't yet expose any XSAVES state save component, -- * and the only one defined in Skylake (processor tracing) -- * probably will block migration anyway. -- */ -+ /* XSAVES is added in version 2 */ - .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | - CPUID_XSAVE_XGETBV1, -@@ -3336,6 +3341,18 @@ static X86CPUDefinition builtin_x86_defs[] = { - .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, - .xlevel = 0x80000008, - .model_id = "Intel Xeon Processor (Cooperlake)", -+ .versions = (X86CPUVersionDefinition[]) { -+ { .version = 1 }, -+ { .version = 2, -+ .note = "XSAVES", -+ .props = (PropValue[]) { -+ { "xsaves", "on" }, -+ { "vmx-xsaves", "on" }, -+ { /* end of list */ } -+ }, -+ }, -+ { /* end of list */ } -+ } - }, - { - .name = "Icelake-Client", -@@ -3378,12 +3395,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_ECX_AVX512_VPOPCNTDQ, - .features[FEAT_7_0_EDX] = - CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_SPEC_CTRL_SSBD, -- /* Missing: XSAVES (not supported by some Linux versions, -- * including v4.1 to v4.12). -- * KVM doesn't yet expose any XSAVES state save component, -- * and the only one defined in Skylake (processor tracing) -- * probably will block migration anyway. 
-- */ -+ /* XSAVES is added in version 3 */ - .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | - CPUID_XSAVE_XGETBV1, -@@ -3451,6 +3463,15 @@ static X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ } - }, - }, -+ { -+ .version = 3, -+ .note = "no TSX, XSAVES, deprecated", -+ .props = (PropValue[]) { -+ { "xsaves", "on" }, -+ { "vmx-xsaves", "on" }, -+ { /* end of list */ } -+ }, -+ }, - { /* end of list */ } - }, - .deprecation_note = "use Icelake-Server instead" -@@ -3499,12 +3520,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57, - .features[FEAT_7_0_EDX] = - CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_SPEC_CTRL_SSBD, -- /* Missing: XSAVES (not supported by some Linux versions, -- * including v4.1 to v4.12). -- * KVM doesn't yet expose any XSAVES state save component, -- * and the only one defined in Skylake (processor tracing) -- * probably will block migration anyway. -- */ -+ /* XSAVES is added in version 5 */ - .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | - CPUID_XSAVE_XGETBV1, -@@ -3597,6 +3613,15 @@ static X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ } - }, - }, -+ { -+ .version = 5, -+ .note = "XSAVES", -+ .props = (PropValue[]) { -+ { "xsaves", "on" }, -+ { "vmx-xsaves", "on" }, -+ { /* end of list */ } -+ }, -+ }, - { /* end of list */ } - } - }, -@@ -3631,13 +3656,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - .features[FEAT_7_0_EDX] = - CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_ARCH_CAPABILITIES | - CPUID_7_0_EDX_SPEC_CTRL_SSBD, -- /* -- * Missing: XSAVES (not supported by some Linux versions, -- * including v4.1 to v4.12). -- * KVM doesn't yet expose any XSAVES state save component, -- * and the only one defined in Skylake (processor tracing) -- * probably will block migration anyway. 
-- */ -+ /* XSAVES is added in version 3 */ - .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | CPUID_XSAVE_XGETBV1, - .features[FEAT_6_EAX] = -@@ -3704,6 +3723,15 @@ static X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ }, - }, - }, -+ { -+ .version = 3, -+ .note = "XSAVES, no MPX, no MONITOR", -+ .props = (PropValue[]) { -+ { "xsaves", "on" }, -+ { "vmx-xsaves", "on" }, -+ { /* end of list */ }, -+ }, -+ }, - { /* end of list */ }, - }, - }, -@@ -3762,13 +3790,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_EDX_CORE_CAPABILITY, - .features[FEAT_CORE_CAPABILITY] = - MSR_CORE_CAP_SPLIT_LOCK_DETECT, -- /* -- * Missing: XSAVES (not supported by some Linux versions, -- * including v4.1 to v4.12). -- * KVM doesn't yet expose any XSAVES state save component, -- * and the only one defined in Skylake (processor tracing) -- * probably will block migration anyway. -- */ -+ /* XSAVES is is added in version 3 */ - .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | - CPUID_XSAVE_XGETBV1, -@@ -3833,6 +3855,15 @@ static X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ }, - }, - }, -+ { -+ .version = 3, -+ .note = "XSAVES, no MPX", -+ .props = (PropValue[]) { -+ { "xsaves", "on" }, -+ { "vmx-xsaves", "on" }, -+ { /* end of list */ }, -+ }, -+ }, - { /* end of list */ }, - }, - }, -@@ -4114,11 +4145,7 @@ static X86CPUDefinition builtin_x86_defs[] = { - CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | - CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED | - CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT, -- /* -- * Missing: XSAVES (not supported by some Linux versions, -- * including v4.1 to v4.12). -- * KVM doesn't yet expose any XSAVES state save component. 
-- */ -+ /* XSAVES is added in version 2 */ - .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | - CPUID_XSAVE_XGETBV1, -@@ -4129,6 +4156,17 @@ static X86CPUDefinition builtin_x86_defs[] = { - .xlevel = 0x8000001E, - .model_id = "Hygon Dhyana Processor", - .cache_info = &epyc_cache_info, -+ .versions = (X86CPUVersionDefinition[]) { -+ { .version = 1 }, -+ { .version = 2, -+ .note = "XSAVES", -+ .props = (PropValue[]) { -+ { "xsaves", "on" }, -+ { /* end of list */ } -+ }, -+ }, -+ { /* end of list */ } -+ } - }, - { - .name = "EPYC-Rome", --- -2.27.0 - diff --git a/kvm-target-i386-sev-add-support-to-query-the-attestation.patch b/kvm-target-i386-sev-add-support-to-query-the-attestation.patch deleted file mode 100644 index fd08786..0000000 --- a/kvm-target-i386-sev-add-support-to-query-the-attestation.patch +++ /dev/null @@ -1,262 +0,0 @@ -From ba750c8ed71bc73c79fecefa895192793ef6b7db Mon Sep 17 00:00:00 2001 -From: Connor Kuehl -Date: Wed, 2 Jun 2021 19:39:20 -0400 -Subject: [PATCH 05/21] target/i386/sev: add support to query the attestation - report -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 8: Synchronize with RHEL-AV 8.5 release 19 to RHEL 9 -RH-Commit: [4/8] de6088cb0cd1db779b85a50be87846e967f8c92c (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Daniel P. Berrangé -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier -RH-Acked-by: Vitaly Kuznetsov - -From: Brijesh Singh - -The SEV FW >= 0.23 added a new command that can be used to query the -attestation report containing the SHA-256 digest of the guest memory -and VMSA encrypted with the LAUNCH_UPDATE and sign it with the PEK. - -Note, we already have a command (LAUNCH_MEASURE) that can be used to -query the SHA-256 digest of the guest memory encrypted through the -LAUNCH_UPDATE. 
The main difference between previous and this command -is that the report is signed with the PEK and unlike the LAUNCH_MEASURE -command the ATTESATION_REPORT command can be called while the guest -is running. - -Add a QMP interface "query-sev-attestation-report" that can be used -to get the report encoded in base64. - -Cc: James Bottomley -Cc: Tom Lendacky -Cc: Eric Blake -Cc: Paolo Bonzini -Cc: kvm@vger.kernel.org -Reviewed-by: James Bottomley -Tested-by: James Bottomley -Signed-off-by: Brijesh Singh -Reviewed-by: Connor Kuehl -Message-Id: <20210429170728.24322-1-brijesh.singh@amd.com> -Signed-off-by: Eduardo Habkost -(cherry picked from commit 3ea1a80243d5b5ba23d8c2b7d3a86034ea0ade22) -Signed-off-by: Connor Kuehl -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - linux-headers/linux/kvm.h | 8 +++++ - qapi/misc-target.json | 38 ++++++++++++++++++++++ - target/i386/monitor.c | 6 ++++ - target/i386/sev-stub.c | 7 ++++ - target/i386/sev.c | 67 +++++++++++++++++++++++++++++++++++++++ - target/i386/sev_i386.h | 2 ++ - target/i386/trace-events | 1 + - 7 files changed, 129 insertions(+) - -diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h -index 020b62a619..897f831374 100644 ---- a/linux-headers/linux/kvm.h -+++ b/linux-headers/linux/kvm.h -@@ -1591,6 +1591,8 @@ enum sev_cmd_id { - KVM_SEV_DBG_ENCRYPT, - /* Guest certificates commands */ - KVM_SEV_CERT_EXPORT, -+ /* Attestation report */ -+ KVM_SEV_GET_ATTESTATION_REPORT, - - KVM_SEV_NR_MAX, - }; -@@ -1643,6 +1645,12 @@ struct kvm_sev_dbg { - __u32 len; - }; - -+struct kvm_sev_attestation_report { -+ __u8 mnonce[16]; -+ __u64 uaddr; -+ __u32 len; -+}; -+ - #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) - #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) - #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) -diff --git a/qapi/misc-target.json b/qapi/misc-target.json -index 0c7491cd82..4b62f0ac05 100644 ---- a/qapi/misc-target.json -+++ b/qapi/misc-target.json -@@ -285,3 +285,41 @@ - ## - { 
'command': 'query-gic-capabilities', 'returns': ['GICCapability'], - 'if': 'defined(TARGET_ARM)' } -+ -+ -+## -+# @SevAttestationReport: -+# -+# The struct describes attestation report for a Secure Encrypted Virtualization -+# feature. -+# -+# @data: guest attestation report (base64 encoded) -+# -+# -+# Since: 6.1 -+## -+{ 'struct': 'SevAttestationReport', -+ 'data': { 'data': 'str'}, -+ 'if': 'defined(TARGET_I386)' } -+ -+## -+# @query-sev-attestation-report: -+# -+# This command is used to get the SEV attestation report, and is supported on AMD -+# X86 platforms only. -+# -+# @mnonce: a random 16 bytes value encoded in base64 (it will be included in report) -+# -+# Returns: SevAttestationReport objects. -+# -+# Since: 6.1 -+# -+# Example: -+# -+# -> { "execute" : "query-sev-attestation-report", "arguments": { "mnonce": "aaaaaaa" } } -+# <- { "return" : { "data": "aaaaaaaabbbddddd"} } -+# -+## -+{ 'command': 'query-sev-attestation-report', 'data': { 'mnonce': 'str' }, -+ 'returns': 'SevAttestationReport', -+ 'if': 'defined(TARGET_I386)' } -diff --git a/target/i386/monitor.c b/target/i386/monitor.c -index 5994408bee..119211f0b0 100644 ---- a/target/i386/monitor.c -+++ b/target/i386/monitor.c -@@ -757,3 +757,9 @@ void qmp_sev_inject_launch_secret(const char *packet_hdr, - - sev_inject_launch_secret(packet_hdr, secret, gpa, errp); - } -+ -+SevAttestationReport * -+qmp_query_sev_attestation_report(const char *mnonce, Error **errp) -+{ -+ return sev_get_attestation_report(mnonce, errp); -+} -diff --git a/target/i386/sev-stub.c b/target/i386/sev-stub.c -index 0207f1c5aa..0227cb5177 100644 ---- a/target/i386/sev-stub.c -+++ b/target/i386/sev-stub.c -@@ -74,3 +74,10 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) - { - abort(); - } -+ -+SevAttestationReport * -+sev_get_attestation_report(const char *mnonce, Error **errp) -+{ -+ error_setg(errp, "SEV is not available in this QEMU"); -+ return NULL; -+} -diff --git a/target/i386/sev.c 
b/target/i386/sev.c -index 72b9e2ab40..740548f213 100644 ---- a/target/i386/sev.c -+++ b/target/i386/sev.c -@@ -491,6 +491,73 @@ out: - return cap; - } - -+SevAttestationReport * -+sev_get_attestation_report(const char *mnonce, Error **errp) -+{ -+ struct kvm_sev_attestation_report input = {}; -+ SevAttestationReport *report = NULL; -+ SevGuestState *sev = sev_guest; -+ guchar *data; -+ guchar *buf; -+ gsize len; -+ int err = 0, ret; -+ -+ if (!sev_enabled()) { -+ error_setg(errp, "SEV is not enabled"); -+ return NULL; -+ } -+ -+ /* lets decode the mnonce string */ -+ buf = g_base64_decode(mnonce, &len); -+ if (!buf) { -+ error_setg(errp, "SEV: failed to decode mnonce input"); -+ return NULL; -+ } -+ -+ /* verify the input mnonce length */ -+ if (len != sizeof(input.mnonce)) { -+ error_setg(errp, "SEV: mnonce must be %zu bytes (got %" G_GSIZE_FORMAT ")", -+ sizeof(input.mnonce), len); -+ g_free(buf); -+ return NULL; -+ } -+ -+ /* Query the report length */ -+ ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, -+ &input, &err); -+ if (ret < 0) { -+ if (err != SEV_RET_INVALID_LEN) { -+ error_setg(errp, "failed to query the attestation report length " -+ "ret=%d fw_err=%d (%s)", ret, err, fw_error_to_str(err)); -+ g_free(buf); -+ return NULL; -+ } -+ } -+ -+ data = g_malloc(input.len); -+ input.uaddr = (unsigned long)data; -+ memcpy(input.mnonce, buf, sizeof(input.mnonce)); -+ -+ /* Query the report */ -+ ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, -+ &input, &err); -+ if (ret) { -+ error_setg_errno(errp, errno, "Failed to get attestation report" -+ " ret=%d fw_err=%d (%s)", ret, err, fw_error_to_str(err)); -+ goto e_free_data; -+ } -+ -+ report = g_new0(SevAttestationReport, 1); -+ report->data = g_base64_encode(data, input.len); -+ -+ trace_kvm_sev_attestation_report(mnonce, report->data); -+ -+e_free_data: -+ g_free(data); -+ g_free(buf); -+ return report; -+} -+ - static int - sev_read_file_base64(const char *filename, guchar **data, 
gsize *len) - { -diff --git a/target/i386/sev_i386.h b/target/i386/sev_i386.h -index ae221d4c72..ae6d840478 100644 ---- a/target/i386/sev_i386.h -+++ b/target/i386/sev_i386.h -@@ -35,5 +35,7 @@ extern uint32_t sev_get_cbit_position(void); - extern uint32_t sev_get_reduced_phys_bits(void); - extern char *sev_get_launch_measurement(void); - extern SevCapability *sev_get_capabilities(Error **errp); -+extern SevAttestationReport * -+sev_get_attestation_report(const char *mnonce, Error **errp); - - #endif -diff --git a/target/i386/trace-events b/target/i386/trace-events -index a22ab24e21..8d6437404d 100644 ---- a/target/i386/trace-events -+++ b/target/i386/trace-events -@@ -10,3 +10,4 @@ kvm_sev_launch_update_data(void *addr, uint64_t len) "addr %p len 0x%" PRIx64 - kvm_sev_launch_measurement(const char *value) "data %s" - kvm_sev_launch_finish(void) "" - kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" -+kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" --- -2.27.0 - diff --git a/kvm-vhost-user-Fix-backends-without-multiqueue-support.patch b/kvm-vhost-user-Fix-backends-without-multiqueue-support.patch deleted file mode 100644 index eb08089..0000000 --- a/kvm-vhost-user-Fix-backends-without-multiqueue-support.patch +++ /dev/null @@ -1,46 +0,0 @@ -From de25a5f05b76ca99299e09dabe04e7d59b9bed79 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 12 Jul 2021 10:22:32 -0400 -Subject: [PATCH 12/43] vhost-user: Fix backends without multiqueue support - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -dev->max_queues was never initialised for backends that don't support -VHOST_USER_PROTOCOL_F_MQ, so it would use 0 as the maximum number of -queues to check against and consequently fail for any such backend. - -Set it to 1 if the backend doesn't have multiqueue support. 
- -Fixes: c90bd505a3e8210c23d69fecab9ee6f56ec4a161 -Signed-off-by: Kevin Wolf -Message-Id: <20210705171429.29286-1-kwolf@redhat.com> -Reviewed-by: Cornelia Huck -Reviewed-by: Raphael Norwitz -Signed-off-by: Kevin Wolf -(cherry picked from commit 84affad1fd4c5251d7cccf4df43b29e9157983a9) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/vhost-user.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c -index ee57abe045..53f50adcba 100644 ---- a/hw/virtio/vhost-user.c -+++ b/hw/virtio/vhost-user.c -@@ -1908,7 +1908,10 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) - if (err < 0) { - return err; - } -+ } else { -+ dev->max_queues = 1; - } -+ - if (dev->num_queues && dev->max_queues < dev->num_queues) { - error_report("The maximum number of queues supported by the " - "backend is %" PRIu64, dev->max_queues); --- -2.27.0 - diff --git a/kvm-vhost-user-blk-Check-that-num-queues-is-supported-by.patch b/kvm-vhost-user-blk-Check-that-num-queues-is-supported-by.patch deleted file mode 100644 index 8b466dd..0000000 --- a/kvm-vhost-user-blk-Check-that-num-queues-is-supported-by.patch +++ /dev/null @@ -1,83 +0,0 @@ -From f3cec652012b0b5ab1d881f6377719b0984bce63 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 12 Jul 2021 10:22:31 -0400 -Subject: [PATCH 11/43] vhost-user-blk: Check that num-queues is supported by - backend - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -Creating a device with a number of queues that isn't supported by the -backend is pointless, the device won't work properly and the error -messages are rather confusing. - -Just fail to create the device if num-queues is higher than what the -backend supports. - -Since the relationship between num-queues and the number of virtqueues -depends on the specific device, this is an additional value that needs -to be initialised by the device. 
For convenience, allow leaving it 0 if -the check should be skipped. This makes sense for vhost-user-net where -separate vhost devices are used for the queues and custom initialisation -code is needed to perform the check. - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1935031 -Signed-off-by: Kevin Wolf -Reviewed-by: Raphael Norwitz -Message-Id: <20210429171316.162022-7-kwolf@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Kevin Wolf -(cherry picked from commit c90bd505a3e8210c23d69fecab9ee6f56ec4a161) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/block/vhost-user-blk.c | 1 + - hw/virtio/vhost-user.c | 5 +++++ - include/hw/virtio/vhost.h | 2 ++ - 3 files changed, 8 insertions(+) - -diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c -index c7e502f4c7..c6210fad0c 100644 ---- a/hw/block/vhost-user-blk.c -+++ b/hw/block/vhost-user-blk.c -@@ -324,6 +324,7 @@ static int vhost_user_blk_connect(DeviceState *dev, Error **errp) - } - s->connected = true; - -+ s->dev.num_queues = s->num_queues; - s->dev.nvqs = s->num_queues; - s->dev.vqs = s->vhost_vqs; - s->dev.vq_index = 0; -diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c -index ded0c10453..ee57abe045 100644 ---- a/hw/virtio/vhost-user.c -+++ b/hw/virtio/vhost-user.c -@@ -1909,6 +1909,11 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) - return err; - } - } -+ if (dev->num_queues && dev->max_queues < dev->num_queues) { -+ error_report("The maximum number of queues supported by the " -+ "backend is %" PRIu64, dev->max_queues); -+ return -EINVAL; -+ } - - if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && - !(virtio_has_feature(dev->protocol_features, -diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h -index 4a8bc75415..21a9a52088 100644 ---- a/include/hw/virtio/vhost.h -+++ b/include/hw/virtio/vhost.h -@@ -74,6 +74,8 @@ struct vhost_dev { - int nvqs; - /* the 
first virtqueue which would be used by this vhost dev */ - int vq_index; -+ /* if non-zero, minimum required value for max_queues */ -+ int num_queues; - uint64_t features; - uint64_t acked_features; - uint64_t backend_features; --- -2.27.0 - diff --git a/kvm-vhost-user-blk-Don-t-reconnect-during-initialisation.patch b/kvm-vhost-user-blk-Don-t-reconnect-during-initialisation.patch deleted file mode 100644 index c1b98d8..0000000 --- a/kvm-vhost-user-blk-Don-t-reconnect-during-initialisation.patch +++ /dev/null @@ -1,179 +0,0 @@ -From 5d39cb265db6ea2159662a2d071d340712940d33 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 12 Jul 2021 10:22:27 -0400 -Subject: [PATCH 07/43] vhost-user-blk: Don't reconnect during initialisation - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -This is a partial revert of commits 77542d43149 and bc79c87bcde. - -Usually, an error during initialisation means that the configuration was -wrong. Reconnecting won't make the error go away, but just turn the -error condition into an endless loop. Avoid this and return errors -again. - -Additionally, calling vhost_user_blk_disconnect() from the chardev event -handler could result in use-after-free because none of the -initialisation code expects that the device could just go away in the -middle. So removing the call fixes crashes in several places. 
- -For example, using a num-queues setting that is incompatible with the -backend would result in a crash like this (dereferencing dev->opaque, -which is already NULL): - - #0 0x0000555555d0a4bd in vhost_user_read_cb (source=0x5555568f4690, condition=(G_IO_IN | G_IO_HUP), opaque=0x7fffffffcbf0) at ../hw/virtio/vhost-user.c:313 - #1 0x0000555555d950d3 in qio_channel_fd_source_dispatch (source=0x555557c3f750, callback=0x555555d0a478 , user_data=0x7fffffffcbf0) at ../io/channel-watch.c:84 - #2 0x00007ffff7b32a9f in g_main_context_dispatch () at /lib64/libglib-2.0.so.0 - #3 0x00007ffff7b84a98 in g_main_context_iterate.constprop () at /lib64/libglib-2.0.so.0 - #4 0x00007ffff7b32163 in g_main_loop_run () at /lib64/libglib-2.0.so.0 - #5 0x0000555555d0a724 in vhost_user_read (dev=0x555557bc62f8, msg=0x7fffffffcc50) at ../hw/virtio/vhost-user.c:402 - #6 0x0000555555d0ee6b in vhost_user_get_config (dev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost-user.c:2133 - #7 0x0000555555d56d46 in vhost_dev_get_config (hdev=0x555557bc62f8, config=0x555557bc62ac "", config_len=60) at ../hw/virtio/vhost.c:1566 - #8 0x0000555555cdd150 in vhost_user_blk_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcf90) at ../hw/block/vhost-user-blk.c:510 - #9 0x0000555555d08f6d in virtio_device_realize (dev=0x555557bc60b0, errp=0x7fffffffcff0) at ../hw/virtio/virtio.c:3660 - -Note that this removes the ability to reconnect during initialisation -(but not during operation) when there is no permanent error, but the -backend restarts, as the implementation was buggy. This feature can be -added back in a follow-up series after changing error paths to -distinguish cases where retrying could help from cases with permanent -errors. - -Signed-off-by: Kevin Wolf -Message-Id: <20210429171316.162022-3-kwolf@redhat.com> -Reviewed-by: Michael S. 
Tsirkin -Signed-off-by: Kevin Wolf -(cherry picked from commit dabefdd6abcbc7d858e9413e4734aab2e0b5c8d9) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/block/vhost-user-blk.c | 59 +++++++++++---------------------------- - 1 file changed, 17 insertions(+), 42 deletions(-) - -diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c -index 7c85248a7b..c0b9958da1 100644 ---- a/hw/block/vhost-user-blk.c -+++ b/hw/block/vhost-user-blk.c -@@ -50,6 +50,8 @@ static const int user_feature_bits[] = { - VHOST_INVALID_FEATURE_BIT - }; - -+static void vhost_user_blk_event(void *opaque, QEMUChrEvent event); -+ - static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config) - { - VHostUserBlk *s = VHOST_USER_BLK(vdev); -@@ -362,19 +364,6 @@ static void vhost_user_blk_disconnect(DeviceState *dev) - vhost_dev_cleanup(&s->dev); - } - --static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, -- bool realized); -- --static void vhost_user_blk_event_realize(void *opaque, QEMUChrEvent event) --{ -- vhost_user_blk_event(opaque, event, false); --} -- --static void vhost_user_blk_event_oper(void *opaque, QEMUChrEvent event) --{ -- vhost_user_blk_event(opaque, event, true); --} -- - static void vhost_user_blk_chr_closed_bh(void *opaque) - { - DeviceState *dev = opaque; -@@ -382,12 +371,11 @@ static void vhost_user_blk_chr_closed_bh(void *opaque) - VHostUserBlk *s = VHOST_USER_BLK(vdev); - - vhost_user_blk_disconnect(dev); -- qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, -- vhost_user_blk_event_oper, NULL, opaque, NULL, true); -+ qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, -+ NULL, opaque, NULL, true); - } - --static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, -- bool realized) -+static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) - { - DeviceState *dev = opaque; - VirtIODevice *vdev = VIRTIO_DEVICE(dev); -@@ -401,17 +389,7 @@ static 
void vhost_user_blk_event(void *opaque, QEMUChrEvent event, - } - break; - case CHR_EVENT_CLOSED: -- /* -- * Closing the connection should happen differently on device -- * initialization and operation stages. -- * On initalization, we want to re-start vhost_dev initialization -- * from the very beginning right away when the connection is closed, -- * so we clean up vhost_dev on each connection closing. -- * On operation, we want to postpone vhost_dev cleanup to let the -- * other code perform its own cleanup sequence using vhost_dev data -- * (e.g. vhost_dev_set_log). -- */ -- if (realized && !runstate_check(RUN_STATE_SHUTDOWN)) { -+ if (!runstate_check(RUN_STATE_SHUTDOWN)) { - /* - * A close event may happen during a read/write, but vhost - * code assumes the vhost_dev remains setup, so delay the -@@ -431,8 +409,6 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, - * knowing its type (in this case vhost-user). - */ - s->dev.started = false; -- } else { -- vhost_user_blk_disconnect(dev); - } - break; - case CHR_EVENT_BREAK: -@@ -489,33 +465,32 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) - s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues); - s->connected = false; - -- qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, -- vhost_user_blk_event_realize, NULL, (void *)dev, -- NULL, true); -- --reconnect: - if (qemu_chr_fe_wait_connected(&s->chardev, errp) < 0) { - goto virtio_err; - } - -- /* check whether vhost_user_blk_connect() failed or not */ -- if (!s->connected) { -- goto reconnect; -+ if (vhost_user_blk_connect(dev) < 0) { -+ error_setg(errp, "vhost-user-blk: could not connect"); -+ qemu_chr_fe_disconnect(&s->chardev); -+ goto virtio_err; - } -+ assert(s->connected); - - ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, - sizeof(struct virtio_blk_config)); - if (ret < 0) { -- error_report("vhost-user-blk: get block config failed"); -- goto reconnect; -+ error_setg(errp, "vhost-user-blk: get 
block config failed"); -+ goto vhost_err; - } - -- /* we're fully initialized, now we can operate, so change the handler */ -+ /* we're fully initialized, now we can operate, so add the handler */ - qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, -- vhost_user_blk_event_oper, NULL, (void *)dev, -+ vhost_user_blk_event, NULL, (void *)dev, - NULL, true); - return; - -+vhost_err: -+ vhost_dev_cleanup(&s->dev); - virtio_err: - g_free(s->vhost_vqs); - s->vhost_vqs = NULL; --- -2.27.0 - diff --git a/kvm-vhost-user-blk-Fail-gracefully-on-too-large-queue-si.patch b/kvm-vhost-user-blk-Fail-gracefully-on-too-large-queue-si.patch deleted file mode 100644 index 29d6bd7..0000000 --- a/kvm-vhost-user-blk-Fail-gracefully-on-too-large-queue-si.patch +++ /dev/null @@ -1,55 +0,0 @@ -From bb551b71851c8d5a37b29aae373a99e97885a4df Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 12 Jul 2021 10:22:25 -0400 -Subject: [PATCH 05/43] vhost-user-blk: Fail gracefully on too large queue size -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -virtio_add_queue() aborts when queue_size > VIRTQUEUE_MAX_SIZE, so -vhost_user_blk_device_realize() should check this before calling it. - -Simple reproducer: - -qemu-system-x86_64 \ - -chardev null,id=foo \ - -device vhost-user-blk-pci,queue-size=4096,chardev=foo - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1935014 -Signed-off-by: Kevin Wolf -Message-Id: <20210413165654.50810-1-kwolf@redhat.com> -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Raphael Norwitz -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Philippe Mathieu-Daudé -Signed-off-by: Kevin Wolf -(cherry picked from commit 68bf7336533faa6aa90fdd4558edddbf5d8ef814) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/block/vhost-user-blk.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c -index 0b5b9d44cd..f5e9682703 100644 ---- a/hw/block/vhost-user-blk.c -+++ b/hw/block/vhost-user-blk.c -@@ -467,6 +467,11 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) - error_setg(errp, "vhost-user-blk: queue size must be non-zero"); - return; - } -+ if (s->queue_size > VIRTQUEUE_MAX_SIZE) { -+ error_setg(errp, "vhost-user-blk: queue size must not exceed %d", -+ VIRTQUEUE_MAX_SIZE); -+ return; -+ } - - if (!vhost_user_init(&s->vhost_user, &s->chardev, errp)) { - return; --- -2.27.0 - diff --git a/kvm-vhost-user-blk-Get-more-feature-flags-from-vhost-dev.patch b/kvm-vhost-user-blk-Get-more-feature-flags-from-vhost-dev.patch deleted file mode 100644 index 310985d..0000000 --- a/kvm-vhost-user-blk-Get-more-feature-flags-from-vhost-dev.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 28ab6c187224be79fe02fb1b5037d1c0b300a778 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 12 Jul 2021 10:22:29 -0400 -Subject: [PATCH 09/43] vhost-user-blk: Get more feature flags from vhost - device - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -VIRTIO_F_RING_PACKED and VIRTIO_F_IOMMU_PLATFORM need to be supported by -the vhost device, otherwise advertising it to the guest doesn't result -in a working configuration. They are currently not supported by the -vhost-user-blk export in QEMU. - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1935020 -Signed-off-by: Kevin Wolf -Acked-by: Raphael Norwitz -Message-Id: <20210429171316.162022-5-kwolf@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Kevin Wolf -(cherry picked from commit 7556a320c98812ca6648b707393f4513387faf73) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/block/vhost-user-blk.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c -index f3a45af97c..c7e502f4c7 100644 ---- a/hw/block/vhost-user-blk.c -+++ b/hw/block/vhost-user-blk.c -@@ -47,6 +47,8 @@ static const int user_feature_bits[] = { - VIRTIO_RING_F_INDIRECT_DESC, - VIRTIO_RING_F_EVENT_IDX, - VIRTIO_F_NOTIFY_ON_EMPTY, -+ VIRTIO_F_RING_PACKED, -+ VIRTIO_F_IOMMU_PLATFORM, - VHOST_INVALID_FEATURE_BIT - }; - --- -2.27.0 - diff --git a/kvm-vhost-user-blk-Improve-error-reporting-in-realize.patch b/kvm-vhost-user-blk-Improve-error-reporting-in-realize.patch deleted file mode 100644 index e690e2c..0000000 --- a/kvm-vhost-user-blk-Improve-error-reporting-in-realize.patch +++ /dev/null @@ -1,120 +0,0 @@ -From a0fcc5faf35fb266dbe45259b79a57ba057e3144 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 12 Jul 2021 10:22:28 -0400 -Subject: [PATCH 08/43] vhost-user-blk: Improve error reporting in realize - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -Now that vhost_user_blk_connect() is not called from an event handler -any more, but directly from vhost_user_blk_device_realize(), we can -actually make use of Error again instead of calling error_report() in -the inner function and setting a more generic and therefore less useful -error message in realize() itself. - -With Error, the callers are responsible for adding context if necessary -(such as the "-device" option the error refers to). Additional prefixes -are redundant and better omitted. - -Signed-off-by: Kevin Wolf -Acked-by: Raphael Norwitz -Message-Id: <20210429171316.162022-4-kwolf@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Kevin Wolf -(cherry picked from commit 5b9243d2654adc58ce472d0536a7a177b4fe0f90) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. 
de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/block/vhost-user-blk.c | 23 +++++++++++------------ - 1 file changed, 11 insertions(+), 12 deletions(-) - -diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c -index c0b9958da1..f3a45af97c 100644 ---- a/hw/block/vhost-user-blk.c -+++ b/hw/block/vhost-user-blk.c -@@ -311,7 +311,7 @@ static void vhost_user_blk_reset(VirtIODevice *vdev) - vhost_dev_free_inflight(s->inflight); - } - --static int vhost_user_blk_connect(DeviceState *dev) -+static int vhost_user_blk_connect(DeviceState *dev, Error **errp) - { - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserBlk *s = VHOST_USER_BLK(vdev); -@@ -331,8 +331,7 @@ static int vhost_user_blk_connect(DeviceState *dev) - - ret = vhost_dev_init(&s->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); - if (ret < 0) { -- error_report("vhost-user-blk: vhost initialization failed: %s", -- strerror(-ret)); -+ error_setg_errno(errp, -ret, "vhost initialization failed"); - return ret; - } - -@@ -340,8 +339,7 @@ static int vhost_user_blk_connect(DeviceState *dev) - if (virtio_device_started(vdev, vdev->status)) { - ret = vhost_user_blk_start(vdev); - if (ret < 0) { -- error_report("vhost-user-blk: vhost start failed: %s", -- strerror(-ret)); -+ error_setg_errno(errp, -ret, "vhost start failed"); - return ret; - } - } -@@ -380,10 +378,12 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) - DeviceState *dev = opaque; - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserBlk *s = VHOST_USER_BLK(vdev); -+ Error *local_err = NULL; - - switch (event) { - case CHR_EVENT_OPENED: -- if (vhost_user_blk_connect(dev) < 0) { -+ if (vhost_user_blk_connect(dev, &local_err) < 0) { -+ error_report_err(local_err); - qemu_chr_fe_disconnect(&s->chardev); - return; - } -@@ -426,7 +426,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) - int i, ret; - - if (!s->chardev.chr) { -- error_setg(errp, "vhost-user-blk: chardev is mandatory"); -+ 
error_setg(errp, "chardev is mandatory"); - return; - } - -@@ -434,16 +434,16 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) - s->num_queues = 1; - } - if (!s->num_queues || s->num_queues > VIRTIO_QUEUE_MAX) { -- error_setg(errp, "vhost-user-blk: invalid number of IO queues"); -+ error_setg(errp, "invalid number of IO queues"); - return; - } - - if (!s->queue_size) { -- error_setg(errp, "vhost-user-blk: queue size must be non-zero"); -+ error_setg(errp, "queue size must be non-zero"); - return; - } - if (s->queue_size > VIRTQUEUE_MAX_SIZE) { -- error_setg(errp, "vhost-user-blk: queue size must not exceed %d", -+ error_setg(errp, "queue size must not exceed %d", - VIRTQUEUE_MAX_SIZE); - return; - } -@@ -469,8 +469,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) - goto virtio_err; - } - -- if (vhost_user_blk_connect(dev) < 0) { -- error_setg(errp, "vhost-user-blk: could not connect"); -+ if (vhost_user_blk_connect(dev, errp) < 0) { - qemu_chr_fe_disconnect(&s->chardev); - goto virtio_err; - } --- -2.27.0 - diff --git a/kvm-vhost-user-blk-Make-sure-to-set-Error-on-realize-fai.patch b/kvm-vhost-user-blk-Make-sure-to-set-Error-on-realize-fai.patch deleted file mode 100644 index 0bef5dc..0000000 --- a/kvm-vhost-user-blk-Make-sure-to-set-Error-on-realize-fai.patch +++ /dev/null @@ -1,53 +0,0 @@ -From f90c21b271d88733e604b3645986f8362048b0f0 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 12 Jul 2021 10:22:26 -0400 -Subject: [PATCH 06/43] vhost-user-blk: Make sure to set Error on realize - failure - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -We have to set errp before jumping to virtio_err, otherwise the caller -(virtio_device_realize()) will take this as success and crash when it -later tries to access things that we've already freed in the error path. 
- -Fixes: 77542d431491788d1e8e79d93ce10172ef207775 -Signed-off-by: Kevin Wolf -Message-Id: <20210429171316.162022-2-kwolf@redhat.com> -Reviewed-by: Michael S. Tsirkin -Reviewed-by: Eric Blake -Acked-by: Raphael Norwitz -Signed-off-by: Kevin Wolf -(cherry picked from commit f26729715ef21325f972f693607580a829ad1cbb) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/block/vhost-user-blk.c | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - -diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c -index f5e9682703..7c85248a7b 100644 ---- a/hw/block/vhost-user-blk.c -+++ b/hw/block/vhost-user-blk.c -@@ -447,7 +447,6 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) - { - VirtIODevice *vdev = VIRTIO_DEVICE(dev); - VHostUserBlk *s = VHOST_USER_BLK(vdev); -- Error *err = NULL; - int i, ret; - - if (!s->chardev.chr) { -@@ -495,8 +494,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) - NULL, true); - - reconnect: -- if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { -- error_report_err(err); -+ if (qemu_chr_fe_wait_connected(&s->chardev, errp) < 0) { - goto virtio_err; - } - --- -2.27.0 - diff --git a/kvm-virtio-Fail-if-iommu_platform-is-requested-but-unsup.patch b/kvm-virtio-Fail-if-iommu_platform-is-requested-but-unsup.patch deleted file mode 100644 index 475feb0..0000000 --- a/kvm-virtio-Fail-if-iommu_platform-is-requested-but-unsup.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 0834f460b52a1a6b2bc5575ff2e05458d7036257 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 12 Jul 2021 10:22:30 -0400 -Subject: [PATCH 10/43] virtio: Fail if iommu_platform is requested, but - unsupported - -RH-Author: Miroslav Rezanina -RH-Bugzilla: 1957194 - -Commit 2943b53f6 (' virtio: force VIRTIO_F_IOMMU_PLATFORM') made sure -that vhost can't just reject VIRTIO_F_IOMMU_PLATFORM when it was -requested. 
However, just adding it back to the negotiated flags isn't -right either because it promises support to the guest that the device -actually doesn't support. One example of a vhost-user device that -doesn't have support for the flag is the vhost-user-blk export of QEMU. - -Instead of successfully creating a device that doesn't work, just fail -to plug the device when it doesn't support the feature, but it was -requested. This results in much clearer error messages. - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1935019 -Signed-off-by: Kevin Wolf -Reviewed-by: Raphael Norwitz -Message-Id: <20210429171316.162022-6-kwolf@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Kevin Wolf -(cherry picked from commit 04ceb61a4075fadbf374ef89662c41999da83489) -Signed-off-by: Kevin Wolf -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/virtio/virtio-bus.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c -index d6332d45c3..859978d248 100644 ---- a/hw/virtio/virtio-bus.c -+++ b/hw/virtio/virtio-bus.c -@@ -69,6 +69,11 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) - return; - } - -+ if (has_iommu && !virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) { -+ error_setg(errp, "iommu_platform=true is not supported by the device"); -+ return; -+ } -+ - if (klass->device_plugged != NULL) { - klass->device_plugged(qbus->parent, &local_err); - } --- -2.27.0 - diff --git a/kvm-virtio-blk-Configure-all-host-notifiers-in-a-single-.patch b/kvm-virtio-blk-Configure-all-host-notifiers-in-a-single-.patch deleted file mode 100644 index e670a6c..0000000 --- a/kvm-virtio-blk-Configure-all-host-notifiers-in-a-single-.patch +++ /dev/null @@ -1,107 +0,0 @@ -From f02134f067150d02d1a74ff5aea151096679492e Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Wed, 26 May 2021 09:03:52 -0400 -Subject: [PATCH 12/15] virtio-blk: Configure all host notifiers in a single MR - 
transaction - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta -RH-Commit: [9/12] 609631835874ea62dad1ffbd469e83744299ee07 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Cornelia Huck -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier - -This allows the virtio-blk-pci device to batch the setup of all its -host notifiers. This significantly improves boot time of VMs with a -high number of vCPUs, e.g. from 3m26.186s down to 0m58.023s for a -pseries machine with 384 vCPUs. - -Note that memory_region_transaction_commit() must be called before -virtio_bus_cleanup_host_notifier() because the latter might close -ioeventfds that the transaction still assumes to be around when it -commits. - -Signed-off-by: Greg Kurz -Message-Id: <20210407143501.244343-3-groug@kaod.org> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit d0267da614890b8f817364ae25850cdbb580a569) -Signed-off-by: Greg Kurz -Signed-off-by: Danilo C. L. 
de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/block/dataplane/virtio-blk.c | 25 +++++++++++++++++++++++++ - 1 file changed, 25 insertions(+) - -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index d7b5c95d26..cd81893d1d 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -198,19 +198,30 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - goto fail_guest_notifiers; - } - -+ memory_region_transaction_begin(); -+ - /* Set up virtqueue notify */ - for (i = 0; i < nvqs; i++) { - r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true); - if (r != 0) { -+ int j = i; -+ - fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); - while (i--) { - virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -+ } -+ -+ memory_region_transaction_commit(); -+ -+ while (j--) { - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); - } - goto fail_host_notifiers; - } - } - -+ memory_region_transaction_commit(); -+ - s->starting = false; - vblk->dataplane_started = true; - trace_virtio_blk_data_plane_start(s); -@@ -246,8 +257,15 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - return 0; - - fail_aio_context: -+ memory_region_transaction_begin(); -+ - for (i = 0; i < nvqs; i++) { - virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -+ } -+ -+ memory_region_transaction_commit(); -+ -+ for (i = 0; i < nvqs; i++) { - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); - } - fail_host_notifiers: -@@ -312,8 +330,15 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) - - aio_context_release(s->ctx); - -+ memory_region_transaction_begin(); -+ - for (i = 0; i < nvqs; i++) { - virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -+ } -+ -+ memory_region_transaction_commit(); -+ -+ for (i = 0; i < nvqs; i++) { - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); - } - --- -2.27.0 - diff --git a/kvm-virtio-blk-Fix-rollback-path-in-virtio_blk_data_plan.patch 
b/kvm-virtio-blk-Fix-rollback-path-in-virtio_blk_data_plan.patch deleted file mode 100644 index d08e1aa..0000000 --- a/kvm-virtio-blk-Fix-rollback-path-in-virtio_blk_data_plan.patch +++ /dev/null @@ -1,83 +0,0 @@ -From b8febd2f080de1f12e68cc233c68c7a39835a3c4 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Wed, 26 May 2021 09:03:51 -0400 -Subject: [PATCH 11/15] virtio-blk: Fix rollback path in - virtio_blk_data_plane_start() - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta -RH-Commit: [8/12] 73d05400a537ce1847605dca58aff99e0a905919 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Cornelia Huck -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier - -When dataplane multiqueue support was added in QEMU 2.7, the path -that would rollback guest notifiers assignment in case of error -simply got dropped. - -Later on, when Error was added to blk_set_aio_context() in QEMU 4.1, -another error path was introduced, but it ommits to rollback both -host and guest notifiers. - -It seems cleaner to fix the rollback path in one go. The patch is -simple enough that it can be adjusted if backported to a pre-4.1 -QEMU. - -Fixes: 51b04ac5c6a6 ("virtio-blk: dataplane multiqueue support") -Cc: stefanha@redhat.com -Fixes: 97896a4887a0 ("block: Add Error to blk_set_aio_context()") -Cc: kwolf@redhat.com -Signed-off-by: Greg Kurz -Reviewed-by: Stefan Hajnoczi -Message-Id: <20210407143501.244343-2-groug@kaod.org> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 570fe439e5d1b8626cf344c6bc97d90cfcaf0c79) -Signed-off-by: Greg Kurz -Signed-off-by: Danilo C. L. 
de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/block/dataplane/virtio-blk.c | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index e9050c8987..d7b5c95d26 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -207,7 +207,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); - } -- goto fail_guest_notifiers; -+ goto fail_host_notifiers; - } - } - -@@ -221,7 +221,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - aio_context_release(old_context); - if (r < 0) { - error_report_err(local_err); -- goto fail_guest_notifiers; -+ goto fail_aio_context; - } - - /* Process queued requests before the ones in vring */ -@@ -245,6 +245,13 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - aio_context_release(s->ctx); - return 0; - -+ fail_aio_context: -+ for (i = 0; i < nvqs; i++) { -+ virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -+ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); -+ } -+ fail_host_notifiers: -+ k->set_guest_notifiers(qbus->parent, nvqs, false); - fail_guest_notifiers: - /* - * If we failed to set up the guest notifiers queued requests will be --- -2.27.0 - diff --git a/kvm-virtio-gpu-handle-partial-maps-properly.patch b/kvm-virtio-gpu-handle-partial-maps-properly.patch deleted file mode 100644 index 4028f08..0000000 --- a/kvm-virtio-gpu-handle-partial-maps-properly.patch +++ /dev/null @@ -1,201 +0,0 @@ -From cdc537ada9528e09f8c70219f5a9a1ce8a4efa7e Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Thu, 6 May 2021 11:10:01 +0200 -Subject: [PATCH 02/12] virtio-gpu: handle partial maps properly - -RH-Author: Eric Auger -RH-MergeRequest: 15: virtio-gpu: handle partial maps properly -RH-Commit: [1/1] f2b0fd9758251d1f3a5ff9563911c8bdb4b191f0 (eauger1/centos-qemu-kvm) 
-RH-Bugzilla: 1974795 -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones -RH-Acked-by: Peter Xu - -dma_memory_map() may map only a part of the request. Happens if the -request can't be mapped in one go, for example due to a iommu creating -a linear dma mapping for scattered physical pages. Should that be the -case virtio-gpu must call dma_memory_map() again with the remaining -range instead of simply throwing an error. - -Note that this change implies the number of iov entries may differ from -the number of mapping entries sent by the guest. Therefore the iov_len -bookkeeping needs some updates too, we have to explicitly pass around -the iov length now. - -Reported-by: Auger Eric -Signed-off-by: Gerd Hoffmann -Message-id: 20210506091001.1301250-1-kraxel@redhat.com -Reviewed-by: Eric Auger -Tested-by: Eric Auger -Message-Id: <20210506091001.1301250-1-kraxel@redhat.com> -(cherry picked from commit 9049f8bc445d50c0b5fe5500c0ec51fcc821c2ef) -Signed-off-by: Eric Auger -Signed-off-by: Miroslav Rezanina ---- - hw/display/virtio-gpu-3d.c | 7 ++-- - hw/display/virtio-gpu.c | 76 ++++++++++++++++++++-------------- - include/hw/virtio/virtio-gpu.h | 3 +- - 3 files changed, 52 insertions(+), 34 deletions(-) - -diff --git a/hw/display/virtio-gpu-3d.c b/hw/display/virtio-gpu-3d.c -index d98964858e..72c14d9132 100644 ---- a/hw/display/virtio-gpu-3d.c -+++ b/hw/display/virtio-gpu-3d.c -@@ -283,22 +283,23 @@ static void virgl_resource_attach_backing(VirtIOGPU *g, - { - struct virtio_gpu_resource_attach_backing att_rb; - struct iovec *res_iovs; -+ uint32_t res_niov; - int ret; - - VIRTIO_GPU_FILL_CMD(att_rb); - trace_virtio_gpu_cmd_res_back_attach(att_rb.resource_id); - -- ret = virtio_gpu_create_mapping_iov(g, &att_rb, cmd, NULL, &res_iovs); -+ ret = virtio_gpu_create_mapping_iov(g, &att_rb, cmd, NULL, &res_iovs, &res_niov); - if (ret != 0) { - cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; - return; - } - - ret = virgl_renderer_resource_attach_iov(att_rb.resource_id, -- res_iovs, 
att_rb.nr_entries); -+ res_iovs, res_niov); - - if (ret != 0) -- virtio_gpu_cleanup_mapping_iov(g, res_iovs, att_rb.nr_entries); -+ virtio_gpu_cleanup_mapping_iov(g, res_iovs, res_niov); - } - - static void virgl_resource_detach_backing(VirtIOGPU *g, -diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c -index c9f5e36fd0..6f3791deb3 100644 ---- a/hw/display/virtio-gpu.c -+++ b/hw/display/virtio-gpu.c -@@ -608,11 +608,12 @@ static void virtio_gpu_set_scanout(VirtIOGPU *g, - int virtio_gpu_create_mapping_iov(VirtIOGPU *g, - struct virtio_gpu_resource_attach_backing *ab, - struct virtio_gpu_ctrl_command *cmd, -- uint64_t **addr, struct iovec **iov) -+ uint64_t **addr, struct iovec **iov, -+ uint32_t *niov) - { - struct virtio_gpu_mem_entry *ents; - size_t esize, s; -- int i; -+ int e, v; - - if (ab->nr_entries > 16384) { - qemu_log_mask(LOG_GUEST_ERROR, -@@ -633,37 +634,53 @@ int virtio_gpu_create_mapping_iov(VirtIOGPU *g, - return -1; - } - -- *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries); -+ *iov = NULL; - if (addr) { -- *addr = g_malloc0(sizeof(uint64_t) * ab->nr_entries); -+ *addr = NULL; - } -- for (i = 0; i < ab->nr_entries; i++) { -- uint64_t a = le64_to_cpu(ents[i].addr); -- uint32_t l = le32_to_cpu(ents[i].length); -- hwaddr len = l; -- (*iov)[i].iov_base = dma_memory_map(VIRTIO_DEVICE(g)->dma_as, -- a, &len, DMA_DIRECTION_TO_DEVICE); -- (*iov)[i].iov_len = len; -- if (addr) { -- (*addr)[i] = a; -- } -- if (!(*iov)[i].iov_base || len != l) { -- qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to map MMIO memory for" -- " resource %d element %d\n", -- __func__, ab->resource_id, i); -- if ((*iov)[i].iov_base) { -- i++; /* cleanup the 'i'th map */ -+ for (e = 0, v = 0; e < ab->nr_entries; e++) { -+ uint64_t a = le64_to_cpu(ents[e].addr); -+ uint32_t l = le32_to_cpu(ents[e].length); -+ hwaddr len; -+ void *map; -+ -+ do { -+ len = l; -+ map = dma_memory_map(VIRTIO_DEVICE(g)->dma_as, -+ a, &len, DMA_DIRECTION_TO_DEVICE); -+ if (!map) { -+ 
qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to map MMIO memory for" -+ " resource %d element %d\n", -+ __func__, ab->resource_id, e); -+ virtio_gpu_cleanup_mapping_iov(g, *iov, v); -+ g_free(ents); -+ *iov = NULL; -+ if (addr) { -+ g_free(*addr); -+ *addr = NULL; -+ } -+ return -1; -+ } -+ -+ if (!(v % 16)) { -+ *iov = g_realloc(*iov, sizeof(struct iovec) * (v + 16)); -+ if (addr) { -+ *addr = g_realloc(*addr, sizeof(uint64_t) * (v + 16)); -+ } - } -- virtio_gpu_cleanup_mapping_iov(g, *iov, i); -- g_free(ents); -- *iov = NULL; -+ (*iov)[v].iov_base = map; -+ (*iov)[v].iov_len = len; - if (addr) { -- g_free(*addr); -- *addr = NULL; -+ (*addr)[v] = a; - } -- return -1; -- } -+ -+ a += len; -+ l -= len; -+ v += 1; -+ } while (l > 0); - } -+ *niov = v; -+ - g_free(ents); - return 0; - } -@@ -717,13 +734,12 @@ virtio_gpu_resource_attach_backing(VirtIOGPU *g, - return; - } - -- ret = virtio_gpu_create_mapping_iov(g, &ab, cmd, &res->addrs, &res->iov); -+ ret = virtio_gpu_create_mapping_iov(g, &ab, cmd, &res->addrs, -+ &res->iov, &res->iov_cnt); - if (ret != 0) { - cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; - return; - } -- -- res->iov_cnt = ab.nr_entries; - } - - static void -diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h -index fae149235c..0d15af41d9 100644 ---- a/include/hw/virtio/virtio-gpu.h -+++ b/include/hw/virtio/virtio-gpu.h -@@ -209,7 +209,8 @@ void virtio_gpu_get_edid(VirtIOGPU *g, - int virtio_gpu_create_mapping_iov(VirtIOGPU *g, - struct virtio_gpu_resource_attach_backing *ab, - struct virtio_gpu_ctrl_command *cmd, -- uint64_t **addr, struct iovec **iov); -+ uint64_t **addr, struct iovec **iov, -+ uint32_t *niov); - void virtio_gpu_cleanup_mapping_iov(VirtIOGPU *g, - struct iovec *iov, uint32_t count); - void virtio_gpu_process_cmdq(VirtIOGPU *g); --- -2.27.0 - diff --git a/kvm-virtio-net-failover-add-missing-remove_migration_sta.patch b/kvm-virtio-net-failover-add-missing-remove_migration_sta.patch deleted file mode 100644 index 
acd5a66..0000000 --- a/kvm-virtio-net-failover-add-missing-remove_migration_sta.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 21027e308bf410293a745d4358a848e9aa037df1 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Mon, 10 May 2021 13:08:20 -0400 -Subject: [PATCH 09/15] virtio-net: failover: add missing - remove_migration_state_change_notifier() - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta -RH-Commit: [4/12] 884f0f99fbbba4312663ec6232b1d8c9576df84e (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Cornelia Huck -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier - -In the failover case configuration, virtio_net_device_realize() uses an -add_migration_state_change_notifier() to add a state notifier, but this -notifier is not removed by the unrealize function when the virtio-net -card is unplugged. - -If the card is unplugged and a migration is started, the notifier is -called and as it is not valid anymore QEMU crashes. - -This patch fixes the problem by adding the -remove_migration_state_change_notifier() in virtio_net_device_unrealize(). - -The problem can be reproduced with: - - $ qemu-system-x86_64 -enable-kvm -m 1g -M q35 \ - -device pcie-root-port,slot=4,id=root1 \ - -device pcie-root-port,slot=5,id=root2 \ - -device virtio-net-pci,id=net1,mac=52:54:00:6f:55:cc,failover=on,bus=root1 \ - -monitor stdio disk.qcow2 - (qemu) device_del net1 - (qemu) migrate "exec:gzip -c > STATEFILE.gz" - - Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault. - 0x0000000000000000 in ?? () - (gdb) bt - #0 0x0000000000000000 in () - #1 0x0000555555d726d7 in notifier_list_notify (...) - at .../util/notify.c:39 - #2 0x0000555555842c1a in migrate_fd_connect (...) - at .../migration/migration.c:3975 - #3 0x0000555555950f7d in migration_channel_connect (...) 
- error@entry=0x0) at .../migration/channel.c:107 - #4 0x0000555555910922 in exec_start_outgoing_migration (...) - at .../migration/exec.c:42 - -Reported-by: Igor Mammedov -Reviewed-by: Dr. David Alan Gilbert -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Laurent Vivier -Signed-off-by: Jason Wang -(cherry picked from commit a7eca58380f9589bb1bb6333ccfb58869734edb6) -Signed-off-by: Laurent Vivier -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/net/virtio-net.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 66b9ff4511..914051feb7 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3373,6 +3373,7 @@ static void virtio_net_device_unrealize(DeviceState *dev) - - if (n->failover) { - device_listener_unregister(&n->primary_listener); -+ remove_migration_state_change_notifier(&n->migration_state); - } - - max_queues = n->multiqueue ? n->max_queues : 1; --- -2.27.0 - diff --git a/kvm-virtio-scsi-Configure-all-host-notifiers-in-a-single.patch b/kvm-virtio-scsi-Configure-all-host-notifiers-in-a-single.patch deleted file mode 100644 index a58b251..0000000 --- a/kvm-virtio-scsi-Configure-all-host-notifiers-in-a-single.patch +++ /dev/null @@ -1,91 +0,0 @@ -From 5e7855d3c9d5a59d2c0ac05444428b058ca9abb8 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Wed, 26 May 2021 09:03:54 -0400 -Subject: [PATCH 14/15] virtio-scsi: Configure all host notifiers in a single - MR transaction - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta -RH-Commit: [11/12] f3c785c4b63aeb3a7b153f2fc17e86c983f98c23 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Cornelia Huck -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier - -This allows the virtio-scsi-pci device to batch the setup of all its -host notifiers. This significantly improves boot time of VMs with a -high number of vCPUs, e.g. 
from 6m5.563s down to 1m2.884s for a -pseries machine with 384 vCPUs. - -Note that memory_region_transaction_commit() must be called before -virtio_bus_cleanup_host_notifier() because the latter might close -ioeventfds that the transaction still assumes to be around when it -commits. - -Signed-off-by: Greg Kurz -Message-Id: <20210407143501.244343-5-groug@kaod.org> -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit c4f5dcc4360a02085a633fd7a90b7ac395ca1ba4) -Signed-off-by: Greg Kurz -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/scsi/virtio-scsi-dataplane.c | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c -index b2cb3d9dcc..28e003250a 100644 ---- a/hw/scsi/virtio-scsi-dataplane.c -+++ b/hw/scsi/virtio-scsi-dataplane.c -@@ -152,6 +152,8 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) - goto fail_guest_notifiers; - } - -+ memory_region_transaction_begin(); -+ - rc = virtio_scsi_set_host_notifier(s, vs->ctrl_vq, 0); - if (rc != 0) { - goto fail_host_notifiers; -@@ -173,6 +175,8 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) - vq_init_count++; - } - -+ memory_region_transaction_commit(); -+ - aio_context_acquire(s->ctx); - virtio_queue_aio_set_host_notifier_handler(vs->ctrl_vq, s->ctx, - virtio_scsi_data_plane_handle_ctrl); -@@ -192,6 +196,11 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) - fail_host_notifiers: - for (i = 0; i < vq_init_count; i++) { - virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -+ } -+ -+ memory_region_transaction_commit(); -+ -+ for (i = 0; i < vq_init_count; i++) { - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); - } - k->set_guest_notifiers(qbus->parent, vs->conf.num_queues + 2, false); -@@ -229,8 +238,15 @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev) - - blk_drain_all(); /* ensure there are no 
in-flight requests */ - -+ memory_region_transaction_begin(); -+ - for (i = 0; i < vs->conf.num_queues + 2; i++) { - virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); -+ } -+ -+ memory_region_transaction_commit(); -+ -+ for (i = 0; i < vs->conf.num_queues + 2; i++) { - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); - } - --- -2.27.0 - diff --git a/kvm-virtio-scsi-Set-host-notifiers-and-callbacks-separat.patch b/kvm-virtio-scsi-Set-host-notifiers-and-callbacks-separat.patch deleted file mode 100644 index ab10168..0000000 --- a/kvm-virtio-scsi-Set-host-notifiers-and-callbacks-separat.patch +++ /dev/null @@ -1,125 +0,0 @@ -From 0ca53acb7ee0a3b3b72685f47df1fb2466989d6c Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Wed, 26 May 2021 09:03:53 -0400 -Subject: [PATCH 13/15] virtio-scsi: Set host notifiers and callbacks - separately - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 5: Synchronize RHEL-AV 8.5 release 18 to RHEL 9 Beta -RH-Commit: [10/12] 61f873b494c52dc34eb60a705046bfead08532da (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Danilo Cesar Lemes de Paula -RH-Acked-by: Cornelia Huck -RH-Acked-by: Greg Kurz -RH-Acked-by: Laurent Vivier - -Host notifiers are guaranteed to be idle until the callbacks are -hooked up with virtio_queue_aio_set_host_notifier_handler(). They -thus don't need to be set or unset with the AioContext lock held. - -Do this outside the critical section, like virtio-blk already -does : basically downgrading virtio_scsi_vring_init() to only -setup the host notifier and set the callback in the caller. - -This will allow to batch addition/deletion of ioeventds in -a single memory transaction, which is expected to greatly -improve initialization time. - -Signed-off-by: Greg Kurz -Message-Id: <20210407143501.244343-4-groug@kaod.org> -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 61fc57bfc464c3584bd7ab810c86833661f0188c) -Signed-off-by: Greg Kurz -Signed-off-by: Danilo C. L. de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/scsi/virtio-scsi-dataplane.c | 40 ++++++++++++++++++--------------- - 1 file changed, 22 insertions(+), 18 deletions(-) - -diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c -index 4ad8793406..b2cb3d9dcc 100644 ---- a/hw/scsi/virtio-scsi-dataplane.c -+++ b/hw/scsi/virtio-scsi-dataplane.c -@@ -94,8 +94,7 @@ static bool virtio_scsi_data_plane_handle_event(VirtIODevice *vdev, - return progress; - } - --static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n, -- VirtIOHandleAIOOutput fn) -+static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n) - { - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); - int rc; -@@ -109,7 +108,6 @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n, - return rc; - } - -- virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, fn); - return 0; - } - -@@ -154,38 +152,44 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) - goto fail_guest_notifiers; - } - -- aio_context_acquire(s->ctx); -- rc = virtio_scsi_vring_init(s, vs->ctrl_vq, 0, -- virtio_scsi_data_plane_handle_ctrl); -- if (rc) { -- goto fail_vrings; -+ rc = virtio_scsi_set_host_notifier(s, vs->ctrl_vq, 0); -+ if (rc != 0) { -+ goto fail_host_notifiers; - } - - vq_init_count++; -- rc = virtio_scsi_vring_init(s, vs->event_vq, 1, -- virtio_scsi_data_plane_handle_event); -- if (rc) { -- goto fail_vrings; -+ rc = virtio_scsi_set_host_notifier(s, vs->event_vq, 1); -+ if (rc != 0) { -+ goto fail_host_notifiers; - } - - vq_init_count++; -+ - for (i = 0; i < vs->conf.num_queues; i++) { -- rc = virtio_scsi_vring_init(s, vs->cmd_vqs[i], i + 2, -- virtio_scsi_data_plane_handle_cmd); -+ rc = virtio_scsi_set_host_notifier(s, vs->cmd_vqs[i], i + 2); - if (rc) { -- goto fail_vrings; -+ goto fail_host_notifiers; - } - 
vq_init_count++; - } - -+ aio_context_acquire(s->ctx); -+ virtio_queue_aio_set_host_notifier_handler(vs->ctrl_vq, s->ctx, -+ virtio_scsi_data_plane_handle_ctrl); -+ virtio_queue_aio_set_host_notifier_handler(vs->event_vq, s->ctx, -+ virtio_scsi_data_plane_handle_event); -+ -+ for (i = 0; i < vs->conf.num_queues; i++) { -+ virtio_queue_aio_set_host_notifier_handler(vs->cmd_vqs[i], s->ctx, -+ virtio_scsi_data_plane_handle_cmd); -+ } -+ - s->dataplane_starting = false; - s->dataplane_started = true; - aio_context_release(s->ctx); - return 0; - --fail_vrings: -- aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s); -- aio_context_release(s->ctx); -+fail_host_notifiers: - for (i = 0; i < vq_init_count; i++) { - virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); --- -2.27.0 - diff --git a/kvm-x86-Add-x86-rhel8.5-machine-types.patch b/kvm-x86-Add-x86-rhel8.5-machine-types.patch deleted file mode 100644 index 9100f47..0000000 --- a/kvm-x86-Add-x86-rhel8.5-machine-types.patch +++ /dev/null @@ -1,130 +0,0 @@ -From 1497b5d371a63dd20d3b14ca2f8cce99845a1c2c Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Wed, 19 May 2021 15:46:27 -0400 -Subject: [PATCH 03/12] x86: Add x86 rhel8.5 machine types -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 16: Synchronize with RHEL-AV 8.5 release 21 to RHEL 9 -RH-Commit: [1/8] db81806d99b545abe4dcba576fb33c02ec283dd7 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1957194 -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Daniel P. Berrangé - -From: "Dr. David Alan Gilbert" - -Add the 8.5 machine type and the compat entries. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Danilo C. L. 
de Paula -Signed-off-by: Miroslav Rezanina ---- - hw/i386/pc.c | 6 ++++++ - hw/i386/pc_piix.c | 2 ++ - hw/i386/pc_q35.c | 24 ++++++++++++++++++++++-- - include/hw/i386/pc.h | 3 +++ - 4 files changed, 33 insertions(+), 2 deletions(-) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index edc02a68ca..0a374dec39 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -369,6 +369,12 @@ GlobalProperty pc_rhel_compat[] = { - }; - const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); - -+GlobalProperty pc_rhel_8_4_compat[] = { -+ /* pc_rhel_8_4_compat from pc_compat_5_2 */ -+ { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, -+}; -+const size_t pc_rhel_8_4_compat_len = G_N_ELEMENTS(pc_rhel_8_4_compat); -+ - GlobalProperty pc_rhel_8_3_compat[] = { - /* pc_rhel_8_3_compat from pc_compat_5_1 */ - { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index d9c5df16d8..5d61c9b833 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -971,6 +971,8 @@ static void pc_machine_rhel760_options(MachineClass *m) - pcmc->pci_root_uid = 1; - compat_props_add(m->compat_props, hw_compat_rhel_8_4, - hw_compat_rhel_8_4_len); -+ compat_props_add(m->compat_props, pc_rhel_8_4_compat, -+ pc_rhel_8_4_compat_len); - compat_props_add(m->compat_props, hw_compat_rhel_8_3, - hw_compat_rhel_8_3_len); - compat_props_add(m->compat_props, pc_rhel_8_3_compat, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 44109e4876..01ff3e0544 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -607,6 +607,24 @@ static void pc_q35_machine_rhel_options(MachineClass *m) - compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); - } - -+static void pc_q35_init_rhel850(MachineState *machine) -+{ -+ pc_q35_init(machine); -+} -+ -+static void pc_q35_machine_rhel850_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); -+ m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; -+ 
pcmc->smbios_stream_product = "RHEL-AV"; -+ pcmc->smbios_stream_version = "8.5.0"; -+} -+ -+DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, -+ pc_q35_machine_rhel850_options); -+ -+ - static void pc_q35_init_rhel840(MachineState *machine) - { - pc_q35_init(machine); -@@ -615,12 +633,15 @@ static void pc_q35_init_rhel840(MachineState *machine) - static void pc_q35_machine_rhel840_options(MachineClass *m) - { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -- pc_q35_machine_rhel_options(m); -+ pc_q35_machine_rhel850_options(m); - m->desc = "RHEL-8.4.0 PC (Q35 + ICH9, 2009)"; -+ m->alias = NULL; - pcmc->smbios_stream_product = "RHEL-AV"; - pcmc->smbios_stream_version = "8.4.0"; - compat_props_add(m->compat_props, hw_compat_rhel_8_4, - hw_compat_rhel_8_4_len); -+ compat_props_add(m->compat_props, pc_rhel_8_4_compat, -+ pc_rhel_8_4_compat_len); - } - - DEFINE_PC_MACHINE(q35_rhel840, "pc-q35-rhel8.4.0", pc_q35_init_rhel840, -@@ -637,7 +658,6 @@ static void pc_q35_machine_rhel830_options(MachineClass *m) - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_q35_machine_rhel840_options(m); - m->desc = "RHEL-8.3.0 PC (Q35 + ICH9, 2009)"; -- m->alias = NULL; - pcmc->smbios_stream_product = "RHEL-AV"; - pcmc->smbios_stream_version = "8.3.0"; - compat_props_add(m->compat_props, hw_compat_rhel_8_3, -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 79a7803a2f..1980c93f41 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -281,6 +281,9 @@ extern const size_t pc_compat_1_4_len; - extern GlobalProperty pc_rhel_compat[]; - extern const size_t pc_rhel_compat_len; - -+extern GlobalProperty pc_rhel_8_4_compat[]; -+extern const size_t pc_rhel_8_4_compat_len; -+ - extern GlobalProperty pc_rhel_8_3_compat[]; - extern const size_t pc_rhel_8_3_compat_len; - --- -2.27.0 - diff --git a/kvm-yank-Unregister-function-when-using-TLS-migration.patch b/kvm-yank-Unregister-function-when-using-TLS-migration.patch deleted file mode 100644 index 
1691c50..0000000 --- a/kvm-yank-Unregister-function-when-using-TLS-migration.patch +++ /dev/null @@ -1,146 +0,0 @@ -From a722ec5614bbaa36992653250add7dabd39b12ad Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Tue, 1 Jun 2021 02:40:31 -0300 -Subject: [PATCH 01/21] yank: Unregister function when using TLS migration -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 10: yank: Unregister function when using TLS migration -RH-Commit: [1/1] b39e19a3a92dc7f881527198824ce7f402aa9289 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1972462 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu > -RH-Acked-by: Dr. David Alan Gilbert - -After yank feature was introduced in migration, whenever migration -is started using TLS, the following error happens in both source and -destination hosts: - -(qemu) qemu-kvm: ../util/yank.c:107: yank_unregister_instance: -Assertion `QLIST_EMPTY(&entry->yankfns)' failed. - -This happens because of a missing yank_unregister_function() when using -qio-channel-tls. - -Fix this by also allowing TYPE_QIO_CHANNEL_TLS object type to perform -yank_unregister_function() in channel_close() and multifd_load_cleanup(). - -Also, inside migration_channel_connect() and -migration_channel_process_incoming() move yank_register_function() so -it only runs once on a TLS migration. - -Fixes: b5eea99ec2f ("migration: Add yank feature", 2021-01-13) -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1964326 -Signed-off-by: Leonardo Bras -Reviewed-by: Lukas Straub -Reviewed-by: Peter Xu -Message-Id: <20210601054030.1153249-1-leobras.c@gmail.com> - -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 7de2e8565335c13fb3516cddbe2e40e366cce273) -Signed-off-by: Leonardo Bras -Signed-off-by: Miroslav Rezanina ---- - migration/channel.c | 26 ++++++++++++++------------ - migration/multifd.c | 3 ++- - migration/qemu-file-channel.c | 4 +++- - 3 files changed, 19 insertions(+), 14 deletions(-) - -diff --git a/migration/channel.c b/migration/channel.c -index c9ee902021..01275a9162 100644 ---- a/migration/channel.c -+++ b/migration/channel.c -@@ -38,18 +38,19 @@ void migration_channel_process_incoming(QIOChannel *ioc) - trace_migration_set_incoming_channel( - ioc, object_get_typename(OBJECT(ioc))); - -- if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET)) { -- yank_register_function(MIGRATION_YANK_INSTANCE, -- migration_yank_iochannel, -- QIO_CHANNEL(ioc)); -- } -- - if (s->parameters.tls_creds && - *s->parameters.tls_creds && - !object_dynamic_cast(OBJECT(ioc), - TYPE_QIO_CHANNEL_TLS)) { - migration_tls_channel_process_incoming(s, ioc, &local_err); - } else { -+ if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || -+ object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) { -+ yank_register_function(MIGRATION_YANK_INSTANCE, -+ migration_yank_iochannel, -+ QIO_CHANNEL(ioc)); -+ } -+ - migration_ioc_process_incoming(ioc, &local_err); - } - -@@ -76,12 +77,6 @@ void migration_channel_connect(MigrationState *s, - ioc, object_get_typename(OBJECT(ioc)), hostname, error); - - if (!error) { -- if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET)) { -- yank_register_function(MIGRATION_YANK_INSTANCE, -- migration_yank_iochannel, -- QIO_CHANNEL(ioc)); -- } -- - if (s->parameters.tls_creds && - *s->parameters.tls_creds && - !object_dynamic_cast(OBJECT(ioc), -@@ -99,6 +94,13 @@ void migration_channel_connect(MigrationState *s, - } else { - QEMUFile *f = qemu_fopen_channel_output(ioc); - -+ if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || -+ object_dynamic_cast(OBJECT(ioc), 
TYPE_QIO_CHANNEL_TLS)) { -+ yank_register_function(MIGRATION_YANK_INSTANCE, -+ migration_yank_iochannel, -+ QIO_CHANNEL(ioc)); -+ } -+ - qemu_mutex_lock(&s->qemu_file_lock); - s->to_dst_file = f; - qemu_mutex_unlock(&s->qemu_file_lock); -diff --git a/migration/multifd.c b/migration/multifd.c -index a6677c45c8..a8dedcf822 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -987,7 +987,8 @@ int multifd_load_cleanup(Error **errp) - for (i = 0; i < migrate_multifd_channels(); i++) { - MultiFDRecvParams *p = &multifd_recv_state->params[i]; - -- if (object_dynamic_cast(OBJECT(p->c), TYPE_QIO_CHANNEL_SOCKET) -+ if ((object_dynamic_cast(OBJECT(p->c), TYPE_QIO_CHANNEL_SOCKET) || -+ object_dynamic_cast(OBJECT(p->c), TYPE_QIO_CHANNEL_TLS)) - && OBJECT(p->c)->ref == 1) { - yank_unregister_function(MIGRATION_YANK_INSTANCE, - migration_yank_iochannel, -diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c -index 876d05a540..fad340ea7a 100644 ---- a/migration/qemu-file-channel.c -+++ b/migration/qemu-file-channel.c -@@ -26,6 +26,7 @@ - #include "qemu-file-channel.h" - #include "qemu-file.h" - #include "io/channel-socket.h" -+#include "io/channel-tls.h" - #include "qemu/iov.h" - #include "qemu/yank.h" - #include "yank_functions.h" -@@ -106,7 +107,8 @@ static int channel_close(void *opaque, Error **errp) - int ret; - QIOChannel *ioc = QIO_CHANNEL(opaque); - ret = qio_channel_close(ioc, errp); -- if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) -+ if ((object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || -+ object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) - && OBJECT(ioc)->ref == 1) { - yank_unregister_function(MIGRATION_YANK_INSTANCE, - migration_yank_iochannel, --- -2.27.0 - diff --git a/qemu-kvm.spec b/qemu-kvm.spec index d33f3c0..fdad09e 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -23,6 +23,7 @@ %global have_clang 1 %global have_safe_stack 0 + %if %{have_clang} %global toolchain clang %ifarch x86_64 @@ -33,6 
+34,15 @@ %global cc_suffix .gcc %endif + + +# Release candidate version tracking +# global rcver rc4 +%if 0%{?rcver:1} +%global rcrel .%{rcver} +%global rcstr -%{rcver} +%endif + %global have_pmem 1 %ifnarch x86_64 %global have_pmem 0 @@ -120,18 +130,11 @@ Obsoletes: %{name}-ui-spice <= %{version} \ Obsoletes: %{name}-block-gluster <= %{version} \ Obsoletes: %{name}-block-iscsi <= %{version} \ -# Release candidate version tracking -# global rcver rc4 -%if 0%{?rcver:1} -%global rcrel .%{rcver} -%global rcstr -%{rcver} -%endif - Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 6.0.0 -Release: 13%{?rcrel}%{?dist}%{?cc_suffix} +Version: 6.1.0 +Release: 1%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -177,218 +180,7 @@ Patch0014: 0014-globally-limit-the-maximum-number-of-CPUs.patch Patch0015: 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0016: 0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0017: 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -Patch0018: 0018-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch -# For bz#1906168 - [RHEL-9] qemu-kvm spec-file: Do not BuildRequire spice -Patch19: kvm-Remove-SPICE-and-QXL-from-x86_64-rh-devices.mak.patch -# For bz#1944730 - Remove RHEL7 machine type (s390-ccw-virtio-rhel7.5.0) -Patch20: kvm-hw-s390x-Remove-the-RHEL7-only-machine-type.patch -# For bz#1962479 - Disable the 'x-terminal3270' device in qemu-kvm on s390x -Patch21: kvm-s390x-redhat-disable-experimental-3270-device.patch -# For bz#1952449 - [aarch64] define RHEL9 machine types -Patch22: kvm-arm-virt-Register-highmem-and-gic-version-as-class-p.patch -# For bz#1952449 - [aarch64] define RHEL9 machine types -Patch23: kvm-hw-arm-virt-Add-8.5-and-9.0-machine-types-and-remove.patch -# For bz#1747467 - [aarch64] 
[qemu] PVPANIC support -Patch24: kvm-aarch64-rh-devices-add-CONFIG_PVPANIC_PCI.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch25: kvm-redhat-s390x-add-rhel-8.5.0-compat-machine.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch26: kvm-redhat-add-missing-entries-in-hw_compat_rhel_8_4.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch27: kvm-redhat-Define-pseries-rhel8.5.0-machine-type.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch28: kvm-virtio-net-failover-add-missing-remove_migration_sta.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch29: kvm-hw-arm-virt-Disable-PL011-clock-migration-through-hw.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch30: kvm-virtio-blk-Fix-rollback-path-in-virtio_blk_data_plan.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch31: kvm-virtio-blk-Configure-all-host-notifiers-in-a-single-.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch32: kvm-virtio-scsi-Set-host-notifiers-and-callbacks-separat.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch33: kvm-virtio-scsi-Configure-all-host-notifiers-in-a-single.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch34: kvm-hw-arm-smmuv3-Another-range-invalidation-fix.patch -# For bz#1972462 - QEMU core dump when doing TLS migration via TCP -Patch35: kvm-yank-Unregister-function-when-using-TLS-migration.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch36: kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch37: kvm-sockets-update-SOCKET_ADDRESS_TYPE_FD-listen-2-backl.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes 
to RHEL 9.0.0 Beta -Patch38: kvm-target-i386-sev-add-support-to-query-the-attestation.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch39: kvm-spapr-Don-t-hijack-current_machine-boot_order.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch40: kvm-target-i386-Add-CPU-model-versions-supporting-xsaves.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch41: kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch42: kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch -# For bz#1967502 - [aarch64] [qemu] Compile the PCIe expander bridge -Patch43: kvm-aarch64-rh-devices-add-CONFIG_PXB.patch -# For bz#1974795 - [RHEL9-beta] [aarch64] Launch guest with virtio-gpu-pci and virtual smmu causes "virtio_gpu_dequeue_ctrl_func" ERROR -Patch44: kvm-virtio-gpu-handle-partial-maps-properly.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch45: kvm-x86-Add-x86-rhel8.5-machine-types.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch46: kvm-redhat-x86-Enable-kvm-asyncpf-int-by-default.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch47: kvm-block-backend-add-drained_poll.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch48: kvm-nbd-server-Use-drained-block-ops-to-quiesce-the-serv.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch49: kvm-disable-CONFIG_USB_STORAGE_BOT.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch50: kvm-doc-Fix-some-mistakes-in-the-SEV-documentation.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch51: kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 
9.0.0 Beta -Patch52: kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch -# For bz#1978911 - Remove TPM Passthrough option from RHEL 9 -Patch53: kvm-Disable-TPM-passthrough.patch -# For bz#1932191 - [IBM 9.0 FEAT] CPU Model for new IBM Z Hardware - qemu part (kvm) -Patch54: kvm-s390x-cpumodel-add-3931-and-3932.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch55: kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch56: kvm-ppc-pef.c-initialize-cgs-ready-in-kvmppc_svm_init.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch57: kvm-s390x-css-Introduce-an-ESW-struct.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch58: kvm-s390x-css-Split-out-the-IRB-sense-data.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch59: kvm-s390x-css-Refactor-IRB-construction.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch60: kvm-s390x-css-Add-passthrough-IRB.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch61: kvm-vhost-user-blk-Fail-gracefully-on-too-large-queue-si.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch62: kvm-vhost-user-blk-Make-sure-to-set-Error-on-realize-fai.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch63: kvm-vhost-user-blk-Don-t-reconnect-during-initialisation.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch64: kvm-vhost-user-blk-Improve-error-reporting-in-realize.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch65: kvm-vhost-user-blk-Get-more-feature-flags-from-vhost-dev.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch66: kvm-virtio-Fail-if-iommu_platform-is-requested-but-unsup.patch -# 
For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch67: kvm-vhost-user-blk-Check-that-num-queues-is-supported-by.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch68: kvm-vhost-user-Fix-backends-without-multiqueue-support.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch69: kvm-file-posix-fix-max_iov-for-dev-sg-devices.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch70: kvm-scsi-generic-pass-max_segments-via-max_iov-field-in-.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch71: kvm-osdep-provide-ROUND_DOWN-macro.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch72: kvm-block-backend-align-max_transfer-to-request-alignmen.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch73: kvm-block-add-max_hw_transfer-to-BlockLimits.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch74: kvm-file-posix-try-BLKSECTGET-on-block-devices-too-do-no.patch -# For bz#1957782 - VMDK support should be read-only -Patch75: kvm-block-Add-option-to-use-driver-whitelist-even-in-too.patch -# For bz#1838608 - aarch64: Enable ARMv8 RAS virtualization support -Patch76: kvm-arm-virt-Register-iommu-as-a-class-property.patch -# For bz#1838608 - aarch64: Enable ARMv8 RAS virtualization support -Patch77: kvm-arm-virt-Register-its-as-a-class-property.patch -# For bz#1838608 - aarch64: Enable ARMv8 RAS virtualization support -Patch78: kvm-arm-virt-Enable-ARM-RAS-support.patch -# For bz#1972079 - Windows Installation blocked on 4k disk when using blk+raw+iothread -Patch79: kvm-block-Fix-in_flight-leak-in-request-padding-error-pa.patch -# For bz#1974683 - Fail to set migrate incoming for 2nd time after the first time failed -Patch80: kvm-migration-Move-yank-outside-qemu_start_incoming_migr.patch -# For bz#1974683 - Fail to set migrate 
incoming for 2nd time after the first time failed -Patch81: kvm-migration-Allow-reset-of-postcopy_recover_triggered-.patch -# For bz#1968519 - Remove all the old 7.0-7.5 machine types -Patch82: kvm-Remove-RHEL-7.0.0-machine-type.patch -# For bz#1968519 - Remove all the old 7.0-7.5 machine types -Patch83: kvm-Remove-RHEL-7.1.0-machine-type.patch -# For bz#1968519 - Remove all the old 7.0-7.5 machine types -Patch84: kvm-Remove-RHEL-7.2.0-machine-type.patch -# For bz#1968519 - Remove all the old 7.0-7.5 machine types -Patch85: kvm-Remove-RHEL-7.3.0-machine-types.patch -# For bz#1968519 - Remove all the old 7.0-7.5 machine types -Patch86: kvm-Remove-RHEL-7.4.0-machine-types.patch -# For bz#1968519 - Remove all the old 7.0-7.5 machine types -Patch87: kvm-Remove-RHEL-7.5.0-machine-types.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch88: kvm-acpi-pc-revert-back-to-v5.2-PCI-slot-enumeration.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch89: kvm-migration-failover-reset-partially_hotplugged.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch90: kvm-hmp-Fix-loadvm-to-resume-the-VM-on-success-instead-o.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch91: kvm-migration-Move-bitmap_mutex-out-of-migration_bitmap_.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch92: kvm-i386-cpu-Expose-AVX_VNNI-instruction-to-guest.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch93: kvm-ratelimit-protect-with-a-mutex.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch94: kvm-Update-Linux-headers-to-5.13-rc4.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch95: kvm-i386-Add-ratelimit-for-bus-locks-acquired-in-guest.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch96: 
kvm-iothread-generalize-iothread_set_param-iothread_get_.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch97: kvm-iothread-add-aio-max-batch-parameter.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch98: kvm-linux-aio-limit-the-batch-size-using-aio-max-batch-p.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch99: kvm-block-nvme-Fix-VFIO_MAP_DMA-failed-No-space-left-on-.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch100: kvm-migration-move-wait-unplug-loop-to-its-own-function.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch101: kvm-migration-failover-continue-to-wait-card-unplug-on-e.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch102: kvm-aarch64-Add-USB-storage-devices.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch103: kvm-iotests-Improve-and-rename-test-291-to-qemu-img-bitm.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch104: kvm-qemu-img-Fail-fast-on-convert-bitmaps-with-inconsist.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch105: kvm-qemu-img-Add-skip-broken-bitmaps-for-convert-bitmaps.patch -# For bz#1957194 - Synchronize RHEL-AV 8.5.0 changes to RHEL 9.0.0 Beta -Patch106: kvm-audio-Never-send-migration-section.patch -# For bz#1939509 - QEMU: enable SafeStack -# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM -Patch107: kvm-pc-bios-s390-ccw-bootmap-Silence-compiler-warning-fr.patch -# For bz#1939509 - QEMU: enable SafeStack -# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM -Patch108: kvm-pc-bios-s390-ccw-Use-reset_psw-pointer-instead-of-ha.patch -# For bz#1939509 - QEMU: enable SafeStack -# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM -Patch109: 
kvm-pc-bios-s390-ccw-netboot-Use-Wl-prefix-to-pass-param.patch -# For bz#1939509 - QEMU: enable SafeStack -# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM -Patch110: kvm-pc-bios-s390-ccw-Silence-warning-from-Clang-by-marki.patch -# For bz#1939509 - QEMU: enable SafeStack -# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM -Patch111: kvm-pc-bios-s390-ccw-Fix-the-cc-option-macro-in-the-Make.patch -# For bz#1939509 - QEMU: enable SafeStack -# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM -Patch112: kvm-pc-bios-s390-ccw-Silence-GCC-11-stringop-overflow-wa.patch -# For bz#1939509 - QEMU: enable SafeStack -# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM -Patch113: kvm-pc-bios-s390-ccw-Allow-building-with-Clang-too.patch -# For bz#1939509 - QEMU: enable SafeStack -# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM -Patch114: kvm-pc-bios-s390-ccw-Fix-inline-assembly-for-older-versi.patch -# For bz#1939509 - QEMU: enable SafeStack -# For bz#1940132 - QEMU: switch build toolchain to Clang/LLVM -Patch115: kvm-configure-Fix-endianess-test-with-LTO.patch -# For bz#1951814 - RFE: Warning when using qcow2-v2 (compat=0.10) -Patch116: kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch -# For bz#1995819 - RFE: Remove ac97 audio support from QEMU -Patch117: kvm-disable-ac97-audio.patch -# For bz#1950192 - RHEL9: when ioeventfd=off and 8.4guest, (qemu) qemu-kvm: ../util/qemu-coroutine-lock.c:57: qemu_co_queue_wait_impl: Assertion `qemu_in_coroutine()' failed. 
-Patch118: kvm-redhat-Enable-the-test-block-iothread-test-again.patch - -# Source-git patches +Patch0018: 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch %if %{have_clang} BuildRequires: clang @@ -412,6 +204,7 @@ BuildRequires: usbredir-devel >= %{usbredir_version} %endif BuildRequires: texinfo BuildRequires: python3-sphinx +BuildRequires: python3-sphinx_rtd_theme BuildRequires: libseccomp-devel >= %{libseccomp_version} # For network block driver BuildRequires: libcurl-devel @@ -656,6 +449,7 @@ mkdir -p %{qemu_kvm_build} --disable-avx512f \\\ --disable-block-drv-whitelist-in-tools \\\ --disable-bochs \\\ + --disable-bpf \\\ --disable-brlapi \\\ --disable-bsd-user \\\ --disable-bzip2 \\\ @@ -713,6 +507,7 @@ mkdir -p %{qemu_kvm_build} --disable-netmap \\\ --disable-nettle \\\ --disable-numa \\\ + --disable-nvmm \\\ --disable-opengl \\\ --disable-parallels \\\ --disable-pie \\\ @@ -729,8 +524,8 @@ mkdir -p %{qemu_kvm_build} --disable-sdl \\\ --disable-sdl-image \\\ --disable-seccomp \\\ - --disable-sheepdog \\\ --disable-slirp \\\ + --disable-slirp-smbd \\\ --disable-smartcard \\\ --disable-snappy \\\ --disable-sparse \\\ @@ -797,6 +592,7 @@ run_configure() { --with-git=git \ --tls-priority=@QEMU,SYSTEM \ %{disable_everything} \ + --with-devices-%{kvm_target}=%{kvm_target}-rh-devices \ "$@" echo "config-host.mak contents:" @@ -928,7 +724,7 @@ cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm cp pc-bios/s390-ccw/s390-ccw.img pc-bios/s390-ccw/s390-netboot.img pc-bios/ %endif -%{__cc} %{_sourcedir}/ksmctl.c %{optflags} %{?build_ldflags} -o ksmctl +%{__cc} %{_sourcedir}/ksmctl.c %{optflags} -pie %{?build_ldflags} -o ksmctl popd # endif !tools_only %endif @@ -1161,7 +957,6 @@ rm -rf %{buildroot}%{qemudocdir}/specs %endif - %check %if !%{tools_only} @@ -1173,8 +968,6 @@ popd # endif !tools_only %endif - - %post -n qemu-guest-agent %systemd_post qemu-guest-agent.service %preun -n qemu-guest-agent @@ -1327,11 +1120,21 @@ useradd -r -u 107 -g qemu 
-G kvm -d / -s /sbin/nologin \ %{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf %{_libdir}/%{name}/hw-display-virtio-gpu.so +%{_libdir}/%{name}/hw-display-virtio-gpu-gl.so +%ifarch x86_64 %{power64} + %{_libdir}/%{name}/hw-display-virtio-vga-gl.so +%endif %ifarch s390x %{_libdir}/%{name}/hw-s390x-virtio-gpu-ccw.so %else %{_libdir}/%{name}/hw-display-virtio-gpu-pci.so + %{_libdir}/%{name}/hw-display-virtio-gpu-pci-gl.so %endif + %{_libdir}/%{name}/accel-qtest-%{kvm_target}.so +%ifarch x86_64 + %{_libdir}/%{name}/accel-tcg-%{kvm_target}.so +%endif +%{_libdir}/%{name}/hw-usb-host.so %files tests %{testsdir} @@ -1360,6 +1163,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Thu Sep 02 2021 Miroslav Rezanina - 6.1.0-1 +- Rebase to QEMU 6.1.0 [bz#1997408] +- Resolves: #bz#1997408 + (Rebase to QEMU 6.1.0) + * Fri Aug 27 2021 Miroslav Rezanina - 6.0.0-13 - kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch [bz#1951814] - kvm-disable-ac97-audio.patch [bz#1995819] @@ -3099,7 +2907,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ - Resolves: bz#1640044 (Disable CONFIG_I2C and CONFIG_IPMI in default-configs/ppc64-softmmu.mak) -* Tue Dec 11 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-46 +* Tue Dec 11 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-46 - kvm-qcow2-Give-the-refcount-cache-the-minimum-possible-s.patch [bz#1656507] - kvm-docs-Document-the-new-default-sizes-of-the-qcow2-cac.patch [bz#1656507] - kvm-qcow2-Fix-Coverity-warning-when-calculating-the-refc.patch [bz#1656507] @@ -3226,7 +3034,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ (rbd json format of 7.6 is incompatible with 7.5) * Tue Oct 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-40.el8 - - kvm-vnc-call-sasl_server_init-only-when-required.patch [bz#1609327] - kvm-nbd-server-fix-NBD_CMD_CACHE.patch [bz#1636142] - kvm-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch [bz#1636142] @@ -3285,7 +3092,7 @@ useradd -r -u 107 -g qemu -G kvm -d / 
-s /sbin/nologin \ - Resolves: bz#1639374 (qemu-img map 'Aborted (core dumped)' when specifying a plain file) -* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - - kvm-linux-headers-update.patch [bz#1508142] - kvm-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch [bz#1508142] - kvm-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch [bz#1508142] diff --git a/rpminspect.yaml b/rpminspect.yaml deleted file mode 100644 index 53ce59a..0000000 --- a/rpminspect.yaml +++ /dev/null @@ -1,6 +0,0 @@ ---- -elf: - exclude_path: (.*s390-ccw.img.*)|(.*s390-netboot.img.*) -inspections: - badfuncs: off - diff --git a/sources b/sources index c713614..00e7175 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-6.0.0.tar.xz) = ee3ff00aebec4d8891d2ff6dabe4e667e510b2a4fe3f6190aa34673a91ea32dcd2db2e9bf94c2f1bf05aa79788f17cfbbedc6027c0988ea08a92587b79ee05e4 +SHA512 (qemu-6.1.0.tar.xz) = 3378ae21c75b77ee6a759827f1fcf7b2a50a0fef07e3b0e89117108022a8d8655fa977e4d65596f4f24f7c735c6594d44b0c6f69732ea4465e88a7406b1d5d3c From 0e8a3698bc24e9fff776892e85229061a255fc58 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 2 Sep 2021 08:53:10 -0400 Subject: [PATCH 135/195] Add post-rebase missing files During rebase rpminspect.yaml and README.rst files were removed as they are not part of the build. Re-adding these files. In addition, add spec marker for source-git handling. Signed-off-by: Miroslav Rezanina Resolves: #bz#1997408 --- README.rst | 19 +++++++++++++++++++ qemu-kvm.spec | 2 ++ rpminspect.yaml | 6 ++++++ 3 files changed, 27 insertions(+) create mode 100644 README.rst create mode 100644 rpminspect.yaml diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..15f93e6 --- /dev/null +++ b/README.rst @@ -0,0 +1,19 @@ +=================== +qemu-kvm development +=================== + +qemu-kvm is maintained in a `source tree`_ rather than directly in dist-git. 
+This provides a way to develop using a regular source code structure and provides +a way to generate an SRPM and build using the koji service. In addition, it allows a local build using +a CentOS 9 Stream specific configuration. + +Developers deliver all changes to source-git using merge requests. Only maintainers +will be pushing changes sent to source-git to dist-git. + +Each release in dist-git is tagged in the source repository so you can easily +check out the source tree for a build. The tags are in the format +name-version-release, but note that the release doesn't contain the dist tag since the +source can be built in different build roots (Fedora, CentOS, etc.). + +.. _source tree: https://gitlab.com/redhat/centos-stream/src/qemu-kvm + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index fdad09e..3e0ff61 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -182,6 +182,8 @@ Patch0016: 0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0017: 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch Patch0018: 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +# Source-git patches + %if %{have_clang} BuildRequires: clang %if %{have_safe_stack} diff --git a/rpminspect.yaml b/rpminspect.yaml new file mode 100644 index 0000000..53ce59a --- /dev/null +++ b/rpminspect.yaml @@ -0,0 +1,6 @@ +--- +elf: + exclude_path: (.*s390-ccw.img.*)|(.*s390-netboot.img.*) +inspections: + badfuncs: off + From 58405e9e363accf19106e92baf71af8deef6fb56 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 10 Sep 2021 04:46:46 -0400 Subject: [PATCH 136/195] * Fri Sep 10 2021 Miroslav Rezanina - 6.1.0-2 - kvm-hw-arm-virt-Remove-9.0-machine-type.patch [bz#2002937] - kvm-remove-sgabios-dependency.patch [bz#2000845] - kvm-enable-pulseaudio.patch [bz#1997725] - kvm-spec-disable-use-of-gcrypt-for-crypto-backends-in-fa.patch [bz#1990068] - Resolves: bz#2002937 ([qemu][aarch64] Remove 9.0 machine types in arm virt for 9-Beta) - Resolves: bz#2000845 (RFE: Remove SGA, deprecate cirrus, and set
defaults for QEMU machine-types in RHEL9) - Resolves: bz#1997725 (RFE: enable pulseaudio backend on QEMU) - Resolves: bz#1990068 (Disable use of gcrypt for crypto backends in favour of gnutls) --- kvm-hw-arm-virt-Remove-9.0-machine-type.patch | 53 +++++++++++++++++++ qemu-kvm.spec | 38 ++++++++++--- 2 files changed, 84 insertions(+), 7 deletions(-) create mode 100644 kvm-hw-arm-virt-Remove-9.0-machine-type.patch diff --git a/kvm-hw-arm-virt-Remove-9.0-machine-type.patch b/kvm-hw-arm-virt-Remove-9.0-machine-type.patch new file mode 100644 index 0000000..4ccd33c --- /dev/null +++ b/kvm-hw-arm-virt-Remove-9.0-machine-type.patch @@ -0,0 +1,53 @@ +From 8dc162b2ae2e412c7d4e0d46f7a11001c34263dc Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Mon, 23 Aug 2021 18:57:11 +0200 +Subject: [PATCH 1/4] hw/arm/virt: Remove 9.0 machine type +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Andrew Jones +RH-MergeRequest: 38: hw/arm/virt: Remove 9.0 machine type +RH-Commit: [1/1] 35c0734886622b88f6a715f13fba8f65331f7a82 +RH-Bugzilla: 2002937 +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Gavin Shan +RH-Acked-by: Philippe Mathieu-Daudé + +Testing: '/usr/libexec/qemu-kvm -M help' and sample VM run + +Remove the 9.0 machine type for RHEL9 Beta to align with other +architectures. Also, like other architectures, the 8.5 machine +type should be the RHEL9 Beta default.
+ +Signed-off-by: Andrew Jones +Signed-off-by: Miroslav Rezanina +--- + hw/arm/virt.c | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 3c8e6de36d..9e7cb687dc 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3145,14 +3145,8 @@ static void rhel_machine_init(void) + } + type_init(rhel_machine_init); + +-static void rhel900_virt_options(MachineClass *mc) +-{ +- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); +-} +-DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) +- + static void rhel850_virt_options(MachineClass *mc) + { +- rhel900_virt_options(mc); ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); + } +-DEFINE_RHEL_MACHINE(8, 5, 0) ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 3e0ff61..ff7f339 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -106,7 +106,7 @@ %global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle %global block_drivers_ro_list vmdk,vhdx,vpc,https,ssh %define qemudocdir %{_docdir}/%{name} -%global firmwaredirs "%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios:%{_datadir}/sgabios" +%global firmwaredirs "%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios" #Versions of various parts: @@ -119,7 +119,8 @@ Requires: %{name}-hw-usbredir = %{epoch}:%{version}-%{release} \ %endif \ Requires: %{name}-block-curl = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ -Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} +Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} \ +Requires: %{name}-audio-pa = %{epoch}:%{version}-%{release} # Since SPICE is removed from RHEL-9, the following Obsoletes: # removes {name}-ui-spice for upgrades from RHEL-8 @@ -134,7 +135,7 @@ Obsoletes: 
%{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.1.0 -Release: 1%{?rcrel}%{?dist}%{?cc_suffix} +Release: 2%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -181,6 +182,8 @@ Patch0015: 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0016: 0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch Patch0017: 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch Patch0018: 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +# For bz#2002937 - [qemu][aarch64] Remove 9.0 machine types in arm virt for 9-Beta +Patch19: kvm-hw-arm-virt-Remove-9.0-machine-type.patch # Source-git patches @@ -236,7 +239,6 @@ BuildRequires: lzo-devel snappy-devel %if %{have_numactl} BuildRequires: numactl-devel %endif -BuildRequires: libgcrypt-devel # qemu-pr-helper multipath support (requires libudev too) BuildRequires: device-mapper-multipath-devel BuildRequires: systemd-devel @@ -252,6 +254,7 @@ BuildRequires: pkgconfig(gbm) %endif BuildRequires: perl-Test-Harness BuildRequires: libslirp-devel +BuildRequires: pulseaudio-libs-devel # Requires for qemu-kvm package @@ -304,7 +307,6 @@ Requires(preun): systemd-units Requires(postun): systemd-units %ifarch %{ix86} x86_64 Requires: seabios-bin >= 1.10.2-1 -Requires: sgabios-bin %endif %ifnarch aarch64 s390x Requires: seavgabios-bin >= 1.12.0-3 @@ -413,6 +415,13 @@ Install this package if you want to access remote disks using the Secure Shell (SSH) protocol. +%package audio-pa +Summary: QEMU PulseAudio audio driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description audio-pa +This package provides the additional PulseAudio audio driver for QEMU. 
+ + %if %{have_opengl} %package ui-opengl Summary: QEMU opengl support @@ -615,6 +624,7 @@ run_configure \ %if %{defined block_drivers_ro_list} --block-drv-ro-whitelist=%{block_drivers_ro_list} \ %endif + --audio-drv-list=pa \ --enable-attr \ %ifarch %{ix86} x86_64 --enable-avx2 \ @@ -628,7 +638,6 @@ run_configure \ %if %{have_fdt} --enable-fdt \ %endif - --enable-gcrypt \ --enable-gnutls \ --enable-guest-agent \ --enable-iconv \ @@ -928,7 +937,6 @@ rm -rf %{buildroot}%{_datadir}/%{name}/pxe*rom rm -rf %{buildroot}%{_datadir}/%{name}/vgabios*bin # Provided by package seabios rm -rf %{buildroot}%{_datadir}/%{name}/bios*.bin -# Provided by package sgabios rm -rf %{buildroot}%{_datadir}/%{name}/sgabios.bin @@ -1149,6 +1157,8 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %files block-ssh %{_libdir}/%{name}/block-ssh.so +%files audio-pa +%{_libdir}/%{name}/audio-pa.so %if %{have_opengl} %files ui-opengl @@ -1165,6 +1175,20 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Fri Sep 10 2021 Miroslav Rezanina - 6.1.0-2 +- kvm-hw-arm-virt-Remove-9.0-machine-type.patch [bz#2002937] +- kvm-remove-sgabios-dependency.patch [bz#2000845] +- kvm-enable-pulseaudio.patch [bz#1997725] +- kvm-spec-disable-use-of-gcrypt-for-crypto-backends-in-fa.patch [bz#1990068] +- Resolves: bz#2002937 + ([qemu][aarch64] Remove 9.0 machine types in arm virt for 9-Beta) +- Resolves: bz#2000845 + (RFE: Remove SGA, deprecate cirrus, and set defaults for QEMU machine-types in RHEL9) +- Resolves: bz#1997725 + (RFE: enable pulseaudio backend on QEMU) +- Resolves: bz#1990068 + (Disable use of gcrypt for crypto backends in favour of gnutls) + * Thu Sep 02 2021 Miroslav Rezanina - 6.1.0-1 - Rebase to QEMU 6.1.0 [bz#1997408] - Resolves: #bz#1997408 From 2d3f868ca90897037bdee74fa9c362b09a4e89bc Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 24 Sep 2021 01:21:34 -0400 Subject: [PATCH 137/195] * Fri Sep 24 2021 Miroslav Rezanina - 6.1.0-3 - 
kvm-disable-sga-device.patch [bz#2000845] - kvm-tools-virtiofsd-Add-fstatfs64-syscall-to-the-seccomp.patch [bz#2005026] - Resolves: bz#2000845 (RFE: Remove SGA, deprecate cirrus, and set defaults for QEMU machine-types in RHEL9) - Resolves: bz#2005026 ([s390][virtio-fs] Umount virtiofs shared folder failure from guest side [rhel-9.0.0]) --- kvm-disable-sga-device.patch | 38 ++++++++++++++ ...Add-fstatfs64-syscall-to-the-seccomp.patch | 51 +++++++++++++++++++ qemu-kvm.spec | 14 ++++- 3 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 kvm-disable-sga-device.patch create mode 100644 kvm-tools-virtiofsd-Add-fstatfs64-syscall-to-the-seccomp.patch diff --git a/kvm-disable-sga-device.patch b/kvm-disable-sga-device.patch new file mode 100644 index 0000000..75118bd --- /dev/null +++ b/kvm-disable-sga-device.patch @@ -0,0 +1,38 @@ +From 77d18ece20f69ff1e1f6afd4b2d8cf2a1f252f3a Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Mon, 20 Sep 2021 03:46:51 -0400 +Subject: [PATCH 1/2] disable sga device +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 44: Apply RHEL 9.0.0 Beta fixes to RHEL 9.0.0 +RH-Commit: [1/2] 3d8e1b51b496175de71162c612abbd64adbcb9e5 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 2000845 +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Gerd Hoffmann + +Disabling sga device we are going to not support anymore. 
+ +Signed-off-by: Gerd Hoffmann +Signed-off-by: Miroslav Rezanina +--- + configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +index 24b96ba0c4..20c2991941 100644 +--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -67,7 +67,6 @@ CONFIG_SERIAL=y + CONFIG_SERIAL_ISA=y + CONFIG_SERIAL_PCI=y + CONFIG_SEV=y +-CONFIG_SGA=y + CONFIG_SMBIOS=y + CONFIG_SMBUS_EEPROM=y + CONFIG_TEST_DEVICES=y +-- +2.27.0 + diff --git a/kvm-tools-virtiofsd-Add-fstatfs64-syscall-to-the-seccomp.patch b/kvm-tools-virtiofsd-Add-fstatfs64-syscall-to-the-seccomp.patch new file mode 100644 index 0000000..1666b15 --- /dev/null +++ b/kvm-tools-virtiofsd-Add-fstatfs64-syscall-to-the-seccomp.patch @@ -0,0 +1,51 @@ +From b615b79feaa73bbaa32bb8c30401a4f6f0c0205e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 14 Sep 2021 13:29:59 +0200 +Subject: [PATCH 2/2] tools/virtiofsd: Add fstatfs64 syscall to the seccomp + allowlist +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 44: Apply RHEL 9.0.0 Beta fixes to RHEL 9.0.0 +RH-Commit: [2/2] 0085289cefb57d49d2423b4f3376e8cf4a970012 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 2005026 +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Gerd Hoffmann + +The virtiofsd currently crashes on s390x when doing something like +this in the guest: + + mkdir -p /mnt/myfs + mount -t virtiofs myfs /mnt/myfs + touch /mnt/myfs/foo.txt + stat -f /mnt/myfs/foo.txt + +The problem is that the fstatfs64 syscall is called in this case +from the virtiofsd. We have to put it on the seccomp allowlist to +avoid that the daemon gets killed in this case. 
+ +(cherry picked from commit 8cfd339b3d402f913fe520a4f35f30152fb4fb80) +Suggested-by: Vivek Goyal +Signed-off-by: Thomas Huth +Signed-off-by: Miroslav Rezanina +--- + tools/virtiofsd/passthrough_seccomp.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c +index f49ed94b5e..a3ce9f898d 100644 +--- a/tools/virtiofsd/passthrough_seccomp.c ++++ b/tools/virtiofsd/passthrough_seccomp.c +@@ -51,6 +51,7 @@ static const int syscall_allowlist[] = { + SCMP_SYS(fsetxattr), + SCMP_SYS(fstat), + SCMP_SYS(fstatfs), ++ SCMP_SYS(fstatfs64), + SCMP_SYS(fsync), + SCMP_SYS(ftruncate), + SCMP_SYS(futex), +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index ff7f339..985bf5a 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -135,7 +135,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.1.0 -Release: 2%{?rcrel}%{?dist}%{?cc_suffix} +Release: 3%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -184,6 +184,10 @@ Patch0017: 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch Patch0018: 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch # For bz#2002937 - [qemu][aarch64] Remove 9.0 machine types in arm virt for 9-Beta Patch19: kvm-hw-arm-virt-Remove-9.0-machine-type.patch +# For bz#2000845 - RFE: Remove SGA, deprecate cirrus, and set defaults for QEMU machine-types in RHEL9 +Patch20: kvm-disable-sga-device.patch +# For bz#2005026 - [s390][virtio-fs] Umount virtiofs shared folder failure from guest side [rhel-9.0.0] +Patch21: kvm-tools-virtiofsd-Add-fstatfs64-syscall-to-the-seccomp.patch # Source-git patches @@ -1175,6 +1179,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Fri Sep 24 2021 Miroslav Rezanina - 6.1.0-3 +- kvm-disable-sga-device.patch [bz#2000845] +- kvm-tools-virtiofsd-Add-fstatfs64-syscall-to-the-seccomp.patch [bz#2005026] +- Resolves: bz#2000845 + (RFE: Remove SGA, deprecate cirrus, and set defaults for QEMU machine-types in RHEL9) +- Resolves: bz#2005026 + ([s390][virtio-fs] Umount virtiofs shared folder failure from guest side [rhel-9.0.0]) + * Fri Sep 10 2021 Miroslav Rezanina - 6.1.0-2 - kvm-hw-arm-virt-Remove-9.0-machine-type.patch [bz#2002937] - kvm-remove-sgabios-dependency.patch [bz#2000845] From 21b6f1766a47b6c19154b5ab9d3bef11bec358a9 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Tue, 5 Oct 2021 08:38:38 -0400 Subject: [PATCH 138/195] * Tue Oct 05 2021 Miroslav Rezanina - 6.1.0-4 - kvm-redhat-Define-hw_compat_rhel_8_5.patch [bz#1998943] - kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch [bz#1998943] - Resolves: bz#1998943 (Add machine type compatibility update for 6.1 rebase [s390x]) --- ...-machine-type-compatibility-update-f.patch | 45 +++++++++++ kvm-redhat-Define-hw_compat_rhel_8_5.patch | 74 +++++++++++++++++++ 
qemu-kvm.spec | 12 ++- 3 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch create mode 100644 kvm-redhat-Define-hw_compat_rhel_8_5.patch diff --git a/kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch b/kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch new file mode 100644 index 0000000..16f5957 --- /dev/null +++ b/kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch @@ -0,0 +1,45 @@ +From 9b72a86292fb2c34d7be7b928ac06f2609de0f43 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Thu, 30 Sep 2021 09:25:23 +0200 +Subject: [PATCH 2/2] redhat: Add s390x machine type compatibility update for + 6.1 rebase +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 45: Add s390x machine type compatibility update for 6.1 rebase +RH-Commit: [2/2] 6f71801ac2d77b82b010eac46cd82cd74c53f246 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 1998943 +RH-Acked-by: Greg Kurz +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +Add hw_compat_rhel_8_5 for the s390-ccw-virtio-rhel8.5.0 machine +type. It's currently not urgently required, since hw_compat_rhel_8_5 +only contains entries that are not related to s390x, but just in +case some other entries are added there later, it's better if we +add this right from the start to the s390x machine, too. 
+ +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1998943 +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 9a51dd8de2..4ee8d266ec 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1091,6 +1091,7 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine) + + static void ccw_machine_rhel850_class_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); + } + DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); + +-- +2.27.0 + diff --git a/kvm-redhat-Define-hw_compat_rhel_8_5.patch b/kvm-redhat-Define-hw_compat_rhel_8_5.patch new file mode 100644 index 0000000..60f7fa1 --- /dev/null +++ b/kvm-redhat-Define-hw_compat_rhel_8_5.patch @@ -0,0 +1,74 @@ +From 1f0a5d3ae9c835e35b83cf8bbedd0f814df3451d Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Mon, 27 Sep 2021 10:02:46 +0200 +Subject: [PATCH 1/2] redhat: Define hw_compat_rhel_8_5 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 45: Add s390x machine type compatibility update for 6.1 rebase +RH-Commit: [1/2] 5d304edf2bee7abc57843deb9e5d85ab5f19a34c (thuth/qemu-kvm-cs9) +RH-Bugzilla: 1998943 +RH-Acked-by: Greg Kurz +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +The QEMU 6.1 rebase changed the default value of some properties +for existing machine types. Prepare ground by introducing +hw_compat_rhel_8_5. 
+ +Signed-off-by: Greg Kurz +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1998943 +Signed-off-by: Thomas Huth +--- + hw/core/machine.c | 17 +++++++++++++++++ + include/hw/boards.h | 3 +++ + 2 files changed, 20 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index d681a06a47..a14503cc30 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -37,6 +37,23 @@ + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-pci.h" + ++/* ++ * Mostly the same as hw_compat_6_0 ++ */ ++GlobalProperty hw_compat_rhel_8_5[] = { ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "gpex-pcihost", "allow-unmapped-accesses", "false" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "i8042", "extended-state", "false"}, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "nvme-ns", "eui64-default", "off"}, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "e1000", "init-vet", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "e1000e", "init-vet", "off" }, ++}; ++const size_t hw_compat_rhel_8_5_len = G_N_ELEMENTS(hw_compat_rhel_8_5); ++ + /* + * Mostly the same as hw_compat_5_2 + */ +diff --git a/include/hw/boards.h b/include/hw/boards.h +index a2b1681027..7b9208ef7a 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -418,6 +418,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_5[]; ++extern const size_t hw_compat_rhel_8_5_len; ++ + extern GlobalProperty hw_compat_rhel_8_4[]; + extern const size_t hw_compat_rhel_8_4_len; + +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 985bf5a..6cc3dd2 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -135,7 +135,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.1.0 -Release: 3%{?rcrel}%{?dist}%{?cc_suffix} +Release: 4%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a 
qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -188,6 +188,10 @@ Patch19: kvm-hw-arm-virt-Remove-9.0-machine-type.patch Patch20: kvm-disable-sga-device.patch # For bz#2005026 - [s390][virtio-fs] Umount virtiofs shared folder failure from guest side [rhel-9.0.0] Patch21: kvm-tools-virtiofsd-Add-fstatfs64-syscall-to-the-seccomp.patch +# For bz#1998943 - Add machine type compatibility update for 6.1 rebase [s390x] +Patch22: kvm-redhat-Define-hw_compat_rhel_8_5.patch +# For bz#1998943 - Add machine type compatibility update for 6.1 rebase [s390x] +Patch23: kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch # Source-git patches @@ -1179,6 +1183,12 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Tue Oct 05 2021 Miroslav Rezanina - 6.1.0-4 +- kvm-redhat-Define-hw_compat_rhel_8_5.patch [bz#1998943] +- kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch [bz#1998943] +- Resolves: bz#1998943 + (Add machine type compatibility update for 6.1 rebase [s390x]) + * Fri Sep 24 2021 Miroslav Rezanina - 6.1.0-3 - kvm-disable-sga-device.patch [bz#2000845] - kvm-tools-virtiofsd-Add-fstatfs64-syscall-to-the-seccomp.patch [bz#2005026] From d9e6577b1172b0257633555591c95180a3e3969e Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 11 Oct 2021 03:45:02 -0400 Subject: [PATCH 139/195] * Mon Oct 11 2021 Miroslav Rezanina - 6.1.0-5 - kvm-virtio-balloon-Fix-page-poison-subsection-name.patch [bz#1984401] - kvm-spec-Remove-block-curl-and-block-ssh-dependency.patch [bz#2010985] - Resolves: bz#1984401 (fails to revert snapshot of a VM [balloon/page-poison]) - Resolves: bz#2010985 (Remove dependency on qemu-kvm-block-curl and qemu-kvm-block-ssh [rhel-9.0.0]) --- ...loon-Fix-page-poison-subsection-name.patch | 61 +++++++++++++++++++ qemu-kvm.spec | 14 ++++- 2 files changed, 72 insertions(+), 3 deletions(-) create mode 100644 
kvm-virtio-balloon-Fix-page-poison-subsection-name.patch diff --git a/kvm-virtio-balloon-Fix-page-poison-subsection-name.patch b/kvm-virtio-balloon-Fix-page-poison-subsection-name.patch new file mode 100644 index 0000000..68e542a --- /dev/null +++ b/kvm-virtio-balloon-Fix-page-poison-subsection-name.patch @@ -0,0 +1,61 @@ +From 91465d5e7cb407604fd8697587a52aafe0040ad7 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 14 Sep 2021 14:17:16 +0100 +Subject: [PATCH 1/2] virtio-balloon: Fix page-poison subsection name +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 46: virtio-balloon: Fix page-poison subsection name +RH-Commit: [1/1] b5907c252dae636b4b145a6f13fbed4bf5fad9d2 (dagrh/c-9-s-qemu-kvm) +RH-Bugzilla: 1984401 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +The subsection name for page-poison was typo'd as: + + vitio-balloon-device/page-poison + +Note the missing 'r' in virtio. + +When we have a machine type that enables page poison, and the guest +enables it (which needs a new kernel), things fail rather unpredictably. + +The fallout from this is that most of the other subsections fail to +load, including things like the feature bits in the device, one +possible fallout is that the physical addresses of the queues +then get aligned differently and we fail with an error about +last_avail_idx being wrong. +It's not obvious to me why this doesn't produce a more obvious failure, +but virtio's vmstate loading is a bit open-coded. + +Fixes: 7483cbbaf82 ("virtio-balloon: Implement support for page poison reporting feature") +bz: https://bugzilla.redhat.com/show_bug.cgi?id=1984401 +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20210914131716.102851-1-dgilbert@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: David Hildenbrand +(cherry picked from commit 243a9284a989a38e32ceb3990eb795f5cf6f3be0) +--- + hw/virtio/virtio-balloon.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c +index 4b5d9e5e50..ced070d64f 100644 +--- a/hw/virtio/virtio-balloon.c ++++ b/hw/virtio/virtio-balloon.c +@@ -850,7 +850,7 @@ static const VMStateDescription vmstate_virtio_balloon_free_page_hint = { + }; + + static const VMStateDescription vmstate_virtio_balloon_page_poison = { +- .name = "vitio-balloon-device/page-poison", ++ .name = "virtio-balloon-device/page-poison", + .version_id = 1, + .minimum_version_id = 1, + .needed = virtio_balloon_page_poison_support, +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 6cc3dd2..bdf83cd 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -117,9 +117,7 @@ Requires: %{name}-ui-opengl = %{epoch}:%{version}-%{release} \ %if %{have_usbredir} \ Requires: %{name}-hw-usbredir = %{epoch}:%{version}-%{release} \ %endif \ -Requires: %{name}-block-curl = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ -Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} \ Requires: %{name}-audio-pa = %{epoch}:%{version}-%{release} # Since SPICE is removed from RHEL-9, the following Obsoletes: @@ -135,7 +133,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.1.0 -Release: 4%{?rcrel}%{?dist}%{?cc_suffix} +Release: 5%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -192,6 +190,8 @@ Patch21: kvm-tools-virtiofsd-Add-fstatfs64-syscall-to-the-seccomp.patch Patch22: kvm-redhat-Define-hw_compat_rhel_8_5.patch # For bz#1998943 - Add machine type compatibility update for 6.1 rebase [s390x] Patch23: kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch +# For bz#1984401 - fails to revert snapshot of a VM [balloon/page-poison] +Patch24: kvm-virtio-balloon-Fix-page-poison-subsection-name.patch # Source-git patches @@ -1183,6 +1183,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Oct 11 2021 Miroslav Rezanina - 6.1.0-5 +- kvm-virtio-balloon-Fix-page-poison-subsection-name.patch [bz#1984401] +- kvm-spec-Remove-block-curl-and-block-ssh-dependency.patch [bz#2010985] +- Resolves: bz#1984401 + (fails to revert snapshot of a VM [balloon/page-poison]) +- Resolves: bz#2010985 + (Remove dependency on qemu-kvm-block-curl and qemu-kvm-block-ssh [rhel-9.0.0]) + * Tue Oct 05 2021 Miroslav Rezanina - 6.1.0-4 - kvm-redhat-Define-hw_compat_rhel_8_5.patch [bz#1998943] - kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch [bz#1998943] From 17e28947d428316e90c974357bfd759eb80b1f28 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 18 Oct 2021 07:42:35 -0400 Subject: [PATCH 140/195] * Mon Oct 18 2021 Miroslav Rezanina - 6.1.0-6 - kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch [bz#1998942] - Resolves: bz#1998942 (Add machine type compatibility update for 6.1 rebase [aarch64]) --- ...hw_compat_rhel_8_5-to-8.5-machine-ty.patch | 38 +++++++++++++++++++ qemu-kvm.spec | 9 ++++- 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch diff --git a/kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch b/kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch 
new file mode 100644 index 0000000..5d7af51 --- /dev/null +++ b/kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch @@ -0,0 +1,38 @@ +From c0fa5e8ad618a980752f7053de4d4fedd46b5b53 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 12 Oct 2021 17:53:07 +0200 +Subject: [PATCH] hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type + +RH-Author: Eric Auger +RH-MergeRequest: 48: hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type +RH-Commit: [1/1] f4230d3bbd94beb44afb23cb0d1561ff2bc71340 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 1998942 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones + +branch: c9s +Upstream: no +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=40295089 + +Add hw_compat_rhel_8_5 to rhel8.5 virt options. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 9e7cb687dc..79286c40ec 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3148,5 +3148,6 @@ type_init(rhel_machine_init); + static void rhel850_virt_options(MachineClass *mc) + { + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); + } + DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index bdf83cd..c826698 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -133,7 +133,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.1.0 -Release: 5%{?rcrel}%{?dist}%{?cc_suffix} +Release: 6%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -192,6 +192,8 @@ Patch22: kvm-redhat-Define-hw_compat_rhel_8_5.patch Patch23: kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch # For bz#1984401 - fails to revert snapshot of a VM [balloon/page-poison] Patch24: kvm-virtio-balloon-Fix-page-poison-subsection-name.patch +# For bz#1998942 - Add machine type compatibility update for 6.1 rebase [aarch64] +Patch25: kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch # Source-git patches @@ -1183,6 +1185,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Oct 18 2021 Miroslav Rezanina - 6.1.0-6 +- kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch [bz#1998942] +- Resolves: bz#1998942 + (Add machine type compatibility update for 6.1 rebase [aarch64]) + * Mon Oct 11 2021 Miroslav Rezanina - 6.1.0-5 - kvm-virtio-balloon-Fix-page-poison-subsection-name.patch [bz#1984401] - kvm-spec-Remove-block-curl-and-block-ssh-dependency.patch [bz#2010985] From 3cb15f8e488475a11482ffbc7e3a7d596642203b Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Nov 2021 02:04:52 -0500 Subject: [PATCH 141/195] * Fri Nov 19 2021 Miroslav Rezanina - 6.1.0-7 - kvm-migration-Make-migration-blocker-work-for-snapshots-.patch [bz#1996609] - kvm-migration-Add-migrate_add_blocker_internal.patch [bz#1996609] - kvm-dump-guest-memory-Block-live-migration.patch [bz#1996609] - kvm-spec-Build-the-VDI-block-driver.patch [bz#2013331] - kvm-spec-Explicitly-include-compress-filter.patch [bz#1980035] - Resolves: bz#1996609 (Qemu hit core dump when dump guest memory during live migration) - Resolves: bz#2013331 (RFE: qemu-img cannot convert from vdi format) - Resolves: bz#1980035 (RFE: Enable compress filter so we can create new, compressed qcow2 files via qemu-nbd) --- kvm-Fix-for-ppc64le-build.patch | 73 +++++++++++++ 
...mp-guest-memory-Block-live-migration.patch | 93 ++++++++++++++++ ...ion-Add-migrate_add_blocker_internal.patch | 100 ++++++++++++++++++ ...igration-blocker-work-for-snapshots-.patch | 61 +++++++++++ qemu-kvm.spec | 27 ++++- 5 files changed, 351 insertions(+), 3 deletions(-) create mode 100644 kvm-Fix-for-ppc64le-build.patch create mode 100644 kvm-dump-guest-memory-Block-live-migration.patch create mode 100644 kvm-migration-Add-migrate_add_blocker_internal.patch create mode 100644 kvm-migration-Make-migration-blocker-work-for-snapshots-.patch diff --git a/kvm-Fix-for-ppc64le-build.patch b/kvm-Fix-for-ppc64le-build.patch new file mode 100644 index 0000000..263286c --- /dev/null +++ b/kvm-Fix-for-ppc64le-build.patch @@ -0,0 +1,73 @@ +From 81c5a3cec15194bf75366813274f63d596f04807 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Mon, 15 Nov 2021 05:46:15 -0500 +Subject: [PATCH 6/6] Fix for ppc64le build + +Although we do not support ppc64 build, there can be some custom builds running +it. Fix code so the ppc64le build is buildable. 
+ +Signed-off-by: Miroslav Rezanina +--- + hw/ppc/spapr.c | 7 +++++-- + target/ppc/kvm.c | 5 ++--- + 2 files changed, 7 insertions(+), 5 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 1386e45e70..8699cc3d0c 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -4991,7 +4991,6 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); + /* + * pseries-2.7 + */ +-#endif + + static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, +@@ -5047,7 +5046,6 @@ static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, + return true; + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void spapr_machine_2_7_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +@@ -5372,4 +5370,9 @@ static void spapr_machine_rhel760sxxm_class_options(MachineClass *mc) + + DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false); + ++static void spapr_machine_register_types(void) ++{ ++ type_register_static(&spapr_machine_info); ++} ++ + type_init(spapr_machine_register_types) +diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c +index 0f4f072fbd..154888cce5 100644 +--- a/target/ppc/kvm.c ++++ b/target/ppc/kvm.c +@@ -2560,11 +2560,11 @@ int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) + return 0; + } + +-<<<<<<< HEAD + int kvmppc_has_cap_rpt_invalidate(void) + { + return cap_rpt_invalidate; +-======= ++} ++ + bool kvmppc_has_cap_secure_guest(void) + { + return !!cap_ppc_secure_guest; +@@ -2573,7 +2573,6 @@ bool kvmppc_has_cap_secure_guest(void) + int kvmppc_enable_cap_secure_guest(void) + { + return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1); +->>>>>>> 89c02f0e37... 
Add ppc64 machine types + } + + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) +-- +2.27.0 + diff --git a/kvm-dump-guest-memory-Block-live-migration.patch b/kvm-dump-guest-memory-Block-live-migration.patch new file mode 100644 index 0000000..08da454 --- /dev/null +++ b/kvm-dump-guest-memory-Block-live-migration.patch @@ -0,0 +1,93 @@ +From d1c6d059f8936adf7b8c3e2b29b2eb290b0792ac Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 22 Sep 2021 12:20:09 -0400 +Subject: [PATCH 3/6] dump-guest-memory: Block live migration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 51: dump-guest-memory: Add blocker for migration +RH-Commit: [3/3] 711dcc0018f70bcb87496c5aa235633a6daf5c2d (peterx/qemu-kvm) +RH-Bugzilla: 1996609 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: quintela1 +RH-Acked-by: Laszlo Ersek + +Both dump-guest-memory and live migration caches vm state at the beginning. +Either of them entering the other one will cause race on the vm state, and even +more severe on that (please refer to the crash report in the bug link). + +Let's block live migration in dump-guest-memory, and that'll also block +dump-guest-memory if it detected that we're during a live migration. + +Side note: migrate_del_blocker() can be called even if the blocker is not +inserted yet, so it's safe to unconditionally delete that blocker in +dump_cleanup (g_slist_remove allows no-entry-found case). + +Suggested-by: Dr. 
David Alan Gilbert +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1996609 +Signed-off-by: Peter Xu +Reviewed-by: Marc-André Lureau +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit b7bc6b182883bb3097dde2a25d041f28bde2b89c) +Signed-off-by: Peter Xu +--- + dump/dump.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/dump/dump.c b/dump/dump.c +index ab625909f3..662d0a62cd 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -29,6 +29,7 @@ + #include "qemu/error-report.h" + #include "qemu/main-loop.h" + #include "hw/misc/vmcoreinfo.h" ++#include "migration/blocker.h" + + #ifdef TARGET_X86_64 + #include "win_dump.h" +@@ -47,6 +48,8 @@ + + #define MAX_GUEST_NOTE_SIZE (1 << 20) /* 1MB should be enough */ + ++static Error *dump_migration_blocker; ++ + #define ELF_NOTE_SIZE(hdr_size, name_size, desc_size) \ + ((DIV_ROUND_UP((hdr_size), 4) + \ + DIV_ROUND_UP((name_size), 4) + \ +@@ -101,6 +104,7 @@ static int dump_cleanup(DumpState *s) + qemu_mutex_unlock_iothread(); + } + } ++ migrate_del_blocker(dump_migration_blocker); + + return 0; + } +@@ -2005,6 +2009,21 @@ void qmp_dump_guest_memory(bool paging, const char *file, + return; + } + ++ if (!dump_migration_blocker) { ++ error_setg(&dump_migration_blocker, ++ "Live migration disabled: dump-guest-memory in progress"); ++ } ++ ++ /* ++ * Allows even for -only-migratable, but forbid migration during the ++ * process of dump guest memory. 
++ */ ++ if (migrate_add_blocker_internal(dump_migration_blocker, errp)) { ++ /* Remember to release the fd before passing it over to dump state */ ++ close(fd); ++ return; ++ } ++ + s = &dump_state_global; + dump_state_prepare(s); + +-- +2.27.0 + diff --git a/kvm-migration-Add-migrate_add_blocker_internal.patch b/kvm-migration-Add-migrate_add_blocker_internal.patch new file mode 100644 index 0000000..f00183f --- /dev/null +++ b/kvm-migration-Add-migrate_add_blocker_internal.patch @@ -0,0 +1,100 @@ +From 58d47d795b91beb1f87929fb07d9f1b4af79cb14 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 22 Sep 2021 12:20:08 -0400 +Subject: [PATCH 2/6] migration: Add migrate_add_blocker_internal() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 51: dump-guest-memory: Add blocker for migration +RH-Commit: [2/3] 44cf3879ee66cc9974dd3d8a5f9a0d4f762b7c01 (peterx/qemu-kvm) +RH-Bugzilla: 1996609 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: quintela1 +RH-Acked-by: Laszlo Ersek + +An internal version that removes -only-migratable implications. It can be used +for temporary migration blockers like dump-guest-memory. 
+ +Reviewed-by: Marc-André Lureau +Reviewed-by: Juan Quintela +Signed-off-by: Peter Xu +Signed-off-by: Juan Quintela +(cherry picked from commit 60fd680193119e7e4d50eccff7b55a0aadc934ab) +Signed-off-by: Peter Xu +--- + include/migration/blocker.h | 16 ++++++++++++++++ + migration/migration.c | 21 +++++++++++++-------- + 2 files changed, 29 insertions(+), 8 deletions(-) + +diff --git a/include/migration/blocker.h b/include/migration/blocker.h +index acd27018e9..9cebe2ba06 100644 +--- a/include/migration/blocker.h ++++ b/include/migration/blocker.h +@@ -25,6 +25,22 @@ + */ + int migrate_add_blocker(Error *reason, Error **errp); + ++/** ++ * @migrate_add_blocker_internal - prevent migration from proceeding without ++ * only-migrate implications ++ * ++ * @reason - an error to be returned whenever migration is attempted ++ * ++ * @errp - [out] The reason (if any) we cannot block migration right now. ++ * ++ * @returns - 0 on success, -EBUSY on failure, with errp set. ++ * ++ * Some of the migration blockers can be temporary (e.g., for a few seconds), ++ * so it shouldn't need to conflict with "-only-migratable". For those cases, ++ * we can call this function rather than @migrate_add_blocker(). ++ */ ++int migrate_add_blocker_internal(Error *reason, Error **errp); ++ + /** + * @migrate_del_blocker - remove a blocking error from migration + * +diff --git a/migration/migration.c b/migration/migration.c +index f476e2101e..2aaf2fd449 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2053,15 +2053,8 @@ void migrate_init(MigrationState *s) + s->threshold_size = 0; + } + +-int migrate_add_blocker(Error *reason, Error **errp) ++int migrate_add_blocker_internal(Error *reason, Error **errp) + { +- if (only_migratable) { +- error_propagate_prepend(errp, error_copy(reason), +- "disallowing migration blocker " +- "(--only-migratable) for: "); +- return -EACCES; +- } +- + /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. 
*/ + if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) { + error_propagate_prepend(errp, error_copy(reason), +@@ -2074,6 +2067,18 @@ int migrate_add_blocker(Error *reason, Error **errp) + return 0; + } + ++int migrate_add_blocker(Error *reason, Error **errp) ++{ ++ if (only_migratable) { ++ error_propagate_prepend(errp, error_copy(reason), ++ "disallowing migration blocker " ++ "(--only-migratable) for: "); ++ return -EACCES; ++ } ++ ++ return migrate_add_blocker_internal(reason, errp); ++} ++ + void migrate_del_blocker(Error *reason) + { + migration_blockers = g_slist_remove(migration_blockers, reason); +-- +2.27.0 + diff --git a/kvm-migration-Make-migration-blocker-work-for-snapshots-.patch b/kvm-migration-Make-migration-blocker-work-for-snapshots-.patch new file mode 100644 index 0000000..afdbdb1 --- /dev/null +++ b/kvm-migration-Make-migration-blocker-work-for-snapshots-.patch @@ -0,0 +1,61 @@ +From 0a9b55b44c5c548ff6f3da7335acf2138ecb4376 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 22 Sep 2021 12:20:07 -0400 +Subject: [PATCH 1/6] migration: Make migration blocker work for snapshots too +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 51: dump-guest-memory: Add blocker for migration +RH-Commit: [1/3] 241888d12c7197dd4ee1e1ba6e3115f70901636e (peterx/qemu-kvm) +RH-Bugzilla: 1996609 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: quintela1 +RH-Acked-by: Laszlo Ersek + +save_snapshot() checks migration blocker, which looks sane. At the meantime we +should also teach the blocker add helper to fail if during a snapshot, just +like for migrations. 
+ +Reviewed-by: Marc-André Lureau +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 4c170330aae4a4ed75c3a8638b7d4c5d9f365244) +Signed-off-by: Peter Xu +--- + migration/migration.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 041b8451a6..f476e2101e 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2062,15 +2062,16 @@ int migrate_add_blocker(Error *reason, Error **errp) + return -EACCES; + } + +- if (migration_is_idle()) { +- migration_blockers = g_slist_prepend(migration_blockers, reason); +- return 0; ++ /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */ ++ if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) { ++ error_propagate_prepend(errp, error_copy(reason), ++ "disallowing migration blocker " ++ "(migration/snapshot in progress) for: "); ++ return -EBUSY; + } + +- error_propagate_prepend(errp, error_copy(reason), +- "disallowing migration blocker " +- "(migration in progress) for: "); +- return -EBUSY; ++ migration_blockers = g_slist_prepend(migration_blockers, reason); ++ return 0; + } + + void migrate_del_blocker(Error *reason) +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index c826698..5f3f3d0 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -103,8 +103,8 @@ %endif %global target_list %{kvm_target}-softmmu -%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle -%global block_drivers_ro_list vmdk,vhdx,vpc,https,ssh +%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress +%global block_drivers_ro_list vdi,vmdk,vhdx,vpc,https,ssh %define qemudocdir %{_docdir}/%{name} %global firmwaredirs "%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios" @@ -133,7 +133,7 @@ 
Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.1.0 -Release: 6%{?rcrel}%{?dist}%{?cc_suffix} +Release: 7%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -194,6 +194,13 @@ Patch23: kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch Patch24: kvm-virtio-balloon-Fix-page-poison-subsection-name.patch # For bz#1998942 - Add machine type compatibility update for 6.1 rebase [aarch64] Patch25: kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch +# For bz#1996609 - Qemu hit core dump when dump guest memory during live migration +Patch26: kvm-migration-Make-migration-blocker-work-for-snapshots-.patch +# For bz#1996609 - Qemu hit core dump when dump guest memory during live migration +Patch27: kvm-migration-Add-migrate_add_blocker_internal.patch +# For bz#1996609 - Qemu hit core dump when dump guest memory during live migration +Patch28: kvm-dump-guest-memory-Block-live-migration.patch +Patch29: kvm-Fix-for-ppc64le-build.patch # Source-git patches @@ -686,6 +693,7 @@ run_configure \ %if %{have_usbredir} --enable-usb-redir \ %endif + --enable-vdi \ --enable-virtiofsd \ --enable-vhost-kernel \ --enable-vhost-net \ @@ -1185,6 +1193,19 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Fri Nov 19 2021 Miroslav Rezanina - 6.1.0-7 +- kvm-migration-Make-migration-blocker-work-for-snapshots-.patch [bz#1996609] +- kvm-migration-Add-migrate_add_blocker_internal.patch [bz#1996609] +- kvm-dump-guest-memory-Block-live-migration.patch [bz#1996609] +- kvm-spec-Build-the-VDI-block-driver.patch [bz#2013331] +- kvm-spec-Explicitly-include-compress-filter.patch [bz#1980035] +- Resolves: bz#1996609 + (Qemu hit core dump when dump guest memory during live migration) +- Resolves: bz#2013331 + (RFE: qemu-img 
cannot convert from vdi format) +- Resolves: bz#1980035 + (RFE: Enable compress filter so we can create new, compressed qcow2 files via qemu-nbd) + * Mon Oct 18 2021 Miroslav Rezanina - 6.1.0-6 - kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch [bz#1998942] - Resolves: bz#1998942 From 295f9d9b79ba1138fa89c66a00ca46a0a1a50853 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 24 Nov 2021 00:20:05 -0500 Subject: [PATCH 142/195] * Wed Nov 24 2021 Miroslav Rezanina - 6.1.0-8 - kvm-Move-ksmtuned-files-to-separate-package.patch [bz#1971678] - Resolves: bz#1971678 (Split out ksmtuned package from qemu-kvm) --- ksm.service | 13 ----- ksm.sysconfig | 4 -- ksmctl.c | 77 -------------------------- ksmtuned | 139 ----------------------------------------------- ksmtuned.conf | 21 ------- ksmtuned.service | 12 ---- qemu-kvm.spec | 38 ++----------- 7 files changed, 6 insertions(+), 298 deletions(-) delete mode 100644 ksm.service delete mode 100644 ksm.sysconfig delete mode 100644 ksmctl.c delete mode 100644 ksmtuned delete mode 100644 ksmtuned.conf delete mode 100644 ksmtuned.service diff --git a/ksm.service b/ksm.service deleted file mode 100644 index 35c6f1d..0000000 --- a/ksm.service +++ /dev/null @@ -1,13 +0,0 @@ -[Unit] -Description=Kernel Samepage Merging -ConditionPathExists=/sys/kernel/mm/ksm - -[Service] -Type=oneshot -RemainAfterExit=yes -EnvironmentFile=-/etc/sysconfig/ksm -ExecStart=/usr/libexec/ksmctl start -ExecStop=/usr/libexec/ksmctl stop - -[Install] -WantedBy=multi-user.target diff --git a/ksm.sysconfig b/ksm.sysconfig deleted file mode 100644 index d99656d..0000000 --- a/ksm.sysconfig +++ /dev/null @@ -1,4 +0,0 @@ -# The maximum number of unswappable kernel pages -# which may be allocated by ksm (0 for unlimited) -# If unset, defaults to half of total memory -# KSM_MAX_KERNEL_PAGES= diff --git a/ksmctl.c b/ksmctl.c deleted file mode 100644 index af39591..0000000 --- a/ksmctl.c +++ /dev/null @@ -1,77 +0,0 @@ -/* Start/stop KSM, for 
systemd. - * Copyright (C) 2009, 2011 Red Hat, Inc. - * Written by Paolo Bonzini . - * Based on the original sysvinit script by Dan Kenigsberg - * This file is distributed under the GNU General Public License, version 2 - * or later. */ - -#include -#include -#include -#include -#include -#include - -#define KSM_MAX_KERNEL_PAGES_FILE "/sys/kernel/mm/ksm/max_kernel_pages" -#define KSM_RUN_FILE "/sys/kernel/mm/ksm/run" - -char *program_name; - -int usage(void) -{ - fprintf(stderr, "Usage: %s {start|stop}\n", program_name); - return 1; -} - -int write_value(uint64_t value, char *filename) -{ - FILE *fp; - if (!(fp = fopen(filename, "w")) || - fprintf(fp, "%llu\n", (unsigned long long) value) == EOF || - fflush(fp) == EOF || - fclose(fp) == EOF) - return 1; - - return 0; -} - -uint64_t ksm_max_kernel_pages() -{ - char *var = getenv("KSM_MAX_KERNEL_PAGES"); - char *endptr; - uint64_t value; - if (var && *var) { - value = strtoll(var, &endptr, 0); - if (value < LLONG_MAX && !*endptr) - return value; - } - /* Unless KSM_MAX_KERNEL_PAGES is set, let KSM munch up to half of - * total memory. */ - return sysconf(_SC_PHYS_PAGES) / 2; -} - -int start(void) -{ - if (access(KSM_MAX_KERNEL_PAGES_FILE, R_OK) >= 0) - write_value(ksm_max_kernel_pages(), KSM_MAX_KERNEL_PAGES_FILE); - return write_value(1, KSM_RUN_FILE); -} - -int stop(void) -{ - return write_value(0, KSM_RUN_FILE); -} - -int main(int argc, char **argv) -{ - program_name = argv[0]; - if (argc < 2) { - return usage(); - } else if (!strcmp(argv[1], "start")) { - return start(); - } else if (!strcmp(argv[1], "stop")) { - return stop(); - } else { - return usage(); - } -} diff --git a/ksmtuned b/ksmtuned deleted file mode 100644 index 7bc5743..0000000 --- a/ksmtuned +++ /dev/null @@ -1,139 +0,0 @@ -#!/bin/bash -# -# Copyright 2009 Red Hat, Inc. and/or its affiliates. 
-# Released under the GPL -# -# Author: Dan Kenigsberg -# -# ksmtuned - a simple script that controls whether (and with what vigor) ksm -# should search for duplicated pages. -# -# starts ksm when memory commited to qemu processes exceeds a threshold, and -# make ksm work harder and harder untill memory load falls below that -# threshold. -# -# send SIGUSR1 to this process right after a new qemu process is started, or -# following its death, to retune ksm accordingly -# -# needs testing and ironing. contact danken@redhat.com if something breaks. - -if [ -f /etc/ksmtuned.conf ]; then - . /etc/ksmtuned.conf -fi - -debug() { - if [ -n "$DEBUG" ]; then - s="`/bin/date`: $*" - [ -n "$LOGFILE" ] && echo "$s" >> "$LOGFILE" || echo "$s" - fi -} - - -KSM_MONITOR_INTERVAL=${KSM_MONITOR_INTERVAL:-60} -KSM_NPAGES_BOOST=${KSM_NPAGES_BOOST:-300} -KSM_NPAGES_DECAY=${KSM_NPAGES_DECAY:--50} - -KSM_NPAGES_MIN=${KSM_NPAGES_MIN:-64} -KSM_NPAGES_MAX=${KSM_NPAGES_MAX:-1250} -# millisecond sleep between ksm scans for 16Gb server. Smaller servers sleep -# more, bigger sleep less. 
-KSM_SLEEP_MSEC=${KSM_SLEEP_MSEC:-10} - -KSM_THRES_COEF=${KSM_THRES_COEF:-20} -KSM_THRES_CONST=${KSM_THRES_CONST:-2048} - -total=`awk '/^MemTotal:/ {print $2}' /proc/meminfo` -debug total $total - -npages=0 -sleep=$[KSM_SLEEP_MSEC * 16 * 1024 * 1024 / total] -[ $sleep -le 10 ] && sleep=10 -debug sleep $sleep -thres=$[total * KSM_THRES_COEF / 100] -if [ $KSM_THRES_CONST -gt $thres ]; then - thres=$KSM_THRES_CONST -fi -debug thres $thres - -KSMCTL () { - case x$1 in - xstop) - echo 0 > /sys/kernel/mm/ksm/run - ;; - xstart) - echo $2 > /sys/kernel/mm/ksm/pages_to_scan - echo $3 > /sys/kernel/mm/ksm/sleep_millisecs - echo 1 > /sys/kernel/mm/ksm/run - ;; - esac -} - -committed_memory () { - # calculate how much memory is committed to running qemu processes - local pidlist - pidlist=$(pgrep -d ' ' -- '^qemu(-(kvm|system-.+)|:.{1,11})$') - if [ -n "$pidlist" ]; then - ps -p "$pidlist" -o rsz= - fi | awk '{ sum += $1 }; END { print 0+sum }' -} - -free_memory () { - awk '/^(MemFree|Buffers|Cached):/ {free += $2}; END {print free}' \ - /proc/meminfo -} - -increase_npages() { - local delta - delta=${1:-0} - npages=$[npages + delta] - if [ $npages -lt $KSM_NPAGES_MIN ]; then - npages=$KSM_NPAGES_MIN - elif [ $npages -gt $KSM_NPAGES_MAX ]; then - npages=$KSM_NPAGES_MAX - fi - echo $npages -} - - -adjust () { - local free committed - free=`free_memory` - committed=`committed_memory` - debug committed $committed free $free - if [ $[committed + thres] -lt $total -a $free -gt $thres ]; then - KSMCTL stop - debug "$[committed + thres] < $total and free > $thres, stop ksm" - return 1 - fi - debug "$[committed + thres] > $total, start ksm" - if [ $free -lt $thres ]; then - npages=`increase_npages $KSM_NPAGES_BOOST` - debug "$free < $thres, boost" - else - npages=`increase_npages $KSM_NPAGES_DECAY` - debug "$free > $thres, decay" - fi - KSMCTL start $npages $sleep - debug "KSMCTL start $npages $sleep" - return 0 -} - -function nothing () { - : -} - -loop () { - trap nothing SIGUSR1 - 
while true - do - sleep $KSM_MONITOR_INTERVAL & - wait $! - adjust - done -} - -PIDFILE=${PIDFILE-/var/run/ksmtune.pid} -if touch "$PIDFILE"; then - loop & - echo $! > "$PIDFILE" -fi diff --git a/ksmtuned.conf b/ksmtuned.conf deleted file mode 100644 index fc4518c..0000000 --- a/ksmtuned.conf +++ /dev/null @@ -1,21 +0,0 @@ -# Configuration file for ksmtuned. - -# How long ksmtuned should sleep between tuning adjustments -# KSM_MONITOR_INTERVAL=60 - -# Millisecond sleep between ksm scans for 16Gb server. -# Smaller servers sleep more, bigger sleep less. -# KSM_SLEEP_MSEC=10 - -# KSM_NPAGES_BOOST=300 -# KSM_NPAGES_DECAY=-50 -# KSM_NPAGES_MIN=64 -# KSM_NPAGES_MAX=1250 - -# KSM_THRES_COEF=20 -# KSM_THRES_CONST=2048 - -# uncomment the following if you want ksmtuned debug info - -# LOGFILE=/var/log/ksmtuned -# DEBUG=1 diff --git a/ksmtuned.service b/ksmtuned.service deleted file mode 100644 index 39febcc..0000000 --- a/ksmtuned.service +++ /dev/null @@ -1,12 +0,0 @@ -[Unit] -Description=Kernel Samepage Merging (KSM) Tuning Daemon -After=ksm.service -Requires=ksm.service - -[Service] -ExecStart=/usr/sbin/ksmtuned -ExecReload=/bin/kill -USR1 $MAINPID -Type=forking - -[Install] -WantedBy=multi-user.target diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 5f3f3d0..989bc02 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -133,7 +133,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.1.0 -Release: 7%{?rcrel}%{?dist}%{?cc_suffix} +Release: 8%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -145,13 +145,6 @@ ExclusiveArch: x86_64 %{power64} aarch64 s390x Source0: http://wiki.qemu.org/download/qemu-%{version}%{?rcstr}.tar.xz -# KSM control scripts -Source4: ksm.service -Source5: ksm.sysconfig -Source6: ksmctl.c -Source7: ksmtuned.service -Source8: ksmtuned -Source9: ksmtuned.conf Source10: qemu-guest-agent.service Source11: 99-qemu-guest-agent.rules Source12: bridge.conf @@ -753,7 +746,6 @@ cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm cp pc-bios/s390-ccw/s390-ccw.img pc-bios/s390-ccw/s390-netboot.img pc-bios/ %endif -%{__cc} %{_sourcedir}/ksmctl.c %{optflags} -pie %{?build_ldflags} -o ksmctl popd # endif !tools_only %endif @@ -797,13 +789,7 @@ popd %endif %if !%{tools_only} -install -D -p -m 0644 %{_sourcedir}/ksm.service %{buildroot}%{_unitdir}/ksm.service -install -D -p -m 0644 %{_sourcedir}/ksm.sysconfig %{buildroot}%{_sysconfdir}/sysconfig/ksm -install -D -p -m 0755 %{qemu_kvm_build}/ksmctl %{buildroot}%{_libexecdir}/ksmctl -install -D -p -m 0644 %{_sourcedir}/ksmtuned.service %{buildroot}%{_unitdir}/ksmtuned.service -install -D -p -m 0755 %{_sourcedir}/ksmtuned %{buildroot}%{_sbindir}/ksmtuned -install -D -p -m 0644 %{_sourcedir}/ksmtuned.conf %{buildroot}%{_sysconfdir}/ksmtuned.conf install -D -p -m 0644 %{_sourcedir}/vhost.conf %{buildroot}%{_sysconfdir}/modprobe.d/vhost.conf install -D -p -m 0644 %{modprobe_kvm_conf} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf @@ -1021,17 +1007,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ modprobe -b kvm &> /dev/null || : fi %endif - -%systemd_post ksm.service -%systemd_post ksmtuned.service - -%preun common -%systemd_preun ksm.service -%systemd_preun ksmtuned.service - -%postun common -%systemd_postun_with_restart ksm.service -%systemd_postun_with_restart ksmtuned.service # endif !tools_only %endif @@ -1105,13 +1080,7 @@ useradd -r -u 
107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_mandir}/man7/qemu-block-drivers.7* %attr(4755, -, -) %{_libexecdir}/qemu-bridge-helper %config(noreplace) %{_sysconfdir}/sasl2/%{name}.conf -%{_unitdir}/ksm.service -%{_libexecdir}/ksmctl -%config(noreplace) %{_sysconfdir}/sysconfig/ksm -%{_unitdir}/ksmtuned.service -%{_sbindir}/ksmtuned %ghost %{_sysconfdir}/kvm -%config(noreplace) %{_sysconfdir}/ksmtuned.conf %dir %{_sysconfdir}/%{name} %config(noreplace) %{_sysconfdir}/%{name}/bridge.conf %config(noreplace) %{_sysconfdir}/modprobe.d/vhost.conf @@ -1193,6 +1162,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Wed Nov 24 2021 Miroslav Rezanina - 6.1.0-8 +- kvm-Move-ksmtuned-files-to-separate-package.patch [bz#1971678] +- Resolves: bz#1971678 + (Split out ksmtuned package from qemu-kvm) + * Fri Nov 19 2021 Miroslav Rezanina - 6.1.0-7 - kvm-migration-Make-migration-blocker-work-for-snapshots-.patch [bz#1996609] - kvm-migration-Add-migrate_add_blocker_internal.patch [bz#1996609] From 037e637fa30d6650f9c990c860a070aa9c15edb4 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 16 Dec 2021 02:06:10 -0500 Subject: [PATCH 143/195] * Thu Dec 16 2021 Miroslav Rezanina - 6.2.0-1 - Rebase to QEMU 6.2.0 [bz#2027697] - Resolves: bz#2027697 (Rebase to QEMU 6.2.0) --- .gitignore | 1 + ...-t-use-g_autoptr-just-to-free-a-vari.patch | 49 +++++++ ...d.patch => 0005-Initial-redhat-build.patch | 105 +++++++------- ...0006-Enable-disable-devices-for-RHEL.patch | 83 +++++------ ...Machine-type-related-general-changes.patch | 136 +++++++++--------- ...ch => 0008-Add-aarch64-machine-types.patch | 49 +++---- ...atch => 0009-Add-ppc64-machine-types.patch | 105 ++++++-------- ...atch => 0010-Add-s390x-machine-types.patch | 18 ++- ...tch => 0011-Add-x86_64-machine-types.patch | 46 +++--- ...heck.patch => 0012-Enable-make-check.patch | 57 ++++---- ...mber-of-devices-that-can-be-assigned.patch | 8 +- ...Add-support-statement-to-help-output.patch | 8 +- 
...lly-limit-the-maximum-number-of-CPUs.patch | 6 +- ...documentation-instead-of-qemu-system.patch | 14 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 4 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 2 +- ...on-warning-when-opening-v2-images-rw.patch | 4 +- 0020-Fix-virtio-net-pci-vectors-compat.patch | 46 ++++++ ...machine-types-Add-pc_rhel_8_5_compat.patch | 75 ++++++++++ ...-types-Wire-compat-into-q35-and-i440.patch | 55 +++++++ ...8.5.0-Update-machine-type-compatibil.patch | 50 +++++++ ...-machine-type-compatibility-handling.patch | 58 ++++++++ ..._compat_rhel_8_5-with-6.2.0-RC2-chan.patch | 31 ++++ kvm-Fix-for-ppc64le-build.patch | 73 ---------- kvm-disable-sga-device.patch | 38 ----- ...mp-guest-memory-Block-live-migration.patch | 93 ------------ ...hw_compat_rhel_8_5-to-8.5-machine-ty.patch | 38 ----- kvm-hw-arm-virt-Remove-9.0-machine-type.patch | 53 ------- ...ion-Add-migrate_add_blocker_internal.patch | 100 ------------- ...igration-blocker-work-for-snapshots-.patch | 61 -------- ...-machine-type-compatibility-update-f.patch | 45 ------ kvm-redhat-Define-hw_compat_rhel_8_5.patch | 74 ---------- ...Add-fstatfs64-syscall-to-the-seccomp.patch | 51 ------- ...loon-Fix-page-poison-subsection-name.patch | 61 -------- qemu-kvm.spec | 108 +++++++------- sources | 2 +- 36 files changed, 731 insertions(+), 1076 deletions(-) create mode 100644 0001-ui-clipboard-Don-t-use-g_autoptr-just-to-free-a-vari.patch rename 0004-Initial-redhat-build.patch => 0005-Initial-redhat-build.patch (82%) rename 0005-Enable-disable-devices-for-RHEL.patch => 0006-Enable-disable-devices-for-RHEL.patch (93%) rename 0006-Machine-type-related-general-changes.patch => 0007-Machine-type-related-general-changes.patch (89%) rename 0007-Add-aarch64-machine-types.patch => 0008-Add-aarch64-machine-types.patch (91%) rename 0008-Add-ppc64-machine-types.patch => 0009-Add-ppc64-machine-types.patch (88%) rename 0009-Add-s390x-machine-types.patch => 0010-Add-s390x-machine-types.patch (87%) rename 
0010-Add-x86_64-machine-types.patch => 0011-Add-x86_64-machine-types.patch (95%) rename 0011-Enable-make-check.patch => 0012-Enable-make-check.patch (89%) rename 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch => 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch (93%) rename 0013-Add-support-statement-to-help-output.patch => 0014-Add-support-statement-to-help-output.patch (88%) rename 0014-globally-limit-the-maximum-number-of-CPUs.patch => 0015-globally-limit-the-maximum-number-of-CPUs.patch (89%) rename 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch => 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch (92%) rename 0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch => 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch (96%) rename 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch => 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch (97%) rename 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch => 0019-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch (96%) create mode 100644 0020-Fix-virtio-net-pci-vectors-compat.patch create mode 100644 0021-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch create mode 100644 0022-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch create mode 100644 0023-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch create mode 100644 0024-redhat-Add-s390x-machine-type-compatibility-handling.patch create mode 100644 0025-compat-Update-hw_compat_rhel_8_5-with-6.2.0-RC2-chan.patch delete mode 100644 kvm-Fix-for-ppc64le-build.patch delete mode 100644 kvm-disable-sga-device.patch delete mode 100644 kvm-dump-guest-memory-Block-live-migration.patch delete mode 100644 kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch delete mode 100644 kvm-hw-arm-virt-Remove-9.0-machine-type.patch delete mode 100644 kvm-migration-Add-migrate_add_blocker_internal.patch delete mode 100644 
kvm-migration-Make-migration-blocker-work-for-snapshots-.patch delete mode 100644 kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch delete mode 100644 kvm-redhat-Define-hw_compat_rhel_8_5.patch delete mode 100644 kvm-tools-virtiofsd-Add-fstatfs64-syscall-to-the-seccomp.patch delete mode 100644 kvm-virtio-balloon-Fix-page-poison-subsection-name.patch diff --git a/.gitignore b/.gitignore index 13f0595..c8fe0b4 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,4 @@ /patches.* /*.orig /qemu-6.1.0.tar.xz +/qemu-6.2.0.tar.xz diff --git a/0001-ui-clipboard-Don-t-use-g_autoptr-just-to-free-a-vari.patch b/0001-ui-clipboard-Don-t-use-g_autoptr-just-to-free-a-vari.patch new file mode 100644 index 0000000..5dcba33 --- /dev/null +++ b/0001-ui-clipboard-Don-t-use-g_autoptr-just-to-free-a-vari.patch @@ -0,0 +1,49 @@ +From cc2f3e2ce9e2a9ab9e52e8f44bee4876e69843da Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Wed, 17 Nov 2021 09:51:46 -0500 +Subject: ui/clipboard: Don't use g_autoptr just to free a variable + +Clang doesn't recognize that the variable is being "used" and will emit +a warning: + + ../ui/clipboard.c:47:34: error: variable 'old' set but not used [-Werror,-Wunused-but-set-variable] + g_autoptr(QemuClipboardInfo) old = NULL; + ^ + 1 error generated. + +OK, fine. Just do things the old way. + +Signed-off-by: John Snow +Signed-off-by: Miroslav Rezanina + +--- + +This is temporary commit from upstream submission necessary for build to pass. +We expect proper fix included upstream later. 
+--- + ui/clipboard.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/ui/clipboard.c b/ui/clipboard.c +index d7b008d62a..d53576b0f6 100644 +--- a/ui/clipboard.c ++++ b/ui/clipboard.c +@@ -44,13 +44,14 @@ void qemu_clipboard_peer_release(QemuClipboardPeer *peer, + + void qemu_clipboard_update(QemuClipboardInfo *info) + { +- g_autoptr(QemuClipboardInfo) old = NULL; ++ QemuClipboardInfo *old = NULL; + assert(info->selection < QEMU_CLIPBOARD_SELECTION__COUNT); + + notifier_list_notify(&clipboard_notifiers, info); + + old = cbinfo[info->selection]; + cbinfo[info->selection] = qemu_clipboard_info_ref(info); ++ g_free(old); + } + + QemuClipboardInfo *qemu_clipboard_info(QemuClipboardSelection selection) +-- +2.27.0 + diff --git a/0004-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch similarity index 82% rename from 0004-Initial-redhat-build.patch rename to 0005-Initial-redhat-build.patch index 94653b6..3ff2dce 100644 --- a/0004-Initial-redhat-build.patch +++ b/0005-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From 4a0a5d5019938a8b4b5526d33e1bf3d7dcfc56a5 Mon Sep 17 00:00:00 2001 +From 3308eb892f03c7169f712fe88e74dacd6f05b1fe Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 26 May 2021 10:56:02 +0200 Subject: Initial redhat build @@ -13,36 +13,35 @@ several issues are fixed in QEMU tree: We disable make check due to issues with some of the tests. 
-This rebase is based on qemu-kvm-5.2.0-13.el9 +This rebase is based on qemu-kvm-6.1.0-8.el9 Signed-off-by: Miroslav Rezanina -- -Rebase changes (210526): +Rebase changes (6.1.0): - Move build to .distro - Move changes for support file to related commit - Added dependency for python3-sphinx-rtd_theme - Removed --disable-sheepdog configure option - Added new hw-display modules - -Rebase changes (210623): - SASL initialization moved to ui/vnc-auth-sasl.c - -Rebase changes (210714): - Add accel-qtest- and accel-tcg-x86_64 libraries - -Rebase changes (6.1.0 rc1): - Added hw-usb-host module - -Rebase changes (6.1.0 rc4): - Disable new configure options (bpf, nvmm, slirp-smbd) - -Rebase chagnes (6.1.0): - Use -pie for ksmctl build (annocheck complain fix) +Rebase changes (6.2.0): +- removed --disable-jemalloc and --disable-tcmalloc configure options +- added audio-oss.so +- added fdt requirement for x86_64 +- tests/acceptance renamed to tests/avocado +- added multiboot_dma.bin +- Add -Wno-string-plus-int to extra flags +- Updated configure options + Merged patches (6.0.0): - 605758c902 Limit build on Power to qemu-img and qemu-ga only -Merged patches (6.1.0 RC1): +Merged patches (6.1.0): - f04f91751f Use cached tarballs - 6581165c65 Remove message with running VM count - 03c3cac9fc spec-file: build qemu-kvm without SPICE and QXL @@ -92,11 +91,7 @@ Merged patches (6.1.0 RC1): - d7ee259a79 spec: %files: don't use fine grained -docs file list - 64cad0c60f spec: %files: Add licenses to qemu-common too - c3de4f080a spec: %install: Drop python3 shebang fixup - -Merged patches (6.1.0 RC2): - 46fc216115 Update local build to work with spec file improvements - -Merged patches (6.1.0 RC3): - bab9531548 spec: Remove buildldflags - c8360ab6a9 spec: Use %make_build macro - f6966c66e9 spec: Drop make install sharedir and datadir usage @@ -106,39 +101,73 @@ Merged patches (6.1.0 RC3): - 44c7dda6c3 spec: use %{build_ldflags} - 0009a34354 Move virtiofsd to separate package - 34d1b200b3 
Utilize --firmware configure option - -Merged patches (6.1.0): - 2800e1dd03 spec: Switch toolchain to Clang/LLVM (except process-patches.sh) - e8a70f500f spec: Use safe-stack for x86_64 - e29445d50d spec: Reenable write support for VMDK etc. in tools - a4fe2a3e16 redhat: Disable LTO on non-x86 architectures -fixes +Merged patches (6.2.0): +- 333452440b remove sgabios dependency +- 7d3633f184 enable pulseaudio +- bd898709b0 spec: disable use of gcrypt for crypto backends in favour of gnutls +- e4f0c6dee6 spec: Remove block-curl and block-ssh dependency +- 4dc13bfe63 spec: Build the VDI block driver +- d2f2ff3c74 spec: Explicitly include compress filter +- a7d047f9c2 Move ksmtuned files to separate package + +With rebase new configure options are introducesed. We use two steps +configuration - first we disable all options and then enable supported +options. + +With 6.2.0, following changes are done: +- disabled all audiodev and enable only pa + - not use audio-drv-list anymore + - disabling oss driver removes oss module (added during rebase to 6.2.0) +- disable gettext +- disable l2tpv3 +- enable selinux +- enable spice-protocol + - added needed BuildRequire +- specify used capstone version +- specify used fdt version + +Signed-off-by: Miroslav Rezanina --- .distro/85-kvm.preset | 5 - .distro/Makefile | 100 + - .distro/Makefile.common | 45 + + .distro/Makefile.common | 38 + .distro/README.tests | 39 + + .distro/ksm.service | 13 - + .distro/ksm.sysconfig | 4 - + .distro/ksmctl.c | 77 - + .distro/ksmtuned | 139 - + .distro/ksmtuned.conf | 21 - + .distro/ksmtuned.service | 12 - .distro/kvm-setup | 49 - .distro/kvm-setup.service | 14 - .distro/modules-load.conf | 4 + .distro/qemu-guest-agent.service | 1 - - .distro/qemu-kvm.spec.template | 3767 +++++++++++++++++++++++ + .distro/qemu-kvm.spec.template | 3817 +++++++++++++++++++++++ .distro/scripts/extract_build_cmd.py | 12 + .gitignore | 1 + README.systemtap | 43 + - configure | 1 - - meson.build | 6 +- + meson.build | 4 
+- scripts/qemu-guest-agent/fsfreeze-hook | 2 +- scripts/systemtap/conf.d/qemu_kvm.conf | 4 + scripts/systemtap/script.d/qemu_kvm.stp | 1 + tests/check-block.sh | 2 + ui/vnc-auth-sasl.c | 2 +- - 19 files changed, 4024 insertions(+), 74 deletions(-) + 24 files changed, 4066 insertions(+), 338 deletions(-) delete mode 100644 .distro/85-kvm.preset create mode 100644 .distro/Makefile create mode 100644 .distro/Makefile.common create mode 100644 .distro/README.tests + delete mode 100644 .distro/ksm.service + delete mode 100644 .distro/ksm.sysconfig + delete mode 100644 .distro/ksmctl.c + delete mode 100644 .distro/ksmtuned + delete mode 100644 .distro/ksmtuned.conf + delete mode 100644 .distro/ksmtuned.service delete mode 100644 .distro/kvm-setup delete mode 100644 .distro/kvm-setup.service create mode 100644 .distro/modules-load.conf @@ -196,31 +225,11 @@ index 0000000000..ad913fc990 + +3. Translate the trace record to readable format. + # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log -diff --git a/configure b/configure -index 9a79a004d7..b0dbdded06 100755 ---- a/configure -+++ b/configure -@@ -5189,7 +5189,6 @@ if test "$skip_meson" = no; then - -Dwerror=$(if test "$werror" = yes; then echo true; else echo false; fi) \ - -Dstrip=$(if test "$strip_opt" = yes; then echo true; else echo false; fi) \ - -Db_pie=$(if test "$pie" = yes; then echo true; else echo false; fi) \ -- ${staticpic:+-Db_staticpic=$staticpic} \ - -Db_coverage=$(if test "$gcov" = yes; then echo true; else echo false; fi) \ - -Db_lto=$lto -Dcfi=$cfi -Dcfi_debug=$cfi_debug \ - -Dmalloc=$malloc -Dmalloc_trim=$malloc_trim -Dsparse=$sparse \ diff --git a/meson.build b/meson.build -index b3e7ec0e92..17707c8748 100644 +index 96de1a6ef9..5f6ba86dbb 100644 --- a/meson.build +++ b/meson.build -@@ -1,6 +1,6 @@ - project('qemu', ['c'], meson_version: '>=0.55.0', - default_options: ['warning_level=1', 'c_std=gnu11', 'cpp_std=gnu++11', 'b_colorout=auto'] + -- 
(meson.version().version_compare('>=0.56.0') ? [ 'b_staticpic=false' ] : []), -+ [ 'b_staticpic=false' ], - version: run_command('head', meson.source_root() / 'VERSION').stdout().strip()) - - not_found = dependency('', required: false) -@@ -1790,7 +1790,9 @@ if capstone_opt == 'internal' +@@ -2108,7 +2108,9 @@ if capstone_opt == 'internal' # Include all configuration defines via a header file, which will wind up # as a dependency on the object file, and thus changes here will result # in a rebuild. diff --git a/0005-Enable-disable-devices-for-RHEL.patch b/0006-Enable-disable-devices-for-RHEL.patch similarity index 93% rename from 0005-Enable-disable-devices-for-RHEL.patch rename to 0006-Enable-disable-devices-for-RHEL.patch index 6ef5467..345bd20 100644 --- a/0005-Enable-disable-devices-for-RHEL.patch +++ b/0006-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 0818ec19b1626de85d061b240e0c369a6fb524fb Mon Sep 17 00:00:00 2001 +From af4c83ed637bfda003ae86133413d53cefda3654 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 15 Jul 2021 03:22:36 -0400 Subject: Enable/disable devices for RHEL @@ -7,29 +7,30 @@ This commit adds all changes related to changes in supported devices. 
Signed-off-by: Miroslav Rezanina -- -Rebase notes (210623): +Rebase notes (6.1.0): - Added CONFIG_TPM (except s390x) - -Rebase notes (210714): - default-configs moved to configs - -Rebaes notes (6.1.0 RC2): - Use --with-device- configure option to use rhel configs -Merged patches (210526): -- c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak +Rebase notes (6.2.0 RC0): +- Add CONFIG_ISA_FDC -Merged patches (6.1.0 RC1): +Rebase notes (6.2.0 RC3): +- Do not remove -no-hpet documentation + +Merged patches (6.1.0): +- c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak - 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI - f2fe835153 aarch64-rh-devices: add CONFIG_PXB - b5431733ad disable CONFIG_USB_STORAGE_BOT - 478ba0cdf6 Disable TPM passthrough - -Merged patches (6.1.0): - 2504d68a7c aarch64: Add USB storage devices - 51c2a3253c disable ac97 audio + +Merged patches (6.2.0 RC0): +- 9f2f9fa2ba disable sga device --- - .distro/qemu-kvm.spec.template | 12 ++- + .distro/qemu-kvm.spec.template | 9 +- .../aarch64-softmmu/aarch64-rh-devices.mak | 33 ++++++ .../ppc64-softmmu/ppc64-rh-devices.mak | 35 ++++++ configs/devices/rh-virtio.mak | 10 ++ @@ -48,12 +49,11 @@ Merged patches (6.1.0): hw/ppc/spapr_cpu_core.c | 2 + hw/timer/hpet.c | 8 ++ hw/usb/meson.build | 2 +- - qemu-options.hx | 4 - target/arm/cpu_tcg.c | 10 ++ target/ppc/cpu-models.c | 10 ++ target/s390x/cpu_models_sysemu.c | 3 + target/s390x/kvm/kvm.c | 8 ++ - 24 files changed, 285 insertions(+), 15 deletions(-) + 23 files changed, 283 insertions(+), 10 deletions(-) create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak create mode 100644 configs/devices/rh-virtio.mak @@ -180,7 +180,7 @@ index 0000000000..d3b38312e1 +CONFIG_WDT_DIAG288=y diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..24b96ba0c4 
+index 0000000000..1f7a9ab024 --- /dev/null +++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak @@ -0,0 +1,102 @@ @@ -206,6 +206,7 @@ index 0000000000..24b96ba0c4 +CONFIG_EDU=y +CONFIG_FDC=y +CONFIG_FDC_SYSBUS=y ++CONFIG_FDC_ISA=y +CONFIG_FW_CFG_DMA=y +CONFIG_HDA=y +CONFIG_HYPERV=y @@ -253,7 +254,6 @@ index 0000000000..24b96ba0c4 +CONFIG_SERIAL_ISA=y +CONFIG_SERIAL_PCI=y +CONFIG_SEV=y -+CONFIG_SGA=y +CONFIG_SMBIOS=y +CONFIG_SMBUS_EEPROM=y +CONFIG_TEST_DEVICES=y @@ -297,10 +297,10 @@ index 0000000000..2cd20f54d2 +# We need "hpet" +CONFIG_HPET=y diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index 778e27b659..802501881f 100644 +index ebe08ed831..381ef2ddcf 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c -@@ -424,8 +424,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) +@@ -438,8 +438,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; pm->acpi_memory_hotplug.is_enabled = true; pm->cpu_hotplug_legacy = true; @@ -310,7 +310,7 @@ index 778e27b659..802501881f 100644 + pm->disable_s4 = 1; pm->s4_val = 2; pm->use_acpi_hotplug_bridge = true; - + pm->keep_pci_slot_hpc = true; diff --git a/hw/arm/meson.build b/hw/arm/meson.build index 721a8eb8be..87ed4dd914 100644 --- a/hw/arm/meson.build @@ -325,7 +325,7 @@ index 721a8eb8be..87ed4dd914 100644 arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 9014cd30b3..46cee8c544 100644 +index 21d18ac2e3..97fa6de423 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -48,6 +48,8 @@ @@ -337,7 +337,7 @@ index 9014cd30b3..46cee8c544 100644 /********************************************************/ /* debug Floppy devices */ -@@ -2320,6 +2322,14 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) +@@ -2337,6 +2339,14 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl 
*fdctrl, Error **errp) FDrive *drive; static int command_tables_inited = 0; @@ -408,7 +408,7 @@ index fdca6ca659..fa1a7eee51 100644 Also accept 8 MB/16 MB for backward compatibility. */ if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index d3e738320b..7c77312463 100644 +index ce89fd0aa3..fbcf802b13 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c @@ -232,7 +232,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) @@ -444,10 +444,10 @@ index baba62f357..bc360347ea 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index a30546c5d5..c2877978d9 100644 +index f5bc81296d..282d01e374 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1814,6 +1814,7 @@ static const E1000Info e1000_devices[] = { +@@ -1821,6 +1821,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -455,7 +455,7 @@ index a30546c5d5..c2877978d9 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1826,6 +1827,7 @@ static const E1000Info e1000_devices[] = { +@@ -1833,6 +1834,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -464,7 +464,7 @@ index a30546c5d5..c2877978d9 100644 static void e1000_register_types(void) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 4f316a6f9d..64178f0f9a 100644 +index 58e7341cb7..8ba34f6a1d 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -370,10 +370,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { @@ -512,23 +512,8 @@ index de853d780d..0776ae6a20 100644 hw_usb_modules += {'smartcard': usbsmartcard_ss} endif -diff --git a/qemu-options.hx b/qemu-options.hx -index 83aa59a920..ac596e01a1 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -2412,10 +2412,6 @@ ERST - - DEF("no-hpet", 0, QEMU_OPTION_no_hpet, - "-no-hpet disable HPET\n", QEMU_ARCH_I386) --SRST 
--``-no-hpet`` -- Disable HPET support. --ERST - - DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, - "-acpitable [sig=str][,rev=n][,oem_id=str][,oem_table_id=str][,oem_rev=n][,asl_compiler_id=str][,asl_compiler_rev=n][,{data|file}=file1[:file2]...]\n" diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index ed444bf436..fafd019539 100644 +index 13d0e9b195..3826fa5122 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c @@ -22,6 +22,7 @@ @@ -536,7 +521,7 @@ index ed444bf436..fafd019539 100644 #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - #ifdef CONFIG_TCG + #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { @@ -375,6 +376,7 @@ static void cortex_a9_initfn(Object *obj) @@ -606,7 +591,7 @@ index ed444bf436..fafd019539 100644 { .name = "max", .initfn = arm_max_initfn }, #endif diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 87e4228614..6eaa65efff 100644 +index 4baa111713..d779c4d1d5 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -636,7 +621,7 @@ index 87e4228614..6eaa65efff 100644 POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, "POWER7 v2.3") POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, -@@ -782,6 +786,7 @@ +@@ -784,6 +788,7 @@ /* PowerPC CPU aliases */ PowerPCCPUAlias ppc_cpu_aliases[] = { @@ -644,7 +629,7 @@ index 87e4228614..6eaa65efff 100644 { "403", "403gc" }, { "405", "405d4" }, { "405cr", "405crc" }, -@@ -940,12 +945,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -942,12 +947,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "7447a", "7447a_v1.2" }, { "7457a", "7457a_v1.2" }, { "apollo7pm", "7457a_v1.0" }, @@ -660,15 +645,15 @@ index 87e4228614..6eaa65efff 100644 { "power7", "power7_v2.3" }, { "power7+", "power7+_v2.1" }, { "power8e", "power8e_v2.1" }, -@@ -955,6 +963,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { - { 
"power10", "power10_v1.0" }, +@@ -957,6 +965,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "power10", "power10_v2.0" }, #endif +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* Generic PowerPCs */ #if defined(TARGET_PPC64) { "ppc64", "970fx_v3.1" }, -@@ -962,5 +971,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -964,5 +973,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "ppc32", "604" }, { "ppc", "604" }, { "default", "604" }, diff --git a/0006-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch similarity index 89% rename from 0006-Machine-type-related-general-changes.patch rename to 0007-Machine-type-related-general-changes.patch index 3c5c4df..9baf215 100644 --- a/0006-Machine-type-related-general-changes.patch +++ b/0007-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From fd2a04cf2221d8c541a70a66021c12a9b9c93aaa Mon Sep 17 00:00:00 2001 +From ef9b78c3f2810541eac453a3f8a8753763b1378d Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -8,51 +8,60 @@ split to allow easier review. It contains changes not related to any architecture. 
Signed-off-by: Miroslav Rezanina +-- +Rebase notes (6.2.0 RC0): +- Do not duplicate minimal_version_id for piix4_pm +- Remove empty line chunks in serial.c +- Remove migration.h include in serial.c -Merged patches (6.1.0 RC1): -- f2fb42a3c6 redhat: add missing entries in hw_compat_rhel_8_4 -- 1949ec258e hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3 +Rebase notes (6.2.0 RC1): +- Update hw_compat_rhel_8_5 (from MR 66) Merged patches (6.1.0): +- f2fb42a3c6 redhat: add missing entries in hw_compat_rhel_8_4 +- 1949ec258e hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3 - a3995e2eff Remove RHEL 7.0.0 machine type (only generic changes) - ad3190a79b Remove RHEL 7.1.0 machine type (only generic changes) - 84bbe15d4e Remove RHEL 7.2.0 machine type (only generic changes) - 0215eb3356 Remove RHEL 7.3.0 machine types (only generic changes) - af69d1ca6e Remove RHEL 7.4.0 machine types (only generic changes) - 8f7a74ab78 Remove RHEL 7.5.0 machine types (only generic changes) + +Merged patches (6.2.0 RC0): +- d687ac13d2 redhat: Define hw_compat_rhel_8_5 --- - hw/acpi/piix4.c | 5 +- + hw/acpi/piix4.c | 6 +- hw/arm/virt.c | 2 +- - hw/char/serial.c | 4 + - hw/core/machine.c | 159 +++++++++++++++++++++++++++++++++++ + hw/core/machine.c | 180 +++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- hw/i386/pc_piix.c | 2 + hw/i386/pc_q35.c | 2 + hw/net/rtl8139.c | 4 +- hw/rtc/mc146818rtc.c | 2 + - hw/smbios/smbios.c | 46 +++++++++- + hw/smbios/smbios.c | 46 ++++++++- hw/timer/i8254_common.c | 2 +- hw/usb/hcd-uhci.c | 4 +- - hw/usb/hcd-xhci-pci.c | 59 ++++++++++--- + hw/usb/hcd-xhci-pci.c | 59 +++++++++--- hw/usb/hcd-xhci-pci.h | 1 + - include/hw/boards.h | 18 ++++ + include/hw/boards.h | 21 ++++ include/hw/firmware/smbios.h | 5 +- include/hw/i386/pc.h | 3 + - 17 files changed, 295 insertions(+), 25 deletions(-) + 16 files changed, 315 insertions(+), 26 deletions(-) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 
48f7a1edbc..af21cb4ac3 100644 +index f0b5fac44a..8d6011c0a3 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -279,6 +279,7 @@ static const VMStateDescription vmstate_acpi = { +@@ -278,7 +278,7 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id) + static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, - .minimum_version_id = 3, +- .minimum_version_id = 3, + .minimum_version_id = 2, .post_load = vmstate_acpi_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), -@@ -644,8 +645,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) +@@ -644,8 +644,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) static Property piix4_pm_properties[] = { DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), @@ -61,13 +70,13 @@ index 48f7a1edbc..af21cb4ac3 100644 + DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), + DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), - DEFINE_PROP_BOOL("acpi-pci-hotplug-with-bridge-support", PIIX4PMState, + DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, use_acpi_hotplug_bridge, true), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 81eda46b0b..cd1a2d985d 100644 +index 30da05dfe0..5de4d9d73b 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1524,7 +1524,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1590,7 +1590,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, @@ -76,50 +85,35 @@ index 81eda46b0b..cd1a2d985d 100644 smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, -diff --git a/hw/char/serial.c b/hw/char/serial.c -index 7061aacbce..8fa5ab1e8e 100644 ---- a/hw/char/serial.c -+++ b/hw/char/serial.c -@@ -37,6 +37,7 @@ - #include "trace.h" - #include "hw/qdev-properties.h" - #include "hw/qdev-properties-system.h" -+#include "migration/migration.h" - - #define UART_LCR_DLAB 0x80 /* Divisor latch access bit */ - -@@ -770,6 +771,7 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { - static bool serial_fifo_timeout_timer_needed(void *opaque) - { - SerialState *s = (SerialState *)opaque; -+ - return timer_pending(s->fifo_timeout_timer); - } - -@@ -787,6 +789,7 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { - static bool serial_timeout_ipending_needed(void *opaque) - { - SerialState *s = (SerialState *)opaque; -+ - return s->timeout_ipending != 0; - } - -@@ -804,6 +807,7 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { - static bool serial_poll_needed(void *opaque) - { - SerialState *s = (SerialState *)opaque; -+ - return s->poll_msl >= 0; - } - diff --git a/hw/core/machine.c b/hw/core/machine.c -index 54e040587d..d681a06a47 100644 +index 53a99abc56..53a3caf4fb 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -37,6 +37,165 @@ +@@ -37,6 +37,186 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-pci.h" ++/* ++ * Mostly the same as hw_compat_6_0 and hw_compat_6_1 ++ */ ++GlobalProperty hw_compat_rhel_8_5[] = { ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "gpex-pcihost", "allow-unmapped-accesses", "false" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "i8042", "extended-state", "false"}, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "nvme-ns", "eui64-default", "off"}, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "e1000", "init-vet", "off" }, ++ /* hw_compat_rhel_8_5 from 
hw_compat_6_0 */ ++ { "e1000e", "init-vet", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "vhost-vsock-device", "seqpacket", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_1 */ ++ { "vhost-user-vsock-device", "seqpacket", "off" }, ++}; ++const size_t hw_compat_rhel_8_5_len = G_N_ELEMENTS(hw_compat_rhel_8_5); ++ +/* + * Mostly the same as hw_compat_5_2 + */ @@ -279,9 +273,9 @@ index 54e040587d..d681a06a47 100644 +}; +const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); + - GlobalProperty hw_compat_6_0[] = { - { "gpex-pcihost", "allow-unmapped-accesses", "false" }, - { "i8042", "extended-state", "false"}, + GlobalProperty hw_compat_6_1[] = { + { "vhost-user-vsock-device", "seqpacket", "off" }, + { "nvme-ns", "shared", "off" }, diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c index 90851e730b..a91c5d7467 100644 --- a/hw/display/vga-isa.c @@ -296,7 +290,7 @@ index 90851e730b..a91c5d7467 100644 }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 30b8bd6ea9..eebb4f3141 100644 +index 223dd3e05d..dda3f64f19 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, @@ -309,10 +303,10 @@ index 30b8bd6ea9..eebb4f3141 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 04b4a4788d..e7724fd02c 100644 +index e1e100316d..235054a643 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -199,6 +199,8 @@ static void pc_q35_init(MachineState *machine) +@@ -200,6 +200,8 @@ static void pc_q35_init(MachineState *machine) smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -460,10 +454,10 @@ index 050875b497..32935da46c 100644 vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 0cb02a6432..962a9622e5 100644 +index d1b5657d72..7930b868fa 100644 --- a/hw/usb/hcd-uhci.c +++ b/hw/usb/hcd-uhci.c 
-@@ -1167,12 +1167,14 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) +@@ -1166,11 +1166,13 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) UHCIState *s = UHCI(dev); uint8_t *pci_conf = s->dev.config; int i; @@ -471,14 +465,13 @@ index 0cb02a6432..962a9622e5 100644 pci_conf[PCI_CLASS_PROG] = 0x00; /* TODO: reset value should be 0. */ - pci_conf[USB_SBRN] = USB_RELEASE_1; // release number - + pci_conf[USB_SBRN] = USB_RELEASE_1; /* release number */ - pci_config_set_interrupt_pin(pci_conf, u->info.irq_pin + 1); + irq_pin = u->info.irq_pin; + pci_config_set_interrupt_pin(pci_conf, irq_pin + 1); + s->irq = pci_allocate_irq(dev); if (s->masterbus) { - USBPort *ports[NB_PORTS]; diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c index e934b1a5b1..e18b05e528 100644 --- a/hw/usb/hcd-xhci-pci.c @@ -591,13 +584,16 @@ index c193f79443..086a1feb1e 100644 #endif diff --git a/include/hw/boards.h b/include/hw/boards.h -index accd6eff35..f5423f351a 100644 +index 9c1c190104..b0a6e05b48 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -416,4 +416,22 @@ extern const size_t hw_compat_2_2_len; +@@ -441,4 +441,25 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; ++extern GlobalProperty hw_compat_rhel_8_5[]; ++extern const size_t hw_compat_rhel_8_5_len; ++ +extern GlobalProperty hw_compat_rhel_8_4[]; +extern const size_t hw_compat_rhel_8_4_len; + @@ -634,7 +630,7 @@ index 5a0dd0c8cf..2cb1ec2bab 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 88dffe7517..17ad7bfc3a 100644 +index 9ab39e428f..7ccc9a1a07 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -107,6 +107,9 @@ struct PCMachineClass { diff --git a/0007-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch similarity index 91% rename from 0007-Add-aarch64-machine-types.patch 
rename to 0008-Add-aarch64-machine-types.patch index eb79d4f..07beb75 100644 --- a/0007-Add-aarch64-machine-types.patch +++ b/0008-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 38c87d1469539d2b5224d298db85b0fd43d8bb0c Mon Sep 17 00:00:00 2001 +From 19d36c4519a1a560cce60b212e3afdf7eb026e45 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -7,25 +7,25 @@ Adding changes to add RHEL machine types for aarch64 architecture. Signed-off-by: Miroslav Rezanina --- -Rebase notes (210623): +Rebase notes (6.1.0): - Use CONFIG_TPM check when using TPM structures - -Rebase notes (6.1.0 rc0): - Add support for default_bus_bypass_iommu - -Merged patches (6.1.0 rc1): - ea4c0b32d9 arm/virt: Register highmem and gic-version as class properties - 895e1fa86a hw/arm/virt: Add 8.5 and 9.0 machine types and remove older ones + +Merged patches (6.2.0 RC0): +- 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type +- f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type --- - hw/arm/virt.c | 210 +++++++++++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 205 +++++++++++++++++++++++++++++++++++++++++- include/hw/arm/virt.h | 8 ++ - 2 files changed, 217 insertions(+), 1 deletion(-) + 2 files changed, 212 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index cd1a2d985d..3c8e6de36d 100644 +index 5de4d9d73b..7d51824263 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -78,6 +78,7 @@ +@@ -79,6 +79,7 @@ #include "hw/char/pl011.h" #include "qemu/guest-random.h" @@ -33,7 +33,7 @@ index cd1a2d985d..3c8e6de36d 100644 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -104,7 +105,48 @@ +@@ -105,7 +106,48 @@ DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) @@ -83,7 +83,7 @@ index cd1a2d985d..3c8e6de36d 100644 
/* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -2117,6 +2159,7 @@ static void machvirt_init(MachineState *machine) +@@ -2180,6 +2222,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -91,7 +91,7 @@ index cd1a2d985d..3c8e6de36d 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2144,6 +2187,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2207,6 +2250,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -99,7 +99,7 @@ index cd1a2d985d..3c8e6de36d 100644 static bool virt_get_highmem(Object *obj, Error **errp) { -@@ -2241,6 +2285,7 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, +@@ -2304,6 +2348,7 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, visit_type_OnOffAuto(v, name, &vms->acpi, errp); } @@ -107,7 +107,7 @@ index cd1a2d985d..3c8e6de36d 100644 static bool virt_get_ras(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2268,6 +2313,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) +@@ -2331,6 +2376,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) vms->mte = value; } @@ -115,7 +115,7 @@ index cd1a2d985d..3c8e6de36d 100644 static char *virt_get_gic_version(Object *obj, Error **errp) { -@@ -2603,6 +2649,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2666,6 +2712,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return fixed_ipa ? 
0 : requested_pa_size; } @@ -123,7 +123,7 @@ index cd1a2d985d..3c8e6de36d 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -2948,3 +2995,164 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -3031,3 +3078,159 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -277,22 +277,17 @@ index cd1a2d985d..3c8e6de36d 100644 +} +type_init(rhel_machine_init); + -+static void rhel900_virt_options(MachineClass *mc) -+{ -+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); -+} -+DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) -+ +static void rhel850_virt_options(MachineClass *mc) +{ -+ rhel900_virt_options(mc); ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); +} -+DEFINE_RHEL_MACHINE(8, 5, 0) ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 9661c46699..ac21ca712a 100644 +index dc6b66ffc8..9364628847 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -171,9 +171,17 @@ struct VirtMachineState { +@@ -175,9 +175,17 @@ struct VirtMachineState { #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) diff --git a/0008-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch similarity index 88% rename from 0008-Add-ppc64-machine-types.patch rename to 0009-Add-ppc64-machine-types.patch index 932a2a8..90a6ff4 100644 --- a/0008-Add-ppc64-machine-types.patch +++ b/0009-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 01fe9632c4dcc7a6ce9c0100db6256e815e6bf87 Mon Sep 17 00:00:00 2001 +From 2d595bc1744fc764ef506fd6ed6555f267d01ea4 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -7,10 +7,12 @@ Adding changes to add RHEL machine types for ppc64 architecture. 
Signed-off-by: Miroslav Rezanina -Merged patches (6.1.0 RC1): -- c438c25ac3 redhat: Define pseries-rhel8.5.0 machine type +Rebase notes (6.2.0 rc1): +- Fixed rebase conflict relicts +- Update machine type compat for 6.2 (from MR 66) Merged patches (6.1.0): +- c438c25ac3 redhat: Define pseries-rhel8.5.0 machine type - a3995e2eff Remove RHEL 7.0.0 machine type (only ppc64 changes) - ad3190a79b Remove RHEL 7.1.0 machine type (only ppc64 changes) - 84bbe15d4e Remove RHEL 7.2.0 machine type (only ppc64 changes) @@ -18,20 +20,20 @@ Merged patches (6.1.0): - af69d1ca6e Remove RHEL 7.4.0 machine types (only ppc64 changes) - 8f7a74ab78 Remove RHEL 7.5.0 machine types (only ppc64 changes) --- - hw/ppc/spapr.c | 238 +++++++++++++++++++++++++++++++++++++++- + hw/ppc/spapr.c | 243 ++++++++++++++++++++++++++++++++++++++++ hw/ppc/spapr_cpu_core.c | 13 +++ include/hw/ppc/spapr.h | 4 + target/ppc/compat.c | 13 ++- target/ppc/cpu.h | 1 + - target/ppc/kvm.c | 28 +++++ + target/ppc/kvm.c | 27 +++++ target/ppc/kvm_ppc.h | 13 +++ - 7 files changed, 307 insertions(+), 3 deletions(-) + 7 files changed, 313 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 81699d4f8b..1386e45e70 100644 +index 3b5fd749be..f4bb5f15f0 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -1592,6 +1592,9 @@ static void spapr_machine_reset(MachineState *machine) +@@ -1593,6 +1593,9 @@ static void spapr_machine_reset(MachineState *machine) pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); @@ -41,7 +43,7 @@ index 81699d4f8b..1386e45e70 100644 first_ppc_cpu = POWERPC_CPU(first_cpu); if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && -@@ -3315,6 +3318,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +@@ -3288,6 +3291,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) spapr->host_serial = g_strdup(value); } @@ -62,7 +64,7 @@ index 81699d4f8b..1386e45e70 100644 static void spapr_instance_init(Object 
*obj) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); -@@ -3393,6 +3410,12 @@ static void spapr_instance_init(Object *obj) +@@ -3366,6 +3383,12 @@ static void spapr_instance_init(Object *obj) spapr_get_host_serial, spapr_set_host_serial); object_property_set_description(obj, "host-serial", "Host serial number to advertise in guest device tree"); @@ -75,7 +77,7 @@ index 81699d4f8b..1386e45e70 100644 } static void spapr_machine_finalizefn(Object *obj) -@@ -4634,6 +4657,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4614,6 +4637,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) vmc->client_architecture_support = spapr_vof_client_architecture_support; vmc->quiesce = spapr_vof_quiesce; vmc->setprop = spapr_vof_setprop; @@ -83,15 +85,15 @@ index 81699d4f8b..1386e45e70 100644 } static const TypeInfo spapr_machine_info = { -@@ -4685,6 +4709,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4665,6 +4689,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-6.1 + * pseries-6.2 */ -@@ -4786,6 +4811,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4781,6 +4806,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -99,7 +101,7 @@ index 81699d4f8b..1386e45e70 100644 /* * pseries-4.0 -@@ -4805,6 +4831,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -4800,6 +4826,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; return true; } @@ -108,23 +110,7 @@ index 81699d4f8b..1386e45e70 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -4963,6 +4991,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); - /* - * pseries-2.7 - */ -+#endif - - static bool 
phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, - uint64_t *buid, hwaddr *pio, -@@ -5018,6 +5047,7 @@ static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, - return true; - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void spapr_machine_2_7_class_options(MachineClass *mc) - { - SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -5132,10 +5162,214 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -5127,6 +5155,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -148,8 +134,14 @@ index 81699d4f8b..1386e45e70 100644 + +static void spapr_machine_rhel850_class_options(MachineClass *mc) +{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ + /* The default machine type must apply the RHEL specific defaults */ + spapr_machine_rhel_default_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++ smc->pre_6_2_numa_affinity = true; ++ mc->smp_props.prefer_sockets = true; +} + +DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); @@ -239,8 +231,7 @@ index 81699d4f8b..1386e45e70 100644 + compat_props_add(mc->compat_props, hw_compat_rhel_8_1, + hw_compat_rhel_8_1_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - --static void spapr_machine_register_types(void) ++ + /* from pseries-4.2 */ + smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; +} @@ -255,8 +246,7 @@ index 81699d4f8b..1386e45e70 100644 + */ + +static void spapr_machine_rhel800_class_options(MachineClass *mc) - { -- type_register_static(&spapr_machine_info); ++{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_rhel810_class_options(mc); @@ -274,8 +264,8 @@ index 81699d4f8b..1386e45e70 100644 + smc->dr_phb_enabled = false; + smc->broken_host_serial_model = true; + 
smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; - } - ++} ++ +DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", false); + +/* @@ -339,10 +329,11 @@ index 81699d4f8b..1386e45e70 100644 +} + +DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false); -+ - type_init(spapr_machine_register_types) + + static void spapr_machine_register_types(void) + { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 64178f0f9a..2bff13a6ab 100644 +index 8ba34f6a1d..78eca1c04a 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -24,6 +24,7 @@ @@ -380,18 +371,18 @@ index 64178f0f9a..2bff13a6ab 100644 qdev_unrealize(DEVICE(cpu)); return false; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 637652ad16..589d1a262c 100644 +index ee7504b976..fcd5bf9302 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -146,6 +146,7 @@ struct SpaprMachineClass { - bool pre_5_1_assoc_refpoints; +@@ -154,6 +154,7 @@ struct SpaprMachineClass { bool pre_5_2_numa_associativity; + bool pre_6_2_numa_affinity; + bool has_power9_support; bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, -@@ -230,6 +231,9 @@ struct SpaprMachineState { +@@ -238,6 +239,9 @@ struct SpaprMachineState { /* Set by -boot */ char *boot_device; @@ -427,10 +418,10 @@ index 7949a24f5a..f207a9ba01 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index 93d308ac8f..dca9bdf846 100644 +index e946da5f3a..23e8b76c85 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1378,6 +1378,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1401,6 +1401,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -439,7 +430,7 @@ index 93d308ac8f..dca9bdf846 100644 uint32_t min_compat_pvr, uint32_t 
max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index dc93b99189..0f4f072fbd 100644 +index dc93b99189..154888cce5 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -90,6 +90,7 @@ static int cap_ppc_nested_kvm_hv; @@ -458,15 +449,10 @@ index dc93b99189..0f4f072fbd 100644 cap_large_decr = kvmppc_get_dec_bits(); cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); /* -@@ -2558,9 +2560,20 @@ int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) - return 0; +@@ -2563,6 +2565,16 @@ int kvmppc_has_cap_rpt_invalidate(void) + return cap_rpt_invalidate; } -+<<<<<<< HEAD - int kvmppc_has_cap_rpt_invalidate(void) - { - return cap_rpt_invalidate; -+======= +bool kvmppc_has_cap_secure_guest(void) +{ + return !!cap_ppc_secure_guest; @@ -475,11 +461,12 @@ index dc93b99189..0f4f072fbd 100644 +int kvmppc_enable_cap_secure_guest(void) +{ + return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1); -+>>>>>>> 89c02f0e37... Add ppc64 machine types - } - ++} ++ PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) -@@ -2959,3 +2972,18 @@ bool kvm_arch_cpu_check_are_resettable(void) + { + uint32_t host_pvr = mfpvr(); +@@ -2959,3 +2971,18 @@ bool kvm_arch_cpu_check_are_resettable(void) { return true; } diff --git a/0009-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch similarity index 87% rename from 0009-Add-s390x-machine-types.patch rename to 0010-Add-s390x-machine-types.patch index 1dda855..1095f3a 100644 --- a/0009-Add-s390x-machine-types.patch +++ b/0010-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 8ae9e40c13f4beb1eedaef46e6b00f562de692e2 Mon Sep 17 00:00:00 2001 +From ea22b5ae0a89ef53f31f67bb6845fd6c45d4f412 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -7,15 +7,18 @@ Adding changes to add RHEL machine types for s390x architecture. 
Signed-off-by: Miroslav Rezanina -- -Merged patches (6.1.0 RC1): +Merged patches (6.1.0): - 64a9a5c971 hw/s390x: Remove the RHEL7-only machine type - 395516d62b redhat: s390x: add rhel-8.5.0 compat machine + +Merged patches (6.2.0 RC0): +- 3bf66f4520 redhat: Add s390x machine type compatibility update for 6.1 rebase --- - hw/s390x/s390-virtio-ccw.c | 66 +++++++++++++++++++++++++++++++++++++- - 1 file changed, 65 insertions(+), 1 deletion(-) + hw/s390x/s390-virtio-ccw.c | 67 +++++++++++++++++++++++++++++++++++++- + 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index e4b18aef49..9a51dd8de2 100644 +index 653587ea62..4af14cb9ca 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -767,7 +767,7 @@ bool css_migration_enabled(void) @@ -32,10 +35,10 @@ index e4b18aef49..9a51dd8de2 100644 type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_6_1_instance_options(MachineState *machine) + static void ccw_machine_6_2_instance_options(MachineState *machine) { } -@@ -1082,6 +1083,69 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1100,6 +1101,70 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); @@ -47,6 +50,7 @@ index e4b18aef49..9a51dd8de2 100644 + +static void ccw_machine_rhel850_class_options(MachineClass *mc) +{ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); +} +DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); + diff --git a/0010-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch similarity index 95% rename from 0010-Add-x86_64-machine-types.patch rename to 0011-Add-x86_64-machine-types.patch index 59c7c14..aecc3fb 100644 --- a/0010-Add-x86_64-machine-types.patch +++ b/0011-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 
afc727ec3b2ad5a5c4033f25a63c877500b0194d Mon Sep 17 00:00:00 2001 +From afe0cbc5cdb98998b37cf48e9a1c87a110d9fbb3 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -7,14 +7,12 @@ Adding changes to add RHEL machine types for x86_64 architecture. Signed-off-by: Miroslav Rezanina -Rebase notes (210609): +Rebase notes (6.1.0): - Update qemu64 cpu spec -Merged patches (6.1.0 RC1): +Merged patches (6.1.0): - 59c284ad3b x86: Add x86 rhel8.5 machine types - a8868b42fe redhat: x86: Enable 'kvm-asyncpf-int' by default - -Merged patches (6.1.0): - a3995e2eff Remove RHEL 7.0.0 machine type (only x86_64 changes) - ad3190a79b Remove RHEL 7.1.0 machine type (only x86_64 changes) - 84bbe15d4e Remove RHEL 7.2.0 machine type (only x86_64 changes) @@ -33,10 +31,10 @@ Merged patches (6.1.0): 8 files changed, 385 insertions(+), 7 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index c2b9d62a35..719cedaa4b 100644 +index a2ef40ecbc..b6d2db8d04 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -362,6 +362,116 @@ GlobalProperty pc_compat_1_4[] = { +@@ -371,6 +371,116 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -153,7 +151,7 @@ index c2b9d62a35..719cedaa4b 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1713,6 +1823,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1694,6 +1804,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->pvh_enabled = true; pcmc->kvmclock_create_always = true; assert(!mc->get_hotplug_handler); @@ -161,9 +159,9 @@ index c2b9d62a35..719cedaa4b 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1724,7 +1835,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1704,7 +1815,8 @@ static void 
pc_machine_class_init(ObjectClass *oc, void *data) + mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; - mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; - mc->max_cpus = 255; + /* 240: max CPU count for RHEL */ @@ -172,10 +170,10 @@ index c2b9d62a35..719cedaa4b 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index eebb4f3141..19d6721f3d 100644 +index dda3f64f19..dabc6c1933 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -51,6 +51,7 @@ +@@ -50,6 +50,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/xen.h" @@ -202,7 +200,7 @@ index eebb4f3141..19d6721f3d 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -937,3 +939,65 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -951,3 +953,65 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -269,10 +267,10 @@ index eebb4f3141..19d6721f3d 100644 +DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, + pc_machine_rhel760_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index e7724fd02c..8032babda5 100644 +index 235054a643..04c911da18 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -196,8 +196,8 @@ static void pc_q35_init(MachineState *machine) +@@ -197,8 +197,8 @@ static void pc_q35_init(MachineState *machine) if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ @@ -283,7 +281,7 @@ index e7724fd02c..8032babda5 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -337,6 +337,7 @@ static void pc_q35_init(MachineState *machine) +@@ -342,6 +342,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -291,7 +289,7 @@ index e7724fd02c..8032babda5 
100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -602,3 +603,175 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -620,3 +621,175 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -468,10 +466,10 @@ index e7724fd02c..8032babda5 100644 +DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, + pc_q35_machine_rhel760_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index f5423f351a..a2b1681027 100644 +index b0a6e05b48..3c3d2ad450 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -242,6 +242,8 @@ struct MachineClass { +@@ -263,6 +263,8 @@ struct MachineClass { strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; bool auto_enable_numa_with_memdev; @@ -481,10 +479,10 @@ index f5423f351a..a2b1681027 100644 bool smbus_no_migration_support; bool nvdimm_supported; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 17ad7bfc3a..75b338d66d 100644 +index 7ccc9a1a07..9689a58b14 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -277,6 +277,27 @@ extern const size_t pc_compat_1_5_len; +@@ -280,6 +280,27 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; @@ -525,10 +523,10 @@ index d95028018e..7b004065ae 100644 }; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index e69abe48e3..4a3077b3f0 100644 +index 5a698bde19..a668f521ac 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c -@@ -3261,6 +3261,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3336,6 +3336,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -536,7 +534,7 @@ index e69abe48e3..4a3077b3f0 100644 kvm_msr_buf_reset(cpu); -@@ -3579,6 +3580,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3665,6 
+3666,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; diff --git a/0011-Enable-make-check.patch b/0012-Enable-make-check.patch similarity index 89% rename from 0011-Enable-make-check.patch rename to 0012-Enable-make-check.patch index ce8ef90..4cd1ecc 100644 --- a/0011-Enable-make-check.patch +++ b/0012-Enable-make-check.patch @@ -1,4 +1,4 @@ -From e05f30c4899bfec4084cda3223e7141ddc97be6f Mon Sep 17 00:00:00 2001 +From b071f3eaa77dde1567d70b43d0b2975efe380da2 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -11,6 +11,9 @@ Signed-off-by: Miroslav Rezanina Rebase changes (6.1.0): - removed unnecessary test changes +Rebase changes (6.2.0 RC0): +- new way of disabling bios-table-test + Merged patches (6.1.0): - 2f129df7d3 redhat: Enable the 'test-block-iothread' test again --- @@ -24,14 +27,14 @@ Merged patches (6.1.0): tests/qtest/fuzz-virtio-scsi-test.c | 2 +- tests/qtest/hd-geo-test.c | 4 ++++ tests/qtest/lpc-ich9-test.c | 2 +- - tests/qtest/meson.build | 11 +++-------- + tests/qtest/meson.build | 13 ++++--------- tests/qtest/prom-env-test.c | 4 ++++ tests/qtest/test-x86-cpuid-compat.c | 2 ++ tests/qtest/usb-hcd-xhci-test.c | 4 ++++ - 14 files changed, 40 insertions(+), 24 deletions(-) + 14 files changed, 41 insertions(+), 25 deletions(-) diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 -index 7bf29343d7..fd63402d78 100755 +index 1d2fa93a11..c8a2815f54 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -174,9 +174,9 @@ run_qemu -drive if=virtio @@ -59,10 +62,10 @@ index 7bf29343d7..fd63402d78 100755 *) ;; diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c -index 51d3a4e239..fe01fd8b7d 100644 +index 258874167e..62745181a8 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c -@@ -1303,7 +1303,7 @@ static void test_acpi_virt_tcg_numamem(void) +@@ -1371,7 
+1371,7 @@ static void test_acpi_virt_tcg_numamem(void) free_test_data(&data); } @@ -71,7 +74,7 @@ index 51d3a4e239..fe01fd8b7d 100644 static void test_acpi_virt_tcg_pxb(void) { test_data data = { -@@ -1335,7 +1335,7 @@ static void test_acpi_virt_tcg_pxb(void) +@@ -1403,7 +1403,7 @@ static void test_acpi_virt_tcg_pxb(void) free_test_data(&data); } @@ -80,17 +83,17 @@ index 51d3a4e239..fe01fd8b7d 100644 static void test_acpi_tcg_acpi_hmat(const char *machine) { test_data data; -@@ -1565,7 +1565,7 @@ int main(int argc, char *argv[]) - qtest_add_func("acpi/virt", test_acpi_virt_tcg); - qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem); - qtest_add_func("acpi/virt/memhp", test_acpi_virt_tcg_memhp); -- qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb); -+/* qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb); */ - qtest_add_func("acpi/virt/oem-fields", test_acpi_oem_fields_virt); +@@ -1644,7 +1644,7 @@ int main(int argc, char *argv[]) + qtest_add_func("acpi/virt", test_acpi_virt_tcg); + qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem); + qtest_add_func("acpi/virt/memhp", test_acpi_virt_tcg_memhp); +- qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb); ++/* qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb); */ + qtest_add_func("acpi/virt/oem-fields", test_acpi_oem_fields_virt); + } } - ret = g_test_run(); diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c -index 96849cec91..81df62e01a 100644 +index 83828ba270..294476b959 100644 --- a/tests/qtest/boot-serial-test.c +++ b/tests/qtest/boot-serial-test.c @@ -148,19 +148,23 @@ static testdef_t tests[] = { @@ -241,17 +244,17 @@ index fe0bef9980..7a9d51579b 100644 qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index 2bc3efd49f..53ce4b6416 100644 +index c9d8458062..049e06c057 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build -@@ -73,7 +73,6 @@ qtests_i386 = \ +@@ -68,7 +68,6 
@@ qtests_i386 = \ + (config_all_devices.has_key('CONFIG_RTL8139_PCI') ? ['rtl8139-test'] : []) + \ + (config_all_devices.has_key('CONFIG_E1000E_PCI_EXPRESS') ? ['fuzz-e1000e-test'] : []) + \ + (config_all_devices.has_key('CONFIG_ESP_PCI') ? ['am53c974-test'] : []) + \ +- (unpack_edk2_blobs ? ['bios-tables-test'] : []) + \ + qtests_pci + \ + ['fdc-test', 'ide-test', - 'hd-geo-test', - 'boot-order-test', -- 'bios-tables-test', - 'rtc-test', - 'i440fx-test', - 'fw_cfg-test', @@ -81,7 +80,6 @@ qtests_i386 = \ 'drive_del-test', 'tco-test', @@ -280,16 +283,18 @@ index 2bc3efd49f..53ce4b6416 100644 qtests_pci + ['migration-test', 'numa-test', 'cpu-plug-test', 'drive_del-test'] qtests_sh4 = (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) -@@ -186,7 +182,7 @@ qtests_aarch64 = \ +@@ -186,8 +182,8 @@ qtests_aarch64 = \ ['arm-cpu-features', 'numa-test', 'boot-serial-test', - 'xlnx-can-test', +- 'fuzz-xlnx-dp-test', +# 'xlnx-can-test', ++# 'fuzz-xlnx-dp-test', 'migration-test'] qtests_s390x = \ -@@ -195,7 +191,6 @@ qtests_s390x = \ +@@ -196,7 +192,6 @@ qtests_s390x = \ (config_host.has_key('CONFIG_POSIX') ? 
['test-filter-redirector'] : []) + \ ['boot-serial-test', 'drive_del-test', diff --git a/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch similarity index 93% rename from 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch rename to 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index 0186f8a..44db7e1 100644 --- a/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 79697eeae5ecc81c6e334b4a5164c9a664541a22 Mon Sep 17 00:00:00 2001 +From 55dcef9d806aa530f10e3ca42eb24d52f850d674 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -32,7 +32,7 @@ Signed-off-by: Bandan Das 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index e1ea1d8a23..717af74b9a 100644 +index 7b45353ce2..eb725a3aee 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -45,6 +45,9 @@ @@ -45,7 +45,7 @@ index e1ea1d8a23..717af74b9a 100644 static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); -@@ -2790,9 +2793,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) +@@ -2807,9 +2810,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) ssize_t len; struct stat st; int groupid; @@ -77,7 +77,7 @@ index e1ea1d8a23..717af74b9a 100644 if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3229,6 +3253,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3246,6 +3270,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), diff --git a/0013-Add-support-statement-to-help-output.patch 
b/0014-Add-support-statement-to-help-output.patch similarity index 88% rename from 0013-Add-support-statement-to-help-output.patch rename to 0014-Add-support-statement-to-help-output.patch index e7ff04f..095b9db 100644 --- a/0013-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 5209e0f91395c9240142f75123edd55a6fcce8d7 Mon Sep 17 00:00:00 2001 +From fcccb5c061b8bbae29de59637c5ad4cf4416281b Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -21,10 +21,10 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c -index 5ca11e7469..55673d7302 100644 +index 620a1f1367..d46b8fb4ab 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c -@@ -802,9 +802,17 @@ static void version(void) +@@ -827,9 +827,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -42,7 +42,7 @@ index 5ca11e7469..55673d7302 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", error_get_progname()); -@@ -830,6 +838,7 @@ static void help(int exitcode) +@@ -855,6 +863,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); diff --git a/0014-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch similarity index 89% rename from 0014-globally-limit-the-maximum-number-of-CPUs.patch rename to 0015-globally-limit-the-maximum-number-of-CPUs.patch index 0eba4ee..50c1e79 100644 --- a/0014-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From 9ca40e0d1c7644ab781dc382effa8eb32f589322 Mon Sep 17 00:00:00 2001 +From 354c9ce982e566ddb3c724a57252986dcb7c36db Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -18,10 +18,10 @@ Signed-off-by: Danilo 
Cesar Lemes de Paula 1 file changed, 12 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index 0125c17edb..b5d488a027 100644 +index eecd8031cf..8f2a53438f 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c -@@ -2413,6 +2413,18 @@ static int kvm_init(MachineState *ms) +@@ -2423,6 +2423,18 @@ static int kvm_init(MachineState *ms) soft_vcpus_limit = kvm_recommended_vcpus(s); hard_vcpus_limit = kvm_max_vcpus(s); diff --git a/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch similarity index 92% rename from 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch rename to 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index dd00655..415bcc2 100644 --- a/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From 67b5eb11440c75e0fea275eccea43266760b831e Mon Sep 17 00:00:00 2001 +From b057b4ebec0f87f21ba4a15adbb2a1bde7240ed5 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -37,10 +37,10 @@ index 52d6454b93..d74dbdeca9 100644 .. |I2C| replace:: I\ :sup:`2`\ C .. |I2S| replace:: I\ :sup:`2`\ S diff --git a/docs/tools/qemu-trace-stap.rst b/docs/tools/qemu-trace-stap.rst -index fb70445c75..0d9a783112 100644 +index d53073b52b..9e93df084f 100644 --- a/docs/tools/qemu-trace-stap.rst +++ b/docs/tools/qemu-trace-stap.rst -@@ -45,19 +45,19 @@ The following commands are valid: +@@ -46,19 +46,19 @@ The following commands are valid: any of the listed names. If no *PATTERN* is given, the all possible probes will be listed. @@ -63,7 +63,7 @@ index fb70445c75..0d9a783112 100644 .. option:: run OPTIONS BINARY PATTERN... 
-@@ -89,18 +89,18 @@ The following commands are valid: +@@ -90,18 +90,18 @@ The following commands are valid: Restrict the tracing session so that it only triggers for the process identified by *PID*. @@ -85,7 +85,7 @@ index fb70445c75..0d9a783112 100644 To monitor QEMU processes running an alternative binary outside of ``$PATH``, displaying verbose information about setup of the -@@ -108,7 +108,7 @@ The following commands are valid: +@@ -109,7 +109,7 @@ The following commands are valid: :: @@ -95,10 +95,10 @@ index fb70445c75..0d9a783112 100644 See also -------- diff --git a/qemu-options.hx b/qemu-options.hx -index ac596e01a1..eea5fe9f68 100644 +index ae2c6dbbfc..94c4a8dbaf 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -3124,11 +3124,11 @@ SRST +@@ -3150,11 +3150,11 @@ SRST :: diff --git a/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch similarity index 96% rename from 0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch rename to 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index c6e26e6..031b551 100644 --- a/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 121a8bb81575086f84563f1dffc4d045cdc65a64 Mon Sep 17 00:00:00 2001 +From 41fe05330d095f69f12973b0540466439e030047 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] @@ -42,7 +42,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 6d80730287..bba3d75707 100644 +index 51fd09522a..a35257c35a 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -896,6 +896,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, diff --git a/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch similarity index 97% rename from 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch rename to 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index 75ebb16..7f161e8 100644 --- a/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From 6e87ea18ca021e10fd99698bb3f744ed3dc06552 Mon Sep 17 00:00:00 2001 +From 295ee942608d48ab167aa76adabf3697c28e6910 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts diff --git a/0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/0019-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch similarity index 96% rename from 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch rename to 0019-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch index 3fc1c9c..7ff46fd 100644 --- a/0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +++ b/0019-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -1,4 +1,4 @@ -From 0486a5468aee70d74e8ad65f3b2e5fa16cacba56 Mon Sep 17 00:00:00 2001 +From a3bb2330bbf68faabc57b768cdf3ae2f4f4d86f3 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 20 Aug 2021 18:25:12 +0200 Subject: qcow2: Deprecation warning when opening v2 images rw @@ -44,7 +44,7 @@ Rebase notes (6.1.0): 2 files changed, 7 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c -index 
9f1b6461c8..4c303e6666 100644 +index d509016756..37d1c26a71 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1336,6 +1336,12 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, diff --git a/0020-Fix-virtio-net-pci-vectors-compat.patch b/0020-Fix-virtio-net-pci-vectors-compat.patch new file mode 100644 index 0000000..6caad1d --- /dev/null +++ b/0020-Fix-virtio-net-pci-vectors-compat.patch @@ -0,0 +1,46 @@ +From ef5afcc86dc44d1c9d3030a8ceca2018df86c6ec Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Tue, 19 Oct 2021 13:17:06 -0400 +Subject: Fix virtio-net-pci* "vectors" compat + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 76: 9.0/6.2.0-rc1 x86 machine type fixes +RH-Commit: [20/22] ebb570f053f96d3558bac49962dc7dc88296c207 +RH-Bugzilla: 2025468 +RH-Acked-by: quintela1 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Cornelia Huck + +hw_compat_rhel_8_4 has an issue: it affects only "virtio-net-pci" +but not "virtio-net-pci-transitional" and +"virtio-net-pci-non-transitional". The solution is to use the +"virtio-net-pci-base" type in compat_props. + +An equivalent fix will be submitted for hw_compat_5_2 upstream. 
+ +Signed-off-by: Eduardo Habkost +(cherry picked from commit d45823ab0d0138b2fbaf2ed1e1896d2052f3ccb3) +--- + hw/core/machine.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 53a3caf4fb..448a8dd127 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -69,7 +69,11 @@ GlobalProperty hw_compat_rhel_8_4[] = { + /* hw_compat_rhel_8_4 from hw_compat_5_2 */ + { "virtio-blk-device", "report-discard-granularity", "off" }, + /* hw_compat_rhel_8_4 from hw_compat_5_2 */ +- { "virtio-net-pci", "vectors", "3"}, ++ /* ++ * Upstream incorrectly had "virtio-net-pci" instead of "virtio-net-pci-base", ++ * (https://bugzilla.redhat.com/show_bug.cgi?id=1999141) ++ */ ++ { "virtio-net-pci-base", "vectors", "3"}, + }; + const size_t hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4); + +-- +2.27.0 + diff --git a/0021-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch b/0021-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch new file mode 100644 index 0000000..8ef276d --- /dev/null +++ b/0021-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch @@ -0,0 +1,75 @@ +From eae7d8dd3c3b9aa859a619933f52a4759a42bf66 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 23 Nov 2021 17:57:42 +0000 +Subject: x86/rhel machine types: Add pc_rhel_8_5_compat + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 76: 9.0/6.2.0-rc1 x86 machine type fixes +RH-Commit: [21/22] dd23060695bc0ad892bbfa51d93afe31f5d745c7 +RH-Bugzilla: 2025468 +RH-Acked-by: quintela1 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Cornelia Huck + +Add pc_rhel_8_5_compat as the merge of pc_compat_6_1 and pc_compat_6_0 +(since 8.5 was based on 6.0). + +Note, x-keep-pci-slot-hpc flipped back and forward, leaving it out +looks like it leaves us with the original. + +Signed-off-by: Dr. 
David Alan Gilbert +Signed-off-by: Miroslav Rezanina +--- + hw/i386/pc.c | 21 +++++++++++++++++++++ + include/hw/i386/pc.h | 3 +++ + 2 files changed, 24 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index b6d2db8d04..4661473d2a 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -387,6 +387,27 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_8_5_compat[] = { ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "model", "6" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }, ++ ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-build", "0x1bbc" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-major", "0x0006" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-minor", "0x0001" }, ++}; ++const size_t pc_rhel_8_5_compat_len = G_N_ELEMENTS(pc_rhel_8_5_compat); ++ + GlobalProperty pc_rhel_8_4_compat[] = { + /* pc_rhel_8_4_compat from pc_compat_5_2 */ + { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 9689a58b14..afb570ba14 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -283,6 +283,9 @@ extern const size_t pc_compat_1_4_len; + extern GlobalProperty pc_rhel_compat[]; + extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_8_5_compat[]; ++extern const size_t pc_rhel_8_5_compat_len; ++ + extern GlobalProperty pc_rhel_8_4_compat[]; + extern const size_t pc_rhel_8_4_compat_len; + +-- +2.27.0 + diff --git 
a/0022-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch b/0022-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch new file mode 100644 index 0000000..85c6b26 --- /dev/null +++ b/0022-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch @@ -0,0 +1,55 @@ +From 6762f5646943c759ece5972f08eb88364cf0a8ad Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 23 Nov 2021 18:07:49 +0000 +Subject: x86/rhel machine types: Wire compat into q35 and i440fx + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 76: 9.0/6.2.0-rc1 x86 machine type fixes +RH-Commit: [22/22] e2767df0d920773057cb52d346e0106a76cb0a28 +RH-Bugzilla: 2025468 +RH-Acked-by: quintela1 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Cornelia Huck +Wire the pc_rhel_8_5 compat data into both piix and q35 +to keep the existing machine types compatible. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Miroslav Rezanina +--- + hw/i386/pc_piix.c | 4 ++++ + hw/i386/pc_q35.c | 4 ++++ + 2 files changed, 8 insertions(+) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index dabc6c1933..183b5d5464 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -993,6 +993,10 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++ compat_props_add(m->compat_props, pc_rhel_8_5_compat, ++ pc_rhel_8_5_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_4, + hw_compat_rhel_8_4_len); + compat_props_add(m->compat_props, pc_rhel_8_4_compat, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 04c911da18..0e7e885e78 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -658,6 +658,10 @@ static void pc_q35_machine_rhel850_options(MachineClass *m) + m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL-AV"; + 
pcmc->smbios_stream_version = "8.5.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++ compat_props_add(m->compat_props, pc_rhel_8_5_compat, ++ pc_rhel_8_5_compat_len); + } + + DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, +-- +2.27.0 + diff --git a/0023-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch b/0023-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch new file mode 100644 index 0000000..293854f --- /dev/null +++ b/0023-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch @@ -0,0 +1,50 @@ +From 3b82be3dd3d5254baedf82ba2a6cf0412e84a991 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 16 Nov 2021 17:03:07 +0100 +Subject: redhat: virt-rhel8.5.0: Update machine type compatibility for QEMU + 6.2.0 update + +RH-Author: Eric Auger +RH-MergeRequest: 75: redhat: virt-rhel8.5.0: Update machine type compatibility for QEMU 6.2.0 update +RH-Commit: [21/21] f027d13654944e3d34e3356affe7af952eec2bed +RH-Bugzilla: 2022607 +RH-Acked-by: Gavin Shan +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Cornelia Huck +RH-Acked-by: Laurent Vivier + +To keep compatibility with 8.5-AV machine type we need to +turn few new options on by default: +smp_props.prefer_sockets, no_cpu_topology, no_tcg_its + +TESTED: migrate from rhel-av-8.5.0 to rhel-8.6.0 and vice-versa +with upstream fix: 33a0c404fb hw/intc/arm_gicv3_its: Revert version +increments in vmstate_its + +Signed-off-by: Eric Auger +Signed-off-by: Miroslav Rezanina +--- + hw/arm/virt.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 7d51824263..6ba9a2c2e1 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3230,7 +3230,12 @@ type_init(rhel_machine_init); + + static void rhel850_virt_options(MachineClass *mc) + { ++ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); ++ + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); + 
compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ mc->smp_props.prefer_sockets = true; ++ vmc->no_cpu_topology = true; ++ vmc->no_tcg_its = true; + } + DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) +-- +2.27.0 + diff --git a/0024-redhat-Add-s390x-machine-type-compatibility-handling.patch b/0024-redhat-Add-s390x-machine-type-compatibility-handling.patch new file mode 100644 index 0000000..9762048 --- /dev/null +++ b/0024-redhat-Add-s390x-machine-type-compatibility-handling.patch @@ -0,0 +1,58 @@ +From e6ff4de4f7036f88ee63adad6de5ee5dd74f1d99 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 26 Nov 2021 09:37:11 +0100 +Subject: redhat: Add s390x machine type compatibility handling for the rebase + to v6.2 + +RH-Author: Thomas Huth +RH-MergeRequest: 80: Add s390x machine type compatibility handling for the rebase to v6.2 +RH-Commit: [26/26] c45cf594604f6dd23954696b9c84d2025e328d11 +RH-Bugzilla: 2022602 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cornelia Huck + +Add compatibility handling for the rhel8.5.0 machine type (and +recursively older, of course). 
+ +Based on the following upstream commits: + + 463e50da8b - s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z14 GA2 + 30e398f796 - s390x/cpumodel: Add more feature to gen16 default model + 4a0af2930a - machine: Prefer cores over sockets in smp parsing since 6.2 + 2b52619994 - machine: Move smp_prefer_sockets to struct SMPCompatProps + +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 4af14cb9ca..c654045964 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1105,11 +1105,21 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); + + static void ccw_machine_rhel850_instance_options(MachineState *machine) + { ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 }; ++ ++ s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); ++ ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_VECTOR_PACKED_DECIMAL_ENH2); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_BEAR_ENH); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_RDP); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAI); + } + + static void ccw_machine_rhel850_class_options(MachineClass *mc) + { + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ mc->smp_props.prefer_sockets = true; + } + DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); + +-- +2.27.0 + diff --git a/0025-compat-Update-hw_compat_rhel_8_5-with-6.2.0-RC2-chan.patch b/0025-compat-Update-hw_compat_rhel_8_5-with-6.2.0-RC2-chan.patch new file mode 100644 index 0000000..1fdd794 --- /dev/null +++ b/0025-compat-Update-hw_compat_rhel_8_5-with-6.2.0-RC2-chan.patch @@ -0,0 +1,31 @@ +From 168f0d56e3a37a7d5fcc59483e2b1181824a23d2 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 24 Nov 2021 23:51:52 -0500 +Subject: compat: Update hw_compat_rhel_8_5 with 6.2.0 RC2 changes + +In RC2 nvme-ns 
'shared' default was changed (commit 916b0f0b52). + +Adding compat record for RHEL 8.5.0 so we keep it off for downstream +machinetypes. + +Signed-off-by: Miroslav Rezanina +--- + hw/core/machine.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 448a8dd127..669d3d8b91 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -55,6 +55,8 @@ GlobalProperty hw_compat_rhel_8_5[] = { + { "vhost-vsock-device", "seqpacket", "off" }, + /* hw_compat_rhel_8_5 from hw_compat_6_1 */ + { "vhost-user-vsock-device", "seqpacket", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_1 */ ++ { "nvme-ns", "shared", "off" }, + }; + const size_t hw_compat_rhel_8_5_len = G_N_ELEMENTS(hw_compat_rhel_8_5); + +-- +2.27.0 + diff --git a/kvm-Fix-for-ppc64le-build.patch b/kvm-Fix-for-ppc64le-build.patch deleted file mode 100644 index 263286c..0000000 --- a/kvm-Fix-for-ppc64le-build.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 81c5a3cec15194bf75366813274f63d596f04807 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Mon, 15 Nov 2021 05:46:15 -0500 -Subject: [PATCH 6/6] Fix for ppc64le build - -Although we do not support ppc64 build, there can be some custom builds running -it. Fix code so the ppc64le build is buildable. 
- -Signed-off-by: Miroslav Rezanina ---- - hw/ppc/spapr.c | 7 +++++-- - target/ppc/kvm.c | 5 ++--- - 2 files changed, 7 insertions(+), 5 deletions(-) - -diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 1386e45e70..8699cc3d0c 100644 ---- a/hw/ppc/spapr.c -+++ b/hw/ppc/spapr.c -@@ -4991,7 +4991,6 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); - /* - * pseries-2.7 - */ --#endif - - static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, - uint64_t *buid, hwaddr *pio, -@@ -5047,7 +5046,6 @@ static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, - return true; - } - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void spapr_machine_2_7_class_options(MachineClass *mc) - { - SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -5372,4 +5370,9 @@ static void spapr_machine_rhel760sxxm_class_options(MachineClass *mc) - - DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false); - -+static void spapr_machine_register_types(void) -+{ -+ type_register_static(&spapr_machine_info); -+} -+ - type_init(spapr_machine_register_types) -diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index 0f4f072fbd..154888cce5 100644 ---- a/target/ppc/kvm.c -+++ b/target/ppc/kvm.c -@@ -2560,11 +2560,11 @@ int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable) - return 0; - } - --<<<<<<< HEAD - int kvmppc_has_cap_rpt_invalidate(void) - { - return cap_rpt_invalidate; --======= -+} -+ - bool kvmppc_has_cap_secure_guest(void) - { - return !!cap_ppc_secure_guest; -@@ -2573,7 +2573,6 @@ bool kvmppc_has_cap_secure_guest(void) - int kvmppc_enable_cap_secure_guest(void) - { - return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1); -->>>>>>> 89c02f0e37... 
Add ppc64 machine types - } - - PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) --- -2.27.0 - diff --git a/kvm-disable-sga-device.patch b/kvm-disable-sga-device.patch deleted file mode 100644 index 75118bd..0000000 --- a/kvm-disable-sga-device.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 77d18ece20f69ff1e1f6afd4b2d8cf2a1f252f3a Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Mon, 20 Sep 2021 03:46:51 -0400 -Subject: [PATCH 1/2] disable sga device -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 44: Apply RHEL 9.0.0 Beta fixes to RHEL 9.0.0 -RH-Commit: [1/2] 3d8e1b51b496175de71162c612abbd64adbcb9e5 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 2000845 -RH-Acked-by: Daniel P. Berrangé -RH-Acked-by: Gerd Hoffmann - -Disabling sga device we are going to not support anymore. - -Signed-off-by: Gerd Hoffmann -Signed-off-by: Miroslav Rezanina ---- - configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -index 24b96ba0c4..20c2991941 100644 ---- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -67,7 +67,6 @@ CONFIG_SERIAL=y - CONFIG_SERIAL_ISA=y - CONFIG_SERIAL_PCI=y - CONFIG_SEV=y --CONFIG_SGA=y - CONFIG_SMBIOS=y - CONFIG_SMBUS_EEPROM=y - CONFIG_TEST_DEVICES=y --- -2.27.0 - diff --git a/kvm-dump-guest-memory-Block-live-migration.patch b/kvm-dump-guest-memory-Block-live-migration.patch deleted file mode 100644 index 08da454..0000000 --- a/kvm-dump-guest-memory-Block-live-migration.patch +++ /dev/null @@ -1,93 +0,0 @@ -From d1c6d059f8936adf7b8c3e2b29b2eb290b0792ac Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 22 Sep 2021 12:20:09 -0400 -Subject: [PATCH 3/6] dump-guest-memory: Block live migration -MIME-Version: 1.0 -Content-Type: text/plain; 
charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 51: dump-guest-memory: Add blocker for migration -RH-Commit: [3/3] 711dcc0018f70bcb87496c5aa235633a6daf5c2d (peterx/qemu-kvm) -RH-Bugzilla: 1996609 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: quintela1 -RH-Acked-by: Laszlo Ersek - -Both dump-guest-memory and live migration caches vm state at the beginning. -Either of them entering the other one will cause race on the vm state, and even -more severe on that (please refer to the crash report in the bug link). - -Let's block live migration in dump-guest-memory, and that'll also block -dump-guest-memory if it detected that we're during a live migration. - -Side note: migrate_del_blocker() can be called even if the blocker is not -inserted yet, so it's safe to unconditionally delete that blocker in -dump_cleanup (g_slist_remove allows no-entry-found case). - -Suggested-by: Dr. David Alan Gilbert -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1996609 -Signed-off-by: Peter Xu -Reviewed-by: Marc-André Lureau -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit b7bc6b182883bb3097dde2a25d041f28bde2b89c) -Signed-off-by: Peter Xu ---- - dump/dump.c | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - -diff --git a/dump/dump.c b/dump/dump.c -index ab625909f3..662d0a62cd 100644 ---- a/dump/dump.c -+++ b/dump/dump.c -@@ -29,6 +29,7 @@ - #include "qemu/error-report.h" - #include "qemu/main-loop.h" - #include "hw/misc/vmcoreinfo.h" -+#include "migration/blocker.h" - - #ifdef TARGET_X86_64 - #include "win_dump.h" -@@ -47,6 +48,8 @@ - - #define MAX_GUEST_NOTE_SIZE (1 << 20) /* 1MB should be enough */ - -+static Error *dump_migration_blocker; -+ - #define ELF_NOTE_SIZE(hdr_size, name_size, desc_size) \ - ((DIV_ROUND_UP((hdr_size), 4) + \ - DIV_ROUND_UP((name_size), 4) + \ -@@ -101,6 +104,7 @@ static int dump_cleanup(DumpState *s) - qemu_mutex_unlock_iothread(); - } - } -+ 
migrate_del_blocker(dump_migration_blocker); - - return 0; - } -@@ -2005,6 +2009,21 @@ void qmp_dump_guest_memory(bool paging, const char *file, - return; - } - -+ if (!dump_migration_blocker) { -+ error_setg(&dump_migration_blocker, -+ "Live migration disabled: dump-guest-memory in progress"); -+ } -+ -+ /* -+ * Allows even for -only-migratable, but forbid migration during the -+ * process of dump guest memory. -+ */ -+ if (migrate_add_blocker_internal(dump_migration_blocker, errp)) { -+ /* Remember to release the fd before passing it over to dump state */ -+ close(fd); -+ return; -+ } -+ - s = &dump_state_global; - dump_state_prepare(s); - --- -2.27.0 - diff --git a/kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch b/kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch deleted file mode 100644 index 5d7af51..0000000 --- a/kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch +++ /dev/null @@ -1,38 +0,0 @@ -From c0fa5e8ad618a980752f7053de4d4fedd46b5b53 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 12 Oct 2021 17:53:07 +0200 -Subject: [PATCH] hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type - -RH-Author: Eric Auger -RH-MergeRequest: 48: hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type -RH-Commit: [1/1] f4230d3bbd94beb44afb23cb0d1561ff2bc71340 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 1998942 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones - -branch: c9s -Upstream: no -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=40295089 - -Add hw_compat_rhel_8_5 to rhel8.5 virt options. 
- -Signed-off-by: Eric Auger ---- - hw/arm/virt.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 9e7cb687dc..79286c40ec 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3148,5 +3148,6 @@ type_init(rhel_machine_init); - static void rhel850_virt_options(MachineClass *mc) - { - compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); - } - DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) --- -2.27.0 - diff --git a/kvm-hw-arm-virt-Remove-9.0-machine-type.patch b/kvm-hw-arm-virt-Remove-9.0-machine-type.patch deleted file mode 100644 index 4ccd33c..0000000 --- a/kvm-hw-arm-virt-Remove-9.0-machine-type.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 8dc162b2ae2e412c7d4e0d46f7a11001c34263dc Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Mon, 23 Aug 2021 18:57:11 +0200 -Subject: [PATCH 1/4] hw/arm/virt: Remove 9.0 machine type -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Andrew Jones -RH-MergeRequest: 38: hw/arm/virt: Remove 9.0 machine type -RH-Commit: [1/1 35c0734886622b88f6a715f13fba8f65331f7a82 -RH-Bugzilla: 2002937 -RH-Acked-by: Daniel P. Berrangé -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Gavin Shan -RH-Acked-by: Philippe Mathieu-Daudé - -Testing: '/usr/libexec/qemu-kvm -M help' and sample VM run - -Remove the 9.0 machine type for RHEL9 Beta to align with other -architectures. Also, like other architectures, the 8.5 machine -type should be the RHEL9 Beta default. 
- -Signed-off-by: Andrew Jones -Signed-off-by: Miroslav Rezanina ---- - hw/arm/virt.c | 10 ++-------- - 1 file changed, 2 insertions(+), 8 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 3c8e6de36d..9e7cb687dc 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3145,14 +3145,8 @@ static void rhel_machine_init(void) - } - type_init(rhel_machine_init); - --static void rhel900_virt_options(MachineClass *mc) --{ -- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); --} --DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) -- - static void rhel850_virt_options(MachineClass *mc) - { -- rhel900_virt_options(mc); -+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); - } --DEFINE_RHEL_MACHINE(8, 5, 0) -+DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) --- -2.27.0 - diff --git a/kvm-migration-Add-migrate_add_blocker_internal.patch b/kvm-migration-Add-migrate_add_blocker_internal.patch deleted file mode 100644 index f00183f..0000000 --- a/kvm-migration-Add-migrate_add_blocker_internal.patch +++ /dev/null @@ -1,100 +0,0 @@ -From 58d47d795b91beb1f87929fb07d9f1b4af79cb14 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 22 Sep 2021 12:20:08 -0400 -Subject: [PATCH 2/6] migration: Add migrate_add_blocker_internal() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 51: dump-guest-memory: Add blocker for migration -RH-Commit: [2/3] 44cf3879ee66cc9974dd3d8a5f9a0d4f762b7c01 (peterx/qemu-kvm) -RH-Bugzilla: 1996609 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: quintela1 -RH-Acked-by: Laszlo Ersek - -An internal version that removes -only-migratable implications. It can be used -for temporary migration blockers like dump-guest-memory. 
- -Reviewed-by: Marc-André Lureau -Reviewed-by: Juan Quintela -Signed-off-by: Peter Xu -Signed-off-by: Juan Quintela -(cherry picked from commit 60fd680193119e7e4d50eccff7b55a0aadc934ab) -Signed-off-by: Peter Xu ---- - include/migration/blocker.h | 16 ++++++++++++++++ - migration/migration.c | 21 +++++++++++++-------- - 2 files changed, 29 insertions(+), 8 deletions(-) - -diff --git a/include/migration/blocker.h b/include/migration/blocker.h -index acd27018e9..9cebe2ba06 100644 ---- a/include/migration/blocker.h -+++ b/include/migration/blocker.h -@@ -25,6 +25,22 @@ - */ - int migrate_add_blocker(Error *reason, Error **errp); - -+/** -+ * @migrate_add_blocker_internal - prevent migration from proceeding without -+ * only-migrate implications -+ * -+ * @reason - an error to be returned whenever migration is attempted -+ * -+ * @errp - [out] The reason (if any) we cannot block migration right now. -+ * -+ * @returns - 0 on success, -EBUSY on failure, with errp set. -+ * -+ * Some of the migration blockers can be temporary (e.g., for a few seconds), -+ * so it shouldn't need to conflict with "-only-migratable". For those cases, -+ * we can call this function rather than @migrate_add_blocker(). -+ */ -+int migrate_add_blocker_internal(Error *reason, Error **errp); -+ - /** - * @migrate_del_blocker - remove a blocking error from migration - * -diff --git a/migration/migration.c b/migration/migration.c -index f476e2101e..2aaf2fd449 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2053,15 +2053,8 @@ void migrate_init(MigrationState *s) - s->threshold_size = 0; - } - --int migrate_add_blocker(Error *reason, Error **errp) -+int migrate_add_blocker_internal(Error *reason, Error **errp) - { -- if (only_migratable) { -- error_propagate_prepend(errp, error_copy(reason), -- "disallowing migration blocker " -- "(--only-migratable) for: "); -- return -EACCES; -- } -- - /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. 
*/ - if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) { - error_propagate_prepend(errp, error_copy(reason), -@@ -2074,6 +2067,18 @@ int migrate_add_blocker(Error *reason, Error **errp) - return 0; - } - -+int migrate_add_blocker(Error *reason, Error **errp) -+{ -+ if (only_migratable) { -+ error_propagate_prepend(errp, error_copy(reason), -+ "disallowing migration blocker " -+ "(--only-migratable) for: "); -+ return -EACCES; -+ } -+ -+ return migrate_add_blocker_internal(reason, errp); -+} -+ - void migrate_del_blocker(Error *reason) - { - migration_blockers = g_slist_remove(migration_blockers, reason); --- -2.27.0 - diff --git a/kvm-migration-Make-migration-blocker-work-for-snapshots-.patch b/kvm-migration-Make-migration-blocker-work-for-snapshots-.patch deleted file mode 100644 index afdbdb1..0000000 --- a/kvm-migration-Make-migration-blocker-work-for-snapshots-.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 0a9b55b44c5c548ff6f3da7335acf2138ecb4376 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 22 Sep 2021 12:20:07 -0400 -Subject: [PATCH 1/6] migration: Make migration blocker work for snapshots too -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 51: dump-guest-memory: Add blocker for migration -RH-Commit: [1/3] 241888d12c7197dd4ee1e1ba6e3115f70901636e (peterx/qemu-kvm) -RH-Bugzilla: 1996609 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: quintela1 -RH-Acked-by: Laszlo Ersek - -save_snapshot() checks migration blocker, which looks sane. At the meantime we -should also teach the blocker add helper to fail if during a snapshot, just -like for migrations. 
- -Reviewed-by: Marc-André Lureau -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 4c170330aae4a4ed75c3a8638b7d4c5d9f365244) -Signed-off-by: Peter Xu ---- - migration/migration.c | 15 ++++++++------- - 1 file changed, 8 insertions(+), 7 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 041b8451a6..f476e2101e 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2062,15 +2062,16 @@ int migrate_add_blocker(Error *reason, Error **errp) - return -EACCES; - } - -- if (migration_is_idle()) { -- migration_blockers = g_slist_prepend(migration_blockers, reason); -- return 0; -+ /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */ -+ if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) { -+ error_propagate_prepend(errp, error_copy(reason), -+ "disallowing migration blocker " -+ "(migration/snapshot in progress) for: "); -+ return -EBUSY; - } - -- error_propagate_prepend(errp, error_copy(reason), -- "disallowing migration blocker " -- "(migration in progress) for: "); -- return -EBUSY; -+ migration_blockers = g_slist_prepend(migration_blockers, reason); -+ return 0; - } - - void migrate_del_blocker(Error *reason) --- -2.27.0 - diff --git a/kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch b/kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch deleted file mode 100644 index 16f5957..0000000 --- a/kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 9b72a86292fb2c34d7be7b928ac06f2609de0f43 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 30 Sep 2021 09:25:23 +0200 -Subject: [PATCH 2/2] redhat: Add s390x machine type compatibility update for - 6.1 rebase -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 45: Add s390x machine type compatibility update for 6.1 rebase -RH-Commit: 
[2/2] 6f71801ac2d77b82b010eac46cd82cd74c53f246 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 1998943 -RH-Acked-by: Greg Kurz -RH-Acked-by: Daniel P. Berrangé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -Add hw_compat_rhel_8_5 for the s390-ccw-virtio-rhel8.5.0 machine -type. It's currently not urgently required, since hw_compat_rhel_8_5 -only contains entries that are not related to s390x, but just in -case some other entries are added there later, it's better if we -add this right from the start to the s390x machine, too. - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1998943 -Signed-off-by: Thomas Huth ---- - hw/s390x/s390-virtio-ccw.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 9a51dd8de2..4ee8d266ec 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1091,6 +1091,7 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine) - - static void ccw_machine_rhel850_class_options(MachineClass *mc) - { -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); - } - DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); - --- -2.27.0 - diff --git a/kvm-redhat-Define-hw_compat_rhel_8_5.patch b/kvm-redhat-Define-hw_compat_rhel_8_5.patch deleted file mode 100644 index 60f7fa1..0000000 --- a/kvm-redhat-Define-hw_compat_rhel_8_5.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 1f0a5d3ae9c835e35b83cf8bbedd0f814df3451d Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Mon, 27 Sep 2021 10:02:46 +0200 -Subject: [PATCH 1/2] redhat: Define hw_compat_rhel_8_5 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 45: Add s390x machine type compatibility update for 6.1 rebase -RH-Commit: [1/2] 5d304edf2bee7abc57843deb9e5d85ab5f19a34c (thuth/qemu-kvm-cs9) -RH-Bugzilla: 1998943 -RH-Acked-by: Greg Kurz -RH-Acked-by: Daniel P. 
Berrangé -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -The QEMU 6.1 rebase changed the default value of some properties -for existing machine types. Prepare ground by introducing -hw_compat_rhel_8_5. - -Signed-off-by: Greg Kurz -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1998943 -Signed-off-by: Thomas Huth ---- - hw/core/machine.c | 17 +++++++++++++++++ - include/hw/boards.h | 3 +++ - 2 files changed, 20 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index d681a06a47..a14503cc30 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -37,6 +37,23 @@ - #include "hw/virtio/virtio.h" - #include "hw/virtio/virtio-pci.h" - -+/* -+ * Mostly the same as hw_compat_6_0 -+ */ -+GlobalProperty hw_compat_rhel_8_5[] = { -+ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ -+ { "gpex-pcihost", "allow-unmapped-accesses", "false" }, -+ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ -+ { "i8042", "extended-state", "false"}, -+ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ -+ { "nvme-ns", "eui64-default", "off"}, -+ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ -+ { "e1000", "init-vet", "off" }, -+ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ -+ { "e1000e", "init-vet", "off" }, -+}; -+const size_t hw_compat_rhel_8_5_len = G_N_ELEMENTS(hw_compat_rhel_8_5); -+ - /* - * Mostly the same as hw_compat_5_2 - */ -diff --git a/include/hw/boards.h b/include/hw/boards.h -index a2b1681027..7b9208ef7a 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -418,6 +418,9 @@ extern const size_t hw_compat_2_2_len; - extern GlobalProperty hw_compat_2_1[]; - extern const size_t hw_compat_2_1_len; - -+extern GlobalProperty hw_compat_rhel_8_5[]; -+extern const size_t hw_compat_rhel_8_5_len; -+ - extern GlobalProperty hw_compat_rhel_8_4[]; - extern const size_t hw_compat_rhel_8_4_len; - --- -2.27.0 - diff --git a/kvm-tools-virtiofsd-Add-fstatfs64-syscall-to-the-seccomp.patch b/kvm-tools-virtiofsd-Add-fstatfs64-syscall-to-the-seccomp.patch deleted file 
mode 100644 index 1666b15..0000000 --- a/kvm-tools-virtiofsd-Add-fstatfs64-syscall-to-the-seccomp.patch +++ /dev/null @@ -1,51 +0,0 @@ -From b615b79feaa73bbaa32bb8c30401a4f6f0c0205e Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 14 Sep 2021 13:29:59 +0200 -Subject: [PATCH 2/2] tools/virtiofsd: Add fstatfs64 syscall to the seccomp - allowlist -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 44: Apply RHEL 9.0.0 Beta fixes to RHEL 9.0.0 -RH-Commit: [2/2] 0085289cefb57d49d2423b4f3376e8cf4a970012 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 2005026 -RH-Acked-by: Daniel P. Berrangé -RH-Acked-by: Gerd Hoffmann - -The virtiofsd currently crashes on s390x when doing something like -this in the guest: - - mkdir -p /mnt/myfs - mount -t virtiofs myfs /mnt/myfs - touch /mnt/myfs/foo.txt - stat -f /mnt/myfs/foo.txt - -The problem is that the fstatfs64 syscall is called in this case -from the virtiofsd. We have to put it on the seccomp allowlist to -avoid that the daemon gets killed in this case. 
- -(cherry picked from commit 8cfd339b3d402f913fe520a4f35f30152fb4fb80) -Suggested-by: Vivek Goyal -Signed-off-by: Thomas Huth -Signed-off-by: Miroslav Rezanina ---- - tools/virtiofsd/passthrough_seccomp.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c -index f49ed94b5e..a3ce9f898d 100644 ---- a/tools/virtiofsd/passthrough_seccomp.c -+++ b/tools/virtiofsd/passthrough_seccomp.c -@@ -51,6 +51,7 @@ static const int syscall_allowlist[] = { - SCMP_SYS(fsetxattr), - SCMP_SYS(fstat), - SCMP_SYS(fstatfs), -+ SCMP_SYS(fstatfs64), - SCMP_SYS(fsync), - SCMP_SYS(ftruncate), - SCMP_SYS(futex), --- -2.27.0 - diff --git a/kvm-virtio-balloon-Fix-page-poison-subsection-name.patch b/kvm-virtio-balloon-Fix-page-poison-subsection-name.patch deleted file mode 100644 index 68e542a..0000000 --- a/kvm-virtio-balloon-Fix-page-poison-subsection-name.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 91465d5e7cb407604fd8697587a52aafe0040ad7 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 14 Sep 2021 14:17:16 +0100 -Subject: [PATCH 1/2] virtio-balloon: Fix page-poison subsection name -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Dr. David Alan Gilbert -RH-MergeRequest: 46: virtio-balloon: Fix page-poison subsection name -RH-Commit: [1/1] b5907c252dae636b4b145a6f13fbed4bf5fad9d2 (dagrh/c-9-s-qemu-kvm) -RH-Bugzilla: 1984401 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -The subsection name for page-poison was typo'd as: - - vitio-balloon-device/page-poison - -Note the missing 'r' in virtio. - -When we have a machine type that enables page poison, and the guest -enables it (which needs a new kernel), things fail rather unpredictably. 
- -The fallout from this is that most of the other subsections fail to -load, including things like the feature bits in the device, one -possible fallout is that the physical addresses of the queues -then get aligned differently and we fail with an error about -last_avail_idx being wrong. -It's not obvious to me why this doesn't produce a more obvious failure, -but virtio's vmstate loading is a bit open-coded. - -Fixes: 7483cbbaf82 ("virtio-balloon: Implement support for page poison reporting feature") -bz: https://bugzilla.redhat.com/show_bug.cgi?id=1984401 -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20210914131716.102851-1-dgilbert@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: David Hildenbrand -(cherry picked from commit 243a9284a989a38e32ceb3990eb795f5cf6f3be0) ---- - hw/virtio/virtio-balloon.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c -index 4b5d9e5e50..ced070d64f 100644 ---- a/hw/virtio/virtio-balloon.c -+++ b/hw/virtio/virtio-balloon.c -@@ -850,7 +850,7 @@ static const VMStateDescription vmstate_virtio_balloon_free_page_hint = { - }; - - static const VMStateDescription vmstate_virtio_balloon_page_poison = { -- .name = "vitio-balloon-device/page-poison", -+ .name = "virtio-balloon-device/page-poison", - .version_id = 1, - .minimum_version_id = 1, - .needed = virtio_balloon_page_poison_support, --- -2.27.0 - diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 989bc02..786b6db 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -1,7 +1,7 @@ %global libfdt_version 1.6.0 %global libseccomp_version 2.4.0 %global libusbx_version 1.0.23 -%global meson_version 0.55.3-3 +%global meson_version 0.58.2 %global usbredir_version 0.7.1 %global ipxe_version 20200823-5.git4bd064de @@ -13,7 +13,7 @@ %global have_usbredir 1 %global have_opengl 1 -%global have_fdt 0 +%global have_fdt 1 %global have_modules_load 0 %global have_memlock_limits 0 # 
Some of these are not relevant for RHEL, but defining them @@ -86,7 +86,6 @@ %endif %ifarch %{power64} %global kvm_target ppc64 - %global have_fdt 1 %global have_memlock_limits 1 %endif %ifarch s390x @@ -95,11 +94,9 @@ %endif %ifarch ppc %global kvm_target ppc - %global have_fdt 1 %endif %ifarch aarch64 %global kvm_target aarch64 - %global have_fdt 1 %endif %global target_list %{kvm_target}-softmmu @@ -132,8 +129,8 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 6.1.0 -Release: 8%{?rcrel}%{?dist}%{?cc_suffix} +Version: 6.2.0 +Release: 1%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -158,44 +155,28 @@ Source31: kvm-x86.conf Source36: README.tests -Patch0004: 0004-Initial-redhat-build.patch -Patch0005: 0005-Enable-disable-devices-for-RHEL.patch -Patch0006: 0006-Machine-type-related-general-changes.patch -Patch0007: 0007-Add-aarch64-machine-types.patch -Patch0008: 0008-Add-ppc64-machine-types.patch -Patch0009: 0009-Add-s390x-machine-types.patch -Patch0010: 0010-Add-x86_64-machine-types.patch -Patch0011: 0011-Enable-make-check.patch -Patch0012: 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch -Patch0013: 0013-Add-support-statement-to-help-output.patch -Patch0014: 0014-globally-limit-the-maximum-number-of-CPUs.patch -Patch0015: 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch -Patch0016: 0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch -Patch0017: 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -Patch0018: 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch -# For bz#2002937 - [qemu][aarch64] Remove 9.0 machine types in arm virt for 9-Beta -Patch19: kvm-hw-arm-virt-Remove-9.0-machine-type.patch -# For bz#2000845 - RFE: Remove SGA, deprecate cirrus, and set 
defaults for QEMU machine-types in RHEL9 -Patch20: kvm-disable-sga-device.patch -# For bz#2005026 - [s390][virtio-fs] Umount virtiofs shared folder failure from guest side [rhel-9.0.0] -Patch21: kvm-tools-virtiofsd-Add-fstatfs64-syscall-to-the-seccomp.patch -# For bz#1998943 - Add machine type compatibility update for 6.1 rebase [s390x] -Patch22: kvm-redhat-Define-hw_compat_rhel_8_5.patch -# For bz#1998943 - Add machine type compatibility update for 6.1 rebase [s390x] -Patch23: kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch -# For bz#1984401 - fails to revert snapshot of a VM [balloon/page-poison] -Patch24: kvm-virtio-balloon-Fix-page-poison-subsection-name.patch -# For bz#1998942 - Add machine type compatibility update for 6.1 rebase [aarch64] -Patch25: kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch -# For bz#1996609 - Qemu hit core dump when dump guest memory during live migration -Patch26: kvm-migration-Make-migration-blocker-work-for-snapshots-.patch -# For bz#1996609 - Qemu hit core dump when dump guest memory during live migration -Patch27: kvm-migration-Add-migrate_add_blocker_internal.patch -# For bz#1996609 - Qemu hit core dump when dump guest memory during live migration -Patch28: kvm-dump-guest-memory-Block-live-migration.patch -Patch29: kvm-Fix-for-ppc64le-build.patch - -# Source-git patches +Patch0001: 0001-ui-clipboard-Don-t-use-g_autoptr-just-to-free-a-vari.patch +Patch0005: 0005-Initial-redhat-build.patch +Patch0006: 0006-Enable-disable-devices-for-RHEL.patch +Patch0007: 0007-Machine-type-related-general-changes.patch +Patch0008: 0008-Add-aarch64-machine-types.patch +Patch0009: 0009-Add-ppc64-machine-types.patch +Patch0010: 0010-Add-s390x-machine-types.patch +Patch0011: 0011-Add-x86_64-machine-types.patch +Patch0012: 0012-Enable-make-check.patch +Patch0013: 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +Patch0014: 0014-Add-support-statement-to-help-output.patch +Patch0015: 
0015-globally-limit-the-maximum-number-of-CPUs.patch +Patch0016: 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0017: 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +Patch0018: 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +Patch0019: 0019-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +Patch0020: 0020-Fix-virtio-net-pci-vectors-compat.patch +Patch0021: 0021-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch +Patch0022: 0022-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch +Patch0023: 0023-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch +Patch0024: 0024-redhat-Add-s390x-machine-type-compatibility-handling.patch +Patch0025: 0025-compat-Update-hw_compat_rhel_8_5-with-6.2.0-RC2-chan.patch %if %{have_clang} BuildRequires: clang @@ -206,6 +187,7 @@ BuildRequires: compiler-rt BuildRequires: gcc %endif BuildRequires: meson >= %{meson_version} +BuildRequires: ninja-build BuildRequires: zlib-devel BuildRequires: glib2-devel BuildRequires: gnutls-devel @@ -265,7 +247,7 @@ BuildRequires: pkgconfig(gbm) BuildRequires: perl-Test-Harness BuildRequires: libslirp-devel BuildRequires: pulseaudio-libs-devel - +BuildRequires: spice-protocol # Requires for qemu-kvm package Requires: %{name}-core = %{epoch}:%{version}-%{release} @@ -464,6 +446,7 @@ mkdir -p %{qemu_kvm_build} %build %define disable_everything \\\ --audio-drv-list= \\\ + --disable-alsa \\\ --disable-attr \\\ --disable-auth-pam \\\ --disable-avx2 \\\ @@ -480,6 +463,7 @@ mkdir -p %{qemu_kvm_build} --disable-cfi-debug \\\ --disable-cloop \\\ --disable-cocoa \\\ + --disable-coreaudio \\\ --disable-coroutine-pool \\\ --disable-crypto-afalg \\\ --disable-curl \\\ @@ -489,10 +473,12 @@ mkdir -p %{qemu_kvm_build} --disable-debug-tcg \\\ --disable-dmg \\\ --disable-docs \\\ + --disable-dsound \\\ --disable-fdt \\\ --disable-fuse \\\ --disable-fuse-lseek \\\ --disable-gcrypt \\\ + --disable-gettext \\\ --disable-gio \\\ --disable-glusterfs 
\\\ --disable-gnutls \\\ @@ -502,8 +488,9 @@ mkdir -p %{qemu_kvm_build} --disable-hax \\\ --disable-hvf \\\ --disable-iconv \\\ - --disable-jemalloc \\\ + --disable-jack \\\ --disable-kvm \\\ + --disable-l2tpv3 \\\ --disable-libdaxctl \\\ --disable-libiscsi \\\ --disable-libnfs \\\ @@ -530,6 +517,8 @@ mkdir -p %{qemu_kvm_build} --disable-numa \\\ --disable-nvmm \\\ --disable-opengl \\\ + --disable-oss \\\ + --disable-pa \\\ --disable-parallels \\\ --disable-pie \\\ --disable-pvrdma \\\ @@ -545,16 +534,17 @@ mkdir -p %{qemu_kvm_build} --disable-sdl \\\ --disable-sdl-image \\\ --disable-seccomp \\\ + --disable-selinux \\\ --disable-slirp \\\ --disable-slirp-smbd \\\ --disable-smartcard \\\ --disable-snappy \\\ --disable-sparse \\\ --disable-spice \\\ + --disable-spice-protocol \\\ --disable-strip \\\ --disable-system \\\ --disable-tcg \\\ - --disable-tcmalloc \\\ --disable-tools \\\ --disable-tpm \\\ --disable-u2f \\\ @@ -603,7 +593,7 @@ run_configure() { --docdir="%{_docdir}" \ --libexecdir="%{_libexecdir}" \ --extra-ldflags="%{build_ldflags}" \ - --extra-cflags="%{optflags}" \ + --extra-cflags="%{optflags} -Wno-string-plus-int" \ --with-pkgversion="%{name}-%{version}-%{release}" \ --with-suffix="%{name}" \ --firmwarepath=%{firmwaredirs} \ @@ -634,19 +624,18 @@ run_configure \ %if %{defined block_drivers_ro_list} --block-drv-ro-whitelist=%{block_drivers_ro_list} \ %endif - --audio-drv-list=pa \ --enable-attr \ %ifarch %{ix86} x86_64 --enable-avx2 \ %endif --enable-cap-ng \ - --enable-capstone \ + --enable-capstone=internal \ --enable-coroutine-pool \ --enable-curl \ --enable-debug-info \ --enable-docs \ %if %{have_fdt} - --enable-fdt \ + --enable-fdt=system \ %endif --enable-gnutls \ --enable-guest-agent \ @@ -669,6 +658,7 @@ run_configure \ %if %{have_opengl} --enable-opengl \ %endif + --enable-pa \ --enable-pie \ %if %{have_block_rbd} --enable-rbd \ @@ -677,8 +667,10 @@ run_configure \ --enable-rdma \ %endif --enable-seccomp \ + --enable-selinux \ 
--enable-slirp=system \ --enable-snappy \ + --enable-spice-protocol \ --enable-system \ --enable-tcg \ --enable-tools \ @@ -796,7 +788,7 @@ install -D -p -m 0644 %{modprobe_kvm_conf} $RPM_BUILD_ROOT%{_sysconfdir}/modprob # Create new directories and put them all under tests-src mkdir -p %{buildroot}%{testsdir}/python mkdir -p %{buildroot}%{testsdir}/tests -mkdir -p %{buildroot}%{testsdir}/tests/acceptance +mkdir -p %{buildroot}%{testsdir}/tests/avocado mkdir -p %{buildroot}%{testsdir}/tests/qemu-iotests mkdir -p %{buildroot}%{testsdir}/scripts/qmp @@ -805,7 +797,7 @@ install -m 0644 scripts/dump-guest-memory.py \ %{buildroot}%{_datadir}/%{name} # Install avocado_qemu tests -cp -R %{qemu_kvm_build}/tests/acceptance/* %{buildroot}%{testsdir}/tests/acceptance/ +cp -R %{qemu_kvm_build}/tests/avocado/* %{buildroot}%{testsdir}/tests/avocado/ # Install qemu.py and qmp/ scripts required to run avocado_qemu tests cp -R %{qemu_kvm_build}/python/qemu %{buildroot}%{testsdir}/python @@ -921,6 +913,7 @@ rm -rf %{buildroot}%{_mandir}/man1/virtfs-proxy-helper* rm -rf %{buildroot}%{_datadir}/%{name}/kvmvapic.bin rm -rf %{buildroot}%{_datadir}/%{name}/linuxboot.bin rm -rf %{buildroot}%{_datadir}/%{name}/multiboot.bin + rm -rf %{buildroot}%{_datadir}/%{name}/multiboot_dma.bin rm -rf %{buildroot}%{_datadir}/%{name}/pvh.bin %endif @@ -989,7 +982,6 @@ popd %postun -n qemu-guest-agent %systemd_postun_with_restart qemu-guest-agent.service - %if !%{tools_only} %post common getent group kvm >/dev/null || groupadd -g 36 -r kvm @@ -1089,6 +1081,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %ifarch x86_64 %{_datadir}/%{name}/linuxboot.bin %{_datadir}/%{name}/multiboot.bin + %{_datadir}/%{name}/multiboot_dma.bin %{_datadir}/%{name}/kvmvapic.bin %{_datadir}/%{name}/pvh.bin %endif @@ -1162,6 +1155,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Thu Dec 16 2021 Miroslav Rezanina - 6.2.0-1 +- Rebase to QEMU 6.2.0 [bz#2027697] +- Resolves: 
bz#2027697 + (Rebase to QEMU 6.2.0) + * Wed Nov 24 2021 Miroslav Rezanina - 6.1.0-8 - kvm-Move-ksmtuned-files-to-separate-package.patch [bz#1971678] - Resolves: bz#1971678 diff --git a/sources b/sources index 00e7175..f1545cf 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-6.1.0.tar.xz) = 3378ae21c75b77ee6a759827f1fcf7b2a50a0fef07e3b0e89117108022a8d8655fa977e4d65596f4f24f7c735c6594d44b0c6f69732ea4465e88a7406b1d5d3c +SHA512 (qemu-6.2.0.tar.xz) = e9f8231c9e1cfcc41cb47f10a55d63f6b8aee307af00cf6acf64acb7aa4f49fa7e9d6330703a2abea15d8b7bbaba7d3cb08c83edd98d82642367b527df730817 From 34dbcea312bd24bd2ccf57ef91f46375afe80824 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 7 Jan 2022 01:25:13 -0500 Subject: [PATCH 144/195] * Fri Jan 07 2022 Miroslav Rezanina - 6.2.0-2 - kvm-redhat-Add-rhel8.6.0-and-rhel9.0.0-machine-types-for.patch [bz#2008060] - kvm-redhat-Enable-virtio-mem-as-tech-preview-on-x86-64.patch [bz#2014484] - Resolves: bz#2008060 (Fix CPU Model for new IBM Z Hardware - qemu part) - Resolves: bz#2014484 ([RHEL9] Enable virtio-mem as tech-preview on x86-64 - QEMU) --- ....6.0-and-rhel9.0.0-machine-types-for.patch | 82 +++++++++++++++++++ ...virtio-mem-as-tech-preview-on-x86-64.patch | 43 ++++++++++ qemu-kvm.spec | 14 +++- 3 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 kvm-redhat-Add-rhel8.6.0-and-rhel9.0.0-machine-types-for.patch create mode 100644 kvm-redhat-Enable-virtio-mem-as-tech-preview-on-x86-64.patch diff --git a/kvm-redhat-Add-rhel8.6.0-and-rhel9.0.0-machine-types-for.patch b/kvm-redhat-Add-rhel8.6.0-and-rhel9.0.0-machine-types-for.patch new file mode 100644 index 0000000..407aa1e --- /dev/null +++ b/kvm-redhat-Add-rhel8.6.0-and-rhel9.0.0-machine-types-for.patch @@ -0,0 +1,82 @@ +From 0e85c10b73c36f386723c842a797d5e2155e758f Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 10 Dec 2021 10:07:40 +0100 +Subject: [PATCH 1/2] redhat: Add rhel8.6.0 and rhel9.0.0 machine types for + s390x +MIME-Version: 1.0 
+Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 55: redhat: Add rhel8.6.0 and rhel9.0.0 machine types for s390x +RH-Commit: [1/1] ad8fd5c825ae52a8fbb3a28f700a514509c59978 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2008060 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Laurent Vivier +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2008060 + +The new machine types have better default values for the upcoming +"generation 16" mainframe. + +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 26 +++++++++++++++++++++++++- + 1 file changed, 25 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index c654045964..9da6e9b1d4 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1103,10 +1103,33 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + DEFINE_CCW_MACHINE(2_4, "2.4", false); + #endif + ++static void ccw_machine_rhel900_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel900_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); ++ ++static void ccw_machine_rhel860_instance_options(MachineState *machine) ++{ ++ /* Note: The -rhel8.6.0 and -rhel9.0.0 machines are technically identical */ ++ ccw_machine_rhel900_instance_options(machine); ++} ++ ++static void ccw_machine_rhel860_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel900_class_options(mc); ++} ++DEFINE_CCW_MACHINE(rhel860, "rhel8.6.0", false); ++ + static void ccw_machine_rhel850_instance_options(MachineState *machine) + { + static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 }; + ++ ccw_machine_rhel860_instance_options(machine); ++ + s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); + + s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA); +@@ -1118,10 +1141,11 @@ static void 
ccw_machine_rhel850_instance_options(MachineState *machine) + + static void ccw_machine_rhel850_class_options(MachineClass *mc) + { ++ ccw_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); + mc->smp_props.prefer_sockets = true; + } +-DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); ++DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); + + static void ccw_machine_rhel840_instance_options(MachineState *machine) + { +-- +2.27.0 + diff --git a/kvm-redhat-Enable-virtio-mem-as-tech-preview-on-x86-64.patch b/kvm-redhat-Enable-virtio-mem-as-tech-preview-on-x86-64.patch new file mode 100644 index 0000000..d3b2ba0 --- /dev/null +++ b/kvm-redhat-Enable-virtio-mem-as-tech-preview-on-x86-64.patch @@ -0,0 +1,43 @@ +From 24b44713de4b2a47c42dfeea813c8911694367a0 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Fri, 17 Dec 2021 10:00:53 +0100 +Subject: [PATCH 2/2] redhat: Enable virtio-mem as tech-preview on x86-64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: David Hildenbrand +RH-MergeRequest: 56: redhat: Enable virtio-mem as tech-preview on x86-64 +RH-Commit: [1/1] 255749cd877b91ce1978b67a088f7c0e181fd85e +RH-Bugzilla: 2014484 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Thomas Huth +RH-Acked-by: Gavin Shan + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2014484 +Upstream-status: RHEL-only + +Everything necesssary for tech-preview of virtio-mem in RHEL9 is +included in QEMU v6.2. Let's enable it via the config option +CONFIG_VIRTIO_MEM on x86-64. 
+ +Signed-off-by: David Hildenbrand +--- + configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +index 1f7a9ab024..dc03fbb671 100644 +--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -88,6 +88,7 @@ CONFIG_VGA_CIRRUS=y + CONFIG_VGA_PCI=y + CONFIG_VHOST_USER=y + CONFIG_VHOST_USER_BLK=y ++CONFIG_VIRTIO_MEM=y + CONFIG_VIRTIO_PCI=y + CONFIG_VIRTIO_VGA=y + CONFIG_VMMOUSE=y +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 786b6db..f7ab7dc 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -130,7 +130,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.2.0 -Release: 1%{?rcrel}%{?dist}%{?cc_suffix} +Release: 2%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -177,6 +177,10 @@ Patch0022: 0022-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch Patch0023: 0023-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch Patch0024: 0024-redhat-Add-s390x-machine-type-compatibility-handling.patch Patch0025: 0025-compat-Update-hw_compat_rhel_8_5-with-6.2.0-RC2-chan.patch +# For bz#2008060 - Fix CPU Model for new IBM Z Hardware - qemu part +Patch26: kvm-redhat-Add-rhel8.6.0-and-rhel9.0.0-machine-types-for.patch +# For bz#2014484 - [RHEL9] Enable virtio-mem as tech-preview on x86-64 - QEMU +Patch27: kvm-redhat-Enable-virtio-mem-as-tech-preview-on-x86-64.patch %if %{have_clang} BuildRequires: clang @@ -1155,6 +1159,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Fri Jan 07 2022 Miroslav Rezanina - 6.2.0-2 +- kvm-redhat-Add-rhel8.6.0-and-rhel9.0.0-machine-types-for.patch [bz#2008060] +- kvm-redhat-Enable-virtio-mem-as-tech-preview-on-x86-64.patch [bz#2014484] +- Resolves: bz#2008060 + (Fix CPU Model for new IBM Z Hardware - qemu part) +- Resolves: bz#2014484 + ([RHEL9] Enable virtio-mem as tech-preview on x86-64 - QEMU) + * Thu Dec 16 2021 Miroslav Rezanina - 6.2.0-1 - Rebase to QEMU 6.2.0 [bz#2027697] - Resolves: bz#2027697 From e9000b6628510946ca18aa055e65096c0312fe0b Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 10 Jan 2022 02:27:30 -0500 Subject: [PATCH 145/195] * Mon Jan 10 2022 Miroslav Rezanina - 6.2.0-3 - kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch [bz#2031044] - kvm-hw-arm-virt-Register-its-as-a-class-property.patch [bz#2031044] - kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch [bz#2031044] - kvm-hw-arm-virt-Expose-the-RAS-option.patch [bz#2031044] - kvm-hw-arm-virt-Add-9.0-machine-type-and-remove-8.5-one.patch [bz#2031044] - kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch [bz#2031044] - Resolves: 
bz#2031044 (Add rhel-9.0.0 machine types for RHEL 9.0 [aarch64]) --- ...-9.0-machine-type-and-remove-8.5-one.patch | 48 ++++++++++ ...k-no_tcg_its-and-minor-style-changes.patch | 88 +++++++++++++++++++ kvm-hw-arm-virt-Expose-the-RAS-option.patch | 60 +++++++++++++ ...t-Register-iommu-as-a-class-property.patch | 81 +++++++++++++++++ ...irt-Register-its-as-a-class-property.patch | 57 ++++++++++++ ...virt-Rename-default_bus_bypass_iommu.patch | 46 ++++++++++ qemu-kvm.spec | 24 ++++- 7 files changed, 403 insertions(+), 1 deletion(-) create mode 100644 kvm-hw-arm-virt-Add-9.0-machine-type-and-remove-8.5-one.patch create mode 100644 kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch create mode 100644 kvm-hw-arm-virt-Expose-the-RAS-option.patch create mode 100644 kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch create mode 100644 kvm-hw-arm-virt-Register-its-as-a-class-property.patch create mode 100644 kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch diff --git a/kvm-hw-arm-virt-Add-9.0-machine-type-and-remove-8.5-one.patch b/kvm-hw-arm-virt-Add-9.0-machine-type-and-remove-8.5-one.patch new file mode 100644 index 0000000..390a91c --- /dev/null +++ b/kvm-hw-arm-virt-Add-9.0-machine-type-and-remove-8.5-one.patch @@ -0,0 +1,48 @@ +From 6b0e129f0758ccd076d1ecbf85c8f1e863788981 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 10:11:47 +0100 +Subject: [PATCH 5/6] hw/arm/virt: Add 9.0 machine type and remove 8.5 one + +RH-Author: Eric Auger +RH-MergeRequest: 57: hw/arm/virt: Add 9.0 machine type and remove 8.5 one +RH-Commit: [5/6] f573a2fb44882a010e2c6bf5f561f29d54e6e9b5 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2031044 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Miroslav Rezanina + +branch: c9s +Brew: 42213566 +Upstream: no + +Add 9.0 machine type and remove 8.5 one. 
+ +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index d433139479..d537706a86 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3236,14 +3236,8 @@ static void rhel_machine_init(void) + } + type_init(rhel_machine_init); + +-static void rhel850_virt_options(MachineClass *mc) ++static void rhel900_virt_options(MachineClass *mc) + { +- VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); +- + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); +- compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); +- mc->smp_props.prefer_sockets = true; +- vmc->no_cpu_topology = true; +- vmc->no_tcg_its = true; + } +-DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) ++DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) +-- +2.27.0 + diff --git a/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch b/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch new file mode 100644 index 0000000..0e4acf4 --- /dev/null +++ b/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch @@ -0,0 +1,88 @@ +From 4098f7b5aea8871a655bab43d5114d067662e6c5 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 5 Jan 2022 16:17:10 +0100 +Subject: [PATCH 6/6] hw/arm/virt: Check no_tcg_its and minor style changes + +RH-Author: Eric Auger +RH-MergeRequest: 57: hw/arm/virt: Add 9.0 machine type and remove 8.5 one +RH-Commit: [6/6] 4480d569463fd3f637404539d3bd06b59cafbc88 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2031044 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Miroslav Rezanina + +branch: c9s +Brew: 42213566 +Upstream: no + +Truly allow TCG ITS instantiation according to the no_tcg_its +class flag. Otherwise it is always set to false. + +We also take benefit of this patch to do some minor non +functional style changes to be closer to the upstream code. 
+ +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index d537706a86..0e691cbe81 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3163,6 +3163,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "in ACPI table header." + "The string may be up to 6 bytes in size"); + ++ + object_class_property_add_str(oc, "x-oem-table-id", + virt_get_oem_table_id, + virt_set_oem_table_id); +@@ -3170,6 +3171,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Override the default value of field OEM Table ID " + "in ACPI table header." + "The string may be up to 8 bytes in size"); ++ + } + + static void rhel_virt_instance_init(Object *obj) +@@ -3194,26 +3196,32 @@ static void rhel_virt_instance_init(Object *obj) + } else { + /* Default allows ITS instantiation */ + vms->its = true; ++ ++ if (vmc->no_tcg_its) { ++ vms->tcg_its = false; ++ } else { ++ vms->tcg_its = true; ++ } + } + + /* Default disallows iommu instantiation */ + vms->iommu = VIRT_IOMMU_NONE; + ++ /* The default root bus is attached to iommu by default */ ++ vms->default_bus_bypass_iommu = false; ++ + /* Default disallows RAS instantiation and is non-configurable for RHEL */ + vms->ras = false; + + /* MTE is disabled by default and non-configurable for RHEL */ + vms->mte = false; + +- /* The default root bus is attached to iommu by default */ +- vms->default_bus_bypass_iommu = false; +- + vms->irqmap = a15irqmap; + + virt_flash_create(vms); ++ + vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); + vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); +- + } + + static const TypeInfo rhel_machine_info = { +-- +2.27.0 + diff --git a/kvm-hw-arm-virt-Expose-the-RAS-option.patch b/kvm-hw-arm-virt-Expose-the-RAS-option.patch new file mode 100644 index 0000000..e0365c9 --- /dev/null +++ b/kvm-hw-arm-virt-Expose-the-RAS-option.patch @@ -0,0 +1,60 @@ +From 
914d9f9eea5d0a944aa93682b03d3189ad37ec9b Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 15:34:22 +0100 +Subject: [PATCH 4/6] hw/arm/virt: Expose the 'RAS' option + +RH-Author: Eric Auger +RH-MergeRequest: 57: hw/arm/virt: Add 9.0 machine type and remove 8.5 one +RH-Commit: [4/6] c8704564d31b23a0f08a6ced946c9a81e2e72c11 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2031044 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Miroslav Rezanina + +branch: c9s +Brew: 42213566 +Upstream: no + +In RHEL9.0 we want to expose the 'RAS' option. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c99ca93e75..d433139479 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2348,7 +2348,6 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, + visit_type_OnOffAuto(v, name, &vms->acpi, errp); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_ras(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2363,6 +2362,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) + vms->ras = value; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_mte(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -3143,6 +3143,12 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set on/off to enable/disable " + "bypass_iommu for default root bus"); + ++ object_class_property_add_bool(oc, "ras", virt_get_ras, ++ virt_set_ras); ++ object_class_property_set_description(oc, "ras", ++ "Set on/off to enable/disable reporting host memory errors " ++ "to a KVM guest using ACPI and guest external abort exceptions"); ++ + object_class_property_add_bool(oc, "its", virt_get_its, + virt_set_its); + object_class_property_set_description(oc, "its", +-- +2.27.0 + diff --git 
a/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch b/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch new file mode 100644 index 0000000..6c21c9a --- /dev/null +++ b/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch @@ -0,0 +1,81 @@ +From 545076d67ef27203e08538123d8bc3798caf9505 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 15:50:44 +0100 +Subject: [PATCH 1/6] hw/arm/virt: Register "iommu" as a class property + +RH-Author: Eric Auger +RH-MergeRequest: 57: hw/arm/virt: Add 9.0 machine type and remove 8.5 one +RH-Commit: [1/6] 2b6a22ebddd2da7505961ff4ffe90424f7489300 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2031044 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Miroslav Rezanina + +branch: c9s +Brew: 42213566 +Upstream: no + +Register the "iommu" option as a class property. This mirrors what +was done in upstream commit b91def7b ("arm/virt: Register +most properties as class properties"). + +While we are at it we also move the "x-oem-id" and "x-oem-table-id" +registrations at the very end of the rhel_machine_class_init() +function. This makes our life easier when comparing with upstream. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 6ba9a2c2e1..7e227b1fa4 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3131,6 +3131,18 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set GIC version. " + "Valid values are 2, 3, host and max"); + ++ object_class_property_add_str(oc, "iommu", virt_get_iommu, virt_set_iommu); ++ object_class_property_set_description(oc, "iommu", ++ "Set the IOMMU type. 
" ++ "Valid values are none and smmuv3"); ++ ++ object_class_property_add_bool(oc, "default_bus_bypass_iommu", ++ virt_get_default_bus_bypass_iommu, ++ virt_set_default_bus_bypass_iommu); ++ object_class_property_set_description(oc, "default_bus_bypass_iommu", ++ "Set on/off to enable/disable " ++ "bypass_iommu for default root bus"); ++ + object_class_property_add_str(oc, "x-oem-id", + virt_get_oem_id, + virt_set_oem_id); +@@ -3146,13 +3158,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Override the default value of field OEM Table ID " + "in ACPI table header." + "The string may be up to 8 bytes in size"); +- object_class_property_add_bool(oc, "default_bus_bypass_iommu", +- virt_get_default_bus_bypass_iommu, +- virt_set_default_bus_bypass_iommu); +- object_class_property_set_description(oc, "default_bus_bypass_iommu", +- "Set on/off to enable/disable " +- "bypass_iommu for default root bus"); +- + } + + static void rhel_virt_instance_init(Object *obj) +@@ -3186,10 +3191,6 @@ static void rhel_virt_instance_init(Object *obj) + + /* Default disallows iommu instantiation */ + vms->iommu = VIRT_IOMMU_NONE; +- object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu); +- object_property_set_description(obj, "iommu", +- "Set the IOMMU type. 
" +- "Valid values are none and smmuv3"); + + /* Default disallows RAS instantiation and is non-configurable for RHEL */ + vms->ras = false; +-- +2.27.0 + diff --git a/kvm-hw-arm-virt-Register-its-as-a-class-property.patch b/kvm-hw-arm-virt-Register-its-as-a-class-property.patch new file mode 100644 index 0000000..95f58a4 --- /dev/null +++ b/kvm-hw-arm-virt-Register-its-as-a-class-property.patch @@ -0,0 +1,57 @@ +From 789933e2598f9a525c2a638feca974ca1730a859 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 16:04:59 +0100 +Subject: [PATCH 2/6] hw/arm/virt: Register "its" as a class property + +RH-Author: Eric Auger +RH-MergeRequest: 57: hw/arm/virt: Add 9.0 machine type and remove 8.5 one +RH-Commit: [2/6] dbd3e994553f00cd19842824f6bd763863a4e484 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2031044 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Miroslav Rezanina + +branch: c9s +Brew: 42213566 +Upstream: no + +Register "its" as a class property. This mirrors what was done +in commit 27edeeaafe43 ("virt: Register "its" as class property"). 
+ +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 7e227b1fa4..984151b7dd 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3143,6 +3143,12 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set on/off to enable/disable " + "bypass_iommu for default root bus"); + ++ object_class_property_add_bool(oc, "its", virt_get_its, ++ virt_set_its); ++ object_class_property_set_description(oc, "its", ++ "Set on/off to enable/disable " ++ "ITS instantiation"); ++ + object_class_property_add_str(oc, "x-oem-id", + virt_get_oem_id, + virt_set_oem_id); +@@ -3182,11 +3188,6 @@ static void rhel_virt_instance_init(Object *obj) + } else { + /* Default allows ITS instantiation */ + vms->its = true; +- object_property_add_bool(obj, "its", virt_get_its, +- virt_set_its); +- object_property_set_description(obj, "its", +- "Set on/off to enable/disable " +- "ITS instantiation"); + } + + /* Default disallows iommu instantiation */ +-- +2.27.0 + diff --git a/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch b/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch new file mode 100644 index 0000000..eab69d2 --- /dev/null +++ b/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch @@ -0,0 +1,46 @@ +From 94987c271c3bdc37216c5baa4c5766b9b7f053a1 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 15:58:38 +0100 +Subject: [PATCH 3/6] hw/arm/virt: Rename default_bus_bypass_iommu + +RH-Author: Eric Auger +RH-MergeRequest: 57: hw/arm/virt: Add 9.0 machine type and remove 8.5 one +RH-Commit: [3/6] 6b66844ae4629d8c879f7c6abbc5e5017a162e16 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2031044 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Miroslav Rezanina + +branch: c9s +Brew: 42213566 +Upstream: no + +Rename "default_bus_bypass_iommu" into "default-bus-bypass-iommu". 
+This mirrors what was done in upstream commit: +9dad363a223 ("hw/arm/virt: Rename default_bus_bypass_iommu") + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 984151b7dd..c99ca93e75 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3136,10 +3136,10 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set the IOMMU type. " + "Valid values are none and smmuv3"); + +- object_class_property_add_bool(oc, "default_bus_bypass_iommu", ++ object_class_property_add_bool(oc, "default-bus-bypass-iommu", + virt_get_default_bus_bypass_iommu, + virt_set_default_bus_bypass_iommu); +- object_class_property_set_description(oc, "default_bus_bypass_iommu", ++ object_class_property_set_description(oc, "default-bus-bypass-iommu", + "Set on/off to enable/disable " + "bypass_iommu for default root bus"); + +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index f7ab7dc..d35d448 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -130,7 +130,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.2.0 -Release: 2%{?rcrel}%{?dist}%{?cc_suffix} +Release: 3%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -181,6 +181,18 @@ Patch0025: 0025-compat-Update-hw_compat_rhel_8_5-with-6.2.0-RC2-chan.patch Patch26: kvm-redhat-Add-rhel8.6.0-and-rhel9.0.0-machine-types-for.patch # For bz#2014484 - [RHEL9] Enable virtio-mem as tech-preview on x86-64 - QEMU Patch27: kvm-redhat-Enable-virtio-mem-as-tech-preview-on-x86-64.patch +# For bz#2031044 - Add rhel-9.0.0 machine types for RHEL 9.0 [aarch64] +Patch28: kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch +# For bz#2031044 - Add rhel-9.0.0 machine types for RHEL 9.0 [aarch64] +Patch29: kvm-hw-arm-virt-Register-its-as-a-class-property.patch +# For bz#2031044 - Add rhel-9.0.0 machine types for RHEL 9.0 [aarch64] +Patch30: kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch +# For bz#2031044 - Add rhel-9.0.0 machine types for RHEL 9.0 [aarch64] +Patch31: kvm-hw-arm-virt-Expose-the-RAS-option.patch +# For bz#2031044 - Add rhel-9.0.0 machine types for RHEL 9.0 [aarch64] +Patch32: kvm-hw-arm-virt-Add-9.0-machine-type-and-remove-8.5-one.patch +# For bz#2031044 - Add rhel-9.0.0 machine types for RHEL 9.0 [aarch64] +Patch33: kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch %if %{have_clang} BuildRequires: clang @@ -1159,6 +1171,16 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Jan 10 2022 Miroslav Rezanina - 6.2.0-3 +- kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch [bz#2031044] +- kvm-hw-arm-virt-Register-its-as-a-class-property.patch [bz#2031044] +- kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch [bz#2031044] +- kvm-hw-arm-virt-Expose-the-RAS-option.patch [bz#2031044] +- kvm-hw-arm-virt-Add-9.0-machine-type-and-remove-8.5-one.patch [bz#2031044] +- kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch [bz#2031044] +- Resolves: bz#2031044 + (Add rhel-9.0.0 machine types for RHEL 9.0 [aarch64]) + * Fri Jan 07 2022 Miroslav 
Rezanina - 6.2.0-2 - kvm-redhat-Add-rhel8.6.0-and-rhel9.0.0-machine-types-for.patch [bz#2008060] - kvm-redhat-Enable-virtio-mem-as-tech-preview-on-x86-64.patch [bz#2014484] From 365a1410b6a94f099d06cd77b11c7b7b8483760c Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 17 Jan 2022 06:13:57 -0500 Subject: [PATCH 146/195] * Mon Jan 17 2022 Miroslav Rezanina - 6.2.0-4 - kvm-block-nvme-fix-infinite-loop-in-nvme_free_req_queue_.patch [bz#2024544] - kvm-rhel-machine-types-x86-set-prefer_sockets.patch [bz#2028623] - Resolves: bz#2024544 (Fio workers hangs when running fio with 32 jobs iodepth 32 and QEMU's userspace NVMe driver) - Resolves: bz#2028623 ([9.0] machine types: 6.2: Fix prefer_sockets) --- ...nfinite-loop-in-nvme_free_req_queue_.patch | 71 +++++++++++++++++++ ...machine-types-x86-set-prefer_sockets.patch | 52 ++++++++++++++ qemu-kvm.spec | 16 ++++- 3 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 kvm-block-nvme-fix-infinite-loop-in-nvme_free_req_queue_.patch create mode 100644 kvm-rhel-machine-types-x86-set-prefer_sockets.patch diff --git a/kvm-block-nvme-fix-infinite-loop-in-nvme_free_req_queue_.patch b/kvm-block-nvme-fix-infinite-loop-in-nvme_free_req_queue_.patch new file mode 100644 index 0000000..bdebdc3 --- /dev/null +++ b/kvm-block-nvme-fix-infinite-loop-in-nvme_free_req_queue_.patch @@ -0,0 +1,71 @@ +From 6989be9d0aa08470f8b287c243dc4bf027d5fbcf Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 8 Dec 2021 15:22:46 +0000 +Subject: [PATCH 1/2] block/nvme: fix infinite loop in nvme_free_req_queue_cb() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 58: block/nvme: fix infinite loop in nvme_free_req_queue_cb() +RH-Commit: [1/1] 544b3f310d791a20c63b51947de0c6cbb60b0d5b (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2024544 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Hanna Reitz + +When the request 
free list is exhausted the coroutine waits on +q->free_req_queue for the next free request. Whenever a request is +completed a BH is scheduled to invoke nvme_free_req_queue_cb() and wake +up waiting coroutines. + +1. nvme_get_free_req() waits for a free request: + + while (q->free_req_head == -1) { + ... + trace_nvme_free_req_queue_wait(q->s, q->index); + qemu_co_queue_wait(&q->free_req_queue, &q->lock); + ... + } + +2. nvme_free_req_queue_cb() wakes up the coroutine: + + while (qemu_co_enter_next(&q->free_req_queue, &q->lock)) { + ^--- infinite loop when free_req_head == -1 + } + +nvme_free_req_queue_cb() and the coroutine form an infinite loop when +q->free_req_head == -1. Fix this by checking q->free_req_head in +nvme_free_req_queue_cb(). If the free request list is exhausted, don't +wake waiting coroutines. Eventually an in-flight request will complete +and the BH will be scheduled again, guaranteeing forward progress. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 20211208152246.244585-1-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit cf4fbc3030c974fff726756a7ceef8386cdf500b) +Signed-off-by: Stefan Hajnoczi +--- + block/nvme.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/block/nvme.c b/block/nvme.c +index e4f336d79c..fa360b9b3c 100644 +--- a/block/nvme.c ++++ b/block/nvme.c +@@ -206,8 +206,9 @@ static void nvme_free_req_queue_cb(void *opaque) + NVMeQueuePair *q = opaque; + + qemu_mutex_lock(&q->lock); +- while (qemu_co_enter_next(&q->free_req_queue, &q->lock)) { +- /* Retry all pending requests */ ++ while (q->free_req_head != -1 && ++ qemu_co_enter_next(&q->free_req_queue, &q->lock)) { ++ /* Retry waiting requests */ + } + qemu_mutex_unlock(&q->lock); + } +-- +2.27.0 + diff --git a/kvm-rhel-machine-types-x86-set-prefer_sockets.patch b/kvm-rhel-machine-types-x86-set-prefer_sockets.patch new file mode 100644 index 0000000..83c912d --- /dev/null +++ 
b/kvm-rhel-machine-types-x86-set-prefer_sockets.patch @@ -0,0 +1,52 @@ +From ecadfaec992fda7f485522c9ee6e7c9b05614a22 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 7 Dec 2021 18:39:47 +0000 +Subject: [PATCH 2/2] rhel machine types/x86: set prefer_sockets + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 59: rhel machine types/x86: set prefer_sockets +RH-Commit: [1/1] 9bcd9e2c95154e39ef30a8a342ad6c713fa4f1fb (dagrh/c-9-s-qemu-kvm) +RH-Bugzilla: 2028623 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Acked-by: Cornelia Huck + +When I fixed up the machine types for 8.5 I missed the + prefer_sockets = true + +add them in; it looks like Power, ARM already have them, and I see them +in thuth's s390 patch. + +Signed-off-by: Dr. David Alan Gilbert +--- + hw/i386/pc_piix.c | 1 + + hw/i386/pc_q35.c | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 183b5d5464..fccb7f5fc9 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -973,6 +973,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + m->alias = "pc"; + m->is_default = 1; ++ m->smp_props.prefer_sockets = true; + } + + static void pc_init_rhel760(MachineState *machine) +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 0e7e885e78..3b748ddd7b 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -662,6 +662,7 @@ static void pc_q35_machine_rhel850_options(MachineClass *m) + hw_compat_rhel_8_5_len); + compat_props_add(m->compat_props, pc_rhel_8_5_compat, + pc_rhel_8_5_compat_len); ++ m->smp_props.prefer_sockets = true; + } + + DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index d35d448..7732d76 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -130,7 +130,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer 
Name: qemu-kvm Version: 6.2.0 -Release: 3%{?rcrel}%{?dist}%{?cc_suffix} +Release: 4%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -193,6 +193,12 @@ Patch31: kvm-hw-arm-virt-Expose-the-RAS-option.patch Patch32: kvm-hw-arm-virt-Add-9.0-machine-type-and-remove-8.5-one.patch # For bz#2031044 - Add rhel-9.0.0 machine types for RHEL 9.0 [aarch64] Patch33: kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch +# For bz#2024544 - Fio workers hangs when running fio with 32 jobs iodepth 32 and QEMU's userspace NVMe driver +Patch34: kvm-block-nvme-fix-infinite-loop-in-nvme_free_req_queue_.patch +# For bz#2028623 - [9.0] machine types: 6.2: Fix prefer_sockets +Patch35: kvm-rhel-machine-types-x86-set-prefer_sockets.patch + +# Source-git patches %if %{have_clang} BuildRequires: clang @@ -1171,6 +1177,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Jan 17 2022 Miroslav Rezanina - 6.2.0-4 +- kvm-block-nvme-fix-infinite-loop-in-nvme_free_req_queue_.patch [bz#2024544] +- kvm-rhel-machine-types-x86-set-prefer_sockets.patch [bz#2028623] +- Resolves: bz#2024544 + (Fio workers hangs when running fio with 32 jobs iodepth 32 and QEMU's userspace NVMe driver) +- Resolves: bz#2028623 + ([9.0] machine types: 6.2: Fix prefer_sockets) + * Mon Jan 10 2022 Miroslav Rezanina - 6.2.0-3 - kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch [bz#2031044] - kvm-hw-arm-virt-Register-its-as-a-class-property.patch [bz#2031044] From 419d9c867bfc69df094b6cc59a0b37e529fe254b Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Tue, 25 Jan 2022 00:23:55 -0500 Subject: [PATCH 147/195] * Tue Jan 25 2022 Miroslav Rezanina - 6.2.0-5 - kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch [bz#1945666] - kvm-x86-Add-q35-RHEL-9.0.0-machine-type.patch [bz#1945666] - 
kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch [bz#2036669] - Resolves: bz#1945666 (9.0: x86 machine types) - Resolves: bz#2036669 (DEVICE_DELETED event is not delivered for device frontend if -device is configured via JSON) --- ...ce-deletion-events-with-device-JSON-.patch | 130 ++++++++++++++++++ kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch | 65 +++++++++ kvm-x86-Add-q35-RHEL-9.0.0-machine-type.patch | 75 ++++++++++ qemu-kvm.spec | 17 ++- 4 files changed, 286 insertions(+), 1 deletion(-) create mode 100644 kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch create mode 100644 kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch create mode 100644 kvm-x86-Add-q35-RHEL-9.0.0-machine-type.patch diff --git a/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch b/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch new file mode 100644 index 0000000..4ddfbe9 --- /dev/null +++ b/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch @@ -0,0 +1,130 @@ +From 005339f7deaee639c38d30e5bf2235c292ce3937 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Wed, 5 Jan 2022 12:38:47 +0000 +Subject: [PATCH 3/3] softmmu: fix device deletion events with -device JSON + syntax +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 62: Fix hot unplug of devices created with -device JSON syntax +RH-Commit: [1/1] 980e505ba215b5f9324c107481c5bb257ae03f42 (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 2036669 +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Laurent Vivier +RH-Acked-by: Jano Tomko + +The -device JSON syntax impl leaks a reference on the created +DeviceState instance. As a result when you hot-unplug the +device, the device_finalize method won't be called and thus +it will fail to emit the required DEVICE_DELETED event. 
+ +A 'json-cli' feature was previously added against the +'device_add' QMP command QAPI schema to indicate to mgmt +apps that -device supported JSON syntax. Given the hotplug +bug that feature flag is not usable for its purpose, so +we add a new 'json-cli-hotplug' feature to indicate the +-device supports JSON without breaking hotplug. + +Fixes: 5dacda5167560b3af8eadbce5814f60ba44b467e +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/802 +Signed-off-by: Daniel P. Berrangé +Message-Id: <20220105123847.4047954-2-berrange@redhat.com> +Reviewed-by: Laurent Vivier +Tested-by: Ján Tomko +Reviewed-by: Thomas Huth +Signed-off-by: Kevin Wolf +(cherry picked from commit 64b4529a432507ee84a924be69a03432639e87ba) +Signed-off-by: Kevin Wolf +--- + qapi/qdev.json | 5 ++++- + softmmu/vl.c | 4 +++- + tests/qtest/device-plug-test.c | 19 +++++++++++++++++++ + 3 files changed, 26 insertions(+), 2 deletions(-) + +diff --git a/qapi/qdev.json b/qapi/qdev.json +index 69656b14df..26cd10106b 100644 +--- a/qapi/qdev.json ++++ b/qapi/qdev.json +@@ -44,6 +44,9 @@ + # @json-cli: If present, the "-device" command line option supports JSON + # syntax with a structure identical to the arguments of this + # command. 
++# @json-cli-hotplug: If present, the "-device" command line option supports JSON ++# syntax without the reference counting leak that broke ++# hot-unplug + # + # Notes: + # +@@ -74,7 +77,7 @@ + { 'command': 'device_add', + 'data': {'driver': 'str', '*bus': 'str', '*id': 'str'}, + 'gen': false, # so we can get the additional arguments +- 'features': ['json-cli'] } ++ 'features': ['json-cli', 'json-cli-hotplug'] } + + ## + # @device_del: +diff --git a/softmmu/vl.c b/softmmu/vl.c +index d46b8fb4ab..b3829e2edd 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -2690,6 +2690,7 @@ static void qemu_create_cli_devices(void) + qemu_opts_foreach(qemu_find_opts("device"), + device_init_func, NULL, &error_fatal); + QTAILQ_FOREACH(opt, &device_opts, next) { ++ DeviceState *dev; + loc_push_restore(&opt->loc); + /* + * TODO Eventually we should call qmp_device_add() here to make sure it +@@ -2698,7 +2699,8 @@ static void qemu_create_cli_devices(void) + * from the start, so call qdev_device_add_from_qdict() directly for + * now. + */ +- qdev_device_add_from_qdict(opt->opts, true, &error_fatal); ++ dev = qdev_device_add_from_qdict(opt->opts, true, &error_fatal); ++ object_unref(OBJECT(dev)); + loc_pop(&opt->loc); + } + rom_reset_order_override(); +diff --git a/tests/qtest/device-plug-test.c b/tests/qtest/device-plug-test.c +index 559d47727a..ad79bd4c14 100644 +--- a/tests/qtest/device-plug-test.c ++++ b/tests/qtest/device-plug-test.c +@@ -77,6 +77,23 @@ static void test_pci_unplug_request(void) + qtest_quit(qtest); + } + ++static void test_pci_unplug_json_request(void) ++{ ++ QTestState *qtest = qtest_initf( ++ "-device '{\"driver\": \"virtio-mouse-pci\", \"id\": \"dev0\"}'"); ++ ++ /* ++ * Request device removal. As the guest is not running, the request won't ++ * be processed. However during system reset, the removal will be ++ * handled, removing the device. 
++ */ ++ device_del(qtest, "dev0"); ++ system_reset(qtest); ++ wait_device_deleted_event(qtest, "dev0"); ++ ++ qtest_quit(qtest); ++} ++ + static void test_ccw_unplug(void) + { + QTestState *qtest = qtest_initf("-device virtio-balloon-ccw,id=dev0"); +@@ -145,6 +162,8 @@ int main(int argc, char **argv) + */ + qtest_add_func("/device-plug/pci-unplug-request", + test_pci_unplug_request); ++ qtest_add_func("/device-plug/pci-unplug-json-request", ++ test_pci_unplug_json_request); + + if (!strcmp(arch, "s390x")) { + qtest_add_func("/device-plug/ccw-unplug", +-- +2.27.0 + diff --git a/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch b/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch new file mode 100644 index 0000000..eb3273c --- /dev/null +++ b/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch @@ -0,0 +1,65 @@ +From 1b8eeb1323fa21c7b26d0396fae5ae4a8cdb1ace Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 11 Jan 2022 18:29:31 +0000 +Subject: [PATCH 1/3] x86: Add q35 RHEL 8.6.0 machine type + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 61: x86: Add rhel 8.6.0 & 9.0.0 machine types +RH-Commit: [1/2] 189335cf0e4ad117e3e401f23aa07cddbbac50df (dagrh/c-9-s-qemu-kvm) +RH-Bugzilla: 1945666 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cornelia Huck + +Add the new 8.6.0 machine type; note that while the -AV +notation has gone in the product naming, just keep the smbios +definitions the same for consistency. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/i386/pc_q35.c | 21 ++++++++++++++++++++- + 1 file changed, 20 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 3b748ddd7b..0c25305f15 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -646,6 +646,24 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + } + ++static void pc_q35_init_rhel860(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel860_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.6.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel860, "pc-q35-rhel8.6.0", pc_q35_init_rhel860, ++ pc_q35_machine_rhel860_options); ++ ++ + static void pc_q35_init_rhel850(MachineState *machine) + { + pc_q35_init(machine); +@@ -654,8 +672,9 @@ static void pc_q35_init_rhel850(MachineState *machine) + static void pc_q35_machine_rhel850_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +- pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel860_options(m); + m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.5.0"; + compat_props_add(m->compat_props, hw_compat_rhel_8_5, +-- +2.27.0 + diff --git a/kvm-x86-Add-q35-RHEL-9.0.0-machine-type.patch b/kvm-x86-Add-q35-RHEL-9.0.0-machine-type.patch new file mode 100644 index 0000000..4367495 --- /dev/null +++ b/kvm-x86-Add-q35-RHEL-9.0.0-machine-type.patch @@ -0,0 +1,75 @@ +From 3d5024fb9c904a649d07f0def3a90b3d36611215 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 12 Jan 2022 13:21:57 +0000 +Subject: [PATCH 2/3] x86: Add q35 RHEL 9.0.0 machine type + +RH-Author: Dr. 
David Alan Gilbert +RH-MergeRequest: 61: x86: Add rhel 8.6.0 & 9.0.0 machine types +RH-Commit: [2/2] 743378502459b978efd632271f97ddb824422203 (dagrh/c-9-s-qemu-kvm) +RH-Bugzilla: 1945666 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cornelia Huck + +Add a rhel-9.0.0 q35 machine type; it's currently identical to 8.6.0; +but having a separate machine type will make life easier in the future +when the 8.x types go away. + +Note: The smbios stream product name has now changed to 'RHEL' + +bz: https://bugzilla.redhat.com/show_bug.cgi?id=1945666 +Signed-off-by: Dr. David Alan Gilbert +--- + hw/i386/pc_q35.c | 21 +++++++++++++++++++-- + 1 file changed, 19 insertions(+), 2 deletions(-) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 0c25305f15..bf9ad32f0e 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -646,6 +646,23 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + } + ++static void pc_q35_init_rhel900(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel900_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL"; ++ pcmc->smbios_stream_version = "9.0.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel900, "pc-q35-rhel9.0.0", pc_q35_init_rhel900, ++ pc_q35_machine_rhel900_options); ++ + static void pc_q35_init_rhel860(MachineState *machine) + { + pc_q35_init(machine); +@@ -654,8 +671,9 @@ static void pc_q35_init_rhel860(MachineState *machine) + static void pc_q35_machine_rhel860_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +- pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel900_options(m); + m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = 
"8.6.0"; + } +@@ -674,7 +692,6 @@ static void pc_q35_machine_rhel850_options(MachineClass *m) + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel860_options(m); + m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; +- m->alias = NULL; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.5.0"; + compat_props_add(m->compat_props, hw_compat_rhel_8_5, +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 7732d76..01e6284 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -130,7 +130,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.2.0 -Release: 4%{?rcrel}%{?dist}%{?cc_suffix} +Release: 5%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -197,6 +197,12 @@ Patch33: kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch Patch34: kvm-block-nvme-fix-infinite-loop-in-nvme_free_req_queue_.patch # For bz#2028623 - [9.0] machine types: 6.2: Fix prefer_sockets Patch35: kvm-rhel-machine-types-x86-set-prefer_sockets.patch +# For bz#1945666 - 9.0: x86 machine types +Patch36: kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch +# For bz#1945666 - 9.0: x86 machine types +Patch37: kvm-x86-Add-q35-RHEL-9.0.0-machine-type.patch +# For bz#2036669 - DEVICE_DELETED event is not delivered for device frontend if -device is configured via JSON +Patch38: kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch # Source-git patches @@ -1177,6 +1183,15 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Tue Jan 25 2022 Miroslav Rezanina - 6.2.0-5 +- kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch [bz#1945666] +- kvm-x86-Add-q35-RHEL-9.0.0-machine-type.patch [bz#1945666] +- kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch [bz#2036669] +- Resolves: bz#1945666 + (9.0: 
x86 machine types) +- Resolves: bz#2036669 + (DEVICE_DELETED event is not delivered for device frontend if -device is configured via JSON) + * Mon Jan 17 2022 Miroslav Rezanina - 6.2.0-4 - kvm-block-nvme-fix-infinite-loop-in-nvme_free_req_queue_.patch [bz#2024544] - kvm-rhel-machine-types-x86-set-prefer_sockets.patch [bz#2028623] From 7434237517fee9b4aa4c96335a2f4e1a64701731 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Feb 2022 00:13:18 -0500 Subject: [PATCH 148/195] * Wed Feb 02 2022 Miroslav Rezanina - 6.2.0-6 - Moving feature support out of qemu-kvm-core to separate packages (can cause loss of functionality when using only qemu-kvm-core - qemu-kvm keeps same feature set). - kvm-spec-Rename-qemu-kvm-hw-usbredir-to-qemu-kvm-device-.patch [bz#2022847] - kvm-spec-Split-qemu-kvm-ui-opengl.patch [bz#2022847] - kvm-spec-Introduce-packages-for-virtio-gpu-modules.patch [bz#2022847] - kvm-spec-Introduce-device-display-virtio-vga-packages.patch [bz#2022847] - kvm-spec-Move-usb-host-module-to-separate-package.patch [bz#2022847] - kvm-spec-Move-qtest-accel-module-to-tests-package.patch [bz#2022847] - kvm-spec-Extend-qemu-kvm-core-description.patch [bz#2022847] - Resolves: bz#2022847 (qemu-kvm: Align package split with Fedora) --- qemu-kvm.spec | 152 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 129 insertions(+), 23 deletions(-) diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 01e6284..be30965 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -110,9 +110,23 @@ %global requires_all_modules \ %if %{have_opengl} \ Requires: %{name}-ui-opengl = %{epoch}:%{version}-%{release} \ +Requires: %{name}-ui-egl-headless = %{epoch}:%{version}-%{release} \ %endif \ +Requires: %{name}-device-display-virtio-gpu = %{epoch}:%{version}-%{release} \ +Requires: %{name}-device-display-virtio-gpu-gl = %{epoch}:%{version}-%{release} \ +%ifarch s390x \ +Requires: %{name}-device-display-virtio-gpu-ccw = %{epoch}:%{version}-%{release} \ +%else \ +Requires: 
%{name}-device-display-virtio-gpu-pci = %{epoch}:%{version}-%{release} \ +Requires: %{name}-device-display-virtio-gpu-pci-gl = %{epoch}:%{version}-%{release} \ +%endif \ +%ifarch x86_64 %{power64} \ +Requires: %{name}-device-display-virtio-vga = %{epoch}:%{version}-%{release} \ +Requires: %{name}-device-display-virtio-vga-gl = %{epoch}:%{version}-%{release} \ +%endif \ +Requires: %{name}-device-usb-host = %{epoch}:%{version}-%{release} \ %if %{have_usbredir} \ -Requires: %{name}-hw-usbredir = %{epoch}:%{version}-%{release} \ +Requires: %{name}-device-usb-redirect = %{epoch}:%{version}-%{release} \ %endif \ Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ Requires: %{name}-audio-pa = %{epoch}:%{version}-%{release} @@ -130,7 +144,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.2.0 -Release: 5%{?rcrel}%{?dist}%{?cc_suffix} +Release: 6%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -315,7 +329,15 @@ Requires: libfdt >= %{libfdt_version} emulation for the KVM hypervisor. %{name} acts as a virtual machine monitor together with the KVM kernel modules, and emulates the hardware for a full system such as a PC and its associated peripherals. - +This is a minimalistic installation of %{name}. Functionality provided by +this package is not ensured and it can change in a future version as some +functionality can be split out to separate package. +Before updating this package, it is recommended to check the package +changelog for information on functionality which might have been moved to +a separate package to prevent issues due to the moved functionality. +If apps opt-in to minimalist packaging by depending on %{name}-core, they +explicitly accept that features may disappear from %{name}-core in future +updates. 
%package common Summary: QEMU common files needed by all QEMU targets @@ -451,15 +473,76 @@ Requires: mesa-libEGL Requires: mesa-dri-drivers %description ui-opengl This package provides opengl support. + +%package ui-egl-headless +Summary: QEMU EGL headless driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: %{name}-ui-opengl%{?_isa} = %{epoch}:%{version}-%{release} +%description ui-egl-headless +This package provides the additional egl-headless UI for QEMU. %endif + +%package device-display-virtio-gpu +Summary: QEMU virtio-gpu display device +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description device-display-virtio-gpu +This package provides the virtio-gpu display device for QEMU. + +%package device-display-virtio-gpu-gl +Summary: QEMU virtio-gpu-gl display device +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description device-display-virtio-gpu-gl +This package provides the virtio-gpu-gl display device for QEMU. + +%ifarch s390x +%package device-display-virtio-gpu-ccw +Summary: QEMU virtio-gpu-ccw display device +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description device-display-virtio-gpu-ccw +This package provides the virtio-gpu-ccw display device for QEMU. +%else +%package device-display-virtio-gpu-pci +Summary: QEMU virtio-gpu-pci display device +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description device-display-virtio-gpu-pci +This package provides the virtio-gpu-pci display device for QEMU. + +%package device-display-virtio-gpu-pci-gl +Summary: QEMU virtio-gpu-pci-gl display device +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description device-display-virtio-gpu-pci-gl +This package provides the virtio-gpu-pci-gl display device for QEMU. 
+%endif + +%ifarch x86_64 %{power64} +%package device-display-virtio-vga +Summary: QEMU virtio-vga display device +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description device-display-virtio-vga +This package provides the virtio-vga display device for QEMU. + +%package device-display-virtio-vga-gl +Summary: QEMU virtio-vga-gl display device +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description device-display-virtio-vga-gl +This package provides the virtio-vga-gl display device for QEMU. +%endif + +%package device-usb-host +Summary: QEMU usb host device +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description device-usb-host +This package provides the USB pass through driver for QEMU. + %if %{have_usbredir} -%package hw-usbredir +%package device-usb-redirect Summary: QEMU usbredir support Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} Requires: usbredir >= 0.7.1 +Provides: %{name}-hw-usbredir -%description hw-usbredir +%description device-usb-redirect This package provides usbredir support. 
%endif @@ -1117,9 +1200,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/%{name}/s390-ccw.img %{_datadir}/%{name}/s390-netboot.img %endif -%ifnarch aarch64 s390x - %{_libdir}/%{name}/hw-display-virtio-vga.so -%endif %{_datadir}/icons/* %{_datadir}/%{name}/linuxboot_dma.bin %if %{have_modules_load} @@ -1137,25 +1217,33 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp %{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf -%{_libdir}/%{name}/hw-display-virtio-gpu.so -%{_libdir}/%{name}/hw-display-virtio-gpu-gl.so -%ifarch x86_64 %{power64} - %{_libdir}/%{name}/hw-display-virtio-vga-gl.so -%endif -%ifarch s390x - %{_libdir}/%{name}/hw-s390x-virtio-gpu-ccw.so -%else - %{_libdir}/%{name}/hw-display-virtio-gpu-pci.so - %{_libdir}/%{name}/hw-display-virtio-gpu-pci-gl.so -%endif - %{_libdir}/%{name}/accel-qtest-%{kvm_target}.so %ifarch x86_64 %{_libdir}/%{name}/accel-tcg-%{kvm_target}.so %endif -%{_libdir}/%{name}/hw-usb-host.so + +%files device-display-virtio-gpu +%{_libdir}/%{name}/hw-display-virtio-gpu.so +%files device-display-virtio-gpu-gl +%{_libdir}/%{name}/hw-display-virtio-gpu-gl.so +%ifarch s390x +%files device-display-virtio-gpu-ccw + %{_libdir}/%{name}/hw-s390x-virtio-gpu-ccw.so +%else +%files device-display-virtio-gpu-pci + %{_libdir}/%{name}/hw-display-virtio-gpu-pci.so +%files device-display-virtio-gpu-pci-gl + %{_libdir}/%{name}/hw-display-virtio-gpu-pci-gl.so +%endif +%ifarch x86_64 %{power64} +%files device-display-virtio-vga + %{_libdir}/%{name}/hw-display-virtio-vga.so +%files device-display-virtio-vga-gl + %{_libdir}/%{name}/hw-display-virtio-vga-gl.so +%endif %files tests %{testsdir} +%{_libdir}/%{name}/accel-qtest-%{kvm_target}.so %files block-curl %{_libdir}/%{name}/block-curl.so @@ -1170,12 +1258,16 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %if %{have_opengl} %files ui-opengl -%{_libdir}/%{name}/ui-egl-headless.so %{_libdir}/%{name}/ui-opengl.so 
+%files ui-egl-headless +%{_libdir}/%{name}/ui-egl-headless.so %endif +%files device-usb-host +%{_libdir}/%{name}/hw-usb-host.so + %if %{have_usbredir} -%files hw-usbredir +%files device-usb-redirect %{_libdir}/%{name}/hw-usb-redirect.so %endif @@ -1183,6 +1275,20 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Wed Feb 02 2022 Miroslav Rezanina - 6.2.0-6 +- Moving feature support out of qemu-kvm-core to separate packages (can + cause loss of functionality when using only qemu-kvm-core - qemu-kvm keeps + same feature set). +- kvm-spec-Rename-qemu-kvm-hw-usbredir-to-qemu-kvm-device-.patch [bz#2022847] +- kvm-spec-Split-qemu-kvm-ui-opengl.patch [bz#2022847] +- kvm-spec-Introduce-packages-for-virtio-gpu-modules.patch [bz#2022847] +- kvm-spec-Introduce-device-display-virtio-vga-packages.patch [bz#2022847] +- kvm-spec-Move-usb-host-module-to-separate-package.patch [bz#2022847] +- kvm-spec-Move-qtest-accel-module-to-tests-package.patch [bz#2022847] +- kvm-spec-Extend-qemu-kvm-core-description.patch [bz#2022847] +- Resolves: bz#2022847 + (qemu-kvm: Align package split with Fedora) + * Tue Jan 25 2022 Miroslav Rezanina - 6.2.0-5 - kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch [bz#1945666] - kvm-x86-Add-q35-RHEL-9.0.0-machine-type.patch [bz#1945666] From 9769489cb1ed070cb1527259bd54f654ec03cfd5 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 7 Feb 2022 06:52:23 -0500 Subject: [PATCH 149/195] * Mon Feb 07 2022 Miroslav Rezanina - 6.2.0-7 - kvm-qemu-storage-daemon-Add-vhost-user-blk-help.patch [bz#1962088] - kvm-qemu-storage-daemon-Fix-typo-in-vhost-user-blk-help.patch [bz#1962088] - kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch [bz#2046201] - kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch [bz#2034791] - kvm-block-rbd-workaround-for-ceph-issue-53784.patch [bz#2034791] - Resolves: bz#1962088 ([QSD] wrong help message for the fuse) - Resolves: bz#2046201 (CVE-2022-0358 qemu-kvm: QEMU: virtiofsd: 
potential privilege escalation via CVE-2018-13405 [rhel-9.0]) - Resolves: bz#2034791 (Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD) --- ...ndling-of-holes-in-.bdrv_co_block_st.patch | 59 ++++++++++ ...-rbd-workaround-for-ceph-issue-53784.patch | 103 ++++++++++++++++ ...orage-daemon-Add-vhost-user-blk-help.patch | 72 ++++++++++++ ...emon-Fix-typo-in-vhost-user-blk-help.patch | 41 +++++++ ...embership-of-all-supplementary-group.patch | 110 ++++++++++++++++++ qemu-kvm.spec | 25 +++- 6 files changed, 409 insertions(+), 1 deletion(-) create mode 100644 kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch create mode 100644 kvm-block-rbd-workaround-for-ceph-issue-53784.patch create mode 100644 kvm-qemu-storage-daemon-Add-vhost-user-blk-help.patch create mode 100644 kvm-qemu-storage-daemon-Fix-typo-in-vhost-user-blk-help.patch create mode 100644 kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch diff --git a/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch b/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch new file mode 100644 index 0000000..39aa96c --- /dev/null +++ b/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch @@ -0,0 +1,59 @@ +From d374d5aa4485a0c62d6b48eec64491cae2fd0873 Mon Sep 17 00:00:00 2001 +From: Peter Lieven +Date: Thu, 13 Jan 2022 15:44:25 +0100 +Subject: [PATCH 4/5] block/rbd: fix handling of holes in .bdrv_co_block_status + +RH-Author: Stefano Garzarella +RH-MergeRequest: 68: block/rbd: fix handling of holes in .bdrv_co_block_status +RH-Commit: [1/2] 8ef178b01885e3c292f7844ccff865b1a8d4faf0 (sgarzarella/qemu-kvm-c-9-s) +RH-Bugzilla: 2034791 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Reitz + +the assumption that we can't hit a hole if we do not diff against a snapshot was wrong. 
+ +We can see a hole in an image if we diff against base if there exists an older snapshot +of the image and we have discarded blocks in the image where the snapshot has data. + +Fix this by simply handling a hole like an unallocated area. There are no callbacks +for unallocated areas so just bail out if we hit a hole. + +Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b +Suggested-by: Ilya Dryomov +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Lieven +Message-Id: <20220113144426.4036493-2-pl@kamp.de> +Reviewed-by: Ilya Dryomov +Reviewed-by: Stefano Garzarella +Signed-off-by: Kevin Wolf +(cherry picked from commit 9e302f64bb407a9bb097b626da97228c2654cfee) +Signed-off-by: Stefano Garzarella +--- + block/rbd.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/block/rbd.c b/block/rbd.c +index def96292e0..20bb896c4a 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -1279,11 +1279,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len, + RBDDiffIterateReq *req = opaque; + + assert(req->offs + req->bytes <= offs); +- /* +- * we do not diff against a snapshot so we should never receive a callback +- * for a hole. 
+- */ +- assert(exists); ++ ++ /* treat a hole like an unallocated area and bail out */ ++ if (!exists) { ++ return 0; ++ } + + if (!req->exists && offs > req->offs) { + /* +-- +2.27.0 + diff --git a/kvm-block-rbd-workaround-for-ceph-issue-53784.patch b/kvm-block-rbd-workaround-for-ceph-issue-53784.patch new file mode 100644 index 0000000..dd3876e --- /dev/null +++ b/kvm-block-rbd-workaround-for-ceph-issue-53784.patch @@ -0,0 +1,103 @@ +From f035b5250529eed8d12e0b93b1b6d6f2c50003f6 Mon Sep 17 00:00:00 2001 +From: Peter Lieven +Date: Thu, 13 Jan 2022 15:44:26 +0100 +Subject: [PATCH 5/5] block/rbd: workaround for ceph issue #53784 + +RH-Author: Stefano Garzarella +RH-MergeRequest: 68: block/rbd: fix handling of holes in .bdrv_co_block_status +RH-Commit: [2/2] 5feaa2e20a77886cc1a84cdf212ade3dcda28289 (sgarzarella/qemu-kvm-c-9-s) +RH-Bugzilla: 2034791 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Reitz + +librbd had a bug until early 2022 that affected all versions of ceph that +supported fast-diff. This bug results in reporting of incorrect offsets +if the offset parameter to rbd_diff_iterate2 is not object aligned. + +This patch works around this bug for pre Quincy versions of librbd. 
+ +Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Lieven +Message-Id: <20220113144426.4036493-3-pl@kamp.de> +Reviewed-by: Ilya Dryomov +Reviewed-by: Stefano Garzarella +Tested-by: Stefano Garzarella +Signed-off-by: Kevin Wolf +(cherry picked from commit fc176116cdea816ceb8dd969080b2b95f58edbc0) +Signed-off-by: Stefano Garzarella +--- + block/rbd.c | 42 ++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 40 insertions(+), 2 deletions(-) + +diff --git a/block/rbd.c b/block/rbd.c +index 20bb896c4a..8f183eba2a 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -1320,6 +1320,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + int status, r; + RBDDiffIterateReq req = { .offs = offset }; + uint64_t features, flags; ++ uint64_t head = 0; + + assert(offset + bytes <= s->image_size); + +@@ -1347,7 +1348,43 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + return status; + } + +- r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, ++#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0) ++ /* ++ * librbd had a bug until early 2022 that affected all versions of ceph that ++ * supported fast-diff. This bug results in reporting of incorrect offsets ++ * if the offset parameter to rbd_diff_iterate2 is not object aligned. ++ * Work around this bug by rounding down the offset to object boundaries. ++ * This is OK because we call rbd_diff_iterate2 with whole_object = true. ++ * However, this workaround only works for non cloned images with default ++ * striping. ++ * ++ * See: https://tracker.ceph.com/issues/53784 ++ */ ++ ++ /* check if RBD image has non-default striping enabled */ ++ if (features & RBD_FEATURE_STRIPINGV2) { ++ return status; ++ } ++ ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ /* ++ * check if RBD image is a clone (= has a parent). 
++ * ++ * rbd_get_parent_info is deprecated from Nautilus onwards, but the ++ * replacement rbd_get_parent is not present in Luminous and Mimic. ++ */ ++ if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) { ++ return status; ++ } ++#pragma GCC diagnostic pop ++ ++ head = req.offs & (s->object_size - 1); ++ req.offs -= head; ++ bytes += head; ++#endif ++ ++ r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true, + qemu_rbd_diff_iterate_cb, &req); + if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) { + return status; +@@ -1366,7 +1403,8 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID; + } + +- *pnum = req.bytes; ++ assert(req.bytes > head); ++ *pnum = req.bytes - head; + return status; + } + +-- +2.27.0 + diff --git a/kvm-qemu-storage-daemon-Add-vhost-user-blk-help.patch b/kvm-qemu-storage-daemon-Add-vhost-user-blk-help.patch new file mode 100644 index 0000000..bc36f5c --- /dev/null +++ b/kvm-qemu-storage-daemon-Add-vhost-user-blk-help.patch @@ -0,0 +1,72 @@ +From 0f4592f79f8c24f84db18a8c39c6056b2a0be524 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Fri, 7 Jan 2022 11:54:19 +0100 +Subject: [PATCH 1/5] qemu-storage-daemon: Add vhost-user-blk help +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 63: qemu-storage-daemon: Add vhost-user-blk help +RH-Commit: [1/2] 6b08fec5d6ceea9f8f3810321099310069e08b53 (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 1962088 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Hanna Reitz + +Add missing vhost-user-blk help: + + $ qemu-storage-daemon -h + ... 
+ --export [type=]vhost-user-blk,id=,node-name=, + addr.type=unix,addr.path=[,writable=on|off] + [,logical-block-size=][,num-queues=] + export the specified block node as a + vhost-user-blk device over UNIX domain socket + --export [type=]vhost-user-blk,id=,node-name=, + fd,addr.str=[,writable=on|off] + [,logical-block-size=][,num-queues=] + export the specified block node as a + vhost-user-blk device over file descriptor + ... + +Fixes: 90fc91d50b7 ("convert vhost-user-blk server to block export API") +Reported-by: Qing Wang +Reviewed-by: Eric Blake +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20220107105420.395011-3-f4bug@amsat.org> +Signed-off-by: Kevin Wolf +(cherry picked from commit c8cbc9524269d9583749aaaea8aa244add7e1900) +Signed-off-by: Kevin Wolf +--- + storage-daemon/qemu-storage-daemon.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c +index 52cf17e8ac..9d76d1114d 100644 +--- a/storage-daemon/qemu-storage-daemon.c ++++ b/storage-daemon/qemu-storage-daemon.c +@@ -104,6 +104,19 @@ static void help(void) + " export the specified block node over FUSE\n" + "\n" + #endif /* CONFIG_FUSE */ ++#ifdef CONFIG_VHOST_USER_BLK_SERVER ++" --export [type=]vhost-user-blk,id=,node-name=,\n" ++" addr.type=unix,addr.path=[,writable=on|off]\n" ++" [,logical-block-size=][,num-queues=]\n" ++" export the specified block node as a\n" ++" vhost-user-blk device over UNIX domain socket\n" ++" --export [type=]vhost-user-blk,id=,node-name=,\n" ++" fd,addr.str=[,writable=on|off]\n" ++" [,logical-block-size=][,num-queues=]\n" ++" export the specified block node as a\n" ++" vhost-user-blk device over file descriptor\n" ++"\n" ++#endif /* CONFIG_VHOST_USER_BLK_SERVER */ + " --monitor [chardev=]name[,mode=control][,pretty[=on|off]]\n" + " configure a QMP monitor\n" + "\n" +-- +2.27.0 + diff --git
a/kvm-qemu-storage-daemon-Fix-typo-in-vhost-user-blk-help.patch b/kvm-qemu-storage-daemon-Fix-typo-in-vhost-user-blk-help.patch new file mode 100644 index 0000000..798a27e --- /dev/null +++ b/kvm-qemu-storage-daemon-Fix-typo-in-vhost-user-blk-help.patch @@ -0,0 +1,41 @@ +From 20edf203c8cb314e27409918399aa7cbdc6fdb02 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 25 Jan 2022 16:15:14 +0100 +Subject: [PATCH 2/5] qemu-storage-daemon: Fix typo in vhost-user-blk help + +RH-Author: Kevin Wolf +RH-MergeRequest: 63: qemu-storage-daemon: Add vhost-user-blk help +RH-Commit: [2/2] b7afb670c398799b6e49b926e296771453a55fba (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 1962088 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Hanna Reitz + +The syntax of the fd passing case misses the "addr.type=" key. Add it. + +Signed-off-by: Kevin Wolf +Message-Id: <20220125151514.49035-1-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit e66e665f15736f5ee1fbd8087926cb0f1e52f61a) +Signed-off-by: Kevin Wolf +--- + storage-daemon/qemu-storage-daemon.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c +index 9d76d1114d..ec9aa79b55 100644 +--- a/storage-daemon/qemu-storage-daemon.c ++++ b/storage-daemon/qemu-storage-daemon.c +@@ -111,7 +111,7 @@ static void help(void) + " export the specified block node as a\n" + " vhost-user-blk device over UNIX domain socket\n" + " --export [type=]vhost-user-blk,id=,node-name=,\n" +-" fd,addr.str=[,writable=on|off]\n" ++" addr.type=fd,addr.str=[,writable=on|off]\n" + " [,logical-block-size=][,num-queues=]\n" + " export the specified block node as a\n" + " vhost-user-blk device over file descriptor\n" +-- +2.27.0 + diff --git a/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch b/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch new file mode 100644 index 
0000000..539b8fe --- /dev/null +++ b/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch @@ -0,0 +1,110 @@ +From 846192d22a1ddfa87682bb0b67febef5c30c9743 Mon Sep 17 00:00:00 2001 +From: Vivek Goyal +Date: Tue, 25 Jan 2022 13:51:14 -0500 +Subject: [PATCH 3/5] virtiofsd: Drop membership of all supplementary groups + (CVE-2022-0358) + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 66: c9s: virtiofsd security fix - drop secondary groups +RH-Commit: [1/1] cdf3b0405ea3369933e76761890f16b040641036 (redhat/centos-stream/src/qemu-kvm) +RH-Bugzilla: 2046201 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Sergio Lopez +RH-Acked-by: Vivek Goyal + +At the start, drop membership of all supplementary groups. This is +not required. + +If we have membership of "root" supplementary group and when we switch +uid/gid using setresuid/setsgid, we still retain membership of existing +supplementary groups. And that can allow some operations which are not +normally allowed. + +For example, if root in guest creates a dir as follows. + +$ mkdir -m 03777 test_dir + +This sets SGID on dir as well as allows unprivileged users to write into +this dir. + +And now as unprivileged user open file as follows. + +$ su test +$ fd = open("test_dir/priviledge_id", O_RDWR|O_CREAT|O_EXCL, 02755); + +This will create SGID set executable in test_dir/. + +And that's a problem because now an unprivileged user can execute it, +get egid=0 and get access to resources owned by "root" group. This is +privilege escalation. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2044863 +Fixes: CVE-2022-0358 +Reported-by: JIETAO XIAO +Suggested-by: Miklos Szeredi +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Vivek Goyal +Message-Id: +Signed-off-by: Dr.
David Alan Gilbert + dgilbert: Fixed missing {}'s style nit +(cherry picked from commit 449e8171f96a6a944d1f3b7d3627ae059eae21ca) +--- + tools/virtiofsd/passthrough_ll.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 64b5b4fbb1..b3d0674f6d 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -54,6 +54,7 @@ + #include + #include + #include ++#include + + #include "qemu/cutils.h" + #include "passthrough_helpers.h" +@@ -1161,6 +1162,30 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + #define OURSYS_setresuid SYS_setresuid + #endif + ++static void drop_supplementary_groups(void) ++{ ++ int ret; ++ ++ ret = getgroups(0, NULL); ++ if (ret == -1) { ++ fuse_log(FUSE_LOG_ERR, "getgroups() failed with error=%d:%s\n", ++ errno, strerror(errno)); ++ exit(1); ++ } ++ ++ if (!ret) { ++ return; ++ } ++ ++ /* Drop all supplementary groups. We should not need it */ ++ ret = setgroups(0, NULL); ++ if (ret == -1) { ++ fuse_log(FUSE_LOG_ERR, "setgroups() failed with error=%d:%s\n", ++ errno, strerror(errno)); ++ exit(1); ++ } ++} ++ + /* + * Change to uid/gid of caller so that file is created with + * ownership of caller. +@@ -3926,6 +3951,8 @@ int main(int argc, char *argv[]) + + qemu_init_exec_dir(argv[0]); + ++ drop_supplementary_groups(); ++ + pthread_mutex_init(&lo.mutex, NULL); + lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal); + lo.root.fd = -1; +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index be30965..d6b34c4 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -144,7 +144,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.2.0 -Release: 6%{?rcrel}%{?dist}%{?cc_suffix} +Release: 7%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -217,6 +217,16 @@ Patch36: kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch Patch37: kvm-x86-Add-q35-RHEL-9.0.0-machine-type.patch # For bz#2036669 - DEVICE_DELETED event is not delivered for device frontend if -device is configured via JSON Patch38: kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch +# For bz#1962088 - [QSD] wrong help message for the fuse +Patch39: kvm-qemu-storage-daemon-Add-vhost-user-blk-help.patch +# For bz#1962088 - [QSD] wrong help message for the fuse +Patch40: kvm-qemu-storage-daemon-Fix-typo-in-vhost-user-blk-help.patch +# For bz#2046201 - CVE-2022-0358 qemu-kvm: QEMU: virtiofsd: potential privilege escalation via CVE-2018-13405 [rhel-9.0] +Patch41: kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch +# For bz#2034791 - Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD +Patch42: kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch +# For bz#2034791 - Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD +Patch43: kvm-block-rbd-workaround-for-ceph-issue-53784.patch # Source-git patches @@ -1275,6 +1285,19 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Feb 07 2022 Miroslav Rezanina - 6.2.0-7 +- kvm-qemu-storage-daemon-Add-vhost-user-blk-help.patch [bz#1962088] +- kvm-qemu-storage-daemon-Fix-typo-in-vhost-user-blk-help.patch [bz#1962088] +- kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch [bz#2046201] +- kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch [bz#2034791] +- kvm-block-rbd-workaround-for-ceph-issue-53784.patch [bz#2034791] +- Resolves: bz#1962088 + ([QSD] wrong help message for the fuse) +- Resolves: bz#2046201 + (CVE-2022-0358 qemu-kvm: QEMU: virtiofsd: potential privilege escalation via CVE-2018-13405 [rhel-9.0]) +- Resolves: bz#2034791 + (Booting from 
Local Snapshot Core Dumped Whose Backing File Is Based on RBD) + * Wed Feb 02 2022 Miroslav Rezanina - 6.2.0-6 - Moving feature support out of qemu-kvm-core to separate packages (can cause loss of functionality when using only qemu-kvm-core - qemu-kvm keeps From 0daf0004a7cacc668016c8079db50f75911c44ff Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 14 Feb 2022 06:48:51 -0500 Subject: [PATCH 150/195] * Mon Feb 14 2022 Miroslav Rezanina - 6.2.0-8 - kvm-numa-Enable-numa-for-SGX-EPC-sections.patch [bz#2033708] - kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch [bz#2033708] - kvm-doc-Add-the-SGX-numa-description.patch [bz#2033708] - kvm-Enable-SGX-RH-Only.patch [bz#2033708] - kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch [bz#2033708] - kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch [bz#2041461] - kvm-iotests-block-status-cache-New-test.patch [bz#2041461] - kvm-iotests-Test-qemu-img-convert-of-zeroed-data-cluster.patch [bz#1882917] - kvm-qemu-img-make-is_allocated_sectors-more-efficient.patch [bz#1882917] - kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch [bz#2040123] - kvm-iotests-stream-error-on-reset-New-test.patch [bz#2040123] - kvm-hw-arm-smmuv3-Fix-device-reset.patch [bz#2042481] - Resolves: bz#2033708 ([Intel 9.0 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support) - Resolves: bz#2041461 (Inconsistent block status reply in qemu-nbd) - Resolves: bz#1882917 (the target image size is incorrect when converting a badly fragmented file) - Resolves: bz#2040123 (Qemu core dumped when do block-stream to a snapshot node on non-enough space storage) - Resolves: bz#2042481 ([aarch64] Launch guest with "default-bus-bypass-iommu=off,iommu=smmuv3" and "iommu_platform=on", guest hangs after system_reset) --- kvm-Enable-SGX-RH-Only.patch | 28 ++ ...event-dangling-BDS-pointers-across-a.patch | 129 ++++++++ ...Update-BSC-only-if-want_zero-is-true.patch | 56 ++++ kvm-doc-Add-the-SGX-numa-description.patch 
| 77 +++++ kvm-hw-arm-smmuv3-Fix-device-reset.patch | 61 ++++ ...u-img-convert-of-zeroed-data-cluster.patch | 81 +++++ kvm-iotests-block-status-cache-New-test.patch | 197 ++++++++++++ ...tests-stream-error-on-reset-New-test.patch | 196 ++++++++++++ ...uma-Enable-numa-for-SGX-EPC-sections.patch | 287 ++++++++++++++++++ ...-numa-in-the-monitor-and-Libvirt-int.patch | 210 +++++++++++++ ...-related-comments-and-restore-sectio.patch | 213 +++++++++++++ ...-is_allocated_sectors-more-efficient.patch | 108 +++++++ qemu-kvm.spec | 50 ++- 13 files changed, 1692 insertions(+), 1 deletion(-) create mode 100644 kvm-Enable-SGX-RH-Only.patch create mode 100644 kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch create mode 100644 kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch create mode 100644 kvm-doc-Add-the-SGX-numa-description.patch create mode 100644 kvm-hw-arm-smmuv3-Fix-device-reset.patch create mode 100644 kvm-iotests-Test-qemu-img-convert-of-zeroed-data-cluster.patch create mode 100644 kvm-iotests-block-status-cache-New-test.patch create mode 100644 kvm-iotests-stream-error-on-reset-New-test.patch create mode 100644 kvm-numa-Enable-numa-for-SGX-EPC-sections.patch create mode 100644 kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch create mode 100644 kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch create mode 100644 kvm-qemu-img-make-is_allocated_sectors-more-efficient.patch diff --git a/kvm-Enable-SGX-RH-Only.patch b/kvm-Enable-SGX-RH-Only.patch new file mode 100644 index 0000000..63f335b --- /dev/null +++ b/kvm-Enable-SGX-RH-Only.patch @@ -0,0 +1,28 @@ +From f4f7c62a4658a570d3ad694b64463665fa4b80a7 Mon Sep 17 00:00:00 2001 +From: Paul Lai +Date: Fri, 21 Jan 2022 13:14:42 -0500 +Subject: [PATCH 04/12] Enable SGX -- RH Only + +RH-Author: Paul Lai +RH-MergeRequest: 65: Enable SGX and add SGX Numa support +RH-Commit: [4/5] 2cd4ee4a429f5e7b1c32e83a10bf488503603795 +RH-Bugzilla: 2033708 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: 
Bandan Das +RH-Acked-by: Cornelia Huck +--- + configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +index dc03fbb671..327b1bee62 100644 +--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -101,3 +101,4 @@ CONFIG_TPM=y + CONFIG_TPM_CRB=y + CONFIG_TPM_TIS_ISA=y + CONFIG_TPM_EMULATOR=y ++CONFIG_SGX=y +-- +2.27.0 + diff --git a/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch b/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch new file mode 100644 index 0000000..8dbf30f --- /dev/null +++ b/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch @@ -0,0 +1,129 @@ +From 87f3b10dc600ac12272ee6cdc67571910ea722f6 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 11 Jan 2022 15:36:12 +0000 +Subject: [PATCH 10/12] block-backend: prevent dangling BDS pointers across + aio_poll() + +RH-Author: Hanna Reitz +RH-MergeRequest: 71: block-backend: prevent dangling BDS pointers across aio_poll() +RH-Commit: [1/2] 1b4cab39bf8c933ab910293a29bfceaa9e821068 (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2040123 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Emanuele Giuseppe Esposito + +The BlockBackend root child can change when aio_poll() is invoked. This +happens when a temporary filter node is removed upon blockjob +completion, for example. + +Functions in block/block-backend.c must be aware of this when using a +blk_bs() pointer across aio_poll() because the BlockDriverState refcnt +may reach 0, resulting in a stale pointer. + +One example is scsi_device_purge_requests(), which calls blk_drain() to +wait for in-flight requests to cancel. If the backup blockjob is active, +then the BlockBackend root child is a temporary filter BDS owned by the +blockjob. 
The blockjob can complete during bdrv_drained_begin() and the +last reference to the BDS is released when the temporary filter node is +removed. This results in a use-after-free when blk_drain() calls +bdrv_drained_end(bs) on the dangling pointer. + +Explicitly hold a reference to bs across block APIs that invoke +aio_poll(). + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2021778 +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2036178 +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220111153613.25453-2-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 1e3552dbd28359d35967b7c28dc86cde1bc29205) +Signed-off-by: Hanna Reitz +--- + block/block-backend.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 12ef80ea17..23e727199b 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -822,16 +822,22 @@ BlockBackend *blk_by_public(BlockBackendPublic *public) + void blk_remove_bs(BlockBackend *blk) + { + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; +- BlockDriverState *bs; + BdrvChild *root; + + notifier_list_notify(&blk->remove_bs_notifiers, blk); + if (tgm->throttle_state) { +- bs = blk_bs(blk); ++ BlockDriverState *bs = blk_bs(blk); ++ ++ /* ++ * Take a ref in case blk_bs() changes across bdrv_drained_begin(), for ++ * example, if a temporary filter node is removed by a blockjob. 
++ */ ++ bdrv_ref(bs); + bdrv_drained_begin(bs); + throttle_group_detach_aio_context(tgm); + throttle_group_attach_aio_context(tgm, qemu_get_aio_context()); + bdrv_drained_end(bs); ++ bdrv_unref(bs); + } + + blk_update_root_state(blk); +@@ -1705,6 +1711,7 @@ void blk_drain(BlockBackend *blk) + BlockDriverState *bs = blk_bs(blk); + + if (bs) { ++ bdrv_ref(bs); + bdrv_drained_begin(bs); + } + +@@ -1714,6 +1721,7 @@ void blk_drain(BlockBackend *blk) + + if (bs) { + bdrv_drained_end(bs); ++ bdrv_unref(bs); + } + } + +@@ -2044,10 +2052,13 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, + int ret; + + if (bs) { ++ bdrv_ref(bs); ++ + if (update_root_node) { + ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root, + errp); + if (ret < 0) { ++ bdrv_unref(bs); + return ret; + } + } +@@ -2057,6 +2068,8 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, + throttle_group_attach_aio_context(tgm, new_context); + bdrv_drained_end(bs); + } ++ ++ bdrv_unref(bs); + } + + blk->ctx = new_context; +@@ -2326,11 +2339,13 @@ void blk_io_limits_disable(BlockBackend *blk) + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + assert(tgm->throttle_state); + if (bs) { ++ bdrv_ref(bs); + bdrv_drained_begin(bs); + } + throttle_group_unregister_tgm(tgm); + if (bs) { + bdrv_drained_end(bs); ++ bdrv_unref(bs); + } + } + +-- +2.27.0 + diff --git a/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch b/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch new file mode 100644 index 0000000..5fff268 --- /dev/null +++ b/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch @@ -0,0 +1,56 @@ +From a6b472de71f6ebbe44025e1348c90e6f1f2b2326 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 18 Jan 2022 17:59:59 +0100 +Subject: [PATCH 06/12] block/io: Update BSC only if want_zero is true + +RH-Author: Hanna Reitz +RH-MergeRequest: 69: block/io: Update BSC only if want_zero is true +RH-Commit: [1/2] 
ad19ff86c3420cafe5a9e785ee210e482fbc8cd7 (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2041461 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +We update the block-status cache whenever we get new information from a +bdrv_co_block_status() call to the block driver. However, if we have +passed want_zero=false to that call, it may flag areas containing zeroes +as data, and so we would update the block-status cache with wrong +information. + +Therefore, we should not update the cache with want_zero=false. + +Reported-by: Nir Soffer +Fixes: 0bc329fbb00 ("block: block-status cache for data regions") +Reviewed-by: Nir Soffer +Cc: qemu-stable@nongnu.org +Signed-off-by: Hanna Reitz +Message-Id: <20220118170000.49423-2-hreitz@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Eric Blake +(cherry picked from commit 113b727ce788335cf76f65355d670c9bc130fd75) +Signed-off-by: Hanna Reitz +--- + block/io.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/block/io.c b/block/io.c +index bb0a254def..4e4cb556c5 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -2497,8 +2497,12 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, + * non-protocol nodes, and then it is never used. However, filling + * the cache requires an RCU update, so double check here to avoid + * such an update if possible. ++ * ++ * Check want_zero, because we only want to update the cache when we ++ * have accurate information about what is zero and what is data. 
+ */ +- if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && ++ if (want_zero && ++ ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && + QLIST_EMPTY(&bs->children)) + { + /* +-- +2.27.0 + diff --git a/kvm-doc-Add-the-SGX-numa-description.patch b/kvm-doc-Add-the-SGX-numa-description.patch new file mode 100644 index 0000000..8eac5fa --- /dev/null +++ b/kvm-doc-Add-the-SGX-numa-description.patch @@ -0,0 +1,77 @@ +From eb88a12ab1ecfe77bcc0d0067c96fce27a3bde01 Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Mon, 1 Nov 2021 12:20:08 -0400 +Subject: [PATCH 03/12] doc: Add the SGX numa description + +RH-Author: Paul Lai +RH-MergeRequest: 65: Enable SGX and add SGX Numa support +RH-Commit: [3/5] c27b3f6976cbe92cc3c0e1dab0191cdd25de596a +RH-Bugzilla: 2033708 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +Add the SGX numa reference command and how to check if +SGX numa is support or not with multiple EPC sections. + +Signed-off-by: Yang Zhong +Message-Id: <20211101162009.62161-5-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit d1889b36098c79e2e6ac90faf3d0dc5ec0057677) +Signed-off-by: Paul Lai +--- + docs/system/i386/sgx.rst | 31 +++++++++++++++++++++++++++---- + 1 file changed, 27 insertions(+), 4 deletions(-) + +diff --git a/docs/system/i386/sgx.rst b/docs/system/i386/sgx.rst +index f8fade5ac2..0f0a73f758 100644 +--- a/docs/system/i386/sgx.rst ++++ b/docs/system/i386/sgx.rst +@@ -141,8 +141,7 @@ To launch a SGX guest: + |qemu_system_x86| \\ + -cpu host,+sgx-provisionkey \\ + -object memory-backend-epc,id=mem1,size=64M,prealloc=on \\ +- -object memory-backend-epc,id=mem2,size=28M \\ +- -M sgx-epc.0.memdev=mem1,sgx-epc.1.memdev=mem2 ++ -M sgx-epc.0.memdev=mem1,sgx-epc.0.node=0 + + Utilizing SGX in the guest requires a kernel/OS with SGX support. 
+ The support can be determined in guest by:: +@@ -152,8 +151,32 @@ The support can be determined in guest by:: + and SGX epc info by:: + + $ dmesg | grep sgx +- [ 1.242142] sgx: EPC section 0x180000000-0x181bfffff +- [ 1.242319] sgx: EPC section 0x181c00000-0x1837fffff ++ [ 0.182807] sgx: EPC section 0x140000000-0x143ffffff ++ [ 0.183695] sgx: [Firmware Bug]: Unable to map EPC section to online node. Fallback to the NUMA node 0. ++ ++To launch a SGX numa guest: ++ ++.. parsed-literal:: ++ ++ |qemu_system_x86| \\ ++ -cpu host,+sgx-provisionkey \\ ++ -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \\ ++ -object memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \\ ++ -numa node,nodeid=0,cpus=0-1,memdev=node0 \\ ++ -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \\ ++ -object memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \\ ++ -numa node,nodeid=1,cpus=2-3,memdev=node1 \\ ++ -M sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1 ++ ++and SGX epc numa info by:: ++ ++ $ dmesg | grep sgx ++ [ 0.369937] sgx: EPC section 0x180000000-0x183ffffff ++ [ 0.370259] sgx: EPC section 0x184000000-0x185bfffff ++ ++ $ dmesg | grep SRAT ++ [ 0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x180000000-0x183ffffff] ++ [ 0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x184000000-0x185bfffff] + + References + ---------- +-- +2.27.0 + diff --git a/kvm-hw-arm-smmuv3-Fix-device-reset.patch b/kvm-hw-arm-smmuv3-Fix-device-reset.patch new file mode 100644 index 0000000..3b8f307 --- /dev/null +++ b/kvm-hw-arm-smmuv3-Fix-device-reset.patch @@ -0,0 +1,61 @@ +From c08c3fbb2bb8494738fd34ec8fc9dc434ce82f4b Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 2 Feb 2022 12:16:02 +0100 +Subject: [PATCH 12/12] hw/arm/smmuv3: Fix device reset + +RH-Author: Eric Auger +RH-MergeRequest: 72: hw/arm/smmuv3: Fix device reset +RH-Commit: [1/1] 2cfee2f7a03692681224fed96bb4f28406bf460a 
(eauger1/centos-qemu-kvm) +RH-Bugzilla: 2042481 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu +RH-Acked-by: Andrew Jones + +branch: c9s +Brew: 42958737 +Upstream: yes + +We currently miss a bunch of register resets in the device reset +function. This sometimes prevents the guest from rebooting after +a system_reset (with virtio-blk-pci). For instance, we may get +the following errors: + +invalid STE +smmuv3-iommu-memory-region-0-0 translation failed for iova=0x13a9d2000(SMMU_EVT_C_BAD_STE) +Invalid read at addr 0x13A9D2000, size 2, region '(null)', reason: rejected +invalid STE +smmuv3-iommu-memory-region-0-0 translation failed for iova=0x13a9d2000(SMMU_EVT_C_BAD_STE) +Invalid write at addr 0x13A9D2000, size 2, region '(null)', reason: rejected +invalid STE + +Signed-off-by: Eric Auger +Message-id: 20220202111602.627429-1-eric.auger@redhat.com +Fixes: 10a83cb988 ("hw/arm/smmuv3: Skeleton") +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +(cherry picked from commit 43530095e18fd16dcd51a4b385ad2a22c36f5698) +Signed-off-by: Eric Auger +--- + hw/arm/smmuv3.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 01b60bee49..1b5640bb98 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -276,6 +276,12 @@ static void smmuv3_init_regs(SMMUv3State *s) + s->features = 0; + s->sid_split = 0; + s->aidr = 0x1; ++ s->cr[0] = 0; ++ s->cr0ack = 0; ++ s->irq_ctrl = 0; ++ s->gerror = 0; ++ s->gerrorn = 0; ++ s->statusr = 0; + } + + static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf, +-- +2.27.0 + diff --git a/kvm-iotests-Test-qemu-img-convert-of-zeroed-data-cluster.patch b/kvm-iotests-Test-qemu-img-convert-of-zeroed-data-cluster.patch new file mode 100644 index 0000000..0ab3bcc --- /dev/null +++ b/kvm-iotests-Test-qemu-img-convert-of-zeroed-data-cluster.patch @@ -0,0 +1,81 @@ +From 51f691acd8042351d005873996d7bf4c7b045508 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 17 Dec 2021 17:46:53 
+0100 +Subject: [PATCH 08/12] iotests: Test qemu-img convert of zeroed data cluster + +RH-Author: Kevin Wolf +RH-MergeRequest: 70: qemu-img convert: Fix sparseness of output image +RH-Commit: [1/2] 0770582c553ac6b0f18c035f9a0238599d4763cc (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 1882917 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Reitz + +This demonstrates what happens when the block status changes in +sub-min_sparse granularity, but all of the parts are zeroed out. The +alignment logic in is_allocated_sectors() prevents that the target image +remains fully sparse as expected, but turns it into a data cluster of +explicit zeros. + +Signed-off-by: Kevin Wolf +Signed-off-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20211217164654.1184218-2-vsementsov@virtuozzo.com> +Tested-by: Peter Lieven +Signed-off-by: Kevin Wolf +(cherry picked from commit 51cd8bddd63540514d44808f7920811439baa253) +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/122 | 1 + + tests/qemu-iotests/122.out | 10 ++++++++-- + 2 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/tests/qemu-iotests/122 b/tests/qemu-iotests/122 +index efb260d822..be0f6b79e5 100755 +--- a/tests/qemu-iotests/122 ++++ b/tests/qemu-iotests/122 +@@ -251,6 +251,7 @@ $QEMU_IO -c "write -P 0 0 64k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_test + $QEMU_IO -c "write 0 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir + $QEMU_IO -c "write 8k 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir + $QEMU_IO -c "write 17k 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir ++$QEMU_IO -c "write -P 0 65k 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir + + for min_sparse in 4k 8k; do + echo +diff --git a/tests/qemu-iotests/122.out b/tests/qemu-iotests/122.out +index 8fbdac2b39..69b8e8b803 100644 +--- a/tests/qemu-iotests/122.out ++++ b/tests/qemu-iotests/122.out +@@ -192,6 +192,8 @@ wrote 1024/1024 bytes at offset 8192 + 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX 
ops/sec) + wrote 1024/1024 bytes at offset 17408 + 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++wrote 1024/1024 bytes at offset 66560 ++1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + + convert -S 4k + [{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, +@@ -199,7 +201,9 @@ convert -S 4k + { "start": 8192, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, + { "start": 12288, "length": 4096, "depth": 0, "present": false, "zero": true, "data": false}, + { "start": 16384, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, +-{ "start": 20480, "length": 67088384, "depth": 0, "present": false, "zero": true, "data": false}] ++{ "start": 20480, "length": 46080, "depth": 0, "present": false, "zero": true, "data": false}, ++{ "start": 66560, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 67584, "length": 67041280, "depth": 0, "present": false, "zero": true, "data": false}] + + convert -c -S 4k + [{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true}, +@@ -211,7 +215,9 @@ convert -c -S 4k + + convert -S 8k + [{ "start": 0, "length": 24576, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, +-{ "start": 24576, "length": 67084288, "depth": 0, "present": false, "zero": true, "data": false}] ++{ "start": 24576, "length": 41984, "depth": 0, "present": false, "zero": true, "data": false}, ++{ "start": 66560, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 67584, "length": 67041280, "depth": 0, "present": false, "zero": true, "data": false}] + + convert -c -S 8k + [{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true}, +-- +2.27.0 + diff --git a/kvm-iotests-block-status-cache-New-test.patch 
b/kvm-iotests-block-status-cache-New-test.patch new file mode 100644 index 0000000..cd9a198 --- /dev/null +++ b/kvm-iotests-block-status-cache-New-test.patch @@ -0,0 +1,197 @@ +From 89fe89491f89a7526ba864a9d94d3de930261d69 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 18 Jan 2022 18:00:00 +0100 +Subject: [PATCH 07/12] iotests/block-status-cache: New test + +RH-Author: Hanna Reitz +RH-MergeRequest: 69: block/io: Update BSC only if want_zero is true +RH-Commit: [2/2] 3c5a55aca1ac7a71c175a124d63bcf7a4430a022 (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2041461 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +Add a new test to verify that want_zero=false block-status calls do not +pollute the block-status cache for want_zero=true calls. + +We check want_zero=true calls and their results using `qemu-img map` +(over NBD), and want_zero=false calls also using `qemu-img map` over +NBD, but using the qemu:allocation-depth context. + +(This test case cannot be integrated into nbd-qemu-allocation, because +that is a qcow2 test, and this is a raw test.) 
+ +Signed-off-by: Hanna Reitz +Message-Id: <20220118170000.49423-3-hreitz@redhat.com> +Reviewed-by: Nir Soffer +Reviewed-by: Eric Blake +Tested-by: Eric Blake +Signed-off-by: Eric Blake +(cherry picked from commit 6384dd534d742123d26c008d9794b20bc41359d5) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/tests/block-status-cache | 139 ++++++++++++++++++ + .../qemu-iotests/tests/block-status-cache.out | 5 + + 2 files changed, 144 insertions(+) + create mode 100755 tests/qemu-iotests/tests/block-status-cache + create mode 100644 tests/qemu-iotests/tests/block-status-cache.out + +diff --git a/tests/qemu-iotests/tests/block-status-cache b/tests/qemu-iotests/tests/block-status-cache +new file mode 100755 +index 0000000000..6fa10bb8f8 +--- /dev/null ++++ b/tests/qemu-iotests/tests/block-status-cache +@@ -0,0 +1,139 @@ ++#!/usr/bin/env python3 ++# group: rw quick ++# ++# Test cases for the block-status cache. ++# ++# Copyright (C) 2022 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . 
++# ++ ++import os ++import signal ++import iotests ++from iotests import qemu_img_create, qemu_img_pipe, qemu_nbd ++ ++ ++image_size = 1 * 1024 * 1024 ++test_img = os.path.join(iotests.test_dir, 'test.img') ++ ++nbd_pidfile = os.path.join(iotests.test_dir, 'nbd.pid') ++nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') ++ ++ ++class TestBscWithNbd(iotests.QMPTestCase): ++ def setUp(self) -> None: ++ """Just create an empty image with a read-only NBD server on it""" ++ assert qemu_img_create('-f', iotests.imgfmt, test_img, ++ str(image_size)) == 0 ++ ++ # Pass --allocation-depth to enable the qemu:allocation-depth context, ++ # which we are going to query to provoke a block-status inquiry with ++ # want_zero=false. ++ assert qemu_nbd(f'--socket={nbd_sock}', ++ f'--format={iotests.imgfmt}', ++ '--persistent', ++ '--allocation-depth', ++ '--read-only', ++ f'--pid-file={nbd_pidfile}', ++ test_img) \ ++ == 0 ++ ++ def tearDown(self) -> None: ++ with open(nbd_pidfile, encoding='utf-8') as f: ++ pid = int(f.read()) ++ os.kill(pid, signal.SIGTERM) ++ os.remove(nbd_pidfile) ++ os.remove(test_img) ++ ++ def test_with_zero_bug(self) -> None: ++ """ ++ Verify that the block-status cache is not corrupted by a ++ want_zero=false call. ++ We can provoke a want_zero=false call with `qemu-img map` over NBD with ++ x-dirty-bitmap=qemu:allocation-depth, so we first run a normal `map` ++ (which results in want_zero=true), then using said ++ qemu:allocation-depth context, and finally another normal `map` to ++ verify that the cache has not been corrupted. ++ """ ++ ++ nbd_img_opts = f'driver=nbd,server.type=unix,server.path={nbd_sock}' ++ nbd_img_opts_alloc_depth = nbd_img_opts + \ ++ ',x-dirty-bitmap=qemu:allocation-depth' ++ ++ # Normal map, results in want_zero=true. ++ # This will probably detect an allocated data sector first (qemu likes ++ # to allocate the first sector to facilitate alignment probing), and ++ # then the rest to be zero. 
The BSC will thus contain (if anything) ++ # one range covering the first sector. ++ map_pre = qemu_img_pipe('map', '--output=json', '--image-opts', ++ nbd_img_opts) ++ ++ # qemu:allocation-depth maps for want_zero=false. ++ # want_zero=false should (with the file driver, which the server is ++ # using) report everything as data. While this is sufficient for ++ # want_zero=false, this is nothing that should end up in the ++ # block-status cache. ++ # Due to a bug, this information did end up in the cache, though, and ++ # this would lead to wrong information being returned on subsequent ++ # want_zero=true calls. ++ # ++ # We need to run this map twice: On the first call, we probably still ++ # have the first sector in the cache, and so this will be served from ++ # the cache; and only the subsequent range will be queried from the ++ # block driver. This subsequent range will then be entered into the ++ # cache. ++ # If we did a want_zero=true call at this point, we would thus get ++ # correct information: The first sector is not covered by the cache, so ++ # we would get fresh block-status information from the driver, which ++ # would return a data range, and this would then go into the cache, ++ # evicting the wrong range from the want_zero=false call before. ++ # ++ # Therefore, we need a second want_zero=false map to reproduce: ++ # Since the first sector is not in the cache, the query for its status ++ # will go to the driver, which will return a result that reports the ++ # whole image to be a single data area. This result will then go into ++ # the cache, and so the cache will then report the whole image to ++ # contain data. ++ # ++ # Note that once the cache reports the whole image to contain data, any ++ # subsequent map operation will be served from the cache, and so we can ++ # never loop too many times here. 
++ for _ in range(2): ++ # (Ignore the result, this is just to contaminate the cache) ++ qemu_img_pipe('map', '--output=json', '--image-opts', ++ nbd_img_opts_alloc_depth) ++ ++ # Now let's see whether the cache reports everything as data, or ++ # whether we get correct information (i.e. the same as we got on our ++ # first attempt). ++ map_post = qemu_img_pipe('map', '--output=json', '--image-opts', ++ nbd_img_opts) ++ ++ if map_pre != map_post: ++ print('ERROR: Map information differs before and after querying ' + ++ 'qemu:allocation-depth') ++ print('Before:') ++ print(map_pre) ++ print('After:') ++ print(map_post) ++ ++ self.fail("Map information differs") ++ ++ ++if __name__ == '__main__': ++ # The block-status cache only works on the protocol layer, so to test it, ++ # we can only use the raw format ++ iotests.main(supported_fmts=['raw'], ++ supported_protocols=['file']) +diff --git a/tests/qemu-iotests/tests/block-status-cache.out b/tests/qemu-iotests/tests/block-status-cache.out +new file mode 100644 +index 0000000000..ae1213e6f8 +--- /dev/null ++++ b/tests/qemu-iotests/tests/block-status-cache.out +@@ -0,0 +1,5 @@ ++. 
++---------------------------------------------------------------------- ++Ran 1 tests ++ ++OK +-- +2.27.0 + diff --git a/kvm-iotests-stream-error-on-reset-New-test.patch b/kvm-iotests-stream-error-on-reset-New-test.patch new file mode 100644 index 0000000..cf69e38 --- /dev/null +++ b/kvm-iotests-stream-error-on-reset-New-test.patch @@ -0,0 +1,196 @@ +From 300f912d4a5afe4ecca9c68a71429fbc9966ec34 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 11 Jan 2022 15:36:13 +0000 +Subject: [PATCH 11/12] iotests/stream-error-on-reset: New test + +RH-Author: Hanna Reitz +RH-MergeRequest: 71: block-backend: prevent dangling BDS pointers across aio_poll() +RH-Commit: [2/2] 3167f31b91eb433f338564201f4ef336e39f7f7d (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2040123 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Emanuele Giuseppe Esposito + +Test the following scenario: +- Simple stream block in two-layer backing chain (base and top) +- The job is drained via blk_drain(), then an error occurs while the job + settles the ongoing request +- And so the job completes while in blk_drain() + +This was reported as a segfault, but is fixed by "block-backend: prevent +dangling BDS pointers across aio_poll()". 
+ +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2036178 +Signed-off-by: Hanna Reitz +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220111153613.25453-3-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 2ca1d5d6b91f8a52a5c651f660b2f58c94bf97ba) +Signed-off-by: Hanna Reitz +--- + .../qemu-iotests/tests/stream-error-on-reset | 140 ++++++++++++++++++ + .../tests/stream-error-on-reset.out | 5 + + 2 files changed, 145 insertions(+) + create mode 100755 tests/qemu-iotests/tests/stream-error-on-reset + create mode 100644 tests/qemu-iotests/tests/stream-error-on-reset.out + +diff --git a/tests/qemu-iotests/tests/stream-error-on-reset b/tests/qemu-iotests/tests/stream-error-on-reset +new file mode 100755 +index 0000000000..7eaedb24d7 +--- /dev/null ++++ b/tests/qemu-iotests/tests/stream-error-on-reset +@@ -0,0 +1,140 @@ ++#!/usr/bin/env python3 ++# group: rw quick ++# ++# Test what happens when a stream job completes in a blk_drain(). ++# ++# Copyright (C) 2022 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++import os ++import iotests ++from iotests import imgfmt, qemu_img_create, qemu_io_silent, QMPTestCase ++ ++ ++image_size = 1 * 1024 * 1024 ++data_size = 64 * 1024 ++base = os.path.join(iotests.test_dir, 'base.img') ++top = os.path.join(iotests.test_dir, 'top.img') ++ ++ ++# We want to test completing a stream job in a blk_drain(). 
++# ++# The blk_drain() we are going to use is a virtio-scsi device resetting, ++# which we can trigger by resetting the system. ++# ++# In order to have the block job complete on drain, we (1) throttle its ++# base image so we can start the drain after it has begun, but before it ++# completes, and (2) make it encounter an I/O error on the ensuing write. ++# (If it completes regularly, the completion happens after the drain for ++# some reason.) ++ ++class TestStreamErrorOnReset(QMPTestCase): ++ def setUp(self) -> None: ++ """ ++ Create two images: ++ - base image {base} with {data_size} bytes allocated ++ - top image {top} without any data allocated ++ ++ And the following VM configuration: ++ - base image throttled to {data_size} ++ - top image with a blkdebug configuration so the first write access ++ to it will result in an error ++ - top image is attached to a virtio-scsi device ++ """ ++ assert qemu_img_create('-f', imgfmt, base, str(image_size)) == 0 ++ assert qemu_io_silent('-c', f'write 0 {data_size}', base) == 0 ++ assert qemu_img_create('-f', imgfmt, top, str(image_size)) == 0 ++ ++ self.vm = iotests.VM() ++ self.vm.add_args('-accel', 'tcg') # Make throttling work properly ++ self.vm.add_object(self.vm.qmp_to_opts({ ++ 'qom-type': 'throttle-group', ++ 'id': 'thrgr', ++ 'x-bps-total': str(data_size) ++ })) ++ self.vm.add_blockdev(self.vm.qmp_to_opts({ ++ 'driver': imgfmt, ++ 'node-name': 'base', ++ 'file': { ++ 'driver': 'throttle', ++ 'throttle-group': 'thrgr', ++ 'file': { ++ 'driver': 'file', ++ 'filename': base ++ } ++ } ++ })) ++ self.vm.add_blockdev(self.vm.qmp_to_opts({ ++ 'driver': imgfmt, ++ 'node-name': 'top', ++ 'file': { ++ 'driver': 'blkdebug', ++ 'node-name': 'top-blkdebug', ++ 'inject-error': [{ ++ 'event': 'pwritev', ++ 'immediately': 'true', ++ 'once': 'true' ++ }], ++ 'image': { ++ 'driver': 'file', ++ 'filename': top ++ } ++ }, ++ 'backing': 'base' ++ })) ++ self.vm.add_device(self.vm.qmp_to_opts({ ++ 'driver': 'virtio-scsi', ++ 'id': 
'vscsi' ++ })) ++ self.vm.add_device(self.vm.qmp_to_opts({ ++ 'driver': 'scsi-hd', ++ 'bus': 'vscsi.0', ++ 'drive': 'top' ++ })) ++ self.vm.launch() ++ ++ def tearDown(self) -> None: ++ self.vm.shutdown() ++ os.remove(top) ++ os.remove(base) ++ ++ def test_stream_error_on_reset(self) -> None: ++ # Launch a stream job, which will take at least a second to ++ # complete, because the base image is throttled (so we can ++ # get in between it having started and it having completed) ++ res = self.vm.qmp('block-stream', job_id='stream', device='top') ++ self.assert_qmp(res, 'return', {}) ++ ++ while True: ++ ev = self.vm.event_wait('JOB_STATUS_CHANGE') ++ if ev['data']['status'] == 'running': ++ # Once the stream job is running, reset the system, which ++ # forces the virtio-scsi device to be reset, thus draining ++ # the stream job, and making it complete. Completing ++ # inside of that drain should not result in a segfault. ++ res = self.vm.qmp('system_reset') ++ self.assert_qmp(res, 'return', {}) ++ elif ev['data']['status'] == 'null': ++ # The test is done once the job is gone ++ break ++ ++ ++if __name__ == '__main__': ++ # Passes with any format with backing file support, but qed and ++ # qcow1 do not seem to exercise the used-to-be problematic code ++ # path, so there is no point in having them in this list ++ iotests.main(supported_fmts=['qcow2', 'vmdk'], ++ supported_protocols=['file']) +diff --git a/tests/qemu-iotests/tests/stream-error-on-reset.out b/tests/qemu-iotests/tests/stream-error-on-reset.out +new file mode 100644 +index 0000000000..ae1213e6f8 +--- /dev/null ++++ b/tests/qemu-iotests/tests/stream-error-on-reset.out +@@ -0,0 +1,5 @@ ++. 
++---------------------------------------------------------------------- ++Ran 1 tests ++ ++OK +-- +2.27.0 + diff --git a/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch b/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch new file mode 100644 index 0000000..e26bfcf --- /dev/null +++ b/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch @@ -0,0 +1,287 @@ +From 6274a2a09a8931188889467b104bf2e2fc39cb54 Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Mon, 1 Nov 2021 12:20:05 -0400 +Subject: [PATCH 01/12] numa: Enable numa for SGX EPC sections + +RH-Author: Paul Lai +RH-MergeRequest: 65: Enable SGX and add SGX Numa support +RH-Commit: [1/5] ff69d138c3f5903096388ec7ccf8dc5e6c6c6ffb +RH-Bugzilla: 2033708 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +The basic SGX did not enable numa for SGX EPC sections, which +result in all EPC sections located in numa node 0. This patch +enable SGX numa function in the guest and the EPC section can +work with RAM as one numa node. + +The Guest kernel related log: +[ 0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x180000000-0x183ffffff] +[ 0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x184000000-0x185bfffff] +The SRAT table can normally show SGX EPC sections menory info in different +numa nodes. + +The SGX EPC numa related command: + ...... + -m 4G,maxmem=20G \ + -smp sockets=2,cores=2 \ + -cpu host,+sgx-provisionkey \ + -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \ + -object memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \ + -numa node,nodeid=0,cpus=0-1,memdev=node0 \ + -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \ + -object memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \ + -numa node,nodeid=1,cpus=2-3,memdev=node1 \ + -M sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1 \ + ...... 
+ +Signed-off-by: Yang Zhong +Message-Id: <20211101162009.62161-2-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 1105812382e1126d86dddc16b3700f8c79dc93d1) +Signed-off-by: Paul Lai +--- + hw/core/numa.c | 5 ++--- + hw/i386/acpi-build.c | 2 ++ + hw/i386/sgx-epc.c | 3 +++ + hw/i386/sgx-stub.c | 4 ++++ + hw/i386/sgx.c | 44 +++++++++++++++++++++++++++++++++++++++ + include/hw/i386/sgx-epc.h | 3 +++ + monitor/hmp-cmds.c | 1 + + qapi/machine.json | 10 ++++++++- + qemu-options.hx | 4 ++-- + 9 files changed, 70 insertions(+), 6 deletions(-) + +diff --git a/hw/core/numa.c b/hw/core/numa.c +index e6050b2273..1aa05dcf42 100644 +--- a/hw/core/numa.c ++++ b/hw/core/numa.c +@@ -784,9 +784,8 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) + break; + case MEMORY_DEVICE_INFO_KIND_SGX_EPC: + se = value->u.sgx_epc.data; +- /* TODO: once we support numa, assign to right node */ +- node_mem[0].node_mem += se->size; +- node_mem[0].node_plugged_mem += se->size; ++ node_mem[se->node].node_mem += se->size; ++ node_mem[se->node].node_plugged_mem = 0; + break; + default: + g_assert_not_reached(); +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index a99c6e4fe3..8383b83ee3 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -2068,6 +2068,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) + nvdimm_build_srat(table_data); + } + ++ sgx_epc_build_srat(table_data); ++ + /* + * TODO: this part is not in ACPI spec and current linux kernel boots fine + * without these entries. 
But I recall there were issues the last time I +diff --git a/hw/i386/sgx-epc.c b/hw/i386/sgx-epc.c +index e508827e78..96b2940d75 100644 +--- a/hw/i386/sgx-epc.c ++++ b/hw/i386/sgx-epc.c +@@ -21,6 +21,7 @@ + + static Property sgx_epc_properties[] = { + DEFINE_PROP_UINT64(SGX_EPC_ADDR_PROP, SGXEPCDevice, addr, 0), ++ DEFINE_PROP_UINT32(SGX_EPC_NUMA_NODE_PROP, SGXEPCDevice, node, 0), + DEFINE_PROP_LINK(SGX_EPC_MEMDEV_PROP, SGXEPCDevice, hostmem, + TYPE_MEMORY_BACKEND_EPC, HostMemoryBackendEpc *), + DEFINE_PROP_END_OF_LIST(), +@@ -139,6 +140,8 @@ static void sgx_epc_md_fill_device_info(const MemoryDeviceState *md, + se->memaddr = epc->addr; + se->size = object_property_get_uint(OBJECT(epc), SGX_EPC_SIZE_PROP, + NULL); ++ se->node = object_property_get_uint(OBJECT(epc), SGX_EPC_NUMA_NODE_PROP, ++ NULL); + se->memdev = object_get_canonical_path(OBJECT(epc->hostmem)); + + info->u.sgx_epc.data = se; +diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c +index c9b379e665..26833eb233 100644 +--- a/hw/i386/sgx-stub.c ++++ b/hw/i386/sgx-stub.c +@@ -6,6 +6,10 @@ + #include "qapi/error.h" + #include "qapi/qapi-commands-misc-target.h" + ++void sgx_epc_build_srat(GArray *table_data) ++{ ++} ++ + SGXInfo *qmp_query_sgx(Error **errp) + { + error_setg(errp, "SGX support is not compiled in"); +diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c +index 8fef3dd8fa..d04299904a 100644 +--- a/hw/i386/sgx.c ++++ b/hw/i386/sgx.c +@@ -23,6 +23,7 @@ + #include "sysemu/hw_accel.h" + #include "sysemu/reset.h" + #include ++#include "hw/acpi/aml-build.h" + + #define SGX_MAX_EPC_SECTIONS 8 + #define SGX_CPUID_EPC_INVALID 0x0 +@@ -36,6 +37,46 @@ + + #define RETRY_NUM 2 + ++static int sgx_epc_device_list(Object *obj, void *opaque) ++{ ++ GSList **list = opaque; ++ ++ if (object_dynamic_cast(obj, TYPE_SGX_EPC)) { ++ *list = g_slist_append(*list, DEVICE(obj)); ++ } ++ ++ object_child_foreach(obj, sgx_epc_device_list, opaque); ++ return 0; ++} ++ ++static GSList *sgx_epc_get_device_list(void) ++{ ++ GSList *list 
= NULL; ++ ++ object_child_foreach(qdev_get_machine(), sgx_epc_device_list, &list); ++ return list; ++} ++ ++void sgx_epc_build_srat(GArray *table_data) ++{ ++ GSList *device_list = sgx_epc_get_device_list(); ++ ++ for (; device_list; device_list = device_list->next) { ++ DeviceState *dev = device_list->data; ++ Object *obj = OBJECT(dev); ++ uint64_t addr, size; ++ int node; ++ ++ node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP, ++ &error_abort); ++ addr = object_property_get_uint(obj, SGX_EPC_ADDR_PROP, &error_abort); ++ size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP, &error_abort); ++ ++ build_srat_memory(table_data, addr, size, node, MEM_AFFINITY_ENABLED); ++ } ++ g_slist_free(device_list); ++} ++ + static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) + { + return (low & MAKE_64BIT_MASK(12, 20)) + +@@ -226,6 +267,9 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms) + /* set the memdev link with memory backend */ + object_property_parse(obj, SGX_EPC_MEMDEV_PROP, list->value->memdev, + &error_fatal); ++ /* set the numa node property for sgx epc object */ ++ object_property_set_uint(obj, SGX_EPC_NUMA_NODE_PROP, list->value->node, ++ &error_fatal); + object_property_set_bool(obj, "realized", true, &error_fatal); + object_unref(obj); + } +diff --git a/include/hw/i386/sgx-epc.h b/include/hw/i386/sgx-epc.h +index a6a65be854..581fac389a 100644 +--- a/include/hw/i386/sgx-epc.h ++++ b/include/hw/i386/sgx-epc.h +@@ -25,6 +25,7 @@ + #define SGX_EPC_ADDR_PROP "addr" + #define SGX_EPC_SIZE_PROP "size" + #define SGX_EPC_MEMDEV_PROP "memdev" ++#define SGX_EPC_NUMA_NODE_PROP "node" + + /** + * SGXEPCDevice: +@@ -38,6 +39,7 @@ typedef struct SGXEPCDevice { + + /* public */ + uint64_t addr; ++ uint32_t node; + HostMemoryBackendEpc *hostmem; + } SGXEPCDevice; + +@@ -56,6 +58,7 @@ typedef struct SGXEPCState { + } SGXEPCState; + + bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size); ++void sgx_epc_build_srat(GArray 
*table_data); + + static inline uint64_t sgx_epc_above_4g_end(SGXEPCState *sgx_epc) + { +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 9c91bf93e9..2669156b28 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -1810,6 +1810,7 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict) + se->id ? se->id : ""); + monitor_printf(mon, " memaddr: 0x%" PRIx64 "\n", se->memaddr); + monitor_printf(mon, " size: %" PRIu64 "\n", se->size); ++ monitor_printf(mon, " node: %" PRId64 "\n", se->node); + monitor_printf(mon, " memdev: %s\n", se->memdev); + break; + default: +diff --git a/qapi/machine.json b/qapi/machine.json +index 067e3f5378..16e771affc 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -1207,12 +1207,15 @@ + # + # @memdev: memory backend linked with device + # ++# @node: the numa node ++# + # Since: 6.2 + ## + { 'struct': 'SgxEPCDeviceInfo', + 'data': { '*id': 'str', + 'memaddr': 'size', + 'size': 'size', ++ 'node': 'int', + 'memdev': 'str' + } + } +@@ -1285,10 +1288,15 @@ + # + # @memdev: memory backend linked with device + # ++# @node: the numa node ++# + # Since: 6.2 + ## + { 'struct': 'SgxEPC', +- 'data': { 'memdev': 'str' } } ++ 'data': { 'memdev': 'str', ++ 'node': 'int' ++ } ++} + + ## + # @SgxEPCProperties: +diff --git a/qemu-options.hx b/qemu-options.hx +index 94c4a8dbaf..4b7798088b 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -127,11 +127,11 @@ SRST + ERST + + DEF("M", HAS_ARG, QEMU_OPTION_M, +- " sgx-epc.0.memdev=memid\n", ++ " sgx-epc.0.memdev=memid,sgx-epc.0.node=numaid\n", + QEMU_ARCH_ALL) + + SRST +-``sgx-epc.0.memdev=@var{memid}`` ++``sgx-epc.0.memdev=@var{memid},sgx-epc.0.node=@var{numaid}`` + Define an SGX EPC section. 
+ ERST + +-- +2.27.0 + diff --git a/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch b/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch new file mode 100644 index 0000000..de4c4b1 --- /dev/null +++ b/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch @@ -0,0 +1,210 @@ +From 0f75501ba348dc9fb3ce0198ceafc8093149457d Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Mon, 1 Nov 2021 12:20:07 -0400 +Subject: [PATCH 02/12] numa: Support SGX numa in the monitor and Libvirt + interfaces + +RH-Author: Paul Lai +RH-MergeRequest: 65: Enable SGX and add SGX Numa support +RH-Commit: [2/5] 8c19cfb1a139fd4dbac771e695a133f16a68437f +RH-Bugzilla: 2033708 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +Add the SGXEPCSection list into SGXInfo to show the multiple +SGX EPC sections detailed info, not the total size like before. +This patch can enable numa support for 'info sgx' command and +QMP interfaces. The new interfaces show each EPC section info +in one numa node. Libvirt can use QMP interface to get the +detailed host SGX EPC capabilities to decide how to allocate +host EPC sections to guest. 
+ +(qemu) info sgx + SGX support: enabled + SGX1 support: enabled + SGX2 support: enabled + FLC support: enabled + NUMA node #0: size=67108864 + NUMA node #1: size=29360128 + +The QMP interface show: +(QEMU) query-sgx +{"return": {"sgx": true, "sgx2": true, "sgx1": true, "sections": \ +[{"node": 0, "size": 67108864}, {"node": 1, "size": 29360128}], "flc": true}} + +(QEMU) query-sgx-capabilities +{"return": {"sgx": true, "sgx2": true, "sgx1": true, "sections": \ +[{"node": 0, "size": 17070817280}, {"node": 1, "size": 17079205888}], "flc": true}} + +Signed-off-by: Yang Zhong +Message-Id: <20211101162009.62161-4-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4755927ae12547c2e7cb22c5fa1b39038c6c11b1) +Signed-off-by: Paul Lai +--- + hw/i386/sgx.c | 51 +++++++++++++++++++++++++++++++++++-------- + qapi/misc-target.json | 19 ++++++++++++++-- + 2 files changed, 59 insertions(+), 11 deletions(-) + +diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c +index d04299904a..5de5dd0893 100644 +--- a/hw/i386/sgx.c ++++ b/hw/i386/sgx.c +@@ -83,11 +83,13 @@ static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) + ((high & MAKE_64BIT_MASK(0, 20)) << 32); + } + +-static uint64_t sgx_calc_host_epc_section_size(void) ++static SGXEPCSectionList *sgx_calc_host_epc_sections(void) + { ++ SGXEPCSectionList *head = NULL, **tail = &head; ++ SGXEPCSection *section; + uint32_t i, type; + uint32_t eax, ebx, ecx, edx; +- uint64_t size = 0; ++ uint32_t j = 0; + + for (i = 0; i < SGX_MAX_EPC_SECTIONS; i++) { + host_cpuid(0x12, i + 2, &eax, &ebx, &ecx, &edx); +@@ -101,10 +103,13 @@ static uint64_t sgx_calc_host_epc_section_size(void) + break; + } + +- size += sgx_calc_section_metric(ecx, edx); ++ section = g_new0(SGXEPCSection, 1); ++ section->node = j++; ++ section->size = sgx_calc_section_metric(ecx, edx); ++ QAPI_LIST_APPEND(tail, section); + } + +- return size; ++ return head; + } + + static void sgx_epc_reset(void *opaque) +@@ -168,13 +173,35 @@ SGXInfo 
*qmp_query_sgx_capabilities(Error **errp) + info->sgx1 = eax & (1U << 0) ? true : false; + info->sgx2 = eax & (1U << 1) ? true : false; + +- info->section_size = sgx_calc_host_epc_section_size(); ++ info->sections = sgx_calc_host_epc_sections(); + + close(fd); + + return info; + } + ++static SGXEPCSectionList *sgx_get_epc_sections_list(void) ++{ ++ GSList *device_list = sgx_epc_get_device_list(); ++ SGXEPCSectionList *head = NULL, **tail = &head; ++ SGXEPCSection *section; ++ ++ for (; device_list; device_list = device_list->next) { ++ DeviceState *dev = device_list->data; ++ Object *obj = OBJECT(dev); ++ ++ section = g_new0(SGXEPCSection, 1); ++ section->node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP, ++ &error_abort); ++ section->size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP, ++ &error_abort); ++ QAPI_LIST_APPEND(tail, section); ++ } ++ g_slist_free(device_list); ++ ++ return head; ++} ++ + SGXInfo *qmp_query_sgx(Error **errp) + { + SGXInfo *info = NULL; +@@ -193,14 +220,13 @@ SGXInfo *qmp_query_sgx(Error **errp) + return NULL; + } + +- SGXEPCState *sgx_epc = &pcms->sgx_epc; + info = g_new0(SGXInfo, 1); + + info->sgx = true; + info->sgx1 = true; + info->sgx2 = true; + info->flc = true; +- info->section_size = sgx_epc->size; ++ info->sections = sgx_get_epc_sections_list(); + + return info; + } +@@ -208,6 +234,7 @@ SGXInfo *qmp_query_sgx(Error **errp) + void hmp_info_sgx(Monitor *mon, const QDict *qdict) + { + Error *err = NULL; ++ SGXEPCSectionList *section_list, *section; + g_autoptr(SGXInfo) info = qmp_query_sgx(&err); + + if (err) { +@@ -222,8 +249,14 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict) + info->sgx2 ? "enabled" : "disabled"); + monitor_printf(mon, "FLC support: %s\n", + info->flc ? 
"enabled" : "disabled"); +- monitor_printf(mon, "size: %" PRIu64 "\n", +- info->section_size); ++ ++ section_list = info->sections; ++ for (section = section_list; section; section = section->next) { ++ monitor_printf(mon, "NUMA node #%" PRId64 ": ", ++ section->value->node); ++ monitor_printf(mon, "size=%" PRIu64 "\n", ++ section->value->size); ++ } + } + + bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size) +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index 5aa2b95b7d..1022aa0184 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -337,6 +337,21 @@ + 'if': 'TARGET_ARM' } + + ++## ++# @SGXEPCSection: ++# ++# Information about intel SGX EPC section info ++# ++# @node: the numa node ++# ++# @size: the size of epc section ++# ++# Since: 6.2 ++## ++{ 'struct': 'SGXEPCSection', ++ 'data': { 'node': 'int', ++ 'size': 'uint64'}} ++ + ## + # @SGXInfo: + # +@@ -350,7 +365,7 @@ + # + # @flc: true if FLC is supported + # +-# @section-size: The EPC section size for guest ++# @sections: The EPC sections info for guest + # + # Since: 6.2 + ## +@@ -359,7 +374,7 @@ + 'sgx1': 'bool', + 'sgx2': 'bool', + 'flc': 'bool', +- 'section-size': 'uint64'}, ++ 'sections': ['SGXEPCSection']}, + 'if': 'TARGET_I386' } + + ## +-- +2.27.0 + diff --git a/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch b/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch new file mode 100644 index 0000000..9e58f6c --- /dev/null +++ b/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch @@ -0,0 +1,213 @@ +From a6a327ae392c02b8e8c75b5d702d929ff8fe408d Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Thu, 20 Jan 2022 17:31:04 -0500 +Subject: [PATCH 05/12] qapi: Cleanup SGX related comments and restore + @section-size +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paul Lai +RH-MergeRequest: 65: Enable SGX and add SGX Numa support +RH-Commit: [5/5] 
0d3b9f37cd3cce202050ba3bd51eef4410ef3d38 +RH-Bugzilla: 2033708 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +The SGX NUMA patches were merged into Qemu 7.0 release, we need +clarify detailed version history information and also change +some related comments, which make SGX related comments clearer. + +The QMP command schema promises backwards compatibility as standard. +We temporarily restore "@section-size", which can avoid incompatible +API breakage. The "@section-size" will be deprecated in 7.2 version. + +Suggested-by: Daniel P. Berrangé +Signed-off-by: Yang Zhong +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20220120223104.437161-1-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Paul Lai +--- + docs/about/deprecated.rst | 13 +++++++++++++ + hw/i386/sgx.c | 11 +++++++++-- + qapi/machine.json | 4 ++-- + qapi/misc-target.json | 22 +++++++++++++++++----- + 4 files changed, 41 insertions(+), 9 deletions(-) + +diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst +index ff7488cb63..33925edf45 100644 +--- a/docs/about/deprecated.rst ++++ b/docs/about/deprecated.rst +@@ -270,6 +270,19 @@ accepted incorrect commands will return an error. Users should make sure that + all arguments passed to ``device_add`` are consistent with the documented + property types. + ++``query-sgx`` return value member ``section-size`` (since 7.0) ++'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' ++ ++Member ``section-size`` in return value elements with meta-type ``uint64`` is ++deprecated. Use ``sections`` instead. ++ ++ ++``query-sgx-capabilities`` return value member ``section-size`` (since 7.0) ++''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' ++ ++Member ``section-size`` in return value elements with meta-type ``uint64`` is ++deprecated. Use ``sections`` instead. 
++ + System accelerators + ------------------- + +diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c +index 5de5dd0893..a2b318dd93 100644 +--- a/hw/i386/sgx.c ++++ b/hw/i386/sgx.c +@@ -83,7 +83,7 @@ static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) + ((high & MAKE_64BIT_MASK(0, 20)) << 32); + } + +-static SGXEPCSectionList *sgx_calc_host_epc_sections(void) ++static SGXEPCSectionList *sgx_calc_host_epc_sections(uint64_t *size) + { + SGXEPCSectionList *head = NULL, **tail = &head; + SGXEPCSection *section; +@@ -106,6 +106,7 @@ static SGXEPCSectionList *sgx_calc_host_epc_sections(void) + section = g_new0(SGXEPCSection, 1); + section->node = j++; + section->size = sgx_calc_section_metric(ecx, edx); ++ *size += section->size; + QAPI_LIST_APPEND(tail, section); + } + +@@ -156,6 +157,7 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp) + { + SGXInfo *info = NULL; + uint32_t eax, ebx, ecx, edx; ++ uint64_t size = 0; + + int fd = qemu_open_old("/dev/sgx_vepc", O_RDWR); + if (fd < 0) { +@@ -173,7 +175,8 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp) + info->sgx1 = eax & (1U << 0) ? true : false; + info->sgx2 = eax & (1U << 1) ? true : false; + +- info->sections = sgx_calc_host_epc_sections(); ++ info->sections = sgx_calc_host_epc_sections(&size); ++ info->section_size = size; + + close(fd); + +@@ -220,12 +223,14 @@ SGXInfo *qmp_query_sgx(Error **errp) + return NULL; + } + ++ SGXEPCState *sgx_epc = &pcms->sgx_epc; + info = g_new0(SGXInfo, 1); + + info->sgx = true; + info->sgx1 = true; + info->sgx2 = true; + info->flc = true; ++ info->section_size = sgx_epc->size; + info->sections = sgx_get_epc_sections_list(); + + return info; +@@ -249,6 +254,8 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict) + info->sgx2 ? "enabled" : "disabled"); + monitor_printf(mon, "FLC support: %s\n", + info->flc ? 
"enabled" : "disabled"); ++ monitor_printf(mon, "size: %" PRIu64 "\n", ++ info->section_size); + + section_list = info->sections; + for (section = section_list; section; section = section->next) { +diff --git a/qapi/machine.json b/qapi/machine.json +index 16e771affc..a9f33d0f27 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -1207,7 +1207,7 @@ + # + # @memdev: memory backend linked with device + # +-# @node: the numa node ++# @node: the numa node (Since: 7.0) + # + # Since: 6.2 + ## +@@ -1288,7 +1288,7 @@ + # + # @memdev: memory backend linked with device + # +-# @node: the numa node ++# @node: the numa node (Since: 7.0) + # + # Since: 6.2 + ## +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index 1022aa0184..4bc45d2474 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -344,9 +344,9 @@ + # + # @node: the numa node + # +-# @size: the size of epc section ++# @size: the size of EPC section + # +-# Since: 6.2 ++# Since: 7.0 + ## + { 'struct': 'SGXEPCSection', + 'data': { 'node': 'int', +@@ -365,7 +365,13 @@ + # + # @flc: true if FLC is supported + # +-# @sections: The EPC sections info for guest ++# @section-size: The EPC section size for guest ++# Redundant with @sections. Just for backward compatibility. ++# ++# @sections: The EPC sections info for guest (Since: 7.0) ++# ++# Features: ++# @deprecated: Member @section-size is deprecated. Use @sections instead. 
+ # + # Since: 6.2 + ## +@@ -374,6 +380,8 @@ + 'sgx1': 'bool', + 'sgx2': 'bool', + 'flc': 'bool', ++ 'section-size': { 'type': 'uint64', ++ 'features': [ 'deprecated' ] }, + 'sections': ['SGXEPCSection']}, + 'if': 'TARGET_I386' } + +@@ -390,7 +398,9 @@ + # + # -> { "execute": "query-sgx" } + # <- { "return": { "sgx": true, "sgx1" : true, "sgx2" : true, +-# "flc": true, "section-size" : 0 } } ++# "flc": true, "section-size" : 96468992, ++# "sections": [{"node": 0, "size": 67108864}, ++# {"node": 1, "size": 29360128}]} } + # + ## + { 'command': 'query-sgx', 'returns': 'SGXInfo', 'if': 'TARGET_I386' } +@@ -408,7 +418,9 @@ + # + # -> { "execute": "query-sgx-capabilities" } + # <- { "return": { "sgx": true, "sgx1" : true, "sgx2" : true, +-# "flc": true, "section-size" : 0 } } ++# "flc": true, "section-size" : 96468992, ++# "section" : [{"node": 0, "size": 67108864}, ++# {"node": 1, "size": 29360128}]} } + # + ## + { 'command': 'query-sgx-capabilities', 'returns': 'SGXInfo', 'if': 'TARGET_I386' } +-- +2.27.0 + diff --git a/kvm-qemu-img-make-is_allocated_sectors-more-efficient.patch b/kvm-qemu-img-make-is_allocated_sectors-more-efficient.patch new file mode 100644 index 0000000..2d67070 --- /dev/null +++ b/kvm-qemu-img-make-is_allocated_sectors-more-efficient.patch @@ -0,0 +1,108 @@ +From a221f5a8ed02690687e6709c49ae0e1e01c5f466 Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Fri, 17 Dec 2021 17:46:54 +0100 +Subject: [PATCH 09/12] qemu-img: make is_allocated_sectors() more efficient + +RH-Author: Kevin Wolf +RH-MergeRequest: 70: qemu-img convert: Fix sparseness of output image +RH-Commit: [2/2] cc05aa4ac506b57ff9b430c007618cdf1485a03f (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 1882917 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Reitz + +Consider the case when the whole buffer is zero and end is unaligned. + +If i <= tail, we return 1 and do one unaligned WRITE, RMW happens. 
+ +If i > tail, we do on aligned WRITE_ZERO (or skip if target is zeroed) +and again one unaligned WRITE, RMW happens. + +Let's do better: don't fragment the whole-zero buffer and report it as +ZERO: in case of zeroed target we just do nothing and avoid RMW. If +target is not zeroes, one unaligned WRITE_ZERO should not be much worse +than one unaligned WRITE. + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20211217164654.1184218-3-vsementsov@virtuozzo.com> +Tested-by: Peter Lieven +Signed-off-by: Kevin Wolf +(cherry picked from commit 96054c76ff2db74165385a69f234c57a6bbc941e) +Signed-off-by: Kevin Wolf +--- + qemu-img.c | 23 +++++++++++++++++++---- + tests/qemu-iotests/122.out | 8 ++------ + 2 files changed, 21 insertions(+), 10 deletions(-) + +diff --git a/qemu-img.c b/qemu-img.c +index f036a1d428..d7ddfcc528 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -1171,19 +1171,34 @@ static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum, + } + } + ++ if (i == n) { ++ /* ++ * The whole buf is the same. ++ * No reason to split it into chunks, so return now. ++ */ ++ *pnum = i; ++ return !is_zero; ++ } ++ + tail = (sector_num + i) & (alignment - 1); + if (tail) { + if (is_zero && i <= tail) { +- /* treat unallocated areas which only consist +- * of a small tail as allocated. */ ++ /* ++ * For sure next sector after i is data, and it will rewrite this ++ * tail anyway due to RMW. So, let's just write data now. ++ */ + is_zero = false; + } + if (!is_zero) { +- /* align up end offset of allocated areas. */ ++ /* If possible, align up end offset of allocated areas. */ + i += alignment - tail; + i = MIN(i, n); + } else { +- /* align down end offset of zero areas. */ ++ /* ++ * For sure next sector after i is data, and it will rewrite this ++ * tail anyway due to RMW. Better is avoid RMW and write zeroes up ++ * to aligned bound. 
++ */ + i -= tail; + } + } +diff --git a/tests/qemu-iotests/122.out b/tests/qemu-iotests/122.out +index 69b8e8b803..e18766e167 100644 +--- a/tests/qemu-iotests/122.out ++++ b/tests/qemu-iotests/122.out +@@ -201,9 +201,7 @@ convert -S 4k + { "start": 8192, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, + { "start": 12288, "length": 4096, "depth": 0, "present": false, "zero": true, "data": false}, + { "start": 16384, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, +-{ "start": 20480, "length": 46080, "depth": 0, "present": false, "zero": true, "data": false}, +-{ "start": 66560, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, +-{ "start": 67584, "length": 67041280, "depth": 0, "present": false, "zero": true, "data": false}] ++{ "start": 20480, "length": 67088384, "depth": 0, "present": false, "zero": true, "data": false}] + + convert -c -S 4k + [{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true}, +@@ -215,9 +213,7 @@ convert -c -S 4k + + convert -S 8k + [{ "start": 0, "length": 24576, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, +-{ "start": 24576, "length": 41984, "depth": 0, "present": false, "zero": true, "data": false}, +-{ "start": 66560, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, +-{ "start": 67584, "length": 67041280, "depth": 0, "present": false, "zero": true, "data": false}] ++{ "start": 24576, "length": 67084288, "depth": 0, "present": false, "zero": true, "data": false}] + + convert -c -S 8k + [{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true}, +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index d6b34c4..60541da 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -144,7 +144,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine 
emulator and virtualizer Name: qemu-kvm Version: 6.2.0 -Release: 7%{?rcrel}%{?dist}%{?cc_suffix} +Release: 8%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -227,6 +227,30 @@ Patch41: kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch Patch42: kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch # For bz#2034791 - Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD Patch43: kvm-block-rbd-workaround-for-ceph-issue-53784.patch +# For bz#2033708 - [Intel 9.0 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch44: kvm-numa-Enable-numa-for-SGX-EPC-sections.patch +# For bz#2033708 - [Intel 9.0 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch45: kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch +# For bz#2033708 - [Intel 9.0 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch46: kvm-doc-Add-the-SGX-numa-description.patch +# For bz#2033708 - [Intel 9.0 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch47: kvm-Enable-SGX-RH-Only.patch +# For bz#2033708 - [Intel 9.0 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch48: kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch +# For bz#2041461 - Inconsistent block status reply in qemu-nbd +Patch49: kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch +# For bz#2041461 - Inconsistent block status reply in qemu-nbd +Patch50: kvm-iotests-block-status-cache-New-test.patch +# For bz#1882917 - the target image size is incorrect when converting a badly fragmented file +Patch51: kvm-iotests-Test-qemu-img-convert-of-zeroed-data-cluster.patch +# For bz#1882917 - the target image size is incorrect when converting a badly fragmented file +Patch52: kvm-qemu-img-make-is_allocated_sectors-more-efficient.patch +# 
For bz#2040123 - Qemu core dumped when do block-stream to a snapshot node on non-enough space storage +Patch53: kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch +# For bz#2040123 - Qemu core dumped when do block-stream to a snapshot node on non-enough space storage +Patch54: kvm-iotests-stream-error-on-reset-New-test.patch +# For bz#2042481 - [aarch64] Launch guest with "default-bus-bypass-iommu=off,iommu=smmuv3" and "iommu_platform=on", guest hangs after system_reset +Patch55: kvm-hw-arm-smmuv3-Fix-device-reset.patch # Source-git patches @@ -1285,6 +1309,30 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Feb 14 2022 Miroslav Rezanina - 6.2.0-8 +- kvm-numa-Enable-numa-for-SGX-EPC-sections.patch [bz#2033708] +- kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch [bz#2033708] +- kvm-doc-Add-the-SGX-numa-description.patch [bz#2033708] +- kvm-Enable-SGX-RH-Only.patch [bz#2033708] +- kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch [bz#2033708] +- kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch [bz#2041461] +- kvm-iotests-block-status-cache-New-test.patch [bz#2041461] +- kvm-iotests-Test-qemu-img-convert-of-zeroed-data-cluster.patch [bz#1882917] +- kvm-qemu-img-make-is_allocated_sectors-more-efficient.patch [bz#1882917] +- kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch [bz#2040123] +- kvm-iotests-stream-error-on-reset-New-test.patch [bz#2040123] +- kvm-hw-arm-smmuv3-Fix-device-reset.patch [bz#2042481] +- Resolves: bz#2033708 + ([Intel 9.0 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support) +- Resolves: bz#2041461 + (Inconsistent block status reply in qemu-nbd) +- Resolves: bz#1882917 + (the target image size is incorrect when converting a badly fragmented file) +- Resolves: bz#2040123 + (Qemu core dumped when do block-stream to a snapshot node on non-enough space storage) +- Resolves: bz#2042481 + ([aarch64] Launch guest with 
"default-bus-bypass-iommu=off,iommu=smmuv3" and "iommu_platform=on", guest hangs after system_reset) + * Mon Feb 07 2022 Miroslav Rezanina - 6.2.0-7 - kvm-qemu-storage-daemon-Add-vhost-user-blk-help.patch [bz#1962088] - kvm-qemu-storage-daemon-Fix-typo-in-vhost-user-blk-help.patch [bz#1962088] From ed795e95d874b1e848f40ade3c58516770d069fe Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 17 Feb 2022 01:48:18 -0500 Subject: [PATCH 151/195] * Thu Feb 17 2022 Miroslav Rezanina - 6.2.0-9 - kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch [bz#2046659] - kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch [bz#2046659] - kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch [bz#2033626] - kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch [bz#2033626] - kvm-iotests.py-Add-QemuStorageDaemon-class.patch [bz#2033626] - kvm-iotests-281-Test-lingering-timers.patch [bz#2033626] - kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch [bz#2033626] - kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch [bz#2033626] - Resolves: bz#2046659 (qemu crash after execute blockdev-reopen with iothread) - Resolves: bz#2033626 (Qemu core dump when start guest with nbd node or do block jobs to nbd node) --- ...ntext-for-drain_end-in-blockdev-reop.patch | 63 +++++++ ...sert-there-are-no-timers-when-closed.patch | 52 ++++++ ...lete-reconnect-delay-timer-when-done.patch | 54 ++++++ ...-nbd-Move-s-ioc-on-AioContext-change.patch | 107 +++++++++++ ...Let-NBD-connection-yield-in-iothread.patch | 108 +++++++++++ kvm-iotests-281-Test-lingering-timers.patch | 174 ++++++++++++++++++ ...ckdev-reopen-with-iothreads-and-thro.patch | 106 +++++++++++ ...tests.py-Add-QemuStorageDaemon-class.patch | 92 +++++++++ qemu-kvm.spec | 32 +++- rpminspect.yaml | 6 +- 10 files changed, 792 insertions(+), 2 deletions(-) create mode 100644 kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch create mode 100644 
kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch create mode 100644 kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch create mode 100644 kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch create mode 100644 kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch create mode 100644 kvm-iotests-281-Test-lingering-timers.patch create mode 100644 kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch create mode 100644 kvm-iotests.py-Add-QemuStorageDaemon-class.patch diff --git a/kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch b/kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch new file mode 100644 index 0000000..6fc7f38 --- /dev/null +++ b/kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch @@ -0,0 +1,63 @@ +From 7b973b9cb7b890eaf9a31c99f5c272b513322ac1 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 3 Feb 2022 15:05:33 +0100 +Subject: [PATCH 1/8] block: Lock AioContext for drain_end in blockdev-reopen + +RH-Author: Kevin Wolf +RH-MergeRequest: 73: block: Lock AioContext for drain_end in blockdev-reopen +RH-Commit: [1/2] db25e999152b0e4f09decade1ac76b9f56cd9706 (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 2046659 +RH-Acked-by: Sergio Lopez +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Reitz + +bdrv_subtree_drained_end() requires the caller to hold the AioContext +lock for the drained node. Not doing this for nodes outside of the main +AioContext leads to crashes when AIO_WAIT_WHILE() needs to wait and +tries to temporarily release the lock. 
+ +Fixes: 3908b7a8994fa5ef7a89aa58cd5a02fc58141592 +Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2046659 +Reported-by: Qing Wang +Signed-off-by: Kevin Wolf +Message-Id: <20220203140534.36522-2-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit aba8205be0707b9d108e32254e186ba88107a869) +Signed-off-by: Kevin Wolf +--- + blockdev.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/blockdev.c b/blockdev.c +index b35072644e..565f6a81fd 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3562,6 +3562,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + { + BlockReopenQueue *queue = NULL; + GSList *drained = NULL; ++ GSList *p; + + /* Add each one of the BDS that we want to reopen to the queue */ + for (; reopen_list != NULL; reopen_list = reopen_list->next) { +@@ -3611,7 +3612,15 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + + fail: + bdrv_reopen_queue_free(queue); +- g_slist_free_full(drained, (GDestroyNotify) bdrv_subtree_drained_end); ++ for (p = drained; p; p = p->next) { ++ BlockDriverState *bs = p->data; ++ AioContext *ctx = bdrv_get_aio_context(bs); ++ ++ aio_context_acquire(ctx); ++ bdrv_subtree_drained_end(bs); ++ aio_context_release(ctx); ++ } ++ g_slist_free(drained); + } + + void qmp_blockdev_del(const char *node_name, Error **errp) +-- +2.27.0 + diff --git a/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch b/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch new file mode 100644 index 0000000..24c5b8a --- /dev/null +++ b/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch @@ -0,0 +1,52 @@ +From 76b03619435d0b2f0125ee7aa5c94f2b889247de Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:08 +0100 +Subject: [PATCH 4/8] block/nbd: Assert there are no timers when closed + +RH-Author: Hanna Reitz +RH-MergeRequest: 74: block/nbd: Handle AioContext changes +RH-Commit: [2/6] 
56903457ca35d9c596aeb6827a48f80e8eabd66a (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2033626 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Our two timers must not remain armed beyond nbd_clear_bdrvstate(), or +they will access freed data when they fire. + +This patch is separate from the patches that actually fix the issue +(HEAD^^ and HEAD^) so that you can run the associated regression iotest +(281) on a configuration that reproducibly exposes the bug. + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 8a39c381e5e407d2fe5500324323f90a8540fa90) + +Conflict: +- block/nbd.c: open_timer was introduced after the 6.2 release (for + nbd's @open-timeout parameter), and has not been backported, so drop + the assertion that it is NULL + +Signed-off-by: Hanna Reitz +--- + block/nbd.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index b8e5a9b4cc..aab20125d8 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -108,6 +108,9 @@ static void nbd_clear_bdrvstate(BlockDriverState *bs) + + yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name)); + ++ /* Must not leave timers behind that would access freed data */ ++ assert(!s->reconnect_delay_timer); ++ + object_unref(OBJECT(s->tlscreds)); + qapi_free_SocketAddress(s->saddr); + s->saddr = NULL; +-- +2.27.0 + diff --git a/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch b/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch new file mode 100644 index 0000000..0cdf622 --- /dev/null +++ b/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch @@ -0,0 +1,54 @@ +From eeb4683ad8c40a03a4e91463ec1d1b651974b744 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:06 +0100 +Subject: [PATCH 3/8] block/nbd: Delete reconnect delay timer when done + +RH-Author: Hanna Reitz +RH-MergeRequest: 74: block/nbd: Handle AioContext changes 
+RH-Commit: [1/6] 34f92910b6ffd256d781109a2b39737fc6ab449c (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2033626 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +We start the reconnect delay timer to cancel the reconnection attempt +after a while. Once nbd_co_do_establish_connection() has returned, this +attempt is over, and we no longer need the timer. + +Delete it before returning from nbd_reconnect_attempt(), so that it does +not persist beyond the I/O request that was paused for reconnecting; we +do not want it to fire in a drained section, because all sort of things +can happen in such a section (e.g. the AioContext might be changed, and +we do not want the timer to fire in the wrong context; or the BDS might +even be deleted, and so the timer CB would access already-freed data). + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 3ce1fc16bad9c3f8b7b10b451a224d6d76e5c551) +Signed-off-by: Hanna Reitz +--- + block/nbd.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index 5ef462db1b..b8e5a9b4cc 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -353,6 +353,13 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) + } + + nbd_co_do_establish_connection(s->bs, NULL); ++ ++ /* ++ * The reconnect attempt is done (maybe successfully, maybe not), so ++ * we no longer need this timer. Delete it so it will not outlive ++ * this I/O request (so draining removes all timers). 
++ */ ++ reconnect_delay_timer_del(s); + } + + static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle) +-- +2.27.0 + diff --git a/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch b/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch new file mode 100644 index 0000000..1cb29e9 --- /dev/null +++ b/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch @@ -0,0 +1,107 @@ +From 6d9d86cc4e6149d4c0793e8ceb65dab7535a4561 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:11 +0100 +Subject: [PATCH 7/8] block/nbd: Move s->ioc on AioContext change + +RH-Author: Hanna Reitz +RH-MergeRequest: 74: block/nbd: Handle AioContext changes +RH-Commit: [5/6] b3c1eb21ac70d64fdac6094468a72cfbe50a30a9 (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2033626 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +s->ioc must always be attached to the NBD node's AioContext. If that +context changes, s->ioc must be attached to the new context. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2033626 +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit e15f3a66c830e3fce99c9d56c493c2f7078a1225) + +Conflict: +- block/nbd.c: open_timer was added after the 6.2 release, so we need + not (and cannot) assert it is NULL here. 
+ +Signed-off-by: Hanna Reitz +--- + block/nbd.c | 41 +++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 41 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index aab20125d8..a3896c7f5f 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -2003,6 +2003,38 @@ static void nbd_cancel_in_flight(BlockDriverState *bs) + nbd_co_establish_connection_cancel(s->conn); + } + ++static void nbd_attach_aio_context(BlockDriverState *bs, ++ AioContext *new_context) ++{ ++ BDRVNBDState *s = bs->opaque; ++ ++ /* ++ * The reconnect_delay_timer is scheduled in I/O paths when the ++ * connection is lost, to cancel the reconnection attempt after a ++ * given time. Once this attempt is done (successfully or not), ++ * nbd_reconnect_attempt() ensures the timer is deleted before the ++ * respective I/O request is resumed. ++ * Since the AioContext can only be changed when a node is drained, ++ * the reconnect_delay_timer cannot be active here. ++ */ ++ assert(!s->reconnect_delay_timer); ++ ++ if (s->ioc) { ++ qio_channel_attach_aio_context(s->ioc, new_context); ++ } ++} ++ ++static void nbd_detach_aio_context(BlockDriverState *bs) ++{ ++ BDRVNBDState *s = bs->opaque; ++ ++ assert(!s->reconnect_delay_timer); ++ ++ if (s->ioc) { ++ qio_channel_detach_aio_context(s->ioc); ++ } ++} ++ + static BlockDriver bdrv_nbd = { + .format_name = "nbd", + .protocol_name = "nbd", +@@ -2026,6 +2058,9 @@ static BlockDriver bdrv_nbd = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = nbd_detach_aio_context, + }; + + static BlockDriver bdrv_nbd_tcp = { +@@ -2051,6 +2086,9 @@ static BlockDriver bdrv_nbd_tcp = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = 
nbd_detach_aio_context, + }; + + static BlockDriver bdrv_nbd_unix = { +@@ -2076,6 +2114,9 @@ static BlockDriver bdrv_nbd_unix = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = nbd_detach_aio_context, + }; + + static void bdrv_nbd_init(void) +-- +2.27.0 + diff --git a/kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch b/kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch new file mode 100644 index 0000000..20bc3a5 --- /dev/null +++ b/kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch @@ -0,0 +1,108 @@ +From 06583ce33fab2976157461ac4503d6f8eeb59e75 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:12 +0100 +Subject: [PATCH 8/8] iotests/281: Let NBD connection yield in iothread + +RH-Author: Hanna Reitz +RH-MergeRequest: 74: block/nbd: Handle AioContext changes +RH-Commit: [6/6] 632b9ef5177a80d1c0c00121e1acc37272076d3e (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2033626 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Put an NBD block device into an I/O thread, and then read data from it, +hoping that the NBD connection will yield during that read. When it +does, the coroutine must be reentered in the block device's I/O thread, +which will only happen if the NBD block driver attaches the connection's +QIOChannel to the new AioContext. It did not do that after 4ddb5d2fde +("block/nbd: drop connection_co") and prior to "block/nbd: Move s->ioc +on AioContext change", which would cause an assertion failure. + +To improve our chances of yielding, the NBD server is throttled to +reading 64 kB/s, and the NBD client reads 128 kB, so it should yield at +some point. 
+ +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 8cfbe929e8c26050f0a4580a1606a370a947d4ce) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/281 | 28 +++++++++++++++++++++++++--- + tests/qemu-iotests/281.out | 4 ++-- + 2 files changed, 27 insertions(+), 5 deletions(-) + +diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 +index 13c588be75..b2ead7f388 100755 +--- a/tests/qemu-iotests/281 ++++ b/tests/qemu-iotests/281 +@@ -253,8 +253,9 @@ class TestYieldingAndTimers(iotests.QMPTestCase): + self.create_nbd_export() + + # Simple VM with an NBD block device connected to the NBD export +- # provided by the QSD ++ # provided by the QSD, and an (initially unused) iothread + self.vm = iotests.VM() ++ self.vm.add_object('iothread,id=iothr') + self.vm.add_blockdev('nbd,node-name=nbd,server.type=unix,' + + f'server.path={self.sock},export=exp,' + + 'reconnect-delay=1') +@@ -293,19 +294,40 @@ class TestYieldingAndTimers(iotests.QMPTestCase): + # thus not see the error, and so the test will pass.) + time.sleep(2) + ++ def test_yield_in_iothread(self): ++ # Move the NBD node to the I/O thread; the NBD block driver should ++ # attach the connection's QIOChannel to that thread's AioContext, too ++ result = self.vm.qmp('x-blockdev-set-iothread', ++ node_name='nbd', iothread='iothr') ++ self.assert_qmp(result, 'return', {}) ++ ++ # Do some I/O that will be throttled by the QSD, so that the network ++ # connection hopefully will yield here. When it is resumed, it must ++ # then be resumed in the I/O thread's AioContext. ++ result = self.vm.qmp('human-monitor-command', ++ command_line='qemu-io nbd "read 0 128K"') ++ self.assert_qmp(result, 'return', '') ++ + def create_nbd_export(self): + assert self.qsd is None + +- # Simple NBD export of a null-co BDS ++ # Export a throttled null-co BDS: Reads are throttled (max 64 kB/s), ++ # writes are not. 
+ self.qsd = QemuStorageDaemon( ++ '--object', ++ 'throttle-group,id=thrgr,x-bps-read=65536,x-bps-read-max=65536', ++ + '--blockdev', + 'null-co,node-name=null,read-zeroes=true', + ++ '--blockdev', ++ 'throttle,node-name=thr,file=null,throttle-group=thrgr', ++ + '--nbd-server', + f'addr.type=unix,addr.path={self.sock}', + + '--export', +- 'nbd,id=exp,node-name=null,name=exp,writable=true' ++ 'nbd,id=exp,node-name=thr,name=exp,writable=true' + ) + + def stop_nbd_export(self): +diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out +index 914e3737bd..3f8a935a08 100644 +--- a/tests/qemu-iotests/281.out ++++ b/tests/qemu-iotests/281.out +@@ -1,5 +1,5 @@ +-..... ++...... + ---------------------------------------------------------------------- +-Ran 5 tests ++Ran 6 tests + + OK +-- +2.27.0 + diff --git a/kvm-iotests-281-Test-lingering-timers.patch b/kvm-iotests-281-Test-lingering-timers.patch new file mode 100644 index 0000000..7175a31 --- /dev/null +++ b/kvm-iotests-281-Test-lingering-timers.patch @@ -0,0 +1,174 @@ +From 3d2d7a46713d362d2ff5137841e689593da976a3 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:10 +0100 +Subject: [PATCH 6/8] iotests/281: Test lingering timers + +RH-Author: Hanna Reitz +RH-MergeRequest: 74: block/nbd: Handle AioContext changes +RH-Commit: [4/6] d228ba3fcdfaab2d54dd5b023688a1c055cce2c2 (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2033626 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Prior to "block/nbd: Delete reconnect delay timer when done" and +"block/nbd: Delete open timer when done", both of those timers would +remain scheduled even after successfully (re-)connecting to the server, +and they would not even be deleted when the BDS is deleted. 
+ +This test constructs exactly this situation: +(1) Configure an @open-timeout, so the open timer is armed, and +(2) Configure a @reconnect-delay and trigger a reconnect situation + (which succeeds immediately), so the reconnect delay timer is armed. +Then we immediately delete the BDS, and sleep for longer than the +@open-timeout and @reconnect-delay. Prior to said patches, this caused +one (or both) of the timer CBs to access already-freed data. + +Accessing freed data may or may not crash, so this test can produce +false successes, but I do not know how to show the problem in a better +or more reliable way. If you run this test on "block/nbd: Assert there +are no timers when closed" and without the fix patches mentioned above, +you should reliably see an assertion failure. +(But all other tests that use the reconnect delay timer (264 and 277) +will fail in that configuration, too; as will nbd-reconnect-on-open, +which uses the open timer.) + +Remove this test from the quick group because of the two second sleep +this patch introduces. + +(I decided to put this test case into 281, because the main bug this +series addresses is in the interaction of the NBD block driver and I/O +threads, which is precisely the scope of 281. The test case for that +other bug will also be put into the test class added here. + +Also, excuse the test class's name, I couldn't come up with anything +better. The "yield" part will make sense two patches from now.) + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit eaf1e85d4ddefdbd197f393fa9c5acc7ba8133b0) + +Conflict: +- @open-timeout was introduced after the 6.2 release, and has not been + backported. Consequently, there is no open_timer, and we can (and + must) drop the respective parts of the test here. 
+ +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/281 | 73 ++++++++++++++++++++++++++++++++++++-- + tests/qemu-iotests/281.out | 4 +-- + 2 files changed, 73 insertions(+), 4 deletions(-) + +diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 +index 956698083f..13c588be75 100755 +--- a/tests/qemu-iotests/281 ++++ b/tests/qemu-iotests/281 +@@ -1,5 +1,5 @@ + #!/usr/bin/env python3 +-# group: rw quick ++# group: rw + # + # Test cases for blockdev + IOThread interactions + # +@@ -20,8 +20,9 @@ + # + + import os ++import time + import iotests +-from iotests import qemu_img ++from iotests import qemu_img, QemuStorageDaemon + + image_len = 64 * 1024 * 1024 + +@@ -243,6 +244,74 @@ class TestBlockdevBackupAbort(iotests.QMPTestCase): + # Hangs on failure, we expect this error. + self.assert_qmp(result, 'error/class', 'GenericError') + ++# Test for RHBZ#2033626 ++class TestYieldingAndTimers(iotests.QMPTestCase): ++ sock = os.path.join(iotests.sock_dir, 'nbd.sock') ++ qsd = None ++ ++ def setUp(self): ++ self.create_nbd_export() ++ ++ # Simple VM with an NBD block device connected to the NBD export ++ # provided by the QSD ++ self.vm = iotests.VM() ++ self.vm.add_blockdev('nbd,node-name=nbd,server.type=unix,' + ++ f'server.path={self.sock},export=exp,' + ++ 'reconnect-delay=1') ++ ++ self.vm.launch() ++ ++ def tearDown(self): ++ self.stop_nbd_export() ++ self.vm.shutdown() ++ ++ def test_timers_with_blockdev_del(self): ++ # Stop and restart the NBD server, and do some I/O on the client to ++ # trigger a reconnect and start the reconnect delay timer ++ self.stop_nbd_export() ++ self.create_nbd_export() ++ ++ result = self.vm.qmp('human-monitor-command', ++ command_line='qemu-io nbd "write 0 512"') ++ self.assert_qmp(result, 'return', '') ++ ++ # Reconnect is done, so the reconnect delay timer should be gone. ++ # (But there used to be a bug where it remained active, for which this ++ # is a regression test.) ++ ++ # Delete the BDS to see whether the timer is gone. 
If it is not, ++ # it will remain active, fire later, and then access freed data. ++ # (Or, with "block/nbd: Assert there are no timers when closed" ++ # applied, the assertion added in that patch will fail.) ++ result = self.vm.qmp('blockdev-del', node_name='nbd') ++ self.assert_qmp(result, 'return', {}) ++ ++ # Give the timer some time to fire (it has a timeout of 1 s). ++ # (Sleeping in an iotest may ring some alarm bells, but note that if ++ # the timing is off here, the test will just always pass. If we kill ++ # the VM too early, then we just kill the timer before it can fire, ++ # thus not see the error, and so the test will pass.) ++ time.sleep(2) ++ ++ def create_nbd_export(self): ++ assert self.qsd is None ++ ++ # Simple NBD export of a null-co BDS ++ self.qsd = QemuStorageDaemon( ++ '--blockdev', ++ 'null-co,node-name=null,read-zeroes=true', ++ ++ '--nbd-server', ++ f'addr.type=unix,addr.path={self.sock}', ++ ++ '--export', ++ 'nbd,id=exp,node-name=null,name=exp,writable=true' ++ ) ++ ++ def stop_nbd_export(self): ++ self.qsd.stop() ++ self.qsd = None ++ + if __name__ == '__main__': + iotests.main(supported_fmts=['qcow2'], + supported_protocols=['file']) +diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out +index 89968f35d7..914e3737bd 100644 +--- a/tests/qemu-iotests/281.out ++++ b/tests/qemu-iotests/281.out +@@ -1,5 +1,5 @@ +-.... ++..... 
+ ---------------------------------------------------------------------- +-Ran 4 tests ++Ran 5 tests + + OK +-- +2.27.0 + diff --git a/kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch b/kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch new file mode 100644 index 0000000..8616f1c --- /dev/null +++ b/kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch @@ -0,0 +1,106 @@ +From 37593348e7d95580fb2b0009dcb026c07367f1f8 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 3 Feb 2022 15:05:34 +0100 +Subject: [PATCH 2/8] iotests: Test blockdev-reopen with iothreads and + throttling + +RH-Author: Kevin Wolf +RH-MergeRequest: 73: block: Lock AioContext for drain_end in blockdev-reopen +RH-Commit: [2/2] d19d5fa9efa4813ece75708436891041754ab910 (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 2046659 +RH-Acked-by: Sergio Lopez +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Reitz + +The 'throttle' block driver implements .bdrv_co_drain_end, so +blockdev-reopen will have to wait for it to complete in the polling +loop at the end of qmp_blockdev_reopen(). This makes AIO_WAIT_WHILE() +release the AioContext lock, which causes a crash if the lock hasn't +correctly been taken. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20220203140534.36522-3-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit ee810602376125ca0e0afd6b7c715e13740978ea) +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/245 | 36 +++++++++++++++++++++++++++++++++--- + tests/qemu-iotests/245.out | 4 ++-- + 2 files changed, 35 insertions(+), 5 deletions(-) + +diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 +index 24ac43f70e..8cbed7821b 100755 +--- a/tests/qemu-iotests/245 ++++ b/tests/qemu-iotests/245 +@@ -1138,12 +1138,13 @@ class TestBlockdevReopen(iotests.QMPTestCase): + self.assertEqual(self.get_node('hd1'), None) + self.assert_qmp(self.get_node('hd2'), 'ro', True) + +- def run_test_iothreads(self, iothread_a, iothread_b, errmsg = None): +- opts = hd_opts(0) ++ def run_test_iothreads(self, iothread_a, iothread_b, errmsg = None, ++ opts_a = None, opts_b = None): ++ opts = opts_a or hd_opts(0) + result = self.vm.qmp('blockdev-add', conv_keys = False, **opts) + self.assert_qmp(result, 'return', {}) + +- opts2 = hd_opts(2) ++ opts2 = opts_b or hd_opts(2) + result = self.vm.qmp('blockdev-add', conv_keys = False, **opts2) + self.assert_qmp(result, 'return', {}) + +@@ -1194,6 +1195,35 @@ class TestBlockdevReopen(iotests.QMPTestCase): + def test_iothreads_switch_overlay(self): + self.run_test_iothreads('', 'iothread0') + ++ def test_iothreads_with_throttling(self): ++ # Create a throttle-group object ++ opts = { 'qom-type': 'throttle-group', 'id': 'group0', ++ 'limits': { 'iops-total': 1000 } } ++ result = self.vm.qmp('object-add', conv_keys = False, **opts) ++ self.assert_qmp(result, 'return', {}) ++ ++ # Options with a throttle filter between format and protocol ++ opts = [ ++ { ++ 'driver': iotests.imgfmt, ++ 'node-name': f'hd{idx}', ++ 'file' : { ++ 'node-name': f'hd{idx}-throttle', ++ 'driver': 'throttle', ++ 'throttle-group': 'group0', ++ 'file': { ++ 'driver': 'file', ++ 'node-name': f'hd{idx}-file', ++ 
'filename': hd_path[idx], ++ }, ++ }, ++ } ++ for idx in (0, 2) ++ ] ++ ++ self.run_test_iothreads('iothread0', 'iothread0', None, ++ opts[0], opts[1]) ++ + if __name__ == '__main__': + iotests.activate_logging() + iotests.main(supported_fmts=["qcow2"], +diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out +index 4eced19294..a4e04a3266 100644 +--- a/tests/qemu-iotests/245.out ++++ b/tests/qemu-iotests/245.out +@@ -17,8 +17,8 @@ read 1/1 bytes at offset 262152 + read 1/1 bytes at offset 262160 + 1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +-............... ++................ + ---------------------------------------------------------------------- +-Ran 25 tests ++Ran 26 tests + + OK +-- +2.27.0 + diff --git a/kvm-iotests.py-Add-QemuStorageDaemon-class.patch b/kvm-iotests.py-Add-QemuStorageDaemon-class.patch new file mode 100644 index 0000000..b215d23 --- /dev/null +++ b/kvm-iotests.py-Add-QemuStorageDaemon-class.patch @@ -0,0 +1,92 @@ +From c21502a220d107261c9a8627158f357489d86543 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:09 +0100 +Subject: [PATCH 5/8] iotests.py: Add QemuStorageDaemon class + +RH-Author: Hanna Reitz +RH-MergeRequest: 74: block/nbd: Handle AioContext changes +RH-Commit: [3/6] 5da1cda4d025c1bd7029ed8071b4ccf25459a878 (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2033626 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +This is a rather simple class that allows creating a QSD instance +running in the background and stopping it when no longer needed. + +The __del__ handler is a safety net for when something goes so wrong in +a test that e.g. the tearDown() method is not called (e.g. setUp() +launches the QSD, but then launching a VM fails). We do not want the +QSD to continue running after the test has failed, so __del__() will +take care to kill it. 
+ +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 091dc7b2b5553a529bff9a7bf9ad3bc85bc5bdcd) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/iotests.py | 40 +++++++++++++++++++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 83bfedb902..a51b5ce8cd 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -72,6 +72,8 @@ + qemu_prog = os.environ.get('QEMU_PROG', 'qemu') + qemu_opts = os.environ.get('QEMU_OPTIONS', '').strip().split(' ') + ++qsd_prog = os.environ.get('QSD_PROG', 'qemu-storage-daemon') ++ + gdb_qemu_env = os.environ.get('GDB_OPTIONS') + qemu_gdb = [] + if gdb_qemu_env: +@@ -312,6 +314,44 @@ def cmd(self, cmd): + return self._read_output() + + ++class QemuStorageDaemon: ++ def __init__(self, *args: str, instance_id: str = 'a'): ++ assert '--pidfile' not in args ++ self.pidfile = os.path.join(test_dir, f'qsd-{instance_id}-pid') ++ all_args = [qsd_prog] + list(args) + ['--pidfile', self.pidfile] ++ ++ # Cannot use with here, we want the subprocess to stay around ++ # pylint: disable=consider-using-with ++ self._p = subprocess.Popen(all_args) ++ while not os.path.exists(self.pidfile): ++ if self._p.poll() is not None: ++ cmd = ' '.join(all_args) ++ raise RuntimeError( ++ 'qemu-storage-daemon terminated with exit code ' + ++ f'{self._p.returncode}: {cmd}') ++ ++ time.sleep(0.01) ++ ++ with open(self.pidfile, encoding='utf-8') as f: ++ self._pid = int(f.read().strip()) ++ ++ assert self._pid == self._p.pid ++ ++ def stop(self, kill_signal=15): ++ self._p.send_signal(kill_signal) ++ self._p.wait() ++ self._p = None ++ ++ try: ++ os.remove(self.pidfile) ++ except OSError: ++ pass ++ ++ def __del__(self): ++ if self._p is not None: ++ self.stop(kill_signal=9) ++ ++ + def qemu_nbd(*args): + '''Run qemu-nbd in daemon mode and return the parent's exit 
code''' + return subprocess.call(qemu_nbd_args + ['--fork'] + list(args)) +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 60541da..c403f49 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -144,7 +144,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.2.0 -Release: 8%{?rcrel}%{?dist}%{?cc_suffix} +Release: 9%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -251,6 +251,22 @@ Patch53: kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch Patch54: kvm-iotests-stream-error-on-reset-New-test.patch # For bz#2042481 - [aarch64] Launch guest with "default-bus-bypass-iommu=off,iommu=smmuv3" and "iommu_platform=on", guest hangs after system_reset Patch55: kvm-hw-arm-smmuv3-Fix-device-reset.patch +# For bz#2046659 - qemu crash after execute blockdev-reopen with iothread +Patch56: kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch +# For bz#2046659 - qemu crash after execute blockdev-reopen with iothread +Patch57: kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch +# For bz#2033626 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch58: kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch +# For bz#2033626 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch59: kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch +# For bz#2033626 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch60: kvm-iotests.py-Add-QemuStorageDaemon-class.patch +# For bz#2033626 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch61: kvm-iotests-281-Test-lingering-timers.patch +# For bz#2033626 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch62: 
kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch +# For bz#2033626 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch63: kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch # Source-git patches @@ -1309,6 +1325,20 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Thu Feb 17 2022 Miroslav Rezanina - 6.2.0-9 +- kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch [bz#2046659] +- kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch [bz#2046659] +- kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch [bz#2033626] +- kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch [bz#2033626] +- kvm-iotests.py-Add-QemuStorageDaemon-class.patch [bz#2033626] +- kvm-iotests-281-Test-lingering-timers.patch [bz#2033626] +- kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch [bz#2033626] +- kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch [bz#2033626] +- Resolves: bz#2046659 + (qemu crash after execute blockdev-reopen with iothread) +- Resolves: bz#2033626 + (Qemu core dump when start guest with nbd node or do block jobs to nbd node) + * Mon Feb 14 2022 Miroslav Rezanina - 6.2.0-8 - kvm-numa-Enable-numa-for-SGX-EPC-sections.patch [bz#2033708] - kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch [bz#2033708] diff --git a/rpminspect.yaml b/rpminspect.yaml index 53ce59a..2cc100e 100644 --- a/rpminspect.yaml +++ b/rpminspect.yaml @@ -3,4 +3,8 @@ elf: exclude_path: (.*s390-ccw.img.*)|(.*s390-netboot.img.*) inspections: badfuncs: off - +annocheck: + - hardened: --skip-cf-protection --skip-property-note --ignore-unknown --verbose + ignore: + - /usr/share/qemu-kvm/s390-ccw.img + - /usr/share/qemu-kvm/s390-netboot.img on s390x From 4d1d207d613891caa118ed8fd55c834ad07cbdc5 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 24 Feb 2022 01:48:49 -0500 Subject: [PATCH 152/195] * Thu Feb 24 2022 Miroslav Rezanina - 6.2.0-10 - 
kvm-Revert-ui-clipboard-Don-t-use-g_autoptr-just-to-free.patch [bz#2042820] - kvm-ui-avoid-compiler-warnings-from-unused-clipboard-inf.patch [bz#2042820] - kvm-ui-clipboard-fix-use-after-free-regression.patch [bz#2042820] - kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch [bz#2042820] - kvm-memory-Fix-incorrect-calls-of-log_global_start-stop.patch [bz#2044818] - kvm-memory-Fix-qemu-crash-on-starting-dirty-log-twice-wi.patch [bz#2044818] - Resolves: bz#2042820 (qemu crash when try to copy and paste contents from client to VM) - Resolves: bz#2044818 (Qemu Core Dumped when migrate -> migrate_cancel -> migrate again during guest is paused) --- ...ard-Don-t-use-g_autoptr-just-to-free.patch | 45 +++++ ...rrect-calls-of-log_global_start-stop.patch | 97 +++++++++++ ...crash-on-starting-dirty-log-twice-wi.patch | 156 ++++++++++++++++++ ...r-warnings-from-unused-clipboard-inf.patch | 63 +++++++ ...pboard-fix-use-after-free-regression.patch | 49 ++++++ kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch | 80 +++++++++ qemu-kvm.spec | 26 ++- 7 files changed, 515 insertions(+), 1 deletion(-) create mode 100644 kvm-Revert-ui-clipboard-Don-t-use-g_autoptr-just-to-free.patch create mode 100644 kvm-memory-Fix-incorrect-calls-of-log_global_start-stop.patch create mode 100644 kvm-memory-Fix-qemu-crash-on-starting-dirty-log-twice-wi.patch create mode 100644 kvm-ui-avoid-compiler-warnings-from-unused-clipboard-inf.patch create mode 100644 kvm-ui-clipboard-fix-use-after-free-regression.patch create mode 100644 kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch diff --git a/kvm-Revert-ui-clipboard-Don-t-use-g_autoptr-just-to-free.patch b/kvm-Revert-ui-clipboard-Don-t-use-g_autoptr-just-to-free.patch new file mode 100644 index 0000000..d9d5145 --- /dev/null +++ b/kvm-Revert-ui-clipboard-Don-t-use-g_autoptr-just-to-free.patch @@ -0,0 +1,45 @@ +From 213d2c6d3138f3570bca36edaacfd1ee86b18967 Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Fri, 4 Feb 2022 06:45:51 +0100 +Subject: [PATCH 1/6] Revert "ui/clipboard: Don't 
use g_autoptr just to free a + variable" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Gerd Hoffmann +RH-MergeRequest: 75: fix vnc cut+paste crash +RH-Commit: [1/4] 0937d15054ad6e902bc22d1872231504f442ddcc (kraxel/centos-qemu-kvm) +RH-Bugzilla: 2042820 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Miroslav Rezanina + +This reverts commit 8df1ea81ee6c674522967d056daa8d3748fa3883. +--- + ui/clipboard.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/ui/clipboard.c b/ui/clipboard.c +index d53576b0f6..d7b008d62a 100644 +--- a/ui/clipboard.c ++++ b/ui/clipboard.c +@@ -44,14 +44,13 @@ void qemu_clipboard_peer_release(QemuClipboardPeer *peer, + + void qemu_clipboard_update(QemuClipboardInfo *info) + { +- QemuClipboardInfo *old = NULL; ++ g_autoptr(QemuClipboardInfo) old = NULL; + assert(info->selection < QEMU_CLIPBOARD_SELECTION__COUNT); + + notifier_list_notify(&clipboard_notifiers, info); + + old = cbinfo[info->selection]; + cbinfo[info->selection] = qemu_clipboard_info_ref(info); +- g_free(old); + } + + QemuClipboardInfo *qemu_clipboard_info(QemuClipboardSelection selection) +-- +2.27.0 + diff --git a/kvm-memory-Fix-incorrect-calls-of-log_global_start-stop.patch b/kvm-memory-Fix-incorrect-calls-of-log_global_start-stop.patch new file mode 100644 index 0000000..5ff2734 --- /dev/null +++ b/kvm-memory-Fix-incorrect-calls-of-log_global_start-stop.patch @@ -0,0 +1,97 @@ +From b169059c8fbf15c3ffeec0f68b938cb9febd8db7 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 30 Nov 2021 16:00:28 +0800 +Subject: [PATCH 5/6] memory: Fix incorrect calls of log_global_start/stop +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 77: memory: Fix qemu crash on continuous migrations of stopped VM +RH-Commit: [1/2] 6271ee689266b24d29d4c87f60e5b096ef5f5d63 (peterx/qemu-kvm) +RH-Bugzilla: 
2044818 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Acked-by: quintela1 + +We should only call the log_global_start/stop when the global dirty track +bitmask changes from zero<->non-zero. + +No real issue reported for this yet probably because no immediate user to +enable both dirty rate measurement and migration at the same time. However +it'll be good to be prepared for it. + +Fixes: 63b41db4bc ("memory: make global_dirty_tracking a bitmask") +Cc: qemu-stable@nongnu.org +Cc: Hyman Huang +Cc: Paolo Bonzini +Cc: Dr. David Alan Gilbert +Cc: Juan Quintela +Cc: David Hildenbrand +Signed-off-by: Peter Xu +Reviewed-by: David Hildenbrand +Message-Id: <20211130080028.6474-1-peterx@redhat.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 7b0538ed3a22ce30817f818449d10701fb0821f9) +Signed-off-by: Peter Xu +--- + softmmu/memory.c | 27 ++++++++++++++------------- + 1 file changed, 14 insertions(+), 13 deletions(-) + +diff --git a/softmmu/memory.c b/softmmu/memory.c +index 7340e19ff5..81d4bf1454 100644 +--- a/softmmu/memory.c ++++ b/softmmu/memory.c +@@ -2773,6 +2773,8 @@ static VMChangeStateEntry *vmstate_change; + + void memory_global_dirty_log_start(unsigned int flags) + { ++ unsigned int old_flags = global_dirty_tracking; ++ + if (vmstate_change) { + qemu_del_vm_change_state_handler(vmstate_change); + vmstate_change = NULL; +@@ -2781,15 +2783,14 @@ void memory_global_dirty_log_start(unsigned int flags) + assert(flags && !(flags & (~GLOBAL_DIRTY_MASK))); + assert(!(global_dirty_tracking & flags)); + global_dirty_tracking |= flags; +- + trace_global_dirty_changed(global_dirty_tracking); + +- MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward); +- +- /* Refresh DIRTY_MEMORY_MIGRATION bit. 
*/ +- memory_region_transaction_begin(); +- memory_region_update_pending = true; +- memory_region_transaction_commit(); ++ if (!old_flags) { ++ MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward); ++ memory_region_transaction_begin(); ++ memory_region_update_pending = true; ++ memory_region_transaction_commit(); ++ } + } + + static void memory_global_dirty_log_do_stop(unsigned int flags) +@@ -2800,12 +2801,12 @@ static void memory_global_dirty_log_do_stop(unsigned int flags) + + trace_global_dirty_changed(global_dirty_tracking); + +- /* Refresh DIRTY_MEMORY_MIGRATION bit. */ +- memory_region_transaction_begin(); +- memory_region_update_pending = true; +- memory_region_transaction_commit(); +- +- MEMORY_LISTENER_CALL_GLOBAL(log_global_stop, Reverse); ++ if (!global_dirty_tracking) { ++ memory_region_transaction_begin(); ++ memory_region_update_pending = true; ++ memory_region_transaction_commit(); ++ MEMORY_LISTENER_CALL_GLOBAL(log_global_stop, Reverse); ++ } + } + + static void memory_vm_change_state_handler(void *opaque, bool running, +-- +2.27.0 + diff --git a/kvm-memory-Fix-qemu-crash-on-starting-dirty-log-twice-wi.patch b/kvm-memory-Fix-qemu-crash-on-starting-dirty-log-twice-wi.patch new file mode 100644 index 0000000..5ea0007 --- /dev/null +++ b/kvm-memory-Fix-qemu-crash-on-starting-dirty-log-twice-wi.patch @@ -0,0 +1,156 @@ +From b3ed8e344c733bc8c2223c1b9e424a9fbcea56d4 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Mon, 7 Feb 2022 20:30:19 +0800 +Subject: [PATCH 6/6] memory: Fix qemu crash on starting dirty log twice with + stopped VM + +RH-Author: Peter Xu +RH-MergeRequest: 77: memory: Fix qemu crash on continuous migrations of stopped VM +RH-Commit: [2/2] 98ed2ef6226ec80a1896ebb554015aded0dc0c18 (peterx/qemu-kvm) +RH-Bugzilla: 2044818 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Acked-by: quintela1 + +QEMU can now easily crash with two continuous migration carried out: + +(qemu) migrate -d exec:cat>out +(qemu) migrate_cancel +(qemu) 
migrate -d exec:cat>out +[crash] ../softmmu/memory.c:2782: memory_global_dirty_log_start: Assertion +`!(global_dirty_tracking & flags)' failed. + +It's because memory API provides a way to postpone dirty log stop if the VM is +stopped, and that'll be re-done until the next VM start. It was added in 2017 +with commit 1931076077 ("migration: optimize the downtime", 2017-08-01). + +However the recent work on allowing dirty tracking to be bitmask broke it, +which is commit 63b41db4bc ("memory: make global_dirty_tracking a bitmask", +2021-11-01). + +The fix proposed in this patch contains two things: + + (1) Instead of passing over the flags to postpone stop dirty track, we add a + global variable (along with current vmstate_change variable) to record + what flags to stop dirty tracking. + + (2) When starting dirty tracking, instead of removing the vmstate hook directly, + we also execute the postponed stop process so that we make sure all the + starts and stops will be paired. + +This procedure is overlooked in the bitmask-ify work in 2021. + +Cc: Hyman Huang +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2044818 +Fixes: 63b41db4bc ("memory: make global_dirty_tracking a bitmask") +Signed-off-by: Peter Xu +Message-Id: <20220207123019.27223-1-peterx@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a5c90c61a118027b86155cffdf4fe4e2e9de1020) +Signed-off-by: Peter Xu +--- + softmmu/memory.c | 61 +++++++++++++++++++++++++++++++++++------------- + 1 file changed, 45 insertions(+), 16 deletions(-) + +diff --git a/softmmu/memory.c b/softmmu/memory.c +index 81d4bf1454..0311e362ee 100644 +--- a/softmmu/memory.c ++++ b/softmmu/memory.c +@@ -2769,19 +2769,32 @@ void memory_global_after_dirty_log_sync(void) + MEMORY_LISTENER_CALL_GLOBAL(log_global_after_sync, Forward); + } + ++/* ++ * Dirty track stop flags that are postponed due to VM being stopped. Should ++ * only be used within vmstate_change hook.
++ */ ++static unsigned int postponed_stop_flags; + static VMChangeStateEntry *vmstate_change; ++static void memory_global_dirty_log_stop_postponed_run(void); + + void memory_global_dirty_log_start(unsigned int flags) + { +- unsigned int old_flags = global_dirty_tracking; ++ unsigned int old_flags; ++ ++ assert(flags && !(flags & (~GLOBAL_DIRTY_MASK))); + + if (vmstate_change) { +- qemu_del_vm_change_state_handler(vmstate_change); +- vmstate_change = NULL; ++ /* If there is postponed stop(), operate on it first */ ++ postponed_stop_flags &= ~flags; ++ memory_global_dirty_log_stop_postponed_run(); + } + +- assert(flags && !(flags & (~GLOBAL_DIRTY_MASK))); +- assert(!(global_dirty_tracking & flags)); ++ flags &= ~global_dirty_tracking; ++ if (!flags) { ++ return; ++ } ++ ++ old_flags = global_dirty_tracking; + global_dirty_tracking |= flags; + trace_global_dirty_changed(global_dirty_tracking); + +@@ -2809,29 +2822,45 @@ static void memory_global_dirty_log_do_stop(unsigned int flags) + } + } + ++/* ++ * Execute the postponed dirty log stop operations if there is, then reset ++ * everything (including the flags and the vmstate change hook). 
++ */ ++static void memory_global_dirty_log_stop_postponed_run(void) ++{ ++ /* This must be called with the vmstate handler registered */ ++ assert(vmstate_change); ++ ++ /* Note: postponed_stop_flags can be cleared in log start routine */ ++ if (postponed_stop_flags) { ++ memory_global_dirty_log_do_stop(postponed_stop_flags); ++ postponed_stop_flags = 0; ++ } ++ ++ qemu_del_vm_change_state_handler(vmstate_change); ++ vmstate_change = NULL; ++} ++ + static void memory_vm_change_state_handler(void *opaque, bool running, + RunState state) + { +- unsigned int flags = (unsigned int)(uintptr_t)opaque; + if (running) { +- memory_global_dirty_log_do_stop(flags); +- +- if (vmstate_change) { +- qemu_del_vm_change_state_handler(vmstate_change); +- vmstate_change = NULL; +- } ++ memory_global_dirty_log_stop_postponed_run(); + } + } + + void memory_global_dirty_log_stop(unsigned int flags) + { + if (!runstate_is_running()) { ++ /* Postpone the dirty log stop, e.g., to when VM starts again */ + if (vmstate_change) { +- return; ++ /* Batch with previous postponed flags */ ++ postponed_stop_flags |= flags; ++ } else { ++ postponed_stop_flags = flags; ++ vmstate_change = qemu_add_vm_change_state_handler( ++ memory_vm_change_state_handler, NULL); + } +- vmstate_change = qemu_add_vm_change_state_handler( +- memory_vm_change_state_handler, +- (void *)(uintptr_t)flags); + return; + } + +-- +2.27.0 + diff --git a/kvm-ui-avoid-compiler-warnings-from-unused-clipboard-inf.patch b/kvm-ui-avoid-compiler-warnings-from-unused-clipboard-inf.patch new file mode 100644 index 0000000..6b7173e --- /dev/null +++ b/kvm-ui-avoid-compiler-warnings-from-unused-clipboard-inf.patch @@ -0,0 +1,63 @@ +From c5ff43026547ea20fbb496c5b6734b7e64362151 Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Mon, 14 Feb 2022 12:37:49 +0100 +Subject: [PATCH 2/6] ui: avoid compiler warnings from unused clipboard info + variable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 
8bit + +RH-Author: Gerd Hoffmann +RH-MergeRequest: 75: fix vnc cut+paste crash +RH-Commit: [2/4] 6a7982a0bab86bf843fd65842e730a61b2fa2cb0 (kraxel/centos-qemu-kvm) +RH-Bugzilla: 2042820 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Miroslav Rezanina + +With latest clang 13.0.0 we get + +../ui/clipboard.c:47:34: error: variable 'old' set but not used [-Werror,-Wunused-but-set-variable] + g_autoptr(QemuClipboardInfo) old = NULL; + ^ + +The compiler can't tell that we only declared this variable in +order to get the side effect of free'ing it when out of scope. + +This pattern is a little dubious for a use of g_autoptr, so +rewrite the code to avoid it. + +Reviewed-by: Richard Henderson +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Philippe Mathieu-Daudé +Signed-off-by: Daniel P. Berrangé +[AJB: fix merge conflict] +Signed-off-by: Alex Bennée +Message-Id: <20211215141949.3512719-2-berrange@redhat.com> +Message-Id: <20220105135009.1584676-2-alex.bennee@linaro.org> +(cherry picked from commit 70a54b01693eda3c61814b05d699aba41015ac48) +--- + ui/clipboard.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/ui/clipboard.c b/ui/clipboard.c +index d7b008d62a..7672058e84 100644 +--- a/ui/clipboard.c ++++ b/ui/clipboard.c +@@ -44,12 +44,11 @@ void qemu_clipboard_peer_release(QemuClipboardPeer *peer, + + void qemu_clipboard_update(QemuClipboardInfo *info) + { +- g_autoptr(QemuClipboardInfo) old = NULL; + assert(info->selection < QEMU_CLIPBOARD_SELECTION__COUNT); + + notifier_list_notify(&clipboard_notifiers, info); + +- old = cbinfo[info->selection]; ++ qemu_clipboard_info_unref(cbinfo[info->selection]); + cbinfo[info->selection] = qemu_clipboard_info_ref(info); + } + +-- +2.27.0 + diff --git a/kvm-ui-clipboard-fix-use-after-free-regression.patch b/kvm-ui-clipboard-fix-use-after-free-regression.patch new file mode 100644 index 0000000..e14cb23 --- /dev/null +++ b/kvm-ui-clipboard-fix-use-after-free-regression.patch @@ 
-0,0 +1,49 @@ +From 965275cd87f8008f129509c6d6fd0096e8ac2d96 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Mon, 14 Feb 2022 15:59:17 +0400 +Subject: [PATCH 3/6] ui/clipboard: fix use-after-free regression +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Gerd Hoffmann +RH-MergeRequest: 75: fix vnc cut+paste crash +RH-Commit: [3/4] d8f68e0eb60d9aaa9a703d969f215816bf35f6f0 (kraxel/centos-qemu-kvm) +RH-Bugzilla: 2042820 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Miroslav Rezanina + +The same info may be used to update the clipboard, and may be freed +before being ref'ed again. + +Fixes: 70a54b01693ed ("ui: avoid compiler warnings from unused clipboard info variable") + +Signed-off-by: Marc-André Lureau +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220214115917.1679568-1-marcandre.lureau@redhat.com> +Signed-off-by: Gerd Hoffmann +--- + ui/clipboard.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/ui/clipboard.c b/ui/clipboard.c +index 7672058e84..d7dae13760 100644 +--- a/ui/clipboard.c ++++ b/ui/clipboard.c +@@ -48,8 +48,10 @@ void qemu_clipboard_update(QemuClipboardInfo *info) + + notifier_list_notify(&clipboard_notifiers, info); + +- qemu_clipboard_info_unref(cbinfo[info->selection]); +- cbinfo[info->selection] = qemu_clipboard_info_ref(info); ++ if (cbinfo[info->selection] != info) { ++ qemu_clipboard_info_unref(cbinfo[info->selection]); ++ cbinfo[info->selection] = qemu_clipboard_info_ref(info); ++ } + } + + QemuClipboardInfo *qemu_clipboard_info(QemuClipboardSelection selection) +-- +2.27.0 + diff --git a/kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch b/kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch new file mode 100644 index 0000000..6d3802c --- /dev/null +++ b/kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch @@ -0,0 +1,80 @@ +From 14582cfec72e52894f16ed5c3fb14adb2d6d8e25 Mon Sep 17 00:00:00 2001 +From: Rao Lei +Date: Wed, 5 Jan 
2022 10:08:08 +0800 +Subject: [PATCH 4/6] ui/vnc.c: Fixed a deadlock bug. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Gerd Hoffmann +RH-MergeRequest: 75: fix vnc cut+paste crash +RH-Commit: [4/4] 5321e447de974d91e9a6c0cf01f4352166ffb7ce (kraxel/centos-qemu-kvm) +RH-Bugzilla: 2042820 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Miroslav Rezanina + +The GDB stack is as follows: +(gdb) bt +0 __lll_lock_wait (futex=futex@entry=0x56211df20360, private=0) at lowlevellock.c:52 +1 0x00007f263caf20a3 in __GI___pthread_mutex_lock (mutex=0x56211df20360) at ../nptl/pthread_mutex_lock.c:80 +2 0x000056211a757364 in qemu_mutex_lock_impl (mutex=0x56211df20360, file=0x56211a804857 "../ui/vnc-jobs.h", line=60) + at ../util/qemu-thread-posix.c:80 +3 0x000056211a0ef8c7 in vnc_lock_output (vs=0x56211df14200) at ../ui/vnc-jobs.h:60 +4 0x000056211a0efcb7 in vnc_clipboard_send (vs=0x56211df14200, count=1, dwords=0x7ffdf1701338) at ../ui/vnc-clipboard.c:138 +5 0x000056211a0f0129 in vnc_clipboard_notify (notifier=0x56211df244c8, data=0x56211dd1bbf0) at ../ui/vnc-clipboard.c:209 +6 0x000056211a75dde8 in notifier_list_notify (list=0x56211afa17d0 , data=0x56211dd1bbf0) at ../util/notify.c:39 +7 0x000056211a0bf0e6 in qemu_clipboard_update (info=0x56211dd1bbf0) at ../ui/clipboard.c:50 +8 0x000056211a0bf05d in qemu_clipboard_peer_release (peer=0x56211df244c0, selection=QEMU_CLIPBOARD_SELECTION_CLIPBOARD) + at ../ui/clipboard.c:41 +9 0x000056211a0bef9b in qemu_clipboard_peer_unregister (peer=0x56211df244c0) at ../ui/clipboard.c:19 +10 0x000056211a0d45f3 in vnc_disconnect_finish (vs=0x56211df14200) at ../ui/vnc.c:1358 +11 0x000056211a0d4c9d in vnc_client_read (vs=0x56211df14200) at ../ui/vnc.c:1611 +12 0x000056211a0d4df8 in vnc_client_io (ioc=0x56211ce70690, condition=G_IO_IN, opaque=0x56211df14200) at ../ui/vnc.c:1649 +13 0x000056211a5b976c in qio_channel_fd_source_dispatch + (source=0x56211ce50a00, 
callback=0x56211a0d4d71 , user_data=0x56211df14200) at ../io/channel-watch.c:84 +14 0x00007f263ccede8e in g_main_context_dispatch () at /lib/x86_64-linux-gnu/libglib-2.0.so.0 +15 0x000056211a77d4a1 in glib_pollfds_poll () at ../util/main-loop.c:232 +16 0x000056211a77d51f in os_host_main_loop_wait (timeout=958545) at ../util/main-loop.c:255 +17 0x000056211a77d630 in main_loop_wait (nonblocking=0) at ../util/main-loop.c:531 +18 0x000056211a45bc8e in qemu_main_loop () at ../softmmu/runstate.c:726 +19 0x000056211a0b45fa in main (argc=69, argv=0x7ffdf1701778, envp=0x7ffdf17019a8) at ../softmmu/main.c:50 + +From the call trace, we can see it is a deadlock bug. +vnc_disconnect_finish will acquire the output_mutex. +But, the output_mutex will be acquired again in vnc_clipboard_send. +Repeated locking will cause deadlock. So, I move +qemu_clipboard_peer_unregister() behind vnc_unlock_output(); + +Fixes: 0bf41cab93e ("ui/vnc: clipboard support") +Signed-off-by: Lei Rao +Reviewed-by: Marc-André Lureau +Message-Id: <20220105020808.597325-1-lei.rao@intel.com> +Signed-off-by: Gerd Hoffmann +(cherry picked from commit 1dbbe6f172810026c51dc84ed927a3cc23017949) +--- + ui/vnc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/ui/vnc.c b/ui/vnc.c +index af02522e84..b253e85c65 100644 +--- a/ui/vnc.c ++++ b/ui/vnc.c +@@ -1354,12 +1354,12 @@ void vnc_disconnect_finish(VncState *vs) + /* last client gone */ + vnc_update_server_surface(vs->vd); + } ++ vnc_unlock_output(vs); ++ + if (vs->cbpeer.update.notify) { + qemu_clipboard_peer_unregister(&vs->cbpeer); + } + +- vnc_unlock_output(vs); +- + qemu_mutex_destroy(&vs->output_mutex); + if (vs->bh != NULL) { + qemu_bh_delete(vs->bh); +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index c403f49..55c53a7 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -144,7 +144,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.2.0 -Release: 
9%{?rcrel}%{?dist}%{?cc_suffix} +Release: 10%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -267,6 +267,18 @@ Patch61: kvm-iotests-281-Test-lingering-timers.patch Patch62: kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch # For bz#2033626 - Qemu core dump when start guest with nbd node or do block jobs to nbd node Patch63: kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch +# For bz#2042820 - qemu crash when try to copy and paste contents from client to VM +Patch64: kvm-Revert-ui-clipboard-Don-t-use-g_autoptr-just-to-free.patch +# For bz#2042820 - qemu crash when try to copy and paste contents from client to VM +Patch65: kvm-ui-avoid-compiler-warnings-from-unused-clipboard-inf.patch +# For bz#2042820 - qemu crash when try to copy and paste contents from client to VM +Patch66: kvm-ui-clipboard-fix-use-after-free-regression.patch +# For bz#2042820 - qemu crash when try to copy and paste contents from client to VM +Patch67: kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch +# For bz#2044818 - Qemu Core Dumped when migrate -> migrate_cancel -> migrate again during guest is paused +Patch68: kvm-memory-Fix-incorrect-calls-of-log_global_start-stop.patch +# For bz#2044818 - Qemu Core Dumped when migrate -> migrate_cancel -> migrate again during guest is paused +Patch69: kvm-memory-Fix-qemu-crash-on-starting-dirty-log-twice-wi.patch # Source-git patches @@ -1325,6 +1337,18 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Thu Feb 24 2022 Miroslav Rezanina - 6.2.0-10 +- kvm-Revert-ui-clipboard-Don-t-use-g_autoptr-just-to-free.patch [bz#2042820] +- kvm-ui-avoid-compiler-warnings-from-unused-clipboard-inf.patch [bz#2042820] +- kvm-ui-clipboard-fix-use-after-free-regression.patch [bz#2042820] +- kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch [bz#2042820] +- 
kvm-memory-Fix-incorrect-calls-of-log_global_start-stop.patch [bz#2044818] +- kvm-memory-Fix-qemu-crash-on-starting-dirty-log-twice-wi.patch [bz#2044818] +- Resolves: bz#2042820 + (qemu crash when try to copy and paste contents from client to VM) +- Resolves: bz#2044818 + (Qemu Core Dumped when migrate -> migrate_cancel -> migrate again during guest is paused) + * Thu Feb 17 2022 Miroslav Rezanina - 6.2.0-9 - kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch [bz#2046659] - kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch [bz#2046659] From 522a8f6bdbbc532c64c9e12ebe4b8e6b943661d1 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Tue, 1 Mar 2022 05:45:09 -0500 Subject: [PATCH 153/195] * Tue Mar 01 2022 Miroslav Rezanina - 6.2.0-11 - kvm-spec-Remove-qemu-virtiofsd.patch [bz#2055284] - Resolves: bz#2055284 (Remove the qemu-virtiofsd subpackage) --- qemu-kvm.spec | 28 ++++++++++------------------ rpminspect.yaml | 2 +- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 55c53a7..2d4f8e0 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -144,7 +144,7 @@ Obsoletes: %{name}-block-iscsi <= %{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.2.0 -Release: 10%{?rcrel}%{?dist}%{?cc_suffix} +Release: 11%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -443,15 +443,6 @@ This package provides the qemu-pr-helper utility that is required for certain SCSI features. -%package -n qemu-virtiofsd -Summary: QEMU virtio-fs shared file system daemon -Provides: virtiofsd -%description -n qemu-virtiofsd -This package provides virtiofsd daemon. This program is a vhost-user backend -that implements the virtio-fs device that is used for sharing a host directory -tree with a guest. 
- - %package -n qemu-img Summary: QEMU command line tool for manipulating disk images %description -n qemu-img @@ -1109,6 +1100,10 @@ rm -rf %{buildroot}%{_datadir}/%{name}/vgabios*bin rm -rf %{buildroot}%{_datadir}/%{name}/bios*.bin rm -rf %{buildroot}%{_datadir}/%{name}/sgabios.bin +# Remove virtiofsd (we use separate package for virtiofsd) +rm -rf %{buildroot}%{_mandir}/man1/virtiofsd.1* +rm -rf %{buildroot}%{_libexecdir}/virtiofsd +rm -rf %{buildroot}%{_datadir}/qemu/vhost-user/50-qemu-virtiofsd.json %if %{have_modules_load} install -D -p -m 644 %{_sourcedir}/modules-load.conf %{buildroot}%{_sysconfdir}/modules-load.d/kvm.conf @@ -1222,14 +1217,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_unitdir}/qemu-pr-helper.socket %{_mandir}/man8/qemu-pr-helper.8* -%files -n qemu-virtiofsd -%{_mandir}/man1/virtiofsd.1* -%{_libexecdir}/virtiofsd -# This is the standard location for vhost-user JSON files defined in the -# vhost-user specification for interoperability with other software. Unlike -# most other paths we use it's "qemu" instead of "qemu-kvm". 
-%{_datadir}/qemu/vhost-user/50-qemu-virtiofsd.json - %files docs %doc %{qemudocdir} @@ -1337,6 +1324,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Tue Mar 01 2022 Miroslav Rezanina - 6.2.0-11 +- kvm-spec-Remove-qemu-virtiofsd.patch [bz#2055284] +- Resolves: bz#2055284 + (Remove the qemu-virtiofsd subpackage) + * Thu Feb 24 2022 Miroslav Rezanina - 6.2.0-10 - kvm-Revert-ui-clipboard-Don-t-use-g_autoptr-just-to-free.patch [bz#2042820] - kvm-ui-avoid-compiler-warnings-from-unused-clipboard-inf.patch [bz#2042820] diff --git a/rpminspect.yaml b/rpminspect.yaml index 2cc100e..16aec7d 100644 --- a/rpminspect.yaml +++ b/rpminspect.yaml @@ -7,4 +7,4 @@ annocheck: - hardened: --skip-cf-protection --skip-property-note --ignore-unknown --verbose ignore: - /usr/share/qemu-kvm/s390-ccw.img - - /usr/share/qemu-kvm/s390-netboot.img on s390x + - /usr/share/qemu-kvm/s390-netboot.img From 8793744c141a1ea1459d679590e58bcbf7f2819e Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 21 Mar 2022 03:33:36 -0400 Subject: [PATCH 154/195] * Mon Mar 21 2022 Miroslav Rezanina - 6.2.0-12 - kvm-RHEL-mark-old-machine-types-as-deprecated.patch [bz#2062813] - kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch [bz#2062828] - kvm-spec-Fix-obsolete-for-spice-subpackages.patch [bz#2062819 bz#2062817] - kvm-spec-Obsolete-old-usb-redir-subpackage.patch [bz#2062819] - kvm-spec-Obsolete-ssh-driver.patch [bz#2062817] - Resolves: bz#2062828 ([virtual network][rhel9][vDPA] qemu crash after hot unplug vdpa device [rhel-9.1.0]) - Resolves: bz#2062819 (Broken upgrade path due to qemu-kvm-hw-usbredir rename [rhel-9.1.0]) - Resolves: bz#2062817 (Missing qemu-kvm-block-ssh obsolete breaks upgrade path [rhel-9.1.0]) - Resolves: bz#2062813 (Mark all RHEL-8 and earlier machine types as deprecated [rhel-9.1.0]) --- ...mark-old-machine-types-as-deprecated.patch | 108 ++++++++++++++++++ ...ix-leak-of-host-notifier-memory-regi.patch | 60 ++++++++++ qemu-kvm.spec | 
35 +++++- rpminspect.yaml | 1 + 4 files changed, 200 insertions(+), 4 deletions(-) create mode 100644 kvm-RHEL-mark-old-machine-types-as-deprecated.patch create mode 100644 kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch diff --git a/kvm-RHEL-mark-old-machine-types-as-deprecated.patch b/kvm-RHEL-mark-old-machine-types-as-deprecated.patch new file mode 100644 index 0000000..0b203cf --- /dev/null +++ b/kvm-RHEL-mark-old-machine-types-as-deprecated.patch @@ -0,0 +1,108 @@ +From 5b1b0ebbc938127e7cd0ea1056d8f21b6d51ff0d Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Thu, 3 Mar 2022 10:57:37 +0100 +Subject: [PATCH 1/5] RHEL: mark old machine types as deprecated + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 78: Synchronize with RHEL 9.0.0 build qemu-kvm-6.2.0-11.el9_0.1 +RH-Commit: [1/5] 88a9377cac9d4e9796f63c5726db7dc093c6460d (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 2062828 2062819 2062817 2062813 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cornelia Huck + +We want to make it obvious that we consider machine types for older +RHEL major releases to be deprecated; we only carry them for +compatibility purposes. + +Let's mark all rhel-7.x and rhel-8.x machine type as deprecated via +QEMU's existing deprecation mechanism; those machine types will +continue to work as expected, but commands like 'virsh capabilities', +'virsh dominfo', or the libvirt log will tag the machine as +deprecated. 
+ +Signed-off-by: Cornelia Huck + +Forward-port of RHEL 9.0.0 MR 119 (RHEL: mark old machine types as deprecated) +--- + hw/core/machine.c | 6 ++++++ + hw/i386/pc_piix.c | 4 ++++ + hw/i386/pc_q35.c | 4 ++++ + hw/s390x/s390-virtio-ccw.c | 3 +++ + include/hw/boards.h | 2 ++ + 5 files changed, 19 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 669d3d8b91..5fae55d6cd 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -37,6 +37,12 @@ + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-pci.h" + ++/* ++ * RHEL only: machine types for previous major releases are deprecated ++ */ ++const char *rhel_old_machine_deprecation = ++ "machine types for previous major releases are deprecated"; ++ + /* + * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index fccb7f5fc9..cf68d7498c 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -989,6 +989,10 @@ static void pc_machine_rhel760_options(MachineClass *m) + m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; + m->async_pf_vmexit_disable = true; + m->smbus_no_migration_support = true; ++ ++ /* All RHEL machines for prior major releases are deprecated */ ++ m->deprecation_reason = rhel_old_machine_deprecation; ++ + pcmc->pvh_enabled = false; + pcmc->default_cpu_version = CPU_VERSION_LEGACY; + pcmc->kvmclock_create_always = false; +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index bf9ad32f0e..c8e06da084 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -674,6 +674,10 @@ static void pc_q35_machine_rhel860_options(MachineClass *m) + pc_q35_machine_rhel900_options(m); + m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)"; + m->alias = NULL; ++ ++ /* All RHEL machines for prior major releases are deprecated */ ++ m->deprecation_reason = rhel_old_machine_deprecation; ++ + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.6.0"; + } +diff --git a/hw/s390x/s390-virtio-ccw.c 
b/hw/s390x/s390-virtio-ccw.c +index 9da6e9b1d4..cc78a315e3 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1121,6 +1121,9 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) + static void ccw_machine_rhel860_class_options(MachineClass *mc) + { + ccw_machine_rhel900_class_options(mc); ++ ++ /* All RHEL machines for prior major releases are deprecated */ ++ mc->deprecation_reason = rhel_old_machine_deprecation; + } + DEFINE_CCW_MACHINE(rhel860, "rhel8.6.0", false); + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 3c3d2ad450..21d8d5528e 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -464,4 +464,6 @@ extern const size_t hw_compat_rhel_8_0_len; + extern GlobalProperty hw_compat_rhel_7_6[]; + extern const size_t hw_compat_rhel_7_6_len; + ++extern const char *rhel_old_machine_deprecation; ++ + #endif +-- +2.31.1 + diff --git a/kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch b/kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch new file mode 100644 index 0000000..767991d --- /dev/null +++ b/kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch @@ -0,0 +1,60 @@ +From f62b9eb18b0cc7ceb5a842aa0db43dae9a568647 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 11 Feb 2022 18:02:59 +0100 +Subject: [PATCH 2/5] hw/virtio: vdpa: Fix leak of host-notifier memory-region + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 78: Synchronize with RHEL 9.0.0 build qemu-kvm-6.2.0-11.el9_0.1 +RH-Commit: [2/5] 38cb408826a6925fc7c482a03e4364c6f918396e (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 2062828 2062819 2062817 2062813 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cornelia Huck + +BZ: https://bugzilla.redhat.com/2059786 +BRANCH: rhel-9.0.0 +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=43688388 +UPTREAM: Merged + +If call virtio_queue_set_host_notifier_mr fails, should free +host-notifier memory-region. 
+ +This problem can trigger a coredump with some vDPA drivers (mlx5, +but not with the vdpasim), if we unplug the virtio-net card from +the guest after a stop/start. + +The same fix has been done for vhost-user: + 1f89d3b91e3e ("hw/virtio: Fix leak of host-notifier memory-region") + +Fixes: d0416d487bd5 ("vhost-vdpa: map virtqueue notification area if possible") +Cc: jasowang@redhat.com +Resolves: https://bugzilla.redhat.com/2027208 +Signed-off-by: Laurent Vivier +Message-Id: <20220211170259.1388734-1-lvivier@redhat.com> +Cc: qemu-stable@nongnu.org +Acked-by: Jason Wang +Reviewed-by: Stefano Garzarella +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 98f7607ecda00dea3cbb2ed7b4427c96846efb83) +Signed-off-by: Laurent Vivier + +Forward-port of RHEL 9.0.0 MR 123 (hw/virtio: vdpa: Fix leak of host-notifier memory-region) +--- + hw/virtio/vhost-vdpa.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index bcaf00e09f..78da48a333 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -415,6 +415,7 @@ static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index) + g_free(name); + + if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) { ++ object_unparent(OBJECT(&n->mr)); + munmap(addr, page_size); + goto err; + } +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 2d4f8e0..0634b34 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -135,16 +135,23 @@ Requires: %{name}-audio-pa = %{epoch}:%{version}-%{release} # removes {name}-ui-spice for upgrades from RHEL-8 # The "<= {version}" assumes RHEL-9 version >= RHEL-8 version (in # other words RHEL-9 rebases are done together/before RHEL-8 ones) + +# In addition, we obsolete some block drivers as we are no longer support +# them in default qemu-kvm installation. 
+ +# Note: ssh driver wasn't removed yet just disabled due to late handling + %global obsoletes_some_modules \ -Obsoletes: %{name}-ui-spice <= %{version} \ -Obsoletes: %{name}-block-gluster <= %{version} \ -Obsoletes: %{name}-block-iscsi <= %{version} \ +Obsoletes: %{name}-ui-spice <= %{epoch}:%{version} \ +Obsoletes: %{name}-block-gluster <= %{epoch}:%{version} \ +Obsoletes: %{name}-block-iscsi <= %{epoch}:%{version} \ +Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.2.0 -Release: 11%{?rcrel}%{?dist}%{?cc_suffix} +Release: 12%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -279,6 +286,10 @@ Patch67: kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch Patch68: kvm-memory-Fix-incorrect-calls-of-log_global_start-stop.patch # For bz#2044818 - Qemu Core Dumped when migrate -> migrate_cancel -> migrate again during guest is paused Patch69: kvm-memory-Fix-qemu-crash-on-starting-dirty-log-twice-wi.patch +# For bz#2062813 - Mark all RHEL-8 and earlier machine types as deprecated [rhel-9.1.0] +Patch70: kvm-RHEL-mark-old-machine-types-as-deprecated.patch +# For bz#2062828 - [virtual network][rhel9][vDPA] qemu crash after hot unplug vdpa device [rhel-9.1.0] +Patch71: kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch # Source-git patches @@ -594,6 +605,7 @@ Summary: QEMU usbredir support Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} Requires: usbredir >= 0.7.1 Provides: %{name}-hw-usbredir +Obsoletes: %{name}-hw-usbredir <= %{epoch}:%{version} %description device-usb-redirect This package provides usbredir support. 
@@ -1324,6 +1336,21 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Mar 21 2022 Miroslav Rezanina - 6.2.0-12 +- kvm-RHEL-mark-old-machine-types-as-deprecated.patch [bz#2062813] +- kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch [bz#2062828] +- kvm-spec-Fix-obsolete-for-spice-subpackages.patch [bz#2062819 bz#2062817] +- kvm-spec-Obsolete-old-usb-redir-subpackage.patch [bz#2062819] +- kvm-spec-Obsolete-ssh-driver.patch [bz#2062817] +- Resolves: bz#2062828 + ([virtual network][rhel9][vDPA] qemu crash after hot unplug vdpa device [rhel-9.1.0]) +- Resolves: bz#2062819 + (Broken upgrade path due to qemu-kvm-hw-usbredir rename [rhel-9.1.0]) +- Resolves: bz#2062817 + (Missing qemu-kvm-block-ssh obsolete breaks upgrade path [rhel-9.1.0]) +- Resolves: bz#2062813 + (Mark all RHEL-8 and earlier machine types as deprecated [rhel-9.1.0]) + * Tue Mar 01 2022 Miroslav Rezanina - 6.2.0-11 - kvm-spec-Remove-qemu-virtiofsd.patch [bz#2055284] - Resolves: bz#2055284 diff --git a/rpminspect.yaml b/rpminspect.yaml index 16aec7d..889796d 100644 --- a/rpminspect.yaml +++ b/rpminspect.yaml @@ -5,6 +5,7 @@ inspections: badfuncs: off annocheck: - hardened: --skip-cf-protection --skip-property-note --ignore-unknown --verbose + - rhel-policy: --skip-cf-protection --skip-property-note --ignore-unknown --verbose ignore: - /usr/share/qemu-kvm/s390-ccw.img - /usr/share/qemu-kvm/s390-netboot.img From cfef67ad49a2296ea0c5fb9a799650245133cca9 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 14 Apr 2022 03:50:11 -0400 Subject: [PATCH 155/195] * Thu Apr 14 2022 Miroslav Rezanina - 6.2.0-13 - kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch [bz#2065589] - Resolves: bz#2065589 (RHEL 9.0 guest with vsock device migration failed from RHEL 9.0 > RHEL 8.6 [rhel-9.1.0]) --- ...packet-for-vhost-vsock-device-in-rhe.patch | 107 ++++++++++++++++++ qemu-kvm.spec | 9 +- 2 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 
kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch diff --git a/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch b/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch new file mode 100644 index 0000000..834092c --- /dev/null +++ b/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch @@ -0,0 +1,107 @@ +From 9ec1caad56435e14cd80ad23bc8bef8c301bdce4 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Thu, 24 Mar 2022 16:04:57 +0100 +Subject: [PATCH] RHEL: disable "seqpacket" for "vhost-vsock-device" in + rhel8.6.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefano Garzarella +RH-MergeRequest: 79: RHEL: disable "seqpacket" for "vhost-vsock-device" in rhel8.6.0 +RH-Commit: [1/1] 1810d35c05538733f82f680aec27d09db8ccbf33 (sgarzarella/qemu-kvm-c-9-s) +RH-Bugzilla: 2065589 +RH-Acked-by: Jason Wang +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Dr. David Alan Gilbert + +vhost-vsock device in RHEL 8 kernels doesn't support seqpacket. +To avoid problems when migrating a VM from RHEL 9 host, we need to +disable it in rhel8-* machine types. + +Signed-off-by: Stefano Garzarella +--- + hw/core/machine.c | 10 ++++++++++ + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 2 ++ + hw/s390x/s390-virtio-ccw.c | 1 + + include/hw/boards.h | 3 +++ + 5 files changed, 18 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 5fae55d6cd..7dcceb904a 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -43,6 +43,16 @@ + const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + ++GlobalProperty hw_compat_rhel_8_6[] = { ++ /* hw_compat_rhel_8_6 bz 2065589 */ ++ /* ++ * vhost-vsock device in RHEL 8 kernels doesn't support seqpacket, so ++ * we need do disable it downstream on the latest hw_compat_rhel_8. 
++ */ ++ { "vhost-vsock-device", "seqpacket", "off" }, ++}; ++const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); ++ + /* + * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index cf68d7498c..08579366b6 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -998,6 +998,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_6, ++ hw_compat_rhel_8_6_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_5, + hw_compat_rhel_8_5_len); + compat_props_add(m->compat_props, pc_rhel_8_5_compat, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index c8e06da084..23dacdd923 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -680,6 +680,8 @@ static void pc_q35_machine_rhel860_options(MachineClass *m) + + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.6.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_6, ++ hw_compat_rhel_8_6_len); + } + + DEFINE_PC_MACHINE(q35_rhel860, "pc-q35-rhel8.6.0", pc_q35_init_rhel860, +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index cc78a315e3..13bfa4253e 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1121,6 +1121,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) + static void ccw_machine_rhel860_class_options(MachineClass *mc) + { + ccw_machine_rhel900_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); + + /* All RHEL machines for prior major releases are deprecated */ + mc->deprecation_reason = rhel_old_machine_deprecation; +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 21d8d5528e..b9c12c4bf2 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -443,6 +443,9 @@ extern const size_t 
hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_6[]; ++extern const size_t hw_compat_rhel_8_6_len; ++ + extern GlobalProperty hw_compat_rhel_8_5[]; + extern const size_t hw_compat_rhel_8_5_len; + +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 0634b34..58fc073 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.2.0 -Release: 12%{?rcrel}%{?dist}%{?cc_suffix} +Release: 13%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -290,6 +290,8 @@ Patch69: kvm-memory-Fix-qemu-crash-on-starting-dirty-log-twice-wi.patch Patch70: kvm-RHEL-mark-old-machine-types-as-deprecated.patch # For bz#2062828 - [virtual network][rhel9][vDPA] qemu crash after hot unplug vdpa device [rhel-9.1.0] Patch71: kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch +# For bz#2065589 - RHEL 9.0 guest with vsock device migration failed from RHEL 9.0 > RHEL 8.6 [rhel-9.1.0] +Patch72: kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch # Source-git patches @@ -1336,6 +1338,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Thu Apr 14 2022 Miroslav Rezanina - 6.2.0-13 +- kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch [bz#2065589] +- Resolves: bz#2065589 + (RHEL 9.0 guest with vsock device migration failed from RHEL 9.0 > RHEL 8.6 [rhel-9.1.0]) + * Mon Mar 21 2022 Miroslav Rezanina - 6.2.0-12 - kvm-RHEL-mark-old-machine-types-as-deprecated.patch [bz#2062813] - kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch [bz#2062828] From 8b49639415ab24e0992f9c7e3a3f62a4778b0765 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina 
Date: Wed, 20 Apr 2022 03:49:16 -0400 Subject: [PATCH 156/195] * Wed Apr 20 2022 Miroslav Rezanina - 7.0.0-1 - Rebase to QEMU 7.0.0 [bz#2064757] - Do not build ssh block driver anymore [bz#2064500] - Removed hpet and parallel port support [bz#2065042] - Compatibility support [bz#2064782 bz#2064771] - Resolves: bz#2064757 (Rebase to QEMU 7.0.0) - Resolves: bz#2064500 (Install qemu-kvm-6.2.0-11.el9_0.1 failed as conflict with qemu-kvm-block-ssh-6.2.0-11.el9_0.1) - Resolves: bz#2065042 (Remove upstream-only devices from the qemu-kvm binary) - Resolves: bz#2064782 (Update machine type compatibility for QEMU 7.0.0 update [s390x]) - Resolves: bz#2064771 (Update machine type compatibility for QEMU 7.0.0 update [x86_64]) --- .gitignore | 1 + ...-t-use-g_autoptr-just-to-free-a-vari.patch | 49 --- ...d.patch => 0004-Initial-redhat-build.patch | 64 +-- ...0005-Enable-disable-devices-for-RHEL.patch | 166 +++----- ...Machine-type-related-general-changes.patch | 135 +++---- ...ch => 0007-Add-aarch64-machine-types.patch | 120 ++++-- ...atch => 0008-Add-ppc64-machine-types.patch | 46 +-- ...atch => 0009-Add-s390x-machine-types.patch | 92 ++++- ...tch => 0010-Add-x86_64-machine-types.patch | 194 +++++++-- 0011-Enable-make-check.patch | 186 +++++++++ 0012-Enable-make-check.patch | 376 ------------------ ...mber-of-devices-that-can-be-assigned.patch | 10 +- ...Add-support-statement-to-help-output.patch | 12 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 8 +- ...documentation-instead-of-qemu-system.patch | 61 +++ ...documentation-instead-of-qemu-system.patch | 120 ------ ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 6 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 8 +- ...on-warning-when-opening-v2-images-rw.patch | 14 +- ...oduce-RHEL-9.0.0-hw-compat-structure.patch | 135 +++++++ 0020-Fix-virtio-net-pci-vectors-compat.patch | 46 --- ...90x-machine-type-compatibility-for-r.patch | 38 ++ ...ve-s3-s4-suspend-disabling-to-compat.patch | 70 ++++ 
...machine-types-Add-pc_rhel_8_5_compat.patch | 75 ---- ...-types-Wire-compat-into-q35-and-i440.patch | 55 --- ...8.5.0-Update-machine-type-compatibil.patch | 50 --- ...-machine-type-compatibility-handling.patch | 58 --- ..._compat_rhel_8_5-with-6.2.0-RC2-chan.patch | 31 -- kvm-Enable-SGX-RH-Only.patch | 28 -- ...packet-for-vhost-vsock-device-in-rhe.patch | 107 ----- ...mark-old-machine-types-as-deprecated.patch | 108 ----- ...ard-Don-t-use-g_autoptr-just-to-free.patch | 45 --- ...ntext-for-drain_end-in-blockdev-reop.patch | 63 --- ...event-dangling-BDS-pointers-across-a.patch | 129 ------ ...Update-BSC-only-if-want_zero-is-true.patch | 56 --- ...sert-there-are-no-timers-when-closed.patch | 52 --- ...lete-reconnect-delay-timer-when-done.patch | 54 --- ...-nbd-Move-s-ioc-on-AioContext-change.patch | 107 ----- ...nfinite-loop-in-nvme_free_req_queue_.patch | 71 ---- ...ndling-of-holes-in-.bdrv_co_block_st.patch | 59 --- ...-rbd-workaround-for-ceph-issue-53784.patch | 103 ----- kvm-doc-Add-the-SGX-numa-description.patch | 77 ---- kvm-hw-arm-smmuv3-Fix-device-reset.patch | 61 --- ...-9.0-machine-type-and-remove-8.5-one.patch | 48 --- ...k-no_tcg_its-and-minor-style-changes.patch | 88 ---- kvm-hw-arm-virt-Expose-the-RAS-option.patch | 60 --- ...t-Register-iommu-as-a-class-property.patch | 81 ---- ...irt-Register-its-as-a-class-property.patch | 57 --- ...virt-Rename-default_bus_bypass_iommu.patch | 46 --- ...ix-leak-of-host-notifier-memory-regi.patch | 60 --- ...Let-NBD-connection-yield-in-iothread.patch | 108 ----- kvm-iotests-281-Test-lingering-timers.patch | 174 -------- ...ckdev-reopen-with-iothreads-and-thro.patch | 106 ----- ...u-img-convert-of-zeroed-data-cluster.patch | 81 ---- kvm-iotests-block-status-cache-New-test.patch | 197 --------- ...tests-stream-error-on-reset-New-test.patch | 196 --------- ...tests.py-Add-QemuStorageDaemon-class.patch | 92 ----- ...rrect-calls-of-log_global_start-stop.patch | 97 ----- ...crash-on-starting-dirty-log-twice-wi.patch | 156 
-------- ...uma-Enable-numa-for-SGX-EPC-sections.patch | 287 ------------- ...-numa-in-the-monitor-and-Libvirt-int.patch | 210 ---------- ...-related-comments-and-restore-sectio.patch | 213 ---------- ...-is_allocated_sectors-more-efficient.patch | 108 ----- ...orage-daemon-Add-vhost-user-blk-help.patch | 72 ---- ...emon-Fix-typo-in-vhost-user-blk-help.patch | 41 -- ....6.0-and-rhel9.0.0-machine-types-for.patch | 82 ---- ...virtio-mem-as-tech-preview-on-x86-64.patch | 43 -- ...machine-types-x86-set-prefer_sockets.patch | 52 --- ...ce-deletion-events-with-device-JSON-.patch | 130 ------ ...r-warnings-from-unused-clipboard-inf.patch | 63 --- ...pboard-fix-use-after-free-regression.patch | 49 --- kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch | 80 ---- ...embership-of-all-supplementary-group.patch | 110 ----- kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch | 65 --- kvm-x86-Add-q35-RHEL-9.0.0-machine-type.patch | 75 ---- qemu-kvm.spec | 199 +++------ sources | 2 +- 77 files changed, 1074 insertions(+), 5800 deletions(-) delete mode 100644 0001-ui-clipboard-Don-t-use-g_autoptr-just-to-free-a-vari.patch rename 0005-Initial-redhat-build.patch => 0004-Initial-redhat-build.patch (86%) rename 0006-Enable-disable-devices-for-RHEL.patch => 0005-Enable-disable-devices-for-RHEL.patch (81%) rename 0007-Machine-type-related-general-changes.patch => 0006-Machine-type-related-general-changes.patch (87%) rename 0008-Add-aarch64-machine-types.patch => 0007-Add-aarch64-machine-types.patch (75%) rename 0009-Add-ppc64-machine-types.patch => 0008-Add-ppc64-machine-types.patch (93%) rename 0010-Add-s390x-machine-types.patch => 0009-Add-s390x-machine-types.patch (58%) rename 0011-Add-x86_64-machine-types.patch => 0010-Add-x86_64-machine-types.patch (76%) create mode 100644 0011-Enable-make-check.patch delete mode 100644 0012-Enable-make-check.patch rename 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch => 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch (93%) rename 
0014-Add-support-statement-to-help-output.patch => 0013-Add-support-statement-to-help-output.patch (86%) rename 0015-globally-limit-the-maximum-number-of-CPUs.patch => 0014-globally-limit-the-maximum-number-of-CPUs.patch (89%) create mode 100644 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch delete mode 100644 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch rename 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch => 0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch (96%) rename 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch => 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch (92%) rename 0019-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch => 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch (90%) create mode 100644 0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch delete mode 100644 0020-Fix-virtio-net-pci-vectors-compat.patch create mode 100644 0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch create mode 100644 0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch delete mode 100644 0021-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch delete mode 100644 0022-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch delete mode 100644 0023-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch delete mode 100644 0024-redhat-Add-s390x-machine-type-compatibility-handling.patch delete mode 100644 0025-compat-Update-hw_compat_rhel_8_5-with-6.2.0-RC2-chan.patch delete mode 100644 kvm-Enable-SGX-RH-Only.patch delete mode 100644 kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch delete mode 100644 kvm-RHEL-mark-old-machine-types-as-deprecated.patch delete mode 100644 kvm-Revert-ui-clipboard-Don-t-use-g_autoptr-just-to-free.patch delete mode 100644 kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch delete mode 100644 kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch delete mode 100644 
kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch delete mode 100644 kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch delete mode 100644 kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch delete mode 100644 kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch delete mode 100644 kvm-block-nvme-fix-infinite-loop-in-nvme_free_req_queue_.patch delete mode 100644 kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch delete mode 100644 kvm-block-rbd-workaround-for-ceph-issue-53784.patch delete mode 100644 kvm-doc-Add-the-SGX-numa-description.patch delete mode 100644 kvm-hw-arm-smmuv3-Fix-device-reset.patch delete mode 100644 kvm-hw-arm-virt-Add-9.0-machine-type-and-remove-8.5-one.patch delete mode 100644 kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch delete mode 100644 kvm-hw-arm-virt-Expose-the-RAS-option.patch delete mode 100644 kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch delete mode 100644 kvm-hw-arm-virt-Register-its-as-a-class-property.patch delete mode 100644 kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch delete mode 100644 kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch delete mode 100644 kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch delete mode 100644 kvm-iotests-281-Test-lingering-timers.patch delete mode 100644 kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch delete mode 100644 kvm-iotests-Test-qemu-img-convert-of-zeroed-data-cluster.patch delete mode 100644 kvm-iotests-block-status-cache-New-test.patch delete mode 100644 kvm-iotests-stream-error-on-reset-New-test.patch delete mode 100644 kvm-iotests.py-Add-QemuStorageDaemon-class.patch delete mode 100644 kvm-memory-Fix-incorrect-calls-of-log_global_start-stop.patch delete mode 100644 kvm-memory-Fix-qemu-crash-on-starting-dirty-log-twice-wi.patch delete mode 100644 kvm-numa-Enable-numa-for-SGX-EPC-sections.patch delete mode 100644 kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch delete 
mode 100644 kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch delete mode 100644 kvm-qemu-img-make-is_allocated_sectors-more-efficient.patch delete mode 100644 kvm-qemu-storage-daemon-Add-vhost-user-blk-help.patch delete mode 100644 kvm-qemu-storage-daemon-Fix-typo-in-vhost-user-blk-help.patch delete mode 100644 kvm-redhat-Add-rhel8.6.0-and-rhel9.0.0-machine-types-for.patch delete mode 100644 kvm-redhat-Enable-virtio-mem-as-tech-preview-on-x86-64.patch delete mode 100644 kvm-rhel-machine-types-x86-set-prefer_sockets.patch delete mode 100644 kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch delete mode 100644 kvm-ui-avoid-compiler-warnings-from-unused-clipboard-inf.patch delete mode 100644 kvm-ui-clipboard-fix-use-after-free-regression.patch delete mode 100644 kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch delete mode 100644 kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch delete mode 100644 kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch delete mode 100644 kvm-x86-Add-q35-RHEL-9.0.0-machine-type.patch diff --git a/.gitignore b/.gitignore index c8fe0b4..908f9b5 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,4 @@ /*.orig /qemu-6.1.0.tar.xz /qemu-6.2.0.tar.xz +/qemu-7.0.0.tar.xz diff --git a/0001-ui-clipboard-Don-t-use-g_autoptr-just-to-free-a-vari.patch b/0001-ui-clipboard-Don-t-use-g_autoptr-just-to-free-a-vari.patch deleted file mode 100644 index 5dcba33..0000000 --- a/0001-ui-clipboard-Don-t-use-g_autoptr-just-to-free-a-vari.patch +++ /dev/null @@ -1,49 +0,0 @@ -From cc2f3e2ce9e2a9ab9e52e8f44bee4876e69843da Mon Sep 17 00:00:00 2001 -From: John Snow -Date: Wed, 17 Nov 2021 09:51:46 -0500 -Subject: ui/clipboard: Don't use g_autoptr just to free a variable - -Clang doesn't recognize that the variable is being "used" and will emit -a warning: - - ../ui/clipboard.c:47:34: error: variable 'old' set but not used [-Werror,-Wunused-but-set-variable] - g_autoptr(QemuClipboardInfo) old = NULL; - ^ - 1 error generated. - -OK, fine. 
Just do things the old way. - -Signed-off-by: John Snow -Signed-off-by: Miroslav Rezanina - ---- - -This is temporary commit from upstream submission necessary for build to pass. -We expect proper fix included upstream later. ---- - ui/clipboard.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/ui/clipboard.c b/ui/clipboard.c -index d7b008d62a..d53576b0f6 100644 ---- a/ui/clipboard.c -+++ b/ui/clipboard.c -@@ -44,13 +44,14 @@ void qemu_clipboard_peer_release(QemuClipboardPeer *peer, - - void qemu_clipboard_update(QemuClipboardInfo *info) - { -- g_autoptr(QemuClipboardInfo) old = NULL; -+ QemuClipboardInfo *old = NULL; - assert(info->selection < QEMU_CLIPBOARD_SELECTION__COUNT); - - notifier_list_notify(&clipboard_notifiers, info); - - old = cbinfo[info->selection]; - cbinfo[info->selection] = qemu_clipboard_info_ref(info); -+ g_free(old); - } - - QemuClipboardInfo *qemu_clipboard_info(QemuClipboardSelection selection) --- -2.27.0 - diff --git a/0005-Initial-redhat-build.patch b/0004-Initial-redhat-build.patch similarity index 86% rename from 0005-Initial-redhat-build.patch rename to 0004-Initial-redhat-build.patch index 3ff2dce..94cf91c 100644 --- a/0005-Initial-redhat-build.patch +++ b/0004-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From 3308eb892f03c7169f712fe88e74dacd6f05b1fe Mon Sep 17 00:00:00 2001 +From fc113ecd7c99646a7ced0b99570b5927ae6d595f Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 26 May 2021 10:56:02 +0200 Subject: Initial redhat build @@ -13,7 +13,7 @@ several issues are fixed in QEMU tree: We disable make check due to issues with some of the tests. 
-This rebase is based on qemu-kvm-6.1.0-8.el9 +This rebase is based on qemu-kvm-6.2.0-13.el9 Signed-off-by: Miroslav Rezanina -- @@ -38,6 +38,18 @@ Rebase changes (6.2.0): - Add -Wno-string-plus-int to extra flags - Updated configure options +Rebase changes (7.0.0): +- Do not use -mlittle CFLAG on ppc64le +- Used upstream handling issue with ui/clipboard.c +- Use -mlittle-endian on ppc64le instead of deleteing it in configure +- Drop --disable-libxml2 option for configure (upstream) +- Remove vof roms +- Disable AVX2 support +- Use internal meson +- Disable new configure options (dbus-display and qga-vss) +- Change permissions on installing tests/Makefile.include +- Remove ssh block driver + Merged patches (6.0.0): - 605758c902 Limit build on Power to qemu-img and qemu-ga only @@ -115,27 +127,26 @@ Merged patches (6.2.0): - d2f2ff3c74 spec: Explicitly include compress filter - a7d047f9c2 Move ksmtuned files to separate package -With rebase new configure options are introducesed. We use two steps -configuration - first we disable all options and then enable supported -options. 
- -With 6.2.0, following changes are done: -- disabled all audiodev and enable only pa - - not use audio-drv-list anymore - - disabling oss driver removes oss module (added during rebase to 6.2.0) -- disable gettext -- disable l2tpv3 -- enable selinux -- enable spice-protocol - - added needed BuildRequire -- specify used capstone version -- specify used fdt version +Merged patches (7.0.0): +- 098d4d08d0 spec: Rename qemu-kvm-hw-usbredir to qemu-kvm-device-usb-redirect +- c2bd0d6834 spec: Split qemu-kvm-ui-opengl +- 2c9cda805d spec: Introduce packages for virtio-gpu-* modules (changed as rhel device tree not set) +- d0414a3e0b spec: Introduce device-display-virtio-vga* packages +- 3534ec46d4 spec: Move usb-host module to separate package +- ddc14d4737 spec: Move qtest accel module to tests package +- 6f2c4befa6 spec: Extend qemu-kvm-core description +- 6f11866e4e (rhel/rhel-9.0.0) Update to qemu-kvm-6.2.0-6.el9 +- da0a28758f ui/clipboard: fix use-after-free regression +- 895d4d52eb spec: Remove qemu-virtiofsd +- c8c8c8bd84 spec: Fix obsolete for spice subpackages +- d46d2710b2 spec: Obsolete old usb redir subpackage +- 6f52a50b68 spec: Obsolete ssh driver Signed-off-by: Miroslav Rezanina --- .distro/85-kvm.preset | 5 - .distro/Makefile | 100 + - .distro/Makefile.common | 38 + + .distro/Makefile.common | 40 + .distro/README.tests | 39 + .distro/ksm.service | 13 - .distro/ksm.sysconfig | 4 - @@ -147,7 +158,8 @@ Signed-off-by: Miroslav Rezanina .distro/kvm-setup.service | 14 - .distro/modules-load.conf | 4 + .distro/qemu-guest-agent.service | 1 - - .distro/qemu-kvm.spec.template | 3817 +++++++++++++++++++++++ + .distro/qemu-kvm.spec.template | 4034 +++++++++++++++++++++++ + .distro/rpminspect.yaml | 6 +- .distro/scripts/extract_build_cmd.py | 12 + .gitignore | 1 + README.systemtap | 43 + @@ -157,7 +169,7 @@ Signed-off-by: Miroslav Rezanina scripts/systemtap/script.d/qemu_kvm.stp | 1 + tests/check-block.sh | 2 + ui/vnc-auth-sasl.c | 2 +- - 24 files changed, 4066 
insertions(+), 338 deletions(-) + 25 files changed, 4290 insertions(+), 339 deletions(-) delete mode 100644 .distro/85-kvm.preset create mode 100644 .distro/Makefile create mode 100644 .distro/Makefile.common @@ -226,10 +238,10 @@ index 0000000000..ad913fc990 +3. Translate the trace record to readable format. + # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log diff --git a/meson.build b/meson.build -index 96de1a6ef9..5f6ba86dbb 100644 +index 861de93c4f..6f7e430f0f 100644 --- a/meson.build +++ b/meson.build -@@ -2108,7 +2108,9 @@ if capstone_opt == 'internal' +@@ -2394,7 +2394,9 @@ if capstone_opt == 'internal' # Include all configuration defines via a header file, which will wind up # as a dependency on the object file, and thus changes here will result # in a rebuild. @@ -271,11 +283,11 @@ index 0000000000..c04abf9449 @@ -0,0 +1 @@ +probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} diff --git a/tests/check-block.sh b/tests/check-block.sh -index f86cb863de..6d38340d49 100755 +index f59496396c..d900d8b35e 100755 --- a/tests/check-block.sh +++ b/tests/check-block.sh -@@ -69,6 +69,8 @@ else - fi +@@ -48,6 +48,8 @@ if LANG=C bash --version | grep -q 'GNU bash, version [123]' ; then + skip "bash version too old ==> Not running the qemu-iotests." 
fi +exit 0 @@ -297,5 +309,5 @@ index 47fdae5b21..2a950caa2a 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -2.27.0 +2.31.1 diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0005-Enable-disable-devices-for-RHEL.patch similarity index 81% rename from 0006-Enable-disable-devices-for-RHEL.patch rename to 0005-Enable-disable-devices-for-RHEL.patch index 345bd20..1ffbe97 100644 --- a/0006-Enable-disable-devices-for-RHEL.patch +++ b/0005-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From af4c83ed637bfda003ae86133413d53cefda3654 Mon Sep 17 00:00:00 2001 +From 51ec7495d69fe4b4d0b61642ca6c0e7fd7a1032d Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 15 Jul 2021 03:22:36 -0400 Subject: Enable/disable devices for RHEL @@ -12,12 +12,16 @@ Rebase notes (6.1.0): - default-configs moved to configs - Use --with-device- configure option to use rhel configs -Rebase notes (6.2.0 RC0): +Rebase notes (6.2.0): - Add CONFIG_ISA_FDC - -Rebase notes (6.2.0 RC3): - Do not remove -no-hpet documentation +Rebase notes (7.0.0): +- Added CONFIG_ARM_GIC_TCG option for aarch64 +- Fixes necessary for layout change fixes +- Renamed CONFIG_ARM_GIC_TCG to CONFIG_ARM_GICV3_TCG +- Removed upstream devices + Merged patches (6.1.0): - c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak - 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI @@ -27,49 +31,50 @@ Merged patches (6.1.0): - 2504d68a7c aarch64: Add USB storage devices - 51c2a3253c disable ac97 audio -Merged patches (6.2.0 RC0): +Merged patches (6.2.0): - 9f2f9fa2ba disable sga device + +Merged patches (7.0.0): +- fd7c45a5a8 redhat: Enable virtio-mem as tech-preview on x86-64 +- c9e68ea451 Enable SGX -- RH Only --- - .distro/qemu-kvm.spec.template | 9 +- - .../aarch64-softmmu/aarch64-rh-devices.mak | 33 ++++++ + .distro/qemu-kvm.spec.template | 18 +-- + .../aarch64-softmmu/aarch64-rh-devices.mak | 34 ++++++ .../ppc64-softmmu/ppc64-rh-devices.mak | 35 ++++++ 
configs/devices/rh-virtio.mak | 10 ++ .../s390x-softmmu/s390x-rh-devices.mak | 15 +++ - .../x86_64-softmmu/x86_64-rh-devices.mak | 102 ++++++++++++++++++ - .../x86_64-upstream-devices.mak | 4 + + .../x86_64-softmmu/x86_64-rh-devices.mak | 103 ++++++++++++++++++ hw/acpi/ich9.c | 4 +- hw/arm/meson.build | 2 +- hw/block/fdc.c | 10 ++ - hw/char/parallel.c | 9 ++ hw/cpu/meson.build | 5 +- - hw/display/cirrus_vga.c | 3 + + hw/display/cirrus_vga.c | 5 +- hw/ide/piix.c | 5 +- hw/input/pckbd.c | 2 + hw/net/e1000.c | 2 + hw/ppc/spapr_cpu_core.c | 2 + - hw/timer/hpet.c | 8 ++ hw/usb/meson.build | 2 +- target/arm/cpu_tcg.c | 10 ++ - target/ppc/cpu-models.c | 10 ++ + target/ppc/cpu-models.c | 9 ++ target/s390x/cpu_models_sysemu.c | 3 + target/s390x/kvm/kvm.c | 8 ++ - 23 files changed, 283 insertions(+), 10 deletions(-) + 20 files changed, 269 insertions(+), 15 deletions(-) create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak create mode 100644 configs/devices/rh-virtio.mak create mode 100644 configs/devices/s390x-softmmu/s390x-rh-devices.mak create mode 100644 configs/devices/x86_64-softmmu/x86_64-rh-devices.mak - create mode 100644 configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak new file mode 100644 -index 0000000000..cd9c7c5127 +index 0000000000..5f6ee1de5b --- /dev/null +++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -@@ -0,0 +1,33 @@ +@@ -0,0 +1,34 @@ +include ../rh-virtio.mak + +CONFIG_ARM_GIC_KVM=y ++CONFIG_ARM_GICV3_TCG=y +CONFIG_ARM_GIC=y +CONFIG_ARM_SMMUV3=y +CONFIG_ARM_V7M=y @@ -180,12 +185,11 @@ index 0000000000..d3b38312e1 +CONFIG_WDT_DIAG288=y diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..1f7a9ab024 +index 
0000000000..d0c9e66641 --- /dev/null +++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -0,0 +1,102 @@ +@@ -0,0 +1,103 @@ +include ../rh-virtio.mak -+include x86_64-upstream-devices.mak + +CONFIG_ACPI=y +CONFIG_ACPI_PCI=y @@ -274,6 +278,7 @@ index 0000000000..1f7a9ab024 +CONFIG_VGA_PCI=y +CONFIG_VHOST_USER=y +CONFIG_VHOST_USER_BLK=y ++CONFIG_VIRTIO_MEM=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VGA=y +CONFIG_VMMOUSE=y @@ -286,21 +291,12 @@ index 0000000000..1f7a9ab024 +CONFIG_TPM_CRB=y +CONFIG_TPM_TIS_ISA=y +CONFIG_TPM_EMULATOR=y -diff --git a/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak b/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak -new file mode 100644 -index 0000000000..2cd20f54d2 ---- /dev/null -+++ b/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak -@@ -0,0 +1,4 @@ -+# We need "isa-parallel" -+CONFIG_PARALLEL=y -+# We need "hpet" -+CONFIG_HPET=y ++CONFIG_SGX=y diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index ebe08ed831..381ef2ddcf 100644 +index bd9bbade70..de1e401cdf 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c -@@ -438,8 +438,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) +@@ -435,8 +435,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; pm->acpi_memory_hotplug.is_enabled = true; pm->cpu_hotplug_legacy = true; @@ -325,10 +321,10 @@ index 721a8eb8be..87ed4dd914 100644 arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 21d18ac2e3..97fa6de423 100644 +index 347875a0cd..ca1776121f 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c -@@ -48,6 +48,8 @@ +@@ -49,6 +49,8 @@ #include "qom/object.h" #include "fdc-internal.h" @@ -337,7 +333,7 @@ index 21d18ac2e3..97fa6de423 100644 /********************************************************/ /* debug Floppy devices */ -@@ -2337,6 +2339,14 
@@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) +@@ -2338,6 +2340,14 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) FDrive *drive; static int command_tables_inited = 0; @@ -352,33 +348,6 @@ index 21d18ac2e3..97fa6de423 100644 if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); return; -diff --git a/hw/char/parallel.c b/hw/char/parallel.c -index b45e67bfbb..e5f108211b 100644 ---- a/hw/char/parallel.c -+++ b/hw/char/parallel.c -@@ -29,6 +29,7 @@ - #include "chardev/char-parallel.h" - #include "chardev/char-fe.h" - #include "hw/acpi/aml-build.h" -+#include "hw/boards.h" - #include "hw/irq.h" - #include "hw/isa/isa.h" - #include "hw/qdev-properties.h" -@@ -534,6 +535,14 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp) - int base; - uint8_t dummy; - -+ /* Restricted for Red Hat Enterprise Linux */ -+ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); -+ if (strstr(mc->name, "rhel")) { -+ error_setg(errp, "Device %s is not supported with machine type %s", -+ object_get_typename(OBJECT(dev)), mc->name); -+ return; -+ } -+ - if (!qemu_chr_fe_backend_connected(&s->chr)) { - error_setg(errp, "Can't create parallel device, empty char device"); - return; diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build index 9e52fee9e7..bb71c9f3e7 100644 --- a/hw/cpu/meson.build @@ -394,19 +363,21 @@ index 9e52fee9e7..bb71c9f3e7 100644 -specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) +#specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index fdca6ca659..fa1a7eee51 100644 +index 3bb6a58698..6447fdb02e 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c -@@ -2945,6 +2945,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) - PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); - int16_t device_id = 
pc->device_id; +@@ -2945,7 +2945,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); + int16_t device_id = pc->device_id; +- /* + warn_report("'cirrus-vga' is deprecated, " + "please use a different VGA card instead"); + - /* follow real hardware, cirrus card emulated has 4 MB video memory. - Also accept 8 MB/16 MB for backward compatibility. */ - if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && ++ /* + * Follow real hardware, cirrus card emulated has 4 MB video memory. + * Also accept 8 MB/16 MB for backward compatibility. + */ diff --git a/hw/ide/piix.c b/hw/ide/piix.c index ce89fd0aa3..fbcf802b13 100644 --- a/hw/ide/piix.c @@ -431,10 +402,10 @@ index ce89fd0aa3..fbcf802b13 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index baba62f357..bc360347ea 100644 +index 4efdf75620..5143ebaa27 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c -@@ -796,6 +796,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) +@@ -814,6 +814,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) dc->vmsd = &vmstate_kbd_isa; isa->build_aml = i8042_build_aml; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); @@ -464,10 +435,10 @@ index f5bc81296d..282d01e374 100644 static void e1000_register_types(void) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 58e7341cb7..8ba34f6a1d 100644 +index 8a4861f45a..fcb5dfe792 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -370,10 +370,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { +@@ -379,10 +379,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { .instance_size = sizeof(SpaprCpuCore), .class_size = sizeof(SpaprCpuCoreClass), }, @@ -480,25 +451,6 @@ index 58e7341cb7..8ba34f6a1d 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), -diff --git 
a/hw/timer/hpet.c b/hw/timer/hpet.c -index 9520471be2..202e032524 100644 ---- a/hw/timer/hpet.c -+++ b/hw/timer/hpet.c -@@ -733,6 +733,14 @@ static void hpet_realize(DeviceState *dev, Error **errp) - int i; - HPETTimer *timer; - -+ /* Restricted for Red Hat Enterprise Linux */ -+ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); -+ if (strstr(mc->name, "rhel")) { -+ error_setg(errp, "Device %s is not supported with machine type %s", -+ object_get_typename(OBJECT(dev)), mc->name); -+ return; -+ } -+ - if (!s->intcap) { - warn_report("Hpet's intcap not initialized"); - } diff --git a/hw/usb/meson.build b/hw/usb/meson.build index de853d780d..0776ae6a20 100644 --- a/hw/usb/meson.build @@ -591,7 +543,7 @@ index 13d0e9b195..3826fa5122 100644 { .name = "max", .initfn = arm_max_initfn }, #endif diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 4baa111713..d779c4d1d5 100644 +index 976be5e0d1..dd78883410 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -600,9 +552,9 @@ index 4baa111713..d779c4d1d5 100644 +#if 0 /* Embedded and 32-bit CPUs disabled for Red Hat Enterprise Linux */ /* Embedded PowerPC */ - /* PowerPC 401 family */ - POWERPC_DEF("401", CPU_POWERPC_401, 401, -@@ -740,8 +741,10 @@ + /* PowerPC 405 family */ + /* PowerPC 405 cores */ +@@ -698,8 +699,10 @@ "PowerPC 7447A v1.2 (G4)") POWERPC_DEF("7457a_v1.2", CPU_POWERPC_74x7A_v12, 7455, "PowerPC 7457A v1.2 (G4)") @@ -613,7 +565,7 @@ index 4baa111713..d779c4d1d5 100644 POWERPC_DEF("970_v2.2", CPU_POWERPC_970_v22, 970, "PowerPC 970 v2.2") POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970, -@@ -760,6 +763,7 @@ +@@ -718,6 +721,7 @@ "PowerPC 970MP v1.1") POWERPC_DEF("power5+_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, "POWER5+ v2.1") @@ -621,15 +573,7 @@ index 4baa111713..d779c4d1d5 100644 POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, "POWER7 v2.3") POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, -@@ -784,6 +788,7 @@ - /* PowerPC 
CPU aliases */ - - PowerPCCPUAlias ppc_cpu_aliases[] = { -+#if 0 /* Embedded and 32-bit CPUs disabled for Red Hat Enterprise Linux */ - { "403", "403gc" }, - { "405", "405d4" }, - { "405cr", "405crc" }, -@@ -942,12 +947,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -897,12 +901,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "7447a", "7447a_v1.2" }, { "7457a", "7457a_v1.2" }, { "apollo7pm", "7457a_v1.0" }, @@ -645,7 +589,7 @@ index 4baa111713..d779c4d1d5 100644 { "power7", "power7_v2.3" }, { "power7+", "power7+_v2.1" }, { "power8e", "power8e_v2.1" }, -@@ -957,6 +965,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -912,6 +919,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "power10", "power10_v2.0" }, #endif @@ -653,7 +597,7 @@ index 4baa111713..d779c4d1d5 100644 /* Generic PowerPCs */ #if defined(TARGET_PPC64) { "ppc64", "970fx_v3.1" }, -@@ -964,5 +973,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -919,5 +927,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "ppc32", "604" }, { "ppc", "604" }, { "default", "604" }, @@ -675,10 +619,10 @@ index 05c3ccaaff..6a04ccab1b 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c -index 5b1fdb55c4..c52434985b 100644 +index 6acf14d5ec..74f089d87f 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c -@@ -2508,6 +2508,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2512,6 +2512,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } @@ -694,5 +638,5 @@ index 5b1fdb55c4..c52434985b 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ -- -2.27.0 +2.31.1 diff --git a/0007-Machine-type-related-general-changes.patch b/0006-Machine-type-related-general-changes.patch similarity index 87% rename from 0007-Machine-type-related-general-changes.patch rename to 
0006-Machine-type-related-general-changes.patch index 9baf215..c3b08a4 100644 --- a/0007-Machine-type-related-general-changes.patch +++ b/0006-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From ef9b78c3f2810541eac453a3f8a8753763b1378d Mon Sep 17 00:00:00 2001 +From a525db3951dc68c469d1f51bdc69ab6e75e72c37 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -9,14 +9,16 @@ architecture. Signed-off-by: Miroslav Rezanina -- -Rebase notes (6.2.0 RC0): +Rebase notes (6.2.0): - Do not duplicate minimal_version_id for piix4_pm - Remove empty line chunks in serial.c - Remove migration.h include in serial.c - -Rebase notes (6.2.0 RC1): - Update hw_compat_rhel_8_5 (from MR 66) +Rebase notes (7.0.0): +- Remove downstream changes leftovers in hw/rtc/mc146818rtc.c +- Remove unnecessary change in hw/usb/hcd-uhci.c + Merged patches (6.1.0): - f2fb42a3c6 redhat: add missing entries in hw_compat_rhel_8_4 - 1949ec258e hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3 @@ -27,32 +29,34 @@ Merged patches (6.1.0): - af69d1ca6e Remove RHEL 7.4.0 machine types (only generic changes) - 8f7a74ab78 Remove RHEL 7.5.0 machine types (only generic changes) -Merged patches (6.2.0 RC0): +Merged patches (6.2.0): - d687ac13d2 redhat: Define hw_compat_rhel_8_5 + +Merged patches (7.0.0): +- ef5afcc86d Fix virtio-net-pci* "vectors" compat +- 168f0d56e3 compat: Update hw_compat_rhel_8_5 with 6.2.0 RC2 changes --- hw/acpi/piix4.c | 6 +- hw/arm/virt.c | 2 +- - hw/core/machine.c | 180 +++++++++++++++++++++++++++++++++++ + hw/core/machine.c | 186 +++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- hw/i386/pc_piix.c | 2 + hw/i386/pc_q35.c | 2 + hw/net/rtl8139.c | 4 +- - hw/rtc/mc146818rtc.c | 2 + hw/smbios/smbios.c | 46 ++++++++- hw/timer/i8254_common.c | 2 +- - hw/usb/hcd-uhci.c | 4 +- - hw/usb/hcd-xhci-pci.c | 59 +++++++++--- + hw/usb/hcd-xhci-pci.c | 59 ++++++++--- 
hw/usb/hcd-xhci-pci.h | 1 + include/hw/boards.h | 21 ++++ include/hw/firmware/smbios.h | 5 +- include/hw/i386/pc.h | 3 + - 16 files changed, 315 insertions(+), 26 deletions(-) + 14 files changed, 316 insertions(+), 25 deletions(-) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index f0b5fac44a..8d6011c0a3 100644 +index fe5625d07a..28544e78c3 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -278,7 +278,7 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id) +@@ -287,7 +287,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, @@ -61,7 +65,7 @@ index f0b5fac44a..8d6011c0a3 100644 .post_load = vmstate_acpi_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), -@@ -644,8 +644,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) +@@ -653,8 +653,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) static Property piix4_pm_properties[] = { DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), @@ -73,23 +77,23 @@ index f0b5fac44a..8d6011c0a3 100644 DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, use_acpi_hotplug_bridge, true), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 30da05dfe0..5de4d9d73b 100644 +index d2e5ecd234..6a84031fd7 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1590,7 +1590,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1596,7 +1596,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, -- true, SMBIOS_ENTRY_POINT_30); -+ true, NULL, NULL, SMBIOS_ENTRY_POINT_30); +- true, SMBIOS_ENTRY_POINT_TYPE_64); ++ true, NULL, NULL, SMBIOS_ENTRY_POINT_TYPE_64); smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, diff --git a/hw/core/machine.c b/hw/core/machine.c -index 53a99abc56..53a3caf4fb 100644 +index 1e23fdc14b..ea430d844e 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -37,6 +37,186 @@ +@@ -37,6 +37,192 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-pci.h" @@ -111,6 +115,8 @@ index 53a99abc56..53a3caf4fb 100644 + { "vhost-vsock-device", "seqpacket", "off" }, + /* hw_compat_rhel_8_5 from hw_compat_6_1 */ + { "vhost-user-vsock-device", "seqpacket", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_1 */ ++ { "nvme-ns", "shared", "off" }, +}; +const size_t hw_compat_rhel_8_5_len = G_N_ELEMENTS(hw_compat_rhel_8_5); + @@ -125,7 +131,11 @@ index 53a99abc56..53a3caf4fb 100644 + /* hw_compat_rhel_8_4 from hw_compat_5_2 */ + { "virtio-blk-device", "report-discard-granularity", "off" }, + /* hw_compat_rhel_8_4 from hw_compat_5_2 */ -+ { "virtio-net-pci", "vectors", "3"}, ++ /* ++ * Upstream incorrectly had "virtio-net-pci" instead of "virtio-net-pci-base", ++ * (https://bugzilla.redhat.com/show_bug.cgi?id=1999141) ++ */ ++ { "virtio-net-pci-base", "vectors", "3"}, +}; +const size_t hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4); + @@ -273,14 +283,14 @@ index 53a99abc56..53a3caf4fb 100644 +}; +const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); + - GlobalProperty hw_compat_6_1[] = { - { "vhost-user-vsock-device", "seqpacket", "off" }, - { "nvme-ns", "shared", "off" }, + GlobalProperty hw_compat_6_2[] = { + { "PIIX4_PM", "x-not-migrate-acpi-index", "on"}, + }; diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index 90851e730b..a91c5d7467 100644 +index 46abbc5653..505467059b 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c -@@ -85,7 +85,7 @@ 
static void vga_isa_realizefn(DeviceState *dev, Error **errp) +@@ -88,7 +88,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) } static Property vga_isa_properties[] = { @@ -290,7 +300,7 @@ index 90851e730b..a91c5d7467 100644 }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 223dd3e05d..dda3f64f19 100644 +index b72c03d0a6..c797e98312 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, @@ -299,11 +309,11 @@ index 223dd3e05d..dda3f64f19 100644 pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, - SMBIOS_ENTRY_POINT_21); + pcms->smbios_entry_point_type); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index e1e100316d..235054a643 100644 +index 1780f79bc1..b695f88c45 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -200,6 +200,8 @@ static void pc_q35_init(MachineState *machine) @@ -312,11 +322,11 @@ index e1e100316d..235054a643 100644 pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, - SMBIOS_ENTRY_POINT_21); + pcms->smbios_entry_point_type); } diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 90b4fc63ce..3ffb9dd22c 100644 +index 6b65823b4b..75dacabc43 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3179,7 +3179,7 @@ static int rtl8139_pre_save(void *opaque) @@ -338,28 +348,8 @@ index 90b4fc63ce..3ffb9dd22c 100644 VMSTATE_UINT16(tally_counters.TxAbt, RTL8139State), VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), -diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c -index 4fbafddb22..6c42cc22cd 100644 ---- a/hw/rtc/mc146818rtc.c -+++ b/hw/rtc/mc146818rtc.c -@@ -43,6 +43,7 @@ - #include "qapi/qapi-events-misc-target.h" - #include "qapi/visitor.h" - #include "hw/rtc/mc146818rtc_regs.h" -+#include "migration/migration.h" - - #ifdef TARGET_I386 - #include "qapi/qapi-commands-misc-target.h" -@@ -821,6 +822,7 @@ static int rtc_post_load(void *opaque, int version_id) - 
static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) - { - RTCState *s = (RTCState *)opaque; -+ - return s->irq_reinject_on_ack_count != 0; - } - diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 7397e56737..3a4bb894ba 100644 +index 60349ee402..0edcc98434 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -57,6 +57,9 @@ static bool smbios_legacy = true; @@ -372,16 +362,16 @@ index 7397e56737..3a4bb894ba 100644 uint8_t *smbios_tables; size_t smbios_tables_len; -@@ -619,7 +622,7 @@ static void smbios_build_type_1_table(void) +@@ -639,7 +642,7 @@ static void smbios_build_type_1_table(void) static void smbios_build_type_2_table(void) { -- SMBIOS_BUILD_TABLE_PRE(2, 0x200, false); /* optional */ -+ SMBIOS_BUILD_TABLE_PRE(2, 0x200, smbios_type2_required); +- SMBIOS_BUILD_TABLE_PRE(2, T2_BASE, false); /* optional */ ++ SMBIOS_BUILD_TABLE_PRE(2, T2_BASE, smbios_type2_required); SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); SMBIOS_TABLE_SET_STR(2, product_str, type2.product); -@@ -888,7 +891,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) +@@ -914,7 +917,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) void smbios_set_defaults(const char *manufacturer, const char *product, const char *version, bool legacy_mode, @@ -393,7 +383,7 @@ index 7397e56737..3a4bb894ba 100644 { smbios_have_defaults = true; smbios_legacy = legacy_mode; -@@ -909,11 +915,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, +@@ -935,11 +941,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, g_free(smbios_entries); } @@ -453,25 +443,6 @@ index 050875b497..32935da46c 100644 VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2, vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, -diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index d1b5657d72..7930b868fa 100644 ---- a/hw/usb/hcd-uhci.c -+++ b/hw/usb/hcd-uhci.c -@@ -1166,11 +1166,13 @@ void 
usb_uhci_common_realize(PCIDevice *dev, Error **errp) - UHCIState *s = UHCI(dev); - uint8_t *pci_conf = s->dev.config; - int i; -+ int irq_pin; - - pci_conf[PCI_CLASS_PROG] = 0x00; - /* TODO: reset value should be 0. */ - pci_conf[USB_SBRN] = USB_RELEASE_1; /* release number */ -- pci_config_set_interrupt_pin(pci_conf, u->info.irq_pin + 1); -+ irq_pin = u->info.irq_pin; -+ pci_config_set_interrupt_pin(pci_conf, irq_pin + 1); - s->irq = pci_allocate_irq(dev); - - if (s->masterbus) { diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c index e934b1a5b1..e18b05e528 100644 --- a/hw/usb/hcd-xhci-pci.c @@ -584,10 +555,10 @@ index c193f79443..086a1feb1e 100644 #endif diff --git a/include/hw/boards.h b/include/hw/boards.h -index 9c1c190104..b0a6e05b48 100644 +index c92ac8815c..c90a19b4d1 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -441,4 +441,25 @@ extern const size_t hw_compat_2_2_len; +@@ -449,4 +449,25 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; @@ -614,10 +585,10 @@ index 9c1c190104..b0a6e05b48 100644 + #endif diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 5a0dd0c8cf..2cb1ec2bab 100644 +index 4b7ad77a44..9acff96a86 100644 --- a/include/hw/firmware/smbios.h +++ b/include/hw/firmware/smbios.h -@@ -278,7 +278,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); +@@ -272,7 +272,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); void smbios_set_cpuid(uint32_t version, uint32_t features); void smbios_set_defaults(const char *manufacturer, const char *product, const char *version, bool legacy_mode, @@ -630,10 +601,10 @@ index 5a0dd0c8cf..2cb1ec2bab 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 9ab39e428f..7ccc9a1a07 100644 +index 1a27de9c8b..91331059d9 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ 
-107,6 +107,9 @@ struct PCMachineClass { +@@ -113,6 +113,9 @@ struct PCMachineClass { bool smbios_defaults; bool smbios_legacy_mode; bool smbios_uuid_encoded; @@ -644,5 +615,5 @@ index 9ab39e428f..7ccc9a1a07 100644 /* RAM / address space compat: */ bool gigabyte_align; -- -2.27.0 +2.31.1 diff --git a/0008-Add-aarch64-machine-types.patch b/0007-Add-aarch64-machine-types.patch similarity index 75% rename from 0008-Add-aarch64-machine-types.patch rename to 0007-Add-aarch64-machine-types.patch index 07beb75..3c44b11 100644 --- a/0008-Add-aarch64-machine-types.patch +++ b/0007-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 19d36c4519a1a560cce60b212e3afdf7eb026e45 Mon Sep 17 00:00:00 2001 +From 697aaa43e3c0f20fc312f06be6c1093f1ba907e1 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -13,19 +13,32 @@ Rebase notes (6.1.0): - ea4c0b32d9 arm/virt: Register highmem and gic-version as class properties - 895e1fa86a hw/arm/virt: Add 8.5 and 9.0 machine types and remove older ones -Merged patches (6.2.0 RC0): +Rebase notes (7.0.0): +- Added dtb-kaslr-seed option +- Set no_tcg_lpa2 to true + +Merged patches (6.2.0): - 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type - f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type + +Merged patches (7.0.0): +- 3b82be3dd3 redhat: virt-rhel8.5.0: Update machine type compatibility for QEMU 6.2.0 update +- c354a86c9b hw/arm/virt: Register "iommu" as a class property +- c1a2630dc9 hw/arm/virt: Register "its" as a class property +- 9d8c61dc93 hw/arm/virt: Rename default_bus_bypass_iommu +- a1d1b6eeb6 hw/arm/virt: Expose the 'RAS' option +- 47f8fe1b82 hw/arm/virt: Add 9.0 machine type and remove 8.5 one +- ed2346788f hw/arm/virt: Check no_tcg_its and minor style changes --- - hw/arm/virt.c | 205 +++++++++++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 234 +++++++++++++++++++++++++++++++++++++++++- include/hw/arm/virt.h | 8 ++ - 2 files changed, 212 
insertions(+), 1 deletion(-) + 2 files changed, 241 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 5de4d9d73b..7d51824263 100644 +index 6a84031fd7..e06862d22a 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -79,6 +79,7 @@ +@@ -80,6 +80,7 @@ #include "hw/char/pl011.h" #include "qemu/guest-random.h" @@ -33,7 +46,7 @@ index 5de4d9d73b..7d51824263 100644 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -105,7 +106,48 @@ +@@ -106,7 +107,48 @@ DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) @@ -83,7 +96,7 @@ index 5de4d9d73b..7d51824263 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -2180,6 +2222,7 @@ static void machvirt_init(MachineState *machine) +@@ -2250,6 +2292,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -91,7 +104,7 @@ index 5de4d9d73b..7d51824263 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2207,6 +2250,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2277,6 +2320,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -99,15 +112,15 @@ index 5de4d9d73b..7d51824263 100644 static bool virt_get_highmem(Object *obj, Error **errp) { -@@ -2304,6 +2348,7 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, - visit_type_OnOffAuto(v, name, &vms->acpi, errp); +@@ -2402,6 +2446,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) + vms->ras = value; } +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static bool virt_get_ras(Object *obj, Error **errp) + static bool virt_get_mte(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2331,6 +2376,7 
@@ static void virt_set_mte(Object *obj, bool value, Error **errp) +@@ -2415,6 +2460,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) vms->mte = value; } @@ -115,7 +128,7 @@ index 5de4d9d73b..7d51824263 100644 static char *virt_get_gic_version(Object *obj, Error **errp) { -@@ -2666,6 +2712,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2818,6 +2864,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return fixed_ipa ? 0 : requested_pa_size; } @@ -123,7 +136,7 @@ index 5de4d9d73b..7d51824263 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -3031,3 +3078,159 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -3206,3 +3253,188 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -180,6 +193,30 @@ index 5de4d9d73b..7d51824263 100644 + "Set GIC version. " + "Valid values are 2, 3, host and max"); + ++ object_class_property_add_str(oc, "iommu", virt_get_iommu, virt_set_iommu); ++ object_class_property_set_description(oc, "iommu", ++ "Set the IOMMU type. 
" ++ "Valid values are none and smmuv3"); ++ ++ object_class_property_add_bool(oc, "default-bus-bypass-iommu", ++ virt_get_default_bus_bypass_iommu, ++ virt_set_default_bus_bypass_iommu); ++ object_class_property_set_description(oc, "default-bus-bypass-iommu", ++ "Set on/off to enable/disable " ++ "bypass_iommu for default root bus"); ++ ++ object_class_property_add_bool(oc, "ras", virt_get_ras, ++ virt_set_ras); ++ object_class_property_set_description(oc, "ras", ++ "Set on/off to enable/disable reporting host memory errors " ++ "to a KVM guest using ACPI and guest external abort exceptions"); ++ ++ object_class_property_add_bool(oc, "its", virt_get_its, ++ virt_set_its); ++ object_class_property_set_description(oc, "its", ++ "Set on/off to enable/disable " ++ "ITS instantiation"); ++ + object_class_property_add_str(oc, "x-oem-id", + virt_get_oem_id, + virt_set_oem_id); @@ -188,6 +225,7 @@ index 5de4d9d73b..7d51824263 100644 + "in ACPI table header." + "The string may be up to 6 bytes in size"); + ++ + object_class_property_add_str(oc, "x-oem-table-id", + virt_get_oem_table_id, + virt_set_oem_table_id); @@ -195,13 +233,13 @@ index 5de4d9d73b..7d51824263 100644 + "Override the default value of field OEM Table ID " + "in ACPI table header." 
+ "The string may be up to 8 bytes in size"); -+ object_class_property_add_bool(oc, "default_bus_bypass_iommu", -+ virt_get_default_bus_bypass_iommu, -+ virt_set_default_bus_bypass_iommu); -+ object_class_property_set_description(oc, "default_bus_bypass_iommu", -+ "Set on/off to enable/disable " -+ "bypass_iommu for default root bus"); + ++ object_class_property_add_bool(oc, "dtb-kaslr-seed", ++ virt_get_dtb_kaslr_seed, ++ virt_set_dtb_kaslr_seed); ++ object_class_property_set_description(oc, "dtb-kaslr-seed", ++ "Set off to disable passing of kaslr-seed " ++ "dtb node to guest"); +} + +static void rhel_virt_instance_init(Object *obj) @@ -226,19 +264,19 @@ index 5de4d9d73b..7d51824263 100644 + } else { + /* Default allows ITS instantiation */ + vms->its = true; -+ object_property_add_bool(obj, "its", virt_get_its, -+ virt_set_its); -+ object_property_set_description(obj, "its", -+ "Set on/off to enable/disable " -+ "ITS instantiation"); ++ ++ if (vmc->no_tcg_its) { ++ vms->tcg_its = false; ++ } else { ++ vms->tcg_its = true; ++ } + } + + /* Default disallows iommu instantiation */ + vms->iommu = VIRT_IOMMU_NONE; -+ object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu); -+ object_property_set_description(obj, "iommu", -+ "Set the IOMMU type. 
" -+ "Valid values are none and smmuv3"); ++ ++ /* The default root bus is attached to iommu by default */ ++ vms->default_bus_bypass_iommu = false; + + /* Default disallows RAS instantiation and is non-configurable for RHEL */ + vms->ras = false; @@ -246,15 +284,15 @@ index 5de4d9d73b..7d51824263 100644 + /* MTE is disabled by default and non-configurable for RHEL */ + vms->mte = false; + -+ /* The default root bus is attached to iommu by default */ -+ vms->default_bus_bypass_iommu = false; ++ /* Supply a kaslr-seed by default */ ++ vms->dtb_kaslr_seed = true; + + vms->irqmap = a15irqmap; + + virt_flash_create(vms); ++ + vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); + vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); -+ +} + +static const TypeInfo rhel_machine_info = { @@ -277,17 +315,21 @@ index 5de4d9d73b..7d51824263 100644 +} +type_init(rhel_machine_init); + -+static void rhel850_virt_options(MachineClass *mc) ++static void rhel900_virt_options(MachineClass *mc) +{ ++ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); ++ + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ ++ /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ ++ vmc->no_tcg_lpa2 = true; +} -+DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) ++DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index dc6b66ffc8..9364628847 100644 +index 7e76ee2619..9b1efe8f0e 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -175,9 +175,17 @@ struct VirtMachineState { +@@ -179,9 +179,17 @@ struct VirtMachineState { #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -306,5 +348,5 @@ index dc6b66ffc8..9364628847 100644 bool virt_is_acpi_enabled(VirtMachineState *vms); -- -2.27.0 +2.31.1 diff --git a/0009-Add-ppc64-machine-types.patch b/0008-Add-ppc64-machine-types.patch similarity index 93% rename from 0009-Add-ppc64-machine-types.patch rename to 0008-Add-ppc64-machine-types.patch index 90a6ff4..860e803 100644 --- a/0009-Add-ppc64-machine-types.patch +++ b/0008-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 2d595bc1744fc764ef506fd6ed6555f267d01ea4 Mon Sep 17 00:00:00 2001 +From f61b3d7dc000886e23943457ee9baf1d4cae43b4 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -7,7 +7,7 @@ Adding changes to add RHEL machine types for ppc64 architecture. Signed-off-by: Miroslav Rezanina -Rebase notes (6.2.0 rc1): +Rebase notes (6.2.0): - Fixed rebase conflict relicts - Update machine type compat for 6.2 (from MR 66) @@ -30,10 +30,10 @@ Merged patches (6.1.0): 7 files changed, 313 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 3b5fd749be..f4bb5f15f0 100644 +index a4372ba189..5fdf8b506d 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -1593,6 +1593,9 @@ static void spapr_machine_reset(MachineState *machine) +@@ -1622,6 +1622,9 @@ static void spapr_machine_reset(MachineState *machine) pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); @@ -43,7 +43,7 @@ index 3b5fd749be..f4bb5f15f0 100644 first_ppc_cpu = POWERPC_CPU(first_cpu); if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && -@@ -3288,6 +3291,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +@@ -3317,6 +3320,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) spapr->host_serial = g_strdup(value); } @@ -64,7 +64,7 @@ index 3b5fd749be..f4bb5f15f0 100644 static void spapr_instance_init(Object *obj) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); -@@ 
-3366,6 +3383,12 @@ static void spapr_instance_init(Object *obj) +@@ -3395,6 +3412,12 @@ static void spapr_instance_init(Object *obj) spapr_get_host_serial, spapr_set_host_serial); object_property_set_description(obj, "host-serial", "Host serial number to advertise in guest device tree"); @@ -77,7 +77,7 @@ index 3b5fd749be..f4bb5f15f0 100644 } static void spapr_machine_finalizefn(Object *obj) -@@ -4614,6 +4637,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4652,6 +4675,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) vmc->client_architecture_support = spapr_vof_client_architecture_support; vmc->quiesce = spapr_vof_quiesce; vmc->setprop = spapr_vof_setprop; @@ -85,15 +85,15 @@ index 3b5fd749be..f4bb5f15f0 100644 } static const TypeInfo spapr_machine_info = { -@@ -4665,6 +4689,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4703,6 +4727,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-6.2 + * pseries-7.0 */ -@@ -4781,6 +4806,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4830,6 +4855,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -101,7 +101,7 @@ index 3b5fd749be..f4bb5f15f0 100644 /* * pseries-4.0 -@@ -4800,6 +4826,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -4849,6 +4875,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; return true; } @@ -110,7 +110,7 @@ index 3b5fd749be..f4bb5f15f0 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -5127,6 +5155,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -5176,6 +5204,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) 
compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -333,10 +333,10 @@ index 3b5fd749be..f4bb5f15f0 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 8ba34f6a1d..78eca1c04a 100644 +index fcb5dfe792..ab8fb5bf62 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -24,6 +24,7 @@ +@@ -25,6 +25,7 @@ #include "sysemu/reset.h" #include "sysemu/hw_accel.h" #include "qemu/error-report.h" @@ -344,7 +344,7 @@ index 8ba34f6a1d..78eca1c04a 100644 static void spapr_reset_vcpu(PowerPCCPU *cpu) { -@@ -250,6 +251,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -259,6 +260,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, { CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); @@ -352,9 +352,9 @@ index 8ba34f6a1d..78eca1c04a 100644 if (!qdev_realize(DEVICE(cpu), NULL, errp)) { return false; -@@ -261,6 +263,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, - cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); - kvmppc_set_papr(cpu); +@@ -270,6 +272,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + /* Set time-base frequency to 512 MHz. vhyp must be set first. 
*/ + cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); + if (!smc->has_power9_support && + (((spapr->max_compat_pvr && @@ -371,7 +371,7 @@ index 8ba34f6a1d..78eca1c04a 100644 qdev_unrealize(DEVICE(cpu)); return false; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index ee7504b976..fcd5bf9302 100644 +index f5c33dcc86..4a68e0a901 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -154,6 +154,7 @@ struct SpaprMachineClass { @@ -382,7 +382,7 @@ index ee7504b976..fcd5bf9302 100644 bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, -@@ -238,6 +239,9 @@ struct SpaprMachineState { +@@ -241,6 +242,9 @@ struct SpaprMachineState { /* Set by -boot */ char *boot_device; @@ -418,10 +418,10 @@ index 7949a24f5a..f207a9ba01 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index e946da5f3a..23e8b76c85 100644 +index 047b24ba50..79c5ac50b9 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1401,6 +1401,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1462,6 +1462,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -524,5 +524,5 @@ index ee9325bf9a..20dbb95989 100644 { return -1; -- -2.27.0 +2.31.1 diff --git a/0010-Add-s390x-machine-types.patch b/0009-Add-s390x-machine-types.patch similarity index 58% rename from 0010-Add-s390x-machine-types.patch rename to 0009-Add-s390x-machine-types.patch index 1095f3a..2d8b554 100644 --- a/0010-Add-s390x-machine-types.patch +++ b/0009-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From ea22b5ae0a89ef53f31f67bb6845fd6c45d4f412 Mon Sep 17 00:00:00 2001 +From 680f343e58a50a99d17bc7dedd3ee90980912023 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -11,14 +11,38 @@ 
Merged patches (6.1.0): - 64a9a5c971 hw/s390x: Remove the RHEL7-only machine type - 395516d62b redhat: s390x: add rhel-8.5.0 compat machine -Merged patches (6.2.0 RC0): +Merged patches (6.2.0): - 3bf66f4520 redhat: Add s390x machine type compatibility update for 6.1 rebase ---- - hw/s390x/s390-virtio-ccw.c | 67 +++++++++++++++++++++++++++++++++++++- - 1 file changed, 66 insertions(+), 1 deletion(-) +Merged patches (7.0.0): +- e6ff4de4f7 redhat: Add s390x machine type compatibility handling for the rebase to v6.2 +- 4b0efa7e21 redhat: Add rhel8.6.0 and rhel9.0.0 machine types for s390x +- dcc64971bf RHEL: mark old machine types as deprecated (partialy) +--- + hw/core/machine.c | 6 +++ + hw/s390x/s390-virtio-ccw.c | 104 ++++++++++++++++++++++++++++++++++++- + include/hw/boards.h | 2 + + 3 files changed, 111 insertions(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index ea430d844e..77202a3570 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -37,6 +37,12 @@ + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-pci.h" + ++/* ++ * RHEL only: machine types for previous major releases are deprecated ++ */ ++const char *rhel_old_machine_deprecation = ++ "machine types for previous major releases are deprecated"; ++ + /* + * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 653587ea62..4af14cb9ca 100644 +index 90480e7cf9..ec4176a1e0 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -767,7 +767,7 @@ bool css_migration_enabled(void) @@ -35,24 +59,61 @@ index 653587ea62..4af14cb9ca 100644 type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_6_2_instance_options(MachineState *machine) + static void ccw_machine_7_0_instance_options(MachineState *machine) { } -@@ -1100,6 +1101,70 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1115,6 +1116,107 @@ 
static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); +#endif + ++static void ccw_machine_rhel900_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel900_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); ++ ++static void ccw_machine_rhel860_instance_options(MachineState *machine) ++{ ++ /* Note: The -rhel8.6.0 and -rhel9.0.0 machines are technically identical */ ++ ccw_machine_rhel900_instance_options(machine); ++} ++ ++static void ccw_machine_rhel860_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel900_class_options(mc); ++ ++ /* All RHEL machines for prior major releases are deprecated */ ++ mc->deprecation_reason = rhel_old_machine_deprecation; ++} ++DEFINE_CCW_MACHINE(rhel860, "rhel8.6.0", false); ++ +static void ccw_machine_rhel850_instance_options(MachineState *machine) +{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 }; ++ ++ ccw_machine_rhel860_instance_options(machine); ++ ++ s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); ++ ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_VECTOR_PACKED_DECIMAL_ENH2); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_BEAR_ENH); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_RDP); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAI); +} + +static void ccw_machine_rhel850_class_options(MachineClass *mc) +{ ++ ccw_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ mc->smp_props.prefer_sockets = true; +} -+DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); ++DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); + +static void ccw_machine_rhel840_instance_options(MachineState *machine) +{ @@ -109,6 +170,17 @@ index 653587ea62..4af14cb9ca 100644 static void ccw_machine_register_types(void) { +diff 
--git a/include/hw/boards.h b/include/hw/boards.h +index c90a19b4d1..bf59275f18 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -470,4 +470,6 @@ extern const size_t hw_compat_rhel_8_0_len; + extern GlobalProperty hw_compat_rhel_7_6[]; + extern const size_t hw_compat_rhel_7_6_len; + ++extern const char *rhel_old_machine_deprecation; ++ + #endif -- -2.27.0 +2.31.1 diff --git a/0011-Add-x86_64-machine-types.patch b/0010-Add-x86_64-machine-types.patch similarity index 76% rename from 0011-Add-x86_64-machine-types.patch rename to 0010-Add-x86_64-machine-types.patch index aecc3fb..7c48967 100644 --- a/0011-Add-x86_64-machine-types.patch +++ b/0010-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From afe0cbc5cdb98998b37cf48e9a1c87a110d9fbb3 Mon Sep 17 00:00:00 2001 +From 427a575ca57966bc72e1ebf218081da530d435d7 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -10,6 +10,9 @@ Signed-off-by: Miroslav Rezanina Rebase notes (6.1.0): - Update qemu64 cpu spec +Rebase notes (7.0.0): +- Reset alias for all machine-types except latest one + Merged patches (6.1.0): - 59c284ad3b x86: Add x86 rhel8.5 machine types - a8868b42fe redhat: x86: Enable 'kvm-asyncpf-int' by default @@ -19,22 +22,54 @@ Merged patches (6.1.0): - 0215eb3356 Remove RHEL 7.3.0 machine types (only x86_64 changes) - af69d1ca6e Remove RHEL 7.4.0 machine types (only x86_64 changes) - 8f7a74ab78 Remove RHEL 7.5.0 machine types (only x86_64 changes) + +Merged patches (7.0.0): +- eae7d8dd3c x86/rhel machine types: Add pc_rhel_8_5_compat +- 6762f56469 x86/rhel machine types: Wire compat into q35 and i440fx +- 5762101438 rhel machine types/x86: set prefer_sockets +- 9ba9ddc632 x86: Add q35 RHEL 8.6.0 machine type +- 6110d865e5 x86: Add q35 RHEL 9.0.0 machine type +- dcc64971bf RHEL: mark old machine types as deprecated (partialy) +- 6b396f182b RHEL: disable "seqpacket" for "vhost-vsock-device" in rhel8.6.0 --- - hw/i386/pc.c | 114 
+++++++++++++++++++++++- - hw/i386/pc_piix.c | 68 +++++++++++++- - hw/i386/pc_q35.c | 177 ++++++++++++++++++++++++++++++++++++- - include/hw/boards.h | 2 + - include/hw/i386/pc.h | 21 +++++ + hw/core/machine.c | 10 ++ + hw/i386/pc.c | 135 +++++++++++++++++++++- + hw/i386/pc_piix.c | 79 ++++++++++++- + hw/i386/pc_q35.c | 227 ++++++++++++++++++++++++++++++++++++- + hw/s390x/s390-virtio-ccw.c | 1 + + include/hw/boards.h | 5 + + include/hw/i386/pc.h | 24 ++++ target/i386/kvm/kvm-cpu.c | 1 + target/i386/kvm/kvm.c | 4 + tests/qtest/pvpanic-test.c | 5 +- - 8 files changed, 385 insertions(+), 7 deletions(-) + 10 files changed, 484 insertions(+), 7 deletions(-) +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 77202a3570..28989b6e7b 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -43,6 +43,16 @@ + const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + ++GlobalProperty hw_compat_rhel_8_6[] = { ++ /* hw_compat_rhel_8_6 bz 2065589 */ ++ /* ++ * vhost-vsock device in RHEL 8 kernels doesn't support seqpacket, so ++ * we need do disable it downstream on the latest hw_compat_rhel_8. 
++ */ ++ { "vhost-vsock-device", "seqpacket", "off" }, ++}; ++const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); ++ + /* + * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index a2ef40ecbc..b6d2db8d04 100644 +index fd55fc725c..263d882af6 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -371,6 +371,116 @@ GlobalProperty pc_compat_1_4[] = { +@@ -375,6 +375,137 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -54,6 +89,27 @@ index a2ef40ecbc..b6d2db8d04 100644 +}; +const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_8_5_compat[] = { ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "model", "6" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }, ++ ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-build", "0x1bbc" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-major", "0x0006" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-minor", "0x0001" }, ++}; ++const size_t pc_rhel_8_5_compat_len = G_N_ELEMENTS(pc_rhel_8_5_compat); ++ +GlobalProperty pc_rhel_8_4_compat[] = { + /* pc_rhel_8_4_compat from pc_compat_5_2 */ + { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, @@ -151,7 +207,7 @@ index a2ef40ecbc..b6d2db8d04 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1694,6 +1804,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1738,6 +1869,7 @@ static void 
pc_machine_class_init(ObjectClass *oc, void *data) pcmc->pvh_enabled = true; pcmc->kvmclock_create_always = true; assert(!mc->get_hotplug_handler); @@ -159,7 +215,7 @@ index a2ef40ecbc..b6d2db8d04 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1704,7 +1815,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1748,7 +1880,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; mc->block_default_type = IF_IDE; @@ -170,7 +226,7 @@ index a2ef40ecbc..b6d2db8d04 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index dda3f64f19..dabc6c1933 100644 +index c797e98312..0cacc0d623 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -50,6 +50,7 @@ @@ -200,7 +256,7 @@ index dda3f64f19..dabc6c1933 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -951,3 +953,65 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -967,3 +969,76 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -224,6 +280,7 @@ index dda3f64f19..dabc6c1933 100644 + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + m->alias = "pc"; + m->is_default = 1; ++ m->smp_props.prefer_sockets = true; +} + +static void pc_init_rhel760(MachineState *machine) @@ -239,11 +296,21 @@ index dda3f64f19..dabc6c1933 100644 + m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; + m->async_pf_vmexit_disable = true; + m->smbus_no_migration_support = true; ++ ++ /* All RHEL machines for prior major releases are deprecated */ ++ m->deprecation_reason = rhel_old_machine_deprecation; ++ + pcmc->pvh_enabled = false; + pcmc->default_cpu_version = 
CPU_VERSION_LEGACY; + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_6, ++ hw_compat_rhel_8_6_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++ compat_props_add(m->compat_props, pc_rhel_8_5_compat, ++ pc_rhel_8_5_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_4, + hw_compat_rhel_8_4_len); + compat_props_add(m->compat_props, pc_rhel_8_4_compat, @@ -267,7 +334,7 @@ index dda3f64f19..dabc6c1933 100644 +DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, + pc_machine_rhel760_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 235054a643..04c911da18 100644 +index b695f88c45..157160e069 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -197,8 +197,8 @@ static void pc_q35_init(MachineState *machine) @@ -289,7 +356,7 @@ index 235054a643..04c911da18 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -620,3 +621,175 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -631,3 +632,225 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -318,6 +385,48 @@ index 235054a643..04c911da18 100644 + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); +} + ++static void pc_q35_init_rhel900(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel900_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL"; ++ pcmc->smbios_stream_version = "9.0.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel900, "pc-q35-rhel9.0.0", pc_q35_init_rhel900, ++ pc_q35_machine_rhel900_options); ++ ++static void pc_q35_init_rhel860(MachineState *machine) ++{ ++ 
pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel860_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel900_options(m); ++ m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ ++ /* All RHEL machines for prior major releases are deprecated */ ++ m->deprecation_reason = rhel_old_machine_deprecation; ++ ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.6.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_6, ++ hw_compat_rhel_8_6_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel860, "pc-q35-rhel8.6.0", pc_q35_init_rhel860, ++ pc_q35_machine_rhel860_options); ++ ++ +static void pc_q35_init_rhel850(MachineState *machine) +{ + pc_q35_init(machine); @@ -326,10 +435,16 @@ index 235054a643..04c911da18 100644 +static void pc_q35_machine_rhel850_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel860_options(m); + m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.5.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++ compat_props_add(m->compat_props, pc_rhel_8_5_compat, ++ pc_rhel_8_5_compat_len); ++ m->smp_props.prefer_sockets = true; +} + +DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, @@ -369,6 +484,7 @@ index 235054a643..04c911da18 100644 + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel840_options(m); + m->desc = "RHEL-8.3.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.3.0"; + compat_props_add(m->compat_props, hw_compat_rhel_8_3, @@ -394,6 +510,7 @@ index 235054a643..04c911da18 100644 + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_rhel830_options(m); + m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + 
m->numa_mem_supported = true; + m->auto_enable_numa_with_memdev = false; + pcmc->smbios_stream_product = "RHEL-AV"; @@ -465,11 +582,23 @@ index 235054a643..04c911da18 100644 + +DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, + pc_q35_machine_rhel760_options); +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index ec4176a1e0..465a2a09d2 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1136,6 +1136,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) + static void ccw_machine_rhel860_class_options(MachineClass *mc) + { + ccw_machine_rhel900_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); + + /* All RHEL machines for prior major releases are deprecated */ + mc->deprecation_reason = rhel_old_machine_deprecation; diff --git a/include/hw/boards.h b/include/hw/boards.h -index b0a6e05b48..3c3d2ad450 100644 +index bf59275f18..d1555665df 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -263,6 +263,8 @@ struct MachineClass { +@@ -266,6 +266,8 @@ struct MachineClass { strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; bool auto_enable_numa_with_memdev; @@ -478,17 +607,30 @@ index b0a6e05b48..3c3d2ad450 100644 bool ignore_boot_device_suffixes; bool smbus_no_migration_support; bool nvdimm_supported; +@@ -449,6 +451,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_6[]; ++extern const size_t hw_compat_rhel_8_6_len; ++ + extern GlobalProperty hw_compat_rhel_8_5[]; + extern const size_t hw_compat_rhel_8_5_len; + diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 7ccc9a1a07..9689a58b14 100644 +index 91331059d9..419a6ec24b 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -280,6 +280,27 @@ extern const size_t pc_compat_1_5_len; +@@ -289,6 +289,30 @@ 
extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_8_5_compat[]; ++extern const size_t pc_rhel_8_5_compat_len; ++ +extern GlobalProperty pc_rhel_8_4_compat[]; +extern const size_t pc_rhel_8_4_compat_len; + @@ -511,10 +653,10 @@ index 7ccc9a1a07..9689a58b14 100644 * depending on QEMU versions up to QEMU 2.4. */ diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c -index d95028018e..7b004065ae 100644 +index 5eb955ce9a..74c1396a93 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c -@@ -131,6 +131,7 @@ static PropValue kvm_default_props[] = { +@@ -137,6 +137,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -523,10 +665,10 @@ index d95028018e..7b004065ae 100644 }; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index 5a698bde19..a668f521ac 100644 +index 9cf8e03669..6d1e009443 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c -@@ -3336,6 +3336,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3488,6 +3488,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -534,7 +676,7 @@ index 5a698bde19..a668f521ac 100644 kvm_msr_buf_reset(cpu); -@@ -3665,6 +3666,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3822,6 +3823,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -568,5 +710,5 @@ index 6dcad2db49..580c2c43d2 100644 val = qtest_inb(qts, 0x505); g_assert_cmpuint(val, ==, 3); -- -2.27.0 +2.31.1 diff --git a/0011-Enable-make-check.patch b/0011-Enable-make-check.patch new file mode 100644 index 0000000..832b38d --- /dev/null +++ b/0011-Enable-make-check.patch @@ -0,0 +1,186 @@ +From 5e419e5e0a721bdbbfa6d9b82c8be5c5b3d26a01 Mon Sep 17 00:00:00 
2001 +From: Miroslav Rezanina +Date: Wed, 2 Sep 2020 09:39:41 +0200 +Subject: Enable make check + +Fixing tests after device disabling and machine types changes and enabling +make check run during build. + +Signed-off-by: Miroslav Rezanina +--- +Rebase changes (6.1.0): +- removed unnecessary test changes + +Rebase changes (6.2.0): +- new way of disabling bios-table-test + +Rebase changes (7.0.0): +- Disable testing virtio-iommu-pci +- Rename default_bus_bypass_iommu property to default-bus-bypass-iommu +- Disable qtest-bios-table for aarch64 +- Removed redhat chunks for boot-serial-test.c, cdrom-test.c and cpu-plug-test.c qtests +- Do not disable boot-order-test, prom-env-test and boot-serial-test qtests +- Use rhel machine type for new intel hda qtest +- Remove unnecessary changes in iotest 051 +- Remove changes in bios-tables-test.c and prom-env-test.c qtests + +Merged patches (6.1.0): +- 2f129df7d3 redhat: Enable the 'test-block-iothread' test again +--- + .distro/qemu-kvm.spec.template | 5 ++--- + tests/qtest/fuzz-e1000e-test.c | 2 +- + tests/qtest/fuzz-virtio-scsi-test.c | 2 +- + tests/qtest/intel-hda-test.c | 2 +- + tests/qtest/libqos/meson.build | 2 +- + tests/qtest/lpc-ich9-test.c | 2 +- + tests/qtest/meson.build | 4 ---- + tests/qtest/usb-hcd-xhci-test.c | 4 ++++ + tests/qtest/virtio-net-failover.c | 1 + + 9 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c +index 66229e6096..947fba73b7 100644 +--- a/tests/qtest/fuzz-e1000e-test.c ++++ b/tests/qtest/fuzz-e1000e-test.c +@@ -17,7 +17,7 @@ static void test_lp1879531_eth_get_rss_ex_dst_addr(void) + { + QTestState *s; + +- s = qtest_init("-nographic -monitor none -serial none -M pc-q35-5.0"); ++ s = qtest_init("-nographic -monitor none -serial none -M pc-q35-rhel8.4.0"); + + qtest_outl(s, 0xcf8, 0x80001010); + qtest_outl(s, 0xcfc, 0xe1020000); +diff --git a/tests/qtest/fuzz-virtio-scsi-test.c b/tests/qtest/fuzz-virtio-scsi-test.c 
+index aaf6d10e18..43727d62ac 100644 +--- a/tests/qtest/fuzz-virtio-scsi-test.c ++++ b/tests/qtest/fuzz-virtio-scsi-test.c +@@ -19,7 +19,7 @@ static void test_mmio_oob_from_memory_region_cache(void) + { + QTestState *s; + +- s = qtest_init("-M pc-q35-5.2 -display none -m 512M " ++ s = qtest_init("-M pc-q35-rhel8.4.0 -display none -m 512M " + "-device virtio-scsi,num_queues=8,addr=03.0 "); + + qtest_outl(s, 0xcf8, 0x80001811); +diff --git a/tests/qtest/intel-hda-test.c b/tests/qtest/intel-hda-test.c +index a58c98e4d1..c8387e39ce 100644 +--- a/tests/qtest/intel-hda-test.c ++++ b/tests/qtest/intel-hda-test.c +@@ -38,7 +38,7 @@ static void test_issue542_ich6(void) + { + QTestState *s; + +- s = qtest_init("-nographic -nodefaults -M pc-q35-6.2 " ++ s = qtest_init("-nographic -nodefaults -M pc-q35-rhel9.0.0 " + "-device intel-hda,id=" HDA_ID CODEC_DEVICES); + + qtest_outl(s, 0xcf8, 0x80000804); +diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build +index e988d15791..46f7dcb81a 100644 +--- a/tests/qtest/libqos/meson.build ++++ b/tests/qtest/libqos/meson.build +@@ -41,7 +41,7 @@ libqos_srcs = files('../libqtest.c', + 'virtio-rng.c', + 'virtio-scsi.c', + 'virtio-serial.c', +- 'virtio-iommu.c', ++# 'virtio-iommu.c', + + # qgraph machines: + 'aarch64-xlnx-zcu102-machine.c', +diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c +index fe0bef9980..7a9d51579b 100644 +--- a/tests/qtest/lpc-ich9-test.c ++++ b/tests/qtest/lpc-ich9-test.c +@@ -15,7 +15,7 @@ static void test_lp1878642_pci_bus_get_irq_level_assert(void) + { + QTestState *s; + +- s = qtest_init("-M pc-q35-5.0 " ++ s = qtest_init("-M pc-q35-rhel8.4.0 " + "-nographic -monitor none -serial none"); + + qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ +diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build +index d25f82bb5a..67cd32def1 100644 +--- a/tests/qtest/meson.build ++++ b/tests/qtest/meson.build +@@ -73,7 +73,6 @@ qtests_i386 = \ + 
config_all_devices.has_key('CONFIG_Q35') and \ + config_all_devices.has_key('CONFIG_VIRTIO_PCI') and \ + slirp.found() ? ['virtio-net-failover'] : []) + \ +- (unpack_edk2_blobs ? ['bios-tables-test'] : []) + \ + qtests_pci + \ + ['fdc-test', + 'ide-test', +@@ -86,7 +85,6 @@ qtests_i386 = \ + 'drive_del-test', + 'tco-test', + 'cpu-plug-test', +- 'q35-test', + 'vmgenid-test', + 'migration-test', + 'test-x86-cpuid-compat', +@@ -216,7 +214,6 @@ qtests_arm = \ + + # TODO: once aarch64 TCG is fixed on ARM 32 bit host, make bios-tables-test unconditional + qtests_aarch64 = \ +- (cpu != 'arm' and unpack_edk2_blobs ? ['bios-tables-test'] : []) + \ + (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-test'] : []) + \ + (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-swtpm-test'] : []) + \ + (config_all_devices.has_key('CONFIG_XLNX_ZYNQMP_ARM') ? ['xlnx-can-test', 'fuzz-xlnx-dp-test'] : []) + \ +@@ -231,7 +228,6 @@ qtests_s390x = \ + (config_host.has_key('CONFIG_POSIX') ? 
['test-filter-redirector'] : []) + \ + ['boot-serial-test', + 'drive_del-test', +- 'device-plug-test', + 'virtio-ccw-test', + 'cpu-plug-test', + 'migration-test'] +diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c +index 10ef9d2a91..3855873050 100644 +--- a/tests/qtest/usb-hcd-xhci-test.c ++++ b/tests/qtest/usb-hcd-xhci-test.c +@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) + usb_test_hotplug(global_qtest, "xhci", "1", NULL); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void test_usb_uas_hotplug(void) + { + QTestState *qts = global_qtest; +@@ -36,6 +37,7 @@ static void test_usb_uas_hotplug(void) + qtest_qmp_device_del(qts, "scsihd"); + qtest_qmp_device_del(qts, "uas"); + } ++#endif + + static void test_usb_ccid_hotplug(void) + { +@@ -56,7 +58,9 @@ int main(int argc, char **argv) + + qtest_add_func("/xhci/pci/init", test_xhci_init); + qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); ++#endif + qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); + + qtest_start("-device nec-usb-xhci,id=xhci" +diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c +index 78811f1c92..44de8af00c 100644 +--- a/tests/qtest/virtio-net-failover.c ++++ b/tests/qtest/virtio-net-failover.c +@@ -25,6 +25,7 @@ + #define PCI_SEL_BASE 0x0010 + + #define BASE_MACHINE "-M q35 -nodefaults " \ ++ "-global ICH9-LPC.acpi-pci-hotplug-with-bridge-support=on " \ + "-device pcie-root-port,id=root0,addr=0x1,bus=pcie.0,chassis=1 " \ + "-device pcie-root-port,id=root1,addr=0x2,bus=pcie.0,chassis=2 " + +-- +2.31.1 + diff --git a/0012-Enable-make-check.patch b/0012-Enable-make-check.patch deleted file mode 100644 index 4cd1ecc..0000000 --- a/0012-Enable-make-check.patch +++ /dev/null @@ -1,376 +0,0 @@ -From b071f3eaa77dde1567d70b43d0b2975efe380da2 Mon Sep 17 00:00:00 2001 -From: 
Miroslav Rezanina -Date: Wed, 2 Sep 2020 09:39:41 +0200 -Subject: Enable make check - -Fixing tests after device disabling and machine types changes and enabling -make check run during build. - -Signed-off-by: Miroslav Rezanina ---- -Rebase changes (6.1.0): -- removed unnecessary test changes - -Rebase changes (6.2.0 RC0): -- new way of disabling bios-table-test - -Merged patches (6.1.0): -- 2f129df7d3 redhat: Enable the 'test-block-iothread' test again ---- - .distro/qemu-kvm.spec.template | 5 ++--- - tests/qemu-iotests/051 | 8 ++++---- - tests/qtest/bios-tables-test.c | 6 +++--- - tests/qtest/boot-serial-test.c | 6 +++++- - tests/qtest/cdrom-test.c | 4 ++++ - tests/qtest/cpu-plug-test.c | 4 ++-- - tests/qtest/fuzz-e1000e-test.c | 2 +- - tests/qtest/fuzz-virtio-scsi-test.c | 2 +- - tests/qtest/hd-geo-test.c | 4 ++++ - tests/qtest/lpc-ich9-test.c | 2 +- - tests/qtest/meson.build | 13 ++++--------- - tests/qtest/prom-env-test.c | 4 ++++ - tests/qtest/test-x86-cpuid-compat.c | 2 ++ - tests/qtest/usb-hcd-xhci-test.c | 4 ++++ - 14 files changed, 41 insertions(+), 25 deletions(-) - -diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 -index 1d2fa93a11..c8a2815f54 100755 ---- a/tests/qemu-iotests/051 -+++ b/tests/qemu-iotests/051 -@@ -174,9 +174,9 @@ run_qemu -drive if=virtio - case "$QEMU_DEFAULT_MACHINE" in - pc) - run_qemu -drive if=none,id=disk -device ide-cd,drive=disk -- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk -+# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk - run_qemu -drive if=none,id=disk -device ide-hd,drive=disk -- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk -+# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk - ;; - *) - ;; -@@ -225,9 +225,9 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on - case "$QEMU_DEFAULT_MACHINE" in - pc) - run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device 
ide-cd,drive=disk -- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk -+# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk - run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-hd,drive=disk -- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk -+# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk - ;; - *) - ;; -diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c -index 258874167e..62745181a8 100644 ---- a/tests/qtest/bios-tables-test.c -+++ b/tests/qtest/bios-tables-test.c -@@ -1371,7 +1371,7 @@ static void test_acpi_virt_tcg_numamem(void) - free_test_data(&data); - - } -- -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void test_acpi_virt_tcg_pxb(void) - { - test_data data = { -@@ -1403,7 +1403,7 @@ static void test_acpi_virt_tcg_pxb(void) - - free_test_data(&data); - } -- -+#endif - static void test_acpi_tcg_acpi_hmat(const char *machine) - { - test_data data; -@@ -1644,7 +1644,7 @@ int main(int argc, char *argv[]) - qtest_add_func("acpi/virt", test_acpi_virt_tcg); - qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem); - qtest_add_func("acpi/virt/memhp", test_acpi_virt_tcg_memhp); -- qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb); -+/* qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb); */ - qtest_add_func("acpi/virt/oem-fields", test_acpi_oem_fields_virt); - } - } -diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c -index 83828ba270..294476b959 100644 ---- a/tests/qtest/boot-serial-test.c -+++ b/tests/qtest/boot-serial-test.c -@@ -148,19 +148,23 @@ static testdef_t tests[] = { - { "ppc", "g3beige", "", "PowerPC,750" }, - { "ppc", "mac99", "", "PowerPC,G4" }, - { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, -+#if 0 /* 
Disabled for Red Hat Enterprise Linux */ - { "ppc64", "ppce500", "", "U-Boot" }, - { "ppc64", "40p", "-m 192", "Memory: 192M" }, - { "ppc64", "mac99", "", "PowerPC,970FX" }, -+#endif - { "ppc64", "pseries", - "-machine " PSERIES_DEFAULT_CAPABILITIES, - "Open Firmware" }, -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { "ppc64", "powernv8", "", "OPAL" }, - { "ppc64", "powernv9", "", "OPAL" }, - { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, -+#endif - { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, - { "i386", "pc", "-device sga", "SGABIOS" }, - { "i386", "q35", "-device sga", "SGABIOS" }, -- { "x86_64", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, -+ { "x86_64", "pc", "-cpu qemu32 -device sga", "SGABIOS" }, - { "x86_64", "q35", "-device sga", "SGABIOS" }, - { "sparc", "LX", "", "TMS390S10" }, - { "sparc", "SS-4", "", "MB86904" }, -diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c -index 5af944a5fb..69d9bac38a 100644 ---- a/tests/qtest/cdrom-test.c -+++ b/tests/qtest/cdrom-test.c -@@ -140,6 +140,7 @@ static void add_x86_tests(void) - qtest_add_data_func("cdrom/boot/isapc", "-M isapc " - "-drive if=ide,media=cdrom,file=", test_cdboot); - } -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - qtest_add_data_func("cdrom/boot/am53c974", - "-device am53c974 -device scsi-cd,drive=cd1 " - "-drive if=none,id=cd1,format=raw,file=", test_cdboot); -@@ -155,6 +156,7 @@ static void add_x86_tests(void) - qtest_add_data_func("cdrom/boot/megasas-gen2", "-M q35 " - "-device megasas-gen2 -device scsi-cd,drive=cd1 " - "-blockdev file,node-name=cd1,filename=", test_cdboot); -+#endif - } - - static void add_s390x_tests(void) -@@ -220,6 +222,7 @@ int main(int argc, char **argv) - "magnum", "malta", "pica61", NULL - }; - add_cdrom_param_tests(mips64machines); -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - } else if (g_str_equal(arch, "arm") || g_str_equal(arch, "aarch64")) { - const char *armmachines[] = { - "realview-eb", 
"realview-eb-mpcore", "realview-pb-a8", -@@ -227,6 +230,7 @@ int main(int argc, char **argv) - "vexpress-a9", "virt", NULL - }; - add_cdrom_param_tests(armmachines); -+#endif - } else { - const char *nonemachine[] = { "none", NULL }; - add_cdrom_param_tests(nonemachine); -diff --git a/tests/qtest/cpu-plug-test.c b/tests/qtest/cpu-plug-test.c -index a1c689414b..a8f076711c 100644 ---- a/tests/qtest/cpu-plug-test.c -+++ b/tests/qtest/cpu-plug-test.c -@@ -110,8 +110,8 @@ static void add_pseries_test_case(const char *mname) - char *path; - PlugTestData *data; - -- if (!g_str_has_prefix(mname, "pseries-") || -- (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7)) { -+ if (!g_str_has_prefix(mname, "pseries-rhel") || -+ (g_str_has_prefix(mname, "pseries-rhel7.") && atoi(&mname[14]) < 4)) { - return; - } - data = g_new(PlugTestData, 1); -diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c -index 66229e6096..947fba73b7 100644 ---- a/tests/qtest/fuzz-e1000e-test.c -+++ b/tests/qtest/fuzz-e1000e-test.c -@@ -17,7 +17,7 @@ static void test_lp1879531_eth_get_rss_ex_dst_addr(void) - { - QTestState *s; - -- s = qtest_init("-nographic -monitor none -serial none -M pc-q35-5.0"); -+ s = qtest_init("-nographic -monitor none -serial none -M pc-q35-rhel8.4.0"); - - qtest_outl(s, 0xcf8, 0x80001010); - qtest_outl(s, 0xcfc, 0xe1020000); -diff --git a/tests/qtest/fuzz-virtio-scsi-test.c b/tests/qtest/fuzz-virtio-scsi-test.c -index aaf6d10e18..43727d62ac 100644 ---- a/tests/qtest/fuzz-virtio-scsi-test.c -+++ b/tests/qtest/fuzz-virtio-scsi-test.c -@@ -19,7 +19,7 @@ static void test_mmio_oob_from_memory_region_cache(void) - { - QTestState *s; - -- s = qtest_init("-M pc-q35-5.2 -display none -m 512M " -+ s = qtest_init("-M pc-q35-rhel8.4.0 -display none -m 512M " - "-device virtio-scsi,num_queues=8,addr=03.0 "); - - qtest_outl(s, 0xcf8, 0x80001811); -diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c -index 113126ae06..999ef2aace 100644 ---- 
a/tests/qtest/hd-geo-test.c -+++ b/tests/qtest/hd-geo-test.c -@@ -737,6 +737,7 @@ static void test_override_ide(void) - test_override(args, expected); - } - -+#if 0 /* Require lsi53c895a - not supported on RHEL */ - static void test_override_scsi(void) - { - TestArgs *args = create_args(); -@@ -781,6 +782,7 @@ static void test_override_scsi_2_controllers(void) - add_scsi_disk(args, 3, 1, 0, 1, 2, 0, 1, 0); - test_override(args, expected); - } -+#endif - - static void test_override_virtio_blk(void) - { -@@ -960,9 +962,11 @@ int main(int argc, char **argv) - qtest_add_func("hd-geo/ide/device/user/chst", test_ide_device_user_chst); - if (have_qemu_img()) { - qtest_add_func("hd-geo/override/ide", test_override_ide); -+#if 0 /* Require lsi53c895a - not supported on RHEL */ - qtest_add_func("hd-geo/override/scsi", test_override_scsi); - qtest_add_func("hd-geo/override/scsi_2_controllers", - test_override_scsi_2_controllers); -+#endif - qtest_add_func("hd-geo/override/virtio_blk", test_override_virtio_blk); - qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); - qtest_add_func("hd-geo/override/scsi_hot_unplug", -diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c -index fe0bef9980..7a9d51579b 100644 ---- a/tests/qtest/lpc-ich9-test.c -+++ b/tests/qtest/lpc-ich9-test.c -@@ -15,7 +15,7 @@ static void test_lp1878642_pci_bus_get_irq_level_assert(void) - { - QTestState *s; - -- s = qtest_init("-M pc-q35-5.0 " -+ s = qtest_init("-M pc-q35-rhel8.4.0 " - "-nographic -monitor none -serial none"); - - qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ -diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index c9d8458062..049e06c057 100644 ---- a/tests/qtest/meson.build -+++ b/tests/qtest/meson.build -@@ -68,7 +68,6 @@ qtests_i386 = \ - (config_all_devices.has_key('CONFIG_RTL8139_PCI') ? ['rtl8139-test'] : []) + \ - (config_all_devices.has_key('CONFIG_E1000E_PCI_EXPRESS') ? 
['fuzz-e1000e-test'] : []) + \ - (config_all_devices.has_key('CONFIG_ESP_PCI') ? ['am53c974-test'] : []) + \ -- (unpack_edk2_blobs ? ['bios-tables-test'] : []) + \ - qtests_pci + \ - ['fdc-test', - 'ide-test', -@@ -81,7 +80,6 @@ qtests_i386 = \ - 'drive_del-test', - 'tco-test', - 'cpu-plug-test', -- 'q35-test', - 'vmgenid-test', - 'migration-test', - 'test-x86-cpuid-compat', -@@ -130,17 +128,15 @@ qtests_mips64el = \ - - qtests_ppc = \ - (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) + \ -- (config_all_devices.has_key('CONFIG_M48T59') ? ['m48t59-test'] : []) + \ -- ['boot-order-test', 'prom-env-test', 'boot-serial-test'] \ -+ (config_all_devices.has_key('CONFIG_M48T59') ? ['m48t59-test'] : []) - - qtests_ppc64 = \ - (config_all_devices.has_key('CONFIG_PSERIES') ? ['device-plug-test'] : []) + \ - (config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-xscom-test'] : []) + \ - (config_all_devices.has_key('CONFIG_PSERIES') ? ['rtas-test'] : []) + \ -- (slirp.found() ? ['pxe-test', 'test-netfilter'] : []) + \ -+ (slirp.found() ? ['pxe-test'] : []) + \ - (config_all_devices.has_key('CONFIG_USB_UHCI') ? ['usb-hcd-uhci-test'] : []) + \ - (config_all_devices.has_key('CONFIG_USB_XHCI_NEC') ? ['usb-hcd-xhci-test'] : []) + \ -- (config_host.has_key('CONFIG_POSIX') ? ['test-filter-mirror'] : []) + \ - qtests_pci + ['migration-test', 'numa-test', 'cpu-plug-test', 'drive_del-test'] - - qtests_sh4 = (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) -@@ -186,8 +182,8 @@ qtests_aarch64 = \ - ['arm-cpu-features', - 'numa-test', - 'boot-serial-test', -- 'xlnx-can-test', -- 'fuzz-xlnx-dp-test', -+# 'xlnx-can-test', -+# 'fuzz-xlnx-dp-test', - 'migration-test'] - - qtests_s390x = \ -@@ -196,7 +192,6 @@ qtests_s390x = \ - (config_host.has_key('CONFIG_POSIX') ? 
['test-filter-redirector'] : []) + \ - ['boot-serial-test', - 'drive_del-test', -- 'device-plug-test', - 'virtio-ccw-test', - 'cpu-plug-test', - 'migration-test'] -diff --git a/tests/qtest/prom-env-test.c b/tests/qtest/prom-env-test.c -index f41d80154a..f8dc478ce8 100644 ---- a/tests/qtest/prom-env-test.c -+++ b/tests/qtest/prom-env-test.c -@@ -89,10 +89,14 @@ int main(int argc, char *argv[]) - if (!strcmp(arch, "ppc")) { - add_tests(ppc_machines); - } else if (!strcmp(arch, "ppc64")) { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - add_tests(ppc_machines); - if (g_test_slow()) { -+#endif - qtest_add_data_func("prom-env/pseries", "pseries", test_machine); -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - } -+#endif - } else if (!strcmp(arch, "sparc")) { - add_tests(sparc_machines); - } else if (!strcmp(arch, "sparc64")) { -diff --git a/tests/qtest/test-x86-cpuid-compat.c b/tests/qtest/test-x86-cpuid-compat.c -index f28848e06e..6b2fd398a2 100644 ---- a/tests/qtest/test-x86-cpuid-compat.c -+++ b/tests/qtest/test-x86-cpuid-compat.c -@@ -300,6 +300,7 @@ int main(int argc, char **argv) - "-cpu 486,xlevel2=0xC0000002,xstore=on", - "xlevel2", 0xC0000002); - -+#if 0 /* Disabled in Red Hat Enterprise Linux */ - /* Check compatibility of old machine-types that didn't - * auto-increase level/xlevel/xlevel2: */ - -@@ -350,6 +351,7 @@ int main(int argc, char **argv) - add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", - "-machine pc-i440fx-2.4 -cpu SandyBridge,svm=on,npt=on", - "xlevel", 0x80000008); -+#endif - - /* Test feature parsing */ - add_feature_test("x86/cpuid/features/plus", -diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c -index 10ef9d2a91..3855873050 100644 ---- a/tests/qtest/usb-hcd-xhci-test.c -+++ b/tests/qtest/usb-hcd-xhci-test.c -@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) - usb_test_hotplug(global_qtest, "xhci", "1", NULL); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void 
test_usb_uas_hotplug(void) - { - QTestState *qts = global_qtest; -@@ -36,6 +37,7 @@ static void test_usb_uas_hotplug(void) - qtest_qmp_device_del(qts, "scsihd"); - qtest_qmp_device_del(qts, "uas"); - } -+#endif - - static void test_usb_ccid_hotplug(void) - { -@@ -56,7 +58,9 @@ int main(int argc, char **argv) - - qtest_add_func("/xhci/pci/init", test_xhci_init); - qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); -+#endif - qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); - - qtest_start("-device nec-usb-xhci,id=xhci" --- -2.27.0 - diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch similarity index 93% rename from 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch rename to 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch index 44db7e1..c9e42b2 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 55dcef9d806aa530f10e3ca42eb24d52f850d674 Mon Sep 17 00:00:00 2001 +From c358fd4c224a9c3f64b4a8fff34cc6b1dc201fa0 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -32,7 +32,7 @@ Signed-off-by: Bandan Das 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 7b45353ce2..eb725a3aee 100644 +index 67a183f17b..1e20f9fd59 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -45,6 +45,9 @@ @@ -45,7 +45,7 @@ index 7b45353ce2..eb725a3aee 100644 static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); -@@ -2807,9 +2810,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) +@@ -2810,9 +2813,30 @@ static void vfio_realize(PCIDevice 
*pdev, Error **errp) ssize_t len; struct stat st; int groupid; @@ -77,7 +77,7 @@ index 7b45353ce2..eb725a3aee 100644 if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3246,6 +3270,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3249,6 +3273,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -100,5 +100,5 @@ index 64777516d1..e0fe6ca97e 100644 uint32_t device_id; uint32_t sub_vendor_id; -- -2.27.0 +2.31.1 diff --git a/0014-Add-support-statement-to-help-output.patch b/0013-Add-support-statement-to-help-output.patch similarity index 86% rename from 0014-Add-support-statement-to-help-output.patch rename to 0013-Add-support-statement-to-help-output.patch index 095b9db..4826ea4 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0013-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From fcccb5c061b8bbae29de59637c5ad4cf4416281b Mon Sep 17 00:00:00 2001 +From ba0c7a5f6b9a1f75666db6b3b795ddf03695dc26 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -21,10 +21,10 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c -index 620a1f1367..d46b8fb4ab 100644 +index 6f646531a0..9d5dab43d2 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c -@@ -827,9 +827,17 @@ static void version(void) +@@ -831,9 +831,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -41,8 +41,8 @@ index 620a1f1367..d46b8fb4ab 100644 + print_rh_warning(); printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", - error_get_progname()); -@@ -855,6 +863,7 @@ static void help(int exitcode) + g_get_prgname()); +@@ -859,6 +867,7 @@ static void help(int exitcode) 
"\n" QEMU_HELP_BOTTOM "\n"); @@ -51,5 +51,5 @@ index 620a1f1367..d46b8fb4ab 100644 } -- -2.27.0 +2.31.1 diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0014-globally-limit-the-maximum-number-of-CPUs.patch similarity index 89% rename from 0015-globally-limit-the-maximum-number-of-CPUs.patch rename to 0014-globally-limit-the-maximum-number-of-CPUs.patch index 50c1e79..6764a84 100644 --- a/0015-globally-limit-the-maximum-number-of-CPUs.patch +++ b/0014-globally-limit-the-maximum-number-of-CPUs.patch @@ -1,4 +1,4 @@ -From 354c9ce982e566ddb3c724a57252986dcb7c36db Mon Sep 17 00:00:00 2001 +From 9ebfd2f6cfa8e79c92e58fd169f90cc768fb865a Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 21 Jan 2014 10:46:52 +0100 Subject: globally limit the maximum number of CPUs @@ -18,10 +18,10 @@ Signed-off-by: Danilo Cesar Lemes de Paula 1 file changed, 12 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index eecd8031cf..8f2a53438f 100644 +index 5f1377ca04..fdf0e4d429 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c -@@ -2423,6 +2423,18 @@ static int kvm_init(MachineState *ms) +@@ -2430,6 +2430,18 @@ static int kvm_init(MachineState *ms) soft_vcpus_limit = kvm_recommended_vcpus(s); hard_vcpus_limit = kvm_max_vcpus(s); @@ -41,5 +41,5 @@ index eecd8031cf..8f2a53438f 100644 if (nc->num > soft_vcpus_limit) { warn_report("Number of %s cpus requested (%d) exceeds " -- -2.27.0 +2.31.1 diff --git a/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch new file mode 100644 index 0000000..722484d --- /dev/null +++ b/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -0,0 +1,61 @@ +From 4b6c8cdc52fdf94d4098d278defb3833dce1d189 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 8 Jul 2020 08:35:50 +0200 +Subject: Use qemu-kvm in documentation instead of qemu-system- + +Patchwork-id: 62380 +O-Subject: [RHEV-7.1 qemu-kvm-rhev PATCHv4] Use 
qemu-kvm in documentation instead of qemu-system-i386 +Bugzilla: 1140620 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi + +From: Miroslav Rezanina + +We change the name and location of qemu-kvm binaries. Update documentation +to reflect this change. Only architectures available in RHEL are updated. + +Signed-off-by: Miroslav Rezanina +--- + docs/defs.rst.inc | 4 ++-- + qemu-options.hx | 10 +++++----- + 2 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/docs/defs.rst.inc b/docs/defs.rst.inc +index 52d6454b93..d74dbdeca9 100644 +--- a/docs/defs.rst.inc ++++ b/docs/defs.rst.inc +@@ -9,7 +9,7 @@ + but the manpages will end up misrendered with following normal text + incorrectly in boldface. + +-.. |qemu_system| replace:: qemu-system-x86_64 +-.. |qemu_system_x86| replace:: qemu-system-x86_64 ++.. |qemu_system| replace:: qemu-kvm ++.. |qemu_system_x86| replace:: qemu-kvm + .. |I2C| replace:: I\ :sup:`2`\ C + .. |I2S| replace:: I\ :sup:`2`\ S +diff --git a/qemu-options.hx b/qemu-options.hx +index 34e9b32a5c..924f61ab6d 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -3233,11 +3233,11 @@ SRST + + :: + +- qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ +- -numa node,memdev=mem \ +- -chardev socket,id=chr0,path=/path/to/socket \ +- -netdev type=vhost-user,id=net0,chardev=chr0 \ +- -device virtio-net-pci,netdev=net0 ++ qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ ++ -numa node,memdev=mem \ ++ -chardev socket,id=chr0,path=/path/to/socket \ ++ -netdev type=vhost-user,id=net0,chardev=chr0 \ ++ -device virtio-net-pci,netdev=net0 + + ``-netdev vhost-vdpa,vhostdev=/path/to/dev`` + Establish a vhost-vdpa netdev. 
+-- +2.31.1 + diff --git a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch deleted file mode 100644 index 415bcc2..0000000 --- a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ /dev/null @@ -1,120 +0,0 @@ -From b057b4ebec0f87f21ba4a15adbb2a1bde7240ed5 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 8 Jul 2020 08:35:50 +0200 -Subject: Use qemu-kvm in documentation instead of qemu-system- - -Patchwork-id: 62380 -O-Subject: [RHEV-7.1 qemu-kvm-rhev PATCHv4] Use qemu-kvm in documentation instead of qemu-system-i386 -Bugzilla: 1140620 -RH-Acked-by: Laszlo Ersek -RH-Acked-by: Markus Armbruster -RH-Acked-by: Stefan Hajnoczi - -From: Miroslav Rezanina - -We change the name and location of qemu-kvm binaries. Update documentation -to reflect this change. Only architectures available in RHEL are updated. - -Signed-off-by: Miroslav Rezanina ---- - docs/defs.rst.inc | 4 ++-- - docs/tools/qemu-trace-stap.rst | 14 +++++++------- - qemu-options.hx | 10 +++++----- - 3 files changed, 14 insertions(+), 14 deletions(-) - -diff --git a/docs/defs.rst.inc b/docs/defs.rst.inc -index 52d6454b93..d74dbdeca9 100644 ---- a/docs/defs.rst.inc -+++ b/docs/defs.rst.inc -@@ -9,7 +9,7 @@ - but the manpages will end up misrendered with following normal text - incorrectly in boldface. - --.. |qemu_system| replace:: qemu-system-x86_64 --.. |qemu_system_x86| replace:: qemu-system-x86_64 -+.. |qemu_system| replace:: qemu-kvm -+.. |qemu_system_x86| replace:: qemu-kvm - .. |I2C| replace:: I\ :sup:`2`\ C - .. |I2S| replace:: I\ :sup:`2`\ S -diff --git a/docs/tools/qemu-trace-stap.rst b/docs/tools/qemu-trace-stap.rst -index d53073b52b..9e93df084f 100644 ---- a/docs/tools/qemu-trace-stap.rst -+++ b/docs/tools/qemu-trace-stap.rst -@@ -46,19 +46,19 @@ The following commands are valid: - any of the listed names. If no *PATTERN* is given, the all possible - probes will be listed. 
- -- For example, to list all probes available in the ``qemu-system-x86_64`` -+ For example, to list all probes available in the ``qemu-kvm`` - binary: - - :: - -- $ qemu-trace-stap list qemu-system-x86_64 -+ $ qemu-trace-stap list qemu-kvm - - To filter the list to only cover probes related to QEMU's cryptographic - subsystem, in a binary outside ``$PATH`` - - :: - -- $ qemu-trace-stap list /opt/qemu/4.0.0/bin/qemu-system-x86_64 'qcrypto*' -+ $ qemu-trace-stap list /opt/qemu/4.0.0/bin/qemu-kvm 'qcrypto*' - - .. option:: run OPTIONS BINARY PATTERN... - -@@ -90,18 +90,18 @@ The following commands are valid: - Restrict the tracing session so that it only triggers for the process - identified by *PID*. - -- For example, to monitor all processes executing ``qemu-system-x86_64`` -+ For example, to monitor all processes executing ``qemu-kvm`` - as found on ``$PATH``, displaying all I/O related probes: - - :: - -- $ qemu-trace-stap run qemu-system-x86_64 'qio*' -+ $ qemu-trace-stap run qemu-kvm 'qio*' - - To monitor only the QEMU process with PID 1732 - - :: - -- $ qemu-trace-stap run --pid=1732 qemu-system-x86_64 'qio*' -+ $ qemu-trace-stap run --pid=1732 qemu-kvm 'qio*' - - To monitor QEMU processes running an alternative binary outside of - ``$PATH``, displaying verbose information about setup of the -@@ -109,7 +109,7 @@ The following commands are valid: - - :: - -- $ qemu-trace-stap -v run /opt/qemu/4.0.0/qemu-system-x86_64 'qio*' -+ $ qemu-trace-stap -v run /opt/qemu/4.0.0/qemu-kvm 'qio*' - - See also - -------- -diff --git a/qemu-options.hx b/qemu-options.hx -index ae2c6dbbfc..94c4a8dbaf 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -3150,11 +3150,11 @@ SRST - - :: - -- qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ -- -numa node,memdev=mem \ -- -chardev socket,id=chr0,path=/path/to/socket \ -- -netdev type=vhost-user,id=net0,chardev=chr0 \ -- -device virtio-net-pci,netdev=net0 -+ qemu-kvm -m 512 -object 
memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ -+ -numa node,memdev=mem \ -+ -chardev socket,id=chr0,path=/path/to/socket \ -+ -netdev type=vhost-user,id=net0,chardev=chr0 \ -+ -device virtio-net-pci,netdev=net0 - - ``-netdev vhost-vdpa,vhostdev=/path/to/dev`` - Establish a vhost-vdpa netdev. --- -2.27.0 - diff --git a/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch similarity index 96% rename from 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch rename to 0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch index 031b551..9f08024 100644 --- a/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ b/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -1,4 +1,4 @@ -From 41fe05330d095f69f12973b0540466439e030047 Mon Sep 17 00:00:00 2001 +From b72e04cb7e417d9e1c973223747ab3a27abda8b4 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 14 Jun 2017 15:37:01 +0200 Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] @@ -42,7 +42,7 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 51fd09522a..a35257c35a 100644 +index 34a968ecfb..7f6da33a8a 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -896,6 +896,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, @@ -62,5 +62,5 @@ index 51fd09522a..a35257c35a 100644 if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { return; -- -2.27.0 +2.31.1 diff --git a/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch similarity index 92% rename from 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch rename to 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index 7f161e8..2bc687c 100644 --- a/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From 295ee942608d48ab167aa76adabf3697c28e6910 Mon Sep 17 00:00:00 2001 +From 64a06662cdea0ff62efb122be4eab506b2a842d9 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts @@ -32,10 +32,10 @@ Signed-off-by: Danilo C. L. 
de Paula 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index ed7c077a0d..48a8efe678 100644 +index 655ab856a0..6aa7f93df9 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c -@@ -332,12 +332,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, +@@ -329,12 +329,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { @@ -56,5 +56,5 @@ index ed7c077a0d..48a8efe678 100644 spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); } -- -2.27.0 +2.31.1 diff --git a/0019-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch similarity index 90% rename from 0019-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch rename to 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch index 7ff46fd..d7401d5 100644 --- a/0019-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +++ b/0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -1,4 +1,4 @@ -From a3bb2330bbf68faabc57b768cdf3ae2f4f4d86f3 Mon Sep 17 00:00:00 2001 +From 54f9157a918e1404f2f17ce89a9c8b9088c1bc06 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 20 Aug 2021 18:25:12 +0200 Subject: qcow2: Deprecation warning when opening v2 images rw @@ -44,10 +44,10 @@ Rebase notes (6.1.0): 2 files changed, 7 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c -index d509016756..37d1c26a71 100644 +index b5c47931ef..a795e457ac 100644 --- a/block/qcow2.c +++ b/block/qcow2.c -@@ -1336,6 +1336,12 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, +@@ -1337,6 +1337,12 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, ret = -ENOTSUP; goto fail; } @@ -61,17 +61,17 @@ index d509016756..37d1c26a71 100644 s->qcow_version = header.version; diff --git a/tests/qemu-iotests/common.filter 
b/tests/qemu-iotests/common.filter -index 2b2b53946c..c5c2dc39bd 100644 +index cc9f1a5891..6a13757177 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter -@@ -82,6 +82,7 @@ _filter_qemu() +@@ -83,6 +83,7 @@ _filter_qemu() { - $SED -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \ + gsed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \ -e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \ + -e "/qcow2 v2 images are deprecated/d" \ -e $'s#\r##' # QEMU monitor uses \r\n line endings } -- -2.27.0 +2.31.1 diff --git a/0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch b/0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch new file mode 100644 index 0000000..285cd6b --- /dev/null +++ b/0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch @@ -0,0 +1,135 @@ +From 1d6439527aa6ccabb58208c94417778ccc19de39 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 9 Feb 2022 04:16:25 -0500 +Subject: WRB: Introduce RHEL 9.0.0 hw compat structure + +General compatibility structure for post RHEL 9.0.0 rebase. 
+ +Signed-off-by: Miroslav Rezanina +--- + hw/core/machine.c | 9 +++++++++ + hw/i386/pc.c | 6 ++++++ + hw/i386/pc_piix.c | 4 ++++ + hw/i386/pc_q35.c | 4 ++++ + hw/s390x/s390-virtio-ccw.c | 2 ++ + include/hw/boards.h | 3 +++ + include/hw/i386/pc.h | 3 +++ + 7 files changed, 31 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 28989b6e7b..dffc3ef4ab 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -53,6 +53,15 @@ GlobalProperty hw_compat_rhel_8_6[] = { + }; + const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); + ++/* ++ * Mostly the same as hw_compat_6_2 ++ */ ++GlobalProperty hw_compat_rhel_9_0[] = { ++ /* hw_compat_rhel_9_0 from hw_compat_6_2 */ ++ { "PIIX4_PM", "x-not-migrate-acpi-index", "on"}, ++}; ++const size_t hw_compat_rhel_9_0_len = G_N_ELEMENTS(hw_compat_rhel_9_0); ++ + /* + * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 263d882af6..0886cfe3fe 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -391,6 +391,12 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_9_0_compat[] = { ++ /* pc_rhel_9_0_compat from pc_compat_6_2 */ ++ { "virtio-mem", "unplugged-inaccessible", "off" }, ++}; ++const size_t pc_rhel_9_0_compat_len = G_N_ELEMENTS(pc_rhel_9_0_compat); ++ + GlobalProperty pc_rhel_8_5_compat[] = { + /* pc_rhel_8_5_compat from pc_compat_6_0 */ + { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 0cacc0d623..dc987fe93b 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1014,6 +1014,10 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_9_0, ++ hw_compat_rhel_9_0_len); ++ compat_props_add(m->compat_props, pc_rhel_9_0_compat, ++ 
pc_rhel_9_0_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_6, + hw_compat_rhel_8_6_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_5, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 157160e069..52c253c570 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -669,6 +669,10 @@ static void pc_q35_machine_rhel900_options(MachineClass *m) + m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.0.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_9_0, ++ hw_compat_rhel_9_0_len); ++ compat_props_add(m->compat_props, pc_rhel_9_0_compat, ++ pc_rhel_9_0_compat_len); + } + + DEFINE_PC_MACHINE(q35_rhel900, "pc-q35-rhel9.0.0", pc_q35_init_rhel900, +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 465a2a09d2..08e0f6a79b 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1118,12 +1118,14 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + DEFINE_CCW_MACHINE(2_4, "2.4", false); + #endif + ++ + static void ccw_machine_rhel900_instance_options(MachineState *machine) + { + } + + static void ccw_machine_rhel900_class_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); + } + DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index d1555665df..635e45dd71 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -451,6 +451,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_9_0[]; ++extern const size_t hw_compat_rhel_9_0_len; ++ + extern GlobalProperty hw_compat_rhel_8_6[]; + extern const size_t hw_compat_rhel_8_6_len; + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 419a6ec24b..a492c420b5 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h 
+@@ -292,6 +292,9 @@ extern const size_t pc_compat_1_4_len; + extern GlobalProperty pc_rhel_compat[]; + extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_9_0_compat[]; ++extern const size_t pc_rhel_9_0_compat_len; ++ + extern GlobalProperty pc_rhel_8_5_compat[]; + extern const size_t pc_rhel_8_5_compat_len; + +-- +2.31.1 + diff --git a/0020-Fix-virtio-net-pci-vectors-compat.patch b/0020-Fix-virtio-net-pci-vectors-compat.patch deleted file mode 100644 index 6caad1d..0000000 --- a/0020-Fix-virtio-net-pci-vectors-compat.patch +++ /dev/null @@ -1,46 +0,0 @@ -From ef5afcc86dc44d1c9d3030a8ceca2018df86c6ec Mon Sep 17 00:00:00 2001 -From: Eduardo Habkost -Date: Tue, 19 Oct 2021 13:17:06 -0400 -Subject: Fix virtio-net-pci* "vectors" compat - -RH-Author: Dr. David Alan Gilbert -RH-MergeRequest: 76: 9.0/6.2.0-rc1 x86 machine type fixes -RH-Commit: [20/22] ebb570f053f96d3558bac49962dc7dc88296c207 -RH-Bugzilla: 2025468 -RH-Acked-by: quintela1 -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Cornelia Huck - -hw_compat_rhel_8_4 has an issue: it affects only "virtio-net-pci" -but not "virtio-net-pci-transitional" and -"virtio-net-pci-non-transitional". The solution is to use the -"virtio-net-pci-base" type in compat_props. - -An equivalent fix will be submitted for hw_compat_5_2 upstream. 
- -Signed-off-by: Eduardo Habkost -(cherry picked from commit d45823ab0d0138b2fbaf2ed1e1896d2052f3ccb3) ---- - hw/core/machine.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 53a3caf4fb..448a8dd127 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -69,7 +69,11 @@ GlobalProperty hw_compat_rhel_8_4[] = { - /* hw_compat_rhel_8_4 from hw_compat_5_2 */ - { "virtio-blk-device", "report-discard-granularity", "off" }, - /* hw_compat_rhel_8_4 from hw_compat_5_2 */ -- { "virtio-net-pci", "vectors", "3"}, -+ /* -+ * Upstream incorrectly had "virtio-net-pci" instead of "virtio-net-pci-base", -+ * (https://bugzilla.redhat.com/show_bug.cgi?id=1999141) -+ */ -+ { "virtio-net-pci-base", "vectors", "3"}, - }; - const size_t hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4); - --- -2.27.0 - diff --git a/0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch b/0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch new file mode 100644 index 0000000..d3b91d0 --- /dev/null +++ b/0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch @@ -0,0 +1,38 @@ +From c8ad21ca31892f8798cf82508c2b2c61bf3b9895 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 4 Apr 2022 12:15:50 +0200 +Subject: redhat: Update s390x machine type compatibility for rebase to QEMU + 7.0.0 + +RH-Author: Thomas Huth +RH-MergeRequest: 143: Update machine type compatibility for QEMU 7.0.0 update [s390x] +RH-Commit: [23/23] 0ecf97d7bdddc50565b5779c64744b353f715cbd +RH-Bugzilla: 2064782 +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +No s390x-specific machine class property updates required this time, +only an update to the default qemu cpu model. 
+ +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 08e0f6a79b..4a491d4988 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1121,6 +1121,9 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); + + static void ccw_machine_rhel900_instance_options(MachineState *machine) + { ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; ++ ++ s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); + } + + static void ccw_machine_rhel900_class_options(MachineClass *mc) +-- +2.31.1 + diff --git a/0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch b/0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch new file mode 100644 index 0000000..f9535a8 --- /dev/null +++ b/0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch @@ -0,0 +1,70 @@ +From 38b89dc24551258b630f09d1c654b6c72b265c79 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 14 Apr 2022 14:58:43 +0100 +Subject: pc: Move s3/s4 suspend disabling to compat + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 155: 7.0 machine type fixes (x86) +RH-Commit: [26/26] 7d666032d5f5dab1444ebba085f92f2de4e86699 +RH-Bugzilla: 2064771 + +Our downstream patches currently have tweaks in the C code to disable +s3/s4; Thomas pointed out we can just set the property. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/acpi/ich9.c | 4 ++-- + hw/acpi/piix4.c | 4 ++-- + hw/i386/pc.c | 6 ++++++ + 3 files changed, 10 insertions(+), 4 deletions(-) + +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index de1e401cdf..bd9bbade70 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -435,8 +435,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; + pm->acpi_memory_hotplug.is_enabled = true; + pm->cpu_hotplug_legacy = true; +- pm->disable_s3 = 1; +- pm->disable_s4 = 1; ++ pm->disable_s3 = 0; ++ pm->disable_s4 = 0; + pm->s4_val = 2; + pm->use_acpi_hotplug_bridge = true; + pm->keep_pci_slot_hpc = true; +diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c +index 28544e78c3..2fb2b43248 100644 +--- a/hw/acpi/piix4.c ++++ b/hw/acpi/piix4.c +@@ -653,8 +653,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + + static Property piix4_pm_properties[] = { + DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), + DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), + DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, + use_acpi_hotplug_bridge, true), +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 0886cfe3fe..f98f842f80 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -380,6 +380,12 @@ const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); + * machine type. 
+ */ + GlobalProperty pc_rhel_compat[] = { ++ /* we don't support s3/s4 suspend */ ++ { "PIIX4_PM", "disable_s3", "1" }, ++ { "PIIX4_PM", "disable_s4", "1" }, ++ { "ICH9-LPC", "disable_s3", "1" }, ++ { "ICH9-LPC", "disable_s4", "1" }, ++ + { TYPE_X86_CPU, "host-phys-bits", "on" }, + { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, + { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, +-- +2.31.1 + diff --git a/0021-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch b/0021-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch deleted file mode 100644 index 8ef276d..0000000 --- a/0021-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch +++ /dev/null @@ -1,75 +0,0 @@ -From eae7d8dd3c3b9aa859a619933f52a4759a42bf66 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 23 Nov 2021 17:57:42 +0000 -Subject: x86/rhel machine types: Add pc_rhel_8_5_compat - -RH-Author: Dr. David Alan Gilbert -RH-MergeRequest: 76: 9.0/6.2.0-rc1 x86 machine type fixes -RH-Commit: [21/22] dd23060695bc0ad892bbfa51d93afe31f5d745c7 -RH-Bugzilla: 2025468 -RH-Acked-by: quintela1 -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Cornelia Huck - -Add pc_rhel_8_5_compat as the merge of pc_compat_6_1 and pc_compat_6_0 -(since 8.5 was based on 6.0). - -Note, x-keep-pci-slot-hpc flipped back and forward, leaving it out -looks like it leaves us with the original. - -Signed-off-by: Dr. 
David Alan Gilbert -Signed-off-by: Miroslav Rezanina ---- - hw/i386/pc.c | 21 +++++++++++++++++++++ - include/hw/i386/pc.h | 3 +++ - 2 files changed, 24 insertions(+) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index b6d2db8d04..4661473d2a 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -387,6 +387,27 @@ GlobalProperty pc_rhel_compat[] = { - }; - const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); - -+GlobalProperty pc_rhel_8_5_compat[] = { -+ /* pc_rhel_8_5_compat from pc_compat_6_0 */ -+ { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, -+ /* pc_rhel_8_5_compat from pc_compat_6_0 */ -+ { "qemu64" "-" TYPE_X86_CPU, "model", "6" }, -+ /* pc_rhel_8_5_compat from pc_compat_6_0 */ -+ { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" }, -+ /* pc_rhel_8_5_compat from pc_compat_6_0 */ -+ { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" }, -+ /* pc_rhel_8_5_compat from pc_compat_6_0 */ -+ { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }, -+ -+ /* pc_rhel_8_5_compat from pc_compat_6_1 */ -+ { TYPE_X86_CPU, "hv-version-id-build", "0x1bbc" }, -+ /* pc_rhel_8_5_compat from pc_compat_6_1 */ -+ { TYPE_X86_CPU, "hv-version-id-major", "0x0006" }, -+ /* pc_rhel_8_5_compat from pc_compat_6_1 */ -+ { TYPE_X86_CPU, "hv-version-id-minor", "0x0001" }, -+}; -+const size_t pc_rhel_8_5_compat_len = G_N_ELEMENTS(pc_rhel_8_5_compat); -+ - GlobalProperty pc_rhel_8_4_compat[] = { - /* pc_rhel_8_4_compat from pc_compat_5_2 */ - { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 9689a58b14..afb570ba14 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -283,6 +283,9 @@ extern const size_t pc_compat_1_4_len; - extern GlobalProperty pc_rhel_compat[]; - extern const size_t pc_rhel_compat_len; - -+extern GlobalProperty pc_rhel_8_5_compat[]; -+extern const size_t pc_rhel_8_5_compat_len; -+ - extern GlobalProperty pc_rhel_8_4_compat[]; - extern const size_t pc_rhel_8_4_compat_len; - --- -2.27.0 - diff --git 
a/0022-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch b/0022-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch deleted file mode 100644 index 85c6b26..0000000 --- a/0022-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 6762f5646943c759ece5972f08eb88364cf0a8ad Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 23 Nov 2021 18:07:49 +0000 -Subject: x86/rhel machine types: Wire compat into q35 and i440fx - -RH-Author: Dr. David Alan Gilbert -RH-MergeRequest: 76: 9.0/6.2.0-rc1 x86 machine type fixes -RH-Commit: [22/22] e2767df0d920773057cb52d346e0106a76cb0a28 -RH-Bugzilla: 2025468 -RH-Acked-by: quintela1 -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Cornelia Huck -Wire the pc_rhel_8_5 compat data into both piix and q35 -to keep the existing machine types compatible. - -Signed-off-by: Dr. David Alan Gilbert -Signed-off-by: Miroslav Rezanina ---- - hw/i386/pc_piix.c | 4 ++++ - hw/i386/pc_q35.c | 4 ++++ - 2 files changed, 8 insertions(+) - -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index dabc6c1933..183b5d5464 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -993,6 +993,10 @@ static void pc_machine_rhel760_options(MachineClass *m) - pcmc->kvmclock_create_always = false; - /* From pc_i440fx_5_1_machine_options() */ - pcmc->pci_root_uid = 1; -+ compat_props_add(m->compat_props, hw_compat_rhel_8_5, -+ hw_compat_rhel_8_5_len); -+ compat_props_add(m->compat_props, pc_rhel_8_5_compat, -+ pc_rhel_8_5_compat_len); - compat_props_add(m->compat_props, hw_compat_rhel_8_4, - hw_compat_rhel_8_4_len); - compat_props_add(m->compat_props, pc_rhel_8_4_compat, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 04c911da18..0e7e885e78 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -658,6 +658,10 @@ static void pc_q35_machine_rhel850_options(MachineClass *m) - m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; - pcmc->smbios_stream_product = "RHEL-AV"; - 
pcmc->smbios_stream_version = "8.5.0"; -+ compat_props_add(m->compat_props, hw_compat_rhel_8_5, -+ hw_compat_rhel_8_5_len); -+ compat_props_add(m->compat_props, pc_rhel_8_5_compat, -+ pc_rhel_8_5_compat_len); - } - - DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, --- -2.27.0 - diff --git a/0023-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch b/0023-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch deleted file mode 100644 index 293854f..0000000 --- a/0023-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 3b82be3dd3d5254baedf82ba2a6cf0412e84a991 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 16 Nov 2021 17:03:07 +0100 -Subject: redhat: virt-rhel8.5.0: Update machine type compatibility for QEMU - 6.2.0 update - -RH-Author: Eric Auger -RH-MergeRequest: 75: redhat: virt-rhel8.5.0: Update machine type compatibility for QEMU 6.2.0 update -RH-Commit: [21/21] f027d13654944e3d34e3356affe7af952eec2bed -RH-Bugzilla: 2022607 -RH-Acked-by: Gavin Shan -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Andrew Jones -RH-Acked-by: Cornelia Huck -RH-Acked-by: Laurent Vivier - -To keep compatibility with 8.5-AV machine type we need to -turn few new options on by default: -smp_props.prefer_sockets, no_cpu_topology, no_tcg_its - -TESTED: migrate from rhel-av-8.5.0 to rhel-8.6.0 and vice-versa -with upstream fix: 33a0c404fb hw/intc/arm_gicv3_its: Revert version -increments in vmstate_its - -Signed-off-by: Eric Auger -Signed-off-by: Miroslav Rezanina ---- - hw/arm/virt.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 7d51824263..6ba9a2c2e1 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3230,7 +3230,12 @@ type_init(rhel_machine_init); - - static void rhel850_virt_options(MachineClass *mc) - { -+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); -+ - compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); - 
compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); -+ mc->smp_props.prefer_sockets = true; -+ vmc->no_cpu_topology = true; -+ vmc->no_tcg_its = true; - } - DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) --- -2.27.0 - diff --git a/0024-redhat-Add-s390x-machine-type-compatibility-handling.patch b/0024-redhat-Add-s390x-machine-type-compatibility-handling.patch deleted file mode 100644 index 9762048..0000000 --- a/0024-redhat-Add-s390x-machine-type-compatibility-handling.patch +++ /dev/null @@ -1,58 +0,0 @@ -From e6ff4de4f7036f88ee63adad6de5ee5dd74f1d99 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 26 Nov 2021 09:37:11 +0100 -Subject: redhat: Add s390x machine type compatibility handling for the rebase - to v6.2 - -RH-Author: Thomas Huth -RH-MergeRequest: 80: Add s390x machine type compatibility handling for the rebase to v6.2 -RH-Commit: [26/26] c45cf594604f6dd23954696b9c84d2025e328d11 -RH-Bugzilla: 2022602 -RH-Acked-by: David Hildenbrand -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cornelia Huck - -Add compatibility handling for the rhel8.5.0 machine type (and -recursively older, of course). 
- -Based on the following upstream commits: - - 463e50da8b - s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z14 GA2 - 30e398f796 - s390x/cpumodel: Add more feature to gen16 default model - 4a0af2930a - machine: Prefer cores over sockets in smp parsing since 6.2 - 2b52619994 - machine: Move smp_prefer_sockets to struct SMPCompatProps - -Signed-off-by: Thomas Huth ---- - hw/s390x/s390-virtio-ccw.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 4af14cb9ca..c654045964 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1105,11 +1105,21 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); - - static void ccw_machine_rhel850_instance_options(MachineState *machine) - { -+ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 }; -+ -+ s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); -+ -+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA); -+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_VECTOR_PACKED_DECIMAL_ENH2); -+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_BEAR_ENH); -+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_RDP); -+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAI); - } - - static void ccw_machine_rhel850_class_options(MachineClass *mc) - { - compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); -+ mc->smp_props.prefer_sockets = true; - } - DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); - --- -2.27.0 - diff --git a/0025-compat-Update-hw_compat_rhel_8_5-with-6.2.0-RC2-chan.patch b/0025-compat-Update-hw_compat_rhel_8_5-with-6.2.0-RC2-chan.patch deleted file mode 100644 index 1fdd794..0000000 --- a/0025-compat-Update-hw_compat_rhel_8_5-with-6.2.0-RC2-chan.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 168f0d56e3a37a7d5fcc59483e2b1181824a23d2 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 24 Nov 2021 23:51:52 -0500 -Subject: compat: Update hw_compat_rhel_8_5 with 6.2.0 RC2 changes - -In RC2 
nvme-ns 'shared' default was changed (commit 916b0f0b52). - -Adding compat record for RHEL 8.5.0 so we keep it off for downstream -machinetypes. - -Signed-off-by: Miroslav Rezanina ---- - hw/core/machine.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 448a8dd127..669d3d8b91 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -55,6 +55,8 @@ GlobalProperty hw_compat_rhel_8_5[] = { - { "vhost-vsock-device", "seqpacket", "off" }, - /* hw_compat_rhel_8_5 from hw_compat_6_1 */ - { "vhost-user-vsock-device", "seqpacket", "off" }, -+ /* hw_compat_rhel_8_5 from hw_compat_6_1 */ -+ { "nvme-ns", "shared", "off" }, - }; - const size_t hw_compat_rhel_8_5_len = G_N_ELEMENTS(hw_compat_rhel_8_5); - --- -2.27.0 - diff --git a/kvm-Enable-SGX-RH-Only.patch b/kvm-Enable-SGX-RH-Only.patch deleted file mode 100644 index 63f335b..0000000 --- a/kvm-Enable-SGX-RH-Only.patch +++ /dev/null @@ -1,28 +0,0 @@ -From f4f7c62a4658a570d3ad694b64463665fa4b80a7 Mon Sep 17 00:00:00 2001 -From: Paul Lai -Date: Fri, 21 Jan 2022 13:14:42 -0500 -Subject: [PATCH 04/12] Enable SGX -- RH Only - -RH-Author: Paul Lai -RH-MergeRequest: 65: Enable SGX and add SGX Numa support -RH-Commit: [4/5] 2cd4ee4a429f5e7b1c32e83a10bf488503603795 -RH-Bugzilla: 2033708 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Bandan Das -RH-Acked-by: Cornelia Huck ---- - configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -index dc03fbb671..327b1bee62 100644 ---- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -101,3 +101,4 @@ CONFIG_TPM=y - CONFIG_TPM_CRB=y - CONFIG_TPM_TIS_ISA=y - CONFIG_TPM_EMULATOR=y -+CONFIG_SGX=y --- -2.27.0 - diff --git a/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch 
b/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch deleted file mode 100644 index 834092c..0000000 --- a/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 9ec1caad56435e14cd80ad23bc8bef8c301bdce4 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Thu, 24 Mar 2022 16:04:57 +0100 -Subject: [PATCH] RHEL: disable "seqpacket" for "vhost-vsock-device" in - rhel8.6.0 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefano Garzarella -RH-MergeRequest: 79: RHEL: disable "seqpacket" for "vhost-vsock-device" in rhel8.6.0 -RH-Commit: [1/1] 1810d35c05538733f82f680aec27d09db8ccbf33 (sgarzarella/qemu-kvm-c-9-s) -RH-Bugzilla: 2065589 -RH-Acked-by: Jason Wang -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Dr. David Alan Gilbert - -vhost-vsock device in RHEL 8 kernels doesn't support seqpacket. -To avoid problems when migrating a VM from RHEL 9 host, we need to -disable it in rhel8-* machine types. - -Signed-off-by: Stefano Garzarella ---- - hw/core/machine.c | 10 ++++++++++ - hw/i386/pc_piix.c | 2 ++ - hw/i386/pc_q35.c | 2 ++ - hw/s390x/s390-virtio-ccw.c | 1 + - include/hw/boards.h | 3 +++ - 5 files changed, 18 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 5fae55d6cd..7dcceb904a 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -43,6 +43,16 @@ - const char *rhel_old_machine_deprecation = - "machine types for previous major releases are deprecated"; - -+GlobalProperty hw_compat_rhel_8_6[] = { -+ /* hw_compat_rhel_8_6 bz 2065589 */ -+ /* -+ * vhost-vsock device in RHEL 8 kernels doesn't support seqpacket, so -+ * we need do disable it downstream on the latest hw_compat_rhel_8. 
-+ */ -+ { "vhost-vsock-device", "seqpacket", "off" }, -+}; -+const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); -+ - /* - * Mostly the same as hw_compat_6_0 and hw_compat_6_1 - */ -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index cf68d7498c..08579366b6 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -998,6 +998,8 @@ static void pc_machine_rhel760_options(MachineClass *m) - pcmc->kvmclock_create_always = false; - /* From pc_i440fx_5_1_machine_options() */ - pcmc->pci_root_uid = 1; -+ compat_props_add(m->compat_props, hw_compat_rhel_8_6, -+ hw_compat_rhel_8_6_len); - compat_props_add(m->compat_props, hw_compat_rhel_8_5, - hw_compat_rhel_8_5_len); - compat_props_add(m->compat_props, pc_rhel_8_5_compat, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index c8e06da084..23dacdd923 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -680,6 +680,8 @@ static void pc_q35_machine_rhel860_options(MachineClass *m) - - pcmc->smbios_stream_product = "RHEL-AV"; - pcmc->smbios_stream_version = "8.6.0"; -+ compat_props_add(m->compat_props, hw_compat_rhel_8_6, -+ hw_compat_rhel_8_6_len); - } - - DEFINE_PC_MACHINE(q35_rhel860, "pc-q35-rhel8.6.0", pc_q35_init_rhel860, -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index cc78a315e3..13bfa4253e 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1121,6 +1121,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) - static void ccw_machine_rhel860_class_options(MachineClass *mc) - { - ccw_machine_rhel900_class_options(mc); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); - - /* All RHEL machines for prior major releases are deprecated */ - mc->deprecation_reason = rhel_old_machine_deprecation; -diff --git a/include/hw/boards.h b/include/hw/boards.h -index 21d8d5528e..b9c12c4bf2 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -443,6 +443,9 @@ extern const size_t 
hw_compat_2_2_len; - extern GlobalProperty hw_compat_2_1[]; - extern const size_t hw_compat_2_1_len; - -+extern GlobalProperty hw_compat_rhel_8_6[]; -+extern const size_t hw_compat_rhel_8_6_len; -+ - extern GlobalProperty hw_compat_rhel_8_5[]; - extern const size_t hw_compat_rhel_8_5_len; - --- -2.31.1 - diff --git a/kvm-RHEL-mark-old-machine-types-as-deprecated.patch b/kvm-RHEL-mark-old-machine-types-as-deprecated.patch deleted file mode 100644 index 0b203cf..0000000 --- a/kvm-RHEL-mark-old-machine-types-as-deprecated.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 5b1b0ebbc938127e7cd0ea1056d8f21b6d51ff0d Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Thu, 3 Mar 2022 10:57:37 +0100 -Subject: [PATCH 1/5] RHEL: mark old machine types as deprecated - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 78: Synchronize with RHEL 9.0.0 build qemu-kvm-6.2.0-11.el9_0.1 -RH-Commit: [1/5] 88a9377cac9d4e9796f63c5726db7dc093c6460d (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 2062828 2062819 2062817 2062813 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cornelia Huck - -We want to make it obvious that we consider machine types for older -RHEL major releases to be deprecated; we only carry them for -compatibility purposes. - -Let's mark all rhel-7.x and rhel-8.x machine type as deprecated via -QEMU's existing deprecation mechanism; those machine types will -continue to work as expected, but commands like 'virsh capabilities', -'virsh dominfo', or the libvirt log will tag the machine as -deprecated. 
- -Signed-off-by: Cornelia Huck - -Forward-port of RHEL 9.0.0 MR 119 (RHEL: mark old machine types as deprecated) ---- - hw/core/machine.c | 6 ++++++ - hw/i386/pc_piix.c | 4 ++++ - hw/i386/pc_q35.c | 4 ++++ - hw/s390x/s390-virtio-ccw.c | 3 +++ - include/hw/boards.h | 2 ++ - 5 files changed, 19 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 669d3d8b91..5fae55d6cd 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -37,6 +37,12 @@ - #include "hw/virtio/virtio.h" - #include "hw/virtio/virtio-pci.h" - -+/* -+ * RHEL only: machine types for previous major releases are deprecated -+ */ -+const char *rhel_old_machine_deprecation = -+ "machine types for previous major releases are deprecated"; -+ - /* - * Mostly the same as hw_compat_6_0 and hw_compat_6_1 - */ -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index fccb7f5fc9..cf68d7498c 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -989,6 +989,10 @@ static void pc_machine_rhel760_options(MachineClass *m) - m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; - m->async_pf_vmexit_disable = true; - m->smbus_no_migration_support = true; -+ -+ /* All RHEL machines for prior major releases are deprecated */ -+ m->deprecation_reason = rhel_old_machine_deprecation; -+ - pcmc->pvh_enabled = false; - pcmc->default_cpu_version = CPU_VERSION_LEGACY; - pcmc->kvmclock_create_always = false; -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index bf9ad32f0e..c8e06da084 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -674,6 +674,10 @@ static void pc_q35_machine_rhel860_options(MachineClass *m) - pc_q35_machine_rhel900_options(m); - m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)"; - m->alias = NULL; -+ -+ /* All RHEL machines for prior major releases are deprecated */ -+ m->deprecation_reason = rhel_old_machine_deprecation; -+ - pcmc->smbios_stream_product = "RHEL-AV"; - pcmc->smbios_stream_version = "8.6.0"; - } -diff --git a/hw/s390x/s390-virtio-ccw.c 
b/hw/s390x/s390-virtio-ccw.c -index 9da6e9b1d4..cc78a315e3 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1121,6 +1121,9 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) - static void ccw_machine_rhel860_class_options(MachineClass *mc) - { - ccw_machine_rhel900_class_options(mc); -+ -+ /* All RHEL machines for prior major releases are deprecated */ -+ mc->deprecation_reason = rhel_old_machine_deprecation; - } - DEFINE_CCW_MACHINE(rhel860, "rhel8.6.0", false); - -diff --git a/include/hw/boards.h b/include/hw/boards.h -index 3c3d2ad450..21d8d5528e 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -464,4 +464,6 @@ extern const size_t hw_compat_rhel_8_0_len; - extern GlobalProperty hw_compat_rhel_7_6[]; - extern const size_t hw_compat_rhel_7_6_len; - -+extern const char *rhel_old_machine_deprecation; -+ - #endif --- -2.31.1 - diff --git a/kvm-Revert-ui-clipboard-Don-t-use-g_autoptr-just-to-free.patch b/kvm-Revert-ui-clipboard-Don-t-use-g_autoptr-just-to-free.patch deleted file mode 100644 index d9d5145..0000000 --- a/kvm-Revert-ui-clipboard-Don-t-use-g_autoptr-just-to-free.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 213d2c6d3138f3570bca36edaacfd1ee86b18967 Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Fri, 4 Feb 2022 06:45:51 +0100 -Subject: [PATCH 1/6] Revert "ui/clipboard: Don't use g_autoptr just to free a - variable" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Gerd Hoffmann -RH-MergeRequest: 75: fix vnc cut+paste crash -RH-Commit: [1/4] 0937d15054ad6e902bc22d1872231504f442ddcc (kraxel/centos-qemu-kvm) -RH-Bugzilla: 2042820 -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Daniel P. Berrangé -RH-Acked-by: Miroslav Rezanina - -This reverts commit 8df1ea81ee6c674522967d056daa8d3748fa3883. 
---- - ui/clipboard.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/ui/clipboard.c b/ui/clipboard.c -index d53576b0f6..d7b008d62a 100644 ---- a/ui/clipboard.c -+++ b/ui/clipboard.c -@@ -44,14 +44,13 @@ void qemu_clipboard_peer_release(QemuClipboardPeer *peer, - - void qemu_clipboard_update(QemuClipboardInfo *info) - { -- QemuClipboardInfo *old = NULL; -+ g_autoptr(QemuClipboardInfo) old = NULL; - assert(info->selection < QEMU_CLIPBOARD_SELECTION__COUNT); - - notifier_list_notify(&clipboard_notifiers, info); - - old = cbinfo[info->selection]; - cbinfo[info->selection] = qemu_clipboard_info_ref(info); -- g_free(old); - } - - QemuClipboardInfo *qemu_clipboard_info(QemuClipboardSelection selection) --- -2.27.0 - diff --git a/kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch b/kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch deleted file mode 100644 index 6fc7f38..0000000 --- a/kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 7b973b9cb7b890eaf9a31c99f5c272b513322ac1 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 3 Feb 2022 15:05:33 +0100 -Subject: [PATCH 1/8] block: Lock AioContext for drain_end in blockdev-reopen - -RH-Author: Kevin Wolf -RH-MergeRequest: 73: block: Lock AioContext for drain_end in blockdev-reopen -RH-Commit: [1/2] db25e999152b0e4f09decade1ac76b9f56cd9706 (kmwolf/centos-qemu-kvm) -RH-Bugzilla: 2046659 -RH-Acked-by: Sergio Lopez -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Reitz - -bdrv_subtree_drained_end() requires the caller to hold the AioContext -lock for the drained node. Not doing this for nodes outside of the main -AioContext leads to crashes when AIO_WAIT_WHILE() needs to wait and -tries to temporarily release the lock. 
- -Fixes: 3908b7a8994fa5ef7a89aa58cd5a02fc58141592 -Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2046659 -Reported-by: Qing Wang -Signed-off-by: Kevin Wolf -Message-Id: <20220203140534.36522-2-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit aba8205be0707b9d108e32254e186ba88107a869) -Signed-off-by: Kevin Wolf ---- - blockdev.c | 11 ++++++++++- - 1 file changed, 10 insertions(+), 1 deletion(-) - -diff --git a/blockdev.c b/blockdev.c -index b35072644e..565f6a81fd 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3562,6 +3562,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) - { - BlockReopenQueue *queue = NULL; - GSList *drained = NULL; -+ GSList *p; - - /* Add each one of the BDS that we want to reopen to the queue */ - for (; reopen_list != NULL; reopen_list = reopen_list->next) { -@@ -3611,7 +3612,15 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) - - fail: - bdrv_reopen_queue_free(queue); -- g_slist_free_full(drained, (GDestroyNotify) bdrv_subtree_drained_end); -+ for (p = drained; p; p = p->next) { -+ BlockDriverState *bs = p->data; -+ AioContext *ctx = bdrv_get_aio_context(bs); -+ -+ aio_context_acquire(ctx); -+ bdrv_subtree_drained_end(bs); -+ aio_context_release(ctx); -+ } -+ g_slist_free(drained); - } - - void qmp_blockdev_del(const char *node_name, Error **errp) --- -2.27.0 - diff --git a/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch b/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch deleted file mode 100644 index 8dbf30f..0000000 --- a/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch +++ /dev/null @@ -1,129 +0,0 @@ -From 87f3b10dc600ac12272ee6cdc67571910ea722f6 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 11 Jan 2022 15:36:12 +0000 -Subject: [PATCH 10/12] block-backend: prevent dangling BDS pointers across - aio_poll() - -RH-Author: Hanna Reitz -RH-MergeRequest: 71: block-backend: 
prevent dangling BDS pointers across aio_poll() -RH-Commit: [1/2] 1b4cab39bf8c933ab910293a29bfceaa9e821068 (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2040123 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Emanuele Giuseppe Esposito - -The BlockBackend root child can change when aio_poll() is invoked. This -happens when a temporary filter node is removed upon blockjob -completion, for example. - -Functions in block/block-backend.c must be aware of this when using a -blk_bs() pointer across aio_poll() because the BlockDriverState refcnt -may reach 0, resulting in a stale pointer. - -One example is scsi_device_purge_requests(), which calls blk_drain() to -wait for in-flight requests to cancel. If the backup blockjob is active, -then the BlockBackend root child is a temporary filter BDS owned by the -blockjob. The blockjob can complete during bdrv_drained_begin() and the -last reference to the BDS is released when the temporary filter node is -removed. This results in a use-after-free when blk_drain() calls -bdrv_drained_end(bs) on the dangling pointer. - -Explicitly hold a reference to bs across block APIs that invoke -aio_poll(). 
- -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2021778 -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2036178 -Signed-off-by: Stefan Hajnoczi -Message-Id: <20220111153613.25453-2-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 1e3552dbd28359d35967b7c28dc86cde1bc29205) -Signed-off-by: Hanna Reitz ---- - block/block-backend.c | 19 +++++++++++++++++-- - 1 file changed, 17 insertions(+), 2 deletions(-) - -diff --git a/block/block-backend.c b/block/block-backend.c -index 12ef80ea17..23e727199b 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -822,16 +822,22 @@ BlockBackend *blk_by_public(BlockBackendPublic *public) - void blk_remove_bs(BlockBackend *blk) - { - ThrottleGroupMember *tgm = &blk->public.throttle_group_member; -- BlockDriverState *bs; - BdrvChild *root; - - notifier_list_notify(&blk->remove_bs_notifiers, blk); - if (tgm->throttle_state) { -- bs = blk_bs(blk); -+ BlockDriverState *bs = blk_bs(blk); -+ -+ /* -+ * Take a ref in case blk_bs() changes across bdrv_drained_begin(), for -+ * example, if a temporary filter node is removed by a blockjob. 
-+ */ -+ bdrv_ref(bs); - bdrv_drained_begin(bs); - throttle_group_detach_aio_context(tgm); - throttle_group_attach_aio_context(tgm, qemu_get_aio_context()); - bdrv_drained_end(bs); -+ bdrv_unref(bs); - } - - blk_update_root_state(blk); -@@ -1705,6 +1711,7 @@ void blk_drain(BlockBackend *blk) - BlockDriverState *bs = blk_bs(blk); - - if (bs) { -+ bdrv_ref(bs); - bdrv_drained_begin(bs); - } - -@@ -1714,6 +1721,7 @@ void blk_drain(BlockBackend *blk) - - if (bs) { - bdrv_drained_end(bs); -+ bdrv_unref(bs); - } - } - -@@ -2044,10 +2052,13 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, - int ret; - - if (bs) { -+ bdrv_ref(bs); -+ - if (update_root_node) { - ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root, - errp); - if (ret < 0) { -+ bdrv_unref(bs); - return ret; - } - } -@@ -2057,6 +2068,8 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, - throttle_group_attach_aio_context(tgm, new_context); - bdrv_drained_end(bs); - } -+ -+ bdrv_unref(bs); - } - - blk->ctx = new_context; -@@ -2326,11 +2339,13 @@ void blk_io_limits_disable(BlockBackend *blk) - ThrottleGroupMember *tgm = &blk->public.throttle_group_member; - assert(tgm->throttle_state); - if (bs) { -+ bdrv_ref(bs); - bdrv_drained_begin(bs); - } - throttle_group_unregister_tgm(tgm); - if (bs) { - bdrv_drained_end(bs); -+ bdrv_unref(bs); - } - } - --- -2.27.0 - diff --git a/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch b/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch deleted file mode 100644 index 5fff268..0000000 --- a/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch +++ /dev/null @@ -1,56 +0,0 @@ -From a6b472de71f6ebbe44025e1348c90e6f1f2b2326 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Tue, 18 Jan 2022 17:59:59 +0100 -Subject: [PATCH 06/12] block/io: Update BSC only if want_zero is true - -RH-Author: Hanna Reitz -RH-MergeRequest: 69: block/io: Update BSC only if want_zero is true -RH-Commit: [1/2] 
ad19ff86c3420cafe5a9e785ee210e482fbc8cd7 (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2041461 -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Kevin Wolf - -We update the block-status cache whenever we get new information from a -bdrv_co_block_status() call to the block driver. However, if we have -passed want_zero=false to that call, it may flag areas containing zeroes -as data, and so we would update the block-status cache with wrong -information. - -Therefore, we should not update the cache with want_zero=false. - -Reported-by: Nir Soffer -Fixes: 0bc329fbb00 ("block: block-status cache for data regions") -Reviewed-by: Nir Soffer -Cc: qemu-stable@nongnu.org -Signed-off-by: Hanna Reitz -Message-Id: <20220118170000.49423-2-hreitz@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Eric Blake -(cherry picked from commit 113b727ce788335cf76f65355d670c9bc130fd75) -Signed-off-by: Hanna Reitz ---- - block/io.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/block/io.c b/block/io.c -index bb0a254def..4e4cb556c5 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -2497,8 +2497,12 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, - * non-protocol nodes, and then it is never used. However, filling - * the cache requires an RCU update, so double check here to avoid - * such an update if possible. -+ * -+ * Check want_zero, because we only want to update the cache when we -+ * have accurate information about what is zero and what is data. 
- */ -- if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && -+ if (want_zero && -+ ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && - QLIST_EMPTY(&bs->children)) - { - /* --- -2.27.0 - diff --git a/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch b/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch deleted file mode 100644 index 24c5b8a..0000000 --- a/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 76b03619435d0b2f0125ee7aa5c94f2b889247de Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Fri, 4 Feb 2022 12:10:08 +0100 -Subject: [PATCH 4/8] block/nbd: Assert there are no timers when closed - -RH-Author: Hanna Reitz -RH-MergeRequest: 74: block/nbd: Handle AioContext changes -RH-Commit: [2/6] 56903457ca35d9c596aeb6827a48f80e8eabd66a (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2033626 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -Our two timers must not remain armed beyond nbd_clear_bdrvstate(), or -they will access freed data when they fire. - -This patch is separate from the patches that actually fix the issue -(HEAD^^ and HEAD^) so that you can run the associated regression iotest -(281) on a configuration that reproducibly exposes the bug. 
- -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Hanna Reitz -Signed-off-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 8a39c381e5e407d2fe5500324323f90a8540fa90) - -Conflict: -- block/nbd.c: open_timer was introduced after the 6.2 release (for - nbd's @open-timeout parameter), and has not been backported, so drop - the assertion that it is NULL - -Signed-off-by: Hanna Reitz ---- - block/nbd.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/block/nbd.c b/block/nbd.c -index b8e5a9b4cc..aab20125d8 100644 ---- a/block/nbd.c -+++ b/block/nbd.c -@@ -108,6 +108,9 @@ static void nbd_clear_bdrvstate(BlockDriverState *bs) - - yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name)); - -+ /* Must not leave timers behind that would access freed data */ -+ assert(!s->reconnect_delay_timer); -+ - object_unref(OBJECT(s->tlscreds)); - qapi_free_SocketAddress(s->saddr); - s->saddr = NULL; --- -2.27.0 - diff --git a/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch b/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch deleted file mode 100644 index 0cdf622..0000000 --- a/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch +++ /dev/null @@ -1,54 +0,0 @@ -From eeb4683ad8c40a03a4e91463ec1d1b651974b744 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Fri, 4 Feb 2022 12:10:06 +0100 -Subject: [PATCH 3/8] block/nbd: Delete reconnect delay timer when done - -RH-Author: Hanna Reitz -RH-MergeRequest: 74: block/nbd: Handle AioContext changes -RH-Commit: [1/6] 34f92910b6ffd256d781109a2b39737fc6ab449c (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2033626 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -We start the reconnect delay timer to cancel the reconnection attempt -after a while. Once nbd_co_do_establish_connection() has returned, this -attempt is over, and we no longer need the timer. 
- -Delete it before returning from nbd_reconnect_attempt(), so that it does -not persist beyond the I/O request that was paused for reconnecting; we -do not want it to fire in a drained section, because all sort of things -can happen in such a section (e.g. the AioContext might be changed, and -we do not want the timer to fire in the wrong context; or the BDS might -even be deleted, and so the timer CB would access already-freed data). - -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Hanna Reitz -Signed-off-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 3ce1fc16bad9c3f8b7b10b451a224d6d76e5c551) -Signed-off-by: Hanna Reitz ---- - block/nbd.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/block/nbd.c b/block/nbd.c -index 5ef462db1b..b8e5a9b4cc 100644 ---- a/block/nbd.c -+++ b/block/nbd.c -@@ -353,6 +353,13 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) - } - - nbd_co_do_establish_connection(s->bs, NULL); -+ -+ /* -+ * The reconnect attempt is done (maybe successfully, maybe not), so -+ * we no longer need this timer. Delete it so it will not outlive -+ * this I/O request (so draining removes all timers). 
-+ */ -+ reconnect_delay_timer_del(s); - } - - static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle) --- -2.27.0 - diff --git a/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch b/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch deleted file mode 100644 index 1cb29e9..0000000 --- a/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 6d9d86cc4e6149d4c0793e8ceb65dab7535a4561 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Fri, 4 Feb 2022 12:10:11 +0100 -Subject: [PATCH 7/8] block/nbd: Move s->ioc on AioContext change - -RH-Author: Hanna Reitz -RH-MergeRequest: 74: block/nbd: Handle AioContext changes -RH-Commit: [5/6] b3c1eb21ac70d64fdac6094468a72cfbe50a30a9 (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2033626 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -s->ioc must always be attached to the NBD node's AioContext. If that -context changes, s->ioc must be attached to the new context. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2033626 -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Hanna Reitz -Signed-off-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit e15f3a66c830e3fce99c9d56c493c2f7078a1225) - -Conflict: -- block/nbd.c: open_timer was added after the 6.2 release, so we need - not (and cannot) assert it is NULL here. 
- -Signed-off-by: Hanna Reitz ---- - block/nbd.c | 41 +++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 41 insertions(+) - -diff --git a/block/nbd.c b/block/nbd.c -index aab20125d8..a3896c7f5f 100644 ---- a/block/nbd.c -+++ b/block/nbd.c -@@ -2003,6 +2003,38 @@ static void nbd_cancel_in_flight(BlockDriverState *bs) - nbd_co_establish_connection_cancel(s->conn); - } - -+static void nbd_attach_aio_context(BlockDriverState *bs, -+ AioContext *new_context) -+{ -+ BDRVNBDState *s = bs->opaque; -+ -+ /* -+ * The reconnect_delay_timer is scheduled in I/O paths when the -+ * connection is lost, to cancel the reconnection attempt after a -+ * given time. Once this attempt is done (successfully or not), -+ * nbd_reconnect_attempt() ensures the timer is deleted before the -+ * respective I/O request is resumed. -+ * Since the AioContext can only be changed when a node is drained, -+ * the reconnect_delay_timer cannot be active here. -+ */ -+ assert(!s->reconnect_delay_timer); -+ -+ if (s->ioc) { -+ qio_channel_attach_aio_context(s->ioc, new_context); -+ } -+} -+ -+static void nbd_detach_aio_context(BlockDriverState *bs) -+{ -+ BDRVNBDState *s = bs->opaque; -+ -+ assert(!s->reconnect_delay_timer); -+ -+ if (s->ioc) { -+ qio_channel_detach_aio_context(s->ioc); -+ } -+} -+ - static BlockDriver bdrv_nbd = { - .format_name = "nbd", - .protocol_name = "nbd", -@@ -2026,6 +2058,9 @@ static BlockDriver bdrv_nbd = { - .bdrv_dirname = nbd_dirname, - .strong_runtime_opts = nbd_strong_runtime_opts, - .bdrv_cancel_in_flight = nbd_cancel_in_flight, -+ -+ .bdrv_attach_aio_context = nbd_attach_aio_context, -+ .bdrv_detach_aio_context = nbd_detach_aio_context, - }; - - static BlockDriver bdrv_nbd_tcp = { -@@ -2051,6 +2086,9 @@ static BlockDriver bdrv_nbd_tcp = { - .bdrv_dirname = nbd_dirname, - .strong_runtime_opts = nbd_strong_runtime_opts, - .bdrv_cancel_in_flight = nbd_cancel_in_flight, -+ -+ .bdrv_attach_aio_context = nbd_attach_aio_context, -+ .bdrv_detach_aio_context = 
nbd_detach_aio_context, - }; - - static BlockDriver bdrv_nbd_unix = { -@@ -2076,6 +2114,9 @@ static BlockDriver bdrv_nbd_unix = { - .bdrv_dirname = nbd_dirname, - .strong_runtime_opts = nbd_strong_runtime_opts, - .bdrv_cancel_in_flight = nbd_cancel_in_flight, -+ -+ .bdrv_attach_aio_context = nbd_attach_aio_context, -+ .bdrv_detach_aio_context = nbd_detach_aio_context, - }; - - static void bdrv_nbd_init(void) --- -2.27.0 - diff --git a/kvm-block-nvme-fix-infinite-loop-in-nvme_free_req_queue_.patch b/kvm-block-nvme-fix-infinite-loop-in-nvme_free_req_queue_.patch deleted file mode 100644 index bdebdc3..0000000 --- a/kvm-block-nvme-fix-infinite-loop-in-nvme_free_req_queue_.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 6989be9d0aa08470f8b287c243dc4bf027d5fbcf Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Wed, 8 Dec 2021 15:22:46 +0000 -Subject: [PATCH 1/2] block/nvme: fix infinite loop in nvme_free_req_queue_cb() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 58: block/nvme: fix infinite loop in nvme_free_req_queue_cb() -RH-Commit: [1/1] 544b3f310d791a20c63b51947de0c6cbb60b0d5b (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2024544 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Hanna Reitz - -When the request free list is exhausted the coroutine waits on -q->free_req_queue for the next free request. Whenever a request is -completed a BH is scheduled to invoke nvme_free_req_queue_cb() and wake -up waiting coroutines. - -1. nvme_get_free_req() waits for a free request: - - while (q->free_req_head == -1) { - ... - trace_nvme_free_req_queue_wait(q->s, q->index); - qemu_co_queue_wait(&q->free_req_queue, &q->lock); - ... - } - -2. 
nvme_free_req_queue_cb() wakes up the coroutine: - - while (qemu_co_enter_next(&q->free_req_queue, &q->lock)) { - ^--- infinite loop when free_req_head == -1 - } - -nvme_free_req_queue_cb() and the coroutine form an infinite loop when -q->free_req_head == -1. Fix this by checking q->free_req_head in -nvme_free_req_queue_cb(). If the free request list is exhausted, don't -wake waiting coroutines. Eventually an in-flight request will complete -and the BH will be scheduled again, guaranteeing forward progress. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Philippe Mathieu-Daudé -Message-id: 20211208152246.244585-1-stefanha@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit cf4fbc3030c974fff726756a7ceef8386cdf500b) -Signed-off-by: Stefan Hajnoczi ---- - block/nvme.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/block/nvme.c b/block/nvme.c -index e4f336d79c..fa360b9b3c 100644 ---- a/block/nvme.c -+++ b/block/nvme.c -@@ -206,8 +206,9 @@ static void nvme_free_req_queue_cb(void *opaque) - NVMeQueuePair *q = opaque; - - qemu_mutex_lock(&q->lock); -- while (qemu_co_enter_next(&q->free_req_queue, &q->lock)) { -- /* Retry all pending requests */ -+ while (q->free_req_head != -1 && -+ qemu_co_enter_next(&q->free_req_queue, &q->lock)) { -+ /* Retry waiting requests */ - } - qemu_mutex_unlock(&q->lock); - } --- -2.27.0 - diff --git a/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch b/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch deleted file mode 100644 index 39aa96c..0000000 --- a/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch +++ /dev/null @@ -1,59 +0,0 @@ -From d374d5aa4485a0c62d6b48eec64491cae2fd0873 Mon Sep 17 00:00:00 2001 -From: Peter Lieven -Date: Thu, 13 Jan 2022 15:44:25 +0100 -Subject: [PATCH 4/5] block/rbd: fix handling of holes in .bdrv_co_block_status - -RH-Author: Stefano Garzarella -RH-MergeRequest: 68: block/rbd: fix handling of holes in .bdrv_co_block_status 
-RH-Commit: [1/2] 8ef178b01885e3c292f7844ccff865b1a8d4faf0 (sgarzarella/qemu-kvm-c-9-s) -RH-Bugzilla: 2034791 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Reitz - -the assumption that we can't hit a hole if we do not diff against a snapshot was wrong. - -We can see a hole in an image if we diff against base if there exists an older snapshot -of the image and we have discarded blocks in the image where the snapshot has data. - -Fix this by simply handling a hole like an unallocated area. There are no callbacks -for unallocated areas so just bail out if we hit a hole. - -Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b -Suggested-by: Ilya Dryomov -Cc: qemu-stable@nongnu.org -Signed-off-by: Peter Lieven -Message-Id: <20220113144426.4036493-2-pl@kamp.de> -Reviewed-by: Ilya Dryomov -Reviewed-by: Stefano Garzarella -Signed-off-by: Kevin Wolf -(cherry picked from commit 9e302f64bb407a9bb097b626da97228c2654cfee) -Signed-off-by: Stefano Garzarella ---- - block/rbd.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/block/rbd.c b/block/rbd.c -index def96292e0..20bb896c4a 100644 ---- a/block/rbd.c -+++ b/block/rbd.c -@@ -1279,11 +1279,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len, - RBDDiffIterateReq *req = opaque; - - assert(req->offs + req->bytes <= offs); -- /* -- * we do not diff against a snapshot so we should never receive a callback -- * for a hole. 
-- */ -- assert(exists); -+ -+ /* treat a hole like an unallocated area and bail out */ -+ if (!exists) { -+ return 0; -+ } - - if (!req->exists && offs > req->offs) { - /* --- -2.27.0 - diff --git a/kvm-block-rbd-workaround-for-ceph-issue-53784.patch b/kvm-block-rbd-workaround-for-ceph-issue-53784.patch deleted file mode 100644 index dd3876e..0000000 --- a/kvm-block-rbd-workaround-for-ceph-issue-53784.patch +++ /dev/null @@ -1,103 +0,0 @@ -From f035b5250529eed8d12e0b93b1b6d6f2c50003f6 Mon Sep 17 00:00:00 2001 -From: Peter Lieven -Date: Thu, 13 Jan 2022 15:44:26 +0100 -Subject: [PATCH 5/5] block/rbd: workaround for ceph issue #53784 - -RH-Author: Stefano Garzarella -RH-MergeRequest: 68: block/rbd: fix handling of holes in .bdrv_co_block_status -RH-Commit: [2/2] 5feaa2e20a77886cc1a84cdf212ade3dcda28289 (sgarzarella/qemu-kvm-c-9-s) -RH-Bugzilla: 2034791 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Reitz - -librbd had a bug until early 2022 that affected all versions of ceph that -supported fast-diff. This bug results in reporting of incorrect offsets -if the offset parameter to rbd_diff_iterate2 is not object aligned. - -This patch works around this bug for pre Quincy versions of librbd. 
- -Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b -Cc: qemu-stable@nongnu.org -Signed-off-by: Peter Lieven -Message-Id: <20220113144426.4036493-3-pl@kamp.de> -Reviewed-by: Ilya Dryomov -Reviewed-by: Stefano Garzarella -Tested-by: Stefano Garzarella -Signed-off-by: Kevin Wolf -(cherry picked from commit fc176116cdea816ceb8dd969080b2b95f58edbc0) -Signed-off-by: Stefano Garzarella ---- - block/rbd.c | 42 ++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 40 insertions(+), 2 deletions(-) - -diff --git a/block/rbd.c b/block/rbd.c -index 20bb896c4a..8f183eba2a 100644 ---- a/block/rbd.c -+++ b/block/rbd.c -@@ -1320,6 +1320,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, - int status, r; - RBDDiffIterateReq req = { .offs = offset }; - uint64_t features, flags; -+ uint64_t head = 0; - - assert(offset + bytes <= s->image_size); - -@@ -1347,7 +1348,43 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, - return status; - } - -- r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, -+#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0) -+ /* -+ * librbd had a bug until early 2022 that affected all versions of ceph that -+ * supported fast-diff. This bug results in reporting of incorrect offsets -+ * if the offset parameter to rbd_diff_iterate2 is not object aligned. -+ * Work around this bug by rounding down the offset to object boundaries. -+ * This is OK because we call rbd_diff_iterate2 with whole_object = true. -+ * However, this workaround only works for non cloned images with default -+ * striping. -+ * -+ * See: https://tracker.ceph.com/issues/53784 -+ */ -+ -+ /* check if RBD image has non-default striping enabled */ -+ if (features & RBD_FEATURE_STRIPINGV2) { -+ return status; -+ } -+ -+#pragma GCC diagnostic push -+#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -+ /* -+ * check if RBD image is a clone (= has a parent). 
-+ * -+ * rbd_get_parent_info is deprecated from Nautilus onwards, but the -+ * replacement rbd_get_parent is not present in Luminous and Mimic. -+ */ -+ if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) { -+ return status; -+ } -+#pragma GCC diagnostic pop -+ -+ head = req.offs & (s->object_size - 1); -+ req.offs -= head; -+ bytes += head; -+#endif -+ -+ r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true, - qemu_rbd_diff_iterate_cb, &req); - if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) { - return status; -@@ -1366,7 +1403,8 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, - status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID; - } - -- *pnum = req.bytes; -+ assert(req.bytes > head); -+ *pnum = req.bytes - head; - return status; - } - --- -2.27.0 - diff --git a/kvm-doc-Add-the-SGX-numa-description.patch b/kvm-doc-Add-the-SGX-numa-description.patch deleted file mode 100644 index 8eac5fa..0000000 --- a/kvm-doc-Add-the-SGX-numa-description.patch +++ /dev/null @@ -1,77 +0,0 @@ -From eb88a12ab1ecfe77bcc0d0067c96fce27a3bde01 Mon Sep 17 00:00:00 2001 -From: Yang Zhong -Date: Mon, 1 Nov 2021 12:20:08 -0400 -Subject: [PATCH 03/12] doc: Add the SGX numa description - -RH-Author: Paul Lai -RH-MergeRequest: 65: Enable SGX and add SGX Numa support -RH-Commit: [3/5] c27b3f6976cbe92cc3c0e1dab0191cdd25de596a -RH-Bugzilla: 2033708 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Bandan Das -RH-Acked-by: Cornelia Huck - -Add the SGX numa reference command and how to check if -SGX numa is support or not with multiple EPC sections. 
- -Signed-off-by: Yang Zhong -Message-Id: <20211101162009.62161-5-yang.zhong@intel.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit d1889b36098c79e2e6ac90faf3d0dc5ec0057677) -Signed-off-by: Paul Lai ---- - docs/system/i386/sgx.rst | 31 +++++++++++++++++++++++++++---- - 1 file changed, 27 insertions(+), 4 deletions(-) - -diff --git a/docs/system/i386/sgx.rst b/docs/system/i386/sgx.rst -index f8fade5ac2..0f0a73f758 100644 ---- a/docs/system/i386/sgx.rst -+++ b/docs/system/i386/sgx.rst -@@ -141,8 +141,7 @@ To launch a SGX guest: - |qemu_system_x86| \\ - -cpu host,+sgx-provisionkey \\ - -object memory-backend-epc,id=mem1,size=64M,prealloc=on \\ -- -object memory-backend-epc,id=mem2,size=28M \\ -- -M sgx-epc.0.memdev=mem1,sgx-epc.1.memdev=mem2 -+ -M sgx-epc.0.memdev=mem1,sgx-epc.0.node=0 - - Utilizing SGX in the guest requires a kernel/OS with SGX support. - The support can be determined in guest by:: -@@ -152,8 +151,32 @@ The support can be determined in guest by:: - and SGX epc info by:: - - $ dmesg | grep sgx -- [ 1.242142] sgx: EPC section 0x180000000-0x181bfffff -- [ 1.242319] sgx: EPC section 0x181c00000-0x1837fffff -+ [ 0.182807] sgx: EPC section 0x140000000-0x143ffffff -+ [ 0.183695] sgx: [Firmware Bug]: Unable to map EPC section to online node. Fallback to the NUMA node 0. -+ -+To launch a SGX numa guest: -+ -+.. 
parsed-literal:: -+ -+ |qemu_system_x86| \\ -+ -cpu host,+sgx-provisionkey \\ -+ -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \\ -+ -object memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \\ -+ -numa node,nodeid=0,cpus=0-1,memdev=node0 \\ -+ -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \\ -+ -object memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \\ -+ -numa node,nodeid=1,cpus=2-3,memdev=node1 \\ -+ -M sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1 -+ -+and SGX epc numa info by:: -+ -+ $ dmesg | grep sgx -+ [ 0.369937] sgx: EPC section 0x180000000-0x183ffffff -+ [ 0.370259] sgx: EPC section 0x184000000-0x185bfffff -+ -+ $ dmesg | grep SRAT -+ [ 0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x180000000-0x183ffffff] -+ [ 0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x184000000-0x185bfffff] - - References - ---------- --- -2.27.0 - diff --git a/kvm-hw-arm-smmuv3-Fix-device-reset.patch b/kvm-hw-arm-smmuv3-Fix-device-reset.patch deleted file mode 100644 index 3b8f307..0000000 --- a/kvm-hw-arm-smmuv3-Fix-device-reset.patch +++ /dev/null @@ -1,61 +0,0 @@ -From c08c3fbb2bb8494738fd34ec8fc9dc434ce82f4b Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 2 Feb 2022 12:16:02 +0100 -Subject: [PATCH 12/12] hw/arm/smmuv3: Fix device reset - -RH-Author: Eric Auger -RH-MergeRequest: 72: hw/arm/smmuv3: Fix device reset -RH-Commit: [1/1] 2cfee2f7a03692681224fed96bb4f28406bf460a (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2042481 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu -RH-Acked-by: Andrew Jones - -branch: c9s -Brew: 42958737 -Upstream: yes - -We currently miss a bunch of register resets in the device reset -function. This sometimes prevents the guest from rebooting after -a system_reset (with virtio-blk-pci). 
For instance, we may get -the following errors: - -invalid STE -smmuv3-iommu-memory-region-0-0 translation failed for iova=0x13a9d2000(SMMU_EVT_C_BAD_STE) -Invalid read at addr 0x13A9D2000, size 2, region '(null)', reason: rejected -invalid STE -smmuv3-iommu-memory-region-0-0 translation failed for iova=0x13a9d2000(SMMU_EVT_C_BAD_STE) -Invalid write at addr 0x13A9D2000, size 2, region '(null)', reason: rejected -invalid STE - -Signed-off-by: Eric Auger -Message-id: 20220202111602.627429-1-eric.auger@redhat.com -Fixes: 10a83cb988 ("hw/arm/smmuv3: Skeleton") -Reviewed-by: Peter Maydell -Signed-off-by: Peter Maydell -(cherry picked from commit 43530095e18fd16dcd51a4b385ad2a22c36f5698) -Signed-off-by: Eric Auger ---- - hw/arm/smmuv3.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index 01b60bee49..1b5640bb98 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -276,6 +276,12 @@ static void smmuv3_init_regs(SMMUv3State *s) - s->features = 0; - s->sid_split = 0; - s->aidr = 0x1; -+ s->cr[0] = 0; -+ s->cr0ack = 0; -+ s->irq_ctrl = 0; -+ s->gerror = 0; -+ s->gerrorn = 0; -+ s->statusr = 0; - } - - static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf, --- -2.27.0 - diff --git a/kvm-hw-arm-virt-Add-9.0-machine-type-and-remove-8.5-one.patch b/kvm-hw-arm-virt-Add-9.0-machine-type-and-remove-8.5-one.patch deleted file mode 100644 index 390a91c..0000000 --- a/kvm-hw-arm-virt-Add-9.0-machine-type-and-remove-8.5-one.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 6b0e129f0758ccd076d1ecbf85c8f1e863788981 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Mon, 20 Dec 2021 10:11:47 +0100 -Subject: [PATCH 5/6] hw/arm/virt: Add 9.0 machine type and remove 8.5 one - -RH-Author: Eric Auger -RH-MergeRequest: 57: hw/arm/virt: Add 9.0 machine type and remove 8.5 one -RH-Commit: [5/6] f573a2fb44882a010e2c6bf5f561f29d54e6e9b5 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2031044 -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones 
-RH-Acked-by: Miroslav Rezanina - -branch: c9s -Brew: 42213566 -Upstream: no - -Add 9.0 machine type and remove 8.5 one. - -Signed-off-by: Eric Auger ---- - hw/arm/virt.c | 10 ++-------- - 1 file changed, 2 insertions(+), 8 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d433139479..d537706a86 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3236,14 +3236,8 @@ static void rhel_machine_init(void) - } - type_init(rhel_machine_init); - --static void rhel850_virt_options(MachineClass *mc) -+static void rhel900_virt_options(MachineClass *mc) - { -- VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); -- - compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); -- compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); -- mc->smp_props.prefer_sockets = true; -- vmc->no_cpu_topology = true; -- vmc->no_tcg_its = true; - } --DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) -+DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) --- -2.27.0 - diff --git a/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch b/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch deleted file mode 100644 index 0e4acf4..0000000 --- a/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 4098f7b5aea8871a655bab43d5114d067662e6c5 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 5 Jan 2022 16:17:10 +0100 -Subject: [PATCH 6/6] hw/arm/virt: Check no_tcg_its and minor style changes - -RH-Author: Eric Auger -RH-MergeRequest: 57: hw/arm/virt: Add 9.0 machine type and remove 8.5 one -RH-Commit: [6/6] 4480d569463fd3f637404539d3bd06b59cafbc88 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2031044 -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones -RH-Acked-by: Miroslav Rezanina - -branch: c9s -Brew: 42213566 -Upstream: no - -Truly allow TCG ITS instantiation according to the no_tcg_its -class flag. Otherwise it is always set to false. 
- -We also take benefit of this patch to do some minor non -functional style changes to be closer to the upstream code. - -Signed-off-by: Eric Auger ---- - hw/arm/virt.c | 16 ++++++++++++---- - 1 file changed, 12 insertions(+), 4 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d537706a86..0e691cbe81 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3163,6 +3163,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - "in ACPI table header." - "The string may be up to 6 bytes in size"); - -+ - object_class_property_add_str(oc, "x-oem-table-id", - virt_get_oem_table_id, - virt_set_oem_table_id); -@@ -3170,6 +3171,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - "Override the default value of field OEM Table ID " - "in ACPI table header." - "The string may be up to 8 bytes in size"); -+ - } - - static void rhel_virt_instance_init(Object *obj) -@@ -3194,26 +3196,32 @@ static void rhel_virt_instance_init(Object *obj) - } else { - /* Default allows ITS instantiation */ - vms->its = true; -+ -+ if (vmc->no_tcg_its) { -+ vms->tcg_its = false; -+ } else { -+ vms->tcg_its = true; -+ } - } - - /* Default disallows iommu instantiation */ - vms->iommu = VIRT_IOMMU_NONE; - -+ /* The default root bus is attached to iommu by default */ -+ vms->default_bus_bypass_iommu = false; -+ - /* Default disallows RAS instantiation and is non-configurable for RHEL */ - vms->ras = false; - - /* MTE is disabled by default and non-configurable for RHEL */ - vms->mte = false; - -- /* The default root bus is attached to iommu by default */ -- vms->default_bus_bypass_iommu = false; -- - vms->irqmap = a15irqmap; - - virt_flash_create(vms); -+ - vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); - vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); -- - } - - static const TypeInfo rhel_machine_info = { --- -2.27.0 - diff --git a/kvm-hw-arm-virt-Expose-the-RAS-option.patch b/kvm-hw-arm-virt-Expose-the-RAS-option.patch deleted file mode 
100644 index e0365c9..0000000 --- a/kvm-hw-arm-virt-Expose-the-RAS-option.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 914d9f9eea5d0a944aa93682b03d3189ad37ec9b Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Mon, 20 Dec 2021 15:34:22 +0100 -Subject: [PATCH 4/6] hw/arm/virt: Expose the 'RAS' option - -RH-Author: Eric Auger -RH-MergeRequest: 57: hw/arm/virt: Add 9.0 machine type and remove 8.5 one -RH-Commit: [4/6] c8704564d31b23a0f08a6ced946c9a81e2e72c11 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2031044 -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones -RH-Acked-by: Miroslav Rezanina - -branch: c9s -Brew: 42213566 -Upstream: no - -In RHEL9.0 we want to expose the 'RAS' option. - -Signed-off-by: Eric Auger ---- - hw/arm/virt.c | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index c99ca93e75..d433139479 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2348,7 +2348,6 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, - visit_type_OnOffAuto(v, name, &vms->acpi, errp); - } - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - static bool virt_get_ras(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2363,6 +2362,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) - vms->ras = value; - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static bool virt_get_mte(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -3143,6 +3143,12 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - "Set on/off to enable/disable " - "bypass_iommu for default root bus"); - -+ object_class_property_add_bool(oc, "ras", virt_get_ras, -+ virt_set_ras); -+ object_class_property_set_description(oc, "ras", -+ "Set on/off to enable/disable reporting host memory errors " -+ "to a KVM guest using ACPI and guest external abort exceptions"); -+ - object_class_property_add_bool(oc, "its", virt_get_its, - virt_set_its); 
- object_class_property_set_description(oc, "its", --- -2.27.0 - diff --git a/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch b/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch deleted file mode 100644 index 6c21c9a..0000000 --- a/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 545076d67ef27203e08538123d8bc3798caf9505 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Mon, 20 Dec 2021 15:50:44 +0100 -Subject: [PATCH 1/6] hw/arm/virt: Register "iommu" as a class property - -RH-Author: Eric Auger -RH-MergeRequest: 57: hw/arm/virt: Add 9.0 machine type and remove 8.5 one -RH-Commit: [1/6] 2b6a22ebddd2da7505961ff4ffe90424f7489300 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2031044 -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones -RH-Acked-by: Miroslav Rezanina - -branch: c9s -Brew: 42213566 -Upstream: no - -Register the "iommu" option as a class property. This mirrors what -was done in upstream commit b91def7b ("arm/virt: Register -most properties as class properties"). - -While we are at it we also move the "x-oem-id" and "x-oem-table-id" -registrations at the very end of the rhel_machine_class_init() -function. This makes our life easier when comparing with upstream. - -Signed-off-by: Eric Auger ---- - hw/arm/virt.c | 23 ++++++++++++----------- - 1 file changed, 12 insertions(+), 11 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 6ba9a2c2e1..7e227b1fa4 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3131,6 +3131,18 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - "Set GIC version. " - "Valid values are 2, 3, host and max"); - -+ object_class_property_add_str(oc, "iommu", virt_get_iommu, virt_set_iommu); -+ object_class_property_set_description(oc, "iommu", -+ "Set the IOMMU type. 
" -+ "Valid values are none and smmuv3"); -+ -+ object_class_property_add_bool(oc, "default_bus_bypass_iommu", -+ virt_get_default_bus_bypass_iommu, -+ virt_set_default_bus_bypass_iommu); -+ object_class_property_set_description(oc, "default_bus_bypass_iommu", -+ "Set on/off to enable/disable " -+ "bypass_iommu for default root bus"); -+ - object_class_property_add_str(oc, "x-oem-id", - virt_get_oem_id, - virt_set_oem_id); -@@ -3146,13 +3158,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - "Override the default value of field OEM Table ID " - "in ACPI table header." - "The string may be up to 8 bytes in size"); -- object_class_property_add_bool(oc, "default_bus_bypass_iommu", -- virt_get_default_bus_bypass_iommu, -- virt_set_default_bus_bypass_iommu); -- object_class_property_set_description(oc, "default_bus_bypass_iommu", -- "Set on/off to enable/disable " -- "bypass_iommu for default root bus"); -- - } - - static void rhel_virt_instance_init(Object *obj) -@@ -3186,10 +3191,6 @@ static void rhel_virt_instance_init(Object *obj) - - /* Default disallows iommu instantiation */ - vms->iommu = VIRT_IOMMU_NONE; -- object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu); -- object_property_set_description(obj, "iommu", -- "Set the IOMMU type. 
" -- "Valid values are none and smmuv3"); - - /* Default disallows RAS instantiation and is non-configurable for RHEL */ - vms->ras = false; --- -2.27.0 - diff --git a/kvm-hw-arm-virt-Register-its-as-a-class-property.patch b/kvm-hw-arm-virt-Register-its-as-a-class-property.patch deleted file mode 100644 index 95f58a4..0000000 --- a/kvm-hw-arm-virt-Register-its-as-a-class-property.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 789933e2598f9a525c2a638feca974ca1730a859 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Mon, 20 Dec 2021 16:04:59 +0100 -Subject: [PATCH 2/6] hw/arm/virt: Register "its" as a class property - -RH-Author: Eric Auger -RH-MergeRequest: 57: hw/arm/virt: Add 9.0 machine type and remove 8.5 one -RH-Commit: [2/6] dbd3e994553f00cd19842824f6bd763863a4e484 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2031044 -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones -RH-Acked-by: Miroslav Rezanina - -branch: c9s -Brew: 42213566 -Upstream: no - -Register "its" as a class property. This mirrors what was done -in commit 27edeeaafe43 ("virt: Register "its" as class property"). 
- -Signed-off-by: Eric Auger ---- - hw/arm/virt.c | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 7e227b1fa4..984151b7dd 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3143,6 +3143,12 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - "Set on/off to enable/disable " - "bypass_iommu for default root bus"); - -+ object_class_property_add_bool(oc, "its", virt_get_its, -+ virt_set_its); -+ object_class_property_set_description(oc, "its", -+ "Set on/off to enable/disable " -+ "ITS instantiation"); -+ - object_class_property_add_str(oc, "x-oem-id", - virt_get_oem_id, - virt_set_oem_id); -@@ -3182,11 +3188,6 @@ static void rhel_virt_instance_init(Object *obj) - } else { - /* Default allows ITS instantiation */ - vms->its = true; -- object_property_add_bool(obj, "its", virt_get_its, -- virt_set_its); -- object_property_set_description(obj, "its", -- "Set on/off to enable/disable " -- "ITS instantiation"); - } - - /* Default disallows iommu instantiation */ --- -2.27.0 - diff --git a/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch b/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch deleted file mode 100644 index eab69d2..0000000 --- a/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 94987c271c3bdc37216c5baa4c5766b9b7f053a1 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Mon, 20 Dec 2021 15:58:38 +0100 -Subject: [PATCH 3/6] hw/arm/virt: Rename default_bus_bypass_iommu - -RH-Author: Eric Auger -RH-MergeRequest: 57: hw/arm/virt: Add 9.0 machine type and remove 8.5 one -RH-Commit: [3/6] 6b66844ae4629d8c879f7c6abbc5e5017a162e16 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2031044 -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones -RH-Acked-by: Miroslav Rezanina - -branch: c9s -Brew: 42213566 -Upstream: no - -Rename "default_bus_bypass_iommu" into "default-bus-bypass-iommu". 
-This mirrors what was done in upstream commit: -9dad363a223 ("hw/arm/virt: Rename default_bus_bypass_iommu") - -Signed-off-by: Eric Auger ---- - hw/arm/virt.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 984151b7dd..c99ca93e75 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3136,10 +3136,10 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - "Set the IOMMU type. " - "Valid values are none and smmuv3"); - -- object_class_property_add_bool(oc, "default_bus_bypass_iommu", -+ object_class_property_add_bool(oc, "default-bus-bypass-iommu", - virt_get_default_bus_bypass_iommu, - virt_set_default_bus_bypass_iommu); -- object_class_property_set_description(oc, "default_bus_bypass_iommu", -+ object_class_property_set_description(oc, "default-bus-bypass-iommu", - "Set on/off to enable/disable " - "bypass_iommu for default root bus"); - --- -2.27.0 - diff --git a/kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch b/kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch deleted file mode 100644 index 767991d..0000000 --- a/kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch +++ /dev/null @@ -1,60 +0,0 @@ -From f62b9eb18b0cc7ceb5a842aa0db43dae9a568647 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 11 Feb 2022 18:02:59 +0100 -Subject: [PATCH 2/5] hw/virtio: vdpa: Fix leak of host-notifier memory-region - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 78: Synchronize with RHEL 9.0.0 build qemu-kvm-6.2.0-11.el9_0.1 -RH-Commit: [2/5] 38cb408826a6925fc7c482a03e4364c6f918396e (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 2062828 2062819 2062817 2062813 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cornelia Huck - -BZ: https://bugzilla.redhat.com/2059786 -BRANCH: rhel-9.0.0 -BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=43688388 -UPTREAM: Merged - -If call virtio_queue_set_host_notifier_mr fails, should free -host-notifier 
memory-region. - -This problem can trigger a coredump with some vDPA drivers (mlx5, -but not with the vdpasim), if we unplug the virtio-net card from -the guest after a stop/start. - -The same fix has been done for vhost-user: - 1f89d3b91e3e ("hw/virtio: Fix leak of host-notifier memory-region") - -Fixes: d0416d487bd5 ("vhost-vdpa: map virtqueue notification area if possible") -Cc: jasowang@redhat.com -Resolves: https://bugzilla.redhat.com/2027208 -Signed-off-by: Laurent Vivier -Message-Id: <20220211170259.1388734-1-lvivier@redhat.com> -Cc: qemu-stable@nongnu.org -Acked-by: Jason Wang -Reviewed-by: Stefano Garzarella -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 98f7607ecda00dea3cbb2ed7b4427c96846efb83) -Signed-off-by: Laurent Vivier - -Forward-port of RHEL 9.0.0 MR 123 (hw/virtio: vdpa: Fix leak of host-notifier memory-region) ---- - hw/virtio/vhost-vdpa.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index bcaf00e09f..78da48a333 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -415,6 +415,7 @@ static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index) - g_free(name); - - if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) { -+ object_unparent(OBJECT(&n->mr)); - munmap(addr, page_size); - goto err; - } --- -2.31.1 - diff --git a/kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch b/kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch deleted file mode 100644 index 20bc3a5..0000000 --- a/kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 06583ce33fab2976157461ac4503d6f8eeb59e75 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Fri, 4 Feb 2022 12:10:12 +0100 -Subject: [PATCH 8/8] iotests/281: Let NBD connection yield in iothread - -RH-Author: Hanna Reitz -RH-MergeRequest: 74: block/nbd: Handle AioContext changes -RH-Commit: [6/6] 
632b9ef5177a80d1c0c00121e1acc37272076d3e (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2033626 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -Put an NBD block device into an I/O thread, and then read data from it, -hoping that the NBD connection will yield during that read. When it -does, the coroutine must be reentered in the block device's I/O thread, -which will only happen if the NBD block driver attaches the connection's -QIOChannel to the new AioContext. It did not do that after 4ddb5d2fde -("block/nbd: drop connection_co") and prior to "block/nbd: Move s->ioc -on AioContext change", which would cause an assertion failure. - -To improve our chances of yielding, the NBD server is throttled to -reading 64 kB/s, and the NBD client reads 128 kB, so it should yield at -some point. - -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Hanna Reitz -Signed-off-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 8cfbe929e8c26050f0a4580a1606a370a947d4ce) -Signed-off-by: Hanna Reitz ---- - tests/qemu-iotests/281 | 28 +++++++++++++++++++++++++--- - tests/qemu-iotests/281.out | 4 ++-- - 2 files changed, 27 insertions(+), 5 deletions(-) - -diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 -index 13c588be75..b2ead7f388 100755 ---- a/tests/qemu-iotests/281 -+++ b/tests/qemu-iotests/281 -@@ -253,8 +253,9 @@ class TestYieldingAndTimers(iotests.QMPTestCase): - self.create_nbd_export() - - # Simple VM with an NBD block device connected to the NBD export -- # provided by the QSD -+ # provided by the QSD, and an (initially unused) iothread - self.vm = iotests.VM() -+ self.vm.add_object('iothread,id=iothr') - self.vm.add_blockdev('nbd,node-name=nbd,server.type=unix,' + - f'server.path={self.sock},export=exp,' + - 'reconnect-delay=1') -@@ -293,19 +294,40 @@ class TestYieldingAndTimers(iotests.QMPTestCase): - # thus not see the error, and so the test will pass.) 
- time.sleep(2) - -+ def test_yield_in_iothread(self): -+ # Move the NBD node to the I/O thread; the NBD block driver should -+ # attach the connection's QIOChannel to that thread's AioContext, too -+ result = self.vm.qmp('x-blockdev-set-iothread', -+ node_name='nbd', iothread='iothr') -+ self.assert_qmp(result, 'return', {}) -+ -+ # Do some I/O that will be throttled by the QSD, so that the network -+ # connection hopefully will yield here. When it is resumed, it must -+ # then be resumed in the I/O thread's AioContext. -+ result = self.vm.qmp('human-monitor-command', -+ command_line='qemu-io nbd "read 0 128K"') -+ self.assert_qmp(result, 'return', '') -+ - def create_nbd_export(self): - assert self.qsd is None - -- # Simple NBD export of a null-co BDS -+ # Export a throttled null-co BDS: Reads are throttled (max 64 kB/s), -+ # writes are not. - self.qsd = QemuStorageDaemon( -+ '--object', -+ 'throttle-group,id=thrgr,x-bps-read=65536,x-bps-read-max=65536', -+ - '--blockdev', - 'null-co,node-name=null,read-zeroes=true', - -+ '--blockdev', -+ 'throttle,node-name=thr,file=null,throttle-group=thrgr', -+ - '--nbd-server', - f'addr.type=unix,addr.path={self.sock}', - - '--export', -- 'nbd,id=exp,node-name=null,name=exp,writable=true' -+ 'nbd,id=exp,node-name=thr,name=exp,writable=true' - ) - - def stop_nbd_export(self): -diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out -index 914e3737bd..3f8a935a08 100644 ---- a/tests/qemu-iotests/281.out -+++ b/tests/qemu-iotests/281.out -@@ -1,5 +1,5 @@ --..... -+...... 
- ---------------------------------------------------------------------- --Ran 5 tests -+Ran 6 tests - - OK --- -2.27.0 - diff --git a/kvm-iotests-281-Test-lingering-timers.patch b/kvm-iotests-281-Test-lingering-timers.patch deleted file mode 100644 index 7175a31..0000000 --- a/kvm-iotests-281-Test-lingering-timers.patch +++ /dev/null @@ -1,174 +0,0 @@ -From 3d2d7a46713d362d2ff5137841e689593da976a3 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Fri, 4 Feb 2022 12:10:10 +0100 -Subject: [PATCH 6/8] iotests/281: Test lingering timers - -RH-Author: Hanna Reitz -RH-MergeRequest: 74: block/nbd: Handle AioContext changes -RH-Commit: [4/6] d228ba3fcdfaab2d54dd5b023688a1c055cce2c2 (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2033626 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -Prior to "block/nbd: Delete reconnect delay timer when done" and -"block/nbd: Delete open timer when done", both of those timers would -remain scheduled even after successfully (re-)connecting to the server, -and they would not even be deleted when the BDS is deleted. - -This test constructs exactly this situation: -(1) Configure an @open-timeout, so the open timer is armed, and -(2) Configure a @reconnect-delay and trigger a reconnect situation - (which succeeds immediately), so the reconnect delay timer is armed. -Then we immediately delete the BDS, and sleep for longer than the -@open-timeout and @reconnect-delay. Prior to said patches, this caused -one (or both) of the timer CBs to access already-freed data. - -Accessing freed data may or may not crash, so this test can produce -false successes, but I do not know how to show the problem in a better -or more reliable way. If you run this test on "block/nbd: Assert there -are no timers when closed" and without the fix patches mentioned above, -you should reliably see an assertion failure. 
-(But all other tests that use the reconnect delay timer (264 and 277) -will fail in that configuration, too; as will nbd-reconnect-on-open, -which uses the open timer.) - -Remove this test from the quick group because of the two second sleep -this patch introduces. - -(I decided to put this test case into 281, because the main bug this -series addresses is in the interaction of the NBD block driver and I/O -threads, which is precisely the scope of 281. The test case for that -other bug will also be put into the test class added here. - -Also, excuse the test class's name, I couldn't come up with anything -better. The "yield" part will make sense two patches from now.) - -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Hanna Reitz -Signed-off-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit eaf1e85d4ddefdbd197f393fa9c5acc7ba8133b0) - -Conflict: -- @open-timeout was introduced after the 6.2 release, and has not been - backported. Consequently, there is no open_timer, and we can (and - must) drop the respective parts of the test here. - -Signed-off-by: Hanna Reitz ---- - tests/qemu-iotests/281 | 73 ++++++++++++++++++++++++++++++++++++-- - tests/qemu-iotests/281.out | 4 +-- - 2 files changed, 73 insertions(+), 4 deletions(-) - -diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 -index 956698083f..13c588be75 100755 ---- a/tests/qemu-iotests/281 -+++ b/tests/qemu-iotests/281 -@@ -1,5 +1,5 @@ - #!/usr/bin/env python3 --# group: rw quick -+# group: rw - # - # Test cases for blockdev + IOThread interactions - # -@@ -20,8 +20,9 @@ - # - - import os -+import time - import iotests --from iotests import qemu_img -+from iotests import qemu_img, QemuStorageDaemon - - image_len = 64 * 1024 * 1024 - -@@ -243,6 +244,74 @@ class TestBlockdevBackupAbort(iotests.QMPTestCase): - # Hangs on failure, we expect this error. 
- self.assert_qmp(result, 'error/class', 'GenericError') - -+# Test for RHBZ#2033626 -+class TestYieldingAndTimers(iotests.QMPTestCase): -+ sock = os.path.join(iotests.sock_dir, 'nbd.sock') -+ qsd = None -+ -+ def setUp(self): -+ self.create_nbd_export() -+ -+ # Simple VM with an NBD block device connected to the NBD export -+ # provided by the QSD -+ self.vm = iotests.VM() -+ self.vm.add_blockdev('nbd,node-name=nbd,server.type=unix,' + -+ f'server.path={self.sock},export=exp,' + -+ 'reconnect-delay=1') -+ -+ self.vm.launch() -+ -+ def tearDown(self): -+ self.stop_nbd_export() -+ self.vm.shutdown() -+ -+ def test_timers_with_blockdev_del(self): -+ # Stop and restart the NBD server, and do some I/O on the client to -+ # trigger a reconnect and start the reconnect delay timer -+ self.stop_nbd_export() -+ self.create_nbd_export() -+ -+ result = self.vm.qmp('human-monitor-command', -+ command_line='qemu-io nbd "write 0 512"') -+ self.assert_qmp(result, 'return', '') -+ -+ # Reconnect is done, so the reconnect delay timer should be gone. -+ # (But there used to be a bug where it remained active, for which this -+ # is a regression test.) -+ -+ # Delete the BDS to see whether the timer is gone. If it is not, -+ # it will remain active, fire later, and then access freed data. -+ # (Or, with "block/nbd: Assert there are no timers when closed" -+ # applied, the assertion added in that patch will fail.) -+ result = self.vm.qmp('blockdev-del', node_name='nbd') -+ self.assert_qmp(result, 'return', {}) -+ -+ # Give the timer some time to fire (it has a timeout of 1 s). -+ # (Sleeping in an iotest may ring some alarm bells, but note that if -+ # the timing is off here, the test will just always pass. If we kill -+ # the VM too early, then we just kill the timer before it can fire, -+ # thus not see the error, and so the test will pass.) 
-+ time.sleep(2) -+ -+ def create_nbd_export(self): -+ assert self.qsd is None -+ -+ # Simple NBD export of a null-co BDS -+ self.qsd = QemuStorageDaemon( -+ '--blockdev', -+ 'null-co,node-name=null,read-zeroes=true', -+ -+ '--nbd-server', -+ f'addr.type=unix,addr.path={self.sock}', -+ -+ '--export', -+ 'nbd,id=exp,node-name=null,name=exp,writable=true' -+ ) -+ -+ def stop_nbd_export(self): -+ self.qsd.stop() -+ self.qsd = None -+ - if __name__ == '__main__': - iotests.main(supported_fmts=['qcow2'], - supported_protocols=['file']) -diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out -index 89968f35d7..914e3737bd 100644 ---- a/tests/qemu-iotests/281.out -+++ b/tests/qemu-iotests/281.out -@@ -1,5 +1,5 @@ --.... -+..... - ---------------------------------------------------------------------- --Ran 4 tests -+Ran 5 tests - - OK --- -2.27.0 - diff --git a/kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch b/kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch deleted file mode 100644 index 8616f1c..0000000 --- a/kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 37593348e7d95580fb2b0009dcb026c07367f1f8 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 3 Feb 2022 15:05:34 +0100 -Subject: [PATCH 2/8] iotests: Test blockdev-reopen with iothreads and - throttling - -RH-Author: Kevin Wolf -RH-MergeRequest: 73: block: Lock AioContext for drain_end in blockdev-reopen -RH-Commit: [2/2] d19d5fa9efa4813ece75708436891041754ab910 (kmwolf/centos-qemu-kvm) -RH-Bugzilla: 2046659 -RH-Acked-by: Sergio Lopez -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Reitz - -The 'throttle' block driver implements .bdrv_co_drain_end, so -blockdev-reopen will have to wait for it to complete in the polling -loop at the end of qmp_blockdev_reopen(). This makes AIO_WAIT_WHILE() -release the AioContext lock, which causes a crash if the lock hasn't -correctly been taken. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20220203140534.36522-3-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit ee810602376125ca0e0afd6b7c715e13740978ea) -Signed-off-by: Kevin Wolf ---- - tests/qemu-iotests/245 | 36 +++++++++++++++++++++++++++++++++--- - tests/qemu-iotests/245.out | 4 ++-- - 2 files changed, 35 insertions(+), 5 deletions(-) - -diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 -index 24ac43f70e..8cbed7821b 100755 ---- a/tests/qemu-iotests/245 -+++ b/tests/qemu-iotests/245 -@@ -1138,12 +1138,13 @@ class TestBlockdevReopen(iotests.QMPTestCase): - self.assertEqual(self.get_node('hd1'), None) - self.assert_qmp(self.get_node('hd2'), 'ro', True) - -- def run_test_iothreads(self, iothread_a, iothread_b, errmsg = None): -- opts = hd_opts(0) -+ def run_test_iothreads(self, iothread_a, iothread_b, errmsg = None, -+ opts_a = None, opts_b = None): -+ opts = opts_a or hd_opts(0) - result = self.vm.qmp('blockdev-add', conv_keys = False, **opts) - self.assert_qmp(result, 'return', {}) - -- opts2 = hd_opts(2) -+ opts2 = opts_b or hd_opts(2) - result = self.vm.qmp('blockdev-add', conv_keys = False, **opts2) - self.assert_qmp(result, 'return', {}) - -@@ -1194,6 +1195,35 @@ class TestBlockdevReopen(iotests.QMPTestCase): - def test_iothreads_switch_overlay(self): - self.run_test_iothreads('', 'iothread0') - -+ def test_iothreads_with_throttling(self): -+ # Create a throttle-group object -+ opts = { 'qom-type': 'throttle-group', 'id': 'group0', -+ 'limits': { 'iops-total': 1000 } } -+ result = self.vm.qmp('object-add', conv_keys = False, **opts) -+ self.assert_qmp(result, 'return', {}) -+ -+ # Options with a throttle filter between format and protocol -+ opts = [ -+ { -+ 'driver': iotests.imgfmt, -+ 'node-name': f'hd{idx}', -+ 'file' : { -+ 'node-name': f'hd{idx}-throttle', -+ 'driver': 'throttle', -+ 'throttle-group': 'group0', -+ 'file': { -+ 'driver': 'file', -+ 'node-name': f'hd{idx}-file', -+ 
'filename': hd_path[idx], -+ }, -+ }, -+ } -+ for idx in (0, 2) -+ ] -+ -+ self.run_test_iothreads('iothread0', 'iothread0', None, -+ opts[0], opts[1]) -+ - if __name__ == '__main__': - iotests.activate_logging() - iotests.main(supported_fmts=["qcow2"], -diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out -index 4eced19294..a4e04a3266 100644 ---- a/tests/qemu-iotests/245.out -+++ b/tests/qemu-iotests/245.out -@@ -17,8 +17,8 @@ read 1/1 bytes at offset 262152 - read 1/1 bytes at offset 262160 - 1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - --............... -+................ - ---------------------------------------------------------------------- --Ran 25 tests -+Ran 26 tests - - OK --- -2.27.0 - diff --git a/kvm-iotests-Test-qemu-img-convert-of-zeroed-data-cluster.patch b/kvm-iotests-Test-qemu-img-convert-of-zeroed-data-cluster.patch deleted file mode 100644 index 0ab3bcc..0000000 --- a/kvm-iotests-Test-qemu-img-convert-of-zeroed-data-cluster.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 51f691acd8042351d005873996d7bf4c7b045508 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 17 Dec 2021 17:46:53 +0100 -Subject: [PATCH 08/12] iotests: Test qemu-img convert of zeroed data cluster - -RH-Author: Kevin Wolf -RH-MergeRequest: 70: qemu-img convert: Fix sparseness of output image -RH-Commit: [1/2] 0770582c553ac6b0f18c035f9a0238599d4763cc (kmwolf/centos-qemu-kvm) -RH-Bugzilla: 1882917 -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Reitz - -This demonstrates what happens when the block status changes in -sub-min_sparse granularity, but all of the parts are zeroed out. The -alignment logic in is_allocated_sectors() prevents that the target image -remains fully sparse as expected, but turns it into a data cluster of -explicit zeros. 
- -Signed-off-by: Kevin Wolf -Signed-off-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20211217164654.1184218-2-vsementsov@virtuozzo.com> -Tested-by: Peter Lieven -Signed-off-by: Kevin Wolf -(cherry picked from commit 51cd8bddd63540514d44808f7920811439baa253) -Signed-off-by: Kevin Wolf ---- - tests/qemu-iotests/122 | 1 + - tests/qemu-iotests/122.out | 10 ++++++++-- - 2 files changed, 9 insertions(+), 2 deletions(-) - -diff --git a/tests/qemu-iotests/122 b/tests/qemu-iotests/122 -index efb260d822..be0f6b79e5 100755 ---- a/tests/qemu-iotests/122 -+++ b/tests/qemu-iotests/122 -@@ -251,6 +251,7 @@ $QEMU_IO -c "write -P 0 0 64k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_test - $QEMU_IO -c "write 0 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir - $QEMU_IO -c "write 8k 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir - $QEMU_IO -c "write 17k 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir -+$QEMU_IO -c "write -P 0 65k 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir - - for min_sparse in 4k 8k; do - echo -diff --git a/tests/qemu-iotests/122.out b/tests/qemu-iotests/122.out -index 8fbdac2b39..69b8e8b803 100644 ---- a/tests/qemu-iotests/122.out -+++ b/tests/qemu-iotests/122.out -@@ -192,6 +192,8 @@ wrote 1024/1024 bytes at offset 8192 - 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - wrote 1024/1024 bytes at offset 17408 - 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+wrote 1024/1024 bytes at offset 66560 -+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) - - convert -S 4k - [{ "start": 0, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, -@@ -199,7 +201,9 @@ convert -S 4k - { "start": 8192, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, - { "start": 12288, "length": 4096, "depth": 0, "present": false, "zero": true, "data": false}, - { "start": 16384, "length": 4096, "depth": 0, "present": true, "zero": false, "data": 
true, "offset": OFFSET}, --{ "start": 20480, "length": 67088384, "depth": 0, "present": false, "zero": true, "data": false}] -+{ "start": 20480, "length": 46080, "depth": 0, "present": false, "zero": true, "data": false}, -+{ "start": 66560, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 67584, "length": 67041280, "depth": 0, "present": false, "zero": true, "data": false}] - - convert -c -S 4k - [{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true}, -@@ -211,7 +215,9 @@ convert -c -S 4k - - convert -S 8k - [{ "start": 0, "length": 24576, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, --{ "start": 24576, "length": 67084288, "depth": 0, "present": false, "zero": true, "data": false}] -+{ "start": 24576, "length": 41984, "depth": 0, "present": false, "zero": true, "data": false}, -+{ "start": 66560, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, -+{ "start": 67584, "length": 67041280, "depth": 0, "present": false, "zero": true, "data": false}] - - convert -c -S 8k - [{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true}, --- -2.27.0 - diff --git a/kvm-iotests-block-status-cache-New-test.patch b/kvm-iotests-block-status-cache-New-test.patch deleted file mode 100644 index cd9a198..0000000 --- a/kvm-iotests-block-status-cache-New-test.patch +++ /dev/null @@ -1,197 +0,0 @@ -From 89fe89491f89a7526ba864a9d94d3de930261d69 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Tue, 18 Jan 2022 18:00:00 +0100 -Subject: [PATCH 07/12] iotests/block-status-cache: New test - -RH-Author: Hanna Reitz -RH-MergeRequest: 69: block/io: Update BSC only if want_zero is true -RH-Commit: [2/2] 3c5a55aca1ac7a71c175a124d63bcf7a4430a022 (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2041461 -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Kevin Wolf - -Add a new test to verify that 
want_zero=false block-status calls do not -pollute the block-status cache for want_zero=true calls. - -We check want_zero=true calls and their results using `qemu-img map` -(over NBD), and want_zero=false calls also using `qemu-img map` over -NBD, but using the qemu:allocation-depth context. - -(This test case cannot be integrated into nbd-qemu-allocation, because -that is a qcow2 test, and this is a raw test.) - -Signed-off-by: Hanna Reitz -Message-Id: <20220118170000.49423-3-hreitz@redhat.com> -Reviewed-by: Nir Soffer -Reviewed-by: Eric Blake -Tested-by: Eric Blake -Signed-off-by: Eric Blake -(cherry picked from commit 6384dd534d742123d26c008d9794b20bc41359d5) -Signed-off-by: Hanna Reitz ---- - tests/qemu-iotests/tests/block-status-cache | 139 ++++++++++++++++++ - .../qemu-iotests/tests/block-status-cache.out | 5 + - 2 files changed, 144 insertions(+) - create mode 100755 tests/qemu-iotests/tests/block-status-cache - create mode 100644 tests/qemu-iotests/tests/block-status-cache.out - -diff --git a/tests/qemu-iotests/tests/block-status-cache b/tests/qemu-iotests/tests/block-status-cache -new file mode 100755 -index 0000000000..6fa10bb8f8 ---- /dev/null -+++ b/tests/qemu-iotests/tests/block-status-cache -@@ -0,0 +1,139 @@ -+#!/usr/bin/env python3 -+# group: rw quick -+# -+# Test cases for the block-status cache. -+# -+# Copyright (C) 2022 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. 
If not, see . -+# -+ -+import os -+import signal -+import iotests -+from iotests import qemu_img_create, qemu_img_pipe, qemu_nbd -+ -+ -+image_size = 1 * 1024 * 1024 -+test_img = os.path.join(iotests.test_dir, 'test.img') -+ -+nbd_pidfile = os.path.join(iotests.test_dir, 'nbd.pid') -+nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') -+ -+ -+class TestBscWithNbd(iotests.QMPTestCase): -+ def setUp(self) -> None: -+ """Just create an empty image with a read-only NBD server on it""" -+ assert qemu_img_create('-f', iotests.imgfmt, test_img, -+ str(image_size)) == 0 -+ -+ # Pass --allocation-depth to enable the qemu:allocation-depth context, -+ # which we are going to query to provoke a block-status inquiry with -+ # want_zero=false. -+ assert qemu_nbd(f'--socket={nbd_sock}', -+ f'--format={iotests.imgfmt}', -+ '--persistent', -+ '--allocation-depth', -+ '--read-only', -+ f'--pid-file={nbd_pidfile}', -+ test_img) \ -+ == 0 -+ -+ def tearDown(self) -> None: -+ with open(nbd_pidfile, encoding='utf-8') as f: -+ pid = int(f.read()) -+ os.kill(pid, signal.SIGTERM) -+ os.remove(nbd_pidfile) -+ os.remove(test_img) -+ -+ def test_with_zero_bug(self) -> None: -+ """ -+ Verify that the block-status cache is not corrupted by a -+ want_zero=false call. -+ We can provoke a want_zero=false call with `qemu-img map` over NBD with -+ x-dirty-bitmap=qemu:allocation-depth, so we first run a normal `map` -+ (which results in want_zero=true), then using said -+ qemu:allocation-depth context, and finally another normal `map` to -+ verify that the cache has not been corrupted. -+ """ -+ -+ nbd_img_opts = f'driver=nbd,server.type=unix,server.path={nbd_sock}' -+ nbd_img_opts_alloc_depth = nbd_img_opts + \ -+ ',x-dirty-bitmap=qemu:allocation-depth' -+ -+ # Normal map, results in want_zero=true. -+ # This will probably detect an allocated data sector first (qemu likes -+ # to allocate the first sector to facilitate alignment probing), and -+ # then the rest to be zero. 
The BSC will thus contain (if anything) -+ # one range covering the first sector. -+ map_pre = qemu_img_pipe('map', '--output=json', '--image-opts', -+ nbd_img_opts) -+ -+ # qemu:allocation-depth maps for want_zero=false. -+ # want_zero=false should (with the file driver, which the server is -+ # using) report everything as data. While this is sufficient for -+ # want_zero=false, this is nothing that should end up in the -+ # block-status cache. -+ # Due to a bug, this information did end up in the cache, though, and -+ # this would lead to wrong information being returned on subsequent -+ # want_zero=true calls. -+ # -+ # We need to run this map twice: On the first call, we probably still -+ # have the first sector in the cache, and so this will be served from -+ # the cache; and only the subsequent range will be queried from the -+ # block driver. This subsequent range will then be entered into the -+ # cache. -+ # If we did a want_zero=true call at this point, we would thus get -+ # correct information: The first sector is not covered by the cache, so -+ # we would get fresh block-status information from the driver, which -+ # would return a data range, and this would then go into the cache, -+ # evicting the wrong range from the want_zero=false call before. -+ # -+ # Therefore, we need a second want_zero=false map to reproduce: -+ # Since the first sector is not in the cache, the query for its status -+ # will go to the driver, which will return a result that reports the -+ # whole image to be a single data area. This result will then go into -+ # the cache, and so the cache will then report the whole image to -+ # contain data. -+ # -+ # Note that once the cache reports the whole image to contain data, any -+ # subsequent map operation will be served from the cache, and so we can -+ # never loop too many times here. 
-+ for _ in range(2): -+ # (Ignore the result, this is just to contaminate the cache) -+ qemu_img_pipe('map', '--output=json', '--image-opts', -+ nbd_img_opts_alloc_depth) -+ -+ # Now let's see whether the cache reports everything as data, or -+ # whether we get correct information (i.e. the same as we got on our -+ # first attempt). -+ map_post = qemu_img_pipe('map', '--output=json', '--image-opts', -+ nbd_img_opts) -+ -+ if map_pre != map_post: -+ print('ERROR: Map information differs before and after querying ' + -+ 'qemu:allocation-depth') -+ print('Before:') -+ print(map_pre) -+ print('After:') -+ print(map_post) -+ -+ self.fail("Map information differs") -+ -+ -+if __name__ == '__main__': -+ # The block-status cache only works on the protocol layer, so to test it, -+ # we can only use the raw format -+ iotests.main(supported_fmts=['raw'], -+ supported_protocols=['file']) -diff --git a/tests/qemu-iotests/tests/block-status-cache.out b/tests/qemu-iotests/tests/block-status-cache.out -new file mode 100644 -index 0000000000..ae1213e6f8 ---- /dev/null -+++ b/tests/qemu-iotests/tests/block-status-cache.out -@@ -0,0 +1,5 @@ -+. 
-+---------------------------------------------------------------------- -+Ran 1 tests -+ -+OK --- -2.27.0 - diff --git a/kvm-iotests-stream-error-on-reset-New-test.patch b/kvm-iotests-stream-error-on-reset-New-test.patch deleted file mode 100644 index cf69e38..0000000 --- a/kvm-iotests-stream-error-on-reset-New-test.patch +++ /dev/null @@ -1,196 +0,0 @@ -From 300f912d4a5afe4ecca9c68a71429fbc9966ec34 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Tue, 11 Jan 2022 15:36:13 +0000 -Subject: [PATCH 11/12] iotests/stream-error-on-reset: New test - -RH-Author: Hanna Reitz -RH-MergeRequest: 71: block-backend: prevent dangling BDS pointers across aio_poll() -RH-Commit: [2/2] 3167f31b91eb433f338564201f4ef336e39f7f7d (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2040123 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Emanuele Giuseppe Esposito - -Test the following scenario: -- Simple stream block in two-layer backing chain (base and top) -- The job is drained via blk_drain(), then an error occurs while the job - settles the ongoing request -- And so the job completes while in blk_drain() - -This was reported as a segfault, but is fixed by "block-backend: prevent -dangling BDS pointers across aio_poll()". 
- -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2036178 -Signed-off-by: Hanna Reitz -Signed-off-by: Stefan Hajnoczi -Message-Id: <20220111153613.25453-3-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 2ca1d5d6b91f8a52a5c651f660b2f58c94bf97ba) -Signed-off-by: Hanna Reitz ---- - .../qemu-iotests/tests/stream-error-on-reset | 140 ++++++++++++++++++ - .../tests/stream-error-on-reset.out | 5 + - 2 files changed, 145 insertions(+) - create mode 100755 tests/qemu-iotests/tests/stream-error-on-reset - create mode 100644 tests/qemu-iotests/tests/stream-error-on-reset.out - -diff --git a/tests/qemu-iotests/tests/stream-error-on-reset b/tests/qemu-iotests/tests/stream-error-on-reset -new file mode 100755 -index 0000000000..7eaedb24d7 ---- /dev/null -+++ b/tests/qemu-iotests/tests/stream-error-on-reset -@@ -0,0 +1,140 @@ -+#!/usr/bin/env python3 -+# group: rw quick -+# -+# Test what happens when a stream job completes in a blk_drain(). -+# -+# Copyright (C) 2022 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+import os -+import iotests -+from iotests import imgfmt, qemu_img_create, qemu_io_silent, QMPTestCase -+ -+ -+image_size = 1 * 1024 * 1024 -+data_size = 64 * 1024 -+base = os.path.join(iotests.test_dir, 'base.img') -+top = os.path.join(iotests.test_dir, 'top.img') -+ -+ -+# We want to test completing a stream job in a blk_drain(). 
-+# -+# The blk_drain() we are going to use is a virtio-scsi device resetting, -+# which we can trigger by resetting the system. -+# -+# In order to have the block job complete on drain, we (1) throttle its -+# base image so we can start the drain after it has begun, but before it -+# completes, and (2) make it encounter an I/O error on the ensuing write. -+# (If it completes regularly, the completion happens after the drain for -+# some reason.) -+ -+class TestStreamErrorOnReset(QMPTestCase): -+ def setUp(self) -> None: -+ """ -+ Create two images: -+ - base image {base} with {data_size} bytes allocated -+ - top image {top} without any data allocated -+ -+ And the following VM configuration: -+ - base image throttled to {data_size} -+ - top image with a blkdebug configuration so the first write access -+ to it will result in an error -+ - top image is attached to a virtio-scsi device -+ """ -+ assert qemu_img_create('-f', imgfmt, base, str(image_size)) == 0 -+ assert qemu_io_silent('-c', f'write 0 {data_size}', base) == 0 -+ assert qemu_img_create('-f', imgfmt, top, str(image_size)) == 0 -+ -+ self.vm = iotests.VM() -+ self.vm.add_args('-accel', 'tcg') # Make throttling work properly -+ self.vm.add_object(self.vm.qmp_to_opts({ -+ 'qom-type': 'throttle-group', -+ 'id': 'thrgr', -+ 'x-bps-total': str(data_size) -+ })) -+ self.vm.add_blockdev(self.vm.qmp_to_opts({ -+ 'driver': imgfmt, -+ 'node-name': 'base', -+ 'file': { -+ 'driver': 'throttle', -+ 'throttle-group': 'thrgr', -+ 'file': { -+ 'driver': 'file', -+ 'filename': base -+ } -+ } -+ })) -+ self.vm.add_blockdev(self.vm.qmp_to_opts({ -+ 'driver': imgfmt, -+ 'node-name': 'top', -+ 'file': { -+ 'driver': 'blkdebug', -+ 'node-name': 'top-blkdebug', -+ 'inject-error': [{ -+ 'event': 'pwritev', -+ 'immediately': 'true', -+ 'once': 'true' -+ }], -+ 'image': { -+ 'driver': 'file', -+ 'filename': top -+ } -+ }, -+ 'backing': 'base' -+ })) -+ self.vm.add_device(self.vm.qmp_to_opts({ -+ 'driver': 'virtio-scsi', -+ 'id': 
'vscsi' -+ })) -+ self.vm.add_device(self.vm.qmp_to_opts({ -+ 'driver': 'scsi-hd', -+ 'bus': 'vscsi.0', -+ 'drive': 'top' -+ })) -+ self.vm.launch() -+ -+ def tearDown(self) -> None: -+ self.vm.shutdown() -+ os.remove(top) -+ os.remove(base) -+ -+ def test_stream_error_on_reset(self) -> None: -+ # Launch a stream job, which will take at least a second to -+ # complete, because the base image is throttled (so we can -+ # get in between it having started and it having completed) -+ res = self.vm.qmp('block-stream', job_id='stream', device='top') -+ self.assert_qmp(res, 'return', {}) -+ -+ while True: -+ ev = self.vm.event_wait('JOB_STATUS_CHANGE') -+ if ev['data']['status'] == 'running': -+ # Once the stream job is running, reset the system, which -+ # forces the virtio-scsi device to be reset, thus draining -+ # the stream job, and making it complete. Completing -+ # inside of that drain should not result in a segfault. -+ res = self.vm.qmp('system_reset') -+ self.assert_qmp(res, 'return', {}) -+ elif ev['data']['status'] == 'null': -+ # The test is done once the job is gone -+ break -+ -+ -+if __name__ == '__main__': -+ # Passes with any format with backing file support, but qed and -+ # qcow1 do not seem to exercise the used-to-be problematic code -+ # path, so there is no point in having them in this list -+ iotests.main(supported_fmts=['qcow2', 'vmdk'], -+ supported_protocols=['file']) -diff --git a/tests/qemu-iotests/tests/stream-error-on-reset.out b/tests/qemu-iotests/tests/stream-error-on-reset.out -new file mode 100644 -index 0000000000..ae1213e6f8 ---- /dev/null -+++ b/tests/qemu-iotests/tests/stream-error-on-reset.out -@@ -0,0 +1,5 @@ -+. 
-+---------------------------------------------------------------------- -+Ran 1 tests -+ -+OK --- -2.27.0 - diff --git a/kvm-iotests.py-Add-QemuStorageDaemon-class.patch b/kvm-iotests.py-Add-QemuStorageDaemon-class.patch deleted file mode 100644 index b215d23..0000000 --- a/kvm-iotests.py-Add-QemuStorageDaemon-class.patch +++ /dev/null @@ -1,92 +0,0 @@ -From c21502a220d107261c9a8627158f357489d86543 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Fri, 4 Feb 2022 12:10:09 +0100 -Subject: [PATCH 5/8] iotests.py: Add QemuStorageDaemon class - -RH-Author: Hanna Reitz -RH-MergeRequest: 74: block/nbd: Handle AioContext changes -RH-Commit: [3/6] 5da1cda4d025c1bd7029ed8071b4ccf25459a878 (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2033626 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -This is a rather simple class that allows creating a QSD instance -running in the background and stopping it when no longer needed. - -The __del__ handler is a safety net for when something goes so wrong in -a test that e.g. the tearDown() method is not called (e.g. setUp() -launches the QSD, but then launching a VM fails). We do not want the -QSD to continue running after the test has failed, so __del__() will -take care to kill it. 
- -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Hanna Reitz -Signed-off-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 091dc7b2b5553a529bff9a7bf9ad3bc85bc5bdcd) -Signed-off-by: Hanna Reitz ---- - tests/qemu-iotests/iotests.py | 40 +++++++++++++++++++++++++++++++++++ - 1 file changed, 40 insertions(+) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index 83bfedb902..a51b5ce8cd 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -72,6 +72,8 @@ - qemu_prog = os.environ.get('QEMU_PROG', 'qemu') - qemu_opts = os.environ.get('QEMU_OPTIONS', '').strip().split(' ') - -+qsd_prog = os.environ.get('QSD_PROG', 'qemu-storage-daemon') -+ - gdb_qemu_env = os.environ.get('GDB_OPTIONS') - qemu_gdb = [] - if gdb_qemu_env: -@@ -312,6 +314,44 @@ def cmd(self, cmd): - return self._read_output() - - -+class QemuStorageDaemon: -+ def __init__(self, *args: str, instance_id: str = 'a'): -+ assert '--pidfile' not in args -+ self.pidfile = os.path.join(test_dir, f'qsd-{instance_id}-pid') -+ all_args = [qsd_prog] + list(args) + ['--pidfile', self.pidfile] -+ -+ # Cannot use with here, we want the subprocess to stay around -+ # pylint: disable=consider-using-with -+ self._p = subprocess.Popen(all_args) -+ while not os.path.exists(self.pidfile): -+ if self._p.poll() is not None: -+ cmd = ' '.join(all_args) -+ raise RuntimeError( -+ 'qemu-storage-daemon terminated with exit code ' + -+ f'{self._p.returncode}: {cmd}') -+ -+ time.sleep(0.01) -+ -+ with open(self.pidfile, encoding='utf-8') as f: -+ self._pid = int(f.read().strip()) -+ -+ assert self._pid == self._p.pid -+ -+ def stop(self, kill_signal=15): -+ self._p.send_signal(kill_signal) -+ self._p.wait() -+ self._p = None -+ -+ try: -+ os.remove(self.pidfile) -+ except OSError: -+ pass -+ -+ def __del__(self): -+ if self._p is not None: -+ self.stop(kill_signal=9) -+ -+ - def qemu_nbd(*args): - '''Run qemu-nbd in daemon mode and return the parent's exit 
code''' - return subprocess.call(qemu_nbd_args + ['--fork'] + list(args)) --- -2.27.0 - diff --git a/kvm-memory-Fix-incorrect-calls-of-log_global_start-stop.patch b/kvm-memory-Fix-incorrect-calls-of-log_global_start-stop.patch deleted file mode 100644 index 5ff2734..0000000 --- a/kvm-memory-Fix-incorrect-calls-of-log_global_start-stop.patch +++ /dev/null @@ -1,97 +0,0 @@ -From b169059c8fbf15c3ffeec0f68b938cb9febd8db7 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Tue, 30 Nov 2021 16:00:28 +0800 -Subject: [PATCH 5/6] memory: Fix incorrect calls of log_global_start/stop -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 77: memory: Fix qemu crash on continuous migrations of stopped VM -RH-Commit: [1/2] 6271ee689266b24d29d4c87f60e5b096ef5f5d63 (peterx/qemu-kvm) -RH-Bugzilla: 2044818 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: David Hildenbrand -RH-Acked-by: quintela1 - -We should only call the log_global_start/stop when the global dirty track -bitmask changes from zero<->non-zero. - -No real issue reported for this yet probably because no immediate user to -enable both dirty rate measurement and migration at the same time. However -it'll be good to be prepared for it. - -Fixes: 63b41db4bc ("memory: make global_dirty_tracking a bitmask") -Cc: qemu-stable@nongnu.org -Cc: Hyman Huang -Cc: Paolo Bonzini -Cc: Dr. 
David Alan Gilbert -Cc: Juan Quintela -Cc: David Hildenbrand -Signed-off-by: Peter Xu -Reviewed-by: David Hildenbrand -Message-Id: <20211130080028.6474-1-peterx@redhat.com> -Signed-off-by: Philippe Mathieu-Daudé -(cherry picked from commit 7b0538ed3a22ce30817f818449d10701fb0821f9) -Signed-off-by: Peter Xu ---- - softmmu/memory.c | 27 ++++++++++++++------------- - 1 file changed, 14 insertions(+), 13 deletions(-) - -diff --git a/softmmu/memory.c b/softmmu/memory.c -index 7340e19ff5..81d4bf1454 100644 ---- a/softmmu/memory.c -+++ b/softmmu/memory.c -@@ -2773,6 +2773,8 @@ static VMChangeStateEntry *vmstate_change; - - void memory_global_dirty_log_start(unsigned int flags) - { -+ unsigned int old_flags = global_dirty_tracking; -+ - if (vmstate_change) { - qemu_del_vm_change_state_handler(vmstate_change); - vmstate_change = NULL; -@@ -2781,15 +2783,14 @@ void memory_global_dirty_log_start(unsigned int flags) - assert(flags && !(flags & (~GLOBAL_DIRTY_MASK))); - assert(!(global_dirty_tracking & flags)); - global_dirty_tracking |= flags; -- - trace_global_dirty_changed(global_dirty_tracking); - -- MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward); -- -- /* Refresh DIRTY_MEMORY_MIGRATION bit. */ -- memory_region_transaction_begin(); -- memory_region_update_pending = true; -- memory_region_transaction_commit(); -+ if (!old_flags) { -+ MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward); -+ memory_region_transaction_begin(); -+ memory_region_update_pending = true; -+ memory_region_transaction_commit(); -+ } - } - - static void memory_global_dirty_log_do_stop(unsigned int flags) -@@ -2800,12 +2801,12 @@ static void memory_global_dirty_log_do_stop(unsigned int flags) - - trace_global_dirty_changed(global_dirty_tracking); - -- /* Refresh DIRTY_MEMORY_MIGRATION bit. 
*/ -- memory_region_transaction_begin(); -- memory_region_update_pending = true; -- memory_region_transaction_commit(); -- -- MEMORY_LISTENER_CALL_GLOBAL(log_global_stop, Reverse); -+ if (!global_dirty_tracking) { -+ memory_region_transaction_begin(); -+ memory_region_update_pending = true; -+ memory_region_transaction_commit(); -+ MEMORY_LISTENER_CALL_GLOBAL(log_global_stop, Reverse); -+ } - } - - static void memory_vm_change_state_handler(void *opaque, bool running, --- -2.27.0 - diff --git a/kvm-memory-Fix-qemu-crash-on-starting-dirty-log-twice-wi.patch b/kvm-memory-Fix-qemu-crash-on-starting-dirty-log-twice-wi.patch deleted file mode 100644 index 5ea0007..0000000 --- a/kvm-memory-Fix-qemu-crash-on-starting-dirty-log-twice-wi.patch +++ /dev/null @@ -1,156 +0,0 @@ -From b3ed8e344c733bc8c2223c1b9e424a9fbcea56d4 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Mon, 7 Feb 2022 20:30:19 +0800 -Subject: [PATCH 6/6] memory: Fix qemu crash on starting dirty log twice with - stopped VM - -RH-Author: Peter Xu -RH-MergeRequest: 77: memory: Fix qemu crash on continuous migrations of stopped VM -RH-Commit: [2/2] 98ed2ef6226ec80a1896ebb554015aded0dc0c18 (peterx/qemu-kvm) -RH-Bugzilla: 2044818 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: David Hildenbrand -RH-Acked-by: quintela1 - -QEMU can now easily crash with two continuous migration carried out: - -(qemu) migrate -d exec:cat>out -(qemu) migrate_cancel -(qemu) migrate -d exec:cat>out -[crash] ../softmmu/memory.c:2782: memory_global_dirty_log_start: Assertion -`!(global_dirty_tracking & flags)' failed. - -It's because memory API provides a way to postpone dirty log stop if the VM is -stopped, and that'll be re-done until the next VM start. It was added in 2017 -with commit 1931076077 ("migration: optimize the downtime", 2017-08-01). - -However the recent work on allowing dirty tracking to be bitmask broke it, -which is commit 63b41db4bc ("memory: make global_dirty_tracking a bitmask", -2021-11-01). 
- -The fix proposed in this patch contains two things: - - (1) Instead of passing over the flags to postpone stop dirty track, we add a - global variable (along with current vmstate_change variable) to record - what flags to stop dirty tracking. - - (2) When start dirty tracking, instead if remove the vmstate hook directly, - we also execute the postponed stop process so that we make sure all the - starts and stops will be paired. - -This procedure is overlooked in the bitmask-ify work in 2021. - -Cc: Hyman Huang -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2044818 -Fixes: 63b41db4bc ("memory: make global_dirty_tracking a bitmask") -Signed-off-by: Peter Xu -Message-Id: <20220207123019.27223-1-peterx@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit a5c90c61a118027b86155cffdf4fe4e2e9de1020) -Signed-off-by: Peter Xu ---- - softmmu/memory.c | 61 +++++++++++++++++++++++++++++++++++------------- - 1 file changed, 45 insertions(+), 16 deletions(-) - -diff --git a/softmmu/memory.c b/softmmu/memory.c -index 81d4bf1454..0311e362ee 100644 ---- a/softmmu/memory.c -+++ b/softmmu/memory.c -@@ -2769,19 +2769,32 @@ void memory_global_after_dirty_log_sync(void) - MEMORY_LISTENER_CALL_GLOBAL(log_global_after_sync, Forward); - } - -+/* -+ * Dirty track stop flags that are postponed due to VM being stopped. Should -+ * only be used within vmstate_change hook. 
-+ */ -+static unsigned int postponed_stop_flags; - static VMChangeStateEntry *vmstate_change; -+static void memory_global_dirty_log_stop_postponed_run(void); - - void memory_global_dirty_log_start(unsigned int flags) - { -- unsigned int old_flags = global_dirty_tracking; -+ unsigned int old_flags; -+ -+ assert(flags && !(flags & (~GLOBAL_DIRTY_MASK))); - - if (vmstate_change) { -- qemu_del_vm_change_state_handler(vmstate_change); -- vmstate_change = NULL; -+ /* If there is postponed stop(), operate on it first */ -+ postponed_stop_flags &= ~flags; -+ memory_global_dirty_log_stop_postponed_run(); - } - -- assert(flags && !(flags & (~GLOBAL_DIRTY_MASK))); -- assert(!(global_dirty_tracking & flags)); -+ flags &= ~global_dirty_tracking; -+ if (!flags) { -+ return; -+ } -+ -+ old_flags = global_dirty_tracking; - global_dirty_tracking |= flags; - trace_global_dirty_changed(global_dirty_tracking); - -@@ -2809,29 +2822,45 @@ static void memory_global_dirty_log_do_stop(unsigned int flags) - } - } - -+/* -+ * Execute the postponed dirty log stop operations if there is, then reset -+ * everything (including the flags and the vmstate change hook). 
-+ */ -+static void memory_global_dirty_log_stop_postponed_run(void) -+{ -+ /* This must be called with the vmstate handler registered */ -+ assert(vmstate_change); -+ -+ /* Note: postponed_stop_flags can be cleared in log start routine */ -+ if (postponed_stop_flags) { -+ memory_global_dirty_log_do_stop(postponed_stop_flags); -+ postponed_stop_flags = 0; -+ } -+ -+ qemu_del_vm_change_state_handler(vmstate_change); -+ vmstate_change = NULL; -+} -+ - static void memory_vm_change_state_handler(void *opaque, bool running, - RunState state) - { -- unsigned int flags = (unsigned int)(uintptr_t)opaque; - if (running) { -- memory_global_dirty_log_do_stop(flags); -- -- if (vmstate_change) { -- qemu_del_vm_change_state_handler(vmstate_change); -- vmstate_change = NULL; -- } -+ memory_global_dirty_log_stop_postponed_run(); - } - } - - void memory_global_dirty_log_stop(unsigned int flags) - { - if (!runstate_is_running()) { -+ /* Postpone the dirty log stop, e.g., to when VM starts again */ - if (vmstate_change) { -- return; -+ /* Batch with previous postponed flags */ -+ postponed_stop_flags |= flags; -+ } else { -+ postponed_stop_flags = flags; -+ vmstate_change = qemu_add_vm_change_state_handler( -+ memory_vm_change_state_handler, NULL); - } -- vmstate_change = qemu_add_vm_change_state_handler( -- memory_vm_change_state_handler, -- (void *)(uintptr_t)flags); - return; - } - --- -2.27.0 - diff --git a/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch b/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch deleted file mode 100644 index e26bfcf..0000000 --- a/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch +++ /dev/null @@ -1,287 +0,0 @@ -From 6274a2a09a8931188889467b104bf2e2fc39cb54 Mon Sep 17 00:00:00 2001 -From: Yang Zhong -Date: Mon, 1 Nov 2021 12:20:05 -0400 -Subject: [PATCH 01/12] numa: Enable numa for SGX EPC sections - -RH-Author: Paul Lai -RH-MergeRequest: 65: Enable SGX and add SGX Numa support -RH-Commit: [1/5] ff69d138c3f5903096388ec7ccf8dc5e6c6c6ffb -RH-Bugzilla: 
2033708 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Bandan Das -RH-Acked-by: Cornelia Huck - -The basic SGX did not enable numa for SGX EPC sections, which -result in all EPC sections located in numa node 0. This patch -enable SGX numa function in the guest and the EPC section can -work with RAM as one numa node. - -The Guest kernel related log: -[ 0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x180000000-0x183ffffff] -[ 0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x184000000-0x185bfffff] -The SRAT table can normally show SGX EPC sections menory info in different -numa nodes. - -The SGX EPC numa related command: - ...... - -m 4G,maxmem=20G \ - -smp sockets=2,cores=2 \ - -cpu host,+sgx-provisionkey \ - -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \ - -object memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \ - -numa node,nodeid=0,cpus=0-1,memdev=node0 \ - -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \ - -object memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \ - -numa node,nodeid=1,cpus=2-3,memdev=node1 \ - -M sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1 \ - ...... 
- -Signed-off-by: Yang Zhong -Message-Id: <20211101162009.62161-2-yang.zhong@intel.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 1105812382e1126d86dddc16b3700f8c79dc93d1) -Signed-off-by: Paul Lai ---- - hw/core/numa.c | 5 ++--- - hw/i386/acpi-build.c | 2 ++ - hw/i386/sgx-epc.c | 3 +++ - hw/i386/sgx-stub.c | 4 ++++ - hw/i386/sgx.c | 44 +++++++++++++++++++++++++++++++++++++++ - include/hw/i386/sgx-epc.h | 3 +++ - monitor/hmp-cmds.c | 1 + - qapi/machine.json | 10 ++++++++- - qemu-options.hx | 4 ++-- - 9 files changed, 70 insertions(+), 6 deletions(-) - -diff --git a/hw/core/numa.c b/hw/core/numa.c -index e6050b2273..1aa05dcf42 100644 ---- a/hw/core/numa.c -+++ b/hw/core/numa.c -@@ -784,9 +784,8 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) - break; - case MEMORY_DEVICE_INFO_KIND_SGX_EPC: - se = value->u.sgx_epc.data; -- /* TODO: once we support numa, assign to right node */ -- node_mem[0].node_mem += se->size; -- node_mem[0].node_plugged_mem += se->size; -+ node_mem[se->node].node_mem += se->size; -+ node_mem[se->node].node_plugged_mem = 0; - break; - default: - g_assert_not_reached(); -diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index a99c6e4fe3..8383b83ee3 100644 ---- a/hw/i386/acpi-build.c -+++ b/hw/i386/acpi-build.c -@@ -2068,6 +2068,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) - nvdimm_build_srat(table_data); - } - -+ sgx_epc_build_srat(table_data); -+ - /* - * TODO: this part is not in ACPI spec and current linux kernel boots fine - * without these entries. 
But I recall there were issues the last time I -diff --git a/hw/i386/sgx-epc.c b/hw/i386/sgx-epc.c -index e508827e78..96b2940d75 100644 ---- a/hw/i386/sgx-epc.c -+++ b/hw/i386/sgx-epc.c -@@ -21,6 +21,7 @@ - - static Property sgx_epc_properties[] = { - DEFINE_PROP_UINT64(SGX_EPC_ADDR_PROP, SGXEPCDevice, addr, 0), -+ DEFINE_PROP_UINT32(SGX_EPC_NUMA_NODE_PROP, SGXEPCDevice, node, 0), - DEFINE_PROP_LINK(SGX_EPC_MEMDEV_PROP, SGXEPCDevice, hostmem, - TYPE_MEMORY_BACKEND_EPC, HostMemoryBackendEpc *), - DEFINE_PROP_END_OF_LIST(), -@@ -139,6 +140,8 @@ static void sgx_epc_md_fill_device_info(const MemoryDeviceState *md, - se->memaddr = epc->addr; - se->size = object_property_get_uint(OBJECT(epc), SGX_EPC_SIZE_PROP, - NULL); -+ se->node = object_property_get_uint(OBJECT(epc), SGX_EPC_NUMA_NODE_PROP, -+ NULL); - se->memdev = object_get_canonical_path(OBJECT(epc->hostmem)); - - info->u.sgx_epc.data = se; -diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c -index c9b379e665..26833eb233 100644 ---- a/hw/i386/sgx-stub.c -+++ b/hw/i386/sgx-stub.c -@@ -6,6 +6,10 @@ - #include "qapi/error.h" - #include "qapi/qapi-commands-misc-target.h" - -+void sgx_epc_build_srat(GArray *table_data) -+{ -+} -+ - SGXInfo *qmp_query_sgx(Error **errp) - { - error_setg(errp, "SGX support is not compiled in"); -diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c -index 8fef3dd8fa..d04299904a 100644 ---- a/hw/i386/sgx.c -+++ b/hw/i386/sgx.c -@@ -23,6 +23,7 @@ - #include "sysemu/hw_accel.h" - #include "sysemu/reset.h" - #include -+#include "hw/acpi/aml-build.h" - - #define SGX_MAX_EPC_SECTIONS 8 - #define SGX_CPUID_EPC_INVALID 0x0 -@@ -36,6 +37,46 @@ - - #define RETRY_NUM 2 - -+static int sgx_epc_device_list(Object *obj, void *opaque) -+{ -+ GSList **list = opaque; -+ -+ if (object_dynamic_cast(obj, TYPE_SGX_EPC)) { -+ *list = g_slist_append(*list, DEVICE(obj)); -+ } -+ -+ object_child_foreach(obj, sgx_epc_device_list, opaque); -+ return 0; -+} -+ -+static GSList *sgx_epc_get_device_list(void) -+{ -+ GSList *list 
= NULL; -+ -+ object_child_foreach(qdev_get_machine(), sgx_epc_device_list, &list); -+ return list; -+} -+ -+void sgx_epc_build_srat(GArray *table_data) -+{ -+ GSList *device_list = sgx_epc_get_device_list(); -+ -+ for (; device_list; device_list = device_list->next) { -+ DeviceState *dev = device_list->data; -+ Object *obj = OBJECT(dev); -+ uint64_t addr, size; -+ int node; -+ -+ node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP, -+ &error_abort); -+ addr = object_property_get_uint(obj, SGX_EPC_ADDR_PROP, &error_abort); -+ size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP, &error_abort); -+ -+ build_srat_memory(table_data, addr, size, node, MEM_AFFINITY_ENABLED); -+ } -+ g_slist_free(device_list); -+} -+ - static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) - { - return (low & MAKE_64BIT_MASK(12, 20)) + -@@ -226,6 +267,9 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms) - /* set the memdev link with memory backend */ - object_property_parse(obj, SGX_EPC_MEMDEV_PROP, list->value->memdev, - &error_fatal); -+ /* set the numa node property for sgx epc object */ -+ object_property_set_uint(obj, SGX_EPC_NUMA_NODE_PROP, list->value->node, -+ &error_fatal); - object_property_set_bool(obj, "realized", true, &error_fatal); - object_unref(obj); - } -diff --git a/include/hw/i386/sgx-epc.h b/include/hw/i386/sgx-epc.h -index a6a65be854..581fac389a 100644 ---- a/include/hw/i386/sgx-epc.h -+++ b/include/hw/i386/sgx-epc.h -@@ -25,6 +25,7 @@ - #define SGX_EPC_ADDR_PROP "addr" - #define SGX_EPC_SIZE_PROP "size" - #define SGX_EPC_MEMDEV_PROP "memdev" -+#define SGX_EPC_NUMA_NODE_PROP "node" - - /** - * SGXEPCDevice: -@@ -38,6 +39,7 @@ typedef struct SGXEPCDevice { - - /* public */ - uint64_t addr; -+ uint32_t node; - HostMemoryBackendEpc *hostmem; - } SGXEPCDevice; - -@@ -56,6 +58,7 @@ typedef struct SGXEPCState { - } SGXEPCState; - - bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size); -+void sgx_epc_build_srat(GArray 
*table_data); - - static inline uint64_t sgx_epc_above_4g_end(SGXEPCState *sgx_epc) - { -diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index 9c91bf93e9..2669156b28 100644 ---- a/monitor/hmp-cmds.c -+++ b/monitor/hmp-cmds.c -@@ -1810,6 +1810,7 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict) - se->id ? se->id : ""); - monitor_printf(mon, " memaddr: 0x%" PRIx64 "\n", se->memaddr); - monitor_printf(mon, " size: %" PRIu64 "\n", se->size); -+ monitor_printf(mon, " node: %" PRId64 "\n", se->node); - monitor_printf(mon, " memdev: %s\n", se->memdev); - break; - default: -diff --git a/qapi/machine.json b/qapi/machine.json -index 067e3f5378..16e771affc 100644 ---- a/qapi/machine.json -+++ b/qapi/machine.json -@@ -1207,12 +1207,15 @@ - # - # @memdev: memory backend linked with device - # -+# @node: the numa node -+# - # Since: 6.2 - ## - { 'struct': 'SgxEPCDeviceInfo', - 'data': { '*id': 'str', - 'memaddr': 'size', - 'size': 'size', -+ 'node': 'int', - 'memdev': 'str' - } - } -@@ -1285,10 +1288,15 @@ - # - # @memdev: memory backend linked with device - # -+# @node: the numa node -+# - # Since: 6.2 - ## - { 'struct': 'SgxEPC', -- 'data': { 'memdev': 'str' } } -+ 'data': { 'memdev': 'str', -+ 'node': 'int' -+ } -+} - - ## - # @SgxEPCProperties: -diff --git a/qemu-options.hx b/qemu-options.hx -index 94c4a8dbaf..4b7798088b 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -127,11 +127,11 @@ SRST - ERST - - DEF("M", HAS_ARG, QEMU_OPTION_M, -- " sgx-epc.0.memdev=memid\n", -+ " sgx-epc.0.memdev=memid,sgx-epc.0.node=numaid\n", - QEMU_ARCH_ALL) - - SRST --``sgx-epc.0.memdev=@var{memid}`` -+``sgx-epc.0.memdev=@var{memid},sgx-epc.0.node=@var{numaid}`` - Define an SGX EPC section. 
- ERST - --- -2.27.0 - diff --git a/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch b/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch deleted file mode 100644 index de4c4b1..0000000 --- a/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch +++ /dev/null @@ -1,210 +0,0 @@ -From 0f75501ba348dc9fb3ce0198ceafc8093149457d Mon Sep 17 00:00:00 2001 -From: Yang Zhong -Date: Mon, 1 Nov 2021 12:20:07 -0400 -Subject: [PATCH 02/12] numa: Support SGX numa in the monitor and Libvirt - interfaces - -RH-Author: Paul Lai -RH-MergeRequest: 65: Enable SGX and add SGX Numa support -RH-Commit: [2/5] 8c19cfb1a139fd4dbac771e695a133f16a68437f -RH-Bugzilla: 2033708 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Bandan Das -RH-Acked-by: Cornelia Huck - -Add the SGXEPCSection list into SGXInfo to show the multiple -SGX EPC sections detailed info, not the total size like before. -This patch can enable numa support for 'info sgx' command and -QMP interfaces. The new interfaces show each EPC section info -in one numa node. Libvirt can use QMP interface to get the -detailed host SGX EPC capabilities to decide how to allocate -host EPC sections to guest. 
- -(qemu) info sgx - SGX support: enabled - SGX1 support: enabled - SGX2 support: enabled - FLC support: enabled - NUMA node #0: size=67108864 - NUMA node #1: size=29360128 - -The QMP interface show: -(QEMU) query-sgx -{"return": {"sgx": true, "sgx2": true, "sgx1": true, "sections": \ -[{"node": 0, "size": 67108864}, {"node": 1, "size": 29360128}], "flc": true}} - -(QEMU) query-sgx-capabilities -{"return": {"sgx": true, "sgx2": true, "sgx1": true, "sections": \ -[{"node": 0, "size": 17070817280}, {"node": 1, "size": 17079205888}], "flc": true}} - -Signed-off-by: Yang Zhong -Message-Id: <20211101162009.62161-4-yang.zhong@intel.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 4755927ae12547c2e7cb22c5fa1b39038c6c11b1) -Signed-off-by: Paul Lai ---- - hw/i386/sgx.c | 51 +++++++++++++++++++++++++++++++++++-------- - qapi/misc-target.json | 19 ++++++++++++++-- - 2 files changed, 59 insertions(+), 11 deletions(-) - -diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c -index d04299904a..5de5dd0893 100644 ---- a/hw/i386/sgx.c -+++ b/hw/i386/sgx.c -@@ -83,11 +83,13 @@ static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) - ((high & MAKE_64BIT_MASK(0, 20)) << 32); - } - --static uint64_t sgx_calc_host_epc_section_size(void) -+static SGXEPCSectionList *sgx_calc_host_epc_sections(void) - { -+ SGXEPCSectionList *head = NULL, **tail = &head; -+ SGXEPCSection *section; - uint32_t i, type; - uint32_t eax, ebx, ecx, edx; -- uint64_t size = 0; -+ uint32_t j = 0; - - for (i = 0; i < SGX_MAX_EPC_SECTIONS; i++) { - host_cpuid(0x12, i + 2, &eax, &ebx, &ecx, &edx); -@@ -101,10 +103,13 @@ static uint64_t sgx_calc_host_epc_section_size(void) - break; - } - -- size += sgx_calc_section_metric(ecx, edx); -+ section = g_new0(SGXEPCSection, 1); -+ section->node = j++; -+ section->size = sgx_calc_section_metric(ecx, edx); -+ QAPI_LIST_APPEND(tail, section); - } - -- return size; -+ return head; - } - - static void sgx_epc_reset(void *opaque) -@@ -168,13 +173,35 @@ SGXInfo 
*qmp_query_sgx_capabilities(Error **errp) - info->sgx1 = eax & (1U << 0) ? true : false; - info->sgx2 = eax & (1U << 1) ? true : false; - -- info->section_size = sgx_calc_host_epc_section_size(); -+ info->sections = sgx_calc_host_epc_sections(); - - close(fd); - - return info; - } - -+static SGXEPCSectionList *sgx_get_epc_sections_list(void) -+{ -+ GSList *device_list = sgx_epc_get_device_list(); -+ SGXEPCSectionList *head = NULL, **tail = &head; -+ SGXEPCSection *section; -+ -+ for (; device_list; device_list = device_list->next) { -+ DeviceState *dev = device_list->data; -+ Object *obj = OBJECT(dev); -+ -+ section = g_new0(SGXEPCSection, 1); -+ section->node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP, -+ &error_abort); -+ section->size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP, -+ &error_abort); -+ QAPI_LIST_APPEND(tail, section); -+ } -+ g_slist_free(device_list); -+ -+ return head; -+} -+ - SGXInfo *qmp_query_sgx(Error **errp) - { - SGXInfo *info = NULL; -@@ -193,14 +220,13 @@ SGXInfo *qmp_query_sgx(Error **errp) - return NULL; - } - -- SGXEPCState *sgx_epc = &pcms->sgx_epc; - info = g_new0(SGXInfo, 1); - - info->sgx = true; - info->sgx1 = true; - info->sgx2 = true; - info->flc = true; -- info->section_size = sgx_epc->size; -+ info->sections = sgx_get_epc_sections_list(); - - return info; - } -@@ -208,6 +234,7 @@ SGXInfo *qmp_query_sgx(Error **errp) - void hmp_info_sgx(Monitor *mon, const QDict *qdict) - { - Error *err = NULL; -+ SGXEPCSectionList *section_list, *section; - g_autoptr(SGXInfo) info = qmp_query_sgx(&err); - - if (err) { -@@ -222,8 +249,14 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict) - info->sgx2 ? "enabled" : "disabled"); - monitor_printf(mon, "FLC support: %s\n", - info->flc ? 
"enabled" : "disabled"); -- monitor_printf(mon, "size: %" PRIu64 "\n", -- info->section_size); -+ -+ section_list = info->sections; -+ for (section = section_list; section; section = section->next) { -+ monitor_printf(mon, "NUMA node #%" PRId64 ": ", -+ section->value->node); -+ monitor_printf(mon, "size=%" PRIu64 "\n", -+ section->value->size); -+ } - } - - bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size) -diff --git a/qapi/misc-target.json b/qapi/misc-target.json -index 5aa2b95b7d..1022aa0184 100644 ---- a/qapi/misc-target.json -+++ b/qapi/misc-target.json -@@ -337,6 +337,21 @@ - 'if': 'TARGET_ARM' } - - -+## -+# @SGXEPCSection: -+# -+# Information about intel SGX EPC section info -+# -+# @node: the numa node -+# -+# @size: the size of epc section -+# -+# Since: 6.2 -+## -+{ 'struct': 'SGXEPCSection', -+ 'data': { 'node': 'int', -+ 'size': 'uint64'}} -+ - ## - # @SGXInfo: - # -@@ -350,7 +365,7 @@ - # - # @flc: true if FLC is supported - # --# @section-size: The EPC section size for guest -+# @sections: The EPC sections info for guest - # - # Since: 6.2 - ## -@@ -359,7 +374,7 @@ - 'sgx1': 'bool', - 'sgx2': 'bool', - 'flc': 'bool', -- 'section-size': 'uint64'}, -+ 'sections': ['SGXEPCSection']}, - 'if': 'TARGET_I386' } - - ## --- -2.27.0 - diff --git a/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch b/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch deleted file mode 100644 index 9e58f6c..0000000 --- a/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch +++ /dev/null @@ -1,213 +0,0 @@ -From a6a327ae392c02b8e8c75b5d702d929ff8fe408d Mon Sep 17 00:00:00 2001 -From: Yang Zhong -Date: Thu, 20 Jan 2022 17:31:04 -0500 -Subject: [PATCH 05/12] qapi: Cleanup SGX related comments and restore - @section-size -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paul Lai -RH-MergeRequest: 65: Enable SGX and add SGX Numa support -RH-Commit: [5/5] 
0d3b9f37cd3cce202050ba3bd51eef4410ef3d38 -RH-Bugzilla: 2033708 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Bandan Das -RH-Acked-by: Cornelia Huck - -The SGX NUMA patches were merged into Qemu 7.0 release, we need -clarify detailed version history information and also change -some related comments, which make SGX related comments clearer. - -The QMP command schema promises backwards compatibility as standard. -We temporarily restore "@section-size", which can avoid incompatible -API breakage. The "@section-size" will be deprecated in 7.2 version. - -Suggested-by: Daniel P. Berrangé -Signed-off-by: Yang Zhong -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Philippe Mathieu-Daudé -Message-Id: <20220120223104.437161-1-yang.zhong@intel.com> -Signed-off-by: Paolo Bonzini -Signed-off-by: Paul Lai ---- - docs/about/deprecated.rst | 13 +++++++++++++ - hw/i386/sgx.c | 11 +++++++++-- - qapi/machine.json | 4 ++-- - qapi/misc-target.json | 22 +++++++++++++++++----- - 4 files changed, 41 insertions(+), 9 deletions(-) - -diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst -index ff7488cb63..33925edf45 100644 ---- a/docs/about/deprecated.rst -+++ b/docs/about/deprecated.rst -@@ -270,6 +270,19 @@ accepted incorrect commands will return an error. Users should make sure that - all arguments passed to ``device_add`` are consistent with the documented - property types. - -+``query-sgx`` return value member ``section-size`` (since 7.0) -+'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' -+ -+Member ``section-size`` in return value elements with meta-type ``uint64`` is -+deprecated. Use ``sections`` instead. -+ -+ -+``query-sgx-capabilities`` return value member ``section-size`` (since 7.0) -+''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' -+ -+Member ``section-size`` in return value elements with meta-type ``uint64`` is -+deprecated. Use ``sections`` instead. 
-+ - System accelerators - ------------------- - -diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c -index 5de5dd0893..a2b318dd93 100644 ---- a/hw/i386/sgx.c -+++ b/hw/i386/sgx.c -@@ -83,7 +83,7 @@ static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) - ((high & MAKE_64BIT_MASK(0, 20)) << 32); - } - --static SGXEPCSectionList *sgx_calc_host_epc_sections(void) -+static SGXEPCSectionList *sgx_calc_host_epc_sections(uint64_t *size) - { - SGXEPCSectionList *head = NULL, **tail = &head; - SGXEPCSection *section; -@@ -106,6 +106,7 @@ static SGXEPCSectionList *sgx_calc_host_epc_sections(void) - section = g_new0(SGXEPCSection, 1); - section->node = j++; - section->size = sgx_calc_section_metric(ecx, edx); -+ *size += section->size; - QAPI_LIST_APPEND(tail, section); - } - -@@ -156,6 +157,7 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp) - { - SGXInfo *info = NULL; - uint32_t eax, ebx, ecx, edx; -+ uint64_t size = 0; - - int fd = qemu_open_old("/dev/sgx_vepc", O_RDWR); - if (fd < 0) { -@@ -173,7 +175,8 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp) - info->sgx1 = eax & (1U << 0) ? true : false; - info->sgx2 = eax & (1U << 1) ? true : false; - -- info->sections = sgx_calc_host_epc_sections(); -+ info->sections = sgx_calc_host_epc_sections(&size); -+ info->section_size = size; - - close(fd); - -@@ -220,12 +223,14 @@ SGXInfo *qmp_query_sgx(Error **errp) - return NULL; - } - -+ SGXEPCState *sgx_epc = &pcms->sgx_epc; - info = g_new0(SGXInfo, 1); - - info->sgx = true; - info->sgx1 = true; - info->sgx2 = true; - info->flc = true; -+ info->section_size = sgx_epc->size; - info->sections = sgx_get_epc_sections_list(); - - return info; -@@ -249,6 +254,8 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict) - info->sgx2 ? "enabled" : "disabled"); - monitor_printf(mon, "FLC support: %s\n", - info->flc ? 
"enabled" : "disabled"); -+ monitor_printf(mon, "size: %" PRIu64 "\n", -+ info->section_size); - - section_list = info->sections; - for (section = section_list; section; section = section->next) { -diff --git a/qapi/machine.json b/qapi/machine.json -index 16e771affc..a9f33d0f27 100644 ---- a/qapi/machine.json -+++ b/qapi/machine.json -@@ -1207,7 +1207,7 @@ - # - # @memdev: memory backend linked with device - # --# @node: the numa node -+# @node: the numa node (Since: 7.0) - # - # Since: 6.2 - ## -@@ -1288,7 +1288,7 @@ - # - # @memdev: memory backend linked with device - # --# @node: the numa node -+# @node: the numa node (Since: 7.0) - # - # Since: 6.2 - ## -diff --git a/qapi/misc-target.json b/qapi/misc-target.json -index 1022aa0184..4bc45d2474 100644 ---- a/qapi/misc-target.json -+++ b/qapi/misc-target.json -@@ -344,9 +344,9 @@ - # - # @node: the numa node - # --# @size: the size of epc section -+# @size: the size of EPC section - # --# Since: 6.2 -+# Since: 7.0 - ## - { 'struct': 'SGXEPCSection', - 'data': { 'node': 'int', -@@ -365,7 +365,13 @@ - # - # @flc: true if FLC is supported - # --# @sections: The EPC sections info for guest -+# @section-size: The EPC section size for guest -+# Redundant with @sections. Just for backward compatibility. -+# -+# @sections: The EPC sections info for guest (Since: 7.0) -+# -+# Features: -+# @deprecated: Member @section-size is deprecated. Use @sections instead. 
- # - # Since: 6.2 - ## -@@ -374,6 +380,8 @@ - 'sgx1': 'bool', - 'sgx2': 'bool', - 'flc': 'bool', -+ 'section-size': { 'type': 'uint64', -+ 'features': [ 'deprecated' ] }, - 'sections': ['SGXEPCSection']}, - 'if': 'TARGET_I386' } - -@@ -390,7 +398,9 @@ - # - # -> { "execute": "query-sgx" } - # <- { "return": { "sgx": true, "sgx1" : true, "sgx2" : true, --# "flc": true, "section-size" : 0 } } -+# "flc": true, "section-size" : 96468992, -+# "sections": [{"node": 0, "size": 67108864}, -+# {"node": 1, "size": 29360128}]} } - # - ## - { 'command': 'query-sgx', 'returns': 'SGXInfo', 'if': 'TARGET_I386' } -@@ -408,7 +418,9 @@ - # - # -> { "execute": "query-sgx-capabilities" } - # <- { "return": { "sgx": true, "sgx1" : true, "sgx2" : true, --# "flc": true, "section-size" : 0 } } -+# "flc": true, "section-size" : 96468992, -+# "section" : [{"node": 0, "size": 67108864}, -+# {"node": 1, "size": 29360128}]} } - # - ## - { 'command': 'query-sgx-capabilities', 'returns': 'SGXInfo', 'if': 'TARGET_I386' } --- -2.27.0 - diff --git a/kvm-qemu-img-make-is_allocated_sectors-more-efficient.patch b/kvm-qemu-img-make-is_allocated_sectors-more-efficient.patch deleted file mode 100644 index 2d67070..0000000 --- a/kvm-qemu-img-make-is_allocated_sectors-more-efficient.patch +++ /dev/null @@ -1,108 +0,0 @@ -From a221f5a8ed02690687e6709c49ae0e1e01c5f466 Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Fri, 17 Dec 2021 17:46:54 +0100 -Subject: [PATCH 09/12] qemu-img: make is_allocated_sectors() more efficient - -RH-Author: Kevin Wolf -RH-MergeRequest: 70: qemu-img convert: Fix sparseness of output image -RH-Commit: [2/2] cc05aa4ac506b57ff9b430c007618cdf1485a03f (kmwolf/centos-qemu-kvm) -RH-Bugzilla: 1882917 -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Hanna Reitz - -Consider the case when the whole buffer is zero and end is unaligned. - -If i <= tail, we return 1 and do one unaligned WRITE, RMW happens. 
- -If i > tail, we do on aligned WRITE_ZERO (or skip if target is zeroed) -and again one unaligned WRITE, RMW happens. - -Let's do better: don't fragment the whole-zero buffer and report it as -ZERO: in case of zeroed target we just do nothing and avoid RMW. If -target is not zeroes, one unaligned WRITE_ZERO should not be much worse -than one unaligned WRITE. - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20211217164654.1184218-3-vsementsov@virtuozzo.com> -Tested-by: Peter Lieven -Signed-off-by: Kevin Wolf -(cherry picked from commit 96054c76ff2db74165385a69f234c57a6bbc941e) -Signed-off-by: Kevin Wolf ---- - qemu-img.c | 23 +++++++++++++++++++---- - tests/qemu-iotests/122.out | 8 ++------ - 2 files changed, 21 insertions(+), 10 deletions(-) - -diff --git a/qemu-img.c b/qemu-img.c -index f036a1d428..d7ddfcc528 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -1171,19 +1171,34 @@ static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum, - } - } - -+ if (i == n) { -+ /* -+ * The whole buf is the same. -+ * No reason to split it into chunks, so return now. -+ */ -+ *pnum = i; -+ return !is_zero; -+ } -+ - tail = (sector_num + i) & (alignment - 1); - if (tail) { - if (is_zero && i <= tail) { -- /* treat unallocated areas which only consist -- * of a small tail as allocated. */ -+ /* -+ * For sure next sector after i is data, and it will rewrite this -+ * tail anyway due to RMW. So, let's just write data now. -+ */ - is_zero = false; - } - if (!is_zero) { -- /* align up end offset of allocated areas. */ -+ /* If possible, align up end offset of allocated areas. */ - i += alignment - tail; - i = MIN(i, n); - } else { -- /* align down end offset of zero areas. */ -+ /* -+ * For sure next sector after i is data, and it will rewrite this -+ * tail anyway due to RMW. Better is avoid RMW and write zeroes up -+ * to aligned bound. 
-+ */ - i -= tail; - } - } -diff --git a/tests/qemu-iotests/122.out b/tests/qemu-iotests/122.out -index 69b8e8b803..e18766e167 100644 ---- a/tests/qemu-iotests/122.out -+++ b/tests/qemu-iotests/122.out -@@ -201,9 +201,7 @@ convert -S 4k - { "start": 8192, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, - { "start": 12288, "length": 4096, "depth": 0, "present": false, "zero": true, "data": false}, - { "start": 16384, "length": 4096, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, --{ "start": 20480, "length": 46080, "depth": 0, "present": false, "zero": true, "data": false}, --{ "start": 66560, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, --{ "start": 67584, "length": 67041280, "depth": 0, "present": false, "zero": true, "data": false}] -+{ "start": 20480, "length": 67088384, "depth": 0, "present": false, "zero": true, "data": false}] - - convert -c -S 4k - [{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true}, -@@ -215,9 +213,7 @@ convert -c -S 4k - - convert -S 8k - [{ "start": 0, "length": 24576, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, --{ "start": 24576, "length": 41984, "depth": 0, "present": false, "zero": true, "data": false}, --{ "start": 66560, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "offset": OFFSET}, --{ "start": 67584, "length": 67041280, "depth": 0, "present": false, "zero": true, "data": false}] -+{ "start": 24576, "length": 67084288, "depth": 0, "present": false, "zero": true, "data": false}] - - convert -c -S 8k - [{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true}, --- -2.27.0 - diff --git a/kvm-qemu-storage-daemon-Add-vhost-user-blk-help.patch b/kvm-qemu-storage-daemon-Add-vhost-user-blk-help.patch deleted file mode 100644 index bc36f5c..0000000 --- 
a/kvm-qemu-storage-daemon-Add-vhost-user-blk-help.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 0f4592f79f8c24f84db18a8c39c6056b2a0be524 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Fri, 7 Jan 2022 11:54:19 +0100 -Subject: [PATCH 1/5] qemu-storage-daemon: Add vhost-user-blk help -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Kevin Wolf -RH-MergeRequest: 63: qemu-storage-daemon: Add vhost-user-blk help -RH-Commit: [1/2] 6b08fec5d6ceea9f8f3810321099310069e08b53 (kmwolf/centos-qemu-kvm) -RH-Bugzilla: 1962088 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Hanna Reitz - -Add missing vhost-user-blk help: - - $ qemu-storage-daemon -h - ... - --export [type=]vhost-user-blk,id=,node-name=, - addr.type=unix,addr.path=[,writable=on|off] - [,logical-block-size=][,num-queues=] - export the specified block node as a - vhosts-user-blk device over UNIX domain socket - --export [type=]vhost-user-blk,id=,node-name=, - fd,addr.str=[,writable=on|off] - [,logical-block-size=][,num-queues=] - export the specified block node as a - vhosts-user-blk device over file descriptor - ... 
- -Fixes: 90fc91d50b7 ("convert vhost-user-blk server to block export API") -Reported-by: Qing Wang -Reviewed-by: Eric Blake -Signed-off-by: Philippe Mathieu-Daudé -Signed-off-by: Philippe Mathieu-Daudé -Message-Id: <20220107105420.395011-3-f4bug@amsat.org> -Signed-off-by: Kevin Wolf -(cherry picked from commit c8cbc9524269d9583749aaaea8aa244add7e1900) -Signed-off-by: Kevin Wolf ---- - storage-daemon/qemu-storage-daemon.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c -index 52cf17e8ac..9d76d1114d 100644 ---- a/storage-daemon/qemu-storage-daemon.c -+++ b/storage-daemon/qemu-storage-daemon.c -@@ -104,6 +104,19 @@ static void help(void) - " export the specified block node over FUSE\n" - "\n" - #endif /* CONFIG_FUSE */ -+#ifdef CONFIG_VHOST_USER_BLK_SERVER -+" --export [type=]vhost-user-blk,id=,node-name=,\n" -+" addr.type=unix,addr.path=[,writable=on|off]\n" -+" [,logical-block-size=][,num-queues=]\n" -+" export the specified block node as a\n" -+" vhost-user-blk device over UNIX domain socket\n" -+" --export [type=]vhost-user-blk,id=,node-name=,\n" -+" fd,addr.str=[,writable=on|off]\n" -+" [,logical-block-size=][,num-queues=]\n" -+" export the specified block node as a\n" -+" vhost-user-blk device over file descriptor\n" -+"\n" -+#endif /* CONFIG_VHOST_USER_BLK_SERVER */ - " --monitor [chardev=]name[,mode=control][,pretty[=on|off]]\n" - " configure a QMP monitor\n" - "\n" --- -2.27.0 - diff --git a/kvm-qemu-storage-daemon-Fix-typo-in-vhost-user-blk-help.patch b/kvm-qemu-storage-daemon-Fix-typo-in-vhost-user-blk-help.patch deleted file mode 100644 index 798a27e..0000000 --- a/kvm-qemu-storage-daemon-Fix-typo-in-vhost-user-blk-help.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 20edf203c8cb314e27409918399aa7cbdc6fdb02 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 25 Jan 2022 16:15:14 +0100 -Subject: [PATCH 2/5] qemu-storage-daemon: Fix typo in vhost-user-blk help - 
-RH-Author: Kevin Wolf -RH-MergeRequest: 63: qemu-storage-daemon: Add vhost-user-blk help -RH-Commit: [2/2] b7afb670c398799b6e49b926e296771453a55fba (kmwolf/centos-qemu-kvm) -RH-Bugzilla: 1962088 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Hanna Reitz - -The syntax of the fd passing case misses the "addr.type=" key. Add it. - -Signed-off-by: Kevin Wolf -Message-Id: <20220125151514.49035-1-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit e66e665f15736f5ee1fbd8087926cb0f1e52f61a) -Signed-off-by: Kevin Wolf ---- - storage-daemon/qemu-storage-daemon.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c -index 9d76d1114d..ec9aa79b55 100644 ---- a/storage-daemon/qemu-storage-daemon.c -+++ b/storage-daemon/qemu-storage-daemon.c -@@ -111,7 +111,7 @@ static void help(void) - " export the specified block node as a\n" - " vhost-user-blk device over UNIX domain socket\n" - " --export [type=]vhost-user-blk,id=,node-name=,\n" --" fd,addr.str=[,writable=on|off]\n" -+" addr.type=fd,addr.str=[,writable=on|off]\n" - " [,logical-block-size=][,num-queues=]\n" - " export the specified block node as a\n" - " vhost-user-blk device over file descriptor\n" --- -2.27.0 - diff --git a/kvm-redhat-Add-rhel8.6.0-and-rhel9.0.0-machine-types-for.patch b/kvm-redhat-Add-rhel8.6.0-and-rhel9.0.0-machine-types-for.patch deleted file mode 100644 index 407aa1e..0000000 --- a/kvm-redhat-Add-rhel8.6.0-and-rhel9.0.0-machine-types-for.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 0e85c10b73c36f386723c842a797d5e2155e758f Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 10 Dec 2021 10:07:40 +0100 -Subject: [PATCH 1/2] redhat: Add rhel8.6.0 and rhel9.0.0 machine types for - s390x -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 55: redhat: Add 
rhel8.6.0 and rhel9.0.0 machine types for s390x -RH-Commit: [1/1] ad8fd5c825ae52a8fbb3a28f700a514509c59978 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2008060 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Laurent Vivier -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2008060 - -The new machine types have better default values for the upcoming -"generation 16" mainframe. - -Signed-off-by: Thomas Huth ---- - hw/s390x/s390-virtio-ccw.c | 26 +++++++++++++++++++++++++- - 1 file changed, 25 insertions(+), 1 deletion(-) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index c654045964..9da6e9b1d4 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1103,10 +1103,33 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) - DEFINE_CCW_MACHINE(2_4, "2.4", false); - #endif - -+static void ccw_machine_rhel900_instance_options(MachineState *machine) -+{ -+} -+ -+static void ccw_machine_rhel900_class_options(MachineClass *mc) -+{ -+} -+DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); -+ -+static void ccw_machine_rhel860_instance_options(MachineState *machine) -+{ -+ /* Note: The -rhel8.6.0 and -rhel9.0.0 machines are technically identical */ -+ ccw_machine_rhel900_instance_options(machine); -+} -+ -+static void ccw_machine_rhel860_class_options(MachineClass *mc) -+{ -+ ccw_machine_rhel900_class_options(mc); -+} -+DEFINE_CCW_MACHINE(rhel860, "rhel8.6.0", false); -+ - static void ccw_machine_rhel850_instance_options(MachineState *machine) - { - static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 }; - -+ ccw_machine_rhel860_instance_options(machine); -+ - s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); - - s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA); -@@ -1118,10 +1141,11 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine) - - static void ccw_machine_rhel850_class_options(MachineClass *mc) - { -+ 
ccw_machine_rhel860_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); - mc->smp_props.prefer_sockets = true; - } --DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); -+DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); - - static void ccw_machine_rhel840_instance_options(MachineState *machine) - { --- -2.27.0 - diff --git a/kvm-redhat-Enable-virtio-mem-as-tech-preview-on-x86-64.patch b/kvm-redhat-Enable-virtio-mem-as-tech-preview-on-x86-64.patch deleted file mode 100644 index d3b2ba0..0000000 --- a/kvm-redhat-Enable-virtio-mem-as-tech-preview-on-x86-64.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 24b44713de4b2a47c42dfeea813c8911694367a0 Mon Sep 17 00:00:00 2001 -From: David Hildenbrand -Date: Fri, 17 Dec 2021 10:00:53 +0100 -Subject: [PATCH 2/2] redhat: Enable virtio-mem as tech-preview on x86-64 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: David Hildenbrand -RH-MergeRequest: 56: redhat: Enable virtio-mem as tech-preview on x86-64 -RH-Commit: [1/1] 255749cd877b91ce1978b67a088f7c0e181fd85e -RH-Bugzilla: 2014484 -RH-Acked-by: Philippe Mathieu-Daudé -RH-Acked-by: Thomas Huth -RH-Acked-by: Gavin Shan - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2014484 -Upstream-status: RHEL-only - -Everything necesssary for tech-preview of virtio-mem in RHEL9 is -included in QEMU v6.2. Let's enable it via the config option -CONFIG_VIRTIO_MEM on x86-64. 
- -Signed-off-by: David Hildenbrand ---- - configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -index 1f7a9ab024..dc03fbb671 100644 ---- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -88,6 +88,7 @@ CONFIG_VGA_CIRRUS=y - CONFIG_VGA_PCI=y - CONFIG_VHOST_USER=y - CONFIG_VHOST_USER_BLK=y -+CONFIG_VIRTIO_MEM=y - CONFIG_VIRTIO_PCI=y - CONFIG_VIRTIO_VGA=y - CONFIG_VMMOUSE=y --- -2.27.0 - diff --git a/kvm-rhel-machine-types-x86-set-prefer_sockets.patch b/kvm-rhel-machine-types-x86-set-prefer_sockets.patch deleted file mode 100644 index 83c912d..0000000 --- a/kvm-rhel-machine-types-x86-set-prefer_sockets.patch +++ /dev/null @@ -1,52 +0,0 @@ -From ecadfaec992fda7f485522c9ee6e7c9b05614a22 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 7 Dec 2021 18:39:47 +0000 -Subject: [PATCH 2/2] rhel machine types/x86: set prefer_sockets - -RH-Author: Dr. David Alan Gilbert -RH-MergeRequest: 59: rhel machine types/x86: set prefer_sockets -RH-Commit: [1/1] 9bcd9e2c95154e39ef30a8a342ad6c713fa4f1fb (dagrh/c-9-s-qemu-kvm) -RH-Bugzilla: 2028623 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Acked-by: Cornelia Huck - -When I fixed up the machine types for 8.5 I missed the - prefer_sockets = true - -add them in; it looks like Power, ARM already have them, and I see them -in thuth's s390 patch. - -Signed-off-by: Dr. 
David Alan Gilbert ---- - hw/i386/pc_piix.c | 1 + - hw/i386/pc_q35.c | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 183b5d5464..fccb7f5fc9 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -973,6 +973,7 @@ static void pc_machine_rhel7_options(MachineClass *m) - compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); - m->alias = "pc"; - m->is_default = 1; -+ m->smp_props.prefer_sockets = true; - } - - static void pc_init_rhel760(MachineState *machine) -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 0e7e885e78..3b748ddd7b 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -662,6 +662,7 @@ static void pc_q35_machine_rhel850_options(MachineClass *m) - hw_compat_rhel_8_5_len); - compat_props_add(m->compat_props, pc_rhel_8_5_compat, - pc_rhel_8_5_compat_len); -+ m->smp_props.prefer_sockets = true; - } - - DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, --- -2.27.0 - diff --git a/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch b/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch deleted file mode 100644 index 4ddfbe9..0000000 --- a/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch +++ /dev/null @@ -1,130 +0,0 @@ -From 005339f7deaee639c38d30e5bf2235c292ce3937 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Wed, 5 Jan 2022 12:38:47 +0000 -Subject: [PATCH 3/3] softmmu: fix device deletion events with -device JSON - syntax -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Kevin Wolf -RH-MergeRequest: 62: Fix hot unplug of devices created with -device JSON syntax -RH-Commit: [1/1] 980e505ba215b5f9324c107481c5bb257ae03f42 (kmwolf/centos-qemu-kvm) -RH-Bugzilla: 2036669 -RH-Acked-by: Daniel P. 
Berrangé -RH-Acked-by: Laurent Vivier -RH-Acked-by: Jano Tomko - -The -device JSON syntax impl leaks a reference on the created -DeviceState instance. As a result when you hot-unplug the -device, the device_finalize method won't be called and thus -it will fail to emit the required DEVICE_DELETED event. - -A 'json-cli' feature was previously added against the -'device_add' QMP command QAPI schema to indicated to mgmt -apps that -device supported JSON syntax. Given the hotplug -bug that feature flag is not usable for its purpose, so -we add a new 'json-cli-hotplug' feature to indicate the --device supports JSON without breaking hotplug. - -Fixes: 5dacda5167560b3af8eadbce5814f60ba44b467e -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/802 -Signed-off-by: Daniel P. Berrangé -Message-Id: <20220105123847.4047954-2-berrange@redhat.com> -Reviewed-by: Laurent Vivier -Tested-by: Ján Tomko -Reviewed-by: Thomas Huth -Signed-off-by: Kevin Wolf -(cherry picked from commit 64b4529a432507ee84a924be69a03432639e87ba) -Signed-off-by: Kevin Wolf ---- - qapi/qdev.json | 5 ++++- - softmmu/vl.c | 4 +++- - tests/qtest/device-plug-test.c | 19 +++++++++++++++++++ - 3 files changed, 26 insertions(+), 2 deletions(-) - -diff --git a/qapi/qdev.json b/qapi/qdev.json -index 69656b14df..26cd10106b 100644 ---- a/qapi/qdev.json -+++ b/qapi/qdev.json -@@ -44,6 +44,9 @@ - # @json-cli: If present, the "-device" command line option supports JSON - # syntax with a structure identical to the arguments of this - # command. 
-+# @json-cli-hotplug: If present, the "-device" command line option supports JSON -+# syntax without the reference counting leak that broke -+# hot-unplug - # - # Notes: - # -@@ -74,7 +77,7 @@ - { 'command': 'device_add', - 'data': {'driver': 'str', '*bus': 'str', '*id': 'str'}, - 'gen': false, # so we can get the additional arguments -- 'features': ['json-cli'] } -+ 'features': ['json-cli', 'json-cli-hotplug'] } - - ## - # @device_del: -diff --git a/softmmu/vl.c b/softmmu/vl.c -index d46b8fb4ab..b3829e2edd 100644 ---- a/softmmu/vl.c -+++ b/softmmu/vl.c -@@ -2690,6 +2690,7 @@ static void qemu_create_cli_devices(void) - qemu_opts_foreach(qemu_find_opts("device"), - device_init_func, NULL, &error_fatal); - QTAILQ_FOREACH(opt, &device_opts, next) { -+ DeviceState *dev; - loc_push_restore(&opt->loc); - /* - * TODO Eventually we should call qmp_device_add() here to make sure it -@@ -2698,7 +2699,8 @@ static void qemu_create_cli_devices(void) - * from the start, so call qdev_device_add_from_qdict() directly for - * now. - */ -- qdev_device_add_from_qdict(opt->opts, true, &error_fatal); -+ dev = qdev_device_add_from_qdict(opt->opts, true, &error_fatal); -+ object_unref(OBJECT(dev)); - loc_pop(&opt->loc); - } - rom_reset_order_override(); -diff --git a/tests/qtest/device-plug-test.c b/tests/qtest/device-plug-test.c -index 559d47727a..ad79bd4c14 100644 ---- a/tests/qtest/device-plug-test.c -+++ b/tests/qtest/device-plug-test.c -@@ -77,6 +77,23 @@ static void test_pci_unplug_request(void) - qtest_quit(qtest); - } - -+static void test_pci_unplug_json_request(void) -+{ -+ QTestState *qtest = qtest_initf( -+ "-device '{\"driver\": \"virtio-mouse-pci\", \"id\": \"dev0\"}'"); -+ -+ /* -+ * Request device removal. As the guest is not running, the request won't -+ * be processed. However during system reset, the removal will be -+ * handled, removing the device. 
-+ */ -+ device_del(qtest, "dev0"); -+ system_reset(qtest); -+ wait_device_deleted_event(qtest, "dev0"); -+ -+ qtest_quit(qtest); -+} -+ - static void test_ccw_unplug(void) - { - QTestState *qtest = qtest_initf("-device virtio-balloon-ccw,id=dev0"); -@@ -145,6 +162,8 @@ int main(int argc, char **argv) - */ - qtest_add_func("/device-plug/pci-unplug-request", - test_pci_unplug_request); -+ qtest_add_func("/device-plug/pci-unplug-json-request", -+ test_pci_unplug_json_request); - - if (!strcmp(arch, "s390x")) { - qtest_add_func("/device-plug/ccw-unplug", --- -2.27.0 - diff --git a/kvm-ui-avoid-compiler-warnings-from-unused-clipboard-inf.patch b/kvm-ui-avoid-compiler-warnings-from-unused-clipboard-inf.patch deleted file mode 100644 index 6b7173e..0000000 --- a/kvm-ui-avoid-compiler-warnings-from-unused-clipboard-inf.patch +++ /dev/null @@ -1,63 +0,0 @@ -From c5ff43026547ea20fbb496c5b6734b7e64362151 Mon Sep 17 00:00:00 2001 -From: Gerd Hoffmann -Date: Mon, 14 Feb 2022 12:37:49 +0100 -Subject: [PATCH 2/6] ui: avoid compiler warnings from unused clipboard info - variable -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Gerd Hoffmann -RH-MergeRequest: 75: fix vnc cut+paste crash -RH-Commit: [2/4] 6a7982a0bab86bf843fd65842e730a61b2fa2cb0 (kraxel/centos-qemu-kvm) -RH-Bugzilla: 2042820 -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Daniel P. Berrangé -RH-Acked-by: Miroslav Rezanina - -With latest clang 13.0.0 we get - -../ui/clipboard.c:47:34: error: variable 'old' set but not used [-Werror,-Wunused-but-set-variable] - g_autoptr(QemuClipboardInfo) old = NULL; - ^ - -The compiler can't tell that we only declared this variable in -order to get the side effect of free'ing it when out of scope. - -This pattern is a little dubious for a use of g_autoptr, so -rewrite the code to avoid it. - -Reviewed-by: Richard Henderson -Reviewed-by: Philippe Mathieu-Daudé -Tested-by: Philippe Mathieu-Daudé -Signed-off-by: Daniel P. 
Berrangé -[AJB: fix merge conflict] -Signed-off-by: Alex Bennée -Message-Id: <20211215141949.3512719-2-berrange@redhat.com> -Message-Id: <20220105135009.1584676-2-alex.bennee@linaro.org> -(cherry picked from commit 70a54b01693eda3c61814b05d699aba41015ac48) ---- - ui/clipboard.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/ui/clipboard.c b/ui/clipboard.c -index d7b008d62a..7672058e84 100644 ---- a/ui/clipboard.c -+++ b/ui/clipboard.c -@@ -44,12 +44,11 @@ void qemu_clipboard_peer_release(QemuClipboardPeer *peer, - - void qemu_clipboard_update(QemuClipboardInfo *info) - { -- g_autoptr(QemuClipboardInfo) old = NULL; - assert(info->selection < QEMU_CLIPBOARD_SELECTION__COUNT); - - notifier_list_notify(&clipboard_notifiers, info); - -- old = cbinfo[info->selection]; -+ qemu_clipboard_info_unref(cbinfo[info->selection]); - cbinfo[info->selection] = qemu_clipboard_info_ref(info); - } - --- -2.27.0 - diff --git a/kvm-ui-clipboard-fix-use-after-free-regression.patch b/kvm-ui-clipboard-fix-use-after-free-regression.patch deleted file mode 100644 index e14cb23..0000000 --- a/kvm-ui-clipboard-fix-use-after-free-regression.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 965275cd87f8008f129509c6d6fd0096e8ac2d96 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Mon, 14 Feb 2022 15:59:17 +0400 -Subject: [PATCH 3/6] ui/clipboard: fix use-after-free regression -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Gerd Hoffmann -RH-MergeRequest: 75: fix vnc cut+paste crash -RH-Commit: [3/4] d8f68e0eb60d9aaa9a703d969f215816bf35f6f0 (kraxel/centos-qemu-kvm) -RH-Bugzilla: 2042820 -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Daniel P. Berrangé -RH-Acked-by: Miroslav Rezanina - -The same info may be used to update the clipboard, and may be freed -before being ref'ed again. 
- -Fixes: 70a54b01693ed ("ui: avoid compiler warnings from unused clipboard info variable") - -Signed-off-by: Marc-André Lureau -Reviewed-by: Daniel P. Berrangé -Message-Id: <20220214115917.1679568-1-marcandre.lureau@redhat.com> -Signed-off-by: Gerd Hoffmann ---- - ui/clipboard.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/ui/clipboard.c b/ui/clipboard.c -index 7672058e84..d7dae13760 100644 ---- a/ui/clipboard.c -+++ b/ui/clipboard.c -@@ -48,8 +48,10 @@ void qemu_clipboard_update(QemuClipboardInfo *info) - - notifier_list_notify(&clipboard_notifiers, info); - -- qemu_clipboard_info_unref(cbinfo[info->selection]); -- cbinfo[info->selection] = qemu_clipboard_info_ref(info); -+ if (cbinfo[info->selection] != info) { -+ qemu_clipboard_info_unref(cbinfo[info->selection]); -+ cbinfo[info->selection] = qemu_clipboard_info_ref(info); -+ } - } - - QemuClipboardInfo *qemu_clipboard_info(QemuClipboardSelection selection) --- -2.27.0 - diff --git a/kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch b/kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch deleted file mode 100644 index 6d3802c..0000000 --- a/kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 14582cfec72e52894f16ed5c3fb14adb2d6d8e25 Mon Sep 17 00:00:00 2001 -From: Rao Lei -Date: Wed, 5 Jan 2022 10:08:08 +0800 -Subject: [PATCH 4/6] ui/vnc.c: Fixed a deadlock bug. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Gerd Hoffmann -RH-MergeRequest: 75: fix vnc cut+paste crash -RH-Commit: [4/4] 5321e447de974d91e9a6c0cf01f4352166ffb7ce (kraxel/centos-qemu-kvm) -RH-Bugzilla: 2042820 -RH-Acked-by: Marc-André Lureau -RH-Acked-by: Daniel P. 
Berrangé -RH-Acked-by: Miroslav Rezanina - -The GDB statck is as follows: -(gdb) bt -0 __lll_lock_wait (futex=futex@entry=0x56211df20360, private=0) at lowlevellock.c:52 -1 0x00007f263caf20a3 in __GI___pthread_mutex_lock (mutex=0x56211df20360) at ../nptl/pthread_mutex_lock.c:80 -2 0x000056211a757364 in qemu_mutex_lock_impl (mutex=0x56211df20360, file=0x56211a804857 "../ui/vnc-jobs.h", line=60) - at ../util/qemu-thread-posix.c:80 -3 0x000056211a0ef8c7 in vnc_lock_output (vs=0x56211df14200) at ../ui/vnc-jobs.h:60 -4 0x000056211a0efcb7 in vnc_clipboard_send (vs=0x56211df14200, count=1, dwords=0x7ffdf1701338) at ../ui/vnc-clipboard.c:138 -5 0x000056211a0f0129 in vnc_clipboard_notify (notifier=0x56211df244c8, data=0x56211dd1bbf0) at ../ui/vnc-clipboard.c:209 -6 0x000056211a75dde8 in notifier_list_notify (list=0x56211afa17d0 , data=0x56211dd1bbf0) at ../util/notify.c:39 -7 0x000056211a0bf0e6 in qemu_clipboard_update (info=0x56211dd1bbf0) at ../ui/clipboard.c:50 -8 0x000056211a0bf05d in qemu_clipboard_peer_release (peer=0x56211df244c0, selection=QEMU_CLIPBOARD_SELECTION_CLIPBOARD) - at ../ui/clipboard.c:41 -9 0x000056211a0bef9b in qemu_clipboard_peer_unregister (peer=0x56211df244c0) at ../ui/clipboard.c:19 -10 0x000056211a0d45f3 in vnc_disconnect_finish (vs=0x56211df14200) at ../ui/vnc.c:1358 -11 0x000056211a0d4c9d in vnc_client_read (vs=0x56211df14200) at ../ui/vnc.c:1611 -12 0x000056211a0d4df8 in vnc_client_io (ioc=0x56211ce70690, condition=G_IO_IN, opaque=0x56211df14200) at ../ui/vnc.c:1649 -13 0x000056211a5b976c in qio_channel_fd_source_dispatch - (source=0x56211ce50a00, callback=0x56211a0d4d71 , user_data=0x56211df14200) at ../io/channel-watch.c:84 -14 0x00007f263ccede8e in g_main_context_dispatch () at /lib/x86_64-linux-gnu/libglib-2.0.so.0 -15 0x000056211a77d4a1 in glib_pollfds_poll () at ../util/main-loop.c:232 -16 0x000056211a77d51f in os_host_main_loop_wait (timeout=958545) at ../util/main-loop.c:255 -17 0x000056211a77d630 in main_loop_wait (nonblocking=0) at 
../util/main-loop.c:531 -18 0x000056211a45bc8e in qemu_main_loop () at ../softmmu/runstate.c:726 -19 0x000056211a0b45fa in main (argc=69, argv=0x7ffdf1701778, envp=0x7ffdf17019a8) at ../softmmu/main.c:50 - -From the call trace, we can see it is a deadlock bug. -vnc_disconnect_finish will acquire the output_mutex. -But, the output_mutex will be acquired again in vnc_clipboard_send. -Repeated locking will cause deadlock. So, I move -qemu_clipboard_peer_unregister() behind vnc_unlock_output(); - -Fixes: 0bf41cab93e ("ui/vnc: clipboard support") -Signed-off-by: Lei Rao -Reviewed-by: Marc-André Lureau -Message-Id: <20220105020808.597325-1-lei.rao@intel.com> -Signed-off-by: Gerd Hoffmann -(cherry picked from commit 1dbbe6f172810026c51dc84ed927a3cc23017949) ---- - ui/vnc.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/ui/vnc.c b/ui/vnc.c -index af02522e84..b253e85c65 100644 ---- a/ui/vnc.c -+++ b/ui/vnc.c -@@ -1354,12 +1354,12 @@ void vnc_disconnect_finish(VncState *vs) - /* last client gone */ - vnc_update_server_surface(vs->vd); - } -+ vnc_unlock_output(vs); -+ - if (vs->cbpeer.update.notify) { - qemu_clipboard_peer_unregister(&vs->cbpeer); - } - -- vnc_unlock_output(vs); -- - qemu_mutex_destroy(&vs->output_mutex); - if (vs->bh != NULL) { - qemu_bh_delete(vs->bh); --- -2.27.0 - diff --git a/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch b/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch deleted file mode 100644 index 539b8fe..0000000 --- a/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 846192d22a1ddfa87682bb0b67febef5c30c9743 Mon Sep 17 00:00:00 2001 -From: Vivek Goyal -Date: Tue, 25 Jan 2022 13:51:14 -0500 -Subject: [PATCH 3/5] virtiofsd: Drop membership of all supplementary groups - (CVE-2022-0358) - -RH-Author: Dr. 
David Alan Gilbert -RH-MergeRequest: 66: c9s: virtiofsd security fix - drop secondary groups -RH-Commit: [1/1] cdf3b0405ea3369933e76761890f16b040641036 (redhat/centos-stream/src/qemu-kvm) -RH-Bugzilla: 2046201 -RH-Acked-by: Hanna Reitz -RH-Acked-by: Sergio Lopez -RH-Acked-by: Vivek Goyal - -At the start, drop membership of all supplementary groups. This is -not required. - -If we have membership of "root" supplementary group and when we switch -uid/gid using setresuid/setsgid, we still retain membership of existing -supplemntary groups. And that can allow some operations which are not -normally allowed. - -For example, if root in guest creates a dir as follows. - -$ mkdir -m 03777 test_dir - -This sets SGID on dir as well as allows unprivileged users to write into -this dir. - -And now as unprivileged user open file as follows. - -$ su test -$ fd = open("test_dir/priviledge_id", O_RDWR|O_CREAT|O_EXCL, 02755); - -This will create SGID set executable in test_dir/. - -And that's a problem because now an unpriviliged user can execute it, -get egid=0 and get access to resources owned by "root" group. This is -privilege escalation. - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2044863 -Fixes: CVE-2022-0358 -Reported-by: JIETAO XIAO -Suggested-by: Miklos Szeredi -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Vivek Goyal -Message-Id: -Signed-off-by: Dr. 
David Alan Gilbert - dgilbert: Fixed missing {}'s style nit -(cherry picked from commit 449e8171f96a6a944d1f3b7d3627ae059eae21ca) ---- - tools/virtiofsd/passthrough_ll.c | 27 +++++++++++++++++++++++++++ - 1 file changed, 27 insertions(+) - -diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c -index 64b5b4fbb1..b3d0674f6d 100644 ---- a/tools/virtiofsd/passthrough_ll.c -+++ b/tools/virtiofsd/passthrough_ll.c -@@ -54,6 +54,7 @@ - #include - #include - #include -+#include - - #include "qemu/cutils.h" - #include "passthrough_helpers.h" -@@ -1161,6 +1162,30 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) - #define OURSYS_setresuid SYS_setresuid - #endif - -+static void drop_supplementary_groups(void) -+{ -+ int ret; -+ -+ ret = getgroups(0, NULL); -+ if (ret == -1) { -+ fuse_log(FUSE_LOG_ERR, "getgroups() failed with error=%d:%s\n", -+ errno, strerror(errno)); -+ exit(1); -+ } -+ -+ if (!ret) { -+ return; -+ } -+ -+ /* Drop all supplementary groups. We should not need it */ -+ ret = setgroups(0, NULL); -+ if (ret == -1) { -+ fuse_log(FUSE_LOG_ERR, "setgroups() failed with error=%d:%s\n", -+ errno, strerror(errno)); -+ exit(1); -+ } -+} -+ - /* - * Change to uid/gid of caller so that file is created with - * ownership of caller. -@@ -3926,6 +3951,8 @@ int main(int argc, char *argv[]) - - qemu_init_exec_dir(argv[0]); - -+ drop_supplementary_groups(); -+ - pthread_mutex_init(&lo.mutex, NULL); - lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal); - lo.root.fd = -1; --- -2.27.0 - diff --git a/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch b/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch deleted file mode 100644 index eb3273c..0000000 --- a/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 1b8eeb1323fa21c7b26d0396fae5ae4a8cdb1ace Mon Sep 17 00:00:00 2001 -From: "Dr. 
David Alan Gilbert" -Date: Tue, 11 Jan 2022 18:29:31 +0000 -Subject: [PATCH 1/3] x86: Add q35 RHEL 8.6.0 machine type - -RH-Author: Dr. David Alan Gilbert -RH-MergeRequest: 61: x86: Add rhel 8.6.0 & 9.0.0 machine types -RH-Commit: [1/2] 189335cf0e4ad117e3e401f23aa07cddbbac50df (dagrh/c-9-s-qemu-kvm) -RH-Bugzilla: 1945666 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cornelia Huck - -Add the new 8.6.0 machine type; note that while the -AV -notation has gone in the product naming, just keep the smbios -definitions the same for consistency. - -Signed-off-by: Dr. David Alan Gilbert ---- - hw/i386/pc_q35.c | 21 ++++++++++++++++++++- - 1 file changed, 20 insertions(+), 1 deletion(-) - -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 3b748ddd7b..0c25305f15 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -646,6 +646,24 @@ static void pc_q35_machine_rhel_options(MachineClass *m) - compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); - } - -+static void pc_q35_init_rhel860(MachineState *machine) -+{ -+ pc_q35_init(machine); -+} -+ -+static void pc_q35_machine_rhel860_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); -+ m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)"; -+ pcmc->smbios_stream_product = "RHEL-AV"; -+ pcmc->smbios_stream_version = "8.6.0"; -+} -+ -+DEFINE_PC_MACHINE(q35_rhel860, "pc-q35-rhel8.6.0", pc_q35_init_rhel860, -+ pc_q35_machine_rhel860_options); -+ -+ - static void pc_q35_init_rhel850(MachineState *machine) - { - pc_q35_init(machine); -@@ -654,8 +672,9 @@ static void pc_q35_init_rhel850(MachineState *machine) - static void pc_q35_machine_rhel850_options(MachineClass *m) - { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -- pc_q35_machine_rhel_options(m); -+ pc_q35_machine_rhel860_options(m); - m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; -+ m->alias = NULL; - pcmc->smbios_stream_product = "RHEL-AV"; - pcmc->smbios_stream_version 
= "8.5.0"; - compat_props_add(m->compat_props, hw_compat_rhel_8_5, --- -2.27.0 - diff --git a/kvm-x86-Add-q35-RHEL-9.0.0-machine-type.patch b/kvm-x86-Add-q35-RHEL-9.0.0-machine-type.patch deleted file mode 100644 index 4367495..0000000 --- a/kvm-x86-Add-q35-RHEL-9.0.0-machine-type.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 3d5024fb9c904a649d07f0def3a90b3d36611215 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Wed, 12 Jan 2022 13:21:57 +0000 -Subject: [PATCH 2/3] x86: Add q35 RHEL 9.0.0 machine type - -RH-Author: Dr. David Alan Gilbert -RH-MergeRequest: 61: x86: Add rhel 8.6.0 & 9.0.0 machine types -RH-Commit: [2/2] 743378502459b978efd632271f97ddb824422203 (dagrh/c-9-s-qemu-kvm) -RH-Bugzilla: 1945666 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cornelia Huck - -Add a rhel-9.0.0 q35 machine type; it's currently identical to 8.6.0; -but having a separate machine type will make life easier in the future -when the 8.x types go away. - -Note: The smbios stream product name has now changed to 'RHEL' - -bz: https://bugzilla.redhat.com/show_bug.cgi?id=1945666 -Signed-off-by: Dr. 
David Alan Gilbert ---- - hw/i386/pc_q35.c | 21 +++++++++++++++++++-- - 1 file changed, 19 insertions(+), 2 deletions(-) - -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 0c25305f15..bf9ad32f0e 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -646,6 +646,23 @@ static void pc_q35_machine_rhel_options(MachineClass *m) - compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); - } - -+static void pc_q35_init_rhel900(MachineState *machine) -+{ -+ pc_q35_init(machine); -+} -+ -+static void pc_q35_machine_rhel900_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); -+ m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; -+ pcmc->smbios_stream_product = "RHEL"; -+ pcmc->smbios_stream_version = "9.0.0"; -+} -+ -+DEFINE_PC_MACHINE(q35_rhel900, "pc-q35-rhel9.0.0", pc_q35_init_rhel900, -+ pc_q35_machine_rhel900_options); -+ - static void pc_q35_init_rhel860(MachineState *machine) - { - pc_q35_init(machine); -@@ -654,8 +671,9 @@ static void pc_q35_init_rhel860(MachineState *machine) - static void pc_q35_machine_rhel860_options(MachineClass *m) - { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -- pc_q35_machine_rhel_options(m); -+ pc_q35_machine_rhel900_options(m); - m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)"; -+ m->alias = NULL; - pcmc->smbios_stream_product = "RHEL-AV"; - pcmc->smbios_stream_version = "8.6.0"; - } -@@ -674,7 +692,6 @@ static void pc_q35_machine_rhel850_options(MachineClass *m) - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - pc_q35_machine_rhel860_options(m); - m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; -- m->alias = NULL; - pcmc->smbios_stream_product = "RHEL-AV"; - pcmc->smbios_stream_version = "8.5.0"; - compat_props_add(m->compat_props, hw_compat_rhel_8_5, --- -2.27.0 - diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 58fc073..b75c653 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -101,7 +101,7 @@ %global target_list %{kvm_target}-softmmu %global 
block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress -%global block_drivers_ro_list vdi,vmdk,vhdx,vpc,https,ssh +%global block_drivers_ro_list vdi,vmdk,vhdx,vpc,https %define qemudocdir %{_docdir}/%{name} %global firmwaredirs "%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios" @@ -150,8 +150,8 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 6.2.0 -Release: 13%{?rcrel}%{?dist}%{?cc_suffix} +Version: 7.0.0 +Release: 1%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -176,122 +176,24 @@ Source31: kvm-x86.conf Source36: README.tests -Patch0001: 0001-ui-clipboard-Don-t-use-g_autoptr-just-to-free-a-vari.patch -Patch0005: 0005-Initial-redhat-build.patch -Patch0006: 0006-Enable-disable-devices-for-RHEL.patch -Patch0007: 0007-Machine-type-related-general-changes.patch -Patch0008: 0008-Add-aarch64-machine-types.patch -Patch0009: 0009-Add-ppc64-machine-types.patch -Patch0010: 0010-Add-s390x-machine-types.patch -Patch0011: 0011-Add-x86_64-machine-types.patch -Patch0012: 0012-Enable-make-check.patch -Patch0013: 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch -Patch0014: 0014-Add-support-statement-to-help-output.patch -Patch0015: 0015-globally-limit-the-maximum-number-of-CPUs.patch -Patch0016: 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch -Patch0017: 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch -Patch0018: 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -Patch0019: 0019-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch -Patch0020: 0020-Fix-virtio-net-pci-vectors-compat.patch -Patch0021: 0021-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch -Patch0022: 
0022-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch -Patch0023: 0023-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch -Patch0024: 0024-redhat-Add-s390x-machine-type-compatibility-handling.patch -Patch0025: 0025-compat-Update-hw_compat_rhel_8_5-with-6.2.0-RC2-chan.patch -# For bz#2008060 - Fix CPU Model for new IBM Z Hardware - qemu part -Patch26: kvm-redhat-Add-rhel8.6.0-and-rhel9.0.0-machine-types-for.patch -# For bz#2014484 - [RHEL9] Enable virtio-mem as tech-preview on x86-64 - QEMU -Patch27: kvm-redhat-Enable-virtio-mem-as-tech-preview-on-x86-64.patch -# For bz#2031044 - Add rhel-9.0.0 machine types for RHEL 9.0 [aarch64] -Patch28: kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch -# For bz#2031044 - Add rhel-9.0.0 machine types for RHEL 9.0 [aarch64] -Patch29: kvm-hw-arm-virt-Register-its-as-a-class-property.patch -# For bz#2031044 - Add rhel-9.0.0 machine types for RHEL 9.0 [aarch64] -Patch30: kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch -# For bz#2031044 - Add rhel-9.0.0 machine types for RHEL 9.0 [aarch64] -Patch31: kvm-hw-arm-virt-Expose-the-RAS-option.patch -# For bz#2031044 - Add rhel-9.0.0 machine types for RHEL 9.0 [aarch64] -Patch32: kvm-hw-arm-virt-Add-9.0-machine-type-and-remove-8.5-one.patch -# For bz#2031044 - Add rhel-9.0.0 machine types for RHEL 9.0 [aarch64] -Patch33: kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch -# For bz#2024544 - Fio workers hangs when running fio with 32 jobs iodepth 32 and QEMU's userspace NVMe driver -Patch34: kvm-block-nvme-fix-infinite-loop-in-nvme_free_req_queue_.patch -# For bz#2028623 - [9.0] machine types: 6.2: Fix prefer_sockets -Patch35: kvm-rhel-machine-types-x86-set-prefer_sockets.patch -# For bz#1945666 - 9.0: x86 machine types -Patch36: kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch -# For bz#1945666 - 9.0: x86 machine types -Patch37: kvm-x86-Add-q35-RHEL-9.0.0-machine-type.patch -# For bz#2036669 - DEVICE_DELETED event is not delivered for device frontend 
if -device is configured via JSON -Patch38: kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch -# For bz#1962088 - [QSD] wrong help message for the fuse -Patch39: kvm-qemu-storage-daemon-Add-vhost-user-blk-help.patch -# For bz#1962088 - [QSD] wrong help message for the fuse -Patch40: kvm-qemu-storage-daemon-Fix-typo-in-vhost-user-blk-help.patch -# For bz#2046201 - CVE-2022-0358 qemu-kvm: QEMU: virtiofsd: potential privilege escalation via CVE-2018-13405 [rhel-9.0] -Patch41: kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch -# For bz#2034791 - Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD -Patch42: kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch -# For bz#2034791 - Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD -Patch43: kvm-block-rbd-workaround-for-ceph-issue-53784.patch -# For bz#2033708 - [Intel 9.0 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support -Patch44: kvm-numa-Enable-numa-for-SGX-EPC-sections.patch -# For bz#2033708 - [Intel 9.0 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support -Patch45: kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch -# For bz#2033708 - [Intel 9.0 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support -Patch46: kvm-doc-Add-the-SGX-numa-description.patch -# For bz#2033708 - [Intel 9.0 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support -Patch47: kvm-Enable-SGX-RH-Only.patch -# For bz#2033708 - [Intel 9.0 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support -Patch48: kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch -# For bz#2041461 - Inconsistent block status reply in qemu-nbd -Patch49: kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch -# For bz#2041461 - Inconsistent block status reply in qemu-nbd -Patch50: kvm-iotests-block-status-cache-New-test.patch -# For bz#1882917 - the target image size is incorrect when converting a badly fragmented file 
-Patch51: kvm-iotests-Test-qemu-img-convert-of-zeroed-data-cluster.patch -# For bz#1882917 - the target image size is incorrect when converting a badly fragmented file -Patch52: kvm-qemu-img-make-is_allocated_sectors-more-efficient.patch -# For bz#2040123 - Qemu core dumped when do block-stream to a snapshot node on non-enough space storage -Patch53: kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch -# For bz#2040123 - Qemu core dumped when do block-stream to a snapshot node on non-enough space storage -Patch54: kvm-iotests-stream-error-on-reset-New-test.patch -# For bz#2042481 - [aarch64] Launch guest with "default-bus-bypass-iommu=off,iommu=smmuv3" and "iommu_platform=on", guest hangs after system_reset -Patch55: kvm-hw-arm-smmuv3-Fix-device-reset.patch -# For bz#2046659 - qemu crash after execute blockdev-reopen with iothread -Patch56: kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch -# For bz#2046659 - qemu crash after execute blockdev-reopen with iothread -Patch57: kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch -# For bz#2033626 - Qemu core dump when start guest with nbd node or do block jobs to nbd node -Patch58: kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch -# For bz#2033626 - Qemu core dump when start guest with nbd node or do block jobs to nbd node -Patch59: kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch -# For bz#2033626 - Qemu core dump when start guest with nbd node or do block jobs to nbd node -Patch60: kvm-iotests.py-Add-QemuStorageDaemon-class.patch -# For bz#2033626 - Qemu core dump when start guest with nbd node or do block jobs to nbd node -Patch61: kvm-iotests-281-Test-lingering-timers.patch -# For bz#2033626 - Qemu core dump when start guest with nbd node or do block jobs to nbd node -Patch62: kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch -# For bz#2033626 - Qemu core dump when start guest with nbd node or do block jobs to nbd node -Patch63: 
kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch -# For bz#2042820 - qemu crash when try to copy and paste contents from client to VM -Patch64: kvm-Revert-ui-clipboard-Don-t-use-g_autoptr-just-to-free.patch -# For bz#2042820 - qemu crash when try to copy and paste contents from client to VM -Patch65: kvm-ui-avoid-compiler-warnings-from-unused-clipboard-inf.patch -# For bz#2042820 - qemu crash when try to copy and paste contents from client to VM -Patch66: kvm-ui-clipboard-fix-use-after-free-regression.patch -# For bz#2042820 - qemu crash when try to copy and paste contents from client to VM -Patch67: kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch -# For bz#2044818 - Qemu Core Dumped when migrate -> migrate_cancel -> migrate again during guest is paused -Patch68: kvm-memory-Fix-incorrect-calls-of-log_global_start-stop.patch -# For bz#2044818 - Qemu Core Dumped when migrate -> migrate_cancel -> migrate again during guest is paused -Patch69: kvm-memory-Fix-qemu-crash-on-starting-dirty-log-twice-wi.patch -# For bz#2062813 - Mark all RHEL-8 and earlier machine types as deprecated [rhel-9.1.0] -Patch70: kvm-RHEL-mark-old-machine-types-as-deprecated.patch -# For bz#2062828 - [virtual network][rhel9][vDPA] qemu crash after hot unplug vdpa device [rhel-9.1.0] -Patch71: kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch -# For bz#2065589 - RHEL 9.0 guest with vsock device migration failed from RHEL 9.0 > RHEL 8.6 [rhel-9.1.0] -Patch72: kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch +Patch0004: 0004-Initial-redhat-build.patch +Patch0005: 0005-Enable-disable-devices-for-RHEL.patch +Patch0006: 0006-Machine-type-related-general-changes.patch +Patch0007: 0007-Add-aarch64-machine-types.patch +Patch0008: 0008-Add-ppc64-machine-types.patch +Patch0009: 0009-Add-s390x-machine-types.patch +Patch0010: 0010-Add-x86_64-machine-types.patch +Patch0011: 0011-Enable-make-check.patch +Patch0012: 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch 
+Patch0013: 0013-Add-support-statement-to-help-output.patch +Patch0014: 0014-globally-limit-the-maximum-number-of-CPUs.patch +Patch0015: 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0016: 0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +Patch0017: 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +Patch0018: 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +Patch0019: 0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch +Patch0020: 0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch +Patch0021: 0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch # Source-git patches @@ -322,7 +224,6 @@ BuildRequires: python3-sphinx_rtd_theme BuildRequires: libseccomp-devel >= %{libseccomp_version} # For network block driver BuildRequires: libcurl-devel -BuildRequires: libssh-devel %if %{have_block_rbd} BuildRequires: librbd-devel %endif @@ -513,16 +414,6 @@ using the rbd protocol. %endif -%package block-ssh -Summary: QEMU SSH block driver -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} -%description block-ssh -This package provides the additional SSH block driver for QEMU. - -Install this package if you want to access remote disks using -the Secure Shell (SSH) protocol. 
- - %package audio-pa Summary: QEMU PulseAudio audio driver Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} @@ -646,6 +537,7 @@ mkdir -p %{qemu_kvm_build} --disable-crypto-afalg \\\ --disable-curl \\\ --disable-curses \\\ + --disable-dbus-display \\\ --disable-debug-info \\\ --disable-debug-mutex \\\ --disable-debug-tcg \\\ @@ -676,7 +568,6 @@ mkdir -p %{qemu_kvm_build} --disable-libssh \\\ --disable-libudev \\\ --disable-libusb \\\ - --disable-libxml2 \\\ --disable-linux-aio \\\ --disable-linux-io-uring \\\ --disable-linux-user \\\ @@ -702,6 +593,7 @@ mkdir -p %{qemu_kvm_build} --disable-pvrdma \\\ --disable-qcow1 \\\ --disable-qed \\\ + --disable-qga-vss \\\ --disable-qom-cast-debug \\\ --disable-rbd \\\ --disable-rdma \\\ @@ -751,7 +643,6 @@ mkdir -p %{qemu_kvm_build} --disable-whpx \\\ --disable-xen \\\ --disable-xen-pci-passthrough \\\ - --disable-xfsctl \\\ --disable-xkbcommon \\\ --disable-zstd \\\ --with-git-submodules=ignore \\\ @@ -775,7 +666,7 @@ run_configure() { --with-pkgversion="%{name}-%{version}-%{release}" \ --with-suffix="%{name}" \ --firmwarepath=%{firmwaredirs} \ - --meson="%{__meson}" \ + --meson="internal" \ --enable-trace-backend=dtrace \ --with-coroutine=ucontext \ --with-git=git \ @@ -803,9 +694,6 @@ run_configure \ --block-drv-ro-whitelist=%{block_drivers_ro_list} \ %endif --enable-attr \ -%ifarch %{ix86} x86_64 - --enable-avx2 \ -%endif --enable-cap-ng \ --enable-capstone=internal \ --enable-coroutine-pool \ @@ -822,7 +710,6 @@ run_configure \ %if %{have_pmem} --enable-libpmem \ %endif - --enable-libssh \ --enable-libusb \ --enable-libudev \ --enable-linux-aio \ @@ -875,7 +762,6 @@ run_configure \ --enable-safe-stack \ %endif - %if %{tools_only} %make_build qemu-img %make_build qemu-io @@ -980,7 +866,7 @@ cp -R %{qemu_kvm_build}/tests/avocado/* %{buildroot}%{testsdir}/tests/avocado/ # Install qemu.py and qmp/ scripts required to run avocado_qemu tests cp -R %{qemu_kvm_build}/python/qemu %{buildroot}%{testsdir}/python cp 
-R %{qemu_kvm_build}/scripts/qmp/* %{buildroot}%{testsdir}/scripts/qmp -install -p -m 0755 tests/Makefile.include %{buildroot}%{testsdir}/tests/ +install -p -m 0644 tests/Makefile.include %{buildroot}%{testsdir}/tests/ # Install qemu-iotests cp -R tests/qemu-iotests/* %{buildroot}%{testsdir}/tests/qemu-iotests/ @@ -1114,10 +1000,9 @@ rm -rf %{buildroot}%{_datadir}/%{name}/vgabios*bin rm -rf %{buildroot}%{_datadir}/%{name}/bios*.bin rm -rf %{buildroot}%{_datadir}/%{name}/sgabios.bin -# Remove virtiofsd (we use separate package for virtiofsd) -rm -rf %{buildroot}%{_mandir}/man1/virtiofsd.1* -rm -rf %{buildroot}%{_libexecdir}/virtiofsd -rm -rf %{buildroot}%{_datadir}/qemu/vhost-user/50-qemu-virtiofsd.json +# Remove vof roms +rm -rf %{buildroot}%{_datadir}/%{name}/vof-nvram.bin +rm -rf %{buildroot}%{_datadir}/%{name}/vof.bin %if %{have_modules_load} install -D -p -m 644 %{_sourcedir}/modules-load.conf %{buildroot}%{_sysconfdir}/modules-load.d/kvm.conf @@ -1145,6 +1030,10 @@ rm -rf %{buildroot}%{qemudocdir}/specs # endif !tools_only %endif +# Remove virtiofsd (we use separate package for virtiofsd) +rm -rf %{buildroot}%{_mandir}/man1/virtiofsd.1* +rm -rf %{buildroot}%{_libexecdir}/virtiofsd +rm -rf %{buildroot}%{_datadir}/qemu/vhost-user/50-qemu-virtiofsd.json %check %if !%{tools_only} @@ -1286,17 +1175,21 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %files device-display-virtio-gpu %{_libdir}/%{name}/hw-display-virtio-gpu.so + %files device-display-virtio-gpu-gl %{_libdir}/%{name}/hw-display-virtio-gpu-gl.so + %ifarch s390x %files device-display-virtio-gpu-ccw %{_libdir}/%{name}/hw-s390x-virtio-gpu-ccw.so %else %files device-display-virtio-gpu-pci %{_libdir}/%{name}/hw-display-virtio-gpu-pci.so + %files device-display-virtio-gpu-pci-gl %{_libdir}/%{name}/hw-display-virtio-gpu-pci-gl.so %endif + %ifarch x86_64 %{power64} %files device-display-virtio-vga %{_libdir}/%{name}/hw-display-virtio-vga.so @@ -1314,8 +1207,6 @@ useradd -r -u 107 -g qemu -G kvm -d / 
-s /sbin/nologin \ %files block-rbd %{_libdir}/%{name}/block-rbd.so %endif -%files block-ssh -%{_libdir}/%{name}/block-ssh.so %files audio-pa %{_libdir}/%{name}/audio-pa.so @@ -1330,7 +1221,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{_libdir}/%{name}/hw-usb-host.so %if %{have_usbredir} -%files device-usb-redirect +%files device-usb-redirect %{_libdir}/%{name}/hw-usb-redirect.so %endif @@ -1338,6 +1229,22 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Wed Apr 20 2022 Miroslav Rezanina - 7.0.0-1 +- Rebase to QEMU 7.0.0 [bz#2064757] +- Do not build ssh block driver anymore [bz#2064500] +- Removed hpet and parallel port support [bz#2065042] +- Compatibility support [bz#2064782 bz#2064771] +- Resolves: bz#2064757 + (Rebase to QEMU 7.0.0) +- Resolves: bz#2064500 + (Install qemu-kvm-6.2.0-11.el9_0.1 failed as conflict with qemu-kvm-block-ssh-6.2.0-11.el9_0.1) +- Resolves: bz#2065042 + (Remove upstream-only devices from the qemu-kvm binary) +- Resolves: bz#2064782 + (Update machine type compatibility for QEMU 7.0.0 update [s390x]) +- Resolves: bz#2064771 + (Update machine type compatibility for QEMU 7.0.0 update [x86_64]) + * Thu Apr 14 2022 Miroslav Rezanina - 6.2.0-13 - kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch [bz#2065589] - Resolves: bz#2065589 diff --git a/sources b/sources index f1545cf..9f53caf 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-6.2.0.tar.xz) = e9f8231c9e1cfcc41cb47f10a55d63f6b8aee307af00cf6acf64acb7aa4f49fa7e9d6330703a2abea15d8b7bbaba7d3cb08c83edd98d82642367b527df730817 +SHA512 (qemu-7.0.0.tar.xz) = 44ecd10c018a3763e1bc87d1d35b98890d0d5636acd69fe9b5cadf5024d5af6a31684d60cbe1c3370e02986434c1fb0ad99224e0e6f6fe7eda169992508157b1 From 60bf970e5dbb15647cbae9451a1bddf38a8b2e3e Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 6 May 2022 06:14:47 +0000 Subject: [PATCH 157/195] * Fri May 06 2022 Miroslav Rezanina - 7.0.0-2 - 
kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch [bz#2044162] - kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch [bz#2081022] - Resolves: bz#2044162 ([RHEL9.1] Enable virtio-mem as tech-preview on ARM64 QEMU) - Resolves: bz#2081022 (Build regression on ppc64le with c9s qemu-kvm 7.0.0-1 changes) --- ...aarch64-softmmu-Enable-CONFIG_VIRTIO.patch | 41 ++++++++++++++++ ...odels-Fix-ppc_cpu_aliases-list-for-R.patch | 48 +++++++++++++++++++ qemu-kvm.spec | 14 +++++- 3 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch create mode 100644 kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch diff --git a/kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch b/kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch new file mode 100644 index 0000000..a948e57 --- /dev/null +++ b/kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch @@ -0,0 +1,41 @@ +From 3a0e9bb88e82cc76ca5efc0595ce94b5dc34749e Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Mon, 25 Apr 2022 13:42:46 +0800 +Subject: [PATCH 1/2] configs/devices/aarch64-softmmu: Enable CONFIG_VIRTIO_MEM + +RH-Author: Gavin Shan +RH-MergeRequest: 80: Enable virtio-mem for aarch64 +RH-Commit: [1/1] 1afbd08da6d7c860da8d617a0a932d3660514878 (gwshan/qemu-rhel-9) +RH-Bugzilla: 2044162 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: David Hildenbrand + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2044162 + +This enables virtio-mem device on aarch64 since all needed commits +are ready. 
+ + b1b87327a9 hw/arm/virt: Support for virtio-mem-pci + 1263615efe virtio-mem: Correct default THP size for ARM64 + +Signed-off-by: Gavin Shan +--- + configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +index 5f6ee1de5b..187938573f 100644 +--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak ++++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +@@ -22,6 +22,7 @@ CONFIG_VFIO=y + CONFIG_VFIO_PCI=y + CONFIG_VIRTIO_MMIO=y + CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_MEM=y + CONFIG_XIO3130=y + CONFIG_NVDIMM=y + CONFIG_ACPI_APEI=y +-- +2.35.1 + diff --git a/kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch b/kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch new file mode 100644 index 0000000..c940cdb --- /dev/null +++ b/kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch @@ -0,0 +1,48 @@ +From 39642d0d37e2ef61ce7fde0bc284d37a365e4482 Mon Sep 17 00:00:00 2001 +From: Murilo Opsfelder Araujo +Date: Mon, 2 May 2022 17:59:11 -0300 +Subject: [PATCH 2/2] target/ppc/cpu-models: Fix ppc_cpu_aliases list for RHEL +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Murilo Opsfelder Araújo +RH-MergeRequest: 81: target/ppc/cpu-models: remove extraneous "#endif" +RH-Commit: [1/1] 5fff003ad3deb84c6a8e69ab90552a31edb3b058 (mopsfelder/centos-stream-src-qemu-kvm) +RH-Bugzilla: 2081022 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Thomas Huth +RH-Acked-by: Laurent Vivier + +The commit b9d28ecdedaf ("Enable/disable devices for RHEL") removed the +"#if 0" from the beginning of the ppc_cpu_aliases list, which broke the +build on ppc64le: + + ../target/ppc/cpu-models.c:904:2: error: #endif without #if + #endif + ^ + 1 error generated. 
+ +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2081022 + +Fixes: b9d28ecdedaf (Enable/disable devices for RHEL) +Signed-off-by: Murilo Opsfelder Araujo +--- + target/ppc/cpu-models.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c +index dd78883410..528467eac1 100644 +--- a/target/ppc/cpu-models.c ++++ b/target/ppc/cpu-models.c +@@ -746,6 +746,7 @@ + /* PowerPC CPU aliases */ + + PowerPCCPUAlias ppc_cpu_aliases[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "405", "405d4" }, + { "405cr", "405crc" }, + { "405gp", "405gpd" }, +-- +2.35.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index b75c653..38d4980 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.0.0 -Release: 1%{?rcrel}%{?dist}%{?cc_suffix} +Release: 2%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -194,6 +194,10 @@ Patch0018: 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch Patch0019: 0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch Patch0020: 0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch Patch0021: 0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch +# For bz#2044162 - [RHEL9.1] Enable virtio-mem as tech-preview on ARM64 QEMU +Patch22: kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch +# For bz#2081022 - Build regression on ppc64le with c9s qemu-kvm 7.0.0-1 changes +Patch23: kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch # Source-git patches @@ -1229,6 +1233,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Fri May 06 2022 Miroslav Rezanina - 7.0.0-2 +- kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch [bz#2044162] +- kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch [bz#2081022] +- Resolves: bz#2044162 + ([RHEL9.1] Enable virtio-mem as tech-preview on ARM64 QEMU) +- Resolves: bz#2081022 + (Build regression on ppc64le with c9s qemu-kvm 7.0.0-1 changes) + * Wed Apr 20 2022 Miroslav Rezanina - 7.0.0-1 - Rebase to QEMU 7.0.0 [bz#2064757] - Do not build ssh block driver anymore [bz#2064500] From 0b5c35c425aba6d529aeeb52c8bf42900c23b746 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 12 May 2022 03:31:32 -0400 Subject: [PATCH 158/195] * Thu May 12 2022 Miroslav Rezanina - 7.0.0-3 - kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch [bz#2046029] - kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch [bz#2046029] - kvm-Enable-virtio-iommu-pci-on-aarch64.patch [bz#1477099] - kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch [bz#2037612] - kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch [bz#2037612] - Resolves: bz#2046029 ([WRB] New machine type 
property - dtb-kaslr-seed) - Resolves: bz#1477099 (virtio-iommu (including ACPI, VHOST/VFIO integration, migration support)) - Resolves: bz#2037612 ([Win11][tpm][QL41112 PF] vfio_listener_region_add received unaligned region) --- kvm-Enable-virtio-iommu-pci-on-aarch64.patch | 41 +++++++ ...missing-initialization-in-instance-c.patch | 56 +++++++++ ...ve-the-dtb-kaslr-seed-machine-option.patch | 76 ++++++++++++ ...m-Add-a-stub-function-for-TPM_IS_CRB.patch | 54 +++++++++ ...ve-spurious-tpm-crb-cmd-misalignment.patch | 114 ++++++++++++++++++ qemu-kvm.spec | 25 +++- 6 files changed, 365 insertions(+), 1 deletion(-) create mode 100644 kvm-Enable-virtio-iommu-pci-on-aarch64.patch create mode 100644 kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch create mode 100644 kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch create mode 100644 kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch create mode 100644 kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch diff --git a/kvm-Enable-virtio-iommu-pci-on-aarch64.patch b/kvm-Enable-virtio-iommu-pci-on-aarch64.patch new file mode 100644 index 0000000..3aafd3c --- /dev/null +++ b/kvm-Enable-virtio-iommu-pci-on-aarch64.patch @@ -0,0 +1,41 @@ +From 85781b8745fa1581a66f64011d61a4f0c4e103dc Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Fri, 6 May 2022 17:03:11 +0200 +Subject: [PATCH 3/5] Enable virtio-iommu-pci on aarch64 + +RH-Author: Eric Auger +RH-MergeRequest: 83: Enable virtio-iommu-pci on aarch64 +RH-Commit: [1/1] 23e5c0832e52c66adf5fd6daccdc3edddc7ecb8b (eauger1/centos-qemu-kvm) +RH-Bugzilla: 1477099 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1477099 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45128798 +Upstream Status: RHEL-only +Tested: With virtio-net-pci and virtio-block-pci + +let's enable the virtio-iommu-pci device on aarch64 by +turning CONFIG_VIRTIO_IOMMU on. 
+ +Signed-off-by: Eric Auger +--- + configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +index 187938573f..1618d31b89 100644 +--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak ++++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +@@ -23,6 +23,7 @@ CONFIG_VFIO_PCI=y + CONFIG_VIRTIO_MMIO=y + CONFIG_VIRTIO_PCI=y + CONFIG_VIRTIO_MEM=y ++CONFIG_VIRTIO_IOMMU=y + CONFIG_XIO3130=y + CONFIG_NVDIMM=y + CONFIG_ACPI_APEI=y +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch b/kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch new file mode 100644 index 0000000..78b9ee0 --- /dev/null +++ b/kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch @@ -0,0 +1,56 @@ +From e25c40735d2f022c07481b548d20476222006657 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 4 May 2022 11:11:54 +0200 +Subject: [PATCH 2/5] hw/arm/virt: Fix missing initialization in + instance/class_init() + +RH-Author: Eric Auger +RH-MergeRequest: 82: hw/arm/virt: Remove the dtb-kaslr-seed machine option +RH-Commit: [2/2] 22cbbfc30cf57a09b8acfb25d8a4dff2754c630c (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2046029 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2046029 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45133161 +Upstream Status: RHEL-only +Tested: Boot RHEL guest and check migration from 8.6 to 9.1 + (with custom additions) + +During the 7.0 rebase, the initialization of highmem_mmio and +highmem_redists was forgotten in rhel_virt_instance_init(). +Fix it to match virt_instance_init() code. + +Also mc->smp_props.clusters_supported was missing in +rhel_machine_class_init(). 
+ +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index bde4f77994..8be12e121d 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3286,6 +3286,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + hc->unplug_request = virt_machine_device_unplug_request_cb; + hc->unplug = virt_machine_device_unplug_cb; + mc->nvdimm_supported = true; ++ mc->smp_props.clusters_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; + mc->default_ram_id = "mach-virt.ram"; +@@ -3366,6 +3367,8 @@ static void rhel_virt_instance_init(Object *obj) + vms->gic_version = VIRT_GIC_VERSION_NOSEL; + + vms->highmem_ecam = !vmc->no_highmem_ecam; ++ vms->highmem_mmio = true; ++ vms->highmem_redists = true; + + if (vmc->no_its) { + vms->its = false; +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch b/kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch new file mode 100644 index 0000000..10af6c0 --- /dev/null +++ b/kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch @@ -0,0 +1,76 @@ +From 69f771c3dc641431f3e98497cbd3832edb69284f Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 3 May 2022 08:56:52 +0200 +Subject: [PATCH 1/5] hw/arm/virt: Remove the dtb-kaslr-seed machine option + +RH-Author: Eric Auger +RH-MergeRequest: 82: hw/arm/virt: Remove the dtb-kaslr-seed machine option +RH-Commit: [1/2] a89dcd7f22e04ae39de99795d3f34cdd0b831bc0 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2046029 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2046029 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45133161 +Upstream Status: RHEL-only +Tested: Boot RHEL guest and check the option is not available + +In RHEL we do not want to expose the dtb-kaslr-seed virt machine +option. 
Indeed the default 'on' value matches our need as +random data in the DTB does not cause any boot failure and we +want to support KASLR for the guest. + +Signed-off-by: Eric Auger + +--- +--- + hw/arm/virt.c | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e06862d22a..bde4f77994 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2350,6 +2350,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) + vms->its = value; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_dtb_kaslr_seed(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2363,6 +2364,7 @@ static void virt_set_dtb_kaslr_seed(Object *obj, bool value, Error **errp) + + vms->dtb_kaslr_seed = value; + } ++#endif /* disabled for RHEL */ + + static char *virt_get_oem_id(Object *obj, Error **errp) + { +@@ -3346,13 +3348,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Override the default value of field OEM Table ID " + "in ACPI table header." 
+ "The string may be up to 8 bytes in size"); +- +- object_class_property_add_bool(oc, "dtb-kaslr-seed", +- virt_get_dtb_kaslr_seed, +- virt_set_dtb_kaslr_seed); +- object_class_property_set_description(oc, "dtb-kaslr-seed", +- "Set off to disable passing of kaslr-seed " +- "dtb node to guest"); + } + + static void rhel_virt_instance_init(Object *obj) +@@ -3397,7 +3392,7 @@ static void rhel_virt_instance_init(Object *obj) + /* MTE is disabled by default and non-configurable for RHEL */ + vms->mte = false; + +- /* Supply a kaslr-seed by default */ ++ /* Supply a kaslr-seed by default and non-configurable for RHEL */ + vms->dtb_kaslr_seed = true; + + vms->irqmap = a15irqmap; +-- +2.31.1 + diff --git a/kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch b/kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch new file mode 100644 index 0000000..8fd2e16 --- /dev/null +++ b/kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch @@ -0,0 +1,54 @@ +From 74b3e92dcb9e343e135a681259514b4fd28086ea Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Fri, 6 May 2022 15:25:09 +0200 +Subject: [PATCH 4/5] sysemu: tpm: Add a stub function for TPM_IS_CRB + +RH-Author: Eric Auger +RH-MergeRequest: 84: vfio/common: Remove spurious tpm-crb-cmd misalignment warning +RH-Commit: [1/2] 0ab55ca1aa12a3a7cbdef5a378928f75e030e536 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2037612 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2037612 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45166961 +Upstream Status: YES +Tested: With TPM-CRB and VFIO + +In a subsequent patch, VFIO will need to recognize if +a memory region owner is a TPM CRB device. Hence VFIO +needs to use TPM_IS_CRB() even if CONFIG_TPM is unset. So +let's add a stub function. 
+ +Signed-off-by: Eric Auger +Suggested-by: Cornelia Huck +Reviewed-by: Stefan Berger +Link: https://lore.kernel.org/r/20220506132510.1847942-2-eric.auger@redhat.com +Signed-off-by: Alex Williamson +(cherry picked from commit 4168cdad398843ed53d650a27651868b4d3e21c9) +Signed-off-by: Eric Auger +--- + include/sysemu/tpm.h | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h +index 68b2206463..fb40e30ff6 100644 +--- a/include/sysemu/tpm.h ++++ b/include/sysemu/tpm.h +@@ -80,6 +80,12 @@ static inline TPMVersion tpm_get_version(TPMIf *ti) + #define tpm_init() (0) + #define tpm_cleanup() + ++/* needed for an alignment check in non-tpm code */ ++static inline Object *TPM_IS_CRB(Object *obj) ++{ ++ return NULL; ++} ++ + #endif /* CONFIG_TPM */ + + #endif /* QEMU_TPM_H */ +-- +2.31.1 + diff --git a/kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch b/kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch new file mode 100644 index 0000000..26083c1 --- /dev/null +++ b/kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch @@ -0,0 +1,114 @@ +From b90a5878355bd549200ed1eff52ea084325bfc8a Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Fri, 6 May 2022 15:25:10 +0200 +Subject: [PATCH 5/5] vfio/common: remove spurious tpm-crb-cmd misalignment + warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 84: vfio/common: Remove spurious tpm-crb-cmd misalignment warning +RH-Commit: [2/2] 9b73a9aec59cb50d5e3468cc553464bf4a73d0a1 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2037612 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2037612 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45166961 +Upstream Status: YES +Tested: With TPM-CRB and VFIO + +The CRB command buffer currently is a RAM MemoryRegion and given 
+its base address alignment, it causes an error report on +vfio_listener_region_add(). This region could have been a RAM device +region, easing the detection of such safe situation but this option +was not well received. So let's add a helper function that uses the +memory region owner type to detect the situation is safe wrt +the assignment. Other device types can be checked here if such kind +of problem occurs again. + +Conflicts in hw/vfio/common.c +We don't have 8e3b0cbb721 ("Replace qemu_real_host_page variables with inlined functions") + +Signed-off-by: Eric Auger +Reviewed-by: Philippe Mathieu-Daudé +Acked-by: Stefan Berger +Reviewed-by: Cornelia Huck +Link: https://lore.kernel.org/r/20220506132510.1847942-3-eric.auger@redhat.com +Signed-off-by: Alex Williamson +(cherry picked from commit 851d6d1a0ff29a87ec588205842edf6b86d99b5c) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 27 ++++++++++++++++++++++++++- + hw/vfio/trace-events | 1 + + 2 files changed, 27 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 080046e3f5..0fbe0d47af 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -40,6 +40,7 @@ + #include "trace.h" + #include "qapi/error.h" + #include "migration/migration.h" ++#include "sysemu/tpm.h" + + VFIOGroupList vfio_group_list = + QLIST_HEAD_INITIALIZER(vfio_group_list); +@@ -861,6 +862,22 @@ static void vfio_unregister_ram_discard_listener(VFIOContainer *container, + g_free(vrdl); + } + ++static bool vfio_known_safe_misalignment(MemoryRegionSection *section) ++{ ++ MemoryRegion *mr = section->mr; ++ ++ if (!TPM_IS_CRB(mr->owner)) { ++ return false; ++ } ++ ++ /* this is a known safe misaligned region, just trace for debug purpose */ ++ trace_vfio_known_safe_misalignment(memory_region_name(mr), ++ section->offset_within_address_space, ++ section->offset_within_region, ++ qemu_real_host_page_size); ++ return true; ++} ++ + static void vfio_listener_region_add(MemoryListener *listener, + 
MemoryRegionSection *section) + { +@@ -884,7 +901,15 @@ static void vfio_listener_region_add(MemoryListener *listener, + if (unlikely((section->offset_within_address_space & + ~qemu_real_host_page_mask) != + (section->offset_within_region & ~qemu_real_host_page_mask))) { +- error_report("%s received unaligned region", __func__); ++ if (!vfio_known_safe_misalignment(section)) { ++ error_report("%s received unaligned region %s iova=0x%"PRIx64 ++ " offset_within_region=0x%"PRIx64 ++ " qemu_real_host_page_size=0x%"PRIxPTR, ++ __func__, memory_region_name(section->mr), ++ section->offset_within_address_space, ++ section->offset_within_region, ++ qemu_real_host_page_size); ++ } + return; + } + +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 0ef1b5f4a6..582882db91 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -100,6 +100,7 @@ vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add + vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d" + vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64 + vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]" ++vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR ": cannot be mapped for DMA" + vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA" + vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64 + vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec 
index 38d4980..5feb6bf 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.0.0 -Release: 2%{?rcrel}%{?dist}%{?cc_suffix} +Release: 3%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -198,6 +198,16 @@ Patch0021: 0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch Patch22: kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch # For bz#2081022 - Build regression on ppc64le with c9s qemu-kvm 7.0.0-1 changes Patch23: kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch +# For bz#2046029 - [WRB] New machine type property - dtb-kaslr-seed +Patch24: kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch +# For bz#2046029 - [WRB] New machine type property - dtb-kaslr-seed +Patch25: kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch +# For bz#1477099 - virtio-iommu (including ACPI, VHOST/VFIO integration, migration support) +Patch26: kvm-Enable-virtio-iommu-pci-on-aarch64.patch +# For bz#2037612 - [Win11][tpm][QL41112 PF] vfio_listener_region_add received unaligned region +Patch27: kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch +# For bz#2037612 - [Win11][tpm][QL41112 PF] vfio_listener_region_add received unaligned region +Patch28: kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch # Source-git patches @@ -1233,6 +1243,19 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Thu May 12 2022 Miroslav Rezanina - 7.0.0-3 +- kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch [bz#2046029] +- kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch [bz#2046029] +- kvm-Enable-virtio-iommu-pci-on-aarch64.patch [bz#1477099] +- 
kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch [bz#2037612] +- kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch [bz#2037612] +- Resolves: bz#2046029 + ([WRB] New machine type property - dtb-kaslr-seed) +- Resolves: bz#1477099 + (virtio-iommu (including ACPI, VHOST/VFIO integration, migration support)) +- Resolves: bz#2037612 + ([Win11][tpm][QL41112 PF] vfio_listener_region_add received unaligned region) + * Fri May 06 2022 Miroslav Rezanina - 7.0.0-2 - kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch [bz#2044162] - kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch [bz#2081022] From 550d33ded2851a9082e917eab994da37e391f276 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 19 May 2022 08:13:20 -0400 Subject: [PATCH 159/195] * Thu May 19 2022 Miroslav Rezanina - 7.0.0-4 - kvm-qapi-machine.json-Add-cluster-id.patch [bz#2041823] - kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch [bz#2041823] - kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch [bz#2041823] - kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch [bz#2041823] - kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch [bz#2041823] - kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch [bz#2041823] - kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch [bz#2079938] - kvm-coroutine-Revert-to-constant-batch-size.patch [bz#2079938] - kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch [bz#2079347] - kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch [bz#2079347] - kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch [bz#2079347] - kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch [bz#2079347] - kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch [bz#2079347] - kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch [bz#2079347] - kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch [bz#1995710] - kvm-migration-Fix-operator-type.patch [bz#2064530] 
- Resolves: bz#2041823 ([aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken') - Resolves: bz#2079938 (qemu coredump when boot with multi disks (qemu) failed to set up stack guard page: Cannot allocate memory) - Resolves: bz#2079347 (Guest boot blocked when scsi disks using same iothread and 100% CPU consumption) - Resolves: bz#1995710 (RFE: Allow virtio-scsi CD-ROM media change with IOThreads) - Resolves: bz#2064530 (Rebuild qemu-kvm with clang-14) --- ...si-Reject-scsi-cd-if-data-plane-enab.patch | 51 +++++ ...ame-qemu_coroutine_inc-dec_pool_size.patch | 101 ++++++++++ ...outine-Revert-to-constant-batch-size.patch | 138 ++++++++++++++ ...d-Use-existing-CPU-topology-to-build.patch | 179 ++++++++++++++++++ ...ider-SMP-configuration-in-CPU-topolo.patch | 74 ++++++++ ...-virt-Fix-CPU-s-default-NUMA-node-ID.patch | 88 +++++++++ kvm-migration-Fix-operator-type.patch | 47 +++++ kvm-qapi-machine.json-Add-cluster-id.patch | 126 ++++++++++++ ...Correct-CPU-and-NUMA-association-in-.patch | 100 ++++++++++ ...Specify-CPU-topology-in-aarch64_numa.patch | 68 +++++++ ...i-clean-up-virtio_scsi_handle_cmd_vq.patch | 77 ++++++++ ...-clean-up-virtio_scsi_handle_ctrl_vq.patch | 65 +++++++ ...clean-up-virtio_scsi_handle_event_vq.patch | 62 ++++++ ...t-waste-CPU-polling-the-event-virtqu.patch | 103 ++++++++++ ...ctrl-and-event-handler-functions-in-.patch | 119 ++++++++++++ ...-request-related-items-from-.h-to-.c.patch | 168 ++++++++++++++++ qemu-kvm.spec | 62 +++++- 17 files changed, 1627 insertions(+), 1 deletion(-) create mode 100644 kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch create mode 100644 kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch create mode 100644 kvm-coroutine-Revert-to-constant-batch-size.patch create mode 100644 kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch create mode 100644 kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch create mode 100644 
kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch create mode 100644 kvm-migration-Fix-operator-type.patch create mode 100644 kvm-qapi-machine.json-Add-cluster-id.patch create mode 100644 kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch create mode 100644 kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch create mode 100644 kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch create mode 100644 kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch create mode 100644 kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch create mode 100644 kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch create mode 100644 kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch create mode 100644 kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch diff --git a/kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch b/kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch new file mode 100644 index 0000000..e8eb35d --- /dev/null +++ b/kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch @@ -0,0 +1,51 @@ +From 733acef2caea0758edd74fb634b095ce09bf5914 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Mon, 9 May 2022 03:46:23 -0400 +Subject: [PATCH 15/16] Revert "virtio-scsi: Reject scsi-cd if data plane + enabled [RHEL only]" + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 91: Revert "virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only]" +RH-Commit: [1/1] 1af55d792bc9166e5c86272afe8093c76ab41bb4 (eesposit/qemu-kvm) +RH-Bugzilla: 1995710 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi + +This reverts commit 4e17b1126e. + +Over time AioContext usage and coverage has increased, and now block +backend is capable of handling AioContext change upon eject and insert. +Therefore the above downstream-only commit is not necessary anymore, +and can be safely reverted. 
+ +X-downstream-only: true + +Signed-off-by: Emanuele Giuseppe Esposito +--- + hw/scsi/virtio-scsi.c | 9 --------- + 1 file changed, 9 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 2450c9438c..db54d104be 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -937,15 +937,6 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + AioContext *old_context; + int ret; + +- /* XXX: Remove this check once block backend is capable of handling +- * AioContext change upon eject/insert. +- * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if +- * data plane is not used, both cases are safe for scsi-cd. */ +- if (s->ctx && s->ctx != qemu_get_aio_context() && +- object_dynamic_cast(OBJECT(dev), "scsi-cd")) { +- error_setg(errp, "scsi-cd is not supported by data plane"); +- return; +- } + if (s->ctx && !s->dataplane_fenced) { + if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; +-- +2.31.1 + diff --git a/kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch b/kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch new file mode 100644 index 0000000..c1f3683 --- /dev/null +++ b/kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch @@ -0,0 +1,101 @@ +From e3cb8849862a9f0dd20f2913d540336a037d43c7 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 10 May 2022 17:10:19 +0200 +Subject: [PATCH 07/16] coroutine: Rename qemu_coroutine_inc/dec_pool_size() + +RH-Author: Kevin Wolf +RH-MergeRequest: 87: coroutine: Fix crashes due to too large pool batch size +RH-Commit: [1/2] 6389b11f70225f221784c270d9b90c1ea43ca8fb (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 2079938 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella + +It's true that these functions currently affect the batch size in which +coroutines are reused (i.e. 
moved from the global release pool to the +allocation pool of a specific thread), but this is a bug and will be +fixed in a separate patch. + +In fact, the comment in the header file already just promises that it +influences the pool size, so reflect this in the name of the functions. +As a nice side effect, the shorter function name makes some line +wrapping unnecessary. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Message-Id: <20220510151020.105528-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 98e3ab35054b946f7c2aba5408822532b0920b53) +Signed-off-by: Kevin Wolf +--- + hw/block/virtio-blk.c | 6 ++---- + include/qemu/coroutine.h | 6 +++--- + util/qemu-coroutine.c | 4 ++-- + 3 files changed, 7 insertions(+), 9 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 540c38f829..6a1cc41877 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1215,8 +1215,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) + for (i = 0; i < conf->num_queues; i++) { + virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); + } +- qemu_coroutine_increase_pool_batch_size(conf->num_queues * conf->queue_size +- / 2); ++ qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); + virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); + if (err != NULL) { + error_propagate(errp, err); +@@ -1253,8 +1252,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) + for (i = 0; i < conf->num_queues; i++) { + virtio_del_queue(vdev, i); + } +- qemu_coroutine_decrease_pool_batch_size(conf->num_queues * conf->queue_size +- / 2); ++ qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2); + qemu_del_vm_change_state_handler(s->change); + blockdev_mark_auto_del(s->blk); + virtio_cleanup(vdev); +diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h +index c828a95ee0..5b621d1295 100644 +--- a/include/qemu/coroutine.h ++++ 
b/include/qemu/coroutine.h +@@ -334,12 +334,12 @@ void coroutine_fn yield_until_fd_readable(int fd); + /** + * Increase coroutine pool size + */ +-void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size); ++void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size); + + /** +- * Devcrease coroutine pool size ++ * Decrease coroutine pool size + */ +-void qemu_coroutine_decrease_pool_batch_size(unsigned int additional_pool_size); ++void qemu_coroutine_dec_pool_size(unsigned int additional_pool_size); + + #include "qemu/lockable.h" + +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index c03b2422ff..faca0ca97c 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -205,12 +205,12 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co) + return co->ctx; + } + +-void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size) ++void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) + { + qatomic_add(&pool_batch_size, additional_pool_size); + } + +-void qemu_coroutine_decrease_pool_batch_size(unsigned int removing_pool_size) ++void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) + { + qatomic_sub(&pool_batch_size, removing_pool_size); + } +-- +2.31.1 + diff --git a/kvm-coroutine-Revert-to-constant-batch-size.patch b/kvm-coroutine-Revert-to-constant-batch-size.patch new file mode 100644 index 0000000..2973510 --- /dev/null +++ b/kvm-coroutine-Revert-to-constant-batch-size.patch @@ -0,0 +1,138 @@ +From 345107bfd5537b51f34aaeb97d6161858bb6feee Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 10 May 2022 17:10:20 +0200 +Subject: [PATCH 08/16] coroutine: Revert to constant batch size + +RH-Author: Kevin Wolf +RH-MergeRequest: 87: coroutine: Fix crashes due to too large pool batch size +RH-Commit: [2/2] 8a8a39af873854cdc8333d1a70f3479a97c3ec7a (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 2079938 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi 
+RH-Acked-by: Stefano Garzarella + +Commit 4c41c69e changed the way the coroutine pool is sized because for +virtio-blk devices with a large queue size and heavy I/O, it was just +too small and caused coroutines to be deleted and reallocated soon +afterwards. The change made the size dynamic based on the number of +queues and the queue size of virtio-blk devices. + +There are two important numbers here: Slightly simplified, when a +coroutine terminates, it is generally stored in the global release pool +up to a certain pool size, and if the pool is full, it is freed. +Conversely, when allocating a new coroutine, the coroutines in the +release pool are reused if the pool already has reached a certain +minimum size (the batch size), otherwise we allocate new coroutines. + +The problem after commit 4c41c69e is that it not only increases the +maximum pool size (which is the intended effect), but also the batch +size for reusing coroutines (which is a bug). It means that in cases +with many devices and/or a large queue size (which defaults to the +number of vcpus for virtio-blk-pci), many thousand coroutines could be +sitting in the release pool without being reused. + +This is not only a waste of memory and allocations, but it actually +makes the QEMU process likely to hit the vm.max_map_count limit on Linux +because each coroutine requires two mappings (its stack and the guard +page for the stack), causing it to abort() in qemu_alloc_stack() because +when the limit is hit, mprotect() starts to fail with ENOMEM. + +In order to fix the problem, change the batch size back to 64 to avoid +uselessly accumulating coroutines in the release pool, but keep the +dynamic maximum pool size so that coroutines aren't freed too early +in heavy I/O scenarios. + +Note that this fix doesn't strictly make it impossible to hit the limit, +but this would only happen if most of the coroutines are actually in use +at the same time, not just sitting in a pool. 
This is the same behaviour +as we already had before commit 4c41c69e. Fully preventing this would +require allowing qemu_coroutine_create() to return an error, but it +doesn't seem to be a scenario that people hit in practice. + +Cc: qemu-stable@nongnu.org +Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2079938 +Fixes: 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b +Signed-off-by: Kevin Wolf +Message-Id: <20220510151020.105528-3-kwolf@redhat.com> +Tested-by: Hiroki Narukawa +Signed-off-by: Kevin Wolf +(cherry picked from commit 9ec7a59b5aad4b736871c378d30f5ef5ec51cb52) + +Conflicts: + util/qemu-coroutine.c + +Trivial merge conflict because we don't have commit ac387a08 downstream. + +Signed-off-by: Kevin Wolf +--- + util/qemu-coroutine.c | 22 ++++++++++++++-------- + 1 file changed, 14 insertions(+), 8 deletions(-) + +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index faca0ca97c..804f672e0a 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -20,14 +20,20 @@ + #include "qemu/coroutine_int.h" + #include "block/aio.h" + +-/** Initial batch size is 64, and is increased on demand */ ++/** ++ * The minimal batch size is always 64, coroutines from the release_pool are ++ * reused as soon as there are 64 coroutines in it. The maximum pool size starts ++ * with 64 and is increased on demand so that coroutines are not deleted even if ++ * they are not immediately reused. 
++ */ + enum { +- POOL_INITIAL_BATCH_SIZE = 64, ++ POOL_MIN_BATCH_SIZE = 64, ++ POOL_INITIAL_MAX_SIZE = 64, + }; + + /** Free list to speed up creation */ + static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); +-static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE; ++static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE; + static unsigned int release_pool_size; + static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); + static __thread unsigned int alloc_pool_size; +@@ -51,7 +57,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) + if (CONFIG_COROUTINE_POOL) { + co = QSLIST_FIRST(&alloc_pool); + if (!co) { +- if (release_pool_size > qatomic_read(&pool_batch_size)) { ++ if (release_pool_size > POOL_MIN_BATCH_SIZE) { + /* Slow path; a good place to register the destructor, too. */ + if (!coroutine_pool_cleanup_notifier.notify) { + coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; +@@ -88,12 +94,12 @@ static void coroutine_delete(Coroutine *co) + co->caller = NULL; + + if (CONFIG_COROUTINE_POOL) { +- if (release_pool_size < qatomic_read(&pool_batch_size) * 2) { ++ if (release_pool_size < qatomic_read(&pool_max_size) * 2) { + QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); + qatomic_inc(&release_pool_size); + return; + } +- if (alloc_pool_size < qatomic_read(&pool_batch_size)) { ++ if (alloc_pool_size < qatomic_read(&pool_max_size)) { + QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); + alloc_pool_size++; + return; +@@ -207,10 +213,10 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co) + + void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) + { +- qatomic_add(&pool_batch_size, additional_pool_size); ++ qatomic_add(&pool_max_size, additional_pool_size); + } + + void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) + { +- qatomic_sub(&pool_batch_size, removing_pool_size); ++ qatomic_sub(&pool_max_size, 
removing_pool_size); + } +-- +2.31.1 + diff --git a/kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch b/kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch new file mode 100644 index 0000000..2795dcd --- /dev/null +++ b/kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch @@ -0,0 +1,179 @@ +From 8a12049e97149056f61f7748d9869606d282d16e Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:35 +0800 +Subject: [PATCH 06/16] hw/acpi/aml-build: Use existing CPU topology to build + PPTT table + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [6/6] 53fa376531c204cf706cc1a7a0499019756106cb (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +When the PPTT table is built, the CPU topology is re-calculated, but +it's unnecessary because the CPU topology has been populated in +virt_possible_cpu_arch_ids() on arm/virt machine. + +This reworks build_pptt() to avoid the re-calculation by reusing the existing IDs in +ms->possible_cpus. Currently, the only user of build_pptt() is +arm/virt machine. + +Signed-off-by: Gavin Shan +Tested-by: Yanan Wang +Reviewed-by: Yanan Wang +Acked-by: Igor Mammedov +Acked-by: Michael S.
Tsirkin +Message-id: 20220503140304.855514-7-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit ae9141d4a3265553503bf07d3574b40f84615a34) +Signed-off-by: Gavin Shan +--- + hw/acpi/aml-build.c | 111 +++++++++++++++++++------------------------- + 1 file changed, 48 insertions(+), 63 deletions(-) + +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index 4086879ebf..e6bfac95c7 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -2002,86 +2002,71 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + const char *oem_id, const char *oem_table_id) + { + MachineClass *mc = MACHINE_GET_CLASS(ms); +- GQueue *list = g_queue_new(); +- guint pptt_start = table_data->len; +- guint parent_offset; +- guint length, i; +- int uid = 0; +- int socket; ++ CPUArchIdList *cpus = ms->possible_cpus; ++ int64_t socket_id = -1, cluster_id = -1, core_id = -1; ++ uint32_t socket_offset = 0, cluster_offset = 0, core_offset = 0; ++ uint32_t pptt_start = table_data->len; ++ int n; + AcpiTable table = { .sig = "PPTT", .rev = 2, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + + acpi_table_begin(&table, table_data); + +- for (socket = 0; socket < ms->smp.sockets; socket++) { +- g_queue_push_tail(list, +- GUINT_TO_POINTER(table_data->len - pptt_start)); +- build_processor_hierarchy_node( +- table_data, +- /* +- * Physical package - represents the boundary +- * of a physical package +- */ +- (1 << 0), +- 0, socket, NULL, 0); +- } +- +- if (mc->smp_props.clusters_supported) { +- length = g_queue_get_length(list); +- for (i = 0; i < length; i++) { +- int cluster; +- +- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); +- for (cluster = 0; cluster < ms->smp.clusters; cluster++) { +- g_queue_push_tail(list, +- GUINT_TO_POINTER(table_data->len - pptt_start)); +- build_processor_hierarchy_node( +- table_data, +- (0 << 0), /* not a physical package */ +- parent_offset, cluster, NULL, 0); +- } ++ /* ++ * This works with the 
assumption that cpus[n].props.*_id has been ++ * sorted from top to down levels in mc->possible_cpu_arch_ids(). ++ * Otherwise, the unexpected and duplicated containers will be ++ * created. ++ */ ++ for (n = 0; n < cpus->len; n++) { ++ if (cpus->cpus[n].props.socket_id != socket_id) { ++ assert(cpus->cpus[n].props.socket_id > socket_id); ++ socket_id = cpus->cpus[n].props.socket_id; ++ cluster_id = -1; ++ core_id = -1; ++ socket_offset = table_data->len - pptt_start; ++ build_processor_hierarchy_node(table_data, ++ (1 << 0), /* Physical package */ ++ 0, socket_id, NULL, 0); + } +- } + +- length = g_queue_get_length(list); +- for (i = 0; i < length; i++) { +- int core; +- +- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); +- for (core = 0; core < ms->smp.cores; core++) { +- if (ms->smp.threads > 1) { +- g_queue_push_tail(list, +- GUINT_TO_POINTER(table_data->len - pptt_start)); +- build_processor_hierarchy_node( +- table_data, +- (0 << 0), /* not a physical package */ +- parent_offset, core, NULL, 0); +- } else { +- build_processor_hierarchy_node( +- table_data, +- (1 << 1) | /* ACPI Processor ID valid */ +- (1 << 3), /* Node is a Leaf */ +- parent_offset, uid++, NULL, 0); ++ if (mc->smp_props.clusters_supported) { ++ if (cpus->cpus[n].props.cluster_id != cluster_id) { ++ assert(cpus->cpus[n].props.cluster_id > cluster_id); ++ cluster_id = cpus->cpus[n].props.cluster_id; ++ core_id = -1; ++ cluster_offset = table_data->len - pptt_start; ++ build_processor_hierarchy_node(table_data, ++ (0 << 0), /* Not a physical package */ ++ socket_offset, cluster_id, NULL, 0); + } ++ } else { ++ cluster_offset = socket_offset; + } +- } + +- length = g_queue_get_length(list); +- for (i = 0; i < length; i++) { +- int thread; ++ if (ms->smp.threads == 1) { ++ build_processor_hierarchy_node(table_data, ++ (1 << 1) | /* ACPI Processor ID valid */ ++ (1 << 3), /* Node is a Leaf */ ++ cluster_offset, n, NULL, 0); ++ } else { ++ if (cpus->cpus[n].props.core_id != core_id) { ++ 
assert(cpus->cpus[n].props.core_id > core_id); ++ core_id = cpus->cpus[n].props.core_id; ++ core_offset = table_data->len - pptt_start; ++ build_processor_hierarchy_node(table_data, ++ (0 << 0), /* Not a physical package */ ++ cluster_offset, core_id, NULL, 0); ++ } + +- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); +- for (thread = 0; thread < ms->smp.threads; thread++) { +- build_processor_hierarchy_node( +- table_data, ++ build_processor_hierarchy_node(table_data, + (1 << 1) | /* ACPI Processor ID valid */ + (1 << 2) | /* Processor is a Thread */ + (1 << 3), /* Node is a Leaf */ +- parent_offset, uid++, NULL, 0); ++ core_offset, n, NULL, 0); + } + } + +- g_queue_free(list); + acpi_table_end(linker, &table); + } + +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch b/kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch new file mode 100644 index 0000000..240aead --- /dev/null +++ b/kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch @@ -0,0 +1,74 @@ +From 3b05d3464945295112b5d02d142422f524a52054 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:35 +0800 +Subject: [PATCH 03/16] hw/arm/virt: Consider SMP configuration in CPU topology + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [3/6] 7125b41f038c2b1cb33377d0ef1222f1ea42b648 (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +Currently, the SMP configuration isn't considered when the CPU +topology is populated. In this case, it's impossible to provide +the default CPU-to-NUMA mapping or association based on the socket +ID of the given CPU. + +This takes account of SMP configuration when the CPU topology +is populated. The die ID for the given CPU isn't assigned since +it's not supported on arm/virt machine. 
Besides, the used SMP +configuration in qtest/numa-test/aarch64_numa_cpu() is corrected +to avoid testing failure. + +Signed-off-by: Gavin Shan +Reviewed-by: Yanan Wang +Acked-by: Igor Mammedov +Message-id: 20220503140304.855514-4-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit c9ec4cb5e4936f980889e717524e73896b0200ed) +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 8be12e121d..a87c8d396a 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2553,6 +2553,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + int n; + unsigned int max_cpus = ms->smp.max_cpus; + VirtMachineState *vms = VIRT_MACHINE(ms); ++ MachineClass *mc = MACHINE_GET_CLASS(vms); + + if (ms->possible_cpus) { + assert(ms->possible_cpus->len == max_cpus); +@@ -2566,8 +2567,20 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + ms->possible_cpus->cpus[n].type = ms->cpu_type; + ms->possible_cpus->cpus[n].arch_id = + virt_cpu_mp_affinity(vms, n); ++ ++ assert(!mc->smp_props.dies_supported); ++ ms->possible_cpus->cpus[n].props.has_socket_id = true; ++ ms->possible_cpus->cpus[n].props.socket_id = ++ n / (ms->smp.clusters * ms->smp.cores * ms->smp.threads); ++ ms->possible_cpus->cpus[n].props.has_cluster_id = true; ++ ms->possible_cpus->cpus[n].props.cluster_id = ++ (n / (ms->smp.cores * ms->smp.threads)) % ms->smp.clusters; ++ ms->possible_cpus->cpus[n].props.has_core_id = true; ++ ms->possible_cpus->cpus[n].props.core_id = ++ (n / ms->smp.threads) % ms->smp.cores; + ms->possible_cpus->cpus[n].props.has_thread_id = true; +- ms->possible_cpus->cpus[n].props.thread_id = n; ++ ms->possible_cpus->cpus[n].props.thread_id = ++ n % ms->smp.threads; + } + return ms->possible_cpus; + } +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch b/kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch
new file mode 100644 index 0000000..6b60b70 --- /dev/null +++ b/kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch @@ -0,0 +1,88 @@ +From 14e49ad3b98f01c1ad6fe456469d40a96a43dc3c Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:35 +0800 +Subject: [PATCH 05/16] hw/arm/virt: Fix CPU's default NUMA node ID + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [5/6] 5336f62bc0c53c0417db1d71ef89544907bc28c0 (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +When CPU-to-NUMA association isn't explicitly provided by users, +the default one is given by mc->get_default_cpu_node_id(). However, +the CPU topology isn't fully considered in the default association +and this causes CPU topology broken warnings on booting Linux guest. + +For example, the following warning messages are observed when the +Linux guest is booted with the following command lines. 
+ +/home/gavin/sandbox/qemu.main/build/qemu-system-aarch64 \ +-accel kvm -machine virt,gic-version=host \ +-cpu host \ +-smp 6,sockets=2,cores=3,threads=1 \ +-m 1024M,slots=16,maxmem=64G \ +-object memory-backend-ram,id=mem0,size=128M \ +-object memory-backend-ram,id=mem1,size=128M \ +-object memory-backend-ram,id=mem2,size=128M \ +-object memory-backend-ram,id=mem3,size=128M \ +-object memory-backend-ram,id=mem4,size=128M \ +-object memory-backend-ram,id=mem5,size=384M \ +-numa node,nodeid=0,memdev=mem0 \ +-numa node,nodeid=1,memdev=mem1 \ +-numa node,nodeid=2,memdev=mem2 \ +-numa node,nodeid=3,memdev=mem3 \ +-numa node,nodeid=4,memdev=mem4 \ +-numa node,nodeid=5,memdev=mem5 +: +alternatives: patching kernel code +BUG: arch topology borken +the CLS domain not a subset of the MC domain + +BUG: arch topology borken +the DIE domain not a subset of the NODE domain + +With current implementation of mc->get_default_cpu_node_id(), +CPU#0 to CPU#5 are associated with NODE#0 to NODE#5 separately. +That's incorrect because CPU#0/1/2 should be associated with the same +NUMA node because they're seated in the same socket. + +This fixes the issue by considering the socket ID when the default +CPU-to-NUMA association is provided in virt_possible_cpu_arch_ids(). +With this applied, no more CPU topology broken warnings are seen +from the Linux guest. The 6 CPUs are associated with NODE#0/1, but +there are no CPUs associated with NODE#2/3/4/5.
+ +Signed-off-by: Gavin Shan +Reviewed-by: Igor Mammedov +Reviewed-by: Yanan Wang +Message-id: 20220503140304.855514-6-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 4c18bc192386dfbca530e7f550e0992df657818a) +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index a87c8d396a..95d012d6eb 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2545,7 +2545,9 @@ virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) + + static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx) + { +- return idx % ms->numa_state->num_nodes; ++ int64_t socket_id = ms->possible_cpus->cpus[idx].props.socket_id; ++ ++ return socket_id % ms->numa_state->num_nodes; + } + + static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) +-- +2.31.1 + diff --git a/kvm-migration-Fix-operator-type.patch b/kvm-migration-Fix-operator-type.patch new file mode 100644 index 0000000..f6a462a --- /dev/null +++ b/kvm-migration-Fix-operator-type.patch @@ -0,0 +1,47 @@ +From 4bd48e784ae0c38c89f1a944b06c997fd28c4d37 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 19 May 2022 04:15:33 -0400 +Subject: [PATCH 16/16] migration: Fix operator type +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 92: Fix build using clang 14 +RH-Commit: [1/1] ad9980e64cf2e39085d68f1ff601444bf2afe228 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 2064530 +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Clang spotted an & that should have been an &&; fix it. + +Reported by: David Binderman / https://gitlab.com/dcb +Fixes: 65dacaa04fa ("migration: introduce save_normal_page()") +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/963 +Signed-off-by: Dr. 
David Alan Gilbert +Message-Id: <20220406102515.96320-1-dgilbert@redhat.com> +Reviewed-by: Peter Maydell +Reviewed-by: Peter Xu +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit f912ec5b2d65644116ff496b58d7c9145c19e4c0) +Signed-off-by: Miroslav Rezanina +--- + migration/ram.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 3532f64ecb..0ef4bd63eb 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1289,7 +1289,7 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, + offset | RAM_SAVE_FLAG_PAGE)); + if (async) { + qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, +- migrate_release_ram() & ++ migrate_release_ram() && + migration_in_postcopy()); + } else { + qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE); +-- +2.31.1 + diff --git a/kvm-qapi-machine.json-Add-cluster-id.patch b/kvm-qapi-machine.json-Add-cluster-id.patch new file mode 100644 index 0000000..2b2a22a --- /dev/null +++ b/kvm-qapi-machine.json-Add-cluster-id.patch @@ -0,0 +1,126 @@ +From e97c563f7146098119839aa146a6f25070eb7148 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:02 +0800 +Subject: [PATCH 01/16] qapi/machine.json: Add cluster-id + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [1/6] 44d7d83008c6d28485ae44f7cced792f4987b919 (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +This adds cluster-id in CPU instance properties, which will be used +by arm/virt machine. Besides, the cluster-id is also verified or +dumped in various spots: + + * hw/core/machine.c::machine_set_cpu_numa_node() to associate + CPU with its NUMA node. + + * hw/core/machine.c::machine_numa_finish_cpu_init() to record + CPU slots with no NUMA mapping set. 
+ + * hw/core/machine-hmp-cmds.c::hmp_hotpluggable_cpus() to dump + cluster-id. + +Signed-off-by: Gavin Shan +Reviewed-by: Yanan Wang +Acked-by: Igor Mammedov +Message-id: 20220503140304.855514-2-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 1dcf7001d4bae651129d46d5628b29e93a411d0b) +Signed-off-by: Gavin Shan +--- + hw/core/machine-hmp-cmds.c | 4 ++++ + hw/core/machine.c | 16 ++++++++++++++++ + qapi/machine.json | 6 ++++-- + 3 files changed, 24 insertions(+), 2 deletions(-) + +diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c +index 4e2f319aeb..5cb5eecbfc 100644 +--- a/hw/core/machine-hmp-cmds.c ++++ b/hw/core/machine-hmp-cmds.c +@@ -77,6 +77,10 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict) + if (c->has_die_id) { + monitor_printf(mon, " die-id: \"%" PRIu64 "\"\n", c->die_id); + } ++ if (c->has_cluster_id) { ++ monitor_printf(mon, " cluster-id: \"%" PRIu64 "\"\n", ++ c->cluster_id); ++ } + if (c->has_core_id) { + monitor_printf(mon, " core-id: \"%" PRIu64 "\"\n", c->core_id); + } +diff --git a/hw/core/machine.c b/hw/core/machine.c +index dffc3ef4ab..168f4de910 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -890,6 +890,11 @@ void machine_set_cpu_numa_node(MachineState *machine, + return; + } + ++ if (props->has_cluster_id && !slot->props.has_cluster_id) { ++ error_setg(errp, "cluster-id is not supported"); ++ return; ++ } ++ + if (props->has_socket_id && !slot->props.has_socket_id) { + error_setg(errp, "socket-id is not supported"); + return; +@@ -909,6 +914,11 @@ void machine_set_cpu_numa_node(MachineState *machine, + continue; + } + ++ if (props->has_cluster_id && ++ props->cluster_id != slot->props.cluster_id) { ++ continue; ++ } ++ + if (props->has_die_id && props->die_id != slot->props.die_id) { + continue; + } +@@ -1203,6 +1213,12 @@ static char *cpu_slot_to_string(const CPUArchId *cpu) + } + g_string_append_printf(s, "die-id: %"PRId64, cpu->props.die_id); + } ++ if 
(cpu->props.has_cluster_id) { ++ if (s->len) { ++ g_string_append_printf(s, ", "); ++ } ++ g_string_append_printf(s, "cluster-id: %"PRId64, cpu->props.cluster_id); ++ } + if (cpu->props.has_core_id) { + if (s->len) { + g_string_append_printf(s, ", "); +diff --git a/qapi/machine.json b/qapi/machine.json +index d25a481ce4..4c417e32a5 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -868,10 +868,11 @@ + # @node-id: NUMA node ID the CPU belongs to + # @socket-id: socket number within node/board the CPU belongs to + # @die-id: die number within socket the CPU belongs to (since 4.1) +-# @core-id: core number within die the CPU belongs to ++# @cluster-id: cluster number within die the CPU belongs to (since 7.1) ++# @core-id: core number within cluster the CPU belongs to + # @thread-id: thread number within core the CPU belongs to + # +-# Note: currently there are 5 properties that could be present ++# Note: currently there are 6 properties that could be present + # but management should be prepared to pass through other + # properties with device_add command to allow for future + # interface extension. 
This also requires the filed names to be kept in +@@ -883,6 +884,7 @@ + 'data': { '*node-id': 'int', + '*socket-id': 'int', + '*die-id': 'int', ++ '*cluster-id': 'int', + '*core-id': 'int', + '*thread-id': 'int' + } +-- +2.31.1 + diff --git a/kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch b/kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch new file mode 100644 index 0000000..9c2ac99 --- /dev/null +++ b/kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch @@ -0,0 +1,100 @@ +From a039ed652e6d2f5edcef9d5d1d3baec17ce7f929 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:35 +0800 +Subject: [PATCH 04/16] qtest/numa-test: Correct CPU and NUMA association in + aarch64_numa_cpu() + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [4/6] 64e9908a179eb4fb586d662f70f275a81808e50c (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +In aarch64_numa_cpu(), the CPU and NUMA association is something +like below. Two threads in the same core/cluster/socket are +associated with two individual NUMA nodes, which is unreal as +Igor Mammedov mentioned. We don't expect the association to break +NUMA-to-socket boundary, which matches with the real world. + +NUMA-node socket cluster core thread +------------------------------------------ +0 0 0 0 0 +1 0 0 0 1 + +This corrects the topology for CPUs and their association with +NUMA nodes. After this patch is applied, the CPU and NUMA +association becomes something like below, which looks real. +Besides, socket/cluster/core/thread IDs are all checked when +the NUMA node IDs are verified. It helps to check if the CPU +topology is properly populated or not. 
+ +NUMA-node socket cluster core thread +------------------------------------------ +0 1 0 0 0 +1 0 0 0 0 + +Suggested-by: Igor Mammedov +Signed-off-by: Gavin Shan +Acked-by: Igor Mammedov +Message-id: 20220503140304.855514-5-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit e280ecb39bc1629f74ea5479d464fd1608dc8f76) +Signed-off-by: Gavin Shan +--- + tests/qtest/numa-test.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c +index aeda8c774c..32e35daaae 100644 +--- a/tests/qtest/numa-test.c ++++ b/tests/qtest/numa-test.c +@@ -224,17 +224,17 @@ static void aarch64_numa_cpu(const void *data) + g_autofree char *cli = NULL; + + cli = make_cli(data, "-machine " +- "smp.cpus=2,smp.sockets=1,smp.clusters=1,smp.cores=1,smp.threads=2 " ++ "smp.cpus=2,smp.sockets=2,smp.clusters=1,smp.cores=1,smp.threads=1 " + "-numa node,nodeid=0,memdev=ram -numa node,nodeid=1 " +- "-numa cpu,node-id=1,thread-id=0 " +- "-numa cpu,node-id=0,thread-id=1"); ++ "-numa cpu,node-id=0,socket-id=1,cluster-id=0,core-id=0,thread-id=0 " ++ "-numa cpu,node-id=1,socket-id=0,cluster-id=0,core-id=0,thread-id=0"); + qts = qtest_init(cli); + cpus = get_cpus(qts, &resp); + g_assert(cpus); + + while ((e = qlist_pop(cpus))) { + QDict *cpu, *props; +- int64_t thread, node; ++ int64_t socket, cluster, core, thread, node; + + cpu = qobject_to(QDict, e); + g_assert(qdict_haskey(cpu, "props")); +@@ -242,12 +242,18 @@ static void aarch64_numa_cpu(const void *data) + + g_assert(qdict_haskey(props, "node-id")); + node = qdict_get_int(props, "node-id"); ++ g_assert(qdict_haskey(props, "socket-id")); ++ socket = qdict_get_int(props, "socket-id"); ++ g_assert(qdict_haskey(props, "cluster-id")); ++ cluster = qdict_get_int(props, "cluster-id"); ++ g_assert(qdict_haskey(props, "core-id")); ++ core = qdict_get_int(props, "core-id"); + g_assert(qdict_haskey(props, "thread-id")); + thread = qdict_get_int(props, 
"thread-id"); + +- if (thread == 0) { ++ if (socket == 0 && cluster == 0 && core == 0 && thread == 0) { + g_assert_cmpint(node, ==, 1); +- } else if (thread == 1) { ++ } else if (socket == 1 && cluster == 0 && core == 0 && thread == 0) { + g_assert_cmpint(node, ==, 0); + } else { + g_assert(false); +-- +2.31.1 + diff --git a/kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch b/kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch new file mode 100644 index 0000000..a87abc0 --- /dev/null +++ b/kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch @@ -0,0 +1,68 @@ +From 66f3928b40991d8467a3da086688f73d061886c8 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:35 +0800 +Subject: [PATCH 02/16] qtest/numa-test: Specify CPU topology in + aarch64_numa_cpu() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [2/6] b851e7ad59e057825392ddf75e9040cc102a0385 (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +The CPU topology isn't enabled on arm/virt machine yet, but we're +going to do it in next patch. After the CPU topology is enabled by +next patch, "thread-id=1" becomes invalid because the CPU core is +preferred on arm/virt machine. It means these two CPUs have 0/1 +as their core IDs, but their thread IDs are all 0. 
It will trigger +test failure as the following message indicates: + +[14/21 qemu:qtest+qtest-aarch64 / qtest-aarch64/numa-test ERROR +1.48s killed by signal 6 SIGABRT +>>> G_TEST_DBUS_DAEMON=/home/gavin/sandbox/qemu.main/tests/dbus-vmstate-daemon.sh \ +QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon \ +QTEST_QEMU_BINARY=./qemu-system-aarch64 \ +QTEST_QEMU_IMG=./qemu-img MALLOC_PERTURB_=83 \ +/home/gavin/sandbox/qemu.main/build/tests/qtest/numa-test --tap -k +―――――――――――――――――――――――――――――――――――――――――――――― +stderr: +qemu-system-aarch64: -numa cpu,node-id=0,thread-id=1: no match found + +This fixes the issue by providing comprehensive SMP configurations +in aarch64_numa_cpu(). The SMP configurations aren't used before +the CPU topology is enabled in next patch. + +Signed-off-by: Gavin Shan +Reviewed-by: Yanan Wang +Message-id: 20220503140304.855514-3-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit ac7199a2523ce2ccf8e685087a5d177eeca89b09) +Signed-off-by: Gavin Shan +--- + tests/qtest/numa-test.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c +index 90bf68a5b3..aeda8c774c 100644 +--- a/tests/qtest/numa-test.c ++++ b/tests/qtest/numa-test.c +@@ -223,7 +223,8 @@ static void aarch64_numa_cpu(const void *data) + QTestState *qts; + g_autofree char *cli = NULL; + +- cli = make_cli(data, "-machine smp.cpus=2 " ++ cli = make_cli(data, "-machine " ++ "smp.cpus=2,smp.sockets=1,smp.clusters=1,smp.cores=1,smp.threads=2 " + "-numa node,nodeid=0,memdev=ram -numa node,nodeid=1 " + "-numa cpu,node-id=1,thread-id=0 " + "-numa cpu,node-id=0,thread-id=1"); +-- +2.31.1 + diff --git a/kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch new file mode 100644 index 0000000..897e04c --- /dev/null +++ b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch @@ -0,0 +1,77 @@ +From 
975af1b9f1811e113e1babd928ae70f8e4ebefb5 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 09:28:19 +0100 +Subject: [PATCH 13/16] virtio-scsi: clean up virtio_scsi_handle_cmd_vq() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [5/6] 27b0225783fa9bbb8fe5ee692bd3f0a888d49d07 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +virtio_scsi_handle_cmd_vq() is only called from hw/scsi/virtio-scsi.c +now and its return value is no longer used. Remove the function +prototype from virtio-scsi.h and drop the return value. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-id: 20220427143541.119567-6-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit ad482b57ef841b2d4883c5079d20ba44ff5e4b3e) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi.c | 5 +---- + include/hw/virtio/virtio-scsi.h | 1 - + 2 files changed, 1 insertion(+), 5 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index a47033d91d..df5ff8bab7 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -685,12 +685,11 @@ static void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI *s, VirtIOSCSIReq *req) + scsi_req_unref(sreq); + } + +-bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) ++static void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) + { + VirtIOSCSIReq *req, *next; + int ret = 0; + bool suppress_notifications = virtio_queue_get_notification(vq); +- bool progress = false; + + QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); + +@@ -700,7 +699,6 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) + } + + while ((req = virtio_scsi_pop_req(s, vq))) { +- progress = true; + ret = virtio_scsi_handle_cmd_req_prepare(s, req); + if (!ret) { + QTAILQ_INSERT_TAIL(&reqs, req, next); +@@ -725,7 
+723,6 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) + QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { + virtio_scsi_handle_cmd_req_submit(s, req); + } +- return progress; + } + + static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index 44dc3b81ec..2497530064 100644 +--- a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -151,7 +151,6 @@ void virtio_scsi_common_realize(DeviceState *dev, + Error **errp); + + void virtio_scsi_common_unrealize(DeviceState *dev); +-bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq); + void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); + void virtio_scsi_free_req(VirtIOSCSIReq *req); + void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, +-- +2.31.1 + diff --git a/kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch new file mode 100644 index 0000000..30f012f --- /dev/null +++ b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch @@ -0,0 +1,65 @@ +From c6e16a7a5a18ec2bc4f8a6f5cc1c887e18b16cdf Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 09:28:12 +0100 +Subject: [PATCH 12/16] virtio-scsi: clean up virtio_scsi_handle_ctrl_vq() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [4/6] ca3751b7bfad5163c5b1c81b8525936a848d42ea (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +virtio_scsi_handle_ctrl_vq() is only called from hw/scsi/virtio-scsi.c +now and its return value is no longer used. Remove the function +prototype from virtio-scsi.h and drop the return value. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-id: 20220427143541.119567-5-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 73b3b49f1880f236b4d0ffd7efb00280c05a5fab) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi.c | 5 +---- + include/hw/virtio/virtio-scsi.h | 1 - + 2 files changed, 1 insertion(+), 5 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index dd2185b943..a47033d91d 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -460,16 +460,13 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req) + } + } + +-bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) ++static void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) + { + VirtIOSCSIReq *req; +- bool progress = false; + + while ((req = virtio_scsi_pop_req(s, vq))) { +- progress = true; + virtio_scsi_handle_ctrl_req(s, req); + } +- return progress; + } + + /* +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index 5957597825..44dc3b81ec 100644 +--- a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -152,7 +152,6 @@ void virtio_scsi_common_realize(DeviceState *dev, + + void virtio_scsi_common_unrealize(DeviceState *dev); + bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq); +-bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq); + void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); + void virtio_scsi_free_req(VirtIOSCSIReq *req); + void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, +-- +2.31.1 + diff --git a/kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch new file mode 100644 index 0000000..bfdd39b --- /dev/null +++ b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch @@ -0,0 +1,62 @@ +From 019d5a0ca5d13f837a59b9e2815e2fd7ac120807 Mon Sep 17 00:00:00 2001 +From: Stefan 
Hajnoczi +Date: Tue, 17 May 2022 09:28:06 +0100 +Subject: [PATCH 11/16] virtio-scsi: clean up virtio_scsi_handle_event_vq() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [3/6] f8dbc4c1991c61e4cf8dea50942c3cd509c9c4bd (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +virtio_scsi_handle_event_vq() is only called from hw/scsi/virtio-scsi.c +now and its return value is no longer used. Remove the function +prototype from virtio-scsi.h and drop the return value. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-id: 20220427143541.119567-4-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 37ce2de95169dacab3fb53d11bd4509b9c2e3a4c) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi.c | 4 +--- + include/hw/virtio/virtio-scsi.h | 1 - + 2 files changed, 1 insertion(+), 4 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 7b69eeed64..dd2185b943 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -856,13 +856,11 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, + virtio_scsi_complete_req(req); + } + +-bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq) ++static void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq) + { + if (s->events_dropped) { + virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0); +- return true; + } +- return false; + } + + static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index 543681bc18..5957597825 100644 +--- a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -151,7 +151,6 @@ void virtio_scsi_common_realize(DeviceState *dev, + Error **errp); + + void virtio_scsi_common_unrealize(DeviceState *dev); +-bool 
virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq); + bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq); + bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq); + void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); +-- +2.31.1 + diff --git a/kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch b/kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch new file mode 100644 index 0000000..5ba11a2 --- /dev/null +++ b/kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch @@ -0,0 +1,103 @@ +From 1b609b2af303fb6498b2ef94ac4f2e900dc8c1b2 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 09:27:45 +0100 +Subject: [PATCH 10/16] virtio-scsi: don't waste CPU polling the event + virtqueue + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [2/6] 7e613d9b9fa8ceb668c78cb3ce7ebe1d73a004b5 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +The virtio-scsi event virtqueue is not emptied by its handler function. +This is typical for rx virtqueues where the device uses buffers when +some event occurs (e.g. a packet is received, an error condition +happens, etc). + +Polling non-empty virtqueues wastes CPU cycles. We are not waiting for +new buffers to become available, we are waiting for an event to occur, +so it's a misuse of CPU resources to poll for buffers. + +Introduce the new virtio_queue_aio_attach_host_notifier_no_poll() API, +which is identical to virtio_queue_aio_attach_host_notifier() except +that it does not poll the virtqueue. 
+ +Before this patch the following command-line consumed 100% CPU in the +IOThread polling and calling virtio_scsi_handle_event(): + + $ qemu-system-x86_64 -M accel=kvm -m 1G -cpu host \ + --object iothread,id=iothread0 \ + --device virtio-scsi-pci,iothread=iothread0 \ + --blockdev file,filename=test.img,aio=native,cache.direct=on,node-name=drive0 \ + --device scsi-hd,drive=drive0 + +After this patch CPU is no longer wasted. + +Reported-by: Nir Soffer +Signed-off-by: Stefan Hajnoczi +Tested-by: Nir Soffer +Message-id: 20220427143541.119567-3-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 38738f7dbbda90fbc161757b7f4be35b52205552) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi-dataplane.c | 2 +- + hw/virtio/virtio.c | 13 +++++++++++++ + include/hw/virtio/virtio.h | 1 + + 3 files changed, 15 insertions(+), 1 deletion(-) + +diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c +index 29575cbaf6..8bb6e6acfc 100644 +--- a/hw/scsi/virtio-scsi-dataplane.c ++++ b/hw/scsi/virtio-scsi-dataplane.c +@@ -138,7 +138,7 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) + + aio_context_acquire(s->ctx); + virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); +- virtio_queue_aio_attach_host_notifier(vs->event_vq, s->ctx); ++ virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx); + + for (i = 0; i < vs->conf.num_queues; i++) { + virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 9d637e043e..67a873f54a 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -3534,6 +3534,19 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) + virtio_queue_host_notifier_aio_poll_end); + } + ++/* ++ * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use ++ * this for rx virtqueues and similar cases where the virtqueue handler ++ * function does not pop all elements. 
When the virtqueue is left non-empty ++ * polling consumes CPU cycles and should not be used. ++ */ ++void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx) ++{ ++ aio_set_event_notifier(ctx, &vq->host_notifier, true, ++ virtio_queue_host_notifier_read, ++ NULL, NULL); ++} ++ + void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx) + { + aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL); +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index b31c4507f5..b62a35fdca 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -317,6 +317,7 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); + void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled); + void virtio_queue_host_notifier_read(EventNotifier *n); + void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx); ++void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx); + void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx); + VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); + VirtQueue *virtio_vector_next_queue(VirtQueue *vq); +-- +2.31.1 + diff --git a/kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch b/kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch new file mode 100644 index 0000000..1f22ba0 --- /dev/null +++ b/kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch @@ -0,0 +1,119 @@ +From 5aaf33dbbbc89d58a52337985641723b9ee13541 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 27 Apr 2022 15:35:36 +0100 +Subject: [PATCH 09/16] virtio-scsi: fix ctrl and event handler functions in + dataplane mode + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [1/6] 3087889041b960f14a6b3893243f78523a78f637 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav 
Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +Commit f34e8d8b8d48d73f36a67b6d5e492ef9784b5012 ("virtio-scsi: prepare +virtio_scsi_handle_cmd for dataplane") prepared the virtio-scsi cmd +virtqueue handler function to be used in both the dataplane and +non-dataplane code paths. + +It failed to convert the ctrl and event virtqueue handler functions, +which are not designed to be called from the dataplane code path but +will be since the ioeventfd is set up for those virtqueues when +dataplane starts. + +Convert the ctrl and event virtqueue handler functions now so they +operate correctly when called from the dataplane code path. Avoid code +duplication by extracting this code into a helper function. + +Fixes: f34e8d8b8d48d73f36a67b6d5e492ef9784b5012 ("virtio-scsi: prepare virtio_scsi_handle_cmd for dataplane") +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-id: 20220427143541.119567-2-stefanha@redhat.com +[Fixed s/by used/be used/ typo pointed out by Michael Tokarev +. +--Stefan] +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 2f743ef6366c2df4ef51ef3ae318138cdc0125ab) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi.c | 42 +++++++++++++++++++++++++++--------------- + 1 file changed, 27 insertions(+), 15 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 7f6da33a8a..7b69eeed64 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -472,16 +472,32 @@ bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) + return progress; + } + ++/* ++ * If dataplane is configured but not yet started, do so now and return true on ++ * success. ++ * ++ * Dataplane is started by the core virtio code but virtqueue handler functions ++ * can also be invoked when a guest kicks before DRIVER_OK, so this helper ++ * function helps us deal with manually starting ioeventfd in that case.
++ */ ++static bool virtio_scsi_defer_to_dataplane(VirtIOSCSI *s) ++{ ++ if (!s->ctx || s->dataplane_started) { ++ return false; ++ } ++ ++ virtio_device_start_ioeventfd(&s->parent_obj.parent_obj); ++ return !s->dataplane_fenced; ++} ++ + static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) + { + VirtIOSCSI *s = (VirtIOSCSI *)vdev; + +- if (s->ctx) { +- virtio_device_start_ioeventfd(vdev); +- if (!s->dataplane_fenced) { +- return; +- } ++ if (virtio_scsi_defer_to_dataplane(s)) { ++ return; + } ++ + virtio_scsi_acquire(s); + virtio_scsi_handle_ctrl_vq(s, vq); + virtio_scsi_release(s); +@@ -720,12 +736,10 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) + /* use non-QOM casts in the data path */ + VirtIOSCSI *s = (VirtIOSCSI *)vdev; + +- if (s->ctx && !s->dataplane_started) { +- virtio_device_start_ioeventfd(vdev); +- if (!s->dataplane_fenced) { +- return; +- } ++ if (virtio_scsi_defer_to_dataplane(s)) { ++ return; + } ++ + virtio_scsi_acquire(s); + virtio_scsi_handle_cmd_vq(s, vq); + virtio_scsi_release(s); +@@ -855,12 +869,10 @@ static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) + { + VirtIOSCSI *s = VIRTIO_SCSI(vdev); + +- if (s->ctx) { +- virtio_device_start_ioeventfd(vdev); +- if (!s->dataplane_fenced) { +- return; +- } ++ if (virtio_scsi_defer_to_dataplane(s)) { ++ return; + } ++ + virtio_scsi_acquire(s); + virtio_scsi_handle_event_vq(s, vq); + virtio_scsi_release(s); +-- +2.31.1 + diff --git a/kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch b/kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch new file mode 100644 index 0000000..8487f5c --- /dev/null +++ b/kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch @@ -0,0 +1,168 @@ +From 6603f216dbc07a1d221b1665409cfec6cc9960e2 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 09:28:26 +0100 +Subject: [PATCH 14/16] virtio-scsi: move request-related items from .h to .c + +RH-Author: Stefan Hajnoczi 
+RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [6/6] ecdf5289abd04062c85c5ed8e577a5249684a3b0 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +There is no longer a need to expose the request and related APIs in +virtio-scsi.h since there are no callers outside virtio-scsi.c. + +Note the block comment in VirtIOSCSIReq has been adjusted to meet the +coding style. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-id: 20220427143541.119567-7-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 3dc584abeef0e1277c2de8c1c1974cb49444eb0a) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi.c | 45 ++++++++++++++++++++++++++++++--- + include/hw/virtio/virtio-scsi.h | 40 ----------------------------- + 2 files changed, 41 insertions(+), 44 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index df5ff8bab7..2450c9438c 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -29,6 +29,43 @@ + #include "hw/virtio/virtio-access.h" + #include "trace.h" + ++typedef struct VirtIOSCSIReq { ++ /* ++ * Note: ++ * - fields up to resp_iov are initialized by virtio_scsi_init_req; ++ * - fields starting at vring are zeroed by virtio_scsi_init_req. 
++ */ ++ VirtQueueElement elem; ++ ++ VirtIOSCSI *dev; ++ VirtQueue *vq; ++ QEMUSGList qsgl; ++ QEMUIOVector resp_iov; ++ ++ union { ++ /* Used for two-stage request submission */ ++ QTAILQ_ENTRY(VirtIOSCSIReq) next; ++ ++ /* Used for cancellation of request during TMFs */ ++ int remaining; ++ }; ++ ++ SCSIRequest *sreq; ++ size_t resp_size; ++ enum SCSIXferMode mode; ++ union { ++ VirtIOSCSICmdResp cmd; ++ VirtIOSCSICtrlTMFResp tmf; ++ VirtIOSCSICtrlANResp an; ++ VirtIOSCSIEvent event; ++ } resp; ++ union { ++ VirtIOSCSICmdReq cmd; ++ VirtIOSCSICtrlTMFReq tmf; ++ VirtIOSCSICtrlANReq an; ++ } req; ++} VirtIOSCSIReq; ++ + static inline int virtio_scsi_get_lun(uint8_t *lun) + { + return ((lun[2] << 8) | lun[3]) & 0x3FFF; +@@ -45,7 +82,7 @@ static inline SCSIDevice *virtio_scsi_device_get(VirtIOSCSI *s, uint8_t *lun) + return scsi_device_get(&s->bus, 0, lun[1], virtio_scsi_get_lun(lun)); + } + +-void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req) ++static void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req) + { + VirtIODevice *vdev = VIRTIO_DEVICE(s); + const size_t zero_skip = +@@ -58,7 +95,7 @@ void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req) + memset((uint8_t *)req + zero_skip, 0, sizeof(*req) - zero_skip); + } + +-void virtio_scsi_free_req(VirtIOSCSIReq *req) ++static void virtio_scsi_free_req(VirtIOSCSIReq *req) + { + qemu_iovec_destroy(&req->resp_iov); + qemu_sglist_destroy(&req->qsgl); +@@ -801,8 +838,8 @@ static void virtio_scsi_reset(VirtIODevice *vdev) + s->events_dropped = false; + } + +-void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, +- uint32_t event, uint32_t reason) ++static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, ++ uint32_t event, uint32_t reason) + { + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + VirtIOSCSIReq *req; +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index 2497530064..abdda2cbd0 100644 +--- 
a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -94,42 +94,6 @@ struct VirtIOSCSI { + uint32_t host_features; + }; + +-typedef struct VirtIOSCSIReq { +- /* Note: +- * - fields up to resp_iov are initialized by virtio_scsi_init_req; +- * - fields starting at vring are zeroed by virtio_scsi_init_req. +- * */ +- VirtQueueElement elem; +- +- VirtIOSCSI *dev; +- VirtQueue *vq; +- QEMUSGList qsgl; +- QEMUIOVector resp_iov; +- +- union { +- /* Used for two-stage request submission */ +- QTAILQ_ENTRY(VirtIOSCSIReq) next; +- +- /* Used for cancellation of request during TMFs */ +- int remaining; +- }; +- +- SCSIRequest *sreq; +- size_t resp_size; +- enum SCSIXferMode mode; +- union { +- VirtIOSCSICmdResp cmd; +- VirtIOSCSICtrlTMFResp tmf; +- VirtIOSCSICtrlANResp an; +- VirtIOSCSIEvent event; +- } resp; +- union { +- VirtIOSCSICmdReq cmd; +- VirtIOSCSICtrlTMFReq tmf; +- VirtIOSCSICtrlANReq an; +- } req; +-} VirtIOSCSIReq; +- + static inline void virtio_scsi_acquire(VirtIOSCSI *s) + { + if (s->ctx) { +@@ -151,10 +115,6 @@ void virtio_scsi_common_realize(DeviceState *dev, + Error **errp); + + void virtio_scsi_common_unrealize(DeviceState *dev); +-void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); +-void virtio_scsi_free_req(VirtIOSCSIReq *req); +-void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, +- uint32_t event, uint32_t reason); + + void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp); + int virtio_scsi_dataplane_start(VirtIODevice *s); +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 5feb6bf..7b28b80 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.0.0 -Release: 3%{?rcrel}%{?dist}%{?cc_suffix} +Release: 4%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -208,6 +208,38 @@ Patch26: kvm-Enable-virtio-iommu-pci-on-aarch64.patch Patch27: kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch # For bz#2037612 - [Win11][tpm][QL41112 PF] vfio_listener_region_add received unaligned region Patch28: kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch +# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' +Patch29: kvm-qapi-machine.json-Add-cluster-id.patch +# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' +Patch30: kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch +# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' +Patch31: kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch +# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' +Patch32: kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch +# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' +Patch33: kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch +# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' +Patch34: kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch +# For bz#2079938 - qemu coredump when boot with multi disks (qemu) failed to set up stack guard page: Cannot allocate memory +Patch35: kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch +# For bz#2079938 - qemu coredump when boot with multi disks (qemu) failed to set up stack guard page: Cannot allocate memory +Patch36: kvm-coroutine-Revert-to-constant-batch-size.patch +# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% CPU 
consumption +Patch37: kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch +# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% CPU consumption +Patch38: kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch +# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% CPU consumption +Patch39: kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch +# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% CPU consumption +Patch40: kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch +# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% CPU consumption +Patch41: kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch +# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% CPU consumption +Patch42: kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch +# For bz#1995710 - RFE: Allow virtio-scsi CD-ROM media change with IOThreads +Patch43: kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch +# For bz#2064530 - Rebuild qemu-kvm with clang-14 +Patch44: kvm-migration-Fix-operator-type.patch # Source-git patches @@ -1243,6 +1275,34 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Thu May 19 2022 Miroslav Rezanina - 7.0.0-4 +- kvm-qapi-machine.json-Add-cluster-id.patch [bz#2041823] +- kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch [bz#2041823] +- kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch [bz#2041823] +- kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch [bz#2041823] +- kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch [bz#2041823] +- kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch [bz#2041823] +- kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch [bz#2079938] +- kvm-coroutine-Revert-to-constant-batch-size.patch [bz#2079938] +- 
kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch [bz#2079347] +- kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch [bz#2079347] +- kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch [bz#2079347] +- kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch [bz#2079347] +- kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch [bz#2079347] +- kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch [bz#2079347] +- kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch [bz#1995710] +- kvm-migration-Fix-operator-type.patch [bz#2064530] +- Resolves: bz#2041823 + ([aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken') +- Resolves: bz#2079938 + (qemu coredump when boot with multi disks (qemu) failed to set up stack guard page: Cannot allocate memory) +- Resolves: bz#2079347 + (Guest boot blocked when scsi disks using same iothread and 100% CPU consumption) +- Resolves: bz#1995710 + (RFE: Allow virtio-scsi CD-ROM media change with IOThreads) +- Resolves: bz#2064530 + (Rebuild qemu-kvm with clang-14) + * Thu May 12 2022 Miroslav Rezanina - 7.0.0-3 - kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch [bz#2046029] - kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch [bz#2046029] From 4f42bcba8f520d62b55a6d062bf6923d39008053 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 30 May 2022 07:32:17 -0400 Subject: [PATCH 160/195] * Mon May 30 2022 Miroslav Rezanina - 7.0.0-5 - kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch [bz#1708300] - kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch [bz#1708300] - Resolves: bz#1708300 (RFE: qemu-nbd vs NBD_FLAG_CAN_MULTI_CONN) --- ...-MULTI_CONN-for-shared-writable-expo.patch | 381 ++++++++++++++++++ ...ss-max-connections-to-blockdev-layer.patch | 92 +++++ qemu-kvm.spec | 12 +- 3 files changed, 484 insertions(+), 1 deletion(-) create mode 100644 
kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch create mode 100644 kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch diff --git a/kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch b/kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch new file mode 100644 index 0000000..56abcb1 --- /dev/null +++ b/kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch @@ -0,0 +1,381 @@ +From 4a9ddf42788d3f924bdad7746f7aca615f03d7c1 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 11 May 2022 19:49:24 -0500 +Subject: [PATCH 2/2] nbd/server: Allow MULTI_CONN for shared writable exports +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 90: Advertise MULTI_CONN on writeable NBD servers +RH-Commit: [2/2] 53f0e885a5ed7f6e4bb14e74fe8e7957e6afe90f (ebblake/centos-qemu-kvm) +RH-Bugzilla: 1708300 +RH-Acked-by: Nir Soffer +RH-Acked-by: Kevin Wolf +RH-Acked-by: Daniel P. Berrangé + +According to the NBD spec, a server that advertises +NBD_FLAG_CAN_MULTI_CONN promises that multiple client connections will +not see any cache inconsistencies: when properly separated by a single +flush, actions performed by one client will be visible to another +client, regardless of which client did the flush. + +We always satisfy these conditions in qemu - even when we support +multiple clients, ALL clients go through a single point of reference +into the block layer, with no local caching. The effect of one client +is instantly visible to the next client. Even if our backend were a +network device, we argue that any multi-path caching effects that +would cause inconsistencies in back-to-back actions not seeing the +effect of previous actions would be a bug in that backend, and not the +fault of caching in qemu. As such, it is safe to unconditionally +advertise CAN_MULTI_CONN for any qemu NBD server situation that +supports parallel clients. 
+ +Note, however, that we don't want to advertise CAN_MULTI_CONN when we +know that a second client cannot connect (for historical reasons, +qemu-nbd defaults to a single connection while nbd-server-add and QMP +commands default to unlimited connections; but we already have +existing means to let either style of NBD server creation alter those +defaults). This is visible by no longer advertising MULTI_CONN for +'qemu-nbd -r' without -e, as in the iotest nbd-qemu-allocation. + +The harder part of this patch is setting up an iotest to demonstrate +behavior of multiple NBD clients to a single server. It might be +possible with parallel qemu-io processes, but I found it easier to do +in python with the help of libnbd, and help from Nir and Vladimir in +writing the test. + +Signed-off-by: Eric Blake +Suggested-by: Nir Soffer +Suggested-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20220512004924.417153-3-eblake@redhat.com> +Signed-off-by: Kevin Wolf + +(cherry picked from commit 58a6fdcc9efb2a7c1ef4893dca4aa5e8020ca3dc) +Conflicts: + nbd/server.c - context, e5fb29d5 not backported +Signed-off-by: Eric Blake +--- + MAINTAINERS | 1 + + blockdev-nbd.c | 5 + + docs/interop/nbd.txt | 1 + + docs/tools/qemu-nbd.rst | 3 +- + include/block/nbd.h | 3 +- + nbd/server.c | 10 +- + qapi/block-export.json | 8 +- + tests/qemu-iotests/tests/nbd-multiconn | 145 ++++++++++++++++++ + tests/qemu-iotests/tests/nbd-multiconn.out | 5 + + .../tests/nbd-qemu-allocation.out | 2 +- + 10 files changed, 172 insertions(+), 11 deletions(-) + create mode 100755 tests/qemu-iotests/tests/nbd-multiconn + create mode 100644 tests/qemu-iotests/tests/nbd-multiconn.out + +diff --git a/MAINTAINERS b/MAINTAINERS +index 4ad2451e03..2fe20a49ab 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -3370,6 +3370,7 @@ F: qemu-nbd.* + F: blockdev-nbd.c + F: docs/interop/nbd.txt + F: docs/tools/qemu-nbd.rst ++F: tests/qemu-iotests/tests/*nbd* + T: git https://repo.or.cz/qemu/ericb.git nbd + T: git 
https://src.openvz.org/scm/~vsementsov/qemu.git nbd + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index add41a23af..c6d9b0324c 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -44,6 +44,11 @@ bool nbd_server_is_running(void) + return nbd_server || qemu_nbd_connections >= 0; + } + ++int nbd_server_max_connections(void) ++{ ++ return nbd_server ? nbd_server->max_connections : qemu_nbd_connections; ++} ++ + static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) + { + nbd_client_put(client); +diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt +index bdb0f2a41a..f5ca25174a 100644 +--- a/docs/interop/nbd.txt ++++ b/docs/interop/nbd.txt +@@ -68,3 +68,4 @@ NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE + * 4.2: NBD_FLAG_CAN_MULTI_CONN for shareable read-only exports, + NBD_CMD_FLAG_FAST_ZERO + * 5.2: NBD_CMD_BLOCK_STATUS for "qemu:allocation-depth" ++* 7.1: NBD_FLAG_CAN_MULTI_CONN for shareable writable exports +diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst +index 4c950f6199..8e08a29e89 100644 +--- a/docs/tools/qemu-nbd.rst ++++ b/docs/tools/qemu-nbd.rst +@@ -139,8 +139,7 @@ driver options if :option:`--image-opts` is specified. + .. option:: -e, --shared=NUM + + Allow up to *NUM* clients to share the device (default +- ``1``), 0 for unlimited. Safe for readers, but for now, +- consistency is not guaranteed between multiple writers. ++ ``1``), 0 for unlimited. + + .. option:: -t, --persistent + +diff --git a/include/block/nbd.h b/include/block/nbd.h +index c5a29ce1c6..c74b7a9d2e 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2016-2020 Red Hat, Inc. ++ * Copyright (C) 2016-2022 Red Hat, Inc. 
+ * Copyright (C) 2005 Anthony Liguori + * + * Network Block Device +@@ -346,6 +346,7 @@ void nbd_client_put(NBDClient *client); + + void nbd_server_is_qemu_nbd(int max_connections); + bool nbd_server_is_running(void); ++int nbd_server_max_connections(void); + void nbd_server_start(SocketAddress *addr, const char *tls_creds, + const char *tls_authz, uint32_t max_connections, + Error **errp); +diff --git a/nbd/server.c b/nbd/server.c +index c5644fd3f6..6e2157acfa 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2016-2021 Red Hat, Inc. ++ * Copyright (C) 2016-2022 Red Hat, Inc. + * Copyright (C) 2005 Anthony Liguori + * + * Network Block Device Server Side +@@ -1642,7 +1642,6 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, + int64_t size; + uint64_t perm, shared_perm; + bool readonly = !exp_args->writable; +- bool shared = !exp_args->writable; + strList *bitmaps; + size_t i; + int ret; +@@ -1693,11 +1692,12 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, + exp->description = g_strdup(arg->description); + exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH | + NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE); ++ ++ if (nbd_server_max_connections() != 1) { ++ exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN; ++ } + if (readonly) { + exp->nbdflags |= NBD_FLAG_READ_ONLY; +- if (shared) { +- exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN; +- } + } else { + exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES | + NBD_FLAG_SEND_FAST_ZERO); +diff --git a/qapi/block-export.json b/qapi/block-export.json +index 1e34927f85..755ccc89b1 100644 +--- a/qapi/block-export.json ++++ b/qapi/block-export.json +@@ -21,7 +21,9 @@ + # recreated on the fly while the NBD server is active. + # If missing, it will default to denying access (since 4.0). + # @max-connections: The maximum number of connections to allow at the same +-# time, 0 for unlimited. 
(since 5.2; default: 0) ++# time, 0 for unlimited. Setting this to 1 also stops ++# the server from advertising multiple client support ++# (since 5.2; default: 0) + # + # Since: 4.2 + ## +@@ -50,7 +52,9 @@ + # recreated on the fly while the NBD server is active. + # If missing, it will default to denying access (since 4.0). + # @max-connections: The maximum number of connections to allow at the same +-# time, 0 for unlimited. (since 5.2; default: 0) ++# time, 0 for unlimited. Setting this to 1 also stops ++# the server from advertising multiple client support ++# (since 5.2; default: 0). + # + # Returns: error if the server is already running. + # +diff --git a/tests/qemu-iotests/tests/nbd-multiconn b/tests/qemu-iotests/tests/nbd-multiconn +new file mode 100755 +index 0000000000..b121f2e363 +--- /dev/null ++++ b/tests/qemu-iotests/tests/nbd-multiconn +@@ -0,0 +1,145 @@ ++#!/usr/bin/env python3 ++# group: rw auto quick ++# ++# Test cases for NBD multi-conn advertisement ++# ++# Copyright (C) 2022 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . 
++ ++import os ++from contextlib import contextmanager ++import iotests ++from iotests import qemu_img_create, qemu_io ++ ++ ++disk = os.path.join(iotests.test_dir, 'disk') ++size = '4M' ++nbd_sock = os.path.join(iotests.sock_dir, 'nbd_sock') ++nbd_uri = 'nbd+unix:///{}?socket=' + nbd_sock ++ ++ ++@contextmanager ++def open_nbd(export_name): ++ h = nbd.NBD() ++ try: ++ h.connect_uri(nbd_uri.format(export_name)) ++ yield h ++ finally: ++ h.shutdown() ++ ++class TestNbdMulticonn(iotests.QMPTestCase): ++ def setUp(self): ++ qemu_img_create('-f', iotests.imgfmt, disk, size) ++ qemu_io('-c', 'w -P 1 0 2M', '-c', 'w -P 2 2M 2M', disk) ++ ++ self.vm = iotests.VM() ++ self.vm.launch() ++ result = self.vm.qmp('blockdev-add', { ++ 'driver': 'qcow2', ++ 'node-name': 'n', ++ 'file': {'driver': 'file', 'filename': disk} ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ def tearDown(self): ++ self.vm.shutdown() ++ os.remove(disk) ++ try: ++ os.remove(nbd_sock) ++ except OSError: ++ pass ++ ++ @contextmanager ++ def run_server(self, max_connections=None): ++ args = { ++ 'addr': { ++ 'type': 'unix', ++ 'data': {'path': nbd_sock} ++ } ++ } ++ if max_connections is not None: ++ args['max-connections'] = max_connections ++ ++ result = self.vm.qmp('nbd-server-start', args) ++ self.assert_qmp(result, 'return', {}) ++ yield ++ ++ result = self.vm.qmp('nbd-server-stop') ++ self.assert_qmp(result, 'return', {}) ++ ++ def add_export(self, name, writable=None): ++ args = { ++ 'type': 'nbd', ++ 'id': name, ++ 'node-name': 'n', ++ 'name': name, ++ } ++ if writable is not None: ++ args['writable'] = writable ++ ++ result = self.vm.qmp('block-export-add', args) ++ self.assert_qmp(result, 'return', {}) ++ ++ def test_default_settings(self): ++ with self.run_server(): ++ self.add_export('r') ++ self.add_export('w', writable=True) ++ with open_nbd('r') as h: ++ self.assertTrue(h.can_multi_conn()) ++ with open_nbd('w') as h: ++ self.assertTrue(h.can_multi_conn()) ++ ++ def 
test_limited_connections(self): ++ with self.run_server(max_connections=1): ++ self.add_export('r') ++ self.add_export('w', writable=True) ++ with open_nbd('r') as h: ++ self.assertFalse(h.can_multi_conn()) ++ with open_nbd('w') as h: ++ self.assertFalse(h.can_multi_conn()) ++ ++ def test_parallel_writes(self): ++ with self.run_server(): ++ self.add_export('w', writable=True) ++ ++ clients = [nbd.NBD() for _ in range(3)] ++ for c in clients: ++ c.connect_uri(nbd_uri.format('w')) ++ self.assertTrue(c.can_multi_conn()) ++ ++ initial_data = clients[0].pread(1024 * 1024, 0) ++ self.assertEqual(initial_data, b'\x01' * 1024 * 1024) ++ ++ updated_data = b'\x03' * 1024 * 1024 ++ clients[1].pwrite(updated_data, 0) ++ clients[2].flush() ++ current_data = clients[0].pread(1024 * 1024, 0) ++ ++ self.assertEqual(updated_data, current_data) ++ ++ for i in range(3): ++ clients[i].shutdown() ++ ++ ++if __name__ == '__main__': ++ try: ++ # Easier to use libnbd than to try and set up parallel ++ # 'qemu-nbd --list' or 'qemu-io' processes, but not all systems ++ # have libnbd installed. ++ import nbd # type: ignore ++ ++ iotests.main(supported_fmts=['qcow2']) ++ except ImportError: ++ iotests.notrun('libnbd not installed') +diff --git a/tests/qemu-iotests/tests/nbd-multiconn.out b/tests/qemu-iotests/tests/nbd-multiconn.out +new file mode 100644 +index 0000000000..8d7e996700 +--- /dev/null ++++ b/tests/qemu-iotests/tests/nbd-multiconn.out +@@ -0,0 +1,5 @@ ++... 
++---------------------------------------------------------------------- ++Ran 3 tests ++ ++OK +diff --git a/tests/qemu-iotests/tests/nbd-qemu-allocation.out b/tests/qemu-iotests/tests/nbd-qemu-allocation.out +index 0bf1abb063..9d938db24e 100644 +--- a/tests/qemu-iotests/tests/nbd-qemu-allocation.out ++++ b/tests/qemu-iotests/tests/nbd-qemu-allocation.out +@@ -17,7 +17,7 @@ wrote 2097152/2097152 bytes at offset 1048576 + exports available: 1 + export: '' + size: 4194304 +- flags: 0x58f ( readonly flush fua df multi cache ) ++ flags: 0x48f ( readonly flush fua df cache ) + min block: 1 + opt block: 4096 + max block: 33554432 +-- +2.31.1 + diff --git a/kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch b/kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch new file mode 100644 index 0000000..9acff58 --- /dev/null +++ b/kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch @@ -0,0 +1,92 @@ +From e6aae1d0368a152924c38775e517f4e83c1d898b Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 11 May 2022 19:49:23 -0500 +Subject: [PATCH 1/2] qemu-nbd: Pass max connections to blockdev layer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 90: Advertise MULTI_CONN on writeable NBD servers +RH-Commit: [1/2] b0e33fd125bf3523b8b9a4dead3c8bb2342bfd4e (ebblake/centos-qemu-kvm) +RH-Bugzilla: 1708300 +RH-Acked-by: Nir Soffer +RH-Acked-by: Kevin Wolf +RH-Acked-by: Daniel P. Berrangé + +The next patch wants to adjust whether the NBD server code advertises +MULTI_CONN based on whether it is known if the server limits to +exactly one client. For a server started by QMP, this information is +obtained through nbd_server_start (which can support more than one +export); but for qemu-nbd (which supports exactly one export), it is +controlled only by the command-line option -e/--shared. 
Since we +already have a hook function used by qemu-nbd, it's easiest to just +alter its signature to fit our needs. + +Signed-off-by: Eric Blake +Message-Id: <20220512004924.417153-2-eblake@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit a5fced40212ed73c715ca298a2929dd4d99c9999) +Signed-off-by: Eric Blake +--- + blockdev-nbd.c | 8 ++++---- + include/block/nbd.h | 2 +- + qemu-nbd.c | 2 +- + 3 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 9840d25a82..add41a23af 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -30,18 +30,18 @@ typedef struct NBDServerData { + } NBDServerData; + + static NBDServerData *nbd_server; +-static bool is_qemu_nbd; ++static int qemu_nbd_connections = -1; /* Non-negative if this is qemu-nbd */ + + static void nbd_update_server_watch(NBDServerData *s); + +-void nbd_server_is_qemu_nbd(bool value) ++void nbd_server_is_qemu_nbd(int max_connections) + { +- is_qemu_nbd = value; ++ qemu_nbd_connections = max_connections; + } + + bool nbd_server_is_running(void) + { +- return nbd_server || is_qemu_nbd; ++ return nbd_server || qemu_nbd_connections >= 0; + } + + static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) +diff --git a/include/block/nbd.h b/include/block/nbd.h +index a98eb665da..c5a29ce1c6 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -344,7 +344,7 @@ void nbd_client_new(QIOChannelSocket *sioc, + void nbd_client_get(NBDClient *client); + void nbd_client_put(NBDClient *client); + +-void nbd_server_is_qemu_nbd(bool value); ++void nbd_server_is_qemu_nbd(int max_connections); + bool nbd_server_is_running(void); + void nbd_server_start(SocketAddress *addr, const char *tls_creds, + const char *tls_authz, uint32_t max_connections, +diff --git a/qemu-nbd.c b/qemu-nbd.c +index 713e7557a9..8c25ae93df 100644 +--- a/qemu-nbd.c ++++ b/qemu-nbd.c +@@ -1087,7 +1087,7 @@ int main(int argc, char **argv) + + bs->detect_zeroes = detect_zeroes; + 
+- nbd_server_is_qemu_nbd(true); ++ nbd_server_is_qemu_nbd(shared); + + export_opts = g_new(BlockExportOptions, 1); + *export_opts = (BlockExportOptions) { +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 7b28b80..da9fadb 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.0.0 -Release: 4%{?rcrel}%{?dist}%{?cc_suffix} +Release: 5%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -240,6 +240,10 @@ Patch42: kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch Patch43: kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch # For bz#2064530 - Rebuild qemu-kvm with clang-14 Patch44: kvm-migration-Fix-operator-type.patch +# For bz#1708300 - RFE: qemu-nbd vs NBD_FLAG_CAN_MULTI_CONN +Patch45: kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch +# For bz#1708300 - RFE: qemu-nbd vs NBD_FLAG_CAN_MULTI_CONN +Patch46: kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch # Source-git patches @@ -1275,6 +1279,12 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon May 30 2022 Miroslav Rezanina - 7.0.0-5 +- kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch [bz#1708300] +- kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch [bz#1708300] +- Resolves: bz#1708300 + (RFE: qemu-nbd vs NBD_FLAG_CAN_MULTI_CONN) + * Thu May 19 2022 Miroslav Rezanina - 7.0.0-4 - kvm-qapi-machine.json-Add-cluster-id.patch [bz#2041823] - kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch [bz#2041823] From 0552c42c392f9ef0d01b98e248d64a86401c0e59 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 13 Jun 2022 00:34:04 -0400 Subject: [PATCH 161/195] * Mon Jun 13 2022 Miroslav Rezanina - 
7.0.0-6 - kvm-Introduce-event-loop-base-abstract-class.patch [bz#2031024] - kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch [bz#2031024] - kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch [bz#2031024] - kvm-qcow2-Improve-refcount-structure-rebuilding.patch [bz#2072379] - kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch [bz#2072379] - kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch [bz#2072379] - kvm-iotests-108-Fix-when-missing-user_allow_other.patch [bz#2072379] - kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch [bz#2070804] - kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch [bz#2070804] - kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch [bz#2070804] - kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch [bz#2070804] - kvm-vhost-vdpa-backend-feature-should-set-only-once.patch [bz#2070804] - kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch [bz#2070804] - kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch [bz#2070804] - kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch [bz#2094270] - kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch [bz#2086262] - Resolves: bz#2031024 (Add support for fixing thread pool size [QEMU]) - Resolves: bz#2072379 (Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. 
LVs)) - Resolves: bz#2070804 (PXE boot crash qemu when using multiqueue vDPA) - Resolves: bz#2094270 (Do not set the hard vCPU limit to the soft vCPU limit in downstream qemu-kvm anymore) - Resolves: bz#2086262 ([Win11][tpm]vfio_listener_region_del received unaligned region) --- ...oduce-event-loop-base-abstract-class.patch | 503 ++++++++++++++++++ ...lly-limit-the-maximum-number-of-CPUs.patch | 58 ++ ...08-Fix-when-missing-user_allow_other.patch | 52 ++ ...-Test-new-refcount-rebuild-algorithm.patch | 445 ++++++++++++++++ ...d-errp-to-rebuild_refcount_structure.patch | 162 ++++++ ...mprove-refcount-structure-rebuilding.patch | 465 ++++++++++++++++ ...base-Introduce-options-to-set-the-th.patch | 385 ++++++++++++++ ...oop-Introduce-the-main-loop-into-QOM.patch | 233 ++++++++ ...ve-spurious-warning-on-vfio_listener.patch | 78 +++ ...-improper-cleanup-in-vhost_net_start.patch | 56 ++ ...backend-feature-should-set-only-once.patch | 58 ++ ...e-name-and-polarity-for-vhost_vdpa_o.patch | 123 +++++ ...mproper-cleanup-in-net_init_vhost_vd.patch | 48 ++ ...-ctrl_vq-index-for-non-mq-guest-for-.patch | 143 +++++ ...-handle-mq-request-in-userspace-hand.patch | 109 ++++ ...-vhost_dev-and-notifiers-for-cvq-onl.patch | 52 ++ qemu-kvm.spec | 62 ++- 17 files changed, 3031 insertions(+), 1 deletion(-) create mode 100644 kvm-Introduce-event-loop-base-abstract-class.patch create mode 100644 kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch create mode 100644 kvm-iotests-108-Fix-when-missing-user_allow_other.patch create mode 100644 kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch create mode 100644 kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch create mode 100644 kvm-qcow2-Improve-refcount-structure-rebuilding.patch create mode 100644 kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch create mode 100644 kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch create mode 100644 kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch create 
mode 100644 kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch create mode 100644 kvm-vhost-vdpa-backend-feature-should-set-only-once.patch create mode 100644 kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch create mode 100644 kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch create mode 100644 kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch create mode 100644 kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch create mode 100644 kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch diff --git a/kvm-Introduce-event-loop-base-abstract-class.patch b/kvm-Introduce-event-loop-base-abstract-class.patch new file mode 100644 index 0000000..9f987ea --- /dev/null +++ b/kvm-Introduce-event-loop-base-abstract-class.patch @@ -0,0 +1,503 @@ +From 1163da281c178359dd7e1cf1ced5c98caa600f8e Mon Sep 17 00:00:00 2001 +From: Nicolas Saenz Julienne +Date: Mon, 25 Apr 2022 09:57:21 +0200 +Subject: [PATCH 01/16] Introduce event-loop-base abstract class + +RH-Author: Nicolas Saenz Julienne +RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size +RH-Commit: [1/3] 5817205d8f56cc4aa98bd5963ecac54a59bad990 +RH-Bugzilla: 2031024 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +Introduce the 'event-loop-base' abstract class, it'll hold the +properties common to all event loops and provide the necessary hooks for +their creation and maintenance. Then have iothread inherit from it. + +EventLoopBaseClass is defined as user creatable and provides a hook for +its children to attach themselves to the user creatable class 'complete' +function. It also provides an update_params() callback to propagate +property changes onto its children. + +The new 'event-loop-base' class will live in the root directory. 
It is +built on its own using the 'link_whole' option (there are no direct +function dependencies between the class and its children, it all happens +through 'constructor' magic). And also imposes new compilation +dependencies: + + qom <- event-loop-base <- blockdev (iothread.c) + +And in subsequent patches: + + qom <- event-loop-base <- qemuutil (util/main-loop.c) + +All this forced some amount of reordering in meson.build: + + - Moved qom build definition before qemuutil. Doing it the other way + around (i.e. moving qemuutil after qom) isn't possible as a lot of + core libraries that live in between the two depend on it. + + - Process the 'hw' subdir earlier, as it introduces files into the + 'qom' source set. + +No functional changes intended. + +Signed-off-by: Nicolas Saenz Julienne +Reviewed-by: Stefan Hajnoczi +Acked-by: Markus Armbruster +Message-id: 20220425075723.20019-2-nsaenzju@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 7d5983e3c8c40b1d0668faba31d79905c4fadd7d) +--- + event-loop-base.c | 104 +++++++++++++++++++++++++++++++ + include/sysemu/event-loop-base.h | 36 +++++++++++ + include/sysemu/iothread.h | 6 +- + iothread.c | 65 ++++++------------- + meson.build | 23 ++++--- + qapi/qom.json | 22 +++++-- + 6 files changed, 192 insertions(+), 64 deletions(-) + create mode 100644 event-loop-base.c + create mode 100644 include/sysemu/event-loop-base.h + +diff --git a/event-loop-base.c b/event-loop-base.c +new file mode 100644 +index 0000000000..a924c73a7c +--- /dev/null ++++ b/event-loop-base.c +@@ -0,0 +1,104 @@ ++/* ++ * QEMU event-loop base ++ * ++ * Copyright (C) 2022 Red Hat Inc ++ * ++ * Authors: ++ * Stefan Hajnoczi ++ * Nicolas Saenz Julienne ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory.
++ */ ++ ++#include "qemu/osdep.h" ++#include "qom/object_interfaces.h" ++#include "qapi/error.h" ++#include "sysemu/event-loop-base.h" ++ ++typedef struct { ++ const char *name; ++ ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */ ++} EventLoopBaseParamInfo; ++ ++static EventLoopBaseParamInfo aio_max_batch_info = { ++ "aio-max-batch", offsetof(EventLoopBase, aio_max_batch), ++}; ++ ++static void event_loop_base_get_param(Object *obj, Visitor *v, ++ const char *name, void *opaque, Error **errp) ++{ ++ EventLoopBase *event_loop_base = EVENT_LOOP_BASE(obj); ++ EventLoopBaseParamInfo *info = opaque; ++ int64_t *field = (void *)event_loop_base + info->offset; ++ ++ visit_type_int64(v, name, field, errp); ++} ++ ++static void event_loop_base_set_param(Object *obj, Visitor *v, ++ const char *name, void *opaque, Error **errp) ++{ ++ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(obj); ++ EventLoopBase *base = EVENT_LOOP_BASE(obj); ++ EventLoopBaseParamInfo *info = opaque; ++ int64_t *field = (void *)base + info->offset; ++ int64_t value; ++ ++ if (!visit_type_int64(v, name, &value, errp)) { ++ return; ++ } ++ ++ if (value < 0) { ++ error_setg(errp, "%s value must be in range [0, %" PRId64 "]", ++ info->name, INT64_MAX); ++ return; ++ } ++ ++ *field = value; ++ ++ if (bc->update_params) { ++ bc->update_params(base, errp); ++ } ++ ++ return; ++} ++ ++static void event_loop_base_complete(UserCreatable *uc, Error **errp) ++{ ++ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc); ++ EventLoopBase *base = EVENT_LOOP_BASE(uc); ++ ++ if (bc->init) { ++ bc->init(base, errp); ++ } ++} ++ ++static void event_loop_base_class_init(ObjectClass *klass, void *class_data) ++{ ++ UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); ++ ucc->complete = event_loop_base_complete; ++ ++ object_class_property_add(klass, "aio-max-batch", "int", ++ event_loop_base_get_param, ++ event_loop_base_set_param, ++ NULL, &aio_max_batch_info); ++} ++ ++static const TypeInfo 
event_loop_base_info = { ++ .name = TYPE_EVENT_LOOP_BASE, ++ .parent = TYPE_OBJECT, ++ .instance_size = sizeof(EventLoopBase), ++ .class_size = sizeof(EventLoopBaseClass), ++ .class_init = event_loop_base_class_init, ++ .abstract = true, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_USER_CREATABLE }, ++ { } ++ } ++}; ++ ++static void register_types(void) ++{ ++ type_register_static(&event_loop_base_info); ++} ++type_init(register_types); +diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h +new file mode 100644 +index 0000000000..8e77d8b69f +--- /dev/null ++++ b/include/sysemu/event-loop-base.h +@@ -0,0 +1,36 @@ ++/* ++ * QEMU event-loop backend ++ * ++ * Copyright (C) 2022 Red Hat Inc ++ * ++ * Authors: ++ * Nicolas Saenz Julienne ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++#ifndef QEMU_EVENT_LOOP_BASE_H ++#define QEMU_EVENT_LOOP_BASE_H ++ ++#include "qom/object.h" ++#include "block/aio.h" ++#include "qemu/typedefs.h" ++ ++#define TYPE_EVENT_LOOP_BASE "event-loop-base" ++OBJECT_DECLARE_TYPE(EventLoopBase, EventLoopBaseClass, ++ EVENT_LOOP_BASE) ++ ++struct EventLoopBaseClass { ++ ObjectClass parent_class; ++ ++ void (*init)(EventLoopBase *base, Error **errp); ++ void (*update_params)(EventLoopBase *base, Error **errp); ++}; ++ ++struct EventLoopBase { ++ Object parent; ++ ++ /* AioContext AIO engine parameters */ ++ int64_t aio_max_batch; ++}; ++#endif +diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h +index 7f714bd136..8f8601d6ab 100644 +--- a/include/sysemu/iothread.h ++++ b/include/sysemu/iothread.h +@@ -17,11 +17,12 @@ + #include "block/aio.h" + #include "qemu/thread.h" + #include "qom/object.h" ++#include "sysemu/event-loop-base.h" + + #define TYPE_IOTHREAD "iothread" + + struct IOThread { +- Object parent_obj; ++ EventLoopBase parent_obj; + + QemuThread thread; + AioContext *ctx; +@@ -37,9 +38,6 @@ struct 
IOThread { + int64_t poll_max_ns; + int64_t poll_grow; + int64_t poll_shrink; +- +- /* AioContext AIO engine parameters */ +- int64_t aio_max_batch; + }; + typedef struct IOThread IOThread; + +diff --git a/iothread.c b/iothread.c +index 0f98af0f2a..8fa2f3bfb8 100644 +--- a/iothread.c ++++ b/iothread.c +@@ -17,6 +17,7 @@ + #include "qemu/module.h" + #include "block/aio.h" + #include "block/block.h" ++#include "sysemu/event-loop-base.h" + #include "sysemu/iothread.h" + #include "qapi/error.h" + #include "qapi/qapi-commands-misc.h" +@@ -152,10 +153,15 @@ static void iothread_init_gcontext(IOThread *iothread) + iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE); + } + +-static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) ++static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp) + { ++ IOThread *iothread = IOTHREAD(base); + ERRP_GUARD(); + ++ if (!iothread->ctx) { ++ return; ++ } ++ + aio_context_set_poll_params(iothread->ctx, + iothread->poll_max_ns, + iothread->poll_grow, +@@ -166,14 +172,15 @@ static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) + } + + aio_context_set_aio_params(iothread->ctx, +- iothread->aio_max_batch, ++ iothread->parent_obj.aio_max_batch, + errp); + } + +-static void iothread_complete(UserCreatable *obj, Error **errp) ++ ++static void iothread_init(EventLoopBase *base, Error **errp) + { + Error *local_error = NULL; +- IOThread *iothread = IOTHREAD(obj); ++ IOThread *iothread = IOTHREAD(base); + char *thread_name; + + iothread->stopping = false; +@@ -189,7 +196,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp) + */ + iothread_init_gcontext(iothread); + +- iothread_set_aio_context_params(iothread, &local_error); ++ iothread_set_aio_context_params(base, &local_error); + if (local_error) { + error_propagate(errp, local_error); + aio_context_unref(iothread->ctx); +@@ -201,7 +208,7 @@ static void iothread_complete(UserCreatable *obj, Error 
**errp) + * to inherit. + */ + thread_name = g_strdup_printf("IO %s", +- object_get_canonical_path_component(OBJECT(obj))); ++ object_get_canonical_path_component(OBJECT(base))); + qemu_thread_create(&iothread->thread, thread_name, iothread_run, + iothread, QEMU_THREAD_JOINABLE); + g_free(thread_name); +@@ -226,9 +233,6 @@ static IOThreadParamInfo poll_grow_info = { + static IOThreadParamInfo poll_shrink_info = { + "poll-shrink", offsetof(IOThread, poll_shrink), + }; +-static IOThreadParamInfo aio_max_batch_info = { +- "aio-max-batch", offsetof(IOThread, aio_max_batch), +-}; + + static void iothread_get_param(Object *obj, Visitor *v, + const char *name, IOThreadParamInfo *info, Error **errp) +@@ -288,35 +292,12 @@ static void iothread_set_poll_param(Object *obj, Visitor *v, + } + } + +-static void iothread_get_aio_param(Object *obj, Visitor *v, +- const char *name, void *opaque, Error **errp) +-{ +- IOThreadParamInfo *info = opaque; +- +- iothread_get_param(obj, v, name, info, errp); +-} +- +-static void iothread_set_aio_param(Object *obj, Visitor *v, +- const char *name, void *opaque, Error **errp) +-{ +- IOThread *iothread = IOTHREAD(obj); +- IOThreadParamInfo *info = opaque; +- +- if (!iothread_set_param(obj, v, name, info, errp)) { +- return; +- } +- +- if (iothread->ctx) { +- aio_context_set_aio_params(iothread->ctx, +- iothread->aio_max_batch, +- errp); +- } +-} +- + static void iothread_class_init(ObjectClass *klass, void *class_data) + { +- UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); +- ucc->complete = iothread_complete; ++ EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(klass); ++ ++ bc->init = iothread_init; ++ bc->update_params = iothread_set_aio_context_params; + + object_class_property_add(klass, "poll-max-ns", "int", + iothread_get_poll_param, +@@ -330,23 +311,15 @@ static void iothread_class_init(ObjectClass *klass, void *class_data) + iothread_get_poll_param, + iothread_set_poll_param, + NULL, &poll_shrink_info); +- 
object_class_property_add(klass, "aio-max-batch", "int", +- iothread_get_aio_param, +- iothread_set_aio_param, +- NULL, &aio_max_batch_info); + } + + static const TypeInfo iothread_info = { + .name = TYPE_IOTHREAD, +- .parent = TYPE_OBJECT, ++ .parent = TYPE_EVENT_LOOP_BASE, + .class_init = iothread_class_init, + .instance_size = sizeof(IOThread), + .instance_init = iothread_instance_init, + .instance_finalize = iothread_instance_finalize, +- .interfaces = (InterfaceInfo[]) { +- {TYPE_USER_CREATABLE}, +- {} +- }, + }; + + static void iothread_register_types(void) +@@ -383,7 +356,7 @@ static int query_one_iothread(Object *object, void *opaque) + info->poll_max_ns = iothread->poll_max_ns; + info->poll_grow = iothread->poll_grow; + info->poll_shrink = iothread->poll_shrink; +- info->aio_max_batch = iothread->aio_max_batch; ++ info->aio_max_batch = iothread->parent_obj.aio_max_batch; + + QAPI_LIST_APPEND(*tail, info); + return 0; +diff --git a/meson.build b/meson.build +index 6f7e430f0f..b9c919a55e 100644 +--- a/meson.build ++++ b/meson.build +@@ -2804,6 +2804,7 @@ subdir('qom') + subdir('authz') + subdir('crypto') + subdir('ui') ++subdir('hw') + + + if enable_modules +@@ -2811,6 +2812,18 @@ if enable_modules + modulecommon = declare_dependency(link_whole: libmodulecommon, compile_args: '-DBUILD_DSO') + endif + ++qom_ss = qom_ss.apply(config_host, strict: false) ++libqom = static_library('qom', qom_ss.sources() + genh, ++ dependencies: [qom_ss.dependencies()], ++ name_suffix: 'fa') ++qom = declare_dependency(link_whole: libqom) ++ ++event_loop_base = files('event-loop-base.c') ++event_loop_base = static_library('event-loop-base', sources: event_loop_base + genh, ++ build_by_default: true) ++event_loop_base = declare_dependency(link_whole: event_loop_base, ++ dependencies: [qom]) ++ + stub_ss = stub_ss.apply(config_all, strict: false) + + util_ss.add_all(trace_ss) +@@ -2897,7 +2910,6 @@ subdir('monitor') + subdir('net') + subdir('replay') + subdir('semihosting') 
+-subdir('hw') + subdir('tcg') + subdir('fpu') + subdir('accel') +@@ -3022,13 +3034,6 @@ qemu_syms = custom_target('qemu.syms', output: 'qemu.syms', + capture: true, + command: [undefsym, nm, '@INPUT@']) + +-qom_ss = qom_ss.apply(config_host, strict: false) +-libqom = static_library('qom', qom_ss.sources() + genh, +- dependencies: [qom_ss.dependencies()], +- name_suffix: 'fa') +- +-qom = declare_dependency(link_whole: libqom) +- + authz_ss = authz_ss.apply(config_host, strict: false) + libauthz = static_library('authz', authz_ss.sources() + genh, + dependencies: [authz_ss.dependencies()], +@@ -3081,7 +3086,7 @@ libblockdev = static_library('blockdev', blockdev_ss.sources() + genh, + build_by_default: false) + + blockdev = declare_dependency(link_whole: [libblockdev], +- dependencies: [block]) ++ dependencies: [block, event_loop_base]) + + qmp_ss = qmp_ss.apply(config_host, strict: false) + libqmp = static_library('qmp', qmp_ss.sources() + genh, +diff --git a/qapi/qom.json b/qapi/qom.json +index eeb5395ff3..a2439533c5 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -499,6 +499,20 @@ + '*repeat': 'bool', + '*grab-toggle': 'GrabToggleKeys' } } + ++## ++# @EventLoopBaseProperties: ++# ++# Common properties for event loops ++# ++# @aio-max-batch: maximum number of requests in a batch for the AIO engine, ++# 0 means that the engine will use its default. ++# (default: 0) ++# ++# Since: 7.1 ++## ++{ 'struct': 'EventLoopBaseProperties', ++ 'data': { '*aio-max-batch': 'int' } } ++ + ## + # @IothreadProperties: + # +@@ -516,17 +530,15 @@ + # algorithm detects it is spending too long polling without + # encountering events. 0 selects a default behaviour (default: 0) + # +-# @aio-max-batch: maximum number of requests in a batch for the AIO engine, +-# 0 means that the engine will use its default +-# (default:0, since 6.1) ++# The @aio-max-batch option is available since 6.1. 
+ # + # Since: 2.0 + ## + { 'struct': 'IothreadProperties', ++ 'base': 'EventLoopBaseProperties', + 'data': { '*poll-max-ns': 'int', + '*poll-grow': 'int', +- '*poll-shrink': 'int', +- '*aio-max-batch': 'int' } } ++ '*poll-shrink': 'int' } } + + ## + # @MemoryBackendProperties: +-- +2.31.1 + diff --git a/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch b/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch new file mode 100644 index 0000000..7740d0b --- /dev/null +++ b/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch @@ -0,0 +1,58 @@ +From 5ab8613582fd56b847fe75750acb5b7255900b35 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Thu, 9 Jun 2022 11:55:15 +0200 +Subject: [PATCH 15/16] Revert "globally limit the maximum number of CPUs" + +RH-Author: Vitaly Kuznetsov +RH-MergeRequest: 99: Revert "globally limit the maximum number of CPUs" +RH-Commit: [1/1] 13100d4a2209b2190a3654c1f9cf4ebade1e8d24 (vkuznets/qemu-kvm-c9s) +RH-Bugzilla: 2094270 +RH-Acked-by: Andrew Jones +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094270 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45871149 +Upstream Status: RHEL-only +Tested: with upstream kernel + +Downstream QEMU carries a patch that sets the hard limit of possible vCPUs +to the value that the KVM code of the kernel recommends as soft limit. +Upstream KVM code has been changed recently to not use an arbitrary soft +limit anymore, but to cap the value on the amount of available physical +CPUs of the host. This defeats the purpose of the downstream change in +QEMU completely. Drop the downstream-only patch to allow CPU overcommit. + +This reverts commit 6669f6fa677d43144f39d6ad59725b7ba622f1c2. 
+ +Signed-off-by: Vitaly Kuznetsov +--- + accel/kvm/kvm-all.c | 12 ------------ + 1 file changed, 12 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index fdf0e4d429..5f1377ca04 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2430,18 +2430,6 @@ static int kvm_init(MachineState *ms) + soft_vcpus_limit = kvm_recommended_vcpus(s); + hard_vcpus_limit = kvm_max_vcpus(s); + +-#ifdef HOST_PPC64 +- /* +- * On POWER, the kernel advertises a soft limit based on the +- * number of CPU threads on the host. We want to allow exceeding +- * this for testing purposes, so we don't want to set hard limit +- * to soft limit as on x86. +- */ +-#else +- /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */ +- hard_vcpus_limit = soft_vcpus_limit; +-#endif +- + while (nc->name) { + if (nc->num > soft_vcpus_limit) { + warn_report("Number of %s cpus requested (%d) exceeds " +-- +2.31.1 + diff --git a/kvm-iotests-108-Fix-when-missing-user_allow_other.patch b/kvm-iotests-108-Fix-when-missing-user_allow_other.patch new file mode 100644 index 0000000..a37ea6f --- /dev/null +++ b/kvm-iotests-108-Fix-when-missing-user_allow_other.patch @@ -0,0 +1,52 @@ +From 447bca651c9156d7aba6b7495c75f19b5e4ed53f Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Thu, 21 Apr 2022 16:24:35 +0200 +Subject: [PATCH 07/16] iotests/108: Fix when missing user_allow_other + +RH-Author: Hanna Reitz +RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding +RH-Commit: [4/4] a51ab8606fc9d8dea2b6539f4e795d5813892a5c (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2072379 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +FUSE exports' allow-other option defaults to "auto", which means that it +will try passing allow_other as a mount option, and fall back to not +using it when an error occurs. 
We make no effort to hide fusermount's +error message (because it would be difficult, and because users might +want to know about the fallback occurring), and so when allow_other does +not work (primarily when /etc/fuse.conf does not contain +user_allow_other), this error message will appear and break the +reference output. + +We do not need allow_other here, though, so we can just pass +allow-other=off to fix that. + +Reported-by: Markus Armbruster +Signed-off-by: Hanna Reitz +Message-Id: <20220421142435.569600-1-hreitz@redhat.com> +Tested-by: Markus Armbruster +Tested-by: Eric Blake +(cherry picked from commit 348a0740afc5b313599533eb69bbb2b95d2f1bba) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/108 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 +index a3090e2875..4681c7c769 100755 +--- a/tests/qemu-iotests/108 ++++ b/tests/qemu-iotests/108 +@@ -326,7 +326,7 @@ else + + $QSD \ + --blockdev file,node-name=export-node,filename="$TEST_IMG" \ +- --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \ ++ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off,allow-other=off \ + --pidfile "$TEST_DIR/qsd.pid" \ + & + +-- +2.31.1 + diff --git a/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch b/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch new file mode 100644 index 0000000..7a968f6 --- /dev/null +++ b/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch @@ -0,0 +1,445 @@ +From ed69e01352b5e9a06173daab53bfa373c8535732 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 5 Apr 2022 15:46:51 +0200 +Subject: [PATCH 05/16] iotests/108: Test new refcount rebuild algorithm + +RH-Author: Hanna Reitz +RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding +RH-Commit: [2/4] b68310a9fee8465dd3f568c8e867e1b7ae52bdaf (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2072379 
+RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +One clear problem with how qcow2's refcount structure rebuild algorithm +used to be before "qcow2: Improve refcount structure rebuilding" was +that it is prone to failure for qcow2 images on block devices: There is +generally unused space after the actual image, and if that exceeds what +one refblock covers, the old algorithm would invariably write the +reftable past the block device's end, which cannot work. The new +algorithm does not have this problem. + +Test it with three tests: +(1) Create an image with more empty space at the end than what one + refblock covers, see whether rebuilding the refcount structures + results in a change in the image file length. (It should not.) + +(2) Leave precisely enough space somewhere at the beginning of the image + for the new reftable (and the refblock for that place), see whether + the new algorithm puts the reftable there. (It should.) + +(3) Test the original problem: Create (something like) a block device + with a fixed size, then create a qcow2 image in there, write some + data, and then have qemu-img check rebuild the refcount structures. + Before HEAD^, the reftable would have been written past the image + file end, i.e. outside of what the block device provides, which + cannot work. HEAD^ should have fixed that. + ("Something like a block device" means a loop device if we can use + one ("sudo -n losetup" works), or a FUSE block export with + growable=false otherwise.) 
+ +Reviewed-by: Eric Blake +Signed-off-by: Hanna Reitz +Message-Id: <20220405134652.19278-3-hreitz@redhat.com> +(cherry picked from commit 9ffd6d646d1d5ee9087a8cbf0b7d2f96c5656162) + +Conflicts: +- 108: The downstream qemu-storage-daemon does not support --daemonize, + so this switch has been replaced by a loop waiting for the PID file to + appear + +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/108 | 263 ++++++++++++++++++++++++++++++++++++- + tests/qemu-iotests/108.out | 81 ++++++++++++ + 2 files changed, 343 insertions(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 +index 56339ab2c5..a3090e2875 100755 +--- a/tests/qemu-iotests/108 ++++ b/tests/qemu-iotests/108 +@@ -30,13 +30,20 @@ status=1 # failure is the default! + + _cleanup() + { +- _cleanup_test_img ++ _cleanup_test_img ++ if [ -f "$TEST_DIR/qsd.pid" ]; then ++ qsd_pid=$(cat "$TEST_DIR/qsd.pid") ++ kill -KILL "$qsd_pid" ++ fusermount -u "$TEST_DIR/fuse-export" &>/dev/null ++ fi ++ rm -f "$TEST_DIR/fuse-export" + } + trap "_cleanup; exit \$status" 0 1 2 3 15 + + # get standard environment, filters and checks + . ./common.rc + . ./common.filter ++. ./common.qemu + + # This tests qcow2-specific low-level functionality + _supported_fmt qcow2 +@@ -47,6 +54,22 @@ _supported_os Linux + # files + _unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' data_file + ++# This test either needs sudo -n losetup or FUSE exports to work ++if sudo -n losetup &>/dev/null; then ++ loopdev=true ++else ++ loopdev=false ++ ++ # QSD --export fuse will either yield "Parameter 'id' is missing" ++ # or "Invalid parameter 'fuse'", depending on whether there is ++ # FUSE support or not. 
++ error=$($QSD --export fuse 2>&1) ++ if [[ $error = *"'fuse'"* ]]; then ++ _notrun 'Passwordless sudo for losetup or FUSE support required, but' \ ++ 'neither is available' ++ fi ++fi ++ + echo + echo '=== Repairing an image without any refcount table ===' + echo +@@ -138,6 +161,244 @@ _make_test_img 64M + poke_file "$TEST_IMG" $((0x10008)) "\xff\xff\xff\xff\xff\xff\x00\x00" + _check_test_img -r all + ++echo ++echo '=== Check rebuilt reftable location ===' ++ ++# In an earlier version of the refcount rebuild algorithm, the ++# reftable was generally placed at the image end (unless something was ++# allocated in the area covered by the refblock right before the image ++# file end, then we would try to place the reftable in that refblock). ++# This was later changed so the reftable would be placed in the ++# earliest possible location. Test this. ++ ++echo ++echo '--- Does the image size increase? ---' ++echo ++ ++# First test: Just create some image, write some data to it, and ++# resize it so there is free space at the end of the image (enough ++# that it spans at least one full refblock, which for cluster_size=512 ++# images, spans 128k). With the old algorithm, the reftable would ++# have then been placed at the end of the image file, but with the new ++# one, it will be put in that free space. ++# We want to check whether the size of the image file increases due to ++# rebuilding the refcount structures (it should not). 
++ ++_make_test_img -o 'cluster_size=512' 1M ++# Write something ++$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io ++ ++# Add free space ++file_len=$(stat -c '%s' "$TEST_IMG") ++truncate -s $((file_len + 256 * 1024)) "$TEST_IMG" ++ ++# Corrupt the image by saying the image header was not allocated ++rt_offset=$(peek_file_be "$TEST_IMG" 48 8) ++rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8) ++poke_file "$TEST_IMG" $rb_offset "\x00\x00" ++ ++# Check whether rebuilding the refcount structures increases the image ++# file size ++file_len=$(stat -c '%s' "$TEST_IMG") ++echo ++# The only leaks there can be are the old refcount structures that are ++# leaked during rebuilding, no need to clutter the output with them ++_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0' ++echo ++post_repair_file_len=$(stat -c '%s' "$TEST_IMG") ++ ++if [[ $file_len -eq $post_repair_file_len ]]; then ++ echo 'OK: Image size did not change' ++else ++ echo 'ERROR: Image size differs' \ ++ "($file_len before, $post_repair_file_len after)" ++fi ++ ++echo ++echo '--- Will the reftable occupy a hole specifically left for it? ---' ++echo ++ ++# Note: With cluster_size=512, every refblock covers 128k. ++# The reftable covers 8M per reftable cluster. ++ ++# Create an image that requires two reftable clusters (just because ++# this is more interesting than a single-clustered reftable). ++_make_test_img -o 'cluster_size=512' 9M ++$QEMU_IO -c 'write 0 8M' "$TEST_IMG" | _filter_qemu_io ++ ++# Writing 8M will have resized the reftable. Unfortunately, doing so ++# will leave holes in the file, so we need to fill them up so we can ++# be sure the whole file is allocated. Do that by writing ++# consecutively smaller chunks starting from 8 MB, until the file ++# length increases even with a chunk size of 512. Then we must have ++# filled all holes. 
++ofs=$((8 * 1024 * 1024)) ++block_len=$((16 * 1024)) ++while [[ $block_len -ge 512 ]]; do ++ file_len=$(stat -c '%s' "$TEST_IMG") ++ while [[ $(stat -c '%s' "$TEST_IMG") -eq $file_len ]]; do ++ # Do not include this in the reference output, it does not ++ # really matter which qemu-io calls we do here exactly ++ $QEMU_IO -c "write $ofs $block_len" "$TEST_IMG" >/dev/null ++ ofs=$((ofs + block_len)) ++ done ++ block_len=$((block_len / 2)) ++done ++ ++# Fill up to 9M (do not include this in the reference output either, ++# $ofs is random for all we know) ++$QEMU_IO -c "write $ofs $((9 * 1024 * 1024 - ofs))" "$TEST_IMG" >/dev/null ++ ++# Make space as follows: ++# - For the first refblock: Right at the beginning of the image (this ++# refblock is placed in the first place possible), ++# - For the reftable somewhere soon afterwards, still near the ++# beginning of the image (i.e. covered by the first refblock); the ++# reftable too is placed in the first place possible, but only after ++# all refblocks have been placed) ++# No space is needed for the other refblocks, because no refblock is ++# put before the space it covers. In this test case, we do not mind ++# if they are placed at the image file's end. ++ ++# Before we make that space, we have to find out the host offset of ++# the area that belonged to the two data clusters at guest offset 4k, ++# because we expect the reftable to be placed there, and we will have ++# to verify that it is. 
++ ++l1_offset=$(peek_file_be "$TEST_IMG" 40 8) ++l2_offset=$(peek_file_be "$TEST_IMG" $l1_offset 8) ++l2_offset=$((l2_offset & 0x00fffffffffffe00)) ++data_4k_offset=$(peek_file_be "$TEST_IMG" \ ++ $((l2_offset + 4096 / 512 * 8)) 8) ++data_4k_offset=$((data_4k_offset & 0x00fffffffffffe00)) ++ ++$QEMU_IO -c "discard 0 512" -c "discard 4k 1k" "$TEST_IMG" | _filter_qemu_io ++ ++# Corrupt the image by saying the image header was not allocated ++rt_offset=$(peek_file_be "$TEST_IMG" 48 8) ++rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8) ++poke_file "$TEST_IMG" $rb_offset "\x00\x00" ++ ++echo ++# The only leaks there can be are the old refcount structures that are ++# leaked during rebuilding, no need to clutter the output with them ++_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0' ++echo ++ ++# Check whether the reftable was put where we expected ++rt_offset=$(peek_file_be "$TEST_IMG" 48 8) ++if [[ $rt_offset -eq $data_4k_offset ]]; then ++ echo 'OK: Reftable is where we expect it' ++else ++ echo "ERROR: Reftable is at $rt_offset, but was expected at $data_4k_offset" ++fi ++ ++echo ++echo '--- Rebuilding refcount structures on block devices ---' ++echo ++ ++# A block device cannot really grow, at least not during qemu-img ++# check. As mentioned in the above cases, rebuilding the refcount ++# structure may lead to new refcount structures being written after ++# the end of the image, and in the past that happened even if there ++# was more than sufficient space in the image. Such post-EOF writes ++# will not work on block devices, so test that the new algorithm ++# avoids it. ++ ++# If we have passwordless sudo and losetup, we can use those to create ++# a block device. Otherwise, we can resort to qemu's FUSE export to ++# create a file that isn't growable, which effectively tests the same ++# thing. 
++ ++_cleanup_test_img ++truncate -s $((64 * 1024 * 1024)) "$TEST_IMG" ++ ++if $loopdev; then ++ export_mp=$(sudo -n losetup --show -f "$TEST_IMG") ++ export_mp_driver=host_device ++ sudo -n chmod go+rw "$export_mp" ++else ++ # Create non-growable FUSE export that is a bit like an empty ++ # block device ++ export_mp="$TEST_DIR/fuse-export" ++ export_mp_driver=file ++ touch "$export_mp" ++ ++ $QSD \ ++ --blockdev file,node-name=export-node,filename="$TEST_IMG" \ ++ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \ ++ --pidfile "$TEST_DIR/qsd.pid" \ ++ & ++ ++ while [ ! -f "$TEST_DIR/qsd.pid" ]; do ++ sleep 0.1 ++ done ++fi ++ ++# Now create a qcow2 image on the device -- unfortunately, qemu-img ++# create force-creates the file, so we have to resort to the ++# blockdev-create job. ++_launch_qemu \ ++ --blockdev $export_mp_driver,node-name=file,filename="$export_mp" ++ ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "qmp_capabilities" }' \ ++ 'return' ++ ++# Small cluster size again, so the image needs multiple refblocks ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "blockdev-create", ++ "arguments": { ++ "job-id": "create", ++ "options": { ++ "driver": "qcow2", ++ "file": "file", ++ "size": '$((64 * 1024 * 1024))', ++ "cluster-size": 512 ++ } } }' \ ++ '"concluded"' ++ ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "job-dismiss", "arguments": { "id": "create" } }' \ ++ 'return' ++ ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "quit" }' \ ++ 'return' ++ ++wait=y _cleanup_qemu ++echo ++ ++# Write some data ++$QEMU_IO -c 'write 0 64k' "$export_mp" | _filter_qemu_io ++ ++# Corrupt the image by saying the image header was not allocated ++rt_offset=$(peek_file_be "$export_mp" 48 8) ++rb_offset=$(peek_file_be "$export_mp" $rt_offset 8) ++poke_file "$export_mp" $rb_offset "\x00\x00" ++ ++# Repairing such a simple case should just work ++# (We used to put the reftable at the end of the image 
file, which can ++# never work for non-growable devices.) ++echo ++TEST_IMG="$export_mp" _check_test_img -r all \ ++ | grep -v '^Repairing cluster.*refcount=1 reference=0' ++ ++if $loopdev; then ++ sudo -n losetup -d "$export_mp" ++else ++ qsd_pid=$(cat "$TEST_DIR/qsd.pid") ++ kill -TERM "$qsd_pid" ++ # Wait for process to exit (cannot `wait` because the QSD is daemonized) ++ while [ -f "$TEST_DIR/qsd.pid" ]; do ++ true ++ done ++fi ++ + # success, all done + echo '*** done' + rm -f $seq.full +diff --git a/tests/qemu-iotests/108.out b/tests/qemu-iotests/108.out +index 75bab8dc84..b5401d788d 100644 +--- a/tests/qemu-iotests/108.out ++++ b/tests/qemu-iotests/108.out +@@ -105,6 +105,87 @@ The following inconsistencies were found and repaired: + 0 leaked clusters + 1 corruptions + ++Double checking the fixed image now... ++No errors were found on the image. ++ ++=== Check rebuilt reftable location === ++ ++--- Does the image size increase? --- ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 ++wrote 65536/65536 bytes at offset 0 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++ERROR cluster 0 refcount=0 reference=1 ++Rebuilding refcount structure ++The following inconsistencies were found and repaired: ++ ++ 0 leaked clusters ++ 1 corruptions ++ ++Double checking the fixed image now... ++No errors were found on the image. ++ ++OK: Image size did not change ++ ++--- Will the reftable occupy a hole specifically left for it? 
--- ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=9437184 ++wrote 8388608/8388608 bytes at offset 0 ++8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++discard 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++discard 1024/1024 bytes at offset 4096 ++1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++ERROR cluster 0 refcount=0 reference=1 ++Rebuilding refcount structure ++The following inconsistencies were found and repaired: ++ ++ 0 leaked clusters ++ 1 corruptions ++ ++Double checking the fixed image now... ++No errors were found on the image. ++ ++OK: Reftable is where we expect it ++ ++--- Rebuilding refcount structures on block devices --- ++ ++{ "execute": "qmp_capabilities" } ++{"return": {}} ++{ "execute": "blockdev-create", ++ "arguments": { ++ "job-id": "create", ++ "options": { ++ "driver": "IMGFMT", ++ "file": "file", ++ "size": 67108864, ++ "cluster-size": 512 ++ } } } ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "create"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "create"}} ++{"return": {}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "create"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "create"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "create"}} ++{ "execute": "job-dismiss", "arguments": { "id": "create" } } ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}} ++{"return": {}} ++{ "execute": "quit" } ++{"return": {}} ++{"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++ ++wrote 65536/65536 bytes at offset 0 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++ERROR cluster 0 refcount=0 reference=1 ++Rebuilding refcount structure ++The following inconsistencies were found and repaired: ++ ++ 0 leaked clusters ++ 1 corruptions ++ + Double checking the fixed image now... + No errors were found on the image. + *** done +-- +2.31.1 + diff --git a/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch b/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch new file mode 100644 index 0000000..9010d3d --- /dev/null +++ b/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch @@ -0,0 +1,162 @@ +From 5e385a0e49a520550a83299632be175857b63f19 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 5 Apr 2022 15:46:52 +0200 +Subject: [PATCH 06/16] qcow2: Add errp to rebuild_refcount_structure() + +RH-Author: Hanna Reitz +RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding +RH-Commit: [3/4] 937b89a7eab6ec6b18618d59bc1526976ad03290 (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2072379 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Instead of fprint()-ing error messages in rebuild_refcount_structure() +and its rebuild_refcounts_write_refblocks() helper, pass them through an +Error object to qcow2_check_refcounts() (which will then print it). 
+ +Suggested-by: Eric Blake +Signed-off-by: Hanna Reitz +Message-Id: <20220405134652.19278-4-hreitz@redhat.com> +Reviewed-by: Eric Blake +(cherry picked from commit 0423f75351ab83b844a31349218b0eadd830e07a) +Signed-off-by: Hanna Reitz +--- + block/qcow2-refcount.c | 33 +++++++++++++++++++-------------- + 1 file changed, 19 insertions(+), 14 deletions(-) + +diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c +index c5669eaa51..ed0ecfaa89 100644 +--- a/block/qcow2-refcount.c ++++ b/block/qcow2-refcount.c +@@ -2465,7 +2465,8 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, + static int rebuild_refcounts_write_refblocks( + BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, + int64_t first_cluster, int64_t end_cluster, +- uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr ++ uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr, ++ Error **errp + ) + { + BDRVQcow2State *s = bs->opaque; +@@ -2516,8 +2517,8 @@ static int rebuild_refcounts_write_refblocks( + nb_clusters, + &first_free_cluster); + if (refblock_offset < 0) { +- fprintf(stderr, "ERROR allocating refblock: %s\n", +- strerror(-refblock_offset)); ++ error_setg_errno(errp, -refblock_offset, ++ "ERROR allocating refblock"); + return refblock_offset; + } + +@@ -2539,6 +2540,7 @@ static int rebuild_refcounts_write_refblocks( + on_disk_reftable_entries * + REFTABLE_ENTRY_SIZE); + if (!on_disk_reftable) { ++ error_setg(errp, "ERROR allocating reftable memory"); + return -ENOMEM; + } + +@@ -2562,7 +2564,7 @@ static int rebuild_refcounts_write_refblocks( + ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, + s->cluster_size, false); + if (ret < 0) { +- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing refblock"); + return ret; + } + +@@ -2578,7 +2580,7 @@ static int rebuild_refcounts_write_refblocks( + ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock, + 
s->cluster_size); + if (ret < 0) { +- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing refblock"); + return ret; + } + +@@ -2601,7 +2603,8 @@ static int rebuild_refcounts_write_refblocks( + static int rebuild_refcount_structure(BlockDriverState *bs, + BdrvCheckResult *res, + void **refcount_table, +- int64_t *nb_clusters) ++ int64_t *nb_clusters, ++ Error **errp) + { + BDRVQcow2State *s = bs->opaque; + int64_t reftable_offset = -1; +@@ -2652,7 +2655,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, + 0, *nb_clusters, + &on_disk_reftable, +- &on_disk_reftable_entries); ++ &on_disk_reftable_entries, errp); + if (reftable_size_changed < 0) { + res->check_errors++; + ret = reftable_size_changed; +@@ -2676,8 +2679,8 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + refcount_table, nb_clusters, + &first_free_cluster); + if (reftable_offset < 0) { +- fprintf(stderr, "ERROR allocating reftable: %s\n", +- strerror(-reftable_offset)); ++ error_setg_errno(errp, -reftable_offset, ++ "ERROR allocating reftable"); + res->check_errors++; + ret = reftable_offset; + goto fail; +@@ -2695,7 +2698,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + reftable_start_cluster, + reftable_end_cluster, + &on_disk_reftable, +- &on_disk_reftable_entries); ++ &on_disk_reftable_entries, errp); + if (reftable_size_changed < 0) { + res->check_errors++; + ret = reftable_size_changed; +@@ -2725,7 +2728,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, + false); + if (ret < 0) { +- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing reftable"); + goto fail; + } + +@@ -2733,7 +2736,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + ret = bdrv_pwrite(bs->file, 
reftable_offset, on_disk_reftable, + reftable_length); + if (ret < 0) { +- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing reftable"); + goto fail; + } + +@@ -2746,7 +2749,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + &reftable_offset_and_clusters, + sizeof(reftable_offset_and_clusters)); + if (ret < 0) { +- fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR setting reftable"); + goto fail; + } + +@@ -2814,11 +2817,13 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, + if (rebuild && (fix & BDRV_FIX_ERRORS)) { + BdrvCheckResult old_res = *res; + int fresh_leaks = 0; ++ Error *local_err = NULL; + + fprintf(stderr, "Rebuilding refcount structure\n"); + ret = rebuild_refcount_structure(bs, res, &refcount_table, +- &nb_clusters); ++ &nb_clusters, &local_err); + if (ret < 0) { ++ error_report_err(local_err); + goto fail; + } + +-- +2.31.1 + diff --git a/kvm-qcow2-Improve-refcount-structure-rebuilding.patch b/kvm-qcow2-Improve-refcount-structure-rebuilding.patch new file mode 100644 index 0000000..cdc92b8 --- /dev/null +++ b/kvm-qcow2-Improve-refcount-structure-rebuilding.patch @@ -0,0 +1,465 @@ +From b453cf6be8429f4438d51eb24fcf49e7d9f14db6 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 5 Apr 2022 15:46:50 +0200 +Subject: [PATCH 04/16] qcow2: Improve refcount structure rebuilding + +RH-Author: Hanna Reitz +RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding +RH-Commit: [1/4] a3606b7abcaebb4930b566e95b1090aead62dfae (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2072379 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +When rebuilding the refcount structures (when qemu-img check -r found +errors with refcount = 0, but reference count > 0), the new refcount +table defaults to being put at the image file end[1]. 
There is no good +reason for that except that it means we will not have to rewrite any +refblocks we already wrote to disk. + +Changing the code to rewrite those refblocks is not too difficult, +though, so let us do that. That is beneficial for images on block +devices, where we cannot really write beyond the end of the image file. + +Use this opportunity to add extensive comments to the code, and refactor +it a bit, getting rid of the backwards-jumping goto. + +[1] Unless there is something allocated in the area pointed to by the + last refblock, so we have to write that refblock. In that case, we + try to put the reftable in there. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1519071 +Closes: https://gitlab.com/qemu-project/qemu/-/issues/941 +Reviewed-by: Eric Blake +Signed-off-by: Hanna Reitz +Message-Id: <20220405134652.19278-2-hreitz@redhat.com> +(cherry picked from commit a8c07ec287554dcefd33733f0e5888a281ddc95e) +Signed-off-by: Hanna Reitz +--- + block/qcow2-refcount.c | 332 +++++++++++++++++++++++++++++------------ + 1 file changed, 235 insertions(+), 97 deletions(-) + +diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c +index b91499410c..c5669eaa51 100644 +--- a/block/qcow2-refcount.c ++++ b/block/qcow2-refcount.c +@@ -2438,111 +2438,140 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, + } + + /* +- * Creates a new refcount structure based solely on the in-memory information +- * given through *refcount_table. All necessary allocations will be reflected +- * in that array. ++ * Helper function for rebuild_refcount_structure(). + * +- * On success, the old refcount structure is leaked (it will be covered by the +- * new refcount structure). ++ * Scan the range of clusters [first_cluster, end_cluster) for allocated ++ * clusters and write all corresponding refblocks to disk. 
The refblock ++ * and allocation data is taken from the in-memory refcount table ++ * *refcount_table[] (of size *nb_clusters), which is basically one big ++ * (unlimited size) refblock for the whole image. ++ * ++ * For these refblocks, clusters are allocated using said in-memory ++ * refcount table. Care is taken that these allocations are reflected ++ * in the refblocks written to disk. ++ * ++ * The refblocks' offsets are written into a reftable, which is ++ * *on_disk_reftable_ptr[] (of size *on_disk_reftable_entries_ptr). If ++ * that reftable is of insufficient size, it will be resized to fit. ++ * This reftable is not written to disk. ++ * ++ * (If *on_disk_reftable_ptr is not NULL, the entries within are assumed ++ * to point to existing valid refblocks that do not need to be allocated ++ * again.) ++ * ++ * Return whether the on-disk reftable array was resized (true/false), ++ * or -errno on error. + */ +-static int rebuild_refcount_structure(BlockDriverState *bs, +- BdrvCheckResult *res, +- void **refcount_table, +- int64_t *nb_clusters) ++static int rebuild_refcounts_write_refblocks( ++ BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, ++ int64_t first_cluster, int64_t end_cluster, ++ uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr ++ ) + { + BDRVQcow2State *s = bs->opaque; +- int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0; ++ int64_t cluster; + int64_t refblock_offset, refblock_start, refblock_index; +- uint32_t reftable_size = 0; +- uint64_t *on_disk_reftable = NULL; ++ int64_t first_free_cluster = 0; ++ uint64_t *on_disk_reftable = *on_disk_reftable_ptr; ++ uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr; + void *on_disk_refblock; +- int ret = 0; +- struct { +- uint64_t reftable_offset; +- uint32_t reftable_clusters; +- } QEMU_PACKED reftable_offset_and_clusters; +- +- qcow2_cache_empty(bs, s->refcount_block_cache); ++ bool reftable_grown = false; ++ int ret; + 
+-write_refblocks: +- for (; cluster < *nb_clusters; cluster++) { ++ for (cluster = first_cluster; cluster < end_cluster; cluster++) { ++ /* Check all clusters to find refblocks that contain non-zero entries */ + if (!s->get_refcount(*refcount_table, cluster)) { + continue; + } + ++ /* ++ * This cluster is allocated, so we need to create a refblock ++ * for it. The data we will write to disk is just the ++ * respective slice from *refcount_table, so it will contain ++ * accurate refcounts for all clusters belonging to this ++ * refblock. After we have written it, we will therefore skip ++ * all remaining clusters in this refblock. ++ */ ++ + refblock_index = cluster >> s->refcount_block_bits; + refblock_start = refblock_index << s->refcount_block_bits; + +- /* Don't allocate a cluster in a refblock already written to disk */ +- if (first_free_cluster < refblock_start) { +- first_free_cluster = refblock_start; +- } +- refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, +- nb_clusters, &first_free_cluster); +- if (refblock_offset < 0) { +- fprintf(stderr, "ERROR allocating refblock: %s\n", +- strerror(-refblock_offset)); +- res->check_errors++; +- ret = refblock_offset; +- goto fail; +- } ++ if (on_disk_reftable_entries > refblock_index && ++ on_disk_reftable[refblock_index]) ++ { ++ /* ++ * We can get here after a `goto write_refblocks`: We have a ++ * reftable from a previous run, and the refblock is already ++ * allocated. No need to allocate it again. 
++ */ ++ refblock_offset = on_disk_reftable[refblock_index]; ++ } else { ++ int64_t refblock_cluster_index; + +- if (reftable_size <= refblock_index) { +- uint32_t old_reftable_size = reftable_size; +- uint64_t *new_on_disk_reftable; ++ /* Don't allocate a cluster in a refblock already written to disk */ ++ if (first_free_cluster < refblock_start) { ++ first_free_cluster = refblock_start; ++ } ++ refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, ++ nb_clusters, ++ &first_free_cluster); ++ if (refblock_offset < 0) { ++ fprintf(stderr, "ERROR allocating refblock: %s\n", ++ strerror(-refblock_offset)); ++ return refblock_offset; ++ } + +- reftable_size = ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, +- s->cluster_size) / REFTABLE_ENTRY_SIZE; +- new_on_disk_reftable = g_try_realloc(on_disk_reftable, +- reftable_size * +- REFTABLE_ENTRY_SIZE); +- if (!new_on_disk_reftable) { +- res->check_errors++; +- ret = -ENOMEM; +- goto fail; ++ refblock_cluster_index = refblock_offset / s->cluster_size; ++ if (refblock_cluster_index >= end_cluster) { ++ /* ++ * We must write the refblock that holds this refblock's ++ * refcount ++ */ ++ end_cluster = refblock_cluster_index + 1; + } +- on_disk_reftable = new_on_disk_reftable; + +- memset(on_disk_reftable + old_reftable_size, 0, +- (reftable_size - old_reftable_size) * REFTABLE_ENTRY_SIZE); ++ if (on_disk_reftable_entries <= refblock_index) { ++ on_disk_reftable_entries = ++ ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, ++ s->cluster_size) / REFTABLE_ENTRY_SIZE; ++ on_disk_reftable = ++ g_try_realloc(on_disk_reftable, ++ on_disk_reftable_entries * ++ REFTABLE_ENTRY_SIZE); ++ if (!on_disk_reftable) { ++ return -ENOMEM; ++ } + +- /* The offset we have for the reftable is now no longer valid; +- * this will leak that range, but we can easily fix that by running +- * a leak-fixing check after this rebuild operation */ +- reftable_offset = -1; +- } else { +- assert(on_disk_reftable); +- } +- 
on_disk_reftable[refblock_index] = refblock_offset; ++ memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0, ++ (on_disk_reftable_entries - ++ *on_disk_reftable_entries_ptr) * ++ REFTABLE_ENTRY_SIZE); + +- /* If this is apparently the last refblock (for now), try to squeeze the +- * reftable in */ +- if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits && +- reftable_offset < 0) +- { +- uint64_t reftable_clusters = size_to_clusters(s, reftable_size * +- REFTABLE_ENTRY_SIZE); +- reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, +- refcount_table, nb_clusters, +- &first_free_cluster); +- if (reftable_offset < 0) { +- fprintf(stderr, "ERROR allocating reftable: %s\n", +- strerror(-reftable_offset)); +- res->check_errors++; +- ret = reftable_offset; +- goto fail; ++ *on_disk_reftable_ptr = on_disk_reftable; ++ *on_disk_reftable_entries_ptr = on_disk_reftable_entries; ++ ++ reftable_grown = true; ++ } else { ++ assert(on_disk_reftable); + } ++ on_disk_reftable[refblock_index] = refblock_offset; + } + ++ /* Refblock is allocated, write it to disk */ ++ + ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, + s->cluster_size, false); + if (ret < 0) { + fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); +- goto fail; ++ return ret; + } + +- /* The size of *refcount_table is always cluster-aligned, therefore the +- * write operation will not overflow */ ++ /* ++ * The refblock is simply a slice of *refcount_table. ++ * Note that the size of *refcount_table is always aligned to ++ * whole clusters, so the write operation will not result in ++ * out-of-bounds accesses. 
++ */ + on_disk_refblock = (void *)((char *) *refcount_table + + refblock_index * s->cluster_size); + +@@ -2550,23 +2579,99 @@ write_refblocks: + s->cluster_size); + if (ret < 0) { + fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); +- goto fail; ++ return ret; + } + +- /* Go to the end of this refblock */ ++ /* This refblock is done, skip to its end */ + cluster = refblock_start + s->refcount_block_size - 1; + } + +- if (reftable_offset < 0) { +- uint64_t post_refblock_start, reftable_clusters; ++ return reftable_grown; ++} ++ ++/* ++ * Creates a new refcount structure based solely on the in-memory information ++ * given through *refcount_table (this in-memory information is basically just ++ * the concatenation of all refblocks). All necessary allocations will be ++ * reflected in that array. ++ * ++ * On success, the old refcount structure is leaked (it will be covered by the ++ * new refcount structure). ++ */ ++static int rebuild_refcount_structure(BlockDriverState *bs, ++ BdrvCheckResult *res, ++ void **refcount_table, ++ int64_t *nb_clusters) ++{ ++ BDRVQcow2State *s = bs->opaque; ++ int64_t reftable_offset = -1; ++ int64_t reftable_length = 0; ++ int64_t reftable_clusters; ++ int64_t refblock_index; ++ uint32_t on_disk_reftable_entries = 0; ++ uint64_t *on_disk_reftable = NULL; ++ int ret = 0; ++ int reftable_size_changed = 0; ++ struct { ++ uint64_t reftable_offset; ++ uint32_t reftable_clusters; ++ } QEMU_PACKED reftable_offset_and_clusters; ++ ++ qcow2_cache_empty(bs, s->refcount_block_cache); ++ ++ /* ++ * For each refblock containing entries, we try to allocate a ++ * cluster (in the in-memory refcount table) and write its offset ++ * into on_disk_reftable[]. We then write the whole refblock to ++ * disk (as a slice of the in-memory refcount table). ++ * This is done by rebuild_refcounts_write_refblocks(). ++ * ++ * Once we have scanned all clusters, we try to find space for the ++ * reftable. 
This will dirty the in-memory refcount table (i.e. ++ * make it differ from the refblocks we have already written), so we ++ * need to run rebuild_refcounts_write_refblocks() again for the ++ * range of clusters where the reftable has been allocated. ++ * ++ * This second run might make the reftable grow again, in which case ++ * we will need to allocate another space for it, which is why we ++ * repeat all this until the reftable stops growing. ++ * ++ * (This loop will terminate, because with every cluster the ++ * reftable grows, it can accommodate a multitude of more refcounts, ++ * so that at some point this must be able to cover the reftable ++ * and all refblocks describing it.) ++ * ++ * We then convert the reftable to big-endian and write it to disk. ++ * ++ * Note that we never free any reftable allocations. Doing so would ++ * needlessly complicate the algorithm: The eventual second check ++ * run we do will clean up all leaks we have caused. ++ */ ++ ++ reftable_size_changed = ++ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, ++ 0, *nb_clusters, ++ &on_disk_reftable, ++ &on_disk_reftable_entries); ++ if (reftable_size_changed < 0) { ++ res->check_errors++; ++ ret = reftable_size_changed; ++ goto fail; ++ } ++ ++ /* ++ * There was no reftable before, so rebuild_refcounts_write_refblocks() ++ * must have increased its size (from 0 to something).
++ */ ++ assert(reftable_size_changed); ++ ++ do { ++ int64_t reftable_start_cluster, reftable_end_cluster; ++ int64_t first_free_cluster = 0; ++ ++ reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE; ++ reftable_clusters = size_to_clusters(s, reftable_length); + +- post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size); +- reftable_clusters = +- size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE); +- /* Not pretty but simple */ +- if (first_free_cluster < post_refblock_start) { +- first_free_cluster = post_refblock_start; +- } + reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, + refcount_table, nb_clusters, + &first_free_cluster); +@@ -2578,24 +2683,55 @@ write_refblocks: + goto fail; + } + +- goto write_refblocks; +- } ++ /* ++ * We need to update the affected refblocks, so re-run the ++ * write_refblocks loop for the reftable's range of clusters. ++ */ ++ assert(offset_into_cluster(s, reftable_offset) == 0); ++ reftable_start_cluster = reftable_offset / s->cluster_size; ++ reftable_end_cluster = reftable_start_cluster + reftable_clusters; ++ reftable_size_changed = ++ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, ++ reftable_start_cluster, ++ reftable_end_cluster, ++ &on_disk_reftable, ++ &on_disk_reftable_entries); ++ if (reftable_size_changed < 0) { ++ res->check_errors++; ++ ret = reftable_size_changed; ++ goto fail; ++ } ++ ++ /* ++ * If the reftable size has changed, we will need to find a new ++ * allocation, repeating the loop. ++ */ ++ } while (reftable_size_changed); + +- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { ++ /* The above loop must have run at least once */ ++ assert(reftable_offset >= 0); ++ ++ /* ++ * All allocations are done, all refblocks are written, convert the ++ * reftable to big-endian and write it to disk. 
++ */ ++ ++ for (refblock_index = 0; refblock_index < on_disk_reftable_entries; ++ refblock_index++) ++ { + cpu_to_be64s(&on_disk_reftable[refblock_index]); + } + +- ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, +- reftable_size * REFTABLE_ENTRY_SIZE, ++ ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, + false); + if (ret < 0) { + fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); + goto fail; + } + +- assert(reftable_size < INT_MAX / REFTABLE_ENTRY_SIZE); ++ assert(reftable_length < INT_MAX); + ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable, +- reftable_size * REFTABLE_ENTRY_SIZE); ++ reftable_length); + if (ret < 0) { + fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); + goto fail; +@@ -2604,7 +2740,7 @@ write_refblocks: + /* Enter new reftable into the image header */ + reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset); + reftable_offset_and_clusters.reftable_clusters = +- cpu_to_be32(size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE)); ++ cpu_to_be32(reftable_clusters); + ret = bdrv_pwrite_sync(bs->file, + offsetof(QCowHeader, refcount_table_offset), + &reftable_offset_and_clusters, +@@ -2614,12 +2750,14 @@ write_refblocks: + goto fail; + } + +- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { ++ for (refblock_index = 0; refblock_index < on_disk_reftable_entries; ++ refblock_index++) ++ { + be64_to_cpus(&on_disk_reftable[refblock_index]); + } + s->refcount_table = on_disk_reftable; + s->refcount_table_offset = reftable_offset; +- s->refcount_table_size = reftable_size; ++ s->refcount_table_size = on_disk_reftable_entries; + update_max_refcount_table_index(s); + + return 0; +-- +2.31.1 + diff --git a/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch b/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch new file mode 100644 index 0000000..77929a6 --- /dev/null +++ 
b/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch @@ -0,0 +1,385 @@ +From 7a6fa42d4a4263c94b9bf18290f9e7680ea9e7f4 Mon Sep 17 00:00:00 2001 +From: Nicolas Saenz Julienne +Date: Mon, 25 Apr 2022 09:57:23 +0200 +Subject: [PATCH 03/16] util/event-loop-base: Introduce options to set the + thread pool size + +RH-Author: Nicolas Saenz Julienne +RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size +RH-Commit: [3/3] af78a88ff3c69701cbb5f9e980c3d6ebbd13ff98 +RH-Bugzilla: 2031024 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +The thread pool regulates itself: when idle, it kills threads until +empty, when in demand, it creates new threads until full. This behaviour +doesn't play well with latency sensitive workloads where the price of +creating a new thread is too high. For example, when paired with qemu's +'-mlock', or using safety features like SafeStack, creating a new thread +has been measured to take multiple milliseconds. + +In order to mitigate this let's introduce a new 'EventLoopBase' +property to set the thread pool size. The threads will be created during +the pool's initialization or upon updating the property's value, remain +available during its lifetime regardless of demand, and destroyed upon +freeing it. A properly characterized workload will then be able to +configure the pool to avoid any latency spikes.
+ +Signed-off-by: Nicolas Saenz Julienne +Reviewed-by: Stefan Hajnoczi +Acked-by: Markus Armbruster +Message-id: 20220425075723.20019-4-nsaenzju@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 71ad4713cc1d7fca24388b828ef31ae6cb38a31c) +--- + event-loop-base.c | 23 +++++++++++++ + include/block/aio.h | 10 ++++++ + include/block/thread-pool.h | 3 ++ + include/sysemu/event-loop-base.h | 4 +++ + iothread.c | 3 ++ + qapi/qom.json | 10 +++++- + util/aio-posix.c | 1 + + util/async.c | 20 ++++++++++++ + util/main-loop.c | 9 ++++++ + util/thread-pool.c | 55 +++++++++++++++++++++++++++++--- + 10 files changed, 133 insertions(+), 5 deletions(-) + +diff --git a/event-loop-base.c b/event-loop-base.c +index e7f99a6ec8..d5be4dc6fc 100644 +--- a/event-loop-base.c ++++ b/event-loop-base.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + #include "qom/object_interfaces.h" + #include "qapi/error.h" ++#include "block/thread-pool.h" + #include "sysemu/event-loop-base.h" + + typedef struct { +@@ -21,9 +22,22 @@ typedef struct { + ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */ + } EventLoopBaseParamInfo; + ++static void event_loop_base_instance_init(Object *obj) ++{ ++ EventLoopBase *base = EVENT_LOOP_BASE(obj); ++ ++ base->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT; ++} ++ + static EventLoopBaseParamInfo aio_max_batch_info = { + "aio-max-batch", offsetof(EventLoopBase, aio_max_batch), + }; ++static EventLoopBaseParamInfo thread_pool_min_info = { ++ "thread-pool-min", offsetof(EventLoopBase, thread_pool_min), ++}; ++static EventLoopBaseParamInfo thread_pool_max_info = { ++ "thread-pool-max", offsetof(EventLoopBase, thread_pool_max), ++}; + + static void event_loop_base_get_param(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +@@ -95,12 +109,21 @@ static void event_loop_base_class_init(ObjectClass *klass, void *class_data) + event_loop_base_get_param, + event_loop_base_set_param, + NULL, &aio_max_batch_info); ++ 
object_class_property_add(klass, "thread-pool-min", "int", ++ event_loop_base_get_param, ++ event_loop_base_set_param, ++ NULL, &thread_pool_min_info); ++ object_class_property_add(klass, "thread-pool-max", "int", ++ event_loop_base_get_param, ++ event_loop_base_set_param, ++ NULL, &thread_pool_max_info); + } + + static const TypeInfo event_loop_base_info = { + .name = TYPE_EVENT_LOOP_BASE, + .parent = TYPE_OBJECT, + .instance_size = sizeof(EventLoopBase), ++ .instance_init = event_loop_base_instance_init, + .class_size = sizeof(EventLoopBaseClass), + .class_init = event_loop_base_class_init, + .abstract = true, +diff --git a/include/block/aio.h b/include/block/aio.h +index 5634173b12..d128558f1d 100644 +--- a/include/block/aio.h ++++ b/include/block/aio.h +@@ -192,6 +192,8 @@ struct AioContext { + QSLIST_HEAD(, Coroutine) scheduled_coroutines; + QEMUBH *co_schedule_bh; + ++ int thread_pool_min; ++ int thread_pool_max; + /* Thread pool for performing work and receiving completion callbacks. + * Has its own locking. 
+ */ +@@ -769,4 +771,12 @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, + void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch, + Error **errp); + ++/** ++ * aio_context_set_thread_pool_params: ++ * @ctx: the aio context ++ * @min: min number of threads to have readily available in the thread pool ++ * @max: max number of threads the thread pool can contain ++ */ ++void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min, ++ int64_t max, Error **errp); + #endif +diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h +index 7dd7d730a0..2020bcc92d 100644 +--- a/include/block/thread-pool.h ++++ b/include/block/thread-pool.h +@@ -20,6 +20,8 @@ + + #include "block/block.h" + ++#define THREAD_POOL_MAX_THREADS_DEFAULT 64 ++ + typedef int ThreadPoolFunc(void *opaque); + + typedef struct ThreadPool ThreadPool; +@@ -33,5 +35,6 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool, + int coroutine_fn thread_pool_submit_co(ThreadPool *pool, + ThreadPoolFunc *func, void *arg); + void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg); ++void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx); + + #endif +diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h +index fced4c9fea..2748bf6ae1 100644 +--- a/include/sysemu/event-loop-base.h ++++ b/include/sysemu/event-loop-base.h +@@ -33,5 +33,9 @@ struct EventLoopBase { + + /* AioContext AIO engine parameters */ + int64_t aio_max_batch; ++ ++ /* AioContext thread pool parameters */ ++ int64_t thread_pool_min; ++ int64_t thread_pool_max; + }; + #endif +diff --git a/iothread.c b/iothread.c +index 8fa2f3bfb8..529194a566 100644 +--- a/iothread.c ++++ b/iothread.c +@@ -174,6 +174,9 @@ static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp) + aio_context_set_aio_params(iothread->ctx, + iothread->parent_obj.aio_max_batch, + errp); ++ ++ aio_context_set_thread_pool_params(iothread->ctx,
base->thread_pool_min, ++ base->thread_pool_max, errp); + } + + +diff --git a/qapi/qom.json b/qapi/qom.json +index 7d4a2ac1b9..6a653c6636 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -508,10 +508,18 @@ + # 0 means that the engine will use its default. + # (default: 0) + # ++# @thread-pool-min: minimum number of threads reserved in the thread pool ++# (default:0) ++# ++# @thread-pool-max: maximum number of threads the thread pool can contain ++# (default:64) ++# + # Since: 7.1 + ## + { 'struct': 'EventLoopBaseProperties', +- 'data': { '*aio-max-batch': 'int' } } ++ 'data': { '*aio-max-batch': 'int', ++ '*thread-pool-min': 'int', ++ '*thread-pool-max': 'int' } } + + ## + # @IothreadProperties: +diff --git a/util/aio-posix.c b/util/aio-posix.c +index be0182a3c6..731f3826c0 100644 +--- a/util/aio-posix.c ++++ b/util/aio-posix.c +@@ -15,6 +15,7 @@ + + #include "qemu/osdep.h" + #include "block/block.h" ++#include "block/thread-pool.h" + #include "qemu/main-loop.h" + #include "qemu/rcu.h" + #include "qemu/rcu_queue.h" +diff --git a/util/async.c b/util/async.c +index 2ea1172f3e..554ba70cca 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -563,6 +563,9 @@ AioContext *aio_context_new(Error **errp) + + ctx->aio_max_batch = 0; + ++ ctx->thread_pool_min = 0; ++ ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT; ++ + return ctx; + fail: + g_source_destroy(&ctx->source); +@@ -696,3 +699,20 @@ void qemu_set_current_aio_context(AioContext *ctx) + assert(!get_my_aiocontext()); + set_my_aiocontext(ctx); + } ++ ++void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min, ++ int64_t max, Error **errp) ++{ ++ ++ if (min > max || !max || min > INT_MAX || max > INT_MAX) { ++ error_setg(errp, "bad thread-pool-min/thread-pool-max values"); ++ return; ++ } ++ ++ ctx->thread_pool_min = min; ++ ctx->thread_pool_max = max; ++ ++ if (ctx->thread_pool) { ++ thread_pool_update_params(ctx->thread_pool, ctx); ++ } ++} +diff --git a/util/main-loop.c b/util/main-loop.c +index 
5b13f456fa..a0f48186ab 100644 +--- a/util/main-loop.c ++++ b/util/main-loop.c +@@ -30,6 +30,7 @@ + #include "sysemu/replay.h" + #include "qemu/main-loop.h" + #include "block/aio.h" ++#include "block/thread-pool.h" + #include "qemu/error-report.h" + #include "qemu/queue.h" + #include "qemu/compiler.h" +@@ -187,12 +188,20 @@ int qemu_init_main_loop(Error **errp) + + static void main_loop_update_params(EventLoopBase *base, Error **errp) + { ++ ERRP_GUARD(); ++ + if (!qemu_aio_context) { + error_setg(errp, "qemu aio context not ready"); + return; + } + + aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp); ++ if (*errp) { ++ return; ++ } ++ ++ aio_context_set_thread_pool_params(qemu_aio_context, base->thread_pool_min, ++ base->thread_pool_max, errp); + } + + MainLoop *mloop; +diff --git a/util/thread-pool.c b/util/thread-pool.c +index d763cea505..196835b4d3 100644 +--- a/util/thread-pool.c ++++ b/util/thread-pool.c +@@ -58,7 +58,6 @@ struct ThreadPool { + QemuMutex lock; + QemuCond worker_stopped; + QemuSemaphore sem; +- int max_threads; + QEMUBH *new_thread_bh; + + /* The following variables are only accessed from one AioContext. */ +@@ -71,8 +70,27 @@ struct ThreadPool { + int new_threads; /* backlog of threads we need to create */ + int pending_threads; /* threads created but not running yet */ + bool stopping; ++ int min_threads; ++ int max_threads; + }; + ++static inline bool back_to_sleep(ThreadPool *pool, int ret) ++{ ++ /* ++ * The semaphore timed out, we should exit the loop except when: ++ * - There is work to do, we raced with the signal. ++ * - The max threads threshold just changed, we raced with the signal. ++ * - The thread pool forces a minimum number of readily available threads. 
++ */ ++ if (ret == -1 && (!QTAILQ_EMPTY(&pool->request_list) || ++ pool->cur_threads > pool->max_threads || ++ pool->cur_threads <= pool->min_threads)) { ++ return true; ++ } ++ ++ return false; ++} ++ + static void *worker_thread(void *opaque) + { + ThreadPool *pool = opaque; +@@ -91,8 +109,9 @@ static void *worker_thread(void *opaque) + ret = qemu_sem_timedwait(&pool->sem, 10000); + qemu_mutex_lock(&pool->lock); + pool->idle_threads--; +- } while (ret == -1 && !QTAILQ_EMPTY(&pool->request_list)); +- if (ret == -1 || pool->stopping) { ++ } while (back_to_sleep(pool, ret)); ++ if (ret == -1 || pool->stopping || ++ pool->cur_threads > pool->max_threads) { + break; + } + +@@ -294,6 +313,33 @@ void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg) + thread_pool_submit_aio(pool, func, arg, NULL, NULL); + } + ++void thread_pool_update_params(ThreadPool *pool, AioContext *ctx) ++{ ++ qemu_mutex_lock(&pool->lock); ++ ++ pool->min_threads = ctx->thread_pool_min; ++ pool->max_threads = ctx->thread_pool_max; ++ ++ /* ++ * We either have to: ++ * - Increase the number available of threads until over the min_threads ++ * threshold. ++ * - Decrease the number of available threads until under the max_threads ++ * threshold. ++ * - Do nothing. The current number of threads fall in between the min and ++ * max thresholds. We'll let the pool manage itself. 
++ */ ++ for (int i = pool->cur_threads; i < pool->min_threads; i++) { ++ spawn_thread(pool); ++ } ++ ++ for (int i = pool->cur_threads; i > pool->max_threads; i--) { ++ qemu_sem_post(&pool->sem); ++ } ++ ++ qemu_mutex_unlock(&pool->lock); ++} ++ + static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) + { + if (!ctx) { +@@ -306,11 +352,12 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) + qemu_mutex_init(&pool->lock); + qemu_cond_init(&pool->worker_stopped); + qemu_sem_init(&pool->sem, 0); +- pool->max_threads = 64; + pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool); + + QLIST_INIT(&pool->head); + QTAILQ_INIT(&pool->request_list); ++ ++ thread_pool_update_params(pool, ctx); + } + + ThreadPool *thread_pool_new(AioContext *ctx) +-- +2.31.1 + diff --git a/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch b/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch new file mode 100644 index 0000000..2104424 --- /dev/null +++ b/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch @@ -0,0 +1,233 @@ +From b4969662de01848f887a3918e97e516efc213f71 Mon Sep 17 00:00:00 2001 +From: Nicolas Saenz Julienne +Date: Mon, 25 Apr 2022 09:57:22 +0200 +Subject: [PATCH 02/16] util/main-loop: Introduce the main loop into QOM + +RH-Author: Nicolas Saenz Julienne +RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size +RH-Commit: [2/3] a481b77e25ad50d13dcbe26b36c551b18c89bddd +RH-Bugzilla: 2031024 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +'event-loop-base' provides basic property handling for all 'AioContext' +based event loops. So let's define a new 'MainLoopClass' that inherits +from it. This will permit tweaking the main loop's properties through +qapi as well as through the command line using the '-object' keyword[1]. +Only one instance of 'MainLoopClass' might be created at any time. 
+ +'EventLoopBaseClass' learns a new callback, 'can_be_deleted()' so as to +mark 'MainLoop' as non-deletable. + +[1] For example: + -object main-loop,id=main-loop,aio-max-batch= + +Signed-off-by: Nicolas Saenz Julienne +Reviewed-by: Stefan Hajnoczi +Acked-by: Markus Armbruster +Message-id: 20220425075723.20019-3-nsaenzju@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 70ac26b9e5ca8374bb3ef3f30b871726673c9f27) +--- + event-loop-base.c | 13 ++++++++ + include/qemu/main-loop.h | 10 ++++++ + include/sysemu/event-loop-base.h | 1 + + meson.build | 3 +- + qapi/qom.json | 13 ++++++++ + util/main-loop.c | 56 ++++++++++++++++++++++++++++++++ + 6 files changed, 95 insertions(+), 1 deletion(-) + +diff --git a/event-loop-base.c b/event-loop-base.c +index a924c73a7c..e7f99a6ec8 100644 +--- a/event-loop-base.c ++++ b/event-loop-base.c +@@ -73,10 +73,23 @@ static void event_loop_base_complete(UserCreatable *uc, Error **errp) + } + } + ++static bool event_loop_base_can_be_deleted(UserCreatable *uc) ++{ ++ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc); ++ EventLoopBase *backend = EVENT_LOOP_BASE(uc); ++ ++ if (bc->can_be_deleted) { ++ return bc->can_be_deleted(backend); ++ } ++ ++ return true; ++} ++ + static void event_loop_base_class_init(ObjectClass *klass, void *class_data) + { + UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); + ucc->complete = event_loop_base_complete; ++ ucc->can_be_deleted = event_loop_base_can_be_deleted; + + object_class_property_add(klass, "aio-max-batch", "int", + event_loop_base_get_param, +diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h +index d3750c8e76..20c9387654 100644 +--- a/include/qemu/main-loop.h ++++ b/include/qemu/main-loop.h +@@ -26,9 +26,19 @@ + #define QEMU_MAIN_LOOP_H + + #include "block/aio.h" ++#include "qom/object.h" ++#include "sysemu/event-loop-base.h" + + #define SIG_IPI SIGUSR1 + ++#define TYPE_MAIN_LOOP "main-loop" ++OBJECT_DECLARE_TYPE(MainLoop, MainLoopClass, MAIN_LOOP) ++ 
++struct MainLoop { ++ EventLoopBase parent_obj; ++}; ++typedef struct MainLoop MainLoop; ++ + /** + * qemu_init_main_loop: Set up the process so that it can run the main loop. + * +diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h +index 8e77d8b69f..fced4c9fea 100644 +--- a/include/sysemu/event-loop-base.h ++++ b/include/sysemu/event-loop-base.h +@@ -25,6 +25,7 @@ struct EventLoopBaseClass { + + void (*init)(EventLoopBase *base, Error **errp); + void (*update_params)(EventLoopBase *base, Error **errp); ++ bool (*can_be_deleted)(EventLoopBase *base); + }; + + struct EventLoopBase { +diff --git a/meson.build b/meson.build +index b9c919a55e..5a7c10e639 100644 +--- a/meson.build ++++ b/meson.build +@@ -2832,7 +2832,8 @@ libqemuutil = static_library('qemuutil', + sources: util_ss.sources() + stub_ss.sources() + genh, + dependencies: [util_ss.dependencies(), libm, threads, glib, socket, malloc, pixman]) + qemuutil = declare_dependency(link_with: libqemuutil, +- sources: genh + version_res) ++ sources: genh + version_res, ++ dependencies: [event_loop_base]) + + if have_system or have_user + decodetree = generator(find_program('scripts/decodetree.py'), +diff --git a/qapi/qom.json b/qapi/qom.json +index a2439533c5..7d4a2ac1b9 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -540,6 +540,17 @@ + '*poll-grow': 'int', + '*poll-shrink': 'int' } } + ++## ++# @MainLoopProperties: ++# ++# Properties for the main-loop object. 
++# ++# Since: 7.1 ++## ++{ 'struct': 'MainLoopProperties', ++ 'base': 'EventLoopBaseProperties', ++ 'data': {} } ++ + ## + # @MemoryBackendProperties: + # +@@ -830,6 +841,7 @@ + { 'name': 'input-linux', + 'if': 'CONFIG_LINUX' }, + 'iothread', ++ 'main-loop', + { 'name': 'memory-backend-epc', + 'if': 'CONFIG_LINUX' }, + 'memory-backend-file', +@@ -895,6 +907,7 @@ + 'input-linux': { 'type': 'InputLinuxProperties', + 'if': 'CONFIG_LINUX' }, + 'iothread': 'IothreadProperties', ++ 'main-loop': 'MainLoopProperties', + 'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties', + 'if': 'CONFIG_LINUX' }, + 'memory-backend-file': 'MemoryBackendFileProperties', +diff --git a/util/main-loop.c b/util/main-loop.c +index b7b0ce4ca0..5b13f456fa 100644 +--- a/util/main-loop.c ++++ b/util/main-loop.c +@@ -33,6 +33,7 @@ + #include "qemu/error-report.h" + #include "qemu/queue.h" + #include "qemu/compiler.h" ++#include "qom/object.h" + + #ifndef _WIN32 + #include +@@ -184,6 +185,61 @@ int qemu_init_main_loop(Error **errp) + return 0; + } + ++static void main_loop_update_params(EventLoopBase *base, Error **errp) ++{ ++ if (!qemu_aio_context) { ++ error_setg(errp, "qemu aio context not ready"); ++ return; ++ } ++ ++ aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp); ++} ++ ++MainLoop *mloop; ++ ++static void main_loop_init(EventLoopBase *base, Error **errp) ++{ ++ MainLoop *m = MAIN_LOOP(base); ++ ++ if (mloop) { ++ error_setg(errp, "only one main-loop instance allowed"); ++ return; ++ } ++ ++ main_loop_update_params(base, errp); ++ ++ mloop = m; ++ return; ++} ++ ++static bool main_loop_can_be_deleted(EventLoopBase *base) ++{ ++ return false; ++} ++ ++static void main_loop_class_init(ObjectClass *oc, void *class_data) ++{ ++ EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(oc); ++ ++ bc->init = main_loop_init; ++ bc->update_params = main_loop_update_params; ++ bc->can_be_deleted = main_loop_can_be_deleted; ++} ++ ++static const TypeInfo main_loop_info = { ++ 
.name = TYPE_MAIN_LOOP, ++ .parent = TYPE_EVENT_LOOP_BASE, ++ .class_init = main_loop_class_init, ++ .instance_size = sizeof(MainLoop), ++}; ++ ++static void main_loop_register_types(void) ++{ ++ type_register_static(&main_loop_info); ++} ++ ++type_init(main_loop_register_types) ++ + static int max_priority; + + #ifndef _WIN32 +-- +2.31.1 + diff --git a/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch b/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch new file mode 100644 index 0000000..7e644c5 --- /dev/null +++ b/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch @@ -0,0 +1,78 @@ +From 3de8fb9f3dba18d04efa10b70bcec641035effc5 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 24 May 2022 05:14:05 -0400 +Subject: [PATCH 16/16] vfio/common: remove spurious warning on + vfio_listener_region_del + +RH-Author: Eric Auger +RH-MergeRequest: 101: vfio/common: remove spurious warning on vfio_listener_region_del +RH-Commit: [1/1] dac688b8a981ebb964fea79ea198c329b9cdb551 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2086262 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Acked-by: Alex Williamson + + Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2086262 + Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45876133 + Upstream Status: YES + Tested: With TPM-CRB and VFIO + +851d6d1a0f ("vfio/common: remove spurious tpm-crb-cmd misalignment +warning") removed the warning on vfio_listener_region_add() path. + +However the same warning also hits on region_del path. Let's remove +it and reword the dynamic trace as this can be called on both +map and unmap path. 
+ +Contextual Conflict in hw/vfio/common.c +We don't have 8e3b0cbb721 ("Replace qemu_real_host_page variables with inlined functions") + +Signed-off-by: Eric Auger +Reviewed-by: Cornelia Huck +Link: https://lore.kernel.org/r/20220524091405.416256-1-eric.auger@redhat.com +Fixes: 851d6d1a0ff2 ("vfio/common: remove spurious tpm-crb-cmd misalignment warning") +Signed-off-by: Alex Williamson +(cherry picked from commit ec6600be0dc16982181c7ad80d94c143c0807dd2) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 10 +++++++++- + hw/vfio/trace-events | 2 +- + 2 files changed, 10 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 0fbe0d47af..637981f9a1 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1145,7 +1145,15 @@ static void vfio_listener_region_del(MemoryListener *listener, + if (unlikely((section->offset_within_address_space & + ~qemu_real_host_page_mask) != + (section->offset_within_region & ~qemu_real_host_page_mask))) { +- error_report("%s received unaligned region", __func__); ++ if (!vfio_known_safe_misalignment(section)) { ++ error_report("%s received unaligned region %s iova=0x%"PRIx64 ++ " offset_within_region=0x%"PRIx64 ++ " qemu_real_host_page_size=0x%"PRIxPTR, ++ __func__, memory_region_name(section->mr), ++ section->offset_within_address_space, ++ section->offset_within_region, ++ qemu_real_host_page_size); ++ } + return; + } + +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 582882db91..73dffe9e00 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -100,7 +100,7 @@ vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add + vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d" + vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64 + vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]" 
+-vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR ": cannot be mapped for DMA" ++vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR + vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA" + vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64 + vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 +-- +2.31.1 + diff --git a/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch b/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch new file mode 100644 index 0000000..70e8f59 --- /dev/null +++ b/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch @@ -0,0 +1,56 @@ +From edb2bd99355f300b512c040e91f5870ea14a5d7e Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:15 -0700 +Subject: [PATCH 11/16] vhost-net: fix improper cleanup in vhost_net_start +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [4/7] e88e482dd4b344f0cc887a358268beaed4d62917 (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +vhost_net_start() missed a corresponding stop_one() upon error from +vhost_set_vring_enable(). While at it, make the error handling for +err_start more robust. No real issue was found due to this though. 
+ +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-5-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 6f3910b5eee00b8cc959e94659c0d524c482a418) +Signed-off-by: Jason Wang +--- + hw/net/vhost_net.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index 30379d2ca4..d6d7c51f62 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -381,6 +381,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, + r = vhost_set_vring_enable(peer, peer->vring_enable); + + if (r < 0) { ++ vhost_net_stop_one(get_vhost_net(peer), dev); + goto err_start; + } + } +@@ -390,7 +391,8 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, + + err_start: + while (--i >= 0) { +- peer = qemu_get_peer(ncs , i); ++ peer = qemu_get_peer(ncs, i < data_queue_pairs ? ++ i : n->max_queue_pairs); + vhost_net_stop_one(get_vhost_net(peer), dev); + } + e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); +-- +2.31.1 + diff --git a/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch b/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch new file mode 100644 index 0000000..747bf5f --- /dev/null +++ b/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch @@ -0,0 +1,58 @@ +From 46c5a35aa56cf0dd55376638dbf7d46e85f497e1 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:16 -0700 +Subject: [PATCH 12/16] vhost-vdpa: backend feature should set only once +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [5/7] 7531bb8da0c99b29997e8bfc6d1e811daf3cdd38 (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +The vhost_vdpa_one_time_request() branch in 
+vhost_vdpa_set_backend_cap() incorrectly sends down +ioctls on vhost_dev with non-zero index. This may +end up with multiple VHOST_SET_BACKEND_FEATURES +ioctl calls sent down on the vhost-vdpa fd that is +shared between all these vhost_dev's. + +To fix it, send down ioctl only once via the first +vhost_dev with index 0. Toggle the polarity of the +vhost_vdpa_one_time_request() test should do the +trick. + +Fixes: 4d191cfdc7de ("vhost-vdpa: classify one time request") +Signed-off-by: Si-Wei Liu +Reviewed-by: Stefano Garzarella +Acked-by: Jason Wang +Acked-by: Eugenio Pérez +Message-Id: <1651890498-24478-6-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 6aee7e4233f6467f69531fcd352adff028f3f5ea) +Signed-off-by: Jason Wang +--- + hw/virtio/vhost-vdpa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 8adf7c0b92..6e3dbd9e89 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -665,7 +665,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) + + features &= f; + +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_one_time_request(dev)) { + r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); + if (r) { + return -EFAULT; +-- +2.31.1 + diff --git a/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch b/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch new file mode 100644 index 0000000..2466557 --- /dev/null +++ b/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch @@ -0,0 +1,123 @@ +From 58acdab17ec00ab76105ab92a51c5ba4dec3df5a Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:17 -0700 +Subject: [PATCH 13/16] vhost-vdpa: change name and polarity for + vhost_vdpa_one_time_request() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang 
+RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [6/7] 7029778f463a136ff412c63b86b6953390e47bf8 (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +The name vhost_vdpa_one_time_request() was confusing. No +matter whatever it returns, its typical occurrence had +always been at requests that only need to be applied once. +And the name didn't suggest what it actually checks for. +Change it to vhost_vdpa_first_dev() with polarity flipped +for better readability of code. That way it is able to +reflect what the check is really about. + +This call is applicable to request which performs operation +only once, before queues are set up, and usually at the beginning +of the caller function. Document the requirement for it in place. + +Signed-off-by: Si-Wei Liu +Message-Id: <1651890498-24478-7-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Stefano Garzarella +Acked-by: Jason Wang +(cherry picked from commit d71b0609fc04217e28d17009f04d74b08be6f466) +Signed-off-by: Jason Wang +--- + hw/virtio/vhost-vdpa.c | 23 +++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 6e3dbd9e89..33dcaa135e 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -366,11 +366,18 @@ static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) + v->iova_range.last); + } + +-static bool vhost_vdpa_one_time_request(struct vhost_dev *dev) ++/* ++ * The use of this function is for requests that only need to be ++ * applied once. Typically such request occurs at the beginning ++ * of operation, and before setting up queues. It should not be ++ * used for request that performs operation until all queues are ++ * set, which would need to check dev->vq_index_end instead. 
++ */ ++static bool vhost_vdpa_first_dev(struct vhost_dev *dev) + { + struct vhost_vdpa *v = dev->opaque; + +- return v->index != 0; ++ return v->index == 0; + } + + static int vhost_vdpa_get_dev_features(struct vhost_dev *dev, +@@ -451,7 +458,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) + + vhost_vdpa_get_iova_range(v); + +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -594,7 +601,7 @@ static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) + static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, + struct vhost_memory *mem) + { +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -623,7 +630,7 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, + struct vhost_vdpa *v = dev->opaque; + int ret; + +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -665,7 +672,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) + + features &= f; + +- if (!vhost_vdpa_one_time_request(dev)) { ++ if (vhost_vdpa_first_dev(dev)) { + r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); + if (r) { + return -EFAULT; +@@ -1118,7 +1125,7 @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, + struct vhost_log *log) + { + struct vhost_vdpa *v = dev->opaque; +- if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) { ++ if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -1240,7 +1247,7 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev, + + static int vhost_vdpa_set_owner(struct vhost_dev *dev) + { +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +-- +2.31.1 + diff --git a/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch b/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch new file mode 100644 index 0000000..7716cbf --- 
/dev/null +++ b/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch @@ -0,0 +1,48 @@ +From 3142102adb98f46518c0ac1773b0c48710c6bed6 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:14 -0700 +Subject: [PATCH 10/16] vhost-vdpa: fix improper cleanup in net_init_vhost_vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [3/7] c83ff6c97d34cfae3c3447edde934b42a9ace75f (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +... such that no memory leaks on dangling net clients in case of +error. + +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-4-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 9bd055073e375c8a0d7ebce925e05d914d69fc7f) +Signed-off-by: Jason Wang +--- + net/vhost-vdpa.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 1e9fe47c03..df1e69ee72 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -306,7 +306,9 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + + err: + if (i) { +- qemu_del_net_client(ncs[0]); ++ for (i--; i >= 0; i--) { ++ qemu_del_net_client(ncs[i]); ++ } + } + qemu_close(vdpa_device_fd); + +-- +2.31.1 + diff --git a/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch b/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch new file mode 100644 index 0000000..9da7ea7 --- /dev/null +++ b/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch @@ -0,0 +1,143 @@ +From 316b73277de233c7a9b6917077c00d7012060944 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:13 -0700 +Subject: [PATCH 09/16] virtio-net: align ctrl_vq index for non-mq guest for + vhost_vdpa 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [2/7] 7f764bbb579c7b473ad67fc25b46e698d277e781 (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +With MQ enabled vdpa device and non-MQ supporting guest e.g. +booting vdpa with mq=on over OVMF of single vqp, below assert +failure is seen: + +../hw/virtio/vhost-vdpa.c:560: vhost_vdpa_get_vq_index: Assertion `idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs' failed. + +0 0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6 +1 0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6 +2 0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6 +3 0x00007f8ce3fec252 in () at /lib64/libc.so.6 +4 0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=, idx=) at ../hw/virtio/vhost-vdpa.c:563 +5 0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=, idx=) at ../hw/virtio/vhost-vdpa.c:558 +6 0x0000558f52d7329a in vhost_virtqueue_mask (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=) at ../hw/virtio/vhost.c:1557 +7 0x0000558f52c6b89a in virtio_pci_set_guest_notifier (d=d@entry=0x558f568f0f60, n=n@entry=2, assign=assign@entry=true, with_irqfd=with_irqfd@entry=false) + at ../hw/virtio/virtio-pci.c:974 +8 0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers (d=0x558f568f0f60, nvqs=3, assign=true) at ../hw/virtio/virtio-pci.c:1019 +9 0x0000558f52bf091d in vhost_net_start (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1) + at ../hw/net/vhost_net.c:361 +10 0x0000558f52d4e5e7 in virtio_net_set_status (status=, n=0x558f568f91f0) at ../hw/net/virtio-net.c:289 +11 0x0000558f52d4e5e7 in virtio_net_set_status (vdev=0x558f568f91f0, status=15 '\017') at ../hw/net/virtio-net.c:370 +12 0x0000558f52d6c4b2 in virtio_set_status (vdev=vdev@entry=0x558f568f91f0, 
val=val@entry=15 '\017') at ../hw/virtio/virtio.c:1945 +13 0x0000558f52c69eff in virtio_pci_common_write (opaque=0x558f568f0f60, addr=, val=, size=) at ../hw/virtio/virtio-pci.c:1292 +14 0x0000558f52d15d6e in memory_region_write_accessor (mr=0x558f568f19d0, addr=20, value=, size=1, shift=, mask=, attrs=...) + at ../softmmu/memory.c:492 +15 0x0000558f52d127de in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, size=size@entry=1, access_size_min=, access_size_max=, access_fn=0x558f52d15cf0 , mr=0x558f568f19d0, attrs=...) at ../softmmu/memory.c:554 +16 0x0000558f52d157ef in memory_region_dispatch_write (mr=mr@entry=0x558f568f19d0, addr=20, data=, op=, attrs=attrs@entry=...) + at ../softmmu/memory.c:1504 +17 0x0000558f52d078e7 in flatview_write_continue (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, addr1=, l=, mr=0x558f568f19d0) at /home/opc/qemu-upstream/include/qemu/host-utils.h:165 +18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at ../softmmu/physmem.c:2822 +19 0x0000558f52d0b36b in address_space_write (as=, addr=, attrs=..., buf=buf@entry=0x7f8ce6300028, len=) + at ../softmmu/physmem.c:2914 +20 0x0000558f52d0b3da in address_space_rw (as=, addr=, attrs=..., + attrs@entry=..., buf=buf@entry=0x7f8ce6300028, len=, is_write=) at ../softmmu/physmem.c:2924 +21 0x0000558f52dced09 in kvm_cpu_exec (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903 +22 0x0000558f52dcfabd in kvm_vcpu_thread_fn (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49 +23 0x0000558f52f9f04a in qemu_thread_start (args=) at ../util/qemu-thread-posix.c:556 +24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0 +25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6 + +The cause for the assert failure is due to that the vhost_dev index +for the ctrl vq was not aligned with actual one in use by the 
guest. +Upon multiqueue feature negotiation in virtio_net_set_multiqueue(), +if guest doesn't support multiqueue, the guest vq layout would shrink +to a single queue pair, consisting of 3 vqs in total (rx, tx and ctrl). +This results in ctrl_vq taking a different vhost_dev group index than +the default. We can map vq to the correct vhost_dev group by checking +if MQ is supported by guest and successfully negotiated. Since the +MQ feature is only present along with CTRL_VQ, we ensure the index +2 is only meant for the control vq while MQ is not supported by guest. + +Fixes: 22288fe ("virtio-net: vhost control virtqueue support") +Suggested-by: Jason Wang +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-3-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 68b0a6395f36a8f48f56f46d05f30be2067598b0) +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 33 +++++++++++++++++++++++++++++++-- + 1 file changed, 31 insertions(+), 2 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index ffb3475201..f0bb29c741 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + #include "qemu/atomic.h" + #include "qemu/iov.h" ++#include "qemu/log.h" + #include "qemu/main-loop.h" + #include "qemu/module.h" + #include "hw/virtio/virtio.h" +@@ -3171,8 +3172,22 @@ static NetClientInfo net_virtio_info = { + static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) + { + VirtIONet *n = VIRTIO_NET(vdev); +- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ NetClientState *nc; + assert(n->vhost_started); ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { ++ /* Must guard against invalid features and bogus queue index ++ * from being set by malicious guest, or penetrated through ++ * buggy migration stream. 
++ */ ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "%s: bogus vq index ignored\n", __func__); ++ return false; ++ } ++ nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); ++ } else { ++ nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ } + return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); + } + +@@ -3180,8 +3195,22 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, + bool mask) + { + VirtIONet *n = VIRTIO_NET(vdev); +- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ NetClientState *nc; + assert(n->vhost_started); ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { ++ /* Must guard against invalid features and bogus queue index ++ * from being set by malicious guest, or penetrated through ++ * buggy migration stream. ++ */ ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "%s: bogus vq index ignored\n", __func__); ++ return; ++ } ++ nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); ++ } else { ++ nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ } + vhost_net_virtqueue_mask(get_vhost_net(nc->peer), + vdev, idx, mask); + } +-- +2.31.1 + diff --git a/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch b/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch new file mode 100644 index 0000000..3930cc2 --- /dev/null +++ b/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch @@ -0,0 +1,109 @@ +From 521a1953bc11ab6823dcbbee773bcf86e926a9e7 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:18 -0700 +Subject: [PATCH 14/16] virtio-net: don't handle mq request in userspace + handler for vhost-vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [7/7] 9781cab45448ae16a00fbf10cf7995df6b984a0a (jasowang/qemu-kvm-cs) 
+RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +virtio_queue_host_notifier_read() tends to read pending event +left behind on ioeventfd in the vhost_net_stop() path, and +attempts to handle outstanding kicks from userspace vq handler. +However, in the ctrl_vq handler, virtio_net_handle_mq() has a +recursive call into virtio_net_set_status(), which may lead to +segmentation fault as shown in below stack trace: + +0 0x000055f800df1780 in qdev_get_parent_bus (dev=0x0) at ../hw/core/qdev.c:376 +1 0x000055f800c68ad8 in virtio_bus_device_iommu_enabled (vdev=vdev@entry=0x0) at ../hw/virtio/virtio-bus.c:331 +2 0x000055f800d70d7f in vhost_memory_unmap (dev=) at ../hw/virtio/vhost.c:318 +3 0x000055f800d70d7f in vhost_memory_unmap (dev=, buffer=0x7fc19bec5240, len=2052, is_write=1, access_len=2052) at ../hw/virtio/vhost.c:336 +4 0x000055f800d71867 in vhost_virtqueue_stop (dev=dev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590, vq=0x55f8037cceb0, idx=0) at ../hw/virtio/vhost.c:1241 +5 0x000055f800d7406c in vhost_dev_stop (hdev=hdev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590) at ../hw/virtio/vhost.c:1839 +6 0x000055f800bf00a7 in vhost_net_stop_one (net=0x55f8037ccc30, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:315 +7 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1) + at ../hw/net/vhost_net.c:423 +8 0x000055f800d4e628 in virtio_net_set_status (status=, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296 +9 0x000055f800d4e628 in virtio_net_set_status (vdev=vdev@entry=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370 +10 0x000055f800d534d8 in virtio_net_handle_ctrl (iov_cnt=, iov=, cmd=0 '\000', n=0x55f8044ec590) at ../hw/net/virtio-net.c:1408 +11 0x000055f800d534d8 in virtio_net_handle_ctrl (vdev=0x55f8044ec590, vq=0x7fc1a7e888d0) at ../hw/net/virtio-net.c:1452 +12 0x000055f800d69f37 in 
virtio_queue_host_notifier_read (vq=0x7fc1a7e888d0) at ../hw/virtio/virtio.c:2331 +13 0x000055f800d69f37 in virtio_queue_host_notifier_read (n=n@entry=0x7fc1a7e8894c) at ../hw/virtio/virtio.c:3575 +14 0x000055f800c688e6 in virtio_bus_cleanup_host_notifier (bus=, n=n@entry=14) at ../hw/virtio/virtio-bus.c:312 +15 0x000055f800d73106 in vhost_dev_disable_notifiers (hdev=hdev@entry=0x55f8035b51b0, vdev=vdev@entry=0x55f8044ec590) + at ../../../include/hw/virtio/virtio-bus.h:35 +16 0x000055f800bf00b2 in vhost_net_stop_one (net=0x55f8035b51b0, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:316 +17 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1) + at ../hw/net/vhost_net.c:423 +18 0x000055f800d4e628 in virtio_net_set_status (status=, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296 +19 0x000055f800d4e628 in virtio_net_set_status (vdev=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370 +20 0x000055f800d6c4b2 in virtio_set_status (vdev=0x55f8044ec590, val=) at ../hw/virtio/virtio.c:1945 +21 0x000055f800d11d9d in vm_state_notify (running=running@entry=false, state=state@entry=RUN_STATE_SHUTDOWN) at ../softmmu/runstate.c:333 +22 0x000055f800d04e7a in do_vm_stop (state=state@entry=RUN_STATE_SHUTDOWN, send_stop=send_stop@entry=false) at ../softmmu/cpus.c:262 +23 0x000055f800d04e99 in vm_shutdown () at ../softmmu/cpus.c:280 +24 0x000055f800d126af in qemu_cleanup () at ../softmmu/runstate.c:812 +25 0x000055f800ad5b13 in main (argc=, argv=, envp=) at ../softmmu/main.c:51 + +For now, temporarily disable handling MQ request from the ctrl_vq +userspace handler to avoid the recursive virtio_net_set_status() +call. Some rework is needed to allow changing the number of +queues without going through a full virtio_net_set_status cycle, +particularly for vhost-vdpa backend. 
+ +This patch will need to be reverted as soon as future patches of +having the change of #queues handled in userspace is merged. + +Fixes: 402378407db ("vhost-vdpa: multiqueue support") +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-8-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 2a7888cc3aa31faee839fa5dddad354ff8941f4c) +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index f0bb29c741..099e65036d 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1381,6 +1381,7 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, + { + VirtIODevice *vdev = VIRTIO_DEVICE(n); + uint16_t queue_pairs; ++ NetClientState *nc = qemu_get_queue(n->nic); + + virtio_net_disable_rss(n); + if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { +@@ -1412,6 +1413,18 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, + return VIRTIO_NET_ERR; + } + ++ /* Avoid changing the number of queue_pairs for vdpa device in ++ * userspace handler. A future fix is needed to handle the mq ++ * change in userspace handler with vhost-vdpa. Let's disable ++ * the mq handling from userspace for now and only allow get ++ * done through the kernel. Ripples may be seen when falling ++ * back to userspace, but without doing it qemu process would ++ * crash on a recursive entry to virtio_net_set_status(). 
++ */ ++ if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { ++ return VIRTIO_NET_ERR; ++ } ++ + n->curr_queue_pairs = queue_pairs; + /* stop the backend before changing the number of queue_pairs to avoid handling a + * disabled queue */ +-- +2.31.1 + diff --git a/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch b/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch new file mode 100644 index 0000000..f6072d2 --- /dev/null +++ b/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch @@ -0,0 +1,52 @@ +From 9e737aba614e94da4458f02d4ff97e95ffffd19f Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:12 -0700 +Subject: [PATCH 08/16] virtio-net: setup vhost_dev and notifiers for cvq only + when feature is negotiated +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [1/7] a5c5a2862b2e4d15ef7c09da3e4234fdef37cc66 (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +When the control virtqueue feature is absent or not negotiated, +vhost_net_start() still tries to set up vhost_dev and install +vhost notifiers for the control virtqueue, which results in +erroneous ioctl calls with incorrect queue index sending down +to driver. Do that only when needed. + +Fixes: 22288fe ("virtio-net: vhost control virtqueue support") +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-2-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit aa8581945a13712ff3eed0ad3ba7a9664fc1604b) +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 1067e72b39..ffb3475201 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -245,7 +245,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) + VirtIODevice *vdev = VIRTIO_DEVICE(n); + NetClientState *nc = qemu_get_queue(n->nic); + int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; +- int cvq = n->max_ncs - n->max_queue_pairs; ++ int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? ++ n->max_ncs - n->max_queue_pairs : 0; + + if (!get_vhost_net(nc->peer)) { + return; +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index da9fadb..26f0c0a 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.0.0 -Release: 5%{?rcrel}%{?dist}%{?cc_suffix} +Release: 6%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -244,6 +244,38 @@ Patch44: kvm-migration-Fix-operator-type.patch Patch45: kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch # For bz#1708300 - RFE: qemu-nbd vs NBD_FLAG_CAN_MULTI_CONN Patch46: kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch +# For bz#2031024 - Add support for fixing thread pool size [QEMU] +Patch47: kvm-Introduce-event-loop-base-abstract-class.patch +# For bz#2031024 - Add support for fixing thread pool size [QEMU] +Patch48: kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch +# For bz#2031024 - Add support for fixing thread pool size [QEMU] +Patch49: kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch +# For bz#2072379 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) +Patch50: kvm-qcow2-Improve-refcount-structure-rebuilding.patch +# For bz#2072379 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) +Patch51: kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch +# For bz#2072379 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) +Patch52: kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch +# For bz#2072379 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. 
LVs) +Patch53: kvm-iotests-108-Fix-when-missing-user_allow_other.patch +# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA +Patch54: kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch +# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA +Patch55: kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch +# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA +Patch56: kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch +# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA +Patch57: kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch +# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA +Patch58: kvm-vhost-vdpa-backend-feature-should-set-only-once.patch +# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA +Patch59: kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch +# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA +Patch60: kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch +# For bz#2094270 - Do not set the hard vCPU limit to the soft vCPU limit in downstream qemu-kvm anymore +Patch61: kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch +# For bz#2086262 - [Win11][tpm]vfio_listener_region_del received unaligned region +Patch62: kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch # Source-git patches @@ -1279,6 +1311,34 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Jun 13 2022 Miroslav Rezanina - 7.0.0-6 +- kvm-Introduce-event-loop-base-abstract-class.patch [bz#2031024] +- kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch [bz#2031024] +- kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch [bz#2031024] +- kvm-qcow2-Improve-refcount-structure-rebuilding.patch [bz#2072379] +- kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch [bz#2072379] +- kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch [bz#2072379] +- 
kvm-iotests-108-Fix-when-missing-user_allow_other.patch [bz#2072379] +- kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch [bz#2070804] +- kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch [bz#2070804] +- kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch [bz#2070804] +- kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch [bz#2070804] +- kvm-vhost-vdpa-backend-feature-should-set-only-once.patch [bz#2070804] +- kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch [bz#2070804] +- kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch [bz#2070804] +- kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch [bz#2094270] +- kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch [bz#2086262] +- Resolves: bz#2031024 + (Add support for fixing thread pool size [QEMU]) +- Resolves: bz#2072379 + (Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs)) +- Resolves: bz#2070804 + (PXE boot crash qemu when using multiqueue vDPA) +- Resolves: bz#2094270 + (Do not set the hard vCPU limit to the soft vCPU limit in downstream qemu-kvm anymore) +- Resolves: bz#2086262 + ([Win11][tpm]vfio_listener_region_del received unaligned region) + * Mon May 30 2022 Miroslav Rezanina - 7.0.0-5 - kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch [bz#1708300] - kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch [bz#1708300] From 0ddb1bc43319b7848d7ecaf465d74395079ee194 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 27 Jun 2022 04:48:14 -0400 Subject: [PATCH 162/195] * Mon Jun 27 2022 Miroslav Rezanina - 7.0.0-7 - kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch [bz#1952483] - kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch [bz#1952483] - kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch [bz#1952483] - kvm-Enable-virtio-iommu-pci-on-x86_64.patch [bz#2094252] - kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch [bz#2092788] - 
kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch [bz#2092788] - Resolves: bz#1952483 (RFE: QEMU's coroutines fail with CFLAGS=-flto on non-x86_64 architectures) - Resolves: bz#2094252 (Compile the virtio-iommu device on x86_64) - Resolves: bz#2092788 (Stalled IO Operations in VM) --- kvm-Enable-virtio-iommu-pci-on-x86_64.patch | 41 ++++++ ...ontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch | 132 +++++++++++++++++ ...outine-use-QEMU_DEFINE_STATIC_CO_TLS.patch | 139 ++++++++++++++++++ ...-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch | 99 +++++++++++++ ...n-why-max-batch-is-checked-in-laio_i.patch | 49 ++++++ ...balanced-plugged-counter-in-laio_io_.patch | 56 +++++++ qemu-kvm.spec | 28 +++- 7 files changed, 543 insertions(+), 1 deletion(-) create mode 100644 kvm-Enable-virtio-iommu-pci-on-x86_64.patch create mode 100644 kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch create mode 100644 kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch create mode 100644 kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch create mode 100644 kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch create mode 100644 kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch diff --git a/kvm-Enable-virtio-iommu-pci-on-x86_64.patch b/kvm-Enable-virtio-iommu-pci-on-x86_64.patch new file mode 100644 index 0000000..2eb24df --- /dev/null +++ b/kvm-Enable-virtio-iommu-pci-on-x86_64.patch @@ -0,0 +1,41 @@ +From c531a39171201f8a1d063e6af752e5d629c1b4bf Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 9 Jun 2022 11:35:18 +0200 +Subject: [PATCH 4/6] Enable virtio-iommu-pci on x86_64 + +RH-Author: Eric Auger +RH-MergeRequest: 100: Enable virtio-iommu-pci on x86_64 +RH-Commit: [1/1] a164af477efc7cb9d3d76a0e644f198f7c9fb2b5 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2094252 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: MST +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094252 +Brew: 
https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45871185 +Upstream Status: RHEL-only +Tested: With virtio-net-pci and virtio-block-pci + +let's enable the virtio-iommu-pci device on x86_64 by +turning CONFIG_VIRTIO_IOMMU on. + +Signed-off-by: Eric Auger +--- + configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +index d0c9e66641..3850b9de72 100644 +--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -90,6 +90,7 @@ CONFIG_VHOST_USER_BLK=y + CONFIG_VIRTIO_MEM=y + CONFIG_VIRTIO_PCI=y + CONFIG_VIRTIO_VGA=y ++CONFIG_VIRTIO_IOMMU=y + CONFIG_VMMOUSE=y + CONFIG_VMPORT=y + CONFIG_VTD=y +-- +2.31.1 + diff --git a/kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch b/kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch new file mode 100644 index 0000000..963cf04 --- /dev/null +++ b/kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch @@ -0,0 +1,132 @@ +From ffbd90e5f4eba620c7cd631b04f0ed31beb22ffa Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 12:07:56 +0100 +Subject: [PATCH 1/6] coroutine-ucontext: use QEMU_DEFINE_STATIC_CO_TLS() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables +RH-Commit: [1/3] a9782fe8e919c4bd317b7e8744c7ff57d898add3 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 1952483 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Eric Blake +RH-Acked-by: Kevin Wolf + +Thread-Local Storage variables cannot be used directly from coroutine +code because the compiler may optimize TLS variable accesses across +qemu_coroutine_yield() calls. 
When the coroutine is re-entered from +another thread the TLS variables from the old thread must no longer be +used. + +Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220307153853.602859-2-stefanha@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit 34145a307d849d0b6734d0222a7aa0bb9eef7407) +Signed-off-by: Stefan Hajnoczi +--- + util/coroutine-ucontext.c | 38 ++++++++++++++++++++++++-------------- + 1 file changed, 24 insertions(+), 14 deletions(-) + +diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c +index 904b375192..127d5a13c8 100644 +--- a/util/coroutine-ucontext.c ++++ b/util/coroutine-ucontext.c +@@ -25,6 +25,7 @@ + #include "qemu/osdep.h" + #include + #include "qemu/coroutine_int.h" ++#include "qemu/coroutine-tls.h" + + #ifdef CONFIG_VALGRIND_H + #include +@@ -66,8 +67,8 @@ typedef struct { + /** + * Per-thread coroutine bookkeeping + */ +-static __thread CoroutineUContext leader; +-static __thread Coroutine *current; ++QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current); ++QEMU_DEFINE_STATIC_CO_TLS(CoroutineUContext, leader); + + /* + * va_args to makecontext() must be type 'int', so passing +@@ -97,14 +98,15 @@ static inline __attribute__((always_inline)) + void finish_switch_fiber(void *fake_stack_save) + { + #ifdef CONFIG_ASAN ++ CoroutineUContext *leaderp = get_ptr_leader(); + const void *bottom_old; + size_t size_old; + + __sanitizer_finish_switch_fiber(fake_stack_save, &bottom_old, &size_old); + +- if (!leader.stack) { +- leader.stack = (void *)bottom_old; +- leader.stack_size = size_old; ++ if (!leaderp->stack) { ++ leaderp->stack = (void *)bottom_old; ++ leaderp->stack_size = size_old; + } + #endif + #ifdef CONFIG_TSAN +@@ -161,8 +163,10 @@ static void coroutine_trampoline(int i0, int i1) + + /* Initialize longjmp environment and switch back the caller */ + if (!sigsetjmp(self->env, 0)) { +- 
start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, leader.stack, +- leader.stack_size); ++ CoroutineUContext *leaderp = get_ptr_leader(); ++ ++ start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, ++ leaderp->stack, leaderp->stack_size); + start_switch_fiber_tsan(&fake_stack_save, self, true); /* true=caller */ + siglongjmp(*(sigjmp_buf *)co->entry_arg, 1); + } +@@ -297,7 +301,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + int ret; + void *fake_stack_save = NULL; + +- current = to_; ++ set_current(to_); + + ret = sigsetjmp(from->env, 0); + if (ret == 0) { +@@ -315,18 +319,24 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + + Coroutine *qemu_coroutine_self(void) + { +- if (!current) { +- current = &leader.base; ++ Coroutine *self = get_current(); ++ CoroutineUContext *leaderp = get_ptr_leader(); ++ ++ if (!self) { ++ self = &leaderp->base; ++ set_current(self); + } + #ifdef CONFIG_TSAN +- if (!leader.tsan_co_fiber) { +- leader.tsan_co_fiber = __tsan_get_current_fiber(); ++ if (!leaderp->tsan_co_fiber) { ++ leaderp->tsan_co_fiber = __tsan_get_current_fiber(); + } + #endif +- return current; ++ return self; + } + + bool qemu_in_coroutine(void) + { +- return current && current->caller; ++ Coroutine *self = get_current(); ++ ++ return self && self->caller; + } +-- +2.31.1 + diff --git a/kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch b/kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch new file mode 100644 index 0000000..9d0f811 --- /dev/null +++ b/kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch @@ -0,0 +1,139 @@ +From 9c2e55d25fec6ffb21e344513b7dbeed7e21f641 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 12:08:04 +0100 +Subject: [PATCH 2/6] coroutine: use QEMU_DEFINE_STATIC_CO_TLS() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables 
+RH-Commit: [2/3] 68a8847e406e2eace6ddc31b0c5676a60600d606 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 1952483 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Eric Blake +RH-Acked-by: Kevin Wolf + +Thread-Local Storage variables cannot be used directly from coroutine +code because the compiler may optimize TLS variable accesses across +qemu_coroutine_yield() calls. When the coroutine is re-entered from +another thread the TLS variables from the old thread must no longer be +used. + +Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. +The alloc_pool QSLIST needs a typedef so the return value of +get_ptr_alloc_pool() can be stored in a local variable. + +One example of why this code is necessary: a coroutine that yields +before calling qemu_coroutine_create() to create another coroutine is +affected by the TLS issue. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220307153853.602859-3-stefanha@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit ac387a08a9c9f6b36757da912f0339c25f421f90) + +Conflicts: +- Context conflicts due to commit 5411171c3ef4 ("coroutine: Revert to + constant batch size"). 
+ +Signed-off-by: Stefan Hajnoczi +--- + util/qemu-coroutine.c | 41 ++++++++++++++++++++++++----------------- + 1 file changed, 24 insertions(+), 17 deletions(-) + +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index 804f672e0a..4a8bd63ef0 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -18,6 +18,7 @@ + #include "qemu/atomic.h" + #include "qemu/coroutine.h" + #include "qemu/coroutine_int.h" ++#include "qemu/coroutine-tls.h" + #include "block/aio.h" + + /** +@@ -35,17 +36,20 @@ enum { + static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); + static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE; + static unsigned int release_pool_size; +-static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); +-static __thread unsigned int alloc_pool_size; +-static __thread Notifier coroutine_pool_cleanup_notifier; ++ ++typedef QSLIST_HEAD(, Coroutine) CoroutineQSList; ++QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool); ++QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size); ++QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier); + + static void coroutine_pool_cleanup(Notifier *n, void *value) + { + Coroutine *co; + Coroutine *tmp; ++ CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); + +- QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) { +- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); ++ QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) { ++ QSLIST_REMOVE_HEAD(alloc_pool, pool_next); + qemu_coroutine_delete(co); + } + } +@@ -55,27 +59,30 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) + Coroutine *co = NULL; + + if (CONFIG_COROUTINE_POOL) { +- co = QSLIST_FIRST(&alloc_pool); ++ CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); ++ ++ co = QSLIST_FIRST(alloc_pool); + if (!co) { + if (release_pool_size > POOL_MIN_BATCH_SIZE) { + /* Slow path; a good place to register the destructor, too. 
*/ +- if (!coroutine_pool_cleanup_notifier.notify) { +- coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; +- qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier); ++ Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier(); ++ if (!notifier->notify) { ++ notifier->notify = coroutine_pool_cleanup; ++ qemu_thread_atexit_add(notifier); + } + + /* This is not exact; there could be a little skew between + * release_pool_size and the actual size of release_pool. But + * it is just a heuristic, it does not need to be perfect. + */ +- alloc_pool_size = qatomic_xchg(&release_pool_size, 0); +- QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool); +- co = QSLIST_FIRST(&alloc_pool); ++ set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0)); ++ QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool); ++ co = QSLIST_FIRST(alloc_pool); + } + } + if (co) { +- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); +- alloc_pool_size--; ++ QSLIST_REMOVE_HEAD(alloc_pool, pool_next); ++ set_alloc_pool_size(get_alloc_pool_size() - 1); + } + } + +@@ -99,9 +106,9 @@ static void coroutine_delete(Coroutine *co) + qatomic_inc(&release_pool_size); + return; + } +- if (alloc_pool_size < qatomic_read(&pool_max_size)) { +- QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); +- alloc_pool_size++; ++ if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) { ++ QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next); ++ set_alloc_pool_size(get_alloc_pool_size() + 1); + return; + } + } +-- +2.31.1 + diff --git a/kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch b/kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch new file mode 100644 index 0000000..1665319 --- /dev/null +++ b/kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch @@ -0,0 +1,99 @@ +From 336581e6e9ace3f1ddd24ad0a258db9785f9b0ed Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 12:08:12 +0100 +Subject: [PATCH 3/6] coroutine-win32: use QEMU_DEFINE_STATIC_CO_TLS() +MIME-Version: 1.0 +Content-Type: 
text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables +RH-Commit: [3/3] 55b35dfdae1bc7d6f614ac9f81a92f5c6431f713 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 1952483 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Eric Blake +RH-Acked-by: Kevin Wolf + +Thread-Local Storage variables cannot be used directly from coroutine +code because the compiler may optimize TLS variable accesses across +qemu_coroutine_yield() calls. When the coroutine is re-entered from +another thread the TLS variables from the old thread must no longer be +used. + +Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. + +I think coroutine-win32.c could get away with __thread because the +variables are only used in situations where either the stale value is +correct (current) or outside coroutine context (loading leader when +current is NULL). Due to the difficulty of being sure that this is +really safe in all scenarios it seems worth converting it anyway. 
+ +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220307153853.602859-4-stefanha@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit c1fe694357a328c807ae3cc6961c19e923448fcc) +Signed-off-by: Stefan Hajnoczi +--- + util/coroutine-win32.c | 18 +++++++++++++----- + 1 file changed, 13 insertions(+), 5 deletions(-) + +diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c +index de6bd4fd3e..c02a62c896 100644 +--- a/util/coroutine-win32.c ++++ b/util/coroutine-win32.c +@@ -25,6 +25,7 @@ + #include "qemu/osdep.h" + #include "qemu-common.h" + #include "qemu/coroutine_int.h" ++#include "qemu/coroutine-tls.h" + + typedef struct + { +@@ -34,8 +35,8 @@ typedef struct + CoroutineAction action; + } CoroutineWin32; + +-static __thread CoroutineWin32 leader; +-static __thread Coroutine *current; ++QEMU_DEFINE_STATIC_CO_TLS(CoroutineWin32, leader); ++QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current); + + /* This function is marked noinline to prevent GCC from inlining it + * into coroutine_trampoline(). 
If we allow it to do that then it +@@ -52,7 +53,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_); + CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_); + +- current = to_; ++ set_current(to_); + + to->action = action; + SwitchToFiber(to->fiber); +@@ -89,14 +90,21 @@ void qemu_coroutine_delete(Coroutine *co_) + + Coroutine *qemu_coroutine_self(void) + { ++ Coroutine *current = get_current(); ++ + if (!current) { +- current = &leader.base; +- leader.fiber = ConvertThreadToFiber(NULL); ++ CoroutineWin32 *leader = get_ptr_leader(); ++ ++ current = &leader->base; ++ set_current(current); ++ leader->fiber = ConvertThreadToFiber(NULL); + } + return current; + } + + bool qemu_in_coroutine(void) + { ++ Coroutine *current = get_current(); ++ + return current && current->caller; + } +-- +2.31.1 + diff --git a/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch b/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch new file mode 100644 index 0000000..f12b8ec --- /dev/null +++ b/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch @@ -0,0 +1,49 @@ +From 49d9c9dced7278517105e9cfec34ea4af716432d Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 9 Jun 2022 17:47:12 +0100 +Subject: [PATCH 6/6] linux-aio: explain why max batch is checked in + laio_io_unplug() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 102: linux-aio: fix unbalanced plugged counter in laio_io_unplug() +RH-Commit: [2/2] b3d6421086bde50d4baad2343b2df89c5f66950e (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2092788 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +It may not be obvious why laio_io_unplug() checks max batch. I discussed +this with Stefano and have added a comment summarizing the reason. 
+ +Cc: Stefano Garzarella +Cc: Kevin Wolf +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Stefano Garzarella +Message-id: 20220609164712.1539045-3-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 99b969fbe105117f5af6060d3afef40ca39cc9c1) +Signed-off-by: Stefan Hajnoczi +--- + block/linux-aio.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/block/linux-aio.c b/block/linux-aio.c +index 6078da7e42..9c2393a2f7 100644 +--- a/block/linux-aio.c ++++ b/block/linux-aio.c +@@ -365,6 +365,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, + assert(s->io_q.plugged); + s->io_q.plugged--; + ++ /* ++ * Why max batch checking is performed here: ++ * Another BDS may have queued requests with a higher dev_max_batch and ++ * therefore in_queue could now exceed our dev_max_batch. Re-check the max ++ * batch so we can honor our device's dev_max_batch. ++ */ + if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || + (!s->io_q.plugged && + !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { +-- +2.31.1 + diff --git a/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch b/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch new file mode 100644 index 0000000..ed9b5ee --- /dev/null +++ b/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch @@ -0,0 +1,56 @@ +From e7326c3a7e0fc022aa5c0ae07bc1e19ad1b6f2ed Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 9 Jun 2022 17:47:11 +0100 +Subject: [PATCH 5/6] linux-aio: fix unbalanced plugged counter in + laio_io_unplug() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 102: linux-aio: fix unbalanced plugged counter in laio_io_unplug() +RH-Commit: [1/2] 8a71da371c72521f1d70b8767ee564575e0d522b (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2092788 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +Every laio_io_plug() call has a matching laio_io_unplug() call. 
There is +a plugged counter that tracks the number of levels of plugging and +allows for nesting. + +The plugged counter must reflect the balance between laio_io_plug() and +laio_io_unplug() calls accurately. Otherwise I/O stalls occur since +io_submit(2) calls are skipped while plugged. + +Reported-by: Nikolay Tenev +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Stefano Garzarella +Message-id: 20220609164712.1539045-2-stefanha@redhat.com +Cc: Stefano Garzarella +Fixes: 68d7946648 ("linux-aio: add `dev_max_batch` parameter to laio_io_unplug()") +[Stefano Garzarella suggested adding a Fixes tag. +--Stefan] +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit f387cac5af030a58ac5a0dacf64cab5e5a4fe5c7) +Signed-off-by: Stefan Hajnoczi +--- + block/linux-aio.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/block/linux-aio.c b/block/linux-aio.c +index 4c423fcccf..6078da7e42 100644 +--- a/block/linux-aio.c ++++ b/block/linux-aio.c +@@ -363,8 +363,10 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, + uint64_t dev_max_batch) + { + assert(s->io_q.plugged); ++ s->io_q.plugged--; ++ + if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || +- (--s->io_q.plugged == 0 && ++ (!s->io_q.plugged && + !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { + ioq_submit(s); + } +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 26f0c0a..901b08b 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.0.0 -Release: 6%{?rcrel}%{?dist}%{?cc_suffix} +Release: 7%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -276,6 +276,18 @@ Patch60: kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch Patch61: kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch # For bz#2086262 - [Win11][tpm]vfio_listener_region_del received unaligned region Patch62: kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch +# For bz#1952483 - RFE: QEMU's coroutines fail with CFLAGS=-flto on non-x86_64 architectures +Patch63: kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch +# For bz#1952483 - RFE: QEMU's coroutines fail with CFLAGS=-flto on non-x86_64 architectures +Patch64: kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch +# For bz#1952483 - RFE: QEMU's coroutines fail with CFLAGS=-flto on non-x86_64 architectures +Patch65: kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch +# For bz#2094252 - Compile the virtio-iommu device on x86_64 +Patch66: kvm-Enable-virtio-iommu-pci-on-x86_64.patch +# For bz#2092788 - Stalled IO Operations in VM +Patch67: kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch +# For bz#2092788 - Stalled IO Operations in VM +Patch68: kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch # Source-git patches @@ -1311,6 +1323,20 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Jun 27 2022 Miroslav Rezanina - 7.0.0-7 +- kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch [bz#1952483] +- kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch [bz#1952483] +- kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch [bz#1952483] +- kvm-Enable-virtio-iommu-pci-on-x86_64.patch [bz#2094252] +- kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch [bz#2092788] +- kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch [bz#2092788] +- Resolves: bz#1952483 + (RFE: QEMU's coroutines fail with CFLAGS=-flto on non-x86_64 architectures) +- Resolves: 
bz#2094252 + (Compile the virtio-iommu device on x86_64) +- Resolves: bz#2092788 + (Stalled IO Operations in VM) + * Mon Jun 13 2022 Miroslav Rezanina - 7.0.0-6 - kvm-Introduce-event-loop-base-abstract-class.patch [bz#2031024] - kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch [bz#2031024] From da38d5c28e20aa584793bf57606c8231adc96d60 Mon Sep 17 00:00:00 2001 From: Camilla Conte Date: Tue, 5 Jul 2022 10:49:23 +0000 Subject: [PATCH 163/195] * Tue Jul 05 2022 Camilla Conte - 7.0.0-8 - kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch [bz#2060839] - kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch [bz#2060839] - kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch [bz#2060839] - kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch [bz#2060839] - kvm-target-s390x-deprecate-CPUs-older-than-z14.patch [bz#2060839] - kvm-target-arm-deprecate-named-CPU-models.patch [bz#2060839] - kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch [bz#1968509] - kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch [bz#1968509] - kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch [bz#1968509] - kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch [bz#1968509] - kvm-migration-Add-migrate_use_tls-helper.patch [bz#1968509] - kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch [bz#1968509] - kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch [bz#1968509] - kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch [bz#1968509] - kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch [bz#1968509] - kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch [bz#1968509] - kvm-migration-Change-zero_copy_send-from-migration-param.patch [bz#1968509] - kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch [bz#2096143] - Resolves: bz#2060839 (Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9) - Resolves: bz#1968509 (Use MSG_ZEROCOPY on QEMU Live 
Migration) - Resolves: bz#2096143 (The migration port is not released if use it again for recovering postcopy migration) --- ...lags-on-io_writev-and-introduce-io_f.patch | 420 ++++++++++++++++++ ...-Fix-zero-copy-send-so-socket-flush-.patch | 58 +++ ...-Implement-io_writev-zero-copy-flag-.patch | 249 +++++++++++ ...-Introduce-assert-and-reduce-ifdefs-.patch | 82 ++++ ...y-AArch64-Drop-unsupported-CPU-types.patch | 237 ++++++++++ ...avocado-Switch-aarch64-tests-from-a5.patch | 95 ++++ ...docker-test-build-alpine-when-includ.patch | 87 ++++ ...migration-Add-migrate_use_tls-helper.patch | 106 +++++ ...ro-copy-send-parameter-for-QMP-HMP-f.patch | 250 +++++++++++ ...migrate-recover-to-run-multiple-time.patch | 98 ++++ ...-zero_copy_send-from-migration-param.patch | 289 ++++++++++++ ...t-zero-copy-write-in-multifd-migrati.patch | 182 ++++++++ ...der-packet-without-flags-if-zero-cop.patch | 102 +++++ ...send_sync_main-now-returns-negative-.patch | 163 +++++++ ...arget-arm-deprecate-named-CPU-models.patch | 129 ++++++ ...recate-CPUs-older-than-x86_64-v2-ABI.patch | 273 ++++++++++++ ...-s390x-deprecate-CPUs-older-than-z14.patch | 194 ++++++++ ...date-aarch64_virt-test-to-exercise-c.patch | 157 +++++++ qemu-kvm.spec | 64 ++- 19 files changed, 3234 insertions(+), 1 deletion(-) create mode 100644 kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch create mode 100644 kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch create mode 100644 kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch create mode 100644 kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch create mode 100644 kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch create mode 100644 kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch create mode 100644 kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch create mode 100644 kvm-migration-Add-migrate_use_tls-helper.patch create mode 100644 
kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch create mode 100644 kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch create mode 100644 kvm-migration-Change-zero_copy_send-from-migration-param.patch create mode 100644 kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch create mode 100644 kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch create mode 100644 kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch create mode 100644 kvm-target-arm-deprecate-named-CPU-models.patch create mode 100644 kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch create mode 100644 kvm-target-s390x-deprecate-CPUs-older-than-z14.patch create mode 100644 kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch diff --git a/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch b/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch new file mode 100644 index 0000000..c7b8898 --- /dev/null +++ b/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch @@ -0,0 +1,420 @@ +From cda3fcf14f2883fea633e25256f6c14a71271adf Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:31 -0300 +Subject: [PATCH 08/18] QIOChannel: Add flags on io_writev and introduce + io_flush callback +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [2/11] 06acfb6b0cb2c25733c2eb198011f7623b5a7024 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Add flags to io_writev and introduce io_flush as optional callback to +QIOChannelClass, allowing the implementation of zero copy writes by +subclasses. + +How to use them: +- Write data using qio_channel_writev*(...,QIO_CHANNEL_WRITE_FLAG_ZERO_COPY), +- Wait write completion with qio_channel_flush(). 
+ +Notes: +As some zero copy write implementations work asynchronously, it's +recommended to keep the write buffer untouched until the return of +qio_channel_flush(), to avoid the risk of sending an updated buffer +instead of the buffer state during write. + +As io_flush callback is optional, if a subclass does not implement it, then: +- io_flush will return 0 without changing anything. + +Also, some functions like qio_channel_writev_full_all() were adapted to +receive a flag parameter. That allows shared code between zero copy and +non-zero copy writev, and also an easier implementation on new flags. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Message-Id: <20220513062836.965425-3-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit b88651cb4d4fa416fdbb6afaf5b26ec8c035eaad) +Signed-off-by: Leonardo Bras +--- + chardev/char-io.c | 2 +- + hw/remote/mpqemu-link.c | 2 +- + include/io/channel.h | 38 +++++++++++++++++++++- + io/channel-buffer.c | 1 + + io/channel-command.c | 1 + + io/channel-file.c | 1 + + io/channel-socket.c | 2 ++ + io/channel-tls.c | 1 + + io/channel-websock.c | 1 + + io/channel.c | 49 +++++++++++++++++++++++------ + migration/rdma.c | 1 + + scsi/pr-manager-helper.c | 2 +- + tests/unit/test-io-channel-socket.c | 1 + + 13 files changed, 88 insertions(+), 14 deletions(-) + +diff --git a/chardev/char-io.c b/chardev/char-io.c +index 8ced184160..4451128cba 100644 +--- a/chardev/char-io.c ++++ b/chardev/char-io.c +@@ -122,7 +122,7 @@ int io_channel_send_full(QIOChannel *ioc, + + ret = qio_channel_writev_full( + ioc, &iov, 1, +- fds, nfds, NULL); ++ fds, nfds, 0, NULL); + if (ret == QIO_CHANNEL_ERR_BLOCK) { + if (offset) { + return offset; +diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c +index 7e841820e5..e8f556bd27 100644 +--- a/hw/remote/mpqemu-link.c ++++ b/hw/remote/mpqemu-link.c +@@ -69,7 +69,7 @@ bool mpqemu_msg_send(MPQemuMsg 
*msg, QIOChannel *ioc, Error **errp) + } + + if (!qio_channel_writev_full_all(ioc, send, G_N_ELEMENTS(send), +- fds, nfds, errp)) { ++ fds, nfds, 0, errp)) { + ret = true; + } else { + trace_mpqemu_send_io_error(msg->cmd, msg->size, nfds); +diff --git a/include/io/channel.h b/include/io/channel.h +index 88988979f8..c680ee7480 100644 +--- a/include/io/channel.h ++++ b/include/io/channel.h +@@ -32,12 +32,15 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, + + #define QIO_CHANNEL_ERR_BLOCK -2 + ++#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 ++ + typedef enum QIOChannelFeature QIOChannelFeature; + + enum QIOChannelFeature { + QIO_CHANNEL_FEATURE_FD_PASS, + QIO_CHANNEL_FEATURE_SHUTDOWN, + QIO_CHANNEL_FEATURE_LISTEN, ++ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, + }; + + +@@ -104,6 +107,7 @@ struct QIOChannelClass { + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp); + ssize_t (*io_readv)(QIOChannel *ioc, + const struct iovec *iov, +@@ -136,6 +140,8 @@ struct QIOChannelClass { + IOHandler *io_read, + IOHandler *io_write, + void *opaque); ++ int (*io_flush)(QIOChannel *ioc, ++ Error **errp); + }; + + /* General I/O handling functions */ +@@ -228,6 +234,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + * @niov: the length of the @iov array + * @fds: an array of file handles to send + * @nfds: number of file handles in @fds ++ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*) + * @errp: pointer to a NULL-initialized error object + * + * Write data to the IO channel, reading it from the +@@ -260,6 +267,7 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp); + + /** +@@ -837,6 +845,7 @@ int qio_channel_readv_full_all(QIOChannel *ioc, + * @niov: the length of the @iov array + * @fds: an array of file handles to send + * @nfds: number of file handles in @fds ++ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*) + * @errp: pointer to a NULL-initialized error object + * + * +@@ -846,6 
+855,14 @@ int qio_channel_readv_full_all(QIOChannel *ioc, + * to be written, yielding from the current coroutine + * if required. + * ++ * If QIO_CHANNEL_WRITE_FLAG_ZERO_COPY is passed in flags, ++ * instead of waiting for all requested data to be written, ++ * this function will wait until it's all queued for writing. ++ * In this case, if the buffer gets changed between queueing and ++ * sending, the updated buffer will be sent. If this is not a ++ * desired behavior, it's suggested to call qio_channel_flush() ++ * before reusing the buffer. ++ * + * Returns: 0 if all bytes were written, or -1 on error + */ + +@@ -853,6 +870,25 @@ int qio_channel_writev_full_all(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, size_t nfds, +- Error **errp); ++ int flags, Error **errp); ++ ++/** ++ * qio_channel_flush: ++ * @ioc: the channel object ++ * @errp: pointer to a NULL-initialized error object ++ * ++ * Will block until every packet queued with ++ * qio_channel_writev_full() + QIO_CHANNEL_WRITE_FLAG_ZERO_COPY ++ * is sent, or return in case of any error. ++ * ++ * If not implemented, acts as a no-op, and returns 0. ++ * ++ * Returns -1 if any error is found, ++ * 1 if every send failed to use zero copy. ++ * 0 otherwise. 
++ */ ++ ++int qio_channel_flush(QIOChannel *ioc, ++ Error **errp); + + #endif /* QIO_CHANNEL_H */ +diff --git a/io/channel-buffer.c b/io/channel-buffer.c +index baa4e2b089..bf52011be2 100644 +--- a/io/channel-buffer.c ++++ b/io/channel-buffer.c +@@ -81,6 +81,7 @@ static ssize_t qio_channel_buffer_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); +diff --git a/io/channel-command.c b/io/channel-command.c +index 338da73ade..54560464ae 100644 +--- a/io/channel-command.c ++++ b/io/channel-command.c +@@ -258,6 +258,7 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); +diff --git a/io/channel-file.c b/io/channel-file.c +index d7cf6d278f..ef6807a6be 100644 +--- a/io/channel-file.c ++++ b/io/channel-file.c +@@ -114,6 +114,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); +diff --git a/io/channel-socket.c b/io/channel-socket.c +index 7a8d9f69c9..a1be2197ca 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -525,6 +525,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); +@@ -620,6 +621,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); +diff --git a/io/channel-tls.c b/io/channel-tls.c +index 2ae1b92fc0..4ce890a538 100644 +--- a/io/channel-tls.c ++++ b/io/channel-tls.c +@@ -301,6 +301,7 @@ static ssize_t qio_channel_tls_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { 
+ QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); +diff --git a/io/channel-websock.c b/io/channel-websock.c +index 55145a6a8c..9619906ac3 100644 +--- a/io/channel-websock.c ++++ b/io/channel-websock.c +@@ -1127,6 +1127,7 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); +diff --git a/io/channel.c b/io/channel.c +index e8b019dc36..0640941ac5 100644 +--- a/io/channel.c ++++ b/io/channel.c +@@ -72,18 +72,32 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + +- if ((fds || nfds) && +- !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { ++ if (fds || nfds) { ++ if (!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { ++ error_setg_errno(errp, EINVAL, ++ "Channel does not support file descriptor passing"); ++ return -1; ++ } ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++ error_setg_errno(errp, EINVAL, ++ "Zero Copy does not support file descriptor passing"); ++ return -1; ++ } ++ } ++ ++ if ((flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) && ++ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { + error_setg_errno(errp, EINVAL, +- "Channel does not support file descriptor passing"); ++ "Requested Zero Copy feature is not available"); + return -1; + } + +- return klass->io_writev(ioc, iov, niov, fds, nfds, errp); ++ return klass->io_writev(ioc, iov, niov, fds, nfds, flags, errp); + } + + +@@ -217,14 +231,14 @@ int qio_channel_writev_all(QIOChannel *ioc, + size_t niov, + Error **errp) + { +- return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, errp); ++ return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, 0, errp); + } + + int qio_channel_writev_full_all(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, size_t nfds, +- Error **errp) 
++ int flags, Error **errp) + { + int ret = -1; + struct iovec *local_iov = g_new(struct iovec, niov); +@@ -237,8 +251,10 @@ int qio_channel_writev_full_all(QIOChannel *ioc, + + while (nlocal_iov > 0) { + ssize_t len; +- len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, nfds, +- errp); ++ ++ len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, ++ nfds, flags, errp); ++ + if (len == QIO_CHANNEL_ERR_BLOCK) { + if (qemu_in_coroutine()) { + qio_channel_yield(ioc, G_IO_OUT); +@@ -277,7 +293,7 @@ ssize_t qio_channel_writev(QIOChannel *ioc, + size_t niov, + Error **errp) + { +- return qio_channel_writev_full(ioc, iov, niov, NULL, 0, errp); ++ return qio_channel_writev_full(ioc, iov, niov, NULL, 0, 0, errp); + } + + +@@ -297,7 +313,7 @@ ssize_t qio_channel_write(QIOChannel *ioc, + Error **errp) + { + struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; +- return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, errp); ++ return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, 0, errp); + } + + +@@ -473,6 +489,19 @@ off_t qio_channel_io_seek(QIOChannel *ioc, + return klass->io_seek(ioc, offset, whence, errp); + } + ++int qio_channel_flush(QIOChannel *ioc, ++ Error **errp) ++{ ++ QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); ++ ++ if (!klass->io_flush || ++ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { ++ return 0; ++ } ++ ++ return klass->io_flush(ioc, errp); ++} ++ + + static void qio_channel_restart_read(void *opaque) + { +diff --git a/migration/rdma.c b/migration/rdma.c +index ef1e65ec36..672d1958a9 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -2840,6 +2840,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); +diff --git a/scsi/pr-manager-helper.c b/scsi/pr-manager-helper.c +index 451c7631b7..3be52a98d5 100644 +--- a/scsi/pr-manager-helper.c ++++ 
b/scsi/pr-manager-helper.c +@@ -77,7 +77,7 @@ static int pr_manager_helper_write(PRManagerHelper *pr_mgr, + iov.iov_base = (void *)buf; + iov.iov_len = sz; + n_written = qio_channel_writev_full(QIO_CHANNEL(pr_mgr->ioc), &iov, 1, +- nfds ? &fd : NULL, nfds, errp); ++ nfds ? &fd : NULL, nfds, 0, errp); + + if (n_written <= 0) { + assert(n_written != QIO_CHANNEL_ERR_BLOCK); +diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c +index c49eec1f03..6713886d02 100644 +--- a/tests/unit/test-io-channel-socket.c ++++ b/tests/unit/test-io-channel-socket.c +@@ -444,6 +444,7 @@ static void test_io_channel_unix_fd_pass(void) + G_N_ELEMENTS(iosend), + fdsend, + G_N_ELEMENTS(fdsend), ++ 0, + &error_abort); + + qio_channel_readv_full(dst, +-- +2.35.3 + diff --git a/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch b/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch new file mode 100644 index 0000000..9d134e6 --- /dev/null +++ b/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch @@ -0,0 +1,58 @@ +From e70f01749addd7d0b7aa7fa4fdedb664f98e6b9b Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 20 Jun 2022 02:39:43 -0300 +Subject: [PATCH 16/18] QIOChannelSocket: Fix zero-copy send so socket flush + works +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [10/11] a2dfac987e24026b1a78e90b86234ca206b6401f (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Somewhere between v6 and v7 of the zero-copy-send patchset a crucial +part of the flushing mechanism went missing: incrementing zero_copy_queued. + +Without that, the flushing interface becomes a no-op, and there is no +guarantee the buffer is really sent. + +This can go as bad as causing a corruption in RAM during migration.
+ +Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") +Reported-by: 徐闯 +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 4f5a09714c983a3471fd12e3c7f3196e95c650c1) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index 7490e5943d..8ae8b212cf 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -612,6 +612,11 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + "Unable to write to socket"); + return -1; + } ++ ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++ sioc->zero_copy_queued++; ++ } ++ + return ret; + } + #else /* WIN32 */ +-- +2.35.3 + diff --git a/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch b/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch new file mode 100644 index 0000000..89aa806 --- /dev/null +++ b/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch @@ -0,0 +1,249 @@ +From 4aeba0365d30dabe2e70dc172683f0878a4a9621 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:32 -0300 +Subject: [PATCH 09/18] QIOChannelSocket: Implement io_writev zero copy flag & + io_flush for CONFIG_LINUX +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [3/11] 9afeac1f5ac7675624660a0281726c09c8321180 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: Peter Xu + +For CONFIG_LINUX, implement the new zero copy flag and the optional callback +io_flush on QIOChannelSocket, but enables it only when MSG_ZEROCOPY +feature is available in the host kernel, which is checked on +qio_channel_socket_connect_sync() + +qio_channel_socket_flush() was implemented by counting how many times +sendmsg(...,MSG_ZEROCOPY) was successfully called, and then reading the +socket's error queue, in order to find how many of them finished sending. +Flush will loop until those counters are the same, or until some error occurs. + +Notes on using writev() with QIO_CHANNEL_WRITE_FLAG_ZERO_COPY: +1: Buffer +- As MSG_ZEROCOPY tells the kernel to use the same user buffer to avoid copying, +some caution is necessary to avoid overwriting any buffer before it's sent. +If something like this happens, a newer version of the buffer may be sent instead. +- If this is a problem, it's recommended to call qio_channel_flush() before freeing +or re-using the buffer. + +2: Locked memory +- When using MSG_ZEROCOPY, the buffer memory will be locked after queued, and +unlocked after it's sent. +- Depending on the size of each buffer, and how often it's sent, it may require +a larger amount of locked memory than usually available to non-root user. +- If the required amount of locked memory is not available, writev_zero_copy +will return an error, which can abort an operation like migration. +- Because of this, when user code wants to add zero copy as a feature, it +requires a mechanism to disable it, so it can still be accessible to less +privileged users. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Juan Quintela +Message-Id: <20220513062836.965425-4-leobras@redhat.com> +Signed-off-by: Dr.
David Alan Gilbert +(cherry picked from commit 2bc58ffc2926a4efdd03edfb5909861fefc68c3d) +Signed-off-by: Leonardo Bras +--- + include/io/channel-socket.h | 2 + + io/channel-socket.c | 116 ++++++++++++++++++++++++++++++++++-- + 2 files changed, 114 insertions(+), 4 deletions(-) + +diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h +index e747e63514..513c428fe4 100644 +--- a/include/io/channel-socket.h ++++ b/include/io/channel-socket.h +@@ -47,6 +47,8 @@ struct QIOChannelSocket { + socklen_t localAddrLen; + struct sockaddr_storage remoteAddr; + socklen_t remoteAddrLen; ++ ssize_t zero_copy_queued; ++ ssize_t zero_copy_sent; + }; + + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index a1be2197ca..fbd2214d20 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -26,6 +26,14 @@ + #include "io/channel-watch.h" + #include "trace.h" + #include "qapi/clone-visitor.h" ++#ifdef CONFIG_LINUX ++#include ++#include ++ ++#if (defined(MSG_ZEROCOPY) && defined(SO_ZEROCOPY)) ++#define QEMU_MSG_ZEROCOPY ++#endif ++#endif + + #define SOCKET_MAX_FDS 16 + +@@ -55,6 +63,8 @@ qio_channel_socket_new(void) + + sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)); + sioc->fd = -1; ++ sioc->zero_copy_queued = 0; ++ sioc->zero_copy_sent = 0; + + ioc = QIO_CHANNEL(sioc); + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); +@@ -154,6 +164,16 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, + return -1; + } + ++#ifdef QEMU_MSG_ZEROCOPY ++ int ret, v = 1; ++ ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v)); ++ if (ret == 0) { ++ /* Zero copy available on host */ ++ qio_channel_set_feature(QIO_CHANNEL(ioc), ++ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY); ++ } ++#endif ++ + return 0; + } + +@@ -534,6 +554,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)]; + size_t fdsize = sizeof(int) * nfds; + struct cmsghdr *cmsg; ++ int sflags = 0; + + 
memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)); + +@@ -558,15 +579,31 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + memcpy(CMSG_DATA(cmsg), fds, fdsize); + } + ++#ifdef QEMU_MSG_ZEROCOPY ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++ sflags = MSG_ZEROCOPY; ++ } ++#endif ++ + retry: +- ret = sendmsg(sioc->fd, &msg, 0); ++ ret = sendmsg(sioc->fd, &msg, sflags); + if (ret <= 0) { +- if (errno == EAGAIN) { ++ switch (errno) { ++ case EAGAIN: + return QIO_CHANNEL_ERR_BLOCK; +- } +- if (errno == EINTR) { ++ case EINTR: + goto retry; ++#ifdef QEMU_MSG_ZEROCOPY ++ case ENOBUFS: ++ if (sflags & MSG_ZEROCOPY) { ++ error_setg_errno(errp, errno, ++ "Process can't lock enough memory for using MSG_ZEROCOPY"); ++ return -1; ++ } ++ break; ++#endif + } ++ + error_setg_errno(errp, errno, + "Unable to write to socket"); + return -1; +@@ -660,6 +697,74 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + } + #endif /* WIN32 */ + ++ ++#ifdef QEMU_MSG_ZEROCOPY ++static int qio_channel_socket_flush(QIOChannel *ioc, ++ Error **errp) ++{ ++ QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); ++ struct msghdr msg = {}; ++ struct sock_extended_err *serr; ++ struct cmsghdr *cm; ++ char control[CMSG_SPACE(sizeof(*serr))]; ++ int received; ++ int ret = 1; ++ ++ msg.msg_control = control; ++ msg.msg_controllen = sizeof(control); ++ memset(control, 0, sizeof(control)); ++ ++ while (sioc->zero_copy_sent < sioc->zero_copy_queued) { ++ received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE); ++ if (received < 0) { ++ switch (errno) { ++ case EAGAIN: ++ /* Nothing on errqueue, wait until something is available */ ++ qio_channel_wait(ioc, G_IO_ERR); ++ continue; ++ case EINTR: ++ continue; ++ default: ++ error_setg_errno(errp, errno, ++ "Unable to read errqueue"); ++ return -1; ++ } ++ } ++ ++ cm = CMSG_FIRSTHDR(&msg); ++ if (cm->cmsg_level != SOL_IP && ++ cm->cmsg_type != IP_RECVERR) { ++ error_setg_errno(errp, EPROTOTYPE, ++ "Wrong cmsg in errqueue"); ++ 
return -1; ++ } ++ ++ serr = (void *) CMSG_DATA(cm); ++ if (serr->ee_errno != SO_EE_ORIGIN_NONE) { ++ error_setg_errno(errp, serr->ee_errno, ++ "Error on socket"); ++ return -1; ++ } ++ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { ++ error_setg_errno(errp, serr->ee_origin, ++ "Error not from zero copy"); ++ return -1; ++ } ++ ++ /* No errors, count successfully finished sendmsg()*/ ++ sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1; ++ ++ /* If any sendmsg() succeeded using zero copy, return 0 at the end */ ++ if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) { ++ ret = 0; ++ } ++ } ++ ++ return ret; ++} ++ ++#endif /* QEMU_MSG_ZEROCOPY */ ++ + static int + qio_channel_socket_set_blocking(QIOChannel *ioc, + bool enabled, +@@ -790,6 +895,9 @@ static void qio_channel_socket_class_init(ObjectClass *klass, + ioc_klass->io_set_delay = qio_channel_socket_set_delay; + ioc_klass->io_create_watch = qio_channel_socket_create_watch; + ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler; ++#ifdef QEMU_MSG_ZEROCOPY ++ ioc_klass->io_flush = qio_channel_socket_flush; ++#endif + } + + static const TypeInfo qio_channel_socket_info = { +-- +2.35.3 + diff --git a/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch b/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch new file mode 100644 index 0000000..6fc0c76 --- /dev/null +++ b/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch @@ -0,0 +1,82 @@ +From 60bf942a58db12c821f2a6a49e2e0b04b99bec30 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 20 Jun 2022 02:39:42 -0300 +Subject: [PATCH 15/18] QIOChannelSocket: Introduce assert and reduce ifdefs to + improve readability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [9/11] eaa02d68301852ccc98bdacc7387d8d03be1cb05 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav 
Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +During implementation of MSG_ZEROCOPY feature, a lot of #ifdefs were +introduced, particularly at qio_channel_socket_writev(). + +Rewrite some of those changes so it's easier to read. + +Also, introduce an assert to help detect incorrect zero-copy usage is when +it's disabled on build. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Signed-off-by: Juan Quintela +Signed-off-by: Dr. David Alan Gilbert + dgilbert: Fixed up thinko'd g_assert_unreachable->g_assert_not_reached +(cherry picked from commit 803ca43e4c7fcf32f9f68c118301ccd0c83ece3f) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index fbd2214d20..7490e5943d 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -579,11 +579,17 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + memcpy(CMSG_DATA(cmsg), fds, fdsize); + } + +-#ifdef QEMU_MSG_ZEROCOPY + if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++#ifdef QEMU_MSG_ZEROCOPY + sflags = MSG_ZEROCOPY; +- } ++#else ++ /* ++ * We expect QIOChannel class entry point to have ++ * blocked this code path already ++ */ ++ g_assert_not_reached(); + #endif ++ } + + retry: + ret = sendmsg(sioc->fd, &msg, sflags); +@@ -593,15 +599,13 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + return QIO_CHANNEL_ERR_BLOCK; + case EINTR: + goto retry; +-#ifdef QEMU_MSG_ZEROCOPY + case ENOBUFS: +- if (sflags & MSG_ZEROCOPY) { ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { + error_setg_errno(errp, errno, + "Process can't lock enough memory for using MSG_ZEROCOPY"); + return -1; + } + break; +-#endif + } + + error_setg_errno(errp, errno, +-- +2.35.3 + diff --git a/kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch b/kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch 
new file mode 100644 index 0000000..8a0aeb0 --- /dev/null +++ b/kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch @@ -0,0 +1,237 @@ +From 055edf068196622a3e1868c9e4c991d410272a6d Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Wed, 15 Jun 2022 15:28:27 +0200 +Subject: [PATCH 03/18] RHEL-only: AArch64: Drop unsupported CPU types +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [3/6] 21f54c86dc87e5e75a64459b5a385686bc09640c (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824 +Upstream Status: RHEL only + +We only need to support AArch64 cpu types and we only need three +types: + 1) A base type to use with TCG, i.e. a cpu type with only base + features. 'cortex-a57' serves this role and is currently used + by libguestfs. + 2) The 'max' type, which is for both KVM and TCG and is good for + tests that just specify 'max' but run under both. 'max' with + TCG also provides the VM with all the CPU features TCG + supports, which is good for VMs that need features not + provided by the basic cortex-a57. + 3) The host type which is used with KVM. 
+ +Signed-off-by: Andrew Jones +--- + hw/arm/virt.c | 4 ++++ + target/arm/cpu64.c | 6 ++++++ + target/arm/cpu_tcg.c | 12 ++---------- + tests/qtest/arm-cpu-features.c | 6 ++++++ + 4 files changed, 18 insertions(+), 10 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 95d012d6eb..74119976d3 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -239,12 +239,16 @@ static const int a15irqmap[] = { + }; + + static const char *valid_cpus[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + ARM_CPU_TYPE_NAME("cortex-a7"), + ARM_CPU_TYPE_NAME("cortex-a15"), + ARM_CPU_TYPE_NAME("cortex-a53"), ++#endif /* disabled for RHEL */ + ARM_CPU_TYPE_NAME("cortex-a57"), ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + ARM_CPU_TYPE_NAME("cortex-a72"), + ARM_CPU_TYPE_NAME("a64fx"), ++#endif /* disabled for RHEL */ + ARM_CPU_TYPE_NAME("host"), + ARM_CPU_TYPE_NAME("max"), + }; +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index eb44c05822..e80b831073 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -146,6 +146,7 @@ static void aarch64_a57_initfn(Object *obj) + define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void aarch64_a53_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -249,6 +250,7 @@ static void aarch64_a72_initfn(Object *obj) + cpu->gic_vprebits = 5; + define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); + } ++#endif /* disabled for RHEL */ + + void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) + { +@@ -923,6 +925,7 @@ static void aarch64_max_initfn(Object *obj) + qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void aarch64_a64fx_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -969,12 +972,15 @@ static void aarch64_a64fx_initfn(Object *obj) + + /* TODO: Add A64FX specific HPC extension registers */ + } ++#endif /* disabled for RHEL */ + + static const 
ARMCPUInfo aarch64_cpus[] = { + { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, + { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, + { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, ++#endif /* disabled for RHEL */ + { .name = "max", .initfn = aarch64_max_initfn }, + #if defined(CONFIG_KVM) || defined(CONFIG_HVF) + { .name = "host", .initfn = aarch64_host_initfn }, +diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c +index 3826fa5122..74727fc92c 100644 +--- a/target/arm/cpu_tcg.c ++++ b/target/arm/cpu_tcg.c +@@ -19,10 +19,10 @@ + #include "hw/boards.h" + #endif + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* CPU models. These are not needed for the AArch64 linux-user build. */ + #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) + static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) + { +@@ -376,7 +376,6 @@ static void cortex_a9_initfn(Object *obj) + cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. 
*/ + define_arm_cp_regs(cpu, cortexa9_cp_reginfo); + } +-#endif /* disabled for RHEL */ + + #ifndef CONFIG_USER_ONLY + static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) +@@ -402,7 +401,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { + REGINFO_SENTINEL + }; + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_a7_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -448,7 +446,6 @@ static void cortex_a7_initfn(Object *obj) + cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ + } +-#endif /* disabled for RHEL */ + + static void cortex_a15_initfn(Object *obj) + { +@@ -492,7 +489,6 @@ static void cortex_a15_initfn(Object *obj) + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_m0_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -933,7 +929,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) + + cc->gdb_core_xml_file = "arm-m-profile.xml"; + } +-#endif /* disabled for RHEL */ + + #ifndef TARGET_AARCH64 + /* +@@ -1013,7 +1008,6 @@ static void arm_max_initfn(Object *obj) + #endif /* !TARGET_AARCH64 */ + + static const ARMCPUInfo arm_tcg_cpus[] = { +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "arm926", .initfn = arm926_initfn }, + { .name = "arm946", .initfn = arm946_initfn }, + { .name = "arm1026", .initfn = arm1026_initfn }, +@@ -1029,9 +1023,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "cortex-a7", .initfn = cortex_a7_initfn }, + { .name = "cortex-a8", .initfn = cortex_a8_initfn }, + { .name = "cortex-a9", .initfn = cortex_a9_initfn }, +-#endif /* disabled for RHEL */ + { .name = "cortex-a15", .initfn = cortex_a15_initfn }, +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-m0", .initfn = cortex_m0_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m3", .initfn = 
cortex_m3_initfn, +@@ -1062,7 +1054,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, + { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, + { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, +-#endif /* disabled for RHEL */ + #ifndef TARGET_AARCH64 + { .name = "max", .initfn = arm_max_initfn }, + #endif +@@ -1090,3 +1081,4 @@ static void arm_tcg_cpu_register_types(void) + type_init(arm_tcg_cpu_register_types) + + #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ ++#endif /* disabled for RHEL */ +diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c +index f76652143a..fe2a0a070d 100644 +--- a/tests/qtest/arm-cpu-features.c ++++ b/tests/qtest/arm-cpu-features.c +@@ -440,8 +440,10 @@ static void test_query_cpu_model_expansion(const void *data) + assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); + + /* Test expected feature presence/absence for some cpu types */ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_has_feature_enabled(qts, "cortex-a15", "pmu"); + assert_has_not_feature(qts, "cortex-a15", "aarch64"); ++#endif /* disabled for RHEL */ + + /* Enabling and disabling pmu should always work. 
*/ + assert_has_feature_enabled(qts, "max", "pmu"); +@@ -458,6 +460,7 @@ static void test_query_cpu_model_expansion(const void *data) + assert_has_feature_enabled(qts, "cortex-a57", "pmu"); + assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_has_feature_enabled(qts, "a64fx", "pmu"); + assert_has_feature_enabled(qts, "a64fx", "aarch64"); + /* +@@ -470,6 +473,7 @@ static void test_query_cpu_model_expansion(const void *data) + "{ 'sve384': true }"); + assert_error(qts, "a64fx", "cannot enable sve640", + "{ 'sve640': true }"); ++#endif /* disabled for RHEL */ + + sve_tests_default(qts, "max"); + pauth_tests_default(qts, "max"); +@@ -505,9 +509,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data) + QDict *resp; + char *error; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_error(qts, "cortex-a15", + "We cannot guarantee the CPU type 'cortex-a15' works " + "with KVM on this host", NULL); ++#endif /* disabled for RHEL */ + + assert_has_feature_enabled(qts, "host", "aarch64"); + +-- +2.35.3 + diff --git a/kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch b/kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch new file mode 100644 index 0000000..a1cc4c7 --- /dev/null +++ b/kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch @@ -0,0 +1,95 @@ +From d710394f68eb0b6116dd8ac76f619c192e0d5972 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Wed, 15 Jun 2022 15:28:27 +0200 +Subject: [PATCH 02/18] RHEL-only: tests/avocado: Switch aarch64 tests from a53 + to a57 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. 
Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [2/6] e85ef69b42c411a6997e4da10ba05176368769b3 (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824 +Upstream Status: RHEL only + +We plan to remove the cortex-a53 from the supported cpu types. Switch +all avocado tests that use it to the cortex-a57, which will work the +same and we intend to keep. We don't want to try and upstream this +change since the better upstream change would be to switch from the +a53 to 'max', but the upstream tests also need to use later guest +kernels to use 'max' (see qemu upstream commit 0942820408dc +("hw/arm/virt: Disable LPA2 for -machine virt-6.2") + +Signed-off-by: Andrew Jones +--- + tests/avocado/replay_kernel.py | 2 +- + tests/avocado/reverse_debugging.py | 2 +- + tests/avocado/tcg_plugins.py | 6 +++--- + 3 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py +index 0b2b0dc692..3a7b5f0748 100644 +--- a/tests/avocado/replay_kernel.py ++++ b/tests/avocado/replay_kernel.py +@@ -147,7 +147,7 @@ def test_aarch64_virt(self): + """ + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' + '/linux/releases/29/Everything/aarch64/os/images/pxeboot' +diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py +index d2921e70c3..66d185ed42 100644 +--- a/tests/avocado/reverse_debugging.py ++++ b/tests/avocado/reverse_debugging.py +@@ -198,7 +198,7 @@ def test_aarch64_virt(self): + """ + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_url = 
('https://archives.fedoraproject.org/pub/archive/fedora' + '/linux/releases/29/Everything/aarch64/os/images/pxeboot' +diff --git a/tests/avocado/tcg_plugins.py b/tests/avocado/tcg_plugins.py +index 642d2e49e3..93b3afd823 100644 +--- a/tests/avocado/tcg_plugins.py ++++ b/tests/avocado/tcg_plugins.py +@@ -68,7 +68,7 @@ def test_aarch64_virt_insn(self): + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +@@ -94,7 +94,7 @@ def test_aarch64_virt_insn_icount(self): + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +@@ -120,7 +120,7 @@ def test_aarch64_virt_mem_icount(self): + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +-- +2.35.3 + diff --git a/kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch b/kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch new file mode 100644 index 0000000..0da63bf --- /dev/null +++ b/kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch @@ -0,0 +1,87 @@ +From 7c489b54b0bb33445113fbf16e88feb23be68013 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:30 -0300 +Subject: [PATCH 07/18] meson.build: Fix docker-test-build@alpine when + including linux/errqueue.h +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [1/11] 
f058eb846fcf611d527a1dd3b0cc399cdc17e3ee (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +A build error happens in alpine CI when linux/errqueue.h is included +in io/channel-socket.c, due to redefining of 'struct __kernel_timespec': + +=== +ninja: job failed: [...] +In file included from /usr/include/linux/errqueue.h:6, + from ../io/channel-socket.c:29: +/usr/include/linux/time_types.h:7:8: error: redefinition of 'struct __kernel_timespec' + 7 | struct __kernel_timespec { + | ^~~~~~~~~~~~~~~~~ +In file included from /usr/include/liburing.h:19, + from /builds/user/qemu/include/block/aio.h:18, + from /builds/user/qemu/include/io/channel.h:26, + from /builds/user/qemu/include/io/channel-socket.h:24, + from ../io/channel-socket.c:24: +/usr/include/liburing/compat.h:9:8: note: originally defined here + 9 | struct __kernel_timespec { + | ^~~~~~~~~~~~~~~~~ +ninja: subcommand failed +=== + +As above error message suggests, 'struct __kernel_timespec' was already +defined by liburing/compat.h. + +Fix alpine CI by adding test to disable liburing in configure step if a +redefinition happens between linux/errqueue.h and liburing/compat.h. + +[dgilbert: This has been fixed in Alpine issue 13813 and liburing] + +Signed-off-by: Leonardo Bras +Message-Id: <20220513062836.965425-2-leobras@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 354081d43de44ebd3497fe08f7f0121a5517d528) +Signed-off-by: Leonardo Bras +--- + meson.build | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/meson.build b/meson.build +index 5a7c10e639..13e3323380 100644 +--- a/meson.build ++++ b/meson.build +@@ -471,12 +471,23 @@ if not get_option('linux_aio').auto() or have_block + required: get_option('linux_aio'), + kwargs: static_kwargs) + endif ++ ++linux_io_uring_test = ''' ++ #include ++ #include ++ ++ int main(void) { return 0; }''' ++ + linux_io_uring = not_found + if not get_option('linux_io_uring').auto() or have_block + linux_io_uring = dependency('liburing', version: '>=0.3', + required: get_option('linux_io_uring'), + method: 'pkg-config', kwargs: static_kwargs) ++ if not cc.links(linux_io_uring_test) ++ linux_io_uring = not_found ++ endif + endif ++ + libnfs = not_found + if not get_option('libnfs').auto() or have_block + libnfs = dependency('libnfs', version: '>=1.9.3', +-- +2.35.3 + diff --git a/kvm-migration-Add-migrate_use_tls-helper.patch b/kvm-migration-Add-migrate_use_tls-helper.patch new file mode 100644 index 0000000..0fe0d91 --- /dev/null +++ b/kvm-migration-Add-migrate_use_tls-helper.patch @@ -0,0 +1,106 @@ +From 828f6c106eedcb7a48e551ffda15af56ff92a899 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:34 -0300 +Subject: [PATCH 11/18] migration: Add migrate_use_tls() helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [5/11] 06e945297c3b9c0ce5864885aafcdba1e5746bc2 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +A lot of places check parameters.tls_creds in order to evaluate if TLS is +in use, and sometimes call migrate_get_current() just for that test. 
+ +Add new helper function migrate_use_tls() in order to simplify testing +for TLS usage. + +Signed-off-by: Leonardo Bras +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220513062836.965425-6-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit d2fafb6a6814a8998607d0baf691265032996a0f) +Signed-off-by: Leonardo Bras +--- + migration/channel.c | 3 +-- + migration/migration.c | 9 +++++++++ + migration/migration.h | 1 + + migration/multifd.c | 5 +---- + 4 files changed, 12 insertions(+), 6 deletions(-) + +diff --git a/migration/channel.c b/migration/channel.c +index c4fc000a1a..086b5c0d8b 100644 +--- a/migration/channel.c ++++ b/migration/channel.c +@@ -38,8 +38,7 @@ void migration_channel_process_incoming(QIOChannel *ioc) + trace_migration_set_incoming_channel( + ioc, object_get_typename(OBJECT(ioc))); + +- if (s->parameters.tls_creds && +- *s->parameters.tls_creds && ++ if (migrate_use_tls() && + !object_dynamic_cast(OBJECT(ioc), + TYPE_QIO_CHANNEL_TLS)) { + migration_tls_channel_process_incoming(s, ioc, &local_err); +diff --git a/migration/migration.c b/migration/migration.c +index 0a6b3b9f4d..d91efb66fe 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2582,6 +2582,15 @@ bool migrate_use_zero_copy_send(void) + } + #endif + ++int migrate_use_tls(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.tls_creds && *s->parameters.tls_creds; ++} ++ + int migrate_use_xbzrle(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 5bcb7628ef..c2cabb8a14 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -381,6 +381,7 @@ bool migrate_use_zero_copy_send(void); + #else + #define migrate_use_zero_copy_send() (false) + #endif ++int migrate_use_tls(void); + int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); + bool migrate_colo_enabled(void); +diff --git 
a/migration/multifd.c b/migration/multifd.c +index 76b57a7177..43998ad117 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -784,14 +784,11 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + QIOChannel *ioc, + Error *error) + { +- MigrationState *s = migrate_get_current(); +- + trace_multifd_set_outgoing_channel( + ioc, object_get_typename(OBJECT(ioc)), p->tls_hostname, error); + + if (!error) { +- if (s->parameters.tls_creds && +- *s->parameters.tls_creds && ++ if (migrate_use_tls() && + !object_dynamic_cast(OBJECT(ioc), + TYPE_QIO_CHANNEL_TLS)) { + multifd_tls_channel_connect(p, ioc, &error); +-- +2.35.3 + diff --git a/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch b/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch new file mode 100644 index 0000000..206ac3d --- /dev/null +++ b/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch @@ -0,0 +1,250 @@ +From d6500340dc3c1152b5efe04ef3daa50c17a55e30 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:33 -0300 +Subject: [PATCH 10/18] migration: Add zero-copy-send parameter for QMP/HMP for + Linux +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [4/11] 514d98d595992c53ff98de750035e080ded8972e (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Add property that allows zero-copy migration of memory pages +on the sending side, and also includes a helper function +migrate_use_zero_copy_send() to check if it's enabled. + +No code is introduced to actually do the migration, but it allow +future implementations to enable/disable this feature. + +On non-Linux builds this parameter is compiled-out. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. 
Berrangé +Reviewed-by: Juan Quintela +Acked-by: Markus Armbruster +Message-Id: <20220513062836.965425-5-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit abb6295b3ace5d17c3a65936913fc346616dbf14) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 32 ++++++++++++++++++++++++++++++++ + migration/migration.h | 5 +++++ + migration/socket.c | 11 +++++++++-- + monitor/hmp-cmds.c | 6 ++++++ + qapi/migration.json | 24 ++++++++++++++++++++++++ + 5 files changed, 76 insertions(+), 2 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 695f0f2900..0a6b3b9f4d 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -899,6 +899,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->multifd_zlib_level = s->parameters.multifd_zlib_level; + params->has_multifd_zstd_level = true; + params->multifd_zstd_level = s->parameters.multifd_zstd_level; ++#ifdef CONFIG_LINUX ++ params->has_zero_copy_send = true; ++ params->zero_copy_send = s->parameters.zero_copy_send; ++#endif + params->has_xbzrle_cache_size = true; + params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; + params->has_max_postcopy_bandwidth = true; +@@ -1555,6 +1559,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + if (params->has_multifd_compression) { + dest->multifd_compression = params->multifd_compression; + } ++#ifdef CONFIG_LINUX ++ if (params->has_zero_copy_send) { ++ dest->zero_copy_send = params->zero_copy_send; ++ } ++#endif + if (params->has_xbzrle_cache_size) { + dest->xbzrle_cache_size = params->xbzrle_cache_size; + } +@@ -1667,6 +1676,11 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + if (params->has_multifd_compression) { + s->parameters.multifd_compression = params->multifd_compression; + } ++#ifdef CONFIG_LINUX ++ if (params->has_zero_copy_send) { ++ s->parameters.zero_copy_send = params->zero_copy_send; ++ } ++#endif + if 
(params->has_xbzrle_cache_size) { + s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; + xbzrle_cache_resize(params->xbzrle_cache_size, errp); +@@ -2557,6 +2571,17 @@ int migrate_multifd_zstd_level(void) + return s->parameters.multifd_zstd_level; + } + ++#ifdef CONFIG_LINUX ++bool migrate_use_zero_copy_send(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.zero_copy_send; ++} ++#endif ++ + int migrate_use_xbzrle(void) + { + MigrationState *s; +@@ -4200,6 +4225,10 @@ static Property migration_properties[] = { + DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, + parameters.multifd_zstd_level, + DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), ++#ifdef CONFIG_LINUX ++ DEFINE_PROP_BOOL("zero_copy_send", MigrationState, ++ parameters.zero_copy_send, false), ++#endif + DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, + parameters.xbzrle_cache_size, + DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), +@@ -4297,6 +4326,9 @@ static void migration_instance_init(Object *obj) + params->has_multifd_compression = true; + params->has_multifd_zlib_level = true; + params->has_multifd_zstd_level = true; ++#ifdef CONFIG_LINUX ++ params->has_zero_copy_send = true; ++#endif + params->has_xbzrle_cache_size = true; + params->has_max_postcopy_bandwidth = true; + params->has_max_cpu_throttle = true; +diff --git a/migration/migration.h b/migration/migration.h +index 2de861df01..5bcb7628ef 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -376,6 +376,11 @@ MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); + int migrate_multifd_zstd_level(void); + ++#ifdef CONFIG_LINUX ++bool migrate_use_zero_copy_send(void); ++#else ++#define migrate_use_zero_copy_send() (false) ++#endif + int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); + bool migrate_colo_enabled(void); +diff --git a/migration/socket.c b/migration/socket.c +index 05705a32d8..3754d8f72c 100644 +--- 
a/migration/socket.c ++++ b/migration/socket.c +@@ -74,9 +74,16 @@ static void socket_outgoing_migration(QIOTask *task, + + if (qio_task_propagate_error(task, &err)) { + trace_migration_socket_outgoing_error(error_get_pretty(err)); +- } else { +- trace_migration_socket_outgoing_connected(data->hostname); ++ goto out; + } ++ ++ trace_migration_socket_outgoing_connected(data->hostname); ++ ++ if (migrate_use_zero_copy_send()) { ++ error_setg(&err, "Zero copy send not available in migration"); ++ } ++ ++out: + migration_channel_connect(data->s, sioc, data->hostname, err); + object_unref(OBJECT(sioc)); + } +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 634968498b..55b48d3733 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -1309,6 +1309,12 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_multifd_zstd_level = true; + visit_type_uint8(v, param, &p->multifd_zstd_level, &err); + break; ++#ifdef CONFIG_LINUX ++ case MIGRATION_PARAMETER_ZERO_COPY_SEND: ++ p->has_zero_copy_send = true; ++ visit_type_bool(v, param, &p->zero_copy_send, &err); ++ break; ++#endif + case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: + p->has_xbzrle_cache_size = true; + if (!visit_type_size(v, param, &cache_size, &err)) { +diff --git a/qapi/migration.json b/qapi/migration.json +index 27d7b28158..4d833ecdd6 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -741,6 +741,13 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# Defaults to false. (Since 7.1) ++# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. 
Such + # aliases may for example be the corresponding names on the +@@ -780,6 +787,7 @@ + 'xbzrle-cache-size', 'max-postcopy-bandwidth', + 'max-cpu-throttle', 'multifd-compression', + 'multifd-zlib-level' ,'multifd-zstd-level', ++ { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'}, + 'block-bitmap-mapping' ] } + + ## +@@ -906,6 +914,13 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# Defaults to false. (Since 7.1) ++# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -960,6 +975,7 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', ++ '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +@@ -1106,6 +1122,13 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# Defaults to false. (Since 7.1) ++# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. 
Such + # aliases may for example be the corresponding names on the +@@ -1158,6 +1181,7 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', ++ '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +-- +2.35.3 + diff --git a/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch b/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch new file mode 100644 index 0000000..29dc0ea --- /dev/null +++ b/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch @@ -0,0 +1,98 @@ +From fd6f516a94e635bc42e58448f314db575814a834 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 31 Mar 2022 11:08:45 -0400 +Subject: [PATCH 18/18] migration: Allow migrate-recover to run multiple times +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 104: migration: Allow migrate-recover to run multiple times +RH-Commit: [1/1] afd726e54c069ae800e2d01f34e768d6bac7dcb9 (peterx/qemu-kvm) +RH-Bugzilla: 2096143 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Hanna Reitz +RH-Acked-by: Dr. David Alan Gilbert + +Previously migration didn't have an easy way to cleanup the listening +transport, migrate recovery only allows to execute once. That's done with a +trick flag in postcopy_recover_triggered. + +Now the facility is already there. + +Drop postcopy_recover_triggered and instead allows a new migrate-recover to +release the previous listener transport. + +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Peter Xu +Message-Id: <20220331150857.74406-8-peterx@redhat.com> +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 08401c0426bc1a5ce4609afd1cda5dd39abbf9fa) +Signed-off-by: Peter Xu +--- + migration/migration.c | 13 ++----------- + migration/migration.h | 1 - + migration/savevm.c | 3 --- + 3 files changed, 2 insertions(+), 15 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 2a141bfaf3..8fb3eae910 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2166,11 +2166,8 @@ void qmp_migrate_recover(const char *uri, Error **errp) + return; + } + +- if (qatomic_cmpxchg(&mis->postcopy_recover_triggered, +- false, true) == true) { +- error_setg(errp, "Migrate recovery is triggered already"); +- return; +- } ++ /* If there's an existing transport, release it */ ++ migration_incoming_transport_cleanup(mis); + + /* + * Note that this call will never start a real migration; it will +@@ -2178,12 +2175,6 @@ void qmp_migrate_recover(const char *uri, Error **errp) + * to continue using that newly established channel. + */ + qemu_start_incoming_migration(uri, errp); +- +- /* Safe to dereference with the assert above */ +- if (*errp) { +- /* Reset the flag so user could still retry */ +- qatomic_set(&mis->postcopy_recover_triggered, false); +- } + } + + void qmp_migrate_pause(Error **errp) +diff --git a/migration/migration.h b/migration/migration.h +index c2cabb8a14..fbc8690ec8 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -139,7 +139,6 @@ struct MigrationIncomingState { + struct PostcopyBlocktimeContext *blocktime_ctx; + + /* notify PAUSED postcopy incoming migrations to try to continue */ +- bool postcopy_recover_triggered; + QemuSemaphore postcopy_pause_sem_dst; + QemuSemaphore postcopy_pause_sem_fault; + +diff --git a/migration/savevm.c b/migration/savevm.c +index 02ed94c180..d9076897b8 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -2589,9 +2589,6 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) + + assert(migrate_postcopy_ram()); + +- /* 
Clear the triggered bit to allow one recovery */ +- mis->postcopy_recover_triggered = false; +- + /* + * Unregister yank with either from/to src would work, since ioc behind it + * is the same +-- +2.35.3 + diff --git a/kvm-migration-Change-zero_copy_send-from-migration-param.patch b/kvm-migration-Change-zero_copy_send-from-migration-param.patch new file mode 100644 index 0000000..abeeeb6 --- /dev/null +++ b/kvm-migration-Change-zero_copy_send-from-migration-param.patch @@ -0,0 +1,289 @@ +From 7e2a037f3f349c21201152cecce32d8c8ff0bea0 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 20 Jun 2022 02:39:45 -0300 +Subject: [PATCH 17/18] migration: Change zero_copy_send from migration + parameter to migration capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [11/11] e4a955607947896a49398ac8400241a0adac51a1 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +When originally implemented, zero_copy_send was designed as a Migration +paramenter. + +But taking into account how is that supposed to work, and how +the difference between a capability and a parameter, it only makes sense +that zero-copy-send would work better as a capability. + +Taking into account how recently the change got merged, it was decided +that it's still time to make it right, and convert zero_copy_send into +a Migration capability. + +Signed-off-by: Leonardo Bras +Reviewed-by: Juan Quintela +Acked-by: Markus Armbruster +Acked-by: Peter Xu +Signed-off-by: Juan Quintela +Signed-off-by: Dr. 
David Alan Gilbert + dgilbert: always define the capability, even on non-Linux but error if +set; avoids build problems with the capability +(cherry picked from commit 1abaec9a1b2c23f7aa94709a422128d9e42c3e0b) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 58 +++++++++++++++++++------------------------ + monitor/hmp-cmds.c | 6 ----- + qapi/migration.json | 33 +++++++----------------- + 3 files changed, 34 insertions(+), 63 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 102236fba0..2a141bfaf3 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -163,7 +163,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, + MIGRATION_CAPABILITY_COMPRESS, + MIGRATION_CAPABILITY_XBZRLE, + MIGRATION_CAPABILITY_X_COLO, +- MIGRATION_CAPABILITY_VALIDATE_UUID); ++ MIGRATION_CAPABILITY_VALIDATE_UUID, ++ MIGRATION_CAPABILITY_ZERO_COPY_SEND); + + /* When we add fault tolerance, we could have several + migrations at once. For now we don't need to add +@@ -899,10 +900,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->multifd_zlib_level = s->parameters.multifd_zlib_level; + params->has_multifd_zstd_level = true; + params->multifd_zstd_level = s->parameters.multifd_zstd_level; +-#ifdef CONFIG_LINUX +- params->has_zero_copy_send = true; +- params->zero_copy_send = s->parameters.zero_copy_send; +-#endif + params->has_xbzrle_cache_size = true; + params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; + params->has_max_postcopy_bandwidth = true; +@@ -1263,6 +1260,24 @@ static bool migrate_caps_check(bool *cap_list, + } + } + ++#ifdef CONFIG_LINUX ++ if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && ++ (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || ++ migrate_use_compression() || ++ migrate_use_tls())) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#else ++ if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { ++ 
error_setg(errp, ++ "Zero copy currently only available on Linux"); ++ return false; ++ } ++#endif ++ ++ + /* incoming side only */ + if (runstate_check(RUN_STATE_INMIGRATE) && + !migrate_multifd_is_allowed() && +@@ -1485,16 +1500,6 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); + return false; + } +-#ifdef CONFIG_LINUX +- if (params->zero_copy_send && +- (!migrate_use_multifd() || +- params->multifd_compression != MULTIFD_COMPRESSION_NONE || +- (params->tls_creds && *params->tls_creds))) { +- error_setg(errp, +- "Zero copy only available for non-compressed non-TLS multifd migration"); +- return false; +- } +-#endif + return true; + } + +@@ -1568,11 +1573,6 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + if (params->has_multifd_compression) { + dest->multifd_compression = params->multifd_compression; + } +-#ifdef CONFIG_LINUX +- if (params->has_zero_copy_send) { +- dest->zero_copy_send = params->zero_copy_send; +- } +-#endif + if (params->has_xbzrle_cache_size) { + dest->xbzrle_cache_size = params->xbzrle_cache_size; + } +@@ -1685,11 +1685,6 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + if (params->has_multifd_compression) { + s->parameters.multifd_compression = params->multifd_compression; + } +-#ifdef CONFIG_LINUX +- if (params->has_zero_copy_send) { +- s->parameters.zero_copy_send = params->zero_copy_send; +- } +-#endif + if (params->has_xbzrle_cache_size) { + s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; + xbzrle_cache_resize(params->xbzrle_cache_size, errp); +@@ -2587,7 +2582,7 @@ bool migrate_use_zero_copy_send(void) + + s = migrate_get_current(); + +- return s->parameters.zero_copy_send; ++ return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; + } + #endif + +@@ -4243,10 +4238,6 @@ static Property migration_properties[] = { + 
DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, + parameters.multifd_zstd_level, + DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), +-#ifdef CONFIG_LINUX +- DEFINE_PROP_BOOL("zero_copy_send", MigrationState, +- parameters.zero_copy_send, false), +-#endif + DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, + parameters.xbzrle_cache_size, + DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), +@@ -4284,6 +4275,10 @@ static Property migration_properties[] = { + DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), + DEFINE_PROP_MIG_CAP("x-background-snapshot", + MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), ++#ifdef CONFIG_LINUX ++ DEFINE_PROP_MIG_CAP("x-zero-copy-send", ++ MIGRATION_CAPABILITY_ZERO_COPY_SEND), ++#endif + + DEFINE_PROP_END_OF_LIST(), + }; +@@ -4344,9 +4339,6 @@ static void migration_instance_init(Object *obj) + params->has_multifd_compression = true; + params->has_multifd_zlib_level = true; + params->has_multifd_zstd_level = true; +-#ifdef CONFIG_LINUX +- params->has_zero_copy_send = true; +-#endif + params->has_xbzrle_cache_size = true; + params->has_max_postcopy_bandwidth = true; + params->has_max_cpu_throttle = true; +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 55b48d3733..634968498b 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -1309,12 +1309,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_multifd_zstd_level = true; + visit_type_uint8(v, param, &p->multifd_zstd_level, &err); + break; +-#ifdef CONFIG_LINUX +- case MIGRATION_PARAMETER_ZERO_COPY_SEND: +- p->has_zero_copy_send = true; +- visit_type_bool(v, param, &p->zero_copy_send, &err); +- break; +-#endif + case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: + p->has_xbzrle_cache_size = true; + if (!visit_type_size(v, param, &cache_size, &err)) { +diff --git a/qapi/migration.json b/qapi/migration.json +index 4d833ecdd6..5105790cd0 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -463,6 +463,13 @@ + # procedure starts. 
The VM RAM is saved with running VM. + # (since 6.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# (since 7.1) ++# + # Features: + # @unstable: Members @x-colo and @x-ignore-shared are experimental. + # +@@ -476,7 +483,8 @@ + 'block', 'return-path', 'pause-before-switchover', 'multifd', + 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', + { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, +- 'validate-uuid', 'background-snapshot'] } ++ 'validate-uuid', 'background-snapshot', ++ 'zero-copy-send'] } + + ## + # @MigrationCapabilityStatus: +@@ -741,12 +749,6 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # +-# @zero-copy-send: Controls behavior on sending memory pages on migration. +-# When true, enables a zero-copy mechanism for sending +-# memory pages, if host supports it. +-# Requires that QEMU be permitted to use locked memory +-# for guest RAM pages. +-# Defaults to false. (Since 7.1) + # + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such +@@ -787,7 +789,6 @@ + 'xbzrle-cache-size', 'max-postcopy-bandwidth', + 'max-cpu-throttle', 'multifd-compression', + 'multifd-zlib-level' ,'multifd-zstd-level', +- { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'}, + 'block-bitmap-mapping' ] } + + ## +@@ -914,13 +915,6 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # +-# @zero-copy-send: Controls behavior on sending memory pages on migration. +-# When true, enables a zero-copy mechanism for sending +-# memory pages, if host supports it. +-# Requires that QEMU be permitted to use locked memory +-# for guest RAM pages. +-# Defaults to false. 
(Since 7.1) +-# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -975,7 +969,6 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', +- '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +@@ -1122,13 +1115,6 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # +-# @zero-copy-send: Controls behavior on sending memory pages on migration. +-# When true, enables a zero-copy mechanism for sending +-# memory pages, if host supports it. +-# Requires that QEMU be permitted to use locked memory +-# for guest RAM pages. +-# Defaults to false. (Since 7.1) +-# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -1181,7 +1167,6 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', +- '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +-- +2.35.3 + diff --git a/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch b/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch new file mode 100644 index 0000000..c7159e1 --- /dev/null +++ b/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch @@ -0,0 +1,182 @@ +From c1a2866d158ac67179fa0d17f1710302eb9a3866 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:37 -0300 +Subject: [PATCH 14/18] multifd: Implement zero copy write in multifd migration + (multifd-zero-copy) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: 
MSG_ZEROCOPY + Multifd +RH-Commit: [8/11] b93009cc94b2cc4b464b4f68ebfb37b870dd6f7d (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Implement zero copy send on nocomp_send_write(), by making use of QIOChannel +writev + flags & flush interface. + +Change multifd_send_sync_main() so flush_zero_copy() can be called +after each iteration in order to make sure all dirty pages are sent before +a new iteration is started. It will also flush at the beginning and at the +end of migration. + +Also make it return -1 if flush_zero_copy() fails, in order to cancel +the migration process, and avoid resuming the guest in the target host +without receiving all current RAM. + +This will work fine on RAM migration because the RAM pages are not usually freed, +and there is no problem on changing the pages content between writev_zero_copy() and +the actual sending of the buffer, because this change will dirty the page and +cause it to be re-sent on a next iteration anyway. + +A lot of locked memory may be needed in order to use multifd migration +with zero-copy enabled, so disabling the feature should be necessary for +low-privileged users trying to perform multifd migrations. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220513062836.965425-9-leobras@redhat.com> +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 5b1d9bab2da4fca3a3caee97c430e5709cb32b7b) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 11 ++++++++++- + migration/multifd.c | 37 +++++++++++++++++++++++++++++++++++-- + migration/multifd.h | 2 ++ + migration/socket.c | 5 +++-- + 4 files changed, 50 insertions(+), 5 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index d91efb66fe..102236fba0 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1485,7 +1485,16 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); + return false; + } +- ++#ifdef CONFIG_LINUX ++ if (params->zero_copy_send && ++ (!migrate_use_multifd() || ++ params->multifd_compression != MULTIFD_COMPRESSION_NONE || ++ (params->tls_creds && *params->tls_creds))) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#endif + return true; + } + +diff --git a/migration/multifd.c b/migration/multifd.c +index 8fca6c970e..0b5b41c53f 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -571,6 +571,7 @@ void multifd_save_cleanup(void) + int multifd_send_sync_main(QEMUFile *f) + { + int i; ++ bool flush_zero_copy; + + if (!migrate_use_multifd()) { + return 0; +@@ -581,6 +582,20 @@ int multifd_send_sync_main(QEMUFile *f) + return -1; + } + } ++ ++ /* ++ * When using zero-copy, it's necessary to flush the pages before any of ++ * the pages can be sent again, so we'll make sure the new version of the ++ * pages will always arrive _later_ than the old pages. ++ * ++ * Currently we achieve this by flushing the zero-page requested writes ++ * per ram iteration, but in the future we could potentially optimize it ++ * to be less frequent, e.g. only after we finished one whole scanning of ++ * all the dirty bitmaps. 
++ */ ++ ++ flush_zero_copy = migrate_use_zero_copy_send(); ++ + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +@@ -602,6 +617,17 @@ int multifd_send_sync_main(QEMUFile *f) + ram_counters.transferred += p->packet_len; + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); ++ ++ if (flush_zero_copy && p->c) { ++ int ret; ++ Error *err = NULL; ++ ++ ret = qio_channel_flush(p->c, &err); ++ if (ret < 0) { ++ error_report_err(err); ++ return -1; ++ } ++ } + } + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +@@ -686,8 +712,8 @@ static void *multifd_send_thread(void *opaque) + p->iov[0].iov_base = p->packet; + } + +- ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, +- &local_err); ++ ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL, ++ 0, p->write_flags, &local_err); + if (ret != 0) { + break; + } +@@ -928,6 +954,13 @@ int multifd_save_setup(Error **errp) + /* We need one extra place for the packet header */ + p->iov = g_new0(struct iovec, page_count + 1); + p->normal = g_new0(ram_addr_t, page_count); ++ ++ if (migrate_use_zero_copy_send()) { ++ p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; ++ } else { ++ p->write_flags = 0; ++ } ++ + socket_send_channel_create(multifd_new_send_channel_async, p); + } + +diff --git a/migration/multifd.h b/migration/multifd.h +index cd495195ce..7ec688fb4f 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -96,6 +96,8 @@ typedef struct { + uint32_t packet_len; + /* pointer to the packet */ + MultiFDPacket_t *packet; ++ /* multifd flags for sending ram */ ++ int write_flags; + /* multifd flags for each packet */ + uint32_t flags; + /* size of the next packet that contains pages */ +diff --git a/migration/socket.c b/migration/socket.c +index 3754d8f72c..4fd5e85f50 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -79,8 +79,9 @@ static void 
socket_outgoing_migration(QIOTask *task, + + trace_migration_socket_outgoing_connected(data->hostname); + +- if (migrate_use_zero_copy_send()) { +- error_setg(&err, "Zero copy send not available in migration"); ++ if (migrate_use_zero_copy_send() && ++ !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { ++ error_setg(&err, "Zero copy send feature not detected in host kernel"); + } + + out: +-- +2.35.3 + diff --git a/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch b/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch new file mode 100644 index 0000000..415e3a9 --- /dev/null +++ b/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch @@ -0,0 +1,102 @@ +From 63255c13492f42a3236d96e706e5f8e70bb4e219 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:36 -0300 +Subject: [PATCH 13/18] multifd: Send header packet without flags if + zero-copy-send is enabled +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [7/11] 137eea685e387d3d6aff187ec3fcac05bc16b6e3 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Since d48c3a0445 ("multifd: Use a single writev on the send side"), +sending the header packet and the memory pages happens in the same +writev, which can potentially make the migration faster. + +Using channel-socket as example, this works well with the default copying +mechanism of sendmsg(), but with zero-copy-send=true, it will cause +the migration to often break. + +This happens because the header packet buffer gets reused quite often, +and there is a high chance that by the time the MSG_ZEROCOPY mechanism get +to send the buffer, it has already changed, sending the wrong data and +causing the migration to abort. 
+ +It means that, as it is, the buffer for the header packet is not suitable +for sending with MSG_ZEROCOPY. + +In order to enable zero copy for multifd, send the header packet on an +individual write(), without any flags, and the remanining pages with a +writev(), as it was happening before. This only changes how a migration +with zero-copy-send=true works, not changing any current behavior for +migrations with zero-copy-send=false. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220513062836.965425-8-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit b7dbdd8e76cd03453c234dbb9578d20969859d74) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 22 +++++++++++++++++++--- + 1 file changed, 19 insertions(+), 3 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index cdb57439a7..8fca6c970e 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -619,6 +619,7 @@ static void *multifd_send_thread(void *opaque) + MultiFDSendParams *p = opaque; + Error *local_err = NULL; + int ret = 0; ++ bool use_zero_copy_send = migrate_use_zero_copy_send(); + + trace_multifd_send_thread_start(p->id); + rcu_register_thread(); +@@ -641,9 +642,14 @@ static void *multifd_send_thread(void *opaque) + if (p->pending_job) { + uint64_t packet_num = p->packet_num; + uint32_t flags = p->flags; +- p->iovs_num = 1; + p->normal_num = 0; + ++ if (use_zero_copy_send) { ++ p->iovs_num = 0; ++ } else { ++ p->iovs_num = 1; ++ } ++ + for (int i = 0; i < p->pages->num; i++) { + p->normal[p->normal_num] = p->pages->offset[i]; + p->normal_num++; +@@ -667,8 +673,18 @@ static void *multifd_send_thread(void *opaque) + trace_multifd_send(p->id, packet_num, p->normal_num, flags, + p->next_packet_size); + +- p->iov[0].iov_len = p->packet_len; +- p->iov[0].iov_base = p->packet; ++ if (use_zero_copy_send) { ++ /* Send header first, without zerocopy */ ++ ret = qio_channel_write_all(p->c, 
(void *)p->packet, ++ p->packet_len, &local_err); ++ if (ret != 0) { ++ break; ++ } ++ } else { ++ /* Send header using the same writev call */ ++ p->iov[0].iov_len = p->packet_len; ++ p->iov[0].iov_base = p->packet; ++ } + + ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, + &local_err); +-- +2.35.3 + diff --git a/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch b/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch new file mode 100644 index 0000000..e6d726a --- /dev/null +++ b/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch @@ -0,0 +1,163 @@ +From 4ca5375a936bc87829c6e2b4620f56c73a5efc70 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:35 -0300 +Subject: [PATCH 12/18] multifd: multifd_send_sync_main now returns negative on + error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [6/11] c8ebdee4327d463c74f4b2eeb42d3c964f314c94 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Even though multifd_send_sync_main() currently emits error_reports, it's +callers don't really check it before continuing. + +Change multifd_send_sync_main() to return -1 on error and 0 on success. +Also change all it's callers to make use of this change and possibly fail +earlier. + +(This change is important to next patch on multifd zero copy +implementation, to make it sure an error in zero-copy flush does not go +unnoticed. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Peter Xu +Message-Id: <20220513062836.965425-7-leobras@redhat.com> +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 33d70973a3a6e8c6b62bcbc64d9e488961981007) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 10 ++++++---- + migration/multifd.h | 2 +- + migration/ram.c | 29 ++++++++++++++++++++++------- + 3 files changed, 29 insertions(+), 12 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 43998ad117..cdb57439a7 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -568,17 +568,17 @@ void multifd_save_cleanup(void) + multifd_send_state = NULL; + } + +-void multifd_send_sync_main(QEMUFile *f) ++int multifd_send_sync_main(QEMUFile *f) + { + int i; + + if (!migrate_use_multifd()) { +- return; ++ return 0; + } + if (multifd_send_state->pages->num) { + if (multifd_send_pages(f) < 0) { + error_report("%s: multifd_send_pages fail", __func__); +- return; ++ return -1; + } + } + for (i = 0; i < migrate_multifd_channels(); i++) { +@@ -591,7 +591,7 @@ void multifd_send_sync_main(QEMUFile *f) + if (p->quit) { + error_report("%s: channel %d has already quit", __func__, i); + qemu_mutex_unlock(&p->mutex); +- return; ++ return -1; + } + + p->packet_num = multifd_send_state->packet_num++; +@@ -610,6 +610,8 @@ void multifd_send_sync_main(QEMUFile *f) + qemu_sem_wait(&p->sem_sync); + } + trace_multifd_send_sync_main(multifd_send_state->packet_num); ++ ++ return 0; + } + + static void *multifd_send_thread(void *opaque) +diff --git a/migration/multifd.h b/migration/multifd.h +index 4dda900a0b..cd495195ce 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -22,7 +22,7 @@ int multifd_load_cleanup(Error **errp); + bool multifd_recv_all_channels_created(void); + bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); + void multifd_recv_sync_main(void); +-void multifd_send_sync_main(QEMUFile *f); ++int multifd_send_sync_main(QEMUFile *f); + int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); + + /* Multifd Compression flags */ +diff --git a/migration/ram.c 
b/migration/ram.c +index 0ef4bd63eb..fb6db54642 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2903,6 +2903,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + { + RAMState **rsp = opaque; + RAMBlock *block; ++ int ret; + + if (compress_threads_save_setup()) { + return -1; +@@ -2937,7 +2938,11 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + ram_control_before_iterate(f, RAM_CONTROL_SETUP); + ram_control_after_iterate(f, RAM_CONTROL_SETUP); + +- multifd_send_sync_main(f); ++ ret = multifd_send_sync_main(f); ++ if (ret < 0) { ++ return ret; ++ } ++ + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + +@@ -3046,7 +3051,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + out: + if (ret >= 0 + && migration_is_setup_or_active(migrate_get_current()->state)) { +- multifd_send_sync_main(rs->f); ++ ret = multifd_send_sync_main(rs->f); ++ if (ret < 0) { ++ return ret; ++ } ++ + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + ram_transferred_add(8); +@@ -3106,13 +3115,19 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_FINISH); + } + +- if (ret >= 0) { +- multifd_send_sync_main(rs->f); +- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); +- qemu_fflush(f); ++ if (ret < 0) { ++ return ret; + } + +- return ret; ++ ret = multifd_send_sync_main(rs->f); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); ++ qemu_fflush(f); ++ ++ return 0; + } + + static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, +-- +2.35.3 + diff --git a/kvm-target-arm-deprecate-named-CPU-models.patch b/kvm-target-arm-deprecate-named-CPU-models.patch new file mode 100644 index 0000000..dbe8d24 --- /dev/null +++ b/kvm-target-arm-deprecate-named-CPU-models.patch @@ -0,0 +1,129 @@ +From 1f8528b71d96c01dd6106f11681f4a4e2776ef5f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Mon, 21 Mar 2022 12:05:42 +0000 +Subject: [PATCH 06/18] 
target/arm: deprecate named CPU models +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [6/6] afddeb9e898206fd04499f01c48caf7dc1a8b8ef (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +KVM requires use of the 'host' CPU model, so named CPU models are only +needed for TCG. Since we don't consider TCG to be supported we can +deprecate all the named CPU models. TCG users can rely on 'max' model. + +Note: this has the effect of deprecating the default built-in CPU +model 'cortex-a57'. Applications using QEMU are expected to make an +explicit choice about which CPU model they want, since no builtin +default can suit all purposes. + +https://bugzilla.redhat.com/show_bug.cgi?id=2060839 +Signed-off-by: Daniel P. Berrangé +--- + target/arm/cpu-qom.h | 1 + + target/arm/cpu.c | 5 +++++ + target/arm/cpu.h | 2 ++ + target/arm/cpu64.c | 8 +++++++- + target/arm/helper.c | 2 ++ + 5 files changed, 17 insertions(+), 1 deletion(-) + +diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h +index 64c44cef2d..82e97249bc 100644 +--- a/target/arm/cpu-qom.h ++++ b/target/arm/cpu-qom.h +@@ -35,6 +35,7 @@ typedef struct ARMCPUInfo { + const char *name; + void (*initfn)(Object *obj); + void (*class_init)(ObjectClass *oc, void *data); ++ const char *deprecation_note; + } ARMCPUInfo; + + void arm_cpu_register(const ARMCPUInfo *info); +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 5d4ca7a227..c74b0fb462 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2105,8 +2105,13 @@ static void arm_cpu_instance_init(Object *obj) + static void cpu_register_class_init(ObjectClass *oc, void *data) + { + ARMCPUClass *acc = ARM_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + acc->info = data; ++ ++ if (acc->info->deprecation_note) { ++ 
cc->deprecation_note = acc->info->deprecation_note; ++ } + } + + void arm_cpu_register(const ARMCPUInfo *info) +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 23879de5fa..c0c9f680e5 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -33,6 +33,8 @@ + #define KVM_HAVE_MCE_INJECTION 1 + #endif + ++#define RHEL_CPU_DEPRECATION "use 'host' / 'max'" ++ + #define EXCP_UDEF 1 /* undefined instruction */ + #define EXCP_SWI 2 /* software interrupt */ + #define EXCP_PREFETCH_ABORT 3 +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index e80b831073..c8f152891c 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -975,7 +975,8 @@ static void aarch64_a64fx_initfn(Object *obj) + #endif /* disabled for RHEL */ + + static const ARMCPUInfo aarch64_cpus[] = { +- { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, ++ { .name = "cortex-a57", .initfn = aarch64_a57_initfn, ++ .deprecation_note = RHEL_CPU_DEPRECATION }, + #if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, + { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, +@@ -1052,8 +1053,13 @@ static void aarch64_cpu_instance_init(Object *obj) + static void cpu_register_class_init(ObjectClass *oc, void *data) + { + ARMCPUClass *acc = ARM_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + acc->info = data; ++ ++ if (acc->info->deprecation_note) { ++ cc->deprecation_note = acc->info->deprecation_note; ++ } + } + + void aarch64_cpu_register(const ARMCPUInfo *info) +diff --git a/target/arm/helper.c b/target/arm/helper.c +index 7d14650615..3d34f63e49 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -8560,6 +8560,7 @@ void arm_cpu_list(void) + static void arm_cpu_add_definition(gpointer data, gpointer user_data) + { + ObjectClass *oc = data; ++ CPUClass *cc = CPU_CLASS(oc); + CpuDefinitionInfoList **cpu_list = user_data; + CpuDefinitionInfo *info; + const char *typename; +@@ -8569,6 +8570,7 @@ static void 
arm_cpu_add_definition(gpointer data, gpointer user_data) + info->name = g_strndup(typename, + strlen(typename) - strlen("-" TYPE_ARM_CPU)); + info->q_typename = g_strdup(typename); ++ info->deprecated = !!cc->deprecation_note; + + QAPI_LIST_PREPEND(*cpu_list, info); + } +-- +2.35.3 + diff --git a/kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch b/kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch new file mode 100644 index 0000000..d63bfdb --- /dev/null +++ b/kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch @@ -0,0 +1,273 @@ +From 577b04770e47aed0f88acb4a415ed04ddbe087f1 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Thu, 17 Mar 2022 17:59:22 +0000 +Subject: [PATCH 04/18] target/i386: deprecate CPUs older than x86_64-v2 ABI +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [4/6] 71f6043f11b31ffa841a2e14d24972e571c18a9e (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +RHEL-9 is compiled with the x86_64-v2 ABI. We use this as a baseline to +select which CPUs we want to support, such that there is at least one +supported guest CPU that can be launched for every physical machine +capable of running RHEL-9 KVM. 
+ +Supported CPUs: + + * QEMU models + + base (QEMU internal) + host (host passthrough) + max (host passthrough for KVM, + all emulated features for TCG) + + * Intel models + + Icelake-Server + Icelake-Server-noTSX + Cascadelake-Server (2019) + Cascadelake-Server-noTSX (2019) + Skylake-Server (2016) + Skylake-Server-IBRS (2016) + Skylake-Server-noTSX-IBRS (2016) + Skylake-Client (2015) + Skylake-Client-IBRS (2015) + Skylake-Client-noTSX-IBRS (2015) + Broadwell (2014) + Broadwell-IBRS (2014) + Broadwell-noTSX (2014) + Broadwell-noTSX-IBRS (2014) + Haswell (2013) + Haswell-IBRS (2013) + Haswell-noTSX (2013) + Haswell-noTSX-IBRS (2013) + IvyBridge (2012) + IvyBridge-IBRS (2012) + SandyBridge (2011) + SandyBridge-IBRS (2011) + Westmere (2010) + Westmere-IBRS (2010) + Nehalem (2008) + Nehalem-IBRS (2008) + + Cooperlake (2020) + Snowridge (2019) + KnightsMill (2017) + Denverton (2016) + + * AMD models + + EPYC-Milan (2021) + EPYC-Rome (2019) + EPYC (2017) + EPYC-IBPB (2017) + Opteron_G5 (2012) + Opteron_G4 (2011) + + * Other + + Dhyana (2018) + +(I've omitted the many -vNNN versions for brevity) + +Deprecated CPUs: + + 486 + athlon + Conroe + core2duo + coreduo + Icelake-Client (already deprecated upstream) + Icelake-Client-noTSX (already deprecated upstream) + kvm32 + kvm64 + n270 + Opteron_G1 + Opteron_G2 + Opteron_G3 + Penryn + pentium2 + pentium3 + pentium + phenom + qemu32 + qemu64 + +The deprecated CPU models are subject to removal in a future +major version of RHEL. + +Note: this has the effect of deprecating the default built-in CPU +model 'qemu64'. Applications using QEMU are expected to make an +explicit choice about which CPU model they want, since no builtin +default can suit all purposes. + +https://bugzilla.redhat.com/show_bug.cgi?id=2060839 +Signed-off-by: Daniel P. 
Berrangé +--- + target/i386/cpu.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index cb6b5467d0..87cb641b5f 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1780,9 +1780,13 @@ static const CPUCaches epyc_milan_cache_info = { + * PT in VMX operation + */ + ++#define RHEL_CPU_DEPRECATION \ ++ "use at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max'" ++ + static const X86CPUDefinition builtin_x86_defs[] = { + { + .name = "qemu64", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 15, +@@ -1803,6 +1807,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "phenom", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 16, +@@ -1835,6 +1840,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "core2duo", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -1877,6 +1883,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "kvm64", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 0xd, + .vendor = CPUID_VENDOR_INTEL, + .family = 15, +@@ -1918,6 +1925,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "qemu32", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 4, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -1932,6 +1940,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "kvm32", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_INTEL, + .family = 15, +@@ -1962,6 +1971,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "coreduo", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -1995,6 +2005,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "486", 
++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 1, + .vendor = CPUID_VENDOR_INTEL, + .family = 4, +@@ -2007,6 +2018,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "pentium", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 1, + .vendor = CPUID_VENDOR_INTEL, + .family = 5, +@@ -2019,6 +2031,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "pentium2", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 2, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2031,6 +2044,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "pentium3", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 3, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2043,6 +2057,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "athlon", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 2, + .vendor = CPUID_VENDOR_AMD, + .family = 6, +@@ -2058,6 +2073,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "n270", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2083,6 +2099,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Conroe", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2123,6 +2140,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Penryn", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -3832,6 +3850,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Opteron_G1", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 15, +@@ -3852,6 +3871,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Opteron_G2", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, 
+ .family = 15, +@@ -3874,6 +3894,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Opteron_G3", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 16, +-- +2.35.3 + diff --git a/kvm-target-s390x-deprecate-CPUs-older-than-z14.patch b/kvm-target-s390x-deprecate-CPUs-older-than-z14.patch new file mode 100644 index 0000000..212900d --- /dev/null +++ b/kvm-target-s390x-deprecate-CPUs-older-than-z14.patch @@ -0,0 +1,194 @@ +From 8459c305914e2a7a19dcd1662d54a89def7acfa6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Thu, 17 Mar 2022 17:59:22 +0000 +Subject: [PATCH 05/18] target/s390x: deprecate CPUs older than z14 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [5/6] 2da9e06cf452287673f94f880a7eb8b2b37b7278 (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +RHEL-9 is compiled with the z14 ABI. We use this as a baseline to +select which CPUs we want to support, such that there is at least one +supported guest CPU that can be launched for every physical +machine capable of running RHEL-9 KVM. 
+ +Supported CPUs: + + gen15a-base + gen15a + gen15b-base + gen15b + gen16a-base + gen16a + gen16b-base + gen16b + max + qemu + z14.2-base + z14.2 + z14-base + z14 + z14ZR1-base + z14ZR1 + +Deprecated CPUs: + + z10BC.2-base + z10BC.2 + z10BC-base + z10BC + z10EC.2-base + z10EC.2 + z10EC.3-base + z10EC.3 + z10EC-base + z10EC + z114-base + z114 + z13.2-base + z13.2 + z13-base + z13s-base + z13s + z13 + z196.2-base + z196.2 + z196-base + z196 + z800-base + z800 + z890.2-base + z890.2 + z890.3-base + z890.3 + z890-base + z890 + z900.2-base + z900.2 + z900.3-base + z900.3 + z900-base + z900 + z990.2-base + z990.2 + z990.3-base + z990.3 + z990.4-base + z990.4 + z990.5-base + z990.5 + z990-base + z990 + z9BC.2-base + z9BC.2 + z9BC-base + z9BC + z9EC.2-base + z9EC.2 + z9EC.3-base + z9EC.3 + z9EC-base + z9EC + zBC12-base + zBC12 + zEC12.2-base + zEC12.2 + zEC12-base + zEC12 + +https://bugzilla.redhat.com/show_bug.cgi?id=2060839 +Signed-off-by: Daniel P. Berrangé +--- + target/s390x/cpu_models.c | 11 +++++++++++ + target/s390x/cpu_models.h | 2 ++ + target/s390x/cpu_models_sysemu.c | 2 ++ + 3 files changed, 15 insertions(+) + +diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c +index 6d71428056..9b9fc41676 100644 +--- a/target/s390x/cpu_models.c ++++ b/target/s390x/cpu_models.c +@@ -45,6 +45,9 @@ + * of a following release have been a superset of the previous release. With + * generation 15 one base feature and one optional feature have been deprecated. 
+ */ ++ ++#define RHEL_CPU_DEPRECATION "use at least 'z14', or 'host' / 'qemu' / 'max'" ++ + static S390CPUDef s390_cpu_defs[] = { + CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"), + CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"), +@@ -852,22 +855,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) + static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) + { + S390CPUClass *xcc = S390_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + /* all base models are migration safe */ + xcc->cpu_def = (const S390CPUDef *) data; + xcc->is_migration_safe = true; + xcc->is_static = true; + xcc->desc = xcc->cpu_def->desc; ++ if (xcc->cpu_def->gen < 14) { ++ cc->deprecation_note = RHEL_CPU_DEPRECATION; ++ } + } + + static void s390_cpu_model_class_init(ObjectClass *oc, void *data) + { + S390CPUClass *xcc = S390_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + /* model that can change between QEMU versions */ + xcc->cpu_def = (const S390CPUDef *) data; + xcc->is_migration_safe = true; + xcc->desc = xcc->cpu_def->desc; ++ if (xcc->cpu_def->gen < 14) { ++ cc->deprecation_note = RHEL_CPU_DEPRECATION; ++ } + } + + static void s390_qemu_cpu_model_class_init(ObjectClass *oc, void *data) +diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h +index 74d1f87e4f..372160bcd7 100644 +--- a/target/s390x/cpu_models.h ++++ b/target/s390x/cpu_models.h +@@ -38,6 +38,8 @@ struct S390CPUDef { + S390FeatBitmap full_feat; + /* used to init full_feat from generated data */ + S390FeatInit full_init; ++ /* if deprecated, provides a suggestion */ ++ const char *deprecation_note; + }; + + /* CPU model based on a CPU definition */ +diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c +index 6a04ccab1b..f3b7c304ec 100644 +--- a/target/s390x/cpu_models_sysemu.c ++++ b/target/s390x/cpu_models_sysemu.c +@@ -61,6 +61,7 @@ static void create_cpu_model_list(ObjectClass 
*klass, void *opaque) + CpuDefinitionInfo *info; + char *name = g_strdup(object_class_get_name(klass)); + S390CPUClass *scc = S390_CPU_CLASS(klass); ++ CPUClass *cc = CPU_CLASS(klass); + + /* strip off the -s390x-cpu */ + g_strrstr(name, "-" TYPE_S390_CPU)[0] = 0; +@@ -70,6 +71,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) + info->migration_safe = scc->is_migration_safe; + info->q_static = scc->is_static; + info->q_typename = g_strdup(object_class_get_name(klass)); ++ info->deprecated = !!cc->deprecation_note; + /* check for unavailable features */ + if (cpu_list_data->model) { + Object *obj; +-- +2.35.3 + diff --git a/kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch b/kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch new file mode 100644 index 0000000..4fcf786 --- /dev/null +++ b/kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch @@ -0,0 +1,157 @@ +From f52aa60217634c96fef59ce76b803a94610bf5c8 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Wed, 15 Jun 2022 15:28:27 +0200 +Subject: [PATCH 01/18] tests/avocado: update aarch64_virt test to exercise + -cpu max +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [1/6] df6839e567180a4c32afd98852f68b2279e00f7c (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824 + +commit 11593544df6f8febb3ce87015c22b429bf43c4c7 +Author: Alex Bennée +Date: Tue Apr 19 10:09:56 2022 +0100 + + tests/avocado: update aarch64_virt test to exercise -cpu max + + The Fedora 29 kernel is quite old and importantly fails when running + in LPA2 scenarios. 
As it's not really exercising much of the CPU space + replace it with a custom 5.16.12 kernel with all the architecture + options turned on. There is a minimal buildroot initramfs included in + the kernel which has a few tools for stress testing the memory + subsystem. The userspace also targets the Neoverse N1 processor so + would fail with a v8.0 cpu like cortex-a53. + + While we are at it move the test into its own file so it can have an + assigned maintainer. + + Signed-off-by: Alex Bennée + Acked-by: Richard Henderson + Tested-by: Richard Henderson + Message-Id: <20220419091020.3008144-2-alex.bennee@linaro.org> + +Signed-off-by: Andrew Jones +--- + MAINTAINERS | 1 + + tests/avocado/boot_linux_console.py | 25 ------------- + tests/avocado/machine_aarch64_virt.py | 51 +++++++++++++++++++++++++++ + 3 files changed, 52 insertions(+), 25 deletions(-) + create mode 100644 tests/avocado/machine_aarch64_virt.py + +diff --git a/MAINTAINERS b/MAINTAINERS +index 2fe20a49ab..bfe8806f60 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -942,6 +942,7 @@ S: Maintained + F: hw/arm/virt* + F: include/hw/arm/virt.h + F: docs/system/arm/virt.rst ++F: tests/avocado/machine_aarch64_virt.py + + Xilinx Zynq + M: Edgar E. 
Iglesias +diff --git a/tests/avocado/boot_linux_console.py b/tests/avocado/boot_linux_console.py +index b40a3abc81..45a2ceda22 100644 +--- a/tests/avocado/boot_linux_console.py ++++ b/tests/avocado/boot_linux_console.py +@@ -325,31 +325,6 @@ def test_mips_malta32el_nanomips_64k_dbg(self): + kernel_hash = '18d1c68f2e23429e266ca39ba5349ccd0aeb7180' + self.do_test_mips_malta32el_nanomips(kernel_url, kernel_hash) + +- def test_aarch64_virt(self): +- """ +- :avocado: tags=arch:aarch64 +- :avocado: tags=machine:virt +- :avocado: tags=accel:tcg +- :avocado: tags=cpu:cortex-a53 +- """ +- kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' +- '/linux/releases/29/Everything/aarch64/os/images/pxeboot' +- '/vmlinuz') +- kernel_hash = '8c73e469fc6ea06a58dc83a628fc695b693b8493' +- kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) +- +- self.vm.set_console() +- kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +- 'console=ttyAMA0') +- self.require_accelerator("tcg") +- self.vm.add_args('-cpu', 'cortex-a53', +- '-accel', 'tcg', +- '-kernel', kernel_path, +- '-append', kernel_command_line) +- self.vm.launch() +- console_pattern = 'Kernel command line: %s' % kernel_command_line +- self.wait_for_console_pattern(console_pattern) +- + def test_aarch64_xlnx_versal_virt(self): + """ + :avocado: tags=arch:aarch64 +diff --git a/tests/avocado/machine_aarch64_virt.py b/tests/avocado/machine_aarch64_virt.py +new file mode 100644 +index 0000000000..21848cba70 +--- /dev/null ++++ b/tests/avocado/machine_aarch64_virt.py +@@ -0,0 +1,51 @@ ++# Functional test that boots a Linux kernel and checks the console ++# ++# Copyright (c) 2022 Linaro Ltd. 
++# ++# Author: ++# Alex Bennée ++# ++# SPDX-License-Identifier: GPL-2.0-or-later ++ ++import time ++ ++from avocado_qemu import QemuSystemTest ++from avocado_qemu import wait_for_console_pattern ++from avocado_qemu import exec_command ++ ++class Aarch64VirtMachine(QemuSystemTest): ++ KERNEL_COMMON_COMMAND_LINE = 'printk.time=0 ' ++ ++ def wait_for_console_pattern(self, success_message, vm=None): ++ wait_for_console_pattern(self, success_message, ++ failure_message='Kernel panic - not syncing', ++ vm=vm) ++ ++ def test_aarch64_virt(self): ++ """ ++ :avocado: tags=arch:aarch64 ++ :avocado: tags=machine:virt ++ :avocado: tags=accel:tcg ++ :avocado: tags=cpu:max ++ """ ++ kernel_url = ('https://fileserver.linaro.org/s/' ++ 'z6B2ARM7DQT3HWN/download') ++ ++ kernel_hash = 'ed11daab50c151dde0e1e9c9cb8b2d9bd3215347' ++ kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) ++ ++ self.vm.set_console() ++ kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + ++ 'console=ttyAMA0') ++ self.require_accelerator("tcg") ++ self.vm.add_args('-cpu', 'max,pauth-impdef=on', ++ '-accel', 'tcg', ++ '-kernel', kernel_path, ++ '-append', kernel_command_line) ++ self.vm.launch() ++ self.wait_for_console_pattern('Welcome to Buildroot') ++ time.sleep(0.1) ++ exec_command(self, 'root') ++ time.sleep(0.1) ++ exec_command(self, 'cat /proc/self/maps') ++ time.sleep(0.1) +-- +2.35.3 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 901b08b..a8042fb 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.0.0 -Release: 7%{?rcrel}%{?dist}%{?cc_suffix} +Release: 8%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -288,6 +288,42 @@ Patch66: kvm-Enable-virtio-iommu-pci-on-x86_64.patch Patch67: kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch # For bz#2092788 - Stalled IO Operations in VM Patch68: kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch +# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 +Patch69: kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch +# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 +Patch70: kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch +# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 +Patch71: kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch +# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 +Patch72: kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch +# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 +Patch73: kvm-target-s390x-deprecate-CPUs-older-than-z14.patch +# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 +Patch74: kvm-target-arm-deprecate-named-CPU-models.patch +# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration +Patch75: kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch +# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration +Patch76: kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch +# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration +Patch77: kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch +# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration +Patch78: kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch +# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration +Patch79: kvm-migration-Add-migrate_use_tls-helper.patch +# For bz#1968509 - Use 
MSG_ZEROCOPY on QEMU Live Migration +Patch80: kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch +# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration +Patch81: kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch +# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration +Patch82: kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch +# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration +Patch83: kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch +# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration +Patch84: kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch +# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration +Patch85: kvm-migration-Change-zero_copy_send-from-migration-param.patch +# For bz#2096143 - The migration port is not released if use it again for recovering postcopy migration +Patch86: kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch # Source-git patches @@ -1323,6 +1359,32 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Tue Jul 05 2022 Camilla Conte - 7.0.0-8 +- kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch [bz#2060839] +- kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch [bz#2060839] +- kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch [bz#2060839] +- kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch [bz#2060839] +- kvm-target-s390x-deprecate-CPUs-older-than-z14.patch [bz#2060839] +- kvm-target-arm-deprecate-named-CPU-models.patch [bz#2060839] +- kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch [bz#1968509] +- kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch [bz#1968509] +- kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch [bz#1968509] +- kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch [bz#1968509] +- kvm-migration-Add-migrate_use_tls-helper.patch [bz#1968509] +- kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch 
[bz#1968509] +- kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch [bz#1968509] +- kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch [bz#1968509] +- kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch [bz#1968509] +- kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch [bz#1968509] +- kvm-migration-Change-zero_copy_send-from-migration-param.patch [bz#1968509] +- kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch [bz#2096143] +- Resolves: bz#2060839 + (Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9) +- Resolves: bz#1968509 + (Use MSG_ZEROCOPY on QEMU Live Migration) +- Resolves: bz#2096143 + (The migration port is not released if use it again for recovering postcopy migration) + * Mon Jun 27 2022 Miroslav Rezanina - 7.0.0-7 - kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch [bz#1952483] - kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch [bz#1952483] From fbb94cc7060bd488539b3164ba1edb2487893643 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 18 Jul 2022 02:51:23 -0400 Subject: [PATCH 164/195] * Mon Jul 18 2022 Miroslav Rezanina - 7.0.0-9 - kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch [bz#2100106] - kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch [bz#2100106] - kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch [bz#2100106] - kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch [bz#2100106] - kvm-virtio-iommu-Fix-migration-regression.patch [bz#2100106] - kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch [bz#2098077] - kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch [bz#2098077] - kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch [bz#2098077] - kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch [bz#2098077] - kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch [bz#2098077] - kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch [bz#2098077] - 
kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch [bz#2098077] - kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch [bz#2098077] - kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch [bz#2098077] - kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch [bz#2098077] - kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch [bz#1951522] - kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch [bz#1951522] - Resolves: bz#2100106 (Fix virtio-iommu/vfio bypass) - Resolves: bz#2098077 (virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions) - Resolves: bz#1951522 (CVE-2021-3507 qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-9.0]) --- ...vent-end-of-track-overrun-CVE-2021-3.patch | 96 +++++++ ...-Split-virtio-scsi-code-from-virtio_.patch | 180 +++++++++++++ ...-bootmap-Improve-the-guessing-logic-.patch | 102 +++++++ ...-netboot.mak-Ignore-Clang-s-warnings.patch | 78 ++++++ ...-virtio-Beautify-the-code-for-readin.patch | 56 ++++ ...-virtio-Introduce-a-macro-for-the-DA.patch | 63 +++++ ...-virtio-Read-device-config-after-fea.patch | 67 +++++ ...-virtio-Set-missing-status-bits-whil.patch | 93 +++++++ ...-virtio-blkdev-Remove-virtio_assume_.patch | 101 +++++++ ...-virtio-blkdev-Request-the-right-fea.patch | 63 +++++ ...-virtio-blkdev-Simplify-fix-virtio_i.patch | 124 +++++++++ ...test-Add-a-regression-test-for-CVE-2.patch | 119 +++++++++ ...-an-assert-check-in-translate-routin.patch | 46 ++++ ...-bypass-mode-support-to-assigned-dev.patch | 250 ++++++++++++++++++ ...irtio-iommu-Fix-migration-regression.patch | 54 ++++ ...ix-the-partial-copy-of-probe-request.patch | 67 +++++ ...Use-recursive-lock-to-avoid-deadlock.patch | 141 ++++++++++ qemu-kvm.spec | 61 ++++- 18 files changed, 1760 insertions(+), 1 deletion(-) create mode 100644 kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch create mode 100644 
kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch create mode 100644 kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch create mode 100644 kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch create mode 100644 kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch create mode 100644 kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch create mode 100644 kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch create mode 100644 kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch create mode 100644 kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch create mode 100644 kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch create mode 100644 kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch create mode 100644 kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch create mode 100644 kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch create mode 100644 kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch create mode 100644 kvm-virtio-iommu-Fix-migration-regression.patch create mode 100644 kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch create mode 100644 kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch diff --git a/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch b/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch new file mode 100644 index 0000000..1bdad27 --- /dev/null +++ b/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch @@ -0,0 +1,96 @@ +From 6ee4a8718dcce2d6da43ee200534b75baf1d7bbe Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 18 Nov 2021 12:57:32 +0100 +Subject: [PATCH 16/17] hw/block/fdc: Prevent end-of-track overrun + (CVE-2021-3507) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 107: hw/block/fdc: Prevent end-of-track overrun 
(CVE-2021-3507) +RH-Commit: [1/2] 9ffc5290348884d20b894fa79f4d0c8089247f8b (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1951522 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Miroslav Rezanina + +Per the 82078 datasheet, if the end-of-track (EOT byte in +the FIFO) is more than the number of sectors per side, the +command is terminated unsuccessfully: + +* 5.2.5 DATA TRANSFER TERMINATION + + The 82078 supports terminal count explicitly through + the TC pin and implicitly through the underrun/over- + run and end-of-track (EOT) functions. For full sector + transfers, the EOT parameter can define the last + sector to be transferred in a single or multisector + transfer. If the last sector to be transferred is a par- + tial sector, the host can stop transferring the data in + mid-sector, and the 82078 will continue to complete + the sector as if a hardware TC was received. The + only difference between these implicit functions and + TC is that they return "abnormal termination" result + status. Such status indications can be ignored if they + were expected. + +* 6.1.3 READ TRACK + + This command terminates when the EOT specified + number of sectors have been read. If the 82078 + does not find an ID Address Mark on the diskette + after the second occurrence of a pulse on the + INDX# pin, then it sets the IC code in Status Regis- + ter 0 to "01" (Abnormal termination), sets the MA bit + in Status Register 1 to "1", and terminates the com- + mand. + +* 6.1.6 VERIFY + + Refer to Table 6-6 and Table 6-7 for information + concerning the values of MT and EC versus SC and + EOT value. + +* Table 6-6. Result Phase Table + +* Table 6-7. Verify Command Result Phase Table + +Fix by aborting the transfer when EOT > # Sectors Per Side. 
+ +Cc: qemu-stable@nongnu.org +Cc: Hervé Poussineau +Fixes: baca51faff0 ("floppy driver: disk geometry auto detect") +Reported-by: Alexander Bulekov +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/339 +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20211118115733.4038610-2-philmd@redhat.com> +Reviewed-by: Hanna Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit defac5e2fbddf8423a354ff0454283a2115e1367) +Signed-off-by: Jon Maloy +--- + hw/block/fdc.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index ca1776121f..6481ec0cfb 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -1532,6 +1532,14 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction) + int tmp; + fdctrl->data_len = 128 << (fdctrl->fifo[5] > 7 ? 7 : fdctrl->fifo[5]); + tmp = (fdctrl->fifo[6] - ks + 1); ++ if (tmp < 0) { ++ FLOPPY_DPRINTF("invalid EOT: %d\n", tmp); ++ fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00); ++ fdctrl->fifo[3] = kt; ++ fdctrl->fifo[4] = kh; ++ fdctrl->fifo[5] = ks; ++ return; ++ } + if (fdctrl->fifo[0] & 0x80) + tmp += fdctrl->fifo[6]; + fdctrl->data_len *= tmp; +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch b/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch new file mode 100644 index 0000000..b212194 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch @@ -0,0 +1,180 @@ +From 2e38b4ec5c53b2b98539a70105d3046e1c452ab8 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 13/17] pc-bios/s390-ccw: Split virtio-scsi code from + virtio_blk_setup_device() + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [8/10] f49c5fb77e05c9dc09ed9f037e37f6a461e4bba6 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David 
Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit cf30b7c4a9b2c64518be8037c2e6670aacdb00b9 +Author: Thomas Huth +Date: Mon Jul 4 13:19:00 2022 +0200 + + pc-bios/s390-ccw: Split virtio-scsi code from virtio_blk_setup_device() + + The next patch is going to add more virtio-block specific code to + virtio_blk_setup_device(), and if the virtio-scsi code is also in + there, this is more cumbersome. And the calling function virtio_setup() + in main.c looks at the device type already anyway, so it's more + logical to separate the virtio-scsi stuff into a new function in + virtio-scsi.c instead. + + Message-Id: <20220704111903.62400-10-thuth@redhat.com> + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/main.c | 24 +++++++++++++++++------- + pc-bios/s390-ccw/virtio-blkdev.c | 20 ++------------------ + pc-bios/s390-ccw/virtio-scsi.c | 19 ++++++++++++++++++- + pc-bios/s390-ccw/virtio-scsi.h | 2 +- + 4 files changed, 38 insertions(+), 27 deletions(-) + +diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c +index 5d2b7ba94d..13e1d8fdf7 100644 +--- a/pc-bios/s390-ccw/main.c ++++ b/pc-bios/s390-ccw/main.c +@@ -14,6 +14,7 @@ + #include "s390-ccw.h" + #include "cio.h" + #include "virtio.h" ++#include "virtio-scsi.h" + #include "dasd-ipl.h" + + char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE))); +@@ -218,6 +219,7 @@ static int virtio_setup(void) + { + VDev *vdev = virtio_get_device(); + QemuIplParameters *early_qipl = (QemuIplParameters *)QIPL_ADDRESS; ++ int ret; + + memcpy(&qipl, early_qipl, sizeof(QemuIplParameters)); + +@@ -225,18 +227,26 @@ static int virtio_setup(void) + menu_setup(); + } + +- if (virtio_get_device_type() == VIRTIO_ID_NET) { ++ switch (vdev->senseid.cu_model) { ++ case VIRTIO_ID_NET: + sclp_print("Network boot device detected\n"); + vdev->netboot_start_addr = qipl.netboot_start_addr; +- } else { +- int ret = 
virtio_blk_setup_device(blk_schid); +- if (ret) { +- return ret; +- } ++ return 0; ++ case VIRTIO_ID_BLOCK: ++ ret = virtio_blk_setup_device(blk_schid); ++ break; ++ case VIRTIO_ID_SCSI: ++ ret = virtio_scsi_setup_device(blk_schid); ++ break; ++ default: ++ panic("\n! No IPL device available !\n"); ++ } ++ ++ if (!ret) { + IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); + } + +- return 0; ++ return ret; + } + + static void ipl_boot_device(void) +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index db1f7f44aa..c175b66a47 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -222,27 +222,11 @@ uint64_t virtio_get_blocks(void) + int virtio_blk_setup_device(SubChannelId schid) + { + VDev *vdev = virtio_get_device(); +- int ret = 0; + + vdev->schid = schid; + virtio_setup_ccw(vdev); + +- switch (vdev->senseid.cu_model) { +- case VIRTIO_ID_BLOCK: +- sclp_print("Using virtio-blk.\n"); +- break; +- case VIRTIO_ID_SCSI: +- IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, +- "Config: sense size mismatch"); +- IPL_assert(vdev->config.scsi.cdb_size == VIRTIO_SCSI_CDB_SIZE, +- "Config: CDB size mismatch"); ++ sclp_print("Using virtio-blk.\n"); + +- sclp_print("Using virtio-scsi.\n"); +- ret = virtio_scsi_setup(vdev); +- break; +- default: +- panic("\n! 
No IPL device available !\n"); +- } +- +- return ret; ++ return 0; + } +diff --git a/pc-bios/s390-ccw/virtio-scsi.c b/pc-bios/s390-ccw/virtio-scsi.c +index 2c8d0f3097..3b7069270c 100644 +--- a/pc-bios/s390-ccw/virtio-scsi.c ++++ b/pc-bios/s390-ccw/virtio-scsi.c +@@ -329,7 +329,7 @@ static void scsi_parse_capacity_report(void *data, + } + } + +-int virtio_scsi_setup(VDev *vdev) ++static int virtio_scsi_setup(VDev *vdev) + { + int retry_test_unit_ready = 3; + uint8_t data[256]; +@@ -430,3 +430,20 @@ int virtio_scsi_setup(VDev *vdev) + + return 0; + } ++ ++int virtio_scsi_setup_device(SubChannelId schid) ++{ ++ VDev *vdev = virtio_get_device(); ++ ++ vdev->schid = schid; ++ virtio_setup_ccw(vdev); ++ ++ IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, ++ "Config: sense size mismatch"); ++ IPL_assert(vdev->config.scsi.cdb_size == VIRTIO_SCSI_CDB_SIZE, ++ "Config: CDB size mismatch"); ++ ++ sclp_print("Using virtio-scsi.\n"); ++ ++ return virtio_scsi_setup(vdev); ++} +diff --git a/pc-bios/s390-ccw/virtio-scsi.h b/pc-bios/s390-ccw/virtio-scsi.h +index 4b14c2c2f9..e6b6cd4815 100644 +--- a/pc-bios/s390-ccw/virtio-scsi.h ++++ b/pc-bios/s390-ccw/virtio-scsi.h +@@ -67,8 +67,8 @@ static inline bool virtio_scsi_response_ok(const VirtioScsiCmdResp *r) + return r->response == VIRTIO_SCSI_S_OK && r->status == CDB_STATUS_GOOD; + } + +-int virtio_scsi_setup(VDev *vdev); + int virtio_scsi_read_many(VDev *vdev, + ulong sector, void *load_addr, int sec_num); ++int virtio_scsi_setup_device(SubChannelId schid); + + #endif /* VIRTIO_SCSI_H */ +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch b/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch new file mode 100644 index 0000000..231a8a0 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch @@ -0,0 +1,102 @@ +From 64fa56e0520215e3909e442f09d8073c1870648a Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 
+Subject: [PATCH 07/17] pc-bios/s390-ccw/bootmap: Improve the guessing logic in + zipl_load_vblk() + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [2/10] ca8f5e847617cf4ac2fd6c38edb2982f32fa3eba (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 422865f6672ee1482b98d18321b55c1ecfb06c82 +Author: Thomas Huth +Date: Mon Jul 4 13:18:54 2022 +0200 + + pc-bios/s390-ccw/bootmap: Improve the guessing logic in zipl_load_vblk() + + The logic of trying a final ISO or ECKD boot on virtio-block devices is + very weird: Since the geometry hardly ever matches in virtio_disk_is_scsi(), + virtio_blk_setup_device() always sets a "guessed" disk geometry via + virtio_assume_scsi() (which is certainly also wrong in a lot of cases). + + zipl_load_vblk() then sees that there's been a "virtio_guessed_disk_nature" + and tries to fix up the geometry again via virtio_assume_iso9660() before + always trying to do ipl_iso_el_torito(). That's a very brain-twisting + way of attempting to boot from ISO images, which won't work anymore after + the following patches that will clean up the virtio_assume_scsi() mess + (and thus get rid of the "virtio_guessed_disk_nature" here). + + Let's try a better approach instead: ISO files always have a magic + string "CD001" at offset 0x8001 (see e.g. the ECMA-119 specification) + which we can use to decide whether we should try to boot in ISO 9660 + mode (which we should also try if we see a sector size of 2048). + + And if we were not able to boot in ISO mode here, the final boot attempt + before panicking is to boot in ECKD mode. 
Since this is our last boot + attempt anyway, simply always assume the ECKD geometry here (if the sector + size was not 4096 yet), so that we also do not depend on the guessed disk + geometry from virtio_blk_setup_device() here anymore. + + Message-Id: <20220704111903.62400-4-thuth@redhat.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/bootmap.c | 27 +++++++++++++++++++++++---- + 1 file changed, 23 insertions(+), 4 deletions(-) + +diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c +index 56411ab3b6..994e59c0b0 100644 +--- a/pc-bios/s390-ccw/bootmap.c ++++ b/pc-bios/s390-ccw/bootmap.c +@@ -780,18 +780,37 @@ static void ipl_iso_el_torito(void) + } + } + ++/** ++ * Detect whether we're trying to boot from an .ISO image. ++ * These always have a signature string "CD001" at offset 0x8001. ++ */ ++static bool has_iso_signature(void) ++{ ++ int blksize = virtio_get_block_size(); ++ ++ if (!blksize || virtio_read(0x8000 / blksize, sec)) { ++ return false; ++ } ++ ++ return !memcmp("CD001", &sec[1], 5); ++} ++ + /*********************************************************************** + * Bus specific IPL sequences + */ + + static void zipl_load_vblk(void) + { +- if (virtio_guessed_disk_nature()) { +- virtio_assume_iso9660(); ++ int blksize = virtio_get_block_size(); ++ ++ if (blksize == VIRTIO_ISO_BLOCK_SIZE || has_iso_signature()) { ++ if (blksize != VIRTIO_ISO_BLOCK_SIZE) { ++ virtio_assume_iso9660(); ++ } ++ ipl_iso_el_torito(); + } +- ipl_iso_el_torito(); + +- if (virtio_guessed_disk_nature()) { ++ if (blksize != VIRTIO_DASD_DEFAULT_BLOCK_SIZE) { + sclp_print("Using guessed DASD geometry.\n"); + virtio_assume_eckd(); + } +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch b/kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch new file mode 100644 index 0000000..00601aa --- /dev/null +++ b/kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch @@ -0,0 +1,78 
@@ +From 56674ee1f25f12978a6a8a1390e11b55b3e0fabe Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 15/17] pc-bios/s390-ccw/netboot.mak: Ignore Clang's warnings + about GNU extensions + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [10/10] 037dab4df23ebb2b42871bca8c842a53a7204b50 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit e2269220acb03e6c6a460c3090d804835e202239 +Author: Thomas Huth +Date: Mon Jul 4 13:19:03 2022 +0200 + + pc-bios/s390-ccw/netboot.mak: Ignore Clang's warnings about GNU extensions + + When compiling the s390-ccw bios with Clang (v14.0), there is currently + an unhelpful warning like this: + + CC pc-bios/s390-ccw/ipv6.o + ../../roms/SLOF/lib/libnet/ipv6.c:447:18: warning: variable length array + folded to constant array as an extension [-Wgnu-folding-constant] + unsigned short raw[ip6size]; + ^ + + SLOF is currently GCC-only and cannot be compiled with Clang yet, so + it is expected that such extensions sneak in there - and as long as + we don't want to compile the code with a compiler that is neither GCC + nor Clang, it is also not necessary to avoid such extensions. + + Thus these GNU-extension related warnings are completely useless in + the s390-ccw bios, especially in the code that is coming from SLOF, + so we should simply disable the related warnings here now. 
+ + Message-Id: <20220704111903.62400-13-thuth@redhat.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/netboot.mak | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/pc-bios/s390-ccw/netboot.mak b/pc-bios/s390-ccw/netboot.mak +index 68b4d7edcb..ad41898cb6 100644 +--- a/pc-bios/s390-ccw/netboot.mak ++++ b/pc-bios/s390-ccw/netboot.mak +@@ -16,9 +16,12 @@ s390-netboot.elf: $(NETOBJS) libnet.a libc.a + s390-netboot.img: s390-netboot.elf + $(call quiet-command,$(STRIP) --strip-unneeded $< -o $@,"STRIP","$(TARGET_DIR)$@") + ++# SLOF is GCC-only, so ignore warnings about GNU extensions with Clang here ++NO_GNU_WARN := $(call cc-option,-Werror $(QEMU_CFLAGS),-Wno-gnu) ++ + # libc files: + +-LIBC_CFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(LIBC_INC) $(LIBNET_INC) \ ++LIBC_CFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(NO_GNU_WARN) $(LIBC_INC) $(LIBNET_INC) \ + -MMD -MP -MT $@ -MF $(@:%.o=%.d) + + CTYPE_OBJS = isdigit.o isxdigit.o toupper.o +@@ -52,7 +55,7 @@ libc.a: $(LIBCOBJS) + + LIBNETOBJS := args.o dhcp.o dns.o icmpv6.o ipv6.o tcp.o udp.o bootp.o \ + dhcpv6.o ethernet.o ipv4.o ndp.o tftp.o pxelinux.o +-LIBNETCFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(LIBC_INC) $(LIBNET_INC) \ ++LIBNETCFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(NO_GNU_WARN) $(LIBC_INC) $(LIBNET_INC) \ + -DDHCPARCH=0x1F -MMD -MP -MT $@ -MF $(@:%.o=%.d) + + %.o : $(SLOF_DIR)/lib/libnet/%.c +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch b/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch new file mode 100644 index 0000000..5e4b689 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch @@ -0,0 +1,56 @@ +From 430e76fd964390db86c8486f76b916a1cf7f74c2 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 12/17] pc-bios/s390-ccw/virtio: Beautify the code for reading + virtqueue configuration + +RH-Author: Thomas Huth +RH-MergeRequest: 106: 
pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [7/10] b15c06b4c5431837672b6cb5d57d09da20718441 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 070824885741f5d2a66626d3c4ecb2773c8e0552 +Author: Thomas Huth +Date: Mon Jul 4 13:18:59 2022 +0200 + + pc-bios/s390-ccw/virtio: Beautify the code for reading virtqueue configuration + + It looks nicer if we separate the run_ccw() from the IPL_assert() + statement, and the error message should talk about "virtio device" + instead of "block device", since this code is nowadays used for + non-block (i.e. network) devices, too. + + Message-Id: <20220704111903.62400-9-thuth@redhat.com> + Reviewed-by: Cornelia Huck + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c +index d8c2b52710..f37510f312 100644 +--- a/pc-bios/s390-ccw/virtio.c ++++ b/pc-bios/s390-ccw/virtio.c +@@ -289,9 +289,8 @@ void virtio_setup_ccw(VDev *vdev) + .num = 0, + }; + +- IPL_assert( +- run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false) == 0, +- "Could not get block device VQ configuration"); ++ rc = run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false); ++ IPL_assert(rc == 0, "Could not get virtio device VQ configuration"); + info.num = config.num; + vring_init(&vdev->vrings[i], &info); + vdev->vrings[i].schid = vdev->schid; +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch b/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch new file mode 100644 index 0000000..04ab605 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch @@ -0,0 +1,63 @@ +From 
7d4f2454f95bfc087ad3f2fe3bc4625dcea3568e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 06/17] pc-bios/s390-ccw/virtio: Introduce a macro for the DASD + block size + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [1/10] 71033934e1e9988bcf71362e02665ceb7449009d (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 1f2c2ee48e87ea743f8e23cc7569dd26c4cf9623 +Author: Thomas Huth +Date: Mon Jul 4 13:18:53 2022 +0200 + + pc-bios/s390-ccw/virtio: Introduce a macro for the DASD block size + + Use VIRTIO_DASD_DEFAULT_BLOCK_SIZE instead of the magic value 4096. + + Message-Id: <20220704111903.62400-3-thuth@redhat.com> + Reviewed-by: Eric Farman + Reviewed-by: Cornelia Huck + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio-blkdev.c | 2 +- + pc-bios/s390-ccw/virtio.h | 1 + + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index 7d35050292..6483307630 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -155,7 +155,7 @@ void virtio_assume_eckd(void) + vdev->config.blk.physical_block_exp = 0; + switch (vdev->senseid.cu_model) { + case VIRTIO_ID_BLOCK: +- vdev->config.blk.blk_size = 4096; ++ vdev->config.blk.blk_size = VIRTIO_DASD_DEFAULT_BLOCK_SIZE; + break; + case VIRTIO_ID_SCSI: + vdev->config.blk.blk_size = vdev->scsi_block_size; +diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h +index 19fceb6495..9e410bde6f 100644 +--- a/pc-bios/s390-ccw/virtio.h ++++ b/pc-bios/s390-ccw/virtio.h +@@ -198,6 +198,7 @@ extern int virtio_read_many(ulong sector, void *load_addr, int sec_num); + #define VIRTIO_SECTOR_SIZE 512 
+ #define VIRTIO_ISO_BLOCK_SIZE 2048 + #define VIRTIO_SCSI_BLOCK_SIZE 512 ++#define VIRTIO_DASD_DEFAULT_BLOCK_SIZE 4096 + + static inline ulong virtio_sector_adjust(ulong sector) + { +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch b/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch new file mode 100644 index 0000000..41ae538 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch @@ -0,0 +1,67 @@ +From 20f8724d0837acbe642c8c7698a4b256f34c1209 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 11/17] pc-bios/s390-ccw/virtio: Read device config after + feature negotiation + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [6/10] 54d21e430b2dfba9e0a0823d6bb8ec7e7f8ff2ff (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit aa5c69ce99411c4886bcd051f288afc02b6d968d +Author: Thomas Huth +Date: Mon Jul 4 13:18:58 2022 +0200 + + pc-bios/s390-ccw/virtio: Read device config after feature negotiation + + Feature negotiation should be done first, since some fields in the + config area can depend on the negotiated features and thus should + rather be read afterwards. + + While we're at it, also adjust the error message here a little bit + (the code is nowadays used for non-block virtio devices, too). 
+ + Message-Id: <20220704111903.62400-8-thuth@redhat.com> + Reviewed-by: Eric Farman + Reviewed-by: Cornelia Huck + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c +index 4e85a2eb82..d8c2b52710 100644 +--- a/pc-bios/s390-ccw/virtio.c ++++ b/pc-bios/s390-ccw/virtio.c +@@ -262,10 +262,6 @@ void virtio_setup_ccw(VDev *vdev) + rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); + IPL_assert(rc == 0, "Could not write DRIVER status to host"); + +- IPL_assert( +- run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0, +- "Could not get block device configuration"); +- + /* Feature negotiation */ + for (i = 0; i < ARRAY_SIZE(vdev->guest_features); i++) { + feats.features = 0; +@@ -278,6 +274,9 @@ void virtio_setup_ccw(VDev *vdev) + IPL_assert(rc == 0, "Could not set features bits"); + } + ++ rc = run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false); ++ IPL_assert(rc == 0, "Could not get virtio device configuration"); ++ + for (i = 0; i < vdev->nr_vqs; i++) { + VqInfo info = { + .queue = (unsigned long long) ring_area + (i * VIRTIO_RING_SIZE), +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch b/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch new file mode 100644 index 0000000..e976047 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch @@ -0,0 +1,93 @@ +From 303fb3ddcdbbd1373c5b1aa28e03f90507e217f3 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 10/17] pc-bios/s390-ccw/virtio: Set missing status bits while + initializing + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [5/10] 
4bc44d9adae055fb60b79d04a2f08535b4d38d2b (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 175aa06a152ef6b58ba9b2e47a1296b024dea70c +Author: Thomas Huth +Date: Mon Jul 4 13:18:57 2022 +0200 + + pc-bios/s390-ccw/virtio: Set missing status bits while initializing + + According to chapter "3.1.1 Driver Requirements: Device Initialization" + of the Virtio specification (v1.1), a driver for a device has to set + the ACKNOWLEDGE and DRIVER bits in the status field after resetting + the device. The s390-ccw bios skipped these steps so far and it seems + like QEMU never cared. Anyway, it's better to follow the spec, so + let's set these bits now in the right spots, too. + + Message-Id: <20220704111903.62400-7-thuth@redhat.com> + Acked-by: Christian Borntraeger + Reviewed-by: Cornelia Huck + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c +index 5d2c6e3381..4e85a2eb82 100644 +--- a/pc-bios/s390-ccw/virtio.c ++++ b/pc-bios/s390-ccw/virtio.c +@@ -220,7 +220,7 @@ int virtio_run(VDev *vdev, int vqid, VirtioCmd *cmd) + void virtio_setup_ccw(VDev *vdev) + { + int i, rc, cfg_size = 0; +- unsigned char status = VIRTIO_CONFIG_S_DRIVER_OK; ++ uint8_t status; + struct VirtioFeatureDesc { + uint32_t features; + uint8_t index; +@@ -234,6 +234,10 @@ void virtio_setup_ccw(VDev *vdev) + + run_ccw(vdev, CCW_CMD_VDEV_RESET, NULL, 0, false); + ++ status = VIRTIO_CONFIG_S_ACKNOWLEDGE; ++ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); ++ IPL_assert(rc == 0, "Could not write ACKNOWLEDGE status to host"); ++ + switch (vdev->senseid.cu_model) { + case VIRTIO_ID_NET: + vdev->nr_vqs = 2; +@@ -253,6 +257,11 @@ void 
virtio_setup_ccw(VDev *vdev) + default: + panic("Unsupported virtio device\n"); + } ++ ++ status |= VIRTIO_CONFIG_S_DRIVER; ++ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); ++ IPL_assert(rc == 0, "Could not write DRIVER status to host"); ++ + IPL_assert( + run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0, + "Could not get block device configuration"); +@@ -291,9 +300,10 @@ void virtio_setup_ccw(VDev *vdev) + run_ccw(vdev, CCW_CMD_SET_VQ, &info, sizeof(info), false) == 0, + "Cannot set VQ info"); + } +- IPL_assert( +- run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false) == 0, +- "Could not write status to host"); ++ ++ status |= VIRTIO_CONFIG_S_DRIVER_OK; ++ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); ++ IPL_assert(rc == 0, "Could not write DRIVER_OK status to host"); + } + + bool virtio_is_supported(SubChannelId schid) +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch b/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch new file mode 100644 index 0000000..109b98e --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch @@ -0,0 +1,101 @@ +From d3335a98a7b6e084aadf4907968536a67cf8e64c Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 09/17] pc-bios/s390-ccw/virtio-blkdev: Remove + virtio_assume_scsi() + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [4/10] bf27f75344f220a03475a2918ed49ec9cd5ba317 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 5447de2619050a0a4dd480b97f88a9b58da360d1 +Author: Thomas Huth +Date: Mon Jul 4 13:18:56 2022 +0200 + + pc-bios/s390-ccw/virtio-blkdev: Remove 
virtio_assume_scsi() + + The virtio_assume_scsi() function is very questionable: First, it + is only called for virtio-blk, and not for virtio-scsi, so the naming + is already quite confusing. Second, it is called if we detected an + "invalid" IPL disk, trying to fix it by blindly setting a sector + size of 512. This of course won't work in most cases since disks + might have a different sector size for a reason. + + Thus let's remove this strange function now. The calling code can + also be removed completely, since there is another spot in main.c + that does "IPL_assert(virtio_ipl_disk_is_valid(), ...)" to make + sure that we do not try to IPL from an invalid device. + + Message-Id: <20220704111903.62400-6-thuth@redhat.com> + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio-blkdev.c | 24 ------------------------ + pc-bios/s390-ccw/virtio.h | 1 - + 2 files changed, 25 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index 7e13155589..db1f7f44aa 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -112,23 +112,6 @@ VirtioGDN virtio_guessed_disk_nature(void) + return virtio_get_device()->guessed_disk_nature; + } + +-void virtio_assume_scsi(void) +-{ +- VDev *vdev = virtio_get_device(); +- +- switch (vdev->senseid.cu_model) { +- case VIRTIO_ID_BLOCK: +- vdev->guessed_disk_nature = VIRTIO_GDN_SCSI; +- vdev->config.blk.blk_size = VIRTIO_SCSI_BLOCK_SIZE; +- vdev->config.blk.physical_block_exp = 0; +- vdev->blk_factor = 1; +- break; +- case VIRTIO_ID_SCSI: +- vdev->scsi_block_size = VIRTIO_SCSI_BLOCK_SIZE; +- break; +- } +-} +- + void virtio_assume_iso9660(void) + { + VDev *vdev = virtio_get_device(); +@@ -247,13 +230,6 @@ int virtio_blk_setup_device(SubChannelId schid) + switch (vdev->senseid.cu_model) { + case VIRTIO_ID_BLOCK: + sclp_print("Using virtio-blk.\n"); +- if (!virtio_ipl_disk_is_valid()) { +- /* make sure all 
getters but blocksize return 0 for +- * invalid IPL disk +- */ +- memset(&vdev->config.blk, 0, sizeof(vdev->config.blk)); +- virtio_assume_scsi(); +- } + break; + case VIRTIO_ID_SCSI: + IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, +diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h +index 241730effe..600ba5052b 100644 +--- a/pc-bios/s390-ccw/virtio.h ++++ b/pc-bios/s390-ccw/virtio.h +@@ -182,7 +182,6 @@ enum guessed_disk_nature_type { + typedef enum guessed_disk_nature_type VirtioGDN; + + VirtioGDN virtio_guessed_disk_nature(void); +-void virtio_assume_scsi(void); + void virtio_assume_eckd(void); + void virtio_assume_iso9660(void); + +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch b/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch new file mode 100644 index 0000000..8bc7a11 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch @@ -0,0 +1,63 @@ +From db58915fcaf3d24b64fe2c34cc15b5596b9a81bb Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 14/17] pc-bios/s390-ccw/virtio-blkdev: Request the right + feature bits + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [9/10] 9dcd8c2f659f366f9487ab6473d1f0d7778b40a7 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 9125a314cca4a1838b09305a87d8efb98f80ab67 +Author: Thomas Huth +Date: Mon Jul 4 13:19:01 2022 +0200 + + pc-bios/s390-ccw/virtio-blkdev: Request the right feature bits + + The virtio-blk code uses the block size and geometry fields in the + config area. According to the virtio-spec, these have to be negotiated + with the right feature bits during initialization, otherwise they + might not be available. 
QEMU is so far very forgiving and always + provides them, but we should not rely on this behavior, so let's + better request them properly via the VIRTIO_BLK_F_GEOMETRY and + VIRTIO_BLK_F_BLK_SIZE feature bits. + + Message-Id: <20220704111903.62400-11-thuth@redhat.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio-blkdev.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index c175b66a47..8271c47296 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -13,6 +13,9 @@ + #include "virtio.h" + #include "virtio-scsi.h" + ++#define VIRTIO_BLK_F_GEOMETRY (1 << 4) ++#define VIRTIO_BLK_F_BLK_SIZE (1 << 6) ++ + static int virtio_blk_read_many(VDev *vdev, ulong sector, void *load_addr, + int sec_num) + { +@@ -223,6 +226,7 @@ int virtio_blk_setup_device(SubChannelId schid) + { + VDev *vdev = virtio_get_device(); + ++ vdev->guest_features[0] = VIRTIO_BLK_F_GEOMETRY | VIRTIO_BLK_F_BLK_SIZE; + vdev->schid = schid; + virtio_setup_ccw(vdev); + +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch b/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch new file mode 100644 index 0000000..818e515 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch @@ -0,0 +1,124 @@ +From f07e4629a7c58407f903810a038660c88c6a6315 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 08/17] pc-bios/s390-ccw/virtio-blkdev: Simplify/fix + virtio_ipl_disk_is_valid() + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [3/10] fb06830a3e50d9da3d84913b50bb227865cc44b3 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: 
http://bugzilla.redhat.com/2098077 + +commit bbf615f7b707f009ef8e757d170902ad33b90644 +Author: Thomas Huth +Date: Mon Jul 4 13:18:55 2022 +0200 + + pc-bios/s390-ccw/virtio-blkdev: Simplify/fix virtio_ipl_disk_is_valid() + + The s390-ccw bios fails to boot if the boot disk is a virtio-blk + disk with a sector size of 4096. For example: + + dasdfmt -b 4096 -d cdl -y -p -M quick /dev/dasdX + fdasd -a /dev/dasdX + install a guest onto /dev/dasdX1 using virtio-blk + qemu-system-s390x -nographic -hda /dev/dasdX1 + + The bios then bails out with: + + ! Cannot read block 0 ! + + Looking at virtio_ipl_disk_is_valid() and especially the function + virtio_disk_is_scsi(), it does not really make sense that we expect + only such a limited disk geometry (like a block size of 512) for + our boot disks. Let's relax the check and allow everything that + remotely looks like a sane disk. + + Message-Id: <20220704111903.62400-5-thuth@redhat.com> + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio-blkdev.c | 41 ++++++-------------------------- + pc-bios/s390-ccw/virtio.h | 2 -- + 2 files changed, 7 insertions(+), 36 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index 6483307630..7e13155589 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -166,46 +166,19 @@ void virtio_assume_eckd(void) + virtio_eckd_sectors_for_block_size(vdev->config.blk.blk_size); + } + +-bool virtio_disk_is_scsi(void) +-{ +- VDev *vdev = virtio_get_device(); +- +- if (vdev->guessed_disk_nature == VIRTIO_GDN_SCSI) { +- return true; +- } +- switch (vdev->senseid.cu_model) { +- case VIRTIO_ID_BLOCK: +- return (vdev->config.blk.geometry.heads == 255) +- && (vdev->config.blk.geometry.sectors == 63) +- && (virtio_get_block_size() == VIRTIO_SCSI_BLOCK_SIZE); +- case VIRTIO_ID_SCSI: +- return true; +- } +- return false; +-} +- +-bool virtio_disk_is_eckd(void) ++bool 
virtio_ipl_disk_is_valid(void) + { ++ int blksize = virtio_get_block_size(); + VDev *vdev = virtio_get_device(); +- const int block_size = virtio_get_block_size(); + +- if (vdev->guessed_disk_nature == VIRTIO_GDN_DASD) { ++ if (vdev->guessed_disk_nature == VIRTIO_GDN_SCSI || ++ vdev->guessed_disk_nature == VIRTIO_GDN_DASD) { + return true; + } +- switch (vdev->senseid.cu_model) { +- case VIRTIO_ID_BLOCK: +- return (vdev->config.blk.geometry.heads == 15) +- && (vdev->config.blk.geometry.sectors == +- virtio_eckd_sectors_for_block_size(block_size)); +- case VIRTIO_ID_SCSI: +- return false; +- } +- return false; +-} + +-bool virtio_ipl_disk_is_valid(void) +-{ +- return virtio_disk_is_scsi() || virtio_disk_is_eckd(); ++ return (vdev->senseid.cu_model == VIRTIO_ID_BLOCK || ++ vdev->senseid.cu_model == VIRTIO_ID_SCSI) && ++ blksize >= 512 && blksize <= 4096; + } + + int virtio_get_block_size(void) +diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h +index 9e410bde6f..241730effe 100644 +--- a/pc-bios/s390-ccw/virtio.h ++++ b/pc-bios/s390-ccw/virtio.h +@@ -186,8 +186,6 @@ void virtio_assume_scsi(void); + void virtio_assume_eckd(void); + void virtio_assume_iso9660(void); + +-extern bool virtio_disk_is_scsi(void); +-extern bool virtio_disk_is_eckd(void); + extern bool virtio_ipl_disk_is_valid(void); + extern int virtio_get_block_size(void); + extern uint8_t virtio_get_heads(void); +-- +2.31.1 + diff --git a/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch b/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch new file mode 100644 index 0000000..7b9a8f3 --- /dev/null +++ b/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch @@ -0,0 +1,119 @@ +From cea7b15c613a11ea15a1458d6990be7044df6643 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 18 Nov 2021 12:57:33 +0100 +Subject: [PATCH 17/17] tests/qtest/fdc-test: Add a regression test for + CVE-2021-3507 +MIME-Version: 1.0 +Content-Type: 
text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 107: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507) +RH-Commit: [2/2] 067c052df790959c28c1fcc16547676d36523bd9 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1951522 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Miroslav Rezanina + +Add the reproducer from https://gitlab.com/qemu-project/qemu/-/issues/339 + +Without the previous commit, when running 'make check-qtest-i386' +with QEMU configured with '--enable-sanitizers' we get: + + ==4028352==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x619000062a00 at pc 0x5626d03c491a bp 0x7ffdb4199410 sp 0x7ffdb4198bc0 + READ of size 786432 at 0x619000062a00 thread T0 + #0 0x5626d03c4919 in __asan_memcpy (qemu-system-i386+0x1e65919) + #1 0x5626d1c023cc in flatview_write_continue softmmu/physmem.c:2787:13 + #2 0x5626d1bf0c0f in flatview_write softmmu/physmem.c:2822:14 + #3 0x5626d1bf0798 in address_space_write softmmu/physmem.c:2914:18 + #4 0x5626d1bf0f37 in address_space_rw softmmu/physmem.c:2924:16 + #5 0x5626d1bf14c8 in cpu_physical_memory_rw softmmu/physmem.c:2933:5 + #6 0x5626d0bd5649 in cpu_physical_memory_write include/exec/cpu-common.h:82:5 + #7 0x5626d0bd0a07 in i8257_dma_write_memory hw/dma/i8257.c:452:9 + #8 0x5626d09f825d in fdctrl_transfer_handler hw/block/fdc.c:1616:13 + #9 0x5626d0a048b4 in fdctrl_start_transfer hw/block/fdc.c:1539:13 + #10 0x5626d09f4c3e in fdctrl_write_data hw/block/fdc.c:2266:13 + #11 0x5626d09f22f7 in fdctrl_write hw/block/fdc.c:829:9 + #12 0x5626d1c20bc5 in portio_write softmmu/ioport.c:207:17 + + 0x619000062a00 is located 0 bytes to the right of 512-byte region [0x619000062800,0x619000062a00) + allocated by thread T0 here: + #0 0x5626d03c66ec in posix_memalign (qemu-system-i386+0x1e676ec) + #1 0x5626d2b988d4 in qemu_try_memalign util/oslib-posix.c:210:11 + #2 0x5626d2b98b0c in qemu_memalign util/oslib-posix.c:226:27 + #3 0x5626d09fbaf0 in fdctrl_realize_common 
hw/block/fdc.c:2341:20 + #4 0x5626d0a150ed in isabus_fdc_realize hw/block/fdc-isa.c:113:5 + #5 0x5626d2367935 in device_set_realized hw/core/qdev.c:531:13 + + SUMMARY: AddressSanitizer: heap-buffer-overflow (qemu-system-i386+0x1e65919) in __asan_memcpy + Shadow bytes around the buggy address: + 0x0c32800044f0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x0c3280004510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x0c3280004520: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x0c3280004530: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + =>0x0c3280004540:[fa]fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004550: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004560: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004570: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004580: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004590: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd + Shadow byte legend (one shadow byte represents 8 application bytes): + Addressable: 00 + Heap left redzone: fa + Freed heap region: fd + ==4028352==ABORTING + +[ kwolf: Added snapshot=on to prevent write file lock failure ] + +Reported-by: Alexander Bulekov +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Alexander Bulekov +Signed-off-by: Kevin Wolf +(cherry picked from commit 46609b90d9e3a6304def11038a76b58ff43f77bc) +Signed-off-by: Jon Maloy +--- + tests/qtest/fdc-test.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c +index b0d40012e6..1d4f852128 100644 +--- a/tests/qtest/fdc-test.c ++++ b/tests/qtest/fdc-test.c +@@ -583,6 +583,26 @@ static void test_cve_2021_20196(void) + qtest_quit(s); + } + ++static void test_cve_2021_3507(void) ++{ ++ QTestState *s; ++ ++ s = qtest_initf("-nographic -m 32M -nodefaults " ++ "-drive file=%s,format=raw,if=floppy,snapshot=on", ++ test_image); 
++ qtest_outl(s, 0x9, 0x0a0206); ++ qtest_outw(s, 0x3f4, 0x1600); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0200); ++ qtest_outw(s, 0x3f4, 0x0200); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_quit(s); ++} ++ + int main(int argc, char **argv) + { + int fd; +@@ -614,6 +634,7 @@ int main(int argc, char **argv) + qtest_add_func("/fdc/read_no_dma_19", test_read_no_dma_19); + qtest_add_func("/fdc/fuzz-registers", fuzz_registers); + qtest_add_func("/fdc/fuzz/cve_2021_20196", test_cve_2021_20196); ++ qtest_add_func("/fdc/fuzz/cve_2021_3507", test_cve_2021_3507); + + ret = g_test_run(); + +-- +2.31.1 + diff --git a/kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch b/kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch new file mode 100644 index 0000000..2a72cc7 --- /dev/null +++ b/kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch @@ -0,0 +1,46 @@ +From 643d9c28ff8b15c333cc748c5e712659ad2a257c Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Mon, 13 Jun 2022 14:10:10 +0800 +Subject: [PATCH 03/17] virtio-iommu: Add an assert check in translate routine + +RH-Author: Eric Auger +RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices +RH-Commit: [3/5] 19f309fd0beda40d65f51c454e37936658ac9f38 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2100106 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 + +With address space switch supported, dma access translation only +happen after endpoint is attached to a non-bypass domain. + +Signed-off-by: Zhenzhong Duan +Message-Id: <20220613061010.2674054-4-zhenzhong.duan@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 23b5f0ff6d923d3bca11cf44eed3daf7a0a836a8) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 440a1c28a7..e970d4d5a6 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -866,6 +866,10 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, + qemu_rec_mutex_lock(&s->mutex); + + ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); ++ ++ if (bypass_allowed) ++ assert(ep && ep->domain && !ep->domain->bypass); ++ + if (!ep) { + if (!bypass_allowed) { + error_report_once("%s sid=%d is not known!!", __func__, sid); +-- +2.31.1 + diff --git a/kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch b/kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch new file mode 100644 index 0000000..3352666 --- /dev/null +++ b/kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch @@ -0,0 +1,250 @@ +From d60774ee3168eefb21a4120a38107cd36ae17e07 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Mon, 13 Jun 2022 14:10:08 +0800 +Subject: [PATCH 01/17] virtio-iommu: Add bypass mode support to assigned + device + +RH-Author: Eric Auger +RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices +RH-Commit: [1/5] 4777815533b31c7f4f09af8902e378fd3fc1186a (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2100106 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 + +Currently assigned devices can not work in virtio-iommu bypass mode. +Guest driver fails to probe the device due to DMA failure. And the +reason is because of lacking GPA -> HPA mappings when VM is created. + +Add a root container memory region to hold both bypass memory region +and iommu memory region, so the switch between them is supported +just like the implementation in virtual VT-d. 
+ +Signed-off-by: Zhenzhong Duan +Message-Id: <20220613061010.2674054-2-zhenzhong.duan@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 90519b90539b16258d1d52b908b199f44877dc18) +Signed-off-by: Eric Auger +--- + hw/virtio/trace-events | 1 + + hw/virtio/virtio-iommu.c | 115 ++++++++++++++++++++++++++++++- + include/hw/virtio/virtio-iommu.h | 2 + + 3 files changed, 116 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index a5102eac9e..2ab5881b88 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -114,6 +114,7 @@ virtio_iommu_remap(const char *name, uint64_t virt_start, uint64_t virt_end, uin + virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "mr=%s old_mask=0x%"PRIx64" new_mask=0x%"PRIx64 + virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s" + virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s" ++virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" + + # virtio-mem.c + virtio_mem_send_response(uint16_t type) "type=%" PRIu16 +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 6d5ea0bdf1..5e99e6c62b 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -70,6 +70,77 @@ static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev) + return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); + } + ++static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev) ++{ ++ uint32_t sid; ++ bool bypassed; ++ VirtIOIOMMU *s = sdev->viommu; ++ VirtIOIOMMUEndpoint *ep; ++ ++ sid = virtio_iommu_get_bdf(sdev); ++ ++ qemu_mutex_lock(&s->mutex); ++ /* need to check bypass before system reset */ ++ if (!s->endpoints) { ++ bypassed = s->config.bypass; ++ goto unlock; ++ } ++ ++ ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); ++ if (!ep || !ep->domain) { ++ 
bypassed = s->config.bypass; ++ } else { ++ bypassed = ep->domain->bypass; ++ } ++ ++unlock: ++ qemu_mutex_unlock(&s->mutex); ++ return bypassed; ++} ++ ++/* Return whether the device is using IOMMU translation. */ ++static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev) ++{ ++ bool use_remapping; ++ ++ assert(sdev); ++ ++ use_remapping = !virtio_iommu_device_bypassed(sdev); ++ ++ trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus), ++ PCI_SLOT(sdev->devfn), ++ PCI_FUNC(sdev->devfn), ++ use_remapping); ++ ++ /* Turn off first then on the other */ ++ if (use_remapping) { ++ memory_region_set_enabled(&sdev->bypass_mr, false); ++ memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true); ++ } else { ++ memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false); ++ memory_region_set_enabled(&sdev->bypass_mr, true); ++ } ++ ++ return use_remapping; ++} ++ ++static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s) ++{ ++ GHashTableIter iter; ++ IOMMUPciBus *iommu_pci_bus; ++ int i; ++ ++ g_hash_table_iter_init(&iter, s->as_by_busptr); ++ while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) { ++ for (i = 0; i < PCI_DEVFN_MAX; i++) { ++ if (!iommu_pci_bus->pbdev[i]) { ++ continue; ++ } ++ virtio_iommu_switch_address_space(iommu_pci_bus->pbdev[i]); ++ } ++ } ++} ++ + /** + * The bus number is used for lookup when SID based operations occur. 
+ * In that case we lazily populate the IOMMUPciBus array from the bus hash +@@ -214,6 +285,7 @@ static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value, + static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) + { + VirtIOIOMMUDomain *domain = ep->domain; ++ IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr); + + if (!ep->domain) { + return; +@@ -222,6 +294,7 @@ static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) + ep->iommu_mr); + QLIST_REMOVE(ep, next); + ep->domain = NULL; ++ virtio_iommu_switch_address_space(sdev); + } + + static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s, +@@ -324,12 +397,39 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, + + trace_virtio_iommu_init_iommu_mr(name); + ++ memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX); ++ address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU); ++ ++ /* ++ * Build the IOMMU disabled container with aliases to the ++ * shared MRs. Note that aliasing to a shared memory region ++ * could help the memory API to detect same FlatViews so we ++ * can have devices to share the same FlatView when in bypass ++ * mode. (either by not configuring virtio-iommu driver or with ++ * "iommu=pt"). It will greatly reduce the total number of ++ * FlatViews of the system hence VM runs faster. 
++ */ ++ memory_region_init_alias(&sdev->bypass_mr, OBJECT(s), ++ "system", get_system_memory(), 0, ++ memory_region_size(get_system_memory())); ++ + memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr), + TYPE_VIRTIO_IOMMU_MEMORY_REGION, + OBJECT(s), name, + UINT64_MAX); +- address_space_init(&sdev->as, +- MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU); ++ ++ /* ++ * Hook both the containers under the root container, we ++ * switch between iommu & bypass MRs by enable/disable ++ * corresponding sub-containers ++ */ ++ memory_region_add_subregion_overlap(&sdev->root, 0, ++ MEMORY_REGION(&sdev->iommu_mr), ++ 0); ++ memory_region_add_subregion_overlap(&sdev->root, 0, ++ &sdev->bypass_mr, 0); ++ ++ virtio_iommu_switch_address_space(sdev); + g_free(name); + } + return &sdev->as; +@@ -343,6 +443,7 @@ static int virtio_iommu_attach(VirtIOIOMMU *s, + uint32_t flags = le32_to_cpu(req->flags); + VirtIOIOMMUDomain *domain; + VirtIOIOMMUEndpoint *ep; ++ IOMMUDevice *sdev; + + trace_virtio_iommu_attach(domain_id, ep_id); + +@@ -376,6 +477,8 @@ static int virtio_iommu_attach(VirtIOIOMMU *s, + QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next); + + ep->domain = domain; ++ sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr); ++ virtio_iommu_switch_address_space(sdev); + + /* Replay domain mappings on the associated memory region */ + g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb, +@@ -888,6 +991,7 @@ static void virtio_iommu_set_config(VirtIODevice *vdev, + return; + } + dev_config->bypass = in_config->bypass; ++ virtio_iommu_switch_address_space_all(dev); + } + + trace_virtio_iommu_set_config(in_config->bypass); +@@ -1027,6 +1131,8 @@ static void virtio_iommu_system_reset(void *opaque) + * system reset + */ + s->config.bypass = s->boot_bypass; ++ virtio_iommu_switch_address_space_all(s); ++ + } + + static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) +@@ -1043,6 +1149,11 @@ static void virtio_iommu_device_realize(DeviceState 
*dev, Error **errp) + virtio_iommu_handle_command); + s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL); + ++ /* ++ * config.bypass is needed to get initial address space early, such as ++ * in vfio realize ++ */ ++ s->config.bypass = s->boot_bypass; + s->config.page_size_mask = TARGET_PAGE_MASK; + s->config.input_range.end = UINT64_MAX; + s->config.domain_range.end = UINT32_MAX; +diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h +index 84391f8448..102eeefa73 100644 +--- a/include/hw/virtio/virtio-iommu.h ++++ b/include/hw/virtio/virtio-iommu.h +@@ -37,6 +37,8 @@ typedef struct IOMMUDevice { + int devfn; + IOMMUMemoryRegion iommu_mr; + AddressSpace as; ++ MemoryRegion root; /* The root container of the device */ ++ MemoryRegion bypass_mr; /* The alias of shared memory MR */ + } IOMMUDevice; + + typedef struct IOMMUPciBus { +-- +2.31.1 + diff --git a/kvm-virtio-iommu-Fix-migration-regression.patch b/kvm-virtio-iommu-Fix-migration-regression.patch new file mode 100644 index 0000000..f5ae4d6 --- /dev/null +++ b/kvm-virtio-iommu-Fix-migration-regression.patch @@ -0,0 +1,54 @@ +From 8d45902b4884315ec090e607e9f03606b21001cf Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Fri, 24 Jun 2022 17:37:40 +0800 +Subject: [PATCH 05/17] virtio-iommu: Fix migration regression + +RH-Author: Eric Auger +RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices +RH-Commit: [5/5] 9652c4aaaf88e24083fab1fbc3d1423260c93ca6 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2100106 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 + +We also need to switch to the right address space on dest side +after loading the device status. DMA to wrong address space is +destructive. 
+ +Fixes: 3facd774962fd ("virtio-iommu: Add bypass mode support to assigned device") +Suggested-by: Eric Auger +Signed-off-by: Zhenzhong Duan +Message-Id: <20220624093740.3525267-1-zhenzhong.duan@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Eric Auger +(cherry picked from commit d355566bd958e24e7e384da6ea89a9fc88d7bfed) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 44a041dec9..2012835554 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -1324,6 +1324,14 @@ static int iommu_post_load(void *opaque, int version_id) + VirtIOIOMMU *s = opaque; + + g_tree_foreach(s->domains, reconstruct_endpoints, s); ++ ++ /* ++ * Memory regions are dynamically turned on/off depending on ++ * 'config.bypass' and attached domain type if there is. After ++ * migration, we need to make sure the memory regions are ++ * still correct. 
++ */ ++ virtio_iommu_switch_address_space_all(s); + return 0; + } + +-- +2.31.1 + diff --git a/kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch b/kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch new file mode 100644 index 0000000..7747bfe --- /dev/null +++ b/kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch @@ -0,0 +1,67 @@ +From b681247c29b59af40c86f8f0ae5709138ae9bf1a Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 23 Jun 2022 10:31:52 +0800 +Subject: [PATCH 04/17] virtio-iommu: Fix the partial copy of probe request + +RH-Author: Eric Auger +RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices +RH-Commit: [4/5] c402164414a8e69bbb6df20af3c2b6d2589d6f3e (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2100106 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 + +The structure of probe request doesn't include the tail, this leads +to a few field missed to be copied. Currently this isn't an issue as +those missed field belong to reserved field, just in case reserved +field will be used in the future. + +Changed 4th parameter of virtio_iommu_iov_to_req() to receive size +of device-readable part. + +Fixes: 1733eebb9e75b ("virtio-iommu: Implement RESV_MEM probe request") +Signed-off-by: Zhenzhong Duan +Message-Id: <20220623023152.3473231-1-zhenzhong.duan@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: Jean-Philippe Brucker +Reviewed-by: Eric Auger +(cherry picked from commit 45461aace83d961e933b27519b81d17b4c690514) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index e970d4d5a6..44a041dec9 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -676,11 +676,10 @@ static int virtio_iommu_probe(VirtIOIOMMU *s, + + static int virtio_iommu_iov_to_req(struct iovec *iov, + unsigned int iov_cnt, +- void *req, size_t req_sz) ++ void *req, size_t payload_sz) + { +- size_t sz, payload_sz = req_sz - sizeof(struct virtio_iommu_req_tail); ++ size_t sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz); + +- sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz); + if (unlikely(sz != payload_sz)) { + return VIRTIO_IOMMU_S_INVAL; + } +@@ -693,7 +692,8 @@ static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s, \ + unsigned int iov_cnt) \ + { \ + struct virtio_iommu_req_ ## __req req; \ +- int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req)); \ ++ int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, \ ++ sizeof(req) - sizeof(struct virtio_iommu_req_tail));\ + \ + return ret ? 
ret : virtio_iommu_ ## __req(s, &req); \ + } +-- +2.31.1 + diff --git a/kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch b/kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch new file mode 100644 index 0000000..df961b0 --- /dev/null +++ b/kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch @@ -0,0 +1,141 @@ +From 881c999e302e7ee1212b47c523a2cf442c549417 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Mon, 13 Jun 2022 14:10:09 +0800 +Subject: [PATCH 02/17] virtio-iommu: Use recursive lock to avoid deadlock + +RH-Author: Eric Auger +RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices +RH-Commit: [2/5] 67dce1eecb49555f728f119f8efac00417ff65bf (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2100106 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 + +When switching address space with mutex lock hold, mapping will be +replayed for assigned device. This will trigger relock deadlock. + +Also release the mutex resource in unrealize routine. + +Signed-off-by: Zhenzhong Duan +Message-Id: <20220613061010.2674054-3-zhenzhong.duan@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 08f2030a2e46f1e93d186b3a683e5caef1df562b) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 20 +++++++++++--------- + include/hw/virtio/virtio-iommu.h | 2 +- + 2 files changed, 12 insertions(+), 10 deletions(-) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 5e99e6c62b..440a1c28a7 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -79,7 +79,7 @@ static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev) + + sid = virtio_iommu_get_bdf(sdev); + +- qemu_mutex_lock(&s->mutex); ++ qemu_rec_mutex_lock(&s->mutex); + /* need to check bypass before system reset */ + if (!s->endpoints) { + bypassed = s->config.bypass; +@@ -94,7 +94,7 @@ static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev) + } + + unlock: +- qemu_mutex_unlock(&s->mutex); ++ qemu_rec_mutex_unlock(&s->mutex); + return bypassed; + } + +@@ -746,7 +746,7 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) + tail.status = VIRTIO_IOMMU_S_DEVERR; + goto out; + } +- qemu_mutex_lock(&s->mutex); ++ qemu_rec_mutex_lock(&s->mutex); + switch (head.type) { + case VIRTIO_IOMMU_T_ATTACH: + tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt); +@@ -775,7 +775,7 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) + default: + tail.status = VIRTIO_IOMMU_S_UNSUPP; + } +- qemu_mutex_unlock(&s->mutex); ++ qemu_rec_mutex_unlock(&s->mutex); + + out: + sz = iov_from_buf(elem->in_sg, elem->in_num, 0, +@@ -863,7 +863,7 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, + sid = virtio_iommu_get_bdf(sdev); + + trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag); +- qemu_mutex_lock(&s->mutex); ++ qemu_rec_mutex_lock(&s->mutex); + + ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); + if (!ep) { +@@ -947,7 +947,7 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, + 
trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid); + + unlock: +- qemu_mutex_unlock(&s->mutex); ++ qemu_rec_mutex_unlock(&s->mutex); + return entry; + } + +@@ -1036,7 +1036,7 @@ static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n) + + sid = virtio_iommu_get_bdf(sdev); + +- qemu_mutex_lock(&s->mutex); ++ qemu_rec_mutex_lock(&s->mutex); + + if (!s->endpoints) { + goto unlock; +@@ -1050,7 +1050,7 @@ static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n) + g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr); + + unlock: +- qemu_mutex_unlock(&s->mutex); ++ qemu_rec_mutex_unlock(&s->mutex); + } + + static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr, +@@ -1169,7 +1169,7 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) + virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE); + virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG); + +- qemu_mutex_init(&s->mutex); ++ qemu_rec_mutex_init(&s->mutex); + + s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free); + +@@ -1197,6 +1197,8 @@ static void virtio_iommu_device_unrealize(DeviceState *dev) + g_tree_destroy(s->endpoints); + } + ++ qemu_rec_mutex_destroy(&s->mutex); ++ + virtio_delete_queue(s->req_vq); + virtio_delete_queue(s->event_vq); + virtio_cleanup(vdev); +diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h +index 102eeefa73..2ad5ee320b 100644 +--- a/include/hw/virtio/virtio-iommu.h ++++ b/include/hw/virtio/virtio-iommu.h +@@ -58,7 +58,7 @@ struct VirtIOIOMMU { + ReservedRegion *reserved_regions; + uint32_t nb_reserved_regions; + GTree *domains; +- QemuMutex mutex; ++ QemuRecMutex mutex; + GTree *endpoints; + bool boot_bypass; + }; +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index a8042fb..207d6e7 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine 
emulator and virtualizer Name: qemu-kvm Version: 7.0.0 -Release: 8%{?rcrel}%{?dist}%{?cc_suffix} +Release: 9%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -324,6 +324,40 @@ Patch84: kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch Patch85: kvm-migration-Change-zero_copy_send-from-migration-param.patch # For bz#2096143 - The migration port is not released if use it again for recovering postcopy migration Patch86: kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch +# For bz#2100106 - Fix virtio-iommu/vfio bypass +Patch87: kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch +# For bz#2100106 - Fix virtio-iommu/vfio bypass +Patch88: kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch +# For bz#2100106 - Fix virtio-iommu/vfio bypass +Patch89: kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch +# For bz#2100106 - Fix virtio-iommu/vfio bypass +Patch90: kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch +# For bz#2100106 - Fix virtio-iommu/vfio bypass +Patch91: kvm-virtio-iommu-Fix-migration-regression.patch +# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch92: kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch +# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch93: kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch +# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch94: kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch +# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch95: 
kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch +# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch96: kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch +# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch97: kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch +# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch98: kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch +# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch99: kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch +# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch100: kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch +# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch101: kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch +# For bz#1951522 - CVE-2021-3507 qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-9.0] +Patch102: kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch +# For bz#1951522 - CVE-2021-3507 qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-9.0] +Patch103: kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch # Source-git patches @@ -1359,6 +1393,31 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Jul 18 2022 Miroslav Rezanina - 7.0.0-9 +- kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch [bz#2100106] +- kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch [bz#2100106] +- kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch [bz#2100106] +- 
kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch [bz#2100106] +- kvm-virtio-iommu-Fix-migration-regression.patch [bz#2100106] +- kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch [bz#2098077] +- kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch [bz#2098077] +- kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch [bz#2098077] +- kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch [bz#2098077] +- kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch [bz#2098077] +- kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch [bz#2098077] +- kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch [bz#2098077] +- kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch [bz#2098077] +- kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch [bz#2098077] +- kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch [bz#2098077] +- kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch [bz#1951522] +- kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch [bz#1951522] +- Resolves: bz#2100106 + (Fix virtio-iommu/vfio bypass) +- Resolves: bz#2098077 + (virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions) +- Resolves: bz#1951522 + (CVE-2021-3507 qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-9.0]) + * Tue Jul 05 2022 Camilla Conte - 7.0.0-8 - kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch [bz#2060839] - kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch [bz#2060839] From 23d471091d195de28bc247989696d18ac6397ce5 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 8 Aug 2022 04:22:43 -0400 Subject: [PATCH 165/195] * Mon Aug 08 2022 Miroslav Rezanina - 7.0.0-10 - kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch [bz#1939363] - kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch [bz#1939363] - kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch [bz#1939363] 
- kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch [bz#1939363] - kvm-meson-create-have_vhost_-variables.patch [bz#1939363] - kvm-meson-use-have_vhost_-variables-to-pick-sources.patch [bz#1939363] - kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch [bz#1939363] - kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch [bz#1939363] - kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch [bz#1939363] - kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch [bz#1939363] - kvm-vhost-Reorder-vhost_svq_kick.patch [bz#1939363] - kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch [bz#1939363] - kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch [bz#1939363] - kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch [bz#1939363] - kvm-vhost-Add-SVQDescState.patch [bz#1939363] - kvm-vhost-Track-number-of-descs-in-SVQDescState.patch [bz#1939363] - kvm-vhost-add-vhost_svq_push_elem.patch [bz#1939363] - kvm-vhost-Expose-vhost_svq_add.patch [bz#1939363] - kvm-vhost-add-vhost_svq_poll.patch [bz#1939363] - kvm-vhost-Add-svq-avail_handler-callback.patch [bz#1939363] - kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch [bz#1939363] - kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch [bz#1939363] - kvm-vdpa-manual-forward-CVQ-buffers.patch [bz#1939363] - kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch [bz#1939363] - kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch [bz#1939363] - kvm-vdpa-Add-device-migration-blocker.patch [bz#1939363] - kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch [bz#1939363] - kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch [bz#2111994] - kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch [bz#2111994] - kvm-kvm-don-t-use-perror-without-useful-errno.patch [bz#2095608] - kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch [bz#2099934] - kvm-Revert-migration-Simplify-unqueue_page.patch [bz#2099934] - Resolves: bz#1939363 (vDPA control virtqueue support in Qemu) - 
Resolves: bz#2111994 (RHEL9: skey test in kvm_unit_test got failed) - Resolves: bz#2095608 (Please correct the error message when try to start qemu with "-M kernel-irqchip=split") - Resolves: bz#2099934 (Guest reboot on destination host after postcopy migration completed) --- ...vert-migration-Simplify-unqueue_page.patch | 134 ++++++++ ...virtio-Replace-g_memdup-by-g_memdup2.patch | 95 ++++++ ...on-t-use-perror-without-useful-errno.patch | 62 ++++ kvm-meson-create-have_vhost_-variables.patch | 154 +++++++++ ...ave_vhost_-variables-to-pick-sources.patch | 213 ++++++++++++ ...es-before-compressing-them-with-zlib.patch | 142 ++++++++ ...nux-headers-linux-kvm.h-to-v5.18-rc6.patch | 106 ++++++ ...-Honor-storage-keys-during-emulation.patch | 103 ++++++ kvm-vdpa-Add-device-migration-blocker.patch | 106 ++++++ ...-Add-x-svq-to-NetdevVhostVDPAOptions.patch | 223 ++++++++++++ ...compiler-to-squash-reads-to-used-idx.patch | 65 ++++ ...ffer-CVQ-support-on-shadow-virtqueue.patch | 323 ++++++++++++++++++ ...t-vhost_vdpa_dma_map-and-unmap-calls.patch | 84 +++++ ...-features-part-from-vhost_vdpa_get_m.patch | 108 ++++++ kvm-vdpa-manual-forward-CVQ-buffers.patch | 166 +++++++++ kvm-vhost-Add-SVQDescState.patch | 135 ++++++++ ...vhost-Add-svq-avail_handler-callback.patch | 164 +++++++++ ...heck-for-queue-full-at-vhost_svq_add.patch | 134 ++++++++ ...-vhost_svq_add-from-VirtQueueElement.patch | 138 ++++++++ kvm-vhost-Expose-vhost_svq_add.patch | 73 ++++ ...Fix-device-s-used-descriptor-dequeue.patch | 83 +++++ ...Fix-element-in-vhost_svq_add-failure.patch | 68 ++++ ...vhost_svq_kick-call-to-vhost_svq_add.patch | 61 ++++ kvm-vhost-Reorder-vhost_svq_kick.patch | 88 +++++ ...k-descriptor-chain-in-private-at-SVQ.patch | 123 +++++++ ...rack-number-of-descs-in-SVQDescState.patch | 81 +++++ kvm-vhost-add-vhost_svq_poll.patch | 92 +++++ kvm-vhost-add-vhost_svq_push_elem.patch | 83 +++++ ...iptor-translation-to-vhost_svq_vring.patch | 120 +++++++ ...dd-stubs-for-when-no-virtio-net-devi.patch | 
87 +++++ kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch | 69 ++++ ...rtio-net-Expose-ctrl-virtqueue-logic.patch | 169 +++++++++ qemu-kvm.spec | 108 +++++- 33 files changed, 3959 insertions(+), 1 deletion(-) create mode 100644 kvm-Revert-migration-Simplify-unqueue_page.patch create mode 100644 kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch create mode 100644 kvm-kvm-don-t-use-perror-without-useful-errno.patch create mode 100644 kvm-meson-create-have_vhost_-variables.patch create mode 100644 kvm-meson-use-have_vhost_-variables-to-pick-sources.patch create mode 100644 kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch create mode 100644 kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch create mode 100644 kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch create mode 100644 kvm-vdpa-Add-device-migration-blocker.patch create mode 100644 kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch create mode 100644 kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch create mode 100644 kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch create mode 100644 kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch create mode 100644 kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch create mode 100644 kvm-vdpa-manual-forward-CVQ-buffers.patch create mode 100644 kvm-vhost-Add-SVQDescState.patch create mode 100644 kvm-vhost-Add-svq-avail_handler-callback.patch create mode 100644 kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch create mode 100644 kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch create mode 100644 kvm-vhost-Expose-vhost_svq_add.patch create mode 100644 kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch create mode 100644 kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch create mode 100644 kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch create mode 100644 kvm-vhost-Reorder-vhost_svq_kick.patch create mode 100644 kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch create mode 
100644 kvm-vhost-Track-number-of-descs-in-SVQDescState.patch create mode 100644 kvm-vhost-add-vhost_svq_poll.patch create mode 100644 kvm-vhost-add-vhost_svq_push_elem.patch create mode 100644 kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch create mode 100644 kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch create mode 100644 kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch create mode 100644 kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch diff --git a/kvm-Revert-migration-Simplify-unqueue_page.patch b/kvm-Revert-migration-Simplify-unqueue_page.patch new file mode 100644 index 0000000..f5c97f6 --- /dev/null +++ b/kvm-Revert-migration-Simplify-unqueue_page.patch @@ -0,0 +1,134 @@ +From 5ea59b17866add54e5ae8c76d3cb472c67e1fa91 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 2 Aug 2022 08:19:49 +0200 +Subject: [PATCH 32/32] Revert "migration: Simplify unqueue_page()" + +RH-Author: Thomas Huth +RH-MergeRequest: 112: Fix postcopy migration on s390x +RH-Commit: [2/2] 3913c9ed3f27f4b66245913da29d0c46db0c6567 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2099934 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Peter Xu + +This reverts commit cfd66f30fb0f735df06ff4220e5000290a43dad3. + +The simplification of unqueue_page() introduced a bug that sometimes +breaks migration on s390x hosts. + +The problem is not fully understood yet, but since we are already in +the freeze for QEMU 7.1 and we need something working there, let's +revert this patch for the upcoming release. The optimization can be +redone later again in a proper way if necessary. + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2099934 +Signed-off-by: Thomas Huth +Message-Id: <20220802061949.331576-1-thuth@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 777f53c75983dd10756f5dbfc8af50fe11da81c1) +Conflicts: + migration/trace-events + (trivial contextual conflict) +Signed-off-by: Thomas Huth +--- + migration/ram.c | 37 ++++++++++++++++++++++++++----------- + migration/trace-events | 3 ++- + 2 files changed, 28 insertions(+), 12 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index fb6db54642..ee40e4a718 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1548,7 +1548,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) + { + struct RAMSrcPageRequest *entry; + RAMBlock *block = NULL; +- size_t page_size; + + if (!postcopy_has_request(rs)) { + return NULL; +@@ -1565,13 +1564,10 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) + entry = QSIMPLEQ_FIRST(&rs->src_page_requests); + block = entry->rb; + *offset = entry->offset; +- page_size = qemu_ram_pagesize(block); +- /* Each page request should only be multiple page size of the ramblock */ +- assert((entry->len % page_size) == 0); + +- if (entry->len > page_size) { +- entry->len -= page_size; +- entry->offset += page_size; ++ if (entry->len > TARGET_PAGE_SIZE) { ++ entry->len -= TARGET_PAGE_SIZE; ++ entry->offset += TARGET_PAGE_SIZE; + } else { + memory_region_unref(block->mr); + QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); +@@ -1579,9 +1575,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) + migration_consume_urgent_request(); + } + +- trace_unqueue_page(block->idstr, *offset, +- test_bit((*offset >> TARGET_PAGE_BITS), block->bmap)); +- + return block; + } + +@@ -1956,8 +1949,30 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) + { + RAMBlock *block; + ram_addr_t offset; ++ bool dirty; ++ ++ do { ++ block = unqueue_page(rs, &offset); ++ /* ++ * We're sending this page, and since it's postcopy nothing else ++ * will dirty it, and we must make sure it doesn't get sent again ++ * even if this queue request was received 
after the background ++ * search already sent it. ++ */ ++ if (block) { ++ unsigned long page; ++ ++ page = offset >> TARGET_PAGE_BITS; ++ dirty = test_bit(page, block->bmap); ++ if (!dirty) { ++ trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset, ++ page); ++ } else { ++ trace_get_queued_page(block->idstr, (uint64_t)offset, page); ++ } ++ } + +- block = unqueue_page(rs, &offset); ++ } while (block && !dirty); + + if (!block) { + /* +diff --git a/migration/trace-events b/migration/trace-events +index 1aec580e92..09d61ed1f4 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -85,6 +85,8 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)" + qemu_file_fclose(void) "" + + # ram.c ++get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx" ++get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx" + migration_bitmap_sync_start(void) "" + migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64 + migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx" +@@ -110,7 +112,6 @@ ram_save_iterate_big_wait(uint64_t milliconds, int iterations) "big wait: %" PRI + ram_load_complete(int ret, uint64_t seq_iter) "exit_code %d seq iteration %" PRIu64 + ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu" + ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu" +-unqueue_page(char *block, uint64_t offset, bool dirty) "ramblock '%s' offset 0x%"PRIx64" dirty %d" + + # multifd.c + multifd_new_send_channel_async(uint8_t id) "channel %u" +-- +2.31.1 + diff --git a/kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch 
b/kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch new file mode 100644 index 0000000..44897ac --- /dev/null +++ b/kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch @@ -0,0 +1,95 @@ +From 4dad0e9abbc843fba4e5fee6e7aa1b0db13f5898 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:27:35 +0200 +Subject: [PATCH 03/32] hw/virtio: Replace g_memdup() by g_memdup2() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [3/27] ae196903eb1a7aebbf999100e997cf82e5024cb6 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit d792199de55ca5cb5334016884039c740290b5c7 +Author: Philippe Mathieu-Daudé +Date: Thu May 12 19:57:46 2022 +0200 + + hw/virtio: Replace g_memdup() by g_memdup2() + + Per https://discourse.gnome.org/t/port-your-module-from-g-memdup-to-g-memdup2-now/5538 + + The old API took the size of the memory to duplicate as a guint, + whereas most memory functions take memory sizes as a gsize. This + made it easy to accidentally pass a gsize to g_memdup(). For large + values, that would lead to a silent truncation of the size from 64 + to 32 bits, and result in a heap area being returned which is + significantly smaller than what the caller expects. This can likely + be exploited in various modules to cause a heap buffer overflow. + + Replace g_memdup() by the safer g_memdup2() wrapper. + + Acked-by: Jason Wang + Acked-by: Eugenio Pérez + Signed-off-by: Philippe Mathieu-Daudé + Message-Id: <20220512175747.142058-6-eperezma@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +Signed-off-by: Eugenio Pérez +--- + hw/net/virtio-net.c | 3 ++- + hw/virtio/virtio-crypto.c | 6 +++--- + 2 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 099e65036d..633de61513 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1458,7 +1458,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) + } + + iov_cnt = elem->out_num; +- iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num); ++ iov2 = iov = g_memdup2(elem->out_sg, ++ sizeof(struct iovec) * elem->out_num); + s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl)); + iov_discard_front(&iov, &iov_cnt, sizeof(ctrl)); + if (s != sizeof(ctrl)) { +diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c +index dcd80b904d..0e31e3cc04 100644 +--- a/hw/virtio/virtio-crypto.c ++++ b/hw/virtio/virtio-crypto.c +@@ -242,7 +242,7 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) + } + + out_num = elem->out_num; +- out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num); ++ out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num); + out_iov = out_iov_copy; + + in_num = elem->in_num; +@@ -605,11 +605,11 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request) + } + + out_num = elem->out_num; +- out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num); ++ out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num); + out_iov = out_iov_copy; + + in_num = elem->in_num; +- in_iov_copy = g_memdup(elem->in_sg, sizeof(in_iov[0]) * in_num); ++ in_iov_copy = g_memdup2(elem->in_sg, sizeof(in_iov[0]) * in_num); + in_iov = in_iov_copy; + + if (unlikely(iov_to_buf(out_iov, out_num, 0, &req, sizeof(req)) +-- +2.31.1 + diff --git a/kvm-kvm-don-t-use-perror-without-useful-errno.patch b/kvm-kvm-don-t-use-perror-without-useful-errno.patch new file mode 100644 index 0000000..a78c089 --- /dev/null +++ 
b/kvm-kvm-don-t-use-perror-without-useful-errno.patch @@ -0,0 +1,62 @@ +From 9ddefaedf423ec03eadaf17496c14e0d7b2381c8 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Thu, 28 Jul 2022 16:24:46 +0200 +Subject: [PATCH 30/32] kvm: don't use perror() without useful errno + +RH-Author: Cornelia Huck +RH-MergeRequest: 110: kvm: don't use perror() without useful errno +RH-Commit: [1/1] 20e51aac6767c1f89f74c7d692d1fb7689eff5f0 (cohuck/qemu-kvm-c9s) +RH-Bugzilla: 2095608 +RH-Acked-by: Eric Auger +RH-Acked-by: Thomas Huth +RH-Acked-by: Gavin Shan + +perror() is designed to append the decoded errno value to a +string. This, however, only makes sense if we called something that +actually sets errno prior to that. + +For the callers that check for split irqchip support that is not the +case, and we end up with confusing error messages that end in +"success". Use error_report() instead. + +Signed-off-by: Cornelia Huck +Message-Id: <20220728142446.438177-1-cohuck@redhat.com> +Signed-off-by: Paolo Bonzini + +https://bugzilla.redhat.com/show_bug.cgi?id=2095608 +(cherry picked from commit 47c182fe8b03c0c40059fb95840923e65c9bdb4f) +Signed-off-by: Cornelia Huck +--- + accel/kvm/kvm-all.c | 2 +- + target/arm/kvm.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 5f1377ca04..e9c7947640 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2254,7 +2254,7 @@ static void kvm_irqchip_create(KVMState *s) + ret = kvm_arch_irqchip_create(s); + if (ret == 0) { + if (s->kernel_irqchip_split == ON_OFF_AUTO_ON) { +- perror("Split IRQ chip mode not supported."); ++ error_report("Split IRQ chip mode not supported."); + exit(1); + } else { + ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP); +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index bbf1ce7ba3..0a2ba1f8e3 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -960,7 +960,7 @@ void kvm_arch_init_irq_routing(KVMState *s) + int 
kvm_arch_irqchip_create(KVMState *s) + { + if (kvm_kernel_irqchip_split()) { +- perror("-machine kernel_irqchip=split is not supported on ARM."); ++ error_report("-machine kernel_irqchip=split is not supported on ARM."); + exit(1); + } + +-- +2.31.1 + diff --git a/kvm-meson-create-have_vhost_-variables.patch b/kvm-meson-create-have_vhost_-variables.patch new file mode 100644 index 0000000..fcae620 --- /dev/null +++ b/kvm-meson-create-have_vhost_-variables.patch @@ -0,0 +1,154 @@ +From 51c310097832724bafac26aed81399da40128400 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:50:43 +0200 +Subject: [PATCH 05/32] meson: create have_vhost_* variables +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [5/27] 3b30f89e6d639923dc9d9a92a4261bb4509e5c83 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 2a3129a37652e5e81d12f6e16dd3c447f09831f9 +Author: Paolo Bonzini +Date: Wed Apr 20 17:34:05 2022 +0200 + + meson: create have_vhost_* variables + + When using Meson options rather than config-host.h, the "when" clauses + have to be changed to if statements (which is not necessarily great, + though at least it highlights which parts of the build are per-target + and which are not). + + Do that before moving vhost logic to meson.build, though for now + the variables are just based on config-host.mak data. 
+ + Reviewed-by: Marc-André Lureau + Signed-off-by: Paolo Bonzini + +Signed-off-by: Eugenio Pérez +--- + meson.build | 30 ++++++++++++++++++++---------- + tests/meson.build | 2 +- + tools/meson.build | 2 +- + 3 files changed, 22 insertions(+), 12 deletions(-) + +diff --git a/meson.build b/meson.build +index 13e3323380..735f538497 100644 +--- a/meson.build ++++ b/meson.build +@@ -298,6 +298,15 @@ have_tpm = get_option('tpm') \ + .require(targetos != 'windows', error_message: 'TPM emulation only available on POSIX systems') \ + .allowed() + ++# vhost ++have_vhost_user = 'CONFIG_VHOST_USER' in config_host ++have_vhost_vdpa = 'CONFIG_VHOST_VDPA' in config_host ++have_vhost_kernel = 'CONFIG_VHOST_KERNEL' in config_host ++have_vhost_net_user = 'CONFIG_VHOST_NET_USER' in config_host ++have_vhost_net_vdpa = 'CONFIG_VHOST_NET_VDPA' in config_host ++have_vhost_net = 'CONFIG_VHOST_NET' in config_host ++have_vhost_user_crypto = 'CONFIG_VHOST_CRYPTO' in config_host ++ + # Target-specific libraries and flags + libm = cc.find_library('m', required: false) + threads = dependency('threads') +@@ -1335,7 +1344,7 @@ has_statx_mnt_id = cc.links(statx_mnt_id_test) + have_vhost_user_blk_server = get_option('vhost_user_blk_server') \ + .require(targetos == 'linux', + error_message: 'vhost_user_blk_server requires linux') \ +- .require('CONFIG_VHOST_USER' in config_host, ++ .require(have_vhost_user, + error_message: 'vhost_user_blk_server requires vhost-user support') \ + .disable_auto_if(not have_system) \ + .allowed() +@@ -2116,9 +2125,9 @@ host_kconfig = \ + (have_ivshmem ? ['CONFIG_IVSHMEM=y'] : []) + \ + ('CONFIG_OPENGL' in config_host ? ['CONFIG_OPENGL=y'] : []) + \ + (x11.found() ? ['CONFIG_X11=y'] : []) + \ +- ('CONFIG_VHOST_USER' in config_host ? ['CONFIG_VHOST_USER=y'] : []) + \ +- ('CONFIG_VHOST_VDPA' in config_host ? ['CONFIG_VHOST_VDPA=y'] : []) + \ +- ('CONFIG_VHOST_KERNEL' in config_host ? ['CONFIG_VHOST_KERNEL=y'] : []) + \ ++ (have_vhost_user ? 
['CONFIG_VHOST_USER=y'] : []) + \ ++ (have_vhost_vdpa ? ['CONFIG_VHOST_VDPA=y'] : []) + \ ++ (have_vhost_kernel ? ['CONFIG_VHOST_KERNEL=y'] : []) + \ + (have_virtfs ? ['CONFIG_VIRTFS=y'] : []) + \ + ('CONFIG_LINUX' in config_host ? ['CONFIG_LINUX=y'] : []) + \ + ('CONFIG_PVRDMA' in config_host ? ['CONFIG_PVRDMA=y'] : []) + \ +@@ -2799,7 +2808,7 @@ if have_system or have_user + endif + + vhost_user = not_found +-if targetos == 'linux' and 'CONFIG_VHOST_USER' in config_host ++if targetos == 'linux' and have_vhost_user + libvhost_user = subproject('libvhost-user') + vhost_user = libvhost_user.get_variable('vhost_user_dep') + endif +@@ -3386,7 +3395,7 @@ if have_tools + dependencies: qemuutil, + install: true) + +- if 'CONFIG_VHOST_USER' in config_host ++ if have_vhost_user + subdir('contrib/vhost-user-blk') + subdir('contrib/vhost-user-gpu') + subdir('contrib/vhost-user-input') +@@ -3516,15 +3525,16 @@ if 'simple' in get_option('trace_backends') + endif + summary_info += {'D-Bus display': dbus_display} + summary_info += {'QOM debugging': get_option('qom_cast_debug')} +-summary_info += {'vhost-kernel support': config_host.has_key('CONFIG_VHOST_KERNEL')} +-summary_info += {'vhost-net support': config_host.has_key('CONFIG_VHOST_NET')} +-summary_info += {'vhost-crypto support': config_host.has_key('CONFIG_VHOST_CRYPTO')} ++summary_info += {'vhost-kernel support': have_vhost_kernel} ++summary_info += {'vhost-net support': have_vhost_net} ++summary_info += {'vhost-user support': have_vhost_user} ++summary_info += {'vhost-user-crypto support': have_vhost_user_crypto} + summary_info += {'vhost-scsi support': config_host.has_key('CONFIG_VHOST_SCSI')} + summary_info += {'vhost-vsock support': config_host.has_key('CONFIG_VHOST_VSOCK')} +-summary_info += {'vhost-user support': config_host.has_key('CONFIG_VHOST_USER')} + summary_info += {'vhost-user-blk server support': have_vhost_user_blk_server} + summary_info += {'vhost-user-fs support': 
config_host.has_key('CONFIG_VHOST_USER_FS')} + summary_info += {'vhost-vdpa support': config_host.has_key('CONFIG_VHOST_VDPA')} ++summary_info += {'vhost-vdpa support': have_vhost_vdpa} + summary_info += {'build guest agent': have_ga} + summary(summary_info, bool_yn: true, section: 'Configurable features') + +diff --git a/tests/meson.build b/tests/meson.build +index 1d05109eb4..bbe41c8559 100644 +--- a/tests/meson.build ++++ b/tests/meson.build +@@ -70,7 +70,7 @@ test_deps = { + 'test-qht-par': qht_bench, + } + +-if have_tools and 'CONFIG_VHOST_USER' in config_host and 'CONFIG_LINUX' in config_host ++if have_tools and have_vhost_user and 'CONFIG_LINUX' in config_host + executable('vhost-user-bridge', + sources: files('vhost-user-bridge.c'), + dependencies: [qemuutil, vhost_user]) +diff --git a/tools/meson.build b/tools/meson.build +index 46977af84f..10eb3a043f 100644 +--- a/tools/meson.build ++++ b/tools/meson.build +@@ -3,7 +3,7 @@ have_virtiofsd = get_option('virtiofsd') \ + error_message: 'virtiofsd requires Linux') \ + .require(seccomp.found() and libcap_ng.found(), + error_message: 'virtiofsd requires libcap-ng-devel and seccomp-devel') \ +- .require('CONFIG_VHOST_USER' in config_host, ++ .require(have_vhost_user, + error_message: 'virtiofsd needs vhost-user-support') \ + .disable_auto_if(not have_tools and not have_system) \ + .allowed() +-- +2.31.1 + diff --git a/kvm-meson-use-have_vhost_-variables-to-pick-sources.patch b/kvm-meson-use-have_vhost_-variables-to-pick-sources.patch new file mode 100644 index 0000000..99d86c1 --- /dev/null +++ b/kvm-meson-use-have_vhost_-variables-to-pick-sources.patch @@ -0,0 +1,213 @@ +From a7d57a09e33275d5e6649273b5c9da1bc3c92491 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:51:53 +0200 +Subject: [PATCH 06/32] meson: use have_vhost_* variables to pick sources +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez 
+RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [6/27] bc3db1efb759c0bc97fde2f4fbb3d6dc404c8d3d (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 43b6d7ee1fbc5b5fb7c85d8131fdac1863214ad6 +Author: Paolo Bonzini +Date: Wed Apr 20 17:34:06 2022 +0200 + + meson: use have_vhost_* variables to pick sources + + Reviewed-by: Marc-André Lureau + Signed-off-by: Paolo Bonzini + +Signed-off-by: Eugenio Pérez +--- + Kconfig.host | 3 --- + backends/meson.build | 8 ++++++-- + hw/net/meson.build | 8 ++++++-- + hw/virtio/Kconfig | 3 --- + hw/virtio/meson.build | 25 ++++++++++++++++--------- + meson.build | 1 + + net/meson.build | 12 +++++++----- + tests/qtest/meson.build | 4 +++- + 8 files changed, 39 insertions(+), 25 deletions(-) + +diff --git a/Kconfig.host b/Kconfig.host +index 60b9c07b5e..1165c4eacd 100644 +--- a/Kconfig.host ++++ b/Kconfig.host +@@ -22,15 +22,12 @@ config TPM + + config VHOST_USER + bool +- select VHOST + + config VHOST_VDPA + bool +- select VHOST + + config VHOST_KERNEL + bool +- select VHOST + + config VIRTFS + bool +diff --git a/backends/meson.build b/backends/meson.build +index 6e68945528..cb92f639ca 100644 +--- a/backends/meson.build ++++ b/backends/meson.build +@@ -12,9 +12,13 @@ softmmu_ss.add([files( + softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files('rng-random.c')) + softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files('hostmem-file.c')) + softmmu_ss.add(when: 'CONFIG_LINUX', if_true: files('hostmem-memfd.c')) +-softmmu_ss.add(when: ['CONFIG_VHOST_USER', 'CONFIG_VIRTIO'], if_true: files('vhost-user.c')) ++if have_vhost_user ++ softmmu_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c')) ++endif + softmmu_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c')) +-softmmu_ss.add(when: ['CONFIG_VIRTIO_CRYPTO', 
'CONFIG_VHOST_CRYPTO'], if_true: files('cryptodev-vhost-user.c')) ++if have_vhost_user_crypto ++ softmmu_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c')) ++endif + softmmu_ss.add(when: 'CONFIG_GIO', if_true: [files('dbus-vmstate.c'), gio]) + softmmu_ss.add(when: 'CONFIG_SGX', if_true: files('hostmem-epc.c')) + +diff --git a/hw/net/meson.build b/hw/net/meson.build +index 685b75badb..ebac261542 100644 +--- a/hw/net/meson.build ++++ b/hw/net/meson.build +@@ -46,8 +46,12 @@ specific_ss.add(when: 'CONFIG_XILINX_ETHLITE', if_true: files('xilinx_ethlite.c' + softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('net_rx_pkt.c')) + specific_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('virtio-net.c')) + +-softmmu_ss.add(when: ['CONFIG_VIRTIO_NET', 'CONFIG_VHOST_NET'], if_true: files('vhost_net.c'), if_false: files('vhost_net-stub.c')) +-softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost_net-stub.c')) ++if have_vhost_net ++ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost_net.c'), if_false: files('vhost_net-stub.c')) ++ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost_net-stub.c')) ++else ++ softmmu_ss.add(files('vhost_net-stub.c')) ++endif + + softmmu_ss.add(when: 'CONFIG_ETSEC', if_true: files( + 'fsl_etsec/etsec.c', +diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig +index c144d42f9b..8ca7b3d9d6 100644 +--- a/hw/virtio/Kconfig ++++ b/hw/virtio/Kconfig +@@ -1,6 +1,3 @@ +-config VHOST +- bool +- + config VIRTIO + bool + +diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build +index 67dc77e00f..30a832eb4a 100644 +--- a/hw/virtio/meson.build ++++ b/hw/virtio/meson.build +@@ -2,18 +2,22 @@ softmmu_virtio_ss = ss.source_set() + softmmu_virtio_ss.add(files('virtio-bus.c')) + softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c')) + softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c')) +-softmmu_virtio_ss.add(when: 'CONFIG_VHOST', if_false: 
files('vhost-stub.c')) +- +-softmmu_ss.add_all(when: 'CONFIG_VIRTIO', if_true: softmmu_virtio_ss) +-softmmu_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) +- +-softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c')) + + virtio_ss = ss.source_set() + virtio_ss.add(files('virtio.c')) +-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-iova-tree.c')) +-virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c')) +-virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-shadow-virtqueue.c', 'vhost-vdpa.c')) ++ ++if have_vhost ++ virtio_ss.add(files('vhost.c', 'vhost-backend.c', 'vhost-iova-tree.c')) ++ if have_vhost_user ++ virtio_ss.add(files('vhost-user.c')) ++ endif ++ if have_vhost_vdpa ++ virtio_ss.add(files('vhost-vdpa.c', 'vhost-shadow-virtqueue.c')) ++ endif ++else ++ softmmu_virtio_ss.add(files('vhost-stub.c')) ++endif ++ + virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c')) + virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) + virtio_ss.add(when: ['CONFIG_VIRTIO_CRYPTO', 'CONFIG_VIRTIO_PCI'], if_true: files('virtio-crypto-pci.c')) +@@ -53,3 +57,6 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) + virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) + + specific_ss.add_all(when: 'CONFIG_VIRTIO', if_true: virtio_ss) ++softmmu_ss.add_all(when: 'CONFIG_VIRTIO', if_true: softmmu_virtio_ss) ++softmmu_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) ++softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c')) +diff --git a/meson.build b/meson.build +index 735f538497..9ba675f098 100644 +--- a/meson.build ++++ b/meson.build +@@ -305,6 +305,7 @@ have_vhost_kernel = 'CONFIG_VHOST_KERNEL' in config_host + have_vhost_net_user = 'CONFIG_VHOST_NET_USER' in config_host + have_vhost_net_vdpa = 'CONFIG_VHOST_NET_VDPA' in config_host + have_vhost_net = 
'CONFIG_VHOST_NET' in config_host ++have_vhost = have_vhost_user or have_vhost_vdpa or have_vhost_kernel + have_vhost_user_crypto = 'CONFIG_VHOST_CRYPTO' in config_host + + # Target-specific libraries and flags +diff --git a/net/meson.build b/net/meson.build +index 847bc2ac85..c965e83b26 100644 +--- a/net/meson.build ++++ b/net/meson.build +@@ -26,10 +26,10 @@ softmmu_ss.add(when: vde, if_true: files('vde.c')) + if have_netmap + softmmu_ss.add(files('netmap.c')) + endif +-vhost_user_ss = ss.source_set() +-vhost_user_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c')) +-softmmu_ss.add_all(when: 'CONFIG_VHOST_NET_USER', if_true: vhost_user_ss) +-softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-user-stub.c')) ++if have_vhost_net_user ++ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c')) ++ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-user-stub.c')) ++endif + + softmmu_ss.add(when: 'CONFIG_LINUX', if_true: files('tap-linux.c')) + softmmu_ss.add(when: 'CONFIG_BSD', if_true: files('tap-bsd.c')) +@@ -40,6 +40,8 @@ if not config_host.has_key('CONFIG_LINUX') and not config_host.has_key('CONFIG_B + endif + softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix)) + softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c')) +-softmmu_ss.add(when: 'CONFIG_VHOST_NET_VDPA', if_true: files('vhost-vdpa.c')) ++if have_vhost_net_vdpa ++ softmmu_ss.add(files('vhost-vdpa.c')) ++endif + + subdir('can') +diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build +index 67cd32def1..9f550df900 100644 +--- a/tests/qtest/meson.build ++++ b/tests/qtest/meson.build +@@ -269,7 +269,9 @@ qos_test_ss.add( + if have_virtfs + qos_test_ss.add(files('virtio-9p-test.c')) + endif +-qos_test_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user-test.c')) ++if have_vhost_user ++ qos_test_ss.add(files('vhost-user-test.c')) ++endif + if have_tools 
and have_vhost_user_blk_server + qos_test_ss.add(files('vhost-user-blk-test.c')) + endif +-- +2.31.1 + diff --git a/kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch b/kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch new file mode 100644 index 0000000..ea89a9f --- /dev/null +++ b/kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch @@ -0,0 +1,142 @@ +From 1d280070748b604c60a7be4d4c3c3a28e3964f37 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 2 Aug 2022 10:11:21 +0200 +Subject: [PATCH 31/32] multifd: Copy pages before compressing them with zlib + +RH-Author: Thomas Huth +RH-MergeRequest: 112: Fix postcopy migration on s390x +RH-Commit: [1/2] fd5a0221e22b4563bd1cb7f8a8b95f0bfe8f5fc9 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2099934 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2099934 + +zlib_send_prepare() compresses pages of a running VM. zlib does not +make any thread-safety guarantees with respect to changing deflate() +input concurrently with deflate() [1]. + +One can observe problems due to this with the IBM zEnterprise Data +Compression accelerator capable zlib [2]. When the hardware +acceleration is enabled, migration/multifd/tcp/plain/zlib test fails +intermittently [3] due to sliding window corruption. The accelerator's +architecture explicitly discourages concurrent accesses [4]: + + Page 26-57, "Other Conditions": + + As observed by this CPU, other CPUs, and channel + programs, references to the parameter block, first, + second, and third operands may be multiple-access + references, accesses to these storage locations are + not necessarily block-concurrent, and the sequence + of these accesses or references is undefined. 
+ +Mark Adler pointed out that vanilla zlib performs double fetches under +certain circumstances as well [5], therefore we need to copy data +before passing it to deflate(). + +[1] https://zlib.net/manual.html +[2] https://github.com/madler/zlib/pull/410 +[3] https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03988.html +[4] http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf +[5] https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00889.html + +Signed-off-by: Ilya Leoshkevich +Message-Id: <20220705203559.2960949-1-iii@linux.ibm.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 007e179ef0e97eafda4c9ff2a9d665a1947c7c6d) +Signed-off-by: Thomas Huth +--- + migration/multifd-zlib.c | 38 ++++++++++++++++++++++++++++++-------- + 1 file changed, 30 insertions(+), 8 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 3a7ae44485..18213a9513 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -27,6 +27,8 @@ struct zlib_data { + uint8_t *zbuff; + /* size of compressed buffer */ + uint32_t zbuff_len; ++ /* uncompressed buffer of size qemu_target_page_size() */ ++ uint8_t *buf; + }; + + /* Multifd zlib compression */ +@@ -45,26 +47,38 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) + { + struct zlib_data *z = g_new0(struct zlib_data, 1); + z_stream *zs = &z->zs; ++ const char *err_msg; + + zs->zalloc = Z_NULL; + zs->zfree = Z_NULL; + zs->opaque = Z_NULL; + if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) { +- g_free(z); +- error_setg(errp, "multifd %u: deflate init failed", p->id); +- return -1; ++ err_msg = "deflate init failed"; ++ goto err_free_z; + } + /* This is the maxium size of the compressed buffer */ + z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE); + z->zbuff = g_try_malloc(z->zbuff_len); + if (!z->zbuff) { +- deflateEnd(&z->zs); +- g_free(z); +- error_setg(errp, "multifd %u: out of memory for zbuff", 
p->id); +- return -1; ++ err_msg = "out of memory for zbuff"; ++ goto err_deflate_end; ++ } ++ z->buf = g_try_malloc(qemu_target_page_size()); ++ if (!z->buf) { ++ err_msg = "out of memory for buf"; ++ goto err_free_zbuff; + } + p->data = z; + return 0; ++ ++err_free_zbuff: ++ g_free(z->zbuff); ++err_deflate_end: ++ deflateEnd(&z->zs); ++err_free_z: ++ g_free(z); ++ error_setg(errp, "multifd %u: %s", p->id, err_msg); ++ return -1; + } + + /** +@@ -82,6 +96,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + deflateEnd(&z->zs); + g_free(z->zbuff); + z->zbuff = NULL; ++ g_free(z->buf); ++ z->buf = NULL; + g_free(p->data); + p->data = NULL; + } +@@ -114,8 +130,14 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + flush = Z_SYNC_FLUSH; + } + ++ /* ++ * Since the VM might be running, the page may be changing concurrently ++ * with compression. zlib does not guarantee that this is safe, ++ * therefore copy the page before calling deflate(). ++ */ ++ memcpy(z->buf, p->pages->block->host + p->normal[i], page_size); + zs->avail_in = page_size; +- zs->next_in = p->pages->block->host + p->normal[i]; ++ zs->next_in = z->buf; + + zs->avail_out = available; + zs->next_out = z->zbuff + out_size; +-- +2.31.1 + diff --git a/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch b/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch new file mode 100644 index 0000000..f027c45 --- /dev/null +++ b/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch @@ -0,0 +1,106 @@ +From 236f216309261bc924e49014267998fdc2ef7f46 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 Jul 2022 16:55:34 +0200 +Subject: [PATCH 28/32] redhat: Update linux-headers/linux/kvm.h to v5.18-rc6 + +RH-Author: Thomas Huth +RH-MergeRequest: 109: Honor storage keys during emulation of I/O instructions +RH-Commit: [1/2] f306d7ff8efa64b14158388b95815ac556a25d8a (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2111994 +RH-Acked-by: Cornelia Huck +RH-Acked-by: 
David Hildenbrand +RH-Acked-by: Claudio Imbrenda + +Upstream Status: RHEL-only +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2111994 + +Based on upstream commit e4082063e47e9731dbeb1c26174c17f6038f577f +("linux-headers: Update to v5.18-rc6"), but this is focusing on +the file linux-headers/linux/kvm.h only (since the other changes +related to the VFIO renaming might break some stuff). + +Signed-off-by: Thomas Huth +--- + linux-headers/linux/kvm.h | 27 +++++++++++++++++++++------ + 1 file changed, 21 insertions(+), 6 deletions(-) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index d232feaae9..0d05d02ee4 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -445,7 +445,11 @@ struct kvm_run { + #define KVM_SYSTEM_EVENT_RESET 2 + #define KVM_SYSTEM_EVENT_CRASH 3 + __u32 type; +- __u64 flags; ++ __u32 ndata; ++ union { ++ __u64 flags; ++ __u64 data[16]; ++ }; + } system_event; + /* KVM_EXIT_S390_STSI */ + struct { +@@ -562,9 +566,12 @@ struct kvm_s390_mem_op { + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ + union { +- __u8 ar; /* the access register number */ ++ struct { ++ __u8 ar; /* the access register number */ ++ __u8 key; /* access key, ignored if flag unset */ ++ }; + __u32 sida_offset; /* offset into the sida */ +- __u8 reserved[32]; /* should be set to 0 */ ++ __u8 reserved[32]; /* ignored */ + }; + }; + /* types for kvm_s390_mem_op->op */ +@@ -572,9 +579,12 @@ struct kvm_s390_mem_op { + #define KVM_S390_MEMOP_LOGICAL_WRITE 1 + #define KVM_S390_MEMOP_SIDA_READ 2 + #define KVM_S390_MEMOP_SIDA_WRITE 3 ++#define KVM_S390_MEMOP_ABSOLUTE_READ 4 ++#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 + /* flags for kvm_s390_mem_op->flags */ + #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) + #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) ++#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) + + /* for KVM_INTERRUPT */ + struct kvm_interrupt { +@@ -1134,6 +1144,12 @@ struct 
kvm_ppc_resize_hpt { + #define KVM_CAP_VM_GPA_BITS 207 + #define KVM_CAP_XSAVE2 208 + #define KVM_CAP_SYS_ATTRIBUTES 209 ++#define KVM_CAP_PPC_AIL_MODE_3 210 ++#define KVM_CAP_S390_MEM_OP_EXTENSION 211 ++#define KVM_CAP_PMU_CAPABILITY 212 ++#define KVM_CAP_DISABLE_QUIRKS2 213 ++/* #define KVM_CAP_VM_TSC_CONTROL 214 */ ++#define KVM_CAP_SYSTEM_EVENT_DATA 215 + + #ifdef KVM_CAP_IRQ_ROUTING + +@@ -1624,9 +1640,6 @@ struct kvm_enc_region { + #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) + #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) + +-/* Available with KVM_CAP_XSAVE2 */ +-#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) +- + struct kvm_s390_pv_sec_parm { + __u64 origin; + __u64 length; +@@ -1973,6 +1986,8 @@ struct kvm_dirty_gfn { + #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) + #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) + ++#define KVM_PMU_CAP_DISABLE (1 << 0) ++ + /** + * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. + * @flags: Some extra information for header, always 0 for now. +-- +2.31.1 + diff --git a/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch b/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch new file mode 100644 index 0000000..61752c7 --- /dev/null +++ b/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch @@ -0,0 +1,103 @@ +From 27c1d979a994f5afc59c3520af58d15aa5aae723 Mon Sep 17 00:00:00 2001 +From: Janis Schoetterl-Glausch +Date: Fri, 6 May 2022 17:39:56 +0200 +Subject: [PATCH 29/32] target/s390x: kvm: Honor storage keys during emulation + +RH-Author: Thomas Huth +RH-MergeRequest: 109: Honor storage keys during emulation of I/O instructions +RH-Commit: [2/2] 346dee1e13bfe1c074e4c6a4417091711d852f9c (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2111994 +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Claudio Imbrenda + +Storage key controlled protection is currently not honored when +emulating instructions. 
+If available, enable key protection for the MEM_OP ioctl, thereby +enabling it for the s390_cpu_virt_mem_* functions, when using kvm. +As a result, the emulation of the following instructions honors storage +keys: + +* CLP + The Synch I/O CLP command would need special handling in order + to support storage keys, but is currently not supported. +* CHSC + Performing commands asynchronously would require special + handling, but commands are currently always synchronous. +* STSI +* TSCH + Must (and does) not change channel if terminated due to + protection. +* MSCH + Suppressed on protection, works because fetching instruction. +* SSCH + Suppressed on protection, works because fetching instruction. +* STSCH +* STCRW + Suppressed on protection, this works because no partial store is + possible, because the operand cannot span multiple pages. +* PCISTB +* MPCIFC +* STPCIFC + +Signed-off-by: Janis Schoetterl-Glausch +Message-Id: <20220506153956.2217601-3-scgl@linux.ibm.com> +Signed-off-by: Thomas Huth + +(cherry picked from commit 54354861d21b69ec0781f43e67b8d4f6edad7e3f) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2111994 +Signed-off-by: Thomas Huth +--- + target/s390x/kvm/kvm.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index 74f089d87f..1f1d1a33b8 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -152,12 +152,15 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + static int cap_sync_regs; + static int cap_async_pf; + static int cap_mem_op; ++static int cap_mem_op_extension; + static int cap_s390_irq; + static int cap_ri; + static int cap_hpage_1m; + static int cap_vcpu_resets; + static int cap_protected; + ++static bool mem_op_storage_key_support; ++ + static int active_cmma; + + static int kvm_s390_query_mem_limit(uint64_t *memory_limit) +@@ -355,6 +358,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_sync_regs = kvm_check_extension(s, 
KVM_CAP_SYNC_REGS); + cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF); + cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); ++ cap_mem_op_extension = kvm_check_extension(s, KVM_CAP_S390_MEM_OP_EXTENSION); ++ mem_op_storage_key_support = cap_mem_op_extension > 0; + cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); + cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); + cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); +@@ -843,6 +848,7 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, + : KVM_S390_MEMOP_LOGICAL_READ, + .buf = (uint64_t)hostbuf, + .ar = ar, ++ .key = (cpu->env.psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY, + }; + int ret; + +@@ -852,6 +858,9 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, + if (!hostbuf) { + mem_op.flags |= KVM_S390_MEMOP_F_CHECK_ONLY; + } ++ if (mem_op_storage_key_support) { ++ mem_op.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION; ++ } + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_S390_MEM_OP, &mem_op); + if (ret < 0) { +-- +2.31.1 + diff --git a/kvm-vdpa-Add-device-migration-blocker.patch b/kvm-vdpa-Add-device-migration-blocker.patch new file mode 100644 index 0000000..1b83c98 --- /dev/null +++ b/kvm-vdpa-Add-device-migration-blocker.patch @@ -0,0 +1,106 @@ +From 8e0fdce814af4cfc84dce5e5920da989b1f1a86d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 16:06:05 +0200 +Subject: [PATCH 26/32] vdpa: Add device migration blocker +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [26/27] 53d94d45b5e5e88f12b95f9b0f243696cfcbd7ce (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 
c156d5bf2b142dcc06808ccee06882144f230aec +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:45 2022 +0200 + + vdpa: Add device migration blocker + + Since the vhost-vdpa device is exposing _F_LOG, adding a migration blocker if + it uses CVQ. + + However, qemu is able to migrate simple devices with no CVQ as long as + they use SVQ. To allow it, add a placeholder error to vhost_vdpa, and + only add to vhost_dev when used. vhost_dev machinery place the migration + blocker if needed. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-vdpa.c | 15 +++++++++++++++ + include/hw/virtio/vhost-vdpa.h | 1 + + 2 files changed, 16 insertions(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 49effe5462..e3e5bce4bb 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -20,6 +20,7 @@ + #include "hw/virtio/vhost-shadow-virtqueue.h" + #include "hw/virtio/vhost-vdpa.h" + #include "exec/address-spaces.h" ++#include "migration/blocker.h" + #include "qemu/main-loop.h" + #include "cpu.h" + #include "trace.h" +@@ -1020,6 +1021,13 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) + return true; + } + ++ if (v->migration_blocker) { ++ int r = migrate_add_blocker(v->migration_blocker, &err); ++ if (unlikely(r < 0)) { ++ return false; ++ } ++ } ++ + for (i = 0; i < v->shadow_vqs->len; ++i) { + VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); +@@ -1062,6 +1070,10 @@ err: + vhost_svq_stop(svq); + } + ++ if (v->migration_blocker) { ++ migrate_del_blocker(v->migration_blocker); ++ } ++ + return false; + } + +@@ -1081,6 +1093,9 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev) + } + } + ++ if (v->migration_blocker) { ++ migrate_del_blocker(v->migration_blocker); ++ } + return true; + } + +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h 
+index 1111d85643..d10a89303e 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -35,6 +35,7 @@ typedef struct vhost_vdpa { + bool shadow_vqs_enabled; + /* IOVA mapping used by the Shadow Virtqueue */ + VhostIOVATree *iova_tree; ++ Error *migration_blocker; + GPtrArray *shadow_vqs; + const VhostShadowVirtqueueOps *shadow_vq_ops; + void *shadow_vq_ops_opaque; +-- +2.31.1 + diff --git a/kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch b/kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch new file mode 100644 index 0000000..8a7b600 --- /dev/null +++ b/kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch @@ -0,0 +1,223 @@ +From 0b27781f9984c67625c49a516c3e38fbf5fa1b1b Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 16:06:16 +0200 +Subject: [PATCH 27/32] vdpa: Add x-svq to NetdevVhostVDPAOptions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [27/27] bd85496c2a8c1ebf34f908fca2be2ab9852fd0e9 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 1576dbb5bbc49344c606e969ec749be70c0fd94e +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:46 2022 +0200 + + vdpa: Add x-svq to NetdevVhostVDPAOptions + + Finally offering the possibility to enable SVQ from the command line. + + Signed-off-by: Eugenio Pérez + Acked-by: Markus Armbruster + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + net/vhost-vdpa.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++-- + qapi/net.json | 9 +++++- + 2 files changed, 77 insertions(+), 4 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 8b76dac966..50672bcd66 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -75,6 +75,28 @@ const int vdpa_feature_bits[] = { + VHOST_INVALID_FEATURE_BIT + }; + ++/** Supported device specific feature bits with SVQ */ ++static const uint64_t vdpa_svq_device_features = ++ BIT_ULL(VIRTIO_NET_F_CSUM) | ++ BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | ++ BIT_ULL(VIRTIO_NET_F_MTU) | ++ BIT_ULL(VIRTIO_NET_F_MAC) | ++ BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | ++ BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | ++ BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | ++ BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | ++ BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | ++ BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | ++ BIT_ULL(VIRTIO_NET_F_HOST_ECN) | ++ BIT_ULL(VIRTIO_NET_F_HOST_UFO) | ++ BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | ++ BIT_ULL(VIRTIO_NET_F_STATUS) | ++ BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | ++ BIT_ULL(VIRTIO_F_ANY_LAYOUT) | ++ BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | ++ BIT_ULL(VIRTIO_NET_F_RSC_EXT) | ++ BIT_ULL(VIRTIO_NET_F_STANDBY); ++ + VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); +@@ -133,9 +155,13 @@ err_init: + static void vhost_vdpa_cleanup(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); ++ struct vhost_dev *dev = &s->vhost_net->dev; + + qemu_vfree(s->cvq_cmd_out_buffer); + qemu_vfree(s->cvq_cmd_in_buffer); ++ if (dev->vq_index + dev->nvqs == dev->vq_index_end) { ++ g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); ++ } + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); +@@ -437,7 +463,9 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + int vdpa_device_fd, + int queue_pair_index, + int nvqs, +- bool 
is_datapath) ++ bool is_datapath, ++ bool svq, ++ VhostIOVATree *iova_tree) + { + NetClientState *nc = NULL; + VhostVDPAState *s; +@@ -455,6 +483,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + + s->vhost_vdpa.device_fd = vdpa_device_fd; + s->vhost_vdpa.index = queue_pair_index; ++ s->vhost_vdpa.shadow_vqs_enabled = svq; ++ s->vhost_vdpa.iova_tree = iova_tree; + if (!is_datapath) { + s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size, + vhost_vdpa_net_cvq_cmd_page_len()); +@@ -465,6 +495,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + + s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; + s->vhost_vdpa.shadow_vq_ops_opaque = s; ++ error_setg(&s->vhost_vdpa.migration_blocker, ++ "Migration disabled: vhost-vdpa uses CVQ."); + } + ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); + if (ret) { +@@ -474,6 +506,14 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + return nc; + } + ++static int vhost_vdpa_get_iova_range(int fd, ++ struct vhost_vdpa_iova_range *iova_range) ++{ ++ int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range); ++ ++ return ret < 0 ? 
-errno : 0; ++} ++ + static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp) + { + int ret = ioctl(fd, VHOST_GET_FEATURES, features); +@@ -524,6 +564,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + uint64_t features; + int vdpa_device_fd; + g_autofree NetClientState **ncs = NULL; ++ g_autoptr(VhostIOVATree) iova_tree = NULL; + NetClientState *nc; + int queue_pairs, r, i, has_cvq = 0; + +@@ -551,22 +592,45 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + return queue_pairs; + } + ++ if (opts->x_svq) { ++ struct vhost_vdpa_iova_range iova_range; ++ ++ uint64_t invalid_dev_features = ++ features & ~vdpa_svq_device_features & ++ /* Transport are all accepted at this point */ ++ ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, ++ VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); ++ ++ if (invalid_dev_features) { ++ error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, ++ invalid_dev_features); ++ goto err_svq; ++ } ++ ++ vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); ++ iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last); ++ } ++ + ncs = g_malloc0(sizeof(*ncs) * queue_pairs); + + for (i = 0; i < queue_pairs; i++) { + ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, +- vdpa_device_fd, i, 2, true); ++ vdpa_device_fd, i, 2, true, opts->x_svq, ++ iova_tree); + if (!ncs[i]) + goto err; + } + + if (has_cvq) { + nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, +- vdpa_device_fd, i, 1, false); ++ vdpa_device_fd, i, 1, false, ++ opts->x_svq, iova_tree); + if (!nc) + goto err; + } + ++ /* iova_tree ownership belongs to last NetClientState */ ++ g_steal_pointer(&iova_tree); + return 0; + + err: +@@ -575,6 +639,8 @@ err: + qemu_del_net_client(ncs[i]); + } + } ++ ++err_svq: + qemu_close(vdpa_device_fd); + + return -1; +diff --git a/qapi/net.json b/qapi/net.json +index b92f3f5fb4..92848e4362 100644 +--- a/qapi/net.json ++++ b/qapi/net.json +@@ -445,12 +445,19 @@ + # 
@queues: number of queues to be created for multiqueue vhost-vdpa + # (default: 1) + # ++# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1) ++# (default: false) ++# ++# Features: ++# @unstable: Member @x-svq is experimental. ++# + # Since: 5.1 + ## + { 'struct': 'NetdevVhostVDPAOptions', + 'data': { + '*vhostdev': 'str', +- '*queues': 'int' } } ++ '*queues': 'int', ++ '*x-svq': {'type': 'bool', 'features' : [ 'unstable'] } } } + + ## + # @NetClientDriver: +-- +2.31.1 + diff --git a/kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch b/kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch new file mode 100644 index 0000000..acd45e0 --- /dev/null +++ b/kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch @@ -0,0 +1,65 @@ +From df06ce560ddfefde98bef822ec2020382059921f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 10/32] vdpa: Avoid compiler to squash reads to used idx +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [10/27] b28789302d4f64749da26f413763f918161d9b70 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit c381abc37f0aba42ed2e3b41cdace8f8438829e4 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:29 2022 +0200 + + vdpa: Avoid compiler to squash reads to used idx + + In the next patch we will allow busypolling of this value. The compiler + have a running path where shadow_used_idx, last_used_idx, and vring used + idx are not modified within the same thread busypolling. + + This was not an issue before since we always cleared device event + notifier before checking it, and that could act as memory barrier. 
+ However, the busypoll needs something similar to kernel READ_ONCE. + + Let's add it here, sepparated from the polling. + + Signed-off-by: Eugenio Pérez + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 3fbda1e3d4..9c46c3a8fa 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -327,11 +327,12 @@ static void vhost_handle_guest_kick_notifier(EventNotifier *n) + + static bool vhost_svq_more_used(VhostShadowVirtqueue *svq) + { ++ uint16_t *used_idx = &svq->vring.used->idx; + if (svq->last_used_idx != svq->shadow_used_idx) { + return true; + } + +- svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx); ++ svq->shadow_used_idx = cpu_to_le16(*(volatile uint16_t *)used_idx); + + return svq->last_used_idx != svq->shadow_used_idx; + } +-- +2.31.1 + diff --git a/kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch b/kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch new file mode 100644 index 0000000..243aec8 --- /dev/null +++ b/kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch @@ -0,0 +1,323 @@ +From 881945094c0e4d33614d40959bfc20e395f5a478 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 16:05:40 +0200 +Subject: [PATCH 24/32] vdpa: Buffer CVQ support on shadow virtqueue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [24/27] 5486f80141a3ad968a32e782bdcdead32f417352 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 2df4dd31e194c94da7d28c02e92449f4a989fca9 
+Author: Eugenio Pérez +Date: Wed Jul 20 08:59:43 2022 +0200 + + vdpa: Buffer CVQ support on shadow virtqueue + + Introduce the control virtqueue support for vDPA shadow virtqueue. This + is needed for advanced networking features like rx filtering. + + Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid + TOCTOU with the guest's or device's memory every time there is a device + model change. Otherwise, the guest could change the memory content in + the time between qemu and the device read it. + + To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is + implemented. If the virtio-net driver changes MAC the virtio-net device + model will be updated with the new one, and a rx filtering change event + will be raised. + + More cvq commands could be added here straightforwardly but they have + not been tested. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + net/vhost-vdpa.c | 213 +++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 205 insertions(+), 8 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 2e3b6b10d8..df42822463 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -33,6 +33,9 @@ typedef struct VhostVDPAState { + NetClientState nc; + struct vhost_vdpa vhost_vdpa; + VHostNetState *vhost_net; ++ ++ /* Control commands shadow buffers */ ++ void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer; + bool started; + } VhostVDPAState; + +@@ -131,6 +134,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); + ++ qemu_vfree(s->cvq_cmd_out_buffer); ++ qemu_vfree(s->cvq_cmd_in_buffer); + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); +@@ -190,24 +195,191 @@ static NetClientInfo net_vhost_vdpa_info = { + .check_peer_type = vhost_vdpa_check_peer_type, + }; + ++static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) ++{ ++ 
VhostIOVATree *tree = v->iova_tree; ++ DMAMap needle = { ++ /* ++ * No need to specify size or to look for more translations since ++ * this contiguous chunk was allocated by us. ++ */ ++ .translated_addr = (hwaddr)(uintptr_t)addr, ++ }; ++ const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle); ++ int r; ++ ++ if (unlikely(!map)) { ++ error_report("Cannot locate expected map"); ++ return; ++ } ++ ++ r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1); ++ if (unlikely(r != 0)) { ++ error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); ++ } ++ ++ vhost_iova_tree_remove(tree, map); ++} ++ ++static size_t vhost_vdpa_net_cvq_cmd_len(void) ++{ ++ /* ++ * MAC_TABLE_SET is the ctrl command that produces the longer out buffer. ++ * In buffer is always 1 byte, so it should fit here ++ */ ++ return sizeof(struct virtio_net_ctrl_hdr) + ++ 2 * sizeof(struct virtio_net_ctrl_mac) + ++ MAC_TABLE_ENTRIES * ETH_ALEN; ++} ++ ++static size_t vhost_vdpa_net_cvq_cmd_page_len(void) ++{ ++ return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size); ++} ++ ++/** Copy and map a guest buffer. */ ++static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, ++ const struct iovec *out_data, ++ size_t out_num, size_t data_len, void *buf, ++ size_t *written, bool write) ++{ ++ DMAMap map = {}; ++ int r; ++ ++ if (unlikely(!data_len)) { ++ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n", ++ __func__, write ? "in" : "out"); ++ return false; ++ } ++ ++ *written = iov_to_buf(out_data, out_num, 0, buf, data_len); ++ map.translated_addr = (hwaddr)(uintptr_t)buf; ++ map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1; ++ map.perm = write ? 
IOMMU_RW : IOMMU_RO, ++ r = vhost_iova_tree_map_alloc(v->iova_tree, &map); ++ if (unlikely(r != IOVA_OK)) { ++ error_report("Cannot map injected element"); ++ return false; ++ } ++ ++ r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, ++ !write); ++ if (unlikely(r < 0)) { ++ goto dma_map_err; ++ } ++ ++ return true; ++ ++dma_map_err: ++ vhost_iova_tree_remove(v->iova_tree, &map); ++ return false; ++} ++ + /** +- * Forward buffer for the moment. ++ * Copy the guest element into a dedicated buffer suitable to be sent to NIC ++ * ++ * @iov: [0] is the out buffer, [1] is the in one ++ */ ++static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, ++ VirtQueueElement *elem, ++ struct iovec *iov) ++{ ++ size_t in_copied; ++ bool ok; ++ ++ iov[0].iov_base = s->cvq_cmd_out_buffer; ++ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num, ++ vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base, ++ &iov[0].iov_len, false); ++ if (unlikely(!ok)) { ++ return false; ++ } ++ ++ iov[1].iov_base = s->cvq_cmd_in_buffer; ++ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0, ++ sizeof(virtio_net_ctrl_ack), iov[1].iov_base, ++ &in_copied, true); ++ if (unlikely(!ok)) { ++ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); ++ return false; ++ } ++ ++ iov[1].iov_len = sizeof(virtio_net_ctrl_ack); ++ return true; ++} ++ ++/** ++ * Do not forward commands not supported by SVQ. Otherwise, the device could ++ * accept it and qemu would not know how to update the device model. 
++ */ ++static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out, ++ size_t out_num) ++{ ++ struct virtio_net_ctrl_hdr ctrl; ++ size_t n; ++ ++ n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl)); ++ if (unlikely(n < sizeof(ctrl))) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "%s: invalid legnth of out buffer %zu\n", __func__, n); ++ return false; ++ } ++ ++ switch (ctrl.class) { ++ case VIRTIO_NET_CTRL_MAC: ++ switch (ctrl.cmd) { ++ case VIRTIO_NET_CTRL_MAC_ADDR_SET: ++ return true; ++ default: ++ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n", ++ __func__, ctrl.cmd); ++ }; ++ break; ++ default: ++ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n", ++ __func__, ctrl.class); ++ }; ++ ++ return false; ++} ++ ++/** ++ * Validate and copy control virtqueue commands. ++ * ++ * Following QEMU guidelines, we offer a copy of the buffers to the device to ++ * prevent TOCTOU bugs. + */ + static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + VirtQueueElement *elem, + void *opaque) + { +- unsigned int n = elem->out_num + elem->in_num; +- g_autofree struct iovec *dev_buffers = g_new(struct iovec, n); ++ VhostVDPAState *s = opaque; + size_t in_len, dev_written; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; +- int r; ++ /* out and in buffers sent to the device */ ++ struct iovec dev_buffers[2] = { ++ { .iov_base = s->cvq_cmd_out_buffer }, ++ { .iov_base = s->cvq_cmd_in_buffer }, ++ }; ++ /* in buffer used for device model */ ++ const struct iovec in = { ++ .iov_base = &status, ++ .iov_len = sizeof(status), ++ }; ++ int r = -EINVAL; ++ bool ok; ++ ++ ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers); ++ if (unlikely(!ok)) { ++ goto out; ++ } + +- memcpy(dev_buffers, elem->out_sg, elem->out_num); +- memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num); ++ ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1); ++ if (unlikely(!ok)) { ++ goto out; ++ } + +- r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, 
&dev_buffers[1], +- elem->in_num, elem); ++ r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem); + if (unlikely(r != 0)) { + if (unlikely(r == -ENOSPC)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", +@@ -224,6 +396,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + dev_written = vhost_svq_poll(svq); + if (unlikely(dev_written < sizeof(status))) { + error_report("Insufficient written data (%zu)", dev_written); ++ goto out; ++ } ++ ++ memcpy(&status, dev_buffers[1].iov_base, sizeof(status)); ++ if (status != VIRTIO_NET_OK) { ++ goto out; ++ } ++ ++ status = VIRTIO_NET_ERR; ++ virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1); ++ if (status != VIRTIO_NET_OK) { ++ error_report("Bad CVQ processing in model"); + } + + out: +@@ -234,6 +418,12 @@ out: + } + vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); + g_free(elem); ++ if (dev_buffers[0].iov_base) { ++ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base); ++ } ++ if (dev_buffers[1].iov_base) { ++ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base); ++ } + return r; + } + +@@ -266,6 +456,13 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + s->vhost_vdpa.device_fd = vdpa_device_fd; + s->vhost_vdpa.index = queue_pair_index; + if (!is_datapath) { ++ s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size, ++ vhost_vdpa_net_cvq_cmd_page_len()); ++ memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); ++ s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size, ++ vhost_vdpa_net_cvq_cmd_page_len()); ++ memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); ++ + s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; + s->vhost_vdpa.shadow_vq_ops_opaque = s; + } +-- +2.31.1 + diff --git a/kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch b/kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch new file mode 100644 index 0000000..d6e72ac --- 
/dev/null +++ b/kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch @@ -0,0 +1,84 @@ +From 3a5d325fcb2958318262efac31d5fd25fb062523 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 21/32] vdpa: Export vhost_vdpa_dma_map and unmap calls +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [21/27] 97e7a583bbd3c12a0786d53132812ec41702c190 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 463ba1e3b8cf080812895c5f26d95d8d7db2e692 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:40 2022 +0200 + + vdpa: Export vhost_vdpa_dma_map and unmap calls + + Shadow CVQ will copy buffers on qemu VA, so we avoid TOCTOU attacks from + the guest that could set a different state in qemu device model and vdpa + device. + + To do so, it needs to be able to map these new buffers to the device. + + Signed-off-by: Eugenio Pérez + Acked-by: Jason Wang + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-vdpa.c | 7 +++---- + include/hw/virtio/vhost-vdpa.h | 4 ++++ + 2 files changed, 7 insertions(+), 4 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 28df57b12e..14b02fe079 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -71,8 +71,8 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, + return false; + } + +-static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, +- void *vaddr, bool readonly) ++int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, ++ void *vaddr, bool readonly) + { + struct vhost_msg_v2 msg = {}; + int fd = v->device_fd; +@@ -97,8 +97,7 @@ static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, + return ret; + } + +-static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, +- hwaddr size) ++int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size) + { + struct vhost_msg_v2 msg = {}; + int fd = v->device_fd; +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index a29dbb3f53..7214eb47dc 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -39,4 +39,8 @@ typedef struct vhost_vdpa { + VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; + } VhostVDPA; + ++int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, ++ void *vaddr, bool readonly); ++int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size); ++ + #endif +-- +2.31.1 + diff --git a/kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch b/kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch new file mode 100644 index 0000000..44e97af --- /dev/null +++ b/kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch @@ -0,0 +1,108 @@ +From 9a290bd74f983f3a65aa9ec5df2da9aa94bfdecd Mon Sep 17 00:00:00 2001 +From: 
=?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 16:05:42 +0200 +Subject: [PATCH 25/32] vdpa: Extract get features part from + vhost_vdpa_get_max_queue_pairs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [25/27] 654ad68e10a4df84cced923c64e72d500721ad67 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 8170ab3f43989680491d00f1017f60b25d346114 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:44 2022 +0200 + + vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs + + To know the device features is needed for CVQ SVQ, so SVQ knows if it + can handle all commands or not. Extract from + vhost_vdpa_get_max_queue_pairs so we can reuse it. + + Signed-off-by: Eugenio Pérez + Acked-by: Jason Wang + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + net/vhost-vdpa.c | 30 ++++++++++++++++++++---------- + 1 file changed, 20 insertions(+), 10 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index df42822463..8b76dac966 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -474,20 +474,24 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + return nc; + } + +-static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp) ++static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp) ++{ ++ int ret = ioctl(fd, VHOST_GET_FEATURES, features); ++ if (unlikely(ret < 0)) { ++ error_setg_errno(errp, errno, ++ "Fail to query features from vhost-vDPA device"); ++ } ++ return ret; ++} ++ ++static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features, ++ int *has_cvq, Error **errp) + { + unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); + g_autofree struct vhost_vdpa_config *config = NULL; + __virtio16 *max_queue_pairs; +- uint64_t features; + int ret; + +- ret = ioctl(fd, VHOST_GET_FEATURES, &features); +- if (ret) { +- error_setg(errp, "Fail to query features from vhost-vDPA device"); +- return ret; +- } +- + if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) { + *has_cvq = 1; + } else { +@@ -517,10 +521,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp) + { + const NetdevVhostVDPAOptions *opts; ++ uint64_t features; + int vdpa_device_fd; + g_autofree NetClientState **ncs = NULL; + NetClientState *nc; +- int queue_pairs, i, has_cvq = 0; ++ int queue_pairs, r, i, has_cvq = 0; + + assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); + opts = &netdev->u.vhost_vdpa; +@@ -534,7 +539,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + return -errno; + } + +- queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, ++ r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp); ++ if 
(unlikely(r < 0)) { ++ return r; ++ } ++ ++ queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features, + &has_cvq, errp); + if (queue_pairs < 0) { + qemu_close(vdpa_device_fd); +-- +2.31.1 + diff --git a/kvm-vdpa-manual-forward-CVQ-buffers.patch b/kvm-vdpa-manual-forward-CVQ-buffers.patch new file mode 100644 index 0000000..61909ff --- /dev/null +++ b/kvm-vdpa-manual-forward-CVQ-buffers.patch @@ -0,0 +1,166 @@ +From c33bc0b7f2b5cfa330a6d89d60ee94de129c65c1 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 16:05:38 +0200 +Subject: [PATCH 23/32] vdpa: manual forward CVQ buffers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [23/27] ce128d5152be7eebf87e186eb8b58c2ed95aff6d (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit bd907ae4b00ebedad5e586af05ea3d6490318d45 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:42 2022 +0200 + + vdpa: manual forward CVQ buffers + + Do a simple forwarding of CVQ buffers, the same work SVQ could do but + through callbacks. No functional change intended. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-vdpa.c | 3 +- + include/hw/virtio/vhost-vdpa.h | 3 ++ + net/vhost-vdpa.c | 58 ++++++++++++++++++++++++++++++++++ + 3 files changed, 63 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 14b02fe079..49effe5462 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -417,7 +417,8 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, + for (unsigned n = 0; n < hdev->nvqs; ++n) { + g_autoptr(VhostShadowVirtqueue) svq; + +- svq = vhost_svq_new(v->iova_tree, NULL, NULL); ++ svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops, ++ v->shadow_vq_ops_opaque); + if (unlikely(!svq)) { + error_setg(errp, "Cannot create svq %u", n); + return -1; +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index 7214eb47dc..1111d85643 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -15,6 +15,7 @@ + #include + + #include "hw/virtio/vhost-iova-tree.h" ++#include "hw/virtio/vhost-shadow-virtqueue.h" + #include "hw/virtio/virtio.h" + #include "standard-headers/linux/vhost_types.h" + +@@ -35,6 +36,8 @@ typedef struct vhost_vdpa { + /* IOVA mapping used by the Shadow Virtqueue */ + VhostIOVATree *iova_tree; + GPtrArray *shadow_vqs; ++ const VhostShadowVirtqueueOps *shadow_vq_ops; ++ void *shadow_vq_ops_opaque; + struct vhost_dev *dev; + VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; + } VhostVDPA; +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index df1e69ee72..2e3b6b10d8 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -11,11 +11,14 @@ + + #include "qemu/osdep.h" + #include "clients.h" ++#include "hw/virtio/virtio-net.h" + #include "net/vhost_net.h" + #include "net/vhost-vdpa.h" + #include "hw/virtio/vhost-vdpa.h" + #include "qemu/config-file.h" + #include "qemu/error-report.h" ++#include "qemu/log.h" ++#include "qemu/memalign.h" + 
#include "qemu/option.h" + #include "qapi/error.h" + #include +@@ -187,6 +190,57 @@ static NetClientInfo net_vhost_vdpa_info = { + .check_peer_type = vhost_vdpa_check_peer_type, + }; + ++/** ++ * Forward buffer for the moment. ++ */ ++static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, ++ VirtQueueElement *elem, ++ void *opaque) ++{ ++ unsigned int n = elem->out_num + elem->in_num; ++ g_autofree struct iovec *dev_buffers = g_new(struct iovec, n); ++ size_t in_len, dev_written; ++ virtio_net_ctrl_ack status = VIRTIO_NET_ERR; ++ int r; ++ ++ memcpy(dev_buffers, elem->out_sg, elem->out_num); ++ memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num); ++ ++ r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1], ++ elem->in_num, elem); ++ if (unlikely(r != 0)) { ++ if (unlikely(r == -ENOSPC)) { ++ qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", ++ __func__); ++ } ++ goto out; ++ } ++ ++ /* ++ * We can poll here since we've had BQL from the time we sent the ++ * descriptor. 
Also, we need to take the answer before SVQ pulls by itself, ++ * when BQL is released ++ */ ++ dev_written = vhost_svq_poll(svq); ++ if (unlikely(dev_written < sizeof(status))) { ++ error_report("Insufficient written data (%zu)", dev_written); ++ } ++ ++out: ++ in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, ++ sizeof(status)); ++ if (unlikely(in_len < sizeof(status))) { ++ error_report("Bad device CVQ written length"); ++ } ++ vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); ++ g_free(elem); ++ return r; ++} ++ ++static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { ++ .avail_handler = vhost_vdpa_net_handle_ctrl_avail, ++}; ++ + static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + const char *device, + const char *name, +@@ -211,6 +265,10 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + + s->vhost_vdpa.device_fd = vdpa_device_fd; + s->vhost_vdpa.index = queue_pair_index; ++ if (!is_datapath) { ++ s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; ++ s->vhost_vdpa.shadow_vq_ops_opaque = s; ++ } + ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); + if (ret) { + qemu_del_net_client(nc); +-- +2.31.1 + diff --git a/kvm-vhost-Add-SVQDescState.patch b/kvm-vhost-Add-SVQDescState.patch new file mode 100644 index 0000000..b1ea4bb --- /dev/null +++ b/kvm-vhost-Add-SVQDescState.patch @@ -0,0 +1,135 @@ +From 14200f493243f73152ea4a4b97274f0ec4fb36fa Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 15/32] vhost: Add SVQDescState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [15/27] 2e2866f22e37cace8598ff44dfcdc07fcc915d6d (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: 
https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 9e87868fcaf5785c8e1490c290505fa32305ff91 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:34 2022 +0200 + + vhost: Add SVQDescState + + This will allow SVQ to add context to the different queue elements. + + This patch only store the actual element, no functional change intended. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 16 ++++++++-------- + hw/virtio/vhost-shadow-virtqueue.h | 8 ++++++-- + 2 files changed, 14 insertions(+), 10 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 3cec03d709..a08e3d4025 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -256,7 +256,7 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, + return -EINVAL; + } + +- svq->ring_id_maps[qemu_head] = elem; ++ svq->desc_state[qemu_head].elem = elem; + vhost_svq_kick(svq); + return 0; + } +@@ -411,21 +411,21 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return NULL; + } + +- if (unlikely(!svq->ring_id_maps[used_elem.id])) { ++ if (unlikely(!svq->desc_state[used_elem.id].elem)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Device %s says index %u is used, but it was not available", + svq->vdev->name, used_elem.id); + return NULL; + } + +- num = svq->ring_id_maps[used_elem.id]->in_num + +- svq->ring_id_maps[used_elem.id]->out_num; ++ num = svq->desc_state[used_elem.id].elem->in_num + ++ svq->desc_state[used_elem.id].elem->out_num; + last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); + svq->desc_next[last_used_chain] = svq->free_head; + svq->free_head = used_elem.id; + + *len = used_elem.len; +- return g_steal_pointer(&svq->ring_id_maps[used_elem.id]); ++ return 
g_steal_pointer(&svq->desc_state[used_elem.id].elem); + } + + static void vhost_svq_flush(VhostShadowVirtqueue *svq, +@@ -595,7 +595,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + memset(svq->vring.desc, 0, driver_size); + svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size); + memset(svq->vring.used, 0, device_size); +- svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); ++ svq->desc_state = g_new0(SVQDescState, svq->vring.num); + svq->desc_next = g_new0(uint16_t, svq->vring.num); + for (unsigned i = 0; i < svq->vring.num - 1; i++) { + svq->desc_next[i] = cpu_to_le16(i + 1); +@@ -620,7 +620,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + + for (unsigned i = 0; i < svq->vring.num; ++i) { + g_autofree VirtQueueElement *elem = NULL; +- elem = g_steal_pointer(&svq->ring_id_maps[i]); ++ elem = g_steal_pointer(&svq->desc_state[i].elem); + if (elem) { + virtqueue_detach_element(svq->vq, elem, 0); + } +@@ -632,7 +632,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + } + svq->vq = NULL; + g_free(svq->desc_next); +- g_free(svq->ring_id_maps); ++ g_free(svq->desc_state); + qemu_vfree(svq->vring.desc); + qemu_vfree(svq->vring.used); + } +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index c132c994e9..d646c35054 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -15,6 +15,10 @@ + #include "standard-headers/linux/vhost_types.h" + #include "hw/virtio/vhost-iova-tree.h" + ++typedef struct SVQDescState { ++ VirtQueueElement *elem; ++} SVQDescState; ++ + /* Shadow virtqueue to relay notifications */ + typedef struct VhostShadowVirtqueue { + /* Shadow vring */ +@@ -47,8 +51,8 @@ typedef struct VhostShadowVirtqueue { + /* IOVA mapping */ + VhostIOVATree *iova_tree; + +- /* Map for use the guest's descriptors */ +- VirtQueueElement **ring_id_maps; ++ /* SVQ vring descriptors state */ ++ SVQDescState *desc_state; + + /* Next VirtQueue 
element that guest made available */ + VirtQueueElement *next_guest_avail_elem; +-- +2.31.1 + diff --git a/kvm-vhost-Add-svq-avail_handler-callback.patch b/kvm-vhost-Add-svq-avail_handler-callback.patch new file mode 100644 index 0000000..a8b585d --- /dev/null +++ b/kvm-vhost-Add-svq-avail_handler-callback.patch @@ -0,0 +1,164 @@ +From 433106c286a1961737300ebaece6f10b2747e7d8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 20/32] vhost: Add svq avail_handler callback +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [20/27] d228eb89d204f8be623bc870503bbf0078dfc9ae (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit e966c0b781aebabd2c0f5eef91678f08ce1d068c +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:39 2022 +0200 + + vhost: Add svq avail_handler callback + + This allows external handlers to be aware of new buffers that the guest + places in the virtqueue. + + When this callback is defined the ownership of the guest's virtqueue + element is transferred to the callback. This means that if the user + wants to forward the descriptor it needs to manually inject it. The + callback is also free to process the command by itself and use the + element with svq_push. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 14 ++++++++++++-- + hw/virtio/vhost-shadow-virtqueue.h | 31 +++++++++++++++++++++++++++++- + hw/virtio/vhost-vdpa.c | 3 ++- + 3 files changed, 44 insertions(+), 4 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 95d0d7a7ee..e53aac45f6 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -306,7 +306,11 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + break; + } + +- r = vhost_svq_add_element(svq, elem); ++ if (svq->ops) { ++ r = svq->ops->avail_handler(svq, elem, svq->ops_opaque); ++ } else { ++ r = vhost_svq_add_element(svq, elem); ++ } + if (unlikely(r != 0)) { + if (r == -ENOSPC) { + /* +@@ -685,12 +689,16 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + * shadow methods and file descriptors. + * + * @iova_tree: Tree to perform descriptors translations ++ * @ops: SVQ owner callbacks ++ * @ops_opaque: ops opaque pointer + * + * Returns the new virtqueue or NULL. + * + * In case of error, reason is reported through error_report. 
+ */ +-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree) ++VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, ++ const VhostShadowVirtqueueOps *ops, ++ void *ops_opaque) + { + g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); + int r; +@@ -712,6 +720,8 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree) + event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); + event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); + svq->iova_tree = iova_tree; ++ svq->ops = ops; ++ svq->ops_opaque = ops_opaque; + return g_steal_pointer(&svq); + + err_init_hdev_call: +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index cf442f7dea..d04c34a589 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -25,6 +25,27 @@ typedef struct SVQDescState { + unsigned int ndescs; + } SVQDescState; + ++typedef struct VhostShadowVirtqueue VhostShadowVirtqueue; ++ ++/** ++ * Callback to handle an avail buffer. ++ * ++ * @svq: Shadow virtqueue ++ * @elem: Element placed in the queue by the guest ++ * @vq_callback_opaque: Opaque ++ * ++ * Returns 0 if the vq is running as expected. ++ * ++ * Note that ownership of elem is transferred to the callback. 
++ */ ++typedef int (*VirtQueueAvailCallback)(VhostShadowVirtqueue *svq, ++ VirtQueueElement *elem, ++ void *vq_callback_opaque); ++ ++typedef struct VhostShadowVirtqueueOps { ++ VirtQueueAvailCallback avail_handler; ++} VhostShadowVirtqueueOps; ++ + /* Shadow virtqueue to relay notifications */ + typedef struct VhostShadowVirtqueue { + /* Shadow vring */ +@@ -69,6 +90,12 @@ typedef struct VhostShadowVirtqueue { + */ + uint16_t *desc_next; + ++ /* Caller callbacks */ ++ const VhostShadowVirtqueueOps *ops; ++ ++ /* Caller callbacks opaque */ ++ void *ops_opaque; ++ + /* Next head to expose to the device */ + uint16_t shadow_avail_idx; + +@@ -102,7 +129,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + VirtQueue *vq); + void vhost_svq_stop(VhostShadowVirtqueue *svq); + +-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree); ++VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, ++ const VhostShadowVirtqueueOps *ops, ++ void *ops_opaque); + + void vhost_svq_free(gpointer vq); + G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free); +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 33dcaa135e..28df57b12e 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -416,8 +416,9 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, + + shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); + for (unsigned n = 0; n < hdev->nvqs; ++n) { +- g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree); ++ g_autoptr(VhostShadowVirtqueue) svq; + ++ svq = vhost_svq_new(v->iova_tree, NULL, NULL); + if (unlikely(!svq)) { + error_setg(errp, "Cannot create svq %u", n); + return -1; +-- +2.31.1 + diff --git a/kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch b/kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch new file mode 100644 index 0000000..9b09d42 --- /dev/null +++ b/kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch @@ -0,0 +1,134 @@ +From 
893dffb820973361bcef33612a6b924554a856c1 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 13/32] vhost: Check for queue full at vhost_svq_add +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [13/27] d4bd8299fb7733a1e190618dfc92b4b53b7bbeb3 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit f20b70eb5a68cfd8fef74a13ccdd494ef1cb0221 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:32 2022 +0200 + + vhost: Check for queue full at vhost_svq_add + + The series need to expose vhost_svq_add with full functionality, + including checking for full queue. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 59 +++++++++++++++++------------- + 1 file changed, 33 insertions(+), 26 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index e3fc3c2658..1d2bab287b 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -233,21 +233,29 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) + * Add an element to a SVQ. + * + * The caller must check that there is enough slots for the new element. It +- * takes ownership of the element: In case of failure, it is free and the SVQ +- * is considered broken. ++ * takes ownership of the element: In case of failure not ENOSPC, it is free. 
++ * ++ * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full + */ +-static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) ++static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) + { + unsigned qemu_head; +- bool ok = vhost_svq_add_split(svq, elem, &qemu_head); ++ unsigned ndescs = elem->in_num + elem->out_num; ++ bool ok; ++ ++ if (unlikely(ndescs > vhost_svq_available_slots(svq))) { ++ return -ENOSPC; ++ } ++ ++ ok = vhost_svq_add_split(svq, elem, &qemu_head); + if (unlikely(!ok)) { + g_free(elem); +- return false; ++ return -EINVAL; + } + + svq->ring_id_maps[qemu_head] = elem; + vhost_svq_kick(svq); +- return true; ++ return 0; + } + + /** +@@ -274,7 +282,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + + while (true) { + VirtQueueElement *elem; +- bool ok; ++ int r; + + if (svq->next_guest_avail_elem) { + elem = g_steal_pointer(&svq->next_guest_avail_elem); +@@ -286,25 +294,24 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + break; + } + +- if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) { +- /* +- * This condition is possible since a contiguous buffer in GPA +- * does not imply a contiguous buffer in qemu's VA +- * scatter-gather segments. If that happens, the buffer exposed +- * to the device needs to be a chain of descriptors at this +- * moment. +- * +- * SVQ cannot hold more available buffers if we are here: +- * queue the current guest descriptor and ignore further kicks +- * until some elements are used. +- */ +- svq->next_guest_avail_elem = elem; +- return; +- } +- +- ok = vhost_svq_add(svq, elem); +- if (unlikely(!ok)) { +- /* VQ is broken, just return and ignore any other kicks */ ++ r = vhost_svq_add(svq, elem); ++ if (unlikely(r != 0)) { ++ if (r == -ENOSPC) { ++ /* ++ * This condition is possible since a contiguous buffer in ++ * GPA does not imply a contiguous buffer in qemu's VA ++ * scatter-gather segments. 
If that happens, the buffer ++ * exposed to the device needs to be a chain of descriptors ++ * at this moment. ++ * ++ * SVQ cannot hold more available buffers if we are here: ++ * queue the current guest descriptor and ignore kicks ++ * until some elements are used. ++ */ ++ svq->next_guest_avail_elem = elem; ++ } ++ ++ /* VQ is full or broken, just return and ignore kicks */ + return; + } + } +-- +2.31.1 + diff --git a/kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch b/kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch new file mode 100644 index 0000000..6755aad --- /dev/null +++ b/kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch @@ -0,0 +1,138 @@ +From 5c8de23e185a1a1f0b19eac3c9fa03411c9f545c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 14/32] vhost: Decouple vhost_svq_add from VirtQueueElement +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [14/27] 463087dd316adc91b9c7a4e6634c6fc1745c1849 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 1f46ae65d85f677b660bda46685dd3e94885a7cb +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:33 2022 +0200 + + vhost: Decouple vhost_svq_add from VirtQueueElement + + VirtQueueElement comes from the guest, but we're heading SVQ to be able + to modify the element presented to the device without the guest's + knowledge. + + To do so, make SVQ accept sg buffers directly, instead of using + VirtQueueElement. + + Add vhost_svq_add_element to maintain element convenience. + + Signed-off-by: Eugenio Pérez + Acked-by: Jason Wang + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 33 ++++++++++++++++++++---------- + 1 file changed, 22 insertions(+), 11 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 1d2bab287b..3cec03d709 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -172,30 +172,31 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, + } + + static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, +- VirtQueueElement *elem, unsigned *head) ++ const struct iovec *out_sg, size_t out_num, ++ const struct iovec *in_sg, size_t in_num, ++ unsigned *head) + { + unsigned avail_idx; + vring_avail_t *avail = svq->vring.avail; + bool ok; +- g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num)); ++ g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num)); + + *head = svq->free_head; + + /* We need some descriptors here */ +- if (unlikely(!elem->out_num && !elem->in_num)) { ++ if (unlikely(!out_num && !in_num)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Guest provided element with no descriptors"); + return false; + } + +- ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, +- elem->in_num > 0, false); ++ ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0, ++ false); + if (unlikely(!ok)) { + return false; + } + +- ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, +- true); ++ ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true); + if (unlikely(!ok)) { + return false; + } +@@ -237,17 +238,19 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) + * + * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full + */ +-static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) ++static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, ++ size_t out_num, 
const struct iovec *in_sg, ++ size_t in_num, VirtQueueElement *elem) + { + unsigned qemu_head; +- unsigned ndescs = elem->in_num + elem->out_num; ++ unsigned ndescs = in_num + out_num; + bool ok; + + if (unlikely(ndescs > vhost_svq_available_slots(svq))) { + return -ENOSPC; + } + +- ok = vhost_svq_add_split(svq, elem, &qemu_head); ++ ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head); + if (unlikely(!ok)) { + g_free(elem); + return -EINVAL; +@@ -258,6 +261,14 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) + return 0; + } + ++/* Convenience wrapper to add a guest's element to SVQ */ ++static int vhost_svq_add_element(VhostShadowVirtqueue *svq, ++ VirtQueueElement *elem) ++{ ++ return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg, ++ elem->in_num, elem); ++} ++ + /** + * Forward available buffers. + * +@@ -294,7 +305,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + break; + } + +- r = vhost_svq_add(svq, elem); ++ r = vhost_svq_add_element(svq, elem); + if (unlikely(r != 0)) { + if (r == -ENOSPC) { + /* +-- +2.31.1 + diff --git a/kvm-vhost-Expose-vhost_svq_add.patch b/kvm-vhost-Expose-vhost_svq_add.patch new file mode 100644 index 0000000..70dc774 --- /dev/null +++ b/kvm-vhost-Expose-vhost_svq_add.patch @@ -0,0 +1,73 @@ +From cefd6583a8483c7a80f9cde8f7ad4705983af9e7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 18/32] vhost: Expose vhost_svq_add +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [18/27] bfb44f597d350336113783bcc9b3c9d9d32ff8c0 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git 
+ +commit d0291f3f284d3bc220cdb13b0d8ac8a44eb5fd4c +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:37 2022 +0200 + + vhost: Expose vhost_svq_add + + This allows external parts of SVQ to forward custom buffers to the + device. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 6 +++--- + hw/virtio/vhost-shadow-virtqueue.h | 3 +++ + 2 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 1ce52d5b4a..cb879e7b88 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -238,9 +238,9 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) + * + * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full + */ +-static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, +- size_t out_num, const struct iovec *in_sg, +- size_t in_num, VirtQueueElement *elem) ++int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, ++ size_t out_num, const struct iovec *in_sg, size_t in_num, ++ VirtQueueElement *elem) + { + unsigned qemu_head; + unsigned ndescs = in_num + out_num; +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index d9fc1f1799..dd78f4bec2 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -86,6 +86,9 @@ bool vhost_svq_valid_features(uint64_t features, Error **errp); + + void vhost_svq_push_elem(VhostShadowVirtqueue *svq, + const VirtQueueElement *elem, uint32_t len); ++int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, ++ size_t out_num, const struct iovec *in_sg, size_t in_num, ++ VirtQueueElement *elem); + + void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); + void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); +-- +2.31.1 + diff --git 
a/kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch b/kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch new file mode 100644 index 0000000..f149c05 --- /dev/null +++ b/kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch @@ -0,0 +1,83 @@ +From 793d6d56190397624efdcaf6e0112bd12e39c05d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:25:01 +0200 +Subject: [PATCH 02/32] vhost: Fix device's used descriptor dequeue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [2/27] b92803a0681c94c65d243dd07424522387594760 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 81abfa5724c9a6502d7a1d3a67c55f2a303a1170 +Author: Eugenio Pérez +Date: Thu May 12 19:57:43 2022 +0200 + + vhost: Fix device's used descriptor dequeue + + Only the first one of them were properly enqueued back. + + Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding") + + Signed-off-by: Eugenio Pérez + Message-Id: <20220512175747.142058-3-eperezma@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 17 +++++++++++++++-- + 1 file changed, 15 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 3155801f50..31fc50907d 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -334,12 +334,22 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) + svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); + } + ++static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq, ++ uint16_t num, uint16_t i) ++{ ++ for (uint16_t j = 0; j < (num - 1); ++j) { ++ i = le16_to_cpu(svq->desc_next[i]); ++ } ++ ++ return i; ++} ++ + static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + uint32_t *len) + { + const vring_used_t *used = svq->vring.used; + vring_used_elem_t used_elem; +- uint16_t last_used; ++ uint16_t last_used, last_used_chain, num; + + if (!vhost_svq_more_used(svq)) { + return NULL; +@@ -365,7 +375,10 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return NULL; + } + +- svq->desc_next[used_elem.id] = svq->free_head; ++ num = svq->ring_id_maps[used_elem.id]->in_num + ++ svq->ring_id_maps[used_elem.id]->out_num; ++ last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); ++ svq->desc_next[last_used_chain] = svq->free_head; + svq->free_head = used_elem.id; + + *len = used_elem.len; +-- +2.31.1 + diff --git a/kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch b/kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch new file mode 100644 index 0000000..51eb700 --- /dev/null +++ b/kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch @@ -0,0 +1,68 @@ +From aa99cf129923e0203c0caeb3b4e94a0eb973746f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:36:38 +0200 +Subject: [PATCH 04/32] vhost: Fix element in vhost_svq_add failure +MIME-Version: 
1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [4/27] 96689c99a47dd49591c0d126cb1fbb975b2f79b4 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 5181db132b587754dda3a520eec923b87a65bbb7 +Author: Eugenio Pérez +Date: Thu May 12 19:57:47 2022 +0200 + + vhost: Fix element in vhost_svq_add failure + + Coverity rightly reports that is not free in that case. + + Fixes: Coverity CID 1487559 + Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding") + + Signed-off-by: Eugenio Pérez + Message-Id: <20220512175747.142058-7-eperezma@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 31fc50907d..06d0bb39d9 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -199,11 +199,19 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, + return true; + } + ++/** ++ * Add an element to a SVQ. ++ * ++ * The caller must check that there is enough slots for the new element. It ++ * takes ownership of the element: In case of failure, it is free and the SVQ ++ * is considered broken. 
++ */ + static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) + { + unsigned qemu_head; + bool ok = vhost_svq_add_split(svq, elem, &qemu_head); + if (unlikely(!ok)) { ++ g_free(elem); + return false; + } + +-- +2.31.1 + diff --git a/kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch b/kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch new file mode 100644 index 0000000..513d7b4 --- /dev/null +++ b/kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch @@ -0,0 +1,61 @@ +From 3a944d8cd3d35b2398ff68d9ed8ea51d27dfab3c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 12/32] vhost: Move vhost_svq_kick call to vhost_svq_add +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [12/27] 29a7e1fb4992c4beca1e9a3379bb4c8a0f567459 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 98b5adef8493a2bfad6655cfee84299e88bedbf7 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:31 2022 +0200 + + vhost: Move vhost_svq_kick call to vhost_svq_add + + The series needs to expose vhost_svq_add with full functionality, + including kick + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 05cd39d1eb..e3fc3c2658 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -246,6 +246,7 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) + } + + svq->ring_id_maps[qemu_head] = elem; ++ vhost_svq_kick(svq); + return true; + } + +@@ -306,7 +307,6 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + /* VQ is broken, just return and ignore any other kicks */ + return; + } +- vhost_svq_kick(svq); + } + + virtio_queue_set_notification(svq->vq, true); +-- +2.31.1 + diff --git a/kvm-vhost-Reorder-vhost_svq_kick.patch b/kvm-vhost-Reorder-vhost_svq_kick.patch new file mode 100644 index 0000000..f61f3c3 --- /dev/null +++ b/kvm-vhost-Reorder-vhost_svq_kick.patch @@ -0,0 +1,88 @@ +From fdbf66e4c70de16ab36d70ea591322b1b24df591 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 11/32] vhost: Reorder vhost_svq_kick +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [11/27] 1d08b97eb3960a0f85f2dd48c3331b803f7ea205 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit d93a2405ca6efa9dc1c420cee5a34bd8242818d0 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:30 2022 +0200 + + vhost: Reorder vhost_svq_kick + + Future code needs to call it from vhost_svq_add. + + No functional change intended. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 28 ++++++++++++++-------------- + 1 file changed, 14 insertions(+), 14 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 9c46c3a8fa..05cd39d1eb 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -215,6 +215,20 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, + return true; + } + ++static void vhost_svq_kick(VhostShadowVirtqueue *svq) ++{ ++ /* ++ * We need to expose the available array entries before checking the used ++ * flags ++ */ ++ smp_mb(); ++ if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { ++ return; ++ } ++ ++ event_notifier_set(&svq->hdev_kick); ++} ++ + /** + * Add an element to a SVQ. + * +@@ -235,20 +249,6 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) + return true; + } + +-static void vhost_svq_kick(VhostShadowVirtqueue *svq) +-{ +- /* +- * We need to expose the available array entries before checking the used +- * flags +- */ +- smp_mb(); +- if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { +- return; +- } +- +- event_notifier_set(&svq->hdev_kick); +-} +- + /** + * Forward available buffers. 
+ * +-- +2.31.1 + diff --git a/kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch b/kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch new file mode 100644 index 0000000..31bfccc --- /dev/null +++ b/kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch @@ -0,0 +1,123 @@ +From 486647551223cc01f4dba87197030bbf4e674f0f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:24:48 +0200 +Subject: [PATCH 01/32] vhost: Track descriptor chain in private at SVQ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [1/27] 26d16dc383e3064ac6e4288d5c52b39fee0ad204 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 495fe3a78749c39c0e772c4e1a55d6cb8a7e5292 +Author: Eugenio Pérez +Date: Thu May 12 19:57:42 2022 +0200 + + vhost: Track descriptor chain in private at SVQ + + The device could have access to modify them, and it definitely have + access when we implement packed vq. Harden SVQ maintaining a private + copy of the descriptor chain. Other fields like buffer addresses are + already maintained sepparatedly. + + Signed-off-by: Eugenio Pérez + Message-Id: <20220512175747.142058-2-eperezma@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 12 +++++++----- + hw/virtio/vhost-shadow-virtqueue.h | 6 ++++++ + 2 files changed, 13 insertions(+), 5 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index b232803d1b..3155801f50 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -138,6 +138,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, + for (n = 0; n < num; n++) { + if (more_descs || (n + 1 < num)) { + descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT); ++ descs[i].next = cpu_to_le16(svq->desc_next[i]); + } else { + descs[i].flags = flags; + } +@@ -145,10 +146,10 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, + descs[i].len = cpu_to_le32(iovec[n].iov_len); + + last = i; +- i = cpu_to_le16(descs[i].next); ++ i = cpu_to_le16(svq->desc_next[i]); + } + +- svq->free_head = le16_to_cpu(descs[last].next); ++ svq->free_head = le16_to_cpu(svq->desc_next[last]); + } + + static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, +@@ -336,7 +337,6 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) + static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + uint32_t *len) + { +- vring_desc_t *descs = svq->vring.desc; + const vring_used_t *used = svq->vring.used; + vring_used_elem_t used_elem; + uint16_t last_used; +@@ -365,7 +365,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return NULL; + } + +- descs[used_elem.id].next = svq->free_head; ++ svq->desc_next[used_elem.id] = svq->free_head; + svq->free_head = used_elem.id; + + *len = used_elem.len; +@@ -540,8 +540,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size); + memset(svq->vring.used, 0, device_size); + svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); 
++ svq->desc_next = g_new0(uint16_t, svq->vring.num); + for (unsigned i = 0; i < svq->vring.num - 1; i++) { +- svq->vring.desc[i].next = cpu_to_le16(i + 1); ++ svq->desc_next[i] = cpu_to_le16(i + 1); + } + } + +@@ -574,6 +575,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + virtqueue_detach_element(svq->vq, next_avail_elem, 0); + } + svq->vq = NULL; ++ g_free(svq->desc_next); + g_free(svq->ring_id_maps); + qemu_vfree(svq->vring.desc); + qemu_vfree(svq->vring.used); +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index e5e24c536d..c132c994e9 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -53,6 +53,12 @@ typedef struct VhostShadowVirtqueue { + /* Next VirtQueue element that guest made available */ + VirtQueueElement *next_guest_avail_elem; + ++ /* ++ * Backup next field for each descriptor so we can recover securely, not ++ * needing to trust the device access. ++ */ ++ uint16_t *desc_next; ++ + /* Next head to expose to the device */ + uint16_t shadow_avail_idx; + +-- +2.31.1 + diff --git a/kvm-vhost-Track-number-of-descs-in-SVQDescState.patch b/kvm-vhost-Track-number-of-descs-in-SVQDescState.patch new file mode 100644 index 0000000..6a2e147 --- /dev/null +++ b/kvm-vhost-Track-number-of-descs-in-SVQDescState.patch @@ -0,0 +1,81 @@ +From 24b8cf88f53f9fc7cb393c9cad908f759980bfee Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 16/32] vhost: Track number of descs in SVQDescState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [16/27] 26f30cb6dd35c1eb1ddabe25113431bed3d744aa (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream 
Status: git://git.qemu.org/qemu.git + +commit ac4cfdc6f39c06732d27554523f9d5f8a53b4ffa +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:35 2022 +0200 + + vhost: Track number of descs in SVQDescState + + A guest's buffer continuos on GPA may need multiple descriptors on + qemu's VA, so SVQ should track its length sepparatedly. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 4 ++-- + hw/virtio/vhost-shadow-virtqueue.h | 6 ++++++ + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index a08e3d4025..4d99075e73 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -257,6 +257,7 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, + } + + svq->desc_state[qemu_head].elem = elem; ++ svq->desc_state[qemu_head].ndescs = ndescs; + vhost_svq_kick(svq); + return 0; + } +@@ -418,8 +419,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return NULL; + } + +- num = svq->desc_state[used_elem.id].elem->in_num + +- svq->desc_state[used_elem.id].elem->out_num; ++ num = svq->desc_state[used_elem.id].ndescs; + last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); + svq->desc_next[last_used_chain] = svq->free_head; + svq->free_head = used_elem.id; +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index d646c35054..5c7e7cbab6 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -17,6 +17,12 @@ + + typedef struct SVQDescState { + VirtQueueElement *elem; ++ ++ /* ++ * Number of descriptors exposed to the device. 
May or may not match ++ * guest's ++ */ ++ unsigned int ndescs; + } SVQDescState; + + /* Shadow virtqueue to relay notifications */ +-- +2.31.1 + diff --git a/kvm-vhost-add-vhost_svq_poll.patch b/kvm-vhost-add-vhost_svq_poll.patch new file mode 100644 index 0000000..fa27e5e --- /dev/null +++ b/kvm-vhost-add-vhost_svq_poll.patch @@ -0,0 +1,92 @@ +From 0ab3da1092362470d256b433c546bd365d34f930 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 19/32] vhost: add vhost_svq_poll +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [19/27] 6807bb0bb6e5183b46a03b12b4027c7d767e8555 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 3f44d13dda83d390cc9563e56e7d337e4f6223f4 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:38 2022 +0200 + + vhost: add vhost_svq_poll + + It allows the Shadow Control VirtQueue to wait for the device to use the + available buffers. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 27 +++++++++++++++++++++++++++ + hw/virtio/vhost-shadow-virtqueue.h | 1 + + 2 files changed, 28 insertions(+) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index cb879e7b88..95d0d7a7ee 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -485,6 +485,33 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, + } while (!vhost_svq_enable_notification(svq)); + } + ++/** ++ * Poll the SVQ for one device used buffer. 
++ * ++ * This function race with main event loop SVQ polling, so extra ++ * synchronization is needed. ++ * ++ * Return the length written by the device. ++ */ ++size_t vhost_svq_poll(VhostShadowVirtqueue *svq) ++{ ++ int64_t start_us = g_get_monotonic_time(); ++ do { ++ uint32_t len; ++ VirtQueueElement *elem = vhost_svq_get_buf(svq, &len); ++ if (elem) { ++ return len; ++ } ++ ++ if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { ++ return 0; ++ } ++ ++ /* Make sure we read new used_idx */ ++ smp_rmb(); ++ } while (true); ++} ++ + /** + * Forward used buffers. + * +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index dd78f4bec2..cf442f7dea 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -89,6 +89,7 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq, + int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, + size_t out_num, const struct iovec *in_sg, size_t in_num, + VirtQueueElement *elem); ++size_t vhost_svq_poll(VhostShadowVirtqueue *svq); + + void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); + void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); +-- +2.31.1 + diff --git a/kvm-vhost-add-vhost_svq_push_elem.patch b/kvm-vhost-add-vhost_svq_push_elem.patch new file mode 100644 index 0000000..2a9ec40 --- /dev/null +++ b/kvm-vhost-add-vhost_svq_push_elem.patch @@ -0,0 +1,83 @@ +From a26eb02b3a49c5d1163685ba5b83b67138c09047 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 17/32] vhost: add vhost_svq_push_elem +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [17/27] d064b40a262f2dfdc9f648d250aa8c8020c40385 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: 
Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 432efd144e990b6e040862de25f8f0b6a6eeb03d +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:36 2022 +0200 + + vhost: add vhost_svq_push_elem + + This function allows external SVQ users to return guest's available + buffers. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 16 ++++++++++++++++ + hw/virtio/vhost-shadow-virtqueue.h | 3 +++ + 2 files changed, 19 insertions(+) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 4d99075e73..1ce52d5b4a 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -428,6 +428,22 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return g_steal_pointer(&svq->desc_state[used_elem.id].elem); + } + ++/** ++ * Push an element to SVQ, returning it to the guest. ++ */ ++void vhost_svq_push_elem(VhostShadowVirtqueue *svq, ++ const VirtQueueElement *elem, uint32_t len) ++{ ++ virtqueue_push(svq->vq, elem, len); ++ if (svq->next_guest_avail_elem) { ++ /* ++ * Avail ring was full when vhost_svq_flush was called, so it's a ++ * good moment to make more descriptors available if possible. 
++ */ ++ vhost_handle_guest_kick(svq); ++ } ++} ++ + static void vhost_svq_flush(VhostShadowVirtqueue *svq, + bool check_for_avail_queue) + { +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index 5c7e7cbab6..d9fc1f1799 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -84,6 +84,9 @@ typedef struct VhostShadowVirtqueue { + + bool vhost_svq_valid_features(uint64_t features, Error **errp); + ++void vhost_svq_push_elem(VhostShadowVirtqueue *svq, ++ const VirtQueueElement *elem, uint32_t len); ++ + void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); + void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); + void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, +-- +2.31.1 + diff --git a/kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch b/kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch new file mode 100644 index 0000000..08bcaf2 --- /dev/null +++ b/kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch @@ -0,0 +1,120 @@ +From 2bdea90bfbce3b8d5bfa86178a942a470b85b835 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 07/32] vhost: move descriptor translation to + vhost_svq_vring_write_descs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [7/27] 5533c72065e4ebf8ea7db966c976a3b29bdafb82 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 009c2549bb9dc7f7061009eb87f2a53d4b364983 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:26 2022 +0200 + + vhost: move descriptor translation to vhost_svq_vring_write_descs + + 
It's done for both in and out descriptors so it's better placed here. + + Acked-by: Jason Wang + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 38 +++++++++++++++++++++--------- + 1 file changed, 27 insertions(+), 11 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 06d0bb39d9..3fbda1e3d4 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -122,17 +122,35 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, + return true; + } + +-static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, +- const struct iovec *iovec, size_t num, +- bool more_descs, bool write) ++/** ++ * Write descriptors to SVQ vring ++ * ++ * @svq: The shadow virtqueue ++ * @sg: Cache for hwaddr ++ * @iovec: The iovec from the guest ++ * @num: iovec length ++ * @more_descs: True if more descriptors come in the chain ++ * @write: True if they are writeable descriptors ++ * ++ * Return true if success, false otherwise and print error. ++ */ ++static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, ++ const struct iovec *iovec, size_t num, ++ bool more_descs, bool write) + { + uint16_t i = svq->free_head, last = svq->free_head; + unsigned n; + uint16_t flags = write ? 
cpu_to_le16(VRING_DESC_F_WRITE) : 0; + vring_desc_t *descs = svq->vring.desc; ++ bool ok; + + if (num == 0) { +- return; ++ return true; ++ } ++ ++ ok = vhost_svq_translate_addr(svq, sg, iovec, num); ++ if (unlikely(!ok)) { ++ return false; + } + + for (n = 0; n < num; n++) { +@@ -150,6 +168,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, + } + + svq->free_head = le16_to_cpu(svq->desc_next[last]); ++ return true; + } + + static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, +@@ -169,21 +188,18 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, + return false; + } + +- ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num); ++ ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, ++ elem->in_num > 0, false); + if (unlikely(!ok)) { + return false; + } +- vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, +- elem->in_num > 0, false); +- + +- ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num); ++ ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, ++ true); + if (unlikely(!ok)) { + return false; + } + +- vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true); +- + /* + * Put the entry in the available array (but don't update avail->idx until + * they do sync). 
+-- +2.31.1 + diff --git a/kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch b/kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch new file mode 100644 index 0000000..31677fd --- /dev/null +++ b/kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch @@ -0,0 +1,87 @@ +From a9095850da8dd4ea3fdb725cb7f79118144e22fa Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:39:27 +0200 +Subject: [PATCH 22/32] vhost-net-vdpa: add stubs for when no virtio-net device + is present +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [22/27] a2b25a805bb06094a5fab27ce8f82bee12a9fcb5 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 94c643732dc110d04bbdf0eb43c41bce23b3593e +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:41 2022 +0200 + + vhost-net-vdpa: add stubs for when no virtio-net device is present + + net/vhost-vdpa.c will need functions that are declared in + vhost-shadow-virtqueue.c, that needs functions of virtio-net.c. + + Copy the vhost-vdpa-stub.c code so + only the constructor net_init_vhost_vdpa needs to be defined. 
+ + Signed-off-by: Eugenio Pérez + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + net/meson.build | 3 ++- + net/vhost-vdpa-stub.c | 21 +++++++++++++++++++++ + 2 files changed, 23 insertions(+), 1 deletion(-) + create mode 100644 net/vhost-vdpa-stub.c + +diff --git a/net/meson.build b/net/meson.build +index c965e83b26..116a9e7cbb 100644 +--- a/net/meson.build ++++ b/net/meson.build +@@ -41,7 +41,8 @@ endif + softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix)) + softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c')) + if have_vhost_net_vdpa +- softmmu_ss.add(files('vhost-vdpa.c')) ++ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-vdpa.c'), if_false: files('vhost-vdpa-stub.c')) ++ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-vdpa-stub.c')) + endif + + subdir('can') +diff --git a/net/vhost-vdpa-stub.c b/net/vhost-vdpa-stub.c +new file mode 100644 +index 0000000000..1732ed2443 +--- /dev/null ++++ b/net/vhost-vdpa-stub.c +@@ -0,0 +1,21 @@ ++/* ++ * vhost-vdpa-stub.c ++ * ++ * Copyright (c) 2022 Red Hat, Inc. ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. 
++ * ++ */ ++ ++#include "qemu/osdep.h" ++#include "clients.h" ++#include "net/vhost-vdpa.h" ++#include "qapi/error.h" ++ ++int net_init_vhost_vdpa(const Netdev *netdev, const char *name, ++ NetClientState *peer, Error **errp) ++{ ++ error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*"); ++ return -1; ++} +-- +2.31.1 + diff --git a/kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch b/kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch new file mode 100644 index 0000000..4ae4cc4 --- /dev/null +++ b/kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch @@ -0,0 +1,69 @@ +From dffe24d5c1f5a4676e9d2a5bc032effd420b008f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 08/32] virtio-net: Expose MAC_TABLE_ENTRIES +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [8/27] 5c3b96215ddf853cafc594da47f57d7e157db4ee (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 6758c01f054c2a842d41d927d628b09f649d3254 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:27 2022 +0200 + + virtio-net: Expose MAC_TABLE_ENTRIES + + vhost-vdpa control virtqueue needs to know the maximum entries supported + by the virtio-net device, so we know if it is possible to apply the + filter. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/net/virtio-net.c | 1 - + include/hw/virtio/virtio-net.h | 3 +++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 633de61513..2a127f0a3b 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -49,7 +49,6 @@ + + #define VIRTIO_NET_VM_VERSION 11 + +-#define MAC_TABLE_ENTRIES 64 + #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ + + /* previously fixed value */ +diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h +index eb87032627..cce1c554f7 100644 +--- a/include/hw/virtio/virtio-net.h ++++ b/include/hw/virtio/virtio-net.h +@@ -35,6 +35,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET) + * and latency. */ + #define TX_BURST 256 + ++/* Maximum VIRTIO_NET_CTRL_MAC_TABLE_SET unicast + multicast entries. */ ++#define MAC_TABLE_ENTRIES 64 ++ + typedef struct virtio_net_conf + { + uint32_t txtimer; +-- +2.31.1 + diff --git a/kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch b/kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch new file mode 100644 index 0000000..b4b9012 --- /dev/null +++ b/kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch @@ -0,0 +1,169 @@ +From 49e91b34b62f5da147fa2fb80d203dd675c48f64 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 09/32] virtio-net: Expose ctrl virtqueue logic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [9/27] c4ab1e35f4ca728df82a687763c662369282c513 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 640b8a1c588b56349b3307d88459ea1cd86181fb +Author: 
Eugenio Pérez +Date: Wed Jul 20 08:59:28 2022 +0200 + + virtio-net: Expose ctrl virtqueue logic + + This allows external vhost-net devices to modify the state of the + VirtIO device model once the vhost-vdpa device has acknowledged the + control commands. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/net/virtio-net.c | 84 ++++++++++++++++++++-------------- + include/hw/virtio/virtio-net.h | 4 ++ + 2 files changed, 53 insertions(+), 35 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 2a127f0a3b..59bedba681 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1433,57 +1433,71 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, + return VIRTIO_NET_OK; + } + +-static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) ++size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, ++ const struct iovec *in_sg, unsigned in_num, ++ const struct iovec *out_sg, ++ unsigned out_num) + { + VirtIONet *n = VIRTIO_NET(vdev); + struct virtio_net_ctrl_hdr ctrl; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; +- VirtQueueElement *elem; + size_t s; + struct iovec *iov, *iov2; +- unsigned int iov_cnt; ++ ++ if (iov_size(in_sg, in_num) < sizeof(status) || ++ iov_size(out_sg, out_num) < sizeof(ctrl)) { ++ virtio_error(vdev, "virtio-net ctrl missing headers"); ++ return 0; ++ } ++ ++ iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num); ++ s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl)); ++ iov_discard_front(&iov, &out_num, sizeof(ctrl)); ++ if (s != sizeof(ctrl)) { ++ status = VIRTIO_NET_ERR; ++ } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { ++ status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num); ++ } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { ++ status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num); ++ } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { ++ status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, 
out_num); ++ } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { ++ status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num); ++ } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { ++ status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num); ++ } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { ++ status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num); ++ } ++ ++ s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status)); ++ assert(s == sizeof(status)); ++ ++ g_free(iov2); ++ return sizeof(status); ++} ++ ++static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) ++{ ++ VirtQueueElement *elem; + + for (;;) { ++ size_t written; + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!elem) { + break; + } +- if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) || +- iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) { +- virtio_error(vdev, "virtio-net ctrl missing headers"); ++ ++ written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num, ++ elem->out_sg, elem->out_num); ++ if (written > 0) { ++ virtqueue_push(vq, elem, written); ++ virtio_notify(vdev, vq); ++ g_free(elem); ++ } else { + virtqueue_detach_element(vq, elem, 0); + g_free(elem); + break; + } +- +- iov_cnt = elem->out_num; +- iov2 = iov = g_memdup2(elem->out_sg, +- sizeof(struct iovec) * elem->out_num); +- s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl)); +- iov_discard_front(&iov, &iov_cnt, sizeof(ctrl)); +- if (s != sizeof(ctrl)) { +- status = VIRTIO_NET_ERR; +- } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { +- status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt); +- } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { +- status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt); +- } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { +- status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt); +- } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { +- status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt); +- } else if 
(ctrl.class == VIRTIO_NET_CTRL_MQ) { +- status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt); +- } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { +- status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt); +- } +- +- s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status)); +- assert(s == sizeof(status)); +- +- virtqueue_push(vq, elem, sizeof(status)); +- virtio_notify(vdev, vq); +- g_free(iov2); +- g_free(elem); + } + } + +diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h +index cce1c554f7..ef234ffe7e 100644 +--- a/include/hw/virtio/virtio-net.h ++++ b/include/hw/virtio/virtio-net.h +@@ -221,6 +221,10 @@ struct VirtIONet { + struct EBPFRSSContext ebpf_rss; + }; + ++size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, ++ const struct iovec *in_sg, unsigned in_num, ++ const struct iovec *out_sg, ++ unsigned out_num); + void virtio_net_set_netclient_name(VirtIONet *n, const char *name, + const char *type); + +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 207d6e7..9c43f37 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.0.0 -Release: 9%{?rcrel}%{?dist}%{?cc_suffix} +Release: 10%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -358,6 +358,70 @@ Patch101: kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch Patch102: kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch # For bz#1951522 - CVE-2021-3507 qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-9.0] Patch103: kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch104: kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch105: kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch106: kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch107: kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch108: kvm-meson-create-have_vhost_-variables.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch109: kvm-meson-use-have_vhost_-variables-to-pick-sources.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch110: kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch111: kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch112: kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch113: kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch114: kvm-vhost-Reorder-vhost_svq_kick.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch115: kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch +# For bz#1939363 - vDPA control virtqueue 
support in Qemu +Patch116: kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch117: kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch118: kvm-vhost-Add-SVQDescState.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch119: kvm-vhost-Track-number-of-descs-in-SVQDescState.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch120: kvm-vhost-add-vhost_svq_push_elem.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch121: kvm-vhost-Expose-vhost_svq_add.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch122: kvm-vhost-add-vhost_svq_poll.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch123: kvm-vhost-Add-svq-avail_handler-callback.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch124: kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch125: kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch126: kvm-vdpa-manual-forward-CVQ-buffers.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch127: kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch128: kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch129: kvm-vdpa-Add-device-migration-blocker.patch +# For bz#1939363 - vDPA control virtqueue support in Qemu +Patch130: kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch +# For bz#2111994 - RHEL9: skey test in kvm_unit_test got failed +Patch131: kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch +# For bz#2111994 - RHEL9: skey test in kvm_unit_test got failed +Patch132: 
kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch +# For bz#2095608 - Please correct the error message when try to start qemu with "-M kernel-irqchip=split" +Patch133: kvm-kvm-don-t-use-perror-without-useful-errno.patch +# For bz#2099934 - Guest reboot on destination host after postcopy migration completed +Patch134: kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch +# For bz#2099934 - Guest reboot on destination host after postcopy migration completed +Patch135: kvm-Revert-migration-Simplify-unqueue_page.patch # Source-git patches @@ -1393,6 +1457,48 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Aug 08 2022 Miroslav Rezanina - 7.0.0-10 +- kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch [bz#1939363] +- kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch [bz#1939363] +- kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch [bz#1939363] +- kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch [bz#1939363] +- kvm-meson-create-have_vhost_-variables.patch [bz#1939363] +- kvm-meson-use-have_vhost_-variables-to-pick-sources.patch [bz#1939363] +- kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch [bz#1939363] +- kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch [bz#1939363] +- kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch [bz#1939363] +- kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch [bz#1939363] +- kvm-vhost-Reorder-vhost_svq_kick.patch [bz#1939363] +- kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch [bz#1939363] +- kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch [bz#1939363] +- kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch [bz#1939363] +- kvm-vhost-Add-SVQDescState.patch [bz#1939363] +- kvm-vhost-Track-number-of-descs-in-SVQDescState.patch [bz#1939363] +- kvm-vhost-add-vhost_svq_push_elem.patch [bz#1939363] +- kvm-vhost-Expose-vhost_svq_add.patch [bz#1939363] +- kvm-vhost-add-vhost_svq_poll.patch [bz#1939363] +- 
kvm-vhost-Add-svq-avail_handler-callback.patch [bz#1939363] +- kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch [bz#1939363] +- kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch [bz#1939363] +- kvm-vdpa-manual-forward-CVQ-buffers.patch [bz#1939363] +- kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch [bz#1939363] +- kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch [bz#1939363] +- kvm-vdpa-Add-device-migration-blocker.patch [bz#1939363] +- kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch [bz#1939363] +- kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch [bz#2111994] +- kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch [bz#2111994] +- kvm-kvm-don-t-use-perror-without-useful-errno.patch [bz#2095608] +- kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch [bz#2099934] +- kvm-Revert-migration-Simplify-unqueue_page.patch [bz#2099934] +- Resolves: bz#1939363 + (vDPA control virtqueue support in Qemu) +- Resolves: bz#2111994 + (RHEL9: skey test in kvm_unit_test got failed) +- Resolves: bz#2095608 + (Please correct the error message when try to start qemu with "-M kernel-irqchip=split") +- Resolves: bz#2099934 + (Guest reboot on destination host after postcopy migration completed) + * Mon Jul 18 2022 Miroslav Rezanina - 7.0.0-9 - kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch [bz#2100106] - kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch [bz#2100106] From 85d5f0ed1b70c57bdd5c2b484f55a01524f3e4a4 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 15 Aug 2022 07:19:39 -0400 Subject: [PATCH 166/195] * Mon Aug 15 2022 Miroslav Rezanina - 7.0.0-11 - kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch [bz#2107466] - kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch [bz#2107466] - kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch [bz#2107466] - kvm-migration-Avoid-false-positive-on-non-supported-scen.patch [bz#2107466] - 
kvm-migration-add-remaining-params-has_-true-in-migratio.patch [bz#2107466] - kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch [bz#2107466] - kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch [bz#2112303] - kvm-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch [bz#2116876] - kvm-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch [bz#2116876] - kvm-vdpa-Fix-memory-listener-deletions-of-iova-tree.patch [bz#2116876] - kvm-vdpa-Fix-file-descriptor-leak-on-get-features-error.patch [bz#2116876] - Resolves: bz#2107466 (zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together) - Resolves: bz#2112303 (virtio-blk: Can't boot fresh installation from used 512 cluster_size image under certain conditions) - Resolves: bz#2116876 (Fixes for vDPA control virtqueue support in Qemu) --- ...sync-missed-zero-copy-migration-stat.patch | 87 +++++++++++++++++ ...et-Add-support-for-MSG_ZEROCOPY-IPV6.patch | 56 +++++++++++ ...-Fix-zero-copy-flush-returning-code-.patch | 65 +++++++++++++ ...false-positive-on-non-supported-scen.patch | 93 +++++++++++++++++++ ...maining-params-has_-true-in-migratio.patch | 62 +++++++++++++ ...d-Report-to-user-when-zerocopy-not-w.patch | 83 +++++++++++++++++ ...-Fix-booting-with-logical-block-size.patch | 63 +++++++++++++ ...ex-calculus-at-vhost_vdpa_get_vring_.patch | 50 ++++++++++ ...escriptor-leak-on-get-features-error.patch | 58 ++++++++++++ ...ex-calculus-at-vhost_vdpa_svqs_start.patch | 45 +++++++++ ...mory-listener-deletions-of-iova-tree.patch | 61 ++++++++++++ qemu-kvm.spec | 43 ++++++++- 12 files changed, 765 insertions(+), 1 deletion(-) create mode 100644 kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch create mode 100644 kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch create mode 100644 kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch create mode 100644 kvm-migration-Avoid-false-positive-on-non-supported-scen.patch create mode 
100644 kvm-migration-add-remaining-params-has_-true-in-migratio.patch create mode 100644 kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch create mode 100644 kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch create mode 100644 kvm-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch create mode 100644 kvm-vdpa-Fix-file-descriptor-leak-on-get-features-error.patch create mode 100644 kvm-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch create mode 100644 kvm-vdpa-Fix-memory-listener-deletions-of-iova-tree.patch diff --git a/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch b/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch new file mode 100644 index 0000000..30c28f7 --- /dev/null +++ b/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch @@ -0,0 +1,87 @@ +From ac346634c5731407baa9de709dbd4d5cc6f45301 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 11 Jul 2022 18:11:12 -0300 +Subject: [PATCH 02/11] Add dirty-sync-missed-zero-copy migration stat +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 111: zero-copy-send fixes & improvements +RH-Commit: [2/6] 115035fd0a4e4b9439c91fb0f5d1a2f9244ba369 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 2107466 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Thomas Huth +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Leonardo Bras +Acked-by: Markus Armbruster +Acked-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220711211112.18951-3-leobras@redhat.com> +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit cf20c897338067ab4b70a4596fdccaf90c7e29a1) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 2 ++ + monitor/hmp-cmds.c | 5 +++++ + qapi/migration.json | 7 ++++++- + 3 files changed, 13 insertions(+), 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 8fb3eae910..3a3a7a4a50 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1017,6 +1017,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->normal_bytes = ram_counters.normal * page_size; + info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = ram_counters.dirty_sync_count; ++ info->ram->dirty_sync_missed_zero_copy = ++ ram_counters.dirty_sync_missed_zero_copy; + info->ram->postcopy_requests = ram_counters.postcopy_requests; + info->ram->page_size = page_size; + info->ram->multifd_bytes = ram_counters.multifd_bytes; +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 634968498b..9cec01de38 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -305,6 +305,11 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) + monitor_printf(mon, "postcopy ram: %" PRIu64 " kbytes\n", + info->ram->postcopy_bytes >> 10); + } ++ if (info->ram->dirty_sync_missed_zero_copy) { ++ monitor_printf(mon, ++ "Zero-copy-send fallbacks happened: %" PRIu64 " times\n", ++ info->ram->dirty_sync_missed_zero_copy); ++ } + } + + if (info->has_disk) { +diff --git a/qapi/migration.json b/qapi/migration.json +index 5105790cd0..9b38b3c21c 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -55,6 +55,10 @@ + # @postcopy-bytes: The number of bytes sent during the post-copy phase + # (since 7.0). + # ++# @dirty-sync-missed-zero-copy: Number of times dirty RAM synchronization could ++# not avoid copying dirty pages. This is between ++# 0 and @dirty-sync-count * @multifd-channels. 
++# (since 7.1) + # Since: 0.14 + ## + { 'struct': 'MigrationStats', +@@ -65,7 +69,8 @@ + 'postcopy-requests' : 'int', 'page-size' : 'int', + 'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64', + 'precopy-bytes' : 'uint64', 'downtime-bytes' : 'uint64', +- 'postcopy-bytes' : 'uint64' } } ++ 'postcopy-bytes' : 'uint64', ++ 'dirty-sync-missed-zero-copy' : 'uint64' } } + + ## + # @XBZRLECacheStats: +-- +2.31.1 + diff --git a/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch b/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch new file mode 100644 index 0000000..0fd4b6c --- /dev/null +++ b/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch @@ -0,0 +1,56 @@ +From cb6dc39a5e5d2d981b4b1e983042b3fbb529d5d1 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Thu, 4 Aug 2022 04:10:43 -0300 +Subject: [PATCH 06/11] QIOChannelSocket: Add support for MSG_ZEROCOPY + IPV6 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 111: zero-copy-send fixes & improvements +RH-Commit: [6/6] 2eb1aba8ebf267a6f67cfba2e489dc88619c7fd4 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 2107466 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Thomas Huth +RH-Acked-by: Dr. David Alan Gilbert + +For using MSG_ZEROCOPY, there are two steps: +1 - io_writev() the packet, which enqueues the packet for sending, and +2 - io_flush(), which gets confirmation that all packets got correctly sent + +Currently, if MSG_ZEROCOPY is used to send packets over IPV6, no error will +be reported in (1), but it will fail in the first time (2) happens. + +This happens because (2) currently checks for cmsg_level & cmsg_type +associated with IPV4 only, before reporting any error. 
+ +Add checks for cmsg_level & cmsg_type associated with IPV6, and thus enable +support for MSG_ZEROCOPY + IPV6 + +Fixes: 2bc58ffc29 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") +Signed-off-by: Leonardo Bras +Signed-off-by: Daniel P. Berrangé +(cherry picked from commit 5258a7e2c0677d16e9e1d06845f60171adf0b290) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index eb7baa2184..efd5f60808 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -747,8 +747,8 @@ static int qio_channel_socket_flush(QIOChannel *ioc, + } + + cm = CMSG_FIRSTHDR(&msg); +- if (cm->cmsg_level != SOL_IP && +- cm->cmsg_type != IP_RECVERR) { ++ if (cm->cmsg_level != SOL_IP && cm->cmsg_type != IP_RECVERR && ++ cm->cmsg_level != SOL_IPV6 && cm->cmsg_type != IPV6_RECVERR) { + error_setg_errno(errp, EPROTOTYPE, + "Wrong cmsg in errqueue"); + return -1; +-- +2.31.1 + diff --git a/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch b/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch new file mode 100644 index 0000000..b382a59 --- /dev/null +++ b/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch @@ -0,0 +1,65 @@ +From 678981c6bb7c964e1591f6f8aba49e9602f64852 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 11 Jul 2022 18:11:11 -0300 +Subject: [PATCH 01/11] QIOChannelSocket: Fix zero-copy flush returning code 1 + when nothing sent +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 111: zero-copy-send fixes & improvements +RH-Commit: [1/6] cebc887cb61de1572d8ae3232cde45e80c339404 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 2107466 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Thomas Huth +RH-Acked-by: Dr. 
David Alan Gilbert + +If flush is called when no buffer was sent with MSG_ZEROCOPY, it currently +returns 1. This return code should be used only when Linux fails to use +MSG_ZEROCOPY on a lot of sendmsg(). + +Fix this by returning early from flush if no sendmsg(...,MSG_ZEROCOPY) +was attempted. + +Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Acked-by: Daniel P. Berrangé +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Message-Id: <20220711211112.18951-2-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 927f93e099c4f9184e60a1bc61624ac2d04d0223) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index 8ae8b212cf..eb7baa2184 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -717,12 +717,18 @@ static int qio_channel_socket_flush(QIOChannel *ioc, + struct cmsghdr *cm; + char control[CMSG_SPACE(sizeof(*serr))]; + int received; +- int ret = 1; ++ int ret; ++ ++ if (sioc->zero_copy_queued == sioc->zero_copy_sent) { ++ return 0; ++ } + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + memset(control, 0, sizeof(control)); + ++ ret = 1; ++ + while (sioc->zero_copy_sent < sioc->zero_copy_queued) { + received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE); + if (received < 0) { +-- +2.31.1 + diff --git a/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch b/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch new file mode 100644 index 0000000..9f440eb --- /dev/null +++ b/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch @@ -0,0 +1,93 @@ +From 0753565af588dfa78b3529e359b1590e15fcbdb3 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Tue, 19 Jul 2022 09:23:45 -0300 +Subject: [PATCH 04/11] migration: Avoid false-positive on 
non-supported + scenarios for zero-copy-send +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 111: zero-copy-send fixes & improvements +RH-Commit: [4/6] f5c7ed6710d92668acb81d0118a71fab0b4e3d43 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 2107466 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Thomas Huth +RH-Acked-by: Dr. David Alan Gilbert + +Migration with zero-copy-send currently has its limitations, as it can't +be used with TLS nor any kind of compression. In such scenarios, it should +output errors during parameter / capability setting. + +But currently there are some ways of setting these not-supported scenarios +without printing the error message: + +1) For 'compression' capability, it works by enabling it together with +zero-copy-send. This happens because the validity test for zero-copy uses +the helper function migrate_use_compression(), which checks for compression +presence in s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]. + +The point here is: the validity test happens before the capability gets +enabled. If all of them get enabled together, this test will not return +error. + +In order to fix that, replace migrate_use_compression() by directly testing +the cap_list parameter of migrate_caps_check(). + +2) For features enabled by parameters such as TLS & 'multifd_compression', +there was also a possibility of setting non-supported scenarios: setting +zero-copy-send first, then setting the unsupported parameter. + +In order to fix that, also add a check for parameters conflicting with +zero-copy-send on migrate_params_check(). + +3) XBZRLE is also a compression capability, so it makes sense to also add +it to the list of capabilities which are not supported with zero-copy-send.
+ +Fixes: 1abaec9a1b2c ("migration: Change zero_copy_send from migration parameter to migration capability") +Signed-off-by: Leonardo Bras +Message-Id: <20220719122345.253713-1-leobras@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 90eb69e4f1a16b388d0483543bf6bfc69a9966e4) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 3a3a7a4a50..343629d59c 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1265,7 +1265,9 @@ static bool migrate_caps_check(bool *cap_list, + #ifdef CONFIG_LINUX + if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && + (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || +- migrate_use_compression() || ++ cap_list[MIGRATION_CAPABILITY_COMPRESS] || ++ cap_list[MIGRATION_CAPABILITY_XBZRLE] || ++ migrate_multifd_compression() || + migrate_use_tls())) { + error_setg(errp, + "Zero copy only available for non-compressed non-TLS multifd migration"); +@@ -1502,6 +1504,17 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); + return false; + } ++ ++#ifdef CONFIG_LINUX ++ if (migrate_use_zero_copy_send() && ++ ((params->has_multifd_compression && params->multifd_compression) || ++ (params->has_tls_creds && params->tls_creds && *params->tls_creds))) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#endif ++ + return true; + } + +-- +2.31.1 + diff --git a/kvm-migration-add-remaining-params-has_-true-in-migratio.patch b/kvm-migration-add-remaining-params-has_-true-in-migratio.patch new file mode 100644 index 0000000..bcaff3b --- /dev/null +++ b/kvm-migration-add-remaining-params-has_-true-in-migratio.patch @@ -0,0 +1,62 @@ +From 
9698c0e8dd9b4f5dbc237a3f98ac46297dac85fb Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 25 Jul 2022 22:02:35 -0300 +Subject: [PATCH 05/11] migration: add remaining params->has_* = true in + migration_instance_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 111: zero-copy-send fixes & improvements +RH-Commit: [5/6] 50bbad254e2356b3ae16f6e00a3db8fd0b22dde9 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 2107466 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Thomas Huth +RH-Acked-by: Dr. David Alan Gilbert + +Some of params->has_* = true are missing in migration_instance_init, this +causes migrate_params_check() to skip some tests, allowing some +unsupported scenarios. + +Fix this by adding all missing params->has_* = true in +migration_instance_init(). + +Fixes: 69ef1f36b0 ("migration: define 'tls-creds' and 'tls-hostname' migration parameters") +Fixes: 1d58872a91 ("migration: do not wait for free thread") +Fixes: d2f1d29b95 ("migration: add support for a "tls-authz" migration parameter") +Signed-off-by: Leonardo Bras +Message-Id: <20220726010235.342927-1-leobras@redhat.com> +Reviewed-by: Peter Xu +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit df67aa3e61e2c83459da7d815962d9706f1528fc) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/migration/migration.c b/migration/migration.c +index 343629d59c..5e78028df4 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -4332,6 +4332,7 @@ static void migration_instance_init(Object *obj) + /* Set has_* up only for parameter checks */ + params->has_compress_level = true; + params->has_compress_threads = true; ++ params->has_compress_wait_thread = true; + params->has_decompress_threads = true; + params->has_throttle_trigger_threshold = true; + params->has_cpu_throttle_initial = true; +@@ -4352,6 +4353,9 @@ static void migration_instance_init(Object *obj) + params->has_announce_max = true; + params->has_announce_rounds = true; + params->has_announce_step = true; ++ params->has_tls_creds = true; ++ params->has_tls_hostname = true; ++ params->has_tls_authz = true; + + qemu_sem_init(&ms->postcopy_pause_sem, 0); + qemu_sem_init(&ms->postcopy_pause_rp_sem, 0); +-- +2.31.1 + diff --git a/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch b/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch new file mode 100644 index 0000000..d7b1ab3 --- /dev/null +++ b/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch @@ -0,0 +1,83 @@ +From 78bbe28d5f5691330239041448cccfb339eed779 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 11 Jul 2022 18:11:13 -0300 +Subject: [PATCH 03/11] migration/multifd: Report to user when zerocopy not + working +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 111: zero-copy-send fixes & improvements +RH-Commit: [3/6] 4f9165325b3cb8ff16d8b3b7649ff780fae0e2ad (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 2107466 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Thomas Huth +RH-Acked-by: Dr. 
David Alan Gilbert + +Some errors, like the lack of Scatter-Gather support by the network +interface(NETIF_F_SG) may cause sendmsg(...,MSG_ZEROCOPY) to fail on using +zero-copy, which causes it to fall back to the default copying mechanism. + +After each full dirty-bitmap scan there should be a zero-copy flush +happening, which checks for errors each of the previous calls to +sendmsg(...,MSG_ZEROCOPY). If all of them failed to use zero-copy, then +increment dirty_sync_missed_zero_copy migration stat to let the user know +about it. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Acked-by: Peter Xu +Message-Id: <20220711211112.18951-4-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit d59c40cc483729f2e67c80e58df769ad19976fe9) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 2 ++ + migration/ram.c | 5 +++++ + migration/ram.h | 2 ++ + 3 files changed, 9 insertions(+) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 0b5b41c53f..96e5f0a058 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -626,6 +626,8 @@ int multifd_send_sync_main(QEMUFile *f) + if (ret < 0) { + error_report_err(err); + return -1; ++ } else if (ret == 1) { ++ dirty_sync_missed_zero_copy(); + } + } + } +diff --git a/migration/ram.c b/migration/ram.c +index ee40e4a718..c437ff1b1f 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -406,6 +406,11 @@ static void ram_transferred_add(uint64_t bytes) + ram_counters.transferred += bytes; + } + ++void dirty_sync_missed_zero_copy(void) ++{ ++ ram_counters.dirty_sync_missed_zero_copy++; ++} ++ + /* used by the search for pages to send */ + struct PageSearchStatus { + /* Current block being searched */ +diff --git a/migration/ram.h b/migration/ram.h +index 2c6dc3675d..34adf5cb92 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -86,4 +86,6 @@ void ram_write_tracking_prepare(void); + int ram_write_tracking_start(void); + void ram_write_tracking_stop(void); + 
++void dirty_sync_missed_zero_copy(void); ++ + #endif +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch b/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch new file mode 100644 index 0000000..1bb8ea5 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch @@ -0,0 +1,63 @@ +From 03996a8a826c9186e4a16e1b4757f1ef5947a503 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 5 Aug 2022 11:42:14 +0200 +Subject: [PATCH 07/11] pc-bios/s390-ccw: Fix booting with logical block size < + physical block size + +RH-Author: Thomas Huth +RH-MergeRequest: 113: pc-bios/s390-ccw: Fix booting with logical block size < physical block size +RH-Commit: [1/1] a45ff477bc7d7011ea6c4d42a1aade213d1e4690 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2112303 +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Claudio Imbrenda + +For accessing single blocks during boot, it's the logical block size that +matters. (Physical block sizes are rather interesting e.g. for creating +file systems with the correct alignment for speed reasons etc.). +So the s390-ccw bios has to use the logical block size for calculating +sector numbers during the boot phase, the "physical_block_exp" shift +value must not be taken into account. This change fixes the boot process +when the guest has been installed on a disk where the logical block size +differs from the physical one, e.g. 
if the guest has been installed +like this: + + qemu-system-s390x -nographic -accel kvm -m 2G \ + -drive if=none,id=d1,file=fedora.iso,format=raw,media=cdrom \ + -device virtio-scsi -device scsi-cd,drive=d1 \ + -drive if=none,id=d2,file=test.qcow2,format=qcow2 + -device virtio-blk,drive=d2,physical_block_size=4096,logical_block_size=512 + +Linux correctly uses the logical block size of 512 for the installation, +but the s390-ccw bios tries to boot from a disk with 4096 block size so +far, as long as this patch has not been applied yet (well, it used to work +by accident in the past due to the virtio_assume_scsi() hack that used to +enforce 512 byte sectors on all virtio-block disks, but that hack has been +well removed in commit 5447de2619050a0a4d to fix other scenarios). + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2112303 +Message-Id: <20220805094214.285223-1-thuth@redhat.com> +Reviewed-by: Cornelia Huck +Reviewed-by: Eric Farman +Signed-off-by: Thomas Huth +(cherry picked from commit 393296de19650e1400ca265914cfdeb313725363) +--- + pc-bios/s390-ccw/virtio-blkdev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index 8271c47296..794f99b42c 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -173,7 +173,7 @@ int virtio_get_block_size(void) + + switch (vdev->senseid.cu_model) { + case VIRTIO_ID_BLOCK: +- return vdev->config.blk.blk_size << vdev->config.blk.physical_block_exp; ++ return vdev->config.blk.blk_size; + case VIRTIO_ID_SCSI: + return vdev->scsi_block_size; + } +-- +2.31.1 + diff --git a/kvm-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch b/kvm-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch new file mode 100644 index 0000000..2d0d55f --- /dev/null +++ b/kvm-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch @@ -0,0 +1,50 @@ +From e19adb058502e24580dbc4f6f944cd951ca288ed Mon Sep 17 
00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 12 May 2022 19:57:44 +0200 +Subject: [PATCH 08/11] vdpa: Fix bad index calculus at + vhost_vdpa_get_vring_base +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 114: vdpa: Fix memory listener deletions of iova tree +RH-Commit: [1/4] 754fb8960684fa7a91bddb18c8df58c3b947ee75 (eperezmartin/qemu-kvm) +RH-Bugzilla: 2116876 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Fixes: 6d0b222666 ("vdpa: Adapt vhost_vdpa_get_vring_base to SVQ") + +Acked-by: Jason Wang +Signed-off-by: Eugenio Pérez +Message-Id: <20220512175747.142058-4-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 639036477ef890958415967e753ca2cbb348c16c) +--- + hw/virtio/vhost-vdpa.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index e3e5bce4bb..a7dfac530f 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1193,11 +1193,11 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, + struct vhost_vring_state *ring) + { + struct vhost_vdpa *v = dev->opaque; ++ int vdpa_idx = ring->index - dev->vq_index; + int ret; + + if (v->shadow_vqs_enabled) { +- VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, +- ring->index); ++ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx); + + /* + * Setting base as last used idx, so destination will see as available +-- +2.31.1 + diff --git a/kvm-vdpa-Fix-file-descriptor-leak-on-get-features-error.patch b/kvm-vdpa-Fix-file-descriptor-leak-on-get-features-error.patch new file mode 100644 index 0000000..1757d3f --- /dev/null +++ b/kvm-vdpa-Fix-file-descriptor-leak-on-get-features-error.patch @@ -0,0 +1,58 @@ +From 71857062b7aea29fc418e107244cf4083cd78cd7 Mon Sep 17 00:00:00 2001 +From: 
=?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 2 Aug 2022 13:24:46 +0200 +Subject: [PATCH 11/11] vdpa: Fix file descriptor leak on get features error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 114: vdpa: Fix memory listener deletions of iova tree +RH-Commit: [4/4] bdfe6ed4539ecf68dc8bc4519755f9d5c096447d (eperezmartin/qemu-kvm) +RH-Bugzilla: 2116876 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +File descriptor vdpa_device_fd is not free in the case of returning +error from vhost_vdpa_get_features. Fixing it by making all errors go to +the same error path. + +Resolves: Coverity CID 1490785 +Fixes: 8170ab3f43 ("vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs") + +Signed-off-by: Eugenio Pérez +Reviewed-by: Laurent Vivier +Reviewed-by: Michael S. Tsirkin +Message-Id: <20220802112447.249436-2-eperezma@redhat.com> +Signed-off-by: Laurent Vivier +(cherry picked from commit aed5da45daf734ddc543c0791e877dac75e16f61) +--- + net/vhost-vdpa.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 50672bcd66..411e71e6c2 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -566,7 +566,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + g_autofree NetClientState **ncs = NULL; + g_autoptr(VhostIOVATree) iova_tree = NULL; + NetClientState *nc; +- int queue_pairs, r, i, has_cvq = 0; ++ int queue_pairs, r, i = 0, has_cvq = 0; + + assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); + opts = &netdev->u.vhost_vdpa; +@@ -582,7 +582,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + + r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp); + if (unlikely(r < 0)) { +- return r; ++ goto err; + } + + queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features, +-- +2.31.1 + diff --git 
a/kvm-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch b/kvm-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch new file mode 100644 index 0000000..8125cb2 --- /dev/null +++ b/kvm-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch @@ -0,0 +1,45 @@ +From 6335431b70dd55c1d52152d726fa462db2e10eb8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 12 May 2022 19:57:45 +0200 +Subject: [PATCH 09/11] vdpa: Fix index calculus at vhost_vdpa_svqs_start +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 114: vdpa: Fix memory listener deletions of iova tree +RH-Commit: [2/4] 9ce732e6bba426f8e00020ee6ad77f972f3e75b5 (eperezmartin/qemu-kvm) +RH-Bugzilla: 2116876 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +With the introduction of MQ the index of the vq needs to be calculated +with the device model vq_index. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20220512175747.142058-5-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 1c82fdfef8a227518ffecae9d419bcada995c202) +--- + hw/virtio/vhost-vdpa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index a7dfac530f..f877b354fa 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1032,7 +1032,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) + VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); + struct vhost_vring_addr addr = { +- .index = i, ++ .index = dev->vq_index + i, + }; + int r; + bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err); +-- +2.31.1 + diff --git a/kvm-vdpa-Fix-memory-listener-deletions-of-iova-tree.patch b/kvm-vdpa-Fix-memory-listener-deletions-of-iova-tree.patch new file mode 100644 index 0000000..e6f1d39 --- /dev/null +++ b/kvm-vdpa-Fix-memory-listener-deletions-of-iova-tree.patch @@ -0,0 +1,61 @@ +From b212edc97a471c75f8b8b44ee2a3a2cf82ef14d9 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Fri, 22 Jul 2022 10:26:30 +0200 +Subject: [PATCH 10/11] vdpa: Fix memory listener deletions of iova tree +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 114: vdpa: Fix memory listener deletions of iova tree +RH-Commit: [3/4] ad71f098b3fa8654962ac7872b5393c37c9825f2 (eperezmartin/qemu-kvm) +RH-Bugzilla: 2116876 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +vhost_vdpa_listener_region_del is always deleting the first iova entry +of the tree, since it's using the needle iova instead of the result's +one. + +This was detected using a vga virtual device in the VM using vdpa SVQ. +It makes some extra memory adding and deleting, so the wrong one was +mapped / unmapped. 
This was undetected before since all the memory was +mappend and unmapped totally without that device, but other conditions +could trigger it too: + +* mem_region was with .iova = 0, .translated_addr = (correct GPA). +* iova_tree_find_iova returned right result, but does not update + mem_region. +* iova_tree_remove always removed region with .iova = 0. Right iova were + sent to the device. +* Next map will fill the first region with .iova = 0, causing a mapping + with the same iova and device complains, if the next action is a map. +* Next unmap will cause to try to unmap again iova = 0, causing the + device to complain that no region was mapped at iova = 0. + +Fixes: 34e3c94edaef ("vdpa: Add custom IOTLB translations to SVQ") +Reported-by: Lei Yang +Signed-off-by: Eugenio Pérez +Signed-off-by: Jason Wang +(cherry picked from commit 75a8ce64f6e37513698857fb4284170da163ed06) +--- + hw/virtio/vhost-vdpa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index f877b354fa..03dc6014b0 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -288,7 +288,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, + + result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region); + iova = result->iova; +- vhost_iova_tree_remove(v->iova_tree, &mem_region); ++ vhost_iova_tree_remove(v->iova_tree, result); + } + vhost_vdpa_iotlb_batch_begin_once(v); + ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 9c43f37..0c86edd 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.0.0 -Release: 10%{?rcrel}%{?dist}%{?cc_suffix} +Release: 11%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -422,6 +422,28 @@ Patch133: kvm-kvm-don-t-use-perror-without-useful-errno.patch Patch134: kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch # For bz#2099934 - Guest reboot on destination host after postcopy migration completed Patch135: kvm-Revert-migration-Simplify-unqueue_page.patch +# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch136: kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch +# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch137: kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch +# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch138: kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch +# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch139: kvm-migration-Avoid-false-positive-on-non-supported-scen.patch +# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch140: kvm-migration-add-remaining-params-has_-true-in-migratio.patch +# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch141: kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch +# For bz#2112303 - virtio-blk: Can't boot fresh installation from used 512 cluster_size image under certain conditions +Patch142: kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch +# For bz#2116876 - Fixes for vDPA control virtqueue support in Qemu +Patch143: kvm-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch +# For bz#2116876 - Fixes for 
vDPA control virtqueue support in Qemu +Patch144: kvm-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch +# For bz#2116876 - Fixes for vDPA control virtqueue support in Qemu +Patch145: kvm-vdpa-Fix-memory-listener-deletions-of-iova-tree.patch +# For bz#2116876 - Fixes for vDPA control virtqueue support in Qemu +Patch146: kvm-vdpa-Fix-file-descriptor-leak-on-get-features-error.patch # Source-git patches @@ -1457,6 +1479,25 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Aug 15 2022 Miroslav Rezanina - 7.0.0-11 +- kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch [bz#2107466] +- kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch [bz#2107466] +- kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch [bz#2107466] +- kvm-migration-Avoid-false-positive-on-non-supported-scen.patch [bz#2107466] +- kvm-migration-add-remaining-params-has_-true-in-migratio.patch [bz#2107466] +- kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch [bz#2107466] +- kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch [bz#2112303] +- kvm-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch [bz#2116876] +- kvm-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch [bz#2116876] +- kvm-vdpa-Fix-memory-listener-deletions-of-iova-tree.patch [bz#2116876] +- kvm-vdpa-Fix-file-descriptor-leak-on-get-features-error.patch [bz#2116876] +- Resolves: bz#2107466 + (zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together) +- Resolves: bz#2112303 + (virtio-blk: Can't boot fresh installation from used 512 cluster_size image under certain conditions) +- Resolves: bz#2116876 + (Fixes for vDPA control virtqueue support in Qemu) + * Mon Aug 08 2022 Miroslav Rezanina - 7.0.0-10 - kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch [bz#1939363] - kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch [bz#1939363] From 716a3942b3a0a7ca817f8bacce542171c76529e0 Mon Sep 17 
00:00:00 2001 From: Miroslav Rezanina Date: Fri, 26 Aug 2022 03:05:53 -0400 Subject: [PATCH 167/195] * Fri Aug 26 2022 Miroslav Rezanina - 7.0.0-12 - kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch [bz#2120275] - kvm-vhost-Get-vring-base-from-vq-not-svq.patch [bz#2114060] - kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch [bz#2114060] - kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch [bz#2114060] - kvm-util-Return-void-on-iova_tree_remove.patch [bz#2114060] - kvm-util-accept-iova_tree_remove_parameter-by-value.patch [bz#2114060] - kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch [bz#2114060] - kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch [bz#2114060] - kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch [bz#2114060] - kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch [bz#2114060] - kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch [bz#2114060] - kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch [bz#2114060] - kvm-vhost-Delete-useless-read-memory-barrier.patch [bz#2114060] - kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch [bz#2114060] - kvm-vhost_net-Add-NetClientInfo-start-callback.patch [bz#2114060] - kvm-vhost_net-Add-NetClientInfo-stop-callback.patch [bz#2114060] - kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch [bz#2114060] - kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch [bz#2114060] - kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch [bz#2114060] - kvm-vhost_net-add-NetClientState-load-callback.patch [bz#2114060] - kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch [bz#2114060] - kvm-vdpa-Delete-CVQ-migration-blocker.patch [bz#2114060] - kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch [bz#2099541] - Resolves: bz#2120275 (Wrong max_sectors_kb and Maximum transfer length on the pass-through device [rhel-9.1]) - Resolves: bz#2114060 (vDPA state restore support through control virtqueue in Qemu) - 
Resolves: bz#2099541 (qemu coredump with error Assertion `qemu_mutex_iothread_locked()' failed when repeatly hotplug/unplug disks in pause status) --- ...c-Fix-emulated-block-limits-VPD-page.patch | 96 +++++++ ...util-Return-void-on-iova_tree_remove.patch | 70 +++++ ...-iova_tree_remove_parameter-by-value.patch | 182 +++++++++++++ ...tio-net-mac-address-via-CVQ-at-start.patch | 87 ++++++ kvm-vdpa-Delete-CVQ-migration-blocker.patch | 98 +++++++ ...Make-SVQ-vring-unmapping-return-void.patch | 133 ++++++++++ ...d-buffers-map-to-start-of-net-device.patch | 251 ++++++++++++++++++ ...SVQ-vring-from-iova_tree-at-shutdown.patch | 49 ++++ ...a-Skip-the-maps-not-in-the-iova-tree.patch | 48 ++++ ...-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch | 79 ++++++ ...et_vhost_vdpa_cvq_info-NetClientInfo.patch | 62 +++++ ...ave-failed-dma-maps-in-SVQ-iova-tree.patch | 83 ++++++ ...st_vdpa_net_cvq_add-from-vhost_vdpa_.patch | 153 +++++++++++ ...re-new-kick-fd-on-vhost_svq_set_svq_.patch | 67 +++++ ...t-Delete-useless-read-memory-barrier.patch | 47 ++++ ...end-on-NULL-VirtQueueElement-on-vhos.patch | 63 +++++ ...vhost-Get-vring-base-from-vq-not-svq.patch | 87 ++++++ ...fer-elem-ownership-in-vhost_handle_g.patch | 80 ++++++ ...ement-ndescs-instead-of-opaque-data-.patch | 55 ++++ ...net-Add-NetClientInfo-start-callback.patch | 73 +++++ ..._net-Add-NetClientInfo-stop-callback.patch | 68 +++++ ...net-add-NetClientState-load-callback.patch | 73 +++++ ...-race-in-virtio_scsi_dataplane_start.patch | 117 ++++++++ qemu-kvm.spec | 79 +++++- 24 files changed, 2199 insertions(+), 1 deletion(-) create mode 100644 kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch create mode 100644 kvm-util-Return-void-on-iova_tree_remove.patch create mode 100644 kvm-util-accept-iova_tree_remove_parameter-by-value.patch create mode 100644 kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch create mode 100644 kvm-vdpa-Delete-CVQ-migration-blocker.patch create mode 100644 
kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch create mode 100644 kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch create mode 100644 kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch create mode 100644 kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch create mode 100644 kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch create mode 100644 kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch create mode 100644 kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch create mode 100644 kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch create mode 100644 kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch create mode 100644 kvm-vhost-Delete-useless-read-memory-barrier.patch create mode 100644 kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch create mode 100644 kvm-vhost-Get-vring-base-from-vq-not-svq.patch create mode 100644 kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch create mode 100644 kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch create mode 100644 kvm-vhost_net-Add-NetClientInfo-start-callback.patch create mode 100644 kvm-vhost_net-Add-NetClientInfo-stop-callback.patch create mode 100644 kvm-vhost_net-add-NetClientState-load-callback.patch create mode 100644 kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch diff --git a/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch b/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch new file mode 100644 index 0000000..cee10e7 --- /dev/null +++ b/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch @@ -0,0 +1,96 @@ +From e5360c1e76fee8b8dcbcba7efbb1e36f0b48ac40 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 22 Aug 2022 14:53:20 +0200 +Subject: [PATCH 01/23] scsi-generic: Fix emulated block limits VPD page + +RH-Author: Kevin Wolf +RH-MergeRequest: 115: scsi-generic: Fix emulated block limits VPD page +RH-Bugzilla: 2120275 +RH-Acked-by: Emanuele Giuseppe Esposito 
+RH-Acked-by: Hanna Reitz +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/1] 336ba583311a80beeadd1900336056404f63211a (kmwolf/centos-qemu-kvm) +Commits 01ef8185b80 and 24b36e9813e updated the way that the maximum +transfer length is calculated for patching block limits VPD page in an +INQUIRY response. + +The same updates also need to be made for the case where the host device +does not support the block limits VPD page at all and we emulate the +whole page. + +Without this fix, on host block devices a maximum transfer length of +(INT_MAX - sector_size) bytes is advertised to the guest, resulting in +I/O errors when a request that exceeds the host limits is made by the +guest. (Prior to commit 24b36e9813e, this code path would use the +max_transfer value from the host instead of INT_MAX, but still miss the +fix from 01ef8185b80 where max_transfer is also capped to max_iov +host pages, so it would be less wrong, but still wrong.) + +Cc: qemu-stable@nongnu.org +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2096251 +Fixes: 01ef8185b809af9d287e1a03a3f9d8ea8231118a +Fixes: 24b36e9813ec15da7db62e3b3621730710c5f020 +Signed-off-by: Kevin Wolf +Message-Id: <20220822125320.48257-1-kwolf@redhat.com> +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Kevin Wolf +(cherry picked from commit 51e15194b0a091e5c40aab2eb234a1d36c5c58ee) + +Resolved conflict: qemu_real_host_page_size() is a getter function in +current upstream, but still just a public global variable downstream. 
+ +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-generic.c | 21 ++++++++++++++------- + 1 file changed, 14 insertions(+), 7 deletions(-) + +diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c +index 0306ccc7b1..3742899839 100644 +--- a/hw/scsi/scsi-generic.c ++++ b/hw/scsi/scsi-generic.c +@@ -147,6 +147,18 @@ static int execute_command(BlockBackend *blk, + return 0; + } + ++static uint64_t calculate_max_transfer(SCSIDevice *s) ++{ ++ uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk); ++ uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk); ++ ++ assert(max_transfer); ++ max_transfer = MIN_NON_ZERO(max_transfer, ++ max_iov * qemu_real_host_page_size); ++ ++ return max_transfer / s->blocksize; ++} ++ + static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len) + { + uint8_t page, page_idx; +@@ -179,12 +191,7 @@ static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len) + (r->req.cmd.buf[1] & 0x01)) { + page = r->req.cmd.buf[2]; + if (page == 0xb0) { +- uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk); +- uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk); +- +- assert(max_transfer); +- max_transfer = MIN_NON_ZERO(max_transfer, max_iov * qemu_real_host_page_size) +- / s->blocksize; ++ uint64_t max_transfer = calculate_max_transfer(s); + stl_be_p(&r->buf[8], max_transfer); + /* Also take care of the opt xfer len. 
*/ + stl_be_p(&r->buf[12], +@@ -230,7 +237,7 @@ static int scsi_generic_emulate_block_limits(SCSIGenericReq *r, SCSIDevice *s) + uint8_t buf[64]; + + SCSIBlockLimits bl = { +- .max_io_sectors = blk_get_max_transfer(s->conf.blk) / s->blocksize ++ .max_io_sectors = calculate_max_transfer(s), + }; + + memset(r->buf, 0, r->buflen); +-- +2.31.1 + diff --git a/kvm-util-Return-void-on-iova_tree_remove.patch b/kvm-util-Return-void-on-iova_tree_remove.patch new file mode 100644 index 0000000..07c6f8e --- /dev/null +++ b/kvm-util-Return-void-on-iova_tree_remove.patch @@ -0,0 +1,70 @@ +From 74c829f82eafa8e42ae94f7ace55c8aaed3bb5f4 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Wed, 27 Apr 2022 17:49:31 +0200 +Subject: [PATCH 05/23] util: Return void on iova_tree_remove +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/21] 252287acca896eba7b5d2b62fc6247cfc565ba57 (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: Merged + +It always returns IOVA_OK so nobody uses it. + +Acked-by: Jason Wang +Reviewed-by: Peter Xu +Signed-off-by: Eugenio Pérez +Message-Id: <20220427154931.3166388-1-eperezma@redhat.com> +Signed-off-by: Laurent Vivier +(cherry picked from commit 832fef7cc14d65f99d523f883ef384014e6476a7) +--- + include/qemu/iova-tree.h | 4 +--- + util/iova-tree.c | 4 +--- + 2 files changed, 2 insertions(+), 6 deletions(-) + +diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h +index c938fb0793..16bbfdf5f8 100644 +--- a/include/qemu/iova-tree.h ++++ b/include/qemu/iova-tree.h +@@ -72,10 +72,8 @@ int iova_tree_insert(IOVATree *tree, const DMAMap *map); + * provided. 
The range does not need to be exactly what has inserted, + * all the mappings that are included in the provided range will be + * removed from the tree. Here map->translated_addr is meaningless. +- * +- * Return: 0 if succeeded, or <0 if error. + */ +-int iova_tree_remove(IOVATree *tree, const DMAMap *map); ++void iova_tree_remove(IOVATree *tree, const DMAMap *map); + + /** + * iova_tree_find: +diff --git a/util/iova-tree.c b/util/iova-tree.c +index 6dff29c1f6..fee530a579 100644 +--- a/util/iova-tree.c ++++ b/util/iova-tree.c +@@ -164,15 +164,13 @@ void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator) + g_tree_foreach(tree->tree, iova_tree_traverse, iterator); + } + +-int iova_tree_remove(IOVATree *tree, const DMAMap *map) ++void iova_tree_remove(IOVATree *tree, const DMAMap *map) + { + const DMAMap *overlap; + + while ((overlap = iova_tree_find(tree, map))) { + g_tree_remove(tree->tree, overlap); + } +- +- return IOVA_OK; + } + + /** +-- +2.31.1 + diff --git a/kvm-util-accept-iova_tree_remove_parameter-by-value.patch b/kvm-util-accept-iova_tree_remove_parameter-by-value.patch new file mode 100644 index 0000000..cd073da --- /dev/null +++ b/kvm-util-accept-iova_tree_remove_parameter-by-value.patch @@ -0,0 +1,182 @@ +From 90697579eaf598614293d75f684d6e8c55f8ab9b Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:20:04 +0200 +Subject: [PATCH 06/23] util: accept iova_tree_remove_parameter by value +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/21] ddaf052789e7ab3c67a77c038347113301587ffb (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +It's convenient to call iova_tree_remove from a map 
returned from +iova_tree_find or iova_tree_find_iova. With the current code this is not +possible, since we will free it, and then we will try to search for it +again. + +Fix it making accepting the map by value, forcing a copy of the +argument. Not applying a fixes tag, since there is no use like that at +the moment. + +Signed-off-by: Eugenio Pérez +Signed-off-by: Jason Wang +(cherry picked from commit d69ba6677405de86b3b617fc7688b549f84cf013) +--- + hw/i386/intel_iommu.c | 6 +++--- + hw/virtio/vhost-iova-tree.c | 2 +- + hw/virtio/vhost-iova-tree.h | 2 +- + hw/virtio/vhost-vdpa.c | 6 +++--- + include/qemu/iova-tree.h | 2 +- + net/vhost-vdpa.c | 4 ++-- + util/iova-tree.c | 4 ++-- + 7 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index c64aa81a83..6738cf0929 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -1157,7 +1157,7 @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) + return ret; + } + /* Drop any existing mapping */ +- iova_tree_remove(as->iova_tree, &target); ++ iova_tree_remove(as->iova_tree, target); + /* Recover the correct type */ + event->type = IOMMU_NOTIFIER_MAP; + entry->perm = cache_perm; +@@ -1170,7 +1170,7 @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) + trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask); + return 0; + } +- iova_tree_remove(as->iova_tree, &target); ++ iova_tree_remove(as->iova_tree, target); + } + + trace_vtd_page_walk_one(info->domain_id, entry->iova, +@@ -3532,7 +3532,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) + + map.iova = n->start; + map.size = size; +- iova_tree_remove(as->iova_tree, &map); ++ iova_tree_remove(as->iova_tree, map); + } + + static void vtd_address_space_unmap_all(IntelIOMMUState *s) +diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c +index 55fed1fefb..1339a4de8b 100644 +--- 
a/hw/virtio/vhost-iova-tree.c ++++ b/hw/virtio/vhost-iova-tree.c +@@ -104,7 +104,7 @@ int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map) + * @iova_tree: The vhost iova tree + * @map: The map to remove + */ +-void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map) ++void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map) + { + iova_tree_remove(iova_tree->iova_taddr_map, map); + } +diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h +index 6a4f24e0f9..4adfd79ff0 100644 +--- a/hw/virtio/vhost-iova-tree.h ++++ b/hw/virtio/vhost-iova-tree.h +@@ -22,6 +22,6 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete); + const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree, + const DMAMap *map); + int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map); +-void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map); ++void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map); + + #endif +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index cc15b7d8ee..39aa70f52d 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -238,7 +238,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, + + fail_map: + if (v->shadow_vqs_enabled) { +- vhost_iova_tree_remove(v->iova_tree, &mem_region); ++ vhost_iova_tree_remove(v->iova_tree, mem_region); + } + + fail: +@@ -298,7 +298,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, + return; + } + iova = result->iova; +- vhost_iova_tree_remove(v->iova_tree, result); ++ vhost_iova_tree_remove(v->iova_tree, *result); + } + vhost_vdpa_iotlb_batch_begin_once(v); + ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); +@@ -942,7 +942,7 @@ static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, + needle->perm == IOMMU_RO); + if (unlikely(r != 0)) { + error_setg_errno(errp, -r, "Cannot map region to device"); +- 
vhost_iova_tree_remove(v->iova_tree, needle); ++ vhost_iova_tree_remove(v->iova_tree, *needle); + } + + return r == 0; +diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h +index 16bbfdf5f8..8528e5c98f 100644 +--- a/include/qemu/iova-tree.h ++++ b/include/qemu/iova-tree.h +@@ -73,7 +73,7 @@ int iova_tree_insert(IOVATree *tree, const DMAMap *map); + * all the mappings that are included in the provided range will be + * removed from the tree. Here map->translated_addr is meaningless. + */ +-void iova_tree_remove(IOVATree *tree, const DMAMap *map); ++void iova_tree_remove(IOVATree *tree, DMAMap map); + + /** + * iova_tree_find: +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 411e71e6c2..ba65736f83 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -244,7 +244,7 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) + error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); + } + +- vhost_iova_tree_remove(tree, map); ++ vhost_iova_tree_remove(tree, *map); + } + + static size_t vhost_vdpa_net_cvq_cmd_len(void) +@@ -297,7 +297,7 @@ static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, + return true; + + dma_map_err: +- vhost_iova_tree_remove(v->iova_tree, &map); ++ vhost_iova_tree_remove(v->iova_tree, map); + return false; + } + +diff --git a/util/iova-tree.c b/util/iova-tree.c +index fee530a579..536789797e 100644 +--- a/util/iova-tree.c ++++ b/util/iova-tree.c +@@ -164,11 +164,11 @@ void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator) + g_tree_foreach(tree->tree, iova_tree_traverse, iterator); + } + +-void iova_tree_remove(IOVATree *tree, const DMAMap *map) ++void iova_tree_remove(IOVATree *tree, DMAMap map) + { + const DMAMap *overlap; + +- while ((overlap = iova_tree_find(tree, map))) { ++ while ((overlap = iova_tree_find(tree, &map))) { + g_tree_remove(tree->tree, overlap); + } + } +-- +2.31.1 + diff --git a/kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch 
b/kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch new file mode 100644 index 0000000..4dede70 --- /dev/null +++ b/kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch @@ -0,0 +1,87 @@ +From e1f9986cf77e4b2f16aca7b2523bc75bae0c4d3c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:36 +0200 +Subject: [PATCH 21/23] vdpa: Add virtio-net mac address via CVQ at start +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [20/21] a7920816d5faf7a0cfbb7c2731a48ddfc456b8d4 (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +This is needed so the destination vdpa device see the same state a the +guest set in the source. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit f34cd09b13855657a0d49c5ea6a1e37ba9dc2334) +--- + net/vhost-vdpa.c | 40 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index f09f044ec1..79ebda7de1 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -363,11 +363,51 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, + return vhost_svq_poll(svq); + } + ++static int vhost_vdpa_net_load(NetClientState *nc) ++{ ++ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); ++ const struct vhost_vdpa *v = &s->vhost_vdpa; ++ const VirtIONet *n; ++ uint64_t features; ++ ++ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); ++ ++ if (!v->shadow_vqs_enabled) { ++ return 0; ++ } ++ ++ n = VIRTIO_NET(v->dev->vdev); ++ features = n->parent_obj.guest_features; ++ if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) { ++ const struct 
virtio_net_ctrl_hdr ctrl = { ++ .class = VIRTIO_NET_CTRL_MAC, ++ .cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET, ++ }; ++ char *cursor = s->cvq_cmd_out_buffer; ++ ssize_t dev_written; ++ ++ memcpy(cursor, &ctrl, sizeof(ctrl)); ++ cursor += sizeof(ctrl); ++ memcpy(cursor, n->mac, sizeof(n->mac)); ++ ++ dev_written = vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + sizeof(n->mac), ++ sizeof(virtio_net_ctrl_ack)); ++ if (unlikely(dev_written < 0)) { ++ return dev_written; ++ } ++ ++ return *((virtio_net_ctrl_ack *)s->cvq_cmd_in_buffer) != VIRTIO_NET_OK; ++ } ++ ++ return 0; ++} ++ + static NetClientInfo net_vhost_vdpa_cvq_info = { + .type = NET_CLIENT_DRIVER_VHOST_VDPA, + .size = sizeof(VhostVDPAState), + .receive = vhost_vdpa_receive, + .start = vhost_vdpa_net_cvq_start, ++ .load = vhost_vdpa_net_load, + .stop = vhost_vdpa_net_cvq_stop, + .cleanup = vhost_vdpa_cleanup, + .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, +-- +2.31.1 + diff --git a/kvm-vdpa-Delete-CVQ-migration-blocker.patch b/kvm-vdpa-Delete-CVQ-migration-blocker.patch new file mode 100644 index 0000000..87dfb5a --- /dev/null +++ b/kvm-vdpa-Delete-CVQ-migration-blocker.patch @@ -0,0 +1,98 @@ +From 896f7749c72afe988ab28ac6af77b9c53b685c03 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:37 +0200 +Subject: [PATCH 22/23] vdpa: Delete CVQ migration blocker +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [21/21] 286f55177a132a8845c2912fb28cb4add472005a (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +We can restore the device state in the destination via CVQ now. Remove +the migration blocker. 
+ +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit fe2b0cd71cddbec4eaf6e325eaf357a4e72a469d) +--- + hw/virtio/vhost-vdpa.c | 15 --------------- + include/hw/virtio/vhost-vdpa.h | 1 - + net/vhost-vdpa.c | 2 -- + 3 files changed, 18 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 0bea1e1eb9..b61e313953 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1031,13 +1031,6 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) + return true; + } + +- if (v->migration_blocker) { +- int r = migrate_add_blocker(v->migration_blocker, &err); +- if (unlikely(r < 0)) { +- return false; +- } +- } +- + for (i = 0; i < v->shadow_vqs->len; ++i) { + VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); +@@ -1080,10 +1073,6 @@ err: + vhost_svq_stop(svq); + } + +- if (v->migration_blocker) { +- migrate_del_blocker(v->migration_blocker); +- } +- + return false; + } + +@@ -1099,10 +1088,6 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); + vhost_vdpa_svq_unmap_rings(dev, svq); + } +- +- if (v->migration_blocker) { +- migrate_del_blocker(v->migration_blocker); +- } + } + + static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index d10a89303e..1111d85643 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -35,7 +35,6 @@ typedef struct vhost_vdpa { + bool shadow_vqs_enabled; + /* IOVA mapping used by the Shadow Virtqueue */ + VhostIOVATree *iova_tree; +- Error *migration_blocker; + GPtrArray *shadow_vqs; + const VhostShadowVirtqueueOps *shadow_vq_ops; + void *shadow_vq_ops_opaque; +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 79ebda7de1..f4f16583e4 100644 +--- a/net/vhost-vdpa.c ++++ 
b/net/vhost-vdpa.c +@@ -555,8 +555,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + + s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; + s->vhost_vdpa.shadow_vq_ops_opaque = s; +- error_setg(&s->vhost_vdpa.migration_blocker, +- "Migration disabled: vhost-vdpa uses CVQ."); + } + ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); + if (ret) { +-- +2.31.1 + diff --git a/kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch b/kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch new file mode 100644 index 0000000..e45a198 --- /dev/null +++ b/kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch @@ -0,0 +1,133 @@ +From 8e36feb4d3480b7c09d9dcbde18c9db1e8063f18 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:20:06 +0200 +Subject: [PATCH 08/23] vdpa: Make SVQ vring unmapping return void +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [7/21] 3366340dc7ae65f83894f5d0da0d1e0f64713751 (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +Nothing actually reads the return value, but an error in cleaning some +entries could cause device stop to abort, making a restart impossible. +Better ignore explicitely the return value. 
+ +Reported-by: Lei Yang +Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ") +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit bb5cf89ef2338ab6be946ede6821c3f61347eb1b) +--- + hw/virtio/vhost-vdpa.c | 32 ++++++++++---------------------- + 1 file changed, 10 insertions(+), 22 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index e5c264fb29..8eddf39f2a 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -882,7 +882,7 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, + /** + * Unmap a SVQ area in the device + */ +-static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, ++static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, + const DMAMap *needle) + { + const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle); +@@ -891,38 +891,33 @@ static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, + + if (unlikely(!result)) { + error_report("Unable to find SVQ address to unmap"); +- return false; ++ return; + } + + size = ROUND_UP(result->size, qemu_real_host_page_size); + r = vhost_vdpa_dma_unmap(v, result->iova, size); + if (unlikely(r < 0)) { + error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); +- return false; ++ return; + } + + vhost_iova_tree_remove(v->iova_tree, *result); +- return r == 0; + } + +-static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, ++static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, + const VhostShadowVirtqueue *svq) + { + DMAMap needle = {}; + struct vhost_vdpa *v = dev->opaque; + struct vhost_vring_addr svq_addr; +- bool ok; + + vhost_svq_get_vring_addr(svq, &svq_addr); + + needle.translated_addr = svq_addr.desc_user_addr; +- ok = vhost_vdpa_svq_unmap_ring(v, &needle); +- if (unlikely(!ok)) { +- return false; +- } ++ vhost_vdpa_svq_unmap_ring(v, &needle); + + needle.translated_addr = svq_addr.used_user_addr; +- return vhost_vdpa_svq_unmap_ring(v, 
&needle); ++ vhost_vdpa_svq_unmap_ring(v, &needle); + } + + /** +@@ -1093,26 +1088,22 @@ err: + return false; + } + +-static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev) ++static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) + { + struct vhost_vdpa *v = dev->opaque; + + if (!v->shadow_vqs) { +- return true; ++ return; + } + + for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); +- bool ok = vhost_vdpa_svq_unmap_rings(dev, svq); +- if (unlikely(!ok)) { +- return false; +- } ++ vhost_vdpa_svq_unmap_rings(dev, svq); + } + + if (v->migration_blocker) { + migrate_del_blocker(v->migration_blocker); + } +- return true; + } + + static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) +@@ -1129,10 +1120,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) + } + vhost_vdpa_set_vring_ready(dev); + } else { +- ok = vhost_vdpa_svqs_stop(dev); +- if (unlikely(!ok)) { +- return -1; +- } ++ vhost_vdpa_svqs_stop(dev); + vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); + } + +-- +2.31.1 + diff --git a/kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch b/kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch new file mode 100644 index 0000000..7cdf05c --- /dev/null +++ b/kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch @@ -0,0 +1,251 @@ +From 70c72316c26e95cd18b4d46b83e78ba3a148212c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:33 +0200 +Subject: [PATCH 18/23] vdpa: Move command buffers map to start of net device +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [17/21] 7a9824fa618f5c2904648b50e3078474cd3987aa (eperezmartin/qemu-kvm) +Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +As this series will reuse them to restore the device state at the end of +a migration (or a device start), let's allocate only once at the device +start so we don't duplicate their map and unmap. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit d7d73dec14cebcebd8de774424795aeb821236c1) +--- + net/vhost-vdpa.c | 123 ++++++++++++++++++++++------------------------- + 1 file changed, 58 insertions(+), 65 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 03e4cf1abc..17626feb8d 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -263,29 +263,20 @@ static size_t vhost_vdpa_net_cvq_cmd_page_len(void) + return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size); + } + +-/** Copy and map a guest buffer. */ +-static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, +- const struct iovec *out_data, +- size_t out_num, size_t data_len, void *buf, +- size_t *written, bool write) ++/** Map CVQ buffer. */ ++static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, ++ bool write) + { + DMAMap map = {}; + int r; + +- if (unlikely(!data_len)) { +- qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n", +- __func__, write ? "in" : "out"); +- return false; +- } +- +- *written = iov_to_buf(out_data, out_num, 0, buf, data_len); + map.translated_addr = (hwaddr)(uintptr_t)buf; +- map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1; ++ map.size = size - 1; + map.perm = write ? 
IOMMU_RW : IOMMU_RO, + r = vhost_iova_tree_map_alloc(v->iova_tree, &map); + if (unlikely(r != IOVA_OK)) { + error_report("Cannot map injected element"); +- return false; ++ return r; + } + + r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, +@@ -294,50 +285,58 @@ static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, + goto dma_map_err; + } + +- return true; ++ return 0; + + dma_map_err: + vhost_iova_tree_remove(v->iova_tree, map); +- return false; ++ return r; + } + +-/** +- * Copy the guest element into a dedicated buffer suitable to be sent to NIC +- * +- * @iov: [0] is the out buffer, [1] is the in one +- */ +-static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, +- VirtQueueElement *elem, +- struct iovec *iov) ++static int vhost_vdpa_net_cvq_start(NetClientState *nc) + { +- size_t in_copied; +- bool ok; ++ VhostVDPAState *s; ++ int r; + +- iov[0].iov_base = s->cvq_cmd_out_buffer; +- ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num, +- vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base, +- &iov[0].iov_len, false); +- if (unlikely(!ok)) { +- return false; ++ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); ++ ++ s = DO_UPCAST(VhostVDPAState, nc, nc); ++ if (!s->vhost_vdpa.shadow_vqs_enabled) { ++ return 0; + } + +- iov[1].iov_base = s->cvq_cmd_in_buffer; +- ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0, +- sizeof(virtio_net_ctrl_ack), iov[1].iov_base, +- &in_copied, true); +- if (unlikely(!ok)) { ++ r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer, ++ vhost_vdpa_net_cvq_cmd_page_len(), false); ++ if (unlikely(r < 0)) { ++ return r; ++ } ++ ++ r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer, ++ vhost_vdpa_net_cvq_cmd_page_len(), true); ++ if (unlikely(r < 0)) { + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); +- return false; + } + +- iov[1].iov_len = sizeof(virtio_net_ctrl_ack); +- return true; ++ return r; ++} ++ ++static void 
vhost_vdpa_net_cvq_stop(NetClientState *nc) ++{ ++ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); ++ ++ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); ++ ++ if (s->vhost_vdpa.shadow_vqs_enabled) { ++ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); ++ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer); ++ } + } + + static NetClientInfo net_vhost_vdpa_cvq_info = { + .type = NET_CLIENT_DRIVER_VHOST_VDPA, + .size = sizeof(VhostVDPAState), + .receive = vhost_vdpa_receive, ++ .start = vhost_vdpa_net_cvq_start, ++ .stop = vhost_vdpa_net_cvq_stop, + .cleanup = vhost_vdpa_cleanup, + .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, + .has_ufo = vhost_vdpa_has_ufo, +@@ -348,19 +347,17 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { + * Do not forward commands not supported by SVQ. Otherwise, the device could + * accept it and qemu would not know how to update the device model. + */ +-static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out, +- size_t out_num) ++static bool vhost_vdpa_net_cvq_validate_cmd(const void *out_buf, size_t len) + { + struct virtio_net_ctrl_hdr ctrl; +- size_t n; + +- n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl)); +- if (unlikely(n < sizeof(ctrl))) { ++ if (unlikely(len < sizeof(ctrl))) { + qemu_log_mask(LOG_GUEST_ERROR, +- "%s: invalid legnth of out buffer %zu\n", __func__, n); ++ "%s: invalid legnth of out buffer %zu\n", __func__, len); + return false; + } + ++ memcpy(&ctrl, out_buf, sizeof(ctrl)); + switch (ctrl.class) { + case VIRTIO_NET_CTRL_MAC: + switch (ctrl.cmd) { +@@ -392,10 +389,14 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + VhostVDPAState *s = opaque; + size_t in_len, dev_written; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; +- /* out and in buffers sent to the device */ +- struct iovec dev_buffers[2] = { +- { .iov_base = s->cvq_cmd_out_buffer }, +- { .iov_base = s->cvq_cmd_in_buffer }, ++ /* Out buffer sent to both the vdpa device and the device 
model */ ++ struct iovec out = { ++ .iov_base = s->cvq_cmd_out_buffer, ++ }; ++ /* In buffer sent to the device */ ++ const struct iovec dev_in = { ++ .iov_base = s->cvq_cmd_in_buffer, ++ .iov_len = sizeof(virtio_net_ctrl_ack), + }; + /* in buffer used for device model */ + const struct iovec in = { +@@ -405,17 +406,15 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + int r = -EINVAL; + bool ok; + +- ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers); +- if (unlikely(!ok)) { +- goto out; +- } +- +- ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1); ++ out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, ++ s->cvq_cmd_out_buffer, ++ vhost_vdpa_net_cvq_cmd_len()); ++ ok = vhost_vdpa_net_cvq_validate_cmd(s->cvq_cmd_out_buffer, out.iov_len); + if (unlikely(!ok)) { + goto out; + } + +- r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem); ++ r = vhost_svq_add(svq, &out, 1, &dev_in, 1, elem); + if (unlikely(r != 0)) { + if (unlikely(r == -ENOSPC)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", +@@ -435,13 +434,13 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + goto out; + } + +- memcpy(&status, dev_buffers[1].iov_base, sizeof(status)); ++ memcpy(&status, s->cvq_cmd_in_buffer, sizeof(status)); + if (status != VIRTIO_NET_OK) { + goto out; + } + + status = VIRTIO_NET_ERR; +- virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1); ++ virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1); + if (status != VIRTIO_NET_OK) { + error_report("Bad CVQ processing in model"); + } +@@ -454,12 +453,6 @@ out: + } + vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); + g_free(elem); +- if (dev_buffers[0].iov_base) { +- vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base); +- } +- if (dev_buffers[1].iov_base) { +- vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base); +- } + return r; + } + +-- +2.31.1 + diff --git 
a/kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch b/kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch new file mode 100644 index 0000000..b23d64f --- /dev/null +++ b/kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch @@ -0,0 +1,49 @@ +From 51c1e9cf1612727ec4c6e795576ae8fa0c0b2d4c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:20:05 +0200 +Subject: [PATCH 07/23] vdpa: Remove SVQ vring from iova_tree at shutdown +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [6/21] f72e67b9c90103151cbf86bff53e8f14b30f0e5b (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +Although the device will be reset before usage, the right thing to do is +to clean it. 
+ +Reported-by: Lei Yang +Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ") +Signed-off-by: Eugenio Pérez +Signed-off-by: Jason Wang +(cherry picked from commit 0c45fa6c420ec3a1dd9ea9c40fa11bd943bb3be9) +--- + hw/virtio/vhost-vdpa.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 39aa70f52d..e5c264fb29 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -896,6 +896,12 @@ static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, + + size = ROUND_UP(result->size, qemu_real_host_page_size); + r = vhost_vdpa_dma_unmap(v, result->iova, size); ++ if (unlikely(r < 0)) { ++ error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); ++ return false; ++ } ++ ++ vhost_iova_tree_remove(v->iova_tree, *result); + return r == 0; + } + +-- +2.31.1 + diff --git a/kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch b/kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch new file mode 100644 index 0000000..98697cb --- /dev/null +++ b/kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch @@ -0,0 +1,48 @@ +From edde0b6a805085255bccc0ccdc3b9b6f81cef37b Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:20:02 +0200 +Subject: [PATCH 03/23] vdpa: Skip the maps not in the iova tree +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/21] 73acd16375a17cdf4c58830386541dd3a1b18bf7 (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +Next patch will skip the registering of dma maps that the vdpa device +rejects in the iova tree. We need to consider that here or we cause a +SIGSEGV accessing result. 
+ +Reported-by: Lei Yang +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit a92ca0ffee5858636432a6059eb2790df1c9c77f) +--- + hw/virtio/vhost-vdpa.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 96334ab5b6..aa7765c6bc 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -287,6 +287,10 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, + }; + + result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region); ++ if (!result) { ++ /* The memory listener map wasn't mapped */ ++ return; ++ } + iova = result->iova; + vhost_iova_tree_remove(v->iova_tree, result); + } +-- +2.31.1 + diff --git a/kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch b/kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch new file mode 100644 index 0000000..8398415 --- /dev/null +++ b/kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch @@ -0,0 +1,79 @@ +From 89a67e0ce3e4c7b9f9b2d4cfb9fc5eeebc5643ac Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:20:08 +0200 +Subject: [PATCH 10/23] vdpa: Use ring hwaddr at vhost_vdpa_svq_unmap_ring +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [9/21] 4420134d7be60fa8b04dc9a56566524bf8daddd4 (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +Reduce code duplication. 
+ +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 5a92452da95b2edfbffdd42ddc2612a7d09a5db0) +--- + hw/virtio/vhost-vdpa.c | 17 ++++++++--------- + 1 file changed, 8 insertions(+), 9 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 8eddf39f2a..0bea1e1eb9 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -882,10 +882,12 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, + /** + * Unmap a SVQ area in the device + */ +-static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, +- const DMAMap *needle) ++static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr) + { +- const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle); ++ const DMAMap needle = { ++ .translated_addr = addr, ++ }; ++ const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, &needle); + hwaddr size; + int r; + +@@ -907,17 +909,14 @@ static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, + static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, + const VhostShadowVirtqueue *svq) + { +- DMAMap needle = {}; + struct vhost_vdpa *v = dev->opaque; + struct vhost_vring_addr svq_addr; + + vhost_svq_get_vring_addr(svq, &svq_addr); + +- needle.translated_addr = svq_addr.desc_user_addr; +- vhost_vdpa_svq_unmap_ring(v, &needle); ++ vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr); + +- needle.translated_addr = svq_addr.used_user_addr; +- vhost_vdpa_svq_unmap_ring(v, &needle); ++ vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr); + } + + /** +@@ -995,7 +994,7 @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev, + ok = vhost_vdpa_svq_map_ring(v, &device_region, errp); + if (unlikely(!ok)) { + error_prepend(errp, "Cannot create vq device region: "); +- vhost_vdpa_svq_unmap_ring(v, &driver_region); ++ vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr); + } + addr->used_user_addr = device_region.iova; + +-- +2.31.1 + diff 
--git a/kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch b/kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch new file mode 100644 index 0000000..e1da31d --- /dev/null +++ b/kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch @@ -0,0 +1,62 @@ +From f92b0ef80b4889ae0beb0b2a026ec3892d576d79 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:32 +0200 +Subject: [PATCH 17/23] vdpa: add net_vhost_vdpa_cvq_info NetClientInfo +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [16/21] c80c9fd89e81fc389e7d02e9d764331ab9fc7a0a (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +Next patches will add a new info callback to restore NIC status through +CVQ. Since only the CVQ vhost device is needed, create it with a new +NetClientInfo. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 9d379453404303069f93f9b8163ae3805bcd8c2e) +--- + net/vhost-vdpa.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index ba65736f83..03e4cf1abc 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -334,6 +334,16 @@ static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, + return true; + } + ++static NetClientInfo net_vhost_vdpa_cvq_info = { ++ .type = NET_CLIENT_DRIVER_VHOST_VDPA, ++ .size = sizeof(VhostVDPAState), ++ .receive = vhost_vdpa_receive, ++ .cleanup = vhost_vdpa_cleanup, ++ .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, ++ .has_ufo = vhost_vdpa_has_ufo, ++ .check_peer_type = vhost_vdpa_check_peer_type, ++}; ++ + /** + * Do not forward commands not supported by SVQ. 
Otherwise, the device could + * accept it and qemu would not know how to update the device model. +@@ -475,7 +485,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, + name); + } else { +- nc = qemu_new_net_control_client(&net_vhost_vdpa_info, peer, ++ nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, + device, name); + } + snprintf(nc->info_str, sizeof(nc->info_str), TYPE_VHOST_VDPA); +-- +2.31.1 + diff --git a/kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch b/kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch new file mode 100644 index 0000000..8c66f19 --- /dev/null +++ b/kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch @@ -0,0 +1,83 @@ +From 6d16102aca24bab16c846fe6457071f4466b8e35 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:20:03 +0200 +Subject: [PATCH 04/23] vdpa: do not save failed dma maps in SVQ iova tree +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/21] f9bea39f7fa14c5ef0f85774cbad0ca3b52c4498 (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +If a map fails for whatever reason, it must not be saved in the tree. +Otherwise, qemu will try to unmap it in cleanup, leaving to more errors. 
+ +Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ") +Reported-by: Lei Yang +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 6cc2ec65382fde205511ac00a324995ce6ee8f28) +--- + hw/virtio/vhost-vdpa.c | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index aa7765c6bc..cc15b7d8ee 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -174,6 +174,7 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener) + static void vhost_vdpa_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { ++ DMAMap mem_region = {}; + struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); + hwaddr iova; + Int128 llend, llsize; +@@ -210,13 +211,13 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, + + llsize = int128_sub(llend, int128_make64(iova)); + if (v->shadow_vqs_enabled) { +- DMAMap mem_region = { +- .translated_addr = (hwaddr)(uintptr_t)vaddr, +- .size = int128_get64(llsize) - 1, +- .perm = IOMMU_ACCESS_FLAG(true, section->readonly), +- }; ++ int r; + +- int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); ++ mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr, ++ mem_region.size = int128_get64(llsize) - 1, ++ mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly), ++ ++ r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); + if (unlikely(r != IOVA_OK)) { + error_report("Can't allocate a mapping (%d)", r); + goto fail; +@@ -230,11 +231,16 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, + vaddr, section->readonly); + if (ret) { + error_report("vhost vdpa map fail!"); +- goto fail; ++ goto fail_map; + } + + return; + ++fail_map: ++ if (v->shadow_vqs_enabled) { ++ vhost_iova_tree_remove(v->iova_tree, &mem_region); ++ } ++ + fail: + /* + * On the initfn path, store the first error in the 
container so we +-- +2.31.1 + diff --git a/kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch b/kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch new file mode 100644 index 0000000..3cc011f --- /dev/null +++ b/kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch @@ -0,0 +1,153 @@ +From 56f4bebc591893e590481617da7cd7ecffeb166d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:34 +0200 +Subject: [PATCH 19/23] vdpa: extract vhost_vdpa_net_cvq_add from + vhost_vdpa_net_handle_ctrl_avail +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [18/21] 08ab71dbf050f5c2e97c622d1915f71a56c135b8 (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +So we can reuse it to inject state messages. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +-- +v7: +* Remove double free error + +v6: +* Do not assume in buffer sent to the device is sizeof(virtio_net_ctrl_ack) + +v5: +* Do not use an artificial !NULL VirtQueueElement +* Use only out size instead of iovec dev_buffers for these functions. 
+ +Signed-off-by: Jason Wang +(cherry picked from commit d9afb1f0ee4d662ed67d3bc1220b943f7e4cfa6f) +--- + net/vhost-vdpa.c | 59 +++++++++++++++++++++++++++++++----------------- + 1 file changed, 38 insertions(+), 21 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 17626feb8d..f09f044ec1 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -331,6 +331,38 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc) + } + } + ++static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, ++ size_t in_len) ++{ ++ /* Buffers for the device */ ++ const struct iovec out = { ++ .iov_base = s->cvq_cmd_out_buffer, ++ .iov_len = out_len, ++ }; ++ const struct iovec in = { ++ .iov_base = s->cvq_cmd_in_buffer, ++ .iov_len = sizeof(virtio_net_ctrl_ack), ++ }; ++ VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0); ++ int r; ++ ++ r = vhost_svq_add(svq, &out, 1, &in, 1, NULL); ++ if (unlikely(r != 0)) { ++ if (unlikely(r == -ENOSPC)) { ++ qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", ++ __func__); ++ } ++ return r; ++ } ++ ++ /* ++ * We can poll here since we've had BQL from the time we sent the ++ * descriptor. 
Also, we need to take the answer before SVQ pulls by itself, ++ * when BQL is released ++ */ ++ return vhost_svq_poll(svq); ++} ++ + static NetClientInfo net_vhost_vdpa_cvq_info = { + .type = NET_CLIENT_DRIVER_VHOST_VDPA, + .size = sizeof(VhostVDPAState), +@@ -387,23 +419,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + void *opaque) + { + VhostVDPAState *s = opaque; +- size_t in_len, dev_written; ++ size_t in_len; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + /* Out buffer sent to both the vdpa device and the device model */ + struct iovec out = { + .iov_base = s->cvq_cmd_out_buffer, + }; +- /* In buffer sent to the device */ +- const struct iovec dev_in = { +- .iov_base = s->cvq_cmd_in_buffer, +- .iov_len = sizeof(virtio_net_ctrl_ack), +- }; + /* in buffer used for device model */ + const struct iovec in = { + .iov_base = &status, + .iov_len = sizeof(status), + }; +- int r = -EINVAL; ++ ssize_t dev_written = -EINVAL; + bool ok; + + out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, +@@ -414,21 +441,11 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + goto out; + } + +- r = vhost_svq_add(svq, &out, 1, &dev_in, 1, elem); +- if (unlikely(r != 0)) { +- if (unlikely(r == -ENOSPC)) { +- qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", +- __func__); +- } ++ dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); ++ if (unlikely(dev_written < 0)) { + goto out; + } + +- /* +- * We can poll here since we've had BQL from the time we sent the +- * descriptor. 
Also, we need to take the answer before SVQ pulls by itself, +- * when BQL is released +- */ +- dev_written = vhost_svq_poll(svq); + if (unlikely(dev_written < sizeof(status))) { + error_report("Insufficient written data (%zu)", dev_written); + goto out; +@@ -436,7 +453,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + + memcpy(&status, s->cvq_cmd_in_buffer, sizeof(status)); + if (status != VIRTIO_NET_OK) { +- goto out; ++ return VIRTIO_NET_ERR; + } + + status = VIRTIO_NET_ERR; +@@ -453,7 +470,7 @@ out: + } + vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); + g_free(elem); +- return r; ++ return dev_written < 0 ? dev_written : 0; + } + + static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { +-- +2.31.1 + diff --git a/kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch b/kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch new file mode 100644 index 0000000..9b6155b --- /dev/null +++ b/kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch @@ -0,0 +1,67 @@ +From 6cde15c70c86819033337771eb522e94e3ea9e34 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:20:07 +0200 +Subject: [PATCH 09/23] vhost: Always store new kick fd on + vhost_svq_set_svq_kick_fd +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [8/21] a09b8851c39d7cea67414560f6d322e988b9d59a (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +We can unbind twice a file descriptor if we call twice +vhost_svq_set_svq_kick_fd because of this. Since it comes from vhost and +not from SVQ, that file descriptor could be a different thing that +guest's vhost notifier. 
+ +Likewise, it can happens the same if a guest start and stop the device +multiple times. + +Reported-by: Lei Yang +Fixes: dff4426fa6 ("vhost: Add Shadow VirtQueue kick forwarding capabilities") +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 6867f29c1425add7e0e8d1d8d58cc0ffbb8df0e4) +--- + hw/virtio/vhost-shadow-virtqueue.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index e53aac45f6..f420311b89 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -602,13 +602,13 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) + event_notifier_set_handler(svq_kick, NULL); + } + ++ event_notifier_init_fd(svq_kick, svq_kick_fd); + /* + * event_notifier_set_handler already checks for guest's notifications if + * they arrive at the new file descriptor in the switch, so there is no + * need to explicitly check for them. 
+ */ + if (poll_start) { +- event_notifier_init_fd(svq_kick, svq_kick_fd); + event_notifier_set(svq_kick); + event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier); + } +@@ -655,7 +655,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + */ + void vhost_svq_stop(VhostShadowVirtqueue *svq) + { +- event_notifier_set_handler(&svq->svq_kick, NULL); ++ vhost_svq_set_svq_kick_fd(svq, VHOST_FILE_UNBIND); + g_autofree VirtQueueElement *next_avail_elem = NULL; + + if (!svq->vq) { +-- +2.31.1 + diff --git a/kvm-vhost-Delete-useless-read-memory-barrier.patch b/kvm-vhost-Delete-useless-read-memory-barrier.patch new file mode 100644 index 0000000..f5aad51 --- /dev/null +++ b/kvm-vhost-Delete-useless-read-memory-barrier.patch @@ -0,0 +1,47 @@ +From 773d1bb4e9ea9ca704372e52569955937f91f15c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:28 +0200 +Subject: [PATCH 13/23] vhost: Delete useless read memory barrier +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [12/21] 0e238fe934b1fc2c7e10b6f693468bc25ea3243f (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +As discussed in previous series [1], this memory barrier is useless with +the atomic read of used idx at vhost_svq_more_used. Deleting it. 
+ +[1] https://lists.nongnu.org/archive/html/qemu-devel/2022-07/msg02616.html + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit cdfb1612ba0f9b76367c96ce26ba94fedc7a0e61) +--- + hw/virtio/vhost-shadow-virtqueue.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 7792f3db1d..d36afbc547 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -509,9 +509,6 @@ size_t vhost_svq_poll(VhostShadowVirtqueue *svq) + if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { + return 0; + } +- +- /* Make sure we read new used_idx */ +- smp_rmb(); + } while (true); + } + +-- +2.31.1 + diff --git a/kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch b/kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch new file mode 100644 index 0000000..81ed89e --- /dev/null +++ b/kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch @@ -0,0 +1,63 @@ +From 2f134d800a7ac521a637a0da2116b2603b12c8c0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:29 +0200 +Subject: [PATCH 14/23] vhost: Do not depend on !NULL VirtQueueElement on + vhost_svq_flush +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [13/21] 93ec7baa2a29031db25d86b7dc1a949388623370 (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +Since QEMU will be able to inject new elements on CVQ to restore the +state, we need not to depend on a VirtQueueElement to know if a new +element has been used by the device or not. 
Instead of check that, check +if there are new elements only using used idx on vhost_svq_flush. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 7599f71c11c08b90f173c35ded1aaa1fdca86f1b) +--- + hw/virtio/vhost-shadow-virtqueue.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index d36afbc547..c0e3c92e96 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -499,17 +499,20 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, + size_t vhost_svq_poll(VhostShadowVirtqueue *svq) + { + int64_t start_us = g_get_monotonic_time(); ++ uint32_t len; ++ + do { +- uint32_t len; +- VirtQueueElement *elem = vhost_svq_get_buf(svq, &len); +- if (elem) { +- return len; ++ if (vhost_svq_more_used(svq)) { ++ break; + } + + if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { + return 0; + } + } while (true); ++ ++ vhost_svq_get_buf(svq, &len); ++ return len; + } + + /** +-- +2.31.1 + diff --git a/kvm-vhost-Get-vring-base-from-vq-not-svq.patch b/kvm-vhost-Get-vring-base-from-vq-not-svq.patch new file mode 100644 index 0000000..1c8e586 --- /dev/null +++ b/kvm-vhost-Get-vring-base-from-vq-not-svq.patch @@ -0,0 +1,87 @@ +From 3f2ba7cce6b272a8b5c8953e8923e799e4aa7b88 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Mon, 18 Jul 2022 14:05:45 +0200 +Subject: [PATCH 02/23] vhost: Get vring base from vq, not svq +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/21] e7e0294bbc98f69ccdbc4af4715857e77b017f80 (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: Merged + +The SVQ vring 
used idx usually match with the guest visible one, as long +as all the guest buffers (GPA) maps to exactly one buffer within qemu's +VA. However, as we can see in virtqueue_map_desc, a single guest buffer +could map to many buffers in SVQ vring. + +Also, its also a mistake to rewind them at the source of migration. +Since VirtQueue is able to migrate the inflight descriptors, its +responsability of the destination to perform the rewind just in case it +cannot report the inflight descriptors to the device. + +This makes easier to migrate between backends or to recover them in +vhost devices that support set in flight descriptors. + +Fixes: 6d0b22266633 ("vdpa: Adapt vhost_vdpa_get_vring_base to SVQ") +Signed-off-by: Eugenio Pérez +Signed-off-by: Jason Wang +(cherry picked from commit 2fdac348fd3d243bb964937236af3cc27ae7af2b) +--- + hw/virtio/vhost-vdpa.c | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 03dc6014b0..96334ab5b6 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1177,7 +1177,18 @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev, + struct vhost_vring_state *ring) + { + struct vhost_vdpa *v = dev->opaque; ++ VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index); + ++ /* ++ * vhost-vdpa devices does not support in-flight requests. Set all of them ++ * as available. ++ * ++ * TODO: This is ok for networking, but other kinds of devices might ++ * have problems with these retransmissions. ++ */ ++ while (virtqueue_rewind(vq, 1)) { ++ continue; ++ } + if (v->shadow_vqs_enabled) { + /* + * Device vring base was set at device start. 
SVQ base is handled by +@@ -1193,21 +1204,10 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, + struct vhost_vring_state *ring) + { + struct vhost_vdpa *v = dev->opaque; +- int vdpa_idx = ring->index - dev->vq_index; + int ret; + + if (v->shadow_vqs_enabled) { +- VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx); +- +- /* +- * Setting base as last used idx, so destination will see as available +- * all the entries that the device did not use, including the in-flight +- * processing ones. +- * +- * TODO: This is ok for networking, but other kinds of devices might +- * have problems with these retransmissions. +- */ +- ring->num = svq->last_used_idx; ++ ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index); + return 0; + } + +-- +2.31.1 + diff --git a/kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch b/kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch new file mode 100644 index 0000000..7125f6a --- /dev/null +++ b/kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch @@ -0,0 +1,80 @@ +From 45305ab202fa2191962152e5a501a9a13e31a0b2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:26 +0200 +Subject: [PATCH 11/23] vhost: stop transfer elem ownership in + vhost_handle_guest_kick +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [10/21] 697a5c0ad59efe27abf447f7965091993bc39756 (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +It was easier to allow vhost_svq_add to handle the memory. Now that we +will allow qemu to add elements to a SVQ without the guest's knowledge, +it's better to handle it in the caller. 
+ +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit eb42df8bb2c92a7313343d97409cd99ccba25b25) +--- + hw/virtio/vhost-shadow-virtqueue.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index f420311b89..2ae47d90a1 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -233,9 +233,6 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) + /** + * Add an element to a SVQ. + * +- * The caller must check that there is enough slots for the new element. It +- * takes ownership of the element: In case of failure not ENOSPC, it is free. +- * + * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full + */ + int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, +@@ -252,7 +249,6 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, + + ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head); + if (unlikely(!ok)) { +- g_free(elem); + return -EINVAL; + } + +@@ -293,7 +289,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + virtio_queue_set_notification(svq->vq, false); + + while (true) { +- VirtQueueElement *elem; ++ g_autofree VirtQueueElement *elem; + int r; + + if (svq->next_guest_avail_elem) { +@@ -324,12 +320,14 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + * queue the current guest descriptor and ignore kicks + * until some elements are used. 
+ */ +- svq->next_guest_avail_elem = elem; ++ svq->next_guest_avail_elem = g_steal_pointer(&elem); + } + + /* VQ is full or broken, just return and ignore kicks */ + return; + } ++ /* elem belongs to SVQ or external caller now */ ++ elem = NULL; + } + + virtio_queue_set_notification(svq->vq, true); +-- +2.31.1 + diff --git a/kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch b/kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch new file mode 100644 index 0000000..b908739 --- /dev/null +++ b/kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch @@ -0,0 +1,55 @@ +From 78b7d9af26ae802b3ca0d7b794b366ab4d515647 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:27 +0200 +Subject: [PATCH 12/23] vhost: use SVQ element ndescs instead of opaque data + for desc validation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [11/21] 536ba65ff7241c4dc66362294ba8de4354260d6f (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +Since we're going to allow SVQ to add elements without the guest's +knowledge and without its own VirtQueueElement, it's easier to check if +an element is a valid head checking a different thing than the +VirtQueueElement. 
+ +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 70e0841722deb363b53cdcd465af12a0d1461b60) +--- + hw/virtio/vhost-shadow-virtqueue.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 2ae47d90a1..7792f3db1d 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -414,7 +414,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return NULL; + } + +- if (unlikely(!svq->desc_state[used_elem.id].elem)) { ++ if (unlikely(!svq->desc_state[used_elem.id].ndescs)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Device %s says index %u is used, but it was not available", + svq->vdev->name, used_elem.id); +@@ -422,6 +422,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + } + + num = svq->desc_state[used_elem.id].ndescs; ++ svq->desc_state[used_elem.id].ndescs = 0; + last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); + svq->desc_next[last_used_chain] = svq->free_head; + svq->free_head = used_elem.id; +-- +2.31.1 + diff --git a/kvm-vhost_net-Add-NetClientInfo-start-callback.patch b/kvm-vhost_net-Add-NetClientInfo-start-callback.patch new file mode 100644 index 0000000..40bf5f6 --- /dev/null +++ b/kvm-vhost_net-Add-NetClientInfo-start-callback.patch @@ -0,0 +1,73 @@ +From 6a6999311742b6dccdfce09f30742a63d72d1bd7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:30 +0200 +Subject: [PATCH 15/23] vhost_net: Add NetClientInfo start callback +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [14/21] df6a96ae3aec02ecae793bdbd8e9c2fcfac7871a (eperezmartin/qemu-kvm) 
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +This is used by the backend to perform actions before the device is +started. + +In particular, vdpa net use it to map CVQ buffers to the device, so it +can send control commands using them. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 80bda0e674fd0b439ac627ab7ecdbd4a1b46d525) +--- + hw/net/vhost_net.c | 7 +++++++ + include/net/net.h | 2 ++ + 2 files changed, 9 insertions(+) + +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index d6d7c51f62..1005f9d8e6 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -244,6 +244,13 @@ static int vhost_net_start_one(struct vhost_net *net, + struct vhost_vring_file file = { }; + int r; + ++ if (net->nc->info->start) { ++ r = net->nc->info->start(net->nc); ++ if (r < 0) { ++ return r; ++ } ++ } ++ + r = vhost_dev_enable_notifiers(&net->dev, dev); + if (r < 0) { + goto fail_notifiers; +diff --git a/include/net/net.h b/include/net/net.h +index 523136c7ac..ad9e80083a 100644 +--- a/include/net/net.h ++++ b/include/net/net.h +@@ -44,6 +44,7 @@ typedef struct NICConf { + + typedef void (NetPoll)(NetClientState *, bool enable); + typedef bool (NetCanReceive)(NetClientState *); ++typedef int (NetStart)(NetClientState *); + typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); + typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); + typedef void (NetCleanup) (NetClientState *); +@@ -71,6 +72,7 @@ typedef struct NetClientInfo { + NetReceive *receive_raw; + NetReceiveIOV *receive_iov; + NetCanReceive *can_receive; ++ NetStart *start; + NetCleanup *cleanup; + LinkStatusChanged *link_status_changed; + QueryRxFilter *query_rx_filter; +-- +2.31.1 + diff --git a/kvm-vhost_net-Add-NetClientInfo-stop-callback.patch b/kvm-vhost_net-Add-NetClientInfo-stop-callback.patch new file mode 100644 index 
0000000..c622824 --- /dev/null +++ b/kvm-vhost_net-Add-NetClientInfo-stop-callback.patch @@ -0,0 +1,68 @@ +From effd0ed379deb43bb850f1aeff24fa85935d7f52 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:31 +0200 +Subject: [PATCH 16/23] vhost_net: Add NetClientInfo stop callback +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [15/21] 9f8a3e9bfb0d21fa0479f54a7a17cb738aa46359 (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +Used by the backend to perform actions after the device is stopped. + +In particular, vdpa net use it to unmap CVQ buffers to the device, +cleaning the actions performed in prepare(). + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit c6544e2331d721627fa7356da3592bcb46340f1b) +--- + hw/net/vhost_net.c | 3 +++ + include/net/net.h | 2 ++ + 2 files changed, 5 insertions(+) + +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index 1005f9d8e6..275ece5324 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -320,6 +320,9 @@ static void vhost_net_stop_one(struct vhost_net *net, + net->nc->info->poll(net->nc, true); + } + vhost_dev_stop(&net->dev, dev); ++ if (net->nc->info->stop) { ++ net->nc->info->stop(net->nc); ++ } + vhost_dev_disable_notifiers(&net->dev, dev); + } + +diff --git a/include/net/net.h b/include/net/net.h +index ad9e80083a..476ad45b9a 100644 +--- a/include/net/net.h ++++ b/include/net/net.h +@@ -45,6 +45,7 @@ typedef struct NICConf { + typedef void (NetPoll)(NetClientState *, bool enable); + typedef bool (NetCanReceive)(NetClientState *); + typedef int (NetStart)(NetClientState *); ++typedef void 
(NetStop)(NetClientState *); + typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); + typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); + typedef void (NetCleanup) (NetClientState *); +@@ -73,6 +74,7 @@ typedef struct NetClientInfo { + NetReceiveIOV *receive_iov; + NetCanReceive *can_receive; + NetStart *start; ++ NetStop *stop; + NetCleanup *cleanup; + LinkStatusChanged *link_status_changed; + QueryRxFilter *query_rx_filter; +-- +2.31.1 + diff --git a/kvm-vhost_net-add-NetClientState-load-callback.patch b/kvm-vhost_net-add-NetClientState-load-callback.patch new file mode 100644 index 0000000..92a9078 --- /dev/null +++ b/kvm-vhost_net-add-NetClientState-load-callback.patch @@ -0,0 +1,73 @@ +From 6a5c236b95ce475c556ccd92c2135ad48474e8fb Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:35 +0200 +Subject: [PATCH 20/23] vhost_net: add NetClientState->load() callback +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 116: vdpa: Restore device state on destination +RH-Bugzilla: 2114060 +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [19/21] 439b4133a757b2f1c5f4a1441eca25329896491a (eperezmartin/qemu-kvm) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 +Upstream status: git@github.com:jasowang/qemu.git net-next + +It allows per-net client operations right after device's successful +start. In particular, to load the device status. + +Vhost-vdpa net will use it to add the CVQ buffers to restore the device +status. 
+ +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 302f3d20e68a8a120d431f7ff7cb02a75917f54c) +--- + hw/net/vhost_net.c | 7 +++++++ + include/net/net.h | 2 ++ + 2 files changed, 9 insertions(+) + +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index 275ece5324..ea3a8be1c9 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -281,6 +281,13 @@ static int vhost_net_start_one(struct vhost_net *net, + } + } + } ++ ++ if (net->nc->info->load) { ++ r = net->nc->info->load(net->nc); ++ if (r < 0) { ++ goto fail; ++ } ++ } + return 0; + fail: + file.fd = -1; +diff --git a/include/net/net.h b/include/net/net.h +index 476ad45b9a..81d0b21def 100644 +--- a/include/net/net.h ++++ b/include/net/net.h +@@ -45,6 +45,7 @@ typedef struct NICConf { + typedef void (NetPoll)(NetClientState *, bool enable); + typedef bool (NetCanReceive)(NetClientState *); + typedef int (NetStart)(NetClientState *); ++typedef int (NetLoad)(NetClientState *); + typedef void (NetStop)(NetClientState *); + typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); + typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); +@@ -74,6 +75,7 @@ typedef struct NetClientInfo { + NetReceiveIOV *receive_iov; + NetCanReceive *can_receive; + NetStart *start; ++ NetLoad *load; + NetStop *stop; + NetCleanup *cleanup; + LinkStatusChanged *link_status_changed; +-- +2.31.1 + diff --git a/kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch b/kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch new file mode 100644 index 0000000..8f1fb3e --- /dev/null +++ b/kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch @@ -0,0 +1,117 @@ +From cbcab5ed1686fddeb2c6adb3a3f6ed0678a36e71 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 8 Aug 2022 12:21:34 -0400 +Subject: [PATCH 23/23] virtio-scsi: fix race in virtio_scsi_dataplane_start() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 211: 
virtio-scsi: fix race in virtio_scsi_dataplane_start() (RHEL src-git) +RH-Commit: [1/1] 2d4964d8863e259326a73fb918fa2f5f63b4a60a +RH-Bugzilla: 2099541 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Reitz +RH-Acked-by: Paolo Bonzini + +As soon as virtio_scsi_data_plane_start() attaches host notifiers the +IOThread may start virtqueue processing. There is a race between +IOThread virtqueue processing and virtio_scsi_data_plane_start() because +it only assigns s->dataplane_started after attaching host notifiers. + +When a virtqueue handler function in the IOThread calls +virtio_scsi_defer_to_dataplane() it may see !s->dataplane_started and +attempt to start dataplane even though we're already in the IOThread: + + #0 0x00007f67b360857c __pthread_kill_implementation (libc.so.6 + 0xa257c) + #1 0x00007f67b35bbd56 raise (libc.so.6 + 0x55d56) + #2 0x00007f67b358e833 abort (libc.so.6 + 0x28833) + #3 0x00007f67b358e75b __assert_fail_base.cold (libc.so.6 + 0x2875b) + #4 0x00007f67b35b4cd6 __assert_fail (libc.so.6 + 0x4ecd6) + #5 0x000055ca87fd411b memory_region_transaction_commit (qemu-kvm + 0x67511b) + #6 0x000055ca87e17811 virtio_pci_ioeventfd_assign (qemu-kvm + 0x4b8811) + #7 0x000055ca87e14836 virtio_bus_set_host_notifier (qemu-kvm + 0x4b5836) + #8 0x000055ca87f8e14e virtio_scsi_set_host_notifier (qemu-kvm + 0x62f14e) + #9 0x000055ca87f8dd62 virtio_scsi_dataplane_start (qemu-kvm + 0x62ed62) + #10 0x000055ca87e14610 virtio_bus_start_ioeventfd (qemu-kvm + 0x4b5610) + #11 0x000055ca87f8c29a virtio_scsi_handle_ctrl (qemu-kvm + 0x62d29a) + #12 0x000055ca87fa5902 virtio_queue_host_notifier_read (qemu-kvm + 0x646902) + #13 0x000055ca882c099e aio_dispatch_handler (qemu-kvm + 0x96199e) + #14 0x000055ca882c1761 aio_poll (qemu-kvm + 0x962761) + #15 0x000055ca880e1052 iothread_run (qemu-kvm + 0x782052) + #16 0x000055ca882c562a qemu_thread_start (qemu-kvm + 0x96662a) + +This patch assigns s->dataplane_started before attaching host notifiers +so 
that virtqueue handler functions that run in the IOThread before +virtio_scsi_data_plane_start() returns correctly identify that dataplane +does not need to be started. This fix is taken from the virtio-blk +dataplane code and it's worth adding a comment in virtio-blk as well to +explain why it works. + +Note that s->dataplane_started does not need the AioContext lock because +it is set before attaching host notifiers and cleared after detaching +host notifiers. In other words, the IOThread always sees the value true +and the main loop thread does not modify it while the IOThread is +active. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2099541 +Reported-by: Qing Wang +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220808162134.240405-1-stefanha@redhat.com> +Reviewed-by: Emanuele Giuseppe Esposito +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 9a4b6a63aee885931622549c85669dcca03bed39) +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Miroslav Rezanina +--- + hw/block/dataplane/virtio-blk.c | 5 +++++ + hw/scsi/virtio-scsi-dataplane.c | 11 ++++++++--- + 2 files changed, 13 insertions(+), 3 deletions(-) + +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index 49276e46f2..26f965cabc 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -219,6 +219,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + + memory_region_transaction_commit(); + ++ /* ++ * These fields are visible to the IOThread so we rely on implicit barriers ++ * in aio_context_acquire() on the write side and aio_notify_accept() on ++ * the read side. 
++ */ + s->starting = false; + vblk->dataplane_started = true; + trace_virtio_blk_data_plane_start(s); +diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c +index 8bb6e6acfc..20bb91766e 100644 +--- a/hw/scsi/virtio-scsi-dataplane.c ++++ b/hw/scsi/virtio-scsi-dataplane.c +@@ -136,6 +136,14 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) + + memory_region_transaction_commit(); + ++ /* ++ * These fields are visible to the IOThread so we rely on implicit barriers ++ * in aio_context_acquire() on the write side and aio_notify_accept() on ++ * the read side. ++ */ ++ s->dataplane_starting = false; ++ s->dataplane_started = true; ++ + aio_context_acquire(s->ctx); + virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); + virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx); +@@ -143,9 +151,6 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) + for (i = 0; i < vs->conf.num_queues; i++) { + virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); + } +- +- s->dataplane_starting = false; +- s->dataplane_started = true; + aio_context_release(s->ctx); + return 0; + +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 0c86edd..7e3e56e 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.0.0 -Release: 11%{?rcrel}%{?dist}%{?cc_suffix} +Release: 12%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -444,6 +444,52 @@ Patch144: kvm-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch Patch145: kvm-vdpa-Fix-memory-listener-deletions-of-iova-tree.patch # For bz#2116876 - Fixes for vDPA control virtqueue support in Qemu Patch146: kvm-vdpa-Fix-file-descriptor-leak-on-get-features-error.patch +# For bz#2120275 - Wrong max_sectors_kb and Maximum transfer length on the pass-through device [rhel-9.1] +Patch147: kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch148: kvm-vhost-Get-vring-base-from-vq-not-svq.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch149: kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch150: kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch151: kvm-util-Return-void-on-iova_tree_remove.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch152: kvm-util-accept-iova_tree_remove_parameter-by-value.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch153: kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch154: kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch155: kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch156: kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch +# For bz#2114060 - vDPA state restore support through control 
virtqueue in Qemu +Patch157: kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch158: kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch159: kvm-vhost-Delete-useless-read-memory-barrier.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch160: kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch161: kvm-vhost_net-Add-NetClientInfo-start-callback.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch162: kvm-vhost_net-Add-NetClientInfo-stop-callback.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch163: kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch164: kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch165: kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch166: kvm-vhost_net-add-NetClientState-load-callback.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch167: kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch +# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu +Patch168: kvm-vdpa-Delete-CVQ-migration-blocker.patch +# For bz#2099541 - qemu coredump with error Assertion `qemu_mutex_iothread_locked()' failed when repeatly hotplug/unplug disks in pause status +Patch169: kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch # Source-git patches @@ -1479,6 +1525,37 @@ useradd -r -u 
107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Fri Aug 26 2022 Miroslav Rezanina - 7.0.0-12 +- kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch [bz#2120275] +- kvm-vhost-Get-vring-base-from-vq-not-svq.patch [bz#2114060] +- kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch [bz#2114060] +- kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch [bz#2114060] +- kvm-util-Return-void-on-iova_tree_remove.patch [bz#2114060] +- kvm-util-accept-iova_tree_remove_parameter-by-value.patch [bz#2114060] +- kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch [bz#2114060] +- kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch [bz#2114060] +- kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch [bz#2114060] +- kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch [bz#2114060] +- kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch [bz#2114060] +- kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch [bz#2114060] +- kvm-vhost-Delete-useless-read-memory-barrier.patch [bz#2114060] +- kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch [bz#2114060] +- kvm-vhost_net-Add-NetClientInfo-start-callback.patch [bz#2114060] +- kvm-vhost_net-Add-NetClientInfo-stop-callback.patch [bz#2114060] +- kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch [bz#2114060] +- kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch [bz#2114060] +- kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch [bz#2114060] +- kvm-vhost_net-add-NetClientState-load-callback.patch [bz#2114060] +- kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch [bz#2114060] +- kvm-vdpa-Delete-CVQ-migration-blocker.patch [bz#2114060] +- kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch [bz#2099541] +- Resolves: bz#2120275 + (Wrong max_sectors_kb and Maximum transfer length on the pass-through device [rhel-9.1]) +- Resolves: bz#2114060 + (vDPA state restore support through control virtqueue in Qemu) +- Resolves: 
bz#2099541 + (qemu coredump with error Assertion `qemu_mutex_iothread_locked()' failed when repeatly hotplug/unplug disks in pause status) + * Mon Aug 15 2022 Miroslav Rezanina - 7.0.0-11 - kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch [bz#2107466] - kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch [bz#2107466] From 91ddd1ed64f7eef6ed21642bd126f1b25a42cf98 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 5 Sep 2022 04:13:56 -0400 Subject: [PATCH 168/195] * Mon Sep 05 2022 Miroslav Rezanina - 7.1.0-1 - Rebase to QEMU 7.1.0 [bz#2111769] - Resolves: bz#2111769 (Rebase to QEMU 7.1.0) --- .gitignore | 2 + 0004-Initial-redhat-build.patch | 38 +- 0005-Re-enable-capstone-internal-build.patch | 251 +++++++++ ...0006-Enable-disable-devices-for-RHEL.patch | 147 ++--- ...Machine-type-related-general-changes.patch | 101 ++-- ...ch => 0008-Add-aarch64-machine-types.patch | 351 +++++++++++- ...atch => 0009-Add-ppc64-machine-types.patch | 58 +- ...atch => 0010-Add-s390x-machine-types.patch | 148 ++++-- ...tch => 0011-Add-x86_64-machine-types.patch | 279 +++++++--- ...heck.patch => 0012-Enable-make-check.patch | 107 +++- ...mber-of-devices-that-can-be-assigned.patch | 22 +- ...Add-support-statement-to-help-output.patch | 8 +- ...lly-limit-the-maximum-number-of-CPUs.patch | 45 -- ...documentation-instead-of-qemu-system.patch | 6 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 4 +- ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 66 --- ...on-warning-when-opening-v2-images-rw.patch | 6 +- ...ntroduce-upstream-7.0-compat-changes.patch | 116 ++++ ...oduce-RHEL-9.0.0-hw-compat-structure.patch | 135 ----- ...90x-machine-type-compatibility-for-r.patch | 38 -- ...ve-s3-s4-suspend-disabling-to-compat.patch | 70 --- ...sync-missed-zero-copy-migration-stat.patch | 87 --- kvm-Enable-virtio-iommu-pci-on-aarch64.patch | 41 -- kvm-Enable-virtio-iommu-pci-on-x86_64.patch | 41 -- ...oduce-event-loop-base-abstract-class.patch | 503 ------------------ 
...lags-on-io_writev-and-introduce-io_f.patch | 420 --------------- ...et-Add-support-for-MSG_ZEROCOPY-IPV6.patch | 56 -- ...-Fix-zero-copy-flush-returning-code-.patch | 65 --- ...-Fix-zero-copy-send-so-socket-flush-.patch | 58 -- ...-Implement-io_writev-zero-copy-flag-.patch | 249 --------- ...-Introduce-assert-and-reduce-ifdefs-.patch | 82 --- ...y-AArch64-Drop-unsupported-CPU-types.patch | 237 --------- ...avocado-Switch-aarch64-tests-from-a5.patch | 95 ---- ...lly-limit-the-maximum-number-of-CPUs.patch | 58 -- ...vert-migration-Simplify-unqueue_page.patch | 134 ----- ...si-Reject-scsi-cd-if-data-plane-enab.patch | 51 -- ...aarch64-softmmu-Enable-CONFIG_VIRTIO.patch | 41 -- ...ame-qemu_coroutine_inc-dec_pool_size.patch | 101 ---- ...outine-Revert-to-constant-batch-size.patch | 138 ----- ...ontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch | 132 ----- ...outine-use-QEMU_DEFINE_STATIC_CO_TLS.patch | 139 ----- ...-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch | 99 ---- ...d-Use-existing-CPU-topology-to-build.patch | 179 ------- ...ider-SMP-configuration-in-CPU-topolo.patch | 74 --- ...-virt-Fix-CPU-s-default-NUMA-node-ID.patch | 88 --- ...missing-initialization-in-instance-c.patch | 56 -- ...ve-the-dtb-kaslr-seed-machine-option.patch | 76 --- ...vent-end-of-track-overrun-CVE-2021-3.patch | 96 ---- ...virtio-Replace-g_memdup-by-g_memdup2.patch | 95 ---- ...08-Fix-when-missing-user_allow_other.patch | 52 -- ...-Test-new-refcount-rebuild-algorithm.patch | 445 ---------------- ...on-t-use-perror-without-useful-errno.patch | 62 --- ...n-why-max-batch-is-checked-in-laio_i.patch | 49 -- ...balanced-plugged-counter-in-laio_io_.patch | 56 -- kvm-meson-create-have_vhost_-variables.patch | 154 ------ ...ave_vhost_-variables-to-pick-sources.patch | 213 -------- ...docker-test-build-alpine-when-includ.patch | 87 --- ...migration-Add-migrate_use_tls-helper.patch | 106 ---- ...ro-copy-send-parameter-for-QMP-HMP-f.patch | 250 --------- ...migrate-recover-to-run-multiple-time.patch | 98 ---- 
...false-positive-on-non-supported-scen.patch | 93 ---- ...-zero_copy_send-from-migration-param.patch | 289 ---------- kvm-migration-Fix-operator-type.patch | 47 -- ...maining-params-has_-true-in-migratio.patch | 62 --- ...d-Report-to-user-when-zerocopy-not-w.patch | 83 --- ...es-before-compressing-them-with-zlib.patch | 142 ----- ...t-zero-copy-write-in-multifd-migrati.patch | 182 ------- ...der-packet-without-flags-if-zero-cop.patch | 102 ---- ...send_sync_main-now-returns-negative-.patch | 163 ------ ...-MULTI_CONN-for-shared-writable-expo.patch | 381 ------------- ...-Fix-booting-with-logical-block-size.patch | 63 --- ...-Split-virtio-scsi-code-from-virtio_.patch | 180 ------- ...-bootmap-Improve-the-guessing-logic-.patch | 102 ---- ...-netboot.mak-Ignore-Clang-s-warnings.patch | 78 --- ...-virtio-Beautify-the-code-for-readin.patch | 56 -- ...-virtio-Introduce-a-macro-for-the-DA.patch | 63 --- ...-virtio-Read-device-config-after-fea.patch | 67 --- ...-virtio-Set-missing-status-bits-whil.patch | 93 ---- ...-virtio-blkdev-Remove-virtio_assume_.patch | 101 ---- ...-virtio-blkdev-Request-the-right-fea.patch | 63 --- ...-virtio-blkdev-Simplify-fix-virtio_i.patch | 124 ----- kvm-qapi-machine.json-Add-cluster-id.patch | 126 ----- ...d-errp-to-rebuild_refcount_structure.patch | 162 ------ ...mprove-refcount-structure-rebuilding.patch | 465 ---------------- ...ss-max-connections-to-blockdev-layer.patch | 92 ---- ...Correct-CPU-and-NUMA-association-in-.patch | 100 ---- ...Specify-CPU-topology-in-aarch64_numa.patch | 68 --- ...nux-headers-linux-kvm.h-to-v5.18-rc6.patch | 106 ---- ...c-Fix-emulated-block-limits-VPD-page.patch | 96 ---- ...m-Add-a-stub-function-for-TPM_IS_CRB.patch | 54 -- ...arget-arm-deprecate-named-CPU-models.patch | 129 ----- ...recate-CPUs-older-than-x86_64-v2-ABI.patch | 273 ---------- ...odels-Fix-ppc_cpu_aliases-list-for-R.patch | 48 -- ...-s390x-deprecate-CPUs-older-than-z14.patch | 194 ------- ...-Honor-storage-keys-during-emulation.patch | 103 
---- ...date-aarch64_virt-test-to-exercise-c.patch | 157 ------ ...test-Add-a-regression-test-for-CVE-2.patch | 119 ----- ...util-Return-void-on-iova_tree_remove.patch | 70 --- ...-iova_tree_remove_parameter-by-value.patch | 182 ------- ...base-Introduce-options-to-set-the-th.patch | 385 -------------- ...oop-Introduce-the-main-loop-into-QOM.patch | 233 -------- kvm-vdpa-Add-device-migration-blocker.patch | 106 ---- ...tio-net-mac-address-via-CVQ-at-start.patch | 87 --- ...-Add-x-svq-to-NetdevVhostVDPAOptions.patch | 223 -------- ...compiler-to-squash-reads-to-used-idx.patch | 65 --- ...ffer-CVQ-support-on-shadow-virtqueue.patch | 323 ----------- kvm-vdpa-Delete-CVQ-migration-blocker.patch | 98 ---- ...t-vhost_vdpa_dma_map-and-unmap-calls.patch | 84 --- ...-features-part-from-vhost_vdpa_get_m.patch | 108 ---- ...ex-calculus-at-vhost_vdpa_get_vring_.patch | 50 -- ...escriptor-leak-on-get-features-error.patch | 58 -- ...ex-calculus-at-vhost_vdpa_svqs_start.patch | 45 -- ...mory-listener-deletions-of-iova-tree.patch | 61 --- ...Make-SVQ-vring-unmapping-return-void.patch | 133 ----- ...d-buffers-map-to-start-of-net-device.patch | 251 --------- ...SVQ-vring-from-iova_tree-at-shutdown.patch | 49 -- ...a-Skip-the-maps-not-in-the-iova-tree.patch | 48 -- ...-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch | 79 --- ...et_vhost_vdpa_cvq_info-NetClientInfo.patch | 62 --- ...ave-failed-dma-maps-in-SVQ-iova-tree.patch | 83 --- ...st_vdpa_net_cvq_add-from-vhost_vdpa_.patch | 153 ------ kvm-vdpa-manual-forward-CVQ-buffers.patch | 166 ------ ...ve-spurious-tpm-crb-cmd-misalignment.patch | 114 ---- ...ve-spurious-warning-on-vfio_listener.patch | 78 --- kvm-vhost-Add-SVQDescState.patch | 135 ----- ...vhost-Add-svq-avail_handler-callback.patch | 164 ------ ...re-new-kick-fd-on-vhost_svq_set_svq_.patch | 67 --- ...heck-for-queue-full-at-vhost_svq_add.patch | 134 ----- ...-vhost_svq_add-from-VirtQueueElement.patch | 138 ----- ...t-Delete-useless-read-memory-barrier.patch | 47 -- 
...end-on-NULL-VirtQueueElement-on-vhos.patch | 63 --- kvm-vhost-Expose-vhost_svq_add.patch | 73 --- ...Fix-device-s-used-descriptor-dequeue.patch | 83 --- ...Fix-element-in-vhost_svq_add-failure.patch | 68 --- ...vhost-Get-vring-base-from-vq-not-svq.patch | 87 --- ...vhost_svq_kick-call-to-vhost_svq_add.patch | 61 --- kvm-vhost-Reorder-vhost_svq_kick.patch | 88 --- ...k-descriptor-chain-in-private-at-SVQ.patch | 123 ----- ...rack-number-of-descs-in-SVQDescState.patch | 81 --- kvm-vhost-add-vhost_svq_poll.patch | 92 ---- kvm-vhost-add-vhost_svq_push_elem.patch | 83 --- ...iptor-translation-to-vhost_svq_vring.patch | 120 ----- ...-improper-cleanup-in-vhost_net_start.patch | 56 -- ...dd-stubs-for-when-no-virtio-net-devi.patch | 87 --- ...fer-elem-ownership-in-vhost_handle_g.patch | 80 --- ...ement-ndescs-instead-of-opaque-data-.patch | 55 -- ...backend-feature-should-set-only-once.patch | 58 -- ...e-name-and-polarity-for-vhost_vdpa_o.patch | 123 ----- ...mproper-cleanup-in-net_init_vhost_vd.patch | 48 -- ...net-Add-NetClientInfo-start-callback.patch | 73 --- ..._net-Add-NetClientInfo-stop-callback.patch | 68 --- ...net-add-NetClientState-load-callback.patch | 73 --- ...-an-assert-check-in-translate-routin.patch | 46 -- ...-bypass-mode-support-to-assigned-dev.patch | 250 --------- ...irtio-iommu-Fix-migration-regression.patch | 54 -- ...ix-the-partial-copy-of-probe-request.patch | 67 --- ...Use-recursive-lock-to-avoid-deadlock.patch | 141 ----- kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch | 69 --- ...rtio-net-Expose-ctrl-virtqueue-logic.patch | 169 ------ ...-ctrl_vq-index-for-non-mq-guest-for-.patch | 143 ----- ...-handle-mq-request-in-userspace-hand.patch | 109 ---- ...-vhost_dev-and-notifiers-for-cvq-onl.patch | 52 -- ...i-clean-up-virtio_scsi_handle_cmd_vq.patch | 77 --- ...-clean-up-virtio_scsi_handle_ctrl_vq.patch | 65 --- ...clean-up-virtio_scsi_handle_event_vq.patch | 62 --- ...t-waste-CPU-polling-the-event-virtqu.patch | 103 ---- 
...ctrl-and-event-handler-functions-in-.patch | 119 ----- ...-race-in-virtio_scsi_dataplane_start.patch | 117 ---- ...-request-related-items-from-.h-to-.c.patch | 168 ------ qemu-kvm.spec | 375 +------------ sources | 3 +- 171 files changed, 1352 insertions(+), 18546 deletions(-) create mode 100644 0005-Re-enable-capstone-internal-build.patch rename 0005-Enable-disable-devices-for-RHEL.patch => 0006-Enable-disable-devices-for-RHEL.patch (84%) rename 0006-Machine-type-related-general-changes.patch => 0007-Machine-type-related-general-changes.patch (89%) rename 0007-Add-aarch64-machine-types.patch => 0008-Add-aarch64-machine-types.patch (50%) rename 0008-Add-ppc64-machine-types.patch => 0009-Add-ppc64-machine-types.patch (90%) rename 0009-Add-s390x-machine-types.patch => 0010-Add-s390x-machine-types.patch (54%) rename 0010-Add-x86_64-machine-types.patch => 0011-Add-x86_64-machine-types.patch (76%) rename 0011-Enable-make-check.patch => 0012-Enable-make-check.patch (64%) rename 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch => 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch (87%) rename 0013-Add-support-statement-to-help-output.patch => 0014-Add-support-statement-to-help-output.patch (88%) delete mode 100644 0014-globally-limit-the-maximum-number-of-CPUs.patch rename 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch => 0016-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch (95%) delete mode 100644 0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch rename 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch => 0017-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch (94%) create mode 100644 0018-Introduce-upstream-7.0-compat-changes.patch delete mode 100644 0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch delete mode 100644 0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch delete mode 100644 0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch delete mode 100644 
kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch delete mode 100644 kvm-Enable-virtio-iommu-pci-on-aarch64.patch delete mode 100644 kvm-Enable-virtio-iommu-pci-on-x86_64.patch delete mode 100644 kvm-Introduce-event-loop-base-abstract-class.patch delete mode 100644 kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch delete mode 100644 kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch delete mode 100644 kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch delete mode 100644 kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch delete mode 100644 kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch delete mode 100644 kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch delete mode 100644 kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch delete mode 100644 kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch delete mode 100644 kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch delete mode 100644 kvm-Revert-migration-Simplify-unqueue_page.patch delete mode 100644 kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch delete mode 100644 kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch delete mode 100644 kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch delete mode 100644 kvm-coroutine-Revert-to-constant-batch-size.patch delete mode 100644 kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch delete mode 100644 kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch delete mode 100644 kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch delete mode 100644 kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch delete mode 100644 kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch delete mode 100644 kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch delete mode 100644 kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch delete mode 100644 kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch delete mode 100644 
kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch delete mode 100644 kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch delete mode 100644 kvm-iotests-108-Fix-when-missing-user_allow_other.patch delete mode 100644 kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch delete mode 100644 kvm-kvm-don-t-use-perror-without-useful-errno.patch delete mode 100644 kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch delete mode 100644 kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch delete mode 100644 kvm-meson-create-have_vhost_-variables.patch delete mode 100644 kvm-meson-use-have_vhost_-variables-to-pick-sources.patch delete mode 100644 kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch delete mode 100644 kvm-migration-Add-migrate_use_tls-helper.patch delete mode 100644 kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch delete mode 100644 kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch delete mode 100644 kvm-migration-Avoid-false-positive-on-non-supported-scen.patch delete mode 100644 kvm-migration-Change-zero_copy_send-from-migration-param.patch delete mode 100644 kvm-migration-Fix-operator-type.patch delete mode 100644 kvm-migration-add-remaining-params-has_-true-in-migratio.patch delete mode 100644 kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch delete mode 100644 kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch delete mode 100644 kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch delete mode 100644 kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch delete mode 100644 kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch delete mode 100644 kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch delete mode 100644 kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch delete mode 100644 kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch delete mode 100644 
kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch delete mode 100644 kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch delete mode 100644 kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch delete mode 100644 kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch delete mode 100644 kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch delete mode 100644 kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch delete mode 100644 kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch delete mode 100644 kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch delete mode 100644 kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch delete mode 100644 kvm-qapi-machine.json-Add-cluster-id.patch delete mode 100644 kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch delete mode 100644 kvm-qcow2-Improve-refcount-structure-rebuilding.patch delete mode 100644 kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch delete mode 100644 kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch delete mode 100644 kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch delete mode 100644 kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch delete mode 100644 kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch delete mode 100644 kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch delete mode 100644 kvm-target-arm-deprecate-named-CPU-models.patch delete mode 100644 kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch delete mode 100644 kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch delete mode 100644 kvm-target-s390x-deprecate-CPUs-older-than-z14.patch delete mode 100644 kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch delete mode 100644 kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch delete mode 100644 kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch delete mode 100644 
kvm-util-Return-void-on-iova_tree_remove.patch delete mode 100644 kvm-util-accept-iova_tree_remove_parameter-by-value.patch delete mode 100644 kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch delete mode 100644 kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch delete mode 100644 kvm-vdpa-Add-device-migration-blocker.patch delete mode 100644 kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch delete mode 100644 kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch delete mode 100644 kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch delete mode 100644 kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch delete mode 100644 kvm-vdpa-Delete-CVQ-migration-blocker.patch delete mode 100644 kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch delete mode 100644 kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch delete mode 100644 kvm-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch delete mode 100644 kvm-vdpa-Fix-file-descriptor-leak-on-get-features-error.patch delete mode 100644 kvm-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch delete mode 100644 kvm-vdpa-Fix-memory-listener-deletions-of-iova-tree.patch delete mode 100644 kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch delete mode 100644 kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch delete mode 100644 kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch delete mode 100644 kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch delete mode 100644 kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch delete mode 100644 kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch delete mode 100644 kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch delete mode 100644 kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch delete mode 100644 kvm-vdpa-manual-forward-CVQ-buffers.patch delete mode 100644 kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch delete mode 100644 
kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch delete mode 100644 kvm-vhost-Add-SVQDescState.patch delete mode 100644 kvm-vhost-Add-svq-avail_handler-callback.patch delete mode 100644 kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch delete mode 100644 kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch delete mode 100644 kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch delete mode 100644 kvm-vhost-Delete-useless-read-memory-barrier.patch delete mode 100644 kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch delete mode 100644 kvm-vhost-Expose-vhost_svq_add.patch delete mode 100644 kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch delete mode 100644 kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch delete mode 100644 kvm-vhost-Get-vring-base-from-vq-not-svq.patch delete mode 100644 kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch delete mode 100644 kvm-vhost-Reorder-vhost_svq_kick.patch delete mode 100644 kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch delete mode 100644 kvm-vhost-Track-number-of-descs-in-SVQDescState.patch delete mode 100644 kvm-vhost-add-vhost_svq_poll.patch delete mode 100644 kvm-vhost-add-vhost_svq_push_elem.patch delete mode 100644 kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch delete mode 100644 kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch delete mode 100644 kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch delete mode 100644 kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch delete mode 100644 kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch delete mode 100644 kvm-vhost-vdpa-backend-feature-should-set-only-once.patch delete mode 100644 kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch delete mode 100644 kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch delete mode 100644 kvm-vhost_net-Add-NetClientInfo-start-callback.patch delete mode 100644 
kvm-vhost_net-Add-NetClientInfo-stop-callback.patch delete mode 100644 kvm-vhost_net-add-NetClientState-load-callback.patch delete mode 100644 kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch delete mode 100644 kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch delete mode 100644 kvm-virtio-iommu-Fix-migration-regression.patch delete mode 100644 kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch delete mode 100644 kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch delete mode 100644 kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch delete mode 100644 kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch delete mode 100644 kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch delete mode 100644 kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch delete mode 100644 kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch delete mode 100644 kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch delete mode 100644 kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch delete mode 100644 kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch delete mode 100644 kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch delete mode 100644 kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch delete mode 100644 kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch delete mode 100644 kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch diff --git a/.gitignore b/.gitignore index 908f9b5..cf165db 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,5 @@ /qemu-6.1.0.tar.xz /qemu-6.2.0.tar.xz /qemu-7.0.0.tar.xz +/capstone.tar.gz +/qemu-7.1.0.tar.xz diff --git a/0004-Initial-redhat-build.patch b/0004-Initial-redhat-build.patch index 94cf91c..c9e1d04 100644 --- a/0004-Initial-redhat-build.patch +++ b/0004-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From fc113ecd7c99646a7ced0b99570b5927ae6d595f Mon Sep 17 00:00:00 2001 +From 476f040f14a9287efb6f0bf5b3ca97844bf9fdc3 Mon Sep 17 
00:00:00 2001 From: Miroslav Rezanina Date: Wed, 26 May 2021 10:56:02 +0200 Subject: Initial redhat build @@ -13,7 +13,7 @@ several issues are fixed in QEMU tree: We disable make check due to issues with some of the tests. -This rebase is based on qemu-kvm-6.2.0-13.el9 +This rebase is based on qemu-kvm-7.0.0-11.el9 Signed-off-by: Miroslav Rezanina -- @@ -50,6 +50,12 @@ Rebase changes (7.0.0): - Change permissions on installing tests/Makefile.include - Remove ssh block driver +Rebase changes (7.1.0 rc0): +- --disable-vnc-png renamed to --disable-png (upstream) +- removed --disable-vhost-vsock and --disable-vhost-scsi +- capstone submodule removed +- Temporary include capstone build + Merged patches (6.0.0): - 605758c902 Limit build on Power to qemu-img and qemu-ga only @@ -146,7 +152,7 @@ Signed-off-by: Miroslav Rezanina --- .distro/85-kvm.preset | 5 - .distro/Makefile | 100 + - .distro/Makefile.common | 40 + + .distro/Makefile.common | 41 + .distro/README.tests | 39 + .distro/ksm.service | 13 - .distro/ksm.sysconfig | 4 - @@ -158,18 +164,17 @@ Signed-off-by: Miroslav Rezanina .distro/kvm-setup.service | 14 - .distro/modules-load.conf | 4 + .distro/qemu-guest-agent.service | 1 - - .distro/qemu-kvm.spec.template | 4034 +++++++++++++++++++++++ + .distro/qemu-kvm.spec.template | 4256 +++++++++++++++++++++++ .distro/rpminspect.yaml | 6 +- .distro/scripts/extract_build_cmd.py | 12 + .gitignore | 1 + README.systemtap | 43 + - meson.build | 4 +- scripts/qemu-guest-agent/fsfreeze-hook | 2 +- scripts/systemtap/conf.d/qemu_kvm.conf | 4 + scripts/systemtap/script.d/qemu_kvm.stp | 1 + tests/check-block.sh | 2 + ui/vnc-auth-sasl.c | 2 +- - 25 files changed, 4290 insertions(+), 339 deletions(-) + 24 files changed, 4510 insertions(+), 338 deletions(-) delete mode 100644 .distro/85-kvm.preset create mode 100644 .distro/Makefile create mode 100644 .distro/Makefile.common @@ -237,21 +242,6 @@ index 0000000000..ad913fc990 + +3. Translate the trace record to readable format. 
+ # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log -diff --git a/meson.build b/meson.build -index 861de93c4f..6f7e430f0f 100644 ---- a/meson.build -+++ b/meson.build -@@ -2394,7 +2394,9 @@ if capstone_opt == 'internal' - # Include all configuration defines via a header file, which will wind up - # as a dependency on the object file, and thus changes here will result - # in a rebuild. -- '-include', 'capstone-defs.h' -+ '-include', 'capstone-defs.h', -+ -+ '-Wp,-D_GLIBCXX_ASSERTIONS', - ] - - libcapstone = static_library('capstone', diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook index 13aafd4845..e9b84ec028 100755 --- a/scripts/qemu-guest-agent/fsfreeze-hook @@ -283,11 +273,11 @@ index 0000000000..c04abf9449 @@ -0,0 +1 @@ +probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} diff --git a/tests/check-block.sh b/tests/check-block.sh -index f59496396c..d900d8b35e 100755 +index 5de2c1ba0b..6af743f441 100755 --- a/tests/check-block.sh +++ b/tests/check-block.sh -@@ -48,6 +48,8 @@ if LANG=C bash --version | grep -q 'GNU bash, version [123]' ; then - skip "bash version too old ==> Not running the qemu-iotests." +@@ -22,6 +22,8 @@ if [ -z "$(find . -name 'qemu-system-*' -print)" ]; then + skip "No qemu-system binary available ==> Not running the qemu-iotests." fi +exit 0 diff --git a/0005-Re-enable-capstone-internal-build.patch b/0005-Re-enable-capstone-internal-build.patch new file mode 100644 index 0000000..29a7649 --- /dev/null +++ b/0005-Re-enable-capstone-internal-build.patch @@ -0,0 +1,251 @@ +From 963cd2a0d78f6cec0ee5203ca2d2de77094bf047 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 1 Jun 2022 05:45:58 -0400 +Subject: Re-enable capstone internal build + +Until capstone component is added to RHEL 9 we revert changes removing +internal capstone usage. 
+ +Signed-off-by: Miroslav Rezanina +--- + .distro/Makefile.common | 3 +- + .distro/capstone.tar.gz | Bin 0 -> 5765837 bytes + .distro/qemu-kvm.spec.template | 5 +- + configure | 12 ++++ + meson.build | 116 +++++++++++++++++++++++++++++++-- + meson_options.txt | 3 +- + scripts/meson-buildoptions.sh | 5 +- + 7 files changed, 135 insertions(+), 9 deletions(-) + create mode 100644 .distro/capstone.tar.gz + +diff --git a/configure b/configure +index 72ab03f11a..448b0c82cb 100755 +--- a/configure ++++ b/configure +@@ -322,8 +322,10 @@ vfio_user_server="disabled" + + # 1. Track which submodules are needed + if test "$default_feature" = no ; then ++ capstone="disabled" + slirp="disabled" + else ++ capstone="auto" + slirp="auto" + fi + fdt="auto" +@@ -902,6 +904,15 @@ for opt do + --enable-uuid|--disable-uuid) + echo "$0: $opt is obsolete, UUID support is always built" >&2 + ;; ++ --disable-capstone) capstone="disabled" ++ ;; ++ --enable-capstone) capstone="enabled" ++ ;; ++ --enable-capstone=git) capstone="internal" ++ ;; ++ --enable-capstone=*) capstone="$optarg" ++ ;; ++ + --with-git=*) git="$optarg" + ;; + --with-git-submodules=*) +@@ -2742,6 +2753,7 @@ if test "$skip_meson" = no; then + test "$werror" = yes && meson_option_add -Dwerror=true + + # QEMU options ++ test "$capstone" != auto && meson_option_add "-Dcapstone=$capstone" + test "$cfi" != false && meson_option_add "-Dcfi=$cfi" + test "$fdt" != auto && meson_option_add "-Dfdt=$fdt" + test -n "${LIB_FUZZING_ENGINE+xxx}" && meson_option_add "-Dfuzzing_engine=$LIB_FUZZING_ENGINE" +diff --git a/meson.build b/meson.build +index 20fddbd707..9e6a979c13 100644 +--- a/meson.build ++++ b/meson.build +@@ -2596,10 +2596,13 @@ genh += custom_target('config-poison.h', + ############## + + capstone = not_found +-if not get_option('capstone').auto() or have_system or have_user ++capstone_opt = get_option('capstone') ++if capstone_opt in ['enabled', 'auto', 'system'] ++ have_internal = fs.exists(meson.current_source_dir() / 
'capstone/Makefile') + capstone = dependency('capstone', version: '>=3.0.5', + kwargs: static_kwargs, method: 'pkg-config', +- required: get_option('capstone')) ++ required: capstone_opt == 'system' or ++ capstone_opt == 'enabled' and not have_internal) + + # Some versions of capstone have broken pkg-config file + # that reports a wrong -I path, causing the #include to +@@ -2608,10 +2611,113 @@ if not get_option('capstone').auto() or have_system or have_user + if capstone.found() and not cc.compiles('#include ', + dependencies: [capstone]) + capstone = not_found +- if get_option('capstone').enabled() +- error('capstone requested, but it does not appear to work') ++ if capstone_opt == 'system' ++ error('system capstone requested, it does not appear to work') + endif + endif ++ ++ if capstone.found() ++ capstone_opt = 'system' ++ elif have_internal ++ capstone_opt = 'internal' ++ else ++ capstone_opt = 'disabled' ++ endif ++endif ++if capstone_opt == 'internal' ++ capstone_data = configuration_data() ++ capstone_data.set('CAPSTONE_USE_SYS_DYN_MEM', '1') ++ ++ capstone_files = files( ++ 'capstone/cs.c', ++ 'capstone/MCInst.c', ++ 'capstone/MCInstrDesc.c', ++ 'capstone/MCRegisterInfo.c', ++ 'capstone/SStream.c', ++ 'capstone/utils.c' ++ ) ++ ++ if 'CONFIG_ARM_DIS' in config_all_disas ++ capstone_data.set('CAPSTONE_HAS_ARM', '1') ++ capstone_files += files( ++ 'capstone/arch/ARM/ARMDisassembler.c', ++ 'capstone/arch/ARM/ARMInstPrinter.c', ++ 'capstone/arch/ARM/ARMMapping.c', ++ 'capstone/arch/ARM/ARMModule.c' ++ ) ++ endif ++ ++ # FIXME: This config entry currently depends on a c++ compiler. ++ # Which is needed for building libvixl, but not for capstone. 
++ if 'CONFIG_ARM_A64_DIS' in config_all_disas ++ capstone_data.set('CAPSTONE_HAS_ARM64', '1') ++ capstone_files += files( ++ 'capstone/arch/AArch64/AArch64BaseInfo.c', ++ 'capstone/arch/AArch64/AArch64Disassembler.c', ++ 'capstone/arch/AArch64/AArch64InstPrinter.c', ++ 'capstone/arch/AArch64/AArch64Mapping.c', ++ 'capstone/arch/AArch64/AArch64Module.c' ++ ) ++ endif ++ ++ if 'CONFIG_PPC_DIS' in config_all_disas ++ capstone_data.set('CAPSTONE_HAS_POWERPC', '1') ++ capstone_files += files( ++ 'capstone/arch/PowerPC/PPCDisassembler.c', ++ 'capstone/arch/PowerPC/PPCInstPrinter.c', ++ 'capstone/arch/PowerPC/PPCMapping.c', ++ 'capstone/arch/PowerPC/PPCModule.c' ++ ) ++ endif ++ ++ if 'CONFIG_S390_DIS' in config_all_disas ++ capstone_data.set('CAPSTONE_HAS_SYSZ', '1') ++ capstone_files += files( ++ 'capstone/arch/SystemZ/SystemZDisassembler.c', ++ 'capstone/arch/SystemZ/SystemZInstPrinter.c', ++ 'capstone/arch/SystemZ/SystemZMapping.c', ++ 'capstone/arch/SystemZ/SystemZModule.c', ++ 'capstone/arch/SystemZ/SystemZMCTargetDesc.c' ++ ) ++ endif ++ ++ if 'CONFIG_I386_DIS' in config_all_disas ++ capstone_data.set('CAPSTONE_HAS_X86', 1) ++ capstone_files += files( ++ 'capstone/arch/X86/X86Disassembler.c', ++ 'capstone/arch/X86/X86DisassemblerDecoder.c', ++ 'capstone/arch/X86/X86ATTInstPrinter.c', ++ 'capstone/arch/X86/X86IntelInstPrinter.c', ++ 'capstone/arch/X86/X86InstPrinterCommon.c', ++ 'capstone/arch/X86/X86Mapping.c', ++ 'capstone/arch/X86/X86Module.c' ++ ) ++ endif ++ ++ configure_file(output: 'capstone-defs.h', configuration: capstone_data) ++ ++ capstone_cargs = [ ++ # FIXME: There does not seem to be a way to completely replace the c_args ++ # that come from add_project_arguments() -- we can only add to them. ++ # So: disable all warnings with a big hammer. ++ '-Wno-error', '-w', ++ ++ # Include all configuration defines via a header file, which will wind up ++ # as a dependency on the object file, and thus changes here will result ++ # in a rebuild. 
++ '-include', 'capstone-defs.h', ++ ++ '-Wp,-D_GLIBCXX_ASSERTIONS', ++ ++ ] ++ ++ libcapstone = static_library('capstone', ++ build_by_default: false, ++ sources: capstone_files, ++ c_args: capstone_cargs, ++ include_directories: 'capstone/include') ++ capstone = declare_dependency(link_with: libcapstone, ++ include_directories: 'capstone/include/capstone') + endif + + slirp = not_found +@@ -3977,7 +4083,7 @@ summary_info += {'bzip2 support': libbzip2} + summary_info += {'lzfse support': liblzfse} + summary_info += {'zstd support': zstd} + summary_info += {'NUMA host support': numa} +-summary_info += {'capstone': capstone} ++summary_info += {'capstone': capstone_opt == 'internal' ? capstone_opt : capstone} + summary_info += {'libpmem support': libpmem} + summary_info += {'libdaxctl support': libdaxctl} + summary_info += {'libudev': libudev} +diff --git a/meson_options.txt b/meson_options.txt +index e58e158396..7cd920fcd6 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -262,7 +262,8 @@ option('libvduse', type: 'feature', value: 'auto', + option('vduse_blk_export', type: 'feature', value: 'auto', + description: 'VDUSE block export support') + +-option('capstone', type: 'feature', value: 'auto', ++option('capstone', type: 'combo', value: 'auto', ++ choices: ['disabled', 'enabled', 'auto', 'system', 'internal'], + description: 'Whether and how to find the capstone library') + option('slirp', type: 'combo', value: 'auto', + choices: ['disabled', 'enabled', 'auto', 'system', 'internal'], +diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh +index 359b04e0e6..b1001aa1db 100644 +--- a/scripts/meson-buildoptions.sh ++++ b/scripts/meson-buildoptions.sh +@@ -16,6 +16,9 @@ meson_options_help() { + printf "%s\n" ' --enable-block-drv-whitelist-in-tools' + printf "%s\n" ' use block whitelist also in tools instead of only' + printf "%s\n" ' QEMU' ++ printf "%s\n" ' --enable-capstone[=CHOICE]' ++ printf "%s\n" ' Whether and how to find the 
capstone library' ++ printf "%s\n" ' (choices: auto/disabled/enabled/internal/system)' + printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)' + printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation' + printf "%s\n" ' --enable-debug-mutex mutex debugging support' +@@ -75,7 +78,6 @@ meson_options_help() { + printf "%s\n" ' bzip2 bzip2 support for DMG images' + printf "%s\n" ' canokey CanoKey support' + printf "%s\n" ' cap-ng cap_ng support' +- printf "%s\n" ' capstone Whether and how to find the capstone library' + printf "%s\n" ' cloop cloop image format support' + printf "%s\n" ' cocoa Cocoa user interface (macOS only)' + printf "%s\n" ' coreaudio CoreAudio sound support' +@@ -216,6 +218,7 @@ _meson_option_parse() { + --disable-cap-ng) printf "%s" -Dcap_ng=disabled ;; + --enable-capstone) printf "%s" -Dcapstone=enabled ;; + --disable-capstone) printf "%s" -Dcapstone=disabled ;; ++ --enable-capstone=*) quote_sh "-Dcapstone=$2" ;; + --enable-cfi) printf "%s" -Dcfi=true ;; + --disable-cfi) printf "%s" -Dcfi=false ;; + --enable-cfi-debug) printf "%s" -Dcfi_debug=true ;; +-- +2.31.1 + diff --git a/0005-Enable-disable-devices-for-RHEL.patch b/0006-Enable-disable-devices-for-RHEL.patch similarity index 84% rename from 0005-Enable-disable-devices-for-RHEL.patch rename to 0006-Enable-disable-devices-for-RHEL.patch index 1ffbe97..a53abec 100644 --- a/0005-Enable-disable-devices-for-RHEL.patch +++ b/0006-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 51ec7495d69fe4b4d0b61642ca6c0e7fd7a1032d Mon Sep 17 00:00:00 2001 +From ae20ca5826cd237e727cff1663177f7f863fab21 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 15 Jul 2021 03:22:36 -0400 Subject: Enable/disable devices for RHEL @@ -22,6 +22,13 @@ Rebase notes (7.0.0): - Renamed CONFIG_ARM_GIC_TCG to CONFIG_ARM_GICV3_TCG - Removed upstream devices +Rebase notes (7.1.0 rc0): +- Added CONFIG_VHOST_VSOCK and CONFIG_VHOST_USER_VSOCK configs +- Added CONFIG_CXL and 
CONFIG_CXL_MEM_DEVICE for aarch64 and x86_64 + +Rebase notes (7.1.0 rc3): +- Added CONFIG_VHOST_USER_FS option (all archs) + Merged patches (6.1.0): - c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak - 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI @@ -37,14 +44,19 @@ Merged patches (6.2.0): Merged patches (7.0.0): - fd7c45a5a8 redhat: Enable virtio-mem as tech-preview on x86-64 - c9e68ea451 Enable SGX -- RH Only + +Merged patches (7.1.0 rc0): +- 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/acpi/ich9.c chunk) +- 8f663466c6 configs/devices/aarch64-softmmu: Enable CONFIG_VIRTIO_MEM +- 1bf372717a Enable virtio-iommu-pci on aarch64 +- ae3f269458 Enable virtio-iommu-pci on x86_64 --- .distro/qemu-kvm.spec.template | 18 +-- - .../aarch64-softmmu/aarch64-rh-devices.mak | 34 ++++++ - .../ppc64-softmmu/ppc64-rh-devices.mak | 35 ++++++ + .../aarch64-softmmu/aarch64-rh-devices.mak | 41 +++++++ + .../ppc64-softmmu/ppc64-rh-devices.mak | 37 ++++++ configs/devices/rh-virtio.mak | 10 ++ - .../s390x-softmmu/s390x-rh-devices.mak | 15 +++ - .../x86_64-softmmu/x86_64-rh-devices.mak | 103 ++++++++++++++++++ - hw/acpi/ich9.c | 4 +- + .../s390x-softmmu/s390x-rh-devices.mak | 18 +++ + .../x86_64-softmmu/x86_64-rh-devices.mak | 109 ++++++++++++++++++ hw/arm/meson.build | 2 +- hw/block/fdc.c | 10 ++ hw/cpu/meson.build | 5 +- @@ -58,7 +70,7 @@ Merged patches (7.0.0): target/ppc/cpu-models.c | 9 ++ target/s390x/cpu_models_sysemu.c | 3 + target/s390x/kvm/kvm.c | 8 ++ - 20 files changed, 269 insertions(+), 15 deletions(-) + 19 files changed, 285 insertions(+), 13 deletions(-) create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak create mode 100644 configs/devices/rh-virtio.mak @@ -67,10 +79,10 @@ Merged patches (7.0.0): diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak new file mode 100644 -index 
0000000000..5f6ee1de5b +index 0000000000..720ec0cb57 --- /dev/null +++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -@@ -0,0 +1,34 @@ +@@ -0,0 +1,41 @@ +include ../rh-virtio.mak + +CONFIG_ARM_GIC_KVM=y @@ -79,6 +91,8 @@ index 0000000000..5f6ee1de5b +CONFIG_ARM_SMMUV3=y +CONFIG_ARM_V7M=y +CONFIG_ARM_VIRT=y ++CONFIG_CXL=y ++CONFIG_CXL_MEM_DEVICE=y +CONFIG_EDID=y +CONFIG_PCIE_PORT=y +CONFIG_PCI_DEVICES=y @@ -95,6 +109,8 @@ index 0000000000..5f6ee1de5b +CONFIG_VFIO_PCI=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_MEM=y ++CONFIG_VIRTIO_IOMMU=y +CONFIG_XIO3130=y +CONFIG_NVDIMM=y +CONFIG_ACPI_APEI=y @@ -105,12 +121,15 @@ index 0000000000..5f6ee1de5b +CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y +CONFIG_PVPANIC_PCI=y +CONFIG_PXB=y ++CONFIG_VHOST_VSOCK=y ++CONFIG_VHOST_USER_VSOCK=y ++CONFIG_VHOST_USER_FS=y diff --git a/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak new file mode 100644 -index 0000000000..6a3e3f0227 +index 0000000000..dbb7d30829 --- /dev/null +++ b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak -@@ -0,0 +1,35 @@ +@@ -0,0 +1,37 @@ +include ../rh-virtio.mak + +CONFIG_DIMM=y @@ -146,6 +165,8 @@ index 0000000000..6a3e3f0227 +CONFIG_TPM=y +CONFIG_TPM_SPAPR=y +CONFIG_TPM_EMULATOR=y ++CONFIG_VHOST_VSOCK=y ++CONFIG_VHOST_USER_VSOCK=y diff --git a/configs/devices/rh-virtio.mak b/configs/devices/rh-virtio.mak new file mode 100644 index 0000000000..94ede1b5f6 @@ -164,10 +185,10 @@ index 0000000000..94ede1b5f6 +CONFIG_VIRTIO_SERIAL=y diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak new file mode 100644 -index 0000000000..d3b38312e1 +index 0000000000..69a799adbd --- /dev/null +++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak -@@ -0,0 +1,15 @@ +@@ -0,0 +1,18 @@ +include ../rh-virtio.mak + +CONFIG_PCI=y @@ -183,12 +204,15 @@ index 0000000000..d3b38312e1 +CONFIG_VHOST_USER=y +CONFIG_VIRTIO_CCW=y +CONFIG_WDT_DIAG288=y 
++CONFIG_VHOST_VSOCK=y ++CONFIG_VHOST_USER_VSOCK=y ++CONFIG_VHOST_USER_FS=y diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..d0c9e66641 +index 0000000000..10cb0a14e0 --- /dev/null +++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -0,0 +1,103 @@ +@@ -0,0 +1,109 @@ +include ../rh-virtio.mak + +CONFIG_ACPI=y @@ -204,6 +228,8 @@ index 0000000000..d0c9e66641 +CONFIG_APIC=y +CONFIG_APM=y +CONFIG_BOCHS_DISPLAY=y ++CONFIG_CXL=y ++CONFIG_CXL_MEM_DEVICE=y +CONFIG_DIMM=y +CONFIG_E1000E_PCI_EXPRESS=y +CONFIG_E1000_PCI=y @@ -281,6 +307,7 @@ index 0000000000..d0c9e66641 +CONFIG_VIRTIO_MEM=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VGA=y ++CONFIG_VIRTIO_IOMMU=y +CONFIG_VMMOUSE=y +CONFIG_VMPORT=y +CONFIG_VTD=y @@ -292,26 +319,14 @@ index 0000000000..d0c9e66641 +CONFIG_TPM_TIS_ISA=y +CONFIG_TPM_EMULATOR=y +CONFIG_SGX=y -diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index bd9bbade70..de1e401cdf 100644 ---- a/hw/acpi/ich9.c -+++ b/hw/acpi/ich9.c -@@ -435,8 +435,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) - static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; - pm->acpi_memory_hotplug.is_enabled = true; - pm->cpu_hotplug_legacy = true; -- pm->disable_s3 = 0; -- pm->disable_s4 = 0; -+ pm->disable_s3 = 1; -+ pm->disable_s4 = 1; - pm->s4_val = 2; - pm->use_acpi_hotplug_bridge = true; - pm->keep_pci_slot_hpc = true; ++CONFIG_VHOST_VSOCK=y ++CONFIG_VHOST_USER_VSOCK=y ++CONFIG_VHOST_USER_FS=y diff --git a/hw/arm/meson.build b/hw/arm/meson.build -index 721a8eb8be..87ed4dd914 100644 +index 92f9f6e000..c5e94c997c 100644 --- a/hw/arm/meson.build +++ b/hw/arm/meson.build -@@ -31,7 +31,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) +@@ -30,7 +30,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c')) arm_ss.add(when: 'CONFIG_SABRELITE', if_true: 
files('sabrelite.c')) @@ -321,7 +336,7 @@ index 721a8eb8be..87ed4dd914 100644 arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 347875a0cd..ca1776121f 100644 +index 64ae4a6899..9b8e782c19 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -49,6 +49,8 @@ @@ -333,7 +348,7 @@ index 347875a0cd..ca1776121f 100644 /********************************************************/ /* debug Floppy devices */ -@@ -2338,6 +2340,14 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) +@@ -2346,6 +2348,14 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) FDrive *drive; static int command_tables_inited = 0; @@ -379,10 +394,10 @@ index 3bb6a58698..6447fdb02e 100644 * Also accept 8 MB/16 MB for backward compatibility. */ diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index ce89fd0aa3..fbcf802b13 100644 +index 9a9b28078e..f3ce3fbcee 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -232,7 +232,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -197,7 +197,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -392,7 +407,7 @@ index ce89fd0aa3..fbcf802b13 100644 } static const TypeInfo piix3_ide_info = { -@@ -261,6 +262,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -220,6 +221,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -402,12 +417,12 @@ index ce89fd0aa3..fbcf802b13 100644 static const TypeInfo piix4_ide_info = { diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c -index 4efdf75620..5143ebaa27 100644 +index b92b63bedc..3b6235dde6 100644 --- 
a/hw/input/pckbd.c +++ b/hw/input/pckbd.c -@@ -814,6 +814,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) +@@ -957,6 +957,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) dc->vmsd = &vmstate_kbd_isa; - isa->build_aml = i8042_build_aml; + adevc->build_dev_aml = i8042_build_aml; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + /* Disabled for Red Hat Enterprise Linux: */ + dc->user_creatable = false; @@ -415,10 +430,10 @@ index 4efdf75620..5143ebaa27 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index f5bc81296d..282d01e374 100644 +index e26e0a64c1..41492fae79 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1821,6 +1821,7 @@ static const E1000Info e1000_devices[] = { +@@ -1824,6 +1824,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -426,7 +441,7 @@ index f5bc81296d..282d01e374 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1833,6 +1834,7 @@ static const E1000Info e1000_devices[] = { +@@ -1836,6 +1837,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -452,7 +467,7 @@ index 8a4861f45a..fcb5dfe792 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/meson.build b/hw/usb/meson.build -index de853d780d..0776ae6a20 100644 +index 793df42e21..cd3c305471 100644 --- a/hw/usb/meson.build +++ b/hw/usb/meson.build @@ -52,7 +52,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade @@ -465,10 +480,10 @@ index de853d780d..0776ae6a20 100644 endif diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index 13d0e9b195..3826fa5122 100644 +index 3099b38e32..10d91c4ef0 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c -@@ -22,6 +22,7 @@ +@@ -147,6 +147,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) /* CPU models. 
These are not needed for the AArch64 linux-user build. */ #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) @@ -476,31 +491,31 @@ index 13d0e9b195..3826fa5122 100644 #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { -@@ -375,6 +376,7 @@ static void cortex_a9_initfn(Object *obj) - cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */ +@@ -500,6 +501,7 @@ static void cortex_a9_initfn(Object *obj) + cpu->isar.reset_pmcr_el0 = 0x41093000; define_arm_cp_regs(cpu, cortexa9_cp_reginfo); } +#endif /* disabled for RHEL */ #ifndef CONFIG_USER_ONLY static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -400,6 +402,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { - REGINFO_SENTINEL +@@ -524,6 +526,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, }; +#if 0 /* Disabled for Red Hat Enterprise Linux */ static void cortex_a7_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -445,6 +448,7 @@ static void cortex_a7_initfn(Object *obj) - cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ +@@ -572,6 +575,7 @@ static void cortex_a7_initfn(Object *obj) + cpu->isar.reset_pmcr_el0 = 0x41072000; define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ } +#endif /* disabled for RHEL */ static void cortex_a15_initfn(Object *obj) { -@@ -488,6 +492,7 @@ static void cortex_a15_initfn(Object *obj) +@@ -618,6 +622,7 @@ static void cortex_a15_initfn(Object *obj) define_arm_cp_regs(cpu, cortexa15_cp_reginfo); } @@ -508,7 +523,7 @@ index 13d0e9b195..3826fa5122 100644 static void cortex_m0_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -928,6 +933,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) +@@ -1058,6 +1063,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) cc->gdb_core_xml_file = "arm-m-profile.xml"; } @@ -516,7 +531,7 @@ index 13d0e9b195..3826fa5122 
100644 #ifndef TARGET_AARCH64 /* -@@ -1007,6 +1013,7 @@ static void arm_max_initfn(Object *obj) +@@ -1125,6 +1131,7 @@ static void arm_max_initfn(Object *obj) #endif /* !TARGET_AARCH64 */ static const ARMCPUInfo arm_tcg_cpus[] = { @@ -524,7 +539,7 @@ index 13d0e9b195..3826fa5122 100644 { .name = "arm926", .initfn = arm926_initfn }, { .name = "arm946", .initfn = arm946_initfn }, { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1022,7 +1029,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1140,7 +1147,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "cortex-a7", .initfn = cortex_a7_initfn }, { .name = "cortex-a8", .initfn = cortex_a8_initfn }, { .name = "cortex-a9", .initfn = cortex_a9_initfn }, @@ -534,7 +549,7 @@ index 13d0e9b195..3826fa5122 100644 { .name = "cortex-m0", .initfn = cortex_m0_initfn, .class_init = arm_v7m_class_init }, { .name = "cortex-m3", .initfn = cortex_m3_initfn, -@@ -1053,6 +1062,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1171,6 +1180,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, @@ -543,7 +558,7 @@ index 13d0e9b195..3826fa5122 100644 { .name = "max", .initfn = arm_max_initfn }, #endif diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 976be5e0d1..dd78883410 100644 +index 912b037c63..cd3ff700ac 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -573,7 +588,7 @@ index 976be5e0d1..dd78883410 100644 POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, "POWER7 v2.3") POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, -@@ -897,12 +901,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -896,12 +900,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "7447a", "7447a_v1.2" }, { "7457a", "7457a_v1.2" }, { "apollo7pm", "7457a_v1.0" }, @@ -589,7 +604,7 @@ index 976be5e0d1..dd78883410 100644 { "power7", 
"power7_v2.3" }, { "power7+", "power7+_v2.1" }, { "power8e", "power8e_v2.1" }, -@@ -912,6 +919,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -911,12 +918,14 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "power10", "power10_v2.0" }, #endif @@ -597,18 +612,18 @@ index 976be5e0d1..dd78883410 100644 /* Generic PowerPCs */ #if defined(TARGET_PPC64) { "ppc64", "970fx_v3.1" }, -@@ -919,5 +927,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + #endif { "ppc32", "604" }, { "ppc", "604" }, - { "default", "604" }, +#endif + { NULL, NULL } }; diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c -index 05c3ccaaff..6a04ccab1b 100644 +index d8a141a023..d086b1c39c 100644 --- a/target/s390x/cpu_models_sysemu.c +++ b/target/s390x/cpu_models_sysemu.c -@@ -36,6 +36,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, +@@ -35,6 +35,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, (max_model->def->gen == model->def->gen && max_model->def->ec_ga < model->def->ec_ga)) { list_add_feat("type", unavailable); @@ -619,10 +634,10 @@ index 05c3ccaaff..6a04ccab1b 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c -index 6acf14d5ec..74f089d87f 100644 +index 7bd8db0e7b..81cb489694 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c -@@ -2512,6 +2512,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2520,6 +2520,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } diff --git a/0006-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch similarity index 89% rename from 0006-Machine-type-related-general-changes.patch rename to 0007-Machine-type-related-general-changes.patch index c3b08a4..6d1d2b0 100644 --- a/0006-Machine-type-related-general-changes.patch +++ 
b/0007-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From a525db3951dc68c469d1f51bdc69ab6e75e72c37 Mon Sep 17 00:00:00 2001 +From 8ee73de7f30f39293388932bbb0d69b6c9435ab1 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -19,6 +19,10 @@ Rebase notes (7.0.0): - Remove downstream changes leftovers in hw/rtc/mc146818rtc.c - Remove unnecessary change in hw/usb/hcd-uhci.c +Rebase notes (7.1.0 rc0): +- Moved adding rhel_old_machine_deprecation variable from s390x to general machine types commit +- Moved adding hw_compat_rhel_8_6 struct from x86_64 to general machine types commit + Merged patches (6.1.0): - f2fb42a3c6 redhat: add missing entries in hw_compat_rhel_8_4 - 1949ec258e hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3 @@ -35,28 +39,32 @@ Merged patches (6.2.0): Merged patches (7.0.0): - ef5afcc86d Fix virtio-net-pci* "vectors" compat - 168f0d56e3 compat: Update hw_compat_rhel_8_5 with 6.2.0 RC2 changes + +Merged patches (7.1.0 rc0): +- 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/acpi/piix4.c chunk) +- 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (only hw/core/machine.c and include/hw/boards.h chunk) --- - hw/acpi/piix4.c | 6 +- + hw/acpi/piix4.c | 2 +- hw/arm/virt.c | 2 +- - hw/core/machine.c | 186 +++++++++++++++++++++++++++++++++++ + hw/core/machine.c | 211 +++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- hw/i386/pc_piix.c | 2 + hw/i386/pc_q35.c | 2 + hw/net/rtl8139.c | 4 +- - hw/smbios/smbios.c | 46 ++++++++- + hw/smbios/smbios.c | 46 +++++++- hw/timer/i8254_common.c | 2 +- - hw/usb/hcd-xhci-pci.c | 59 ++++++++--- + hw/usb/hcd-xhci-pci.c | 59 +++++++--- hw/usb/hcd-xhci-pci.h | 1 + - include/hw/boards.h | 21 ++++ + include/hw/boards.h | 28 +++++ include/hw/firmware/smbios.h | 5 +- include/hw/i386/pc.h | 3 + - 14 files changed, 316 insertions(+), 25 deletions(-) + 14 files changed, 346 
insertions(+), 23 deletions(-) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index fe5625d07a..28544e78c3 100644 +index 0a81f1ad93..dbfb362a8f 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -287,7 +287,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) +@@ -248,7 +248,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, @@ -65,22 +73,11 @@ index fe5625d07a..28544e78c3 100644 .post_load = vmstate_acpi_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), -@@ -653,8 +653,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) - - static Property piix4_pm_properties[] = { - DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), -- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), -- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), -+ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), -+ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), - DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), - DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, - use_acpi_hotplug_bridge, true), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d2e5ecd234..6a84031fd7 100644 +index 9633f822f3..389d6882dd 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1596,7 +1596,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1617,7 +1617,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, @@ -90,13 +87,38 @@ index d2e5ecd234..6a84031fd7 100644 smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, diff --git a/hw/core/machine.c b/hw/core/machine.c -index 1e23fdc14b..ea430d844e 100644 +index a673302cce..909f75770b 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -37,6 +37,192 @@ - #include "hw/virtio/virtio.h" +@@ -40,6 +40,217 @@ #include "hw/virtio/virtio-pci.h" + #include "qom/object_interfaces.h" ++/* ++ * RHEL only: machine types for previous major releases are deprecated ++ */ ++const char *rhel_old_machine_deprecation = ++ "machine types for previous major releases are deprecated"; ++ ++/* ++ * Mostly the same as hw_compat_6_2 ++ */ ++GlobalProperty hw_compat_rhel_9_0[] = { ++ /* hw_compat_rhel_9_0 from hw_compat_6_2 */ ++ { "PIIX4_PM", "x-not-migrate-acpi-index", "on"}, ++}; ++const size_t hw_compat_rhel_9_0_len = G_N_ELEMENTS(hw_compat_rhel_9_0); ++ ++GlobalProperty hw_compat_rhel_8_6[] = { ++ /* hw_compat_rhel_8_6 bz 2065589 */ ++ /* ++ * vhost-vsock device in RHEL 8 kernels doesn't support seqpacket, so ++ * we need do disable it downstream on the latest hw_compat_rhel_8. 
++ */ ++ { "vhost-vsock-device", "seqpacket", "off" }, ++}; ++const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); ++ +/* + * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ @@ -283,9 +305,9 @@ index 1e23fdc14b..ea430d844e 100644 +}; +const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); + - GlobalProperty hw_compat_6_2[] = { - { "PIIX4_PM", "x-not-migrate-acpi-index", "on"}, - }; + GlobalProperty hw_compat_7_0[] = { + { "arm-gicv3-common", "force-8-bit-prio", "on" }, + { "nvme-ns", "eui64-default", "on"}, diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c index 46abbc5653..505467059b 100644 --- a/hw/display/vga-isa.c @@ -300,10 +322,10 @@ index 46abbc5653..505467059b 100644 }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index b72c03d0a6..c797e98312 100644 +index 20962c34e7..1ec5d6a4f8 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, +@@ -185,6 +185,8 @@ static void pc_init1(MachineState *machine, smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -313,10 +335,10 @@ index b72c03d0a6..c797e98312 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 1780f79bc1..b695f88c45 100644 +index 2e5dae9a89..ef471f6664 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -200,6 +200,8 @@ static void pc_q35_init(MachineState *machine) +@@ -201,6 +201,8 @@ static void pc_q35_init(MachineState *machine) smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -555,13 +577,19 @@ index c193f79443..086a1feb1e 100644 #endif diff --git a/include/hw/boards.h b/include/hw/boards.h -index c92ac8815c..c90a19b4d1 100644 +index 7b416c9787..bfd757c561 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -449,4 +449,25 @@ extern const size_t hw_compat_2_2_len; +@@ -451,4 +451,32 @@ extern 
const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; ++extern GlobalProperty hw_compat_rhel_9_0[]; ++extern const size_t hw_compat_rhel_9_0_len; ++ ++extern GlobalProperty hw_compat_rhel_8_6[]; ++extern const size_t hw_compat_rhel_8_6_len; ++ +extern GlobalProperty hw_compat_rhel_8_5[]; +extern const size_t hw_compat_rhel_8_5_len; + @@ -583,6 +611,7 @@ index c92ac8815c..c90a19b4d1 100644 +extern GlobalProperty hw_compat_rhel_7_6[]; +extern const size_t hw_compat_rhel_7_6_len; + ++extern const char *rhel_old_machine_deprecation; #endif diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h index 4b7ad77a44..9acff96a86 100644 @@ -601,10 +630,10 @@ index 4b7ad77a44..9acff96a86 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 1a27de9c8b..91331059d9 100644 +index 8435733bd6..4d9e95a091 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -113,6 +113,9 @@ struct PCMachineClass { +@@ -112,6 +112,9 @@ struct PCMachineClass { bool smbios_defaults; bool smbios_legacy_mode; bool smbios_uuid_encoded; diff --git a/0007-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch similarity index 50% rename from 0007-Add-aarch64-machine-types.patch rename to 0008-Add-aarch64-machine-types.patch index 3c44b11..91d4194 100644 --- a/0007-Add-aarch64-machine-types.patch +++ b/0008-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 697aaa43e3c0f20fc312f06be6c1093f1ba907e1 Mon Sep 17 00:00:00 2001 +From dfe5c09a8cca1dcbff5798951fa88b7f540ea4ed Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -17,6 +17,12 @@ Rebase notes (7.0.0): - Added dtb-kaslr-seed option - Set no_tcg_lpa2 to true +Rebase notes (7.1.0 rc0): +- replace dtb_kaslr_seed by dtb_randomness + +Rebase notes (7.1.0 rc3): +- Updated 
dtb_randomness comment + Merged patches (6.2.0): - 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type - f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type @@ -29,13 +35,28 @@ Merged patches (7.0.0): - a1d1b6eeb6 hw/arm/virt: Expose the 'RAS' option - 47f8fe1b82 hw/arm/virt: Add 9.0 machine type and remove 8.5 one - ed2346788f hw/arm/virt: Check no_tcg_its and minor style changes + +Merged patches (7.0.0): +- f79b31bdef hw/arm/virt: Remove the dtb-kaslr-seed machine option +- b6fca85f4a hw/arm/virt: Fix missing initialization in instance/class_init() + +Merged patches (7.1.0 rc0): +- ac97dd4f9f RHEL-only: AArch64: Drop unsupported CPU types +- e9c0a70664 target/arm: deprecate named CPU models --- - hw/arm/virt.c | 234 +++++++++++++++++++++++++++++++++++++++++- - include/hw/arm/virt.h | 8 ++ - 2 files changed, 241 insertions(+), 1 deletion(-) + hw/arm/virt.c | 236 ++++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 8 ++ + target/arm/cpu-qom.h | 1 + + target/arm/cpu.c | 5 + + target/arm/cpu.h | 2 + + target/arm/cpu64.c | 14 +- + target/arm/cpu_tcg.c | 12 +- + target/arm/helper.c | 2 + + tests/qtest/arm-cpu-features.c | 6 + + 9 files changed, 274 insertions(+), 12 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 6a84031fd7..e06862d22a 100644 +index 389d6882dd..9737b77eca 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -80,6 +80,7 @@ @@ -96,7 +117,26 @@ index 6a84031fd7..e06862d22a 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -2250,6 +2292,7 @@ static void machvirt_init(MachineState *machine) +@@ -197,14 +239,18 @@ static const int a15irqmap[] = { + }; + + static const char *valid_cpus[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + ARM_CPU_TYPE_NAME("cortex-a7"), + ARM_CPU_TYPE_NAME("cortex-a15"), + ARM_CPU_TYPE_NAME("cortex-a53"), ++#endif /* disabled for RHEL */ + ARM_CPU_TYPE_NAME("cortex-a57"), ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + 
ARM_CPU_TYPE_NAME("cortex-a72"), + ARM_CPU_TYPE_NAME("cortex-a76"), + ARM_CPU_TYPE_NAME("a64fx"), + ARM_CPU_TYPE_NAME("neoverse-n1"), ++#endif /* disabled for RHEL */ + ARM_CPU_TYPE_NAME("host"), + ARM_CPU_TYPE_NAME("max"), + }; +@@ -2288,6 +2334,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -104,7 +144,7 @@ index 6a84031fd7..e06862d22a 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2277,6 +2320,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2315,6 +2362,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -112,7 +152,23 @@ index 6a84031fd7..e06862d22a 100644 static bool virt_get_highmem(Object *obj, Error **errp) { -@@ -2402,6 +2446,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) +@@ -2344,6 +2392,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) + vms->its = value; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_dtb_randomness(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2357,6 +2406,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) + + vms->dtb_randomness = value; + } ++#endif /* disabled for RHEL */ + + static char *virt_get_oem_id(Object *obj, Error **errp) + { +@@ -2440,6 +2490,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) vms->ras = value; } @@ -120,7 +176,7 @@ index 6a84031fd7..e06862d22a 100644 static bool virt_get_mte(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2415,6 +2460,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) +@@ -2453,6 +2504,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) vms->mte = value; } @@ -128,7 +184,7 @@ index 6a84031fd7..e06862d22a 100644 static char *virt_get_gic_version(Object *obj, Error **errp) { 
-@@ -2818,6 +2864,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2884,6 +2936,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return fixed_ipa ? 0 : requested_pa_size; } @@ -136,7 +192,7 @@ index 6a84031fd7..e06862d22a 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -3206,3 +3253,188 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -3285,3 +3338,184 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -171,6 +227,7 @@ index 6a84031fd7..e06862d22a 100644 + hc->unplug_request = virt_machine_device_unplug_request_cb; + hc->unplug = virt_machine_device_unplug_cb; + mc->nvdimm_supported = true; ++ mc->smp_props.clusters_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; + mc->default_ram_id = "mach-virt.ram"; @@ -233,13 +290,6 @@ index 6a84031fd7..e06862d22a 100644 + "Override the default value of field OEM Table ID " + "in ACPI table header." 
+ "The string may be up to 8 bytes in size"); -+ -+ object_class_property_add_bool(oc, "dtb-kaslr-seed", -+ virt_get_dtb_kaslr_seed, -+ virt_set_dtb_kaslr_seed); -+ object_class_property_set_description(oc, "dtb-kaslr-seed", -+ "Set off to disable passing of kaslr-seed " -+ "dtb node to guest"); +} + +static void rhel_virt_instance_init(Object *obj) @@ -258,6 +308,8 @@ index 6a84031fd7..e06862d22a 100644 + vms->gic_version = VIRT_GIC_VERSION_NOSEL; + + vms->highmem_ecam = !vmc->no_highmem_ecam; ++ vms->highmem_mmio = true; ++ vms->highmem_redists = true; + + if (vmc->no_its) { + vms->its = false; @@ -284,8 +336,8 @@ index 6a84031fd7..e06862d22a 100644 + /* MTE is disabled by default and non-configurable for RHEL */ + vms->mte = false; + -+ /* Supply a kaslr-seed by default */ -+ vms->dtb_kaslr_seed = true; ++ /* Supply kaslr-seed and rng-seed by default, non-configurable for RHEL */ ++ vms->dtb_randomness = true; + + vms->irqmap = a15irqmap; + @@ -326,10 +378,10 @@ index 6a84031fd7..e06862d22a 100644 +} +DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 7e76ee2619..9b1efe8f0e 100644 +index 6ec479ca2b..22b54ec510 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -179,9 +179,17 @@ struct VirtMachineState { +@@ -180,9 +180,17 @@ struct VirtMachineState { #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -347,6 +399,261 @@ index 7e76ee2619..9b1efe8f0e 100644 void virt_acpi_setup(VirtMachineState *vms); bool virt_is_acpi_enabled(VirtMachineState *vms); +diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h +index 64c44cef2d..82e97249bc 100644 +--- a/target/arm/cpu-qom.h ++++ b/target/arm/cpu-qom.h +@@ -35,6 +35,7 @@ typedef struct ARMCPUInfo { + const char *name; + void (*initfn)(Object *obj); + void (*class_init)(ObjectClass *oc, void *data); ++ const char *deprecation_note; + } ARMCPUInfo; + + void arm_cpu_register(const ARMCPUInfo *info); +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 7ec3281da9..86174077f1 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2200,8 +2200,13 @@ static void arm_cpu_instance_init(Object *obj) + static void cpu_register_class_init(ObjectClass *oc, void *data) + { + ARMCPUClass *acc = ARM_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + acc->info = data; ++ ++ if (acc->info->deprecation_note) { ++ cc->deprecation_note = acc->info->deprecation_note; ++ } + } + + void arm_cpu_register(const ARMCPUInfo *info) +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 5168e3d837..876ca7cebb 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -34,6 +34,8 @@ + #define KVM_HAVE_MCE_INJECTION 1 + #endif + ++#define RHEL_CPU_DEPRECATION "use 'host' / 'max'" ++ + #define EXCP_UDEF 1 /* undefined instruction */ + #define EXCP_SWI 2 /* software interrupt */ + #define EXCP_PREFETCH_ABORT 3 +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 78e27f778a..1a16c9dccc 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -94,6 +94,7 @@ static void aarch64_a57_initfn(Object *obj) + define_cortex_a72_a57_a53_cp_reginfo(cpu); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void aarch64_a53_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -343,6 +344,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj) + /* From D5.1 AArch64 PMU register 
summary */ + cpu->isar.reset_pmcr_el0 = 0x410c3000; + } ++#endif /* disabled for RHEL */ + + void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) + { +@@ -1108,6 +1110,7 @@ static void aarch64_max_initfn(Object *obj) + qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void aarch64_a64fx_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -1156,14 +1159,18 @@ static void aarch64_a64fx_initfn(Object *obj) + + /* TODO: Add A64FX specific HPC extension registers */ + } ++#endif /* disabled for RHEL */ + + static const ARMCPUInfo aarch64_cpus[] = { +- { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, ++ { .name = "cortex-a57", .initfn = aarch64_a57_initfn, ++ .deprecation_note = RHEL_CPU_DEPRECATION }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, + { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, + { .name = "cortex-a76", .initfn = aarch64_a76_initfn }, + { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, + { .name = "neoverse-n1", .initfn = aarch64_neoverse_n1_initfn }, ++#endif /* disabled for RHEL */ + { .name = "max", .initfn = aarch64_max_initfn }, + #if defined(CONFIG_KVM) || defined(CONFIG_HVF) + { .name = "host", .initfn = aarch64_host_initfn }, +@@ -1235,8 +1242,13 @@ static void aarch64_cpu_instance_init(Object *obj) + static void cpu_register_class_init(ObjectClass *oc, void *data) + { + ARMCPUClass *acc = ARM_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + acc->info = data; ++ ++ if (acc->info->deprecation_note) { ++ cc->deprecation_note = acc->info->deprecation_note; ++ } + } + + void aarch64_cpu_register(const ARMCPUInfo *info) +diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c +index 10d91c4ef0..33cbc2cfe8 100644 +--- a/target/arm/cpu_tcg.c ++++ b/target/arm/cpu_tcg.c +@@ -144,10 +144,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) + } + #endif /* !CONFIG_USER_ONLY */ 
+ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* CPU models. These are not needed for the AArch64 linux-user build. */ + #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) + static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) + { +@@ -501,7 +501,6 @@ static void cortex_a9_initfn(Object *obj) + cpu->isar.reset_pmcr_el0 = 0x41093000; + define_arm_cp_regs(cpu, cortexa9_cp_reginfo); + } +-#endif /* disabled for RHEL */ + + #ifndef CONFIG_USER_ONLY + static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) +@@ -526,7 +525,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + }; + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_a7_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -575,7 +573,6 @@ static void cortex_a7_initfn(Object *obj) + cpu->isar.reset_pmcr_el0 = 0x41072000; + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ + } +-#endif /* disabled for RHEL */ + + static void cortex_a15_initfn(Object *obj) + { +@@ -622,7 +619,6 @@ static void cortex_a15_initfn(Object *obj) + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_m0_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -1063,7 +1059,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) + + cc->gdb_core_xml_file = "arm-m-profile.xml"; + } +-#endif /* disabled for RHEL */ + + #ifndef TARGET_AARCH64 + /* +@@ -1131,7 +1126,6 @@ static void arm_max_initfn(Object *obj) + #endif /* !TARGET_AARCH64 */ + + static const ARMCPUInfo arm_tcg_cpus[] = { +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "arm926", .initfn = arm926_initfn }, + { .name = "arm946", .initfn = arm946_initfn }, + { .name = "arm1026", .initfn = arm1026_initfn }, 
+@@ -1147,9 +1141,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "cortex-a7", .initfn = cortex_a7_initfn }, + { .name = "cortex-a8", .initfn = cortex_a8_initfn }, + { .name = "cortex-a9", .initfn = cortex_a9_initfn }, +-#endif /* disabled for RHEL */ + { .name = "cortex-a15", .initfn = cortex_a15_initfn }, +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-m0", .initfn = cortex_m0_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m3", .initfn = cortex_m3_initfn, +@@ -1180,7 +1172,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, + { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, + { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, +-#endif /* disabled for RHEL */ + #ifndef TARGET_AARCH64 + { .name = "max", .initfn = arm_max_initfn }, + #endif +@@ -1208,3 +1199,4 @@ static void arm_tcg_cpu_register_types(void) + type_init(arm_tcg_cpu_register_types) + + #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ ++#endif /* disabled for RHEL */ +diff --git a/target/arm/helper.c b/target/arm/helper.c +index d7bc467a2a..a91494b7d3 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -8213,6 +8213,7 @@ void arm_cpu_list(void) + static void arm_cpu_add_definition(gpointer data, gpointer user_data) + { + ObjectClass *oc = data; ++ CPUClass *cc = CPU_CLASS(oc); + CpuDefinitionInfoList **cpu_list = user_data; + CpuDefinitionInfo *info; + const char *typename; +@@ -8222,6 +8223,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) + info->name = g_strndup(typename, + strlen(typename) - strlen("-" TYPE_ARM_CPU)); + info->q_typename = g_strdup(typename); ++ info->deprecated = !!cc->deprecation_note; + + QAPI_LIST_PREPEND(*cpu_list, info); + } +diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c +index 5a14527386..a3579fc303 100644 +--- a/tests/qtest/arm-cpu-features.c ++++ b/tests/qtest/arm-cpu-features.c +@@ -440,8 +440,10 
@@ static void test_query_cpu_model_expansion(const void *data) + assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); + + /* Test expected feature presence/absence for some cpu types */ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_has_feature_enabled(qts, "cortex-a15", "pmu"); + assert_has_not_feature(qts, "cortex-a15", "aarch64"); ++#endif /* disabled for RHEL */ + + /* Enabling and disabling pmu should always work. */ + assert_has_feature_enabled(qts, "max", "pmu"); +@@ -458,6 +460,7 @@ static void test_query_cpu_model_expansion(const void *data) + assert_has_feature_enabled(qts, "cortex-a57", "pmu"); + assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_has_feature_enabled(qts, "a64fx", "pmu"); + assert_has_feature_enabled(qts, "a64fx", "aarch64"); + /* +@@ -470,6 +473,7 @@ static void test_query_cpu_model_expansion(const void *data) + "{ 'sve384': true }"); + assert_error(qts, "a64fx", "cannot enable sve640", + "{ 'sve640': true }"); ++#endif /* disabled for RHEL */ + + sve_tests_default(qts, "max"); + pauth_tests_default(qts, "max"); +@@ -505,9 +509,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data) + QDict *resp; + char *error; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_error(qts, "cortex-a15", + "We cannot guarantee the CPU type 'cortex-a15' works " + "with KVM on this host", NULL); ++#endif /* disabled for RHEL */ + + assert_has_feature_enabled(qts, "host", "aarch64"); + -- 2.31.1 diff --git a/0008-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch similarity index 90% rename from 0008-Add-ppc64-machine-types.patch rename to 0009-Add-ppc64-machine-types.patch index 860e803..28548b3 100644 --- a/0008-Add-ppc64-machine-types.patch +++ b/0009-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From f61b3d7dc000886e23943457ee9baf1d4cae43b4 Mon Sep 17 00:00:00 2001 +From 77a23381d2a445ee499c4335816f3df08d545aed 
Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -19,21 +19,25 @@ Merged patches (6.1.0): - 0215eb3356 Remove RHEL 7.3.0 machine types (only ppc64 changes) - af69d1ca6e Remove RHEL 7.4.0 machine types (only ppc64 changes) - 8f7a74ab78 Remove RHEL 7.5.0 machine types (only ppc64 changes) + +Merged patches (7.1.0 rc0): +- baa6790171 target/ppc/cpu-models: Fix ppc_cpu_aliases list for RHEL --- hw/ppc/spapr.c | 243 ++++++++++++++++++++++++++++++++++++++++ hw/ppc/spapr_cpu_core.c | 13 +++ include/hw/ppc/spapr.h | 4 + target/ppc/compat.c | 13 ++- + target/ppc/cpu-models.c | 1 + target/ppc/cpu.h | 1 + target/ppc/kvm.c | 27 +++++ target/ppc/kvm_ppc.h | 13 +++ - 7 files changed, 313 insertions(+), 1 deletion(-) + 8 files changed, 314 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index a4372ba189..5fdf8b506d 100644 +index bc9ba6e6dc..5d0989f87a 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -1622,6 +1622,9 @@ static void spapr_machine_reset(MachineState *machine) +@@ -1633,6 +1633,9 @@ static void spapr_machine_reset(MachineState *machine) pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); @@ -43,7 +47,7 @@ index a4372ba189..5fdf8b506d 100644 first_ppc_cpu = POWERPC_CPU(first_cpu); if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && -@@ -3317,6 +3320,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +@@ -3344,6 +3347,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) spapr->host_serial = g_strdup(value); } @@ -64,7 +68,7 @@ index a4372ba189..5fdf8b506d 100644 static void spapr_instance_init(Object *obj) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); -@@ -3395,6 +3412,12 @@ static void spapr_instance_init(Object *obj) +@@ -3422,6 +3439,12 @@ static void spapr_instance_init(Object *obj) spapr_get_host_serial, spapr_set_host_serial); object_property_set_description(obj, 
"host-serial", "Host serial number to advertise in guest device tree"); @@ -77,7 +81,7 @@ index a4372ba189..5fdf8b506d 100644 } static void spapr_machine_finalizefn(Object *obj) -@@ -4652,6 +4675,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4679,6 +4702,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) vmc->client_architecture_support = spapr_vof_client_architecture_support; vmc->quiesce = spapr_vof_quiesce; vmc->setprop = spapr_vof_setprop; @@ -85,15 +89,15 @@ index a4372ba189..5fdf8b506d 100644 } static const TypeInfo spapr_machine_info = { -@@ -4703,6 +4727,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4730,6 +4754,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-7.0 + * pseries-7.1 */ -@@ -4830,6 +4855,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4868,6 +4893,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -101,7 +105,7 @@ index a4372ba189..5fdf8b506d 100644 /* * pseries-4.0 -@@ -4849,6 +4875,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -4887,6 +4913,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; return true; } @@ -110,7 +114,7 @@ index a4372ba189..5fdf8b506d 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -5176,6 +5204,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -5214,6 +5242,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -371,7 +375,7 @@ index fcb5dfe792..ab8fb5bf62 100644 qdev_unrealize(DEVICE(cpu)); return false; diff --git 
a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index f5c33dcc86..4a68e0a901 100644 +index 530d739b1d..6f96972392 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -154,6 +154,7 @@ struct SpaprMachineClass { @@ -382,7 +386,7 @@ index f5c33dcc86..4a68e0a901 100644 bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, -@@ -241,6 +242,9 @@ struct SpaprMachineState { +@@ -256,6 +257,9 @@ struct SpaprMachineState { /* Set by -boot */ char *boot_device; @@ -417,11 +421,23 @@ index 7949a24f5a..f207a9ba01 100644 { const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); +diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c +index cd3ff700ac..1cb49c8087 100644 +--- a/target/ppc/cpu-models.c ++++ b/target/ppc/cpu-models.c +@@ -746,6 +746,7 @@ + /* PowerPC CPU aliases */ + + PowerPCCPUAlias ppc_cpu_aliases[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "405", "405d4" }, + { "405cr", "405crc" }, + { "405gp", "405gpd" }, diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index 047b24ba50..79c5ac50b9 100644 +index a4c893cfad..c6575493b7 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1462,6 +1462,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1464,6 +1464,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -430,10 +446,10 @@ index 047b24ba50..79c5ac50b9 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index dc93b99189..154888cce5 100644 +index 466d0d2f4c..22c100e227 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c -@@ -90,6 +90,7 @@ static int cap_ppc_nested_kvm_hv; +@@ -89,6 +89,7 @@ static int cap_ppc_nested_kvm_hv; static int cap_large_decr; static int cap_fwnmi; static int 
cap_rpt_invalidate; @@ -441,7 +457,7 @@ index dc93b99189..154888cce5 100644 static uint32_t debug_inst_opcode; -@@ -137,6 +138,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) +@@ -136,6 +137,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); kvmppc_get_cpu_characteristics(s); cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); @@ -449,7 +465,7 @@ index dc93b99189..154888cce5 100644 cap_large_decr = kvmppc_get_dec_bits(); cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); /* -@@ -2563,6 +2565,16 @@ int kvmppc_has_cap_rpt_invalidate(void) +@@ -2570,6 +2572,16 @@ int kvmppc_has_cap_rpt_invalidate(void) return cap_rpt_invalidate; } @@ -466,7 +482,7 @@ index dc93b99189..154888cce5 100644 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) { uint32_t host_pvr = mfpvr(); -@@ -2959,3 +2971,18 @@ bool kvm_arch_cpu_check_are_resettable(void) +@@ -2966,3 +2978,18 @@ bool kvm_arch_cpu_check_are_resettable(void) { return true; } diff --git a/0009-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch similarity index 54% rename from 0009-Add-s390x-machine-types.patch rename to 0010-Add-s390x-machine-types.patch index 2d8b554..584b8e0 100644 --- a/0009-Add-s390x-machine-types.patch +++ b/0010-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 680f343e58a50a99d17bc7dedd3ee90980912023 Mon Sep 17 00:00:00 2001 +From 6a14fc5e35a8cec7f049c203d6dc2390fac175f1 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -7,6 +7,10 @@ Adding changes to add RHEL machine types for s390x architecture. 
Signed-off-by: Miroslav Rezanina -- + +Rebase changes (7.1.0 rc0): +- Moved adding rhel_old_machine_deprecation variable to general machine types commit + Merged patches (6.1.0): - 64a9a5c971 hw/s390x: Remove the RHEL7-only machine type - 395516d62b redhat: s390x: add rhel-8.5.0 compat machine @@ -18,62 +22,47 @@ Merged patches (7.0.0): - e6ff4de4f7 redhat: Add s390x machine type compatibility handling for the rebase to v6.2 - 4b0efa7e21 redhat: Add rhel8.6.0 and rhel9.0.0 machine types for s390x - dcc64971bf RHEL: mark old machine types as deprecated (partialy) ---- - hw/core/machine.c | 6 +++ - hw/s390x/s390-virtio-ccw.c | 104 ++++++++++++++++++++++++++++++++++++- - include/hw/boards.h | 2 + - 3 files changed, 111 insertions(+), 1 deletion(-) -diff --git a/hw/core/machine.c b/hw/core/machine.c -index ea430d844e..77202a3570 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -37,6 +37,12 @@ - #include "hw/virtio/virtio.h" - #include "hw/virtio/virtio-pci.h" - -+/* -+ * RHEL only: machine types for previous major releases are deprecated -+ */ -+const char *rhel_old_machine_deprecation = -+ "machine types for previous major releases are deprecated"; -+ - /* - * Mostly the same as hw_compat_6_0 and hw_compat_6_1 - */ +Merged patches (7.1.0 rc0): +- 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (only hw/s390x/s390-virtio-ccw.c chunk) +- c8ad21ca31 redhat: Update s390x machine type compatibility for rebase to QEMU 7.0.0 +- 5bcf8d874c target/s390x: deprecate CPUs older than z14 +--- + hw/s390x/s390-virtio-ccw.c | 107 +++++++++++++++++++++++++++++++ + target/s390x/cpu_models.c | 11 ++++ + target/s390x/cpu_models.h | 2 + + target/s390x/cpu_models_sysemu.c | 2 + + 4 files changed, 122 insertions(+) + diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 90480e7cf9..ec4176a1e0 100644 +index cc3097bfee..4c68d72000 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -767,7 +767,7 @@ bool 
css_migration_enabled(void) - { \ - MachineClass *mc = MACHINE_CLASS(oc); \ - ccw_machine_##suffix##_class_options(mc); \ -- mc->desc = "VirtIO-ccw based S390 machine v" verstr; \ -+ mc->desc = "VirtIO-ccw based S390 machine " verstr; \ - if (latest) { \ - mc->alias = "s390-ccw-virtio"; \ - mc->is_default = true; \ -@@ -791,6 +791,7 @@ bool css_migration_enabled(void) +@@ -792,6 +792,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_7_0_instance_options(MachineState *machine) + static void ccw_machine_7_1_instance_options(MachineState *machine) { } -@@ -1115,6 +1116,107 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1131,6 +1132,112 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); +#endif + ++ +static void ccw_machine_rhel900_instance_options(MachineState *machine) +{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; ++ ++ s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); +} + +static void ccw_machine_rhel900_class_options(MachineClass *mc) +{ ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); +} +DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); + @@ -170,17 +159,84 @@ index 90480e7cf9..ec4176a1e0 100644 static void ccw_machine_register_types(void) { -diff --git a/include/hw/boards.h b/include/hw/boards.h -index c90a19b4d1..bf59275f18 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -470,4 +470,6 @@ extern const size_t hw_compat_rhel_8_0_len; - extern GlobalProperty hw_compat_rhel_7_6[]; - extern const size_t hw_compat_rhel_7_6_len; - -+extern const char *rhel_old_machine_deprecation; +diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c +index c3a4f80633..739770dc15 100644 +--- a/target/s390x/cpu_models.c ++++ 
b/target/s390x/cpu_models.c +@@ -45,6 +45,9 @@ + * of a following release have been a superset of the previous release. With + * generation 15 one base feature and one optional feature have been deprecated. + */ + - #endif ++#define RHEL_CPU_DEPRECATION "use at least 'z14', or 'host' / 'qemu' / 'max'" ++ + static S390CPUDef s390_cpu_defs[] = { + CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"), + CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"), +@@ -854,22 +857,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) + static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) + { + S390CPUClass *xcc = S390_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + /* all base models are migration safe */ + xcc->cpu_def = (const S390CPUDef *) data; + xcc->is_migration_safe = true; + xcc->is_static = true; + xcc->desc = xcc->cpu_def->desc; ++ if (xcc->cpu_def->gen < 14) { ++ cc->deprecation_note = RHEL_CPU_DEPRECATION; ++ } + } + + static void s390_cpu_model_class_init(ObjectClass *oc, void *data) + { + S390CPUClass *xcc = S390_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + /* model that can change between QEMU versions */ + xcc->cpu_def = (const S390CPUDef *) data; + xcc->is_migration_safe = true; + xcc->desc = xcc->cpu_def->desc; ++ if (xcc->cpu_def->gen < 14) { ++ cc->deprecation_note = RHEL_CPU_DEPRECATION; ++ } + } + + static void s390_qemu_cpu_model_class_init(ObjectClass *oc, void *data) +diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h +index 74d1f87e4f..372160bcd7 100644 +--- a/target/s390x/cpu_models.h ++++ b/target/s390x/cpu_models.h +@@ -38,6 +38,8 @@ struct S390CPUDef { + S390FeatBitmap full_feat; + /* used to init full_feat from generated data */ + S390FeatInit full_init; ++ /* if deprecated, provides a suggestion */ ++ const char *deprecation_note; + }; + + /* CPU model based on a CPU definition */ +diff --git a/target/s390x/cpu_models_sysemu.c 
b/target/s390x/cpu_models_sysemu.c +index d086b1c39c..1b9cc66405 100644 +--- a/target/s390x/cpu_models_sysemu.c ++++ b/target/s390x/cpu_models_sysemu.c +@@ -60,6 +60,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) + CpuDefinitionInfo *info; + char *name = g_strdup(object_class_get_name(klass)); + S390CPUClass *scc = S390_CPU_CLASS(klass); ++ CPUClass *cc = CPU_CLASS(klass); + + /* strip off the -s390x-cpu */ + g_strrstr(name, "-" TYPE_S390_CPU)[0] = 0; +@@ -69,6 +70,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) + info->migration_safe = scc->is_migration_safe; + info->q_static = scc->is_static; + info->q_typename = g_strdup(object_class_get_name(klass)); ++ info->deprecated = !!cc->deprecation_note; + /* check for unavailable features */ + if (cpu_list_data->model) { + Object *obj; -- 2.31.1 diff --git a/0010-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch similarity index 76% rename from 0010-Add-x86_64-machine-types.patch rename to 0011-Add-x86_64-machine-types.patch index 7c48967..75c17f0 100644 --- a/0010-Add-x86_64-machine-types.patch +++ b/0011-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 427a575ca57966bc72e1ebf218081da530d435d7 Mon Sep 17 00:00:00 2001 +From 68c1bbec1fae27b527f85f2666c54fff3d499eaf Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -31,45 +31,29 @@ Merged patches (7.0.0): - 6110d865e5 x86: Add q35 RHEL 9.0.0 machine type - dcc64971bf RHEL: mark old machine types as deprecated (partialy) - 6b396f182b RHEL: disable "seqpacket" for "vhost-vsock-device" in rhel8.6.0 + +Merged patches (7.1.0 rc0): +- 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/i386/pc.c chunk) +- 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (x86_64 specific changes) +- 35b5c8554f target/i386: deprecate CPUs older than x86_64-v2 ABI --- - hw/core/machine.c | 10 ++ - hw/i386/pc.c | 135 
+++++++++++++++++++++- - hw/i386/pc_piix.c | 79 ++++++++++++- - hw/i386/pc_q35.c | 227 ++++++++++++++++++++++++++++++++++++- + hw/i386/pc.c | 147 ++++++++++++++++++++++- + hw/i386/pc_piix.c | 83 ++++++++++++- + hw/i386/pc_q35.c | 231 ++++++++++++++++++++++++++++++++++++- hw/s390x/s390-virtio-ccw.c | 1 + - include/hw/boards.h | 5 + - include/hw/i386/pc.h | 24 ++++ + include/hw/boards.h | 2 + + include/hw/i386/pc.h | 27 +++++ + target/i386/cpu.c | 21 ++++ target/i386/kvm/kvm-cpu.c | 1 + target/i386/kvm/kvm.c | 4 + tests/qtest/pvpanic-test.c | 5 +- - 10 files changed, 484 insertions(+), 7 deletions(-) + 10 files changed, 515 insertions(+), 7 deletions(-) -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 77202a3570..28989b6e7b 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -43,6 +43,16 @@ - const char *rhel_old_machine_deprecation = - "machine types for previous major releases are deprecated"; - -+GlobalProperty hw_compat_rhel_8_6[] = { -+ /* hw_compat_rhel_8_6 bz 2065589 */ -+ /* -+ * vhost-vsock device in RHEL 8 kernels doesn't support seqpacket, so -+ * we need do disable it downstream on the latest hw_compat_rhel_8. -+ */ -+ { "vhost-vsock-device", "seqpacket", "off" }, -+}; -+const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); -+ - /* - * Mostly the same as hw_compat_6_0 and hw_compat_6_1 - */ diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index fd55fc725c..263d882af6 100644 +index 7280c02ce3..09b62db9e9 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -375,6 +375,137 @@ GlobalProperty pc_compat_1_4[] = { +@@ -390,6 +390,149 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -78,6 +62,12 @@ index fd55fc725c..263d882af6 100644 + * machine type. 
+ */ +GlobalProperty pc_rhel_compat[] = { ++ /* we don't support s3/s4 suspend */ ++ { "PIIX4_PM", "disable_s3", "1" }, ++ { "PIIX4_PM", "disable_s4", "1" }, ++ { "ICH9-LPC", "disable_s3", "1" }, ++ { "ICH9-LPC", "disable_s4", "1" }, ++ + { TYPE_X86_CPU, "host-phys-bits", "on" }, + { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, + { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, @@ -89,6 +79,12 @@ index fd55fc725c..263d882af6 100644 +}; +const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_9_0_compat[] = { ++ /* pc_rhel_9_0_compat from pc_compat_6_2 */ ++ { "virtio-mem", "unplugged-inaccessible", "off" }, ++}; ++const size_t pc_rhel_9_0_compat_len = G_N_ELEMENTS(pc_rhel_9_0_compat); ++ +GlobalProperty pc_rhel_8_5_compat[] = { + /* pc_rhel_8_5_compat from pc_compat_6_0 */ + { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, @@ -207,7 +203,7 @@ index fd55fc725c..263d882af6 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1738,6 +1869,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1910,6 +2053,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->pvh_enabled = true; pcmc->kvmclock_create_always = true; assert(!mc->get_hotplug_handler); @@ -215,7 +211,7 @@ index fd55fc725c..263d882af6 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1748,7 +1880,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1920,7 +2064,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; mc->block_default_type = IF_IDE; @@ -226,10 +222,10 @@ index fd55fc725c..263d882af6 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index c797e98312..0cacc0d623 100644 +index 
1ec5d6a4f8..52111697cb 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -50,6 +50,7 @@ +@@ -51,6 +51,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/xen.h" @@ -237,7 +233,7 @@ index c797e98312..0cacc0d623 100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -174,8 +175,8 @@ static void pc_init1(MachineState *machine, +@@ -182,8 +183,8 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ @@ -248,7 +244,7 @@ index c797e98312..0cacc0d623 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -314,6 +315,7 @@ static void pc_init1(MachineState *machine, +@@ -331,6 +332,7 @@ static void pc_init1(MachineState *machine, * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). */ @@ -256,7 +252,7 @@ index c797e98312..0cacc0d623 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -967,3 +969,76 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -880,3 +882,80 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -305,6 +301,10 @@ index c797e98312..0cacc0d623 100644 + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_9_0, ++ hw_compat_rhel_9_0_len); ++ compat_props_add(m->compat_props, pc_rhel_9_0_compat, ++ pc_rhel_9_0_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_6, + hw_compat_rhel_8_6_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_5, @@ -334,10 +334,10 @@ index c797e98312..0cacc0d623 100644 +DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, + pc_machine_rhel760_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 
b695f88c45..157160e069 100644 +index ef471f6664..4e7f1a707c 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -197,8 +197,8 @@ static void pc_q35_init(MachineState *machine) +@@ -198,8 +198,8 @@ static void pc_q35_init(MachineState *machine) if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ @@ -348,7 +348,7 @@ index b695f88c45..157160e069 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -342,6 +342,7 @@ static void pc_q35_init(MachineState *machine) +@@ -352,6 +352,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -356,7 +356,7 @@ index b695f88c45..157160e069 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -631,3 +632,225 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -654,3 +655,229 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -397,6 +397,10 @@ index b695f88c45..157160e069 100644 + m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.0.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_9_0, ++ hw_compat_rhel_9_0_len); ++ compat_props_add(m->compat_props, pc_rhel_9_0_compat, ++ pc_rhel_9_0_compat_len); +} + +DEFINE_PC_MACHINE(q35_rhel900, "pc-q35-rhel9.0.0", pc_q35_init_rhel900, @@ -583,10 +587,10 @@ index b695f88c45..157160e069 100644 +DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, + pc_q35_machine_rhel760_options); diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index ec4176a1e0..465a2a09d2 100644 +index 4c68d72000..e1e6b6b5a7 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -1136,6 +1136,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) +@@ -1157,6 +1157,7 @@ static void 
ccw_machine_rhel860_instance_options(MachineState *machine) static void ccw_machine_rhel860_class_options(MachineClass *mc) { ccw_machine_rhel900_class_options(mc); @@ -595,7 +599,7 @@ index ec4176a1e0..465a2a09d2 100644 /* All RHEL machines for prior major releases are deprecated */ mc->deprecation_reason = rhel_old_machine_deprecation; diff --git a/include/hw/boards.h b/include/hw/boards.h -index bf59275f18..d1555665df 100644 +index bfd757c561..0d22c19f4a 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -266,6 +266,8 @@ struct MachineClass { @@ -607,27 +611,20 @@ index bf59275f18..d1555665df 100644 bool ignore_boot_device_suffixes; bool smbus_no_migration_support; bool nvdimm_supported; -@@ -449,6 +451,9 @@ extern const size_t hw_compat_2_2_len; - extern GlobalProperty hw_compat_2_1[]; - extern const size_t hw_compat_2_1_len; - -+extern GlobalProperty hw_compat_rhel_8_6[]; -+extern const size_t hw_compat_rhel_8_6_len; -+ - extern GlobalProperty hw_compat_rhel_8_5[]; - extern const size_t hw_compat_rhel_8_5_len; - diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 91331059d9..419a6ec24b 100644 +index 4d9e95a091..f96bf85df1 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -289,6 +289,30 @@ extern const size_t pc_compat_1_5_len; +@@ -290,6 +290,33 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_9_0_compat[]; ++extern const size_t pc_rhel_9_0_compat_len; ++ +extern GlobalProperty pc_rhel_8_5_compat[]; +extern const size_t pc_rhel_8_5_compat_len; + @@ -649,11 +646,165 @@ index 91331059d9..419a6ec24b 100644 +extern GlobalProperty pc_rhel_7_6_compat[]; +extern const size_t pc_rhel_7_6_compat_len; + - /* Helper for setting model-id for CPU models that changed model-id - * depending on QEMU versions up to QEMU 2.4. 
+ #define DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn) \ + static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \ + { \ +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 1db1278a59..db97eeb8b0 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1832,9 +1832,13 @@ static const CPUCaches epyc_milan_cache_info = { + * PT in VMX operation */ + ++#define RHEL_CPU_DEPRECATION \ ++ "use at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max'" ++ + static const X86CPUDefinition builtin_x86_defs[] = { + { + .name = "qemu64", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 15, +@@ -1855,6 +1859,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "phenom", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 16, +@@ -1887,6 +1892,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "core2duo", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -1929,6 +1935,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "kvm64", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 0xd, + .vendor = CPUID_VENDOR_INTEL, + .family = 15, +@@ -1970,6 +1977,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "qemu32", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 4, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -1984,6 +1992,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "kvm32", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_INTEL, + .family = 15, +@@ -2014,6 +2023,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "coreduo", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2047,6 +2057,7 @@ static const X86CPUDefinition 
builtin_x86_defs[] = { + }, + { + .name = "486", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 1, + .vendor = CPUID_VENDOR_INTEL, + .family = 4, +@@ -2059,6 +2070,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "pentium", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 1, + .vendor = CPUID_VENDOR_INTEL, + .family = 5, +@@ -2071,6 +2083,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "pentium2", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 2, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2083,6 +2096,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "pentium3", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 3, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2095,6 +2109,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "athlon", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 2, + .vendor = CPUID_VENDOR_AMD, + .family = 6, +@@ -2110,6 +2125,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "n270", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2135,6 +2151,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Conroe", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2175,6 +2192,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Penryn", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -3762,6 +3780,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Opteron_G1", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 15, +@@ -3782,6 +3801,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Opteron_G2", ++ .deprecation_note = 
RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 15, +@@ -3804,6 +3824,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Opteron_G3", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 16, diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c -index 5eb955ce9a..74c1396a93 100644 +index 7237378a7d..7b8a3d5af0 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c @@ -137,6 +137,7 @@ static PropValue kvm_default_props[] = { @@ -665,10 +816,10 @@ index 5eb955ce9a..74c1396a93 100644 }; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index 9cf8e03669..6d1e009443 100644 +index f148a6d52f..4e5d4bafc4 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c -@@ -3488,6 +3488,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3626,6 +3626,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -676,7 +827,7 @@ index 9cf8e03669..6d1e009443 100644 kvm_msr_buf_reset(cpu); -@@ -3822,6 +3823,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3981,6 +3982,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -687,7 +838,7 @@ index 9cf8e03669..6d1e009443 100644 case MSR_KVM_ASYNC_PF_INT: env->async_pf_int_msr = msrs[i].data; diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c -index 6dcad2db49..580c2c43d2 100644 +index bc7b7dfc39..96e6dee3a1 100644 --- a/tests/qtest/pvpanic-test.c +++ b/tests/qtest/pvpanic-test.c @@ -17,7 +17,7 @@ static void test_panic_nopause(void) diff --git a/0011-Enable-make-check.patch b/0012-Enable-make-check.patch similarity index 64% rename from 0011-Enable-make-check.patch rename to 0012-Enable-make-check.patch index 832b38d..3987a1c 100644 --- a/0011-Enable-make-check.patch +++ b/0012-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 
5e419e5e0a721bdbbfa6d9b82c8be5c5b3d26a01 Mon Sep 17 00:00:00 2001 +From 0833b7e925c98253c90c9de18758517f2778f77d Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -24,22 +24,88 @@ Rebase changes (7.0.0): - Remove unnecessary changes in iotest 051 - Remove changes in bios-tables-test.c and prom-env-test.c qtests +Rebase changes (7.1.0 rc0): +- Disable bcm2835-dma-test (added upstream) + Merged patches (6.1.0): - 2f129df7d3 redhat: Enable the 'test-block-iothread' test again + +Merged patches (7.1.0 rc0): +- 64d736640e RHEL-only: tests/avocado: Switch aarch64 tests from a53 to a57 --- .distro/qemu-kvm.spec.template | 5 ++--- + tests/avocado/replay_kernel.py | 2 +- + tests/avocado/reverse_debugging.py | 2 +- + tests/avocado/tcg_plugins.py | 6 +++--- tests/qtest/fuzz-e1000e-test.c | 2 +- tests/qtest/fuzz-virtio-scsi-test.c | 2 +- tests/qtest/intel-hda-test.c | 2 +- tests/qtest/libqos/meson.build | 2 +- tests/qtest/lpc-ich9-test.c | 2 +- - tests/qtest/meson.build | 4 ---- + tests/qtest/meson.build | 7 +------ tests/qtest/usb-hcd-xhci-test.c | 4 ++++ tests/qtest/virtio-net-failover.c | 1 + - 9 files changed, 12 insertions(+), 12 deletions(-) + 12 files changed, 18 insertions(+), 19 deletions(-) +diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py +index 0b2b0dc692..3a7b5f0748 100644 +--- a/tests/avocado/replay_kernel.py ++++ b/tests/avocado/replay_kernel.py +@@ -147,7 +147,7 @@ def test_aarch64_virt(self): + """ + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' + '/linux/releases/29/Everything/aarch64/os/images/pxeboot' +diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py +index d2921e70c3..66d185ed42 100644 +--- a/tests/avocado/reverse_debugging.py ++++ b/tests/avocado/reverse_debugging.py +@@ 
-198,7 +198,7 @@ def test_aarch64_virt(self): + """ + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' + '/linux/releases/29/Everything/aarch64/os/images/pxeboot' +diff --git a/tests/avocado/tcg_plugins.py b/tests/avocado/tcg_plugins.py +index 642d2e49e3..93b3afd823 100644 +--- a/tests/avocado/tcg_plugins.py ++++ b/tests/avocado/tcg_plugins.py +@@ -68,7 +68,7 @@ def test_aarch64_virt_insn(self): + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +@@ -94,7 +94,7 @@ def test_aarch64_virt_insn_icount(self): + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +@@ -120,7 +120,7 @@ def test_aarch64_virt_mem_icount(self): + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c -index 66229e6096..947fba73b7 100644 +index 5052883fb6..b5286f4b12 100644 --- a/tests/qtest/fuzz-e1000e-test.c +++ b/tests/qtest/fuzz-e1000e-test.c @@ -17,7 +17,7 @@ static void test_lp1879531_eth_get_rss_ex_dst_addr(void) @@ -52,7 +118,7 @@ index 66229e6096..947fba73b7 100644 qtest_outl(s, 0xcf8, 0x80001010); qtest_outl(s, 0xcfc, 0xe1020000); diff --git a/tests/qtest/fuzz-virtio-scsi-test.c b/tests/qtest/fuzz-virtio-scsi-test.c -index aaf6d10e18..43727d62ac 
100644 +index 71c91b0356..dae4139c17 100644 --- a/tests/qtest/fuzz-virtio-scsi-test.c +++ b/tests/qtest/fuzz-virtio-scsi-test.c @@ -19,7 +19,7 @@ static void test_mmio_oob_from_memory_region_cache(void) @@ -78,20 +144,20 @@ index a58c98e4d1..c8387e39ce 100644 qtest_outl(s, 0xcf8, 0x80000804); diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build -index e988d15791..46f7dcb81a 100644 +index fd5d6e5ae1..d9409560cd 100644 --- a/tests/qtest/libqos/meson.build +++ b/tests/qtest/libqos/meson.build -@@ -41,7 +41,7 @@ libqos_srcs = files('../libqtest.c', +@@ -44,7 +44,7 @@ libqos_srcs = files( 'virtio-rng.c', 'virtio-scsi.c', 'virtio-serial.c', - 'virtio-iommu.c', +# 'virtio-iommu.c', + 'generic-pcihost.c', # qgraph machines: - 'aarch64-xlnx-zcu102-machine.c', diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c -index fe0bef9980..7a9d51579b 100644 +index 8ac95b89f7..cd2102555c 100644 --- a/tests/qtest/lpc-ich9-test.c +++ b/tests/qtest/lpc-ich9-test.c @@ -15,7 +15,7 @@ static void test_lp1878642_pci_bus_get_irq_level_assert(void) @@ -104,18 +170,18 @@ index fe0bef9980..7a9d51579b 100644 qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index d25f82bb5a..67cd32def1 100644 +index be4b30dea2..2c3d62a9fe 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build -@@ -73,7 +73,6 @@ qtests_i386 = \ +@@ -76,7 +76,6 @@ qtests_i386 = \ config_all_devices.has_key('CONFIG_Q35') and \ config_all_devices.has_key('CONFIG_VIRTIO_PCI') and \ slirp.found() ? ['virtio-net-failover'] : []) + \ - (unpack_edk2_blobs ? 
['bios-tables-test'] : []) + \ qtests_pci + \ + qtests_cxl + \ ['fdc-test', - 'ide-test', -@@ -86,7 +85,6 @@ qtests_i386 = \ +@@ -90,7 +89,6 @@ qtests_i386 = \ 'drive_del-test', 'tco-test', 'cpu-plug-test', @@ -123,7 +189,7 @@ index d25f82bb5a..67cd32def1 100644 'vmgenid-test', 'migration-test', 'test-x86-cpuid-compat', -@@ -216,7 +214,6 @@ qtests_arm = \ +@@ -212,15 +210,13 @@ qtests_arm = \ # TODO: once aarch64 TCG is fixed on ARM 32 bit host, make bios-tables-test unconditional qtests_aarch64 = \ @@ -131,7 +197,16 @@ index d25f82bb5a..67cd32def1 100644 (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-test'] : []) + \ (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-swtpm-test'] : []) + \ (config_all_devices.has_key('CONFIG_XLNX_ZYNQMP_ARM') ? ['xlnx-can-test', 'fuzz-xlnx-dp-test'] : []) + \ -@@ -231,7 +228,6 @@ qtests_s390x = \ + ['arm-cpu-features', + 'numa-test', + 'boot-serial-test', +- 'migration-test', +- 'bcm2835-dma-test'] ++ 'migration-test'] + + qtests_s390x = \ + (slirp.found() ? ['pxe-test', 'test-netfilter'] : []) + \ +@@ -228,7 +224,6 @@ qtests_s390x = \ (config_host.has_key('CONFIG_POSIX') ? 
['test-filter-redirector'] : []) + \ ['boot-serial-test', 'drive_del-test', @@ -170,7 +245,7 @@ index 10ef9d2a91..3855873050 100644 qtest_start("-device nec-usb-xhci,id=xhci" diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c -index 78811f1c92..44de8af00c 100644 +index 443ee56de9..1bccb3bab9 100644 --- a/tests/qtest/virtio-net-failover.c +++ b/tests/qtest/virtio-net-failover.c @@ -25,6 +25,7 @@ diff --git a/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch similarity index 87% rename from 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch rename to 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch index c9e42b2..65d06ee 100644 --- a/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From c358fd4c224a9c3f64b4a8fff34cc6b1dc201fa0 Mon Sep 17 00:00:00 2001 +From 2092b90c5d7791bedbdb4ba067c90ae44d355e66 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -32,20 +32,20 @@ Signed-off-by: Bandan Das 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 67a183f17b..1e20f9fd59 100644 +index 939dcc3d4a..acbc6673ce 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c -@@ -45,6 +45,9 @@ - - #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" +@@ -48,6 +48,9 @@ + /* Protected by BQL */ + static KVMRouteChange vfio_route_change; +/* RHEL only: Set once for the first assigned dev */ +static uint16_t device_limit; + static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); - -@@ -2810,9 +2813,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + static void vfio_msi_disable_common(VFIOPCIDevice *vdev); +@@ -2854,9 +2857,30 @@ static void vfio_realize(PCIDevice *pdev, Error 
**errp) ssize_t len; struct stat st; int groupid; @@ -74,10 +74,10 @@ index 67a183f17b..1e20f9fd59 100644 + return; + } + - if (!vdev->vbasedev.sysfsdev) { + if (!vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3249,6 +3273,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3293,6 +3317,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -88,10 +88,10 @@ index 67a183f17b..1e20f9fd59 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 64777516d1..e0fe6ca97e 100644 +index 7c236a52f4..7b7d036a8f 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h -@@ -139,6 +139,7 @@ struct VFIOPCIDevice { +@@ -140,6 +140,7 @@ struct VFIOPCIDevice { EventNotifier err_notifier; EventNotifier req_notifier; int (*resetfn)(struct VFIOPCIDevice *); diff --git a/0013-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch similarity index 88% rename from 0013-Add-support-statement-to-help-output.patch rename to 0014-Add-support-statement-to-help-output.patch index 4826ea4..b040f61 100644 --- a/0013-Add-support-statement-to-help-output.patch +++ b/0014-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From ba0c7a5f6b9a1f75666db6b3b795ddf03695dc26 Mon Sep 17 00:00:00 2001 +From 793720efdf835b13246f02191f6c07a60a726841 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -21,10 +21,10 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c -index 6f646531a0..9d5dab43d2 100644 +index 706bd7cff7..e08ef71108 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c -@@ -831,9 +831,17 @@ static void version(void) +@@ -836,9 
+836,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -42,7 +42,7 @@ index 6f646531a0..9d5dab43d2 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", g_get_prgname()); -@@ -859,6 +867,7 @@ static void help(int exitcode) +@@ -864,6 +872,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); diff --git a/0014-globally-limit-the-maximum-number-of-CPUs.patch b/0014-globally-limit-the-maximum-number-of-CPUs.patch deleted file mode 100644 index 6764a84..0000000 --- a/0014-globally-limit-the-maximum-number-of-CPUs.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 9ebfd2f6cfa8e79c92e58fd169f90cc768fb865a Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Tue, 21 Jan 2014 10:46:52 +0100 -Subject: globally limit the maximum number of CPUs - -We now globally limit the number of VCPUs. -Especially, there is no way one can specify more than -max_cpus VCPUs for a VM. - -This allows us the restore the ppc max_cpus limitation to the upstream -default and minimize the ppc hack in kvm-all.c. - -Signed-off-by: David Hildenbrand -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo Cesar Lemes de Paula ---- - accel/kvm/kvm-all.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index 5f1377ca04..fdf0e4d429 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -2430,6 +2430,18 @@ static int kvm_init(MachineState *ms) - soft_vcpus_limit = kvm_recommended_vcpus(s); - hard_vcpus_limit = kvm_max_vcpus(s); - -+#ifdef HOST_PPC64 -+ /* -+ * On POWER, the kernel advertises a soft limit based on the -+ * number of CPU threads on the host. We want to allow exceeding -+ * this for testing purposes, so we don't want to set hard limit -+ * to soft limit as on x86. 
-+ */ -+#else -+ /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */ -+ hard_vcpus_limit = soft_vcpus_limit; -+#endif -+ - while (nc->name) { - if (nc->num > soft_vcpus_limit) { - warn_report("Number of %s cpus requested (%d) exceeds " --- -2.31.1 - diff --git a/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 722484d..02ec067 100644 --- a/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From 4b6c8cdc52fdf94d4098d278defb3833dce1d189 Mon Sep 17 00:00:00 2001 +From 62589d41546b4200fe5cff7504514bbe3aa72e45 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -36,10 +36,10 @@ index 52d6454b93..d74dbdeca9 100644 .. |I2C| replace:: I\ :sup:`2`\ C .. |I2S| replace:: I\ :sup:`2`\ S diff --git a/qemu-options.hx b/qemu-options.hx -index 34e9b32a5c..924f61ab6d 100644 +index 31c04f7eea..1290fab5ba 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -3233,11 +3233,11 @@ SRST +@@ -3250,11 +3250,11 @@ SRST :: diff --git a/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0016-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch similarity index 95% rename from 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch rename to 0016-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index 2bc687c..e1c2169 100644 --- a/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0016-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From 64a06662cdea0ff62efb122be4eab506b2a842d9 Mon Sep 17 00:00:00 2001 +From e0b9e638d4145e576409c754f525b83f630d7bb0 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts @@ -32,7 +32,7 @@ 
Signed-off-by: Danilo C. L. de Paula 1 file changed, 7 insertions(+) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index 655ab856a0..6aa7f93df9 100644 +index b4283055c1..59b88aadff 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -329,12 +329,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, diff --git a/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch deleted file mode 100644 index 9f08024..0000000 --- a/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +++ /dev/null @@ -1,66 +0,0 @@ -From b72e04cb7e417d9e1c973223747ab3a27abda8b4 Mon Sep 17 00:00:00 2001 -From: Fam Zheng -Date: Wed, 14 Jun 2017 15:37:01 +0200 -Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] - -RH-Author: Fam Zheng -Message-id: <20170614153701.14757-1-famz@redhat.com> -Patchwork-id: 75613 -O-Subject: [RHV-7.4 qemu-kvm-rhev PATCH v3] virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] -Bugzilla: 1378816 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Max Reitz - -We need a fix for RHEL 7.4 and 7.3.z, but unfortunately upstream isn't -ready. If it were, the changes will be too invasive. To have an idea: - -https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg05400.html - -is an incomplete attempt to fix part of the issue, and the remaining -work unfortunately involve even more complex changes. - -As a band-aid, this partially reverts the effect of ef8875b -(virtio-scsi: Remove op blocker for dataplane, since v2.7). We cannot -simply revert that commit as a whole because we already shipped it in -qemu-kvm-rhev 7.3, since when, block jobs has been possible. We should -only block what has been broken. Also, faithfully reverting the above -commit means adding back the removed op blocker, but that is not enough, -because it still crashes when inserting media into an initially empty -scsi-cd. 
- -All in all, scsi-cd on virtio-scsi-dataplane has basically been unusable -unless the scsi-cd never enters an empty state, so, disable it -altogether. Otherwise it would be much more difficult to avoid -crashing. - -Signed-off-by: Fam Zheng -Signed-off-by: Miroslav Rezanina -Signed-off-by: Danilo C. L. de Paula ---- - hw/scsi/virtio-scsi.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 34a968ecfb..7f6da33a8a 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -896,6 +896,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, - AioContext *old_context; - int ret; - -+ /* XXX: Remove this check once block backend is capable of handling -+ * AioContext change upon eject/insert. -+ * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if -+ * data plane is not used, both cases are safe for scsi-cd. */ -+ if (s->ctx && s->ctx != qemu_get_aio_context() && -+ object_dynamic_cast(OBJECT(dev), "scsi-cd")) { -+ error_setg(errp, "scsi-cd is not supported by data plane"); -+ return; -+ } - if (s->ctx && !s->dataplane_fenced) { - if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { - return; --- -2.31.1 - diff --git a/0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/0017-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch similarity index 94% rename from 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch rename to 0017-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch index d7401d5..3a0c99a 100644 --- a/0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +++ b/0017-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -1,4 +1,4 @@ -From 54f9157a918e1404f2f17ce89a9c8b9088c1bc06 Mon Sep 17 00:00:00 2001 +From e87482425c6d8dafe0bd447a1e68710ea6969906 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 20 Aug 2021 18:25:12 +0200 Subject: qcow2: Deprecation warning when 
opening v2 images rw @@ -44,10 +44,10 @@ Rebase notes (6.1.0): 2 files changed, 7 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c -index b5c47931ef..a795e457ac 100644 +index c6c6692fb7..bccfbc0024 100644 --- a/block/qcow2.c +++ b/block/qcow2.c -@@ -1337,6 +1337,12 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, +@@ -1335,6 +1335,12 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, ret = -ENOTSUP; goto fail; } diff --git a/0018-Introduce-upstream-7.0-compat-changes.patch b/0018-Introduce-upstream-7.0-compat-changes.patch new file mode 100644 index 0000000..fa3a289 --- /dev/null +++ b/0018-Introduce-upstream-7.0-compat-changes.patch @@ -0,0 +1,116 @@ +From 0be2889fa221ba98bd436fa4b4542e57f623d31b Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 1 Jun 2022 08:09:04 -0400 +Subject: Introduce upstream 7.0 compat changes + +Addding upstream compat changes to 9.1 structure + +Signed-off-by: Miroslav Rezanina + +--- + +Rebase notes (weekly-220608): +- Added new 7.0 compat added upstream + +Rebase notes (QEMU 7.1.0 RC3): +- Setting legacy_no_rng_seed to true for RHEL machine types +--- + hw/arm/virt.c | 1 + + hw/core/machine.c | 11 +++++++++++ + hw/i386/pc_piix.c | 3 +++ + hw/i386/pc_q35.c | 3 +++ + hw/s390x/s390-virtio-ccw.c | 1 + + include/hw/boards.h | 3 +++ + 6 files changed, 22 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 9737b77eca..e80c5b7d8b 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3514,6 +3514,7 @@ static void rhel900_virt_options(MachineClass *mc) + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + + /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ + vmc->no_tcg_lpa2 = true; +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 
909f75770b..5a3867692d 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -46,6 +46,17 @@ + const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + ++/* ++ * Mostly the same as hw_compat_7_0 ++ */ ++GlobalProperty hw_compat_rhel_9_1[] = { ++ /* hw_compat_rhel_9_1 from hw_compat_7_0 */ ++ { "arm-gicv3-common", "force-8-bit-prio", "on" }, ++ /* hw_compat_rhel_9_1 from hw_compat_7_0 */ ++ { "nvme-ns", "eui64-default", "on"}, ++}; ++const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); ++ + /* + * Mostly the same as hw_compat_6_2 + */ +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 52111697cb..7f56ef4e81 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -927,6 +927,9 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ pcmc->legacy_no_rng_seed = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_9_1, ++ hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, + hw_compat_rhel_9_0_len); + compat_props_add(m->compat_props, pc_rhel_9_0_compat, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 4e7f1a707c..f16dc3bff5 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -692,6 +692,9 @@ static void pc_q35_machine_rhel900_options(MachineClass *m) + m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.0.0"; ++ pcmc->legacy_no_rng_seed = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_9_1, ++ hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, + hw_compat_rhel_9_0_len); + compat_props_add(m->compat_props, pc_rhel_9_0_compat, +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index e1e6b6b5a7..de748a0a57 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1144,6 
+1144,7 @@ static void ccw_machine_rhel900_instance_options(MachineState *machine) + + static void ccw_machine_rhel900_class_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); + } + DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 0d22c19f4a..130f1c3424 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -453,6 +453,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_9_1[]; ++extern const size_t hw_compat_rhel_9_1_len; ++ + extern GlobalProperty hw_compat_rhel_9_0[]; + extern const size_t hw_compat_rhel_9_0_len; + +-- +2.31.1 + diff --git a/0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch b/0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch deleted file mode 100644 index 285cd6b..0000000 --- a/0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch +++ /dev/null @@ -1,135 +0,0 @@ -From 1d6439527aa6ccabb58208c94417778ccc19de39 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 9 Feb 2022 04:16:25 -0500 -Subject: WRB: Introduce RHEL 9.0.0 hw compat structure - -General compatibility structure for post RHEL 9.0.0 rebase. 
- -Signed-off-by: Miroslav Rezanina ---- - hw/core/machine.c | 9 +++++++++ - hw/i386/pc.c | 6 ++++++ - hw/i386/pc_piix.c | 4 ++++ - hw/i386/pc_q35.c | 4 ++++ - hw/s390x/s390-virtio-ccw.c | 2 ++ - include/hw/boards.h | 3 +++ - include/hw/i386/pc.h | 3 +++ - 7 files changed, 31 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 28989b6e7b..dffc3ef4ab 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -53,6 +53,15 @@ GlobalProperty hw_compat_rhel_8_6[] = { - }; - const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); - -+/* -+ * Mostly the same as hw_compat_6_2 -+ */ -+GlobalProperty hw_compat_rhel_9_0[] = { -+ /* hw_compat_rhel_9_0 from hw_compat_6_2 */ -+ { "PIIX4_PM", "x-not-migrate-acpi-index", "on"}, -+}; -+const size_t hw_compat_rhel_9_0_len = G_N_ELEMENTS(hw_compat_rhel_9_0); -+ - /* - * Mostly the same as hw_compat_6_0 and hw_compat_6_1 - */ -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 263d882af6..0886cfe3fe 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -391,6 +391,12 @@ GlobalProperty pc_rhel_compat[] = { - }; - const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); - -+GlobalProperty pc_rhel_9_0_compat[] = { -+ /* pc_rhel_9_0_compat from pc_compat_6_2 */ -+ { "virtio-mem", "unplugged-inaccessible", "off" }, -+}; -+const size_t pc_rhel_9_0_compat_len = G_N_ELEMENTS(pc_rhel_9_0_compat); -+ - GlobalProperty pc_rhel_8_5_compat[] = { - /* pc_rhel_8_5_compat from pc_compat_6_0 */ - { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 0cacc0d623..dc987fe93b 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -1014,6 +1014,10 @@ static void pc_machine_rhel760_options(MachineClass *m) - pcmc->kvmclock_create_always = false; - /* From pc_i440fx_5_1_machine_options() */ - pcmc->pci_root_uid = 1; -+ compat_props_add(m->compat_props, hw_compat_rhel_9_0, -+ hw_compat_rhel_9_0_len); -+ compat_props_add(m->compat_props, pc_rhel_9_0_compat, -+ 
pc_rhel_9_0_compat_len); - compat_props_add(m->compat_props, hw_compat_rhel_8_6, - hw_compat_rhel_8_6_len); - compat_props_add(m->compat_props, hw_compat_rhel_8_5, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 157160e069..52c253c570 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -669,6 +669,10 @@ static void pc_q35_machine_rhel900_options(MachineClass *m) - m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; - pcmc->smbios_stream_product = "RHEL"; - pcmc->smbios_stream_version = "9.0.0"; -+ compat_props_add(m->compat_props, hw_compat_rhel_9_0, -+ hw_compat_rhel_9_0_len); -+ compat_props_add(m->compat_props, pc_rhel_9_0_compat, -+ pc_rhel_9_0_compat_len); - } - - DEFINE_PC_MACHINE(q35_rhel900, "pc-q35-rhel9.0.0", pc_q35_init_rhel900, -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 465a2a09d2..08e0f6a79b 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1118,12 +1118,14 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) - DEFINE_CCW_MACHINE(2_4, "2.4", false); - #endif - -+ - static void ccw_machine_rhel900_instance_options(MachineState *machine) - { - } - - static void ccw_machine_rhel900_class_options(MachineClass *mc) - { -+ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); - } - DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); - -diff --git a/include/hw/boards.h b/include/hw/boards.h -index d1555665df..635e45dd71 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -451,6 +451,9 @@ extern const size_t hw_compat_2_2_len; - extern GlobalProperty hw_compat_2_1[]; - extern const size_t hw_compat_2_1_len; - -+extern GlobalProperty hw_compat_rhel_9_0[]; -+extern const size_t hw_compat_rhel_9_0_len; -+ - extern GlobalProperty hw_compat_rhel_8_6[]; - extern const size_t hw_compat_rhel_8_6_len; - -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 419a6ec24b..a492c420b5 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h 
-@@ -292,6 +292,9 @@ extern const size_t pc_compat_1_4_len; - extern GlobalProperty pc_rhel_compat[]; - extern const size_t pc_rhel_compat_len; - -+extern GlobalProperty pc_rhel_9_0_compat[]; -+extern const size_t pc_rhel_9_0_compat_len; -+ - extern GlobalProperty pc_rhel_8_5_compat[]; - extern const size_t pc_rhel_8_5_compat_len; - --- -2.31.1 - diff --git a/0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch b/0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch deleted file mode 100644 index d3b91d0..0000000 --- a/0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch +++ /dev/null @@ -1,38 +0,0 @@ -From c8ad21ca31892f8798cf82508c2b2c61bf3b9895 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Mon, 4 Apr 2022 12:15:50 +0200 -Subject: redhat: Update s390x machine type compatibility for rebase to QEMU - 7.0.0 - -RH-Author: Thomas Huth -RH-MergeRequest: 143: Update machine type compatibility for QEMU 7.0.0 update [s390x] -RH-Commit: [23/23] 0ecf97d7bdddc50565b5779c64744b353f715cbd -RH-Bugzilla: 2064782 -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand - -No s390x-specific machine class property updates required this time, -only an update to the default qemu cpu model. 
- -Signed-off-by: Thomas Huth ---- - hw/s390x/s390-virtio-ccw.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 08e0f6a79b..4a491d4988 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1121,6 +1121,9 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); - - static void ccw_machine_rhel900_instance_options(MachineState *machine) - { -+ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; -+ -+ s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); - } - - static void ccw_machine_rhel900_class_options(MachineClass *mc) --- -2.31.1 - diff --git a/0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch b/0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch deleted file mode 100644 index f9535a8..0000000 --- a/0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 38b89dc24551258b630f09d1c654b6c72b265c79 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 14 Apr 2022 14:58:43 +0100 -Subject: pc: Move s3/s4 suspend disabling to compat - -RH-Author: Dr. David Alan Gilbert -RH-MergeRequest: 155: 7.0 machine type fixes (x86) -RH-Commit: [26/26] 7d666032d5f5dab1444ebba085f92f2de4e86699 -RH-Bugzilla: 2064771 - -Our downstream patches currently have tweaks in the C code to disable -s3/s4; Thomas pointed out we can just set the property. - -Signed-off-by: Dr. 
David Alan Gilbert ---- - hw/acpi/ich9.c | 4 ++-- - hw/acpi/piix4.c | 4 ++-- - hw/i386/pc.c | 6 ++++++ - 3 files changed, 10 insertions(+), 4 deletions(-) - -diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c -index de1e401cdf..bd9bbade70 100644 ---- a/hw/acpi/ich9.c -+++ b/hw/acpi/ich9.c -@@ -435,8 +435,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) - static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; - pm->acpi_memory_hotplug.is_enabled = true; - pm->cpu_hotplug_legacy = true; -- pm->disable_s3 = 1; -- pm->disable_s4 = 1; -+ pm->disable_s3 = 0; -+ pm->disable_s4 = 0; - pm->s4_val = 2; - pm->use_acpi_hotplug_bridge = true; - pm->keep_pci_slot_hpc = true; -diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 28544e78c3..2fb2b43248 100644 ---- a/hw/acpi/piix4.c -+++ b/hw/acpi/piix4.c -@@ -653,8 +653,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) - - static Property piix4_pm_properties[] = { - DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), -- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), -- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), -+ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), -+ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), - DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), - DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, - use_acpi_hotplug_bridge, true), -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 0886cfe3fe..f98f842f80 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -380,6 +380,12 @@ const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); - * machine type. 
- */ - GlobalProperty pc_rhel_compat[] = { -+ /* we don't support s3/s4 suspend */ -+ { "PIIX4_PM", "disable_s3", "1" }, -+ { "PIIX4_PM", "disable_s4", "1" }, -+ { "ICH9-LPC", "disable_s3", "1" }, -+ { "ICH9-LPC", "disable_s4", "1" }, -+ - { TYPE_X86_CPU, "host-phys-bits", "on" }, - { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, - { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, --- -2.31.1 - diff --git a/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch b/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch deleted file mode 100644 index 30c28f7..0000000 --- a/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch +++ /dev/null @@ -1,87 +0,0 @@ -From ac346634c5731407baa9de709dbd4d5cc6f45301 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Mon, 11 Jul 2022 18:11:12 -0300 -Subject: [PATCH 02/11] Add dirty-sync-missed-zero-copy migration stat -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 111: zero-copy-send fixes & improvements -RH-Commit: [2/6] 115035fd0a4e4b9439c91fb0f5d1a2f9244ba369 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 2107466 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: Dr. David Alan Gilbert - -Signed-off-by: Leonardo Bras -Acked-by: Markus Armbruster -Acked-by: Peter Xu -Reviewed-by: Daniel P. Berrangé -Message-Id: <20220711211112.18951-3-leobras@redhat.com> -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit cf20c897338067ab4b70a4596fdccaf90c7e29a1) -Signed-off-by: Leonardo Bras ---- - migration/migration.c | 2 ++ - monitor/hmp-cmds.c | 5 +++++ - qapi/migration.json | 7 ++++++- - 3 files changed, 13 insertions(+), 1 deletion(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 8fb3eae910..3a3a7a4a50 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1017,6 +1017,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->normal_bytes = ram_counters.normal * page_size; - info->ram->mbps = s->mbps; - info->ram->dirty_sync_count = ram_counters.dirty_sync_count; -+ info->ram->dirty_sync_missed_zero_copy = -+ ram_counters.dirty_sync_missed_zero_copy; - info->ram->postcopy_requests = ram_counters.postcopy_requests; - info->ram->page_size = page_size; - info->ram->multifd_bytes = ram_counters.multifd_bytes; -diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index 634968498b..9cec01de38 100644 ---- a/monitor/hmp-cmds.c -+++ b/monitor/hmp-cmds.c -@@ -305,6 +305,11 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) - monitor_printf(mon, "postcopy ram: %" PRIu64 " kbytes\n", - info->ram->postcopy_bytes >> 10); - } -+ if (info->ram->dirty_sync_missed_zero_copy) { -+ monitor_printf(mon, -+ "Zero-copy-send fallbacks happened: %" PRIu64 " times\n", -+ info->ram->dirty_sync_missed_zero_copy); -+ } - } - - if (info->has_disk) { -diff --git a/qapi/migration.json b/qapi/migration.json -index 5105790cd0..9b38b3c21c 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -55,6 +55,10 @@ - # @postcopy-bytes: The number of bytes sent during the post-copy phase - # (since 7.0). - # -+# @dirty-sync-missed-zero-copy: Number of times dirty RAM synchronization could -+# not avoid copying dirty pages. This is between -+# 0 and @dirty-sync-count * @multifd-channels. 
-+# (since 7.1) - # Since: 0.14 - ## - { 'struct': 'MigrationStats', -@@ -65,7 +69,8 @@ - 'postcopy-requests' : 'int', 'page-size' : 'int', - 'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64', - 'precopy-bytes' : 'uint64', 'downtime-bytes' : 'uint64', -- 'postcopy-bytes' : 'uint64' } } -+ 'postcopy-bytes' : 'uint64', -+ 'dirty-sync-missed-zero-copy' : 'uint64' } } - - ## - # @XBZRLECacheStats: --- -2.31.1 - diff --git a/kvm-Enable-virtio-iommu-pci-on-aarch64.patch b/kvm-Enable-virtio-iommu-pci-on-aarch64.patch deleted file mode 100644 index 3aafd3c..0000000 --- a/kvm-Enable-virtio-iommu-pci-on-aarch64.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 85781b8745fa1581a66f64011d61a4f0c4e103dc Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Fri, 6 May 2022 17:03:11 +0200 -Subject: [PATCH 3/5] Enable virtio-iommu-pci on aarch64 - -RH-Author: Eric Auger -RH-MergeRequest: 83: Enable virtio-iommu-pci on aarch64 -RH-Commit: [1/1] 23e5c0832e52c66adf5fd6daccdc3edddc7ecb8b (eauger1/centos-qemu-kvm) -RH-Bugzilla: 1477099 -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1477099 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45128798 -Upstream Status: RHEL-only -Tested: With virtio-net-pci and virtio-block-pci - -let's enable the virtio-iommu-pci device on aarch64 by -turning CONFIG_VIRTIO_IOMMU on. 
- -Signed-off-by: Eric Auger ---- - configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -index 187938573f..1618d31b89 100644 ---- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -@@ -23,6 +23,7 @@ CONFIG_VFIO_PCI=y - CONFIG_VIRTIO_MMIO=y - CONFIG_VIRTIO_PCI=y - CONFIG_VIRTIO_MEM=y -+CONFIG_VIRTIO_IOMMU=y - CONFIG_XIO3130=y - CONFIG_NVDIMM=y - CONFIG_ACPI_APEI=y --- -2.31.1 - diff --git a/kvm-Enable-virtio-iommu-pci-on-x86_64.patch b/kvm-Enable-virtio-iommu-pci-on-x86_64.patch deleted file mode 100644 index 2eb24df..0000000 --- a/kvm-Enable-virtio-iommu-pci-on-x86_64.patch +++ /dev/null @@ -1,41 +0,0 @@ -From c531a39171201f8a1d063e6af752e5d629c1b4bf Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 9 Jun 2022 11:35:18 +0200 -Subject: [PATCH 4/6] Enable virtio-iommu-pci on x86_64 - -RH-Author: Eric Auger -RH-MergeRequest: 100: Enable virtio-iommu-pci on x86_64 -RH-Commit: [1/1] a164af477efc7cb9d3d76a0e644f198f7c9fb2b5 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2094252 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: MST -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094252 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45871185 -Upstream Status: RHEL-only -Tested: With virtio-net-pci and virtio-block-pci - -let's enable the virtio-iommu-pci device on x86_64 by -turning CONFIG_VIRTIO_IOMMU on. 
- -Signed-off-by: Eric Auger ---- - configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -index d0c9e66641..3850b9de72 100644 ---- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -90,6 +90,7 @@ CONFIG_VHOST_USER_BLK=y - CONFIG_VIRTIO_MEM=y - CONFIG_VIRTIO_PCI=y - CONFIG_VIRTIO_VGA=y -+CONFIG_VIRTIO_IOMMU=y - CONFIG_VMMOUSE=y - CONFIG_VMPORT=y - CONFIG_VTD=y --- -2.31.1 - diff --git a/kvm-Introduce-event-loop-base-abstract-class.patch b/kvm-Introduce-event-loop-base-abstract-class.patch deleted file mode 100644 index 9f987ea..0000000 --- a/kvm-Introduce-event-loop-base-abstract-class.patch +++ /dev/null @@ -1,503 +0,0 @@ -From 1163da281c178359dd7e1cf1ced5c98caa600f8e Mon Sep 17 00:00:00 2001 -From: Nicolas Saenz Julienne -Date: Mon, 25 Apr 2022 09:57:21 +0200 -Subject: [PATCH 01/16] Introduce event-loop-base abstract class - -RH-Author: Nicolas Saenz Julienne -RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size -RH-Commit: [1/3] 5817205d8f56cc4aa98bd5963ecac54a59bad990 -RH-Bugzilla: 2031024 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi - -Introduce the 'event-loop-base' abstract class, it'll hold the -properties common to all event loops and provide the necessary hooks for -their creation and maintenance. Then have iothread inherit from it. - -EventLoopBaseClass is defined as user creatable and provides a hook for -its children to attach themselves to the user creatable class 'complete' -function. It also provides an update_params() callback to propagate -property changes onto its children. - -The new 'event-loop-base' class will live in the root directory. 
It is -built on its own using the 'link_whole' option (there are no direct -function dependencies between the class and its children, it all happens -trough 'constructor' magic). And also imposes new compilation -dependencies: - - qom <- event-loop-base <- blockdev (iothread.c) - -And in subsequent patches: - - qom <- event-loop-base <- qemuutil (util/main-loop.c) - -All this forced some amount of reordering in meson.build: - - - Moved qom build definition before qemuutil. Doing it the other way - around (i.e. moving qemuutil after qom) isn't possible as a lot of - core libraries that live in between the two depend on it. - - - Process the 'hw' subdir earlier, as it introduces files into the - 'qom' source set. - -No functional changes intended. - -Signed-off-by: Nicolas Saenz Julienne -Reviewed-by: Stefan Hajnoczi -Acked-by: Markus Armbruster -Message-id: 20220425075723.20019-2-nsaenzju@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 7d5983e3c8c40b1d0668faba31d79905c4fadd7d) ---- - event-loop-base.c | 104 +++++++++++++++++++++++++++++++ - include/sysemu/event-loop-base.h | 36 +++++++++++ - include/sysemu/iothread.h | 6 +- - iothread.c | 65 ++++++------------- - meson.build | 23 ++++--- - qapi/qom.json | 22 +++++-- - 6 files changed, 192 insertions(+), 64 deletions(-) - create mode 100644 event-loop-base.c - create mode 100644 include/sysemu/event-loop-base.h - -diff --git a/event-loop-base.c b/event-loop-base.c -new file mode 100644 -index 0000000000..a924c73a7c ---- /dev/null -+++ b/event-loop-base.c -@@ -0,0 +1,104 @@ -+/* -+ * QEMU event-loop base -+ * -+ * Copyright (C) 2022 Red Hat Inc -+ * -+ * Authors: -+ * Stefan Hajnoczi -+ * Nicolas Saenz Julienne -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. 
-+ */ -+ -+#include "qemu/osdep.h" -+#include "qom/object_interfaces.h" -+#include "qapi/error.h" -+#include "sysemu/event-loop-base.h" -+ -+typedef struct { -+ const char *name; -+ ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */ -+} EventLoopBaseParamInfo; -+ -+static EventLoopBaseParamInfo aio_max_batch_info = { -+ "aio-max-batch", offsetof(EventLoopBase, aio_max_batch), -+}; -+ -+static void event_loop_base_get_param(Object *obj, Visitor *v, -+ const char *name, void *opaque, Error **errp) -+{ -+ EventLoopBase *event_loop_base = EVENT_LOOP_BASE(obj); -+ EventLoopBaseParamInfo *info = opaque; -+ int64_t *field = (void *)event_loop_base + info->offset; -+ -+ visit_type_int64(v, name, field, errp); -+} -+ -+static void event_loop_base_set_param(Object *obj, Visitor *v, -+ const char *name, void *opaque, Error **errp) -+{ -+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(obj); -+ EventLoopBase *base = EVENT_LOOP_BASE(obj); -+ EventLoopBaseParamInfo *info = opaque; -+ int64_t *field = (void *)base + info->offset; -+ int64_t value; -+ -+ if (!visit_type_int64(v, name, &value, errp)) { -+ return; -+ } -+ -+ if (value < 0) { -+ error_setg(errp, "%s value must be in range [0, %" PRId64 "]", -+ info->name, INT64_MAX); -+ return; -+ } -+ -+ *field = value; -+ -+ if (bc->update_params) { -+ bc->update_params(base, errp); -+ } -+ -+ return; -+} -+ -+static void event_loop_base_complete(UserCreatable *uc, Error **errp) -+{ -+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc); -+ EventLoopBase *base = EVENT_LOOP_BASE(uc); -+ -+ if (bc->init) { -+ bc->init(base, errp); -+ } -+} -+ -+static void event_loop_base_class_init(ObjectClass *klass, void *class_data) -+{ -+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); -+ ucc->complete = event_loop_base_complete; -+ -+ object_class_property_add(klass, "aio-max-batch", "int", -+ event_loop_base_get_param, -+ event_loop_base_set_param, -+ NULL, &aio_max_batch_info); -+} -+ -+static const TypeInfo 
event_loop_base_info = { -+ .name = TYPE_EVENT_LOOP_BASE, -+ .parent = TYPE_OBJECT, -+ .instance_size = sizeof(EventLoopBase), -+ .class_size = sizeof(EventLoopBaseClass), -+ .class_init = event_loop_base_class_init, -+ .abstract = true, -+ .interfaces = (InterfaceInfo[]) { -+ { TYPE_USER_CREATABLE }, -+ { } -+ } -+}; -+ -+static void register_types(void) -+{ -+ type_register_static(&event_loop_base_info); -+} -+type_init(register_types); -diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h -new file mode 100644 -index 0000000000..8e77d8b69f ---- /dev/null -+++ b/include/sysemu/event-loop-base.h -@@ -0,0 +1,36 @@ -+/* -+ * QEMU event-loop backend -+ * -+ * Copyright (C) 2022 Red Hat Inc -+ * -+ * Authors: -+ * Nicolas Saenz Julienne -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+#ifndef QEMU_EVENT_LOOP_BASE_H -+#define QEMU_EVENT_LOOP_BASE_H -+ -+#include "qom/object.h" -+#include "block/aio.h" -+#include "qemu/typedefs.h" -+ -+#define TYPE_EVENT_LOOP_BASE "event-loop-base" -+OBJECT_DECLARE_TYPE(EventLoopBase, EventLoopBaseClass, -+ EVENT_LOOP_BASE) -+ -+struct EventLoopBaseClass { -+ ObjectClass parent_class; -+ -+ void (*init)(EventLoopBase *base, Error **errp); -+ void (*update_params)(EventLoopBase *base, Error **errp); -+}; -+ -+struct EventLoopBase { -+ Object parent; -+ -+ /* AioContext AIO engine parameters */ -+ int64_t aio_max_batch; -+}; -+#endif -diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h -index 7f714bd136..8f8601d6ab 100644 ---- a/include/sysemu/iothread.h -+++ b/include/sysemu/iothread.h -@@ -17,11 +17,12 @@ - #include "block/aio.h" - #include "qemu/thread.h" - #include "qom/object.h" -+#include "sysemu/event-loop-base.h" - - #define TYPE_IOTHREAD "iothread" - - struct IOThread { -- Object parent_obj; -+ EventLoopBase parent_obj; - - QemuThread thread; - AioContext *ctx; -@@ -37,9 +38,6 @@ struct 
IOThread { - int64_t poll_max_ns; - int64_t poll_grow; - int64_t poll_shrink; -- -- /* AioContext AIO engine parameters */ -- int64_t aio_max_batch; - }; - typedef struct IOThread IOThread; - -diff --git a/iothread.c b/iothread.c -index 0f98af0f2a..8fa2f3bfb8 100644 ---- a/iothread.c -+++ b/iothread.c -@@ -17,6 +17,7 @@ - #include "qemu/module.h" - #include "block/aio.h" - #include "block/block.h" -+#include "sysemu/event-loop-base.h" - #include "sysemu/iothread.h" - #include "qapi/error.h" - #include "qapi/qapi-commands-misc.h" -@@ -152,10 +153,15 @@ static void iothread_init_gcontext(IOThread *iothread) - iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE); - } - --static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) -+static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp) - { -+ IOThread *iothread = IOTHREAD(base); - ERRP_GUARD(); - -+ if (!iothread->ctx) { -+ return; -+ } -+ - aio_context_set_poll_params(iothread->ctx, - iothread->poll_max_ns, - iothread->poll_grow, -@@ -166,14 +172,15 @@ static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) - } - - aio_context_set_aio_params(iothread->ctx, -- iothread->aio_max_batch, -+ iothread->parent_obj.aio_max_batch, - errp); - } - --static void iothread_complete(UserCreatable *obj, Error **errp) -+ -+static void iothread_init(EventLoopBase *base, Error **errp) - { - Error *local_error = NULL; -- IOThread *iothread = IOTHREAD(obj); -+ IOThread *iothread = IOTHREAD(base); - char *thread_name; - - iothread->stopping = false; -@@ -189,7 +196,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp) - */ - iothread_init_gcontext(iothread); - -- iothread_set_aio_context_params(iothread, &local_error); -+ iothread_set_aio_context_params(base, &local_error); - if (local_error) { - error_propagate(errp, local_error); - aio_context_unref(iothread->ctx); -@@ -201,7 +208,7 @@ static void iothread_complete(UserCreatable *obj, Error 
**errp) - * to inherit. - */ - thread_name = g_strdup_printf("IO %s", -- object_get_canonical_path_component(OBJECT(obj))); -+ object_get_canonical_path_component(OBJECT(base))); - qemu_thread_create(&iothread->thread, thread_name, iothread_run, - iothread, QEMU_THREAD_JOINABLE); - g_free(thread_name); -@@ -226,9 +233,6 @@ static IOThreadParamInfo poll_grow_info = { - static IOThreadParamInfo poll_shrink_info = { - "poll-shrink", offsetof(IOThread, poll_shrink), - }; --static IOThreadParamInfo aio_max_batch_info = { -- "aio-max-batch", offsetof(IOThread, aio_max_batch), --}; - - static void iothread_get_param(Object *obj, Visitor *v, - const char *name, IOThreadParamInfo *info, Error **errp) -@@ -288,35 +292,12 @@ static void iothread_set_poll_param(Object *obj, Visitor *v, - } - } - --static void iothread_get_aio_param(Object *obj, Visitor *v, -- const char *name, void *opaque, Error **errp) --{ -- IOThreadParamInfo *info = opaque; -- -- iothread_get_param(obj, v, name, info, errp); --} -- --static void iothread_set_aio_param(Object *obj, Visitor *v, -- const char *name, void *opaque, Error **errp) --{ -- IOThread *iothread = IOTHREAD(obj); -- IOThreadParamInfo *info = opaque; -- -- if (!iothread_set_param(obj, v, name, info, errp)) { -- return; -- } -- -- if (iothread->ctx) { -- aio_context_set_aio_params(iothread->ctx, -- iothread->aio_max_batch, -- errp); -- } --} -- - static void iothread_class_init(ObjectClass *klass, void *class_data) - { -- UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); -- ucc->complete = iothread_complete; -+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(klass); -+ -+ bc->init = iothread_init; -+ bc->update_params = iothread_set_aio_context_params; - - object_class_property_add(klass, "poll-max-ns", "int", - iothread_get_poll_param, -@@ -330,23 +311,15 @@ static void iothread_class_init(ObjectClass *klass, void *class_data) - iothread_get_poll_param, - iothread_set_poll_param, - NULL, &poll_shrink_info); -- 
object_class_property_add(klass, "aio-max-batch", "int", -- iothread_get_aio_param, -- iothread_set_aio_param, -- NULL, &aio_max_batch_info); - } - - static const TypeInfo iothread_info = { - .name = TYPE_IOTHREAD, -- .parent = TYPE_OBJECT, -+ .parent = TYPE_EVENT_LOOP_BASE, - .class_init = iothread_class_init, - .instance_size = sizeof(IOThread), - .instance_init = iothread_instance_init, - .instance_finalize = iothread_instance_finalize, -- .interfaces = (InterfaceInfo[]) { -- {TYPE_USER_CREATABLE}, -- {} -- }, - }; - - static void iothread_register_types(void) -@@ -383,7 +356,7 @@ static int query_one_iothread(Object *object, void *opaque) - info->poll_max_ns = iothread->poll_max_ns; - info->poll_grow = iothread->poll_grow; - info->poll_shrink = iothread->poll_shrink; -- info->aio_max_batch = iothread->aio_max_batch; -+ info->aio_max_batch = iothread->parent_obj.aio_max_batch; - - QAPI_LIST_APPEND(*tail, info); - return 0; -diff --git a/meson.build b/meson.build -index 6f7e430f0f..b9c919a55e 100644 ---- a/meson.build -+++ b/meson.build -@@ -2804,6 +2804,7 @@ subdir('qom') - subdir('authz') - subdir('crypto') - subdir('ui') -+subdir('hw') - - - if enable_modules -@@ -2811,6 +2812,18 @@ if enable_modules - modulecommon = declare_dependency(link_whole: libmodulecommon, compile_args: '-DBUILD_DSO') - endif - -+qom_ss = qom_ss.apply(config_host, strict: false) -+libqom = static_library('qom', qom_ss.sources() + genh, -+ dependencies: [qom_ss.dependencies()], -+ name_suffix: 'fa') -+qom = declare_dependency(link_whole: libqom) -+ -+event_loop_base = files('event-loop-base.c') -+event_loop_base = static_library('event-loop-base', sources: event_loop_base + genh, -+ build_by_default: true) -+event_loop_base = declare_dependency(link_whole: event_loop_base, -+ dependencies: [qom]) -+ - stub_ss = stub_ss.apply(config_all, strict: false) - - util_ss.add_all(trace_ss) -@@ -2897,7 +2910,6 @@ subdir('monitor') - subdir('net') - subdir('replay') - subdir('semihosting') 
--subdir('hw') - subdir('tcg') - subdir('fpu') - subdir('accel') -@@ -3022,13 +3034,6 @@ qemu_syms = custom_target('qemu.syms', output: 'qemu.syms', - capture: true, - command: [undefsym, nm, '@INPUT@']) - --qom_ss = qom_ss.apply(config_host, strict: false) --libqom = static_library('qom', qom_ss.sources() + genh, -- dependencies: [qom_ss.dependencies()], -- name_suffix: 'fa') -- --qom = declare_dependency(link_whole: libqom) -- - authz_ss = authz_ss.apply(config_host, strict: false) - libauthz = static_library('authz', authz_ss.sources() + genh, - dependencies: [authz_ss.dependencies()], -@@ -3081,7 +3086,7 @@ libblockdev = static_library('blockdev', blockdev_ss.sources() + genh, - build_by_default: false) - - blockdev = declare_dependency(link_whole: [libblockdev], -- dependencies: [block]) -+ dependencies: [block, event_loop_base]) - - qmp_ss = qmp_ss.apply(config_host, strict: false) - libqmp = static_library('qmp', qmp_ss.sources() + genh, -diff --git a/qapi/qom.json b/qapi/qom.json -index eeb5395ff3..a2439533c5 100644 ---- a/qapi/qom.json -+++ b/qapi/qom.json -@@ -499,6 +499,20 @@ - '*repeat': 'bool', - '*grab-toggle': 'GrabToggleKeys' } } - -+## -+# @EventLoopBaseProperties: -+# -+# Common properties for event loops -+# -+# @aio-max-batch: maximum number of requests in a batch for the AIO engine, -+# 0 means that the engine will use its default. -+# (default: 0) -+# -+# Since: 7.1 -+## -+{ 'struct': 'EventLoopBaseProperties', -+ 'data': { '*aio-max-batch': 'int' } } -+ - ## - # @IothreadProperties: - # -@@ -516,17 +530,15 @@ - # algorithm detects it is spending too long polling without - # encountering events. 0 selects a default behaviour (default: 0) - # --# @aio-max-batch: maximum number of requests in a batch for the AIO engine, --# 0 means that the engine will use its default --# (default:0, since 6.1) -+# The @aio-max-batch option is available since 6.1. 
- # - # Since: 2.0 - ## - { 'struct': 'IothreadProperties', -+ 'base': 'EventLoopBaseProperties', - 'data': { '*poll-max-ns': 'int', - '*poll-grow': 'int', -- '*poll-shrink': 'int', -- '*aio-max-batch': 'int' } } -+ '*poll-shrink': 'int' } } - - ## - # @MemoryBackendProperties: --- -2.31.1 - diff --git a/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch b/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch deleted file mode 100644 index c7b8898..0000000 --- a/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch +++ /dev/null @@ -1,420 +0,0 @@ -From cda3fcf14f2883fea633e25256f6c14a71271adf Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:31 -0300 -Subject: [PATCH 08/18] QIOChannel: Add flags on io_writev and introduce - io_flush callback -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [2/11] 06acfb6b0cb2c25733c2eb198011f7623b5a7024 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -Add flags to io_writev and introduce io_flush as optional callback to -QIOChannelClass, allowing the implementation of zero copy writes by -subclasses. - -How to use them: -- Write data using qio_channel_writev*(...,QIO_CHANNEL_WRITE_FLAG_ZERO_COPY), -- Wait write completion with qio_channel_flush(). - -Notes: -As some zero copy write implementations work asynchronously, it's -recommended to keep the write buffer untouched until the return of -qio_channel_flush(), to avoid the risk of sending an updated buffer -instead of the buffer state during write. - -As io_flush callback is optional, if a subclass does not implement it, then: -- io_flush will return 0 without changing anything. - -Also, some functions like qio_channel_writev_full_all() were adapted to -receive a flag parameter. 
That allows shared code between zero copy and -non-zero copy writev, and also an easier implementation on new flags. - -Signed-off-by: Leonardo Bras -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Peter Xu -Reviewed-by: Juan Quintela -Message-Id: <20220513062836.965425-3-leobras@redhat.com> -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit b88651cb4d4fa416fdbb6afaf5b26ec8c035eaad) -Signed-off-by: Leonardo Bras ---- - chardev/char-io.c | 2 +- - hw/remote/mpqemu-link.c | 2 +- - include/io/channel.h | 38 +++++++++++++++++++++- - io/channel-buffer.c | 1 + - io/channel-command.c | 1 + - io/channel-file.c | 1 + - io/channel-socket.c | 2 ++ - io/channel-tls.c | 1 + - io/channel-websock.c | 1 + - io/channel.c | 49 +++++++++++++++++++++++------ - migration/rdma.c | 1 + - scsi/pr-manager-helper.c | 2 +- - tests/unit/test-io-channel-socket.c | 1 + - 13 files changed, 88 insertions(+), 14 deletions(-) - -diff --git a/chardev/char-io.c b/chardev/char-io.c -index 8ced184160..4451128cba 100644 ---- a/chardev/char-io.c -+++ b/chardev/char-io.c -@@ -122,7 +122,7 @@ int io_channel_send_full(QIOChannel *ioc, - - ret = qio_channel_writev_full( - ioc, &iov, 1, -- fds, nfds, NULL); -+ fds, nfds, 0, NULL); - if (ret == QIO_CHANNEL_ERR_BLOCK) { - if (offset) { - return offset; -diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c -index 7e841820e5..e8f556bd27 100644 ---- a/hw/remote/mpqemu-link.c -+++ b/hw/remote/mpqemu-link.c -@@ -69,7 +69,7 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) - } - - if (!qio_channel_writev_full_all(ioc, send, G_N_ELEMENTS(send), -- fds, nfds, errp)) { -+ fds, nfds, 0, errp)) { - ret = true; - } else { - trace_mpqemu_send_io_error(msg->cmd, msg->size, nfds); -diff --git a/include/io/channel.h b/include/io/channel.h -index 88988979f8..c680ee7480 100644 ---- a/include/io/channel.h -+++ b/include/io/channel.h -@@ -32,12 +32,15 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, - - #define QIO_CHANNEL_ERR_BLOCK 
-2 - -+#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 -+ - typedef enum QIOChannelFeature QIOChannelFeature; - - enum QIOChannelFeature { - QIO_CHANNEL_FEATURE_FD_PASS, - QIO_CHANNEL_FEATURE_SHUTDOWN, - QIO_CHANNEL_FEATURE_LISTEN, -+ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, - }; - - -@@ -104,6 +107,7 @@ struct QIOChannelClass { - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp); - ssize_t (*io_readv)(QIOChannel *ioc, - const struct iovec *iov, -@@ -136,6 +140,8 @@ struct QIOChannelClass { - IOHandler *io_read, - IOHandler *io_write, - void *opaque); -+ int (*io_flush)(QIOChannel *ioc, -+ Error **errp); - }; - - /* General I/O handling functions */ -@@ -228,6 +234,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, - * @niov: the length of the @iov array - * @fds: an array of file handles to send - * @nfds: number of file handles in @fds -+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*) - * @errp: pointer to a NULL-initialized error object - * - * Write data to the IO channel, reading it from the -@@ -260,6 +267,7 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp); - - /** -@@ -837,6 +845,7 @@ int qio_channel_readv_full_all(QIOChannel *ioc, - * @niov: the length of the @iov array - * @fds: an array of file handles to send - * @nfds: number of file handles in @fds -+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*) - * @errp: pointer to a NULL-initialized error object - * - * -@@ -846,6 +855,14 @@ int qio_channel_readv_full_all(QIOChannel *ioc, - * to be written, yielding from the current coroutine - * if required. - * -+ * If QIO_CHANNEL_WRITE_FLAG_ZERO_COPY is passed in flags, -+ * instead of waiting for all requested data to be written, -+ * this function will wait until it's all queued for writing. -+ * In this case, if the buffer gets changed between queueing and -+ * sending, the updated buffer will be sent. 
If this is not a -+ * desired behavior, it's suggested to call qio_channel_flush() -+ * before reusing the buffer. -+ * - * Returns: 0 if all bytes were written, or -1 on error - */ - -@@ -853,6 +870,25 @@ int qio_channel_writev_full_all(QIOChannel *ioc, - const struct iovec *iov, - size_t niov, - int *fds, size_t nfds, -- Error **errp); -+ int flags, Error **errp); -+ -+/** -+ * qio_channel_flush: -+ * @ioc: the channel object -+ * @errp: pointer to a NULL-initialized error object -+ * -+ * Will block until every packet queued with -+ * qio_channel_writev_full() + QIO_CHANNEL_WRITE_FLAG_ZERO_COPY -+ * is sent, or return in case of any error. -+ * -+ * If not implemented, acts as a no-op, and returns 0. -+ * -+ * Returns -1 if any error is found, -+ * 1 if every send failed to use zero copy. -+ * 0 otherwise. -+ */ -+ -+int qio_channel_flush(QIOChannel *ioc, -+ Error **errp); - - #endif /* QIO_CHANNEL_H */ -diff --git a/io/channel-buffer.c b/io/channel-buffer.c -index baa4e2b089..bf52011be2 100644 ---- a/io/channel-buffer.c -+++ b/io/channel-buffer.c -@@ -81,6 +81,7 @@ static ssize_t qio_channel_buffer_writev(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); -diff --git a/io/channel-command.c b/io/channel-command.c -index 338da73ade..54560464ae 100644 ---- a/io/channel-command.c -+++ b/io/channel-command.c -@@ -258,6 +258,7 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); -diff --git a/io/channel-file.c b/io/channel-file.c -index d7cf6d278f..ef6807a6be 100644 ---- a/io/channel-file.c -+++ b/io/channel-file.c -@@ -114,6 +114,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); -diff --git 
a/io/channel-socket.c b/io/channel-socket.c -index 7a8d9f69c9..a1be2197ca 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -525,6 +525,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); -@@ -620,6 +621,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); -diff --git a/io/channel-tls.c b/io/channel-tls.c -index 2ae1b92fc0..4ce890a538 100644 ---- a/io/channel-tls.c -+++ b/io/channel-tls.c -@@ -301,6 +301,7 @@ static ssize_t qio_channel_tls_writev(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); -diff --git a/io/channel-websock.c b/io/channel-websock.c -index 55145a6a8c..9619906ac3 100644 ---- a/io/channel-websock.c -+++ b/io/channel-websock.c -@@ -1127,6 +1127,7 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); -diff --git a/io/channel.c b/io/channel.c -index e8b019dc36..0640941ac5 100644 ---- a/io/channel.c -+++ b/io/channel.c -@@ -72,18 +72,32 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); - -- if ((fds || nfds) && -- !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { -+ if (fds || nfds) { -+ if (!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { -+ error_setg_errno(errp, EINVAL, -+ "Channel does not support file descriptor passing"); -+ return -1; -+ } -+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { -+ error_setg_errno(errp, EINVAL, -+ "Zero Copy does not support file descriptor passing"); 
-+ return -1; -+ } -+ } -+ -+ if ((flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) && -+ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { - error_setg_errno(errp, EINVAL, -- "Channel does not support file descriptor passing"); -+ "Requested Zero Copy feature is not available"); - return -1; - } - -- return klass->io_writev(ioc, iov, niov, fds, nfds, errp); -+ return klass->io_writev(ioc, iov, niov, fds, nfds, flags, errp); - } - - -@@ -217,14 +231,14 @@ int qio_channel_writev_all(QIOChannel *ioc, - size_t niov, - Error **errp) - { -- return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, errp); -+ return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, 0, errp); - } - - int qio_channel_writev_full_all(QIOChannel *ioc, - const struct iovec *iov, - size_t niov, - int *fds, size_t nfds, -- Error **errp) -+ int flags, Error **errp) - { - int ret = -1; - struct iovec *local_iov = g_new(struct iovec, niov); -@@ -237,8 +251,10 @@ int qio_channel_writev_full_all(QIOChannel *ioc, - - while (nlocal_iov > 0) { - ssize_t len; -- len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, nfds, -- errp); -+ -+ len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, -+ nfds, flags, errp); -+ - if (len == QIO_CHANNEL_ERR_BLOCK) { - if (qemu_in_coroutine()) { - qio_channel_yield(ioc, G_IO_OUT); -@@ -277,7 +293,7 @@ ssize_t qio_channel_writev(QIOChannel *ioc, - size_t niov, - Error **errp) - { -- return qio_channel_writev_full(ioc, iov, niov, NULL, 0, errp); -+ return qio_channel_writev_full(ioc, iov, niov, NULL, 0, 0, errp); - } - - -@@ -297,7 +313,7 @@ ssize_t qio_channel_write(QIOChannel *ioc, - Error **errp) - { - struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; -- return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, errp); -+ return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, 0, errp); - } - - -@@ -473,6 +489,19 @@ off_t qio_channel_io_seek(QIOChannel *ioc, - return klass->io_seek(ioc, offset, whence, errp); - } - -+int 
qio_channel_flush(QIOChannel *ioc, -+ Error **errp) -+{ -+ QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); -+ -+ if (!klass->io_flush || -+ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { -+ return 0; -+ } -+ -+ return klass->io_flush(ioc, errp); -+} -+ - - static void qio_channel_restart_read(void *opaque) - { -diff --git a/migration/rdma.c b/migration/rdma.c -index ef1e65ec36..672d1958a9 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -2840,6 +2840,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, - size_t niov, - int *fds, - size_t nfds, -+ int flags, - Error **errp) - { - QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); -diff --git a/scsi/pr-manager-helper.c b/scsi/pr-manager-helper.c -index 451c7631b7..3be52a98d5 100644 ---- a/scsi/pr-manager-helper.c -+++ b/scsi/pr-manager-helper.c -@@ -77,7 +77,7 @@ static int pr_manager_helper_write(PRManagerHelper *pr_mgr, - iov.iov_base = (void *)buf; - iov.iov_len = sz; - n_written = qio_channel_writev_full(QIO_CHANNEL(pr_mgr->ioc), &iov, 1, -- nfds ? &fd : NULL, nfds, errp); -+ nfds ? 
&fd : NULL, nfds, 0, errp); - - if (n_written <= 0) { - assert(n_written != QIO_CHANNEL_ERR_BLOCK); -diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c -index c49eec1f03..6713886d02 100644 ---- a/tests/unit/test-io-channel-socket.c -+++ b/tests/unit/test-io-channel-socket.c -@@ -444,6 +444,7 @@ static void test_io_channel_unix_fd_pass(void) - G_N_ELEMENTS(iosend), - fdsend, - G_N_ELEMENTS(fdsend), -+ 0, - &error_abort); - - qio_channel_readv_full(dst, --- -2.35.3 - diff --git a/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch b/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch deleted file mode 100644 index 0fd4b6c..0000000 --- a/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch +++ /dev/null @@ -1,56 +0,0 @@ -From cb6dc39a5e5d2d981b4b1e983042b3fbb529d5d1 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Thu, 4 Aug 2022 04:10:43 -0300 -Subject: [PATCH 06/11] QIOChannelSocket: Add support for MSG_ZEROCOPY + IPV6 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 111: zero-copy-send fixes & improvements -RH-Commit: [6/6] 2eb1aba8ebf267a6f67cfba2e489dc88619c7fd4 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 2107466 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: Dr. David Alan Gilbert - -For using MSG_ZEROCOPY, there are two steps: -1 - io_writev() the packet, which enqueues the packet for sending, and -2 - io_flush(), which gets confirmation that all packets got correctly sent - -Currently, if MSG_ZEROCOPY is used to send packets over IPV6, no error will -be reported in (1), but it will fail in the first time (2) happens. - -This happens because (2) currently checks for cmsg_level & cmsg_type -associated with IPV4 only, before reporting any error. 
- -Add checks for cmsg_level & cmsg_type associated with IPV6, and thus enable -support for MSG_ZEROCOPY + IPV6 - -Fixes: 2bc58ffc29 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") -Signed-off-by: Leonardo Bras -Signed-off-by: Daniel P. Berrangé -(cherry picked from commit 5258a7e2c0677d16e9e1d06845f60171adf0b290) -Signed-off-by: Leonardo Bras ---- - io/channel-socket.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/io/channel-socket.c b/io/channel-socket.c -index eb7baa2184..efd5f60808 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -747,8 +747,8 @@ static int qio_channel_socket_flush(QIOChannel *ioc, - } - - cm = CMSG_FIRSTHDR(&msg); -- if (cm->cmsg_level != SOL_IP && -- cm->cmsg_type != IP_RECVERR) { -+ if (cm->cmsg_level != SOL_IP && cm->cmsg_type != IP_RECVERR && -+ cm->cmsg_level != SOL_IPV6 && cm->cmsg_type != IPV6_RECVERR) { - error_setg_errno(errp, EPROTOTYPE, - "Wrong cmsg in errqueue"); - return -1; --- -2.31.1 - diff --git a/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch b/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch deleted file mode 100644 index b382a59..0000000 --- a/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 678981c6bb7c964e1591f6f8aba49e9602f64852 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Mon, 11 Jul 2022 18:11:11 -0300 -Subject: [PATCH 01/11] QIOChannelSocket: Fix zero-copy flush returning code 1 - when nothing sent -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 111: zero-copy-send fixes & improvements -RH-Commit: [1/6] cebc887cb61de1572d8ae3232cde45e80c339404 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 2107466 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: Dr. 
David Alan Gilbert - -If flush is called when no buffer was sent with MSG_ZEROCOPY, it currently -returns 1. This return code should be used only when Linux fails to use -MSG_ZEROCOPY on a lot of sendmsg(). - -Fix this by returning early from flush if no sendmsg(...,MSG_ZEROCOPY) -was attempted. - -Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") -Signed-off-by: Leonardo Bras -Reviewed-by: Daniel P. Berrangé -Acked-by: Daniel P. Berrangé -Reviewed-by: Juan Quintela -Reviewed-by: Peter Xu -Message-Id: <20220711211112.18951-2-leobras@redhat.com> -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 927f93e099c4f9184e60a1bc61624ac2d04d0223) -Signed-off-by: Leonardo Bras ---- - io/channel-socket.c | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - -diff --git a/io/channel-socket.c b/io/channel-socket.c -index 8ae8b212cf..eb7baa2184 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -717,12 +717,18 @@ static int qio_channel_socket_flush(QIOChannel *ioc, - struct cmsghdr *cm; - char control[CMSG_SPACE(sizeof(*serr))]; - int received; -- int ret = 1; -+ int ret; -+ -+ if (sioc->zero_copy_queued == sioc->zero_copy_sent) { -+ return 0; -+ } - - msg.msg_control = control; - msg.msg_controllen = sizeof(control); - memset(control, 0, sizeof(control)); - -+ ret = 1; -+ - while (sioc->zero_copy_sent < sioc->zero_copy_queued) { - received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE); - if (received < 0) { --- -2.31.1 - diff --git a/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch b/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch deleted file mode 100644 index 9d134e6..0000000 --- a/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch +++ /dev/null @@ -1,58 +0,0 @@ -From e70f01749addd7d0b7aa7fa4fdedb664f98e6b9b Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Mon, 20 Jun 2022 02:39:43 -0300 -Subject: [PATCH 16/18] QIOChannelSocket: Fix zero-copy 
send so socket flush - works -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [10/11] a2dfac987e24026b1a78e90b86234ca206b6401f (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -Somewhere between v6 and v7 the of the zero-copy-send patchset a crucial -part of the flushing mechanism got missing: incrementing zero_copy_queued. - -Without that, the flushing interface becomes a no-op, and there is no -guarantee the buffer is really sent. - -This can go as bad as causing a corruption in RAM during migration. - -Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") -Reported-by: 徐闯 -Signed-off-by: Leonardo Bras -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 4f5a09714c983a3471fd12e3c7f3196e95c650c1) -Signed-off-by: Leonardo Bras ---- - io/channel-socket.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/io/channel-socket.c b/io/channel-socket.c -index 7490e5943d..8ae8b212cf 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -612,6 +612,11 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, - "Unable to write to socket"); - return -1; - } -+ -+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { -+ sioc->zero_copy_queued++; -+ } -+ - return ret; - } - #else /* WIN32 */ --- -2.35.3 - diff --git a/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch b/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch deleted file mode 100644 index 89aa806..0000000 --- a/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch +++ /dev/null @@ -1,249 +0,0 @@ -From 4aeba0365d30dabe2e70dc172683f0878a4a9621 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:32 -0300 -Subject: [PATCH 09/18] QIOChannelSocket: Implement io_writev zero copy flag & - io_flush for CONFIG_LINUX -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [3/11] 9afeac1f5ac7675624660a0281726c09c8321180 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -For CONFIG_LINUX, implement the new zero copy flag and the optional callback -io_flush on QIOChannelSocket, but enables it only when MSG_ZEROCOPY -feature is available in the host kernel, which is checked on -qio_channel_socket_connect_sync() - -qio_channel_socket_flush() was implemented by counting how many times -sendmsg(...,MSG_ZEROCOPY) was successfully called, and then reading the -socket's error queue, in order to find how many of them finished sending. 
-Flush will loop until those counters are the same, or until some error occurs. - -Notes on using writev() with QIO_CHANNEL_WRITE_FLAG_ZERO_COPY: -1: Buffer -- As MSG_ZEROCOPY tells the kernel to use the same user buffer to avoid copying, -some caution is necessary to avoid overwriting any buffer before it's sent. -If something like this happen, a newer version of the buffer may be sent instead. -- If this is a problem, it's recommended to call qio_channel_flush() before freeing -or re-using the buffer. - -2: Locked memory -- When using MSG_ZERCOCOPY, the buffer memory will be locked after queued, and -unlocked after it's sent. -- Depending on the size of each buffer, and how often it's sent, it may require -a larger amount of locked memory than usually available to non-root user. -- If the required amount of locked memory is not available, writev_zero_copy -will return an error, which can abort an operation like migration, -- Because of this, when an user code wants to add zero copy as a feature, it -requires a mechanism to disable it, so it can still be accessible to less -privileged users. - -Signed-off-by: Leonardo Bras -Reviewed-by: Peter Xu -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Juan Quintela -Message-Id: <20220513062836.965425-4-leobras@redhat.com> -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 2bc58ffc2926a4efdd03edfb5909861fefc68c3d) -Signed-off-by: Leonardo Bras ---- - include/io/channel-socket.h | 2 + - io/channel-socket.c | 116 ++++++++++++++++++++++++++++++++++-- - 2 files changed, 114 insertions(+), 4 deletions(-) - -diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h -index e747e63514..513c428fe4 100644 ---- a/include/io/channel-socket.h -+++ b/include/io/channel-socket.h -@@ -47,6 +47,8 @@ struct QIOChannelSocket { - socklen_t localAddrLen; - struct sockaddr_storage remoteAddr; - socklen_t remoteAddrLen; -+ ssize_t zero_copy_queued; -+ ssize_t zero_copy_sent; - }; - - -diff --git a/io/channel-socket.c b/io/channel-socket.c -index a1be2197ca..fbd2214d20 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -26,6 +26,14 @@ - #include "io/channel-watch.h" - #include "trace.h" - #include "qapi/clone-visitor.h" -+#ifdef CONFIG_LINUX -+#include -+#include -+ -+#if (defined(MSG_ZEROCOPY) && defined(SO_ZEROCOPY)) -+#define QEMU_MSG_ZEROCOPY -+#endif -+#endif - - #define SOCKET_MAX_FDS 16 - -@@ -55,6 +63,8 @@ qio_channel_socket_new(void) - - sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)); - sioc->fd = -1; -+ sioc->zero_copy_queued = 0; -+ sioc->zero_copy_sent = 0; - - ioc = QIO_CHANNEL(sioc); - qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); -@@ -154,6 +164,16 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, - return -1; - } - -+#ifdef QEMU_MSG_ZEROCOPY -+ int ret, v = 1; -+ ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v)); -+ if (ret == 0) { -+ /* Zero copy available on host */ -+ qio_channel_set_feature(QIO_CHANNEL(ioc), -+ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY); -+ } -+#endif -+ - return 0; - } - -@@ -534,6 +554,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, - char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)]; - size_t fdsize = sizeof(int) * nfds; - struct cmsghdr *cmsg; -+ int sflags = 0; - - 
memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)); - -@@ -558,15 +579,31 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, - memcpy(CMSG_DATA(cmsg), fds, fdsize); - } - -+#ifdef QEMU_MSG_ZEROCOPY -+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { -+ sflags = MSG_ZEROCOPY; -+ } -+#endif -+ - retry: -- ret = sendmsg(sioc->fd, &msg, 0); -+ ret = sendmsg(sioc->fd, &msg, sflags); - if (ret <= 0) { -- if (errno == EAGAIN) { -+ switch (errno) { -+ case EAGAIN: - return QIO_CHANNEL_ERR_BLOCK; -- } -- if (errno == EINTR) { -+ case EINTR: - goto retry; -+#ifdef QEMU_MSG_ZEROCOPY -+ case ENOBUFS: -+ if (sflags & MSG_ZEROCOPY) { -+ error_setg_errno(errp, errno, -+ "Process can't lock enough memory for using MSG_ZEROCOPY"); -+ return -1; -+ } -+ break; -+#endif - } -+ - error_setg_errno(errp, errno, - "Unable to write to socket"); - return -1; -@@ -660,6 +697,74 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, - } - #endif /* WIN32 */ - -+ -+#ifdef QEMU_MSG_ZEROCOPY -+static int qio_channel_socket_flush(QIOChannel *ioc, -+ Error **errp) -+{ -+ QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); -+ struct msghdr msg = {}; -+ struct sock_extended_err *serr; -+ struct cmsghdr *cm; -+ char control[CMSG_SPACE(sizeof(*serr))]; -+ int received; -+ int ret = 1; -+ -+ msg.msg_control = control; -+ msg.msg_controllen = sizeof(control); -+ memset(control, 0, sizeof(control)); -+ -+ while (sioc->zero_copy_sent < sioc->zero_copy_queued) { -+ received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE); -+ if (received < 0) { -+ switch (errno) { -+ case EAGAIN: -+ /* Nothing on errqueue, wait until something is available */ -+ qio_channel_wait(ioc, G_IO_ERR); -+ continue; -+ case EINTR: -+ continue; -+ default: -+ error_setg_errno(errp, errno, -+ "Unable to read errqueue"); -+ return -1; -+ } -+ } -+ -+ cm = CMSG_FIRSTHDR(&msg); -+ if (cm->cmsg_level != SOL_IP && -+ cm->cmsg_type != IP_RECVERR) { -+ error_setg_errno(errp, EPROTOTYPE, -+ "Wrong cmsg in errqueue"); -+ 
return -1; -+ } -+ -+ serr = (void *) CMSG_DATA(cm); -+ if (serr->ee_errno != SO_EE_ORIGIN_NONE) { -+ error_setg_errno(errp, serr->ee_errno, -+ "Error on socket"); -+ return -1; -+ } -+ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { -+ error_setg_errno(errp, serr->ee_origin, -+ "Error not from zero copy"); -+ return -1; -+ } -+ -+ /* No errors, count successfully finished sendmsg()*/ -+ sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1; -+ -+ /* If any sendmsg() succeeded using zero copy, return 0 at the end */ -+ if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) { -+ ret = 0; -+ } -+ } -+ -+ return ret; -+} -+ -+#endif /* QEMU_MSG_ZEROCOPY */ -+ - static int - qio_channel_socket_set_blocking(QIOChannel *ioc, - bool enabled, -@@ -790,6 +895,9 @@ static void qio_channel_socket_class_init(ObjectClass *klass, - ioc_klass->io_set_delay = qio_channel_socket_set_delay; - ioc_klass->io_create_watch = qio_channel_socket_create_watch; - ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler; -+#ifdef QEMU_MSG_ZEROCOPY -+ ioc_klass->io_flush = qio_channel_socket_flush; -+#endif - } - - static const TypeInfo qio_channel_socket_info = { --- -2.35.3 - diff --git a/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch b/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch deleted file mode 100644 index 6fc0c76..0000000 --- a/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 60bf942a58db12c821f2a6a49e2e0b04b99bec30 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Mon, 20 Jun 2022 02:39:42 -0300 -Subject: [PATCH 15/18] QIOChannelSocket: Introduce assert and reduce ifdefs to - improve readability -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [9/11] eaa02d68301852ccc98bdacc7387d8d03be1cb05 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: 
Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -During implementation of MSG_ZEROCOPY feature, a lot of #ifdefs were -introduced, particularly at qio_channel_socket_writev(). - -Rewrite some of those changes so it's easier to read. - -Also, introduce an assert to help detect incorrect zero-copy usage is when -it's disabled on build. - -Signed-off-by: Leonardo Bras -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Juan Quintela -Reviewed-by: Peter Xu -Signed-off-by: Juan Quintela -Signed-off-by: Dr. David Alan Gilbert - dgilbert: Fixed up thinko'd g_assert_unreachable->g_assert_not_reached -(cherry picked from commit 803ca43e4c7fcf32f9f68c118301ccd0c83ece3f) -Signed-off-by: Leonardo Bras ---- - io/channel-socket.c | 14 +++++++++----- - 1 file changed, 9 insertions(+), 5 deletions(-) - -diff --git a/io/channel-socket.c b/io/channel-socket.c -index fbd2214d20..7490e5943d 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -579,11 +579,17 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, - memcpy(CMSG_DATA(cmsg), fds, fdsize); - } - --#ifdef QEMU_MSG_ZEROCOPY - if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { -+#ifdef QEMU_MSG_ZEROCOPY - sflags = MSG_ZEROCOPY; -- } -+#else -+ /* -+ * We expect QIOChannel class entry point to have -+ * blocked this code path already -+ */ -+ g_assert_not_reached(); - #endif -+ } - - retry: - ret = sendmsg(sioc->fd, &msg, sflags); -@@ -593,15 +599,13 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, - return QIO_CHANNEL_ERR_BLOCK; - case EINTR: - goto retry; --#ifdef QEMU_MSG_ZEROCOPY - case ENOBUFS: -- if (sflags & MSG_ZEROCOPY) { -+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { - error_setg_errno(errp, errno, - "Process can't lock enough memory for using MSG_ZEROCOPY"); - return -1; - } - break; --#endif - } - - error_setg_errno(errp, errno, --- -2.35.3 - diff --git a/kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch 
b/kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch deleted file mode 100644 index 8a0aeb0..0000000 --- a/kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch +++ /dev/null @@ -1,237 +0,0 @@ -From 055edf068196622a3e1868c9e4c991d410272a6d Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Wed, 15 Jun 2022 15:28:27 +0200 -Subject: [PATCH 03/18] RHEL-only: AArch64: Drop unsupported CPU types -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrangé -RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models -RH-Commit: [3/6] 21f54c86dc87e5e75a64459b5a385686bc09640c (berrange/centos-src-qemu) -RH-Bugzilla: 2060839 -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824 -Upstream Status: RHEL only - -We only need to support AArch64 cpu types and we only need three -types: - 1) A base type to use with TCG, i.e. a cpu type with only base - features. 'cortex-a57' serves this role and is currently used - by libguestfs. - 2) The 'max' type, which is for both KVM and TCG and is good for - tests that just specify 'max' but run under both. 'max' with - TCG also provides the VM with all the CPU features TCG - supports, which is good for VMs that need features not - provided by the basic cortex-a57. - 3) The host type which is used with KVM. 
- -Signed-off-by: Andrew Jones ---- - hw/arm/virt.c | 4 ++++ - target/arm/cpu64.c | 6 ++++++ - target/arm/cpu_tcg.c | 12 ++---------- - tests/qtest/arm-cpu-features.c | 6 ++++++ - 4 files changed, 18 insertions(+), 10 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 95d012d6eb..74119976d3 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -239,12 +239,16 @@ static const int a15irqmap[] = { - }; - - static const char *valid_cpus[] = { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - ARM_CPU_TYPE_NAME("cortex-a7"), - ARM_CPU_TYPE_NAME("cortex-a15"), - ARM_CPU_TYPE_NAME("cortex-a53"), -+#endif /* disabled for RHEL */ - ARM_CPU_TYPE_NAME("cortex-a57"), -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - ARM_CPU_TYPE_NAME("cortex-a72"), - ARM_CPU_TYPE_NAME("a64fx"), -+#endif /* disabled for RHEL */ - ARM_CPU_TYPE_NAME("host"), - ARM_CPU_TYPE_NAME("max"), - }; -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index eb44c05822..e80b831073 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -146,6 +146,7 @@ static void aarch64_a57_initfn(Object *obj) - define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void aarch64_a53_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -249,6 +250,7 @@ static void aarch64_a72_initfn(Object *obj) - cpu->gic_vprebits = 5; - define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); - } -+#endif /* disabled for RHEL */ - - void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) - { -@@ -923,6 +925,7 @@ static void aarch64_max_initfn(Object *obj) - qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void aarch64_a64fx_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -969,12 +972,15 @@ static void aarch64_a64fx_initfn(Object *obj) - - /* TODO: Add A64FX specific HPC extension registers */ - } -+#endif /* disabled for RHEL */ - - static const 
ARMCPUInfo aarch64_cpus[] = { - { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, - { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, - { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, -+#endif /* disabled for RHEL */ - { .name = "max", .initfn = aarch64_max_initfn }, - #if defined(CONFIG_KVM) || defined(CONFIG_HVF) - { .name = "host", .initfn = aarch64_host_initfn }, -diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index 3826fa5122..74727fc92c 100644 ---- a/target/arm/cpu_tcg.c -+++ b/target/arm/cpu_tcg.c -@@ -19,10 +19,10 @@ - #include "hw/boards.h" - #endif - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - /* CPU models. These are not needed for the AArch64 linux-user build. */ - #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) - static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) - { -@@ -376,7 +376,6 @@ static void cortex_a9_initfn(Object *obj) - cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. 
*/ - define_arm_cp_regs(cpu, cortexa9_cp_reginfo); - } --#endif /* disabled for RHEL */ - - #ifndef CONFIG_USER_ONLY - static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -402,7 +401,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { - REGINFO_SENTINEL - }; - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void cortex_a7_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -448,7 +446,6 @@ static void cortex_a7_initfn(Object *obj) - cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ - define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ - } --#endif /* disabled for RHEL */ - - static void cortex_a15_initfn(Object *obj) - { -@@ -492,7 +489,6 @@ static void cortex_a15_initfn(Object *obj) - define_arm_cp_regs(cpu, cortexa15_cp_reginfo); - } - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void cortex_m0_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -933,7 +929,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) - - cc->gdb_core_xml_file = "arm-m-profile.xml"; - } --#endif /* disabled for RHEL */ - - #ifndef TARGET_AARCH64 - /* -@@ -1013,7 +1008,6 @@ static void arm_max_initfn(Object *obj) - #endif /* !TARGET_AARCH64 */ - - static const ARMCPUInfo arm_tcg_cpus[] = { --#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "arm926", .initfn = arm926_initfn }, - { .name = "arm946", .initfn = arm946_initfn }, - { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1029,9 +1023,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { - { .name = "cortex-a7", .initfn = cortex_a7_initfn }, - { .name = "cortex-a8", .initfn = cortex_a8_initfn }, - { .name = "cortex-a9", .initfn = cortex_a9_initfn }, --#endif /* disabled for RHEL */ - { .name = "cortex-a15", .initfn = cortex_a15_initfn }, --#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-m0", .initfn = cortex_m0_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-m3", .initfn = 
cortex_m3_initfn, -@@ -1062,7 +1054,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { - { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, - { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, - { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, --#endif /* disabled for RHEL */ - #ifndef TARGET_AARCH64 - { .name = "max", .initfn = arm_max_initfn }, - #endif -@@ -1090,3 +1081,4 @@ static void arm_tcg_cpu_register_types(void) - type_init(arm_tcg_cpu_register_types) - - #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ -+#endif /* disabled for RHEL */ -diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c -index f76652143a..fe2a0a070d 100644 ---- a/tests/qtest/arm-cpu-features.c -+++ b/tests/qtest/arm-cpu-features.c -@@ -440,8 +440,10 @@ static void test_query_cpu_model_expansion(const void *data) - assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); - - /* Test expected feature presence/absence for some cpu types */ -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - assert_has_feature_enabled(qts, "cortex-a15", "pmu"); - assert_has_not_feature(qts, "cortex-a15", "aarch64"); -+#endif /* disabled for RHEL */ - - /* Enabling and disabling pmu should always work. 
*/ - assert_has_feature_enabled(qts, "max", "pmu"); -@@ -458,6 +460,7 @@ static void test_query_cpu_model_expansion(const void *data) - assert_has_feature_enabled(qts, "cortex-a57", "pmu"); - assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - assert_has_feature_enabled(qts, "a64fx", "pmu"); - assert_has_feature_enabled(qts, "a64fx", "aarch64"); - /* -@@ -470,6 +473,7 @@ static void test_query_cpu_model_expansion(const void *data) - "{ 'sve384': true }"); - assert_error(qts, "a64fx", "cannot enable sve640", - "{ 'sve640': true }"); -+#endif /* disabled for RHEL */ - - sve_tests_default(qts, "max"); - pauth_tests_default(qts, "max"); -@@ -505,9 +509,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data) - QDict *resp; - char *error; - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - assert_error(qts, "cortex-a15", - "We cannot guarantee the CPU type 'cortex-a15' works " - "with KVM on this host", NULL); -+#endif /* disabled for RHEL */ - - assert_has_feature_enabled(qts, "host", "aarch64"); - --- -2.35.3 - diff --git a/kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch b/kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch deleted file mode 100644 index a1cc4c7..0000000 --- a/kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch +++ /dev/null @@ -1,95 +0,0 @@ -From d710394f68eb0b6116dd8ac76f619c192e0d5972 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Wed, 15 Jun 2022 15:28:27 +0200 -Subject: [PATCH 02/18] RHEL-only: tests/avocado: Switch aarch64 tests from a53 - to a57 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. 
Berrangé -RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models -RH-Commit: [2/6] e85ef69b42c411a6997e4da10ba05176368769b3 (berrange/centos-src-qemu) -RH-Bugzilla: 2060839 -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824 -Upstream Status: RHEL only - -We plan to remove the cortex-a53 from the supported cpu types. Switch -all avocado tests that use it to the cortex-a57, which will work the -same and we intend to keep. We don't want to try and upstream this -change since the better upstream change would be to switch from the -a53 to 'max', but the upstream tests also need to use later guest -kernels to use 'max' (see qemu upstream commit 0942820408dc -("hw/arm/virt: Disable LPA2 for -machine virt-6.2") - -Signed-off-by: Andrew Jones ---- - tests/avocado/replay_kernel.py | 2 +- - tests/avocado/reverse_debugging.py | 2 +- - tests/avocado/tcg_plugins.py | 6 +++--- - 3 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py -index 0b2b0dc692..3a7b5f0748 100644 ---- a/tests/avocado/replay_kernel.py -+++ b/tests/avocado/replay_kernel.py -@@ -147,7 +147,7 @@ def test_aarch64_virt(self): - """ - :avocado: tags=arch:aarch64 - :avocado: tags=machine:virt -- :avocado: tags=cpu:cortex-a53 -+ :avocado: tags=cpu:cortex-a57 - """ - kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' - '/linux/releases/29/Everything/aarch64/os/images/pxeboot' -diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py -index d2921e70c3..66d185ed42 100644 ---- a/tests/avocado/reverse_debugging.py -+++ b/tests/avocado/reverse_debugging.py -@@ -198,7 +198,7 @@ def test_aarch64_virt(self): - """ - :avocado: tags=arch:aarch64 - :avocado: tags=machine:virt -- :avocado: tags=cpu:cortex-a53 -+ :avocado: tags=cpu:cortex-a57 - """ - kernel_url = 
('https://archives.fedoraproject.org/pub/archive/fedora' - '/linux/releases/29/Everything/aarch64/os/images/pxeboot' -diff --git a/tests/avocado/tcg_plugins.py b/tests/avocado/tcg_plugins.py -index 642d2e49e3..93b3afd823 100644 ---- a/tests/avocado/tcg_plugins.py -+++ b/tests/avocado/tcg_plugins.py -@@ -68,7 +68,7 @@ def test_aarch64_virt_insn(self): - :avocado: tags=accel:tcg - :avocado: tags=arch:aarch64 - :avocado: tags=machine:virt -- :avocado: tags=cpu:cortex-a53 -+ :avocado: tags=cpu:cortex-a57 - """ - kernel_path = self._grab_aarch64_kernel() - kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + -@@ -94,7 +94,7 @@ def test_aarch64_virt_insn_icount(self): - :avocado: tags=accel:tcg - :avocado: tags=arch:aarch64 - :avocado: tags=machine:virt -- :avocado: tags=cpu:cortex-a53 -+ :avocado: tags=cpu:cortex-a57 - """ - kernel_path = self._grab_aarch64_kernel() - kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + -@@ -120,7 +120,7 @@ def test_aarch64_virt_mem_icount(self): - :avocado: tags=accel:tcg - :avocado: tags=arch:aarch64 - :avocado: tags=machine:virt -- :avocado: tags=cpu:cortex-a53 -+ :avocado: tags=cpu:cortex-a57 - """ - kernel_path = self._grab_aarch64_kernel() - kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + --- -2.35.3 - diff --git a/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch b/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch deleted file mode 100644 index 7740d0b..0000000 --- a/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 5ab8613582fd56b847fe75750acb5b7255900b35 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Thu, 9 Jun 2022 11:55:15 +0200 -Subject: [PATCH 15/16] Revert "globally limit the maximum number of CPUs" - -RH-Author: Vitaly Kuznetsov -RH-MergeRequest: 99: Revert "globally limit the maximum number of CPUs" -RH-Commit: [1/1] 13100d4a2209b2190a3654c1f9cf4ebade1e8d24 (vkuznets/qemu-kvm-c9s) -RH-Bugzilla: 2094270 -RH-Acked-by: Andrew Jones 
-RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094270 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45871149 -Upstream Status: RHEL-only -Tested: with upstream kernel - -Downstream QEMU carries a patch that sets the hard limit of possible vCPUs -to the value that the KVM code of the kernel recommends as soft limit. -Upstream KVM code has been changed recently to not use an arbitrary soft -limit anymore, but to cap the value on the amount of available physical -CPUs of the host. This defeats the purpose of the downstream change in -QEMU completely. Drop the downstream-only patch to allow CPU overcommit. - -This reverts commit 6669f6fa677d43144f39d6ad59725b7ba622f1c2. - -Signed-off-by: Vitaly Kuznetsov ---- - accel/kvm/kvm-all.c | 12 ------------ - 1 file changed, 12 deletions(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index fdf0e4d429..5f1377ca04 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -2430,18 +2430,6 @@ static int kvm_init(MachineState *ms) - soft_vcpus_limit = kvm_recommended_vcpus(s); - hard_vcpus_limit = kvm_max_vcpus(s); - --#ifdef HOST_PPC64 -- /* -- * On POWER, the kernel advertises a soft limit based on the -- * number of CPU threads on the host. We want to allow exceeding -- * this for testing purposes, so we don't want to set hard limit -- * to soft limit as on x86. 
-- */ --#else -- /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */ -- hard_vcpus_limit = soft_vcpus_limit; --#endif -- - while (nc->name) { - if (nc->num > soft_vcpus_limit) { - warn_report("Number of %s cpus requested (%d) exceeds " --- -2.31.1 - diff --git a/kvm-Revert-migration-Simplify-unqueue_page.patch b/kvm-Revert-migration-Simplify-unqueue_page.patch deleted file mode 100644 index f5c97f6..0000000 --- a/kvm-Revert-migration-Simplify-unqueue_page.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 5ea59b17866add54e5ae8c76d3cb472c67e1fa91 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 2 Aug 2022 08:19:49 +0200 -Subject: [PATCH 32/32] Revert "migration: Simplify unqueue_page()" - -RH-Author: Thomas Huth -RH-MergeRequest: 112: Fix postcopy migration on s390x -RH-Commit: [2/2] 3913c9ed3f27f4b66245913da29d0c46db0c6567 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2099934 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Peter Xu - -This reverts commit cfd66f30fb0f735df06ff4220e5000290a43dad3. - -The simplification of unqueue_page() introduced a bug that sometimes -breaks migration on s390x hosts. - -The problem is not fully understood yet, but since we are already in -the freeze for QEMU 7.1 and we need something working there, let's -revert this patch for the upcoming release. The optimization can be -redone later again in a proper way if necessary. - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2099934 -Signed-off-by: Thomas Huth -Message-Id: <20220802061949.331576-1-thuth@redhat.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 777f53c75983dd10756f5dbfc8af50fe11da81c1) -Conflicts: - migration/trace-events - (trivial contextual conflict) -Signed-off-by: Thomas Huth ---- - migration/ram.c | 37 ++++++++++++++++++++++++++----------- - migration/trace-events | 3 ++- - 2 files changed, 28 insertions(+), 12 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index fb6db54642..ee40e4a718 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1548,7 +1548,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) - { - struct RAMSrcPageRequest *entry; - RAMBlock *block = NULL; -- size_t page_size; - - if (!postcopy_has_request(rs)) { - return NULL; -@@ -1565,13 +1564,10 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) - entry = QSIMPLEQ_FIRST(&rs->src_page_requests); - block = entry->rb; - *offset = entry->offset; -- page_size = qemu_ram_pagesize(block); -- /* Each page request should only be multiple page size of the ramblock */ -- assert((entry->len % page_size) == 0); - -- if (entry->len > page_size) { -- entry->len -= page_size; -- entry->offset += page_size; -+ if (entry->len > TARGET_PAGE_SIZE) { -+ entry->len -= TARGET_PAGE_SIZE; -+ entry->offset += TARGET_PAGE_SIZE; - } else { - memory_region_unref(block->mr); - QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); -@@ -1579,9 +1575,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) - migration_consume_urgent_request(); - } - -- trace_unqueue_page(block->idstr, *offset, -- test_bit((*offset >> TARGET_PAGE_BITS), block->bmap)); -- - return block; - } - -@@ -1956,8 +1949,30 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) - { - RAMBlock *block; - ram_addr_t offset; -+ bool dirty; -+ -+ do { -+ block = unqueue_page(rs, &offset); -+ /* -+ * We're sending this page, and since it's postcopy nothing else -+ * will dirty it, and we must make sure it doesn't get sent again -+ * even if this queue request was received 
after the background -+ * search already sent it. -+ */ -+ if (block) { -+ unsigned long page; -+ -+ page = offset >> TARGET_PAGE_BITS; -+ dirty = test_bit(page, block->bmap); -+ if (!dirty) { -+ trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset, -+ page); -+ } else { -+ trace_get_queued_page(block->idstr, (uint64_t)offset, page); -+ } -+ } - -- block = unqueue_page(rs, &offset); -+ } while (block && !dirty); - - if (!block) { - /* -diff --git a/migration/trace-events b/migration/trace-events -index 1aec580e92..09d61ed1f4 100644 ---- a/migration/trace-events -+++ b/migration/trace-events -@@ -85,6 +85,8 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)" - qemu_file_fclose(void) "" - - # ram.c -+get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx" -+get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx" - migration_bitmap_sync_start(void) "" - migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64 - migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx" -@@ -110,7 +112,6 @@ ram_save_iterate_big_wait(uint64_t milliconds, int iterations) "big wait: %" PRI - ram_load_complete(int ret, uint64_t seq_iter) "exit_code %d seq iteration %" PRIu64 - ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu" - ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu" --unqueue_page(char *block, uint64_t offset, bool dirty) "ramblock '%s' offset 0x%"PRIx64" dirty %d" - - # multifd.c - multifd_new_send_channel_async(uint8_t id) "channel %u" --- -2.31.1 - diff --git a/kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch 
b/kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch deleted file mode 100644 index e8eb35d..0000000 --- a/kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 733acef2caea0758edd74fb634b095ce09bf5914 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Mon, 9 May 2022 03:46:23 -0400 -Subject: [PATCH 15/16] Revert "virtio-scsi: Reject scsi-cd if data plane - enabled [RHEL only]" - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 91: Revert "virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only]" -RH-Commit: [1/1] 1af55d792bc9166e5c86272afe8093c76ab41bb4 (eesposit/qemu-kvm) -RH-Bugzilla: 1995710 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi - -This reverts commit 4e17b1126e. - -Over time AioContext usage and coverage has increased, and now block -backend is capable of handling AioContext change upon eject and insert. -Therefore the above downstream-only commit is not necessary anymore, -and can be safely reverted. - -X-downstream-only: true - -Signed-off-by: Emanuele Giuseppe Esposito ---- - hw/scsi/virtio-scsi.c | 9 --------- - 1 file changed, 9 deletions(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 2450c9438c..db54d104be 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -937,15 +937,6 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, - AioContext *old_context; - int ret; - -- /* XXX: Remove this check once block backend is capable of handling -- * AioContext change upon eject/insert. -- * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if -- * data plane is not used, both cases are safe for scsi-cd. 
*/ -- if (s->ctx && s->ctx != qemu_get_aio_context() && -- object_dynamic_cast(OBJECT(dev), "scsi-cd")) { -- error_setg(errp, "scsi-cd is not supported by data plane"); -- return; -- } - if (s->ctx && !s->dataplane_fenced) { - if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { - return; --- -2.31.1 - diff --git a/kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch b/kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch deleted file mode 100644 index a948e57..0000000 --- a/kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 3a0e9bb88e82cc76ca5efc0595ce94b5dc34749e Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Mon, 25 Apr 2022 13:42:46 +0800 -Subject: [PATCH 1/2] configs/devices/aarch64-softmmu: Enable CONFIG_VIRTIO_MEM - -RH-Author: Gavin Shan -RH-MergeRequest: 80: Enable virtio-mem for aarch64 -RH-Commit: [1/1] 1afbd08da6d7c860da8d617a0a932d3660514878 (gwshan/qemu-rhel-9) -RH-Bugzilla: 2044162 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Eric Auger -RH-Acked-by: David Hildenbrand - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2044162 - -This enables virtio-mem device on aarch64 since all needed commits -are ready. 
- - b1b87327a9 hw/arm/virt: Support for virtio-mem-pci - 1263615efe virtio-mem: Correct default THP size for ARM64 - -Signed-off-by: Gavin Shan ---- - configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -index 5f6ee1de5b..187938573f 100644 ---- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak -@@ -22,6 +22,7 @@ CONFIG_VFIO=y - CONFIG_VFIO_PCI=y - CONFIG_VIRTIO_MMIO=y - CONFIG_VIRTIO_PCI=y -+CONFIG_VIRTIO_MEM=y - CONFIG_XIO3130=y - CONFIG_NVDIMM=y - CONFIG_ACPI_APEI=y --- -2.35.1 - diff --git a/kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch b/kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch deleted file mode 100644 index c1f3683..0000000 --- a/kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch +++ /dev/null @@ -1,101 +0,0 @@ -From e3cb8849862a9f0dd20f2913d540336a037d43c7 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 10 May 2022 17:10:19 +0200 -Subject: [PATCH 07/16] coroutine: Rename qemu_coroutine_inc/dec_pool_size() - -RH-Author: Kevin Wolf -RH-MergeRequest: 87: coroutine: Fix crashes due to too large pool batch size -RH-Commit: [1/2] 6389b11f70225f221784c270d9b90c1ea43ca8fb (kmwolf/centos-qemu-kvm) -RH-Bugzilla: 2079938 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella - -It's true that these functions currently affect the batch size in which -coroutines are reused (i.e. moved from the global release pool to the -allocation pool of a specific thread), but this is a bug and will be -fixed in a separate patch. - -In fact, the comment in the header file already just promises that it -influences the pool size, so reflect this in the name of the functions. -As a nice side effect, the shorter function name makes some line -wrapping unnecessary. 
- -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Message-Id: <20220510151020.105528-2-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 98e3ab35054b946f7c2aba5408822532b0920b53) -Signed-off-by: Kevin Wolf ---- - hw/block/virtio-blk.c | 6 ++---- - include/qemu/coroutine.h | 6 +++--- - util/qemu-coroutine.c | 4 ++-- - 3 files changed, 7 insertions(+), 9 deletions(-) - -diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c -index 540c38f829..6a1cc41877 100644 ---- a/hw/block/virtio-blk.c -+++ b/hw/block/virtio-blk.c -@@ -1215,8 +1215,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) - for (i = 0; i < conf->num_queues; i++) { - virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); - } -- qemu_coroutine_increase_pool_batch_size(conf->num_queues * conf->queue_size -- / 2); -+ qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); - virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); - if (err != NULL) { - error_propagate(errp, err); -@@ -1253,8 +1252,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) - for (i = 0; i < conf->num_queues; i++) { - virtio_del_queue(vdev, i); - } -- qemu_coroutine_decrease_pool_batch_size(conf->num_queues * conf->queue_size -- / 2); -+ qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2); - qemu_del_vm_change_state_handler(s->change); - blockdev_mark_auto_del(s->blk); - virtio_cleanup(vdev); -diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h -index c828a95ee0..5b621d1295 100644 ---- a/include/qemu/coroutine.h -+++ b/include/qemu/coroutine.h -@@ -334,12 +334,12 @@ void coroutine_fn yield_until_fd_readable(int fd); - /** - * Increase coroutine pool size - */ --void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size); -+void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size); - - /** -- * Devcrease coroutine pool size -+ * Decrease coroutine pool size - */ --void 
qemu_coroutine_decrease_pool_batch_size(unsigned int additional_pool_size); -+void qemu_coroutine_dec_pool_size(unsigned int additional_pool_size); - - #include "qemu/lockable.h" - -diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c -index c03b2422ff..faca0ca97c 100644 ---- a/util/qemu-coroutine.c -+++ b/util/qemu-coroutine.c -@@ -205,12 +205,12 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co) - return co->ctx; - } - --void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size) -+void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) - { - qatomic_add(&pool_batch_size, additional_pool_size); - } - --void qemu_coroutine_decrease_pool_batch_size(unsigned int removing_pool_size) -+void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) - { - qatomic_sub(&pool_batch_size, removing_pool_size); - } --- -2.31.1 - diff --git a/kvm-coroutine-Revert-to-constant-batch-size.patch b/kvm-coroutine-Revert-to-constant-batch-size.patch deleted file mode 100644 index 2973510..0000000 --- a/kvm-coroutine-Revert-to-constant-batch-size.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 345107bfd5537b51f34aaeb97d6161858bb6feee Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 10 May 2022 17:10:20 +0200 -Subject: [PATCH 08/16] coroutine: Revert to constant batch size - -RH-Author: Kevin Wolf -RH-MergeRequest: 87: coroutine: Fix crashes due to too large pool batch size -RH-Commit: [2/2] 8a8a39af873854cdc8333d1a70f3479a97c3ec7a (kmwolf/centos-qemu-kvm) -RH-Bugzilla: 2079938 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella - -Commit 4c41c69e changed the way the coroutine pool is sized because for -virtio-blk devices with a large queue size and heavy I/O, it was just -too small and caused coroutines to be deleted and reallocated soon -afterwards. The change made the size dynamic based on the number of -queues and the queue size of virtio-blk devices. 
- -There are two important numbers here: Slightly simplified, when a -coroutine terminates, it is generally stored in the global release pool -up to a certain pool size, and if the pool is full, it is freed. -Conversely, when allocating a new coroutine, the coroutines in the -release pool are reused if the pool already has reached a certain -minimum size (the batch size), otherwise we allocate new coroutines. - -The problem after commit 4c41c69e is that it not only increases the -maximum pool size (which is the intended effect), but also the batch -size for reusing coroutines (which is a bug). It means that in cases -with many devices and/or a large queue size (which defaults to the -number of vcpus for virtio-blk-pci), many thousand coroutines could be -sitting in the release pool without being reused. - -This is not only a waste of memory and allocations, but it actually -makes the QEMU process likely to hit the vm.max_map_count limit on Linux -because each coroutine requires two mappings (its stack and the guard -page for the stack), causing it to abort() in qemu_alloc_stack() because -when the limit is hit, mprotect() starts to fail with ENOMEM. - -In order to fix the problem, change the batch size back to 64 to avoid -uselessly accumulating coroutines in the release pool, but keep the -dynamic maximum pool size so that coroutines aren't freed too early -in heavy I/O scenarios. - -Note that this fix doesn't strictly make it impossible to hit the limit, -but this would only happen if most of the coroutines are actually in use -at the same time, not just sitting in a pool. This is the same behaviour -as we already had before commit 4c41c69e. Fully preventing this would -require allowing qemu_coroutine_create() to return an error, but it -doesn't seem to be a scenario that people hit in practice. 
- -Cc: qemu-stable@nongnu.org -Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2079938 -Fixes: 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b -Signed-off-by: Kevin Wolf -Message-Id: <20220510151020.105528-3-kwolf@redhat.com> -Tested-by: Hiroki Narukawa -Signed-off-by: Kevin Wolf -(cherry picked from commit 9ec7a59b5aad4b736871c378d30f5ef5ec51cb52) - -Conflicts: - util/qemu-coroutine.c - -Trivial merge conflict because we don't have commit ac387a08 downstream. - -Signed-off-by: Kevin Wolf ---- - util/qemu-coroutine.c | 22 ++++++++++++++-------- - 1 file changed, 14 insertions(+), 8 deletions(-) - -diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c -index faca0ca97c..804f672e0a 100644 ---- a/util/qemu-coroutine.c -+++ b/util/qemu-coroutine.c -@@ -20,14 +20,20 @@ - #include "qemu/coroutine_int.h" - #include "block/aio.h" - --/** Initial batch size is 64, and is increased on demand */ -+/** -+ * The minimal batch size is always 64, coroutines from the release_pool are -+ * reused as soon as there are 64 coroutines in it. The maximum pool size starts -+ * with 64 and is increased on demand so that coroutines are not deleted even if -+ * they are not immediately reused. 
-+ */ - enum { -- POOL_INITIAL_BATCH_SIZE = 64, -+ POOL_MIN_BATCH_SIZE = 64, -+ POOL_INITIAL_MAX_SIZE = 64, - }; - - /** Free list to speed up creation */ - static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); --static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE; -+static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE; - static unsigned int release_pool_size; - static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); - static __thread unsigned int alloc_pool_size; -@@ -51,7 +57,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) - if (CONFIG_COROUTINE_POOL) { - co = QSLIST_FIRST(&alloc_pool); - if (!co) { -- if (release_pool_size > qatomic_read(&pool_batch_size)) { -+ if (release_pool_size > POOL_MIN_BATCH_SIZE) { - /* Slow path; a good place to register the destructor, too. */ - if (!coroutine_pool_cleanup_notifier.notify) { - coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; -@@ -88,12 +94,12 @@ static void coroutine_delete(Coroutine *co) - co->caller = NULL; - - if (CONFIG_COROUTINE_POOL) { -- if (release_pool_size < qatomic_read(&pool_batch_size) * 2) { -+ if (release_pool_size < qatomic_read(&pool_max_size) * 2) { - QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); - qatomic_inc(&release_pool_size); - return; - } -- if (alloc_pool_size < qatomic_read(&pool_batch_size)) { -+ if (alloc_pool_size < qatomic_read(&pool_max_size)) { - QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); - alloc_pool_size++; - return; -@@ -207,10 +213,10 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co) - - void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) - { -- qatomic_add(&pool_batch_size, additional_pool_size); -+ qatomic_add(&pool_max_size, additional_pool_size); - } - - void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) - { -- qatomic_sub(&pool_batch_size, removing_pool_size); -+ qatomic_sub(&pool_max_size, 
removing_pool_size); - } --- -2.31.1 - diff --git a/kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch b/kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch deleted file mode 100644 index 963cf04..0000000 --- a/kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch +++ /dev/null @@ -1,132 +0,0 @@ -From ffbd90e5f4eba620c7cd631b04f0ed31beb22ffa Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 17 May 2022 12:07:56 +0100 -Subject: [PATCH 1/6] coroutine-ucontext: use QEMU_DEFINE_STATIC_CO_TLS() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables -RH-Commit: [1/3] a9782fe8e919c4bd317b7e8744c7ff57d898add3 (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 1952483 -RH-Acked-by: Hanna Reitz -RH-Acked-by: Eric Blake -RH-Acked-by: Kevin Wolf - -Thread-Local Storage variables cannot be used directly from coroutine -code because the compiler may optimize TLS variable accesses across -qemu_coroutine_yield() calls. When the coroutine is re-entered from -another thread the TLS variables from the old thread must no longer be -used. - -Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. 
- -Signed-off-by: Stefan Hajnoczi -Message-Id: <20220307153853.602859-2-stefanha@redhat.com> -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Kevin Wolf -(cherry picked from commit 34145a307d849d0b6734d0222a7aa0bb9eef7407) -Signed-off-by: Stefan Hajnoczi ---- - util/coroutine-ucontext.c | 38 ++++++++++++++++++++++++-------------- - 1 file changed, 24 insertions(+), 14 deletions(-) - -diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c -index 904b375192..127d5a13c8 100644 ---- a/util/coroutine-ucontext.c -+++ b/util/coroutine-ucontext.c -@@ -25,6 +25,7 @@ - #include "qemu/osdep.h" - #include - #include "qemu/coroutine_int.h" -+#include "qemu/coroutine-tls.h" - - #ifdef CONFIG_VALGRIND_H - #include -@@ -66,8 +67,8 @@ typedef struct { - /** - * Per-thread coroutine bookkeeping - */ --static __thread CoroutineUContext leader; --static __thread Coroutine *current; -+QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current); -+QEMU_DEFINE_STATIC_CO_TLS(CoroutineUContext, leader); - - /* - * va_args to makecontext() must be type 'int', so passing -@@ -97,14 +98,15 @@ static inline __attribute__((always_inline)) - void finish_switch_fiber(void *fake_stack_save) - { - #ifdef CONFIG_ASAN -+ CoroutineUContext *leaderp = get_ptr_leader(); - const void *bottom_old; - size_t size_old; - - __sanitizer_finish_switch_fiber(fake_stack_save, &bottom_old, &size_old); - -- if (!leader.stack) { -- leader.stack = (void *)bottom_old; -- leader.stack_size = size_old; -+ if (!leaderp->stack) { -+ leaderp->stack = (void *)bottom_old; -+ leaderp->stack_size = size_old; - } - #endif - #ifdef CONFIG_TSAN -@@ -161,8 +163,10 @@ static void coroutine_trampoline(int i0, int i1) - - /* Initialize longjmp environment and switch back the caller */ - if (!sigsetjmp(self->env, 0)) { -- start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, leader.stack, -- leader.stack_size); -+ CoroutineUContext *leaderp = get_ptr_leader(); -+ -+ start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, 
-+ leaderp->stack, leaderp->stack_size); - start_switch_fiber_tsan(&fake_stack_save, self, true); /* true=caller */ - siglongjmp(*(sigjmp_buf *)co->entry_arg, 1); - } -@@ -297,7 +301,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, - int ret; - void *fake_stack_save = NULL; - -- current = to_; -+ set_current(to_); - - ret = sigsetjmp(from->env, 0); - if (ret == 0) { -@@ -315,18 +319,24 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, - - Coroutine *qemu_coroutine_self(void) - { -- if (!current) { -- current = &leader.base; -+ Coroutine *self = get_current(); -+ CoroutineUContext *leaderp = get_ptr_leader(); -+ -+ if (!self) { -+ self = &leaderp->base; -+ set_current(self); - } - #ifdef CONFIG_TSAN -- if (!leader.tsan_co_fiber) { -- leader.tsan_co_fiber = __tsan_get_current_fiber(); -+ if (!leaderp->tsan_co_fiber) { -+ leaderp->tsan_co_fiber = __tsan_get_current_fiber(); - } - #endif -- return current; -+ return self; - } - - bool qemu_in_coroutine(void) - { -- return current && current->caller; -+ Coroutine *self = get_current(); -+ -+ return self && self->caller; - } --- -2.31.1 - diff --git a/kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch b/kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch deleted file mode 100644 index 9d0f811..0000000 --- a/kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch +++ /dev/null @@ -1,139 +0,0 @@ -From 9c2e55d25fec6ffb21e344513b7dbeed7e21f641 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 17 May 2022 12:08:04 +0100 -Subject: [PATCH 2/6] coroutine: use QEMU_DEFINE_STATIC_CO_TLS() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables -RH-Commit: [2/3] 68a8847e406e2eace6ddc31b0c5676a60600d606 (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 1952483 -RH-Acked-by: Hanna Reitz -RH-Acked-by: Eric Blake -RH-Acked-by: Kevin Wolf - -Thread-Local 
Storage variables cannot be used directly from coroutine -code because the compiler may optimize TLS variable accesses across -qemu_coroutine_yield() calls. When the coroutine is re-entered from -another thread the TLS variables from the old thread must no longer be -used. - -Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. -The alloc_pool QSLIST needs a typedef so the return value of -get_ptr_alloc_pool() can be stored in a local variable. - -One example of why this code is necessary: a coroutine that yields -before calling qemu_coroutine_create() to create another coroutine is -affected by the TLS issue. - -Signed-off-by: Stefan Hajnoczi -Message-Id: <20220307153853.602859-3-stefanha@redhat.com> -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Kevin Wolf -(cherry picked from commit ac387a08a9c9f6b36757da912f0339c25f421f90) - -Conflicts: -- Context conflicts due to commit 5411171c3ef4 ("coroutine: Revert to - constant batch size"). - -Signed-off-by: Stefan Hajnoczi ---- - util/qemu-coroutine.c | 41 ++++++++++++++++++++++++----------------- - 1 file changed, 24 insertions(+), 17 deletions(-) - -diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c -index 804f672e0a..4a8bd63ef0 100644 ---- a/util/qemu-coroutine.c -+++ b/util/qemu-coroutine.c -@@ -18,6 +18,7 @@ - #include "qemu/atomic.h" - #include "qemu/coroutine.h" - #include "qemu/coroutine_int.h" -+#include "qemu/coroutine-tls.h" - #include "block/aio.h" - - /** -@@ -35,17 +36,20 @@ enum { - static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); - static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE; - static unsigned int release_pool_size; --static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); --static __thread unsigned int alloc_pool_size; --static __thread Notifier coroutine_pool_cleanup_notifier; -+ -+typedef QSLIST_HEAD(, Coroutine) CoroutineQSList; -+QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool); 
-+QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size); -+QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier); - - static void coroutine_pool_cleanup(Notifier *n, void *value) - { - Coroutine *co; - Coroutine *tmp; -+ CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); - -- QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) { -- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); -+ QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) { -+ QSLIST_REMOVE_HEAD(alloc_pool, pool_next); - qemu_coroutine_delete(co); - } - } -@@ -55,27 +59,30 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) - Coroutine *co = NULL; - - if (CONFIG_COROUTINE_POOL) { -- co = QSLIST_FIRST(&alloc_pool); -+ CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); -+ -+ co = QSLIST_FIRST(alloc_pool); - if (!co) { - if (release_pool_size > POOL_MIN_BATCH_SIZE) { - /* Slow path; a good place to register the destructor, too. */ -- if (!coroutine_pool_cleanup_notifier.notify) { -- coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; -- qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier); -+ Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier(); -+ if (!notifier->notify) { -+ notifier->notify = coroutine_pool_cleanup; -+ qemu_thread_atexit_add(notifier); - } - - /* This is not exact; there could be a little skew between - * release_pool_size and the actual size of release_pool. But - * it is just a heuristic, it does not need to be perfect. 
- */ -- alloc_pool_size = qatomic_xchg(&release_pool_size, 0); -- QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool); -- co = QSLIST_FIRST(&alloc_pool); -+ set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0)); -+ QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool); -+ co = QSLIST_FIRST(alloc_pool); - } - } - if (co) { -- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); -- alloc_pool_size--; -+ QSLIST_REMOVE_HEAD(alloc_pool, pool_next); -+ set_alloc_pool_size(get_alloc_pool_size() - 1); - } - } - -@@ -99,9 +106,9 @@ static void coroutine_delete(Coroutine *co) - qatomic_inc(&release_pool_size); - return; - } -- if (alloc_pool_size < qatomic_read(&pool_max_size)) { -- QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); -- alloc_pool_size++; -+ if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) { -+ QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next); -+ set_alloc_pool_size(get_alloc_pool_size() + 1); - return; - } - } --- -2.31.1 - diff --git a/kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch b/kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch deleted file mode 100644 index 1665319..0000000 --- a/kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch +++ /dev/null @@ -1,99 +0,0 @@ -From 336581e6e9ace3f1ddd24ad0a258db9785f9b0ed Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 17 May 2022 12:08:12 +0100 -Subject: [PATCH 3/6] coroutine-win32: use QEMU_DEFINE_STATIC_CO_TLS() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables -RH-Commit: [3/3] 55b35dfdae1bc7d6f614ac9f81a92f5c6431f713 (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 1952483 -RH-Acked-by: Hanna Reitz -RH-Acked-by: Eric Blake -RH-Acked-by: Kevin Wolf - -Thread-Local Storage variables cannot be used directly from coroutine -code because the compiler may optimize TLS variable accesses across -qemu_coroutine_yield() calls. 
When the coroutine is re-entered from -another thread the TLS variables from the old thread must no longer be -used. - -Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. - -I think coroutine-win32.c could get away with __thread because the -variables are only used in situations where either the stale value is -correct (current) or outside coroutine context (loading leader when -current is NULL). Due to the difficulty of being sure that this is -really safe in all scenarios it seems worth converting it anyway. - -Signed-off-by: Stefan Hajnoczi -Message-Id: <20220307153853.602859-4-stefanha@redhat.com> -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Kevin Wolf -(cherry picked from commit c1fe694357a328c807ae3cc6961c19e923448fcc) -Signed-off-by: Stefan Hajnoczi ---- - util/coroutine-win32.c | 18 +++++++++++++----- - 1 file changed, 13 insertions(+), 5 deletions(-) - -diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c -index de6bd4fd3e..c02a62c896 100644 ---- a/util/coroutine-win32.c -+++ b/util/coroutine-win32.c -@@ -25,6 +25,7 @@ - #include "qemu/osdep.h" - #include "qemu-common.h" - #include "qemu/coroutine_int.h" -+#include "qemu/coroutine-tls.h" - - typedef struct - { -@@ -34,8 +35,8 @@ typedef struct - CoroutineAction action; - } CoroutineWin32; - --static __thread CoroutineWin32 leader; --static __thread Coroutine *current; -+QEMU_DEFINE_STATIC_CO_TLS(CoroutineWin32, leader); -+QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current); - - /* This function is marked noinline to prevent GCC from inlining it - * into coroutine_trampoline(). 
If we allow it to do that then it -@@ -52,7 +53,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, - CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_); - CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_); - -- current = to_; -+ set_current(to_); - - to->action = action; - SwitchToFiber(to->fiber); -@@ -89,14 +90,21 @@ void qemu_coroutine_delete(Coroutine *co_) - - Coroutine *qemu_coroutine_self(void) - { -+ Coroutine *current = get_current(); -+ - if (!current) { -- current = &leader.base; -- leader.fiber = ConvertThreadToFiber(NULL); -+ CoroutineWin32 *leader = get_ptr_leader(); -+ -+ current = &leader->base; -+ set_current(current); -+ leader->fiber = ConvertThreadToFiber(NULL); - } - return current; - } - - bool qemu_in_coroutine(void) - { -+ Coroutine *current = get_current(); -+ - return current && current->caller; - } --- -2.31.1 - diff --git a/kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch b/kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch deleted file mode 100644 index 2795dcd..0000000 --- a/kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch +++ /dev/null @@ -1,179 +0,0 @@ -From 8a12049e97149056f61f7748d9869606d282d16e Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 11 May 2022 18:01:35 +0800 -Subject: [PATCH 06/16] hw/acpi/aml-build: Use existing CPU topology to build - PPTT table - -RH-Author: Gavin Shan -RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology -RH-Commit: [6/6] 53fa376531c204cf706cc1a7a0499019756106cb (gwshan/qemu-rhel-9) -RH-Bugzilla: 2041823 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Andrew Jones - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 - -When the PPTT table is built, the CPU topology is re-calculated, but -it's unecessary because the CPU topology has been populated in -virt_possible_cpu_arch_ids() on arm/virt machine. 
- -This reworks build_pptt() to avoid by reusing the existing IDs in -ms->possible_cpus. Currently, the only user of build_pptt() is -arm/virt machine. - -Signed-off-by: Gavin Shan -Tested-by: Yanan Wang -Reviewed-by: Yanan Wang -Acked-by: Igor Mammedov -Acked-by: Michael S. Tsirkin -Message-id: 20220503140304.855514-7-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit ae9141d4a3265553503bf07d3574b40f84615a34) -Signed-off-by: Gavin Shan ---- - hw/acpi/aml-build.c | 111 +++++++++++++++++++------------------------- - 1 file changed, 48 insertions(+), 63 deletions(-) - -diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c -index 4086879ebf..e6bfac95c7 100644 ---- a/hw/acpi/aml-build.c -+++ b/hw/acpi/aml-build.c -@@ -2002,86 +2002,71 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, - const char *oem_id, const char *oem_table_id) - { - MachineClass *mc = MACHINE_GET_CLASS(ms); -- GQueue *list = g_queue_new(); -- guint pptt_start = table_data->len; -- guint parent_offset; -- guint length, i; -- int uid = 0; -- int socket; -+ CPUArchIdList *cpus = ms->possible_cpus; -+ int64_t socket_id = -1, cluster_id = -1, core_id = -1; -+ uint32_t socket_offset = 0, cluster_offset = 0, core_offset = 0; -+ uint32_t pptt_start = table_data->len; -+ int n; - AcpiTable table = { .sig = "PPTT", .rev = 2, - .oem_id = oem_id, .oem_table_id = oem_table_id }; - - acpi_table_begin(&table, table_data); - -- for (socket = 0; socket < ms->smp.sockets; socket++) { -- g_queue_push_tail(list, -- GUINT_TO_POINTER(table_data->len - pptt_start)); -- build_processor_hierarchy_node( -- table_data, -- /* -- * Physical package - represents the boundary -- * of a physical package -- */ -- (1 << 0), -- 0, socket, NULL, 0); -- } -- -- if (mc->smp_props.clusters_supported) { -- length = g_queue_get_length(list); -- for (i = 0; i < length; i++) { -- int cluster; -- -- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); -- for (cluster = 0; cluster < 
ms->smp.clusters; cluster++) { -- g_queue_push_tail(list, -- GUINT_TO_POINTER(table_data->len - pptt_start)); -- build_processor_hierarchy_node( -- table_data, -- (0 << 0), /* not a physical package */ -- parent_offset, cluster, NULL, 0); -- } -+ /* -+ * This works with the assumption that cpus[n].props.*_id has been -+ * sorted from top to down levels in mc->possible_cpu_arch_ids(). -+ * Otherwise, the unexpected and duplicated containers will be -+ * created. -+ */ -+ for (n = 0; n < cpus->len; n++) { -+ if (cpus->cpus[n].props.socket_id != socket_id) { -+ assert(cpus->cpus[n].props.socket_id > socket_id); -+ socket_id = cpus->cpus[n].props.socket_id; -+ cluster_id = -1; -+ core_id = -1; -+ socket_offset = table_data->len - pptt_start; -+ build_processor_hierarchy_node(table_data, -+ (1 << 0), /* Physical package */ -+ 0, socket_id, NULL, 0); - } -- } - -- length = g_queue_get_length(list); -- for (i = 0; i < length; i++) { -- int core; -- -- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); -- for (core = 0; core < ms->smp.cores; core++) { -- if (ms->smp.threads > 1) { -- g_queue_push_tail(list, -- GUINT_TO_POINTER(table_data->len - pptt_start)); -- build_processor_hierarchy_node( -- table_data, -- (0 << 0), /* not a physical package */ -- parent_offset, core, NULL, 0); -- } else { -- build_processor_hierarchy_node( -- table_data, -- (1 << 1) | /* ACPI Processor ID valid */ -- (1 << 3), /* Node is a Leaf */ -- parent_offset, uid++, NULL, 0); -+ if (mc->smp_props.clusters_supported) { -+ if (cpus->cpus[n].props.cluster_id != cluster_id) { -+ assert(cpus->cpus[n].props.cluster_id > cluster_id); -+ cluster_id = cpus->cpus[n].props.cluster_id; -+ core_id = -1; -+ cluster_offset = table_data->len - pptt_start; -+ build_processor_hierarchy_node(table_data, -+ (0 << 0), /* Not a physical package */ -+ socket_offset, cluster_id, NULL, 0); - } -+ } else { -+ cluster_offset = socket_offset; - } -- } - -- length = g_queue_get_length(list); -- for (i = 0; i < 
length; i++) { -- int thread; -+ if (ms->smp.threads == 1) { -+ build_processor_hierarchy_node(table_data, -+ (1 << 1) | /* ACPI Processor ID valid */ -+ (1 << 3), /* Node is a Leaf */ -+ cluster_offset, n, NULL, 0); -+ } else { -+ if (cpus->cpus[n].props.core_id != core_id) { -+ assert(cpus->cpus[n].props.core_id > core_id); -+ core_id = cpus->cpus[n].props.core_id; -+ core_offset = table_data->len - pptt_start; -+ build_processor_hierarchy_node(table_data, -+ (0 << 0), /* Not a physical package */ -+ cluster_offset, core_id, NULL, 0); -+ } - -- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); -- for (thread = 0; thread < ms->smp.threads; thread++) { -- build_processor_hierarchy_node( -- table_data, -+ build_processor_hierarchy_node(table_data, - (1 << 1) | /* ACPI Processor ID valid */ - (1 << 2) | /* Processor is a Thread */ - (1 << 3), /* Node is a Leaf */ -- parent_offset, uid++, NULL, 0); -+ core_offset, n, NULL, 0); - } - } - -- g_queue_free(list); - acpi_table_end(linker, &table); - } - --- -2.31.1 - diff --git a/kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch b/kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch deleted file mode 100644 index 240aead..0000000 --- a/kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 3b05d3464945295112b5d02d142422f524a52054 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 11 May 2022 18:01:35 +0800 -Subject: [PATCH 03/16] hw/arm/virt: Consider SMP configuration in CPU topology - -RH-Author: Gavin Shan -RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology -RH-Commit: [3/6] 7125b41f038c2b1cb33377d0ef1222f1ea42b648 (gwshan/qemu-rhel-9) -RH-Bugzilla: 2041823 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Andrew Jones - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 - -Currently, the SMP configuration isn't considered when the CPU -topology is populated. 
In this case, it's impossible to provide -the default CPU-to-NUMA mapping or association based on the socket -ID of the given CPU. - -This takes account of SMP configuration when the CPU topology -is populated. The die ID for the given CPU isn't assigned since -it's not supported on arm/virt machine. Besides, the used SMP -configuration in qtest/numa-test/aarch64_numa_cpu() is corrcted -to avoid testing failure - -Signed-off-by: Gavin Shan -Reviewed-by: Yanan Wang -Acked-by: Igor Mammedov -Message-id: 20220503140304.855514-4-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit c9ec4cb5e4936f980889e717524e73896b0200ed) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 15 ++++++++++++++- - 1 file changed, 14 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 8be12e121d..a87c8d396a 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2553,6 +2553,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) - int n; - unsigned int max_cpus = ms->smp.max_cpus; - VirtMachineState *vms = VIRT_MACHINE(ms); -+ MachineClass *mc = MACHINE_GET_CLASS(vms); - - if (ms->possible_cpus) { - assert(ms->possible_cpus->len == max_cpus); -@@ -2566,8 +2567,20 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) - ms->possible_cpus->cpus[n].type = ms->cpu_type; - ms->possible_cpus->cpus[n].arch_id = - virt_cpu_mp_affinity(vms, n); -+ -+ assert(!mc->smp_props.dies_supported); -+ ms->possible_cpus->cpus[n].props.has_socket_id = true; -+ ms->possible_cpus->cpus[n].props.socket_id = -+ n / (ms->smp.clusters * ms->smp.cores * ms->smp.threads); -+ ms->possible_cpus->cpus[n].props.has_cluster_id = true; -+ ms->possible_cpus->cpus[n].props.cluster_id = -+ (n / (ms->smp.cores * ms->smp.threads)) % ms->smp.clusters; -+ ms->possible_cpus->cpus[n].props.has_core_id = true; -+ ms->possible_cpus->cpus[n].props.core_id = -+ (n / ms->smp.threads) % ms->smp.cores; - ms->possible_cpus->cpus[n].props.has_thread_id 
= true; -- ms->possible_cpus->cpus[n].props.thread_id = n; -+ ms->possible_cpus->cpus[n].props.thread_id = -+ n % ms->smp.threads; - } - return ms->possible_cpus; - } --- -2.31.1 - diff --git a/kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch b/kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch deleted file mode 100644 index 6b60b70..0000000 --- a/kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 14e49ad3b98f01c1ad6fe456469d40a96a43dc3c Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 11 May 2022 18:01:35 +0800 -Subject: [PATCH 05/16] hw/arm/virt: Fix CPU's default NUMA node ID - -RH-Author: Gavin Shan -RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology -RH-Commit: [5/6] 5336f62bc0c53c0417db1d71ef89544907bc28c0 (gwshan/qemu-rhel-9) -RH-Bugzilla: 2041823 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Andrew Jones - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 - -When CPU-to-NUMA association isn't explicitly provided by users, -the default one is given by mc->get_default_cpu_node_id(). However, -the CPU topology isn't fully considered in the default association -and this causes CPU topology broken warnings on booting Linux guest. - -For example, the following warning messages are observed when the -Linux guest is booted with the following command lines. 
- -/home/gavin/sandbox/qemu.main/build/qemu-system-aarch64 \ --accel kvm -machine virt,gic-version=host \ --cpu host \ --smp 6,sockets=2,cores=3,threads=1 \ --m 1024M,slots=16,maxmem=64G \ --object memory-backend-ram,id=mem0,size=128M \ --object memory-backend-ram,id=mem1,size=128M \ --object memory-backend-ram,id=mem2,size=128M \ --object memory-backend-ram,id=mem3,size=128M \ --object memory-backend-ram,id=mem4,size=128M \ --object memory-backend-ram,id=mem4,size=384M \ --numa node,nodeid=0,memdev=mem0 \ --numa node,nodeid=1,memdev=mem1 \ --numa node,nodeid=2,memdev=mem2 \ --numa node,nodeid=3,memdev=mem3 \ --numa node,nodeid=4,memdev=mem4 \ --numa node,nodeid=5,memdev=mem5 -: -alternatives: patching kernel code -BUG: arch topology borken -the CLS domain not a subset of the MC domain - -BUG: arch topology borken -the DIE domain not a subset of the NODE domain - -With current implementation of mc->get_default_cpu_node_id(), -CPU#0 to CPU#5 are associated with NODE#0 to NODE#5 separately. -That's incorrect because CPU#0/1/2 should be associated with same -NUMA node because they're seated in same socket. - -This fixes the issue by considering the socket ID when the default -CPU-to-NUMA association is provided in virt_possible_cpu_arch_ids(). -With this applied, no more CPU topology broken warnings are seen -from the Linux guest. The 6 CPUs are associated with NODE#0/1, but -there are no CPUs associated with NODE#2/3/4/5. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Igor Mammedov -Reviewed-by: Yanan Wang -Message-id: 20220503140304.855514-6-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 4c18bc192386dfbca530e7f550e0992df657818a) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index a87c8d396a..95d012d6eb 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2545,7 +2545,9 @@ virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) - - static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx) - { -- return idx % ms->numa_state->num_nodes; -+ int64_t socket_id = ms->possible_cpus->cpus[idx].props.socket_id; -+ -+ return socket_id % ms->numa_state->num_nodes; - } - - static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) --- -2.31.1 - diff --git a/kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch b/kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch deleted file mode 100644 index 78b9ee0..0000000 --- a/kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch +++ /dev/null @@ -1,56 +0,0 @@ -From e25c40735d2f022c07481b548d20476222006657 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 4 May 2022 11:11:54 +0200 -Subject: [PATCH 2/5] hw/arm/virt: Fix missing initialization in - instance/class_init() - -RH-Author: Eric Auger -RH-MergeRequest: 82: hw/arm/virt: Remove the dtb-kaslr-seed machine option -RH-Commit: [2/2] 22cbbfc30cf57a09b8acfb25d8a4dff2754c630c (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2046029 -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2046029 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45133161 -Upstream Status: RHEL-only -Tested: Boot RHEL guest and check migration from 8.6 to 9.1 - (with custom additions) - -During the 7.0 rebase, the initialization of 
highmem_mmio and -highmem_redists was forgotten in rhel_virt_instance_init(). -Fix it to match virt_instance_init() code. - -Also mc->smp_props.clusters_supported was missing in -rhel_machine_class_init(). - -Signed-off-by: Eric Auger ---- - hw/arm/virt.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index bde4f77994..8be12e121d 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3286,6 +3286,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - hc->unplug_request = virt_machine_device_unplug_request_cb; - hc->unplug = virt_machine_device_unplug_cb; - mc->nvdimm_supported = true; -+ mc->smp_props.clusters_supported = true; - mc->auto_enable_numa_with_memhp = true; - mc->auto_enable_numa_with_memdev = true; - mc->default_ram_id = "mach-virt.ram"; -@@ -3366,6 +3367,8 @@ static void rhel_virt_instance_init(Object *obj) - vms->gic_version = VIRT_GIC_VERSION_NOSEL; - - vms->highmem_ecam = !vmc->no_highmem_ecam; -+ vms->highmem_mmio = true; -+ vms->highmem_redists = true; - - if (vmc->no_its) { - vms->its = false; --- -2.31.1 - diff --git a/kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch b/kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch deleted file mode 100644 index 10af6c0..0000000 --- a/kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 69f771c3dc641431f3e98497cbd3832edb69284f Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 3 May 2022 08:56:52 +0200 -Subject: [PATCH 1/5] hw/arm/virt: Remove the dtb-kaslr-seed machine option - -RH-Author: Eric Auger -RH-MergeRequest: 82: hw/arm/virt: Remove the dtb-kaslr-seed machine option -RH-Commit: [1/2] a89dcd7f22e04ae39de99795d3f34cdd0b831bc0 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2046029 -RH-Acked-by: Gavin Shan -RH-Acked-by: Andrew Jones -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2046029 -Brew: 
https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45133161 -Upstream Status: RHEL-only -Tested: Boot RHEL guest and check the option is not available - -In RHEL we do not want to expose the dtb-kaslr-seed virt machine -option. Indeed the default 'on' value matches our need as -random data in the DTB does not cause any boot failure and we -want to support KASLR for the guest. - -Signed-off-by: Eric Auger - ---- ---- - hw/arm/virt.c | 11 +++-------- - 1 file changed, 3 insertions(+), 8 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index e06862d22a..bde4f77994 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2350,6 +2350,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) - vms->its = value; - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static bool virt_get_dtb_kaslr_seed(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2363,6 +2364,7 @@ static void virt_set_dtb_kaslr_seed(Object *obj, bool value, Error **errp) - - vms->dtb_kaslr_seed = value; - } -+#endif /* disabled for RHEL */ - - static char *virt_get_oem_id(Object *obj, Error **errp) - { -@@ -3346,13 +3348,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - "Override the default value of field OEM Table ID " - "in ACPI table header." 
- "The string may be up to 8 bytes in size"); -- -- object_class_property_add_bool(oc, "dtb-kaslr-seed", -- virt_get_dtb_kaslr_seed, -- virt_set_dtb_kaslr_seed); -- object_class_property_set_description(oc, "dtb-kaslr-seed", -- "Set off to disable passing of kaslr-seed " -- "dtb node to guest"); - } - - static void rhel_virt_instance_init(Object *obj) -@@ -3397,7 +3392,7 @@ static void rhel_virt_instance_init(Object *obj) - /* MTE is disabled by default and non-configurable for RHEL */ - vms->mte = false; - -- /* Supply a kaslr-seed by default */ -+ /* Supply a kaslr-seed by default and non-configurable for RHEL */ - vms->dtb_kaslr_seed = true; - - vms->irqmap = a15irqmap; --- -2.31.1 - diff --git a/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch b/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch deleted file mode 100644 index 1bdad27..0000000 --- a/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 6ee4a8718dcce2d6da43ee200534b75baf1d7bbe Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Thu, 18 Nov 2021 12:57:32 +0100 -Subject: [PATCH 16/17] hw/block/fdc: Prevent end-of-track overrun - (CVE-2021-3507) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -RH-MergeRequest: 107: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507) -RH-Commit: [1/2] 9ffc5290348884d20b894fa79f4d0c8089247f8b (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1951522 -RH-Acked-by: Hanna Reitz -RH-Acked-by: Miroslav Rezanina - -Per the 82078 datasheet, if the end-of-track (EOT byte in -the FIFO) is more than the number of sectors per side, the -command is terminated unsuccessfully: - -* 5.2.5 DATA TRANSFER TERMINATION - - The 82078 supports terminal count explicitly through - the TC pin and implicitly through the underrun/over- - run and end-of-track (EOT) functions. 
For full sector - transfers, the EOT parameter can define the last - sector to be transferred in a single or multisector - transfer. If the last sector to be transferred is a par- - tial sector, the host can stop transferring the data in - mid-sector, and the 82078 will continue to complete - the sector as if a hardware TC was received. The - only difference between these implicit functions and - TC is that they return "abnormal termination" result - status. Such status indications can be ignored if they - were expected. - -* 6.1.3 READ TRACK - - This command terminates when the EOT specified - number of sectors have been read. If the 82078 - does not find an I D Address Mark on the diskette - after the second· occurrence of a pulse on the - INDX# pin, then it sets the IC code in Status Regis- - ter 0 to "01" (Abnormal termination), sets the MA bit - in Status Register 1 to "1", and terminates the com- - mand. - -* 6.1.6 VERIFY - - Refer to Table 6-6 and Table 6-7 for information - concerning the values of MT and EC versus SC and - EOT value. - -* Table 6·6. Result Phase Table - -* Table 6-7. Verify Command Result Phase Table - -Fix by aborting the transfer when EOT > # Sectors Per Side. 
- -Cc: qemu-stable@nongnu.org -Cc: Hervé Poussineau -Fixes: baca51faff0 ("floppy driver: disk geometry auto detect") -Reported-by: Alexander Bulekov -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/339 -Signed-off-by: Philippe Mathieu-Daudé -Message-Id: <20211118115733.4038610-2-philmd@redhat.com> -Reviewed-by: Hanna Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit defac5e2fbddf8423a354ff0454283a2115e1367) -Signed-off-by: Jon Maloy ---- - hw/block/fdc.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index ca1776121f..6481ec0cfb 100644 ---- a/hw/block/fdc.c -+++ b/hw/block/fdc.c -@@ -1532,6 +1532,14 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction) - int tmp; - fdctrl->data_len = 128 << (fdctrl->fifo[5] > 7 ? 7 : fdctrl->fifo[5]); - tmp = (fdctrl->fifo[6] - ks + 1); -+ if (tmp < 0) { -+ FLOPPY_DPRINTF("invalid EOT: %d\n", tmp); -+ fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00); -+ fdctrl->fifo[3] = kt; -+ fdctrl->fifo[4] = kh; -+ fdctrl->fifo[5] = ks; -+ return; -+ } - if (fdctrl->fifo[0] & 0x80) - tmp += fdctrl->fifo[6]; - fdctrl->data_len *= tmp; --- -2.31.1 - diff --git a/kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch b/kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch deleted file mode 100644 index 44897ac..0000000 --- a/kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 4dad0e9abbc843fba4e5fee6e7aa1b0db13f5898 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:27:35 +0200 -Subject: [PATCH 03/32] hw/virtio: Replace g_memdup() by g_memdup2() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [3/27] ae196903eb1a7aebbf999100e997cf82e5024cb6 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu 
-RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit d792199de55ca5cb5334016884039c740290b5c7 -Author: Philippe Mathieu-Daudé -Date: Thu May 12 19:57:46 2022 +0200 - - hw/virtio: Replace g_memdup() by g_memdup2() - - Per https://discourse.gnome.org/t/port-your-module-from-g-memdup-to-g-memdup2-now/5538 - - The old API took the size of the memory to duplicate as a guint, - whereas most memory functions take memory sizes as a gsize. This - made it easy to accidentally pass a gsize to g_memdup(). For large - values, that would lead to a silent truncation of the size from 64 - to 32 bits, and result in a heap area being returned which is - significantly smaller than what the caller expects. This can likely - be exploited in various modules to cause a heap buffer overflow. - - Replace g_memdup() by the safer g_memdup2() wrapper. - - Acked-by: Jason Wang - Acked-by: Eugenio Pérez - Signed-off-by: Philippe Mathieu-Daudé - Message-Id: <20220512175747.142058-6-eperezma@redhat.com> - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Michael S. 
Tsirkin - -Signed-off-by: Eugenio Pérez ---- - hw/net/virtio-net.c | 3 ++- - hw/virtio/virtio-crypto.c | 6 +++--- - 2 files changed, 5 insertions(+), 4 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 099e65036d..633de61513 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -1458,7 +1458,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) - } - - iov_cnt = elem->out_num; -- iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num); -+ iov2 = iov = g_memdup2(elem->out_sg, -+ sizeof(struct iovec) * elem->out_num); - s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl)); - iov_discard_front(&iov, &iov_cnt, sizeof(ctrl)); - if (s != sizeof(ctrl)) { -diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c -index dcd80b904d..0e31e3cc04 100644 ---- a/hw/virtio/virtio-crypto.c -+++ b/hw/virtio/virtio-crypto.c -@@ -242,7 +242,7 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) - } - - out_num = elem->out_num; -- out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num); -+ out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num); - out_iov = out_iov_copy; - - in_num = elem->in_num; -@@ -605,11 +605,11 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request) - } - - out_num = elem->out_num; -- out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num); -+ out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num); - out_iov = out_iov_copy; - - in_num = elem->in_num; -- in_iov_copy = g_memdup(elem->in_sg, sizeof(in_iov[0]) * in_num); -+ in_iov_copy = g_memdup2(elem->in_sg, sizeof(in_iov[0]) * in_num); - in_iov = in_iov_copy; - - if (unlikely(iov_to_buf(out_iov, out_num, 0, &req, sizeof(req)) --- -2.31.1 - diff --git a/kvm-iotests-108-Fix-when-missing-user_allow_other.patch b/kvm-iotests-108-Fix-when-missing-user_allow_other.patch deleted file mode 100644 index a37ea6f..0000000 --- 
a/kvm-iotests-108-Fix-when-missing-user_allow_other.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 447bca651c9156d7aba6b7495c75f19b5e4ed53f Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Thu, 21 Apr 2022 16:24:35 +0200 -Subject: [PATCH 07/16] iotests/108: Fix when missing user_allow_other - -RH-Author: Hanna Reitz -RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding -RH-Commit: [4/4] a51ab8606fc9d8dea2b6539f4e795d5813892a5c (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2072379 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -FUSE exports' allow-other option defaults to "auto", which means that it -will try passing allow_other as a mount option, and fall back to not -using it when an error occurs. We make no effort to hide fusermount's -error message (because it would be difficult, and because users might -want to know about the fallback occurring), and so when allow_other does -not work (primarily when /etc/fuse.conf does not contain -user_allow_other), this error message will appear and break the -reference output. - -We do not need allow_other here, though, so we can just pass -allow-other=off to fix that. 
- -Reported-by: Markus Armbruster -Signed-off-by: Hanna Reitz -Message-Id: <20220421142435.569600-1-hreitz@redhat.com> -Tested-by: Markus Armbruster -Tested-by: Eric Blake -(cherry picked from commit 348a0740afc5b313599533eb69bbb2b95d2f1bba) -Signed-off-by: Hanna Reitz ---- - tests/qemu-iotests/108 | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 -index a3090e2875..4681c7c769 100755 ---- a/tests/qemu-iotests/108 -+++ b/tests/qemu-iotests/108 -@@ -326,7 +326,7 @@ else - - $QSD \ - --blockdev file,node-name=export-node,filename="$TEST_IMG" \ -- --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \ -+ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off,allow-other=off \ - --pidfile "$TEST_DIR/qsd.pid" \ - & - --- -2.31.1 - diff --git a/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch b/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch deleted file mode 100644 index 7a968f6..0000000 --- a/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch +++ /dev/null @@ -1,445 +0,0 @@ -From ed69e01352b5e9a06173daab53bfa373c8535732 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Tue, 5 Apr 2022 15:46:51 +0200 -Subject: [PATCH 05/16] iotests/108: Test new refcount rebuild algorithm - -RH-Author: Hanna Reitz -RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding -RH-Commit: [2/4] b68310a9fee8465dd3f568c8e867e1b7ae52bdaf (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2072379 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -One clear problem with how qcow2's refcount structure rebuild algorithm -used to be before "qcow2: Improve refcount structure rebuilding" was -that it is prone to failure for qcow2 images on block devices: There is -generally unused space after the actual image, and if that exceeds what -one refblock covers, the old algorithm would 
invariably write the -reftable past the block device's end, which cannot work. The new -algorithm does not have this problem. - -Test it with three tests: -(1) Create an image with more empty space at the end than what one - refblock covers, see whether rebuilding the refcount structures - results in a change in the image file length. (It should not.) - -(2) Leave precisely enough space somewhere at the beginning of the image - for the new reftable (and the refblock for that place), see whether - the new algorithm puts the reftable there. (It should.) - -(3) Test the original problem: Create (something like) a block device - with a fixed size, then create a qcow2 image in there, write some - data, and then have qemu-img check rebuild the refcount structures. - Before HEAD^, the reftable would have been written past the image - file end, i.e. outside of what the block device provides, which - cannot work. HEAD^ should have fixed that. - ("Something like a block device" means a loop device if we can use - one ("sudo -n losetup" works), or a FUSE block export with - growable=false otherwise.) - -Reviewed-by: Eric Blake -Signed-off-by: Hanna Reitz -Message-Id: <20220405134652.19278-3-hreitz@redhat.com> -(cherry picked from commit 9ffd6d646d1d5ee9087a8cbf0b7d2f96c5656162) - -Conflicts: -- 108: The downstream qemu-storage-daemon does not support --daemonize, - so this switch has been replaced by a loop waiting for the PID file to - appear - -Signed-off-by: Hanna Reitz ---- - tests/qemu-iotests/108 | 263 ++++++++++++++++++++++++++++++++++++- - tests/qemu-iotests/108.out | 81 ++++++++++++ - 2 files changed, 343 insertions(+), 1 deletion(-) - -diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 -index 56339ab2c5..a3090e2875 100755 ---- a/tests/qemu-iotests/108 -+++ b/tests/qemu-iotests/108 -@@ -30,13 +30,20 @@ status=1 # failure is the default! 
- - _cleanup() - { -- _cleanup_test_img -+ _cleanup_test_img -+ if [ -f "$TEST_DIR/qsd.pid" ]; then -+ qsd_pid=$(cat "$TEST_DIR/qsd.pid") -+ kill -KILL "$qsd_pid" -+ fusermount -u "$TEST_DIR/fuse-export" &>/dev/null -+ fi -+ rm -f "$TEST_DIR/fuse-export" - } - trap "_cleanup; exit \$status" 0 1 2 3 15 - - # get standard environment, filters and checks - . ./common.rc - . ./common.filter -+. ./common.qemu - - # This tests qcow2-specific low-level functionality - _supported_fmt qcow2 -@@ -47,6 +54,22 @@ _supported_os Linux - # files - _unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' data_file - -+# This test either needs sudo -n losetup or FUSE exports to work -+if sudo -n losetup &>/dev/null; then -+ loopdev=true -+else -+ loopdev=false -+ -+ # QSD --export fuse will either yield "Parameter 'id' is missing" -+ # or "Invalid parameter 'fuse'", depending on whether there is -+ # FUSE support or not. -+ error=$($QSD --export fuse 2>&1) -+ if [[ $error = *"'fuse'"* ]]; then -+ _notrun 'Passwordless sudo for losetup or FUSE support required, but' \ -+ 'neither is available' -+ fi -+fi -+ - echo - echo '=== Repairing an image without any refcount table ===' - echo -@@ -138,6 +161,244 @@ _make_test_img 64M - poke_file "$TEST_IMG" $((0x10008)) "\xff\xff\xff\xff\xff\xff\x00\x00" - _check_test_img -r all - -+echo -+echo '=== Check rebuilt reftable location ===' -+ -+# In an earlier version of the refcount rebuild algorithm, the -+# reftable was generally placed at the image end (unless something was -+# allocated in the area covered by the refblock right before the image -+# file end, then we would try to place the reftable in that refblock). -+# This was later changed so the reftable would be placed in the -+# earliest possible location. Test this. -+ -+echo -+echo '--- Does the image size increase? 
---' -+echo -+ -+# First test: Just create some image, write some data to it, and -+# resize it so there is free space at the end of the image (enough -+# that it spans at least one full refblock, which for cluster_size=512 -+# images, spans 128k). With the old algorithm, the reftable would -+# have then been placed at the end of the image file, but with the new -+# one, it will be put in that free space. -+# We want to check whether the size of the image file increases due to -+# rebuilding the refcount structures (it should not). -+ -+_make_test_img -o 'cluster_size=512' 1M -+# Write something -+$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io -+ -+# Add free space -+file_len=$(stat -c '%s' "$TEST_IMG") -+truncate -s $((file_len + 256 * 1024)) "$TEST_IMG" -+ -+# Corrupt the image by saying the image header was not allocated -+rt_offset=$(peek_file_be "$TEST_IMG" 48 8) -+rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8) -+poke_file "$TEST_IMG" $rb_offset "\x00\x00" -+ -+# Check whether rebuilding the refcount structures increases the image -+# file size -+file_len=$(stat -c '%s' "$TEST_IMG") -+echo -+# The only leaks there can be are the old refcount structures that are -+# leaked during rebuilding, no need to clutter the output with them -+_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0' -+echo -+post_repair_file_len=$(stat -c '%s' "$TEST_IMG") -+ -+if [[ $file_len -eq $post_repair_file_len ]]; then -+ echo 'OK: Image size did not change' -+else -+ echo 'ERROR: Image size differs' \ -+ "($file_len before, $post_repair_file_len after)" -+fi -+ -+echo -+echo '--- Will the reftable occupy a hole specifically left for it? ---' -+echo -+ -+# Note: With cluster_size=512, every refblock covers 128k. -+# The reftable covers 8M per reftable cluster. -+ -+# Create an image that requires two reftable clusters (just because -+# this is more interesting than a single-clustered reftable). 
-+_make_test_img -o 'cluster_size=512' 9M -+$QEMU_IO -c 'write 0 8M' "$TEST_IMG" | _filter_qemu_io -+ -+# Writing 8M will have resized the reftable. Unfortunately, doing so -+# will leave holes in the file, so we need to fill them up so we can -+# be sure the whole file is allocated. Do that by writing -+# consecutively smaller chunks starting from 8 MB, until the file -+# length increases even with a chunk size of 512. Then we must have -+# filled all holes. -+ofs=$((8 * 1024 * 1024)) -+block_len=$((16 * 1024)) -+while [[ $block_len -ge 512 ]]; do -+ file_len=$(stat -c '%s' "$TEST_IMG") -+ while [[ $(stat -c '%s' "$TEST_IMG") -eq $file_len ]]; do -+ # Do not include this in the reference output, it does not -+ # really matter which qemu-io calls we do here exactly -+ $QEMU_IO -c "write $ofs $block_len" "$TEST_IMG" >/dev/null -+ ofs=$((ofs + block_len)) -+ done -+ block_len=$((block_len / 2)) -+done -+ -+# Fill up to 9M (do not include this in the reference output either, -+# $ofs is random for all we know) -+$QEMU_IO -c "write $ofs $((9 * 1024 * 1024 - ofs))" "$TEST_IMG" >/dev/null -+ -+# Make space as follows: -+# - For the first refblock: Right at the beginning of the image (this -+# refblock is placed in the first place possible), -+# - For the reftable somewhere soon afterwards, still near the -+# beginning of the image (i.e. covered by the first refblock); the -+# reftable too is placed in the first place possible, but only after -+# all refblocks have been placed) -+# No space is needed for the other refblocks, because no refblock is -+# put before the space it covers. In this test case, we do not mind -+# if they are placed at the image file's end. -+ -+# Before we make that space, we have to find out the host offset of -+# the area that belonged to the two data clusters at guest offset 4k, -+# because we expect the reftable to be placed there, and we will have -+# to verify that it is. 
-+ -+l1_offset=$(peek_file_be "$TEST_IMG" 40 8) -+l2_offset=$(peek_file_be "$TEST_IMG" $l1_offset 8) -+l2_offset=$((l2_offset & 0x00fffffffffffe00)) -+data_4k_offset=$(peek_file_be "$TEST_IMG" \ -+ $((l2_offset + 4096 / 512 * 8)) 8) -+data_4k_offset=$((data_4k_offset & 0x00fffffffffffe00)) -+ -+$QEMU_IO -c "discard 0 512" -c "discard 4k 1k" "$TEST_IMG" | _filter_qemu_io -+ -+# Corrupt the image by saying the image header was not allocated -+rt_offset=$(peek_file_be "$TEST_IMG" 48 8) -+rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8) -+poke_file "$TEST_IMG" $rb_offset "\x00\x00" -+ -+echo -+# The only leaks there can be are the old refcount structures that are -+# leaked during rebuilding, no need to clutter the output with them -+_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0' -+echo -+ -+# Check whether the reftable was put where we expected -+rt_offset=$(peek_file_be "$TEST_IMG" 48 8) -+if [[ $rt_offset -eq $data_4k_offset ]]; then -+ echo 'OK: Reftable is where we expect it' -+else -+ echo "ERROR: Reftable is at $rt_offset, but was expected at $data_4k_offset" -+fi -+ -+echo -+echo '--- Rebuilding refcount structures on block devices ---' -+echo -+ -+# A block device cannot really grow, at least not during qemu-img -+# check. As mentioned in the above cases, rebuilding the refcount -+# structure may lead to new refcount structures being written after -+# the end of the image, and in the past that happened even if there -+# was more than sufficient space in the image. Such post-EOF writes -+# will not work on block devices, so test that the new algorithm -+# avoids it. -+ -+# If we have passwordless sudo and losetup, we can use those to create -+# a block device. Otherwise, we can resort to qemu's FUSE export to -+# create a file that isn't growable, which effectively tests the same -+# thing. 
-+ -+_cleanup_test_img -+truncate -s $((64 * 1024 * 1024)) "$TEST_IMG" -+ -+if $loopdev; then -+ export_mp=$(sudo -n losetup --show -f "$TEST_IMG") -+ export_mp_driver=host_device -+ sudo -n chmod go+rw "$export_mp" -+else -+ # Create non-growable FUSE export that is a bit like an empty -+ # block device -+ export_mp="$TEST_DIR/fuse-export" -+ export_mp_driver=file -+ touch "$export_mp" -+ -+ $QSD \ -+ --blockdev file,node-name=export-node,filename="$TEST_IMG" \ -+ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \ -+ --pidfile "$TEST_DIR/qsd.pid" \ -+ & -+ -+ while [ ! -f "$TEST_DIR/qsd.pid" ]; do -+ sleep 0.1 -+ done -+fi -+ -+# Now create a qcow2 image on the device -- unfortunately, qemu-img -+# create force-creates the file, so we have to resort to the -+# blockdev-create job. -+_launch_qemu \ -+ --blockdev $export_mp_driver,node-name=file,filename="$export_mp" -+ -+_send_qemu_cmd \ -+ $QEMU_HANDLE \ -+ '{ "execute": "qmp_capabilities" }' \ -+ 'return' -+ -+# Small cluster size again, so the image needs multiple refblocks -+_send_qemu_cmd \ -+ $QEMU_HANDLE \ -+ '{ "execute": "blockdev-create", -+ "arguments": { -+ "job-id": "create", -+ "options": { -+ "driver": "qcow2", -+ "file": "file", -+ "size": '$((64 * 1024 * 1024))', -+ "cluster-size": 512 -+ } } }' \ -+ '"concluded"' -+ -+_send_qemu_cmd \ -+ $QEMU_HANDLE \ -+ '{ "execute": "job-dismiss", "arguments": { "id": "create" } }' \ -+ 'return' -+ -+_send_qemu_cmd \ -+ $QEMU_HANDLE \ -+ '{ "execute": "quit" }' \ -+ 'return' -+ -+wait=y _cleanup_qemu -+echo -+ -+# Write some data -+$QEMU_IO -c 'write 0 64k' "$export_mp" | _filter_qemu_io -+ -+# Corrupt the image by saying the image header was not allocated -+rt_offset=$(peek_file_be "$export_mp" 48 8) -+rb_offset=$(peek_file_be "$export_mp" $rt_offset 8) -+poke_file "$export_mp" $rb_offset "\x00\x00" -+ -+# Repairing such a simple case should just work -+# (We used to put the reftable at the end of the image 
file, which can -+# never work for non-growable devices.) -+echo -+TEST_IMG="$export_mp" _check_test_img -r all \ -+ | grep -v '^Repairing cluster.*refcount=1 reference=0' -+ -+if $loopdev; then -+ sudo -n losetup -d "$export_mp" -+else -+ qsd_pid=$(cat "$TEST_DIR/qsd.pid") -+ kill -TERM "$qsd_pid" -+ # Wait for process to exit (cannot `wait` because the QSD is daemonized) -+ while [ -f "$TEST_DIR/qsd.pid" ]; do -+ true -+ done -+fi -+ - # success, all done - echo '*** done' - rm -f $seq.full -diff --git a/tests/qemu-iotests/108.out b/tests/qemu-iotests/108.out -index 75bab8dc84..b5401d788d 100644 ---- a/tests/qemu-iotests/108.out -+++ b/tests/qemu-iotests/108.out -@@ -105,6 +105,87 @@ The following inconsistencies were found and repaired: - 0 leaked clusters - 1 corruptions - -+Double checking the fixed image now... -+No errors were found on the image. -+ -+=== Check rebuilt reftable location === -+ -+--- Does the image size increase? --- -+ -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 -+wrote 65536/65536 bytes at offset 0 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+ERROR cluster 0 refcount=0 reference=1 -+Rebuilding refcount structure -+The following inconsistencies were found and repaired: -+ -+ 0 leaked clusters -+ 1 corruptions -+ -+Double checking the fixed image now... -+No errors were found on the image. -+ -+OK: Image size did not change -+ -+--- Will the reftable occupy a hole specifically left for it? 
--- -+ -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=9437184 -+wrote 8388608/8388608 bytes at offset 0 -+8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+discard 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+discard 1024/1024 bytes at offset 4096 -+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+ERROR cluster 0 refcount=0 reference=1 -+Rebuilding refcount structure -+The following inconsistencies were found and repaired: -+ -+ 0 leaked clusters -+ 1 corruptions -+ -+Double checking the fixed image now... -+No errors were found on the image. -+ -+OK: Reftable is where we expect it -+ -+--- Rebuilding refcount structures on block devices --- -+ -+{ "execute": "qmp_capabilities" } -+{"return": {}} -+{ "execute": "blockdev-create", -+ "arguments": { -+ "job-id": "create", -+ "options": { -+ "driver": "IMGFMT", -+ "file": "file", -+ "size": 67108864, -+ "cluster-size": 512 -+ } } } -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "create"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "create"}} -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "create"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "create"}} -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "create"}} -+{ "execute": "job-dismiss", "arguments": { "id": "create" } } -+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}} -+{"return": {}} -+{ "execute": "quit" } -+{"return": {}} -+{"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} -+ -+wrote 65536/65536 bytes at offset 0 -+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+ERROR cluster 0 refcount=0 reference=1 -+Rebuilding refcount structure -+The following inconsistencies were found and repaired: -+ -+ 0 leaked clusters -+ 1 corruptions -+ - Double checking the fixed image now... - No errors were found on the image. - *** done --- -2.31.1 - diff --git a/kvm-kvm-don-t-use-perror-without-useful-errno.patch b/kvm-kvm-don-t-use-perror-without-useful-errno.patch deleted file mode 100644 index a78c089..0000000 --- a/kvm-kvm-don-t-use-perror-without-useful-errno.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 9ddefaedf423ec03eadaf17496c14e0d7b2381c8 Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Thu, 28 Jul 2022 16:24:46 +0200 -Subject: [PATCH 30/32] kvm: don't use perror() without useful errno - -RH-Author: Cornelia Huck -RH-MergeRequest: 110: kvm: don't use perror() without useful errno -RH-Commit: [1/1] 20e51aac6767c1f89f74c7d692d1fb7689eff5f0 (cohuck/qemu-kvm-c9s) -RH-Bugzilla: 2095608 -RH-Acked-by: Eric Auger -RH-Acked-by: Thomas Huth -RH-Acked-by: Gavin Shan - -perror() is designed to append the decoded errno value to a -string. This, however, only makes sense if we called something that -actually sets errno prior to that. - -For the callers that check for split irqchip support that is not the -case, and we end up with confusing error messages that end in -"success". Use error_report() instead. 
- -Signed-off-by: Cornelia Huck -Message-Id: <20220728142446.438177-1-cohuck@redhat.com> -Signed-off-by: Paolo Bonzini - -https://bugzilla.redhat.com/show_bug.cgi?id=2095608 -(cherry picked from commit 47c182fe8b03c0c40059fb95840923e65c9bdb4f) -Signed-off-by: Cornelia Huck ---- - accel/kvm/kvm-all.c | 2 +- - target/arm/kvm.c | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index 5f1377ca04..e9c7947640 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -2254,7 +2254,7 @@ static void kvm_irqchip_create(KVMState *s) - ret = kvm_arch_irqchip_create(s); - if (ret == 0) { - if (s->kernel_irqchip_split == ON_OFF_AUTO_ON) { -- perror("Split IRQ chip mode not supported."); -+ error_report("Split IRQ chip mode not supported."); - exit(1); - } else { - ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP); -diff --git a/target/arm/kvm.c b/target/arm/kvm.c -index bbf1ce7ba3..0a2ba1f8e3 100644 ---- a/target/arm/kvm.c -+++ b/target/arm/kvm.c -@@ -960,7 +960,7 @@ void kvm_arch_init_irq_routing(KVMState *s) - int kvm_arch_irqchip_create(KVMState *s) - { - if (kvm_kernel_irqchip_split()) { -- perror("-machine kernel_irqchip=split is not supported on ARM."); -+ error_report("-machine kernel_irqchip=split is not supported on ARM."); - exit(1); - } - --- -2.31.1 - diff --git a/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch b/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch deleted file mode 100644 index f12b8ec..0000000 --- a/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 49d9c9dced7278517105e9cfec34ea4af716432d Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 9 Jun 2022 17:47:12 +0100 -Subject: [PATCH 6/6] linux-aio: explain why max batch is checked in - laio_io_unplug() - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 102: linux-aio: fix unbalanced plugged counter in laio_io_unplug() -RH-Commit: [2/2] 
b3d6421086bde50d4baad2343b2df89c5f66950e (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2092788 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefano Garzarella - -It may not be obvious why laio_io_unplug() checks max batch. I discussed -this with Stefano and have added a comment summarizing the reason. - -Cc: Stefano Garzarella -Cc: Kevin Wolf -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Stefano Garzarella -Message-id: 20220609164712.1539045-3-stefanha@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 99b969fbe105117f5af6060d3afef40ca39cc9c1) -Signed-off-by: Stefan Hajnoczi ---- - block/linux-aio.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/block/linux-aio.c b/block/linux-aio.c -index 6078da7e42..9c2393a2f7 100644 ---- a/block/linux-aio.c -+++ b/block/linux-aio.c -@@ -365,6 +365,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, - assert(s->io_q.plugged); - s->io_q.plugged--; - -+ /* -+ * Why max batch checking is performed here: -+ * Another BDS may have queued requests with a higher dev_max_batch and -+ * therefore in_queue could now exceed our dev_max_batch. Re-check the max -+ * batch so we can honor our device's dev_max_batch. 
-+ */ - if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || - (!s->io_q.plugged && - !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { --- -2.31.1 - diff --git a/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch b/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch deleted file mode 100644 index ed9b5ee..0000000 --- a/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch +++ /dev/null @@ -1,56 +0,0 @@ -From e7326c3a7e0fc022aa5c0ae07bc1e19ad1b6f2ed Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Thu, 9 Jun 2022 17:47:11 +0100 -Subject: [PATCH 5/6] linux-aio: fix unbalanced plugged counter in - laio_io_unplug() - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 102: linux-aio: fix unbalanced plugged counter in laio_io_unplug() -RH-Commit: [1/2] 8a71da371c72521f1d70b8767ee564575e0d522b (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2092788 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefano Garzarella - -Every laio_io_plug() call has a matching laio_io_unplug() call. There is -a plugged counter that tracks the number of levels of plugging and -allows for nesting. - -The plugged counter must reflect the balance between laio_io_plug() and -laio_io_unplug() calls accurately. Otherwise I/O stalls occur since -io_submit(2) calls are skipped while plugged. - -Reported-by: Nikolay Tenev -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Stefano Garzarella -Message-id: 20220609164712.1539045-2-stefanha@redhat.com -Cc: Stefano Garzarella -Fixes: 68d7946648 ("linux-aio: add `dev_max_batch` parameter to laio_io_unplug()") -[Stefano Garzarella suggested adding a Fixes tag. 
---Stefan] -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit f387cac5af030a58ac5a0dacf64cab5e5a4fe5c7) -Signed-off-by: Stefan Hajnoczi ---- - block/linux-aio.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/block/linux-aio.c b/block/linux-aio.c -index 4c423fcccf..6078da7e42 100644 ---- a/block/linux-aio.c -+++ b/block/linux-aio.c -@@ -363,8 +363,10 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, - uint64_t dev_max_batch) - { - assert(s->io_q.plugged); -+ s->io_q.plugged--; -+ - if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || -- (--s->io_q.plugged == 0 && -+ (!s->io_q.plugged && - !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { - ioq_submit(s); - } --- -2.31.1 - diff --git a/kvm-meson-create-have_vhost_-variables.patch b/kvm-meson-create-have_vhost_-variables.patch deleted file mode 100644 index fcae620..0000000 --- a/kvm-meson-create-have_vhost_-variables.patch +++ /dev/null @@ -1,154 +0,0 @@ -From 51c310097832724bafac26aed81399da40128400 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:50:43 +0200 -Subject: [PATCH 05/32] meson: create have_vhost_* variables -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [5/27] 3b30f89e6d639923dc9d9a92a4261bb4509e5c83 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 2a3129a37652e5e81d12f6e16dd3c447f09831f9 -Author: Paolo Bonzini -Date: Wed Apr 20 17:34:05 2022 +0200 - - meson: create have_vhost_* variables - - When using Meson options rather than config-host.h, the "when" clauses - have to be changed to if statements (which is not necessarily great, - though at least it highlights which 
parts of the build are per-target - and which are not). - - Do that before moving vhost logic to meson.build, though for now - the variables are just based on config-host.mak data. - - Reviewed-by: Marc-André Lureau - Signed-off-by: Paolo Bonzini - -Signed-off-by: Eugenio Pérez ---- - meson.build | 30 ++++++++++++++++++++---------- - tests/meson.build | 2 +- - tools/meson.build | 2 +- - 3 files changed, 22 insertions(+), 12 deletions(-) - -diff --git a/meson.build b/meson.build -index 13e3323380..735f538497 100644 ---- a/meson.build -+++ b/meson.build -@@ -298,6 +298,15 @@ have_tpm = get_option('tpm') \ - .require(targetos != 'windows', error_message: 'TPM emulation only available on POSIX systems') \ - .allowed() - -+# vhost -+have_vhost_user = 'CONFIG_VHOST_USER' in config_host -+have_vhost_vdpa = 'CONFIG_VHOST_VDPA' in config_host -+have_vhost_kernel = 'CONFIG_VHOST_KERNEL' in config_host -+have_vhost_net_user = 'CONFIG_VHOST_NET_USER' in config_host -+have_vhost_net_vdpa = 'CONFIG_VHOST_NET_VDPA' in config_host -+have_vhost_net = 'CONFIG_VHOST_NET' in config_host -+have_vhost_user_crypto = 'CONFIG_VHOST_CRYPTO' in config_host -+ - # Target-specific libraries and flags - libm = cc.find_library('m', required: false) - threads = dependency('threads') -@@ -1335,7 +1344,7 @@ has_statx_mnt_id = cc.links(statx_mnt_id_test) - have_vhost_user_blk_server = get_option('vhost_user_blk_server') \ - .require(targetos == 'linux', - error_message: 'vhost_user_blk_server requires linux') \ -- .require('CONFIG_VHOST_USER' in config_host, -+ .require(have_vhost_user, - error_message: 'vhost_user_blk_server requires vhost-user support') \ - .disable_auto_if(not have_system) \ - .allowed() -@@ -2116,9 +2125,9 @@ host_kconfig = \ - (have_ivshmem ? ['CONFIG_IVSHMEM=y'] : []) + \ - ('CONFIG_OPENGL' in config_host ? ['CONFIG_OPENGL=y'] : []) + \ - (x11.found() ? ['CONFIG_X11=y'] : []) + \ -- ('CONFIG_VHOST_USER' in config_host ? 
['CONFIG_VHOST_USER=y'] : []) + \ -- ('CONFIG_VHOST_VDPA' in config_host ? ['CONFIG_VHOST_VDPA=y'] : []) + \ -- ('CONFIG_VHOST_KERNEL' in config_host ? ['CONFIG_VHOST_KERNEL=y'] : []) + \ -+ (have_vhost_user ? ['CONFIG_VHOST_USER=y'] : []) + \ -+ (have_vhost_vdpa ? ['CONFIG_VHOST_VDPA=y'] : []) + \ -+ (have_vhost_kernel ? ['CONFIG_VHOST_KERNEL=y'] : []) + \ - (have_virtfs ? ['CONFIG_VIRTFS=y'] : []) + \ - ('CONFIG_LINUX' in config_host ? ['CONFIG_LINUX=y'] : []) + \ - ('CONFIG_PVRDMA' in config_host ? ['CONFIG_PVRDMA=y'] : []) + \ -@@ -2799,7 +2808,7 @@ if have_system or have_user - endif - - vhost_user = not_found --if targetos == 'linux' and 'CONFIG_VHOST_USER' in config_host -+if targetos == 'linux' and have_vhost_user - libvhost_user = subproject('libvhost-user') - vhost_user = libvhost_user.get_variable('vhost_user_dep') - endif -@@ -3386,7 +3395,7 @@ if have_tools - dependencies: qemuutil, - install: true) - -- if 'CONFIG_VHOST_USER' in config_host -+ if have_vhost_user - subdir('contrib/vhost-user-blk') - subdir('contrib/vhost-user-gpu') - subdir('contrib/vhost-user-input') -@@ -3516,15 +3525,16 @@ if 'simple' in get_option('trace_backends') - endif - summary_info += {'D-Bus display': dbus_display} - summary_info += {'QOM debugging': get_option('qom_cast_debug')} --summary_info += {'vhost-kernel support': config_host.has_key('CONFIG_VHOST_KERNEL')} --summary_info += {'vhost-net support': config_host.has_key('CONFIG_VHOST_NET')} --summary_info += {'vhost-crypto support': config_host.has_key('CONFIG_VHOST_CRYPTO')} -+summary_info += {'vhost-kernel support': have_vhost_kernel} -+summary_info += {'vhost-net support': have_vhost_net} -+summary_info += {'vhost-user support': have_vhost_user} -+summary_info += {'vhost-user-crypto support': have_vhost_user_crypto} - summary_info += {'vhost-scsi support': config_host.has_key('CONFIG_VHOST_SCSI')} - summary_info += {'vhost-vsock support': config_host.has_key('CONFIG_VHOST_VSOCK')} --summary_info += {'vhost-user 
support': config_host.has_key('CONFIG_VHOST_USER')} - summary_info += {'vhost-user-blk server support': have_vhost_user_blk_server} - summary_info += {'vhost-user-fs support': config_host.has_key('CONFIG_VHOST_USER_FS')} - summary_info += {'vhost-vdpa support': config_host.has_key('CONFIG_VHOST_VDPA')} -+summary_info += {'vhost-vdpa support': have_vhost_vdpa} - summary_info += {'build guest agent': have_ga} - summary(summary_info, bool_yn: true, section: 'Configurable features') - -diff --git a/tests/meson.build b/tests/meson.build -index 1d05109eb4..bbe41c8559 100644 ---- a/tests/meson.build -+++ b/tests/meson.build -@@ -70,7 +70,7 @@ test_deps = { - 'test-qht-par': qht_bench, - } - --if have_tools and 'CONFIG_VHOST_USER' in config_host and 'CONFIG_LINUX' in config_host -+if have_tools and have_vhost_user and 'CONFIG_LINUX' in config_host - executable('vhost-user-bridge', - sources: files('vhost-user-bridge.c'), - dependencies: [qemuutil, vhost_user]) -diff --git a/tools/meson.build b/tools/meson.build -index 46977af84f..10eb3a043f 100644 ---- a/tools/meson.build -+++ b/tools/meson.build -@@ -3,7 +3,7 @@ have_virtiofsd = get_option('virtiofsd') \ - error_message: 'virtiofsd requires Linux') \ - .require(seccomp.found() and libcap_ng.found(), - error_message: 'virtiofsd requires libcap-ng-devel and seccomp-devel') \ -- .require('CONFIG_VHOST_USER' in config_host, -+ .require(have_vhost_user, - error_message: 'virtiofsd needs vhost-user-support') \ - .disable_auto_if(not have_tools and not have_system) \ - .allowed() --- -2.31.1 - diff --git a/kvm-meson-use-have_vhost_-variables-to-pick-sources.patch b/kvm-meson-use-have_vhost_-variables-to-pick-sources.patch deleted file mode 100644 index 99d86c1..0000000 --- a/kvm-meson-use-have_vhost_-variables-to-pick-sources.patch +++ /dev/null @@ -1,213 +0,0 @@ -From a7d57a09e33275d5e6649273b5c9da1bc3c92491 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:51:53 +0200 -Subject: [PATCH 
06/32] meson: use have_vhost_* variables to pick sources -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [6/27] bc3db1efb759c0bc97fde2f4fbb3d6dc404c8d3d (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 43b6d7ee1fbc5b5fb7c85d8131fdac1863214ad6 -Author: Paolo Bonzini -Date: Wed Apr 20 17:34:06 2022 +0200 - - meson: use have_vhost_* variables to pick sources - - Reviewed-by: Marc-André Lureau - Signed-off-by: Paolo Bonzini - -Signed-off-by: Eugenio Pérez ---- - Kconfig.host | 3 --- - backends/meson.build | 8 ++++++-- - hw/net/meson.build | 8 ++++++-- - hw/virtio/Kconfig | 3 --- - hw/virtio/meson.build | 25 ++++++++++++++++--------- - meson.build | 1 + - net/meson.build | 12 +++++++----- - tests/qtest/meson.build | 4 +++- - 8 files changed, 39 insertions(+), 25 deletions(-) - -diff --git a/Kconfig.host b/Kconfig.host -index 60b9c07b5e..1165c4eacd 100644 ---- a/Kconfig.host -+++ b/Kconfig.host -@@ -22,15 +22,12 @@ config TPM - - config VHOST_USER - bool -- select VHOST - - config VHOST_VDPA - bool -- select VHOST - - config VHOST_KERNEL - bool -- select VHOST - - config VIRTFS - bool -diff --git a/backends/meson.build b/backends/meson.build -index 6e68945528..cb92f639ca 100644 ---- a/backends/meson.build -+++ b/backends/meson.build -@@ -12,9 +12,13 @@ softmmu_ss.add([files( - softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files('rng-random.c')) - softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files('hostmem-file.c')) - softmmu_ss.add(when: 'CONFIG_LINUX', if_true: files('hostmem-memfd.c')) --softmmu_ss.add(when: ['CONFIG_VHOST_USER', 'CONFIG_VIRTIO'], if_true: files('vhost-user.c')) -+if have_vhost_user -+ softmmu_ss.add(when: 'CONFIG_VIRTIO', 
if_true: files('vhost-user.c')) -+endif - softmmu_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c')) --softmmu_ss.add(when: ['CONFIG_VIRTIO_CRYPTO', 'CONFIG_VHOST_CRYPTO'], if_true: files('cryptodev-vhost-user.c')) -+if have_vhost_user_crypto -+ softmmu_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c')) -+endif - softmmu_ss.add(when: 'CONFIG_GIO', if_true: [files('dbus-vmstate.c'), gio]) - softmmu_ss.add(when: 'CONFIG_SGX', if_true: files('hostmem-epc.c')) - -diff --git a/hw/net/meson.build b/hw/net/meson.build -index 685b75badb..ebac261542 100644 ---- a/hw/net/meson.build -+++ b/hw/net/meson.build -@@ -46,8 +46,12 @@ specific_ss.add(when: 'CONFIG_XILINX_ETHLITE', if_true: files('xilinx_ethlite.c' - softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('net_rx_pkt.c')) - specific_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('virtio-net.c')) - --softmmu_ss.add(when: ['CONFIG_VIRTIO_NET', 'CONFIG_VHOST_NET'], if_true: files('vhost_net.c'), if_false: files('vhost_net-stub.c')) --softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost_net-stub.c')) -+if have_vhost_net -+ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost_net.c'), if_false: files('vhost_net-stub.c')) -+ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost_net-stub.c')) -+else -+ softmmu_ss.add(files('vhost_net-stub.c')) -+endif - - softmmu_ss.add(when: 'CONFIG_ETSEC', if_true: files( - 'fsl_etsec/etsec.c', -diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig -index c144d42f9b..8ca7b3d9d6 100644 ---- a/hw/virtio/Kconfig -+++ b/hw/virtio/Kconfig -@@ -1,6 +1,3 @@ --config VHOST -- bool -- - config VIRTIO - bool - -diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build -index 67dc77e00f..30a832eb4a 100644 ---- a/hw/virtio/meson.build -+++ b/hw/virtio/meson.build -@@ -2,18 +2,22 @@ softmmu_virtio_ss = ss.source_set() - softmmu_virtio_ss.add(files('virtio-bus.c')) - softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: 
files('virtio-pci.c')) - softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c')) --softmmu_virtio_ss.add(when: 'CONFIG_VHOST', if_false: files('vhost-stub.c')) -- --softmmu_ss.add_all(when: 'CONFIG_VIRTIO', if_true: softmmu_virtio_ss) --softmmu_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) -- --softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c')) - - virtio_ss = ss.source_set() - virtio_ss.add(files('virtio.c')) --virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-iova-tree.c')) --virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c')) --virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-shadow-virtqueue.c', 'vhost-vdpa.c')) -+ -+if have_vhost -+ virtio_ss.add(files('vhost.c', 'vhost-backend.c', 'vhost-iova-tree.c')) -+ if have_vhost_user -+ virtio_ss.add(files('vhost-user.c')) -+ endif -+ if have_vhost_vdpa -+ virtio_ss.add(files('vhost-vdpa.c', 'vhost-shadow-virtqueue.c')) -+ endif -+else -+ softmmu_virtio_ss.add(files('vhost-stub.c')) -+endif -+ - virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c')) - virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) - virtio_ss.add(when: ['CONFIG_VIRTIO_CRYPTO', 'CONFIG_VIRTIO_PCI'], if_true: files('virtio-crypto-pci.c')) -@@ -53,3 +57,6 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) - virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) - - specific_ss.add_all(when: 'CONFIG_VIRTIO', if_true: virtio_ss) -+softmmu_ss.add_all(when: 'CONFIG_VIRTIO', if_true: softmmu_virtio_ss) -+softmmu_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) -+softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c')) -diff --git a/meson.build b/meson.build -index 735f538497..9ba675f098 100644 ---- a/meson.build -+++ b/meson.build -@@ -305,6 +305,7 @@ have_vhost_kernel = 'CONFIG_VHOST_KERNEL' in 
config_host - have_vhost_net_user = 'CONFIG_VHOST_NET_USER' in config_host - have_vhost_net_vdpa = 'CONFIG_VHOST_NET_VDPA' in config_host - have_vhost_net = 'CONFIG_VHOST_NET' in config_host -+have_vhost = have_vhost_user or have_vhost_vdpa or have_vhost_kernel - have_vhost_user_crypto = 'CONFIG_VHOST_CRYPTO' in config_host - - # Target-specific libraries and flags -diff --git a/net/meson.build b/net/meson.build -index 847bc2ac85..c965e83b26 100644 ---- a/net/meson.build -+++ b/net/meson.build -@@ -26,10 +26,10 @@ softmmu_ss.add(when: vde, if_true: files('vde.c')) - if have_netmap - softmmu_ss.add(files('netmap.c')) - endif --vhost_user_ss = ss.source_set() --vhost_user_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c')) --softmmu_ss.add_all(when: 'CONFIG_VHOST_NET_USER', if_true: vhost_user_ss) --softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-user-stub.c')) -+if have_vhost_net_user -+ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c')) -+ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-user-stub.c')) -+endif - - softmmu_ss.add(when: 'CONFIG_LINUX', if_true: files('tap-linux.c')) - softmmu_ss.add(when: 'CONFIG_BSD', if_true: files('tap-bsd.c')) -@@ -40,6 +40,8 @@ if not config_host.has_key('CONFIG_LINUX') and not config_host.has_key('CONFIG_B - endif - softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix)) - softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c')) --softmmu_ss.add(when: 'CONFIG_VHOST_NET_VDPA', if_true: files('vhost-vdpa.c')) -+if have_vhost_net_vdpa -+ softmmu_ss.add(files('vhost-vdpa.c')) -+endif - - subdir('can') -diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index 67cd32def1..9f550df900 100644 ---- a/tests/qtest/meson.build -+++ b/tests/qtest/meson.build -@@ -269,7 +269,9 @@ qos_test_ss.add( - if have_virtfs - qos_test_ss.add(files('virtio-9p-test.c')) - endif 
--qos_test_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user-test.c')) -+if have_vhost_user -+ qos_test_ss.add(files('vhost-user-test.c')) -+endif - if have_tools and have_vhost_user_blk_server - qos_test_ss.add(files('vhost-user-blk-test.c')) - endif --- -2.31.1 - diff --git a/kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch b/kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch deleted file mode 100644 index 0da63bf..0000000 --- a/kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 7c489b54b0bb33445113fbf16e88feb23be68013 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:30 -0300 -Subject: [PATCH 07/18] meson.build: Fix docker-test-build@alpine when - including linux/errqueue.h -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [1/11] f058eb846fcf611d527a1dd3b0cc399cdc17e3ee (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -A build error happens in alpine CI when linux/errqueue.h is included -in io/channel-socket.c, due to redefining of 'struct __kernel_timespec': - -=== -ninja: job failed: [...] 
-In file included from /usr/include/linux/errqueue.h:6, - from ../io/channel-socket.c:29: -/usr/include/linux/time_types.h:7:8: error: redefinition of 'struct __kernel_timespec' - 7 | struct __kernel_timespec { - | ^~~~~~~~~~~~~~~~~ -In file included from /usr/include/liburing.h:19, - from /builds/user/qemu/include/block/aio.h:18, - from /builds/user/qemu/include/io/channel.h:26, - from /builds/user/qemu/include/io/channel-socket.h:24, - from ../io/channel-socket.c:24: -/usr/include/liburing/compat.h:9:8: note: originally defined here - 9 | struct __kernel_timespec { - | ^~~~~~~~~~~~~~~~~ -ninja: subcommand failed -=== - -As above error message suggests, 'struct __kernel_timespec' was already -defined by liburing/compat.h. - -Fix alpine CI by adding test to disable liburing in configure step if a -redefinition happens between linux/errqueue.h and liburing/compat.h. - -[dgilbert: This has been fixed in Alpine issue 13813 and liburing] - -Signed-off-by: Leonardo Bras -Message-Id: <20220513062836.965425-2-leobras@redhat.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 354081d43de44ebd3497fe08f7f0121a5517d528) -Signed-off-by: Leonardo Bras ---- - meson.build | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/meson.build b/meson.build -index 5a7c10e639..13e3323380 100644 ---- a/meson.build -+++ b/meson.build -@@ -471,12 +471,23 @@ if not get_option('linux_aio').auto() or have_block - required: get_option('linux_aio'), - kwargs: static_kwargs) - endif -+ -+linux_io_uring_test = ''' -+ #include -+ #include -+ -+ int main(void) { return 0; }''' -+ - linux_io_uring = not_found - if not get_option('linux_io_uring').auto() or have_block - linux_io_uring = dependency('liburing', version: '>=0.3', - required: get_option('linux_io_uring'), - method: 'pkg-config', kwargs: static_kwargs) -+ if not cc.links(linux_io_uring_test) -+ linux_io_uring = not_found -+ endif - endif -+ - libnfs = not_found - if not get_option('libnfs').auto() or have_block - libnfs = dependency('libnfs', version: '>=1.9.3', --- -2.35.3 - diff --git a/kvm-migration-Add-migrate_use_tls-helper.patch b/kvm-migration-Add-migrate_use_tls-helper.patch deleted file mode 100644 index 0fe0d91..0000000 --- a/kvm-migration-Add-migrate_use_tls-helper.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 828f6c106eedcb7a48e551ffda15af56ff92a899 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:34 -0300 -Subject: [PATCH 11/18] migration: Add migrate_use_tls() helper -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [5/11] 06e945297c3b9c0ce5864885aafcdba1e5746bc2 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -A lot of places check parameters.tls_creds in order to evaluate if TLS is -in use, and sometimes call migrate_get_current() just for that test. 
- -Add new helper function migrate_use_tls() in order to simplify testing -for TLS usage. - -Signed-off-by: Leonardo Bras -Reviewed-by: Juan Quintela -Reviewed-by: Peter Xu -Reviewed-by: Daniel P. Berrangé -Message-Id: <20220513062836.965425-6-leobras@redhat.com> -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit d2fafb6a6814a8998607d0baf691265032996a0f) -Signed-off-by: Leonardo Bras ---- - migration/channel.c | 3 +-- - migration/migration.c | 9 +++++++++ - migration/migration.h | 1 + - migration/multifd.c | 5 +---- - 4 files changed, 12 insertions(+), 6 deletions(-) - -diff --git a/migration/channel.c b/migration/channel.c -index c4fc000a1a..086b5c0d8b 100644 ---- a/migration/channel.c -+++ b/migration/channel.c -@@ -38,8 +38,7 @@ void migration_channel_process_incoming(QIOChannel *ioc) - trace_migration_set_incoming_channel( - ioc, object_get_typename(OBJECT(ioc))); - -- if (s->parameters.tls_creds && -- *s->parameters.tls_creds && -+ if (migrate_use_tls() && - !object_dynamic_cast(OBJECT(ioc), - TYPE_QIO_CHANNEL_TLS)) { - migration_tls_channel_process_incoming(s, ioc, &local_err); -diff --git a/migration/migration.c b/migration/migration.c -index 0a6b3b9f4d..d91efb66fe 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2582,6 +2582,15 @@ bool migrate_use_zero_copy_send(void) - } - #endif - -+int migrate_use_tls(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.tls_creds && *s->parameters.tls_creds; -+} -+ - int migrate_use_xbzrle(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index 5bcb7628ef..c2cabb8a14 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -381,6 +381,7 @@ bool migrate_use_zero_copy_send(void); - #else - #define migrate_use_zero_copy_send() (false) - #endif -+int migrate_use_tls(void); - int migrate_use_xbzrle(void); - uint64_t migrate_xbzrle_cache_size(void); - bool migrate_colo_enabled(void); -diff --git 
a/migration/multifd.c b/migration/multifd.c -index 76b57a7177..43998ad117 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -784,14 +784,11 @@ static bool multifd_channel_connect(MultiFDSendParams *p, - QIOChannel *ioc, - Error *error) - { -- MigrationState *s = migrate_get_current(); -- - trace_multifd_set_outgoing_channel( - ioc, object_get_typename(OBJECT(ioc)), p->tls_hostname, error); - - if (!error) { -- if (s->parameters.tls_creds && -- *s->parameters.tls_creds && -+ if (migrate_use_tls() && - !object_dynamic_cast(OBJECT(ioc), - TYPE_QIO_CHANNEL_TLS)) { - multifd_tls_channel_connect(p, ioc, &error); --- -2.35.3 - diff --git a/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch b/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch deleted file mode 100644 index 206ac3d..0000000 --- a/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch +++ /dev/null @@ -1,250 +0,0 @@ -From d6500340dc3c1152b5efe04ef3daa50c17a55e30 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:33 -0300 -Subject: [PATCH 10/18] migration: Add zero-copy-send parameter for QMP/HMP for - Linux -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [4/11] 514d98d595992c53ff98de750035e080ded8972e (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -Add property that allows zero-copy migration of memory pages -on the sending side, and also includes a helper function -migrate_use_zero_copy_send() to check if it's enabled. - -No code is introduced to actually do the migration, but it allow -future implementations to enable/disable this feature. - -On non-Linux builds this parameter is compiled-out. - -Signed-off-by: Leonardo Bras -Reviewed-by: Peter Xu -Reviewed-by: Daniel P. 
Berrangé -Reviewed-by: Juan Quintela -Acked-by: Markus Armbruster -Message-Id: <20220513062836.965425-5-leobras@redhat.com> -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit abb6295b3ace5d17c3a65936913fc346616dbf14) -Signed-off-by: Leonardo Bras ---- - migration/migration.c | 32 ++++++++++++++++++++++++++++++++ - migration/migration.h | 5 +++++ - migration/socket.c | 11 +++++++++-- - monitor/hmp-cmds.c | 6 ++++++ - qapi/migration.json | 24 ++++++++++++++++++++++++ - 5 files changed, 76 insertions(+), 2 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 695f0f2900..0a6b3b9f4d 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -899,6 +899,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) - params->multifd_zlib_level = s->parameters.multifd_zlib_level; - params->has_multifd_zstd_level = true; - params->multifd_zstd_level = s->parameters.multifd_zstd_level; -+#ifdef CONFIG_LINUX -+ params->has_zero_copy_send = true; -+ params->zero_copy_send = s->parameters.zero_copy_send; -+#endif - params->has_xbzrle_cache_size = true; - params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; - params->has_max_postcopy_bandwidth = true; -@@ -1555,6 +1559,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params, - if (params->has_multifd_compression) { - dest->multifd_compression = params->multifd_compression; - } -+#ifdef CONFIG_LINUX -+ if (params->has_zero_copy_send) { -+ dest->zero_copy_send = params->zero_copy_send; -+ } -+#endif - if (params->has_xbzrle_cache_size) { - dest->xbzrle_cache_size = params->xbzrle_cache_size; - } -@@ -1667,6 +1676,11 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) - if (params->has_multifd_compression) { - s->parameters.multifd_compression = params->multifd_compression; - } -+#ifdef CONFIG_LINUX -+ if (params->has_zero_copy_send) { -+ s->parameters.zero_copy_send = params->zero_copy_send; -+ } -+#endif - if 
(params->has_xbzrle_cache_size) { - s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; - xbzrle_cache_resize(params->xbzrle_cache_size, errp); -@@ -2557,6 +2571,17 @@ int migrate_multifd_zstd_level(void) - return s->parameters.multifd_zstd_level; - } - -+#ifdef CONFIG_LINUX -+bool migrate_use_zero_copy_send(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.zero_copy_send; -+} -+#endif -+ - int migrate_use_xbzrle(void) - { - MigrationState *s; -@@ -4200,6 +4225,10 @@ static Property migration_properties[] = { - DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, - parameters.multifd_zstd_level, - DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), -+#ifdef CONFIG_LINUX -+ DEFINE_PROP_BOOL("zero_copy_send", MigrationState, -+ parameters.zero_copy_send, false), -+#endif - DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, - parameters.xbzrle_cache_size, - DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), -@@ -4297,6 +4326,9 @@ static void migration_instance_init(Object *obj) - params->has_multifd_compression = true; - params->has_multifd_zlib_level = true; - params->has_multifd_zstd_level = true; -+#ifdef CONFIG_LINUX -+ params->has_zero_copy_send = true; -+#endif - params->has_xbzrle_cache_size = true; - params->has_max_postcopy_bandwidth = true; - params->has_max_cpu_throttle = true; -diff --git a/migration/migration.h b/migration/migration.h -index 2de861df01..5bcb7628ef 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -376,6 +376,11 @@ MultiFDCompression migrate_multifd_compression(void); - int migrate_multifd_zlib_level(void); - int migrate_multifd_zstd_level(void); - -+#ifdef CONFIG_LINUX -+bool migrate_use_zero_copy_send(void); -+#else -+#define migrate_use_zero_copy_send() (false) -+#endif - int migrate_use_xbzrle(void); - uint64_t migrate_xbzrle_cache_size(void); - bool migrate_colo_enabled(void); -diff --git a/migration/socket.c b/migration/socket.c -index 05705a32d8..3754d8f72c 100644 ---- 
a/migration/socket.c -+++ b/migration/socket.c -@@ -74,9 +74,16 @@ static void socket_outgoing_migration(QIOTask *task, - - if (qio_task_propagate_error(task, &err)) { - trace_migration_socket_outgoing_error(error_get_pretty(err)); -- } else { -- trace_migration_socket_outgoing_connected(data->hostname); -+ goto out; - } -+ -+ trace_migration_socket_outgoing_connected(data->hostname); -+ -+ if (migrate_use_zero_copy_send()) { -+ error_setg(&err, "Zero copy send not available in migration"); -+ } -+ -+out: - migration_channel_connect(data->s, sioc, data->hostname, err); - object_unref(OBJECT(sioc)); - } -diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index 634968498b..55b48d3733 100644 ---- a/monitor/hmp-cmds.c -+++ b/monitor/hmp-cmds.c -@@ -1309,6 +1309,12 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) - p->has_multifd_zstd_level = true; - visit_type_uint8(v, param, &p->multifd_zstd_level, &err); - break; -+#ifdef CONFIG_LINUX -+ case MIGRATION_PARAMETER_ZERO_COPY_SEND: -+ p->has_zero_copy_send = true; -+ visit_type_bool(v, param, &p->zero_copy_send, &err); -+ break; -+#endif - case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: - p->has_xbzrle_cache_size = true; - if (!visit_type_size(v, param, &cache_size, &err)) { -diff --git a/qapi/migration.json b/qapi/migration.json -index 27d7b28158..4d833ecdd6 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -741,6 +741,13 @@ - # will consume more CPU. - # Defaults to 1. (Since 5.0) - # -+# @zero-copy-send: Controls behavior on sending memory pages on migration. -+# When true, enables a zero-copy mechanism for sending -+# memory pages, if host supports it. -+# Requires that QEMU be permitted to use locked memory -+# for guest RAM pages. -+# Defaults to false. (Since 7.1) -+# - # @block-bitmap-mapping: Maps block nodes and bitmaps on them to - # aliases for the purpose of dirty bitmap migration. 
Such - # aliases may for example be the corresponding names on the -@@ -780,6 +787,7 @@ - 'xbzrle-cache-size', 'max-postcopy-bandwidth', - 'max-cpu-throttle', 'multifd-compression', - 'multifd-zlib-level' ,'multifd-zstd-level', -+ { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'}, - 'block-bitmap-mapping' ] } - - ## -@@ -906,6 +914,13 @@ - # will consume more CPU. - # Defaults to 1. (Since 5.0) - # -+# @zero-copy-send: Controls behavior on sending memory pages on migration. -+# When true, enables a zero-copy mechanism for sending -+# memory pages, if host supports it. -+# Requires that QEMU be permitted to use locked memory -+# for guest RAM pages. -+# Defaults to false. (Since 7.1) -+# - # @block-bitmap-mapping: Maps block nodes and bitmaps on them to - # aliases for the purpose of dirty bitmap migration. Such - # aliases may for example be the corresponding names on the -@@ -960,6 +975,7 @@ - '*multifd-compression': 'MultiFDCompression', - '*multifd-zlib-level': 'uint8', - '*multifd-zstd-level': 'uint8', -+ '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, - '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } - - ## -@@ -1106,6 +1122,13 @@ - # will consume more CPU. - # Defaults to 1. (Since 5.0) - # -+# @zero-copy-send: Controls behavior on sending memory pages on migration. -+# When true, enables a zero-copy mechanism for sending -+# memory pages, if host supports it. -+# Requires that QEMU be permitted to use locked memory -+# for guest RAM pages. -+# Defaults to false. (Since 7.1) -+# - # @block-bitmap-mapping: Maps block nodes and bitmaps on them to - # aliases for the purpose of dirty bitmap migration. 
Such - # aliases may for example be the corresponding names on the -@@ -1158,6 +1181,7 @@ - '*multifd-compression': 'MultiFDCompression', - '*multifd-zlib-level': 'uint8', - '*multifd-zstd-level': 'uint8', -+ '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, - '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } - - ## --- -2.35.3 - diff --git a/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch b/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch deleted file mode 100644 index 29dc0ea..0000000 --- a/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch +++ /dev/null @@ -1,98 +0,0 @@ -From fd6f516a94e635bc42e58448f314db575814a834 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Thu, 31 Mar 2022 11:08:45 -0400 -Subject: [PATCH 18/18] migration: Allow migrate-recover to run multiple times -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 104: migration: Allow migrate-recover to run multiple times -RH-Commit: [1/1] afd726e54c069ae800e2d01f34e768d6bac7dcb9 (peterx/qemu-kvm) -RH-Bugzilla: 2096143 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Hanna Reitz -RH-Acked-by: Dr. David Alan Gilbert - -Previously migration didn't have an easy way to cleanup the listening -transport, migrate recovery only allows to execute once. That's done with a -trick flag in postcopy_recover_triggered. - -Now the facility is already there. - -Drop postcopy_recover_triggered and instead allows a new migrate-recover to -release the previous listener transport. - -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Peter Xu -Message-Id: <20220331150857.74406-8-peterx@redhat.com> -Reviewed-by: Daniel P. Berrangé -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 08401c0426bc1a5ce4609afd1cda5dd39abbf9fa) -Signed-off-by: Peter Xu ---- - migration/migration.c | 13 ++----------- - migration/migration.h | 1 - - migration/savevm.c | 3 --- - 3 files changed, 2 insertions(+), 15 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 2a141bfaf3..8fb3eae910 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2166,11 +2166,8 @@ void qmp_migrate_recover(const char *uri, Error **errp) - return; - } - -- if (qatomic_cmpxchg(&mis->postcopy_recover_triggered, -- false, true) == true) { -- error_setg(errp, "Migrate recovery is triggered already"); -- return; -- } -+ /* If there's an existing transport, release it */ -+ migration_incoming_transport_cleanup(mis); - - /* - * Note that this call will never start a real migration; it will -@@ -2178,12 +2175,6 @@ void qmp_migrate_recover(const char *uri, Error **errp) - * to continue using that newly established channel. - */ - qemu_start_incoming_migration(uri, errp); -- -- /* Safe to dereference with the assert above */ -- if (*errp) { -- /* Reset the flag so user could still retry */ -- qatomic_set(&mis->postcopy_recover_triggered, false); -- } - } - - void qmp_migrate_pause(Error **errp) -diff --git a/migration/migration.h b/migration/migration.h -index c2cabb8a14..fbc8690ec8 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -139,7 +139,6 @@ struct MigrationIncomingState { - struct PostcopyBlocktimeContext *blocktime_ctx; - - /* notify PAUSED postcopy incoming migrations to try to continue */ -- bool postcopy_recover_triggered; - QemuSemaphore postcopy_pause_sem_dst; - QemuSemaphore postcopy_pause_sem_fault; - -diff --git a/migration/savevm.c b/migration/savevm.c -index 02ed94c180..d9076897b8 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -2589,9 +2589,6 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) - - assert(migrate_postcopy_ram()); - -- /* 
Clear the triggered bit to allow one recovery */ -- mis->postcopy_recover_triggered = false; -- - /* - * Unregister yank with either from/to src would work, since ioc behind it - * is the same --- -2.35.3 - diff --git a/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch b/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch deleted file mode 100644 index 9f440eb..0000000 --- a/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 0753565af588dfa78b3529e359b1590e15fcbdb3 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Tue, 19 Jul 2022 09:23:45 -0300 -Subject: [PATCH 04/11] migration: Avoid false-positive on non-supported - scenarios for zero-copy-send -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 111: zero-copy-send fixes & improvements -RH-Commit: [4/6] f5c7ed6710d92668acb81d0118a71fab0b4e3d43 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 2107466 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: Dr. David Alan Gilbert - -Migration with zero-copy-send currently has it's limitations, as it can't -be used with TLS nor any kind of compression. In such scenarios, it should -output errors during parameter / capability setting. - -But currently there are some ways of setting this not-supported scenarios -without printing the error message: - -!) For 'compression' capability, it works by enabling it together with -zero-copy-send. This happens because the validity test for zero-copy uses -the helper unction migrate_use_compression(), which check for compression -presence in s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]. - -The point here is: the validity test happens before the capability gets -enabled. If all of them get enabled together, this test will not return -error. 
- -In order to fix that, replace migrate_use_compression() by directly testing -the cap_list parameter migrate_caps_check(). - -2) For features enabled by parameters such as TLS & 'multifd_compression', -there was also a possibility of setting non-supported scenarios: setting -zero-copy-send first, then setting the unsupported parameter. - -In order to fix that, also add a check for parameters conflicting with -zero-copy-send on migrate_params_check(). - -3) XBZRLE is also a compression capability, so it makes sense to also add -it to the list of capabilities which are not supported with zero-copy-send. - -Fixes: 1abaec9a1b2c ("migration: Change zero_copy_send from migration parameter to migration capability") -Signed-off-by: Leonardo Bras -Message-Id: <20220719122345.253713-1-leobras@redhat.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 90eb69e4f1a16b388d0483543bf6bfc69a9966e4) -Signed-off-by: Leonardo Bras ---- - migration/migration.c | 15 ++++++++++++++- - 1 file changed, 14 insertions(+), 1 deletion(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 3a3a7a4a50..343629d59c 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1265,7 +1265,9 @@ static bool migrate_caps_check(bool *cap_list, - #ifdef CONFIG_LINUX - if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && - (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || -- migrate_use_compression() || -+ cap_list[MIGRATION_CAPABILITY_COMPRESS] || -+ cap_list[MIGRATION_CAPABILITY_XBZRLE] || -+ migrate_multifd_compression() || - migrate_use_tls())) { - error_setg(errp, - "Zero copy only available for non-compressed non-TLS multifd migration"); -@@ -1502,6 +1504,17 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) - error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); - return false; - } -+ -+#ifdef CONFIG_LINUX -+ if (migrate_use_zero_copy_send() && -+ 
((params->has_multifd_compression && params->multifd_compression) || -+ (params->has_tls_creds && params->tls_creds && *params->tls_creds))) { -+ error_setg(errp, -+ "Zero copy only available for non-compressed non-TLS multifd migration"); -+ return false; -+ } -+#endif -+ - return true; - } - --- -2.31.1 - diff --git a/kvm-migration-Change-zero_copy_send-from-migration-param.patch b/kvm-migration-Change-zero_copy_send-from-migration-param.patch deleted file mode 100644 index abeeeb6..0000000 --- a/kvm-migration-Change-zero_copy_send-from-migration-param.patch +++ /dev/null @@ -1,289 +0,0 @@ -From 7e2a037f3f349c21201152cecce32d8c8ff0bea0 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Mon, 20 Jun 2022 02:39:45 -0300 -Subject: [PATCH 17/18] migration: Change zero_copy_send from migration - parameter to migration capability -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [11/11] e4a955607947896a49398ac8400241a0adac51a1 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -When originally implemented, zero_copy_send was designed as a Migration -paramenter. - -But taking into account how is that supposed to work, and how -the difference between a capability and a parameter, it only makes sense -that zero-copy-send would work better as a capability. - -Taking into account how recently the change got merged, it was decided -that it's still time to make it right, and convert zero_copy_send into -a Migration capability. - -Signed-off-by: Leonardo Bras -Reviewed-by: Juan Quintela -Acked-by: Markus Armbruster -Acked-by: Peter Xu -Signed-off-by: Juan Quintela -Signed-off-by: Dr. 
David Alan Gilbert - dgilbert: always define the capability, even on non-Linux but error if -set; avoids build problems with the capability -(cherry picked from commit 1abaec9a1b2c23f7aa94709a422128d9e42c3e0b) -Signed-off-by: Leonardo Bras ---- - migration/migration.c | 58 +++++++++++++++++++------------------------ - monitor/hmp-cmds.c | 6 ----- - qapi/migration.json | 33 +++++++----------------- - 3 files changed, 34 insertions(+), 63 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 102236fba0..2a141bfaf3 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -163,7 +163,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, - MIGRATION_CAPABILITY_COMPRESS, - MIGRATION_CAPABILITY_XBZRLE, - MIGRATION_CAPABILITY_X_COLO, -- MIGRATION_CAPABILITY_VALIDATE_UUID); -+ MIGRATION_CAPABILITY_VALIDATE_UUID, -+ MIGRATION_CAPABILITY_ZERO_COPY_SEND); - - /* When we add fault tolerance, we could have several - migrations at once. For now we don't need to add -@@ -899,10 +900,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) - params->multifd_zlib_level = s->parameters.multifd_zlib_level; - params->has_multifd_zstd_level = true; - params->multifd_zstd_level = s->parameters.multifd_zstd_level; --#ifdef CONFIG_LINUX -- params->has_zero_copy_send = true; -- params->zero_copy_send = s->parameters.zero_copy_send; --#endif - params->has_xbzrle_cache_size = true; - params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; - params->has_max_postcopy_bandwidth = true; -@@ -1263,6 +1260,24 @@ static bool migrate_caps_check(bool *cap_list, - } - } - -+#ifdef CONFIG_LINUX -+ if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && -+ (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || -+ migrate_use_compression() || -+ migrate_use_tls())) { -+ error_setg(errp, -+ "Zero copy only available for non-compressed non-TLS multifd migration"); -+ return false; -+ } -+#else -+ if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { -+ 
error_setg(errp, -+ "Zero copy currently only available on Linux"); -+ return false; -+ } -+#endif -+ -+ - /* incoming side only */ - if (runstate_check(RUN_STATE_INMIGRATE) && - !migrate_multifd_is_allowed() && -@@ -1485,16 +1500,6 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) - error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); - return false; - } --#ifdef CONFIG_LINUX -- if (params->zero_copy_send && -- (!migrate_use_multifd() || -- params->multifd_compression != MULTIFD_COMPRESSION_NONE || -- (params->tls_creds && *params->tls_creds))) { -- error_setg(errp, -- "Zero copy only available for non-compressed non-TLS multifd migration"); -- return false; -- } --#endif - return true; - } - -@@ -1568,11 +1573,6 @@ static void migrate_params_test_apply(MigrateSetParameters *params, - if (params->has_multifd_compression) { - dest->multifd_compression = params->multifd_compression; - } --#ifdef CONFIG_LINUX -- if (params->has_zero_copy_send) { -- dest->zero_copy_send = params->zero_copy_send; -- } --#endif - if (params->has_xbzrle_cache_size) { - dest->xbzrle_cache_size = params->xbzrle_cache_size; - } -@@ -1685,11 +1685,6 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) - if (params->has_multifd_compression) { - s->parameters.multifd_compression = params->multifd_compression; - } --#ifdef CONFIG_LINUX -- if (params->has_zero_copy_send) { -- s->parameters.zero_copy_send = params->zero_copy_send; -- } --#endif - if (params->has_xbzrle_cache_size) { - s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; - xbzrle_cache_resize(params->xbzrle_cache_size, errp); -@@ -2587,7 +2582,7 @@ bool migrate_use_zero_copy_send(void) - - s = migrate_get_current(); - -- return s->parameters.zero_copy_send; -+ return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; - } - #endif - -@@ -4243,10 +4238,6 @@ static Property migration_properties[] = { - 
DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, - parameters.multifd_zstd_level, - DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), --#ifdef CONFIG_LINUX -- DEFINE_PROP_BOOL("zero_copy_send", MigrationState, -- parameters.zero_copy_send, false), --#endif - DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, - parameters.xbzrle_cache_size, - DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), -@@ -4284,6 +4275,10 @@ static Property migration_properties[] = { - DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), - DEFINE_PROP_MIG_CAP("x-background-snapshot", - MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), -+#ifdef CONFIG_LINUX -+ DEFINE_PROP_MIG_CAP("x-zero-copy-send", -+ MIGRATION_CAPABILITY_ZERO_COPY_SEND), -+#endif - - DEFINE_PROP_END_OF_LIST(), - }; -@@ -4344,9 +4339,6 @@ static void migration_instance_init(Object *obj) - params->has_multifd_compression = true; - params->has_multifd_zlib_level = true; - params->has_multifd_zstd_level = true; --#ifdef CONFIG_LINUX -- params->has_zero_copy_send = true; --#endif - params->has_xbzrle_cache_size = true; - params->has_max_postcopy_bandwidth = true; - params->has_max_cpu_throttle = true; -diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index 55b48d3733..634968498b 100644 ---- a/monitor/hmp-cmds.c -+++ b/monitor/hmp-cmds.c -@@ -1309,12 +1309,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) - p->has_multifd_zstd_level = true; - visit_type_uint8(v, param, &p->multifd_zstd_level, &err); - break; --#ifdef CONFIG_LINUX -- case MIGRATION_PARAMETER_ZERO_COPY_SEND: -- p->has_zero_copy_send = true; -- visit_type_bool(v, param, &p->zero_copy_send, &err); -- break; --#endif - case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: - p->has_xbzrle_cache_size = true; - if (!visit_type_size(v, param, &cache_size, &err)) { -diff --git a/qapi/migration.json b/qapi/migration.json -index 4d833ecdd6..5105790cd0 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -463,6 +463,13 @@ - # procedure starts. 
The VM RAM is saved with running VM. - # (since 6.0) - # -+# @zero-copy-send: Controls behavior on sending memory pages on migration. -+# When true, enables a zero-copy mechanism for sending -+# memory pages, if host supports it. -+# Requires that QEMU be permitted to use locked memory -+# for guest RAM pages. -+# (since 7.1) -+# - # Features: - # @unstable: Members @x-colo and @x-ignore-shared are experimental. - # -@@ -476,7 +483,8 @@ - 'block', 'return-path', 'pause-before-switchover', 'multifd', - 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', - { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, -- 'validate-uuid', 'background-snapshot'] } -+ 'validate-uuid', 'background-snapshot', -+ 'zero-copy-send'] } - - ## - # @MigrationCapabilityStatus: -@@ -741,12 +749,6 @@ - # will consume more CPU. - # Defaults to 1. (Since 5.0) - # --# @zero-copy-send: Controls behavior on sending memory pages on migration. --# When true, enables a zero-copy mechanism for sending --# memory pages, if host supports it. --# Requires that QEMU be permitted to use locked memory --# for guest RAM pages. --# Defaults to false. (Since 7.1) - # - # @block-bitmap-mapping: Maps block nodes and bitmaps on them to - # aliases for the purpose of dirty bitmap migration. Such -@@ -787,7 +789,6 @@ - 'xbzrle-cache-size', 'max-postcopy-bandwidth', - 'max-cpu-throttle', 'multifd-compression', - 'multifd-zlib-level' ,'multifd-zstd-level', -- { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'}, - 'block-bitmap-mapping' ] } - - ## -@@ -914,13 +915,6 @@ - # will consume more CPU. - # Defaults to 1. (Since 5.0) - # --# @zero-copy-send: Controls behavior on sending memory pages on migration. --# When true, enables a zero-copy mechanism for sending --# memory pages, if host supports it. --# Requires that QEMU be permitted to use locked memory --# for guest RAM pages. --# Defaults to false. 
(Since 7.1) --# - # @block-bitmap-mapping: Maps block nodes and bitmaps on them to - # aliases for the purpose of dirty bitmap migration. Such - # aliases may for example be the corresponding names on the -@@ -975,7 +969,6 @@ - '*multifd-compression': 'MultiFDCompression', - '*multifd-zlib-level': 'uint8', - '*multifd-zstd-level': 'uint8', -- '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, - '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } - - ## -@@ -1122,13 +1115,6 @@ - # will consume more CPU. - # Defaults to 1. (Since 5.0) - # --# @zero-copy-send: Controls behavior on sending memory pages on migration. --# When true, enables a zero-copy mechanism for sending --# memory pages, if host supports it. --# Requires that QEMU be permitted to use locked memory --# for guest RAM pages. --# Defaults to false. (Since 7.1) --# - # @block-bitmap-mapping: Maps block nodes and bitmaps on them to - # aliases for the purpose of dirty bitmap migration. Such - # aliases may for example be the corresponding names on the -@@ -1181,7 +1167,6 @@ - '*multifd-compression': 'MultiFDCompression', - '*multifd-zlib-level': 'uint8', - '*multifd-zstd-level': 'uint8', -- '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, - '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } - - ## --- -2.35.3 - diff --git a/kvm-migration-Fix-operator-type.patch b/kvm-migration-Fix-operator-type.patch deleted file mode 100644 index f6a462a..0000000 --- a/kvm-migration-Fix-operator-type.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 4bd48e784ae0c38c89f1a944b06c997fd28c4d37 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Thu, 19 May 2022 04:15:33 -0400 -Subject: [PATCH 16/16] migration: Fix operator type -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 92: Fix build using clang 14 -RH-Commit: [1/1] ad9980e64cf2e39085d68f1ff601444bf2afe228 (mrezanin/centos-src-qemu-kvm) 
-RH-Bugzilla: 2064530 -RH-Acked-by: Daniel P. Berrangé -RH-Acked-by: Dr. David Alan Gilbert - -Clang spotted an & that should have been an &&; fix it. - -Reported by: David Binderman / https://gitlab.com/dcb -Fixes: 65dacaa04fa ("migration: introduce save_normal_page()") -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/963 -Signed-off-by: Dr. David Alan Gilbert -Message-Id: <20220406102515.96320-1-dgilbert@redhat.com> -Reviewed-by: Peter Maydell -Reviewed-by: Peter Xu -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit f912ec5b2d65644116ff496b58d7c9145c19e4c0) -Signed-off-by: Miroslav Rezanina ---- - migration/ram.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 3532f64ecb..0ef4bd63eb 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1289,7 +1289,7 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, - offset | RAM_SAVE_FLAG_PAGE)); - if (async) { - qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, -- migrate_release_ram() & -+ migrate_release_ram() && - migration_in_postcopy()); - } else { - qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE); --- -2.31.1 - diff --git a/kvm-migration-add-remaining-params-has_-true-in-migratio.patch b/kvm-migration-add-remaining-params-has_-true-in-migratio.patch deleted file mode 100644 index bcaff3b..0000000 --- a/kvm-migration-add-remaining-params-has_-true-in-migratio.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 9698c0e8dd9b4f5dbc237a3f98ac46297dac85fb Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Mon, 25 Jul 2022 22:02:35 -0300 -Subject: [PATCH 05/11] migration: add remaining params->has_* = true in - migration_instance_init() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 111: zero-copy-send fixes & improvements -RH-Commit: [5/6] 50bbad254e2356b3ae16f6e00a3db8fd0b22dde9 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 
2107466 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: Dr. David Alan Gilbert - -Some of params->has_* = true are missing in migration_instance_init, this -causes migrate_params_check() to skip some tests, allowing some -unsupported scenarios. - -Fix this by adding all missing params->has_* = true in -migration_instance_init(). - -Fixes: 69ef1f36b0 ("migration: define 'tls-creds' and 'tls-hostname' migration parameters") -Fixes: 1d58872a91 ("migration: do not wait for free thread") -Fixes: d2f1d29b95 ("migration: add support for a "tls-authz" migration parameter") -Signed-off-by: Leonardo Bras -Message-Id: <20220726010235.342927-1-leobras@redhat.com> -Reviewed-by: Peter Xu -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit df67aa3e61e2c83459da7d815962d9706f1528fc) -Signed-off-by: Leonardo Bras ---- - migration/migration.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/migration/migration.c b/migration/migration.c -index 343629d59c..5e78028df4 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -4332,6 +4332,7 @@ static void migration_instance_init(Object *obj) - /* Set has_* up only for parameter checks */ - params->has_compress_level = true; - params->has_compress_threads = true; -+ params->has_compress_wait_thread = true; - params->has_decompress_threads = true; - params->has_throttle_trigger_threshold = true; - params->has_cpu_throttle_initial = true; -@@ -4352,6 +4353,9 @@ static void migration_instance_init(Object *obj) - params->has_announce_max = true; - params->has_announce_rounds = true; - params->has_announce_step = true; -+ params->has_tls_creds = true; -+ params->has_tls_hostname = true; -+ params->has_tls_authz = true; - - qemu_sem_init(&ms->postcopy_pause_sem, 0); - qemu_sem_init(&ms->postcopy_pause_rp_sem, 0); --- -2.31.1 - diff --git a/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch b/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch deleted file mode 
100644 index d7b1ab3..0000000 --- a/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 78bbe28d5f5691330239041448cccfb339eed779 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Mon, 11 Jul 2022 18:11:13 -0300 -Subject: [PATCH 03/11] migration/multifd: Report to user when zerocopy not - working -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 111: zero-copy-send fixes & improvements -RH-Commit: [3/6] 4f9165325b3cb8ff16d8b3b7649ff780fae0e2ad (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 2107466 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: Dr. David Alan Gilbert - -Some errors, like the lack of Scatter-Gather support by the network -interface(NETIF_F_SG) may cause sendmsg(...,MSG_ZEROCOPY) to fail on using -zero-copy, which causes it to fall back to the default copying mechanism. - -After each full dirty-bitmap scan there should be a zero-copy flush -happening, which checks for errors each of the previous calls to -sendmsg(...,MSG_ZEROCOPY). If all of them failed to use zero-copy, then -increment dirty_sync_missed_zero_copy migration stat to let the user know -about it. - -Signed-off-by: Leonardo Bras -Reviewed-by: Daniel P. Berrangé -Acked-by: Peter Xu -Message-Id: <20220711211112.18951-4-leobras@redhat.com> -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit d59c40cc483729f2e67c80e58df769ad19976fe9) -Signed-off-by: Leonardo Bras ---- - migration/multifd.c | 2 ++ - migration/ram.c | 5 +++++ - migration/ram.h | 2 ++ - 3 files changed, 9 insertions(+) - -diff --git a/migration/multifd.c b/migration/multifd.c -index 0b5b41c53f..96e5f0a058 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -626,6 +626,8 @@ int multifd_send_sync_main(QEMUFile *f) - if (ret < 0) { - error_report_err(err); - return -1; -+ } else if (ret == 1) { -+ dirty_sync_missed_zero_copy(); - } - } - } -diff --git a/migration/ram.c b/migration/ram.c -index ee40e4a718..c437ff1b1f 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -406,6 +406,11 @@ static void ram_transferred_add(uint64_t bytes) - ram_counters.transferred += bytes; - } - -+void dirty_sync_missed_zero_copy(void) -+{ -+ ram_counters.dirty_sync_missed_zero_copy++; -+} -+ - /* used by the search for pages to send */ - struct PageSearchStatus { - /* Current block being searched */ -diff --git a/migration/ram.h b/migration/ram.h -index 2c6dc3675d..34adf5cb92 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -86,4 +86,6 @@ void ram_write_tracking_prepare(void); - int ram_write_tracking_start(void); - void ram_write_tracking_stop(void); - -+void dirty_sync_missed_zero_copy(void); -+ - #endif --- -2.31.1 - diff --git a/kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch b/kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch deleted file mode 100644 index ea89a9f..0000000 --- a/kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch +++ /dev/null @@ -1,142 +0,0 @@ -From 1d280070748b604c60a7be4d4c3c3a28e3964f37 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 2 Aug 2022 10:11:21 +0200 -Subject: [PATCH 31/32] multifd: Copy pages before compressing them with zlib - -RH-Author: Thomas Huth -RH-MergeRequest: 112: Fix postcopy migration on s390x -RH-Commit: [1/2] 
fd5a0221e22b4563bd1cb7f8a8b95f0bfe8f5fc9 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2099934 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Peter Xu - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2099934 - -zlib_send_prepare() compresses pages of a running VM. zlib does not -make any thread-safety guarantees with respect to changing deflate() -input concurrently with deflate() [1]. - -One can observe problems due to this with the IBM zEnterprise Data -Compression accelerator capable zlib [2]. When the hardware -acceleration is enabled, migration/multifd/tcp/plain/zlib test fails -intermittently [3] due to sliding window corruption. The accelerator's -architecture explicitly discourages concurrent accesses [4]: - - Page 26-57, "Other Conditions": - - As observed by this CPU, other CPUs, and channel - programs, references to the parameter block, first, - second, and third operands may be multiple-access - references, accesses to these storage locations are - not necessarily block-concurrent, and the sequence - of these accesses or references is undefined. - -Mark Adler pointed out that vanilla zlib performs double fetches under -certain circumstances as well [5], therefore we need to copy data -before passing it to deflate(). - -[1] https://zlib.net/manual.html -[2] https://github.com/madler/zlib/pull/410 -[3] https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03988.html -[4] http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf -[5] https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00889.html - -Signed-off-by: Ilya Leoshkevich -Message-Id: <20220705203559.2960949-1-iii@linux.ibm.com> -Reviewed-by: Dr. David Alan Gilbert -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 007e179ef0e97eafda4c9ff2a9d665a1947c7c6d) -Signed-off-by: Thomas Huth ---- - migration/multifd-zlib.c | 38 ++++++++++++++++++++++++++++++-------- - 1 file changed, 30 insertions(+), 8 deletions(-) - -diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c -index 3a7ae44485..18213a9513 100644 ---- a/migration/multifd-zlib.c -+++ b/migration/multifd-zlib.c -@@ -27,6 +27,8 @@ struct zlib_data { - uint8_t *zbuff; - /* size of compressed buffer */ - uint32_t zbuff_len; -+ /* uncompressed buffer of size qemu_target_page_size() */ -+ uint8_t *buf; - }; - - /* Multifd zlib compression */ -@@ -45,26 +47,38 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) - { - struct zlib_data *z = g_new0(struct zlib_data, 1); - z_stream *zs = &z->zs; -+ const char *err_msg; - - zs->zalloc = Z_NULL; - zs->zfree = Z_NULL; - zs->opaque = Z_NULL; - if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) { -- g_free(z); -- error_setg(errp, "multifd %u: deflate init failed", p->id); -- return -1; -+ err_msg = "deflate init failed"; -+ goto err_free_z; - } - /* This is the maxium size of the compressed buffer */ - z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE); - z->zbuff = g_try_malloc(z->zbuff_len); - if (!z->zbuff) { -- deflateEnd(&z->zs); -- g_free(z); -- error_setg(errp, "multifd %u: out of memory for zbuff", p->id); -- return -1; -+ err_msg = "out of memory for zbuff"; -+ goto err_deflate_end; -+ } -+ z->buf = g_try_malloc(qemu_target_page_size()); -+ if (!z->buf) { -+ err_msg = "out of memory for buf"; -+ goto err_free_zbuff; - } - p->data = z; - return 0; -+ -+err_free_zbuff: -+ g_free(z->zbuff); -+err_deflate_end: -+ deflateEnd(&z->zs); -+err_free_z: -+ g_free(z); -+ error_setg(errp, "multifd %u: %s", p->id, err_msg); -+ return -1; - } - - /** -@@ -82,6 +96,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) - deflateEnd(&z->zs); - g_free(z->zbuff); - z->zbuff = NULL; -+ g_free(z->buf); -+ 
z->buf = NULL; - g_free(p->data); - p->data = NULL; - } -@@ -114,8 +130,14 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) - flush = Z_SYNC_FLUSH; - } - -+ /* -+ * Since the VM might be running, the page may be changing concurrently -+ * with compression. zlib does not guarantee that this is safe, -+ * therefore copy the page before calling deflate(). -+ */ -+ memcpy(z->buf, p->pages->block->host + p->normal[i], page_size); - zs->avail_in = page_size; -- zs->next_in = p->pages->block->host + p->normal[i]; -+ zs->next_in = z->buf; - - zs->avail_out = available; - zs->next_out = z->zbuff + out_size; --- -2.31.1 - diff --git a/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch b/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch deleted file mode 100644 index c7159e1..0000000 --- a/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch +++ /dev/null @@ -1,182 +0,0 @@ -From c1a2866d158ac67179fa0d17f1710302eb9a3866 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:37 -0300 -Subject: [PATCH 14/18] multifd: Implement zero copy write in multifd migration - (multifd-zero-copy) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [8/11] b93009cc94b2cc4b464b4f68ebfb37b870dd6f7d (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -Implement zero copy send on nocomp_send_write(), by making use of QIOChannel -writev + flags & flush interface. - -Change multifd_send_sync_main() so flush_zero_copy() can be called -after each iteration in order to make sure all dirty pages are sent before -a new iteration is started. It will also flush at the beginning and at the -end of migration. 
- -Also make it return -1 if flush_zero_copy() fails, in order to cancel -the migration process, and avoid resuming the guest in the target host -without receiving all current RAM. - -This will work fine on RAM migration because the RAM pages are not usually freed, -and there is no problem on changing the pages content between writev_zero_copy() and -the actual sending of the buffer, because this change will dirty the page and -cause it to be re-sent on a next iteration anyway. - -A lot of locked memory may be needed in order to use multifd migration -with zero-copy enabled, so disabling the feature should be necessary for -low-privileged users trying to perform multifd migrations. - -Signed-off-by: Leonardo Bras -Reviewed-by: Peter Xu -Reviewed-by: Daniel P. Berrangé -Message-Id: <20220513062836.965425-9-leobras@redhat.com> -Signed-off-by: Dr. David Alan Gilbert -(cherry picked from commit 5b1d9bab2da4fca3a3caee97c430e5709cb32b7b) -Signed-off-by: Leonardo Bras ---- - migration/migration.c | 11 ++++++++++- - migration/multifd.c | 37 +++++++++++++++++++++++++++++++++++-- - migration/multifd.h | 2 ++ - migration/socket.c | 5 +++-- - 4 files changed, 50 insertions(+), 5 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index d91efb66fe..102236fba0 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1485,7 +1485,16 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) - error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); - return false; - } -- -+#ifdef CONFIG_LINUX -+ if (params->zero_copy_send && -+ (!migrate_use_multifd() || -+ params->multifd_compression != MULTIFD_COMPRESSION_NONE || -+ (params->tls_creds && *params->tls_creds))) { -+ error_setg(errp, -+ "Zero copy only available for non-compressed non-TLS multifd migration"); -+ return false; -+ } -+#endif - return true; - } - -diff --git a/migration/multifd.c b/migration/multifd.c -index 8fca6c970e..0b5b41c53f 100644 ---- 
a/migration/multifd.c -+++ b/migration/multifd.c -@@ -571,6 +571,7 @@ void multifd_save_cleanup(void) - int multifd_send_sync_main(QEMUFile *f) - { - int i; -+ bool flush_zero_copy; - - if (!migrate_use_multifd()) { - return 0; -@@ -581,6 +582,20 @@ int multifd_send_sync_main(QEMUFile *f) - return -1; - } - } -+ -+ /* -+ * When using zero-copy, it's necessary to flush the pages before any of -+ * the pages can be sent again, so we'll make sure the new version of the -+ * pages will always arrive _later_ than the old pages. -+ * -+ * Currently we achieve this by flushing the zero-page requested writes -+ * per ram iteration, but in the future we could potentially optimize it -+ * to be less frequent, e.g. only after we finished one whole scanning of -+ * all the dirty bitmaps. -+ */ -+ -+ flush_zero_copy = migrate_use_zero_copy_send(); -+ - for (i = 0; i < migrate_multifd_channels(); i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; - -@@ -602,6 +617,17 @@ int multifd_send_sync_main(QEMUFile *f) - ram_counters.transferred += p->packet_len; - qemu_mutex_unlock(&p->mutex); - qemu_sem_post(&p->sem); -+ -+ if (flush_zero_copy && p->c) { -+ int ret; -+ Error *err = NULL; -+ -+ ret = qio_channel_flush(p->c, &err); -+ if (ret < 0) { -+ error_report_err(err); -+ return -1; -+ } -+ } - } - for (i = 0; i < migrate_multifd_channels(); i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; -@@ -686,8 +712,8 @@ static void *multifd_send_thread(void *opaque) - p->iov[0].iov_base = p->packet; - } - -- ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, -- &local_err); -+ ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL, -+ 0, p->write_flags, &local_err); - if (ret != 0) { - break; - } -@@ -928,6 +954,13 @@ int multifd_save_setup(Error **errp) - /* We need one extra place for the packet header */ - p->iov = g_new0(struct iovec, page_count + 1); - p->normal = g_new0(ram_addr_t, page_count); -+ -+ if (migrate_use_zero_copy_send()) { -+ 
p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; -+ } else { -+ p->write_flags = 0; -+ } -+ - socket_send_channel_create(multifd_new_send_channel_async, p); - } - -diff --git a/migration/multifd.h b/migration/multifd.h -index cd495195ce..7ec688fb4f 100644 ---- a/migration/multifd.h -+++ b/migration/multifd.h -@@ -96,6 +96,8 @@ typedef struct { - uint32_t packet_len; - /* pointer to the packet */ - MultiFDPacket_t *packet; -+ /* multifd flags for sending ram */ -+ int write_flags; - /* multifd flags for each packet */ - uint32_t flags; - /* size of the next packet that contains pages */ -diff --git a/migration/socket.c b/migration/socket.c -index 3754d8f72c..4fd5e85f50 100644 ---- a/migration/socket.c -+++ b/migration/socket.c -@@ -79,8 +79,9 @@ static void socket_outgoing_migration(QIOTask *task, - - trace_migration_socket_outgoing_connected(data->hostname); - -- if (migrate_use_zero_copy_send()) { -- error_setg(&err, "Zero copy send not available in migration"); -+ if (migrate_use_zero_copy_send() && -+ !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { -+ error_setg(&err, "Zero copy send feature not detected in host kernel"); - } - - out: --- -2.35.3 - diff --git a/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch b/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch deleted file mode 100644 index 415e3a9..0000000 --- a/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 63255c13492f42a3236d96e706e5f8e70bb4e219 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:36 -0300 -Subject: [PATCH 13/18] multifd: Send header packet without flags if - zero-copy-send is enabled -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [7/11] 137eea685e387d3d6aff187ec3fcac05bc16b6e3 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 
-RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -Since d48c3a0445 ("multifd: Use a single writev on the send side"), -sending the header packet and the memory pages happens in the same -writev, which can potentially make the migration faster. - -Using channel-socket as example, this works well with the default copying -mechanism of sendmsg(), but with zero-copy-send=true, it will cause -the migration to often break. - -This happens because the header packet buffer gets reused quite often, -and there is a high chance that by the time the MSG_ZEROCOPY mechanism get -to send the buffer, it has already changed, sending the wrong data and -causing the migration to abort. - -It means that, as it is, the buffer for the header packet is not suitable -for sending with MSG_ZEROCOPY. - -In order to enable zero copy for multifd, send the header packet on an -individual write(), without any flags, and the remanining pages with a -writev(), as it was happening before. This only changes how a migration -with zero-copy-send=true works, not changing any current behavior for -migrations with zero-copy-send=false. - -Signed-off-by: Leonardo Bras -Reviewed-by: Peter Xu -Reviewed-by: Daniel P. Berrangé -Message-Id: <20220513062836.965425-8-leobras@redhat.com> -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit b7dbdd8e76cd03453c234dbb9578d20969859d74) -Signed-off-by: Leonardo Bras ---- - migration/multifd.c | 22 +++++++++++++++++++--- - 1 file changed, 19 insertions(+), 3 deletions(-) - -diff --git a/migration/multifd.c b/migration/multifd.c -index cdb57439a7..8fca6c970e 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -619,6 +619,7 @@ static void *multifd_send_thread(void *opaque) - MultiFDSendParams *p = opaque; - Error *local_err = NULL; - int ret = 0; -+ bool use_zero_copy_send = migrate_use_zero_copy_send(); - - trace_multifd_send_thread_start(p->id); - rcu_register_thread(); -@@ -641,9 +642,14 @@ static void *multifd_send_thread(void *opaque) - if (p->pending_job) { - uint64_t packet_num = p->packet_num; - uint32_t flags = p->flags; -- p->iovs_num = 1; - p->normal_num = 0; - -+ if (use_zero_copy_send) { -+ p->iovs_num = 0; -+ } else { -+ p->iovs_num = 1; -+ } -+ - for (int i = 0; i < p->pages->num; i++) { - p->normal[p->normal_num] = p->pages->offset[i]; - p->normal_num++; -@@ -667,8 +673,18 @@ static void *multifd_send_thread(void *opaque) - trace_multifd_send(p->id, packet_num, p->normal_num, flags, - p->next_packet_size); - -- p->iov[0].iov_len = p->packet_len; -- p->iov[0].iov_base = p->packet; -+ if (use_zero_copy_send) { -+ /* Send header first, without zerocopy */ -+ ret = qio_channel_write_all(p->c, (void *)p->packet, -+ p->packet_len, &local_err); -+ if (ret != 0) { -+ break; -+ } -+ } else { -+ /* Send header using the same writev call */ -+ p->iov[0].iov_len = p->packet_len; -+ p->iov[0].iov_base = p->packet; -+ } - - ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, - &local_err); --- -2.35.3 - diff --git a/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch b/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch deleted file mode 100644 index e6d726a..0000000 --- a/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch +++ /dev/null @@ -1,163 +0,0 @@ -From 
4ca5375a936bc87829c6e2b4620f56c73a5efc70 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:35 -0300 -Subject: [PATCH 12/18] multifd: multifd_send_sync_main now returns negative on - error -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [6/11] c8ebdee4327d463c74f4b2eeb42d3c964f314c94 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: Peter Xu - -Even though multifd_send_sync_main() currently emits error_reports, it's -callers don't really check it before continuing. - -Change multifd_send_sync_main() to return -1 on error and 0 on success. -Also change all it's callers to make use of this change and possibly fail -earlier. - -(This change is important to next patch on multifd zero copy -implementation, to make it sure an error in zero-copy flush does not go -unnoticed. - -Signed-off-by: Leonardo Bras -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Peter Xu -Message-Id: <20220513062836.965425-7-leobras@redhat.com> -Signed-off-by: Dr. 
David Alan Gilbert -(cherry picked from commit 33d70973a3a6e8c6b62bcbc64d9e488961981007) -Signed-off-by: Leonardo Bras ---- - migration/multifd.c | 10 ++++++---- - migration/multifd.h | 2 +- - migration/ram.c | 29 ++++++++++++++++++++++------- - 3 files changed, 29 insertions(+), 12 deletions(-) - -diff --git a/migration/multifd.c b/migration/multifd.c -index 43998ad117..cdb57439a7 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -568,17 +568,17 @@ void multifd_save_cleanup(void) - multifd_send_state = NULL; - } - --void multifd_send_sync_main(QEMUFile *f) -+int multifd_send_sync_main(QEMUFile *f) - { - int i; - - if (!migrate_use_multifd()) { -- return; -+ return 0; - } - if (multifd_send_state->pages->num) { - if (multifd_send_pages(f) < 0) { - error_report("%s: multifd_send_pages fail", __func__); -- return; -+ return -1; - } - } - for (i = 0; i < migrate_multifd_channels(); i++) { -@@ -591,7 +591,7 @@ void multifd_send_sync_main(QEMUFile *f) - if (p->quit) { - error_report("%s: channel %d has already quit", __func__, i); - qemu_mutex_unlock(&p->mutex); -- return; -+ return -1; - } - - p->packet_num = multifd_send_state->packet_num++; -@@ -610,6 +610,8 @@ void multifd_send_sync_main(QEMUFile *f) - qemu_sem_wait(&p->sem_sync); - } - trace_multifd_send_sync_main(multifd_send_state->packet_num); -+ -+ return 0; - } - - static void *multifd_send_thread(void *opaque) -diff --git a/migration/multifd.h b/migration/multifd.h -index 4dda900a0b..cd495195ce 100644 ---- a/migration/multifd.h -+++ b/migration/multifd.h -@@ -22,7 +22,7 @@ int multifd_load_cleanup(Error **errp); - bool multifd_recv_all_channels_created(void); - bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); - void multifd_recv_sync_main(void); --void multifd_send_sync_main(QEMUFile *f); -+int multifd_send_sync_main(QEMUFile *f); - int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); - - /* Multifd Compression flags */ -diff --git a/migration/ram.c 
b/migration/ram.c -index 0ef4bd63eb..fb6db54642 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -2903,6 +2903,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) - { - RAMState **rsp = opaque; - RAMBlock *block; -+ int ret; - - if (compress_threads_save_setup()) { - return -1; -@@ -2937,7 +2938,11 @@ static int ram_save_setup(QEMUFile *f, void *opaque) - ram_control_before_iterate(f, RAM_CONTROL_SETUP); - ram_control_after_iterate(f, RAM_CONTROL_SETUP); - -- multifd_send_sync_main(f); -+ ret = multifd_send_sync_main(f); -+ if (ret < 0) { -+ return ret; -+ } -+ - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); - -@@ -3046,7 +3051,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - out: - if (ret >= 0 - && migration_is_setup_or_active(migrate_get_current()->state)) { -- multifd_send_sync_main(rs->f); -+ ret = multifd_send_sync_main(rs->f); -+ if (ret < 0) { -+ return ret; -+ } -+ - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); - ram_transferred_add(8); -@@ -3106,13 +3115,19 @@ static int ram_save_complete(QEMUFile *f, void *opaque) - ram_control_after_iterate(f, RAM_CONTROL_FINISH); - } - -- if (ret >= 0) { -- multifd_send_sync_main(rs->f); -- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -- qemu_fflush(f); -+ if (ret < 0) { -+ return ret; - } - -- return ret; -+ ret = multifd_send_sync_main(rs->f); -+ if (ret < 0) { -+ return ret; -+ } -+ -+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -+ qemu_fflush(f); -+ -+ return 0; - } - - static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, --- -2.35.3 - diff --git a/kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch b/kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch deleted file mode 100644 index 56abcb1..0000000 --- a/kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch +++ /dev/null @@ -1,381 +0,0 @@ -From 4a9ddf42788d3f924bdad7746f7aca615f03d7c1 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Wed, 11 May 2022 19:49:24 -0500 -Subject: 
[PATCH 2/2] nbd/server: Allow MULTI_CONN for shared writable exports -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Blake -RH-MergeRequest: 90: Advertise MULTI_CONN on writeable NBD servers -RH-Commit: [2/2] 53f0e885a5ed7f6e4bb14e74fe8e7957e6afe90f (ebblake/centos-qemu-kvm) -RH-Bugzilla: 1708300 -RH-Acked-by: Nir Soffer -RH-Acked-by: Kevin Wolf -RH-Acked-by: Daniel P. Berrangé - -According to the NBD spec, a server that advertises -NBD_FLAG_CAN_MULTI_CONN promises that multiple client connections will -not see any cache inconsistencies: when properly separated by a single -flush, actions performed by one client will be visible to another -client, regardless of which client did the flush. - -We always satisfy these conditions in qemu - even when we support -multiple clients, ALL clients go through a single point of reference -into the block layer, with no local caching. The effect of one client -is instantly visible to the next client. Even if our backend were a -network device, we argue that any multi-path caching effects that -would cause inconsistencies in back-to-back actions not seeing the -effect of previous actions would be a bug in that backend, and not the -fault of caching in qemu. As such, it is safe to unconditionally -advertise CAN_MULTI_CONN for any qemu NBD server situation that -supports parallel clients. - -Note, however, that we don't want to advertise CAN_MULTI_CONN when we -know that a second client cannot connect (for historical reasons, -qemu-nbd defaults to a single connection while nbd-server-add and QMP -commands default to unlimited connections; but we already have -existing means to let either style of NBD server creation alter those -defaults). This is visible by no longer advertising MULTI_CONN for -'qemu-nbd -r' without -e, as in the iotest nbd-qemu-allocation. 
- -The harder part of this patch is setting up an iotest to demonstrate -behavior of multiple NBD clients to a single server. It might be -possible with parallel qemu-io processes, but I found it easier to do -in python with the help of libnbd, and help from Nir and Vladimir in -writing the test. - -Signed-off-by: Eric Blake -Suggested-by: Nir Soffer -Suggested-by: Vladimir Sementsov-Ogievskiy -Message-Id: <20220512004924.417153-3-eblake@redhat.com> -Signed-off-by: Kevin Wolf - -(cherry picked from commit 58a6fdcc9efb2a7c1ef4893dca4aa5e8020ca3dc) -Conflicts: - nbd/server.c - context, e5fb29d5 not backported -Signed-off-by: Eric Blake ---- - MAINTAINERS | 1 + - blockdev-nbd.c | 5 + - docs/interop/nbd.txt | 1 + - docs/tools/qemu-nbd.rst | 3 +- - include/block/nbd.h | 3 +- - nbd/server.c | 10 +- - qapi/block-export.json | 8 +- - tests/qemu-iotests/tests/nbd-multiconn | 145 ++++++++++++++++++ - tests/qemu-iotests/tests/nbd-multiconn.out | 5 + - .../tests/nbd-qemu-allocation.out | 2 +- - 10 files changed, 172 insertions(+), 11 deletions(-) - create mode 100755 tests/qemu-iotests/tests/nbd-multiconn - create mode 100644 tests/qemu-iotests/tests/nbd-multiconn.out - -diff --git a/MAINTAINERS b/MAINTAINERS -index 4ad2451e03..2fe20a49ab 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -3370,6 +3370,7 @@ F: qemu-nbd.* - F: blockdev-nbd.c - F: docs/interop/nbd.txt - F: docs/tools/qemu-nbd.rst -+F: tests/qemu-iotests/tests/*nbd* - T: git https://repo.or.cz/qemu/ericb.git nbd - T: git https://src.openvz.org/scm/~vsementsov/qemu.git nbd - -diff --git a/blockdev-nbd.c b/blockdev-nbd.c -index add41a23af..c6d9b0324c 100644 ---- a/blockdev-nbd.c -+++ b/blockdev-nbd.c -@@ -44,6 +44,11 @@ bool nbd_server_is_running(void) - return nbd_server || qemu_nbd_connections >= 0; - } - -+int nbd_server_max_connections(void) -+{ -+ return nbd_server ? 
nbd_server->max_connections : qemu_nbd_connections; -+} -+ - static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) - { - nbd_client_put(client); -diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt -index bdb0f2a41a..f5ca25174a 100644 ---- a/docs/interop/nbd.txt -+++ b/docs/interop/nbd.txt -@@ -68,3 +68,4 @@ NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE - * 4.2: NBD_FLAG_CAN_MULTI_CONN for shareable read-only exports, - NBD_CMD_FLAG_FAST_ZERO - * 5.2: NBD_CMD_BLOCK_STATUS for "qemu:allocation-depth" -+* 7.1: NBD_FLAG_CAN_MULTI_CONN for shareable writable exports -diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst -index 4c950f6199..8e08a29e89 100644 ---- a/docs/tools/qemu-nbd.rst -+++ b/docs/tools/qemu-nbd.rst -@@ -139,8 +139,7 @@ driver options if :option:`--image-opts` is specified. - .. option:: -e, --shared=NUM - - Allow up to *NUM* clients to share the device (default -- ``1``), 0 for unlimited. Safe for readers, but for now, -- consistency is not guaranteed between multiple writers. -+ ``1``), 0 for unlimited. - - .. option:: -t, --persistent - -diff --git a/include/block/nbd.h b/include/block/nbd.h -index c5a29ce1c6..c74b7a9d2e 100644 ---- a/include/block/nbd.h -+++ b/include/block/nbd.h -@@ -1,5 +1,5 @@ - /* -- * Copyright (C) 2016-2020 Red Hat, Inc. -+ * Copyright (C) 2016-2022 Red Hat, Inc. - * Copyright (C) 2005 Anthony Liguori - * - * Network Block Device -@@ -346,6 +346,7 @@ void nbd_client_put(NBDClient *client); - - void nbd_server_is_qemu_nbd(int max_connections); - bool nbd_server_is_running(void); -+int nbd_server_max_connections(void); - void nbd_server_start(SocketAddress *addr, const char *tls_creds, - const char *tls_authz, uint32_t max_connections, - Error **errp); -diff --git a/nbd/server.c b/nbd/server.c -index c5644fd3f6..6e2157acfa 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -1,5 +1,5 @@ - /* -- * Copyright (C) 2016-2021 Red Hat, Inc. -+ * Copyright (C) 2016-2022 Red Hat, Inc. 
- * Copyright (C) 2005 Anthony Liguori - * - * Network Block Device Server Side -@@ -1642,7 +1642,6 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, - int64_t size; - uint64_t perm, shared_perm; - bool readonly = !exp_args->writable; -- bool shared = !exp_args->writable; - strList *bitmaps; - size_t i; - int ret; -@@ -1693,11 +1692,12 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, - exp->description = g_strdup(arg->description); - exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH | - NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE); -+ -+ if (nbd_server_max_connections() != 1) { -+ exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN; -+ } - if (readonly) { - exp->nbdflags |= NBD_FLAG_READ_ONLY; -- if (shared) { -- exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN; -- } - } else { - exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES | - NBD_FLAG_SEND_FAST_ZERO); -diff --git a/qapi/block-export.json b/qapi/block-export.json -index 1e34927f85..755ccc89b1 100644 ---- a/qapi/block-export.json -+++ b/qapi/block-export.json -@@ -21,7 +21,9 @@ - # recreated on the fly while the NBD server is active. - # If missing, it will default to denying access (since 4.0). - # @max-connections: The maximum number of connections to allow at the same --# time, 0 for unlimited. (since 5.2; default: 0) -+# time, 0 for unlimited. Setting this to 1 also stops -+# the server from advertising multiple client support -+# (since 5.2; default: 0) - # - # Since: 4.2 - ## -@@ -50,7 +52,9 @@ - # recreated on the fly while the NBD server is active. - # If missing, it will default to denying access (since 4.0). - # @max-connections: The maximum number of connections to allow at the same --# time, 0 for unlimited. (since 5.2; default: 0) -+# time, 0 for unlimited. Setting this to 1 also stops -+# the server from advertising multiple client support -+# (since 5.2; default: 0). - # - # Returns: error if the server is already running. 
- # -diff --git a/tests/qemu-iotests/tests/nbd-multiconn b/tests/qemu-iotests/tests/nbd-multiconn -new file mode 100755 -index 0000000000..b121f2e363 ---- /dev/null -+++ b/tests/qemu-iotests/tests/nbd-multiconn -@@ -0,0 +1,145 @@ -+#!/usr/bin/env python3 -+# group: rw auto quick -+# -+# Test cases for NBD multi-conn advertisement -+# -+# Copyright (C) 2022 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . 
-+ -+import os -+from contextlib import contextmanager -+import iotests -+from iotests import qemu_img_create, qemu_io -+ -+ -+disk = os.path.join(iotests.test_dir, 'disk') -+size = '4M' -+nbd_sock = os.path.join(iotests.sock_dir, 'nbd_sock') -+nbd_uri = 'nbd+unix:///{}?socket=' + nbd_sock -+ -+ -+@contextmanager -+def open_nbd(export_name): -+ h = nbd.NBD() -+ try: -+ h.connect_uri(nbd_uri.format(export_name)) -+ yield h -+ finally: -+ h.shutdown() -+ -+class TestNbdMulticonn(iotests.QMPTestCase): -+ def setUp(self): -+ qemu_img_create('-f', iotests.imgfmt, disk, size) -+ qemu_io('-c', 'w -P 1 0 2M', '-c', 'w -P 2 2M 2M', disk) -+ -+ self.vm = iotests.VM() -+ self.vm.launch() -+ result = self.vm.qmp('blockdev-add', { -+ 'driver': 'qcow2', -+ 'node-name': 'n', -+ 'file': {'driver': 'file', 'filename': disk} -+ }) -+ self.assert_qmp(result, 'return', {}) -+ -+ def tearDown(self): -+ self.vm.shutdown() -+ os.remove(disk) -+ try: -+ os.remove(nbd_sock) -+ except OSError: -+ pass -+ -+ @contextmanager -+ def run_server(self, max_connections=None): -+ args = { -+ 'addr': { -+ 'type': 'unix', -+ 'data': {'path': nbd_sock} -+ } -+ } -+ if max_connections is not None: -+ args['max-connections'] = max_connections -+ -+ result = self.vm.qmp('nbd-server-start', args) -+ self.assert_qmp(result, 'return', {}) -+ yield -+ -+ result = self.vm.qmp('nbd-server-stop') -+ self.assert_qmp(result, 'return', {}) -+ -+ def add_export(self, name, writable=None): -+ args = { -+ 'type': 'nbd', -+ 'id': name, -+ 'node-name': 'n', -+ 'name': name, -+ } -+ if writable is not None: -+ args['writable'] = writable -+ -+ result = self.vm.qmp('block-export-add', args) -+ self.assert_qmp(result, 'return', {}) -+ -+ def test_default_settings(self): -+ with self.run_server(): -+ self.add_export('r') -+ self.add_export('w', writable=True) -+ with open_nbd('r') as h: -+ self.assertTrue(h.can_multi_conn()) -+ with open_nbd('w') as h: -+ self.assertTrue(h.can_multi_conn()) -+ -+ def 
test_limited_connections(self): -+ with self.run_server(max_connections=1): -+ self.add_export('r') -+ self.add_export('w', writable=True) -+ with open_nbd('r') as h: -+ self.assertFalse(h.can_multi_conn()) -+ with open_nbd('w') as h: -+ self.assertFalse(h.can_multi_conn()) -+ -+ def test_parallel_writes(self): -+ with self.run_server(): -+ self.add_export('w', writable=True) -+ -+ clients = [nbd.NBD() for _ in range(3)] -+ for c in clients: -+ c.connect_uri(nbd_uri.format('w')) -+ self.assertTrue(c.can_multi_conn()) -+ -+ initial_data = clients[0].pread(1024 * 1024, 0) -+ self.assertEqual(initial_data, b'\x01' * 1024 * 1024) -+ -+ updated_data = b'\x03' * 1024 * 1024 -+ clients[1].pwrite(updated_data, 0) -+ clients[2].flush() -+ current_data = clients[0].pread(1024 * 1024, 0) -+ -+ self.assertEqual(updated_data, current_data) -+ -+ for i in range(3): -+ clients[i].shutdown() -+ -+ -+if __name__ == '__main__': -+ try: -+ # Easier to use libnbd than to try and set up parallel -+ # 'qemu-nbd --list' or 'qemu-io' processes, but not all systems -+ # have libnbd installed. -+ import nbd # type: ignore -+ -+ iotests.main(supported_fmts=['qcow2']) -+ except ImportError: -+ iotests.notrun('libnbd not installed') -diff --git a/tests/qemu-iotests/tests/nbd-multiconn.out b/tests/qemu-iotests/tests/nbd-multiconn.out -new file mode 100644 -index 0000000000..8d7e996700 ---- /dev/null -+++ b/tests/qemu-iotests/tests/nbd-multiconn.out -@@ -0,0 +1,5 @@ -+... 
-+---------------------------------------------------------------------- -+Ran 3 tests -+ -+OK -diff --git a/tests/qemu-iotests/tests/nbd-qemu-allocation.out b/tests/qemu-iotests/tests/nbd-qemu-allocation.out -index 0bf1abb063..9d938db24e 100644 ---- a/tests/qemu-iotests/tests/nbd-qemu-allocation.out -+++ b/tests/qemu-iotests/tests/nbd-qemu-allocation.out -@@ -17,7 +17,7 @@ wrote 2097152/2097152 bytes at offset 1048576 - exports available: 1 - export: '' - size: 4194304 -- flags: 0x58f ( readonly flush fua df multi cache ) -+ flags: 0x48f ( readonly flush fua df cache ) - min block: 1 - opt block: 4096 - max block: 33554432 --- -2.31.1 - diff --git a/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch b/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch deleted file mode 100644 index 1bb8ea5..0000000 --- a/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 03996a8a826c9186e4a16e1b4757f1ef5947a503 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 5 Aug 2022 11:42:14 +0200 -Subject: [PATCH 07/11] pc-bios/s390-ccw: Fix booting with logical block size < - physical block size - -RH-Author: Thomas Huth -RH-MergeRequest: 113: pc-bios/s390-ccw: Fix booting with logical block size < physical block size -RH-Commit: [1/1] a45ff477bc7d7011ea6c4d42a1aade213d1e4690 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2112303 -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Claudio Imbrenda - -For accessing single blocks during boot, it's the logical block size that -matters. (Physical block sizes are rather interesting e.g. for creating -file systems with the correct alignment for speed reasons etc.). -So the s390-ccw bios has to use the logical block size for calculating -sector numbers during the boot phase, the "physical_block_exp" shift -value must not be taken into account. 
This change fixes the boot process -when the guest hast been installed on a disk where the logical block size -differs from the physical one, e.g. if the guest has been installed -like this: - - qemu-system-s390x -nographic -accel kvm -m 2G \ - -drive if=none,id=d1,file=fedora.iso,format=raw,media=cdrom \ - -device virtio-scsi -device scsi-cd,drive=d1 \ - -drive if=none,id=d2,file=test.qcow2,format=qcow2 - -device virtio-blk,drive=d2,physical_block_size=4096,logical_block_size=512 - -Linux correctly uses the logical block size of 512 for the installation, -but the s390-ccw bios tries to boot from a disk with 4096 block size so -far, as long as this patch has not been applied yet (well, it used to work -by accident in the past due to the virtio_assume_scsi() hack that used to -enforce 512 byte sectors on all virtio-block disks, but that hack has been -well removed in commit 5447de2619050a0a4d to fix other scenarios). - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2112303 -Message-Id: <20220805094214.285223-1-thuth@redhat.com> -Reviewed-by: Cornelia Huck -Reviewed-by: Eric Farman -Signed-off-by: Thomas Huth -(cherry picked from commit 393296de19650e1400ca265914cfdeb313725363) ---- - pc-bios/s390-ccw/virtio-blkdev.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c -index 8271c47296..794f99b42c 100644 ---- a/pc-bios/s390-ccw/virtio-blkdev.c -+++ b/pc-bios/s390-ccw/virtio-blkdev.c -@@ -173,7 +173,7 @@ int virtio_get_block_size(void) - - switch (vdev->senseid.cu_model) { - case VIRTIO_ID_BLOCK: -- return vdev->config.blk.blk_size << vdev->config.blk.physical_block_exp; -+ return vdev->config.blk.blk_size; - case VIRTIO_ID_SCSI: - return vdev->scsi_block_size; - } --- -2.31.1 - diff --git a/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch b/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch deleted file mode 100644 index b212194..0000000 --- 
a/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch +++ /dev/null @@ -1,180 +0,0 @@ -From 2e38b4ec5c53b2b98539a70105d3046e1c452ab8 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 13/17] pc-bios/s390-ccw: Split virtio-scsi code from - virtio_blk_setup_device() - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [8/10] f49c5fb77e05c9dc09ed9f037e37f6a461e4bba6 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit cf30b7c4a9b2c64518be8037c2e6670aacdb00b9 -Author: Thomas Huth -Date: Mon Jul 4 13:19:00 2022 +0200 - - pc-bios/s390-ccw: Split virtio-scsi code from virtio_blk_setup_device() - - The next patch is going to add more virtio-block specific code to - virtio_blk_setup_device(), and if the virtio-scsi code is also in - there, this is more cumbersome. And the calling function virtio_setup() - in main.c looks at the device type already anyway, so it's more - logical to separate the virtio-scsi stuff into a new function in - virtio-scsi.c instead. 
- - Message-Id: <20220704111903.62400-10-thuth@redhat.com> - Reviewed-by: Eric Farman - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/main.c | 24 +++++++++++++++++------- - pc-bios/s390-ccw/virtio-blkdev.c | 20 ++------------------ - pc-bios/s390-ccw/virtio-scsi.c | 19 ++++++++++++++++++- - pc-bios/s390-ccw/virtio-scsi.h | 2 +- - 4 files changed, 38 insertions(+), 27 deletions(-) - -diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c -index 5d2b7ba94d..13e1d8fdf7 100644 ---- a/pc-bios/s390-ccw/main.c -+++ b/pc-bios/s390-ccw/main.c -@@ -14,6 +14,7 @@ - #include "s390-ccw.h" - #include "cio.h" - #include "virtio.h" -+#include "virtio-scsi.h" - #include "dasd-ipl.h" - - char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE))); -@@ -218,6 +219,7 @@ static int virtio_setup(void) - { - VDev *vdev = virtio_get_device(); - QemuIplParameters *early_qipl = (QemuIplParameters *)QIPL_ADDRESS; -+ int ret; - - memcpy(&qipl, early_qipl, sizeof(QemuIplParameters)); - -@@ -225,18 +227,26 @@ static int virtio_setup(void) - menu_setup(); - } - -- if (virtio_get_device_type() == VIRTIO_ID_NET) { -+ switch (vdev->senseid.cu_model) { -+ case VIRTIO_ID_NET: - sclp_print("Network boot device detected\n"); - vdev->netboot_start_addr = qipl.netboot_start_addr; -- } else { -- int ret = virtio_blk_setup_device(blk_schid); -- if (ret) { -- return ret; -- } -+ return 0; -+ case VIRTIO_ID_BLOCK: -+ ret = virtio_blk_setup_device(blk_schid); -+ break; -+ case VIRTIO_ID_SCSI: -+ ret = virtio_scsi_setup_device(blk_schid); -+ break; -+ default: -+ panic("\n! 
No IPL device available !\n"); -+ } -+ -+ if (!ret) { - IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); - } - -- return 0; -+ return ret; - } - - static void ipl_boot_device(void) -diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c -index db1f7f44aa..c175b66a47 100644 ---- a/pc-bios/s390-ccw/virtio-blkdev.c -+++ b/pc-bios/s390-ccw/virtio-blkdev.c -@@ -222,27 +222,11 @@ uint64_t virtio_get_blocks(void) - int virtio_blk_setup_device(SubChannelId schid) - { - VDev *vdev = virtio_get_device(); -- int ret = 0; - - vdev->schid = schid; - virtio_setup_ccw(vdev); - -- switch (vdev->senseid.cu_model) { -- case VIRTIO_ID_BLOCK: -- sclp_print("Using virtio-blk.\n"); -- break; -- case VIRTIO_ID_SCSI: -- IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, -- "Config: sense size mismatch"); -- IPL_assert(vdev->config.scsi.cdb_size == VIRTIO_SCSI_CDB_SIZE, -- "Config: CDB size mismatch"); -+ sclp_print("Using virtio-blk.\n"); - -- sclp_print("Using virtio-scsi.\n"); -- ret = virtio_scsi_setup(vdev); -- break; -- default: -- panic("\n! 
No IPL device available !\n"); -- } -- -- return ret; -+ return 0; - } -diff --git a/pc-bios/s390-ccw/virtio-scsi.c b/pc-bios/s390-ccw/virtio-scsi.c -index 2c8d0f3097..3b7069270c 100644 ---- a/pc-bios/s390-ccw/virtio-scsi.c -+++ b/pc-bios/s390-ccw/virtio-scsi.c -@@ -329,7 +329,7 @@ static void scsi_parse_capacity_report(void *data, - } - } - --int virtio_scsi_setup(VDev *vdev) -+static int virtio_scsi_setup(VDev *vdev) - { - int retry_test_unit_ready = 3; - uint8_t data[256]; -@@ -430,3 +430,20 @@ int virtio_scsi_setup(VDev *vdev) - - return 0; - } -+ -+int virtio_scsi_setup_device(SubChannelId schid) -+{ -+ VDev *vdev = virtio_get_device(); -+ -+ vdev->schid = schid; -+ virtio_setup_ccw(vdev); -+ -+ IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, -+ "Config: sense size mismatch"); -+ IPL_assert(vdev->config.scsi.cdb_size == VIRTIO_SCSI_CDB_SIZE, -+ "Config: CDB size mismatch"); -+ -+ sclp_print("Using virtio-scsi.\n"); -+ -+ return virtio_scsi_setup(vdev); -+} -diff --git a/pc-bios/s390-ccw/virtio-scsi.h b/pc-bios/s390-ccw/virtio-scsi.h -index 4b14c2c2f9..e6b6cd4815 100644 ---- a/pc-bios/s390-ccw/virtio-scsi.h -+++ b/pc-bios/s390-ccw/virtio-scsi.h -@@ -67,8 +67,8 @@ static inline bool virtio_scsi_response_ok(const VirtioScsiCmdResp *r) - return r->response == VIRTIO_SCSI_S_OK && r->status == CDB_STATUS_GOOD; - } - --int virtio_scsi_setup(VDev *vdev); - int virtio_scsi_read_many(VDev *vdev, - ulong sector, void *load_addr, int sec_num); -+int virtio_scsi_setup_device(SubChannelId schid); - - #endif /* VIRTIO_SCSI_H */ --- -2.31.1 - diff --git a/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch b/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch deleted file mode 100644 index 231a8a0..0000000 --- a/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 64fa56e0520215e3909e442f09d8073c1870648a Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 
-Subject: [PATCH 07/17] pc-bios/s390-ccw/bootmap: Improve the guessing logic in - zipl_load_vblk() - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [2/10] ca8f5e847617cf4ac2fd6c38edb2982f32fa3eba (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit 422865f6672ee1482b98d18321b55c1ecfb06c82 -Author: Thomas Huth -Date: Mon Jul 4 13:18:54 2022 +0200 - - pc-bios/s390-ccw/bootmap: Improve the guessing logic in zipl_load_vblk() - - The logic of trying an final ISO or ECKD boot on virtio-block devices is - very weird: Since the geometry hardly ever matches in virtio_disk_is_scsi(), - virtio_blk_setup_device() always sets a "guessed" disk geometry via - virtio_assume_scsi() (which is certainly also wrong in a lot of cases). - - zipl_load_vblk() then sees that there's been a "virtio_guessed_disk_nature" - and tries to fix up the geometry again via virtio_assume_iso9660() before - always trying to do ipl_iso_el_torito(). That's a very brain-twisting - way of attempting to boot from ISO images, which won't work anymore after - the following patches that will clean up the virtio_assume_scsi() mess - (and thus get rid of the "virtio_guessed_disk_nature" here). - - Let's try a better approach instead: ISO files always have a magic - string "CD001" at offset 0x8001 (see e.g. the ECMA-119 specification) - which we can use to decide whether we should try to boot in ISO 9660 - mode (which we should also try if we see a sector size of 2048). - - And if we were not able to boot in ISO mode here, the final boot attempt - before panicking is to boot in ECKD mode. 
Since this is our last boot - attempt anyway, simply always assume the ECKD geometry here (if the sector - size was not 4096 yet), so that we also do not depend on the guessed disk - geometry from virtio_blk_setup_device() here anymore. - - Message-Id: <20220704111903.62400-4-thuth@redhat.com> - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/bootmap.c | 27 +++++++++++++++++++++++---- - 1 file changed, 23 insertions(+), 4 deletions(-) - -diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c -index 56411ab3b6..994e59c0b0 100644 ---- a/pc-bios/s390-ccw/bootmap.c -+++ b/pc-bios/s390-ccw/bootmap.c -@@ -780,18 +780,37 @@ static void ipl_iso_el_torito(void) - } - } - -+/** -+ * Detect whether we're trying to boot from an .ISO image. -+ * These always have a signature string "CD001" at offset 0x8001. -+ */ -+static bool has_iso_signature(void) -+{ -+ int blksize = virtio_get_block_size(); -+ -+ if (!blksize || virtio_read(0x8000 / blksize, sec)) { -+ return false; -+ } -+ -+ return !memcmp("CD001", &sec[1], 5); -+} -+ - /*********************************************************************** - * Bus specific IPL sequences - */ - - static void zipl_load_vblk(void) - { -- if (virtio_guessed_disk_nature()) { -- virtio_assume_iso9660(); -+ int blksize = virtio_get_block_size(); -+ -+ if (blksize == VIRTIO_ISO_BLOCK_SIZE || has_iso_signature()) { -+ if (blksize != VIRTIO_ISO_BLOCK_SIZE) { -+ virtio_assume_iso9660(); -+ } -+ ipl_iso_el_torito(); - } -- ipl_iso_el_torito(); - -- if (virtio_guessed_disk_nature()) { -+ if (blksize != VIRTIO_DASD_DEFAULT_BLOCK_SIZE) { - sclp_print("Using guessed DASD geometry.\n"); - virtio_assume_eckd(); - } --- -2.31.1 - diff --git a/kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch b/kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch deleted file mode 100644 index 00601aa..0000000 --- a/kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch +++ /dev/null @@ -1,78 
+0,0 @@ -From 56674ee1f25f12978a6a8a1390e11b55b3e0fabe Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 15/17] pc-bios/s390-ccw/netboot.mak: Ignore Clang's warnings - about GNU extensions - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [10/10] 037dab4df23ebb2b42871bca8c842a53a7204b50 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit e2269220acb03e6c6a460c3090d804835e202239 -Author: Thomas Huth -Date: Mon Jul 4 13:19:03 2022 +0200 - - pc-bios/s390-ccw/netboot.mak: Ignore Clang's warnings about GNU extensions - - When compiling the s390-ccw bios with Clang (v14.0), there is currently - an unuseful warning like this: - - CC pc-bios/s390-ccw/ipv6.o - ../../roms/SLOF/lib/libnet/ipv6.c:447:18: warning: variable length array - folded to constant array as an extension [-Wgnu-folding-constant] - unsigned short raw[ip6size]; - ^ - - SLOF is currently GCC-only and cannot be compiled with Clang yet, so - it is expected that such extensions sneak in there - and as long as - we don't want to compile the code with a compiler that is neither GCC - or Clang, it is also not necessary to avoid such extensions. - - Thus these GNU-extension related warnings are completely useless in - the s390-ccw bios, especially in the code that is coming from SLOF, - so we should simply disable the related warnings here now. 
- - Message-Id: <20220704111903.62400-13-thuth@redhat.com> - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/netboot.mak | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/pc-bios/s390-ccw/netboot.mak b/pc-bios/s390-ccw/netboot.mak -index 68b4d7edcb..ad41898cb6 100644 ---- a/pc-bios/s390-ccw/netboot.mak -+++ b/pc-bios/s390-ccw/netboot.mak -@@ -16,9 +16,12 @@ s390-netboot.elf: $(NETOBJS) libnet.a libc.a - s390-netboot.img: s390-netboot.elf - $(call quiet-command,$(STRIP) --strip-unneeded $< -o $@,"STRIP","$(TARGET_DIR)$@") - -+# SLOF is GCC-only, so ignore warnings about GNU extensions with Clang here -+NO_GNU_WARN := $(call cc-option,-Werror $(QEMU_CFLAGS),-Wno-gnu) -+ - # libc files: - --LIBC_CFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(LIBC_INC) $(LIBNET_INC) \ -+LIBC_CFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(NO_GNU_WARN) $(LIBC_INC) $(LIBNET_INC) \ - -MMD -MP -MT $@ -MF $(@:%.o=%.d) - - CTYPE_OBJS = isdigit.o isxdigit.o toupper.o -@@ -52,7 +55,7 @@ libc.a: $(LIBCOBJS) - - LIBNETOBJS := args.o dhcp.o dns.o icmpv6.o ipv6.o tcp.o udp.o bootp.o \ - dhcpv6.o ethernet.o ipv4.o ndp.o tftp.o pxelinux.o --LIBNETCFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(LIBC_INC) $(LIBNET_INC) \ -+LIBNETCFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(NO_GNU_WARN) $(LIBC_INC) $(LIBNET_INC) \ - -DDHCPARCH=0x1F -MMD -MP -MT $@ -MF $(@:%.o=%.d) - - %.o : $(SLOF_DIR)/lib/libnet/%.c --- -2.31.1 - diff --git a/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch b/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch deleted file mode 100644 index 5e4b689..0000000 --- a/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 430e76fd964390db86c8486f76b916a1cf7f74c2 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 12/17] pc-bios/s390-ccw/virtio: Beautify the code for reading - virtqueue configuration - -RH-Author: Thomas Huth -RH-MergeRequest: 106: 
pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [7/10] b15c06b4c5431837672b6cb5d57d09da20718441 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit 070824885741f5d2a66626d3c4ecb2773c8e0552 -Author: Thomas Huth -Date: Mon Jul 4 13:18:59 2022 +0200 - - pc-bios/s390-ccw/virtio: Beautify the code for reading virtqueue configuration - - It looks nicer if we separate the run_ccw() from the IPL_assert() - statement, and the error message should talk about "virtio device" - instead of "block device", since this code is nowadays used for - non-block (i.e. network) devices, too. - - Message-Id: <20220704111903.62400-9-thuth@redhat.com> - Reviewed-by: Cornelia Huck - Reviewed-by: Eric Farman - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/virtio.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c -index d8c2b52710..f37510f312 100644 ---- a/pc-bios/s390-ccw/virtio.c -+++ b/pc-bios/s390-ccw/virtio.c -@@ -289,9 +289,8 @@ void virtio_setup_ccw(VDev *vdev) - .num = 0, - }; - -- IPL_assert( -- run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false) == 0, -- "Could not get block device VQ configuration"); -+ rc = run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false); -+ IPL_assert(rc == 0, "Could not get virtio device VQ configuration"); - info.num = config.num; - vring_init(&vdev->vrings[i], &info); - vdev->vrings[i].schid = vdev->schid; --- -2.31.1 - diff --git a/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch b/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch deleted file mode 100644 index 04ab605..0000000 --- a/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 
7d4f2454f95bfc087ad3f2fe3bc4625dcea3568e Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 06/17] pc-bios/s390-ccw/virtio: Introduce a macro for the DASD - block size - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [1/10] 71033934e1e9988bcf71362e02665ceb7449009d (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit 1f2c2ee48e87ea743f8e23cc7569dd26c4cf9623 -Author: Thomas Huth -Date: Mon Jul 4 13:18:53 2022 +0200 - - pc-bios/s390-ccw/virtio: Introduce a macro for the DASD block size - - Use VIRTIO_DASD_DEFAULT_BLOCK_SIZE instead of the magic value 4096. - - Message-Id: <20220704111903.62400-3-thuth@redhat.com> - Reviewed-by: Eric Farman - Reviewed-by: Cornelia Huck - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/virtio-blkdev.c | 2 +- - pc-bios/s390-ccw/virtio.h | 1 + - 2 files changed, 2 insertions(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c -index 7d35050292..6483307630 100644 ---- a/pc-bios/s390-ccw/virtio-blkdev.c -+++ b/pc-bios/s390-ccw/virtio-blkdev.c -@@ -155,7 +155,7 @@ void virtio_assume_eckd(void) - vdev->config.blk.physical_block_exp = 0; - switch (vdev->senseid.cu_model) { - case VIRTIO_ID_BLOCK: -- vdev->config.blk.blk_size = 4096; -+ vdev->config.blk.blk_size = VIRTIO_DASD_DEFAULT_BLOCK_SIZE; - break; - case VIRTIO_ID_SCSI: - vdev->config.blk.blk_size = vdev->scsi_block_size; -diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h -index 19fceb6495..9e410bde6f 100644 ---- a/pc-bios/s390-ccw/virtio.h -+++ b/pc-bios/s390-ccw/virtio.h -@@ -198,6 +198,7 @@ extern int virtio_read_many(ulong sector, void *load_addr, int sec_num); - #define VIRTIO_SECTOR_SIZE 512 
- #define VIRTIO_ISO_BLOCK_SIZE 2048 - #define VIRTIO_SCSI_BLOCK_SIZE 512 -+#define VIRTIO_DASD_DEFAULT_BLOCK_SIZE 4096 - - static inline ulong virtio_sector_adjust(ulong sector) - { --- -2.31.1 - diff --git a/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch b/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch deleted file mode 100644 index 41ae538..0000000 --- a/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 20f8724d0837acbe642c8c7698a4b256f34c1209 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 11/17] pc-bios/s390-ccw/virtio: Read device config after - feature negotiation - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [6/10] 54d21e430b2dfba9e0a0823d6bb8ec7e7f8ff2ff (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit aa5c69ce99411c4886bcd051f288afc02b6d968d -Author: Thomas Huth -Date: Mon Jul 4 13:18:58 2022 +0200 - - pc-bios/s390-ccw/virtio: Read device config after feature negotiation - - Feature negotiation should be done first, since some fields in the - config area can depend on the negotiated features and thus should - rather be read afterwards. - - While we're at it, also adjust the error message here a little bit - (the code is nowadays used for non-block virtio devices, too). 
- - Message-Id: <20220704111903.62400-8-thuth@redhat.com> - Reviewed-by: Eric Farman - Reviewed-by: Cornelia Huck - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/virtio.c | 7 +++---- - 1 file changed, 3 insertions(+), 4 deletions(-) - -diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c -index 4e85a2eb82..d8c2b52710 100644 ---- a/pc-bios/s390-ccw/virtio.c -+++ b/pc-bios/s390-ccw/virtio.c -@@ -262,10 +262,6 @@ void virtio_setup_ccw(VDev *vdev) - rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); - IPL_assert(rc == 0, "Could not write DRIVER status to host"); - -- IPL_assert( -- run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0, -- "Could not get block device configuration"); -- - /* Feature negotiation */ - for (i = 0; i < ARRAY_SIZE(vdev->guest_features); i++) { - feats.features = 0; -@@ -278,6 +274,9 @@ void virtio_setup_ccw(VDev *vdev) - IPL_assert(rc == 0, "Could not set features bits"); - } - -+ rc = run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false); -+ IPL_assert(rc == 0, "Could not get virtio device configuration"); -+ - for (i = 0; i < vdev->nr_vqs; i++) { - VqInfo info = { - .queue = (unsigned long long) ring_area + (i * VIRTIO_RING_SIZE), --- -2.31.1 - diff --git a/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch b/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch deleted file mode 100644 index e976047..0000000 --- a/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 303fb3ddcdbbd1373c5b1aa28e03f90507e217f3 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 10/17] pc-bios/s390-ccw/virtio: Set missing status bits while - initializing - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [5/10] 
4bc44d9adae055fb60b79d04a2f08535b4d38d2b (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit 175aa06a152ef6b58ba9b2e47a1296b024dea70c -Author: Thomas Huth -Date: Mon Jul 4 13:18:57 2022 +0200 - - pc-bios/s390-ccw/virtio: Set missing status bits while initializing - - According chapter "3.1.1 Driver Requirements: Device Initialization" - of the Virtio specification (v1.1), a driver for a device has to set - the ACKNOWLEDGE and DRIVER bits in the status field after resetting - the device. The s390-ccw bios skipped these steps so far and seems - like QEMU never cared. Anyway, it's better to follow the spec, so - let's set these bits now in the right spots, too. - - Message-Id: <20220704111903.62400-7-thuth@redhat.com> - Acked-by: Christian Borntraeger - Reviewed-by: Cornelia Huck - Reviewed-by: Eric Farman - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/virtio.c | 18 ++++++++++++++---- - 1 file changed, 14 insertions(+), 4 deletions(-) - -diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c -index 5d2c6e3381..4e85a2eb82 100644 ---- a/pc-bios/s390-ccw/virtio.c -+++ b/pc-bios/s390-ccw/virtio.c -@@ -220,7 +220,7 @@ int virtio_run(VDev *vdev, int vqid, VirtioCmd *cmd) - void virtio_setup_ccw(VDev *vdev) - { - int i, rc, cfg_size = 0; -- unsigned char status = VIRTIO_CONFIG_S_DRIVER_OK; -+ uint8_t status; - struct VirtioFeatureDesc { - uint32_t features; - uint8_t index; -@@ -234,6 +234,10 @@ void virtio_setup_ccw(VDev *vdev) - - run_ccw(vdev, CCW_CMD_VDEV_RESET, NULL, 0, false); - -+ status = VIRTIO_CONFIG_S_ACKNOWLEDGE; -+ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); -+ IPL_assert(rc == 0, "Could not write ACKNOWLEDGE status to host"); -+ - switch (vdev->senseid.cu_model) { - case VIRTIO_ID_NET: - vdev->nr_vqs = 2; -@@ -253,6 +257,11 @@ void 
virtio_setup_ccw(VDev *vdev) - default: - panic("Unsupported virtio device\n"); - } -+ -+ status |= VIRTIO_CONFIG_S_DRIVER; -+ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); -+ IPL_assert(rc == 0, "Could not write DRIVER status to host"); -+ - IPL_assert( - run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0, - "Could not get block device configuration"); -@@ -291,9 +300,10 @@ void virtio_setup_ccw(VDev *vdev) - run_ccw(vdev, CCW_CMD_SET_VQ, &info, sizeof(info), false) == 0, - "Cannot set VQ info"); - } -- IPL_assert( -- run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false) == 0, -- "Could not write status to host"); -+ -+ status |= VIRTIO_CONFIG_S_DRIVER_OK; -+ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); -+ IPL_assert(rc == 0, "Could not write DRIVER_OK status to host"); - } - - bool virtio_is_supported(SubChannelId schid) --- -2.31.1 - diff --git a/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch b/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch deleted file mode 100644 index 109b98e..0000000 --- a/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch +++ /dev/null @@ -1,101 +0,0 @@ -From d3335a98a7b6e084aadf4907968536a67cf8e64c Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 09/17] pc-bios/s390-ccw/virtio-blkdev: Remove - virtio_assume_scsi() - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [4/10] bf27f75344f220a03475a2918ed49ec9cd5ba317 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit 5447de2619050a0a4dd480b97f88a9b58da360d1 -Author: Thomas Huth -Date: Mon Jul 4 13:18:56 2022 +0200 - - pc-bios/s390-ccw/virtio-blkdev: Remove 
virtio_assume_scsi() - - The virtio_assume_scsi() function is very questionable: First, it - is only called for virtio-blk, and not for virtio-scsi, so the naming - is already quite confusing. Second, it is called if we detected a - "invalid" IPL disk, trying to fix it by blindly setting a sector - size of 512. This of course won't work in most cases since disks - might have a different sector size for a reason. - - Thus let's remove this strange function now. The calling code can - also be removed completely, since there is another spot in main.c - that does "IPL_assert(virtio_ipl_disk_is_valid(), ...)" to make - sure that we do not try to IPL from an invalid device. - - Message-Id: <20220704111903.62400-6-thuth@redhat.com> - Reviewed-by: Eric Farman - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/virtio-blkdev.c | 24 ------------------------ - pc-bios/s390-ccw/virtio.h | 1 - - 2 files changed, 25 deletions(-) - -diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c -index 7e13155589..db1f7f44aa 100644 ---- a/pc-bios/s390-ccw/virtio-blkdev.c -+++ b/pc-bios/s390-ccw/virtio-blkdev.c -@@ -112,23 +112,6 @@ VirtioGDN virtio_guessed_disk_nature(void) - return virtio_get_device()->guessed_disk_nature; - } - --void virtio_assume_scsi(void) --{ -- VDev *vdev = virtio_get_device(); -- -- switch (vdev->senseid.cu_model) { -- case VIRTIO_ID_BLOCK: -- vdev->guessed_disk_nature = VIRTIO_GDN_SCSI; -- vdev->config.blk.blk_size = VIRTIO_SCSI_BLOCK_SIZE; -- vdev->config.blk.physical_block_exp = 0; -- vdev->blk_factor = 1; -- break; -- case VIRTIO_ID_SCSI: -- vdev->scsi_block_size = VIRTIO_SCSI_BLOCK_SIZE; -- break; -- } --} -- - void virtio_assume_iso9660(void) - { - VDev *vdev = virtio_get_device(); -@@ -247,13 +230,6 @@ int virtio_blk_setup_device(SubChannelId schid) - switch (vdev->senseid.cu_model) { - case VIRTIO_ID_BLOCK: - sclp_print("Using virtio-blk.\n"); -- if (!virtio_ipl_disk_is_valid()) { -- /* make sure all 
getters but blocksize return 0 for -- * invalid IPL disk -- */ -- memset(&vdev->config.blk, 0, sizeof(vdev->config.blk)); -- virtio_assume_scsi(); -- } - break; - case VIRTIO_ID_SCSI: - IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, -diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h -index 241730effe..600ba5052b 100644 ---- a/pc-bios/s390-ccw/virtio.h -+++ b/pc-bios/s390-ccw/virtio.h -@@ -182,7 +182,6 @@ enum guessed_disk_nature_type { - typedef enum guessed_disk_nature_type VirtioGDN; - - VirtioGDN virtio_guessed_disk_nature(void); --void virtio_assume_scsi(void); - void virtio_assume_eckd(void); - void virtio_assume_iso9660(void); - --- -2.31.1 - diff --git a/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch b/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch deleted file mode 100644 index 8bc7a11..0000000 --- a/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch +++ /dev/null @@ -1,63 +0,0 @@ -From db58915fcaf3d24b64fe2c34cc15b5596b9a81bb Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 14/17] pc-bios/s390-ccw/virtio-blkdev: Request the right - feature bits - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [9/10] 9dcd8c2f659f366f9487ab6473d1f0d7778b40a7 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit 9125a314cca4a1838b09305a87d8efb98f80ab67 -Author: Thomas Huth -Date: Mon Jul 4 13:19:01 2022 +0200 - - pc-bios/s390-ccw/virtio-blkdev: Request the right feature bits - - The virtio-blk code uses the block size and geometry fields in the - config area. 
According to the virtio-spec, these have to be negotiated - with the right feature bits during initialization, otherwise they - might not be available. QEMU is so far very forgiving and always - provides them, but we should not rely on this behavior, so let's - better request them properly via the VIRTIO_BLK_F_GEOMETRY and - VIRTIO_BLK_F_BLK_SIZE feature bits. - - Message-Id: <20220704111903.62400-11-thuth@redhat.com> - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/virtio-blkdev.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c -index c175b66a47..8271c47296 100644 ---- a/pc-bios/s390-ccw/virtio-blkdev.c -+++ b/pc-bios/s390-ccw/virtio-blkdev.c -@@ -13,6 +13,9 @@ - #include "virtio.h" - #include "virtio-scsi.h" - -+#define VIRTIO_BLK_F_GEOMETRY (1 << 4) -+#define VIRTIO_BLK_F_BLK_SIZE (1 << 6) -+ - static int virtio_blk_read_many(VDev *vdev, ulong sector, void *load_addr, - int sec_num) - { -@@ -223,6 +226,7 @@ int virtio_blk_setup_device(SubChannelId schid) - { - VDev *vdev = virtio_get_device(); - -+ vdev->guest_features[0] = VIRTIO_BLK_F_GEOMETRY | VIRTIO_BLK_F_BLK_SIZE; - vdev->schid = schid; - virtio_setup_ccw(vdev); - --- -2.31.1 - diff --git a/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch b/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch deleted file mode 100644 index 818e515..0000000 --- a/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch +++ /dev/null @@ -1,124 +0,0 @@ -From f07e4629a7c58407f903810a038660c88c6a6315 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 08/17] pc-bios/s390-ccw/virtio-blkdev: Simplify/fix - virtio_ipl_disk_is_valid() - -RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [3/10] fb06830a3e50d9da3d84913b50bb227865cc44b3 
(thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cornelia Huck - -Bugzilla: http://bugzilla.redhat.com/2098077 - -commit bbf615f7b707f009ef8e757d170902ad33b90644 -Author: Thomas Huth -Date: Mon Jul 4 13:18:55 2022 +0200 - - pc-bios/s390-ccw/virtio-blkdev: Simplify/fix virtio_ipl_disk_is_valid() - - The s390-ccw bios fails to boot if the boot disk is a virtio-blk - disk with a sector size of 4096. For example: - - dasdfmt -b 4096 -d cdl -y -p -M quick /dev/dasdX - fdasd -a /dev/dasdX - install a guest onto /dev/dasdX1 using virtio-blk - qemu-system-s390x -nographic -hda /dev/dasdX1 - - The bios then bails out with: - - ! Cannot read block 0 ! - - Looking at virtio_ipl_disk_is_valid() and especially the function - virtio_disk_is_scsi(), it does not really make sense that we expect - only such a limited disk geometry (like a block size of 512) for - our boot disks. Let's relax the check and allow everything that - remotely looks like a sane disk. 
- - Message-Id: <20220704111903.62400-5-thuth@redhat.com> - Reviewed-by: Eric Farman - Signed-off-by: Thomas Huth - -Signed-off-by: Thomas Huth ---- - pc-bios/s390-ccw/virtio-blkdev.c | 41 ++++++-------------------------- - pc-bios/s390-ccw/virtio.h | 2 -- - 2 files changed, 7 insertions(+), 36 deletions(-) - -diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c -index 6483307630..7e13155589 100644 ---- a/pc-bios/s390-ccw/virtio-blkdev.c -+++ b/pc-bios/s390-ccw/virtio-blkdev.c -@@ -166,46 +166,19 @@ void virtio_assume_eckd(void) - virtio_eckd_sectors_for_block_size(vdev->config.blk.blk_size); - } - --bool virtio_disk_is_scsi(void) --{ -- VDev *vdev = virtio_get_device(); -- -- if (vdev->guessed_disk_nature == VIRTIO_GDN_SCSI) { -- return true; -- } -- switch (vdev->senseid.cu_model) { -- case VIRTIO_ID_BLOCK: -- return (vdev->config.blk.geometry.heads == 255) -- && (vdev->config.blk.geometry.sectors == 63) -- && (virtio_get_block_size() == VIRTIO_SCSI_BLOCK_SIZE); -- case VIRTIO_ID_SCSI: -- return true; -- } -- return false; --} -- --bool virtio_disk_is_eckd(void) -+bool virtio_ipl_disk_is_valid(void) - { -+ int blksize = virtio_get_block_size(); - VDev *vdev = virtio_get_device(); -- const int block_size = virtio_get_block_size(); - -- if (vdev->guessed_disk_nature == VIRTIO_GDN_DASD) { -+ if (vdev->guessed_disk_nature == VIRTIO_GDN_SCSI || -+ vdev->guessed_disk_nature == VIRTIO_GDN_DASD) { - return true; - } -- switch (vdev->senseid.cu_model) { -- case VIRTIO_ID_BLOCK: -- return (vdev->config.blk.geometry.heads == 15) -- && (vdev->config.blk.geometry.sectors == -- virtio_eckd_sectors_for_block_size(block_size)); -- case VIRTIO_ID_SCSI: -- return false; -- } -- return false; --} - --bool virtio_ipl_disk_is_valid(void) --{ -- return virtio_disk_is_scsi() || virtio_disk_is_eckd(); -+ return (vdev->senseid.cu_model == VIRTIO_ID_BLOCK || -+ vdev->senseid.cu_model == VIRTIO_ID_SCSI) && -+ blksize >= 512 && blksize <= 4096; - } - - int 
virtio_get_block_size(void) -diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h -index 9e410bde6f..241730effe 100644 ---- a/pc-bios/s390-ccw/virtio.h -+++ b/pc-bios/s390-ccw/virtio.h -@@ -186,8 +186,6 @@ void virtio_assume_scsi(void); - void virtio_assume_eckd(void); - void virtio_assume_iso9660(void); - --extern bool virtio_disk_is_scsi(void); --extern bool virtio_disk_is_eckd(void); - extern bool virtio_ipl_disk_is_valid(void); - extern int virtio_get_block_size(void); - extern uint8_t virtio_get_heads(void); --- -2.31.1 - diff --git a/kvm-qapi-machine.json-Add-cluster-id.patch b/kvm-qapi-machine.json-Add-cluster-id.patch deleted file mode 100644 index 2b2a22a..0000000 --- a/kvm-qapi-machine.json-Add-cluster-id.patch +++ /dev/null @@ -1,126 +0,0 @@ -From e97c563f7146098119839aa146a6f25070eb7148 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 11 May 2022 18:01:02 +0800 -Subject: [PATCH 01/16] qapi/machine.json: Add cluster-id - -RH-Author: Gavin Shan -RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology -RH-Commit: [1/6] 44d7d83008c6d28485ae44f7cced792f4987b919 (gwshan/qemu-rhel-9) -RH-Bugzilla: 2041823 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Andrew Jones - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 - -This adds cluster-id in CPU instance properties, which will be used -by arm/virt machine. Besides, the cluster-id is also verified or -dumped in various spots: - - * hw/core/machine.c::machine_set_cpu_numa_node() to associate - CPU with its NUMA node. - - * hw/core/machine.c::machine_numa_finish_cpu_init() to record - CPU slots with no NUMA mapping set. - - * hw/core/machine-hmp-cmds.c::hmp_hotpluggable_cpus() to dump - cluster-id. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Yanan Wang -Acked-by: Igor Mammedov -Message-id: 20220503140304.855514-2-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 1dcf7001d4bae651129d46d5628b29e93a411d0b) -Signed-off-by: Gavin Shan ---- - hw/core/machine-hmp-cmds.c | 4 ++++ - hw/core/machine.c | 16 ++++++++++++++++ - qapi/machine.json | 6 ++++-- - 3 files changed, 24 insertions(+), 2 deletions(-) - -diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c -index 4e2f319aeb..5cb5eecbfc 100644 ---- a/hw/core/machine-hmp-cmds.c -+++ b/hw/core/machine-hmp-cmds.c -@@ -77,6 +77,10 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict) - if (c->has_die_id) { - monitor_printf(mon, " die-id: \"%" PRIu64 "\"\n", c->die_id); - } -+ if (c->has_cluster_id) { -+ monitor_printf(mon, " cluster-id: \"%" PRIu64 "\"\n", -+ c->cluster_id); -+ } - if (c->has_core_id) { - monitor_printf(mon, " core-id: \"%" PRIu64 "\"\n", c->core_id); - } -diff --git a/hw/core/machine.c b/hw/core/machine.c -index dffc3ef4ab..168f4de910 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -890,6 +890,11 @@ void machine_set_cpu_numa_node(MachineState *machine, - return; - } - -+ if (props->has_cluster_id && !slot->props.has_cluster_id) { -+ error_setg(errp, "cluster-id is not supported"); -+ return; -+ } -+ - if (props->has_socket_id && !slot->props.has_socket_id) { - error_setg(errp, "socket-id is not supported"); - return; -@@ -909,6 +914,11 @@ void machine_set_cpu_numa_node(MachineState *machine, - continue; - } - -+ if (props->has_cluster_id && -+ props->cluster_id != slot->props.cluster_id) { -+ continue; -+ } -+ - if (props->has_die_id && props->die_id != slot->props.die_id) { - continue; - } -@@ -1203,6 +1213,12 @@ static char *cpu_slot_to_string(const CPUArchId *cpu) - } - g_string_append_printf(s, "die-id: %"PRId64, cpu->props.die_id); - } -+ if (cpu->props.has_cluster_id) { -+ if (s->len) { -+ g_string_append_printf(s, ", "); -+ } -+ 
g_string_append_printf(s, "cluster-id: %"PRId64, cpu->props.cluster_id); -+ } - if (cpu->props.has_core_id) { - if (s->len) { - g_string_append_printf(s, ", "); -diff --git a/qapi/machine.json b/qapi/machine.json -index d25a481ce4..4c417e32a5 100644 ---- a/qapi/machine.json -+++ b/qapi/machine.json -@@ -868,10 +868,11 @@ - # @node-id: NUMA node ID the CPU belongs to - # @socket-id: socket number within node/board the CPU belongs to - # @die-id: die number within socket the CPU belongs to (since 4.1) --# @core-id: core number within die the CPU belongs to -+# @cluster-id: cluster number within die the CPU belongs to (since 7.1) -+# @core-id: core number within cluster the CPU belongs to - # @thread-id: thread number within core the CPU belongs to - # --# Note: currently there are 5 properties that could be present -+# Note: currently there are 6 properties that could be present - # but management should be prepared to pass through other - # properties with device_add command to allow for future - # interface extension. 
This also requires the filed names to be kept in -@@ -883,6 +884,7 @@ - 'data': { '*node-id': 'int', - '*socket-id': 'int', - '*die-id': 'int', -+ '*cluster-id': 'int', - '*core-id': 'int', - '*thread-id': 'int' - } --- -2.31.1 - diff --git a/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch b/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch deleted file mode 100644 index 9010d3d..0000000 --- a/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch +++ /dev/null @@ -1,162 +0,0 @@ -From 5e385a0e49a520550a83299632be175857b63f19 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Tue, 5 Apr 2022 15:46:52 +0200 -Subject: [PATCH 06/16] qcow2: Add errp to rebuild_refcount_structure() - -RH-Author: Hanna Reitz -RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding -RH-Commit: [3/4] 937b89a7eab6ec6b18618d59bc1526976ad03290 (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2072379 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -Instead of fprint()-ing error messages in rebuild_refcount_structure() -and its rebuild_refcounts_write_refblocks() helper, pass them through an -Error object to qcow2_check_refcounts() (which will then print it). 
- -Suggested-by: Eric Blake -Signed-off-by: Hanna Reitz -Message-Id: <20220405134652.19278-4-hreitz@redhat.com> -Reviewed-by: Eric Blake -(cherry picked from commit 0423f75351ab83b844a31349218b0eadd830e07a) -Signed-off-by: Hanna Reitz ---- - block/qcow2-refcount.c | 33 +++++++++++++++++++-------------- - 1 file changed, 19 insertions(+), 14 deletions(-) - -diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c -index c5669eaa51..ed0ecfaa89 100644 ---- a/block/qcow2-refcount.c -+++ b/block/qcow2-refcount.c -@@ -2465,7 +2465,8 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, - static int rebuild_refcounts_write_refblocks( - BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, - int64_t first_cluster, int64_t end_cluster, -- uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr -+ uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr, -+ Error **errp - ) - { - BDRVQcow2State *s = bs->opaque; -@@ -2516,8 +2517,8 @@ static int rebuild_refcounts_write_refblocks( - nb_clusters, - &first_free_cluster); - if (refblock_offset < 0) { -- fprintf(stderr, "ERROR allocating refblock: %s\n", -- strerror(-refblock_offset)); -+ error_setg_errno(errp, -refblock_offset, -+ "ERROR allocating refblock"); - return refblock_offset; - } - -@@ -2539,6 +2540,7 @@ static int rebuild_refcounts_write_refblocks( - on_disk_reftable_entries * - REFTABLE_ENTRY_SIZE); - if (!on_disk_reftable) { -+ error_setg(errp, "ERROR allocating reftable memory"); - return -ENOMEM; - } - -@@ -2562,7 +2564,7 @@ static int rebuild_refcounts_write_refblocks( - ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, - s->cluster_size, false); - if (ret < 0) { -- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); -+ error_setg_errno(errp, -ret, "ERROR writing refblock"); - return ret; - } - -@@ -2578,7 +2580,7 @@ static int rebuild_refcounts_write_refblocks( - ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock, - 
s->cluster_size); - if (ret < 0) { -- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); -+ error_setg_errno(errp, -ret, "ERROR writing refblock"); - return ret; - } - -@@ -2601,7 +2603,8 @@ static int rebuild_refcounts_write_refblocks( - static int rebuild_refcount_structure(BlockDriverState *bs, - BdrvCheckResult *res, - void **refcount_table, -- int64_t *nb_clusters) -+ int64_t *nb_clusters, -+ Error **errp) - { - BDRVQcow2State *s = bs->opaque; - int64_t reftable_offset = -1; -@@ -2652,7 +2655,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, - rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, - 0, *nb_clusters, - &on_disk_reftable, -- &on_disk_reftable_entries); -+ &on_disk_reftable_entries, errp); - if (reftable_size_changed < 0) { - res->check_errors++; - ret = reftable_size_changed; -@@ -2676,8 +2679,8 @@ static int rebuild_refcount_structure(BlockDriverState *bs, - refcount_table, nb_clusters, - &first_free_cluster); - if (reftable_offset < 0) { -- fprintf(stderr, "ERROR allocating reftable: %s\n", -- strerror(-reftable_offset)); -+ error_setg_errno(errp, -reftable_offset, -+ "ERROR allocating reftable"); - res->check_errors++; - ret = reftable_offset; - goto fail; -@@ -2695,7 +2698,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, - reftable_start_cluster, - reftable_end_cluster, - &on_disk_reftable, -- &on_disk_reftable_entries); -+ &on_disk_reftable_entries, errp); - if (reftable_size_changed < 0) { - res->check_errors++; - ret = reftable_size_changed; -@@ -2725,7 +2728,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, - ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, - false); - if (ret < 0) { -- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); -+ error_setg_errno(errp, -ret, "ERROR writing reftable"); - goto fail; - } - -@@ -2733,7 +2736,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, - ret = bdrv_pwrite(bs->file, 
reftable_offset, on_disk_reftable, - reftable_length); - if (ret < 0) { -- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); -+ error_setg_errno(errp, -ret, "ERROR writing reftable"); - goto fail; - } - -@@ -2746,7 +2749,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, - &reftable_offset_and_clusters, - sizeof(reftable_offset_and_clusters)); - if (ret < 0) { -- fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret)); -+ error_setg_errno(errp, -ret, "ERROR setting reftable"); - goto fail; - } - -@@ -2814,11 +2817,13 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, - if (rebuild && (fix & BDRV_FIX_ERRORS)) { - BdrvCheckResult old_res = *res; - int fresh_leaks = 0; -+ Error *local_err = NULL; - - fprintf(stderr, "Rebuilding refcount structure\n"); - ret = rebuild_refcount_structure(bs, res, &refcount_table, -- &nb_clusters); -+ &nb_clusters, &local_err); - if (ret < 0) { -+ error_report_err(local_err); - goto fail; - } - --- -2.31.1 - diff --git a/kvm-qcow2-Improve-refcount-structure-rebuilding.patch b/kvm-qcow2-Improve-refcount-structure-rebuilding.patch deleted file mode 100644 index cdc92b8..0000000 --- a/kvm-qcow2-Improve-refcount-structure-rebuilding.patch +++ /dev/null @@ -1,465 +0,0 @@ -From b453cf6be8429f4438d51eb24fcf49e7d9f14db6 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Tue, 5 Apr 2022 15:46:50 +0200 -Subject: [PATCH 04/16] qcow2: Improve refcount structure rebuilding - -RH-Author: Hanna Reitz -RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding -RH-Commit: [1/4] a3606b7abcaebb4930b566e95b1090aead62dfae (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2072379 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi - -When rebuilding the refcount structures (when qemu-img check -r found -errors with refcount = 0, but reference count > 0), the new refcount -table defaults to being put at the image file end[1]. 
There is no good -reason for that except that it means we will not have to rewrite any -refblocks we already wrote to disk. - -Changing the code to rewrite those refblocks is not too difficult, -though, so let us do that. That is beneficial for images on block -devices, where we cannot really write beyond the end of the image file. - -Use this opportunity to add extensive comments to the code, and refactor -it a bit, getting rid of the backwards-jumping goto. - -[1] Unless there is something allocated in the area pointed to by the - last refblock, so we have to write that refblock. In that case, we - try to put the reftable in there. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1519071 -Closes: https://gitlab.com/qemu-project/qemu/-/issues/941 -Reviewed-by: Eric Blake -Signed-off-by: Hanna Reitz -Message-Id: <20220405134652.19278-2-hreitz@redhat.com> -(cherry picked from commit a8c07ec287554dcefd33733f0e5888a281ddc95e) -Signed-off-by: Hanna Reitz ---- - block/qcow2-refcount.c | 332 +++++++++++++++++++++++++++++------------ - 1 file changed, 235 insertions(+), 97 deletions(-) - -diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c -index b91499410c..c5669eaa51 100644 ---- a/block/qcow2-refcount.c -+++ b/block/qcow2-refcount.c -@@ -2438,111 +2438,140 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, - } - - /* -- * Creates a new refcount structure based solely on the in-memory information -- * given through *refcount_table. All necessary allocations will be reflected -- * in that array. -+ * Helper function for rebuild_refcount_structure(). - * -- * On success, the old refcount structure is leaked (it will be covered by the -- * new refcount structure). -+ * Scan the range of clusters [first_cluster, end_cluster) for allocated -+ * clusters and write all corresponding refblocks to disk. 
The refblock -+ * and allocation data is taken from the in-memory refcount table -+ * *refcount_table[] (of size *nb_clusters), which is basically one big -+ * (unlimited size) refblock for the whole image. -+ * -+ * For these refblocks, clusters are allocated using said in-memory -+ * refcount table. Care is taken that these allocations are reflected -+ * in the refblocks written to disk. -+ * -+ * The refblocks' offsets are written into a reftable, which is -+ * *on_disk_reftable_ptr[] (of size *on_disk_reftable_entries_ptr). If -+ * that reftable is of insufficient size, it will be resized to fit. -+ * This reftable is not written to disk. -+ * -+ * (If *on_disk_reftable_ptr is not NULL, the entries within are assumed -+ * to point to existing valid refblocks that do not need to be allocated -+ * again.) -+ * -+ * Return whether the on-disk reftable array was resized (true/false), -+ * or -errno on error. - */ --static int rebuild_refcount_structure(BlockDriverState *bs, -- BdrvCheckResult *res, -- void **refcount_table, -- int64_t *nb_clusters) -+static int rebuild_refcounts_write_refblocks( -+ BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, -+ int64_t first_cluster, int64_t end_cluster, -+ uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr -+ ) - { - BDRVQcow2State *s = bs->opaque; -- int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0; -+ int64_t cluster; - int64_t refblock_offset, refblock_start, refblock_index; -- uint32_t reftable_size = 0; -- uint64_t *on_disk_reftable = NULL; -+ int64_t first_free_cluster = 0; -+ uint64_t *on_disk_reftable = *on_disk_reftable_ptr; -+ uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr; - void *on_disk_refblock; -- int ret = 0; -- struct { -- uint64_t reftable_offset; -- uint32_t reftable_clusters; -- } QEMU_PACKED reftable_offset_and_clusters; -- -- qcow2_cache_empty(bs, s->refcount_block_cache); -+ bool reftable_grown = false; -+ int ret; - 
--write_refblocks: -- for (; cluster < *nb_clusters; cluster++) { -+ for (cluster = first_cluster; cluster < end_cluster; cluster++) { -+ /* Check all clusters to find refblocks that contain non-zero entries */ - if (!s->get_refcount(*refcount_table, cluster)) { - continue; - } - -+ /* -+ * This cluster is allocated, so we need to create a refblock -+ * for it. The data we will write to disk is just the -+ * respective slice from *refcount_table, so it will contain -+ * accurate refcounts for all clusters belonging to this -+ * refblock. After we have written it, we will therefore skip -+ * all remaining clusters in this refblock. -+ */ -+ - refblock_index = cluster >> s->refcount_block_bits; - refblock_start = refblock_index << s->refcount_block_bits; - -- /* Don't allocate a cluster in a refblock already written to disk */ -- if (first_free_cluster < refblock_start) { -- first_free_cluster = refblock_start; -- } -- refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, -- nb_clusters, &first_free_cluster); -- if (refblock_offset < 0) { -- fprintf(stderr, "ERROR allocating refblock: %s\n", -- strerror(-refblock_offset)); -- res->check_errors++; -- ret = refblock_offset; -- goto fail; -- } -+ if (on_disk_reftable_entries > refblock_index && -+ on_disk_reftable[refblock_index]) -+ { -+ /* -+ * We can get here after a `goto write_refblocks`: We have a -+ * reftable from a previous run, and the refblock is already -+ * allocated. No need to allocate it again. 
-+ */ -+ refblock_offset = on_disk_reftable[refblock_index]; -+ } else { -+ int64_t refblock_cluster_index; - -- if (reftable_size <= refblock_index) { -- uint32_t old_reftable_size = reftable_size; -- uint64_t *new_on_disk_reftable; -+ /* Don't allocate a cluster in a refblock already written to disk */ -+ if (first_free_cluster < refblock_start) { -+ first_free_cluster = refblock_start; -+ } -+ refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, -+ nb_clusters, -+ &first_free_cluster); -+ if (refblock_offset < 0) { -+ fprintf(stderr, "ERROR allocating refblock: %s\n", -+ strerror(-refblock_offset)); -+ return refblock_offset; -+ } - -- reftable_size = ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, -- s->cluster_size) / REFTABLE_ENTRY_SIZE; -- new_on_disk_reftable = g_try_realloc(on_disk_reftable, -- reftable_size * -- REFTABLE_ENTRY_SIZE); -- if (!new_on_disk_reftable) { -- res->check_errors++; -- ret = -ENOMEM; -- goto fail; -+ refblock_cluster_index = refblock_offset / s->cluster_size; -+ if (refblock_cluster_index >= end_cluster) { -+ /* -+ * We must write the refblock that holds this refblock's -+ * refcount -+ */ -+ end_cluster = refblock_cluster_index + 1; - } -- on_disk_reftable = new_on_disk_reftable; - -- memset(on_disk_reftable + old_reftable_size, 0, -- (reftable_size - old_reftable_size) * REFTABLE_ENTRY_SIZE); -+ if (on_disk_reftable_entries <= refblock_index) { -+ on_disk_reftable_entries = -+ ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, -+ s->cluster_size) / REFTABLE_ENTRY_SIZE; -+ on_disk_reftable = -+ g_try_realloc(on_disk_reftable, -+ on_disk_reftable_entries * -+ REFTABLE_ENTRY_SIZE); -+ if (!on_disk_reftable) { -+ return -ENOMEM; -+ } - -- /* The offset we have for the reftable is now no longer valid; -- * this will leak that range, but we can easily fix that by running -- * a leak-fixing check after this rebuild operation */ -- reftable_offset = -1; -- } else { -- assert(on_disk_reftable); -- } -- 
on_disk_reftable[refblock_index] = refblock_offset; -+ memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0, -+ (on_disk_reftable_entries - -+ *on_disk_reftable_entries_ptr) * -+ REFTABLE_ENTRY_SIZE); - -- /* If this is apparently the last refblock (for now), try to squeeze the -- * reftable in */ -- if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits && -- reftable_offset < 0) -- { -- uint64_t reftable_clusters = size_to_clusters(s, reftable_size * -- REFTABLE_ENTRY_SIZE); -- reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, -- refcount_table, nb_clusters, -- &first_free_cluster); -- if (reftable_offset < 0) { -- fprintf(stderr, "ERROR allocating reftable: %s\n", -- strerror(-reftable_offset)); -- res->check_errors++; -- ret = reftable_offset; -- goto fail; -+ *on_disk_reftable_ptr = on_disk_reftable; -+ *on_disk_reftable_entries_ptr = on_disk_reftable_entries; -+ -+ reftable_grown = true; -+ } else { -+ assert(on_disk_reftable); - } -+ on_disk_reftable[refblock_index] = refblock_offset; - } - -+ /* Refblock is allocated, write it to disk */ -+ - ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, - s->cluster_size, false); - if (ret < 0) { - fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); -- goto fail; -+ return ret; - } - -- /* The size of *refcount_table is always cluster-aligned, therefore the -- * write operation will not overflow */ -+ /* -+ * The refblock is simply a slice of *refcount_table. -+ * Note that the size of *refcount_table is always aligned to -+ * whole clusters, so the write operation will not result in -+ * out-of-bounds accesses. 
-+ */ - on_disk_refblock = (void *)((char *) *refcount_table + - refblock_index * s->cluster_size); - -@@ -2550,23 +2579,99 @@ write_refblocks: - s->cluster_size); - if (ret < 0) { - fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); -- goto fail; -+ return ret; - } - -- /* Go to the end of this refblock */ -+ /* This refblock is done, skip to its end */ - cluster = refblock_start + s->refcount_block_size - 1; - } - -- if (reftable_offset < 0) { -- uint64_t post_refblock_start, reftable_clusters; -+ return reftable_grown; -+} -+ -+/* -+ * Creates a new refcount structure based solely on the in-memory information -+ * given through *refcount_table (this in-memory information is basically just -+ * the concatenation of all refblocks). All necessary allocations will be -+ * reflected in that array. -+ * -+ * On success, the old refcount structure is leaked (it will be covered by the -+ * new refcount structure). -+ */ -+static int rebuild_refcount_structure(BlockDriverState *bs, -+ BdrvCheckResult *res, -+ void **refcount_table, -+ int64_t *nb_clusters) -+{ -+ BDRVQcow2State *s = bs->opaque; -+ int64_t reftable_offset = -1; -+ int64_t reftable_length = 0; -+ int64_t reftable_clusters; -+ int64_t refblock_index; -+ uint32_t on_disk_reftable_entries = 0; -+ uint64_t *on_disk_reftable = NULL; -+ int ret = 0; -+ int reftable_size_changed = 0; -+ struct { -+ uint64_t reftable_offset; -+ uint32_t reftable_clusters; -+ } QEMU_PACKED reftable_offset_and_clusters; -+ -+ qcow2_cache_empty(bs, s->refcount_block_cache); -+ -+ /* -+ * For each refblock containing entries, we try to allocate a -+ * cluster (in the in-memory refcount table) and write its offset -+ * into on_disk_reftable[]. We then write the whole refblock to -+ * disk (as a slice of the in-memory refcount table). -+ * This is done by rebuild_refcounts_write_refblocks(). -+ * -+ * Once we have scanned all clusters, we try to find space for the -+ * reftable. 
This will dirty the in-memory refcount table (i.e. -+ * make it differ from the refblocks we have already written), so we -+ * need to run rebuild_refcounts_write_refblocks() again for the -+ * range of clusters where the reftable has been allocated. -+ * -+ * This second run might make the reftable grow again, in which case -+ * we will need to allocate another space for it, which is why we -+ * repeat all this until the reftable stops growing. -+ * -+ * (This loop will terminate, because with every cluster the -+ * reftable grows, it can accomodate a multitude of more refcounts, -+ * so that at some point this must be able to cover the reftable -+ * and all refblocks describing it.) -+ * -+ * We then convert the reftable to big-endian and write it to disk. -+ * -+ * Note that we never free any reftable allocations. Doing so would -+ * needlessly complicate the algorithm: The eventual second check -+ * run we do will clean up all leaks we have caused. -+ */ -+ -+ reftable_size_changed = -+ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, -+ 0, *nb_clusters, -+ &on_disk_reftable, -+ &on_disk_reftable_entries); -+ if (reftable_size_changed < 0) { -+ res->check_errors++; -+ ret = reftable_size_changed; -+ goto fail; -+ } -+ -+ /* -+ * There was no reftable before, so rebuild_refcounts_write_refblocks() -+ * must have increased its size (from 0 to something). 
-+ */ -+ assert(reftable_size_changed); -+ -+ do { -+ int64_t reftable_start_cluster, reftable_end_cluster; -+ int64_t first_free_cluster = 0; -+ -+ reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE; -+ reftable_clusters = size_to_clusters(s, reftable_length); - -- post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size); -- reftable_clusters = -- size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE); -- /* Not pretty but simple */ -- if (first_free_cluster < post_refblock_start) { -- first_free_cluster = post_refblock_start; -- } - reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, - refcount_table, nb_clusters, - &first_free_cluster); -@@ -2578,24 +2683,55 @@ write_refblocks: - goto fail; - } - -- goto write_refblocks; -- } -+ /* -+ * We need to update the affected refblocks, so re-run the -+ * write_refblocks loop for the reftable's range of clusters. -+ */ -+ assert(offset_into_cluster(s, reftable_offset) == 0); -+ reftable_start_cluster = reftable_offset / s->cluster_size; -+ reftable_end_cluster = reftable_start_cluster + reftable_clusters; -+ reftable_size_changed = -+ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, -+ reftable_start_cluster, -+ reftable_end_cluster, -+ &on_disk_reftable, -+ &on_disk_reftable_entries); -+ if (reftable_size_changed < 0) { -+ res->check_errors++; -+ ret = reftable_size_changed; -+ goto fail; -+ } -+ -+ /* -+ * If the reftable size has changed, we will need to find a new -+ * allocation, repeating the loop. -+ */ -+ } while (reftable_size_changed); - -- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { -+ /* The above loop must have run at least once */ -+ assert(reftable_offset >= 0); -+ -+ /* -+ * All allocations are done, all refblocks are written, convert the -+ * reftable to big-endian and write it to disk. 
-+ */ -+ -+ for (refblock_index = 0; refblock_index < on_disk_reftable_entries; -+ refblock_index++) -+ { - cpu_to_be64s(&on_disk_reftable[refblock_index]); - } - -- ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, -- reftable_size * REFTABLE_ENTRY_SIZE, -+ ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, - false); - if (ret < 0) { - fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); - goto fail; - } - -- assert(reftable_size < INT_MAX / REFTABLE_ENTRY_SIZE); -+ assert(reftable_length < INT_MAX); - ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable, -- reftable_size * REFTABLE_ENTRY_SIZE); -+ reftable_length); - if (ret < 0) { - fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); - goto fail; -@@ -2604,7 +2740,7 @@ write_refblocks: - /* Enter new reftable into the image header */ - reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset); - reftable_offset_and_clusters.reftable_clusters = -- cpu_to_be32(size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE)); -+ cpu_to_be32(reftable_clusters); - ret = bdrv_pwrite_sync(bs->file, - offsetof(QCowHeader, refcount_table_offset), - &reftable_offset_and_clusters, -@@ -2614,12 +2750,14 @@ write_refblocks: - goto fail; - } - -- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { -+ for (refblock_index = 0; refblock_index < on_disk_reftable_entries; -+ refblock_index++) -+ { - be64_to_cpus(&on_disk_reftable[refblock_index]); - } - s->refcount_table = on_disk_reftable; - s->refcount_table_offset = reftable_offset; -- s->refcount_table_size = reftable_size; -+ s->refcount_table_size = on_disk_reftable_entries; - update_max_refcount_table_index(s); - - return 0; --- -2.31.1 - diff --git a/kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch b/kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch deleted file mode 100644 index 9acff58..0000000 --- 
a/kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch +++ /dev/null @@ -1,92 +0,0 @@ -From e6aae1d0368a152924c38775e517f4e83c1d898b Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Wed, 11 May 2022 19:49:23 -0500 -Subject: [PATCH 1/2] qemu-nbd: Pass max connections to blockdev layer -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Blake -RH-MergeRequest: 90: Advertise MULTI_CONN on writeable NBD servers -RH-Commit: [1/2] b0e33fd125bf3523b8b9a4dead3c8bb2342bfd4e (ebblake/centos-qemu-kvm) -RH-Bugzilla: 1708300 -RH-Acked-by: Nir Soffer -RH-Acked-by: Kevin Wolf -RH-Acked-by: Daniel P. Berrangé - -The next patch wants to adjust whether the NBD server code advertises -MULTI_CONN based on whether it is known if the server limits to -exactly one client. For a server started by QMP, this information is -obtained through nbd_server_start (which can support more than one -export); but for qemu-nbd (which supports exactly one export), it is -controlled only by the command-line option -e/--shared. Since we -already have a hook function used by qemu-nbd, it's easiest to just -alter its signature to fit our needs. 
- -Signed-off-by: Eric Blake -Message-Id: <20220512004924.417153-2-eblake@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit a5fced40212ed73c715ca298a2929dd4d99c9999) -Signed-off-by: Eric Blake ---- - blockdev-nbd.c | 8 ++++---- - include/block/nbd.h | 2 +- - qemu-nbd.c | 2 +- - 3 files changed, 6 insertions(+), 6 deletions(-) - -diff --git a/blockdev-nbd.c b/blockdev-nbd.c -index 9840d25a82..add41a23af 100644 ---- a/blockdev-nbd.c -+++ b/blockdev-nbd.c -@@ -30,18 +30,18 @@ typedef struct NBDServerData { - } NBDServerData; - - static NBDServerData *nbd_server; --static bool is_qemu_nbd; -+static int qemu_nbd_connections = -1; /* Non-negative if this is qemu-nbd */ - - static void nbd_update_server_watch(NBDServerData *s); - --void nbd_server_is_qemu_nbd(bool value) -+void nbd_server_is_qemu_nbd(int max_connections) - { -- is_qemu_nbd = value; -+ qemu_nbd_connections = max_connections; - } - - bool nbd_server_is_running(void) - { -- return nbd_server || is_qemu_nbd; -+ return nbd_server || qemu_nbd_connections >= 0; - } - - static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) -diff --git a/include/block/nbd.h b/include/block/nbd.h -index a98eb665da..c5a29ce1c6 100644 ---- a/include/block/nbd.h -+++ b/include/block/nbd.h -@@ -344,7 +344,7 @@ void nbd_client_new(QIOChannelSocket *sioc, - void nbd_client_get(NBDClient *client); - void nbd_client_put(NBDClient *client); - --void nbd_server_is_qemu_nbd(bool value); -+void nbd_server_is_qemu_nbd(int max_connections); - bool nbd_server_is_running(void); - void nbd_server_start(SocketAddress *addr, const char *tls_creds, - const char *tls_authz, uint32_t max_connections, -diff --git a/qemu-nbd.c b/qemu-nbd.c -index 713e7557a9..8c25ae93df 100644 ---- a/qemu-nbd.c -+++ b/qemu-nbd.c -@@ -1087,7 +1087,7 @@ int main(int argc, char **argv) - - bs->detect_zeroes = detect_zeroes; - -- nbd_server_is_qemu_nbd(true); -+ nbd_server_is_qemu_nbd(shared); - - export_opts = g_new(BlockExportOptions, 1); - 
*export_opts = (BlockExportOptions) { --- -2.31.1 - diff --git a/kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch b/kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch deleted file mode 100644 index 9c2ac99..0000000 --- a/kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch +++ /dev/null @@ -1,100 +0,0 @@ -From a039ed652e6d2f5edcef9d5d1d3baec17ce7f929 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 11 May 2022 18:01:35 +0800 -Subject: [PATCH 04/16] qtest/numa-test: Correct CPU and NUMA association in - aarch64_numa_cpu() - -RH-Author: Gavin Shan -RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology -RH-Commit: [4/6] 64e9908a179eb4fb586d662f70f275a81808e50c (gwshan/qemu-rhel-9) -RH-Bugzilla: 2041823 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Andrew Jones - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 - -In aarch64_numa_cpu(), the CPU and NUMA association is something -like below. Two threads in the same core/cluster/socket are -associated with two individual NUMA nodes, which is unreal as -Igor Mammedov mentioned. We don't expect the association to break -NUMA-to-socket boundary, which matches with the real world. - -NUMA-node socket cluster core thread ------------------------------------------- -0 0 0 0 0 -1 0 0 0 1 - -This corrects the topology for CPUs and their association with -NUMA nodes. After this patch is applied, the CPU and NUMA -association becomes something like below, which looks real. -Besides, socket/cluster/core/thread IDs are all checked when -the NUMA node IDs are verified. It helps to check if the CPU -topology is properly populated or not. 
- -NUMA-node socket cluster core thread ------------------------------------------- -0 1 0 0 0 -1 0 0 0 0 - -Suggested-by: Igor Mammedov -Signed-off-by: Gavin Shan -Acked-by: Igor Mammedov -Message-id: 20220503140304.855514-5-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit e280ecb39bc1629f74ea5479d464fd1608dc8f76) -Signed-off-by: Gavin Shan ---- - tests/qtest/numa-test.c | 18 ++++++++++++------ - 1 file changed, 12 insertions(+), 6 deletions(-) - -diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c -index aeda8c774c..32e35daaae 100644 ---- a/tests/qtest/numa-test.c -+++ b/tests/qtest/numa-test.c -@@ -224,17 +224,17 @@ static void aarch64_numa_cpu(const void *data) - g_autofree char *cli = NULL; - - cli = make_cli(data, "-machine " -- "smp.cpus=2,smp.sockets=1,smp.clusters=1,smp.cores=1,smp.threads=2 " -+ "smp.cpus=2,smp.sockets=2,smp.clusters=1,smp.cores=1,smp.threads=1 " - "-numa node,nodeid=0,memdev=ram -numa node,nodeid=1 " -- "-numa cpu,node-id=1,thread-id=0 " -- "-numa cpu,node-id=0,thread-id=1"); -+ "-numa cpu,node-id=0,socket-id=1,cluster-id=0,core-id=0,thread-id=0 " -+ "-numa cpu,node-id=1,socket-id=0,cluster-id=0,core-id=0,thread-id=0"); - qts = qtest_init(cli); - cpus = get_cpus(qts, &resp); - g_assert(cpus); - - while ((e = qlist_pop(cpus))) { - QDict *cpu, *props; -- int64_t thread, node; -+ int64_t socket, cluster, core, thread, node; - - cpu = qobject_to(QDict, e); - g_assert(qdict_haskey(cpu, "props")); -@@ -242,12 +242,18 @@ static void aarch64_numa_cpu(const void *data) - - g_assert(qdict_haskey(props, "node-id")); - node = qdict_get_int(props, "node-id"); -+ g_assert(qdict_haskey(props, "socket-id")); -+ socket = qdict_get_int(props, "socket-id"); -+ g_assert(qdict_haskey(props, "cluster-id")); -+ cluster = qdict_get_int(props, "cluster-id"); -+ g_assert(qdict_haskey(props, "core-id")); -+ core = qdict_get_int(props, "core-id"); - g_assert(qdict_haskey(props, "thread-id")); - thread = qdict_get_int(props, 
"thread-id"); - -- if (thread == 0) { -+ if (socket == 0 && cluster == 0 && core == 0 && thread == 0) { - g_assert_cmpint(node, ==, 1); -- } else if (thread == 1) { -+ } else if (socket == 1 && cluster == 0 && core == 0 && thread == 0) { - g_assert_cmpint(node, ==, 0); - } else { - g_assert(false); --- -2.31.1 - diff --git a/kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch b/kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch deleted file mode 100644 index a87abc0..0000000 --- a/kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 66f3928b40991d8467a3da086688f73d061886c8 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 11 May 2022 18:01:35 +0800 -Subject: [PATCH 02/16] qtest/numa-test: Specify CPU topology in - aarch64_numa_cpu() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Gavin Shan -RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology -RH-Commit: [2/6] b851e7ad59e057825392ddf75e9040cc102a0385 (gwshan/qemu-rhel-9) -RH-Bugzilla: 2041823 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Andrew Jones - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 - -The CPU topology isn't enabled on arm/virt machine yet, but we're -going to do it in next patch. After the CPU topology is enabled by -next patch, "thread-id=1" becomes invalid because the CPU core is -preferred on arm/virt machine. It means these two CPUs have 0/1 -as their core IDs, but their thread IDs are all 0. 
It will trigger -test failure as the following message indicates: - -[14/21 qemu:qtest+qtest-aarch64 / qtest-aarch64/numa-test ERROR -1.48s killed by signal 6 SIGABRT ->>> G_TEST_DBUS_DAEMON=/home/gavin/sandbox/qemu.main/tests/dbus-vmstate-daemon.sh \ -QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon \ -QTEST_QEMU_BINARY=./qemu-system-aarch64 \ -QTEST_QEMU_IMG=./qemu-img MALLOC_PERTURB_=83 \ -/home/gavin/sandbox/qemu.main/build/tests/qtest/numa-test --tap -k -―――――――――――――――――――――――――――――――――――――――――――――― -stderr: -qemu-system-aarch64: -numa cpu,node-id=0,thread-id=1: no match found - -This fixes the issue by providing comprehensive SMP configurations -in aarch64_numa_cpu(). The SMP configurations aren't used before -the CPU topology is enabled in next patch. - -Signed-off-by: Gavin Shan -Reviewed-by: Yanan Wang -Message-id: 20220503140304.855514-3-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit ac7199a2523ce2ccf8e685087a5d177eeca89b09) -Signed-off-by: Gavin Shan ---- - tests/qtest/numa-test.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c -index 90bf68a5b3..aeda8c774c 100644 ---- a/tests/qtest/numa-test.c -+++ b/tests/qtest/numa-test.c -@@ -223,7 +223,8 @@ static void aarch64_numa_cpu(const void *data) - QTestState *qts; - g_autofree char *cli = NULL; - -- cli = make_cli(data, "-machine smp.cpus=2 " -+ cli = make_cli(data, "-machine " -+ "smp.cpus=2,smp.sockets=1,smp.clusters=1,smp.cores=1,smp.threads=2 " - "-numa node,nodeid=0,memdev=ram -numa node,nodeid=1 " - "-numa cpu,node-id=1,thread-id=0 " - "-numa cpu,node-id=0,thread-id=1"); --- -2.31.1 - diff --git a/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch b/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch deleted file mode 100644 index f027c45..0000000 --- a/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 
236f216309261bc924e49014267998fdc2ef7f46 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Fri, 29 Jul 2022 16:55:34 +0200 -Subject: [PATCH 28/32] redhat: Update linux-headers/linux/kvm.h to v5.18-rc6 - -RH-Author: Thomas Huth -RH-MergeRequest: 109: Honor storage keys during emulation of I/O instructions -RH-Commit: [1/2] f306d7ff8efa64b14158388b95815ac556a25d8a (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2111994 -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Claudio Imbrenda - -Upstream Status: RHEL-only -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2111994 - -Based on upstream commit e4082063e47e9731dbeb1c26174c17f6038f577f -("linux-headers: Update to v5.18-rc6"), but this is focusing on -the file linux-headers/linux/kvm.h only (since the other changes -related to the VFIO renaming might break some stuff). - -Signed-off-by: Thomas Huth ---- - linux-headers/linux/kvm.h | 27 +++++++++++++++++++++------ - 1 file changed, 21 insertions(+), 6 deletions(-) - -diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h -index d232feaae9..0d05d02ee4 100644 ---- a/linux-headers/linux/kvm.h -+++ b/linux-headers/linux/kvm.h -@@ -445,7 +445,11 @@ struct kvm_run { - #define KVM_SYSTEM_EVENT_RESET 2 - #define KVM_SYSTEM_EVENT_CRASH 3 - __u32 type; -- __u64 flags; -+ __u32 ndata; -+ union { -+ __u64 flags; -+ __u64 data[16]; -+ }; - } system_event; - /* KVM_EXIT_S390_STSI */ - struct { -@@ -562,9 +566,12 @@ struct kvm_s390_mem_op { - __u32 op; /* type of operation */ - __u64 buf; /* buffer in userspace */ - union { -- __u8 ar; /* the access register number */ -+ struct { -+ __u8 ar; /* the access register number */ -+ __u8 key; /* access key, ignored if flag unset */ -+ }; - __u32 sida_offset; /* offset into the sida */ -- __u8 reserved[32]; /* should be set to 0 */ -+ __u8 reserved[32]; /* ignored */ - }; - }; - /* types for kvm_s390_mem_op->op */ -@@ -572,9 +579,12 @@ struct kvm_s390_mem_op { - #define KVM_S390_MEMOP_LOGICAL_WRITE 1 - 
#define KVM_S390_MEMOP_SIDA_READ 2 - #define KVM_S390_MEMOP_SIDA_WRITE 3 -+#define KVM_S390_MEMOP_ABSOLUTE_READ 4 -+#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 - /* flags for kvm_s390_mem_op->flags */ - #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) - #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) -+#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) - - /* for KVM_INTERRUPT */ - struct kvm_interrupt { -@@ -1134,6 +1144,12 @@ struct kvm_ppc_resize_hpt { - #define KVM_CAP_VM_GPA_BITS 207 - #define KVM_CAP_XSAVE2 208 - #define KVM_CAP_SYS_ATTRIBUTES 209 -+#define KVM_CAP_PPC_AIL_MODE_3 210 -+#define KVM_CAP_S390_MEM_OP_EXTENSION 211 -+#define KVM_CAP_PMU_CAPABILITY 212 -+#define KVM_CAP_DISABLE_QUIRKS2 213 -+/* #define KVM_CAP_VM_TSC_CONTROL 214 */ -+#define KVM_CAP_SYSTEM_EVENT_DATA 215 - - #ifdef KVM_CAP_IRQ_ROUTING - -@@ -1624,9 +1640,6 @@ struct kvm_enc_region { - #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) - #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) - --/* Available with KVM_CAP_XSAVE2 */ --#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) -- - struct kvm_s390_pv_sec_parm { - __u64 origin; - __u64 length; -@@ -1973,6 +1986,8 @@ struct kvm_dirty_gfn { - #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) - #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) - -+#define KVM_PMU_CAP_DISABLE (1 << 0) -+ - /** - * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. - * @flags: Some extra information for header, always 0 for now. 
--- -2.31.1 - diff --git a/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch b/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch deleted file mode 100644 index cee10e7..0000000 --- a/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch +++ /dev/null @@ -1,96 +0,0 @@ -From e5360c1e76fee8b8dcbcba7efbb1e36f0b48ac40 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Mon, 22 Aug 2022 14:53:20 +0200 -Subject: [PATCH 01/23] scsi-generic: Fix emulated block limits VPD page - -RH-Author: Kevin Wolf -RH-MergeRequest: 115: scsi-generic: Fix emulated block limits VPD page -RH-Bugzilla: 2120275 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Reitz -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/1] 336ba583311a80beeadd1900336056404f63211a (kmwolf/centos-qemu-kvm) -Commits 01ef8185b80 amd 24b36e9813e updated the way that the maximum -transfer length is calculated for patching block limits VPD page in an -INQUIRY response. - -The same updates also need to be made for the case where the host device -does not support the block limits VPD page at all and we emulate the -whole page. - -Without this fix, on host block devices a maximum transfer length of -(INT_MAX - sector_size) bytes is advertised to the guest, resulting in -I/O errors when a request that exceeds the host limits is made by the -guest. (Prior to commit 24b36e9813e, this code path would use the -max_transfer value from the host instead of INT_MAX, but still miss the -fix from 01ef8185b80 where max_transfer is also capped to max_iov -host pages, so it would be less wrong, but still wrong.) 
- -Cc: qemu-stable@nongnu.org -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2096251 -Fixes: 01ef8185b809af9d287e1a03a3f9d8ea8231118a -Fixes: 24b36e9813ec15da7db62e3b3621730710c5f020 -Signed-off-by: Kevin Wolf -Message-Id: <20220822125320.48257-1-kwolf@redhat.com> -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Kevin Wolf -(cherry picked from commit 51e15194b0a091e5c40aab2eb234a1d36c5c58ee) - -Resolved conflict: qemu_real_host_page_size() is a getter function in -current upstream, but still just a public global variable downstream. - -Signed-off-by: Kevin Wolf ---- - hw/scsi/scsi-generic.c | 21 ++++++++++++++------- - 1 file changed, 14 insertions(+), 7 deletions(-) - -diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c -index 0306ccc7b1..3742899839 100644 ---- a/hw/scsi/scsi-generic.c -+++ b/hw/scsi/scsi-generic.c -@@ -147,6 +147,18 @@ static int execute_command(BlockBackend *blk, - return 0; - } - -+static uint64_t calculate_max_transfer(SCSIDevice *s) -+{ -+ uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk); -+ uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk); -+ -+ assert(max_transfer); -+ max_transfer = MIN_NON_ZERO(max_transfer, -+ max_iov * qemu_real_host_page_size); -+ -+ return max_transfer / s->blocksize; -+} -+ - static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len) - { - uint8_t page, page_idx; -@@ -179,12 +191,7 @@ static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len) - (r->req.cmd.buf[1] & 0x01)) { - page = r->req.cmd.buf[2]; - if (page == 0xb0) { -- uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk); -- uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk); -- -- assert(max_transfer); -- max_transfer = MIN_NON_ZERO(max_transfer, max_iov * qemu_real_host_page_size) -- / s->blocksize; -+ uint64_t max_transfer = calculate_max_transfer(s); - stl_be_p(&r->buf[8], max_transfer); - /* Also take care of the opt xfer len. 
*/ - stl_be_p(&r->buf[12], -@@ -230,7 +237,7 @@ static int scsi_generic_emulate_block_limits(SCSIGenericReq *r, SCSIDevice *s) - uint8_t buf[64]; - - SCSIBlockLimits bl = { -- .max_io_sectors = blk_get_max_transfer(s->conf.blk) / s->blocksize -+ .max_io_sectors = calculate_max_transfer(s), - }; - - memset(r->buf, 0, r->buflen); --- -2.31.1 - diff --git a/kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch b/kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch deleted file mode 100644 index 8fd2e16..0000000 --- a/kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 74b3e92dcb9e343e135a681259514b4fd28086ea Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Fri, 6 May 2022 15:25:09 +0200 -Subject: [PATCH 4/5] sysemu: tpm: Add a stub function for TPM_IS_CRB - -RH-Author: Eric Auger -RH-MergeRequest: 84: vfio/common: Remove spurious tpm-crb-cmd misalignment warning -RH-Commit: [1/2] 0ab55ca1aa12a3a7cbdef5a378928f75e030e536 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2037612 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck -RH-Acked-by: Andrew Jones - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2037612 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45166961 -Upstream Status: YES -Tested: With TPM-CRB and VFIO - -In a subsequent patch, VFIO will need to recognize if -a memory region owner is a TPM CRB device. Hence VFIO -needs to use TPM_IS_CRB() even if CONFIG_TPM is unset. So -let's add a stub function. 
- -Signed-off-by: Eric Auger -Suggested-by: Cornelia Huck -Reviewed-by: Stefan Berger -Link: https://lore.kernel.org/r/20220506132510.1847942-2-eric.auger@redhat.com -Signed-off-by: Alex Williamson -(cherry picked from commit 4168cdad398843ed53d650a27651868b4d3e21c9) -Signed-off-by: Eric Auger ---- - include/sysemu/tpm.h | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h -index 68b2206463..fb40e30ff6 100644 ---- a/include/sysemu/tpm.h -+++ b/include/sysemu/tpm.h -@@ -80,6 +80,12 @@ static inline TPMVersion tpm_get_version(TPMIf *ti) - #define tpm_init() (0) - #define tpm_cleanup() - -+/* needed for an alignment check in non-tpm code */ -+static inline Object *TPM_IS_CRB(Object *obj) -+{ -+ return NULL; -+} -+ - #endif /* CONFIG_TPM */ - - #endif /* QEMU_TPM_H */ --- -2.31.1 - diff --git a/kvm-target-arm-deprecate-named-CPU-models.patch b/kvm-target-arm-deprecate-named-CPU-models.patch deleted file mode 100644 index dbe8d24..0000000 --- a/kvm-target-arm-deprecate-named-CPU-models.patch +++ /dev/null @@ -1,129 +0,0 @@ -From 1f8528b71d96c01dd6106f11681f4a4e2776ef5f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Mon, 21 Mar 2022 12:05:42 +0000 -Subject: [PATCH 06/18] target/arm: deprecate named CPU models -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrangé -RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models -RH-Commit: [6/6] afddeb9e898206fd04499f01c48caf7dc1a8b8ef (berrange/centos-src-qemu) -RH-Bugzilla: 2060839 -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck - -KVM requires use of the 'host' CPU model, so named CPU models are only -needed for TCG. Since we don't consider TCG to be supported we can -deprecate all the named CPU models. TCG users can rely on 'max' model. 
- -Note: this has the effect of deprecating the default built-in CPU -model 'cortex-a57'. Applications using QEMU are expected to make an -explicit choice about which CPU model they want, since no builtin -default can suit all purposes. - -https://bugzilla.redhat.com/show_bug.cgi?id=2060839 -Signed-off-by: Daniel P. Berrangé ---- - target/arm/cpu-qom.h | 1 + - target/arm/cpu.c | 5 +++++ - target/arm/cpu.h | 2 ++ - target/arm/cpu64.c | 8 +++++++- - target/arm/helper.c | 2 ++ - 5 files changed, 17 insertions(+), 1 deletion(-) - -diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h -index 64c44cef2d..82e97249bc 100644 ---- a/target/arm/cpu-qom.h -+++ b/target/arm/cpu-qom.h -@@ -35,6 +35,7 @@ typedef struct ARMCPUInfo { - const char *name; - void (*initfn)(Object *obj); - void (*class_init)(ObjectClass *oc, void *data); -+ const char *deprecation_note; - } ARMCPUInfo; - - void arm_cpu_register(const ARMCPUInfo *info); -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 5d4ca7a227..c74b0fb462 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -2105,8 +2105,13 @@ static void arm_cpu_instance_init(Object *obj) - static void cpu_register_class_init(ObjectClass *oc, void *data) - { - ARMCPUClass *acc = ARM_CPU_CLASS(oc); -+ CPUClass *cc = CPU_CLASS(oc); - - acc->info = data; -+ -+ if (acc->info->deprecation_note) { -+ cc->deprecation_note = acc->info->deprecation_note; -+ } - } - - void arm_cpu_register(const ARMCPUInfo *info) -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 23879de5fa..c0c9f680e5 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -33,6 +33,8 @@ - #define KVM_HAVE_MCE_INJECTION 1 - #endif - -+#define RHEL_CPU_DEPRECATION "use 'host' / 'max'" -+ - #define EXCP_UDEF 1 /* undefined instruction */ - #define EXCP_SWI 2 /* software interrupt */ - #define EXCP_PREFETCH_ABORT 3 -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index e80b831073..c8f152891c 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -975,7 
+975,8 @@ static void aarch64_a64fx_initfn(Object *obj) - #endif /* disabled for RHEL */ - - static const ARMCPUInfo aarch64_cpus[] = { -- { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, -+ { .name = "cortex-a57", .initfn = aarch64_a57_initfn, -+ .deprecation_note = RHEL_CPU_DEPRECATION }, - #if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, - { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, -@@ -1052,8 +1053,13 @@ static void aarch64_cpu_instance_init(Object *obj) - static void cpu_register_class_init(ObjectClass *oc, void *data) - { - ARMCPUClass *acc = ARM_CPU_CLASS(oc); -+ CPUClass *cc = CPU_CLASS(oc); - - acc->info = data; -+ -+ if (acc->info->deprecation_note) { -+ cc->deprecation_note = acc->info->deprecation_note; -+ } - } - - void aarch64_cpu_register(const ARMCPUInfo *info) -diff --git a/target/arm/helper.c b/target/arm/helper.c -index 7d14650615..3d34f63e49 100644 ---- a/target/arm/helper.c -+++ b/target/arm/helper.c -@@ -8560,6 +8560,7 @@ void arm_cpu_list(void) - static void arm_cpu_add_definition(gpointer data, gpointer user_data) - { - ObjectClass *oc = data; -+ CPUClass *cc = CPU_CLASS(oc); - CpuDefinitionInfoList **cpu_list = user_data; - CpuDefinitionInfo *info; - const char *typename; -@@ -8569,6 +8570,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) - info->name = g_strndup(typename, - strlen(typename) - strlen("-" TYPE_ARM_CPU)); - info->q_typename = g_strdup(typename); -+ info->deprecated = !!cc->deprecation_note; - - QAPI_LIST_PREPEND(*cpu_list, info); - } --- -2.35.3 - diff --git a/kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch b/kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch deleted file mode 100644 index d63bfdb..0000000 --- a/kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch +++ /dev/null @@ -1,273 +0,0 @@ -From 577b04770e47aed0f88acb4a415ed04ddbe087f1 Mon Sep 17 00:00:00 2001 -From: 
=?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Thu, 17 Mar 2022 17:59:22 +0000 -Subject: [PATCH 04/18] target/i386: deprecate CPUs older than x86_64-v2 ABI -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrangé -RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models -RH-Commit: [4/6] 71f6043f11b31ffa841a2e14d24972e571c18a9e (berrange/centos-src-qemu) -RH-Bugzilla: 2060839 -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck - -RHEL-9 is compiled with the x86_64-v2 ABI. We use this as a baseline to -select which CPUs we want to support, such that there is at least one -supported guest CPU that can be launched for every physical machine -capable of running RHEL-9 KVM. - -Supported CPUs: - - * QEMU models - - base (QEMU internal) - host (host passthrough) - max (host passthrough for KVM, - all emulated features for TCG) - - * Intel models - - Icelake-Server - Icelake-Server-noTSX - Cascadelake-Server (2019) - Cascadelake-Server-noTSX (2019) - Skylake-Server (2016) - Skylake-Server-IBRS (2016) - Skylake-Server-noTSX-IBRS (2016) - Skylake-Client (2015) - Skylake-Client-IBRS (2015) - Skylake-Client-noTSX-IBRS (2015) - Broadwell (2014) - Broadwell-IBRS (2014) - Broadwell-noTSX (2014) - Broadwell-noTSX-IBRS (2014) - Haswell (2013) - Haswell-IBRS (2013) - Haswell-noTSX (2013) - Haswell-noTSX-IBRS (2013) - IvyBridge (2012) - IvyBridge-IBRS (2012) - SandyBridge (2011) - SandyBridge-IBRS (2011) - Westmere (2010) - Westmere-IBRS (2010) - Nehalem (2008) - Nehalem-IBRS (2008) - - Cooperlake (2020) - Snowridge (2019) - KnightsMill (2017) - Denverton (2016) - - * AMD models - - EPYC-Milan (2021) - EPYC-Rome (2019) - EPYC (2017) - EPYC-IBPB (2017) - Opteron_G5 (2012) - Opteron_G4 (2011) - - * Other - - Dhyana (2018) - -(I've omitted the many -vNNN versions for brevity) - -Deprecated CPUs: - - 486 - athlon - Conroe - core2duo - coreduo - Icelake-Client 
(already deprecated upstream) - Icelake-Client-noTSX (already deprecated upstream) - kvm32 - kvm64 - n270 - Opteron_G1 - Opteron_G2 - Opteron_G3 - Penryn - pentium2 - pentium3 - pentium - phenom - qemu32 - qemu64 - -The deprecated CPU models are subject to removal in a future -major version of RHEL. - -Note: this has the effect of deprecating the default built-in CPU -model 'qemu64'. Applications using QEMU are expected to make an -explicit choice about which CPU model they want, since no builtin -default can suit all purposes. - -https://bugzilla.redhat.com/show_bug.cgi?id=2060839 -Signed-off-by: Daniel P. Berrangé ---- - target/i386/cpu.c | 21 +++++++++++++++++++++ - 1 file changed, 21 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index cb6b5467d0..87cb641b5f 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1780,9 +1780,13 @@ static const CPUCaches epyc_milan_cache_info = { - * PT in VMX operation - */ - -+#define RHEL_CPU_DEPRECATION \ -+ "use at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max'" -+ - static const X86CPUDefinition builtin_x86_defs[] = { - { - .name = "qemu64", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 0xd, - .vendor = CPUID_VENDOR_AMD, - .family = 15, -@@ -1803,6 +1807,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "phenom", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 5, - .vendor = CPUID_VENDOR_AMD, - .family = 16, -@@ -1835,6 +1840,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "core2duo", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 10, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, -@@ -1877,6 +1883,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "kvm64", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 0xd, - .vendor = CPUID_VENDOR_INTEL, - .family = 15, -@@ -1918,6 +1925,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "qemu32", -+ 
.deprecation_note = RHEL_CPU_DEPRECATION, - .level = 4, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, -@@ -1932,6 +1940,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "kvm32", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 5, - .vendor = CPUID_VENDOR_INTEL, - .family = 15, -@@ -1962,6 +1971,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "coreduo", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 10, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, -@@ -1995,6 +2005,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "486", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 1, - .vendor = CPUID_VENDOR_INTEL, - .family = 4, -@@ -2007,6 +2018,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "pentium", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 1, - .vendor = CPUID_VENDOR_INTEL, - .family = 5, -@@ -2019,6 +2031,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "pentium2", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 2, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, -@@ -2031,6 +2044,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "pentium3", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 3, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, -@@ -2043,6 +2057,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "athlon", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 2, - .vendor = CPUID_VENDOR_AMD, - .family = 6, -@@ -2058,6 +2073,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "n270", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 10, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, -@@ -2083,6 +2099,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "Conroe", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 10, - .vendor = CPUID_VENDOR_INTEL, - .family 
= 6, -@@ -2123,6 +2140,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "Penryn", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 10, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, -@@ -3832,6 +3850,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "Opteron_G1", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 5, - .vendor = CPUID_VENDOR_AMD, - .family = 15, -@@ -3852,6 +3871,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "Opteron_G2", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 5, - .vendor = CPUID_VENDOR_AMD, - .family = 15, -@@ -3874,6 +3894,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { - }, - { - .name = "Opteron_G3", -+ .deprecation_note = RHEL_CPU_DEPRECATION, - .level = 5, - .vendor = CPUID_VENDOR_AMD, - .family = 16, --- -2.35.3 - diff --git a/kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch b/kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch deleted file mode 100644 index c940cdb..0000000 --- a/kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 39642d0d37e2ef61ce7fde0bc284d37a365e4482 Mon Sep 17 00:00:00 2001 -From: Murilo Opsfelder Araujo -Date: Mon, 2 May 2022 17:59:11 -0300 -Subject: [PATCH 2/2] target/ppc/cpu-models: Fix ppc_cpu_aliases list for RHEL -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Murilo Opsfelder Araújo -RH-MergeRequest: 81: target/ppc/cpu-models: remove extraneous "#endif" -RH-Commit: [1/1] 5fff003ad3deb84c6a8e69ab90552a31edb3b058 (mopsfelder/centos-stream-src-qemu-kvm) -RH-Bugzilla: 2081022 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: Laurent Vivier - -The commit b9d28ecdedaf ("Enable/disable devices for RHEL") removed the -"#if 0" from the beginning of the ppc_cpu_aliases list, which broke the -build on ppc64le: - - ../target/ppc/cpu-models.c:904:2: 
error: #endif without #if - #endif - ^ - 1 error generated. - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2081022 - -Fixes: b9d28ecdedaf (Enable/disable devices for RHEL) -Signed-off-by: Murilo Opsfelder Araujo ---- - target/ppc/cpu-models.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index dd78883410..528467eac1 100644 ---- a/target/ppc/cpu-models.c -+++ b/target/ppc/cpu-models.c -@@ -746,6 +746,7 @@ - /* PowerPC CPU aliases */ - - PowerPCCPUAlias ppc_cpu_aliases[] = { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { "405", "405d4" }, - { "405cr", "405crc" }, - { "405gp", "405gpd" }, --- -2.35.1 - diff --git a/kvm-target-s390x-deprecate-CPUs-older-than-z14.patch b/kvm-target-s390x-deprecate-CPUs-older-than-z14.patch deleted file mode 100644 index 212900d..0000000 --- a/kvm-target-s390x-deprecate-CPUs-older-than-z14.patch +++ /dev/null @@ -1,194 +0,0 @@ -From 8459c305914e2a7a19dcd1662d54a89def7acfa6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= -Date: Thu, 17 Mar 2022 17:59:22 +0000 -Subject: [PATCH 05/18] target/s390x: deprecate CPUs older than z14 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrangé -RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models -RH-Commit: [5/6] 2da9e06cf452287673f94f880a7eb8b2b37b7278 (berrange/centos-src-qemu) -RH-Bugzilla: 2060839 -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck - -RHEL-9 is compiled with the z14 ABI. We use this as a baseline to -select which CPUs we want to support, such that there is at least one -supported guest CPU that can be launched for every physical -machine capable of running RHEL-9 KVM. 
- -Supported CPUs: - - gen15a-base - gen15a - gen15b-base - gen15b - gen16a-base - gen16a - gen16b-base - gen16b - max - qemu - z14.2-base - z14.2 - z14-base - z14 - z14ZR1-base - z14ZR1 - -Deprecated CPUs: - - z10BC.2-base - z10BC.2 - z10BC-base - z10BC - z10EC.2-base - z10EC.2 - z10EC.3-base - z10EC.3 - z10EC-base - z10EC - z114-base - z114 - z13.2-base - z13.2 - z13-base - z13s-base - z13s - z13 - z196.2-base - z196.2 - z196-base - z196 - z800-base - z800 - z890.2-base - z890.2 - z890.3-base - z890.3 - z890-base - z890 - z900.2-base - z900.2 - z900.3-base - z900.3 - z900-base - z900 - z990.2-base - z990.2 - z990.3-base - z990.3 - z990.4-base - z990.4 - z990.5-base - z990.5 - z990-base - z990 - z9BC.2-base - z9BC.2 - z9BC-base - z9BC - z9EC.2-base - z9EC.2 - z9EC.3-base - z9EC.3 - z9EC-base - z9EC - zBC12-base - zBC12 - zEC12.2-base - zEC12.2 - zEC12-base - zEC12 - -https://bugzilla.redhat.com/show_bug.cgi?id=2060839 -Signed-off-by: Daniel P. Berrangé ---- - target/s390x/cpu_models.c | 11 +++++++++++ - target/s390x/cpu_models.h | 2 ++ - target/s390x/cpu_models_sysemu.c | 2 ++ - 3 files changed, 15 insertions(+) - -diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 6d71428056..9b9fc41676 100644 ---- a/target/s390x/cpu_models.c -+++ b/target/s390x/cpu_models.c -@@ -45,6 +45,9 @@ - * of a following release have been a superset of the previous release. With - * generation 15 one base feature and one optional feature have been deprecated. 
- */ -+ -+#define RHEL_CPU_DEPRECATION "use at least 'z14', or 'host' / 'qemu' / 'max'" -+ - static S390CPUDef s390_cpu_defs[] = { - CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"), - CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"), -@@ -852,22 +855,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) - static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) - { - S390CPUClass *xcc = S390_CPU_CLASS(oc); -+ CPUClass *cc = CPU_CLASS(oc); - - /* all base models are migration safe */ - xcc->cpu_def = (const S390CPUDef *) data; - xcc->is_migration_safe = true; - xcc->is_static = true; - xcc->desc = xcc->cpu_def->desc; -+ if (xcc->cpu_def->gen < 14) { -+ cc->deprecation_note = RHEL_CPU_DEPRECATION; -+ } - } - - static void s390_cpu_model_class_init(ObjectClass *oc, void *data) - { - S390CPUClass *xcc = S390_CPU_CLASS(oc); -+ CPUClass *cc = CPU_CLASS(oc); - - /* model that can change between QEMU versions */ - xcc->cpu_def = (const S390CPUDef *) data; - xcc->is_migration_safe = true; - xcc->desc = xcc->cpu_def->desc; -+ if (xcc->cpu_def->gen < 14) { -+ cc->deprecation_note = RHEL_CPU_DEPRECATION; -+ } - } - - static void s390_qemu_cpu_model_class_init(ObjectClass *oc, void *data) -diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h -index 74d1f87e4f..372160bcd7 100644 ---- a/target/s390x/cpu_models.h -+++ b/target/s390x/cpu_models.h -@@ -38,6 +38,8 @@ struct S390CPUDef { - S390FeatBitmap full_feat; - /* used to init full_feat from generated data */ - S390FeatInit full_init; -+ /* if deprecated, provides a suggestion */ -+ const char *deprecation_note; - }; - - /* CPU model based on a CPU definition */ -diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c -index 6a04ccab1b..f3b7c304ec 100644 ---- a/target/s390x/cpu_models_sysemu.c -+++ b/target/s390x/cpu_models_sysemu.c -@@ -61,6 +61,7 @@ static void create_cpu_model_list(ObjectClass 
*klass, void *opaque) - CpuDefinitionInfo *info; - char *name = g_strdup(object_class_get_name(klass)); - S390CPUClass *scc = S390_CPU_CLASS(klass); -+ CPUClass *cc = CPU_CLASS(klass); - - /* strip off the -s390x-cpu */ - g_strrstr(name, "-" TYPE_S390_CPU)[0] = 0; -@@ -70,6 +71,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) - info->migration_safe = scc->is_migration_safe; - info->q_static = scc->is_static; - info->q_typename = g_strdup(object_class_get_name(klass)); -+ info->deprecated = !!cc->deprecation_note; - /* check for unavailable features */ - if (cpu_list_data->model) { - Object *obj; --- -2.35.3 - diff --git a/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch b/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch deleted file mode 100644 index 61752c7..0000000 --- a/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 27c1d979a994f5afc59c3520af58d15aa5aae723 Mon Sep 17 00:00:00 2001 -From: Janis Schoetterl-Glausch -Date: Fri, 6 May 2022 17:39:56 +0200 -Subject: [PATCH 29/32] target/s390x: kvm: Honor storage keys during emulation - -RH-Author: Thomas Huth -RH-MergeRequest: 109: Honor storage keys during emulation of I/O instructions -RH-Commit: [2/2] 346dee1e13bfe1c074e4c6a4417091711d852f9c (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2111994 -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Claudio Imbrenda - -Storage key controlled protection is currently not honored when -emulating instructions. -If available, enable key protection for the MEM_OP ioctl, thereby -enabling it for the s390_cpu_virt_mem_* functions, when using kvm. -As a result, the emulation of the following instructions honors storage -keys: - -* CLP - The Synch I/O CLP command would need special handling in order - to support storage keys, but is currently not supported. 
-* CHSC - Performing commands asynchronously would require special - handling, but commands are currently always synchronous. -* STSI -* TSCH - Must (and does) not change channel if terminated due to - protection. -* MSCH - Suppressed on protection, works because fetching instruction. -* SSCH - Suppressed on protection, works because fetching instruction. -* STSCH -* STCRW - Suppressed on protection, this works because no partial store is - possible, because the operand cannot span multiple pages. -* PCISTB -* MPCIFC -* STPCIFC - -Signed-off-by: Janis Schoetterl-Glausch -Message-Id: <20220506153956.2217601-3-scgl@linux.ibm.com> -Signed-off-by: Thomas Huth - -(cherry picked from commit 54354861d21b69ec0781f43e67b8d4f6edad7e3f) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2111994 -Signed-off-by: Thomas Huth ---- - target/s390x/kvm/kvm.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c -index 74f089d87f..1f1d1a33b8 100644 ---- a/target/s390x/kvm/kvm.c -+++ b/target/s390x/kvm/kvm.c -@@ -152,12 +152,15 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { - static int cap_sync_regs; - static int cap_async_pf; - static int cap_mem_op; -+static int cap_mem_op_extension; - static int cap_s390_irq; - static int cap_ri; - static int cap_hpage_1m; - static int cap_vcpu_resets; - static int cap_protected; - -+static bool mem_op_storage_key_support; -+ - static int active_cmma; - - static int kvm_s390_query_mem_limit(uint64_t *memory_limit) -@@ -355,6 +358,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s) - cap_sync_regs = kvm_check_extension(s, KVM_CAP_SYNC_REGS); - cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF); - cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); -+ cap_mem_op_extension = kvm_check_extension(s, KVM_CAP_S390_MEM_OP_EXTENSION); -+ mem_op_storage_key_support = cap_mem_op_extension > 0; - cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); - 
cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); - cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); -@@ -843,6 +848,7 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, - : KVM_S390_MEMOP_LOGICAL_READ, - .buf = (uint64_t)hostbuf, - .ar = ar, -+ .key = (cpu->env.psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY, - }; - int ret; - -@@ -852,6 +858,9 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, - if (!hostbuf) { - mem_op.flags |= KVM_S390_MEMOP_F_CHECK_ONLY; - } -+ if (mem_op_storage_key_support) { -+ mem_op.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION; -+ } - - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_S390_MEM_OP, &mem_op); - if (ret < 0) { --- -2.31.1 - diff --git a/kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch b/kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch deleted file mode 100644 index 4fcf786..0000000 --- a/kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch +++ /dev/null @@ -1,157 +0,0 @@ -From f52aa60217634c96fef59ce76b803a94610bf5c8 Mon Sep 17 00:00:00 2001 -From: Andrew Jones -Date: Wed, 15 Jun 2022 15:28:27 +0200 -Subject: [PATCH 01/18] tests/avocado: update aarch64_virt test to exercise - -cpu max -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Daniel P. Berrangé -RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models -RH-Commit: [1/6] df6839e567180a4c32afd98852f68b2279e00f7c (berrange/centos-src-qemu) -RH-Bugzilla: 2060839 -RH-Acked-by: Thomas Huth -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824 - -commit 11593544df6f8febb3ce87015c22b429bf43c4c7 -Author: Alex Bennée -Date: Tue Apr 19 10:09:56 2022 +0100 - - tests/avocado: update aarch64_virt test to exercise -cpu max - - The Fedora 29 kernel is quite old and importantly fails when running - in LPA2 scenarios. 
As it's not really exercising much of the CPU space - replace it with a custom 5.16.12 kernel with all the architecture - options turned on. There is a minimal buildroot initramfs included in - the kernel which has a few tools for stress testing the memory - subsystem. The userspace also targets the Neoverse N1 processor so - would fail with a v8.0 cpu like cortex-a53. - - While we are at it move the test into its own file so it can have an - assigned maintainer. - - Signed-off-by: Alex Bennée - Acked-by: Richard Henderson - Tested-by: Richard Henderson - Message-Id: <20220419091020.3008144-2-alex.bennee@linaro.org> - -Signed-off-by: Andrew Jones ---- - MAINTAINERS | 1 + - tests/avocado/boot_linux_console.py | 25 ------------- - tests/avocado/machine_aarch64_virt.py | 51 +++++++++++++++++++++++++++ - 3 files changed, 52 insertions(+), 25 deletions(-) - create mode 100644 tests/avocado/machine_aarch64_virt.py - -diff --git a/MAINTAINERS b/MAINTAINERS -index 2fe20a49ab..bfe8806f60 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -942,6 +942,7 @@ S: Maintained - F: hw/arm/virt* - F: include/hw/arm/virt.h - F: docs/system/arm/virt.rst -+F: tests/avocado/machine_aarch64_virt.py - - Xilinx Zynq - M: Edgar E. 
Iglesias -diff --git a/tests/avocado/boot_linux_console.py b/tests/avocado/boot_linux_console.py -index b40a3abc81..45a2ceda22 100644 ---- a/tests/avocado/boot_linux_console.py -+++ b/tests/avocado/boot_linux_console.py -@@ -325,31 +325,6 @@ def test_mips_malta32el_nanomips_64k_dbg(self): - kernel_hash = '18d1c68f2e23429e266ca39ba5349ccd0aeb7180' - self.do_test_mips_malta32el_nanomips(kernel_url, kernel_hash) - -- def test_aarch64_virt(self): -- """ -- :avocado: tags=arch:aarch64 -- :avocado: tags=machine:virt -- :avocado: tags=accel:tcg -- :avocado: tags=cpu:cortex-a53 -- """ -- kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' -- '/linux/releases/29/Everything/aarch64/os/images/pxeboot' -- '/vmlinuz') -- kernel_hash = '8c73e469fc6ea06a58dc83a628fc695b693b8493' -- kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) -- -- self.vm.set_console() -- kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + -- 'console=ttyAMA0') -- self.require_accelerator("tcg") -- self.vm.add_args('-cpu', 'cortex-a53', -- '-accel', 'tcg', -- '-kernel', kernel_path, -- '-append', kernel_command_line) -- self.vm.launch() -- console_pattern = 'Kernel command line: %s' % kernel_command_line -- self.wait_for_console_pattern(console_pattern) -- - def test_aarch64_xlnx_versal_virt(self): - """ - :avocado: tags=arch:aarch64 -diff --git a/tests/avocado/machine_aarch64_virt.py b/tests/avocado/machine_aarch64_virt.py -new file mode 100644 -index 0000000000..21848cba70 ---- /dev/null -+++ b/tests/avocado/machine_aarch64_virt.py -@@ -0,0 +1,51 @@ -+# Functional test that boots a Linux kernel and checks the console -+# -+# Copyright (c) 2022 Linaro Ltd. 
-+# -+# Author: -+# Alex Bennée -+# -+# SPDX-License-Identifier: GPL-2.0-or-later -+ -+import time -+ -+from avocado_qemu import QemuSystemTest -+from avocado_qemu import wait_for_console_pattern -+from avocado_qemu import exec_command -+ -+class Aarch64VirtMachine(QemuSystemTest): -+ KERNEL_COMMON_COMMAND_LINE = 'printk.time=0 ' -+ -+ def wait_for_console_pattern(self, success_message, vm=None): -+ wait_for_console_pattern(self, success_message, -+ failure_message='Kernel panic - not syncing', -+ vm=vm) -+ -+ def test_aarch64_virt(self): -+ """ -+ :avocado: tags=arch:aarch64 -+ :avocado: tags=machine:virt -+ :avocado: tags=accel:tcg -+ :avocado: tags=cpu:max -+ """ -+ kernel_url = ('https://fileserver.linaro.org/s/' -+ 'z6B2ARM7DQT3HWN/download') -+ -+ kernel_hash = 'ed11daab50c151dde0e1e9c9cb8b2d9bd3215347' -+ kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) -+ -+ self.vm.set_console() -+ kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + -+ 'console=ttyAMA0') -+ self.require_accelerator("tcg") -+ self.vm.add_args('-cpu', 'max,pauth-impdef=on', -+ '-accel', 'tcg', -+ '-kernel', kernel_path, -+ '-append', kernel_command_line) -+ self.vm.launch() -+ self.wait_for_console_pattern('Welcome to Buildroot') -+ time.sleep(0.1) -+ exec_command(self, 'root') -+ time.sleep(0.1) -+ exec_command(self, 'cat /proc/self/maps') -+ time.sleep(0.1) --- -2.35.3 - diff --git a/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch b/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch deleted file mode 100644 index 7b9a8f3..0000000 --- a/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch +++ /dev/null @@ -1,119 +0,0 @@ -From cea7b15c613a11ea15a1458d6990be7044df6643 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= -Date: Thu, 18 Nov 2021 12:57:33 +0100 -Subject: [PATCH 17/17] tests/qtest/fdc-test: Add a regression test for - CVE-2021-3507 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -RH-Author: Jon Maloy -RH-MergeRequest: 107: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507) -RH-Commit: [2/2] 067c052df790959c28c1fcc16547676d36523bd9 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1951522 -RH-Acked-by: Hanna Reitz -RH-Acked-by: Miroslav Rezanina - -Add the reproducer from https://gitlab.com/qemu-project/qemu/-/issues/339 - -Without the previous commit, when running 'make check-qtest-i386' -with QEMU configured with '--enable-sanitizers' we get: - - ==4028352==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x619000062a00 at pc 0x5626d03c491a bp 0x7ffdb4199410 sp 0x7ffdb4198bc0 - READ of size 786432 at 0x619000062a00 thread T0 - #0 0x5626d03c4919 in __asan_memcpy (qemu-system-i386+0x1e65919) - #1 0x5626d1c023cc in flatview_write_continue softmmu/physmem.c:2787:13 - #2 0x5626d1bf0c0f in flatview_write softmmu/physmem.c:2822:14 - #3 0x5626d1bf0798 in address_space_write softmmu/physmem.c:2914:18 - #4 0x5626d1bf0f37 in address_space_rw softmmu/physmem.c:2924:16 - #5 0x5626d1bf14c8 in cpu_physical_memory_rw softmmu/physmem.c:2933:5 - #6 0x5626d0bd5649 in cpu_physical_memory_write include/exec/cpu-common.h:82:5 - #7 0x5626d0bd0a07 in i8257_dma_write_memory hw/dma/i8257.c:452:9 - #8 0x5626d09f825d in fdctrl_transfer_handler hw/block/fdc.c:1616:13 - #9 0x5626d0a048b4 in fdctrl_start_transfer hw/block/fdc.c:1539:13 - #10 0x5626d09f4c3e in fdctrl_write_data hw/block/fdc.c:2266:13 - #11 0x5626d09f22f7 in fdctrl_write hw/block/fdc.c:829:9 - #12 0x5626d1c20bc5 in portio_write softmmu/ioport.c:207:17 - - 0x619000062a00 is located 0 bytes to the right of 512-byte region [0x619000062800,0x619000062a00) - allocated by thread T0 here: - #0 0x5626d03c66ec in posix_memalign (qemu-system-i386+0x1e676ec) - #1 0x5626d2b988d4 in qemu_try_memalign util/oslib-posix.c:210:11 - #2 0x5626d2b98b0c in qemu_memalign util/oslib-posix.c:226:27 - #3 0x5626d09fbaf0 in fdctrl_realize_common hw/block/fdc.c:2341:20 - #4 
0x5626d0a150ed in isabus_fdc_realize hw/block/fdc-isa.c:113:5 - #5 0x5626d2367935 in device_set_realized hw/core/qdev.c:531:13 - - SUMMARY: AddressSanitizer: heap-buffer-overflow (qemu-system-i386+0x1e65919) in __asan_memcpy - Shadow bytes around the buggy address: - 0x0c32800044f0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa - 0x0c3280004500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - 0x0c3280004510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - 0x0c3280004520: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - 0x0c3280004530: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - =>0x0c3280004540:[fa]fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa - 0x0c3280004550: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa - 0x0c3280004560: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa - 0x0c3280004570: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa - 0x0c3280004580: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa - 0x0c3280004590: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd - Shadow byte legend (one shadow byte represents 8 application bytes): - Addressable: 00 - Heap left redzone: fa - Freed heap region: fd - ==4028352==ABORTING - -[ kwolf: Added snapshot=on to prevent write file lock failure ] - -Reported-by: Alexander Bulekov -Signed-off-by: Philippe Mathieu-Daudé -Reviewed-by: Alexander Bulekov -Signed-off-by: Kevin Wolf -(cherry picked from commit 46609b90d9e3a6304def11038a76b58ff43f77bc) -Signed-off-by: Jon Maloy ---- - tests/qtest/fdc-test.c | 21 +++++++++++++++++++++ - 1 file changed, 21 insertions(+) - -diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c -index b0d40012e6..1d4f852128 100644 ---- a/tests/qtest/fdc-test.c -+++ b/tests/qtest/fdc-test.c -@@ -583,6 +583,26 @@ static void test_cve_2021_20196(void) - qtest_quit(s); - } - -+static void test_cve_2021_3507(void) -+{ -+ QTestState *s; -+ -+ s = qtest_initf("-nographic -m 32M -nodefaults " -+ "-drive file=%s,format=raw,if=floppy,snapshot=on", -+ test_image); -+ qtest_outl(s, 0x9, 
0x0a0206); -+ qtest_outw(s, 0x3f4, 0x1600); -+ qtest_outw(s, 0x3f4, 0x0000); -+ qtest_outw(s, 0x3f4, 0x0000); -+ qtest_outw(s, 0x3f4, 0x0000); -+ qtest_outw(s, 0x3f4, 0x0200); -+ qtest_outw(s, 0x3f4, 0x0200); -+ qtest_outw(s, 0x3f4, 0x0000); -+ qtest_outw(s, 0x3f4, 0x0000); -+ qtest_outw(s, 0x3f4, 0x0000); -+ qtest_quit(s); -+} -+ - int main(int argc, char **argv) - { - int fd; -@@ -614,6 +634,7 @@ int main(int argc, char **argv) - qtest_add_func("/fdc/read_no_dma_19", test_read_no_dma_19); - qtest_add_func("/fdc/fuzz-registers", fuzz_registers); - qtest_add_func("/fdc/fuzz/cve_2021_20196", test_cve_2021_20196); -+ qtest_add_func("/fdc/fuzz/cve_2021_3507", test_cve_2021_3507); - - ret = g_test_run(); - --- -2.31.1 - diff --git a/kvm-util-Return-void-on-iova_tree_remove.patch b/kvm-util-Return-void-on-iova_tree_remove.patch deleted file mode 100644 index 07c6f8e..0000000 --- a/kvm-util-Return-void-on-iova_tree_remove.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 74c829f82eafa8e42ae94f7ace55c8aaed3bb5f4 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 27 Apr 2022 17:49:31 +0200 -Subject: [PATCH 05/23] util: Return void on iova_tree_remove -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/21] 252287acca896eba7b5d2b62fc6247cfc565ba57 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: Merged - -It always returns IOVA_OK so nobody uses it. 
- -Acked-by: Jason Wang -Reviewed-by: Peter Xu -Signed-off-by: Eugenio Pérez -Message-Id: <20220427154931.3166388-1-eperezma@redhat.com> -Signed-off-by: Laurent Vivier -(cherry picked from commit 832fef7cc14d65f99d523f883ef384014e6476a7) ---- - include/qemu/iova-tree.h | 4 +--- - util/iova-tree.c | 4 +--- - 2 files changed, 2 insertions(+), 6 deletions(-) - -diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h -index c938fb0793..16bbfdf5f8 100644 ---- a/include/qemu/iova-tree.h -+++ b/include/qemu/iova-tree.h -@@ -72,10 +72,8 @@ int iova_tree_insert(IOVATree *tree, const DMAMap *map); - * provided. The range does not need to be exactly what has inserted, - * all the mappings that are included in the provided range will be - * removed from the tree. Here map->translated_addr is meaningless. -- * -- * Return: 0 if succeeded, or <0 if error. - */ --int iova_tree_remove(IOVATree *tree, const DMAMap *map); -+void iova_tree_remove(IOVATree *tree, const DMAMap *map); - - /** - * iova_tree_find: -diff --git a/util/iova-tree.c b/util/iova-tree.c -index 6dff29c1f6..fee530a579 100644 ---- a/util/iova-tree.c -+++ b/util/iova-tree.c -@@ -164,15 +164,13 @@ void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator) - g_tree_foreach(tree->tree, iova_tree_traverse, iterator); - } - --int iova_tree_remove(IOVATree *tree, const DMAMap *map) -+void iova_tree_remove(IOVATree *tree, const DMAMap *map) - { - const DMAMap *overlap; - - while ((overlap = iova_tree_find(tree, map))) { - g_tree_remove(tree->tree, overlap); - } -- -- return IOVA_OK; - } - - /** --- -2.31.1 - diff --git a/kvm-util-accept-iova_tree_remove_parameter-by-value.patch b/kvm-util-accept-iova_tree_remove_parameter-by-value.patch deleted file mode 100644 index cd073da..0000000 --- a/kvm-util-accept-iova_tree_remove_parameter-by-value.patch +++ /dev/null @@ -1,182 +0,0 @@ -From 90697579eaf598614293d75f684d6e8c55f8ab9b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 
Aug 2022 20:20:04 +0200 -Subject: [PATCH 06/23] util: accept iova_tree_remove_parameter by value -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/21] ddaf052789e7ab3c67a77c038347113301587ffb (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -It's convenient to call iova_tree_remove from a map returned from -iova_tree_find or iova_tree_find_iova. With the current code this is not -possible, since we will free it, and then we will try to search for it -again. - -Fix it making accepting the map by value, forcing a copy of the -argument. Not applying a fixes tag, since there is no use like that at -the moment. - -Signed-off-by: Eugenio Pérez -Signed-off-by: Jason Wang -(cherry picked from commit d69ba6677405de86b3b617fc7688b549f84cf013) ---- - hw/i386/intel_iommu.c | 6 +++--- - hw/virtio/vhost-iova-tree.c | 2 +- - hw/virtio/vhost-iova-tree.h | 2 +- - hw/virtio/vhost-vdpa.c | 6 +++--- - include/qemu/iova-tree.h | 2 +- - net/vhost-vdpa.c | 4 ++-- - util/iova-tree.c | 4 ++-- - 7 files changed, 13 insertions(+), 13 deletions(-) - -diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c -index c64aa81a83..6738cf0929 100644 ---- a/hw/i386/intel_iommu.c -+++ b/hw/i386/intel_iommu.c -@@ -1157,7 +1157,7 @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) - return ret; - } - /* Drop any existing mapping */ -- iova_tree_remove(as->iova_tree, &target); -+ iova_tree_remove(as->iova_tree, target); - /* Recover the correct type */ - event->type = IOMMU_NOTIFIER_MAP; - entry->perm = cache_perm; -@@ -1170,7 +1170,7 @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) - 
trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask); - return 0; - } -- iova_tree_remove(as->iova_tree, &target); -+ iova_tree_remove(as->iova_tree, target); - } - - trace_vtd_page_walk_one(info->domain_id, entry->iova, -@@ -3532,7 +3532,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) - - map.iova = n->start; - map.size = size; -- iova_tree_remove(as->iova_tree, &map); -+ iova_tree_remove(as->iova_tree, map); - } - - static void vtd_address_space_unmap_all(IntelIOMMUState *s) -diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c -index 55fed1fefb..1339a4de8b 100644 ---- a/hw/virtio/vhost-iova-tree.c -+++ b/hw/virtio/vhost-iova-tree.c -@@ -104,7 +104,7 @@ int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map) - * @iova_tree: The vhost iova tree - * @map: The map to remove - */ --void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map) -+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map) - { - iova_tree_remove(iova_tree->iova_taddr_map, map); - } -diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h -index 6a4f24e0f9..4adfd79ff0 100644 ---- a/hw/virtio/vhost-iova-tree.h -+++ b/hw/virtio/vhost-iova-tree.h -@@ -22,6 +22,6 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete); - const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree, - const DMAMap *map); - int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map); --void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map); -+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map); - - #endif -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index cc15b7d8ee..39aa70f52d 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -238,7 +238,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - - fail_map: - if (v->shadow_vqs_enabled) { -- vhost_iova_tree_remove(v->iova_tree, 
&mem_region); -+ vhost_iova_tree_remove(v->iova_tree, mem_region); - } - - fail: -@@ -298,7 +298,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, - return; - } - iova = result->iova; -- vhost_iova_tree_remove(v->iova_tree, result); -+ vhost_iova_tree_remove(v->iova_tree, *result); - } - vhost_vdpa_iotlb_batch_begin_once(v); - ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); -@@ -942,7 +942,7 @@ static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, - needle->perm == IOMMU_RO); - if (unlikely(r != 0)) { - error_setg_errno(errp, -r, "Cannot map region to device"); -- vhost_iova_tree_remove(v->iova_tree, needle); -+ vhost_iova_tree_remove(v->iova_tree, *needle); - } - - return r == 0; -diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h -index 16bbfdf5f8..8528e5c98f 100644 ---- a/include/qemu/iova-tree.h -+++ b/include/qemu/iova-tree.h -@@ -73,7 +73,7 @@ int iova_tree_insert(IOVATree *tree, const DMAMap *map); - * all the mappings that are included in the provided range will be - * removed from the tree. Here map->translated_addr is meaningless. 
- */ --void iova_tree_remove(IOVATree *tree, const DMAMap *map); -+void iova_tree_remove(IOVATree *tree, DMAMap map); - - /** - * iova_tree_find: -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 411e71e6c2..ba65736f83 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -244,7 +244,7 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) - error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); - } - -- vhost_iova_tree_remove(tree, map); -+ vhost_iova_tree_remove(tree, *map); - } - - static size_t vhost_vdpa_net_cvq_cmd_len(void) -@@ -297,7 +297,7 @@ static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, - return true; - - dma_map_err: -- vhost_iova_tree_remove(v->iova_tree, &map); -+ vhost_iova_tree_remove(v->iova_tree, map); - return false; - } - -diff --git a/util/iova-tree.c b/util/iova-tree.c -index fee530a579..536789797e 100644 ---- a/util/iova-tree.c -+++ b/util/iova-tree.c -@@ -164,11 +164,11 @@ void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator) - g_tree_foreach(tree->tree, iova_tree_traverse, iterator); - } - --void iova_tree_remove(IOVATree *tree, const DMAMap *map) -+void iova_tree_remove(IOVATree *tree, DMAMap map) - { - const DMAMap *overlap; - -- while ((overlap = iova_tree_find(tree, map))) { -+ while ((overlap = iova_tree_find(tree, &map))) { - g_tree_remove(tree->tree, overlap); - } - } --- -2.31.1 - diff --git a/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch b/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch deleted file mode 100644 index 77929a6..0000000 --- a/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch +++ /dev/null @@ -1,385 +0,0 @@ -From 7a6fa42d4a4263c94b9bf18290f9e7680ea9e7f4 Mon Sep 17 00:00:00 2001 -From: Nicolas Saenz Julienne -Date: Mon, 25 Apr 2022 09:57:23 +0200 -Subject: [PATCH 03/16] util/event-loop-base: Introduce options to set the - thread pool size - -RH-Author: Nicolas Saenz Julienne -RH-MergeRequest: 93: util/thread-pool: 
Expose minimum and maximum size -RH-Commit: [3/3] af78a88ff3c69701cbb5f9e980c3d6ebbd13ff98 -RH-Bugzilla: 2031024 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi - -The thread pool regulates itself: when idle, it kills threads until -empty, when in demand, it creates new threads until full. This behaviour -doesn't play well with latency sensitive workloads where the price of -creating a new thread is too high. For example, when paired with qemu's -'-mlock', or using safety features like SafeStack, creating a new thread -has been measured take multiple milliseconds. - -In order to mitigate this let's introduce a new 'EventLoopBase' -property to set the thread pool size. The threads will be created during -the pool's initialization or upon updating the property's value, remain -available during its lifetime regardless of demand, and destroyed upon -freeing it. A properly characterized workload will then be able to -configure the pool to avoid any latency spikes. 
- -Signed-off-by: Nicolas Saenz Julienne -Reviewed-by: Stefan Hajnoczi -Acked-by: Markus Armbruster -Message-id: 20220425075723.20019-4-nsaenzju@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 71ad4713cc1d7fca24388b828ef31ae6cb38a31c) ---- - event-loop-base.c | 23 +++++++++++++ - include/block/aio.h | 10 ++++++ - include/block/thread-pool.h | 3 ++ - include/sysemu/event-loop-base.h | 4 +++ - iothread.c | 3 ++ - qapi/qom.json | 10 +++++- - util/aio-posix.c | 1 + - util/async.c | 20 ++++++++++++ - util/main-loop.c | 9 ++++++ - util/thread-pool.c | 55 +++++++++++++++++++++++++++++--- - 10 files changed, 133 insertions(+), 5 deletions(-) - -diff --git a/event-loop-base.c b/event-loop-base.c -index e7f99a6ec8..d5be4dc6fc 100644 ---- a/event-loop-base.c -+++ b/event-loop-base.c -@@ -14,6 +14,7 @@ - #include "qemu/osdep.h" - #include "qom/object_interfaces.h" - #include "qapi/error.h" -+#include "block/thread-pool.h" - #include "sysemu/event-loop-base.h" - - typedef struct { -@@ -21,9 +22,22 @@ typedef struct { - ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */ - } EventLoopBaseParamInfo; - -+static void event_loop_base_instance_init(Object *obj) -+{ -+ EventLoopBase *base = EVENT_LOOP_BASE(obj); -+ -+ base->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT; -+} -+ - static EventLoopBaseParamInfo aio_max_batch_info = { - "aio-max-batch", offsetof(EventLoopBase, aio_max_batch), - }; -+static EventLoopBaseParamInfo thread_pool_min_info = { -+ "thread-pool-min", offsetof(EventLoopBase, thread_pool_min), -+}; -+static EventLoopBaseParamInfo thread_pool_max_info = { -+ "thread-pool-max", offsetof(EventLoopBase, thread_pool_max), -+}; - - static void event_loop_base_get_param(Object *obj, Visitor *v, - const char *name, void *opaque, Error **errp) -@@ -95,12 +109,21 @@ static void event_loop_base_class_init(ObjectClass *klass, void *class_data) - event_loop_base_get_param, - event_loop_base_set_param, - NULL, &aio_max_batch_info); -+ 
object_class_property_add(klass, "thread-pool-min", "int", -+ event_loop_base_get_param, -+ event_loop_base_set_param, -+ NULL, &thread_pool_min_info); -+ object_class_property_add(klass, "thread-pool-max", "int", -+ event_loop_base_get_param, -+ event_loop_base_set_param, -+ NULL, &thread_pool_max_info); - } - - static const TypeInfo event_loop_base_info = { - .name = TYPE_EVENT_LOOP_BASE, - .parent = TYPE_OBJECT, - .instance_size = sizeof(EventLoopBase), -+ .instance_init = event_loop_base_instance_init, - .class_size = sizeof(EventLoopBaseClass), - .class_init = event_loop_base_class_init, - .abstract = true, -diff --git a/include/block/aio.h b/include/block/aio.h -index 5634173b12..d128558f1d 100644 ---- a/include/block/aio.h -+++ b/include/block/aio.h -@@ -192,6 +192,8 @@ struct AioContext { - QSLIST_HEAD(, Coroutine) scheduled_coroutines; - QEMUBH *co_schedule_bh; - -+ int thread_pool_min; -+ int thread_pool_max; - /* Thread pool for performing work and receiving completion callbacks. - * Has its own locking. 
- */ -@@ -769,4 +771,12 @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, - void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch, - Error **errp); - -+/** -+ * aio_context_set_thread_pool_params: -+ * @ctx: the aio context -+ * @min: min number of threads to have readily available in the thread pool -+ * @min: max number of threads the thread pool can contain -+ */ -+void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min, -+ int64_t max, Error **errp); - #endif -diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h -index 7dd7d730a0..2020bcc92d 100644 ---- a/include/block/thread-pool.h -+++ b/include/block/thread-pool.h -@@ -20,6 +20,8 @@ - - #include "block/block.h" - -+#define THREAD_POOL_MAX_THREADS_DEFAULT 64 -+ - typedef int ThreadPoolFunc(void *opaque); - - typedef struct ThreadPool ThreadPool; -@@ -33,5 +35,6 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool, - int coroutine_fn thread_pool_submit_co(ThreadPool *pool, - ThreadPoolFunc *func, void *arg); - void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg); -+void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx); - - #endif -diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h -index fced4c9fea..2748bf6ae1 100644 ---- a/include/sysemu/event-loop-base.h -+++ b/include/sysemu/event-loop-base.h -@@ -33,5 +33,9 @@ struct EventLoopBase { - - /* AioContext AIO engine parameters */ - int64_t aio_max_batch; -+ -+ /* AioContext thread pool parameters */ -+ int64_t thread_pool_min; -+ int64_t thread_pool_max; - }; - #endif -diff --git a/iothread.c b/iothread.c -index 8fa2f3bfb8..529194a566 100644 ---- a/iothread.c -+++ b/iothread.c -@@ -174,6 +174,9 @@ static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp) - aio_context_set_aio_params(iothread->ctx, - iothread->parent_obj.aio_max_batch, - errp); -+ -+ aio_context_set_thread_pool_params(iothread->ctx, 
base->thread_pool_min, -+ base->thread_pool_max, errp); - } - - -diff --git a/qapi/qom.json b/qapi/qom.json -index 7d4a2ac1b9..6a653c6636 100644 ---- a/qapi/qom.json -+++ b/qapi/qom.json -@@ -508,10 +508,18 @@ - # 0 means that the engine will use its default. - # (default: 0) - # -+# @thread-pool-min: minimum number of threads reserved in the thread pool -+# (default:0) -+# -+# @thread-pool-max: maximum number of threads the thread pool can contain -+# (default:64) -+# - # Since: 7.1 - ## - { 'struct': 'EventLoopBaseProperties', -- 'data': { '*aio-max-batch': 'int' } } -+ 'data': { '*aio-max-batch': 'int', -+ '*thread-pool-min': 'int', -+ '*thread-pool-max': 'int' } } - - ## - # @IothreadProperties: -diff --git a/util/aio-posix.c b/util/aio-posix.c -index be0182a3c6..731f3826c0 100644 ---- a/util/aio-posix.c -+++ b/util/aio-posix.c -@@ -15,6 +15,7 @@ - - #include "qemu/osdep.h" - #include "block/block.h" -+#include "block/thread-pool.h" - #include "qemu/main-loop.h" - #include "qemu/rcu.h" - #include "qemu/rcu_queue.h" -diff --git a/util/async.c b/util/async.c -index 2ea1172f3e..554ba70cca 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -563,6 +563,9 @@ AioContext *aio_context_new(Error **errp) - - ctx->aio_max_batch = 0; - -+ ctx->thread_pool_min = 0; -+ ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT; -+ - return ctx; - fail: - g_source_destroy(&ctx->source); -@@ -696,3 +699,20 @@ void qemu_set_current_aio_context(AioContext *ctx) - assert(!get_my_aiocontext()); - set_my_aiocontext(ctx); - } -+ -+void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min, -+ int64_t max, Error **errp) -+{ -+ -+ if (min > max || !max || min > INT_MAX || max > INT_MAX) { -+ error_setg(errp, "bad thread-pool-min/thread-pool-max values"); -+ return; -+ } -+ -+ ctx->thread_pool_min = min; -+ ctx->thread_pool_max = max; -+ -+ if (ctx->thread_pool) { -+ thread_pool_update_params(ctx->thread_pool, ctx); -+ } -+} -diff --git a/util/main-loop.c b/util/main-loop.c -index 
5b13f456fa..a0f48186ab 100644 ---- a/util/main-loop.c -+++ b/util/main-loop.c -@@ -30,6 +30,7 @@ - #include "sysemu/replay.h" - #include "qemu/main-loop.h" - #include "block/aio.h" -+#include "block/thread-pool.h" - #include "qemu/error-report.h" - #include "qemu/queue.h" - #include "qemu/compiler.h" -@@ -187,12 +188,20 @@ int qemu_init_main_loop(Error **errp) - - static void main_loop_update_params(EventLoopBase *base, Error **errp) - { -+ ERRP_GUARD(); -+ - if (!qemu_aio_context) { - error_setg(errp, "qemu aio context not ready"); - return; - } - - aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp); -+ if (*errp) { -+ return; -+ } -+ -+ aio_context_set_thread_pool_params(qemu_aio_context, base->thread_pool_min, -+ base->thread_pool_max, errp); - } - - MainLoop *mloop; -diff --git a/util/thread-pool.c b/util/thread-pool.c -index d763cea505..196835b4d3 100644 ---- a/util/thread-pool.c -+++ b/util/thread-pool.c -@@ -58,7 +58,6 @@ struct ThreadPool { - QemuMutex lock; - QemuCond worker_stopped; - QemuSemaphore sem; -- int max_threads; - QEMUBH *new_thread_bh; - - /* The following variables are only accessed from one AioContext. */ -@@ -71,8 +70,27 @@ struct ThreadPool { - int new_threads; /* backlog of threads we need to create */ - int pending_threads; /* threads created but not running yet */ - bool stopping; -+ int min_threads; -+ int max_threads; - }; - -+static inline bool back_to_sleep(ThreadPool *pool, int ret) -+{ -+ /* -+ * The semaphore timed out, we should exit the loop except when: -+ * - There is work to do, we raced with the signal. -+ * - The max threads threshold just changed, we raced with the signal. -+ * - The thread pool forces a minimum number of readily available threads. 
-+ */ -+ if (ret == -1 && (!QTAILQ_EMPTY(&pool->request_list) || -+ pool->cur_threads > pool->max_threads || -+ pool->cur_threads <= pool->min_threads)) { -+ return true; -+ } -+ -+ return false; -+} -+ - static void *worker_thread(void *opaque) - { - ThreadPool *pool = opaque; -@@ -91,8 +109,9 @@ static void *worker_thread(void *opaque) - ret = qemu_sem_timedwait(&pool->sem, 10000); - qemu_mutex_lock(&pool->lock); - pool->idle_threads--; -- } while (ret == -1 && !QTAILQ_EMPTY(&pool->request_list)); -- if (ret == -1 || pool->stopping) { -+ } while (back_to_sleep(pool, ret)); -+ if (ret == -1 || pool->stopping || -+ pool->cur_threads > pool->max_threads) { - break; - } - -@@ -294,6 +313,33 @@ void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg) - thread_pool_submit_aio(pool, func, arg, NULL, NULL); - } - -+void thread_pool_update_params(ThreadPool *pool, AioContext *ctx) -+{ -+ qemu_mutex_lock(&pool->lock); -+ -+ pool->min_threads = ctx->thread_pool_min; -+ pool->max_threads = ctx->thread_pool_max; -+ -+ /* -+ * We either have to: -+ * - Increase the number available of threads until over the min_threads -+ * threshold. -+ * - Decrease the number of available threads until under the max_threads -+ * threshold. -+ * - Do nothing. The current number of threads fall in between the min and -+ * max thresholds. We'll let the pool manage itself. 
-+ */ -+ for (int i = pool->cur_threads; i < pool->min_threads; i++) { -+ spawn_thread(pool); -+ } -+ -+ for (int i = pool->cur_threads; i > pool->max_threads; i--) { -+ qemu_sem_post(&pool->sem); -+ } -+ -+ qemu_mutex_unlock(&pool->lock); -+} -+ - static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) - { - if (!ctx) { -@@ -306,11 +352,12 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) - qemu_mutex_init(&pool->lock); - qemu_cond_init(&pool->worker_stopped); - qemu_sem_init(&pool->sem, 0); -- pool->max_threads = 64; - pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool); - - QLIST_INIT(&pool->head); - QTAILQ_INIT(&pool->request_list); -+ -+ thread_pool_update_params(pool, ctx); - } - - ThreadPool *thread_pool_new(AioContext *ctx) --- -2.31.1 - diff --git a/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch b/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch deleted file mode 100644 index 2104424..0000000 --- a/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch +++ /dev/null @@ -1,233 +0,0 @@ -From b4969662de01848f887a3918e97e516efc213f71 Mon Sep 17 00:00:00 2001 -From: Nicolas Saenz Julienne -Date: Mon, 25 Apr 2022 09:57:22 +0200 -Subject: [PATCH 02/16] util/main-loop: Introduce the main loop into QOM - -RH-Author: Nicolas Saenz Julienne -RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size -RH-Commit: [2/3] a481b77e25ad50d13dcbe26b36c551b18c89bddd -RH-Bugzilla: 2031024 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Stefan Hajnoczi - -'event-loop-base' provides basic property handling for all 'AioContext' -based event loops. So let's define a new 'MainLoopClass' that inherits -from it. This will permit tweaking the main loop's properties through -qapi as well as through the command line using the '-object' keyword[1]. -Only one instance of 'MainLoopClass' might be created at any time. 
- -'EventLoopBaseClass' learns a new callback, 'can_be_deleted()' so as to -mark 'MainLoop' as non-deletable. - -[1] For example: - -object main-loop,id=main-loop,aio-max-batch= - -Signed-off-by: Nicolas Saenz Julienne -Reviewed-by: Stefan Hajnoczi -Acked-by: Markus Armbruster -Message-id: 20220425075723.20019-3-nsaenzju@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 70ac26b9e5ca8374bb3ef3f30b871726673c9f27) ---- - event-loop-base.c | 13 ++++++++ - include/qemu/main-loop.h | 10 ++++++ - include/sysemu/event-loop-base.h | 1 + - meson.build | 3 +- - qapi/qom.json | 13 ++++++++ - util/main-loop.c | 56 ++++++++++++++++++++++++++++++++ - 6 files changed, 95 insertions(+), 1 deletion(-) - -diff --git a/event-loop-base.c b/event-loop-base.c -index a924c73a7c..e7f99a6ec8 100644 ---- a/event-loop-base.c -+++ b/event-loop-base.c -@@ -73,10 +73,23 @@ static void event_loop_base_complete(UserCreatable *uc, Error **errp) - } - } - -+static bool event_loop_base_can_be_deleted(UserCreatable *uc) -+{ -+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc); -+ EventLoopBase *backend = EVENT_LOOP_BASE(uc); -+ -+ if (bc->can_be_deleted) { -+ return bc->can_be_deleted(backend); -+ } -+ -+ return true; -+} -+ - static void event_loop_base_class_init(ObjectClass *klass, void *class_data) - { - UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); - ucc->complete = event_loop_base_complete; -+ ucc->can_be_deleted = event_loop_base_can_be_deleted; - - object_class_property_add(klass, "aio-max-batch", "int", - event_loop_base_get_param, -diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h -index d3750c8e76..20c9387654 100644 ---- a/include/qemu/main-loop.h -+++ b/include/qemu/main-loop.h -@@ -26,9 +26,19 @@ - #define QEMU_MAIN_LOOP_H - - #include "block/aio.h" -+#include "qom/object.h" -+#include "sysemu/event-loop-base.h" - - #define SIG_IPI SIGUSR1 - -+#define TYPE_MAIN_LOOP "main-loop" -+OBJECT_DECLARE_TYPE(MainLoop, MainLoopClass, MAIN_LOOP) -+ 
-+struct MainLoop { -+ EventLoopBase parent_obj; -+}; -+typedef struct MainLoop MainLoop; -+ - /** - * qemu_init_main_loop: Set up the process so that it can run the main loop. - * -diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h -index 8e77d8b69f..fced4c9fea 100644 ---- a/include/sysemu/event-loop-base.h -+++ b/include/sysemu/event-loop-base.h -@@ -25,6 +25,7 @@ struct EventLoopBaseClass { - - void (*init)(EventLoopBase *base, Error **errp); - void (*update_params)(EventLoopBase *base, Error **errp); -+ bool (*can_be_deleted)(EventLoopBase *base); - }; - - struct EventLoopBase { -diff --git a/meson.build b/meson.build -index b9c919a55e..5a7c10e639 100644 ---- a/meson.build -+++ b/meson.build -@@ -2832,7 +2832,8 @@ libqemuutil = static_library('qemuutil', - sources: util_ss.sources() + stub_ss.sources() + genh, - dependencies: [util_ss.dependencies(), libm, threads, glib, socket, malloc, pixman]) - qemuutil = declare_dependency(link_with: libqemuutil, -- sources: genh + version_res) -+ sources: genh + version_res, -+ dependencies: [event_loop_base]) - - if have_system or have_user - decodetree = generator(find_program('scripts/decodetree.py'), -diff --git a/qapi/qom.json b/qapi/qom.json -index a2439533c5..7d4a2ac1b9 100644 ---- a/qapi/qom.json -+++ b/qapi/qom.json -@@ -540,6 +540,17 @@ - '*poll-grow': 'int', - '*poll-shrink': 'int' } } - -+## -+# @MainLoopProperties: -+# -+# Properties for the main-loop object. 
-+# -+# Since: 7.1 -+## -+{ 'struct': 'MainLoopProperties', -+ 'base': 'EventLoopBaseProperties', -+ 'data': {} } -+ - ## - # @MemoryBackendProperties: - # -@@ -830,6 +841,7 @@ - { 'name': 'input-linux', - 'if': 'CONFIG_LINUX' }, - 'iothread', -+ 'main-loop', - { 'name': 'memory-backend-epc', - 'if': 'CONFIG_LINUX' }, - 'memory-backend-file', -@@ -895,6 +907,7 @@ - 'input-linux': { 'type': 'InputLinuxProperties', - 'if': 'CONFIG_LINUX' }, - 'iothread': 'IothreadProperties', -+ 'main-loop': 'MainLoopProperties', - 'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties', - 'if': 'CONFIG_LINUX' }, - 'memory-backend-file': 'MemoryBackendFileProperties', -diff --git a/util/main-loop.c b/util/main-loop.c -index b7b0ce4ca0..5b13f456fa 100644 ---- a/util/main-loop.c -+++ b/util/main-loop.c -@@ -33,6 +33,7 @@ - #include "qemu/error-report.h" - #include "qemu/queue.h" - #include "qemu/compiler.h" -+#include "qom/object.h" - - #ifndef _WIN32 - #include -@@ -184,6 +185,61 @@ int qemu_init_main_loop(Error **errp) - return 0; - } - -+static void main_loop_update_params(EventLoopBase *base, Error **errp) -+{ -+ if (!qemu_aio_context) { -+ error_setg(errp, "qemu aio context not ready"); -+ return; -+ } -+ -+ aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp); -+} -+ -+MainLoop *mloop; -+ -+static void main_loop_init(EventLoopBase *base, Error **errp) -+{ -+ MainLoop *m = MAIN_LOOP(base); -+ -+ if (mloop) { -+ error_setg(errp, "only one main-loop instance allowed"); -+ return; -+ } -+ -+ main_loop_update_params(base, errp); -+ -+ mloop = m; -+ return; -+} -+ -+static bool main_loop_can_be_deleted(EventLoopBase *base) -+{ -+ return false; -+} -+ -+static void main_loop_class_init(ObjectClass *oc, void *class_data) -+{ -+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(oc); -+ -+ bc->init = main_loop_init; -+ bc->update_params = main_loop_update_params; -+ bc->can_be_deleted = main_loop_can_be_deleted; -+} -+ -+static const TypeInfo main_loop_info = { -+ 
.name = TYPE_MAIN_LOOP, -+ .parent = TYPE_EVENT_LOOP_BASE, -+ .class_init = main_loop_class_init, -+ .instance_size = sizeof(MainLoop), -+}; -+ -+static void main_loop_register_types(void) -+{ -+ type_register_static(&main_loop_info); -+} -+ -+type_init(main_loop_register_types) -+ - static int max_priority; - - #ifndef _WIN32 --- -2.31.1 - diff --git a/kvm-vdpa-Add-device-migration-blocker.patch b/kvm-vdpa-Add-device-migration-blocker.patch deleted file mode 100644 index 1b83c98..0000000 --- a/kvm-vdpa-Add-device-migration-blocker.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 8e0fdce814af4cfc84dce5e5920da989b1f1a86d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 16:06:05 +0200 -Subject: [PATCH 26/32] vdpa: Add device migration blocker -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [26/27] 53d94d45b5e5e88f12b95f9b0f243696cfcbd7ce (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit c156d5bf2b142dcc06808ccee06882144f230aec -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:45 2022 +0200 - - vdpa: Add device migration blocker - - Since the vhost-vdpa device is exposing _F_LOG, adding a migration blocker if - it uses CVQ. - - However, qemu is able to migrate simple devices with no CVQ as long as - they use SVQ. To allow it, add a placeholder error to vhost_vdpa, and - only add to vhost_dev when used. vhost_dev machinery place the migration - blocker if needed. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. 
Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-vdpa.c | 15 +++++++++++++++ - include/hw/virtio/vhost-vdpa.h | 1 + - 2 files changed, 16 insertions(+) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 49effe5462..e3e5bce4bb 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -20,6 +20,7 @@ - #include "hw/virtio/vhost-shadow-virtqueue.h" - #include "hw/virtio/vhost-vdpa.h" - #include "exec/address-spaces.h" -+#include "migration/blocker.h" - #include "qemu/main-loop.h" - #include "cpu.h" - #include "trace.h" -@@ -1020,6 +1021,13 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) - return true; - } - -+ if (v->migration_blocker) { -+ int r = migrate_add_blocker(v->migration_blocker, &err); -+ if (unlikely(r < 0)) { -+ return false; -+ } -+ } -+ - for (i = 0; i < v->shadow_vqs->len; ++i) { - VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); -@@ -1062,6 +1070,10 @@ err: - vhost_svq_stop(svq); - } - -+ if (v->migration_blocker) { -+ migrate_del_blocker(v->migration_blocker); -+ } -+ - return false; - } - -@@ -1081,6 +1093,9 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev) - } - } - -+ if (v->migration_blocker) { -+ migrate_del_blocker(v->migration_blocker); -+ } - return true; - } - -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index 1111d85643..d10a89303e 100644 ---- a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -35,6 +35,7 @@ typedef struct vhost_vdpa { - bool shadow_vqs_enabled; - /* IOVA mapping used by the Shadow Virtqueue */ - VhostIOVATree *iova_tree; -+ Error *migration_blocker; - GPtrArray *shadow_vqs; - const VhostShadowVirtqueueOps *shadow_vq_ops; - void *shadow_vq_ops_opaque; --- -2.31.1 - diff --git a/kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch 
b/kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch deleted file mode 100644 index 4dede70..0000000 --- a/kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch +++ /dev/null @@ -1,87 +0,0 @@ -From e1f9986cf77e4b2f16aca7b2523bc75bae0c4d3c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:36 +0200 -Subject: [PATCH 21/23] vdpa: Add virtio-net mac address via CVQ at start -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [20/21] a7920816d5faf7a0cfbb7c2731a48ddfc456b8d4 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -This is needed so the destination vdpa device see the same state a the -guest set in the source. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit f34cd09b13855657a0d49c5ea6a1e37ba9dc2334) ---- - net/vhost-vdpa.c | 40 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 40 insertions(+) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index f09f044ec1..79ebda7de1 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -363,11 +363,51 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, - return vhost_svq_poll(svq); - } - -+static int vhost_vdpa_net_load(NetClientState *nc) -+{ -+ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -+ const struct vhost_vdpa *v = &s->vhost_vdpa; -+ const VirtIONet *n; -+ uint64_t features; -+ -+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); -+ -+ if (!v->shadow_vqs_enabled) { -+ return 0; -+ } -+ -+ n = VIRTIO_NET(v->dev->vdev); -+ features = n->parent_obj.guest_features; -+ if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) { -+ const 
struct virtio_net_ctrl_hdr ctrl = { -+ .class = VIRTIO_NET_CTRL_MAC, -+ .cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET, -+ }; -+ char *cursor = s->cvq_cmd_out_buffer; -+ ssize_t dev_written; -+ -+ memcpy(cursor, &ctrl, sizeof(ctrl)); -+ cursor += sizeof(ctrl); -+ memcpy(cursor, n->mac, sizeof(n->mac)); -+ -+ dev_written = vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + sizeof(n->mac), -+ sizeof(virtio_net_ctrl_ack)); -+ if (unlikely(dev_written < 0)) { -+ return dev_written; -+ } -+ -+ return *((virtio_net_ctrl_ack *)s->cvq_cmd_in_buffer) != VIRTIO_NET_OK; -+ } -+ -+ return 0; -+} -+ - static NetClientInfo net_vhost_vdpa_cvq_info = { - .type = NET_CLIENT_DRIVER_VHOST_VDPA, - .size = sizeof(VhostVDPAState), - .receive = vhost_vdpa_receive, - .start = vhost_vdpa_net_cvq_start, -+ .load = vhost_vdpa_net_load, - .stop = vhost_vdpa_net_cvq_stop, - .cleanup = vhost_vdpa_cleanup, - .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, --- -2.31.1 - diff --git a/kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch b/kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch deleted file mode 100644 index 8a7b600..0000000 --- a/kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch +++ /dev/null @@ -1,223 +0,0 @@ -From 0b27781f9984c67625c49a516c3e38fbf5fa1b1b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 16:06:16 +0200 -Subject: [PATCH 27/32] vdpa: Add x-svq to NetdevVhostVDPAOptions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [27/27] bd85496c2a8c1ebf34f908fca2be2ab9852fd0e9 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 1576dbb5bbc49344c606e969ec749be70c0fd94e -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:46 2022 +0200 - - vdpa: Add x-svq 
to NetdevVhostVDPAOptions - - Finally offering the possibility to enable SVQ from the command line. - - Signed-off-by: Eugenio Pérez - Acked-by: Markus Armbruster - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - net/vhost-vdpa.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++-- - qapi/net.json | 9 +++++- - 2 files changed, 77 insertions(+), 4 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 8b76dac966..50672bcd66 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -75,6 +75,28 @@ const int vdpa_feature_bits[] = { - VHOST_INVALID_FEATURE_BIT - }; - -+/** Supported device specific feature bits with SVQ */ -+static const uint64_t vdpa_svq_device_features = -+ BIT_ULL(VIRTIO_NET_F_CSUM) | -+ BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | -+ BIT_ULL(VIRTIO_NET_F_MTU) | -+ BIT_ULL(VIRTIO_NET_F_MAC) | -+ BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | -+ BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | -+ BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | -+ BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | -+ BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | -+ BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | -+ BIT_ULL(VIRTIO_NET_F_HOST_ECN) | -+ BIT_ULL(VIRTIO_NET_F_HOST_UFO) | -+ BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | -+ BIT_ULL(VIRTIO_NET_F_STATUS) | -+ BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | -+ BIT_ULL(VIRTIO_F_ANY_LAYOUT) | -+ BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | -+ BIT_ULL(VIRTIO_NET_F_RSC_EXT) | -+ BIT_ULL(VIRTIO_NET_F_STANDBY); -+ - VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -@@ -133,9 +155,13 @@ err_init: - static void vhost_vdpa_cleanup(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -+ struct vhost_dev *dev = &s->vhost_net->dev; - - qemu_vfree(s->cvq_cmd_out_buffer); - qemu_vfree(s->cvq_cmd_in_buffer); -+ if (dev->vq_index + dev->nvqs == dev->vq_index_end) { -+ g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); -+ } - if (s->vhost_net) { - 
vhost_net_cleanup(s->vhost_net); - g_free(s->vhost_net); -@@ -437,7 +463,9 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - int vdpa_device_fd, - int queue_pair_index, - int nvqs, -- bool is_datapath) -+ bool is_datapath, -+ bool svq, -+ VhostIOVATree *iova_tree) - { - NetClientState *nc = NULL; - VhostVDPAState *s; -@@ -455,6 +483,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - - s->vhost_vdpa.device_fd = vdpa_device_fd; - s->vhost_vdpa.index = queue_pair_index; -+ s->vhost_vdpa.shadow_vqs_enabled = svq; -+ s->vhost_vdpa.iova_tree = iova_tree; - if (!is_datapath) { - s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size, - vhost_vdpa_net_cvq_cmd_page_len()); -@@ -465,6 +495,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - - s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; - s->vhost_vdpa.shadow_vq_ops_opaque = s; -+ error_setg(&s->vhost_vdpa.migration_blocker, -+ "Migration disabled: vhost-vdpa uses CVQ."); - } - ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); - if (ret) { -@@ -474,6 +506,14 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - return nc; - } - -+static int vhost_vdpa_get_iova_range(int fd, -+ struct vhost_vdpa_iova_range *iova_range) -+{ -+ int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range); -+ -+ return ret < 0 ? 
-errno : 0; -+} -+ - static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp) - { - int ret = ioctl(fd, VHOST_GET_FEATURES, features); -@@ -524,6 +564,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - uint64_t features; - int vdpa_device_fd; - g_autofree NetClientState **ncs = NULL; -+ g_autoptr(VhostIOVATree) iova_tree = NULL; - NetClientState *nc; - int queue_pairs, r, i, has_cvq = 0; - -@@ -551,22 +592,45 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - return queue_pairs; - } - -+ if (opts->x_svq) { -+ struct vhost_vdpa_iova_range iova_range; -+ -+ uint64_t invalid_dev_features = -+ features & ~vdpa_svq_device_features & -+ /* Transport are all accepted at this point */ -+ ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, -+ VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); -+ -+ if (invalid_dev_features) { -+ error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, -+ invalid_dev_features); -+ goto err_svq; -+ } -+ -+ vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); -+ iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last); -+ } -+ - ncs = g_malloc0(sizeof(*ncs) * queue_pairs); - - for (i = 0; i < queue_pairs; i++) { - ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, -- vdpa_device_fd, i, 2, true); -+ vdpa_device_fd, i, 2, true, opts->x_svq, -+ iova_tree); - if (!ncs[i]) - goto err; - } - - if (has_cvq) { - nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, -- vdpa_device_fd, i, 1, false); -+ vdpa_device_fd, i, 1, false, -+ opts->x_svq, iova_tree); - if (!nc) - goto err; - } - -+ /* iova_tree ownership belongs to last NetClientState */ -+ g_steal_pointer(&iova_tree); - return 0; - - err: -@@ -575,6 +639,8 @@ err: - qemu_del_net_client(ncs[i]); - } - } -+ -+err_svq: - qemu_close(vdpa_device_fd); - - return -1; -diff --git a/qapi/net.json b/qapi/net.json -index b92f3f5fb4..92848e4362 100644 ---- a/qapi/net.json -+++ b/qapi/net.json -@@ -445,12 +445,19 @@ - # 
@queues: number of queues to be created for multiqueue vhost-vdpa - # (default: 1) - # -+# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1) -+# (default: false) -+# -+# Features: -+# @unstable: Member @x-svq is experimental. -+# - # Since: 5.1 - ## - { 'struct': 'NetdevVhostVDPAOptions', - 'data': { - '*vhostdev': 'str', -- '*queues': 'int' } } -+ '*queues': 'int', -+ '*x-svq': {'type': 'bool', 'features' : [ 'unstable'] } } } - - ## - # @NetClientDriver: --- -2.31.1 - diff --git a/kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch b/kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch deleted file mode 100644 index acd45e0..0000000 --- a/kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch +++ /dev/null @@ -1,65 +0,0 @@ -From df06ce560ddfefde98bef822ec2020382059921f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 10/32] vdpa: Avoid compiler to squash reads to used idx -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [10/27] b28789302d4f64749da26f413763f918161d9b70 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit c381abc37f0aba42ed2e3b41cdace8f8438829e4 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:29 2022 +0200 - - vdpa: Avoid compiler to squash reads to used idx - - In the next patch we will allow busypolling of this value. The compiler - have a running path where shadow_used_idx, last_used_idx, and vring used - idx are not modified within the same thread busypolling. - - This was not an issue before since we always cleared device event - notifier before checking it, and that could act as memory barrier. 
- However, the busypoll needs something similar to kernel READ_ONCE. - - Let's add it here, sepparated from the polling. - - Signed-off-by: Eugenio Pérez - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 3fbda1e3d4..9c46c3a8fa 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -327,11 +327,12 @@ static void vhost_handle_guest_kick_notifier(EventNotifier *n) - - static bool vhost_svq_more_used(VhostShadowVirtqueue *svq) - { -+ uint16_t *used_idx = &svq->vring.used->idx; - if (svq->last_used_idx != svq->shadow_used_idx) { - return true; - } - -- svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx); -+ svq->shadow_used_idx = cpu_to_le16(*(volatile uint16_t *)used_idx); - - return svq->last_used_idx != svq->shadow_used_idx; - } --- -2.31.1 - diff --git a/kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch b/kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch deleted file mode 100644 index 243aec8..0000000 --- a/kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch +++ /dev/null @@ -1,323 +0,0 @@ -From 881945094c0e4d33614d40959bfc20e395f5a478 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 16:05:40 +0200 -Subject: [PATCH 24/32] vdpa: Buffer CVQ support on shadow virtqueue -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [24/27] 5486f80141a3ad968a32e782bdcdead32f417352 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 
2df4dd31e194c94da7d28c02e92449f4a989fca9 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:43 2022 +0200 - - vdpa: Buffer CVQ support on shadow virtqueue - - Introduce the control virtqueue support for vDPA shadow virtqueue. This - is needed for advanced networking features like rx filtering. - - Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid - TOCTOU with the guest's or device's memory every time there is a device - model change. Otherwise, the guest could change the memory content in - the time between qemu and the device read it. - - To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is - implemented. If the virtio-net driver changes MAC the virtio-net device - model will be updated with the new one, and a rx filtering change event - will be raised. - - More cvq commands could be added here straightforwardly but they have - not been tested. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - net/vhost-vdpa.c | 213 +++++++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 205 insertions(+), 8 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 2e3b6b10d8..df42822463 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -33,6 +33,9 @@ typedef struct VhostVDPAState { - NetClientState nc; - struct vhost_vdpa vhost_vdpa; - VHostNetState *vhost_net; -+ -+ /* Control commands shadow buffers */ -+ void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer; - bool started; - } VhostVDPAState; - -@@ -131,6 +134,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); - -+ qemu_vfree(s->cvq_cmd_out_buffer); -+ qemu_vfree(s->cvq_cmd_in_buffer); - if (s->vhost_net) { - vhost_net_cleanup(s->vhost_net); - g_free(s->vhost_net); -@@ -190,24 +195,191 @@ static NetClientInfo net_vhost_vdpa_info = { - .check_peer_type = vhost_vdpa_check_peer_type, - }; - -+static void 
vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) -+{ -+ VhostIOVATree *tree = v->iova_tree; -+ DMAMap needle = { -+ /* -+ * No need to specify size or to look for more translations since -+ * this contiguous chunk was allocated by us. -+ */ -+ .translated_addr = (hwaddr)(uintptr_t)addr, -+ }; -+ const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle); -+ int r; -+ -+ if (unlikely(!map)) { -+ error_report("Cannot locate expected map"); -+ return; -+ } -+ -+ r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1); -+ if (unlikely(r != 0)) { -+ error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); -+ } -+ -+ vhost_iova_tree_remove(tree, map); -+} -+ -+static size_t vhost_vdpa_net_cvq_cmd_len(void) -+{ -+ /* -+ * MAC_TABLE_SET is the ctrl command that produces the longer out buffer. -+ * In buffer is always 1 byte, so it should fit here -+ */ -+ return sizeof(struct virtio_net_ctrl_hdr) + -+ 2 * sizeof(struct virtio_net_ctrl_mac) + -+ MAC_TABLE_ENTRIES * ETH_ALEN; -+} -+ -+static size_t vhost_vdpa_net_cvq_cmd_page_len(void) -+{ -+ return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size); -+} -+ -+/** Copy and map a guest buffer. */ -+static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, -+ const struct iovec *out_data, -+ size_t out_num, size_t data_len, void *buf, -+ size_t *written, bool write) -+{ -+ DMAMap map = {}; -+ int r; -+ -+ if (unlikely(!data_len)) { -+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n", -+ __func__, write ? "in" : "out"); -+ return false; -+ } -+ -+ *written = iov_to_buf(out_data, out_num, 0, buf, data_len); -+ map.translated_addr = (hwaddr)(uintptr_t)buf; -+ map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1; -+ map.perm = write ? 
IOMMU_RW : IOMMU_RO, -+ r = vhost_iova_tree_map_alloc(v->iova_tree, &map); -+ if (unlikely(r != IOVA_OK)) { -+ error_report("Cannot map injected element"); -+ return false; -+ } -+ -+ r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, -+ !write); -+ if (unlikely(r < 0)) { -+ goto dma_map_err; -+ } -+ -+ return true; -+ -+dma_map_err: -+ vhost_iova_tree_remove(v->iova_tree, &map); -+ return false; -+} -+ - /** -- * Forward buffer for the moment. -+ * Copy the guest element into a dedicated buffer suitable to be sent to NIC -+ * -+ * @iov: [0] is the out buffer, [1] is the in one -+ */ -+static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, -+ VirtQueueElement *elem, -+ struct iovec *iov) -+{ -+ size_t in_copied; -+ bool ok; -+ -+ iov[0].iov_base = s->cvq_cmd_out_buffer; -+ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num, -+ vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base, -+ &iov[0].iov_len, false); -+ if (unlikely(!ok)) { -+ return false; -+ } -+ -+ iov[1].iov_base = s->cvq_cmd_in_buffer; -+ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0, -+ sizeof(virtio_net_ctrl_ack), iov[1].iov_base, -+ &in_copied, true); -+ if (unlikely(!ok)) { -+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); -+ return false; -+ } -+ -+ iov[1].iov_len = sizeof(virtio_net_ctrl_ack); -+ return true; -+} -+ -+/** -+ * Do not forward commands not supported by SVQ. Otherwise, the device could -+ * accept it and qemu would not know how to update the device model. 
-+ */ -+static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out, -+ size_t out_num) -+{ -+ struct virtio_net_ctrl_hdr ctrl; -+ size_t n; -+ -+ n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl)); -+ if (unlikely(n < sizeof(ctrl))) { -+ qemu_log_mask(LOG_GUEST_ERROR, -+ "%s: invalid legnth of out buffer %zu\n", __func__, n); -+ return false; -+ } -+ -+ switch (ctrl.class) { -+ case VIRTIO_NET_CTRL_MAC: -+ switch (ctrl.cmd) { -+ case VIRTIO_NET_CTRL_MAC_ADDR_SET: -+ return true; -+ default: -+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n", -+ __func__, ctrl.cmd); -+ }; -+ break; -+ default: -+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n", -+ __func__, ctrl.class); -+ }; -+ -+ return false; -+} -+ -+/** -+ * Validate and copy control virtqueue commands. -+ * -+ * Following QEMU guidelines, we offer a copy of the buffers to the device to -+ * prevent TOCTOU bugs. - */ - static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - VirtQueueElement *elem, - void *opaque) - { -- unsigned int n = elem->out_num + elem->in_num; -- g_autofree struct iovec *dev_buffers = g_new(struct iovec, n); -+ VhostVDPAState *s = opaque; - size_t in_len, dev_written; - virtio_net_ctrl_ack status = VIRTIO_NET_ERR; -- int r; -+ /* out and in buffers sent to the device */ -+ struct iovec dev_buffers[2] = { -+ { .iov_base = s->cvq_cmd_out_buffer }, -+ { .iov_base = s->cvq_cmd_in_buffer }, -+ }; -+ /* in buffer used for device model */ -+ const struct iovec in = { -+ .iov_base = &status, -+ .iov_len = sizeof(status), -+ }; -+ int r = -EINVAL; -+ bool ok; -+ -+ ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers); -+ if (unlikely(!ok)) { -+ goto out; -+ } - -- memcpy(dev_buffers, elem->out_sg, elem->out_num); -- memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num); -+ ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1); -+ if (unlikely(!ok)) { -+ goto out; -+ } - -- r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, 
&dev_buffers[1], -- elem->in_num, elem); -+ r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem); - if (unlikely(r != 0)) { - if (unlikely(r == -ENOSPC)) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", -@@ -224,6 +396,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - dev_written = vhost_svq_poll(svq); - if (unlikely(dev_written < sizeof(status))) { - error_report("Insufficient written data (%zu)", dev_written); -+ goto out; -+ } -+ -+ memcpy(&status, dev_buffers[1].iov_base, sizeof(status)); -+ if (status != VIRTIO_NET_OK) { -+ goto out; -+ } -+ -+ status = VIRTIO_NET_ERR; -+ virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1); -+ if (status != VIRTIO_NET_OK) { -+ error_report("Bad CVQ processing in model"); - } - - out: -@@ -234,6 +418,12 @@ out: - } - vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); - g_free(elem); -+ if (dev_buffers[0].iov_base) { -+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base); -+ } -+ if (dev_buffers[1].iov_base) { -+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base); -+ } - return r; - } - -@@ -266,6 +456,13 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - s->vhost_vdpa.device_fd = vdpa_device_fd; - s->vhost_vdpa.index = queue_pair_index; - if (!is_datapath) { -+ s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size, -+ vhost_vdpa_net_cvq_cmd_page_len()); -+ memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); -+ s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size, -+ vhost_vdpa_net_cvq_cmd_page_len()); -+ memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); -+ - s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; - s->vhost_vdpa.shadow_vq_ops_opaque = s; - } --- -2.31.1 - diff --git a/kvm-vdpa-Delete-CVQ-migration-blocker.patch b/kvm-vdpa-Delete-CVQ-migration-blocker.patch deleted file mode 100644 index 87dfb5a..0000000 --- 
a/kvm-vdpa-Delete-CVQ-migration-blocker.patch +++ /dev/null @@ -1,98 +0,0 @@ -From 896f7749c72afe988ab28ac6af77b9c53b685c03 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:37 +0200 -Subject: [PATCH 22/23] vdpa: Delete CVQ migration blocker -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [21/21] 286f55177a132a8845c2912fb28cb4add472005a (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -We can restore the device state in the destination via CVQ now. Remove -the migration blocker. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit fe2b0cd71cddbec4eaf6e325eaf357a4e72a469d) ---- - hw/virtio/vhost-vdpa.c | 15 --------------- - include/hw/virtio/vhost-vdpa.h | 1 - - net/vhost-vdpa.c | 2 -- - 3 files changed, 18 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 0bea1e1eb9..b61e313953 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -1031,13 +1031,6 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) - return true; - } - -- if (v->migration_blocker) { -- int r = migrate_add_blocker(v->migration_blocker, &err); -- if (unlikely(r < 0)) { -- return false; -- } -- } -- - for (i = 0; i < v->shadow_vqs->len; ++i) { - VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); -@@ -1080,10 +1073,6 @@ err: - vhost_svq_stop(svq); - } - -- if (v->migration_blocker) { -- migrate_del_blocker(v->migration_blocker); -- } -- - return false; - } - -@@ -1099,10 +1088,6 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) - 
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); - vhost_vdpa_svq_unmap_rings(dev, svq); - } -- -- if (v->migration_blocker) { -- migrate_del_blocker(v->migration_blocker); -- } - } - - static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index d10a89303e..1111d85643 100644 ---- a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -35,7 +35,6 @@ typedef struct vhost_vdpa { - bool shadow_vqs_enabled; - /* IOVA mapping used by the Shadow Virtqueue */ - VhostIOVATree *iova_tree; -- Error *migration_blocker; - GPtrArray *shadow_vqs; - const VhostShadowVirtqueueOps *shadow_vq_ops; - void *shadow_vq_ops_opaque; -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 79ebda7de1..f4f16583e4 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -555,8 +555,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - - s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; - s->vhost_vdpa.shadow_vq_ops_opaque = s; -- error_setg(&s->vhost_vdpa.migration_blocker, -- "Migration disabled: vhost-vdpa uses CVQ."); - } - ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); - if (ret) { --- -2.31.1 - diff --git a/kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch b/kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch deleted file mode 100644 index d6e72ac..0000000 --- a/kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 3a5d325fcb2958318262efac31d5fd25fb062523 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 21/32] vdpa: Export vhost_vdpa_dma_map and unmap calls -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [21/27] 
97e7a583bbd3c12a0786d53132812ec41702c190 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 463ba1e3b8cf080812895c5f26d95d8d7db2e692 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:40 2022 +0200 - - vdpa: Export vhost_vdpa_dma_map and unmap calls - - Shadow CVQ will copy buffers on qemu VA, so we avoid TOCTOU attacks from - the guest that could set a different state in qemu device model and vdpa - device. - - To do so, it needs to be able to map these new buffers to the device. - - Signed-off-by: Eugenio Pérez - Acked-by: Jason Wang - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-vdpa.c | 7 +++---- - include/hw/virtio/vhost-vdpa.h | 4 ++++ - 2 files changed, 7 insertions(+), 4 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 28df57b12e..14b02fe079 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -71,8 +71,8 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, - return false; - } - --static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, -- void *vaddr, bool readonly) -+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, -+ void *vaddr, bool readonly) - { - struct vhost_msg_v2 msg = {}; - int fd = v->device_fd; -@@ -97,8 +97,7 @@ static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, - return ret; - } - --static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, -- hwaddr size) -+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size) - { - struct vhost_msg_v2 msg = {}; - int fd = v->device_fd; -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index a29dbb3f53..7214eb47dc 100644 ---- 
a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -39,4 +39,8 @@ typedef struct vhost_vdpa { - VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; - } VhostVDPA; - -+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, -+ void *vaddr, bool readonly); -+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size); -+ - #endif --- -2.31.1 - diff --git a/kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch b/kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch deleted file mode 100644 index 44e97af..0000000 --- a/kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 9a290bd74f983f3a65aa9ec5df2da9aa94bfdecd Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 16:05:42 +0200 -Subject: [PATCH 25/32] vdpa: Extract get features part from - vhost_vdpa_get_max_queue_pairs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [25/27] 654ad68e10a4df84cced923c64e72d500721ad67 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 8170ab3f43989680491d00f1017f60b25d346114 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:44 2022 +0200 - - vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs - - To know the device features is needed for CVQ SVQ, so SVQ knows if it - can handle all commands or not. Extract from - vhost_vdpa_get_max_queue_pairs so we can reuse it. - - Signed-off-by: Eugenio Pérez - Acked-by: Jason Wang - Reviewed-by: Michael S. 
Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - net/vhost-vdpa.c | 30 ++++++++++++++++++++---------- - 1 file changed, 20 insertions(+), 10 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index df42822463..8b76dac966 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -474,20 +474,24 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - return nc; - } - --static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp) -+static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp) -+{ -+ int ret = ioctl(fd, VHOST_GET_FEATURES, features); -+ if (unlikely(ret < 0)) { -+ error_setg_errno(errp, errno, -+ "Fail to query features from vhost-vDPA device"); -+ } -+ return ret; -+} -+ -+static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features, -+ int *has_cvq, Error **errp) - { - unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); - g_autofree struct vhost_vdpa_config *config = NULL; - __virtio16 *max_queue_pairs; -- uint64_t features; - int ret; - -- ret = ioctl(fd, VHOST_GET_FEATURES, &features); -- if (ret) { -- error_setg(errp, "Fail to query features from vhost-vDPA device"); -- return ret; -- } -- - if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) { - *has_cvq = 1; - } else { -@@ -517,10 +521,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - NetClientState *peer, Error **errp) - { - const NetdevVhostVDPAOptions *opts; -+ uint64_t features; - int vdpa_device_fd; - g_autofree NetClientState **ncs = NULL; - NetClientState *nc; -- int queue_pairs, i, has_cvq = 0; -+ int queue_pairs, r, i, has_cvq = 0; - - assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); - opts = &netdev->u.vhost_vdpa; -@@ -534,7 +539,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - return -errno; - } - -- queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, -+ r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp); -+ if 
(unlikely(r < 0)) { -+ return r; -+ } -+ -+ queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features, - &has_cvq, errp); - if (queue_pairs < 0) { - qemu_close(vdpa_device_fd); --- -2.31.1 - diff --git a/kvm-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch b/kvm-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch deleted file mode 100644 index 2d0d55f..0000000 --- a/kvm-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch +++ /dev/null @@ -1,50 +0,0 @@ -From e19adb058502e24580dbc4f6f944cd951ca288ed Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 12 May 2022 19:57:44 +0200 -Subject: [PATCH 08/11] vdpa: Fix bad index calculus at - vhost_vdpa_get_vring_base -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 114: vdpa: Fix memory listener deletions of iova tree -RH-Commit: [1/4] 754fb8960684fa7a91bddb18c8df58c3b947ee75 (eperezmartin/qemu-kvm) -RH-Bugzilla: 2116876 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Fixes: 6d0b222666 ("vdpa: Adapt vhost_vdpa_get_vring_base to SVQ") - -Acked-by: Jason Wang -Signed-off-by: Eugenio Pérez -Message-Id: <20220512175747.142058-4-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 639036477ef890958415967e753ca2cbb348c16c) ---- - hw/virtio/vhost-vdpa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index e3e5bce4bb..a7dfac530f 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -1193,11 +1193,11 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, - struct vhost_vring_state *ring) - { - struct vhost_vdpa *v = dev->opaque; -+ int vdpa_idx = ring->index - dev->vq_index; - int ret; - - if (v->shadow_vqs_enabled) { -- VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, -- ring->index); -+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx); - - /* - * Setting base as last used idx, so destination will see as available --- -2.31.1 - diff --git a/kvm-vdpa-Fix-file-descriptor-leak-on-get-features-error.patch b/kvm-vdpa-Fix-file-descriptor-leak-on-get-features-error.patch deleted file mode 100644 index 1757d3f..0000000 --- a/kvm-vdpa-Fix-file-descriptor-leak-on-get-features-error.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 71857062b7aea29fc418e107244cf4083cd78cd7 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 2 Aug 2022 13:24:46 +0200 -Subject: [PATCH 11/11] vdpa: Fix file descriptor leak on get features error -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 114: vdpa: Fix memory listener deletions of iova tree -RH-Commit: [4/4] bdfe6ed4539ecf68dc8bc4519755f9d5c096447d (eperezmartin/qemu-kvm) -RH-Bugzilla: 2116876 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -File descriptor vdpa_device_fd is not free in the case of returning -error from vhost_vdpa_get_features. Fixing it by making all errors go to -the same error path. 
- -Resolves: Coverity CID 1490785 -Fixes: 8170ab3f43 ("vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs") - -Signed-off-by: Eugenio Pérez -Reviewed-by: Laurent Vivier -Reviewed-by: Michael S. Tsirkin -Message-Id: <20220802112447.249436-2-eperezma@redhat.com> -Signed-off-by: Laurent Vivier -(cherry picked from commit aed5da45daf734ddc543c0791e877dac75e16f61) ---- - net/vhost-vdpa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 50672bcd66..411e71e6c2 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -566,7 +566,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - g_autofree NetClientState **ncs = NULL; - g_autoptr(VhostIOVATree) iova_tree = NULL; - NetClientState *nc; -- int queue_pairs, r, i, has_cvq = 0; -+ int queue_pairs, r, i = 0, has_cvq = 0; - - assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); - opts = &netdev->u.vhost_vdpa; -@@ -582,7 +582,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - - r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp); - if (unlikely(r < 0)) { -- return r; -+ goto err; - } - - queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features, --- -2.31.1 - diff --git a/kvm-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch b/kvm-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch deleted file mode 100644 index 8125cb2..0000000 --- a/kvm-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 6335431b70dd55c1d52152d726fa462db2e10eb8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 12 May 2022 19:57:45 +0200 -Subject: [PATCH 09/11] vdpa: Fix index calculus at vhost_vdpa_svqs_start -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 114: vdpa: Fix memory listener deletions of iova tree -RH-Commit: [2/4] 
9ce732e6bba426f8e00020ee6ad77f972f3e75b5 (eperezmartin/qemu-kvm) -RH-Bugzilla: 2116876 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -With the introduction of MQ the index of the vq needs to be calculated -with the device model vq_index. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20220512175747.142058-5-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 1c82fdfef8a227518ffecae9d419bcada995c202) ---- - hw/virtio/vhost-vdpa.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index a7dfac530f..f877b354fa 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -1032,7 +1032,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) - VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); - struct vhost_vring_addr addr = { -- .index = i, -+ .index = dev->vq_index + i, - }; - int r; - bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err); --- -2.31.1 - diff --git a/kvm-vdpa-Fix-memory-listener-deletions-of-iova-tree.patch b/kvm-vdpa-Fix-memory-listener-deletions-of-iova-tree.patch deleted file mode 100644 index e6f1d39..0000000 --- a/kvm-vdpa-Fix-memory-listener-deletions-of-iova-tree.patch +++ /dev/null @@ -1,61 +0,0 @@ -From b212edc97a471c75f8b8b44ee2a3a2cf82ef14d9 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 22 Jul 2022 10:26:30 +0200 -Subject: [PATCH 10/11] vdpa: Fix memory listener deletions of iova tree -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 114: vdpa: Fix memory listener deletions of iova tree -RH-Commit: [3/4] ad71f098b3fa8654962ac7872b5393c37c9825f2 (eperezmartin/qemu-kvm) -RH-Bugzilla: 2116876 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu 
-RH-Acked-by: Laurent Vivier - -vhost_vdpa_listener_region_del is always deleting the first iova entry -of the tree, since it's using the needle iova instead of the result's -one. - -This was detected using a vga virtual device in the VM using vdpa SVQ. -It makes some extra memory adding and deleting, so the wrong one was -mapped / unmapped. This was undetected before since all the memory was -mappend and unmapped totally without that device, but other conditions -could trigger it too: - -* mem_region was with .iova = 0, .translated_addr = (correct GPA). -* iova_tree_find_iova returned right result, but does not update - mem_region. -* iova_tree_remove always removed region with .iova = 0. Right iova were - sent to the device. -* Next map will fill the first region with .iova = 0, causing a mapping - with the same iova and device complains, if the next action is a map. -* Next unmap will cause to try to unmap again iova = 0, causing the - device to complain that no region was mapped at iova = 0. 
- -Fixes: 34e3c94edaef ("vdpa: Add custom IOTLB translations to SVQ") -Reported-by: Lei Yang -Signed-off-by: Eugenio Pérez -Signed-off-by: Jason Wang -(cherry picked from commit 75a8ce64f6e37513698857fb4284170da163ed06) ---- - hw/virtio/vhost-vdpa.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index f877b354fa..03dc6014b0 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -288,7 +288,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, - - result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region); - iova = result->iova; -- vhost_iova_tree_remove(v->iova_tree, &mem_region); -+ vhost_iova_tree_remove(v->iova_tree, result); - } - vhost_vdpa_iotlb_batch_begin_once(v); - ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); --- -2.31.1 - diff --git a/kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch b/kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch deleted file mode 100644 index e45a198..0000000 --- a/kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch +++ /dev/null @@ -1,133 +0,0 @@ -From 8e36feb4d3480b7c09d9dcbde18c9db1e8063f18 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:06 +0200 -Subject: [PATCH 08/23] vdpa: Make SVQ vring unmapping return void -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/21] 3366340dc7ae65f83894f5d0da0d1e0f64713751 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -Nothing actually reads the return value, but an error in cleaning some -entries could cause device stop to abort, making a restart impossible. 
-Better ignore explicitely the return value. - -Reported-by: Lei Yang -Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ") -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit bb5cf89ef2338ab6be946ede6821c3f61347eb1b) ---- - hw/virtio/vhost-vdpa.c | 32 ++++++++++---------------------- - 1 file changed, 10 insertions(+), 22 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index e5c264fb29..8eddf39f2a 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -882,7 +882,7 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, - /** - * Unmap a SVQ area in the device - */ --static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, -+static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, - const DMAMap *needle) - { - const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle); -@@ -891,38 +891,33 @@ static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, - - if (unlikely(!result)) { - error_report("Unable to find SVQ address to unmap"); -- return false; -+ return; - } - - size = ROUND_UP(result->size, qemu_real_host_page_size); - r = vhost_vdpa_dma_unmap(v, result->iova, size); - if (unlikely(r < 0)) { - error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); -- return false; -+ return; - } - - vhost_iova_tree_remove(v->iova_tree, *result); -- return r == 0; - } - --static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, -+static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, - const VhostShadowVirtqueue *svq) - { - DMAMap needle = {}; - struct vhost_vdpa *v = dev->opaque; - struct vhost_vring_addr svq_addr; -- bool ok; - - vhost_svq_get_vring_addr(svq, &svq_addr); - - needle.translated_addr = svq_addr.desc_user_addr; -- ok = vhost_vdpa_svq_unmap_ring(v, &needle); -- if (unlikely(!ok)) { -- return false; -- } -+ vhost_vdpa_svq_unmap_ring(v, &needle); - - needle.translated_addr = svq_addr.used_user_addr; -- 
return vhost_vdpa_svq_unmap_ring(v, &needle); -+ vhost_vdpa_svq_unmap_ring(v, &needle); - } - - /** -@@ -1093,26 +1088,22 @@ err: - return false; - } - --static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev) -+static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) - { - struct vhost_vdpa *v = dev->opaque; - - if (!v->shadow_vqs) { -- return true; -+ return; - } - - for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); -- bool ok = vhost_vdpa_svq_unmap_rings(dev, svq); -- if (unlikely(!ok)) { -- return false; -- } -+ vhost_vdpa_svq_unmap_rings(dev, svq); - } - - if (v->migration_blocker) { - migrate_del_blocker(v->migration_blocker); - } -- return true; - } - - static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) -@@ -1129,10 +1120,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) - } - vhost_vdpa_set_vring_ready(dev); - } else { -- ok = vhost_vdpa_svqs_stop(dev); -- if (unlikely(!ok)) { -- return -1; -- } -+ vhost_vdpa_svqs_stop(dev); - vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); - } - --- -2.31.1 - diff --git a/kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch b/kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch deleted file mode 100644 index 7cdf05c..0000000 --- a/kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch +++ /dev/null @@ -1,251 +0,0 @@ -From 70c72316c26e95cd18b4d46b83e78ba3a148212c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:33 +0200 -Subject: [PATCH 18/23] vdpa: Move command buffers map to start of net device -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [17/21] 7a9824fa618f5c2904648b50e3078474cd3987aa (eperezmartin/qemu-kvm) 
-Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -As this series will reuse them to restore the device state at the end of -a migration (or a device start), let's allocate only once at the device -start so we don't duplicate their map and unmap. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit d7d73dec14cebcebd8de774424795aeb821236c1) ---- - net/vhost-vdpa.c | 123 ++++++++++++++++++++++------------------------- - 1 file changed, 58 insertions(+), 65 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 03e4cf1abc..17626feb8d 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -263,29 +263,20 @@ static size_t vhost_vdpa_net_cvq_cmd_page_len(void) - return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size); - } - --/** Copy and map a guest buffer. */ --static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, -- const struct iovec *out_data, -- size_t out_num, size_t data_len, void *buf, -- size_t *written, bool write) -+/** Map CVQ buffer. */ -+static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, -+ bool write) - { - DMAMap map = {}; - int r; - -- if (unlikely(!data_len)) { -- qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n", -- __func__, write ? "in" : "out"); -- return false; -- } -- -- *written = iov_to_buf(out_data, out_num, 0, buf, data_len); - map.translated_addr = (hwaddr)(uintptr_t)buf; -- map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1; -+ map.size = size - 1; - map.perm = write ? 
IOMMU_RW : IOMMU_RO, - r = vhost_iova_tree_map_alloc(v->iova_tree, &map); - if (unlikely(r != IOVA_OK)) { - error_report("Cannot map injected element"); -- return false; -+ return r; - } - - r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, -@@ -294,50 +285,58 @@ static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, - goto dma_map_err; - } - -- return true; -+ return 0; - - dma_map_err: - vhost_iova_tree_remove(v->iova_tree, map); -- return false; -+ return r; - } - --/** -- * Copy the guest element into a dedicated buffer suitable to be sent to NIC -- * -- * @iov: [0] is the out buffer, [1] is the in one -- */ --static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, -- VirtQueueElement *elem, -- struct iovec *iov) -+static int vhost_vdpa_net_cvq_start(NetClientState *nc) - { -- size_t in_copied; -- bool ok; -+ VhostVDPAState *s; -+ int r; - -- iov[0].iov_base = s->cvq_cmd_out_buffer; -- ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num, -- vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base, -- &iov[0].iov_len, false); -- if (unlikely(!ok)) { -- return false; -+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); -+ -+ s = DO_UPCAST(VhostVDPAState, nc, nc); -+ if (!s->vhost_vdpa.shadow_vqs_enabled) { -+ return 0; - } - -- iov[1].iov_base = s->cvq_cmd_in_buffer; -- ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0, -- sizeof(virtio_net_ctrl_ack), iov[1].iov_base, -- &in_copied, true); -- if (unlikely(!ok)) { -+ r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer, -+ vhost_vdpa_net_cvq_cmd_page_len(), false); -+ if (unlikely(r < 0)) { -+ return r; -+ } -+ -+ r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer, -+ vhost_vdpa_net_cvq_cmd_page_len(), true); -+ if (unlikely(r < 0)) { - vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); -- return false; - } - -- iov[1].iov_len = sizeof(virtio_net_ctrl_ack); -- return true; -+ return r; -+} -+ -+static void 
vhost_vdpa_net_cvq_stop(NetClientState *nc) -+{ -+ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -+ -+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); -+ -+ if (s->vhost_vdpa.shadow_vqs_enabled) { -+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); -+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer); -+ } - } - - static NetClientInfo net_vhost_vdpa_cvq_info = { - .type = NET_CLIENT_DRIVER_VHOST_VDPA, - .size = sizeof(VhostVDPAState), - .receive = vhost_vdpa_receive, -+ .start = vhost_vdpa_net_cvq_start, -+ .stop = vhost_vdpa_net_cvq_stop, - .cleanup = vhost_vdpa_cleanup, - .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, - .has_ufo = vhost_vdpa_has_ufo, -@@ -348,19 +347,17 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { - * Do not forward commands not supported by SVQ. Otherwise, the device could - * accept it and qemu would not know how to update the device model. - */ --static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out, -- size_t out_num) -+static bool vhost_vdpa_net_cvq_validate_cmd(const void *out_buf, size_t len) - { - struct virtio_net_ctrl_hdr ctrl; -- size_t n; - -- n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl)); -- if (unlikely(n < sizeof(ctrl))) { -+ if (unlikely(len < sizeof(ctrl))) { - qemu_log_mask(LOG_GUEST_ERROR, -- "%s: invalid legnth of out buffer %zu\n", __func__, n); -+ "%s: invalid legnth of out buffer %zu\n", __func__, len); - return false; - } - -+ memcpy(&ctrl, out_buf, sizeof(ctrl)); - switch (ctrl.class) { - case VIRTIO_NET_CTRL_MAC: - switch (ctrl.cmd) { -@@ -392,10 +389,14 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - VhostVDPAState *s = opaque; - size_t in_len, dev_written; - virtio_net_ctrl_ack status = VIRTIO_NET_ERR; -- /* out and in buffers sent to the device */ -- struct iovec dev_buffers[2] = { -- { .iov_base = s->cvq_cmd_out_buffer }, -- { .iov_base = s->cvq_cmd_in_buffer }, -+ /* Out buffer sent to both the vdpa device and the device 
model */ -+ struct iovec out = { -+ .iov_base = s->cvq_cmd_out_buffer, -+ }; -+ /* In buffer sent to the device */ -+ const struct iovec dev_in = { -+ .iov_base = s->cvq_cmd_in_buffer, -+ .iov_len = sizeof(virtio_net_ctrl_ack), - }; - /* in buffer used for device model */ - const struct iovec in = { -@@ -405,17 +406,15 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - int r = -EINVAL; - bool ok; - -- ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers); -- if (unlikely(!ok)) { -- goto out; -- } -- -- ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1); -+ out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, -+ s->cvq_cmd_out_buffer, -+ vhost_vdpa_net_cvq_cmd_len()); -+ ok = vhost_vdpa_net_cvq_validate_cmd(s->cvq_cmd_out_buffer, out.iov_len); - if (unlikely(!ok)) { - goto out; - } - -- r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem); -+ r = vhost_svq_add(svq, &out, 1, &dev_in, 1, elem); - if (unlikely(r != 0)) { - if (unlikely(r == -ENOSPC)) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", -@@ -435,13 +434,13 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - goto out; - } - -- memcpy(&status, dev_buffers[1].iov_base, sizeof(status)); -+ memcpy(&status, s->cvq_cmd_in_buffer, sizeof(status)); - if (status != VIRTIO_NET_OK) { - goto out; - } - - status = VIRTIO_NET_ERR; -- virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1); -+ virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1); - if (status != VIRTIO_NET_OK) { - error_report("Bad CVQ processing in model"); - } -@@ -454,12 +453,6 @@ out: - } - vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); - g_free(elem); -- if (dev_buffers[0].iov_base) { -- vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base); -- } -- if (dev_buffers[1].iov_base) { -- vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base); -- } - return r; - } - --- -2.31.1 - diff --git 
a/kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch b/kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch deleted file mode 100644 index b23d64f..0000000 --- a/kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 51c1e9cf1612727ec4c6e795576ae8fa0c0b2d4c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:05 +0200 -Subject: [PATCH 07/23] vdpa: Remove SVQ vring from iova_tree at shutdown -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/21] f72e67b9c90103151cbf86bff53e8f14b30f0e5b (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -Although the device will be reset before usage, the right thing to do is -to clean it. 
- -Reported-by: Lei Yang -Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ") -Signed-off-by: Eugenio Pérez -Signed-off-by: Jason Wang -(cherry picked from commit 0c45fa6c420ec3a1dd9ea9c40fa11bd943bb3be9) ---- - hw/virtio/vhost-vdpa.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 39aa70f52d..e5c264fb29 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -896,6 +896,12 @@ static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, - - size = ROUND_UP(result->size, qemu_real_host_page_size); - r = vhost_vdpa_dma_unmap(v, result->iova, size); -+ if (unlikely(r < 0)) { -+ error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); -+ return false; -+ } -+ -+ vhost_iova_tree_remove(v->iova_tree, *result); - return r == 0; - } - --- -2.31.1 - diff --git a/kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch b/kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch deleted file mode 100644 index 98697cb..0000000 --- a/kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch +++ /dev/null @@ -1,48 +0,0 @@ -From edde0b6a805085255bccc0ccdc3b9b6f81cef37b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:02 +0200 -Subject: [PATCH 03/23] vdpa: Skip the maps not in the iova tree -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/21] 73acd16375a17cdf4c58830386541dd3a1b18bf7 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -Next patch will skip the registering of dma maps that the vdpa device -rejects in the iova tree. We need to consider that here or we cause a -SIGSEGV accessing result. 
- -Reported-by: Lei Yang -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit a92ca0ffee5858636432a6059eb2790df1c9c77f) ---- - hw/virtio/vhost-vdpa.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 96334ab5b6..aa7765c6bc 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -287,6 +287,10 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, - }; - - result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region); -+ if (!result) { -+ /* The memory listener map wasn't mapped */ -+ return; -+ } - iova = result->iova; - vhost_iova_tree_remove(v->iova_tree, result); - } --- -2.31.1 - diff --git a/kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch b/kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch deleted file mode 100644 index 8398415..0000000 --- a/kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 89a67e0ce3e4c7b9f9b2d4cfb9fc5eeebc5643ac Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:08 +0200 -Subject: [PATCH 10/23] vdpa: Use ring hwaddr at vhost_vdpa_svq_unmap_ring -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [9/21] 4420134d7be60fa8b04dc9a56566524bf8daddd4 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -Reduce code duplication. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 5a92452da95b2edfbffdd42ddc2612a7d09a5db0) ---- - hw/virtio/vhost-vdpa.c | 17 ++++++++--------- - 1 file changed, 8 insertions(+), 9 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 8eddf39f2a..0bea1e1eb9 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -882,10 +882,12 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, - /** - * Unmap a SVQ area in the device - */ --static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, -- const DMAMap *needle) -+static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr) - { -- const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle); -+ const DMAMap needle = { -+ .translated_addr = addr, -+ }; -+ const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, &needle); - hwaddr size; - int r; - -@@ -907,17 +909,14 @@ static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, - static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, - const VhostShadowVirtqueue *svq) - { -- DMAMap needle = {}; - struct vhost_vdpa *v = dev->opaque; - struct vhost_vring_addr svq_addr; - - vhost_svq_get_vring_addr(svq, &svq_addr); - -- needle.translated_addr = svq_addr.desc_user_addr; -- vhost_vdpa_svq_unmap_ring(v, &needle); -+ vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr); - -- needle.translated_addr = svq_addr.used_user_addr; -- vhost_vdpa_svq_unmap_ring(v, &needle); -+ vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr); - } - - /** -@@ -995,7 +994,7 @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev, - ok = vhost_vdpa_svq_map_ring(v, &device_region, errp); - if (unlikely(!ok)) { - error_prepend(errp, "Cannot create vq device region: "); -- vhost_vdpa_svq_unmap_ring(v, &driver_region); -+ vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr); - } - addr->used_user_addr = device_region.iova; - --- -2.31.1 - diff 
--git a/kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch b/kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch deleted file mode 100644 index e1da31d..0000000 --- a/kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch +++ /dev/null @@ -1,62 +0,0 @@ -From f92b0ef80b4889ae0beb0b2a026ec3892d576d79 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:32 +0200 -Subject: [PATCH 17/23] vdpa: add net_vhost_vdpa_cvq_info NetClientInfo -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [16/21] c80c9fd89e81fc389e7d02e9d764331ab9fc7a0a (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -Next patches will add a new info callback to restore NIC status through -CVQ. Since only the CVQ vhost device is needed, create it with a new -NetClientInfo. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 9d379453404303069f93f9b8163ae3805bcd8c2e) ---- - net/vhost-vdpa.c | 12 +++++++++++- - 1 file changed, 11 insertions(+), 1 deletion(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index ba65736f83..03e4cf1abc 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -334,6 +334,16 @@ static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, - return true; - } - -+static NetClientInfo net_vhost_vdpa_cvq_info = { -+ .type = NET_CLIENT_DRIVER_VHOST_VDPA, -+ .size = sizeof(VhostVDPAState), -+ .receive = vhost_vdpa_receive, -+ .cleanup = vhost_vdpa_cleanup, -+ .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, -+ .has_ufo = vhost_vdpa_has_ufo, -+ .check_peer_type = vhost_vdpa_check_peer_type, -+}; -+ - /** - * Do not forward commands not supported by SVQ. 
Otherwise, the device could - * accept it and qemu would not know how to update the device model. -@@ -475,7 +485,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, - name); - } else { -- nc = qemu_new_net_control_client(&net_vhost_vdpa_info, peer, -+ nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, - device, name); - } - snprintf(nc->info_str, sizeof(nc->info_str), TYPE_VHOST_VDPA); --- -2.31.1 - diff --git a/kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch b/kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch deleted file mode 100644 index 8c66f19..0000000 --- a/kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 6d16102aca24bab16c846fe6457071f4466b8e35 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:03 +0200 -Subject: [PATCH 04/23] vdpa: do not save failed dma maps in SVQ iova tree -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/21] f9bea39f7fa14c5ef0f85774cbad0ca3b52c4498 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -If a map fails for whatever reason, it must not be saved in the tree. -Otherwise, qemu will try to unmap it in cleanup, leaving to more errors. 
- -Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ") -Reported-by: Lei Yang -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 6cc2ec65382fde205511ac00a324995ce6ee8f28) ---- - hw/virtio/vhost-vdpa.c | 20 +++++++++++++------- - 1 file changed, 13 insertions(+), 7 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index aa7765c6bc..cc15b7d8ee 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -174,6 +174,7 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener) - static void vhost_vdpa_listener_region_add(MemoryListener *listener, - MemoryRegionSection *section) - { -+ DMAMap mem_region = {}; - struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); - hwaddr iova; - Int128 llend, llsize; -@@ -210,13 +211,13 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - - llsize = int128_sub(llend, int128_make64(iova)); - if (v->shadow_vqs_enabled) { -- DMAMap mem_region = { -- .translated_addr = (hwaddr)(uintptr_t)vaddr, -- .size = int128_get64(llsize) - 1, -- .perm = IOMMU_ACCESS_FLAG(true, section->readonly), -- }; -+ int r; - -- int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); -+ mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr, -+ mem_region.size = int128_get64(llsize) - 1, -+ mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly), -+ -+ r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); - if (unlikely(r != IOVA_OK)) { - error_report("Can't allocate a mapping (%d)", r); - goto fail; -@@ -230,11 +231,16 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - vaddr, section->readonly); - if (ret) { - error_report("vhost vdpa map fail!"); -- goto fail; -+ goto fail_map; - } - - return; - -+fail_map: -+ if (v->shadow_vqs_enabled) { -+ vhost_iova_tree_remove(v->iova_tree, &mem_region); -+ } -+ - fail: - /* - * On the initfn path, store the first error in the 
container so we --- -2.31.1 - diff --git a/kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch b/kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch deleted file mode 100644 index 3cc011f..0000000 --- a/kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch +++ /dev/null @@ -1,153 +0,0 @@ -From 56f4bebc591893e590481617da7cd7ecffeb166d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:34 +0200 -Subject: [PATCH 19/23] vdpa: extract vhost_vdpa_net_cvq_add from - vhost_vdpa_net_handle_ctrl_avail -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [18/21] 08ab71dbf050f5c2e97c622d1915f71a56c135b8 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -So we can reuse it to inject state messages. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang --- -v7: -* Remove double free error - -v6: -* Do not assume in buffer sent to the device is sizeof(virtio_net_ctrl_ack) - -v5: -* Do not use an artificial !NULL VirtQueueElement -* Use only out size instead of iovec dev_buffers for these functions. 
- -Signed-off-by: Jason Wang -(cherry picked from commit d9afb1f0ee4d662ed67d3bc1220b943f7e4cfa6f) ---- - net/vhost-vdpa.c | 59 +++++++++++++++++++++++++++++++----------------- - 1 file changed, 38 insertions(+), 21 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 17626feb8d..f09f044ec1 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -331,6 +331,38 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc) - } - } - -+static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, -+ size_t in_len) -+{ -+ /* Buffers for the device */ -+ const struct iovec out = { -+ .iov_base = s->cvq_cmd_out_buffer, -+ .iov_len = out_len, -+ }; -+ const struct iovec in = { -+ .iov_base = s->cvq_cmd_in_buffer, -+ .iov_len = sizeof(virtio_net_ctrl_ack), -+ }; -+ VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0); -+ int r; -+ -+ r = vhost_svq_add(svq, &out, 1, &in, 1, NULL); -+ if (unlikely(r != 0)) { -+ if (unlikely(r == -ENOSPC)) { -+ qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", -+ __func__); -+ } -+ return r; -+ } -+ -+ /* -+ * We can poll here since we've had BQL from the time we sent the -+ * descriptor. 
Also, we need to take the answer before SVQ pulls by itself, -+ * when BQL is released -+ */ -+ return vhost_svq_poll(svq); -+} -+ - static NetClientInfo net_vhost_vdpa_cvq_info = { - .type = NET_CLIENT_DRIVER_VHOST_VDPA, - .size = sizeof(VhostVDPAState), -@@ -387,23 +419,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - void *opaque) - { - VhostVDPAState *s = opaque; -- size_t in_len, dev_written; -+ size_t in_len; - virtio_net_ctrl_ack status = VIRTIO_NET_ERR; - /* Out buffer sent to both the vdpa device and the device model */ - struct iovec out = { - .iov_base = s->cvq_cmd_out_buffer, - }; -- /* In buffer sent to the device */ -- const struct iovec dev_in = { -- .iov_base = s->cvq_cmd_in_buffer, -- .iov_len = sizeof(virtio_net_ctrl_ack), -- }; - /* in buffer used for device model */ - const struct iovec in = { - .iov_base = &status, - .iov_len = sizeof(status), - }; -- int r = -EINVAL; -+ ssize_t dev_written = -EINVAL; - bool ok; - - out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, -@@ -414,21 +441,11 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - goto out; - } - -- r = vhost_svq_add(svq, &out, 1, &dev_in, 1, elem); -- if (unlikely(r != 0)) { -- if (unlikely(r == -ENOSPC)) { -- qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", -- __func__); -- } -+ dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); -+ if (unlikely(dev_written < 0)) { - goto out; - } - -- /* -- * We can poll here since we've had BQL from the time we sent the -- * descriptor. 
Also, we need to take the answer before SVQ pulls by itself, -- * when BQL is released -- */ -- dev_written = vhost_svq_poll(svq); - if (unlikely(dev_written < sizeof(status))) { - error_report("Insufficient written data (%zu)", dev_written); - goto out; -@@ -436,7 +453,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - - memcpy(&status, s->cvq_cmd_in_buffer, sizeof(status)); - if (status != VIRTIO_NET_OK) { -- goto out; -+ return VIRTIO_NET_ERR; - } - - status = VIRTIO_NET_ERR; -@@ -453,7 +470,7 @@ out: - } - vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); - g_free(elem); -- return r; -+ return dev_written < 0 ? dev_written : 0; - } - - static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { --- -2.31.1 - diff --git a/kvm-vdpa-manual-forward-CVQ-buffers.patch b/kvm-vdpa-manual-forward-CVQ-buffers.patch deleted file mode 100644 index 61909ff..0000000 --- a/kvm-vdpa-manual-forward-CVQ-buffers.patch +++ /dev/null @@ -1,166 +0,0 @@ -From c33bc0b7f2b5cfa330a6d89d60ee94de129c65c1 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 16:05:38 +0200 -Subject: [PATCH 23/32] vdpa: manual forward CVQ buffers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [23/27] ce128d5152be7eebf87e186eb8b58c2ed95aff6d (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit bd907ae4b00ebedad5e586af05ea3d6490318d45 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:42 2022 +0200 - - vdpa: manual forward CVQ buffers - - Do a simple forwarding of CVQ buffers, the same work SVQ could do but - through callbacks. No functional change intended. 
- - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-vdpa.c | 3 +- - include/hw/virtio/vhost-vdpa.h | 3 ++ - net/vhost-vdpa.c | 58 ++++++++++++++++++++++++++++++++++ - 3 files changed, 63 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 14b02fe079..49effe5462 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -417,7 +417,8 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, - for (unsigned n = 0; n < hdev->nvqs; ++n) { - g_autoptr(VhostShadowVirtqueue) svq; - -- svq = vhost_svq_new(v->iova_tree, NULL, NULL); -+ svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops, -+ v->shadow_vq_ops_opaque); - if (unlikely(!svq)) { - error_setg(errp, "Cannot create svq %u", n); - return -1; -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index 7214eb47dc..1111d85643 100644 ---- a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -15,6 +15,7 @@ - #include - - #include "hw/virtio/vhost-iova-tree.h" -+#include "hw/virtio/vhost-shadow-virtqueue.h" - #include "hw/virtio/virtio.h" - #include "standard-headers/linux/vhost_types.h" - -@@ -35,6 +36,8 @@ typedef struct vhost_vdpa { - /* IOVA mapping used by the Shadow Virtqueue */ - VhostIOVATree *iova_tree; - GPtrArray *shadow_vqs; -+ const VhostShadowVirtqueueOps *shadow_vq_ops; -+ void *shadow_vq_ops_opaque; - struct vhost_dev *dev; - VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; - } VhostVDPA; -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index df1e69ee72..2e3b6b10d8 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -11,11 +11,14 @@ - - #include "qemu/osdep.h" - #include "clients.h" -+#include "hw/virtio/virtio-net.h" - #include "net/vhost_net.h" - #include "net/vhost-vdpa.h" - #include "hw/virtio/vhost-vdpa.h" - #include "qemu/config-file.h" - #include "qemu/error-report.h" 
-+#include "qemu/log.h" -+#include "qemu/memalign.h" - #include "qemu/option.h" - #include "qapi/error.h" - #include -@@ -187,6 +190,57 @@ static NetClientInfo net_vhost_vdpa_info = { - .check_peer_type = vhost_vdpa_check_peer_type, - }; - -+/** -+ * Forward buffer for the moment. -+ */ -+static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, -+ VirtQueueElement *elem, -+ void *opaque) -+{ -+ unsigned int n = elem->out_num + elem->in_num; -+ g_autofree struct iovec *dev_buffers = g_new(struct iovec, n); -+ size_t in_len, dev_written; -+ virtio_net_ctrl_ack status = VIRTIO_NET_ERR; -+ int r; -+ -+ memcpy(dev_buffers, elem->out_sg, elem->out_num); -+ memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num); -+ -+ r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1], -+ elem->in_num, elem); -+ if (unlikely(r != 0)) { -+ if (unlikely(r == -ENOSPC)) { -+ qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", -+ __func__); -+ } -+ goto out; -+ } -+ -+ /* -+ * We can poll here since we've had BQL from the time we sent the -+ * descriptor. 
Also, we need to take the answer before SVQ pulls by itself, -+ * when BQL is released -+ */ -+ dev_written = vhost_svq_poll(svq); -+ if (unlikely(dev_written < sizeof(status))) { -+ error_report("Insufficient written data (%zu)", dev_written); -+ } -+ -+out: -+ in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, -+ sizeof(status)); -+ if (unlikely(in_len < sizeof(status))) { -+ error_report("Bad device CVQ written length"); -+ } -+ vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); -+ g_free(elem); -+ return r; -+} -+ -+static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { -+ .avail_handler = vhost_vdpa_net_handle_ctrl_avail, -+}; -+ - static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - const char *device, - const char *name, -@@ -211,6 +265,10 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - - s->vhost_vdpa.device_fd = vdpa_device_fd; - s->vhost_vdpa.index = queue_pair_index; -+ if (!is_datapath) { -+ s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; -+ s->vhost_vdpa.shadow_vq_ops_opaque = s; -+ } - ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); - if (ret) { - qemu_del_net_client(nc); --- -2.31.1 - diff --git a/kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch b/kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch deleted file mode 100644 index 26083c1..0000000 --- a/kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch +++ /dev/null @@ -1,114 +0,0 @@ -From b90a5878355bd549200ed1eff52ea084325bfc8a Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Fri, 6 May 2022 15:25:10 +0200 -Subject: [PATCH 5/5] vfio/common: remove spurious tpm-crb-cmd misalignment - warning -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 84: vfio/common: Remove spurious tpm-crb-cmd misalignment warning -RH-Commit: [2/2] 9b73a9aec59cb50d5e3468cc553464bf4a73d0a1 
(eauger1/centos-qemu-kvm) -RH-Bugzilla: 2037612 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck -RH-Acked-by: Andrew Jones - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2037612 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45166961 -Upstream Status: YES -Tested: With TPM-CRB and VFIO - -The CRB command buffer currently is a RAM MemoryRegion and given -its base address alignment, it causes an error report on -vfio_listener_region_add(). This region could have been a RAM device -region, easing the detection of such safe situation but this option -was not well received. So let's add a helper function that uses the -memory region owner type to detect the situation is safe wrt -the assignment. Other device types can be checked here if such kind -of problem occurs again. - -Conflicts in hw/vfio/common.c -We don't have 8e3b0cbb721 ("Replace qemu_real_host_page variables with inlined functions") - -Signed-off-by: Eric Auger -Reviewed-by: Philippe Mathieu-Daudé -Acked-by: Stefan Berger -Reviewed-by: Cornelia Huck -Link: https://lore.kernel.org/r/20220506132510.1847942-3-eric.auger@redhat.com -Signed-off-by: Alex Williamson -(cherry picked from commit 851d6d1a0ff29a87ec588205842edf6b86d99b5c) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 27 ++++++++++++++++++++++++++- - hw/vfio/trace-events | 1 + - 2 files changed, 27 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 080046e3f5..0fbe0d47af 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -40,6 +40,7 @@ - #include "trace.h" - #include "qapi/error.h" - #include "migration/migration.h" -+#include "sysemu/tpm.h" - - VFIOGroupList vfio_group_list = - QLIST_HEAD_INITIALIZER(vfio_group_list); -@@ -861,6 +862,22 @@ static void vfio_unregister_ram_discard_listener(VFIOContainer *container, - g_free(vrdl); - } - -+static bool vfio_known_safe_misalignment(MemoryRegionSection *section) -+{ -+ MemoryRegion *mr = section->mr; -+ -+ if 
(!TPM_IS_CRB(mr->owner)) { -+ return false; -+ } -+ -+ /* this is a known safe misaligned region, just trace for debug purpose */ -+ trace_vfio_known_safe_misalignment(memory_region_name(mr), -+ section->offset_within_address_space, -+ section->offset_within_region, -+ qemu_real_host_page_size); -+ return true; -+} -+ - static void vfio_listener_region_add(MemoryListener *listener, - MemoryRegionSection *section) - { -@@ -884,7 +901,15 @@ static void vfio_listener_region_add(MemoryListener *listener, - if (unlikely((section->offset_within_address_space & - ~qemu_real_host_page_mask) != - (section->offset_within_region & ~qemu_real_host_page_mask))) { -- error_report("%s received unaligned region", __func__); -+ if (!vfio_known_safe_misalignment(section)) { -+ error_report("%s received unaligned region %s iova=0x%"PRIx64 -+ " offset_within_region=0x%"PRIx64 -+ " qemu_real_host_page_size=0x%"PRIxPTR, -+ __func__, memory_region_name(section->mr), -+ section->offset_within_address_space, -+ section->offset_within_region, -+ qemu_real_host_page_size); -+ } - return; - } - -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 0ef1b5f4a6..582882db91 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -100,6 +100,7 @@ vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add - vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d" - vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64 - vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]" -+vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR ": cannot be mapped for DMA" - vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t 
size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA" - vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64 - vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 --- -2.31.1 - diff --git a/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch b/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch deleted file mode 100644 index 7e644c5..0000000 --- a/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 3de8fb9f3dba18d04efa10b70bcec641035effc5 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 24 May 2022 05:14:05 -0400 -Subject: [PATCH 16/16] vfio/common: remove spurious warning on - vfio_listener_region_del - -RH-Author: Eric Auger -RH-MergeRequest: 101: vfio/common: remove spurious warning on vfio_listener_region_del -RH-Commit: [1/1] dac688b8a981ebb964fea79ea198c329b9cdb551 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2086262 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cornelia Huck -RH-Acked-by: Alex Williamson - - Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2086262 - Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45876133 - Upstream Status: YES - Tested: With TPM-CRB and VFIO - -851d6d1a0f ("vfio/common: remove spurious tpm-crb-cmd misalignment -warning") removed the warning on vfio_listener_region_add() path. - -However the same warning also hits on region_del path. Let's remove -it and reword the dynamic trace as this can be called on both -map and unmap path. 
- -Contextual Conflict in hw/vfio/common.c -We don't have 8e3b0cbb721 ("Replace qemu_real_host_page variables with inlined functions") - -Signed-off-by: Eric Auger -Reviewed-by: Cornelia Huck -Link: https://lore.kernel.org/r/20220524091405.416256-1-eric.auger@redhat.com -Fixes: 851d6d1a0ff2 ("vfio/common: remove spurious tpm-crb-cmd misalignment warning") -Signed-off-by: Alex Williamson -(cherry picked from commit ec6600be0dc16982181c7ad80d94c143c0807dd2) -Signed-off-by: Eric Auger ---- - hw/vfio/common.c | 10 +++++++++- - hw/vfio/trace-events | 2 +- - 2 files changed, 10 insertions(+), 2 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 0fbe0d47af..637981f9a1 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -1145,7 +1145,15 @@ static void vfio_listener_region_del(MemoryListener *listener, - if (unlikely((section->offset_within_address_space & - ~qemu_real_host_page_mask) != - (section->offset_within_region & ~qemu_real_host_page_mask))) { -- error_report("%s received unaligned region", __func__); -+ if (!vfio_known_safe_misalignment(section)) { -+ error_report("%s received unaligned region %s iova=0x%"PRIx64 -+ " offset_within_region=0x%"PRIx64 -+ " qemu_real_host_page_size=0x%"PRIxPTR, -+ __func__, memory_region_name(section->mr), -+ section->offset_within_address_space, -+ section->offset_within_region, -+ qemu_real_host_page_size); -+ } - return; - } - -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 582882db91..73dffe9e00 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -100,7 +100,7 @@ vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add - vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d" - vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64 - vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]" 
--vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR ": cannot be mapped for DMA" -+vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR - vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA" - vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64 - vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 --- -2.31.1 - diff --git a/kvm-vhost-Add-SVQDescState.patch b/kvm-vhost-Add-SVQDescState.patch deleted file mode 100644 index b1ea4bb..0000000 --- a/kvm-vhost-Add-SVQDescState.patch +++ /dev/null @@ -1,135 +0,0 @@ -From 14200f493243f73152ea4a4b97274f0ec4fb36fa Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 15/32] vhost: Add SVQDescState -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [15/27] 2e2866f22e37cace8598ff44dfcdc07fcc915d6d (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 9e87868fcaf5785c8e1490c290505fa32305ff91 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:34 2022 +0200 - - vhost: Add SVQDescState - - This will allow SVQ to add context to the different queue elements. 
- - This patch only store the actual element, no functional change intended. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 16 ++++++++-------- - hw/virtio/vhost-shadow-virtqueue.h | 8 ++++++-- - 2 files changed, 14 insertions(+), 10 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 3cec03d709..a08e3d4025 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -256,7 +256,7 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, - return -EINVAL; - } - -- svq->ring_id_maps[qemu_head] = elem; -+ svq->desc_state[qemu_head].elem = elem; - vhost_svq_kick(svq); - return 0; - } -@@ -411,21 +411,21 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - return NULL; - } - -- if (unlikely(!svq->ring_id_maps[used_elem.id])) { -+ if (unlikely(!svq->desc_state[used_elem.id].elem)) { - qemu_log_mask(LOG_GUEST_ERROR, - "Device %s says index %u is used, but it was not available", - svq->vdev->name, used_elem.id); - return NULL; - } - -- num = svq->ring_id_maps[used_elem.id]->in_num + -- svq->ring_id_maps[used_elem.id]->out_num; -+ num = svq->desc_state[used_elem.id].elem->in_num + -+ svq->desc_state[used_elem.id].elem->out_num; - last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); - svq->desc_next[last_used_chain] = svq->free_head; - svq->free_head = used_elem.id; - - *len = used_elem.len; -- return g_steal_pointer(&svq->ring_id_maps[used_elem.id]); -+ return g_steal_pointer(&svq->desc_state[used_elem.id].elem); - } - - static void vhost_svq_flush(VhostShadowVirtqueue *svq, -@@ -595,7 +595,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - memset(svq->vring.desc, 0, driver_size); - svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size); - 
memset(svq->vring.used, 0, device_size); -- svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); -+ svq->desc_state = g_new0(SVQDescState, svq->vring.num); - svq->desc_next = g_new0(uint16_t, svq->vring.num); - for (unsigned i = 0; i < svq->vring.num - 1; i++) { - svq->desc_next[i] = cpu_to_le16(i + 1); -@@ -620,7 +620,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) - - for (unsigned i = 0; i < svq->vring.num; ++i) { - g_autofree VirtQueueElement *elem = NULL; -- elem = g_steal_pointer(&svq->ring_id_maps[i]); -+ elem = g_steal_pointer(&svq->desc_state[i].elem); - if (elem) { - virtqueue_detach_element(svq->vq, elem, 0); - } -@@ -632,7 +632,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) - } - svq->vq = NULL; - g_free(svq->desc_next); -- g_free(svq->ring_id_maps); -+ g_free(svq->desc_state); - qemu_vfree(svq->vring.desc); - qemu_vfree(svq->vring.used); - } -diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h -index c132c994e9..d646c35054 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.h -+++ b/hw/virtio/vhost-shadow-virtqueue.h -@@ -15,6 +15,10 @@ - #include "standard-headers/linux/vhost_types.h" - #include "hw/virtio/vhost-iova-tree.h" - -+typedef struct SVQDescState { -+ VirtQueueElement *elem; -+} SVQDescState; -+ - /* Shadow virtqueue to relay notifications */ - typedef struct VhostShadowVirtqueue { - /* Shadow vring */ -@@ -47,8 +51,8 @@ typedef struct VhostShadowVirtqueue { - /* IOVA mapping */ - VhostIOVATree *iova_tree; - -- /* Map for use the guest's descriptors */ -- VirtQueueElement **ring_id_maps; -+ /* SVQ vring descriptors state */ -+ SVQDescState *desc_state; - - /* Next VirtQueue element that guest made available */ - VirtQueueElement *next_guest_avail_elem; --- -2.31.1 - diff --git a/kvm-vhost-Add-svq-avail_handler-callback.patch b/kvm-vhost-Add-svq-avail_handler-callback.patch deleted file mode 100644 index a8b585d..0000000 --- a/kvm-vhost-Add-svq-avail_handler-callback.patch +++ /dev/null @@ 
-1,164 +0,0 @@ -From 433106c286a1961737300ebaece6f10b2747e7d8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 20/32] vhost: Add svq avail_handler callback -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [20/27] d228eb89d204f8be623bc870503bbf0078dfc9ae (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit e966c0b781aebabd2c0f5eef91678f08ce1d068c -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:39 2022 +0200 - - vhost: Add svq avail_handler callback - - This allows external handlers to be aware of new buffers that the guest - places in the virtqueue. - - When this callback is defined the ownership of the guest's virtqueue - element is transferred to the callback. This means that if the user - wants to forward the descriptor it needs to manually inject it. The - callback is also free to process the command by itself and use the - element with svq_push. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. 
Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 14 ++++++++++++-- - hw/virtio/vhost-shadow-virtqueue.h | 31 +++++++++++++++++++++++++++++- - hw/virtio/vhost-vdpa.c | 3 ++- - 3 files changed, 44 insertions(+), 4 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 95d0d7a7ee..e53aac45f6 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -306,7 +306,11 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) - break; - } - -- r = vhost_svq_add_element(svq, elem); -+ if (svq->ops) { -+ r = svq->ops->avail_handler(svq, elem, svq->ops_opaque); -+ } else { -+ r = vhost_svq_add_element(svq, elem); -+ } - if (unlikely(r != 0)) { - if (r == -ENOSPC) { - /* -@@ -685,12 +689,16 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) - * shadow methods and file descriptors. - * - * @iova_tree: Tree to perform descriptors translations -+ * @ops: SVQ owner callbacks -+ * @ops_opaque: ops opaque pointer - * - * Returns the new virtqueue or NULL. - * - * In case of error, reason is reported through error_report. 
- */ --VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree) -+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, -+ const VhostShadowVirtqueueOps *ops, -+ void *ops_opaque) - { - g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); - int r; -@@ -712,6 +720,8 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree) - event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); - event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); - svq->iova_tree = iova_tree; -+ svq->ops = ops; -+ svq->ops_opaque = ops_opaque; - return g_steal_pointer(&svq); - - err_init_hdev_call: -diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h -index cf442f7dea..d04c34a589 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.h -+++ b/hw/virtio/vhost-shadow-virtqueue.h -@@ -25,6 +25,27 @@ typedef struct SVQDescState { - unsigned int ndescs; - } SVQDescState; - -+typedef struct VhostShadowVirtqueue VhostShadowVirtqueue; -+ -+/** -+ * Callback to handle an avail buffer. -+ * -+ * @svq: Shadow virtqueue -+ * @elem: Element placed in the queue by the guest -+ * @vq_callback_opaque: Opaque -+ * -+ * Returns 0 if the vq is running as expected. -+ * -+ * Note that ownership of elem is transferred to the callback. 
-+ */ -+typedef int (*VirtQueueAvailCallback)(VhostShadowVirtqueue *svq, -+ VirtQueueElement *elem, -+ void *vq_callback_opaque); -+ -+typedef struct VhostShadowVirtqueueOps { -+ VirtQueueAvailCallback avail_handler; -+} VhostShadowVirtqueueOps; -+ - /* Shadow virtqueue to relay notifications */ - typedef struct VhostShadowVirtqueue { - /* Shadow vring */ -@@ -69,6 +90,12 @@ typedef struct VhostShadowVirtqueue { - */ - uint16_t *desc_next; - -+ /* Caller callbacks */ -+ const VhostShadowVirtqueueOps *ops; -+ -+ /* Caller callbacks opaque */ -+ void *ops_opaque; -+ - /* Next head to expose to the device */ - uint16_t shadow_avail_idx; - -@@ -102,7 +129,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - VirtQueue *vq); - void vhost_svq_stop(VhostShadowVirtqueue *svq); - --VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree); -+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, -+ const VhostShadowVirtqueueOps *ops, -+ void *ops_opaque); - - void vhost_svq_free(gpointer vq); - G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free); -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 33dcaa135e..28df57b12e 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -416,8 +416,9 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, - - shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); - for (unsigned n = 0; n < hdev->nvqs; ++n) { -- g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree); -+ g_autoptr(VhostShadowVirtqueue) svq; - -+ svq = vhost_svq_new(v->iova_tree, NULL, NULL); - if (unlikely(!svq)) { - error_setg(errp, "Cannot create svq %u", n); - return -1; --- -2.31.1 - diff --git a/kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch b/kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch deleted file mode 100644 index 9b6155b..0000000 --- a/kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch +++ 
/dev/null @@ -1,67 +0,0 @@ -From 6cde15c70c86819033337771eb522e94e3ea9e34 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:07 +0200 -Subject: [PATCH 09/23] vhost: Always store new kick fd on - vhost_svq_set_svq_kick_fd -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/21] a09b8851c39d7cea67414560f6d322e988b9d59a (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -We can unbind twice a file descriptor if we call twice -vhost_svq_set_svq_kick_fd because of this. Since it comes from vhost and -not from SVQ, that file descriptor could be a different thing that -guest's vhost notifier. - -Likewise, it can happens the same if a guest start and stop the device -multiple times. 
- -Reported-by: Lei Yang -Fixes: dff4426fa6 ("vhost: Add Shadow VirtQueue kick forwarding capabilities") -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 6867f29c1425add7e0e8d1d8d58cc0ffbb8df0e4) ---- - hw/virtio/vhost-shadow-virtqueue.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index e53aac45f6..f420311b89 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -602,13 +602,13 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) - event_notifier_set_handler(svq_kick, NULL); - } - -+ event_notifier_init_fd(svq_kick, svq_kick_fd); - /* - * event_notifier_set_handler already checks for guest's notifications if - * they arrive at the new file descriptor in the switch, so there is no - * need to explicitly check for them. - */ - if (poll_start) { -- event_notifier_init_fd(svq_kick, svq_kick_fd); - event_notifier_set(svq_kick); - event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier); - } -@@ -655,7 +655,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - */ - void vhost_svq_stop(VhostShadowVirtqueue *svq) - { -- event_notifier_set_handler(&svq->svq_kick, NULL); -+ vhost_svq_set_svq_kick_fd(svq, VHOST_FILE_UNBIND); - g_autofree VirtQueueElement *next_avail_elem = NULL; - - if (!svq->vq) { --- -2.31.1 - diff --git a/kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch b/kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch deleted file mode 100644 index 9b09d42..0000000 --- a/kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 893dffb820973361bcef33612a6b924554a856c1 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 13/32] vhost: Check for queue full at vhost_svq_add -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [13/27] d4bd8299fb7733a1e190618dfc92b4b53b7bbeb3 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit f20b70eb5a68cfd8fef74a13ccdd494ef1cb0221 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:32 2022 +0200 - - vhost: Check for queue full at vhost_svq_add - - The series need to expose vhost_svq_add with full functionality, - including checking for full queue. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 59 +++++++++++++++++------------- - 1 file changed, 33 insertions(+), 26 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index e3fc3c2658..1d2bab287b 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -233,21 +233,29 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) - * Add an element to a SVQ. - * - * The caller must check that there is enough slots for the new element. It -- * takes ownership of the element: In case of failure, it is free and the SVQ -- * is considered broken. -+ * takes ownership of the element: In case of failure not ENOSPC, it is free. 
-+ * -+ * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full - */ --static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) -+static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) - { - unsigned qemu_head; -- bool ok = vhost_svq_add_split(svq, elem, &qemu_head); -+ unsigned ndescs = elem->in_num + elem->out_num; -+ bool ok; -+ -+ if (unlikely(ndescs > vhost_svq_available_slots(svq))) { -+ return -ENOSPC; -+ } -+ -+ ok = vhost_svq_add_split(svq, elem, &qemu_head); - if (unlikely(!ok)) { - g_free(elem); -- return false; -+ return -EINVAL; - } - - svq->ring_id_maps[qemu_head] = elem; - vhost_svq_kick(svq); -- return true; -+ return 0; - } - - /** -@@ -274,7 +282,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) - - while (true) { - VirtQueueElement *elem; -- bool ok; -+ int r; - - if (svq->next_guest_avail_elem) { - elem = g_steal_pointer(&svq->next_guest_avail_elem); -@@ -286,25 +294,24 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) - break; - } - -- if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) { -- /* -- * This condition is possible since a contiguous buffer in GPA -- * does not imply a contiguous buffer in qemu's VA -- * scatter-gather segments. If that happens, the buffer exposed -- * to the device needs to be a chain of descriptors at this -- * moment. -- * -- * SVQ cannot hold more available buffers if we are here: -- * queue the current guest descriptor and ignore further kicks -- * until some elements are used. -- */ -- svq->next_guest_avail_elem = elem; -- return; -- } -- -- ok = vhost_svq_add(svq, elem); -- if (unlikely(!ok)) { -- /* VQ is broken, just return and ignore any other kicks */ -+ r = vhost_svq_add(svq, elem); -+ if (unlikely(r != 0)) { -+ if (r == -ENOSPC) { -+ /* -+ * This condition is possible since a contiguous buffer in -+ * GPA does not imply a contiguous buffer in qemu's VA -+ * scatter-gather segments. 
If that happens, the buffer -+ * exposed to the device needs to be a chain of descriptors -+ * at this moment. -+ * -+ * SVQ cannot hold more available buffers if we are here: -+ * queue the current guest descriptor and ignore kicks -+ * until some elements are used. -+ */ -+ svq->next_guest_avail_elem = elem; -+ } -+ -+ /* VQ is full or broken, just return and ignore kicks */ - return; - } - } --- -2.31.1 - diff --git a/kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch b/kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch deleted file mode 100644 index 6755aad..0000000 --- a/kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 5c8de23e185a1a1f0b19eac3c9fa03411c9f545c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 14/32] vhost: Decouple vhost_svq_add from VirtQueueElement -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [14/27] 463087dd316adc91b9c7a4e6634c6fc1745c1849 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 1f46ae65d85f677b660bda46685dd3e94885a7cb -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:33 2022 +0200 - - vhost: Decouple vhost_svq_add from VirtQueueElement - - VirtQueueElement comes from the guest, but we're heading SVQ to be able - to modify the element presented to the device without the guest's - knowledge. - - To do so, make SVQ accept sg buffers directly, instead of using - VirtQueueElement. - - Add vhost_svq_add_element to maintain element convenience. - - Signed-off-by: Eugenio Pérez - Acked-by: Jason Wang - Reviewed-by: Michael S. 
Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 33 ++++++++++++++++++++---------- - 1 file changed, 22 insertions(+), 11 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 1d2bab287b..3cec03d709 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -172,30 +172,31 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, - } - - static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, -- VirtQueueElement *elem, unsigned *head) -+ const struct iovec *out_sg, size_t out_num, -+ const struct iovec *in_sg, size_t in_num, -+ unsigned *head) - { - unsigned avail_idx; - vring_avail_t *avail = svq->vring.avail; - bool ok; -- g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num)); -+ g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num)); - - *head = svq->free_head; - - /* We need some descriptors here */ -- if (unlikely(!elem->out_num && !elem->in_num)) { -+ if (unlikely(!out_num && !in_num)) { - qemu_log_mask(LOG_GUEST_ERROR, - "Guest provided element with no descriptors"); - return false; - } - -- ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, -- elem->in_num > 0, false); -+ ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0, -+ false); - if (unlikely(!ok)) { - return false; - } - -- ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, -- true); -+ ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true); - if (unlikely(!ok)) { - return false; - } -@@ -237,17 +238,19 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) - * - * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full - */ --static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) -+static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, -+ size_t out_num, 
const struct iovec *in_sg, -+ size_t in_num, VirtQueueElement *elem) - { - unsigned qemu_head; -- unsigned ndescs = elem->in_num + elem->out_num; -+ unsigned ndescs = in_num + out_num; - bool ok; - - if (unlikely(ndescs > vhost_svq_available_slots(svq))) { - return -ENOSPC; - } - -- ok = vhost_svq_add_split(svq, elem, &qemu_head); -+ ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head); - if (unlikely(!ok)) { - g_free(elem); - return -EINVAL; -@@ -258,6 +261,14 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) - return 0; - } - -+/* Convenience wrapper to add a guest's element to SVQ */ -+static int vhost_svq_add_element(VhostShadowVirtqueue *svq, -+ VirtQueueElement *elem) -+{ -+ return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg, -+ elem->in_num, elem); -+} -+ - /** - * Forward available buffers. - * -@@ -294,7 +305,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) - break; - } - -- r = vhost_svq_add(svq, elem); -+ r = vhost_svq_add_element(svq, elem); - if (unlikely(r != 0)) { - if (r == -ENOSPC) { - /* --- -2.31.1 - diff --git a/kvm-vhost-Delete-useless-read-memory-barrier.patch b/kvm-vhost-Delete-useless-read-memory-barrier.patch deleted file mode 100644 index f5aad51..0000000 --- a/kvm-vhost-Delete-useless-read-memory-barrier.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 773d1bb4e9ea9ca704372e52569955937f91f15c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:28 +0200 -Subject: [PATCH 13/23] vhost: Delete useless read memory barrier -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [12/21] 0e238fe934b1fc2c7e10b6f693468bc25ea3243f (eperezmartin/qemu-kvm) -Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -As discussed in previous series [1], this memory barrier is useless with -the atomic read of used idx at vhost_svq_more_used. Deleting it. - -[1] https://lists.nongnu.org/archive/html/qemu-devel/2022-07/msg02616.html - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit cdfb1612ba0f9b76367c96ce26ba94fedc7a0e61) ---- - hw/virtio/vhost-shadow-virtqueue.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 7792f3db1d..d36afbc547 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -509,9 +509,6 @@ size_t vhost_svq_poll(VhostShadowVirtqueue *svq) - if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { - return 0; - } -- -- /* Make sure we read new used_idx */ -- smp_rmb(); - } while (true); - } - --- -2.31.1 - diff --git a/kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch b/kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch deleted file mode 100644 index 81ed89e..0000000 --- a/kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 2f134d800a7ac521a637a0da2116b2603b12c8c0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:29 +0200 -Subject: [PATCH 14/23] vhost: Do not depend on !NULL VirtQueueElement on - vhost_svq_flush -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [13/21] 93ec7baa2a29031db25d86b7dc1a949388623370 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: 
git@github.com:jasowang/qemu.git net-next - -Since QEMU will be able to inject new elements on CVQ to restore the -state, we need not to depend on a VirtQueueElement to know if a new -element has been used by the device or not. Instead of check that, check -if there are new elements only using used idx on vhost_svq_flush. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 7599f71c11c08b90f173c35ded1aaa1fdca86f1b) ---- - hw/virtio/vhost-shadow-virtqueue.c | 11 +++++++---- - 1 file changed, 7 insertions(+), 4 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index d36afbc547..c0e3c92e96 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -499,17 +499,20 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, - size_t vhost_svq_poll(VhostShadowVirtqueue *svq) - { - int64_t start_us = g_get_monotonic_time(); -+ uint32_t len; -+ - do { -- uint32_t len; -- VirtQueueElement *elem = vhost_svq_get_buf(svq, &len); -- if (elem) { -- return len; -+ if (vhost_svq_more_used(svq)) { -+ break; - } - - if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { - return 0; - } - } while (true); -+ -+ vhost_svq_get_buf(svq, &len); -+ return len; - } - - /** --- -2.31.1 - diff --git a/kvm-vhost-Expose-vhost_svq_add.patch b/kvm-vhost-Expose-vhost_svq_add.patch deleted file mode 100644 index 70dc774..0000000 --- a/kvm-vhost-Expose-vhost_svq_add.patch +++ /dev/null @@ -1,73 +0,0 @@ -From cefd6583a8483c7a80f9cde8f7ad4705983af9e7 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 18/32] vhost: Expose vhost_svq_add -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [18/27] bfb44f597d350336113783bcc9b3c9d9d32ff8c0 
(eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit d0291f3f284d3bc220cdb13b0d8ac8a44eb5fd4c -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:37 2022 +0200 - - vhost: Expose vhost_svq_add - - This allows external parts of SVQ to forward custom buffers to the - device. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 6 +++--- - hw/virtio/vhost-shadow-virtqueue.h | 3 +++ - 2 files changed, 6 insertions(+), 3 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 1ce52d5b4a..cb879e7b88 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -238,9 +238,9 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) - * - * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full - */ --static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, -- size_t out_num, const struct iovec *in_sg, -- size_t in_num, VirtQueueElement *elem) -+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, -+ size_t out_num, const struct iovec *in_sg, size_t in_num, -+ VirtQueueElement *elem) - { - unsigned qemu_head; - unsigned ndescs = in_num + out_num; -diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h -index d9fc1f1799..dd78f4bec2 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.h -+++ b/hw/virtio/vhost-shadow-virtqueue.h -@@ -86,6 +86,9 @@ bool vhost_svq_valid_features(uint64_t features, Error **errp); - - void vhost_svq_push_elem(VhostShadowVirtqueue *svq, - const VirtQueueElement *elem, uint32_t len); -+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, -+ size_t out_num, const struct iovec 
*in_sg, size_t in_num, -+ VirtQueueElement *elem); - - void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); - void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); --- -2.31.1 - diff --git a/kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch b/kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch deleted file mode 100644 index f149c05..0000000 --- a/kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 793d6d56190397624efdcaf6e0112bd12e39c05d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:25:01 +0200 -Subject: [PATCH 02/32] vhost: Fix device's used descriptor dequeue -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [2/27] b92803a0681c94c65d243dd07424522387594760 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 81abfa5724c9a6502d7a1d3a67c55f2a303a1170 -Author: Eugenio Pérez -Date: Thu May 12 19:57:43 2022 +0200 - - vhost: Fix device's used descriptor dequeue - - Only the first one of them were properly enqueued back. - - Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding") - - Signed-off-by: Eugenio Pérez - Message-Id: <20220512175747.142058-3-eperezma@redhat.com> - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Michael S. 
Tsirkin - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 17 +++++++++++++++-- - 1 file changed, 15 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 3155801f50..31fc50907d 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -334,12 +334,22 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) - svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); - } - -+static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq, -+ uint16_t num, uint16_t i) -+{ -+ for (uint16_t j = 0; j < (num - 1); ++j) { -+ i = le16_to_cpu(svq->desc_next[i]); -+ } -+ -+ return i; -+} -+ - static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - uint32_t *len) - { - const vring_used_t *used = svq->vring.used; - vring_used_elem_t used_elem; -- uint16_t last_used; -+ uint16_t last_used, last_used_chain, num; - - if (!vhost_svq_more_used(svq)) { - return NULL; -@@ -365,7 +375,10 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - return NULL; - } - -- svq->desc_next[used_elem.id] = svq->free_head; -+ num = svq->ring_id_maps[used_elem.id]->in_num + -+ svq->ring_id_maps[used_elem.id]->out_num; -+ last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); -+ svq->desc_next[last_used_chain] = svq->free_head; - svq->free_head = used_elem.id; - - *len = used_elem.len; --- -2.31.1 - diff --git a/kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch b/kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch deleted file mode 100644 index 51eb700..0000000 --- a/kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch +++ /dev/null @@ -1,68 +0,0 @@ -From aa99cf129923e0203c0caeb3b4e94a0eb973746f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:36:38 +0200 -Subject: [PATCH 04/32] vhost: Fix element in vhost_svq_add failure 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [4/27] 96689c99a47dd49591c0d126cb1fbb975b2f79b4 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 5181db132b587754dda3a520eec923b87a65bbb7 -Author: Eugenio Pérez -Date: Thu May 12 19:57:47 2022 +0200 - - vhost: Fix element in vhost_svq_add failure - - Coverity rightly reports that is not free in that case. - - Fixes: Coverity CID 1487559 - Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding") - - Signed-off-by: Eugenio Pérez - Message-Id: <20220512175747.142058-7-eperezma@redhat.com> - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Michael S. Tsirkin - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 31fc50907d..06d0bb39d9 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -199,11 +199,19 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, - return true; - } - -+/** -+ * Add an element to a SVQ. -+ * -+ * The caller must check that there is enough slots for the new element. It -+ * takes ownership of the element: In case of failure, it is free and the SVQ -+ * is considered broken. 
-+ */ - static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) - { - unsigned qemu_head; - bool ok = vhost_svq_add_split(svq, elem, &qemu_head); - if (unlikely(!ok)) { -+ g_free(elem); - return false; - } - --- -2.31.1 - diff --git a/kvm-vhost-Get-vring-base-from-vq-not-svq.patch b/kvm-vhost-Get-vring-base-from-vq-not-svq.patch deleted file mode 100644 index 1c8e586..0000000 --- a/kvm-vhost-Get-vring-base-from-vq-not-svq.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 3f2ba7cce6b272a8b5c8953e8923e799e4aa7b88 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Mon, 18 Jul 2022 14:05:45 +0200 -Subject: [PATCH 02/23] vhost: Get vring base from vq, not svq -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/21] e7e0294bbc98f69ccdbc4af4715857e77b017f80 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: Merged - -The SVQ vring used idx usually match with the guest visible one, as long -as all the guest buffers (GPA) maps to exactly one buffer within qemu's -VA. However, as we can see in virtqueue_map_desc, a single guest buffer -could map to many buffers in SVQ vring. - -Also, its also a mistake to rewind them at the source of migration. -Since VirtQueue is able to migrate the inflight descriptors, its -responsability of the destination to perform the rewind just in case it -cannot report the inflight descriptors to the device. - -This makes easier to migrate between backends or to recover them in -vhost devices that support set in flight descriptors. 
- -Fixes: 6d0b22266633 ("vdpa: Adapt vhost_vdpa_get_vring_base to SVQ") -Signed-off-by: Eugenio Pérez -Signed-off-by: Jason Wang -(cherry picked from commit 2fdac348fd3d243bb964937236af3cc27ae7af2b) ---- - hw/virtio/vhost-vdpa.c | 24 ++++++++++++------------ - 1 file changed, 12 insertions(+), 12 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 03dc6014b0..96334ab5b6 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -1177,7 +1177,18 @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev, - struct vhost_vring_state *ring) - { - struct vhost_vdpa *v = dev->opaque; -+ VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index); - -+ /* -+ * vhost-vdpa devices does not support in-flight requests. Set all of them -+ * as available. -+ * -+ * TODO: This is ok for networking, but other kinds of devices might -+ * have problems with these retransmissions. -+ */ -+ while (virtqueue_rewind(vq, 1)) { -+ continue; -+ } - if (v->shadow_vqs_enabled) { - /* - * Device vring base was set at device start. SVQ base is handled by -@@ -1193,21 +1204,10 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev, - struct vhost_vring_state *ring) - { - struct vhost_vdpa *v = dev->opaque; -- int vdpa_idx = ring->index - dev->vq_index; - int ret; - - if (v->shadow_vqs_enabled) { -- VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx); -- -- /* -- * Setting base as last used idx, so destination will see as available -- * all the entries that the device did not use, including the in-flight -- * processing ones. -- * -- * TODO: This is ok for networking, but other kinds of devices might -- * have problems with these retransmissions. 
-- */ -- ring->num = svq->last_used_idx; -+ ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index); - return 0; - } - --- -2.31.1 - diff --git a/kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch b/kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch deleted file mode 100644 index 513d7b4..0000000 --- a/kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 3a944d8cd3d35b2398ff68d9ed8ea51d27dfab3c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 12/32] vhost: Move vhost_svq_kick call to vhost_svq_add -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [12/27] 29a7e1fb4992c4beca1e9a3379bb4c8a0f567459 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 98b5adef8493a2bfad6655cfee84299e88bedbf7 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:31 2022 +0200 - - vhost: Move vhost_svq_kick call to vhost_svq_add - - The series needs to expose vhost_svq_add with full functionality, - including kick - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. 
Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 05cd39d1eb..e3fc3c2658 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -246,6 +246,7 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) - } - - svq->ring_id_maps[qemu_head] = elem; -+ vhost_svq_kick(svq); - return true; - } - -@@ -306,7 +307,6 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) - /* VQ is broken, just return and ignore any other kicks */ - return; - } -- vhost_svq_kick(svq); - } - - virtio_queue_set_notification(svq->vq, true); --- -2.31.1 - diff --git a/kvm-vhost-Reorder-vhost_svq_kick.patch b/kvm-vhost-Reorder-vhost_svq_kick.patch deleted file mode 100644 index f61f3c3..0000000 --- a/kvm-vhost-Reorder-vhost_svq_kick.patch +++ /dev/null @@ -1,88 +0,0 @@ -From fdbf66e4c70de16ab36d70ea591322b1b24df591 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 11/32] vhost: Reorder vhost_svq_kick -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [11/27] 1d08b97eb3960a0f85f2dd48c3331b803f7ea205 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit d93a2405ca6efa9dc1c420cee5a34bd8242818d0 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:30 2022 +0200 - - vhost: Reorder vhost_svq_kick - - Future code needs to call it from vhost_svq_add. - - No functional change intended. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. 
Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 28 ++++++++++++++-------------- - 1 file changed, 14 insertions(+), 14 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 9c46c3a8fa..05cd39d1eb 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -215,6 +215,20 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, - return true; - } - -+static void vhost_svq_kick(VhostShadowVirtqueue *svq) -+{ -+ /* -+ * We need to expose the available array entries before checking the used -+ * flags -+ */ -+ smp_mb(); -+ if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { -+ return; -+ } -+ -+ event_notifier_set(&svq->hdev_kick); -+} -+ - /** - * Add an element to a SVQ. - * -@@ -235,20 +249,6 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) - return true; - } - --static void vhost_svq_kick(VhostShadowVirtqueue *svq) --{ -- /* -- * We need to expose the available array entries before checking the used -- * flags -- */ -- smp_mb(); -- if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { -- return; -- } -- -- event_notifier_set(&svq->hdev_kick); --} -- - /** - * Forward available buffers. 
- * --- -2.31.1 - diff --git a/kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch b/kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch deleted file mode 100644 index 31bfccc..0000000 --- a/kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch +++ /dev/null @@ -1,123 +0,0 @@ -From 486647551223cc01f4dba87197030bbf4e674f0f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:24:48 +0200 -Subject: [PATCH 01/32] vhost: Track descriptor chain in private at SVQ -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [1/27] 26d16dc383e3064ac6e4288d5c52b39fee0ad204 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 495fe3a78749c39c0e772c4e1a55d6cb8a7e5292 -Author: Eugenio Pérez -Date: Thu May 12 19:57:42 2022 +0200 - - vhost: Track descriptor chain in private at SVQ - - The device could have access to modify them, and it definitely have - access when we implement packed vq. Harden SVQ maintaining a private - copy of the descriptor chain. Other fields like buffer addresses are - already maintained sepparatedly. - - Signed-off-by: Eugenio Pérez - Message-Id: <20220512175747.142058-2-eperezma@redhat.com> - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Michael S. 
Tsirkin - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 12 +++++++----- - hw/virtio/vhost-shadow-virtqueue.h | 6 ++++++ - 2 files changed, 13 insertions(+), 5 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index b232803d1b..3155801f50 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -138,6 +138,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, - for (n = 0; n < num; n++) { - if (more_descs || (n + 1 < num)) { - descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT); -+ descs[i].next = cpu_to_le16(svq->desc_next[i]); - } else { - descs[i].flags = flags; - } -@@ -145,10 +146,10 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, - descs[i].len = cpu_to_le32(iovec[n].iov_len); - - last = i; -- i = cpu_to_le16(descs[i].next); -+ i = cpu_to_le16(svq->desc_next[i]); - } - -- svq->free_head = le16_to_cpu(descs[last].next); -+ svq->free_head = le16_to_cpu(svq->desc_next[last]); - } - - static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, -@@ -336,7 +337,6 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) - static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - uint32_t *len) - { -- vring_desc_t *descs = svq->vring.desc; - const vring_used_t *used = svq->vring.used; - vring_used_elem_t used_elem; - uint16_t last_used; -@@ -365,7 +365,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - return NULL; - } - -- descs[used_elem.id].next = svq->free_head; -+ svq->desc_next[used_elem.id] = svq->free_head; - svq->free_head = used_elem.id; - - *len = used_elem.len; -@@ -540,8 +540,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size); - memset(svq->vring.used, 0, device_size); - svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); 
-+ svq->desc_next = g_new0(uint16_t, svq->vring.num); - for (unsigned i = 0; i < svq->vring.num - 1; i++) { -- svq->vring.desc[i].next = cpu_to_le16(i + 1); -+ svq->desc_next[i] = cpu_to_le16(i + 1); - } - } - -@@ -574,6 +575,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) - virtqueue_detach_element(svq->vq, next_avail_elem, 0); - } - svq->vq = NULL; -+ g_free(svq->desc_next); - g_free(svq->ring_id_maps); - qemu_vfree(svq->vring.desc); - qemu_vfree(svq->vring.used); -diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h -index e5e24c536d..c132c994e9 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.h -+++ b/hw/virtio/vhost-shadow-virtqueue.h -@@ -53,6 +53,12 @@ typedef struct VhostShadowVirtqueue { - /* Next VirtQueue element that guest made available */ - VirtQueueElement *next_guest_avail_elem; - -+ /* -+ * Backup next field for each descriptor so we can recover securely, not -+ * needing to trust the device access. -+ */ -+ uint16_t *desc_next; -+ - /* Next head to expose to the device */ - uint16_t shadow_avail_idx; - --- -2.31.1 - diff --git a/kvm-vhost-Track-number-of-descs-in-SVQDescState.patch b/kvm-vhost-Track-number-of-descs-in-SVQDescState.patch deleted file mode 100644 index 6a2e147..0000000 --- a/kvm-vhost-Track-number-of-descs-in-SVQDescState.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 24b8cf88f53f9fc7cb393c9cad908f759980bfee Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 16/32] vhost: Track number of descs in SVQDescState -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [16/27] 26f30cb6dd35c1eb1ddabe25113431bed3d744aa (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream 
Status: git://git.qemu.org/qemu.git - -commit ac4cfdc6f39c06732d27554523f9d5f8a53b4ffa -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:35 2022 +0200 - - vhost: Track number of descs in SVQDescState - - A guest's buffer continuos on GPA may need multiple descriptors on - qemu's VA, so SVQ should track its length sepparatedly. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 4 ++-- - hw/virtio/vhost-shadow-virtqueue.h | 6 ++++++ - 2 files changed, 8 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index a08e3d4025..4d99075e73 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -257,6 +257,7 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, - } - - svq->desc_state[qemu_head].elem = elem; -+ svq->desc_state[qemu_head].ndescs = ndescs; - vhost_svq_kick(svq); - return 0; - } -@@ -418,8 +419,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - return NULL; - } - -- num = svq->desc_state[used_elem.id].elem->in_num + -- svq->desc_state[used_elem.id].elem->out_num; -+ num = svq->desc_state[used_elem.id].ndescs; - last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); - svq->desc_next[last_used_chain] = svq->free_head; - svq->free_head = used_elem.id; -diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h -index d646c35054..5c7e7cbab6 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.h -+++ b/hw/virtio/vhost-shadow-virtqueue.h -@@ -17,6 +17,12 @@ - - typedef struct SVQDescState { - VirtQueueElement *elem; -+ -+ /* -+ * Number of descriptors exposed to the device. 
May or may not match -+ * guest's -+ */ -+ unsigned int ndescs; - } SVQDescState; - - /* Shadow virtqueue to relay notifications */ --- -2.31.1 - diff --git a/kvm-vhost-add-vhost_svq_poll.patch b/kvm-vhost-add-vhost_svq_poll.patch deleted file mode 100644 index fa27e5e..0000000 --- a/kvm-vhost-add-vhost_svq_poll.patch +++ /dev/null @@ -1,92 +0,0 @@ -From 0ab3da1092362470d256b433c546bd365d34f930 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 19/32] vhost: add vhost_svq_poll -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [19/27] 6807bb0bb6e5183b46a03b12b4027c7d767e8555 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 3f44d13dda83d390cc9563e56e7d337e4f6223f4 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:38 2022 +0200 - - vhost: add vhost_svq_poll - - It allows the Shadow Control VirtQueue to wait for the device to use the - available buffers. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 27 +++++++++++++++++++++++++++ - hw/virtio/vhost-shadow-virtqueue.h | 1 + - 2 files changed, 28 insertions(+) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index cb879e7b88..95d0d7a7ee 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -485,6 +485,33 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, - } while (!vhost_svq_enable_notification(svq)); - } - -+/** -+ * Poll the SVQ for one device used buffer. 
-+ * -+ * This function race with main event loop SVQ polling, so extra -+ * synchronization is needed. -+ * -+ * Return the length written by the device. -+ */ -+size_t vhost_svq_poll(VhostShadowVirtqueue *svq) -+{ -+ int64_t start_us = g_get_monotonic_time(); -+ do { -+ uint32_t len; -+ VirtQueueElement *elem = vhost_svq_get_buf(svq, &len); -+ if (elem) { -+ return len; -+ } -+ -+ if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { -+ return 0; -+ } -+ -+ /* Make sure we read new used_idx */ -+ smp_rmb(); -+ } while (true); -+} -+ - /** - * Forward used buffers. - * -diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h -index dd78f4bec2..cf442f7dea 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.h -+++ b/hw/virtio/vhost-shadow-virtqueue.h -@@ -89,6 +89,7 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq, - int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, - size_t out_num, const struct iovec *in_sg, size_t in_num, - VirtQueueElement *elem); -+size_t vhost_svq_poll(VhostShadowVirtqueue *svq); - - void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); - void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); --- -2.31.1 - diff --git a/kvm-vhost-add-vhost_svq_push_elem.patch b/kvm-vhost-add-vhost_svq_push_elem.patch deleted file mode 100644 index 2a9ec40..0000000 --- a/kvm-vhost-add-vhost_svq_push_elem.patch +++ /dev/null @@ -1,83 +0,0 @@ -From a26eb02b3a49c5d1163685ba5b83b67138c09047 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 17/32] vhost: add vhost_svq_push_elem -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [17/27] d064b40a262f2dfdc9f648d250aa8c8020c40385 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella 
-RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 432efd144e990b6e040862de25f8f0b6a6eeb03d -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:36 2022 +0200 - - vhost: add vhost_svq_push_elem - - This function allows external SVQ users to return guest's available - buffers. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 16 ++++++++++++++++ - hw/virtio/vhost-shadow-virtqueue.h | 3 +++ - 2 files changed, 19 insertions(+) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 4d99075e73..1ce52d5b4a 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -428,6 +428,22 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - return g_steal_pointer(&svq->desc_state[used_elem.id].elem); - } - -+/** -+ * Push an element to SVQ, returning it to the guest. -+ */ -+void vhost_svq_push_elem(VhostShadowVirtqueue *svq, -+ const VirtQueueElement *elem, uint32_t len) -+{ -+ virtqueue_push(svq->vq, elem, len); -+ if (svq->next_guest_avail_elem) { -+ /* -+ * Avail ring was full when vhost_svq_flush was called, so it's a -+ * good moment to make more descriptors available if possible. 
-+ */ -+ vhost_handle_guest_kick(svq); -+ } -+} -+ - static void vhost_svq_flush(VhostShadowVirtqueue *svq, - bool check_for_avail_queue) - { -diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h -index 5c7e7cbab6..d9fc1f1799 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.h -+++ b/hw/virtio/vhost-shadow-virtqueue.h -@@ -84,6 +84,9 @@ typedef struct VhostShadowVirtqueue { - - bool vhost_svq_valid_features(uint64_t features, Error **errp); - -+void vhost_svq_push_elem(VhostShadowVirtqueue *svq, -+ const VirtQueueElement *elem, uint32_t len); -+ - void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); - void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); - void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, --- -2.31.1 - diff --git a/kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch b/kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch deleted file mode 100644 index 08bcaf2..0000000 --- a/kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 2bdea90bfbce3b8d5bfa86178a942a470b85b835 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 07/32] vhost: move descriptor translation to - vhost_svq_vring_write_descs -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [7/27] 5533c72065e4ebf8ea7db966c976a3b29bdafb82 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 009c2549bb9dc7f7061009eb87f2a53d4b364983 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:26 2022 +0200 - - vhost: move descriptor translation to vhost_svq_vring_write_descs - - 
It's done for both in and out descriptors so it's better placed here. - - Acked-by: Jason Wang - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/virtio/vhost-shadow-virtqueue.c | 38 +++++++++++++++++++++--------- - 1 file changed, 27 insertions(+), 11 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 06d0bb39d9..3fbda1e3d4 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -122,17 +122,35 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, - return true; - } - --static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, -- const struct iovec *iovec, size_t num, -- bool more_descs, bool write) -+/** -+ * Write descriptors to SVQ vring -+ * -+ * @svq: The shadow virtqueue -+ * @sg: Cache for hwaddr -+ * @iovec: The iovec from the guest -+ * @num: iovec length -+ * @more_descs: True if more descriptors come in the chain -+ * @write: True if they are writeable descriptors -+ * -+ * Return true if success, false otherwise and print error. -+ */ -+static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, -+ const struct iovec *iovec, size_t num, -+ bool more_descs, bool write) - { - uint16_t i = svq->free_head, last = svq->free_head; - unsigned n; - uint16_t flags = write ? 
cpu_to_le16(VRING_DESC_F_WRITE) : 0; - vring_desc_t *descs = svq->vring.desc; -+ bool ok; - - if (num == 0) { -- return; -+ return true; -+ } -+ -+ ok = vhost_svq_translate_addr(svq, sg, iovec, num); -+ if (unlikely(!ok)) { -+ return false; - } - - for (n = 0; n < num; n++) { -@@ -150,6 +168,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, - } - - svq->free_head = le16_to_cpu(svq->desc_next[last]); -+ return true; - } - - static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, -@@ -169,21 +188,18 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, - return false; - } - -- ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num); -+ ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, -+ elem->in_num > 0, false); - if (unlikely(!ok)) { - return false; - } -- vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, -- elem->in_num > 0, false); -- - -- ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num); -+ ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, -+ true); - if (unlikely(!ok)) { - return false; - } - -- vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true); -- - /* - * Put the entry in the available array (but don't update avail->idx until - * they do sync). 
--- -2.31.1 - diff --git a/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch b/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch deleted file mode 100644 index 70e8f59..0000000 --- a/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch +++ /dev/null @@ -1,56 +0,0 @@ -From edb2bd99355f300b512c040e91f5870ea14a5d7e Mon Sep 17 00:00:00 2001 -From: Si-Wei Liu -Date: Fri, 6 May 2022 19:28:15 -0700 -Subject: [PATCH 11/16] vhost-net: fix improper cleanup in vhost_net_start -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [4/7] e88e482dd4b344f0cc887a358268beaed4d62917 (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu - -vhost_net_start() missed a corresponding stop_one() upon error from -vhost_set_vring_enable(). While at it, make the error handling for -err_start more robust. No real issue was found due to this though. - -Signed-off-by: Si-Wei Liu -Acked-by: Jason Wang -Message-Id: <1651890498-24478-5-git-send-email-si-wei.liu@oracle.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 6f3910b5eee00b8cc959e94659c0d524c482a418) -Signed-off-by: Jason Wang ---- - hw/net/vhost_net.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c -index 30379d2ca4..d6d7c51f62 100644 ---- a/hw/net/vhost_net.c -+++ b/hw/net/vhost_net.c -@@ -381,6 +381,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, - r = vhost_set_vring_enable(peer, peer->vring_enable); - - if (r < 0) { -+ vhost_net_stop_one(get_vhost_net(peer), dev); - goto err_start; - } - } -@@ -390,7 +391,8 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, - - err_start: - while (--i >= 0) { -- peer = qemu_get_peer(ncs , i); -+ peer = qemu_get_peer(ncs, i < data_queue_pairs ? -+ i : n->max_queue_pairs); - vhost_net_stop_one(get_vhost_net(peer), dev); - } - e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); --- -2.31.1 - diff --git a/kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch b/kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch deleted file mode 100644 index 31677fd..0000000 --- a/kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch +++ /dev/null @@ -1,87 +0,0 @@ -From a9095850da8dd4ea3fdb725cb7f79118144e22fa Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:39:27 +0200 -Subject: [PATCH 22/32] vhost-net-vdpa: add stubs for when no virtio-net device - is present -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [22/27] a2b25a805bb06094a5fab27ce8f82bee12a9fcb5 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 94c643732dc110d04bbdf0eb43c41bce23b3593e -Author: Eugenio 
Pérez -Date: Wed Jul 20 08:59:41 2022 +0200 - - vhost-net-vdpa: add stubs for when no virtio-net device is present - - net/vhost-vdpa.c will need functions that are declared in - vhost-shadow-virtqueue.c, that needs functions of virtio-net.c. - - Copy the vhost-vdpa-stub.c code so - only the constructor net_init_vhost_vdpa needs to be defined. - - Signed-off-by: Eugenio Pérez - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - net/meson.build | 3 ++- - net/vhost-vdpa-stub.c | 21 +++++++++++++++++++++ - 2 files changed, 23 insertions(+), 1 deletion(-) - create mode 100644 net/vhost-vdpa-stub.c - -diff --git a/net/meson.build b/net/meson.build -index c965e83b26..116a9e7cbb 100644 ---- a/net/meson.build -+++ b/net/meson.build -@@ -41,7 +41,8 @@ endif - softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix)) - softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c')) - if have_vhost_net_vdpa -- softmmu_ss.add(files('vhost-vdpa.c')) -+ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-vdpa.c'), if_false: files('vhost-vdpa-stub.c')) -+ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-vdpa-stub.c')) - endif - - subdir('can') -diff --git a/net/vhost-vdpa-stub.c b/net/vhost-vdpa-stub.c -new file mode 100644 -index 0000000000..1732ed2443 ---- /dev/null -+++ b/net/vhost-vdpa-stub.c -@@ -0,0 +1,21 @@ -+/* -+ * vhost-vdpa-stub.c -+ * -+ * Copyright (c) 2022 Red Hat, Inc. -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. 
-+ * -+ */ -+ -+#include "qemu/osdep.h" -+#include "clients.h" -+#include "net/vhost-vdpa.h" -+#include "qapi/error.h" -+ -+int net_init_vhost_vdpa(const Netdev *netdev, const char *name, -+ NetClientState *peer, Error **errp) -+{ -+ error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*"); -+ return -1; -+} --- -2.31.1 - diff --git a/kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch b/kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch deleted file mode 100644 index 7125f6a..0000000 --- a/kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 45305ab202fa2191962152e5a501a9a13e31a0b2 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:26 +0200 -Subject: [PATCH 11/23] vhost: stop transfer elem ownership in - vhost_handle_guest_kick -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [10/21] 697a5c0ad59efe27abf447f7965091993bc39756 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -It was easier to allow vhost_svq_add to handle the memory. Now that we -will allow qemu to add elements to a SVQ without the guest's knowledge, -it's better to handle it in the caller. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit eb42df8bb2c92a7313343d97409cd99ccba25b25) ---- - hw/virtio/vhost-shadow-virtqueue.c | 10 ++++------ - 1 file changed, 4 insertions(+), 6 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index f420311b89..2ae47d90a1 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -233,9 +233,6 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) - /** - * Add an element to a SVQ. - * -- * The caller must check that there is enough slots for the new element. It -- * takes ownership of the element: In case of failure not ENOSPC, it is free. -- * - * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full - */ - int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, -@@ -252,7 +249,6 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, - - ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head); - if (unlikely(!ok)) { -- g_free(elem); - return -EINVAL; - } - -@@ -293,7 +289,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) - virtio_queue_set_notification(svq->vq, false); - - while (true) { -- VirtQueueElement *elem; -+ g_autofree VirtQueueElement *elem; - int r; - - if (svq->next_guest_avail_elem) { -@@ -324,12 +320,14 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) - * queue the current guest descriptor and ignore kicks - * until some elements are used. 
- */ -- svq->next_guest_avail_elem = elem; -+ svq->next_guest_avail_elem = g_steal_pointer(&elem); - } - - /* VQ is full or broken, just return and ignore kicks */ - return; - } -+ /* elem belongs to SVQ or external caller now */ -+ elem = NULL; - } - - virtio_queue_set_notification(svq->vq, true); --- -2.31.1 - diff --git a/kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch b/kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch deleted file mode 100644 index b908739..0000000 --- a/kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 78b7d9af26ae802b3ca0d7b794b366ab4d515647 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:27 +0200 -Subject: [PATCH 12/23] vhost: use SVQ element ndescs instead of opaque data - for desc validation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [11/21] 536ba65ff7241c4dc66362294ba8de4354260d6f (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -Since we're going to allow SVQ to add elements without the guest's -knowledge and without its own VirtQueueElement, it's easier to check if -an element is a valid head checking a different thing than the -VirtQueueElement. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 70e0841722deb363b53cdcd465af12a0d1461b60) ---- - hw/virtio/vhost-shadow-virtqueue.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 2ae47d90a1..7792f3db1d 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -414,7 +414,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - return NULL; - } - -- if (unlikely(!svq->desc_state[used_elem.id].elem)) { -+ if (unlikely(!svq->desc_state[used_elem.id].ndescs)) { - qemu_log_mask(LOG_GUEST_ERROR, - "Device %s says index %u is used, but it was not available", - svq->vdev->name, used_elem.id); -@@ -422,6 +422,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - } - - num = svq->desc_state[used_elem.id].ndescs; -+ svq->desc_state[used_elem.id].ndescs = 0; - last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); - svq->desc_next[last_used_chain] = svq->free_head; - svq->free_head = used_elem.id; --- -2.31.1 - diff --git a/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch b/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch deleted file mode 100644 index 747bf5f..0000000 --- a/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 46c5a35aa56cf0dd55376638dbf7d46e85f497e1 Mon Sep 17 00:00:00 2001 -From: Si-Wei Liu -Date: Fri, 6 May 2022 19:28:16 -0700 -Subject: [PATCH 12/16] vhost-vdpa: backend feature should set only once -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [5/7] 7531bb8da0c99b29997e8bfc6d1e811daf3cdd38 (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier -RH-Acked-by: 
Cindy Lu - -The vhost_vdpa_one_time_request() branch in -vhost_vdpa_set_backend_cap() incorrectly sends down -ioctls on vhost_dev with non-zero index. This may -end up with multiple VHOST_SET_BACKEND_FEATURES -ioctl calls sent down on the vhost-vdpa fd that is -shared between all these vhost_dev's. - -To fix it, send down ioctl only once via the first -vhost_dev with index 0. Toggle the polarity of the -vhost_vdpa_one_time_request() test should do the -trick. - -Fixes: 4d191cfdc7de ("vhost-vdpa: classify one time request") -Signed-off-by: Si-Wei Liu -Reviewed-by: Stefano Garzarella -Acked-by: Jason Wang -Acked-by: Eugenio Pérez -Message-Id: <1651890498-24478-6-git-send-email-si-wei.liu@oracle.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 6aee7e4233f6467f69531fcd352adff028f3f5ea) -Signed-off-by: Jason Wang ---- - hw/virtio/vhost-vdpa.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 8adf7c0b92..6e3dbd9e89 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -665,7 +665,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) - - features &= f; - -- if (vhost_vdpa_one_time_request(dev)) { -+ if (!vhost_vdpa_one_time_request(dev)) { - r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); - if (r) { - return -EFAULT; --- -2.31.1 - diff --git a/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch b/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch deleted file mode 100644 index 2466557..0000000 --- a/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch +++ /dev/null @@ -1,123 +0,0 @@ -From 58acdab17ec00ab76105ab92a51c5ba4dec3df5a Mon Sep 17 00:00:00 2001 -From: Si-Wei Liu -Date: Fri, 6 May 2022 19:28:17 -0700 -Subject: [PATCH 13/16] vhost-vdpa: change name and polarity for - vhost_vdpa_one_time_request() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [6/7] 7029778f463a136ff412c63b86b6953390e47bf8 (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu - -The name vhost_vdpa_one_time_request() was confusing. No -matter whatever it returns, its typical occurrence had -always been at requests that only need to be applied once. -And the name didn't suggest what it actually checks for. -Change it to vhost_vdpa_first_dev() with polarity flipped -for better readibility of code. That way it is able to -reflect what the check is really about. - -This call is applicable to request which performs operation -only once, before queues are set up, and usually at the beginning -of the caller function. Document the requirement for it in place. - -Signed-off-by: Si-Wei Liu -Message-Id: <1651890498-24478-7-git-send-email-si-wei.liu@oracle.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Stefano Garzarella -Acked-by: Jason Wang -(cherry picked from commit d71b0609fc04217e28d17009f04d74b08be6f466) -Signed-off-by: Jason Wang ---- - hw/virtio/vhost-vdpa.c | 23 +++++++++++++++-------- - 1 file changed, 15 insertions(+), 8 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 6e3dbd9e89..33dcaa135e 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -366,11 +366,18 @@ static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) - v->iova_range.last); - } - --static bool vhost_vdpa_one_time_request(struct vhost_dev *dev) -+/* -+ * The use of this function is for requests that only need to be -+ * applied once. Typically such request occurs at the beginning -+ * of operation, and before setting up queues. It should not be -+ * used for request that performs operation until all queues are -+ * set, which would need to check dev->vq_index_end instead. 
-+ */ -+static bool vhost_vdpa_first_dev(struct vhost_dev *dev) - { - struct vhost_vdpa *v = dev->opaque; - -- return v->index != 0; -+ return v->index == 0; - } - - static int vhost_vdpa_get_dev_features(struct vhost_dev *dev, -@@ -451,7 +458,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) - - vhost_vdpa_get_iova_range(v); - -- if (vhost_vdpa_one_time_request(dev)) { -+ if (!vhost_vdpa_first_dev(dev)) { - return 0; - } - -@@ -594,7 +601,7 @@ static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) - static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, - struct vhost_memory *mem) - { -- if (vhost_vdpa_one_time_request(dev)) { -+ if (!vhost_vdpa_first_dev(dev)) { - return 0; - } - -@@ -623,7 +630,7 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, - struct vhost_vdpa *v = dev->opaque; - int ret; - -- if (vhost_vdpa_one_time_request(dev)) { -+ if (!vhost_vdpa_first_dev(dev)) { - return 0; - } - -@@ -665,7 +672,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) - - features &= f; - -- if (!vhost_vdpa_one_time_request(dev)) { -+ if (vhost_vdpa_first_dev(dev)) { - r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); - if (r) { - return -EFAULT; -@@ -1118,7 +1125,7 @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, - struct vhost_log *log) - { - struct vhost_vdpa *v = dev->opaque; -- if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) { -+ if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { - return 0; - } - -@@ -1240,7 +1247,7 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev, - - static int vhost_vdpa_set_owner(struct vhost_dev *dev) - { -- if (vhost_vdpa_one_time_request(dev)) { -+ if (!vhost_vdpa_first_dev(dev)) { - return 0; - } - --- -2.31.1 - diff --git a/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch b/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch deleted file mode 100644 index 7716cbf..0000000 --- 
a/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 3142102adb98f46518c0ac1773b0c48710c6bed6 Mon Sep 17 00:00:00 2001 -From: Si-Wei Liu -Date: Fri, 6 May 2022 19:28:14 -0700 -Subject: [PATCH 10/16] vhost-vdpa: fix improper cleanup in net_init_vhost_vdpa -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [3/7] c83ff6c97d34cfae3c3447edde934b42a9ace75f (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu - -... such that no memory leaks on dangling net clients in case of -error. - -Signed-off-by: Si-Wei Liu -Acked-by: Jason Wang -Message-Id: <1651890498-24478-4-git-send-email-si-wei.liu@oracle.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 9bd055073e375c8a0d7ebce925e05d914d69fc7f) -Signed-off-by: Jason Wang ---- - net/vhost-vdpa.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 1e9fe47c03..df1e69ee72 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -306,7 +306,9 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - - err: - if (i) { -- qemu_del_net_client(ncs[0]); -+ for (i--; i >= 0; i--) { -+ qemu_del_net_client(ncs[i]); -+ } - } - qemu_close(vdpa_device_fd); - --- -2.31.1 - diff --git a/kvm-vhost_net-Add-NetClientInfo-start-callback.patch b/kvm-vhost_net-Add-NetClientInfo-start-callback.patch deleted file mode 100644 index 40bf5f6..0000000 --- a/kvm-vhost_net-Add-NetClientInfo-start-callback.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 6a6999311742b6dccdfce09f30742a63d72d1bd7 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:30 +0200 -Subject: [PATCH 15/23] vhost_net: Add NetClientInfo start callback -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [14/21] df6a96ae3aec02ecae793bdbd8e9c2fcfac7871a (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -This is used by the backend to perform actions before the device is -started. - -In particular, vdpa net use it to map CVQ buffers to the device, so it -can send control commands using them. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 80bda0e674fd0b439ac627ab7ecdbd4a1b46d525) ---- - hw/net/vhost_net.c | 7 +++++++ - include/net/net.h | 2 ++ - 2 files changed, 9 insertions(+) - -diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c -index d6d7c51f62..1005f9d8e6 100644 ---- a/hw/net/vhost_net.c -+++ b/hw/net/vhost_net.c -@@ -244,6 +244,13 @@ static int vhost_net_start_one(struct vhost_net *net, - struct vhost_vring_file file = { }; - int r; - -+ if (net->nc->info->start) { -+ r = net->nc->info->start(net->nc); -+ if (r < 0) { -+ return r; -+ } -+ } -+ - r = vhost_dev_enable_notifiers(&net->dev, dev); - if (r < 0) { - goto fail_notifiers; -diff --git a/include/net/net.h b/include/net/net.h -index 523136c7ac..ad9e80083a 100644 ---- a/include/net/net.h -+++ b/include/net/net.h -@@ -44,6 +44,7 @@ typedef struct NICConf { - - typedef void (NetPoll)(NetClientState *, bool enable); - typedef bool (NetCanReceive)(NetClientState *); -+typedef int (NetStart)(NetClientState *); - typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); - typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); - typedef void (NetCleanup) (NetClientState *); -@@ -71,6 +72,7 @@ typedef struct NetClientInfo { - NetReceive *receive_raw; - NetReceiveIOV 
*receive_iov; - NetCanReceive *can_receive; -+ NetStart *start; - NetCleanup *cleanup; - LinkStatusChanged *link_status_changed; - QueryRxFilter *query_rx_filter; --- -2.31.1 - diff --git a/kvm-vhost_net-Add-NetClientInfo-stop-callback.patch b/kvm-vhost_net-Add-NetClientInfo-stop-callback.patch deleted file mode 100644 index c622824..0000000 --- a/kvm-vhost_net-Add-NetClientInfo-stop-callback.patch +++ /dev/null @@ -1,68 +0,0 @@ -From effd0ed379deb43bb850f1aeff24fa85935d7f52 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:31 +0200 -Subject: [PATCH 16/23] vhost_net: Add NetClientInfo stop callback -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [15/21] 9f8a3e9bfb0d21fa0479f54a7a17cb738aa46359 (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -Used by the backend to perform actions after the device is stopped. - -In particular, vdpa net use it to unmap CVQ buffers to the device, -cleaning the actions performed in prepare(). 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit c6544e2331d721627fa7356da3592bcb46340f1b) ---- - hw/net/vhost_net.c | 3 +++ - include/net/net.h | 2 ++ - 2 files changed, 5 insertions(+) - -diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c -index 1005f9d8e6..275ece5324 100644 ---- a/hw/net/vhost_net.c -+++ b/hw/net/vhost_net.c -@@ -320,6 +320,9 @@ static void vhost_net_stop_one(struct vhost_net *net, - net->nc->info->poll(net->nc, true); - } - vhost_dev_stop(&net->dev, dev); -+ if (net->nc->info->stop) { -+ net->nc->info->stop(net->nc); -+ } - vhost_dev_disable_notifiers(&net->dev, dev); - } - -diff --git a/include/net/net.h b/include/net/net.h -index ad9e80083a..476ad45b9a 100644 ---- a/include/net/net.h -+++ b/include/net/net.h -@@ -45,6 +45,7 @@ typedef struct NICConf { - typedef void (NetPoll)(NetClientState *, bool enable); - typedef bool (NetCanReceive)(NetClientState *); - typedef int (NetStart)(NetClientState *); -+typedef void (NetStop)(NetClientState *); - typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); - typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); - typedef void (NetCleanup) (NetClientState *); -@@ -73,6 +74,7 @@ typedef struct NetClientInfo { - NetReceiveIOV *receive_iov; - NetCanReceive *can_receive; - NetStart *start; -+ NetStop *stop; - NetCleanup *cleanup; - LinkStatusChanged *link_status_changed; - QueryRxFilter *query_rx_filter; --- -2.31.1 - diff --git a/kvm-vhost_net-add-NetClientState-load-callback.patch b/kvm-vhost_net-add-NetClientState-load-callback.patch deleted file mode 100644 index 92a9078..0000000 --- a/kvm-vhost_net-add-NetClientState-load-callback.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 6a5c236b95ce475c556ccd92c2135ad48474e8fb Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:35 +0200 -Subject: [PATCH 20/23] vhost_net: add NetClientState->load() callback 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 116: vdpa: Restore device state on destination -RH-Bugzilla: 2114060 -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [19/21] 439b4133a757b2f1c5f4a1441eca25329896491a (eperezmartin/qemu-kvm) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2114060 -Upstream status: git@github.com:jasowang/qemu.git net-next - -It allows per-net client operations right after device's successful -start. In particular, to load the device status. - -Vhost-vdpa net will use it to add the CVQ buffers to restore the device -status. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 302f3d20e68a8a120d431f7ff7cb02a75917f54c) ---- - hw/net/vhost_net.c | 7 +++++++ - include/net/net.h | 2 ++ - 2 files changed, 9 insertions(+) - -diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c -index 275ece5324..ea3a8be1c9 100644 ---- a/hw/net/vhost_net.c -+++ b/hw/net/vhost_net.c -@@ -281,6 +281,13 @@ static int vhost_net_start_one(struct vhost_net *net, - } - } - } -+ -+ if (net->nc->info->load) { -+ r = net->nc->info->load(net->nc); -+ if (r < 0) { -+ goto fail; -+ } -+ } - return 0; - fail: - file.fd = -1; -diff --git a/include/net/net.h b/include/net/net.h -index 476ad45b9a..81d0b21def 100644 ---- a/include/net/net.h -+++ b/include/net/net.h -@@ -45,6 +45,7 @@ typedef struct NICConf { - typedef void (NetPoll)(NetClientState *, bool enable); - typedef bool (NetCanReceive)(NetClientState *); - typedef int (NetStart)(NetClientState *); -+typedef int (NetLoad)(NetClientState *); - typedef void (NetStop)(NetClientState *); - typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); - typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); -@@ -74,6 +75,7 @@ typedef struct NetClientInfo { - NetReceiveIOV *receive_iov; - NetCanReceive *can_receive; - 
NetStart *start; -+ NetLoad *load; - NetStop *stop; - NetCleanup *cleanup; - LinkStatusChanged *link_status_changed; --- -2.31.1 - diff --git a/kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch b/kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch deleted file mode 100644 index 2a72cc7..0000000 --- a/kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 643d9c28ff8b15c333cc748c5e712659ad2a257c Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Mon, 13 Jun 2022 14:10:10 +0800 -Subject: [PATCH 03/17] virtio-iommu: Add an assert check in translate routine - -RH-Author: Eric Auger -RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices -RH-Commit: [3/5] 19f309fd0beda40d65f51c454e37936658ac9f38 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2100106 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 - -With address space switch supported, dma access translation only -happen after endpoint is attached to a non-bypass domain. - -Signed-off-by: Zhenzhong Duan -Message-Id: <20220613061010.2674054-4-zhenzhong.duan@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 23b5f0ff6d923d3bca11cf44eed3daf7a0a836a8) -Signed-off-by: Eric Auger ---- - hw/virtio/virtio-iommu.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 440a1c28a7..e970d4d5a6 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -866,6 +866,10 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, - qemu_rec_mutex_lock(&s->mutex); - - ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); -+ -+ if (bypass_allowed) -+ assert(ep && ep->domain && !ep->domain->bypass); -+ - if (!ep) { - if (!bypass_allowed) { - error_report_once("%s sid=%d is not known!!", __func__, sid); --- -2.31.1 - diff --git a/kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch b/kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch deleted file mode 100644 index 3352666..0000000 --- a/kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch +++ /dev/null @@ -1,250 +0,0 @@ -From d60774ee3168eefb21a4120a38107cd36ae17e07 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Mon, 13 Jun 2022 14:10:08 +0800 -Subject: [PATCH 01/17] virtio-iommu: Add bypass mode support to assigned - device - -RH-Author: Eric Auger -RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices -RH-Commit: [1/5] 4777815533b31c7f4f09af8902e378fd3fc1186a (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2100106 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 - -Currently assigned devices can not work in virtio-iommu bypass mode. -Guest driver fails to probe the device due to DMA failure. And the -reason is because of lacking GPA -> HPA mappings when VM is created. - -Add a root container memory region to hold both bypass memory region -and iommu memory region, so the switch between them is supported -just like the implementation in virtual VT-d. 
- -Signed-off-by: Zhenzhong Duan -Message-Id: <20220613061010.2674054-2-zhenzhong.duan@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 90519b90539b16258d1d52b908b199f44877dc18) -Signed-off-by: Eric Auger ---- - hw/virtio/trace-events | 1 + - hw/virtio/virtio-iommu.c | 115 ++++++++++++++++++++++++++++++- - include/hw/virtio/virtio-iommu.h | 2 + - 3 files changed, 116 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index a5102eac9e..2ab5881b88 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -114,6 +114,7 @@ virtio_iommu_remap(const char *name, uint64_t virt_start, uint64_t virt_end, uin - virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "mr=%s old_mask=0x%"PRIx64" new_mask=0x%"PRIx64 - virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s" - virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s" -+virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" - - # virtio-mem.c - virtio_mem_send_response(uint16_t type) "type=%" PRIu16 -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 6d5ea0bdf1..5e99e6c62b 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -70,6 +70,77 @@ static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev) - return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); - } - -+static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev) -+{ -+ uint32_t sid; -+ bool bypassed; -+ VirtIOIOMMU *s = sdev->viommu; -+ VirtIOIOMMUEndpoint *ep; -+ -+ sid = virtio_iommu_get_bdf(sdev); -+ -+ qemu_mutex_lock(&s->mutex); -+ /* need to check bypass before system reset */ -+ if (!s->endpoints) { -+ bypassed = s->config.bypass; -+ goto unlock; -+ } -+ -+ ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); -+ if (!ep || !ep->domain) { -+ 
bypassed = s->config.bypass; -+ } else { -+ bypassed = ep->domain->bypass; -+ } -+ -+unlock: -+ qemu_mutex_unlock(&s->mutex); -+ return bypassed; -+} -+ -+/* Return whether the device is using IOMMU translation. */ -+static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev) -+{ -+ bool use_remapping; -+ -+ assert(sdev); -+ -+ use_remapping = !virtio_iommu_device_bypassed(sdev); -+ -+ trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus), -+ PCI_SLOT(sdev->devfn), -+ PCI_FUNC(sdev->devfn), -+ use_remapping); -+ -+ /* Turn off first then on the other */ -+ if (use_remapping) { -+ memory_region_set_enabled(&sdev->bypass_mr, false); -+ memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true); -+ } else { -+ memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false); -+ memory_region_set_enabled(&sdev->bypass_mr, true); -+ } -+ -+ return use_remapping; -+} -+ -+static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s) -+{ -+ GHashTableIter iter; -+ IOMMUPciBus *iommu_pci_bus; -+ int i; -+ -+ g_hash_table_iter_init(&iter, s->as_by_busptr); -+ while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) { -+ for (i = 0; i < PCI_DEVFN_MAX; i++) { -+ if (!iommu_pci_bus->pbdev[i]) { -+ continue; -+ } -+ virtio_iommu_switch_address_space(iommu_pci_bus->pbdev[i]); -+ } -+ } -+} -+ - /** - * The bus number is used for lookup when SID based operations occur. 
- * In that case we lazily populate the IOMMUPciBus array from the bus hash -@@ -214,6 +285,7 @@ static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value, - static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) - { - VirtIOIOMMUDomain *domain = ep->domain; -+ IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr); - - if (!ep->domain) { - return; -@@ -222,6 +294,7 @@ static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) - ep->iommu_mr); - QLIST_REMOVE(ep, next); - ep->domain = NULL; -+ virtio_iommu_switch_address_space(sdev); - } - - static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s, -@@ -324,12 +397,39 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, - - trace_virtio_iommu_init_iommu_mr(name); - -+ memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX); -+ address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU); -+ -+ /* -+ * Build the IOMMU disabled container with aliases to the -+ * shared MRs. Note that aliasing to a shared memory region -+ * could help the memory API to detect same FlatViews so we -+ * can have devices to share the same FlatView when in bypass -+ * mode. (either by not configuring virtio-iommu driver or with -+ * "iommu=pt"). It will greatly reduce the total number of -+ * FlatViews of the system hence VM runs faster. 
-+ */ -+ memory_region_init_alias(&sdev->bypass_mr, OBJECT(s), -+ "system", get_system_memory(), 0, -+ memory_region_size(get_system_memory())); -+ - memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr), - TYPE_VIRTIO_IOMMU_MEMORY_REGION, - OBJECT(s), name, - UINT64_MAX); -- address_space_init(&sdev->as, -- MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU); -+ -+ /* -+ * Hook both the containers under the root container, we -+ * switch between iommu & bypass MRs by enable/disable -+ * corresponding sub-containers -+ */ -+ memory_region_add_subregion_overlap(&sdev->root, 0, -+ MEMORY_REGION(&sdev->iommu_mr), -+ 0); -+ memory_region_add_subregion_overlap(&sdev->root, 0, -+ &sdev->bypass_mr, 0); -+ -+ virtio_iommu_switch_address_space(sdev); - g_free(name); - } - return &sdev->as; -@@ -343,6 +443,7 @@ static int virtio_iommu_attach(VirtIOIOMMU *s, - uint32_t flags = le32_to_cpu(req->flags); - VirtIOIOMMUDomain *domain; - VirtIOIOMMUEndpoint *ep; -+ IOMMUDevice *sdev; - - trace_virtio_iommu_attach(domain_id, ep_id); - -@@ -376,6 +477,8 @@ static int virtio_iommu_attach(VirtIOIOMMU *s, - QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next); - - ep->domain = domain; -+ sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr); -+ virtio_iommu_switch_address_space(sdev); - - /* Replay domain mappings on the associated memory region */ - g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb, -@@ -888,6 +991,7 @@ static void virtio_iommu_set_config(VirtIODevice *vdev, - return; - } - dev_config->bypass = in_config->bypass; -+ virtio_iommu_switch_address_space_all(dev); - } - - trace_virtio_iommu_set_config(in_config->bypass); -@@ -1027,6 +1131,8 @@ static void virtio_iommu_system_reset(void *opaque) - * system reset - */ - s->config.bypass = s->boot_bypass; -+ virtio_iommu_switch_address_space_all(s); -+ - } - - static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) -@@ -1043,6 +1149,11 @@ static void virtio_iommu_device_realize(DeviceState 
*dev, Error **errp) - virtio_iommu_handle_command); - s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL); - -+ /* -+ * config.bypass is needed to get initial address space early, such as -+ * in vfio realize -+ */ -+ s->config.bypass = s->boot_bypass; - s->config.page_size_mask = TARGET_PAGE_MASK; - s->config.input_range.end = UINT64_MAX; - s->config.domain_range.end = UINT32_MAX; -diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h -index 84391f8448..102eeefa73 100644 ---- a/include/hw/virtio/virtio-iommu.h -+++ b/include/hw/virtio/virtio-iommu.h -@@ -37,6 +37,8 @@ typedef struct IOMMUDevice { - int devfn; - IOMMUMemoryRegion iommu_mr; - AddressSpace as; -+ MemoryRegion root; /* The root container of the device */ -+ MemoryRegion bypass_mr; /* The alias of shared memory MR */ - } IOMMUDevice; - - typedef struct IOMMUPciBus { --- -2.31.1 - diff --git a/kvm-virtio-iommu-Fix-migration-regression.patch b/kvm-virtio-iommu-Fix-migration-regression.patch deleted file mode 100644 index f5ae4d6..0000000 --- a/kvm-virtio-iommu-Fix-migration-regression.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 8d45902b4884315ec090e607e9f03606b21001cf Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Fri, 24 Jun 2022 17:37:40 +0800 -Subject: [PATCH 05/17] virtio-iommu: Fix migration regression - -RH-Author: Eric Auger -RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices -RH-Commit: [5/5] 9652c4aaaf88e24083fab1fbc3d1423260c93ca6 (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2100106 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 - -We also need to switch to the right address space on dest side -after loading the device status. DMA to wrong address space is -destructive. 
- -Fixes: 3facd774962fd ("virtio-iommu: Add bypass mode support to assigned device") -Suggested-by: Eric Auger -Signed-off-by: Zhenzhong Duan -Message-Id: <20220624093740.3525267-1-zhenzhong.duan@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Eric Auger -(cherry picked from commit d355566bd958e24e7e384da6ea89a9fc88d7bfed) -Signed-off-by: Eric Auger ---- - hw/virtio/virtio-iommu.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 44a041dec9..2012835554 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -1324,6 +1324,14 @@ static int iommu_post_load(void *opaque, int version_id) - VirtIOIOMMU *s = opaque; - - g_tree_foreach(s->domains, reconstruct_endpoints, s); -+ -+ /* -+ * Memory regions are dynamically turned on/off depending on -+ * 'config.bypass' and attached domain type if there is. After -+ * migration, we need to make sure the memory regions are -+ * still correct. 
-+ */ -+ virtio_iommu_switch_address_space_all(s); - return 0; - } - --- -2.31.1 - diff --git a/kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch b/kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch deleted file mode 100644 index 7747bfe..0000000 --- a/kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch +++ /dev/null @@ -1,67 +0,0 @@ -From b681247c29b59af40c86f8f0ae5709138ae9bf1a Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Thu, 23 Jun 2022 10:31:52 +0800 -Subject: [PATCH 04/17] virtio-iommu: Fix the partial copy of probe request - -RH-Author: Eric Auger -RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices -RH-Commit: [4/5] c402164414a8e69bbb6df20af3c2b6d2589d6f3e (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2100106 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 - -The structure of probe request doesn't include the tail, this leads -to a few field missed to be copied. Currently this isn't an issue as -those missed field belong to reserved field, just in case reserved -field will be used in the future. - -Changed 4th parameter of virtio_iommu_iov_to_req() to receive size -of device-readable part. - -Fixes: 1733eebb9e75b ("virtio-iommu: Implement RESV_MEM probe request") -Signed-off-by: Zhenzhong Duan -Message-Id: <20220623023152.3473231-1-zhenzhong.duan@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Reviewed-by: Jean-Philippe Brucker -Reviewed-by: Eric Auger -(cherry picked from commit 45461aace83d961e933b27519b81d17b4c690514) -Signed-off-by: Eric Auger ---- - hw/virtio/virtio-iommu.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index e970d4d5a6..44a041dec9 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -676,11 +676,10 @@ static int virtio_iommu_probe(VirtIOIOMMU *s, - - static int virtio_iommu_iov_to_req(struct iovec *iov, - unsigned int iov_cnt, -- void *req, size_t req_sz) -+ void *req, size_t payload_sz) - { -- size_t sz, payload_sz = req_sz - sizeof(struct virtio_iommu_req_tail); -+ size_t sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz); - -- sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz); - if (unlikely(sz != payload_sz)) { - return VIRTIO_IOMMU_S_INVAL; - } -@@ -693,7 +692,8 @@ static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s, \ - unsigned int iov_cnt) \ - { \ - struct virtio_iommu_req_ ## __req req; \ -- int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req)); \ -+ int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, \ -+ sizeof(req) - sizeof(struct virtio_iommu_req_tail));\ - \ - return ret ? 
ret : virtio_iommu_ ## __req(s, &req); \ - } --- -2.31.1 - diff --git a/kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch b/kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch deleted file mode 100644 index df961b0..0000000 --- a/kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch +++ /dev/null @@ -1,141 +0,0 @@ -From 881c999e302e7ee1212b47c523a2cf442c549417 Mon Sep 17 00:00:00 2001 -From: Zhenzhong Duan -Date: Mon, 13 Jun 2022 14:10:09 +0800 -Subject: [PATCH 02/17] virtio-iommu: Use recursive lock to avoid deadlock - -RH-Author: Eric Auger -RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices -RH-Commit: [2/5] 67dce1eecb49555f728f119f8efac00417ff65bf (eauger1/centos-qemu-kvm) -RH-Bugzilla: 2100106 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Peter Xu -RH-Acked-by: Cornelia Huck - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 - -When switching address space with mutex lock hold, mapping will be -replayed for assigned device. This will trigger relock deadlock. - -Also release the mutex resource in unrealize routine. - -Signed-off-by: Zhenzhong Duan -Message-Id: <20220613061010.2674054-3-zhenzhong.duan@intel.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 08f2030a2e46f1e93d186b3a683e5caef1df562b) -Signed-off-by: Eric Auger ---- - hw/virtio/virtio-iommu.c | 20 +++++++++++--------- - include/hw/virtio/virtio-iommu.h | 2 +- - 2 files changed, 12 insertions(+), 10 deletions(-) - -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 5e99e6c62b..440a1c28a7 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -79,7 +79,7 @@ static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev) - - sid = virtio_iommu_get_bdf(sdev); - -- qemu_mutex_lock(&s->mutex); -+ qemu_rec_mutex_lock(&s->mutex); - /* need to check bypass before system reset */ - if (!s->endpoints) { - bypassed = s->config.bypass; -@@ -94,7 +94,7 @@ static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev) - } - - unlock: -- qemu_mutex_unlock(&s->mutex); -+ qemu_rec_mutex_unlock(&s->mutex); - return bypassed; - } - -@@ -746,7 +746,7 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) - tail.status = VIRTIO_IOMMU_S_DEVERR; - goto out; - } -- qemu_mutex_lock(&s->mutex); -+ qemu_rec_mutex_lock(&s->mutex); - switch (head.type) { - case VIRTIO_IOMMU_T_ATTACH: - tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt); -@@ -775,7 +775,7 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) - default: - tail.status = VIRTIO_IOMMU_S_UNSUPP; - } -- qemu_mutex_unlock(&s->mutex); -+ qemu_rec_mutex_unlock(&s->mutex); - - out: - sz = iov_from_buf(elem->in_sg, elem->in_num, 0, -@@ -863,7 +863,7 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, - sid = virtio_iommu_get_bdf(sdev); - - trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag); -- qemu_mutex_lock(&s->mutex); -+ qemu_rec_mutex_lock(&s->mutex); - - ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); - if (!ep) { -@@ -947,7 +947,7 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, - 
trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid); - - unlock: -- qemu_mutex_unlock(&s->mutex); -+ qemu_rec_mutex_unlock(&s->mutex); - return entry; - } - -@@ -1036,7 +1036,7 @@ static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n) - - sid = virtio_iommu_get_bdf(sdev); - -- qemu_mutex_lock(&s->mutex); -+ qemu_rec_mutex_lock(&s->mutex); - - if (!s->endpoints) { - goto unlock; -@@ -1050,7 +1050,7 @@ static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n) - g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr); - - unlock: -- qemu_mutex_unlock(&s->mutex); -+ qemu_rec_mutex_unlock(&s->mutex); - } - - static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr, -@@ -1169,7 +1169,7 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) - virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE); - virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG); - -- qemu_mutex_init(&s->mutex); -+ qemu_rec_mutex_init(&s->mutex); - - s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free); - -@@ -1197,6 +1197,8 @@ static void virtio_iommu_device_unrealize(DeviceState *dev) - g_tree_destroy(s->endpoints); - } - -+ qemu_rec_mutex_destroy(&s->mutex); -+ - virtio_delete_queue(s->req_vq); - virtio_delete_queue(s->event_vq); - virtio_cleanup(vdev); -diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h -index 102eeefa73..2ad5ee320b 100644 ---- a/include/hw/virtio/virtio-iommu.h -+++ b/include/hw/virtio/virtio-iommu.h -@@ -58,7 +58,7 @@ struct VirtIOIOMMU { - ReservedRegion *reserved_regions; - uint32_t nb_reserved_regions; - GTree *domains; -- QemuMutex mutex; -+ QemuRecMutex mutex; - GTree *endpoints; - bool boot_bypass; - }; --- -2.31.1 - diff --git a/kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch b/kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch deleted file mode 100644 index 4ae4cc4..0000000 --- a/kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch +++ 
/dev/null @@ -1,69 +0,0 @@ -From dffe24d5c1f5a4676e9d2a5bc032effd420b008f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 08/32] virtio-net: Expose MAC_TABLE_ENTRIES -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [8/27] 5c3b96215ddf853cafc594da47f57d7e157db4ee (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 6758c01f054c2a842d41d927d628b09f649d3254 -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:27 2022 +0200 - - virtio-net: Expose MAC_TABLE_ENTRIES - - vhost-vdpa control virtqueue needs to know the maximum entries supported - by the virtio-net device, so we know if it is possible to apply the - filter. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/net/virtio-net.c | 1 - - include/hw/virtio/virtio-net.h | 3 +++ - 2 files changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 633de61513..2a127f0a3b 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -49,7 +49,6 @@ - - #define VIRTIO_NET_VM_VERSION 11 - --#define MAC_TABLE_ENTRIES 64 - #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ - - /* previously fixed value */ -diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h -index eb87032627..cce1c554f7 100644 ---- a/include/hw/virtio/virtio-net.h -+++ b/include/hw/virtio/virtio-net.h -@@ -35,6 +35,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET) - * and latency. */ - #define TX_BURST 256 - -+/* Maximum VIRTIO_NET_CTRL_MAC_TABLE_SET unicast + multicast entries. 
*/ -+#define MAC_TABLE_ENTRIES 64 -+ - typedef struct virtio_net_conf - { - uint32_t txtimer; --- -2.31.1 - diff --git a/kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch b/kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch deleted file mode 100644 index b4b9012..0000000 --- a/kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch +++ /dev/null @@ -1,169 +0,0 @@ -From 49e91b34b62f5da147fa2fb80d203dd675c48f64 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 21 Jul 2022 15:38:55 +0200 -Subject: [PATCH 09/32] virtio-net: Expose ctrl virtqueue logic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 108: Net Control Virtqueue shadow Support -RH-Commit: [9/27] c4ab1e35f4ca728df82a687763c662369282c513 (eperezmartin/qemu-kvm) -RH-Bugzilla: 1939363 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Cindy Lu -RH-Acked-by: Laurent Vivier - -Bugzilla: https://bugzilla.redhat.com/1939363 - -Upstream Status: git://git.qemu.org/qemu.git - -commit 640b8a1c588b56349b3307d88459ea1cd86181fb -Author: Eugenio Pérez -Date: Wed Jul 20 08:59:28 2022 +0200 - - virtio-net: Expose ctrl virtqueue logic - - This allows external vhost-net devices to modify the state of the - VirtIO device model once the vhost-vdpa device has acknowledged the - control commands. - - Signed-off-by: Eugenio Pérez - Reviewed-by: Michael S. 
Tsirkin - Signed-off-by: Jason Wang - -Signed-off-by: Eugenio Pérez ---- - hw/net/virtio-net.c | 84 ++++++++++++++++++++-------------- - include/hw/virtio/virtio-net.h | 4 ++ - 2 files changed, 53 insertions(+), 35 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 2a127f0a3b..59bedba681 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -1433,57 +1433,71 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, - return VIRTIO_NET_OK; - } - --static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) -+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, -+ const struct iovec *in_sg, unsigned in_num, -+ const struct iovec *out_sg, -+ unsigned out_num) - { - VirtIONet *n = VIRTIO_NET(vdev); - struct virtio_net_ctrl_hdr ctrl; - virtio_net_ctrl_ack status = VIRTIO_NET_ERR; -- VirtQueueElement *elem; - size_t s; - struct iovec *iov, *iov2; -- unsigned int iov_cnt; -+ -+ if (iov_size(in_sg, in_num) < sizeof(status) || -+ iov_size(out_sg, out_num) < sizeof(ctrl)) { -+ virtio_error(vdev, "virtio-net ctrl missing headers"); -+ return 0; -+ } -+ -+ iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num); -+ s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl)); -+ iov_discard_front(&iov, &out_num, sizeof(ctrl)); -+ if (s != sizeof(ctrl)) { -+ status = VIRTIO_NET_ERR; -+ } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { -+ status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num); -+ } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { -+ status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num); -+ } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { -+ status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num); -+ } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { -+ status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num); -+ } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { -+ status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num); -+ } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { -+ status 
= virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num); -+ } -+ -+ s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status)); -+ assert(s == sizeof(status)); -+ -+ g_free(iov2); -+ return sizeof(status); -+} -+ -+static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) -+{ -+ VirtQueueElement *elem; - - for (;;) { -+ size_t written; - elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); - if (!elem) { - break; - } -- if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) || -- iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) { -- virtio_error(vdev, "virtio-net ctrl missing headers"); -+ -+ written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num, -+ elem->out_sg, elem->out_num); -+ if (written > 0) { -+ virtqueue_push(vq, elem, written); -+ virtio_notify(vdev, vq); -+ g_free(elem); -+ } else { - virtqueue_detach_element(vq, elem, 0); - g_free(elem); - break; - } -- -- iov_cnt = elem->out_num; -- iov2 = iov = g_memdup2(elem->out_sg, -- sizeof(struct iovec) * elem->out_num); -- s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl)); -- iov_discard_front(&iov, &iov_cnt, sizeof(ctrl)); -- if (s != sizeof(ctrl)) { -- status = VIRTIO_NET_ERR; -- } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { -- status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt); -- } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { -- status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt); -- } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { -- status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt); -- } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { -- status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt); -- } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { -- status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt); -- } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { -- status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt); -- } -- -- s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status)); -- assert(s == 
sizeof(status)); -- -- virtqueue_push(vq, elem, sizeof(status)); -- virtio_notify(vdev, vq); -- g_free(iov2); -- g_free(elem); - } - } - -diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h -index cce1c554f7..ef234ffe7e 100644 ---- a/include/hw/virtio/virtio-net.h -+++ b/include/hw/virtio/virtio-net.h -@@ -221,6 +221,10 @@ struct VirtIONet { - struct EBPFRSSContext ebpf_rss; - }; - -+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, -+ const struct iovec *in_sg, unsigned in_num, -+ const struct iovec *out_sg, -+ unsigned out_num); - void virtio_net_set_netclient_name(VirtIONet *n, const char *name, - const char *type); - --- -2.31.1 - diff --git a/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch b/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch deleted file mode 100644 index 9da7ea7..0000000 --- a/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch +++ /dev/null @@ -1,143 +0,0 @@ -From 316b73277de233c7a9b6917077c00d7012060944 Mon Sep 17 00:00:00 2001 -From: Si-Wei Liu -Date: Fri, 6 May 2022 19:28:13 -0700 -Subject: [PATCH 09/16] virtio-net: align ctrl_vq index for non-mq guest for - vhost_vdpa -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [2/7] 7f764bbb579c7b473ad67fc25b46e698d277e781 (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu - -With MQ enabled vdpa device and non-MQ supporting guest e.g. -booting vdpa with mq=on over OVMF of single vqp, below assert -failure is seen: - -../hw/virtio/vhost-vdpa.c:560: vhost_vdpa_get_vq_index: Assertion `idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs' failed. 
- -0 0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6 -1 0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6 -2 0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6 -3 0x00007f8ce3fec252 in () at /lib64/libc.so.6 -4 0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=, idx=) at ../hw/virtio/vhost-vdpa.c:563 -5 0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=, idx=) at ../hw/virtio/vhost-vdpa.c:558 -6 0x0000558f52d7329a in vhost_virtqueue_mask (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=) at ../hw/virtio/vhost.c:1557 -7 0x0000558f52c6b89a in virtio_pci_set_guest_notifier (d=d@entry=0x558f568f0f60, n=n@entry=2, assign=assign@entry=true, with_irqfd=with_irqfd@entry=false) - at ../hw/virtio/virtio-pci.c:974 -8 0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers (d=0x558f568f0f60, nvqs=3, assign=true) at ../hw/virtio/virtio-pci.c:1019 -9 0x0000558f52bf091d in vhost_net_start (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1) - at ../hw/net/vhost_net.c:361 -10 0x0000558f52d4e5e7 in virtio_net_set_status (status=, n=0x558f568f91f0) at ../hw/net/virtio-net.c:289 -11 0x0000558f52d4e5e7 in virtio_net_set_status (vdev=0x558f568f91f0, status=15 '\017') at ../hw/net/virtio-net.c:370 -12 0x0000558f52d6c4b2 in virtio_set_status (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at ../hw/virtio/virtio.c:1945 -13 0x0000558f52c69eff in virtio_pci_common_write (opaque=0x558f568f0f60, addr=, val=, size=) at ../hw/virtio/virtio-pci.c:1292 -14 0x0000558f52d15d6e in memory_region_write_accessor (mr=0x558f568f19d0, addr=20, value=, size=1, shift=, mask=, attrs=...) - at ../softmmu/memory.c:492 -15 0x0000558f52d127de in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, size=size@entry=1, access_size_min=, access_size_max=, access_fn=0x558f52d15cf0 , mr=0x558f568f19d0, attrs=...) 
at ../softmmu/memory.c:554 -16 0x0000558f52d157ef in memory_region_dispatch_write (mr=mr@entry=0x558f568f19d0, addr=20, data=, op=, attrs=attrs@entry=...) - at ../softmmu/memory.c:1504 -17 0x0000558f52d078e7 in flatview_write_continue (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, addr1=, l=, mr=0x558f568f19d0) at /home/opc/qemu-upstream/include/qemu/host-utils.h:165 -18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at ../softmmu/physmem.c:2822 -19 0x0000558f52d0b36b in address_space_write (as=, addr=, attrs=..., buf=buf@entry=0x7f8ce6300028, len=) - at ../softmmu/physmem.c:2914 -20 0x0000558f52d0b3da in address_space_rw (as=, addr=, attrs=..., - attrs@entry=..., buf=buf@entry=0x7f8ce6300028, len=, is_write=) at ../softmmu/physmem.c:2924 -21 0x0000558f52dced09 in kvm_cpu_exec (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903 -22 0x0000558f52dcfabd in kvm_vcpu_thread_fn (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49 -23 0x0000558f52f9f04a in qemu_thread_start (args=) at ../util/qemu-thread-posix.c:556 -24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0 -25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6 - -The cause for the assert failure is due to that the vhost_dev index -for the ctrl vq was not aligned with actual one in use by the guest. -Upon multiqueue feature negotiation in virtio_net_set_multiqueue(), -if guest doesn't support multiqueue, the guest vq layout would shrink -to a single queue pair, consisting of 3 vqs in total (rx, tx and ctrl). -This results in ctrl_vq taking a different vhost_dev group index than -the default. We can map vq to the correct vhost_dev group by checking -if MQ is supported by guest and successfully negotiated. 
Since the -MQ feature is only present along with CTRL_VQ, we ensure the index -2 is only meant for the control vq while MQ is not supported by guest. - -Fixes: 22288fe ("virtio-net: vhost control virtqueue support") -Suggested-by: Jason Wang -Signed-off-by: Si-Wei Liu -Acked-by: Jason Wang -Message-Id: <1651890498-24478-3-git-send-email-si-wei.liu@oracle.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 68b0a6395f36a8f48f56f46d05f30be2067598b0) -Signed-off-by: Jason Wang ---- - hw/net/virtio-net.c | 33 +++++++++++++++++++++++++++++++-- - 1 file changed, 31 insertions(+), 2 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index ffb3475201..f0bb29c741 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -14,6 +14,7 @@ - #include "qemu/osdep.h" - #include "qemu/atomic.h" - #include "qemu/iov.h" -+#include "qemu/log.h" - #include "qemu/main-loop.h" - #include "qemu/module.h" - #include "hw/virtio/virtio.h" -@@ -3171,8 +3172,22 @@ static NetClientInfo net_virtio_info = { - static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) - { - VirtIONet *n = VIRTIO_NET(vdev); -- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); -+ NetClientState *nc; - assert(n->vhost_started); -+ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { -+ /* Must guard against invalid features and bogus queue index -+ * from being set by malicious guest, or penetrated through -+ * buggy migration stream. 
-+ */ -+ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { -+ qemu_log_mask(LOG_GUEST_ERROR, -+ "%s: bogus vq index ignored\n", __func__); -+ return false; -+ } -+ nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); -+ } else { -+ nc = qemu_get_subqueue(n->nic, vq2q(idx)); -+ } - return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); - } - -@@ -3180,8 +3195,22 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, - bool mask) - { - VirtIONet *n = VIRTIO_NET(vdev); -- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); -+ NetClientState *nc; - assert(n->vhost_started); -+ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { -+ /* Must guard against invalid features and bogus queue index -+ * from being set by malicious guest, or penetrated through -+ * buggy migration stream. -+ */ -+ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { -+ qemu_log_mask(LOG_GUEST_ERROR, -+ "%s: bogus vq index ignored\n", __func__); -+ return; -+ } -+ nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); -+ } else { -+ nc = qemu_get_subqueue(n->nic, vq2q(idx)); -+ } - vhost_net_virtqueue_mask(get_vhost_net(nc->peer), - vdev, idx, mask); - } --- -2.31.1 - diff --git a/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch b/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch deleted file mode 100644 index 3930cc2..0000000 --- a/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch +++ /dev/null @@ -1,109 +0,0 @@ -From 521a1953bc11ab6823dcbbee773bcf86e926a9e7 Mon Sep 17 00:00:00 2001 -From: Si-Wei Liu -Date: Fri, 6 May 2022 19:28:18 -0700 -Subject: [PATCH 14/16] virtio-net: don't handle mq request in userspace - handler for vhost-vdpa -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [7/7] 9781cab45448ae16a00fbf10cf7995df6b984a0a (jasowang/qemu-kvm-cs) 
-RH-Bugzilla: 2070804 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu - -virtio_queue_host_notifier_read() tends to read pending event -left behind on ioeventfd in the vhost_net_stop() path, and -attempts to handle outstanding kicks from userspace vq handler. -However, in the ctrl_vq handler, virtio_net_handle_mq() has a -recursive call into virtio_net_set_status(), which may lead to -segmentation fault as shown in below stack trace: - -0 0x000055f800df1780 in qdev_get_parent_bus (dev=0x0) at ../hw/core/qdev.c:376 -1 0x000055f800c68ad8 in virtio_bus_device_iommu_enabled (vdev=vdev@entry=0x0) at ../hw/virtio/virtio-bus.c:331 -2 0x000055f800d70d7f in vhost_memory_unmap (dev=) at ../hw/virtio/vhost.c:318 -3 0x000055f800d70d7f in vhost_memory_unmap (dev=, buffer=0x7fc19bec5240, len=2052, is_write=1, access_len=2052) at ../hw/virtio/vhost.c:336 -4 0x000055f800d71867 in vhost_virtqueue_stop (dev=dev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590, vq=0x55f8037cceb0, idx=0) at ../hw/virtio/vhost.c:1241 -5 0x000055f800d7406c in vhost_dev_stop (hdev=hdev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590) at ../hw/virtio/vhost.c:1839 -6 0x000055f800bf00a7 in vhost_net_stop_one (net=0x55f8037ccc30, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:315 -7 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1) - at ../hw/net/vhost_net.c:423 -8 0x000055f800d4e628 in virtio_net_set_status (status=, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296 -9 0x000055f800d4e628 in virtio_net_set_status (vdev=vdev@entry=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370 -10 0x000055f800d534d8 in virtio_net_handle_ctrl (iov_cnt=, iov=, cmd=0 '\000', n=0x55f8044ec590) at ../hw/net/virtio-net.c:1408 -11 0x000055f800d534d8 in virtio_net_handle_ctrl (vdev=0x55f8044ec590, vq=0x7fc1a7e888d0) at ../hw/net/virtio-net.c:1452 -12 0x000055f800d69f37 in 
virtio_queue_host_notifier_read (vq=0x7fc1a7e888d0) at ../hw/virtio/virtio.c:2331 -13 0x000055f800d69f37 in virtio_queue_host_notifier_read (n=n@entry=0x7fc1a7e8894c) at ../hw/virtio/virtio.c:3575 -14 0x000055f800c688e6 in virtio_bus_cleanup_host_notifier (bus=, n=n@entry=14) at ../hw/virtio/virtio-bus.c:312 -15 0x000055f800d73106 in vhost_dev_disable_notifiers (hdev=hdev@entry=0x55f8035b51b0, vdev=vdev@entry=0x55f8044ec590) - at ../../../include/hw/virtio/virtio-bus.h:35 -16 0x000055f800bf00b2 in vhost_net_stop_one (net=0x55f8035b51b0, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:316 -17 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1) - at ../hw/net/vhost_net.c:423 -18 0x000055f800d4e628 in virtio_net_set_status (status=, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296 -19 0x000055f800d4e628 in virtio_net_set_status (vdev=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370 -20 0x000055f800d6c4b2 in virtio_set_status (vdev=0x55f8044ec590, val=) at ../hw/virtio/virtio.c:1945 -21 0x000055f800d11d9d in vm_state_notify (running=running@entry=false, state=state@entry=RUN_STATE_SHUTDOWN) at ../softmmu/runstate.c:333 -22 0x000055f800d04e7a in do_vm_stop (state=state@entry=RUN_STATE_SHUTDOWN, send_stop=send_stop@entry=false) at ../softmmu/cpus.c:262 -23 0x000055f800d04e99 in vm_shutdown () at ../softmmu/cpus.c:280 -24 0x000055f800d126af in qemu_cleanup () at ../softmmu/runstate.c:812 -25 0x000055f800ad5b13 in main (argc=, argv=, envp=) at ../softmmu/main.c:51 - -For now, temporarily disable handling MQ request from the ctrl_vq -userspace hanlder to avoid the recursive virtio_net_set_status() -call. Some rework is needed to allow changing the number of -queues without going through a full virtio_net_set_status cycle, -particularly for vhost-vdpa backend. 
- -This patch will need to be reverted as soon as future patches of -having the change of #queues handled in userspace is merged. - -Fixes: 402378407db ("vhost-vdpa: multiqueue support") -Signed-off-by: Si-Wei Liu -Acked-by: Jason Wang -Message-Id: <1651890498-24478-8-git-send-email-si-wei.liu@oracle.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 2a7888cc3aa31faee839fa5dddad354ff8941f4c) -Signed-off-by: Jason Wang ---- - hw/net/virtio-net.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index f0bb29c741..099e65036d 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -1381,6 +1381,7 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, - { - VirtIODevice *vdev = VIRTIO_DEVICE(n); - uint16_t queue_pairs; -+ NetClientState *nc = qemu_get_queue(n->nic); - - virtio_net_disable_rss(n); - if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { -@@ -1412,6 +1413,18 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, - return VIRTIO_NET_ERR; - } - -+ /* Avoid changing the number of queue_pairs for vdpa device in -+ * userspace handler. A future fix is needed to handle the mq -+ * change in userspace handler with vhost-vdpa. Let's disable -+ * the mq handling from userspace for now and only allow get -+ * done through the kernel. Ripples may be seen when falling -+ * back to userspace, but without doing it qemu process would -+ * crash on a recursive entry to virtio_net_set_status(). 
-+ */ -+ if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { -+ return VIRTIO_NET_ERR; -+ } -+ - n->curr_queue_pairs = queue_pairs; - /* stop the backend before changing the number of queue_pairs to avoid handling a - * disabled queue */ --- -2.31.1 - diff --git a/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch b/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch deleted file mode 100644 index f6072d2..0000000 --- a/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 9e737aba614e94da4458f02d4ff97e95ffffd19f Mon Sep 17 00:00:00 2001 -From: Si-Wei Liu -Date: Fri, 6 May 2022 19:28:12 -0700 -Subject: [PATCH 08/16] virtio-net: setup vhost_dev and notifiers for cvq only - when feature is negotiated -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [1/7] a5c5a2862b2e4d15ef7c09da3e4234fdef37cc66 (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu - -When the control virtqueue feature is absent or not negotiated, -vhost_net_start() still tries to set up vhost_dev and install -vhost notifiers for the control virtqueue, which results in -erroneous ioctl calls with incorrect queue index sending down -to driver. Do that only when needed. - -Fixes: 22288fe ("virtio-net: vhost control virtqueue support") -Signed-off-by: Si-Wei Liu -Acked-by: Jason Wang -Message-Id: <1651890498-24478-2-git-send-email-si-wei.liu@oracle.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit aa8581945a13712ff3eed0ad3ba7a9664fc1604b) -Signed-off-by: Jason Wang ---- - hw/net/virtio-net.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 1067e72b39..ffb3475201 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -245,7 +245,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) - VirtIODevice *vdev = VIRTIO_DEVICE(n); - NetClientState *nc = qemu_get_queue(n->nic); - int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; -- int cvq = n->max_ncs - n->max_queue_pairs; -+ int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? -+ n->max_ncs - n->max_queue_pairs : 0; - - if (!get_vhost_net(nc->peer)) { - return; --- -2.31.1 - diff --git a/kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch deleted file mode 100644 index 897e04c..0000000 --- a/kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 975af1b9f1811e113e1babd928ae70f8e4ebefb5 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 17 May 2022 09:28:19 +0100 -Subject: [PATCH 13/16] virtio-scsi: clean up virtio_scsi_handle_cmd_vq() - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads -RH-Commit: [5/6] 27b0225783fa9bbb8fe5ee692bd3f0a888d49d07 (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2079347 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefano Garzarella - -virtio_scsi_handle_cmd_vq() is only called from hw/scsi/virtio-scsi.c -now and its return value is no longer used. Remove the function -prototype from virtio-scsi.h and drop the return value. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Paolo Bonzini -Message-id: 20220427143541.119567-6-stefanha@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit ad482b57ef841b2d4883c5079d20ba44ff5e4b3e) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/virtio-scsi.c | 5 +---- - include/hw/virtio/virtio-scsi.h | 1 - - 2 files changed, 1 insertion(+), 5 deletions(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index a47033d91d..df5ff8bab7 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -685,12 +685,11 @@ static void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI *s, VirtIOSCSIReq *req) - scsi_req_unref(sreq); - } - --bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) -+static void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) - { - VirtIOSCSIReq *req, *next; - int ret = 0; - bool suppress_notifications = virtio_queue_get_notification(vq); -- bool progress = false; - - QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); - -@@ -700,7 +699,6 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) - } - - while ((req = virtio_scsi_pop_req(s, vq))) { -- progress = true; - ret = virtio_scsi_handle_cmd_req_prepare(s, req); - if (!ret) { - QTAILQ_INSERT_TAIL(&reqs, req, next); -@@ -725,7 +723,6 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) - QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { - virtio_scsi_handle_cmd_req_submit(s, req); - } -- return progress; - } - - static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) -diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h -index 44dc3b81ec..2497530064 100644 ---- a/include/hw/virtio/virtio-scsi.h -+++ b/include/hw/virtio/virtio-scsi.h -@@ -151,7 +151,6 @@ void virtio_scsi_common_realize(DeviceState *dev, - Error **errp); - - void virtio_scsi_common_unrealize(DeviceState *dev); --bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq); - void 
virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); - void virtio_scsi_free_req(VirtIOSCSIReq *req); - void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, --- -2.31.1 - diff --git a/kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch deleted file mode 100644 index 30f012f..0000000 --- a/kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch +++ /dev/null @@ -1,65 +0,0 @@ -From c6e16a7a5a18ec2bc4f8a6f5cc1c887e18b16cdf Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 17 May 2022 09:28:12 +0100 -Subject: [PATCH 12/16] virtio-scsi: clean up virtio_scsi_handle_ctrl_vq() - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads -RH-Commit: [4/6] ca3751b7bfad5163c5b1c81b8525936a848d42ea (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2079347 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefano Garzarella - -virtio_scsi_handle_ctrl_vq() is only called from hw/scsi/virtio-scsi.c -now and its return value is no longer used. Remove the function -prototype from virtio-scsi.h and drop the return value. 
- -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Paolo Bonzini -Message-id: 20220427143541.119567-5-stefanha@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 73b3b49f1880f236b4d0ffd7efb00280c05a5fab) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/virtio-scsi.c | 5 +---- - include/hw/virtio/virtio-scsi.h | 1 - - 2 files changed, 1 insertion(+), 5 deletions(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index dd2185b943..a47033d91d 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -460,16 +460,13 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req) - } - } - --bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) -+static void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) - { - VirtIOSCSIReq *req; -- bool progress = false; - - while ((req = virtio_scsi_pop_req(s, vq))) { -- progress = true; - virtio_scsi_handle_ctrl_req(s, req); - } -- return progress; - } - - /* -diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h -index 5957597825..44dc3b81ec 100644 ---- a/include/hw/virtio/virtio-scsi.h -+++ b/include/hw/virtio/virtio-scsi.h -@@ -152,7 +152,6 @@ void virtio_scsi_common_realize(DeviceState *dev, - - void virtio_scsi_common_unrealize(DeviceState *dev); - bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq); --bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq); - void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); - void virtio_scsi_free_req(VirtIOSCSIReq *req); - void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, --- -2.31.1 - diff --git a/kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch deleted file mode 100644 index bfdd39b..0000000 --- a/kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 019d5a0ca5d13f837a59b9e2815e2fd7ac120807 Mon Sep 17 00:00:00 2001 -From: Stefan 
Hajnoczi -Date: Tue, 17 May 2022 09:28:06 +0100 -Subject: [PATCH 11/16] virtio-scsi: clean up virtio_scsi_handle_event_vq() - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads -RH-Commit: [3/6] f8dbc4c1991c61e4cf8dea50942c3cd509c9c4bd (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2079347 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefano Garzarella - -virtio_scsi_handle_event_vq() is only called from hw/scsi/virtio-scsi.c -now and its return value is no longer used. Remove the function -prototype from virtio-scsi.h and drop the return value. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Paolo Bonzini -Message-id: 20220427143541.119567-4-stefanha@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 37ce2de95169dacab3fb53d11bd4509b9c2e3a4c) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/virtio-scsi.c | 4 +--- - include/hw/virtio/virtio-scsi.h | 1 - - 2 files changed, 1 insertion(+), 4 deletions(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 7b69eeed64..dd2185b943 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -856,13 +856,11 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, - virtio_scsi_complete_req(req); - } - --bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq) -+static void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq) - { - if (s->events_dropped) { - virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0); -- return true; - } -- return false; - } - - static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) -diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h -index 543681bc18..5957597825 100644 ---- a/include/hw/virtio/virtio-scsi.h -+++ b/include/hw/virtio/virtio-scsi.h -@@ -151,7 +151,6 @@ void virtio_scsi_common_realize(DeviceState *dev, - Error **errp); - - void virtio_scsi_common_unrealize(DeviceState *dev); --bool 
virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq); - bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq); - bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq); - void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); --- -2.31.1 - diff --git a/kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch b/kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch deleted file mode 100644 index 5ba11a2..0000000 --- a/kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 1b609b2af303fb6498b2ef94ac4f2e900dc8c1b2 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 17 May 2022 09:27:45 +0100 -Subject: [PATCH 10/16] virtio-scsi: don't waste CPU polling the event - virtqueue - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads -RH-Commit: [2/6] 7e613d9b9fa8ceb668c78cb3ce7ebe1d73a004b5 (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2079347 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefano Garzarella - -The virtio-scsi event virtqueue is not emptied by its handler function. -This is typical for rx virtqueues where the device uses buffers when -some event occurs (e.g. a packet is received, an error condition -happens, etc). - -Polling non-empty virtqueues wastes CPU cycles. We are not waiting for -new buffers to become available, we are waiting for an event to occur, -so it's a misuse of CPU resources to poll for buffers. - -Introduce the new virtio_queue_aio_attach_host_notifier_no_poll() API, -which is identical to virtio_queue_aio_attach_host_notifier() except -that it does not poll the virtqueue. 
- -Before this patch the following command-line consumed 100% CPU in the -IOThread polling and calling virtio_scsi_handle_event(): - - $ qemu-system-x86_64 -M accel=kvm -m 1G -cpu host \ - --object iothread,id=iothread0 \ - --device virtio-scsi-pci,iothread=iothread0 \ - --blockdev file,filename=test.img,aio=native,cache.direct=on,node-name=drive0 \ - --device scsi-hd,drive=drive0 - -After this patch CPU is no longer wasted. - -Reported-by: Nir Soffer -Signed-off-by: Stefan Hajnoczi -Tested-by: Nir Soffer -Message-id: 20220427143541.119567-3-stefanha@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 38738f7dbbda90fbc161757b7f4be35b52205552) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/virtio-scsi-dataplane.c | 2 +- - hw/virtio/virtio.c | 13 +++++++++++++ - include/hw/virtio/virtio.h | 1 + - 3 files changed, 15 insertions(+), 1 deletion(-) - -diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c -index 29575cbaf6..8bb6e6acfc 100644 ---- a/hw/scsi/virtio-scsi-dataplane.c -+++ b/hw/scsi/virtio-scsi-dataplane.c -@@ -138,7 +138,7 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) - - aio_context_acquire(s->ctx); - virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); -- virtio_queue_aio_attach_host_notifier(vs->event_vq, s->ctx); -+ virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx); - - for (i = 0; i < vs->conf.num_queues; i++) { - virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index 9d637e043e..67a873f54a 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -3534,6 +3534,19 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) - virtio_queue_host_notifier_aio_poll_end); - } - -+/* -+ * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use -+ * this for rx virtqueues and similar cases where the virtqueue handler -+ * function does not pop all elements. 
When the virtqueue is left non-empty -+ * polling consumes CPU cycles and should not be used. -+ */ -+void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx) -+{ -+ aio_set_event_notifier(ctx, &vq->host_notifier, true, -+ virtio_queue_host_notifier_read, -+ NULL, NULL); -+} -+ - void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx) - { - aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL); -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index b31c4507f5..b62a35fdca 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -317,6 +317,7 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); - void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled); - void virtio_queue_host_notifier_read(EventNotifier *n); - void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx); -+void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx); - void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx); - VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); - VirtQueue *virtio_vector_next_queue(VirtQueue *vq); --- -2.31.1 - diff --git a/kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch b/kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch deleted file mode 100644 index 1f22ba0..0000000 --- a/kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch +++ /dev/null @@ -1,119 +0,0 @@ -From 5aaf33dbbbc89d58a52337985641723b9ee13541 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Wed, 27 Apr 2022 15:35:36 +0100 -Subject: [PATCH 09/16] virtio-scsi: fix ctrl and event handler functions in - dataplane mode - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads -RH-Commit: [1/6] 3087889041b960f14a6b3893243f78523a78f637 (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2079347 -RH-Acked-by: 
Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefano Garzarella - -Commit f34e8d8b8d48d73f36a67b6d5e492ef9784b5012 ("virtio-scsi: prepare -virtio_scsi_handle_cmd for dataplane") prepared the virtio-scsi cmd -virtqueue handler function to be used in both the dataplane and -non-datpalane code paths. - -It failed to convert the ctrl and event virtqueue handler functions, -which are not designed to be called from the dataplane code path but -will be since the ioeventfd is set up for those virtqueues when -dataplane starts. - -Convert the ctrl and event virtqueue handler functions now so they -operate correctly when called from the dataplane code path. Avoid code -duplication by extracting this code into a helper function. - -Fixes: f34e8d8b8d48d73f36a67b6d5e492ef9784b5012 ("virtio-scsi: prepare virtio_scsi_handle_cmd for dataplane") -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Paolo Bonzini -Message-id: 20220427143541.119567-2-stefanha@redhat.com -[Fixed s/by used/be used/ typo pointed out by Michael Tokarev -. ---Stefan] -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 2f743ef6366c2df4ef51ef3ae318138cdc0125ab) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/virtio-scsi.c | 42 +++++++++++++++++++++++++++--------------- - 1 file changed, 27 insertions(+), 15 deletions(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 7f6da33a8a..7b69eeed64 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -472,16 +472,32 @@ bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) - return progress; - } - -+/* -+ * If dataplane is configured but not yet started, do so now and return true on -+ * success. -+ * -+ * Dataplane is started by the core virtio code but virtqueue handler functions -+ * can also be invoked when a guest kicks before DRIVER_OK, so this helper -+ * function helps us deal with manually starting ioeventfd in that case. 
-+ */ -+static bool virtio_scsi_defer_to_dataplane(VirtIOSCSI *s) -+{ -+ if (!s->ctx || s->dataplane_started) { -+ return false; -+ } -+ -+ virtio_device_start_ioeventfd(&s->parent_obj.parent_obj); -+ return !s->dataplane_fenced; -+} -+ - static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) - { - VirtIOSCSI *s = (VirtIOSCSI *)vdev; - -- if (s->ctx) { -- virtio_device_start_ioeventfd(vdev); -- if (!s->dataplane_fenced) { -- return; -- } -+ if (virtio_scsi_defer_to_dataplane(s)) { -+ return; - } -+ - virtio_scsi_acquire(s); - virtio_scsi_handle_ctrl_vq(s, vq); - virtio_scsi_release(s); -@@ -720,12 +736,10 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) - /* use non-QOM casts in the data path */ - VirtIOSCSI *s = (VirtIOSCSI *)vdev; - -- if (s->ctx && !s->dataplane_started) { -- virtio_device_start_ioeventfd(vdev); -- if (!s->dataplane_fenced) { -- return; -- } -+ if (virtio_scsi_defer_to_dataplane(s)) { -+ return; - } -+ - virtio_scsi_acquire(s); - virtio_scsi_handle_cmd_vq(s, vq); - virtio_scsi_release(s); -@@ -855,12 +869,10 @@ static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) - { - VirtIOSCSI *s = VIRTIO_SCSI(vdev); - -- if (s->ctx) { -- virtio_device_start_ioeventfd(vdev); -- if (!s->dataplane_fenced) { -- return; -- } -+ if (virtio_scsi_defer_to_dataplane(s)) { -+ return; - } -+ - virtio_scsi_acquire(s); - virtio_scsi_handle_event_vq(s, vq); - virtio_scsi_release(s); --- -2.31.1 - diff --git a/kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch b/kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch deleted file mode 100644 index 8f1fb3e..0000000 --- a/kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch +++ /dev/null @@ -1,117 +0,0 @@ -From cbcab5ed1686fddeb2c6adb3a3f6ed0678a36e71 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 8 Aug 2022 12:21:34 -0400 -Subject: [PATCH 23/23] virtio-scsi: fix race in virtio_scsi_dataplane_start() - -RH-Author: Stefan 
Hajnoczi -RH-MergeRequest: 211: virtio-scsi: fix race in virtio_scsi_dataplane_start() (RHEL src-git) -RH-Commit: [1/1] 2d4964d8863e259326a73fb918fa2f5f63b4a60a -RH-Bugzilla: 2099541 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Kevin Wolf -RH-Acked-by: Hanna Reitz -RH-Acked-by: Paolo Bonzini - -As soon as virtio_scsi_data_plane_start() attaches host notifiers the -IOThread may start virtqueue processing. There is a race between -IOThread virtqueue processing and virtio_scsi_data_plane_start() because -it only assigns s->dataplane_started after attaching host notifiers. - -When a virtqueue handler function in the IOThread calls -virtio_scsi_defer_to_dataplane() it may see !s->dataplane_started and -attempt to start dataplane even though we're already in the IOThread: - - #0 0x00007f67b360857c __pthread_kill_implementation (libc.so.6 + 0xa257c) - #1 0x00007f67b35bbd56 raise (libc.so.6 + 0x55d56) - #2 0x00007f67b358e833 abort (libc.so.6 + 0x28833) - #3 0x00007f67b358e75b __assert_fail_base.cold (libc.so.6 + 0x2875b) - #4 0x00007f67b35b4cd6 __assert_fail (libc.so.6 + 0x4ecd6) - #5 0x000055ca87fd411b memory_region_transaction_commit (qemu-kvm + 0x67511b) - #6 0x000055ca87e17811 virtio_pci_ioeventfd_assign (qemu-kvm + 0x4b8811) - #7 0x000055ca87e14836 virtio_bus_set_host_notifier (qemu-kvm + 0x4b5836) - #8 0x000055ca87f8e14e virtio_scsi_set_host_notifier (qemu-kvm + 0x62f14e) - #9 0x000055ca87f8dd62 virtio_scsi_dataplane_start (qemu-kvm + 0x62ed62) - #10 0x000055ca87e14610 virtio_bus_start_ioeventfd (qemu-kvm + 0x4b5610) - #11 0x000055ca87f8c29a virtio_scsi_handle_ctrl (qemu-kvm + 0x62d29a) - #12 0x000055ca87fa5902 virtio_queue_host_notifier_read (qemu-kvm + 0x646902) - #13 0x000055ca882c099e aio_dispatch_handler (qemu-kvm + 0x96199e) - #14 0x000055ca882c1761 aio_poll (qemu-kvm + 0x962761) - #15 0x000055ca880e1052 iothread_run (qemu-kvm + 0x782052) - #16 0x000055ca882c562a qemu_thread_start (qemu-kvm + 0x96662a) - -This patch assigns s->dataplane_started before 
attaching host notifiers -so that virtqueue handler functions that run in the IOThread before -virtio_scsi_data_plane_start() returns correctly identify that dataplane -does not need to be started. This fix is taken from the virtio-blk -dataplane code and it's worth adding a comment in virtio-blk as well to -explain why it works. - -Note that s->dataplane_started does not need the AioContext lock because -it is set before attaching host notifiers and cleared after detaching -host notifiers. In other words, the IOThread always sees the value true -and the main loop thread does not modify it while the IOThread is -active. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2099541 -Reported-by: Qing Wang -Signed-off-by: Stefan Hajnoczi -Message-Id: <20220808162134.240405-1-stefanha@redhat.com> -Reviewed-by: Emanuele Giuseppe Esposito -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 9a4b6a63aee885931622549c85669dcca03bed39) -Signed-off-by: Stefan Hajnoczi -Signed-off-by: Miroslav Rezanina ---- - hw/block/dataplane/virtio-blk.c | 5 +++++ - hw/scsi/virtio-scsi-dataplane.c | 11 ++++++++--- - 2 files changed, 13 insertions(+), 3 deletions(-) - -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index 49276e46f2..26f965cabc 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -219,6 +219,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) - - memory_region_transaction_commit(); - -+ /* -+ * These fields are visible to the IOThread so we rely on implicit barriers -+ * in aio_context_acquire() on the write side and aio_notify_accept() on -+ * the read side. 
-+ */ - s->starting = false; - vblk->dataplane_started = true; - trace_virtio_blk_data_plane_start(s); -diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c -index 8bb6e6acfc..20bb91766e 100644 ---- a/hw/scsi/virtio-scsi-dataplane.c -+++ b/hw/scsi/virtio-scsi-dataplane.c -@@ -136,6 +136,14 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) - - memory_region_transaction_commit(); - -+ /* -+ * These fields are visible to the IOThread so we rely on implicit barriers -+ * in aio_context_acquire() on the write side and aio_notify_accept() on -+ * the read side. -+ */ -+ s->dataplane_starting = false; -+ s->dataplane_started = true; -+ - aio_context_acquire(s->ctx); - virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); - virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx); -@@ -143,9 +151,6 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) - for (i = 0; i < vs->conf.num_queues; i++) { - virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); - } -- -- s->dataplane_starting = false; -- s->dataplane_started = true; - aio_context_release(s->ctx); - return 0; - --- -2.31.1 - diff --git a/kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch b/kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch deleted file mode 100644 index 8487f5c..0000000 --- a/kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch +++ /dev/null @@ -1,168 +0,0 @@ -From 6603f216dbc07a1d221b1665409cfec6cc9960e2 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 17 May 2022 09:28:26 +0100 -Subject: [PATCH 14/16] virtio-scsi: move request-related items from .h to .c - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads -RH-Commit: [6/6] ecdf5289abd04062c85c5ed8e577a5249684a3b0 (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2079347 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefano Garzarella - -There is no longer a need to expose 
the request and related APIs in -virtio-scsi.h since there are no callers outside virtio-scsi.c. - -Note the block comment in VirtIOSCSIReq has been adjusted to meet the -coding style. - -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Paolo Bonzini -Message-id: 20220427143541.119567-7-stefanha@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 3dc584abeef0e1277c2de8c1c1974cb49444eb0a) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/virtio-scsi.c | 45 ++++++++++++++++++++++++++++++--- - include/hw/virtio/virtio-scsi.h | 40 ----------------------------- - 2 files changed, 41 insertions(+), 44 deletions(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index df5ff8bab7..2450c9438c 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -29,6 +29,43 @@ - #include "hw/virtio/virtio-access.h" - #include "trace.h" - -+typedef struct VirtIOSCSIReq { -+ /* -+ * Note: -+ * - fields up to resp_iov are initialized by virtio_scsi_init_req; -+ * - fields starting at vring are zeroed by virtio_scsi_init_req. 
-+ */ -+ VirtQueueElement elem; -+ -+ VirtIOSCSI *dev; -+ VirtQueue *vq; -+ QEMUSGList qsgl; -+ QEMUIOVector resp_iov; -+ -+ union { -+ /* Used for two-stage request submission */ -+ QTAILQ_ENTRY(VirtIOSCSIReq) next; -+ -+ /* Used for cancellation of request during TMFs */ -+ int remaining; -+ }; -+ -+ SCSIRequest *sreq; -+ size_t resp_size; -+ enum SCSIXferMode mode; -+ union { -+ VirtIOSCSICmdResp cmd; -+ VirtIOSCSICtrlTMFResp tmf; -+ VirtIOSCSICtrlANResp an; -+ VirtIOSCSIEvent event; -+ } resp; -+ union { -+ VirtIOSCSICmdReq cmd; -+ VirtIOSCSICtrlTMFReq tmf; -+ VirtIOSCSICtrlANReq an; -+ } req; -+} VirtIOSCSIReq; -+ - static inline int virtio_scsi_get_lun(uint8_t *lun) - { - return ((lun[2] << 8) | lun[3]) & 0x3FFF; -@@ -45,7 +82,7 @@ static inline SCSIDevice *virtio_scsi_device_get(VirtIOSCSI *s, uint8_t *lun) - return scsi_device_get(&s->bus, 0, lun[1], virtio_scsi_get_lun(lun)); - } - --void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req) -+static void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req) - { - VirtIODevice *vdev = VIRTIO_DEVICE(s); - const size_t zero_skip = -@@ -58,7 +95,7 @@ void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req) - memset((uint8_t *)req + zero_skip, 0, sizeof(*req) - zero_skip); - } - --void virtio_scsi_free_req(VirtIOSCSIReq *req) -+static void virtio_scsi_free_req(VirtIOSCSIReq *req) - { - qemu_iovec_destroy(&req->resp_iov); - qemu_sglist_destroy(&req->qsgl); -@@ -801,8 +838,8 @@ static void virtio_scsi_reset(VirtIODevice *vdev) - s->events_dropped = false; - } - --void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, -- uint32_t event, uint32_t reason) -+static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, -+ uint32_t event, uint32_t reason) - { - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); - VirtIOSCSIReq *req; -diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h -index 2497530064..abdda2cbd0 100644 ---- 
a/include/hw/virtio/virtio-scsi.h -+++ b/include/hw/virtio/virtio-scsi.h -@@ -94,42 +94,6 @@ struct VirtIOSCSI { - uint32_t host_features; - }; - --typedef struct VirtIOSCSIReq { -- /* Note: -- * - fields up to resp_iov are initialized by virtio_scsi_init_req; -- * - fields starting at vring are zeroed by virtio_scsi_init_req. -- * */ -- VirtQueueElement elem; -- -- VirtIOSCSI *dev; -- VirtQueue *vq; -- QEMUSGList qsgl; -- QEMUIOVector resp_iov; -- -- union { -- /* Used for two-stage request submission */ -- QTAILQ_ENTRY(VirtIOSCSIReq) next; -- -- /* Used for cancellation of request during TMFs */ -- int remaining; -- }; -- -- SCSIRequest *sreq; -- size_t resp_size; -- enum SCSIXferMode mode; -- union { -- VirtIOSCSICmdResp cmd; -- VirtIOSCSICtrlTMFResp tmf; -- VirtIOSCSICtrlANResp an; -- VirtIOSCSIEvent event; -- } resp; -- union { -- VirtIOSCSICmdReq cmd; -- VirtIOSCSICtrlTMFReq tmf; -- VirtIOSCSICtrlANReq an; -- } req; --} VirtIOSCSIReq; -- - static inline void virtio_scsi_acquire(VirtIOSCSI *s) - { - if (s->ctx) { -@@ -151,10 +115,6 @@ void virtio_scsi_common_realize(DeviceState *dev, - Error **errp); - - void virtio_scsi_common_unrealize(DeviceState *dev); --void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); --void virtio_scsi_free_req(VirtIOSCSIReq *req); --void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, -- uint32_t event, uint32_t reason); - - void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp); - int virtio_scsi_dataplane_start(VirtIODevice *s); --- -2.31.1 - diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 7e3e56e..f8c08dd 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -150,8 +150,8 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 7.0.0 -Release: 12%{?rcrel}%{?dist}%{?cc_suffix} +Version: 7.1.0 +Release: 1%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -175,323 +175,24 @@ Source30: kvm-s390x.conf Source31: kvm-x86.conf Source36: README.tests +Source37: capstone.tar.gz + Patch0004: 0004-Initial-redhat-build.patch -Patch0005: 0005-Enable-disable-devices-for-RHEL.patch -Patch0006: 0006-Machine-type-related-general-changes.patch -Patch0007: 0007-Add-aarch64-machine-types.patch -Patch0008: 0008-Add-ppc64-machine-types.patch -Patch0009: 0009-Add-s390x-machine-types.patch -Patch0010: 0010-Add-x86_64-machine-types.patch -Patch0011: 0011-Enable-make-check.patch -Patch0012: 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch -Patch0013: 0013-Add-support-statement-to-help-output.patch -Patch0014: 0014-globally-limit-the-maximum-number-of-CPUs.patch +Patch0005: 0005-Re-enable-capstone-internal-build.patch +Patch0006: 0006-Enable-disable-devices-for-RHEL.patch +Patch0007: 0007-Machine-type-related-general-changes.patch +Patch0008: 0008-Add-aarch64-machine-types.patch +Patch0009: 0009-Add-ppc64-machine-types.patch +Patch0010: 0010-Add-s390x-machine-types.patch +Patch0011: 0011-Add-x86_64-machine-types.patch +Patch0012: 0012-Enable-make-check.patch +Patch0013: 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +Patch0014: 0014-Add-support-statement-to-help-output.patch Patch0015: 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch -Patch0016: 0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch -Patch0017: 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -Patch0018: 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch -Patch0019: 0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch -Patch0020: 0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch -Patch0021: 0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch -# For bz#2044162 - [RHEL9.1] Enable virtio-mem as tech-preview on ARM64 QEMU -Patch22: 
kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch -# For bz#2081022 - Build regression on ppc64le with c9s qemu-kvm 7.0.0-1 changes -Patch23: kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch -# For bz#2046029 - [WRB] New machine type property - dtb-kaslr-seed -Patch24: kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch -# For bz#2046029 - [WRB] New machine type property - dtb-kaslr-seed -Patch25: kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch -# For bz#1477099 - virtio-iommu (including ACPI, VHOST/VFIO integration, migration support) -Patch26: kvm-Enable-virtio-iommu-pci-on-aarch64.patch -# For bz#2037612 - [Win11][tpm][QL41112 PF] vfio_listener_region_add received unaligned region -Patch27: kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch -# For bz#2037612 - [Win11][tpm][QL41112 PF] vfio_listener_region_add received unaligned region -Patch28: kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch -# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' -Patch29: kvm-qapi-machine.json-Add-cluster-id.patch -# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' -Patch30: kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch -# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' -Patch31: kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch -# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' -Patch32: kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch -# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' -Patch33: kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch -# For bz#2041823 - [aarch64][numa] When there are at least 6 Numa nodes serial log shows 'arch topology borken' 
-Patch34: kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch -# For bz#2079938 - qemu coredump when boot with multi disks (qemu) failed to set up stack guard page: Cannot allocate memory -Patch35: kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch -# For bz#2079938 - qemu coredump when boot with multi disks (qemu) failed to set up stack guard page: Cannot allocate memory -Patch36: kvm-coroutine-Revert-to-constant-batch-size.patch -# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% CPU consumption -Patch37: kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch -# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% CPU consumption -Patch38: kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch -# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% CPU consumption -Patch39: kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch -# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% CPU consumption -Patch40: kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch -# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% CPU consumption -Patch41: kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch -# For bz#2079347 - Guest boot blocked when scsi disks using same iothread and 100% CPU consumption -Patch42: kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch -# For bz#1995710 - RFE: Allow virtio-scsi CD-ROM media change with IOThreads -Patch43: kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch -# For bz#2064530 - Rebuild qemu-kvm with clang-14 -Patch44: kvm-migration-Fix-operator-type.patch -# For bz#1708300 - RFE: qemu-nbd vs NBD_FLAG_CAN_MULTI_CONN -Patch45: kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch -# For bz#1708300 - RFE: qemu-nbd vs NBD_FLAG_CAN_MULTI_CONN -Patch46: kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch -# For 
bz#2031024 - Add support for fixing thread pool size [QEMU] -Patch47: kvm-Introduce-event-loop-base-abstract-class.patch -# For bz#2031024 - Add support for fixing thread pool size [QEMU] -Patch48: kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch -# For bz#2031024 - Add support for fixing thread pool size [QEMU] -Patch49: kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch -# For bz#2072379 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) -Patch50: kvm-qcow2-Improve-refcount-structure-rebuilding.patch -# For bz#2072379 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) -Patch51: kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch -# For bz#2072379 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) -Patch52: kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch -# For bz#2072379 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. 
LVs) -Patch53: kvm-iotests-108-Fix-when-missing-user_allow_other.patch -# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA -Patch54: kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch -# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA -Patch55: kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch -# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA -Patch56: kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch -# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA -Patch57: kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch -# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA -Patch58: kvm-vhost-vdpa-backend-feature-should-set-only-once.patch -# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA -Patch59: kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch -# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA -Patch60: kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch -# For bz#2094270 - Do not set the hard vCPU limit to the soft vCPU limit in downstream qemu-kvm anymore -Patch61: kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch -# For bz#2086262 - [Win11][tpm]vfio_listener_region_del received unaligned region -Patch62: kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch -# For bz#1952483 - RFE: QEMU's coroutines fail with CFLAGS=-flto on non-x86_64 architectures -Patch63: kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch -# For bz#1952483 - RFE: QEMU's coroutines fail with CFLAGS=-flto on non-x86_64 architectures -Patch64: kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch -# For bz#1952483 - RFE: QEMU's coroutines fail with CFLAGS=-flto on non-x86_64 architectures -Patch65: kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch -# For bz#2094252 - Compile the virtio-iommu device on x86_64 -Patch66: kvm-Enable-virtio-iommu-pci-on-x86_64.patch -# For bz#2092788 - Stalled IO 
Operations in VM -Patch67: kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch -# For bz#2092788 - Stalled IO Operations in VM -Patch68: kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch -# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 -Patch69: kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch -# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 -Patch70: kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch -# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 -Patch71: kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch -# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 -Patch72: kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch -# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 -Patch73: kvm-target-s390x-deprecate-CPUs-older-than-z14.patch -# For bz#2060839 - Consider deprecating CPU models like "kvm64" / "qemu64" on RHEL 9 -Patch74: kvm-target-arm-deprecate-named-CPU-models.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch75: kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch76: kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch77: kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch78: kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch79: kvm-migration-Add-migrate_use_tls-helper.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch80: kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch81: 
kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch82: kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch83: kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch84: kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch -# For bz#1968509 - Use MSG_ZEROCOPY on QEMU Live Migration -Patch85: kvm-migration-Change-zero_copy_send-from-migration-param.patch -# For bz#2096143 - The migration port is not released if use it again for recovering postcopy migration -Patch86: kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch -# For bz#2100106 - Fix virtio-iommu/vfio bypass -Patch87: kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch -# For bz#2100106 - Fix virtio-iommu/vfio bypass -Patch88: kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch -# For bz#2100106 - Fix virtio-iommu/vfio bypass -Patch89: kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch -# For bz#2100106 - Fix virtio-iommu/vfio bypass -Patch90: kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch -# For bz#2100106 - Fix virtio-iommu/vfio bypass -Patch91: kvm-virtio-iommu-Fix-migration-regression.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch92: kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch93: kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch94: kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation 
from used virtio-blk dasd disk under certain conditions -Patch95: kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch96: kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch97: kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch98: kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch99: kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch100: kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch -# For bz#2098077 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions -Patch101: kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch -# For bz#1951522 - CVE-2021-3507 qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-9.0] -Patch102: kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch -# For bz#1951522 - CVE-2021-3507 qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-9.0] -Patch103: kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch104: kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch105: kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch106: 
kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch107: kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch108: kvm-meson-create-have_vhost_-variables.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch109: kvm-meson-use-have_vhost_-variables-to-pick-sources.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch110: kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch111: kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch112: kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch113: kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch114: kvm-vhost-Reorder-vhost_svq_kick.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch115: kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch116: kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch117: kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch118: kvm-vhost-Add-SVQDescState.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch119: kvm-vhost-Track-number-of-descs-in-SVQDescState.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch120: kvm-vhost-add-vhost_svq_push_elem.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch121: kvm-vhost-Expose-vhost_svq_add.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch122: kvm-vhost-add-vhost_svq_poll.patch -# For bz#1939363 - vDPA control virtqueue 
support in Qemu -Patch123: kvm-vhost-Add-svq-avail_handler-callback.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch124: kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch125: kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch126: kvm-vdpa-manual-forward-CVQ-buffers.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch127: kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch128: kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch129: kvm-vdpa-Add-device-migration-blocker.patch -# For bz#1939363 - vDPA control virtqueue support in Qemu -Patch130: kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch -# For bz#2111994 - RHEL9: skey test in kvm_unit_test got failed -Patch131: kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch -# For bz#2111994 - RHEL9: skey test in kvm_unit_test got failed -Patch132: kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch -# For bz#2095608 - Please correct the error message when try to start qemu with "-M kernel-irqchip=split" -Patch133: kvm-kvm-don-t-use-perror-without-useful-errno.patch -# For bz#2099934 - Guest reboot on destination host after postcopy migration completed -Patch134: kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch -# For bz#2099934 - Guest reboot on destination host after postcopy migration completed -Patch135: kvm-Revert-migration-Simplify-unqueue_page.patch -# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together -Patch136: kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch -# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and 
compress/xbzrle together -Patch137: kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch -# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together -Patch138: kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch -# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together -Patch139: kvm-migration-Avoid-false-positive-on-non-supported-scen.patch -# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together -Patch140: kvm-migration-add-remaining-params-has_-true-in-migratio.patch -# For bz#2107466 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together -Patch141: kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch -# For bz#2112303 - virtio-blk: Can't boot fresh installation from used 512 cluster_size image under certain conditions -Patch142: kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch -# For bz#2116876 - Fixes for vDPA control virtqueue support in Qemu -Patch143: kvm-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch -# For bz#2116876 - Fixes for vDPA control virtqueue support in Qemu -Patch144: kvm-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch -# For bz#2116876 - Fixes for vDPA control virtqueue support in Qemu -Patch145: kvm-vdpa-Fix-memory-listener-deletions-of-iova-tree.patch -# For bz#2116876 - Fixes for vDPA control virtqueue support in Qemu -Patch146: kvm-vdpa-Fix-file-descriptor-leak-on-get-features-error.patch -# For bz#2120275 - Wrong max_sectors_kb and Maximum transfer length on the pass-through device [rhel-9.1] -Patch147: kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch148: kvm-vhost-Get-vring-base-from-vq-not-svq.patch -# For bz#2114060 - vDPA state 
restore support through control virtqueue in Qemu -Patch149: kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch150: kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch151: kvm-util-Return-void-on-iova_tree_remove.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch152: kvm-util-accept-iova_tree_remove_parameter-by-value.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch153: kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch154: kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch155: kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch156: kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch157: kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch158: kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch159: kvm-vhost-Delete-useless-read-memory-barrier.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch160: kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch161: kvm-vhost_net-Add-NetClientInfo-start-callback.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch162: 
kvm-vhost_net-Add-NetClientInfo-stop-callback.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch163: kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch164: kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch165: kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch166: kvm-vhost_net-add-NetClientState-load-callback.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch167: kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch -# For bz#2114060 - vDPA state restore support through control virtqueue in Qemu -Patch168: kvm-vdpa-Delete-CVQ-migration-blocker.patch -# For bz#2099541 - qemu coredump with error Assertion `qemu_mutex_iothread_locked()' failed when repeatly hotplug/unplug disks in pause status -Patch169: kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch - -# Source-git patches +Patch0016: 0016-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +Patch0017: 0017-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +Patch0018: 0018-Introduce-upstream-7.0-compat-changes.patch %if %{have_clang} BuildRequires: clang @@ -803,6 +504,7 @@ This package provides usbredir support. 
%prep %setup -q -n qemu-%{version}%{?rcstr} %autopatch -p1 +/usr/bin/gzip -dc %{SOURCE37} | /usr/bin/tar -xof - %global qemu_kvm_build qemu_kvm_build mkdir -p %{qemu_kvm_build} @@ -921,17 +623,15 @@ mkdir -p %{qemu_kvm_build} --disable-vhost-crypto \\\ --disable-vhost-kernel \\\ --disable-vhost-net \\\ - --disable-vhost-scsi \\\ --disable-vhost-user \\\ --disable-vhost-user-blk-server \\\ --disable-vhost-vdpa \\\ - --disable-vhost-vsock \\\ --disable-virglrenderer \\\ --disable-virtfs \\\ --disable-virtiofsd \\\ --disable-vnc \\\ --disable-vnc-jpeg \\\ - --disable-vnc-png \\\ + --disable-png \\\ --disable-vnc-sasl \\\ --disable-vte \\\ --disable-vvfat \\\ @@ -1046,9 +746,8 @@ run_configure \ --enable-vhost-user \ --enable-vhost-user-blk-server \ --enable-vhost-vdpa \ - --enable-vhost-vsock \ --enable-vnc \ - --enable-vnc-png \ + --enable-png \ --enable-vnc-sasl \ %if %{enable_werror} --enable-werror \ @@ -1525,36 +1224,10 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog -* Fri Aug 26 2022 Miroslav Rezanina - 7.0.0-12 -- kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch [bz#2120275] -- kvm-vhost-Get-vring-base-from-vq-not-svq.patch [bz#2114060] -- kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch [bz#2114060] -- kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch [bz#2114060] -- kvm-util-Return-void-on-iova_tree_remove.patch [bz#2114060] -- kvm-util-accept-iova_tree_remove_parameter-by-value.patch [bz#2114060] -- kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch [bz#2114060] -- kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch [bz#2114060] -- kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch [bz#2114060] -- kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch [bz#2114060] -- kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch [bz#2114060] -- kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch [bz#2114060] -- kvm-vhost-Delete-useless-read-memory-barrier.patch 
[bz#2114060] -- kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch [bz#2114060] -- kvm-vhost_net-Add-NetClientInfo-start-callback.patch [bz#2114060] -- kvm-vhost_net-Add-NetClientInfo-stop-callback.patch [bz#2114060] -- kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch [bz#2114060] -- kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch [bz#2114060] -- kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch [bz#2114060] -- kvm-vhost_net-add-NetClientState-load-callback.patch [bz#2114060] -- kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch [bz#2114060] -- kvm-vdpa-Delete-CVQ-migration-blocker.patch [bz#2114060] -- kvm-virtio-scsi-fix-race-in-virtio_scsi_dataplane_start.patch [bz#2099541] -- Resolves: bz#2120275 - (Wrong max_sectors_kb and Maximum transfer length on the pass-through device [rhel-9.1]) -- Resolves: bz#2114060 - (vDPA state restore support through control virtqueue in Qemu) -- Resolves: bz#2099541 - (qemu coredump with error Assertion `qemu_mutex_iothread_locked()' failed when repeatly hotplug/unplug disks in pause status) +* Mon Sep 05 2022 Miroslav Rezanina - 7.1.0-1 +- Rebase to QEMU 7.1.0 [bz#2111769] +- Resolves: bz#2111769 + (Rebase to QEMU 7.1.0) * Mon Aug 15 2022 Miroslav Rezanina - 7.0.0-11 - kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch [bz#2107466] diff --git a/sources b/sources index 9f53caf..5506976 100644 --- a/sources +++ b/sources @@ -1 +1,2 @@ -SHA512 (qemu-7.0.0.tar.xz) = 44ecd10c018a3763e1bc87d1d35b98890d0d5636acd69fe9b5cadf5024d5af6a31684d60cbe1c3370e02986434c1fb0ad99224e0e6f6fe7eda169992508157b1 +SHA512 (capstone.tar.gz) = 14c5a3f3807c9294258de5bf294563fcdb56b50630cf3080dc681ae1415d938dce9485d7b0fef61cfb4a2381696f0e74c7da149b2b6218cdbb00521cd365c7e4 +SHA512 (qemu-7.1.0.tar.xz) = c60c5ff8ec99b7552e485768908920658fdd8035ff7a6fa370fb6881957dc8b7e5f18ff1a8f49bd6aa22909ede2a7c084986d8244f12074ccd33ebe40a0c411f From 19bc18cc9eec0d2d0301153bad2ed986aaf1f61d Mon Sep 17 00:00:00 
2001 From: Miroslav Rezanina Date: Thu, 29 Sep 2022 03:59:37 -0400 Subject: [PATCH 169/195] * Thu Sep 29 2022 Miroslav Rezanina - 7.1.0-2 - kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch [RHELX-57] - kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch [RHELX-57] - kvm-util-accept-iova_tree_remove_parameter-by-value.patch [RHELX-57] - kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch [RHELX-57] - kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch [RHELX-57] - kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch [RHELX-57] - kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch [RHELX-57] - kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch [RHELX-57] - kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch [RHELX-57] - kvm-vhost-Delete-useless-read-memory-barrier.patch [RHELX-57] - kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch [RHELX-57] - kvm-vhost_net-Add-NetClientInfo-start-callback.patch [RHELX-57] - kvm-vhost_net-Add-NetClientInfo-stop-callback.patch [RHELX-57] - kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch [RHELX-57] - kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch [RHELX-57] - kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch [RHELX-57] - kvm-vhost_net-add-NetClientState-load-callback.patch [RHELX-57] - kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch [RHELX-57] - kvm-vdpa-Delete-CVQ-migration-blocker.patch [RHELX-57] - kvm-vdpa-Make-VhostVDPAState-cvq_cmd_in_buffer-control-a.patch [RHELX-57] - kvm-vdpa-extract-vhost_vdpa_net_load_mac-from-vhost_vdpa.patch [RHELX-57] - kvm-vdpa-Add-vhost_vdpa_net_load_mq.patch [RHELX-57] - kvm-vdpa-validate-MQ-CVQ-commands.patch [RHELX-57] - kvm-virtio-net-Update-virtio-net-curr_queue_pairs-in-vdp.patch [RHELX-57] - kvm-vdpa-Allow-MQ-feature-in-SVQ.patch [RHELX-57] - kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch [bz#2125281] - kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch 
[bz#2125281] - kvm-Revert-Re-enable-capstone-internal-build.patch [bz#2127825] - kvm-spec-Use-capstone-package.patch [bz#2127825] - Resolves: RHELX-57 (vDPA SVQ Multiqueue support ) - Resolves: bz#2125281 ([RHEL9.1] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command [rhel-9.2.0]) - Resolves: bz#2127825 (Use capstone for qemu-kvm build) --- ...rt-Re-enable-capstone-internal-build.patch | 252 ++++++++++++++++++ ...msr_feature_control-first-thing-when.patch | 66 +++++ ...eset-KVM-nested-state-upon-CPU-reset.patch | 93 +++++++ ...-iova_tree_remove_parameter-by-value.patch | 182 +++++++++++++ kvm-vdpa-Add-vhost_vdpa_net_load_mq.patch | 74 +++++ ...tio-net-mac-address-via-CVQ-at-start.patch | 87 ++++++ kvm-vdpa-Allow-MQ-feature-in-SVQ.patch | 41 +++ kvm-vdpa-Delete-CVQ-migration-blocker.patch | 98 +++++++ ...Make-SVQ-vring-unmapping-return-void.patch | 133 +++++++++ ...DPAState-cvq_cmd_in_buffer-control-a.patch | 113 ++++++++ ...d-buffers-map-to-start-of-net-device.patch | 251 +++++++++++++++++ ...SVQ-vring-from-iova_tree-at-shutdown.patch | 49 ++++ ...a-Skip-the-maps-not-in-the-iova-tree.patch | 48 ++++ ...-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch | 79 ++++++ ...et_vhost_vdpa_cvq_info-NetClientInfo.patch | 62 +++++ ...ave-failed-dma-maps-in-SVQ-iova-tree.patch | 83 ++++++ ...st_vdpa_net_cvq_add-from-vhost_vdpa_.patch | 153 +++++++++++ ...st_vdpa_net_load_mac-from-vhost_vdpa.patch | 115 ++++++++ kvm-vdpa-validate-MQ-CVQ-commands.patch | 50 ++++ ...re-new-kick-fd-on-vhost_svq_set_svq_.patch | 67 +++++ ...t-Delete-useless-read-memory-barrier.patch | 47 ++++ ...end-on-NULL-VirtQueueElement-on-vhos.patch | 63 +++++ ...fer-elem-ownership-in-vhost_handle_g.patch | 80 ++++++ ...ement-ndescs-instead-of-opaque-data-.patch | 55 ++++ ...net-Add-NetClientInfo-start-callback.patch | 73 +++++ ..._net-Add-NetClientInfo-stop-callback.patch | 68 +++++ ...net-add-NetClientState-load-callback.patch | 73 +++++ ...e-virtio-net-curr_queue_pairs-in-vdp.patch | 61 
+++++ qemu-kvm.spec | 102 ++++++- sources | 1 - 30 files changed, 2713 insertions(+), 6 deletions(-) create mode 100644 kvm-Revert-Re-enable-capstone-internal-build.patch create mode 100644 kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch create mode 100644 kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch create mode 100644 kvm-util-accept-iova_tree_remove_parameter-by-value.patch create mode 100644 kvm-vdpa-Add-vhost_vdpa_net_load_mq.patch create mode 100644 kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch create mode 100644 kvm-vdpa-Allow-MQ-feature-in-SVQ.patch create mode 100644 kvm-vdpa-Delete-CVQ-migration-blocker.patch create mode 100644 kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch create mode 100644 kvm-vdpa-Make-VhostVDPAState-cvq_cmd_in_buffer-control-a.patch create mode 100644 kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch create mode 100644 kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch create mode 100644 kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch create mode 100644 kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch create mode 100644 kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch create mode 100644 kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch create mode 100644 kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch create mode 100644 kvm-vdpa-extract-vhost_vdpa_net_load_mac-from-vhost_vdpa.patch create mode 100644 kvm-vdpa-validate-MQ-CVQ-commands.patch create mode 100644 kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch create mode 100644 kvm-vhost-Delete-useless-read-memory-barrier.patch create mode 100644 kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch create mode 100644 kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch create mode 100644 kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch create mode 100644 kvm-vhost_net-Add-NetClientInfo-start-callback.patch create mode 100644 
kvm-vhost_net-Add-NetClientInfo-stop-callback.patch create mode 100644 kvm-vhost_net-add-NetClientState-load-callback.patch create mode 100644 kvm-virtio-net-Update-virtio-net-curr_queue_pairs-in-vdp.patch diff --git a/kvm-Revert-Re-enable-capstone-internal-build.patch b/kvm-Revert-Re-enable-capstone-internal-build.patch new file mode 100644 index 0000000..3dbb5ca --- /dev/null +++ b/kvm-Revert-Re-enable-capstone-internal-build.patch @@ -0,0 +1,252 @@ +From 4ce18f26f30cfb8860153825c504289f43800f5e Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Mon, 19 Sep 2022 03:23:41 -0400 +Subject: [PATCH 28/29] Revert "Re-enable capstone internal build" + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 119: Use capstone package for qemu-kvm build +RH-Bugzilla: 2127825 +RH-Acked-by: Thomas Huth +RH-Commit: [1/2] bd58ace2233e3071703a69ea9e7bfcd82416cda1 (mrezanin/centos-src-qemu-kvm) + +This reverts commit c2c10b636a97d1cb9c4abbc4152a34ebf2f44817. + +Signed-off-by: Miroslav Rezanina +--- + configure | 12 ---- + meson.build | 116 ++-------------------------------- + meson_options.txt | 3 +- + scripts/meson-buildoptions.sh | 5 +- + 4 files changed, 7 insertions(+), 129 deletions(-) + +diff --git a/configure b/configure +index 448b0c82cb..72ab03f11a 100755 +--- a/configure ++++ b/configure +@@ -322,10 +322,8 @@ vfio_user_server="disabled" + + # 1. 
Track which submodules are needed + if test "$default_feature" = no ; then +- capstone="disabled" + slirp="disabled" + else +- capstone="auto" + slirp="auto" + fi + fdt="auto" +@@ -904,15 +902,6 @@ for opt do + --enable-uuid|--disable-uuid) + echo "$0: $opt is obsolete, UUID support is always built" >&2 + ;; +- --disable-capstone) capstone="disabled" +- ;; +- --enable-capstone) capstone="enabled" +- ;; +- --enable-capstone=git) capstone="internal" +- ;; +- --enable-capstone=*) capstone="$optarg" +- ;; +- + --with-git=*) git="$optarg" + ;; + --with-git-submodules=*) +@@ -2753,7 +2742,6 @@ if test "$skip_meson" = no; then + test "$werror" = yes && meson_option_add -Dwerror=true + + # QEMU options +- test "$capstone" != auto && meson_option_add "-Dcapstone=$capstone" + test "$cfi" != false && meson_option_add "-Dcfi=$cfi" + test "$fdt" != auto && meson_option_add "-Dfdt=$fdt" + test -n "${LIB_FUZZING_ENGINE+xxx}" && meson_option_add "-Dfuzzing_engine=$LIB_FUZZING_ENGINE" +diff --git a/meson.build b/meson.build +index 9e6a979c13..20fddbd707 100644 +--- a/meson.build ++++ b/meson.build +@@ -2596,13 +2596,10 @@ genh += custom_target('config-poison.h', + ############## + + capstone = not_found +-capstone_opt = get_option('capstone') +-if capstone_opt in ['enabled', 'auto', 'system'] +- have_internal = fs.exists(meson.current_source_dir() / 'capstone/Makefile') ++if not get_option('capstone').auto() or have_system or have_user + capstone = dependency('capstone', version: '>=3.0.5', + kwargs: static_kwargs, method: 'pkg-config', +- required: capstone_opt == 'system' or +- capstone_opt == 'enabled' and not have_internal) ++ required: get_option('capstone')) + + # Some versions of capstone have broken pkg-config file + # that reports a wrong -I path, causing the #include to +@@ -2611,113 +2608,10 @@ if capstone_opt in ['enabled', 'auto', 'system'] + if capstone.found() and not cc.compiles('#include ', + dependencies: [capstone]) + capstone = not_found +- if capstone_opt == 
'system' +- error('system capstone requested, it does not appear to work') ++ if get_option('capstone').enabled() ++ error('capstone requested, but it does not appear to work') + endif + endif +- +- if capstone.found() +- capstone_opt = 'system' +- elif have_internal +- capstone_opt = 'internal' +- else +- capstone_opt = 'disabled' +- endif +-endif +-if capstone_opt == 'internal' +- capstone_data = configuration_data() +- capstone_data.set('CAPSTONE_USE_SYS_DYN_MEM', '1') +- +- capstone_files = files( +- 'capstone/cs.c', +- 'capstone/MCInst.c', +- 'capstone/MCInstrDesc.c', +- 'capstone/MCRegisterInfo.c', +- 'capstone/SStream.c', +- 'capstone/utils.c' +- ) +- +- if 'CONFIG_ARM_DIS' in config_all_disas +- capstone_data.set('CAPSTONE_HAS_ARM', '1') +- capstone_files += files( +- 'capstone/arch/ARM/ARMDisassembler.c', +- 'capstone/arch/ARM/ARMInstPrinter.c', +- 'capstone/arch/ARM/ARMMapping.c', +- 'capstone/arch/ARM/ARMModule.c' +- ) +- endif +- +- # FIXME: This config entry currently depends on a c++ compiler. +- # Which is needed for building libvixl, but not for capstone. 
+- if 'CONFIG_ARM_A64_DIS' in config_all_disas +- capstone_data.set('CAPSTONE_HAS_ARM64', '1') +- capstone_files += files( +- 'capstone/arch/AArch64/AArch64BaseInfo.c', +- 'capstone/arch/AArch64/AArch64Disassembler.c', +- 'capstone/arch/AArch64/AArch64InstPrinter.c', +- 'capstone/arch/AArch64/AArch64Mapping.c', +- 'capstone/arch/AArch64/AArch64Module.c' +- ) +- endif +- +- if 'CONFIG_PPC_DIS' in config_all_disas +- capstone_data.set('CAPSTONE_HAS_POWERPC', '1') +- capstone_files += files( +- 'capstone/arch/PowerPC/PPCDisassembler.c', +- 'capstone/arch/PowerPC/PPCInstPrinter.c', +- 'capstone/arch/PowerPC/PPCMapping.c', +- 'capstone/arch/PowerPC/PPCModule.c' +- ) +- endif +- +- if 'CONFIG_S390_DIS' in config_all_disas +- capstone_data.set('CAPSTONE_HAS_SYSZ', '1') +- capstone_files += files( +- 'capstone/arch/SystemZ/SystemZDisassembler.c', +- 'capstone/arch/SystemZ/SystemZInstPrinter.c', +- 'capstone/arch/SystemZ/SystemZMapping.c', +- 'capstone/arch/SystemZ/SystemZModule.c', +- 'capstone/arch/SystemZ/SystemZMCTargetDesc.c' +- ) +- endif +- +- if 'CONFIG_I386_DIS' in config_all_disas +- capstone_data.set('CAPSTONE_HAS_X86', 1) +- capstone_files += files( +- 'capstone/arch/X86/X86Disassembler.c', +- 'capstone/arch/X86/X86DisassemblerDecoder.c', +- 'capstone/arch/X86/X86ATTInstPrinter.c', +- 'capstone/arch/X86/X86IntelInstPrinter.c', +- 'capstone/arch/X86/X86InstPrinterCommon.c', +- 'capstone/arch/X86/X86Mapping.c', +- 'capstone/arch/X86/X86Module.c' +- ) +- endif +- +- configure_file(output: 'capstone-defs.h', configuration: capstone_data) +- +- capstone_cargs = [ +- # FIXME: There does not seem to be a way to completely replace the c_args +- # that come from add_project_arguments() -- we can only add to them. +- # So: disable all warnings with a big hammer. +- '-Wno-error', '-w', +- +- # Include all configuration defines via a header file, which will wind up +- # as a dependency on the object file, and thus changes here will result +- # in a rebuild. 
+- '-include', 'capstone-defs.h', +- +- '-Wp,-D_GLIBCXX_ASSERTIONS', +- +- ] +- +- libcapstone = static_library('capstone', +- build_by_default: false, +- sources: capstone_files, +- c_args: capstone_cargs, +- include_directories: 'capstone/include') +- capstone = declare_dependency(link_with: libcapstone, +- include_directories: 'capstone/include/capstone') + endif + + slirp = not_found +@@ -4083,7 +3977,7 @@ summary_info += {'bzip2 support': libbzip2} + summary_info += {'lzfse support': liblzfse} + summary_info += {'zstd support': zstd} + summary_info += {'NUMA host support': numa} +-summary_info += {'capstone': capstone_opt == 'internal' ? capstone_opt : capstone} ++summary_info += {'capstone': capstone} + summary_info += {'libpmem support': libpmem} + summary_info += {'libdaxctl support': libdaxctl} + summary_info += {'libudev': libudev} +diff --git a/meson_options.txt b/meson_options.txt +index 7cd920fcd6..e58e158396 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -262,8 +262,7 @@ option('libvduse', type: 'feature', value: 'auto', + option('vduse_blk_export', type: 'feature', value: 'auto', + description: 'VDUSE block export support') + +-option('capstone', type: 'combo', value: 'auto', +- choices: ['disabled', 'enabled', 'auto', 'system', 'internal'], ++option('capstone', type: 'feature', value: 'auto', + description: 'Whether and how to find the capstone library') + option('slirp', type: 'combo', value: 'auto', + choices: ['disabled', 'enabled', 'auto', 'system', 'internal'], +diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh +index b1001aa1db..359b04e0e6 100644 +--- a/scripts/meson-buildoptions.sh ++++ b/scripts/meson-buildoptions.sh +@@ -16,9 +16,6 @@ meson_options_help() { + printf "%s\n" ' --enable-block-drv-whitelist-in-tools' + printf "%s\n" ' use block whitelist also in tools instead of only' + printf "%s\n" ' QEMU' +- printf "%s\n" ' --enable-capstone[=CHOICE]' +- printf "%s\n" ' Whether and how to find the 
capstone library' +- printf "%s\n" ' (choices: auto/disabled/enabled/internal/system)' + printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)' + printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation' + printf "%s\n" ' --enable-debug-mutex mutex debugging support' +@@ -78,6 +75,7 @@ meson_options_help() { + printf "%s\n" ' bzip2 bzip2 support for DMG images' + printf "%s\n" ' canokey CanoKey support' + printf "%s\n" ' cap-ng cap_ng support' ++ printf "%s\n" ' capstone Whether and how to find the capstone library' + printf "%s\n" ' cloop cloop image format support' + printf "%s\n" ' cocoa Cocoa user interface (macOS only)' + printf "%s\n" ' coreaudio CoreAudio sound support' +@@ -218,7 +216,6 @@ _meson_option_parse() { + --disable-cap-ng) printf "%s" -Dcap_ng=disabled ;; + --enable-capstone) printf "%s" -Dcapstone=enabled ;; + --disable-capstone) printf "%s" -Dcapstone=disabled ;; +- --enable-capstone=*) quote_sh "-Dcapstone=$2" ;; + --enable-cfi) printf "%s" -Dcfi=true ;; + --disable-cfi) printf "%s" -Dcfi=false ;; + --enable-cfi-debug) printf "%s" -Dcfi_debug=true ;; +-- +2.31.1 + diff --git a/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch b/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch new file mode 100644 index 0000000..2e9fa92 --- /dev/null +++ b/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch @@ -0,0 +1,66 @@ +From aba2a5cb19efa33be871dd951366439cf99c5f13 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Thu, 18 Aug 2022 17:01:13 +0200 +Subject: [PATCH 27/29] i386: do kvm_put_msr_feature_control() first thing when + vCPU is reset + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 118: Synchronize qemu-kvm-7.0.0-13.el9 +RH-Bugzilla: 2125281 +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [2/2] 4986c35a04255c8fe2b62a48f5ea489339f3826a (mrezanin/centos-src-qemu-kvm) + +kvm_put_sregs2() fails to reset 'locked' CR4/CR0 bits upon vCPU reset when +it is in VMX root operation. 
Do kvm_put_msr_feature_control() before +kvm_put_sregs2() to (possibly) kick vCPU out of VMX root operation. It also +seems logical to do kvm_put_msr_feature_control() before +kvm_put_nested_state() and not after it, especially when 'real' nested +state is set. + +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20220818150113.479917-3-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 45ed68a1a3a19754ade954d75a3c9d13ff560e5c) +Signed-off-by: Vitaly Kuznetsov +--- + target/i386/kvm/kvm.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index fd3237310b..a9eba247a5 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -4533,6 +4533,18 @@ int kvm_arch_put_registers(CPUState *cpu, int level) + + assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); + ++ /* ++ * Put MSR_IA32_FEATURE_CONTROL first, this ensures the VM gets out of VMX ++ * root operation upon vCPU reset. kvm_put_msr_feature_control() should also ++ * preceed kvm_put_nested_state() when 'real' nested state is set. ++ */ ++ if (level >= KVM_PUT_RESET_STATE) { ++ ret = kvm_put_msr_feature_control(x86_cpu); ++ if (ret < 0) { ++ return ret; ++ } ++ } ++ + /* must be before kvm_put_nested_state so that EFER.SVME is set */ + ret = has_sregs2 ? 
kvm_put_sregs2(x86_cpu) : kvm_put_sregs(x86_cpu); + if (ret < 0) { +@@ -4544,11 +4556,6 @@ int kvm_arch_put_registers(CPUState *cpu, int level) + if (ret < 0) { + return ret; + } +- +- ret = kvm_put_msr_feature_control(x86_cpu); +- if (ret < 0) { +- return ret; +- } + } + + if (level == KVM_PUT_FULL_STATE) { +-- +2.31.1 + diff --git a/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch b/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch new file mode 100644 index 0000000..27ccde7 --- /dev/null +++ b/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch @@ -0,0 +1,93 @@ +From 6f650e08efc35cc04730bf99cea7be8d4faa6e74 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Thu, 18 Aug 2022 17:01:12 +0200 +Subject: [PATCH 26/29] i386: reset KVM nested state upon CPU reset + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 118: Synchronize qemu-kvm-7.0.0-13.el9 +RH-Bugzilla: 2125281 +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [1/2] b34da74a40fe32ef210c8127ba8bb032aaab6381 (mrezanin/centos-src-qemu-kvm) + +Make sure env->nested_state is cleaned up when a vCPU is reset, it may +be stale after an incoming migration, kvm_arch_put_registers() may +end up failing or putting vCPU in a weird state. 
+ +Reviewed-by: Maxim Levitsky +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20220818150113.479917-2-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3cafdb67504a34a0305260f0c86a73d5a3fb000b) +Signed-off-by: Vitaly Kuznetsov +--- + target/i386/kvm/kvm.c | 37 +++++++++++++++++++++++++++---------- + 1 file changed, 27 insertions(+), 10 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 4e5d4bafc4..fd3237310b 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -1695,6 +1695,30 @@ static void kvm_init_xsave(CPUX86State *env) + env->xsave_buf_len); + } + ++static void kvm_init_nested_state(CPUX86State *env) ++{ ++ struct kvm_vmx_nested_state_hdr *vmx_hdr; ++ uint32_t size; ++ ++ if (!env->nested_state) { ++ return; ++ } ++ ++ size = env->nested_state->size; ++ ++ memset(env->nested_state, 0, size); ++ env->nested_state->size = size; ++ ++ if (cpu_has_vmx(env)) { ++ env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX; ++ vmx_hdr = &env->nested_state->hdr.vmx; ++ vmx_hdr->vmxon_pa = -1ull; ++ vmx_hdr->vmcs12_pa = -1ull; ++ } else if (cpu_has_svm(env)) { ++ env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM; ++ } ++} ++ + int kvm_arch_init_vcpu(CPUState *cs) + { + struct { +@@ -2122,19 +2146,10 @@ int kvm_arch_init_vcpu(CPUState *cs) + assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data)); + + if (cpu_has_vmx(env) || cpu_has_svm(env)) { +- struct kvm_vmx_nested_state_hdr *vmx_hdr; +- + env->nested_state = g_malloc0(max_nested_state_len); + env->nested_state->size = max_nested_state_len; + +- if (cpu_has_vmx(env)) { +- env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX; +- vmx_hdr = &env->nested_state->hdr.vmx; +- vmx_hdr->vmxon_pa = -1ull; +- vmx_hdr->vmcs12_pa = -1ull; +- } else { +- env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM; +- } ++ kvm_init_nested_state(env); + } + } + +@@ -2199,6 +2214,8 @@ void kvm_arch_reset_vcpu(X86CPU *cpu) + /* enabled by 
default */ + env->poll_control_msr = 1; + ++ kvm_init_nested_state(env); ++ + sev_es_set_reset_vector(CPU(cpu)); + } + +-- +2.31.1 + diff --git a/kvm-util-accept-iova_tree_remove_parameter-by-value.patch b/kvm-util-accept-iova_tree_remove_parameter-by-value.patch new file mode 100644 index 0000000..5cd76c3 --- /dev/null +++ b/kvm-util-accept-iova_tree_remove_parameter-by-value.patch @@ -0,0 +1,182 @@ +From 3320d1883222bc551cf8ffd048882be4a97e872f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:20:04 +0200 +Subject: [PATCH 03/29] util: accept iova_tree_remove_parameter by value +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [3/25] 98190376f758aed31bc31ce3e478438787eb357c (redhat/centos-stream/src/qemu-kvm) + +It's convenient to call iova_tree_remove from a map returned from +iova_tree_find or iova_tree_find_iova. With the current code this is not +possible, since we will free it, and then we will try to search for it +again. + +Fix it making accepting the map by value, forcing a copy of the +argument. Not applying a fixes tag, since there is no use like that at +the moment. 
+ +Signed-off-by: Eugenio Pérez +Signed-off-by: Jason Wang +(cherry picked from commit 69292a8e40f4dae8af5f04724e06392cdf03c09e) +Signed-off-by: Laurent Vivier +--- + hw/i386/intel_iommu.c | 6 +++--- + hw/virtio/vhost-iova-tree.c | 2 +- + hw/virtio/vhost-iova-tree.h | 2 +- + hw/virtio/vhost-vdpa.c | 6 +++--- + include/qemu/iova-tree.h | 2 +- + net/vhost-vdpa.c | 4 ++-- + util/iova-tree.c | 4 ++-- + 7 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index 2162394e08..05d53a1aa9 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -1187,7 +1187,7 @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) + return ret; + } + /* Drop any existing mapping */ +- iova_tree_remove(as->iova_tree, &target); ++ iova_tree_remove(as->iova_tree, target); + /* Recover the correct type */ + event->type = IOMMU_NOTIFIER_MAP; + entry->perm = cache_perm; +@@ -1200,7 +1200,7 @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) + trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask); + return 0; + } +- iova_tree_remove(as->iova_tree, &target); ++ iova_tree_remove(as->iova_tree, target); + } + + trace_vtd_page_walk_one(info->domain_id, entry->iova, +@@ -3563,7 +3563,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) + + map.iova = n->start; + map.size = size; +- iova_tree_remove(as->iova_tree, &map); ++ iova_tree_remove(as->iova_tree, map); + } + + static void vtd_address_space_unmap_all(IntelIOMMUState *s) +diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c +index 67bf6d57ab..3d03395a77 100644 +--- a/hw/virtio/vhost-iova-tree.c ++++ b/hw/virtio/vhost-iova-tree.c +@@ -104,7 +104,7 @@ int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map) + * @iova_tree: The vhost iova tree + * @map: The map to remove + */ +-void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map) ++void 
vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map) + { + iova_tree_remove(iova_tree->iova_taddr_map, map); + } +diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h +index 6a4f24e0f9..4adfd79ff0 100644 +--- a/hw/virtio/vhost-iova-tree.h ++++ b/hw/virtio/vhost-iova-tree.h +@@ -22,6 +22,6 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete); + const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree, + const DMAMap *map); + int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map); +-void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map); ++void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map); + + #endif +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 7e28d2f674..87e0ad393f 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -240,7 +240,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, + + fail_map: + if (v->shadow_vqs_enabled) { +- vhost_iova_tree_remove(v->iova_tree, &mem_region); ++ vhost_iova_tree_remove(v->iova_tree, mem_region); + } + + fail: +@@ -300,7 +300,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, + return; + } + iova = result->iova; +- vhost_iova_tree_remove(v->iova_tree, result); ++ vhost_iova_tree_remove(v->iova_tree, *result); + } + vhost_vdpa_iotlb_batch_begin_once(v); + ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); +@@ -944,7 +944,7 @@ static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, + needle->perm == IOMMU_RO); + if (unlikely(r != 0)) { + error_setg_errno(errp, -r, "Cannot map region to device"); +- vhost_iova_tree_remove(v->iova_tree, needle); ++ vhost_iova_tree_remove(v->iova_tree, *needle); + } + + return r == 0; +diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h +index 16bbfdf5f8..8528e5c98f 100644 +--- a/include/qemu/iova-tree.h ++++ b/include/qemu/iova-tree.h +@@ -73,7 +73,7 @@ int 
iova_tree_insert(IOVATree *tree, const DMAMap *map); + * all the mappings that are included in the provided range will be + * removed from the tree. Here map->translated_addr is meaningless. + */ +-void iova_tree_remove(IOVATree *tree, const DMAMap *map); ++void iova_tree_remove(IOVATree *tree, DMAMap map); + + /** + * iova_tree_find: +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 303447a68e..a49e7e649d 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -244,7 +244,7 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) + error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); + } + +- vhost_iova_tree_remove(tree, map); ++ vhost_iova_tree_remove(tree, *map); + } + + static size_t vhost_vdpa_net_cvq_cmd_len(void) +@@ -297,7 +297,7 @@ static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, + return true; + + dma_map_err: +- vhost_iova_tree_remove(v->iova_tree, &map); ++ vhost_iova_tree_remove(v->iova_tree, map); + return false; + } + +diff --git a/util/iova-tree.c b/util/iova-tree.c +index fee530a579..536789797e 100644 +--- a/util/iova-tree.c ++++ b/util/iova-tree.c +@@ -164,11 +164,11 @@ void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator) + g_tree_foreach(tree->tree, iova_tree_traverse, iterator); + } + +-void iova_tree_remove(IOVATree *tree, const DMAMap *map) ++void iova_tree_remove(IOVATree *tree, DMAMap map) + { + const DMAMap *overlap; + +- while ((overlap = iova_tree_find(tree, map))) { ++ while ((overlap = iova_tree_find(tree, &map))) { + g_tree_remove(tree->tree, overlap); + } + } +-- +2.31.1 + diff --git a/kvm-vdpa-Add-vhost_vdpa_net_load_mq.patch b/kvm-vdpa-Add-vhost_vdpa_net_load_mq.patch new file mode 100644 index 0000000..423cff9 --- /dev/null +++ b/kvm-vdpa-Add-vhost_vdpa_net_load_mq.patch @@ -0,0 +1,74 @@ +From 466adb0e641f5c918cbea84e962ae9352f440663 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 11 Aug 2022 14:28:47 +0200 +Subject: [PATCH 22/29] vdpa: Add 
vhost_vdpa_net_load_mq +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [22/25] 01e861ad39d6b8e15870296f508726565101213b (redhat/centos-stream/src/qemu-kvm) + +Upstream: Not merged yet + +Same way as with the MAC, restore the expected number of queues at +device's start. + +Signed-off-by: Eugenio Pérez +Signed-off-by: Laurent Vivier +--- + net/vhost-vdpa.c | 26 ++++++++++++++++++++++++++ + 1 file changed, 26 insertions(+) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index e799e744cd..3950e4f25d 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -400,6 +400,28 @@ static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n) + return 0; + } + ++static int vhost_vdpa_net_load_mq(VhostVDPAState *s, ++ const VirtIONet *n) ++{ ++ struct virtio_net_ctrl_mq mq; ++ uint64_t features = n->parent_obj.guest_features; ++ ssize_t dev_written; ++ ++ if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) { ++ return 0; ++ } ++ ++ mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs); ++ dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ, ++ VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq, ++ sizeof(mq)); ++ if (unlikely(dev_written < 0)) { ++ return dev_written; ++ } ++ ++ return *s->status != VIRTIO_NET_OK; ++} ++ + static int vhost_vdpa_net_load(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); +@@ -418,6 +440,10 @@ static int vhost_vdpa_net_load(NetClientState *nc) + if (unlikely(r < 0)) { + return r; + } ++ r = vhost_vdpa_net_load_mq(s, n); ++ if (unlikely(r)) { ++ return r; ++ } + + return 0; + } +-- +2.31.1 + diff --git a/kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch b/kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch new file mode 100644 index 0000000..c338a29 --- /dev/null +++ 
b/kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch @@ -0,0 +1,87 @@ +From 10157c62f06e86f2ccf1fd4130ef55f7f9beac2f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:36 +0200 +Subject: [PATCH 18/29] vdpa: Add virtio-net mac address via CVQ at start +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [18/25] f5b7a59a70e51450df8c58b48e4eb30ef2a44189 (redhat/centos-stream/src/qemu-kvm) + +This is needed so the destination vdpa device sees the same state as the +guest set in the source. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit dd036d8d278e6882803bccaa8c51b8527ea33f45) +Signed-off-by: Laurent Vivier +--- + net/vhost-vdpa.c | 40 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 3575bf64ee..640434d1ea 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -363,11 +363,51 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, + return vhost_svq_poll(svq); + } + ++static int vhost_vdpa_net_load(NetClientState *nc) ++{ ++ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); ++ const struct vhost_vdpa *v = &s->vhost_vdpa; ++ const VirtIONet *n; ++ uint64_t features; ++ ++ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); ++ ++ if (!v->shadow_vqs_enabled) { ++ return 0; ++ } ++ ++ n = VIRTIO_NET(v->dev->vdev); ++ features = n->parent_obj.guest_features; ++ if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) { ++ const struct virtio_net_ctrl_hdr ctrl = { ++ .class = VIRTIO_NET_CTRL_MAC, ++ .cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET, ++ }; ++ char *cursor = s->cvq_cmd_out_buffer; ++ ssize_t dev_written; ++ ++ memcpy(cursor, 
&ctrl, sizeof(ctrl)); ++ cursor += sizeof(ctrl); ++ memcpy(cursor, n->mac, sizeof(n->mac)); ++ ++ dev_written = vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + sizeof(n->mac), ++ sizeof(virtio_net_ctrl_ack)); ++ if (unlikely(dev_written < 0)) { ++ return dev_written; ++ } ++ ++ return *((virtio_net_ctrl_ack *)s->cvq_cmd_in_buffer) != VIRTIO_NET_OK; ++ } ++ ++ return 0; ++} ++ + static NetClientInfo net_vhost_vdpa_cvq_info = { + .type = NET_CLIENT_DRIVER_VHOST_VDPA, + .size = sizeof(VhostVDPAState), + .receive = vhost_vdpa_receive, + .start = vhost_vdpa_net_cvq_start, ++ .load = vhost_vdpa_net_load, + .stop = vhost_vdpa_net_cvq_stop, + .cleanup = vhost_vdpa_cleanup, + .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, +-- +2.31.1 + diff --git a/kvm-vdpa-Allow-MQ-feature-in-SVQ.patch b/kvm-vdpa-Allow-MQ-feature-in-SVQ.patch new file mode 100644 index 0000000..1d308aa --- /dev/null +++ b/kvm-vdpa-Allow-MQ-feature-in-SVQ.patch @@ -0,0 +1,41 @@ +From 4a1688ed7d06aef31ef48a018b1f4be7690481fd Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 11 Aug 2022 14:54:22 +0200 +Subject: [PATCH 25/29] vdpa: Allow MQ feature in SVQ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [25/25] e416f00fdbcf7af3ddd504e76519510e3bdc57b7 (redhat/centos-stream/src/qemu-kvm) + +Upstream: Not merged yet + +Finally enable SVQ with MQ feature. 
+ +Signed-off-by: Eugenio Pérez +Signed-off-by: Laurent Vivier +--- + net/vhost-vdpa.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index c6cbe2fb5c..4bc3fd01a8 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -94,6 +94,7 @@ static const uint64_t vdpa_svq_device_features = + BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | + BIT_ULL(VIRTIO_NET_F_STATUS) | + BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | ++ BIT_ULL(VIRTIO_NET_F_MQ) | + BIT_ULL(VIRTIO_F_ANY_LAYOUT) | + BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | + BIT_ULL(VIRTIO_NET_F_RSC_EXT) | +-- +2.31.1 + diff --git a/kvm-vdpa-Delete-CVQ-migration-blocker.patch b/kvm-vdpa-Delete-CVQ-migration-blocker.patch new file mode 100644 index 0000000..f99983b --- /dev/null +++ b/kvm-vdpa-Delete-CVQ-migration-blocker.patch @@ -0,0 +1,98 @@ +From caa8a1d41ca1f2b9c4d1c6cc287c8ae22063b488 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:37 +0200 +Subject: [PATCH 19/29] vdpa: Delete CVQ migration blocker +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [19/25] d3e6c009f66e1dc0069323684af28936ae10d155 (redhat/centos-stream/src/qemu-kvm) + +We can restore the device state in the destination via CVQ now. Remove +the migration blocker. 
+ +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 0e3fdcffead7c651ce06ab50cffb89e806f04e2b) +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost-vdpa.c | 15 --------------- + include/hw/virtio/vhost-vdpa.h | 1 - + net/vhost-vdpa.c | 2 -- + 3 files changed, 18 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 23ae5ef48b..7468e44b87 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1033,13 +1033,6 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) + return true; + } + +- if (v->migration_blocker) { +- int r = migrate_add_blocker(v->migration_blocker, &err); +- if (unlikely(r < 0)) { +- return false; +- } +- } +- + for (i = 0; i < v->shadow_vqs->len; ++i) { + VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); +@@ -1082,10 +1075,6 @@ err: + vhost_svq_stop(svq); + } + +- if (v->migration_blocker) { +- migrate_del_blocker(v->migration_blocker); +- } +- + return false; + } + +@@ -1101,10 +1090,6 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); + vhost_vdpa_svq_unmap_rings(dev, svq); + } +- +- if (v->migration_blocker) { +- migrate_del_blocker(v->migration_blocker); +- } + } + + static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index d10a89303e..1111d85643 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -35,7 +35,6 @@ typedef struct vhost_vdpa { + bool shadow_vqs_enabled; + /* IOVA mapping used by the Shadow Virtqueue */ + VhostIOVATree *iova_tree; +- Error *migration_blocker; + GPtrArray *shadow_vqs; + const VhostShadowVirtqueueOps *shadow_vq_ops; + void *shadow_vq_ops_opaque; +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 640434d1ea..6ce68fcd3f 
100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -555,8 +555,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + + s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; + s->vhost_vdpa.shadow_vq_ops_opaque = s; +- error_setg(&s->vhost_vdpa.migration_blocker, +- "Migration disabled: vhost-vdpa uses CVQ."); + } + ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); + if (ret) { +-- +2.31.1 + diff --git a/kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch b/kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch new file mode 100644 index 0000000..8b6dd7e --- /dev/null +++ b/kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch @@ -0,0 +1,133 @@ +From 08d9ea9f9218ad628771f3962d52fb4b6c110262 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:20:06 +0200 +Subject: [PATCH 05/29] vdpa: Make SVQ vring unmapping return void +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [5/25] 340a2246e85d30b6d30ab24198af0fb65520276e (redhat/centos-stream/src/qemu-kvm) + +Nothing actually reads the return value, but an error in cleaning some +entries could cause device stop to abort, making a restart impossible. +Better to explicitly ignore the return value. 
+ +Reported-by: Lei Yang +Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ") +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 5b590f51b923776a14d3bcafcb393279c1b72022) +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost-vdpa.c | 32 ++++++++++---------------------- + 1 file changed, 10 insertions(+), 22 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index e16e0e222e..e208dd000e 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -884,7 +884,7 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, + /** + * Unmap a SVQ area in the device + */ +-static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, ++static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, + const DMAMap *needle) + { + const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle); +@@ -893,38 +893,33 @@ static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, + + if (unlikely(!result)) { + error_report("Unable to find SVQ address to unmap"); +- return false; ++ return; + } + + size = ROUND_UP(result->size, qemu_real_host_page_size()); + r = vhost_vdpa_dma_unmap(v, result->iova, size); + if (unlikely(r < 0)) { + error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); +- return false; ++ return; + } + + vhost_iova_tree_remove(v->iova_tree, *result); +- return r == 0; + } + +-static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, ++static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, + const VhostShadowVirtqueue *svq) + { + DMAMap needle = {}; + struct vhost_vdpa *v = dev->opaque; + struct vhost_vring_addr svq_addr; +- bool ok; + + vhost_svq_get_vring_addr(svq, &svq_addr); + + needle.translated_addr = svq_addr.desc_user_addr; +- ok = vhost_vdpa_svq_unmap_ring(v, &needle); +- if (unlikely(!ok)) { +- return false; +- } ++ vhost_vdpa_svq_unmap_ring(v, &needle); + + needle.translated_addr = svq_addr.used_user_addr; +- return 
vhost_vdpa_svq_unmap_ring(v, &needle); ++ vhost_vdpa_svq_unmap_ring(v, &needle); + } + + /** +@@ -1095,26 +1090,22 @@ err: + return false; + } + +-static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev) ++static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) + { + struct vhost_vdpa *v = dev->opaque; + + if (!v->shadow_vqs) { +- return true; ++ return; + } + + for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); +- bool ok = vhost_vdpa_svq_unmap_rings(dev, svq); +- if (unlikely(!ok)) { +- return false; +- } ++ vhost_vdpa_svq_unmap_rings(dev, svq); + } + + if (v->migration_blocker) { + migrate_del_blocker(v->migration_blocker); + } +- return true; + } + + static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) +@@ -1131,10 +1122,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) + } + vhost_vdpa_set_vring_ready(dev); + } else { +- ok = vhost_vdpa_svqs_stop(dev); +- if (unlikely(!ok)) { +- return -1; +- } ++ vhost_vdpa_svqs_stop(dev); + vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); + } + +-- +2.31.1 + diff --git a/kvm-vdpa-Make-VhostVDPAState-cvq_cmd_in_buffer-control-a.patch b/kvm-vdpa-Make-VhostVDPAState-cvq_cmd_in_buffer-control-a.patch new file mode 100644 index 0000000..c762cf4 --- /dev/null +++ b/kvm-vdpa-Make-VhostVDPAState-cvq_cmd_in_buffer-control-a.patch @@ -0,0 +1,113 @@ +From d44701ad634f05c31a1b0f0b84b168ed1ec19f71 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Wed, 24 Aug 2022 20:28:35 +0200 +Subject: [PATCH 20/29] vdpa: Make VhostVDPAState cvq_cmd_in_buffer control ack + type +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [20/25] fd7012502f7002f61ea2e0c90baac013e09282de 
(redhat/centos-stream/src/qemu-kvm) + +Upstream: Not merged yet + +This allows to simplify the code. Rename to status while we're at it. + +Signed-off-by: Eugenio Pérez +Signed-off-by: Laurent Vivier +--- + net/vhost-vdpa.c | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 6ce68fcd3f..535315c1d0 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -35,7 +35,9 @@ typedef struct VhostVDPAState { + VHostNetState *vhost_net; + + /* Control commands shadow buffers */ +- void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer; ++ void *cvq_cmd_out_buffer; ++ virtio_net_ctrl_ack *status; ++ + bool started; + } VhostVDPAState; + +@@ -158,7 +160,7 @@ static void vhost_vdpa_cleanup(NetClientState *nc) + struct vhost_dev *dev = &s->vhost_net->dev; + + qemu_vfree(s->cvq_cmd_out_buffer); +- qemu_vfree(s->cvq_cmd_in_buffer); ++ qemu_vfree(s->status); + if (dev->vq_index + dev->nvqs == dev->vq_index_end) { + g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); + } +@@ -310,7 +312,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) + return r; + } + +- r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer, ++ r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->status, + vhost_vdpa_net_cvq_cmd_page_len(), true); + if (unlikely(r < 0)) { + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); +@@ -327,7 +329,7 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc) + + if (s->vhost_vdpa.shadow_vqs_enabled) { + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); +- vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer); ++ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status); + } + } + +@@ -340,7 +342,7 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, + .iov_len = out_len, + }; + const struct iovec in = { +- .iov_base = s->cvq_cmd_in_buffer, ++ .iov_base = s->status, + .iov_len = sizeof(virtio_net_ctrl_ack), + }; 
+ VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0); +@@ -396,7 +398,7 @@ static int vhost_vdpa_net_load(NetClientState *nc) + return dev_written; + } + +- return *((virtio_net_ctrl_ack *)s->cvq_cmd_in_buffer) != VIRTIO_NET_OK; ++ return *s->status != VIRTIO_NET_OK; + } + + return 0; +@@ -491,8 +493,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + goto out; + } + +- memcpy(&status, s->cvq_cmd_in_buffer, sizeof(status)); +- if (status != VIRTIO_NET_OK) { ++ if (*s->status != VIRTIO_NET_OK) { + return VIRTIO_NET_ERR; + } + +@@ -549,9 +550,9 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), + vhost_vdpa_net_cvq_cmd_page_len()); + memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); +- s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size(), +- vhost_vdpa_net_cvq_cmd_page_len()); +- memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); ++ s->status = qemu_memalign(qemu_real_host_page_size(), ++ vhost_vdpa_net_cvq_cmd_page_len()); ++ memset(s->status, 0, vhost_vdpa_net_cvq_cmd_page_len()); + + s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; + s->vhost_vdpa.shadow_vq_ops_opaque = s; +-- +2.31.1 + diff --git a/kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch b/kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch new file mode 100644 index 0000000..ab07d88 --- /dev/null +++ b/kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch @@ -0,0 +1,251 @@ +From 0c03e18c49b62241d046ecb15c0ee3e7f9c2e547 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:33 +0200 +Subject: [PATCH 15/29] vdpa: Move command buffers map to start of net device +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 
+RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [15/25] 216c18aa307f7bdef1575f581b767b6f023a73bd (redhat/centos-stream/src/qemu-kvm) + +As this series will reuse them to restore the device state at the end of +a migration (or a device start), let's allocate only once at the device +start so we don't duplicate their map and unmap. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 7a7f87e94c4e75ca177564491595dd17b7e41a62) +Signed-off-by: Laurent Vivier +--- + net/vhost-vdpa.c | 123 ++++++++++++++++++++++------------------------- + 1 file changed, 58 insertions(+), 65 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 1a597c2e92..452d10ed93 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -263,29 +263,20 @@ static size_t vhost_vdpa_net_cvq_cmd_page_len(void) + return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size()); + } + +-/** Copy and map a guest buffer. */ +-static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, +- const struct iovec *out_data, +- size_t out_num, size_t data_len, void *buf, +- size_t *written, bool write) ++/** Map CVQ buffer. */ ++static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, ++ bool write) + { + DMAMap map = {}; + int r; + +- if (unlikely(!data_len)) { +- qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n", +- __func__, write ? "in" : "out"); +- return false; +- } +- +- *written = iov_to_buf(out_data, out_num, 0, buf, data_len); + map.translated_addr = (hwaddr)(uintptr_t)buf; +- map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1; ++ map.size = size - 1; + map.perm = write ? 
IOMMU_RW : IOMMU_RO, + r = vhost_iova_tree_map_alloc(v->iova_tree, &map); + if (unlikely(r != IOVA_OK)) { + error_report("Cannot map injected element"); +- return false; ++ return r; + } + + r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, +@@ -294,50 +285,58 @@ static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, + goto dma_map_err; + } + +- return true; ++ return 0; + + dma_map_err: + vhost_iova_tree_remove(v->iova_tree, map); +- return false; ++ return r; + } + +-/** +- * Copy the guest element into a dedicated buffer suitable to be sent to NIC +- * +- * @iov: [0] is the out buffer, [1] is the in one +- */ +-static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, +- VirtQueueElement *elem, +- struct iovec *iov) ++static int vhost_vdpa_net_cvq_start(NetClientState *nc) + { +- size_t in_copied; +- bool ok; ++ VhostVDPAState *s; ++ int r; + +- iov[0].iov_base = s->cvq_cmd_out_buffer; +- ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num, +- vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base, +- &iov[0].iov_len, false); +- if (unlikely(!ok)) { +- return false; ++ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); ++ ++ s = DO_UPCAST(VhostVDPAState, nc, nc); ++ if (!s->vhost_vdpa.shadow_vqs_enabled) { ++ return 0; + } + +- iov[1].iov_base = s->cvq_cmd_in_buffer; +- ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0, +- sizeof(virtio_net_ctrl_ack), iov[1].iov_base, +- &in_copied, true); +- if (unlikely(!ok)) { ++ r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer, ++ vhost_vdpa_net_cvq_cmd_page_len(), false); ++ if (unlikely(r < 0)) { ++ return r; ++ } ++ ++ r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer, ++ vhost_vdpa_net_cvq_cmd_page_len(), true); ++ if (unlikely(r < 0)) { + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); +- return false; + } + +- iov[1].iov_len = sizeof(virtio_net_ctrl_ack); +- return true; ++ return r; ++} ++ ++static void 
vhost_vdpa_net_cvq_stop(NetClientState *nc) ++{ ++ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); ++ ++ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); ++ ++ if (s->vhost_vdpa.shadow_vqs_enabled) { ++ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); ++ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer); ++ } + } + + static NetClientInfo net_vhost_vdpa_cvq_info = { + .type = NET_CLIENT_DRIVER_VHOST_VDPA, + .size = sizeof(VhostVDPAState), + .receive = vhost_vdpa_receive, ++ .start = vhost_vdpa_net_cvq_start, ++ .stop = vhost_vdpa_net_cvq_stop, + .cleanup = vhost_vdpa_cleanup, + .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, + .has_ufo = vhost_vdpa_has_ufo, +@@ -348,19 +347,17 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { + * Do not forward commands not supported by SVQ. Otherwise, the device could + * accept it and qemu would not know how to update the device model. + */ +-static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out, +- size_t out_num) ++static bool vhost_vdpa_net_cvq_validate_cmd(const void *out_buf, size_t len) + { + struct virtio_net_ctrl_hdr ctrl; +- size_t n; + +- n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl)); +- if (unlikely(n < sizeof(ctrl))) { ++ if (unlikely(len < sizeof(ctrl))) { + qemu_log_mask(LOG_GUEST_ERROR, +- "%s: invalid legnth of out buffer %zu\n", __func__, n); ++ "%s: invalid legnth of out buffer %zu\n", __func__, len); + return false; + } + ++ memcpy(&ctrl, out_buf, sizeof(ctrl)); + switch (ctrl.class) { + case VIRTIO_NET_CTRL_MAC: + switch (ctrl.cmd) { +@@ -392,10 +389,14 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + VhostVDPAState *s = opaque; + size_t in_len, dev_written; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; +- /* out and in buffers sent to the device */ +- struct iovec dev_buffers[2] = { +- { .iov_base = s->cvq_cmd_out_buffer }, +- { .iov_base = s->cvq_cmd_in_buffer }, ++ /* Out buffer sent to both the vdpa device and the device 
model */ ++ struct iovec out = { ++ .iov_base = s->cvq_cmd_out_buffer, ++ }; ++ /* In buffer sent to the device */ ++ const struct iovec dev_in = { ++ .iov_base = s->cvq_cmd_in_buffer, ++ .iov_len = sizeof(virtio_net_ctrl_ack), + }; + /* in buffer used for device model */ + const struct iovec in = { +@@ -405,17 +406,15 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + int r = -EINVAL; + bool ok; + +- ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers); +- if (unlikely(!ok)) { +- goto out; +- } +- +- ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1); ++ out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, ++ s->cvq_cmd_out_buffer, ++ vhost_vdpa_net_cvq_cmd_len()); ++ ok = vhost_vdpa_net_cvq_validate_cmd(s->cvq_cmd_out_buffer, out.iov_len); + if (unlikely(!ok)) { + goto out; + } + +- r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem); ++ r = vhost_svq_add(svq, &out, 1, &dev_in, 1, elem); + if (unlikely(r != 0)) { + if (unlikely(r == -ENOSPC)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", +@@ -435,13 +434,13 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + goto out; + } + +- memcpy(&status, dev_buffers[1].iov_base, sizeof(status)); ++ memcpy(&status, s->cvq_cmd_in_buffer, sizeof(status)); + if (status != VIRTIO_NET_OK) { + goto out; + } + + status = VIRTIO_NET_ERR; +- virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1); ++ virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1); + if (status != VIRTIO_NET_OK) { + error_report("Bad CVQ processing in model"); + } +@@ -454,12 +453,6 @@ out: + } + vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); + g_free(elem); +- if (dev_buffers[0].iov_base) { +- vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base); +- } +- if (dev_buffers[1].iov_base) { +- vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base); +- } + return r; + } + +-- +2.31.1 + diff --git 
a/kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch b/kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch new file mode 100644 index 0000000..8c3aae4 --- /dev/null +++ b/kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch @@ -0,0 +1,49 @@ +From dae6d9efac6d7307ccd1e1bebf0a14014f2a4f34 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:20:05 +0200 +Subject: [PATCH 04/29] vdpa: Remove SVQ vring from iova_tree at shutdown +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [4/25] 813fb80fc3c9872729e6b345e1e9209548aa7481 (redhat/centos-stream/src/qemu-kvm) + +Although the device will be reset before usage, the right thing to do is +to clean it. + +Reported-by: Lei Yang +Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ") +Signed-off-by: Eugenio Pérez +Signed-off-by: Jason Wang +(cherry picked from commit b37c12be962f95fd1e93b470a5ff05f6e2035d46) +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost-vdpa.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 87e0ad393f..e16e0e222e 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -898,6 +898,12 @@ static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, + + size = ROUND_UP(result->size, qemu_real_host_page_size()); + r = vhost_vdpa_dma_unmap(v, result->iova, size); ++ if (unlikely(r < 0)) { ++ error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); ++ return false; ++ } ++ ++ vhost_iova_tree_remove(v->iova_tree, *result); + return r == 0; + } + +-- +2.31.1 + diff --git a/kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch b/kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch new file mode 100644 index 0000000..ab58a35 --- 
/dev/null +++ b/kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch @@ -0,0 +1,48 @@ +From 67291df3eca8b3d74567c0e8211c9f7da65e74d4 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:20:02 +0200 +Subject: [PATCH 01/29] vdpa: Skip the maps not in the iova tree +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [1/25] d385d5b600ac4f1a9f9fd4f523e5d4078df8478a (redhat/centos-stream/src/qemu-kvm) + +Next patch will skip the registering of dma maps that the vdpa device +rejects in the iova tree. We need to consider that here or we cause a +SIGSEGV accessing result. + +Reported-by: Lei Yang +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 10dab9f2635b9bab23a2b29974b526e62bb61268) +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost-vdpa.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 3ff9ce3501..983d3697b0 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -289,6 +289,10 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, + }; + + result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region); ++ if (!result) { ++ /* The memory listener map wasn't mapped */ ++ return; ++ } + iova = result->iova; + vhost_iova_tree_remove(v->iova_tree, result); + } +-- +2.31.1 + diff --git a/kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch b/kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch new file mode 100644 index 0000000..7fdb0e7 --- /dev/null +++ b/kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch @@ -0,0 +1,79 @@ +From c91852883439c3a5349f6787b11b7bc71d6504a5 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= 
+Date: Tue, 23 Aug 2022 20:20:08 +0200 +Subject: [PATCH 07/29] vdpa: Use ring hwaddr at vhost_vdpa_svq_unmap_ring +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [7/25] 961d9854ae1088fc487b32b605fef207aad08924 (redhat/centos-stream/src/qemu-kvm) + +Reduce code duplication. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 8b6d6119ad7fd983d192f60c4960fb6a9197d995) +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost-vdpa.c | 17 ++++++++--------- + 1 file changed, 8 insertions(+), 9 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index e208dd000e..23ae5ef48b 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -884,10 +884,12 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, + /** + * Unmap a SVQ area in the device + */ +-static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, +- const DMAMap *needle) ++static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr) + { +- const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle); ++ const DMAMap needle = { ++ .translated_addr = addr, ++ }; ++ const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, &needle); + hwaddr size; + int r; + +@@ -909,17 +911,14 @@ static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, + static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, + const VhostShadowVirtqueue *svq) + { +- DMAMap needle = {}; + struct vhost_vdpa *v = dev->opaque; + struct vhost_vring_addr svq_addr; + + vhost_svq_get_vring_addr(svq, &svq_addr); + +- needle.translated_addr = svq_addr.desc_user_addr; +- vhost_vdpa_svq_unmap_ring(v, &needle); ++ vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr); + +- needle.translated_addr = 
svq_addr.used_user_addr; +- vhost_vdpa_svq_unmap_ring(v, &needle); ++ vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr); + } + + /** +@@ -997,7 +996,7 @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev, + ok = vhost_vdpa_svq_map_ring(v, &device_region, errp); + if (unlikely(!ok)) { + error_prepend(errp, "Cannot create vq device region: "); +- vhost_vdpa_svq_unmap_ring(v, &driver_region); ++ vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr); + } + addr->used_user_addr = device_region.iova; + +-- +2.31.1 + diff --git a/kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch b/kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch new file mode 100644 index 0000000..1bbfee9 --- /dev/null +++ b/kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch @@ -0,0 +1,62 @@ +From a32ab5c3f2156ab098e8914437f1aa00c095450e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:32 +0200 +Subject: [PATCH 14/29] vdpa: add net_vhost_vdpa_cvq_info NetClientInfo +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [14/25] 579b8389d759ae973552ade34369318e8c50aa90 (redhat/centos-stream/src/qemu-kvm) + +Next patches will add a new info callback to restore NIC status through +CVQ. Since only the CVQ vhost device is needed, create it with a new +NetClientInfo. 
+ +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit f8972b56eeace10a410990f032406250abe18d64) +Signed-off-by: Laurent Vivier +--- + net/vhost-vdpa.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index a49e7e649d..1a597c2e92 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -334,6 +334,16 @@ static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, + return true; + } + ++static NetClientInfo net_vhost_vdpa_cvq_info = { ++ .type = NET_CLIENT_DRIVER_VHOST_VDPA, ++ .size = sizeof(VhostVDPAState), ++ .receive = vhost_vdpa_receive, ++ .cleanup = vhost_vdpa_cleanup, ++ .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, ++ .has_ufo = vhost_vdpa_has_ufo, ++ .check_peer_type = vhost_vdpa_check_peer_type, ++}; ++ + /** + * Do not forward commands not supported by SVQ. Otherwise, the device could + * accept it and qemu would not know how to update the device model. +@@ -475,7 +485,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, + name); + } else { +- nc = qemu_new_net_control_client(&net_vhost_vdpa_info, peer, ++ nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, + device, name); + } + snprintf(nc->info_str, sizeof(nc->info_str), TYPE_VHOST_VDPA); +-- +2.31.1 + diff --git a/kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch b/kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch new file mode 100644 index 0000000..ba35d21 --- /dev/null +++ b/kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch @@ -0,0 +1,83 @@ +From 8b85c33c0efb0c6f2dc3705ee83082438db9d397 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:20:03 +0200 +Subject: [PATCH 02/29] vdpa: do not save failed dma maps in SVQ iova tree +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + 
+RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [2/25] fc285fecfd400702f81345cef445f5218bcbacad (redhat/centos-stream/src/qemu-kvm) + +If a map fails for whatever reason, it must not be saved in the tree. +Otherwise, qemu will try to unmap it in cleanup, leading to more errors. + +Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ") +Reported-by: Lei Yang +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 7dab70bec397e3522211e7bcc36d879bad8154c5) +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost-vdpa.c | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 983d3697b0..7e28d2f674 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -176,6 +176,7 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener) + static void vhost_vdpa_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { ++ DMAMap mem_region = {}; + struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); + hwaddr iova; + Int128 llend, llsize; +@@ -212,13 +213,13 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, + + llsize = int128_sub(llend, int128_make64(iova)); + if (v->shadow_vqs_enabled) { +- DMAMap mem_region = { +- .translated_addr = (hwaddr)(uintptr_t)vaddr, +- .size = int128_get64(llsize) - 1, +- .perm = IOMMU_ACCESS_FLAG(true, section->readonly), +- }; ++ int r; + +- int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); ++ mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr, ++ mem_region.size = int128_get64(llsize) - 1, ++ mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly), ++ ++ r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); + if (unlikely(r != IOVA_OK)) { +
error_report("Can't allocate a mapping (%d)", r); + goto fail; +@@ -232,11 +233,16 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, + vaddr, section->readonly); + if (ret) { + error_report("vhost vdpa map fail!"); +- goto fail; ++ goto fail_map; + } + + return; + ++fail_map: ++ if (v->shadow_vqs_enabled) { ++ vhost_iova_tree_remove(v->iova_tree, &mem_region); ++ } ++ + fail: + /* + * On the initfn path, store the first error in the container so we +-- +2.31.1 + diff --git a/kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch b/kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch new file mode 100644 index 0000000..7737060 --- /dev/null +++ b/kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch @@ -0,0 +1,153 @@ +From 09b86938668bf6111fb6549fcd012f50418a7613 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:34 +0200 +Subject: [PATCH 16/29] vdpa: extract vhost_vdpa_net_cvq_add from + vhost_vdpa_net_handle_ctrl_avail +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [16/25] 7d577b06dcd889f836d5bcbaf6a64998fb138543 (redhat/centos-stream/src/qemu-kvm) + +So we can reuse it to inject state messages. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +-- +v7: +* Remove double free error + +v6: +* Do not assume in buffer sent to the device is sizeof(virtio_net_ctrl_ack) + +v5: +* Do not use an artificial !NULL VirtQueueElement +* Use only out size instead of iovec dev_buffers for these functions. 
+ +Signed-off-by: Jason Wang +(cherry picked from commit be4278b65fc1be8fce87e1e7c01bc52602d304eb) +Signed-off-by: Laurent Vivier +--- + net/vhost-vdpa.c | 59 +++++++++++++++++++++++++++++++----------------- + 1 file changed, 38 insertions(+), 21 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 452d10ed93..3575bf64ee 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -331,6 +331,38 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc) + } + } + ++static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, ++ size_t in_len) ++{ ++ /* Buffers for the device */ ++ const struct iovec out = { ++ .iov_base = s->cvq_cmd_out_buffer, ++ .iov_len = out_len, ++ }; ++ const struct iovec in = { ++ .iov_base = s->cvq_cmd_in_buffer, ++ .iov_len = sizeof(virtio_net_ctrl_ack), ++ }; ++ VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0); ++ int r; ++ ++ r = vhost_svq_add(svq, &out, 1, &in, 1, NULL); ++ if (unlikely(r != 0)) { ++ if (unlikely(r == -ENOSPC)) { ++ qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", ++ __func__); ++ } ++ return r; ++ } ++ ++ /* ++ * We can poll here since we've had BQL from the time we sent the ++ * descriptor. 
Also, we need to take the answer before SVQ pulls by itself, ++ * when BQL is released ++ */ ++ return vhost_svq_poll(svq); ++} ++ + static NetClientInfo net_vhost_vdpa_cvq_info = { + .type = NET_CLIENT_DRIVER_VHOST_VDPA, + .size = sizeof(VhostVDPAState), +@@ -387,23 +419,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + void *opaque) + { + VhostVDPAState *s = opaque; +- size_t in_len, dev_written; ++ size_t in_len; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + /* Out buffer sent to both the vdpa device and the device model */ + struct iovec out = { + .iov_base = s->cvq_cmd_out_buffer, + }; +- /* In buffer sent to the device */ +- const struct iovec dev_in = { +- .iov_base = s->cvq_cmd_in_buffer, +- .iov_len = sizeof(virtio_net_ctrl_ack), +- }; + /* in buffer used for device model */ + const struct iovec in = { + .iov_base = &status, + .iov_len = sizeof(status), + }; +- int r = -EINVAL; ++ ssize_t dev_written = -EINVAL; + bool ok; + + out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, +@@ -414,21 +441,11 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + goto out; + } + +- r = vhost_svq_add(svq, &out, 1, &dev_in, 1, elem); +- if (unlikely(r != 0)) { +- if (unlikely(r == -ENOSPC)) { +- qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", +- __func__); +- } ++ dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); ++ if (unlikely(dev_written < 0)) { + goto out; + } + +- /* +- * We can poll here since we've had BQL from the time we sent the +- * descriptor. 
Also, we need to take the answer before SVQ pulls by itself, +- * when BQL is released +- */ +- dev_written = vhost_svq_poll(svq); + if (unlikely(dev_written < sizeof(status))) { + error_report("Insufficient written data (%zu)", dev_written); + goto out; +@@ -436,7 +453,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + + memcpy(&status, s->cvq_cmd_in_buffer, sizeof(status)); + if (status != VIRTIO_NET_OK) { +- goto out; ++ return VIRTIO_NET_ERR; + } + + status = VIRTIO_NET_ERR; +@@ -453,7 +470,7 @@ out: + } + vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); + g_free(elem); +- return r; ++ return dev_written < 0 ? dev_written : 0; + } + + static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { +-- +2.31.1 + diff --git a/kvm-vdpa-extract-vhost_vdpa_net_load_mac-from-vhost_vdpa.patch b/kvm-vdpa-extract-vhost_vdpa_net_load_mac-from-vhost_vdpa.patch new file mode 100644 index 0000000..707013a --- /dev/null +++ b/kvm-vdpa-extract-vhost_vdpa_net_load_mac-from-vhost_vdpa.patch @@ -0,0 +1,115 @@ +From e03f7e670e608e98fa771d3860574b95908ef3a1 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 11 Aug 2022 14:12:14 +0200 +Subject: [PATCH 21/29] vdpa: extract vhost_vdpa_net_load_mac from + vhost_vdpa_net_load +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [21/25] b4b30be584aab265004648352361f25587e0ed98 (redhat/centos-stream/src/qemu-kvm) + +Upstream: Not merged yet + +Since there may be many commands we need to issue to load the NIC +state, let's split them in individual functions + +Signed-off-by: Eugenio Pérez +Signed-off-by: Laurent Vivier +--- + net/vhost-vdpa.c | 62 +++++++++++++++++++++++++++++++----------------- + 1 file changed, 40 insertions(+), 22 deletions(-) + 
+diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 535315c1d0..e799e744cd 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -365,12 +365,47 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, + return vhost_svq_poll(svq); + } + ++static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class, ++ uint8_t cmd, const void *data, ++ size_t data_size) ++{ ++ const struct virtio_net_ctrl_hdr ctrl = { ++ .class = class, ++ .cmd = cmd, ++ }; ++ ++ assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl)); ++ ++ memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl)); ++ memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size); ++ ++ return vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + data_size, ++ sizeof(virtio_net_ctrl_ack)); ++} ++ ++static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n) ++{ ++ uint64_t features = n->parent_obj.guest_features; ++ if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) { ++ ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC, ++ VIRTIO_NET_CTRL_MAC_ADDR_SET, ++ n->mac, sizeof(n->mac)); ++ if (unlikely(dev_written < 0)) { ++ return dev_written; ++ } ++ ++ return *s->status != VIRTIO_NET_OK; ++ } ++ ++ return 0; ++} ++ + static int vhost_vdpa_net_load(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); +- const struct vhost_vdpa *v = &s->vhost_vdpa; ++ struct vhost_vdpa *v = &s->vhost_vdpa; + const VirtIONet *n; +- uint64_t features; ++ int r; + + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); + +@@ -379,26 +414,9 @@ static int vhost_vdpa_net_load(NetClientState *nc) + } + + n = VIRTIO_NET(v->dev->vdev); +- features = n->parent_obj.guest_features; +- if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) { +- const struct virtio_net_ctrl_hdr ctrl = { +- .class = VIRTIO_NET_CTRL_MAC, +- .cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET, +- }; +- char *cursor = s->cvq_cmd_out_buffer; +- ssize_t dev_written; +- +- memcpy(cursor, &ctrl, 
sizeof(ctrl)); +- cursor += sizeof(ctrl); +- memcpy(cursor, n->mac, sizeof(n->mac)); +- +- dev_written = vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + sizeof(n->mac), +- sizeof(virtio_net_ctrl_ack)); +- if (unlikely(dev_written < 0)) { +- return dev_written; +- } +- +- return *s->status != VIRTIO_NET_OK; ++ r = vhost_vdpa_net_load_mac(s, n); ++ if (unlikely(r < 0)) { ++ return r; + } + + return 0; +-- +2.31.1 + diff --git a/kvm-vdpa-validate-MQ-CVQ-commands.patch b/kvm-vdpa-validate-MQ-CVQ-commands.patch new file mode 100644 index 0000000..2e816eb --- /dev/null +++ b/kvm-vdpa-validate-MQ-CVQ-commands.patch @@ -0,0 +1,50 @@ +From b4a0334826d5b28dd8f63edaa606cc123b60a538 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 11 Aug 2022 14:53:10 +0200 +Subject: [PATCH 23/29] vdpa: validate MQ CVQ commands +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [23/25] b727a8bba49a364c6c9afe3d7bfcc70e3ee942f4 (redhat/centos-stream/src/qemu-kvm) + +Upstream: Not merged yet + +So we are sure we can update the device model properly before sending to +the device. 
+ +Signed-off-by: Eugenio Pérez +Signed-off-by: Laurent Vivier +--- + net/vhost-vdpa.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 3950e4f25d..c6cbe2fb5c 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -486,6 +486,15 @@ static bool vhost_vdpa_net_cvq_validate_cmd(const void *out_buf, size_t len) + __func__, ctrl.cmd); + }; + break; ++ case VIRTIO_NET_CTRL_MQ: ++ switch (ctrl.cmd) { ++ case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET: ++ return true; ++ default: ++ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mq cmd %u\n", ++ __func__, ctrl.cmd); ++ }; ++ break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n", + __func__, ctrl.class); +-- +2.31.1 + diff --git a/kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch b/kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch new file mode 100644 index 0000000..b01d7aa --- /dev/null +++ b/kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch @@ -0,0 +1,67 @@ +From 88ea456e00f5af59417ef2c397adfea4cf9c685e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:20:07 +0200 +Subject: [PATCH 06/29] vhost: Always store new kick fd on + vhost_svq_set_svq_kick_fd +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [6/25] 1c2ec6d321446505b9f9d0cc0cf0d812cfddd959 (redhat/centos-stream/src/qemu-kvm) + +We can unbind twice a file descriptor if we call twice +vhost_svq_set_svq_kick_fd because of this. Since it comes from vhost and +not from SVQ, that file descriptor could be a different thing than the +guest's vhost notifier. + +Likewise, the same can happen if a guest starts and stops the device +multiple times.
+ +Reported-by: Lei Yang +Fixes: dff4426fa6 ("vhost: Add Shadow VirtQueue kick forwarding capabilities") +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 8b64e486423b09db4463799727bf1fad62fe496a) +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost-shadow-virtqueue.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index e4956728dd..82a784d250 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -602,13 +602,13 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) + event_notifier_set_handler(svq_kick, NULL); + } + ++ event_notifier_init_fd(svq_kick, svq_kick_fd); + /* + * event_notifier_set_handler already checks for guest's notifications if + * they arrive at the new file descriptor in the switch, so there is no + * need to explicitly check for them. + */ + if (poll_start) { +- event_notifier_init_fd(svq_kick, svq_kick_fd); + event_notifier_set(svq_kick); + event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier); + } +@@ -655,7 +655,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + */ + void vhost_svq_stop(VhostShadowVirtqueue *svq) + { +- event_notifier_set_handler(&svq->svq_kick, NULL); ++ vhost_svq_set_svq_kick_fd(svq, VHOST_FILE_UNBIND); + g_autofree VirtQueueElement *next_avail_elem = NULL; + + if (!svq->vq) { +-- +2.31.1 + diff --git a/kvm-vhost-Delete-useless-read-memory-barrier.patch b/kvm-vhost-Delete-useless-read-memory-barrier.patch new file mode 100644 index 0000000..7938963 --- /dev/null +++ b/kvm-vhost-Delete-useless-read-memory-barrier.patch @@ -0,0 +1,47 @@ +From 878a37760e34b54a3d92569f44b0b2f073bfa46a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:28 +0200 +Subject: [PATCH 10/29] vhost: Delete useless read memory barrier +MIME-Version: 1.0 
+Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [10/25] 13fb2b317093323caf33a17f9de00a94a862ca2e (redhat/centos-stream/src/qemu-kvm) + +As discussed in previous series [1], this memory barrier is useless with +the atomic read of used idx at vhost_svq_more_used. Deleting it. + +[1] https://lists.nongnu.org/archive/html/qemu-devel/2022-07/msg02616.html + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 9e193cec5db949e4001070442a2f7de7042ef09b) +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost-shadow-virtqueue.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index b35aeef4bd..8df5296f24 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -509,9 +509,6 @@ size_t vhost_svq_poll(VhostShadowVirtqueue *svq) + if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { + return 0; + } +- +- /* Make sure we read new used_idx */ +- smp_rmb(); + } while (true); + } + +-- +2.31.1 + diff --git a/kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch b/kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch new file mode 100644 index 0000000..858128e --- /dev/null +++ b/kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch @@ -0,0 +1,63 @@ +From 39659fb33b282188f005ba26bd2c40ce8b7a173c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:29 +0200 +Subject: [PATCH 11/29] vhost: Do not depend on !NULL VirtQueueElement on + vhost_svq_flush +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: 
RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [11/25] 2fec9b6bb72cf8ef42d08a28df3dc8b540f6f43f (redhat/centos-stream/src/qemu-kvm) + +Since QEMU will be able to inject new elements on CVQ to restore the +state, we need not to depend on a VirtQueueElement to know if a new +element has been used by the device or not. Instead of check that, check +if there are new elements only using used idx on vhost_svq_flush. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit d368c0b052ad95d3bf4fcc5a5d25715a35c91d4b) +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost-shadow-virtqueue.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 8df5296f24..e8e5bbc368 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -499,17 +499,20 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, + size_t vhost_svq_poll(VhostShadowVirtqueue *svq) + { + int64_t start_us = g_get_monotonic_time(); ++ uint32_t len; ++ + do { +- uint32_t len; +- VirtQueueElement *elem = vhost_svq_get_buf(svq, &len); +- if (elem) { +- return len; ++ if (vhost_svq_more_used(svq)) { ++ break; + } + + if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { + return 0; + } + } while (true); ++ ++ vhost_svq_get_buf(svq, &len); ++ return len; + } + + /** +-- +2.31.1 + diff --git a/kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch b/kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch new file mode 100644 index 0000000..72707ff --- /dev/null +++ b/kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch @@ -0,0 +1,80 @@ +From 33c22dd3353f79a037f2473a69176932ac1a1c05 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:26 +0200 +Subject: [PATCH 08/29] vhost: stop transfer elem ownership in + 
vhost_handle_guest_kick +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [8/25] e9c6314fddeb1f7bc738efea90f2788cae27bab7 (redhat/centos-stream/src/qemu-kvm) + +It was easier to allow vhost_svq_add to handle the memory. Now that we +will allow qemu to add elements to a SVQ without the guest's knowledge, +it's better to handle it in the caller. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 9c2ab2f1ec333be8614cc12272d4b91960704dbe) +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost-shadow-virtqueue.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 82a784d250..a1261d4a0f 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -233,9 +233,6 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) + /** + * Add an element to a SVQ. + * +- * The caller must check that there is enough slots for the new element. It +- * takes ownership of the element: In case of failure not ENOSPC, it is free. 
+- * + * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full + */ + int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, +@@ -252,7 +249,6 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, + + ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head); + if (unlikely(!ok)) { +- g_free(elem); + return -EINVAL; + } + +@@ -293,7 +289,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + virtio_queue_set_notification(svq->vq, false); + + while (true) { +- VirtQueueElement *elem; ++ g_autofree VirtQueueElement *elem; + int r; + + if (svq->next_guest_avail_elem) { +@@ -324,12 +320,14 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + * queue the current guest descriptor and ignore kicks + * until some elements are used. + */ +- svq->next_guest_avail_elem = elem; ++ svq->next_guest_avail_elem = g_steal_pointer(&elem); + } + + /* VQ is full or broken, just return and ignore kicks */ + return; + } ++ /* elem belongs to SVQ or external caller now */ ++ elem = NULL; + } + + virtio_queue_set_notification(svq->vq, true); +-- +2.31.1 + diff --git a/kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch b/kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch new file mode 100644 index 0000000..628cc8d --- /dev/null +++ b/kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch @@ -0,0 +1,55 @@ +From cf08dbe33683a66a79ec07b8450f9d3d27cff1c4 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:27 +0200 +Subject: [PATCH 09/29] vhost: use SVQ element ndescs instead of opaque data + for desc validation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [9/25] 
071eb2a0db612d516d630a15a1f0fd908ed86fd3 (redhat/centos-stream/src/qemu-kvm) + +Since we're going to allow SVQ to add elements without the guest's +knowledge and without its own VirtQueueElement, it's easier to check if +an element is a valid head checking a different thing than the +VirtQueueElement. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 86f5f2546f03a3dfde421c715187b262e29b2848) +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost-shadow-virtqueue.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index a1261d4a0f..b35aeef4bd 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -414,7 +414,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return NULL; + } + +- if (unlikely(!svq->desc_state[used_elem.id].elem)) { ++ if (unlikely(!svq->desc_state[used_elem.id].ndescs)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Device %s says index %u is used, but it was not available", + svq->vdev->name, used_elem.id); +@@ -422,6 +422,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + } + + num = svq->desc_state[used_elem.id].ndescs; ++ svq->desc_state[used_elem.id].ndescs = 0; + last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); + svq->desc_next[last_used_chain] = svq->free_head; + svq->free_head = used_elem.id; +-- +2.31.1 + diff --git a/kvm-vhost_net-Add-NetClientInfo-start-callback.patch b/kvm-vhost_net-Add-NetClientInfo-start-callback.patch new file mode 100644 index 0000000..99073b5 --- /dev/null +++ b/kvm-vhost_net-Add-NetClientInfo-start-callback.patch @@ -0,0 +1,73 @@ +From 0db23ec6808c3ff628d1b1940d2cd01fda0757d1 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:30 +0200 +Subject: [PATCH 12/29] vhost_net: Add NetClientInfo start callback +MIME-Version: 
1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [12/25] b448657fa858a885879986059694d26d870155bc (redhat/centos-stream/src/qemu-kvm) + +This is used by the backend to perform actions before the device is +started. + +In particular, vdpa net uses it to map CVQ buffers to the device, so it +can send control commands using them. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit eb92b75380fc0f2368e22be45d1e2d1e2cd2f79c) +Signed-off-by: Laurent Vivier +--- + hw/net/vhost_net.c | 7 +++++++ + include/net/net.h | 2 ++ + 2 files changed, 9 insertions(+) + +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index ccac5b7a64..2e0baeba26 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -244,6 +244,13 @@ static int vhost_net_start_one(struct vhost_net *net, + struct vhost_vring_file file = { }; + int r; + ++ if (net->nc->info->start) { ++ r = net->nc->info->start(net->nc); ++ if (r < 0) { ++ return r; ++ } ++ } ++ + r = vhost_dev_enable_notifiers(&net->dev, dev); + if (r < 0) { + goto fail_notifiers; +diff --git a/include/net/net.h b/include/net/net.h +index 523136c7ac..ad9e80083a 100644 +--- a/include/net/net.h ++++ b/include/net/net.h +@@ -44,6 +44,7 @@ typedef struct NICConf { + + typedef void (NetPoll)(NetClientState *, bool enable); + typedef bool (NetCanReceive)(NetClientState *); ++typedef int (NetStart)(NetClientState *); + typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); + typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); + typedef void (NetCleanup) (NetClientState *); +@@ -71,6 +72,7 @@ typedef struct NetClientInfo { + NetReceive *receive_raw; + NetReceiveIOV *receive_iov; + NetCanReceive *can_receive; ++ NetStart *start; +
NetCleanup *cleanup; + LinkStatusChanged *link_status_changed; + QueryRxFilter *query_rx_filter; +-- +2.31.1 + diff --git a/kvm-vhost_net-Add-NetClientInfo-stop-callback.patch b/kvm-vhost_net-Add-NetClientInfo-stop-callback.patch new file mode 100644 index 0000000..5b51f8b --- /dev/null +++ b/kvm-vhost_net-Add-NetClientInfo-stop-callback.patch @@ -0,0 +1,68 @@ +From cc3e96b81280fe45a34a26586718079072dbcf39 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:31 +0200 +Subject: [PATCH 13/29] vhost_net: Add NetClientInfo stop callback +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [13/25] cb90c1228e9af493def4818ea3b49e2b0cfae456 (redhat/centos-stream/src/qemu-kvm) + +Used by the backend to perform actions after the device is stopped. + +In particular, vdpa net uses it to unmap CVQ buffers from the device, +cleaning up the actions performed in start().
+ +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit c5e5269d8a955a0f924218911c2f4a0b34e87a21) +Signed-off-by: Laurent Vivier +--- + hw/net/vhost_net.c | 3 +++ + include/net/net.h | 2 ++ + 2 files changed, 5 insertions(+) + +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index 2e0baeba26..9d4b334453 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -320,6 +320,9 @@ static void vhost_net_stop_one(struct vhost_net *net, + net->nc->info->poll(net->nc, true); + } + vhost_dev_stop(&net->dev, dev); ++ if (net->nc->info->stop) { ++ net->nc->info->stop(net->nc); ++ } + vhost_dev_disable_notifiers(&net->dev, dev); + } + +diff --git a/include/net/net.h b/include/net/net.h +index ad9e80083a..476ad45b9a 100644 +--- a/include/net/net.h ++++ b/include/net/net.h +@@ -45,6 +45,7 @@ typedef struct NICConf { + typedef void (NetPoll)(NetClientState *, bool enable); + typedef bool (NetCanReceive)(NetClientState *); + typedef int (NetStart)(NetClientState *); ++typedef void (NetStop)(NetClientState *); + typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); + typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); + typedef void (NetCleanup) (NetClientState *); +@@ -73,6 +74,7 @@ typedef struct NetClientInfo { + NetReceiveIOV *receive_iov; + NetCanReceive *can_receive; + NetStart *start; ++ NetStop *stop; + NetCleanup *cleanup; + LinkStatusChanged *link_status_changed; + QueryRxFilter *query_rx_filter; +-- +2.31.1 + diff --git a/kvm-vhost_net-add-NetClientState-load-callback.patch b/kvm-vhost_net-add-NetClientState-load-callback.patch new file mode 100644 index 0000000..ecd279f --- /dev/null +++ b/kvm-vhost_net-add-NetClientState-load-callback.patch @@ -0,0 +1,73 @@ +From d91546b3bc3dd147b6327a4d8c5b523104a09aa6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 23 Aug 2022 20:30:35 +0200 +Subject: [PATCH 17/29] vhost_net: add 
NetClientState->load() callback +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [17/25] de71f2e8fc7b25f5197101703fbb5ff054ada984 (redhat/centos-stream/src/qemu-kvm) + +It allows per-net client operations right after device's successful +start. In particular, to load the device status. + +Vhost-vdpa net will use it to add the CVQ buffers to restore the device +status. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Signed-off-by: Jason Wang +(cherry picked from commit 539573c317dc0b8d50a128db60550f2f2898d2fc) +Signed-off-by: Laurent Vivier +--- + hw/net/vhost_net.c | 7 +++++++ + include/net/net.h | 2 ++ + 2 files changed, 9 insertions(+) + +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index 9d4b334453..d28f8b974b 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -281,6 +281,13 @@ static int vhost_net_start_one(struct vhost_net *net, + } + } + } ++ ++ if (net->nc->info->load) { ++ r = net->nc->info->load(net->nc); ++ if (r < 0) { ++ goto fail; ++ } ++ } + return 0; + fail: + file.fd = -1; +diff --git a/include/net/net.h b/include/net/net.h +index 476ad45b9a..81d0b21def 100644 +--- a/include/net/net.h ++++ b/include/net/net.h +@@ -45,6 +45,7 @@ typedef struct NICConf { + typedef void (NetPoll)(NetClientState *, bool enable); + typedef bool (NetCanReceive)(NetClientState *); + typedef int (NetStart)(NetClientState *); ++typedef int (NetLoad)(NetClientState *); + typedef void (NetStop)(NetClientState *); + typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); + typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); +@@ -74,6 +75,7 @@ typedef struct NetClientInfo { + NetReceiveIOV *receive_iov; + NetCanReceive *can_receive; + NetStart *start; ++ NetLoad *load; + NetStop 
*stop; + NetCleanup *cleanup; + LinkStatusChanged *link_status_changed; +-- +2.31.1 + diff --git a/kvm-virtio-net-Update-virtio-net-curr_queue_pairs-in-vdp.patch b/kvm-virtio-net-Update-virtio-net-curr_queue_pairs-in-vdp.patch new file mode 100644 index 0000000..7629017 --- /dev/null +++ b/kvm-virtio-net-Update-virtio-net-curr_queue_pairs-in-vdp.patch @@ -0,0 +1,61 @@ +From 9e23182c5249f876e56ef9a31b22476b5268f246 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 11 Aug 2022 16:40:07 +0200 +Subject: [PATCH 24/29] virtio-net: Update virtio-net curr_queue_pairs in vdpa + backends +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 117: vDPA SVQ Multiqueue support +RH-Jira: RHELX-57 +RH-Acked-by: Jason Wang +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Commit: [24/25] e0e6978394d6496a7e12cf8424b2e9cb87281a90 (redhat/centos-stream/src/qemu-kvm) + +Upstream: Not merged yet + +It was returned as error before. Instead of it, simply update the +corresponding field so qemu can send it in the migration data. + +Signed-off-by: Eugenio Pérez +Signed-off-by: Laurent Vivier +--- + hw/net/virtio-net.c | 17 ++++++----------- + 1 file changed, 6 insertions(+), 11 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index dd0d056fde..63a8332cd0 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1412,19 +1412,14 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, + return VIRTIO_NET_ERR; + } + +- /* Avoid changing the number of queue_pairs for vdpa device in +- * userspace handler. A future fix is needed to handle the mq +- * change in userspace handler with vhost-vdpa. Let's disable +- * the mq handling from userspace for now and only allow get +- * done through the kernel. 
Ripples may be seen when falling +- * back to userspace, but without doing it qemu process would +- * crash on a recursive entry to virtio_net_set_status(). +- */ ++ n->curr_queue_pairs = queue_pairs; + if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { +- return VIRTIO_NET_ERR; ++ /* ++ * Avoid updating the backend for a vdpa device: We're only interested ++ * in updating the device model queues. ++ */ ++ return VIRTIO_NET_OK; + } +- +- n->curr_queue_pairs = queue_pairs; + /* stop the backend before changing the number of queue_pairs to avoid handling a + * disabled queue */ + virtio_net_set_status(vdev, vdev->status); +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index f8c08dd..138d06d 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.1.0 -Release: 1%{?rcrel}%{?dist}%{?cc_suffix} +Release: 2%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -175,8 +175,6 @@ Source30: kvm-s390x.conf Source31: kvm-x86.conf Source36: README.tests -Source37: capstone.tar.gz - Patch0004: 0004-Initial-redhat-build.patch Patch0005: 0005-Re-enable-capstone-internal-build.patch @@ -193,6 +191,62 @@ Patch0015: 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0016: 0016-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch Patch0017: 0017-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch Patch0018: 0018-Introduce-upstream-7.0-compat-changes.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch19: kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch20: kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch21: kvm-util-accept-iova_tree_remove_parameter-by-value.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch22: kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch23: kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch24: kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch25: kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch26: kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch27: kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch28: kvm-vhost-Delete-useless-read-memory-barrier.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch29: kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch30: 
kvm-vhost_net-Add-NetClientInfo-start-callback.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch31: kvm-vhost_net-Add-NetClientInfo-stop-callback.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch32: kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch33: kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch34: kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch35: kvm-vhost_net-add-NetClientState-load-callback.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch36: kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch37: kvm-vdpa-Delete-CVQ-migration-blocker.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch38: kvm-vdpa-Make-VhostVDPAState-cvq_cmd_in_buffer-control-a.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch39: kvm-vdpa-extract-vhost_vdpa_net_load_mac-from-vhost_vdpa.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch40: kvm-vdpa-Add-vhost_vdpa_net_load_mq.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch41: kvm-vdpa-validate-MQ-CVQ-commands.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch42: kvm-virtio-net-Update-virtio-net-curr_queue_pairs-in-vdp.patch +# For RHELX-57 - vDPA SVQ Multiqueue support +Patch43: kvm-vdpa-Allow-MQ-feature-in-SVQ.patch +# For bz#2125281 - [RHEL9.1] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command [rhel-9.2.0] +Patch44: kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch +# For bz#2125281 - [RHEL9.1] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command [rhel-9.2.0] +Patch45: kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch +# For bz#2127825 - Use capstone for qemu-kvm build +Patch46: kvm-Revert-Re-enable-capstone-internal-build.patch %if %{have_clang} 
BuildRequires: clang @@ -263,6 +317,7 @@ BuildRequires: perl-Test-Harness BuildRequires: libslirp-devel BuildRequires: pulseaudio-libs-devel BuildRequires: spice-protocol +BuildRequires: capstone-devel # Requires for qemu-kvm package Requires: %{name}-core = %{epoch}:%{version}-%{release} @@ -290,6 +345,7 @@ Requires: edk2-ovmf %ifarch aarch64 Requires: edk2-aarch64 %endif +Requires: capstone Requires: libseccomp >= %{libseccomp_version} Requires: libusbx >= %{libusbx_version} @@ -504,7 +560,6 @@ This package provides usbredir support. %prep %setup -q -n qemu-%{version}%{?rcstr} %autopatch -p1 -/usr/bin/gzip -dc %{SOURCE37} | /usr/bin/tar -xof - %global qemu_kvm_build qemu_kvm_build mkdir -p %{qemu_kvm_build} @@ -691,7 +746,7 @@ run_configure \ %endif --enable-attr \ --enable-cap-ng \ - --enable-capstone=internal \ + --enable-capstone \ --enable-coroutine-pool \ --enable-curl \ --enable-debug-info \ @@ -1224,6 +1279,43 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Thu Sep 29 2022 Miroslav Rezanina - 7.1.0-2 +- kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch [RHELX-57] +- kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch [RHELX-57] +- kvm-util-accept-iova_tree_remove_parameter-by-value.patch [RHELX-57] +- kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch [RHELX-57] +- kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch [RHELX-57] +- kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch [RHELX-57] +- kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch [RHELX-57] +- kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch [RHELX-57] +- kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch [RHELX-57] +- kvm-vhost-Delete-useless-read-memory-barrier.patch [RHELX-57] +- kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch [RHELX-57] +- kvm-vhost_net-Add-NetClientInfo-start-callback.patch [RHELX-57] +- kvm-vhost_net-Add-NetClientInfo-stop-callback.patch [RHELX-57] +- 
kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch [RHELX-57] +- kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch [RHELX-57] +- kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch [RHELX-57] +- kvm-vhost_net-add-NetClientState-load-callback.patch [RHELX-57] +- kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch [RHELX-57] +- kvm-vdpa-Delete-CVQ-migration-blocker.patch [RHELX-57] +- kvm-vdpa-Make-VhostVDPAState-cvq_cmd_in_buffer-control-a.patch [RHELX-57] +- kvm-vdpa-extract-vhost_vdpa_net_load_mac-from-vhost_vdpa.patch [RHELX-57] +- kvm-vdpa-Add-vhost_vdpa_net_load_mq.patch [RHELX-57] +- kvm-vdpa-validate-MQ-CVQ-commands.patch [RHELX-57] +- kvm-virtio-net-Update-virtio-net-curr_queue_pairs-in-vdp.patch [RHELX-57] +- kvm-vdpa-Allow-MQ-feature-in-SVQ.patch [RHELX-57] +- kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch [bz#2125281] +- kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch [bz#2125281] +- kvm-Revert-Re-enable-capstone-internal-build.patch [bz#2127825] +- kvm-spec-Use-capstone-package.patch [bz#2127825] +- Resolves: RHELX-57 + (vDPA SVQ Multiqueue support ) +- Resolves: bz#2125281 + ([RHEL9.1] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command [rhel-9.2.0]) +- Resolves: bz#2127825 + (Use capstone for qemu-kvm build) + * Mon Sep 05 2022 Miroslav Rezanina - 7.1.0-1 - Rebase to QEMU 7.1.0 [bz#2111769] - Resolves: bz#2111769 diff --git a/sources b/sources index 5506976..9b00967 100644 --- a/sources +++ b/sources @@ -1,2 +1 @@ -SHA512 (capstone.tar.gz) = 14c5a3f3807c9294258de5bf294563fcdb56b50630cf3080dc681ae1415d938dce9485d7b0fef61cfb4a2381696f0e74c7da149b2b6218cdbb00521cd365c7e4 SHA512 (qemu-7.1.0.tar.xz) = c60c5ff8ec99b7552e485768908920658fdd8035ff7a6fa370fb6881957dc8b7e5f18ff1a8f49bd6aa22909ede2a7c084986d8244f12074ccd33ebe40a0c411f From afd495b34255e99eb11848d2d88f6aab7063b0bd Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Thu, 13 Oct 2022 15:41:45 +0000 Subject: [PATCH 
170/195] * Thu Oct 13 2022 Jon Maloy - 7.1.0-3 - kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch [bz#2108531] - Resolves: bz#2108531 (Windows guest reboot after migration with wsl2 installed inside) --- ...fix-kvmclock_current_nsec-Assertion-.patch | 60 +++++++++++++++++++ qemu-kvm.spec | 9 ++- 2 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch diff --git a/kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch b/kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch new file mode 100644 index 0000000..e849747 --- /dev/null +++ b/kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch @@ -0,0 +1,60 @@ +From f141182484fca38685cb246f77e311643cd2f4c7 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Fri, 7 Oct 2022 13:56:02 +0200 +Subject: [PATCH] target/i386/kvm: fix kvmclock_current_nsec: Assertion + `time.tsc_timestamp <= migration_tsc' failed + +RH-Author: Vitaly Kuznetsov +RH-MergeRequest: 120: target/i386/kvm: fix kvmclock_current_nsec: Assertion `time.tsc_timestamp <= migration_tsc' failed +RH-Bugzilla: 2108531 +RH-Acked-by: Marcelo Tosatti +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Paolo Bonzini +RH-Commit: [1/1] edc5bb2578f7f31ab4d87e343925f6f08e812c29 (vkuznets/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2108531 + +commit c4ef867f2949bf2a2ae18a4e27cf1a34bbc8aecb +Author: Ray Zhang +Date: Thu Sep 22 18:05:23 2022 +0800 + + target/i386/kvm: fix kvmclock_current_nsec: Assertion `time.tsc_timestamp <= migration_tsc' failed + + New KVM_CLOCK flags were added in the kernel.(c68dc1b577eabd5605c6c7c08f3e07ae18d30d5d) + ``` + + #define KVM_CLOCK_VALID_FLAGS \ + + (KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC) + + case KVM_CAP_ADJUST_CLOCK: + - r = KVM_CLOCK_TSC_STABLE; + + r = KVM_CLOCK_VALID_FLAGS; + ``` + + kvm_has_adjust_clock_stable needs to handle additional flags, + so that 
s->clock_is_reliable can be true and kvmclock_current_nsec doesn't need to be called. + + Signed-off-by: Ray Zhang + Message-Id: <20220922100523.2362205-1-zhanglei002@gmail.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Vitaly Kuznetsov +--- + target/i386/kvm/kvm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index a9eba247a5..ba98b99d8f 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -157,7 +157,7 @@ bool kvm_has_adjust_clock_stable(void) + { + int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK); + +- return (ret == KVM_CLOCK_TSC_STABLE); ++ return (ret & KVM_CLOCK_TSC_STABLE); + } + + bool kvm_has_adjust_clock(void) +-- +2.37.3 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 138d06d..87c6f31 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.1.0 -Release: 2%{?rcrel}%{?dist}%{?cc_suffix} +Release: 3%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -247,6 +247,8 @@ Patch44: kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch Patch45: kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch # For bz#2127825 - Use capstone for qemu-kvm build Patch46: kvm-Revert-Re-enable-capstone-internal-build.patch +# For bz#2108531 - Windows guest reboot after migration with wsl2 installed inside +Patch47: kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch %if %{have_clang} BuildRequires: clang @@ -1279,6 +1281,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Thu Oct 13 2022 Jon Maloy - 7.1.0-3 +- kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch [bz#2108531] +- Resolves: bz#2108531 + (Windows guest reboot after migration with wsl2 installed inside) + * Thu Sep 29 2022 Miroslav Rezanina - 7.1.0-2 - kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch [RHELX-57] - kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch [RHELX-57] From 3e616168bc1b7a14522b50849d71e619b94c0f7d Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Tue, 1 Nov 2022 06:30:42 -0400 Subject: [PATCH 171/195] * Tue Nov 01 2022 Miroslav Rezanina - 7.1.0-4 - kvm-Revert-intel_iommu-Fix-irqchip-X2APIC-configuration-.patch [bz#2126095] - Resolves: bz#2126095 ([rhel9.2][intel_iommu]Booting guest with "-device intel-iommu,intremap=on,device-iotlb=on,caching-mode=on" causes kernel call trace) --- ...mu-Fix-irqchip-X2APIC-configuration-.patch | 84 +++++++++++++++++++ qemu-kvm.spec | 9 +- 2 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 kvm-Revert-intel_iommu-Fix-irqchip-X2APIC-configuration-.patch diff --git a/kvm-Revert-intel_iommu-Fix-irqchip-X2APIC-configuration-.patch b/kvm-Revert-intel_iommu-Fix-irqchip-X2APIC-configuration-.patch new file mode 100644 index 0000000..99d2be9 --- /dev/null +++ 
b/kvm-Revert-intel_iommu-Fix-irqchip-X2APIC-configuration-.patch @@ -0,0 +1,84 @@ +From 7f5289f426b25cf1113a450a3aa311170ac30397 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Mon, 26 Sep 2022 11:32:06 -0400 +Subject: [PATCH] Revert "intel_iommu: Fix irqchip / X2APIC configuration + checks" + +RH-Author: Peter Xu +RH-MergeRequest: 121: Revert "intel_iommu: Fix irqchip / X2APIC configuration checks" +RH-Bugzilla: 2126095 +RH-Acked-by: Igor Mammedov +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Jason Wang +RH-Commit: [1/1] 6218c5f81c205ab160f4ccdb203ae39d4af3339e (peterx/qemu-kvm) + +It's true that when vcpus<=255 we don't require the length of 32bit APIC +IDs. However here since we already have EIM=ON it means the hypervisor +will declare the VM as x2apic supported (e.g. VT-d ECAP register will have +EIM bit 4 set), so the guest should assume the APIC IDs are 32bits width +even if vcpus<=255. In short, commit 77250171bdc breaks any simple cmdline +that wants to boot a VM with >=9 but <=255 vcpus with: + + -device intel-iommu,intremap=on + +For anyone who does not want to enable x2apic, we can use eim=off in the +intel-iommu parameters to skip enabling KVM x2apic. + +This partly reverts commit 77250171bdc02aee106083fd2a068147befa1a38, while +keeping the valid bit on checking split irqchip, but revert the other change. + +One thing to mention is that this patch may break migration compatibility +of such VM, however that's probably the best thing we can do, because the +old behavior was simply wrong and not working for >8 vcpus. For <=8 vcpus, +there could be a light guest ABI change (by enabling KVM x2apic after this +patch), but logically it shouldn't affect the migration from working. + +Also, this is not the 1st commit to change x2apic behavior. 
Igor provided +a full history of how this evolved for the past few years: + +https://lore.kernel.org/qemu-devel/20220922154617.57d1a1fb@redhat.com/ + +Relevant commits for reference: + + fb506e701e ("intel_iommu: reject broken EIM", 2016-10-17) + c1bb5418e3 ("target/i386: Support up to 32768 CPUs without IRQ remapping", 2020-12-10) + 77250171bd ("intel_iommu: Fix irqchip / X2APIC configuration checks", 2022-05-16) + dc89f32d92 ("target/i386: Fix sanity check on max APIC ID / X2APIC enablement", 2022-05-16) + +We may want to have this for stable too (mostly for 7.1.0 only). Adding a +fixes tag. + +Cc: David Woodhouse +Cc: Claudio Fontana +Cc: Igor Mammedov +Fixes: 77250171bd ("intel_iommu: Fix irqchip / X2APIC configuration checks") +Signed-off-by: Peter Xu +Message-Id: <20220926153206.10881-1-peterx@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Igor Mammedov +(cherry picked from commit 20ca47429e96df84e7b2e741f740bfce8a813fb2) +Signed-off-by: Peter Xu +--- + hw/i386/intel_iommu.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index 05d53a1aa9..6524c2ee32 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -3818,6 +3818,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) + error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split"); + return false; + } ++ if (!kvm_enable_x2apic()) { ++ error_setg(errp, "eim=on requires support on the KVM side" ++ "(X2APIC_API, first shipped in v4.7)"); ++ return false; ++ } + } + + /* Currently only address widths supported are 39 and 48 bits */ +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 87c6f31..de9e909 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.1.0 -Release: 3%{?rcrel}%{?dist}%{?cc_suffix} +Release: 
4%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -249,6 +249,8 @@ Patch45: kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch Patch46: kvm-Revert-Re-enable-capstone-internal-build.patch # For bz#2108531 - Windows guest reboot after migration with wsl2 installed inside Patch47: kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch +# For bz#2126095 - [rhel9.2][intel_iommu]Booting guest with "-device intel-iommu,intremap=on,device-iotlb=on,caching-mode=on" causes kernel call trace +Patch48: kvm-Revert-intel_iommu-Fix-irqchip-X2APIC-configuration-.patch %if %{have_clang} BuildRequires: clang @@ -1281,6 +1283,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Tue Nov 01 2022 Miroslav Rezanina - 7.1.0-4 +- kvm-Revert-intel_iommu-Fix-irqchip-X2APIC-configuration-.patch [bz#2126095] +- Resolves: bz#2126095 + ([rhel9.2][intel_iommu]Booting guest with "-device intel-iommu,intremap=on,device-iotlb=on,caching-mode=on" causes kernel call trace) + * Thu Oct 13 2022 Jon Maloy - 7.1.0-3 - kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch [bz#2108531] - Resolves: bz#2108531 From e961a3354a02ba557282196e8a8eedaad82c77e8 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 14 Nov 2022 03:29:01 -0500 Subject: [PATCH 172/195] * Mon Nov 14 2022 Miroslav Rezanina - 7.1.0-5 - kvm-rtl8139-Remove-unused-variable.patch [bz#2141218] - kvm-qemu-img-remove-unused-variable.patch [bz#2141218] - kvm-host-libusb-Remove-unused-variable.patch [bz#2141218] - Resolves: bz#2141218 (qemu-kvm build fails with clang 15.0.1 due to false unused variable error) --- kvm-host-libusb-Remove-unused-variable.patch | 64 ++++++++++++++++++++ kvm-qemu-img-remove-unused-variable.patch | 45 ++++++++++++++ kvm-rtl8139-Remove-unused-variable.patch | 44 ++++++++++++++ qemu-kvm.spec | 15 
++++- 4 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 kvm-host-libusb-Remove-unused-variable.patch create mode 100644 kvm-qemu-img-remove-unused-variable.patch create mode 100644 kvm-rtl8139-Remove-unused-variable.patch diff --git a/kvm-host-libusb-Remove-unused-variable.patch b/kvm-host-libusb-Remove-unused-variable.patch new file mode 100644 index 0000000..c21fc57 --- /dev/null +++ b/kvm-host-libusb-Remove-unused-variable.patch @@ -0,0 +1,64 @@ +From 529d7d039a8783cb5745330c9731626a608553db Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 9 Nov 2022 05:09:40 -0500 +Subject: [PATCH 3/3] host-libusb: Remove unused variable + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 122: Remove variables causing 'Unused but set variable' warning on Clang 15 +RH-Bugzilla: 2141218 +RH-Commit: [3/3] 460bd469b6f262074c407475c5003581f8993855 (mrezanin/centos-src-qemu-kvm) + +Variable unconnected used in usb_host_auto_check function is only incremented +but never read as line where it is read was disabled since introducing the code. +This causes 'Unused but set variable' warning on Clang 15.0.1 compiler. + +Removing the variable and disabled code to prevent the warning. 
+ +Signed-off-by: Miroslav Rezanina +--- + hw/usb/host-libusb.c | 15 --------------- + 1 file changed, 15 deletions(-) + +diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c +index 28f8af8941..176868d345 100644 +--- a/hw/usb/host-libusb.c ++++ b/hw/usb/host-libusb.c +@@ -1837,7 +1837,6 @@ static void usb_host_auto_check(void *unused) + struct USBAutoFilter *f; + libusb_device **devs = NULL; + struct libusb_device_descriptor ddesc; +- int unconnected = 0; + int i, n; + + if (usb_host_init() != 0) { +@@ -1897,9 +1896,6 @@ static void usb_host_auto_check(void *unused) + libusb_free_device_list(devs, 1); + + QTAILQ_FOREACH(s, &hostdevs, next) { +- if (s->dh == NULL) { +- unconnected++; +- } + if (s->seen == 0) { + if (s->dh) { + usb_host_close(s); +@@ -1908,17 +1904,6 @@ static void usb_host_auto_check(void *unused) + } + s->seen = 0; + } +- +-#if 0 +- if (unconnected == 0) { +- /* nothing to watch */ +- if (usb_auto_timer) { +- timer_del(usb_auto_timer); +- trace_usb_host_auto_scan_disabled(); +- } +- return; +- } +-#endif + } + + if (!usb_vmstate) { +-- +2.31.1 + diff --git a/kvm-qemu-img-remove-unused-variable.patch b/kvm-qemu-img-remove-unused-variable.patch new file mode 100644 index 0000000..ceb6ea7 --- /dev/null +++ b/kvm-qemu-img-remove-unused-variable.patch @@ -0,0 +1,45 @@ +From 2458bcc8497fb12ad81e9fd5d05a6164d25f00d6 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 9 Nov 2022 05:12:46 -0500 +Subject: [PATCH 2/3] qemu-img: remove unused variable + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 122: Remove variables causing 'Unused but set variable' warning on Clang 15 +RH-Bugzilla: 2141218 +RH-Commit: [2/3] 9cd54891567781090accfb68aa5d80d2c6d68584 (mrezanin/centos-src-qemu-kvm) + +Variable block_count used in img_dd function is only incremented but never read. +This causes 'Unused but set variable' warning on Clang 15.0.1 compiler. + +Removing the variable to prevent the warning. 
+ +Signed-off-by: Miroslav Rezanina +--- + qemu-img.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/qemu-img.c b/qemu-img.c +index 7d4b33b3da..987da256ef 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -4919,7 +4919,7 @@ static int img_dd(int argc, char **argv) + const char *out_fmt = "raw"; + const char *fmt = NULL; + int64_t size = 0; +- int64_t block_count = 0, out_pos, in_pos; ++ int64_t out_pos, in_pos; + bool force_share = false; + struct DdInfo dd = { + .flags = 0, +@@ -5119,7 +5119,7 @@ static int img_dd(int argc, char **argv) + + in.buf = g_new(uint8_t, in.bsz); + +- for (out_pos = 0; in_pos < size; block_count++) { ++ for (out_pos = 0; in_pos < size; ) { + int bytes = (in_pos + in.bsz > size) ? size - in_pos : in.bsz; + + ret = blk_pread(blk1, in_pos, bytes, in.buf, 0); +-- +2.31.1 + diff --git a/kvm-rtl8139-Remove-unused-variable.patch b/kvm-rtl8139-Remove-unused-variable.patch new file mode 100644 index 0000000..8fa05d0 --- /dev/null +++ b/kvm-rtl8139-Remove-unused-variable.patch @@ -0,0 +1,44 @@ +From a89fed7cdbee76f9a7083ca04e079ab991737eeb Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 9 Nov 2022 06:40:47 -0500 +Subject: [PATCH 1/3] rtl8139: Remove unused variable + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 122: Remove variables causing 'Unused but set variable' warning on Clang 15 +RH-Bugzilla: 2141218 +RH-Commit: [1/3] ffec7ff0238c6859a5ddaea80fcd8e66049fd3fc (mrezanin/centos-src-qemu-kvm) + +Variable send_count used in rtl8139_cplus_transmit_one function is only +incremented but never read. This causes 'Unused but set variable' warning +on Clang 15.0.1 compiler. + +Removing the variable to prevent the warning. 
+ +Signed-off-by: Miroslav Rezanina +--- + hw/net/rtl8139.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c +index 75dacabc43..445cbd700c 100644 +--- a/hw/net/rtl8139.c ++++ b/hw/net/rtl8139.c +@@ -2156,7 +2156,6 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s) + ip_data_len, saved_size - ETH_HLEN, large_send_mss); + + int tcp_send_offset = 0; +- int send_count = 0; + + /* maximum IP header length is 60 bytes */ + uint8_t saved_ip_header[60]; +@@ -2261,7 +2260,6 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s) + /* add transferred count to TCP sequence number */ + stl_be_p(&p_tcp_hdr->th_seq, + chunk_size + ldl_be_p(&p_tcp_hdr->th_seq)); +- ++send_count; + } + + /* Stop sending this frame */ +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index de9e909..f6175f9 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.1.0 -Release: 4%{?rcrel}%{?dist}%{?cc_suffix} +Release: 5%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -251,6 +251,12 @@ Patch46: kvm-Revert-Re-enable-capstone-internal-build.patch Patch47: kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch # For bz#2126095 - [rhel9.2][intel_iommu]Booting guest with "-device intel-iommu,intremap=on,device-iotlb=on,caching-mode=on" causes kernel call trace Patch48: kvm-Revert-intel_iommu-Fix-irqchip-X2APIC-configuration-.patch +# For bz#2141218 - qemu-kvm build fails with clang 15.0.1 due to false unused variable error +Patch49: kvm-rtl8139-Remove-unused-variable.patch +# For bz#2141218 - qemu-kvm build fails with clang 15.0.1 due to false unused variable error +Patch50: kvm-qemu-img-remove-unused-variable.patch +# For bz#2141218 - qemu-kvm build fails with clang 15.0.1 due to false unused variable error +Patch51: kvm-host-libusb-Remove-unused-variable.patch %if %{have_clang} BuildRequires: clang @@ -1283,6 +1289,13 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Nov 14 2022 Miroslav Rezanina - 7.1.0-5 +- kvm-rtl8139-Remove-unused-variable.patch [bz#2141218] +- kvm-qemu-img-remove-unused-variable.patch [bz#2141218] +- kvm-host-libusb-Remove-unused-variable.patch [bz#2141218] +- Resolves: bz#2141218 + (qemu-kvm build fails with clang 15.0.1 due to false unused variable error) + * Tue Nov 01 2022 Miroslav Rezanina - 7.1.0-4 - kvm-Revert-intel_iommu-Fix-irqchip-X2APIC-configuration-.patch [bz#2126095] - Resolves: bz#2126095 From c9394359b0596e7c3973d7bb19f76d4b41f9a5c1 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 2 Dec 2022 05:04:56 -0500 Subject: [PATCH 173/195] * Fri Dec 02 2022 Miroslav Rezanina - 7.1.0-6 - kvm-block-move-bdrv_qiov_is_aligned-to-file-posix.patch [bz#2143170] - kvm-block-use-the-request-length-for-iov-alignment.patch [bz#2143170] - Resolves: bz#2143170 (The installation can not start when install files (iso) locate on a 
4k disk) --- ...e-bdrv_qiov_is_aligned-to-file-posix.patch | 107 ++++++++++++++++++ ...the-request-length-for-iov-alignment.patch | 50 ++++++++ qemu-kvm.spec | 12 +- 3 files changed, 168 insertions(+), 1 deletion(-) create mode 100644 kvm-block-move-bdrv_qiov_is_aligned-to-file-posix.patch create mode 100644 kvm-block-use-the-request-length-for-iov-alignment.patch diff --git a/kvm-block-move-bdrv_qiov_is_aligned-to-file-posix.patch b/kvm-block-move-bdrv_qiov_is_aligned-to-file-posix.patch new file mode 100644 index 0000000..1556ced --- /dev/null +++ b/kvm-block-move-bdrv_qiov_is_aligned-to-file-posix.patch @@ -0,0 +1,107 @@ +From 2c9b536fac44c15c44af385ac1b440a9f5c05d01 Mon Sep 17 00:00:00 2001 +From: Keith Busch +Date: Thu, 29 Sep 2022 13:05:22 -0700 +Subject: [PATCH 1/2] block: move bdrv_qiov_is_aligned to file-posix + +RH-Author: Kevin Wolf +RH-MergeRequest: 123: block: Fix memory alignment of requests +RH-Bugzilla: 2143170 +RH-Acked-by: Alberto Faria +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [1/2] 77b6ed2aaedfbd3dba7769b9a999ab3743f642cd (kmwolf/centos-qemu-kvm) + +There is only user of bdrv_qiov_is_aligned(), so move the alignment +function to there and make it static. + +Signed-off-by: Keith Busch +Message-Id: <20220929200523.3218710-2-kbusch@meta.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit a7c5f67a78569f8c275ea4ea9962e9c79b9d03cb) +Signed-off-by: Kevin Wolf +--- + block/file-posix.c | 21 +++++++++++++++++++++ + block/io.c | 21 --------------------- + include/block/block-io.h | 1 - + 3 files changed, 21 insertions(+), 22 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 48cd096624..e3f3de2780 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -2061,6 +2061,27 @@ static int coroutine_fn raw_thread_pool_submit(BlockDriverState *bs, + return thread_pool_submit_co(pool, func, arg); + } + ++/* ++ * Check if all memory in this vector is sector aligned. 
++ */ ++static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) ++{ ++ int i; ++ size_t alignment = bdrv_min_mem_align(bs); ++ IO_CODE(); ++ ++ for (i = 0; i < qiov->niov; i++) { ++ if ((uintptr_t) qiov->iov[i].iov_base % alignment) { ++ return false; ++ } ++ if (qiov->iov[i].iov_len % alignment) { ++ return false; ++ } ++ } ++ ++ return true; ++} ++ + static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, int type) + { +diff --git a/block/io.c b/block/io.c +index 0a8cbefe86..96edc7f7cb 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -3236,27 +3236,6 @@ void *qemu_try_blockalign0(BlockDriverState *bs, size_t size) + return mem; + } + +-/* +- * Check if all memory in this vector is sector aligned. +- */ +-bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) +-{ +- int i; +- size_t alignment = bdrv_min_mem_align(bs); +- IO_CODE(); +- +- for (i = 0; i < qiov->niov; i++) { +- if ((uintptr_t) qiov->iov[i].iov_base % alignment) { +- return false; +- } +- if (qiov->iov[i].iov_len % alignment) { +- return false; +- } +- } +- +- return true; +-} +- + void bdrv_io_plug(BlockDriverState *bs) + { + BdrvChild *child; +diff --git a/include/block/block-io.h b/include/block/block-io.h +index fd25ffa9be..492f95fc05 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -150,7 +150,6 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size); + void *qemu_blockalign0(BlockDriverState *bs, size_t size); + void *qemu_try_blockalign(BlockDriverState *bs, size_t size); + void *qemu_try_blockalign0(BlockDriverState *bs, size_t size); +-bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); + + void bdrv_enable_copy_on_read(BlockDriverState *bs); + void bdrv_disable_copy_on_read(BlockDriverState *bs); +-- +2.31.1 + diff --git a/kvm-block-use-the-request-length-for-iov-alignment.patch b/kvm-block-use-the-request-length-for-iov-alignment.patch new file mode 100644 
index 0000000..512e1d8 --- /dev/null +++ b/kvm-block-use-the-request-length-for-iov-alignment.patch @@ -0,0 +1,50 @@ +From 7e334715074c7a4090578ed178834f3318d4b969 Mon Sep 17 00:00:00 2001 +From: Keith Busch +Date: Thu, 29 Sep 2022 13:05:23 -0700 +Subject: [PATCH 2/2] block: use the request length for iov alignment + +RH-Author: Kevin Wolf +RH-MergeRequest: 123: block: Fix memory alignment of requests +RH-Bugzilla: 2143170 +RH-Acked-by: Alberto Faria +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [2/2] 50cfd394fff4dbad87d7c90c987e241ed2367746 (kmwolf/centos-qemu-kvm) + +An iov length needs to be aligned to the logical block size, which may +be larger than the memory alignment. + +Tested-by: Jens Axboe +Signed-off-by: Keith Busch +Message-Id: <20220929200523.3218710-3-kbusch@meta.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 25474d90aa50bd32e0de395a33d8de42dd6f2aef) +Signed-off-by: Kevin Wolf +--- + block/file-posix.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index e3f3de2780..af994aba2b 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -2068,13 +2068,14 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) + { + int i; + size_t alignment = bdrv_min_mem_align(bs); ++ size_t len = bs->bl.request_alignment; + IO_CODE(); + + for (i = 0; i < qiov->niov; i++) { + if ((uintptr_t) qiov->iov[i].iov_base % alignment) { + return false; + } +- if (qiov->iov[i].iov_len % alignment) { ++ if (qiov->iov[i].iov_len % len) { + return false; + } + } +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index f6175f9..bb9309d 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.1.0 -Release: 5%{?rcrel}%{?dist}%{?cc_suffix} +Release: 6%{?rcrel}%{?dist}%{?cc_suffix} # 
Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -257,6 +257,10 @@ Patch49: kvm-rtl8139-Remove-unused-variable.patch Patch50: kvm-qemu-img-remove-unused-variable.patch # For bz#2141218 - qemu-kvm build fails with clang 15.0.1 due to false unused variable error Patch51: kvm-host-libusb-Remove-unused-variable.patch +# For bz#2143170 - The installation can not start when install files (iso) locate on a 4k disk +Patch52: kvm-block-move-bdrv_qiov_is_aligned-to-file-posix.patch +# For bz#2143170 - The installation can not start when install files (iso) locate on a 4k disk +Patch53: kvm-block-use-the-request-length-for-iov-alignment.patch %if %{have_clang} BuildRequires: clang @@ -1289,6 +1293,12 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Fri Dec 02 2022 Miroslav Rezanina - 7.1.0-6 +- kvm-block-move-bdrv_qiov_is_aligned-to-file-posix.patch [bz#2143170] +- kvm-block-use-the-request-length-for-iov-alignment.patch [bz#2143170] +- Resolves: bz#2143170 + (The installation can not start when install files (iso) locate on a 4k disk) + * Mon Nov 14 2022 Miroslav Rezanina - 7.1.0-5 - kvm-rtl8139-Remove-unused-variable.patch [bz#2141218] - kvm-qemu-img-remove-unused-variable.patch [bz#2141218] From 55259e29d6995dc584c46580a26d7100cf1c8eb6 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Wed, 14 Dec 2022 22:30:47 +0000 Subject: [PATCH 174/195] * Wed Dec 14 2022 Jon Maloy - 7.1.0-7 - kvm-hw-acpi-erst.c-Fix-memory-handling-issues.patch [bz#2149108] - Resolves: bz#2149108 (CVE-2022-4172 qemu-kvm: QEMU: ACPI ERST: memory corruption issues in read_erst_record and write_erst_record [rhel-9]) --- ...pi-erst.c-Fix-memory-handling-issues.patch | 83 +++++++++++++++++++ qemu-kvm.spec | 9 +- 2 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 kvm-hw-acpi-erst.c-Fix-memory-handling-issues.patch diff --git 
a/kvm-hw-acpi-erst.c-Fix-memory-handling-issues.patch b/kvm-hw-acpi-erst.c-Fix-memory-handling-issues.patch new file mode 100644 index 0000000..0f4b726 --- /dev/null +++ b/kvm-hw-acpi-erst.c-Fix-memory-handling-issues.patch @@ -0,0 +1,83 @@ +From 108b687eb18d121d688e652ac13ba465083f4529 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 5 Dec 2022 15:32:55 -0500 +Subject: [PATCH] hw/acpi/erst.c: Fix memory handling issues + +RH-Author: Jon Maloy +RH-MergeRequest: 125: ACPI ERST: memory corruption issues in read_erst_record and write_erst_record +RH-Bugzilla: 2149108 +RH-Acked-by: Gavin Shan +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Igor Mammedov +RH-Commit: [1/1] cccd8b6b8f0f360c623f913dbc02d4eda2fbf972 (jmaloy/jmaloy-qemu-kvm-centos) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2149108 +Upstream: Merged +CVE: CVE-2022-4172 + +commit defb70980f6bed36100b74e84220f1764c0dd544 +Author: Christian A. Ehrhardt +Date: Mon Oct 24 17:42:33 2022 +0200 + + hw/acpi/erst.c: Fix memory handling issues + + - Fix memset argument order: The second argument is + the value, the length goes last. + - Fix an integer overflow reported by Alexander Bulekov. + + Both issues allow the guest to overrun the host buffer + allocated for the ERST memory device. + + Cc: Eric DeVolder + Cc: qemu-stable@nongnu.org + Fixes: f7e26ffa590 ("ACPI ERST: support for ACPI ERST feature") + Tested-by: Alexander Bulekov + Signed-off-by: Christian A. Ehrhardt + Message-Id: <20221024154233.1043347-1-lk@c--e.de> + Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1268 + Reviewed-by: Alexander Bulekov + Reviewed-by: Eric DeVolder + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +(cherry picked from commit defb70980f6bed36100b74e84220f1764c0dd544) +Jon Maloy +--- + hw/acpi/erst.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/hw/acpi/erst.c b/hw/acpi/erst.c +index df856b2669..aefcc03ad6 100644 +--- a/hw/acpi/erst.c ++++ b/hw/acpi/erst.c +@@ -635,7 +635,7 @@ static unsigned read_erst_record(ERSTDeviceState *s) + if (record_length < UEFI_CPER_RECORD_MIN_SIZE) { + rc = STATUS_FAILED; + } +- if ((s->record_offset + record_length) > exchange_length) { ++ if (record_length > exchange_length - s->record_offset) { + rc = STATUS_FAILED; + } + /* If all is ok, copy the record to the exchange buffer */ +@@ -684,7 +684,7 @@ static unsigned write_erst_record(ERSTDeviceState *s) + if (record_length < UEFI_CPER_RECORD_MIN_SIZE) { + return STATUS_FAILED; + } +- if ((s->record_offset + record_length) > exchange_length) { ++ if (record_length > exchange_length - s->record_offset) { + return STATUS_FAILED; + } + +@@ -716,7 +716,7 @@ static unsigned write_erst_record(ERSTDeviceState *s) + if (nvram) { + /* Write the record into the slot */ + memcpy(nvram, exchange, record_length); +- memset(nvram + record_length, exchange_length - record_length, 0xFF); ++ memset(nvram + record_length, 0xFF, exchange_length - record_length); + /* If a new record, increment the record_count */ + if (!record_found) { + uint32_t record_count; +-- +2.37.3 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index bb9309d..cab37a6 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.1.0 -Release: 6%{?rcrel}%{?dist}%{?cc_suffix} +Release: 7%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -261,6 +261,8 @@ Patch51: kvm-host-libusb-Remove-unused-variable.patch Patch52: kvm-block-move-bdrv_qiov_is_aligned-to-file-posix.patch # For bz#2143170 - The installation can not start when install files (iso) locate on a 4k disk Patch53: kvm-block-use-the-request-length-for-iov-alignment.patch +# For bz#2149108 - CVE-2022-4172 qemu-kvm: QEMU: ACPI ERST: memory corruption issues in read_erst_record and write_erst_record [rhel-9] +Patch54: kvm-hw-acpi-erst.c-Fix-memory-handling-issues.patch %if %{have_clang} BuildRequires: clang @@ -1293,6 +1295,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Wed Dec 14 2022 Jon Maloy - 7.1.0-7 +- kvm-hw-acpi-erst.c-Fix-memory-handling-issues.patch [bz#2149108] +- Resolves: bz#2149108 + (CVE-2022-4172 qemu-kvm: QEMU: ACPI ERST: memory corruption issues in read_erst_record and write_erst_record [rhel-9]) + * Fri Dec 02 2022 Miroslav Rezanina - 7.1.0-6 - kvm-block-move-bdrv_qiov_is_aligned-to-file-posix.patch [bz#2143170] - kvm-block-use-the-request-length-for-iov-alignment.patch [bz#2143170] From 9f4495a7b60022b70fc798f834dbe84ef7ad254f Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 15 Dec 2022 01:04:15 -0500 Subject: [PATCH 175/195] * Fri Dec 16 2022 Miroslav Rezanina - 7.2.0-1 - Rebase to QEMU 7.2.0 [bz#2135806] - Resolves: bz#2135806 (Rebase to QEMU 7.2 for RHEL 9.2.0) --- .gitignore | 1 + 0004-Initial-redhat-build.patch | 43 ++- ...0005-Enable-disable-devices-for-RHEL.patch | 52 ++-- 0005-Re-enable-capstone-internal-build.patch | 251 ----------------- ...Machine-type-related-general-changes.patch | 75 ++++-- ...ch => 0007-Add-aarch64-machine-types.patch | 112 ++++---- ...atch => 0008-Add-ppc64-machine-types.patch | 34 +-- ...atch => 0009-Add-s390x-machine-types.patch | 20 +- ...tch => 0010-Add-x86_64-machine-types.patch | 57 ++-- ...heck.patch => 
0011-Enable-make-check.patch | 30 +-- ...mber-of-devices-that-can-be-assigned.patch | 2 +- ...Add-support-statement-to-help-output.patch | 8 +- ...documentation-instead-of-qemu-system.patch | 8 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 2 +- ...on-warning-when-opening-v2-images-rw.patch | 6 +- ...ompat-bits-for-RHEL-9.1-machine-type.patch | 26 ++ ...ntroduce-upstream-7.0-compat-changes.patch | 116 -------- ...90x-machine-type-compatibility-for-Q.patch | 47 ++++ ...ch64-add-rhel9.2.0-virt-machine-type.patch | 43 +++ ...dd-new-rhel-9.2.0-s390x-machine-type.patch | 62 +++++ 0022-x86-rhel-9.2.0-machine-type.patch | 75 ++++++ ...rt-Re-enable-capstone-internal-build.patch | 252 ------------------ ...mu-Fix-irqchip-X2APIC-configuration-.patch | 84 ------ ...e-bdrv_qiov_is_aligned-to-file-posix.patch | 107 -------- ...the-request-length-for-iov-alignment.patch | 50 ---- kvm-host-libusb-Remove-unused-variable.patch | 64 ----- ...pi-erst.c-Fix-memory-handling-issues.patch | 83 ------ ...msr_feature_control-first-thing-when.patch | 66 ----- ...eset-KVM-nested-state-upon-CPU-reset.patch | 93 ------- kvm-qemu-img-remove-unused-variable.patch | 45 ---- kvm-rtl8139-Remove-unused-variable.patch | 44 --- ...fix-kvmclock_current_nsec-Assertion-.patch | 60 ----- ...-iova_tree_remove_parameter-by-value.patch | 182 ------------- kvm-vdpa-Add-vhost_vdpa_net_load_mq.patch | 74 ----- ...tio-net-mac-address-via-CVQ-at-start.patch | 87 ------ kvm-vdpa-Allow-MQ-feature-in-SVQ.patch | 41 --- kvm-vdpa-Delete-CVQ-migration-blocker.patch | 98 ------- ...Make-SVQ-vring-unmapping-return-void.patch | 133 --------- ...DPAState-cvq_cmd_in_buffer-control-a.patch | 113 -------- ...d-buffers-map-to-start-of-net-device.patch | 251 ----------------- ...SVQ-vring-from-iova_tree-at-shutdown.patch | 49 ---- ...a-Skip-the-maps-not-in-the-iova-tree.patch | 48 ---- ...-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch | 79 ------ ...et_vhost_vdpa_cvq_info-NetClientInfo.patch | 62 ----- 
...ave-failed-dma-maps-in-SVQ-iova-tree.patch | 83 ------ ...st_vdpa_net_cvq_add-from-vhost_vdpa_.patch | 153 ----------- ...st_vdpa_net_load_mac-from-vhost_vdpa.patch | 115 -------- kvm-vdpa-validate-MQ-CVQ-commands.patch | 50 ---- ...re-new-kick-fd-on-vhost_svq_set_svq_.patch | 67 ----- ...t-Delete-useless-read-memory-barrier.patch | 47 ---- ...end-on-NULL-VirtQueueElement-on-vhos.patch | 63 ----- ...fer-elem-ownership-in-vhost_handle_g.patch | 80 ------ ...ement-ndescs-instead-of-opaque-data-.patch | 55 ---- ...net-Add-NetClientInfo-start-callback.patch | 73 ----- ..._net-Add-NetClientInfo-stop-callback.patch | 68 ----- ...net-add-NetClientState-load-callback.patch | 73 ----- ...e-virtio-net-curr_queue_pairs-in-vdp.patch | 61 ----- qemu-kvm.spec | 163 +++-------- sources | 2 +- 59 files changed, 544 insertions(+), 3844 deletions(-) rename 0006-Enable-disable-devices-for-RHEL.patch => 0005-Enable-disable-devices-for-RHEL.patch (93%) delete mode 100644 0005-Re-enable-capstone-internal-build.patch rename 0007-Machine-type-related-general-changes.patch => 0006-Machine-type-related-general-changes.patch (92%) rename 0008-Add-aarch64-machine-types.patch => 0007-Add-aarch64-machine-types.patch (89%) rename 0009-Add-ppc64-machine-types.patch => 0008-Add-ppc64-machine-types.patch (95%) rename 0010-Add-s390x-machine-types.patch => 0009-Add-s390x-machine-types.patch (93%) rename 0011-Add-x86_64-machine-types.patch => 0010-Add-x86_64-machine-types.patch (95%) rename 0012-Enable-make-check.patch => 0011-Enable-make-check.patch (94%) rename 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch => 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch (98%) rename 0014-Add-support-statement-to-help-output.patch => 0013-Add-support-statement-to-help-output.patch (88%) rename 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch => 0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch (91%) rename 
0016-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch => 0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch (97%) rename 0017-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch => 0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch (94%) create mode 100644 0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch delete mode 100644 0018-Introduce-upstream-7.0-compat-changes.patch create mode 100644 0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch create mode 100644 0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch create mode 100644 0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch create mode 100644 0022-x86-rhel-9.2.0-machine-type.patch delete mode 100644 kvm-Revert-Re-enable-capstone-internal-build.patch delete mode 100644 kvm-Revert-intel_iommu-Fix-irqchip-X2APIC-configuration-.patch delete mode 100644 kvm-block-move-bdrv_qiov_is_aligned-to-file-posix.patch delete mode 100644 kvm-block-use-the-request-length-for-iov-alignment.patch delete mode 100644 kvm-host-libusb-Remove-unused-variable.patch delete mode 100644 kvm-hw-acpi-erst.c-Fix-memory-handling-issues.patch delete mode 100644 kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch delete mode 100644 kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch delete mode 100644 kvm-qemu-img-remove-unused-variable.patch delete mode 100644 kvm-rtl8139-Remove-unused-variable.patch delete mode 100644 kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch delete mode 100644 kvm-util-accept-iova_tree_remove_parameter-by-value.patch delete mode 100644 kvm-vdpa-Add-vhost_vdpa_net_load_mq.patch delete mode 100644 kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch delete mode 100644 kvm-vdpa-Allow-MQ-feature-in-SVQ.patch delete mode 100644 kvm-vdpa-Delete-CVQ-migration-blocker.patch delete mode 100644 kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch delete mode 100644 
kvm-vdpa-Make-VhostVDPAState-cvq_cmd_in_buffer-control-a.patch delete mode 100644 kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch delete mode 100644 kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch delete mode 100644 kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch delete mode 100644 kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch delete mode 100644 kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch delete mode 100644 kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch delete mode 100644 kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch delete mode 100644 kvm-vdpa-extract-vhost_vdpa_net_load_mac-from-vhost_vdpa.patch delete mode 100644 kvm-vdpa-validate-MQ-CVQ-commands.patch delete mode 100644 kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch delete mode 100644 kvm-vhost-Delete-useless-read-memory-barrier.patch delete mode 100644 kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch delete mode 100644 kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch delete mode 100644 kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch delete mode 100644 kvm-vhost_net-Add-NetClientInfo-start-callback.patch delete mode 100644 kvm-vhost_net-Add-NetClientInfo-stop-callback.patch delete mode 100644 kvm-vhost_net-add-NetClientState-load-callback.patch delete mode 100644 kvm-virtio-net-Update-virtio-net-curr_queue_pairs-in-vdp.patch diff --git a/.gitignore b/.gitignore index cf165db..8f95454 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,4 @@ /qemu-7.0.0.tar.xz /capstone.tar.gz /qemu-7.1.0.tar.xz +/qemu-7.2.0.tar.xz diff --git a/0004-Initial-redhat-build.patch b/0004-Initial-redhat-build.patch index c9e1d04..0f9cc55 100644 --- a/0004-Initial-redhat-build.patch +++ b/0004-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From 476f040f14a9287efb6f0bf5b3ca97844bf9fdc3 Mon Sep 17 00:00:00 2001 +From ccc4a5bdc8c2f27678312364a7c12aeafd009bb6 Mon Sep 17 00:00:00 2001 From: Miroslav 
Rezanina Date: Wed, 26 May 2021 10:56:02 +0200 Subject: Initial redhat build @@ -13,7 +13,7 @@ several issues are fixed in QEMU tree: We disable make check due to issues with some of the tests. -This rebase is based on qemu-kvm-7.0.0-11.el9 +This rebase is based on qemu-kvm-7.1.0-7.el9 Signed-off-by: Miroslav Rezanina -- @@ -56,6 +56,16 @@ Rebase changes (7.1.0 rc0): - capstone submodule removed - Temporary include capstone build +Rebase changes (7.2.0 rc0): +- Switch --enable-slirp=system to --enable-slirp + +Rebaes changes (7.2.0 rc2): +- Added new configure options (blkio and sndio, both disabled) + +Rebase changes (7.2.0): +- Fix SRPM name generation to work on Fedora 37 +- Switch back to system meson + Merged patches (6.0.0): - 605758c902 Limit build on Power to qemu-img and qemu-ga only @@ -148,25 +158,23 @@ Merged patches (7.0.0): - d46d2710b2 spec: Obsolete old usb redir subpackage - 6f52a50b68 spec: Obsolete ssh driver +Merged patches (7.2.0 rc4): +- 8c6834feb6 Remove opengl display device subpackages (C9S MR 124) +- 0ecc97f29e spec: Add requires for packages with additional virtio-gpu variants (C9S MR 124) + Signed-off-by: Miroslav Rezanina + +fix --- - .distro/85-kvm.preset | 5 - .distro/Makefile | 100 + .distro/Makefile.common | 41 + .distro/README.tests | 39 + - .distro/ksm.service | 13 - - .distro/ksm.sysconfig | 4 - - .distro/ksmctl.c | 77 - - .distro/ksmtuned | 139 - - .distro/ksmtuned.conf | 21 - - .distro/ksmtuned.service | 12 - - .distro/kvm-setup | 49 - - .distro/kvm-setup.service | 14 - .distro/modules-load.conf | 4 + .distro/qemu-guest-agent.service | 1 - - .distro/qemu-kvm.spec.template | 4256 +++++++++++++++++++++++ + .distro/qemu-kvm.spec.template | 4315 +++++++++++++++++++++++ .distro/rpminspect.yaml | 6 +- .distro/scripts/extract_build_cmd.py | 12 + + .distro/scripts/process-patches.sh | 4 + .gitignore | 1 + README.systemtap | 43 + scripts/qemu-guest-agent/fsfreeze-hook | 2 +- @@ -174,19 +182,10 @@ Signed-off-by: Miroslav Rezanina 
scripts/systemtap/script.d/qemu_kvm.stp | 1 + tests/check-block.sh | 2 + ui/vnc-auth-sasl.c | 2 +- - 24 files changed, 4510 insertions(+), 338 deletions(-) - delete mode 100644 .distro/85-kvm.preset + 16 files changed, 4573 insertions(+), 4 deletions(-) create mode 100644 .distro/Makefile create mode 100644 .distro/Makefile.common create mode 100644 .distro/README.tests - delete mode 100644 .distro/ksm.service - delete mode 100644 .distro/ksm.sysconfig - delete mode 100644 .distro/ksmctl.c - delete mode 100644 .distro/ksmtuned - delete mode 100644 .distro/ksmtuned.conf - delete mode 100644 .distro/ksmtuned.service - delete mode 100644 .distro/kvm-setup - delete mode 100644 .distro/kvm-setup.service create mode 100644 .distro/modules-load.conf create mode 100644 .distro/qemu-kvm.spec.template create mode 100644 README.systemtap diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0005-Enable-disable-devices-for-RHEL.patch similarity index 93% rename from 0006-Enable-disable-devices-for-RHEL.patch rename to 0005-Enable-disable-devices-for-RHEL.patch index a53abec..767389f 100644 --- a/0006-Enable-disable-devices-for-RHEL.patch +++ b/0005-Enable-disable-devices-for-RHEL.patch @@ -1,6 +1,6 @@ -From ae20ca5826cd237e727cff1663177f7f863fab21 Mon Sep 17 00:00:00 2001 +From 90366cd2ead5a5301aaceed56477d2e6d9f1b3cd Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina -Date: Thu, 15 Jul 2021 03:22:36 -0400 +Date: Wed, 7 Dec 2022 03:05:48 -0500 Subject: Enable/disable devices for RHEL This commit adds all changes related to changes in supported devices. 
@@ -29,6 +29,9 @@ Rebase notes (7.1.0 rc0): Rebase notes (7.1.0 rc3): - Added CONFIG_VHOST_USER_FS option (all archs) +Rebase notes (7.2.0 rc20): +- Removed disabling a15mpcore.c as no longer needed + Merged patches (6.1.0): - c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak - 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI @@ -59,7 +62,7 @@ Merged patches (7.1.0 rc0): .../x86_64-softmmu/x86_64-rh-devices.mak | 109 ++++++++++++++++++ hw/arm/meson.build | 2 +- hw/block/fdc.c | 10 ++ - hw/cpu/meson.build | 5 +- + hw/cpu/meson.build | 3 +- hw/display/cirrus_vga.c | 5 +- hw/ide/piix.c | 5 +- hw/input/pckbd.c | 2 + @@ -70,7 +73,7 @@ Merged patches (7.1.0 rc0): target/ppc/cpu-models.c | 9 ++ target/s390x/cpu_models_sysemu.c | 3 + target/s390x/kvm/kvm.c | 8 ++ - 19 files changed, 285 insertions(+), 13 deletions(-) + 19 files changed, 283 insertions(+), 13 deletions(-) create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak create mode 100644 configs/devices/rh-virtio.mak @@ -364,24 +367,21 @@ index 64ae4a6899..9b8e782c19 100644 error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); return; diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build -index 9e52fee9e7..bb71c9f3e7 100644 +index 9e52fee9e7..87c209a754 100644 --- a/hw/cpu/meson.build +++ b/hw/cpu/meson.build -@@ -1,6 +1,7 @@ +@@ -1,4 +1,5 @@ -softmmu_ss.add(files('core.c', 'cluster.c')) +#softmmu_ss.add(files('core.c', 'cluster.c')) +softmmu_ss.add(files('core.c')) specific_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) specific_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) - specific_ss.add(when: 'CONFIG_A9MPCORE', if_true: files('a9mpcore.c')) --specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) -+#specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c 
-index 3bb6a58698..6447fdb02e 100644 +index 6e8c747c46..1948ebee8e 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c -@@ -2945,7 +2945,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) +@@ -2946,7 +2946,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); int16_t device_id = pc->device_id; @@ -394,10 +394,10 @@ index 3bb6a58698..6447fdb02e 100644 * Also accept 8 MB/16 MB for backward compatibility. */ diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index 9a9b28078e..f3ce3fbcee 100644 +index 267dbf37db..87fcda4062 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -197,7 +197,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -199,7 +199,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -407,7 +407,7 @@ index 9a9b28078e..f3ce3fbcee 100644 } static const TypeInfo piix3_ide_info = { -@@ -220,6 +221,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -222,6 +223,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -480,10 +480,10 @@ index 793df42e21..cd3c305471 100644 endif diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index 3099b38e32..10d91c4ef0 100644 +index 9a2cef7d05..a528ff9a3d 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c -@@ -147,6 +147,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) +@@ -151,6 +151,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) /* CPU models. These are not needed for the AArch64 linux-user build. 
*/ #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) @@ -491,7 +491,7 @@ index 3099b38e32..10d91c4ef0 100644 #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { -@@ -500,6 +501,7 @@ static void cortex_a9_initfn(Object *obj) +@@ -504,6 +505,7 @@ static void cortex_a9_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x41093000; define_arm_cp_regs(cpu, cortexa9_cp_reginfo); } @@ -499,7 +499,7 @@ index 3099b38e32..10d91c4ef0 100644 #ifndef CONFIG_USER_ONLY static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -524,6 +526,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { +@@ -528,6 +530,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, }; @@ -507,7 +507,7 @@ index 3099b38e32..10d91c4ef0 100644 static void cortex_a7_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -572,6 +575,7 @@ static void cortex_a7_initfn(Object *obj) +@@ -576,6 +579,7 @@ static void cortex_a7_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x41072000; define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ } @@ -515,7 +515,7 @@ index 3099b38e32..10d91c4ef0 100644 static void cortex_a15_initfn(Object *obj) { -@@ -618,6 +622,7 @@ static void cortex_a15_initfn(Object *obj) +@@ -624,6 +628,7 @@ static void cortex_a15_initfn(Object *obj) define_arm_cp_regs(cpu, cortexa15_cp_reginfo); } @@ -523,7 +523,7 @@ index 3099b38e32..10d91c4ef0 100644 static void cortex_m0_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -1058,6 +1063,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) +@@ -1065,6 +1070,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) cc->gdb_core_xml_file = "arm-m-profile.xml"; } @@ -531,7 +531,7 @@ index 3099b38e32..10d91c4ef0 100644 #ifndef TARGET_AARCH64 /* -@@ -1125,6 +1131,7 @@ static void arm_max_initfn(Object *obj) +@@ -1132,6 +1138,7 @@ static void 
arm_max_initfn(Object *obj) #endif /* !TARGET_AARCH64 */ static const ARMCPUInfo arm_tcg_cpus[] = { @@ -539,7 +539,7 @@ index 3099b38e32..10d91c4ef0 100644 { .name = "arm926", .initfn = arm926_initfn }, { .name = "arm946", .initfn = arm946_initfn }, { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1140,7 +1147,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1147,7 +1154,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "cortex-a7", .initfn = cortex_a7_initfn }, { .name = "cortex-a8", .initfn = cortex_a8_initfn }, { .name = "cortex-a9", .initfn = cortex_a9_initfn }, @@ -549,7 +549,7 @@ index 3099b38e32..10d91c4ef0 100644 { .name = "cortex-m0", .initfn = cortex_m0_initfn, .class_init = arm_v7m_class_init }, { .name = "cortex-m3", .initfn = cortex_m3_initfn, -@@ -1171,6 +1180,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1178,6 +1187,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, @@ -634,10 +634,10 @@ index d8a141a023..d086b1c39c 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c -index 7bd8db0e7b..81cb489694 100644 +index 3ac7ec9acf..97da1a6424 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c -@@ -2520,6 +2520,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2529,6 +2529,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } diff --git a/0005-Re-enable-capstone-internal-build.patch b/0005-Re-enable-capstone-internal-build.patch deleted file mode 100644 index 29a7649..0000000 --- a/0005-Re-enable-capstone-internal-build.patch +++ /dev/null @@ -1,251 +0,0 @@ -From 963cd2a0d78f6cec0ee5203ca2d2de77094bf047 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 1 Jun 2022 
05:45:58 -0400 -Subject: Re-enable capstone internal build - -Until capstone component is added to RHEL 9 we revert changes removing -internal capstone usage. - -Signed-off-by: Miroslav Rezanina ---- - .distro/Makefile.common | 3 +- - .distro/capstone.tar.gz | Bin 0 -> 5765837 bytes - .distro/qemu-kvm.spec.template | 5 +- - configure | 12 ++++ - meson.build | 116 +++++++++++++++++++++++++++++++-- - meson_options.txt | 3 +- - scripts/meson-buildoptions.sh | 5 +- - 7 files changed, 135 insertions(+), 9 deletions(-) - create mode 100644 .distro/capstone.tar.gz - -diff --git a/configure b/configure -index 72ab03f11a..448b0c82cb 100755 ---- a/configure -+++ b/configure -@@ -322,8 +322,10 @@ vfio_user_server="disabled" - - # 1. Track which submodules are needed - if test "$default_feature" = no ; then -+ capstone="disabled" - slirp="disabled" - else -+ capstone="auto" - slirp="auto" - fi - fdt="auto" -@@ -902,6 +904,15 @@ for opt do - --enable-uuid|--disable-uuid) - echo "$0: $opt is obsolete, UUID support is always built" >&2 - ;; -+ --disable-capstone) capstone="disabled" -+ ;; -+ --enable-capstone) capstone="enabled" -+ ;; -+ --enable-capstone=git) capstone="internal" -+ ;; -+ --enable-capstone=*) capstone="$optarg" -+ ;; -+ - --with-git=*) git="$optarg" - ;; - --with-git-submodules=*) -@@ -2742,6 +2753,7 @@ if test "$skip_meson" = no; then - test "$werror" = yes && meson_option_add -Dwerror=true - - # QEMU options -+ test "$capstone" != auto && meson_option_add "-Dcapstone=$capstone" - test "$cfi" != false && meson_option_add "-Dcfi=$cfi" - test "$fdt" != auto && meson_option_add "-Dfdt=$fdt" - test -n "${LIB_FUZZING_ENGINE+xxx}" && meson_option_add "-Dfuzzing_engine=$LIB_FUZZING_ENGINE" -diff --git a/meson.build b/meson.build -index 20fddbd707..9e6a979c13 100644 ---- a/meson.build -+++ b/meson.build -@@ -2596,10 +2596,13 @@ genh += custom_target('config-poison.h', - ############## - - capstone = not_found --if not get_option('capstone').auto() or have_system or 
have_user -+capstone_opt = get_option('capstone') -+if capstone_opt in ['enabled', 'auto', 'system'] -+ have_internal = fs.exists(meson.current_source_dir() / 'capstone/Makefile') - capstone = dependency('capstone', version: '>=3.0.5', - kwargs: static_kwargs, method: 'pkg-config', -- required: get_option('capstone')) -+ required: capstone_opt == 'system' or -+ capstone_opt == 'enabled' and not have_internal) - - # Some versions of capstone have broken pkg-config file - # that reports a wrong -I path, causing the #include to -@@ -2608,10 +2611,113 @@ if not get_option('capstone').auto() or have_system or have_user - if capstone.found() and not cc.compiles('#include ', - dependencies: [capstone]) - capstone = not_found -- if get_option('capstone').enabled() -- error('capstone requested, but it does not appear to work') -+ if capstone_opt == 'system' -+ error('system capstone requested, it does not appear to work') - endif - endif -+ -+ if capstone.found() -+ capstone_opt = 'system' -+ elif have_internal -+ capstone_opt = 'internal' -+ else -+ capstone_opt = 'disabled' -+ endif -+endif -+if capstone_opt == 'internal' -+ capstone_data = configuration_data() -+ capstone_data.set('CAPSTONE_USE_SYS_DYN_MEM', '1') -+ -+ capstone_files = files( -+ 'capstone/cs.c', -+ 'capstone/MCInst.c', -+ 'capstone/MCInstrDesc.c', -+ 'capstone/MCRegisterInfo.c', -+ 'capstone/SStream.c', -+ 'capstone/utils.c' -+ ) -+ -+ if 'CONFIG_ARM_DIS' in config_all_disas -+ capstone_data.set('CAPSTONE_HAS_ARM', '1') -+ capstone_files += files( -+ 'capstone/arch/ARM/ARMDisassembler.c', -+ 'capstone/arch/ARM/ARMInstPrinter.c', -+ 'capstone/arch/ARM/ARMMapping.c', -+ 'capstone/arch/ARM/ARMModule.c' -+ ) -+ endif -+ -+ # FIXME: This config entry currently depends on a c++ compiler. -+ # Which is needed for building libvixl, but not for capstone. 
-+ if 'CONFIG_ARM_A64_DIS' in config_all_disas -+ capstone_data.set('CAPSTONE_HAS_ARM64', '1') -+ capstone_files += files( -+ 'capstone/arch/AArch64/AArch64BaseInfo.c', -+ 'capstone/arch/AArch64/AArch64Disassembler.c', -+ 'capstone/arch/AArch64/AArch64InstPrinter.c', -+ 'capstone/arch/AArch64/AArch64Mapping.c', -+ 'capstone/arch/AArch64/AArch64Module.c' -+ ) -+ endif -+ -+ if 'CONFIG_PPC_DIS' in config_all_disas -+ capstone_data.set('CAPSTONE_HAS_POWERPC', '1') -+ capstone_files += files( -+ 'capstone/arch/PowerPC/PPCDisassembler.c', -+ 'capstone/arch/PowerPC/PPCInstPrinter.c', -+ 'capstone/arch/PowerPC/PPCMapping.c', -+ 'capstone/arch/PowerPC/PPCModule.c' -+ ) -+ endif -+ -+ if 'CONFIG_S390_DIS' in config_all_disas -+ capstone_data.set('CAPSTONE_HAS_SYSZ', '1') -+ capstone_files += files( -+ 'capstone/arch/SystemZ/SystemZDisassembler.c', -+ 'capstone/arch/SystemZ/SystemZInstPrinter.c', -+ 'capstone/arch/SystemZ/SystemZMapping.c', -+ 'capstone/arch/SystemZ/SystemZModule.c', -+ 'capstone/arch/SystemZ/SystemZMCTargetDesc.c' -+ ) -+ endif -+ -+ if 'CONFIG_I386_DIS' in config_all_disas -+ capstone_data.set('CAPSTONE_HAS_X86', 1) -+ capstone_files += files( -+ 'capstone/arch/X86/X86Disassembler.c', -+ 'capstone/arch/X86/X86DisassemblerDecoder.c', -+ 'capstone/arch/X86/X86ATTInstPrinter.c', -+ 'capstone/arch/X86/X86IntelInstPrinter.c', -+ 'capstone/arch/X86/X86InstPrinterCommon.c', -+ 'capstone/arch/X86/X86Mapping.c', -+ 'capstone/arch/X86/X86Module.c' -+ ) -+ endif -+ -+ configure_file(output: 'capstone-defs.h', configuration: capstone_data) -+ -+ capstone_cargs = [ -+ # FIXME: There does not seem to be a way to completely replace the c_args -+ # that come from add_project_arguments() -- we can only add to them. -+ # So: disable all warnings with a big hammer. -+ '-Wno-error', '-w', -+ -+ # Include all configuration defines via a header file, which will wind up -+ # as a dependency on the object file, and thus changes here will result -+ # in a rebuild. 
-+ '-include', 'capstone-defs.h', -+ -+ '-Wp,-D_GLIBCXX_ASSERTIONS', -+ -+ ] -+ -+ libcapstone = static_library('capstone', -+ build_by_default: false, -+ sources: capstone_files, -+ c_args: capstone_cargs, -+ include_directories: 'capstone/include') -+ capstone = declare_dependency(link_with: libcapstone, -+ include_directories: 'capstone/include/capstone') - endif - - slirp = not_found -@@ -3977,7 +4083,7 @@ summary_info += {'bzip2 support': libbzip2} - summary_info += {'lzfse support': liblzfse} - summary_info += {'zstd support': zstd} - summary_info += {'NUMA host support': numa} --summary_info += {'capstone': capstone} -+summary_info += {'capstone': capstone_opt == 'internal' ? capstone_opt : capstone} - summary_info += {'libpmem support': libpmem} - summary_info += {'libdaxctl support': libdaxctl} - summary_info += {'libudev': libudev} -diff --git a/meson_options.txt b/meson_options.txt -index e58e158396..7cd920fcd6 100644 ---- a/meson_options.txt -+++ b/meson_options.txt -@@ -262,7 +262,8 @@ option('libvduse', type: 'feature', value: 'auto', - option('vduse_blk_export', type: 'feature', value: 'auto', - description: 'VDUSE block export support') - --option('capstone', type: 'feature', value: 'auto', -+option('capstone', type: 'combo', value: 'auto', -+ choices: ['disabled', 'enabled', 'auto', 'system', 'internal'], - description: 'Whether and how to find the capstone library') - option('slirp', type: 'combo', value: 'auto', - choices: ['disabled', 'enabled', 'auto', 'system', 'internal'], -diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh -index 359b04e0e6..b1001aa1db 100644 ---- a/scripts/meson-buildoptions.sh -+++ b/scripts/meson-buildoptions.sh -@@ -16,6 +16,9 @@ meson_options_help() { - printf "%s\n" ' --enable-block-drv-whitelist-in-tools' - printf "%s\n" ' use block whitelist also in tools instead of only' - printf "%s\n" ' QEMU' -+ printf "%s\n" ' --enable-capstone[=CHOICE]' -+ printf "%s\n" ' Whether and how to find the 
capstone library' -+ printf "%s\n" ' (choices: auto/disabled/enabled/internal/system)' - printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)' - printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation' - printf "%s\n" ' --enable-debug-mutex mutex debugging support' -@@ -75,7 +78,6 @@ meson_options_help() { - printf "%s\n" ' bzip2 bzip2 support for DMG images' - printf "%s\n" ' canokey CanoKey support' - printf "%s\n" ' cap-ng cap_ng support' -- printf "%s\n" ' capstone Whether and how to find the capstone library' - printf "%s\n" ' cloop cloop image format support' - printf "%s\n" ' cocoa Cocoa user interface (macOS only)' - printf "%s\n" ' coreaudio CoreAudio sound support' -@@ -216,6 +218,7 @@ _meson_option_parse() { - --disable-cap-ng) printf "%s" -Dcap_ng=disabled ;; - --enable-capstone) printf "%s" -Dcapstone=enabled ;; - --disable-capstone) printf "%s" -Dcapstone=disabled ;; -+ --enable-capstone=*) quote_sh "-Dcapstone=$2" ;; - --enable-cfi) printf "%s" -Dcfi=true ;; - --disable-cfi) printf "%s" -Dcfi=false ;; - --enable-cfi-debug) printf "%s" -Dcfi_debug=true ;; --- -2.31.1 - diff --git a/0007-Machine-type-related-general-changes.patch b/0006-Machine-type-related-general-changes.patch similarity index 92% rename from 0007-Machine-type-related-general-changes.patch rename to 0006-Machine-type-related-general-changes.patch index 6d1d2b0..fc2a89d 100644 --- a/0007-Machine-type-related-general-changes.patch +++ b/0006-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From 8ee73de7f30f39293388932bbb0d69b6c9435ab1 Mon Sep 17 00:00:00 2001 +From 0208f38671b9de4036c0d56142a7f22e5091bae0 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -43,10 +43,13 @@ Merged patches (7.0.0): Merged patches (7.1.0 rc0): - 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/acpi/piix4.c chunk) - 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure 
(only hw/core/machine.c and include/hw/boards.h chunk) + +Merged patches (7.2.0 rc0): +- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) --- hw/acpi/piix4.c | 2 +- hw/arm/virt.c | 2 +- - hw/core/machine.c | 211 +++++++++++++++++++++++++++++++++++ + hw/core/machine.c | 222 +++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- hw/i386/pc_piix.c | 2 + hw/i386/pc_q35.c | 2 + @@ -55,10 +58,10 @@ Merged patches (7.1.0 rc0): hw/timer/i8254_common.c | 2 +- hw/usb/hcd-xhci-pci.c | 59 +++++++--- hw/usb/hcd-xhci-pci.h | 1 + - include/hw/boards.h | 28 +++++ + include/hw/boards.h | 31 +++++ include/hw/firmware/smbios.h | 5 +- include/hw/i386/pc.h | 3 + - 14 files changed, 346 insertions(+), 23 deletions(-) + 14 files changed, 360 insertions(+), 23 deletions(-) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 0a81f1ad93..dbfb362a8f 100644 @@ -74,10 +77,10 @@ index 0a81f1ad93..dbfb362a8f 100644 .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 9633f822f3..389d6882dd 100644 +index b871350856..d633300fdc 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1617,7 +1617,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1619,7 +1619,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, @@ -87,10 +90,10 @@ index 9633f822f3..389d6882dd 100644 smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, diff --git a/hw/core/machine.c b/hw/core/machine.c -index a673302cce..909f75770b 100644 +index 8d34caa31d..9edec1ca05 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -40,6 +40,217 @@ +@@ -40,6 +40,228 @@ #include "hw/virtio/virtio-pci.h" #include "qom/object_interfaces.h" @@ -101,6 +104,17 @@ index a673302cce..909f75770b 100644 + "machine types for previous major releases are deprecated"; + +/* ++ * Mostly the same as hw_compat_7_0 ++ */ ++GlobalProperty hw_compat_rhel_9_1[] = { ++ /* hw_compat_rhel_9_1 from hw_compat_7_0 */ ++ { "arm-gicv3-common", "force-8-bit-prio", "on" }, ++ /* hw_compat_rhel_9_1 from hw_compat_7_0 */ ++ { "nvme-ns", "eui64-default", "on"}, ++}; ++const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); ++ ++/* + * Mostly the same as hw_compat_6_2 + */ +GlobalProperty hw_compat_rhel_9_0[] = { @@ -305,14 +319,14 @@ index a673302cce..909f75770b 100644 +}; +const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); + - GlobalProperty hw_compat_7_0[] = { - { "arm-gicv3-common", "force-8-bit-prio", "on" }, - { "nvme-ns", "eui64-default", "on"}, + GlobalProperty hw_compat_7_1[] = { + { "virtio-device", "queue_reset", "false" }, + }; diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index 46abbc5653..505467059b 100644 +index 2a5437d803..0db2c2b2a1 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c -@@ -88,7 +88,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) +@@ -89,7 +89,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) } static Property vga_isa_properties[] = { @@ -322,10 +336,10 @@ index 46abbc5653..505467059b 100644 }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 20962c34e7..1ec5d6a4f8 100644 +index 0ad0ed1603..0985ff67d2 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -185,6 
+185,8 @@ static void pc_init1(MachineState *machine, +@@ -187,6 +187,8 @@ static void pc_init1(MachineState *machine, smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -335,7 +349,7 @@ index 20962c34e7..1ec5d6a4f8 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 2e5dae9a89..ef471f6664 100644 +index a496bd6e74..ea582254e3 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -201,6 +201,8 @@ static void pc_q35_init(MachineState *machine) @@ -348,10 +362,10 @@ index 2e5dae9a89..ef471f6664 100644 } diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 6b65823b4b..75dacabc43 100644 +index 700b1b66b6..13693aeb4f 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c -@@ -3179,7 +3179,7 @@ static int rtl8139_pre_save(void *opaque) +@@ -3178,7 +3178,7 @@ static int rtl8139_pre_save(void *opaque) static const VMStateDescription vmstate_rtl8139 = { .name = "rtl8139", @@ -360,7 +374,7 @@ index 6b65823b4b..75dacabc43 100644 .minimum_version_id = 3, .post_load = rtl8139_post_load, .pre_save = rtl8139_pre_save, -@@ -3260,7 +3260,9 @@ static const VMStateDescription vmstate_rtl8139 = { +@@ -3259,7 +3259,9 @@ static const VMStateDescription vmstate_rtl8139 = { VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), @@ -371,7 +385,7 @@ index 6b65823b4b..75dacabc43 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 60349ee402..0edcc98434 100644 +index b4243de735..c5ad69237e 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -57,6 +57,9 @@ static bool smbios_legacy = true; @@ -384,7 +398,7 @@ index 60349ee402..0edcc98434 100644 uint8_t *smbios_tables; size_t smbios_tables_len; -@@ -639,7 +642,7 @@ static void smbios_build_type_1_table(void) +@@ -669,7 +672,7 @@ static void smbios_build_type_1_table(void) 
static void smbios_build_type_2_table(void) { @@ -393,7 +407,7 @@ index 60349ee402..0edcc98434 100644 SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); SMBIOS_TABLE_SET_STR(2, product_str, type2.product); -@@ -914,7 +917,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) +@@ -977,7 +980,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) void smbios_set_defaults(const char *manufacturer, const char *product, const char *version, bool legacy_mode, @@ -405,7 +419,7 @@ index 60349ee402..0edcc98434 100644 { smbios_have_defaults = true; smbios_legacy = legacy_mode; -@@ -935,11 +941,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, +@@ -998,11 +1004,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, g_free(smbios_entries); } @@ -466,7 +480,7 @@ index 050875b497..32935da46c 100644 vmstate_pit_channel, PITChannelState), VMSTATE_INT64(channels[0].next_transition_time, diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c -index e934b1a5b1..e18b05e528 100644 +index 643d4643e4..529bad9366 100644 --- a/hw/usb/hcd-xhci-pci.c +++ b/hw/usb/hcd-xhci-pci.c @@ -104,6 +104,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id) @@ -577,13 +591,16 @@ index c193f79443..086a1feb1e 100644 #endif diff --git a/include/hw/boards.h b/include/hw/boards.h -index 7b416c9787..bfd757c561 100644 +index 90f1dd3aeb..2209d4e416 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -451,4 +451,32 @@ extern const size_t hw_compat_2_2_len; +@@ -454,4 +454,35 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; ++extern GlobalProperty hw_compat_rhel_9_1[]; ++extern const size_t hw_compat_rhel_9_1_len; ++ +extern GlobalProperty hw_compat_rhel_9_0[]; +extern const size_t hw_compat_rhel_9_0_len; + @@ -614,10 +631,10 @@ index 7b416c9787..bfd757c561 100644 +extern const char *rhel_old_machine_deprecation; #endif diff 
--git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 4b7ad77a44..9acff96a86 100644 +index 7f3259a630..d24b3ccd32 100644 --- a/include/hw/firmware/smbios.h +++ b/include/hw/firmware/smbios.h -@@ -272,7 +272,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); +@@ -294,7 +294,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); void smbios_set_cpuid(uint32_t version, uint32_t features); void smbios_set_defaults(const char *manufacturer, const char *product, const char *version, bool legacy_mode, @@ -630,7 +647,7 @@ index 4b7ad77a44..9acff96a86 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 8435733bd6..4d9e95a091 100644 +index c95333514e..3754eaa97d 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -112,6 +112,9 @@ struct PCMachineClass { diff --git a/0008-Add-aarch64-machine-types.patch b/0007-Add-aarch64-machine-types.patch similarity index 89% rename from 0008-Add-aarch64-machine-types.patch rename to 0007-Add-aarch64-machine-types.patch index 91d4194..06611e7 100644 --- a/0008-Add-aarch64-machine-types.patch +++ b/0007-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From dfe5c09a8cca1dcbff5798951fa88b7f540ea4ed Mon Sep 17 00:00:00 2001 +From 8501581c99760ed8a800d0c98eeb17a4bf450366 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -23,6 +23,9 @@ Rebase notes (7.1.0 rc0): Rebase notes (7.1.0 rc3): - Updated dtb_randomness comment +Rebase notes (7.2.0 rc0): +- Disabled cortex-a35 + Merged patches (6.2.0): - 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type - f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type @@ -43,20 +46,23 @@ Merged patches (7.0.0): Merged patches (7.1.0 rc0): - ac97dd4f9f RHEL-only: AArch64: Drop unsupported CPU types - e9c0a70664 target/arm: deprecate named CPU models + +Merged patches (7.2.0 rc0): 
+- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) --- - hw/arm/virt.c | 236 ++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 237 ++++++++++++++++++++++++++++++++- include/hw/arm/virt.h | 8 ++ target/arm/cpu-qom.h | 1 + target/arm/cpu.c | 5 + target/arm/cpu.h | 2 + - target/arm/cpu64.c | 14 +- + target/arm/cpu64.c | 16 ++- target/arm/cpu_tcg.c | 12 +- target/arm/helper.c | 2 + tests/qtest/arm-cpu-features.c | 6 + - 9 files changed, 274 insertions(+), 12 deletions(-) + 9 files changed, 277 insertions(+), 12 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 389d6882dd..9737b77eca 100644 +index d633300fdc..dfcab40a73 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -80,6 +80,7 @@ @@ -117,13 +123,14 @@ index 389d6882dd..9737b77eca 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -197,14 +239,18 @@ static const int a15irqmap[] = { +@@ -197,15 +239,19 @@ static const int a15irqmap[] = { }; static const char *valid_cpus[] = { +#if 0 /* Disabled for Red Hat Enterprise Linux */ ARM_CPU_TYPE_NAME("cortex-a7"), ARM_CPU_TYPE_NAME("cortex-a15"), + ARM_CPU_TYPE_NAME("cortex-a35"), ARM_CPU_TYPE_NAME("cortex-a53"), +#endif /* disabled for RHEL */ ARM_CPU_TYPE_NAME("cortex-a57"), @@ -136,7 +143,7 @@ index 389d6882dd..9737b77eca 100644 ARM_CPU_TYPE_NAME("host"), ARM_CPU_TYPE_NAME("max"), }; -@@ -2288,6 +2334,7 @@ static void machvirt_init(MachineState *machine) +@@ -2290,6 +2336,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -144,7 +151,7 @@ index 389d6882dd..9737b77eca 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2315,6 +2362,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2317,6 +2364,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -152,7 +159,7 @@ index 389d6882dd..9737b77eca 100644 
static bool virt_get_highmem(Object *obj, Error **errp) { -@@ -2344,6 +2392,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) +@@ -2346,6 +2394,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) vms->its = value; } @@ -160,7 +167,7 @@ index 389d6882dd..9737b77eca 100644 static bool virt_get_dtb_randomness(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2357,6 +2406,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) +@@ -2359,6 +2408,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) vms->dtb_randomness = value; } @@ -168,7 +175,7 @@ index 389d6882dd..9737b77eca 100644 static char *virt_get_oem_id(Object *obj, Error **errp) { -@@ -2440,6 +2490,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) +@@ -2442,6 +2492,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) vms->ras = value; } @@ -176,7 +183,7 @@ index 389d6882dd..9737b77eca 100644 static bool virt_get_mte(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2453,6 +2504,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) +@@ -2455,6 +2506,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) vms->mte = value; } @@ -184,7 +191,7 @@ index 389d6882dd..9737b77eca 100644 static char *virt_get_gic_version(Object *obj, Error **errp) { -@@ -2884,6 +2936,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2886,6 +2938,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return fixed_ipa ? 
0 : requested_pa_size; } @@ -192,7 +199,7 @@ index 389d6882dd..9737b77eca 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -3285,3 +3338,184 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -3294,3 +3347,185 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -372,6 +379,7 @@ index 389d6882dd..9737b77eca 100644 + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + + /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ + vmc->no_tcg_lpa2 = true; @@ -412,10 +420,10 @@ index 64c44cef2d..82e97249bc 100644 void arm_cpu_register(const ARMCPUInfo *info); diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 7ec3281da9..86174077f1 100644 +index 38d066c294..a845814bfb 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c -@@ -2200,8 +2200,13 @@ static void arm_cpu_instance_init(Object *obj) +@@ -2250,8 +2250,13 @@ static void arm_cpu_instance_init(Object *obj) static void cpu_register_class_init(ObjectClass *oc, void *data) { ARMCPUClass *acc = ARM_CPU_CLASS(oc); @@ -430,7 +438,7 @@ index 7ec3281da9..86174077f1 100644 void arm_cpu_register(const ARMCPUInfo *info) diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 5168e3d837..876ca7cebb 100644 +index 9aeed3c848..f9f504d89e 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -34,6 +34,8 @@ @@ -443,10 +451,26 @@ index 5168e3d837..876ca7cebb 100644 #define EXCP_SWI 2 /* software interrupt */ #define EXCP_PREFETCH_ABORT 3 diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index 78e27f778a..1a16c9dccc 100644 +index 3d74f134f5..4b330a52b5 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c -@@ -94,6 +94,7 @@ static void aarch64_a57_initfn(Object *obj) +@@ -36,6 +36,7 @@ + #include 
"hw/qdev-properties.h" + #include "internals.h" + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void aarch64_a35_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -115,6 +116,7 @@ static void aarch64_a35_initfn(Object *obj) + /* These values are the same with A53/A57/A72. */ + define_cortex_a72_a57_a53_cp_reginfo(cpu); + } ++#endif /* disabled for RHEL */ + + void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) + { +@@ -735,6 +737,7 @@ static void aarch64_a57_initfn(Object *obj) define_cortex_a72_a57_a53_cp_reginfo(cpu); } @@ -454,30 +478,22 @@ index 78e27f778a..1a16c9dccc 100644 static void aarch64_a53_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -343,6 +344,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj) +@@ -1033,6 +1036,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj) /* From D5.1 AArch64 PMU register summary */ cpu->isar.reset_pmcr_el0 = 0x410c3000; } +#endif /* disabled for RHEL */ - void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) + static void aarch64_host_initfn(Object *obj) { -@@ -1108,6 +1110,7 @@ static void aarch64_max_initfn(Object *obj) - qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property); +@@ -1240,13 +1244,18 @@ static void aarch64_max_initfn(Object *obj) } -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void aarch64_a64fx_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -1156,14 +1159,18 @@ static void aarch64_a64fx_initfn(Object *obj) - - /* TODO: Add A64FX specific HPC extension registers */ - } -+#endif /* disabled for RHEL */ - static const ARMCPUInfo aarch64_cpus[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-a35", .initfn = aarch64_a35_initfn }, - { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, ++#endif /* disabled for RHEL */ + { .name = "cortex-a57", .initfn = aarch64_a57_initfn, + .deprecation_note = RHEL_CPU_DEPRECATION }, +#if 0 /* Disabled for Red Hat Enterprise Linux */ @@ -490,7 +506,7 @@ index 
78e27f778a..1a16c9dccc 100644 { .name = "max", .initfn = aarch64_max_initfn }, #if defined(CONFIG_KVM) || defined(CONFIG_HVF) { .name = "host", .initfn = aarch64_host_initfn }, -@@ -1235,8 +1242,13 @@ static void aarch64_cpu_instance_init(Object *obj) +@@ -1318,8 +1327,13 @@ static void aarch64_cpu_instance_init(Object *obj) static void cpu_register_class_init(ObjectClass *oc, void *data) { ARMCPUClass *acc = ARM_CPU_CLASS(oc); @@ -505,10 +521,10 @@ index 78e27f778a..1a16c9dccc 100644 void aarch64_cpu_register(const ARMCPUInfo *info) diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index 10d91c4ef0..33cbc2cfe8 100644 +index a528ff9a3d..053f70e399 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c -@@ -144,10 +144,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) +@@ -148,10 +148,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) } #endif /* !CONFIG_USER_ONLY */ @@ -520,7 +536,7 @@ index 10d91c4ef0..33cbc2cfe8 100644 #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { -@@ -501,7 +501,6 @@ static void cortex_a9_initfn(Object *obj) +@@ -505,7 +505,6 @@ static void cortex_a9_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x41093000; define_arm_cp_regs(cpu, cortexa9_cp_reginfo); } @@ -528,7 +544,7 @@ index 10d91c4ef0..33cbc2cfe8 100644 #ifndef CONFIG_USER_ONLY static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -526,7 +525,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { +@@ -530,7 +529,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, }; @@ -536,7 +552,7 @@ index 10d91c4ef0..33cbc2cfe8 100644 static void cortex_a7_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -575,7 +573,6 @@ static void cortex_a7_initfn(Object *obj) +@@ -579,7 +577,6 @@ static void cortex_a7_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x41072000; define_arm_cp_regs(cpu, 
cortexa15_cp_reginfo); /* Same as A15 */ } @@ -544,7 +560,7 @@ index 10d91c4ef0..33cbc2cfe8 100644 static void cortex_a15_initfn(Object *obj) { -@@ -622,7 +619,6 @@ static void cortex_a15_initfn(Object *obj) +@@ -628,7 +625,6 @@ static void cortex_a15_initfn(Object *obj) define_arm_cp_regs(cpu, cortexa15_cp_reginfo); } @@ -552,7 +568,7 @@ index 10d91c4ef0..33cbc2cfe8 100644 static void cortex_m0_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -1063,7 +1059,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) +@@ -1070,7 +1066,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) cc->gdb_core_xml_file = "arm-m-profile.xml"; } @@ -560,7 +576,7 @@ index 10d91c4ef0..33cbc2cfe8 100644 #ifndef TARGET_AARCH64 /* -@@ -1131,7 +1126,6 @@ static void arm_max_initfn(Object *obj) +@@ -1138,7 +1133,6 @@ static void arm_max_initfn(Object *obj) #endif /* !TARGET_AARCH64 */ static const ARMCPUInfo arm_tcg_cpus[] = { @@ -568,7 +584,7 @@ index 10d91c4ef0..33cbc2cfe8 100644 { .name = "arm926", .initfn = arm926_initfn }, { .name = "arm946", .initfn = arm946_initfn }, { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1147,9 +1141,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1154,9 +1148,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "cortex-a7", .initfn = cortex_a7_initfn }, { .name = "cortex-a8", .initfn = cortex_a8_initfn }, { .name = "cortex-a9", .initfn = cortex_a9_initfn }, @@ -578,7 +594,7 @@ index 10d91c4ef0..33cbc2cfe8 100644 { .name = "cortex-m0", .initfn = cortex_m0_initfn, .class_init = arm_v7m_class_init }, { .name = "cortex-m3", .initfn = cortex_m3_initfn, -@@ -1180,7 +1172,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1187,7 +1179,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, @@ -586,16 +602,16 @@ index 10d91c4ef0..33cbc2cfe8 100644 #ifndef TARGET_AARCH64 { 
.name = "max", .initfn = arm_max_initfn }, #endif -@@ -1208,3 +1199,4 @@ static void arm_tcg_cpu_register_types(void) +@@ -1215,3 +1206,4 @@ static void arm_tcg_cpu_register_types(void) type_init(arm_tcg_cpu_register_types) #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ +#endif /* disabled for RHEL */ diff --git a/target/arm/helper.c b/target/arm/helper.c -index d7bc467a2a..a91494b7d3 100644 +index d8c8223ec3..ad9d235773 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c -@@ -8213,6 +8213,7 @@ void arm_cpu_list(void) +@@ -8476,6 +8476,7 @@ void arm_cpu_list(void) static void arm_cpu_add_definition(gpointer data, gpointer user_data) { ObjectClass *oc = data; @@ -603,7 +619,7 @@ index d7bc467a2a..a91494b7d3 100644 CpuDefinitionInfoList **cpu_list = user_data; CpuDefinitionInfo *info; const char *typename; -@@ -8222,6 +8223,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) +@@ -8485,6 +8486,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) info->name = g_strndup(typename, strlen(typename) - strlen("-" TYPE_ARM_CPU)); info->q_typename = g_strdup(typename); diff --git a/0009-Add-ppc64-machine-types.patch b/0008-Add-ppc64-machine-types.patch similarity index 95% rename from 0009-Add-ppc64-machine-types.patch rename to 0008-Add-ppc64-machine-types.patch index 28548b3..a3cb0a3 100644 --- a/0009-Add-ppc64-machine-types.patch +++ b/0008-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 77a23381d2a445ee499c4335816f3df08d545aed Mon Sep 17 00:00:00 2001 +From 2c523f1b6c9470e1cd517ba99e414cde02727e16 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -34,10 +34,10 @@ Merged patches (7.1.0 rc0): 8 files changed, 314 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index bc9ba6e6dc..5d0989f87a 100644 +index 66b414d2e9..499eb49253 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -1633,6 +1633,9 @@ static void 
spapr_machine_reset(MachineState *machine) +@@ -1633,6 +1633,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); @@ -47,7 +47,7 @@ index bc9ba6e6dc..5d0989f87a 100644 first_ppc_cpu = POWERPC_CPU(first_cpu); if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && -@@ -3344,6 +3347,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +@@ -3347,6 +3350,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) spapr->host_serial = g_strdup(value); } @@ -68,7 +68,7 @@ index bc9ba6e6dc..5d0989f87a 100644 static void spapr_instance_init(Object *obj) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); -@@ -3422,6 +3439,12 @@ static void spapr_instance_init(Object *obj) +@@ -3425,6 +3442,12 @@ static void spapr_instance_init(Object *obj) spapr_get_host_serial, spapr_set_host_serial); object_property_set_description(obj, "host-serial", "Host serial number to advertise in guest device tree"); @@ -81,7 +81,7 @@ index bc9ba6e6dc..5d0989f87a 100644 } static void spapr_machine_finalizefn(Object *obj) -@@ -4679,6 +4702,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4682,6 +4705,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) vmc->client_architecture_support = spapr_vof_client_architecture_support; vmc->quiesce = spapr_vof_quiesce; vmc->setprop = spapr_vof_setprop; @@ -89,15 +89,15 @@ index bc9ba6e6dc..5d0989f87a 100644 } static const TypeInfo spapr_machine_info = { -@@ -4730,6 +4754,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4733,6 +4757,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-7.1 + * pseries-7.2 */ -@@ -4868,6 +4893,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4882,6 +4907,7 @@ 
static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -105,7 +105,7 @@ index bc9ba6e6dc..5d0989f87a 100644 /* * pseries-4.0 -@@ -4887,6 +4913,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -4901,6 +4927,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; return true; } @@ -114,7 +114,7 @@ index bc9ba6e6dc..5d0989f87a 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -5214,6 +5242,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -5228,6 +5256,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -375,7 +375,7 @@ index fcb5dfe792..ab8fb5bf62 100644 qdev_unrealize(DEVICE(cpu)); return false; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 530d739b1d..6f96972392 100644 +index 04a95669ab..d5f4cf5e03 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -154,6 +154,7 @@ struct SpaprMachineClass { @@ -434,10 +434,10 @@ index cd3ff700ac..1cb49c8087 100644 { "405cr", "405crc" }, { "405gp", "405gpd" }, diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index a4c893cfad..c6575493b7 100644 +index 81d4263a07..508fbed90b 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1464,6 +1464,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1467,6 +1467,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -446,7 +446,7 @@ index a4c893cfad..c6575493b7 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index 466d0d2f4c..22c100e227 100644 +index 7c25348b7b..83671c955f 100644 --- 
a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -89,6 +89,7 @@ static int cap_ppc_nested_kvm_hv; @@ -482,9 +482,9 @@ index 466d0d2f4c..22c100e227 100644 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) { uint32_t host_pvr = mfpvr(); -@@ -2966,3 +2978,18 @@ bool kvm_arch_cpu_check_are_resettable(void) +@@ -2970,3 +2982,18 @@ bool kvm_arch_cpu_check_are_resettable(void) + void kvm_arch_accel_class_init(ObjectClass *oc) { - return true; } + +void kvmppc_svm_allow(Error **errp) diff --git a/0010-Add-s390x-machine-types.patch b/0009-Add-s390x-machine-types.patch similarity index 93% rename from 0010-Add-s390x-machine-types.patch rename to 0009-Add-s390x-machine-types.patch index 584b8e0..5860009 100644 --- a/0010-Add-s390x-machine-types.patch +++ b/0009-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 6a14fc5e35a8cec7f049c203d6dc2390fac175f1 Mon Sep 17 00:00:00 2001 +From 1973257ed781a93943f27f1518933e8c09c50f88 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -27,26 +27,29 @@ Merged patches (7.1.0 rc0): - 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (only hw/s390x/s390-virtio-ccw.c chunk) - c8ad21ca31 redhat: Update s390x machine type compatibility for rebase to QEMU 7.0.0 - 5bcf8d874c target/s390x: deprecate CPUs older than z14 + +Merged patches (7.2.0 rc0): +- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) --- - hw/s390x/s390-virtio-ccw.c | 107 +++++++++++++++++++++++++++++++ + hw/s390x/s390-virtio-ccw.c | 108 +++++++++++++++++++++++++++++++ target/s390x/cpu_models.c | 11 ++++ target/s390x/cpu_models.h | 2 + target/s390x/cpu_models_sysemu.c | 2 + - 4 files changed, 122 insertions(+) + 4 files changed, 123 insertions(+) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index cc3097bfee..4c68d72000 100644 +index 2e64ffab45..8d5221fbb1 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -792,6 +792,7 @@ bool 
css_migration_enabled(void) +@@ -823,6 +823,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_7_1_instance_options(MachineState *machine) + static void ccw_machine_7_2_instance_options(MachineState *machine) { } -@@ -1131,6 +1132,112 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1186,6 +1187,113 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); @@ -62,6 +65,7 @@ index cc3097bfee..4c68d72000 100644 + +static void ccw_machine_rhel900_class_options(MachineClass *mc) +{ ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); +} +DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); @@ -205,7 +209,7 @@ index c3a4f80633..739770dc15 100644 static void s390_qemu_cpu_model_class_init(ObjectClass *oc, void *data) diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h -index 74d1f87e4f..372160bcd7 100644 +index fb1adc8b21..d76745afa9 100644 --- a/target/s390x/cpu_models.h +++ b/target/s390x/cpu_models.h @@ -38,6 +38,8 @@ struct S390CPUDef { diff --git a/0011-Add-x86_64-machine-types.patch b/0010-Add-x86_64-machine-types.patch similarity index 95% rename from 0011-Add-x86_64-machine-types.patch rename to 0010-Add-x86_64-machine-types.patch index 75c17f0..181342a 100644 --- a/0011-Add-x86_64-machine-types.patch +++ b/0010-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 68c1bbec1fae27b527f85f2666c54fff3d499eaf Mon Sep 17 00:00:00 2001 +From 0935624ccdddc286d6eeeb0c1b70d78983c21aa2 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -36,10 +36,13 @@ Merged patches (7.1.0 rc0): - 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only 
hw/i386/pc.c chunk) - 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (x86_64 specific changes) - 35b5c8554f target/i386: deprecate CPUs older than x86_64-v2 ABI + +Merged patches (7.2.0 rc0): +- 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) --- hw/i386/pc.c | 147 ++++++++++++++++++++++- - hw/i386/pc_piix.c | 83 ++++++++++++- - hw/i386/pc_q35.c | 231 ++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 86 +++++++++++++- + hw/i386/pc_q35.c | 234 ++++++++++++++++++++++++++++++++++++- hw/s390x/s390-virtio-ccw.c | 1 + include/hw/boards.h | 2 + include/hw/i386/pc.h | 27 +++++ @@ -47,13 +50,13 @@ Merged patches (7.1.0 rc0): target/i386/kvm/kvm-cpu.c | 1 + target/i386/kvm/kvm.c | 4 + tests/qtest/pvpanic-test.c | 5 +- - 10 files changed, 515 insertions(+), 7 deletions(-) + 10 files changed, 521 insertions(+), 7 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 7280c02ce3..09b62db9e9 100644 +index 546b703cb4..c7b1350e64 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -390,6 +390,149 @@ GlobalProperty pc_compat_1_4[] = { +@@ -393,6 +393,149 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -203,7 +206,7 @@ index 7280c02ce3..09b62db9e9 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1910,6 +2053,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1907,6 +2050,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->pvh_enabled = true; pcmc->kvmclock_create_always = true; assert(!mc->get_hotplug_handler); @@ -211,7 +214,7 @@ index 7280c02ce3..09b62db9e9 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1920,7 +2064,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1917,7 +2061,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) 
mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; mc->block_default_type = IF_IDE; @@ -222,10 +225,10 @@ index 7280c02ce3..09b62db9e9 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 1ec5d6a4f8..52111697cb 100644 +index 0985ff67d2..173a1fd10b 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -51,6 +51,7 @@ +@@ -53,6 +53,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/xen.h" @@ -233,7 +236,7 @@ index 1ec5d6a4f8..52111697cb 100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -182,8 +183,8 @@ static void pc_init1(MachineState *machine, +@@ -184,8 +185,8 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ @@ -244,7 +247,7 @@ index 1ec5d6a4f8..52111697cb 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -331,6 +332,7 @@ static void pc_init1(MachineState *machine, +@@ -334,6 +335,7 @@ static void pc_init1(MachineState *machine, * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). 
*/ @@ -252,7 +255,7 @@ index 1ec5d6a4f8..52111697cb 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -880,3 +882,80 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -896,3 +898,83 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -301,6 +304,9 @@ index 1ec5d6a4f8..52111697cb 100644 + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ pcmc->legacy_no_rng_seed = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_9_1, ++ hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, + hw_compat_rhel_9_0_len); + compat_props_add(m->compat_props, pc_rhel_9_0_compat, @@ -334,7 +340,7 @@ index 1ec5d6a4f8..52111697cb 100644 +DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, + pc_machine_rhel760_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index ef471f6664..4e7f1a707c 100644 +index ea582254e3..97c3630021 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -198,8 +198,8 @@ static void pc_q35_init(MachineState *machine) @@ -356,7 +362,7 @@ index ef471f6664..4e7f1a707c 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -654,3 +655,229 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -666,3 +667,232 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -397,6 +403,9 @@ index ef471f6664..4e7f1a707c 100644 + m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.0.0"; ++ pcmc->legacy_no_rng_seed = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_9_1, ++ hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, + hw_compat_rhel_9_0_len); + 
compat_props_add(m->compat_props, pc_rhel_9_0_compat, @@ -587,10 +596,10 @@ index ef471f6664..4e7f1a707c 100644 +DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, + pc_q35_machine_rhel760_options); diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 4c68d72000..e1e6b6b5a7 100644 +index 8d5221fbb1..ba640e3d9e 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -1157,6 +1157,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) +@@ -1213,6 +1213,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) static void ccw_machine_rhel860_class_options(MachineClass *mc) { ccw_machine_rhel900_class_options(mc); @@ -599,7 +608,7 @@ index 4c68d72000..e1e6b6b5a7 100644 /* All RHEL machines for prior major releases are deprecated */ mc->deprecation_reason = rhel_old_machine_deprecation; diff --git a/include/hw/boards.h b/include/hw/boards.h -index bfd757c561..0d22c19f4a 100644 +index 2209d4e416..fd75f551b1 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -266,6 +266,8 @@ struct MachineClass { @@ -612,10 +621,10 @@ index bfd757c561..0d22c19f4a 100644 bool smbus_no_migration_support; bool nvdimm_supported; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 4d9e95a091..f96bf85df1 100644 +index 3754eaa97d..4266fe2fdb 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -290,6 +290,33 @@ extern const size_t pc_compat_1_5_len; +@@ -293,6 +293,33 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; @@ -650,7 +659,7 @@ index 4d9e95a091..f96bf85df1 100644 static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \ { \ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 1db1278a59..db97eeb8b0 100644 +index 22b681ca37..f7c526cbe6 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1832,9 +1832,13 @@ static const CPUCaches epyc_milan_cache_info = 
{ @@ -816,10 +825,10 @@ index 7237378a7d..7b8a3d5af0 100644 }; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index f148a6d52f..4e5d4bafc4 100644 +index a213209379..81526a1575 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c -@@ -3626,6 +3626,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3707,6 +3707,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -827,7 +836,7 @@ index f148a6d52f..4e5d4bafc4 100644 kvm_msr_buf_reset(cpu); -@@ -3981,6 +3982,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -4062,6 +4063,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; diff --git a/0012-Enable-make-check.patch b/0011-Enable-make-check.patch similarity index 94% rename from 0012-Enable-make-check.patch rename to 0011-Enable-make-check.patch index 3987a1c..d0be8e6 100644 --- a/0012-Enable-make-check.patch +++ b/0011-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 0833b7e925c98253c90c9de18758517f2778f77d Mon Sep 17 00:00:00 2001 +From badfb1290c8eea8a2e1769b2392c7899d5077698 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -48,7 +48,7 @@ Merged patches (7.1.0 rc0): 12 files changed, 18 insertions(+), 19 deletions(-) diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py -index 0b2b0dc692..3a7b5f0748 100644 +index 00a26e4a0c..fe5ecf238a 100644 --- a/tests/avocado/replay_kernel.py +++ b/tests/avocado/replay_kernel.py @@ -147,7 +147,7 @@ def test_aarch64_virt(self): @@ -118,20 +118,20 @@ index 5052883fb6..b5286f4b12 100644 qtest_outl(s, 0xcf8, 0x80001010); qtest_outl(s, 0xcfc, 0xe1020000); diff --git a/tests/qtest/fuzz-virtio-scsi-test.c b/tests/qtest/fuzz-virtio-scsi-test.c -index 71c91b0356..dae4139c17 100644 +index e37b48b2cc..88647da054 100644 --- a/tests/qtest/fuzz-virtio-scsi-test.c +++ b/tests/qtest/fuzz-virtio-scsi-test.c 
@@ -19,7 +19,7 @@ static void test_mmio_oob_from_memory_region_cache(void) { QTestState *s; -- s = qtest_init("-M pc-q35-5.2 -display none -m 512M " -+ s = qtest_init("-M pc-q35-rhel8.4.0 -display none -m 512M " +- s = qtest_init("-M pc-q35-5.2 -m 512M " ++ s = qtest_init("-M pc-q35-rhel8.4.0 -m 512M " "-device virtio-scsi,num_queues=8,addr=03.0 "); qtest_outl(s, 0xcf8, 0x80001811); diff --git a/tests/qtest/intel-hda-test.c b/tests/qtest/intel-hda-test.c -index a58c98e4d1..c8387e39ce 100644 +index d4a8db6fd6..1a796ec15a 100644 --- a/tests/qtest/intel-hda-test.c +++ b/tests/qtest/intel-hda-test.c @@ -38,7 +38,7 @@ static void test_issue542_ich6(void) @@ -144,18 +144,18 @@ index a58c98e4d1..c8387e39ce 100644 qtest_outl(s, 0xcf8, 0x80000804); diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build -index fd5d6e5ae1..d9409560cd 100644 +index 32f028872c..1e78a1a055 100644 --- a/tests/qtest/libqos/meson.build +++ b/tests/qtest/libqos/meson.build -@@ -44,7 +44,7 @@ libqos_srcs = files( +@@ -43,7 +43,7 @@ libqos_srcs = files( 'virtio-rng.c', 'virtio-scsi.c', 'virtio-serial.c', - 'virtio-iommu.c', +# 'virtio-iommu.c', + 'virtio-gpio.c', 'generic-pcihost.c', - # qgraph machines: diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c index 8ac95b89f7..cd2102555c 100644 --- a/tests/qtest/lpc-ich9-test.c @@ -170,10 +170,10 @@ index 8ac95b89f7..cd2102555c 100644 qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index be4b30dea2..2c3d62a9fe 100644 +index c07a5b1a5f..9df3f9f8b9 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build -@@ -76,7 +76,6 @@ qtests_i386 = \ +@@ -82,7 +82,6 @@ qtests_i386 = \ config_all_devices.has_key('CONFIG_Q35') and \ config_all_devices.has_key('CONFIG_VIRTIO_PCI') and \ slirp.found() ? 
['virtio-net-failover'] : []) + \ @@ -181,7 +181,7 @@ index be4b30dea2..2c3d62a9fe 100644 qtests_pci + \ qtests_cxl + \ ['fdc-test', -@@ -90,7 +89,6 @@ qtests_i386 = \ +@@ -96,7 +95,6 @@ qtests_i386 = \ 'drive_del-test', 'tco-test', 'cpu-plug-test', @@ -189,7 +189,7 @@ index be4b30dea2..2c3d62a9fe 100644 'vmgenid-test', 'migration-test', 'test-x86-cpuid-compat', -@@ -212,15 +210,13 @@ qtests_arm = \ +@@ -209,15 +207,13 @@ qtests_arm = \ # TODO: once aarch64 TCG is fixed on ARM 32 bit host, make bios-tables-test unconditional qtests_aarch64 = \ @@ -206,7 +206,7 @@ index be4b30dea2..2c3d62a9fe 100644 qtests_s390x = \ (slirp.found() ? ['pxe-test', 'test-netfilter'] : []) + \ -@@ -228,7 +224,6 @@ qtests_s390x = \ +@@ -225,7 +221,6 @@ qtests_s390x = \ (config_host.has_key('CONFIG_POSIX') ? ['test-filter-redirector'] : []) + \ ['boot-serial-test', 'drive_del-test', @@ -245,7 +245,7 @@ index 10ef9d2a91..3855873050 100644 qtest_start("-device nec-usb-xhci,id=xhci" diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c -index 443ee56de9..1bccb3bab9 100644 +index 4a809590bf..1bf3fa641c 100644 --- a/tests/qtest/virtio-net-failover.c +++ b/tests/qtest/virtio-net-failover.c @@ -25,6 +25,7 @@ diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch similarity index 98% rename from 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch rename to 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch index 65d06ee..477a75d 100644 --- a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 2092b90c5d7791bedbdb4ba067c90ae44d355e66 Mon Sep 17 00:00:00 2001 +From 0804844e4755377be6d2ebad578794ad9f4f3f31 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned diff --git 
a/0014-Add-support-statement-to-help-output.patch b/0013-Add-support-statement-to-help-output.patch similarity index 88% rename from 0014-Add-support-statement-to-help-output.patch rename to 0013-Add-support-statement-to-help-output.patch index b040f61..022f194 100644 --- a/0014-Add-support-statement-to-help-output.patch +++ b/0013-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 793720efdf835b13246f02191f6c07a60a726841 Mon Sep 17 00:00:00 2001 +From 283a0e258dc2f3b83c58e6f948bafe430cd2c1d5 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -21,10 +21,10 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c -index 706bd7cff7..e08ef71108 100644 +index 5115221efe..17188df528 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c -@@ -836,9 +836,17 @@ static void version(void) +@@ -834,9 +834,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -42,7 +42,7 @@ index 706bd7cff7..e08ef71108 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", g_get_prgname()); -@@ -864,6 +872,7 @@ static void help(int exitcode) +@@ -862,6 +870,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); diff --git a/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch similarity index 91% rename from 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch rename to 0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index 02ec067..e39555b 100644 --- a/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From 62589d41546b4200fe5cff7504514bbe3aa72e45 Mon Sep 17 00:00:00 2001 +From d8ded821aa698b3b03bd9089fbd6c2b33da87b9e Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 
2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -36,10 +36,10 @@ index 52d6454b93..d74dbdeca9 100644 .. |I2C| replace:: I\ :sup:`2`\ C .. |I2S| replace:: I\ :sup:`2`\ S diff --git a/qemu-options.hx b/qemu-options.hx -index 31c04f7eea..1290fab5ba 100644 +index 7f99d15b23..ea02ca3a45 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -3250,11 +3250,11 @@ SRST +@@ -3300,11 +3300,11 @@ SRST :: @@ -54,7 +54,7 @@ index 31c04f7eea..1290fab5ba 100644 + -netdev type=vhost-user,id=net0,chardev=chr0 \ + -device virtio-net-pci,netdev=net0 - ``-netdev vhost-vdpa,vhostdev=/path/to/dev`` + ``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]`` Establish a vhost-vdpa netdev. -- 2.31.1 diff --git a/0016-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch similarity index 97% rename from 0016-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch rename to 0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch index e1c2169..2bedb0b 100644 --- a/0016-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ b/0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -1,4 +1,4 @@ -From e0b9e638d4145e576409c754f525b83f630d7bb0 Mon Sep 17 00:00:00 2001 +From 9c6acadb444c9300d7c18b6939ce4f96484aeacc Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 6 Feb 2019 03:58:56 +0000 Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts diff --git a/0017-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch similarity index 94% rename from 0017-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch rename to 0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch index 3a0c99a..cee5476 100644 --- a/0017-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +++ b/0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -1,4 +1,4 @@ 
-From e87482425c6d8dafe0bd447a1e68710ea6969906 Mon Sep 17 00:00:00 2001 +From 02fde2a0cbd679ebd4104fe5522572c31ec23abd Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 20 Aug 2021 18:25:12 +0200 Subject: qcow2: Deprecation warning when opening v2 images rw @@ -44,10 +44,10 @@ Rebase notes (6.1.0): 2 files changed, 7 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c -index c6c6692fb7..bccfbc0024 100644 +index 4d6666d3ff..d2ba263e9d 100644 --- a/block/qcow2.c +++ b/block/qcow2.c -@@ -1335,6 +1335,12 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, +@@ -1336,6 +1336,12 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, ret = -ENOTSUP; goto fail; } diff --git a/0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch b/0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch new file mode 100644 index 0000000..001880b --- /dev/null +++ b/0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch @@ -0,0 +1,26 @@ +From 21ed34787b9492c2cfe3d8fc12a32748bcf02307 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 9 Nov 2022 07:08:32 -0500 +Subject: Addd 7.2 compat bits for RHEL 9.1 machine type + +Signed-off-by: Miroslav Rezanina +--- + hw/core/machine.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 9edec1ca05..3d851d34da 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -54,6 +54,8 @@ GlobalProperty hw_compat_rhel_9_1[] = { + { "arm-gicv3-common", "force-8-bit-prio", "on" }, + /* hw_compat_rhel_9_1 from hw_compat_7_0 */ + { "nvme-ns", "eui64-default", "on"}, ++ /* hw_compat_rhel_9_1 from hw_compat_7_1 */ ++ { "virtio-device", "queue_reset", "false" }, + }; + const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); + +-- +2.31.1 + diff --git a/0018-Introduce-upstream-7.0-compat-changes.patch b/0018-Introduce-upstream-7.0-compat-changes.patch deleted file mode 100644 index fa3a289..0000000 --- 
a/0018-Introduce-upstream-7.0-compat-changes.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 0be2889fa221ba98bd436fa4b4542e57f623d31b Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 1 Jun 2022 08:09:04 -0400 -Subject: Introduce upstream 7.0 compat changes - -Addding upstream compat changes to 9.1 structure - -Signed-off-by: Miroslav Rezanina - ---- - -Rebase notes (weekly-220608): -- Added new 7.0 compat added upstream - -Rebase notes (QEMU 7.1.0 RC3): -- Setting legacy_no_rng_seed to true for RHEL machine types ---- - hw/arm/virt.c | 1 + - hw/core/machine.c | 11 +++++++++++ - hw/i386/pc_piix.c | 3 +++ - hw/i386/pc_q35.c | 3 +++ - hw/s390x/s390-virtio-ccw.c | 1 + - include/hw/boards.h | 3 +++ - 6 files changed, 22 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 9737b77eca..e80c5b7d8b 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3514,6 +3514,7 @@ static void rhel900_virt_options(MachineClass *mc) - VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); - - compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); -+ compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); - - /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ - vmc->no_tcg_lpa2 = true; -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 909f75770b..5a3867692d 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -46,6 +46,17 @@ - const char *rhel_old_machine_deprecation = - "machine types for previous major releases are deprecated"; - -+/* -+ * Mostly the same as hw_compat_7_0 -+ */ -+GlobalProperty hw_compat_rhel_9_1[] = { -+ /* hw_compat_rhel_9_1 from hw_compat_7_0 */ -+ { "arm-gicv3-common", "force-8-bit-prio", "on" }, -+ /* hw_compat_rhel_9_1 from hw_compat_7_0 */ -+ { "nvme-ns", "eui64-default", "on"}, -+}; -+const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); -+ - /* - * Mostly the same as hw_compat_6_2 - */ -diff --git a/hw/i386/pc_piix.c 
b/hw/i386/pc_piix.c -index 52111697cb..7f56ef4e81 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -927,6 +927,9 @@ static void pc_machine_rhel760_options(MachineClass *m) - pcmc->kvmclock_create_always = false; - /* From pc_i440fx_5_1_machine_options() */ - pcmc->pci_root_uid = 1; -+ pcmc->legacy_no_rng_seed = true; -+ compat_props_add(m->compat_props, hw_compat_rhel_9_1, -+ hw_compat_rhel_9_1_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_0, - hw_compat_rhel_9_0_len); - compat_props_add(m->compat_props, pc_rhel_9_0_compat, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 4e7f1a707c..f16dc3bff5 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -692,6 +692,9 @@ static void pc_q35_machine_rhel900_options(MachineClass *m) - m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; - pcmc->smbios_stream_product = "RHEL"; - pcmc->smbios_stream_version = "9.0.0"; -+ pcmc->legacy_no_rng_seed = true; -+ compat_props_add(m->compat_props, hw_compat_rhel_9_1, -+ hw_compat_rhel_9_1_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_0, - hw_compat_rhel_9_0_len); - compat_props_add(m->compat_props, pc_rhel_9_0_compat, -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index e1e6b6b5a7..de748a0a57 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1144,6 +1144,7 @@ static void ccw_machine_rhel900_instance_options(MachineState *machine) - - static void ccw_machine_rhel900_class_options(MachineClass *mc) - { -+ compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); - } - DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); -diff --git a/include/hw/boards.h b/include/hw/boards.h -index 0d22c19f4a..130f1c3424 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -453,6 +453,9 @@ extern const size_t hw_compat_2_2_len; - extern GlobalProperty hw_compat_2_1[]; - extern const size_t 
hw_compat_2_1_len; - -+extern GlobalProperty hw_compat_rhel_9_1[]; -+extern const size_t hw_compat_rhel_9_1_len; -+ - extern GlobalProperty hw_compat_rhel_9_0[]; - extern const size_t hw_compat_rhel_9_0_len; - --- -2.31.1 - diff --git a/0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch b/0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch new file mode 100644 index 0000000..2642b30 --- /dev/null +++ b/0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch @@ -0,0 +1,47 @@ +From 27c188c6a4cbd908269cf06affd24025708ecb5c Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Thu, 17 Nov 2022 16:47:16 +0100 +Subject: redhat: Update s390x machine type compatibility for QEMU 7.2.0 update + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2143585 +Upstream Status: n/a (rhel-only) + +Add the compatibility handling for the rebase from QEMU 7.1 to 7.2, +i.e. the settings from ccw_machine_7_1_class_options() and +ccw_machine_7_1_instance_options() to the rhel9.1.0 machine type +(earlier settings have been added by previous rebases already). 
+ +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index ba640e3d9e..97e868ada0 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1195,12 +1195,21 @@ static void ccw_machine_rhel900_instance_options(MachineState *machine) + static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; + + s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE); + } + + static void ccw_machine_rhel900_class_options(MachineClass *mc) + { ++ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, ++ }; ++ ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); ++ s390mc->max_threads = S390_MAX_CPUS; + } + DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); + +-- +2.31.1 + diff --git a/0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch b/0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch new file mode 100644 index 0000000..cb69b93 --- /dev/null +++ b/0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch @@ -0,0 +1,43 @@ +From c1a21266d8bed27f1ef1f705818fde5f9350b73f Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Wed, 23 Nov 2022 14:15:37 +0100 +Subject: redhat: aarch64: add rhel9.2.0 virt machine type + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2131982 +Upstream: RHEL only + +Signed-off-by: Cornelia Huck +--- + hw/arm/virt.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index dfcab40a73..0a94f31dd1 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c 
+@@ -3518,14 +3518,21 @@ static void rhel_machine_init(void) + } + type_init(rhel_machine_init); + ++static void rhel920_virt_options(MachineClass *mc) ++{ ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) ++ + static void rhel900_virt_options(MachineClass *mc) + { + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + ++ rhel920_virt_options(mc); ++ + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + + /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ + vmc->no_tcg_lpa2 = true; + } +-DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) ++DEFINE_RHEL_MACHINE(9, 0, 0) +-- +2.31.1 + diff --git a/0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch b/0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch new file mode 100644 index 0000000..144bd92 --- /dev/null +++ b/0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch @@ -0,0 +1,62 @@ +From a932b8d4296066be01613ada84241b501488f99f Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Thu, 17 Nov 2022 17:03:24 +0100 +Subject: redhat: Add new rhel-9.2.0 s390x machine type + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2136473 +Upstream Status: n/a (rhel-only) + +RHEL 9.2 will be an EUS release - we want to have a new machine +type here to make sure that we have a spot where we can wire up +fixes later. 
+ +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 97e868ada0..aa142a1a4e 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1190,10 +1190,21 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); + #endif + + ++static void ccw_machine_rhel920_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel920_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true); ++ + static void ccw_machine_rhel900_instance_options(MachineState *machine) + { + static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; + ++ ccw_machine_rhel920_instance_options(machine); ++ + s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); + s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE); + } +@@ -1206,12 +1217,14 @@ static void ccw_machine_rhel900_class_options(MachineClass *mc) + { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, + }; + ++ ccw_machine_rhel920_class_options(mc); ++ + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); + s390mc->max_threads = S390_MAX_CPUS; + } +-DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); ++DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", false); + + static void ccw_machine_rhel860_instance_options(MachineState *machine) + { +-- +2.31.1 + diff --git a/0022-x86-rhel-9.2.0-machine-type.patch b/0022-x86-rhel-9.2.0-machine-type.patch new file mode 100644 index 0000000..8502b91 --- /dev/null +++ b/0022-x86-rhel-9.2.0-machine-type.patch @@ -0,0 +1,75 @@ +From f33ca8aed4744238230f1f2cc47df77aa4c9e0ac Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Thu, 17 Nov 2022 12:36:30 +0000 +Subject: x86: rhel 9.2.0 machine type + +Add a 9.2.0 x86 machine type, and fix up the compatibility +for 9.0.0 and older. + +pc_compat_7_1 and pc_compat_7_0 are both empty upstream so there's +nothing to do there. + +Signed-off-by: Dr. David Alan Gilbert +--- + hw/i386/pc_piix.c | 1 + + hw/i386/pc_q35.c | 21 ++++++++++++++++++++- + 2 files changed, 21 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 173a1fd10b..fc06877344 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -944,6 +944,7 @@ static void pc_machine_rhel760_options(MachineClass *m) + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; + pcmc->legacy_no_rng_seed = true; ++ pcmc->enforce_amd_1tb_hole = false; + compat_props_add(m->compat_props, hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 97c3630021..52cfe3bf45 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -692,6 +692,23 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + } + ++static void pc_q35_init_rhel920(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel920_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL"; ++ pcmc->smbios_stream_version = "9.2.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, ++ pc_q35_machine_rhel920_options); ++ + static void pc_q35_init_rhel900(MachineState *machine) + { + pc_q35_init(machine); +@@ -700,11 +717,13 @@ static void pc_q35_init_rhel900(MachineState *machine) + static void pc_q35_machine_rhel900_options(MachineClass *m) + { + PCMachineClass *pcmc = 
PC_MACHINE_CLASS(m); +- pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel920_options(m); + m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.0.0"; + pcmc->legacy_no_rng_seed = true; ++ pcmc->enforce_amd_1tb_hole = false; + compat_props_add(m->compat_props, hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, +-- +2.31.1 + diff --git a/kvm-Revert-Re-enable-capstone-internal-build.patch b/kvm-Revert-Re-enable-capstone-internal-build.patch deleted file mode 100644 index 3dbb5ca..0000000 --- a/kvm-Revert-Re-enable-capstone-internal-build.patch +++ /dev/null @@ -1,252 +0,0 @@ -From 4ce18f26f30cfb8860153825c504289f43800f5e Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Mon, 19 Sep 2022 03:23:41 -0400 -Subject: [PATCH 28/29] Revert "Re-enable capstone internal build" - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 119: Use capstone package for qemu-kvm build -RH-Bugzilla: 2127825 -RH-Acked-by: Thomas Huth -RH-Commit: [1/2] bd58ace2233e3071703a69ea9e7bfcd82416cda1 (mrezanin/centos-src-qemu-kvm) - -This reverts commit c2c10b636a97d1cb9c4abbc4152a34ebf2f44817. - -Signed-off-by: Miroslav Rezanina ---- - configure | 12 ---- - meson.build | 116 ++-------------------------------- - meson_options.txt | 3 +- - scripts/meson-buildoptions.sh | 5 +- - 4 files changed, 7 insertions(+), 129 deletions(-) - -diff --git a/configure b/configure -index 448b0c82cb..72ab03f11a 100755 ---- a/configure -+++ b/configure -@@ -322,10 +322,8 @@ vfio_user_server="disabled" - - # 1. 
Track which submodules are needed - if test "$default_feature" = no ; then -- capstone="disabled" - slirp="disabled" - else -- capstone="auto" - slirp="auto" - fi - fdt="auto" -@@ -904,15 +902,6 @@ for opt do - --enable-uuid|--disable-uuid) - echo "$0: $opt is obsolete, UUID support is always built" >&2 - ;; -- --disable-capstone) capstone="disabled" -- ;; -- --enable-capstone) capstone="enabled" -- ;; -- --enable-capstone=git) capstone="internal" -- ;; -- --enable-capstone=*) capstone="$optarg" -- ;; -- - --with-git=*) git="$optarg" - ;; - --with-git-submodules=*) -@@ -2753,7 +2742,6 @@ if test "$skip_meson" = no; then - test "$werror" = yes && meson_option_add -Dwerror=true - - # QEMU options -- test "$capstone" != auto && meson_option_add "-Dcapstone=$capstone" - test "$cfi" != false && meson_option_add "-Dcfi=$cfi" - test "$fdt" != auto && meson_option_add "-Dfdt=$fdt" - test -n "${LIB_FUZZING_ENGINE+xxx}" && meson_option_add "-Dfuzzing_engine=$LIB_FUZZING_ENGINE" -diff --git a/meson.build b/meson.build -index 9e6a979c13..20fddbd707 100644 ---- a/meson.build -+++ b/meson.build -@@ -2596,13 +2596,10 @@ genh += custom_target('config-poison.h', - ############## - - capstone = not_found --capstone_opt = get_option('capstone') --if capstone_opt in ['enabled', 'auto', 'system'] -- have_internal = fs.exists(meson.current_source_dir() / 'capstone/Makefile') -+if not get_option('capstone').auto() or have_system or have_user - capstone = dependency('capstone', version: '>=3.0.5', - kwargs: static_kwargs, method: 'pkg-config', -- required: capstone_opt == 'system' or -- capstone_opt == 'enabled' and not have_internal) -+ required: get_option('capstone')) - - # Some versions of capstone have broken pkg-config file - # that reports a wrong -I path, causing the #include to -@@ -2611,113 +2608,10 @@ if capstone_opt in ['enabled', 'auto', 'system'] - if capstone.found() and not cc.compiles('#include ', - dependencies: [capstone]) - capstone = not_found -- if capstone_opt == 
'system' -- error('system capstone requested, it does not appear to work') -+ if get_option('capstone').enabled() -+ error('capstone requested, but it does not appear to work') - endif - endif -- -- if capstone.found() -- capstone_opt = 'system' -- elif have_internal -- capstone_opt = 'internal' -- else -- capstone_opt = 'disabled' -- endif --endif --if capstone_opt == 'internal' -- capstone_data = configuration_data() -- capstone_data.set('CAPSTONE_USE_SYS_DYN_MEM', '1') -- -- capstone_files = files( -- 'capstone/cs.c', -- 'capstone/MCInst.c', -- 'capstone/MCInstrDesc.c', -- 'capstone/MCRegisterInfo.c', -- 'capstone/SStream.c', -- 'capstone/utils.c' -- ) -- -- if 'CONFIG_ARM_DIS' in config_all_disas -- capstone_data.set('CAPSTONE_HAS_ARM', '1') -- capstone_files += files( -- 'capstone/arch/ARM/ARMDisassembler.c', -- 'capstone/arch/ARM/ARMInstPrinter.c', -- 'capstone/arch/ARM/ARMMapping.c', -- 'capstone/arch/ARM/ARMModule.c' -- ) -- endif -- -- # FIXME: This config entry currently depends on a c++ compiler. -- # Which is needed for building libvixl, but not for capstone. 
-- if 'CONFIG_ARM_A64_DIS' in config_all_disas -- capstone_data.set('CAPSTONE_HAS_ARM64', '1') -- capstone_files += files( -- 'capstone/arch/AArch64/AArch64BaseInfo.c', -- 'capstone/arch/AArch64/AArch64Disassembler.c', -- 'capstone/arch/AArch64/AArch64InstPrinter.c', -- 'capstone/arch/AArch64/AArch64Mapping.c', -- 'capstone/arch/AArch64/AArch64Module.c' -- ) -- endif -- -- if 'CONFIG_PPC_DIS' in config_all_disas -- capstone_data.set('CAPSTONE_HAS_POWERPC', '1') -- capstone_files += files( -- 'capstone/arch/PowerPC/PPCDisassembler.c', -- 'capstone/arch/PowerPC/PPCInstPrinter.c', -- 'capstone/arch/PowerPC/PPCMapping.c', -- 'capstone/arch/PowerPC/PPCModule.c' -- ) -- endif -- -- if 'CONFIG_S390_DIS' in config_all_disas -- capstone_data.set('CAPSTONE_HAS_SYSZ', '1') -- capstone_files += files( -- 'capstone/arch/SystemZ/SystemZDisassembler.c', -- 'capstone/arch/SystemZ/SystemZInstPrinter.c', -- 'capstone/arch/SystemZ/SystemZMapping.c', -- 'capstone/arch/SystemZ/SystemZModule.c', -- 'capstone/arch/SystemZ/SystemZMCTargetDesc.c' -- ) -- endif -- -- if 'CONFIG_I386_DIS' in config_all_disas -- capstone_data.set('CAPSTONE_HAS_X86', 1) -- capstone_files += files( -- 'capstone/arch/X86/X86Disassembler.c', -- 'capstone/arch/X86/X86DisassemblerDecoder.c', -- 'capstone/arch/X86/X86ATTInstPrinter.c', -- 'capstone/arch/X86/X86IntelInstPrinter.c', -- 'capstone/arch/X86/X86InstPrinterCommon.c', -- 'capstone/arch/X86/X86Mapping.c', -- 'capstone/arch/X86/X86Module.c' -- ) -- endif -- -- configure_file(output: 'capstone-defs.h', configuration: capstone_data) -- -- capstone_cargs = [ -- # FIXME: There does not seem to be a way to completely replace the c_args -- # that come from add_project_arguments() -- we can only add to them. -- # So: disable all warnings with a big hammer. -- '-Wno-error', '-w', -- -- # Include all configuration defines via a header file, which will wind up -- # as a dependency on the object file, and thus changes here will result -- # in a rebuild. 
-- '-include', 'capstone-defs.h', -- -- '-Wp,-D_GLIBCXX_ASSERTIONS', -- -- ] -- -- libcapstone = static_library('capstone', -- build_by_default: false, -- sources: capstone_files, -- c_args: capstone_cargs, -- include_directories: 'capstone/include') -- capstone = declare_dependency(link_with: libcapstone, -- include_directories: 'capstone/include/capstone') - endif - - slirp = not_found -@@ -4083,7 +3977,7 @@ summary_info += {'bzip2 support': libbzip2} - summary_info += {'lzfse support': liblzfse} - summary_info += {'zstd support': zstd} - summary_info += {'NUMA host support': numa} --summary_info += {'capstone': capstone_opt == 'internal' ? capstone_opt : capstone} -+summary_info += {'capstone': capstone} - summary_info += {'libpmem support': libpmem} - summary_info += {'libdaxctl support': libdaxctl} - summary_info += {'libudev': libudev} -diff --git a/meson_options.txt b/meson_options.txt -index 7cd920fcd6..e58e158396 100644 ---- a/meson_options.txt -+++ b/meson_options.txt -@@ -262,8 +262,7 @@ option('libvduse', type: 'feature', value: 'auto', - option('vduse_blk_export', type: 'feature', value: 'auto', - description: 'VDUSE block export support') - --option('capstone', type: 'combo', value: 'auto', -- choices: ['disabled', 'enabled', 'auto', 'system', 'internal'], -+option('capstone', type: 'feature', value: 'auto', - description: 'Whether and how to find the capstone library') - option('slirp', type: 'combo', value: 'auto', - choices: ['disabled', 'enabled', 'auto', 'system', 'internal'], -diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh -index b1001aa1db..359b04e0e6 100644 ---- a/scripts/meson-buildoptions.sh -+++ b/scripts/meson-buildoptions.sh -@@ -16,9 +16,6 @@ meson_options_help() { - printf "%s\n" ' --enable-block-drv-whitelist-in-tools' - printf "%s\n" ' use block whitelist also in tools instead of only' - printf "%s\n" ' QEMU' -- printf "%s\n" ' --enable-capstone[=CHOICE]' -- printf "%s\n" ' Whether and how to find the 
capstone library' -- printf "%s\n" ' (choices: auto/disabled/enabled/internal/system)' - printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)' - printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation' - printf "%s\n" ' --enable-debug-mutex mutex debugging support' -@@ -78,6 +75,7 @@ meson_options_help() { - printf "%s\n" ' bzip2 bzip2 support for DMG images' - printf "%s\n" ' canokey CanoKey support' - printf "%s\n" ' cap-ng cap_ng support' -+ printf "%s\n" ' capstone Whether and how to find the capstone library' - printf "%s\n" ' cloop cloop image format support' - printf "%s\n" ' cocoa Cocoa user interface (macOS only)' - printf "%s\n" ' coreaudio CoreAudio sound support' -@@ -218,7 +216,6 @@ _meson_option_parse() { - --disable-cap-ng) printf "%s" -Dcap_ng=disabled ;; - --enable-capstone) printf "%s" -Dcapstone=enabled ;; - --disable-capstone) printf "%s" -Dcapstone=disabled ;; -- --enable-capstone=*) quote_sh "-Dcapstone=$2" ;; - --enable-cfi) printf "%s" -Dcfi=true ;; - --disable-cfi) printf "%s" -Dcfi=false ;; - --enable-cfi-debug) printf "%s" -Dcfi_debug=true ;; --- -2.31.1 - diff --git a/kvm-Revert-intel_iommu-Fix-irqchip-X2APIC-configuration-.patch b/kvm-Revert-intel_iommu-Fix-irqchip-X2APIC-configuration-.patch deleted file mode 100644 index 99d2be9..0000000 --- a/kvm-Revert-intel_iommu-Fix-irqchip-X2APIC-configuration-.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 7f5289f426b25cf1113a450a3aa311170ac30397 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Mon, 26 Sep 2022 11:32:06 -0400 -Subject: [PATCH] Revert "intel_iommu: Fix irqchip / X2APIC configuration - checks" - -RH-Author: Peter Xu -RH-MergeRequest: 121: Revert "intel_iommu: Fix irqchip / X2APIC configuration checks" -RH-Bugzilla: 2126095 -RH-Acked-by: Igor Mammedov -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Jason Wang -RH-Commit: [1/1] 6218c5f81c205ab160f4ccdb203ae39d4af3339e (peterx/qemu-kvm) - -It's true that when vcpus<=255 we don't require the length of 32bit APIC -IDs. 
However here since we already have EIM=ON it means the hypervisor -will declare the VM as x2apic supported (e.g. VT-d ECAP register will have -EIM bit 4 set), so the guest should assume the APIC IDs are 32bits width -even if vcpus<=255. In short, commit 77250171bdc breaks any simple cmdline -that wants to boot a VM with >=9 but <=255 vcpus with: - - -device intel-iommu,intremap=on - -For anyone who does not want to enable x2apic, we can use eim=off in the -intel-iommu parameters to skip enabling KVM x2apic. - -This partly reverts commit 77250171bdc02aee106083fd2a068147befa1a38, while -keeping the valid bit on checking split irqchip, but revert the other change. - -One thing to mention is that this patch may break migration compatibility -of such VM, however that's probably the best thing we can do, because the -old behavior was simply wrong and not working for >8 vcpus. For <=8 vcpus, -there could be a light guest ABI change (by enabling KVM x2apic after this -patch), but logically it shouldn't affect the migration from working. - -Also, this is not the 1st commit to change x2apic behavior. Igor provided -a full history of how this evolved for the past few years: - -https://lore.kernel.org/qemu-devel/20220922154617.57d1a1fb@redhat.com/ - -Relevant commits for reference: - - fb506e701e ("intel_iommu: reject broken EIM", 2016-10-17) - c1bb5418e3 ("target/i386: Support up to 32768 CPUs without IRQ remapping", 2020-12-10) - 77250171bd ("intel_iommu: Fix irqchip / X2APIC configuration checks", 2022-05-16) - dc89f32d92 ("target/i386: Fix sanity check on max APIC ID / X2APIC enablement", 2022-05-16) - -We may want to have this for stable too (mostly for 7.1.0 only). Adding a -fixes tag. - -Cc: David Woodhouse -Cc: Claudio Fontana -Cc: Igor Mammedov -Fixes: 77250171bd ("intel_iommu: Fix irqchip / X2APIC configuration checks") -Signed-off-by: Peter Xu -Message-Id: <20220926153206.10881-1-peterx@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Reviewed-by: Igor Mammedov -(cherry picked from commit 20ca47429e96df84e7b2e741f740bfce8a813fb2) -Signed-off-by: Peter Xu ---- - hw/i386/intel_iommu.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c -index 05d53a1aa9..6524c2ee32 100644 ---- a/hw/i386/intel_iommu.c -+++ b/hw/i386/intel_iommu.c -@@ -3818,6 +3818,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) - error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split"); - return false; - } -+ if (!kvm_enable_x2apic()) { -+ error_setg(errp, "eim=on requires support on the KVM side" -+ "(X2APIC_API, first shipped in v4.7)"); -+ return false; -+ } - } - - /* Currently only address widths supported are 39 and 48 bits */ --- -2.31.1 - diff --git a/kvm-block-move-bdrv_qiov_is_aligned-to-file-posix.patch b/kvm-block-move-bdrv_qiov_is_aligned-to-file-posix.patch deleted file mode 100644 index 1556ced..0000000 --- a/kvm-block-move-bdrv_qiov_is_aligned-to-file-posix.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 2c9b536fac44c15c44af385ac1b440a9f5c05d01 Mon Sep 17 00:00:00 2001 -From: Keith Busch -Date: Thu, 29 Sep 2022 13:05:22 -0700 -Subject: [PATCH 1/2] block: move bdrv_qiov_is_aligned to file-posix - -RH-Author: Kevin Wolf -RH-MergeRequest: 123: block: Fix memory alignment of requests -RH-Bugzilla: 2143170 -RH-Acked-by: Alberto Faria -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [1/2] 77b6ed2aaedfbd3dba7769b9a999ab3743f642cd (kmwolf/centos-qemu-kvm) - -There is only user of bdrv_qiov_is_aligned(), so move the alignment -function to there and make it static. 
- -Signed-off-by: Keith Busch -Message-Id: <20220929200523.3218710-2-kbusch@meta.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit a7c5f67a78569f8c275ea4ea9962e9c79b9d03cb) -Signed-off-by: Kevin Wolf ---- - block/file-posix.c | 21 +++++++++++++++++++++ - block/io.c | 21 --------------------- - include/block/block-io.h | 1 - - 3 files changed, 21 insertions(+), 22 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index 48cd096624..e3f3de2780 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -2061,6 +2061,27 @@ static int coroutine_fn raw_thread_pool_submit(BlockDriverState *bs, - return thread_pool_submit_co(pool, func, arg); - } - -+/* -+ * Check if all memory in this vector is sector aligned. -+ */ -+static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) -+{ -+ int i; -+ size_t alignment = bdrv_min_mem_align(bs); -+ IO_CODE(); -+ -+ for (i = 0; i < qiov->niov; i++) { -+ if ((uintptr_t) qiov->iov[i].iov_base % alignment) { -+ return false; -+ } -+ if (qiov->iov[i].iov_len % alignment) { -+ return false; -+ } -+ } -+ -+ return true; -+} -+ - static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, QEMUIOVector *qiov, int type) - { -diff --git a/block/io.c b/block/io.c -index 0a8cbefe86..96edc7f7cb 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -3236,27 +3236,6 @@ void *qemu_try_blockalign0(BlockDriverState *bs, size_t size) - return mem; - } - --/* -- * Check if all memory in this vector is sector aligned. 
-- */ --bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) --{ -- int i; -- size_t alignment = bdrv_min_mem_align(bs); -- IO_CODE(); -- -- for (i = 0; i < qiov->niov; i++) { -- if ((uintptr_t) qiov->iov[i].iov_base % alignment) { -- return false; -- } -- if (qiov->iov[i].iov_len % alignment) { -- return false; -- } -- } -- -- return true; --} -- - void bdrv_io_plug(BlockDriverState *bs) - { - BdrvChild *child; -diff --git a/include/block/block-io.h b/include/block/block-io.h -index fd25ffa9be..492f95fc05 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -150,7 +150,6 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size); - void *qemu_blockalign0(BlockDriverState *bs, size_t size); - void *qemu_try_blockalign(BlockDriverState *bs, size_t size); - void *qemu_try_blockalign0(BlockDriverState *bs, size_t size); --bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); - - void bdrv_enable_copy_on_read(BlockDriverState *bs); - void bdrv_disable_copy_on_read(BlockDriverState *bs); --- -2.31.1 - diff --git a/kvm-block-use-the-request-length-for-iov-alignment.patch b/kvm-block-use-the-request-length-for-iov-alignment.patch deleted file mode 100644 index 512e1d8..0000000 --- a/kvm-block-use-the-request-length-for-iov-alignment.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 7e334715074c7a4090578ed178834f3318d4b969 Mon Sep 17 00:00:00 2001 -From: Keith Busch -Date: Thu, 29 Sep 2022 13:05:23 -0700 -Subject: [PATCH 2/2] block: use the request length for iov alignment - -RH-Author: Kevin Wolf -RH-MergeRequest: 123: block: Fix memory alignment of requests -RH-Bugzilla: 2143170 -RH-Acked-by: Alberto Faria -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [2/2] 50cfd394fff4dbad87d7c90c987e241ed2367746 (kmwolf/centos-qemu-kvm) - -An iov length needs to be aligned to the logical block size, which may -be larger than the memory alignment. 
- -Tested-by: Jens Axboe -Signed-off-by: Keith Busch -Message-Id: <20220929200523.3218710-3-kbusch@meta.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 25474d90aa50bd32e0de395a33d8de42dd6f2aef) -Signed-off-by: Kevin Wolf ---- - block/file-posix.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index e3f3de2780..af994aba2b 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -2068,13 +2068,14 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) - { - int i; - size_t alignment = bdrv_min_mem_align(bs); -+ size_t len = bs->bl.request_alignment; - IO_CODE(); - - for (i = 0; i < qiov->niov; i++) { - if ((uintptr_t) qiov->iov[i].iov_base % alignment) { - return false; - } -- if (qiov->iov[i].iov_len % alignment) { -+ if (qiov->iov[i].iov_len % len) { - return false; - } - } --- -2.31.1 - diff --git a/kvm-host-libusb-Remove-unused-variable.patch b/kvm-host-libusb-Remove-unused-variable.patch deleted file mode 100644 index c21fc57..0000000 --- a/kvm-host-libusb-Remove-unused-variable.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 529d7d039a8783cb5745330c9731626a608553db Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 9 Nov 2022 05:09:40 -0500 -Subject: [PATCH 3/3] host-libusb: Remove unused variable - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 122: Remove variables causing 'Unused but set variable' warning on Clang 15 -RH-Bugzilla: 2141218 -RH-Commit: [3/3] 460bd469b6f262074c407475c5003581f8993855 (mrezanin/centos-src-qemu-kvm) - -Variable unconnected used in usb_host_auto_check function is only incremented -but never read as line where it is read was disabled since introducing the code. -This causes 'Unused but set variable' warning on Clang 15.0.1 compiler. - -Removing the variable and disabled code to prevent the warning. 
- -Signed-off-by: Miroslav Rezanina ---- - hw/usb/host-libusb.c | 15 --------------- - 1 file changed, 15 deletions(-) - -diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c -index 28f8af8941..176868d345 100644 ---- a/hw/usb/host-libusb.c -+++ b/hw/usb/host-libusb.c -@@ -1837,7 +1837,6 @@ static void usb_host_auto_check(void *unused) - struct USBAutoFilter *f; - libusb_device **devs = NULL; - struct libusb_device_descriptor ddesc; -- int unconnected = 0; - int i, n; - - if (usb_host_init() != 0) { -@@ -1897,9 +1896,6 @@ static void usb_host_auto_check(void *unused) - libusb_free_device_list(devs, 1); - - QTAILQ_FOREACH(s, &hostdevs, next) { -- if (s->dh == NULL) { -- unconnected++; -- } - if (s->seen == 0) { - if (s->dh) { - usb_host_close(s); -@@ -1908,17 +1904,6 @@ static void usb_host_auto_check(void *unused) - } - s->seen = 0; - } -- --#if 0 -- if (unconnected == 0) { -- /* nothing to watch */ -- if (usb_auto_timer) { -- timer_del(usb_auto_timer); -- trace_usb_host_auto_scan_disabled(); -- } -- return; -- } --#endif - } - - if (!usb_vmstate) { --- -2.31.1 - diff --git a/kvm-hw-acpi-erst.c-Fix-memory-handling-issues.patch b/kvm-hw-acpi-erst.c-Fix-memory-handling-issues.patch deleted file mode 100644 index 0f4b726..0000000 --- a/kvm-hw-acpi-erst.c-Fix-memory-handling-issues.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 108b687eb18d121d688e652ac13ba465083f4529 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Mon, 5 Dec 2022 15:32:55 -0500 -Subject: [PATCH] hw/acpi/erst.c: Fix memory handling issues - -RH-Author: Jon Maloy -RH-MergeRequest: 125: ACPI ERST: memory corruption issues in read_erst_record and write_erst_record -RH-Bugzilla: 2149108 -RH-Acked-by: Gavin Shan -RH-Acked-by: Gerd Hoffmann -RH-Acked-by: Igor Mammedov -RH-Commit: [1/1] cccd8b6b8f0f360c623f913dbc02d4eda2fbf972 (jmaloy/jmaloy-qemu-kvm-centos) - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2149108 -Upstream: Merged -CVE: CVE-2022-4172 - -commit defb70980f6bed36100b74e84220f1764c0dd544 
-Author: Christian A. Ehrhardt -Date: Mon Oct 24 17:42:33 2022 +0200 - - hw/acpi/erst.c: Fix memory handling issues - - - Fix memset argument order: The second argument is - the value, the length goes last. - - Fix an integer overflow reported by Alexander Bulekov. - - Both issues allow the guest to overrun the host buffer - allocated for the ERST memory device. - - Cc: Eric DeVolder - Cc: qemu-stable@nongnu.org - Fixes: f7e26ffa590 ("ACPI ERST: support for ACPI ERST feature") - Tested-by: Alexander Bulekov - Signed-off-by: Christian A. Ehrhardt - Message-Id: <20221024154233.1043347-1-lk@c--e.de> - Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1268 - Reviewed-by: Alexander Bulekov - Reviewed-by: Eric DeVolder - Reviewed-by: Michael S. Tsirkin - Signed-off-by: Michael S. Tsirkin - -(cherry picked from commit defb70980f6bed36100b74e84220f1764c0dd544) -Jon Maloy ---- - hw/acpi/erst.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/hw/acpi/erst.c b/hw/acpi/erst.c -index df856b2669..aefcc03ad6 100644 ---- a/hw/acpi/erst.c -+++ b/hw/acpi/erst.c -@@ -635,7 +635,7 @@ static unsigned read_erst_record(ERSTDeviceState *s) - if (record_length < UEFI_CPER_RECORD_MIN_SIZE) { - rc = STATUS_FAILED; - } -- if ((s->record_offset + record_length) > exchange_length) { -+ if (record_length > exchange_length - s->record_offset) { - rc = STATUS_FAILED; - } - /* If all is ok, copy the record to the exchange buffer */ -@@ -684,7 +684,7 @@ static unsigned write_erst_record(ERSTDeviceState *s) - if (record_length < UEFI_CPER_RECORD_MIN_SIZE) { - return STATUS_FAILED; - } -- if ((s->record_offset + record_length) > exchange_length) { -+ if (record_length > exchange_length - s->record_offset) { - return STATUS_FAILED; - } - -@@ -716,7 +716,7 @@ static unsigned write_erst_record(ERSTDeviceState *s) - if (nvram) { - /* Write the record into the slot */ - memcpy(nvram, exchange, record_length); -- memset(nvram + record_length, exchange_length - record_length, 
0xFF); -+ memset(nvram + record_length, 0xFF, exchange_length - record_length); - /* If a new record, increment the record_count */ - if (!record_found) { - uint32_t record_count; --- -2.37.3 - diff --git a/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch b/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch deleted file mode 100644 index 2e9fa92..0000000 --- a/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch +++ /dev/null @@ -1,66 +0,0 @@ -From aba2a5cb19efa33be871dd951366439cf99c5f13 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Thu, 18 Aug 2022 17:01:13 +0200 -Subject: [PATCH 27/29] i386: do kvm_put_msr_feature_control() first thing when - vCPU is reset - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 118: Synchronize qemu-kvm-7.0.0-13.el9 -RH-Bugzilla: 2125281 -RH-Acked-by: Vitaly Kuznetsov -RH-Commit: [2/2] 4986c35a04255c8fe2b62a48f5ea489339f3826a (mrezanin/centos-src-qemu-kvm) - -kvm_put_sregs2() fails to reset 'locked' CR4/CR0 bits upon vCPU reset when -it is in VMX root operation. Do kvm_put_msr_feature_control() before -kvm_put_sregs2() to (possibly) kick vCPU out of VMX root operation. It also -seems logical to do kvm_put_msr_feature_control() before -kvm_put_nested_state() and not after it, especially when 'real' nested -state is set. 
- -Signed-off-by: Vitaly Kuznetsov -Message-Id: <20220818150113.479917-3-vkuznets@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 45ed68a1a3a19754ade954d75a3c9d13ff560e5c) -Signed-off-by: Vitaly Kuznetsov ---- - target/i386/kvm/kvm.c | 17 ++++++++++++----- - 1 file changed, 12 insertions(+), 5 deletions(-) - -diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index fd3237310b..a9eba247a5 100644 ---- a/target/i386/kvm/kvm.c -+++ b/target/i386/kvm/kvm.c -@@ -4533,6 +4533,18 @@ int kvm_arch_put_registers(CPUState *cpu, int level) - - assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); - -+ /* -+ * Put MSR_IA32_FEATURE_CONTROL first, this ensures the VM gets out of VMX -+ * root operation upon vCPU reset. kvm_put_msr_feature_control() should also -+ * preceed kvm_put_nested_state() when 'real' nested state is set. -+ */ -+ if (level >= KVM_PUT_RESET_STATE) { -+ ret = kvm_put_msr_feature_control(x86_cpu); -+ if (ret < 0) { -+ return ret; -+ } -+ } -+ - /* must be before kvm_put_nested_state so that EFER.SVME is set */ - ret = has_sregs2 ? 
kvm_put_sregs2(x86_cpu) : kvm_put_sregs(x86_cpu); - if (ret < 0) { -@@ -4544,11 +4556,6 @@ int kvm_arch_put_registers(CPUState *cpu, int level) - if (ret < 0) { - return ret; - } -- -- ret = kvm_put_msr_feature_control(x86_cpu); -- if (ret < 0) { -- return ret; -- } - } - - if (level == KVM_PUT_FULL_STATE) { --- -2.31.1 - diff --git a/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch b/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch deleted file mode 100644 index 27ccde7..0000000 --- a/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 6f650e08efc35cc04730bf99cea7be8d4faa6e74 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Thu, 18 Aug 2022 17:01:12 +0200 -Subject: [PATCH 26/29] i386: reset KVM nested state upon CPU reset - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 118: Synchronize qemu-kvm-7.0.0-13.el9 -RH-Bugzilla: 2125281 -RH-Acked-by: Vitaly Kuznetsov -RH-Commit: [1/2] b34da74a40fe32ef210c8127ba8bb032aaab6381 (mrezanin/centos-src-qemu-kvm) - -Make sure env->nested_state is cleaned up when a vCPU is reset, it may -be stale after an incoming migration, kvm_arch_put_registers() may -end up failing or putting vCPU in a weird state. 
- -Reviewed-by: Maxim Levitsky -Signed-off-by: Vitaly Kuznetsov -Message-Id: <20220818150113.479917-2-vkuznets@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 3cafdb67504a34a0305260f0c86a73d5a3fb000b) -Signed-off-by: Vitaly Kuznetsov ---- - target/i386/kvm/kvm.c | 37 +++++++++++++++++++++++++++---------- - 1 file changed, 27 insertions(+), 10 deletions(-) - -diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index 4e5d4bafc4..fd3237310b 100644 ---- a/target/i386/kvm/kvm.c -+++ b/target/i386/kvm/kvm.c -@@ -1695,6 +1695,30 @@ static void kvm_init_xsave(CPUX86State *env) - env->xsave_buf_len); - } - -+static void kvm_init_nested_state(CPUX86State *env) -+{ -+ struct kvm_vmx_nested_state_hdr *vmx_hdr; -+ uint32_t size; -+ -+ if (!env->nested_state) { -+ return; -+ } -+ -+ size = env->nested_state->size; -+ -+ memset(env->nested_state, 0, size); -+ env->nested_state->size = size; -+ -+ if (cpu_has_vmx(env)) { -+ env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX; -+ vmx_hdr = &env->nested_state->hdr.vmx; -+ vmx_hdr->vmxon_pa = -1ull; -+ vmx_hdr->vmcs12_pa = -1ull; -+ } else if (cpu_has_svm(env)) { -+ env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM; -+ } -+} -+ - int kvm_arch_init_vcpu(CPUState *cs) - { - struct { -@@ -2122,19 +2146,10 @@ int kvm_arch_init_vcpu(CPUState *cs) - assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data)); - - if (cpu_has_vmx(env) || cpu_has_svm(env)) { -- struct kvm_vmx_nested_state_hdr *vmx_hdr; -- - env->nested_state = g_malloc0(max_nested_state_len); - env->nested_state->size = max_nested_state_len; - -- if (cpu_has_vmx(env)) { -- env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX; -- vmx_hdr = &env->nested_state->hdr.vmx; -- vmx_hdr->vmxon_pa = -1ull; -- vmx_hdr->vmcs12_pa = -1ull; -- } else { -- env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM; -- } -+ kvm_init_nested_state(env); - } - } - -@@ -2199,6 +2214,8 @@ void kvm_arch_reset_vcpu(X86CPU *cpu) - /* enabled by 
default */ - env->poll_control_msr = 1; - -+ kvm_init_nested_state(env); -+ - sev_es_set_reset_vector(CPU(cpu)); - } - --- -2.31.1 - diff --git a/kvm-qemu-img-remove-unused-variable.patch b/kvm-qemu-img-remove-unused-variable.patch deleted file mode 100644 index ceb6ea7..0000000 --- a/kvm-qemu-img-remove-unused-variable.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 2458bcc8497fb12ad81e9fd5d05a6164d25f00d6 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 9 Nov 2022 05:12:46 -0500 -Subject: [PATCH 2/3] qemu-img: remove unused variable - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 122: Remove variables causing 'Unused but set variable' warning on Clang 15 -RH-Bugzilla: 2141218 -RH-Commit: [2/3] 9cd54891567781090accfb68aa5d80d2c6d68584 (mrezanin/centos-src-qemu-kvm) - -Variable block_count used in img_dd function is only incremented but never read. -This causes 'Unused but set variable' warning on Clang 15.0.1 compiler. - -Removing the variable to prevent the warning. - -Signed-off-by: Miroslav Rezanina ---- - qemu-img.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/qemu-img.c b/qemu-img.c -index 7d4b33b3da..987da256ef 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -4919,7 +4919,7 @@ static int img_dd(int argc, char **argv) - const char *out_fmt = "raw"; - const char *fmt = NULL; - int64_t size = 0; -- int64_t block_count = 0, out_pos, in_pos; -+ int64_t out_pos, in_pos; - bool force_share = false; - struct DdInfo dd = { - .flags = 0, -@@ -5119,7 +5119,7 @@ static int img_dd(int argc, char **argv) - - in.buf = g_new(uint8_t, in.bsz); - -- for (out_pos = 0; in_pos < size; block_count++) { -+ for (out_pos = 0; in_pos < size; ) { - int bytes = (in_pos + in.bsz > size) ? 
size - in_pos : in.bsz; - - ret = blk_pread(blk1, in_pos, bytes, in.buf, 0); --- -2.31.1 - diff --git a/kvm-rtl8139-Remove-unused-variable.patch b/kvm-rtl8139-Remove-unused-variable.patch deleted file mode 100644 index 8fa05d0..0000000 --- a/kvm-rtl8139-Remove-unused-variable.patch +++ /dev/null @@ -1,44 +0,0 @@ -From a89fed7cdbee76f9a7083ca04e079ab991737eeb Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 9 Nov 2022 06:40:47 -0500 -Subject: [PATCH 1/3] rtl8139: Remove unused variable - -RH-Author: Miroslav Rezanina -RH-MergeRequest: 122: Remove variables causing 'Unused but set variable' warning on Clang 15 -RH-Bugzilla: 2141218 -RH-Commit: [1/3] ffec7ff0238c6859a5ddaea80fcd8e66049fd3fc (mrezanin/centos-src-qemu-kvm) - -Variable send_count used in rtl8139_cplus_transmit_one function is only -incremented but never read. This causes 'Unused but set variable' warning -on Clang 15.0.1 compiler. - -Removing the variable to prevent the warning. - -Signed-off-by: Miroslav Rezanina ---- - hw/net/rtl8139.c | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 75dacabc43..445cbd700c 100644 ---- a/hw/net/rtl8139.c -+++ b/hw/net/rtl8139.c -@@ -2156,7 +2156,6 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s) - ip_data_len, saved_size - ETH_HLEN, large_send_mss); - - int tcp_send_offset = 0; -- int send_count = 0; - - /* maximum IP header length is 60 bytes */ - uint8_t saved_ip_header[60]; -@@ -2261,7 +2260,6 @@ static int rtl8139_cplus_transmit_one(RTL8139State *s) - /* add transferred count to TCP sequence number */ - stl_be_p(&p_tcp_hdr->th_seq, - chunk_size + ldl_be_p(&p_tcp_hdr->th_seq)); -- ++send_count; - } - - /* Stop sending this frame */ --- -2.31.1 - diff --git a/kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch b/kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch deleted file mode 100644 index e849747..0000000 --- 
a/kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch +++ /dev/null @@ -1,60 +0,0 @@ -From f141182484fca38685cb246f77e311643cd2f4c7 Mon Sep 17 00:00:00 2001 -From: Vitaly Kuznetsov -Date: Fri, 7 Oct 2022 13:56:02 +0200 -Subject: [PATCH] target/i386/kvm: fix kvmclock_current_nsec: Assertion - `time.tsc_timestamp <= migration_tsc' failed - -RH-Author: Vitaly Kuznetsov -RH-MergeRequest: 120: target/i386/kvm: fix kvmclock_current_nsec: Assertion `time.tsc_timestamp <= migration_tsc' failed -RH-Bugzilla: 2108531 -RH-Acked-by: Marcelo Tosatti -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Paolo Bonzini -RH-Commit: [1/1] edc5bb2578f7f31ab4d87e343925f6f08e812c29 (vkuznets/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2108531 - -commit c4ef867f2949bf2a2ae18a4e27cf1a34bbc8aecb -Author: Ray Zhang -Date: Thu Sep 22 18:05:23 2022 +0800 - - target/i386/kvm: fix kvmclock_current_nsec: Assertion `time.tsc_timestamp <= migration_tsc' failed - - New KVM_CLOCK flags were added in the kernel.(c68dc1b577eabd5605c6c7c08f3e07ae18d30d5d) - ``` - + #define KVM_CLOCK_VALID_FLAGS \ - + (KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC) - - case KVM_CAP_ADJUST_CLOCK: - - r = KVM_CLOCK_TSC_STABLE; - + r = KVM_CLOCK_VALID_FLAGS; - ``` - - kvm_has_adjust_clock_stable needs to handle additional flags, - so that s->clock_is_reliable can be true and kvmclock_current_nsec doesn't need to be called. 
- - Signed-off-by: Ray Zhang - Message-Id: <20220922100523.2362205-1-zhanglei002@gmail.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Vitaly Kuznetsov ---- - target/i386/kvm/kvm.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index a9eba247a5..ba98b99d8f 100644 ---- a/target/i386/kvm/kvm.c -+++ b/target/i386/kvm/kvm.c -@@ -157,7 +157,7 @@ bool kvm_has_adjust_clock_stable(void) - { - int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK); - -- return (ret == KVM_CLOCK_TSC_STABLE); -+ return (ret & KVM_CLOCK_TSC_STABLE); - } - - bool kvm_has_adjust_clock(void) --- -2.37.3 - diff --git a/kvm-util-accept-iova_tree_remove_parameter-by-value.patch b/kvm-util-accept-iova_tree_remove_parameter-by-value.patch deleted file mode 100644 index 5cd76c3..0000000 --- a/kvm-util-accept-iova_tree_remove_parameter-by-value.patch +++ /dev/null @@ -1,182 +0,0 @@ -From 3320d1883222bc551cf8ffd048882be4a97e872f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:04 +0200 -Subject: [PATCH 03/29] util: accept iova_tree_remove_parameter by value -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [3/25] 98190376f758aed31bc31ce3e478438787eb357c (redhat/centos-stream/src/qemu-kvm) - -It's convenient to call iova_tree_remove from a map returned from -iova_tree_find or iova_tree_find_iova. With the current code this is not -possible, since we will free it, and then we will try to search for it -again. - -Fix it making accepting the map by value, forcing a copy of the -argument. Not applying a fixes tag, since there is no use like that at -the moment. 
- -Signed-off-by: Eugenio Pérez -Signed-off-by: Jason Wang -(cherry picked from commit 69292a8e40f4dae8af5f04724e06392cdf03c09e) -Signed-off-by: Laurent Vivier ---- - hw/i386/intel_iommu.c | 6 +++--- - hw/virtio/vhost-iova-tree.c | 2 +- - hw/virtio/vhost-iova-tree.h | 2 +- - hw/virtio/vhost-vdpa.c | 6 +++--- - include/qemu/iova-tree.h | 2 +- - net/vhost-vdpa.c | 4 ++-- - util/iova-tree.c | 4 ++-- - 7 files changed, 13 insertions(+), 13 deletions(-) - -diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c -index 2162394e08..05d53a1aa9 100644 ---- a/hw/i386/intel_iommu.c -+++ b/hw/i386/intel_iommu.c -@@ -1187,7 +1187,7 @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) - return ret; - } - /* Drop any existing mapping */ -- iova_tree_remove(as->iova_tree, &target); -+ iova_tree_remove(as->iova_tree, target); - /* Recover the correct type */ - event->type = IOMMU_NOTIFIER_MAP; - entry->perm = cache_perm; -@@ -1200,7 +1200,7 @@ static int vtd_page_walk_one(IOMMUTLBEvent *event, vtd_page_walk_info *info) - trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask); - return 0; - } -- iova_tree_remove(as->iova_tree, &target); -+ iova_tree_remove(as->iova_tree, target); - } - - trace_vtd_page_walk_one(info->domain_id, entry->iova, -@@ -3563,7 +3563,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) - - map.iova = n->start; - map.size = size; -- iova_tree_remove(as->iova_tree, &map); -+ iova_tree_remove(as->iova_tree, map); - } - - static void vtd_address_space_unmap_all(IntelIOMMUState *s) -diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c -index 67bf6d57ab..3d03395a77 100644 ---- a/hw/virtio/vhost-iova-tree.c -+++ b/hw/virtio/vhost-iova-tree.c -@@ -104,7 +104,7 @@ int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map) - * @iova_tree: The vhost iova tree - * @map: The map to remove - */ --void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map) -+void 
vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map) - { - iova_tree_remove(iova_tree->iova_taddr_map, map); - } -diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h -index 6a4f24e0f9..4adfd79ff0 100644 ---- a/hw/virtio/vhost-iova-tree.h -+++ b/hw/virtio/vhost-iova-tree.h -@@ -22,6 +22,6 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete); - const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree, - const DMAMap *map); - int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map); --void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map); -+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map); - - #endif -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 7e28d2f674..87e0ad393f 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -240,7 +240,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - - fail_map: - if (v->shadow_vqs_enabled) { -- vhost_iova_tree_remove(v->iova_tree, &mem_region); -+ vhost_iova_tree_remove(v->iova_tree, mem_region); - } - - fail: -@@ -300,7 +300,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, - return; - } - iova = result->iova; -- vhost_iova_tree_remove(v->iova_tree, result); -+ vhost_iova_tree_remove(v->iova_tree, *result); - } - vhost_vdpa_iotlb_batch_begin_once(v); - ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); -@@ -944,7 +944,7 @@ static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, - needle->perm == IOMMU_RO); - if (unlikely(r != 0)) { - error_setg_errno(errp, -r, "Cannot map region to device"); -- vhost_iova_tree_remove(v->iova_tree, needle); -+ vhost_iova_tree_remove(v->iova_tree, *needle); - } - - return r == 0; -diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h -index 16bbfdf5f8..8528e5c98f 100644 ---- a/include/qemu/iova-tree.h -+++ b/include/qemu/iova-tree.h -@@ -73,7 +73,7 @@ int 
iova_tree_insert(IOVATree *tree, const DMAMap *map); - * all the mappings that are included in the provided range will be - * removed from the tree. Here map->translated_addr is meaningless. - */ --void iova_tree_remove(IOVATree *tree, const DMAMap *map); -+void iova_tree_remove(IOVATree *tree, DMAMap map); - - /** - * iova_tree_find: -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 303447a68e..a49e7e649d 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -244,7 +244,7 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) - error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); - } - -- vhost_iova_tree_remove(tree, map); -+ vhost_iova_tree_remove(tree, *map); - } - - static size_t vhost_vdpa_net_cvq_cmd_len(void) -@@ -297,7 +297,7 @@ static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, - return true; - - dma_map_err: -- vhost_iova_tree_remove(v->iova_tree, &map); -+ vhost_iova_tree_remove(v->iova_tree, map); - return false; - } - -diff --git a/util/iova-tree.c b/util/iova-tree.c -index fee530a579..536789797e 100644 ---- a/util/iova-tree.c -+++ b/util/iova-tree.c -@@ -164,11 +164,11 @@ void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator) - g_tree_foreach(tree->tree, iova_tree_traverse, iterator); - } - --void iova_tree_remove(IOVATree *tree, const DMAMap *map) -+void iova_tree_remove(IOVATree *tree, DMAMap map) - { - const DMAMap *overlap; - -- while ((overlap = iova_tree_find(tree, map))) { -+ while ((overlap = iova_tree_find(tree, &map))) { - g_tree_remove(tree->tree, overlap); - } - } --- -2.31.1 - diff --git a/kvm-vdpa-Add-vhost_vdpa_net_load_mq.patch b/kvm-vdpa-Add-vhost_vdpa_net_load_mq.patch deleted file mode 100644 index 423cff9..0000000 --- a/kvm-vdpa-Add-vhost_vdpa_net_load_mq.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 466adb0e641f5c918cbea84e962ae9352f440663 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 11 Aug 2022 14:28:47 +0200 -Subject: [PATCH 22/29] vdpa: Add 
vhost_vdpa_net_load_mq -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [22/25] 01e861ad39d6b8e15870296f508726565101213b (redhat/centos-stream/src/qemu-kvm) - -Upstream: Not merged yet - -Same way as with the MAC, restore the expected number of queues at -device's start. - -Signed-off-by: Eugenio Pérez -Signed-off-by: Laurent Vivier ---- - net/vhost-vdpa.c | 26 ++++++++++++++++++++++++++ - 1 file changed, 26 insertions(+) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index e799e744cd..3950e4f25d 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -400,6 +400,28 @@ static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n) - return 0; - } - -+static int vhost_vdpa_net_load_mq(VhostVDPAState *s, -+ const VirtIONet *n) -+{ -+ struct virtio_net_ctrl_mq mq; -+ uint64_t features = n->parent_obj.guest_features; -+ ssize_t dev_written; -+ -+ if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) { -+ return 0; -+ } -+ -+ mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs); -+ dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ, -+ VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq, -+ sizeof(mq)); -+ if (unlikely(dev_written < 0)) { -+ return dev_written; -+ } -+ -+ return *s->status != VIRTIO_NET_OK; -+} -+ - static int vhost_vdpa_net_load(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -@@ -418,6 +440,10 @@ static int vhost_vdpa_net_load(NetClientState *nc) - if (unlikely(r < 0)) { - return r; - } -+ r = vhost_vdpa_net_load_mq(s, n); -+ if (unlikely(r)) { -+ return r; -+ } - - return 0; - } --- -2.31.1 - diff --git a/kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch b/kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch deleted file mode 100644 index c338a29..0000000 --- 
a/kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 10157c62f06e86f2ccf1fd4130ef55f7f9beac2f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:36 +0200 -Subject: [PATCH 18/29] vdpa: Add virtio-net mac address via CVQ at start -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [18/25] f5b7a59a70e51450df8c58b48e4eb30ef2a44189 (redhat/centos-stream/src/qemu-kvm) - -This is needed so the destination vdpa device see the same state a the -guest set in the source. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit dd036d8d278e6882803bccaa8c51b8527ea33f45) -Signed-off-by: Laurent Vivier ---- - net/vhost-vdpa.c | 40 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 40 insertions(+) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 3575bf64ee..640434d1ea 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -363,11 +363,51 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, - return vhost_svq_poll(svq); - } - -+static int vhost_vdpa_net_load(NetClientState *nc) -+{ -+ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -+ const struct vhost_vdpa *v = &s->vhost_vdpa; -+ const VirtIONet *n; -+ uint64_t features; -+ -+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); -+ -+ if (!v->shadow_vqs_enabled) { -+ return 0; -+ } -+ -+ n = VIRTIO_NET(v->dev->vdev); -+ features = n->parent_obj.guest_features; -+ if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) { -+ const struct virtio_net_ctrl_hdr ctrl = { -+ .class = VIRTIO_NET_CTRL_MAC, -+ .cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET, -+ }; -+ char *cursor = s->cvq_cmd_out_buffer; -+ ssize_t dev_written; -+ -+ 
memcpy(cursor, &ctrl, sizeof(ctrl)); -+ cursor += sizeof(ctrl); -+ memcpy(cursor, n->mac, sizeof(n->mac)); -+ -+ dev_written = vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + sizeof(n->mac), -+ sizeof(virtio_net_ctrl_ack)); -+ if (unlikely(dev_written < 0)) { -+ return dev_written; -+ } -+ -+ return *((virtio_net_ctrl_ack *)s->cvq_cmd_in_buffer) != VIRTIO_NET_OK; -+ } -+ -+ return 0; -+} -+ - static NetClientInfo net_vhost_vdpa_cvq_info = { - .type = NET_CLIENT_DRIVER_VHOST_VDPA, - .size = sizeof(VhostVDPAState), - .receive = vhost_vdpa_receive, - .start = vhost_vdpa_net_cvq_start, -+ .load = vhost_vdpa_net_load, - .stop = vhost_vdpa_net_cvq_stop, - .cleanup = vhost_vdpa_cleanup, - .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, --- -2.31.1 - diff --git a/kvm-vdpa-Allow-MQ-feature-in-SVQ.patch b/kvm-vdpa-Allow-MQ-feature-in-SVQ.patch deleted file mode 100644 index 1d308aa..0000000 --- a/kvm-vdpa-Allow-MQ-feature-in-SVQ.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 4a1688ed7d06aef31ef48a018b1f4be7690481fd Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 11 Aug 2022 14:54:22 +0200 -Subject: [PATCH 25/29] vdpa: Allow MQ feature in SVQ -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [25/25] e416f00fdbcf7af3ddd504e76519510e3bdc57b7 (redhat/centos-stream/src/qemu-kvm) - -Upstream: Not merged yet - -Finally enable SVQ with MQ feature. 
- -Signed-off-by: Eugenio Pérez -Signed-off-by: Laurent Vivier ---- - net/vhost-vdpa.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index c6cbe2fb5c..4bc3fd01a8 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -94,6 +94,7 @@ static const uint64_t vdpa_svq_device_features = - BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | - BIT_ULL(VIRTIO_NET_F_STATUS) | - BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | -+ BIT_ULL(VIRTIO_NET_F_MQ) | - BIT_ULL(VIRTIO_F_ANY_LAYOUT) | - BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | - BIT_ULL(VIRTIO_NET_F_RSC_EXT) | --- -2.31.1 - diff --git a/kvm-vdpa-Delete-CVQ-migration-blocker.patch b/kvm-vdpa-Delete-CVQ-migration-blocker.patch deleted file mode 100644 index f99983b..0000000 --- a/kvm-vdpa-Delete-CVQ-migration-blocker.patch +++ /dev/null @@ -1,98 +0,0 @@ -From caa8a1d41ca1f2b9c4d1c6cc287c8ae22063b488 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:37 +0200 -Subject: [PATCH 19/29] vdpa: Delete CVQ migration blocker -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [19/25] d3e6c009f66e1dc0069323684af28936ae10d155 (redhat/centos-stream/src/qemu-kvm) - -We can restore the device state in the destination via CVQ now. Remove -the migration blocker. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 0e3fdcffead7c651ce06ab50cffb89e806f04e2b) -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-vdpa.c | 15 --------------- - include/hw/virtio/vhost-vdpa.h | 1 - - net/vhost-vdpa.c | 2 -- - 3 files changed, 18 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 23ae5ef48b..7468e44b87 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -1033,13 +1033,6 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) - return true; - } - -- if (v->migration_blocker) { -- int r = migrate_add_blocker(v->migration_blocker, &err); -- if (unlikely(r < 0)) { -- return false; -- } -- } -- - for (i = 0; i < v->shadow_vqs->len; ++i) { - VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); -@@ -1082,10 +1075,6 @@ err: - vhost_svq_stop(svq); - } - -- if (v->migration_blocker) { -- migrate_del_blocker(v->migration_blocker); -- } -- - return false; - } - -@@ -1101,10 +1090,6 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); - vhost_vdpa_svq_unmap_rings(dev, svq); - } -- -- if (v->migration_blocker) { -- migrate_del_blocker(v->migration_blocker); -- } - } - - static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index d10a89303e..1111d85643 100644 ---- a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -35,7 +35,6 @@ typedef struct vhost_vdpa { - bool shadow_vqs_enabled; - /* IOVA mapping used by the Shadow Virtqueue */ - VhostIOVATree *iova_tree; -- Error *migration_blocker; - GPtrArray *shadow_vqs; - const VhostShadowVirtqueueOps *shadow_vq_ops; - void *shadow_vq_ops_opaque; -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 640434d1ea..6ce68fcd3f 
100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -555,8 +555,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - - s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; - s->vhost_vdpa.shadow_vq_ops_opaque = s; -- error_setg(&s->vhost_vdpa.migration_blocker, -- "Migration disabled: vhost-vdpa uses CVQ."); - } - ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); - if (ret) { --- -2.31.1 - diff --git a/kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch b/kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch deleted file mode 100644 index 8b6dd7e..0000000 --- a/kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch +++ /dev/null @@ -1,133 +0,0 @@ -From 08d9ea9f9218ad628771f3962d52fb4b6c110262 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:06 +0200 -Subject: [PATCH 05/29] vdpa: Make SVQ vring unmapping return void -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [5/25] 340a2246e85d30b6d30ab24198af0fb65520276e (redhat/centos-stream/src/qemu-kvm) - -Nothing actually reads the return value, but an error in cleaning some -entries could cause device stop to abort, making a restart impossible. -Better ignore explicitely the return value. 
- -Reported-by: Lei Yang -Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ") -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 5b590f51b923776a14d3bcafcb393279c1b72022) -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-vdpa.c | 32 ++++++++++---------------------- - 1 file changed, 10 insertions(+), 22 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index e16e0e222e..e208dd000e 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -884,7 +884,7 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, - /** - * Unmap a SVQ area in the device - */ --static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, -+static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, - const DMAMap *needle) - { - const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle); -@@ -893,38 +893,33 @@ static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, - - if (unlikely(!result)) { - error_report("Unable to find SVQ address to unmap"); -- return false; -+ return; - } - - size = ROUND_UP(result->size, qemu_real_host_page_size()); - r = vhost_vdpa_dma_unmap(v, result->iova, size); - if (unlikely(r < 0)) { - error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); -- return false; -+ return; - } - - vhost_iova_tree_remove(v->iova_tree, *result); -- return r == 0; - } - --static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, -+static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, - const VhostShadowVirtqueue *svq) - { - DMAMap needle = {}; - struct vhost_vdpa *v = dev->opaque; - struct vhost_vring_addr svq_addr; -- bool ok; - - vhost_svq_get_vring_addr(svq, &svq_addr); - - needle.translated_addr = svq_addr.desc_user_addr; -- ok = vhost_vdpa_svq_unmap_ring(v, &needle); -- if (unlikely(!ok)) { -- return false; -- } -+ vhost_vdpa_svq_unmap_ring(v, &needle); - - needle.translated_addr = svq_addr.used_user_addr; -- return 
vhost_vdpa_svq_unmap_ring(v, &needle); -+ vhost_vdpa_svq_unmap_ring(v, &needle); - } - - /** -@@ -1095,26 +1090,22 @@ err: - return false; - } - --static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev) -+static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) - { - struct vhost_vdpa *v = dev->opaque; - - if (!v->shadow_vqs) { -- return true; -+ return; - } - - for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); -- bool ok = vhost_vdpa_svq_unmap_rings(dev, svq); -- if (unlikely(!ok)) { -- return false; -- } -+ vhost_vdpa_svq_unmap_rings(dev, svq); - } - - if (v->migration_blocker) { - migrate_del_blocker(v->migration_blocker); - } -- return true; - } - - static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) -@@ -1131,10 +1122,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) - } - vhost_vdpa_set_vring_ready(dev); - } else { -- ok = vhost_vdpa_svqs_stop(dev); -- if (unlikely(!ok)) { -- return -1; -- } -+ vhost_vdpa_svqs_stop(dev); - vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); - } - --- -2.31.1 - diff --git a/kvm-vdpa-Make-VhostVDPAState-cvq_cmd_in_buffer-control-a.patch b/kvm-vdpa-Make-VhostVDPAState-cvq_cmd_in_buffer-control-a.patch deleted file mode 100644 index c762cf4..0000000 --- a/kvm-vdpa-Make-VhostVDPAState-cvq_cmd_in_buffer-control-a.patch +++ /dev/null @@ -1,113 +0,0 @@ -From d44701ad634f05c31a1b0f0b84b168ed1ec19f71 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 24 Aug 2022 20:28:35 +0200 -Subject: [PATCH 20/29] vdpa: Make VhostVDPAState cvq_cmd_in_buffer control ack - type -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [20/25] fd7012502f7002f61ea2e0c90baac013e09282de 
(redhat/centos-stream/src/qemu-kvm) - -Upstream: Not merged yet - -This allows to simplify the code. Rename to status while we're at it. - -Signed-off-by: Eugenio Pérez -Signed-off-by: Laurent Vivier ---- - net/vhost-vdpa.c | 23 ++++++++++++----------- - 1 file changed, 12 insertions(+), 11 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 6ce68fcd3f..535315c1d0 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -35,7 +35,9 @@ typedef struct VhostVDPAState { - VHostNetState *vhost_net; - - /* Control commands shadow buffers */ -- void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer; -+ void *cvq_cmd_out_buffer; -+ virtio_net_ctrl_ack *status; -+ - bool started; - } VhostVDPAState; - -@@ -158,7 +160,7 @@ static void vhost_vdpa_cleanup(NetClientState *nc) - struct vhost_dev *dev = &s->vhost_net->dev; - - qemu_vfree(s->cvq_cmd_out_buffer); -- qemu_vfree(s->cvq_cmd_in_buffer); -+ qemu_vfree(s->status); - if (dev->vq_index + dev->nvqs == dev->vq_index_end) { - g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); - } -@@ -310,7 +312,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) - return r; - } - -- r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer, -+ r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->status, - vhost_vdpa_net_cvq_cmd_page_len(), true); - if (unlikely(r < 0)) { - vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); -@@ -327,7 +329,7 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc) - - if (s->vhost_vdpa.shadow_vqs_enabled) { - vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); -- vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer); -+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status); - } - } - -@@ -340,7 +342,7 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, - .iov_len = out_len, - }; - const struct iovec in = { -- .iov_base = s->cvq_cmd_in_buffer, -+ .iov_base = s->status, - .iov_len = sizeof(virtio_net_ctrl_ack), - }; 
- VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0); -@@ -396,7 +398,7 @@ static int vhost_vdpa_net_load(NetClientState *nc) - return dev_written; - } - -- return *((virtio_net_ctrl_ack *)s->cvq_cmd_in_buffer) != VIRTIO_NET_OK; -+ return *s->status != VIRTIO_NET_OK; - } - - return 0; -@@ -491,8 +493,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - goto out; - } - -- memcpy(&status, s->cvq_cmd_in_buffer, sizeof(status)); -- if (status != VIRTIO_NET_OK) { -+ if (*s->status != VIRTIO_NET_OK) { - return VIRTIO_NET_ERR; - } - -@@ -549,9 +550,9 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), - vhost_vdpa_net_cvq_cmd_page_len()); - memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); -- s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size(), -- vhost_vdpa_net_cvq_cmd_page_len()); -- memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); -+ s->status = qemu_memalign(qemu_real_host_page_size(), -+ vhost_vdpa_net_cvq_cmd_page_len()); -+ memset(s->status, 0, vhost_vdpa_net_cvq_cmd_page_len()); - - s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; - s->vhost_vdpa.shadow_vq_ops_opaque = s; --- -2.31.1 - diff --git a/kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch b/kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch deleted file mode 100644 index ab07d88..0000000 --- a/kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch +++ /dev/null @@ -1,251 +0,0 @@ -From 0c03e18c49b62241d046ecb15c0ee3e7f9c2e547 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:33 +0200 -Subject: [PATCH 15/29] vdpa: Move command buffers map to start of net device -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: 
RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [15/25] 216c18aa307f7bdef1575f581b767b6f023a73bd (redhat/centos-stream/src/qemu-kvm) - -As this series will reuse them to restore the device state at the end of -a migration (or a device start), let's allocate only once at the device -start so we don't duplicate their map and unmap. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 7a7f87e94c4e75ca177564491595dd17b7e41a62) -Signed-off-by: Laurent Vivier ---- - net/vhost-vdpa.c | 123 ++++++++++++++++++++++------------------------- - 1 file changed, 58 insertions(+), 65 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 1a597c2e92..452d10ed93 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -263,29 +263,20 @@ static size_t vhost_vdpa_net_cvq_cmd_page_len(void) - return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size()); - } - --/** Copy and map a guest buffer. */ --static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, -- const struct iovec *out_data, -- size_t out_num, size_t data_len, void *buf, -- size_t *written, bool write) -+/** Map CVQ buffer. */ -+static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, -+ bool write) - { - DMAMap map = {}; - int r; - -- if (unlikely(!data_len)) { -- qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n", -- __func__, write ? "in" : "out"); -- return false; -- } -- -- *written = iov_to_buf(out_data, out_num, 0, buf, data_len); - map.translated_addr = (hwaddr)(uintptr_t)buf; -- map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1; -+ map.size = size - 1; - map.perm = write ? 
IOMMU_RW : IOMMU_RO, - r = vhost_iova_tree_map_alloc(v->iova_tree, &map); - if (unlikely(r != IOVA_OK)) { - error_report("Cannot map injected element"); -- return false; -+ return r; - } - - r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, -@@ -294,50 +285,58 @@ static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, - goto dma_map_err; - } - -- return true; -+ return 0; - - dma_map_err: - vhost_iova_tree_remove(v->iova_tree, map); -- return false; -+ return r; - } - --/** -- * Copy the guest element into a dedicated buffer suitable to be sent to NIC -- * -- * @iov: [0] is the out buffer, [1] is the in one -- */ --static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, -- VirtQueueElement *elem, -- struct iovec *iov) -+static int vhost_vdpa_net_cvq_start(NetClientState *nc) - { -- size_t in_copied; -- bool ok; -+ VhostVDPAState *s; -+ int r; - -- iov[0].iov_base = s->cvq_cmd_out_buffer; -- ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num, -- vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base, -- &iov[0].iov_len, false); -- if (unlikely(!ok)) { -- return false; -+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); -+ -+ s = DO_UPCAST(VhostVDPAState, nc, nc); -+ if (!s->vhost_vdpa.shadow_vqs_enabled) { -+ return 0; - } - -- iov[1].iov_base = s->cvq_cmd_in_buffer; -- ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0, -- sizeof(virtio_net_ctrl_ack), iov[1].iov_base, -- &in_copied, true); -- if (unlikely(!ok)) { -+ r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer, -+ vhost_vdpa_net_cvq_cmd_page_len(), false); -+ if (unlikely(r < 0)) { -+ return r; -+ } -+ -+ r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer, -+ vhost_vdpa_net_cvq_cmd_page_len(), true); -+ if (unlikely(r < 0)) { - vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); -- return false; - } - -- iov[1].iov_len = sizeof(virtio_net_ctrl_ack); -- return true; -+ return r; -+} -+ -+static void 
vhost_vdpa_net_cvq_stop(NetClientState *nc) -+{ -+ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -+ -+ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); -+ -+ if (s->vhost_vdpa.shadow_vqs_enabled) { -+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); -+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_in_buffer); -+ } - } - - static NetClientInfo net_vhost_vdpa_cvq_info = { - .type = NET_CLIENT_DRIVER_VHOST_VDPA, - .size = sizeof(VhostVDPAState), - .receive = vhost_vdpa_receive, -+ .start = vhost_vdpa_net_cvq_start, -+ .stop = vhost_vdpa_net_cvq_stop, - .cleanup = vhost_vdpa_cleanup, - .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, - .has_ufo = vhost_vdpa_has_ufo, -@@ -348,19 +347,17 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { - * Do not forward commands not supported by SVQ. Otherwise, the device could - * accept it and qemu would not know how to update the device model. - */ --static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out, -- size_t out_num) -+static bool vhost_vdpa_net_cvq_validate_cmd(const void *out_buf, size_t len) - { - struct virtio_net_ctrl_hdr ctrl; -- size_t n; - -- n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl)); -- if (unlikely(n < sizeof(ctrl))) { -+ if (unlikely(len < sizeof(ctrl))) { - qemu_log_mask(LOG_GUEST_ERROR, -- "%s: invalid legnth of out buffer %zu\n", __func__, n); -+ "%s: invalid legnth of out buffer %zu\n", __func__, len); - return false; - } - -+ memcpy(&ctrl, out_buf, sizeof(ctrl)); - switch (ctrl.class) { - case VIRTIO_NET_CTRL_MAC: - switch (ctrl.cmd) { -@@ -392,10 +389,14 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - VhostVDPAState *s = opaque; - size_t in_len, dev_written; - virtio_net_ctrl_ack status = VIRTIO_NET_ERR; -- /* out and in buffers sent to the device */ -- struct iovec dev_buffers[2] = { -- { .iov_base = s->cvq_cmd_out_buffer }, -- { .iov_base = s->cvq_cmd_in_buffer }, -+ /* Out buffer sent to both the vdpa device and the device 
model */ -+ struct iovec out = { -+ .iov_base = s->cvq_cmd_out_buffer, -+ }; -+ /* In buffer sent to the device */ -+ const struct iovec dev_in = { -+ .iov_base = s->cvq_cmd_in_buffer, -+ .iov_len = sizeof(virtio_net_ctrl_ack), - }; - /* in buffer used for device model */ - const struct iovec in = { -@@ -405,17 +406,15 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - int r = -EINVAL; - bool ok; - -- ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers); -- if (unlikely(!ok)) { -- goto out; -- } -- -- ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1); -+ out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, -+ s->cvq_cmd_out_buffer, -+ vhost_vdpa_net_cvq_cmd_len()); -+ ok = vhost_vdpa_net_cvq_validate_cmd(s->cvq_cmd_out_buffer, out.iov_len); - if (unlikely(!ok)) { - goto out; - } - -- r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem); -+ r = vhost_svq_add(svq, &out, 1, &dev_in, 1, elem); - if (unlikely(r != 0)) { - if (unlikely(r == -ENOSPC)) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", -@@ -435,13 +434,13 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - goto out; - } - -- memcpy(&status, dev_buffers[1].iov_base, sizeof(status)); -+ memcpy(&status, s->cvq_cmd_in_buffer, sizeof(status)); - if (status != VIRTIO_NET_OK) { - goto out; - } - - status = VIRTIO_NET_ERR; -- virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1); -+ virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1); - if (status != VIRTIO_NET_OK) { - error_report("Bad CVQ processing in model"); - } -@@ -454,12 +453,6 @@ out: - } - vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); - g_free(elem); -- if (dev_buffers[0].iov_base) { -- vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base); -- } -- if (dev_buffers[1].iov_base) { -- vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base); -- } - return r; - } - --- -2.31.1 - diff --git 
a/kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch b/kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch deleted file mode 100644 index 8c3aae4..0000000 --- a/kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch +++ /dev/null @@ -1,49 +0,0 @@ -From dae6d9efac6d7307ccd1e1bebf0a14014f2a4f34 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:05 +0200 -Subject: [PATCH 04/29] vdpa: Remove SVQ vring from iova_tree at shutdown -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [4/25] 813fb80fc3c9872729e6b345e1e9209548aa7481 (redhat/centos-stream/src/qemu-kvm) - -Although the device will be reset before usage, the right thing to do is -to clean it. - -Reported-by: Lei Yang -Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ") -Signed-off-by: Eugenio Pérez -Signed-off-by: Jason Wang -(cherry picked from commit b37c12be962f95fd1e93b470a5ff05f6e2035d46) -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-vdpa.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 87e0ad393f..e16e0e222e 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -898,6 +898,12 @@ static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, - - size = ROUND_UP(result->size, qemu_real_host_page_size()); - r = vhost_vdpa_dma_unmap(v, result->iova, size); -+ if (unlikely(r < 0)) { -+ error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); -+ return false; -+ } -+ -+ vhost_iova_tree_remove(v->iova_tree, *result); - return r == 0; - } - --- -2.31.1 - diff --git a/kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch b/kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch deleted file mode 100644 index 
ab58a35..0000000 --- a/kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 67291df3eca8b3d74567c0e8211c9f7da65e74d4 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:02 +0200 -Subject: [PATCH 01/29] vdpa: Skip the maps not in the iova tree -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [1/25] d385d5b600ac4f1a9f9fd4f523e5d4078df8478a (redhat/centos-stream/src/qemu-kvm) - -Next patch will skip the registering of dma maps that the vdpa device -rejects in the iova tree. We need to consider that here or we cause a -SIGSEGV accessing result. - -Reported-by: Lei Yang -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 10dab9f2635b9bab23a2b29974b526e62bb61268) -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-vdpa.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 3ff9ce3501..983d3697b0 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -289,6 +289,10 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, - }; - - result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region); -+ if (!result) { -+ /* The memory listener map wasn't mapped */ -+ return; -+ } - iova = result->iova; - vhost_iova_tree_remove(v->iova_tree, result); - } --- -2.31.1 - diff --git a/kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch b/kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch deleted file mode 100644 index 7fdb0e7..0000000 --- a/kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch +++ /dev/null @@ -1,79 +0,0 @@ -From c91852883439c3a5349f6787b11b7bc71d6504a5 Mon Sep 17 00:00:00 2001 -From: 
=?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:08 +0200 -Subject: [PATCH 07/29] vdpa: Use ring hwaddr at vhost_vdpa_svq_unmap_ring -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [7/25] 961d9854ae1088fc487b32b605fef207aad08924 (redhat/centos-stream/src/qemu-kvm) - -Reduce code duplication. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 8b6d6119ad7fd983d192f60c4960fb6a9197d995) -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-vdpa.c | 17 ++++++++--------- - 1 file changed, 8 insertions(+), 9 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index e208dd000e..23ae5ef48b 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -884,10 +884,12 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, - /** - * Unmap a SVQ area in the device - */ --static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, -- const DMAMap *needle) -+static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr) - { -- const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle); -+ const DMAMap needle = { -+ .translated_addr = addr, -+ }; -+ const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, &needle); - hwaddr size; - int r; - -@@ -909,17 +911,14 @@ static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, - static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, - const VhostShadowVirtqueue *svq) - { -- DMAMap needle = {}; - struct vhost_vdpa *v = dev->opaque; - struct vhost_vring_addr svq_addr; - - vhost_svq_get_vring_addr(svq, &svq_addr); - -- needle.translated_addr = svq_addr.desc_user_addr; -- vhost_vdpa_svq_unmap_ring(v, &needle); -+ vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr); 
- -- needle.translated_addr = svq_addr.used_user_addr; -- vhost_vdpa_svq_unmap_ring(v, &needle); -+ vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr); - } - - /** -@@ -997,7 +996,7 @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev, - ok = vhost_vdpa_svq_map_ring(v, &device_region, errp); - if (unlikely(!ok)) { - error_prepend(errp, "Cannot create vq device region: "); -- vhost_vdpa_svq_unmap_ring(v, &driver_region); -+ vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr); - } - addr->used_user_addr = device_region.iova; - --- -2.31.1 - diff --git a/kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch b/kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch deleted file mode 100644 index 1bbfee9..0000000 --- a/kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch +++ /dev/null @@ -1,62 +0,0 @@ -From a32ab5c3f2156ab098e8914437f1aa00c095450e Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:32 +0200 -Subject: [PATCH 14/29] vdpa: add net_vhost_vdpa_cvq_info NetClientInfo -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [14/25] 579b8389d759ae973552ade34369318e8c50aa90 (redhat/centos-stream/src/qemu-kvm) - -Next patches will add a new info callback to restore NIC status through -CVQ. Since only the CVQ vhost device is needed, create it with a new -NetClientInfo. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit f8972b56eeace10a410990f032406250abe18d64) -Signed-off-by: Laurent Vivier ---- - net/vhost-vdpa.c | 12 +++++++++++- - 1 file changed, 11 insertions(+), 1 deletion(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index a49e7e649d..1a597c2e92 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -334,6 +334,16 @@ static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, - return true; - } - -+static NetClientInfo net_vhost_vdpa_cvq_info = { -+ .type = NET_CLIENT_DRIVER_VHOST_VDPA, -+ .size = sizeof(VhostVDPAState), -+ .receive = vhost_vdpa_receive, -+ .cleanup = vhost_vdpa_cleanup, -+ .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, -+ .has_ufo = vhost_vdpa_has_ufo, -+ .check_peer_type = vhost_vdpa_check_peer_type, -+}; -+ - /** - * Do not forward commands not supported by SVQ. Otherwise, the device could - * accept it and qemu would not know how to update the device model. -@@ -475,7 +485,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, - name); - } else { -- nc = qemu_new_net_control_client(&net_vhost_vdpa_info, peer, -+ nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, - device, name); - } - snprintf(nc->info_str, sizeof(nc->info_str), TYPE_VHOST_VDPA); --- -2.31.1 - diff --git a/kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch b/kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch deleted file mode 100644 index ba35d21..0000000 --- a/kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 8b85c33c0efb0c6f2dc3705ee83082438db9d397 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:03 +0200 -Subject: [PATCH 02/29] vdpa: do not save failed dma maps in SVQ iova tree -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - 
-RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [2/25] fc285fecfd400702f81345cef445f5218bcbacad (redhat/centos-stream/src/qemu-kvm) - -If a map fails for whatever reason, it must not be saved in the tree. -Otherwise, qemu will try to unmap it in cleanup, leaving to more errors. - -Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ") -Reported-by: Lei Yang -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 7dab70bec397e3522211e7bcc36d879bad8154c5) -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-vdpa.c | 20 +++++++++++++------- - 1 file changed, 13 insertions(+), 7 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 983d3697b0..7e28d2f674 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -176,6 +176,7 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener) - static void vhost_vdpa_listener_region_add(MemoryListener *listener, - MemoryRegionSection *section) - { -+ DMAMap mem_region = {}; - struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); - hwaddr iova; - Int128 llend, llsize; -@@ -212,13 +213,13 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - - llsize = int128_sub(llend, int128_make64(iova)); - if (v->shadow_vqs_enabled) { -- DMAMap mem_region = { -- .translated_addr = (hwaddr)(uintptr_t)vaddr, -- .size = int128_get64(llsize) - 1, -- .perm = IOMMU_ACCESS_FLAG(true, section->readonly), -- }; -+ int r; - -- int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); -+ mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr, -+ mem_region.size = int128_get64(llsize) - 1, -+ mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly), -+ -+ r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); - if (unlikely(r != IOVA_OK)) { - 
error_report("Can't allocate a mapping (%d)", r); - goto fail; -@@ -232,11 +233,16 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - vaddr, section->readonly); - if (ret) { - error_report("vhost vdpa map fail!"); -- goto fail; -+ goto fail_map; - } - - return; - -+fail_map: -+ if (v->shadow_vqs_enabled) { -+ vhost_iova_tree_remove(v->iova_tree, &mem_region); -+ } -+ - fail: - /* - * On the initfn path, store the first error in the container so we --- -2.31.1 - diff --git a/kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch b/kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch deleted file mode 100644 index 7737060..0000000 --- a/kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch +++ /dev/null @@ -1,153 +0,0 @@ -From 09b86938668bf6111fb6549fcd012f50418a7613 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:34 +0200 -Subject: [PATCH 16/29] vdpa: extract vhost_vdpa_net_cvq_add from - vhost_vdpa_net_handle_ctrl_avail -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [16/25] 7d577b06dcd889f836d5bcbaf6a64998fb138543 (redhat/centos-stream/src/qemu-kvm) - -So we can reuse it to inject state messages. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang --- -v7: -* Remove double free error - -v6: -* Do not assume in buffer sent to the device is sizeof(virtio_net_ctrl_ack) - -v5: -* Do not use an artificial !NULL VirtQueueElement -* Use only out size instead of iovec dev_buffers for these functions. 
- -Signed-off-by: Jason Wang -(cherry picked from commit be4278b65fc1be8fce87e1e7c01bc52602d304eb) -Signed-off-by: Laurent Vivier ---- - net/vhost-vdpa.c | 59 +++++++++++++++++++++++++++++++----------------- - 1 file changed, 38 insertions(+), 21 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 452d10ed93..3575bf64ee 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -331,6 +331,38 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc) - } - } - -+static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, -+ size_t in_len) -+{ -+ /* Buffers for the device */ -+ const struct iovec out = { -+ .iov_base = s->cvq_cmd_out_buffer, -+ .iov_len = out_len, -+ }; -+ const struct iovec in = { -+ .iov_base = s->cvq_cmd_in_buffer, -+ .iov_len = sizeof(virtio_net_ctrl_ack), -+ }; -+ VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0); -+ int r; -+ -+ r = vhost_svq_add(svq, &out, 1, &in, 1, NULL); -+ if (unlikely(r != 0)) { -+ if (unlikely(r == -ENOSPC)) { -+ qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", -+ __func__); -+ } -+ return r; -+ } -+ -+ /* -+ * We can poll here since we've had BQL from the time we sent the -+ * descriptor. 
Also, we need to take the answer before SVQ pulls by itself, -+ * when BQL is released -+ */ -+ return vhost_svq_poll(svq); -+} -+ - static NetClientInfo net_vhost_vdpa_cvq_info = { - .type = NET_CLIENT_DRIVER_VHOST_VDPA, - .size = sizeof(VhostVDPAState), -@@ -387,23 +419,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - void *opaque) - { - VhostVDPAState *s = opaque; -- size_t in_len, dev_written; -+ size_t in_len; - virtio_net_ctrl_ack status = VIRTIO_NET_ERR; - /* Out buffer sent to both the vdpa device and the device model */ - struct iovec out = { - .iov_base = s->cvq_cmd_out_buffer, - }; -- /* In buffer sent to the device */ -- const struct iovec dev_in = { -- .iov_base = s->cvq_cmd_in_buffer, -- .iov_len = sizeof(virtio_net_ctrl_ack), -- }; - /* in buffer used for device model */ - const struct iovec in = { - .iov_base = &status, - .iov_len = sizeof(status), - }; -- int r = -EINVAL; -+ ssize_t dev_written = -EINVAL; - bool ok; - - out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, -@@ -414,21 +441,11 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - goto out; - } - -- r = vhost_svq_add(svq, &out, 1, &dev_in, 1, elem); -- if (unlikely(r != 0)) { -- if (unlikely(r == -ENOSPC)) { -- qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", -- __func__); -- } -+ dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); -+ if (unlikely(dev_written < 0)) { - goto out; - } - -- /* -- * We can poll here since we've had BQL from the time we sent the -- * descriptor. 
Also, we need to take the answer before SVQ pulls by itself, -- * when BQL is released -- */ -- dev_written = vhost_svq_poll(svq); - if (unlikely(dev_written < sizeof(status))) { - error_report("Insufficient written data (%zu)", dev_written); - goto out; -@@ -436,7 +453,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - - memcpy(&status, s->cvq_cmd_in_buffer, sizeof(status)); - if (status != VIRTIO_NET_OK) { -- goto out; -+ return VIRTIO_NET_ERR; - } - - status = VIRTIO_NET_ERR; -@@ -453,7 +470,7 @@ out: - } - vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); - g_free(elem); -- return r; -+ return dev_written < 0 ? dev_written : 0; - } - - static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { --- -2.31.1 - diff --git a/kvm-vdpa-extract-vhost_vdpa_net_load_mac-from-vhost_vdpa.patch b/kvm-vdpa-extract-vhost_vdpa_net_load_mac-from-vhost_vdpa.patch deleted file mode 100644 index 707013a..0000000 --- a/kvm-vdpa-extract-vhost_vdpa_net_load_mac-from-vhost_vdpa.patch +++ /dev/null @@ -1,115 +0,0 @@ -From e03f7e670e608e98fa771d3860574b95908ef3a1 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 11 Aug 2022 14:12:14 +0200 -Subject: [PATCH 21/29] vdpa: extract vhost_vdpa_net_load_mac from - vhost_vdpa_net_load -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [21/25] b4b30be584aab265004648352361f25587e0ed98 (redhat/centos-stream/src/qemu-kvm) - -Upstream: Not merged yet - -Since there may be many commands we need to issue to load the NIC -state, let's split them in individual functions - -Signed-off-by: Eugenio Pérez -Signed-off-by: Laurent Vivier ---- - net/vhost-vdpa.c | 62 +++++++++++++++++++++++++++++++----------------- - 1 file changed, 40 insertions(+), 22 deletions(-) - 
-diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 535315c1d0..e799e744cd 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -365,12 +365,47 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, - return vhost_svq_poll(svq); - } - -+static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class, -+ uint8_t cmd, const void *data, -+ size_t data_size) -+{ -+ const struct virtio_net_ctrl_hdr ctrl = { -+ .class = class, -+ .cmd = cmd, -+ }; -+ -+ assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl)); -+ -+ memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl)); -+ memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size); -+ -+ return vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + data_size, -+ sizeof(virtio_net_ctrl_ack)); -+} -+ -+static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n) -+{ -+ uint64_t features = n->parent_obj.guest_features; -+ if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) { -+ ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC, -+ VIRTIO_NET_CTRL_MAC_ADDR_SET, -+ n->mac, sizeof(n->mac)); -+ if (unlikely(dev_written < 0)) { -+ return dev_written; -+ } -+ -+ return *s->status != VIRTIO_NET_OK; -+ } -+ -+ return 0; -+} -+ - static int vhost_vdpa_net_load(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -- const struct vhost_vdpa *v = &s->vhost_vdpa; -+ struct vhost_vdpa *v = &s->vhost_vdpa; - const VirtIONet *n; -- uint64_t features; -+ int r; - - assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); - -@@ -379,26 +414,9 @@ static int vhost_vdpa_net_load(NetClientState *nc) - } - - n = VIRTIO_NET(v->dev->vdev); -- features = n->parent_obj.guest_features; -- if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) { -- const struct virtio_net_ctrl_hdr ctrl = { -- .class = VIRTIO_NET_CTRL_MAC, -- .cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET, -- }; -- char *cursor = s->cvq_cmd_out_buffer; -- ssize_t dev_written; -- -- memcpy(cursor, &ctrl, 
sizeof(ctrl)); -- cursor += sizeof(ctrl); -- memcpy(cursor, n->mac, sizeof(n->mac)); -- -- dev_written = vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + sizeof(n->mac), -- sizeof(virtio_net_ctrl_ack)); -- if (unlikely(dev_written < 0)) { -- return dev_written; -- } -- -- return *s->status != VIRTIO_NET_OK; -+ r = vhost_vdpa_net_load_mac(s, n); -+ if (unlikely(r < 0)) { -+ return r; - } - - return 0; --- -2.31.1 - diff --git a/kvm-vdpa-validate-MQ-CVQ-commands.patch b/kvm-vdpa-validate-MQ-CVQ-commands.patch deleted file mode 100644 index 2e816eb..0000000 --- a/kvm-vdpa-validate-MQ-CVQ-commands.patch +++ /dev/null @@ -1,50 +0,0 @@ -From b4a0334826d5b28dd8f63edaa606cc123b60a538 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 11 Aug 2022 14:53:10 +0200 -Subject: [PATCH 23/29] vdpa: validate MQ CVQ commands -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [23/25] b727a8bba49a364c6c9afe3d7bfcc70e3ee942f4 (redhat/centos-stream/src/qemu-kvm) - -Upstream: Not merged yet - -So we are sure we can update the device model properly before sending to -the device. 
- -Signed-off-by: Eugenio Pérez -Signed-off-by: Laurent Vivier ---- - net/vhost-vdpa.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 3950e4f25d..c6cbe2fb5c 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -486,6 +486,15 @@ static bool vhost_vdpa_net_cvq_validate_cmd(const void *out_buf, size_t len) - __func__, ctrl.cmd); - }; - break; -+ case VIRTIO_NET_CTRL_MQ: -+ switch (ctrl.cmd) { -+ case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET: -+ return true; -+ default: -+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mq cmd %u\n", -+ __func__, ctrl.cmd); -+ }; -+ break; - default: - qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n", - __func__, ctrl.class); --- -2.31.1 - diff --git a/kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch b/kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch deleted file mode 100644 index b01d7aa..0000000 --- a/kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 88ea456e00f5af59417ef2c397adfea4cf9c685e Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:20:07 +0200 -Subject: [PATCH 06/29] vhost: Always store new kick fd on - vhost_svq_set_svq_kick_fd -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [6/25] 1c2ec6d321446505b9f9d0cc0cf0d812cfddd959 (redhat/centos-stream/src/qemu-kvm) - -We can unbind twice a file descriptor if we call twice -vhost_svq_set_svq_kick_fd because of this. Since it comes from vhost and -not from SVQ, that file descriptor could be a different thing that -guest's vhost notifier. - -Likewise, it can happens the same if a guest start and stop the device -multiple times. 
- -Reported-by: Lei Yang -Fixes: dff4426fa6 ("vhost: Add Shadow VirtQueue kick forwarding capabilities") -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 8b64e486423b09db4463799727bf1fad62fe496a) -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-shadow-virtqueue.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index e4956728dd..82a784d250 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -602,13 +602,13 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) - event_notifier_set_handler(svq_kick, NULL); - } - -+ event_notifier_init_fd(svq_kick, svq_kick_fd); - /* - * event_notifier_set_handler already checks for guest's notifications if - * they arrive at the new file descriptor in the switch, so there is no - * need to explicitly check for them. - */ - if (poll_start) { -- event_notifier_init_fd(svq_kick, svq_kick_fd); - event_notifier_set(svq_kick); - event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier); - } -@@ -655,7 +655,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - */ - void vhost_svq_stop(VhostShadowVirtqueue *svq) - { -- event_notifier_set_handler(&svq->svq_kick, NULL); -+ vhost_svq_set_svq_kick_fd(svq, VHOST_FILE_UNBIND); - g_autofree VirtQueueElement *next_avail_elem = NULL; - - if (!svq->vq) { --- -2.31.1 - diff --git a/kvm-vhost-Delete-useless-read-memory-barrier.patch b/kvm-vhost-Delete-useless-read-memory-barrier.patch deleted file mode 100644 index 7938963..0000000 --- a/kvm-vhost-Delete-useless-read-memory-barrier.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 878a37760e34b54a3d92569f44b0b2f073bfa46a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:28 +0200 -Subject: [PATCH 10/29] vhost: Delete useless read memory barrier -MIME-Version: 
1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [10/25] 13fb2b317093323caf33a17f9de00a94a862ca2e (redhat/centos-stream/src/qemu-kvm) - -As discussed in previous series [1], this memory barrier is useless with -the atomic read of used idx at vhost_svq_more_used. Deleting it. - -[1] https://lists.nongnu.org/archive/html/qemu-devel/2022-07/msg02616.html - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 9e193cec5db949e4001070442a2f7de7042ef09b) -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-shadow-virtqueue.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index b35aeef4bd..8df5296f24 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -509,9 +509,6 @@ size_t vhost_svq_poll(VhostShadowVirtqueue *svq) - if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { - return 0; - } -- -- /* Make sure we read new used_idx */ -- smp_rmb(); - } while (true); - } - --- -2.31.1 - diff --git a/kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch b/kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch deleted file mode 100644 index 858128e..0000000 --- a/kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch +++ /dev/null @@ -1,63 +0,0 @@ -From 39659fb33b282188f005ba26bd2c40ce8b7a173c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:29 +0200 -Subject: [PATCH 11/29] vhost: Do not depend on !NULL VirtQueueElement on - vhost_svq_flush -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: 
RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [11/25] 2fec9b6bb72cf8ef42d08a28df3dc8b540f6f43f (redhat/centos-stream/src/qemu-kvm) - -Since QEMU will be able to inject new elements on CVQ to restore the -state, we need not to depend on a VirtQueueElement to know if a new -element has been used by the device or not. Instead of check that, check -if there are new elements only using used idx on vhost_svq_flush. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit d368c0b052ad95d3bf4fcc5a5d25715a35c91d4b) -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-shadow-virtqueue.c | 11 +++++++---- - 1 file changed, 7 insertions(+), 4 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 8df5296f24..e8e5bbc368 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -499,17 +499,20 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, - size_t vhost_svq_poll(VhostShadowVirtqueue *svq) - { - int64_t start_us = g_get_monotonic_time(); -+ uint32_t len; -+ - do { -- uint32_t len; -- VirtQueueElement *elem = vhost_svq_get_buf(svq, &len); -- if (elem) { -- return len; -+ if (vhost_svq_more_used(svq)) { -+ break; - } - - if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { - return 0; - } - } while (true); -+ -+ vhost_svq_get_buf(svq, &len); -+ return len; - } - - /** --- -2.31.1 - diff --git a/kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch b/kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch deleted file mode 100644 index 72707ff..0000000 --- a/kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 33c22dd3353f79a037f2473a69176932ac1a1c05 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:26 +0200 -Subject: [PATCH 08/29] vhost: stop transfer elem ownership in 
- vhost_handle_guest_kick -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [8/25] e9c6314fddeb1f7bc738efea90f2788cae27bab7 (redhat/centos-stream/src/qemu-kvm) - -It was easier to allow vhost_svq_add to handle the memory. Now that we -will allow qemu to add elements to a SVQ without the guest's knowledge, -it's better to handle it in the caller. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 9c2ab2f1ec333be8614cc12272d4b91960704dbe) -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-shadow-virtqueue.c | 10 ++++------ - 1 file changed, 4 insertions(+), 6 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 82a784d250..a1261d4a0f 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -233,9 +233,6 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) - /** - * Add an element to a SVQ. - * -- * The caller must check that there is enough slots for the new element. It -- * takes ownership of the element: In case of failure not ENOSPC, it is free. 
-- * - * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full - */ - int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, -@@ -252,7 +249,6 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, - - ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head); - if (unlikely(!ok)) { -- g_free(elem); - return -EINVAL; - } - -@@ -293,7 +289,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) - virtio_queue_set_notification(svq->vq, false); - - while (true) { -- VirtQueueElement *elem; -+ g_autofree VirtQueueElement *elem; - int r; - - if (svq->next_guest_avail_elem) { -@@ -324,12 +320,14 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) - * queue the current guest descriptor and ignore kicks - * until some elements are used. - */ -- svq->next_guest_avail_elem = elem; -+ svq->next_guest_avail_elem = g_steal_pointer(&elem); - } - - /* VQ is full or broken, just return and ignore kicks */ - return; - } -+ /* elem belongs to SVQ or external caller now */ -+ elem = NULL; - } - - virtio_queue_set_notification(svq->vq, true); --- -2.31.1 - diff --git a/kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch b/kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch deleted file mode 100644 index 628cc8d..0000000 --- a/kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch +++ /dev/null @@ -1,55 +0,0 @@ -From cf08dbe33683a66a79ec07b8450f9d3d27cff1c4 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:27 +0200 -Subject: [PATCH 09/29] vhost: use SVQ element ndescs instead of opaque data - for desc validation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [9/25] 
071eb2a0db612d516d630a15a1f0fd908ed86fd3 (redhat/centos-stream/src/qemu-kvm) - -Since we're going to allow SVQ to add elements without the guest's -knowledge and without its own VirtQueueElement, it's easier to check if -an element is a valid head checking a different thing than the -VirtQueueElement. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 86f5f2546f03a3dfde421c715187b262e29b2848) -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-shadow-virtqueue.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index a1261d4a0f..b35aeef4bd 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -414,7 +414,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - return NULL; - } - -- if (unlikely(!svq->desc_state[used_elem.id].elem)) { -+ if (unlikely(!svq->desc_state[used_elem.id].ndescs)) { - qemu_log_mask(LOG_GUEST_ERROR, - "Device %s says index %u is used, but it was not available", - svq->vdev->name, used_elem.id); -@@ -422,6 +422,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, - } - - num = svq->desc_state[used_elem.id].ndescs; -+ svq->desc_state[used_elem.id].ndescs = 0; - last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); - svq->desc_next[last_used_chain] = svq->free_head; - svq->free_head = used_elem.id; --- -2.31.1 - diff --git a/kvm-vhost_net-Add-NetClientInfo-start-callback.patch b/kvm-vhost_net-Add-NetClientInfo-start-callback.patch deleted file mode 100644 index 99073b5..0000000 --- a/kvm-vhost_net-Add-NetClientInfo-start-callback.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 0db23ec6808c3ff628d1b1940d2cd01fda0757d1 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:30 +0200 -Subject: [PATCH 12/29] vhost_net: Add NetClientInfo start callback 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [12/25] b448657fa858a885879986059694d26d870155bc (redhat/centos-stream/src/qemu-kvm) - -This is used by the backend to perform actions before the device is -started. - -In particular, vdpa net use it to map CVQ buffers to the device, so it -can send control commands using them. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit eb92b75380fc0f2368e22be45d1e2d1e2cd2f79c) -Signed-off-by: Laurent Vivier ---- - hw/net/vhost_net.c | 7 +++++++ - include/net/net.h | 2 ++ - 2 files changed, 9 insertions(+) - -diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c -index ccac5b7a64..2e0baeba26 100644 ---- a/hw/net/vhost_net.c -+++ b/hw/net/vhost_net.c -@@ -244,6 +244,13 @@ static int vhost_net_start_one(struct vhost_net *net, - struct vhost_vring_file file = { }; - int r; - -+ if (net->nc->info->start) { -+ r = net->nc->info->start(net->nc); -+ if (r < 0) { -+ return r; -+ } -+ } -+ - r = vhost_dev_enable_notifiers(&net->dev, dev); - if (r < 0) { - goto fail_notifiers; -diff --git a/include/net/net.h b/include/net/net.h -index 523136c7ac..ad9e80083a 100644 ---- a/include/net/net.h -+++ b/include/net/net.h -@@ -44,6 +44,7 @@ typedef struct NICConf { - - typedef void (NetPoll)(NetClientState *, bool enable); - typedef bool (NetCanReceive)(NetClientState *); -+typedef int (NetStart)(NetClientState *); - typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); - typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); - typedef void (NetCleanup) (NetClientState *); -@@ -71,6 +72,7 @@ typedef struct NetClientInfo { - NetReceive *receive_raw; - NetReceiveIOV *receive_iov; - NetCanReceive *can_receive; -+ NetStart 
*start; - NetCleanup *cleanup; - LinkStatusChanged *link_status_changed; - QueryRxFilter *query_rx_filter; --- -2.31.1 - diff --git a/kvm-vhost_net-Add-NetClientInfo-stop-callback.patch b/kvm-vhost_net-Add-NetClientInfo-stop-callback.patch deleted file mode 100644 index 5b51f8b..0000000 --- a/kvm-vhost_net-Add-NetClientInfo-stop-callback.patch +++ /dev/null @@ -1,68 +0,0 @@ -From cc3e96b81280fe45a34a26586718079072dbcf39 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:31 +0200 -Subject: [PATCH 13/29] vhost_net: Add NetClientInfo stop callback -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [13/25] cb90c1228e9af493def4818ea3b49e2b0cfae456 (redhat/centos-stream/src/qemu-kvm) - -Used by the backend to perform actions after the device is stopped. - -In particular, vdpa net use it to unmap CVQ buffers to the device, -cleaning the actions performed in prepare(). 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit c5e5269d8a955a0f924218911c2f4a0b34e87a21) -Signed-off-by: Laurent Vivier ---- - hw/net/vhost_net.c | 3 +++ - include/net/net.h | 2 ++ - 2 files changed, 5 insertions(+) - -diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c -index 2e0baeba26..9d4b334453 100644 ---- a/hw/net/vhost_net.c -+++ b/hw/net/vhost_net.c -@@ -320,6 +320,9 @@ static void vhost_net_stop_one(struct vhost_net *net, - net->nc->info->poll(net->nc, true); - } - vhost_dev_stop(&net->dev, dev); -+ if (net->nc->info->stop) { -+ net->nc->info->stop(net->nc); -+ } - vhost_dev_disable_notifiers(&net->dev, dev); - } - -diff --git a/include/net/net.h b/include/net/net.h -index ad9e80083a..476ad45b9a 100644 ---- a/include/net/net.h -+++ b/include/net/net.h -@@ -45,6 +45,7 @@ typedef struct NICConf { - typedef void (NetPoll)(NetClientState *, bool enable); - typedef bool (NetCanReceive)(NetClientState *); - typedef int (NetStart)(NetClientState *); -+typedef void (NetStop)(NetClientState *); - typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); - typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); - typedef void (NetCleanup) (NetClientState *); -@@ -73,6 +74,7 @@ typedef struct NetClientInfo { - NetReceiveIOV *receive_iov; - NetCanReceive *can_receive; - NetStart *start; -+ NetStop *stop; - NetCleanup *cleanup; - LinkStatusChanged *link_status_changed; - QueryRxFilter *query_rx_filter; --- -2.31.1 - diff --git a/kvm-vhost_net-add-NetClientState-load-callback.patch b/kvm-vhost_net-add-NetClientState-load-callback.patch deleted file mode 100644 index ecd279f..0000000 --- a/kvm-vhost_net-add-NetClientState-load-callback.patch +++ /dev/null @@ -1,73 +0,0 @@ -From d91546b3bc3dd147b6327a4d8c5b523104a09aa6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 23 Aug 2022 20:30:35 +0200 -Subject: [PATCH 17/29] vhost_net: add 
NetClientState->load() callback -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [17/25] de71f2e8fc7b25f5197101703fbb5ff054ada984 (redhat/centos-stream/src/qemu-kvm) - -It allows per-net client operations right after device's successful -start. In particular, to load the device status. - -Vhost-vdpa net will use it to add the CVQ buffers to restore the device -status. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Signed-off-by: Jason Wang -(cherry picked from commit 539573c317dc0b8d50a128db60550f2f2898d2fc) -Signed-off-by: Laurent Vivier ---- - hw/net/vhost_net.c | 7 +++++++ - include/net/net.h | 2 ++ - 2 files changed, 9 insertions(+) - -diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c -index 9d4b334453..d28f8b974b 100644 ---- a/hw/net/vhost_net.c -+++ b/hw/net/vhost_net.c -@@ -281,6 +281,13 @@ static int vhost_net_start_one(struct vhost_net *net, - } - } - } -+ -+ if (net->nc->info->load) { -+ r = net->nc->info->load(net->nc); -+ if (r < 0) { -+ goto fail; -+ } -+ } - return 0; - fail: - file.fd = -1; -diff --git a/include/net/net.h b/include/net/net.h -index 476ad45b9a..81d0b21def 100644 ---- a/include/net/net.h -+++ b/include/net/net.h -@@ -45,6 +45,7 @@ typedef struct NICConf { - typedef void (NetPoll)(NetClientState *, bool enable); - typedef bool (NetCanReceive)(NetClientState *); - typedef int (NetStart)(NetClientState *); -+typedef int (NetLoad)(NetClientState *); - typedef void (NetStop)(NetClientState *); - typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); - typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); -@@ -74,6 +75,7 @@ typedef struct NetClientInfo { - NetReceiveIOV *receive_iov; - NetCanReceive *can_receive; - NetStart *start; -+ NetLoad *load; - NetStop 
*stop; - NetCleanup *cleanup; - LinkStatusChanged *link_status_changed; --- -2.31.1 - diff --git a/kvm-virtio-net-Update-virtio-net-curr_queue_pairs-in-vdp.patch b/kvm-virtio-net-Update-virtio-net-curr_queue_pairs-in-vdp.patch deleted file mode 100644 index 7629017..0000000 --- a/kvm-virtio-net-Update-virtio-net-curr_queue_pairs-in-vdp.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 9e23182c5249f876e56ef9a31b22476b5268f246 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 11 Aug 2022 16:40:07 +0200 -Subject: [PATCH 24/29] virtio-net: Update virtio-net curr_queue_pairs in vdpa - backends -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 117: vDPA SVQ Multiqueue support -RH-Jira: RHELX-57 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Commit: [24/25] e0e6978394d6496a7e12cf8424b2e9cb87281a90 (redhat/centos-stream/src/qemu-kvm) - -Upstream: Not merged yet - -It was returned as error before. Instead of it, simply update the -corresponding field so qemu can send it in the migration data. - -Signed-off-by: Eugenio Pérez -Signed-off-by: Laurent Vivier ---- - hw/net/virtio-net.c | 17 ++++++----------- - 1 file changed, 6 insertions(+), 11 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index dd0d056fde..63a8332cd0 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -1412,19 +1412,14 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, - return VIRTIO_NET_ERR; - } - -- /* Avoid changing the number of queue_pairs for vdpa device in -- * userspace handler. A future fix is needed to handle the mq -- * change in userspace handler with vhost-vdpa. Let's disable -- * the mq handling from userspace for now and only allow get -- * done through the kernel. 
Ripples may be seen when falling -- * back to userspace, but without doing it qemu process would -- * crash on a recursive entry to virtio_net_set_status(). -- */ -+ n->curr_queue_pairs = queue_pairs; - if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { -- return VIRTIO_NET_ERR; -+ /* -+ * Avoid updating the backend for a vdpa device: We're only interested -+ * in updating the device model queues. -+ */ -+ return VIRTIO_NET_OK; - } -- -- n->curr_queue_pairs = queue_pairs; - /* stop the backend before changing the number of queue_pairs to avoid handling a - * disabled queue */ - virtio_net_set_status(vdev, vdev->status); --- -2.31.1 - diff --git a/qemu-kvm.spec b/qemu-kvm.spec index cab37a6..acf722f 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -1,7 +1,7 @@ %global libfdt_version 1.6.0 %global libseccomp_version 2.4.0 %global libusbx_version 1.0.23 -%global meson_version 0.58.2 +%global meson_version 0.61.3 %global usbredir_version 0.7.1 %global ipxe_version 20200823-5.git4bd064de @@ -113,16 +113,13 @@ Requires: %{name}-ui-opengl = %{epoch}:%{version}-%{release} \ Requires: %{name}-ui-egl-headless = %{epoch}:%{version}-%{release} \ %endif \ Requires: %{name}-device-display-virtio-gpu = %{epoch}:%{version}-%{release} \ -Requires: %{name}-device-display-virtio-gpu-gl = %{epoch}:%{version}-%{release} \ %ifarch s390x \ Requires: %{name}-device-display-virtio-gpu-ccw = %{epoch}:%{version}-%{release} \ %else \ Requires: %{name}-device-display-virtio-gpu-pci = %{epoch}:%{version}-%{release} \ -Requires: %{name}-device-display-virtio-gpu-pci-gl = %{epoch}:%{version}-%{release} \ %endif \ %ifarch x86_64 %{power64} \ Requires: %{name}-device-display-virtio-vga = %{epoch}:%{version}-%{release} \ -Requires: %{name}-device-display-virtio-vga-gl = %{epoch}:%{version}-%{release} \ %endif \ Requires: %{name}-device-usb-host = %{epoch}:%{version}-%{release} \ %if %{have_usbredir} \ @@ -150,8 +147,8 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ 
Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 7.1.0 -Release: 7%{?rcrel}%{?dist}%{?cc_suffix} +Version: 7.2.0 +Release: 1%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -177,92 +174,23 @@ Source36: README.tests Patch0004: 0004-Initial-redhat-build.patch -Patch0005: 0005-Re-enable-capstone-internal-build.patch -Patch0006: 0006-Enable-disable-devices-for-RHEL.patch -Patch0007: 0007-Machine-type-related-general-changes.patch -Patch0008: 0008-Add-aarch64-machine-types.patch -Patch0009: 0009-Add-ppc64-machine-types.patch -Patch0010: 0010-Add-s390x-machine-types.patch -Patch0011: 0011-Add-x86_64-machine-types.patch -Patch0012: 0012-Enable-make-check.patch -Patch0013: 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch -Patch0014: 0014-Add-support-statement-to-help-output.patch -Patch0015: 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch -Patch0016: 0016-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -Patch0017: 0017-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch -Patch0018: 0018-Introduce-upstream-7.0-compat-changes.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch19: kvm-vdpa-Skip-the-maps-not-in-the-iova-tree.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch20: kvm-vdpa-do-not-save-failed-dma-maps-in-SVQ-iova-tree.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch21: kvm-util-accept-iova_tree_remove_parameter-by-value.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch22: kvm-vdpa-Remove-SVQ-vring-from-iova_tree-at-shutdown.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch23: kvm-vdpa-Make-SVQ-vring-unmapping-return-void.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch24: kvm-vhost-Always-store-new-kick-fd-on-vhost_svq_set_svq_.patch -# For RHELX-57 - vDPA SVQ Multiqueue support 
-Patch25: kvm-vdpa-Use-ring-hwaddr-at-vhost_vdpa_svq_unmap_ring.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch26: kvm-vhost-stop-transfer-elem-ownership-in-vhost_handle_g.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch27: kvm-vhost-use-SVQ-element-ndescs-instead-of-opaque-data-.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch28: kvm-vhost-Delete-useless-read-memory-barrier.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch29: kvm-vhost-Do-not-depend-on-NULL-VirtQueueElement-on-vhos.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch30: kvm-vhost_net-Add-NetClientInfo-start-callback.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch31: kvm-vhost_net-Add-NetClientInfo-stop-callback.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch32: kvm-vdpa-add-net_vhost_vdpa_cvq_info-NetClientInfo.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch33: kvm-vdpa-Move-command-buffers-map-to-start-of-net-device.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch34: kvm-vdpa-extract-vhost_vdpa_net_cvq_add-from-vhost_vdpa_.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch35: kvm-vhost_net-add-NetClientState-load-callback.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch36: kvm-vdpa-Add-virtio-net-mac-address-via-CVQ-at-start.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch37: kvm-vdpa-Delete-CVQ-migration-blocker.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch38: kvm-vdpa-Make-VhostVDPAState-cvq_cmd_in_buffer-control-a.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch39: kvm-vdpa-extract-vhost_vdpa_net_load_mac-from-vhost_vdpa.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch40: kvm-vdpa-Add-vhost_vdpa_net_load_mq.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch41: kvm-vdpa-validate-MQ-CVQ-commands.patch -# For RHELX-57 - vDPA SVQ Multiqueue support -Patch42: kvm-virtio-net-Update-virtio-net-curr_queue_pairs-in-vdp.patch -# For RHELX-57 - vDPA 
SVQ Multiqueue support -Patch43: kvm-vdpa-Allow-MQ-feature-in-SVQ.patch -# For bz#2125281 - [RHEL9.1] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command [rhel-9.2.0] -Patch44: kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch -# For bz#2125281 - [RHEL9.1] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command [rhel-9.2.0] -Patch45: kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch -# For bz#2127825 - Use capstone for qemu-kvm build -Patch46: kvm-Revert-Re-enable-capstone-internal-build.patch -# For bz#2108531 - Windows guest reboot after migration with wsl2 installed inside -Patch47: kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch -# For bz#2126095 - [rhel9.2][intel_iommu]Booting guest with "-device intel-iommu,intremap=on,device-iotlb=on,caching-mode=on" causes kernel call trace -Patch48: kvm-Revert-intel_iommu-Fix-irqchip-X2APIC-configuration-.patch -# For bz#2141218 - qemu-kvm build fails with clang 15.0.1 due to false unused variable error -Patch49: kvm-rtl8139-Remove-unused-variable.patch -# For bz#2141218 - qemu-kvm build fails with clang 15.0.1 due to false unused variable error -Patch50: kvm-qemu-img-remove-unused-variable.patch -# For bz#2141218 - qemu-kvm build fails with clang 15.0.1 due to false unused variable error -Patch51: kvm-host-libusb-Remove-unused-variable.patch -# For bz#2143170 - The installation can not start when install files (iso) locate on a 4k disk -Patch52: kvm-block-move-bdrv_qiov_is_aligned-to-file-posix.patch -# For bz#2143170 - The installation can not start when install files (iso) locate on a 4k disk -Patch53: kvm-block-use-the-request-length-for-iov-alignment.patch -# For bz#2149108 - CVE-2022-4172 qemu-kvm: QEMU: ACPI ERST: memory corruption issues in read_erst_record and write_erst_record [rhel-9] -Patch54: kvm-hw-acpi-erst.c-Fix-memory-handling-issues.patch +Patch0005: 0005-Enable-disable-devices-for-RHEL.patch +Patch0006: 
0006-Machine-type-related-general-changes.patch +Patch0007: 0007-Add-aarch64-machine-types.patch +Patch0008: 0008-Add-ppc64-machine-types.patch +Patch0009: 0009-Add-s390x-machine-types.patch +Patch0010: 0010-Add-x86_64-machine-types.patch +Patch0011: 0011-Enable-make-check.patch +Patch0012: 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +Patch0013: 0013-Add-support-statement-to-help-output.patch +Patch0014: 0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0015: 0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +Patch0016: 0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +Patch0018: 0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch +Patch0019: 0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch +Patch0020: 0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch +Patch0021: 0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch +Patch0022: 0022-x86-rhel-9.2.0-machine-type.patch %if %{have_clang} BuildRequires: clang @@ -361,10 +289,10 @@ Requires: edk2-ovmf %ifarch aarch64 Requires: edk2-aarch64 %endif -Requires: capstone Requires: libseccomp >= %{libseccomp_version} Requires: libusbx >= %{libusbx_version} +Requires: capstone %if %{have_fdt} Requires: libfdt >= %{libfdt_version} %endif @@ -515,30 +443,20 @@ Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} %description device-display-virtio-gpu This package provides the virtio-gpu display device for QEMU. -%package device-display-virtio-gpu-gl -Summary: QEMU virtio-gpu-gl display device -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} -%description device-display-virtio-gpu-gl -This package provides the virtio-gpu-gl display device for QEMU. 
- %ifarch s390x %package device-display-virtio-gpu-ccw Summary: QEMU virtio-gpu-ccw display device Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: %{name}-device-display-virtio-gpu = %{epoch}:%{version}-%{release} %description device-display-virtio-gpu-ccw This package provides the virtio-gpu-ccw display device for QEMU. %else %package device-display-virtio-gpu-pci Summary: QEMU virtio-gpu-pci display device Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: %{name}-device-display-virtio-gpu = %{epoch}:%{version}-%{release} %description device-display-virtio-gpu-pci This package provides the virtio-gpu-pci display device for QEMU. - -%package device-display-virtio-gpu-pci-gl -Summary: QEMU virtio-gpu-pci-gl display device -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} -%description device-display-virtio-gpu-pci-gl -This package provides the virtio-gpu-pci-gl display device for QEMU. %endif %ifarch x86_64 %{power64} @@ -547,12 +465,6 @@ Summary: QEMU virtio-vga display device Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} %description device-display-virtio-vga This package provides the virtio-vga display device for QEMU. - -%package device-display-virtio-vga-gl -Summary: QEMU virtio-vga-gl display device -Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} -%description device-display-virtio-vga-gl -This package provides the virtio-vga-gl display device for QEMU. 
%endif %package device-usb-host @@ -589,6 +501,7 @@ mkdir -p %{qemu_kvm_build} --disable-auth-pam \\\ --disable-avx2 \\\ --disable-avx512f \\\ + --disable-blkio \\\ --disable-block-drv-whitelist-in-tools \\\ --disable-bochs \\\ --disable-bpf \\\ @@ -678,6 +591,7 @@ mkdir -p %{qemu_kvm_build} --disable-slirp-smbd \\\ --disable-smartcard \\\ --disable-snappy \\\ + --disable-sndio \\\ --disable-sparse \\\ --disable-spice \\\ --disable-spice-protocol \\\ @@ -733,7 +647,7 @@ run_configure() { --with-pkgversion="%{name}-%{version}-%{release}" \ --with-suffix="%{name}" \ --firmwarepath=%{firmwaredirs} \ - --meson="internal" \ + --meson="%{__meson}" \ --enable-trace-backend=dtrace \ --with-coroutine=ucontext \ --with-git=git \ @@ -800,7 +714,7 @@ run_configure \ %endif --enable-seccomp \ --enable-selinux \ - --enable-slirp=system \ + --enable-slirp \ --enable-snappy \ --enable-spice-protocol \ --enable-system \ @@ -1085,6 +999,16 @@ install -D -m 0644 %{_sourcedir}/bridge.conf %{buildroot}%{_sysconfdir}/%{name}/ install -m 0644 contrib/systemd/qemu-pr-helper.service %{buildroot}%{_unitdir} install -m 0644 contrib/systemd/qemu-pr-helper.socket %{buildroot}%{_unitdir} +# We do not support gl display devices so we can remove their modules as they +# do not have expected functionality included. +# +# https://gitlab.com/qemu-project/qemu/-/issues/1352 was filed to stop building these +# modules in case all dependencies are not satisfied. + +rm -rf %{buildroot}%{_libdir}/%{name}/hw-display-virtio-gpu-gl.so +rm -rf %{buildroot}%{_libdir}/%{name}/hw-display-virtio-gpu-pci-gl.so +rm -rf %{buildroot}%{_libdir}/%{name}/hw-display-virtio-vga-gl.so + # We need to make the block device modules and other qemu SO files executable # otherwise RPM won't pick up their dependencies. 
chmod +x %{buildroot}%{_libdir}/%{name}/*.so @@ -1242,25 +1166,17 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %files device-display-virtio-gpu %{_libdir}/%{name}/hw-display-virtio-gpu.so -%files device-display-virtio-gpu-gl -%{_libdir}/%{name}/hw-display-virtio-gpu-gl.so - %ifarch s390x %files device-display-virtio-gpu-ccw %{_libdir}/%{name}/hw-s390x-virtio-gpu-ccw.so %else %files device-display-virtio-gpu-pci %{_libdir}/%{name}/hw-display-virtio-gpu-pci.so - -%files device-display-virtio-gpu-pci-gl - %{_libdir}/%{name}/hw-display-virtio-gpu-pci-gl.so %endif %ifarch x86_64 %{power64} %files device-display-virtio-vga %{_libdir}/%{name}/hw-display-virtio-vga.so -%files device-display-virtio-vga-gl - %{_libdir}/%{name}/hw-display-virtio-vga-gl.so %endif %files tests @@ -1295,6 +1211,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Thu Dec 15 2022 Miroslav Rezanina - 7.2.0-1 +- Rebase to QEMU 7.2.0 [bz#2135806] +- Resolves: bz#2135806 + (Rebase to QEMU 7.2 for RHEL 9.2.0) + * Wed Dec 14 2022 Jon Maloy - 7.1.0-7 - kvm-hw-acpi-erst.c-Fix-memory-handling-issues.patch [bz#2149108] - Resolves: bz#2149108 diff --git a/sources b/sources index 9b00967..c45f059 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-7.1.0.tar.xz) = c60c5ff8ec99b7552e485768908920658fdd8035ff7a6fa370fb6881957dc8b7e5f18ff1a8f49bd6aa22909ede2a7c084986d8244f12074ccd33ebe40a0c411f +SHA512 (qemu-7.2.0.tar.xz) = f3cfa00da739ba819a218d7e6e95c77fb79a8e0f487b024ddd281602e785249b81144595e3f8c746c32a4f5c4d1a88c6aebae3c162603edfbb50ae3722d7ed13 From 896f01d9a314a798c7cc783d09b2e5332015dd0a Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Tue, 20 Dec 2022 07:02:54 +0100 Subject: [PATCH 176/195] * Tue Dec 20 2022 Miroslav Rezanina - 7.2.0-2 - Fix updating from 7.1.0 - kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch[bz#2154640] - Resolves: bz#2154640 ([aarch64] qemu fails to load "efi-virtio.rom" romfile when creating virtio-net-pci) --- 
...dhat-fix-virt-rhel9.2.0-compat-props.patch | 43 +++++++++++++++++++ qemu-kvm.spec | 17 +++++++- 2 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch diff --git a/kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch b/kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch new file mode 100644 index 0000000..1a2e863 --- /dev/null +++ b/kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch @@ -0,0 +1,43 @@ +From 546e4213c4e8a7b2e369315a71bc9aec091eed6e Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Mon, 19 Dec 2022 10:30:26 +0100 +Subject: redhat: fix virt-rhel9.2.0 compat props + +RH-Author: Cornelia Huck +RH-MergeRequest: 127: redhat: fix virt-rhel9.2.0 compat props +RH-Bugzilla: 2154640 +RH-Acked-by: Eric Auger +RH-Acked-by: Gavin Shan +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 49635fdc1d9a934ece78abd160b07c19909f876a (cohuck/qemu-kvm-c9s) + +We need to include arm_rhel_compat props in the latest machine. + +Signed-off-by: Cornelia Huck +--- + hw/arm/virt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 0a94f31dd1..bf18838b87 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3520,6 +3520,7 @@ type_init(rhel_machine_init); + + static void rhel920_virt_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); + } + DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) + +@@ -3529,7 +3530,6 @@ static void rhel900_virt_options(MachineClass *mc) + + rhel920_virt_options(mc); + +- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + + /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ +-- +2.38.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index acf722f..1e052fb 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= 
%{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.2.0 -Release: 1%{?rcrel}%{?dist}%{?cc_suffix} +Release: 2%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -191,6 +191,7 @@ Patch0019: 0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch Patch0020: 0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch Patch0021: 0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch Patch0022: 0022-x86-rhel-9.2.0-machine-type.patch +Patch23: kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch %if %{have_clang} BuildRequires: clang @@ -327,6 +328,10 @@ Requires: seabios-bin >= 1.10.2-1 Requires: seavgabios-bin >= 1.12.0-3 Requires: ipxe-roms-qemu >= %{ipxe_version} %endif +# Removal -gl modules as they do not provide any functionality - see bz#2149022 +Obsoletes: %{name}-device-display-virtio-gpu-gl <= %{epoch}:%{version} +Obsoletes: %{name}-device-display-virtio-gpu-pci-gl <= %{epoch}:%{version} +Obsoletes: %{name}-device-display-virtio-vga-gl <= %{epoch}:%{version} %description common %{name} is an open source virtualizer that provides hardware emulation for @@ -494,6 +499,10 @@ mkdir -p %{qemu_kvm_build} %build + +# Necessary hack for ZUUL CI +ulimit -n 10240 + %define disable_everything \\\ --audio-drv-list= \\\ --disable-alsa \\\ @@ -1211,6 +1220,12 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Tue Dec 20 2022 Miroslav Rezanina - 7.2.0-2 +- Fix updating from 7.1.0 +- kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch[bz#2154640] +- Resolves: bz#2154640 + ([aarch64] qemu fails to load "efi-virtio.rom" romfile when creating virtio-net-pci) + * Thu Dec 15 2022 Miroslav Rezanina - 7.2.0-1 - Rebase to QEMU 7.2.0 [bz#2135806] - Resolves: bz#2135806 From 5252804d5520ec74c63d082c20a13efe52d37f86 Mon Sep 17 00:00:00 2001 From: 
Miroslav Rezanina Date: Wed, 4 Jan 2023 04:47:00 -0500 Subject: [PATCH 177/195] * Wed Jan 04 2023 Miroslav Rezanina - 7.2.0-3 - kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch [bz#2113840] - kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch [bz#2113840] - kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch [bz#2113840] - kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch [bz#2113840] - kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch [bz#2113840] - kvm-hw-arm-virt-Add-compact-highmem-property.patch [bz#2113840] - kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch [bz#2113840] - kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch [bz#2113840] - Resolves: bz#2113840 ([RHEL9.2] Memory mapping optimization for virt machine) --- ...rm-virt-Add-compact-highmem-property.patch | 169 +++++++++++++++++ ...properties-to-disable-high-memory-re.patch | 179 ++++++++++++++++++ ...le-compat-high-memory-region-address.patch | 51 +++++ ...ove-high-memory-region-address-assig.patch | 112 +++++++++++ ...oduce-variable-region_base-in-virt_s.patch | 82 ++++++++ ...oduce-virt_get_high_memmap_enabled-h.patch | 95 ++++++++++ ...ntroduce-virt_set_high_memmap-helper.patch | 130 +++++++++++++ ...me-variable-size-to-region_size-in-v.patch | 83 ++++++++ qemu-kvm.spec | 30 ++- 9 files changed, 930 insertions(+), 1 deletion(-) create mode 100644 kvm-hw-arm-virt-Add-compact-highmem-property.patch create mode 100644 kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch create mode 100644 kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch create mode 100644 kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch create mode 100644 kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch create mode 100644 kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch create mode 100644 kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch create mode 100644 
kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch diff --git a/kvm-hw-arm-virt-Add-compact-highmem-property.patch b/kvm-hw-arm-virt-Add-compact-highmem-property.patch new file mode 100644 index 0000000..bc65e2f --- /dev/null +++ b/kvm-hw-arm-virt-Add-compact-highmem-property.patch @@ -0,0 +1,169 @@ +From 4ab2aff624908e49b099f00609875f4d03e9e1ec Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 21 Dec 2022 08:48:45 +0800 +Subject: [PATCH 6/8] hw/arm/virt: Add 'compact-highmem' property + +RH-Author: Gavin Shan +RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment +RH-Bugzilla: 2113840 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [6/8] 781506f3445493f05b511547370b6d88ef092457 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 + +After the improvement to high memory region address assignment is +applied, the memory layout can be changed, introducing possible +migration breakage. For example, VIRT_HIGH_PCIE_MMIO memory region +is disabled or enabled when the optimization is applied or not, with +the following configuration. The configuration is only achievable by +modifying the source code until more properties are added to allow +users to selectively disable those high memory regions. + + pa_bits = 40; + vms->highmem_redists = false; + vms->highmem_ecam = false; + vms->highmem_mmio = true; + + # qemu-system-aarch64 -accel kvm -cpu host \ + -machine virt-7.2,compact-highmem={on, off} \ + -m 4G,maxmem=511G -monitor stdio + + Region compact-highmem=off compact-highmem=on + ---------------------------------------------------------------- + MEM [1GB 512GB] [1GB 512GB] + HIGH_GIC_REDISTS2 [512GB 512GB+64MB] [disabled] + HIGH_PCIE_ECAM [512GB+256MB 512GB+512MB] [disabled] + HIGH_PCIE_MMIO [disabled] [512GB 1TB] + +In order to keep backward compatibility, we need to disable the +optimization on machine, which is virt-7.1 or earlier than it.
It +means the optimization is enabled by default from virt-7.2. Besides, +'compact-highmem' property is added so that the optimization can be +explicitly enabled or disabled on all machine types by users. + +Signed-off-by: Gavin Shan +Reviewed-by: Eric Auger +Reviewed-by: Cornelia Huck +Reviewed-by: Marc Zyngier +Tested-by: Zhenyu Zhang +Message-id: 20221029224307.138822-7-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit f40408a9fe5d1db70a75a33d2b26c8af8a5d57b0) +Signed-off-by: Gavin Shan +Conflicts: + hw/arm/virt.c + Comment out the handlers of property 'compact-highmem' since + the property isn't exposed. +--- + docs/system/arm/virt.rst | 4 ++++ + hw/arm/virt.c | 34 ++++++++++++++++++++++++++++++++++ + include/hw/arm/virt.h | 1 + + 3 files changed, 39 insertions(+) + +diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst +index 20442ea2c1..4454706392 100644 +--- a/docs/system/arm/virt.rst ++++ b/docs/system/arm/virt.rst +@@ -94,6 +94,10 @@ highmem + address space above 32 bits. The default is ``on`` for machine types + later than ``virt-2.12``. + ++compact-highmem ++ Set ``on``/``off`` to enable/disable the compact layout for high memory regions. ++ The default is ``on`` for machine types later than ``virt-7.2``. ++ + gic-version + Specify the version of the Generic Interrupt Controller (GIC) to provide. + Valid values are: +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 6896e0ca0f..6087511ae9 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -216,6 +216,12 @@ static const MemMapEntry base_memmap[] = { + * Note the extended_memmap is sized so that it eventually also includes the + * base_memmap entries (VIRT_HIGH_GIC_REDIST2 index is greater than the last + * index of base_memmap). ++ * ++ * The memory map for these Highmem IO Regions can be in legacy or compact ++ * layout, depending on 'compact-highmem' property. 
With legacy layout, the ++ * PA space for one specific region is always reserved, even if the region ++ * has been disabled or doesn't fit into the PA space. However, the PA space ++ * for the region won't be reserved in these circumstances with compact layout. + */ + static MemMapEntry extended_memmap[] = { + /* Additional 64 MB redist region (can contain up to 512 redistributors) */ +@@ -2400,6 +2406,22 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) + vms->highmem = value; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ ++static bool virt_get_compact_highmem(Object *obj, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ ++ return vms->highmem_compact; ++} ++ ++static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ ++ vms->highmem_compact = value; ++} ++#endif /* disabled for RHEL */ ++ + static bool virt_get_its(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -3023,6 +3045,13 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + "Set on/off to enable/disable using " + "physical address space above 32 bits"); + ++ object_class_property_add_bool(oc, "compact-highmem", ++ virt_get_compact_highmem, ++ virt_set_compact_highmem); ++ object_class_property_set_description(oc, "compact-highmem", ++ "Set on/off to enable/disable compact " ++ "layout for high memory regions"); ++ + object_class_property_add_str(oc, "gic-version", virt_get_gic_version, + virt_set_gic_version); + object_class_property_set_description(oc, "gic-version", +@@ -3107,6 +3136,7 @@ static void virt_instance_init(Object *obj) + + /* High memory is enabled by default */ + vms->highmem = true; ++ vms->highmem_compact = !vmc->no_highmem_compact; + vms->gic_version = VIRT_GIC_VERSION_NOSEL; + + vms->highmem_ecam = !vmc->no_highmem_ecam; +@@ -3176,8 +3206,12 @@ DEFINE_VIRT_MACHINE_AS_LATEST(7, 2) + + static void 
virt_machine_7_1_options(MachineClass *mc) + { ++ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); ++ + virt_machine_7_2_options(mc); + compat_props_add(mc->compat_props, hw_compat_7_1, hw_compat_7_1_len); ++ /* Compact layout for high memory regions was introduced with 7.2 */ ++ vmc->no_highmem_compact = true; + } + DEFINE_VIRT_MACHINE(7, 1) + +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 15bd291311..85e7d61868 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -125,6 +125,7 @@ struct VirtMachineClass { + bool no_pmu; + bool claim_edge_triggered_timers; + bool smbios_old_sys_ver; ++ bool no_highmem_compact; + bool no_highmem_ecam; + bool no_ged; /* Machines < 4.2 have no support for ACPI GED device */ + bool kvm_no_adjvtime; +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch b/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch new file mode 100644 index 0000000..df691a7 --- /dev/null +++ b/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch @@ -0,0 +1,179 @@ +From 30e86a7c4fbcdc95b74bcb2a15745cb221783091 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 21 Dec 2022 08:48:45 +0800 +Subject: [PATCH 7/8] hw/arm/virt: Add properties to disable high memory + regions + +RH-Author: Gavin Shan +RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment +RH-Bugzilla: 2113840 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [7/8] 16f8762393b447a590b31c9e4d8d3c58c6bc9fa8 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 + +The 3 high memory regions are usually enabled by default, but they may +be not used. For example, VIRT_HIGH_GIC_REDIST2 isn't needed by GICv2. +This leads to waste in the PA space. + +Add properties ("highmem-redists", "highmem-ecam", "highmem-mmio") to +allow users selectively disable them if needed. 
After that, the high +memory region for GICv3 or GICv4 redistributor can be disabled by user, +the number of maximal supported CPUs needs to be calculated based on +'vms->highmem_redists'. The follow-up error message is also improved +to indicate if the high memory region for GICv3 and GICv4 has been +enabled or not. + +Suggested-by: Marc Zyngier +Signed-off-by: Gavin Shan +Reviewed-by: Marc Zyngier +Reviewed-by: Cornelia Huck +Reviewed-by: Eric Auger +Message-id: 20221029224307.138822-8-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 6a48c64eec355ab1aff694eb4522d07a8e461368) +Signed-off-by: Gavin Shan +Conflicts: + hw/arm/virt.c + Comment out the handlers of the property 'highmem-redists', + 'highmem-ecam' and 'highmem-mmio' since they aren't exposed. +--- + docs/system/arm/virt.rst | 13 +++++++ + hw/arm/virt.c | 75 ++++++++++++++++++++++++++++++++++++++-- + 2 files changed, 86 insertions(+), 2 deletions(-) + +diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst +index 4454706392..188a4f211f 100644 +--- a/docs/system/arm/virt.rst ++++ b/docs/system/arm/virt.rst +@@ -98,6 +98,19 @@ compact-highmem + Set ``on``/``off`` to enable/disable the compact layout for high memory regions. + The default is ``on`` for machine types later than ``virt-7.2``. + ++highmem-redists ++ Set ``on``/``off`` to enable/disable the high memory region for GICv3 or ++ GICv4 redistributor. The default is ``on``. Setting this to ``off`` will ++ limit the maximum number of CPUs when GICv3 or GICv4 is used. ++ ++highmem-ecam ++ Set ``on``/``off`` to enable/disable the high memory region for PCI ECAM. ++ The default is ``on`` for machine types later than ``virt-3.0``. ++ ++highmem-mmio ++ Set ``on``/``off`` to enable/disable the high memory region for PCI MMIO. ++ The default is ``on``. ++ + gic-version + Specify the version of the Generic Interrupt Controller (GIC) to provide. 
+ Valid values are: +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 6087511ae9..304fa0d6e7 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2142,14 +2142,20 @@ static void machvirt_init(MachineState *machine) + if (vms->gic_version == VIRT_GIC_VERSION_2) { + virt_max_cpus = GIC_NCPU; + } else { +- virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST) + +- virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2); ++ virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST); ++ if (vms->highmem_redists) { ++ virt_max_cpus += virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2); ++ } + } + + if (max_cpus > virt_max_cpus) { + error_report("Number of SMP CPUs requested (%d) exceeds max CPUs " + "supported by machine 'mach-virt' (%d)", + max_cpus, virt_max_cpus); ++ if (vms->gic_version != VIRT_GIC_VERSION_2 && !vms->highmem_redists) { ++ error_printf("Try 'highmem-redists=on' for more CPUs\n"); ++ } ++ + exit(1); + } + +@@ -2420,6 +2426,49 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) + + vms->highmem_compact = value; + } ++ ++static bool virt_get_highmem_redists(Object *obj, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ ++ return vms->highmem_redists; ++} ++ ++static void virt_set_highmem_redists(Object *obj, bool value, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ ++ vms->highmem_redists = value; ++} ++ ++static bool virt_get_highmem_ecam(Object *obj, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ ++ return vms->highmem_ecam; ++} ++ ++static void virt_set_highmem_ecam(Object *obj, bool value, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ ++ vms->highmem_ecam = value; ++} ++ ++static bool virt_get_highmem_mmio(Object *obj, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ ++ return vms->highmem_mmio; ++} ++ ++static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ ++ 
vms->highmem_mmio = value; ++} ++ + #endif /* disabled for RHEL */ + + static bool virt_get_its(Object *obj, Error **errp) +@@ -3052,6 +3101,28 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + "Set on/off to enable/disable compact " + "layout for high memory regions"); + ++ object_class_property_add_bool(oc, "highmem-redists", ++ virt_get_highmem_redists, ++ virt_set_highmem_redists); ++ object_class_property_set_description(oc, "highmem-redists", ++ "Set on/off to enable/disable high " ++ "memory region for GICv3 or GICv4 " ++ "redistributor"); ++ ++ object_class_property_add_bool(oc, "highmem-ecam", ++ virt_get_highmem_ecam, ++ virt_set_highmem_ecam); ++ object_class_property_set_description(oc, "highmem-ecam", ++ "Set on/off to enable/disable high " ++ "memory region for PCI ECAM"); ++ ++ object_class_property_add_bool(oc, "highmem-mmio", ++ virt_get_highmem_mmio, ++ virt_set_highmem_mmio); ++ object_class_property_set_description(oc, "highmem-mmio", ++ "Set on/off to enable/disable high " ++ "memory region for PCI MMIO"); ++ + object_class_property_add_str(oc, "gic-version", virt_get_gic_version, + virt_set_gic_version); + object_class_property_set_description(oc, "gic-version", +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch b/kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch new file mode 100644 index 0000000..6b20bb8 --- /dev/null +++ b/kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch @@ -0,0 +1,51 @@ +From 969ea1ff46b52c5fe6d87f2eeb1625871a2dfb2a Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 21 Dec 2022 08:48:45 +0800 +Subject: [PATCH 8/8] hw/arm/virt: Enable compat high memory region address + assignment for 9.2.0 machine + +RH-Author: Gavin Shan +RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment +RH-Bugzilla: 2113840 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [8/8] 
beda1791c0c35dce5c669efd47685302b8468032 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 +Upstream: RHEL only + +The compact high memory region address assignment is enabled for 9.2.0, +but it's kept as disabled for 9.0.0, to keep the backwards compatibility +on 9.0.0. Note that these newly added properties ('compact-highmem', +'highmem-redists', 'highmem-ecam', and 'highmem-mmio') in the upstream +aren't exposed for the downstream. + +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 304fa0d6e7..e41c0b462c 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3581,6 +3581,7 @@ static void rhel_virt_instance_init(Object *obj) + + /* High memory is enabled by default */ + vms->highmem = true; ++ vms->highmem_compact = !vmc->no_highmem_compact; + vms->gic_version = VIRT_GIC_VERSION_NOSEL; + + vms->highmem_ecam = !vmc->no_highmem_ecam; +@@ -3659,5 +3660,7 @@ static void rhel900_virt_options(MachineClass *mc) + + /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ + vmc->no_tcg_lpa2 = true; ++ /* Compact layout for high memory regions was introduced with 9.2.0 */ ++ vmc->no_highmem_compact = true; + } + DEFINE_RHEL_MACHINE(9, 0, 0) +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch b/kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch new file mode 100644 index 0000000..9dcdf61 --- /dev/null +++ b/kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch @@ -0,0 +1,112 @@ +From 1c7fad3776a14ca35b24dc2fdb262d4ddf40d6eb Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 21 Dec 2022 08:48:45 +0800 +Subject: [PATCH 5/8] hw/arm/virt: Improve high memory region address + assignment + +RH-Author: Gavin Shan +RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment +RH-Bugzilla: 2113840 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: 
Miroslav Rezanina +RH-Commit: [5/8] 4d77fa78b5258a1bd8d30405cec5ba3311d42f92 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 + +There are three high memory regions, which are VIRT_HIGH_REDIST2, +VIRT_HIGH_PCIE_ECAM and VIRT_HIGH_PCIE_MMIO. Their base addresses +are floating on highest RAM address. However, they can be disabled +in several cases. + +(1) One specific high memory region is likely to be disabled by + code by toggling vms->highmem_{redists, ecam, mmio}. + +(2) VIRT_HIGH_PCIE_ECAM region is disabled on machine, which is + 'virt-2.12' or earlier than it. + +(3) VIRT_HIGH_PCIE_ECAM region is disabled when firmware is loaded + on a 32-bit system. + +(4) One specific high memory region is disabled when it breaks the + PA space limit. + +The current implementation of virt_set_{memmap, high_memmap}() isn't +optimized because the high memory region's PA space is always reserved, +regardless of whatever the actual state in the corresponding +vms->highmem_{redists, ecam, mmio} flag. In the code, 'base' and +'vms->highest_gpa' are always increased for case (1), (2) and (3). +It's unnecessary since the assigned PA space for the disabled high +memory region won't be used afterwards. + +Improve the address assignment for those three high memory regions by +skipping the address assignment for one specific high memory region if +it has been disabled in case (1), (2) and (3). The memory layout may +be changed after the improvement is applied, which leads to potential +migration breakage. So 'vms->highmem_compact' is added to control if +the improvement should be applied. For now, 'vms->highmem_compact' is +set to false, meaning that we don't have memory layout change until it +becomes configurable through property 'compact-highmem' in next patch.
+ +Signed-off-by: Gavin Shan +Reviewed-by: Eric Auger +Reviewed-by: Cornelia Huck +Reviewed-by: Marc Zyngier +Tested-by: Zhenyu Zhang +Message-id: 20221029224307.138822-6-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 4a4ff9edc6a8fdc76082af5b41b059217138c09b) +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 15 ++++++++++----- + include/hw/arm/virt.h | 1 + + 2 files changed, 11 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 6e3b9fc060..6896e0ca0f 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1768,18 +1768,23 @@ static void virt_set_high_memmap(VirtMachineState *vms, + vms->memmap[i].size = region_size; + + /* +- * Check each device to see if they fit in the PA space, +- * moving highest_gpa as we go. ++ * Check each device to see if it fits in the PA space, ++ * moving highest_gpa as we go. For compatibility, move ++ * highest_gpa for disabled fitting devices as well, if ++ * the compact layout has been disabled. + * + * For each device that doesn't fit, disable it. 
+ */ + fits = (region_base + region_size) <= BIT_ULL(pa_bits); +- if (fits) { +- vms->highest_gpa = region_base + region_size - 1; ++ *region_enabled &= fits; ++ if (vms->highmem_compact && !*region_enabled) { ++ continue; + } + +- *region_enabled &= fits; + base = region_base + region_size; ++ if (fits) { ++ vms->highest_gpa = base - 1; ++ } + } + } + +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 22b54ec510..15bd291311 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -144,6 +144,7 @@ struct VirtMachineState { + PFlashCFI01 *flash[2]; + bool secure; + bool highmem; ++ bool highmem_compact; + bool highmem_ecam; + bool highmem_mmio; + bool highmem_redists; +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch b/kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch new file mode 100644 index 0000000..ea9cb1f --- /dev/null +++ b/kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch @@ -0,0 +1,82 @@ +From 305a369fd18f29914bf96cc181add532d435d8ed Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 21 Dec 2022 08:48:45 +0800 +Subject: [PATCH 3/8] hw/arm/virt: Introduce variable region_base in + virt_set_high_memmap() + +RH-Author: Gavin Shan +RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment +RH-Bugzilla: 2113840 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/8] 15de90df217d680ccc858b679898b3993e1c050a + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 + +This introduces variable 'region_base' for the base address of the +specific high memory region. It's the preparatory work to optimize +high memory region address assignment. + +No functional change intended. 
+ +Signed-off-by: Gavin Shan +Reviewed-by: Eric Auger +Reviewed-by: Cornelia Huck +Reviewed-by: Marc Zyngier +Tested-by: Zhenyu Zhang +Message-id: 20221029224307.138822-4-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit fa245799b9407fc7b561da185b3d889df5e16a88) +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index ca098d40b8..ddcf7ee2f8 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1739,15 +1739,15 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) + static void virt_set_high_memmap(VirtMachineState *vms, + hwaddr base, int pa_bits) + { +- hwaddr region_size; ++ hwaddr region_base, region_size; + bool fits; + int i; + + for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { ++ region_base = ROUND_UP(base, extended_memmap[i].size); + region_size = extended_memmap[i].size; + +- base = ROUND_UP(base, region_size); +- vms->memmap[i].base = base; ++ vms->memmap[i].base = region_base; + vms->memmap[i].size = region_size; + + /* +@@ -1756,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms, + * + * For each device that doesn't fit, disable it. 
+ */ +- fits = (base + region_size) <= BIT_ULL(pa_bits); ++ fits = (region_base + region_size) <= BIT_ULL(pa_bits); + if (fits) { +- vms->highest_gpa = base + region_size - 1; ++ vms->highest_gpa = region_base + region_size - 1; + } + + switch (i) { +@@ -1773,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms, + break; + } + +- base += region_size; ++ base = region_base + region_size; + } + } + +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch b/kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch new file mode 100644 index 0000000..659faeb --- /dev/null +++ b/kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch @@ -0,0 +1,95 @@ +From a2ddd68c8365ec602db6b2a9cf83bb441ca701cc Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 21 Dec 2022 08:48:45 +0800 +Subject: [PATCH 4/8] hw/arm/virt: Introduce virt_get_high_memmap_enabled() + helper + +RH-Author: Gavin Shan +RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment +RH-Bugzilla: 2113840 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/8] 65524de2fc106600bbaff641caa8c4f2f8027114 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 + +This introduces virt_get_high_memmap_enabled() helper, which returns +the pointer to vms->highmem_{redists, ecam, mmio}. The pointer will +be used in the subsequent patches. + +No functional change intended. 
+ +Signed-off-by: Gavin Shan +Reviewed-by: Eric Auger +Reviewed-by: Cornelia Huck +Reviewed-by: Marc Zyngier +Tested-by: Zhenyu Zhang +Message-id: 20221029224307.138822-5-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit a5cb1350b19a5c2a58ab4edddf609ed429c13085) +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 32 +++++++++++++++++++------------- + 1 file changed, 19 insertions(+), 13 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index ddcf7ee2f8..6e3b9fc060 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1736,14 +1736,31 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) + return arm_cpu_mp_affinity(idx, clustersz); + } + ++static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms, ++ int index) ++{ ++ bool *enabled_array[] = { ++ &vms->highmem_redists, ++ &vms->highmem_ecam, ++ &vms->highmem_mmio, ++ }; ++ ++ assert(ARRAY_SIZE(extended_memmap) - VIRT_LOWMEMMAP_LAST == ++ ARRAY_SIZE(enabled_array)); ++ assert(index - VIRT_LOWMEMMAP_LAST < ARRAY_SIZE(enabled_array)); ++ ++ return enabled_array[index - VIRT_LOWMEMMAP_LAST]; ++} ++ + static void virt_set_high_memmap(VirtMachineState *vms, + hwaddr base, int pa_bits) + { + hwaddr region_base, region_size; +- bool fits; ++ bool *region_enabled, fits; + int i; + + for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { ++ region_enabled = virt_get_high_memmap_enabled(vms, i); + region_base = ROUND_UP(base, extended_memmap[i].size); + region_size = extended_memmap[i].size; + +@@ -1761,18 +1778,7 @@ static void virt_set_high_memmap(VirtMachineState *vms, + vms->highest_gpa = region_base + region_size - 1; + } + +- switch (i) { +- case VIRT_HIGH_GIC_REDIST2: +- vms->highmem_redists &= fits; +- break; +- case VIRT_HIGH_PCIE_ECAM: +- vms->highmem_ecam &= fits; +- break; +- case VIRT_HIGH_PCIE_MMIO: +- vms->highmem_mmio &= fits; +- break; +- } +- ++ *region_enabled &= fits; + base = region_base + region_size; + } + } +-- +2.31.1 + 
diff --git a/kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch b/kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch new file mode 100644 index 0000000..f55c06a --- /dev/null +++ b/kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch @@ -0,0 +1,130 @@ +From 5dff87c5ea60054709021025c9513ec259433ce2 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 21 Dec 2022 08:48:45 +0800 +Subject: [PATCH 1/8] hw/arm/virt: Introduce virt_set_high_memmap() helper + +RH-Author: Gavin Shan +RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment +RH-Bugzilla: 2113840 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/8] 5f6ba5af7a2c21d8473c58e088ee99b11336c673 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 + +This introduces virt_set_high_memmap() helper. The logic of high +memory region address assignment is moved to the helper. The intention +is to make the subsequent optimization for high memory region address +assignment easier. + +No functional change intended. 
+ +Signed-off-by: Gavin Shan +Reviewed-by: Eric Auger +Reviewed-by: Cornelia Huck +Reviewed-by: Marc Zyngier +Tested-by: Zhenyu Zhang +Message-id: 20221029224307.138822-2-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 4af6b6edece5ef273d29972d53547f823d2bc1c0) +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 74 ++++++++++++++++++++++++++++----------------------- + 1 file changed, 41 insertions(+), 33 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index bf18838b87..bea5f54720 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1736,6 +1736,46 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) + return arm_cpu_mp_affinity(idx, clustersz); + } + ++static void virt_set_high_memmap(VirtMachineState *vms, ++ hwaddr base, int pa_bits) ++{ ++ int i; ++ ++ for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { ++ hwaddr size = extended_memmap[i].size; ++ bool fits; ++ ++ base = ROUND_UP(base, size); ++ vms->memmap[i].base = base; ++ vms->memmap[i].size = size; ++ ++ /* ++ * Check each device to see if they fit in the PA space, ++ * moving highest_gpa as we go. ++ * ++ * For each device that doesn't fit, disable it. 
++ */ ++ fits = (base + size) <= BIT_ULL(pa_bits); ++ if (fits) { ++ vms->highest_gpa = base + size - 1; ++ } ++ ++ switch (i) { ++ case VIRT_HIGH_GIC_REDIST2: ++ vms->highmem_redists &= fits; ++ break; ++ case VIRT_HIGH_PCIE_ECAM: ++ vms->highmem_ecam &= fits; ++ break; ++ case VIRT_HIGH_PCIE_MMIO: ++ vms->highmem_mmio &= fits; ++ break; ++ } ++ ++ base += size; ++ } ++} ++ + static void virt_set_memmap(VirtMachineState *vms, int pa_bits) + { + MachineState *ms = MACHINE(vms); +@@ -1791,39 +1831,7 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits) + /* We know for sure that at least the memory fits in the PA space */ + vms->highest_gpa = memtop - 1; + +- for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { +- hwaddr size = extended_memmap[i].size; +- bool fits; +- +- base = ROUND_UP(base, size); +- vms->memmap[i].base = base; +- vms->memmap[i].size = size; +- +- /* +- * Check each device to see if they fit in the PA space, +- * moving highest_gpa as we go. +- * +- * For each device that doesn't fit, disable it. 
+- */ +- fits = (base + size) <= BIT_ULL(pa_bits); +- if (fits) { +- vms->highest_gpa = base + size - 1; +- } +- +- switch (i) { +- case VIRT_HIGH_GIC_REDIST2: +- vms->highmem_redists &= fits; +- break; +- case VIRT_HIGH_PCIE_ECAM: +- vms->highmem_ecam &= fits; +- break; +- case VIRT_HIGH_PCIE_MMIO: +- vms->highmem_mmio &= fits; +- break; +- } +- +- base += size; +- } ++ virt_set_high_memmap(vms, base, pa_bits); + + if (device_memory_size > 0) { + ms->device_memory = g_malloc0(sizeof(*ms->device_memory)); +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch b/kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch new file mode 100644 index 0000000..27bc6bb --- /dev/null +++ b/kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch @@ -0,0 +1,83 @@ +From bd5b7edbf8f4425f4b4e0d49a00cbdd48d9c6f48 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 21 Dec 2022 08:48:45 +0800 +Subject: [PATCH 2/8] hw/arm/virt: Rename variable size to region_size in + virt_set_high_memmap() + +RH-Author: Gavin Shan +RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment +RH-Bugzilla: 2113840 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/8] 1cadf1b00686cceb45821a58fdcb509bc5da335d + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 + +This renames variable 'size' to 'region_size' in virt_set_high_memmap(). +Its counterpart ('region_base') will be introduced in next patch. + +No functional change intended. 
+ +Signed-off-by: Gavin Shan +Reviewed-by: Eric Auger +Reviewed-by: Cornelia Huck +Reviewed-by: Marc Zyngier +Tested-by: Zhenyu Zhang +Message-id: 20221029224307.138822-3-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 370bea9d1c78796eec235ed6cb4310f489931a62) +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index bea5f54720..ca098d40b8 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1739,15 +1739,16 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) + static void virt_set_high_memmap(VirtMachineState *vms, + hwaddr base, int pa_bits) + { ++ hwaddr region_size; ++ bool fits; + int i; + + for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { +- hwaddr size = extended_memmap[i].size; +- bool fits; ++ region_size = extended_memmap[i].size; + +- base = ROUND_UP(base, size); ++ base = ROUND_UP(base, region_size); + vms->memmap[i].base = base; +- vms->memmap[i].size = size; ++ vms->memmap[i].size = region_size; + + /* + * Check each device to see if they fit in the PA space, +@@ -1755,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms, + * + * For each device that doesn't fit, disable it. 
+ */ +- fits = (base + size) <= BIT_ULL(pa_bits); ++ fits = (base + region_size) <= BIT_ULL(pa_bits); + if (fits) { +- vms->highest_gpa = base + size - 1; ++ vms->highest_gpa = base + region_size - 1; + } + + switch (i) { +@@ -1772,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms, + break; + } + +- base += size; ++ base += region_size; + } + } + +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 1e052fb..49a09a7 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.2.0 -Release: 2%{?rcrel}%{?dist}%{?cc_suffix} +Release: 3%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -192,6 +192,22 @@ Patch0020: 0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch Patch0021: 0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch Patch0022: 0022-x86-rhel-9.2.0-machine-type.patch Patch23: kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch +# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine +Patch24: kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch +# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine +Patch25: kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch +# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine +Patch26: kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch +# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine +Patch27: kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch +# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine +Patch28: kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch +# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine +Patch29: 
kvm-hw-arm-virt-Add-compact-highmem-property.patch +# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine +Patch30: kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch +# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine +Patch31: kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch %if %{have_clang} BuildRequires: clang @@ -1220,6 +1236,18 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Wed Jan 04 2023 Miroslav Rezanina - 7.2.0-3 +- kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch [bz#2113840] +- kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch [bz#2113840] +- kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch [bz#2113840] +- kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch [bz#2113840] +- kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch [bz#2113840] +- kvm-hw-arm-virt-Add-compact-highmem-property.patch [bz#2113840] +- kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch [bz#2113840] +- kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch [bz#2113840] +- Resolves: bz#2113840 + ([RHEL9.2] Memory mapping optimization for virt machine) + * Tue Dec 20 2022 Miroslav Rezanina - 7.2.0-2 - Fix updating from 7.1.0 - kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch[bz#2154640] From 408bed44fe02b53549e514966ba19cacc9873d43 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 12 Jan 2023 09:06:23 -0500 Subject: [PATCH 178/195] * Thu Jan 12 2023 Miroslav Rezanina - 7.2.0-4 - kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch [bz#2155749] - kvm-Update-QGA-service-for-new-command-line.patch [bz#2156515] - Resolves: bz#2155749 ([regression][stable guest abi][qemu-kvm7.2]Migration failed due to virtio-rng device between RHEL8.8 and RHEL9.2/MSI-X) - Resolves: bz#2156515 ([guest-agent] Replace '-blacklist' to '-block-rpcs' in qemu-ga config file) --- 
...pci-fix-migration-compat-for-vectors.patch | 53 +++++++++++++++++++ qemu-ga.sysconfig | 8 +-- qemu-guest-agent.service | 2 +- qemu-kvm.spec | 12 ++++- 4 files changed, 69 insertions(+), 6 deletions(-) create mode 100644 kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch diff --git a/kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch b/kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch new file mode 100644 index 0000000..0555a68 --- /dev/null +++ b/kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch @@ -0,0 +1,53 @@ +From 35ffe28a91a2ef08dd181d1a22695050ccbb6995 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 9 Jan 2023 16:04:43 +0000 +Subject: [PATCH 1/2] virtio-rng-pci: fix migration compat for vectors + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 131: virtio-rng-pci: fix migration compat for vectors +RH-Bugzilla: 2155749 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Thomas Huth +RH-Commit: [1/1] 1a866491dd191b073d71ae1aa5f4d76ee885de6d (dagrh/c-9-s-qemu-kvm) + +Fixup the migration compatibility for existing machine types +so that they do not enable msi-x. + +Symptom: + +(qemu) qemu: get_pci_config_device: Bad config data: i=0x34 read: 84 device: 98 cmask: ff wmask: 0 w1cmask:0 +qemu: Failed to load PCIDevice:config +qemu: Failed to load virtio-rng:virtio +qemu: error while loading state for instance 0x0 of device '0000:00:03.0/virtio-rng' +qemu: load of migration failed: Invalid argument + +Note: This fix will break migration from 7.2->7.2-fixed with this patch + +bz: https://bugzilla.redhat.com/show_bug.cgi?id=2155749 +Fixes: 9ea02e8f1 ("virtio-rng-pci: Allow setting nvectors, so we can use MSI-X") + +This downstream fix is the equivalent of an upstream fix I've posted to +the 7.2 machine type compatibility. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/core/machine.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 3d851d34da..7adbac6f87 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -56,6 +56,8 @@ GlobalProperty hw_compat_rhel_9_1[] = { + { "nvme-ns", "eui64-default", "on"}, + /* hw_compat_rhel_9_1 from hw_compat_7_1 */ + { "virtio-device", "queue_reset", "false" }, ++ /* hw_compat_rhel_9_1 bz 2155749 */ ++ { "virtio-rng-pci", "vectors", "0" }, + }; + const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); + +-- +2.31.1 + diff --git a/qemu-ga.sysconfig b/qemu-ga.sysconfig index 67bad0c..a78b428 100644 --- a/qemu-ga.sysconfig +++ b/qemu-ga.sysconfig @@ -1,11 +1,11 @@ # This is a systemd environment file, not a shell script. # It provides settings for "/lib/systemd/system/qemu-guest-agent.service". -# Comma-separated blacklist of RPCs to disable, or empty list to enable all. +# Comma-separated blocked RPCs to disable, or empty list to enable all. # -# You can get the list of RPC commands using "qemu-ga --blacklist='?'". -# There should be no spaces between commas and commands in the blacklist. -BLACKLIST_RPC=guest-file-open,guest-file-close,guest-file-read,guest-file-write,guest-file-seek,guest-file-flush,guest-exec,guest-exec-status +# You can get the list of RPC commands using "qemu-ga --block-rpcs='?'". +# There should be no spaces between commas and commands in the block list. +BLOCK_RPCS=guest-file-open,guest-file-close,guest-file-read,guest-file-write,guest-file-seek,guest-file-flush,guest-exec,guest-exec-status # Fsfreeze hook script specification. 
# diff --git a/qemu-guest-agent.service b/qemu-guest-agent.service index b3157d5..244da02 100644 --- a/qemu-guest-agent.service +++ b/qemu-guest-agent.service @@ -10,7 +10,7 @@ EnvironmentFile=/etc/sysconfig/qemu-ga ExecStart=/usr/bin/qemu-ga \ --method=virtio-serial \ --path=/dev/virtio-ports/org.qemu.guest_agent.0 \ - --blacklist=${BLACKLIST_RPC} \ + --block-rpcs=${BLOCK_RPCS} \ -F${FSFREEZE_HOOK_PATHNAME} Restart=always RestartSec=0 diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 49a09a7..e143966 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.2.0 -Release: 3%{?rcrel}%{?dist}%{?cc_suffix} +Release: 4%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -208,6 +208,8 @@ Patch29: kvm-hw-arm-virt-Add-compact-highmem-property.patch Patch30: kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch # For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine Patch31: kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch +# For bz#2155749 - [regression][stable guest abi][qemu-kvm7.2]Migration failed due to virtio-rng device between RHEL8.8 and RHEL9.2/MSI-X +Patch32: kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch %if %{have_clang} BuildRequires: clang @@ -1236,6 +1238,14 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Thu Jan 12 2023 Miroslav Rezanina - 7.2.0-4 +- kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch [bz#2155749] +- kvm-Update-QGA-service-for-new-command-line.patch [bz#2156515] +- Resolves: bz#2155749 + ([regression][stable guest abi][qemu-kvm7.2]Migration failed due to virtio-rng device between RHEL8.8 and RHEL9.2/MSI-X) +- Resolves: bz#2156515 + ([guest-agent] Replace 
'-blacklist' to '-block-rpcs' in qemu-ga config file) + * Wed Jan 04 2023 Miroslav Rezanina - 7.2.0-3 - kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch [bz#2113840] - kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch [bz#2113840] From 2fe1fc7b2dc5d24847dc85ab149adf5f5f12fde1 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Tue, 17 Jan 2023 07:06:28 -0500 Subject: [PATCH 179/195] * Tue Jan 17 2023 Miroslav Rezanina - 7.2.0-5 - kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch [bz#1905805] - kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch [bz#1905805] - kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch [bz#1905805] - kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch [bz#1905805] - kvm-vhost-vdpa-add-support-for-config-interrupt.patch [bz#1905805] - kvm-virtio-add-support-for-configure-interrupt.patch [bz#1905805] - kvm-vhost-add-support-for-configure-interrupt.patch [bz#1905805] - kvm-virtio-net-add-support-for-configure-interrupt.patch [bz#1905805] - kvm-virtio-mmio-add-support-for-configure-interrupt.patch [bz#1905805] - kvm-virtio-pci-add-support-for-configure-interrupt.patch [bz#1905805] - kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch [bz#2159408] - kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch [bz#2124856] - kvm-block-drop-bdrv_remove_filter_or_cow_child.patch [bz#2155112] - kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch [bz#2155112] - kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch [bz#2155112] - kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch [bz#2155112] - kvm-block-Remove-drained_end_counter.patch [bz#2155112] - kvm-block-Inline-bdrv_drain_invoke.patch [bz#2155112] - kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch [bz#2155112] - kvm-block-Drain-individual-nodes-during-reopen.patch [bz#2155112] - kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch [bz#2155112] - 
kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch [bz#2155112] - kvm-block-Remove-subtree-drains.patch [bz#2155112] - kvm-block-Call-drain-callbacks-only-once.patch [bz#2155112] - kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch [bz#2155112] - kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch [bz#2155112] - kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch [bz#2155112] - kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch [bz#2155112] - kvm-accel-introduce-accelerator-blocker-API.patch [bz#1979276] - kvm-KVM-keep-track-of-running-ioctls.patch [bz#1979276] - kvm-kvm-Atomic-memslot-updates.patch [bz#1979276] - Resolves: bz#1905805 (support config interrupt in vhost-vdpa qemu) - Resolves: bz#2159408 ([s390x] VMs with ISM passthrough don't autostart after leapp upgrade from RHEL 8) - Resolves: bz#2124856 (VM with virtio interface and iommu=on will crash when try to migrate) - Resolves: bz#2155112 (Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)) - Resolves: bz#1979276 (SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on) --- kvm-KVM-keep-track-of-running-ioctls.patch | 82 ++ ...el-introduce-accelerator-blocker-API.patch | 348 +++++++ ...block-Call-drain-callbacks-only-once.patch | 250 +++++ ...-t-poll-in-bdrv_replace_child_noperm.patch | 298 ++++++ ...subtree-drains-in-bdrv_drop_intermed.patch | 54 ++ ...Drain-individual-nodes-during-reopen.patch | 157 +++ ...f-coroutine-in-bdrv_do_drained_begin.patch | 96 ++ ...-locking-for-bdrv_reopen_queue_child.patch | 67 ++ kvm-block-Inline-bdrv_drain_invoke.patch | 81 ++ kvm-block-Remove-drained_end_counter.patch | 433 +++++++++ ...ore_bds_parents-parameter-from-drain.patch | 274 ++++++ ...l-parameter-from-bdrv_parent_drained.patch | 106 +++ kvm-block-Remove-subtree-drains.patch | 896 ++++++++++++++++++ ...rv_drained_begin-end-to-non-coroutin.patch | 302 ++++++ 
...drop-bdrv_remove_filter_or_cow_child.patch | 70 ++ kvm-kvm-Atomic-memslot-updates.patch | 286 ++++++ ...n-t-yield-in-bdrv_qed_co_drain_begin.patch | 84 ++ ...o-ccw-Activate-zPCI-features-on-s390.patch | 70 ++ ...ubtree-drain-with-a-single-node-drai.patch | 159 ++++ ...Don-t-yield-in-.bdrv_co_drained_begi.patch | 153 +++ ...-add-support-for-configure-interrupt.patch | 185 ++++ ...ty-bitmap-syncing-when-vIOMMU-is-ena.patch | 157 +++ ...e-new-VhostOps-vhost_set_config_call.patch | 56 ++ ...dpa-add-support-for-config-interrupt.patch | 73 ++ ...-add-support-for-configure-interrupt.patch | 115 +++ ...ntroduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch | 262 +++++ ...-add-support-for-configure-interrupt.patch | 80 ++ ...-add-support-for-configure-interrupt.patch | 115 +++ ...-add-support-for-configure-interrupt.patch | 274 ++++++ ...uple-notifier-from-interrupt-process.patch | 272 ++++++ ...ple-the-single-vector-from-the-inter.patch | 212 +++++ qemu-kvm.spec | 107 ++- 32 files changed, 6173 insertions(+), 1 deletion(-) create mode 100644 kvm-KVM-keep-track-of-running-ioctls.patch create mode 100644 kvm-accel-introduce-accelerator-blocker-API.patch create mode 100644 kvm-block-Call-drain-callbacks-only-once.patch create mode 100644 kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch create mode 100644 kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch create mode 100644 kvm-block-Drain-individual-nodes-during-reopen.patch create mode 100644 kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch create mode 100644 kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch create mode 100644 kvm-block-Inline-bdrv_drain_invoke.patch create mode 100644 kvm-block-Remove-drained_end_counter.patch create mode 100644 kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch create mode 100644 kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch create mode 100644 kvm-block-Remove-subtree-drains.patch create mode 100644 
kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch create mode 100644 kvm-block-drop-bdrv_remove_filter_or_cow_child.patch create mode 100644 kvm-kvm-Atomic-memslot-updates.patch create mode 100644 kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch create mode 100644 kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch create mode 100644 kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch create mode 100644 kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch create mode 100644 kvm-vhost-add-support-for-configure-interrupt.patch create mode 100644 kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch create mode 100644 kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch create mode 100644 kvm-vhost-vdpa-add-support-for-config-interrupt.patch create mode 100644 kvm-virtio-add-support-for-configure-interrupt.patch create mode 100644 kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch create mode 100644 kvm-virtio-mmio-add-support-for-configure-interrupt.patch create mode 100644 kvm-virtio-net-add-support-for-configure-interrupt.patch create mode 100644 kvm-virtio-pci-add-support-for-configure-interrupt.patch create mode 100644 kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch create mode 100644 kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch diff --git a/kvm-KVM-keep-track-of-running-ioctls.patch b/kvm-KVM-keep-track-of-running-ioctls.patch new file mode 100644 index 0000000..b7aba7e --- /dev/null +++ b/kvm-KVM-keep-track-of-running-ioctls.patch @@ -0,0 +1,82 @@ +From 6aebc271d95f9c59cb63c923b6ce35f16fce10e4 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Mon, 16 Jan 2023 07:17:23 -0500 +Subject: [PATCH 30/31] KVM: keep track of running ioctls + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 138: accel: introduce accelerator blocker API +RH-Bugzilla: 1979276 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Acked-by: Miroslav Rezanina 
+RH-Commit: [2/3] f566f81dda26ae733008f32261ecd1253ec1796d (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276 + +commit a27dd2de68f37ba96fe164a42121daa5f0750afc +Author: Emanuele Giuseppe Esposito +Date: Fri Nov 11 10:47:57 2022 -0500 + + KVM: keep track of running ioctls + + Using the new accel-blocker API, mark where ioctls are being called + in KVM. Next, we will implement the critical section that will take + care of performing memslots modifications atomically, therefore + preventing any new ioctl from running and allowing the running ones + to finish. + + Signed-off-by: David Hildenbrand + Signed-off-by: Emanuele Giuseppe Esposito + Message-Id: <20221111154758.1372674-3-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + accel/kvm/kvm-all.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index f99b0becd8..ff660fd469 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2310,6 +2310,7 @@ static int kvm_init(MachineState *ms) + assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size()); + + s->sigmask_len = 8; ++ accel_blocker_init(); + + #ifdef KVM_CAP_SET_GUEST_DEBUG + QTAILQ_INIT(&s->kvm_sw_breakpoints); +@@ -3014,7 +3015,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...) + va_end(ap); + + trace_kvm_vm_ioctl(type, arg); ++ accel_ioctl_begin(); + ret = ioctl(s->vmfd, type, arg); ++ accel_ioctl_end(); + if (ret == -1) { + ret = -errno; + } +@@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...) + va_end(ap); + + trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg); ++ accel_cpu_ioctl_begin(cpu); + ret = ioctl(cpu->kvm_fd, type, arg); ++ accel_cpu_ioctl_end(cpu); + if (ret == -1) { + ret = -errno; + } +@@ -3050,7 +3055,9 @@ int kvm_device_ioctl(int fd, int type, ...) 
+ va_end(ap); + + trace_kvm_device_ioctl(fd, type, arg); ++ accel_ioctl_begin(); + ret = ioctl(fd, type, arg); ++ accel_ioctl_end(); + if (ret == -1) { + ret = -errno; + } +-- +2.31.1 + diff --git a/kvm-accel-introduce-accelerator-blocker-API.patch b/kvm-accel-introduce-accelerator-blocker-API.patch new file mode 100644 index 0000000..29a8ac5 --- /dev/null +++ b/kvm-accel-introduce-accelerator-blocker-API.patch @@ -0,0 +1,348 @@ +From ae2077fd5d351a68c313c64f07fb225dff694a8f Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Mon, 16 Jan 2023 07:16:41 -0500 +Subject: [PATCH 29/31] accel: introduce accelerator blocker API +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 138: accel: introduce accelerator blocker API +RH-Bugzilla: 1979276 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/3] 56b07cd7db516c5066e6d66b4695064fdf73abbf (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276 + +commit bd688fc93120fb3e28aa70e3dfdf567ccc1e0bc1 +Author: Emanuele Giuseppe Esposito +Date: Fri Nov 11 10:47:56 2022 -0500 + + accel: introduce accelerator blocker API + + This API allows the accelerators to prevent vcpus from issuing + new ioctls while executing a critical section marked with the + accel_ioctl_inhibit_begin/end functions. + + Note that all functions submitting ioctls must mark where the + ioctl is being called with accel_{cpu_}ioctl_begin/end(). + + This API requires the caller to always hold the BQL. + API documentation is in sysemu/accel-blocker.h + + Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt + (to minimize cache line bouncing) to prevent new ioctls from + running when the critical section starts, and a QemuEvent to wait + until all running ioctls finish.
+ + Signed-off-by: Emanuele Giuseppe Esposito + Reviewed-by: Philippe Mathieu-Daudé + Message-Id: <20221111154758.1372674-2-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Conflicts: + util/meson.build: "interval-tree.c" does not exist + +Signed-off-by: Emanuele Giuseppe Esposito +--- + accel/accel-blocker.c | 154 +++++++++++++++++++++++++++++++++ + accel/meson.build | 2 +- + hw/core/cpu-common.c | 2 + + include/hw/core/cpu.h | 3 + + include/sysemu/accel-blocker.h | 56 ++++++++++++ + util/meson.build | 2 +- + 6 files changed, 217 insertions(+), 2 deletions(-) + create mode 100644 accel/accel-blocker.c + create mode 100644 include/sysemu/accel-blocker.h + +diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c +new file mode 100644 +index 0000000000..1e7f423462 +--- /dev/null ++++ b/accel/accel-blocker.c +@@ -0,0 +1,154 @@ ++/* ++ * Lock to inhibit accelerator ioctls ++ * ++ * Copyright (c) 2022 Red Hat Inc. ++ * ++ * Author: Emanuele Giuseppe Esposito ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/thread.h" ++#include "qemu/main-loop.h" ++#include "hw/core/cpu.h" ++#include "sysemu/accel-blocker.h" ++ ++static QemuLockCnt accel_in_ioctl_lock; ++static QemuEvent accel_in_ioctl_event; ++ ++void accel_blocker_init(void) ++{ ++ qemu_lockcnt_init(&accel_in_ioctl_lock); ++ qemu_event_init(&accel_in_ioctl_event, false); ++} ++ ++void accel_ioctl_begin(void) ++{ ++ if (likely(qemu_mutex_iothread_locked())) { ++ return; ++ } ++ ++ /* block if lock is taken in accel_ioctl_inhibit_begin() */ ++ qemu_lockcnt_inc(&accel_in_ioctl_lock); ++} ++ ++void accel_ioctl_end(void) ++{ ++ if (likely(qemu_mutex_iothread_locked())) { ++ return; ++ } ++ ++ qemu_lockcnt_dec(&accel_in_ioctl_lock); ++ /* change event to SET. If event was BUSY, wake up all waiters */ ++ qemu_event_set(&accel_in_ioctl_event); ++} ++ ++void accel_cpu_ioctl_begin(CPUState *cpu) ++{ ++ if (unlikely(qemu_mutex_iothread_locked())) { ++ return; ++ } ++ ++ /* block if lock is taken in accel_ioctl_inhibit_begin() */ ++ qemu_lockcnt_inc(&cpu->in_ioctl_lock); ++} ++ ++void accel_cpu_ioctl_end(CPUState *cpu) ++{ ++ if (unlikely(qemu_mutex_iothread_locked())) { ++ return; ++ } ++ ++ qemu_lockcnt_dec(&cpu->in_ioctl_lock); ++ /* change event to SET.
If event was BUSY, wake up all waiters */ ++ qemu_event_set(&accel_in_ioctl_event); ++} ++ ++static bool accel_has_to_wait(void) ++{ ++ CPUState *cpu; ++ bool needs_to_wait = false; ++ ++ CPU_FOREACH(cpu) { ++ if (qemu_lockcnt_count(&cpu->in_ioctl_lock)) { ++ /* exit the ioctl, if vcpu is running it */ ++ qemu_cpu_kick(cpu); ++ needs_to_wait = true; ++ } ++ } ++ ++ return needs_to_wait || qemu_lockcnt_count(&accel_in_ioctl_lock); ++} ++ ++void accel_ioctl_inhibit_begin(void) ++{ ++ CPUState *cpu; ++ ++ /* ++ * We allow to inhibit only when holding the BQL, so we can identify ++ * when an inhibitor wants to issue an ioctl easily. ++ */ ++ g_assert(qemu_mutex_iothread_locked()); ++ ++ /* Block further invocations of the ioctls outside the BQL. */ ++ CPU_FOREACH(cpu) { ++ qemu_lockcnt_lock(&cpu->in_ioctl_lock); ++ } ++ qemu_lockcnt_lock(&accel_in_ioctl_lock); ++ ++ /* Keep waiting while there are running ioctls */ ++ while (true) { ++ ++ /* Reset event to FREE. */ ++ qemu_event_reset(&accel_in_ioctl_event); ++ ++ if (accel_has_to_wait()) { ++ /* ++ * If event is still FREE, and there are ioctls still in progress, ++ * wait. ++ * ++ * If an ioctl finishes before qemu_event_wait(), it will change ++ * the event state to SET. This will prevent qemu_event_wait() from ++ * blocking, but it's not a problem because if other ioctls are ++ * still running the loop will iterate once more and reset the event ++ * status to FREE so that it can wait properly. ++ * ++ * If an ioctl finishes while qemu_event_wait() is blocking, then ++ * it will be woken up, but also here the while loop makes sure ++ * to re-enter the wait if there are other running ioctls.
++ */ ++ qemu_event_wait(&accel_in_ioctl_event); ++ } else { ++ /* No ioctl is running */ ++ return; ++ } ++ } ++} ++ ++void accel_ioctl_inhibit_end(void) ++{ ++ CPUState *cpu; ++ ++ qemu_lockcnt_unlock(&accel_in_ioctl_lock); ++ CPU_FOREACH(cpu) { ++ qemu_lockcnt_unlock(&cpu->in_ioctl_lock); ++ } ++} ++ +diff --git a/accel/meson.build b/accel/meson.build +index 259c35c4c8..061332610f 100644 +--- a/accel/meson.build ++++ b/accel/meson.build +@@ -1,4 +1,4 @@ +-specific_ss.add(files('accel-common.c')) ++specific_ss.add(files('accel-common.c', 'accel-blocker.c')) + softmmu_ss.add(files('accel-softmmu.c')) + user_ss.add(files('accel-user.c')) + +diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c +index f9fdd46b9d..8d6a4b1b65 100644 +--- a/hw/core/cpu-common.c ++++ b/hw/core/cpu-common.c +@@ -237,6 +237,7 @@ static void cpu_common_initfn(Object *obj) + cpu->nr_threads = 1; + + qemu_mutex_init(&cpu->work_mutex); ++ qemu_lockcnt_init(&cpu->in_ioctl_lock); + QSIMPLEQ_INIT(&cpu->work_list); + QTAILQ_INIT(&cpu->breakpoints); + QTAILQ_INIT(&cpu->watchpoints); +@@ -248,6 +249,7 @@ static void cpu_common_finalize(Object *obj) + { + CPUState *cpu = CPU(obj); + ++ qemu_lockcnt_destroy(&cpu->in_ioctl_lock); + qemu_mutex_destroy(&cpu->work_mutex); + } + +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index 8830546121..2417597236 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -398,6 +398,9 @@ struct CPUState { + uint32_t kvm_fetch_index; + uint64_t dirty_pages; + ++ /* Used by accel-blocker: CPU is executing an ioctl() */ ++ QemuLockCnt in_ioctl_lock; ++ + /* Used for events with 'vcpu' and *without* the 'disabled' properties */ + DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS); + DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS); +diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h +new file mode 100644 +index 0000000000..72020529ef +--- /dev/null ++++ b/include/sysemu/accel-blocker.h +@@ -0,0
+1,56 @@ ++/* ++ * Accelerator blocking API, to prevent new ioctls from starting and wait the ++ * running ones finish. ++ * This mechanism differs from pause/resume_all_vcpus() in that it does not ++ * release the BQL. ++ * ++ * Copyright (c) 2022 Red Hat Inc. ++ * ++ * Author: Emanuele Giuseppe Esposito ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++#ifndef ACCEL_BLOCKER_H ++#define ACCEL_BLOCKER_H ++ ++#include "qemu/osdep.h" ++#include "sysemu/cpus.h" ++ ++extern void accel_blocker_init(void); ++ ++/* ++ * accel_{cpu_}ioctl_begin/end: ++ * Mark when ioctl is about to run or just finished. ++ * ++ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is ++ * called, preventing new ioctls to run. They will continue only after ++ * accel_ioctl_inibith_end(). ++ */ ++extern void accel_ioctl_begin(void); ++extern void accel_ioctl_end(void); ++extern void accel_cpu_ioctl_begin(CPUState *cpu); ++extern void accel_cpu_ioctl_end(CPUState *cpu); ++ ++/* ++ * accel_ioctl_inhibit_begin: start critical section ++ * ++ * This function makes sure that: ++ * 1) incoming accel_{cpu_}ioctl_begin() calls block ++ * 2) wait that all ioctls that were already running reach ++ * accel_{cpu_}ioctl_end(), kicking vcpus if necessary. ++ * ++ * This allows the caller to access shared data or perform operations without ++ * worrying of concurrent vcpus accesses. ++ */ ++extern void accel_ioctl_inhibit_begin(void); ++ ++/* ++ * accel_ioctl_inhibit_end: end critical section started by ++ * accel_ioctl_inhibit_begin() ++ * ++ * This function allows blocked accel_{cpu_}ioctl_begin() to continue. 
++ */ ++extern void accel_ioctl_inhibit_end(void); ++ ++#endif /* ACCEL_BLOCKER_H */ +diff --git a/util/meson.build b/util/meson.build +index 25b9b61f98..85a5504c4d 100644 +--- a/util/meson.build ++++ b/util/meson.build +@@ -57,6 +57,7 @@ util_ss.add(files('guest-random.c')) + util_ss.add(files('yank.c')) + util_ss.add(files('int128.c')) + util_ss.add(files('memalign.c')) ++util_ss.add(files('lockcnt.c')) + + if have_user + util_ss.add(files('selfmap.c')) +@@ -71,7 +72,6 @@ endif + if have_block or have_ga + util_ss.add(files('aiocb.c', 'async.c')) + util_ss.add(files('base64.c')) +- util_ss.add(files('lockcnt.c')) + util_ss.add(files('main-loop.c')) + util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c')) + util_ss.add(files('coroutine-@0@.c'.format(config_host['CONFIG_COROUTINE_BACKEND']))) +-- +2.31.1 + diff --git a/kvm-block-Call-drain-callbacks-only-once.patch b/kvm-block-Call-drain-callbacks-only-once.patch new file mode 100644 index 0000000..04f1dda --- /dev/null +++ b/kvm-block-Call-drain-callbacks-only-once.patch @@ -0,0 +1,250 @@ +From 9bb9cafd736057fd2a8ebfa6f5769668f125fbe6 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:06 +0100 +Subject: [PATCH 24/31] block: Call drain callbacks only once + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [12/16] ea9a433dc01d1b8539a2d4ea12887f2a3ce830ea (sgarzarella/qemu-kvm-c-9-s) + +We only need to call both the BlockDriver's callback and the parent +callbacks when going from undrained to drained or vice versa. A second +drain section doesn't make a difference for the driver or the parent, +they weren't supposed to send new requests before and after the second +drain. 
+ +One thing that gets in the way is the 'ignore_bds_parents' parameter in +bdrv_do_drained_begin_quiesce() and bdrv_do_drained_end(): It means that +bdrv_drain_all_begin() increases bs->quiesce_counter, but does not +quiesce the parent through BdrvChildClass callbacks. If an additional +drain section is started now, bs->quiesce_counter will be non-zero, but +we would still need to quiesce the parent through BdrvChildClass in +order to keep things consistent (and unquiesce it on the matching +bdrv_drained_end(), even though the counter would not reach 0 yet as +long as the bdrv_drain_all() section is still active). + +Instead of keeping track of this, let's just get rid of the parameter. +It was introduced in commit 6cd5c9d7b2d as an optimisation so that +during bdrv_drain_all(), we wouldn't recursively drain all parents up to +the root for each node, resulting in quadratic complexity. As it happens, +calling the callbacks only once solves the same problem, so as of this +patch, we'll still have O(n) complexity and ignore_bds_parents is not +needed any more. + +This patch only ignores the 'ignore_bds_parents' parameter. It will be +removed in a separate patch. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-12-kwolf@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 57e05be343f33f4e5899a8d8946a8596d68424a1) +Signed-off-by: Stefano Garzarella +--- + block.c | 25 +++++++------------------ + block/io.c | 30 ++++++++++++++++++------------ + include/block/block_int-common.h | 8 ++++---- + tests/unit/test-bdrv-drain.c | 16 ++++++++++------ + 4 files changed, 39 insertions(+), 40 deletions(-) + +diff --git a/block.c b/block.c +index e0e3b21790..5a583e260d 100644 +--- a/block.c ++++ b/block.c +@@ -2824,7 +2824,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + { + BlockDriverState *old_bs = child->bs; + int new_bs_quiesce_counter; +- int drain_saldo; + + assert(!child->frozen); + assert(old_bs != new_bs); +@@ -2834,16 +2833,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); + } + +- new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); +- drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter; +- + /* + * If the new child node is drained but the old one was not, flush + * all outstanding requests to the old child node. + */ +- while (drain_saldo > 0 && child->klass->drained_begin) { ++ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); ++ if (new_bs_quiesce_counter && !child->quiesced_parent) { + bdrv_parent_drained_begin_single(child, true); +- drain_saldo--; + } + + if (old_bs) { +@@ -2859,16 +2855,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + if (new_bs) { + assert_bdrv_graph_writable(new_bs); + QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); +- +- /* +- * Polling in bdrv_parent_drained_begin_single() may have led to the new +- * node's quiesce_counter having been decreased. 
Not a problem, we just +- * need to recognize this here and then invoke drained_end appropriately +- * more often. +- */ +- assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); +- drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; +- + if (child->klass->attach) { + child->klass->attach(child); + } +@@ -2877,10 +2863,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + /* + * If the old child node was drained but the new one is not, allow + * requests to come in only after the new node has been attached. ++ * ++ * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single() ++ * polls, which could have changed the value. + */ +- while (drain_saldo < 0 && child->klass->drained_end) { ++ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); ++ if (!new_bs_quiesce_counter && child->quiesced_parent) { + bdrv_parent_drained_end_single(child); +- drain_saldo++; + } + } + +diff --git a/block/io.c b/block/io.c +index 75224480d0..87d6f22ec4 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -62,8 +62,9 @@ void bdrv_parent_drained_end_single(BdrvChild *c) + { + IO_OR_GS_CODE(); + +- assert(c->parent_quiesce_counter > 0); +- c->parent_quiesce_counter--; ++ assert(c->quiesced_parent); ++ c->quiesced_parent = false; ++ + if (c->klass->drained_end) { + c->klass->drained_end(c); + } +@@ -110,7 +111,10 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) + { + AioContext *ctx = bdrv_child_get_parent_aio_context(c); + IO_OR_GS_CODE(); +- c->parent_quiesce_counter++; ++ ++ assert(!c->quiesced_parent); ++ c->quiesced_parent = true; ++ + if (c->klass->drained_begin) { + c->klass->drained_begin(c); + } +@@ -358,11 +362,12 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + /* Stop things in parent-to-child order */ + if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { + aio_disable_external(bdrv_get_aio_context(bs)); +- } + +- bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); +- if (bs->drv && 
bs->drv->bdrv_drain_begin) { +- bs->drv->bdrv_drain_begin(bs); ++ /* TODO Remove ignore_bds_parents, we don't consider it any more */ ++ bdrv_parent_drained_begin(bs, parent, false); ++ if (bs->drv && bs->drv->bdrv_drain_begin) { ++ bs->drv->bdrv_drain_begin(bs); ++ } + } + } + +@@ -413,13 +418,14 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, + assert(bs->quiesce_counter > 0); + + /* Re-enable things in child-to-parent order */ +- if (bs->drv && bs->drv->bdrv_drain_end) { +- bs->drv->bdrv_drain_end(bs); +- } +- bdrv_parent_drained_end(bs, parent, ignore_bds_parents); +- + old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); + if (old_quiesce_counter == 1) { ++ if (bs->drv && bs->drv->bdrv_drain_end) { ++ bs->drv->bdrv_drain_end(bs); ++ } ++ /* TODO Remove ignore_bds_parents, we don't consider it any more */ ++ bdrv_parent_drained_end(bs, parent, false); ++ + aio_enable_external(bdrv_get_aio_context(bs)); + } + } +diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h +index 791dddfd7d..a6bc6b7fe9 100644 +--- a/include/block/block_int-common.h ++++ b/include/block/block_int-common.h +@@ -980,13 +980,13 @@ struct BdrvChild { + bool frozen; + + /* +- * How many times the parent of this child has been drained ++ * True if the parent of this child has been drained by this BdrvChild + * (through klass->drained_*). +- * Usually, this is equal to bs->quiesce_counter (potentially +- * reduced by bdrv_drain_all_count). It may differ while the ++ * ++ * It is generally true if bs->quiesce_counter > 0. It may differ while the + * child is entering or leaving a drained section. 
+ */ +- int parent_quiesce_counter; ++ bool quiesced_parent; + + QLIST_ENTRY(BdrvChild) next; + QLIST_ENTRY(BdrvChild) next_parent; +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index dda08de8db..172bc6debc 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -296,7 +296,11 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive) + + do_drain_begin(drain_type, bs); + +- g_assert_cmpint(bs->quiesce_counter, ==, 1); ++ if (drain_type == BDRV_DRAIN_ALL) { ++ g_assert_cmpint(bs->quiesce_counter, ==, 2); ++ } else { ++ g_assert_cmpint(bs->quiesce_counter, ==, 1); ++ } + g_assert_cmpint(backing->quiesce_counter, ==, !!recursive); + + do_drain_end(drain_type, bs); +@@ -348,8 +352,8 @@ static void test_nested(void) + + for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) { + for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) { +- int backing_quiesce = (outer != BDRV_DRAIN) + +- (inner != BDRV_DRAIN); ++ int backing_quiesce = (outer == BDRV_DRAIN_ALL) + ++ (inner == BDRV_DRAIN_ALL); + + g_assert_cmpint(bs->quiesce_counter, ==, 0); + g_assert_cmpint(backing->quiesce_counter, ==, 0); +@@ -359,10 +363,10 @@ static void test_nested(void) + do_drain_begin(outer, bs); + do_drain_begin(inner, bs); + +- g_assert_cmpint(bs->quiesce_counter, ==, 2); ++ g_assert_cmpint(bs->quiesce_counter, ==, 2 + !!backing_quiesce); + g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce); +- g_assert_cmpint(s->drain_count, ==, 2); +- g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce); ++ g_assert_cmpint(s->drain_count, ==, 1); ++ g_assert_cmpint(backing_s->drain_count, ==, !!backing_quiesce); + + do_drain_end(inner, bs); + do_drain_end(outer, bs); +-- +2.31.1 + diff --git a/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch b/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch new file mode 100644 index 0000000..80018cc --- /dev/null +++ b/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch @@ 
-0,0 +1,298 @@ +From 150ef3356cc6732fede7ca059168fc0565ed0b76 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:09 +0100 +Subject: [PATCH 27/31] block: Don't poll in bdrv_replace_child_noperm() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [15/16] 5fc7d6b703a2d6c1118d875056f0afbd6ba5cca9 (sgarzarella/qemu-kvm-c-9-s) + +In order to make sure that bdrv_replace_child_noperm() doesn't have to +poll any more, get rid of the bdrv_parent_drained_begin_single() call. + +This is possible now because we can require that the parent is already +drained through the child in question when the function is called and we +don't call the parent drain callbacks more than once. + +The additional drain calls needed in callers cause the test case to run +its code in the drain handler too early (bdrv_attach_child() drains +now), so modify it to only enable the code after the test setup has +completed. + +Signed-off-by: Kevin Wolf +Message-Id: <20221118174110.55183-15-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 23987471285a26397e3152a9244b652445fd36c4) +Signed-off-by: Stefano Garzarella +--- + block.c | 103 ++++++++++++++++++++++++++++++----- + block/io.c | 2 +- + include/block/block-io.h | 8 +++ + tests/unit/test-bdrv-drain.c | 10 ++++ + 4 files changed, 108 insertions(+), 15 deletions(-) + +diff --git a/block.c b/block.c +index af31a94863..65588d313a 100644 +--- a/block.c ++++ b/block.c +@@ -2407,6 +2407,20 @@ static void bdrv_replace_child_abort(void *opaque) + + GLOBAL_STATE_CODE(); + /* old_bs reference is transparently moved from @s to @s->child */ ++ if (!s->child->bs) { ++ /* ++ * The parents were undrained when removing old_bs from the child. 
New ++ * requests can't have been made, though, because the child was empty. ++ * ++ * TODO Make bdrv_replace_child_noperm() transactionable to avoid ++ * undraining the parent in the first place. Once this is done, having ++ * new_bs drained when calling bdrv_replace_child_tran() is not a ++ * requirement any more. ++ */ ++ bdrv_parent_drained_begin_single(s->child, false); ++ assert(!bdrv_parent_drained_poll_single(s->child)); ++ } ++ assert(s->child->quiesced_parent); + bdrv_replace_child_noperm(s->child, s->old_bs); + bdrv_unref(new_bs); + } +@@ -2422,12 +2436,19 @@ static TransactionActionDrv bdrv_replace_child_drv = { + * + * Note: real unref of old_bs is done only on commit. + * ++ * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be ++ * kept drained until the transaction is completed. ++ * + * The function doesn't update permissions, caller is responsible for this. + */ + static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, + Transaction *tran) + { + BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); ++ ++ assert(child->quiesced_parent); ++ assert(!new_bs || new_bs->quiesce_counter); ++ + *s = (BdrvReplaceChildState) { + .child = child, + .old_bs = child->bs, +@@ -2819,6 +2840,14 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) + return permissions[qapi_perm]; + } + ++/* ++ * Replaces the node that a BdrvChild points to without updating permissions. ++ * ++ * If @new_bs is non-NULL, the parent of @child must already be drained through ++ * @child. ++ * ++ * This function does not poll. ++ */ + static void bdrv_replace_child_noperm(BdrvChild *child, + BlockDriverState *new_bs) + { +@@ -2826,6 +2855,28 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + int new_bs_quiesce_counter; + + assert(!child->frozen); ++ ++ /* ++ * If we want to change the BdrvChild to point to a drained node as its new ++ * child->bs, we need to make sure that its new parent is drained, too. 
In ++ * other words, either child->quiesce_parent must already be true or we must ++ * be able to set it and keep the parent's quiesce_counter consistent with ++ * that, but without polling or starting new requests (this function ++ * guarantees that it doesn't poll, and starting new requests would be ++ * against the invariants of drain sections). ++ * ++ * To keep things simple, we pick the first option (child->quiesce_parent ++ * must already be true). We also generalise the rule a bit to make it ++ * easier to verify in callers and more likely to be covered in test cases: ++ * The parent must be quiesced through this child even if new_bs isn't ++ * currently drained. ++ * ++ * The only exception is for callers that always pass new_bs == NULL. In ++ * this case, we obviously never need to consider the case of a drained ++ * new_bs, so we can keep the callers simpler by allowing them not to drain ++ * the parent. ++ */ ++ assert(!new_bs || child->quiesced_parent); + assert(old_bs != new_bs); + GLOBAL_STATE_CODE(); + +@@ -2833,15 +2884,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); + } + +- /* +- * If the new child node is drained but the old one was not, flush +- * all outstanding requests to the old child node. +- */ +- new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); +- if (new_bs_quiesce_counter && !child->quiesced_parent) { +- bdrv_parent_drained_begin_single(child, true); +- } +- + if (old_bs) { + if (child->klass->detach) { + child->klass->detach(child); +@@ -2861,11 +2903,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + } + + /* +- * If the old child node was drained but the new one is not, allow +- * requests to come in only after the new node has been attached. +- * +- * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single() +- * polls, which could have changed the value. 
++ * If the parent was drained through this BdrvChild previously, but new_bs ++ * is not drained, allow requests to come in only after the new node has ++ * been attached. + */ + new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); + if (!new_bs_quiesce_counter && child->quiesced_parent) { +@@ -3002,6 +3042,24 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, + } + + bdrv_ref(child_bs); ++ /* ++ * Let every new BdrvChild start with a drained parent. Inserting the child ++ * in the graph with bdrv_replace_child_noperm() will undrain it if ++ * @child_bs is not drained. ++ * ++ * The child was only just created and is not yet visible in global state ++ * until bdrv_replace_child_noperm() inserts it into the graph, so nobody ++ * could have sent requests and polling is not necessary. ++ * ++ * Note that this means that the parent isn't fully drained yet, we only ++ * stop new requests from coming in. This is fine, we don't care about the ++ * old requests here, they are not for this child. If another place enters a ++ * drain section for the same parent, but wants it to be fully quiesced, it ++ * will not run most of the the code in .drained_begin() again (which is not ++ * a problem, we already did this), but it will still poll until the parent ++ * is fully quiesced, so it will not be negatively affected either. 
++ */ ++ bdrv_parent_drained_begin_single(new_child, false); + bdrv_replace_child_noperm(new_child, child_bs); + + BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); +@@ -5059,12 +5117,24 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran) + } + + if (child->bs) { ++ BlockDriverState *bs = child->bs; ++ bdrv_drained_begin(bs); + bdrv_replace_child_tran(child, NULL, tran); ++ bdrv_drained_end(bs); + } + + tran_add(tran, &bdrv_remove_child_drv, child); + } + ++static void undrain_on_clean_cb(void *opaque) ++{ ++ bdrv_drained_end(opaque); ++} ++ ++static TransactionActionDrv undrain_on_clean = { ++ .clean = undrain_on_clean_cb, ++}; ++ + static int bdrv_replace_node_noperm(BlockDriverState *from, + BlockDriverState *to, + bool auto_skip, Transaction *tran, +@@ -5074,6 +5144,11 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, + + GLOBAL_STATE_CODE(); + ++ bdrv_drained_begin(from); ++ bdrv_drained_begin(to); ++ tran_add(tran, &undrain_on_clean, from); ++ tran_add(tran, &undrain_on_clean, to); ++ + QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { + assert(c->bs == from); + if (!should_update_child(c, to)) { +diff --git a/block/io.c b/block/io.c +index 5e9150d92c..ae64830eac 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -81,7 +81,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) + } + } + +-static bool bdrv_parent_drained_poll_single(BdrvChild *c) ++bool bdrv_parent_drained_poll_single(BdrvChild *c) + { + if (c->klass->drained_poll) { + return c->klass->drained_poll(c); +diff --git a/include/block/block-io.h b/include/block/block-io.h +index 8f5e75756a..65e6d2569b 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -292,6 +292,14 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); + */ + void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); + ++/** ++ * bdrv_parent_drained_poll_single: ++ * ++ * Returns true if there is 
any pending activity to cease before @c can be ++ * called quiesced, false otherwise. ++ */ ++bool bdrv_parent_drained_poll_single(BdrvChild *c); ++ + /** + * bdrv_parent_drained_end_single: + * +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index 172bc6debc..2686a8acee 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -1654,6 +1654,7 @@ static void test_drop_intermediate_poll(void) + + + typedef struct BDRVReplaceTestState { ++ bool setup_completed; + bool was_drained; + bool was_undrained; + bool has_read; +@@ -1738,6 +1739,10 @@ static void bdrv_replace_test_drain_begin(BlockDriverState *bs) + { + BDRVReplaceTestState *s = bs->opaque; + ++ if (!s->setup_completed) { ++ return; ++ } ++ + if (!s->drain_count) { + s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs); + bdrv_inc_in_flight(bs); +@@ -1769,6 +1774,10 @@ static void bdrv_replace_test_drain_end(BlockDriverState *bs) + { + BDRVReplaceTestState *s = bs->opaque; + ++ if (!s->setup_completed) { ++ return; ++ } ++ + g_assert(s->drain_count > 0); + if (!--s->drain_count) { + s->was_undrained = true; +@@ -1867,6 +1876,7 @@ static void do_test_replace_child_mid_drain(int old_drain_count, + bdrv_ref(old_child_bs); + bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds, + BDRV_CHILD_COW, &error_abort); ++ parent_s->setup_completed = true; + + for (i = 0; i < old_drain_count; i++) { + bdrv_drained_begin(old_child_bs); +-- +2.31.1 + diff --git a/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch b/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch new file mode 100644 index 0000000..e3bf1e2 --- /dev/null +++ b/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch @@ -0,0 +1,54 @@ +From 6af6de77dace29aa8548b3649dc9c6163740ac86 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:03 +0100 +Subject: [PATCH 21/31] block: Don't use subtree drains in + bdrv_drop_intermediate() + 
+RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [9/16] 3c06fa826f262558f57d38b0155500c2e8e23a53 (sgarzarella/qemu-kvm-c-9-s) + +Instead of using a subtree drain from the top node (which also drains +child nodes of base that we're not even interested in), use a normal +drain for base, which automatically drains all of the parents, too. + +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-9-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 631086deefc32690ee56efed1c5b891dec31ae37) +Signed-off-by: Stefano Garzarella +--- + block.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block.c b/block.c +index cb5e96b1cf..b3449a312e 100644 +--- a/block.c ++++ b/block.c +@@ -5586,7 +5586,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + GLOBAL_STATE_CODE(); + + bdrv_ref(top); +- bdrv_subtree_drained_begin(top); ++ bdrv_drained_begin(base); + + if (!top->drv || !base->drv) { + goto exit; +@@ -5659,7 +5659,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + + ret = 0; + exit: +- bdrv_subtree_drained_end(top); ++ bdrv_drained_end(base); + bdrv_unref(top); + return ret; + } +-- +2.31.1 + diff --git a/kvm-block-Drain-individual-nodes-during-reopen.patch b/kvm-block-Drain-individual-nodes-during-reopen.patch new file mode 100644 index 0000000..24661fb --- /dev/null +++ b/kvm-block-Drain-individual-nodes-during-reopen.patch @@ -0,0 +1,157 @@ +From ad52cb621daad45d3c2a0e2e670d6ca2e16690bd Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:02 +0100 +Subject: [PATCH 20/31] block: Drain individual nodes during reopen + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: 
Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [8/16] 5441b6f0ae9102ef40d1093e1db3084eea81e3b0 (sgarzarella/qemu-kvm-c-9-s) + +bdrv_reopen() and friends use subtree drains as a lazy way of covering +all the nodes they touch. Turns out that this lazy way is a lot more +complicated than just draining the nodes individually, even not +accounting for the additional complexity in the drain mechanism itself. + +Simplify the code by switching to draining the individual nodes that are +already managed in the BlockReopenQueue anyway. + +Signed-off-by: Kevin Wolf +Message-Id: <20221118174110.55183-8-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit d22933acd2f470eeef779e4d444e848f76dcfaf8) +Signed-off-by: Stefano Garzarella +--- + block.c | 16 +++++++++------- + block/replication.c | 6 ------ + blockdev.c | 13 ------------- + 3 files changed, 9 insertions(+), 26 deletions(-) + +diff --git a/block.c b/block.c +index 46df410b07..cb5e96b1cf 100644 +--- a/block.c ++++ b/block.c +@@ -4150,7 +4150,7 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, + * returns a pointer to bs_queue, which is either the newly allocated + * bs_queue, or the existing bs_queue being used. + * +- * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). ++ * bs is drained here and undrained by bdrv_reopen_queue_free(). + * + * To be called with bs->aio_context locked. + */ +@@ -4172,12 +4172,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, + int flags; + QemuOpts *opts; + +- /* Make sure that the caller remembered to use a drained section. This is +- * important to avoid graph changes between the recursive queuing here and +- * bdrv_reopen_multiple(). 
*/ +- assert(bs->quiesce_counter > 0); + GLOBAL_STATE_CODE(); + ++ bdrv_drained_begin(bs); ++ + if (bs_queue == NULL) { + bs_queue = g_new0(BlockReopenQueue, 1); + QTAILQ_INIT(bs_queue); +@@ -4328,6 +4326,12 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) + if (bs_queue) { + BlockReopenQueueEntry *bs_entry, *next; + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { ++ AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs); ++ ++ aio_context_acquire(ctx); ++ bdrv_drained_end(bs_entry->state.bs); ++ aio_context_release(ctx); ++ + qobject_unref(bs_entry->state.explicit_options); + qobject_unref(bs_entry->state.options); + g_free(bs_entry); +@@ -4475,7 +4479,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + + GLOBAL_STATE_CODE(); + +- bdrv_subtree_drained_begin(bs); + queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); + + if (ctx != qemu_get_aio_context()) { +@@ -4486,7 +4489,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + if (ctx != qemu_get_aio_context()) { + aio_context_acquire(ctx); + } +- bdrv_subtree_drained_end(bs); + + return ret; + } +diff --git a/block/replication.c b/block/replication.c +index f1eed25e43..c62f48a874 100644 +--- a/block/replication.c ++++ b/block/replication.c +@@ -374,9 +374,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, + s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs); + } + +- bdrv_subtree_drained_begin(hidden_disk->bs); +- bdrv_subtree_drained_begin(secondary_disk->bs); +- + if (s->orig_hidden_read_only) { + QDict *opts = qdict_new(); + qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); +@@ -401,9 +398,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, + aio_context_acquire(ctx); + } + } +- +- bdrv_subtree_drained_end(hidden_disk->bs); +- bdrv_subtree_drained_end(secondary_disk->bs); + } + + static void backup_job_cleanup(BlockDriverState *bs) +diff --git a/blockdev.c b/blockdev.c 
+index 3f1dec6242..8ffb3d9537 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3547,8 +3547,6 @@ fail: + void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + { + BlockReopenQueue *queue = NULL; +- GSList *drained = NULL; +- GSList *p; + + /* Add each one of the BDS that we want to reopen to the queue */ + for (; reopen_list != NULL; reopen_list = reopen_list->next) { +@@ -3585,9 +3583,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + ctx = bdrv_get_aio_context(bs); + aio_context_acquire(ctx); + +- bdrv_subtree_drained_begin(bs); + queue = bdrv_reopen_queue(queue, bs, qdict, false); +- drained = g_slist_prepend(drained, bs); + + aio_context_release(ctx); + } +@@ -3598,15 +3594,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + + fail: + bdrv_reopen_queue_free(queue); +- for (p = drained; p; p = p->next) { +- BlockDriverState *bs = p->data; +- AioContext *ctx = bdrv_get_aio_context(bs); +- +- aio_context_acquire(ctx); +- bdrv_subtree_drained_end(bs); +- aio_context_release(ctx); +- } +- g_slist_free(drained); + } + + void qmp_blockdev_del(const char *node_name, Error **errp) +-- +2.31.1 + diff --git a/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch b/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch new file mode 100644 index 0000000..1ae73c7 --- /dev/null +++ b/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch @@ -0,0 +1,96 @@ +From 9a789d104a4a69031ad95d7fad6380ab21e82503 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:08 +0100 +Subject: [PATCH 26/31] block: Drop out of coroutine in + bdrv_do_drained_begin_quiesce() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [14/16] c9266663b822f703e55b6a07de98ceb56e69e924 
(sgarzarella/qemu-kvm-c-9-s) + +The next patch adds a parent drain to bdrv_attach_child_common(), which +shouldn't be, but is currently called from coroutines in some cases (e.g. +.bdrv_co_create implementations generally open new nodes). Therefore, +the assertion that we're not in a coroutine doesn't hold true any more. + +We could just remove the assertion because there is nothing in the +function that should be in conflict with running in a coroutine, but +just to be on the safe side, we can reverse the caller relationship +between bdrv_do_drained_begin() and bdrv_do_drained_begin_quiesce() so +that the latter also just drops out of coroutine context and we can +still be certain in the future that any drain code doesn't run in +coroutines. + +As a nice side effect, the structure of bdrv_do_drained_begin() is now +symmetrical with bdrv_do_drained_end(). + +Signed-off-by: Kevin Wolf +Message-Id: <20221118174110.55183-14-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 05c272ff0cf1b16cc3606f746182dd99b774f553) +Signed-off-by: Stefano Garzarella +--- + block/io.c | 25 ++++++++++++------------- + 1 file changed, 12 insertions(+), 13 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 2e9503df6a..5e9150d92c 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -346,10 +346,15 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + } + } + +-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) ++static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, ++ bool poll) + { + IO_OR_GS_CODE(); +- assert(!qemu_in_coroutine()); ++ ++ if (qemu_in_coroutine()) { ++ bdrv_co_yield_to_drain(bs, true, parent, poll); ++ return; ++ } + + /* Stop things in parent-to-child order */ + if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { +@@ -359,17 +364,6 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) + 
bs->drv->bdrv_drain_begin(bs); + } + } +-} +- +-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, +- bool poll) +-{ +- if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(bs, true, parent, poll); +- return; +- } +- +- bdrv_do_drained_begin_quiesce(bs, parent); + + /* + * Wait for drained requests to finish. +@@ -385,6 +379,11 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, + } + } + ++void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) ++{ ++ bdrv_do_drained_begin(bs, parent, false); ++} ++ + void bdrv_drained_begin(BlockDriverState *bs) + { + IO_OR_GS_CODE(); +-- +2.31.1 + diff --git a/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch b/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch new file mode 100644 index 0000000..b73b8fe --- /dev/null +++ b/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch @@ -0,0 +1,67 @@ +From e790b4c20a5124239fe93e91fbc87745e5f2cea6 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:01 +0100 +Subject: [PATCH 19/31] block: Fix locking for bdrv_reopen_queue_child() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [7/16] 46bb54506c4400b9a1bf66b6bd7987ff67260003 (sgarzarella/qemu-kvm-c-9-s) + +Callers don't agree whether bdrv_reopen_queue_child() should be called +with the AioContext lock held or not. Standardise on holding the lock +(as done by QMP blockdev-reopen and the replication block driver) and +fix bdrv_reopen() to do the same. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20221118174110.55183-7-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 2e117866d7c96cc17e84cd2946fee1bf3292d814) +Signed-off-by: Stefano Garzarella +--- + block.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/block.c b/block.c +index 7999fd08c5..46df410b07 100644 +--- a/block.c ++++ b/block.c +@@ -4151,6 +4151,8 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, + * bs_queue, or the existing bs_queue being used. + * + * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). ++ * ++ * To be called with bs->aio_context locked. + */ + static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, + BlockDriverState *bs, +@@ -4309,6 +4311,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, + return bs_queue; + } + ++/* To be called with bs->aio_context locked */ + BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, + BlockDriverState *bs, + QDict *options, bool keep_old_opts) +@@ -4473,11 +4476,11 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + GLOBAL_STATE_CODE(); + + bdrv_subtree_drained_begin(bs); ++ queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); ++ + if (ctx != qemu_get_aio_context()) { + aio_context_release(ctx); + } +- +- queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); + ret = bdrv_reopen_multiple(queue, errp); + + if (ctx != qemu_get_aio_context()) { +-- +2.31.1 + diff --git a/kvm-block-Inline-bdrv_drain_invoke.patch b/kvm-block-Inline-bdrv_drain_invoke.patch new file mode 100644 index 0000000..07160dc --- /dev/null +++ b/kvm-block-Inline-bdrv_drain_invoke.patch @@ -0,0 +1,81 @@ +From 1808e560396872173f787f8e338e9837a4c3d626 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:00 +0100 +Subject: [PATCH 18/31] block: Inline bdrv_drain_invoke() + 
+RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [6/16] 2c7473a36360eb43d94b967deb12308cb5ea0d3b (sgarzarella/qemu-kvm-c-9-s) + +bdrv_drain_invoke() has now two entirely separate cases that share no +code any more and are selected depending on a bool parameter. Each case +has only one caller. Just inline the function. + +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Emanuele Giuseppe Esposito +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-6-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit c7bc05f78ab31fb02fc9635f60b9bd22efc8d121) +Signed-off-by: Stefano Garzarella +--- + block/io.c | 23 ++++++----------------- + 1 file changed, 6 insertions(+), 17 deletions(-) + +diff --git a/block/io.c b/block/io.c +index f4ca62b034..a25103be6f 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -242,21 +242,6 @@ typedef struct { + bool ignore_bds_parents; + } BdrvCoDrainData; + +-/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ +-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) +-{ +- if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || +- (!begin && !bs->drv->bdrv_drain_end)) { +- return; +- } +- +- if (begin) { +- bs->drv->bdrv_drain_begin(bs); +- } else { +- bs->drv->bdrv_drain_end(bs); +- } +-} +- + /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ + bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, + BdrvChild *ignore_parent, bool ignore_bds_parents) +@@ -390,7 +375,9 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + } + + bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); +- bdrv_drain_invoke(bs, true); ++ if (bs->drv && bs->drv->bdrv_drain_begin) { ++ bs->drv->bdrv_drain_begin(bs); ++ } 
+ } + + static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, +@@ -461,7 +448,9 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, + assert(bs->quiesce_counter > 0); + + /* Re-enable things in child-to-parent order */ +- bdrv_drain_invoke(bs, false); ++ if (bs->drv && bs->drv->bdrv_drain_end) { ++ bs->drv->bdrv_drain_end(bs); ++ } + bdrv_parent_drained_end(bs, parent, ignore_bds_parents); + + old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); +-- +2.31.1 + diff --git a/kvm-block-Remove-drained_end_counter.patch b/kvm-block-Remove-drained_end_counter.patch new file mode 100644 index 0000000..cfafc33 --- /dev/null +++ b/kvm-block-Remove-drained_end_counter.patch @@ -0,0 +1,433 @@ +From 3009e49f242ab371ffad35bb29c2c26ddfac75d4 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:40:59 +0100 +Subject: [PATCH 17/31] block: Remove drained_end_counter + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [5/16] 5589e3f05dece5394a05641f7f42096e8dc62bdb (sgarzarella/qemu-kvm-c-9-s) + +drained_end_counter is unused now, nobody changes its value any more. It +can be removed. + +In cases where we had two almost identical functions that only differed +in whether the caller passes drained_end_counter, or whether they would +poll for a local drained_end_counter to reach 0, these become a single +function. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Emanuele Giuseppe Esposito +Message-Id: <20221118174110.55183-5-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 2f65df6e16dea2d6e7212fa675f4779d9281e26f) +Signed-off-by: Stefano Garzarella +--- + block.c | 5 +- + block/block-backend.c | 4 +- + block/io.c | 98 ++++++++------------------------ + blockjob.c | 2 +- + include/block/block-io.h | 24 -------- + include/block/block_int-common.h | 6 +- + 6 files changed, 30 insertions(+), 109 deletions(-) + +diff --git a/block.c b/block.c +index 16a62a329c..7999fd08c5 100644 +--- a/block.c ++++ b/block.c +@@ -1235,11 +1235,10 @@ static bool bdrv_child_cb_drained_poll(BdrvChild *child) + return bdrv_drain_poll(bs, false, NULL, false); + } + +-static void bdrv_child_cb_drained_end(BdrvChild *child, +- int *drained_end_counter) ++static void bdrv_child_cb_drained_end(BdrvChild *child) + { + BlockDriverState *bs = child->opaque; +- bdrv_drained_end_no_poll(bs, drained_end_counter); ++ bdrv_drained_end(bs); + } + + static int bdrv_child_cb_inactivate(BdrvChild *child) +diff --git a/block/block-backend.c b/block/block-backend.c +index d98a96ff37..feaf2181fa 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -129,7 +129,7 @@ static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format, + } + static void blk_root_drained_begin(BdrvChild *child); + static bool blk_root_drained_poll(BdrvChild *child); +-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter); ++static void blk_root_drained_end(BdrvChild *child); + + static void blk_root_change_media(BdrvChild *child, bool load); + static void blk_root_resize(BdrvChild *child); +@@ -2556,7 +2556,7 @@ static bool blk_root_drained_poll(BdrvChild *child) + return busy || !!blk->in_flight; + } + +-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) ++static void 
blk_root_drained_end(BdrvChild *child) + { + BlockBackend *blk = child->opaque; + assert(blk->quiesce_counter); +diff --git a/block/io.c b/block/io.c +index c2ed4b2af9..f4ca62b034 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -58,28 +58,19 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, + } + } + +-static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c, +- int *drained_end_counter) ++void bdrv_parent_drained_end_single(BdrvChild *c) + { ++ IO_OR_GS_CODE(); ++ + assert(c->parent_quiesce_counter > 0); + c->parent_quiesce_counter--; + if (c->klass->drained_end) { +- c->klass->drained_end(c, drained_end_counter); ++ c->klass->drained_end(c); + } + } + +-void bdrv_parent_drained_end_single(BdrvChild *c) +-{ +- int drained_end_counter = 0; +- AioContext *ctx = bdrv_child_get_parent_aio_context(c); +- IO_OR_GS_CODE(); +- bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter); +- AIO_WAIT_WHILE(ctx, qatomic_read(&drained_end_counter) > 0); +-} +- + static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, +- bool ignore_bds_parents, +- int *drained_end_counter) ++ bool ignore_bds_parents) + { + BdrvChild *c; + +@@ -87,7 +78,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, + if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { + continue; + } +- bdrv_parent_drained_end_single_no_poll(c, drained_end_counter); ++ bdrv_parent_drained_end_single(c); + } + } + +@@ -249,12 +240,10 @@ typedef struct { + bool poll; + BdrvChild *parent; + bool ignore_bds_parents; +- int *drained_end_counter; + } BdrvCoDrainData; + + /* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ +-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, +- int *drained_end_counter) ++static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) + { + if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || + (!begin && !bs->drv->bdrv_drain_end)) { +@@ -305,8 +294,7 @@ 
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, + BdrvChild *parent, bool ignore_bds_parents, + bool poll); + static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents, +- int *drained_end_counter); ++ BdrvChild *parent, bool ignore_bds_parents); + + static void bdrv_co_drain_bh_cb(void *opaque) + { +@@ -319,14 +307,12 @@ static void bdrv_co_drain_bh_cb(void *opaque) + aio_context_acquire(ctx); + bdrv_dec_in_flight(bs); + if (data->begin) { +- assert(!data->drained_end_counter); + bdrv_do_drained_begin(bs, data->recursive, data->parent, + data->ignore_bds_parents, data->poll); + } else { + assert(!data->poll); + bdrv_do_drained_end(bs, data->recursive, data->parent, +- data->ignore_bds_parents, +- data->drained_end_counter); ++ data->ignore_bds_parents); + } + aio_context_release(ctx); + } else { +@@ -342,8 +328,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + bool begin, bool recursive, + BdrvChild *parent, + bool ignore_bds_parents, +- bool poll, +- int *drained_end_counter) ++ bool poll) + { + BdrvCoDrainData data; + Coroutine *self = qemu_coroutine_self(); +@@ -363,7 +348,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + .parent = parent, + .ignore_bds_parents = ignore_bds_parents, + .poll = poll, +- .drained_end_counter = drained_end_counter, + }; + + if (bs) { +@@ -406,7 +390,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + } + + bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); +- bdrv_drain_invoke(bs, true, NULL); ++ bdrv_drain_invoke(bs, true); + } + + static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, +@@ -417,7 +401,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, + + if (qemu_in_coroutine()) { + bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, +- poll, NULL); ++ poll); + return; + } + +@@ -461,38 +445,24 @@ void 
bdrv_subtree_drained_begin(BlockDriverState *bs) + + /** + * This function does not poll, nor must any of its recursively called +- * functions. The *drained_end_counter pointee will be incremented +- * once for every background operation scheduled, and decremented once +- * the operation settles. Therefore, the pointer must remain valid +- * until the pointee reaches 0. That implies that whoever sets up the +- * pointee has to poll until it is 0. +- * +- * We use atomic operations to access *drained_end_counter, because +- * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of +- * @bs may contain nodes in different AioContexts, +- * (2) bdrv_drain_all_end() uses the same counter for all nodes, +- * regardless of which AioContext they are in. ++ * functions. + */ + static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents, +- int *drained_end_counter) ++ BdrvChild *parent, bool ignore_bds_parents) + { + BdrvChild *child; + int old_quiesce_counter; + +- assert(drained_end_counter != NULL); +- + if (qemu_in_coroutine()) { + bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, +- false, drained_end_counter); ++ false); + return; + } + assert(bs->quiesce_counter > 0); + + /* Re-enable things in child-to-parent order */ +- bdrv_drain_invoke(bs, false, drained_end_counter); +- bdrv_parent_drained_end(bs, parent, ignore_bds_parents, +- drained_end_counter); ++ bdrv_drain_invoke(bs, false); ++ bdrv_parent_drained_end(bs, parent, ignore_bds_parents); + + old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); + if (old_quiesce_counter == 1) { +@@ -503,32 +473,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, + assert(!ignore_bds_parents); + bs->recursive_quiesce_counter--; + QLIST_FOREACH(child, &bs->children, next) { +- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents, +- drained_end_counter); ++ bdrv_do_drained_end(child->bs, true, 
child, ignore_bds_parents); + } + } + } + + void bdrv_drained_end(BlockDriverState *bs) + { +- int drained_end_counter = 0; + IO_OR_GS_CODE(); +- bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter); +- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); +-} +- +-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter) +-{ +- IO_CODE(); +- bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter); ++ bdrv_do_drained_end(bs, false, NULL, false); + } + + void bdrv_subtree_drained_end(BlockDriverState *bs) + { +- int drained_end_counter = 0; + IO_OR_GS_CODE(); +- bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter); +- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); ++ bdrv_do_drained_end(bs, true, NULL, false); + } + + void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) +@@ -543,16 +502,12 @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) + + void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) + { +- int drained_end_counter = 0; + int i; + IO_OR_GS_CODE(); + + for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { +- bdrv_do_drained_end(child->bs, true, child, false, +- &drained_end_counter); ++ bdrv_do_drained_end(child->bs, true, child, false); + } +- +- BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0); + } + + void bdrv_drain(BlockDriverState *bs) +@@ -610,7 +565,7 @@ void bdrv_drain_all_begin(void) + GLOBAL_STATE_CODE(); + + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL); ++ bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); + return; + } + +@@ -649,22 +604,19 @@ void bdrv_drain_all_begin(void) + + void bdrv_drain_all_end_quiesce(BlockDriverState *bs) + { +- int drained_end_counter = 0; + GLOBAL_STATE_CODE(); + + g_assert(bs->quiesce_counter > 0); + g_assert(!bs->refcnt); + + while (bs->quiesce_counter) { +- 
bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); ++ bdrv_do_drained_end(bs, false, NULL, true); + } +- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); + } + + void bdrv_drain_all_end(void) + { + BlockDriverState *bs = NULL; +- int drained_end_counter = 0; + GLOBAL_STATE_CODE(); + + /* +@@ -680,13 +632,11 @@ void bdrv_drain_all_end(void) + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); +- bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); ++ bdrv_do_drained_end(bs, false, NULL, true); + aio_context_release(aio_context); + } + + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); +- AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0); +- + assert(bdrv_drain_all_count > 0); + bdrv_drain_all_count--; + } +diff --git a/blockjob.c b/blockjob.c +index f51d4e18f3..0ab721e139 100644 +--- a/blockjob.c ++++ b/blockjob.c +@@ -120,7 +120,7 @@ static bool child_job_drained_poll(BdrvChild *c) + } + } + +-static void child_job_drained_end(BdrvChild *c, int *drained_end_counter) ++static void child_job_drained_end(BdrvChild *c) + { + BlockJob *job = c->opaque; + job_resume(&job->job); +diff --git a/include/block/block-io.h b/include/block/block-io.h +index b099d7db45..054e964c9b 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -237,21 +237,6 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, + int64_t bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + +-/** +- * bdrv_drained_end_no_poll: +- * +- * Same as bdrv_drained_end(), but do not poll for the subgraph to +- * actually become unquiesced. Therefore, no graph changes will occur +- * with this function. +- * +- * *drained_end_counter is incremented for every background operation +- * that is scheduled, and will be decremented for every operation once +- * it settles. The caller must poll until it reaches 0. 
The counter +- * should be accessed using atomic operations only. +- */ +-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter); +- +- + /* + * "I/O or GS" API functions. These functions can run without + * the BQL, but only in one specific iothread/main loop. +@@ -311,9 +296,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); + * bdrv_parent_drained_end_single: + * + * End a quiesced section for the parent of @c. +- * +- * This polls @bs's AioContext until all scheduled sub-drained_ends +- * have settled, which may result in graph changes. + */ + void bdrv_parent_drained_end_single(BdrvChild *c); + +@@ -361,12 +343,6 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); + * bdrv_drained_end: + * + * End a quiescent section started by bdrv_drained_begin(). +- * +- * This polls @bs's AioContext until all scheduled sub-drained_ends +- * have settled. On one hand, that may result in graph changes. On +- * the other, this requires that the caller either runs in the main +- * loop; or that all involved nodes (@bs and all of its parents) are +- * in the caller's AioContext. + */ + void bdrv_drained_end(BlockDriverState *bs); + +diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h +index 40d646d1ed..2b97576f6d 100644 +--- a/include/block/block_int-common.h ++++ b/include/block/block_int-common.h +@@ -939,15 +939,11 @@ struct BdrvChildClass { + * These functions must not change the graph (and therefore also must not + * call aio_poll(), which could change the graph indirectly). + * +- * If drained_end() schedules background operations, it must atomically +- * increment *drained_end_counter for each such operation and atomically +- * decrement it once the operation has settled. +- * + * Note that this can be nested. If drained_begin() was called twice, new + * I/O is allowed only after drained_end() was called twice, too. 
+ */ + void (*drained_begin)(BdrvChild *child); +- void (*drained_end)(BdrvChild *child, int *drained_end_counter); ++ void (*drained_end)(BdrvChild *child); + + /* + * Returns whether the parent has pending requests for the child. This +-- +2.31.1 + diff --git a/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch b/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch new file mode 100644 index 0000000..aa64bec --- /dev/null +++ b/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch @@ -0,0 +1,274 @@ +From 0dc7990533cef41e58579ee96315aca1fdc44ea1 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:07 +0100 +Subject: [PATCH 25/31] block: Remove ignore_bds_parents parameter from + drain_begin/end. + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [13/16] 1ed88d975a9569bffeb33ad847874417780ce408 (sgarzarella/qemu-kvm-c-9-s) + +ignore_bds_parents is now ignored during drain_begin and drain_end, so +we can just remove it there. It is still a valid optimisation for +drain_all in bdrv_drained_poll(), so leave it around there. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20221118174110.55183-13-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit a82a3bd135078d14f1bb4b5e50f51e77d3748270) +Signed-off-by: Stefano Garzarella +--- + block.c | 2 +- + block/io.c | 58 +++++++++++++++------------------------- + include/block/block-io.h | 3 +-- + 3 files changed, 24 insertions(+), 39 deletions(-) + +diff --git a/block.c b/block.c +index 5a583e260d..af31a94863 100644 +--- a/block.c ++++ b/block.c +@@ -1226,7 +1226,7 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c) + static void bdrv_child_cb_drained_begin(BdrvChild *child) + { + BlockDriverState *bs = child->opaque; +- bdrv_do_drained_begin_quiesce(bs, NULL, false); ++ bdrv_do_drained_begin_quiesce(bs, NULL); + } + + static bool bdrv_child_cb_drained_poll(BdrvChild *child) +diff --git a/block/io.c b/block/io.c +index 87d6f22ec4..2e9503df6a 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -45,13 +45,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs); + static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int64_t bytes, BdrvRequestFlags flags); + +-static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, +- bool ignore_bds_parents) ++static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) + { + BdrvChild *c, *next; + + QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { +- if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { ++ if (c == ignore) { + continue; + } + bdrv_parent_drained_begin_single(c, false); +@@ -70,13 +69,12 @@ void bdrv_parent_drained_end_single(BdrvChild *c) + } + } + +-static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, +- bool ignore_bds_parents) ++static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) + { + BdrvChild *c; + + QLIST_FOREACH(c, &bs->parents, next_parent) { +- if (c 
== ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { ++ if (c == ignore) { + continue; + } + bdrv_parent_drained_end_single(c); +@@ -242,7 +240,6 @@ typedef struct { + bool begin; + bool poll; + BdrvChild *parent; +- bool ignore_bds_parents; + } BdrvCoDrainData; + + /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ +@@ -269,9 +266,8 @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs, + } + + static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, +- bool ignore_bds_parents, bool poll); +-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, +- bool ignore_bds_parents); ++ bool poll); ++static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent); + + static void bdrv_co_drain_bh_cb(void *opaque) + { +@@ -284,11 +280,10 @@ static void bdrv_co_drain_bh_cb(void *opaque) + aio_context_acquire(ctx); + bdrv_dec_in_flight(bs); + if (data->begin) { +- bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents, +- data->poll); ++ bdrv_do_drained_begin(bs, data->parent, data->poll); + } else { + assert(!data->poll); +- bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents); ++ bdrv_do_drained_end(bs, data->parent); + } + aio_context_release(ctx); + } else { +@@ -303,7 +298,6 @@ static void bdrv_co_drain_bh_cb(void *opaque) + static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + bool begin, + BdrvChild *parent, +- bool ignore_bds_parents, + bool poll) + { + BdrvCoDrainData data; +@@ -321,7 +315,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + .done = false, + .begin = begin, + .parent = parent, +- .ignore_bds_parents = ignore_bds_parents, + .poll = poll, + }; + +@@ -353,8 +346,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + } + } + +-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, +- BdrvChild *parent, bool ignore_bds_parents) ++void bdrv_do_drained_begin_quiesce(BlockDriverState 
*bs, BdrvChild *parent) + { + IO_OR_GS_CODE(); + assert(!qemu_in_coroutine()); +@@ -362,9 +354,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + /* Stop things in parent-to-child order */ + if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { + aio_disable_external(bdrv_get_aio_context(bs)); +- +- /* TODO Remove ignore_bds_parents, we don't consider it any more */ +- bdrv_parent_drained_begin(bs, parent, false); ++ bdrv_parent_drained_begin(bs, parent); + if (bs->drv && bs->drv->bdrv_drain_begin) { + bs->drv->bdrv_drain_begin(bs); + } +@@ -372,14 +362,14 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + } + + static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, +- bool ignore_bds_parents, bool poll) ++ bool poll) + { + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll); ++ bdrv_co_yield_to_drain(bs, true, parent, poll); + return; + } + +- bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); ++ bdrv_do_drained_begin_quiesce(bs, parent); + + /* + * Wait for drained requests to finish. +@@ -391,7 +381,6 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, + * nodes. + */ + if (poll) { +- assert(!ignore_bds_parents); + BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); + } + } +@@ -399,20 +388,19 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, + void bdrv_drained_begin(BlockDriverState *bs) + { + IO_OR_GS_CODE(); +- bdrv_do_drained_begin(bs, NULL, false, true); ++ bdrv_do_drained_begin(bs, NULL, true); + } + + /** + * This function does not poll, nor must any of its recursively called + * functions. 
+ */ +-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, +- bool ignore_bds_parents) ++static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) + { + int old_quiesce_counter; + + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false); ++ bdrv_co_yield_to_drain(bs, false, parent, false); + return; + } + assert(bs->quiesce_counter > 0); +@@ -423,9 +411,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, + if (bs->drv && bs->drv->bdrv_drain_end) { + bs->drv->bdrv_drain_end(bs); + } +- /* TODO Remove ignore_bds_parents, we don't consider it any more */ +- bdrv_parent_drained_end(bs, parent, false); +- ++ bdrv_parent_drained_end(bs, parent); + aio_enable_external(bdrv_get_aio_context(bs)); + } + } +@@ -433,7 +419,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, + void bdrv_drained_end(BlockDriverState *bs) + { + IO_OR_GS_CODE(); +- bdrv_do_drained_end(bs, NULL, false); ++ bdrv_do_drained_end(bs, NULL); + } + + void bdrv_drain(BlockDriverState *bs) +@@ -491,7 +477,7 @@ void bdrv_drain_all_begin(void) + GLOBAL_STATE_CODE(); + + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(NULL, true, NULL, true, true); ++ bdrv_co_yield_to_drain(NULL, true, NULL, true); + return; + } + +@@ -516,7 +502,7 @@ void bdrv_drain_all_begin(void) + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); +- bdrv_do_drained_begin(bs, NULL, true, false); ++ bdrv_do_drained_begin(bs, NULL, false); + aio_context_release(aio_context); + } + +@@ -536,7 +522,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs) + g_assert(!bs->refcnt); + + while (bs->quiesce_counter) { +- bdrv_do_drained_end(bs, NULL, true); ++ bdrv_do_drained_end(bs, NULL); + } + } + +@@ -558,7 +544,7 @@ void bdrv_drain_all_end(void) + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); +- bdrv_do_drained_end(bs, NULL, 
true); ++ bdrv_do_drained_end(bs, NULL); + aio_context_release(aio_context); + } + +diff --git a/include/block/block-io.h b/include/block/block-io.h +index 9c36a16a1f..8f5e75756a 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -329,8 +329,7 @@ void bdrv_drained_begin(BlockDriverState *bs); + * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already + * running requests to complete. + */ +-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, +- BdrvChild *parent, bool ignore_bds_parents); ++void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent); + + /** + * bdrv_drained_end: +-- +2.31.1 + diff --git a/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch b/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch new file mode 100644 index 0000000..94eba86 --- /dev/null +++ b/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch @@ -0,0 +1,106 @@ +From 60b66881fb972e1cdff1cd7b4c865e5e21c141b0 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:10 +0100 +Subject: [PATCH 28/31] block: Remove poll parameter from + bdrv_parent_drained_begin_single() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [16/16] fd526cc9e5bebeb256cfa56d23ec596f26caa37a (sgarzarella/qemu-kvm-c-9-s) + +All callers of bdrv_parent_drained_begin_single() pass poll=false now, +so we don't need the parameter any more. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20221118174110.55183-16-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 606ed756c1d69cba4822be8923248d2fd714f069) +Signed-off-by: Stefano Garzarella +--- + block.c | 4 ++-- + block/io.c | 8 ++------ + include/block/block-io.h | 5 ++--- + 3 files changed, 6 insertions(+), 11 deletions(-) + +diff --git a/block.c b/block.c +index 65588d313a..0d78711416 100644 +--- a/block.c ++++ b/block.c +@@ -2417,7 +2417,7 @@ static void bdrv_replace_child_abort(void *opaque) + * new_bs drained when calling bdrv_replace_child_tran() is not a + * requirement any more. + */ +- bdrv_parent_drained_begin_single(s->child, false); ++ bdrv_parent_drained_begin_single(s->child); + assert(!bdrv_parent_drained_poll_single(s->child)); + } + assert(s->child->quiesced_parent); +@@ -3059,7 +3059,7 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, + * a problem, we already did this), but it will still poll until the parent + * is fully quiesced, so it will not be negatively affected either. 
+ */ +- bdrv_parent_drained_begin_single(new_child, false); ++ bdrv_parent_drained_begin_single(new_child); + bdrv_replace_child_noperm(new_child, child_bs); + + BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); +diff --git a/block/io.c b/block/io.c +index ae64830eac..38e57d1f67 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) + if (c == ignore) { + continue; + } +- bdrv_parent_drained_begin_single(c, false); ++ bdrv_parent_drained_begin_single(c); + } + } + +@@ -105,9 +105,8 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, + return busy; + } + +-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) ++void bdrv_parent_drained_begin_single(BdrvChild *c) + { +- AioContext *ctx = bdrv_child_get_parent_aio_context(c); + IO_OR_GS_CODE(); + + assert(!c->quiesced_parent); +@@ -116,9 +115,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) + if (c->klass->drained_begin) { + c->klass->drained_begin(c); + } +- if (poll) { +- AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c)); +- } + } + + static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) +diff --git a/include/block/block-io.h b/include/block/block-io.h +index 65e6d2569b..92aaa7c1e9 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -287,10 +287,9 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); + /** + * bdrv_parent_drained_begin_single: + * +- * Begin a quiesced section for the parent of @c. If @poll is true, wait for +- * any pending activity to cease. ++ * Begin a quiesced section for the parent of @c. 
+ */ +-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); ++void bdrv_parent_drained_begin_single(BdrvChild *c); + + /** + * bdrv_parent_drained_poll_single: +-- +2.31.1 + diff --git a/kvm-block-Remove-subtree-drains.patch b/kvm-block-Remove-subtree-drains.patch new file mode 100644 index 0000000..af9c0ff --- /dev/null +++ b/kvm-block-Remove-subtree-drains.patch @@ -0,0 +1,896 @@ +From 79063522861cb2baf921b204bcdf4c3bfb5697f4 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:05 +0100 +Subject: [PATCH 23/31] block: Remove subtree drains + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [11/16] d92f5041cceeeec49a65441b22d20f692c0f1c77 (sgarzarella/qemu-kvm-c-9-s) + +Subtree drains are not used any more. Remove them. + +After this, BdrvChildClass.attach/detach() don't poll any more. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-11-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 299403aedaeb7f08d8e98aa8614b29d4e5546066) +Signed-off-by: Stefano Garzarella +--- + block.c | 20 +-- + block/io.c | 121 +++----------- + include/block/block-io.h | 18 +-- + include/block/block_int-common.h | 1 - + include/block/block_int-io.h | 12 -- + tests/unit/test-bdrv-drain.c | 261 ++----------------------------- + 6 files changed, 44 insertions(+), 389 deletions(-) + +diff --git a/block.c b/block.c +index 5330e89903..e0e3b21790 100644 +--- a/block.c ++++ b/block.c +@@ -1232,7 +1232,7 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child) + static bool bdrv_child_cb_drained_poll(BdrvChild *child) + { + BlockDriverState *bs = child->opaque; +- return bdrv_drain_poll(bs, false, NULL, false); ++ return bdrv_drain_poll(bs, NULL, false); + } + + static void bdrv_child_cb_drained_end(BdrvChild *child) +@@ -1482,8 +1482,6 @@ static void bdrv_child_cb_attach(BdrvChild *child) + assert(!bs->file); + bs->file = child; + } +- +- bdrv_apply_subtree_drain(child, bs); + } + + static void bdrv_child_cb_detach(BdrvChild *child) +@@ -1494,8 +1492,6 @@ static void bdrv_child_cb_detach(BdrvChild *child) + bdrv_backing_detach(child); + } + +- bdrv_unapply_subtree_drain(child, bs); +- + assert_bdrv_graph_writable(bs); + QLIST_REMOVE(child, next); + if (child == bs->backing) { +@@ -2851,9 +2847,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + } + + if (old_bs) { +- /* Detach first so that the recursive drain sections coming from @child +- * are already gone and we only end the drain sections that came from +- * elsewhere. 
*/ + if (child->klass->detach) { + child->klass->detach(child); + } +@@ -2868,17 +2861,14 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); + + /* +- * Detaching the old node may have led to the new node's +- * quiesce_counter having been decreased. Not a problem, we +- * just need to recognize this here and then invoke +- * drained_end appropriately more often. ++ * Polling in bdrv_parent_drained_begin_single() may have led to the new ++ * node's quiesce_counter having been decreased. Not a problem, we just ++ * need to recognize this here and then invoke drained_end appropriately ++ * more often. + */ + assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); + drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; + +- /* Attach only after starting new drained sections, so that recursive +- * drain sections coming from @child don't get an extra .drained_begin +- * callback. */ + if (child->klass->attach) { + child->klass->attach(child); + } +diff --git a/block/io.c b/block/io.c +index a25103be6f..75224480d0 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -236,17 +236,15 @@ typedef struct { + BlockDriverState *bs; + bool done; + bool begin; +- bool recursive; + bool poll; + BdrvChild *parent; + bool ignore_bds_parents; + } BdrvCoDrainData; + + /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ +-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, +- BdrvChild *ignore_parent, bool ignore_bds_parents) ++bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, ++ bool ignore_bds_parents) + { +- BdrvChild *child, *next; + IO_OR_GS_CODE(); + + if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) { +@@ -257,29 +255,19 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, + return true; + } + +- if (recursive) { +- assert(!ignore_bds_parents); +- QLIST_FOREACH_SAFE(child, &bs->children, next, next) { +- if 
(bdrv_drain_poll(child->bs, recursive, child, false)) { +- return true; +- } +- } +- } +- + return false; + } + +-static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive, ++static bool bdrv_drain_poll_top_level(BlockDriverState *bs, + BdrvChild *ignore_parent) + { +- return bdrv_drain_poll(bs, recursive, ignore_parent, false); ++ return bdrv_drain_poll(bs, ignore_parent, false); + } + +-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents, +- bool poll); +-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents); ++static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, ++ bool ignore_bds_parents, bool poll); ++static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, ++ bool ignore_bds_parents); + + static void bdrv_co_drain_bh_cb(void *opaque) + { +@@ -292,12 +280,11 @@ static void bdrv_co_drain_bh_cb(void *opaque) + aio_context_acquire(ctx); + bdrv_dec_in_flight(bs); + if (data->begin) { +- bdrv_do_drained_begin(bs, data->recursive, data->parent, +- data->ignore_bds_parents, data->poll); ++ bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents, ++ data->poll); + } else { + assert(!data->poll); +- bdrv_do_drained_end(bs, data->recursive, data->parent, +- data->ignore_bds_parents); ++ bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents); + } + aio_context_release(ctx); + } else { +@@ -310,7 +297,7 @@ static void bdrv_co_drain_bh_cb(void *opaque) + } + + static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, +- bool begin, bool recursive, ++ bool begin, + BdrvChild *parent, + bool ignore_bds_parents, + bool poll) +@@ -329,7 +316,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + .bs = bs, + .done = false, + .begin = begin, +- .recursive = recursive, + .parent = parent, + .ignore_bds_parents = ignore_bds_parents, + .poll = poll, 
+@@ -380,29 +366,16 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + } + } + +-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents, +- bool poll) ++static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, ++ bool ignore_bds_parents, bool poll) + { +- BdrvChild *child, *next; +- + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, +- poll); ++ bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll); + return; + } + + bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); + +- if (recursive) { +- assert(!ignore_bds_parents); +- bs->recursive_quiesce_counter++; +- QLIST_FOREACH_SAFE(child, &bs->children, next, next) { +- bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents, +- false); +- } +- } +- + /* + * Wait for drained requests to finish. + * +@@ -414,35 +387,27 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, + */ + if (poll) { + assert(!ignore_bds_parents); +- BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent)); ++ BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); + } + } + + void bdrv_drained_begin(BlockDriverState *bs) + { + IO_OR_GS_CODE(); +- bdrv_do_drained_begin(bs, false, NULL, false, true); +-} +- +-void bdrv_subtree_drained_begin(BlockDriverState *bs) +-{ +- IO_OR_GS_CODE(); +- bdrv_do_drained_begin(bs, true, NULL, false, true); ++ bdrv_do_drained_begin(bs, NULL, false, true); + } + + /** + * This function does not poll, nor must any of its recursively called + * functions. 
+ */ +-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents) ++static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, ++ bool ignore_bds_parents) + { +- BdrvChild *child; + int old_quiesce_counter; + + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, +- false); ++ bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false); + return; + } + assert(bs->quiesce_counter > 0); +@@ -457,46 +422,12 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, + if (old_quiesce_counter == 1) { + aio_enable_external(bdrv_get_aio_context(bs)); + } +- +- if (recursive) { +- assert(!ignore_bds_parents); +- bs->recursive_quiesce_counter--; +- QLIST_FOREACH(child, &bs->children, next) { +- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents); +- } +- } + } + + void bdrv_drained_end(BlockDriverState *bs) + { + IO_OR_GS_CODE(); +- bdrv_do_drained_end(bs, false, NULL, false); +-} +- +-void bdrv_subtree_drained_end(BlockDriverState *bs) +-{ +- IO_OR_GS_CODE(); +- bdrv_do_drained_end(bs, true, NULL, false); +-} +- +-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) +-{ +- int i; +- IO_OR_GS_CODE(); +- +- for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { +- bdrv_do_drained_begin(child->bs, true, child, false, true); +- } +-} +- +-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) +-{ +- int i; +- IO_OR_GS_CODE(); +- +- for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { +- bdrv_do_drained_end(child->bs, true, child, false); +- } ++ bdrv_do_drained_end(bs, NULL, false); + } + + void bdrv_drain(BlockDriverState *bs) +@@ -529,7 +460,7 @@ static bool bdrv_drain_all_poll(void) + while ((bs = bdrv_next_all_states(bs))) { + AioContext *aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); +- result |= bdrv_drain_poll(bs, false, 
NULL, true); ++ result |= bdrv_drain_poll(bs, NULL, true); + aio_context_release(aio_context); + } + +@@ -554,7 +485,7 @@ void bdrv_drain_all_begin(void) + GLOBAL_STATE_CODE(); + + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); ++ bdrv_co_yield_to_drain(NULL, true, NULL, true, true); + return; + } + +@@ -579,7 +510,7 @@ void bdrv_drain_all_begin(void) + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); +- bdrv_do_drained_begin(bs, false, NULL, true, false); ++ bdrv_do_drained_begin(bs, NULL, true, false); + aio_context_release(aio_context); + } + +@@ -599,7 +530,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs) + g_assert(!bs->refcnt); + + while (bs->quiesce_counter) { +- bdrv_do_drained_end(bs, false, NULL, true); ++ bdrv_do_drained_end(bs, NULL, true); + } + } + +@@ -621,7 +552,7 @@ void bdrv_drain_all_end(void) + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); +- bdrv_do_drained_end(bs, false, NULL, true); ++ bdrv_do_drained_end(bs, NULL, true); + aio_context_release(aio_context); + } + +diff --git a/include/block/block-io.h b/include/block/block-io.h +index 054e964c9b..9c36a16a1f 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -302,8 +302,7 @@ void bdrv_parent_drained_end_single(BdrvChild *c); + /** + * bdrv_drain_poll: + * +- * Poll for pending requests in @bs, its parents (except for @ignore_parent), +- * and if @recursive is true its children as well (used for subtree drain). ++ * Poll for pending requests in @bs and its parents (except for @ignore_parent). + * + * If @ignore_bds_parents is true, parents that are BlockDriverStates must + * ignore the drain request because they will be drained separately (used for +@@ -311,8 +310,8 @@ void bdrv_parent_drained_end_single(BdrvChild *c); + * + * This is part of bdrv_drained_begin. 
+ */ +-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, +- BdrvChild *ignore_parent, bool ignore_bds_parents); ++bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, ++ bool ignore_bds_parents); + + /** + * bdrv_drained_begin: +@@ -333,12 +332,6 @@ void bdrv_drained_begin(BlockDriverState *bs); + void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + BdrvChild *parent, bool ignore_bds_parents); + +-/** +- * Like bdrv_drained_begin, but recursively begins a quiesced section for +- * exclusive access to all child nodes as well. +- */ +-void bdrv_subtree_drained_begin(BlockDriverState *bs); +- + /** + * bdrv_drained_end: + * +@@ -346,9 +339,4 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); + */ + void bdrv_drained_end(BlockDriverState *bs); + +-/** +- * End a quiescent section started by bdrv_subtree_drained_begin(). +- */ +-void bdrv_subtree_drained_end(BlockDriverState *bs); +- + #endif /* BLOCK_IO_H */ +diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h +index 2b97576f6d..791dddfd7d 100644 +--- a/include/block/block_int-common.h ++++ b/include/block/block_int-common.h +@@ -1184,7 +1184,6 @@ struct BlockDriverState { + + /* Accessed with atomic ops. */ + int quiesce_counter; +- int recursive_quiesce_counter; + + unsigned int write_gen; /* Current data generation */ + +diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h +index 4b0b3e17ef..8bc061ebb8 100644 +--- a/include/block/block_int-io.h ++++ b/include/block/block_int-io.h +@@ -179,16 +179,4 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs, + */ + void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes); + +- +-/* +- * "I/O or GS" API functions. These functions can run without +- * the BQL, but only in one specific iothread/main loop. +- * +- * See include/block/block-io.h for more information about +- * the "I/O or GS" API. 
+- */ +- +-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); +-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); +- + #endif /* BLOCK_INT_IO_H */ +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index 695519ee02..dda08de8db 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -156,7 +156,6 @@ static void call_in_coroutine(void (*entry)(void)) + enum drain_type { + BDRV_DRAIN_ALL, + BDRV_DRAIN, +- BDRV_SUBTREE_DRAIN, + DRAIN_TYPE_MAX, + }; + +@@ -165,7 +164,6 @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) + switch (drain_type) { + case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; + case BDRV_DRAIN: bdrv_drained_begin(bs); break; +- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break; + default: g_assert_not_reached(); + } + } +@@ -175,7 +173,6 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) + switch (drain_type) { + case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; + case BDRV_DRAIN: bdrv_drained_end(bs); break; +- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break; + default: g_assert_not_reached(); + } + } +@@ -271,11 +268,6 @@ static void test_drv_cb_drain(void) + test_drv_cb_common(BDRV_DRAIN, false); + } + +-static void test_drv_cb_drain_subtree(void) +-{ +- test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); +-} +- + static void test_drv_cb_co_drain_all(void) + { + call_in_coroutine(test_drv_cb_drain_all); +@@ -286,11 +278,6 @@ static void test_drv_cb_co_drain(void) + call_in_coroutine(test_drv_cb_drain); + } + +-static void test_drv_cb_co_drain_subtree(void) +-{ +- call_in_coroutine(test_drv_cb_drain_subtree); +-} +- + static void test_quiesce_common(enum drain_type drain_type, bool recursive) + { + BlockBackend *blk; +@@ -332,11 +319,6 @@ static void test_quiesce_drain(void) + test_quiesce_common(BDRV_DRAIN, false); + } + +-static void 
test_quiesce_drain_subtree(void) +-{ +- test_quiesce_common(BDRV_SUBTREE_DRAIN, true); +-} +- + static void test_quiesce_co_drain_all(void) + { + call_in_coroutine(test_quiesce_drain_all); +@@ -347,11 +329,6 @@ static void test_quiesce_co_drain(void) + call_in_coroutine(test_quiesce_drain); + } + +-static void test_quiesce_co_drain_subtree(void) +-{ +- call_in_coroutine(test_quiesce_drain_subtree); +-} +- + static void test_nested(void) + { + BlockBackend *blk; +@@ -402,158 +379,6 @@ static void test_nested(void) + blk_unref(blk); + } + +-static void test_multiparent(void) +-{ +- BlockBackend *blk_a, *blk_b; +- BlockDriverState *bs_a, *bs_b, *backing; +- BDRVTestState *a_s, *b_s, *backing_s; +- +- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); +- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, +- &error_abort); +- a_s = bs_a->opaque; +- blk_insert_bs(blk_a, bs_a, &error_abort); +- +- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); +- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, +- &error_abort); +- b_s = bs_b->opaque; +- blk_insert_bs(blk_b, bs_b, &error_abort); +- +- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); +- backing_s = backing->opaque; +- bdrv_set_backing_hd(bs_a, backing, &error_abort); +- bdrv_set_backing_hd(bs_b, backing, &error_abort); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); +- g_assert_cmpint(backing->quiesce_counter, ==, 0); +- g_assert_cmpint(a_s->drain_count, ==, 0); +- g_assert_cmpint(b_s->drain_count, ==, 0); +- g_assert_cmpint(backing_s->drain_count, ==, 0); +- +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 1); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 1); +- g_assert_cmpint(backing->quiesce_counter, ==, 1); +- g_assert_cmpint(a_s->drain_count, ==, 1); +- g_assert_cmpint(b_s->drain_count, ==, 1); +- 
g_assert_cmpint(backing_s->drain_count, ==, 1); +- +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 2); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 2); +- g_assert_cmpint(backing->quiesce_counter, ==, 2); +- g_assert_cmpint(a_s->drain_count, ==, 2); +- g_assert_cmpint(b_s->drain_count, ==, 2); +- g_assert_cmpint(backing_s->drain_count, ==, 2); +- +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 1); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 1); +- g_assert_cmpint(backing->quiesce_counter, ==, 1); +- g_assert_cmpint(a_s->drain_count, ==, 1); +- g_assert_cmpint(b_s->drain_count, ==, 1); +- g_assert_cmpint(backing_s->drain_count, ==, 1); +- +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); +- g_assert_cmpint(backing->quiesce_counter, ==, 0); +- g_assert_cmpint(a_s->drain_count, ==, 0); +- g_assert_cmpint(b_s->drain_count, ==, 0); +- g_assert_cmpint(backing_s->drain_count, ==, 0); +- +- bdrv_unref(backing); +- bdrv_unref(bs_a); +- bdrv_unref(bs_b); +- blk_unref(blk_a); +- blk_unref(blk_b); +-} +- +-static void test_graph_change_drain_subtree(void) +-{ +- BlockBackend *blk_a, *blk_b; +- BlockDriverState *bs_a, *bs_b, *backing; +- BDRVTestState *a_s, *b_s, *backing_s; +- +- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); +- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, +- &error_abort); +- a_s = bs_a->opaque; +- blk_insert_bs(blk_a, bs_a, &error_abort); +- +- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); +- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, +- &error_abort); +- b_s = bs_b->opaque; +- blk_insert_bs(blk_b, bs_b, &error_abort); +- +- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); +- backing_s = backing->opaque; +- bdrv_set_backing_hd(bs_a, backing, &error_abort); +- 
+- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); +- g_assert_cmpint(backing->quiesce_counter, ==, 0); +- g_assert_cmpint(a_s->drain_count, ==, 0); +- g_assert_cmpint(b_s->drain_count, ==, 0); +- g_assert_cmpint(backing_s->drain_count, ==, 0); +- +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); +- +- bdrv_set_backing_hd(bs_b, backing, &error_abort); +- g_assert_cmpint(bs_a->quiesce_counter, ==, 5); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 5); +- g_assert_cmpint(backing->quiesce_counter, ==, 5); +- g_assert_cmpint(a_s->drain_count, ==, 5); +- g_assert_cmpint(b_s->drain_count, ==, 5); +- g_assert_cmpint(backing_s->drain_count, ==, 5); +- +- bdrv_set_backing_hd(bs_b, NULL, &error_abort); +- g_assert_cmpint(bs_a->quiesce_counter, ==, 3); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 2); +- g_assert_cmpint(backing->quiesce_counter, ==, 3); +- g_assert_cmpint(a_s->drain_count, ==, 3); +- g_assert_cmpint(b_s->drain_count, ==, 2); +- g_assert_cmpint(backing_s->drain_count, ==, 3); +- +- bdrv_set_backing_hd(bs_b, backing, &error_abort); +- g_assert_cmpint(bs_a->quiesce_counter, ==, 5); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 5); +- g_assert_cmpint(backing->quiesce_counter, ==, 5); +- g_assert_cmpint(a_s->drain_count, ==, 5); +- g_assert_cmpint(b_s->drain_count, ==, 5); +- g_assert_cmpint(backing_s->drain_count, ==, 5); +- +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); +- g_assert_cmpint(backing->quiesce_counter, ==, 0); +- g_assert_cmpint(a_s->drain_count, ==, 0); +- 
g_assert_cmpint(b_s->drain_count, ==, 0); +- g_assert_cmpint(backing_s->drain_count, ==, 0); +- +- bdrv_unref(backing); +- bdrv_unref(bs_a); +- bdrv_unref(bs_b); +- blk_unref(blk_a); +- blk_unref(blk_b); +-} +- + static void test_graph_change_drain_all(void) + { + BlockBackend *blk_a, *blk_b; +@@ -773,12 +598,6 @@ static void test_iothread_drain(void) + test_iothread_common(BDRV_DRAIN, 1); + } + +-static void test_iothread_drain_subtree(void) +-{ +- test_iothread_common(BDRV_SUBTREE_DRAIN, 0); +- test_iothread_common(BDRV_SUBTREE_DRAIN, 1); +-} +- + + typedef struct TestBlockJob { + BlockJob common; +@@ -863,7 +682,6 @@ enum test_job_result { + enum test_job_drain_node { + TEST_JOB_DRAIN_SRC, + TEST_JOB_DRAIN_SRC_CHILD, +- TEST_JOB_DRAIN_SRC_PARENT, + }; + + static void test_blockjob_common_drain_node(enum drain_type drain_type, +@@ -901,9 +719,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, + case TEST_JOB_DRAIN_SRC_CHILD: + drain_bs = src_backing; + break; +- case TEST_JOB_DRAIN_SRC_PARENT: +- drain_bs = src_overlay; +- break; + default: + g_assert_not_reached(); + } +@@ -1055,10 +870,6 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, + TEST_JOB_DRAIN_SRC); + test_blockjob_common_drain_node(drain_type, use_iothread, result, + TEST_JOB_DRAIN_SRC_CHILD); +- if (drain_type == BDRV_SUBTREE_DRAIN) { +- test_blockjob_common_drain_node(drain_type, use_iothread, result, +- TEST_JOB_DRAIN_SRC_PARENT); +- } + } + + static void test_blockjob_drain_all(void) +@@ -1071,11 +882,6 @@ static void test_blockjob_drain(void) + test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS); + } + +-static void test_blockjob_drain_subtree(void) +-{ +- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS); +-} +- + static void test_blockjob_error_drain_all(void) + { + test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN); +@@ -1088,12 +894,6 @@ static void test_blockjob_error_drain(void) + 
test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE); + } + +-static void test_blockjob_error_drain_subtree(void) +-{ +- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN); +- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE); +-} +- + static void test_blockjob_iothread_drain_all(void) + { + test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS); +@@ -1104,11 +904,6 @@ static void test_blockjob_iothread_drain(void) + test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS); + } + +-static void test_blockjob_iothread_drain_subtree(void) +-{ +- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS); +-} +- + static void test_blockjob_iothread_error_drain_all(void) + { + test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN); +@@ -1121,12 +916,6 @@ static void test_blockjob_iothread_error_drain(void) + test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE); + } + +-static void test_blockjob_iothread_error_drain_subtree(void) +-{ +- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN); +- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE); +-} +- + + typedef struct BDRVTestTopState { + BdrvChild *wait_child; +@@ -1273,14 +1062,6 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, + bdrv_drain(child_bs); + bdrv_unref(child_bs); + break; +- case BDRV_SUBTREE_DRAIN: +- /* Would have to ref/unref bs here for !detach_instead_of_delete, but +- * then the whole test becomes pointless because the graph changes +- * don't occur during the drain any more. 
*/ +- assert(detach_instead_of_delete); +- bdrv_subtree_drained_begin(bs); +- bdrv_subtree_drained_end(bs); +- break; + case BDRV_DRAIN_ALL: + bdrv_drain_all_begin(); + bdrv_drain_all_end(); +@@ -1315,11 +1096,6 @@ static void test_detach_by_drain(void) + do_test_delete_by_drain(true, BDRV_DRAIN); + } + +-static void test_detach_by_drain_subtree(void) +-{ +- do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN); +-} +- + + struct detach_by_parent_data { + BlockDriverState *parent_b; +@@ -1452,7 +1228,10 @@ static void test_detach_indirect(bool by_parent_cb) + g_assert(acb != NULL); + + /* Drain and check the expected result */ +- bdrv_subtree_drained_begin(parent_b); ++ bdrv_drained_begin(parent_b); ++ bdrv_drained_begin(a); ++ bdrv_drained_begin(b); ++ bdrv_drained_begin(c); + + g_assert(detach_by_parent_data.child_c != NULL); + +@@ -1467,12 +1246,15 @@ static void test_detach_indirect(bool by_parent_cb) + g_assert(QLIST_NEXT(child_a, next) == NULL); + + g_assert_cmpint(parent_a->quiesce_counter, ==, 1); +- g_assert_cmpint(parent_b->quiesce_counter, ==, 1); ++ g_assert_cmpint(parent_b->quiesce_counter, ==, 3); + g_assert_cmpint(a->quiesce_counter, ==, 1); +- g_assert_cmpint(b->quiesce_counter, ==, 0); ++ g_assert_cmpint(b->quiesce_counter, ==, 1); + g_assert_cmpint(c->quiesce_counter, ==, 1); + +- bdrv_subtree_drained_end(parent_b); ++ bdrv_drained_end(parent_b); ++ bdrv_drained_end(a); ++ bdrv_drained_end(b); ++ bdrv_drained_end(c); + + bdrv_unref(parent_b); + blk_unref(blk); +@@ -2202,70 +1984,47 @@ int main(int argc, char **argv) + + g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); + g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); +- g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", +- test_drv_cb_drain_subtree); + + g_test_add_func("/bdrv-drain/driver-cb/co/drain_all", + test_drv_cb_co_drain_all); + g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain); +- 
g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree", +- test_drv_cb_co_drain_subtree); +- + + g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); + g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); +- g_test_add_func("/bdrv-drain/quiesce/drain_subtree", +- test_quiesce_drain_subtree); + + g_test_add_func("/bdrv-drain/quiesce/co/drain_all", + test_quiesce_co_drain_all); + g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain); +- g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree", +- test_quiesce_co_drain_subtree); + + g_test_add_func("/bdrv-drain/nested", test_nested); +- g_test_add_func("/bdrv-drain/multiparent", test_multiparent); + +- g_test_add_func("/bdrv-drain/graph-change/drain_subtree", +- test_graph_change_drain_subtree); + g_test_add_func("/bdrv-drain/graph-change/drain_all", + test_graph_change_drain_all); + + g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all); + g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain); +- g_test_add_func("/bdrv-drain/iothread/drain_subtree", +- test_iothread_drain_subtree); + + g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); + g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); +- g_test_add_func("/bdrv-drain/blockjob/drain_subtree", +- test_blockjob_drain_subtree); + + g_test_add_func("/bdrv-drain/blockjob/error/drain_all", + test_blockjob_error_drain_all); + g_test_add_func("/bdrv-drain/blockjob/error/drain", + test_blockjob_error_drain); +- g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree", +- test_blockjob_error_drain_subtree); + + g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all", + test_blockjob_iothread_drain_all); + g_test_add_func("/bdrv-drain/blockjob/iothread/drain", + test_blockjob_iothread_drain); +- g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree", +- test_blockjob_iothread_drain_subtree); + + 
g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all", + test_blockjob_iothread_error_drain_all); + g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain", + test_blockjob_iothread_error_drain); +- g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree", +- test_blockjob_iothread_error_drain_subtree); + + g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain); + g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all); + g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); +- g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree); + g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb); + g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb); + +-- +2.31.1 + diff --git a/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch b/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch new file mode 100644 index 0000000..1529fdb --- /dev/null +++ b/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch @@ -0,0 +1,302 @@ +From 0e894c93cae97bb792dc483be8e295d097ebd7a1 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:40:58 +0100 +Subject: [PATCH 16/31] block: Revert .bdrv_drained_begin/end to + non-coroutine_fn + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [4/16] 86d6049e40a99604e414c2572b67f74b85868832 (sgarzarella/qemu-kvm-c-9-s) + +Polling during bdrv_drained_end() can be problematic (and in the future, +we may get cases for bdrv_drained_begin() where polling is forbidden, +and we don't care about already in-flight requests, but just want to +prevent new requests from arriving). 
+ +The .bdrv_drained_begin/end callbacks running in a coroutine is the only +reason why we have to do this polling, so make them non-coroutine +callbacks again. None of the callers actually yield any more. + +This means that bdrv_drained_end() effectively doesn't poll any more, +even if AIO_WAIT_WHILE() loops are still there (their condition is false +from the beginning). This is generally not a problem, but in +test-bdrv-drain, some additional explicit aio_poll() calls need to be +added because the test case wants to verify the final state after BHs +have executed. + +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Emanuele Giuseppe Esposito +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-4-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 5e8ac21717373cbe96ef7a91e216bf5788815d63) +Signed-off-by: Stefano Garzarella +--- + block.c | 4 +-- + block/io.c | 49 +++++--------------------------- + block/qed.c | 6 ++-- + block/throttle.c | 8 +++--- + include/block/block_int-common.h | 10 ++++--- + tests/unit/test-bdrv-drain.c | 18 ++++++------ + 6 files changed, 32 insertions(+), 63 deletions(-) + +diff --git a/block.c b/block.c +index ec184150a2..16a62a329c 100644 +--- a/block.c ++++ b/block.c +@@ -1713,8 +1713,8 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, + assert(is_power_of_2(bs->bl.request_alignment)); + + for (i = 0; i < bs->quiesce_counter; i++) { +- if (drv->bdrv_co_drain_begin) { +- drv->bdrv_co_drain_begin(bs); ++ if (drv->bdrv_drain_begin) { ++ drv->bdrv_drain_begin(bs); + } + } + +diff --git a/block/io.c b/block/io.c +index b9424024f9..c2ed4b2af9 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -252,55 +252,20 @@ typedef struct { + int *drained_end_counter; + } BdrvCoDrainData; + +-static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) +-{ +- BdrvCoDrainData *data = opaque; +- BlockDriverState *bs = data->bs; +- +- if (data->begin) { +- 
bs->drv->bdrv_co_drain_begin(bs); +- } else { +- bs->drv->bdrv_co_drain_end(bs); +- } +- +- /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */ +- qatomic_mb_set(&data->done, true); +- if (!data->begin) { +- qatomic_dec(data->drained_end_counter); +- } +- bdrv_dec_in_flight(bs); +- +- g_free(data); +-} +- +-/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ ++/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ + static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, + int *drained_end_counter) + { +- BdrvCoDrainData *data; +- +- if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) || +- (!begin && !bs->drv->bdrv_co_drain_end)) { ++ if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || ++ (!begin && !bs->drv->bdrv_drain_end)) { + return; + } + +- data = g_new(BdrvCoDrainData, 1); +- *data = (BdrvCoDrainData) { +- .bs = bs, +- .done = false, +- .begin = begin, +- .drained_end_counter = drained_end_counter, +- }; +- +- if (!begin) { +- qatomic_inc(drained_end_counter); ++ if (begin) { ++ bs->drv->bdrv_drain_begin(bs); ++ } else { ++ bs->drv->bdrv_drain_end(bs); + } +- +- /* Make sure the driver callback completes during the polling phase for +- * drain_begin. */ +- bdrv_inc_in_flight(bs); +- data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data); +- aio_co_schedule(bdrv_get_aio_context(bs), data->co); + } + + /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ +diff --git a/block/qed.c b/block/qed.c +index 013f826c44..c2691a85b1 100644 +--- a/block/qed.c ++++ b/block/qed.c +@@ -262,7 +262,7 @@ static bool coroutine_fn qed_plug_allocating_write_reqs(BDRVQEDState *s) + assert(!s->allocating_write_reqs_plugged); + if (s->allocating_acb != NULL) { + /* Another allocating write came concurrently. This cannot happen +- * from bdrv_qed_co_drain_begin, but it can happen when the timer runs. ++ * from bdrv_qed_drain_begin, but it can happen when the timer runs. 
+ */ + qemu_co_mutex_unlock(&s->table_lock); + return false; +@@ -365,7 +365,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs, + } + } + +-static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs) ++static void bdrv_qed_drain_begin(BlockDriverState *bs) + { + BDRVQEDState *s = bs->opaque; + +@@ -1661,7 +1661,7 @@ static BlockDriver bdrv_qed = { + .bdrv_co_check = bdrv_qed_co_check, + .bdrv_detach_aio_context = bdrv_qed_detach_aio_context, + .bdrv_attach_aio_context = bdrv_qed_attach_aio_context, +- .bdrv_co_drain_begin = bdrv_qed_co_drain_begin, ++ .bdrv_drain_begin = bdrv_qed_drain_begin, + }; + + static void bdrv_qed_init(void) +diff --git a/block/throttle.c b/block/throttle.c +index 131eba3ab4..88851c84f4 100644 +--- a/block/throttle.c ++++ b/block/throttle.c +@@ -214,7 +214,7 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state) + reopen_state->opaque = NULL; + } + +-static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) ++static void throttle_drain_begin(BlockDriverState *bs) + { + ThrottleGroupMember *tgm = bs->opaque; + if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) { +@@ -222,7 +222,7 @@ static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) + } + } + +-static void coroutine_fn throttle_co_drain_end(BlockDriverState *bs) ++static void throttle_drain_end(BlockDriverState *bs) + { + ThrottleGroupMember *tgm = bs->opaque; + assert(tgm->io_limits_disabled); +@@ -261,8 +261,8 @@ static BlockDriver bdrv_throttle = { + .bdrv_reopen_commit = throttle_reopen_commit, + .bdrv_reopen_abort = throttle_reopen_abort, + +- .bdrv_co_drain_begin = throttle_co_drain_begin, +- .bdrv_co_drain_end = throttle_co_drain_end, ++ .bdrv_drain_begin = throttle_drain_begin, ++ .bdrv_drain_end = throttle_drain_end, + + .is_filter = true, + .strong_runtime_opts = throttle_strong_runtime_opts, +diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h +index 31ae91e56e..40d646d1ed 
100644 +--- a/include/block/block_int-common.h ++++ b/include/block/block_int-common.h +@@ -735,17 +735,19 @@ struct BlockDriver { + void (*bdrv_io_unplug)(BlockDriverState *bs); + + /** +- * bdrv_co_drain_begin is called if implemented in the beginning of a ++ * bdrv_drain_begin is called if implemented in the beginning of a + * drain operation to drain and stop any internal sources of requests in + * the driver. +- * bdrv_co_drain_end is called if implemented at the end of the drain. ++ * bdrv_drain_end is called if implemented at the end of the drain. + * + * They should be used by the driver to e.g. manage scheduled I/O + * requests, or toggle an internal state. After the end of the drain new + * requests will continue normally. ++ * ++ * Implementations of both functions must not call aio_poll(). + */ +- void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs); +- void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs); ++ void (*bdrv_drain_begin)(BlockDriverState *bs); ++ void (*bdrv_drain_end)(BlockDriverState *bs); + + bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); + bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)( +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index 24f34e24ad..695519ee02 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -46,7 +46,7 @@ static void coroutine_fn sleep_in_drain_begin(void *opaque) + bdrv_dec_in_flight(bs); + } + +-static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) ++static void bdrv_test_drain_begin(BlockDriverState *bs) + { + BDRVTestState *s = bs->opaque; + s->drain_count++; +@@ -57,7 +57,7 @@ static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) + } + } + +-static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs) ++static void bdrv_test_drain_end(BlockDriverState *bs) + { + BDRVTestState *s = bs->opaque; + s->drain_count--; +@@ -111,8 +111,8 @@ static BlockDriver 
bdrv_test = { + .bdrv_close = bdrv_test_close, + .bdrv_co_preadv = bdrv_test_co_preadv, + +- .bdrv_co_drain_begin = bdrv_test_co_drain_begin, +- .bdrv_co_drain_end = bdrv_test_co_drain_end, ++ .bdrv_drain_begin = bdrv_test_drain_begin, ++ .bdrv_drain_end = bdrv_test_drain_end, + + .bdrv_child_perm = bdrv_default_perms, + +@@ -1703,6 +1703,7 @@ static void test_blockjob_commit_by_drained_end(void) + bdrv_drained_begin(bs_child); + g_assert(!job_has_completed); + bdrv_drained_end(bs_child); ++ aio_poll(qemu_get_aio_context(), false); + g_assert(job_has_completed); + + bdrv_unref(bs_parents[0]); +@@ -1858,6 +1859,7 @@ static void test_drop_intermediate_poll(void) + + g_assert(!job_has_completed); + ret = bdrv_drop_intermediate(chain[1], chain[0], NULL); ++ aio_poll(qemu_get_aio_context(), false); + g_assert(ret == 0); + g_assert(job_has_completed); + +@@ -1946,7 +1948,7 @@ static void coroutine_fn bdrv_replace_test_drain_co(void *opaque) + * .was_drained. + * Increment .drain_count. + */ +-static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) ++static void bdrv_replace_test_drain_begin(BlockDriverState *bs) + { + BDRVReplaceTestState *s = bs->opaque; + +@@ -1977,7 +1979,7 @@ static void coroutine_fn bdrv_replace_test_read_entry(void *opaque) + * If .drain_count reaches 0 and the node has a backing file, issue a + * read request. 
+ */ +-static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) ++static void bdrv_replace_test_drain_end(BlockDriverState *bs) + { + BDRVReplaceTestState *s = bs->opaque; + +@@ -2002,8 +2004,8 @@ static BlockDriver bdrv_replace_test = { + .bdrv_close = bdrv_replace_test_close, + .bdrv_co_preadv = bdrv_replace_test_co_preadv, + +- .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin, +- .bdrv_co_drain_end = bdrv_replace_test_co_drain_end, ++ .bdrv_drain_begin = bdrv_replace_test_drain_begin, ++ .bdrv_drain_end = bdrv_replace_test_drain_end, + + .bdrv_child_perm = bdrv_default_perms, + }; +-- +2.31.1 + diff --git a/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch b/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch new file mode 100644 index 0000000..19d52b5 --- /dev/null +++ b/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch @@ -0,0 +1,70 @@ +From defd6b325264d94ffb1355a8b19f9a77bd694a2f Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Mon, 7 Nov 2022 19:35:56 +0300 +Subject: [PATCH 13/31] block: drop bdrv_remove_filter_or_cow_child + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [1/16] 6339edd738c3b79f8ecb6c1e012e52b6afb1a622 (sgarzarella/qemu-kvm-c-9-s) + +Drop this simple wrapper used only in one place. We have too many graph +modifying functions even without it. 
+ +Signed-off-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Hanna Reitz +Message-Id: <20221107163558.618889-3-vsementsov@yandex-team.ru> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit f38eaec4c3618dfc4a23e20435cefb5bf8325264) +Signed-off-by: Stefano Garzarella +--- + block.c | 15 +-------------- + 1 file changed, 1 insertion(+), 14 deletions(-) + +diff --git a/block.c b/block.c +index a18f052374..ec184150a2 100644 +--- a/block.c ++++ b/block.c +@@ -93,8 +93,6 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, + static void bdrv_replace_child_noperm(BdrvChild *child, + BlockDriverState *new_bs); + static void bdrv_remove_child(BdrvChild *child, Transaction *tran); +-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, +- Transaction *tran); + + static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, +@@ -5073,17 +5071,6 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran) + tran_add(tran, &bdrv_remove_child_drv, child); + } + +-/* +- * A function to remove backing-chain child of @bs if exists: cow child for +- * format nodes (always .backing) and filter child for filters (may be .file or +- * .backing) +- */ +-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, +- Transaction *tran) +-{ +- bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran); +-} +- + static int bdrv_replace_node_noperm(BlockDriverState *from, + BlockDriverState *to, + bool auto_skip, Transaction *tran, +@@ -5168,7 +5155,7 @@ static int bdrv_replace_node_common(BlockDriverState *from, + } + + if (detach_subchain) { +- bdrv_remove_filter_or_cow_child(to_cow_parent, tran); ++ bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran); + } + + found = g_hash_table_new(NULL, NULL); +-- +2.31.1 + diff --git a/kvm-kvm-Atomic-memslot-updates.patch b/kvm-kvm-Atomic-memslot-updates.patch new file mode 100644 index 0000000..14e9e32 --- /dev/null +++ 
b/kvm-kvm-Atomic-memslot-updates.patch @@ -0,0 +1,286 @@ +From e13fdc97ff05cdee46c112c2dee70b6ef33e7fa7 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Mon, 16 Jan 2023 07:17:31 -0500 +Subject: [PATCH 31/31] kvm: Atomic memslot updates + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 138: accel: introduce accelerator blocker API +RH-Bugzilla: 1979276 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/3] 9f03181ebcad2474fbe859acbce7b9891caa216b (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276 + +commit f39b7d2b96e3e73c01bb678cd096f7baf0b9ab39 +Author: David Hildenbrand +Date: Fri Nov 11 10:47:58 2022 -0500 + + kvm: Atomic memslot updates + + If we update an existing memslot (e.g., resize, split), we temporarily + remove the memslot to re-add it immediately afterwards. These updates + are not atomic, especially not for KVM VCPU threads, such that we can + get spurious faults. + + Let's inhibit most KVM ioctls while performing relevant updates, such + that we can perform the update just as if it would happen atomically + without additional kernel support. + + We capture the add/del changes and apply them in the notifier commit + stage instead. There, we can check for overlaps and perform the ioctl + inhibiting only if really required (-> overlap). + + To keep things simple we don't perform additional checks that wouldn't + actually result in an overlap -- such as !RAM memory regions in some + cases (see kvm_set_phys_mem()). + + To minimize cache-line bouncing, use a separate indicator + (in_ioctl_lock) per CPU. Also, make sure to hold the kvm_slots_lock + while performing both actions (removing+re-adding). + + We have to wait until all IOCTLs were exited and block new ones from + getting executed. 
+ + This approach cannot result in a deadlock as long as the inhibitor does + not hold any locks that might hinder an IOCTL from getting finished and + exited - something fairly unusual. The inhibitor will always hold the BQL. + + AFAIKs, one possible candidate would be userfaultfd. If a page cannot be + placed (e.g., during postcopy), because we're waiting for a lock, or if the + userfaultfd thread cannot process a fault, because it is waiting for a + lock, there could be a deadlock. However, the BQL is not applicable here, + because any other guest memory access while holding the BQL would already + result in a deadlock. + + Nothing else in the kernel should block forever and wait for userspace + intervention. + + Note: pause_all_vcpus()/resume_all_vcpus() or + start_exclusive()/end_exclusive() cannot be used, as they either drop + the BQL or require to be called without the BQL - something inhibitors + cannot handle. We need a low-level locking mechanism that is + deadlock-free even when not releasing the BQL. 
+ + Signed-off-by: David Hildenbrand + Signed-off-by: Emanuele Giuseppe Esposito + Tested-by: Emanuele Giuseppe Esposito + Message-Id: <20221111154758.1372674-4-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + accel/kvm/kvm-all.c | 101 ++++++++++++++++++++++++++++++++++----- + include/sysemu/kvm_int.h | 8 ++++ + 2 files changed, 98 insertions(+), 11 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index ff660fd469..39ed30ab59 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -31,6 +31,7 @@ + #include "sysemu/kvm_int.h" + #include "sysemu/runstate.h" + #include "sysemu/cpus.h" ++#include "sysemu/accel-blocker.h" + #include "qemu/bswap.h" + #include "exec/memory.h" + #include "exec/ram_addr.h" +@@ -46,6 +47,7 @@ + #include "sysemu/hw_accel.h" + #include "kvm-cpus.h" + #include "sysemu/dirtylimit.h" ++#include "qemu/range.h" + + #include "hw/boards.h" + #include "monitor/stats.h" +@@ -1292,6 +1294,7 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size) + kvm_max_slot_size = max_slot_size; + } + ++/* Called with KVMMemoryListener.slots_lock held */ + static void kvm_set_phys_mem(KVMMemoryListener *kml, + MemoryRegionSection *section, bool add) + { +@@ -1326,14 +1329,12 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + ram = memory_region_get_ram_ptr(mr) + mr_offset; + ram_start_offset = memory_region_get_ram_addr(mr) + mr_offset; + +- kvm_slots_lock(); +- + if (!add) { + do { + slot_size = MIN(kvm_max_slot_size, size); + mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); + if (!mem) { +- goto out; ++ return; + } + if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { + /* +@@ -1371,7 +1372,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + start_addr += slot_size; + size -= slot_size; + } while (size); +- goto out; ++ return; + } + + /* register the new slot */ +@@ -1396,9 +1397,6 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + ram += slot_size; + 
size -= slot_size; + } while (size); +- +-out: +- kvm_slots_unlock(); + } + + static void *kvm_dirty_ring_reaper_thread(void *data) +@@ -1455,18 +1453,95 @@ static void kvm_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { + KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); ++ KVMMemoryUpdate *update; ++ ++ update = g_new0(KVMMemoryUpdate, 1); ++ update->section = *section; + +- memory_region_ref(section->mr); +- kvm_set_phys_mem(kml, section, true); ++ QSIMPLEQ_INSERT_TAIL(&kml->transaction_add, update, next); + } + + static void kvm_region_del(MemoryListener *listener, + MemoryRegionSection *section) + { + KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); ++ KVMMemoryUpdate *update; ++ ++ update = g_new0(KVMMemoryUpdate, 1); ++ update->section = *section; ++ ++ QSIMPLEQ_INSERT_TAIL(&kml->transaction_del, update, next); ++} ++ ++static void kvm_region_commit(MemoryListener *listener) ++{ ++ KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, ++ listener); ++ KVMMemoryUpdate *u1, *u2; ++ bool need_inhibit = false; ++ ++ if (QSIMPLEQ_EMPTY(&kml->transaction_add) && ++ QSIMPLEQ_EMPTY(&kml->transaction_del)) { ++ return; ++ } ++ ++ /* ++ * We have to be careful when regions to add overlap with ranges to remove. ++ * We have to simulate atomic KVM memslot updates by making sure no ioctl() ++ * is currently active. ++ * ++ * The lists are order by addresses, so it's easy to find overlaps. 
++ */ ++ u1 = QSIMPLEQ_FIRST(&kml->transaction_del); ++ u2 = QSIMPLEQ_FIRST(&kml->transaction_add); ++ while (u1 && u2) { ++ Range r1, r2; ++ ++ range_init_nofail(&r1, u1->section.offset_within_address_space, ++ int128_get64(u1->section.size)); ++ range_init_nofail(&r2, u2->section.offset_within_address_space, ++ int128_get64(u2->section.size)); ++ ++ if (range_overlaps_range(&r1, &r2)) { ++ need_inhibit = true; ++ break; ++ } ++ if (range_lob(&r1) < range_lob(&r2)) { ++ u1 = QSIMPLEQ_NEXT(u1, next); ++ } else { ++ u2 = QSIMPLEQ_NEXT(u2, next); ++ } ++ } ++ ++ kvm_slots_lock(); ++ if (need_inhibit) { ++ accel_ioctl_inhibit_begin(); ++ } ++ ++ /* Remove all memslots before adding the new ones. */ ++ while (!QSIMPLEQ_EMPTY(&kml->transaction_del)) { ++ u1 = QSIMPLEQ_FIRST(&kml->transaction_del); ++ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_del, next); + +- kvm_set_phys_mem(kml, section, false); +- memory_region_unref(section->mr); ++ kvm_set_phys_mem(kml, &u1->section, false); ++ memory_region_unref(u1->section.mr); ++ ++ g_free(u1); ++ } ++ while (!QSIMPLEQ_EMPTY(&kml->transaction_add)) { ++ u1 = QSIMPLEQ_FIRST(&kml->transaction_add); ++ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_add, next); ++ ++ memory_region_ref(u1->section.mr); ++ kvm_set_phys_mem(kml, &u1->section, true); ++ ++ g_free(u1); ++ } ++ ++ if (need_inhibit) { ++ accel_ioctl_inhibit_end(); ++ } ++ kvm_slots_unlock(); + } + + static void kvm_log_sync(MemoryListener *listener, +@@ -1610,8 +1685,12 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, + kml->slots[i].slot = i; + } + ++ QSIMPLEQ_INIT(&kml->transaction_add); ++ QSIMPLEQ_INIT(&kml->transaction_del); ++ + kml->listener.region_add = kvm_region_add; + kml->listener.region_del = kvm_region_del; ++ kml->listener.commit = kvm_region_commit; + kml->listener.log_start = kvm_log_start; + kml->listener.log_stop = kvm_log_stop; + kml->listener.priority = 10; +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index 
3b4adcdc10..60b520a13e 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -12,6 +12,7 @@ + #include "exec/memory.h" + #include "qapi/qapi-types-common.h" + #include "qemu/accel.h" ++#include "qemu/queue.h" + #include "sysemu/kvm.h" + + typedef struct KVMSlot +@@ -31,10 +32,17 @@ typedef struct KVMSlot + ram_addr_t ram_start_offset; + } KVMSlot; + ++typedef struct KVMMemoryUpdate { ++ QSIMPLEQ_ENTRY(KVMMemoryUpdate) next; ++ MemoryRegionSection section; ++} KVMMemoryUpdate; ++ + typedef struct KVMMemoryListener { + MemoryListener listener; + KVMSlot *slots; + int as_id; ++ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add; ++ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del; + } KVMMemoryListener; + + #define KVM_MSI_HASHTAB_SIZE 256 +-- +2.31.1 + diff --git a/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch b/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch new file mode 100644 index 0000000..d2dacbc --- /dev/null +++ b/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch @@ -0,0 +1,84 @@ +From f628a08d20b9ab6be24c2ab18b38a934a314c78b Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:40:56 +0100 +Subject: [PATCH 14/31] qed: Don't yield in bdrv_qed_co_drain_begin() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [2/16] f18e9aebb7e04a62e309b656bac8f2ab83df657f (sgarzarella/qemu-kvm-c-9-s) + +We want to change .bdrv_co_drained_begin() back to be a non-coroutine +callback, so in preparation, avoid yielding in its implementation. + +Because we increase bs->in_flight and bdrv_drained_begin() polls, the +behaviour is unchanged. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Emanuele Giuseppe Esposito +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 6d47eb0c8bf2d50682c7dccae74d24104076fe23) +Signed-off-by: Stefano Garzarella +--- + block/qed.c | 20 +++++++++++++++++--- + 1 file changed, 17 insertions(+), 3 deletions(-) + +diff --git a/block/qed.c b/block/qed.c +index 2f36ad342c..013f826c44 100644 +--- a/block/qed.c ++++ b/block/qed.c +@@ -282,9 +282,8 @@ static void coroutine_fn qed_unplug_allocating_write_reqs(BDRVQEDState *s) + qemu_co_mutex_unlock(&s->table_lock); + } + +-static void coroutine_fn qed_need_check_timer_entry(void *opaque) ++static void coroutine_fn qed_need_check_timer(BDRVQEDState *s) + { +- BDRVQEDState *s = opaque; + int ret; + + trace_qed_need_check_timer_cb(s); +@@ -310,9 +309,20 @@ static void coroutine_fn qed_need_check_timer_entry(void *opaque) + (void) ret; + } + ++static void coroutine_fn qed_need_check_timer_entry(void *opaque) ++{ ++ BDRVQEDState *s = opaque; ++ ++ qed_need_check_timer(opaque); ++ bdrv_dec_in_flight(s->bs); ++} ++ + static void qed_need_check_timer_cb(void *opaque) + { ++ BDRVQEDState *s = opaque; + Coroutine *co = qemu_coroutine_create(qed_need_check_timer_entry, opaque); ++ ++ bdrv_inc_in_flight(s->bs); + qemu_coroutine_enter(co); + } + +@@ -363,8 +373,12 @@ static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs) + * header is flushed. 
+ */ + if (s->need_check_timer && timer_pending(s->need_check_timer)) { ++ Coroutine *co; ++ + qed_cancel_need_check_timer(s); +- qed_need_check_timer_entry(s); ++ co = qemu_coroutine_create(qed_need_check_timer_entry, s); ++ bdrv_inc_in_flight(bs); ++ aio_co_enter(bdrv_get_aio_context(bs), co); + } + } + +-- +2.31.1 + diff --git a/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch b/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch new file mode 100644 index 0000000..42114a1 --- /dev/null +++ b/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch @@ -0,0 +1,70 @@ +From 51fcf352a97f2e99a6a3fb8ae663b45436304120 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 10 Jan 2023 14:25:34 +0100 +Subject: [PATCH 11/31] s390x/s390-virtio-ccw: Activate zPCI features on + s390-ccw-virtio-rhel8.6.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 133: s390x/s390-virtio-ccw: Activate zPCI features on s390-ccw-virtio-rhel8.6.0 +RH-Bugzilla: 2159408 +RH-Acked-by: Thomas Huth +RH-Acked-by: David Hildenbrand +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 1ed82e56fe74a283a1726c4893dc3387e645072c (clegoate/qemu-kvm-c9s) + +commit c7b14d3af7 ("s390x/s390-virtio-ccw: Switch off zPCI enhancements +on older machines") activated zPCI enhancement features (interpretation +and forward assist) silently on the s390-ccw-virtio-rhel8.6.0 machine +for RHEL8.8. It didn't seem to be a problem since migration is not +possible but it broke LEAPP upgrade to RHEL9 when the machine is +defined with a passthrough device. Activate the zPCI features also on +RHEL9.2 for the machines to be alike in both latest RHEL distros. 
+ +Upstream Status: RHEL-only +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2159408 + +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-virtio-ccw.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index aa142a1a4e..4cdd59c394 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1234,8 +1234,14 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) + + static void ccw_machine_rhel860_class_options(MachineClass *mc) + { ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "on", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "on", }, ++ }; ++ + ccw_machine_rhel900_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + + /* All RHEL machines for prior major releases are deprecated */ + mc->deprecation_reason = rhel_old_machine_deprecation; +@@ -1259,8 +1265,14 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine) + + static void ccw_machine_rhel850_class_options(MachineClass *mc) + { ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, ++ }; ++ + ccw_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + mc->smp_props.prefer_sockets = true; + } + DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); +-- +2.31.1 + diff --git a/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch b/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch new file mode 100644 index 0000000..a8e3957 --- /dev/null +++ b/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch @@ -0,0 +1,159 @@ +From 5defda06ec4c24818a34126c5048be5e274b63f5 Mon Sep 17 
00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:04 +0100 +Subject: [PATCH 22/31] stream: Replace subtree drain with a single node drain + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [10/16] a93250b1f6ef296e903df0ba5d8b29bc2ed540a8 (sgarzarella/qemu-kvm-c-9-s) + +The subtree drain was introduced in commit b1e1af394d9 as a way to avoid +graph changes between finding the base node and changing the block graph +as necessary on completion of the image streaming job. + +The block graph could change between these two points because +bdrv_set_backing_hd() first drains the parent node, which involved +polling and can do anything. + +Subtree draining was an imperfect way to make this less likely (because +with it, fewer callbacks are called during this window). Everyone agreed +that it's not really the right solution, and it was only committed as a +stopgap solution. + +This replaces the subtree drain with a solution that simply drains the +parent node before we try to find the base node, and then call a version +of bdrv_set_backing_hd() that doesn't drain, but just asserts that the +parent node is already drained. + +This way, any graph changes caused by draining happen before we start +looking at the graph and things stay consistent between finding the base +node and changing the graph. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-10-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 92140b9f3f07d80e2c27edcc6e32f392be2135e6) +Signed-off-by: Stefano Garzarella +--- + block.c | 17 ++++++++++++++--- + block/stream.c | 26 ++++++++++++++++---------- + include/block/block-global-state.h | 3 +++ + 3 files changed, 33 insertions(+), 13 deletions(-) + +diff --git a/block.c b/block.c +index b3449a312e..5330e89903 100644 +--- a/block.c ++++ b/block.c +@@ -3403,14 +3403,15 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs, + return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); + } + +-int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, +- Error **errp) ++int bdrv_set_backing_hd_drained(BlockDriverState *bs, ++ BlockDriverState *backing_hd, ++ Error **errp) + { + int ret; + Transaction *tran = tran_new(); + + GLOBAL_STATE_CODE(); +- bdrv_drained_begin(bs); ++ assert(bs->quiesce_counter > 0); + + ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); + if (ret < 0) { +@@ -3420,7 +3421,17 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + ret = bdrv_refresh_perms(bs, errp); + out: + tran_finalize(tran, ret); ++ return ret; ++} + ++int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, ++ Error **errp) ++{ ++ int ret; ++ GLOBAL_STATE_CODE(); ++ ++ bdrv_drained_begin(bs); ++ ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp); + bdrv_drained_end(bs); + + return ret; +diff --git a/block/stream.c b/block/stream.c +index 694709bd25..8744ad103f 100644 +--- a/block/stream.c ++++ b/block/stream.c +@@ -64,13 +64,16 @@ static int stream_prepare(Job *job) + bdrv_cor_filter_drop(s->cor_filter_bs); + s->cor_filter_bs = NULL; + +- bdrv_subtree_drained_begin(s->above_base); ++ /* ++ * bdrv_set_backing_hd() requires that unfiltered_bs is drained. 
Drain ++ * already here and use bdrv_set_backing_hd_drained() instead because ++ * the polling during drained_begin() might change the graph, and if we do ++ * this only later, we may end up working with the wrong base node (or it ++ * might even have gone away by the time we want to use it). ++ */ ++ bdrv_drained_begin(unfiltered_bs); + + base = bdrv_filter_or_cow_bs(s->above_base); +- if (base) { +- bdrv_ref(base); +- } +- + unfiltered_base = bdrv_skip_filters(base); + + if (bdrv_cow_child(unfiltered_bs)) { +@@ -82,7 +85,13 @@ static int stream_prepare(Job *job) + } + } + +- bdrv_set_backing_hd(unfiltered_bs, base, &local_err); ++ bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err); ++ ++ /* ++ * This call will do I/O, so the graph can change again from here on. ++ * We have already completed the graph change, so we are not in danger ++ * of operating on the wrong node any more if this happens. ++ */ + ret = bdrv_change_backing_file(unfiltered_bs, base_id, base_fmt, false); + if (local_err) { + error_report_err(local_err); +@@ -92,10 +101,7 @@ static int stream_prepare(Job *job) + } + + out: +- if (base) { +- bdrv_unref(base); +- } +- bdrv_subtree_drained_end(s->above_base); ++ bdrv_drained_end(unfiltered_bs); + return ret; + } + +diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h +index c7bd4a2088..00e0cf8aea 100644 +--- a/include/block/block-global-state.h ++++ b/include/block/block-global-state.h +@@ -82,6 +82,9 @@ int bdrv_open_file_child(const char *filename, + BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp); + int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp); ++int bdrv_set_backing_hd_drained(BlockDriverState *bs, ++ BlockDriverState *backing_hd, ++ Error **errp); + int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, + const char *bdref_key, Error **errp); + BlockDriverState *bdrv_open(const char *filename, const char 
*reference, +-- +2.31.1 + diff --git a/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch b/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch new file mode 100644 index 0000000..268c263 --- /dev/null +++ b/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch @@ -0,0 +1,153 @@ +From 093c4a6834f3ec5a05390a3630ae4edec80885b8 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:40:57 +0100 +Subject: [PATCH 15/31] test-bdrv-drain: Don't yield in + .bdrv_co_drained_begin/end() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [3/16] 5282d3e13cb85dfb480edb11b7eb2769248465df (sgarzarella/qemu-kvm-c-9-s) + +We want to change .bdrv_co_drained_begin/end() back to be non-coroutine +callbacks, so in preparation, avoid yielding in their implementation. + +This does almost the same as the existing logic in bdrv_drain_invoke(), +by creating and entering coroutines internally. However, since the test +case is by far the heaviest user of coroutine code in drain callbacks, +it is preferable to have the complexity in the test case rather than the +drain core, which is already complicated enough without this. + +The behaviour for bdrv_drain_begin() is unchanged because we increase +bs->in_flight and this is still polled. However, bdrv_drain_end() +doesn't wait for the spawned coroutine to complete any more. This is +fine, we don't rely on bdrv_drain_end() restarting all operations +immediately before the next aio_poll(). 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Emanuele Giuseppe Esposito +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 7bce1c299834557bffd92294608ea528648cfe75) +Signed-off-by: Stefano Garzarella +--- + tests/unit/test-bdrv-drain.c | 64 ++++++++++++++++++++++++++---------- + 1 file changed, 46 insertions(+), 18 deletions(-) + +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index 09dc4a4891..24f34e24ad 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -38,12 +38,22 @@ typedef struct BDRVTestState { + bool sleep_in_drain_begin; + } BDRVTestState; + ++static void coroutine_fn sleep_in_drain_begin(void *opaque) ++{ ++ BlockDriverState *bs = opaque; ++ ++ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); ++ bdrv_dec_in_flight(bs); ++} ++ + static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) + { + BDRVTestState *s = bs->opaque; + s->drain_count++; + if (s->sleep_in_drain_begin) { +- qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); ++ Coroutine *co = qemu_coroutine_create(sleep_in_drain_begin, bs); ++ bdrv_inc_in_flight(bs); ++ aio_co_enter(bdrv_get_aio_context(bs), co); + } + } + +@@ -1916,6 +1926,21 @@ static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs, + return 0; + } + ++static void coroutine_fn bdrv_replace_test_drain_co(void *opaque) ++{ ++ BlockDriverState *bs = opaque; ++ BDRVReplaceTestState *s = bs->opaque; ++ ++ /* Keep waking io_co up until it is done */ ++ while (s->io_co) { ++ aio_co_wake(s->io_co); ++ s->io_co = NULL; ++ qemu_coroutine_yield(); ++ } ++ s->drain_co = NULL; ++ bdrv_dec_in_flight(bs); ++} ++ + /** + * If .drain_count is 0, wake up .io_co if there is one; and set + * .was_drained. 
+@@ -1926,20 +1951,27 @@ static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) + BDRVReplaceTestState *s = bs->opaque; + + if (!s->drain_count) { +- /* Keep waking io_co up until it is done */ +- s->drain_co = qemu_coroutine_self(); +- while (s->io_co) { +- aio_co_wake(s->io_co); +- s->io_co = NULL; +- qemu_coroutine_yield(); +- } +- s->drain_co = NULL; +- ++ s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs); ++ bdrv_inc_in_flight(bs); ++ aio_co_enter(bdrv_get_aio_context(bs), s->drain_co); + s->was_drained = true; + } + s->drain_count++; + } + ++static void coroutine_fn bdrv_replace_test_read_entry(void *opaque) ++{ ++ BlockDriverState *bs = opaque; ++ char data; ++ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); ++ int ret; ++ ++ /* Queue a read request post-drain */ ++ ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); ++ g_assert(ret >= 0); ++ bdrv_dec_in_flight(bs); ++} ++ + /** + * Reduce .drain_count, set .was_undrained once it reaches 0. 
+ * If .drain_count reaches 0 and the node has a backing file, issue a +@@ -1951,17 +1983,13 @@ static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) + + g_assert(s->drain_count > 0); + if (!--s->drain_count) { +- int ret; +- + s->was_undrained = true; + + if (bs->backing) { +- char data; +- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); +- +- /* Queue a read request post-drain */ +- ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); +- g_assert(ret >= 0); ++ Coroutine *co = qemu_coroutine_create(bdrv_replace_test_read_entry, ++ bs); ++ bdrv_inc_in_flight(bs); ++ aio_co_enter(bdrv_get_aio_context(bs), co); + } + } + } +-- +2.31.1 + diff --git a/kvm-vhost-add-support-for-configure-interrupt.patch b/kvm-vhost-add-support-for-configure-interrupt.patch new file mode 100644 index 0000000..a7cfb2f --- /dev/null +++ b/kvm-vhost-add-support-for-configure-interrupt.patch @@ -0,0 +1,185 @@ +From 42818e2bc6fa537fe52f7f0e6b094774a1eb00e1 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:48 +0800 +Subject: [PATCH 07/31] vhost: add support for configure interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [7/10] d58b439eb093f5dd3b7ca081af0ab75780e42917 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +Add functions to support configure interrupt. +The configure interrupt process will start in vhost_dev_start +and stop in vhost_dev_stop. + +Also add the functions to support vhost_config_pending and +vhost_config_mask. + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-8-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit f9a09ca3ea69d108d828b7c82f1bd61b2df6fc96) +Signed-off-by: Cindy Lu +--- + hw/virtio/vhost.c | 78 ++++++++++++++++++++++++++++++++++++++- + include/hw/virtio/vhost.h | 4 ++ + 2 files changed, 81 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 7fb008bc9e..84dbb39e07 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -1596,7 +1596,68 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, + file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n); + r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file); + if (r < 0) { +- VHOST_OPS_DEBUG(r, "vhost_set_vring_call failed"); ++ error_report("vhost_set_vring_call failed %d", -r); ++ } ++} ++ ++bool vhost_config_pending(struct vhost_dev *hdev) ++{ ++ assert(hdev->vhost_ops); ++ if ((hdev->started == false) || ++ (hdev->vhost_ops->vhost_set_config_call == NULL)) { ++ return false; ++ } ++ ++ EventNotifier *notifier = ++ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; ++ return event_notifier_test_and_clear(notifier); ++} ++ ++void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask) ++{ ++ int fd; ++ int r; ++ EventNotifier *notifier = ++ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; ++ EventNotifier *config_notifier = &vdev->config_notifier; ++ assert(hdev->vhost_ops); ++ ++ if ((hdev->started == false) || ++ (hdev->vhost_ops->vhost_set_config_call == NULL)) { ++ return; ++ } ++ if (mask) { ++ assert(vdev->use_guest_notifier_mask); ++ fd = event_notifier_get_fd(notifier); ++ } else { ++ fd = event_notifier_get_fd(config_notifier); ++ } ++ r = hdev->vhost_ops->vhost_set_config_call(hdev, fd); ++ if (r < 0) { ++ error_report("vhost_set_config_call failed %d", -r); ++ } ++} ++ ++static void vhost_stop_config_intr(struct vhost_dev *dev) ++{ ++ int fd = -1; ++ assert(dev->vhost_ops); ++ if (dev->vhost_ops->vhost_set_config_call) { ++ 
dev->vhost_ops->vhost_set_config_call(dev, fd); ++ } ++} ++ ++static void vhost_start_config_intr(struct vhost_dev *dev) ++{ ++ int r; ++ ++ assert(dev->vhost_ops); ++ int fd = event_notifier_get_fd(&dev->vdev->config_notifier); ++ if (dev->vhost_ops->vhost_set_config_call) { ++ r = dev->vhost_ops->vhost_set_config_call(dev, fd); ++ if (!r) { ++ event_notifier_set(&dev->vdev->config_notifier); ++ } + } + } + +@@ -1836,6 +1897,16 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + } + } + ++ r = event_notifier_init( ++ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier, 0); ++ if (r < 0) { ++ return r; ++ } ++ event_notifier_test_and_clear( ++ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier); ++ if (!vdev->use_guest_notifier_mask) { ++ vhost_config_mask(hdev, vdev, true); ++ } + if (hdev->log_enabled) { + uint64_t log_base; + +@@ -1874,6 +1945,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + vhost_device_iotlb_miss(hdev, vq->used_phys, true); + } + } ++ vhost_start_config_intr(hdev); + return 0; + fail_start: + if (vrings) { +@@ -1903,6 +1975,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + + /* should only be called after backend is connected */ + assert(hdev->vhost_ops); ++ event_notifier_test_and_clear( ++ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier); ++ event_notifier_test_and_clear(&vdev->config_notifier); + + trace_vhost_dev_stop(hdev, vdev->name, vrings); + +@@ -1925,6 +2000,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + } + memory_listener_unregister(&hdev->iommu_listener); + } ++ vhost_stop_config_intr(hdev); + vhost_log_put(hdev, true); + hdev->started = false; + vdev->vhost_started = false; +diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h +index 67a6807fac..05bedb2416 100644 +--- a/include/hw/virtio/vhost.h ++++ b/include/hw/virtio/vhost.h +@@ -33,6 +33,7 @@ 
struct vhost_virtqueue { + unsigned used_size; + EventNotifier masked_notifier; + EventNotifier error_notifier; ++ EventNotifier masked_config_notifier; + struct vhost_dev *dev; + }; + +@@ -41,6 +42,7 @@ typedef unsigned long vhost_log_chunk_t; + #define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t)) + #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS) + #define VHOST_INVALID_FEATURE_BIT (0xff) ++#define VHOST_QUEUE_NUM_CONFIG_INR 0 + + struct vhost_log { + unsigned long long size; +@@ -168,6 +170,8 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); + * Disable direct notifications to vhost device. + */ + void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); ++bool vhost_config_pending(struct vhost_dev *hdev); ++void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask); + + /** + * vhost_dev_is_started() - report status of vhost device +-- +2.31.1 + diff --git a/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch b/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch new file mode 100644 index 0000000..ca93785 --- /dev/null +++ b/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch @@ -0,0 +1,157 @@ +From 55aad90e347599e88747888ddbefcba33427f386 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Fri, 16 Dec 2022 11:35:52 +0800 +Subject: [PATCH 12/31] vhost: fix vq dirty bitmap syncing when vIOMMU is + enabled + +RH-Author: Eric Auger +RH-MergeRequest: 134: vhost: fix vq dirty bitmap syncing when vIOMMU is enabled +RH-Bugzilla: 2124856 +RH-Acked-by: Peter Xu +RH-Acked-by: Jason Wang +RH-Acked-by: Laurent Vivier +RH-Commit: [1/1] 57ef499b63dc2cca6e64ee84d1dc127635868ca2 (eauger1/centos-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2124856 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=49989924 +Upstream: yes + +When vIOMMU is enabled, the vq->used_phys is actually the IOVA not +GPA. 
So we need to translate it to GPA before the syncing otherwise we +may hit the following crash since IOVA could be out of the scope of +the GPA log size. This could be noted when using virtio-IOMMU with +vhost using 1G memory. + +Fixes: c471ad0e9bd46 ("vhost_net: device IOTLB support") +Cc: qemu-stable@nongnu.org +Tested-by: Lei Yang +Reported-by: Yalan Zhang +Signed-off-by: Jason Wang +Message-Id: <20221216033552.77087-1-jasowang@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 345cc1cbcbce2bab00abc2b88338d7d89c702d6b) +Signed-off-by: Eric Auger +--- + hw/virtio/vhost.c | 84 ++++++++++++++++++++++++++++++++++++----------- + 1 file changed, 64 insertions(+), 20 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 84dbb39e07..2c566dc539 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -20,6 +20,7 @@ + #include "qemu/range.h" + #include "qemu/error-report.h" + #include "qemu/memfd.h" ++#include "qemu/log.h" + #include "standard-headers/linux/vhost_types.h" + #include "hw/virtio/virtio-bus.h" + #include "hw/virtio/virtio-access.h" +@@ -106,6 +107,24 @@ static void vhost_dev_sync_region(struct vhost_dev *dev, + } + } + ++static bool vhost_dev_has_iommu(struct vhost_dev *dev) ++{ ++ VirtIODevice *vdev = dev->vdev; ++ ++ /* ++ * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support ++ * incremental memory mapping API via IOTLB API. For platform that ++ * does not have IOMMU, there's no need to enable this feature ++ * which may cause unnecessary IOTLB miss/update transactions. 
++ */ ++ if (vdev) { ++ return virtio_bus_device_iommu_enabled(vdev) && ++ virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); ++ } else { ++ return false; ++ } ++} ++ + static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, + MemoryRegionSection *section, + hwaddr first, +@@ -137,8 +156,51 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, + continue; + } + +- vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys, +- range_get_last(vq->used_phys, vq->used_size)); ++ if (vhost_dev_has_iommu(dev)) { ++ IOMMUTLBEntry iotlb; ++ hwaddr used_phys = vq->used_phys, used_size = vq->used_size; ++ hwaddr phys, s, offset; ++ ++ while (used_size) { ++ rcu_read_lock(); ++ iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as, ++ used_phys, ++ true, ++ MEMTXATTRS_UNSPECIFIED); ++ rcu_read_unlock(); ++ ++ if (!iotlb.target_as) { ++ qemu_log_mask(LOG_GUEST_ERROR, "translation " ++ "failure for used_iova %"PRIx64"\n", ++ used_phys); ++ return -EINVAL; ++ } ++ ++ offset = used_phys & iotlb.addr_mask; ++ phys = iotlb.translated_addr + offset; ++ ++ /* ++ * Distance from start of used ring until last byte of ++ * IOMMU page. ++ */ ++ s = iotlb.addr_mask - offset; ++ /* ++ * Size of used ring, or of the part of it until end ++ * of IOMMU page. To avoid zero result, do the adding ++ * outside of MIN(). 
++ */ ++ s = MIN(s, used_size - 1) + 1; ++ ++ vhost_dev_sync_region(dev, section, start_addr, end_addr, phys, ++ range_get_last(phys, s)); ++ used_size -= s; ++ used_phys += s; ++ } ++ } else { ++ vhost_dev_sync_region(dev, section, start_addr, ++ end_addr, vq->used_phys, ++ range_get_last(vq->used_phys, vq->used_size)); ++ } + } + return 0; + } +@@ -306,24 +368,6 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) + dev->log_size = size; + } + +-static bool vhost_dev_has_iommu(struct vhost_dev *dev) +-{ +- VirtIODevice *vdev = dev->vdev; +- +- /* +- * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support +- * incremental memory mapping API via IOTLB API. For platform that +- * does not have IOMMU, there's no need to enable this feature +- * which may cause unnecessary IOTLB miss/update transactions. +- */ +- if (vdev) { +- return virtio_bus_device_iommu_enabled(vdev) && +- virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); +- } else { +- return false; +- } +-} +- + static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr, + hwaddr *plen, bool is_write) + { +-- +2.31.1 + diff --git a/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch b/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch new file mode 100644 index 0000000..1b48f5d --- /dev/null +++ b/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch @@ -0,0 +1,56 @@ +From d135303da1187d9f214e520a977fe7c47e5ce1f0 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:45 +0800 +Subject: [PATCH 04/31] vhost: introduce new VhostOps vhost_set_config_call +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/10] c2492838d9c1415e42d2507f2956d640a30325f2 (lulu6/qemu-kvm3) + 
+https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +This patch introduces new VhostOps vhost_set_config_call. +This function allows the qemu to set the config +event fd to kernel driver. + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-5-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 9b30cdf9bbf9524a4f4f8a6eb551eb13cbbd3893) +Signed-off-by: Cindy Lu +--- + include/hw/virtio/vhost-backend.h | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h +index eab46d7f0b..c5ab49051e 100644 +--- a/include/hw/virtio/vhost-backend.h ++++ b/include/hw/virtio/vhost-backend.h +@@ -128,6 +128,8 @@ typedef int (*vhost_get_device_id_op)(struct vhost_dev *dev, uint32_t *dev_id); + + typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev); + ++typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev, ++ int fd); + typedef struct VhostOps { + VhostBackendType backend_type; + vhost_backend_init vhost_backend_init; +@@ -174,6 +176,7 @@ typedef struct VhostOps { + vhost_vq_get_addr_op vhost_vq_get_addr; + vhost_get_device_id_op vhost_get_device_id; + vhost_force_iommu_op vhost_force_iommu; ++ vhost_set_config_call_op vhost_set_config_call; + } VhostOps; + + int vhost_backend_update_device_iotlb(struct vhost_dev *dev, +-- +2.31.1 + diff --git a/kvm-vhost-vdpa-add-support-for-config-interrupt.patch b/kvm-vhost-vdpa-add-support-for-config-interrupt.patch new file mode 100644 index 0000000..88d4df6 --- /dev/null +++ b/kvm-vhost-vdpa-add-support-for-config-interrupt.patch @@ -0,0 +1,73 @@ +From e01563a8de9a45937ffd8d4c1d74a6890ffb6eb6 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:46 +0800 +Subject: [PATCH 05/31] vhost-vdpa: add support for config interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 
132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/10] 49bfd214a503f8e199ff93f4bbfcbd4c4f2405b5 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +Add new call back function in vhost-vdpa, The function +vhost_set_config_call can set the event fd to kernel. +This function will be called in the vhost_dev_start +and vhost_dev_stop + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-6-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 259f3acc1c675dd77ebbdb28a483f5d0220bdbf6) +Signed-off-by: Cindy Lu +--- + hw/virtio/trace-events | 1 + + hw/virtio/vhost-vdpa.c | 8 ++++++++ + 2 files changed, 9 insertions(+) + +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index 14fc5b9bb2..46f2faf04e 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -62,6 +62,7 @@ vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI + vhost_vdpa_set_owner(void *dev) "dev: %p" + vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64 + vhost_vdpa_get_iova_range(void *dev, uint64_t first, uint64_t last) "dev: %p first: 0x%"PRIx64" last: 0x%"PRIx64 ++vhost_vdpa_set_config_call(void *dev, int fd)"dev: %p fd: %d" + + # virtio.c + virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 7468e44b87..c5be2645b0 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -754,6 +754,13 @@ static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) + return 0; + } + ++static int 
vhost_vdpa_set_config_call(struct vhost_dev *dev, ++ int fd) ++{ ++ trace_vhost_vdpa_set_config_call(dev, fd); ++ return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd); ++} ++ + static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config, + uint32_t config_len) + { +@@ -1310,4 +1317,5 @@ const VhostOps vdpa_ops = { + .vhost_get_device_id = vhost_vdpa_get_device_id, + .vhost_vq_get_addr = vhost_vdpa_vq_get_addr, + .vhost_force_iommu = vhost_vdpa_force_iommu, ++ .vhost_set_config_call = vhost_vdpa_set_config_call, + }; +-- +2.31.1 + diff --git a/kvm-virtio-add-support-for-configure-interrupt.patch b/kvm-virtio-add-support-for-configure-interrupt.patch new file mode 100644 index 0000000..02f4666 --- /dev/null +++ b/kvm-virtio-add-support-for-configure-interrupt.patch @@ -0,0 +1,115 @@ +From e04c76339580effae41617b690b58a6605e0f40b Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:47 +0800 +Subject: [PATCH 06/31] virtio: add support for configure interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [6/10] 7048eb488b732578686d451684babaf17b582b05 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +Add the functions to support the configure interrupt in virtio +The function virtio_config_guest_notifier_read will notify the +guest if there is an configure interrupt. +The function virtio_config_set_guest_notifier_fd_handler is +to set the fd hander for the notifier + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-7-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 7d847d0c9b93b91160f40d69a65c904d76f1edd8) +Signed-off-by: Cindy Lu +--- + hw/virtio/virtio.c | 29 +++++++++++++++++++++++++++++ + include/hw/virtio/virtio.h | 4 ++++ + 2 files changed, 33 insertions(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index eb6347ab5d..34e9c5d141 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -4012,7 +4012,14 @@ static void virtio_queue_guest_notifier_read(EventNotifier *n) + virtio_irq(vq); + } + } ++static void virtio_config_guest_notifier_read(EventNotifier *n) ++{ ++ VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier); + ++ if (event_notifier_test_and_clear(n)) { ++ virtio_notify_config(vdev); ++ } ++} + void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, + bool with_irqfd) + { +@@ -4029,6 +4036,23 @@ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, + } + } + ++void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev, ++ bool assign, bool with_irqfd) ++{ ++ EventNotifier *n; ++ n = &vdev->config_notifier; ++ if (assign && !with_irqfd) { ++ event_notifier_set_handler(n, virtio_config_guest_notifier_read); ++ } else { ++ event_notifier_set_handler(n, NULL); ++ } ++ if (!assign) { ++ /* Test and clear notifier before closing it,*/ ++ /* in case poll callback didn't have time to run. 
*/ ++ virtio_config_guest_notifier_read(n); ++ } ++} ++ + EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) + { + return &vq->guest_notifier; +@@ -4109,6 +4133,11 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) + return &vq->host_notifier; + } + ++EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev) ++{ ++ return &vdev->config_notifier; ++} ++ + void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled) + { + vq->host_notifier_enabled = enabled; +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index 1f4a41b958..9c3a4642f2 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -138,6 +138,7 @@ struct VirtIODevice + AddressSpace *dma_as; + QLIST_HEAD(, VirtQueue) *vector_queues; + QTAILQ_ENTRY(VirtIODevice) next; ++ EventNotifier config_notifier; + }; + + struct VirtioDeviceClass { +@@ -360,6 +361,9 @@ void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ct + void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx); + VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); + VirtQueue *virtio_vector_next_queue(VirtQueue *vq); ++EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev); ++void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev, ++ bool assign, bool with_irqfd); + + static inline void virtio_add_feature(uint64_t *features, unsigned int fbit) + { +-- +2.31.1 + diff --git a/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch b/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch new file mode 100644 index 0000000..ea2589a --- /dev/null +++ b/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch @@ -0,0 +1,262 @@ +From 34a267758cf016f34b327318500efdbf0f606033 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:42 +0800 +Subject: [PATCH 01/31] virtio: introduce macro VIRTIO_CONFIG_IRQ_IDX +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/10] f374aaae221bc5a4c2521a267d21350b812e11ba (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +To support configure interrupt for vhost-vdpa +Introduce VIRTIO_CONFIG_IRQ_IDX -1 as configure interrupt's queue index, +Then we can reuse the functions guest_notifier_mask and guest_notifier_pending. +Add the check of queue index in these drivers, if the driver does not support +configure interrupt, the function will just return + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-2-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 544f0278afcab2bebab61b14e4c2c58e65911f5b) +Signed-off-by: Cindy Lu +--- + hw/display/vhost-user-gpu.c | 18 ++++++++++++++++++ + hw/net/virtio-net.c | 22 ++++++++++++++++++++-- + hw/virtio/vhost-user-fs.c | 18 ++++++++++++++++++ + hw/virtio/vhost-user-gpio.c | 10 ++++++++++ + hw/virtio/vhost-vsock-common.c | 18 ++++++++++++++++++ + hw/virtio/virtio-crypto.c | 18 ++++++++++++++++++ + include/hw/virtio/virtio.h | 3 +++ + 7 files changed, 105 insertions(+), 2 deletions(-) + +diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c +index 19c0e20103..4380a5e672 100644 +--- a/hw/display/vhost-user-gpu.c ++++ b/hw/display/vhost-user-gpu.c +@@ -486,6 +486,15 @@ vhost_user_gpu_guest_notifier_pending(VirtIODevice *vdev, int idx) + { + VhostUserGPU *g = VHOST_USER_GPU(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return false; ++ } + return 
vhost_virtqueue_pending(&g->vhost->dev, idx); + } + +@@ -494,6 +503,15 @@ vhost_user_gpu_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) + { + VhostUserGPU *g = VHOST_USER_GPU(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } + vhost_virtqueue_mask(&g->vhost->dev, vdev, idx, mask); + } + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index aba12759d5..bee35d6f9f 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3316,6 +3316,15 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) + } else { + nc = qemu_get_subqueue(n->nic, vq2q(idx)); + } ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return false ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return false; ++ } + return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); + } + +@@ -3339,8 +3348,17 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, + } else { + nc = qemu_get_subqueue(n->nic, vq2q(idx)); + } +- vhost_net_virtqueue_mask(get_vhost_net(nc->peer), +- vdev, idx, mask); ++ /* ++ *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } ++ ++ vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); + } + + static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index d97b179e6f..f5049735ac 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -159,6 +159,15 @@ 
static void vuf_guest_notifier_mask(VirtIODevice *vdev, int idx, + { + VHostUserFS *fs = VHOST_USER_FS(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } + vhost_virtqueue_mask(&fs->vhost_dev, vdev, idx, mask); + } + +@@ -166,6 +175,15 @@ static bool vuf_guest_notifier_pending(VirtIODevice *vdev, int idx) + { + VHostUserFS *fs = VHOST_USER_FS(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return false; ++ } + return vhost_virtqueue_pending(&fs->vhost_dev, idx); + } + +diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c +index b7b82a1099..fe3da32c74 100644 +--- a/hw/virtio/vhost-user-gpio.c ++++ b/hw/virtio/vhost-user-gpio.c +@@ -191,6 +191,16 @@ static void vu_gpio_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) + { + VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } ++ + vhost_virtqueue_mask(&gpio->vhost_dev, vdev, idx, mask); + } + +diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c +index d21c72b401..d2b5519d5a 100644 +--- a/hw/virtio/vhost-vsock-common.c ++++ b/hw/virtio/vhost-vsock-common.c +@@ -127,6 +127,15 @@ static void vhost_vsock_common_guest_notifier_mask(VirtIODevice *vdev, int idx, + { + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of 
configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } + vhost_virtqueue_mask(&vvc->vhost_dev, vdev, idx, mask); + } + +@@ -135,6 +144,15 @@ static bool vhost_vsock_common_guest_notifier_pending(VirtIODevice *vdev, + { + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return false; ++ } + return vhost_virtqueue_pending(&vvc->vhost_dev, idx); + } + +diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c +index 97da74e719..516425e26a 100644 +--- a/hw/virtio/virtio-crypto.c ++++ b/hw/virtio/virtio-crypto.c +@@ -1182,6 +1182,15 @@ static void virtio_crypto_guest_notifier_mask(VirtIODevice *vdev, int idx, + + assert(vcrypto->vhost_started); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } + cryptodev_vhost_virtqueue_mask(vdev, queue, idx, mask); + } + +@@ -1192,6 +1201,15 @@ static bool virtio_crypto_guest_notifier_pending(VirtIODevice *vdev, int idx) + + assert(vcrypto->vhost_started); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return false; ++ } + return cryptodev_vhost_virtqueue_pending(vdev, queue, idx); + } + +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index acfd4df125..1f4a41b958 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -79,6 +79,9 @@ typedef struct 
VirtQueueElement + + #define VIRTIO_NO_VECTOR 0xffff + ++/* special index value used internally for config irqs */ ++#define VIRTIO_CONFIG_IRQ_IDX -1 ++ + #define TYPE_VIRTIO_DEVICE "virtio-device" + OBJECT_DECLARE_TYPE(VirtIODevice, VirtioDeviceClass, VIRTIO_DEVICE) + +-- +2.31.1 + diff --git a/kvm-virtio-mmio-add-support-for-configure-interrupt.patch b/kvm-virtio-mmio-add-support-for-configure-interrupt.patch new file mode 100644 index 0000000..275b197 --- /dev/null +++ b/kvm-virtio-mmio-add-support-for-configure-interrupt.patch @@ -0,0 +1,80 @@ +From 181705090c9963c2da97811838ace5bb058737c6 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:50 +0800 +Subject: [PATCH 09/31] virtio-mmio: add support for configure interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [9/10] 742cc2b425ffd7bbd393772526e7481446ee131c (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +Add configure interrupt support in virtio-mmio bus. +add function to set configure guest notifier. + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-10-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit cd336e834620ea78edef049c3567f312974e475b) +Signed-off-by: Cindy Lu +--- + hw/virtio/virtio-mmio.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c +index d240efef97..103260ec15 100644 +--- a/hw/virtio/virtio-mmio.c ++++ b/hw/virtio/virtio-mmio.c +@@ -670,7 +670,30 @@ static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign, + + return 0; + } ++static int virtio_mmio_set_config_guest_notifier(DeviceState *d, bool assign, ++ bool with_irqfd) ++{ ++ VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); ++ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); ++ VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); ++ EventNotifier *notifier = virtio_config_get_guest_notifier(vdev); ++ int r = 0; + ++ if (assign) { ++ r = event_notifier_init(notifier, 0); ++ if (r < 0) { ++ return r; ++ } ++ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); ++ } else { ++ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); ++ event_notifier_cleanup(notifier); ++ } ++ if (vdc->guest_notifier_mask && vdev->use_guest_notifier_mask) { ++ vdc->guest_notifier_mask(vdev, VIRTIO_CONFIG_IRQ_IDX, !assign); ++ } ++ return r; ++} + static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs, + bool assign) + { +@@ -692,6 +715,10 @@ static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs, + goto assign_error; + } + } ++ r = virtio_mmio_set_config_guest_notifier(d, assign, with_irqfd); ++ if (r < 0) { ++ goto assign_error; ++ } + + return 0; + +-- +2.31.1 + diff --git a/kvm-virtio-net-add-support-for-configure-interrupt.patch b/kvm-virtio-net-add-support-for-configure-interrupt.patch new file mode 100644 index 0000000..74b956a --- /dev/null +++ b/kvm-virtio-net-add-support-for-configure-interrupt.patch @@ -0,0 +1,115 @@ +From 2b8e3409edb8a17d89c3829cfa3d92bdfdd43c53 Mon Sep 17 00:00:00 2001 +From: Cindy Lu 
+Date: Thu, 22 Dec 2022 15:04:49 +0800 +Subject: [PATCH 08/31] virtio-net: add support for configure interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [8/10] 1b125169bea6c81c508b154fa1bae68af153b312 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +Add functions to support configure interrupt in virtio_net +Add the functions to support vhost_net_config_pending +and vhost_net_config_mask. + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-9-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 8aab0d1dbe90c7b5ac6672a1a09b0578178f5f4c) +Signed-off-by: Cindy Lu +--- + hw/net/vhost_net-stub.c | 9 +++++++++ + hw/net/vhost_net.c | 9 +++++++++ + hw/net/virtio-net.c | 4 ++-- + include/net/vhost_net.h | 2 ++ + 4 files changed, 22 insertions(+), 2 deletions(-) + +diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c +index 9f7daae99c..c36f258201 100644 +--- a/hw/net/vhost_net-stub.c ++++ b/hw/net/vhost_net-stub.c +@@ -82,6 +82,15 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, + { + } + ++bool vhost_net_config_pending(VHostNetState *net) ++{ ++ return false; ++} ++ ++void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) ++{ ++} ++ + int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) + { + return -1; +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index 043058ff43..6a55f5a473 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -478,6 +478,15 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, + vhost_virtqueue_mask(&net->dev, dev, idx, mask); + } + ++bool 
vhost_net_config_pending(VHostNetState *net) ++{ ++ return vhost_config_pending(&net->dev); ++} ++ ++void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) ++{ ++ vhost_config_mask(&net->dev, dev, mask); ++} + VHostNetState *get_vhost_net(NetClientState *nc) + { + VHostNetState *vhost_net = 0; +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index bee35d6f9f..ec974f7a76 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3323,7 +3323,7 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) + */ + + if (idx == VIRTIO_CONFIG_IRQ_IDX) { +- return false; ++ return vhost_net_config_pending(get_vhost_net(nc->peer)); + } + return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); + } +@@ -3355,9 +3355,9 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, + */ + + if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask); + return; + } +- + vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); + } + +diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h +index 40b9a40074..dbbd0dc04e 100644 +--- a/include/net/vhost_net.h ++++ b/include/net/vhost_net.h +@@ -39,6 +39,8 @@ int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, + bool vhost_net_virtqueue_pending(VHostNetState *net, int n); + void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, + int idx, bool mask); ++bool vhost_net_config_pending(VHostNetState *net); ++void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask); + int vhost_net_notify_migration_done(VHostNetState *net, char* mac_addr); + VHostNetState *get_vhost_net(NetClientState *nc); + +-- +2.31.1 + diff --git a/kvm-virtio-pci-add-support-for-configure-interrupt.patch b/kvm-virtio-pci-add-support-for-configure-interrupt.patch new file mode 100644 index 0000000..14070a4 --- /dev/null +++ b/kvm-virtio-pci-add-support-for-configure-interrupt.patch @@ -0,0 
+1,274 @@ +From 61ac1476d3820c97e1cc103af422b17bc94c6ca5 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:51 +0800 +Subject: [PATCH 10/31] virtio-pci: add support for configure interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [10/10] ebd6a11d7699660d8ac5a4e44a790f823daea57c (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +Add process to handle the configure interrupt, The function's +logic is the same with vq interrupt.Add extra process to check +the configure interrupt + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-11-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 1680542862edd963e6380dd4121a5e85df55581f) +Signed-off-by: Cindy Lu +--- + hw/virtio/virtio-pci.c | 118 +++++++++++++++++++++++++++------ + include/hw/virtio/virtio-pci.h | 4 +- + 2 files changed, 102 insertions(+), 20 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index ec816ea367..3f00e91718 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -751,7 +751,8 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, + VirtQueue *vq; + + if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { +- return -1; ++ *n = virtio_config_get_guest_notifier(vdev); ++ *vector = vdev->config_vector; + } else { + if (!virtio_queue_get_num(vdev, queue_no)) { + return -1; +@@ -811,7 +812,7 @@ undo: + } + return ret; + } +-static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) ++static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) + { + int queue_no; + int ret = 0; +@@ -826,6 +827,10 @@ static int 
kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) + return ret; + } + ++static int kvm_virtio_pci_vector_config_use(VirtIOPCIProxy *proxy) ++{ ++ return kvm_virtio_pci_vector_use_one(proxy, VIRTIO_CONFIG_IRQ_IDX); ++} + + static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, + int queue_no) +@@ -850,7 +855,7 @@ static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, + kvm_virtio_pci_vq_vector_release(proxy, vector); + } + +-static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) ++static void kvm_virtio_pci_vector_vq_release(VirtIOPCIProxy *proxy, int nvqs) + { + int queue_no; + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); +@@ -863,6 +868,11 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) + } + } + ++static void kvm_virtio_pci_vector_config_release(VirtIOPCIProxy *proxy) ++{ ++ kvm_virtio_pci_vector_release_one(proxy, VIRTIO_CONFIG_IRQ_IDX); ++} ++ + static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, + unsigned int queue_no, + unsigned int vector, +@@ -944,9 +954,19 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, + } + vq = virtio_vector_next_queue(vq); + } +- ++ /* unmask config intr */ ++ if (vector == vdev->config_vector) { ++ n = virtio_config_get_guest_notifier(vdev); ++ ret = virtio_pci_one_vector_unmask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, ++ msg, n); ++ if (ret < 0) { ++ goto undo_config; ++ } ++ } + return 0; +- ++undo_config: ++ n = virtio_config_get_guest_notifier(vdev); ++ virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n); + undo: + vq = virtio_vector_first_queue(vdev, vector); + while (vq && unmasked >= 0) { +@@ -980,6 +1000,11 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector) + } + vq = virtio_vector_next_queue(vq); + } ++ ++ if (vector == vdev->config_vector) { ++ n = virtio_config_get_guest_notifier(vdev); ++ virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n); 
++ } + } + + static void virtio_pci_vector_poll(PCIDevice *dev, +@@ -1011,6 +1036,34 @@ static void virtio_pci_vector_poll(PCIDevice *dev, + msix_set_pending(dev, vector); + } + } ++ /* poll the config intr */ ++ ret = virtio_pci_get_notifier(proxy, VIRTIO_CONFIG_IRQ_IDX, &notifier, ++ &vector); ++ if (ret < 0) { ++ return; ++ } ++ if (vector < vector_start || vector >= vector_end || ++ !msix_is_masked(dev, vector)) { ++ return; ++ } ++ if (k->guest_notifier_pending) { ++ if (k->guest_notifier_pending(vdev, VIRTIO_CONFIG_IRQ_IDX)) { ++ msix_set_pending(dev, vector); ++ } ++ } else if (event_notifier_test_and_clear(notifier)) { ++ msix_set_pending(dev, vector); ++ } ++} ++ ++void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq, ++ int n, bool assign, ++ bool with_irqfd) ++{ ++ if (n == VIRTIO_CONFIG_IRQ_IDX) { ++ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); ++ } else { ++ virtio_queue_set_guest_notifier_fd_handler(vq, assign, with_irqfd); ++ } + } + + static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, +@@ -1019,17 +1072,25 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, + VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); +- VirtQueue *vq = virtio_get_queue(vdev, n); +- EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); ++ VirtQueue *vq = NULL; ++ EventNotifier *notifier = NULL; ++ ++ if (n == VIRTIO_CONFIG_IRQ_IDX) { ++ notifier = virtio_config_get_guest_notifier(vdev); ++ } else { ++ vq = virtio_get_queue(vdev, n); ++ notifier = virtio_queue_get_guest_notifier(vq); ++ } + + if (assign) { + int r = event_notifier_init(notifier, 0); + if (r < 0) { + return r; + } +- virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd); ++ virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, true, with_irqfd); + } else { +- 
virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd); ++ virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, false, ++ with_irqfd); + event_notifier_cleanup(notifier); + } + +@@ -1072,10 +1133,13 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) + proxy->nvqs_with_notifiers = nvqs; + + /* Must unset vector notifier while guest notifier is still assigned */ +- if ((proxy->vector_irqfd || k->guest_notifier_mask) && !assign) { ++ if ((proxy->vector_irqfd || ++ (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) && ++ !assign) { + msix_unset_vector_notifiers(&proxy->pci_dev); + if (proxy->vector_irqfd) { +- kvm_virtio_pci_vector_release(proxy, nvqs); ++ kvm_virtio_pci_vector_vq_release(proxy, nvqs); ++ kvm_virtio_pci_vector_config_release(proxy); + g_free(proxy->vector_irqfd); + proxy->vector_irqfd = NULL; + } +@@ -1091,20 +1155,30 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) + goto assign_error; + } + } +- ++ r = virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, assign, ++ with_irqfd); ++ if (r < 0) { ++ goto config_assign_error; ++ } + /* Must set vector notifier after guest notifier has been assigned */ +- if ((with_irqfd || k->guest_notifier_mask) && assign) { ++ if ((with_irqfd || ++ (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) && ++ assign) { + if (with_irqfd) { + proxy->vector_irqfd = + g_malloc0(sizeof(*proxy->vector_irqfd) * + msix_nr_vectors_allocated(&proxy->pci_dev)); +- r = kvm_virtio_pci_vector_use(proxy, nvqs); ++ r = kvm_virtio_pci_vector_vq_use(proxy, nvqs); ++ if (r < 0) { ++ goto config_assign_error; ++ } ++ r = kvm_virtio_pci_vector_config_use(proxy); + if (r < 0) { +- goto assign_error; ++ goto config_error; + } + } +- r = msix_set_vector_notifiers(&proxy->pci_dev, +- virtio_pci_vector_unmask, ++ ++ r = msix_set_vector_notifiers(&proxy->pci_dev, virtio_pci_vector_unmask, + virtio_pci_vector_mask, + virtio_pci_vector_poll); + if (r < 0) 
{ +@@ -1117,9 +1191,15 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) + notifiers_error: + if (with_irqfd) { + assert(assign); +- kvm_virtio_pci_vector_release(proxy, nvqs); ++ kvm_virtio_pci_vector_vq_release(proxy, nvqs); + } +- ++config_error: ++ if (with_irqfd) { ++ kvm_virtio_pci_vector_config_release(proxy); ++ } ++config_assign_error: ++ virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, !assign, ++ with_irqfd); + assign_error: + /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */ + assert(assign); +diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h +index 938799e8f6..c02e278f46 100644 +--- a/include/hw/virtio/virtio-pci.h ++++ b/include/hw/virtio/virtio-pci.h +@@ -256,5 +256,7 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t); + * @fixed_queues. + */ + unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues); +- ++void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq, ++ int n, bool assign, ++ bool with_irqfd); + #endif +-- +2.31.1 + diff --git a/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch b/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch new file mode 100644 index 0000000..a8c32a2 --- /dev/null +++ b/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch @@ -0,0 +1,272 @@ +From 9a234f849273d3480e4a88042cb1ea06a37a626b Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:43 +0800 +Subject: [PATCH 02/31] virtio-pci: decouple notifier from interrupt process +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/10] a20f4c9ff38b239531d12cbcc7deaa649c86abc3 (lulu6/qemu-kvm3) + 
+https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +To reuse the notifier process. We add the virtio_pci_get_notifier +to get the notifier and vector. The INPUT for this function is IDX, +The OUTPUT is the notifier and the vector + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-3-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 2e07f69d0c828e21515b63dc22884d548540b382) +Signed-off-by: Cindy Lu +--- + hw/virtio/virtio-pci.c | 88 +++++++++++++++++++++++++++--------------- + 1 file changed, 57 insertions(+), 31 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index a1c9dfa7bb..52c7692fff 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -728,29 +728,41 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy, + } + + static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy, +- unsigned int queue_no, ++ EventNotifier *n, + unsigned int vector) + { + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; +- VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); +- VirtQueue *vq = virtio_get_queue(vdev, queue_no); +- EventNotifier *n = virtio_queue_get_guest_notifier(vq); + return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq); + } + + static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy, +- unsigned int queue_no, ++ EventNotifier *n , + unsigned int vector) + { +- VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); +- VirtQueue *vq = virtio_get_queue(vdev, queue_no); +- EventNotifier *n = virtio_queue_get_guest_notifier(vq); + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; + int ret; + + ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq); + assert(ret == 0); + } ++static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, ++ EventNotifier **n, unsigned int *vector) ++{ ++ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); ++ 
VirtQueue *vq; ++ ++ if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { ++ return -1; ++ } else { ++ if (!virtio_queue_get_num(vdev, queue_no)) { ++ return -1; ++ } ++ *vector = virtio_queue_vector(vdev, queue_no); ++ vq = virtio_get_queue(vdev, queue_no); ++ *n = virtio_queue_get_guest_notifier(vq); ++ } ++ return 0; ++} + + static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) + { +@@ -759,12 +771,15 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + unsigned int vector; + int ret, queue_no; +- ++ EventNotifier *n; + for (queue_no = 0; queue_no < nvqs; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + break; + } +- vector = virtio_queue_vector(vdev, queue_no); ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ break; ++ } + if (vector >= msix_nr_vectors_allocated(dev)) { + continue; + } +@@ -776,7 +791,7 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) + * Otherwise, delay until unmasked in the frontend. 
+ */ + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector); ++ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); + if (ret < 0) { + kvm_virtio_pci_vq_vector_release(proxy, vector); + goto undo; +@@ -792,7 +807,11 @@ undo: + continue; + } + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ break; ++ } ++ kvm_virtio_pci_irqfd_release(proxy, n, vector); + } + kvm_virtio_pci_vq_vector_release(proxy, vector); + } +@@ -806,12 +825,16 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) + unsigned int vector; + int queue_no; + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); +- ++ EventNotifier *n; ++ int ret ; + for (queue_no = 0; queue_no < nvqs; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + break; + } +- vector = virtio_queue_vector(vdev, queue_no); ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ break; ++ } + if (vector >= msix_nr_vectors_allocated(dev)) { + continue; + } +@@ -819,21 +842,20 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) + * Otherwise, it was cleaned when masked in the frontend. 
+ */ + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); ++ kvm_virtio_pci_irqfd_release(proxy, n, vector); + } + kvm_virtio_pci_vq_vector_release(proxy, vector); + } + } + +-static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy, ++static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, + unsigned int queue_no, + unsigned int vector, +- MSIMessage msg) ++ MSIMessage msg, ++ EventNotifier *n) + { + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); +- VirtQueue *vq = virtio_get_queue(vdev, queue_no); +- EventNotifier *n = virtio_queue_get_guest_notifier(vq); + VirtIOIRQFD *irqfd; + int ret = 0; + +@@ -860,14 +882,15 @@ static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy, + event_notifier_set(n); + } + } else { +- ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector); ++ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); + } + return ret; + } + +-static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy, ++static void virtio_pci_one_vector_mask(VirtIOPCIProxy *proxy, + unsigned int queue_no, +- unsigned int vector) ++ unsigned int vector, ++ EventNotifier *n) + { + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); +@@ -878,7 +901,7 @@ static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy, + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { + k->guest_notifier_mask(vdev, queue_no, true); + } else { +- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); ++ kvm_virtio_pci_irqfd_release(proxy, n, vector); + } + } + +@@ -888,6 +911,7 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, + VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtQueue *vq = virtio_vector_first_queue(vdev, vector); ++ EventNotifier *n; + int ret, index, 
unmasked = 0; + + while (vq) { +@@ -896,7 +920,8 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, + break; + } + if (index < proxy->nvqs_with_notifiers) { +- ret = virtio_pci_vq_vector_unmask(proxy, index, vector, msg); ++ n = virtio_queue_get_guest_notifier(vq); ++ ret = virtio_pci_one_vector_unmask(proxy, index, vector, msg, n); + if (ret < 0) { + goto undo; + } +@@ -912,7 +937,8 @@ undo: + while (vq && unmasked >= 0) { + index = virtio_get_queue_index(vq); + if (index < proxy->nvqs_with_notifiers) { +- virtio_pci_vq_vector_mask(proxy, index, vector); ++ n = virtio_queue_get_guest_notifier(vq); ++ virtio_pci_one_vector_mask(proxy, index, vector, n); + --unmasked; + } + vq = virtio_vector_next_queue(vq); +@@ -925,15 +951,17 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector) + VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtQueue *vq = virtio_vector_first_queue(vdev, vector); ++ EventNotifier *n; + int index; + + while (vq) { + index = virtio_get_queue_index(vq); ++ n = virtio_queue_get_guest_notifier(vq); + if (!virtio_queue_get_num(vdev, index)) { + break; + } + if (index < proxy->nvqs_with_notifiers) { +- virtio_pci_vq_vector_mask(proxy, index, vector); ++ virtio_pci_one_vector_mask(proxy, index, vector, n); + } + vq = virtio_vector_next_queue(vq); + } +@@ -949,19 +977,17 @@ static void virtio_pci_vector_poll(PCIDevice *dev, + int queue_no; + unsigned int vector; + EventNotifier *notifier; +- VirtQueue *vq; ++ int ret; + + for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) { +- if (!virtio_queue_get_num(vdev, queue_no)) { ++ ret = virtio_pci_get_notifier(proxy, queue_no, &notifier, &vector); ++ if (ret < 0) { + break; + } +- vector = virtio_queue_vector(vdev, queue_no); + if (vector < vector_start || vector >= vector_end || + !msix_is_masked(dev, vector)) { + continue; + } +- vq = virtio_get_queue(vdev, queue_no); +- 
notifier = virtio_queue_get_guest_notifier(vq); + if (k->guest_notifier_pending) { + if (k->guest_notifier_pending(vdev, queue_no)) { + msix_set_pending(dev, vector); +-- +2.31.1 + diff --git a/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch b/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch new file mode 100644 index 0000000..be9b3c7 --- /dev/null +++ b/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch @@ -0,0 +1,212 @@ +From 58cd577ff157cfaf7506bba135db58e75c330ff0 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:44 +0800 +Subject: [PATCH 03/31] virtio-pci: decouple the single vector from the + interrupt process +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/10] 2c79cb678f005fb2f53b2db0f237347634ab3422 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 + +To reuse the interrupt process in configure interrupt +Need to decouple the single vector from the interrupt process. +We add new function kvm_virtio_pci_vector_use_one and _release_one. +These functions are used for the single vector, the whole process will +finish in the loop with vq number. + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-4-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit ee3b8dc6cc496ba7f4e27aed4493275c706a7942) +Signed-off-by: Cindy Lu +--- + hw/virtio/virtio-pci.c | 131 +++++++++++++++++++++++------------------ + 1 file changed, 73 insertions(+), 58 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index 52c7692fff..ec816ea367 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -699,7 +699,6 @@ static uint32_t virtio_read_config(PCIDevice *pci_dev, + } + + static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, +- unsigned int queue_no, + unsigned int vector) + { + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; +@@ -764,87 +763,103 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, + return 0; + } + +-static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) ++static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no) + { ++ unsigned int vector; ++ int ret; ++ EventNotifier *n; + PCIDevice *dev = &proxy->pci_dev; + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); +- unsigned int vector; +- int ret, queue_no; +- EventNotifier *n; +- for (queue_no = 0; queue_no < nvqs; queue_no++) { +- if (!virtio_queue_get_num(vdev, queue_no)) { +- break; +- } +- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); +- if (ret < 0) { +- break; +- } +- if (vector >= msix_nr_vectors_allocated(dev)) { +- continue; +- } +- ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector); ++ ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ return ret; ++ } ++ if (vector >= msix_nr_vectors_allocated(dev)) { ++ return 0; ++ } ++ ret = kvm_virtio_pci_vq_vector_use(proxy, vector); ++ if (ret < 0) { ++ goto undo; ++ } ++ /* ++ * If guest supports masking, set up irqfd now. ++ * Otherwise, delay until unmasked in the frontend. 
++ */ ++ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { ++ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); + if (ret < 0) { ++ kvm_virtio_pci_vq_vector_release(proxy, vector); + goto undo; + } +- /* If guest supports masking, set up irqfd now. +- * Otherwise, delay until unmasked in the frontend. +- */ +- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); +- if (ret < 0) { +- kvm_virtio_pci_vq_vector_release(proxy, vector); +- goto undo; +- } +- } + } +- return 0; + ++ return 0; + undo: +- while (--queue_no >= 0) { +- vector = virtio_queue_vector(vdev, queue_no); +- if (vector >= msix_nr_vectors_allocated(dev)) { +- continue; ++ ++ vector = virtio_queue_vector(vdev, queue_no); ++ if (vector >= msix_nr_vectors_allocated(dev)) { ++ return ret; ++ } ++ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ return ret; + } +- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); +- if (ret < 0) { +- break; +- } +- kvm_virtio_pci_irqfd_release(proxy, n, vector); ++ kvm_virtio_pci_irqfd_release(proxy, n, vector); ++ } ++ return ret; ++} ++static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) ++{ ++ int queue_no; ++ int ret = 0; ++ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); ++ ++ for (queue_no = 0; queue_no < nvqs; queue_no++) { ++ if (!virtio_queue_get_num(vdev, queue_no)) { ++ return -1; + } +- kvm_virtio_pci_vq_vector_release(proxy, vector); ++ ret = kvm_virtio_pci_vector_use_one(proxy, queue_no); + } + return ret; + } + +-static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) ++ ++static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, ++ int queue_no) + { +- PCIDevice *dev = &proxy->pci_dev; + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + unsigned 
int vector; +- int queue_no; +- VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + EventNotifier *n; +- int ret ; ++ int ret; ++ VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); ++ PCIDevice *dev = &proxy->pci_dev; ++ ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ return; ++ } ++ if (vector >= msix_nr_vectors_allocated(dev)) { ++ return; ++ } ++ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { ++ kvm_virtio_pci_irqfd_release(proxy, n, vector); ++ } ++ kvm_virtio_pci_vq_vector_release(proxy, vector); ++} ++ ++static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) ++{ ++ int queue_no; ++ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); ++ + for (queue_no = 0; queue_no < nvqs; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + break; + } +- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); +- if (ret < 0) { +- break; +- } +- if (vector >= msix_nr_vectors_allocated(dev)) { +- continue; +- } +- /* If guest supports masking, clean up irqfd now. +- * Otherwise, it was cleaned when masked in the frontend. +- */ +- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- kvm_virtio_pci_irqfd_release(proxy, n, vector); +- } +- kvm_virtio_pci_vq_vector_release(proxy, vector); ++ kvm_virtio_pci_vector_release_one(proxy, queue_no); + } + } + +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index e143966..b01376f 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.2.0 -Release: 4%{?rcrel}%{?dist}%{?cc_suffix} +Release: 5%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -210,6 +210,68 @@ Patch30: kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch Patch31: kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch # For bz#2155749 - [regression][stable guest abi][qemu-kvm7.2]Migration failed due to virtio-rng device between RHEL8.8 and RHEL9.2/MSI-X Patch32: kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch33: kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch34: kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch35: kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch36: kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch37: kvm-vhost-vdpa-add-support-for-config-interrupt.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch38: kvm-virtio-add-support-for-configure-interrupt.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch39: kvm-vhost-add-support-for-configure-interrupt.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch40: kvm-virtio-net-add-support-for-configure-interrupt.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch41: kvm-virtio-mmio-add-support-for-configure-interrupt.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch42: kvm-virtio-pci-add-support-for-configure-interrupt.patch +# For bz#2159408 - [s390x] VMs with ISM passthrough don't autostart after leapp upgrade from RHEL 8 +Patch43: kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch +# For 
bz#2124856 - VM with virtio interface and iommu=on will crash when try to migrate +Patch44: kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch45: kvm-block-drop-bdrv_remove_filter_or_cow_child.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch46: kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch47: kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch48: kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch49: kvm-block-Remove-drained_end_counter.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch50: kvm-block-Inline-bdrv_drain_invoke.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch51: kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch52: kvm-block-Drain-individual-nodes-during-reopen.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch53: kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch54: 
kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch55: kvm-block-Remove-subtree-drains.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch56: kvm-block-Call-drain-callbacks-only-once.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch57: kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch58: kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch59: kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch60: kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch +# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on +Patch61: kvm-accel-introduce-accelerator-blocker-API.patch +# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on +Patch62: kvm-KVM-keep-track-of-running-ioctls.patch +# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on +Patch63: kvm-kvm-Atomic-memslot-updates.patch %if %{have_clang} BuildRequires: clang @@ -1238,6 +1300,49 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Tue Jan 17 2023 Miroslav Rezanina - 7.2.0-5 +- kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch [bz#1905805] +- kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch [bz#1905805] +- 
kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch [bz#1905805] +- kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch [bz#1905805] +- kvm-vhost-vdpa-add-support-for-config-interrupt.patch [bz#1905805] +- kvm-virtio-add-support-for-configure-interrupt.patch [bz#1905805] +- kvm-vhost-add-support-for-configure-interrupt.patch [bz#1905805] +- kvm-virtio-net-add-support-for-configure-interrupt.patch [bz#1905805] +- kvm-virtio-mmio-add-support-for-configure-interrupt.patch [bz#1905805] +- kvm-virtio-pci-add-support-for-configure-interrupt.patch [bz#1905805] +- kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch [bz#2159408] +- kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch [bz#2124856] +- kvm-block-drop-bdrv_remove_filter_or_cow_child.patch [bz#2155112] +- kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch [bz#2155112] +- kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch [bz#2155112] +- kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch [bz#2155112] +- kvm-block-Remove-drained_end_counter.patch [bz#2155112] +- kvm-block-Inline-bdrv_drain_invoke.patch [bz#2155112] +- kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch [bz#2155112] +- kvm-block-Drain-individual-nodes-during-reopen.patch [bz#2155112] +- kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch [bz#2155112] +- kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch [bz#2155112] +- kvm-block-Remove-subtree-drains.patch [bz#2155112] +- kvm-block-Call-drain-callbacks-only-once.patch [bz#2155112] +- kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch [bz#2155112] +- kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch [bz#2155112] +- kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch [bz#2155112] +- kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch [bz#2155112] +- kvm-accel-introduce-accelerator-blocker-API.patch [bz#1979276] +- kvm-KVM-keep-track-of-running-ioctls.patch [bz#1979276] 
+- kvm-kvm-Atomic-memslot-updates.patch [bz#1979276] +- Resolves: bz#1905805 + (support config interrupt in vhost-vdpa qemu) +- Resolves: bz#2159408 + ([s390x] VMs with ISM passthrough don't autostart after leapp upgrade from RHEL 8) +- Resolves: bz#2124856 + (VM with virtio interface and iommu=on will crash when try to migrate) +- Resolves: bz#2155112 + (Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)) +- Resolves: bz#1979276 + (SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on) + * Thu Jan 12 2023 Miroslav Rezanina - 7.2.0-4 - kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch [bz#2155749] - kvm-Update-QGA-service-for-new-command-line.patch [bz#2156515] From dd0eece2ef0928536ea22c19773a1140902d78c5 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 30 Jan 2023 03:00:34 -0500 Subject: [PATCH 180/195] * Mon Jan 30 2023 Miroslav Rezanina - 7.2.0-6 - kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch [bz#2141088] - kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch [bz#2141088] - kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch [bz#2141088] - kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch [bz#2141088] - kvm-s390x-pv-Implement-a-CGS-check-helper.patch [bz#2122523] - kvm-s390x-pci-coalesce-unmap-operations.patch [bz#2163701] - kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch [bz#2163701] - kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch [bz#2163701] - kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch [bz#2149191] - Resolves: bz#2141088 (vDPA SVQ guest announce support) - Resolves: bz#2122523 (Secure guest can't boot with maximal number of vcpus (248)) - Resolves: bz#2163701 ([s390x] VM fails to start with ISM passed through) - Resolves: bz#2149191 ([RFE][guest-agent] - USB bus type support) --- ...-add-usb-support-to-guest-get-fsinfo.patch | 49 ++++++ 
kvm-s390x-pci-coalesce-unmap-operations.patch | 125 +++++++++++++++ ...ISM-passthrough-devices-on-shutdown-.patch | 147 ++++++++++++++++++ ...-DMA-aperture-to-be-bound-by-vfio-DM.patch | 91 +++++++++++ ...390x-pv-Implement-a-CGS-check-helper.patch | 109 +++++++++++++ ...le-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch | 44 ++++++ ...IO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch | 59 +++++++ ...y-virtio_net_get_config-to-early-ret.patch | 74 +++++++++ ...VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch | 46 ++++++ qemu-kvm.spec | 39 ++++- 10 files changed, 782 insertions(+), 1 deletion(-) create mode 100644 kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch create mode 100644 kvm-s390x-pci-coalesce-unmap-operations.patch create mode 100644 kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch create mode 100644 kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch create mode 100644 kvm-s390x-pv-Implement-a-CGS-check-helper.patch create mode 100644 kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch create mode 100644 kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch create mode 100644 kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch create mode 100644 kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch diff --git a/kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch b/kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch new file mode 100644 index 0000000..d039212 --- /dev/null +++ b/kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch @@ -0,0 +1,49 @@ +From 48f45171b89b8ed24f2b2484d63b00ea7818b5c3 Mon Sep 17 00:00:00 2001 +From: Kfir Manor +Date: Sun, 22 Jan 2023 17:33:07 +0200 +Subject: [PATCH 9/9] qga/linux: add usb support to guest-get-fsinfo + +RH-Author: Kostiantyn Kostiuk +RH-MergeRequest: 140: qga/linux: add usb support to guest-get-fsinfo +RH-Bugzilla: 2149191 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: yvugenfi +RH-Commit: [1/1] bae929a2d0d0ad20e7308ede69c26499fc2119c7 
(kostyanf14/redhat_centos-stream_src_qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2149191 +Upstream patch: https://patchew.org/QEMU/20230122153307.1050593-1-kfir@daynix.com/ + +Signed-off-by: Kfir Manor +Reviewed-by: Konstantin Kostiuk +Signed-off-by: Konstantin Kostiuk +--- + qga/commands-posix.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/qga/commands-posix.c b/qga/commands-posix.c +index 32493d6383..f1b2b87c13 100644 +--- a/qga/commands-posix.c ++++ b/qga/commands-posix.c +@@ -877,7 +877,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, + g_str_equal(driver, "sym53c8xx") || + g_str_equal(driver, "virtio-pci") || + g_str_equal(driver, "ahci") || +- g_str_equal(driver, "nvme"))) { ++ g_str_equal(driver, "nvme") || ++ g_str_equal(driver, "xhci_hcd") || ++ g_str_equal(driver, "ehci-pci"))) { + break; + } + +@@ -974,6 +976,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, + } + } else if (strcmp(driver, "nvme") == 0) { + disk->bus_type = GUEST_DISK_BUS_TYPE_NVME; ++ } else if (strcmp(driver, "ehci-pci") == 0 || strcmp(driver, "xhci_hcd") == 0) { ++ disk->bus_type = GUEST_DISK_BUS_TYPE_USB; + } else { + g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath); + goto cleanup; +-- +2.31.1 + diff --git a/kvm-s390x-pci-coalesce-unmap-operations.patch b/kvm-s390x-pci-coalesce-unmap-operations.patch new file mode 100644 index 0000000..8bf1f61 --- /dev/null +++ b/kvm-s390x-pci-coalesce-unmap-operations.patch @@ -0,0 +1,125 @@ +From ed90f91b61844abd2dff2eb970f721a6cf072235 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 28 Oct 2022 15:47:57 -0400 +Subject: [PATCH 6/9] s390x/pci: coalesce unmap operations +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset +RH-Bugzilla: 2163701 +RH-Acked-by: Thomas Huth 
+RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/3] 80c3a2c1d720057ae2a80b338ea06c9c6c804532 (clegoate/qemu-kvm-c9s) + +Currently, each unmapped page is handled as an individual iommu +region notification. Attempt to group contiguous unmap operations +into fewer notifications to reduce overhead. + +Signed-off-by: Matthew Rosato +Message-Id: <20221028194758.204007-3-mjrosato@linux.ibm.com> +Reviewed-by: Eric Farman +Signed-off-by: Thomas Huth +(cherry picked from commit ef536007c3301bbd6a787e4c2210ea289adaa6f0) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-inst.c | 51 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 51 insertions(+) + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 7cc4bcf850..66e764f901 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -640,6 +640,8 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, + } + g_hash_table_remove(iommu->iotlb, &entry->iova); + inc_dma_avail(iommu); ++ /* Don't notify the iommu yet, maybe we can bundle contiguous unmaps */ ++ goto out; + } else { + if (cache) { + if (cache->perm == entry->perm && +@@ -663,15 +665,44 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, + dec_dma_avail(iommu); + } + ++ /* ++ * All associated iotlb entries have already been cleared, trigger the ++ * unmaps. ++ */ + memory_region_notify_iommu(&iommu->iommu_mr, 0, event); + + out: + return iommu->dma_limit ? 
iommu->dma_limit->avail : 1; + } + ++static void s390_pci_batch_unmap(S390PCIIOMMU *iommu, uint64_t iova, ++ uint64_t len) ++{ ++ uint64_t remain = len, start = iova, end = start + len - 1, mask, size; ++ IOMMUTLBEvent event = { ++ .type = IOMMU_NOTIFIER_UNMAP, ++ .entry = { ++ .target_as = &address_space_memory, ++ .translated_addr = 0, ++ .perm = IOMMU_NONE, ++ }, ++ }; ++ ++ while (remain >= TARGET_PAGE_SIZE) { ++ mask = dma_aligned_pow2_mask(start, end, 64); ++ size = mask + 1; ++ event.entry.iova = start; ++ event.entry.addr_mask = mask; ++ memory_region_notify_iommu(&iommu->iommu_mr, 0, event); ++ start += size; ++ remain -= size; ++ } ++} ++ + int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + { + CPUS390XState *env = &cpu->env; ++ uint64_t iova, coalesce = 0; + uint32_t fh; + uint16_t error = 0; + S390PCIBusDevice *pbdev; +@@ -742,6 +773,21 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + break; + } + ++ /* ++ * If this is an unmap of a PTE, let's try to coalesce multiple unmaps ++ * into as few notifier events as possible. 
++ */ ++ if (entry.perm == IOMMU_NONE && entry.len == TARGET_PAGE_SIZE) { ++ if (coalesce == 0) { ++ iova = entry.iova; ++ } ++ coalesce += entry.len; ++ } else if (coalesce > 0) { ++ /* Unleash the coalesced unmap before processing a new map */ ++ s390_pci_batch_unmap(iommu, iova, coalesce); ++ coalesce = 0; ++ } ++ + start += entry.len; + while (entry.iova < start && entry.iova < end) { + if (dma_avail > 0 || entry.perm == IOMMU_NONE) { +@@ -759,6 +805,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + } + } + } ++ if (coalesce) { ++ /* Unleash the coalesced unmap before finishing rpcit */ ++ s390_pci_batch_unmap(iommu, iova, coalesce); ++ coalesce = 0; ++ } + if (again && dma_avail > 0) + goto retry; + err: +-- +2.31.1 + diff --git a/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch b/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch new file mode 100644 index 0000000..bbe2595 --- /dev/null +++ b/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch @@ -0,0 +1,147 @@ +From 1ed1f8fc20a4883bc0bc1f58d299b0278abc5442 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 9 Dec 2022 14:57:00 -0500 +Subject: [PATCH 8/9] s390x/pci: reset ISM passthrough devices on shutdown and + system reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset +RH-Bugzilla: 2163701 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/3] c531352b9d57f51ba938d4c46ee19a5706ade697 (clegoate/qemu-kvm-c9s) + +ISM device firmware stores unique state information that can +cause a wholesale unmap of the associated IOMMU (e.g.
when +we get a termination signal for QEMU) to trigger firmware errors +because firmware believes we are attempting to invalidate entries +that are still in-use by the guest OS (when in fact that guest is +in the process of being terminated or rebooted). +To alleviate this, register both a shutdown notifier (for unexpected +termination cases e.g. virsh destroy) as well as a reset callback +(for cases like guest OS reboot). For each of these scenarios, trigger +PCI device reset; this is enough to indicate to firmware that the IOMMU +is no longer in-use by the guest OS, making it safe to invalidate any +associated IOMMU entries. + +Fixes: 15d0e7942d3b ("s390x/pci: don't fence interpreted devices without MSI-X") +Signed-off-by: Matthew Rosato +Message-Id: <20221209195700.263824-1-mjrosato@linux.ibm.com> +Reviewed-by: Eric Farman +[thuth: Adjusted the hunk in s390-pci-vfio.c due to different context] +Signed-off-by: Thomas Huth +(cherry picked from commit 03451953c79e6b31f7860ee0c35b28e181d573c1) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 28 ++++++++++++++++++++++++++++ + hw/s390x/s390-pci-vfio.c | 2 ++ + include/hw/s390x/s390-pci-bus.h | 5 +++++ + 3 files changed, 35 insertions(+) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 977e7daa15..02751f3597 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -24,6 +24,8 @@ + #include "hw/pci/msi.h" + #include "qemu/error-report.h" + #include "qemu/module.h" ++#include "sysemu/reset.h" ++#include "sysemu/runstate.h" + + #ifndef DEBUG_S390PCI_BUS + #define DEBUG_S390PCI_BUS 0 +@@ -150,10 +152,30 @@ out: + psccb->header.response_code = cpu_to_be16(rc); + } + ++static void s390_pci_shutdown_notifier(Notifier *n, void *opaque) ++{ ++ S390PCIBusDevice *pbdev = container_of(n, S390PCIBusDevice, ++ shutdown_notifier); ++ ++ pci_device_reset(pbdev->pdev); ++} ++ ++static void s390_pci_reset_cb(void *opaque) ++{ ++ S390PCIBusDevice *pbdev = opaque; ++ ++ 
pci_device_reset(pbdev->pdev); ++} ++ + static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev) + { + HotplugHandler *hotplug_ctrl; + ++ if (pbdev->pft == ZPCI_PFT_ISM) { ++ notifier_remove(&pbdev->shutdown_notifier); ++ qemu_unregister_reset(s390_pci_reset_cb, pbdev); ++ } ++ + /* Unplug the PCI device */ + if (pbdev->pdev) { + DeviceState *pdev = DEVICE(pbdev->pdev); +@@ -1111,6 +1133,12 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + pbdev->fh |= FH_SHM_VFIO; + pbdev->forwarding_assist = false; + } ++ /* Register shutdown notifier and reset callback for ISM devices */ ++ if (pbdev->pft == ZPCI_PFT_ISM) { ++ pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier; ++ qemu_register_shutdown_notifier(&pbdev->shutdown_notifier); ++ qemu_register_reset(s390_pci_reset_cb, pbdev); ++ } + } else { + pbdev->fh |= FH_SHM_EMUL; + /* Always intercept emulated devices */ +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index f7bf36cec8..f51190d466 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -124,6 +124,8 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, + /* The following values remain 0 until we support other FMB formats */ + pbdev->zpci_fn.fmbl = 0; + pbdev->zpci_fn.pft = 0; ++ /* Store function type separately for type-specific behavior */ ++ pbdev->pft = cap->pft; + + /* + * If appropriate, reduce the size of the supported DMA aperture reported +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index 1c46e3a269..e0a9f9385b 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -39,6 +39,9 @@ + #define UID_CHECKING_ENABLED 0x01 + #define ZPCI_DTSM 0x40 + ++/* zPCI Function Types */ ++#define ZPCI_PFT_ISM 5 ++ + OBJECT_DECLARE_SIMPLE_TYPE(S390pciState, S390_PCI_HOST_BRIDGE) + OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBus, S390_PCI_BUS) + OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBusDevice, S390_PCI_DEVICE) +@@ -344,6 +347,7 
@@ struct S390PCIBusDevice { + uint16_t noi; + uint16_t maxstbl; + uint8_t sum; ++ uint8_t pft; + S390PCIGroup *pci_group; + ClpRspQueryPci zpci_fn; + S390MsixInfo msix; +@@ -352,6 +356,7 @@ struct S390PCIBusDevice { + MemoryRegion msix_notify_mr; + IndAddr *summary_ind; + IndAddr *indicator; ++ Notifier shutdown_notifier; + bool pci_unplug_request_processed; + bool unplug_requested; + bool interp; +-- +2.31.1 + diff --git a/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch b/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch new file mode 100644 index 0000000..0992724 --- /dev/null +++ b/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch @@ -0,0 +1,91 @@ +From ee69c8c57fe62fc200f749c4ce3927c88803644d Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 28 Oct 2022 15:47:58 -0400 +Subject: [PATCH 7/9] s390x/pci: shrink DMA aperture to be bound by vfio DMA + limit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset +RH-Bugzilla: 2163701 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/3] 0956bbb4773dd0085f6aed59d6284c704b4fed3b (clegoate/qemu-kvm-c9s) + +Currently, s390x-pci performs accounting against the vfio DMA +limit and triggers the guest to clean up mappings when the limit +is reached. Let's go a step further and also limit the size of +the supported DMA aperture reported to the guest based upon the +initial vfio DMA limit reported for the container (if less than +the size reported by the firmware/host zPCI layer). This +avoids processing sections of the guest DMA table during global +refresh that, for common use cases, will never be used anyway, and +makes exhausting the vfio DMA limit due to mismatch between guest +aperture size and host limit far less likely and more indicative +of an error.
+ +Signed-off-by: Matthew Rosato +Message-Id: <20221028194758.204007-4-mjrosato@linux.ibm.com> +Reviewed-by: Eric Farman +Signed-off-by: Thomas Huth +(cherry picked from commit df202e3ff3fccb49868e08f20d0bda86cb953fbe) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-vfio.c | 11 +++++++++++ + include/hw/s390x/s390-pci-bus.h | 1 + + 2 files changed, 12 insertions(+) + +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 5f0adb0b4a..f7bf36cec8 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -84,6 +84,7 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, + cnt->users = 1; + cnt->avail = avail; + QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link); ++ pbdev->iommu->max_dma_limit = avail; + return cnt; + } + +@@ -103,6 +104,7 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, + struct vfio_info_cap_header *hdr; + struct vfio_device_info_cap_zpci_base *cap; + VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); ++ uint64_t vfio_size; + + hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE); + +@@ -122,6 +124,15 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, + /* The following values remain 0 until we support other FMB formats */ + pbdev->zpci_fn.fmbl = 0; + pbdev->zpci_fn.pft = 0; ++ ++ /* ++ * If appropriate, reduce the size of the supported DMA aperture reported ++ * to the guest based upon the vfio DMA limit. 
++ */ ++ vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS; ++ if (vfio_size < (cap->end_dma - cap->start_dma + 1)) { ++ pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1; ++ } + } + + static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info, +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index 0605fcea24..1c46e3a269 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -278,6 +278,7 @@ struct S390PCIIOMMU { + uint64_t g_iota; + uint64_t pba; + uint64_t pal; ++ uint64_t max_dma_limit; + GHashTable *iotlb; + S390PCIDMACount *dma_limit; + }; +-- +2.31.1 + diff --git a/kvm-s390x-pv-Implement-a-CGS-check-helper.patch b/kvm-s390x-pv-Implement-a-CGS-check-helper.patch new file mode 100644 index 0000000..c3383af --- /dev/null +++ b/kvm-s390x-pv-Implement-a-CGS-check-helper.patch @@ -0,0 +1,109 @@ +From 9452246e59a5f16f44fdf9a7d514b947faf1d5fc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 16 Jan 2023 18:46:05 +0100 +Subject: [PATCH 5/9] s390x/pv: Implement a CGS check helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 139: s390x/pv: Implement a CGS check helper +RH-Bugzilla: 2122523 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Commit: [1/1] 8551ce772b10de653b4e1c8be60aae60ec98b421 (clegoate/qemu-kvm-c9s) + +When a protected VM is started with the maximum number of CPUs (248), +the service call providing information on the CPUs requires more +buffer space than allocated and QEMU disgracefully aborts : + + LOADPARM=[........] + Using virtio-blk. + Using SCSI scheme. + ................................................................................... 
+ qemu-system-s390x: KVM_S390_MEM_OP failed: Argument list too long + +When protected virtualization is initialized, compute the maximum +number of vCPUs supported by the machine and return useful information +to the user before the machine starts in case of error. + +Suggested-by: Thomas Huth +Reviewed-by: Thomas Huth +Signed-off-by: Cédric Le Goater +Message-Id: <20230116174607.2459498-2-clg@kaod.org> +Signed-off-by: Thomas Huth +(cherry picked from commit 75d7150c636569f6687f7e70a33be893be43eb5f) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/pv.c | 40 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c +index 8dfe92d8df..8a1c71436b 100644 +--- a/hw/s390x/pv.c ++++ b/hw/s390x/pv.c +@@ -20,6 +20,7 @@ + #include "exec/confidential-guest-support.h" + #include "hw/s390x/ipl.h" + #include "hw/s390x/pv.h" ++#include "hw/s390x/sclp.h" + #include "target/s390x/kvm/kvm_s390x.h" + + static bool info_valid; +@@ -249,6 +250,41 @@ struct S390PVGuestClass { + ConfidentialGuestSupportClass parent_class; + }; + ++/* ++ * If protected virtualization is enabled, the amount of data that the ++ * Read SCP Info Service Call can use is limited to one page. The ++ * available space also depends on the Extended-Length SCCB (ELS) ++ * feature which can take more buffer space to store feature ++ * information. This impacts the maximum number of CPUs supported in ++ * the machine. ++ */ ++static uint32_t s390_pv_get_max_cpus(void) ++{ ++ int offset_cpu = s390_has_feat(S390_FEAT_EXTENDED_LENGTH_SCCB) ? 
++ offsetof(ReadInfo, entries) : SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET; ++ ++ return (TARGET_PAGE_SIZE - offset_cpu) / sizeof(CPUEntry); ++} ++ ++static bool s390_pv_check_cpus(Error **errp) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ uint32_t pv_max_cpus = s390_pv_get_max_cpus(); ++ ++ if (ms->smp.max_cpus > pv_max_cpus) { ++ error_setg(errp, "Protected VMs support a maximum of %d CPUs", ++ pv_max_cpus); ++ return false; ++ } ++ ++ return true; ++} ++ ++static bool s390_pv_guest_check(ConfidentialGuestSupport *cgs, Error **errp) ++{ ++ return s390_pv_check_cpus(errp); ++} ++ + int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + if (!object_dynamic_cast(OBJECT(cgs), TYPE_S390_PV_GUEST)) { +@@ -261,6 +297,10 @@ int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + return -1; + } + ++ if (!s390_pv_guest_check(cgs, errp)) { ++ return -1; ++ } ++ + cgs->ready = true; + + return 0; +-- +2.31.1 + diff --git a/kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch b/kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch new file mode 100644 index 0000000..d800258 --- /dev/null +++ b/kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch @@ -0,0 +1,44 @@ +From fbb177ad84d562a20e51e71c73257d2ef85be2d9 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Wed, 21 Dec 2022 12:50:15 +0100 +Subject: [PATCH 4/9] vdpa: do not handle VIRTIO_NET_F_GUEST_ANNOUNCE in + vhost-vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 137: vDPA net SVQ guest announce support +RH-Bugzilla: 2141088 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Jason Wang +RH-Commit: [4/4] b3960a8b3e4ca569b1b1e6ceccf2051d8c4b1079 (eperezmartin/qemu-kvm) + +So qemu emulates it even in case the device does not support it. 
+ +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221221115015.1400889-5-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 980003debddd18306ea2e1364b96598383c0e257) +--- + net/vhost-vdpa.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 52ef9cb3a2..b06540ac89 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -72,7 +72,6 @@ const int vdpa_feature_bits[] = { + VIRTIO_F_RING_RESET, + VIRTIO_NET_F_RSS, + VIRTIO_NET_F_HASH_REPORT, +- VIRTIO_NET_F_GUEST_ANNOUNCE, + VIRTIO_NET_F_STATUS, + VHOST_INVALID_FEATURE_BIT + }; +-- +2.31.1 + diff --git a/kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch b/kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch new file mode 100644 index 0000000..ebb7f38 --- /dev/null +++ b/kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch @@ -0,0 +1,59 @@ +From b71724e94c94acd6e09fed2b47be2901799c2353 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Wed, 21 Dec 2022 12:50:14 +0100 +Subject: [PATCH 3/9] vdpa: handle VIRTIO_NET_CTRL_ANNOUNCE in + vhost_vdpa_net_handle_ctrl_avail +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 137: vDPA net SVQ guest announce support +RH-Bugzilla: 2141088 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Jason Wang +RH-Commit: [3/4] c4ef5b62a5d41911565b8960a88bb48d746ff6c7 (eperezmartin/qemu-kvm) + +Since this capability is emulated by qemu shadowed CVQ cannot forward it +to the device. Process all that command within qemu. + +Signed-off-by: Eugenio Pérez +Message-Id: <20221221115015.1400889-4-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Jason Wang +(cherry picked from commit 3f9a3eeb7ca6acd899e2205a9118928b4cd94e47) +--- + net/vhost-vdpa.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 2b4b85d8f8..52ef9cb3a2 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -489,9 +489,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, + s->cvq_cmd_out_buffer, + vhost_vdpa_net_cvq_cmd_len()); +- dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); +- if (unlikely(dev_written < 0)) { +- goto out; ++ if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) { ++ /* ++ * Guest announce capability is emulated by qemu, so don't forward to ++ * the device. ++ */ ++ dev_written = sizeof(status); ++ *s->status = VIRTIO_NET_OK; ++ } else { ++ dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); ++ if (unlikely(dev_written < 0)) { ++ goto out; ++ } + } + + if (unlikely(dev_written < sizeof(status))) { +-- +2.31.1 + diff --git a/kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch b/kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch new file mode 100644 index 0000000..d797023 --- /dev/null +++ b/kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch @@ -0,0 +1,74 @@ +From 3f55d12df35552ae948587a62d6f9015664adc13 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Wed, 21 Dec 2022 12:50:12 +0100 +Subject: [PATCH 1/9] virtio_net: Modify virtio_net_get_config to early return +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 137: vDPA net SVQ guest announce support +RH-Bugzilla: 2141088 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Jason Wang +RH-Commit: [1/4] 4f5e79afd54e157f32e6fff56ae33e2b71492525 (eperezmartin/qemu-kvm) + +Next 
patches introduce more code on vhost-vdpa branch, which already has +too much indentation. + +Signed-off-by: Eugenio Pérez +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Michael S. Tsirkin +Acked-by: Jason Wang +Message-Id: <20221221115015.1400889-2-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit ebc141a62508dc91901373c1a19fe7e2cf560dfb) +--- + hw/net/virtio-net.c | 28 +++++++++++++++------------- + 1 file changed, 15 insertions(+), 13 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index ec974f7a76..5935e55653 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -168,20 +168,22 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) + if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { + ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg, + n->config_size); +- if (ret != -1) { +- /* +- * Some NIC/kernel combinations present 0 as the mac address. As +- * that is not a legal address, try to proceed with the +- * address from the QEMU command line in the hope that the +- * address has been configured correctly elsewhere - just not +- * reported by the device. +- */ +- if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { +- info_report("Zero hardware mac address detected. Ignoring."); +- memcpy(netcfg.mac, n->mac, ETH_ALEN); +- } +- memcpy(config, &netcfg, n->config_size); ++ if (ret == -1) { ++ return; + } ++ ++ /* ++ * Some NIC/kernel combinations present 0 as the mac address. As that ++ * is not a legal address, try to proceed with the address from the ++ * QEMU command line in the hope that the address has been configured ++ * correctly elsewhere - just not reported by the device. ++ */ ++ if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { ++ info_report("Zero hardware mac address detected.
Ignoring."); ++ memcpy(netcfg.mac, n->mac, ETH_ALEN); ++ } ++ ++ memcpy(config, &netcfg, n->config_size); + } + } + +-- +2.31.1 + diff --git a/kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch b/kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch new file mode 100644 index 0000000..866957c --- /dev/null +++ b/kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch @@ -0,0 +1,46 @@ +From b3d728b53abaae0c9884dfb5e9c216b1088196e3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Wed, 21 Dec 2022 12:50:13 +0100 +Subject: [PATCH 2/9] virtio_net: copy VIRTIO_NET_S_ANNOUNCE if device model + has it +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 137: vDPA net SVQ guest announce support +RH-Bugzilla: 2141088 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Jason Wang +RH-Commit: [2/4] fb04186829eb93bab3c9ececf90fa5b035ffa2ec (eperezmartin/qemu-kvm) + +Status part of the emulated feature. It will follow device model, so we +must copy it as long as NIC device model has it set. + +Signed-off-by: Eugenio Pérez +Message-Id: <20221221115015.1400889-3-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Jason Wang +(cherry picked from commit 4f93aafc8f9d731c6588f5dc5594c6a1dd1fbe66) +--- + hw/net/virtio-net.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 5935e55653..948bcf33cf 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -183,6 +183,8 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) + memcpy(netcfg.mac, n->mac, ETH_ALEN); + } + ++ netcfg.status |= virtio_tswap16(vdev, ++ n->status & VIRTIO_NET_S_ANNOUNCE); + memcpy(config, &netcfg, n->config_size); + } + } +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index b01376f..e117ede 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.2.0 -Release: 5%{?rcrel}%{?dist}%{?cc_suffix} +Release: 6%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -272,6 +272,24 @@ Patch61: kvm-accel-introduce-accelerator-blocker-API.patch Patch62: kvm-KVM-keep-track-of-running-ioctls.patch # For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on Patch63: kvm-kvm-Atomic-memslot-updates.patch +# For bz#2141088 - vDPA SVQ guest announce support +Patch64: kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch +# For bz#2141088 - vDPA SVQ guest announce support +Patch65: kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch +# For bz#2141088 - vDPA SVQ guest announce support +Patch66: kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch +# For bz#2141088 - vDPA SVQ guest announce support +Patch67: kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch +# For bz#2122523 - Secure guest can't boot with maximal number of vcpus (248) +Patch68: 
kvm-s390x-pv-Implement-a-CGS-check-helper.patch +# For bz#2163701 - [s390x] VM fails to start with ISM passed through +Patch69: kvm-s390x-pci-coalesce-unmap-operations.patch +# For bz#2163701 - [s390x] VM fails to start with ISM passed through +Patch70: kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch +# For bz#2163701 - [s390x] VM fails to start with ISM passed through +Patch71: kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch +# For bz#2149191 - [RFE][guest-agent] - USB bus type support +Patch72: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch %if %{have_clang} BuildRequires: clang @@ -1300,6 +1318,25 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Jan 30 2023 Miroslav Rezanina - 7.2.0-6 +- kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch [bz#2141088] +- kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch [bz#2141088] +- kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch [bz#2141088] +- kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch [bz#2141088] +- kvm-s390x-pv-Implement-a-CGS-check-helper.patch [bz#2122523] +- kvm-s390x-pci-coalesce-unmap-operations.patch [bz#2163701] +- kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch [bz#2163701] +- kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch [bz#2163701] +- kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch [bz#2149191] +- Resolves: bz#2141088 + (vDPA SVQ guest announce support) +- Resolves: bz#2122523 + (Secure guest can't boot with maximal number of vcpus (248)) +- Resolves: bz#2163701 + ([s390x] VM fails to start with ISM passed through) +- Resolves: bz#2149191 + ([RFE][guest-agent] - USB bus type support) + * Tue Jan 17 2023 Miroslav Rezanina - 7.2.0-5 - kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch [bz#1905805] - kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch [bz#1905805] From 9b81b4ad6ba37cab041d2b89fc967a66d1552705 Mon Sep 17 00:00:00 
2001 From: Miroslav Rezanina Date: Mon, 6 Feb 2023 10:05:42 -0500 Subject: [PATCH 181/195] * Mon Feb 06 2023 Miroslav Rezanina - 7.2.0-7 - kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch [bz#2104412] - kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch [bz#2104412] - kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch [bz#2104412] - kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch [bz#2104412] - kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch [bz#2104412] - kvm-vdpa-request-iova_range-only-once.patch [bz#2104412] - kvm-vdpa-move-SVQ-vring-features-check-to-net.patch [bz#2104412] - kvm-vdpa-allocate-SVQ-array-unconditionally.patch [bz#2104412] - kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch [bz#2104412] - kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch [bz#2104412] - kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch [bz#2104412] - kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch [bz#2104412] - kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch [bz#2104412] - kvm-spec-Disable-VDUSE.patch [bz#2128222] - Resolves: bz#2104412 (vDPA ASID support in Qemu) - Resolves: bz#2128222 (VDUSE block export should be disabled in builds for now) --- ...arameter-to-vhost_vdpa_dma_map-unmap.patch | 221 ++++++++++++++++++ kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch | 94 ++++++++ ...dd-vhost_vdpa_net_valid_svq_features.patch | 76 ++++++ ...a-allocate-SVQ-array-unconditionally.patch | 50 ++++ ...ys-start-CVQ-in-SVQ-mode-if-possible.patch | 193 +++++++++++++++ ...HOST_BACKEND_F_IOTLB_ASID-flag-check.patch | 48 ++++ ...move-SVQ-vring-features-check-to-net.patch | 118 ++++++++++ kvm-vdpa-request-iova_range-only-once.patch | 145 ++++++++++++ ...re-x-svq-parameter-in-VhostVDPAState.patch | 62 +++++ ...w_vqs_enabled-in-vhost_vdpa_svqs_sta.patch | 58 +++++ ...VQ-device-file-descriptors-at-device.patch | 171 ++++++++++++++ ...ove-iova_tree-set-to-vhost_svq_start.patch | 122 ++++++++++ 
...SVQ-device-call-handler-at-SVQ-start.patch | 73 ++++++ qemu-kvm.spec | 50 +++- 14 files changed, 1480 insertions(+), 1 deletion(-) create mode 100644 kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch create mode 100644 kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch create mode 100644 kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch create mode 100644 kvm-vdpa-allocate-SVQ-array-unconditionally.patch create mode 100644 kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch create mode 100644 kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch create mode 100644 kvm-vdpa-move-SVQ-vring-features-check-to-net.patch create mode 100644 kvm-vdpa-request-iova_range-only-once.patch create mode 100644 kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch create mode 100644 kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch create mode 100644 kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch create mode 100644 kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch create mode 100644 kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch diff --git a/kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch b/kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch new file mode 100644 index 0000000..a56c6eb --- /dev/null +++ b/kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch @@ -0,0 +1,221 @@ +From d0e7f24a8d941ab142f2a1973ae18ed1bfdc074f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:41 +0100 +Subject: [PATCH 09/14] vdpa: add asid parameter to vhost_vdpa_dma_map/unmap +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [9/13] 3e7f89e57f73661017ccf0206f2ea77a72ca46bb (eperezmartin/qemu-kvm) + +So the caller can choose which 
ASID is destined. + +No need to update the batch functions as they will always be called from +memory listener updates at the moment. Memory listener updates will +always update ASID 0, as it's the passthrough ASID. + +All vhost devices's ASID are 0 at this moment. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-10-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit cd831ed5c4add8ed6ee980c3645b241cbef5130f) +--- + hw/virtio/trace-events | 4 ++-- + hw/virtio/vhost-vdpa.c | 36 +++++++++++++++++++++++----------- + include/hw/virtio/vhost-vdpa.h | 14 ++++++++++--- + net/vhost-vdpa.c | 6 +++--- + 4 files changed, 41 insertions(+), 19 deletions(-) + +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index 46f2faf04e..a87c5f39a2 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -30,8 +30,8 @@ vhost_user_write(uint32_t req, uint32_t flags) "req:%d flags:0x%"PRIx32"" + vhost_user_create_notifier(int idx, void *n) "idx:%d n:%p" + + # vhost-vdpa.c +-vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8 +-vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 ++vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8 ++vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" 
iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 + vhost_vdpa_listener_begin_batch(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 + vhost_vdpa_listener_commit(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 + vhost_vdpa_listener_region_add(void *vdpa, uint64_t iova, uint64_t llend, void *vaddr, bool readonly) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64" vaddr: %p read-only: %d" +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index dd2768634b..0ecf2bbaa0 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -72,22 +72,28 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, + return false; + } + +-int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, +- void *vaddr, bool readonly) ++/* ++ * The caller must set asid = 0 if the device does not support asid. ++ * This is not an ABI break since it is set to 0 by the initializer anyway. ++ */ ++int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, ++ hwaddr size, void *vaddr, bool readonly) + { + struct vhost_msg_v2 msg = {}; + int fd = v->device_fd; + int ret = 0; + + msg.type = v->msg_type; ++ msg.asid = asid; + msg.iotlb.iova = iova; + msg.iotlb.size = size; + msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr; + msg.iotlb.perm = readonly ? 
VHOST_ACCESS_RO : VHOST_ACCESS_RW; + msg.iotlb.type = VHOST_IOTLB_UPDATE; + +- trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size, +- msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type); ++ trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.asid, msg.iotlb.iova, ++ msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm, ++ msg.iotlb.type); + + if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { + error_report("failed to write, fd=%d, errno=%d (%s)", +@@ -98,18 +104,24 @@ int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, + return ret; + } + +-int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size) ++/* ++ * The caller must set asid = 0 if the device does not support asid. ++ * This is not an ABI break since it is set to 0 by the initializer anyway. ++ */ ++int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, ++ hwaddr size) + { + struct vhost_msg_v2 msg = {}; + int fd = v->device_fd; + int ret = 0; + + msg.type = v->msg_type; ++ msg.asid = asid; + msg.iotlb.iova = iova; + msg.iotlb.size = size; + msg.iotlb.type = VHOST_IOTLB_INVALIDATE; + +- trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova, ++ trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.asid, msg.iotlb.iova, + msg.iotlb.size, msg.iotlb.type); + + if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { +@@ -229,8 +241,8 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, + } + + vhost_vdpa_iotlb_batch_begin_once(v); +- ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize), +- vaddr, section->readonly); ++ ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova, ++ int128_get64(llsize), vaddr, section->readonly); + if (ret) { + error_report("vhost vdpa map fail!"); + goto fail_map; +@@ -303,7 +315,8 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, + vhost_iova_tree_remove(v->iova_tree, *result); + } + vhost_vdpa_iotlb_batch_begin_once(v); +- ret = vhost_vdpa_dma_unmap(v, iova, 
int128_get64(llsize)); ++ ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova, ++ int128_get64(llsize)); + if (ret) { + error_report("vhost_vdpa dma unmap error!"); + } +@@ -876,7 +889,7 @@ static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr) + } + + size = ROUND_UP(result->size, qemu_real_host_page_size()); +- r = vhost_vdpa_dma_unmap(v, result->iova, size); ++ r = vhost_vdpa_dma_unmap(v, v->address_space_id, result->iova, size); + if (unlikely(r < 0)) { + error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); + return; +@@ -916,7 +929,8 @@ static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, + return false; + } + +- r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1, ++ r = vhost_vdpa_dma_map(v, v->address_space_id, needle->iova, ++ needle->size + 1, + (void *)(uintptr_t)needle->translated_addr, + needle->perm == IOMMU_RO); + if (unlikely(r != 0)) { +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index 1111d85643..e57dfa1fd1 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -19,6 +19,12 @@ + #include "hw/virtio/virtio.h" + #include "standard-headers/linux/vhost_types.h" + ++/* ++ * ASID dedicated to map guest's addresses. If SVQ is disabled it maps GPA to ++ * qemu's IOVA. 
If SVQ is enabled it maps also the SVQ vring here ++ */ ++#define VHOST_VDPA_GUEST_PA_ASID 0 ++ + typedef struct VhostVDPAHostNotifier { + MemoryRegion mr; + void *addr; +@@ -29,6 +35,7 @@ typedef struct vhost_vdpa { + int index; + uint32_t msg_type; + bool iotlb_batch_begin_sent; ++ uint32_t address_space_id; + MemoryListener listener; + struct vhost_vdpa_iova_range iova_range; + uint64_t acked_features; +@@ -42,8 +49,9 @@ typedef struct vhost_vdpa { + VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; + } VhostVDPA; + +-int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, +- void *vaddr, bool readonly); +-int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size); ++int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, ++ hwaddr size, void *vaddr, bool readonly); ++int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, ++ hwaddr size); + + #endif +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 85aa0da39a..c2f319eb88 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -258,7 +258,7 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) + return; + } + +- r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1); ++ r = vhost_vdpa_dma_unmap(v, v->address_space_id, map->iova, map->size + 1); + if (unlikely(r != 0)) { + error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); + } +@@ -298,8 +298,8 @@ static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, + return r; + } + +- r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, +- !write); ++ r = vhost_vdpa_dma_map(v, v->address_space_id, map.iova, ++ vhost_vdpa_net_cvq_cmd_page_len(), buf, !write); + if (unlikely(r < 0)) { + goto dma_map_err; + } +-- +2.31.1 + diff --git a/kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch b/kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch new file mode 100644 index 0000000..57c38d1 --- /dev/null +++ 
b/kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch @@ -0,0 +1,94 @@ +From 6282a83619f274ca45a52d61577c10a05a0714dc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:43 +0100 +Subject: [PATCH 11/14] vdpa: add shadow_data to vhost_vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [11/13] 9d317add1318b555ba06e19e4c67849069e047b9 (eperezmartin/qemu-kvm) + +The memory listener that tells the device how to convert GPA to qemu's +va is registered against CVQ vhost_vdpa. Memory listener translations +are always ASID 0, CVQ ones are ASID 1 if supported. + +Let's tell the listener if it needs to register them on iova tree or +not. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-12-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S.
Tsirkin +(cherry picked from commit 6188d78a19894ac8f2bf9484d48a5235a529d3b7) +--- + hw/virtio/vhost-vdpa.c | 6 +++--- + include/hw/virtio/vhost-vdpa.h | 2 ++ + net/vhost-vdpa.c | 1 + + 3 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 0ecf2bbaa0..dc3498e995 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -224,7 +224,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, + vaddr, section->readonly); + + llsize = int128_sub(llend, int128_make64(iova)); +- if (v->shadow_vqs_enabled) { ++ if (v->shadow_data) { + int r; + + mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr, +@@ -251,7 +251,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, + return; + + fail_map: +- if (v->shadow_vqs_enabled) { ++ if (v->shadow_data) { + vhost_iova_tree_remove(v->iova_tree, mem_region); + } + +@@ -296,7 +296,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, + + llsize = int128_sub(llend, int128_make64(iova)); + +- if (v->shadow_vqs_enabled) { ++ if (v->shadow_data) { + const DMAMap *result; + const void *vaddr = memory_region_get_ram_ptr(section->mr) + + section->offset_within_region + +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index e57dfa1fd1..45b969a311 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -40,6 +40,8 @@ typedef struct vhost_vdpa { + struct vhost_vdpa_iova_range iova_range; + uint64_t acked_features; + bool shadow_vqs_enabled; ++ /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */ ++ bool shadow_data; + /* IOVA mapping used by the Shadow Virtqueue */ + VhostIOVATree *iova_tree; + GPtrArray *shadow_vqs; +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 1757f1d028..eea7a0df12 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -581,6 +581,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer, + s->always_svq = svq; + s->vhost_vdpa.shadow_vqs_enabled = svq; + s->vhost_vdpa.iova_range = iova_range; ++ s->vhost_vdpa.shadow_data = svq; + s->vhost_vdpa.iova_tree = iova_tree; + if (!is_datapath) { + s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), +-- +2.31.1 + diff --git a/kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch b/kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch new file mode 100644 index 0000000..c54a831 --- /dev/null +++ b/kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch @@ -0,0 +1,76 @@ +From 0f3a28e1e128754184c4af6a578f27e16c6a61d5 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:37 +0100 +Subject: [PATCH 05/14] vdpa: add vhost_vdpa_net_valid_svq_features +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/13] 0b27e04f178ec73cb800f4fb05c17a92576142e4 (eperezmartin/qemu-kvm) + +It will be reused at vdpa device start so let's extract in its own +function. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-6-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 36e4647247f200b6fa4d2f656133f567036e8a85) +--- + net/vhost-vdpa.c | 26 +++++++++++++++++--------- + 1 file changed, 17 insertions(+), 9 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index b06540ac89..16a5ebe2dd 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -106,6 +106,22 @@ VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) + return s->vhost_net; + } + ++static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp) ++{ ++ uint64_t invalid_dev_features = ++ features & ~vdpa_svq_device_features & ++ /* Transport are all accepted at this point */ ++ ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, ++ VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); ++ ++ if (invalid_dev_features) { ++ error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, ++ invalid_dev_features); ++ } ++ ++ return !invalid_dev_features; ++} ++ + static int vhost_vdpa_net_check_device_id(struct vhost_net *net) + { + uint32_t device_id; +@@ -684,15 +700,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + if (opts->x_svq) { + struct vhost_vdpa_iova_range iova_range; + +- uint64_t invalid_dev_features = +- features & ~vdpa_svq_device_features & +- /* Transport are all accepted at this point */ +- ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, +- VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); +- +- if (invalid_dev_features) { +- error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, +- invalid_dev_features); ++ if (!vhost_vdpa_net_valid_svq_features(features, errp)) { + goto err_svq; + } + +-- +2.31.1 + diff --git a/kvm-vdpa-allocate-SVQ-array-unconditionally.patch b/kvm-vdpa-allocate-SVQ-array-unconditionally.patch new file mode 100644 index 0000000..22c5955 --- /dev/null +++ b/kvm-vdpa-allocate-SVQ-array-unconditionally.patch @@ -0,0 +1,50 @@ +From 72f296870805750df8dfe5eaad77dd7d435a8f41 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 
Dec 2022 12:31:40 +0100 +Subject: [PATCH 08/14] vdpa: allocate SVQ array unconditionally +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [8/13] 08cd86d0859f82d768794e29241cfeff25df667c (eperezmartin/qemu-kvm) + +SVQ may run or not in a device depending on runtime conditions (for +example, if the device can move CVQ to its own group or not). + +Allocate the SVQ array unconditionally at startup, since it's hard to +move this allocation elsewhere. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-9-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 273e0003f0005cc17292dedae01e5edb0064b69c) +--- + hw/virtio/vhost-vdpa.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 84218ce078..dd2768634b 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -532,10 +532,6 @@ static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev) + struct vhost_vdpa *v = dev->opaque; + size_t idx; + +- if (!v->shadow_vqs) { +- return; +- } +- + for (idx = 0; idx < v->shadow_vqs->len; ++idx) { + vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx)); + } +-- +2.31.1 + diff --git a/kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch b/kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch new file mode 100644 index 0000000..9b78b5c --- /dev/null +++ b/kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch @@ -0,0 +1,193 @@ +From 84c203faa570b85eec006215768c83371c9f0399 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:44 +0100 +Subject: [PATCH 12/14] vdpa: always start CVQ in SVQ mode if possible +MIME-Version: 1.0 +Content-Type:
text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [12/13] 83f94b3e163ca38d08dbf7c111a4cfa7a44e3dc2 (eperezmartin/qemu-kvm) + +Isolate control virtqueue in its own group, allowing to intercept control +commands but letting dataplane run totally passthrough to the guest. + +Signed-off-by: Eugenio Pérez +Message-Id: <20221215113144.322011-13-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Acked-by: Jason Wang +(cherry picked from commit c1a1008685af0327d9d03f03d43bdb77e7af5bea) +--- + hw/virtio/vhost-vdpa.c | 3 +- + net/vhost-vdpa.c | 110 ++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 111 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index dc3498e995..72ff06673c 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -638,7 +638,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) + { + uint64_t features; + uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | +- 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH; ++ 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | ++ 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID; + int r; + + if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index eea7a0df12..07d33dae26 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -101,6 +101,8 @@ static const uint64_t vdpa_svq_device_features = + BIT_ULL(VIRTIO_NET_F_RSC_EXT) | + BIT_ULL(VIRTIO_NET_F_STANDBY); + ++#define VHOST_VDPA_NET_CVQ_ASID 1 ++ + VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); +@@ -242,6 +244,40 @@ static NetClientInfo net_vhost_vdpa_info = { + .check_peer_type = vhost_vdpa_check_peer_type, + }; + ++static int64_t 
vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) ++{ ++ struct vhost_vring_state state = { ++ .index = vq_index, ++ }; ++ int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state); ++ ++ if (unlikely(r < 0)) { ++ error_report("Cannot get VQ %u group: %s", vq_index, ++ g_strerror(errno)); ++ return r; ++ } ++ ++ return state.num; ++} ++ ++static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v, ++ unsigned vq_group, ++ unsigned asid_num) ++{ ++ struct vhost_vring_state asid = { ++ .index = vq_group, ++ .num = asid_num, ++ }; ++ int r; ++ ++ r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid); ++ if (unlikely(r < 0)) { ++ error_report("Can't set vq group %u asid %u, errno=%d (%s)", ++ asid.index, asid.num, errno, g_strerror(errno)); ++ } ++ return r; ++} ++ + static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) + { + VhostIOVATree *tree = v->iova_tree; +@@ -316,11 +352,75 @@ dma_map_err: + static int vhost_vdpa_net_cvq_start(NetClientState *nc) + { + VhostVDPAState *s; +- int r; ++ struct vhost_vdpa *v; ++ uint64_t backend_features; ++ int64_t cvq_group; ++ int cvq_index, r; + + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); + + s = DO_UPCAST(VhostVDPAState, nc, nc); ++ v = &s->vhost_vdpa; ++ ++ v->shadow_data = s->always_svq; ++ v->shadow_vqs_enabled = s->always_svq; ++ s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID; ++ ++ if (s->always_svq) { ++ /* SVQ is already configured for all virtqueues */ ++ goto out; ++ } ++ ++ /* ++ * If we early return in these cases SVQ will not be enabled. The migration ++ * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. ++ * ++ * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev ++ * yet. 
++ */ ++ r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); ++ if (unlikely(r < 0)) { ++ error_report("Cannot get vdpa backend_features: %s(%d)", ++ g_strerror(errno), errno); ++ return -1; ++ } ++ if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) || ++ !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { ++ return 0; ++ } ++ ++ /* ++ * Check if all the virtqueues of the virtio device are in a different vq ++ * than the last vq. VQ group of last group passed in cvq_group. ++ */ ++ cvq_index = v->dev->vq_index_end - 1; ++ cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); ++ if (unlikely(cvq_group < 0)) { ++ return cvq_group; ++ } ++ for (int i = 0; i < cvq_index; ++i) { ++ int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); ++ ++ if (unlikely(group < 0)) { ++ return group; ++ } ++ ++ if (group == cvq_group) { ++ return 0; ++ } ++ } ++ ++ r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); ++ if (unlikely(r < 0)) { ++ return r; ++ } ++ ++ v->iova_tree = vhost_iova_tree_new(v->iova_range.first, ++ v->iova_range.last); ++ v->shadow_vqs_enabled = true; ++ s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID; ++ ++out: + if (!s->vhost_vdpa.shadow_vqs_enabled) { + return 0; + } +@@ -349,6 +449,14 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc) + if (s->vhost_vdpa.shadow_vqs_enabled) { + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status); ++ if (!s->always_svq) { ++ /* ++ * If only the CVQ is shadowed we can delete this safely. ++ * If all the VQs are shadows this will be needed by the time the ++ * device is started again to register SVQ vrings and similar. 
++ */ ++ g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); ++ } + } + } + +-- +2.31.1 + diff --git a/kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch b/kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch new file mode 100644 index 0000000..bb55256 --- /dev/null +++ b/kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch @@ -0,0 +1,48 @@ +From 46e80a9350a02fdb5689638df96bc7389e953cf8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Tue, 17 Jan 2023 11:53:08 +0100 +Subject: [PATCH 13/14] vdpa: fix VHOST_BACKEND_F_IOTLB_ASID flag check +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [13/13] b7fb4b8e9ea26b6664a9179ed0a88376acf5115f (eperezmartin/qemu-kvm) + +VHOST_BACKEND_F_IOTLB_ASID is the feature bit, not the bitmask. Since +the device under test also provided VHOST_BACKEND_F_IOTLB_MSG_V2 and +VHOST_BACKEND_F_IOTLB_BATCH, this went unnoticed. + +Fixes: c1a1008685 ("vdpa: always start CVQ in SVQ mode if possible") +Signed-off-by: Eugenio Pérez +Reviewed-by: Michael S. 
Tsirkin +Acked-by: Jason Wang +Signed-off-by: Jason Wang + +Upstream status: git@github.com:jasowang/qemu.git +(cherry picked from commit 2bd492bca521ee8594f1d5db8dc9aac126fc4f85) +--- + net/vhost-vdpa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 07d33dae26..7d9c4ea09d 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -384,7 +384,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) + g_strerror(errno), errno); + return -1; + } +- if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) || ++ if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || + !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { + return 0; + } +-- +2.31.1 + diff --git a/kvm-vdpa-move-SVQ-vring-features-check-to-net.patch b/kvm-vdpa-move-SVQ-vring-features-check-to-net.patch new file mode 100644 index 0000000..7cda847 --- /dev/null +++ b/kvm-vdpa-move-SVQ-vring-features-check-to-net.patch @@ -0,0 +1,118 @@ +From 63a45add7c9f7bb2b7775ae4cb2d7df22f7f2033 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:39 +0100 +Subject: [PATCH 07/14] vdpa: move SVQ vring features check to net/ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [7/13] a24189aea4dbde3ed4486f685d0d88aeee1a0ee7 (eperezmartin/qemu-kvm) + +The next patches will start control SVQ if possible. However, we don't +know if that will be possible at qemu boot anymore. + +Since the moved checks will be already evaluated at net/ to know if it +is ok to shadow CVQ, move them. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-8-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 258a03941fd23108a322d09abc9c55341e09688d) +--- + hw/virtio/vhost-vdpa.c | 32 ++------------------------------ + net/vhost-vdpa.c | 3 ++- + 2 files changed, 4 insertions(+), 31 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 9e7cbf1776..84218ce078 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -389,29 +389,9 @@ static int vhost_vdpa_get_dev_features(struct vhost_dev *dev, + return ret; + } + +-static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, +- Error **errp) ++static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v) + { + g_autoptr(GPtrArray) shadow_vqs = NULL; +- uint64_t dev_features, svq_features; +- int r; +- bool ok; +- +- if (!v->shadow_vqs_enabled) { +- return 0; +- } +- +- r = vhost_vdpa_get_dev_features(hdev, &dev_features); +- if (r != 0) { +- error_setg_errno(errp, -r, "Can't get vdpa device features"); +- return r; +- } +- +- svq_features = dev_features; +- ok = vhost_svq_valid_features(svq_features, errp); +- if (unlikely(!ok)) { +- return -1; +- } + + shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); + for (unsigned n = 0; n < hdev->nvqs; ++n) { +@@ -422,7 +402,6 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, + } + + v->shadow_vqs = g_steal_pointer(&shadow_vqs); +- return 0; + } + + static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) +@@ -447,10 +426,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) + dev->opaque = opaque ; + v->listener = vhost_vdpa_memory_listener; + v->msg_type = VHOST_IOTLB_MSG_V2; +- ret = vhost_vdpa_init_svq(dev, v, errp); +- if (ret) { +- goto err; +- } ++ vhost_vdpa_init_svq(dev, v); + + if (!vhost_vdpa_first_dev(dev)) { + return 0; +@@ -460,10 +436,6 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) + VIRTIO_CONFIG_S_DRIVER); + + return 0; +- +-err: 
+- ram_block_discard_disable(false); +- return ret; + } + + static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev, +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 8d3ed095d0..85aa0da39a 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -117,9 +117,10 @@ static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp) + if (invalid_dev_features) { + error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, + invalid_dev_features); ++ return false; + } + +- return !invalid_dev_features; ++ return vhost_svq_valid_features(features, errp); + } + + static int vhost_vdpa_net_check_device_id(struct vhost_net *net) +-- +2.31.1 + diff --git a/kvm-vdpa-request-iova_range-only-once.patch b/kvm-vdpa-request-iova_range-only-once.patch new file mode 100644 index 0000000..041e8f7 --- /dev/null +++ b/kvm-vdpa-request-iova_range-only-once.patch @@ -0,0 +1,145 @@ +From 760169d538a4e6ba61006f6796cd55af967a7f1e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:38 +0100 +Subject: [PATCH 06/14] vdpa: request iova_range only once +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [6/13] 2a8ae2f46ae88f01c5535038f38cb7895098b610 (eperezmartin/qemu-kvm) + +Currently iova range is requested once per queue pair in the case of +net. Reduce the number of ioctls asking it once at initialization and +reusing that value for each vhost_vdpa. + +Signed-off-by: Eugenio Pérez +Message-Id: <20221215113144.322011-7-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Jason Wang +(cherry picked from commit a585fad26b2e6ccca156d9e65158ad1c5efd268d) +--- + hw/virtio/vhost-vdpa.c | 15 --------------- + net/vhost-vdpa.c | 27 ++++++++++++++------------- + 2 files changed, 14 insertions(+), 28 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index e65603022f..9e7cbf1776 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -365,19 +365,6 @@ static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status) + return 0; + } + +-static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) +-{ +- int ret = vhost_vdpa_call(v->dev, VHOST_VDPA_GET_IOVA_RANGE, +- &v->iova_range); +- if (ret != 0) { +- v->iova_range.first = 0; +- v->iova_range.last = UINT64_MAX; +- } +- +- trace_vhost_vdpa_get_iova_range(v->dev, v->iova_range.first, +- v->iova_range.last); +-} +- + /* + * The use of this function is for requests that only need to be + * applied once. Typically such request occurs at the beginning +@@ -465,8 +452,6 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) + goto err; + } + +- vhost_vdpa_get_iova_range(v); +- + if (!vhost_vdpa_first_dev(dev)) { + return 0; + } +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 16a5ebe2dd..8d3ed095d0 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -549,14 +549,15 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { + }; + + static NetClientState *net_vhost_vdpa_init(NetClientState *peer, +- const char *device, +- const char *name, +- int vdpa_device_fd, +- int queue_pair_index, +- int nvqs, +- bool is_datapath, +- bool svq, +- VhostIOVATree *iova_tree) ++ const char *device, ++ const char *name, ++ int vdpa_device_fd, ++ int queue_pair_index, ++ int nvqs, ++ bool is_datapath, ++ bool svq, ++ struct vhost_vdpa_iova_range iova_range, ++ VhostIOVATree *iova_tree) + { + NetClientState *nc = NULL; + VhostVDPAState *s; +@@ -575,6 +576,7 @@ static NetClientState 
*net_vhost_vdpa_init(NetClientState *peer, + s->vhost_vdpa.device_fd = vdpa_device_fd; + s->vhost_vdpa.index = queue_pair_index; + s->vhost_vdpa.shadow_vqs_enabled = svq; ++ s->vhost_vdpa.iova_range = iova_range; + s->vhost_vdpa.iova_tree = iova_tree; + if (!is_datapath) { + s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), +@@ -654,6 +656,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + int vdpa_device_fd; + g_autofree NetClientState **ncs = NULL; + g_autoptr(VhostIOVATree) iova_tree = NULL; ++ struct vhost_vdpa_iova_range iova_range; + NetClientState *nc; + int queue_pairs, r, i = 0, has_cvq = 0; + +@@ -697,14 +700,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + return queue_pairs; + } + ++ vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); + if (opts->x_svq) { +- struct vhost_vdpa_iova_range iova_range; +- + if (!vhost_vdpa_net_valid_svq_features(features, errp)) { + goto err_svq; + } + +- vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); + iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last); + } + +@@ -713,7 +714,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + for (i = 0; i < queue_pairs; i++) { + ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, + vdpa_device_fd, i, 2, true, opts->x_svq, +- iova_tree); ++ iova_range, iova_tree); + if (!ncs[i]) + goto err; + } +@@ -721,7 +722,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + if (has_cvq) { + nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, + vdpa_device_fd, i, 1, false, +- opts->x_svq, iova_tree); ++ opts->x_svq, iova_range, iova_tree); + if (!nc) + goto err; + } +-- +2.31.1 + diff --git a/kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch b/kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch new file mode 100644 index 0000000..68c0c86 --- /dev/null +++ b/kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch @@ -0,0 +1,62 @@ +From 
28163d7d61b6b0b8312b78d57dabc8f44bf39c46 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:42 +0100 +Subject: [PATCH 10/14] vdpa: store x-svq parameter in VhostVDPAState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [10/13] 53f3b2698b4a5caca434f55e4300103a78778548 (eperezmartin/qemu-kvm) + +CVQ can be shadowed two ways: +- Device has x-svq=on parameter (current way) +- The device can isolate CVQ in its own vq group + +QEMU needs to check for the second condition dynamically, because CVQ +index is not known before the driver ack the features. Since this is +dynamic, the CVQ isolation could vary with different conditions, making +it possible to go from "not isolated group" to "isolated". + +Saving the cmdline parameter in an extra field so we never disable CVQ +SVQ in case the device was started with x-svq cmdline. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-11-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 7f211a28fd5482f76583988beecd8ee61588d45e) +--- + net/vhost-vdpa.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index c2f319eb88..1757f1d028 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -38,6 +38,8 @@ typedef struct VhostVDPAState { + void *cvq_cmd_out_buffer; + virtio_net_ctrl_ack *status; + ++ /* The device always have SVQ enabled */ ++ bool always_svq; + bool started; + } VhostVDPAState; + +@@ -576,6 +578,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + + s->vhost_vdpa.device_fd = vdpa_device_fd; + s->vhost_vdpa.index = queue_pair_index; ++ s->always_svq = svq; + s->vhost_vdpa.shadow_vqs_enabled = svq; + s->vhost_vdpa.iova_range = iova_range; + s->vhost_vdpa.iova_tree = iova_tree; +-- +2.31.1 + diff --git a/kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch b/kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch new file mode 100644 index 0000000..3d11438 --- /dev/null +++ b/kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch @@ -0,0 +1,58 @@ +From cb974f2f9a0c5b9520b6ac80bd1d1e4a6b12bbdc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:33 +0100 +Subject: [PATCH 01/14] vdpa: use v->shadow_vqs_enabled in + vhost_vdpa_svqs_start & stop +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/13] f0db50a95f87dd011418617be7b80aa6813a1146 (eperezmartin/qemu-kvm) + +This function used to trust in v->shadow_vqs != NULL to know if it must +start svq or not. + +This is not going to be valid anymore, as qemu is going to allocate svq +array unconditionally (but it will only start them conditionally). 
+ +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-2-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 712c1a3171cf62d501dac5af58f77d5fea70350d) +--- + hw/virtio/vhost-vdpa.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index c5be2645b0..44e6a9b7b3 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1036,7 +1036,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) + Error *err = NULL; + unsigned i; + +- if (!v->shadow_vqs) { ++ if (!v->shadow_vqs_enabled) { + return true; + } + +@@ -1089,7 +1089,7 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) + { + struct vhost_vdpa *v = dev->opaque; + +- if (!v->shadow_vqs) { ++ if (!v->shadow_vqs_enabled) { + return; + } + +-- +2.31.1 + diff --git a/kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch b/kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch new file mode 100644 index 0000000..940133b --- /dev/null +++ b/kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch @@ -0,0 +1,171 @@ +From bffccbd59a2e2c641810cd7362c7b5ecf5989ed8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:35 +0100 +Subject: [PATCH 03/14] vhost: allocate SVQ device file descriptors at device + start +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/13] bab2d43f0fc0d13a4917e706244b37e1a431b082 (eperezmartin/qemu-kvm) + +The next patches will start control SVQ if possible. However, we don't +know if that will be possible at qemu boot anymore. + +Delay device file descriptors until we know it at device start. 
This +will avoid to create them if the device does not support SVQ. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-4-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 3cfb4d069cd2977b707fb519c455d7d416e1f4b0) +--- + hw/virtio/vhost-shadow-virtqueue.c | 31 ++------------------------ + hw/virtio/vhost-vdpa.c | 35 ++++++++++++++++++++++++------ + 2 files changed, 30 insertions(+), 36 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 264ddc166d..3b05bab44d 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -715,43 +715,18 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + * @iova_tree: Tree to perform descriptors translations + * @ops: SVQ owner callbacks + * @ops_opaque: ops opaque pointer +- * +- * Returns the new virtqueue or NULL. +- * +- * In case of error, reason is reported through error_report. 
+ */ + VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, + const VhostShadowVirtqueueOps *ops, + void *ops_opaque) + { +- g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); +- int r; +- +- r = event_notifier_init(&svq->hdev_kick, 0); +- if (r != 0) { +- error_report("Couldn't create kick event notifier: %s (%d)", +- g_strerror(errno), errno); +- goto err_init_hdev_kick; +- } +- +- r = event_notifier_init(&svq->hdev_call, 0); +- if (r != 0) { +- error_report("Couldn't create call event notifier: %s (%d)", +- g_strerror(errno), errno); +- goto err_init_hdev_call; +- } ++ VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); + + event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); + svq->iova_tree = iova_tree; + svq->ops = ops; + svq->ops_opaque = ops_opaque; +- return g_steal_pointer(&svq); +- +-err_init_hdev_call: +- event_notifier_cleanup(&svq->hdev_kick); +- +-err_init_hdev_kick: +- return NULL; ++ return svq; + } + + /** +@@ -763,7 +738,5 @@ void vhost_svq_free(gpointer pvq) + { + VhostShadowVirtqueue *vq = pvq; + vhost_svq_stop(vq); +- event_notifier_cleanup(&vq->hdev_kick); +- event_notifier_cleanup(&vq->hdev_call); + g_free(vq); + } +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 44e6a9b7b3..530d2ca362 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -428,15 +428,11 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, + + shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); + for (unsigned n = 0; n < hdev->nvqs; ++n) { +- g_autoptr(VhostShadowVirtqueue) svq; ++ VhostShadowVirtqueue *svq; + + svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops, + v->shadow_vq_ops_opaque); +- if (unlikely(!svq)) { +- error_setg(errp, "Cannot create svq %u", n); +- return -1; +- } +- g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq)); ++ g_ptr_array_add(shadow_vqs, svq); + } + + v->shadow_vqs = g_steal_pointer(&shadow_vqs); +@@ -871,11 +867,23 @@ static int 
vhost_vdpa_svq_set_fds(struct vhost_dev *dev, + const EventNotifier *event_notifier = &svq->hdev_kick; + int r; + ++ r = event_notifier_init(&svq->hdev_kick, 0); ++ if (r != 0) { ++ error_setg_errno(errp, -r, "Couldn't create kick event notifier"); ++ goto err_init_hdev_kick; ++ } ++ ++ r = event_notifier_init(&svq->hdev_call, 0); ++ if (r != 0) { ++ error_setg_errno(errp, -r, "Couldn't create call event notifier"); ++ goto err_init_hdev_call; ++ } ++ + file.fd = event_notifier_get_fd(event_notifier); + r = vhost_vdpa_set_vring_dev_kick(dev, &file); + if (unlikely(r != 0)) { + error_setg_errno(errp, -r, "Can't set device kick fd"); +- return r; ++ goto err_init_set_dev_fd; + } + + event_notifier = &svq->hdev_call; +@@ -883,8 +891,18 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, + r = vhost_vdpa_set_vring_dev_call(dev, &file); + if (unlikely(r != 0)) { + error_setg_errno(errp, -r, "Can't set device call fd"); ++ goto err_init_set_dev_fd; + } + ++ return 0; ++ ++err_init_set_dev_fd: ++ event_notifier_set_handler(&svq->hdev_call, NULL); ++ ++err_init_hdev_call: ++ event_notifier_cleanup(&svq->hdev_kick); ++ ++err_init_hdev_kick: + return r; + } + +@@ -1096,6 +1114,9 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) + for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); + vhost_vdpa_svq_unmap_rings(dev, svq); ++ ++ event_notifier_cleanup(&svq->hdev_kick); ++ event_notifier_cleanup(&svq->hdev_call); + } + } + +-- +2.31.1 + diff --git a/kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch b/kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch new file mode 100644 index 0000000..de005ba --- /dev/null +++ b/kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch @@ -0,0 +1,122 @@ +From 6584478deca49d0ea20add588e4fdb51cdc26f1d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:36 +0100 +Subject: [PATCH 04/14] vhost: move iova_tree set to 
vhost_svq_start +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/13] 200d8e9b58e258a6e301430debc73ef7d962b732 (eperezmartin/qemu-kvm) + +Since we don't know if we will use SVQ at qemu initialization, let's +allocate iova_tree only if needed. To do so, accept it at SVQ start, not +at initialization. + +This will avoid to create it if the device does not support SVQ. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-5-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 5fde952bbdd521c10fc018ee04f922a7dca5f663) +--- + hw/virtio/vhost-shadow-virtqueue.c | 9 ++++----- + hw/virtio/vhost-shadow-virtqueue.h | 5 ++--- + hw/virtio/vhost-vdpa.c | 5 ++--- + 3 files changed, 8 insertions(+), 11 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 3b05bab44d..4307296358 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -642,9 +642,10 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) + * @svq: Shadow Virtqueue + * @vdev: VirtIO device + * @vq: Virtqueue to shadow ++ * @iova_tree: Tree to perform descriptors translations + */ + void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, +- VirtQueue *vq) ++ VirtQueue *vq, VhostIOVATree *iova_tree) + { + size_t desc_size, driver_size, device_size; + +@@ -655,6 +656,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + svq->last_used_idx = 0; + svq->vdev = vdev; + svq->vq = vq; ++ svq->iova_tree = iova_tree; + + svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); + driver_size = vhost_svq_driver_area_size(svq); +@@ 
-712,18 +714,15 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + * Creates vhost shadow virtqueue, and instructs the vhost device to use the + * shadow methods and file descriptors. + * +- * @iova_tree: Tree to perform descriptors translations + * @ops: SVQ owner callbacks + * @ops_opaque: ops opaque pointer + */ +-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, +- const VhostShadowVirtqueueOps *ops, ++VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops, + void *ops_opaque) + { + VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); + + event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); +- svq->iova_tree = iova_tree; + svq->ops = ops; + svq->ops_opaque = ops_opaque; + return svq; +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index d04c34a589..926a4897b1 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -126,11 +126,10 @@ size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq); + size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq); + + void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, +- VirtQueue *vq); ++ VirtQueue *vq, VhostIOVATree *iova_tree); + void vhost_svq_stop(VhostShadowVirtqueue *svq); + +-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, +- const VhostShadowVirtqueueOps *ops, ++VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops, + void *ops_opaque); + + void vhost_svq_free(gpointer vq); +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 530d2ca362..e65603022f 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -430,8 +430,7 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, + for (unsigned n = 0; n < hdev->nvqs; ++n) { + VhostShadowVirtqueue *svq; + +- svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops, +- v->shadow_vq_ops_opaque); ++ svq = vhost_svq_new(v->shadow_vq_ops, 
v->shadow_vq_ops_opaque); + g_ptr_array_add(shadow_vqs, svq); + } + +@@ -1070,7 +1069,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) + goto err; + } + +- vhost_svq_start(svq, dev->vdev, vq); ++ vhost_svq_start(svq, dev->vdev, vq, v->iova_tree); + ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err); + if (unlikely(!ok)) { + goto err_map; +-- +2.31.1 + diff --git a/kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch b/kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch new file mode 100644 index 0000000..099dd73 --- /dev/null +++ b/kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch @@ -0,0 +1,73 @@ +From 2906f8df3c5e915a3dc05a705b87990211f114b5 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 15 Dec 2022 12:31:34 +0100 +Subject: [PATCH 02/14] vhost: set SVQ device call handler at SVQ start +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 136: vDPA ASID support in Qemu +RH-Bugzilla: 2104412 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/13] ad90a6cc5c71b70d705904433d5a986e8fedb924 (eperezmartin/qemu-kvm) + +By the end of this series CVQ is shadowed as long as the features +support it. + +Since we don't know at the beginning of qemu running if this is +supported, move the event notifier handler setting to the start of the +SVQ, instead of the start of qemu run. This will avoid to create them if +the device does not support SVQ. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20221215113144.322011-3-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 20e7412bfd63c68f1798fbdb799aedb7e05fee88) +--- + hw/virtio/vhost-shadow-virtqueue.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 5bd14cad96..264ddc166d 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -648,6 +648,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + { + size_t desc_size, driver_size, device_size; + ++ event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); + svq->next_guest_avail_elem = NULL; + svq->shadow_avail_idx = 0; + svq->shadow_used_idx = 0; +@@ -704,6 +705,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + g_free(svq->desc_state); + qemu_vfree(svq->vring.desc); + qemu_vfree(svq->vring.used); ++ event_notifier_set_handler(&svq->hdev_call, NULL); + } + + /** +@@ -740,7 +742,6 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, + } + + event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); +- event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); + svq->iova_tree = iova_tree; + svq->ops = ops; + svq->ops_opaque = ops_opaque; +@@ -763,7 +764,6 @@ void vhost_svq_free(gpointer pvq) + VhostShadowVirtqueue *vq = pvq; + vhost_svq_stop(vq); + event_notifier_cleanup(&vq->hdev_kick); +- event_notifier_set_handler(&vq->hdev_call, NULL); + event_notifier_cleanup(&vq->hdev_call); + g_free(vq); + } +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index e117ede..6dedf40 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.2.0 -Release: 6%{?rcrel}%{?dist}%{?cc_suffix} +Release: 7%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -290,6 +290,32 @@ Patch70: kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch Patch71: kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch # For bz#2149191 - [RFE][guest-agent] - USB bus type support Patch72: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch73: kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch74: kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch75: kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch76: kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch77: kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch78: kvm-vdpa-request-iova_range-only-once.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch79: kvm-vdpa-move-SVQ-vring-features-check-to-net.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch80: kvm-vdpa-allocate-SVQ-array-unconditionally.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch81: kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch82: kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch83: kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch84: kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch +# For bz#2104412 - vDPA ASID support in Qemu +Patch85: kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch %if %{have_clang} BuildRequires: clang @@ -657,6 +683,7 @@ ulimit -n 10240 --disable-libssh \\\ --disable-libudev \\\ --disable-libusb 
\\\ + --disable-libvduse \\\ --disable-linux-aio \\\ --disable-linux-io-uring \\\ --disable-linux-user \\\ @@ -712,6 +739,7 @@ ulimit -n 10240 --disable-user \\\ --disable-vde \\\ --disable-vdi \\\ + --disable-vduse-blk-export \\\ --disable-vhost-crypto \\\ --disable-vhost-kernel \\\ --disable-vhost-net \\\ @@ -1318,6 +1346,26 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Feb 06 2023 Miroslav Rezanina - 7.2.0-7 +- kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch [bz#2104412] +- kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch [bz#2104412] +- kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch [bz#2104412] +- kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch [bz#2104412] +- kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch [bz#2104412] +- kvm-vdpa-request-iova_range-only-once.patch [bz#2104412] +- kvm-vdpa-move-SVQ-vring-features-check-to-net.patch [bz#2104412] +- kvm-vdpa-allocate-SVQ-array-unconditionally.patch [bz#2104412] +- kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch [bz#2104412] +- kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch [bz#2104412] +- kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch [bz#2104412] +- kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch [bz#2104412] +- kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch [bz#2104412] +- kvm-spec-Disable-VDUSE.patch [bz#2128222] +- Resolves: bz#2104412 + (vDPA ASID support in Qemu) +- Resolves: bz#2128222 + (VDUSE block export should be disabled in builds for now) + * Mon Jan 30 2023 Miroslav Rezanina - 7.2.0-6 - kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch [bz#2141088] - kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch [bz#2141088] From a6628605f77857c32325e5f591521298137b1686 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 9 Feb 2023 09:45:40 -0500 Subject: [PATCH 182/195] * Thu Feb 09 2023 Miroslav Rezanina - 7.2.0-8 - 
kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch [bz#2150180] - kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch [bz#2150180] - kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch [bz#2150180] - kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch [bz#2150180] - kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch [bz#2165280] - kvm-block-Improve-empty-format-specific-info-dump.patch [bz#1860292] - kvm-block-file-Add-file-specific-image-info.patch [bz#1860292] - kvm-block-vmdk-Change-extent-info-type.patch [bz#1860292] - kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch [bz#1860292] - kvm-qemu-img-Use-BlockNodeInfo.patch [bz#1860292] - kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch [bz#1860292] - kvm-block-qapi-Introduce-BlockGraphInfo.patch [bz#1860292] - kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch [bz#1860292] - kvm-iotests-Filter-child-node-information.patch [bz#1860292] - kvm-iotests-106-214-308-Read-only-one-size-line.patch [bz#1860292] - kvm-qemu-img-Let-info-print-block-graph.patch [bz#1860292] - kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch [bz#1860292] - kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch [bz#2155173] - kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch [bz#2155173] - kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch [bz#2162569] - Resolves: bz#2150180 (qemu-img finishes successfully while having errors in commit or bitmaps operations) - Resolves: bz#2165280 ([kvm-unit-tests] debug-wp-migration fails) - Resolves: bz#1860292 (RFE: add extent_size_hint information to qemu-img info) - Resolves: bz#2155173 ([vhost-user] unable to start vhost net: 71: falling back on userspace) - Resolves: bz#2162569 ([transitional device][virtio-rng-pci-transitional]Stable Guest ABI failed between RHEL 8.6 to RHEL 9.2) --- ...r-Introduce-nested-event-loop-in-vho.patch | 140 ++++++++++ 
...r-Monitor-slave-channel-in-vhost_use.patch | 143 ++++++++++ ...PUJumpCache-in-tb_jmp_cache_clear_pa.patch | 58 ++++ ...rove-empty-format-specific-info-dump.patch | 132 +++++++++ ...Split-BlockNodeInfo-off-of-ImageInfo.patch | 246 +++++++++++++++++ ...ck-file-Add-file-specific-image-info.patch | 145 ++++++++++ ...d-indentation-to-bdrv_node_info_dump.patch | 206 ++++++++++++++ kvm-block-qapi-Introduce-BlockGraphInfo.patch | 155 +++++++++++ ...pi-Let-bdrv_query_image_info-recurse.patch | 197 +++++++++++++ kvm-block-vmdk-Change-extent-info-type.patch | 140 ++++++++++ ...-106-214-308-Read-only-one-size-line.patch | 99 +++++++ ...otests-Filter-child-node-information.patch | 171 ++++++++++++ ...tical-corruption-in-store_bitmap-err.patch | 67 +++++ ...ge-info-key-names-for-protocol-nodes.patch | 197 +++++++++++++ kvm-qemu-img-Let-info-print-block-graph.patch | 261 ++++++++++++++++++ kvm-qemu-img-Use-BlockNodeInfo.patch | 241 ++++++++++++++++ ...Report-errors-while-closing-the-imag.patch | 70 +++++ ...Report-errors-while-closing-the-imag.patch | 67 +++++ ...t-qemu-img-bitmap-commit-exit-code-o.patch | 166 +++++++++++ ...ix-transitional-migration-compat-for.patch | 47 ++++ qemu-kvm.spec | 74 ++++- 21 files changed, 3021 insertions(+), 1 deletion(-) create mode 100644 kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch create mode 100644 kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch create mode 100644 kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch create mode 100644 kvm-block-Improve-empty-format-specific-info-dump.patch create mode 100644 kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch create mode 100644 kvm-block-file-Add-file-specific-image-info.patch create mode 100644 kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch create mode 100644 kvm-block-qapi-Introduce-BlockGraphInfo.patch create mode 100644 kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch create mode 100644 
kvm-block-vmdk-Change-extent-info-type.patch create mode 100644 kvm-iotests-106-214-308-Read-only-one-size-line.patch create mode 100644 kvm-iotests-Filter-child-node-information.patch create mode 100644 kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch create mode 100644 kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch create mode 100644 kvm-qemu-img-Let-info-print-block-graph.patch create mode 100644 kvm-qemu-img-Use-BlockNodeInfo.patch create mode 100644 kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch create mode 100644 kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch create mode 100644 kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch create mode 100644 kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch diff --git a/kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch b/kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch new file mode 100644 index 0000000..752aa08 --- /dev/null +++ b/kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch @@ -0,0 +1,140 @@ +From 0c19fb7c4a22a30830152b224b2e66963f829a7a Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Thu, 19 Jan 2023 18:24:24 +0100 +Subject: [PATCH 19/20] Revert "vhost-user: Introduce nested event loop in + vhost_user_read()" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 146: Fix vhost-user with dpdk +RH-Bugzilla: 2155173 +RH-Acked-by: Cindy Lu +RH-Acked-by: Greg Kurz (RH) +RH-Acked-by: Eugenio Pérez +RH-Commit: [2/2] 9b67041f92f29f70b7ccb41d8087801e4e4e38af (lvivier/qemu-kvm-centos) + +This reverts commit a7f523c7d114d445c5d83aecdba3efc038e5a692. + +The nested event loop is broken by design. It's only user was removed. +Drop the code as well so that nobody ever tries to use it again. 
+ +I had to fix a couple of trivial conflicts around return values because +of 025faa872bcf ("vhost-user: stick to -errno error return convention"). + +Signed-off-by: Greg Kurz +Message-Id: <20230119172424.478268-3-groug@kaod.org> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Acked-by: Maxime Coquelin +(cherry picked from commit 4382138f642f69fdbc79ebf4e93d84be8061191f) +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost-user.c | 65 ++++-------------------------------------- + 1 file changed, 5 insertions(+), 60 deletions(-) + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index 0ac00eb901..7cb49c50f9 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -305,19 +305,8 @@ static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) + return 0; + } + +-struct vhost_user_read_cb_data { +- struct vhost_dev *dev; +- VhostUserMsg *msg; +- GMainLoop *loop; +- int ret; +-}; +- +-static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, +- gpointer opaque) ++static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) + { +- struct vhost_user_read_cb_data *data = opaque; +- struct vhost_dev *dev = data->dev; +- VhostUserMsg *msg = data->msg; + struct vhost_user *u = dev->opaque; + CharBackend *chr = u->user->chr; + uint8_t *p = (uint8_t *) msg; +@@ -325,8 +314,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, + + r = vhost_user_read_header(dev, msg); + if (r < 0) { +- data->ret = r; +- goto end; ++ return r; + } + + /* validate message size is sane */ +@@ -334,8 +322,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, + error_report("Failed to read msg header." 
+ " Size %d exceeds the maximum %zu.", msg->hdr.size, + VHOST_USER_PAYLOAD_SIZE); +- data->ret = -EPROTO; +- goto end; ++ return -EPROTO; + } + + if (msg->hdr.size) { +@@ -346,53 +333,11 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, + int saved_errno = errno; + error_report("Failed to read msg payload." + " Read %d instead of %d.", r, msg->hdr.size); +- data->ret = r < 0 ? -saved_errno : -EIO; +- goto end; ++ return r < 0 ? -saved_errno : -EIO; + } + } + +-end: +- g_main_loop_quit(data->loop); +- return G_SOURCE_REMOVE; +-} +- +-static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) +-{ +- struct vhost_user *u = dev->opaque; +- CharBackend *chr = u->user->chr; +- GMainContext *prev_ctxt = chr->chr->gcontext; +- GMainContext *ctxt = g_main_context_new(); +- GMainLoop *loop = g_main_loop_new(ctxt, FALSE); +- struct vhost_user_read_cb_data data = { +- .dev = dev, +- .loop = loop, +- .msg = msg, +- .ret = 0 +- }; +- +- /* +- * We want to be able to monitor the slave channel fd while waiting +- * for chr I/O. This requires an event loop, but we can't nest the +- * one to which chr is currently attached : its fd handlers might not +- * be prepared for re-entrancy. So we create a new one and switch chr +- * to use it. +- */ +- qemu_chr_be_update_read_handlers(chr->chr, ctxt); +- qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); +- +- g_main_loop_run(loop); +- +- /* +- * Restore the previous event loop context. This also destroys/recreates +- * event sources : this guarantees that all pending events in the original +- * context that have been processed by the nested loop are purged. 
+- */ +- qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt); +- +- g_main_loop_unref(loop); +- g_main_context_unref(ctxt); +- +- return data.ret; ++ return 0; + } + + static int process_message_reply(struct vhost_dev *dev, +-- +2.31.1 + diff --git a/kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch b/kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch new file mode 100644 index 0000000..8e7b906 --- /dev/null +++ b/kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch @@ -0,0 +1,143 @@ +From 9fb47ad317ad8cdda9960190d499ad6c3a9817f0 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Thu, 19 Jan 2023 18:24:23 +0100 +Subject: [PATCH 18/20] Revert "vhost-user: Monitor slave channel in + vhost_user_read()" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 146: Fix vhost-user with dpdk +RH-Bugzilla: 2155173 +RH-Acked-by: Cindy Lu +RH-Acked-by: Greg Kurz (RH) +RH-Acked-by: Eugenio Pérez +RH-Commit: [1/2] c583a7f121ca9c93c9a2ad17bf0ccf5c1241dc99 (lvivier/qemu-kvm-centos) + +This reverts commit db8a3772e300c1a656331a92da0785d81667dc81. + +Motivation : this is breaking vhost-user with DPDK as reported in [0]. + +Received unexpected msg type. Expected 22 received 40 +Fail to update device iotlb +Received unexpected msg type. Expected 40 received 22 +Received unexpected msg type. Expected 22 received 11 +Fail to update device iotlb +Received unexpected msg type. Expected 11 received 22 +vhost VQ 1 ring restore failed: -71: Protocol error (71) +Received unexpected msg type. Expected 22 received 11 +Fail to update device iotlb +Received unexpected msg type. 
Expected 11 received 22 +vhost VQ 0 ring restore failed: -71: Protocol error (71) +unable to start vhost net: 71: falling back on userspace virtio + +The failing sequence that leads to the first error is : +- QEMU sends a VHOST_USER_GET_STATUS (40) request to DPDK on the master + socket +- QEMU starts a nested event loop in order to wait for the + VHOST_USER_GET_STATUS response and to be able to process messages from + the slave channel +- DPDK sends a couple of legitimate IOTLB miss messages on the slave + channel +- QEMU processes each IOTLB request and sends VHOST_USER_IOTLB_MSG (22) + updates on the master socket +- QEMU assumes to receive a response for the latest VHOST_USER_IOTLB_MSG + but it gets the response for the VHOST_USER_GET_STATUS instead + +The subsequent errors have the same root cause : the nested event loop +breaks the order by design. It lures QEMU to expect responses to the +latest message sent on the master socket to arrive first. + +Since this was only needed for DAX enablement which is still not merged +upstream, just drop the code for now. A working solution will have to +be merged later on. Likely protect the master socket with a mutex +and service the slave channel with a separate thread, as discussed with +Maxime in the mail thread below. + +[0] https://lore.kernel.org/qemu-devel/43145ede-89dc-280e-b953-6a2b436de395@redhat.com/ + +Reported-by: Yanghang Liu +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2155173 +Signed-off-by: Greg Kurz +Message-Id: <20230119172424.478268-2-groug@kaod.org> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Stefan Hajnoczi +Acked-by: Maxime Coquelin +(cherry picked from commit f340a59d5a852d75ae34555723694c7e8eafbd0c) +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost-user.c | 35 +++-------------------------------- + 1 file changed, 3 insertions(+), 32 deletions(-) + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index 8f635844af..0ac00eb901 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -356,35 +356,6 @@ end: + return G_SOURCE_REMOVE; + } + +-static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, +- gpointer opaque); +- +-/* +- * This updates the read handler to use a new event loop context. +- * Event sources are removed from the previous context : this ensures +- * that events detected in the previous context are purged. They will +- * be re-detected and processed in the new context. +- */ +-static void slave_update_read_handler(struct vhost_dev *dev, +- GMainContext *ctxt) +-{ +- struct vhost_user *u = dev->opaque; +- +- if (!u->slave_ioc) { +- return; +- } +- +- if (u->slave_src) { +- g_source_destroy(u->slave_src); +- g_source_unref(u->slave_src); +- } +- +- u->slave_src = qio_channel_add_watch_source(u->slave_ioc, +- G_IO_IN | G_IO_HUP, +- slave_read, dev, NULL, +- ctxt); +-} +- + static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) + { + struct vhost_user *u = dev->opaque; +@@ -406,7 +377,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) + * be prepared for re-entrancy. So we create a new one and switch chr + * to use it. + */ +- slave_update_read_handler(dev, ctxt); + qemu_chr_be_update_read_handlers(chr->chr, ctxt); + qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); + +@@ -418,7 +388,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) + * context that have been processed by the nested loop are purged. 
+ */ + qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt); +- slave_update_read_handler(dev, NULL); + + g_main_loop_unref(loop); + g_main_context_unref(ctxt); +@@ -1802,7 +1771,9 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) + return -ECONNREFUSED; + } + u->slave_ioc = ioc; +- slave_update_read_handler(dev, NULL); ++ u->slave_src = qio_channel_add_watch_source(u->slave_ioc, ++ G_IO_IN | G_IO_HUP, ++ slave_read, dev, NULL, NULL); + + if (reply_supported) { + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; +-- +2.31.1 + diff --git a/kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch b/kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch new file mode 100644 index 0000000..0680a26 --- /dev/null +++ b/kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch @@ -0,0 +1,58 @@ +From ab68e13b7628f2348d41a4518a92508542af712f Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Fri, 3 Feb 2023 18:15:10 +0100 +Subject: [PATCH 05/20] accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page + +RH-Author: Eric Auger +RH-MergeRequest: 144: accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page +RH-Bugzilla: 2165280 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Gavin Shan +RH-Acked-by: Shaoqin Huang +RH-Commit: [1/1] 5b0863c34ba06c01c4e343d1ecd72402779c7de3 (eauger1/centos-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/2165280 +Upstream: yes +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=50530041 +Test: 'kvm unit test ./run_tests.sh -g debug' does not SIGSEV anymore + +After commit 4e4fa6c12d ("accel/tcg: Complete cpu initialization +before registration"), it looks the CPUJumpCache pointer can be NULL. +This causes a SIGSEV when running debug-wp-migration kvm unit test. + +At the first place it should be clarified why this TCG code is called +with KVM acceleration. This may hide another bug. 
+ +Fixes: 4e4fa6c12d ("accel/tcg: Complete cpu initialization before registration") +Signed-off-by: Eric Auger +Message-Id: <20230203171510.2867451-1-eric.auger@redhat.com> +Signed-off-by: Richard Henderson +(cherry picked from commit 99ab4d500af638ba3ebb20e8aa89d72201b70860) +Signed-off-by: Eric Auger +--- + accel/tcg/cputlb.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c +index 6f1c00682b..4244b0e4e3 100644 +--- a/accel/tcg/cputlb.c ++++ b/accel/tcg/cputlb.c +@@ -100,9 +100,14 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, + + static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr) + { +- int i, i0 = tb_jmp_cache_hash_page(page_addr); + CPUJumpCache *jc = cpu->tb_jmp_cache; ++ int i, i0; + ++ if (unlikely(!jc)) { ++ return; ++ } ++ ++ i0 = tb_jmp_cache_hash_page(page_addr); + for (i = 0; i < TB_JMP_PAGE_SIZE; i++) { + qatomic_set(&jc->array[i0 + i].tb, NULL); + } +-- +2.31.1 + diff --git a/kvm-block-Improve-empty-format-specific-info-dump.patch b/kvm-block-Improve-empty-format-specific-info-dump.patch new file mode 100644 index 0000000..5b54210 --- /dev/null +++ b/kvm-block-Improve-empty-format-specific-info-dump.patch @@ -0,0 +1,132 @@ +From 074c89b05dae971c7118cb769fd34e22135c8f4c Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:26:53 +0200 +Subject: [PATCH 06/20] block: Improve empty format-specific info dump + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [1/12] be551e83f426e620e673302198b51368bfd324ce (hreitz/qemu-kvm-c-9-s) + +When a block driver supports obtaining format-specific information, but +that object only contains optional fields, it is possible that none of +them are present, so that dump_qobject() (called by +bdrv_image_info_specific_dump()) will 
not print anything. + +The callers of bdrv_image_info_specific_dump() put a header above this +information ("Format specific information:\n"), which will look strange +when there is nothing below. Modify bdrv_image_info_specific_dump() to +print this header instead of its callers, and only if there is indeed +something to be printed. + +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-2-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 3716470b24f0f63090d59bcf28ad8fe6fb7835bd) +Signed-off-by: Hanna Czenczek +--- + block/qapi.c | 41 +++++++++++++++++++++++++++++++++++++---- + include/block/qapi.h | 3 ++- + qemu-io-cmds.c | 4 ++-- + 3 files changed, 41 insertions(+), 7 deletions(-) + +diff --git a/block/qapi.c b/block/qapi.c +index cf557e3aea..51202b470a 100644 +--- a/block/qapi.c ++++ b/block/qapi.c +@@ -777,7 +777,35 @@ static void dump_qdict(int indentation, QDict *dict) + } + } + +-void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec) ++/* ++ * Return whether dumping the given QObject with dump_qobject() would ++ * yield an empty dump, i.e. not print anything. ++ */ ++static bool qobject_is_empty_dump(const QObject *obj) ++{ ++ switch (qobject_type(obj)) { ++ case QTYPE_QNUM: ++ case QTYPE_QSTRING: ++ case QTYPE_QBOOL: ++ return false; ++ ++ case QTYPE_QDICT: ++ return qdict_size(qobject_to(QDict, obj)) == 0; ++ ++ case QTYPE_QLIST: ++ return qlist_empty(qobject_to(QList, obj)); ++ ++ default: ++ abort(); ++ } ++} ++ ++/** ++ * Dumps the given ImageInfoSpecific object in a human-readable form, ++ * prepending an optional prefix if the dump is not empty. 
++ */ ++void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, ++ const char *prefix) + { + QObject *obj, *data; + Visitor *v = qobject_output_visitor_new(&obj); +@@ -785,7 +813,12 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec) + visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort); + visit_complete(v, &obj); + data = qdict_get(qobject_to(QDict, obj), "data"); +- dump_qobject(1, data); ++ if (!qobject_is_empty_dump(data)) { ++ if (prefix) { ++ qemu_printf("%s", prefix); ++ } ++ dump_qobject(1, data); ++ } + qobject_unref(obj); + visit_free(v); + } +@@ -866,7 +899,7 @@ void bdrv_image_info_dump(ImageInfo *info) + } + + if (info->has_format_specific) { +- qemu_printf("Format specific information:\n"); +- bdrv_image_info_specific_dump(info->format_specific); ++ bdrv_image_info_specific_dump(info->format_specific, ++ "Format specific information:\n"); + } + } +diff --git a/include/block/qapi.h b/include/block/qapi.h +index 22c7807c89..c09859ea78 100644 +--- a/include/block/qapi.h ++++ b/include/block/qapi.h +@@ -40,6 +40,7 @@ void bdrv_query_image_info(BlockDriverState *bs, + Error **errp); + + void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); +-void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec); ++void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, ++ const char *prefix); + void bdrv_image_info_dump(ImageInfo *info); + #endif +diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c +index 952dc940f1..f4a374528e 100644 +--- a/qemu-io-cmds.c ++++ b/qemu-io-cmds.c +@@ -1825,8 +1825,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv) + return -EIO; + } + if (spec_info) { +- printf("Format specific information:\n"); +- bdrv_image_info_specific_dump(spec_info); ++ bdrv_image_info_specific_dump(spec_info, ++ "Format specific information:\n"); + qapi_free_ImageInfoSpecific(spec_info); + } + +-- +2.31.1 + diff --git a/kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch 
b/kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch new file mode 100644 index 0000000..2d95689 --- /dev/null +++ b/kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch @@ -0,0 +1,246 @@ +From 54e290df4bc1c9e83be7357caed6a2b1ba4f21f0 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:26:56 +0200 +Subject: [PATCH 09/20] block: Split BlockNodeInfo off of ImageInfo + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [4/12] fc8d69d549bb9a929db218b91697ee3ae95c1ff6 (hreitz/qemu-kvm-c-9-s) + +ImageInfo sometimes contains flat information, and sometimes it does +not. Split off a BlockNodeInfo struct, which only contains information +about a single node and has no link to the backing image. + +We do this so we can extend BlockNodeInfo to a BlockGraphInfo struct, +which has links to all child nodes, not just the backing node. It would +be strange to base BlockGraphInfo on ImageInfo, because then this +extended struct would have two links to the backing node (one in +BlockGraphInfo as one of all the child links, and one in ImageInfo). + +Furthermore, it is quite common to ignore the backing-image field +altogether: bdrv_query_image_info() does not set it, and +bdrv_image_info_dump() does not evaluate it. That signals that we +should have different structs for describing a single node and one that +has a link to the backing image. + +Still, bdrv_query_image_info() and bdrv_image_info_dump() are not +changed too much in this patch. Follow-up patches will handle them. 
+ +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-5-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit a2085f8909377b6df738f6c3f7ee6db4d16da8f7) +Signed-off-by: Hanna Czenczek +--- + block/qapi.c | 86 ++++++++++++++++++++++++++++++++------------ + include/block/qapi.h | 3 ++ + qapi/block-core.json | 24 +++++++++---- + 3 files changed, 85 insertions(+), 28 deletions(-) + +diff --git a/block/qapi.c b/block/qapi.c +index 51202b470a..e5022b4481 100644 +--- a/block/qapi.c ++++ b/block/qapi.c +@@ -241,30 +241,18 @@ int bdrv_query_snapshot_info_list(BlockDriverState *bs, + } + + /** +- * bdrv_query_image_info: +- * @bs: block device to examine +- * @p_info: location to store image information +- * @errp: location to store error information +- * +- * Store "flat" image information in @p_info. +- * +- * "Flat" means it does *not* query backing image information, +- * i.e. (*pinfo)->has_backing_image will be set to false and +- * (*pinfo)->backing_image to NULL even when the image does in fact have +- * a backing image. +- * +- * @p_info will be set only on success. On error, store error in @errp. ++ * Helper function for other query info functions. Store information about @bs ++ * in @info, setting @errp on error. 
+ */ +-void bdrv_query_image_info(BlockDriverState *bs, +- ImageInfo **p_info, +- Error **errp) ++static void bdrv_do_query_node_info(BlockDriverState *bs, ++ BlockNodeInfo *info, ++ Error **errp) + { + int64_t size; + const char *backing_filename; + BlockDriverInfo bdi; + int ret; + Error *err = NULL; +- ImageInfo *info; + + aio_context_acquire(bdrv_get_aio_context(bs)); + +@@ -277,7 +265,6 @@ void bdrv_query_image_info(BlockDriverState *bs, + + bdrv_refresh_filename(bs); + +- info = g_new0(ImageInfo, 1); + info->filename = g_strdup(bs->filename); + info->format = g_strdup(bdrv_get_format_name(bs)); + info->virtual_size = size; +@@ -298,7 +285,6 @@ void bdrv_query_image_info(BlockDriverState *bs, + info->format_specific = bdrv_get_specific_info(bs, &err); + if (err) { + error_propagate(errp, err); +- qapi_free_ImageInfo(info); + goto out; + } + info->has_format_specific = info->format_specific != NULL; +@@ -339,16 +325,72 @@ void bdrv_query_image_info(BlockDriverState *bs, + break; + default: + error_propagate(errp, err); +- qapi_free_ImageInfo(info); + goto out; + } + +- *p_info = info; +- + out: + aio_context_release(bdrv_get_aio_context(bs)); + } + ++/** ++ * bdrv_query_block_node_info: ++ * @bs: block node to examine ++ * @p_info: location to store node information ++ * @errp: location to store error information ++ * ++ * Store image information about @bs in @p_info. ++ * ++ * @p_info will be set only on success. On error, store error in @errp. 
++ */ ++void bdrv_query_block_node_info(BlockDriverState *bs, ++ BlockNodeInfo **p_info, ++ Error **errp) ++{ ++ BlockNodeInfo *info; ++ ERRP_GUARD(); ++ ++ info = g_new0(BlockNodeInfo, 1); ++ bdrv_do_query_node_info(bs, info, errp); ++ if (*errp) { ++ qapi_free_BlockNodeInfo(info); ++ return; ++ } ++ ++ *p_info = info; ++} ++ ++/** ++ * bdrv_query_image_info: ++ * @bs: block node to examine ++ * @p_info: location to store image information ++ * @errp: location to store error information ++ * ++ * Store "flat" image information in @p_info. ++ * ++ * "Flat" means it does *not* query backing image information, ++ * i.e. (*pinfo)->has_backing_image will be set to false and ++ * (*pinfo)->backing_image to NULL even when the image does in fact have ++ * a backing image. ++ * ++ * @p_info will be set only on success. On error, store error in @errp. ++ */ ++void bdrv_query_image_info(BlockDriverState *bs, ++ ImageInfo **p_info, ++ Error **errp) ++{ ++ ImageInfo *info; ++ ERRP_GUARD(); ++ ++ info = g_new0(ImageInfo, 1); ++ bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp); ++ if (*errp) { ++ qapi_free_ImageInfo(info); ++ return; ++ } ++ ++ *p_info = info; ++} ++ + /* @p_info will be set only on success. 
*/ + static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, + Error **errp) +diff --git a/include/block/qapi.h b/include/block/qapi.h +index c09859ea78..c7de4e3fa9 100644 +--- a/include/block/qapi.h ++++ b/include/block/qapi.h +@@ -35,6 +35,9 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, + int bdrv_query_snapshot_info_list(BlockDriverState *bs, + SnapshotInfoList **p_list, + Error **errp); ++void bdrv_query_block_node_info(BlockDriverState *bs, ++ BlockNodeInfo **p_info, ++ Error **errp); + void bdrv_query_image_info(BlockDriverState *bs, + ImageInfo **p_info, + Error **errp); +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 4b9365167f..7720da0498 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -251,7 +251,7 @@ + } } + + ## +-# @ImageInfo: ++# @BlockNodeInfo: + # + # Information about a QEMU image file + # +@@ -279,22 +279,34 @@ + # + # @snapshots: list of VM snapshots + # +-# @backing-image: info of the backing image (since 1.6) +-# + # @format-specific: structure supplying additional format-specific + # information (since 1.7) + # +-# Since: 1.3 ++# Since: 8.0 + ## +-{ 'struct': 'ImageInfo', ++{ 'struct': 'BlockNodeInfo', + 'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool', + '*actual-size': 'int', 'virtual-size': 'int', + '*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool', + '*backing-filename': 'str', '*full-backing-filename': 'str', + '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'], +- '*backing-image': 'ImageInfo', + '*format-specific': 'ImageInfoSpecific' } } + ++## ++# @ImageInfo: ++# ++# Information about a QEMU image file, and potentially its backing image ++# ++# @backing-image: info of the backing image ++# ++# Since: 1.3 ++## ++{ 'struct': 'ImageInfo', ++ 'base': 'BlockNodeInfo', ++ 'data': { ++ '*backing-image': 'ImageInfo' ++ } } ++ + ## + # @ImageCheck: + # +-- +2.31.1 + diff --git a/kvm-block-file-Add-file-specific-image-info.patch 
b/kvm-block-file-Add-file-specific-image-info.patch new file mode 100644 index 0000000..a81b6b0 --- /dev/null +++ b/kvm-block-file-Add-file-specific-image-info.patch @@ -0,0 +1,145 @@ +From 4af86458d6bea2a6e15fd57d4d4bbe88e35f7e72 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:26:54 +0200 +Subject: [PATCH 07/20] block/file: Add file-specific image info + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [2/12] d8cc351d6c16c41b2000e41dc555f13093a9edce (hreitz/qemu-kvm-c-9-s) + +Add some (optional) information that the file driver can provide for +image files, namely the extent size hint. + +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-3-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 7f36a50ab4e7d39369cac67be4ba9d6ee4081dc0) +Signed-off-by: Hanna Czenczek +--- + block/file-posix.c | 30 ++++++++++++++++++++++++++++++ + qapi/block-core.json | 26 ++++++++++++++++++++++++-- + 2 files changed, 54 insertions(+), 2 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index b9647c5ffc..df3da79aed 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -3095,6 +3095,34 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) + return 0; + } + ++static ImageInfoSpecific *raw_get_specific_info(BlockDriverState *bs, ++ Error **errp) ++{ ++ ImageInfoSpecificFile *file_info = g_new0(ImageInfoSpecificFile, 1); ++ ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1); ++ ++ *spec_info = (ImageInfoSpecific){ ++ .type = IMAGE_INFO_SPECIFIC_KIND_FILE, ++ .u.file.data = file_info, ++ }; ++ ++#ifdef FS_IOC_FSGETXATTR ++ { ++ BDRVRawState *s = bs->opaque; ++ struct fsxattr attr; ++ int ret; ++ ++ ret = ioctl(s->fd, FS_IOC_FSGETXATTR, &attr); ++ if (!ret && attr.fsx_extsize != 0) { ++ 
file_info->has_extent_size_hint = true; ++ file_info->extent_size_hint = attr.fsx_extsize; ++ } ++ } ++#endif ++ ++ return spec_info; ++} ++ + static BlockStatsSpecificFile get_blockstats_specific_file(BlockDriverState *bs) + { + BDRVRawState *s = bs->opaque; +@@ -3328,6 +3356,7 @@ BlockDriver bdrv_file = { + .bdrv_co_truncate = raw_co_truncate, + .bdrv_getlength = raw_getlength, + .bdrv_get_info = raw_get_info, ++ .bdrv_get_specific_info = raw_get_specific_info, + .bdrv_get_allocated_file_size + = raw_get_allocated_file_size, + .bdrv_get_specific_stats = raw_get_specific_stats, +@@ -3700,6 +3729,7 @@ static BlockDriver bdrv_host_device = { + .bdrv_co_truncate = raw_co_truncate, + .bdrv_getlength = raw_getlength, + .bdrv_get_info = raw_get_info, ++ .bdrv_get_specific_info = raw_get_specific_info, + .bdrv_get_allocated_file_size + = raw_get_allocated_file_size, + .bdrv_get_specific_stats = hdev_get_specific_stats, +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 95ac4fa634..f5d822cbd6 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -139,16 +139,29 @@ + '*encryption-format': 'RbdImageEncryptionFormat' + } } + ++## ++# @ImageInfoSpecificFile: ++# ++# @extent-size-hint: Extent size hint (if available) ++# ++# Since: 8.0 ++## ++{ 'struct': 'ImageInfoSpecificFile', ++ 'data': { ++ '*extent-size-hint': 'size' ++ } } ++ + ## + # @ImageInfoSpecificKind: + # + # @luks: Since 2.7 + # @rbd: Since 6.1 ++# @file: Since 8.0 + # + # Since: 1.7 + ## + { 'enum': 'ImageInfoSpecificKind', +- 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd' ] } ++ 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd', 'file' ] } + + ## + # @ImageInfoSpecificQCow2Wrapper: +@@ -185,6 +198,14 @@ + { 'struct': 'ImageInfoSpecificRbdWrapper', + 'data': { 'data': 'ImageInfoSpecificRbd' } } + ++## ++# @ImageInfoSpecificFileWrapper: ++# ++# Since: 8.0 ++## ++{ 'struct': 'ImageInfoSpecificFileWrapper', ++ 'data': { 'data': 'ImageInfoSpecificFile' } } ++ + ## + # @ImageInfoSpecific: + # +@@ -199,7 
+220,8 @@ + 'qcow2': 'ImageInfoSpecificQCow2Wrapper', + 'vmdk': 'ImageInfoSpecificVmdkWrapper', + 'luks': 'ImageInfoSpecificLUKSWrapper', +- 'rbd': 'ImageInfoSpecificRbdWrapper' ++ 'rbd': 'ImageInfoSpecificRbdWrapper', ++ 'file': 'ImageInfoSpecificFileWrapper' + } } + + ## +-- +2.31.1 + diff --git a/kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch b/kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch new file mode 100644 index 0000000..62979ef --- /dev/null +++ b/kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch @@ -0,0 +1,206 @@ +From c8c282c2e1d74cfc5de6527f7e20dfc3e76b67ac Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:27:00 +0200 +Subject: [PATCH 13/20] block/qapi: Add indentation to bdrv_node_info_dump() + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [8/12] d3a697e81ab9828457198075e5815a592363c725 (hreitz/qemu-kvm-c-9-s) + +In order to let qemu-img info present a block graph, add a parameter to +bdrv_node_info_dump() and bdrv_image_info_specific_dump() so that the +information of nodes below the root level can be given an indentation. 
+ +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-9-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 76c9e9750d1bd580e8ed4465f6be3a986434e7c3) +Signed-off-by: Hanna Czenczek +--- + block/monitor/block-hmp-cmds.c | 2 +- + block/qapi.c | 47 +++++++++++++++++++--------------- + include/block/qapi.h | 5 ++-- + qemu-img.c | 2 +- + qemu-io-cmds.c | 3 ++- + 5 files changed, 34 insertions(+), 25 deletions(-) + +diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c +index aa37faa601..72824d4e2e 100644 +--- a/block/monitor/block-hmp-cmds.c ++++ b/block/monitor/block-hmp-cmds.c +@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info, + monitor_printf(mon, "\nImages:\n"); + image_info = inserted->image; + while (1) { +- bdrv_node_info_dump(qapi_ImageInfo_base(image_info)); ++ bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0); + if (image_info->has_backing_image) { + image_info = image_info->backing_image; + } else { +diff --git a/block/qapi.c b/block/qapi.c +index f208c21ccf..3e35603f0c 100644 +--- a/block/qapi.c ++++ b/block/qapi.c +@@ -915,7 +915,8 @@ static bool qobject_is_empty_dump(const QObject *obj) + * prepending an optional prefix if the dump is not empty. 
+ */ + void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, +- const char *prefix) ++ const char *prefix, ++ int indentation) + { + QObject *obj, *data; + Visitor *v = qobject_output_visitor_new(&obj); +@@ -925,48 +926,51 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, + data = qdict_get(qobject_to(QDict, obj), "data"); + if (!qobject_is_empty_dump(data)) { + if (prefix) { +- qemu_printf("%s", prefix); ++ qemu_printf("%*s%s", indentation * 4, "", prefix); + } +- dump_qobject(1, data); ++ dump_qobject(indentation + 1, data); + } + qobject_unref(obj); + visit_free(v); + } + +-void bdrv_node_info_dump(BlockNodeInfo *info) ++void bdrv_node_info_dump(BlockNodeInfo *info, int indentation) + { + char *size_buf, *dsize_buf; ++ g_autofree char *ind_s = g_strdup_printf("%*s", indentation * 4, ""); ++ + if (!info->has_actual_size) { + dsize_buf = g_strdup("unavailable"); + } else { + dsize_buf = size_to_str(info->actual_size); + } + size_buf = size_to_str(info->virtual_size); +- qemu_printf("image: %s\n" +- "file format: %s\n" +- "virtual size: %s (%" PRId64 " bytes)\n" +- "disk size: %s\n", +- info->filename, info->format, size_buf, +- info->virtual_size, +- dsize_buf); ++ qemu_printf("%simage: %s\n" ++ "%sfile format: %s\n" ++ "%svirtual size: %s (%" PRId64 " bytes)\n" ++ "%sdisk size: %s\n", ++ ind_s, info->filename, ++ ind_s, info->format, ++ ind_s, size_buf, info->virtual_size, ++ ind_s, dsize_buf); + g_free(size_buf); + g_free(dsize_buf); + + if (info->has_encrypted && info->encrypted) { +- qemu_printf("encrypted: yes\n"); ++ qemu_printf("%sencrypted: yes\n", ind_s); + } + + if (info->has_cluster_size) { +- qemu_printf("cluster_size: %" PRId64 "\n", +- info->cluster_size); ++ qemu_printf("%scluster_size: %" PRId64 "\n", ++ ind_s, info->cluster_size); + } + + if (info->has_dirty_flag && info->dirty_flag) { +- qemu_printf("cleanly shut down: no\n"); ++ qemu_printf("%scleanly shut down: no\n", ind_s); + } + + if (info->has_backing_filename) 
{ +- qemu_printf("backing file: %s", info->backing_filename); ++ qemu_printf("%sbacking file: %s", ind_s, info->backing_filename); + if (!info->has_full_backing_filename) { + qemu_printf(" (cannot determine actual path)"); + } else if (strcmp(info->backing_filename, +@@ -975,15 +979,16 @@ void bdrv_node_info_dump(BlockNodeInfo *info) + } + qemu_printf("\n"); + if (info->has_backing_filename_format) { +- qemu_printf("backing file format: %s\n", +- info->backing_filename_format); ++ qemu_printf("%sbacking file format: %s\n", ++ ind_s, info->backing_filename_format); + } + } + + if (info->has_snapshots) { + SnapshotInfoList *elem; + +- qemu_printf("Snapshot list:\n"); ++ qemu_printf("%sSnapshot list:\n", ind_s); ++ qemu_printf("%s", ind_s); + bdrv_snapshot_dump(NULL); + qemu_printf("\n"); + +@@ -1003,6 +1008,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info) + + pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id); + pstrcpy(sn.name, sizeof(sn.name), elem->value->name); ++ qemu_printf("%s", ind_s); + bdrv_snapshot_dump(&sn); + qemu_printf("\n"); + } +@@ -1010,6 +1016,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info) + + if (info->has_format_specific) { + bdrv_image_info_specific_dump(info->format_specific, +- "Format specific information:\n"); ++ "Format specific information:\n", ++ indentation); + } + } +diff --git a/include/block/qapi.h b/include/block/qapi.h +index 196436020e..38855f2ae9 100644 +--- a/include/block/qapi.h ++++ b/include/block/qapi.h +@@ -49,6 +49,7 @@ void bdrv_query_block_graph_info(BlockDriverState *bs, + + void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); + void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, +- const char *prefix); +-void bdrv_node_info_dump(BlockNodeInfo *info); ++ const char *prefix, ++ int indentation); ++void bdrv_node_info_dump(BlockNodeInfo *info, int indentation); + #endif +diff --git a/qemu-img.c b/qemu-img.c +index 3b2ca3bbcb..30b4ea58bb 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -2859,7 +2859,7 @@ 
static void dump_human_image_info_list(BlockNodeInfoList *list) + } + delim = true; + +- bdrv_node_info_dump(elem->value); ++ bdrv_node_info_dump(elem->value, 0); + } + } + +diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c +index f4a374528e..fdcb89211b 100644 +--- a/qemu-io-cmds.c ++++ b/qemu-io-cmds.c +@@ -1826,7 +1826,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv) + } + if (spec_info) { + bdrv_image_info_specific_dump(spec_info, +- "Format specific information:\n"); ++ "Format specific information:\n", ++ 0); + qapi_free_ImageInfoSpecific(spec_info); + } + +-- +2.31.1 + diff --git a/kvm-block-qapi-Introduce-BlockGraphInfo.patch b/kvm-block-qapi-Introduce-BlockGraphInfo.patch new file mode 100644 index 0000000..e9a1622 --- /dev/null +++ b/kvm-block-qapi-Introduce-BlockGraphInfo.patch @@ -0,0 +1,155 @@ +From 0044e3848b02ef6edba5961d1f4b6297d137d207 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:26:59 +0200 +Subject: [PATCH 12/20] block/qapi: Introduce BlockGraphInfo + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [7/12] de47bac372cd552b812c774a2f35f95923af74ff (hreitz/qemu-kvm-c-9-s) + +Introduce a new QAPI type BlockGraphInfo and an associated +bdrv_query_block_graph_info() function that recursively gathers +BlockNodeInfo objects through a block graph. + +A follow-up patch is going to make "qemu-img info" use this to print +information about all nodes that are (usually implicitly) opened for a +given image file. 
+ +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-8-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 6cab33997b91eb86e82a6a2ae58a24f835249d4a) +Signed-off-by: Hanna Czenczek +--- + block/qapi.c | 48 ++++++++++++++++++++++++++++++++++++++++++++ + include/block/qapi.h | 3 +++ + qapi/block-core.json | 35 ++++++++++++++++++++++++++++++++ + 3 files changed, 86 insertions(+) + +diff --git a/block/qapi.c b/block/qapi.c +index 5d0a8d2ce3..f208c21ccf 100644 +--- a/block/qapi.c ++++ b/block/qapi.c +@@ -411,6 +411,54 @@ fail: + qapi_free_ImageInfo(info); + } + ++/** ++ * bdrv_query_block_graph_info: ++ * @bs: root node to start from ++ * @p_info: location to store image information ++ * @errp: location to store error information ++ * ++ * Store image information about the graph starting from @bs in @p_info. ++ * ++ * @p_info will be set only on success. On error, store error in @errp. ++ */ ++void bdrv_query_block_graph_info(BlockDriverState *bs, ++ BlockGraphInfo **p_info, ++ Error **errp) ++{ ++ BlockGraphInfo *info; ++ BlockChildInfoList **children_list_tail; ++ BdrvChild *c; ++ ERRP_GUARD(); ++ ++ info = g_new0(BlockGraphInfo, 1); ++ bdrv_do_query_node_info(bs, qapi_BlockGraphInfo_base(info), errp); ++ if (*errp) { ++ goto fail; ++ } ++ ++ children_list_tail = &info->children; ++ ++ QLIST_FOREACH(c, &bs->children, next) { ++ BlockChildInfo *c_info; ++ ++ c_info = g_new0(BlockChildInfo, 1); ++ QAPI_LIST_APPEND(children_list_tail, c_info); ++ ++ c_info->name = g_strdup(c->name); ++ bdrv_query_block_graph_info(c->bs, &c_info->info, errp); ++ if (*errp) { ++ goto fail; ++ } ++ } ++ ++ *p_info = info; ++ return; ++ ++fail: ++ assert(*errp != NULL); ++ qapi_free_BlockGraphInfo(info); ++} ++ + /* @p_info will be set only on success. 
*/ + static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, + Error **errp) +diff --git a/include/block/qapi.h b/include/block/qapi.h +index 2174bf8fa2..196436020e 100644 +--- a/include/block/qapi.h ++++ b/include/block/qapi.h +@@ -43,6 +43,9 @@ void bdrv_query_image_info(BlockDriverState *bs, + bool flat, + bool skip_implicit_filters, + Error **errp); ++void bdrv_query_block_graph_info(BlockDriverState *bs, ++ BlockGraphInfo **p_info, ++ Error **errp); + + void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); + void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 4cf2deeb6c..d703e0fb16 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -307,6 +307,41 @@ + '*backing-image': 'ImageInfo' + } } + ++## ++# @BlockChildInfo: ++# ++# Information about all nodes in the block graph starting at some node, ++# annotated with information about that node in relation to its parent. ++# ++# @name: Child name of the root node in the BlockGraphInfo struct, in its role ++# as the child of some undescribed parent node ++# ++# @info: Block graph information starting at this node ++# ++# Since: 8.0 ++## ++{ 'struct': 'BlockChildInfo', ++ 'data': { ++ 'name': 'str', ++ 'info': 'BlockGraphInfo' ++ } } ++ ++## ++# @BlockGraphInfo: ++# ++# Information about all nodes in a block (sub)graph in the form of BlockNodeInfo ++# data. ++# The base BlockNodeInfo struct contains the information for the (sub)graph's ++# root node. 
++# ++# @children: Array of links to this node's child nodes' information ++# ++# Since: 8.0 ++## ++{ 'struct': 'BlockGraphInfo', ++ 'base': 'BlockNodeInfo', ++ 'data': { 'children': ['BlockChildInfo'] } } ++ + ## + # @ImageCheck: + # +-- +2.31.1 + diff --git a/kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch b/kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch new file mode 100644 index 0000000..e5c012a --- /dev/null +++ b/kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch @@ -0,0 +1,197 @@ +From ae2c3df00d673d436fe4d8ec9103a3b76d7e6233 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:26:58 +0200 +Subject: [PATCH 11/20] block/qapi: Let bdrv_query_image_info() recurse + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [6/12] 451a83fd682cd6dd6026c22974d18c2f12ee06e3 (hreitz/qemu-kvm-c-9-s) + +There is no real reason why bdrv_query_image_info() should generally not +recurse. The ImageInfo struct has a pointer to the backing image, so it +should generally be filled, unless the caller explicitly opts out. + +This moves the recursing code from bdrv_block_device_info() into +bdrv_query_image_info(). + +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-7-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 5d8813593f3f673fc96eed199beb35690cc46f58) + +Conflicts: + block/qapi.c: Conflicts with + 54fde4ff0621c22b15cbaaa3c74301cc0dbd1c9e ("qapi block: Elide + redundant has_FOO in generated C"), which dropped + `has_backing_image`. Without that commit (and 44ea9d9be before it), + we still need to set `has_backing_image` in + `bdrv_query_image_info()`. 
+ +Signed-off-by: Hanna Czenczek +--- + block/qapi.c | 94 +++++++++++++++++++++++++++----------------- + include/block/qapi.h | 2 + + 2 files changed, 59 insertions(+), 37 deletions(-) + +diff --git a/block/qapi.c b/block/qapi.c +index ad88bf9b38..5d0a8d2ce3 100644 +--- a/block/qapi.c ++++ b/block/qapi.c +@@ -47,8 +47,10 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, + Error **errp) + { + ImageInfo **p_image_info; ++ ImageInfo *backing_info; + BlockDriverState *bs0, *backing; + BlockDeviceInfo *info; ++ ERRP_GUARD(); + + if (!bs->drv) { + error_setg(errp, "Block device %s is ejected", bs->node_name); +@@ -149,38 +151,21 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, + bs0 = bs; + p_image_info = &info->image; + info->backing_file_depth = 0; +- while (1) { +- Error *local_err = NULL; +- bdrv_query_image_info(bs0, p_image_info, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- qapi_free_BlockDeviceInfo(info); +- return NULL; +- } +- +- /* stop gathering data for flat output */ +- if (flat) { +- break; +- } + +- if (bs0->drv && bdrv_filter_or_cow_child(bs0)) { +- /* +- * Put any filtered child here (for backwards compatibility to when +- * we put bs0->backing here, which might be any filtered child). 
+- */ +- info->backing_file_depth++; +- bs0 = bdrv_filter_or_cow_bs(bs0); +- (*p_image_info)->has_backing_image = true; +- p_image_info = &((*p_image_info)->backing_image); +- } else { +- break; +- } ++ /* ++ * Skip automatically inserted nodes that the user isn't aware of for ++ * query-block (blk != NULL), but not for query-named-block-nodes ++ */ ++ bdrv_query_image_info(bs0, p_image_info, flat, blk != NULL, errp); ++ if (*errp) { ++ qapi_free_BlockDeviceInfo(info); ++ return NULL; ++ } + +- /* Skip automatically inserted nodes that the user isn't aware of for +- * query-block (blk != NULL), but not for query-named-block-nodes */ +- if (blk) { +- bs0 = bdrv_skip_implicit_filters(bs0); +- } ++ backing_info = info->image->backing_image; ++ while (backing_info) { ++ info->backing_file_depth++; ++ backing_info = backing_info->backing_image; + } + + return info; +@@ -363,19 +348,28 @@ void bdrv_query_block_node_info(BlockDriverState *bs, + * bdrv_query_image_info: + * @bs: block node to examine + * @p_info: location to store image information ++ * @flat: skip backing node information ++ * @skip_implicit_filters: skip implicit filters in the backing chain + * @errp: location to store error information + * +- * Store "flat" image information in @p_info. ++ * Store image information in @p_info, potentially recursively covering the ++ * backing chain. + * +- * "Flat" means it does *not* query backing image information, +- * i.e. (*pinfo)->has_backing_image will be set to false and +- * (*pinfo)->backing_image to NULL even when the image does in fact have +- * a backing image. ++ * If @flat is true, do not query backing image information, i.e. ++ * (*p_info)->has_backing_image will be set to false and ++ * (*p_info)->backing_image to NULL even when the image does in fact have a ++ * backing image. ++ * ++ * If @skip_implicit_filters is true, implicit filter nodes in the backing chain ++ * will be skipped when querying backing image information. 
++ * (@skip_implicit_filters is ignored when @flat is true.) + * + * @p_info will be set only on success. On error, store error in @errp. + */ + void bdrv_query_image_info(BlockDriverState *bs, + ImageInfo **p_info, ++ bool flat, ++ bool skip_implicit_filters, + Error **errp) + { + ImageInfo *info; +@@ -384,11 +378,37 @@ void bdrv_query_image_info(BlockDriverState *bs, + info = g_new0(ImageInfo, 1); + bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp); + if (*errp) { +- qapi_free_ImageInfo(info); +- return; ++ goto fail; ++ } ++ ++ if (!flat) { ++ BlockDriverState *backing; ++ ++ /* ++ * Use any filtered child here (for backwards compatibility to when ++ * we always took bs->backing, which might be any filtered child). ++ */ ++ backing = bdrv_filter_or_cow_bs(bs); ++ if (skip_implicit_filters) { ++ backing = bdrv_skip_implicit_filters(backing); ++ } ++ ++ if (backing) { ++ bdrv_query_image_info(backing, &info->backing_image, false, ++ skip_implicit_filters, errp); ++ if (*errp) { ++ goto fail; ++ } ++ info->has_backing_image = true; ++ } + } + + *p_info = info; ++ return; ++ ++fail: ++ assert(*errp); ++ qapi_free_ImageInfo(info); + } + + /* @p_info will be set only on success. 
*/ +diff --git a/include/block/qapi.h b/include/block/qapi.h +index 22198dcd0c..2174bf8fa2 100644 +--- a/include/block/qapi.h ++++ b/include/block/qapi.h +@@ -40,6 +40,8 @@ void bdrv_query_block_node_info(BlockDriverState *bs, + Error **errp); + void bdrv_query_image_info(BlockDriverState *bs, + ImageInfo **p_info, ++ bool flat, ++ bool skip_implicit_filters, + Error **errp); + + void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); +-- +2.31.1 + diff --git a/kvm-block-vmdk-Change-extent-info-type.patch b/kvm-block-vmdk-Change-extent-info-type.patch new file mode 100644 index 0000000..6b8f6a7 --- /dev/null +++ b/kvm-block-vmdk-Change-extent-info-type.patch @@ -0,0 +1,140 @@ +From d8caed018afb0f60f449e971398d2a8d6c2992e7 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:26:55 +0200 +Subject: [PATCH 08/20] block/vmdk: Change extent info type + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [3/12] efe50a2797c679ce6bb5faa423047461a34e6792 (hreitz/qemu-kvm-c-9-s) + +VMDK's implementation of .bdrv_get_specific_info() returns information +about its extent files, ostensibly in the form of ImageInfo objects. +However, it does not get this information through +bdrv_query_image_info(), but fills only a select few fields with custom +information that does not always match the fields' purposes. + +For example, @format, which is supposed to be a block driver name, is +filled with the extent type, e.g. SPARSE or FLAT. + +In ImageInfo, @compressed shows whether the data that can be seen in the +image is stored in compressed form or not. For example, a compressed +qcow2 image will store compressed data in its data file, but when +accessing the qcow2 node, you will see normal data. 
This is not how +VMDK uses the @compressed field for its extent files: Instead, it +signifies whether accessing the extent file will yield compressed data +(which the VMDK driver then (de-)compresses). + +Create a new structure to represent the extent information. This allows +us to clarify the fields' meanings, and it clearly shows that these are +not complete ImageInfo objects. (That is, if a user wants an extent +file's ImageInfo object, they will need to query it separately, and will +not get it from ImageInfoSpecificVmdk.extents.) + +Note that this removes the last use of ['ImageInfo'] (i.e. an array of +ImageInfo objects), so the QAPI generator will no longer generate +ImageInfoList by default. However, we use it in qemu-img.c, so we need +to create a dummy object to force the generator to create that type, +similarly to DummyForceArrays in machine.json (introduced in commit +9f08c8ec73878122ad4b061ed334f0437afaaa32 ("qapi: Lazy creation of array +types")). + +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-4-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 456e75171a85c19a5bfa202eefcbdc4ef1692f05) +Signed-off-by: Hanna Czenczek +--- + block/vmdk.c | 8 ++++---- + qapi/block-core.json | 38 +++++++++++++++++++++++++++++++++++++- + 2 files changed, 41 insertions(+), 5 deletions(-) + +diff --git a/block/vmdk.c b/block/vmdk.c +index 26376352b9..4435b9880b 100644 +--- a/block/vmdk.c ++++ b/block/vmdk.c +@@ -2901,12 +2901,12 @@ static int vmdk_has_zero_init(BlockDriverState *bs) + return 1; + } + +-static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent) ++static VmdkExtentInfo *vmdk_get_extent_info(VmdkExtent *extent) + { +- ImageInfo *info = g_new0(ImageInfo, 1); ++ VmdkExtentInfo *info = g_new0(VmdkExtentInfo, 1); + + bdrv_refresh_filename(extent->file->bs); +- *info = (ImageInfo){ ++ *info = (VmdkExtentInfo){ + .filename = g_strdup(extent->file->bs->filename), + .format = g_strdup(extent->type), +
.virtual_size = extent->sectors * BDRV_SECTOR_SIZE, +@@ -2985,7 +2985,7 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs, + int i; + BDRVVmdkState *s = bs->opaque; + ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1); +- ImageInfoList **tail; ++ VmdkExtentInfoList **tail; + + *spec_info = (ImageInfoSpecific){ + .type = IMAGE_INFO_SPECIFIC_KIND_VMDK, +diff --git a/qapi/block-core.json b/qapi/block-core.json +index f5d822cbd6..4b9365167f 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -124,7 +124,33 @@ + 'create-type': 'str', + 'cid': 'int', + 'parent-cid': 'int', +- 'extents': ['ImageInfo'] ++ 'extents': ['VmdkExtentInfo'] ++ } } ++ ++## ++# @VmdkExtentInfo: ++# ++# Information about a VMDK extent file ++# ++# @filename: Name of the extent file ++# ++# @format: Extent type (e.g. FLAT or SPARSE) ++# ++# @virtual-size: Number of bytes covered by this extent ++# ++# @cluster-size: Cluster size in bytes (for non-flat extents) ++# ++# @compressed: Whether this extent contains compressed data ++# ++# Since: 8.0 ++## ++{ 'struct': 'VmdkExtentInfo', ++ 'data': { ++ 'filename': 'str', ++ 'format': 'str', ++ 'virtual-size': 'int', ++ '*cluster-size': 'int', ++ '*compressed': 'bool' + } } + + ## +@@ -5754,3 +5780,13 @@ + 'data': { 'device': 'str', '*id': 'str', '*name': 'str'}, + 'returns': 'SnapshotInfo', + 'allow-preconfig': true } ++ ++## ++# @DummyBlockCoreForceArrays: ++# ++# Not used by QMP; hack to let us use ImageInfoList internally ++# ++# Since: 8.0 ++## ++{ 'struct': 'DummyBlockCoreForceArrays', ++ 'data': { 'unused-image-info': ['ImageInfo'] } } +-- +2.31.1 + diff --git a/kvm-iotests-106-214-308-Read-only-one-size-line.patch b/kvm-iotests-106-214-308-Read-only-one-size-line.patch new file mode 100644 index 0000000..399acfc --- /dev/null +++ b/kvm-iotests-106-214-308-Read-only-one-size-line.patch @@ -0,0 +1,99 @@ +From 6727e92a97f8ee9f367a41111bef3f5cad4a479a Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 
20 Jun 2022 18:27:02 +0200 +Subject: [PATCH 15/20] iotests/106, 214, 308: Read only one size line + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [10/12] 1554e0a92b92ed101a251478ccae43f45f6e071e (hreitz/qemu-kvm-c-9-s) + +These tests read size information (sometimes disk size, sometimes +virtual size) from qemu-img info's output. Once qemu-img starts +printing info about child nodes, we are going to see multiple instances +of that per image, but these tests are only interested in the first one, +so use "head -n 1" to get it. + +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-11-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 74163adda3101b127943f7cbbf8fcccd2d472426) +Signed-off-by: Hanna Czenczek +--- + tests/qemu-iotests/106 | 4 ++-- + tests/qemu-iotests/214 | 6 ++++-- + tests/qemu-iotests/308 | 4 ++-- + 3 files changed, 8 insertions(+), 6 deletions(-) + +diff --git a/tests/qemu-iotests/106 b/tests/qemu-iotests/106 +index 9d6adb542d..ae0fc46691 100755 +--- a/tests/qemu-iotests/106 ++++ b/tests/qemu-iotests/106 +@@ -66,7 +66,7 @@ for create_mode in off falloc full; do + expected_size=$((expected_size + $GROWTH_SIZE)) + fi + +- actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size') ++ actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size' | head -n 1) + actual_size=$(echo "$actual_size" | sed -e 's/^[^0-9]*\([0-9]\+\).*$/\1/') + + # The actual size may exceed the expected size, depending on the file +@@ -105,7 +105,7 @@ for growth_mode in falloc full; do + _make_test_img -o "extent_size_hint=0" 2G + $QEMU_IMG resize -f "$IMGFMT" --preallocation=$growth_mode "$TEST_IMG" +${GROWTH_SIZE}K + +- actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size') ++ actual_size=$($QEMU_IMG info 
-f "$IMGFMT" "$TEST_IMG" | grep 'disk size' | head -n 1) + actual_size=$(echo "$actual_size" | sed -e 's/^[^0-9]*\([0-9]\+\).*$/\1/') + + if [ $actual_size -lt $GROWTH_SIZE ]; then +diff --git a/tests/qemu-iotests/214 b/tests/qemu-iotests/214 +index c66e246ba2..55ffcd7f44 100755 +--- a/tests/qemu-iotests/214 ++++ b/tests/qemu-iotests/214 +@@ -102,7 +102,8 @@ let data_size="8 * $cluster_size" + $QEMU_IO -c "write -P 0xaa 0 $data_size" "$TEST_IMG" \ + 2>&1 | _filter_qemu_io | _filter_testdir + sizeA=$($QEMU_IMG info --output=json "$TEST_IMG" | +- sed -n '/"actual-size":/ s/[^0-9]//gp') ++ sed -n '/"actual-size":/ s/[^0-9]//gp' | ++ head -n 1) + + _make_test_img 2M -o cluster_size=$cluster_size + echo "Write compressed data:" +@@ -124,7 +125,8 @@ $QEMU_IO -c "write -P 0xcc $offset $data_size" "json:{\ + _filter_qemu_io | _filter_testdir + + sizeB=$($QEMU_IMG info --output=json "$TEST_IMG" | +- sed -n '/"actual-size":/ s/[^0-9]//gp') ++ sed -n '/"actual-size":/ s/[^0-9]//gp' | ++ head -n 1) + + if [ $sizeA -lt $sizeB ] + then +diff --git a/tests/qemu-iotests/308 b/tests/qemu-iotests/308 +index bde4aac2fa..09275e9a10 100755 +--- a/tests/qemu-iotests/308 ++++ b/tests/qemu-iotests/308 +@@ -217,12 +217,12 @@ echo + echo '=== Remove export ===' + + # Double-check that $EXT_MP appears as a non-empty file (the raw image) +-$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' ++$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' | head -n 1 + + fuse_export_del 'export-mp' + + # See that the file appears empty again +-$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' ++$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' | head -n 1 + + echo + echo '=== Writable export ===' +-- +2.31.1 + diff --git a/kvm-iotests-Filter-child-node-information.patch b/kvm-iotests-Filter-child-node-information.patch new file mode 100644 index 0000000..12eee3a --- /dev/null +++ b/kvm-iotests-Filter-child-node-information.patch @@ -0,0 +1,171 @@ +From 3102e62f80757729c97e58e2b3d62a6a9de952a7 
Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:27:01 +0200 +Subject: [PATCH 14/20] iotests: Filter child node information + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [9/12] 0b0a42d54397791f7f149e53c9175b7863707e70 (hreitz/qemu-kvm-c-9-s) + +Before we let qemu-img info print child node information, have +common.filter, common.rc, and iotests.py filter it from the test output +so we get as few reference output changes as possible. + +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-10-hreitz@redhat.com> +Tested-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit bcc6777ad6facede73c0cf8b1700045bf4365f7d) +Signed-off-by: Hanna Czenczek +--- + tests/qemu-iotests/common.filter | 22 ++++++++++++++-------- + tests/qemu-iotests/common.rc | 22 ++++++++++++++-------- + tests/qemu-iotests/iotests.py | 18 +++++++++++++++--- + 3 files changed, 43 insertions(+), 19 deletions(-) + +diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter +index 6a13757177..6ddda2ee64 100644 +--- a/tests/qemu-iotests/common.filter ++++ b/tests/qemu-iotests/common.filter +@@ -224,6 +224,7 @@ _filter_img_info() + + discard=0 + regex_json_spec_start='^ *"format-specific": \{' ++ regex_json_child_start='^ *"children": \[' + gsed -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ + -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ + -e "s#$TEST_DIR#TEST_DIR#g" \ +@@ -252,20 +253,25 @@ _filter_img_info() + -e 's/\(compression type: \)\(zlib\|zstd\)/\1COMPRESSION_TYPE/' \ + -e "s/uuid: [-a-f0-9]\\+/uuid: 00000000-0000-0000-0000-000000000000/" | \ + while IFS='' read -r line; do +- if [[ $format_specific == 1 ]]; then +- discard=0 +- elif [[ $line == "Format specific information:" ]]; then +- discard=1 +- elif [[ $line =~ $regex_json_spec_start ]]; then +- discard=2 +- 
regex_json_spec_end="^${line%%[^ ]*}\\},? *$" ++ if [[ $discard == 0 ]]; then ++ if [[ $format_specific == 0 && $line == "Format specific information:" ]]; then ++ discard=1 ++ elif [[ $line =~ "Child node '/" ]]; then ++ discard=1 ++ elif [[ $line =~ $regex_json_spec_start ]]; then ++ discard=2 ++ regex_json_end="^${line%%[^ ]*}\\},? *$" ++ elif [[ $line =~ $regex_json_child_start ]]; then ++ discard=2 ++ regex_json_end="^${line%%[^ ]*}\\],? *$" ++ fi + fi + if [[ $discard == 0 ]]; then + echo "$line" + elif [[ $discard == 1 && ! $line ]]; then + echo + discard=0 +- elif [[ $discard == 2 && $line =~ $regex_json_spec_end ]]; then ++ elif [[ $discard == 2 && $line =~ $regex_json_end ]]; then + discard=0 + fi + done +diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc +index db757025cb..f4476b62f7 100644 +--- a/tests/qemu-iotests/common.rc ++++ b/tests/qemu-iotests/common.rc +@@ -711,6 +711,7 @@ _img_info() + + discard=0 + regex_json_spec_start='^ *"format-specific": \{' ++ regex_json_child_start='^ *"children": \[' + $QEMU_IMG info $QEMU_IMG_EXTRA_ARGS "$@" "$TEST_IMG" 2>&1 | \ + sed -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ + -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ +@@ -721,20 +722,25 @@ _img_info() + -e "/^disk size:/ D" \ + -e "/actual-size/ D" | \ + while IFS='' read -r line; do +- if [[ $format_specific == 1 ]]; then +- discard=0 +- elif [[ $line == "Format specific information:" ]]; then +- discard=1 +- elif [[ $line =~ $regex_json_spec_start ]]; then +- discard=2 +- regex_json_spec_end="^${line%%[^ ]*}\\},? *$" ++ if [[ $discard == 0 ]]; then ++ if [[ $format_specific == 0 && $line == "Format specific information:" ]]; then ++ discard=1 ++ elif [[ $line =~ "Child node '/" ]]; then ++ discard=1 ++ elif [[ $format_specific == 0 && $line =~ $regex_json_spec_start ]]; then ++ discard=2 ++ regex_json_end="^${line%%[^ ]*}\\},? *$" ++ elif [[ $line =~ $regex_json_child_start ]]; then ++ discard=2 ++ regex_json_end="^${line%%[^ ]*}\\],? 
*$" ++ fi + fi + if [[ $discard == 0 ]]; then + echo "$line" + elif [[ $discard == 1 && ! $line ]]; then + echo + discard=0 +- elif [[ $discard == 2 && $line =~ $regex_json_spec_end ]]; then ++ elif [[ $discard == 2 && $line =~ $regex_json_end ]]; then + discard=0 + fi + done +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index da7d6637e1..94aeb3f3b2 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -329,7 +329,7 @@ def qemu_img_log(*args: str, check: bool = True + + def img_info_log(filename: str, filter_path: Optional[str] = None, + use_image_opts: bool = False, extra_args: Sequence[str] = (), +- check: bool = True, ++ check: bool = True, drop_child_info: bool = True, + ) -> None: + args = ['info'] + if use_image_opts: +@@ -342,7 +342,7 @@ def img_info_log(filename: str, filter_path: Optional[str] = None, + output = qemu_img(*args, check=check).stdout + if not filter_path: + filter_path = filename +- log(filter_img_info(output, filter_path)) ++ log(filter_img_info(output, filter_path, drop_child_info)) + + def qemu_io_wrap_args(args: Sequence[str]) -> List[str]: + if '-f' in args or '--image-opts' in args: +@@ -642,11 +642,23 @@ def _filter(_key, value): + def filter_generated_node_ids(msg): + return re.sub("#block[0-9]+", "NODE_NAME", msg) + +-def filter_img_info(output, filename): ++def filter_img_info(output: str, filename: str, ++ drop_child_info: bool = True) -> str: + lines = [] ++ drop_indented = False + for line in output.split('\n'): + if 'disk size' in line or 'actual-size' in line: + continue ++ ++ # Drop child node info ++ if drop_indented: ++ if line.startswith(' '): ++ continue ++ drop_indented = False ++ if drop_child_info and "Child node '/" in line: ++ drop_indented = True ++ continue ++ + line = line.replace(filename, 'TEST_IMG') + line = filter_testfiles(line) + line = line.replace(imgfmt, 'IMGFMT') +-- +2.31.1 + diff --git 
a/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch b/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch new file mode 100644 index 0000000..7f39f4a --- /dev/null +++ b/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch @@ -0,0 +1,67 @@ +From 46ead2c391924b68741d6da28f28f909b80f5914 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 12 Jan 2023 20:14:51 +0100 +Subject: [PATCH 01/20] qcow2: Fix theoretical corruption in store_bitmap() + error path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image +RH-Bugzilla: 2150180 +RH-Acked-by: Thomas Huth +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefano Garzarella +RH-Commit: [1/4] a6a497947179431567d330d0501247a3749fb9fd (kmwolf/centos-qemu-kvm) + +In order to write the bitmap table to the image file, it is converted to +big endian. If the write fails, it is passed to clear_bitmap_table() to +free all of the clusters it had allocated before. However, if we don't +convert it back to native endianness first, we'll free things at a wrong +offset. + +In practical terms, the offsets will be so high that we won't actually +free any allocated clusters, but just run into an error, but in theory +this can cause image corruption. 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Message-Id: <20230112191454.169353-2-kwolf@redhat.com> +Reviewed-by: Hanna Czenczek +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit b03dd9613bcf8fe948581b2b3585510cb525c382) +Signed-off-by: Kevin Wolf +--- + block/qcow2-bitmap.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c +index bcad567c0c..3dff99ba06 100644 +--- a/block/qcow2-bitmap.c ++++ b/block/qcow2-bitmap.c +@@ -115,7 +115,7 @@ static int update_header_sync(BlockDriverState *bs) + return bdrv_flush(bs->file->bs); + } + +-static inline void bitmap_table_to_be(uint64_t *bitmap_table, size_t size) ++static inline void bitmap_table_bswap_be(uint64_t *bitmap_table, size_t size) + { + size_t i; + +@@ -1401,9 +1401,10 @@ static int store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp) + goto fail; + } + +- bitmap_table_to_be(tb, tb_size); ++ bitmap_table_bswap_be(tb, tb_size); + ret = bdrv_pwrite(bs->file, tb_offset, tb_size * sizeof(tb[0]), tb, 0); + if (ret < 0) { ++ bitmap_table_bswap_be(tb, tb_size); + error_setg_errno(errp, -ret, "Failed to write bitmap '%s' to file", + bm_name); + goto fail; +-- +2.31.1 + diff --git a/kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch b/kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch new file mode 100644 index 0000000..eff4d2e --- /dev/null +++ b/kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch @@ -0,0 +1,197 @@ +From b1970c733dc46b2a8f648997a7e1c5d12900ff54 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:27:04 +0200 +Subject: [PATCH 17/20] qemu-img: Change info key names for protocol nodes + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [12/12] 
67c260aaa05466410503fecee6210bf9d47e8c7c (hreitz/qemu-kvm-c-9-s) + +Currently, when querying a qcow2 image, qemu-img info reports something +like this: + +image: test.qcow2 +file format: qcow2 +virtual size: 64 MiB (67108864 bytes) +disk size: 196 KiB +cluster_size: 65536 +Format specific information: + compat: 1.1 + compression type: zlib + lazy refcounts: false + refcount bits: 16 + corrupt: false + extended l2: false +Child node '/file': + image: test.qcow2 + file format: file + virtual size: 192 KiB (197120 bytes) + disk size: 196 KiB + Format specific information: + extent size hint: 1048576 + +Notably, the way the keys are named is specific for image files: The +filename is shown under "image", the BDS driver under "file format", and +the BDS length under "virtual size". This does not make much sense for +nodes that are not actually supposed to be guest images, like the /file +child node shown above. + +Give bdrv_node_info_dump() a @protocol parameter that gives a hint that +the respective node is probably just used for data storage and does not +necessarily present the data for a VM guest disk. This renames the keys +so that with this patch, the output becomes: + +image: test.qcow2 +[...] +Child node '/file': + filename: test.qcow2 + protocol type: file + file length: 192 KiB (197120 bytes) + disk size: 196 KiB + Format specific information: + extent size hint: 1048576 + +(Perhaps we should also rename "Format specific information", but I +could not come up with anything better that will not become problematic +if we guess wrong with the protocol "heuristic".) + +This change affects iotest 302, which has protocol node information in +its reference output. 
+ +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-13-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit d570177b50c389f379f93183155a27d44856ab46) +Signed-off-by: Hanna Czenczek +--- + block/monitor/block-hmp-cmds.c | 2 +- + block/qapi.c | 39 ++++++++++++++++++++++++++++------ + include/block/qapi.h | 2 +- + qemu-img.c | 3 ++- + tests/qemu-iotests/302.out | 6 +++--- + 5 files changed, 39 insertions(+), 13 deletions(-) + +diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c +index 72824d4e2e..4d83339a5d 100644 +--- a/block/monitor/block-hmp-cmds.c ++++ b/block/monitor/block-hmp-cmds.c +@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info, + monitor_printf(mon, "\nImages:\n"); + image_info = inserted->image; + while (1) { +- bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0); ++ bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0, false); + if (image_info->has_backing_image) { + image_info = image_info->backing_image; + } else { +diff --git a/block/qapi.c b/block/qapi.c +index 3e35603f0c..56f398c500 100644 +--- a/block/qapi.c ++++ b/block/qapi.c +@@ -934,24 +934,49 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, + visit_free(v); + } + +-void bdrv_node_info_dump(BlockNodeInfo *info, int indentation) ++/** ++ * Print the given @info object in human-readable form. Every field is indented ++ * using the given @indentation (four spaces per indentation level). ++ * ++ * When using this to print a whole block graph, @protocol can be set to true to ++ * signify that the given information is associated with a protocol node, i.e. ++ * just data storage for an image, such that the data it presents is not really ++ * a full VM disk. If so, several fields change name: For example, "virtual ++ * size" is printed as "file length". ++ * (Consider a qcow2 image, which is represented by a qcow2 node and a file ++ * node. 
Printing a "virtual size" for the file node does not make sense, ++ * because without the qcow2 node, it is not really a guest disk, so it does not ++ * have a "virtual size". Therefore, we call it "file length" instead.) ++ * ++ * @protocol is ignored when @indentation is 0, because we take that to mean ++ * that the associated node is the root node in the queried block graph, and ++ * thus is always to be interpreted as a standalone guest disk. ++ */ ++void bdrv_node_info_dump(BlockNodeInfo *info, int indentation, bool protocol) + { + char *size_buf, *dsize_buf; + g_autofree char *ind_s = g_strdup_printf("%*s", indentation * 4, ""); + ++ if (indentation == 0) { ++ /* Top level, consider this a normal image */ ++ protocol = false; ++ } ++ + if (!info->has_actual_size) { + dsize_buf = g_strdup("unavailable"); + } else { + dsize_buf = size_to_str(info->actual_size); + } + size_buf = size_to_str(info->virtual_size); +- qemu_printf("%simage: %s\n" +- "%sfile format: %s\n" +- "%svirtual size: %s (%" PRId64 " bytes)\n" ++ qemu_printf("%s%s: %s\n" ++ "%s%s: %s\n" ++ "%s%s: %s (%" PRId64 " bytes)\n" + "%sdisk size: %s\n", +- ind_s, info->filename, +- ind_s, info->format, +- ind_s, size_buf, info->virtual_size, ++ ind_s, protocol ? "filename" : "image", info->filename, ++ ind_s, protocol ? "protocol type" : "file format", ++ info->format, ++ ind_s, protocol ? 
"file length" : "virtual size", ++ size_buf, info->virtual_size, + ind_s, dsize_buf); + g_free(size_buf); + g_free(dsize_buf); +diff --git a/include/block/qapi.h b/include/block/qapi.h +index 38855f2ae9..26113da21a 100644 +--- a/include/block/qapi.h ++++ b/include/block/qapi.h +@@ -51,5 +51,5 @@ void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); + void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, + const char *prefix, + int indentation); +-void bdrv_node_info_dump(BlockNodeInfo *info, int indentation); ++void bdrv_node_info_dump(BlockNodeInfo *info, int indentation, bool protocol); + #endif +diff --git a/qemu-img.c b/qemu-img.c +index e281011245..2943625c67 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -2853,7 +2853,8 @@ static void dump_human_image_info(BlockGraphInfo *info, int indentation, + { + BlockChildInfoList *children_list; + +- bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation); ++ bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation, ++ info->children == NULL); + + for (children_list = info->children; children_list; + children_list = children_list->next) +diff --git a/tests/qemu-iotests/302.out b/tests/qemu-iotests/302.out +index edfa1c4f05..7b5014cdd8 100644 +--- a/tests/qemu-iotests/302.out ++++ b/tests/qemu-iotests/302.out +@@ -5,9 +5,9 @@ file format: raw + virtual size: 448 KiB (458752 bytes) + disk size: unavailable + Child node '/file': +- image: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock +- file format: nbd +- virtual size: 448 KiB (458752 bytes) ++ filename: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock ++ protocol type: nbd ++ file length: 448 KiB (458752 bytes) + disk size: unavailable + + === Converted image info === +-- +2.31.1 + diff --git a/kvm-qemu-img-Let-info-print-block-graph.patch b/kvm-qemu-img-Let-info-print-block-graph.patch new file mode 100644 index 0000000..536df69 --- /dev/null +++ b/kvm-qemu-img-Let-info-print-block-graph.patch @@ -0,0 +1,261 @@ +From 
ea73e9de42b446ce1049805c23f7706e4f87ed1f Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:27:03 +0200 +Subject: [PATCH 16/20] qemu-img: Let info print block graph + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [11/12] 2c1b8a03c918484449e876acf4c6663766848ad8 (hreitz/qemu-kvm-c-9-s) + +For every node in the backing chain, collect its BlockGraphInfo struct +using bdrv_query_block_graph_info(). Print all nodes' information, +indenting child nodes and labelling them with a path constructed from +the child names leading to the node from the root (e.g. /file/file). + +Note that we open each image with BDRV_O_NO_BACKING, so its backing +child is omitted from this graph, and thus presented in the previous +manner: By simply concatenating all images' information, separated with +blank lines. + +This affects two iotests: +- 065: Here we try to get the format node's format specific information. + The pre-patch code does so by taking all lines from "Format specific + information:" until an empty line. This format specific information + is no longer followed by an empty line, though, but by child node + information, so limit the range by "Child node '/file':". +- 302: Calls qemu_img() for qemu-img info directly, which does not + filter the output, so the child node information ends up in the + output. 
+ +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-12-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit c04d0ab026201d21873a63f768cb69c4554dfec1) +Signed-off-by: Hanna Czenczek +--- + qapi/block-core.json | 4 +-- + qemu-img.c | 69 ++++++++++++++++++++++++++------------ + tests/qemu-iotests/065 | 2 +- + tests/qemu-iotests/302.out | 5 +++ + 4 files changed, 56 insertions(+), 24 deletions(-) + +diff --git a/qapi/block-core.json b/qapi/block-core.json +index d703e0fb16..7f331eb8ea 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -5831,9 +5831,9 @@ + ## + # @DummyBlockCoreForceArrays: + # +-# Not used by QMP; hack to let us use BlockNodeInfoList internally ++# Not used by QMP; hack to let us use BlockGraphInfoList internally + # + # Since: 8.0 + ## + { 'struct': 'DummyBlockCoreForceArrays', +- 'data': { 'unused-block-node-info': ['BlockNodeInfo'] } } ++ 'data': { 'unused-block-graph-info': ['BlockGraphInfo'] } } +diff --git a/qemu-img.c b/qemu-img.c +index 30b4ea58bb..e281011245 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -2816,13 +2816,13 @@ static void dump_snapshots(BlockDriverState *bs) + g_free(sn_tab); + } + +-static void dump_json_block_node_info_list(BlockNodeInfoList *list) ++static void dump_json_block_graph_info_list(BlockGraphInfoList *list) + { + GString *str; + QObject *obj; + Visitor *v = qobject_output_visitor_new(&obj); + +- visit_type_BlockNodeInfoList(v, NULL, &list, &error_abort); ++ visit_type_BlockGraphInfoList(v, NULL, &list, &error_abort); + visit_complete(v, &obj); + str = qobject_to_json_pretty(obj, true); + assert(str != NULL); +@@ -2832,13 +2832,13 @@ static void dump_json_block_node_info_list(BlockNodeInfoList *list) + g_string_free(str, true); + } + +-static void dump_json_block_node_info(BlockNodeInfo *info) ++static void dump_json_block_graph_info(BlockGraphInfo *info) + { + GString *str; + QObject *obj; + Visitor *v = qobject_output_visitor_new(&obj); + 
+- visit_type_BlockNodeInfo(v, NULL, &info, &error_abort); ++ visit_type_BlockGraphInfo(v, NULL, &info, &error_abort); + visit_complete(v, &obj); + str = qobject_to_json_pretty(obj, true); + assert(str != NULL); +@@ -2848,9 +2848,29 @@ static void dump_json_block_node_info(BlockNodeInfo *info) + g_string_free(str, true); + } + +-static void dump_human_image_info_list(BlockNodeInfoList *list) ++static void dump_human_image_info(BlockGraphInfo *info, int indentation, ++ const char *path) + { +- BlockNodeInfoList *elem; ++ BlockChildInfoList *children_list; ++ ++ bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation); ++ ++ for (children_list = info->children; children_list; ++ children_list = children_list->next) ++ { ++ BlockChildInfo *child = children_list->value; ++ g_autofree char *child_path = NULL; ++ ++ printf("%*sChild node '%s%s':\n", ++ indentation * 4, "", path, child->name); ++ child_path = g_strdup_printf("%s%s/", path, child->name); ++ dump_human_image_info(child->info, indentation + 1, child_path); ++ } ++} ++ ++static void dump_human_image_info_list(BlockGraphInfoList *list) ++{ ++ BlockGraphInfoList *elem; + bool delim = false; + + for (elem = list; elem; elem = elem->next) { +@@ -2859,7 +2879,7 @@ static void dump_human_image_info_list(BlockNodeInfoList *list) + } + delim = true; + +- bdrv_node_info_dump(elem->value, 0); ++ dump_human_image_info(elem->value, 0, "/"); + } + } + +@@ -2869,7 +2889,7 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b) + } + + /** +- * Open an image file chain and return an BlockNodeInfoList ++ * Open an image file chain and return an BlockGraphInfoList + * + * @filename: topmost image filename + * @fmt: topmost image format (may be NULL to autodetect) +@@ -2880,13 +2900,13 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b) + * opening an image file. If there was an error a message will have been + * printed to stderr. 
+ */ +-static BlockNodeInfoList *collect_image_info_list(bool image_opts, +- const char *filename, +- const char *fmt, +- bool chain, bool force_share) ++static BlockGraphInfoList *collect_image_info_list(bool image_opts, ++ const char *filename, ++ const char *fmt, ++ bool chain, bool force_share) + { +- BlockNodeInfoList *head = NULL; +- BlockNodeInfoList **tail = &head; ++ BlockGraphInfoList *head = NULL; ++ BlockGraphInfoList **tail = &head; + GHashTable *filenames; + Error *err = NULL; + +@@ -2895,7 +2915,7 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts, + while (filename) { + BlockBackend *blk; + BlockDriverState *bs; +- BlockNodeInfo *info; ++ BlockGraphInfo *info; + + if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) { + error_report("Backing file '%s' creates an infinite loop.", +@@ -2912,7 +2932,14 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts, + } + bs = blk_bs(blk); + +- bdrv_query_block_node_info(bs, &info, &err); ++ /* ++ * Note that the returned BlockGraphInfo object will not have ++ * information about this image's backing node, because we have opened ++ * it with BDRV_O_NO_BACKING. Printing this object will therefore not ++ * duplicate the backing chain information that we obtain by walking ++ * the chain manually here. 
++ */ ++ bdrv_query_block_graph_info(bs, &info, &err); + if (err) { + error_report_err(err); + blk_unref(blk); +@@ -2945,7 +2972,7 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts, + return head; + + err: +- qapi_free_BlockNodeInfoList(head); ++ qapi_free_BlockGraphInfoList(head); + g_hash_table_destroy(filenames); + return NULL; + } +@@ -2956,7 +2983,7 @@ static int img_info(int argc, char **argv) + OutputFormat output_format = OFORMAT_HUMAN; + bool chain = false; + const char *filename, *fmt, *output; +- BlockNodeInfoList *list; ++ BlockGraphInfoList *list; + bool image_opts = false; + bool force_share = false; + +@@ -3035,14 +3062,14 @@ static int img_info(int argc, char **argv) + break; + case OFORMAT_JSON: + if (chain) { +- dump_json_block_node_info_list(list); ++ dump_json_block_graph_info_list(list); + } else { +- dump_json_block_node_info(list->value); ++ dump_json_block_graph_info(list->value); + } + break; + } + +- qapi_free_BlockNodeInfoList(list); ++ qapi_free_BlockGraphInfoList(list); + return 0; + } + +diff --git a/tests/qemu-iotests/065 b/tests/qemu-iotests/065 +index b724c89c7c..b76701c71e 100755 +--- a/tests/qemu-iotests/065 ++++ b/tests/qemu-iotests/065 +@@ -56,7 +56,7 @@ class TestQemuImgInfo(TestImageInfoSpecific): + def test_human(self): + data = qemu_img('info', '--output=human', test_img).stdout.split('\n') + data = data[(data.index('Format specific information:') + 1) +- :data.index('')] ++ :data.index("Child node '/file':")] + for field in data: + self.assertTrue(re.match('^ {4}[^ ]', field) is not None) + data = [line.strip() for line in data] +diff --git a/tests/qemu-iotests/302.out b/tests/qemu-iotests/302.out +index 3e7c281b91..edfa1c4f05 100644 +--- a/tests/qemu-iotests/302.out ++++ b/tests/qemu-iotests/302.out +@@ -4,6 +4,11 @@ image: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock + file format: raw + virtual size: 448 KiB (458752 bytes) + disk size: unavailable ++Child node '/file': ++ image: 
nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock ++ file format: nbd ++ virtual size: 448 KiB (458752 bytes) ++ disk size: unavailable + + === Converted image info === + image: TEST_IMG +-- +2.31.1 + diff --git a/kvm-qemu-img-Use-BlockNodeInfo.patch b/kvm-qemu-img-Use-BlockNodeInfo.patch new file mode 100644 index 0000000..7bfb7e6 --- /dev/null +++ b/kvm-qemu-img-Use-BlockNodeInfo.patch @@ -0,0 +1,241 @@ +From dca4cbe680baff837ca8ac8bd39b77b46af3f64b Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 20 Jun 2022 18:26:57 +0200 +Subject: [PATCH 10/20] qemu-img: Use BlockNodeInfo + +RH-Author: Hanna Czenczek +RH-MergeRequest: 145: Show protocol-level information in qemu-img info +RH-Bugzilla: 1860292 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [5/12] b599af3ec05951a0ba11d9eae2ee19148d6bf624 (hreitz/qemu-kvm-c-9-s) + +qemu-img info never uses ImageInfo's backing-image field, because it +opens the backing chain one by one with BDRV_O_NO_BACKING, and prints +all backing chain nodes' information consecutively. Use BlockNodeInfo +to make it clear that we only print information about a single node, and +that we are not using the backing-image field. + +Notably, bdrv_image_info_dump() does not evaluate the backing-image +field, so we can easily make it take a BlockNodeInfo pointer (and +consequentially rename it to bdrv_node_info_dump()). It makes more +sense this way, because again, the interface now makes it syntactically +clear that backing-image is ignored by this function. 
+ +Signed-off-by: Hanna Reitz +Message-Id: <20220620162704.80987-6-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit b1f4cd1589a16fec02f264a09bd3560e4ccce3c2) +Signed-off-by: Hanna Czenczek +--- + block/monitor/block-hmp-cmds.c | 2 +- + block/qapi.c | 2 +- + include/block/qapi.h | 2 +- + qapi/block-core.json | 4 +-- + qemu-img.c | 48 +++++++++++++++++----------------- + 5 files changed, 29 insertions(+), 29 deletions(-) + +diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c +index b6135e9bfe..aa37faa601 100644 +--- a/block/monitor/block-hmp-cmds.c ++++ b/block/monitor/block-hmp-cmds.c +@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info, + monitor_printf(mon, "\nImages:\n"); + image_info = inserted->image; + while (1) { +- bdrv_image_info_dump(image_info); ++ bdrv_node_info_dump(qapi_ImageInfo_base(image_info)); + if (image_info->has_backing_image) { + image_info = image_info->backing_image; + } else { +diff --git a/block/qapi.c b/block/qapi.c +index e5022b4481..ad88bf9b38 100644 +--- a/block/qapi.c ++++ b/block/qapi.c +@@ -865,7 +865,7 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, + visit_free(v); + } + +-void bdrv_image_info_dump(ImageInfo *info) ++void bdrv_node_info_dump(BlockNodeInfo *info) + { + char *size_buf, *dsize_buf; + if (!info->has_actual_size) { +diff --git a/include/block/qapi.h b/include/block/qapi.h +index c7de4e3fa9..22198dcd0c 100644 +--- a/include/block/qapi.h ++++ b/include/block/qapi.h +@@ -45,5 +45,5 @@ void bdrv_query_image_info(BlockDriverState *bs, + void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); + void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, + const char *prefix); +-void bdrv_image_info_dump(ImageInfo *info); ++void bdrv_node_info_dump(BlockNodeInfo *info); + #endif +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 7720da0498..4cf2deeb6c 100644 +--- a/qapi/block-core.json ++++ 
b/qapi/block-core.json +@@ -5796,9 +5796,9 @@ + ## + # @DummyBlockCoreForceArrays: + # +-# Not used by QMP; hack to let us use ImageInfoList internally ++# Not used by QMP; hack to let us use BlockNodeInfoList internally + # + # Since: 8.0 + ## + { 'struct': 'DummyBlockCoreForceArrays', +- 'data': { 'unused-image-info': ['ImageInfo'] } } ++ 'data': { 'unused-block-node-info': ['BlockNodeInfo'] } } +diff --git a/qemu-img.c b/qemu-img.c +index 2f85bb7ede..3b2ca3bbcb 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -2816,13 +2816,13 @@ static void dump_snapshots(BlockDriverState *bs) + g_free(sn_tab); + } + +-static void dump_json_image_info_list(ImageInfoList *list) ++static void dump_json_block_node_info_list(BlockNodeInfoList *list) + { + GString *str; + QObject *obj; + Visitor *v = qobject_output_visitor_new(&obj); + +- visit_type_ImageInfoList(v, NULL, &list, &error_abort); ++ visit_type_BlockNodeInfoList(v, NULL, &list, &error_abort); + visit_complete(v, &obj); + str = qobject_to_json_pretty(obj, true); + assert(str != NULL); +@@ -2832,13 +2832,13 @@ static void dump_json_image_info_list(ImageInfoList *list) + g_string_free(str, true); + } + +-static void dump_json_image_info(ImageInfo *info) ++static void dump_json_block_node_info(BlockNodeInfo *info) + { + GString *str; + QObject *obj; + Visitor *v = qobject_output_visitor_new(&obj); + +- visit_type_ImageInfo(v, NULL, &info, &error_abort); ++ visit_type_BlockNodeInfo(v, NULL, &info, &error_abort); + visit_complete(v, &obj); + str = qobject_to_json_pretty(obj, true); + assert(str != NULL); +@@ -2848,9 +2848,9 @@ static void dump_json_image_info(ImageInfo *info) + g_string_free(str, true); + } + +-static void dump_human_image_info_list(ImageInfoList *list) ++static void dump_human_image_info_list(BlockNodeInfoList *list) + { +- ImageInfoList *elem; ++ BlockNodeInfoList *elem; + bool delim = false; + + for (elem = list; elem; elem = elem->next) { +@@ -2859,7 +2859,7 @@ static void 
dump_human_image_info_list(ImageInfoList *list) + } + delim = true; + +- bdrv_image_info_dump(elem->value); ++ bdrv_node_info_dump(elem->value); + } + } + +@@ -2869,24 +2869,24 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b) + } + + /** +- * Open an image file chain and return an ImageInfoList ++ * Open an image file chain and return an BlockNodeInfoList + * + * @filename: topmost image filename + * @fmt: topmost image format (may be NULL to autodetect) + * @chain: true - enumerate entire backing file chain + * false - only topmost image file + * +- * Returns a list of ImageInfo objects or NULL if there was an error opening an +- * image file. If there was an error a message will have been printed to +- * stderr. ++ * Returns a list of BlockNodeInfo objects or NULL if there was an error ++ * opening an image file. If there was an error a message will have been ++ * printed to stderr. + */ +-static ImageInfoList *collect_image_info_list(bool image_opts, +- const char *filename, +- const char *fmt, +- bool chain, bool force_share) ++static BlockNodeInfoList *collect_image_info_list(bool image_opts, ++ const char *filename, ++ const char *fmt, ++ bool chain, bool force_share) + { +- ImageInfoList *head = NULL; +- ImageInfoList **tail = &head; ++ BlockNodeInfoList *head = NULL; ++ BlockNodeInfoList **tail = &head; + GHashTable *filenames; + Error *err = NULL; + +@@ -2895,7 +2895,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts, + while (filename) { + BlockBackend *blk; + BlockDriverState *bs; +- ImageInfo *info; ++ BlockNodeInfo *info; + + if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) { + error_report("Backing file '%s' creates an infinite loop.", +@@ -2912,7 +2912,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts, + } + bs = blk_bs(blk); + +- bdrv_query_image_info(bs, &info, &err); ++ bdrv_query_block_node_info(bs, &info, &err); + if (err) { + error_report_err(err); + blk_unref(blk); +@@ 
-2945,7 +2945,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts, + return head; + + err: +- qapi_free_ImageInfoList(head); ++ qapi_free_BlockNodeInfoList(head); + g_hash_table_destroy(filenames); + return NULL; + } +@@ -2956,7 +2956,7 @@ static int img_info(int argc, char **argv) + OutputFormat output_format = OFORMAT_HUMAN; + bool chain = false; + const char *filename, *fmt, *output; +- ImageInfoList *list; ++ BlockNodeInfoList *list; + bool image_opts = false; + bool force_share = false; + +@@ -3035,14 +3035,14 @@ static int img_info(int argc, char **argv) + break; + case OFORMAT_JSON: + if (chain) { +- dump_json_image_info_list(list); ++ dump_json_block_node_info_list(list); + } else { +- dump_json_image_info(list->value); ++ dump_json_block_node_info(list->value); + } + break; + } + +- qapi_free_ImageInfoList(list); ++ qapi_free_BlockNodeInfoList(list); + return 0; + } + +-- +2.31.1 + diff --git a/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch b/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch new file mode 100644 index 0000000..693049c --- /dev/null +++ b/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch @@ -0,0 +1,70 @@ +From d0d3d694b3a8d200442484ae0c9d263e0439cd04 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 12 Jan 2023 20:14:53 +0100 +Subject: [PATCH 03/20] qemu-img bitmap: Report errors while closing the image +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image +RH-Bugzilla: 2150180 +RH-Acked-by: Thomas Huth +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefano Garzarella +RH-Commit: [3/4] 4a704fec2e3bcb47b2be1529e27fd1833d58c517 (kmwolf/centos-qemu-kvm) + +blk_unref() can't report any errors that happen while closing the image. 
+For example, if qcow2 hits an -ENOSPC error while writing out dirty +bitmaps when it's closed, it prints error messages to stderr, but +'qemu-img bitmap' won't see any error return value and will therefore +look successful with exit code 0. + +In order to fix this, manually inactivate the image first before calling +blk_unref(). This already performs the operations that would be most +likely to fail while closing the image, but it can still return errors. + +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1330 +Signed-off-by: Kevin Wolf +Message-Id: <20230112191454.169353-4-kwolf@redhat.com> +Reviewed-by: Hanna Czenczek +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit c5e477110dcb8ef4642dce399777c3dee68fa96c) +Signed-off-by: Kevin Wolf +--- + qemu-img.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/qemu-img.c b/qemu-img.c +index 3cbdda9f76..2f85bb7ede 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -4646,6 +4646,7 @@ static int img_bitmap(int argc, char **argv) + QSIMPLEQ_HEAD(, ImgBitmapAction) actions; + ImgBitmapAction *act, *act_next; + const char *op; ++ int inactivate_ret; + + QSIMPLEQ_INIT(&actions); + +@@ -4830,6 +4831,16 @@ static int img_bitmap(int argc, char **argv) + ret = 0; + + out: ++ /* ++ * Manually inactivate the images first because this way we can know whether ++ * an error occurred. blk_unref() doesn't tell us about failures. 
++ */ ++ inactivate_ret = bdrv_inactivate_all(); ++ if (inactivate_ret < 0) { ++ error_report("Error while closing the image: %s", strerror(-inactivate_ret)); ++ ret = 1; ++ } ++ + blk_unref(src); + blk_unref(blk); + qemu_opts_del(opts); +-- +2.31.1 + diff --git a/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch b/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch new file mode 100644 index 0000000..5cac3ba --- /dev/null +++ b/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch @@ -0,0 +1,67 @@ +From 2f5369f0effaa23be746f9b5d9f6a0bfc346fb7d Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 12 Jan 2023 20:14:52 +0100 +Subject: [PATCH 02/20] qemu-img commit: Report errors while closing the image + +RH-Author: Kevin Wolf +RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image +RH-Bugzilla: 2150180 +RH-Acked-by: Thomas Huth +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefano Garzarella +RH-Commit: [2/4] faedd43355463b1210a3f21ecd430f478bd06f5a (kmwolf/centos-qemu-kvm) + +blk_unref() can't report any errors that happen while closing the image. +For example, if qcow2 hits an -ENOSPC error while writing out dirty +bitmaps when it's closed, it prints error messages to stderr, but +'qemu-img commit' won't see any error return value and will therefore +look successful with exit code 0. + +In order to fix this, manually inactivate the image first before calling +blk_unref(). This already performs the operations that would be most +likely to fail while closing the image, but it can still return errors. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20230112191454.169353-3-kwolf@redhat.com> +Reviewed-by: Hanna Czenczek +Signed-off-by: Kevin Wolf +(cherry picked from commit 44efba2d713aca076c411594d0c1a2b99155eeb3) +Signed-off-by: Kevin Wolf +--- + qemu-img.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/qemu-img.c b/qemu-img.c +index a9b3a8103c..3cbdda9f76 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -449,6 +449,11 @@ static BlockBackend *img_open(bool image_opts, + blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet, + force_share); + } ++ ++ if (blk) { ++ blk_set_force_allow_inactivate(blk); ++ } ++ + return blk; + } + +@@ -1119,6 +1124,14 @@ unref_backing: + done: + qemu_progress_end(); + ++ /* ++ * Manually inactivate the image first because this way we can know whether ++ * an error occurred. blk_unref() doesn't tell us about failures. ++ */ ++ ret = bdrv_inactivate_all(); ++ if (ret < 0 && !local_err) { ++ error_setg_errno(&local_err, -ret, "Error while closing the image"); ++ } + blk_unref(blk); + + if (local_err) { +-- +2.31.1 + diff --git a/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch b/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch new file mode 100644 index 0000000..6b88e5c --- /dev/null +++ b/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch @@ -0,0 +1,166 @@ +From 06030aa79fcb2d90d6a670e75d959aa0c3204b5c Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 12 Jan 2023 20:14:54 +0100 +Subject: [PATCH 04/20] qemu-iotests: Test qemu-img bitmap/commit exit code on + error + +RH-Author: Kevin Wolf +RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image +RH-Bugzilla: 2150180 +RH-Acked-by: Thomas Huth +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefano Garzarella +RH-Commit: [4/4] b96bb671bcfb7ae18015fda14db70f42a83a6ea7 (kmwolf/centos-qemu-kvm) + +This tests that when an error happens while writing back bitmaps to the +image file in qcow2_inactivate(), 
'qemu-img bitmap/commit' actually +return an error value in their exit code instead of making the operation +look successful to scripts. + +Signed-off-by: Kevin Wolf +Message-Id: <20230112191454.169353-5-kwolf@redhat.com> +Reviewed-by: Hanna Czenczek +Signed-off-by: Kevin Wolf +(cherry picked from commit 07a4e1f8e5418f36424cd57d5d061b090a238c65) +Signed-off-by: Kevin Wolf +--- + .../qemu-iotests/tests/qemu-img-close-errors | 96 +++++++++++++++++++ + .../tests/qemu-img-close-errors.out | 23 +++++ + 2 files changed, 119 insertions(+) + create mode 100755 tests/qemu-iotests/tests/qemu-img-close-errors + create mode 100644 tests/qemu-iotests/tests/qemu-img-close-errors.out + +diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors b/tests/qemu-iotests/tests/qemu-img-close-errors +new file mode 100755 +index 0000000000..50bfb6cfa2 +--- /dev/null ++++ b/tests/qemu-iotests/tests/qemu-img-close-errors +@@ -0,0 +1,96 @@ ++#!/usr/bin/env bash ++# group: rw auto quick ++# ++# Check that errors while closing the image, in particular writing back dirty ++# bitmaps, is correctly reported with a failing qemu-img exit code. ++# ++# Copyright (C) 2023 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++# creator ++owner=kwolf@redhat.com ++ ++seq="$(basename $0)" ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! 
++ ++_cleanup() ++{ ++ _cleanup_test_img ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++cd .. ++. ./common.rc ++. ./common.filter ++ ++_supported_fmt qcow2 ++_supported_proto file ++_supported_os Linux ++ ++size=1G ++ ++# The error we are going to use is ENOSPC. Depending on how many bitmaps we ++# create in the backing file (and therefore increase the used up space), we get ++# failures in different places. With a low number, only merging the bitmap ++# fails, whereas with a higher number, already 'qemu-img commit' fails. ++for max_bitmap in 6 7; do ++ echo ++ echo "=== Test with $max_bitmap bitmaps ===" ++ ++ TEST_IMG="$TEST_IMG.base" _make_test_img -q $size ++ for i in $(seq 1 $max_bitmap); do ++ $QEMU_IMG bitmap --add "$TEST_IMG.base" "stale-bitmap-$i" ++ done ++ ++ # Simulate a block device of 128 MB by resizing the image file accordingly ++ # and then enforcing the size with the raw driver ++ $QEMU_IO -f raw -c "truncate 128M" "$TEST_IMG.base" ++ BASE_JSON='json:{ ++ "driver": "qcow2", ++ "file": { ++ "driver": "raw", ++ "size": 134217728, ++ "file": { ++ "driver": "file", ++ "filename":"'"$TEST_IMG.base"'" ++ } ++ } ++ }' ++ ++ _make_test_img -q -b "$BASE_JSON" -F $IMGFMT ++ $QEMU_IMG bitmap --add "$TEST_IMG" "good-bitmap" ++ ++ $QEMU_IO -c 'write 0 126m' "$TEST_IMG" | _filter_qemu_io ++ ++ $QEMU_IMG commit -d "$TEST_IMG" 2>&1 | _filter_generated_node_ids ++ echo "qemu-img commit exit code: ${PIPESTATUS[0]}" ++ ++ $QEMU_IMG bitmap --add "$BASE_JSON" "good-bitmap" ++ echo "qemu-img bitmap --add exit code: $?" 
++ ++ $QEMU_IMG bitmap --merge "good-bitmap" -b "$TEST_IMG" "$BASE_JSON" \ ++ "good-bitmap" 2>&1 | _filter_generated_node_ids ++ echo "qemu-img bitmap --merge exit code: ${PIPESTATUS[0]}" ++done ++ ++# success, all done ++echo "*** done" ++rm -f $seq.full ++status=0 ++ +diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors.out b/tests/qemu-iotests/tests/qemu-img-close-errors.out +new file mode 100644 +index 0000000000..1bfe88f176 +--- /dev/null ++++ b/tests/qemu-iotests/tests/qemu-img-close-errors.out +@@ -0,0 +1,23 @@ ++QA output created by qemu-img-close-errors ++ ++=== Test with 6 bitmaps === ++wrote 132120576/132120576 bytes at offset 0 ++126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++Image committed. ++qemu-img commit exit code: 0 ++qemu-img bitmap --add exit code: 0 ++qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device ++qemu-img: Error while closing the image: Invalid argument ++qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device ++qemu-img bitmap --merge exit code: 1 ++ ++=== Test with 7 bitmaps === ++wrote 132120576/132120576 bytes at offset 0 ++126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device ++qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device ++qemu-img: Error while closing the image: Invalid argument ++qemu-img commit exit code: 1 ++qemu-img bitmap --add exit code: 0 ++qemu-img bitmap --merge exit code: 0 ++*** done +-- +2.31.1 + diff --git a/kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch b/kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch new file mode 100644 index 
0000000..e5288d6 --- /dev/null +++ b/kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch @@ -0,0 +1,47 @@ +From 5413b8825db6eecc6f245854a6bce58e4dee3294 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 7 Feb 2023 17:57:39 +0000 +Subject: [PATCH 20/20] virtio-rng-pci: fix transitional migration compat for + vectors + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 147: virtio-rng-pci: fix transitional migration compat for vectors +RH-Bugzilla: 2162569 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Thomas Huth +RH-Acked-by: Gerd Hoffmann +RH-Commit: [1/1] 6e2bd111cd56808fccf2c0464a40f7784fd893a2 (dagrh/c-9-s-qemu-kvm) + +In upstream bad9c5a5166/downstream 46e08bafe9ed I fixed the virtio-rng-pci +migration compatibility, but it was discovered that we also need to fix +the other aliases of the device for the transitional cases. + +I've sent upstream: +https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg01926.html +but downstream we need to change the downstream machine type anyway, +so it's not quite identical. + +Fixes: 9ea02e8f1 ('virtio-rng-pci: Allow setting nvectors, so we can use MSI-X') + +Signed-off-by: Dr.
David Alan Gilbert +--- + hw/core/machine.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 7adbac6f87..3ee638394b 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -58,6 +58,9 @@ GlobalProperty hw_compat_rhel_9_1[] = { + { "virtio-device", "queue_reset", "false" }, + /* hw_compat_rhel_9_1 bz 2155749 */ + { "virtio-rng-pci", "vectors", "0" }, ++ /* hw_compat_rhel_9_1 bz 2162569 */ ++ { "virtio-rng-pci-transitional", "vectors", "0" }, ++ { "virtio-rng-pci-non-transitional", "vectors", "0" }, + }; + const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); + +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 6dedf40..a9baa8e 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.2.0 -Release: 7%{?rcrel}%{?dist}%{?cc_suffix} +Release: 8%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -316,6 +316,46 @@ Patch83: kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch Patch84: kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch # For bz#2104412 - vDPA ASID support in Qemu Patch85: kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch +# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations +Patch86: kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch +# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations +Patch87: kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch +# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations +Patch88: kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch +# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations +Patch89: kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch +# For bz#2165280 - [kvm-unit-tests] debug-wp-migration fails +Patch90: kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch91: kvm-block-Improve-empty-format-specific-info-dump.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch92: kvm-block-file-Add-file-specific-image-info.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch93: kvm-block-vmdk-Change-extent-info-type.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch94: kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch95: kvm-qemu-img-Use-BlockNodeInfo.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch96: 
kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch97: kvm-block-qapi-Introduce-BlockGraphInfo.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch98: kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch99: kvm-iotests-Filter-child-node-information.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch100: kvm-iotests-106-214-308-Read-only-one-size-line.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch101: kvm-qemu-img-Let-info-print-block-graph.patch +# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info +Patch102: kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch +# For bz#2155173 - [vhost-user] unable to start vhost net: 71: falling back on userspace +Patch103: kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch +# For bz#2155173 - [vhost-user] unable to start vhost net: 71: falling back on userspace +Patch104: kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch +# For bz#2162569 - [transitional device][virtio-rng-pci-transitional]Stable Guest ABI failed between RHEL 8.6 to RHEL 9.2 +Patch105: kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch %if %{have_clang} BuildRequires: clang @@ -1346,6 +1386,38 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Thu Feb 09 2023 Miroslav Rezanina - 7.2.0-8 +- kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch [bz#2150180] +- kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch [bz#2150180] +- kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch [bz#2150180] +- kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch [bz#2150180] +- kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch [bz#2165280] +- 
kvm-block-Improve-empty-format-specific-info-dump.patch [bz#1860292] +- kvm-block-file-Add-file-specific-image-info.patch [bz#1860292] +- kvm-block-vmdk-Change-extent-info-type.patch [bz#1860292] +- kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch [bz#1860292] +- kvm-qemu-img-Use-BlockNodeInfo.patch [bz#1860292] +- kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch [bz#1860292] +- kvm-block-qapi-Introduce-BlockGraphInfo.patch [bz#1860292] +- kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch [bz#1860292] +- kvm-iotests-Filter-child-node-information.patch [bz#1860292] +- kvm-iotests-106-214-308-Read-only-one-size-line.patch [bz#1860292] +- kvm-qemu-img-Let-info-print-block-graph.patch [bz#1860292] +- kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch [bz#1860292] +- kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch [bz#2155173] +- kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch [bz#2155173] +- kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch [bz#2162569] +- Resolves: bz#2150180 + (qemu-img finishes successfully while having errors in commit or bitmaps operations) +- Resolves: bz#2165280 + ([kvm-unit-tests] debug-wp-migration fails) +- Resolves: bz#1860292 + (RFE: add extent_size_hint information to qemu-img info) +- Resolves: bz#2155173 + ([vhost-user] unable to start vhost net: 71: falling back on userspace) +- Resolves: bz#2162569 + ([transitional device][virtio-rng-pci-transitional]Stable Guest ABI failed between RHEL 8.6 to RHEL 9.2) + * Mon Feb 06 2023 Miroslav Rezanina - 7.2.0-7 - kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch [bz#2104412] - kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch [bz#2104412] From 382f65f59d97541bf005a4a2205fd08469b76ace Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 17 Feb 2023 02:08:06 -0500 Subject: [PATCH 183/195] * Fri Feb 17 2023 Miroslav Rezanina - 7.2.0-9 - kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch 
[bz#2169232] - kvm-net-stream-add-a-new-option-to-automatically-reconne.patch [bz#2169232] - kvm-linux-headers-Update-to-v6.1.patch [bz#2158704] - kvm-util-userfaultfd-Add-uffd_open.patch [bz#2158704] - kvm-util-userfaultfd-Support-dev-userfaultfd.patch [bz#2158704] - kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch [bz#2169732] - kvm-migration-check-magic-value-for-deciding-the-mapping.patch [bz#2169732] - kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch [bz#2168172] - Resolves: bz#2169232 (RFE: reconnect option for stream socket back-end) - Resolves: bz#2158704 (RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall) - Resolves: bz#2169732 (Multifd migration fails under a weak network/socket ordering race) - Resolves: bz#2168172 ([s390x] qemu-kvm coredumps when SE crashes) --- ...port-for-MSG_PEEK-for-socket-channel.patch | 386 ++++++++++++ kvm-linux-headers-Update-to-v6.1.patch | 577 ++++++++++++++++++ ...magic-value-for-deciding-the-mapping.patch | 330 ++++++++++ ...-new-option-to-automatically-reconne.patch | 325 ++++++++++ ...h_dump-Fix-memory-corruption-in-s390.patch | 50 ++ ...etdev-test-stream-and-dgram-backends.patch | 505 +++++++++++++++ kvm-util-userfaultfd-Add-uffd_open.patch | 169 +++++ ...-userfaultfd-Support-dev-userfaultfd.patch | 94 +++ qemu-kvm.spec | 36 +- 9 files changed, 2471 insertions(+), 1 deletion(-) create mode 100644 kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch create mode 100644 kvm-linux-headers-Update-to-v6.1.patch create mode 100644 kvm-migration-check-magic-value-for-deciding-the-mapping.patch create mode 100644 kvm-net-stream-add-a-new-option-to-automatically-reconne.patch create mode 100644 kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch create mode 100644 kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch create mode 100644 kvm-util-userfaultfd-Add-uffd_open.patch create mode 100644 kvm-util-userfaultfd-Support-dev-userfaultfd.patch diff --git 
a/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch b/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch new file mode 100644 index 0000000..22abf35 --- /dev/null +++ b/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch @@ -0,0 +1,386 @@ +From 3a29b50036b972caae5bca0e5dfc34d910b1d5e9 Mon Sep 17 00:00:00 2001 +From: "manish.mishra" +Date: Tue, 20 Dec 2022 18:44:17 +0000 +Subject: [PATCH 6/8] io: Add support for MSG_PEEK for socket channel +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 150: migration: Fix multifd crash on channel disorders +RH-Bugzilla: 2169732 +RH-Acked-by: quintela1 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Dr. David Alan Gilbert +RH-Commit: [1/2] 266563f3e387e97ec710d9bc179e5de26dfd09f1 (peterx/qemu-kvm) + +MSG_PEEK peeks at the channel. The data is treated as unread and +the next read shall still return this data. This support is +currently added only for socket class. Extra parameter 'flags' +is added to io_readv calls to pass extra read flags like MSG_PEEK. + +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrange +Reviewed-by: Juan Quintela +Suggested-by: Daniel P.
Berrange +Signed-off-by: manish.mishra +Signed-off-by: Juan Quintela +(cherry picked from commit 84615a19ddf2bfb38d7b3a0d487d2397ee55e4f3) +Signed-off-by: Peter Xu +--- + chardev/char-socket.c | 4 ++-- + include/io/channel.h | 6 ++++++ + io/channel-buffer.c | 1 + + io/channel-command.c | 1 + + io/channel-file.c | 1 + + io/channel-null.c | 1 + + io/channel-socket.c | 19 ++++++++++++++++++- + io/channel-tls.c | 1 + + io/channel-websock.c | 1 + + io/channel.c | 16 ++++++++++++---- + migration/channel-block.c | 1 + + migration/rdma.c | 1 + + scsi/qemu-pr-helper.c | 2 +- + tests/qtest/tpm-emu.c | 2 +- + tests/unit/test-io-channel-socket.c | 1 + + util/vhost-user-server.c | 2 +- + 16 files changed, 50 insertions(+), 10 deletions(-) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 879564aa8a..5afce9a464 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -283,11 +283,11 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len) + if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { + ret = qio_channel_readv_full(s->ioc, &iov, 1, + &msgfds, &msgfds_num, +- NULL); ++ 0, NULL); + } else { + ret = qio_channel_readv_full(s->ioc, &iov, 1, + NULL, NULL, +- NULL); ++ 0, NULL); + } + + if (msgfds_num) { +diff --git a/include/io/channel.h b/include/io/channel.h +index c680ee7480..716235d496 100644 +--- a/include/io/channel.h ++++ b/include/io/channel.h +@@ -34,6 +34,8 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, + + #define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 + ++#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1 ++ + typedef enum QIOChannelFeature QIOChannelFeature; + + enum QIOChannelFeature { +@@ -41,6 +43,7 @@ enum QIOChannelFeature { + QIO_CHANNEL_FEATURE_SHUTDOWN, + QIO_CHANNEL_FEATURE_LISTEN, + QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, ++ QIO_CHANNEL_FEATURE_READ_MSG_PEEK, + }; + + +@@ -114,6 +117,7 @@ struct QIOChannelClass { + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp); + int 
(*io_close)(QIOChannel *ioc, + Error **errp); +@@ -188,6 +192,7 @@ void qio_channel_set_name(QIOChannel *ioc, + * @niov: the length of the @iov array + * @fds: pointer to an array that will received file handles + * @nfds: pointer filled with number of elements in @fds on return ++ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*) + * @errp: pointer to a NULL-initialized error object + * + * Read data from the IO channel, storing it in the +@@ -224,6 +229,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp); + + +diff --git a/io/channel-buffer.c b/io/channel-buffer.c +index bf52011be2..8096180f85 100644 +--- a/io/channel-buffer.c ++++ b/io/channel-buffer.c +@@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); +diff --git a/io/channel-command.c b/io/channel-command.c +index 74516252ba..e7edd091af 100644 +--- a/io/channel-command.c ++++ b/io/channel-command.c +@@ -203,6 +203,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); +diff --git a/io/channel-file.c b/io/channel-file.c +index b67687c2aa..d76663e6ae 100644 +--- a/io/channel-file.c ++++ b/io/channel-file.c +@@ -86,6 +86,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); +diff --git a/io/channel-null.c b/io/channel-null.c +index 75e3781507..4fafdb770d 100644 +--- a/io/channel-null.c ++++ b/io/channel-null.c +@@ -60,6 +60,7 @@ qio_channel_null_readv(QIOChannel *ioc, + size_t niov, + int **fds G_GNUC_UNUSED, + size_t *nfds G_GNUC_UNUSED, ++ int flags, + Error **errp) + { + QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc); 
+diff --git a/io/channel-socket.c b/io/channel-socket.c +index b76dca9cc1..7aca84f61a 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -173,6 +173,9 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, + } + #endif + ++ qio_channel_set_feature(QIO_CHANNEL(ioc), ++ QIO_CHANNEL_FEATURE_READ_MSG_PEEK); ++ + return 0; + } + +@@ -406,6 +409,9 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, + } + #endif /* WIN32 */ + ++ qio_channel_set_feature(QIO_CHANNEL(cioc), ++ QIO_CHANNEL_FEATURE_READ_MSG_PEEK); ++ + trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd); + return cioc; + +@@ -496,6 +502,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); +@@ -517,6 +524,10 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + + } + ++ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { ++ sflags |= MSG_PEEK; ++ } ++ + retry: + ret = recvmsg(sioc->fd, &msg, sflags); + if (ret < 0) { +@@ -624,11 +635,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + ssize_t done = 0; + ssize_t i; ++ int sflags = 0; ++ ++ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { ++ sflags |= MSG_PEEK; ++ } + + for (i = 0; i < niov; i++) { + ssize_t ret; +@@ -636,7 +653,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + ret = recv(sioc->fd, + iov[i].iov_base, + iov[i].iov_len, +- 0); ++ sflags); + if (ret < 0) { + if (errno == EAGAIN) { + if (done) { +diff --git a/io/channel-tls.c b/io/channel-tls.c +index 4ce890a538..c730cb8ec5 100644 +--- a/io/channel-tls.c ++++ b/io/channel-tls.c +@@ -260,6 +260,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); +diff 
--git a/io/channel-websock.c b/io/channel-websock.c +index fb4932ade7..a12acc27cf 100644 +--- a/io/channel-websock.c ++++ b/io/channel-websock.c +@@ -1081,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); +diff --git a/io/channel.c b/io/channel.c +index 0640941ac5..a8c7f11649 100644 +--- a/io/channel.c ++++ b/io/channel.c +@@ -52,6 +52,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); +@@ -63,7 +64,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + return -1; + } + +- return klass->io_readv(ioc, iov, niov, fds, nfds, errp); ++ if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) && ++ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { ++ error_setg_errno(errp, EINVAL, ++ "Channel does not support peek read"); ++ return -1; ++ } ++ ++ return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp); + } + + +@@ -146,7 +154,7 @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc, + while ((nlocal_iov > 0) || local_fds) { + ssize_t len; + len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds, +- local_nfds, errp); ++ local_nfds, 0, errp); + if (len == QIO_CHANNEL_ERR_BLOCK) { + if (qemu_in_coroutine()) { + qio_channel_yield(ioc, G_IO_IN); +@@ -284,7 +292,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc, + size_t niov, + Error **errp) + { +- return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp); ++ return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp); + } + + +@@ -303,7 +311,7 @@ ssize_t qio_channel_read(QIOChannel *ioc, + Error **errp) + { + struct iovec iov = { .iov_base = buf, .iov_len = buflen }; +- return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp); ++ return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp); + } + 
+ +diff --git a/migration/channel-block.c b/migration/channel-block.c +index f4ab53acdb..b7374363c3 100644 +--- a/migration/channel-block.c ++++ b/migration/channel-block.c +@@ -53,6 +53,7 @@ qio_channel_block_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc); +diff --git a/migration/rdma.c b/migration/rdma.c +index 94a55dd95b..d8b4632094 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -2854,6 +2854,7 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); +diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c +index 196b78c00d..199227a556 100644 +--- a/scsi/qemu-pr-helper.c ++++ b/scsi/qemu-pr-helper.c +@@ -614,7 +614,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz, + iov.iov_base = buf; + iov.iov_len = sz; + n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1, +- &fds, &nfds, errp); ++ &fds, &nfds, 0, errp); + + if (n_read == QIO_CHANNEL_ERR_BLOCK) { + qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN); +diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c +index 2994d1cf42..3cf1acaf7d 100644 +--- a/tests/qtest/tpm-emu.c ++++ b/tests/qtest/tpm-emu.c +@@ -106,7 +106,7 @@ void *tpm_emu_ctrl_thread(void *data) + int *pfd = NULL; + size_t nfd = 0; + +- qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort); ++ qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort); + cmd = be32_to_cpu(cmd); + g_assert_cmpint(cmd, ==, CMD_SET_DATAFD); + g_assert_cmpint(nfd, ==, 1); +diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c +index b36a5d972a..b964bb202d 100644 +--- a/tests/unit/test-io-channel-socket.c ++++ b/tests/unit/test-io-channel-socket.c +@@ -460,6 +460,7 @@ static void test_io_channel_unix_fd_pass(void) + 
G_N_ELEMENTS(iorecv), + &fdrecv, + &nfdrecv, ++ 0, + &error_abort); + + g_assert(nfdrecv == G_N_ELEMENTS(fdsend)); +diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c +index 232984ace6..145eb17c08 100644 +--- a/util/vhost-user-server.c ++++ b/util/vhost-user-server.c +@@ -116,7 +116,7 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg) + * qio_channel_readv_full may have short reads, keeping calling it + * until getting VHOST_USER_HDR_SIZE or 0 bytes in total + */ +- rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err); ++ rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err); + if (rc < 0) { + if (rc == QIO_CHANNEL_ERR_BLOCK) { + assert(local_err == NULL); +-- +2.31.1 + diff --git a/kvm-linux-headers-Update-to-v6.1.patch b/kvm-linux-headers-Update-to-v6.1.patch new file mode 100644 index 0000000..6ce9c7d --- /dev/null +++ b/kvm-linux-headers-Update-to-v6.1.patch @@ -0,0 +1,577 @@ +From cbe35c6a4794107ea1ddecf0b381ba4b1c8799f5 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 7 Feb 2023 15:57:10 -0500 +Subject: [PATCH 3/8] linux-headers: Update to v6.1 + +RH-Author: Peter Xu +RH-MergeRequest: 149: Support /dev/userfaultfd +RH-Bugzilla: 2158704 +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: quintela1 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/3] 15d97026e802a0f01b5f80f81fb4414dc69b2b2d (peterx/qemu-kvm) + +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Acked-by: Cornelia Huck +Signed-off-by: Juan Quintela +(cherry picked from commit 93e0932b7be2498024cd6ba8446a0fa2cb1769bc) +Signed-off-by: Peter Xu +--- + include/standard-headers/drm/drm_fourcc.h | 34 ++++- + include/standard-headers/linux/ethtool.h | 63 +++++++- + include/standard-headers/linux/fuse.h | 6 +- + .../linux/input-event-codes.h | 1 + + include/standard-headers/linux/virtio_blk.h | 19 +++ + linux-headers/asm-generic/hugetlb_encode.h | 26 ++-- + linux-headers/asm-generic/mman-common.h | 2 + + linux-headers/asm-mips/mman.h | 2 + + linux-headers/asm-riscv/kvm.h | 4 + + linux-headers/linux/kvm.h | 1 + + linux-headers/linux/psci.h | 14 ++ + linux-headers/linux/userfaultfd.h | 4 + + linux-headers/linux/vfio.h | 142 ++++++++++++++++++ + 13 files changed, 298 insertions(+), 20 deletions(-) + +diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h +index 48b620cbef..b868488f93 100644 +--- a/include/standard-headers/drm/drm_fourcc.h ++++ b/include/standard-headers/drm/drm_fourcc.h +@@ -98,18 +98,42 @@ extern "C" { + #define DRM_FORMAT_INVALID 0 + + /* color index */ ++#define DRM_FORMAT_C1 fourcc_code('C', '1', ' ', ' ') /* [7:0] C0:C1:C2:C3:C4:C5:C6:C7 1:1:1:1:1:1:1:1 eight pixels/byte */ ++#define DRM_FORMAT_C2 fourcc_code('C', '2', ' ', ' ') /* [7:0] C0:C1:C2:C3 2:2:2:2 four pixels/byte */ ++#define DRM_FORMAT_C4 fourcc_code('C', '4', ' ', ' ') /* [7:0] C0:C1 4:4 two pixels/byte */ + #define DRM_FORMAT_C8 fourcc_code('C', '8', ' ', ' ') /* [7:0] C */ + +-/* 8 bpp Red */ ++/* 1 bpp Darkness (inverse relationship between channel value and brightness) */ ++#define DRM_FORMAT_D1 fourcc_code('D', '1', ' ', ' ') /* [7:0] D0:D1:D2:D3:D4:D5:D6:D7 1:1:1:1:1:1:1:1 eight pixels/byte */ ++ ++/* 2 bpp Darkness (inverse 
relationship between channel value and brightness) */ ++#define DRM_FORMAT_D2 fourcc_code('D', '2', ' ', ' ') /* [7:0] D0:D1:D2:D3 2:2:2:2 four pixels/byte */ ++ ++/* 4 bpp Darkness (inverse relationship between channel value and brightness) */ ++#define DRM_FORMAT_D4 fourcc_code('D', '4', ' ', ' ') /* [7:0] D0:D1 4:4 two pixels/byte */ ++ ++/* 8 bpp Darkness (inverse relationship between channel value and brightness) */ ++#define DRM_FORMAT_D8 fourcc_code('D', '8', ' ', ' ') /* [7:0] D */ ++ ++/* 1 bpp Red (direct relationship between channel value and brightness) */ ++#define DRM_FORMAT_R1 fourcc_code('R', '1', ' ', ' ') /* [7:0] R0:R1:R2:R3:R4:R5:R6:R7 1:1:1:1:1:1:1:1 eight pixels/byte */ ++ ++/* 2 bpp Red (direct relationship between channel value and brightness) */ ++#define DRM_FORMAT_R2 fourcc_code('R', '2', ' ', ' ') /* [7:0] R0:R1:R2:R3 2:2:2:2 four pixels/byte */ ++ ++/* 4 bpp Red (direct relationship between channel value and brightness) */ ++#define DRM_FORMAT_R4 fourcc_code('R', '4', ' ', ' ') /* [7:0] R0:R1 4:4 two pixels/byte */ ++ ++/* 8 bpp Red (direct relationship between channel value and brightness) */ + #define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ + +-/* 10 bpp Red */ ++/* 10 bpp Red (direct relationship between channel value and brightness) */ + #define DRM_FORMAT_R10 fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */ + +-/* 12 bpp Red */ ++/* 12 bpp Red (direct relationship between channel value and brightness) */ + #define DRM_FORMAT_R12 fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */ + +-/* 16 bpp Red */ ++/* 16 bpp Red (direct relationship between channel value and brightness) */ + #define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */ + + /* 16 bpp RG */ +@@ -204,7 +228,9 @@ extern "C" { + #define DRM_FORMAT_VYUY fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */ + + #define DRM_FORMAT_AYUV fourcc_code('A', 'Y', 'U', 'V') 
/* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */ ++#define DRM_FORMAT_AVUY8888 fourcc_code('A', 'V', 'U', 'Y') /* [31:0] A:Cr:Cb:Y 8:8:8:8 little endian */ + #define DRM_FORMAT_XYUV8888 fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */ ++#define DRM_FORMAT_XVUY8888 fourcc_code('X', 'V', 'U', 'Y') /* [31:0] X:Cr:Cb:Y 8:8:8:8 little endian */ + #define DRM_FORMAT_VUY888 fourcc_code('V', 'U', '2', '4') /* [23:0] Cr:Cb:Y 8:8:8 little endian */ + #define DRM_FORMAT_VUY101010 fourcc_code('V', 'U', '3', '0') /* Y followed by U then V, 10:10:10. Non-linear modifier only */ + +diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h +index 4537da20cc..1dc56cdc0a 100644 +--- a/include/standard-headers/linux/ethtool.h ++++ b/include/standard-headers/linux/ethtool.h +@@ -736,6 +736,51 @@ enum ethtool_module_power_mode { + ETHTOOL_MODULE_POWER_MODE_HIGH, + }; + ++/** ++ * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE ++ * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState ++ * @ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN: state of PoDL PSE functions are ++ * unknown ++ * @ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED: PoDL PSE functions are disabled ++ * @ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED: PoDL PSE functions are enabled ++ */ ++enum ethtool_podl_pse_admin_state { ++ ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN = 1, ++ ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED, ++ ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED, ++}; ++ ++/** ++ * enum ethtool_podl_pse_pw_d_status - power detection status of the PoDL PSE. 
++ * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus: ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN: PoDL PSE ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED: "The enumeration “disabled” is ++ * asserted true when the PoDL PSE state diagram variable mr_pse_enable is ++ * false" ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING: "The enumeration “searching” is ++ * asserted true when either of the PSE state diagram variables ++ * pi_detecting or pi_classifying is true." ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING: "The enumeration “deliveringPower” ++ * is asserted true when the PoDL PSE state diagram variable pi_powered is ++ * true." ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP: "The enumeration “sleep” is asserted ++ * true when the PoDL PSE state diagram variable pi_sleeping is true." ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE: "The enumeration “idle” is asserted true ++ * when the logical combination of the PoDL PSE state diagram variables ++ * pi_prebiased*!pi_sleeping is true." ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR: "The enumeration “error” is asserted ++ * true when the PoDL PSE state diagram variable overload_held is true." ++ */ ++enum ethtool_podl_pse_pw_d_status { ++ ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN = 1, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR, ++}; ++ + /** + * struct ethtool_gstrings - string set for data tagging + * @cmd: Command number = %ETHTOOL_GSTRINGS +@@ -1840,6 +1885,20 @@ static inline int ethtool_validate_duplex(uint8_t duplex) + #define MASTER_SLAVE_STATE_SLAVE 3 + #define MASTER_SLAVE_STATE_ERR 4 + ++/* These are used to throttle the rate of data on the phy interface when the ++ * native speed of the interface is higher than the link speed. These should ++ * not be used for phy interfaces which natively support multiple speeds (e.g. 
++ * MII or SGMII). ++ */ ++/* No rate matching performed. */ ++#define RATE_MATCH_NONE 0 ++/* The phy sends pause frames to throttle the MAC. */ ++#define RATE_MATCH_PAUSE 1 ++/* The phy asserts CRS to prevent the MAC from transmitting. */ ++#define RATE_MATCH_CRS 2 ++/* The MAC is programmed with a sufficiently-large IPG. */ ++#define RATE_MATCH_OPEN_LOOP 3 ++ + /* Which connector port. */ + #define PORT_TP 0x00 + #define PORT_AUI 0x01 +@@ -2033,8 +2092,8 @@ enum ethtool_reset_flags { + * reported consistently by PHYLIB. Read-only. + * @master_slave_cfg: Master/slave port mode. + * @master_slave_state: Master/slave port state. ++ * @rate_matching: Rate adaptation performed by the PHY + * @reserved: Reserved for future use; see the note on reserved space. +- * @reserved1: Reserved for future use; see the note on reserved space. + * @link_mode_masks: Variable length bitmaps. + * + * If autonegotiation is disabled, the speed and @duplex represent the +@@ -2085,7 +2144,7 @@ struct ethtool_link_settings { + uint8_t transceiver; + uint8_t master_slave_cfg; + uint8_t master_slave_state; +- uint8_t reserved1[1]; ++ uint8_t rate_matching; + uint32_t reserved[7]; + uint32_t link_mode_masks[]; + /* layout of link_mode_masks fields: +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +index bda06258be..713d259768 100644 +--- a/include/standard-headers/linux/fuse.h ++++ b/include/standard-headers/linux/fuse.h +@@ -194,6 +194,9 @@ + * - add FUSE_SECURITY_CTX init flag + * - add security context to create, mkdir, symlink, and mknod requests + * - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX ++ * ++ * 7.37 ++ * - add FUSE_TMPFILE + */ + + #ifndef _LINUX_FUSE_H +@@ -225,7 +228,7 @@ + #define FUSE_KERNEL_VERSION 7 + + /** Minor version number of this interface */ +-#define FUSE_KERNEL_MINOR_VERSION 36 ++#define FUSE_KERNEL_MINOR_VERSION 37 + + /** The node ID of the root inode */ + #define FUSE_ROOT_ID 1 +@@ -533,6 +536,7 @@ enum fuse_opcode { + 
FUSE_SETUPMAPPING = 48, + FUSE_REMOVEMAPPING = 49, + FUSE_SYNCFS = 50, ++ FUSE_TMPFILE = 51, + + /* CUSE specific operations */ + CUSE_INIT = 4096, +diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h +index 50790aee5a..815f7a1dff 100644 +--- a/include/standard-headers/linux/input-event-codes.h ++++ b/include/standard-headers/linux/input-event-codes.h +@@ -862,6 +862,7 @@ + #define ABS_TOOL_WIDTH 0x1c + + #define ABS_VOLUME 0x20 ++#define ABS_PROFILE 0x21 + + #define ABS_MISC 0x28 + +diff --git a/include/standard-headers/linux/virtio_blk.h b/include/standard-headers/linux/virtio_blk.h +index 2dcc90826a..e81715cd70 100644 +--- a/include/standard-headers/linux/virtio_blk.h ++++ b/include/standard-headers/linux/virtio_blk.h +@@ -40,6 +40,7 @@ + #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ + #define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ + #define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ ++#define VIRTIO_BLK_F_SECURE_ERASE 16 /* Secure Erase is supported */ + + /* Legacy feature bits */ + #ifndef VIRTIO_BLK_NO_LEGACY +@@ -119,6 +120,21 @@ struct virtio_blk_config { + uint8_t write_zeroes_may_unmap; + + uint8_t unused1[3]; ++ ++ /* the next 3 entries are guarded by VIRTIO_BLK_F_SECURE_ERASE */ ++ /* ++ * The maximum secure erase sectors (in 512-byte sectors) for ++ * one segment. ++ */ ++ __virtio32 max_secure_erase_sectors; ++ /* ++ * The maximum number of secure erase segments in a ++ * secure erase command. ++ */ ++ __virtio32 max_secure_erase_seg; ++ /* Secure erase commands must be aligned to this number of sectors. */ ++ __virtio32 secure_erase_sector_alignment; ++ + } QEMU_PACKED; + + /* +@@ -153,6 +169,9 @@ struct virtio_blk_config { + /* Write zeroes command */ + #define VIRTIO_BLK_T_WRITE_ZEROES 13 + ++/* Secure erase command */ ++#define VIRTIO_BLK_T_SECURE_ERASE 14 ++ + #ifndef VIRTIO_BLK_NO_LEGACY + /* Barrier before this op. 
*/ + #define VIRTIO_BLK_T_BARRIER 0x80000000 +diff --git a/linux-headers/asm-generic/hugetlb_encode.h b/linux-headers/asm-generic/hugetlb_encode.h +index 4f3d5aaa11..de687009bf 100644 +--- a/linux-headers/asm-generic/hugetlb_encode.h ++++ b/linux-headers/asm-generic/hugetlb_encode.h +@@ -20,18 +20,18 @@ + #define HUGETLB_FLAG_ENCODE_SHIFT 26 + #define HUGETLB_FLAG_ENCODE_MASK 0x3f + +-#define HUGETLB_FLAG_ENCODE_16KB (14 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_16KB (14U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_64KB (16U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_512KB (19U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_1MB (20U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_2MB (21U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_8MB (23U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_16MB (24U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_32MB (25U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_256MB (28U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_512MB (29U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define 
HUGETLB_FLAG_ENCODE_1GB (30U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_2GB (31U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_16GB (34U << HUGETLB_FLAG_ENCODE_SHIFT) + + #endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */ +diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h +index 6c1aa92a92..6ce1f1ceb4 100644 +--- a/linux-headers/asm-generic/mman-common.h ++++ b/linux-headers/asm-generic/mman-common.h +@@ -77,6 +77,8 @@ + + #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ + ++#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ ++ + /* compatibility flags */ + #define MAP_FILE 0 + +diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h +index 1be428663c..c6e1fc77c9 100644 +--- a/linux-headers/asm-mips/mman.h ++++ b/linux-headers/asm-mips/mman.h +@@ -103,6 +103,8 @@ + + #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ + ++#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ ++ + /* compatibility flags */ + #define MAP_FILE 0 + +diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h +index 7351417afd..8985ff234c 100644 +--- a/linux-headers/asm-riscv/kvm.h ++++ b/linux-headers/asm-riscv/kvm.h +@@ -48,6 +48,7 @@ struct kvm_sregs { + /* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ + struct kvm_riscv_config { + unsigned long isa; ++ unsigned long zicbom_block_size; + }; + + /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +@@ -98,6 +99,9 @@ enum KVM_RISCV_ISA_EXT_ID { + KVM_RISCV_ISA_EXT_M, + KVM_RISCV_ISA_EXT_SVPBMT, + KVM_RISCV_ISA_EXT_SSTC, ++ KVM_RISCV_ISA_EXT_SVINVAL, ++ KVM_RISCV_ISA_EXT_ZIHINTPAUSE, ++ KVM_RISCV_ISA_EXT_ZICBOM, + KVM_RISCV_ISA_EXT_MAX, + }; + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index ebdafa576d..b2783c5202 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1175,6 +1175,7 @@ struct 
kvm_ppc_resize_hpt { + #define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220 + #define KVM_CAP_S390_ZPCI_OP 221 + #define KVM_CAP_S390_CPU_TOPOLOGY 222 ++#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 + + #ifdef KVM_CAP_IRQ_ROUTING + +diff --git a/linux-headers/linux/psci.h b/linux-headers/linux/psci.h +index 213b2a0f70..e60dfd8907 100644 +--- a/linux-headers/linux/psci.h ++++ b/linux-headers/linux/psci.h +@@ -48,12 +48,26 @@ + #define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7) + + #define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10) ++#define PSCI_1_0_FN_CPU_FREEZE PSCI_0_2_FN(11) ++#define PSCI_1_0_FN_CPU_DEFAULT_SUSPEND PSCI_0_2_FN(12) ++#define PSCI_1_0_FN_NODE_HW_STATE PSCI_0_2_FN(13) + #define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14) + #define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15) ++#define PSCI_1_0_FN_STAT_RESIDENCY PSCI_0_2_FN(16) ++#define PSCI_1_0_FN_STAT_COUNT PSCI_0_2_FN(17) ++ + #define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) ++#define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19) ++#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(19) + ++#define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12) ++#define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13) + #define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) ++#define PSCI_1_0_FN64_STAT_RESIDENCY PSCI_0_2_FN64(16) ++#define PSCI_1_0_FN64_STAT_COUNT PSCI_0_2_FN64(17) ++ + #define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) ++#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(19) + + /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ + #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff +diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h +index a3a377cd44..ba5d0df52f 100644 +--- a/linux-headers/linux/userfaultfd.h ++++ b/linux-headers/linux/userfaultfd.h +@@ -12,6 +12,10 @@ + + #include + ++/* ioctls for /dev/userfaultfd */ ++#define USERFAULTFD_IOC 0xAA ++#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00) ++ + /* + * If the UFFDIO_API is upgraded 
someday, the UFFDIO_UNREGISTER and + * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index ede44b5572..bee7e42198 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -986,6 +986,148 @@ enum vfio_device_mig_state { + VFIO_DEVICE_STATE_RUNNING_P2P = 5, + }; + ++/* ++ * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power ++ * state with the platform-based power management. Device use of lower power ++ * states depends on factors managed by the runtime power management core, ++ * including system level support and coordinating support among dependent ++ * devices. Enabling device low power entry does not guarantee lower power ++ * usage by the device, nor is a mechanism provided through this feature to ++ * know the current power state of the device. If any device access happens ++ * (either from the host or through the vfio uAPI) when the device is in the ++ * low power state, then the host will move the device out of the low power ++ * state as necessary prior to the access. Once the access is completed, the ++ * device may re-enter the low power state. For single shot low power support ++ * with wake-up notification, see ++ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP below. Access to mmap'd ++ * device regions is disabled on LOW_POWER_ENTRY and may only be resumed after ++ * calling LOW_POWER_EXIT. ++ */ ++#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY 3 ++ ++/* ++ * This device feature has the same behavior as ++ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY with the exception that the user ++ * provides an eventfd for wake-up notification. When the device moves out of ++ * the low power state for the wake-up, the host will not allow the device to ++ * re-enter a low power state without a subsequent user call to one of the low ++ * power entry device feature IOCTLs. 
Access to mmap'd device regions is ++ * disabled on LOW_POWER_ENTRY_WITH_WAKEUP and may only be resumed after the ++ * low power exit. The low power exit can happen either through LOW_POWER_EXIT ++ * or through any other access (where the wake-up notification has been ++ * generated). The access to mmap'd device regions will not trigger low power ++ * exit. ++ * ++ * The notification through the provided eventfd will be generated only when ++ * the device has entered and is resumed from a low power state after ++ * calling this device feature IOCTL. A device that has not entered low power ++ * state, as managed through the runtime power management core, will not ++ * generate a notification through the provided eventfd on access. Calling the ++ * LOW_POWER_EXIT feature is optional in the case where notification has been ++ * signaled on the provided eventfd that a resume from low power has occurred. ++ */ ++struct vfio_device_low_power_entry_with_wakeup { ++ __s32 wakeup_eventfd; ++ __u32 reserved; ++}; ++ ++#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP 4 ++ ++/* ++ * Upon VFIO_DEVICE_FEATURE_SET, disallow use of device low power states as ++ * previously enabled via VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY or ++ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP device features. ++ * This device feature IOCTL may itself generate a wakeup eventfd notification ++ * in the latter case if the device had previously entered a low power state. ++ */ ++#define VFIO_DEVICE_FEATURE_LOW_POWER_EXIT 5 ++ ++/* ++ * Upon VFIO_DEVICE_FEATURE_SET start/stop device DMA logging. ++ * VFIO_DEVICE_FEATURE_PROBE can be used to detect if the device supports ++ * DMA logging. ++ * ++ * DMA logging allows a device to internally record what DMAs the device is ++ * initiating and report them back to userspace. It is part of the VFIO ++ * migration infrastructure that allows implementing dirty page tracking ++ * during the pre copy phase of live migration. 
Only DMA WRITEs are logged, ++ * and this API is not connected to VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. ++ * ++ * When DMA logging is started a range of IOVAs to monitor is provided and the ++ * device can optimize its logging to cover only the IOVA range given. Each ++ * DMA that the device initiates inside the range will be logged by the device ++ * for later retrieval. ++ * ++ * page_size is an input that hints what tracking granularity the device ++ * should try to achieve. If the device cannot do the hinted page size then ++ * it's the driver choice which page size to pick based on its support. ++ * On output the device will return the page size it selected. ++ * ++ * ranges is a pointer to an array of ++ * struct vfio_device_feature_dma_logging_range. ++ * ++ * The core kernel code guarantees to support by minimum num_ranges that fit ++ * into a single kernel page. User space can try higher values but should give ++ * up if the above can't be achieved as of some driver limitations. ++ * ++ * A single call to start device DMA logging can be issued and a matching stop ++ * should follow at the end. Another start is not allowed in the meantime. ++ */ ++struct vfio_device_feature_dma_logging_control { ++ __aligned_u64 page_size; ++ __u32 num_ranges; ++ __u32 __reserved; ++ __aligned_u64 ranges; ++}; ++ ++struct vfio_device_feature_dma_logging_range { ++ __aligned_u64 iova; ++ __aligned_u64 length; ++}; ++ ++#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6 ++ ++/* ++ * Upon VFIO_DEVICE_FEATURE_SET stop device DMA logging that was started ++ * by VFIO_DEVICE_FEATURE_DMA_LOGGING_START ++ */ ++#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7 ++ ++/* ++ * Upon VFIO_DEVICE_FEATURE_GET read back and clear the device DMA log ++ * ++ * Query the device's DMA log for written pages within the given IOVA range. ++ * During querying the log is cleared for the IOVA range. 
++ * ++ * bitmap is a pointer to an array of u64s that will hold the output bitmap ++ * with 1 bit reporting a page_size unit of IOVA. The mapping of IOVA to bits ++ * is given by: ++ * bitmap[(addr - iova)/page_size] & (1ULL << (addr % 64)) ++ * ++ * The input page_size can be any power of two value and does not have to ++ * match the value given to VFIO_DEVICE_FEATURE_DMA_LOGGING_START. The driver ++ * will format its internal logging to match the reporting page size, possibly ++ * by replicating bits if the internal page size is lower than requested. ++ * ++ * The LOGGING_REPORT will only set bits in the bitmap and never clear or ++ * perform any initialization of the user provided bitmap. ++ * ++ * If any error is returned userspace should assume that the dirty log is ++ * corrupted. Error recovery is to consider all memory dirty and try to ++ * restart the dirty tracking, or to abort/restart the whole migration. ++ * ++ * If DMA logging is not enabled, an error will be returned. ++ * ++ */ ++struct vfio_device_feature_dma_logging_report { ++ __aligned_u64 iova; ++ __aligned_u64 length; ++ __aligned_u64 page_size; ++ __aligned_u64 bitmap; ++}; ++ ++#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8 ++ + /* -------- API for Type1 VFIO IOMMU -------- */ + + /** +-- +2.31.1 + diff --git a/kvm-migration-check-magic-value-for-deciding-the-mapping.patch b/kvm-migration-check-magic-value-for-deciding-the-mapping.patch new file mode 100644 index 0000000..387d0b3 --- /dev/null +++ b/kvm-migration-check-magic-value-for-deciding-the-mapping.patch @@ -0,0 +1,330 @@ +From 29eee1fbb84c0e2f0ece9e6d996afa7238ed2912 Mon Sep 17 00:00:00 2001 +From: "manish.mishra" +Date: Tue, 20 Dec 2022 18:44:18 +0000 +Subject: [PATCH 7/8] migration: check magic value for deciding the mapping of + channels +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 150: migration: Fix multifd crash on channel disorders 
+RH-Bugzilla: 2169732 +RH-Acked-by: quintela1 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Dr. David Alan Gilbert +RH-Commit: [2/2] 4fb9408478923415a91fe0527bf4b1a0f022f329 (peterx/qemu-kvm) + +Current logic assumes that channel connections on the destination side are +always established in the same order as the source and the first one will +always be the main channel followed by the multifid or post-copy +preemption channel. This may not be always true, as even if a channel has a +connection established on the source side it can be in the pending state on +the destination side and a newer connection can be established first. +Basically causing out of order mapping of channels on the destination side. +Currently, all channels except post-copy preempt send a magic number, this +patch uses that magic number to decide the type of channel. This logic is +applicable only for precopy(multifd) live migration, as mentioned, the +post-copy preempt channel does not send any magic number. Also, tls live +migrations already does tls handshake before creating other channels, so +this issue is not possible with tls, hence this logic is avoided for tls +live migrations. This patch uses read peek to check the magic number of +channels so that current data/control stream management remains +un-effected. + +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrange +Reviewed-by: Juan Quintela +Suggested-by: Daniel P. 
Berrange +Signed-off-by: manish.mishra +Signed-off-by: Juan Quintela +(cherry picked from commit 6720c2b32725e6ac404f22851a0ecd0a71d0cbe2) +Signed-off-by: Peter Xu +--- + migration/channel.c | 45 +++++++++++++++++++++++++++++++++ + migration/channel.h | 5 ++++ + migration/migration.c | 54 ++++++++++++++++++++++++++++------------ + migration/multifd.c | 19 +++++++------- + migration/multifd.h | 2 +- + migration/postcopy-ram.c | 5 +--- + migration/postcopy-ram.h | 2 +- + 7 files changed, 101 insertions(+), 31 deletions(-) + +diff --git a/migration/channel.c b/migration/channel.c +index 1b0815039f..ca3319a309 100644 +--- a/migration/channel.c ++++ b/migration/channel.c +@@ -92,3 +92,48 @@ void migration_channel_connect(MigrationState *s, + migrate_fd_connect(s, error); + error_free(error); + } ++ ++ ++/** ++ * @migration_channel_read_peek - Peek at migration channel, without ++ * actually removing it from channel buffer. ++ * ++ * @ioc: the channel object ++ * @buf: the memory region to read data into ++ * @buflen: the number of bytes to read in @buf ++ * @errp: pointer to a NULL-initialized error object ++ * ++ * Returns 0 if successful, returns -1 and sets @errp if fails. ++ */ ++int migration_channel_read_peek(QIOChannel *ioc, ++ const char *buf, ++ const size_t buflen, ++ Error **errp) ++{ ++ ssize_t len = 0; ++ struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; ++ ++ while (true) { ++ len = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, ++ QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp); ++ ++ if (len <= 0 && len != QIO_CHANNEL_ERR_BLOCK) { ++ error_setg(errp, ++ "Failed to peek at channel"); ++ return -1; ++ } ++ ++ if (len == buflen) { ++ break; ++ } ++ ++ /* 1ms sleep. 
*/ ++ if (qemu_in_coroutine()) { ++ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); ++ } else { ++ g_usleep(1000); ++ } ++ } ++ ++ return 0; ++} +diff --git a/migration/channel.h b/migration/channel.h +index 67a461c28a..5bdb8208a7 100644 +--- a/migration/channel.h ++++ b/migration/channel.h +@@ -24,4 +24,9 @@ void migration_channel_connect(MigrationState *s, + QIOChannel *ioc, + const char *hostname, + Error *error_in); ++ ++int migration_channel_read_peek(QIOChannel *ioc, ++ const char *buf, ++ const size_t buflen, ++ Error **errp); + #endif +diff --git a/migration/migration.c b/migration/migration.c +index f485eea5fb..593dbd25de 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -31,6 +31,7 @@ + #include "migration.h" + #include "savevm.h" + #include "qemu-file.h" ++#include "channel.h" + #include "migration/vmstate.h" + #include "block/block.h" + #include "qapi/error.h" +@@ -663,10 +664,6 @@ static bool migration_incoming_setup(QEMUFile *f, Error **errp) + { + MigrationIncomingState *mis = migration_incoming_get_current(); + +- if (multifd_load_setup(errp) != 0) { +- return false; +- } +- + if (!mis->from_src_file) { + mis->from_src_file = f; + } +@@ -733,31 +730,56 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + { + MigrationIncomingState *mis = migration_incoming_get_current(); + Error *local_err = NULL; +- bool start_migration; + QEMUFile *f; ++ bool default_channel = true; ++ uint32_t channel_magic = 0; ++ int ret = 0; + +- if (!mis->from_src_file) { +- /* The first connection (multifd may have multiple) */ ++ if (migrate_use_multifd() && !migrate_postcopy_ram() && ++ qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { ++ /* ++ * With multiple channels, it is possible that we receive channels ++ * out of order on destination side, causing incorrect mapping of ++ * source channels on destination side. Check channel MAGIC to ++ * decide type of channel. 
Please note this is best effort, postcopy ++ * preempt channel does not send any magic number so avoid it for ++ * postcopy live migration. Also tls live migration already does ++ * tls handshake while initializing main channel so with tls this ++ * issue is not possible. ++ */ ++ ret = migration_channel_read_peek(ioc, (void *)&channel_magic, ++ sizeof(channel_magic), &local_err); ++ ++ if (ret != 0) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ ++ default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); ++ } else { ++ default_channel = !mis->from_src_file; ++ } ++ ++ if (multifd_load_setup(errp) != 0) { ++ error_setg(errp, "Failed to setup multifd channels"); ++ return; ++ } ++ ++ if (default_channel) { + f = qemu_file_new_input(ioc); + + if (!migration_incoming_setup(f, errp)) { + return; + } +- +- /* +- * Common migration only needs one channel, so we can start +- * right now. Some features need more than one channel, we wait. +- */ +- start_migration = !migration_needs_multiple_sockets(); + } else { + /* Multiple connections */ + assert(migration_needs_multiple_sockets()); + if (migrate_use_multifd()) { +- start_migration = multifd_recv_new_channel(ioc, &local_err); ++ multifd_recv_new_channel(ioc, &local_err); + } else { + assert(migrate_postcopy_preempt()); + f = qemu_file_new_input(ioc); +- start_migration = postcopy_preempt_new_channel(mis, f); ++ postcopy_preempt_new_channel(mis, f); + } + if (local_err) { + error_propagate(errp, local_err); +@@ -765,7 +787,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + } + } + +- if (start_migration) { ++ if (migration_has_all_channels()) { + /* If it's a recovery, we're done */ + if (postcopy_try_recover()) { + return; +diff --git a/migration/multifd.c b/migration/multifd.c +index 509bbbe3bf..c3385529cf 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -1167,9 +1167,14 @@ int multifd_load_setup(Error **errp) + uint32_t page_count = MULTIFD_PACKET_SIZE / 
qemu_target_page_size(); + uint8_t i; + +- if (!migrate_use_multifd()) { ++ /* ++ * Return successfully if multiFD recv state is already initialised ++ * or multiFD is not enabled. ++ */ ++ if (multifd_recv_state || !migrate_use_multifd()) { + return 0; + } ++ + if (!migrate_multi_channels_is_allowed()) { + error_setg(errp, "multifd is not supported by current protocol"); + return -1; +@@ -1228,11 +1233,9 @@ bool multifd_recv_all_channels_created(void) + + /* + * Try to receive all multifd channels to get ready for the migration. +- * - Return true and do not set @errp when correctly receiving all channels; +- * - Return false and do not set @errp when correctly receiving the current one; +- * - Return false and set @errp when failing to receive the current channel. ++ * Sets @errp when failing to receive the current channel. + */ +-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) ++void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + { + MultiFDRecvParams *p; + Error *local_err = NULL; +@@ -1245,7 +1248,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + "failed to receive packet" + " via multifd channel %d: ", + qatomic_read(&multifd_recv_state->count)); +- return false; ++ return; + } + trace_multifd_recv_new_channel(id); + +@@ -1255,7 +1258,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + id); + multifd_recv_terminate_threads(local_err); + error_propagate(errp, local_err); +- return false; ++ return; + } + p->c = ioc; + object_ref(OBJECT(ioc)); +@@ -1266,6 +1269,4 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, + QEMU_THREAD_JOINABLE); + qatomic_inc(&multifd_recv_state->count); +- return qatomic_read(&multifd_recv_state->count) == +- migrate_multifd_channels(); + } +diff --git a/migration/multifd.h b/migration/multifd.h +index 519f498643..913e4ba274 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -18,7 +18,7 
@@ void multifd_save_cleanup(void); + int multifd_load_setup(Error **errp); + int multifd_load_cleanup(Error **errp); + bool multifd_recv_all_channels_created(void); +-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); ++void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); + void multifd_recv_sync_main(void); + int multifd_send_sync_main(QEMUFile *f); + int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index 0c55df0e52..b98e95dab0 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -1538,7 +1538,7 @@ void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd) + } + } + +-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) ++void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) + { + /* + * The new loading channel has its own threads, so it needs to be +@@ -1547,9 +1547,6 @@ bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) + qemu_file_set_blocking(file, true); + mis->postcopy_qemufile_dst = file; + trace_postcopy_preempt_new_channel(); +- +- /* Start the migration immediately */ +- return true; + } + + /* +diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h +index 6147bf7d1d..25881c4127 100644 +--- a/migration/postcopy-ram.h ++++ b/migration/postcopy-ram.h +@@ -190,7 +190,7 @@ enum PostcopyChannels { + RAM_CHANNEL_MAX, + }; + +-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); ++void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); + int postcopy_preempt_setup(MigrationState *s, Error **errp); + int postcopy_preempt_wait_channel(MigrationState *s); + +-- +2.31.1 + diff --git a/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch b/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch new file mode 100644 index 0000000..707c80f --- /dev/null +++ 
b/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch @@ -0,0 +1,325 @@ +From e5834364958a3914d7b8b46b985a1b054728b466 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 19 Jan 2023 11:16:45 +0100 +Subject: [PATCH 2/8] net: stream: add a new option to automatically reconnect +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 148: net: stream: add a new option to automatically reconnect +RH-Bugzilla: 2169232 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Acked-by: David Gibson (Red Hat) +RH-Commit: [2/2] 9b87647a9ed2e7c1b91bdfa9d0a736e091c892a5 (lvivier/qemu-kvm-centos) + +In stream mode, if the server shuts down there is currently +no way to reconnect the client to a new server without removing +the NIC device and the netdev backend (or to reboot). + +This patch introduces a reconnect option that specifies a delay +to try to reconnect with the same parameters. + +Add a new test in qtest to test the reconnect option and the +connect/disconnect events. 
+ +Signed-off-by: Laurent Vivier +Signed-off-by: Jason Wang +(cherry picked from commit b95c0d4440950fba6dbef0f781962911fa42abdb) +--- + net/stream.c | 53 ++++++++++++++++++- + qapi/net.json | 7 ++- + qemu-options.hx | 6 +-- + tests/qtest/netdev-socket.c | 101 ++++++++++++++++++++++++++++++++++++ + 4 files changed, 162 insertions(+), 5 deletions(-) + +diff --git a/net/stream.c b/net/stream.c +index 37ff727e0c..9204b4c96e 100644 +--- a/net/stream.c ++++ b/net/stream.c +@@ -39,6 +39,8 @@ + #include "io/channel-socket.h" + #include "io/net-listener.h" + #include "qapi/qapi-events-net.h" ++#include "qapi/qapi-visit-sockets.h" ++#include "qapi/clone-visitor.h" + + typedef struct NetStreamState { + NetClientState nc; +@@ -49,11 +51,15 @@ typedef struct NetStreamState { + guint ioc_write_tag; + SocketReadState rs; + unsigned int send_index; /* number of bytes sent*/ ++ uint32_t reconnect; ++ guint timer_tag; ++ SocketAddress *addr; + } NetStreamState; + + static void net_stream_listen(QIONetListener *listener, + QIOChannelSocket *cioc, + void *opaque); ++static void net_stream_arm_reconnect(NetStreamState *s); + + static gboolean net_stream_writable(QIOChannel *ioc, + GIOCondition condition, +@@ -170,6 +176,7 @@ static gboolean net_stream_send(QIOChannel *ioc, + qemu_set_info_str(&s->nc, "%s", ""); + + qapi_event_send_netdev_stream_disconnected(s->nc.name); ++ net_stream_arm_reconnect(s); + + return G_SOURCE_REMOVE; + } +@@ -187,6 +194,14 @@ static gboolean net_stream_send(QIOChannel *ioc, + static void net_stream_cleanup(NetClientState *nc) + { + NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); ++ if (s->timer_tag) { ++ g_source_remove(s->timer_tag); ++ s->timer_tag = 0; ++ } ++ if (s->addr) { ++ qapi_free_SocketAddress(s->addr); ++ s->addr = NULL; ++ } + if (s->ioc) { + if (QIO_CHANNEL_SOCKET(s->ioc)->fd != -1) { + if (s->ioc_read_tag) { +@@ -346,12 +361,37 @@ static void net_stream_client_connected(QIOTask *task, gpointer opaque) + error: + 
object_unref(OBJECT(s->ioc)); + s->ioc = NULL; ++ net_stream_arm_reconnect(s); ++} ++ ++static gboolean net_stream_reconnect(gpointer data) ++{ ++ NetStreamState *s = data; ++ QIOChannelSocket *sioc; ++ ++ s->timer_tag = 0; ++ ++ sioc = qio_channel_socket_new(); ++ s->ioc = QIO_CHANNEL(sioc); ++ qio_channel_socket_connect_async(sioc, s->addr, ++ net_stream_client_connected, s, ++ NULL, NULL); ++ return G_SOURCE_REMOVE; ++} ++ ++static void net_stream_arm_reconnect(NetStreamState *s) ++{ ++ if (s->reconnect && s->timer_tag == 0) { ++ s->timer_tag = g_timeout_add_seconds(s->reconnect, ++ net_stream_reconnect, s); ++ } + } + + static int net_stream_client_init(NetClientState *peer, + const char *model, + const char *name, + SocketAddress *addr, ++ uint32_t reconnect, + Error **errp) + { + NetStreamState *s; +@@ -364,6 +404,10 @@ static int net_stream_client_init(NetClientState *peer, + s->ioc = QIO_CHANNEL(sioc); + s->nc.link_down = true; + ++ s->reconnect = reconnect; ++ if (reconnect) { ++ s->addr = QAPI_CLONE(SocketAddress, addr); ++ } + qio_channel_socket_connect_async(sioc, addr, + net_stream_client_connected, s, + NULL, NULL); +@@ -380,7 +424,14 @@ int net_init_stream(const Netdev *netdev, const char *name, + sock = &netdev->u.stream; + + if (!sock->has_server || !sock->server) { +- return net_stream_client_init(peer, "stream", name, sock->addr, errp); ++ return net_stream_client_init(peer, "stream", name, sock->addr, ++ sock->has_reconnect ? 
sock->reconnect : 0, ++ errp); ++ } ++ if (sock->has_reconnect) { ++ error_setg(errp, "'reconnect' option is incompatible with " ++ "socket in server mode"); ++ return -1; + } + return net_stream_server_init(peer, "stream", name, sock->addr, errp); + } +diff --git a/qapi/net.json b/qapi/net.json +index 522ac582ed..d6eb30008b 100644 +--- a/qapi/net.json ++++ b/qapi/net.json +@@ -585,6 +585,10 @@ + # @addr: socket address to listen on (server=true) + # or connect to (server=false) + # @server: create server socket (default: false) ++# @reconnect: For a client socket, if a socket is disconnected, ++# then attempt a reconnect after the given number of seconds. ++# Setting this to zero disables this function. (default: 0) ++# (since 8.0) + # + # Only SocketAddress types 'unix', 'inet' and 'fd' are supported. + # +@@ -593,7 +597,8 @@ + { 'struct': 'NetdevStreamOptions', + 'data': { + 'addr': 'SocketAddress', +- '*server': 'bool' } } ++ '*server': 'bool', ++ '*reconnect': 'uint32' } } + + ## + # @NetdevDgramOptions: +diff --git a/qemu-options.hx b/qemu-options.hx +index ea02ca3a45..48eef4aa2c 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -2766,9 +2766,9 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, + "-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n" + " configure a network backend to connect to another network\n" + " using an UDP tunnel\n" +- "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off]\n" +- "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off]\n" +- "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n" ++ "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect=seconds]\n" ++ "-netdev 
stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect=seconds]\n" ++ "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect=seconds]\n" + " configure a network backend to connect to another network\n" + " using a socket connection in stream mode.\n" + "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]\n" +diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c +index 6ba256e173..acc32c378b 100644 +--- a/tests/qtest/netdev-socket.c ++++ b/tests/qtest/netdev-socket.c +@@ -11,6 +11,10 @@ + #include + #include "../unit/socket-helpers.h" + #include "libqtest.h" ++#include "qapi/qmp/qstring.h" ++#include "qemu/sockets.h" ++#include "qapi/qobject-input-visitor.h" ++#include "qapi/qapi-visit-sockets.h" + + #define CONNECTION_TIMEOUT 5 + +@@ -142,6 +146,101 @@ static void test_stream_inet_ipv4(void) + qtest_quit(qts0); + } + ++static void wait_stream_connected(QTestState *qts, const char *id, ++ SocketAddress **addr) ++{ ++ QDict *resp, *data; ++ QString *qstr; ++ QObject *obj; ++ Visitor *v = NULL; ++ ++ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_CONNECTED"); ++ g_assert_nonnull(resp); ++ data = qdict_get_qdict(resp, "data"); ++ g_assert_nonnull(data); ++ ++ qstr = qobject_to(QString, qdict_get(data, "netdev-id")); ++ g_assert_nonnull(data); ++ ++ g_assert(!strcmp(qstring_get_str(qstr), id)); ++ ++ obj = qdict_get(data, "addr"); ++ ++ v = qobject_input_visitor_new(obj); ++ visit_type_SocketAddress(v, NULL, addr, NULL); ++ visit_free(v); ++ qobject_unref(resp); ++} ++ ++static void wait_stream_disconnected(QTestState *qts, const char *id) ++{ ++ QDict *resp, *data; ++ QString *qstr; ++ ++ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_DISCONNECTED"); ++ g_assert_nonnull(resp); ++ data = qdict_get_qdict(resp, "data"); ++ g_assert_nonnull(data); ++ ++ qstr = qobject_to(QString, qdict_get(data, "netdev-id")); ++ 
g_assert_nonnull(data); ++ ++ g_assert(!strcmp(qstring_get_str(qstr), id)); ++ qobject_unref(resp); ++} ++ ++static void test_stream_inet_reconnect(void) ++{ ++ QTestState *qts0, *qts1; ++ int port; ++ SocketAddress *addr; ++ ++ port = inet_get_free_port(false); ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true,addr.type=inet," ++ "addr.ipv4=on,addr.ipv6=off," ++ "addr.host=127.0.0.1,addr.port=%d", port); ++ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,server=false,id=st0,addr.type=inet," ++ "addr.ipv4=on,addr.ipv6=off,reconnect=1," ++ "addr.host=127.0.0.1,addr.port=%d", port); ++ ++ wait_stream_connected(qts0, "st0", &addr); ++ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); ++ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); ++ qapi_free_SocketAddress(addr); ++ ++ /* kill server */ ++ qtest_quit(qts0); ++ ++ /* check client has been disconnected */ ++ wait_stream_disconnected(qts1, "st0"); ++ ++ /* restart server */ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true,addr.type=inet," ++ "addr.ipv4=on,addr.ipv6=off," ++ "addr.host=127.0.0.1,addr.port=%d", port); ++ ++ /* wait connection events*/ ++ wait_stream_connected(qts0, "st0", &addr); ++ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); ++ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); ++ qapi_free_SocketAddress(addr); ++ ++ wait_stream_connected(qts1, "st0", &addr); ++ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); ++ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); ++ g_assert_cmpint(atoi(addr->u.inet.port), ==, port); ++ qapi_free_SocketAddress(addr); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ + static void test_stream_inet_ipv6(void) + { + QTestState *qts0, *qts1; +@@ -418,6 +517,8 @@ int main(int argc, char **argv) + #ifndef _WIN32 + qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast); + #endif ++ 
qtest_add_func("/netdev/stream/inet/reconnect", ++ test_stream_inet_reconnect); + } + if (has_ipv6) { + qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6); +-- +2.31.1 + diff --git a/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch b/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch new file mode 100644 index 0000000..b9536c3 --- /dev/null +++ b/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch @@ -0,0 +1,50 @@ +From b330bf0a2ad5af73d3c62997f7f0fa5b61f1796b Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 14 Feb 2023 14:48:37 +0100 +Subject: [PATCH 8/8] target/s390x/arch_dump: Fix memory corruption in + s390x_write_elf64_notes() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 152: Fix memory corruption in s390x_write_elf64_notes() +RH-Bugzilla: 2168172 +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cédric Le Goater +RH-Commit: [1/1] 37a2c997b2c8b7524e0b6299891bf3ea7c9a46d0 (thuth/qemu-kvm-cs9) + +Bugzilla: https://bugzilla.redhat.com/2168172 +Upstream-Status: Posted (and reviewed, but not merged yet) + +"note_size" can be smaller than sizeof(note), so unconditionally calling +memset(notep, 0, sizeof(note)) could cause a memory corruption here in +case notep has been allocated dynamically, thus let's use note_size as +length argument for memset() instead. 
+ +Fixes: 113d8f4e95 ("s390x: pv: Add dump support") +Message-Id: <20230214141056.680969-1-thuth@redhat.com> +Reviewed-by: Janosch Frank +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Thomas Huth +--- + target/s390x/arch_dump.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c +index a2329141e8..a7c44ba49d 100644 +--- a/target/s390x/arch_dump.c ++++ b/target/s390x/arch_dump.c +@@ -248,7 +248,7 @@ static int s390x_write_elf64_notes(const char *note_name, + notep = g_malloc(note_size); + } + +- memset(notep, 0, sizeof(note)); ++ memset(notep, 0, note_size); + + /* Setup note header data */ + notep->hdr.n_descsz = cpu_to_be32(content_size); +-- +2.31.1 + diff --git a/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch b/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch new file mode 100644 index 0000000..ebd52cd --- /dev/null +++ b/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch @@ -0,0 +1,505 @@ +From 39d5761fe1f546e764dedf2ea32c55d8f5222696 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Wed, 18 Jan 2023 13:04:05 +0100 +Subject: [PATCH 1/8] tests/qtest: netdev: test stream and dgram backends +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 148: net: stream: add a new option to automatically reconnect +RH-Bugzilla: 2169232 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Acked-by: David Gibson (Red Hat) +RH-Commit: [1/2] 75c71b47eea072e14651a96612d402b50d2b8f1e (lvivier/qemu-kvm-centos) + +Signed-off-by: Laurent Vivier +Acked-by: Michael S. 
Tsirkin +Message-Id: <20230118120405.1876329-1-lvivier@redhat.com> +Signed-off-by: Thomas Huth +(cherry picked from commit c95031a19f0d7f418a597243f6f84b031a858997) +--- + tests/qtest/meson.build | 2 + + tests/qtest/netdev-socket.c | 448 ++++++++++++++++++++++++++++++++++++ + 2 files changed, 450 insertions(+) + create mode 100644 tests/qtest/netdev-socket.c + +diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build +index 9df3f9f8b9..2e7c6fe5e3 100644 +--- a/tests/qtest/meson.build ++++ b/tests/qtest/meson.build +@@ -27,6 +27,7 @@ qtests_generic = [ + 'test-hmp', + 'qos-test', + 'readconfig-test', ++ 'netdev-socket', + ] + if config_host.has_key('CONFIG_MODULES') + qtests_generic += [ 'modules-test' ] +@@ -299,6 +300,7 @@ qtests = { + 'tpm-tis-device-swtpm-test': [io, tpmemu_files, 'tpm-tis-util.c'], + 'tpm-tis-device-test': [io, tpmemu_files, 'tpm-tis-util.c'], + 'vmgenid-test': files('boot-sector.c', 'acpi-utils.c'), ++ 'netdev-socket': files('netdev-socket.c', '../unit/socket-helpers.c'), + } + + gvnc = dependency('gvnc-1.0', required: false) +diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c +new file mode 100644 +index 0000000000..6ba256e173 +--- /dev/null ++++ b/tests/qtest/netdev-socket.c +@@ -0,0 +1,448 @@ ++/* ++ * QTest testcase for netdev stream and dgram ++ * ++ * Copyright (c) 2022 Red Hat, Inc. 
++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/sockets.h" ++#include ++#include "../unit/socket-helpers.h" ++#include "libqtest.h" ++ ++#define CONNECTION_TIMEOUT 5 ++ ++#define EXPECT_STATE(q, e, t) \ ++do { \ ++ char *resp = NULL; \ ++ g_test_timer_start(); \ ++ do { \ ++ g_free(resp); \ ++ resp = qtest_hmp(q, "info network"); \ ++ if (t) { \ ++ strrchr(resp, t)[0] = 0; \ ++ } \ ++ if (g_str_equal(resp, e)) { \ ++ break; \ ++ } \ ++ } while (g_test_timer_elapsed() < CONNECTION_TIMEOUT); \ ++ g_assert_cmpstr(resp, ==, e); \ ++ g_free(resp); \ ++} while (0) ++ ++static gchar *tmpdir; ++ ++static int inet_get_free_port_socket_ipv4(int sock) ++{ ++ struct sockaddr_in addr; ++ socklen_t len; ++ ++ memset(&addr, 0, sizeof(addr)); ++ addr.sin_family = AF_INET; ++ addr.sin_addr.s_addr = INADDR_ANY; ++ addr.sin_port = 0; ++ if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) { ++ return -1; ++ } ++ ++ len = sizeof(addr); ++ if (getsockname(sock, (struct sockaddr *)&addr, &len) < 0) { ++ return -1; ++ } ++ ++ return ntohs(addr.sin_port); ++} ++ ++static int inet_get_free_port_socket_ipv6(int sock) ++{ ++ struct sockaddr_in6 addr; ++ socklen_t len; ++ ++ memset(&addr, 0, sizeof(addr)); ++ addr.sin6_family = AF_INET6; ++ addr.sin6_addr = in6addr_any; ++ addr.sin6_port = 0; ++ if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) { ++ return -1; ++ } ++ ++ len = sizeof(addr); ++ if (getsockname(sock, (struct sockaddr *)&addr, &len) < 0) { ++ return -1; ++ } ++ ++ return ntohs(addr.sin6_port); ++} ++ ++static int inet_get_free_port_multiple(int nb, int *port, bool ipv6) ++{ ++ int sock[nb]; ++ int i; ++ ++ for (i = 0; i < nb; i++) { ++ sock[i] = socket(ipv6 ? AF_INET6 : AF_INET, SOCK_STREAM, 0); ++ if (sock[i] < 0) { ++ break; ++ } ++ port[i] = ipv6 ? 
inet_get_free_port_socket_ipv6(sock[i]) : ++ inet_get_free_port_socket_ipv4(sock[i]); ++ if (port[i] == -1) { ++ break; ++ } ++ } ++ ++ nb = i; ++ for (i = 0; i < nb; i++) { ++ closesocket(sock[i]); ++ } ++ ++ return nb; ++} ++ ++static int inet_get_free_port(bool ipv6) ++{ ++ int nb, port; ++ ++ nb = inet_get_free_port_multiple(1, &port, ipv6); ++ g_assert_cmpint(nb, ==, 1); ++ ++ return port; ++} ++ ++static void test_stream_inet_ipv4(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ int port; ++ ++ port = inet_get_free_port(false); ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true,addr.type=inet," ++ "addr.ipv4=on,addr.ipv6=off," ++ "addr.host=127.0.0.1,addr.port=%d", port); ++ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,server=false,id=st0,addr.type=inet," ++ "addr.ipv4=on,addr.ipv6=off," ++ "addr.host=127.0.0.1,addr.port=%d", port); ++ ++ expect = g_strdup_printf("st0: index=0,type=stream,tcp:127.0.0.1:%d\r\n", ++ port); ++ EXPECT_STATE(qts1, expect, 0); ++ g_free(expect); ++ ++ /* the port is unknown, check only the address */ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,tcp:127.0.0.1", ':'); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ ++static void test_stream_inet_ipv6(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ int port; ++ ++ port = inet_get_free_port(true); ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true,addr.type=inet," ++ "addr.ipv4=off,addr.ipv6=on," ++ "addr.host=::1,addr.port=%d", port); ++ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,server=false,id=st0,addr.type=inet," ++ "addr.ipv4=off,addr.ipv6=on," ++ "addr.host=::1,addr.port=%d", port); ++ ++ expect = g_strdup_printf("st0: index=0,type=stream,tcp:::1:%d\r\n", ++ port); ++ EXPECT_STATE(qts1, expect, 0); ++ g_free(expect); ++ ++ /* the 
port is unknown, check only the address */ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,tcp:::1", ':'); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ ++static void test_stream_unix(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ gchar *path; ++ ++ path = g_strconcat(tmpdir, "/stream_unix", NULL); ++ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true," ++ "addr.type=unix,addr.path=%s,", ++ path); ++ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=false," ++ "addr.type=unix,addr.path=%s", ++ path); ++ ++ expect = g_strdup_printf("st0: index=0,type=stream,unix:%s\r\n", path); ++ EXPECT_STATE(qts1, expect, 0); ++ EXPECT_STATE(qts0, expect, 0); ++ g_free(expect); ++ g_free(path); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ ++#ifdef CONFIG_LINUX ++static void test_stream_unix_abstract(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ gchar *path; ++ ++ path = g_strconcat(tmpdir, "/stream_unix_abstract", NULL); ++ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true," ++ "addr.type=unix,addr.path=%s," ++ "addr.abstract=on", ++ path); ++ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=false," ++ "addr.type=unix,addr.path=%s,addr.abstract=on", ++ path); ++ ++ expect = g_strdup_printf("st0: index=0,type=stream,unix:%s\r\n", path); ++ EXPECT_STATE(qts1, expect, 0); ++ EXPECT_STATE(qts0, expect, 0); ++ g_free(expect); ++ g_free(path); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++#endif ++ ++#ifndef _WIN32 ++static void test_stream_fd(void) ++{ ++ QTestState *qts0, *qts1; ++ int sock[2]; ++ int ret; ++ ++ ret = socketpair(AF_LOCAL, SOCK_STREAM, 0, sock); ++ g_assert_true(ret == 0); ++ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,addr.type=fd,addr.str=%d", ++ sock[0]); ++ 
++ EXPECT_STATE(qts0, "st0: index=0,type=stream,unix:\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,addr.type=fd,addr.str=%d", ++ sock[1]); ++ ++ EXPECT_STATE(qts1, "st0: index=0,type=stream,unix:\r\n", 0); ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,unix:\r\n", 0); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++ ++ closesocket(sock[0]); ++ closesocket(sock[1]); ++} ++#endif ++ ++static void test_dgram_inet(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ int port[2]; ++ int nb; ++ ++ nb = inet_get_free_port_multiple(2, port, false); ++ g_assert_cmpint(nb, ==, 2); ++ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0," ++ "local.type=inet,local.host=127.0.0.1,local.port=%d," ++ "remote.type=inet,remote.host=127.0.0.1,remote.port=%d", ++ port[0], port[1]); ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram," ++ "udp=127.0.0.1:%d/127.0.0.1:%d\r\n", ++ port[0], port[1]); ++ EXPECT_STATE(qts0, expect, 0); ++ g_free(expect); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0," ++ "local.type=inet,local.host=127.0.0.1,local.port=%d," ++ "remote.type=inet,remote.host=127.0.0.1,remote.port=%d", ++ port[1], port[0]); ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram," ++ "udp=127.0.0.1:%d/127.0.0.1:%d\r\n", ++ port[1], port[0]); ++ EXPECT_STATE(qts1, expect, 0); ++ g_free(expect); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ ++#ifndef _WIN32 ++static void test_dgram_mcast(void) ++{ ++ QTestState *qts; ++ ++ qts = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0," ++ "remote.type=inet,remote.host=230.0.0.1,remote.port=1234"); ++ ++ EXPECT_STATE(qts, "st0: index=0,type=dgram,mcast=230.0.0.1:1234\r\n", 0); ++ ++ qtest_quit(qts); ++} ++ ++static void test_dgram_unix(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ gchar *path0, *path1; ++ ++ path0 = g_strconcat(tmpdir, "/dgram_unix0", NULL); ++ path1 = g_strconcat(tmpdir, "/dgram_unix1", NULL); ++ ++ 
qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0,local.type=unix,local.path=%s," ++ "remote.type=unix,remote.path=%s", ++ path0, path1); ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram,udp=%s:%s\r\n", ++ path0, path1); ++ EXPECT_STATE(qts0, expect, 0); ++ g_free(expect); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0,local.type=unix,local.path=%s," ++ "remote.type=unix,remote.path=%s", ++ path1, path0); ++ ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram,udp=%s:%s\r\n", ++ path1, path0); ++ EXPECT_STATE(qts1, expect, 0); ++ g_free(expect); ++ ++ unlink(path0); ++ g_free(path0); ++ unlink(path1); ++ g_free(path1); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ ++static void test_dgram_fd(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ int ret; ++ int sv[2]; ++ ++ ret = socketpair(PF_UNIX, SOCK_DGRAM, 0, sv); ++ g_assert_cmpint(ret, !=, -1); ++ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0,local.type=fd,local.str=%d", ++ sv[0]); ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram,fd=%d unix\r\n", sv[0]); ++ EXPECT_STATE(qts0, expect, 0); ++ g_free(expect); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0,local.type=fd,local.str=%d", ++ sv[1]); ++ ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram,fd=%d unix\r\n", sv[1]); ++ EXPECT_STATE(qts1, expect, 0); ++ g_free(expect); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++ ++ closesocket(sv[0]); ++ closesocket(sv[1]); ++} ++#endif ++ ++int main(int argc, char **argv) ++{ ++ int ret; ++ bool has_ipv4, has_ipv6, has_afunix; ++ g_autoptr(GError) err = NULL; ++ ++ socket_init(); ++ g_test_init(&argc, &argv, NULL); ++ ++ if (socket_check_protocol_support(&has_ipv4, &has_ipv6) < 0) { ++ g_error("socket_check_protocol_support() failed\n"); ++ } ++ ++ tmpdir = g_dir_make_tmp("netdev-socket.XXXXXX", &err); ++ if (tmpdir == NULL) { ++ g_error("Can't create temporary directory in %s: %s", ++ 
g_get_tmp_dir(), err->message); ++ } ++ ++ if (has_ipv4) { ++ qtest_add_func("/netdev/stream/inet/ipv4", test_stream_inet_ipv4); ++ qtest_add_func("/netdev/dgram/inet", test_dgram_inet); ++#ifndef _WIN32 ++ qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast); ++#endif ++ } ++ if (has_ipv6) { ++ qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6); ++ } ++ ++ socket_check_afunix_support(&has_afunix); ++ if (has_afunix) { ++#ifndef _WIN32 ++ qtest_add_func("/netdev/dgram/unix", test_dgram_unix); ++#endif ++ qtest_add_func("/netdev/stream/unix", test_stream_unix); ++#ifdef CONFIG_LINUX ++ qtest_add_func("/netdev/stream/unix/abstract", ++ test_stream_unix_abstract); ++#endif ++#ifndef _WIN32 ++ qtest_add_func("/netdev/stream/fd", test_stream_fd); ++ qtest_add_func("/netdev/dgram/fd", test_dgram_fd); ++#endif ++ } ++ ++ ret = g_test_run(); ++ ++ g_rmdir(tmpdir); ++ g_free(tmpdir); ++ ++ return ret; ++} +-- +2.31.1 + diff --git a/kvm-util-userfaultfd-Add-uffd_open.patch b/kvm-util-userfaultfd-Add-uffd_open.patch new file mode 100644 index 0000000..5a5f90c --- /dev/null +++ b/kvm-util-userfaultfd-Add-uffd_open.patch @@ -0,0 +1,169 @@ +From 80445fed73a7d1a87e8ce96f6cb7d505e437f845 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 1 Feb 2023 16:10:54 -0500 +Subject: [PATCH 4/8] util/userfaultfd: Add uffd_open() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 149: Support /dev/userfaultfd +RH-Bugzilla: 2158704 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: quintela1 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/3] 4c81696314ab26db47c3415fa2c2501c6a572b5c (peterx/qemu-kvm) + +Add a helper to create the uffd handle. 
+ +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Juan Quintela +Signed-off-by: Peter Xu +Signed-off-by: Juan Quintela +(cherry picked from commit d5890ea0722831eea76a0efd23a496b3e8815fe8) +Signed-off-by: Peter Xu +--- + include/qemu/userfaultfd.h | 12 ++++++++++++ + migration/postcopy-ram.c | 11 +++++------ + tests/qtest/migration-test.c | 4 ++-- + util/userfaultfd.c | 13 +++++++++++-- + 4 files changed, 30 insertions(+), 10 deletions(-) + +diff --git a/include/qemu/userfaultfd.h b/include/qemu/userfaultfd.h +index 6b74f92792..d764496f0b 100644 +--- a/include/qemu/userfaultfd.h ++++ b/include/qemu/userfaultfd.h +@@ -13,10 +13,20 @@ + #ifndef USERFAULTFD_H + #define USERFAULTFD_H + ++#ifdef CONFIG_LINUX ++ + #include "qemu/osdep.h" + #include "exec/hwaddr.h" + #include + ++/** ++ * uffd_open(): Open an userfaultfd handle for current context. ++ * ++ * @flags: The flags we want to pass in when creating the handle. ++ * ++ * Returns: the uffd handle if >=0, or <0 if error happens. ++ */ ++int uffd_open(int flags); + int uffd_query_features(uint64_t *features); + int uffd_create_fd(uint64_t features, bool non_blocking); + void uffd_close_fd(int uffd_fd); +@@ -32,4 +42,6 @@ int uffd_wakeup(int uffd_fd, void *addr, uint64_t length); + int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count); + bool uffd_poll_events(int uffd_fd, int tmo); + ++#endif /* CONFIG_LINUX */ ++ + #endif /* USERFAULTFD_H */ +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index b9a37ef255..0c55df0e52 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -37,6 +37,7 @@ + #include "qemu-file.h" + #include "yank_functions.h" + #include "tls.h" ++#include "qemu/userfaultfd.h" + + /* Arbitrary limit on size of each discard command, + * keeps them around ~200 bytes +@@ -226,11 +227,9 @@ static bool receive_ufd_features(uint64_t *features) + int ufd; + bool ret = true; + +- /* if we are here __NR_userfaultfd should exists */ +- ufd = 
syscall(__NR_userfaultfd, O_CLOEXEC); ++ ufd = uffd_open(O_CLOEXEC); + if (ufd == -1) { +- error_report("%s: syscall __NR_userfaultfd failed: %s", __func__, +- strerror(errno)); ++ error_report("%s: uffd_open() failed: %s", __func__, strerror(errno)); + return false; + } + +@@ -375,7 +374,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + goto out; + } + +- ufd = syscall(__NR_userfaultfd, O_CLOEXEC); ++ ufd = uffd_open(O_CLOEXEC); + if (ufd == -1) { + error_report("%s: userfaultfd not available: %s", __func__, + strerror(errno)); +@@ -1160,7 +1159,7 @@ static int postcopy_temp_pages_setup(MigrationIncomingState *mis) + int postcopy_ram_incoming_setup(MigrationIncomingState *mis) + { + /* Open the fd for the kernel to give us userfaults */ +- mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); ++ mis->userfault_fd = uffd_open(O_CLOEXEC | O_NONBLOCK); + if (mis->userfault_fd == -1) { + error_report("%s: Failed to open userfault fd: %s", __func__, + strerror(errno)); +diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c +index dbde726adf..0100e1bdbc 100644 +--- a/tests/qtest/migration-test.c ++++ b/tests/qtest/migration-test.c +@@ -61,14 +61,14 @@ static bool uffd_feature_thread_id; + #if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD) + #include + #include +-#include ++#include "qemu/userfaultfd.h" + + static bool ufd_version_check(void) + { + struct uffdio_api api_struct; + uint64_t ioctl_mask; + +- int ufd = syscall(__NR_userfaultfd, O_CLOEXEC); ++ int ufd = uffd_open(O_CLOEXEC); + + if (ufd == -1) { + g_test_message("Skipping test: userfaultfd not available"); +diff --git a/util/userfaultfd.c b/util/userfaultfd.c +index f1cd6af2b1..4953b3137d 100644 +--- a/util/userfaultfd.c ++++ b/util/userfaultfd.c +@@ -19,6 +19,15 @@ + #include + #include + ++int uffd_open(int flags) ++{ ++#if defined(__NR_userfaultfd) ++ return syscall(__NR_userfaultfd, flags); ++#else ++ return -EINVAL; 
++#endif ++} ++ + /** + * uffd_query_features: query UFFD features + * +@@ -32,7 +41,7 @@ int uffd_query_features(uint64_t *features) + struct uffdio_api api_struct = { 0 }; + int ret = -1; + +- uffd_fd = syscall(__NR_userfaultfd, O_CLOEXEC); ++ uffd_fd = uffd_open(O_CLOEXEC); + if (uffd_fd < 0) { + trace_uffd_query_features_nosys(errno); + return -1; +@@ -69,7 +78,7 @@ int uffd_create_fd(uint64_t features, bool non_blocking) + uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER); + + flags = O_CLOEXEC | (non_blocking ? O_NONBLOCK : 0); +- uffd_fd = syscall(__NR_userfaultfd, flags); ++ uffd_fd = uffd_open(flags); + if (uffd_fd < 0) { + trace_uffd_create_fd_nosys(errno); + return -1; +-- +2.31.1 + diff --git a/kvm-util-userfaultfd-Support-dev-userfaultfd.patch b/kvm-util-userfaultfd-Support-dev-userfaultfd.patch new file mode 100644 index 0000000..b0a22eb --- /dev/null +++ b/kvm-util-userfaultfd-Support-dev-userfaultfd.patch @@ -0,0 +1,94 @@ +From a91da7741464dadeb306a741b4fb562e49ffea57 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 7 Feb 2023 15:57:11 -0500 +Subject: [PATCH 5/8] util/userfaultfd: Support /dev/userfaultfd + +RH-Author: Peter Xu +RH-MergeRequest: 149: Support /dev/userfaultfd +RH-Bugzilla: 2158704 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: quintela1 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/3] 5f427d8c18c210ff8f66724c9e358a7120619e69 (peterx/qemu-kvm) + +Teach QEMU to use /dev/userfaultfd when it existed and fallback to the +system call if either it's not there or doesn't have enough permission. + +Firstly, as long as the app has permission to access /dev/userfaultfd, it +always have the ability to trap kernel faults which QEMU mostly wants. +Meanwhile, in some context (e.g. containers) the userfaultfd syscall can be +forbidden, so it can be the major way to use postcopy in a restricted +environment with strict seccomp setup. 
+ +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit c40c0463413b941c13fe5f99a90c02d7d6584828) +Signed-off-by: Peter Xu +--- + util/trace-events | 1 + + util/userfaultfd.c | 32 ++++++++++++++++++++++++++++++++ + 2 files changed, 33 insertions(+) + +diff --git a/util/trace-events b/util/trace-events +index c8f53d7d9f..16f78d8fe5 100644 +--- a/util/trace-events ++++ b/util/trace-events +@@ -93,6 +93,7 @@ qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_siz + qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p" + + #userfaultfd.c ++uffd_detect_open_mode(int mode) "%d" + uffd_query_features_nosys(int err) "errno: %i" + uffd_query_features_api_failed(int err) "errno: %i" + uffd_create_fd_nosys(int err) "errno: %i" +diff --git a/util/userfaultfd.c b/util/userfaultfd.c +index 4953b3137d..fdff4867e8 100644 +--- a/util/userfaultfd.c ++++ b/util/userfaultfd.c +@@ -18,10 +18,42 @@ + #include + #include + #include ++#include ++ ++typedef enum { ++ UFFD_UNINITIALIZED = 0, ++ UFFD_USE_DEV_PATH, ++ UFFD_USE_SYSCALL, ++} uffd_open_mode; + + int uffd_open(int flags) + { + #if defined(__NR_userfaultfd) ++ static uffd_open_mode open_mode; ++ static int uffd_dev; ++ ++ /* Detect how to generate uffd desc when run the 1st time */ ++ if (open_mode == UFFD_UNINITIALIZED) { ++ /* ++ * Make /dev/userfaultfd the default approach because it has better ++ * permission controls, meanwhile allows kernel faults without any ++ * privilege requirement (e.g. CAP_SYS_PTRACE).
++ */ ++ uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC); ++ if (uffd_dev >= 0) { ++ open_mode = UFFD_USE_DEV_PATH; ++ } else { ++ /* Fallback to the system call */ ++ open_mode = UFFD_USE_SYSCALL; ++ } ++ trace_uffd_detect_open_mode(open_mode); ++ } ++ ++ if (open_mode == UFFD_USE_DEV_PATH) { ++ assert(uffd_dev >= 0); ++ return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags); ++ } ++ + return syscall(__NR_userfaultfd, flags); + #else + return -EINVAL; +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index a9baa8e..ff53eaf 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.2.0 -Release: 8%{?rcrel}%{?dist}%{?cc_suffix} +Release: 9%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -356,6 +356,22 @@ Patch103: kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch Patch104: kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch # For bz#2162569 - [transitional device][virtio-rng-pci-transitional]Stable Guest ABI failed between RHEL 8.6 to RHEL 9.2 Patch105: kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch +# For bz#2169232 - RFE: reconnect option for stream socket back-end +Patch106: kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch +# For bz#2169232 - RFE: reconnect option for stream socket back-end +Patch107: kvm-net-stream-add-a-new-option-to-automatically-reconne.patch +# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall +Patch108: kvm-linux-headers-Update-to-v6.1.patch +# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall +Patch109: kvm-util-userfaultfd-Add-uffd_open.patch +# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall +Patch110: 
kvm-util-userfaultfd-Support-dev-userfaultfd.patch +# For bz#2169732 - Multifd migration fails under a weak network/socket ordering race +Patch111: kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch +# For bz#2169732 - Multifd migration fails under a weak network/socket ordering race +Patch112: kvm-migration-check-magic-value-for-deciding-the-mapping.patch +# For bz#2168172 - [s390x] qemu-kvm coredumps when SE crashes +Patch113: kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch %if %{have_clang} BuildRequires: clang @@ -1386,6 +1402,24 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Fri Feb 17 2023 Miroslav Rezanina - 7.2.0-9 +- kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch [bz#2169232] +- kvm-net-stream-add-a-new-option-to-automatically-reconne.patch [bz#2169232] +- kvm-linux-headers-Update-to-v6.1.patch [bz#2158704] +- kvm-util-userfaultfd-Add-uffd_open.patch [bz#2158704] +- kvm-util-userfaultfd-Support-dev-userfaultfd.patch [bz#2158704] +- kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch [bz#2169732] +- kvm-migration-check-magic-value-for-deciding-the-mapping.patch [bz#2169732] +- kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch [bz#2168172] +- Resolves: bz#2169232 + (RFE: reconnect option for stream socket back-end) +- Resolves: bz#2158704 + (RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall) +- Resolves: bz#2169732 + (Multifd migration fails under a weak network/socket ordering race) +- Resolves: bz#2168172 + ([s390x] qemu-kvm coredumps when SE crashes) + * Thu Feb 09 2023 Miroslav Rezanina - 7.2.0-8 - kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch [bz#2150180] - kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch [bz#2150180] From c849977b025bcea793d9501ec3a028af9f3d2a70 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Tue, 21 Feb 2023 10:14:28 -0500 Subject: [PATCH 184/195] * Tue Feb 21 2023 Miroslav Rezanina - 7.2.0-10 - 
kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch [bz#2168209] - Resolves: bz#2168209 (Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)) --- ...y-hold-the-new-AioContext-of-bs_top-.patch | 99 +++++++++++++++++++ qemu-kvm.spec | 9 +- 2 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch diff --git a/kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch b/kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch new file mode 100644 index 0000000..8d5a20a --- /dev/null +++ b/kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch @@ -0,0 +1,99 @@ +From b952c8f1da6f8597736c0e040565830139369359 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 14 Feb 2023 18:16:21 +0100 +Subject: [PATCH] block: temporarily hold the new AioContext of bs_top in + bdrv_append() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 153: block: temporarily hold the new AioContext of bs_top in bdrv_append() +RH-Bugzilla: 2168209 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Commit: [1/1] 5b190426d996e8c9f7a781bd97aee8d25756dbd3 (sgarzarella/qemu-kvm-c-9-s) + +bdrv_append() is called with bs_top AioContext held, but +bdrv_attach_child_noperm() could change the AioContext of bs_top. + +bdrv_replace_node_noperm() calls bdrv_drained_begin() starting from +commit 2398747128 ("block: Don't poll in bdrv_replace_child_noperm()"). +bdrv_drained_begin() can call BDRV_POLL_WHILE that assumes the new lock +is taken, so let's temporarily hold the new AioContext to prevent QEMU +from failing in BDRV_POLL_WHILE when it tries to release the wrong +AioContext. 
+ +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2168209 +Reported-by: Aihua Liang +Signed-off-by: Stefano Garzarella +Message-Id: <20230214171621.11574-1-sgarzare@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 60d90bf43c169b9d1dbcb17ed794b7b02c6862b1) +Signed-off-by: Stefano Garzarella +--- + block.c | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + +diff --git a/block.c b/block.c +index 0d78711416..9e1dcb9e47 100644 +--- a/block.c ++++ b/block.c +@@ -5275,6 +5275,8 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp) + * child. + * + * This function does not create any image files. ++ * ++ * The caller must hold the AioContext lock for @bs_top. + */ + int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp) +@@ -5282,11 +5284,14 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + int ret; + BdrvChild *child; + Transaction *tran = tran_new(); ++ AioContext *old_context, *new_context = NULL; + + GLOBAL_STATE_CODE(); + + assert(!bs_new->backing); + ++ old_context = bdrv_get_aio_context(bs_top); ++ + child = bdrv_attach_child_noperm(bs_new, bs_top, "backing", + &child_of_bds, bdrv_backing_role(bs_new), + tran, errp); +@@ -5295,6 +5300,19 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + goto out; + } + ++ /* ++ * bdrv_attach_child_noperm could change the AioContext of bs_top. ++ * bdrv_replace_node_noperm calls bdrv_drained_begin, so let's temporarily ++ * hold the new AioContext, since bdrv_drained_begin calls BDRV_POLL_WHILE ++ * that assumes the new lock is taken. 
++ */ ++ new_context = bdrv_get_aio_context(bs_top); ++ ++ if (old_context != new_context) { ++ aio_context_release(old_context); ++ aio_context_acquire(new_context); ++ } ++ + ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp); + if (ret < 0) { + goto out; +@@ -5306,6 +5324,11 @@ out: + + bdrv_refresh_limits(bs_top, NULL, NULL); + ++ if (new_context && old_context != new_context) { ++ aio_context_release(new_context); ++ aio_context_acquire(old_context); ++ } ++ + return ret; + } + +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index ff53eaf..f0f675c 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.2.0 -Release: 9%{?rcrel}%{?dist}%{?cc_suffix} +Release: 10%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -372,6 +372,8 @@ Patch111: kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch Patch112: kvm-migration-check-magic-value-for-deciding-the-mapping.patch # For bz#2168172 - [s390x] qemu-kvm coredumps when SE crashes Patch113: kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch +# For bz#2168209 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch114: kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch %if %{have_clang} BuildRequires: clang @@ -1402,6 +1404,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Tue Feb 21 2023 Miroslav Rezanina - 7.2.0-10 +- kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch [bz#2168209] +- Resolves: bz#2168209 + (Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)) + * Fri Feb 17 2023 Miroslav Rezanina - 7.2.0-9 - 
kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch [bz#2169232] - kvm-net-stream-add-a-new-option-to-automatically-reconne.patch [bz#2169232] From cbc169813b3d02e9125ab67ab5928bd686a9ff63 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 6 Mar 2023 02:34:27 -0500 Subject: [PATCH 185/195] * Mon Mar 06 2023 Miroslav Rezanina - 7.2.0-11 - kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch [bz#2169904] - Resolves: bz#2169904 ([SVVP] job 'Check SMBIOS Table Specific Requirements' failed on win2022) --- ...fix-field-corruption-in-type-4-table.patch | 59 +++++++++++++++++++ qemu-kvm.spec | 11 +++- 2 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch diff --git a/kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch b/kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch new file mode 100644 index 0000000..b452281 --- /dev/null +++ b/kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch @@ -0,0 +1,59 @@ +From 8b0c5c6d356fd6cce9092727e20097b70e07bba9 Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Thu, 23 Feb 2023 13:57:47 +0100 +Subject: [PATCH] hw/smbios: fix field corruption in type 4 table + +RH-Author: Julia Suvorova +RH-MergeRequest: 156: hw/smbios: fix field corruption in type 4 table +RH-Bugzilla: 2169904 +RH-Acked-by: Igor Mammedov +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [1/1] ee6d9bb6dfa0fb2625915947072cb91a0926c4ec + +Since table type 4 of SMBIOS version 2.6 is shorter than 3.0, the +strings which follow immediately after the struct fields have been +overwritten by unconditional filling of later fields such as core_count2. +Make these fields dependent on the SMBIOS version. 
+ +Fixes: 05e27d74c7 ("hw/smbios: add core_count2 to smbios table type 4") +Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2169904 + +Signed-off-by: Julia Suvorova +Message-Id: <20230223125747.254914-1-jusual@redhat.com> +Reviewed-by: Igor Mammedov +Reviewed-by: Ani Sinha +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 60d09b8dc7dd4256d664ad680795cb1327805b2b) +--- + hw/smbios/smbios.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index c5ad69237e..2d2ece3edb 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -752,14 +752,16 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) + t->core_count = (ms->smp.cores > 255) ? 0xFF : ms->smp.cores; + t->core_enabled = t->core_count; + +- t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores); +- + t->thread_count = (ms->smp.threads > 255) ? 0xFF : ms->smp.threads; +- t->thread_count2 = cpu_to_le16(ms->smp.threads); + + t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */ + t->processor_family2 = cpu_to_le16(0x01); /* Other */ + ++ if (tbl_len == SMBIOS_TYPE_4_LEN_V30) { ++ t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores); ++ t->thread_count2 = cpu_to_le16(ms->smp.threads); ++ } ++ + SMBIOS_BUILD_TABLE_POST; + smbios_type4_count++; + } +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index f0f675c..1162fe0 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.2.0 -Release: 10%{?rcrel}%{?dist}%{?cc_suffix} +Release: 11%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -374,6 +374,8 @@ Patch112: kvm-migration-check-magic-value-for-deciding-the-mapping.patch Patch113: kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch # For bz#2168209 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) Patch114: kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch +# For bz#2169904 - [SVVP] job 'Check SMBIOS Table Specific Requirements' failed on win2022 +Patch115: kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch %if %{have_clang} BuildRequires: clang @@ -451,7 +453,7 @@ Requires: %{name}-core = %{epoch}:%{version}-%{release} Requires: %{name}-docs = %{epoch}:%{version}-%{release} Requires: %{name}-tools = %{epoch}:%{version}-%{release} Requires: qemu-pr-helper = %{epoch}:%{version}-%{release} -Requires: virtiofsd = %{epoch}:%{version}-%{release} +Requires: virtiofsd >= 1.5.0 %{requires_all_modules} %description @@ -1404,6 +1406,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Mar 06 2023 Miroslav Rezanina - 7.2.0-11 +- kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch [bz#2169904] +- Resolves: bz#2169904 + ([SVVP] job 'Check SMBIOS Table Specific Requirements' failed on win2022) + * Tue Feb 21 2023 Miroslav Rezanina - 7.2.0-10 - kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch [bz#2168209] - Resolves: bz#2168209 From de1852f0872d3dd9a328a6054dfbb3aa4341ffc3 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Sun, 12 Mar 2023 23:37:54 -0400 Subject: [PATCH 186/195] * Sun Mar 12 2023 Miroslav Rezanina - 7.2.0-12 - kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch [bz#2155748] - kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch [bz#2155748] - kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch [bz#2155748] - 
kvm-qatomic-add-smp_mb__before-after_rmw.patch [bz#2175660] - kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch [bz#2175660] - kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch [bz#2175660] - kvm-edu-add-smp_mb__after_rmw.patch [bz#2175660] - kvm-aio-wait-switch-to-smp_mb__after_rmw.patch [bz#2175660] - kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch [bz#2175660] - kvm-physmem-add-missing-memory-barrier.patch [bz#2175660] - kvm-async-update-documentation-of-the-memory-barriers.patch [bz#2175660] - kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch [bz#2175660] - Resolves: bz#2155748 (qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed) - Resolves: bz#2175660 (Guest hangs when starting or rebooting) --- ...aio-wait-switch-to-smp_mb__after_rmw.patch | 50 +++ ...sage-of-barriers-in-the-polling-case.patch | 66 ++++ ...documentation-of-the-memory-barriers.patch | 111 ++++++ ...ent-dma_blk_cb-vs-dma_aio_cancel-rac.patch | 127 +++++++ kvm-edu-add-smp_mb__after_rmw.patch | 61 ++++ kvm-physmem-add-missing-memory-barrier.patch | 55 +++ ...qatomic-add-smp_mb__before-after_rmw.patch | 177 ++++++++++ ...coroutine-lock-add-smp_mb__after_rmw.patch | 75 ++++ ...posix-cleanup-fix-document-QemuEvent.patch | 146 ++++++++ ...win32-cleanup-fix-document-QemuEvent.patch | 162 +++++++++ ...otect-req-aiocb-with-AioContext-lock.patch | 176 ++++++++++ ...t-SCSI-devices-from-main-loop-thread.patch | 325 ++++++++++++++++++ qemu-kvm.spec | 44 ++- 13 files changed, 1574 insertions(+), 1 deletion(-) create mode 100644 kvm-aio-wait-switch-to-smp_mb__after_rmw.patch create mode 100644 kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch create mode 100644 kvm-async-update-documentation-of-the-memory-barriers.patch create mode 100644 kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch create mode 100644 kvm-edu-add-smp_mb__after_rmw.patch create mode 100644 kvm-physmem-add-missing-memory-barrier.patch create mode 
100644 kvm-qatomic-add-smp_mb__before-after_rmw.patch create mode 100644 kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch create mode 100644 kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch create mode 100644 kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch create mode 100644 kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch create mode 100644 kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch diff --git a/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch b/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch new file mode 100644 index 0000000..ee7e7f9 --- /dev/null +++ b/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch @@ -0,0 +1,50 @@ +From e9a9c0b023ae0dcbb14543b74063cca931d8230f Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 08/12] aio-wait: switch to smp_mb__after_rmw() + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [5/9] a90c96d148fdbec340a45dc6cedf3660d8be2aab (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit b532526a07ef3b903ead2e055fe6cc87b41057a3 +Author: Paolo Bonzini +Date: Fri Mar 3 11:03:52 2023 +0100 + + aio-wait: switch to smp_mb__after_rmw() + + The barrier comes after an atomic increment, so it is enough to use + smp_mb__after_rmw(); this avoids a double barrier on x86 systems. 
+ + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + include/block/aio-wait.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h +index dd9a7f6461..da13357bb8 100644 +--- a/include/block/aio-wait.h ++++ b/include/block/aio-wait.h +@@ -85,7 +85,7 @@ extern AioWait global_aio_wait; + /* Increment wait_->num_waiters before evaluating cond. */ \ + qatomic_inc(&wait_->num_waiters); \ + /* Paired with smp_mb in aio_wait_kick(). */ \ +- smp_mb(); \ ++ smp_mb__after_rmw(); \ + if (ctx_ && in_aio_context_home_thread(ctx_)) { \ + while ((cond)) { \ + aio_poll(ctx_, true); \ +-- +2.39.1 + diff --git a/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch b/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch new file mode 100644 index 0000000..0e4a48d --- /dev/null +++ b/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch @@ -0,0 +1,66 @@ +From 3d823dda6832b76fd3d776131008107b0b0f7166 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 12/12] async: clarify usage of barriers in the polling case + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [9/9] b4ea298d75a75bb61e07a27d1296e0095fbc2bbf (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit 6229438cca037d42f44a96d38feb15cb102a444f +Author: Paolo Bonzini +Date: Mon Mar 6 10:43:52 2023 +0100 + + async: clarify usage of barriers in the polling case + + Explain that aio_context_notifier_poll() relies on + aio_notify_accept() to catch all the memory writes that were + done before ctx->notified was set to true. 
+ + Reviewed-by: Richard Henderson + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/async.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/util/async.c b/util/async.c +index 37d3e6036d..e0846baf93 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -472,8 +472,9 @@ void aio_notify_accept(AioContext *ctx) + qatomic_set(&ctx->notified, false); + + /* +- * Write ctx->notified before reading e.g. bh->flags. Pairs with smp_wmb +- * in aio_notify. ++ * Order reads of ctx->notified (in aio_context_notifier_poll()) and the ++ * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs ++ * with smp_wmb() in aio_notify. + */ + smp_mb(); + } +@@ -496,6 +497,11 @@ static bool aio_context_notifier_poll(void *opaque) + EventNotifier *e = opaque; + AioContext *ctx = container_of(e, AioContext, notifier); + ++ /* ++ * No need for load-acquire because we just want to kick the ++ * event loop. aio_notify_accept() takes care of synchronizing ++ * the event loop with the producers. 
++ */ + return qatomic_read(&ctx->notified); + } + +-- +2.39.1 + diff --git a/kvm-async-update-documentation-of-the-memory-barriers.patch b/kvm-async-update-documentation-of-the-memory-barriers.patch new file mode 100644 index 0000000..cb92dc9 --- /dev/null +++ b/kvm-async-update-documentation-of-the-memory-barriers.patch @@ -0,0 +1,111 @@ +From 29bcf843d796ffc2a0906dea947e4cdfe9f7ec60 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 11/12] async: update documentation of the memory barriers + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [8/9] 5ca20e4c8983e0bc1ecee66bead3472777abe4d1 (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit 8dd48650b43dfde4ebea34191ac267e474bcc29e +Author: Paolo Bonzini +Date: Mon Mar 6 10:15:06 2023 +0100 + + async: update documentation of the memory barriers + + Ever since commit 8c6b0356b539 ("util/async: make bh_aio_poll() O(1)", + 2020-02-22), synchronization between qemu_bh_schedule() and aio_bh_poll() + is happening when the bottom half is enqueued in the bh_list; not + when the flags are set. Update the documentation to match. + + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/async.c | 33 +++++++++++++++++++-------------- + 1 file changed, 19 insertions(+), 14 deletions(-) + +diff --git a/util/async.c b/util/async.c +index 63434ddae4..37d3e6036d 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -73,14 +73,21 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags) + unsigned old_flags; + + /* +- * The memory barrier implicit in qatomic_fetch_or makes sure that: +- * 1. 
idle & any writes needed by the callback are done before the +- * locations are read in the aio_bh_poll. +- * 2. ctx is loaded before the callback has a chance to execute and bh +- * could be freed. ++ * Synchronizes with atomic_fetch_and() in aio_bh_dequeue(), ensuring that ++ * insertion starts after BH_PENDING is set. + */ + old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags); ++ + if (!(old_flags & BH_PENDING)) { ++ /* ++ * At this point the bottom half becomes visible to aio_bh_poll(). ++ * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in ++ * aio_bh_poll(), ensuring that: ++ * 1. any writes needed by the callback are visible from the callback ++ * after aio_bh_dequeue() returns bh. ++ * 2. ctx is loaded before the callback has a chance to execute and bh ++ * could be freed. ++ */ + QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next); + } + +@@ -106,11 +113,8 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags) + QSLIST_REMOVE_HEAD(head, next); + + /* +- * The qatomic_and is paired with aio_bh_enqueue(). The implicit memory +- * barrier ensures that the callback sees all writes done by the scheduling +- * thread. It also ensures that the scheduling thread sees the cleared +- * flag before bh->cb has run, and thus will call aio_notify again if +- * necessary. ++ * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that ++ * the removal finishes before BH_PENDING is reset. + */ + *flags = qatomic_fetch_and(&bh->flags, + ~(BH_PENDING | BH_SCHEDULED | BH_IDLE)); +@@ -157,6 +161,7 @@ int aio_bh_poll(AioContext *ctx) + BHListSlice *s; + int ret = 0; + ++ /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */ + QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list); + QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next); + +@@ -446,15 +451,15 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx) + void aio_notify(AioContext *ctx) + { + /* +- * Write e.g. bh->flags before writing ctx->notified. 
Pairs with smp_mb in +- * aio_notify_accept. ++ * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with ++ * smp_mb() in aio_notify_accept(). + */ + smp_wmb(); + qatomic_set(&ctx->notified, true); + + /* +- * Write ctx->notified before reading ctx->notify_me. Pairs +- * with smp_mb in aio_ctx_prepare or aio_poll. ++ * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me. ++ * Pairs with smp_mb() in aio_ctx_prepare or aio_poll. + */ + smp_mb(); + if (qatomic_read(&ctx->notify_me)) { +-- +2.39.1 + diff --git a/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch b/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch new file mode 100644 index 0000000..1a3c139 --- /dev/null +++ b/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch @@ -0,0 +1,127 @@ +From b886411a682b56bfe674f0a35d40c67c8e9dc87a Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 21 Feb 2023 16:22:17 -0500 +Subject: [PATCH 02/12] dma-helpers: prevent dma_blk_cb() vs dma_aio_cancel() + race + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread +RH-Bugzilla: 2155748 +RH-Acked-by: Eric Blake +RH-Acked-by: Kevin Wolf +RH-Acked-by: Laszlo Ersek +RH-Commit: [2/3] eeeea43c25d8f4fa84591b05547fb77e4058abff (stefanha/centos-stream-qemu-kvm) + +dma_blk_cb() only takes the AioContext lock around ->io_func(). That +means the rest of dma_blk_cb() is not protected. In particular, the +DMAAIOCB field accesses happen outside the lock. + +There is a race when the main loop thread holds the AioContext lock and +invokes scsi_device_purge_requests() -> bdrv_aio_cancel() -> +dma_aio_cancel() while an IOThread executes dma_blk_cb(). The dbs->acb +field determines how cancellation proceeds. If dma_aio_cancel() sees +dbs->acb == NULL while dma_blk_cb() is still running, the request can be +completed twice (-ECANCELED and the actual return value). 
+ +The following assertion can occur with virtio-scsi when an IOThread is +used: + + ../hw/scsi/scsi-disk.c:368: scsi_dma_complete: Assertion `r->req.aiocb != NULL' failed. + +Fix the race by holding the AioContext across dma_blk_cb(). Now +dma_aio_cancel() under the AioContext lock will not see +inconsistent/intermediate states. + +Cc: Paolo Bonzini +Reviewed-by: Eric Blake +Signed-off-by: Stefan Hajnoczi +Message-Id: <20230221212218.1378734-3-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit abfcd2760b3e70727bbc0792221b8b98a733dc32) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/scsi-disk.c | 4 +--- + softmmu/dma-helpers.c | 12 +++++++----- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 5327f93f4c..b12d8b0816 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -354,13 +354,12 @@ done: + scsi_req_unref(&r->req); + } + ++/* Called with AioContext lock held */ + static void scsi_dma_complete(void *opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); +- + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -370,7 +369,6 @@ static void scsi_dma_complete(void *opaque, int ret) + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + scsi_dma_complete_noio(r, ret); +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + } + + static void scsi_read_complete_noio(SCSIDiskReq *r, int ret) +diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c +index 7820fec54c..2463964805 100644 +--- a/softmmu/dma-helpers.c ++++ b/softmmu/dma-helpers.c +@@ -113,17 +113,19 @@ static void dma_complete(DMAAIOCB *dbs, int ret) + static void dma_blk_cb(void *opaque, int ret) + { + DMAAIOCB *dbs = (DMAAIOCB *)opaque; ++ AioContext *ctx = dbs->ctx; + dma_addr_t cur_addr, cur_len; + void *mem; + + trace_dma_blk_cb(dbs, 
ret); + ++ aio_context_acquire(ctx); + dbs->acb = NULL; + dbs->offset += dbs->iov.size; + + if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { + dma_complete(dbs, ret); +- return; ++ goto out; + } + dma_blk_unmap(dbs); + +@@ -164,9 +166,9 @@ static void dma_blk_cb(void *opaque, int ret) + + if (dbs->iov.size == 0) { + trace_dma_map_wait(dbs); +- dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs); ++ dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs); + cpu_register_map_client(dbs->bh); +- return; ++ goto out; + } + + if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) { +@@ -174,11 +176,11 @@ static void dma_blk_cb(void *opaque, int ret) + QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align)); + } + +- aio_context_acquire(dbs->ctx); + dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, + dma_blk_cb, dbs, dbs->io_func_opaque); +- aio_context_release(dbs->ctx); + assert(dbs->acb); ++out: ++ aio_context_release(ctx); + } + + static void dma_aio_cancel(BlockAIOCB *acb) +-- +2.39.1 + diff --git a/kvm-edu-add-smp_mb__after_rmw.patch b/kvm-edu-add-smp_mb__after_rmw.patch new file mode 100644 index 0000000..dd77648 --- /dev/null +++ b/kvm-edu-add-smp_mb__after_rmw.patch @@ -0,0 +1,61 @@ +From 67bbeb056f75adc6c964468d876531ab68366fe0 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 07/12] edu: add smp_mb__after_rmw() + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [4/9] 2ad6fd6cb33fde39d2d017d94c0dde2152ad70c4 (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit 2482aeea4195ad84cf3d4e5b15b28ec5b420ed5a +Author: Paolo Bonzini +Date: Thu Mar 2 11:16:13 2023 +0100 + + edu: add smp_mb__after_rmw() + + Ensure ordering between clearing the COMPUTING flag and checking + IRQFACT, and between 
setting the IRQFACT flag and checking + COMPUTING. This ensures that no wakeups are lost. + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + hw/misc/edu.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/misc/edu.c b/hw/misc/edu.c +index e935c418d4..a1f8bc77e7 100644 +--- a/hw/misc/edu.c ++++ b/hw/misc/edu.c +@@ -267,6 +267,8 @@ static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val, + case 0x20: + if (val & EDU_STATUS_IRQFACT) { + qatomic_or(&edu->status, EDU_STATUS_IRQFACT); ++ /* Order check of the COMPUTING flag after setting IRQFACT. */ ++ smp_mb__after_rmw(); + } else { + qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT); + } +@@ -349,6 +351,9 @@ static void *edu_fact_thread(void *opaque) + qemu_mutex_unlock(&edu->thr_mutex); + qatomic_and(&edu->status, ~EDU_STATUS_COMPUTING); + ++ /* Clear COMPUTING flag before checking IRQFACT. */ ++ smp_mb__after_rmw(); ++ + if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) { + qemu_mutex_lock_iothread(); + edu_raise_irq(edu, FACT_IRQ); +-- +2.39.1 + diff --git a/kvm-physmem-add-missing-memory-barrier.patch b/kvm-physmem-add-missing-memory-barrier.patch new file mode 100644 index 0000000..3eafa78 --- /dev/null +++ b/kvm-physmem-add-missing-memory-barrier.patch @@ -0,0 +1,55 @@ +From 0dd4be411e35f00d006d89a15d9161f5d8783c1d Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 10/12] physmem: add missing memory barrier + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [7/9] ee4875cb8c564f0510e48b00a5d95c0e6ea6301b (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit 
33828ca11da08436e1b32f3e79dabce3061a0427 +Author: Paolo Bonzini +Date: Fri Mar 3 14:36:32 2023 +0100 + + physmem: add missing memory barrier + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + softmmu/physmem.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/softmmu/physmem.c b/softmmu/physmem.c +index 1b606a3002..772c9896cd 100644 +--- a/softmmu/physmem.c ++++ b/softmmu/physmem.c +@@ -3117,6 +3117,8 @@ void cpu_register_map_client(QEMUBH *bh) + qemu_mutex_lock(&map_client_list_lock); + client->bh = bh; + QLIST_INSERT_HEAD(&map_client_list, client, link); ++ /* Write map_client_list before reading in_use. */ ++ smp_mb(); + if (!qatomic_read(&bounce.in_use)) { + cpu_notify_map_clients_locked(); + } +@@ -3309,6 +3311,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, + qemu_vfree(bounce.buffer); + bounce.buffer = NULL; + memory_region_unref(bounce.mr); ++ /* Clear in_use before reading map_client_list. 
*/ + qatomic_mb_set(&bounce.in_use, false); + cpu_notify_map_clients(); + } +-- +2.39.1 + diff --git a/kvm-qatomic-add-smp_mb__before-after_rmw.patch b/kvm-qatomic-add-smp_mb__before-after_rmw.patch new file mode 100644 index 0000000..acc8c7d --- /dev/null +++ b/kvm-qatomic-add-smp_mb__before-after_rmw.patch @@ -0,0 +1,177 @@ +From 1fdc864f9ac927f3ea407f35f6771a4b2e8f509f Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 04/12] qatomic: add smp_mb__before/after_rmw() + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [1/9] e8d0b64670bff778d275b1fb477dcee0c109251a (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit ff00bed1897c3d27adc5b0cec6f6eeb5a7d13176 +Author: Paolo Bonzini +Date: Thu Mar 2 11:10:56 2023 +0100 + + qatomic: add smp_mb__before/after_rmw() + + On ARM, seqcst loads and stores (which QEMU does not use) are compiled + respectively as LDAR and STLR instructions. Even though LDAR is + also used for load-acquire operations, it also waits for all STLRs to + leave the store buffer. Thus, LDAR and STLR alone are load-acquire + and store-release operations, but LDAR also provides store-against-load + ordering as long as the previous store is a STLR. + + Compare this to ARMv7, where store-release is DMB+STR and load-acquire + is LDR+DMB, but an additional DMB is needed between store-seqcst and + load-seqcst (e.g. DMB+STR+DMB+LDR+DMB); or with x86, where MOV provides + load-acquire and store-release semantics and the two can be reordered. + + Likewise, on ARM sequentially consistent read-modify-write operations only + need to use LDAXR and STLXR respectively for the load and the store, while + on x86 they need to use the stronger LOCK prefix. 
+ + In a strange twist of events, however, the _stronger_ semantics + of the ARM instructions can end up causing bugs on ARM, not on x86. + The problems occur when seqcst atomics are mixed with relaxed atomics. + + QEMU's atomics try to bridge the Linux API (that most of the developers + are familiar with) and the C11 API, and the two have a substantial + difference: + + - in Linux, strongly-ordered atomics such as atomic_add_return() affect + the global ordering of _all_ memory operations, including for example + READ_ONCE()/WRITE_ONCE() + + - in C11, sequentially consistent atomics (except for seq-cst fences) + only affect the ordering of sequentially consistent operations. + In particular, since relaxed loads are done with LDR on ARM, they are + not ordered against seqcst stores (which are done with STLR). + + QEMU implements high-level synchronization primitives with the idea that + the primitives contain the necessary memory barriers, and the callers can + use relaxed atomics (qatomic_read/qatomic_set) or even regular accesses. + This is very much incompatible with the C11 view that seqcst accesses + are only ordered against other seqcst accesses, and requires using seqcst + fences as in the following example: + + qatomic_set(&y, 1); qatomic_set(&x, 1); + smp_mb(); smp_mb(); + ... qatomic_read(&x) ... ... qatomic_read(&y) ... + + When a qatomic_*() read-modify write operation is used instead of one + or both stores, developers that are more familiar with the Linux API may + be tempted to omit the smp_mb(), which will work on x86 but not on ARM. + + This nasty difference between Linux and C11 read-modify-write operations + has already caused issues in util/async.c and more are being found. + Provide something similar to Linux smp_mb__before/after_atomic(); this + has the double function of documenting clearly why there is a memory + barrier, and avoiding a double barrier on x86 and s390x systems. + + The new macro can already be put to use in qatomic_mb_set(). 
+ + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + docs/devel/atomics.rst | 26 +++++++++++++++++++++----- + include/qemu/atomic.h | 17 ++++++++++++++++- + 2 files changed, 37 insertions(+), 6 deletions(-) + +diff --git a/docs/devel/atomics.rst b/docs/devel/atomics.rst +index 52baa0736d..10fbfc58bb 100644 +--- a/docs/devel/atomics.rst ++++ b/docs/devel/atomics.rst +@@ -25,7 +25,8 @@ provides macros that fall in three camps: + + - weak atomic access and manual memory barriers: ``qatomic_read()``, + ``qatomic_set()``, ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``, +- ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``; ++ ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``, ++ ``smp_mb__before_rmw()``, ``smp_mb__after_rmw()``; + + - sequentially consistent atomic access: everything else. + +@@ -470,7 +471,7 @@ and memory barriers, and the equivalents in QEMU: + sequential consistency. + + - in QEMU, ``qatomic_read()`` and ``qatomic_set()`` do not participate in +- the total ordering enforced by sequentially-consistent operations. ++ the ordering enforced by read-modify-write operations. + This is because QEMU uses the C11 memory model. The following example + is correct in Linux but not in QEMU: + +@@ -486,9 +487,24 @@ and memory barriers, and the equivalents in QEMU: + because the read of ``y`` can be moved (by either the processor or the + compiler) before the write of ``x``. + +- Fixing this requires an ``smp_mb()`` memory barrier between the write +- of ``x`` and the read of ``y``. In the common case where only one thread +- writes ``x``, it is also possible to write it like this: ++ Fixing this requires a full memory barrier between the write of ``x`` and ++ the read of ``y``. 
QEMU provides ``smp_mb__before_rmw()`` and ++ ``smp_mb__after_rmw()``; they act both as an optimization, ++ avoiding the memory barrier on processors where it is unnecessary, ++ and as a clarification of this corner case of the C11 memory model: ++ ++ +--------------------------------+ ++ | QEMU (correct) | ++ +================================+ ++ | :: | ++ | | ++ | a = qatomic_fetch_add(&x, 2);| ++ | smp_mb__after_rmw(); | ++ | b = qatomic_read(&y); | ++ +--------------------------------+ ++ ++ In the common case where only one thread writes ``x``, it is also possible ++ to write it like this: + + +--------------------------------+ + | QEMU (correct) | +diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h +index 874134fd19..f85834ee8b 100644 +--- a/include/qemu/atomic.h ++++ b/include/qemu/atomic.h +@@ -245,6 +245,20 @@ + #define smp_wmb() smp_mb_release() + #define smp_rmb() smp_mb_acquire() + ++/* ++ * SEQ_CST is weaker than the older __sync_* builtins and Linux ++ * kernel read-modify-write atomics. Provide a macro to obtain ++ * the same semantics. ++ */ ++#if !defined(QEMU_SANITIZE_THREAD) && \ ++ (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) ++# define smp_mb__before_rmw() signal_barrier() ++# define smp_mb__after_rmw() signal_barrier() ++#else ++# define smp_mb__before_rmw() smp_mb() ++# define smp_mb__after_rmw() smp_mb() ++#endif ++ + /* qatomic_mb_read/set semantics map Java volatile variables. They are + * less expensive on some platforms (notably POWER) than fully + * sequentially consistent operations. +@@ -259,7 +273,8 @@ + #if !defined(QEMU_SANITIZE_THREAD) && \ + (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) + /* This is more efficient than a store plus a fence. 
*/ +-# define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i)) ++# define qatomic_mb_set(ptr, i) \ ++ ({ (void)qatomic_xchg(ptr, i); smp_mb__after_rmw(); }) + #else + # define qatomic_mb_set(ptr, i) \ + ({ qatomic_store_release(ptr, i); smp_mb(); }) +-- +2.39.1 + diff --git a/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch b/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch new file mode 100644 index 0000000..86e94db --- /dev/null +++ b/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch @@ -0,0 +1,75 @@ +From 7a9907c65e3e2bbb0c119acdbbeb4381e7f1d902 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 09/12] qemu-coroutine-lock: add smp_mb__after_rmw() + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [6/9] 4b1723b1ad670ec4c85240390b4fc15ff361154f (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit e3a3b6ec8169eab2feb241b4982585001512cd55 +Author: Paolo Bonzini +Date: Fri Mar 3 10:52:59 2023 +0100 + + qemu-coroutine-lock: add smp_mb__after_rmw() + + mutex->from_push and mutex->handoff in qemu-coroutine-lock implement + the familiar pattern: + + write a write b + smp_mb() smp_mb() + read b read a + + The memory barrier is required by the C memory model even after a + SEQ_CST read-modify-write operation such as QSLIST_INSERT_HEAD_ATOMIC. + Add it and avoid the unclear qatomic_mb_read() operation. 
+ + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/qemu-coroutine-lock.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c +index 45c6b57374..c5897bd963 100644 +--- a/util/qemu-coroutine-lock.c ++++ b/util/qemu-coroutine-lock.c +@@ -202,10 +202,16 @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx, + trace_qemu_co_mutex_lock_entry(mutex, self); + push_waiter(mutex, &w); + ++ /* ++ * Add waiter before reading mutex->handoff. Pairs with qatomic_mb_set ++ * in qemu_co_mutex_unlock. ++ */ ++ smp_mb__after_rmw(); ++ + /* This is the "Responsibility Hand-Off" protocol; a lock() picks from + * a concurrent unlock() the responsibility of waking somebody up. + */ +- old_handoff = qatomic_mb_read(&mutex->handoff); ++ old_handoff = qatomic_read(&mutex->handoff); + if (old_handoff && + has_waiters(mutex) && + qatomic_cmpxchg(&mutex->handoff, old_handoff, 0) == old_handoff) { +@@ -304,6 +310,7 @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex) + } + + our_handoff = mutex->sequence; ++ /* Set handoff before checking for waiters. 
*/ + qatomic_mb_set(&mutex->handoff, our_handoff); + if (!has_waiters(mutex)) { + /* The concurrent lock has not added itself yet, so it +-- +2.39.1 + diff --git a/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch b/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch new file mode 100644 index 0000000..25f30ff --- /dev/null +++ b/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch @@ -0,0 +1,146 @@ +From aa61e4c437d29a791ea09a01f7230231f1e53356 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 05/12] qemu-thread-posix: cleanup, fix, document QemuEvent + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [2/9] c3bdf75f884e137c667316aaac96bb4a0b9ec2d9 (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit 9586a1329f5dce6c1d7f4de53cf0536644d7e593 +Author: Paolo Bonzini +Date: Thu Mar 2 11:19:52 2023 +0100 + + qemu-thread-posix: cleanup, fix, document QemuEvent + + QemuEvent is currently broken on ARM due to missing memory barriers + after qatomic_*(). Apart from adding the memory barrier, a closer look + reveals some unpaired memory barriers too. Document more clearly what + is going on. 
+ + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/qemu-thread-posix.c | 69 ++++++++++++++++++++++++++++------------ + 1 file changed, 49 insertions(+), 20 deletions(-) + +diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c +index bae938c670..cc74f4ede0 100644 +--- a/util/qemu-thread-posix.c ++++ b/util/qemu-thread-posix.c +@@ -379,13 +379,21 @@ void qemu_event_destroy(QemuEvent *ev) + + void qemu_event_set(QemuEvent *ev) + { +- /* qemu_event_set has release semantics, but because it *loads* ++ assert(ev->initialized); ++ ++ /* ++ * Pairs with both qemu_event_reset() and qemu_event_wait(). ++ * ++ * qemu_event_set has release semantics, but because it *loads* + * ev->value we need a full memory barrier here. + */ +- assert(ev->initialized); + smp_mb(); + if (qatomic_read(&ev->value) != EV_SET) { +- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) { ++ int old = qatomic_xchg(&ev->value, EV_SET); ++ ++ /* Pairs with memory barrier in kernel futex_wait system call. */ ++ smp_mb__after_rmw(); ++ if (old == EV_BUSY) { + /* There were waiters, wake them up. */ + qemu_futex_wake(ev, INT_MAX); + } +@@ -394,18 +402,19 @@ void qemu_event_set(QemuEvent *ev) + + void qemu_event_reset(QemuEvent *ev) + { +- unsigned value; +- + assert(ev->initialized); +- value = qatomic_read(&ev->value); +- smp_mb_acquire(); +- if (value == EV_SET) { +- /* +- * If there was a concurrent reset (or even reset+wait), +- * do nothing. Otherwise change EV_SET->EV_FREE. +- */ +- qatomic_or(&ev->value, EV_FREE); +- } ++ ++ /* ++ * If there was a concurrent reset (or even reset+wait), ++ * do nothing. Otherwise change EV_SET->EV_FREE. ++ */ ++ qatomic_or(&ev->value, EV_FREE); ++ ++ /* ++ * Order reset before checking the condition in the caller. ++ * Pairs with the first memory barrier in qemu_event_set(). 
++ */ ++ smp_mb__after_rmw(); + } + + void qemu_event_wait(QemuEvent *ev) +@@ -413,20 +422,40 @@ void qemu_event_wait(QemuEvent *ev) + unsigned value; + + assert(ev->initialized); +- value = qatomic_read(&ev->value); +- smp_mb_acquire(); ++ ++ /* ++ * qemu_event_wait must synchronize with qemu_event_set even if it does ++ * not go down the slow path, so this load-acquire is needed that ++ * synchronizes with the first memory barrier in qemu_event_set(). ++ * ++ * If we do go down the slow path, there is no requirement at all: we ++ * might miss a qemu_event_set() here but ultimately the memory barrier in ++ * qemu_futex_wait() will ensure the check is done correctly. ++ */ ++ value = qatomic_load_acquire(&ev->value); + if (value != EV_SET) { + if (value == EV_FREE) { + /* +- * Leave the event reset and tell qemu_event_set that there +- * are waiters. No need to retry, because there cannot be +- * a concurrent busy->free transition. After the CAS, the +- * event will be either set or busy. ++ * Leave the event reset and tell qemu_event_set that there are ++ * waiters. No need to retry, because there cannot be a concurrent ++ * busy->free transition. After the CAS, the event will be either ++ * set or busy. ++ * ++ * This cmpxchg doesn't have particular ordering requirements if it ++ * succeeds (moving the store earlier can only cause qemu_event_set() ++ * to issue _more_ wakeups), the failing case needs acquire semantics ++ * like the load above. + */ + if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { + return; + } + } ++ ++ /* ++ * This is the final check for a concurrent set, so it does need ++ * a smp_mb() pairing with the second barrier of qemu_event_set(). ++ * The barrier is inside the FUTEX_WAIT system call. 
++ */ + qemu_futex_wait(ev, EV_BUSY); + } + } +-- +2.39.1 + diff --git a/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch b/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch new file mode 100644 index 0000000..631d541 --- /dev/null +++ b/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch @@ -0,0 +1,162 @@ +From 02347869410fe53d814487501fb586f7dc614375 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:24:36 -0500 +Subject: [PATCH 06/12] qemu-thread-win32: cleanup, fix, document QemuEvent + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2175660 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Eric Auger +RH-Commit: [3/9] d228e9d6a4a75dd1f0a23a6dceaf4fea23d69192 (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 + +commit 6c5df4b48f0c52a61342ecb307a43f4c2a3565c4 +Author: Paolo Bonzini +Date: Thu Mar 2 11:22:50 2023 +0100 + + qemu-thread-win32: cleanup, fix, document QemuEvent + + QemuEvent is currently broken on ARM due to missing memory barriers + after qatomic_*(). Apart from adding the memory barrier, a closer look + reveals some unpaired memory barriers that are not really needed and + complicated the functions unnecessarily. Also, it is relying on + a memory barrier in ResetEvent(); the barrier _ought_ to be there + but there is really no documentation about it, so make it explicit. 
+ + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/qemu-thread-win32.c | 82 +++++++++++++++++++++++++++------------- + 1 file changed, 56 insertions(+), 26 deletions(-) + +diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c +index 69db254ac7..a7fe3cc345 100644 +--- a/util/qemu-thread-win32.c ++++ b/util/qemu-thread-win32.c +@@ -272,12 +272,20 @@ void qemu_event_destroy(QemuEvent *ev) + void qemu_event_set(QemuEvent *ev) + { + assert(ev->initialized); +- /* qemu_event_set has release semantics, but because it *loads* ++ ++ /* ++ * Pairs with both qemu_event_reset() and qemu_event_wait(). ++ * ++ * qemu_event_set has release semantics, but because it *loads* + * ev->value we need a full memory barrier here. + */ + smp_mb(); + if (qatomic_read(&ev->value) != EV_SET) { +- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) { ++ int old = qatomic_xchg(&ev->value, EV_SET); ++ ++ /* Pairs with memory barrier after ResetEvent. */ ++ smp_mb__after_rmw(); ++ if (old == EV_BUSY) { + /* There were waiters, wake them up. */ + SetEvent(ev->event); + } +@@ -286,17 +294,19 @@ void qemu_event_set(QemuEvent *ev) + + void qemu_event_reset(QemuEvent *ev) + { +- unsigned value; +- + assert(ev->initialized); +- value = qatomic_read(&ev->value); +- smp_mb_acquire(); +- if (value == EV_SET) { +- /* If there was a concurrent reset (or even reset+wait), +- * do nothing. Otherwise change EV_SET->EV_FREE. +- */ +- qatomic_or(&ev->value, EV_FREE); +- } ++ ++ /* ++ * If there was a concurrent reset (or even reset+wait), ++ * do nothing. Otherwise change EV_SET->EV_FREE. ++ */ ++ qatomic_or(&ev->value, EV_FREE); ++ ++ /* ++ * Order reset before checking the condition in the caller. ++ * Pairs with the first memory barrier in qemu_event_set(). 
++ */ ++ smp_mb__after_rmw(); + } + + void qemu_event_wait(QemuEvent *ev) +@@ -304,29 +314,49 @@ void qemu_event_wait(QemuEvent *ev) + unsigned value; + + assert(ev->initialized); +- value = qatomic_read(&ev->value); +- smp_mb_acquire(); ++ ++ /* ++ * qemu_event_wait must synchronize with qemu_event_set even if it does ++ * not go down the slow path, so this load-acquire is needed that ++ * synchronizes with the first memory barrier in qemu_event_set(). ++ * ++ * If we do go down the slow path, there is no requirement at all: we ++ * might miss a qemu_event_set() here but ultimately the memory barrier in ++ * qemu_futex_wait() will ensure the check is done correctly. ++ */ ++ value = qatomic_load_acquire(&ev->value); + if (value != EV_SET) { + if (value == EV_FREE) { +- /* qemu_event_set is not yet going to call SetEvent, but we are +- * going to do another check for EV_SET below when setting EV_BUSY. +- * At that point it is safe to call WaitForSingleObject. ++ /* ++ * Here the underlying kernel event is reset, but qemu_event_set is ++ * not yet going to call SetEvent. However, there will be another ++ * check for EV_SET below when setting EV_BUSY. At that point it ++ * is safe to call WaitForSingleObject. + */ + ResetEvent(ev->event); + +- /* Tell qemu_event_set that there are waiters. No need to retry +- * because there cannot be a concurrent busy->free transition. +- * After the CAS, the event will be either set or busy. ++ /* ++ * It is not clear whether ResetEvent provides this barrier; kernel ++ * APIs (KeResetEvent/KeClearEvent) do not. Better safe than sorry! ++ */ ++ smp_mb(); ++ ++ /* ++ * Leave the event reset and tell qemu_event_set that there are ++ * waiters. No need to retry, because there cannot be a concurrent ++ * busy->free transition. After the CAS, the event will be either ++ * set or busy. 
+ */ + if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { +- value = EV_SET; +- } else { +- value = EV_BUSY; ++ return; + } + } +- if (value == EV_BUSY) { +- WaitForSingleObject(ev->event, INFINITE); +- } ++ ++ /* ++ * ev->value is now EV_BUSY. Since we didn't observe EV_SET, ++ * qemu_event_set() must observe EV_BUSY and call SetEvent(). ++ */ ++ WaitForSingleObject(ev->event, INFINITE); + } + } + +-- +2.39.1 + diff --git a/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch b/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch new file mode 100644 index 0000000..ca61286 --- /dev/null +++ b/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch @@ -0,0 +1,176 @@ +From 0a4f5bcc2a6f8ac31431e971c1dce9e6ab2191c2 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 21 Feb 2023 16:22:16 -0500 +Subject: [PATCH 01/12] scsi: protect req->aiocb with AioContext lock + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread +RH-Bugzilla: 2155748 +RH-Acked-by: Eric Blake +RH-Acked-by: Kevin Wolf +RH-Acked-by: Laszlo Ersek +RH-Commit: [1/3] 61727297bd31dfe18220b61f1d265ced0649c60d (stefanha/centos-stream-qemu-kvm) + +If requests are being processed in the IOThread when a SCSIDevice is +unplugged, scsi_device_purge_requests() -> scsi_req_cancel_async() races +with I/O completion callbacks. Both threads load and store req->aiocb. +This can lead to assert(r->req.aiocb == NULL) failures and undefined +behavior. + +Protect r->req.aiocb with the AioContext lock to prevent the race. 
+ +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Signed-off-by: Stefan Hajnoczi +Message-Id: <20230221212218.1378734-2-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 7b7fc3d0102dafe8eb44802493036a526e921a71) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/scsi-disk.c | 23 ++++++++++++++++------- + hw/scsi/scsi-generic.c | 11 ++++++----- + 2 files changed, 22 insertions(+), 12 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index e493c28814..5327f93f4c 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -273,9 +273,11 @@ static void scsi_aio_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + if (scsi_disk_req_check_error(r, ret, true)) { + goto done; + } +@@ -357,10 +359,11 @@ static void scsi_dma_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { +@@ -393,10 +396,11 @@ static void scsi_read_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { +@@ -446,10 +450,11 @@ static void scsi_do_read_cb(void 
*opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert (r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { +@@ -530,10 +535,11 @@ static void scsi_write_complete(void * opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert (r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { +@@ -1737,10 +1743,11 @@ static void scsi_unmap_complete(void *opaque, int ret) + SCSIDiskReq *r = data->r; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (scsi_disk_req_check_error(r, ret, true)) { + scsi_req_unref(&r->req); + g_free(data); +@@ -1816,9 +1823,11 @@ static void scsi_write_same_complete(void *opaque, int ret) + SCSIDiskReq *r = data->r; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + if (scsi_disk_req_check_error(r, ret, true)) { + goto done; + } +diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c +index 92cce20a4d..ac9fa662b4 100644 +--- a/hw/scsi/scsi-generic.c ++++ b/hw/scsi/scsi-generic.c +@@ -111,10 +111,11 @@ static void scsi_command_complete(void *opaque, int ret) + 
SCSIGenericReq *r = (SCSIGenericReq *)opaque; + SCSIDevice *s = r->req.dev; + ++ aio_context_acquire(blk_get_aio_context(s->conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); + scsi_command_complete_noio(r, ret); + aio_context_release(blk_get_aio_context(s->conf.blk)); + } +@@ -269,11 +270,11 @@ static void scsi_read_complete(void * opaque, int ret) + SCSIDevice *s = r->req.dev; + int len; + ++ aio_context_acquire(blk_get_aio_context(s->conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); +- + if (ret || r->req.io_canceled) { + scsi_command_complete_noio(r, ret); + goto done; +@@ -386,11 +387,11 @@ static void scsi_write_complete(void * opaque, int ret) + + trace_scsi_generic_write_complete(ret); + ++ aio_context_acquire(blk_get_aio_context(s->conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); +- + if (ret || r->req.io_canceled) { + scsi_command_complete_noio(r, ret); + goto done; +-- +2.39.1 + diff --git a/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch b/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch new file mode 100644 index 0000000..c951897 --- /dev/null +++ b/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch @@ -0,0 +1,325 @@ +From c64027b1ff9856031c01009f4b5c3560d92cc998 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 21 Feb 2023 16:22:18 -0500 +Subject: [PATCH 03/12] virtio-scsi: reset SCSI devices from main loop thread + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread +RH-Bugzilla: 2155748 +RH-Acked-by: Eric Blake +RH-Acked-by: Kevin Wolf +RH-Acked-by: Laszlo Ersek +RH-Commit: [3/3] 2a29cb9600709a799daadb4addb58a747ed2e3a3 (stefanha/centos-stream-qemu-kvm) + +When an IOThread is configured, the ctrl virtqueue is 
processed in the +IOThread. TMFs that reset SCSI devices are currently called directly +from the IOThread and trigger an assertion failure in blk_drain() from +the following call stack: + +virtio_scsi_handle_ctrl_req -> virtio_scsi_do_tmf -> device_code_reset +-> scsi_disk_reset -> scsi_device_purge_requests -> blk_drain + + ../block/block-backend.c:1780: void blk_drain(BlockBackend *): Assertion `qemu_in_main_thread()' failed. + +The blk_drain() function is not designed to be called from an IOThread +because it needs the Big QEMU Lock (BQL). + +This patch defers TMFs that reset SCSI devices to a Bottom Half (BH) +that runs in the main loop thread under the BQL. This way it's safe to +call blk_drain() and the assertion failure is avoided. + +Introduce s->tmf_bh_list for tracking TMF requests that have been +deferred to the BH. When the BH runs it will grab the entire list and +process all requests. Care must be taken to clear the list when the +virtio-scsi device is reset or unrealized. Otherwise deferred TMF +requests could execute later and lead to use-after-free or other +undefined behavior. + +The s->resetting counter that's used by TMFs that reset SCSI devices is +accessed from multiple threads. This patch makes that explicit by using +atomic accessor functions. With this patch applied the counter is only +modified by the main loop thread under the BQL but can be read by any +thread. 
+ +Reported-by: Qing Wang +Cc: Paolo Bonzini +Reviewed-by: Eric Blake +Signed-off-by: Stefan Hajnoczi +Message-Id: <20230221212218.1378734-4-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit be2c42b97c3a3a395b2f05bad1b6c7de20ecf2a5) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi.c | 169 +++++++++++++++++++++++++------- + include/hw/virtio/virtio-scsi.h | 11 ++- + 2 files changed, 143 insertions(+), 37 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 6f6e2e32ba..7d27e4c2a1 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -42,13 +42,11 @@ typedef struct VirtIOSCSIReq { + QEMUSGList qsgl; + QEMUIOVector resp_iov; + +- union { +- /* Used for two-stage request submission */ +- QTAILQ_ENTRY(VirtIOSCSIReq) next; ++ /* Used for two-stage request submission and TMFs deferred to BH */ ++ QTAILQ_ENTRY(VirtIOSCSIReq) next; + +- /* Used for cancellation of request during TMFs */ +- int remaining; +- }; ++ /* Used for cancellation of request during TMFs */ ++ int remaining; + + SCSIRequest *sreq; + size_t resp_size; +@@ -293,6 +291,122 @@ static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d) + } + } + ++static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req) ++{ ++ VirtIOSCSI *s = req->dev; ++ SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun); ++ BusChild *kid; ++ int target; ++ ++ switch (req->req.tmf.subtype) { ++ case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: ++ if (!d) { ++ req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET; ++ goto out; ++ } ++ if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { ++ req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN; ++ goto out; ++ } ++ qatomic_inc(&s->resetting); ++ device_cold_reset(&d->qdev); ++ qatomic_dec(&s->resetting); ++ break; ++ ++ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: ++ target = req->req.tmf.lun[1]; ++ qatomic_inc(&s->resetting); ++ ++ rcu_read_lock(); ++ QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, 
sibling) { ++ SCSIDevice *d1 = SCSI_DEVICE(kid->child); ++ if (d1->channel == 0 && d1->id == target) { ++ device_cold_reset(&d1->qdev); ++ } ++ } ++ rcu_read_unlock(); ++ ++ qatomic_dec(&s->resetting); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ break; ++ } ++ ++out: ++ object_unref(OBJECT(d)); ++ ++ virtio_scsi_acquire(s); ++ virtio_scsi_complete_req(req); ++ virtio_scsi_release(s); ++} ++ ++/* Some TMFs must be processed from the main loop thread */ ++static void virtio_scsi_do_tmf_bh(void *opaque) ++{ ++ VirtIOSCSI *s = opaque; ++ QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); ++ VirtIOSCSIReq *req; ++ VirtIOSCSIReq *tmp; ++ ++ GLOBAL_STATE_CODE(); ++ ++ virtio_scsi_acquire(s); ++ ++ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { ++ QTAILQ_REMOVE(&s->tmf_bh_list, req, next); ++ QTAILQ_INSERT_TAIL(&reqs, req, next); ++ } ++ ++ qemu_bh_delete(s->tmf_bh); ++ s->tmf_bh = NULL; ++ ++ virtio_scsi_release(s); ++ ++ QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) { ++ QTAILQ_REMOVE(&reqs, req, next); ++ virtio_scsi_do_one_tmf_bh(req); ++ } ++} ++ ++static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s) ++{ ++ VirtIOSCSIReq *req; ++ VirtIOSCSIReq *tmp; ++ ++ GLOBAL_STATE_CODE(); ++ ++ virtio_scsi_acquire(s); ++ ++ if (s->tmf_bh) { ++ qemu_bh_delete(s->tmf_bh); ++ s->tmf_bh = NULL; ++ } ++ ++ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { ++ QTAILQ_REMOVE(&s->tmf_bh_list, req, next); ++ ++ /* SAM-6 6.3.2 Hard reset */ ++ req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE; ++ virtio_scsi_complete_req(req); ++ } ++ ++ virtio_scsi_release(s); ++} ++ ++static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req) ++{ ++ VirtIOSCSI *s = req->dev; ++ ++ QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next); ++ ++ if (!s->tmf_bh) { ++ s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s); ++ qemu_bh_schedule(s->tmf_bh); ++ } ++} ++ + /* Return 0 if the request is ready to be completed and return to guest; + * -EINPROGRESS if the request is submitted 
and will be completed later, in the + * case of async cancellation. */ +@@ -300,8 +414,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) + { + SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun); + SCSIRequest *r, *next; +- BusChild *kid; +- int target; + int ret = 0; + + virtio_scsi_ctx_check(s, d); +@@ -358,15 +470,9 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) + break; + + case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: +- if (!d) { +- goto fail; +- } +- if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { +- goto incorrect_lun; +- } +- s->resetting++; +- device_cold_reset(&d->qdev); +- s->resetting--; ++ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: ++ virtio_scsi_defer_tmf_to_bh(req); ++ ret = -EINPROGRESS; + break; + + case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET: +@@ -409,22 +515,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) + } + break; + +- case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: +- target = req->req.tmf.lun[1]; +- s->resetting++; +- +- rcu_read_lock(); +- QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) { +- SCSIDevice *d1 = SCSI_DEVICE(kid->child); +- if (d1->channel == 0 && d1->id == target) { +- device_cold_reset(&d1->qdev); +- } +- } +- rcu_read_unlock(); +- +- s->resetting--; +- break; +- + case VIRTIO_SCSI_T_TMF_CLEAR_ACA: + default: + req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_REJECTED; +@@ -654,7 +744,7 @@ static void virtio_scsi_request_cancelled(SCSIRequest *r) + if (!req) { + return; + } +- if (req->dev->resetting) { ++ if (qatomic_read(&req->dev->resetting)) { + req->resp.cmd.response = VIRTIO_SCSI_S_RESET; + } else { + req->resp.cmd.response = VIRTIO_SCSI_S_ABORTED; +@@ -830,9 +920,12 @@ static void virtio_scsi_reset(VirtIODevice *vdev) + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); + + assert(!s->dataplane_started); +- s->resetting++; ++ ++ virtio_scsi_reset_tmf_bh(s); ++ ++ qatomic_inc(&s->resetting); + bus_cold_reset(BUS(&s->bus)); +- s->resetting--; ++ 
qatomic_dec(&s->resetting); + + vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE; + vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE; +@@ -1052,6 +1145,8 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) + VirtIOSCSI *s = VIRTIO_SCSI(dev); + Error *err = NULL; + ++ QTAILQ_INIT(&s->tmf_bh_list); ++ + virtio_scsi_common_realize(dev, + virtio_scsi_handle_ctrl, + virtio_scsi_handle_event, +@@ -1089,6 +1184,8 @@ static void virtio_scsi_device_unrealize(DeviceState *dev) + { + VirtIOSCSI *s = VIRTIO_SCSI(dev); + ++ virtio_scsi_reset_tmf_bh(s); ++ + qbus_set_hotplug_handler(BUS(&s->bus), NULL); + virtio_scsi_common_unrealize(dev); + } +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index a36aad9c86..1c1cd77d6e 100644 +--- a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -75,13 +75,22 @@ struct VirtIOSCSICommon { + VirtQueue **cmd_vqs; + }; + ++struct VirtIOSCSIReq; ++ + struct VirtIOSCSI { + VirtIOSCSICommon parent_obj; + + SCSIBus bus; +- int resetting; ++ int resetting; /* written from main loop thread, read from any thread */ + bool events_dropped; + ++ /* ++ * TMFs deferred to main loop BH. These fields are protected by ++ * virtio_scsi_acquire(). ++ */ ++ QEMUBH *tmf_bh; ++ QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list; ++ + /* Fields for dataplane below */ + AioContext *ctx; /* one iothread per virtio-scsi-pci for now */ + +-- +2.39.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 1162fe0..5061083 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.2.0 -Release: 11%{?rcrel}%{?dist}%{?cc_suffix} +Release: 12%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -376,6 +376,30 @@ Patch113: kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch Patch114: kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch # For bz#2169904 - [SVVP] job 'Check SMBIOS Table Specific Requirements' failed on win2022 Patch115: kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch +# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed +Patch116: kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch +# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed +Patch117: kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch +# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed +Patch118: kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch +# For bz#2175660 - Guest hangs when starting or rebooting +Patch119: kvm-qatomic-add-smp_mb__before-after_rmw.patch +# For bz#2175660 - Guest hangs when starting or rebooting +Patch120: kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch +# For bz#2175660 - Guest hangs when starting or rebooting +Patch121: kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch +# For bz#2175660 - Guest hangs when starting or rebooting +Patch122: kvm-edu-add-smp_mb__after_rmw.patch +# For bz#2175660 - Guest hangs when starting or rebooting +Patch123: kvm-aio-wait-switch-to-smp_mb__after_rmw.patch +# For bz#2175660 - Guest hangs when starting or rebooting +Patch124: kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch +# For bz#2175660 - Guest hangs when starting or rebooting +Patch125: kvm-physmem-add-missing-memory-barrier.patch +# For bz#2175660 - Guest hangs when starting or rebooting +Patch126: kvm-async-update-documentation-of-the-memory-barriers.patch +# For bz#2175660 - Guest hangs when starting or 
rebooting +Patch127: kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch %if %{have_clang} BuildRequires: clang @@ -1406,6 +1430,24 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Sun Mar 12 2023 Miroslav Rezanina - 7.2.0-12 +- kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch [bz#2155748] +- kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch [bz#2155748] +- kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch [bz#2155748] +- kvm-qatomic-add-smp_mb__before-after_rmw.patch [bz#2175660] +- kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch [bz#2175660] +- kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch [bz#2175660] +- kvm-edu-add-smp_mb__after_rmw.patch [bz#2175660] +- kvm-aio-wait-switch-to-smp_mb__after_rmw.patch [bz#2175660] +- kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch [bz#2175660] +- kvm-physmem-add-missing-memory-barrier.patch [bz#2175660] +- kvm-async-update-documentation-of-the-memory-barriers.patch [bz#2175660] +- kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch [bz#2175660] +- Resolves: bz#2155748 + (qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed) +- Resolves: bz#2175660 + (Guest hangs when starting or rebooting) + * Mon Mar 06 2023 Miroslav Rezanina - 7.2.0-11 - kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch [bz#2169904] - Resolves: bz#2169904 From 9092ead740dbb0a0280d8fdd417a40f8e573b3e3 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 20 Mar 2023 01:59:30 -0400 Subject: [PATCH 187/195] * Mon Mar 20 2023 Miroslav Rezanina - 7.2.0-13 - kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch [bz#2173590] - kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch [bz#2173590] - kvm-target-i386-Fix-BEXTR-instruction.patch [bz#2173590] - kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch [bz#2173590] - kvm-target-i386-fix-ADOX-followed-by-ADCX.patch [bz#2173590] - 
kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch [bz#2173590] - kvm-target-i386-Fix-BZHI-instruction.patch [bz#2173590] - kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch [bz#2156876] - Resolves: bz#2173590 (bugs in emulation of BMI instructions (for libguestfs without KVM)) - Resolves: bz#2156876 ([virtual network][rhel7.9_guest] qemu-kvm: vhost vring error in virtqueue 1: Invalid argument (22)) --- ...-fail-DEVIOTLB_UNMAP-without-dt-mode.patch | 64 ++++ ...-32-bit-AD-CO-X-insns-in-64-bit-mode.patch | 144 +++++++++ kvm-target-i386-Fix-BEXTR-instruction.patch | 110 +++++++ kvm-target-i386-Fix-BZHI-instruction.patch | 77 +++++ ...i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch | 60 ++++ ...arget-i386-fix-ADOX-followed-by-ADCX.patch | 205 ++++++++++++ ...operand-size-of-unary-SSE-operations.patch | 77 +++++ ...Introduce-and-use-reg_t-consistently.patch | 299 ++++++++++++++++++ qemu-kvm.spec | 32 +- 9 files changed, 1067 insertions(+), 1 deletion(-) create mode 100644 kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch create mode 100644 kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch create mode 100644 kvm-target-i386-Fix-BEXTR-instruction.patch create mode 100644 kvm-target-i386-Fix-BZHI-instruction.patch create mode 100644 kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch create mode 100644 kvm-target-i386-fix-ADOX-followed-by-ADCX.patch create mode 100644 kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch create mode 100644 kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch diff --git a/kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch b/kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch new file mode 100644 index 0000000..0f321e4 --- /dev/null +++ b/kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch @@ -0,0 +1,64 @@ +From cadcc1c6a001622d971c86d44925516905e3d104 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Thu, 23 Feb 2023 14:59:21 +0800 +Subject: [PATCH 8/8] intel-iommu: 
fail DEVIOTLB_UNMAP without dt mode + +RH-Author: Laurent Vivier +RH-MergeRequest: 157: intel-iommu: fail DEVIOTLB_UNMAP without dt mode +RH-Bugzilla: 2156876 +RH-Acked-by: Eric Auger +RH-Acked-by: Peter Xu +RH-Acked-by: MST +RH-Commit: [1/1] eb9dbae6140ef4ba10d90b9e66abd75540f6892d (lvivier/qemu-kvm-centos) + +Without dt mode, device IOTLB notifier won't work since guest won't +send device IOTLB invalidation descriptor in this case. Let's fail +early instead of misbehaving silently. + +Reviewed-by: Laurent Vivier +Tested-by: Laurent Vivier +Tested-by: Viktor Prutyanov +Buglink: https://bugzilla.redhat.com/2156876 +Signed-off-by: Jason Wang +Message-Id: <20230223065924.42503-3-jasowang@redhat.com> +Reviewed-by: Peter Xu +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 09adb0e021207b60a0c51a68939b4539d98d3ef3) + +Conflict in hw/i386/intel_iommu.c because of missing commit: + + 4ce27463ccce ("intel-iommu: fail MAP notifier without caching mode") +--- + hw/i386/intel_iommu.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index a08ee85edf..d2983f40d3 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -3179,6 +3179,7 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, + { + VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); + IntelIOMMUState *s = vtd_as->iommu_state; ++ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); + + /* TODO: add support for VFIO and vhost users */ + if (s->snoop_control) { +@@ -3186,6 +3187,13 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, + "Snoop Control with vhost or VFIO is not supported"); + return -ENOTSUP; + } ++ if (!x86_iommu->dt_supported && (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP)) { ++ error_setg_errno(errp, ENOTSUP, ++ "device %02x.%02x.%x requires device IOTLB mode", ++ pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn), ++ PCI_FUNC(vtd_as->devfn)); ++ 
return -ENOTSUP; ++ } + + /* Update per-address-space notifier flags */ + vtd_as->notifier_flags = new; +-- +2.39.1 + diff --git a/kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch b/kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch new file mode 100644 index 0000000..52e73e7 --- /dev/null +++ b/kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch @@ -0,0 +1,144 @@ +From e419493e6ec188461aa6f06c1b1cdc8a698859df Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Sat, 14 Jan 2023 15:21:03 -1000 +Subject: [PATCH 6/8] target/i386: Fix 32-bit AD[CO]X insns in 64-bit mode +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions +RH-Bugzilla: 2173590 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Bandan Das +RH-Commit: [6/7] 0fa4d3858319d4f877a5b3f31776121a72e2c57a (bonzini/rhel-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 +Upstream-Status: merged + +Failure to truncate the inputs results in garbage for the carry-out. 
+ +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1373 +Signed-off-by: Richard Henderson +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20230115012103.3131796-1-richard.henderson@linaro.org> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 6fbef9426bac7184b5d5887589d8386e732865eb) +--- + target/i386/tcg/emit.c.inc | 2 + + tests/tcg/x86_64/Makefile.target | 3 ++ + tests/tcg/x86_64/adox.c | 69 ++++++++++++++++++++++++++++++++ + 3 files changed, 74 insertions(+) + create mode 100644 tests/tcg/x86_64/adox.c + +diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc +index 0d7c6e80ae..e61ae9a2e9 100644 +--- a/target/i386/tcg/emit.c.inc ++++ b/target/i386/tcg/emit.c.inc +@@ -1037,6 +1037,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) + #ifdef TARGET_X86_64 + case MO_32: + /* If TL is 64-bit just do everything in 64-bit arithmetic. */ ++ tcg_gen_ext32u_tl(s->T0, s->T0); ++ tcg_gen_ext32u_tl(s->T1, s->T1); + tcg_gen_add_i64(s->T0, s->T0, s->T1); + tcg_gen_add_i64(s->T0, s->T0, carry_in); + tcg_gen_shri_i64(carry_out, s->T0, 32); +diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target +index 4eac78293f..e64aab1b81 100644 +--- a/tests/tcg/x86_64/Makefile.target ++++ b/tests/tcg/x86_64/Makefile.target +@@ -12,11 +12,14 @@ ifeq ($(filter %-linux-user, $(TARGET)),$(TARGET)) + X86_64_TESTS += vsyscall + X86_64_TESTS += noexec + X86_64_TESTS += cmpxchg ++X86_64_TESTS += adox + TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64 + else + TESTS=$(MULTIARCH_TESTS) + endif + ++adox: CFLAGS=-O2 ++ + run-test-i386-ssse3: QEMU_OPTS += -cpu max + run-plugin-test-i386-ssse3-%: QEMU_OPTS += -cpu max + +diff --git a/tests/tcg/x86_64/adox.c b/tests/tcg/x86_64/adox.c +new file mode 100644 +index 0000000000..36be644c8b +--- /dev/null ++++ b/tests/tcg/x86_64/adox.c +@@ -0,0 +1,69 @@ ++/* See if ADOX give expected results */ ++ ++#include ++#include ++#include ++ ++static uint64_t adoxq(bool *c_out, 
uint64_t a, uint64_t b, bool c) ++{ ++ asm ("addl $0x7fffffff, %k1\n\t" ++ "adoxq %2, %0\n\t" ++ "seto %b1" ++ : "+r"(a), "=&r"(c) : "r"(b), "1"((int)c)); ++ *c_out = c; ++ return a; ++} ++ ++static uint64_t adoxl(bool *c_out, uint64_t a, uint64_t b, bool c) ++{ ++ asm ("addl $0x7fffffff, %k1\n\t" ++ "adoxl %k2, %k0\n\t" ++ "seto %b1" ++ : "+r"(a), "=&r"(c) : "r"(b), "1"((int)c)); ++ *c_out = c; ++ return a; ++} ++ ++int main() ++{ ++ uint64_t r; ++ bool c; ++ ++ r = adoxq(&c, 0, 0, 0); ++ assert(r == 0); ++ assert(c == 0); ++ ++ r = adoxl(&c, 0, 0, 0); ++ assert(r == 0); ++ assert(c == 0); ++ ++ r = adoxl(&c, 0x100000000, 0, 0); ++ assert(r == 0); ++ assert(c == 0); ++ ++ r = adoxq(&c, 0, 0, 1); ++ assert(r == 1); ++ assert(c == 0); ++ ++ r = adoxl(&c, 0, 0, 1); ++ assert(r == 1); ++ assert(c == 0); ++ ++ r = adoxq(&c, -1, -1, 0); ++ assert(r == -2); ++ assert(c == 1); ++ ++ r = adoxl(&c, -1, -1, 0); ++ assert(r == 0xfffffffe); ++ assert(c == 1); ++ ++ r = adoxq(&c, -1, -1, 1); ++ assert(r == -1); ++ assert(c == 1); ++ ++ r = adoxl(&c, -1, -1, 1); ++ assert(r == 0xffffffff); ++ assert(c == 1); ++ ++ return 0; ++} +-- +2.39.1 + diff --git a/kvm-target-i386-Fix-BEXTR-instruction.patch b/kvm-target-i386-Fix-BEXTR-instruction.patch new file mode 100644 index 0000000..0c28c7e --- /dev/null +++ b/kvm-target-i386-Fix-BEXTR-instruction.patch @@ -0,0 +1,110 @@ +From a019c203f0148e5fbb20e102a17453806f5296b6 Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Sat, 14 Jan 2023 13:05:42 -1000 +Subject: [PATCH 3/8] target/i386: Fix BEXTR instruction + +RH-Author: Paolo Bonzini +RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions +RH-Bugzilla: 2173590 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Bandan Das +RH-Commit: [3/7] bd1e3b26c72d7152b44be2d34308fd40dc106424 (bonzini/rhel-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 +Upstream-Status: merged + +There were two problems here: 
not limiting the input to operand bits, +and not correctly handling large extraction length. + +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1372 +Signed-off-by: Richard Henderson +Message-Id: <20230114230542.3116013-3-richard.henderson@linaro.org> +Cc: qemu-stable@nongnu.org +Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18) +Signed-off-by: Paolo Bonzini +(cherry picked from commit b14c0098975264ed03144f145bca0179a6763a07) +--- + target/i386/tcg/emit.c.inc | 22 +++++++++++----------- + tests/tcg/i386/test-i386-bmi2.c | 12 ++++++++++++ + 2 files changed, 23 insertions(+), 11 deletions(-) + +diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc +index 7037ff91c6..99f6ba6e19 100644 +--- a/target/i386/tcg/emit.c.inc ++++ b/target/i386/tcg/emit.c.inc +@@ -1078,30 +1078,30 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) + static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) + { + MemOp ot = decode->op[0].ot; +- TCGv bound, zero; ++ TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); ++ TCGv zero = tcg_constant_tl(0); ++ TCGv mone = tcg_constant_tl(-1); + + /* + * Extract START, and shift the operand. + * Shifts larger than operand size get zeros. + */ + tcg_gen_ext8u_tl(s->A0, s->T1); ++ if (TARGET_LONG_BITS == 64 && ot == MO_32) { ++ tcg_gen_ext32u_tl(s->T0, s->T0); ++ } + tcg_gen_shr_tl(s->T0, s->T0, s->A0); + +- bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); +- zero = tcg_constant_tl(0); + tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero); + + /* +- * Extract the LEN into a mask. Lengths larger than +- * operand size get all ones. ++ * Extract the LEN into an inverse mask. Lengths larger than ++ * operand size get all zeros, length 0 gets all ones. 
+ */ + tcg_gen_extract_tl(s->A0, s->T1, 8, 8); +- tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound, s->A0, bound); +- +- tcg_gen_movi_tl(s->T1, 1); +- tcg_gen_shl_tl(s->T1, s->T1, s->A0); +- tcg_gen_subi_tl(s->T1, s->T1, 1); +- tcg_gen_and_tl(s->T0, s->T0, s->T1); ++ tcg_gen_shl_tl(s->T1, mone, s->A0); ++ tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero); ++ tcg_gen_andc_tl(s->T0, s->T0, s->T1); + + gen_op_update1_cc(s); + set_cc_op(s, CC_OP_LOGICB + ot); +diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c +index 3c3ef85513..982d4abda4 100644 +--- a/tests/tcg/i386/test-i386-bmi2.c ++++ b/tests/tcg/i386/test-i386-bmi2.c +@@ -99,6 +99,9 @@ int main(int argc, char *argv[]) { + result = bextrq(mask, 0x10f8); + assert(result == 0); + ++ result = bextrq(0xfedcba9876543210ull, 0x7f00); ++ assert(result == 0xfedcba9876543210ull); ++ + result = blsiq(0x30); + assert(result == 0x10); + +@@ -164,6 +167,15 @@ int main(int argc, char *argv[]) { + result = bextrl(mask, 0x1038); + assert(result == 0); + ++ result = bextrl((reg_t)0x8f635a775ad3b9b4ull, 0x3018); ++ assert(result == 0x5a); ++ ++ result = bextrl((reg_t)0xfedcba9876543210ull, 0x7f00); ++ assert(result == 0x76543210u); ++ ++ result = bextrl(-1, 0); ++ assert(result == 0); ++ + result = blsil(0xffff); + assert(result == 1); + +-- +2.39.1 + diff --git a/kvm-target-i386-Fix-BZHI-instruction.patch b/kvm-target-i386-Fix-BZHI-instruction.patch new file mode 100644 index 0000000..bcf79f4 --- /dev/null +++ b/kvm-target-i386-Fix-BZHI-instruction.patch @@ -0,0 +1,77 @@ +From d49e5d193dfccf6f5cfa98ccce5bd491478d563d Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Sat, 14 Jan 2023 13:32:06 -1000 +Subject: [PATCH 7/8] target/i386: Fix BZHI instruction + +RH-Author: Paolo Bonzini +RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions +RH-Bugzilla: 2173590 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Bandan Das 
+RH-Commit: [7/7] ad6b343c09c0304ac32cc68670c49d1fc12d8cf8 (bonzini/rhel-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 +Upstream-Status: merged + +We did not correctly handle N >= operand size. + +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1374 +Signed-off-by: Richard Henderson +Message-Id: <20230114233206.3118472-1-richard.henderson@linaro.org> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9ad2ba6e8e7fc195d0dd0b76ab38bd2fceb1bdd4) +--- + target/i386/tcg/emit.c.inc | 14 +++++++------- + tests/tcg/i386/test-i386-bmi2.c | 3 +++ + 2 files changed, 10 insertions(+), 7 deletions(-) + +diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc +index e61ae9a2e9..0d01e13002 100644 +--- a/target/i386/tcg/emit.c.inc ++++ b/target/i386/tcg/emit.c.inc +@@ -1147,20 +1147,20 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) + static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) + { + MemOp ot = decode->op[0].ot; +- TCGv bound; ++ TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); ++ TCGv zero = tcg_constant_tl(0); ++ TCGv mone = tcg_constant_tl(-1); + +- tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]); +- bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); ++ tcg_gen_ext8u_tl(s->T1, s->T1); + + /* + * Note that since we're using BMILG (in order to get O + * cleared) we need to store the inverse into C. 
+ */ +- tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src, s->T1, bound); +- tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1, bound, bound, s->T1); ++ tcg_gen_setcond_tl(TCG_COND_LEU, cpu_cc_src, s->T1, bound); + +- tcg_gen_movi_tl(s->A0, -1); +- tcg_gen_shl_tl(s->A0, s->A0, s->T1); ++ tcg_gen_shl_tl(s->A0, mone, s->T1); ++ tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero); + tcg_gen_andc_tl(s->T0, s->T0, s->A0); + + gen_op_update1_cc(s); +diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c +index 982d4abda4..0244df7987 100644 +--- a/tests/tcg/i386/test-i386-bmi2.c ++++ b/tests/tcg/i386/test-i386-bmi2.c +@@ -123,6 +123,9 @@ int main(int argc, char *argv[]) { + result = bzhiq(mask, 0x1f); + assert(result == (mask & ~(-1 << 30))); + ++ result = bzhiq(mask, 0x40); ++ assert(result == mask); ++ + result = rorxq(0x2132435465768798, 8); + assert(result == 0x9821324354657687); + +-- +2.39.1 + diff --git a/kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch b/kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch new file mode 100644 index 0000000..7f3051f --- /dev/null +++ b/kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch @@ -0,0 +1,60 @@ +From cb2b591e1677db2837810eaedac534a7ff3a7b1c Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Sat, 14 Jan 2023 08:06:01 -1000 +Subject: [PATCH 4/8] target/i386: Fix C flag for BLSI, BLSMSK, BLSR + +RH-Author: Paolo Bonzini +RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions +RH-Bugzilla: 2173590 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Bandan Das +RH-Commit: [4/7] 173e23c492c830da6c5a4be0cfc20a69ac655b59 (bonzini/rhel-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 +Upstream-Status: merged + +We forgot to set cc_src, which is used for computing C. 
+ +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1370 +Signed-off-by: Richard Henderson +Message-Id: <20230114180601.2993644-1-richard.henderson@linaro.org> +Cc: qemu-stable@nongnu.org +Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18) +Signed-off-by: Paolo Bonzini +(cherry picked from commit 99282098dc74c2055bde5652bde6cf0067d0c370) +--- + target/i386/tcg/emit.c.inc | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc +index 99f6ba6e19..4d7702c106 100644 +--- a/target/i386/tcg/emit.c.inc ++++ b/target/i386/tcg/emit.c.inc +@@ -1111,6 +1111,7 @@ static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) + { + MemOp ot = decode->op[0].ot; + ++ tcg_gen_mov_tl(cpu_cc_src, s->T0); + tcg_gen_neg_tl(s->T1, s->T0); + tcg_gen_and_tl(s->T0, s->T0, s->T1); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); +@@ -1121,6 +1122,7 @@ static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode + { + MemOp ot = decode->op[0].ot; + ++ tcg_gen_mov_tl(cpu_cc_src, s->T0); + tcg_gen_subi_tl(s->T1, s->T0, 1); + tcg_gen_xor_tl(s->T0, s->T0, s->T1); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); +@@ -1131,6 +1133,7 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) + { + MemOp ot = decode->op[0].ot; + ++ tcg_gen_mov_tl(cpu_cc_src, s->T0); + tcg_gen_subi_tl(s->T1, s->T0, 1); + tcg_gen_and_tl(s->T0, s->T0, s->T1); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); +-- +2.39.1 + diff --git a/kvm-target-i386-fix-ADOX-followed-by-ADCX.patch b/kvm-target-i386-fix-ADOX-followed-by-ADCX.patch new file mode 100644 index 0000000..72ae8ee --- /dev/null +++ b/kvm-target-i386-fix-ADOX-followed-by-ADCX.patch @@ -0,0 +1,205 @@ +From 54d3e58aabf9716f9a07aeb7044d7b7997e28123 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 31 Jan 2023 09:48:03 +0100 +Subject: [PATCH 5/8] target/i386: fix ADOX followed by ADCX + +RH-Author: Paolo Bonzini 
+RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions +RH-Bugzilla: 2173590 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Bandan Das +RH-Commit: [5/7] 64dbe4e602f08e4a88fdeacee5a8993ca4383563 (bonzini/rhel-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 +Upstream-Status: merged + +When ADCX is followed by ADOX or vice versa, the second instruction's +carry comes from EFLAGS and the condition codes use the CC_OP_ADCOX +operation. Retrieving the carry from EFLAGS is handled by this bit +of gen_ADCOX: + + tcg_gen_extract_tl(carry_in, cpu_cc_src, + ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1); + +Unfortunately, in this case cc_op has been overwritten by the previous +"if" statement to CC_OP_ADCOX. This works by chance when the first +instruction is ADCX; however, if the first instruction is ADOX, +ADCX will incorrectly take its carry from OF instead of CF. + +Fix by moving the computation of the new cc_op at the end of the function. +The included exhaustive test case fails without this patch and passes +afterwards. + +Because ADCX/ADOX need not be invoked through the VEX prefix, this +regression bisects to commit 16fc5726a6e2 ("target/i386: reimplement +0x0f 0x38, add AVX", 2022-10-18). However, the mistake happened a +little earlier, when BMI instructions were rewritten using the new +decoder framework. 
+ +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1471 +Reported-by: Paul Jolly +Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18) +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +(cherry picked from commit 60c7dd22e1383754d5f150bc9f7c2785c662a7b6) +--- + target/i386/tcg/emit.c.inc | 20 +++++---- + tests/tcg/i386/Makefile.target | 6 ++- + tests/tcg/i386/test-i386-adcox.c | 75 ++++++++++++++++++++++++++++++++ + 3 files changed, 91 insertions(+), 10 deletions(-) + create mode 100644 tests/tcg/i386/test-i386-adcox.c + +diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc +index 4d7702c106..0d7c6e80ae 100644 +--- a/target/i386/tcg/emit.c.inc ++++ b/target/i386/tcg/emit.c.inc +@@ -1015,6 +1015,7 @@ VSIB_AVX(VPGATHERQ, vpgatherq) + + static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) + { ++ int opposite_cc_op; + TCGv carry_in = NULL; + TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2); + TCGv zero; +@@ -1022,14 +1023,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) + if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) { + /* Re-use the carry-out from a previous round. */ + carry_in = carry_out; +- cc_op = s->cc_op; +- } else if (s->cc_op == CC_OP_ADCX || s->cc_op == CC_OP_ADOX) { +- /* Merge with the carry-out from the opposite instruction. */ +- cc_op = CC_OP_ADCOX; +- } +- +- /* If we don't have a carry-in, get it out of EFLAGS. */ +- if (!carry_in) { ++ } else { ++ /* We don't have a carry-in, get it out of EFLAGS. */ + if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) { + gen_compute_eflags(s); + } +@@ -1053,7 +1048,14 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) + tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero); + break; + } +- set_cc_op(s, cc_op); ++ ++ opposite_cc_op = cc_op == CC_OP_ADCX ? 
CC_OP_ADOX : CC_OP_ADCX; ++ if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) { ++ /* Merge with the carry-out from the opposite instruction. */ ++ set_cc_op(s, CC_OP_ADCOX); ++ } else { ++ set_cc_op(s, cc_op); ++ } + } + + static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target +index 81831cafbc..bafd8c2180 100644 +--- a/tests/tcg/i386/Makefile.target ++++ b/tests/tcg/i386/Makefile.target +@@ -14,7 +14,7 @@ config-cc.mak: Makefile + I386_SRCS=$(notdir $(wildcard $(I386_SRC)/*.c)) + ALL_X86_TESTS=$(I386_SRCS:.c=) + SKIP_I386_TESTS=test-i386-ssse3 test-avx test-3dnow test-mmx +-X86_64_TESTS:=$(filter test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS)) ++X86_64_TESTS:=$(filter test-i386-adcox test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS)) + + test-i386-sse-exceptions: CFLAGS += -msse4.1 -mfpmath=sse + run-test-i386-sse-exceptions: QEMU_OPTS += -cpu max +@@ -28,6 +28,10 @@ test-i386-bmi2: CFLAGS=-O2 + run-test-i386-bmi2: QEMU_OPTS += -cpu max + run-plugin-test-i386-bmi2-%: QEMU_OPTS += -cpu max + ++test-i386-adcox: CFLAGS=-O2 ++run-test-i386-adcox: QEMU_OPTS += -cpu max ++run-plugin-test-i386-adcox-%: QEMU_OPTS += -cpu max ++ + # + # hello-i386 is a barebones app + # +diff --git a/tests/tcg/i386/test-i386-adcox.c b/tests/tcg/i386/test-i386-adcox.c +new file mode 100644 +index 0000000000..16169efff8 +--- /dev/null ++++ b/tests/tcg/i386/test-i386-adcox.c +@@ -0,0 +1,75 @@ ++/* See if various BMI2 instructions give expected results */ ++#include ++#include ++#include ++ ++#define CC_C 1 ++#define CC_O (1 << 11) ++ ++#ifdef __x86_64__ ++#define REG uint64_t ++#else ++#define REG uint32_t ++#endif ++ ++void test_adox_adcx(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand) ++{ ++ REG flags; ++ REG out_adcx, out_adox; ++ ++ asm("pushf; pop %0" : "=r"(flags)); ++ flags &= ~(CC_C | CC_O); ++ flags |= (in_c ? CC_C : 0); ++ flags |= (in_o ? 
CC_O : 0); ++ ++ out_adcx = adcx_operand; ++ out_adox = adox_operand; ++ asm("push %0; popf;" ++ "adox %3, %2;" ++ "adcx %3, %1;" ++ "pushf; pop %0" ++ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox) ++ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox)); ++ ++ assert(out_adcx == in_c + adcx_operand - 1); ++ assert(out_adox == in_o + adox_operand - 1); ++ assert(!!(flags & CC_C) == (in_c || adcx_operand)); ++ assert(!!(flags & CC_O) == (in_o || adox_operand)); ++} ++ ++void test_adcx_adox(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand) ++{ ++ REG flags; ++ REG out_adcx, out_adox; ++ ++ asm("pushf; pop %0" : "=r"(flags)); ++ flags &= ~(CC_C | CC_O); ++ flags |= (in_c ? CC_C : 0); ++ flags |= (in_o ? CC_O : 0); ++ ++ out_adcx = adcx_operand; ++ out_adox = adox_operand; ++ asm("push %0; popf;" ++ "adcx %3, %1;" ++ "adox %3, %2;" ++ "pushf; pop %0" ++ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox) ++ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox)); ++ ++ assert(out_adcx == in_c + adcx_operand - 1); ++ assert(out_adox == in_o + adox_operand - 1); ++ assert(!!(flags & CC_C) == (in_c || adcx_operand)); ++ assert(!!(flags & CC_O) == (in_o || adox_operand)); ++} ++ ++int main(int argc, char *argv[]) { ++ /* try all combinations of input CF, input OF, CF from op1+op2, OF from op2+op1 */ ++ int i; ++ for (i = 0; i <= 15; i++) { ++ printf("%d\n", i); ++ test_adcx_adox(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8)); ++ test_adox_adcx(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8)); ++ } ++ return 0; ++} ++ +-- +2.39.1 + diff --git a/kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch b/kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch new file mode 100644 index 0000000..81a0003 --- /dev/null +++ b/kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch @@ -0,0 +1,77 @@ +From f4ddcdd2395e0944c20f6683c66068ed0ac7d757 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Sat, 7 Jan 2023 18:14:20 +0100 +Subject: 
[PATCH 1/8] target/i386: fix operand size of unary SSE operations + +RH-Author: Paolo Bonzini +RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions +RH-Bugzilla: 2173590 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Bandan Das +RH-Commit: [1/7] 7041f3e30e19add6bd8e5355d8bebf92390a5c2e (bonzini/rhel-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 +Upstream-Status: merged + +VRCPSS, VRSQRTSS and VCVTSx2Sx have a 32-bit or 64-bit memory operand, +which is represented in the decoding tables by X86_VEX_REPScalar. Add it +to the tables, and make validate_vex() handle the case of an instruction +that is in exception type 4 without the REP prefix and exception type 5 +with it; this is the case of VRCP and VRSQRT. + +Reported-by: yongwoo +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1377 +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3d304620ec6c95f31db17acc132f42f243369299) +--- + target/i386/tcg/decode-new.c.inc | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc +index 80c579164f..d5fd8d965c 100644 +--- a/target/i386/tcg/decode-new.c.inc ++++ b/target/i386/tcg/decode-new.c.inc +@@ -105,6 +105,7 @@ + #define vex3 .vex_class = 3, + #define vex4 .vex_class = 4, + #define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned, ++#define vex4_rep5 .vex_class = 4, .vex_special = X86_VEX_REPScalar, + #define vex5 .vex_class = 5, + #define vex6 .vex_class = 6, + #define vex7 .vex_class = 7, +@@ -839,8 +840,8 @@ static const X86OpEntry opcodes_0F[256] = { + + [0x50] = X86_OP_ENTRY3(MOVMSK, G,y, None,None, U,x, vex7 p_00_66), + [0x51] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), +- [0x52] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex5 p_00_f3), +- [0x53] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex5 p_00_f3), ++ [0x52] = 
X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), ++ [0x53] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), + [0x54] = X86_OP_ENTRY3(PAND, V,x, H,x, W,x, vex4 p_00_66), /* vand */ + [0x55] = X86_OP_ENTRY3(PANDN, V,x, H,x, W,x, vex4 p_00_66), /* vandn */ + [0x56] = X86_OP_ENTRY3(POR, V,x, H,x, W,x, vex4 p_00_66), /* vor */ +@@ -878,7 +879,7 @@ static const X86OpEntry opcodes_0F[256] = { + + [0x58] = X86_OP_ENTRY3(VADD, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), + [0x59] = X86_OP_ENTRY3(VMUL, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), +- [0x5a] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex3 p_00_66_f3_f2), ++ [0x5a] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), + [0x5b] = X86_OP_GROUP0(0F5B), + [0x5c] = X86_OP_ENTRY3(VSUB, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), + [0x5d] = X86_OP_ENTRY3(VMIN, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), +@@ -1447,9 +1448,9 @@ static bool validate_vex(DisasContext *s, X86DecodedInsn *decode) + * Instructions which differ between 00/66 and F2/F3 in the + * exception classification and the size of the memory operand. + */ +- assert(e->vex_class == 1 || e->vex_class == 2); ++ assert(e->vex_class == 1 || e->vex_class == 2 || e->vex_class == 4); + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { +- e->vex_class = 3; ++ e->vex_class = e->vex_class < 4 ? 
3 : 5; + if (s->vex_l) { + goto illegal; + } +-- +2.39.1 + diff --git a/kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch b/kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch new file mode 100644 index 0000000..14388fe --- /dev/null +++ b/kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch @@ -0,0 +1,299 @@ +From 120db3dfeb88c447f0e115c19b7ede704f8f80cb Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Sat, 14 Jan 2023 13:05:41 -1000 +Subject: [PATCH 2/8] tests/tcg/i386: Introduce and use reg_t consistently +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions +RH-Bugzilla: 2173590 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Bandan Das +RH-Commit: [2/7] 843a677555414170392db21c828bef3dc3c29300 (bonzini/rhel-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 +Upstream-Status: merged + +Define reg_t based on the actual register width. +Define the inlines using that type. This will allow +input registers to 32-bit insns to be set to 64-bit +values on x86-64, which allows testing various edge cases. 
+ +Signed-off-by: Richard Henderson +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20230114230542.3116013-2-richard.henderson@linaro.org> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 5d62d6649cd367b5b4a3676e7514d2f9ca86cb03) +--- + tests/tcg/i386/test-i386-bmi2.c | 182 ++++++++++++++++---------------- + 1 file changed, 93 insertions(+), 89 deletions(-) + +diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c +index 5fadf47510..3c3ef85513 100644 +--- a/tests/tcg/i386/test-i386-bmi2.c ++++ b/tests/tcg/i386/test-i386-bmi2.c +@@ -3,34 +3,40 @@ + #include + #include + ++#ifdef __x86_64 ++typedef uint64_t reg_t; ++#else ++typedef uint32_t reg_t; ++#endif ++ + #define insn1q(name, arg0) \ +-static inline uint64_t name##q(uint64_t arg0) \ ++static inline reg_t name##q(reg_t arg0) \ + { \ +- uint64_t result64; \ ++ reg_t result64; \ + asm volatile (#name "q %1, %0" : "=r"(result64) : "rm"(arg0)); \ + return result64; \ + } + + #define insn1l(name, arg0) \ +-static inline uint32_t name##l(uint32_t arg0) \ ++static inline reg_t name##l(reg_t arg0) \ + { \ +- uint32_t result32; \ ++ reg_t result32; \ + asm volatile (#name "l %k1, %k0" : "=r"(result32) : "rm"(arg0)); \ + return result32; \ + } + + #define insn2q(name, arg0, c0, arg1, c1) \ +-static inline uint64_t name##q(uint64_t arg0, uint64_t arg1) \ ++static inline reg_t name##q(reg_t arg0, reg_t arg1) \ + { \ +- uint64_t result64; \ ++ reg_t result64; \ + asm volatile (#name "q %2, %1, %0" : "=r"(result64) : c0(arg0), c1(arg1)); \ + return result64; \ + } + + #define insn2l(name, arg0, c0, arg1, c1) \ +-static inline uint32_t name##l(uint32_t arg0, uint32_t arg1) \ ++static inline reg_t name##l(reg_t arg0, reg_t arg1) \ + { \ +- uint32_t result32; \ ++ reg_t result32; \ + asm volatile (#name "l %k2, %k1, %k0" : "=r"(result32) : c0(arg0), c1(arg1)); \ + return result32; \ + } +@@ -65,130 +71,128 @@ insn1l(blsr, src) + int main(int argc, char *argv[]) { + uint64_t ehlo = 
0x202020204f4c4845ull; + uint64_t mask = 0xa080800302020001ull; +- uint32_t result32; ++ reg_t result; + + #ifdef __x86_64 +- uint64_t result64; +- + /* 64 bits */ +- result64 = andnq(mask, ehlo); +- assert(result64 == 0x002020204d4c4844); ++ result = andnq(mask, ehlo); ++ assert(result == 0x002020204d4c4844); + +- result64 = pextq(ehlo, mask); +- assert(result64 == 133); ++ result = pextq(ehlo, mask); ++ assert(result == 133); + +- result64 = pdepq(result64, mask); +- assert(result64 == (ehlo & mask)); ++ result = pdepq(result, mask); ++ assert(result == (ehlo & mask)); + +- result64 = pextq(-1ull, mask); +- assert(result64 == 511); /* mask has 9 bits set */ ++ result = pextq(-1ull, mask); ++ assert(result == 511); /* mask has 9 bits set */ + +- result64 = pdepq(-1ull, mask); +- assert(result64 == mask); ++ result = pdepq(-1ull, mask); ++ assert(result == mask); + +- result64 = bextrq(mask, 0x3f00); +- assert(result64 == (mask & ~INT64_MIN)); ++ result = bextrq(mask, 0x3f00); ++ assert(result == (mask & ~INT64_MIN)); + +- result64 = bextrq(mask, 0x1038); +- assert(result64 == 0xa0); ++ result = bextrq(mask, 0x1038); ++ assert(result == 0xa0); + +- result64 = bextrq(mask, 0x10f8); +- assert(result64 == 0); ++ result = bextrq(mask, 0x10f8); ++ assert(result == 0); + +- result64 = blsiq(0x30); +- assert(result64 == 0x10); ++ result = blsiq(0x30); ++ assert(result == 0x10); + +- result64 = blsiq(0x30ull << 32); +- assert(result64 == 0x10ull << 32); ++ result = blsiq(0x30ull << 32); ++ assert(result == 0x10ull << 32); + +- result64 = blsmskq(0x30); +- assert(result64 == 0x1f); ++ result = blsmskq(0x30); ++ assert(result == 0x1f); + +- result64 = blsrq(0x30); +- assert(result64 == 0x20); ++ result = blsrq(0x30); ++ assert(result == 0x20); + +- result64 = blsrq(0x30ull << 32); +- assert(result64 == 0x20ull << 32); ++ result = blsrq(0x30ull << 32); ++ assert(result == 0x20ull << 32); + +- result64 = bzhiq(mask, 0x3f); +- assert(result64 == (mask & ~INT64_MIN)); ++ result 
= bzhiq(mask, 0x3f); ++ assert(result == (mask & ~INT64_MIN)); + +- result64 = bzhiq(mask, 0x1f); +- assert(result64 == (mask & ~(-1 << 30))); ++ result = bzhiq(mask, 0x1f); ++ assert(result == (mask & ~(-1 << 30))); + +- result64 = rorxq(0x2132435465768798, 8); +- assert(result64 == 0x9821324354657687); ++ result = rorxq(0x2132435465768798, 8); ++ assert(result == 0x9821324354657687); + +- result64 = sarxq(0xffeeddccbbaa9988, 8); +- assert(result64 == 0xffffeeddccbbaa99); ++ result = sarxq(0xffeeddccbbaa9988, 8); ++ assert(result == 0xffffeeddccbbaa99); + +- result64 = sarxq(0x77eeddccbbaa9988, 8 | 64); +- assert(result64 == 0x0077eeddccbbaa99); ++ result = sarxq(0x77eeddccbbaa9988, 8 | 64); ++ assert(result == 0x0077eeddccbbaa99); + +- result64 = shrxq(0xffeeddccbbaa9988, 8); +- assert(result64 == 0x00ffeeddccbbaa99); ++ result = shrxq(0xffeeddccbbaa9988, 8); ++ assert(result == 0x00ffeeddccbbaa99); + +- result64 = shrxq(0x77eeddccbbaa9988, 8 | 192); +- assert(result64 == 0x0077eeddccbbaa99); ++ result = shrxq(0x77eeddccbbaa9988, 8 | 192); ++ assert(result == 0x0077eeddccbbaa99); + +- result64 = shlxq(0xffeeddccbbaa9988, 8); +- assert(result64 == 0xeeddccbbaa998800); ++ result = shlxq(0xffeeddccbbaa9988, 8); ++ assert(result == 0xeeddccbbaa998800); + #endif + + /* 32 bits */ +- result32 = andnl(mask, ehlo); +- assert(result32 == 0x04d4c4844); ++ result = andnl(mask, ehlo); ++ assert(result == 0x04d4c4844); + +- result32 = pextl((uint32_t) ehlo, mask); +- assert(result32 == 5); ++ result = pextl((uint32_t) ehlo, mask); ++ assert(result == 5); + +- result32 = pdepl(result32, mask); +- assert(result32 == (uint32_t)(ehlo & mask)); ++ result = pdepl(result, mask); ++ assert(result == (uint32_t)(ehlo & mask)); + +- result32 = pextl(-1u, mask); +- assert(result32 == 7); /* mask has 3 bits set */ ++ result = pextl(-1u, mask); ++ assert(result == 7); /* mask has 3 bits set */ + +- result32 = pdepl(-1u, mask); +- assert(result32 == (uint32_t)mask); ++ result = pdepl(-1u, 
mask); ++ assert(result == (uint32_t)mask); + +- result32 = bextrl(mask, 0x1f00); +- assert(result32 == (mask & ~INT32_MIN)); ++ result = bextrl(mask, 0x1f00); ++ assert(result == (mask & ~INT32_MIN)); + +- result32 = bextrl(ehlo, 0x1018); +- assert(result32 == 0x4f); ++ result = bextrl(ehlo, 0x1018); ++ assert(result == 0x4f); + +- result32 = bextrl(mask, 0x1038); +- assert(result32 == 0); ++ result = bextrl(mask, 0x1038); ++ assert(result == 0); + +- result32 = blsil(0xffff); +- assert(result32 == 1); ++ result = blsil(0xffff); ++ assert(result == 1); + +- result32 = blsmskl(0x300); +- assert(result32 == 0x1ff); ++ result = blsmskl(0x300); ++ assert(result == 0x1ff); + +- result32 = blsrl(0xffc); +- assert(result32 == 0xff8); ++ result = blsrl(0xffc); ++ assert(result == 0xff8); + +- result32 = bzhil(mask, 0xf); +- assert(result32 == 1); ++ result = bzhil(mask, 0xf); ++ assert(result == 1); + +- result32 = rorxl(0x65768798, 8); +- assert(result32 == 0x98657687); ++ result = rorxl(0x65768798, 8); ++ assert(result == 0x98657687); + +- result32 = sarxl(0xffeeddcc, 8); +- assert(result32 == 0xffffeedd); ++ result = sarxl(0xffeeddcc, 8); ++ assert(result == 0xffffeedd); + +- result32 = sarxl(0x77eeddcc, 8 | 32); +- assert(result32 == 0x0077eedd); ++ result = sarxl(0x77eeddcc, 8 | 32); ++ assert(result == 0x0077eedd); + +- result32 = shrxl(0xffeeddcc, 8); +- assert(result32 == 0x00ffeedd); ++ result = shrxl(0xffeeddcc, 8); ++ assert(result == 0x00ffeedd); + +- result32 = shrxl(0x77eeddcc, 8 | 128); +- assert(result32 == 0x0077eedd); ++ result = shrxl(0x77eeddcc, 8 | 128); ++ assert(result == 0x0077eedd); + +- result32 = shlxl(0xffeeddcc, 8); +- assert(result32 == 0xeeddcc00); ++ result = shlxl(0xffeeddcc, 8); ++ assert(result == 0xeeddcc00); + + return 0; + } +-- +2.39.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 5061083..f230c8c 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: 
QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.2.0 -Release: 12%{?rcrel}%{?dist}%{?cc_suffix} +Release: 13%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -400,6 +400,22 @@ Patch125: kvm-physmem-add-missing-memory-barrier.patch Patch126: kvm-async-update-documentation-of-the-memory-barriers.patch # For bz#2175660 - Guest hangs when starting or rebooting Patch127: kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch +# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) +Patch128: kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch +# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) +Patch129: kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch +# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) +Patch130: kvm-target-i386-Fix-BEXTR-instruction.patch +# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) +Patch131: kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch +# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) +Patch132: kvm-target-i386-fix-ADOX-followed-by-ADCX.patch +# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) +Patch133: kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch +# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) +Patch134: kvm-target-i386-Fix-BZHI-instruction.patch +# For bz#2156876 - [virtual network][rhel7.9_guest] qemu-kvm: vhost vring error in virtqueue 1: Invalid argument (22) +Patch135: kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch %if %{have_clang} BuildRequires: clang @@ -1430,6 +1446,20 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif 
%changelog +* Mon Mar 20 2023 Miroslav Rezanina - 7.2.0-13 +- kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch [bz#2173590] +- kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch [bz#2173590] +- kvm-target-i386-Fix-BEXTR-instruction.patch [bz#2173590] +- kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch [bz#2173590] +- kvm-target-i386-fix-ADOX-followed-by-ADCX.patch [bz#2173590] +- kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch [bz#2173590] +- kvm-target-i386-Fix-BZHI-instruction.patch [bz#2173590] +- kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch [bz#2156876] +- Resolves: bz#2173590 + (bugs in emulation of BMI instructions (for libguestfs without KVM)) +- Resolves: bz#2156876 + ([virtual network][rhel7.9_guest] qemu-kvm: vhost vring error in virtqueue 1: Invalid argument (22)) + * Sun Mar 12 2023 Miroslav Rezanina - 7.2.0-12 - kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch [bz#2155748] - kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch [bz#2155748] From 42801435ce226f4cdf95178d30f2728fbdb5f3c1 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 20 Mar 2023 03:43:20 -0400 Subject: [PATCH 188/195] * Mon Mar 20 2023 Miroslav Rezanina - 7.2.0-14 - Rebuild for 9.2 release - Resolves: bz#2173590 (bugs in emulation of BMI instructions (for libguestfs without KVM)) - Resolves: bz#2156876 ([virtual network][rhel7.9_guest] qemu-kvm: vhost vring error in virtqueue 1: Invalid argument (22)) --- qemu-kvm.spec | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/qemu-kvm.spec b/qemu-kvm.spec index f230c8c..7b64c6d 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.2.0 -Release: 13%{?rcrel}%{?dist}%{?cc_suffix} +Release: 14%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -1446,6 +1446,13 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Mar 20 2023 Miroslav Rezanina - 7.2.0-14 +- Rebuild for 9.2 release +- Resolves: bz#2173590 + (bugs in emulation of BMI instructions (for libguestfs without KVM)) +- Resolves: bz#2156876 + ([virtual network][rhel7.9_guest] qemu-kvm: vhost vring error in virtqueue 1: Invalid argument (22)) + * Mon Mar 20 2023 Miroslav Rezanina - 7.2.0-13 - kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch [bz#2173590] - kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch [bz#2173590] From 07d01bd47bfe4213a7ade4fde642af250eb16652 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Thu, 20 Apr 2023 02:33:14 -0400 Subject: [PATCH 189/195] * Thu Apr 20 2023 Miroslav Rezanina - 8.0.0-1 - Rebase to QEMU 8.0.0 - Resolves: bz#2180898 (Rebase to QEMU 8.0.0 for RHEL 9.3.0) --- .gitignore | 1 + 0004-Initial-redhat-build.patch | 40 +- 0005-Enable-disable-devices-for-RHEL.patch | 85 +- ...Machine-type-related-general-changes.patch | 78 +- 0007-Add-aarch64-machine-types.patch | 177 ++-- 0008-Add-ppc64-machine-types.patch | 52 +- 0009-Add-s390x-machine-types.patch | 68 +- 0010-Add-x86_64-machine-types.patch | 141 +-- 0011-Enable-make-check.patch | 136 ++- ...mber-of-devices-that-can-be-assigned.patch | 10 +- ...Add-support-statement-to-help-output.patch | 6 +- ...documentation-instead-of-qemu-system.patch | 8 +- ...e-at-least-64kiB-pages-for-downstrea.patch | 60 -- ...on-warning-when-opening-v2-images-rw.patch | 10 +- ...-add-usb-support-to-guest-get-fsinfo.patch | 16 +- 0017-Add-RHEL-9.2.0-compat-structure.patch | 110 +++ ...ompat-bits-for-RHEL-9.1-machine-type.patch | 26 - ...c-Update-x86-machine-type-compatibil.patch | 76 ++ 0019-Disable-unwanted-new-devices.patch | 83 ++ ...90x-machine-type-compatibility-for-Q.patch | 47 - 
...ch64-add-rhel9.2.0-virt-machine-type.patch | 43 - ...dd-new-rhel-9.2.0-s390x-machine-type.patch | 62 -- 0022-x86-rhel-9.2.0-machine-type.patch | 75 -- kvm-KVM-keep-track-of-running-ioctls.patch | 82 -- ...r-Introduce-nested-event-loop-in-vho.patch | 140 --- ...r-Monitor-slave-channel-in-vhost_use.patch | 143 --- ...el-introduce-accelerator-blocker-API.patch | 348 ------- ...PUJumpCache-in-tb_jmp_cache_clear_pa.patch | 58 -- ...aio-wait-switch-to-smp_mb__after_rmw.patch | 50 - ...sage-of-barriers-in-the-polling-case.patch | 66 -- ...documentation-of-the-memory-barriers.patch | 111 --- ...block-Call-drain-callbacks-only-once.patch | 250 ----- ...-t-poll-in-bdrv_replace_child_noperm.patch | 298 ------ ...subtree-drains-in-bdrv_drop_intermed.patch | 54 -- ...Drain-individual-nodes-during-reopen.patch | 157 --- ...f-coroutine-in-bdrv_do_drained_begin.patch | 96 -- ...-locking-for-bdrv_reopen_queue_child.patch | 67 -- ...rove-empty-format-specific-info-dump.patch | 132 --- kvm-block-Inline-bdrv_drain_invoke.patch | 81 -- kvm-block-Remove-drained_end_counter.patch | 433 --------- ...ore_bds_parents-parameter-from-drain.patch | 274 ------ ...l-parameter-from-bdrv_parent_drained.patch | 106 --- kvm-block-Remove-subtree-drains.patch | 896 ------------------ ...rv_drained_begin-end-to-non-coroutin.patch | 302 ------ ...Split-BlockNodeInfo-off-of-ImageInfo.patch | 246 ----- ...drop-bdrv_remove_filter_or_cow_child.patch | 70 -- ...ck-file-Add-file-specific-image-info.patch | 145 --- ...d-indentation-to-bdrv_node_info_dump.patch | 206 ---- kvm-block-qapi-Introduce-BlockGraphInfo.patch | 155 --- ...pi-Let-bdrv_query_image_info-recurse.patch | 197 ---- ...y-hold-the-new-AioContext-of-bs_top-.patch | 99 -- kvm-block-vmdk-Change-extent-info-type.patch | 140 --- ...ent-dma_blk_cb-vs-dma_aio_cancel-rac.patch | 127 --- kvm-edu-add-smp_mb__after_rmw.patch | 61 -- ...rm-virt-Add-compact-highmem-property.patch | 169 ---- ...properties-to-disable-high-memory-re.patch | 179 ---- 
...le-compat-high-memory-region-address.patch | 51 - ...ove-high-memory-region-address-assig.patch | 112 --- ...oduce-variable-region_base-in-virt_s.patch | 82 -- ...oduce-virt_get_high_memmap_enabled-h.patch | 95 -- ...ntroduce-virt_set_high_memmap-helper.patch | 130 --- ...me-variable-size-to-region_size-in-v.patch | 83 -- ...fix-field-corruption-in-type-4-table.patch | 59 -- ...-fail-DEVIOTLB_UNMAP-without-dt-mode.patch | 64 -- ...port-for-MSG_PEEK-for-socket-channel.patch | 386 -------- ...-106-214-308-Read-only-one-size-line.patch | 99 -- ...otests-Filter-child-node-information.patch | 171 ---- kvm-kvm-Atomic-memslot-updates.patch | 286 ------ kvm-linux-headers-Update-to-v6.1.patch | 577 ----------- ...magic-value-for-deciding-the-mapping.patch | 330 ------- ...-new-option-to-automatically-reconne.patch | 325 ------- kvm-physmem-add-missing-memory-barrier.patch | 55 -- ...qatomic-add-smp_mb__before-after_rmw.patch | 177 ---- ...tical-corruption-in-store_bitmap-err.patch | 67 -- ...n-t-yield-in-bdrv_qed_co_drain_begin.patch | 84 -- ...coroutine-lock-add-smp_mb__after_rmw.patch | 75 -- ...ge-info-key-names-for-protocol-nodes.patch | 197 ---- kvm-qemu-img-Let-info-print-block-graph.patch | 261 ----- kvm-qemu-img-Use-BlockNodeInfo.patch | 241 ----- ...Report-errors-while-closing-the-imag.patch | 70 -- ...Report-errors-while-closing-the-imag.patch | 67 -- ...t-qemu-img-bitmap-commit-exit-code-o.patch | 166 ---- ...posix-cleanup-fix-document-QemuEvent.patch | 146 --- ...win32-cleanup-fix-document-QemuEvent.patch | 162 ---- ...dhat-fix-virt-rhel9.2.0-compat-props.patch | 43 - kvm-s390x-pci-coalesce-unmap-operations.patch | 125 --- ...ISM-passthrough-devices-on-shutdown-.patch | 147 --- ...-DMA-aperture-to-be-bound-by-vfio-DM.patch | 91 -- ...390x-pv-Implement-a-CGS-check-helper.patch | 109 --- ...o-ccw-Activate-zPCI-features-on-s390.patch | 70 -- ...otect-req-aiocb-with-AioContext-lock.patch | 176 ---- ...ubtree-drain-with-a-single-node-drai.patch | 159 ---- 
...-32-bit-AD-CO-X-insns-in-64-bit-mode.patch | 144 --- kvm-target-i386-Fix-BEXTR-instruction.patch | 110 --- kvm-target-i386-Fix-BZHI-instruction.patch | 77 -- ...i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch | 60 -- ...arget-i386-fix-ADOX-followed-by-ADCX.patch | 205 ---- ...operand-size-of-unary-SSE-operations.patch | 77 -- ...h_dump-Fix-memory-corruption-in-s390.patch | 50 - ...Don-t-yield-in-.bdrv_co_drained_begi.patch | 153 --- ...etdev-test-stream-and-dgram-backends.patch | 505 ---------- ...Introduce-and-use-reg_t-consistently.patch | 299 ------ kvm-util-userfaultfd-Add-uffd_open.patch | 169 ---- ...-userfaultfd-Support-dev-userfaultfd.patch | 94 -- ...arameter-to-vhost_vdpa_dma_map-unmap.patch | 221 ----- kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch | 94 -- ...dd-vhost_vdpa_net_valid_svq_features.patch | 76 -- ...a-allocate-SVQ-array-unconditionally.patch | 50 - ...ys-start-CVQ-in-SVQ-mode-if-possible.patch | 193 ---- ...le-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch | 44 - ...HOST_BACKEND_F_IOTLB_ASID-flag-check.patch | 48 - ...IO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch | 59 -- ...move-SVQ-vring-features-check-to-net.patch | 118 --- kvm-vdpa-request-iova_range-only-once.patch | 145 --- ...re-x-svq-parameter-in-VhostVDPAState.patch | 62 -- ...w_vqs_enabled-in-vhost_vdpa_svqs_sta.patch | 58 -- ...-add-support-for-configure-interrupt.patch | 185 ---- ...VQ-device-file-descriptors-at-device.patch | 171 ---- ...ty-bitmap-syncing-when-vIOMMU-is-ena.patch | 157 --- ...e-new-VhostOps-vhost_set_config_call.patch | 56 -- ...ove-iova_tree-set-to-vhost_svq_start.patch | 122 --- ...SVQ-device-call-handler-at-SVQ-start.patch | 73 -- ...dpa-add-support-for-config-interrupt.patch | 73 -- ...-add-support-for-configure-interrupt.patch | 115 --- ...ntroduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch | 262 ----- ...-add-support-for-configure-interrupt.patch | 80 -- ...-add-support-for-configure-interrupt.patch | 115 --- ...-add-support-for-configure-interrupt.patch | 274 ------ 
...uple-notifier-from-interrupt-process.patch | 272 ------ ...ple-the-single-vector-from-the-inter.patch | 212 ----- ...pci-fix-migration-compat-for-vectors.patch | 53 -- ...ix-transitional-migration-compat-for.patch | 47 - ...t-SCSI-devices-from-main-loop-thread.patch | 325 ------- ...y-virtio_net_get_config-to-early-ret.patch | 74 -- ...VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch | 46 - qemu-kvm.spec | 264 +----- sources | 2 +- 137 files changed, 783 insertions(+), 18192 deletions(-) delete mode 100644 0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch rename 0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch => 0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch (92%) rename kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch => 0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch (81%) create mode 100644 0017-Add-RHEL-9.2.0-compat-structure.patch delete mode 100644 0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch create mode 100644 0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch create mode 100644 0019-Disable-unwanted-new-devices.patch delete mode 100644 0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch delete mode 100644 0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch delete mode 100644 0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch delete mode 100644 0022-x86-rhel-9.2.0-machine-type.patch delete mode 100644 kvm-KVM-keep-track-of-running-ioctls.patch delete mode 100644 kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch delete mode 100644 kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch delete mode 100644 kvm-accel-introduce-accelerator-blocker-API.patch delete mode 100644 kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch delete mode 100644 kvm-aio-wait-switch-to-smp_mb__after_rmw.patch delete mode 100644 kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch delete mode 100644 
kvm-async-update-documentation-of-the-memory-barriers.patch delete mode 100644 kvm-block-Call-drain-callbacks-only-once.patch delete mode 100644 kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch delete mode 100644 kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch delete mode 100644 kvm-block-Drain-individual-nodes-during-reopen.patch delete mode 100644 kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch delete mode 100644 kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch delete mode 100644 kvm-block-Improve-empty-format-specific-info-dump.patch delete mode 100644 kvm-block-Inline-bdrv_drain_invoke.patch delete mode 100644 kvm-block-Remove-drained_end_counter.patch delete mode 100644 kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch delete mode 100644 kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch delete mode 100644 kvm-block-Remove-subtree-drains.patch delete mode 100644 kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch delete mode 100644 kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch delete mode 100644 kvm-block-drop-bdrv_remove_filter_or_cow_child.patch delete mode 100644 kvm-block-file-Add-file-specific-image-info.patch delete mode 100644 kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch delete mode 100644 kvm-block-qapi-Introduce-BlockGraphInfo.patch delete mode 100644 kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch delete mode 100644 kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch delete mode 100644 kvm-block-vmdk-Change-extent-info-type.patch delete mode 100644 kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch delete mode 100644 kvm-edu-add-smp_mb__after_rmw.patch delete mode 100644 kvm-hw-arm-virt-Add-compact-highmem-property.patch delete mode 100644 kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch delete mode 100644 kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch delete mode 100644 
kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch delete mode 100644 kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch delete mode 100644 kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch delete mode 100644 kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch delete mode 100644 kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch delete mode 100644 kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch delete mode 100644 kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch delete mode 100644 kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch delete mode 100644 kvm-iotests-106-214-308-Read-only-one-size-line.patch delete mode 100644 kvm-iotests-Filter-child-node-information.patch delete mode 100644 kvm-kvm-Atomic-memslot-updates.patch delete mode 100644 kvm-linux-headers-Update-to-v6.1.patch delete mode 100644 kvm-migration-check-magic-value-for-deciding-the-mapping.patch delete mode 100644 kvm-net-stream-add-a-new-option-to-automatically-reconne.patch delete mode 100644 kvm-physmem-add-missing-memory-barrier.patch delete mode 100644 kvm-qatomic-add-smp_mb__before-after_rmw.patch delete mode 100644 kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch delete mode 100644 kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch delete mode 100644 kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch delete mode 100644 kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch delete mode 100644 kvm-qemu-img-Let-info-print-block-graph.patch delete mode 100644 kvm-qemu-img-Use-BlockNodeInfo.patch delete mode 100644 kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch delete mode 100644 kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch delete mode 100644 kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch delete mode 100644 kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch delete mode 100644 kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch delete 
mode 100644 kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch delete mode 100644 kvm-s390x-pci-coalesce-unmap-operations.patch delete mode 100644 kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch delete mode 100644 kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch delete mode 100644 kvm-s390x-pv-Implement-a-CGS-check-helper.patch delete mode 100644 kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch delete mode 100644 kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch delete mode 100644 kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch delete mode 100644 kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch delete mode 100644 kvm-target-i386-Fix-BEXTR-instruction.patch delete mode 100644 kvm-target-i386-Fix-BZHI-instruction.patch delete mode 100644 kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch delete mode 100644 kvm-target-i386-fix-ADOX-followed-by-ADCX.patch delete mode 100644 kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch delete mode 100644 kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch delete mode 100644 kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch delete mode 100644 kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch delete mode 100644 kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch delete mode 100644 kvm-util-userfaultfd-Add-uffd_open.patch delete mode 100644 kvm-util-userfaultfd-Support-dev-userfaultfd.patch delete mode 100644 kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch delete mode 100644 kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch delete mode 100644 kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch delete mode 100644 kvm-vdpa-allocate-SVQ-array-unconditionally.patch delete mode 100644 kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch delete mode 100644 kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch delete mode 100644 kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch 
delete mode 100644 kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch delete mode 100644 kvm-vdpa-move-SVQ-vring-features-check-to-net.patch delete mode 100644 kvm-vdpa-request-iova_range-only-once.patch delete mode 100644 kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch delete mode 100644 kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch delete mode 100644 kvm-vhost-add-support-for-configure-interrupt.patch delete mode 100644 kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch delete mode 100644 kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch delete mode 100644 kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch delete mode 100644 kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch delete mode 100644 kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch delete mode 100644 kvm-vhost-vdpa-add-support-for-config-interrupt.patch delete mode 100644 kvm-virtio-add-support-for-configure-interrupt.patch delete mode 100644 kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch delete mode 100644 kvm-virtio-mmio-add-support-for-configure-interrupt.patch delete mode 100644 kvm-virtio-net-add-support-for-configure-interrupt.patch delete mode 100644 kvm-virtio-pci-add-support-for-configure-interrupt.patch delete mode 100644 kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch delete mode 100644 kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch delete mode 100644 kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch delete mode 100644 kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch delete mode 100644 kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch delete mode 100644 kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch delete mode 100644 kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch diff --git a/.gitignore b/.gitignore index 8f95454..6df5e29 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,4 @@ /capstone.tar.gz 
/qemu-7.1.0.tar.xz /qemu-7.2.0.tar.xz +/qemu-8.0.0.tar.xz diff --git a/0004-Initial-redhat-build.patch b/0004-Initial-redhat-build.patch index 0f9cc55..612633e 100644 --- a/0004-Initial-redhat-build.patch +++ b/0004-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From ccc4a5bdc8c2f27678312364a7c12aeafd009bb6 Mon Sep 17 00:00:00 2001 +From 84039bfc860878f3c3421de4a1836ac5d6300ed7 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 26 May 2021 10:56:02 +0200 Subject: Initial redhat build @@ -13,7 +13,7 @@ several issues are fixed in QEMU tree: We disable make check due to issues with some of the tests. -This rebase is based on qemu-kvm-7.1.0-7.el9 +This rebase is based on qemu-kvm-7.2.0-14.el9 Signed-off-by: Miroslav Rezanina -- @@ -66,6 +66,16 @@ Rebase changes (7.2.0): - Fix SRPM name generation to work on Fedora 37 - Switch back to system meson +Rebase changes (8.0.0-rc1): +- use enable-dtrace-backands instead of enable-dtrace-backend +- Removed qemu virtiofsd bits + +Rebase changes (8.0.0-rc2): +- test/check-block.sh removed (upstream) + +Rebase changes (8.0.0-rc3): +- Add new --disable-* options for configure + Merged patches (6.0.0): - 605758c902 Limit build on Power to qemu-img and qemu-ga only @@ -162,16 +172,18 @@ Merged patches (7.2.0 rc4): - 8c6834feb6 Remove opengl display device subpackages (C9S MR 124) - 0ecc97f29e spec: Add requires for packages with additional virtio-gpu variants (C9S MR 124) -Signed-off-by: Miroslav Rezanina +Merged patches (8.0.0-rc1): +- 7754f6ba78 Minor packaging fixes +- 401af56187 spec: Disable VDUSE -fix +Signed-off-by: Miroslav Rezanina --- .distro/Makefile | 100 + .distro/Makefile.common | 41 + .distro/README.tests | 39 + .distro/modules-load.conf | 4 + .distro/qemu-guest-agent.service | 1 - - .distro/qemu-kvm.spec.template | 4315 +++++++++++++++++++++++ + .distro/qemu-kvm.spec.template | 4528 +++++++++++++++++++++++ .distro/rpminspect.yaml | 6 +- .distro/scripts/extract_build_cmd.py | 12 + 
.distro/scripts/process-patches.sh | 4 + @@ -180,9 +192,8 @@ fix scripts/qemu-guest-agent/fsfreeze-hook | 2 +- scripts/systemtap/conf.d/qemu_kvm.conf | 4 + scripts/systemtap/script.d/qemu_kvm.stp | 1 + - tests/check-block.sh | 2 + ui/vnc-auth-sasl.c | 2 +- - 16 files changed, 4573 insertions(+), 4 deletions(-) + 15 files changed, 4784 insertions(+), 4 deletions(-) create mode 100644 .distro/Makefile create mode 100644 .distro/Makefile.common create mode 100644 .distro/README.tests @@ -271,19 +282,6 @@ index 0000000000..c04abf9449 +++ b/scripts/systemtap/script.d/qemu_kvm.stp @@ -0,0 +1 @@ +probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} -diff --git a/tests/check-block.sh b/tests/check-block.sh -index 5de2c1ba0b..6af743f441 100755 ---- a/tests/check-block.sh -+++ b/tests/check-block.sh -@@ -22,6 +22,8 @@ if [ -z "$(find . -name 'qemu-system-*' -print)" ]; then - skip "No qemu-system binary available ==> Not running the qemu-iotests." 
- fi - -+exit 0 -+ - cd tests/qemu-iotests - - # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c index 47fdae5b21..2a950caa2a 100644 --- a/ui/vnc-auth-sasl.c @@ -298,5 +296,5 @@ index 47fdae5b21..2a950caa2a 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -2.31.1 +2.39.1 diff --git a/0005-Enable-disable-devices-for-RHEL.patch b/0005-Enable-disable-devices-for-RHEL.patch index 767389f..14dd3f9 100644 --- a/0005-Enable-disable-devices-for-RHEL.patch +++ b/0005-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 90366cd2ead5a5301aaceed56477d2e6d9f1b3cd Mon Sep 17 00:00:00 2001 +From 63829772dbc2075fc014a9d52e3968735d228018 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 7 Dec 2022 03:05:48 -0500 Subject: Enable/disable devices for RHEL @@ -32,6 +32,11 @@ Rebase notes (7.1.0 rc3): Rebase notes (7.2.0 rc20): - Removed disabling a15mpcore.c as no longer needed +Rebase notes (8.0.0-rc1): +- Rename CONFIG_ACPI_X86_ICH to CONFIG_ACPI_ICH9 +- Inlude qemu/error-report.h in hw/display/cirrus_vga.c +- Change virtiofsd dependency version + Merged patches (6.1.0): - c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak - 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI @@ -63,7 +68,7 @@ Merged patches (7.1.0 rc0): hw/arm/meson.build | 2 +- hw/block/fdc.c | 10 ++ hw/cpu/meson.build | 3 +- - hw/display/cirrus_vga.c | 5 +- + hw/display/cirrus_vga.c | 7 +- hw/ide/piix.c | 5 +- hw/input/pckbd.c | 2 + hw/net/e1000.c | 2 + @@ -73,7 +78,7 @@ Merged patches (7.1.0 rc0): target/ppc/cpu-models.c | 9 ++ target/s390x/cpu_models_sysemu.c | 3 + target/s390x/kvm/kvm.c | 8 ++ - 19 files changed, 283 insertions(+), 13 deletions(-) + 19 files changed, 285 insertions(+), 13 deletions(-) create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak create mode 100644 
configs/devices/rh-virtio.mak @@ -212,7 +217,7 @@ index 0000000000..69a799adbd +CONFIG_VHOST_USER_FS=y diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..10cb0a14e0 +index 0000000000..668b2d0e18 --- /dev/null +++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak @@ -0,0 +1,109 @@ @@ -226,7 +231,7 @@ index 0000000000..10cb0a14e0 +CONFIG_ACPI_SMBUS=y +CONFIG_ACPI_VMGENID=y +CONFIG_ACPI_X86=y -+CONFIG_ACPI_X86_ICH=y ++CONFIG_ACPI_ICH9=y +CONFIG_AHCI=y +CONFIG_APIC=y +CONFIG_APM=y @@ -326,10 +331,10 @@ index 0000000000..10cb0a14e0 +CONFIG_VHOST_USER_VSOCK=y +CONFIG_VHOST_USER_FS=y diff --git a/hw/arm/meson.build b/hw/arm/meson.build -index 92f9f6e000..c5e94c997c 100644 +index b545ba0e4f..a41a16cba7 100644 --- a/hw/arm/meson.build +++ b/hw/arm/meson.build -@@ -30,7 +30,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) +@@ -29,7 +29,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c')) arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c')) @@ -339,7 +344,7 @@ index 92f9f6e000..c5e94c997c 100644 arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index 64ae4a6899..9b8e782c19 100644 +index d7cc4d3ec1..12d0a60905 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -49,6 +49,8 @@ @@ -367,7 +372,7 @@ index 64ae4a6899..9b8e782c19 100644 error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); return; diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build -index 9e52fee9e7..87c209a754 100644 +index e37490074f..4431e3731c 100644 --- a/hw/cpu/meson.build +++ b/hw/cpu/meson.build @@ -1,4 +1,5 @@ @@ -375,13 +380,29 @@ index 9e52fee9e7..87c209a754 100644 +#softmmu_ss.add(files('core.c', 'cluster.c')) 
+softmmu_ss.add(files('core.c')) - specific_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) - specific_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) + softmmu_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) + softmmu_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index 6e8c747c46..1948ebee8e 100644 +index b80f98b6c4..cbde6a8f15 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c -@@ -2946,7 +2946,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) +@@ -36,6 +36,7 @@ + #include "qemu/module.h" + #include "qemu/units.h" + #include "qemu/log.h" ++#include "qemu/error-report.h" + #include "sysemu/reset.h" + #include "qapi/error.h" + #include "trace.h" +@@ -47,6 +48,7 @@ + #include "qom/object.h" + #include "ui/console.h" + ++ + /* + * TODO: + * - destination write mask support not complete (bits 5..7) +@@ -2946,7 +2948,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); int16_t device_id = pc->device_id; @@ -394,10 +415,10 @@ index 6e8c747c46..1948ebee8e 100644 * Also accept 8 MB/16 MB for backward compatibility. 
*/ diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index 267dbf37db..87fcda4062 100644 +index 41d60921e3..a4af45b4e8 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -199,7 +199,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -193,7 +193,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -407,7 +428,7 @@ index 267dbf37db..87fcda4062 100644 } static const TypeInfo piix3_ide_info = { -@@ -222,6 +223,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -216,6 +217,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -430,10 +451,10 @@ index b92b63bedc..3b6235dde6 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index e26e0a64c1..41492fae79 100644 +index 23d660619f..b75c9aa799 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1824,6 +1824,7 @@ static const E1000Info e1000_devices[] = { +@@ -1805,6 +1805,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -441,7 +462,7 @@ index e26e0a64c1..41492fae79 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1836,6 +1837,7 @@ static const E1000Info e1000_devices[] = { +@@ -1817,6 +1818,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -467,7 +488,7 @@ index 8a4861f45a..fcb5dfe792 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/meson.build b/hw/usb/meson.build -index 793df42e21..cd3c305471 100644 +index 599dc24f0d..905a994c3a 100644 --- a/hw/usb/meson.build +++ b/hw/usb/meson.build @@ -52,7 +52,7 @@ softmmu_ss.add(when: 
'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade @@ -480,10 +501,10 @@ index 793df42e21..cd3c305471 100644 endif diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index 9a2cef7d05..a528ff9a3d 100644 +index df0c45e523..c154a4dcf2 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c -@@ -151,6 +151,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) +@@ -155,6 +155,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) /* CPU models. These are not needed for the AArch64 linux-user build. */ #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) @@ -491,7 +512,7 @@ index 9a2cef7d05..a528ff9a3d 100644 #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { -@@ -504,6 +505,7 @@ static void cortex_a9_initfn(Object *obj) +@@ -508,6 +509,7 @@ static void cortex_a9_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x41093000; define_arm_cp_regs(cpu, cortexa9_cp_reginfo); } @@ -499,7 +520,7 @@ index 9a2cef7d05..a528ff9a3d 100644 #ifndef CONFIG_USER_ONLY static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -528,6 +530,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { +@@ -532,6 +534,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, }; @@ -507,7 +528,7 @@ index 9a2cef7d05..a528ff9a3d 100644 static void cortex_a7_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -576,6 +579,7 @@ static void cortex_a7_initfn(Object *obj) +@@ -580,6 +583,7 @@ static void cortex_a7_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x41072000; define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ } @@ -515,7 +536,7 @@ index 9a2cef7d05..a528ff9a3d 100644 static void cortex_a15_initfn(Object *obj) { -@@ -624,6 +628,7 @@ static void cortex_a15_initfn(Object *obj) +@@ -628,6 +632,7 @@ static void cortex_a15_initfn(Object *obj) define_arm_cp_regs(cpu, cortexa15_cp_reginfo); } @@ 
-523,7 +544,7 @@ index 9a2cef7d05..a528ff9a3d 100644 static void cortex_m0_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -1065,6 +1070,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) +@@ -1110,6 +1115,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) cc->gdb_core_xml_file = "arm-m-profile.xml"; } @@ -531,7 +552,7 @@ index 9a2cef7d05..a528ff9a3d 100644 #ifndef TARGET_AARCH64 /* -@@ -1132,6 +1138,7 @@ static void arm_max_initfn(Object *obj) +@@ -1177,6 +1183,7 @@ static void arm_max_initfn(Object *obj) #endif /* !TARGET_AARCH64 */ static const ARMCPUInfo arm_tcg_cpus[] = { @@ -539,7 +560,7 @@ index 9a2cef7d05..a528ff9a3d 100644 { .name = "arm926", .initfn = arm926_initfn }, { .name = "arm946", .initfn = arm946_initfn }, { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1147,7 +1154,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1192,7 +1199,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "cortex-a7", .initfn = cortex_a7_initfn }, { .name = "cortex-a8", .initfn = cortex_a8_initfn }, { .name = "cortex-a9", .initfn = cortex_a9_initfn }, @@ -549,7 +570,7 @@ index 9a2cef7d05..a528ff9a3d 100644 { .name = "cortex-m0", .initfn = cortex_m0_initfn, .class_init = arm_v7m_class_init }, { .name = "cortex-m3", .initfn = cortex_m3_initfn, -@@ -1178,6 +1187,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1224,6 +1233,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, @@ -620,7 +641,7 @@ index 912b037c63..cd3ff700ac 100644 { NULL, NULL } }; diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c -index d8a141a023..d086b1c39c 100644 +index 63981bf36b..87a4480c05 100644 --- a/target/s390x/cpu_models_sysemu.c +++ b/target/s390x/cpu_models_sysemu.c @@ -35,6 +35,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, @@ -653,5 
+674,5 @@ index 3ac7ec9acf..97da1a6424 100644 prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ -- -2.31.1 +2.39.1 diff --git a/0006-Machine-type-related-general-changes.patch b/0006-Machine-type-related-general-changes.patch index fc2a89d..5dd591f 100644 --- a/0006-Machine-type-related-general-changes.patch +++ b/0006-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From 0208f38671b9de4036c0d56142a7f22e5091bae0 Mon Sep 17 00:00:00 2001 +From c13f8e21b32aa06b08847e88080f2fdea5084a9b Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -46,28 +46,33 @@ Merged patches (7.1.0 rc0): Merged patches (7.2.0 rc0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) + +Merged patches (8.0.0-rc1): +- 21ed34787b Addd 7.2 compat bits for RHEL 9.1 machine type +- e5c8d5d603 virtio-rng-pci: fix migration compat for vectors +- 5a5fa77059 virtio-rng-pci: fix transitional migration compat for vectors --- hw/acpi/piix4.c | 2 +- hw/arm/virt.c | 2 +- - hw/core/machine.c | 222 +++++++++++++++++++++++++++++++++++ + hw/core/machine.c | 229 +++++++++++++++++++++++++++++++++++ hw/display/vga-isa.c | 2 +- hw/i386/pc_piix.c | 2 + hw/i386/pc_q35.c | 2 + hw/net/rtl8139.c | 4 +- - hw/smbios/smbios.c | 46 +++++++- + hw/smbios/smbios.c | 46 ++++++- hw/timer/i8254_common.c | 2 +- - hw/usb/hcd-xhci-pci.c | 59 +++++++--- + hw/usb/hcd-xhci-pci.c | 59 ++++++--- hw/usb/hcd-xhci-pci.h | 1 + include/hw/boards.h | 31 +++++ include/hw/firmware/smbios.h | 5 +- include/hw/i386/pc.h | 3 + - 14 files changed, 360 insertions(+), 23 deletions(-) + 14 files changed, 367 insertions(+), 23 deletions(-) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 0a81f1ad93..dbfb362a8f 100644 +index 63d2113b86..a24b9aac92 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -248,7 +248,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) 
+@@ -247,7 +247,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, @@ -77,25 +82,25 @@ index 0a81f1ad93..dbfb362a8f 100644 .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index b871350856..d633300fdc 100644 +index ac626b3bef..4a6e89c7bc 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1619,7 +1619,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1629,7 +1629,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, - true, SMBIOS_ENTRY_POINT_TYPE_64); + true, NULL, NULL, SMBIOS_ENTRY_POINT_TYPE_64); - smbios_get_tables(MACHINE(vms), NULL, 0, - &smbios_tables, &smbios_tables_len, + /* build the array of physical mem area from base_memmap */ + mem_array.address = vms->memmap[VIRT_MEM].base; diff --git a/hw/core/machine.c b/hw/core/machine.c -index 8d34caa31d..9edec1ca05 100644 +index cd13b8b0a3..5aa567fad3 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -40,6 +40,228 @@ - #include "hw/virtio/virtio-pci.h" - #include "qom/object_interfaces.h" +@@ -46,6 +46,235 @@ GlobalProperty hw_compat_7_2[] = { + }; + const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); +/* + * RHEL only: machine types for previous major releases are deprecated @@ -111,6 +116,13 @@ index 8d34caa31d..9edec1ca05 100644 + { "arm-gicv3-common", "force-8-bit-prio", "on" }, + /* hw_compat_rhel_9_1 from hw_compat_7_0 */ + { "nvme-ns", "eui64-default", "on"}, ++ /* hw_compat_rhel_9_1 from hw_compat_7_1 */ ++ { "virtio-device", "queue_reset", "false" }, ++ /* hw_compat_rhel_9_1 bz 2155749 */ ++ { "virtio-rng-pci", "vectors", "0" }, ++ /* hw_compat_rhel_9_1 bz 2162569 */ ++ { "virtio-rng-pci-transitional", "vectors", "0" }, ++ { "virtio-rng-pci-non-transitional", "vectors", "0" }, +}; +const size_t 
hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); + @@ -321,7 +333,7 @@ index 8d34caa31d..9edec1ca05 100644 + GlobalProperty hw_compat_7_1[] = { { "virtio-device", "queue_reset", "false" }, - }; + { "virtio-rng-pci", "vectors", "0" }, diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c index 2a5437d803..0db2c2b2a1 100644 --- a/hw/display/vga-isa.c @@ -336,10 +348,10 @@ index 2a5437d803..0db2c2b2a1 100644 }; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 0ad0ed1603..0985ff67d2 100644 +index 30eedd62a3..14a794081e 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -187,6 +187,8 @@ static void pc_init1(MachineState *machine, +@@ -201,6 +201,8 @@ static void pc_init1(MachineState *machine, smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -349,10 +361,10 @@ index 0ad0ed1603..0985ff67d2 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index a496bd6e74..ea582254e3 100644 +index 797ba347fd..dc0ba5f9e7 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -201,6 +201,8 @@ static void pc_q35_init(MachineState *machine) +@@ -202,6 +202,8 @@ static void pc_q35_init(MachineState *machine) smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, @@ -362,7 +374,7 @@ index a496bd6e74..ea582254e3 100644 } diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 700b1b66b6..13693aeb4f 100644 +index 5a5aaf868d..3d473d5869 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -3178,7 +3178,7 @@ static int rtl8139_pre_save(void *opaque) @@ -385,10 +397,10 @@ index 700b1b66b6..13693aeb4f 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index b4243de735..c5ad69237e 100644 +index d2007e70fb..319eae9e9d 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c -@@ -57,6 +57,9 @@ static bool smbios_legacy = true; +@@ -58,6 +58,9 @@ 
static bool smbios_legacy = true; static bool smbios_uuid_encoded = true; /* end: legacy structures & constants for <= 2.0 machines */ @@ -398,7 +410,7 @@ index b4243de735..c5ad69237e 100644 uint8_t *smbios_tables; size_t smbios_tables_len; -@@ -669,7 +672,7 @@ static void smbios_build_type_1_table(void) +@@ -670,7 +673,7 @@ static void smbios_build_type_1_table(void) static void smbios_build_type_2_table(void) { @@ -407,7 +419,7 @@ index b4243de735..c5ad69237e 100644 SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); SMBIOS_TABLE_SET_STR(2, product_str, type2.product); -@@ -977,7 +980,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) +@@ -980,7 +983,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) void smbios_set_defaults(const char *manufacturer, const char *product, const char *version, bool legacy_mode, @@ -419,7 +431,7 @@ index b4243de735..c5ad69237e 100644 { smbios_have_defaults = true; smbios_legacy = legacy_mode; -@@ -998,11 +1004,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, +@@ -1001,11 +1007,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, g_free(smbios_entries); } @@ -579,10 +591,10 @@ index 643d4643e4..529bad9366 100644 dc->vmsd = &vmstate_xhci_pci; set_bit(DEVICE_CATEGORY_USB, dc->categories); diff --git a/hw/usb/hcd-xhci-pci.h b/hw/usb/hcd-xhci-pci.h -index c193f79443..086a1feb1e 100644 +index 08f70ce97c..1be7527c1b 100644 --- a/hw/usb/hcd-xhci-pci.h +++ b/hw/usb/hcd-xhci-pci.h -@@ -39,6 +39,7 @@ typedef struct XHCIPciState { +@@ -40,6 +40,7 @@ typedef struct XHCIPciState { XHCIState xhci; OnOffAuto msi; OnOffAuto msix; @@ -591,10 +603,10 @@ index c193f79443..086a1feb1e 100644 #endif diff --git a/include/hw/boards.h b/include/hw/boards.h -index 90f1dd3aeb..2209d4e416 100644 +index 6fbbfd56c8..c5a965d27f 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -454,4 +454,35 @@ extern const size_t hw_compat_2_2_len; +@@ -459,4 +459,35 @@ 
extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; @@ -647,10 +659,10 @@ index 7f3259a630..d24b3ccd32 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index c95333514e..3754eaa97d 100644 +index 8206d5405a..908a275736 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -112,6 +112,9 @@ struct PCMachineClass { +@@ -111,6 +111,9 @@ struct PCMachineClass { bool smbios_defaults; bool smbios_legacy_mode; bool smbios_uuid_encoded; @@ -661,5 +673,5 @@ index c95333514e..3754eaa97d 100644 /* RAM / address space compat: */ bool gigabyte_align; -- -2.31.1 +2.39.1 diff --git a/0007-Add-aarch64-machine-types.patch b/0007-Add-aarch64-machine-types.patch index 06611e7..f47bbd0 100644 --- a/0007-Add-aarch64-machine-types.patch +++ b/0007-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From 8501581c99760ed8a800d0c98eeb17a4bf450366 Mon Sep 17 00:00:00 2001 +From ec6468b65a3af0e2b84575c9f965f61916d0d8ea Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -26,6 +26,9 @@ Rebase notes (7.1.0 rc3): Rebase notes (7.2.0 rc0): - Disabled cortex-a35 +Rebase notes (8.0.0-rc1): +- Moved changed code from target/arm/helper.c to target/arm/arm-qmp-cmds.c + Merged patches (6.2.0): - 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type - f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type @@ -49,23 +52,27 @@ Merged patches (7.1.0 rc0): Merged patches (7.2.0 rc0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) + +Merged patches (8.0.0-rc1): +- c1a21266d8 redhat: aarch64: add rhel9.2.0 virt machine type +- d97cd7c513 redhat: fix virt-rhel9.2.0 compat props --- - hw/arm/virt.c | 237 ++++++++++++++++++++++++++++++++- + hw/arm/virt.c | 251 ++++++++++++++++++++++++++++++++- include/hw/arm/virt.h | 8 ++ + 
target/arm/arm-qmp-cmds.c | 2 + target/arm/cpu-qom.h | 1 + target/arm/cpu.c | 5 + target/arm/cpu.h | 2 + target/arm/cpu64.c | 16 ++- target/arm/cpu_tcg.c | 12 +- - target/arm/helper.c | 2 + tests/qtest/arm-cpu-features.c | 6 + - 9 files changed, 277 insertions(+), 12 deletions(-) + 9 files changed, 289 insertions(+), 14 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index d633300fdc..dfcab40a73 100644 +index 4a6e89c7bc..1ae1654be5 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -80,6 +80,7 @@ +@@ -81,6 +81,7 @@ #include "hw/char/pl011.h" #include "qemu/guest-random.h" @@ -73,7 +80,7 @@ index d633300fdc..dfcab40a73 100644 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -106,7 +107,48 @@ +@@ -107,7 +108,48 @@ DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) @@ -123,7 +130,7 @@ index d633300fdc..dfcab40a73 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -197,15 +239,19 @@ static const int a15irqmap[] = { +@@ -204,16 +246,20 @@ static const int a15irqmap[] = { }; static const char *valid_cpus[] = { @@ -132,6 +139,7 @@ index d633300fdc..dfcab40a73 100644 ARM_CPU_TYPE_NAME("cortex-a15"), ARM_CPU_TYPE_NAME("cortex-a35"), ARM_CPU_TYPE_NAME("cortex-a53"), + ARM_CPU_TYPE_NAME("cortex-a55"), +#endif /* disabled for RHEL */ ARM_CPU_TYPE_NAME("cortex-a57"), +#if 0 /* Disabled for Red Hat Enterprise Linux */ @@ -143,7 +151,7 @@ index d633300fdc..dfcab40a73 100644 ARM_CPU_TYPE_NAME("host"), ARM_CPU_TYPE_NAME("max"), }; -@@ -2290,6 +2336,7 @@ static void machvirt_init(MachineState *machine) +@@ -2339,6 +2385,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -151,7 +159,7 @@ index d633300fdc..dfcab40a73 100644 static bool virt_get_secure(Object *obj, Error **errp) { 
VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2317,6 +2364,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2366,6 +2413,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -159,7 +167,25 @@ index d633300fdc..dfcab40a73 100644 static bool virt_get_highmem(Object *obj, Error **errp) { -@@ -2346,6 +2394,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) +@@ -2380,7 +2428,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) + + vms->highmem = value; + } +- ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_compact_highmem(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2436,7 +2484,7 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) + + vms->highmem_mmio = value; + } +- ++#endif /* disabled for RHEL */ + + static bool virt_get_its(Object *obj, Error **errp) + { +@@ -2452,6 +2500,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) vms->its = value; } @@ -167,7 +193,7 @@ index d633300fdc..dfcab40a73 100644 static bool virt_get_dtb_randomness(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2359,6 +2408,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) +@@ -2465,6 +2514,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) vms->dtb_randomness = value; } @@ -175,7 +201,7 @@ index d633300fdc..dfcab40a73 100644 static char *virt_get_oem_id(Object *obj, Error **errp) { -@@ -2442,6 +2492,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) +@@ -2548,6 +2598,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) vms->ras = value; } @@ -183,7 +209,7 @@ index d633300fdc..dfcab40a73 100644 static bool virt_get_mte(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2455,6 +2506,7 @@ static void virt_set_mte(Object *obj, bool value, Error 
**errp) +@@ -2561,6 +2612,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) vms->mte = value; } @@ -191,7 +217,7 @@ index d633300fdc..dfcab40a73 100644 static char *virt_get_gic_version(Object *obj, Error **errp) { -@@ -2886,6 +2938,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2988,6 +3040,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return fixed_ipa ? 0 : requested_pa_size; } @@ -199,7 +225,7 @@ index d633300fdc..dfcab40a73 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -3294,3 +3347,185 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -3441,3 +3494,195 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -312,6 +338,7 @@ index d633300fdc..dfcab40a73 100644 + + /* High memory is enabled by default */ + vms->highmem = true; ++ vms->highmem_compact = !vmc->no_highmem_compact; + vms->gic_version = VIRT_GIC_VERSION_NOSEL; + + vms->highmem_ecam = !vmc->no_highmem_ecam; @@ -374,22 +401,31 @@ index d633300fdc..dfcab40a73 100644 +} +type_init(rhel_machine_init); + ++static void rhel920_virt_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) ++ +static void rhel900_virt_options(MachineClass *mc) +{ + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + -+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ rhel920_virt_options(mc); ++ + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + + /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ + vmc->no_tcg_lpa2 = true; ++ /* Compact layout for high memory regions was introduced with 9.2.0 */ ++ vmc->no_highmem_compact = true; +} -+DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) ++DEFINE_RHEL_MACHINE(9, 0, 0) diff --git 
a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 6ec479ca2b..22b54ec510 100644 +index e1ddbea96b..81c2363a40 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -180,9 +180,17 @@ struct VirtMachineState { +@@ -187,9 +187,17 @@ struct VirtMachineState { #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -407,8 +443,28 @@ index 6ec479ca2b..22b54ec510 100644 void virt_acpi_setup(VirtMachineState *vms); bool virt_is_acpi_enabled(VirtMachineState *vms); +diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c +index c8fa524002..3aa089abf3 100644 +--- a/target/arm/arm-qmp-cmds.c ++++ b/target/arm/arm-qmp-cmds.c +@@ -231,6 +231,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, + static void arm_cpu_add_definition(gpointer data, gpointer user_data) + { + ObjectClass *oc = data; ++ CPUClass *cc = CPU_CLASS(oc); + CpuDefinitionInfoList **cpu_list = user_data; + CpuDefinitionInfo *info; + const char *typename; +@@ -240,6 +241,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) + info->name = g_strndup(typename, + strlen(typename) - strlen("-" TYPE_ARM_CPU)); + info->q_typename = g_strdup(typename); ++ info->deprecated = !!cc->deprecation_note; + + QAPI_LIST_PREPEND(*cpu_list, info); + } diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h -index 64c44cef2d..82e97249bc 100644 +index 514c22ced9..f789173451 100644 --- a/target/arm/cpu-qom.h +++ b/target/arm/cpu-qom.h @@ -35,6 +35,7 @@ typedef struct ARMCPUInfo { @@ -420,10 +476,10 @@ index 64c44cef2d..82e97249bc 100644 void arm_cpu_register(const ARMCPUInfo *info); diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 38d066c294..a845814bfb 100644 +index 5182ed0c91..6740a8b940 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c -@@ -2250,8 +2250,13 @@ static void arm_cpu_instance_init(Object *obj) +@@ -2290,8 +2290,13 @@ static void arm_cpu_instance_init(Object *obj) static void 
cpu_register_class_init(ObjectClass *oc, void *data) { ARMCPUClass *acc = ARM_CPU_CLASS(oc); @@ -438,7 +494,7 @@ index 38d066c294..a845814bfb 100644 void arm_cpu_register(const ARMCPUInfo *info) diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 9aeed3c848..f9f504d89e 100644 +index c097cae988..829d4a2328 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -34,6 +34,8 @@ @@ -451,10 +507,10 @@ index 9aeed3c848..f9f504d89e 100644 #define EXCP_SWI 2 /* software interrupt */ #define EXCP_PREFETCH_ABORT 3 diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index 3d74f134f5..4b330a52b5 100644 +index 0fb07cc7b6..47459627fb 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c -@@ -36,6 +36,7 @@ +@@ -31,6 +31,7 @@ #include "hw/qdev-properties.h" #include "internals.h" @@ -462,7 +518,7 @@ index 3d74f134f5..4b330a52b5 100644 static void aarch64_a35_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -115,6 +116,7 @@ static void aarch64_a35_initfn(Object *obj) +@@ -110,6 +111,7 @@ static void aarch64_a35_initfn(Object *obj) /* These values are the same with A53/A57/A72. 
*/ define_cortex_a72_a57_a53_cp_reginfo(cpu); } @@ -470,7 +526,7 @@ index 3d74f134f5..4b330a52b5 100644 void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) { -@@ -735,6 +737,7 @@ static void aarch64_a57_initfn(Object *obj) +@@ -730,6 +732,7 @@ static void aarch64_a57_initfn(Object *obj) define_cortex_a72_a57_a53_cp_reginfo(cpu); } @@ -478,15 +534,15 @@ index 3d74f134f5..4b330a52b5 100644 static void aarch64_a53_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -1033,6 +1036,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj) - /* From D5.1 AArch64 PMU register summary */ - cpu->isar.reset_pmcr_el0 = 0x410c3000; +@@ -1164,6 +1167,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj) + + define_neoverse_n1_cp_reginfo(cpu); } +#endif /* disabled for RHEL */ static void aarch64_host_initfn(Object *obj) { -@@ -1240,13 +1244,18 @@ static void aarch64_max_initfn(Object *obj) +@@ -1373,14 +1377,19 @@ static void aarch64_max_initfn(Object *obj) } static const ARMCPUInfo aarch64_cpus[] = { @@ -498,6 +554,7 @@ index 3d74f134f5..4b330a52b5 100644 + .deprecation_note = RHEL_CPU_DEPRECATION }, +#if 0 /* Disabled for Red Hat Enterprise Linux */ { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, + { .name = "cortex-a55", .initfn = aarch64_a55_initfn }, { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, { .name = "cortex-a76", .initfn = aarch64_a76_initfn }, { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, @@ -506,7 +563,7 @@ index 3d74f134f5..4b330a52b5 100644 { .name = "max", .initfn = aarch64_max_initfn }, #if defined(CONFIG_KVM) || defined(CONFIG_HVF) { .name = "host", .initfn = aarch64_host_initfn }, -@@ -1318,8 +1327,13 @@ static void aarch64_cpu_instance_init(Object *obj) +@@ -1452,8 +1461,13 @@ static void aarch64_cpu_instance_init(Object *obj) static void cpu_register_class_init(ObjectClass *oc, void *data) { ARMCPUClass *acc = ARM_CPU_CLASS(oc); @@ -521,10 +578,10 @@ index 3d74f134f5..4b330a52b5 100644 void aarch64_cpu_register(const ARMCPUInfo 
*info) diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index a528ff9a3d..053f70e399 100644 +index c154a4dcf2..f29425b656 100644 --- a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c -@@ -148,10 +148,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) +@@ -152,10 +152,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) } #endif /* !CONFIG_USER_ONLY */ @@ -536,7 +593,7 @@ index a528ff9a3d..053f70e399 100644 #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { -@@ -505,7 +505,6 @@ static void cortex_a9_initfn(Object *obj) +@@ -509,7 +509,6 @@ static void cortex_a9_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x41093000; define_arm_cp_regs(cpu, cortexa9_cp_reginfo); } @@ -544,7 +601,7 @@ index a528ff9a3d..053f70e399 100644 #ifndef CONFIG_USER_ONLY static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -530,7 +529,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { +@@ -534,7 +533,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, }; @@ -552,7 +609,7 @@ index a528ff9a3d..053f70e399 100644 static void cortex_a7_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -579,7 +577,6 @@ static void cortex_a7_initfn(Object *obj) +@@ -583,7 +581,6 @@ static void cortex_a7_initfn(Object *obj) cpu->isar.reset_pmcr_el0 = 0x41072000; define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ } @@ -560,7 +617,7 @@ index a528ff9a3d..053f70e399 100644 static void cortex_a15_initfn(Object *obj) { -@@ -628,7 +625,6 @@ static void cortex_a15_initfn(Object *obj) +@@ -632,7 +629,6 @@ static void cortex_a15_initfn(Object *obj) define_arm_cp_regs(cpu, cortexa15_cp_reginfo); } @@ -568,7 +625,7 @@ index a528ff9a3d..053f70e399 100644 static void cortex_m0_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -1070,7 +1066,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) +@@ 
-1115,7 +1111,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) cc->gdb_core_xml_file = "arm-m-profile.xml"; } @@ -576,7 +633,7 @@ index a528ff9a3d..053f70e399 100644 #ifndef TARGET_AARCH64 /* -@@ -1138,7 +1133,6 @@ static void arm_max_initfn(Object *obj) +@@ -1183,7 +1178,6 @@ static void arm_max_initfn(Object *obj) #endif /* !TARGET_AARCH64 */ static const ARMCPUInfo arm_tcg_cpus[] = { @@ -584,7 +641,7 @@ index a528ff9a3d..053f70e399 100644 { .name = "arm926", .initfn = arm926_initfn }, { .name = "arm946", .initfn = arm946_initfn }, { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1154,9 +1148,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1199,9 +1193,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "cortex-a7", .initfn = cortex_a7_initfn }, { .name = "cortex-a8", .initfn = cortex_a8_initfn }, { .name = "cortex-a9", .initfn = cortex_a9_initfn }, @@ -594,7 +651,7 @@ index a528ff9a3d..053f70e399 100644 { .name = "cortex-m0", .initfn = cortex_m0_initfn, .class_init = arm_v7m_class_init }, { .name = "cortex-m3", .initfn = cortex_m3_initfn, -@@ -1187,7 +1179,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { +@@ -1233,7 +1225,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, @@ -602,36 +659,16 @@ index a528ff9a3d..053f70e399 100644 #ifndef TARGET_AARCH64 { .name = "max", .initfn = arm_max_initfn }, #endif -@@ -1215,3 +1206,4 @@ static void arm_tcg_cpu_register_types(void) +@@ -1261,3 +1252,4 @@ static void arm_tcg_cpu_register_types(void) type_init(arm_tcg_cpu_register_types) #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ +#endif /* disabled for RHEL */ -diff --git a/target/arm/helper.c b/target/arm/helper.c -index d8c8223ec3..ad9d235773 100644 ---- a/target/arm/helper.c -+++ b/target/arm/helper.c -@@ -8476,6 +8476,7 @@ void arm_cpu_list(void) - static void 
arm_cpu_add_definition(gpointer data, gpointer user_data) - { - ObjectClass *oc = data; -+ CPUClass *cc = CPU_CLASS(oc); - CpuDefinitionInfoList **cpu_list = user_data; - CpuDefinitionInfo *info; - const char *typename; -@@ -8485,6 +8486,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) - info->name = g_strndup(typename, - strlen(typename) - strlen("-" TYPE_ARM_CPU)); - info->q_typename = g_strdup(typename); -+ info->deprecated = !!cc->deprecation_note; - - QAPI_LIST_PREPEND(*cpu_list, info); - } diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c -index 5a14527386..a3579fc303 100644 +index 1cb08138ad..834497dfec 100644 --- a/tests/qtest/arm-cpu-features.c +++ b/tests/qtest/arm-cpu-features.c -@@ -440,8 +440,10 @@ static void test_query_cpu_model_expansion(const void *data) +@@ -441,8 +441,10 @@ static void test_query_cpu_model_expansion(const void *data) assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); /* Test expected feature presence/absence for some cpu types */ @@ -642,7 +679,7 @@ index 5a14527386..a3579fc303 100644 /* Enabling and disabling pmu should always work. 
*/ assert_has_feature_enabled(qts, "max", "pmu"); -@@ -458,6 +460,7 @@ static void test_query_cpu_model_expansion(const void *data) +@@ -459,6 +461,7 @@ static void test_query_cpu_model_expansion(const void *data) assert_has_feature_enabled(qts, "cortex-a57", "pmu"); assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); @@ -650,7 +687,7 @@ index 5a14527386..a3579fc303 100644 assert_has_feature_enabled(qts, "a64fx", "pmu"); assert_has_feature_enabled(qts, "a64fx", "aarch64"); /* -@@ -470,6 +473,7 @@ static void test_query_cpu_model_expansion(const void *data) +@@ -471,6 +474,7 @@ static void test_query_cpu_model_expansion(const void *data) "{ 'sve384': true }"); assert_error(qts, "a64fx", "cannot enable sve640", "{ 'sve640': true }"); @@ -658,7 +695,7 @@ index 5a14527386..a3579fc303 100644 sve_tests_default(qts, "max"); pauth_tests_default(qts, "max"); -@@ -505,9 +509,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data) +@@ -506,9 +510,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data) QDict *resp; char *error; @@ -671,5 +708,5 @@ index 5a14527386..a3579fc303 100644 assert_has_feature_enabled(qts, "host", "aarch64"); -- -2.31.1 +2.39.1 diff --git a/0008-Add-ppc64-machine-types.patch b/0008-Add-ppc64-machine-types.patch index a3cb0a3..ab78cae 100644 --- a/0008-Add-ppc64-machine-types.patch +++ b/0008-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 2c523f1b6c9470e1cd517ba99e414cde02727e16 Mon Sep 17 00:00:00 2001 +From 401d0ebf1ee959fd944df6b5b4ae9c51c36d1244 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -34,10 +34,10 @@ Merged patches (7.1.0 rc0): 8 files changed, 314 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 66b414d2e9..499eb49253 100644 +index 4921198b9d..e24b3e22e3 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -1633,6 +1633,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause 
reason) +@@ -1634,6 +1634,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); @@ -47,7 +47,7 @@ index 66b414d2e9..499eb49253 100644 first_ppc_cpu = POWERPC_CPU(first_cpu); if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && -@@ -3347,6 +3350,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +@@ -3348,6 +3351,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) spapr->host_serial = g_strdup(value); } @@ -68,7 +68,7 @@ index 66b414d2e9..499eb49253 100644 static void spapr_instance_init(Object *obj) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); -@@ -3425,6 +3442,12 @@ static void spapr_instance_init(Object *obj) +@@ -3426,6 +3443,12 @@ static void spapr_instance_init(Object *obj) spapr_get_host_serial, spapr_set_host_serial); object_property_set_description(obj, "host-serial", "Host serial number to advertise in guest device tree"); @@ -81,7 +81,7 @@ index 66b414d2e9..499eb49253 100644 } static void spapr_machine_finalizefn(Object *obj) -@@ -4682,6 +4705,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4683,6 +4706,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) vmc->client_architecture_support = spapr_vof_client_architecture_support; vmc->quiesce = spapr_vof_quiesce; vmc->setprop = spapr_vof_setprop; @@ -89,15 +89,15 @@ index 66b414d2e9..499eb49253 100644 } static const TypeInfo spapr_machine_info = { -@@ -4733,6 +4757,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4734,6 +4758,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-7.2 + * pseries-8.0 */ -@@ -4882,6 +4907,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4894,6 +4919,7 @@ static void 
spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -105,7 +105,7 @@ index 66b414d2e9..499eb49253 100644 /* * pseries-4.0 -@@ -4901,6 +4927,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, +@@ -4913,6 +4939,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, *nv2atsd = 0; return true; } @@ -114,7 +114,7 @@ index 66b414d2e9..499eb49253 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -5228,6 +5256,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -5240,6 +5268,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -375,10 +375,10 @@ index fcb5dfe792..ab8fb5bf62 100644 qdev_unrealize(DEVICE(cpu)); return false; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 04a95669ab..d5f4cf5e03 100644 +index 5c8aabd444..04489d5808 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -154,6 +154,7 @@ struct SpaprMachineClass { +@@ -155,6 +155,7 @@ struct SpaprMachineClass { bool pre_5_2_numa_associativity; bool pre_6_2_numa_affinity; @@ -386,7 +386,7 @@ index 04a95669ab..d5f4cf5e03 100644 bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, -@@ -256,6 +257,9 @@ struct SpaprMachineState { +@@ -257,6 +258,9 @@ struct SpaprMachineState { /* Set by -boot */ char *boot_device; @@ -434,10 +434,10 @@ index cd3ff700ac..1cb49c8087 100644 { "405cr", "405crc" }, { "405gp", "405gpd" }, diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h -index 81d4263a07..508fbed90b 100644 +index 557d736dab..6646ec1c27 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1467,6 +1467,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1482,6 +1482,7 @@ static 
inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -446,10 +446,10 @@ index 81d4263a07..508fbed90b 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index 7c25348b7b..83671c955f 100644 +index 78f6fc50cd..68d06c3f8f 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c -@@ -89,6 +89,7 @@ static int cap_ppc_nested_kvm_hv; +@@ -88,6 +88,7 @@ static int cap_ppc_nested_kvm_hv; static int cap_large_decr; static int cap_fwnmi; static int cap_rpt_invalidate; @@ -457,7 +457,7 @@ index 7c25348b7b..83671c955f 100644 static uint32_t debug_inst_opcode; -@@ -136,6 +137,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) +@@ -135,6 +136,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); kvmppc_get_cpu_characteristics(s); cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); @@ -465,7 +465,7 @@ index 7c25348b7b..83671c955f 100644 cap_large_decr = kvmppc_get_dec_bits(); cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); /* -@@ -2570,6 +2572,16 @@ int kvmppc_has_cap_rpt_invalidate(void) +@@ -2569,6 +2571,16 @@ int kvmppc_has_cap_rpt_invalidate(void) return cap_rpt_invalidate; } @@ -482,7 +482,7 @@ index 7c25348b7b..83671c955f 100644 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) { uint32_t host_pvr = mfpvr(); -@@ -2970,3 +2982,18 @@ bool kvm_arch_cpu_check_are_resettable(void) +@@ -2969,3 +2981,18 @@ bool kvm_arch_cpu_check_are_resettable(void) void kvm_arch_accel_class_init(ObjectClass *oc) { } @@ -502,10 +502,10 @@ index 7c25348b7b..83671c955f 100644 + } +} diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h -index ee9325bf9a..20dbb95989 100644 +index 5fd9753953..b5ebfe2be0 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h -@@ -40,6 +40,7 @@ int 
kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); +@@ -43,6 +43,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, bool radix, bool gtse, uint64_t proc_tbl); @@ -513,7 +513,7 @@ index ee9325bf9a..20dbb95989 100644 #ifndef CONFIG_USER_ONLY bool kvmppc_spapr_use_multitce(void); int kvmppc_spapr_enable_inkernel_multitce(void); -@@ -74,6 +75,8 @@ int kvmppc_get_cap_large_decr(void); +@@ -77,6 +78,8 @@ int kvmppc_get_cap_large_decr(void); int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); int kvmppc_has_cap_rpt_invalidate(void); int kvmppc_enable_hwrng(void); @@ -522,7 +522,7 @@ index ee9325bf9a..20dbb95989 100644 int kvmppc_put_books_sregs(PowerPCCPU *cpu); PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); void kvmppc_check_papr_resize_hpt(Error **errp); -@@ -393,6 +396,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void) +@@ -396,6 +399,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void) return false; } @@ -540,5 +540,5 @@ index ee9325bf9a..20dbb95989 100644 { return -1; -- -2.31.1 +2.39.1 diff --git a/0009-Add-s390x-machine-types.patch b/0009-Add-s390x-machine-types.patch index 5860009..07dfb57 100644 --- a/0009-Add-s390x-machine-types.patch +++ b/0009-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 1973257ed781a93943f27f1518933e8c09c50f88 Mon Sep 17 00:00:00 2001 +From 3c7647197729fcd76e219070c6f359bb3667d04d Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -30,45 +30,72 @@ Merged patches (7.1.0 rc0): Merged patches (7.2.0 rc0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) + +Merged patches (8.0.0-rc1): +- 27c188c6a4 redhat: Update s390x machine type compatibility for QEMU 7.2.0 update +- a932b8d429 redhat: Add new rhel-9.2.0 s390x machine type +- ac88104bad s390x/s390-virtio-ccw: Activate zPCI features on s390-ccw-virtio-rhel8.6.0 --- - hw/s390x/s390-virtio-ccw.c | 108 
+++++++++++++++++++++++++++++++ - target/s390x/cpu_models.c | 11 ++++ + hw/s390x/s390-virtio-ccw.c | 143 +++++++++++++++++++++++++++++++ + target/s390x/cpu_models.c | 11 +++ target/s390x/cpu_models.h | 2 + target/s390x/cpu_models_sysemu.c | 2 + - 4 files changed, 123 insertions(+) + 4 files changed, 158 insertions(+) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 2e64ffab45..8d5221fbb1 100644 +index 503f212a31..dcd3b966b0 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -823,6 +823,7 @@ bool css_migration_enabled(void) +@@ -826,6 +826,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_7_2_instance_options(MachineState *machine) + static void ccw_machine_8_0_instance_options(MachineState *machine) { } -@@ -1186,6 +1187,113 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1201,6 +1202,148 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); +#endif + + ++static void ccw_machine_rhel920_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel920_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true); ++ +static void ccw_machine_rhel900_instance_options(MachineState *machine) +{ + static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; + ++ ccw_machine_rhel920_instance_options(machine); ++ + s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE); +} + +static void ccw_machine_rhel900_class_options(MachineClass *mc) +{ ++ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, ++ }; ++ ++ 
ccw_machine_rhel920_class_options(mc); ++ ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); ++ s390mc->max_threads = S390_MAX_CPUS; +} -+DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); ++DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", false); + +static void ccw_machine_rhel860_instance_options(MachineState *machine) +{ @@ -78,7 +105,14 @@ index 2e64ffab45..8d5221fbb1 100644 + +static void ccw_machine_rhel860_class_options(MachineClass *mc) +{ ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "on", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "on", }, ++ }; ++ + ccw_machine_rhel900_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + + /* All RHEL machines for prior major releases are deprecated */ + mc->deprecation_reason = rhel_old_machine_deprecation; @@ -102,8 +136,14 @@ index 2e64ffab45..8d5221fbb1 100644 + +static void ccw_machine_rhel850_class_options(MachineClass *mc) +{ ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, ++ }; ++ + ccw_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + mc->smp_props.prefer_sockets = true; +} +DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); @@ -164,10 +204,10 @@ index 2e64ffab45..8d5221fbb1 100644 static void ccw_machine_register_types(void) { diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index c3a4f80633..739770dc15 100644 +index 457b5cb10c..ff6b9463cb 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c -@@ -45,6 +45,9 @@ +@@ -46,6 
+46,9 @@ * of a following release have been a superset of the previous release. With * generation 15 one base feature and one optional feature have been deprecated. */ @@ -177,7 +217,7 @@ index c3a4f80633..739770dc15 100644 static S390CPUDef s390_cpu_defs[] = { CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"), CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"), -@@ -854,22 +857,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) +@@ -857,22 +860,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) { S390CPUClass *xcc = S390_CPU_CLASS(oc); @@ -222,7 +262,7 @@ index fb1adc8b21..d76745afa9 100644 /* CPU model based on a CPU definition */ diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c -index d086b1c39c..1b9cc66405 100644 +index 87a4480c05..28c1b0486c 100644 --- a/target/s390x/cpu_models_sysemu.c +++ b/target/s390x/cpu_models_sysemu.c @@ -60,6 +60,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) @@ -242,5 +282,5 @@ index d086b1c39c..1b9cc66405 100644 if (cpu_list_data->model) { Object *obj; -- -2.31.1 +2.39.1 diff --git a/0010-Add-x86_64-machine-types.patch b/0010-Add-x86_64-machine-types.patch index 181342a..9685338 100644 --- a/0010-Add-x86_64-machine-types.patch +++ b/0010-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 0935624ccdddc286d6eeeb0c1b70d78983c21aa2 Mon Sep 17 00:00:00 2001 +From 510291040cb280e1f68b793a84ec0f7d1c88aafa Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -13,6 +13,9 @@ Rebase notes (6.1.0): Rebase notes (7.0.0): - Reset alias for all machine-types except latest one +Rebase notes (8.0.0-rc1): +- remove legacy_no_rng_seed usage (removed upstream) + Merged patches (6.1.0): - 59c284ad3b x86: Add x86 rhel8.5 machine types - a8868b42fe redhat: 
x86: Enable 'kvm-asyncpf-int' by default @@ -39,24 +42,26 @@ Merged patches (7.1.0 rc0): Merged patches (7.2.0 rc0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) + +Merged patches (8.0.0-rc1): +- f33ca8aed4 x86: rhel 9.2.0 machine type --- - hw/i386/pc.c | 147 ++++++++++++++++++++++- - hw/i386/pc_piix.c | 86 +++++++++++++- - hw/i386/pc_q35.c | 234 ++++++++++++++++++++++++++++++++++++- - hw/s390x/s390-virtio-ccw.c | 1 + + hw/i386/pc.c | 147 +++++++++++++++++++++- + hw/i386/pc_piix.c | 86 ++++++++++++- + hw/i386/pc_q35.c | 252 ++++++++++++++++++++++++++++++++++++- include/hw/boards.h | 2 + - include/hw/i386/pc.h | 27 +++++ + include/hw/i386/pc.h | 27 ++++ target/i386/cpu.c | 21 ++++ target/i386/kvm/kvm-cpu.c | 1 + target/i386/kvm/kvm.c | 4 + tests/qtest/pvpanic-test.c | 5 +- - 10 files changed, 521 insertions(+), 7 deletions(-) + 9 files changed, 538 insertions(+), 7 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 546b703cb4..c7b1350e64 100644 +index 1489abf010..8abb1f872e 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -393,6 +393,149 @@ GlobalProperty pc_compat_1_4[] = { +@@ -407,6 +407,149 @@ GlobalProperty pc_compat_1_4[] = { }; const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); @@ -206,7 +211,7 @@ index 546b703cb4..c7b1350e64 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1907,6 +2050,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1944,6 +2087,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->pvh_enabled = true; pcmc->kvmclock_create_always = true; assert(!mc->get_hotplug_handler); @@ -214,7 +219,7 @@ index 546b703cb4..c7b1350e64 100644 mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1917,7 +2061,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1954,7 +2098,8 @@ static void 
pc_machine_class_init(ObjectClass *oc, void *data) mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; mc->block_default_type = IF_IDE; @@ -225,10 +230,10 @@ index 546b703cb4..c7b1350e64 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 0985ff67d2..173a1fd10b 100644 +index 14a794081e..3e330fd36f 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -53,6 +53,7 @@ +@@ -54,6 +54,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/xen.h" @@ -236,7 +241,7 @@ index 0985ff67d2..173a1fd10b 100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -184,8 +185,8 @@ static void pc_init1(MachineState *machine, +@@ -198,8 +199,8 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ @@ -247,7 +252,7 @@ index 0985ff67d2..173a1fd10b 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -334,6 +335,7 @@ static void pc_init1(MachineState *machine, +@@ -351,6 +352,7 @@ static void pc_init1(MachineState *machine, * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). 
*/ @@ -255,7 +260,7 @@ index 0985ff67d2..173a1fd10b 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -896,3 +898,83 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -899,3 +901,83 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -304,7 +309,7 @@ index 0985ff67d2..173a1fd10b 100644 + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; -+ pcmc->legacy_no_rng_seed = true; ++ pcmc->enforce_amd_1tb_hole = false; + compat_props_add(m->compat_props, hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, @@ -340,10 +345,10 @@ index 0985ff67d2..173a1fd10b 100644 +DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, + pc_machine_rhel760_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index ea582254e3..97c3630021 100644 +index dc0ba5f9e7..98601bb76f 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -198,8 +198,8 @@ static void pc_q35_init(MachineState *machine) +@@ -199,8 +199,8 @@ static void pc_q35_init(MachineState *machine) if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ @@ -354,7 +359,7 @@ index ea582254e3..97c3630021 100644 pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -352,6 +352,7 @@ static void pc_q35_init(MachineState *machine) +@@ -354,6 +354,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -362,7 +367,7 @@ index ea582254e3..97c3630021 100644 static void pc_q35_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -666,3 +667,232 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -663,3 +664,250 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, 
"pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -391,6 +396,23 @@ index ea582254e3..97c3630021 100644 + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); +} + ++static void pc_q35_init_rhel920(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel920_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL"; ++ pcmc->smbios_stream_version = "9.2.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, ++ pc_q35_machine_rhel920_options); ++ +static void pc_q35_init_rhel900(MachineState *machine) +{ + pc_q35_init(machine); @@ -399,11 +421,12 @@ index ea582254e3..97c3630021 100644 +static void pc_q35_machine_rhel900_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel920_options(m); + m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.0.0"; -+ pcmc->legacy_no_rng_seed = true; ++ pcmc->enforce_amd_1tb_hole = false; + compat_props_add(m->compat_props, hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, @@ -595,23 +618,11 @@ index ea582254e3..97c3630021 100644 + +DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, + pc_q35_machine_rhel760_options); -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 8d5221fbb1..ba640e3d9e 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1213,6 +1213,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) - static void ccw_machine_rhel860_class_options(MachineClass *mc) - { - ccw_machine_rhel900_class_options(mc); -+ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); - - 
/* All RHEL machines for prior major releases are deprecated */ - mc->deprecation_reason = rhel_old_machine_deprecation; diff --git a/include/hw/boards.h b/include/hw/boards.h -index 2209d4e416..fd75f551b1 100644 +index c5a965d27f..5e7446ee40 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -266,6 +266,8 @@ struct MachineClass { +@@ -268,6 +268,8 @@ struct MachineClass { strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; bool auto_enable_numa_with_memdev; @@ -621,12 +632,12 @@ index 2209d4e416..fd75f551b1 100644 bool smbus_no_migration_support; bool nvdimm_supported; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 3754eaa97d..4266fe2fdb 100644 +index 908a275736..4376f64a47 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -293,6 +293,33 @@ extern const size_t pc_compat_1_5_len; - extern GlobalProperty pc_compat_1_4[]; - extern const size_t pc_compat_1_4_len; +@@ -293,6 +293,33 @@ extern const size_t pc_compat_1_4_len; + + int pc_machine_kvm_type(MachineState *machine, const char *vm_type); +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; @@ -659,10 +670,10 @@ index 3754eaa97d..4266fe2fdb 100644 static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \ { \ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 22b681ca37..f7c526cbe6 100644 +index 6576287e5b..0ef2bf1b93 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1832,9 +1832,13 @@ static const CPUCaches epyc_milan_cache_info = { +@@ -1834,9 +1834,13 @@ static const CPUCaches epyc_milan_cache_info = { * PT in VMX operation */ @@ -676,7 +687,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 0xd, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -1855,6 +1859,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -1857,6 +1861,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "phenom", @@ -684,7 +695,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 5, 
.vendor = CPUID_VENDOR_AMD, .family = 16, -@@ -1887,6 +1892,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -1889,6 +1894,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "core2duo", @@ -692,7 +703,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -1929,6 +1935,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -1931,6 +1937,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "kvm64", @@ -700,7 +711,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 0xd, .vendor = CPUID_VENDOR_INTEL, .family = 15, -@@ -1970,6 +1977,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -1972,6 +1979,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "qemu32", @@ -708,7 +719,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 4, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -1984,6 +1992,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -1986,6 +1994,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "kvm32", @@ -716,7 +727,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 5, .vendor = CPUID_VENDOR_INTEL, .family = 15, -@@ -2014,6 +2023,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2016,6 +2025,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "coreduo", @@ -724,7 +735,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2047,6 +2057,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2049,6 +2059,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "486", @@ -732,7 +743,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 1, .vendor = CPUID_VENDOR_INTEL, .family = 4, -@@ -2059,6 +2070,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2061,6 +2072,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium", @@ -740,7 +751,7 @@ index 22b681ca37..f7c526cbe6 100644 
.level = 1, .vendor = CPUID_VENDOR_INTEL, .family = 5, -@@ -2071,6 +2083,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2073,6 +2085,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium2", @@ -748,7 +759,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 2, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2083,6 +2096,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2085,6 +2098,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium3", @@ -756,7 +767,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 3, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2095,6 +2109,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2097,6 +2111,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "athlon", @@ -764,7 +775,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 2, .vendor = CPUID_VENDOR_AMD, .family = 6, -@@ -2110,6 +2125,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2112,6 +2127,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "n270", @@ -772,7 +783,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2135,6 +2151,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2137,6 +2153,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Conroe", @@ -780,7 +791,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2175,6 +2192,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2177,6 +2194,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Penryn", @@ -788,7 +799,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -3762,6 +3780,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -3893,6 +3911,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G1", @@ -796,7 +807,7 @@ index 
22b681ca37..f7c526cbe6 100644 .level = 5, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -3782,6 +3801,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -3913,6 +3932,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G2", @@ -804,7 +815,7 @@ index 22b681ca37..f7c526cbe6 100644 .level = 5, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -3804,6 +3824,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -3935,6 +3955,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G3", @@ -825,10 +836,10 @@ index 7237378a7d..7b8a3d5af0 100644 }; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index a213209379..81526a1575 100644 +index de531842f6..8d82304609 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c -@@ -3707,6 +3707,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3822,6 +3822,7 @@ static int kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -836,7 +847,7 @@ index a213209379..81526a1575 100644 kvm_msr_buf_reset(cpu); -@@ -4062,6 +4063,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -4177,6 +4178,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -847,7 +858,7 @@ index a213209379..81526a1575 100644 case MSR_KVM_ASYNC_PF_INT: env->async_pf_int_msr = msrs[i].data; diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c -index bc7b7dfc39..96e6dee3a1 100644 +index 78f1cf8186..ac954c9b06 100644 --- a/tests/qtest/pvpanic-test.c +++ b/tests/qtest/pvpanic-test.c @@ -17,7 +17,7 @@ static void test_panic_nopause(void) @@ -870,5 +881,5 @@ index bc7b7dfc39..96e6dee3a1 100644 val = qtest_inb(qts, 0x505); g_assert_cmpuint(val, ==, 3); -- -2.31.1 +2.39.1 diff --git a/0011-Enable-make-check.patch b/0011-Enable-make-check.patch index d0be8e6..cc91302 100644 --- a/0011-Enable-make-check.patch +++ b/0011-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 
badfb1290c8eea8a2e1769b2392c7899d5077698 Mon Sep 17 00:00:00 2001 +From 738db8353055eb6fd902513949c6659af8b401d0 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -27,28 +27,37 @@ Rebase changes (7.0.0): Rebase changes (7.1.0 rc0): - Disable bcm2835-dma-test (added upstream) +Rebase changes (8.0.0-rc1): +- Removed chunks for disabling bios-table-test (protected upstream) + +Rebase change (8.0.0-rc2): +- Disable new qemu-iotests execution +- Revert change in tco qtest (blocking test run) + Merged patches (6.1.0): - 2f129df7d3 redhat: Enable the 'test-block-iothread' test again Merged patches (7.1.0 rc0): - 64d736640e RHEL-only: tests/avocado: Switch aarch64 tests from a53 to a57 --- - .distro/qemu-kvm.spec.template | 5 ++--- - tests/avocado/replay_kernel.py | 2 +- - tests/avocado/reverse_debugging.py | 2 +- - tests/avocado/tcg_plugins.py | 6 +++--- - tests/qtest/fuzz-e1000e-test.c | 2 +- - tests/qtest/fuzz-virtio-scsi-test.c | 2 +- - tests/qtest/intel-hda-test.c | 2 +- - tests/qtest/libqos/meson.build | 2 +- - tests/qtest/lpc-ich9-test.c | 2 +- - tests/qtest/meson.build | 7 +------ - tests/qtest/usb-hcd-xhci-test.c | 4 ++++ - tests/qtest/virtio-net-failover.c | 1 + - 12 files changed, 18 insertions(+), 19 deletions(-) + .distro/qemu-kvm.spec.template | 4 ++-- + tests/avocado/replay_kernel.py | 2 +- + tests/avocado/reverse_debugging.py | 2 +- + tests/avocado/tcg_plugins.py | 6 ++--- + tests/qemu-iotests/meson.build | 34 ++++++++++++++--------------- + tests/qtest/fuzz-e1000e-test.c | 2 +- + tests/qtest/fuzz-virtio-scsi-test.c | 2 +- + tests/qtest/intel-hda-test.c | 2 +- + tests/qtest/libqos/meson.build | 2 +- + tests/qtest/lpc-ich9-test.c | 2 +- + tests/qtest/meson.build | 2 -- + tests/qtest/tco-test.c | 2 +- + tests/qtest/usb-hcd-xhci-test.c | 4 ++++ + tests/qtest/virtio-net-failover.c | 1 + + 14 files changed, 35 insertions(+), 32 deletions(-) diff --git a/tests/avocado/replay_kernel.py 
b/tests/avocado/replay_kernel.py -index 00a26e4a0c..fe5ecf238a 100644 +index f13456e1ec..2fee270a42 100644 --- a/tests/avocado/replay_kernel.py +++ b/tests/avocado/replay_kernel.py @@ -147,7 +147,7 @@ def test_aarch64_virt(self): @@ -61,10 +70,10 @@ index 00a26e4a0c..fe5ecf238a 100644 kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' '/linux/releases/29/Everything/aarch64/os/images/pxeboot' diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py -index d2921e70c3..66d185ed42 100644 +index 680c314cfc..71eccb8fb6 100644 --- a/tests/avocado/reverse_debugging.py +++ b/tests/avocado/reverse_debugging.py -@@ -198,7 +198,7 @@ def test_aarch64_virt(self): +@@ -206,7 +206,7 @@ def test_aarch64_virt(self): """ :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -104,6 +113,49 @@ index 642d2e49e3..93b3afd823 100644 """ kernel_path = self._grab_aarch64_kernel() kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +diff --git a/tests/qemu-iotests/meson.build b/tests/qemu-iotests/meson.build +index 9735071a29..32002335f4 100644 +--- a/tests/qemu-iotests/meson.build ++++ b/tests/qemu-iotests/meson.build +@@ -51,21 +51,21 @@ foreach format, speed: qemu_iotests_formats + check: true, + ) + +- foreach item: rc.stdout().strip().split() +- args = [qemu_iotests_check_cmd, +- '-tap', '-' + format, item, +- '--source-dir', meson.current_source_dir(), +- '--build-dir', meson.current_build_dir()] +- # Some individual tests take as long as 45 seconds +- # Bump the timeout to 3 minutes for some headroom +- # on slow machines to minimize spurious failures +- test('io-' + format + '-' + item, +- python, +- args: args, +- depends: qemu_iotests_binaries, +- env: qemu_iotests_env, +- protocol: 'tap', +- timeout: 180, +- suite: suites) +- endforeach ++# foreach item: rc.stdout().strip().split() ++# args = [qemu_iotests_check_cmd, ++# '-tap', '-' + format, item, ++# '--source-dir', meson.current_source_dir(), ++# '--build-dir', 
meson.current_build_dir()] ++# # Some individual tests take as long as 45 seconds ++# # Bump the timeout to 3 minutes for some headroom ++# # on slow machines to minimize spurious failures ++# test('io-' + format + '-' + item, ++# python, ++# args: args, ++# depends: qemu_iotests_binaries, ++# env: qemu_iotests_env, ++# protocol: 'tap', ++# timeout: 180, ++# suite: suites) ++# endforeach + endforeach diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c index 5052883fb6..b5286f4b12 100644 --- a/tests/qtest/fuzz-e1000e-test.c @@ -144,10 +196,10 @@ index d4a8db6fd6..1a796ec15a 100644 qtest_outl(s, 0xcf8, 0x80000804); diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build -index 32f028872c..1e78a1a055 100644 +index cc209a8de5..42a7c529c9 100644 --- a/tests/qtest/libqos/meson.build +++ b/tests/qtest/libqos/meson.build -@@ -43,7 +43,7 @@ libqos_srcs = files( +@@ -44,7 +44,7 @@ libqos_srcs = files( 'virtio-rng.c', 'virtio-scsi.c', 'virtio-serial.c', @@ -170,18 +222,10 @@ index 8ac95b89f7..cd2102555c 100644 qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index c07a5b1a5f..9df3f9f8b9 100644 +index 85ea4e8d99..893afc8eeb 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build -@@ -82,7 +82,6 @@ qtests_i386 = \ - config_all_devices.has_key('CONFIG_Q35') and \ - config_all_devices.has_key('CONFIG_VIRTIO_PCI') and \ - slirp.found() ? ['virtio-net-failover'] : []) + \ -- (unpack_edk2_blobs ? 
['bios-tables-test'] : []) + \ - qtests_pci + \ - qtests_cxl + \ - ['fdc-test', -@@ -96,7 +95,6 @@ qtests_i386 = \ +@@ -94,7 +94,6 @@ qtests_i386 = \ 'drive_del-test', 'tco-test', 'cpu-plug-test', @@ -189,24 +233,7 @@ index c07a5b1a5f..9df3f9f8b9 100644 'vmgenid-test', 'migration-test', 'test-x86-cpuid-compat', -@@ -209,15 +207,13 @@ qtests_arm = \ - - # TODO: once aarch64 TCG is fixed on ARM 32 bit host, make bios-tables-test unconditional - qtests_aarch64 = \ -- (cpu != 'arm' and unpack_edk2_blobs ? ['bios-tables-test'] : []) + \ - (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-test'] : []) + \ - (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-swtpm-test'] : []) + \ - (config_all_devices.has_key('CONFIG_XLNX_ZYNQMP_ARM') ? ['xlnx-can-test', 'fuzz-xlnx-dp-test'] : []) + \ - ['arm-cpu-features', - 'numa-test', - 'boot-serial-test', -- 'migration-test', -- 'bcm2835-dma-test'] -+ 'migration-test'] - - qtests_s390x = \ - (slirp.found() ? ['pxe-test', 'test-netfilter'] : []) + \ -@@ -225,7 +221,6 @@ qtests_s390x = \ +@@ -223,7 +222,6 @@ qtests_s390x = \ (config_host.has_key('CONFIG_POSIX') ? ['test-filter-redirector'] : []) + \ ['boot-serial-test', 'drive_del-test', @@ -214,6 +241,19 @@ index c07a5b1a5f..9df3f9f8b9 100644 'virtio-ccw-test', 'cpu-plug-test', 'migration-test'] +diff --git a/tests/qtest/tco-test.c b/tests/qtest/tco-test.c +index 0547d41173..3756ce82d8 100644 +--- a/tests/qtest/tco-test.c ++++ b/tests/qtest/tco-test.c +@@ -60,7 +60,7 @@ static void test_init(TestData *d) + QTestState *qs; + + qs = qtest_initf("-machine q35 %s %s", +- d->noreboot ? "-global ICH9-LPC.noreboot=true" : "", ++ d->noreboot ? "" : "-global ICH9-LPC.noreboot=false", + !d->args ? 
"" : d->args); + qtest_irq_intercept_in(qs, "ioapic"); + diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c index 10ef9d2a91..3855873050 100644 --- a/tests/qtest/usb-hcd-xhci-test.c @@ -257,5 +297,5 @@ index 4a809590bf..1bf3fa641c 100644 "-device pcie-root-port,id=root1,addr=0x2,bus=pcie.0,chassis=2 " -- -2.31.1 +2.39.1 diff --git a/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch index 477a75d..430959b 100644 --- a/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 0804844e4755377be6d2ebad578794ad9f4f3f31 Mon Sep 17 00:00:00 2001 +From 34cb4f7ddd762ec46ed1a6a4261aebde39360ca4 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -32,7 +32,7 @@ Signed-off-by: Bandan Das 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 939dcc3d4a..acbc6673ce 100644 +index ec9a854361..a779053be3 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -48,6 +48,9 @@ @@ -77,7 +77,7 @@ index 939dcc3d4a..acbc6673ce 100644 if (!vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3293,6 +3317,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3294,6 +3318,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -88,7 +88,7 @@ index 939dcc3d4a..acbc6673ce 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 7c236a52f4..7b7d036a8f 100644 +index 177abcc8fb..45235d38ba 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -140,6 +140,7 @@ struct VFIOPCIDevice { @@ -100,5 +100,5 
@@ index 7c236a52f4..7b7d036a8f 100644 uint32_t device_id; uint32_t sub_vendor_id; -- -2.31.1 +2.39.1 diff --git a/0013-Add-support-statement-to-help-output.patch b/0013-Add-support-statement-to-help-output.patch index 022f194..25db0b8 100644 --- a/0013-Add-support-statement-to-help-output.patch +++ b/0013-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 283a0e258dc2f3b83c58e6f948bafe430cd2c1d5 Mon Sep 17 00:00:00 2001 +From 8964a3e8835992442902d35b011a708787366d82 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -21,7 +21,7 @@ Signed-off-by: Eduardo Habkost 1 file changed, 9 insertions(+) diff --git a/softmmu/vl.c b/softmmu/vl.c -index 5115221efe..17188df528 100644 +index ea20b23e4c..ad4173138d 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -834,9 +834,17 @@ static void version(void) @@ -51,5 +51,5 @@ index 5115221efe..17188df528 100644 } -- -2.31.1 +2.39.1 diff --git a/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index e39555b..b97c844 100644 --- a/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From d8ded821aa698b3b03bd9089fbd6c2b33da87b9e Mon Sep 17 00:00:00 2001 +From 0b72d348fa0714de641ee242e5cee97df006e8fd Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -36,10 +36,10 @@ index 52d6454b93..d74dbdeca9 100644 .. |I2C| replace:: I\ :sup:`2`\ C .. 
|I2S| replace:: I\ :sup:`2`\ S diff --git a/qemu-options.hx b/qemu-options.hx -index 7f99d15b23..ea02ca3a45 100644 +index 59bdf67a2c..52b49f1f6a 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -3300,11 +3300,11 @@ SRST +@@ -3296,11 +3296,11 @@ SRST :: @@ -57,5 +57,5 @@ index 7f99d15b23..ea02ca3a45 100644 ``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]`` Establish a vhost-vdpa netdev. -- -2.31.1 +2.39.1 diff --git a/0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch deleted file mode 100644 index 2bedb0b..0000000 --- a/0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 9c6acadb444c9300d7c18b6939ce4f96484aeacc Mon Sep 17 00:00:00 2001 -From: David Gibson -Date: Wed, 6 Feb 2019 03:58:56 +0000 -Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts - -RH-Author: David Gibson -Message-id: <20190206035856.19058-1-dgibson@redhat.com> -Patchwork-id: 84246 -O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts -Bugzilla: 1653590 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Serhii Popovych -RH-Acked-by: Thomas Huth - -Most current POWER guests require 64kiB page support, so that's the default -for the cap-hpt-max-pagesize option in qemu which limits available guest -page sizes. We warn if the value is set smaller than that, but don't -outright fail upstream, because we need to allow for the possibility of -guest (and/or host) kernels configured for 4kiB page sizes. - -Downstream, however, we simply don't support 4kiB pagesize configured -kernels in guest or host, so we can have qemu simply error out in this -situation. - -Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified - it failed immediately with a qemu error - -Signed-off-by: David Gibson -Signed-off-by: Danilo C. L. 
de Paula ---- - hw/ppc/spapr_caps.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c -index b4283055c1..59b88aadff 100644 ---- a/hw/ppc/spapr_caps.c -+++ b/hw/ppc/spapr_caps.c -@@ -329,12 +329,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, - static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, - uint8_t val, Error **errp) - { -+#if 0 /* disabled for RHEL */ - if (val < 12) { - error_setg(errp, "Require at least 4kiB hpt-max-page-size"); - return; - } else if (val < 16) { - warn_report("Many guests require at least 64kiB hpt-max-page-size"); - } -+#else /* Only page sizes >=64kiB supported for RHEL */ -+ if (val < 16) { -+ error_setg(errp, "Require at least 64kiB hpt-max-page-size"); -+ return; -+ } -+#endif - - spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); - } --- -2.31.1 - diff --git a/0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch similarity index 92% rename from 0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch rename to 0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch index cee5476..1e2f8e1 100644 --- a/0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +++ b/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -1,4 +1,4 @@ -From 02fde2a0cbd679ebd4104fe5522572c31ec23abd Mon Sep 17 00:00:00 2001 +From bd6bcebfd783fa49e283d035d378fb5240423d84 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 20 Aug 2021 18:25:12 +0200 Subject: qcow2: Deprecation warning when opening v2 images rw @@ -44,10 +44,10 @@ Rebase notes (6.1.0): 2 files changed, 7 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c -index 4d6666d3ff..d2ba263e9d 100644 +index 30fd53fa64..22084730f9 100644 --- a/block/qcow2.c +++ b/block/qcow2.c -@@ -1336,6 +1336,12 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, +@@ -1337,6 +1337,12 
@@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, ret = -ENOTSUP; goto fail; } @@ -61,7 +61,7 @@ index 4d6666d3ff..d2ba263e9d 100644 s->qcow_version = header.version; diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter -index cc9f1a5891..6a13757177 100644 +index 6b32c7fbfa..6ddda2ee64 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -83,6 +83,7 @@ _filter_qemu() @@ -73,5 +73,5 @@ index cc9f1a5891..6a13757177 100644 } -- -2.31.1 +2.39.1 diff --git a/kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch b/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch similarity index 81% rename from kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch rename to 0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch index d039212..bb9455a 100644 --- a/kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch +++ b/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch @@ -1,7 +1,7 @@ -From 48f45171b89b8ed24f2b2484d63b00ea7818b5c3 Mon Sep 17 00:00:00 2001 +From 78a42cf27aa519bb71214443ab570b40e156fa9c Mon Sep 17 00:00:00 2001 From: Kfir Manor Date: Sun, 22 Jan 2023 17:33:07 +0200 -Subject: [PATCH 9/9] qga/linux: add usb support to guest-get-fsinfo +Subject: qga/linux: add usb support to guest-get-fsinfo RH-Author: Kostiantyn Kostiuk RH-MergeRequest: 140: qga/linux: add usb support to guest-get-fsinfo @@ -16,15 +16,19 @@ Upstream patch: https://patchew.org/QEMU/20230122153307.1050593-1-kfir@daynix.co Signed-off-by: Kfir Manor Reviewed-by: Konstantin Kostiuk Signed-off-by: Konstantin Kostiuk + +Patch-name: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch +Patch-id: 72 +Patch-present-in-specfile: True --- qga/commands-posix.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/qga/commands-posix.c b/qga/commands-posix.c -index 32493d6383..f1b2b87c13 100644 +index 079689d79a..97754930c1 100644 --- a/qga/commands-posix.c +++ b/qga/commands-posix.c -@@ -877,7 +877,9 
@@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, +@@ -879,7 +879,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, g_str_equal(driver, "sym53c8xx") || g_str_equal(driver, "virtio-pci") || g_str_equal(driver, "ahci") || @@ -35,7 +39,7 @@ index 32493d6383..f1b2b87c13 100644 break; } -@@ -974,6 +976,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, +@@ -976,6 +978,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, } } else if (strcmp(driver, "nvme") == 0) { disk->bus_type = GUEST_DISK_BUS_TYPE_NVME; @@ -45,5 +49,5 @@ index 32493d6383..f1b2b87c13 100644 g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath); goto cleanup; -- -2.31.1 +2.39.1 diff --git a/0017-Add-RHEL-9.2.0-compat-structure.patch b/0017-Add-RHEL-9.2.0-compat-structure.patch new file mode 100644 index 0000000..ce0ba5c --- /dev/null +++ b/0017-Add-RHEL-9.2.0-compat-structure.patch @@ -0,0 +1,110 @@ +From bd5d81d2865c239ffea0fecf32476732149ad05c Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 15 Feb 2023 02:03:17 -0500 +Subject: Add RHEL 9.2.0 compat structure + +Adding compatibility bits necessary to keep 9.2.0 machine +types same after rebase to 8.0. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase notes (8.0.0 rc4): +- Added migration.x-preempt-pre-7-2 compat) +--- + hw/arm/virt.c | 1 + + hw/core/machine.c | 10 ++++++++++ + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 3 +++ + hw/s390x/s390-virtio-ccw.c | 1 + + include/hw/boards.h | 3 +++ + 6 files changed, 20 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 1ae1654be5..9be53e9355 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3669,6 +3669,7 @@ type_init(rhel_machine_init); + static void rhel920_virt_options(MachineClass *mc) + { + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); + } + DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 5aa567fad3..0e0120b7f2 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -52,6 +52,16 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); + const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + ++GlobalProperty hw_compat_rhel_9_2[] = { ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { "e1000e", "migrate-timadj", "off" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { "virtio-mem", "x-early-migration", "false" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { "migration", "x-preempt-pre-7-2", "true" }, ++}; ++const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2); ++ + /* + * Mostly the same as hw_compat_7_0 + */ +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 3e330fd36f..90fb6e2e03 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -947,6 +947,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; + pcmc->enforce_amd_1tb_hole = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_9_2, ++ hw_compat_rhel_9_2_len); + compat_props_add(m->compat_props, 
hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 98601bb76f..8945b69175 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -701,6 +701,9 @@ static void pc_q35_machine_rhel920_options(MachineClass *m) + m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.2.0"; ++ ++ compat_props_add(m->compat_props, hw_compat_rhel_9_2, ++ hw_compat_rhel_9_2_len); + } + + DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index dcd3b966b0..6a0b93c63d 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1211,6 +1211,7 @@ static void ccw_machine_rhel920_instance_options(MachineState *machine) + + static void ccw_machine_rhel920_class_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); + } + DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true); + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 5e7446ee40..5f08bd7550 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -461,6 +461,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_9_2[]; ++extern const size_t hw_compat_rhel_9_2_len; ++ + extern GlobalProperty hw_compat_rhel_9_1[]; + extern const size_t hw_compat_rhel_9_1_len; + +-- +2.39.1 + diff --git a/0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch b/0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch deleted file mode 100644 index 001880b..0000000 --- a/0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 21ed34787b9492c2cfe3d8fc12a32748bcf02307 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 9 Nov 2022 07:08:32 -0500 
-Subject: Addd 7.2 compat bits for RHEL 9.1 machine type - -Signed-off-by: Miroslav Rezanina ---- - hw/core/machine.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 9edec1ca05..3d851d34da 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -54,6 +54,8 @@ GlobalProperty hw_compat_rhel_9_1[] = { - { "arm-gicv3-common", "force-8-bit-prio", "on" }, - /* hw_compat_rhel_9_1 from hw_compat_7_0 */ - { "nvme-ns", "eui64-default", "on"}, -+ /* hw_compat_rhel_9_1 from hw_compat_7_1 */ -+ { "virtio-device", "queue_reset", "false" }, - }; - const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); - --- -2.31.1 - diff --git a/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch b/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch new file mode 100644 index 0000000..81993e9 --- /dev/null +++ b/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch @@ -0,0 +1,76 @@ +From c6eaf73adda2e87fe91c9a3836f45dd58a553e06 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 27 Mar 2023 15:14:03 +0200 +Subject: redhat: hw/i386/pc: Update x86 machine type compatibility for QEMU + 8.0.0 update + +Add pc_rhel_9_2_compat based on upstream pc_compat_7_2. 
+ +Signed-off-by: Thomas Huth +--- + hw/i386/pc.c | 6 ++++++ + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 2 ++ + include/hw/i386/pc.h | 3 +++ + 4 files changed, 13 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 8abb1f872e..f216922cee 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -429,6 +429,12 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_9_2_compat[] = { ++ /* pc_rhel_9_2_compat from pc_compat_7_2 */ ++ { "ICH9-LPC", "noreboot", "true" }, ++}; ++const size_t pc_rhel_9_2_compat_len = G_N_ELEMENTS(pc_rhel_9_2_compat); ++ + GlobalProperty pc_rhel_9_0_compat[] = { + /* pc_rhel_9_0_compat from pc_compat_6_2 */ + { "virtio-mem", "unplugged-inaccessible", "off" }, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 90fb6e2e03..fc704d783f 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -949,6 +949,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->enforce_amd_1tb_hole = false; + compat_props_add(m->compat_props, hw_compat_rhel_9_2, + hw_compat_rhel_9_2_len); ++ compat_props_add(m->compat_props, pc_rhel_9_2_compat, ++ pc_rhel_9_2_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 8945b69175..e97655616a 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -704,6 +704,8 @@ static void pc_q35_machine_rhel920_options(MachineClass *m) + + compat_props_add(m->compat_props, hw_compat_rhel_9_2, + hw_compat_rhel_9_2_len); ++ compat_props_add(m->compat_props, pc_rhel_9_2_compat, ++ pc_rhel_9_2_compat_len); + } + + DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 4376f64a47..d218ad1628 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -296,6 +296,9 @@ int 
pc_machine_kvm_type(MachineState *machine, const char *vm_type); + extern GlobalProperty pc_rhel_compat[]; + extern const size_t pc_rhel_compat_len; + +extern GlobalProperty pc_rhel_9_2_compat[]; +extern const size_t pc_rhel_9_2_compat_len; + + extern GlobalProperty pc_rhel_9_0_compat[]; + extern const size_t pc_rhel_9_0_compat_len; + +-- +2.39.1 + diff --git a/0019-Disable-unwanted-new-devices.patch b/0019-Disable-unwanted-new-devices.patch new file mode 100644 index 0000000..f656ca9 --- /dev/null +++ b/0019-Disable-unwanted-new-devices.patch @@ -0,0 +1,83 @@ +From 8173d2eabaf77312d36b00c618f6770948b80593 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Mon, 17 Apr 2023 01:24:18 -0400 +Subject: Disable unwanted new devices + +QEMU 8.0 adds two new devices we do not want to support that can't +be disabled using a configure switch. + +1) ide-cf - virtual CompactFlash card + +2) i2c-echo - testing echo device + +Use manual disabling of the devices by changing code (1) and meson configs (2). 
+ +Signed-off-by: Miroslav Rezanina +--- + hw/ide/qdev.c | 9 +++++++++ + hw/misc/meson.build | 3 ++- + 2 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c +index 1b3b4da01d..454bfa5783 100644 +--- a/hw/ide/qdev.c ++++ b/hw/ide/qdev.c +@@ -283,10 +283,13 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp) + ide_dev_initfn(dev, IDE_CD, errp); + } + ++/* Disabled for Red Hat Enterprise Linux */ ++#if 0 + static void ide_cf_realize(IDEDevice *dev, Error **errp) + { + ide_dev_initfn(dev, IDE_CFATA, errp); + } ++#endif + + #define DEFINE_IDE_DEV_PROPERTIES() \ + DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \ +@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = { + .class_init = ide_cd_class_init, + }; + ++/* Disabled for Red Hat Enterprise Linux */ ++#if 0 + static Property ide_cf_properties[] = { + DEFINE_IDE_DEV_PROPERTIES(), + DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf), +@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = { + .instance_size = sizeof(IDEDrive), + .class_init = ide_cf_class_init, + }; ++#endif + + static void ide_device_class_init(ObjectClass *klass, void *data) + { +@@ -396,7 +402,10 @@ static void ide_register_types(void) + type_register_static(&ide_bus_info); + type_register_static(&ide_hd_info); + type_register_static(&ide_cd_info); ++/* Disabled for Red Hat Enterprise Linux */ ++#if 0 + type_register_static(&ide_cf_info); ++#endif + type_register_static(&ide_device_type_info); + } + +diff --git a/hw/misc/meson.build b/hw/misc/meson.build +index a40245ad44..9cc5a61ed7 100644 +--- a/hw/misc/meson.build ++++ b/hw/misc/meson.build +@@ -128,7 +128,8 @@ softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_rng.c')) + + softmmu_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_ahb_apb_pnp.c')) + +-softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c')) ++# Disabled for Red Hat Enterprise Linux ++# softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c')) + + 
specific_ss.add(when: 'CONFIG_AVR_POWER', if_true: files('avr_power.c')) + +-- +2.39.1 + diff --git a/0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch b/0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch deleted file mode 100644 index 2642b30..0000000 --- a/0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 27c188c6a4cbd908269cf06affd24025708ecb5c Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 17 Nov 2022 16:47:16 +0100 -Subject: redhat: Update s390x machine type compatibility for QEMU 7.2.0 update - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2143585 -Upstream Status: n/a (rhel-only) - -Add the compatibility handling for the rebase from QEMU 7.1 to 7.2, -i.e. the settings from ccw_machine_7_1_class_options() and -ccw_machine_7_1_instance_options() to the rhel9.1.0 machine type -(earlier settings have been added by previous rebases already). - -Signed-off-by: Thomas Huth ---- - hw/s390x/s390-virtio-ccw.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index ba640e3d9e..97e868ada0 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1195,12 +1195,21 @@ static void ccw_machine_rhel900_instance_options(MachineState *machine) - static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; - - s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); -+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE); - } - - static void ccw_machine_rhel900_class_options(MachineClass *mc) - { -+ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); -+ static GlobalProperty compat[] = { -+ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, -+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, -+ }; -+ -+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); - 
compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); -+ s390mc->max_threads = S390_MAX_CPUS; - } - DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); - --- -2.31.1 - diff --git a/0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch b/0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch deleted file mode 100644 index cb69b93..0000000 --- a/0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch +++ /dev/null @@ -1,43 +0,0 @@ -From c1a21266d8bed27f1ef1f705818fde5f9350b73f Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Wed, 23 Nov 2022 14:15:37 +0100 -Subject: redhat: aarch64: add rhel9.2.0 virt machine type - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2131982 -Upstream: RHEL only - -Signed-off-by: Cornelia Huck ---- - hw/arm/virt.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index dfcab40a73..0a94f31dd1 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3518,14 +3518,21 @@ static void rhel_machine_init(void) - } - type_init(rhel_machine_init); - -+static void rhel920_virt_options(MachineClass *mc) -+{ -+} -+DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) -+ - static void rhel900_virt_options(MachineClass *mc) - { - VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); - -+ rhel920_virt_options(mc); -+ - compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); - - /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ - vmc->no_tcg_lpa2 = true; - } --DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) -+DEFINE_RHEL_MACHINE(9, 0, 0) --- -2.31.1 - diff --git a/0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch b/0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch deleted file mode 100644 index 144bd92..0000000 --- a/0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 
a932b8d4296066be01613ada84241b501488f99f Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 17 Nov 2022 17:03:24 +0100 -Subject: redhat: Add new rhel-9.2.0 s390x machine type - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2136473 -Upstream Status: n/a (rhel-only) - -RHEL 9.2 will be an EUS release - we want to have a new machine -type here to make sure that we have a spot where we can wire up -fixes later. - -Signed-off-by: Thomas Huth ---- - hw/s390x/s390-virtio-ccw.c | 15 ++++++++++++++- - 1 file changed, 14 insertions(+), 1 deletion(-) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 97e868ada0..aa142a1a4e 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1190,10 +1190,21 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); - #endif - - -+static void ccw_machine_rhel920_instance_options(MachineState *machine) -+{ -+} -+ -+static void ccw_machine_rhel920_class_options(MachineClass *mc) -+{ -+} -+DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true); -+ - static void ccw_machine_rhel900_instance_options(MachineState *machine) - { - static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; - -+ ccw_machine_rhel920_instance_options(machine); -+ - s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); - s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAIE); - } -@@ -1206,12 +1217,14 @@ static void ccw_machine_rhel900_class_options(MachineClass *mc) - { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, - }; - -+ ccw_machine_rhel920_class_options(mc); -+ - compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); - s390mc->max_threads = S390_MAX_CPUS; - } --DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); -+DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", false); - - static void ccw_machine_rhel860_instance_options(MachineState *machine) - 
{ --- -2.31.1 - diff --git a/0022-x86-rhel-9.2.0-machine-type.patch b/0022-x86-rhel-9.2.0-machine-type.patch deleted file mode 100644 index 8502b91..0000000 --- a/0022-x86-rhel-9.2.0-machine-type.patch +++ /dev/null @@ -1,75 +0,0 @@ -From f33ca8aed4744238230f1f2cc47df77aa4c9e0ac Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Thu, 17 Nov 2022 12:36:30 +0000 -Subject: x86: rhel 9.2.0 machine type - -Add a 9.2.0 x86 machine type, and fix up the compatibility -for 9.0.0 and older. - -pc_compat_7_1 and pc_compat_7_0 are both empty upstream so there's -nothing to do there. - -Signed-off-by: Dr. David Alan Gilbert ---- - hw/i386/pc_piix.c | 1 + - hw/i386/pc_q35.c | 21 ++++++++++++++++++++- - 2 files changed, 21 insertions(+), 1 deletion(-) - -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 173a1fd10b..fc06877344 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -944,6 +944,7 @@ static void pc_machine_rhel760_options(MachineClass *m) - /* From pc_i440fx_5_1_machine_options() */ - pcmc->pci_root_uid = 1; - pcmc->legacy_no_rng_seed = true; -+ pcmc->enforce_amd_1tb_hole = false; - compat_props_add(m->compat_props, hw_compat_rhel_9_1, - hw_compat_rhel_9_1_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_0, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 97c3630021..52cfe3bf45 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -692,6 +692,23 @@ static void pc_q35_machine_rhel_options(MachineClass *m) - compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); - } - -+static void pc_q35_init_rhel920(MachineState *machine) -+{ -+ pc_q35_init(machine); -+} -+ -+static void pc_q35_machine_rhel920_options(MachineClass *m) -+{ -+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); -+ m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)"; -+ pcmc->smbios_stream_product = "RHEL"; -+ pcmc->smbios_stream_version = "9.2.0"; -+} -+ -+DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", 
pc_q35_init_rhel920, -+ pc_q35_machine_rhel920_options); -+ - static void pc_q35_init_rhel900(MachineState *machine) - { - pc_q35_init(machine); -@@ -700,11 +717,13 @@ static void pc_q35_init_rhel900(MachineState *machine) - static void pc_q35_machine_rhel900_options(MachineClass *m) - { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -- pc_q35_machine_rhel_options(m); -+ pc_q35_machine_rhel920_options(m); - m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; -+ m->alias = NULL; - pcmc->smbios_stream_product = "RHEL"; - pcmc->smbios_stream_version = "9.0.0"; - pcmc->legacy_no_rng_seed = true; -+ pcmc->enforce_amd_1tb_hole = false; - compat_props_add(m->compat_props, hw_compat_rhel_9_1, - hw_compat_rhel_9_1_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_0, --- -2.31.1 - diff --git a/kvm-KVM-keep-track-of-running-ioctls.patch b/kvm-KVM-keep-track-of-running-ioctls.patch deleted file mode 100644 index b7aba7e..0000000 --- a/kvm-KVM-keep-track-of-running-ioctls.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 6aebc271d95f9c59cb63c923b6ce35f16fce10e4 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Mon, 16 Jan 2023 07:17:23 -0500 -Subject: [PATCH 30/31] KVM: keep track of running ioctls - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 138: accel: introduce accelerator blocker API -RH-Bugzilla: 1979276 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: David Hildenbrand -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/3] f566f81dda26ae733008f32261ecd1253ec1796d (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276 - -commit a27dd2de68f37ba96fe164a42121daa5f0750afc -Author: Emanuele Giuseppe Esposito -Date: Fri Nov 11 10:47:57 2022 -0500 - - KVM: keep track of running ioctls - - Using the new accel-blocker API, mark where ioctls are being called - in KVM. 
Next, we will implement the critical section that will take - care of performing memslots modifications atomically, therefore - preventing any new ioctl from running and allowing the running ones - to finish. - - Signed-off-by: David Hildenbrand - Signed-off-by: Emanuele Giuseppe Esposito - Message-Id: <20221111154758.1372674-3-eesposit@redhat.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - accel/kvm/kvm-all.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index f99b0becd8..ff660fd469 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -2310,6 +2310,7 @@ static int kvm_init(MachineState *ms) - assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size()); - - s->sigmask_len = 8; -+ accel_blocker_init(); - - #ifdef KVM_CAP_SET_GUEST_DEBUG - QTAILQ_INIT(&s->kvm_sw_breakpoints); -@@ -3014,7 +3015,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...) - va_end(ap); - - trace_kvm_vm_ioctl(type, arg); -+ accel_ioctl_begin(); - ret = ioctl(s->vmfd, type, arg); -+ accel_ioctl_end(); - if (ret == -1) { - ret = -errno; - } -@@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...) - va_end(ap); - - trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg); -+ accel_cpu_ioctl_begin(cpu); - ret = ioctl(cpu->kvm_fd, type, arg); -+ accel_cpu_ioctl_end(cpu); - if (ret == -1) { - ret = -errno; - } -@@ -3050,7 +3055,9 @@ int kvm_device_ioctl(int fd, int type, ...) 
- va_end(ap); - - trace_kvm_device_ioctl(fd, type, arg); -+ accel_ioctl_begin(); - ret = ioctl(fd, type, arg); -+ accel_ioctl_end(); - if (ret == -1) { - ret = -errno; - } --- -2.31.1 - diff --git a/kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch b/kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch deleted file mode 100644 index 752aa08..0000000 --- a/kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch +++ /dev/null @@ -1,140 +0,0 @@ -From 0c19fb7c4a22a30830152b224b2e66963f829a7a Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Thu, 19 Jan 2023 18:24:24 +0100 -Subject: [PATCH 19/20] Revert "vhost-user: Introduce nested event loop in - vhost_user_read()" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 146: Fix vhost-user with dpdk -RH-Bugzilla: 2155173 -RH-Acked-by: Cindy Lu -RH-Acked-by: Greg Kurz (RH) -RH-Acked-by: Eugenio Pérez -RH-Commit: [2/2] 9b67041f92f29f70b7ccb41d8087801e4e4e38af (lvivier/qemu-kvm-centos) - -This reverts commit a7f523c7d114d445c5d83aecdba3efc038e5a692. - -The nested event loop is broken by design. It's only user was removed. -Drop the code as well so that nobody ever tries to use it again. - -I had to fix a couple of trivial conflicts around return values because -of 025faa872bcf ("vhost-user: stick to -errno error return convention"). - -Signed-off-by: Greg Kurz -Message-Id: <20230119172424.478268-3-groug@kaod.org> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Acked-by: Maxime Coquelin -(cherry picked from commit 4382138f642f69fdbc79ebf4e93d84be8061191f) -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-user.c | 65 ++++-------------------------------------- - 1 file changed, 5 insertions(+), 60 deletions(-) - -diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c -index 0ac00eb901..7cb49c50f9 100644 ---- a/hw/virtio/vhost-user.c -+++ b/hw/virtio/vhost-user.c -@@ -305,19 +305,8 @@ static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg) - return 0; - } - --struct vhost_user_read_cb_data { -- struct vhost_dev *dev; -- VhostUserMsg *msg; -- GMainLoop *loop; -- int ret; --}; -- --static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, -- gpointer opaque) -+static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) - { -- struct vhost_user_read_cb_data *data = opaque; -- struct vhost_dev *dev = data->dev; -- VhostUserMsg *msg = data->msg; - struct vhost_user *u = dev->opaque; - CharBackend *chr = u->user->chr; - uint8_t *p = (uint8_t *) msg; -@@ -325,8 +314,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, - - r = vhost_user_read_header(dev, msg); - if (r < 0) { -- data->ret = r; -- goto end; -+ return r; - } - - /* validate message size is sane */ -@@ -334,8 +322,7 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, - error_report("Failed to read msg header." - " Size %d exceeds the maximum %zu.", msg->hdr.size, - VHOST_USER_PAYLOAD_SIZE); -- data->ret = -EPROTO; -- goto end; -+ return -EPROTO; - } - - if (msg->hdr.size) { -@@ -346,53 +333,11 @@ static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition, - int saved_errno = errno; - error_report("Failed to read msg payload." - " Read %d instead of %d.", r, msg->hdr.size); -- data->ret = r < 0 ? -saved_errno : -EIO; -- goto end; -+ return r < 0 ? 
-saved_errno : -EIO; - } - } - --end: -- g_main_loop_quit(data->loop); -- return G_SOURCE_REMOVE; --} -- --static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) --{ -- struct vhost_user *u = dev->opaque; -- CharBackend *chr = u->user->chr; -- GMainContext *prev_ctxt = chr->chr->gcontext; -- GMainContext *ctxt = g_main_context_new(); -- GMainLoop *loop = g_main_loop_new(ctxt, FALSE); -- struct vhost_user_read_cb_data data = { -- .dev = dev, -- .loop = loop, -- .msg = msg, -- .ret = 0 -- }; -- -- /* -- * We want to be able to monitor the slave channel fd while waiting -- * for chr I/O. This requires an event loop, but we can't nest the -- * one to which chr is currently attached : its fd handlers might not -- * be prepared for re-entrancy. So we create a new one and switch chr -- * to use it. -- */ -- qemu_chr_be_update_read_handlers(chr->chr, ctxt); -- qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); -- -- g_main_loop_run(loop); -- -- /* -- * Restore the previous event loop context. This also destroys/recreates -- * event sources : this guarantees that all pending events in the original -- * context that have been processed by the nested loop are purged. 
-- */ -- qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt); -- -- g_main_loop_unref(loop); -- g_main_context_unref(ctxt); -- -- return data.ret; -+ return 0; - } - - static int process_message_reply(struct vhost_dev *dev, --- -2.31.1 - diff --git a/kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch b/kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch deleted file mode 100644 index 8e7b906..0000000 --- a/kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch +++ /dev/null @@ -1,143 +0,0 @@ -From 9fb47ad317ad8cdda9960190d499ad6c3a9817f0 Mon Sep 17 00:00:00 2001 -From: Greg Kurz -Date: Thu, 19 Jan 2023 18:24:23 +0100 -Subject: [PATCH 18/20] Revert "vhost-user: Monitor slave channel in - vhost_user_read()" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 146: Fix vhost-user with dpdk -RH-Bugzilla: 2155173 -RH-Acked-by: Cindy Lu -RH-Acked-by: Greg Kurz (RH) -RH-Acked-by: Eugenio Pérez -RH-Commit: [1/2] c583a7f121ca9c93c9a2ad17bf0ccf5c1241dc99 (lvivier/qemu-kvm-centos) - -This reverts commit db8a3772e300c1a656331a92da0785d81667dc81. - -Motivation : this is breaking vhost-user with DPDK as reported in [0]. - -Received unexpected msg type. Expected 22 received 40 -Fail to update device iotlb -Received unexpected msg type. Expected 40 received 22 -Received unexpected msg type. Expected 22 received 11 -Fail to update device iotlb -Received unexpected msg type. Expected 11 received 22 -vhost VQ 1 ring restore failed: -71: Protocol error (71) -Received unexpected msg type. Expected 22 received 11 -Fail to update device iotlb -Received unexpected msg type. 
Expected 11 received 22 -vhost VQ 0 ring restore failed: -71: Protocol error (71) -unable to start vhost net: 71: falling back on userspace virtio - -The failing sequence that leads to the first error is : -- QEMU sends a VHOST_USER_GET_STATUS (40) request to DPDK on the master - socket -- QEMU starts a nested event loop in order to wait for the - VHOST_USER_GET_STATUS response and to be able to process messages from - the slave channel -- DPDK sends a couple of legitimate IOTLB miss messages on the slave - channel -- QEMU processes each IOTLB request and sends VHOST_USER_IOTLB_MSG (22) - updates on the master socket -- QEMU assumes to receive a response for the latest VHOST_USER_IOTLB_MSG - but it gets the response for the VHOST_USER_GET_STATUS instead - -The subsequent errors have the same root cause : the nested event loop -breaks the order by design. It lures QEMU to expect responses to the -latest message sent on the master socket to arrive first. - -Since this was only needed for DAX enablement which is still not merged -upstream, just drop the code for now. A working solution will have to -be merged later on. Likely protect the master socket with a mutex -and service the slave channel with a separate thread, as discussed with -Maxime in the mail thread below. - -[0] https://lore.kernel.org/qemu-devel/43145ede-89dc-280e-b953-6a2b436de395@redhat.com/ - -Reported-by: Yanghang Liu -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2155173 -Signed-off-by: Greg Kurz -Message-Id: <20230119172424.478268-2-groug@kaod.org> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Acked-by: Stefan Hajnoczi -Acked-by: Maxime Coquelin -(cherry picked from commit f340a59d5a852d75ae34555723694c7e8eafbd0c) -Signed-off-by: Laurent Vivier ---- - hw/virtio/vhost-user.c | 35 +++-------------------------------- - 1 file changed, 3 insertions(+), 32 deletions(-) - -diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c -index 8f635844af..0ac00eb901 100644 ---- a/hw/virtio/vhost-user.c -+++ b/hw/virtio/vhost-user.c -@@ -356,35 +356,6 @@ end: - return G_SOURCE_REMOVE; - } - --static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, -- gpointer opaque); -- --/* -- * This updates the read handler to use a new event loop context. -- * Event sources are removed from the previous context : this ensures -- * that events detected in the previous context are purged. They will -- * be re-detected and processed in the new context. -- */ --static void slave_update_read_handler(struct vhost_dev *dev, -- GMainContext *ctxt) --{ -- struct vhost_user *u = dev->opaque; -- -- if (!u->slave_ioc) { -- return; -- } -- -- if (u->slave_src) { -- g_source_destroy(u->slave_src); -- g_source_unref(u->slave_src); -- } -- -- u->slave_src = qio_channel_add_watch_source(u->slave_ioc, -- G_IO_IN | G_IO_HUP, -- slave_read, dev, NULL, -- ctxt); --} -- - static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) - { - struct vhost_user *u = dev->opaque; -@@ -406,7 +377,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) - * be prepared for re-entrancy. So we create a new one and switch chr - * to use it. - */ -- slave_update_read_handler(dev, ctxt); - qemu_chr_be_update_read_handlers(chr->chr, ctxt); - qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data); - -@@ -418,7 +388,6 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) - * context that have been processed by the nested loop are purged. 
- */ - qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt); -- slave_update_read_handler(dev, NULL); - - g_main_loop_unref(loop); - g_main_context_unref(ctxt); -@@ -1802,7 +1771,9 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) - return -ECONNREFUSED; - } - u->slave_ioc = ioc; -- slave_update_read_handler(dev, NULL); -+ u->slave_src = qio_channel_add_watch_source(u->slave_ioc, -+ G_IO_IN | G_IO_HUP, -+ slave_read, dev, NULL, NULL); - - if (reply_supported) { - msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; --- -2.31.1 - diff --git a/kvm-accel-introduce-accelerator-blocker-API.patch b/kvm-accel-introduce-accelerator-blocker-API.patch deleted file mode 100644 index 29a8ac5..0000000 --- a/kvm-accel-introduce-accelerator-blocker-API.patch +++ /dev/null @@ -1,348 +0,0 @@ -From ae2077fd5d351a68c313c64f07fb225dff694a8f Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Mon, 16 Jan 2023 07:16:41 -0500 -Subject: [PATCH 29/31] accel: introduce accelerator blocker API -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 138: accel: introduce accelerator blocker API -RH-Bugzilla: 1979276 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: David Hildenbrand -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/3] 56b07cd7db516c5066e6d66b4695064fdf73abbf (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276 - -commit bd688fc93120fb3e28aa70e3dfdf567ccc1e0bc1 -Author: Emanuele Giuseppe Esposito -Date: Fri Nov 11 10:47:56 2022 -0500 - - accel: introduce accelerator blocker API - - This API allows the accelerators to prevent vcpus from issuing - new ioctls while execting a critical section marked with the - accel_ioctl_inhibit_begin/end functions. - - Note that all functions submitting ioctls must mark where the - ioctl is being called with accel_{cpu_}ioctl_begin/end(). - - This API requires the caller to always hold the BQL. 
- API documentation is in sysemu/accel-blocker.h - - Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt - (to minimize cache line bouncing) to keep avoid that new ioctls - run when the critical section starts, and a QemuEvent to wait - that all running ioctls finish. - - Signed-off-by: Emanuele Giuseppe Esposito - Reviewed-by: Philippe Mathieu-Daudé - Message-Id: <20221111154758.1372674-2-eesposit@redhat.com> - Signed-off-by: Paolo Bonzini - -Conflicts: - util/meson.build: "interval-tree.c" does not exist - -Signed-off-by: Emanuele Giuseppe Esposito ---- - accel/accel-blocker.c | 154 +++++++++++++++++++++++++++++++++ - accel/meson.build | 2 +- - hw/core/cpu-common.c | 2 + - include/hw/core/cpu.h | 3 + - include/sysemu/accel-blocker.h | 56 ++++++++++++ - util/meson.build | 2 +- - 6 files changed, 217 insertions(+), 2 deletions(-) - create mode 100644 accel/accel-blocker.c - create mode 100644 include/sysemu/accel-blocker.h - -diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c -new file mode 100644 -index 0000000000..1e7f423462 ---- /dev/null -+++ b/accel/accel-blocker.c -@@ -0,0 +1,154 @@ -+/* -+ * Lock to inhibit accelerator ioctls -+ * -+ * Copyright (c) 2022 Red Hat Inc. -+ * -+ * Author: Emanuele Giuseppe Esposito -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a copy -+ * of this software and associated documentation files (the "Software"), to deal -+ * in the Software without restriction, including without limitation the rights -+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -+ * copies of the Software, and to permit persons to whom the Software is -+ * furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. 
-+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -+ * THE SOFTWARE. -+ */ -+ -+#include "qemu/osdep.h" -+#include "qemu/thread.h" -+#include "qemu/main-loop.h" -+#include "hw/core/cpu.h" -+#include "sysemu/accel-blocker.h" -+ -+static QemuLockCnt accel_in_ioctl_lock; -+static QemuEvent accel_in_ioctl_event; -+ -+void accel_blocker_init(void) -+{ -+ qemu_lockcnt_init(&accel_in_ioctl_lock); -+ qemu_event_init(&accel_in_ioctl_event, false); -+} -+ -+void accel_ioctl_begin(void) -+{ -+ if (likely(qemu_mutex_iothread_locked())) { -+ return; -+ } -+ -+ /* block if lock is taken in kvm_ioctl_inhibit_begin() */ -+ qemu_lockcnt_inc(&accel_in_ioctl_lock); -+} -+ -+void accel_ioctl_end(void) -+{ -+ if (likely(qemu_mutex_iothread_locked())) { -+ return; -+ } -+ -+ qemu_lockcnt_dec(&accel_in_ioctl_lock); -+ /* change event to SET. If event was BUSY, wake up all waiters */ -+ qemu_event_set(&accel_in_ioctl_event); -+} -+ -+void accel_cpu_ioctl_begin(CPUState *cpu) -+{ -+ if (unlikely(qemu_mutex_iothread_locked())) { -+ return; -+ } -+ -+ /* block if lock is taken in kvm_ioctl_inhibit_begin() */ -+ qemu_lockcnt_inc(&cpu->in_ioctl_lock); -+} -+ -+void accel_cpu_ioctl_end(CPUState *cpu) -+{ -+ if (unlikely(qemu_mutex_iothread_locked())) { -+ return; -+ } -+ -+ qemu_lockcnt_dec(&cpu->in_ioctl_lock); -+ /* change event to SET. 
If event was BUSY, wake up all waiters */ -+ qemu_event_set(&accel_in_ioctl_event); -+} -+ -+static bool accel_has_to_wait(void) -+{ -+ CPUState *cpu; -+ bool needs_to_wait = false; -+ -+ CPU_FOREACH(cpu) { -+ if (qemu_lockcnt_count(&cpu->in_ioctl_lock)) { -+ /* exit the ioctl, if vcpu is running it */ -+ qemu_cpu_kick(cpu); -+ needs_to_wait = true; -+ } -+ } -+ -+ return needs_to_wait || qemu_lockcnt_count(&accel_in_ioctl_lock); -+} -+ -+void accel_ioctl_inhibit_begin(void) -+{ -+ CPUState *cpu; -+ -+ /* -+ * We allow to inhibit only when holding the BQL, so we can identify -+ * when an inhibitor wants to issue an ioctl easily. -+ */ -+ g_assert(qemu_mutex_iothread_locked()); -+ -+ /* Block further invocations of the ioctls outside the BQL. */ -+ CPU_FOREACH(cpu) { -+ qemu_lockcnt_lock(&cpu->in_ioctl_lock); -+ } -+ qemu_lockcnt_lock(&accel_in_ioctl_lock); -+ -+ /* Keep waiting until there are running ioctls */ -+ while (true) { -+ -+ /* Reset event to FREE. */ -+ qemu_event_reset(&accel_in_ioctl_event); -+ -+ if (accel_has_to_wait()) { -+ /* -+ * If event is still FREE, and there are ioctls still in progress, -+ * wait. -+ * -+ * If an ioctl finishes before qemu_event_wait(), it will change -+ * the event state to SET. This will prevent qemu_event_wait() from -+ * blocking, but it's not a problem because if other ioctls are -+ * still running the loop will iterate once more and reset the event -+ * status to FREE so that it can wait properly. -+ * -+ * If an ioctls finishes while qemu_event_wait() is blocking, then -+ * it will be waken up, but also here the while loop makes sure -+ * to re-enter the wait if there are other running ioctls. 
-+ */ -+ qemu_event_wait(&accel_in_ioctl_event); -+ } else { -+ /* No ioctl is running */ -+ return; -+ } -+ } -+} -+ -+void accel_ioctl_inhibit_end(void) -+{ -+ CPUState *cpu; -+ -+ qemu_lockcnt_unlock(&accel_in_ioctl_lock); -+ CPU_FOREACH(cpu) { -+ qemu_lockcnt_unlock(&cpu->in_ioctl_lock); -+ } -+} -+ -diff --git a/accel/meson.build b/accel/meson.build -index 259c35c4c8..061332610f 100644 ---- a/accel/meson.build -+++ b/accel/meson.build -@@ -1,4 +1,4 @@ --specific_ss.add(files('accel-common.c')) -+specific_ss.add(files('accel-common.c', 'accel-blocker.c')) - softmmu_ss.add(files('accel-softmmu.c')) - user_ss.add(files('accel-user.c')) - -diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c -index f9fdd46b9d..8d6a4b1b65 100644 ---- a/hw/core/cpu-common.c -+++ b/hw/core/cpu-common.c -@@ -237,6 +237,7 @@ static void cpu_common_initfn(Object *obj) - cpu->nr_threads = 1; - - qemu_mutex_init(&cpu->work_mutex); -+ qemu_lockcnt_init(&cpu->in_ioctl_lock); - QSIMPLEQ_INIT(&cpu->work_list); - QTAILQ_INIT(&cpu->breakpoints); - QTAILQ_INIT(&cpu->watchpoints); -@@ -248,6 +249,7 @@ static void cpu_common_finalize(Object *obj) - { - CPUState *cpu = CPU(obj); - -+ qemu_lockcnt_destroy(&cpu->in_ioctl_lock); - qemu_mutex_destroy(&cpu->work_mutex); - } - -diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h -index 8830546121..2417597236 100644 ---- a/include/hw/core/cpu.h -+++ b/include/hw/core/cpu.h -@@ -398,6 +398,9 @@ struct CPUState { - uint32_t kvm_fetch_index; - uint64_t dirty_pages; - -+ /* Use by accel-block: CPU is executing an ioctl() */ -+ QemuLockCnt in_ioctl_lock; -+ - /* Used for events with 'vcpu' and *without* the 'disabled' properties */ - DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS); - DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS); -diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h -new file mode 100644 -index 0000000000..72020529ef ---- /dev/null -+++ b/include/sysemu/accel-blocker.h -@@ -0,0 
+1,56 @@ -+/* -+ * Accelerator blocking API, to prevent new ioctls from starting and wait the -+ * running ones finish. -+ * This mechanism differs from pause/resume_all_vcpus() in that it does not -+ * release the BQL. -+ * -+ * Copyright (c) 2022 Red Hat Inc. -+ * -+ * Author: Emanuele Giuseppe Esposito -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+#ifndef ACCEL_BLOCKER_H -+#define ACCEL_BLOCKER_H -+ -+#include "qemu/osdep.h" -+#include "sysemu/cpus.h" -+ -+extern void accel_blocker_init(void); -+ -+/* -+ * accel_{cpu_}ioctl_begin/end: -+ * Mark when ioctl is about to run or just finished. -+ * -+ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is -+ * called, preventing new ioctls to run. They will continue only after -+ * accel_ioctl_inibith_end(). -+ */ -+extern void accel_ioctl_begin(void); -+extern void accel_ioctl_end(void); -+extern void accel_cpu_ioctl_begin(CPUState *cpu); -+extern void accel_cpu_ioctl_end(CPUState *cpu); -+ -+/* -+ * accel_ioctl_inhibit_begin: start critical section -+ * -+ * This function makes sure that: -+ * 1) incoming accel_{cpu_}ioctl_begin() calls block -+ * 2) wait that all ioctls that were already running reach -+ * accel_{cpu_}ioctl_end(), kicking vcpus if necessary. -+ * -+ * This allows the caller to access shared data or perform operations without -+ * worrying of concurrent vcpus accesses. -+ */ -+extern void accel_ioctl_inhibit_begin(void); -+ -+/* -+ * accel_ioctl_inhibit_end: end critical section started by -+ * accel_ioctl_inhibit_begin() -+ * -+ * This function allows blocked accel_{cpu_}ioctl_begin() to continue. 
-+ */ -+extern void accel_ioctl_inhibit_end(void); -+ -+#endif /* ACCEL_BLOCKER_H */ -diff --git a/util/meson.build b/util/meson.build -index 25b9b61f98..85a5504c4d 100644 ---- a/util/meson.build -+++ b/util/meson.build -@@ -57,6 +57,7 @@ util_ss.add(files('guest-random.c')) - util_ss.add(files('yank.c')) - util_ss.add(files('int128.c')) - util_ss.add(files('memalign.c')) -+util_ss.add(files('lockcnt.c')) - - if have_user - util_ss.add(files('selfmap.c')) -@@ -71,7 +72,6 @@ endif - if have_block or have_ga - util_ss.add(files('aiocb.c', 'async.c')) - util_ss.add(files('base64.c')) -- util_ss.add(files('lockcnt.c')) - util_ss.add(files('main-loop.c')) - util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c')) - util_ss.add(files('coroutine-@0@.c'.format(config_host['CONFIG_COROUTINE_BACKEND']))) --- -2.31.1 - diff --git a/kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch b/kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch deleted file mode 100644 index 0680a26..0000000 --- a/kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch +++ /dev/null @@ -1,58 +0,0 @@ -From ab68e13b7628f2348d41a4518a92508542af712f Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Fri, 3 Feb 2023 18:15:10 +0100 -Subject: [PATCH 05/20] accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page - -RH-Author: Eric Auger -RH-MergeRequest: 144: accel/tcg: Test CPUJumpCache in tb_jmp_cache_clear_page -RH-Bugzilla: 2165280 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Gavin Shan -RH-Acked-by: Shaoqin Huang -RH-Commit: [1/1] 5b0863c34ba06c01c4e343d1ecd72402779c7de3 (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/2165280 -Upstream: yes -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=50530041 -Test: 'kvm unit test ./run_tests.sh -g debug' does not SIGSEV anymore - -After commit 4e4fa6c12d ("accel/tcg: Complete cpu initialization -before registration"), it looks the CPUJumpCache pointer can be NULL. 
-This causes a SIGSEV when running debug-wp-migration kvm unit test. - -At the first place it should be clarified why this TCG code is called -with KVM acceleration. This may hide another bug. - -Fixes: 4e4fa6c12d ("accel/tcg: Complete cpu initialization before registration") -Signed-off-by: Eric Auger -Message-Id: <20230203171510.2867451-1-eric.auger@redhat.com> -Signed-off-by: Richard Henderson -(cherry picked from commit 99ab4d500af638ba3ebb20e8aa89d72201b70860) -Signed-off-by: Eric Auger ---- - accel/tcg/cputlb.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c -index 6f1c00682b..4244b0e4e3 100644 ---- a/accel/tcg/cputlb.c -+++ b/accel/tcg/cputlb.c -@@ -100,9 +100,14 @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns, - - static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr) - { -- int i, i0 = tb_jmp_cache_hash_page(page_addr); - CPUJumpCache *jc = cpu->tb_jmp_cache; -+ int i, i0; - -+ if (unlikely(!jc)) { -+ return; -+ } -+ -+ i0 = tb_jmp_cache_hash_page(page_addr); - for (i = 0; i < TB_JMP_PAGE_SIZE; i++) { - qatomic_set(&jc->array[i0 + i].tb, NULL); - } --- -2.31.1 - diff --git a/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch b/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch deleted file mode 100644 index ee7e7f9..0000000 --- a/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch +++ /dev/null @@ -1,50 +0,0 @@ -From e9a9c0b023ae0dcbb14543b74063cca931d8230f Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 08/12] aio-wait: switch to smp_mb__after_rmw() - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [5/9] a90c96d148fdbec340a45dc6cedf3660d8be2aab (eesposit/qemu-kvm) - -Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit b532526a07ef3b903ead2e055fe6cc87b41057a3 -Author: Paolo Bonzini -Date: Fri Mar 3 11:03:52 2023 +0100 - - aio-wait: switch to smp_mb__after_rmw() - - The barrier comes after an atomic increment, so it is enough to use - smp_mb__after_rmw(); this avoids a double barrier on x86 systems. - - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - include/block/aio-wait.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h -index dd9a7f6461..da13357bb8 100644 ---- a/include/block/aio-wait.h -+++ b/include/block/aio-wait.h -@@ -85,7 +85,7 @@ extern AioWait global_aio_wait; - /* Increment wait_->num_waiters before evaluating cond. */ \ - qatomic_inc(&wait_->num_waiters); \ - /* Paired with smp_mb in aio_wait_kick(). */ \ -- smp_mb(); \ -+ smp_mb__after_rmw(); \ - if (ctx_ && in_aio_context_home_thread(ctx_)) { \ - while ((cond)) { \ - aio_poll(ctx_, true); \ --- -2.39.1 - diff --git a/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch b/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch deleted file mode 100644 index 0e4a48d..0000000 --- a/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch +++ /dev/null @@ -1,66 +0,0 @@ -From 3d823dda6832b76fd3d776131008107b0b0f7166 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 12/12] async: clarify usage of barriers in the polling case - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [9/9] b4ea298d75a75bb61e07a27d1296e0095fbc2bbf (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - 
-commit 6229438cca037d42f44a96d38feb15cb102a444f -Author: Paolo Bonzini -Date: Mon Mar 6 10:43:52 2023 +0100 - - async: clarify usage of barriers in the polling case - - Explain that aio_context_notifier_poll() relies on - aio_notify_accept() to catch all the memory writes that were - done before ctx->notified was set to true. - - Reviewed-by: Richard Henderson - Reviewed-by: Stefan Hajnoczi - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - util/async.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/util/async.c b/util/async.c -index 37d3e6036d..e0846baf93 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -472,8 +472,9 @@ void aio_notify_accept(AioContext *ctx) - qatomic_set(&ctx->notified, false); - - /* -- * Write ctx->notified before reading e.g. bh->flags. Pairs with smp_wmb -- * in aio_notify. -+ * Order reads of ctx->notified (in aio_context_notifier_poll()) and the -+ * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs -+ * with smp_wmb() in aio_notify. - */ - smp_mb(); - } -@@ -496,6 +497,11 @@ static bool aio_context_notifier_poll(void *opaque) - EventNotifier *e = opaque; - AioContext *ctx = container_of(e, AioContext, notifier); - -+ /* -+ * No need for load-acquire because we just want to kick the -+ * event loop. aio_notify_accept() takes care of synchronizing -+ * the event loop with the producers. 
-+ */ - return qatomic_read(&ctx->notified); - } - --- -2.39.1 - diff --git a/kvm-async-update-documentation-of-the-memory-barriers.patch b/kvm-async-update-documentation-of-the-memory-barriers.patch deleted file mode 100644 index cb92dc9..0000000 --- a/kvm-async-update-documentation-of-the-memory-barriers.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 29bcf843d796ffc2a0906dea947e4cdfe9f7ec60 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 11/12] async: update documentation of the memory barriers - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [8/9] 5ca20e4c8983e0bc1ecee66bead3472777abe4d1 (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit 8dd48650b43dfde4ebea34191ac267e474bcc29e -Author: Paolo Bonzini -Date: Mon Mar 6 10:15:06 2023 +0100 - - async: update documentation of the memory barriers - - Ever since commit 8c6b0356b539 ("util/async: make bh_aio_poll() O(1)", - 2020-02-22), synchronization between qemu_bh_schedule() and aio_bh_poll() - is happening when the bottom half is enqueued in the bh_list; not - when the flags are set. Update the documentation to match. - - Reviewed-by: Stefan Hajnoczi - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - util/async.c | 33 +++++++++++++++++++-------------- - 1 file changed, 19 insertions(+), 14 deletions(-) - -diff --git a/util/async.c b/util/async.c -index 63434ddae4..37d3e6036d 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -73,14 +73,21 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags) - unsigned old_flags; - - /* -- * The memory barrier implicit in qatomic_fetch_or makes sure that: -- * 1. 
idle & any writes needed by the callback are done before the -- * locations are read in the aio_bh_poll. -- * 2. ctx is loaded before the callback has a chance to execute and bh -- * could be freed. -+ * Synchronizes with atomic_fetch_and() in aio_bh_dequeue(), ensuring that -+ * insertion starts after BH_PENDING is set. - */ - old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags); -+ - if (!(old_flags & BH_PENDING)) { -+ /* -+ * At this point the bottom half becomes visible to aio_bh_poll(). -+ * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in -+ * aio_bh_poll(), ensuring that: -+ * 1. any writes needed by the callback are visible from the callback -+ * after aio_bh_dequeue() returns bh. -+ * 2. ctx is loaded before the callback has a chance to execute and bh -+ * could be freed. -+ */ - QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next); - } - -@@ -106,11 +113,8 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags) - QSLIST_REMOVE_HEAD(head, next); - - /* -- * The qatomic_and is paired with aio_bh_enqueue(). The implicit memory -- * barrier ensures that the callback sees all writes done by the scheduling -- * thread. It also ensures that the scheduling thread sees the cleared -- * flag before bh->cb has run, and thus will call aio_notify again if -- * necessary. -+ * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that -+ * the removal finishes before BH_PENDING is reset. - */ - *flags = qatomic_fetch_and(&bh->flags, - ~(BH_PENDING | BH_SCHEDULED | BH_IDLE)); -@@ -157,6 +161,7 @@ int aio_bh_poll(AioContext *ctx) - BHListSlice *s; - int ret = 0; - -+ /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */ - QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list); - QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next); - -@@ -446,15 +451,15 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx) - void aio_notify(AioContext *ctx) - { - /* -- * Write e.g. bh->flags before writing ctx->notified. 
Pairs with smp_mb in -- * aio_notify_accept. -+ * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with -+ * smp_mb() in aio_notify_accept(). - */ - smp_wmb(); - qatomic_set(&ctx->notified, true); - - /* -- * Write ctx->notified before reading ctx->notify_me. Pairs -- * with smp_mb in aio_ctx_prepare or aio_poll. -+ * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me. -+ * Pairs with smp_mb() in aio_ctx_prepare or aio_poll. - */ - smp_mb(); - if (qatomic_read(&ctx->notify_me)) { --- -2.39.1 - diff --git a/kvm-block-Call-drain-callbacks-only-once.patch b/kvm-block-Call-drain-callbacks-only-once.patch deleted file mode 100644 index 04f1dda..0000000 --- a/kvm-block-Call-drain-callbacks-only-once.patch +++ /dev/null @@ -1,250 +0,0 @@ -From 9bb9cafd736057fd2a8ebfa6f5769668f125fbe6 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:06 +0100 -Subject: [PATCH 24/31] block: Call drain callbacks only once - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [12/16] ea9a433dc01d1b8539a2d4ea12887f2a3ce830ea (sgarzarella/qemu-kvm-c-9-s) - -We only need to call both the BlockDriver's callback and the parent -callbacks when going from undrained to drained or vice versa. A second -drain section doesn't make a difference for the driver or the parent, -they weren't supposed to send new requests before and after the second -drain. - -One thing that gets in the way is the 'ignore_bds_parents' parameter in -bdrv_do_drained_begin_quiesce() and bdrv_do_drained_end(): It means that -bdrv_drain_all_begin() increases bs->quiesce_counter, but does not -quiesce the parent through BdrvChildClass callbacks. 
If an additional -drain section is started now, bs->quiesce_counter will be non-zero, but -we would still need to quiesce the parent through BdrvChildClass in -order to keep things consistent (and unquiesce it on the matching -bdrv_drained_end(), even though the counter would not reach 0 yet as -long as the bdrv_drain_all() section is still active). - -Instead of keeping track of this, let's just get rid of the parameter. -It was introduced in commit 6cd5c9d7b2d as an optimisation so that -during bdrv_drain_all(), we wouldn't recursively drain all parents up to -the root for each node, resulting in quadratic complexity. As it happens, -calling the callbacks only once solves the same problem, so as of this -patch, we'll still have O(n) complexity and ignore_bds_parents is not -needed any more. - -This patch only ignores the 'ignore_bds_parents' parameter. It will be -removed in a separate patch. - -Signed-off-by: Kevin Wolf -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-12-kwolf@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 57e05be343f33f4e5899a8d8946a8596d68424a1) -Signed-off-by: Stefano Garzarella ---- - block.c | 25 +++++++------------------ - block/io.c | 30 ++++++++++++++++++------------ - include/block/block_int-common.h | 8 ++++---- - tests/unit/test-bdrv-drain.c | 16 ++++++++++------ - 4 files changed, 39 insertions(+), 40 deletions(-) - -diff --git a/block.c b/block.c -index e0e3b21790..5a583e260d 100644 ---- a/block.c -+++ b/block.c -@@ -2824,7 +2824,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - { - BlockDriverState *old_bs = child->bs; - int new_bs_quiesce_counter; -- int drain_saldo; - - assert(!child->frozen); - assert(old_bs != new_bs); -@@ -2834,16 +2833,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); - } - -- new_bs_quiesce_counter = (new_bs ? 
new_bs->quiesce_counter : 0); -- drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter; -- - /* - * If the new child node is drained but the old one was not, flush - * all outstanding requests to the old child node. - */ -- while (drain_saldo > 0 && child->klass->drained_begin) { -+ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); -+ if (new_bs_quiesce_counter && !child->quiesced_parent) { - bdrv_parent_drained_begin_single(child, true); -- drain_saldo--; - } - - if (old_bs) { -@@ -2859,16 +2855,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - if (new_bs) { - assert_bdrv_graph_writable(new_bs); - QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); -- -- /* -- * Polling in bdrv_parent_drained_begin_single() may have led to the new -- * node's quiesce_counter having been decreased. Not a problem, we just -- * need to recognize this here and then invoke drained_end appropriately -- * more often. -- */ -- assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); -- drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; -- - if (child->klass->attach) { - child->klass->attach(child); - } -@@ -2877,10 +2863,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - /* - * If the old child node was drained but the new one is not, allow - * requests to come in only after the new node has been attached. -+ * -+ * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single() -+ * polls, which could have changed the value. - */ -- while (drain_saldo < 0 && child->klass->drained_end) { -+ new_bs_quiesce_counter = (new_bs ? 
new_bs->quiesce_counter : 0); -+ if (!new_bs_quiesce_counter && child->quiesced_parent) { - bdrv_parent_drained_end_single(child); -- drain_saldo++; - } - } - -diff --git a/block/io.c b/block/io.c -index 75224480d0..87d6f22ec4 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -62,8 +62,9 @@ void bdrv_parent_drained_end_single(BdrvChild *c) - { - IO_OR_GS_CODE(); - -- assert(c->parent_quiesce_counter > 0); -- c->parent_quiesce_counter--; -+ assert(c->quiesced_parent); -+ c->quiesced_parent = false; -+ - if (c->klass->drained_end) { - c->klass->drained_end(c); - } -@@ -110,7 +111,10 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) - { - AioContext *ctx = bdrv_child_get_parent_aio_context(c); - IO_OR_GS_CODE(); -- c->parent_quiesce_counter++; -+ -+ assert(!c->quiesced_parent); -+ c->quiesced_parent = true; -+ - if (c->klass->drained_begin) { - c->klass->drained_begin(c); - } -@@ -358,11 +362,12 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - /* Stop things in parent-to-child order */ - if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { - aio_disable_external(bdrv_get_aio_context(bs)); -- } - -- bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); -- if (bs->drv && bs->drv->bdrv_drain_begin) { -- bs->drv->bdrv_drain_begin(bs); -+ /* TODO Remove ignore_bds_parents, we don't consider it any more */ -+ bdrv_parent_drained_begin(bs, parent, false); -+ if (bs->drv && bs->drv->bdrv_drain_begin) { -+ bs->drv->bdrv_drain_begin(bs); -+ } - } - } - -@@ -413,13 +418,14 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, - assert(bs->quiesce_counter > 0); - - /* Re-enable things in child-to-parent order */ -- if (bs->drv && bs->drv->bdrv_drain_end) { -- bs->drv->bdrv_drain_end(bs); -- } -- bdrv_parent_drained_end(bs, parent, ignore_bds_parents); -- - old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); - if (old_quiesce_counter == 1) { -+ if (bs->drv && bs->drv->bdrv_drain_end) { -+ bs->drv->bdrv_drain_end(bs); 
-+ } -+ /* TODO Remove ignore_bds_parents, we don't consider it any more */ -+ bdrv_parent_drained_end(bs, parent, false); -+ - aio_enable_external(bdrv_get_aio_context(bs)); - } - } -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index 791dddfd7d..a6bc6b7fe9 100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -980,13 +980,13 @@ struct BdrvChild { - bool frozen; - - /* -- * How many times the parent of this child has been drained -+ * True if the parent of this child has been drained by this BdrvChild - * (through klass->drained_*). -- * Usually, this is equal to bs->quiesce_counter (potentially -- * reduced by bdrv_drain_all_count). It may differ while the -+ * -+ * It is generally true if bs->quiesce_counter > 0. It may differ while the - * child is entering or leaving a drained section. - */ -- int parent_quiesce_counter; -+ bool quiesced_parent; - - QLIST_ENTRY(BdrvChild) next; - QLIST_ENTRY(BdrvChild) next_parent; -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index dda08de8db..172bc6debc 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -296,7 +296,11 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive) - - do_drain_begin(drain_type, bs); - -- g_assert_cmpint(bs->quiesce_counter, ==, 1); -+ if (drain_type == BDRV_DRAIN_ALL) { -+ g_assert_cmpint(bs->quiesce_counter, ==, 2); -+ } else { -+ g_assert_cmpint(bs->quiesce_counter, ==, 1); -+ } - g_assert_cmpint(backing->quiesce_counter, ==, !!recursive); - - do_drain_end(drain_type, bs); -@@ -348,8 +352,8 @@ static void test_nested(void) - - for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) { - for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) { -- int backing_quiesce = (outer != BDRV_DRAIN) + -- (inner != BDRV_DRAIN); -+ int backing_quiesce = (outer == BDRV_DRAIN_ALL) + -+ (inner == BDRV_DRAIN_ALL); - - g_assert_cmpint(bs->quiesce_counter, ==, 0); - 
g_assert_cmpint(backing->quiesce_counter, ==, 0); -@@ -359,10 +363,10 @@ static void test_nested(void) - do_drain_begin(outer, bs); - do_drain_begin(inner, bs); - -- g_assert_cmpint(bs->quiesce_counter, ==, 2); -+ g_assert_cmpint(bs->quiesce_counter, ==, 2 + !!backing_quiesce); - g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce); -- g_assert_cmpint(s->drain_count, ==, 2); -- g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce); -+ g_assert_cmpint(s->drain_count, ==, 1); -+ g_assert_cmpint(backing_s->drain_count, ==, !!backing_quiesce); - - do_drain_end(inner, bs); - do_drain_end(outer, bs); --- -2.31.1 - diff --git a/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch b/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch deleted file mode 100644 index 80018cc..0000000 --- a/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch +++ /dev/null @@ -1,298 +0,0 @@ -From 150ef3356cc6732fede7ca059168fc0565ed0b76 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:09 +0100 -Subject: [PATCH 27/31] block: Don't poll in bdrv_replace_child_noperm() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [15/16] 5fc7d6b703a2d6c1118d875056f0afbd6ba5cca9 (sgarzarella/qemu-kvm-c-9-s) - -In order to make sure that bdrv_replace_child_noperm() doesn't have to -poll any more, get rid of the bdrv_parent_drained_begin_single() call. - -This is possible now because we can require that the parent is already -drained through the child in question when the function is called and we -don't call the parent drain callbacks more than once. 
- -The additional drain calls needed in callers cause the test case to run -its code in the drain handler too early (bdrv_attach_child() drains -now), so modify it to only enable the code after the test setup has -completed. - -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-15-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 23987471285a26397e3152a9244b652445fd36c4) -Signed-off-by: Stefano Garzarella ---- - block.c | 103 ++++++++++++++++++++++++++++++----- - block/io.c | 2 +- - include/block/block-io.h | 8 +++ - tests/unit/test-bdrv-drain.c | 10 ++++ - 4 files changed, 108 insertions(+), 15 deletions(-) - -diff --git a/block.c b/block.c -index af31a94863..65588d313a 100644 ---- a/block.c -+++ b/block.c -@@ -2407,6 +2407,20 @@ static void bdrv_replace_child_abort(void *opaque) - - GLOBAL_STATE_CODE(); - /* old_bs reference is transparently moved from @s to @s->child */ -+ if (!s->child->bs) { -+ /* -+ * The parents were undrained when removing old_bs from the child. New -+ * requests can't have been made, though, because the child was empty. -+ * -+ * TODO Make bdrv_replace_child_noperm() transactionable to avoid -+ * undraining the parent in the first place. Once this is done, having -+ * new_bs drained when calling bdrv_replace_child_tran() is not a -+ * requirement any more. -+ */ -+ bdrv_parent_drained_begin_single(s->child, false); -+ assert(!bdrv_parent_drained_poll_single(s->child)); -+ } -+ assert(s->child->quiesced_parent); - bdrv_replace_child_noperm(s->child, s->old_bs); - bdrv_unref(new_bs); - } -@@ -2422,12 +2436,19 @@ static TransactionActionDrv bdrv_replace_child_drv = { - * - * Note: real unref of old_bs is done only on commit. - * -+ * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be -+ * kept drained until the transaction is completed. -+ * - * The function doesn't update permissions, caller is responsible for this. 
- */ - static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, - Transaction *tran) - { - BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); -+ -+ assert(child->quiesced_parent); -+ assert(!new_bs || new_bs->quiesce_counter); -+ - *s = (BdrvReplaceChildState) { - .child = child, - .old_bs = child->bs, -@@ -2819,6 +2840,14 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) - return permissions[qapi_perm]; - } - -+/* -+ * Replaces the node that a BdrvChild points to without updating permissions. -+ * -+ * If @new_bs is non-NULL, the parent of @child must already be drained through -+ * @child. -+ * -+ * This function does not poll. -+ */ - static void bdrv_replace_child_noperm(BdrvChild *child, - BlockDriverState *new_bs) - { -@@ -2826,6 +2855,28 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - int new_bs_quiesce_counter; - - assert(!child->frozen); -+ -+ /* -+ * If we want to change the BdrvChild to point to a drained node as its new -+ * child->bs, we need to make sure that its new parent is drained, too. In -+ * other words, either child->quiesce_parent must already be true or we must -+ * be able to set it and keep the parent's quiesce_counter consistent with -+ * that, but without polling or starting new requests (this function -+ * guarantees that it doesn't poll, and starting new requests would be -+ * against the invariants of drain sections). -+ * -+ * To keep things simple, we pick the first option (child->quiesce_parent -+ * must already be true). We also generalise the rule a bit to make it -+ * easier to verify in callers and more likely to be covered in test cases: -+ * The parent must be quiesced through this child even if new_bs isn't -+ * currently drained. -+ * -+ * The only exception is for callers that always pass new_bs == NULL. 
In -+ * this case, we obviously never need to consider the case of a drained -+ * new_bs, so we can keep the callers simpler by allowing them not to drain -+ * the parent. -+ */ -+ assert(!new_bs || child->quiesced_parent); - assert(old_bs != new_bs); - GLOBAL_STATE_CODE(); - -@@ -2833,15 +2884,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); - } - -- /* -- * If the new child node is drained but the old one was not, flush -- * all outstanding requests to the old child node. -- */ -- new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); -- if (new_bs_quiesce_counter && !child->quiesced_parent) { -- bdrv_parent_drained_begin_single(child, true); -- } -- - if (old_bs) { - if (child->klass->detach) { - child->klass->detach(child); -@@ -2861,11 +2903,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - } - - /* -- * If the old child node was drained but the new one is not, allow -- * requests to come in only after the new node has been attached. -- * -- * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single() -- * polls, which could have changed the value. -+ * If the parent was drained through this BdrvChild previously, but new_bs -+ * is not drained, allow requests to come in only after the new node has -+ * been attached. - */ - new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); - if (!new_bs_quiesce_counter && child->quiesced_parent) { -@@ -3002,6 +3042,24 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, - } - - bdrv_ref(child_bs); -+ /* -+ * Let every new BdrvChild start with a drained parent. Inserting the child -+ * in the graph with bdrv_replace_child_noperm() will undrain it if -+ * @child_bs is not drained. 
-+ * -+ * The child was only just created and is not yet visible in global state -+ * until bdrv_replace_child_noperm() inserts it into the graph, so nobody -+ * could have sent requests and polling is not necessary. -+ * -+ * Note that this means that the parent isn't fully drained yet, we only -+ * stop new requests from coming in. This is fine, we don't care about the -+ * old requests here, they are not for this child. If another place enters a -+ * drain section for the same parent, but wants it to be fully quiesced, it -+ * will not run most of the the code in .drained_begin() again (which is not -+ * a problem, we already did this), but it will still poll until the parent -+ * is fully quiesced, so it will not be negatively affected either. -+ */ -+ bdrv_parent_drained_begin_single(new_child, false); - bdrv_replace_child_noperm(new_child, child_bs); - - BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); -@@ -5059,12 +5117,24 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran) - } - - if (child->bs) { -+ BlockDriverState *bs = child->bs; -+ bdrv_drained_begin(bs); - bdrv_replace_child_tran(child, NULL, tran); -+ bdrv_drained_end(bs); - } - - tran_add(tran, &bdrv_remove_child_drv, child); - } - -+static void undrain_on_clean_cb(void *opaque) -+{ -+ bdrv_drained_end(opaque); -+} -+ -+static TransactionActionDrv undrain_on_clean = { -+ .clean = undrain_on_clean_cb, -+}; -+ - static int bdrv_replace_node_noperm(BlockDriverState *from, - BlockDriverState *to, - bool auto_skip, Transaction *tran, -@@ -5074,6 +5144,11 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, - - GLOBAL_STATE_CODE(); - -+ bdrv_drained_begin(from); -+ bdrv_drained_begin(to); -+ tran_add(tran, &undrain_on_clean, from); -+ tran_add(tran, &undrain_on_clean, to); -+ - QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { - assert(c->bs == from); - if (!should_update_child(c, to)) { -diff --git a/block/io.c b/block/io.c -index 
5e9150d92c..ae64830eac 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -81,7 +81,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) - } - } - --static bool bdrv_parent_drained_poll_single(BdrvChild *c) -+bool bdrv_parent_drained_poll_single(BdrvChild *c) - { - if (c->klass->drained_poll) { - return c->klass->drained_poll(c); -diff --git a/include/block/block-io.h b/include/block/block-io.h -index 8f5e75756a..65e6d2569b 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -292,6 +292,14 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); - */ - void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); - -+/** -+ * bdrv_parent_drained_poll_single: -+ * -+ * Returns true if there is any pending activity to cease before @c can be -+ * called quiesced, false otherwise. -+ */ -+bool bdrv_parent_drained_poll_single(BdrvChild *c); -+ - /** - * bdrv_parent_drained_end_single: - * -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index 172bc6debc..2686a8acee 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -1654,6 +1654,7 @@ static void test_drop_intermediate_poll(void) - - - typedef struct BDRVReplaceTestState { -+ bool setup_completed; - bool was_drained; - bool was_undrained; - bool has_read; -@@ -1738,6 +1739,10 @@ static void bdrv_replace_test_drain_begin(BlockDriverState *bs) - { - BDRVReplaceTestState *s = bs->opaque; - -+ if (!s->setup_completed) { -+ return; -+ } -+ - if (!s->drain_count) { - s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs); - bdrv_inc_in_flight(bs); -@@ -1769,6 +1774,10 @@ static void bdrv_replace_test_drain_end(BlockDriverState *bs) - { - BDRVReplaceTestState *s = bs->opaque; - -+ if (!s->setup_completed) { -+ return; -+ } -+ - g_assert(s->drain_count > 0); - if (!--s->drain_count) { - s->was_undrained = true; -@@ -1867,6 +1876,7 @@ static void do_test_replace_child_mid_drain(int 
old_drain_count, - bdrv_ref(old_child_bs); - bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds, - BDRV_CHILD_COW, &error_abort); -+ parent_s->setup_completed = true; - - for (i = 0; i < old_drain_count; i++) { - bdrv_drained_begin(old_child_bs); --- -2.31.1 - diff --git a/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch b/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch deleted file mode 100644 index e3bf1e2..0000000 --- a/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 6af6de77dace29aa8548b3649dc9c6163740ac86 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:03 +0100 -Subject: [PATCH 21/31] block: Don't use subtree drains in - bdrv_drop_intermediate() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [9/16] 3c06fa826f262558f57d38b0155500c2e8e23a53 (sgarzarella/qemu-kvm-c-9-s) - -Instead of using a subtree drain from the top node (which also drains -child nodes of base that we're not even interested in), use a normal -drain for base, which automatically drains all of the parents, too. 
- -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-9-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 631086deefc32690ee56efed1c5b891dec31ae37) -Signed-off-by: Stefano Garzarella ---- - block.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/block.c b/block.c -index cb5e96b1cf..b3449a312e 100644 ---- a/block.c -+++ b/block.c -@@ -5586,7 +5586,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, - GLOBAL_STATE_CODE(); - - bdrv_ref(top); -- bdrv_subtree_drained_begin(top); -+ bdrv_drained_begin(base); - - if (!top->drv || !base->drv) { - goto exit; -@@ -5659,7 +5659,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, - - ret = 0; - exit: -- bdrv_subtree_drained_end(top); -+ bdrv_drained_end(base); - bdrv_unref(top); - return ret; - } --- -2.31.1 - diff --git a/kvm-block-Drain-individual-nodes-during-reopen.patch b/kvm-block-Drain-individual-nodes-during-reopen.patch deleted file mode 100644 index 24661fb..0000000 --- a/kvm-block-Drain-individual-nodes-during-reopen.patch +++ /dev/null @@ -1,157 +0,0 @@ -From ad52cb621daad45d3c2a0e2e670d6ca2e16690bd Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:02 +0100 -Subject: [PATCH 20/31] block: Drain individual nodes during reopen - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [8/16] 5441b6f0ae9102ef40d1093e1db3084eea81e3b0 (sgarzarella/qemu-kvm-c-9-s) - -bdrv_reopen() and friends use subtree drains as a lazy way of covering -all the nodes they touch. 
Turns out that this lazy way is a lot more -complicated than just draining the nodes individually, even not -accounting for the additional complexity in the drain mechanism itself. - -Simplify the code by switching to draining the individual nodes that are -already managed in the BlockReopenQueue anyway. - -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-8-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit d22933acd2f470eeef779e4d444e848f76dcfaf8) -Signed-off-by: Stefano Garzarella ---- - block.c | 16 +++++++++------- - block/replication.c | 6 ------ - blockdev.c | 13 ------------- - 3 files changed, 9 insertions(+), 26 deletions(-) - -diff --git a/block.c b/block.c -index 46df410b07..cb5e96b1cf 100644 ---- a/block.c -+++ b/block.c -@@ -4150,7 +4150,7 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, - * returns a pointer to bs_queue, which is either the newly allocated - * bs_queue, or the existing bs_queue being used. - * -- * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). -+ * bs is drained here and undrained by bdrv_reopen_queue_free(). - * - * To be called with bs->aio_context locked. - */ -@@ -4172,12 +4172,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, - int flags; - QemuOpts *opts; - -- /* Make sure that the caller remembered to use a drained section. This is -- * important to avoid graph changes between the recursive queuing here and -- * bdrv_reopen_multiple(). 
*/ -- assert(bs->quiesce_counter > 0); - GLOBAL_STATE_CODE(); - -+ bdrv_drained_begin(bs); -+ - if (bs_queue == NULL) { - bs_queue = g_new0(BlockReopenQueue, 1); - QTAILQ_INIT(bs_queue); -@@ -4328,6 +4326,12 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) - if (bs_queue) { - BlockReopenQueueEntry *bs_entry, *next; - QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { -+ AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs); -+ -+ aio_context_acquire(ctx); -+ bdrv_drained_end(bs_entry->state.bs); -+ aio_context_release(ctx); -+ - qobject_unref(bs_entry->state.explicit_options); - qobject_unref(bs_entry->state.options); - g_free(bs_entry); -@@ -4475,7 +4479,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, - - GLOBAL_STATE_CODE(); - -- bdrv_subtree_drained_begin(bs); - queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); - - if (ctx != qemu_get_aio_context()) { -@@ -4486,7 +4489,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, - if (ctx != qemu_get_aio_context()) { - aio_context_acquire(ctx); - } -- bdrv_subtree_drained_end(bs); - - return ret; - } -diff --git a/block/replication.c b/block/replication.c -index f1eed25e43..c62f48a874 100644 ---- a/block/replication.c -+++ b/block/replication.c -@@ -374,9 +374,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, - s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs); - } - -- bdrv_subtree_drained_begin(hidden_disk->bs); -- bdrv_subtree_drained_begin(secondary_disk->bs); -- - if (s->orig_hidden_read_only) { - QDict *opts = qdict_new(); - qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); -@@ -401,9 +398,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, - aio_context_acquire(ctx); - } - } -- -- bdrv_subtree_drained_end(hidden_disk->bs); -- bdrv_subtree_drained_end(secondary_disk->bs); - } - - static void backup_job_cleanup(BlockDriverState *bs) -diff --git a/blockdev.c b/blockdev.c 
-index 3f1dec6242..8ffb3d9537 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -3547,8 +3547,6 @@ fail: - void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) - { - BlockReopenQueue *queue = NULL; -- GSList *drained = NULL; -- GSList *p; - - /* Add each one of the BDS that we want to reopen to the queue */ - for (; reopen_list != NULL; reopen_list = reopen_list->next) { -@@ -3585,9 +3583,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) - ctx = bdrv_get_aio_context(bs); - aio_context_acquire(ctx); - -- bdrv_subtree_drained_begin(bs); - queue = bdrv_reopen_queue(queue, bs, qdict, false); -- drained = g_slist_prepend(drained, bs); - - aio_context_release(ctx); - } -@@ -3598,15 +3594,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) - - fail: - bdrv_reopen_queue_free(queue); -- for (p = drained; p; p = p->next) { -- BlockDriverState *bs = p->data; -- AioContext *ctx = bdrv_get_aio_context(bs); -- -- aio_context_acquire(ctx); -- bdrv_subtree_drained_end(bs); -- aio_context_release(ctx); -- } -- g_slist_free(drained); - } - - void qmp_blockdev_del(const char *node_name, Error **errp) --- -2.31.1 - diff --git a/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch b/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch deleted file mode 100644 index 1ae73c7..0000000 --- a/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 9a789d104a4a69031ad95d7fad6380ab21e82503 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:08 +0100 -Subject: [PATCH 26/31] block: Drop out of coroutine in - bdrv_do_drained_begin_quiesce() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [14/16] c9266663b822f703e55b6a07de98ceb56e69e924 
(sgarzarella/qemu-kvm-c-9-s) - -The next patch adds a parent drain to bdrv_attach_child_common(), which -shouldn't be, but is currently called from coroutines in some cases (e.g. -.bdrv_co_create implementations generally open new nodes). Therefore, -the assertion that we're not in a coroutine doesn't hold true any more. - -We could just remove the assertion because there is nothing in the -function that should be in conflict with running in a coroutine, but -just to be on the safe side, we can reverse the caller relationship -between bdrv_do_drained_begin() and bdrv_do_drained_begin_quiesce() so -that the latter also just drops out of coroutine context and we can -still be certain in the future that any drain code doesn't run in -coroutines. - -As a nice side effect, the structure of bdrv_do_drained_begin() is now -symmetrical with bdrv_do_drained_end(). - -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-14-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 05c272ff0cf1b16cc3606f746182dd99b774f553) -Signed-off-by: Stefano Garzarella ---- - block/io.c | 25 ++++++++++++------------- - 1 file changed, 12 insertions(+), 13 deletions(-) - -diff --git a/block/io.c b/block/io.c -index 2e9503df6a..5e9150d92c 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -346,10 +346,15 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - } - } - --void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) -+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -+ bool poll) - { - IO_OR_GS_CODE(); -- assert(!qemu_in_coroutine()); -+ -+ if (qemu_in_coroutine()) { -+ bdrv_co_yield_to_drain(bs, true, parent, poll); -+ return; -+ } - - /* Stop things in parent-to-child order */ - if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { -@@ -359,17 +364,6 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) - 
bs->drv->bdrv_drain_begin(bs); - } - } --} -- --static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -- bool poll) --{ -- if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(bs, true, parent, poll); -- return; -- } -- -- bdrv_do_drained_begin_quiesce(bs, parent); - - /* - * Wait for drained requests to finish. -@@ -385,6 +379,11 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, - } - } - -+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) -+{ -+ bdrv_do_drained_begin(bs, parent, false); -+} -+ - void bdrv_drained_begin(BlockDriverState *bs) - { - IO_OR_GS_CODE(); --- -2.31.1 - diff --git a/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch b/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch deleted file mode 100644 index b73b8fe..0000000 --- a/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch +++ /dev/null @@ -1,67 +0,0 @@ -From e790b4c20a5124239fe93e91fbc87745e5f2cea6 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:01 +0100 -Subject: [PATCH 19/31] block: Fix locking for bdrv_reopen_queue_child() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [7/16] 46bb54506c4400b9a1bf66b6bd7987ff67260003 (sgarzarella/qemu-kvm-c-9-s) - -Callers don't agree whether bdrv_reopen_queue_child() should be called -with the AioContext lock held or not. Standardise on holding the lock -(as done by QMP blockdev-reopen and the replication block driver) and -fix bdrv_reopen() to do the same. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-7-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 2e117866d7c96cc17e84cd2946fee1bf3292d814) -Signed-off-by: Stefano Garzarella ---- - block.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/block.c b/block.c -index 7999fd08c5..46df410b07 100644 ---- a/block.c -+++ b/block.c -@@ -4151,6 +4151,8 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, - * bs_queue, or the existing bs_queue being used. - * - * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). -+ * -+ * To be called with bs->aio_context locked. - */ - static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, - BlockDriverState *bs, -@@ -4309,6 +4311,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, - return bs_queue; - } - -+/* To be called with bs->aio_context locked */ - BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, - BlockDriverState *bs, - QDict *options, bool keep_old_opts) -@@ -4473,11 +4476,11 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, - GLOBAL_STATE_CODE(); - - bdrv_subtree_drained_begin(bs); -+ queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); -+ - if (ctx != qemu_get_aio_context()) { - aio_context_release(ctx); - } -- -- queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); - ret = bdrv_reopen_multiple(queue, errp); - - if (ctx != qemu_get_aio_context()) { --- -2.31.1 - diff --git a/kvm-block-Improve-empty-format-specific-info-dump.patch b/kvm-block-Improve-empty-format-specific-info-dump.patch deleted file mode 100644 index 5b54210..0000000 --- a/kvm-block-Improve-empty-format-specific-info-dump.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 074c89b05dae971c7118cb769fd34e22135c8f4c Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:53 +0200 -Subject: 
[PATCH 06/20] block: Improve empty format-specific info dump - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [1/12] be551e83f426e620e673302198b51368bfd324ce (hreitz/qemu-kvm-c-9-s) - -When a block driver supports obtaining format-specific information, but -that object only contains optional fields, it is possible that none of -them are present, so that dump_qobject() (called by -bdrv_image_info_specific_dump()) will not print anything. - -The callers of bdrv_image_info_specific_dump() put a header above this -information ("Format specific information:\n"), which will look strange -when there is nothing below. Modify bdrv_image_info_specific_dump() to -print this header instead of its callers, and only if there is indeed -something to be printed. - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-2-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 3716470b24f0f63090d59bcf28ad8fe6fb7835bd) -Signed-off-by: Hanna Czenczek ---- - block/qapi.c | 41 +++++++++++++++++++++++++++++++++++++---- - include/block/qapi.h | 3 ++- - qemu-io-cmds.c | 4 ++-- - 3 files changed, 41 insertions(+), 7 deletions(-) - -diff --git a/block/qapi.c b/block/qapi.c -index cf557e3aea..51202b470a 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -777,7 +777,35 @@ static void dump_qdict(int indentation, QDict *dict) - } - } - --void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec) -+/* -+ * Return whether dumping the given QObject with dump_qobject() would -+ * yield an empty dump, i.e. not print anything. 
-+ */ -+static bool qobject_is_empty_dump(const QObject *obj) -+{ -+ switch (qobject_type(obj)) { -+ case QTYPE_QNUM: -+ case QTYPE_QSTRING: -+ case QTYPE_QBOOL: -+ return false; -+ -+ case QTYPE_QDICT: -+ return qdict_size(qobject_to(QDict, obj)) == 0; -+ -+ case QTYPE_QLIST: -+ return qlist_empty(qobject_to(QList, obj)); -+ -+ default: -+ abort(); -+ } -+} -+ -+/** -+ * Dumps the given ImageInfoSpecific object in a human-readable form, -+ * prepending an optional prefix if the dump is not empty. -+ */ -+void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, -+ const char *prefix) - { - QObject *obj, *data; - Visitor *v = qobject_output_visitor_new(&obj); -@@ -785,7 +813,12 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec) - visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort); - visit_complete(v, &obj); - data = qdict_get(qobject_to(QDict, obj), "data"); -- dump_qobject(1, data); -+ if (!qobject_is_empty_dump(data)) { -+ if (prefix) { -+ qemu_printf("%s", prefix); -+ } -+ dump_qobject(1, data); -+ } - qobject_unref(obj); - visit_free(v); - } -@@ -866,7 +899,7 @@ void bdrv_image_info_dump(ImageInfo *info) - } - - if (info->has_format_specific) { -- qemu_printf("Format specific information:\n"); -- bdrv_image_info_specific_dump(info->format_specific); -+ bdrv_image_info_specific_dump(info->format_specific, -+ "Format specific information:\n"); - } - } -diff --git a/include/block/qapi.h b/include/block/qapi.h -index 22c7807c89..c09859ea78 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -40,6 +40,7 @@ void bdrv_query_image_info(BlockDriverState *bs, - Error **errp); - - void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); --void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec); -+void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, -+ const char *prefix); - void bdrv_image_info_dump(ImageInfo *info); - #endif -diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c -index 952dc940f1..f4a374528e 100644 
---- a/qemu-io-cmds.c -+++ b/qemu-io-cmds.c -@@ -1825,8 +1825,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv) - return -EIO; - } - if (spec_info) { -- printf("Format specific information:\n"); -- bdrv_image_info_specific_dump(spec_info); -+ bdrv_image_info_specific_dump(spec_info, -+ "Format specific information:\n"); - qapi_free_ImageInfoSpecific(spec_info); - } - --- -2.31.1 - diff --git a/kvm-block-Inline-bdrv_drain_invoke.patch b/kvm-block-Inline-bdrv_drain_invoke.patch deleted file mode 100644 index 07160dc..0000000 --- a/kvm-block-Inline-bdrv_drain_invoke.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 1808e560396872173f787f8e338e9837a4c3d626 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:00 +0100 -Subject: [PATCH 18/31] block: Inline bdrv_drain_invoke() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [6/16] 2c7473a36360eb43d94b967deb12308cb5ea0d3b (sgarzarella/qemu-kvm-c-9-s) - -bdrv_drain_invoke() has now two entirely separate cases that share no -code any more and are selected depending on a bool parameter. Each case -has only one caller. Just inline the function. 
- -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Emanuele Giuseppe Esposito -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-6-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit c7bc05f78ab31fb02fc9635f60b9bd22efc8d121) -Signed-off-by: Stefano Garzarella ---- - block/io.c | 23 ++++++----------------- - 1 file changed, 6 insertions(+), 17 deletions(-) - -diff --git a/block/io.c b/block/io.c -index f4ca62b034..a25103be6f 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -242,21 +242,6 @@ typedef struct { - bool ignore_bds_parents; - } BdrvCoDrainData; - --/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ --static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) --{ -- if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || -- (!begin && !bs->drv->bdrv_drain_end)) { -- return; -- } -- -- if (begin) { -- bs->drv->bdrv_drain_begin(bs); -- } else { -- bs->drv->bdrv_drain_end(bs); -- } --} -- - /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ - bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, - BdrvChild *ignore_parent, bool ignore_bds_parents) -@@ -390,7 +375,9 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - } - - bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); -- bdrv_drain_invoke(bs, true); -+ if (bs->drv && bs->drv->bdrv_drain_begin) { -+ bs->drv->bdrv_drain_begin(bs); -+ } - } - - static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, -@@ -461,7 +448,9 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - assert(bs->quiesce_counter > 0); - - /* Re-enable things in child-to-parent order */ -- bdrv_drain_invoke(bs, false); -+ if (bs->drv && bs->drv->bdrv_drain_end) { -+ bs->drv->bdrv_drain_end(bs); -+ } - bdrv_parent_drained_end(bs, parent, ignore_bds_parents); - - old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); --- -2.31.1 - diff 
--git a/kvm-block-Remove-drained_end_counter.patch b/kvm-block-Remove-drained_end_counter.patch deleted file mode 100644 index cfafc33..0000000 --- a/kvm-block-Remove-drained_end_counter.patch +++ /dev/null @@ -1,433 +0,0 @@ -From 3009e49f242ab371ffad35bb29c2c26ddfac75d4 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:40:59 +0100 -Subject: [PATCH 17/31] block: Remove drained_end_counter - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [5/16] 5589e3f05dece5394a05641f7f42096e8dc62bdb (sgarzarella/qemu-kvm-c-9-s) - -drained_end_counter is unused now, nobody changes its value any more. It -can be removed. - -In cases where we had two almost identical functions that only differed -in whether the caller passes drained_end_counter, or whether they would -poll for a local drained_end_counter to reach 0, these become a single -function. 
- -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Emanuele Giuseppe Esposito -Message-Id: <20221118174110.55183-5-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Signed-off-by: Kevin Wolf -(cherry picked from commit 2f65df6e16dea2d6e7212fa675f4779d9281e26f) -Signed-off-by: Stefano Garzarella ---- - block.c | 5 +- - block/block-backend.c | 4 +- - block/io.c | 98 ++++++++------------------------ - blockjob.c | 2 +- - include/block/block-io.h | 24 -------- - include/block/block_int-common.h | 6 +- - 6 files changed, 30 insertions(+), 109 deletions(-) - -diff --git a/block.c b/block.c -index 16a62a329c..7999fd08c5 100644 ---- a/block.c -+++ b/block.c -@@ -1235,11 +1235,10 @@ static bool bdrv_child_cb_drained_poll(BdrvChild *child) - return bdrv_drain_poll(bs, false, NULL, false); - } - --static void bdrv_child_cb_drained_end(BdrvChild *child, -- int *drained_end_counter) -+static void bdrv_child_cb_drained_end(BdrvChild *child) - { - BlockDriverState *bs = child->opaque; -- bdrv_drained_end_no_poll(bs, drained_end_counter); -+ bdrv_drained_end(bs); - } - - static int bdrv_child_cb_inactivate(BdrvChild *child) -diff --git a/block/block-backend.c b/block/block-backend.c -index d98a96ff37..feaf2181fa 100644 ---- a/block/block-backend.c -+++ b/block/block-backend.c -@@ -129,7 +129,7 @@ static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format, - } - static void blk_root_drained_begin(BdrvChild *child); - static bool blk_root_drained_poll(BdrvChild *child); --static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter); -+static void blk_root_drained_end(BdrvChild *child); - - static void blk_root_change_media(BdrvChild *child, bool load); - static void blk_root_resize(BdrvChild *child); -@@ -2556,7 +2556,7 @@ static bool blk_root_drained_poll(BdrvChild *child) - return busy || !!blk->in_flight; - } - --static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) -+static void 
blk_root_drained_end(BdrvChild *child) - { - BlockBackend *blk = child->opaque; - assert(blk->quiesce_counter); -diff --git a/block/io.c b/block/io.c -index c2ed4b2af9..f4ca62b034 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -58,28 +58,19 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, - } - } - --static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c, -- int *drained_end_counter) -+void bdrv_parent_drained_end_single(BdrvChild *c) - { -+ IO_OR_GS_CODE(); -+ - assert(c->parent_quiesce_counter > 0); - c->parent_quiesce_counter--; - if (c->klass->drained_end) { -- c->klass->drained_end(c, drained_end_counter); -+ c->klass->drained_end(c); - } - } - --void bdrv_parent_drained_end_single(BdrvChild *c) --{ -- int drained_end_counter = 0; -- AioContext *ctx = bdrv_child_get_parent_aio_context(c); -- IO_OR_GS_CODE(); -- bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter); -- AIO_WAIT_WHILE(ctx, qatomic_read(&drained_end_counter) > 0); --} -- - static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, -- bool ignore_bds_parents, -- int *drained_end_counter) -+ bool ignore_bds_parents) - { - BdrvChild *c; - -@@ -87,7 +78,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, - if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { - continue; - } -- bdrv_parent_drained_end_single_no_poll(c, drained_end_counter); -+ bdrv_parent_drained_end_single(c); - } - } - -@@ -249,12 +240,10 @@ typedef struct { - bool poll; - BdrvChild *parent; - bool ignore_bds_parents; -- int *drained_end_counter; - } BdrvCoDrainData; - - /* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ --static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, -- int *drained_end_counter) -+static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) - { - if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || - (!begin && !bs->drv->bdrv_drain_end)) { -@@ -305,8 +294,7 @@ 
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool ignore_bds_parents, - bool poll); - static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents, -- int *drained_end_counter); -+ BdrvChild *parent, bool ignore_bds_parents); - - static void bdrv_co_drain_bh_cb(void *opaque) - { -@@ -319,14 +307,12 @@ static void bdrv_co_drain_bh_cb(void *opaque) - aio_context_acquire(ctx); - bdrv_dec_in_flight(bs); - if (data->begin) { -- assert(!data->drained_end_counter); - bdrv_do_drained_begin(bs, data->recursive, data->parent, - data->ignore_bds_parents, data->poll); - } else { - assert(!data->poll); - bdrv_do_drained_end(bs, data->recursive, data->parent, -- data->ignore_bds_parents, -- data->drained_end_counter); -+ data->ignore_bds_parents); - } - aio_context_release(ctx); - } else { -@@ -342,8 +328,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - bool begin, bool recursive, - BdrvChild *parent, - bool ignore_bds_parents, -- bool poll, -- int *drained_end_counter) -+ bool poll) - { - BdrvCoDrainData data; - Coroutine *self = qemu_coroutine_self(); -@@ -363,7 +348,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - .parent = parent, - .ignore_bds_parents = ignore_bds_parents, - .poll = poll, -- .drained_end_counter = drained_end_counter, - }; - - if (bs) { -@@ -406,7 +390,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - } - - bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); -- bdrv_drain_invoke(bs, true, NULL); -+ bdrv_drain_invoke(bs, true); - } - - static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, -@@ -417,7 +401,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - - if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, -- poll, NULL); -+ poll); - return; - } - -@@ -461,38 +445,24 @@ void 
bdrv_subtree_drained_begin(BlockDriverState *bs) - - /** - * This function does not poll, nor must any of its recursively called -- * functions. The *drained_end_counter pointee will be incremented -- * once for every background operation scheduled, and decremented once -- * the operation settles. Therefore, the pointer must remain valid -- * until the pointee reaches 0. That implies that whoever sets up the -- * pointee has to poll until it is 0. -- * -- * We use atomic operations to access *drained_end_counter, because -- * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of -- * @bs may contain nodes in different AioContexts, -- * (2) bdrv_drain_all_end() uses the same counter for all nodes, -- * regardless of which AioContext they are in. -+ * functions. - */ - static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents, -- int *drained_end_counter) -+ BdrvChild *parent, bool ignore_bds_parents) - { - BdrvChild *child; - int old_quiesce_counter; - -- assert(drained_end_counter != NULL); -- - if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, -- false, drained_end_counter); -+ false); - return; - } - assert(bs->quiesce_counter > 0); - - /* Re-enable things in child-to-parent order */ -- bdrv_drain_invoke(bs, false, drained_end_counter); -- bdrv_parent_drained_end(bs, parent, ignore_bds_parents, -- drained_end_counter); -+ bdrv_drain_invoke(bs, false); -+ bdrv_parent_drained_end(bs, parent, ignore_bds_parents); - - old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); - if (old_quiesce_counter == 1) { -@@ -503,32 +473,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - assert(!ignore_bds_parents); - bs->recursive_quiesce_counter--; - QLIST_FOREACH(child, &bs->children, next) { -- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents, -- drained_end_counter); -+ bdrv_do_drained_end(child->bs, true, 
child, ignore_bds_parents); - } - } - } - - void bdrv_drained_end(BlockDriverState *bs) - { -- int drained_end_counter = 0; - IO_OR_GS_CODE(); -- bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter); -- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); --} -- --void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter) --{ -- IO_CODE(); -- bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter); -+ bdrv_do_drained_end(bs, false, NULL, false); - } - - void bdrv_subtree_drained_end(BlockDriverState *bs) - { -- int drained_end_counter = 0; - IO_OR_GS_CODE(); -- bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter); -- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); -+ bdrv_do_drained_end(bs, true, NULL, false); - } - - void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) -@@ -543,16 +502,12 @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) - - void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) - { -- int drained_end_counter = 0; - int i; - IO_OR_GS_CODE(); - - for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { -- bdrv_do_drained_end(child->bs, true, child, false, -- &drained_end_counter); -+ bdrv_do_drained_end(child->bs, true, child, false); - } -- -- BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0); - } - - void bdrv_drain(BlockDriverState *bs) -@@ -610,7 +565,7 @@ void bdrv_drain_all_begin(void) - GLOBAL_STATE_CODE(); - - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL); -+ bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); - return; - } - -@@ -649,22 +604,19 @@ void bdrv_drain_all_begin(void) - - void bdrv_drain_all_end_quiesce(BlockDriverState *bs) - { -- int drained_end_counter = 0; - GLOBAL_STATE_CODE(); - - g_assert(bs->quiesce_counter > 0); - g_assert(!bs->refcnt); - - while (bs->quiesce_counter) { -- 
bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); -+ bdrv_do_drained_end(bs, false, NULL, true); - } -- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); - } - - void bdrv_drain_all_end(void) - { - BlockDriverState *bs = NULL; -- int drained_end_counter = 0; - GLOBAL_STATE_CODE(); - - /* -@@ -680,13 +632,11 @@ void bdrv_drain_all_end(void) - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); -- bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); -+ bdrv_do_drained_end(bs, false, NULL, true); - aio_context_release(aio_context); - } - - assert(qemu_get_current_aio_context() == qemu_get_aio_context()); -- AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0); -- - assert(bdrv_drain_all_count > 0); - bdrv_drain_all_count--; - } -diff --git a/blockjob.c b/blockjob.c -index f51d4e18f3..0ab721e139 100644 ---- a/blockjob.c -+++ b/blockjob.c -@@ -120,7 +120,7 @@ static bool child_job_drained_poll(BdrvChild *c) - } - } - --static void child_job_drained_end(BdrvChild *c, int *drained_end_counter) -+static void child_job_drained_end(BdrvChild *c) - { - BlockJob *job = c->opaque; - job_resume(&job->job); -diff --git a/include/block/block-io.h b/include/block/block-io.h -index b099d7db45..054e964c9b 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -237,21 +237,6 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, - int64_t bytes, BdrvRequestFlags read_flags, - BdrvRequestFlags write_flags); - --/** -- * bdrv_drained_end_no_poll: -- * -- * Same as bdrv_drained_end(), but do not poll for the subgraph to -- * actually become unquiesced. Therefore, no graph changes will occur -- * with this function. -- * -- * *drained_end_counter is incremented for every background operation -- * that is scheduled, and will be decremented for every operation once -- * it settles. The caller must poll until it reaches 0. 
The counter -- * should be accessed using atomic operations only. -- */ --void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter); -- -- - /* - * "I/O or GS" API functions. These functions can run without - * the BQL, but only in one specific iothread/main loop. -@@ -311,9 +296,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); - * bdrv_parent_drained_end_single: - * - * End a quiesced section for the parent of @c. -- * -- * This polls @bs's AioContext until all scheduled sub-drained_ends -- * have settled, which may result in graph changes. - */ - void bdrv_parent_drained_end_single(BdrvChild *c); - -@@ -361,12 +343,6 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); - * bdrv_drained_end: - * - * End a quiescent section started by bdrv_drained_begin(). -- * -- * This polls @bs's AioContext until all scheduled sub-drained_ends -- * have settled. On one hand, that may result in graph changes. On -- * the other, this requires that the caller either runs in the main -- * loop; or that all involved nodes (@bs and all of its parents) are -- * in the caller's AioContext. - */ - void bdrv_drained_end(BlockDriverState *bs); - -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index 40d646d1ed..2b97576f6d 100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -939,15 +939,11 @@ struct BdrvChildClass { - * These functions must not change the graph (and therefore also must not - * call aio_poll(), which could change the graph indirectly). - * -- * If drained_end() schedules background operations, it must atomically -- * increment *drained_end_counter for each such operation and atomically -- * decrement it once the operation has settled. -- * - * Note that this can be nested. If drained_begin() was called twice, new - * I/O is allowed only after drained_end() was called twice, too. 
- */ - void (*drained_begin)(BdrvChild *child); -- void (*drained_end)(BdrvChild *child, int *drained_end_counter); -+ void (*drained_end)(BdrvChild *child); - - /* - * Returns whether the parent has pending requests for the child. This --- -2.31.1 - diff --git a/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch b/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch deleted file mode 100644 index aa64bec..0000000 --- a/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch +++ /dev/null @@ -1,274 +0,0 @@ -From 0dc7990533cef41e58579ee96315aca1fdc44ea1 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:07 +0100 -Subject: [PATCH 25/31] block: Remove ignore_bds_parents parameter from - drain_begin/end. - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [13/16] 1ed88d975a9569bffeb33ad847874417780ce408 (sgarzarella/qemu-kvm-c-9-s) - -ignore_bds_parents is now ignored during drain_begin and drain_end, so -we can just remove it there. It is still a valid optimisation for -drain_all in bdrv_drained_poll(), so leave it around there. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-13-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit a82a3bd135078d14f1bb4b5e50f51e77d3748270) -Signed-off-by: Stefano Garzarella ---- - block.c | 2 +- - block/io.c | 58 +++++++++++++++------------------------- - include/block/block-io.h | 3 +-- - 3 files changed, 24 insertions(+), 39 deletions(-) - -diff --git a/block.c b/block.c -index 5a583e260d..af31a94863 100644 ---- a/block.c -+++ b/block.c -@@ -1226,7 +1226,7 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c) - static void bdrv_child_cb_drained_begin(BdrvChild *child) - { - BlockDriverState *bs = child->opaque; -- bdrv_do_drained_begin_quiesce(bs, NULL, false); -+ bdrv_do_drained_begin_quiesce(bs, NULL); - } - - static bool bdrv_child_cb_drained_poll(BdrvChild *child) -diff --git a/block/io.c b/block/io.c -index 87d6f22ec4..2e9503df6a 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -45,13 +45,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs); - static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, - int64_t offset, int64_t bytes, BdrvRequestFlags flags); - --static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, -- bool ignore_bds_parents) -+static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) - { - BdrvChild *c, *next; - - QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { -- if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { -+ if (c == ignore) { - continue; - } - bdrv_parent_drained_begin_single(c, false); -@@ -70,13 +69,12 @@ void bdrv_parent_drained_end_single(BdrvChild *c) - } - } - --static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, -- bool ignore_bds_parents) -+static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) - { - BdrvChild *c; - - QLIST_FOREACH(c, &bs->parents, next_parent) { -- if (c 
== ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { -+ if (c == ignore) { - continue; - } - bdrv_parent_drained_end_single(c); -@@ -242,7 +240,6 @@ typedef struct { - bool begin; - bool poll; - BdrvChild *parent; -- bool ignore_bds_parents; - } BdrvCoDrainData; - - /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ -@@ -269,9 +266,8 @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs, - } - - static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -- bool ignore_bds_parents, bool poll); --static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, -- bool ignore_bds_parents); -+ bool poll); -+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent); - - static void bdrv_co_drain_bh_cb(void *opaque) - { -@@ -284,11 +280,10 @@ static void bdrv_co_drain_bh_cb(void *opaque) - aio_context_acquire(ctx); - bdrv_dec_in_flight(bs); - if (data->begin) { -- bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents, -- data->poll); -+ bdrv_do_drained_begin(bs, data->parent, data->poll); - } else { - assert(!data->poll); -- bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents); -+ bdrv_do_drained_end(bs, data->parent); - } - aio_context_release(ctx); - } else { -@@ -303,7 +298,6 @@ static void bdrv_co_drain_bh_cb(void *opaque) - static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - bool begin, - BdrvChild *parent, -- bool ignore_bds_parents, - bool poll) - { - BdrvCoDrainData data; -@@ -321,7 +315,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - .done = false, - .begin = begin, - .parent = parent, -- .ignore_bds_parents = ignore_bds_parents, - .poll = poll, - }; - -@@ -353,8 +346,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - } - } - --void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, -- BdrvChild *parent, bool ignore_bds_parents) -+void bdrv_do_drained_begin_quiesce(BlockDriverState 
*bs, BdrvChild *parent) - { - IO_OR_GS_CODE(); - assert(!qemu_in_coroutine()); -@@ -362,9 +354,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - /* Stop things in parent-to-child order */ - if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { - aio_disable_external(bdrv_get_aio_context(bs)); -- -- /* TODO Remove ignore_bds_parents, we don't consider it any more */ -- bdrv_parent_drained_begin(bs, parent, false); -+ bdrv_parent_drained_begin(bs, parent); - if (bs->drv && bs->drv->bdrv_drain_begin) { - bs->drv->bdrv_drain_begin(bs); - } -@@ -372,14 +362,14 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - } - - static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -- bool ignore_bds_parents, bool poll) -+ bool poll) - { - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll); -+ bdrv_co_yield_to_drain(bs, true, parent, poll); - return; - } - -- bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); -+ bdrv_do_drained_begin_quiesce(bs, parent); - - /* - * Wait for drained requests to finish. -@@ -391,7 +381,6 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, - * nodes. - */ - if (poll) { -- assert(!ignore_bds_parents); - BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); - } - } -@@ -399,20 +388,19 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, - void bdrv_drained_begin(BlockDriverState *bs) - { - IO_OR_GS_CODE(); -- bdrv_do_drained_begin(bs, NULL, false, true); -+ bdrv_do_drained_begin(bs, NULL, true); - } - - /** - * This function does not poll, nor must any of its recursively called - * functions. 
- */ --static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, -- bool ignore_bds_parents) -+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) - { - int old_quiesce_counter; - - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false); -+ bdrv_co_yield_to_drain(bs, false, parent, false); - return; - } - assert(bs->quiesce_counter > 0); -@@ -423,9 +411,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, - if (bs->drv && bs->drv->bdrv_drain_end) { - bs->drv->bdrv_drain_end(bs); - } -- /* TODO Remove ignore_bds_parents, we don't consider it any more */ -- bdrv_parent_drained_end(bs, parent, false); -- -+ bdrv_parent_drained_end(bs, parent); - aio_enable_external(bdrv_get_aio_context(bs)); - } - } -@@ -433,7 +419,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, - void bdrv_drained_end(BlockDriverState *bs) - { - IO_OR_GS_CODE(); -- bdrv_do_drained_end(bs, NULL, false); -+ bdrv_do_drained_end(bs, NULL); - } - - void bdrv_drain(BlockDriverState *bs) -@@ -491,7 +477,7 @@ void bdrv_drain_all_begin(void) - GLOBAL_STATE_CODE(); - - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(NULL, true, NULL, true, true); -+ bdrv_co_yield_to_drain(NULL, true, NULL, true); - return; - } - -@@ -516,7 +502,7 @@ void bdrv_drain_all_begin(void) - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); -- bdrv_do_drained_begin(bs, NULL, true, false); -+ bdrv_do_drained_begin(bs, NULL, false); - aio_context_release(aio_context); - } - -@@ -536,7 +522,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs) - g_assert(!bs->refcnt); - - while (bs->quiesce_counter) { -- bdrv_do_drained_end(bs, NULL, true); -+ bdrv_do_drained_end(bs, NULL); - } - } - -@@ -558,7 +544,7 @@ void bdrv_drain_all_end(void) - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); -- bdrv_do_drained_end(bs, NULL, 
true); -+ bdrv_do_drained_end(bs, NULL); - aio_context_release(aio_context); - } - -diff --git a/include/block/block-io.h b/include/block/block-io.h -index 9c36a16a1f..8f5e75756a 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -329,8 +329,7 @@ void bdrv_drained_begin(BlockDriverState *bs); - * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already - * running requests to complete. - */ --void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, -- BdrvChild *parent, bool ignore_bds_parents); -+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent); - - /** - * bdrv_drained_end: --- -2.31.1 - diff --git a/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch b/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch deleted file mode 100644 index 94eba86..0000000 --- a/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 60b66881fb972e1cdff1cd7b4c865e5e21c141b0 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:10 +0100 -Subject: [PATCH 28/31] block: Remove poll parameter from - bdrv_parent_drained_begin_single() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [16/16] fd526cc9e5bebeb256cfa56d23ec596f26caa37a (sgarzarella/qemu-kvm-c-9-s) - -All callers of bdrv_parent_drained_begin_single() pass poll=false now, -so we don't need the parameter any more. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20221118174110.55183-16-kwolf@redhat.com> -Reviewed-by: Hanna Reitz -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Kevin Wolf -(cherry picked from commit 606ed756c1d69cba4822be8923248d2fd714f069) -Signed-off-by: Stefano Garzarella ---- - block.c | 4 ++-- - block/io.c | 8 ++------ - include/block/block-io.h | 5 ++--- - 3 files changed, 6 insertions(+), 11 deletions(-) - -diff --git a/block.c b/block.c -index 65588d313a..0d78711416 100644 ---- a/block.c -+++ b/block.c -@@ -2417,7 +2417,7 @@ static void bdrv_replace_child_abort(void *opaque) - * new_bs drained when calling bdrv_replace_child_tran() is not a - * requirement any more. - */ -- bdrv_parent_drained_begin_single(s->child, false); -+ bdrv_parent_drained_begin_single(s->child); - assert(!bdrv_parent_drained_poll_single(s->child)); - } - assert(s->child->quiesced_parent); -@@ -3059,7 +3059,7 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, - * a problem, we already did this), but it will still poll until the parent - * is fully quiesced, so it will not be negatively affected either. 
- */ -- bdrv_parent_drained_begin_single(new_child, false); -+ bdrv_parent_drained_begin_single(new_child); - bdrv_replace_child_noperm(new_child, child_bs); - - BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); -diff --git a/block/io.c b/block/io.c -index ae64830eac..38e57d1f67 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) - if (c == ignore) { - continue; - } -- bdrv_parent_drained_begin_single(c, false); -+ bdrv_parent_drained_begin_single(c); - } - } - -@@ -105,9 +105,8 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, - return busy; - } - --void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) -+void bdrv_parent_drained_begin_single(BdrvChild *c) - { -- AioContext *ctx = bdrv_child_get_parent_aio_context(c); - IO_OR_GS_CODE(); - - assert(!c->quiesced_parent); -@@ -116,9 +115,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) - if (c->klass->drained_begin) { - c->klass->drained_begin(c); - } -- if (poll) { -- AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c)); -- } - } - - static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) -diff --git a/include/block/block-io.h b/include/block/block-io.h -index 65e6d2569b..92aaa7c1e9 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -287,10 +287,9 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); - /** - * bdrv_parent_drained_begin_single: - * -- * Begin a quiesced section for the parent of @c. If @poll is true, wait for -- * any pending activity to cease. -+ * Begin a quiesced section for the parent of @c. 
- */ --void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); -+void bdrv_parent_drained_begin_single(BdrvChild *c); - - /** - * bdrv_parent_drained_poll_single: --- -2.31.1 - diff --git a/kvm-block-Remove-subtree-drains.patch b/kvm-block-Remove-subtree-drains.patch deleted file mode 100644 index af9c0ff..0000000 --- a/kvm-block-Remove-subtree-drains.patch +++ /dev/null @@ -1,896 +0,0 @@ -From 79063522861cb2baf921b204bcdf4c3bfb5697f4 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:05 +0100 -Subject: [PATCH 23/31] block: Remove subtree drains - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [11/16] d92f5041cceeeec49a65441b22d20f692c0f1c77 (sgarzarella/qemu-kvm-c-9-s) - -Subtree drains are not used any more. Remove them. - -After this, BdrvChildClass.attach/detach() don't poll any more. 
- -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-11-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 299403aedaeb7f08d8e98aa8614b29d4e5546066) -Signed-off-by: Stefano Garzarella ---- - block.c | 20 +-- - block/io.c | 121 +++----------- - include/block/block-io.h | 18 +-- - include/block/block_int-common.h | 1 - - include/block/block_int-io.h | 12 -- - tests/unit/test-bdrv-drain.c | 261 ++----------------------------- - 6 files changed, 44 insertions(+), 389 deletions(-) - -diff --git a/block.c b/block.c -index 5330e89903..e0e3b21790 100644 ---- a/block.c -+++ b/block.c -@@ -1232,7 +1232,7 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child) - static bool bdrv_child_cb_drained_poll(BdrvChild *child) - { - BlockDriverState *bs = child->opaque; -- return bdrv_drain_poll(bs, false, NULL, false); -+ return bdrv_drain_poll(bs, NULL, false); - } - - static void bdrv_child_cb_drained_end(BdrvChild *child) -@@ -1482,8 +1482,6 @@ static void bdrv_child_cb_attach(BdrvChild *child) - assert(!bs->file); - bs->file = child; - } -- -- bdrv_apply_subtree_drain(child, bs); - } - - static void bdrv_child_cb_detach(BdrvChild *child) -@@ -1494,8 +1492,6 @@ static void bdrv_child_cb_detach(BdrvChild *child) - bdrv_backing_detach(child); - } - -- bdrv_unapply_subtree_drain(child, bs); -- - assert_bdrv_graph_writable(bs); - QLIST_REMOVE(child, next); - if (child == bs->backing) { -@@ -2851,9 +2847,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - } - - if (old_bs) { -- /* Detach first so that the recursive drain sections coming from @child -- * are already gone and we only end the drain sections that came from -- * elsewhere. 
*/ - if (child->klass->detach) { - child->klass->detach(child); - } -@@ -2868,17 +2861,14 @@ static void bdrv_replace_child_noperm(BdrvChild *child, - QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); - - /* -- * Detaching the old node may have led to the new node's -- * quiesce_counter having been decreased. Not a problem, we -- * just need to recognize this here and then invoke -- * drained_end appropriately more often. -+ * Polling in bdrv_parent_drained_begin_single() may have led to the new -+ * node's quiesce_counter having been decreased. Not a problem, we just -+ * need to recognize this here and then invoke drained_end appropriately -+ * more often. - */ - assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); - drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; - -- /* Attach only after starting new drained sections, so that recursive -- * drain sections coming from @child don't get an extra .drained_begin -- * callback. */ - if (child->klass->attach) { - child->klass->attach(child); - } -diff --git a/block/io.c b/block/io.c -index a25103be6f..75224480d0 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -236,17 +236,15 @@ typedef struct { - BlockDriverState *bs; - bool done; - bool begin; -- bool recursive; - bool poll; - BdrvChild *parent; - bool ignore_bds_parents; - } BdrvCoDrainData; - - /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ --bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, -- BdrvChild *ignore_parent, bool ignore_bds_parents) -+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, -+ bool ignore_bds_parents) - { -- BdrvChild *child, *next; - IO_OR_GS_CODE(); - - if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) { -@@ -257,29 +255,19 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, - return true; - } - -- if (recursive) { -- assert(!ignore_bds_parents); -- QLIST_FOREACH_SAFE(child, &bs->children, next, next) { -- if 
(bdrv_drain_poll(child->bs, recursive, child, false)) { -- return true; -- } -- } -- } -- - return false; - } - --static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive, -+static bool bdrv_drain_poll_top_level(BlockDriverState *bs, - BdrvChild *ignore_parent) - { -- return bdrv_drain_poll(bs, recursive, ignore_parent, false); -+ return bdrv_drain_poll(bs, ignore_parent, false); - } - --static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents, -- bool poll); --static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents); -+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -+ bool ignore_bds_parents, bool poll); -+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, -+ bool ignore_bds_parents); - - static void bdrv_co_drain_bh_cb(void *opaque) - { -@@ -292,12 +280,11 @@ static void bdrv_co_drain_bh_cb(void *opaque) - aio_context_acquire(ctx); - bdrv_dec_in_flight(bs); - if (data->begin) { -- bdrv_do_drained_begin(bs, data->recursive, data->parent, -- data->ignore_bds_parents, data->poll); -+ bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents, -+ data->poll); - } else { - assert(!data->poll); -- bdrv_do_drained_end(bs, data->recursive, data->parent, -- data->ignore_bds_parents); -+ bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents); - } - aio_context_release(ctx); - } else { -@@ -310,7 +297,7 @@ static void bdrv_co_drain_bh_cb(void *opaque) - } - - static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, -- bool begin, bool recursive, -+ bool begin, - BdrvChild *parent, - bool ignore_bds_parents, - bool poll) -@@ -329,7 +316,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - .bs = bs, - .done = false, - .begin = begin, -- .recursive = recursive, - .parent = parent, - .ignore_bds_parents = ignore_bds_parents, - .poll = poll, 
-@@ -380,29 +366,16 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - } - } - --static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents, -- bool poll) -+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, -+ bool ignore_bds_parents, bool poll) - { -- BdrvChild *child, *next; -- - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, -- poll); -+ bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll); - return; - } - - bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); - -- if (recursive) { -- assert(!ignore_bds_parents); -- bs->recursive_quiesce_counter++; -- QLIST_FOREACH_SAFE(child, &bs->children, next, next) { -- bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents, -- false); -- } -- } -- - /* - * Wait for drained requests to finish. - * -@@ -414,35 +387,27 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - */ - if (poll) { - assert(!ignore_bds_parents); -- BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent)); -+ BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); - } - } - - void bdrv_drained_begin(BlockDriverState *bs) - { - IO_OR_GS_CODE(); -- bdrv_do_drained_begin(bs, false, NULL, false, true); --} -- --void bdrv_subtree_drained_begin(BlockDriverState *bs) --{ -- IO_OR_GS_CODE(); -- bdrv_do_drained_begin(bs, true, NULL, false, true); -+ bdrv_do_drained_begin(bs, NULL, false, true); - } - - /** - * This function does not poll, nor must any of its recursively called - * functions. 
- */ --static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, -- BdrvChild *parent, bool ignore_bds_parents) -+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, -+ bool ignore_bds_parents) - { -- BdrvChild *child; - int old_quiesce_counter; - - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, -- false); -+ bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false); - return; - } - assert(bs->quiesce_counter > 0); -@@ -457,46 +422,12 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - if (old_quiesce_counter == 1) { - aio_enable_external(bdrv_get_aio_context(bs)); - } -- -- if (recursive) { -- assert(!ignore_bds_parents); -- bs->recursive_quiesce_counter--; -- QLIST_FOREACH(child, &bs->children, next) { -- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents); -- } -- } - } - - void bdrv_drained_end(BlockDriverState *bs) - { - IO_OR_GS_CODE(); -- bdrv_do_drained_end(bs, false, NULL, false); --} -- --void bdrv_subtree_drained_end(BlockDriverState *bs) --{ -- IO_OR_GS_CODE(); -- bdrv_do_drained_end(bs, true, NULL, false); --} -- --void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) --{ -- int i; -- IO_OR_GS_CODE(); -- -- for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { -- bdrv_do_drained_begin(child->bs, true, child, false, true); -- } --} -- --void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) --{ -- int i; -- IO_OR_GS_CODE(); -- -- for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { -- bdrv_do_drained_end(child->bs, true, child, false); -- } -+ bdrv_do_drained_end(bs, NULL, false); - } - - void bdrv_drain(BlockDriverState *bs) -@@ -529,7 +460,7 @@ static bool bdrv_drain_all_poll(void) - while ((bs = bdrv_next_all_states(bs))) { - AioContext *aio_context = bdrv_get_aio_context(bs); - aio_context_acquire(aio_context); -- result |= bdrv_drain_poll(bs, false, 
NULL, true); -+ result |= bdrv_drain_poll(bs, NULL, true); - aio_context_release(aio_context); - } - -@@ -554,7 +485,7 @@ void bdrv_drain_all_begin(void) - GLOBAL_STATE_CODE(); - - if (qemu_in_coroutine()) { -- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); -+ bdrv_co_yield_to_drain(NULL, true, NULL, true, true); - return; - } - -@@ -579,7 +510,7 @@ void bdrv_drain_all_begin(void) - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); -- bdrv_do_drained_begin(bs, false, NULL, true, false); -+ bdrv_do_drained_begin(bs, NULL, true, false); - aio_context_release(aio_context); - } - -@@ -599,7 +530,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs) - g_assert(!bs->refcnt); - - while (bs->quiesce_counter) { -- bdrv_do_drained_end(bs, false, NULL, true); -+ bdrv_do_drained_end(bs, NULL, true); - } - } - -@@ -621,7 +552,7 @@ void bdrv_drain_all_end(void) - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); -- bdrv_do_drained_end(bs, false, NULL, true); -+ bdrv_do_drained_end(bs, NULL, true); - aio_context_release(aio_context); - } - -diff --git a/include/block/block-io.h b/include/block/block-io.h -index 054e964c9b..9c36a16a1f 100644 ---- a/include/block/block-io.h -+++ b/include/block/block-io.h -@@ -302,8 +302,7 @@ void bdrv_parent_drained_end_single(BdrvChild *c); - /** - * bdrv_drain_poll: - * -- * Poll for pending requests in @bs, its parents (except for @ignore_parent), -- * and if @recursive is true its children as well (used for subtree drain). -+ * Poll for pending requests in @bs and its parents (except for @ignore_parent). - * - * If @ignore_bds_parents is true, parents that are BlockDriverStates must - * ignore the drain request because they will be drained separately (used for -@@ -311,8 +310,8 @@ void bdrv_parent_drained_end_single(BdrvChild *c); - * - * This is part of bdrv_drained_begin. 
- */ --bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, -- BdrvChild *ignore_parent, bool ignore_bds_parents); -+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, -+ bool ignore_bds_parents); - - /** - * bdrv_drained_begin: -@@ -333,12 +332,6 @@ void bdrv_drained_begin(BlockDriverState *bs); - void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - BdrvChild *parent, bool ignore_bds_parents); - --/** -- * Like bdrv_drained_begin, but recursively begins a quiesced section for -- * exclusive access to all child nodes as well. -- */ --void bdrv_subtree_drained_begin(BlockDriverState *bs); -- - /** - * bdrv_drained_end: - * -@@ -346,9 +339,4 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); - */ - void bdrv_drained_end(BlockDriverState *bs); - --/** -- * End a quiescent section started by bdrv_subtree_drained_begin(). -- */ --void bdrv_subtree_drained_end(BlockDriverState *bs); -- - #endif /* BLOCK_IO_H */ -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index 2b97576f6d..791dddfd7d 100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -1184,7 +1184,6 @@ struct BlockDriverState { - - /* Accessed with atomic ops. */ - int quiesce_counter; -- int recursive_quiesce_counter; - - unsigned int write_gen; /* Current data generation */ - -diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h -index 4b0b3e17ef..8bc061ebb8 100644 ---- a/include/block/block_int-io.h -+++ b/include/block/block_int-io.h -@@ -179,16 +179,4 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs, - */ - void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes); - -- --/* -- * "I/O or GS" API functions. These functions can run without -- * the BQL, but only in one specific iothread/main loop. -- * -- * See include/block/block-io.h for more information about -- * the "I/O or GS" API. 
-- */ -- --void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); --void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); -- - #endif /* BLOCK_INT_IO_H */ -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index 695519ee02..dda08de8db 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -156,7 +156,6 @@ static void call_in_coroutine(void (*entry)(void)) - enum drain_type { - BDRV_DRAIN_ALL, - BDRV_DRAIN, -- BDRV_SUBTREE_DRAIN, - DRAIN_TYPE_MAX, - }; - -@@ -165,7 +164,6 @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) - switch (drain_type) { - case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; - case BDRV_DRAIN: bdrv_drained_begin(bs); break; -- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break; - default: g_assert_not_reached(); - } - } -@@ -175,7 +173,6 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) - switch (drain_type) { - case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; - case BDRV_DRAIN: bdrv_drained_end(bs); break; -- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break; - default: g_assert_not_reached(); - } - } -@@ -271,11 +268,6 @@ static void test_drv_cb_drain(void) - test_drv_cb_common(BDRV_DRAIN, false); - } - --static void test_drv_cb_drain_subtree(void) --{ -- test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); --} -- - static void test_drv_cb_co_drain_all(void) - { - call_in_coroutine(test_drv_cb_drain_all); -@@ -286,11 +278,6 @@ static void test_drv_cb_co_drain(void) - call_in_coroutine(test_drv_cb_drain); - } - --static void test_drv_cb_co_drain_subtree(void) --{ -- call_in_coroutine(test_drv_cb_drain_subtree); --} -- - static void test_quiesce_common(enum drain_type drain_type, bool recursive) - { - BlockBackend *blk; -@@ -332,11 +319,6 @@ static void test_quiesce_drain(void) - test_quiesce_common(BDRV_DRAIN, false); - } - --static void 
test_quiesce_drain_subtree(void) --{ -- test_quiesce_common(BDRV_SUBTREE_DRAIN, true); --} -- - static void test_quiesce_co_drain_all(void) - { - call_in_coroutine(test_quiesce_drain_all); -@@ -347,11 +329,6 @@ static void test_quiesce_co_drain(void) - call_in_coroutine(test_quiesce_drain); - } - --static void test_quiesce_co_drain_subtree(void) --{ -- call_in_coroutine(test_quiesce_drain_subtree); --} -- - static void test_nested(void) - { - BlockBackend *blk; -@@ -402,158 +379,6 @@ static void test_nested(void) - blk_unref(blk); - } - --static void test_multiparent(void) --{ -- BlockBackend *blk_a, *blk_b; -- BlockDriverState *bs_a, *bs_b, *backing; -- BDRVTestState *a_s, *b_s, *backing_s; -- -- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); -- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, -- &error_abort); -- a_s = bs_a->opaque; -- blk_insert_bs(blk_a, bs_a, &error_abort); -- -- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); -- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, -- &error_abort); -- b_s = bs_b->opaque; -- blk_insert_bs(blk_b, bs_b, &error_abort); -- -- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); -- backing_s = backing->opaque; -- bdrv_set_backing_hd(bs_a, backing, &error_abort); -- bdrv_set_backing_hd(bs_b, backing, &error_abort); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); -- g_assert_cmpint(backing->quiesce_counter, ==, 0); -- g_assert_cmpint(a_s->drain_count, ==, 0); -- g_assert_cmpint(b_s->drain_count, ==, 0); -- g_assert_cmpint(backing_s->drain_count, ==, 0); -- -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 1); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 1); -- g_assert_cmpint(backing->quiesce_counter, ==, 1); -- g_assert_cmpint(a_s->drain_count, ==, 1); -- g_assert_cmpint(b_s->drain_count, ==, 1); -- 
g_assert_cmpint(backing_s->drain_count, ==, 1); -- -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 2); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 2); -- g_assert_cmpint(backing->quiesce_counter, ==, 2); -- g_assert_cmpint(a_s->drain_count, ==, 2); -- g_assert_cmpint(b_s->drain_count, ==, 2); -- g_assert_cmpint(backing_s->drain_count, ==, 2); -- -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 1); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 1); -- g_assert_cmpint(backing->quiesce_counter, ==, 1); -- g_assert_cmpint(a_s->drain_count, ==, 1); -- g_assert_cmpint(b_s->drain_count, ==, 1); -- g_assert_cmpint(backing_s->drain_count, ==, 1); -- -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); -- g_assert_cmpint(backing->quiesce_counter, ==, 0); -- g_assert_cmpint(a_s->drain_count, ==, 0); -- g_assert_cmpint(b_s->drain_count, ==, 0); -- g_assert_cmpint(backing_s->drain_count, ==, 0); -- -- bdrv_unref(backing); -- bdrv_unref(bs_a); -- bdrv_unref(bs_b); -- blk_unref(blk_a); -- blk_unref(blk_b); --} -- --static void test_graph_change_drain_subtree(void) --{ -- BlockBackend *blk_a, *blk_b; -- BlockDriverState *bs_a, *bs_b, *backing; -- BDRVTestState *a_s, *b_s, *backing_s; -- -- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); -- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, -- &error_abort); -- a_s = bs_a->opaque; -- blk_insert_bs(blk_a, bs_a, &error_abort); -- -- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); -- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, -- &error_abort); -- b_s = bs_b->opaque; -- blk_insert_bs(blk_b, bs_b, &error_abort); -- -- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); -- backing_s = backing->opaque; -- bdrv_set_backing_hd(bs_a, backing, &error_abort); -- 
-- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); -- g_assert_cmpint(backing->quiesce_counter, ==, 0); -- g_assert_cmpint(a_s->drain_count, ==, 0); -- g_assert_cmpint(b_s->drain_count, ==, 0); -- g_assert_cmpint(backing_s->drain_count, ==, 0); -- -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); -- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); -- -- bdrv_set_backing_hd(bs_b, backing, &error_abort); -- g_assert_cmpint(bs_a->quiesce_counter, ==, 5); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 5); -- g_assert_cmpint(backing->quiesce_counter, ==, 5); -- g_assert_cmpint(a_s->drain_count, ==, 5); -- g_assert_cmpint(b_s->drain_count, ==, 5); -- g_assert_cmpint(backing_s->drain_count, ==, 5); -- -- bdrv_set_backing_hd(bs_b, NULL, &error_abort); -- g_assert_cmpint(bs_a->quiesce_counter, ==, 3); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 2); -- g_assert_cmpint(backing->quiesce_counter, ==, 3); -- g_assert_cmpint(a_s->drain_count, ==, 3); -- g_assert_cmpint(b_s->drain_count, ==, 2); -- g_assert_cmpint(backing_s->drain_count, ==, 3); -- -- bdrv_set_backing_hd(bs_b, backing, &error_abort); -- g_assert_cmpint(bs_a->quiesce_counter, ==, 5); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 5); -- g_assert_cmpint(backing->quiesce_counter, ==, 5); -- g_assert_cmpint(a_s->drain_count, ==, 5); -- g_assert_cmpint(b_s->drain_count, ==, 5); -- g_assert_cmpint(backing_s->drain_count, ==, 5); -- -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); -- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); -- -- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); -- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); -- g_assert_cmpint(backing->quiesce_counter, ==, 0); -- g_assert_cmpint(a_s->drain_count, ==, 0); -- 
g_assert_cmpint(b_s->drain_count, ==, 0); -- g_assert_cmpint(backing_s->drain_count, ==, 0); -- -- bdrv_unref(backing); -- bdrv_unref(bs_a); -- bdrv_unref(bs_b); -- blk_unref(blk_a); -- blk_unref(blk_b); --} -- - static void test_graph_change_drain_all(void) - { - BlockBackend *blk_a, *blk_b; -@@ -773,12 +598,6 @@ static void test_iothread_drain(void) - test_iothread_common(BDRV_DRAIN, 1); - } - --static void test_iothread_drain_subtree(void) --{ -- test_iothread_common(BDRV_SUBTREE_DRAIN, 0); -- test_iothread_common(BDRV_SUBTREE_DRAIN, 1); --} -- - - typedef struct TestBlockJob { - BlockJob common; -@@ -863,7 +682,6 @@ enum test_job_result { - enum test_job_drain_node { - TEST_JOB_DRAIN_SRC, - TEST_JOB_DRAIN_SRC_CHILD, -- TEST_JOB_DRAIN_SRC_PARENT, - }; - - static void test_blockjob_common_drain_node(enum drain_type drain_type, -@@ -901,9 +719,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, - case TEST_JOB_DRAIN_SRC_CHILD: - drain_bs = src_backing; - break; -- case TEST_JOB_DRAIN_SRC_PARENT: -- drain_bs = src_overlay; -- break; - default: - g_assert_not_reached(); - } -@@ -1055,10 +870,6 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, - TEST_JOB_DRAIN_SRC); - test_blockjob_common_drain_node(drain_type, use_iothread, result, - TEST_JOB_DRAIN_SRC_CHILD); -- if (drain_type == BDRV_SUBTREE_DRAIN) { -- test_blockjob_common_drain_node(drain_type, use_iothread, result, -- TEST_JOB_DRAIN_SRC_PARENT); -- } - } - - static void test_blockjob_drain_all(void) -@@ -1071,11 +882,6 @@ static void test_blockjob_drain(void) - test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS); - } - --static void test_blockjob_drain_subtree(void) --{ -- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS); --} -- - static void test_blockjob_error_drain_all(void) - { - test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN); -@@ -1088,12 +894,6 @@ static void test_blockjob_error_drain(void) - 
test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE); - } - --static void test_blockjob_error_drain_subtree(void) --{ -- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN); -- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE); --} -- - static void test_blockjob_iothread_drain_all(void) - { - test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS); -@@ -1104,11 +904,6 @@ static void test_blockjob_iothread_drain(void) - test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS); - } - --static void test_blockjob_iothread_drain_subtree(void) --{ -- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS); --} -- - static void test_blockjob_iothread_error_drain_all(void) - { - test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN); -@@ -1121,12 +916,6 @@ static void test_blockjob_iothread_error_drain(void) - test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE); - } - --static void test_blockjob_iothread_error_drain_subtree(void) --{ -- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN); -- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE); --} -- - - typedef struct BDRVTestTopState { - BdrvChild *wait_child; -@@ -1273,14 +1062,6 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, - bdrv_drain(child_bs); - bdrv_unref(child_bs); - break; -- case BDRV_SUBTREE_DRAIN: -- /* Would have to ref/unref bs here for !detach_instead_of_delete, but -- * then the whole test becomes pointless because the graph changes -- * don't occur during the drain any more. 
*/ -- assert(detach_instead_of_delete); -- bdrv_subtree_drained_begin(bs); -- bdrv_subtree_drained_end(bs); -- break; - case BDRV_DRAIN_ALL: - bdrv_drain_all_begin(); - bdrv_drain_all_end(); -@@ -1315,11 +1096,6 @@ static void test_detach_by_drain(void) - do_test_delete_by_drain(true, BDRV_DRAIN); - } - --static void test_detach_by_drain_subtree(void) --{ -- do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN); --} -- - - struct detach_by_parent_data { - BlockDriverState *parent_b; -@@ -1452,7 +1228,10 @@ static void test_detach_indirect(bool by_parent_cb) - g_assert(acb != NULL); - - /* Drain and check the expected result */ -- bdrv_subtree_drained_begin(parent_b); -+ bdrv_drained_begin(parent_b); -+ bdrv_drained_begin(a); -+ bdrv_drained_begin(b); -+ bdrv_drained_begin(c); - - g_assert(detach_by_parent_data.child_c != NULL); - -@@ -1467,12 +1246,15 @@ static void test_detach_indirect(bool by_parent_cb) - g_assert(QLIST_NEXT(child_a, next) == NULL); - - g_assert_cmpint(parent_a->quiesce_counter, ==, 1); -- g_assert_cmpint(parent_b->quiesce_counter, ==, 1); -+ g_assert_cmpint(parent_b->quiesce_counter, ==, 3); - g_assert_cmpint(a->quiesce_counter, ==, 1); -- g_assert_cmpint(b->quiesce_counter, ==, 0); -+ g_assert_cmpint(b->quiesce_counter, ==, 1); - g_assert_cmpint(c->quiesce_counter, ==, 1); - -- bdrv_subtree_drained_end(parent_b); -+ bdrv_drained_end(parent_b); -+ bdrv_drained_end(a); -+ bdrv_drained_end(b); -+ bdrv_drained_end(c); - - bdrv_unref(parent_b); - blk_unref(blk); -@@ -2202,70 +1984,47 @@ int main(int argc, char **argv) - - g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); - g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); -- g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", -- test_drv_cb_drain_subtree); - - g_test_add_func("/bdrv-drain/driver-cb/co/drain_all", - test_drv_cb_co_drain_all); - g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain); -- 
g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree", -- test_drv_cb_co_drain_subtree); -- - - g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); - g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); -- g_test_add_func("/bdrv-drain/quiesce/drain_subtree", -- test_quiesce_drain_subtree); - - g_test_add_func("/bdrv-drain/quiesce/co/drain_all", - test_quiesce_co_drain_all); - g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain); -- g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree", -- test_quiesce_co_drain_subtree); - - g_test_add_func("/bdrv-drain/nested", test_nested); -- g_test_add_func("/bdrv-drain/multiparent", test_multiparent); - -- g_test_add_func("/bdrv-drain/graph-change/drain_subtree", -- test_graph_change_drain_subtree); - g_test_add_func("/bdrv-drain/graph-change/drain_all", - test_graph_change_drain_all); - - g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all); - g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain); -- g_test_add_func("/bdrv-drain/iothread/drain_subtree", -- test_iothread_drain_subtree); - - g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); - g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); -- g_test_add_func("/bdrv-drain/blockjob/drain_subtree", -- test_blockjob_drain_subtree); - - g_test_add_func("/bdrv-drain/blockjob/error/drain_all", - test_blockjob_error_drain_all); - g_test_add_func("/bdrv-drain/blockjob/error/drain", - test_blockjob_error_drain); -- g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree", -- test_blockjob_error_drain_subtree); - - g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all", - test_blockjob_iothread_drain_all); - g_test_add_func("/bdrv-drain/blockjob/iothread/drain", - test_blockjob_iothread_drain); -- g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree", -- test_blockjob_iothread_drain_subtree); - - 
g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all", - test_blockjob_iothread_error_drain_all); - g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain", - test_blockjob_iothread_error_drain); -- g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree", -- test_blockjob_iothread_error_drain_subtree); - - g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain); - g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all); - g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); -- g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree); - g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb); - g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb); - --- -2.31.1 - diff --git a/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch b/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch deleted file mode 100644 index 1529fdb..0000000 --- a/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch +++ /dev/null @@ -1,302 +0,0 @@ -From 0e894c93cae97bb792dc483be8e295d097ebd7a1 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:40:58 +0100 -Subject: [PATCH 16/31] block: Revert .bdrv_drained_begin/end to - non-coroutine_fn - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [4/16] 86d6049e40a99604e414c2572b67f74b85868832 (sgarzarella/qemu-kvm-c-9-s) - -Polling during bdrv_drained_end() can be problematic (and in the future, -we may get cases for bdrv_drained_begin() where polling is forbidden, -and we don't care about already in-flight requests, but just want to -prevent new requests from arriving). 
- -The .bdrv_drained_begin/end callbacks running in a coroutine is the only -reason why we have to do this polling, so make them non-coroutine -callbacks again. None of the callers actually yield any more. - -This means that bdrv_drained_end() effectively doesn't poll any more, -even if AIO_WAIT_WHILE() loops are still there (their condition is false -from the beginning). This is generally not a problem, but in -test-bdrv-drain, some additional explicit aio_poll() calls need to be -added because the test case wants to verify the final state after BHs -have executed. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Emanuele Giuseppe Esposito -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-4-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 5e8ac21717373cbe96ef7a91e216bf5788815d63) -Signed-off-by: Stefano Garzarella ---- - block.c | 4 +-- - block/io.c | 49 +++++--------------------------- - block/qed.c | 6 ++-- - block/throttle.c | 8 +++--- - include/block/block_int-common.h | 10 ++++--- - tests/unit/test-bdrv-drain.c | 18 ++++++------ - 6 files changed, 32 insertions(+), 63 deletions(-) - -diff --git a/block.c b/block.c -index ec184150a2..16a62a329c 100644 ---- a/block.c -+++ b/block.c -@@ -1713,8 +1713,8 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, - assert(is_power_of_2(bs->bl.request_alignment)); - - for (i = 0; i < bs->quiesce_counter; i++) { -- if (drv->bdrv_co_drain_begin) { -- drv->bdrv_co_drain_begin(bs); -+ if (drv->bdrv_drain_begin) { -+ drv->bdrv_drain_begin(bs); - } - } - -diff --git a/block/io.c b/block/io.c -index b9424024f9..c2ed4b2af9 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -252,55 +252,20 @@ typedef struct { - int *drained_end_counter; - } BdrvCoDrainData; - --static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) --{ -- BdrvCoDrainData *data = opaque; -- BlockDriverState *bs = data->bs; -- -- if (data->begin) { -- 
bs->drv->bdrv_co_drain_begin(bs); -- } else { -- bs->drv->bdrv_co_drain_end(bs); -- } -- -- /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */ -- qatomic_mb_set(&data->done, true); -- if (!data->begin) { -- qatomic_dec(data->drained_end_counter); -- } -- bdrv_dec_in_flight(bs); -- -- g_free(data); --} -- --/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ -+/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ - static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, - int *drained_end_counter) - { -- BdrvCoDrainData *data; -- -- if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) || -- (!begin && !bs->drv->bdrv_co_drain_end)) { -+ if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || -+ (!begin && !bs->drv->bdrv_drain_end)) { - return; - } - -- data = g_new(BdrvCoDrainData, 1); -- *data = (BdrvCoDrainData) { -- .bs = bs, -- .done = false, -- .begin = begin, -- .drained_end_counter = drained_end_counter, -- }; -- -- if (!begin) { -- qatomic_inc(drained_end_counter); -+ if (begin) { -+ bs->drv->bdrv_drain_begin(bs); -+ } else { -+ bs->drv->bdrv_drain_end(bs); - } -- -- /* Make sure the driver callback completes during the polling phase for -- * drain_begin. */ -- bdrv_inc_in_flight(bs); -- data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data); -- aio_co_schedule(bdrv_get_aio_context(bs), data->co); - } - - /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ -diff --git a/block/qed.c b/block/qed.c -index 013f826c44..c2691a85b1 100644 ---- a/block/qed.c -+++ b/block/qed.c -@@ -262,7 +262,7 @@ static bool coroutine_fn qed_plug_allocating_write_reqs(BDRVQEDState *s) - assert(!s->allocating_write_reqs_plugged); - if (s->allocating_acb != NULL) { - /* Another allocating write came concurrently. This cannot happen -- * from bdrv_qed_co_drain_begin, but it can happen when the timer runs. -+ * from bdrv_qed_drain_begin, but it can happen when the timer runs. 
- */ - qemu_co_mutex_unlock(&s->table_lock); - return false; -@@ -365,7 +365,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs, - } - } - --static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs) -+static void bdrv_qed_drain_begin(BlockDriverState *bs) - { - BDRVQEDState *s = bs->opaque; - -@@ -1661,7 +1661,7 @@ static BlockDriver bdrv_qed = { - .bdrv_co_check = bdrv_qed_co_check, - .bdrv_detach_aio_context = bdrv_qed_detach_aio_context, - .bdrv_attach_aio_context = bdrv_qed_attach_aio_context, -- .bdrv_co_drain_begin = bdrv_qed_co_drain_begin, -+ .bdrv_drain_begin = bdrv_qed_drain_begin, - }; - - static void bdrv_qed_init(void) -diff --git a/block/throttle.c b/block/throttle.c -index 131eba3ab4..88851c84f4 100644 ---- a/block/throttle.c -+++ b/block/throttle.c -@@ -214,7 +214,7 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state) - reopen_state->opaque = NULL; - } - --static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) -+static void throttle_drain_begin(BlockDriverState *bs) - { - ThrottleGroupMember *tgm = bs->opaque; - if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) { -@@ -222,7 +222,7 @@ static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) - } - } - --static void coroutine_fn throttle_co_drain_end(BlockDriverState *bs) -+static void throttle_drain_end(BlockDriverState *bs) - { - ThrottleGroupMember *tgm = bs->opaque; - assert(tgm->io_limits_disabled); -@@ -261,8 +261,8 @@ static BlockDriver bdrv_throttle = { - .bdrv_reopen_commit = throttle_reopen_commit, - .bdrv_reopen_abort = throttle_reopen_abort, - -- .bdrv_co_drain_begin = throttle_co_drain_begin, -- .bdrv_co_drain_end = throttle_co_drain_end, -+ .bdrv_drain_begin = throttle_drain_begin, -+ .bdrv_drain_end = throttle_drain_end, - - .is_filter = true, - .strong_runtime_opts = throttle_strong_runtime_opts, -diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h -index 31ae91e56e..40d646d1ed 
100644 ---- a/include/block/block_int-common.h -+++ b/include/block/block_int-common.h -@@ -735,17 +735,19 @@ struct BlockDriver { - void (*bdrv_io_unplug)(BlockDriverState *bs); - - /** -- * bdrv_co_drain_begin is called if implemented in the beginning of a -+ * bdrv_drain_begin is called if implemented in the beginning of a - * drain operation to drain and stop any internal sources of requests in - * the driver. -- * bdrv_co_drain_end is called if implemented at the end of the drain. -+ * bdrv_drain_end is called if implemented at the end of the drain. - * - * They should be used by the driver to e.g. manage scheduled I/O - * requests, or toggle an internal state. After the end of the drain new - * requests will continue normally. -+ * -+ * Implementations of both functions must not call aio_poll(). - */ -- void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs); -- void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs); -+ void (*bdrv_drain_begin)(BlockDriverState *bs); -+ void (*bdrv_drain_end)(BlockDriverState *bs); - - bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); - bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)( -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index 24f34e24ad..695519ee02 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -46,7 +46,7 @@ static void coroutine_fn sleep_in_drain_begin(void *opaque) - bdrv_dec_in_flight(bs); - } - --static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) -+static void bdrv_test_drain_begin(BlockDriverState *bs) - { - BDRVTestState *s = bs->opaque; - s->drain_count++; -@@ -57,7 +57,7 @@ static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) - } - } - --static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs) -+static void bdrv_test_drain_end(BlockDriverState *bs) - { - BDRVTestState *s = bs->opaque; - s->drain_count--; -@@ -111,8 +111,8 @@ static BlockDriver 
bdrv_test = { - .bdrv_close = bdrv_test_close, - .bdrv_co_preadv = bdrv_test_co_preadv, - -- .bdrv_co_drain_begin = bdrv_test_co_drain_begin, -- .bdrv_co_drain_end = bdrv_test_co_drain_end, -+ .bdrv_drain_begin = bdrv_test_drain_begin, -+ .bdrv_drain_end = bdrv_test_drain_end, - - .bdrv_child_perm = bdrv_default_perms, - -@@ -1703,6 +1703,7 @@ static void test_blockjob_commit_by_drained_end(void) - bdrv_drained_begin(bs_child); - g_assert(!job_has_completed); - bdrv_drained_end(bs_child); -+ aio_poll(qemu_get_aio_context(), false); - g_assert(job_has_completed); - - bdrv_unref(bs_parents[0]); -@@ -1858,6 +1859,7 @@ static void test_drop_intermediate_poll(void) - - g_assert(!job_has_completed); - ret = bdrv_drop_intermediate(chain[1], chain[0], NULL); -+ aio_poll(qemu_get_aio_context(), false); - g_assert(ret == 0); - g_assert(job_has_completed); - -@@ -1946,7 +1948,7 @@ static void coroutine_fn bdrv_replace_test_drain_co(void *opaque) - * .was_drained. - * Increment .drain_count. - */ --static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) -+static void bdrv_replace_test_drain_begin(BlockDriverState *bs) - { - BDRVReplaceTestState *s = bs->opaque; - -@@ -1977,7 +1979,7 @@ static void coroutine_fn bdrv_replace_test_read_entry(void *opaque) - * If .drain_count reaches 0 and the node has a backing file, issue a - * read request. 
- */ --static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) -+static void bdrv_replace_test_drain_end(BlockDriverState *bs) - { - BDRVReplaceTestState *s = bs->opaque; - -@@ -2002,8 +2004,8 @@ static BlockDriver bdrv_replace_test = { - .bdrv_close = bdrv_replace_test_close, - .bdrv_co_preadv = bdrv_replace_test_co_preadv, - -- .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin, -- .bdrv_co_drain_end = bdrv_replace_test_co_drain_end, -+ .bdrv_drain_begin = bdrv_replace_test_drain_begin, -+ .bdrv_drain_end = bdrv_replace_test_drain_end, - - .bdrv_child_perm = bdrv_default_perms, - }; --- -2.31.1 - diff --git a/kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch b/kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch deleted file mode 100644 index 2d95689..0000000 --- a/kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch +++ /dev/null @@ -1,246 +0,0 @@ -From 54e290df4bc1c9e83be7357caed6a2b1ba4f21f0 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:56 +0200 -Subject: [PATCH 09/20] block: Split BlockNodeInfo off of ImageInfo - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [4/12] fc8d69d549bb9a929db218b91697ee3ae95c1ff6 (hreitz/qemu-kvm-c-9-s) - -ImageInfo sometimes contains flat information, and sometimes it does -not. Split off a BlockNodeInfo struct, which only contains information -about a single node and has no link to the backing image. - -We do this so we can extend BlockNodeInfo to a BlockGraphInfo struct, -which has links to all child nodes, not just the backing node. It would -be strange to base BlockGraphInfo on ImageInfo, because then this -extended struct would have two links to the backing node (one in -BlockGraphInfo as one of all the child links, and one in ImageInfo). 
- -Furthermore, it is quite common to ignore the backing-image field -altogether: bdrv_query_image_info() does not set it, and -bdrv_image_info_dump() does not evaluate it. That signals that we -should have different structs for describing a single node and one that -has a link to the backing image. - -Still, bdrv_query_image_info() and bdrv_image_info_dump() are not -changed too much in this patch. Follow-up patches will handle them. - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-5-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit a2085f8909377b6df738f6c3f7ee6db4d16da8f7) -Signed-off-by: Hanna Czenczek ---- - block/qapi.c | 86 ++++++++++++++++++++++++++++++++------------ - include/block/qapi.h | 3 ++ - qapi/block-core.json | 24 +++++++++---- - 3 files changed, 85 insertions(+), 28 deletions(-) - -diff --git a/block/qapi.c b/block/qapi.c -index 51202b470a..e5022b4481 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -241,30 +241,18 @@ int bdrv_query_snapshot_info_list(BlockDriverState *bs, - } - - /** -- * bdrv_query_image_info: -- * @bs: block device to examine -- * @p_info: location to store image information -- * @errp: location to store error information -- * -- * Store "flat" image information in @p_info. -- * -- * "Flat" means it does *not* query backing image information, -- * i.e. (*pinfo)->has_backing_image will be set to false and -- * (*pinfo)->backing_image to NULL even when the image does in fact have -- * a backing image. -- * -- * @p_info will be set only on success. On error, store error in @errp. -+ * Helper function for other query info functions. Store information about @bs -+ * in @info, setting @errp on error. 
- */ --void bdrv_query_image_info(BlockDriverState *bs, -- ImageInfo **p_info, -- Error **errp) -+static void bdrv_do_query_node_info(BlockDriverState *bs, -+ BlockNodeInfo *info, -+ Error **errp) - { - int64_t size; - const char *backing_filename; - BlockDriverInfo bdi; - int ret; - Error *err = NULL; -- ImageInfo *info; - - aio_context_acquire(bdrv_get_aio_context(bs)); - -@@ -277,7 +265,6 @@ void bdrv_query_image_info(BlockDriverState *bs, - - bdrv_refresh_filename(bs); - -- info = g_new0(ImageInfo, 1); - info->filename = g_strdup(bs->filename); - info->format = g_strdup(bdrv_get_format_name(bs)); - info->virtual_size = size; -@@ -298,7 +285,6 @@ void bdrv_query_image_info(BlockDriverState *bs, - info->format_specific = bdrv_get_specific_info(bs, &err); - if (err) { - error_propagate(errp, err); -- qapi_free_ImageInfo(info); - goto out; - } - info->has_format_specific = info->format_specific != NULL; -@@ -339,16 +325,72 @@ void bdrv_query_image_info(BlockDriverState *bs, - break; - default: - error_propagate(errp, err); -- qapi_free_ImageInfo(info); - goto out; - } - -- *p_info = info; -- - out: - aio_context_release(bdrv_get_aio_context(bs)); - } - -+/** -+ * bdrv_query_block_node_info: -+ * @bs: block node to examine -+ * @p_info: location to store node information -+ * @errp: location to store error information -+ * -+ * Store image information about @bs in @p_info. -+ * -+ * @p_info will be set only on success. On error, store error in @errp. 
-+ */ -+void bdrv_query_block_node_info(BlockDriverState *bs, -+ BlockNodeInfo **p_info, -+ Error **errp) -+{ -+ BlockNodeInfo *info; -+ ERRP_GUARD(); -+ -+ info = g_new0(BlockNodeInfo, 1); -+ bdrv_do_query_node_info(bs, info, errp); -+ if (*errp) { -+ qapi_free_BlockNodeInfo(info); -+ return; -+ } -+ -+ *p_info = info; -+} -+ -+/** -+ * bdrv_query_image_info: -+ * @bs: block node to examine -+ * @p_info: location to store image information -+ * @errp: location to store error information -+ * -+ * Store "flat" image information in @p_info. -+ * -+ * "Flat" means it does *not* query backing image information, -+ * i.e. (*pinfo)->has_backing_image will be set to false and -+ * (*pinfo)->backing_image to NULL even when the image does in fact have -+ * a backing image. -+ * -+ * @p_info will be set only on success. On error, store error in @errp. -+ */ -+void bdrv_query_image_info(BlockDriverState *bs, -+ ImageInfo **p_info, -+ Error **errp) -+{ -+ ImageInfo *info; -+ ERRP_GUARD(); -+ -+ info = g_new0(ImageInfo, 1); -+ bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp); -+ if (*errp) { -+ qapi_free_ImageInfo(info); -+ return; -+ } -+ -+ *p_info = info; -+} -+ - /* @p_info will be set only on success. 
*/ - static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, - Error **errp) -diff --git a/include/block/qapi.h b/include/block/qapi.h -index c09859ea78..c7de4e3fa9 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -35,6 +35,9 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, - int bdrv_query_snapshot_info_list(BlockDriverState *bs, - SnapshotInfoList **p_list, - Error **errp); -+void bdrv_query_block_node_info(BlockDriverState *bs, -+ BlockNodeInfo **p_info, -+ Error **errp); - void bdrv_query_image_info(BlockDriverState *bs, - ImageInfo **p_info, - Error **errp); -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 4b9365167f..7720da0498 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -251,7 +251,7 @@ - } } - - ## --# @ImageInfo: -+# @BlockNodeInfo: - # - # Information about a QEMU image file - # -@@ -279,22 +279,34 @@ - # - # @snapshots: list of VM snapshots - # --# @backing-image: info of the backing image (since 1.6) --# - # @format-specific: structure supplying additional format-specific - # information (since 1.7) - # --# Since: 1.3 -+# Since: 8.0 - ## --{ 'struct': 'ImageInfo', -+{ 'struct': 'BlockNodeInfo', - 'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool', - '*actual-size': 'int', 'virtual-size': 'int', - '*cluster-size': 'int', '*encrypted': 'bool', '*compressed': 'bool', - '*backing-filename': 'str', '*full-backing-filename': 'str', - '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'], -- '*backing-image': 'ImageInfo', - '*format-specific': 'ImageInfoSpecific' } } - -+## -+# @ImageInfo: -+# -+# Information about a QEMU image file, and potentially its backing image -+# -+# @backing-image: info of the backing image -+# -+# Since: 1.3 -+## -+{ 'struct': 'ImageInfo', -+ 'base': 'BlockNodeInfo', -+ 'data': { -+ '*backing-image': 'ImageInfo' -+ } } -+ - ## - # @ImageCheck: - # --- -2.31.1 - diff --git a/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch 
b/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch deleted file mode 100644 index 19d52b5..0000000 --- a/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch +++ /dev/null @@ -1,70 +0,0 @@ -From defd6b325264d94ffb1355a8b19f9a77bd694a2f Mon Sep 17 00:00:00 2001 -From: Vladimir Sementsov-Ogievskiy -Date: Mon, 7 Nov 2022 19:35:56 +0300 -Subject: [PATCH 13/31] block: drop bdrv_remove_filter_or_cow_child - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [1/16] 6339edd738c3b79f8ecb6c1e012e52b6afb1a622 (sgarzarella/qemu-kvm-c-9-s) - -Drop this simple wrapper used only in one place. We have too many graph -modifying functions even without it. - -Signed-off-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Hanna Reitz -Message-Id: <20221107163558.618889-3-vsementsov@yandex-team.ru> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit f38eaec4c3618dfc4a23e20435cefb5bf8325264) -Signed-off-by: Stefano Garzarella ---- - block.c | 15 +-------------- - 1 file changed, 1 insertion(+), 14 deletions(-) - -diff --git a/block.c b/block.c -index a18f052374..ec184150a2 100644 ---- a/block.c -+++ b/block.c -@@ -93,8 +93,6 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, - static void bdrv_replace_child_noperm(BdrvChild *child, - BlockDriverState *new_bs); - static void bdrv_remove_child(BdrvChild *child, Transaction *tran); --static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, -- Transaction *tran); - - static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, - BlockReopenQueue *queue, -@@ -5073,17 +5071,6 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran) - tran_add(tran, &bdrv_remove_child_drv, child); - } - --/* -- * A function to remove backing-chain child of @bs if exists: cow child for -- * format nodes 
(always .backing) and filter child for filters (may be .file or -- * .backing) -- */ --static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, -- Transaction *tran) --{ -- bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran); --} -- - static int bdrv_replace_node_noperm(BlockDriverState *from, - BlockDriverState *to, - bool auto_skip, Transaction *tran, -@@ -5168,7 +5155,7 @@ static int bdrv_replace_node_common(BlockDriverState *from, - } - - if (detach_subchain) { -- bdrv_remove_filter_or_cow_child(to_cow_parent, tran); -+ bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran); - } - - found = g_hash_table_new(NULL, NULL); --- -2.31.1 - diff --git a/kvm-block-file-Add-file-specific-image-info.patch b/kvm-block-file-Add-file-specific-image-info.patch deleted file mode 100644 index a81b6b0..0000000 --- a/kvm-block-file-Add-file-specific-image-info.patch +++ /dev/null @@ -1,145 +0,0 @@ -From 4af86458d6bea2a6e15fd57d4d4bbe88e35f7e72 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:54 +0200 -Subject: [PATCH 07/20] block/file: Add file-specific image info - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [2/12] d8cc351d6c16c41b2000e41dc555f13093a9edce (hreitz/qemu-kvm-c-9-s) - -Add some (optional) information that the file driver can provide for -image files, namely the extent size hint. 
- -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-3-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 7f36a50ab4e7d39369cac67be4ba9d6ee4081dc0) -Signed-off-by: Hanna Czenczek ---- - block/file-posix.c | 30 ++++++++++++++++++++++++++++++ - qapi/block-core.json | 26 ++++++++++++++++++++++++-- - 2 files changed, 54 insertions(+), 2 deletions(-) - -diff --git a/block/file-posix.c b/block/file-posix.c -index b9647c5ffc..df3da79aed 100644 ---- a/block/file-posix.c -+++ b/block/file-posix.c -@@ -3095,6 +3095,34 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) - return 0; - } - -+static ImageInfoSpecific *raw_get_specific_info(BlockDriverState *bs, -+ Error **errp) -+{ -+ ImageInfoSpecificFile *file_info = g_new0(ImageInfoSpecificFile, 1); -+ ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1); -+ -+ *spec_info = (ImageInfoSpecific){ -+ .type = IMAGE_INFO_SPECIFIC_KIND_FILE, -+ .u.file.data = file_info, -+ }; -+ -+#ifdef FS_IOC_FSGETXATTR -+ { -+ BDRVRawState *s = bs->opaque; -+ struct fsxattr attr; -+ int ret; -+ -+ ret = ioctl(s->fd, FS_IOC_FSGETXATTR, &attr); -+ if (!ret && attr.fsx_extsize != 0) { -+ file_info->has_extent_size_hint = true; -+ file_info->extent_size_hint = attr.fsx_extsize; -+ } -+ } -+#endif -+ -+ return spec_info; -+} -+ - static BlockStatsSpecificFile get_blockstats_specific_file(BlockDriverState *bs) - { - BDRVRawState *s = bs->opaque; -@@ -3328,6 +3356,7 @@ BlockDriver bdrv_file = { - .bdrv_co_truncate = raw_co_truncate, - .bdrv_getlength = raw_getlength, - .bdrv_get_info = raw_get_info, -+ .bdrv_get_specific_info = raw_get_specific_info, - .bdrv_get_allocated_file_size - = raw_get_allocated_file_size, - .bdrv_get_specific_stats = raw_get_specific_stats, -@@ -3700,6 +3729,7 @@ static BlockDriver bdrv_host_device = { - .bdrv_co_truncate = raw_co_truncate, - .bdrv_getlength = raw_getlength, - .bdrv_get_info = raw_get_info, -+ .bdrv_get_specific_info = 
raw_get_specific_info, - .bdrv_get_allocated_file_size - = raw_get_allocated_file_size, - .bdrv_get_specific_stats = hdev_get_specific_stats, -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 95ac4fa634..f5d822cbd6 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -139,16 +139,29 @@ - '*encryption-format': 'RbdImageEncryptionFormat' - } } - -+## -+# @ImageInfoSpecificFile: -+# -+# @extent-size-hint: Extent size hint (if available) -+# -+# Since: 8.0 -+## -+{ 'struct': 'ImageInfoSpecificFile', -+ 'data': { -+ '*extent-size-hint': 'size' -+ } } -+ - ## - # @ImageInfoSpecificKind: - # - # @luks: Since 2.7 - # @rbd: Since 6.1 -+# @file: Since 8.0 - # - # Since: 1.7 - ## - { 'enum': 'ImageInfoSpecificKind', -- 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd' ] } -+ 'data': [ 'qcow2', 'vmdk', 'luks', 'rbd', 'file' ] } - - ## - # @ImageInfoSpecificQCow2Wrapper: -@@ -185,6 +198,14 @@ - { 'struct': 'ImageInfoSpecificRbdWrapper', - 'data': { 'data': 'ImageInfoSpecificRbd' } } - -+## -+# @ImageInfoSpecificFileWrapper: -+# -+# Since: 8.0 -+## -+{ 'struct': 'ImageInfoSpecificFileWrapper', -+ 'data': { 'data': 'ImageInfoSpecificFile' } } -+ - ## - # @ImageInfoSpecific: - # -@@ -199,7 +220,8 @@ - 'qcow2': 'ImageInfoSpecificQCow2Wrapper', - 'vmdk': 'ImageInfoSpecificVmdkWrapper', - 'luks': 'ImageInfoSpecificLUKSWrapper', -- 'rbd': 'ImageInfoSpecificRbdWrapper' -+ 'rbd': 'ImageInfoSpecificRbdWrapper', -+ 'file': 'ImageInfoSpecificFileWrapper' - } } - - ## --- -2.31.1 - diff --git a/kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch b/kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch deleted file mode 100644 index 62979ef..0000000 --- a/kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch +++ /dev/null @@ -1,206 +0,0 @@ -From c8c282c2e1d74cfc5de6527f7e20dfc3e76b67ac Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:27:00 +0200 -Subject: [PATCH 13/20] block/qapi: Add indentation to bdrv_node_info_dump() - 
-RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [8/12] d3a697e81ab9828457198075e5815a592363c725 (hreitz/qemu-kvm-c-9-s) - -In order to let qemu-img info present a block graph, add a parameter to -bdrv_node_info_dump() and bdrv_image_info_specific_dump() so that the -information of nodes below the root level can be given an indentation. - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-9-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 76c9e9750d1bd580e8ed4465f6be3a986434e7c3) -Signed-off-by: Hanna Czenczek ---- - block/monitor/block-hmp-cmds.c | 2 +- - block/qapi.c | 47 +++++++++++++++++++--------------- - include/block/qapi.h | 5 ++-- - qemu-img.c | 2 +- - qemu-io-cmds.c | 3 ++- - 5 files changed, 34 insertions(+), 25 deletions(-) - -diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c -index aa37faa601..72824d4e2e 100644 ---- a/block/monitor/block-hmp-cmds.c -+++ b/block/monitor/block-hmp-cmds.c -@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info, - monitor_printf(mon, "\nImages:\n"); - image_info = inserted->image; - while (1) { -- bdrv_node_info_dump(qapi_ImageInfo_base(image_info)); -+ bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0); - if (image_info->has_backing_image) { - image_info = image_info->backing_image; - } else { -diff --git a/block/qapi.c b/block/qapi.c -index f208c21ccf..3e35603f0c 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -915,7 +915,8 @@ static bool qobject_is_empty_dump(const QObject *obj) - * prepending an optional prefix if the dump is not empty. 
- */ - void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, -- const char *prefix) -+ const char *prefix, -+ int indentation) - { - QObject *obj, *data; - Visitor *v = qobject_output_visitor_new(&obj); -@@ -925,48 +926,51 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, - data = qdict_get(qobject_to(QDict, obj), "data"); - if (!qobject_is_empty_dump(data)) { - if (prefix) { -- qemu_printf("%s", prefix); -+ qemu_printf("%*s%s", indentation * 4, "", prefix); - } -- dump_qobject(1, data); -+ dump_qobject(indentation + 1, data); - } - qobject_unref(obj); - visit_free(v); - } - --void bdrv_node_info_dump(BlockNodeInfo *info) -+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation) - { - char *size_buf, *dsize_buf; -+ g_autofree char *ind_s = g_strdup_printf("%*s", indentation * 4, ""); -+ - if (!info->has_actual_size) { - dsize_buf = g_strdup("unavailable"); - } else { - dsize_buf = size_to_str(info->actual_size); - } - size_buf = size_to_str(info->virtual_size); -- qemu_printf("image: %s\n" -- "file format: %s\n" -- "virtual size: %s (%" PRId64 " bytes)\n" -- "disk size: %s\n", -- info->filename, info->format, size_buf, -- info->virtual_size, -- dsize_buf); -+ qemu_printf("%simage: %s\n" -+ "%sfile format: %s\n" -+ "%svirtual size: %s (%" PRId64 " bytes)\n" -+ "%sdisk size: %s\n", -+ ind_s, info->filename, -+ ind_s, info->format, -+ ind_s, size_buf, info->virtual_size, -+ ind_s, dsize_buf); - g_free(size_buf); - g_free(dsize_buf); - - if (info->has_encrypted && info->encrypted) { -- qemu_printf("encrypted: yes\n"); -+ qemu_printf("%sencrypted: yes\n", ind_s); - } - - if (info->has_cluster_size) { -- qemu_printf("cluster_size: %" PRId64 "\n", -- info->cluster_size); -+ qemu_printf("%scluster_size: %" PRId64 "\n", -+ ind_s, info->cluster_size); - } - - if (info->has_dirty_flag && info->dirty_flag) { -- qemu_printf("cleanly shut down: no\n"); -+ qemu_printf("%scleanly shut down: no\n", ind_s); - } - - if (info->has_backing_filename) 
{ -- qemu_printf("backing file: %s", info->backing_filename); -+ qemu_printf("%sbacking file: %s", ind_s, info->backing_filename); - if (!info->has_full_backing_filename) { - qemu_printf(" (cannot determine actual path)"); - } else if (strcmp(info->backing_filename, -@@ -975,15 +979,16 @@ void bdrv_node_info_dump(BlockNodeInfo *info) - } - qemu_printf("\n"); - if (info->has_backing_filename_format) { -- qemu_printf("backing file format: %s\n", -- info->backing_filename_format); -+ qemu_printf("%sbacking file format: %s\n", -+ ind_s, info->backing_filename_format); - } - } - - if (info->has_snapshots) { - SnapshotInfoList *elem; - -- qemu_printf("Snapshot list:\n"); -+ qemu_printf("%sSnapshot list:\n", ind_s); -+ qemu_printf("%s", ind_s); - bdrv_snapshot_dump(NULL); - qemu_printf("\n"); - -@@ -1003,6 +1008,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info) - - pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id); - pstrcpy(sn.name, sizeof(sn.name), elem->value->name); -+ qemu_printf("%s", ind_s); - bdrv_snapshot_dump(&sn); - qemu_printf("\n"); - } -@@ -1010,6 +1016,7 @@ void bdrv_node_info_dump(BlockNodeInfo *info) - - if (info->has_format_specific) { - bdrv_image_info_specific_dump(info->format_specific, -- "Format specific information:\n"); -+ "Format specific information:\n", -+ indentation); - } - } -diff --git a/include/block/qapi.h b/include/block/qapi.h -index 196436020e..38855f2ae9 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -49,6 +49,7 @@ void bdrv_query_block_graph_info(BlockDriverState *bs, - - void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); - void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, -- const char *prefix); --void bdrv_node_info_dump(BlockNodeInfo *info); -+ const char *prefix, -+ int indentation); -+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation); - #endif -diff --git a/qemu-img.c b/qemu-img.c -index 3b2ca3bbcb..30b4ea58bb 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -2859,7 +2859,7 @@ 
static void dump_human_image_info_list(BlockNodeInfoList *list) - } - delim = true; - -- bdrv_node_info_dump(elem->value); -+ bdrv_node_info_dump(elem->value, 0); - } - } - -diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c -index f4a374528e..fdcb89211b 100644 ---- a/qemu-io-cmds.c -+++ b/qemu-io-cmds.c -@@ -1826,7 +1826,8 @@ static int info_f(BlockBackend *blk, int argc, char **argv) - } - if (spec_info) { - bdrv_image_info_specific_dump(spec_info, -- "Format specific information:\n"); -+ "Format specific information:\n", -+ 0); - qapi_free_ImageInfoSpecific(spec_info); - } - --- -2.31.1 - diff --git a/kvm-block-qapi-Introduce-BlockGraphInfo.patch b/kvm-block-qapi-Introduce-BlockGraphInfo.patch deleted file mode 100644 index e9a1622..0000000 --- a/kvm-block-qapi-Introduce-BlockGraphInfo.patch +++ /dev/null @@ -1,155 +0,0 @@ -From 0044e3848b02ef6edba5961d1f4b6297d137d207 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:59 +0200 -Subject: [PATCH 12/20] block/qapi: Introduce BlockGraphInfo - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [7/12] de47bac372cd552b812c774a2f35f95923af74ff (hreitz/qemu-kvm-c-9-s) - -Introduce a new QAPI type BlockGraphInfo and an associated -bdrv_query_block_graph_info() function that recursively gathers -BlockNodeInfo objects through a block graph. - -A follow-up patch is going to make "qemu-img info" use this to print -information about all nodes that are (usually implicitly) opened for a -given image file. 
- -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-8-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 6cab33997b91eb86e82a6a2ae58a24f835249d4a) -Signed-off-by: Hanna Czenczek ---- - block/qapi.c | 48 ++++++++++++++++++++++++++++++++++++++++++++ - include/block/qapi.h | 3 +++ - qapi/block-core.json | 35 ++++++++++++++++++++++++++++++++ - 3 files changed, 86 insertions(+) - -diff --git a/block/qapi.c b/block/qapi.c -index 5d0a8d2ce3..f208c21ccf 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -411,6 +411,54 @@ fail: - qapi_free_ImageInfo(info); - } - -+/** -+ * bdrv_query_block_graph_info: -+ * @bs: root node to start from -+ * @p_info: location to store image information -+ * @errp: location to store error information -+ * -+ * Store image information about the graph starting from @bs in @p_info. -+ * -+ * @p_info will be set only on success. On error, store error in @errp. -+ */ -+void bdrv_query_block_graph_info(BlockDriverState *bs, -+ BlockGraphInfo **p_info, -+ Error **errp) -+{ -+ BlockGraphInfo *info; -+ BlockChildInfoList **children_list_tail; -+ BdrvChild *c; -+ ERRP_GUARD(); -+ -+ info = g_new0(BlockGraphInfo, 1); -+ bdrv_do_query_node_info(bs, qapi_BlockGraphInfo_base(info), errp); -+ if (*errp) { -+ goto fail; -+ } -+ -+ children_list_tail = &info->children; -+ -+ QLIST_FOREACH(c, &bs->children, next) { -+ BlockChildInfo *c_info; -+ -+ c_info = g_new0(BlockChildInfo, 1); -+ QAPI_LIST_APPEND(children_list_tail, c_info); -+ -+ c_info->name = g_strdup(c->name); -+ bdrv_query_block_graph_info(c->bs, &c_info->info, errp); -+ if (*errp) { -+ goto fail; -+ } -+ } -+ -+ *p_info = info; -+ return; -+ -+fail: -+ assert(*errp != NULL); -+ qapi_free_BlockGraphInfo(info); -+} -+ - /* @p_info will be set only on success. 
*/ - static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, - Error **errp) -diff --git a/include/block/qapi.h b/include/block/qapi.h -index 2174bf8fa2..196436020e 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -43,6 +43,9 @@ void bdrv_query_image_info(BlockDriverState *bs, - bool flat, - bool skip_implicit_filters, - Error **errp); -+void bdrv_query_block_graph_info(BlockDriverState *bs, -+ BlockGraphInfo **p_info, -+ Error **errp); - - void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); - void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 4cf2deeb6c..d703e0fb16 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -307,6 +307,41 @@ - '*backing-image': 'ImageInfo' - } } - -+## -+# @BlockChildInfo: -+# -+# Information about all nodes in the block graph starting at some node, -+# annotated with information about that node in relation to its parent. -+# -+# @name: Child name of the root node in the BlockGraphInfo struct, in its role -+# as the child of some undescribed parent node -+# -+# @info: Block graph information starting at this node -+# -+# Since: 8.0 -+## -+{ 'struct': 'BlockChildInfo', -+ 'data': { -+ 'name': 'str', -+ 'info': 'BlockGraphInfo' -+ } } -+ -+## -+# @BlockGraphInfo: -+# -+# Information about all nodes in a block (sub)graph in the form of BlockNodeInfo -+# data. -+# The base BlockNodeInfo struct contains the information for the (sub)graph's -+# root node. 
-+# -+# @children: Array of links to this node's child nodes' information -+# -+# Since: 8.0 -+## -+{ 'struct': 'BlockGraphInfo', -+ 'base': 'BlockNodeInfo', -+ 'data': { 'children': ['BlockChildInfo'] } } -+ - ## - # @ImageCheck: - # --- -2.31.1 - diff --git a/kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch b/kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch deleted file mode 100644 index e5c012a..0000000 --- a/kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch +++ /dev/null @@ -1,197 +0,0 @@ -From ae2c3df00d673d436fe4d8ec9103a3b76d7e6233 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:58 +0200 -Subject: [PATCH 11/20] block/qapi: Let bdrv_query_image_info() recurse - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [6/12] 451a83fd682cd6dd6026c22974d18c2f12ee06e3 (hreitz/qemu-kvm-c-9-s) - -There is no real reason why bdrv_query_image_info() should generally not -recurse. The ImageInfo struct has a pointer to the backing image, so it -should generally be filled, unless the caller explicitly opts out. - -This moves the recursing code from bdrv_block_device_info() into -bdrv_query_image_info(). - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-7-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 5d8813593f3f673fc96eed199beb35690cc46f58) - -Conflicts: - block/qapi.c: Conflicts with - 54fde4ff0621c22b15cbaaa3c74301cc0dbd1c9e ("qapi block: Elide - redundant has_FOO in generated C"), which dropped - `has_backing_image`. Without that commit (and 44ea9d9be before it), - we still need to set `has_backing_image` in - `bdrv_query_image_info()`. 
- -Signed-off-by: Hanna Czenczek ---- - block/qapi.c | 94 +++++++++++++++++++++++++++----------------- - include/block/qapi.h | 2 + - 2 files changed, 59 insertions(+), 37 deletions(-) - -diff --git a/block/qapi.c b/block/qapi.c -index ad88bf9b38..5d0a8d2ce3 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -47,8 +47,10 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, - Error **errp) - { - ImageInfo **p_image_info; -+ ImageInfo *backing_info; - BlockDriverState *bs0, *backing; - BlockDeviceInfo *info; -+ ERRP_GUARD(); - - if (!bs->drv) { - error_setg(errp, "Block device %s is ejected", bs->node_name); -@@ -149,38 +151,21 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, - bs0 = bs; - p_image_info = &info->image; - info->backing_file_depth = 0; -- while (1) { -- Error *local_err = NULL; -- bdrv_query_image_info(bs0, p_image_info, &local_err); -- if (local_err) { -- error_propagate(errp, local_err); -- qapi_free_BlockDeviceInfo(info); -- return NULL; -- } -- -- /* stop gathering data for flat output */ -- if (flat) { -- break; -- } - -- if (bs0->drv && bdrv_filter_or_cow_child(bs0)) { -- /* -- * Put any filtered child here (for backwards compatibility to when -- * we put bs0->backing here, which might be any filtered child). 
-- */ -- info->backing_file_depth++; -- bs0 = bdrv_filter_or_cow_bs(bs0); -- (*p_image_info)->has_backing_image = true; -- p_image_info = &((*p_image_info)->backing_image); -- } else { -- break; -- } -+ /* -+ * Skip automatically inserted nodes that the user isn't aware of for -+ * query-block (blk != NULL), but not for query-named-block-nodes -+ */ -+ bdrv_query_image_info(bs0, p_image_info, flat, blk != NULL, errp); -+ if (*errp) { -+ qapi_free_BlockDeviceInfo(info); -+ return NULL; -+ } - -- /* Skip automatically inserted nodes that the user isn't aware of for -- * query-block (blk != NULL), but not for query-named-block-nodes */ -- if (blk) { -- bs0 = bdrv_skip_implicit_filters(bs0); -- } -+ backing_info = info->image->backing_image; -+ while (backing_info) { -+ info->backing_file_depth++; -+ backing_info = backing_info->backing_image; - } - - return info; -@@ -363,19 +348,28 @@ void bdrv_query_block_node_info(BlockDriverState *bs, - * bdrv_query_image_info: - * @bs: block node to examine - * @p_info: location to store image information -+ * @flat: skip backing node information -+ * @skip_implicit_filters: skip implicit filters in the backing chain - * @errp: location to store error information - * -- * Store "flat" image information in @p_info. -+ * Store image information in @p_info, potentially recursively covering the -+ * backing chain. - * -- * "Flat" means it does *not* query backing image information, -- * i.e. (*pinfo)->has_backing_image will be set to false and -- * (*pinfo)->backing_image to NULL even when the image does in fact have -- * a backing image. -+ * If @flat is true, do not query backing image information, i.e. -+ * (*p_info)->has_backing_image will be set to false and -+ * (*p_info)->backing_image to NULL even when the image does in fact have a -+ * backing image. -+ * -+ * If @skip_implicit_filters is true, implicit filter nodes in the backing chain -+ * will be skipped when querying backing image information. 
-+ * (@skip_implicit_filters is ignored when @flat is true.) - * - * @p_info will be set only on success. On error, store error in @errp. - */ - void bdrv_query_image_info(BlockDriverState *bs, - ImageInfo **p_info, -+ bool flat, -+ bool skip_implicit_filters, - Error **errp) - { - ImageInfo *info; -@@ -384,11 +378,37 @@ void bdrv_query_image_info(BlockDriverState *bs, - info = g_new0(ImageInfo, 1); - bdrv_do_query_node_info(bs, qapi_ImageInfo_base(info), errp); - if (*errp) { -- qapi_free_ImageInfo(info); -- return; -+ goto fail; -+ } -+ -+ if (!flat) { -+ BlockDriverState *backing; -+ -+ /* -+ * Use any filtered child here (for backwards compatibility to when -+ * we always took bs->backing, which might be any filtered child). -+ */ -+ backing = bdrv_filter_or_cow_bs(bs); -+ if (skip_implicit_filters) { -+ backing = bdrv_skip_implicit_filters(backing); -+ } -+ -+ if (backing) { -+ bdrv_query_image_info(backing, &info->backing_image, false, -+ skip_implicit_filters, errp); -+ if (*errp) { -+ goto fail; -+ } -+ info->has_backing_image = true; -+ } - } - - *p_info = info; -+ return; -+ -+fail: -+ assert(*errp); -+ qapi_free_ImageInfo(info); - } - - /* @p_info will be set only on success. 
*/ -diff --git a/include/block/qapi.h b/include/block/qapi.h -index 22198dcd0c..2174bf8fa2 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -40,6 +40,8 @@ void bdrv_query_block_node_info(BlockDriverState *bs, - Error **errp); - void bdrv_query_image_info(BlockDriverState *bs, - ImageInfo **p_info, -+ bool flat, -+ bool skip_implicit_filters, - Error **errp); - - void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); --- -2.31.1 - diff --git a/kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch b/kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch deleted file mode 100644 index 8d5a20a..0000000 --- a/kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch +++ /dev/null @@ -1,99 +0,0 @@ -From b952c8f1da6f8597736c0e040565830139369359 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 14 Feb 2023 18:16:21 +0100 -Subject: [PATCH] block: temporarily hold the new AioContext of bs_top in - bdrv_append() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 153: block: temporarily hold the new AioContext of bs_top in bdrv_append() -RH-Bugzilla: 2168209 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf -RH-Commit: [1/1] 5b190426d996e8c9f7a781bd97aee8d25756dbd3 (sgarzarella/qemu-kvm-c-9-s) - -bdrv_append() is called with bs_top AioContext held, but -bdrv_attach_child_noperm() could change the AioContext of bs_top. - -bdrv_replace_node_noperm() calls bdrv_drained_begin() starting from -commit 2398747128 ("block: Don't poll in bdrv_replace_child_noperm()"). -bdrv_drained_begin() can call BDRV_POLL_WHILE that assumes the new lock -is taken, so let's temporarily hold the new AioContext to prevent QEMU -from failing in BDRV_POLL_WHILE when it tries to release the wrong -AioContext. 
- -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2168209 -Reported-by: Aihua Liang -Signed-off-by: Stefano Garzarella -Message-Id: <20230214171621.11574-1-sgarzare@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 60d90bf43c169b9d1dbcb17ed794b7b02c6862b1) -Signed-off-by: Stefano Garzarella ---- - block.c | 23 +++++++++++++++++++++++ - 1 file changed, 23 insertions(+) - -diff --git a/block.c b/block.c -index 0d78711416..9e1dcb9e47 100644 ---- a/block.c -+++ b/block.c -@@ -5275,6 +5275,8 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp) - * child. - * - * This function does not create any image files. -+ * -+ * The caller must hold the AioContext lock for @bs_top. - */ - int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - Error **errp) -@@ -5282,11 +5284,14 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - int ret; - BdrvChild *child; - Transaction *tran = tran_new(); -+ AioContext *old_context, *new_context = NULL; - - GLOBAL_STATE_CODE(); - - assert(!bs_new->backing); - -+ old_context = bdrv_get_aio_context(bs_top); -+ - child = bdrv_attach_child_noperm(bs_new, bs_top, "backing", - &child_of_bds, bdrv_backing_role(bs_new), - tran, errp); -@@ -5295,6 +5300,19 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, - goto out; - } - -+ /* -+ * bdrv_attach_child_noperm could change the AioContext of bs_top. -+ * bdrv_replace_node_noperm calls bdrv_drained_begin, so let's temporarily -+ * hold the new AioContext, since bdrv_drained_begin calls BDRV_POLL_WHILE -+ * that assumes the new lock is taken. 
-+ */ -+ new_context = bdrv_get_aio_context(bs_top); -+ -+ if (old_context != new_context) { -+ aio_context_release(old_context); -+ aio_context_acquire(new_context); -+ } -+ - ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp); - if (ret < 0) { - goto out; -@@ -5306,6 +5324,11 @@ out: - - bdrv_refresh_limits(bs_top, NULL, NULL); - -+ if (new_context && old_context != new_context) { -+ aio_context_release(new_context); -+ aio_context_acquire(old_context); -+ } -+ - return ret; - } - --- -2.31.1 - diff --git a/kvm-block-vmdk-Change-extent-info-type.patch b/kvm-block-vmdk-Change-extent-info-type.patch deleted file mode 100644 index 6b8f6a7..0000000 --- a/kvm-block-vmdk-Change-extent-info-type.patch +++ /dev/null @@ -1,140 +0,0 @@ -From d8caed018afb0f60f449e971398d2a8d6c2992e7 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:55 +0200 -Subject: [PATCH 08/20] block/vmdk: Change extent info type - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [3/12] efe50a2797c679ce6bb5faa423047461a34e6792 (hreitz/qemu-kvm-c-9-s) - -VMDK's implementation of .bdrv_get_specific_info() returns information -about its extent files, ostensibly in the form of ImageInfo objects. -However, it does not get this information through -bdrv_query_image_info(), but fills only a select few fields with custom -information that does not always match the fields' purposes. - -For example, @format, which is supposed to be a block driver name, is -filled with the extent type, e.g. SPARSE or FLAT. - -In ImageInfo, @compressed shows whether the data that can be seen in the -image is stored in compressed form or not. For example, a compressed -qcow2 image will store compressed data in its data file, but when -accessing the qcow2 node, you will see normal data. 
This is not how -VMDK uses the @compressed field for its extent files: Instead, it -signifies whether accessing the extent file will yield compressed data -(which the VMDK driver then (de-)compresses). - -Create a new structure to represent the extent information. This allows -us to clarify the fields' meanings, and it clearly shows that these are -not complete ImageInfo objects. (That is, if a user wants an extent -file's ImageInfo object, they will need to query it separately, and will -not get it from ImageInfoSpecificVmdk.extents.) - -Note that this removes the last use of ['ImageInfo'] (i.e. an array of -ImageInfo objects), so the QAPI generator will no longer generate -ImageInfoList by default. However, we use it in qemu-img.c, so we need -to create a dummy object to force the generate to create that type, -similarly to DummyForceArrays in machine.json (introduced in commit -9f08c8ec73878122ad4b061ed334f0437afaaa32 ("qapi: Lazy creation of array -types")). - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-4-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 456e75171a85c19a5bfa202eefcbdc4ef1692f05) -Signed-off-by: Hanna Czenczek ---- - block/vmdk.c | 8 ++++---- - qapi/block-core.json | 38 +++++++++++++++++++++++++++++++++++++- - 2 files changed, 41 insertions(+), 5 deletions(-) - -diff --git a/block/vmdk.c b/block/vmdk.c -index 26376352b9..4435b9880b 100644 ---- a/block/vmdk.c -+++ b/block/vmdk.c -@@ -2901,12 +2901,12 @@ static int vmdk_has_zero_init(BlockDriverState *bs) - return 1; - } - --static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent) -+static VmdkExtentInfo *vmdk_get_extent_info(VmdkExtent *extent) - { -- ImageInfo *info = g_new0(ImageInfo, 1); -+ VmdkExtentInfo *info = g_new0(VmdkExtentInfo, 1); - - bdrv_refresh_filename(extent->file->bs); -- *info = (ImageInfo){ -+ *info = (VmdkExtentInfo){ - .filename = g_strdup(extent->file->bs->filename), - .format = g_strdup(extent->type), - 
.virtual_size = extent->sectors * BDRV_SECTOR_SIZE, -@@ -2985,7 +2985,7 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs, - int i; - BDRVVmdkState *s = bs->opaque; - ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1); -- ImageInfoList **tail; -+ VmdkExtentInfoList **tail; - - *spec_info = (ImageInfoSpecific){ - .type = IMAGE_INFO_SPECIFIC_KIND_VMDK, -diff --git a/qapi/block-core.json b/qapi/block-core.json -index f5d822cbd6..4b9365167f 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -124,7 +124,33 @@ - 'create-type': 'str', - 'cid': 'int', - 'parent-cid': 'int', -- 'extents': ['ImageInfo'] -+ 'extents': ['VmdkExtentInfo'] -+ } } -+ -+## -+# @VmdkExtentInfo: -+# -+# Information about a VMDK extent file -+# -+# @filename: Name of the extent file -+# -+# @format: Extent type (e.g. FLAT or SPARSE) -+# -+# @virtual-size: Number of bytes covered by this extent -+# -+# @cluster-size: Cluster size in bytes (for non-flat extents) -+# -+# @compressed: Whether this extent contains compressed data -+# -+# Since: 8.0 -+## -+{ 'struct': 'VmdkExtentInfo', -+ 'data': { -+ 'filename': 'str', -+ 'format': 'str', -+ 'virtual-size': 'int', -+ '*cluster-size': 'int', -+ '*compressed': 'bool' - } } - - ## -@@ -5754,3 +5780,13 @@ - 'data': { 'device': 'str', '*id': 'str', '*name': 'str'}, - 'returns': 'SnapshotInfo', - 'allow-preconfig': true } -+ -+## -+# @DummyBlockCoreForceArrays: -+# -+# Not used by QMP; hack to let us use ImageInfoList internally -+# -+# Since: 8.0 -+## -+{ 'struct': 'DummyBlockCoreForceArrays', -+ 'data': { 'unused-image-info': ['ImageInfo'] } } --- -2.31.1 - diff --git a/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch b/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch deleted file mode 100644 index 1a3c139..0000000 --- a/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch +++ /dev/null @@ -1,127 +0,0 @@ -From b886411a682b56bfe674f0a35d40c67c8e9dc87a Mon Sep 17 00:00:00 2001 
-From: Stefan Hajnoczi -Date: Tue, 21 Feb 2023 16:22:17 -0500 -Subject: [PATCH 02/12] dma-helpers: prevent dma_blk_cb() vs dma_aio_cancel() - race - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread -RH-Bugzilla: 2155748 -RH-Acked-by: Eric Blake -RH-Acked-by: Kevin Wolf -RH-Acked-by: Laszlo Ersek -RH-Commit: [2/3] eeeea43c25d8f4fa84591b05547fb77e4058abff (stefanha/centos-stream-qemu-kvm) - -dma_blk_cb() only takes the AioContext lock around ->io_func(). That -means the rest of dma_blk_cb() is not protected. In particular, the -DMAAIOCB field accesses happen outside the lock. - -There is a race when the main loop thread holds the AioContext lock and -invokes scsi_device_purge_requests() -> bdrv_aio_cancel() -> -dma_aio_cancel() while an IOThread executes dma_blk_cb(). The dbs->acb -field determines how cancellation proceeds. If dma_aio_cancel() sees -dbs->acb == NULL while dma_blk_cb() is still running, the request can be -completed twice (-ECANCELED and the actual return value). - -The following assertion can occur with virtio-scsi when an IOThread is -used: - - ../hw/scsi/scsi-disk.c:368: scsi_dma_complete: Assertion `r->req.aiocb != NULL' failed. - -Fix the race by holding the AioContext across dma_blk_cb(). Now -dma_aio_cancel() under the AioContext lock will not see -inconsistent/intermediate states. 
- -Cc: Paolo Bonzini -Reviewed-by: Eric Blake -Signed-off-by: Stefan Hajnoczi -Message-Id: <20230221212218.1378734-3-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit abfcd2760b3e70727bbc0792221b8b98a733dc32) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/scsi-disk.c | 4 +--- - softmmu/dma-helpers.c | 12 +++++++----- - 2 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index 5327f93f4c..b12d8b0816 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -354,13 +354,12 @@ done: - scsi_req_unref(&r->req); - } - -+/* Called with AioContext lock held */ - static void scsi_dma_complete(void *opaque, int ret) - { - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -- - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -@@ -370,7 +369,6 @@ static void scsi_dma_complete(void *opaque, int ret) - block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); - } - scsi_dma_complete_noio(r, ret); -- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); - } - - static void scsi_read_complete_noio(SCSIDiskReq *r, int ret) -diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c -index 7820fec54c..2463964805 100644 ---- a/softmmu/dma-helpers.c -+++ b/softmmu/dma-helpers.c -@@ -113,17 +113,19 @@ static void dma_complete(DMAAIOCB *dbs, int ret) - static void dma_blk_cb(void *opaque, int ret) - { - DMAAIOCB *dbs = (DMAAIOCB *)opaque; -+ AioContext *ctx = dbs->ctx; - dma_addr_t cur_addr, cur_len; - void *mem; - - trace_dma_blk_cb(dbs, ret); - -+ aio_context_acquire(ctx); - dbs->acb = NULL; - dbs->offset += dbs->iov.size; - - if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { - dma_complete(dbs, ret); -- return; -+ goto out; - } - dma_blk_unmap(dbs); - -@@ -164,9 +166,9 @@ static void dma_blk_cb(void *opaque, int ret) - - if (dbs->iov.size == 0) { - 
trace_dma_map_wait(dbs); -- dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs); -+ dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs); - cpu_register_map_client(dbs->bh); -- return; -+ goto out; - } - - if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) { -@@ -174,11 +176,11 @@ static void dma_blk_cb(void *opaque, int ret) - QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align)); - } - -- aio_context_acquire(dbs->ctx); - dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, - dma_blk_cb, dbs, dbs->io_func_opaque); -- aio_context_release(dbs->ctx); - assert(dbs->acb); -+out: -+ aio_context_release(ctx); - } - - static void dma_aio_cancel(BlockAIOCB *acb) --- -2.39.1 - diff --git a/kvm-edu-add-smp_mb__after_rmw.patch b/kvm-edu-add-smp_mb__after_rmw.patch deleted file mode 100644 index dd77648..0000000 --- a/kvm-edu-add-smp_mb__after_rmw.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 67bbeb056f75adc6c964468d876531ab68366fe0 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 07/12] edu: add smp_mb__after_rmw() - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [4/9] 2ad6fd6cb33fde39d2d017d94c0dde2152ad70c4 (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit 2482aeea4195ad84cf3d4e5b15b28ec5b420ed5a -Author: Paolo Bonzini -Date: Thu Mar 2 11:16:13 2023 +0100 - - edu: add smp_mb__after_rmw() - - Ensure ordering between clearing the COMPUTING flag and checking - IRQFACT, and between setting the IRQFACT flag and checking - COMPUTING. This ensures that no wakeups are lost. 
- - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - hw/misc/edu.c | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/hw/misc/edu.c b/hw/misc/edu.c -index e935c418d4..a1f8bc77e7 100644 ---- a/hw/misc/edu.c -+++ b/hw/misc/edu.c -@@ -267,6 +267,8 @@ static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val, - case 0x20: - if (val & EDU_STATUS_IRQFACT) { - qatomic_or(&edu->status, EDU_STATUS_IRQFACT); -+ /* Order check of the COMPUTING flag after setting IRQFACT. */ -+ smp_mb__after_rmw(); - } else { - qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT); - } -@@ -349,6 +351,9 @@ static void *edu_fact_thread(void *opaque) - qemu_mutex_unlock(&edu->thr_mutex); - qatomic_and(&edu->status, ~EDU_STATUS_COMPUTING); - -+ /* Clear COMPUTING flag before checking IRQFACT. */ -+ smp_mb__after_rmw(); -+ - if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) { - qemu_mutex_lock_iothread(); - edu_raise_irq(edu, FACT_IRQ); --- -2.39.1 - diff --git a/kvm-hw-arm-virt-Add-compact-highmem-property.patch b/kvm-hw-arm-virt-Add-compact-highmem-property.patch deleted file mode 100644 index bc65e2f..0000000 --- a/kvm-hw-arm-virt-Add-compact-highmem-property.patch +++ /dev/null @@ -1,169 +0,0 @@ -From 4ab2aff624908e49b099f00609875f4d03e9e1ec Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 6/8] hw/arm/virt: Add 'compact-highmem' property - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/8] 781506f3445493f05b511547370b6d88ef092457 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -After the improvement to high memory region address assignment is -applied, the memory layout can be changed, introducing possible -migration breakage. 
For example, VIRT_HIGH_PCIE_MMIO memory region -is disabled or enabled when the optimization is applied or not, with -the following configuration. The configuration is only achievable by -modifying the source code until more properties are added to allow -users selectively disable those high memory regions. - - pa_bits = 40; - vms->highmem_redists = false; - vms->highmem_ecam = false; - vms->highmem_mmio = true; - - # qemu-system-aarch64 -accel kvm -cpu host \ - -machine virt-7.2,compact-highmem={on, off} \ - -m 4G,maxmem=511G -monitor stdio - - Region compact-highmem=off compact-highmem=on - ---------------------------------------------------------------- - MEM [1GB 512GB] [1GB 512GB] - HIGH_GIC_REDISTS2 [512GB 512GB+64MB] [disabled] - HIGH_PCIE_ECAM [512GB+256MB 512GB+512MB] [disabled] - HIGH_PCIE_MMIO [disabled] [512GB 1TB] - -In order to keep backwords compatibility, we need to disable the -optimization on machine, which is virt-7.1 or ealier than it. It -means the optimization is enabled by default from virt-7.2. Besides, -'compact-highmem' property is added so that the optimization can be -explicitly enabled or disabled on all machine types by users. - -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-7-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit f40408a9fe5d1db70a75a33d2b26c8af8a5d57b0) -Signed-off-by: Gavin Shan -Conflicts: - hw/arm/virt.c - Comment out the handlers of property 'compact-highmem' since - the property isn't exposed. 
---- - docs/system/arm/virt.rst | 4 ++++ - hw/arm/virt.c | 34 ++++++++++++++++++++++++++++++++++ - include/hw/arm/virt.h | 1 + - 3 files changed, 39 insertions(+) - -diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst -index 20442ea2c1..4454706392 100644 ---- a/docs/system/arm/virt.rst -+++ b/docs/system/arm/virt.rst -@@ -94,6 +94,10 @@ highmem - address space above 32 bits. The default is ``on`` for machine types - later than ``virt-2.12``. - -+compact-highmem -+ Set ``on``/``off`` to enable/disable the compact layout for high memory regions. -+ The default is ``on`` for machine types later than ``virt-7.2``. -+ - gic-version - Specify the version of the Generic Interrupt Controller (GIC) to provide. - Valid values are: -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 6896e0ca0f..6087511ae9 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -216,6 +216,12 @@ static const MemMapEntry base_memmap[] = { - * Note the extended_memmap is sized so that it eventually also includes the - * base_memmap entries (VIRT_HIGH_GIC_REDIST2 index is greater than the last - * index of base_memmap). -+ * -+ * The memory map for these Highmem IO Regions can be in legacy or compact -+ * layout, depending on 'compact-highmem' property. With legacy layout, the -+ * PA space for one specific region is always reserved, even if the region -+ * has been disabled or doesn't fit into the PA space. However, the PA space -+ * for the region won't be reserved in these circumstances with compact layout. 
- */ - static MemMapEntry extended_memmap[] = { - /* Additional 64 MB redist region (can contain up to 512 redistributors) */ -@@ -2400,6 +2406,22 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) - vms->highmem = value; - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ -+static bool virt_get_compact_highmem(Object *obj, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ return vms->highmem_compact; -+} -+ -+static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ vms->highmem_compact = value; -+} -+#endif /* disabled for RHEL */ -+ - static bool virt_get_its(Object *obj, Error **errp) - { - VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -3023,6 +3045,13 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) - "Set on/off to enable/disable using " - "physical address space above 32 bits"); - -+ object_class_property_add_bool(oc, "compact-highmem", -+ virt_get_compact_highmem, -+ virt_set_compact_highmem); -+ object_class_property_set_description(oc, "compact-highmem", -+ "Set on/off to enable/disable compact " -+ "layout for high memory regions"); -+ - object_class_property_add_str(oc, "gic-version", virt_get_gic_version, - virt_set_gic_version); - object_class_property_set_description(oc, "gic-version", -@@ -3107,6 +3136,7 @@ static void virt_instance_init(Object *obj) - - /* High memory is enabled by default */ - vms->highmem = true; -+ vms->highmem_compact = !vmc->no_highmem_compact; - vms->gic_version = VIRT_GIC_VERSION_NOSEL; - - vms->highmem_ecam = !vmc->no_highmem_ecam; -@@ -3176,8 +3206,12 @@ DEFINE_VIRT_MACHINE_AS_LATEST(7, 2) - - static void virt_machine_7_1_options(MachineClass *mc) - { -+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); -+ - virt_machine_7_2_options(mc); - compat_props_add(mc->compat_props, hw_compat_7_1, hw_compat_7_1_len); -+ /* Compact layout for high memory regions was introduced with 
7.2 */ -+ vmc->no_highmem_compact = true; - } - DEFINE_VIRT_MACHINE(7, 1) - -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 15bd291311..85e7d61868 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -125,6 +125,7 @@ struct VirtMachineClass { - bool no_pmu; - bool claim_edge_triggered_timers; - bool smbios_old_sys_ver; -+ bool no_highmem_compact; - bool no_highmem_ecam; - bool no_ged; /* Machines < 4.2 have no support for ACPI GED device */ - bool kvm_no_adjvtime; --- -2.31.1 - diff --git a/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch b/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch deleted file mode 100644 index df691a7..0000000 --- a/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch +++ /dev/null @@ -1,179 +0,0 @@ -From 30e86a7c4fbcdc95b74bcb2a15745cb221783091 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 7/8] hw/arm/virt: Add properties to disable high memory - regions - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/8] 16f8762393b447a590b31c9e4d8d3c58c6bc9fa8 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -The 3 high memory regions are usually enabled by default, but they may -be not used. For example, VIRT_HIGH_GIC_REDIST2 isn't needed by GICv2. -This leads to waste in the PA space. - -Add properties ("highmem-redists", "highmem-ecam", "highmem-mmio") to -allow users selectively disable them if needed. After that, the high -memory region for GICv3 or GICv4 redistributor can be disabled by user, -the number of maximal supported CPUs needs to be calculated based on -'vms->highmem_redists'. The follow-up error message is also improved -to indicate if the high memory region for GICv3 and GICv4 has been -enabled or not. 
- -Suggested-by: Marc Zyngier -Signed-off-by: Gavin Shan -Reviewed-by: Marc Zyngier -Reviewed-by: Cornelia Huck -Reviewed-by: Eric Auger -Message-id: 20221029224307.138822-8-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 6a48c64eec355ab1aff694eb4522d07a8e461368) -Signed-off-by: Gavin Shan -Conflicts: - hw/arm/virt.c - Comment out the handlers of the property 'highmem-redists', - 'highmem-ecam' and 'highmem-mmio' since they aren't exposed. ---- - docs/system/arm/virt.rst | 13 +++++++ - hw/arm/virt.c | 75 ++++++++++++++++++++++++++++++++++++++-- - 2 files changed, 86 insertions(+), 2 deletions(-) - -diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst -index 4454706392..188a4f211f 100644 ---- a/docs/system/arm/virt.rst -+++ b/docs/system/arm/virt.rst -@@ -98,6 +98,19 @@ compact-highmem - Set ``on``/``off`` to enable/disable the compact layout for high memory regions. - The default is ``on`` for machine types later than ``virt-7.2``. - -+highmem-redists -+ Set ``on``/``off`` to enable/disable the high memory region for GICv3 or -+ GICv4 redistributor. The default is ``on``. Setting this to ``off`` will -+ limit the maximum number of CPUs when GICv3 or GICv4 is used. -+ -+highmem-ecam -+ Set ``on``/``off`` to enable/disable the high memory region for PCI ECAM. -+ The default is ``on`` for machine types later than ``virt-3.0``. -+ -+highmem-mmio -+ Set ``on``/``off`` to enable/disable the high memory region for PCI MMIO. -+ The default is ``on``. -+ - gic-version - Specify the version of the Generic Interrupt Controller (GIC) to provide. 
- Valid values are: -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 6087511ae9..304fa0d6e7 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -2142,14 +2142,20 @@ static void machvirt_init(MachineState *machine) - if (vms->gic_version == VIRT_GIC_VERSION_2) { - virt_max_cpus = GIC_NCPU; - } else { -- virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST) + -- virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2); -+ virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST); -+ if (vms->highmem_redists) { -+ virt_max_cpus += virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2); -+ } - } - - if (max_cpus > virt_max_cpus) { - error_report("Number of SMP CPUs requested (%d) exceeds max CPUs " - "supported by machine 'mach-virt' (%d)", - max_cpus, virt_max_cpus); -+ if (vms->gic_version != VIRT_GIC_VERSION_2 && !vms->highmem_redists) { -+ error_printf("Try 'highmem-redists=on' for more CPUs\n"); -+ } -+ - exit(1); - } - -@@ -2420,6 +2426,49 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) - - vms->highmem_compact = value; - } -+ -+static bool virt_get_highmem_redists(Object *obj, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ return vms->highmem_redists; -+} -+ -+static void virt_set_highmem_redists(Object *obj, bool value, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ vms->highmem_redists = value; -+} -+ -+static bool virt_get_highmem_ecam(Object *obj, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ return vms->highmem_ecam; -+} -+ -+static void virt_set_highmem_ecam(Object *obj, bool value, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ vms->highmem_ecam = value; -+} -+ -+static bool virt_get_highmem_mmio(Object *obj, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ return vms->highmem_mmio; -+} -+ -+static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) -+{ -+ VirtMachineState *vms = VIRT_MACHINE(obj); -+ -+ 
vms->highmem_mmio = value; -+} -+ - #endif /* disabled for RHEL */ - - static bool virt_get_its(Object *obj, Error **errp) -@@ -3052,6 +3101,28 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) - "Set on/off to enable/disable compact " - "layout for high memory regions"); - -+ object_class_property_add_bool(oc, "highmem-redists", -+ virt_get_highmem_redists, -+ virt_set_highmem_redists); -+ object_class_property_set_description(oc, "highmem-redists", -+ "Set on/off to enable/disable high " -+ "memory region for GICv3 or GICv4 " -+ "redistributor"); -+ -+ object_class_property_add_bool(oc, "highmem-ecam", -+ virt_get_highmem_ecam, -+ virt_set_highmem_ecam); -+ object_class_property_set_description(oc, "highmem-ecam", -+ "Set on/off to enable/disable high " -+ "memory region for PCI ECAM"); -+ -+ object_class_property_add_bool(oc, "highmem-mmio", -+ virt_get_highmem_mmio, -+ virt_set_highmem_mmio); -+ object_class_property_set_description(oc, "highmem-mmio", -+ "Set on/off to enable/disable high " -+ "memory region for PCI MMIO"); -+ - object_class_property_add_str(oc, "gic-version", virt_get_gic_version, - virt_set_gic_version); - object_class_property_set_description(oc, "gic-version", --- -2.31.1 - diff --git a/kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch b/kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch deleted file mode 100644 index 6b20bb8..0000000 --- a/kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 969ea1ff46b52c5fe6d87f2eeb1625871a2dfb2a Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 8/8] hw/arm/virt: Enable compat high memory region address - assignment for 9.2.0 machine - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/8] 
beda1791c0c35dce5c669efd47685302b8468032 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 -Upstream: RHEL only - -The compact high memory region address assignment is enabled for 9.2.0, -but it's kept as disabled for 9.0.0, to keep the backwards compatibility -on 9.0.0. Note that these newly added properties ('compact-highmem', -'highmem-redists', 'highmem-ecam', and 'highmem-mmio') in the upstream -aren't exposed for the downstream. - -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 304fa0d6e7..e41c0b462c 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3581,6 +3581,7 @@ static void rhel_virt_instance_init(Object *obj) - - /* High memory is enabled by default */ - vms->highmem = true; -+ vms->highmem_compact = !vmc->no_highmem_compact; - vms->gic_version = VIRT_GIC_VERSION_NOSEL; - - vms->highmem_ecam = !vmc->no_highmem_ecam; -@@ -3659,5 +3660,7 @@ static void rhel900_virt_options(MachineClass *mc) - - /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ - vmc->no_tcg_lpa2 = true; -+ /* Compact layout for high memory regions was introduced with 9.2.0 */ -+ vmc->no_highmem_compact = true; - } - DEFINE_RHEL_MACHINE(9, 0, 0) --- -2.31.1 - diff --git a/kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch b/kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch deleted file mode 100644 index 9dcdf61..0000000 --- a/kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch +++ /dev/null @@ -1,112 +0,0 @@ -From 1c7fad3776a14ca35b24dc2fdb262d4ddf40d6eb Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 5/8] hw/arm/virt: Improve high memory region address - assignment - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: 
Miroslav Rezanina -RH-Commit: [5/8] 4d77fa78b5258a1bd8d30405cec5ba3311d42f92 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -There are three high memory regions, which are VIRT_HIGH_REDIST2, -VIRT_HIGH_PCIE_ECAM and VIRT_HIGH_PCIE_MMIO. Their base addresses -are floating on highest RAM address. However, they can be disabled -in several cases. - -(1) One specific high memory region is likely to be disabled by - code by toggling vms->highmem_{redists, ecam, mmio}. - -(2) VIRT_HIGH_PCIE_ECAM region is disabled on machine, which is - 'virt-2.12' or ealier than it. - -(3) VIRT_HIGH_PCIE_ECAM region is disabled when firmware is loaded - on 32-bits system. - -(4) One specific high memory region is disabled when it breaks the - PA space limit. - -The current implementation of virt_set_{memmap, high_memmap}() isn't -optimized because the high memory region's PA space is always reserved, -regardless of whatever the actual state in the corresponding -vms->highmem_{redists, ecam, mmio} flag. In the code, 'base' and -'vms->highest_gpa' are always increased for case (1), (2) and (3). -It's unnecessary since the assigned PA space for the disabled high -memory region won't be used afterwards. - -Improve the address assignment for those three high memory region by -skipping the address assignment for one specific high memory region if -it has been disabled in case (1), (2) and (3). The memory layout may -be changed after the improvement is applied, which leads to potential -migration breakage. So 'vms->highmem_compact' is added to control if -the improvement should be applied. For now, 'vms->highmem_compact' is -set to false, meaning that we don't have memory layout change until it -becomes configurable through property 'compact-highmem' in next patch. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-6-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 4a4ff9edc6a8fdc76082af5b41b059217138c09b) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 15 ++++++++++----- - include/hw/arm/virt.h | 1 + - 2 files changed, 11 insertions(+), 5 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 6e3b9fc060..6896e0ca0f 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1768,18 +1768,23 @@ static void virt_set_high_memmap(VirtMachineState *vms, - vms->memmap[i].size = region_size; - - /* -- * Check each device to see if they fit in the PA space, -- * moving highest_gpa as we go. -+ * Check each device to see if it fits in the PA space, -+ * moving highest_gpa as we go. For compatibility, move -+ * highest_gpa for disabled fitting devices as well, if -+ * the compact layout has been disabled. - * - * For each device that doesn't fit, disable it. 
- */ - fits = (region_base + region_size) <= BIT_ULL(pa_bits); -- if (fits) { -- vms->highest_gpa = region_base + region_size - 1; -+ *region_enabled &= fits; -+ if (vms->highmem_compact && !*region_enabled) { -+ continue; - } - -- *region_enabled &= fits; - base = region_base + region_size; -+ if (fits) { -+ vms->highest_gpa = base - 1; -+ } - } - } - -diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 22b54ec510..15bd291311 100644 ---- a/include/hw/arm/virt.h -+++ b/include/hw/arm/virt.h -@@ -144,6 +144,7 @@ struct VirtMachineState { - PFlashCFI01 *flash[2]; - bool secure; - bool highmem; -+ bool highmem_compact; - bool highmem_ecam; - bool highmem_mmio; - bool highmem_redists; --- -2.31.1 - diff --git a/kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch b/kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch deleted file mode 100644 index ea9cb1f..0000000 --- a/kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 305a369fd18f29914bf96cc181add532d435d8ed Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 3/8] hw/arm/virt: Introduce variable region_base in - virt_set_high_memmap() - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/8] 15de90df217d680ccc858b679898b3993e1c050a - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -This introduces variable 'region_base' for the base address of the -specific high memory region. It's the preparatory work to optimize -high memory region address assignment. - -No functional change intended. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-4-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit fa245799b9407fc7b561da185b3d889df5e16a88) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index ca098d40b8..ddcf7ee2f8 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1739,15 +1739,15 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) - static void virt_set_high_memmap(VirtMachineState *vms, - hwaddr base, int pa_bits) - { -- hwaddr region_size; -+ hwaddr region_base, region_size; - bool fits; - int i; - - for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { -+ region_base = ROUND_UP(base, extended_memmap[i].size); - region_size = extended_memmap[i].size; - -- base = ROUND_UP(base, region_size); -- vms->memmap[i].base = base; -+ vms->memmap[i].base = region_base; - vms->memmap[i].size = region_size; - - /* -@@ -1756,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms, - * - * For each device that doesn't fit, disable it. 
- */ -- fits = (base + region_size) <= BIT_ULL(pa_bits); -+ fits = (region_base + region_size) <= BIT_ULL(pa_bits); - if (fits) { -- vms->highest_gpa = base + region_size - 1; -+ vms->highest_gpa = region_base + region_size - 1; - } - - switch (i) { -@@ -1773,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms, - break; - } - -- base += region_size; -+ base = region_base + region_size; - } - } - --- -2.31.1 - diff --git a/kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch b/kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch deleted file mode 100644 index 659faeb..0000000 --- a/kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch +++ /dev/null @@ -1,95 +0,0 @@ -From a2ddd68c8365ec602db6b2a9cf83bb441ca701cc Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 4/8] hw/arm/virt: Introduce virt_get_high_memmap_enabled() - helper - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/8] 65524de2fc106600bbaff641caa8c4f2f8027114 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -This introduces virt_get_high_memmap_enabled() helper, which returns -the pointer to vms->highmem_{redists, ecam, mmio}. The pointer will -be used in the subsequent patches. - -No functional change intended. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-5-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit a5cb1350b19a5c2a58ab4edddf609ed429c13085) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 32 +++++++++++++++++++------------- - 1 file changed, 19 insertions(+), 13 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index ddcf7ee2f8..6e3b9fc060 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1736,14 +1736,31 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) - return arm_cpu_mp_affinity(idx, clustersz); - } - -+static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms, -+ int index) -+{ -+ bool *enabled_array[] = { -+ &vms->highmem_redists, -+ &vms->highmem_ecam, -+ &vms->highmem_mmio, -+ }; -+ -+ assert(ARRAY_SIZE(extended_memmap) - VIRT_LOWMEMMAP_LAST == -+ ARRAY_SIZE(enabled_array)); -+ assert(index - VIRT_LOWMEMMAP_LAST < ARRAY_SIZE(enabled_array)); -+ -+ return enabled_array[index - VIRT_LOWMEMMAP_LAST]; -+} -+ - static void virt_set_high_memmap(VirtMachineState *vms, - hwaddr base, int pa_bits) - { - hwaddr region_base, region_size; -- bool fits; -+ bool *region_enabled, fits; - int i; - - for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { -+ region_enabled = virt_get_high_memmap_enabled(vms, i); - region_base = ROUND_UP(base, extended_memmap[i].size); - region_size = extended_memmap[i].size; - -@@ -1761,18 +1778,7 @@ static void virt_set_high_memmap(VirtMachineState *vms, - vms->highest_gpa = region_base + region_size - 1; - } - -- switch (i) { -- case VIRT_HIGH_GIC_REDIST2: -- vms->highmem_redists &= fits; -- break; -- case VIRT_HIGH_PCIE_ECAM: -- vms->highmem_ecam &= fits; -- break; -- case VIRT_HIGH_PCIE_MMIO: -- vms->highmem_mmio &= fits; -- break; -- } -- -+ *region_enabled &= fits; - base = region_base + region_size; - } - } --- -2.31.1 - 
diff --git a/kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch b/kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch deleted file mode 100644 index f55c06a..0000000 --- a/kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch +++ /dev/null @@ -1,130 +0,0 @@ -From 5dff87c5ea60054709021025c9513ec259433ce2 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 1/8] hw/arm/virt: Introduce virt_set_high_memmap() helper - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/8] 5f6ba5af7a2c21d8473c58e088ee99b11336c673 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -This introduces virt_set_high_memmap() helper. The logic of high -memory region address assignment is moved to the helper. The intention -is to make the subsequent optimization for high memory region address -assignment easier. - -No functional change intended. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-2-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 4af6b6edece5ef273d29972d53547f823d2bc1c0) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 74 ++++++++++++++++++++++++++++----------------------- - 1 file changed, 41 insertions(+), 33 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index bf18838b87..bea5f54720 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1736,6 +1736,46 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) - return arm_cpu_mp_affinity(idx, clustersz); - } - -+static void virt_set_high_memmap(VirtMachineState *vms, -+ hwaddr base, int pa_bits) -+{ -+ int i; -+ -+ for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { -+ hwaddr size = extended_memmap[i].size; -+ bool fits; -+ -+ base = ROUND_UP(base, size); -+ vms->memmap[i].base = base; -+ vms->memmap[i].size = size; -+ -+ /* -+ * Check each device to see if they fit in the PA space, -+ * moving highest_gpa as we go. -+ * -+ * For each device that doesn't fit, disable it. 
-+ */ -+ fits = (base + size) <= BIT_ULL(pa_bits); -+ if (fits) { -+ vms->highest_gpa = base + size - 1; -+ } -+ -+ switch (i) { -+ case VIRT_HIGH_GIC_REDIST2: -+ vms->highmem_redists &= fits; -+ break; -+ case VIRT_HIGH_PCIE_ECAM: -+ vms->highmem_ecam &= fits; -+ break; -+ case VIRT_HIGH_PCIE_MMIO: -+ vms->highmem_mmio &= fits; -+ break; -+ } -+ -+ base += size; -+ } -+} -+ - static void virt_set_memmap(VirtMachineState *vms, int pa_bits) - { - MachineState *ms = MACHINE(vms); -@@ -1791,39 +1831,7 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits) - /* We know for sure that at least the memory fits in the PA space */ - vms->highest_gpa = memtop - 1; - -- for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { -- hwaddr size = extended_memmap[i].size; -- bool fits; -- -- base = ROUND_UP(base, size); -- vms->memmap[i].base = base; -- vms->memmap[i].size = size; -- -- /* -- * Check each device to see if they fit in the PA space, -- * moving highest_gpa as we go. -- * -- * For each device that doesn't fit, disable it. 
-- */ -- fits = (base + size) <= BIT_ULL(pa_bits); -- if (fits) { -- vms->highest_gpa = base + size - 1; -- } -- -- switch (i) { -- case VIRT_HIGH_GIC_REDIST2: -- vms->highmem_redists &= fits; -- break; -- case VIRT_HIGH_PCIE_ECAM: -- vms->highmem_ecam &= fits; -- break; -- case VIRT_HIGH_PCIE_MMIO: -- vms->highmem_mmio &= fits; -- break; -- } -- -- base += size; -- } -+ virt_set_high_memmap(vms, base, pa_bits); - - if (device_memory_size > 0) { - ms->device_memory = g_malloc0(sizeof(*ms->device_memory)); --- -2.31.1 - diff --git a/kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch b/kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch deleted file mode 100644 index 27bc6bb..0000000 --- a/kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch +++ /dev/null @@ -1,83 +0,0 @@ -From bd5b7edbf8f4425f4b4e0d49a00cbdd48d9c6f48 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Wed, 21 Dec 2022 08:48:45 +0800 -Subject: [PATCH 2/8] hw/arm/virt: Rename variable size to region_size in - virt_set_high_memmap() - -RH-Author: Gavin Shan -RH-MergeRequest: 126: hw/arm/virt: Optimize high memory region address assignment -RH-Bugzilla: 2113840 -RH-Acked-by: Eric Auger -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/8] 1cadf1b00686cceb45821a58fdcb509bc5da335d - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2113840 - -This renames variable 'size' to 'region_size' in virt_set_high_memmap(). -Its counterpart ('region_base') will be introduced in next patch. - -No functional change intended. 
- -Signed-off-by: Gavin Shan -Reviewed-by: Eric Auger -Reviewed-by: Cornelia Huck -Reviewed-by: Marc Zyngier -Tested-by: Zhenyu Zhang -Message-id: 20221029224307.138822-3-gshan@redhat.com -Signed-off-by: Peter Maydell -(cherry picked from commit 370bea9d1c78796eec235ed6cb4310f489931a62) -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 15 ++++++++------- - 1 file changed, 8 insertions(+), 7 deletions(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index bea5f54720..ca098d40b8 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -1739,15 +1739,16 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) - static void virt_set_high_memmap(VirtMachineState *vms, - hwaddr base, int pa_bits) - { -+ hwaddr region_size; -+ bool fits; - int i; - - for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { -- hwaddr size = extended_memmap[i].size; -- bool fits; -+ region_size = extended_memmap[i].size; - -- base = ROUND_UP(base, size); -+ base = ROUND_UP(base, region_size); - vms->memmap[i].base = base; -- vms->memmap[i].size = size; -+ vms->memmap[i].size = region_size; - - /* - * Check each device to see if they fit in the PA space, -@@ -1755,9 +1756,9 @@ static void virt_set_high_memmap(VirtMachineState *vms, - * - * For each device that doesn't fit, disable it. 
- */ -- fits = (base + size) <= BIT_ULL(pa_bits); -+ fits = (base + region_size) <= BIT_ULL(pa_bits); - if (fits) { -- vms->highest_gpa = base + size - 1; -+ vms->highest_gpa = base + region_size - 1; - } - - switch (i) { -@@ -1772,7 +1773,7 @@ static void virt_set_high_memmap(VirtMachineState *vms, - break; - } - -- base += size; -+ base += region_size; - } - } - --- -2.31.1 - diff --git a/kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch b/kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch deleted file mode 100644 index b452281..0000000 --- a/kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 8b0c5c6d356fd6cce9092727e20097b70e07bba9 Mon Sep 17 00:00:00 2001 -From: Julia Suvorova -Date: Thu, 23 Feb 2023 13:57:47 +0100 -Subject: [PATCH] hw/smbios: fix field corruption in type 4 table - -RH-Author: Julia Suvorova -RH-MergeRequest: 156: hw/smbios: fix field corruption in type 4 table -RH-Bugzilla: 2169904 -RH-Acked-by: Igor Mammedov -RH-Acked-by: MST -RH-Acked-by: Ani Sinha -RH-Commit: [1/1] ee6d9bb6dfa0fb2625915947072cb91a0926c4ec - -Since table type 4 of SMBIOS version 2.6 is shorter than 3.0, the -strings which follow immediately after the struct fields have been -overwritten by unconditional filling of later fields such as core_count2. -Make these fields dependent on the SMBIOS version. - -Fixes: 05e27d74c7 ("hw/smbios: add core_count2 to smbios table type 4") -Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2169904 - -Signed-off-by: Julia Suvorova -Message-Id: <20230223125747.254914-1-jusual@redhat.com> -Reviewed-by: Igor Mammedov -Reviewed-by: Ani Sinha -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 60d09b8dc7dd4256d664ad680795cb1327805b2b) ---- - hw/smbios/smbios.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index c5ad69237e..2d2ece3edb 100644 ---- a/hw/smbios/smbios.c -+++ b/hw/smbios/smbios.c -@@ -752,14 +752,16 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) - t->core_count = (ms->smp.cores > 255) ? 0xFF : ms->smp.cores; - t->core_enabled = t->core_count; - -- t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores); -- - t->thread_count = (ms->smp.threads > 255) ? 0xFF : ms->smp.threads; -- t->thread_count2 = cpu_to_le16(ms->smp.threads); - - t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */ - t->processor_family2 = cpu_to_le16(0x01); /* Other */ - -+ if (tbl_len == SMBIOS_TYPE_4_LEN_V30) { -+ t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores); -+ t->thread_count2 = cpu_to_le16(ms->smp.threads); -+ } -+ - SMBIOS_BUILD_TABLE_POST; - smbios_type4_count++; - } --- -2.31.1 - diff --git a/kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch b/kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch deleted file mode 100644 index 0f321e4..0000000 --- a/kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch +++ /dev/null @@ -1,64 +0,0 @@ -From cadcc1c6a001622d971c86d44925516905e3d104 Mon Sep 17 00:00:00 2001 -From: Jason Wang -Date: Thu, 23 Feb 2023 14:59:21 +0800 -Subject: [PATCH 8/8] intel-iommu: fail DEVIOTLB_UNMAP without dt mode - -RH-Author: Laurent Vivier -RH-MergeRequest: 157: intel-iommu: fail DEVIOTLB_UNMAP without dt mode -RH-Bugzilla: 2156876 -RH-Acked-by: Eric Auger -RH-Acked-by: Peter Xu -RH-Acked-by: MST -RH-Commit: [1/1] eb9dbae6140ef4ba10d90b9e66abd75540f6892d (lvivier/qemu-kvm-centos) - -Without dt mode, device IOTLB notifier won't work since guest won't -send device IOTLB invalidation descriptor in this case. Let's fail -early instead of misbehaving silently. 
- -Reviewed-by: Laurent Vivier -Tested-by: Laurent Vivier -Tested-by: Viktor Prutyanov -Buglink: https://bugzilla.redhat.com/2156876 -Signed-off-by: Jason Wang -Message-Id: <20230223065924.42503-3-jasowang@redhat.com> -Reviewed-by: Peter Xu -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 09adb0e021207b60a0c51a68939b4539d98d3ef3) - -Conflict in hw/i386/intel_iommu.c because of missing commit: - - 4ce27463ccce ("intel-iommu: fail MAP notifier without caching mode") ---- - hw/i386/intel_iommu.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c -index a08ee85edf..d2983f40d3 100644 ---- a/hw/i386/intel_iommu.c -+++ b/hw/i386/intel_iommu.c -@@ -3179,6 +3179,7 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, - { - VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); - IntelIOMMUState *s = vtd_as->iommu_state; -+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); - - /* TODO: add support for VFIO and vhost users */ - if (s->snoop_control) { -@@ -3186,6 +3187,13 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, - "Snoop Control with vhost or VFIO is not supported"); - return -ENOTSUP; - } -+ if (!x86_iommu->dt_supported && (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP)) { -+ error_setg_errno(errp, ENOTSUP, -+ "device %02x.%02x.%x requires device IOTLB mode", -+ pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn), -+ PCI_FUNC(vtd_as->devfn)); -+ return -ENOTSUP; -+ } - - /* Update per-address-space notifier flags */ - vtd_as->notifier_flags = new; --- -2.39.1 - diff --git a/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch b/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch deleted file mode 100644 index 22abf35..0000000 --- a/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch +++ /dev/null @@ -1,386 +0,0 @@ -From 3a29b50036b972caae5bca0e5dfc34d910b1d5e9 Mon Sep 17 00:00:00 2001 -From: "manish.mishra" 
-Date: Tue, 20 Dec 2022 18:44:17 +0000 -Subject: [PATCH 6/8] io: Add support for MSG_PEEK for socket channel -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 150: migration: Fix multifd crash on channel disorders -RH-Bugzilla: 2169732 -RH-Acked-by: quintela1 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Dr. David Alan Gilbert -RH-Commit: [1/2] 266563f3e387e97ec710d9bc179e5de26dfd09f1 (peterx/qemu-kvm) - -MSG_PEEK peeks at the channel, The data is treated as unread and -the next read shall still return this data. This support is -currently added only for socket class. Extra parameter 'flags' -is added to io_readv calls to pass extra read flags like MSG_PEEK. - -Reviewed-by: Peter Xu -Reviewed-by: Daniel P. Berrange -Reviewed-by: Juan Quintela -Suggested-by: Daniel P. Berrange -Signed-off-by: manish.mishra -Signed-off-by: Juan Quintela -(cherry picked from commit 84615a19ddf2bfb38d7b3a0d487d2397ee55e4f3) -Signed-off-by: Peter Xu ---- - chardev/char-socket.c | 4 ++-- - include/io/channel.h | 6 ++++++ - io/channel-buffer.c | 1 + - io/channel-command.c | 1 + - io/channel-file.c | 1 + - io/channel-null.c | 1 + - io/channel-socket.c | 19 ++++++++++++++++++- - io/channel-tls.c | 1 + - io/channel-websock.c | 1 + - io/channel.c | 16 ++++++++++++---- - migration/channel-block.c | 1 + - migration/rdma.c | 1 + - scsi/qemu-pr-helper.c | 2 +- - tests/qtest/tpm-emu.c | 2 +- - tests/unit/test-io-channel-socket.c | 1 + - util/vhost-user-server.c | 2 +- - 16 files changed, 50 insertions(+), 10 deletions(-) - -diff --git a/chardev/char-socket.c b/chardev/char-socket.c -index 879564aa8a..5afce9a464 100644 ---- a/chardev/char-socket.c -+++ b/chardev/char-socket.c -@@ -283,11 +283,11 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len) - if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { - ret = qio_channel_readv_full(s->ioc, &iov, 1, - &msgfds, &msgfds_num, -- NULL); -+ 0, 
NULL); - } else { - ret = qio_channel_readv_full(s->ioc, &iov, 1, - NULL, NULL, -- NULL); -+ 0, NULL); - } - - if (msgfds_num) { -diff --git a/include/io/channel.h b/include/io/channel.h -index c680ee7480..716235d496 100644 ---- a/include/io/channel.h -+++ b/include/io/channel.h -@@ -34,6 +34,8 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, - - #define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 - -+#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1 -+ - typedef enum QIOChannelFeature QIOChannelFeature; - - enum QIOChannelFeature { -@@ -41,6 +43,7 @@ enum QIOChannelFeature { - QIO_CHANNEL_FEATURE_SHUTDOWN, - QIO_CHANNEL_FEATURE_LISTEN, - QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, -+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK, - }; - - -@@ -114,6 +117,7 @@ struct QIOChannelClass { - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp); - int (*io_close)(QIOChannel *ioc, - Error **errp); -@@ -188,6 +192,7 @@ void qio_channel_set_name(QIOChannel *ioc, - * @niov: the length of the @iov array - * @fds: pointer to an array that will received file handles - * @nfds: pointer filled with number of elements in @fds on return -+ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*) - * @errp: pointer to a NULL-initialized error object - * - * Read data from the IO channel, storing it in the -@@ -224,6 +229,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp); - - -diff --git a/io/channel-buffer.c b/io/channel-buffer.c -index bf52011be2..8096180f85 100644 ---- a/io/channel-buffer.c -+++ b/io/channel-buffer.c -@@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); -diff --git a/io/channel-command.c b/io/channel-command.c -index 74516252ba..e7edd091af 100644 ---- a/io/channel-command.c -+++ b/io/channel-command.c -@@ -203,6 +203,7 @@ static ssize_t 
qio_channel_command_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); -diff --git a/io/channel-file.c b/io/channel-file.c -index b67687c2aa..d76663e6ae 100644 ---- a/io/channel-file.c -+++ b/io/channel-file.c -@@ -86,6 +86,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); -diff --git a/io/channel-null.c b/io/channel-null.c -index 75e3781507..4fafdb770d 100644 ---- a/io/channel-null.c -+++ b/io/channel-null.c -@@ -60,6 +60,7 @@ qio_channel_null_readv(QIOChannel *ioc, - size_t niov, - int **fds G_GNUC_UNUSED, - size_t *nfds G_GNUC_UNUSED, -+ int flags, - Error **errp) - { - QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc); -diff --git a/io/channel-socket.c b/io/channel-socket.c -index b76dca9cc1..7aca84f61a 100644 ---- a/io/channel-socket.c -+++ b/io/channel-socket.c -@@ -173,6 +173,9 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, - } - #endif - -+ qio_channel_set_feature(QIO_CHANNEL(ioc), -+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK); -+ - return 0; - } - -@@ -406,6 +409,9 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, - } - #endif /* WIN32 */ - -+ qio_channel_set_feature(QIO_CHANNEL(cioc), -+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK); -+ - trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd); - return cioc; - -@@ -496,6 +502,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); -@@ -517,6 +524,10 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, - - } - -+ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { -+ sflags |= MSG_PEEK; -+ } -+ - retry: - ret = recvmsg(sioc->fd, &msg, sflags); - if (ret < 0) { -@@ -624,11 +635,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel 
*ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); - ssize_t done = 0; - ssize_t i; -+ int sflags = 0; -+ -+ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { -+ sflags |= MSG_PEEK; -+ } - - for (i = 0; i < niov; i++) { - ssize_t ret; -@@ -636,7 +653,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, - ret = recv(sioc->fd, - iov[i].iov_base, - iov[i].iov_len, -- 0); -+ sflags); - if (ret < 0) { - if (errno == EAGAIN) { - if (done) { -diff --git a/io/channel-tls.c b/io/channel-tls.c -index 4ce890a538..c730cb8ec5 100644 ---- a/io/channel-tls.c -+++ b/io/channel-tls.c -@@ -260,6 +260,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); -diff --git a/io/channel-websock.c b/io/channel-websock.c -index fb4932ade7..a12acc27cf 100644 ---- a/io/channel-websock.c -+++ b/io/channel-websock.c -@@ -1081,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); -diff --git a/io/channel.c b/io/channel.c -index 0640941ac5..a8c7f11649 100644 ---- a/io/channel.c -+++ b/io/channel.c -@@ -52,6 +52,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); -@@ -63,7 +64,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, - return -1; - } - -- return klass->io_readv(ioc, iov, niov, fds, nfds, errp); -+ if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) && -+ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { -+ error_setg_errno(errp, EINVAL, -+ "Channel does not support peek read"); -+ return -1; -+ } -+ -+ return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp); - } - - -@@ -146,7 
+154,7 @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc, - while ((nlocal_iov > 0) || local_fds) { - ssize_t len; - len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds, -- local_nfds, errp); -+ local_nfds, 0, errp); - if (len == QIO_CHANNEL_ERR_BLOCK) { - if (qemu_in_coroutine()) { - qio_channel_yield(ioc, G_IO_IN); -@@ -284,7 +292,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc, - size_t niov, - Error **errp) - { -- return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp); -+ return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp); - } - - -@@ -303,7 +311,7 @@ ssize_t qio_channel_read(QIOChannel *ioc, - Error **errp) - { - struct iovec iov = { .iov_base = buf, .iov_len = buflen }; -- return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp); -+ return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp); - } - - -diff --git a/migration/channel-block.c b/migration/channel-block.c -index f4ab53acdb..b7374363c3 100644 ---- a/migration/channel-block.c -+++ b/migration/channel-block.c -@@ -53,6 +53,7 @@ qio_channel_block_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc); -diff --git a/migration/rdma.c b/migration/rdma.c -index 94a55dd95b..d8b4632094 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -2854,6 +2854,7 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, - size_t niov, - int **fds, - size_t *nfds, -+ int flags, - Error **errp) - { - QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); -diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c -index 196b78c00d..199227a556 100644 ---- a/scsi/qemu-pr-helper.c -+++ b/scsi/qemu-pr-helper.c -@@ -614,7 +614,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz, - iov.iov_base = buf; - iov.iov_len = sz; - n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1, -- &fds, &nfds, errp); -+ &fds, &nfds, 0, errp); - - if (n_read == 
QIO_CHANNEL_ERR_BLOCK) { - qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN); -diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c -index 2994d1cf42..3cf1acaf7d 100644 ---- a/tests/qtest/tpm-emu.c -+++ b/tests/qtest/tpm-emu.c -@@ -106,7 +106,7 @@ void *tpm_emu_ctrl_thread(void *data) - int *pfd = NULL; - size_t nfd = 0; - -- qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort); -+ qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort); - cmd = be32_to_cpu(cmd); - g_assert_cmpint(cmd, ==, CMD_SET_DATAFD); - g_assert_cmpint(nfd, ==, 1); -diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c -index b36a5d972a..b964bb202d 100644 ---- a/tests/unit/test-io-channel-socket.c -+++ b/tests/unit/test-io-channel-socket.c -@@ -460,6 +460,7 @@ static void test_io_channel_unix_fd_pass(void) - G_N_ELEMENTS(iorecv), - &fdrecv, - &nfdrecv, -+ 0, - &error_abort); - - g_assert(nfdrecv == G_N_ELEMENTS(fdsend)); -diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c -index 232984ace6..145eb17c08 100644 ---- a/util/vhost-user-server.c -+++ b/util/vhost-user-server.c -@@ -116,7 +116,7 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg) - * qio_channel_readv_full may have short reads, keeping calling it - * until getting VHOST_USER_HDR_SIZE or 0 bytes in total - */ -- rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err); -+ rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err); - if (rc < 0) { - if (rc == QIO_CHANNEL_ERR_BLOCK) { - assert(local_err == NULL); --- -2.31.1 - diff --git a/kvm-iotests-106-214-308-Read-only-one-size-line.patch b/kvm-iotests-106-214-308-Read-only-one-size-line.patch deleted file mode 100644 index 399acfc..0000000 --- a/kvm-iotests-106-214-308-Read-only-one-size-line.patch +++ /dev/null @@ -1,99 +0,0 @@ -From 6727e92a97f8ee9f367a41111bef3f5cad4a479a Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:27:02 +0200 -Subject: 
[PATCH 15/20] iotests/106, 214, 308: Read only one size line - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [10/12] 1554e0a92b92ed101a251478ccae43f45f6e071e (hreitz/qemu-kvm-c-9-s) - -These tests read size information (sometimes disk size, sometimes -virtual size) from qemu-img info's output. Once qemu-img starts -printing info about child nodes, we are going to see multiple instances -of that per image, but these tests are only interested in the first one, -so use "head -n 1" to get it. - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-11-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 74163adda3101b127943f7cbbf8fcccd2d472426) -Signed-off-by: Hanna Czenczek ---- - tests/qemu-iotests/106 | 4 ++-- - tests/qemu-iotests/214 | 6 ++++-- - tests/qemu-iotests/308 | 4 ++-- - 3 files changed, 8 insertions(+), 6 deletions(-) - -diff --git a/tests/qemu-iotests/106 b/tests/qemu-iotests/106 -index 9d6adb542d..ae0fc46691 100755 ---- a/tests/qemu-iotests/106 -+++ b/tests/qemu-iotests/106 -@@ -66,7 +66,7 @@ for create_mode in off falloc full; do - expected_size=$((expected_size + $GROWTH_SIZE)) - fi - -- actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size') -+ actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size' | head -n 1) - actual_size=$(echo "$actual_size" | sed -e 's/^[^0-9]*\([0-9]\+\).*$/\1/') - - # The actual size may exceed the expected size, depending on the file -@@ -105,7 +105,7 @@ for growth_mode in falloc full; do - _make_test_img -o "extent_size_hint=0" 2G - $QEMU_IMG resize -f "$IMGFMT" --preallocation=$growth_mode "$TEST_IMG" +${GROWTH_SIZE}K - -- actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 'disk size') -+ actual_size=$($QEMU_IMG info -f "$IMGFMT" "$TEST_IMG" | grep 
'disk size' | head -n 1) - actual_size=$(echo "$actual_size" | sed -e 's/^[^0-9]*\([0-9]\+\).*$/\1/') - - if [ $actual_size -lt $GROWTH_SIZE ]; then -diff --git a/tests/qemu-iotests/214 b/tests/qemu-iotests/214 -index c66e246ba2..55ffcd7f44 100755 ---- a/tests/qemu-iotests/214 -+++ b/tests/qemu-iotests/214 -@@ -102,7 +102,8 @@ let data_size="8 * $cluster_size" - $QEMU_IO -c "write -P 0xaa 0 $data_size" "$TEST_IMG" \ - 2>&1 | _filter_qemu_io | _filter_testdir - sizeA=$($QEMU_IMG info --output=json "$TEST_IMG" | -- sed -n '/"actual-size":/ s/[^0-9]//gp') -+ sed -n '/"actual-size":/ s/[^0-9]//gp' | -+ head -n 1) - - _make_test_img 2M -o cluster_size=$cluster_size - echo "Write compressed data:" -@@ -124,7 +125,8 @@ $QEMU_IO -c "write -P 0xcc $offset $data_size" "json:{\ - _filter_qemu_io | _filter_testdir - - sizeB=$($QEMU_IMG info --output=json "$TEST_IMG" | -- sed -n '/"actual-size":/ s/[^0-9]//gp') -+ sed -n '/"actual-size":/ s/[^0-9]//gp' | -+ head -n 1) - - if [ $sizeA -lt $sizeB ] - then -diff --git a/tests/qemu-iotests/308 b/tests/qemu-iotests/308 -index bde4aac2fa..09275e9a10 100755 ---- a/tests/qemu-iotests/308 -+++ b/tests/qemu-iotests/308 -@@ -217,12 +217,12 @@ echo - echo '=== Remove export ===' - - # Double-check that $EXT_MP appears as a non-empty file (the raw image) --$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' -+$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' | head -n 1 - - fuse_export_del 'export-mp' - - # See that the file appears empty again --$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' -+$QEMU_IMG info -f raw "$EXT_MP" | grep 'virtual size' | head -n 1 - - echo - echo '=== Writable export ===' --- -2.31.1 - diff --git a/kvm-iotests-Filter-child-node-information.patch b/kvm-iotests-Filter-child-node-information.patch deleted file mode 100644 index 12eee3a..0000000 --- a/kvm-iotests-Filter-child-node-information.patch +++ /dev/null @@ -1,171 +0,0 @@ -From 3102e62f80757729c97e58e2b3d62a6a9de952a7 Mon Sep 17 00:00:00 2001 
-From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:27:01 +0200 -Subject: [PATCH 14/20] iotests: Filter child node information - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [9/12] 0b0a42d54397791f7f149e53c9175b7863707e70 (hreitz/qemu-kvm-c-9-s) - -Before we let qemu-img info print child node information, have -common.filter, common.rc, and iotests.py filter it from the test output -so we get as few reference output changes as possible. - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-10-hreitz@redhat.com> -Tested-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit bcc6777ad6facede73c0cf8b1700045bf4365f7d) -Signed-off-by: Hanna Czenczek ---- - tests/qemu-iotests/common.filter | 22 ++++++++++++++-------- - tests/qemu-iotests/common.rc | 22 ++++++++++++++-------- - tests/qemu-iotests/iotests.py | 18 +++++++++++++++--- - 3 files changed, 43 insertions(+), 19 deletions(-) - -diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter -index 6a13757177..6ddda2ee64 100644 ---- a/tests/qemu-iotests/common.filter -+++ b/tests/qemu-iotests/common.filter -@@ -224,6 +224,7 @@ _filter_img_info() - - discard=0 - regex_json_spec_start='^ *"format-specific": \{' -+ regex_json_child_start='^ *"children": \[' - gsed -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ - -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ - -e "s#$TEST_DIR#TEST_DIR#g" \ -@@ -252,20 +253,25 @@ _filter_img_info() - -e 's/\(compression type: \)\(zlib\|zstd\)/\1COMPRESSION_TYPE/' \ - -e "s/uuid: [-a-f0-9]\\+/uuid: 00000000-0000-0000-0000-000000000000/" | \ - while IFS='' read -r line; do -- if [[ $format_specific == 1 ]]; then -- discard=0 -- elif [[ $line == "Format specific information:" ]]; then -- discard=1 -- elif [[ $line =~ $regex_json_spec_start ]]; then -- discard=2 -- regex_json_spec_end="^${line%%[^ 
]*}\\},? *$" -+ if [[ $discard == 0 ]]; then -+ if [[ $format_specific == 0 && $line == "Format specific information:" ]]; then -+ discard=1 -+ elif [[ $line =~ "Child node '/" ]]; then -+ discard=1 -+ elif [[ $line =~ $regex_json_spec_start ]]; then -+ discard=2 -+ regex_json_end="^${line%%[^ ]*}\\},? *$" -+ elif [[ $line =~ $regex_json_child_start ]]; then -+ discard=2 -+ regex_json_end="^${line%%[^ ]*}\\],? *$" -+ fi - fi - if [[ $discard == 0 ]]; then - echo "$line" - elif [[ $discard == 1 && ! $line ]]; then - echo - discard=0 -- elif [[ $discard == 2 && $line =~ $regex_json_spec_end ]]; then -+ elif [[ $discard == 2 && $line =~ $regex_json_end ]]; then - discard=0 - fi - done -diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc -index db757025cb..f4476b62f7 100644 ---- a/tests/qemu-iotests/common.rc -+++ b/tests/qemu-iotests/common.rc -@@ -711,6 +711,7 @@ _img_info() - - discard=0 - regex_json_spec_start='^ *"format-specific": \{' -+ regex_json_child_start='^ *"children": \[' - $QEMU_IMG info $QEMU_IMG_EXTRA_ARGS "$@" "$TEST_IMG" 2>&1 | \ - sed -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \ - -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ -@@ -721,20 +722,25 @@ _img_info() - -e "/^disk size:/ D" \ - -e "/actual-size/ D" | \ - while IFS='' read -r line; do -- if [[ $format_specific == 1 ]]; then -- discard=0 -- elif [[ $line == "Format specific information:" ]]; then -- discard=1 -- elif [[ $line =~ $regex_json_spec_start ]]; then -- discard=2 -- regex_json_spec_end="^${line%%[^ ]*}\\},? *$" -+ if [[ $discard == 0 ]]; then -+ if [[ $format_specific == 0 && $line == "Format specific information:" ]]; then -+ discard=1 -+ elif [[ $line =~ "Child node '/" ]]; then -+ discard=1 -+ elif [[ $format_specific == 0 && $line =~ $regex_json_spec_start ]]; then -+ discard=2 -+ regex_json_end="^${line%%[^ ]*}\\},? *$" -+ elif [[ $line =~ $regex_json_child_start ]]; then -+ discard=2 -+ regex_json_end="^${line%%[^ ]*}\\],? 
*$" -+ fi - fi - if [[ $discard == 0 ]]; then - echo "$line" - elif [[ $discard == 1 && ! $line ]]; then - echo - discard=0 -- elif [[ $discard == 2 && $line =~ $regex_json_spec_end ]]; then -+ elif [[ $discard == 2 && $line =~ $regex_json_end ]]; then - discard=0 - fi - done -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index da7d6637e1..94aeb3f3b2 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -329,7 +329,7 @@ def qemu_img_log(*args: str, check: bool = True - - def img_info_log(filename: str, filter_path: Optional[str] = None, - use_image_opts: bool = False, extra_args: Sequence[str] = (), -- check: bool = True, -+ check: bool = True, drop_child_info: bool = True, - ) -> None: - args = ['info'] - if use_image_opts: -@@ -342,7 +342,7 @@ def img_info_log(filename: str, filter_path: Optional[str] = None, - output = qemu_img(*args, check=check).stdout - if not filter_path: - filter_path = filename -- log(filter_img_info(output, filter_path)) -+ log(filter_img_info(output, filter_path, drop_child_info)) - - def qemu_io_wrap_args(args: Sequence[str]) -> List[str]: - if '-f' in args or '--image-opts' in args: -@@ -642,11 +642,23 @@ def _filter(_key, value): - def filter_generated_node_ids(msg): - return re.sub("#block[0-9]+", "NODE_NAME", msg) - --def filter_img_info(output, filename): -+def filter_img_info(output: str, filename: str, -+ drop_child_info: bool = True) -> str: - lines = [] -+ drop_indented = False - for line in output.split('\n'): - if 'disk size' in line or 'actual-size' in line: - continue -+ -+ # Drop child node info -+ if drop_indented: -+ if line.startswith(' '): -+ continue -+ drop_indented = False -+ if drop_child_info and "Child node '/" in line: -+ drop_indented = True -+ continue -+ - line = line.replace(filename, 'TEST_IMG') - line = filter_testfiles(line) - line = line.replace(imgfmt, 'IMGFMT') --- -2.31.1 - diff --git a/kvm-kvm-Atomic-memslot-updates.patch 
b/kvm-kvm-Atomic-memslot-updates.patch deleted file mode 100644 index 14e9e32..0000000 --- a/kvm-kvm-Atomic-memslot-updates.patch +++ /dev/null @@ -1,286 +0,0 @@ -From e13fdc97ff05cdee46c112c2dee70b6ef33e7fa7 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Mon, 16 Jan 2023 07:17:31 -0500 -Subject: [PATCH 31/31] kvm: Atomic memslot updates - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 138: accel: introduce accelerator blocker API -RH-Bugzilla: 1979276 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: David Hildenbrand -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/3] 9f03181ebcad2474fbe859acbce7b9891caa216b (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276 - -commit f39b7d2b96e3e73c01bb678cd096f7baf0b9ab39 -Author: David Hildenbrand -Date: Fri Nov 11 10:47:58 2022 -0500 - - kvm: Atomic memslot updates - - If we update an existing memslot (e.g., resize, split), we temporarily - remove the memslot to re-add it immediately afterwards. These updates - are not atomic, especially not for KVM VCPU threads, such that we can - get spurious faults. - - Let's inhibit most KVM ioctls while performing relevant updates, such - that we can perform the update just as if it would happen atomically - without additional kernel support. - - We capture the add/del changes and apply them in the notifier commit - stage instead. There, we can check for overlaps and perform the ioctl - inhibiting only if really required (-> overlap). - - To keep things simple we don't perform additional checks that wouldn't - actually result in an overlap -- such as !RAM memory regions in some - cases (see kvm_set_phys_mem()). - - To minimize cache-line bouncing, use a separate indicator - (in_ioctl_lock) per CPU. Also, make sure to hold the kvm_slots_lock - while performing both actions (removing+re-adding). - - We have to wait until all IOCTLs were exited and block new ones from - getting executed. 
- - This approach cannot result in a deadlock as long as the inhibitor does - not hold any locks that might hinder an IOCTL from getting finished and - exited - something fairly unusual. The inhibitor will always hold the BQL. - - AFAIKs, one possible candidate would be userfaultfd. If a page cannot be - placed (e.g., during postcopy), because we're waiting for a lock, or if the - userfaultfd thread cannot process a fault, because it is waiting for a - lock, there could be a deadlock. However, the BQL is not applicable here, - because any other guest memory access while holding the BQL would already - result in a deadlock. - - Nothing else in the kernel should block forever and wait for userspace - intervention. - - Note: pause_all_vcpus()/resume_all_vcpus() or - start_exclusive()/end_exclusive() cannot be used, as they either drop - the BQL or require to be called without the BQL - something inhibitors - cannot handle. We need a low-level locking mechanism that is - deadlock-free even when not releasing the BQL. 
- - Signed-off-by: David Hildenbrand - Signed-off-by: Emanuele Giuseppe Esposito - Tested-by: Emanuele Giuseppe Esposito - Message-Id: <20221111154758.1372674-4-eesposit@redhat.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - accel/kvm/kvm-all.c | 101 ++++++++++++++++++++++++++++++++++----- - include/sysemu/kvm_int.h | 8 ++++ - 2 files changed, 98 insertions(+), 11 deletions(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index ff660fd469..39ed30ab59 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -31,6 +31,7 @@ - #include "sysemu/kvm_int.h" - #include "sysemu/runstate.h" - #include "sysemu/cpus.h" -+#include "sysemu/accel-blocker.h" - #include "qemu/bswap.h" - #include "exec/memory.h" - #include "exec/ram_addr.h" -@@ -46,6 +47,7 @@ - #include "sysemu/hw_accel.h" - #include "kvm-cpus.h" - #include "sysemu/dirtylimit.h" -+#include "qemu/range.h" - - #include "hw/boards.h" - #include "monitor/stats.h" -@@ -1292,6 +1294,7 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size) - kvm_max_slot_size = max_slot_size; - } - -+/* Called with KVMMemoryListener.slots_lock held */ - static void kvm_set_phys_mem(KVMMemoryListener *kml, - MemoryRegionSection *section, bool add) - { -@@ -1326,14 +1329,12 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, - ram = memory_region_get_ram_ptr(mr) + mr_offset; - ram_start_offset = memory_region_get_ram_addr(mr) + mr_offset; - -- kvm_slots_lock(); -- - if (!add) { - do { - slot_size = MIN(kvm_max_slot_size, size); - mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); - if (!mem) { -- goto out; -+ return; - } - if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { - /* -@@ -1371,7 +1372,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, - start_addr += slot_size; - size -= slot_size; - } while (size); -- goto out; -+ return; - } - - /* register the new slot */ -@@ -1396,9 +1397,6 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, - ram += slot_size; - 
size -= slot_size; - } while (size); -- --out: -- kvm_slots_unlock(); - } - - static void *kvm_dirty_ring_reaper_thread(void *data) -@@ -1455,18 +1453,95 @@ static void kvm_region_add(MemoryListener *listener, - MemoryRegionSection *section) - { - KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); -+ KVMMemoryUpdate *update; -+ -+ update = g_new0(KVMMemoryUpdate, 1); -+ update->section = *section; - -- memory_region_ref(section->mr); -- kvm_set_phys_mem(kml, section, true); -+ QSIMPLEQ_INSERT_TAIL(&kml->transaction_add, update, next); - } - - static void kvm_region_del(MemoryListener *listener, - MemoryRegionSection *section) - { - KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); -+ KVMMemoryUpdate *update; -+ -+ update = g_new0(KVMMemoryUpdate, 1); -+ update->section = *section; -+ -+ QSIMPLEQ_INSERT_TAIL(&kml->transaction_del, update, next); -+} -+ -+static void kvm_region_commit(MemoryListener *listener) -+{ -+ KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, -+ listener); -+ KVMMemoryUpdate *u1, *u2; -+ bool need_inhibit = false; -+ -+ if (QSIMPLEQ_EMPTY(&kml->transaction_add) && -+ QSIMPLEQ_EMPTY(&kml->transaction_del)) { -+ return; -+ } -+ -+ /* -+ * We have to be careful when regions to add overlap with ranges to remove. -+ * We have to simulate atomic KVM memslot updates by making sure no ioctl() -+ * is currently active. -+ * -+ * The lists are order by addresses, so it's easy to find overlaps. 
-+ */ -+ u1 = QSIMPLEQ_FIRST(&kml->transaction_del); -+ u2 = QSIMPLEQ_FIRST(&kml->transaction_add); -+ while (u1 && u2) { -+ Range r1, r2; -+ -+ range_init_nofail(&r1, u1->section.offset_within_address_space, -+ int128_get64(u1->section.size)); -+ range_init_nofail(&r2, u2->section.offset_within_address_space, -+ int128_get64(u2->section.size)); -+ -+ if (range_overlaps_range(&r1, &r2)) { -+ need_inhibit = true; -+ break; -+ } -+ if (range_lob(&r1) < range_lob(&r2)) { -+ u1 = QSIMPLEQ_NEXT(u1, next); -+ } else { -+ u2 = QSIMPLEQ_NEXT(u2, next); -+ } -+ } -+ -+ kvm_slots_lock(); -+ if (need_inhibit) { -+ accel_ioctl_inhibit_begin(); -+ } -+ -+ /* Remove all memslots before adding the new ones. */ -+ while (!QSIMPLEQ_EMPTY(&kml->transaction_del)) { -+ u1 = QSIMPLEQ_FIRST(&kml->transaction_del); -+ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_del, next); - -- kvm_set_phys_mem(kml, section, false); -- memory_region_unref(section->mr); -+ kvm_set_phys_mem(kml, &u1->section, false); -+ memory_region_unref(u1->section.mr); -+ -+ g_free(u1); -+ } -+ while (!QSIMPLEQ_EMPTY(&kml->transaction_add)) { -+ u1 = QSIMPLEQ_FIRST(&kml->transaction_add); -+ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_add, next); -+ -+ memory_region_ref(u1->section.mr); -+ kvm_set_phys_mem(kml, &u1->section, true); -+ -+ g_free(u1); -+ } -+ -+ if (need_inhibit) { -+ accel_ioctl_inhibit_end(); -+ } -+ kvm_slots_unlock(); - } - - static void kvm_log_sync(MemoryListener *listener, -@@ -1610,8 +1685,12 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, - kml->slots[i].slot = i; - } - -+ QSIMPLEQ_INIT(&kml->transaction_add); -+ QSIMPLEQ_INIT(&kml->transaction_del); -+ - kml->listener.region_add = kvm_region_add; - kml->listener.region_del = kvm_region_del; -+ kml->listener.commit = kvm_region_commit; - kml->listener.log_start = kvm_log_start; - kml->listener.log_stop = kvm_log_stop; - kml->listener.priority = 10; -diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h -index 
3b4adcdc10..60b520a13e 100644 ---- a/include/sysemu/kvm_int.h -+++ b/include/sysemu/kvm_int.h -@@ -12,6 +12,7 @@ - #include "exec/memory.h" - #include "qapi/qapi-types-common.h" - #include "qemu/accel.h" -+#include "qemu/queue.h" - #include "sysemu/kvm.h" - - typedef struct KVMSlot -@@ -31,10 +32,17 @@ typedef struct KVMSlot - ram_addr_t ram_start_offset; - } KVMSlot; - -+typedef struct KVMMemoryUpdate { -+ QSIMPLEQ_ENTRY(KVMMemoryUpdate) next; -+ MemoryRegionSection section; -+} KVMMemoryUpdate; -+ - typedef struct KVMMemoryListener { - MemoryListener listener; - KVMSlot *slots; - int as_id; -+ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add; -+ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del; - } KVMMemoryListener; - - #define KVM_MSI_HASHTAB_SIZE 256 --- -2.31.1 - diff --git a/kvm-linux-headers-Update-to-v6.1.patch b/kvm-linux-headers-Update-to-v6.1.patch deleted file mode 100644 index 6ce9c7d..0000000 --- a/kvm-linux-headers-Update-to-v6.1.patch +++ /dev/null @@ -1,577 +0,0 @@ -From cbe35c6a4794107ea1ddecf0b381ba4b1c8799f5 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Tue, 7 Feb 2023 15:57:10 -0500 -Subject: [PATCH 3/8] linux-headers: Update to v6.1 - -RH-Author: Peter Xu -RH-MergeRequest: 149: Support /dev/userfaultfd -RH-Bugzilla: 2158704 -RH-Acked-by: Dr. 
David Alan Gilbert -RH-Acked-by: quintela1 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/3] 15d97026e802a0f01b5f80f81fb4414dc69b2b2d (peterx/qemu-kvm) - -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Acked-by: Cornelia Huck -Signed-off-by: Juan Quintela -(cherry picked from commit 93e0932b7be2498024cd6ba8446a0fa2cb1769bc) -Signed-off-by: Peter Xu ---- - include/standard-headers/drm/drm_fourcc.h | 34 ++++- - include/standard-headers/linux/ethtool.h | 63 +++++++- - include/standard-headers/linux/fuse.h | 6 +- - .../linux/input-event-codes.h | 1 + - include/standard-headers/linux/virtio_blk.h | 19 +++ - linux-headers/asm-generic/hugetlb_encode.h | 26 ++-- - linux-headers/asm-generic/mman-common.h | 2 + - linux-headers/asm-mips/mman.h | 2 + - linux-headers/asm-riscv/kvm.h | 4 + - linux-headers/linux/kvm.h | 1 + - linux-headers/linux/psci.h | 14 ++ - linux-headers/linux/userfaultfd.h | 4 + - linux-headers/linux/vfio.h | 142 ++++++++++++++++++ - 13 files changed, 298 insertions(+), 20 deletions(-) - -diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h -index 48b620cbef..b868488f93 100644 ---- a/include/standard-headers/drm/drm_fourcc.h -+++ b/include/standard-headers/drm/drm_fourcc.h -@@ -98,18 +98,42 @@ extern "C" { - #define DRM_FORMAT_INVALID 0 - - /* color index */ -+#define DRM_FORMAT_C1 fourcc_code('C', '1', ' ', ' ') /* [7:0] C0:C1:C2:C3:C4:C5:C6:C7 1:1:1:1:1:1:1:1 eight pixels/byte */ -+#define DRM_FORMAT_C2 fourcc_code('C', '2', ' ', ' ') /* [7:0] C0:C1:C2:C3 2:2:2:2 four pixels/byte */ -+#define DRM_FORMAT_C4 fourcc_code('C', '4', ' ', ' ') /* [7:0] C0:C1 4:4 two pixels/byte */ - #define DRM_FORMAT_C8 fourcc_code('C', '8', ' ', ' ') /* [7:0] C */ - --/* 8 bpp Red */ -+/* 1 bpp Darkness (inverse relationship between channel value and brightness) */ -+#define DRM_FORMAT_D1 fourcc_code('D', '1', ' ', ' ') /* [7:0] D0:D1:D2:D3:D4:D5:D6:D7 1:1:1:1:1:1:1:1 eight pixels/byte */ -+ -+/* 2 bpp Darkness (inverse 
relationship between channel value and brightness) */ -+#define DRM_FORMAT_D2 fourcc_code('D', '2', ' ', ' ') /* [7:0] D0:D1:D2:D3 2:2:2:2 four pixels/byte */ -+ -+/* 4 bpp Darkness (inverse relationship between channel value and brightness) */ -+#define DRM_FORMAT_D4 fourcc_code('D', '4', ' ', ' ') /* [7:0] D0:D1 4:4 two pixels/byte */ -+ -+/* 8 bpp Darkness (inverse relationship between channel value and brightness) */ -+#define DRM_FORMAT_D8 fourcc_code('D', '8', ' ', ' ') /* [7:0] D */ -+ -+/* 1 bpp Red (direct relationship between channel value and brightness) */ -+#define DRM_FORMAT_R1 fourcc_code('R', '1', ' ', ' ') /* [7:0] R0:R1:R2:R3:R4:R5:R6:R7 1:1:1:1:1:1:1:1 eight pixels/byte */ -+ -+/* 2 bpp Red (direct relationship between channel value and brightness) */ -+#define DRM_FORMAT_R2 fourcc_code('R', '2', ' ', ' ') /* [7:0] R0:R1:R2:R3 2:2:2:2 four pixels/byte */ -+ -+/* 4 bpp Red (direct relationship between channel value and brightness) */ -+#define DRM_FORMAT_R4 fourcc_code('R', '4', ' ', ' ') /* [7:0] R0:R1 4:4 two pixels/byte */ -+ -+/* 8 bpp Red (direct relationship between channel value and brightness) */ - #define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ - --/* 10 bpp Red */ -+/* 10 bpp Red (direct relationship between channel value and brightness) */ - #define DRM_FORMAT_R10 fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */ - --/* 12 bpp Red */ -+/* 12 bpp Red (direct relationship between channel value and brightness) */ - #define DRM_FORMAT_R12 fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */ - --/* 16 bpp Red */ -+/* 16 bpp Red (direct relationship between channel value and brightness) */ - #define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */ - - /* 16 bpp RG */ -@@ -204,7 +228,9 @@ extern "C" { - #define DRM_FORMAT_VYUY fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */ - - #define DRM_FORMAT_AYUV fourcc_code('A', 'Y', 'U', 'V') 
/* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */ -+#define DRM_FORMAT_AVUY8888 fourcc_code('A', 'V', 'U', 'Y') /* [31:0] A:Cr:Cb:Y 8:8:8:8 little endian */ - #define DRM_FORMAT_XYUV8888 fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */ -+#define DRM_FORMAT_XVUY8888 fourcc_code('X', 'V', 'U', 'Y') /* [31:0] X:Cr:Cb:Y 8:8:8:8 little endian */ - #define DRM_FORMAT_VUY888 fourcc_code('V', 'U', '2', '4') /* [23:0] Cr:Cb:Y 8:8:8 little endian */ - #define DRM_FORMAT_VUY101010 fourcc_code('V', 'U', '3', '0') /* Y followed by U then V, 10:10:10. Non-linear modifier only */ - -diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h -index 4537da20cc..1dc56cdc0a 100644 ---- a/include/standard-headers/linux/ethtool.h -+++ b/include/standard-headers/linux/ethtool.h -@@ -736,6 +736,51 @@ enum ethtool_module_power_mode { - ETHTOOL_MODULE_POWER_MODE_HIGH, - }; - -+/** -+ * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE -+ * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState -+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN: state of PoDL PSE functions are -+ * unknown -+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED: PoDL PSE functions are disabled -+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED: PoDL PSE functions are enabled -+ */ -+enum ethtool_podl_pse_admin_state { -+ ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN = 1, -+ ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED, -+ ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED, -+}; -+ -+/** -+ * enum ethtool_podl_pse_pw_d_status - power detection status of the PoDL PSE. 
-+ * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus: -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN: PoDL PSE -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED: "The enumeration “disabled” is -+ * asserted true when the PoDL PSE state diagram variable mr_pse_enable is -+ * false" -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING: "The enumeration “searching” is -+ * asserted true when either of the PSE state diagram variables -+ * pi_detecting or pi_classifying is true." -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING: "The enumeration “deliveringPower” -+ * is asserted true when the PoDL PSE state diagram variable pi_powered is -+ * true." -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP: "The enumeration “sleep” is asserted -+ * true when the PoDL PSE state diagram variable pi_sleeping is true." -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE: "The enumeration “idle” is asserted true -+ * when the logical combination of the PoDL PSE state diagram variables -+ * pi_prebiased*!pi_sleeping is true." -+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR: "The enumeration “error” is asserted -+ * true when the PoDL PSE state diagram variable overload_held is true." -+ */ -+enum ethtool_podl_pse_pw_d_status { -+ ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN = 1, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE, -+ ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR, -+}; -+ - /** - * struct ethtool_gstrings - string set for data tagging - * @cmd: Command number = %ETHTOOL_GSTRINGS -@@ -1840,6 +1885,20 @@ static inline int ethtool_validate_duplex(uint8_t duplex) - #define MASTER_SLAVE_STATE_SLAVE 3 - #define MASTER_SLAVE_STATE_ERR 4 - -+/* These are used to throttle the rate of data on the phy interface when the -+ * native speed of the interface is higher than the link speed. These should -+ * not be used for phy interfaces which natively support multiple speeds (e.g. 
-+ * MII or SGMII). -+ */ -+/* No rate matching performed. */ -+#define RATE_MATCH_NONE 0 -+/* The phy sends pause frames to throttle the MAC. */ -+#define RATE_MATCH_PAUSE 1 -+/* The phy asserts CRS to prevent the MAC from transmitting. */ -+#define RATE_MATCH_CRS 2 -+/* The MAC is programmed with a sufficiently-large IPG. */ -+#define RATE_MATCH_OPEN_LOOP 3 -+ - /* Which connector port. */ - #define PORT_TP 0x00 - #define PORT_AUI 0x01 -@@ -2033,8 +2092,8 @@ enum ethtool_reset_flags { - * reported consistently by PHYLIB. Read-only. - * @master_slave_cfg: Master/slave port mode. - * @master_slave_state: Master/slave port state. -+ * @rate_matching: Rate adaptation performed by the PHY - * @reserved: Reserved for future use; see the note on reserved space. -- * @reserved1: Reserved for future use; see the note on reserved space. - * @link_mode_masks: Variable length bitmaps. - * - * If autonegotiation is disabled, the speed and @duplex represent the -@@ -2085,7 +2144,7 @@ struct ethtool_link_settings { - uint8_t transceiver; - uint8_t master_slave_cfg; - uint8_t master_slave_state; -- uint8_t reserved1[1]; -+ uint8_t rate_matching; - uint32_t reserved[7]; - uint32_t link_mode_masks[]; - /* layout of link_mode_masks fields: -diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h -index bda06258be..713d259768 100644 ---- a/include/standard-headers/linux/fuse.h -+++ b/include/standard-headers/linux/fuse.h -@@ -194,6 +194,9 @@ - * - add FUSE_SECURITY_CTX init flag - * - add security context to create, mkdir, symlink, and mknod requests - * - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX -+ * -+ * 7.37 -+ * - add FUSE_TMPFILE - */ - - #ifndef _LINUX_FUSE_H -@@ -225,7 +228,7 @@ - #define FUSE_KERNEL_VERSION 7 - - /** Minor version number of this interface */ --#define FUSE_KERNEL_MINOR_VERSION 36 -+#define FUSE_KERNEL_MINOR_VERSION 37 - - /** The node ID of the root inode */ - #define FUSE_ROOT_ID 1 -@@ -533,6 +536,7 @@ enum fuse_opcode { - 
FUSE_SETUPMAPPING = 48, - FUSE_REMOVEMAPPING = 49, - FUSE_SYNCFS = 50, -+ FUSE_TMPFILE = 51, - - /* CUSE specific operations */ - CUSE_INIT = 4096, -diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h -index 50790aee5a..815f7a1dff 100644 ---- a/include/standard-headers/linux/input-event-codes.h -+++ b/include/standard-headers/linux/input-event-codes.h -@@ -862,6 +862,7 @@ - #define ABS_TOOL_WIDTH 0x1c - - #define ABS_VOLUME 0x20 -+#define ABS_PROFILE 0x21 - - #define ABS_MISC 0x28 - -diff --git a/include/standard-headers/linux/virtio_blk.h b/include/standard-headers/linux/virtio_blk.h -index 2dcc90826a..e81715cd70 100644 ---- a/include/standard-headers/linux/virtio_blk.h -+++ b/include/standard-headers/linux/virtio_blk.h -@@ -40,6 +40,7 @@ - #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ - #define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ - #define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ -+#define VIRTIO_BLK_F_SECURE_ERASE 16 /* Secure Erase is supported */ - - /* Legacy feature bits */ - #ifndef VIRTIO_BLK_NO_LEGACY -@@ -119,6 +120,21 @@ struct virtio_blk_config { - uint8_t write_zeroes_may_unmap; - - uint8_t unused1[3]; -+ -+ /* the next 3 entries are guarded by VIRTIO_BLK_F_SECURE_ERASE */ -+ /* -+ * The maximum secure erase sectors (in 512-byte sectors) for -+ * one segment. -+ */ -+ __virtio32 max_secure_erase_sectors; -+ /* -+ * The maximum number of secure erase segments in a -+ * secure erase command. -+ */ -+ __virtio32 max_secure_erase_seg; -+ /* Secure erase commands must be aligned to this number of sectors. */ -+ __virtio32 secure_erase_sector_alignment; -+ - } QEMU_PACKED; - - /* -@@ -153,6 +169,9 @@ struct virtio_blk_config { - /* Write zeroes command */ - #define VIRTIO_BLK_T_WRITE_ZEROES 13 - -+/* Secure erase command */ -+#define VIRTIO_BLK_T_SECURE_ERASE 14 -+ - #ifndef VIRTIO_BLK_NO_LEGACY - /* Barrier before this op. 
*/ - #define VIRTIO_BLK_T_BARRIER 0x80000000 -diff --git a/linux-headers/asm-generic/hugetlb_encode.h b/linux-headers/asm-generic/hugetlb_encode.h -index 4f3d5aaa11..de687009bf 100644 ---- a/linux-headers/asm-generic/hugetlb_encode.h -+++ b/linux-headers/asm-generic/hugetlb_encode.h -@@ -20,18 +20,18 @@ - #define HUGETLB_FLAG_ENCODE_SHIFT 26 - #define HUGETLB_FLAG_ENCODE_MASK 0x3f - --#define HUGETLB_FLAG_ENCODE_16KB (14 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) --#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_16KB (14U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_64KB (16U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_512KB (19U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_1MB (20U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_2MB (21U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_8MB (23U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_16MB (24U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_32MB (25U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_256MB (28U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_512MB (29U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define 
HUGETLB_FLAG_ENCODE_1GB (30U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_2GB (31U << HUGETLB_FLAG_ENCODE_SHIFT) -+#define HUGETLB_FLAG_ENCODE_16GB (34U << HUGETLB_FLAG_ENCODE_SHIFT) - - #endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */ -diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h -index 6c1aa92a92..6ce1f1ceb4 100644 ---- a/linux-headers/asm-generic/mman-common.h -+++ b/linux-headers/asm-generic/mman-common.h -@@ -77,6 +77,8 @@ - - #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ - -+#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ -+ - /* compatibility flags */ - #define MAP_FILE 0 - -diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h -index 1be428663c..c6e1fc77c9 100644 ---- a/linux-headers/asm-mips/mman.h -+++ b/linux-headers/asm-mips/mman.h -@@ -103,6 +103,8 @@ - - #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ - -+#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ -+ - /* compatibility flags */ - #define MAP_FILE 0 - -diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h -index 7351417afd..8985ff234c 100644 ---- a/linux-headers/asm-riscv/kvm.h -+++ b/linux-headers/asm-riscv/kvm.h -@@ -48,6 +48,7 @@ struct kvm_sregs { - /* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ - struct kvm_riscv_config { - unsigned long isa; -+ unsigned long zicbom_block_size; - }; - - /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ -@@ -98,6 +99,9 @@ enum KVM_RISCV_ISA_EXT_ID { - KVM_RISCV_ISA_EXT_M, - KVM_RISCV_ISA_EXT_SVPBMT, - KVM_RISCV_ISA_EXT_SSTC, -+ KVM_RISCV_ISA_EXT_SVINVAL, -+ KVM_RISCV_ISA_EXT_ZIHINTPAUSE, -+ KVM_RISCV_ISA_EXT_ZICBOM, - KVM_RISCV_ISA_EXT_MAX, - }; - -diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h -index ebdafa576d..b2783c5202 100644 ---- a/linux-headers/linux/kvm.h -+++ b/linux-headers/linux/kvm.h -@@ -1175,6 +1175,7 @@ struct 
kvm_ppc_resize_hpt { - #define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220 - #define KVM_CAP_S390_ZPCI_OP 221 - #define KVM_CAP_S390_CPU_TOPOLOGY 222 -+#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 - - #ifdef KVM_CAP_IRQ_ROUTING - -diff --git a/linux-headers/linux/psci.h b/linux-headers/linux/psci.h -index 213b2a0f70..e60dfd8907 100644 ---- a/linux-headers/linux/psci.h -+++ b/linux-headers/linux/psci.h -@@ -48,12 +48,26 @@ - #define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7) - - #define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10) -+#define PSCI_1_0_FN_CPU_FREEZE PSCI_0_2_FN(11) -+#define PSCI_1_0_FN_CPU_DEFAULT_SUSPEND PSCI_0_2_FN(12) -+#define PSCI_1_0_FN_NODE_HW_STATE PSCI_0_2_FN(13) - #define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14) - #define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15) -+#define PSCI_1_0_FN_STAT_RESIDENCY PSCI_0_2_FN(16) -+#define PSCI_1_0_FN_STAT_COUNT PSCI_0_2_FN(17) -+ - #define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) -+#define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19) -+#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(19) - -+#define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12) -+#define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13) - #define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) -+#define PSCI_1_0_FN64_STAT_RESIDENCY PSCI_0_2_FN64(16) -+#define PSCI_1_0_FN64_STAT_COUNT PSCI_0_2_FN64(17) -+ - #define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) -+#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(19) - - /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ - #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff -diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h -index a3a377cd44..ba5d0df52f 100644 ---- a/linux-headers/linux/userfaultfd.h -+++ b/linux-headers/linux/userfaultfd.h -@@ -12,6 +12,10 @@ - - #include - -+/* ioctls for /dev/userfaultfd */ -+#define USERFAULTFD_IOC 0xAA -+#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00) -+ - /* - * If the UFFDIO_API is upgraded 
someday, the UFFDIO_UNREGISTER and - * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In -diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h -index ede44b5572..bee7e42198 100644 ---- a/linux-headers/linux/vfio.h -+++ b/linux-headers/linux/vfio.h -@@ -986,6 +986,148 @@ enum vfio_device_mig_state { - VFIO_DEVICE_STATE_RUNNING_P2P = 5, - }; - -+/* -+ * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power -+ * state with the platform-based power management. Device use of lower power -+ * states depends on factors managed by the runtime power management core, -+ * including system level support and coordinating support among dependent -+ * devices. Enabling device low power entry does not guarantee lower power -+ * usage by the device, nor is a mechanism provided through this feature to -+ * know the current power state of the device. If any device access happens -+ * (either from the host or through the vfio uAPI) when the device is in the -+ * low power state, then the host will move the device out of the low power -+ * state as necessary prior to the access. Once the access is completed, the -+ * device may re-enter the low power state. For single shot low power support -+ * with wake-up notification, see -+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP below. Access to mmap'd -+ * device regions is disabled on LOW_POWER_ENTRY and may only be resumed after -+ * calling LOW_POWER_EXIT. -+ */ -+#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY 3 -+ -+/* -+ * This device feature has the same behavior as -+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY with the exception that the user -+ * provides an eventfd for wake-up notification. When the device moves out of -+ * the low power state for the wake-up, the host will not allow the device to -+ * re-enter a low power state without a subsequent user call to one of the low -+ * power entry device feature IOCTLs. 
Access to mmap'd device regions is -+ * disabled on LOW_POWER_ENTRY_WITH_WAKEUP and may only be resumed after the -+ * low power exit. The low power exit can happen either through LOW_POWER_EXIT -+ * or through any other access (where the wake-up notification has been -+ * generated). The access to mmap'd device regions will not trigger low power -+ * exit. -+ * -+ * The notification through the provided eventfd will be generated only when -+ * the device has entered and is resumed from a low power state after -+ * calling this device feature IOCTL. A device that has not entered low power -+ * state, as managed through the runtime power management core, will not -+ * generate a notification through the provided eventfd on access. Calling the -+ * LOW_POWER_EXIT feature is optional in the case where notification has been -+ * signaled on the provided eventfd that a resume from low power has occurred. -+ */ -+struct vfio_device_low_power_entry_with_wakeup { -+ __s32 wakeup_eventfd; -+ __u32 reserved; -+}; -+ -+#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP 4 -+ -+/* -+ * Upon VFIO_DEVICE_FEATURE_SET, disallow use of device low power states as -+ * previously enabled via VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY or -+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP device features. -+ * This device feature IOCTL may itself generate a wakeup eventfd notification -+ * in the latter case if the device had previously entered a low power state. -+ */ -+#define VFIO_DEVICE_FEATURE_LOW_POWER_EXIT 5 -+ -+/* -+ * Upon VFIO_DEVICE_FEATURE_SET start/stop device DMA logging. -+ * VFIO_DEVICE_FEATURE_PROBE can be used to detect if the device supports -+ * DMA logging. -+ * -+ * DMA logging allows a device to internally record what DMAs the device is -+ * initiating and report them back to userspace. It is part of the VFIO -+ * migration infrastructure that allows implementing dirty page tracking -+ * during the pre copy phase of live migration. 
Only DMA WRITEs are logged, -+ * and this API is not connected to VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. -+ * -+ * When DMA logging is started a range of IOVAs to monitor is provided and the -+ * device can optimize its logging to cover only the IOVA range given. Each -+ * DMA that the device initiates inside the range will be logged by the device -+ * for later retrieval. -+ * -+ * page_size is an input that hints what tracking granularity the device -+ * should try to achieve. If the device cannot do the hinted page size then -+ * it's the driver choice which page size to pick based on its support. -+ * On output the device will return the page size it selected. -+ * -+ * ranges is a pointer to an array of -+ * struct vfio_device_feature_dma_logging_range. -+ * -+ * The core kernel code guarantees to support by minimum num_ranges that fit -+ * into a single kernel page. User space can try higher values but should give -+ * up if the above can't be achieved as of some driver limitations. -+ * -+ * A single call to start device DMA logging can be issued and a matching stop -+ * should follow at the end. Another start is not allowed in the meantime. -+ */ -+struct vfio_device_feature_dma_logging_control { -+ __aligned_u64 page_size; -+ __u32 num_ranges; -+ __u32 __reserved; -+ __aligned_u64 ranges; -+}; -+ -+struct vfio_device_feature_dma_logging_range { -+ __aligned_u64 iova; -+ __aligned_u64 length; -+}; -+ -+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6 -+ -+/* -+ * Upon VFIO_DEVICE_FEATURE_SET stop device DMA logging that was started -+ * by VFIO_DEVICE_FEATURE_DMA_LOGGING_START -+ */ -+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7 -+ -+/* -+ * Upon VFIO_DEVICE_FEATURE_GET read back and clear the device DMA log -+ * -+ * Query the device's DMA log for written pages within the given IOVA range. -+ * During querying the log is cleared for the IOVA range. 
-+ * -+ * bitmap is a pointer to an array of u64s that will hold the output bitmap -+ * with 1 bit reporting a page_size unit of IOVA. The mapping of IOVA to bits -+ * is given by: -+ * bitmap[(addr - iova)/page_size] & (1ULL << (addr % 64)) -+ * -+ * The input page_size can be any power of two value and does not have to -+ * match the value given to VFIO_DEVICE_FEATURE_DMA_LOGGING_START. The driver -+ * will format its internal logging to match the reporting page size, possibly -+ * by replicating bits if the internal page size is lower than requested. -+ * -+ * The LOGGING_REPORT will only set bits in the bitmap and never clear or -+ * perform any initialization of the user provided bitmap. -+ * -+ * If any error is returned userspace should assume that the dirty log is -+ * corrupted. Error recovery is to consider all memory dirty and try to -+ * restart the dirty tracking, or to abort/restart the whole migration. -+ * -+ * If DMA logging is not enabled, an error will be returned. -+ * -+ */ -+struct vfio_device_feature_dma_logging_report { -+ __aligned_u64 iova; -+ __aligned_u64 length; -+ __aligned_u64 page_size; -+ __aligned_u64 bitmap; -+}; -+ -+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8 -+ - /* -------- API for Type1 VFIO IOMMU -------- */ - - /** --- -2.31.1 - diff --git a/kvm-migration-check-magic-value-for-deciding-the-mapping.patch b/kvm-migration-check-magic-value-for-deciding-the-mapping.patch deleted file mode 100644 index 387d0b3..0000000 --- a/kvm-migration-check-magic-value-for-deciding-the-mapping.patch +++ /dev/null @@ -1,330 +0,0 @@ -From 29eee1fbb84c0e2f0ece9e6d996afa7238ed2912 Mon Sep 17 00:00:00 2001 -From: "manish.mishra" -Date: Tue, 20 Dec 2022 18:44:18 +0000 -Subject: [PATCH 7/8] migration: check magic value for deciding the mapping of - channels -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 150: migration: Fix multifd crash on channel disorders 
-RH-Bugzilla: 2169732 -RH-Acked-by: quintela1 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Dr. David Alan Gilbert -RH-Commit: [2/2] 4fb9408478923415a91fe0527bf4b1a0f022f329 (peterx/qemu-kvm) - -Current logic assumes that channel connections on the destination side are -always established in the same order as the source and the first one will -always be the main channel followed by the multifid or post-copy -preemption channel. This may not be always true, as even if a channel has a -connection established on the source side it can be in the pending state on -the destination side and a newer connection can be established first. -Basically causing out of order mapping of channels on the destination side. -Currently, all channels except post-copy preempt send a magic number, this -patch uses that magic number to decide the type of channel. This logic is -applicable only for precopy(multifd) live migration, as mentioned, the -post-copy preempt channel does not send any magic number. Also, tls live -migrations already does tls handshake before creating other channels, so -this issue is not possible with tls, hence this logic is avoided for tls -live migrations. This patch uses read peek to check the magic number of -channels so that current data/control stream management remains -un-effected. - -Reviewed-by: Peter Xu -Reviewed-by: Daniel P. Berrange -Reviewed-by: Juan Quintela -Suggested-by: Daniel P. 
Berrange -Signed-off-by: manish.mishra -Signed-off-by: Juan Quintela -(cherry picked from commit 6720c2b32725e6ac404f22851a0ecd0a71d0cbe2) -Signed-off-by: Peter Xu ---- - migration/channel.c | 45 +++++++++++++++++++++++++++++++++ - migration/channel.h | 5 ++++ - migration/migration.c | 54 ++++++++++++++++++++++++++++------------ - migration/multifd.c | 19 +++++++------- - migration/multifd.h | 2 +- - migration/postcopy-ram.c | 5 +--- - migration/postcopy-ram.h | 2 +- - 7 files changed, 101 insertions(+), 31 deletions(-) - -diff --git a/migration/channel.c b/migration/channel.c -index 1b0815039f..ca3319a309 100644 ---- a/migration/channel.c -+++ b/migration/channel.c -@@ -92,3 +92,48 @@ void migration_channel_connect(MigrationState *s, - migrate_fd_connect(s, error); - error_free(error); - } -+ -+ -+/** -+ * @migration_channel_read_peek - Peek at migration channel, without -+ * actually removing it from channel buffer. -+ * -+ * @ioc: the channel object -+ * @buf: the memory region to read data into -+ * @buflen: the number of bytes to read in @buf -+ * @errp: pointer to a NULL-initialized error object -+ * -+ * Returns 0 if successful, returns -1 and sets @errp if fails. -+ */ -+int migration_channel_read_peek(QIOChannel *ioc, -+ const char *buf, -+ const size_t buflen, -+ Error **errp) -+{ -+ ssize_t len = 0; -+ struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; -+ -+ while (true) { -+ len = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, -+ QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp); -+ -+ if (len <= 0 && len != QIO_CHANNEL_ERR_BLOCK) { -+ error_setg(errp, -+ "Failed to peek at channel"); -+ return -1; -+ } -+ -+ if (len == buflen) { -+ break; -+ } -+ -+ /* 1ms sleep. 
*/ -+ if (qemu_in_coroutine()) { -+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); -+ } else { -+ g_usleep(1000); -+ } -+ } -+ -+ return 0; -+} -diff --git a/migration/channel.h b/migration/channel.h -index 67a461c28a..5bdb8208a7 100644 ---- a/migration/channel.h -+++ b/migration/channel.h -@@ -24,4 +24,9 @@ void migration_channel_connect(MigrationState *s, - QIOChannel *ioc, - const char *hostname, - Error *error_in); -+ -+int migration_channel_read_peek(QIOChannel *ioc, -+ const char *buf, -+ const size_t buflen, -+ Error **errp); - #endif -diff --git a/migration/migration.c b/migration/migration.c -index f485eea5fb..593dbd25de 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -31,6 +31,7 @@ - #include "migration.h" - #include "savevm.h" - #include "qemu-file.h" -+#include "channel.h" - #include "migration/vmstate.h" - #include "block/block.h" - #include "qapi/error.h" -@@ -663,10 +664,6 @@ static bool migration_incoming_setup(QEMUFile *f, Error **errp) - { - MigrationIncomingState *mis = migration_incoming_get_current(); - -- if (multifd_load_setup(errp) != 0) { -- return false; -- } -- - if (!mis->from_src_file) { - mis->from_src_file = f; - } -@@ -733,31 +730,56 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) - { - MigrationIncomingState *mis = migration_incoming_get_current(); - Error *local_err = NULL; -- bool start_migration; - QEMUFile *f; -+ bool default_channel = true; -+ uint32_t channel_magic = 0; -+ int ret = 0; - -- if (!mis->from_src_file) { -- /* The first connection (multifd may have multiple) */ -+ if (migrate_use_multifd() && !migrate_postcopy_ram() && -+ qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { -+ /* -+ * With multiple channels, it is possible that we receive channels -+ * out of order on destination side, causing incorrect mapping of -+ * source channels on destination side. Check channel MAGIC to -+ * decide type of channel. 
Please note this is best effort, postcopy -+ * preempt channel does not send any magic number so avoid it for -+ * postcopy live migration. Also tls live migration already does -+ * tls handshake while initializing main channel so with tls this -+ * issue is not possible. -+ */ -+ ret = migration_channel_read_peek(ioc, (void *)&channel_magic, -+ sizeof(channel_magic), &local_err); -+ -+ if (ret != 0) { -+ error_propagate(errp, local_err); -+ return; -+ } -+ -+ default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); -+ } else { -+ default_channel = !mis->from_src_file; -+ } -+ -+ if (multifd_load_setup(errp) != 0) { -+ error_setg(errp, "Failed to setup multifd channels"); -+ return; -+ } -+ -+ if (default_channel) { - f = qemu_file_new_input(ioc); - - if (!migration_incoming_setup(f, errp)) { - return; - } -- -- /* -- * Common migration only needs one channel, so we can start -- * right now. Some features need more than one channel, we wait. -- */ -- start_migration = !migration_needs_multiple_sockets(); - } else { - /* Multiple connections */ - assert(migration_needs_multiple_sockets()); - if (migrate_use_multifd()) { -- start_migration = multifd_recv_new_channel(ioc, &local_err); -+ multifd_recv_new_channel(ioc, &local_err); - } else { - assert(migrate_postcopy_preempt()); - f = qemu_file_new_input(ioc); -- start_migration = postcopy_preempt_new_channel(mis, f); -+ postcopy_preempt_new_channel(mis, f); - } - if (local_err) { - error_propagate(errp, local_err); -@@ -765,7 +787,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) - } - } - -- if (start_migration) { -+ if (migration_has_all_channels()) { - /* If it's a recovery, we're done */ - if (postcopy_try_recover()) { - return; -diff --git a/migration/multifd.c b/migration/multifd.c -index 509bbbe3bf..c3385529cf 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -1167,9 +1167,14 @@ int multifd_load_setup(Error **errp) - uint32_t page_count = MULTIFD_PACKET_SIZE / 
qemu_target_page_size(); - uint8_t i; - -- if (!migrate_use_multifd()) { -+ /* -+ * Return successfully if multiFD recv state is already initialised -+ * or multiFD is not enabled. -+ */ -+ if (multifd_recv_state || !migrate_use_multifd()) { - return 0; - } -+ - if (!migrate_multi_channels_is_allowed()) { - error_setg(errp, "multifd is not supported by current protocol"); - return -1; -@@ -1228,11 +1233,9 @@ bool multifd_recv_all_channels_created(void) - - /* - * Try to receive all multifd channels to get ready for the migration. -- * - Return true and do not set @errp when correctly receiving all channels; -- * - Return false and do not set @errp when correctly receiving the current one; -- * - Return false and set @errp when failing to receive the current channel. -+ * Sets @errp when failing to receive the current channel. - */ --bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) -+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) - { - MultiFDRecvParams *p; - Error *local_err = NULL; -@@ -1245,7 +1248,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) - "failed to receive packet" - " via multifd channel %d: ", - qatomic_read(&multifd_recv_state->count)); -- return false; -+ return; - } - trace_multifd_recv_new_channel(id); - -@@ -1255,7 +1258,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) - id); - multifd_recv_terminate_threads(local_err); - error_propagate(errp, local_err); -- return false; -+ return; - } - p->c = ioc; - object_ref(OBJECT(ioc)); -@@ -1266,6 +1269,4 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) - qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, - QEMU_THREAD_JOINABLE); - qatomic_inc(&multifd_recv_state->count); -- return qatomic_read(&multifd_recv_state->count) == -- migrate_multifd_channels(); - } -diff --git a/migration/multifd.h b/migration/multifd.h -index 519f498643..913e4ba274 100644 ---- a/migration/multifd.h -+++ b/migration/multifd.h -@@ -18,7 +18,7 
@@ void multifd_save_cleanup(void); - int multifd_load_setup(Error **errp); - int multifd_load_cleanup(Error **errp); - bool multifd_recv_all_channels_created(void); --bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); -+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); - void multifd_recv_sync_main(void); - int multifd_send_sync_main(QEMUFile *f); - int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index 0c55df0e52..b98e95dab0 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -1538,7 +1538,7 @@ void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd) - } - } - --bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) -+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) - { - /* - * The new loading channel has its own threads, so it needs to be -@@ -1547,9 +1547,6 @@ bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) - qemu_file_set_blocking(file, true); - mis->postcopy_qemufile_dst = file; - trace_postcopy_preempt_new_channel(); -- -- /* Start the migration immediately */ -- return true; - } - - /* -diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h -index 6147bf7d1d..25881c4127 100644 ---- a/migration/postcopy-ram.h -+++ b/migration/postcopy-ram.h -@@ -190,7 +190,7 @@ enum PostcopyChannels { - RAM_CHANNEL_MAX, - }; - --bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); -+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); - int postcopy_preempt_setup(MigrationState *s, Error **errp); - int postcopy_preempt_wait_channel(MigrationState *s); - --- -2.31.1 - diff --git a/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch b/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch deleted file mode 100644 index 707c80f..0000000 --- 
a/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch +++ /dev/null @@ -1,325 +0,0 @@ -From e5834364958a3914d7b8b46b985a1b054728b466 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Thu, 19 Jan 2023 11:16:45 +0100 -Subject: [PATCH 2/8] net: stream: add a new option to automatically reconnect -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 148: net: stream: add a new option to automatically reconnect -RH-Bugzilla: 2169232 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Cindy Lu -RH-Acked-by: MST -RH-Acked-by: David Gibson (Red Hat) -RH-Commit: [2/2] 9b87647a9ed2e7c1b91bdfa9d0a736e091c892a5 (lvivier/qemu-kvm-centos) - -In stream mode, if the server shuts down there is currently -no way to reconnect the client to a new server without removing -the NIC device and the netdev backend (or to reboot). - -This patch introduces a reconnect option that specifies a delay -to try to reconnect with the same parameters. - -Add a new test in qtest to test the reconnect option and the -connect/disconnect events. 
- -Signed-off-by: Laurent Vivier -Signed-off-by: Jason Wang -(cherry picked from commit b95c0d4440950fba6dbef0f781962911fa42abdb) ---- - net/stream.c | 53 ++++++++++++++++++- - qapi/net.json | 7 ++- - qemu-options.hx | 6 +-- - tests/qtest/netdev-socket.c | 101 ++++++++++++++++++++++++++++++++++++ - 4 files changed, 162 insertions(+), 5 deletions(-) - -diff --git a/net/stream.c b/net/stream.c -index 37ff727e0c..9204b4c96e 100644 ---- a/net/stream.c -+++ b/net/stream.c -@@ -39,6 +39,8 @@ - #include "io/channel-socket.h" - #include "io/net-listener.h" - #include "qapi/qapi-events-net.h" -+#include "qapi/qapi-visit-sockets.h" -+#include "qapi/clone-visitor.h" - - typedef struct NetStreamState { - NetClientState nc; -@@ -49,11 +51,15 @@ typedef struct NetStreamState { - guint ioc_write_tag; - SocketReadState rs; - unsigned int send_index; /* number of bytes sent*/ -+ uint32_t reconnect; -+ guint timer_tag; -+ SocketAddress *addr; - } NetStreamState; - - static void net_stream_listen(QIONetListener *listener, - QIOChannelSocket *cioc, - void *opaque); -+static void net_stream_arm_reconnect(NetStreamState *s); - - static gboolean net_stream_writable(QIOChannel *ioc, - GIOCondition condition, -@@ -170,6 +176,7 @@ static gboolean net_stream_send(QIOChannel *ioc, - qemu_set_info_str(&s->nc, "%s", ""); - - qapi_event_send_netdev_stream_disconnected(s->nc.name); -+ net_stream_arm_reconnect(s); - - return G_SOURCE_REMOVE; - } -@@ -187,6 +194,14 @@ static gboolean net_stream_send(QIOChannel *ioc, - static void net_stream_cleanup(NetClientState *nc) - { - NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); -+ if (s->timer_tag) { -+ g_source_remove(s->timer_tag); -+ s->timer_tag = 0; -+ } -+ if (s->addr) { -+ qapi_free_SocketAddress(s->addr); -+ s->addr = NULL; -+ } - if (s->ioc) { - if (QIO_CHANNEL_SOCKET(s->ioc)->fd != -1) { - if (s->ioc_read_tag) { -@@ -346,12 +361,37 @@ static void net_stream_client_connected(QIOTask *task, gpointer opaque) - error: - 
object_unref(OBJECT(s->ioc)); - s->ioc = NULL; -+ net_stream_arm_reconnect(s); -+} -+ -+static gboolean net_stream_reconnect(gpointer data) -+{ -+ NetStreamState *s = data; -+ QIOChannelSocket *sioc; -+ -+ s->timer_tag = 0; -+ -+ sioc = qio_channel_socket_new(); -+ s->ioc = QIO_CHANNEL(sioc); -+ qio_channel_socket_connect_async(sioc, s->addr, -+ net_stream_client_connected, s, -+ NULL, NULL); -+ return G_SOURCE_REMOVE; -+} -+ -+static void net_stream_arm_reconnect(NetStreamState *s) -+{ -+ if (s->reconnect && s->timer_tag == 0) { -+ s->timer_tag = g_timeout_add_seconds(s->reconnect, -+ net_stream_reconnect, s); -+ } - } - - static int net_stream_client_init(NetClientState *peer, - const char *model, - const char *name, - SocketAddress *addr, -+ uint32_t reconnect, - Error **errp) - { - NetStreamState *s; -@@ -364,6 +404,10 @@ static int net_stream_client_init(NetClientState *peer, - s->ioc = QIO_CHANNEL(sioc); - s->nc.link_down = true; - -+ s->reconnect = reconnect; -+ if (reconnect) { -+ s->addr = QAPI_CLONE(SocketAddress, addr); -+ } - qio_channel_socket_connect_async(sioc, addr, - net_stream_client_connected, s, - NULL, NULL); -@@ -380,7 +424,14 @@ int net_init_stream(const Netdev *netdev, const char *name, - sock = &netdev->u.stream; - - if (!sock->has_server || !sock->server) { -- return net_stream_client_init(peer, "stream", name, sock->addr, errp); -+ return net_stream_client_init(peer, "stream", name, sock->addr, -+ sock->has_reconnect ? 
sock->reconnect : 0, -+ errp); -+ } -+ if (sock->has_reconnect) { -+ error_setg(errp, "'reconnect' option is incompatible with " -+ "socket in server mode"); -+ return -1; - } - return net_stream_server_init(peer, "stream", name, sock->addr, errp); - } -diff --git a/qapi/net.json b/qapi/net.json -index 522ac582ed..d6eb30008b 100644 ---- a/qapi/net.json -+++ b/qapi/net.json -@@ -585,6 +585,10 @@ - # @addr: socket address to listen on (server=true) - # or connect to (server=false) - # @server: create server socket (default: false) -+# @reconnect: For a client socket, if a socket is disconnected, -+# then attempt a reconnect after the given number of seconds. -+# Setting this to zero disables this function. (default: 0) -+# (since 8.0) - # - # Only SocketAddress types 'unix', 'inet' and 'fd' are supported. - # -@@ -593,7 +597,8 @@ - { 'struct': 'NetdevStreamOptions', - 'data': { - 'addr': 'SocketAddress', -- '*server': 'bool' } } -+ '*server': 'bool', -+ '*reconnect': 'uint32' } } - - ## - # @NetdevDgramOptions: -diff --git a/qemu-options.hx b/qemu-options.hx -index ea02ca3a45..48eef4aa2c 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -2766,9 +2766,9 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, - "-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n" - " configure a network backend to connect to another network\n" - " using an UDP tunnel\n" -- "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off]\n" -- "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off]\n" -- "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n" -+ "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect=seconds]\n" -+ "-netdev 
stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect=seconds]\n" -+ "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect=seconds]\n" - " configure a network backend to connect to another network\n" - " using a socket connection in stream mode.\n" - "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]\n" -diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c -index 6ba256e173..acc32c378b 100644 ---- a/tests/qtest/netdev-socket.c -+++ b/tests/qtest/netdev-socket.c -@@ -11,6 +11,10 @@ - #include - #include "../unit/socket-helpers.h" - #include "libqtest.h" -+#include "qapi/qmp/qstring.h" -+#include "qemu/sockets.h" -+#include "qapi/qobject-input-visitor.h" -+#include "qapi/qapi-visit-sockets.h" - - #define CONNECTION_TIMEOUT 5 - -@@ -142,6 +146,101 @@ static void test_stream_inet_ipv4(void) - qtest_quit(qts0); - } - -+static void wait_stream_connected(QTestState *qts, const char *id, -+ SocketAddress **addr) -+{ -+ QDict *resp, *data; -+ QString *qstr; -+ QObject *obj; -+ Visitor *v = NULL; -+ -+ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_CONNECTED"); -+ g_assert_nonnull(resp); -+ data = qdict_get_qdict(resp, "data"); -+ g_assert_nonnull(data); -+ -+ qstr = qobject_to(QString, qdict_get(data, "netdev-id")); -+ g_assert_nonnull(data); -+ -+ g_assert(!strcmp(qstring_get_str(qstr), id)); -+ -+ obj = qdict_get(data, "addr"); -+ -+ v = qobject_input_visitor_new(obj); -+ visit_type_SocketAddress(v, NULL, addr, NULL); -+ visit_free(v); -+ qobject_unref(resp); -+} -+ -+static void wait_stream_disconnected(QTestState *qts, const char *id) -+{ -+ QDict *resp, *data; -+ QString *qstr; -+ -+ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_DISCONNECTED"); -+ g_assert_nonnull(resp); -+ data = qdict_get_qdict(resp, "data"); -+ g_assert_nonnull(data); -+ -+ qstr = qobject_to(QString, qdict_get(data, "netdev-id")); -+ 
g_assert_nonnull(data); -+ -+ g_assert(!strcmp(qstring_get_str(qstr), id)); -+ qobject_unref(resp); -+} -+ -+static void test_stream_inet_reconnect(void) -+{ -+ QTestState *qts0, *qts1; -+ int port; -+ SocketAddress *addr; -+ -+ port = inet_get_free_port(false); -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true,addr.type=inet," -+ "addr.ipv4=on,addr.ipv6=off," -+ "addr.host=127.0.0.1,addr.port=%d", port); -+ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,server=false,id=st0,addr.type=inet," -+ "addr.ipv4=on,addr.ipv6=off,reconnect=1," -+ "addr.host=127.0.0.1,addr.port=%d", port); -+ -+ wait_stream_connected(qts0, "st0", &addr); -+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); -+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); -+ qapi_free_SocketAddress(addr); -+ -+ /* kill server */ -+ qtest_quit(qts0); -+ -+ /* check client has been disconnected */ -+ wait_stream_disconnected(qts1, "st0"); -+ -+ /* restart server */ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true,addr.type=inet," -+ "addr.ipv4=on,addr.ipv6=off," -+ "addr.host=127.0.0.1,addr.port=%d", port); -+ -+ /* wait connection events*/ -+ wait_stream_connected(qts0, "st0", &addr); -+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); -+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); -+ qapi_free_SocketAddress(addr); -+ -+ wait_stream_connected(qts1, "st0", &addr); -+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); -+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); -+ g_assert_cmpint(atoi(addr->u.inet.port), ==, port); -+ qapi_free_SocketAddress(addr); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ - static void test_stream_inet_ipv6(void) - { - QTestState *qts0, *qts1; -@@ -418,6 +517,8 @@ int main(int argc, char **argv) - #ifndef _WIN32 - qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast); - #endif -+ 
qtest_add_func("/netdev/stream/inet/reconnect", -+ test_stream_inet_reconnect); - } - if (has_ipv6) { - qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6); --- -2.31.1 - diff --git a/kvm-physmem-add-missing-memory-barrier.patch b/kvm-physmem-add-missing-memory-barrier.patch deleted file mode 100644 index 3eafa78..0000000 --- a/kvm-physmem-add-missing-memory-barrier.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 0dd4be411e35f00d006d89a15d9161f5d8783c1d Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 10/12] physmem: add missing memory barrier - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [7/9] ee4875cb8c564f0510e48b00a5d95c0e6ea6301b (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit 33828ca11da08436e1b32f3e79dabce3061a0427 -Author: Paolo Bonzini -Date: Fri Mar 3 14:36:32 2023 +0100 - - physmem: add missing memory barrier - - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - softmmu/physmem.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/softmmu/physmem.c b/softmmu/physmem.c -index 1b606a3002..772c9896cd 100644 ---- a/softmmu/physmem.c -+++ b/softmmu/physmem.c -@@ -3117,6 +3117,8 @@ void cpu_register_map_client(QEMUBH *bh) - qemu_mutex_lock(&map_client_list_lock); - client->bh = bh; - QLIST_INSERT_HEAD(&map_client_list, client, link); -+ /* Write map_client_list before reading in_use. 
*/ -+ smp_mb(); - if (!qatomic_read(&bounce.in_use)) { - cpu_notify_map_clients_locked(); - } -@@ -3309,6 +3311,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, - qemu_vfree(bounce.buffer); - bounce.buffer = NULL; - memory_region_unref(bounce.mr); -+ /* Clear in_use before reading map_client_list. */ - qatomic_mb_set(&bounce.in_use, false); - cpu_notify_map_clients(); - } --- -2.39.1 - diff --git a/kvm-qatomic-add-smp_mb__before-after_rmw.patch b/kvm-qatomic-add-smp_mb__before-after_rmw.patch deleted file mode 100644 index acc8c7d..0000000 --- a/kvm-qatomic-add-smp_mb__before-after_rmw.patch +++ /dev/null @@ -1,177 +0,0 @@ -From 1fdc864f9ac927f3ea407f35f6771a4b2e8f509f Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 04/12] qatomic: add smp_mb__before/after_rmw() - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [1/9] e8d0b64670bff778d275b1fb477dcee0c109251a (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit ff00bed1897c3d27adc5b0cec6f6eeb5a7d13176 -Author: Paolo Bonzini -Date: Thu Mar 2 11:10:56 2023 +0100 - - qatomic: add smp_mb__before/after_rmw() - - On ARM, seqcst loads and stores (which QEMU does not use) are compiled - respectively as LDAR and STLR instructions. Even though LDAR is - also used for load-acquire operations, it also waits for all STLRs to - leave the store buffer. Thus, LDAR and STLR alone are load-acquire - and store-release operations, but LDAR also provides store-against-load - ordering as long as the previous store is a STLR. - - Compare this to ARMv7, where store-release is DMB+STR and load-acquire - is LDR+DMB, but an additional DMB is needed between store-seqcst and - load-seqcst (e.g. 
DMB+STR+DMB+LDR+DMB); or with x86, where MOV provides - load-acquire and store-release semantics and the two can be reordered. - - Likewise, on ARM sequentially consistent read-modify-write operations only - need to use LDAXR and STLXR respectively for the load and the store, while - on x86 they need to use the stronger LOCK prefix. - - In a strange twist of events, however, the _stronger_ semantics - of the ARM instructions can end up causing bugs on ARM, not on x86. - The problems occur when seqcst atomics are mixed with relaxed atomics. - - QEMU's atomics try to bridge the Linux API (that most of the developers - are familiar with) and the C11 API, and the two have a substantial - difference: - - - in Linux, strongly-ordered atomics such as atomic_add_return() affect - the global ordering of _all_ memory operations, including for example - READ_ONCE()/WRITE_ONCE() - - - in C11, sequentially consistent atomics (except for seq-cst fences) - only affect the ordering of sequentially consistent operations. - In particular, since relaxed loads are done with LDR on ARM, they are - not ordered against seqcst stores (which are done with STLR). - - QEMU implements high-level synchronization primitives with the idea that - the primitives contain the necessary memory barriers, and the callers can - use relaxed atomics (qatomic_read/qatomic_set) or even regular accesses. - This is very much incompatible with the C11 view that seqcst accesses - are only ordered against other seqcst accesses, and requires using seqcst - fences as in the following example: - - qatomic_set(&y, 1); qatomic_set(&x, 1); - smp_mb(); smp_mb(); - ... qatomic_read(&x) ... ... qatomic_read(&y) ... - - When a qatomic_*() read-modify write operation is used instead of one - or both stores, developers that are more familiar with the Linux API may - be tempted to omit the smp_mb(), which will work on x86 but not on ARM. 
- - This nasty difference between Linux and C11 read-modify-write operations - has already caused issues in util/async.c and more are being found. - Provide something similar to Linux smp_mb__before/after_atomic(); this - has the double function of documenting clearly why there is a memory - barrier, and avoiding a double barrier on x86 and s390x systems. - - The new macro can already be put to use in qatomic_mb_set(). - - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - docs/devel/atomics.rst | 26 +++++++++++++++++++++----- - include/qemu/atomic.h | 17 ++++++++++++++++- - 2 files changed, 37 insertions(+), 6 deletions(-) - -diff --git a/docs/devel/atomics.rst b/docs/devel/atomics.rst -index 52baa0736d..10fbfc58bb 100644 ---- a/docs/devel/atomics.rst -+++ b/docs/devel/atomics.rst -@@ -25,7 +25,8 @@ provides macros that fall in three camps: - - - weak atomic access and manual memory barriers: ``qatomic_read()``, - ``qatomic_set()``, ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``, -- ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``; -+ ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``, -+ ``smp_mb__before_rmw()``, ``smp_mb__after_rmw()``; - - - sequentially consistent atomic access: everything else. - -@@ -470,7 +471,7 @@ and memory barriers, and the equivalents in QEMU: - sequential consistency. - - - in QEMU, ``qatomic_read()`` and ``qatomic_set()`` do not participate in -- the total ordering enforced by sequentially-consistent operations. -+ the ordering enforced by read-modify-write operations. - This is because QEMU uses the C11 memory model. The following example - is correct in Linux but not in QEMU: - -@@ -486,9 +487,24 @@ and memory barriers, and the equivalents in QEMU: - because the read of ``y`` can be moved (by either the processor or the - compiler) before the write of ``x``. 
- -- Fixing this requires an ``smp_mb()`` memory barrier between the write -- of ``x`` and the read of ``y``. In the common case where only one thread -- writes ``x``, it is also possible to write it like this: -+ Fixing this requires a full memory barrier between the write of ``x`` and -+ the read of ``y``. QEMU provides ``smp_mb__before_rmw()`` and -+ ``smp_mb__after_rmw()``; they act both as an optimization, -+ avoiding the memory barrier on processors where it is unnecessary, -+ and as a clarification of this corner case of the C11 memory model: -+ -+ +--------------------------------+ -+ | QEMU (correct) | -+ +================================+ -+ | :: | -+ | | -+ | a = qatomic_fetch_add(&x, 2);| -+ | smp_mb__after_rmw(); | -+ | b = qatomic_read(&y); | -+ +--------------------------------+ -+ -+ In the common case where only one thread writes ``x``, it is also possible -+ to write it like this: - - +--------------------------------+ - | QEMU (correct) | -diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h -index 874134fd19..f85834ee8b 100644 ---- a/include/qemu/atomic.h -+++ b/include/qemu/atomic.h -@@ -245,6 +245,20 @@ - #define smp_wmb() smp_mb_release() - #define smp_rmb() smp_mb_acquire() - -+/* -+ * SEQ_CST is weaker than the older __sync_* builtins and Linux -+ * kernel read-modify-write atomics. Provide a macro to obtain -+ * the same semantics. -+ */ -+#if !defined(QEMU_SANITIZE_THREAD) && \ -+ (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) -+# define smp_mb__before_rmw() signal_barrier() -+# define smp_mb__after_rmw() signal_barrier() -+#else -+# define smp_mb__before_rmw() smp_mb() -+# define smp_mb__after_rmw() smp_mb() -+#endif -+ - /* qatomic_mb_read/set semantics map Java volatile variables. They are - * less expensive on some platforms (notably POWER) than fully - * sequentially consistent operations. 
-@@ -259,7 +273,8 @@ - #if !defined(QEMU_SANITIZE_THREAD) && \ - (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) - /* This is more efficient than a store plus a fence. */ --# define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i)) -+# define qatomic_mb_set(ptr, i) \ -+ ({ (void)qatomic_xchg(ptr, i); smp_mb__after_rmw(); }) - #else - # define qatomic_mb_set(ptr, i) \ - ({ qatomic_store_release(ptr, i); smp_mb(); }) --- -2.39.1 - diff --git a/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch b/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch deleted file mode 100644 index 7f39f4a..0000000 --- a/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 46ead2c391924b68741d6da28f28f909b80f5914 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 12 Jan 2023 20:14:51 +0100 -Subject: [PATCH 01/20] qcow2: Fix theoretical corruption in store_bitmap() - error path -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Kevin Wolf -RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image -RH-Bugzilla: 2150180 -RH-Acked-by: Thomas Huth -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Stefano Garzarella -RH-Commit: [1/4] a6a497947179431567d330d0501247a3749fb9fd (kmwolf/centos-qemu-kvm) - -In order to write the bitmap table to the image file, it is converted to -big endian. If the write fails, it is passed to clear_bitmap_table() to -free all of the clusters it had allocated before. However, if we don't -convert it back to native endianness first, we'll free things at a wrong -offset. - -In practical terms, the offsets will be so high that we won't actually -free any allocated clusters, but just run into an error, but in theory -this can cause image corruption. 
- -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Message-Id: <20230112191454.169353-2-kwolf@redhat.com> -Reviewed-by: Hanna Czenczek -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Kevin Wolf -(cherry picked from commit b03dd9613bcf8fe948581b2b3585510cb525c382) -Signed-off-by: Kevin Wolf ---- - block/qcow2-bitmap.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c -index bcad567c0c..3dff99ba06 100644 ---- a/block/qcow2-bitmap.c -+++ b/block/qcow2-bitmap.c -@@ -115,7 +115,7 @@ static int update_header_sync(BlockDriverState *bs) - return bdrv_flush(bs->file->bs); - } - --static inline void bitmap_table_to_be(uint64_t *bitmap_table, size_t size) -+static inline void bitmap_table_bswap_be(uint64_t *bitmap_table, size_t size) - { - size_t i; - -@@ -1401,9 +1401,10 @@ static int store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp) - goto fail; - } - -- bitmap_table_to_be(tb, tb_size); -+ bitmap_table_bswap_be(tb, tb_size); - ret = bdrv_pwrite(bs->file, tb_offset, tb_size * sizeof(tb[0]), tb, 0); - if (ret < 0) { -+ bitmap_table_bswap_be(tb, tb_size); - error_setg_errno(errp, -ret, "Failed to write bitmap '%s' to file", - bm_name); - goto fail; --- -2.31.1 - diff --git a/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch b/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch deleted file mode 100644 index d2dacbc..0000000 --- a/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch +++ /dev/null @@ -1,84 +0,0 @@ -From f628a08d20b9ab6be24c2ab18b38a934a314c78b Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:40:56 +0100 -Subject: [PATCH 14/31] qed: Don't yield in bdrv_qed_co_drain_begin() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [2/16] 
f18e9aebb7e04a62e309b656bac8f2ab83df657f (sgarzarella/qemu-kvm-c-9-s) - -We want to change .bdrv_co_drained_begin() back to be a non-coroutine -callback, so in preparation, avoid yielding in its implementation. - -Because we increase bs->in_flight and bdrv_drained_begin() polls, the -behaviour is unchanged. - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Emanuele Giuseppe Esposito -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-2-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 6d47eb0c8bf2d50682c7dccae74d24104076fe23) -Signed-off-by: Stefano Garzarella ---- - block/qed.c | 20 +++++++++++++++++--- - 1 file changed, 17 insertions(+), 3 deletions(-) - -diff --git a/block/qed.c b/block/qed.c -index 2f36ad342c..013f826c44 100644 ---- a/block/qed.c -+++ b/block/qed.c -@@ -282,9 +282,8 @@ static void coroutine_fn qed_unplug_allocating_write_reqs(BDRVQEDState *s) - qemu_co_mutex_unlock(&s->table_lock); - } - --static void coroutine_fn qed_need_check_timer_entry(void *opaque) -+static void coroutine_fn qed_need_check_timer(BDRVQEDState *s) - { -- BDRVQEDState *s = opaque; - int ret; - - trace_qed_need_check_timer_cb(s); -@@ -310,9 +309,20 @@ static void coroutine_fn qed_need_check_timer_entry(void *opaque) - (void) ret; - } - -+static void coroutine_fn qed_need_check_timer_entry(void *opaque) -+{ -+ BDRVQEDState *s = opaque; -+ -+ qed_need_check_timer(opaque); -+ bdrv_dec_in_flight(s->bs); -+} -+ - static void qed_need_check_timer_cb(void *opaque) - { -+ BDRVQEDState *s = opaque; - Coroutine *co = qemu_coroutine_create(qed_need_check_timer_entry, opaque); -+ -+ bdrv_inc_in_flight(s->bs); - qemu_coroutine_enter(co); - } - -@@ -363,8 +373,12 @@ static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs) - * header is flushed. 
- */ - if (s->need_check_timer && timer_pending(s->need_check_timer)) { -+ Coroutine *co; -+ - qed_cancel_need_check_timer(s); -- qed_need_check_timer_entry(s); -+ co = qemu_coroutine_create(qed_need_check_timer_entry, s); -+ bdrv_inc_in_flight(bs); -+ aio_co_enter(bdrv_get_aio_context(bs), co); - } - } - --- -2.31.1 - diff --git a/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch b/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch deleted file mode 100644 index 86e94db..0000000 --- a/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 7a9907c65e3e2bbb0c119acdbbeb4381e7f1d902 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 09/12] qemu-coroutine-lock: add smp_mb__after_rmw() - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [6/9] 4b1723b1ad670ec4c85240390b4fc15ff361154f (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit e3a3b6ec8169eab2feb241b4982585001512cd55 -Author: Paolo Bonzini -Date: Fri Mar 3 10:52:59 2023 +0100 - - qemu-coroutine-lock: add smp_mb__after_rmw() - - mutex->from_push and mutex->handoff in qemu-coroutine-lock implement - the familiar pattern: - - write a write b - smp_mb() smp_mb() - read b read a - - The memory barrier is required by the C memory model even after a - SEQ_CST read-modify-write operation such as QSLIST_INSERT_HEAD_ATOMIC. - Add it and avoid the unclear qatomic_mb_read() operation. 
- - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - util/qemu-coroutine-lock.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c -index 45c6b57374..c5897bd963 100644 ---- a/util/qemu-coroutine-lock.c -+++ b/util/qemu-coroutine-lock.c -@@ -202,10 +202,16 @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx, - trace_qemu_co_mutex_lock_entry(mutex, self); - push_waiter(mutex, &w); - -+ /* -+ * Add waiter before reading mutex->handoff. Pairs with qatomic_mb_set -+ * in qemu_co_mutex_unlock. -+ */ -+ smp_mb__after_rmw(); -+ - /* This is the "Responsibility Hand-Off" protocol; a lock() picks from - * a concurrent unlock() the responsibility of waking somebody up. - */ -- old_handoff = qatomic_mb_read(&mutex->handoff); -+ old_handoff = qatomic_read(&mutex->handoff); - if (old_handoff && - has_waiters(mutex) && - qatomic_cmpxchg(&mutex->handoff, old_handoff, 0) == old_handoff) { -@@ -304,6 +310,7 @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex) - } - - our_handoff = mutex->sequence; -+ /* Set handoff before checking for waiters. 
*/ - qatomic_mb_set(&mutex->handoff, our_handoff); - if (!has_waiters(mutex)) { - /* The concurrent lock has not added itself yet, so it --- -2.39.1 - diff --git a/kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch b/kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch deleted file mode 100644 index eff4d2e..0000000 --- a/kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch +++ /dev/null @@ -1,197 +0,0 @@ -From b1970c733dc46b2a8f648997a7e1c5d12900ff54 Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:27:04 +0200 -Subject: [PATCH 17/20] qemu-img: Change info key names for protocol nodes - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [12/12] 67c260aaa05466410503fecee6210bf9d47e8c7c (hreitz/qemu-kvm-c-9-s) - -Currently, when querying a qcow2 image, qemu-img info reports something -like this: - -image: test.qcow2 -file format: qcow2 -virtual size: 64 MiB (67108864 bytes) -disk size: 196 KiB -cluster_size: 65536 -Format specific information: - compat: 1.1 - compression type: zlib - lazy refcounts: false - refcount bits: 16 - corrupt: false - extended l2: false -Child node '/file': - image: test.qcow2 - file format: file - virtual size: 192 KiB (197120 bytes) - disk size: 196 KiB - Format specific information: - extent size hint: 1048576 - -Notably, the way the keys are named is specific for image files: The -filename is shown under "image", the BDS driver under "file format", and -the BDS length under "virtual size". This does not make much sense for -nodes that are not actually supposed to be guest images, like the /file -child node shown above. - -Give bdrv_node_info_dump() a @protocol parameter that gives a hint that -the respective node is probably just used for data storage and does not -necessarily present the data for a VM guest disk. 
This renames the keys -so that with this patch, the output becomes: - -image: test.qcow2 -[...] -Child node '/file': - filename: test.qcow2 - protocol type: file - file length: 192 KiB (197120 bytes) - disk size: 196 KiB - Format specific information: - extent size hint: 1048576 - -(Perhaps we should also rename "Format specific information", but I -could not come up with anything better that will not become problematic -if we guess wrong with the protocol "heuristic".) - -This change affects iotest 302, which has protocol node information in -its reference output. - -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-13-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit d570177b50c389f379f93183155a27d44856ab46) -Signed-off-by: Hanna Czenczek ---- - block/monitor/block-hmp-cmds.c | 2 +- - block/qapi.c | 39 ++++++++++++++++++++++++++++------ - include/block/qapi.h | 2 +- - qemu-img.c | 3 ++- - tests/qemu-iotests/302.out | 6 +++--- - 5 files changed, 39 insertions(+), 13 deletions(-) - -diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c -index 72824d4e2e..4d83339a5d 100644 ---- a/block/monitor/block-hmp-cmds.c -+++ b/block/monitor/block-hmp-cmds.c -@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info, - monitor_printf(mon, "\nImages:\n"); - image_info = inserted->image; - while (1) { -- bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0); -+ bdrv_node_info_dump(qapi_ImageInfo_base(image_info), 0, false); - if (image_info->has_backing_image) { - image_info = image_info->backing_image; - } else { -diff --git a/block/qapi.c b/block/qapi.c -index 3e35603f0c..56f398c500 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -934,24 +934,49 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, - visit_free(v); - } - --void bdrv_node_info_dump(BlockNodeInfo *info, int indentation) -+/** -+ * Print the given @info object in human-readable form. 
Every field is indented -+ * using the given @indentation (four spaces per indentation level). -+ * -+ * When using this to print a whole block graph, @protocol can be set to true to -+ * signify that the given information is associated with a protocol node, i.e. -+ * just data storage for an image, such that the data it presents is not really -+ * a full VM disk. If so, several fields change name: For example, "virtual -+ * size" is printed as "file length". -+ * (Consider a qcow2 image, which is represented by a qcow2 node and a file -+ * node. Printing a "virtual size" for the file node does not make sense, -+ * because without the qcow2 node, it is not really a guest disk, so it does not -+ * have a "virtual size". Therefore, we call it "file length" instead.) -+ * -+ * @protocol is ignored when @indentation is 0, because we take that to mean -+ * that the associated node is the root node in the queried block graph, and -+ * thus is always to be interpreted as a standalone guest disk. -+ */ -+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation, bool protocol) - { - char *size_buf, *dsize_buf; - g_autofree char *ind_s = g_strdup_printf("%*s", indentation * 4, ""); - -+ if (indentation == 0) { -+ /* Top level, consider this a normal image */ -+ protocol = false; -+ } -+ - if (!info->has_actual_size) { - dsize_buf = g_strdup("unavailable"); - } else { - dsize_buf = size_to_str(info->actual_size); - } - size_buf = size_to_str(info->virtual_size); -- qemu_printf("%simage: %s\n" -- "%sfile format: %s\n" -- "%svirtual size: %s (%" PRId64 " bytes)\n" -+ qemu_printf("%s%s: %s\n" -+ "%s%s: %s\n" -+ "%s%s: %s (%" PRId64 " bytes)\n" - "%sdisk size: %s\n", -- ind_s, info->filename, -- ind_s, info->format, -- ind_s, size_buf, info->virtual_size, -+ ind_s, protocol ? "filename" : "image", info->filename, -+ ind_s, protocol ? "protocol type" : "file format", -+ info->format, -+ ind_s, protocol ? 
"file length" : "virtual size", -+ size_buf, info->virtual_size, - ind_s, dsize_buf); - g_free(size_buf); - g_free(dsize_buf); -diff --git a/include/block/qapi.h b/include/block/qapi.h -index 38855f2ae9..26113da21a 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -51,5 +51,5 @@ void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); - void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, - const char *prefix, - int indentation); --void bdrv_node_info_dump(BlockNodeInfo *info, int indentation); -+void bdrv_node_info_dump(BlockNodeInfo *info, int indentation, bool protocol); - #endif -diff --git a/qemu-img.c b/qemu-img.c -index e281011245..2943625c67 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -2853,7 +2853,8 @@ static void dump_human_image_info(BlockGraphInfo *info, int indentation, - { - BlockChildInfoList *children_list; - -- bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation); -+ bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation, -+ info->children == NULL); - - for (children_list = info->children; children_list; - children_list = children_list->next) -diff --git a/tests/qemu-iotests/302.out b/tests/qemu-iotests/302.out -index edfa1c4f05..7b5014cdd8 100644 ---- a/tests/qemu-iotests/302.out -+++ b/tests/qemu-iotests/302.out -@@ -5,9 +5,9 @@ file format: raw - virtual size: 448 KiB (458752 bytes) - disk size: unavailable - Child node '/file': -- image: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock -- file format: nbd -- virtual size: 448 KiB (458752 bytes) -+ filename: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock -+ protocol type: nbd -+ file length: 448 KiB (458752 bytes) - disk size: unavailable - - === Converted image info === --- -2.31.1 - diff --git a/kvm-qemu-img-Let-info-print-block-graph.patch b/kvm-qemu-img-Let-info-print-block-graph.patch deleted file mode 100644 index 536df69..0000000 --- a/kvm-qemu-img-Let-info-print-block-graph.patch +++ /dev/null @@ -1,261 +0,0 @@ -From 
ea73e9de42b446ce1049805c23f7706e4f87ed1f Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:27:03 +0200 -Subject: [PATCH 16/20] qemu-img: Let info print block graph - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [11/12] 2c1b8a03c918484449e876acf4c6663766848ad8 (hreitz/qemu-kvm-c-9-s) - -For every node in the backing chain, collect its BlockGraphInfo struct -using bdrv_query_block_graph_info(). Print all nodes' information, -indenting child nodes and labelling them with a path constructed from -the child names leading to the node from the root (e.g. /file/file). - -Note that we open each image with BDRV_O_NO_BACKING, so its backing -child is omitted from this graph, and thus presented in the previous -manner: By simply concatenating all images' information, separated with -blank lines. - -This affects two iotests: -- 065: Here we try to get the format node's format specific information. - The pre-patch code does so by taking all lines from "Format specific - information:" until an empty line. This format specific information - is no longer followed by an empty line, though, but by child node - information, so limit the range by "Child node '/file':". -- 302: Calls qemu_img() for qemu-img info directly, which does not - filter the output, so the child node information ends up in the - output. 
- -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-12-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit c04d0ab026201d21873a63f768cb69c4554dfec1) -Signed-off-by: Hanna Czenczek ---- - qapi/block-core.json | 4 +-- - qemu-img.c | 69 ++++++++++++++++++++++++++------------ - tests/qemu-iotests/065 | 2 +- - tests/qemu-iotests/302.out | 5 +++ - 4 files changed, 56 insertions(+), 24 deletions(-) - -diff --git a/qapi/block-core.json b/qapi/block-core.json -index d703e0fb16..7f331eb8ea 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -5831,9 +5831,9 @@ - ## - # @DummyBlockCoreForceArrays: - # --# Not used by QMP; hack to let us use BlockNodeInfoList internally -+# Not used by QMP; hack to let us use BlockGraphInfoList internally - # - # Since: 8.0 - ## - { 'struct': 'DummyBlockCoreForceArrays', -- 'data': { 'unused-block-node-info': ['BlockNodeInfo'] } } -+ 'data': { 'unused-block-graph-info': ['BlockGraphInfo'] } } -diff --git a/qemu-img.c b/qemu-img.c -index 30b4ea58bb..e281011245 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -2816,13 +2816,13 @@ static void dump_snapshots(BlockDriverState *bs) - g_free(sn_tab); - } - --static void dump_json_block_node_info_list(BlockNodeInfoList *list) -+static void dump_json_block_graph_info_list(BlockGraphInfoList *list) - { - GString *str; - QObject *obj; - Visitor *v = qobject_output_visitor_new(&obj); - -- visit_type_BlockNodeInfoList(v, NULL, &list, &error_abort); -+ visit_type_BlockGraphInfoList(v, NULL, &list, &error_abort); - visit_complete(v, &obj); - str = qobject_to_json_pretty(obj, true); - assert(str != NULL); -@@ -2832,13 +2832,13 @@ static void dump_json_block_node_info_list(BlockNodeInfoList *list) - g_string_free(str, true); - } - --static void dump_json_block_node_info(BlockNodeInfo *info) -+static void dump_json_block_graph_info(BlockGraphInfo *info) - { - GString *str; - QObject *obj; - Visitor *v = qobject_output_visitor_new(&obj); - 
-- visit_type_BlockNodeInfo(v, NULL, &info, &error_abort); -+ visit_type_BlockGraphInfo(v, NULL, &info, &error_abort); - visit_complete(v, &obj); - str = qobject_to_json_pretty(obj, true); - assert(str != NULL); -@@ -2848,9 +2848,29 @@ static void dump_json_block_node_info(BlockNodeInfo *info) - g_string_free(str, true); - } - --static void dump_human_image_info_list(BlockNodeInfoList *list) -+static void dump_human_image_info(BlockGraphInfo *info, int indentation, -+ const char *path) - { -- BlockNodeInfoList *elem; -+ BlockChildInfoList *children_list; -+ -+ bdrv_node_info_dump(qapi_BlockGraphInfo_base(info), indentation); -+ -+ for (children_list = info->children; children_list; -+ children_list = children_list->next) -+ { -+ BlockChildInfo *child = children_list->value; -+ g_autofree char *child_path = NULL; -+ -+ printf("%*sChild node '%s%s':\n", -+ indentation * 4, "", path, child->name); -+ child_path = g_strdup_printf("%s%s/", path, child->name); -+ dump_human_image_info(child->info, indentation + 1, child_path); -+ } -+} -+ -+static void dump_human_image_info_list(BlockGraphInfoList *list) -+{ -+ BlockGraphInfoList *elem; - bool delim = false; - - for (elem = list; elem; elem = elem->next) { -@@ -2859,7 +2879,7 @@ static void dump_human_image_info_list(BlockNodeInfoList *list) - } - delim = true; - -- bdrv_node_info_dump(elem->value, 0); -+ dump_human_image_info(elem->value, 0, "/"); - } - } - -@@ -2869,7 +2889,7 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b) - } - - /** -- * Open an image file chain and return an BlockNodeInfoList -+ * Open an image file chain and return an BlockGraphInfoList - * - * @filename: topmost image filename - * @fmt: topmost image format (may be NULL to autodetect) -@@ -2880,13 +2900,13 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b) - * opening an image file. If there was an error a message will have been - * printed to stderr. 
- */ --static BlockNodeInfoList *collect_image_info_list(bool image_opts, -- const char *filename, -- const char *fmt, -- bool chain, bool force_share) -+static BlockGraphInfoList *collect_image_info_list(bool image_opts, -+ const char *filename, -+ const char *fmt, -+ bool chain, bool force_share) - { -- BlockNodeInfoList *head = NULL; -- BlockNodeInfoList **tail = &head; -+ BlockGraphInfoList *head = NULL; -+ BlockGraphInfoList **tail = &head; - GHashTable *filenames; - Error *err = NULL; - -@@ -2895,7 +2915,7 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts, - while (filename) { - BlockBackend *blk; - BlockDriverState *bs; -- BlockNodeInfo *info; -+ BlockGraphInfo *info; - - if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) { - error_report("Backing file '%s' creates an infinite loop.", -@@ -2912,7 +2932,14 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts, - } - bs = blk_bs(blk); - -- bdrv_query_block_node_info(bs, &info, &err); -+ /* -+ * Note that the returned BlockGraphInfo object will not have -+ * information about this image's backing node, because we have opened -+ * it with BDRV_O_NO_BACKING. Printing this object will therefore not -+ * duplicate the backing chain information that we obtain by walking -+ * the chain manually here. 
-+ */ -+ bdrv_query_block_graph_info(bs, &info, &err); - if (err) { - error_report_err(err); - blk_unref(blk); -@@ -2945,7 +2972,7 @@ static BlockNodeInfoList *collect_image_info_list(bool image_opts, - return head; - - err: -- qapi_free_BlockNodeInfoList(head); -+ qapi_free_BlockGraphInfoList(head); - g_hash_table_destroy(filenames); - return NULL; - } -@@ -2956,7 +2983,7 @@ static int img_info(int argc, char **argv) - OutputFormat output_format = OFORMAT_HUMAN; - bool chain = false; - const char *filename, *fmt, *output; -- BlockNodeInfoList *list; -+ BlockGraphInfoList *list; - bool image_opts = false; - bool force_share = false; - -@@ -3035,14 +3062,14 @@ static int img_info(int argc, char **argv) - break; - case OFORMAT_JSON: - if (chain) { -- dump_json_block_node_info_list(list); -+ dump_json_block_graph_info_list(list); - } else { -- dump_json_block_node_info(list->value); -+ dump_json_block_graph_info(list->value); - } - break; - } - -- qapi_free_BlockNodeInfoList(list); -+ qapi_free_BlockGraphInfoList(list); - return 0; - } - -diff --git a/tests/qemu-iotests/065 b/tests/qemu-iotests/065 -index b724c89c7c..b76701c71e 100755 ---- a/tests/qemu-iotests/065 -+++ b/tests/qemu-iotests/065 -@@ -56,7 +56,7 @@ class TestQemuImgInfo(TestImageInfoSpecific): - def test_human(self): - data = qemu_img('info', '--output=human', test_img).stdout.split('\n') - data = data[(data.index('Format specific information:') + 1) -- :data.index('')] -+ :data.index("Child node '/file':")] - for field in data: - self.assertTrue(re.match('^ {4}[^ ]', field) is not None) - data = [line.strip() for line in data] -diff --git a/tests/qemu-iotests/302.out b/tests/qemu-iotests/302.out -index 3e7c281b91..edfa1c4f05 100644 ---- a/tests/qemu-iotests/302.out -+++ b/tests/qemu-iotests/302.out -@@ -4,6 +4,11 @@ image: nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock - file format: raw - virtual size: 448 KiB (458752 bytes) - disk size: unavailable -+Child node '/file': -+ image: 
nbd+unix:///exp?socket=SOCK_DIR/PID-nbd-sock -+ file format: nbd -+ virtual size: 448 KiB (458752 bytes) -+ disk size: unavailable - - === Converted image info === - image: TEST_IMG --- -2.31.1 - diff --git a/kvm-qemu-img-Use-BlockNodeInfo.patch b/kvm-qemu-img-Use-BlockNodeInfo.patch deleted file mode 100644 index 7bfb7e6..0000000 --- a/kvm-qemu-img-Use-BlockNodeInfo.patch +++ /dev/null @@ -1,241 +0,0 @@ -From dca4cbe680baff837ca8ac8bd39b77b46af3f64b Mon Sep 17 00:00:00 2001 -From: Hanna Reitz -Date: Mon, 20 Jun 2022 18:26:57 +0200 -Subject: [PATCH 10/20] qemu-img: Use BlockNodeInfo - -RH-Author: Hanna Czenczek -RH-MergeRequest: 145: Show protocol-level information in qemu-img info -RH-Bugzilla: 1860292 -RH-Acked-by: Kevin Wolf -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Stefano Garzarella -RH-Commit: [5/12] b599af3ec05951a0ba11d9eae2ee19148d6bf624 (hreitz/qemu-kvm-c-9-s) - -qemu-img info never uses ImageInfo's backing-image field, because it -opens the backing chain one by one with BDRV_O_NO_BACKING, and prints -all backing chain nodes' information consecutively. Use BlockNodeInfo -to make it clear that we only print information about a single node, and -that we are not using the backing-image field. - -Notably, bdrv_image_info_dump() does not evaluate the backing-image -field, so we can easily make it take a BlockNodeInfo pointer (and -consequentially rename it to bdrv_node_info_dump()). It makes more -sense this way, because again, the interface now makes it syntactically -clear that backing-image is ignored by this function. 
- -Signed-off-by: Hanna Reitz -Message-Id: <20220620162704.80987-6-hreitz@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit b1f4cd1589a16fec02f264a09bd3560e4ccce3c2) -Signed-off-by: Hanna Czenczek ---- - block/monitor/block-hmp-cmds.c | 2 +- - block/qapi.c | 2 +- - include/block/qapi.h | 2 +- - qapi/block-core.json | 4 +-- - qemu-img.c | 48 +++++++++++++++++----------------- - 5 files changed, 29 insertions(+), 29 deletions(-) - -diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c -index b6135e9bfe..aa37faa601 100644 ---- a/block/monitor/block-hmp-cmds.c -+++ b/block/monitor/block-hmp-cmds.c -@@ -734,7 +734,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info, - monitor_printf(mon, "\nImages:\n"); - image_info = inserted->image; - while (1) { -- bdrv_image_info_dump(image_info); -+ bdrv_node_info_dump(qapi_ImageInfo_base(image_info)); - if (image_info->has_backing_image) { - image_info = image_info->backing_image; - } else { -diff --git a/block/qapi.c b/block/qapi.c -index e5022b4481..ad88bf9b38 100644 ---- a/block/qapi.c -+++ b/block/qapi.c -@@ -865,7 +865,7 @@ void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, - visit_free(v); - } - --void bdrv_image_info_dump(ImageInfo *info) -+void bdrv_node_info_dump(BlockNodeInfo *info) - { - char *size_buf, *dsize_buf; - if (!info->has_actual_size) { -diff --git a/include/block/qapi.h b/include/block/qapi.h -index c7de4e3fa9..22198dcd0c 100644 ---- a/include/block/qapi.h -+++ b/include/block/qapi.h -@@ -45,5 +45,5 @@ void bdrv_query_image_info(BlockDriverState *bs, - void bdrv_snapshot_dump(QEMUSnapshotInfo *sn); - void bdrv_image_info_specific_dump(ImageInfoSpecific *info_spec, - const char *prefix); --void bdrv_image_info_dump(ImageInfo *info); -+void bdrv_node_info_dump(BlockNodeInfo *info); - #endif -diff --git a/qapi/block-core.json b/qapi/block-core.json -index 7720da0498..4cf2deeb6c 100644 ---- a/qapi/block-core.json -+++ 
b/qapi/block-core.json -@@ -5796,9 +5796,9 @@ - ## - # @DummyBlockCoreForceArrays: - # --# Not used by QMP; hack to let us use ImageInfoList internally -+# Not used by QMP; hack to let us use BlockNodeInfoList internally - # - # Since: 8.0 - ## - { 'struct': 'DummyBlockCoreForceArrays', -- 'data': { 'unused-image-info': ['ImageInfo'] } } -+ 'data': { 'unused-block-node-info': ['BlockNodeInfo'] } } -diff --git a/qemu-img.c b/qemu-img.c -index 2f85bb7ede..3b2ca3bbcb 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -2816,13 +2816,13 @@ static void dump_snapshots(BlockDriverState *bs) - g_free(sn_tab); - } - --static void dump_json_image_info_list(ImageInfoList *list) -+static void dump_json_block_node_info_list(BlockNodeInfoList *list) - { - GString *str; - QObject *obj; - Visitor *v = qobject_output_visitor_new(&obj); - -- visit_type_ImageInfoList(v, NULL, &list, &error_abort); -+ visit_type_BlockNodeInfoList(v, NULL, &list, &error_abort); - visit_complete(v, &obj); - str = qobject_to_json_pretty(obj, true); - assert(str != NULL); -@@ -2832,13 +2832,13 @@ static void dump_json_image_info_list(ImageInfoList *list) - g_string_free(str, true); - } - --static void dump_json_image_info(ImageInfo *info) -+static void dump_json_block_node_info(BlockNodeInfo *info) - { - GString *str; - QObject *obj; - Visitor *v = qobject_output_visitor_new(&obj); - -- visit_type_ImageInfo(v, NULL, &info, &error_abort); -+ visit_type_BlockNodeInfo(v, NULL, &info, &error_abort); - visit_complete(v, &obj); - str = qobject_to_json_pretty(obj, true); - assert(str != NULL); -@@ -2848,9 +2848,9 @@ static void dump_json_image_info(ImageInfo *info) - g_string_free(str, true); - } - --static void dump_human_image_info_list(ImageInfoList *list) -+static void dump_human_image_info_list(BlockNodeInfoList *list) - { -- ImageInfoList *elem; -+ BlockNodeInfoList *elem; - bool delim = false; - - for (elem = list; elem; elem = elem->next) { -@@ -2859,7 +2859,7 @@ static void 
dump_human_image_info_list(ImageInfoList *list) - } - delim = true; - -- bdrv_image_info_dump(elem->value); -+ bdrv_node_info_dump(elem->value); - } - } - -@@ -2869,24 +2869,24 @@ static gboolean str_equal_func(gconstpointer a, gconstpointer b) - } - - /** -- * Open an image file chain and return an ImageInfoList -+ * Open an image file chain and return an BlockNodeInfoList - * - * @filename: topmost image filename - * @fmt: topmost image format (may be NULL to autodetect) - * @chain: true - enumerate entire backing file chain - * false - only topmost image file - * -- * Returns a list of ImageInfo objects or NULL if there was an error opening an -- * image file. If there was an error a message will have been printed to -- * stderr. -+ * Returns a list of BlockNodeInfo objects or NULL if there was an error -+ * opening an image file. If there was an error a message will have been -+ * printed to stderr. - */ --static ImageInfoList *collect_image_info_list(bool image_opts, -- const char *filename, -- const char *fmt, -- bool chain, bool force_share) -+static BlockNodeInfoList *collect_image_info_list(bool image_opts, -+ const char *filename, -+ const char *fmt, -+ bool chain, bool force_share) - { -- ImageInfoList *head = NULL; -- ImageInfoList **tail = &head; -+ BlockNodeInfoList *head = NULL; -+ BlockNodeInfoList **tail = &head; - GHashTable *filenames; - Error *err = NULL; - -@@ -2895,7 +2895,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts, - while (filename) { - BlockBackend *blk; - BlockDriverState *bs; -- ImageInfo *info; -+ BlockNodeInfo *info; - - if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) { - error_report("Backing file '%s' creates an infinite loop.", -@@ -2912,7 +2912,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts, - } - bs = blk_bs(blk); - -- bdrv_query_image_info(bs, &info, &err); -+ bdrv_query_block_node_info(bs, &info, &err); - if (err) { - error_report_err(err); - blk_unref(blk); -@@ 
-2945,7 +2945,7 @@ static ImageInfoList *collect_image_info_list(bool image_opts, - return head; - - err: -- qapi_free_ImageInfoList(head); -+ qapi_free_BlockNodeInfoList(head); - g_hash_table_destroy(filenames); - return NULL; - } -@@ -2956,7 +2956,7 @@ static int img_info(int argc, char **argv) - OutputFormat output_format = OFORMAT_HUMAN; - bool chain = false; - const char *filename, *fmt, *output; -- ImageInfoList *list; -+ BlockNodeInfoList *list; - bool image_opts = false; - bool force_share = false; - -@@ -3035,14 +3035,14 @@ static int img_info(int argc, char **argv) - break; - case OFORMAT_JSON: - if (chain) { -- dump_json_image_info_list(list); -+ dump_json_block_node_info_list(list); - } else { -- dump_json_image_info(list->value); -+ dump_json_block_node_info(list->value); - } - break; - } - -- qapi_free_ImageInfoList(list); -+ qapi_free_BlockNodeInfoList(list); - return 0; - } - --- -2.31.1 - diff --git a/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch b/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch deleted file mode 100644 index 693049c..0000000 --- a/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch +++ /dev/null @@ -1,70 +0,0 @@ -From d0d3d694b3a8d200442484ae0c9d263e0439cd04 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 12 Jan 2023 20:14:53 +0100 -Subject: [PATCH 03/20] qemu-img bitmap: Report errors while closing the image -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Kevin Wolf -RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image -RH-Bugzilla: 2150180 -RH-Acked-by: Thomas Huth -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Stefano Garzarella -RH-Commit: [3/4] 4a704fec2e3bcb47b2be1529e27fd1833d58c517 (kmwolf/centos-qemu-kvm) - -blk_unref() can't report any errors that happen while closing the image. 
-For example, if qcow2 hits an -ENOSPC error while writing out dirty -bitmaps when it's closed, it prints error messages to stderr, but -'qemu-img bitmap' won't see any error return value and will therefore -look successful with exit code 0. - -In order to fix this, manually inactivate the image first before calling -blk_unref(). This already performs the operations that would be most -likely to fail while closing the image, but it can still return errors. - -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1330 -Signed-off-by: Kevin Wolf -Message-Id: <20230112191454.169353-4-kwolf@redhat.com> -Reviewed-by: Hanna Czenczek -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Kevin Wolf -(cherry picked from commit c5e477110dcb8ef4642dce399777c3dee68fa96c) -Signed-off-by: Kevin Wolf ---- - qemu-img.c | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/qemu-img.c b/qemu-img.c -index 3cbdda9f76..2f85bb7ede 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -4646,6 +4646,7 @@ static int img_bitmap(int argc, char **argv) - QSIMPLEQ_HEAD(, ImgBitmapAction) actions; - ImgBitmapAction *act, *act_next; - const char *op; -+ int inactivate_ret; - - QSIMPLEQ_INIT(&actions); - -@@ -4830,6 +4831,16 @@ static int img_bitmap(int argc, char **argv) - ret = 0; - - out: -+ /* -+ * Manually inactivate the images first because this way we can know whether -+ * an error occurred. blk_unref() doesn't tell us about failures. 
-+ */ -+ inactivate_ret = bdrv_inactivate_all(); -+ if (inactivate_ret < 0) { -+ error_report("Error while closing the image: %s", strerror(-inactivate_ret)); -+ ret = 1; -+ } -+ - blk_unref(src); - blk_unref(blk); - qemu_opts_del(opts); --- -2.31.1 - diff --git a/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch b/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch deleted file mode 100644 index 5cac3ba..0000000 --- a/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 2f5369f0effaa23be746f9b5d9f6a0bfc346fb7d Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 12 Jan 2023 20:14:52 +0100 -Subject: [PATCH 02/20] qemu-img commit: Report errors while closing the image - -RH-Author: Kevin Wolf -RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image -RH-Bugzilla: 2150180 -RH-Acked-by: Thomas Huth -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Stefano Garzarella -RH-Commit: [2/4] faedd43355463b1210a3f21ecd430f478bd06f5a (kmwolf/centos-qemu-kvm) - -blk_unref() can't report any errors that happen while closing the image. -For example, if qcow2 hits an -ENOSPC error while writing out dirty -bitmaps when it's closed, it prints error messages to stderr, but -'qemu-img commit' won't see any error return value and will therefore -look successful with exit code 0. - -In order to fix this, manually inactivate the image first before calling -blk_unref(). This already performs the operations that would be most -likely to fail while closing the image, but it can still return errors. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20230112191454.169353-3-kwolf@redhat.com> -Reviewed-by: Hanna Czenczek -Signed-off-by: Kevin Wolf -(cherry picked from commit 44efba2d713aca076c411594d0c1a2b99155eeb3) -Signed-off-by: Kevin Wolf ---- - qemu-img.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/qemu-img.c b/qemu-img.c -index a9b3a8103c..3cbdda9f76 100644 ---- a/qemu-img.c -+++ b/qemu-img.c -@@ -449,6 +449,11 @@ static BlockBackend *img_open(bool image_opts, - blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet, - force_share); - } -+ -+ if (blk) { -+ blk_set_force_allow_inactivate(blk); -+ } -+ - return blk; - } - -@@ -1119,6 +1124,14 @@ unref_backing: - done: - qemu_progress_end(); - -+ /* -+ * Manually inactivate the image first because this way we can know whether -+ * an error occurred. blk_unref() doesn't tell us about failures. -+ */ -+ ret = bdrv_inactivate_all(); -+ if (ret < 0 && !local_err) { -+ error_setg_errno(&local_err, -ret, "Error while closing the image"); -+ } - blk_unref(blk); - - if (local_err) { --- -2.31.1 - diff --git a/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch b/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch deleted file mode 100644 index 6b88e5c..0000000 --- a/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch +++ /dev/null @@ -1,166 +0,0 @@ -From 06030aa79fcb2d90d6a670e75d959aa0c3204b5c Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 12 Jan 2023 20:14:54 +0100 -Subject: [PATCH 04/20] qemu-iotests: Test qemu-img bitmap/commit exit code on - error - -RH-Author: Kevin Wolf -RH-MergeRequest: 143: qemu-img: Fix exit code for errors closing the image -RH-Bugzilla: 2150180 -RH-Acked-by: Thomas Huth -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Stefano Garzarella -RH-Commit: [4/4] b96bb671bcfb7ae18015fda14db70f42a83a6ea7 (kmwolf/centos-qemu-kvm) - -This tests that when an error happens while writing back bitmaps to the -image file in 
qcow2_inactivate(), 'qemu-img bitmap/commit' actually -return an error value in their exit code instead of making the operation -look successful to scripts. - -Signed-off-by: Kevin Wolf -Message-Id: <20230112191454.169353-5-kwolf@redhat.com> -Reviewed-by: Hanna Czenczek -Signed-off-by: Kevin Wolf -(cherry picked from commit 07a4e1f8e5418f36424cd57d5d061b090a238c65) -Signed-off-by: Kevin Wolf ---- - .../qemu-iotests/tests/qemu-img-close-errors | 96 +++++++++++++++++++ - .../tests/qemu-img-close-errors.out | 23 +++++ - 2 files changed, 119 insertions(+) - create mode 100755 tests/qemu-iotests/tests/qemu-img-close-errors - create mode 100644 tests/qemu-iotests/tests/qemu-img-close-errors.out - -diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors b/tests/qemu-iotests/tests/qemu-img-close-errors -new file mode 100755 -index 0000000000..50bfb6cfa2 ---- /dev/null -+++ b/tests/qemu-iotests/tests/qemu-img-close-errors -@@ -0,0 +1,96 @@ -+#!/usr/bin/env bash -+# group: rw auto quick -+# -+# Check that errors while closing the image, in particular writing back dirty -+# bitmaps, is correctly reported with a failing qemu-img exit code. -+# -+# Copyright (C) 2023 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+# creator -+owner=kwolf@redhat.com -+ -+seq="$(basename $0)" -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! 
-+ -+_cleanup() -+{ -+ _cleanup_test_img -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+cd .. -+. ./common.rc -+. ./common.filter -+ -+_supported_fmt qcow2 -+_supported_proto file -+_supported_os Linux -+ -+size=1G -+ -+# The error we are going to use is ENOSPC. Depending on how many bitmaps we -+# create in the backing file (and therefore increase the used up space), we get -+# failures in different places. With a low number, only merging the bitmap -+# fails, whereas with a higher number, already 'qemu-img commit' fails. -+for max_bitmap in 6 7; do -+ echo -+ echo "=== Test with $max_bitmap bitmaps ===" -+ -+ TEST_IMG="$TEST_IMG.base" _make_test_img -q $size -+ for i in $(seq 1 $max_bitmap); do -+ $QEMU_IMG bitmap --add "$TEST_IMG.base" "stale-bitmap-$i" -+ done -+ -+ # Simulate a block device of 128 MB by resizing the image file accordingly -+ # and then enforcing the size with the raw driver -+ $QEMU_IO -f raw -c "truncate 128M" "$TEST_IMG.base" -+ BASE_JSON='json:{ -+ "driver": "qcow2", -+ "file": { -+ "driver": "raw", -+ "size": 134217728, -+ "file": { -+ "driver": "file", -+ "filename":"'"$TEST_IMG.base"'" -+ } -+ } -+ }' -+ -+ _make_test_img -q -b "$BASE_JSON" -F $IMGFMT -+ $QEMU_IMG bitmap --add "$TEST_IMG" "good-bitmap" -+ -+ $QEMU_IO -c 'write 0 126m' "$TEST_IMG" | _filter_qemu_io -+ -+ $QEMU_IMG commit -d "$TEST_IMG" 2>&1 | _filter_generated_node_ids -+ echo "qemu-img commit exit code: ${PIPESTATUS[0]}" -+ -+ $QEMU_IMG bitmap --add "$BASE_JSON" "good-bitmap" -+ echo "qemu-img bitmap --add exit code: $?" 
-+ -+ $QEMU_IMG bitmap --merge "good-bitmap" -b "$TEST_IMG" "$BASE_JSON" \ -+ "good-bitmap" 2>&1 | _filter_generated_node_ids -+ echo "qemu-img bitmap --merge exit code: ${PIPESTATUS[0]}" -+done -+ -+# success, all done -+echo "*** done" -+rm -f $seq.full -+status=0 -+ -diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors.out b/tests/qemu-iotests/tests/qemu-img-close-errors.out -new file mode 100644 -index 0000000000..1bfe88f176 ---- /dev/null -+++ b/tests/qemu-iotests/tests/qemu-img-close-errors.out -@@ -0,0 +1,23 @@ -+QA output created by qemu-img-close-errors -+ -+=== Test with 6 bitmaps === -+wrote 132120576/132120576 bytes at offset 0 -+126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+Image committed. -+qemu-img commit exit code: 0 -+qemu-img bitmap --add exit code: 0 -+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device -+qemu-img: Error while closing the image: Invalid argument -+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device -+qemu-img bitmap --merge exit code: 1 -+ -+=== Test with 7 bitmaps === -+wrote 132120576/132120576 bytes at offset 0 -+126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device -+qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device -+qemu-img: Error while closing the image: Invalid argument -+qemu-img commit exit code: 1 -+qemu-img bitmap --add exit code: 0 -+qemu-img bitmap --merge exit code: 0 -+*** done --- -2.31.1 - diff --git a/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch b/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch deleted file mode 100644 index 
25f30ff..0000000 --- a/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch +++ /dev/null @@ -1,146 +0,0 @@ -From aa61e4c437d29a791ea09a01f7230231f1e53356 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 05/12] qemu-thread-posix: cleanup, fix, document QemuEvent - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [2/9] c3bdf75f884e137c667316aaac96bb4a0b9ec2d9 (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit 9586a1329f5dce6c1d7f4de53cf0536644d7e593 -Author: Paolo Bonzini -Date: Thu Mar 2 11:19:52 2023 +0100 - - qemu-thread-posix: cleanup, fix, document QemuEvent - - QemuEvent is currently broken on ARM due to missing memory barriers - after qatomic_*(). Apart from adding the memory barrier, a closer look - reveals some unpaired memory barriers too. Document more clearly what - is going on. - - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - util/qemu-thread-posix.c | 69 ++++++++++++++++++++++++++++------------ - 1 file changed, 49 insertions(+), 20 deletions(-) - -diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c -index bae938c670..cc74f4ede0 100644 ---- a/util/qemu-thread-posix.c -+++ b/util/qemu-thread-posix.c -@@ -379,13 +379,21 @@ void qemu_event_destroy(QemuEvent *ev) - - void qemu_event_set(QemuEvent *ev) - { -- /* qemu_event_set has release semantics, but because it *loads* -+ assert(ev->initialized); -+ -+ /* -+ * Pairs with both qemu_event_reset() and qemu_event_wait(). -+ * -+ * qemu_event_set has release semantics, but because it *loads* - * ev->value we need a full memory barrier here. 
- */ -- assert(ev->initialized); - smp_mb(); - if (qatomic_read(&ev->value) != EV_SET) { -- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) { -+ int old = qatomic_xchg(&ev->value, EV_SET); -+ -+ /* Pairs with memory barrier in kernel futex_wait system call. */ -+ smp_mb__after_rmw(); -+ if (old == EV_BUSY) { - /* There were waiters, wake them up. */ - qemu_futex_wake(ev, INT_MAX); - } -@@ -394,18 +402,19 @@ void qemu_event_set(QemuEvent *ev) - - void qemu_event_reset(QemuEvent *ev) - { -- unsigned value; -- - assert(ev->initialized); -- value = qatomic_read(&ev->value); -- smp_mb_acquire(); -- if (value == EV_SET) { -- /* -- * If there was a concurrent reset (or even reset+wait), -- * do nothing. Otherwise change EV_SET->EV_FREE. -- */ -- qatomic_or(&ev->value, EV_FREE); -- } -+ -+ /* -+ * If there was a concurrent reset (or even reset+wait), -+ * do nothing. Otherwise change EV_SET->EV_FREE. -+ */ -+ qatomic_or(&ev->value, EV_FREE); -+ -+ /* -+ * Order reset before checking the condition in the caller. -+ * Pairs with the first memory barrier in qemu_event_set(). -+ */ -+ smp_mb__after_rmw(); - } - - void qemu_event_wait(QemuEvent *ev) -@@ -413,20 +422,40 @@ void qemu_event_wait(QemuEvent *ev) - unsigned value; - - assert(ev->initialized); -- value = qatomic_read(&ev->value); -- smp_mb_acquire(); -+ -+ /* -+ * qemu_event_wait must synchronize with qemu_event_set even if it does -+ * not go down the slow path, so this load-acquire is needed that -+ * synchronizes with the first memory barrier in qemu_event_set(). -+ * -+ * If we do go down the slow path, there is no requirement at all: we -+ * might miss a qemu_event_set() here but ultimately the memory barrier in -+ * qemu_futex_wait() will ensure the check is done correctly. -+ */ -+ value = qatomic_load_acquire(&ev->value); - if (value != EV_SET) { - if (value == EV_FREE) { - /* -- * Leave the event reset and tell qemu_event_set that there -- * are waiters. 
No need to retry, because there cannot be -- * a concurrent busy->free transition. After the CAS, the -- * event will be either set or busy. -+ * Leave the event reset and tell qemu_event_set that there are -+ * waiters. No need to retry, because there cannot be a concurrent -+ * busy->free transition. After the CAS, the event will be either -+ * set or busy. -+ * -+ * This cmpxchg doesn't have particular ordering requirements if it -+ * succeeds (moving the store earlier can only cause qemu_event_set() -+ * to issue _more_ wakeups), the failing case needs acquire semantics -+ * like the load above. - */ - if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { - return; - } - } -+ -+ /* -+ * This is the final check for a concurrent set, so it does need -+ * a smp_mb() pairing with the second barrier of qemu_event_set(). -+ * The barrier is inside the FUTEX_WAIT system call. -+ */ - qemu_futex_wait(ev, EV_BUSY); - } - } --- -2.39.1 - diff --git a/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch b/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch deleted file mode 100644 index 631d541..0000000 --- a/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch +++ /dev/null @@ -1,162 +0,0 @@ -From 02347869410fe53d814487501fb586f7dc614375 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Thu, 9 Mar 2023 08:24:36 -0500 -Subject: [PATCH 06/12] qemu-thread-win32: cleanup, fix, document QemuEvent - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 158: qatomic: add smp_mb__before/after_rmw() -RH-Bugzilla: 2175660 -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Eric Auger -RH-Commit: [3/9] d228e9d6a4a75dd1f0a23a6dceaf4fea23d69192 (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2175660 - -commit 6c5df4b48f0c52a61342ecb307a43f4c2a3565c4 -Author: Paolo Bonzini -Date: Thu Mar 2 11:22:50 2023 +0100 - - qemu-thread-win32: cleanup, fix, document 
QemuEvent - - QemuEvent is currently broken on ARM due to missing memory barriers - after qatomic_*(). Apart from adding the memory barrier, a closer look - reveals some unpaired memory barriers that are not really needed and - complicated the functions unnecessarily. Also, it is relying on - a memory barrier in ResetEvent(); the barrier _ought_ to be there - but there is really no documentation about it, so make it explicit. - - Reviewed-by: Richard Henderson - Reviewed-by: David Hildenbrand - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - util/qemu-thread-win32.c | 82 +++++++++++++++++++++++++++------------- - 1 file changed, 56 insertions(+), 26 deletions(-) - -diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c -index 69db254ac7..a7fe3cc345 100644 ---- a/util/qemu-thread-win32.c -+++ b/util/qemu-thread-win32.c -@@ -272,12 +272,20 @@ void qemu_event_destroy(QemuEvent *ev) - void qemu_event_set(QemuEvent *ev) - { - assert(ev->initialized); -- /* qemu_event_set has release semantics, but because it *loads* -+ -+ /* -+ * Pairs with both qemu_event_reset() and qemu_event_wait(). -+ * -+ * qemu_event_set has release semantics, but because it *loads* - * ev->value we need a full memory barrier here. - */ - smp_mb(); - if (qatomic_read(&ev->value) != EV_SET) { -- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) { -+ int old = qatomic_xchg(&ev->value, EV_SET); -+ -+ /* Pairs with memory barrier after ResetEvent. */ -+ smp_mb__after_rmw(); -+ if (old == EV_BUSY) { - /* There were waiters, wake them up. */ - SetEvent(ev->event); - } -@@ -286,17 +294,19 @@ void qemu_event_set(QemuEvent *ev) - - void qemu_event_reset(QemuEvent *ev) - { -- unsigned value; -- - assert(ev->initialized); -- value = qatomic_read(&ev->value); -- smp_mb_acquire(); -- if (value == EV_SET) { -- /* If there was a concurrent reset (or even reset+wait), -- * do nothing. Otherwise change EV_SET->EV_FREE. 
-- */ -- qatomic_or(&ev->value, EV_FREE); -- } -+ -+ /* -+ * If there was a concurrent reset (or even reset+wait), -+ * do nothing. Otherwise change EV_SET->EV_FREE. -+ */ -+ qatomic_or(&ev->value, EV_FREE); -+ -+ /* -+ * Order reset before checking the condition in the caller. -+ * Pairs with the first memory barrier in qemu_event_set(). -+ */ -+ smp_mb__after_rmw(); - } - - void qemu_event_wait(QemuEvent *ev) -@@ -304,29 +314,49 @@ void qemu_event_wait(QemuEvent *ev) - unsigned value; - - assert(ev->initialized); -- value = qatomic_read(&ev->value); -- smp_mb_acquire(); -+ -+ /* -+ * qemu_event_wait must synchronize with qemu_event_set even if it does -+ * not go down the slow path, so this load-acquire is needed that -+ * synchronizes with the first memory barrier in qemu_event_set(). -+ * -+ * If we do go down the slow path, there is no requirement at all: we -+ * might miss a qemu_event_set() here but ultimately the memory barrier in -+ * qemu_futex_wait() will ensure the check is done correctly. -+ */ -+ value = qatomic_load_acquire(&ev->value); - if (value != EV_SET) { - if (value == EV_FREE) { -- /* qemu_event_set is not yet going to call SetEvent, but we are -- * going to do another check for EV_SET below when setting EV_BUSY. -- * At that point it is safe to call WaitForSingleObject. -+ /* -+ * Here the underlying kernel event is reset, but qemu_event_set is -+ * not yet going to call SetEvent. However, there will be another -+ * check for EV_SET below when setting EV_BUSY. At that point it -+ * is safe to call WaitForSingleObject. - */ - ResetEvent(ev->event); - -- /* Tell qemu_event_set that there are waiters. No need to retry -- * because there cannot be a concurrent busy->free transition. -- * After the CAS, the event will be either set or busy. -+ /* -+ * It is not clear whether ResetEvent provides this barrier; kernel -+ * APIs (KeResetEvent/KeClearEvent) do not. Better safe than sorry! 
-+ */ -+ smp_mb(); -+ -+ /* -+ * Leave the event reset and tell qemu_event_set that there are -+ * waiters. No need to retry, because there cannot be a concurrent -+ * busy->free transition. After the CAS, the event will be either -+ * set or busy. - */ - if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { -- value = EV_SET; -- } else { -- value = EV_BUSY; -+ return; - } - } -- if (value == EV_BUSY) { -- WaitForSingleObject(ev->event, INFINITE); -- } -+ -+ /* -+ * ev->value is now EV_BUSY. Since we didn't observe EV_SET, -+ * qemu_event_set() must observe EV_BUSY and call SetEvent(). -+ */ -+ WaitForSingleObject(ev->event, INFINITE); - } - } - --- -2.39.1 - diff --git a/kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch b/kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch deleted file mode 100644 index 1a2e863..0000000 --- a/kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 546e4213c4e8a7b2e369315a71bc9aec091eed6e Mon Sep 17 00:00:00 2001 -From: Cornelia Huck -Date: Mon, 19 Dec 2022 10:30:26 +0100 -Subject: redhat: fix virt-rhel9.2.0 compat props - -RH-Author: Cornelia Huck -RH-MergeRequest: 127: redhat: fix virt-rhel9.2.0 compat props -RH-Bugzilla: 2154640 -RH-Acked-by: Eric Auger -RH-Acked-by: Gavin Shan -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] 49635fdc1d9a934ece78abd160b07c19909f876a (cohuck/qemu-kvm-c9s) - -We need to include arm_rhel_compat props in the latest machine. 
- -Signed-off-by: Cornelia Huck ---- - hw/arm/virt.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 0a94f31dd1..bf18838b87 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3520,6 +3520,7 @@ type_init(rhel_machine_init); - - static void rhel920_virt_options(MachineClass *mc) - { -+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); - } - DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) - -@@ -3529,7 +3530,6 @@ static void rhel900_virt_options(MachineClass *mc) - - rhel920_virt_options(mc); - -- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); - compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); - - /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ --- -2.38.1 - diff --git a/kvm-s390x-pci-coalesce-unmap-operations.patch b/kvm-s390x-pci-coalesce-unmap-operations.patch deleted file mode 100644 index 8bf1f61..0000000 --- a/kvm-s390x-pci-coalesce-unmap-operations.patch +++ /dev/null @@ -1,125 +0,0 @@ -From ed90f91b61844abd2dff2eb970f721a6cf072235 Mon Sep 17 00:00:00 2001 -From: Matthew Rosato -Date: Fri, 28 Oct 2022 15:47:57 -0400 -Subject: [PATCH 6/9] s390x/pci: coalesce unmap operations -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset -RH-Bugzilla: 2163701 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/3] 80c3a2c1d720057ae2a80b338ea06c9c6c804532 (clegoate/qemu-kvm-c9s) - -Currently, each unmapped page is handled as an individual iommu -region notification. Attempt to group contiguous unmap operations -into fewer notifications to reduce overhead. 
- -Signed-off-by: Matthew Rosato -Message-Id: <20221028194758.204007-3-mjrosato@linux.ibm.com> -Reviewed-by: Eric Farman -Signed-off-by: Thomas Huth -(cherry picked from commit ef536007c3301bbd6a787e4c2210ea289adaa6f0) -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-inst.c | 51 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 51 insertions(+) - -diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c -index 7cc4bcf850..66e764f901 100644 ---- a/hw/s390x/s390-pci-inst.c -+++ b/hw/s390x/s390-pci-inst.c -@@ -640,6 +640,8 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, - } - g_hash_table_remove(iommu->iotlb, &entry->iova); - inc_dma_avail(iommu); -+ /* Don't notify the iommu yet, maybe we can bundle contiguous unmaps */ -+ goto out; - } else { - if (cache) { - if (cache->perm == entry->perm && -@@ -663,15 +665,44 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, - dec_dma_avail(iommu); - } - -+ /* -+ * All associated iotlb entries have already been cleared, trigger the -+ * unmaps. -+ */ - memory_region_notify_iommu(&iommu->iommu_mr, 0, event); - - out: - return iommu->dma_limit ? 
iommu->dma_limit->avail : 1; - } - -+static void s390_pci_batch_unmap(S390PCIIOMMU *iommu, uint64_t iova, -+ uint64_t len) -+{ -+ uint64_t remain = len, start = iova, end = start + len - 1, mask, size; -+ IOMMUTLBEvent event = { -+ .type = IOMMU_NOTIFIER_UNMAP, -+ .entry = { -+ .target_as = &address_space_memory, -+ .translated_addr = 0, -+ .perm = IOMMU_NONE, -+ }, -+ }; -+ -+ while (remain >= TARGET_PAGE_SIZE) { -+ mask = dma_aligned_pow2_mask(start, end, 64); -+ size = mask + 1; -+ event.entry.iova = start; -+ event.entry.addr_mask = mask; -+ memory_region_notify_iommu(&iommu->iommu_mr, 0, event); -+ start += size; -+ remain -= size; -+ } -+} -+ - int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) - { - CPUS390XState *env = &cpu->env; -+ uint64_t iova, coalesce = 0; - uint32_t fh; - uint16_t error = 0; - S390PCIBusDevice *pbdev; -@@ -742,6 +773,21 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) - break; - } - -+ /* -+ * If this is an unmap of a PTE, let's try to coalesce multiple unmaps -+ * into as few notifier events as possible. 
-+ */ -+ if (entry.perm == IOMMU_NONE && entry.len == TARGET_PAGE_SIZE) { -+ if (coalesce == 0) { -+ iova = entry.iova; -+ } -+ coalesce += entry.len; -+ } else if (coalesce > 0) { -+ /* Unleash the coalesced unmap before processing a new map */ -+ s390_pci_batch_unmap(iommu, iova, coalesce); -+ coalesce = 0; -+ } -+ - start += entry.len; - while (entry.iova < start && entry.iova < end) { - if (dma_avail > 0 || entry.perm == IOMMU_NONE) { -@@ -759,6 +805,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) - } - } - } -+ if (coalesce) { -+ /* Unleash the coalesced unmap before finishing rpcit */ -+ s390_pci_batch_unmap(iommu, iova, coalesce); -+ coalesce = 0; -+ } - if (again && dma_avail > 0) - goto retry; - err: --- -2.31.1 - diff --git a/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch b/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch deleted file mode 100644 index bbe2595..0000000 --- a/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch +++ /dev/null @@ -1,147 +0,0 @@ -From 1ed1f8fc20a4883bc0bc1f58d299b0278abc5442 Mon Sep 17 00:00:00 2001 -From: Matthew Rosato -Date: Fri, 9 Dec 2022 14:57:00 -0500 -Subject: [PATCH 8/9] s390x/pci: reset ISM passthrough devices on shutdown and - system reset -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset -RH-Bugzilla: 2163701 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/3] c531352b9d57f51ba938d4c46ee19a5706ade697 (clegoate/qemu-kvm-c9s) - -ISM device firmware stores unique state information that can -can cause a wholesale unmap of the associated IOMMU (e.g. 
when -we get a termination signal for QEMU) to trigger firmware errors -because firmware believes we are attempting to invalidate entries -that are still in-use by the guest OS (when in fact that guest is -in the process of being terminated or rebooted). -To alleviate this, register both a shutdown notifier (for unexpected -termination cases e.g. virsh destroy) as well as a reset callback -(for cases like guest OS reboot). For each of these scenarios, trigger -PCI device reset; this is enough to indicate to firmware that the IOMMU -is no longer in-use by the guest OS, making it safe to invalidate any -associated IOMMU entries. - -Fixes: 15d0e7942d3b ("s390x/pci: don't fence interpreted devices without MSI-X") -Signed-off-by: Matthew Rosato -Message-Id: <20221209195700.263824-1-mjrosato@linux.ibm.com> -Reviewed-by: Eric Farman -[thuth: Adjusted the hunk in s390-pci-vfio.c due to different context] -Signed-off-by: Thomas Huth -(cherry picked from commit 03451953c79e6b31f7860ee0c35b28e181d573c1) -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-bus.c | 28 ++++++++++++++++++++++++++++ - hw/s390x/s390-pci-vfio.c | 2 ++ - include/hw/s390x/s390-pci-bus.h | 5 +++++ - 3 files changed, 35 insertions(+) - -diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c -index 977e7daa15..02751f3597 100644 ---- a/hw/s390x/s390-pci-bus.c -+++ b/hw/s390x/s390-pci-bus.c -@@ -24,6 +24,8 @@ - #include "hw/pci/msi.h" - #include "qemu/error-report.h" - #include "qemu/module.h" -+#include "sysemu/reset.h" -+#include "sysemu/runstate.h" - - #ifndef DEBUG_S390PCI_BUS - #define DEBUG_S390PCI_BUS 0 -@@ -150,10 +152,30 @@ out: - psccb->header.response_code = cpu_to_be16(rc); - } - -+static void s390_pci_shutdown_notifier(Notifier *n, void *opaque) -+{ -+ S390PCIBusDevice *pbdev = container_of(n, S390PCIBusDevice, -+ shutdown_notifier); -+ -+ pci_device_reset(pbdev->pdev); -+} -+ -+static void s390_pci_reset_cb(void *opaque) -+{ -+ S390PCIBusDevice *pbdev = opaque; -+ -+ 
pci_device_reset(pbdev->pdev); -+} -+ - static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev) - { - HotplugHandler *hotplug_ctrl; - -+ if (pbdev->pft == ZPCI_PFT_ISM) { -+ notifier_remove(&pbdev->shutdown_notifier); -+ qemu_unregister_reset(s390_pci_reset_cb, pbdev); -+ } -+ - /* Unplug the PCI device */ - if (pbdev->pdev) { - DeviceState *pdev = DEVICE(pbdev->pdev); -@@ -1111,6 +1133,12 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, - pbdev->fh |= FH_SHM_VFIO; - pbdev->forwarding_assist = false; - } -+ /* Register shutdown notifier and reset callback for ISM devices */ -+ if (pbdev->pft == ZPCI_PFT_ISM) { -+ pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier; -+ qemu_register_shutdown_notifier(&pbdev->shutdown_notifier); -+ qemu_register_reset(s390_pci_reset_cb, pbdev); -+ } - } else { - pbdev->fh |= FH_SHM_EMUL; - /* Always intercept emulated devices */ -diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c -index f7bf36cec8..f51190d466 100644 ---- a/hw/s390x/s390-pci-vfio.c -+++ b/hw/s390x/s390-pci-vfio.c -@@ -124,6 +124,8 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, - /* The following values remain 0 until we support other FMB formats */ - pbdev->zpci_fn.fmbl = 0; - pbdev->zpci_fn.pft = 0; -+ /* Store function type separately for type-specific behavior */ -+ pbdev->pft = cap->pft; - - /* - * If appropriate, reduce the size of the supported DMA aperture reported -diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h -index 1c46e3a269..e0a9f9385b 100644 ---- a/include/hw/s390x/s390-pci-bus.h -+++ b/include/hw/s390x/s390-pci-bus.h -@@ -39,6 +39,9 @@ - #define UID_CHECKING_ENABLED 0x01 - #define ZPCI_DTSM 0x40 - -+/* zPCI Function Types */ -+#define ZPCI_PFT_ISM 5 -+ - OBJECT_DECLARE_SIMPLE_TYPE(S390pciState, S390_PCI_HOST_BRIDGE) - OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBus, S390_PCI_BUS) - OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBusDevice, S390_PCI_DEVICE) -@@ -344,6 +347,7 
@@ struct S390PCIBusDevice { - uint16_t noi; - uint16_t maxstbl; - uint8_t sum; -+ uint8_t pft; - S390PCIGroup *pci_group; - ClpRspQueryPci zpci_fn; - S390MsixInfo msix; -@@ -352,6 +356,7 @@ struct S390PCIBusDevice { - MemoryRegion msix_notify_mr; - IndAddr *summary_ind; - IndAddr *indicator; -+ Notifier shutdown_notifier; - bool pci_unplug_request_processed; - bool unplug_requested; - bool interp; --- -2.31.1 - diff --git a/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch b/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch deleted file mode 100644 index 0992724..0000000 --- a/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch +++ /dev/null @@ -1,91 +0,0 @@ -From ee69c8c57fe62fc200f749c4ce3927c88803644d Mon Sep 17 00:00:00 2001 -From: Matthew Rosato -Date: Fri, 28 Oct 2022 15:47:58 -0400 -Subject: [PATCH 7/9] s390x/pci: shrink DMA aperture to be bound by vfio DMA - limit -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 141: s390x/pci: reset ISM passthrough devices on shutdown and system reset -RH-Bugzilla: 2163701 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/3] 0956bbb4773dd0085f6aed59d6284c704b4fed3b (clegoate/qemu-kvm-c9s) - -Currently, s390x-pci performs accounting against the vfio DMA -limit and triggers the guest to clean up mappings when the limit -is reached. Let's go a step further and also limit the size of -the supported DMA aperture reported to the guest based upon the -initial vfio DMA limit reported for the container (if less than -than the size reported by the firmware/host zPCI layer). This -avoids processing sections of the guest DMA table during global -refresh that, for common use cases, will never be used anway, and -makes exhausting the vfio DMA limit due to mismatch between guest -aperture size and host limit far less likely and more indicitive -of an error. 
- -Signed-off-by: Matthew Rosato -Message-Id: <20221028194758.204007-4-mjrosato@linux.ibm.com> -Reviewed-by: Eric Farman -Signed-off-by: Thomas Huth -(cherry picked from commit df202e3ff3fccb49868e08f20d0bda86cb953fbe) -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-vfio.c | 11 +++++++++++ - include/hw/s390x/s390-pci-bus.h | 1 + - 2 files changed, 12 insertions(+) - -diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c -index 5f0adb0b4a..f7bf36cec8 100644 ---- a/hw/s390x/s390-pci-vfio.c -+++ b/hw/s390x/s390-pci-vfio.c -@@ -84,6 +84,7 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, - cnt->users = 1; - cnt->avail = avail; - QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link); -+ pbdev->iommu->max_dma_limit = avail; - return cnt; - } - -@@ -103,6 +104,7 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, - struct vfio_info_cap_header *hdr; - struct vfio_device_info_cap_zpci_base *cap; - VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); -+ uint64_t vfio_size; - - hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE); - -@@ -122,6 +124,15 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, - /* The following values remain 0 until we support other FMB formats */ - pbdev->zpci_fn.fmbl = 0; - pbdev->zpci_fn.pft = 0; -+ -+ /* -+ * If appropriate, reduce the size of the supported DMA aperture reported -+ * to the guest based upon the vfio DMA limit. 
-+ */ -+ vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS; -+ if (vfio_size < (cap->end_dma - cap->start_dma + 1)) { -+ pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1; -+ } - } - - static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info, -diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h -index 0605fcea24..1c46e3a269 100644 ---- a/include/hw/s390x/s390-pci-bus.h -+++ b/include/hw/s390x/s390-pci-bus.h -@@ -278,6 +278,7 @@ struct S390PCIIOMMU { - uint64_t g_iota; - uint64_t pba; - uint64_t pal; -+ uint64_t max_dma_limit; - GHashTable *iotlb; - S390PCIDMACount *dma_limit; - }; --- -2.31.1 - diff --git a/kvm-s390x-pv-Implement-a-CGS-check-helper.patch b/kvm-s390x-pv-Implement-a-CGS-check-helper.patch deleted file mode 100644 index c3383af..0000000 --- a/kvm-s390x-pv-Implement-a-CGS-check-helper.patch +++ /dev/null @@ -1,109 +0,0 @@ -From 9452246e59a5f16f44fdf9a7d514b947faf1d5fc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 16 Jan 2023 18:46:05 +0100 -Subject: [PATCH 5/9] s390x/pv: Implement a CGS check helper -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 139: s390x/pv: Implement a CGS check helper -RH-Bugzilla: 2122523 -RH-Acked-by: Thomas Huth -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Commit: [1/1] 8551ce772b10de653b4e1c8be60aae60ec98b421 (clegoate/qemu-kvm-c9s) - -When a protected VM is started with the maximum number of CPUs (248), -the service call providing information on the CPUs requires more -buffer space than allocated and QEMU disgracefully aborts : - - LOADPARM=[........] - Using virtio-blk. - Using SCSI scheme. - ................................................................................... 
- qemu-system-s390x: KVM_S390_MEM_OP failed: Argument list too long - -When protected virtualization is initialized, compute the maximum -number of vCPUs supported by the machine and return useful information -to the user before the machine starts in case of error. - -Suggested-by: Thomas Huth -Reviewed-by: Thomas Huth -Signed-off-by: Cédric Le Goater -Message-Id: <20230116174607.2459498-2-clg@kaod.org> -Signed-off-by: Thomas Huth -(cherry picked from commit 75d7150c636569f6687f7e70a33be893be43eb5f) -Signed-off-by: Cédric Le Goater ---- - hw/s390x/pv.c | 40 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 40 insertions(+) - -diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c -index 8dfe92d8df..8a1c71436b 100644 ---- a/hw/s390x/pv.c -+++ b/hw/s390x/pv.c -@@ -20,6 +20,7 @@ - #include "exec/confidential-guest-support.h" - #include "hw/s390x/ipl.h" - #include "hw/s390x/pv.h" -+#include "hw/s390x/sclp.h" - #include "target/s390x/kvm/kvm_s390x.h" - - static bool info_valid; -@@ -249,6 +250,41 @@ struct S390PVGuestClass { - ConfidentialGuestSupportClass parent_class; - }; - -+/* -+ * If protected virtualization is enabled, the amount of data that the -+ * Read SCP Info Service Call can use is limited to one page. The -+ * available space also depends on the Extended-Length SCCB (ELS) -+ * feature which can take more buffer space to store feature -+ * information. This impacts the maximum number of CPUs supported in -+ * the machine. -+ */ -+static uint32_t s390_pv_get_max_cpus(void) -+{ -+ int offset_cpu = s390_has_feat(S390_FEAT_EXTENDED_LENGTH_SCCB) ? 
-+ offsetof(ReadInfo, entries) : SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET; -+ -+ return (TARGET_PAGE_SIZE - offset_cpu) / sizeof(CPUEntry); -+} -+ -+static bool s390_pv_check_cpus(Error **errp) -+{ -+ MachineState *ms = MACHINE(qdev_get_machine()); -+ uint32_t pv_max_cpus = s390_pv_get_max_cpus(); -+ -+ if (ms->smp.max_cpus > pv_max_cpus) { -+ error_setg(errp, "Protected VMs support a maximum of %d CPUs", -+ pv_max_cpus); -+ return false; -+ } -+ -+ return true; -+} -+ -+static bool s390_pv_guest_check(ConfidentialGuestSupport *cgs, Error **errp) -+{ -+ return s390_pv_check_cpus(errp); -+} -+ - int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) - { - if (!object_dynamic_cast(OBJECT(cgs), TYPE_S390_PV_GUEST)) { -@@ -261,6 +297,10 @@ int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) - return -1; - } - -+ if (!s390_pv_guest_check(cgs, errp)) { -+ return -1; -+ } -+ - cgs->ready = true; - - return 0; --- -2.31.1 - diff --git a/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch b/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch deleted file mode 100644 index 42114a1..0000000 --- a/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 51fcf352a97f2e99a6a3fb8ae663b45436304120 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Tue, 10 Jan 2023 14:25:34 +0100 -Subject: [PATCH 11/31] s390x/s390-virtio-ccw: Activate zPCI features on - s390-ccw-virtio-rhel8.6.0 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 133: s390x/s390-virtio-ccw: Activate zPCI features on s390-ccw-virtio-rhel8.6.0 -RH-Bugzilla: 2159408 -RH-Acked-by: Thomas Huth -RH-Acked-by: David Hildenbrand -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] 1ed82e56fe74a283a1726c4893dc3387e645072c (clegoate/qemu-kvm-c9s) - -commit c7b14d3af7 ("s390x/s390-virtio-ccw: Switch off zPCI enhancements -on older 
machines") activated zPCI enhancement features (interpretation -and forward assist) silently on the s390-ccw-virtio-rhel8.6.0 machine -for RHEL8.8. It didn't seem to be a problem since migration is not -possible but it broke LEAPP upgrade to RHEL9 when the machine is -defined with a passthrough device. Activate the zPCI features also on -RHEL9.2 for the machines to be alike in both latest RHEL distros. - -Upstream Status: RHEL-only -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2159408 - -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-virtio-ccw.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index aa142a1a4e..4cdd59c394 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1234,8 +1234,14 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) - - static void ccw_machine_rhel860_class_options(MachineClass *mc) - { -+ static GlobalProperty compat[] = { -+ { TYPE_S390_PCI_DEVICE, "interpret", "on", }, -+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "on", }, -+ }; -+ - ccw_machine_rhel900_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); -+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - - /* All RHEL machines for prior major releases are deprecated */ - mc->deprecation_reason = rhel_old_machine_deprecation; -@@ -1259,8 +1265,14 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine) - - static void ccw_machine_rhel850_class_options(MachineClass *mc) - { -+ static GlobalProperty compat[] = { -+ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, -+ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, -+ }; -+ - ccw_machine_rhel860_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); -+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); - mc->smp_props.prefer_sockets = true; - } - 
DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); --- -2.31.1 - diff --git a/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch b/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch deleted file mode 100644 index ca61286..0000000 --- a/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch +++ /dev/null @@ -1,176 +0,0 @@ -From 0a4f5bcc2a6f8ac31431e971c1dce9e6ab2191c2 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 21 Feb 2023 16:22:16 -0500 -Subject: [PATCH 01/12] scsi: protect req->aiocb with AioContext lock - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread -RH-Bugzilla: 2155748 -RH-Acked-by: Eric Blake -RH-Acked-by: Kevin Wolf -RH-Acked-by: Laszlo Ersek -RH-Commit: [1/3] 61727297bd31dfe18220b61f1d265ced0649c60d (stefanha/centos-stream-qemu-kvm) - -If requests are being processed in the IOThread when a SCSIDevice is -unplugged, scsi_device_purge_requests() -> scsi_req_cancel_async() races -with I/O completion callbacks. Both threads load and store req->aiocb. -This can lead to assert(r->req.aiocb == NULL) failures and undefined -behavior. - -Protect r->req.aiocb with the AioContext lock to prevent the race. 
- -Reviewed-by: Eric Blake -Reviewed-by: Kevin Wolf -Signed-off-by: Stefan Hajnoczi -Message-Id: <20230221212218.1378734-2-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 7b7fc3d0102dafe8eb44802493036a526e921a71) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/scsi-disk.c | 23 ++++++++++++++++------- - hw/scsi/scsi-generic.c | 11 ++++++----- - 2 files changed, 22 insertions(+), 12 deletions(-) - -diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c -index e493c28814..5327f93f4c 100644 ---- a/hw/scsi/scsi-disk.c -+++ b/hw/scsi/scsi-disk.c -@@ -273,9 +273,11 @@ static void scsi_aio_complete(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - if (scsi_disk_req_check_error(r, ret, true)) { - goto done; - } -@@ -357,10 +359,11 @@ static void scsi_dma_complete(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - if (ret < 0) { - block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); - } else { -@@ -393,10 +396,11 @@ static void scsi_read_complete(void *opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - if (ret < 0) { - block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); - } else { -@@ -446,10 +450,11 @@ static void scsi_do_read_cb(void 
*opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert (r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - if (ret < 0) { - block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); - } else { -@@ -530,10 +535,11 @@ static void scsi_write_complete(void * opaque, int ret) - SCSIDiskReq *r = (SCSIDiskReq *)opaque; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert (r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - if (ret < 0) { - block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); - } else { -@@ -1737,10 +1743,11 @@ static void scsi_unmap_complete(void *opaque, int ret) - SCSIDiskReq *r = data->r; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); - if (scsi_disk_req_check_error(r, ret, true)) { - scsi_req_unref(&r->req); - g_free(data); -@@ -1816,9 +1823,11 @@ static void scsi_write_same_complete(void *opaque, int ret) - SCSIDiskReq *r = data->r; - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - -+ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; -- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); -+ - if (scsi_disk_req_check_error(r, ret, true)) { - goto done; - } -diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c -index 92cce20a4d..ac9fa662b4 100644 ---- a/hw/scsi/scsi-generic.c -+++ b/hw/scsi/scsi-generic.c -@@ -111,10 +111,11 @@ static void scsi_command_complete(void *opaque, int ret) - 
SCSIGenericReq *r = (SCSIGenericReq *)opaque; - SCSIDevice *s = r->req.dev; - -+ aio_context_acquire(blk_get_aio_context(s->conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); - scsi_command_complete_noio(r, ret); - aio_context_release(blk_get_aio_context(s->conf.blk)); - } -@@ -269,11 +270,11 @@ static void scsi_read_complete(void * opaque, int ret) - SCSIDevice *s = r->req.dev; - int len; - -+ aio_context_acquire(blk_get_aio_context(s->conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); -- - if (ret || r->req.io_canceled) { - scsi_command_complete_noio(r, ret); - goto done; -@@ -386,11 +387,11 @@ static void scsi_write_complete(void * opaque, int ret) - - trace_scsi_generic_write_complete(ret); - -+ aio_context_acquire(blk_get_aio_context(s->conf.blk)); -+ - assert(r->req.aiocb != NULL); - r->req.aiocb = NULL; - -- aio_context_acquire(blk_get_aio_context(s->conf.blk)); -- - if (ret || r->req.io_canceled) { - scsi_command_complete_noio(r, ret); - goto done; --- -2.39.1 - diff --git a/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch b/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch deleted file mode 100644 index a8e3957..0000000 --- a/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch +++ /dev/null @@ -1,159 +0,0 @@ -From 5defda06ec4c24818a34126c5048be5e274b63f5 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:41:04 +0100 -Subject: [PATCH 22/31] stream: Replace subtree drain with a single node drain - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [10/16] a93250b1f6ef296e903df0ba5d8b29bc2ed540a8 (sgarzarella/qemu-kvm-c-9-s) - -The subtree drain was 
introduced in commit b1e1af394d9 as a way to avoid -graph changes between finding the base node and changing the block graph -as necessary on completion of the image streaming job. - -The block graph could change between these two points because -bdrv_set_backing_hd() first drains the parent node, which involved -polling and can do anything. - -Subtree draining was an imperfect way to make this less likely (because -with it, fewer callbacks are called during this window). Everyone agreed -that it's not really the right solution, and it was only committed as a -stopgap solution. - -This replaces the subtree drain with a solution that simply drains the -parent node before we try to find the base node, and then call a version -of bdrv_set_backing_hd() that doesn't drain, but just asserts that the -parent node is already drained. - -This way, any graph changes caused by draining happen before we start -looking at the graph and things stay consistent between finding the base -node and changing the graph. 
- -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-10-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 92140b9f3f07d80e2c27edcc6e32f392be2135e6) -Signed-off-by: Stefano Garzarella ---- - block.c | 17 ++++++++++++++--- - block/stream.c | 26 ++++++++++++++++---------- - include/block/block-global-state.h | 3 +++ - 3 files changed, 33 insertions(+), 13 deletions(-) - -diff --git a/block.c b/block.c -index b3449a312e..5330e89903 100644 ---- a/block.c -+++ b/block.c -@@ -3403,14 +3403,15 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs, - return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); - } - --int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, -- Error **errp) -+int bdrv_set_backing_hd_drained(BlockDriverState *bs, -+ BlockDriverState *backing_hd, -+ Error **errp) - { - int ret; - Transaction *tran = tran_new(); - - GLOBAL_STATE_CODE(); -- bdrv_drained_begin(bs); -+ assert(bs->quiesce_counter > 0); - - ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); - if (ret < 0) { -@@ -3420,7 +3421,17 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - ret = bdrv_refresh_perms(bs, errp); - out: - tran_finalize(tran, ret); -+ return ret; -+} - -+int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, -+ Error **errp) -+{ -+ int ret; -+ GLOBAL_STATE_CODE(); -+ -+ bdrv_drained_begin(bs); -+ ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp); - bdrv_drained_end(bs); - - return ret; -diff --git a/block/stream.c b/block/stream.c -index 694709bd25..8744ad103f 100644 ---- a/block/stream.c -+++ b/block/stream.c -@@ -64,13 +64,16 @@ static int stream_prepare(Job *job) - bdrv_cor_filter_drop(s->cor_filter_bs); - s->cor_filter_bs = NULL; - -- bdrv_subtree_drained_begin(s->above_base); -+ /* -+ * bdrv_set_backing_hd() requires that unfiltered_bs is drained. 
Drain -+ * already here and use bdrv_set_backing_hd_drained() instead because -+ * the polling during drained_begin() might change the graph, and if we do -+ * this only later, we may end up working with the wrong base node (or it -+ * might even have gone away by the time we want to use it). -+ */ -+ bdrv_drained_begin(unfiltered_bs); - - base = bdrv_filter_or_cow_bs(s->above_base); -- if (base) { -- bdrv_ref(base); -- } -- - unfiltered_base = bdrv_skip_filters(base); - - if (bdrv_cow_child(unfiltered_bs)) { -@@ -82,7 +85,13 @@ static int stream_prepare(Job *job) - } - } - -- bdrv_set_backing_hd(unfiltered_bs, base, &local_err); -+ bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err); -+ -+ /* -+ * This call will do I/O, so the graph can change again from here on. -+ * We have already completed the graph change, so we are not in danger -+ * of operating on the wrong node any more if this happens. -+ */ - ret = bdrv_change_backing_file(unfiltered_bs, base_id, base_fmt, false); - if (local_err) { - error_report_err(local_err); -@@ -92,10 +101,7 @@ static int stream_prepare(Job *job) - } - - out: -- if (base) { -- bdrv_unref(base); -- } -- bdrv_subtree_drained_end(s->above_base); -+ bdrv_drained_end(unfiltered_bs); - return ret; - } - -diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h -index c7bd4a2088..00e0cf8aea 100644 ---- a/include/block/block-global-state.h -+++ b/include/block/block-global-state.h -@@ -82,6 +82,9 @@ int bdrv_open_file_child(const char *filename, - BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp); - int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - Error **errp); -+int bdrv_set_backing_hd_drained(BlockDriverState *bs, -+ BlockDriverState *backing_hd, -+ Error **errp); - int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, - const char *bdref_key, Error **errp); - BlockDriverState *bdrv_open(const char *filename, const char 
*reference, --- -2.31.1 - diff --git a/kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch b/kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch deleted file mode 100644 index 52e73e7..0000000 --- a/kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch +++ /dev/null @@ -1,144 +0,0 @@ -From e419493e6ec188461aa6f06c1b1cdc8a698859df Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 14 Jan 2023 15:21:03 -1000 -Subject: [PATCH 6/8] target/i386: Fix 32-bit AD[CO]X insns in 64-bit mode -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [6/7] 0fa4d3858319d4f877a5b3f31776121a72e2c57a (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -Failure to truncate the inputs results in garbage for the carry-out. - -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1373 -Signed-off-by: Richard Henderson -Reviewed-by: Philippe Mathieu-Daudé -Message-Id: <20230115012103.3131796-1-richard.henderson@linaro.org> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 6fbef9426bac7184b5d5887589d8386e732865eb) ---- - target/i386/tcg/emit.c.inc | 2 + - tests/tcg/x86_64/Makefile.target | 3 ++ - tests/tcg/x86_64/adox.c | 69 ++++++++++++++++++++++++++++++++ - 3 files changed, 74 insertions(+) - create mode 100644 tests/tcg/x86_64/adox.c - -diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc -index 0d7c6e80ae..e61ae9a2e9 100644 ---- a/target/i386/tcg/emit.c.inc -+++ b/target/i386/tcg/emit.c.inc -@@ -1037,6 +1037,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) - #ifdef TARGET_X86_64 - case MO_32: - /* If TL is 64-bit just do everything in 64-bit arithmetic. 
*/ -+ tcg_gen_ext32u_tl(s->T0, s->T0); -+ tcg_gen_ext32u_tl(s->T1, s->T1); - tcg_gen_add_i64(s->T0, s->T0, s->T1); - tcg_gen_add_i64(s->T0, s->T0, carry_in); - tcg_gen_shri_i64(carry_out, s->T0, 32); -diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target -index 4eac78293f..e64aab1b81 100644 ---- a/tests/tcg/x86_64/Makefile.target -+++ b/tests/tcg/x86_64/Makefile.target -@@ -12,11 +12,14 @@ ifeq ($(filter %-linux-user, $(TARGET)),$(TARGET)) - X86_64_TESTS += vsyscall - X86_64_TESTS += noexec - X86_64_TESTS += cmpxchg -+X86_64_TESTS += adox - TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64 - else - TESTS=$(MULTIARCH_TESTS) - endif - -+adox: CFLAGS=-O2 -+ - run-test-i386-ssse3: QEMU_OPTS += -cpu max - run-plugin-test-i386-ssse3-%: QEMU_OPTS += -cpu max - -diff --git a/tests/tcg/x86_64/adox.c b/tests/tcg/x86_64/adox.c -new file mode 100644 -index 0000000000..36be644c8b ---- /dev/null -+++ b/tests/tcg/x86_64/adox.c -@@ -0,0 +1,69 @@ -+/* See if ADOX give expected results */ -+ -+#include -+#include -+#include -+ -+static uint64_t adoxq(bool *c_out, uint64_t a, uint64_t b, bool c) -+{ -+ asm ("addl $0x7fffffff, %k1\n\t" -+ "adoxq %2, %0\n\t" -+ "seto %b1" -+ : "+r"(a), "=&r"(c) : "r"(b), "1"((int)c)); -+ *c_out = c; -+ return a; -+} -+ -+static uint64_t adoxl(bool *c_out, uint64_t a, uint64_t b, bool c) -+{ -+ asm ("addl $0x7fffffff, %k1\n\t" -+ "adoxl %k2, %k0\n\t" -+ "seto %b1" -+ : "+r"(a), "=&r"(c) : "r"(b), "1"((int)c)); -+ *c_out = c; -+ return a; -+} -+ -+int main() -+{ -+ uint64_t r; -+ bool c; -+ -+ r = adoxq(&c, 0, 0, 0); -+ assert(r == 0); -+ assert(c == 0); -+ -+ r = adoxl(&c, 0, 0, 0); -+ assert(r == 0); -+ assert(c == 0); -+ -+ r = adoxl(&c, 0x100000000, 0, 0); -+ assert(r == 0); -+ assert(c == 0); -+ -+ r = adoxq(&c, 0, 0, 1); -+ assert(r == 1); -+ assert(c == 0); -+ -+ r = adoxl(&c, 0, 0, 1); -+ assert(r == 1); -+ assert(c == 0); -+ -+ r = adoxq(&c, -1, -1, 0); -+ assert(r == -2); -+ assert(c == 1); -+ -+ r = adoxl(&c, -1, 
-1, 0); -+ assert(r == 0xfffffffe); -+ assert(c == 1); -+ -+ r = adoxq(&c, -1, -1, 1); -+ assert(r == -1); -+ assert(c == 1); -+ -+ r = adoxl(&c, -1, -1, 1); -+ assert(r == 0xffffffff); -+ assert(c == 1); -+ -+ return 0; -+} --- -2.39.1 - diff --git a/kvm-target-i386-Fix-BEXTR-instruction.patch b/kvm-target-i386-Fix-BEXTR-instruction.patch deleted file mode 100644 index 0c28c7e..0000000 --- a/kvm-target-i386-Fix-BEXTR-instruction.patch +++ /dev/null @@ -1,110 +0,0 @@ -From a019c203f0148e5fbb20e102a17453806f5296b6 Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 14 Jan 2023 13:05:42 -1000 -Subject: [PATCH 3/8] target/i386: Fix BEXTR instruction - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [3/7] bd1e3b26c72d7152b44be2d34308fd40dc106424 (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -There were two problems here: not limiting the input to operand bits, -and not correctly handling large extraction length. 
- -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1372 -Signed-off-by: Richard Henderson -Message-Id: <20230114230542.3116013-3-richard.henderson@linaro.org> -Cc: qemu-stable@nongnu.org -Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18) -Signed-off-by: Paolo Bonzini -(cherry picked from commit b14c0098975264ed03144f145bca0179a6763a07) ---- - target/i386/tcg/emit.c.inc | 22 +++++++++++----------- - tests/tcg/i386/test-i386-bmi2.c | 12 ++++++++++++ - 2 files changed, 23 insertions(+), 11 deletions(-) - -diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc -index 7037ff91c6..99f6ba6e19 100644 ---- a/target/i386/tcg/emit.c.inc -+++ b/target/i386/tcg/emit.c.inc -@@ -1078,30 +1078,30 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - { - MemOp ot = decode->op[0].ot; -- TCGv bound, zero; -+ TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); -+ TCGv zero = tcg_constant_tl(0); -+ TCGv mone = tcg_constant_tl(-1); - - /* - * Extract START, and shift the operand. - * Shifts larger than operand size get zeros. - */ - tcg_gen_ext8u_tl(s->A0, s->T1); -+ if (TARGET_LONG_BITS == 64 && ot == MO_32) { -+ tcg_gen_ext32u_tl(s->T0, s->T0); -+ } - tcg_gen_shr_tl(s->T0, s->T0, s->A0); - -- bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); -- zero = tcg_constant_tl(0); - tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero); - - /* -- * Extract the LEN into a mask. Lengths larger than -- * operand size get all ones. -+ * Extract the LEN into an inverse mask. Lengths larger than -+ * operand size get all zeros, length 0 gets all ones. 
- */ - tcg_gen_extract_tl(s->A0, s->T1, 8, 8); -- tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound, s->A0, bound); -- -- tcg_gen_movi_tl(s->T1, 1); -- tcg_gen_shl_tl(s->T1, s->T1, s->A0); -- tcg_gen_subi_tl(s->T1, s->T1, 1); -- tcg_gen_and_tl(s->T0, s->T0, s->T1); -+ tcg_gen_shl_tl(s->T1, mone, s->A0); -+ tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero); -+ tcg_gen_andc_tl(s->T0, s->T0, s->T1); - - gen_op_update1_cc(s); - set_cc_op(s, CC_OP_LOGICB + ot); -diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c -index 3c3ef85513..982d4abda4 100644 ---- a/tests/tcg/i386/test-i386-bmi2.c -+++ b/tests/tcg/i386/test-i386-bmi2.c -@@ -99,6 +99,9 @@ int main(int argc, char *argv[]) { - result = bextrq(mask, 0x10f8); - assert(result == 0); - -+ result = bextrq(0xfedcba9876543210ull, 0x7f00); -+ assert(result == 0xfedcba9876543210ull); -+ - result = blsiq(0x30); - assert(result == 0x10); - -@@ -164,6 +167,15 @@ int main(int argc, char *argv[]) { - result = bextrl(mask, 0x1038); - assert(result == 0); - -+ result = bextrl((reg_t)0x8f635a775ad3b9b4ull, 0x3018); -+ assert(result == 0x5a); -+ -+ result = bextrl((reg_t)0xfedcba9876543210ull, 0x7f00); -+ assert(result == 0x76543210u); -+ -+ result = bextrl(-1, 0); -+ assert(result == 0); -+ - result = blsil(0xffff); - assert(result == 1); - --- -2.39.1 - diff --git a/kvm-target-i386-Fix-BZHI-instruction.patch b/kvm-target-i386-Fix-BZHI-instruction.patch deleted file mode 100644 index bcf79f4..0000000 --- a/kvm-target-i386-Fix-BZHI-instruction.patch +++ /dev/null @@ -1,77 +0,0 @@ -From d49e5d193dfccf6f5cfa98ccce5bd491478d563d Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 14 Jan 2023 13:32:06 -1000 -Subject: [PATCH 7/8] target/i386: Fix BZHI instruction - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan 
Das -RH-Commit: [7/7] ad6b343c09c0304ac32cc68670c49d1fc12d8cf8 (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -We did not correctly handle N >= operand size. - -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1374 -Signed-off-by: Richard Henderson -Message-Id: <20230114233206.3118472-1-richard.henderson@linaro.org> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 9ad2ba6e8e7fc195d0dd0b76ab38bd2fceb1bdd4) ---- - target/i386/tcg/emit.c.inc | 14 +++++++------- - tests/tcg/i386/test-i386-bmi2.c | 3 +++ - 2 files changed, 10 insertions(+), 7 deletions(-) - -diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc -index e61ae9a2e9..0d01e13002 100644 ---- a/target/i386/tcg/emit.c.inc -+++ b/target/i386/tcg/emit.c.inc -@@ -1147,20 +1147,20 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - { - MemOp ot = decode->op[0].ot; -- TCGv bound; -+ TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); -+ TCGv zero = tcg_constant_tl(0); -+ TCGv mone = tcg_constant_tl(-1); - -- tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]); -- bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); -+ tcg_gen_ext8u_tl(s->T1, s->T1); - - /* - * Note that since we're using BMILG (in order to get O - * cleared) we need to store the inverse into C. 
- */ -- tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src, s->T1, bound); -- tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1, bound, bound, s->T1); -+ tcg_gen_setcond_tl(TCG_COND_LEU, cpu_cc_src, s->T1, bound); - -- tcg_gen_movi_tl(s->A0, -1); -- tcg_gen_shl_tl(s->A0, s->A0, s->T1); -+ tcg_gen_shl_tl(s->A0, mone, s->T1); -+ tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero); - tcg_gen_andc_tl(s->T0, s->T0, s->A0); - - gen_op_update1_cc(s); -diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c -index 982d4abda4..0244df7987 100644 ---- a/tests/tcg/i386/test-i386-bmi2.c -+++ b/tests/tcg/i386/test-i386-bmi2.c -@@ -123,6 +123,9 @@ int main(int argc, char *argv[]) { - result = bzhiq(mask, 0x1f); - assert(result == (mask & ~(-1 << 30))); - -+ result = bzhiq(mask, 0x40); -+ assert(result == mask); -+ - result = rorxq(0x2132435465768798, 8); - assert(result == 0x9821324354657687); - --- -2.39.1 - diff --git a/kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch b/kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch deleted file mode 100644 index 7f3051f..0000000 --- a/kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch +++ /dev/null @@ -1,60 +0,0 @@ -From cb2b591e1677db2837810eaedac534a7ff3a7b1c Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 14 Jan 2023 08:06:01 -1000 -Subject: [PATCH 4/8] target/i386: Fix C flag for BLSI, BLSMSK, BLSR - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [4/7] 173e23c492c830da6c5a4be0cfc20a69ac655b59 (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -We forgot to set cc_src, which is used for computing C. 
- -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1370 -Signed-off-by: Richard Henderson -Message-Id: <20230114180601.2993644-1-richard.henderson@linaro.org> -Cc: qemu-stable@nongnu.org -Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18) -Signed-off-by: Paolo Bonzini -(cherry picked from commit 99282098dc74c2055bde5652bde6cf0067d0c370) ---- - target/i386/tcg/emit.c.inc | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc -index 99f6ba6e19..4d7702c106 100644 ---- a/target/i386/tcg/emit.c.inc -+++ b/target/i386/tcg/emit.c.inc -@@ -1111,6 +1111,7 @@ static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - { - MemOp ot = decode->op[0].ot; - -+ tcg_gen_mov_tl(cpu_cc_src, s->T0); - tcg_gen_neg_tl(s->T1, s->T0); - tcg_gen_and_tl(s->T0, s->T0, s->T1); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); -@@ -1121,6 +1122,7 @@ static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode - { - MemOp ot = decode->op[0].ot; - -+ tcg_gen_mov_tl(cpu_cc_src, s->T0); - tcg_gen_subi_tl(s->T1, s->T0, 1); - tcg_gen_xor_tl(s->T0, s->T0, s->T1); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); -@@ -1131,6 +1133,7 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) - { - MemOp ot = decode->op[0].ot; - -+ tcg_gen_mov_tl(cpu_cc_src, s->T0); - tcg_gen_subi_tl(s->T1, s->T0, 1); - tcg_gen_and_tl(s->T0, s->T0, s->T1); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); --- -2.39.1 - diff --git a/kvm-target-i386-fix-ADOX-followed-by-ADCX.patch b/kvm-target-i386-fix-ADOX-followed-by-ADCX.patch deleted file mode 100644 index 72ae8ee..0000000 --- a/kvm-target-i386-fix-ADOX-followed-by-ADCX.patch +++ /dev/null @@ -1,205 +0,0 @@ -From 54d3e58aabf9716f9a07aeb7044d7b7997e28123 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Tue, 31 Jan 2023 09:48:03 +0100 -Subject: [PATCH 5/8] target/i386: fix ADOX followed by ADCX - -RH-Author: Paolo Bonzini 
-RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [5/7] 64dbe4e602f08e4a88fdeacee5a8993ca4383563 (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -When ADCX is followed by ADOX or vice versa, the second instruction's -carry comes from EFLAGS and the condition codes use the CC_OP_ADCOX -operation. Retrieving the carry from EFLAGS is handled by this bit -of gen_ADCOX: - - tcg_gen_extract_tl(carry_in, cpu_cc_src, - ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1); - -Unfortunately, in this case cc_op has been overwritten by the previous -"if" statement to CC_OP_ADCOX. This works by chance when the first -instruction is ADCX; however, if the first instruction is ADOX, -ADCX will incorrectly take its carry from OF instead of CF. - -Fix by moving the computation of the new cc_op at the end of the function. -The included exhaustive test case fails without this patch and passes -afterwards. - -Because ADCX/ADOX need not be invoked through the VEX prefix, this -regression bisects to commit 16fc5726a6e2 ("target/i386: reimplement -0x0f 0x38, add AVX", 2022-10-18). However, the mistake happened a -little earlier, when BMI instructions were rewritten using the new -decoder framework. 
- -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1471 -Reported-by: Paul Jolly -Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18) -Cc: qemu-stable@nongnu.org -Signed-off-by: Paolo Bonzini -(cherry picked from commit 60c7dd22e1383754d5f150bc9f7c2785c662a7b6) ---- - target/i386/tcg/emit.c.inc | 20 +++++---- - tests/tcg/i386/Makefile.target | 6 ++- - tests/tcg/i386/test-i386-adcox.c | 75 ++++++++++++++++++++++++++++++++ - 3 files changed, 91 insertions(+), 10 deletions(-) - create mode 100644 tests/tcg/i386/test-i386-adcox.c - -diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc -index 4d7702c106..0d7c6e80ae 100644 ---- a/target/i386/tcg/emit.c.inc -+++ b/target/i386/tcg/emit.c.inc -@@ -1015,6 +1015,7 @@ VSIB_AVX(VPGATHERQ, vpgatherq) - - static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) - { -+ int opposite_cc_op; - TCGv carry_in = NULL; - TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2); - TCGv zero; -@@ -1022,14 +1023,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) - if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) { - /* Re-use the carry-out from a previous round. */ - carry_in = carry_out; -- cc_op = s->cc_op; -- } else if (s->cc_op == CC_OP_ADCX || s->cc_op == CC_OP_ADOX) { -- /* Merge with the carry-out from the opposite instruction. */ -- cc_op = CC_OP_ADCOX; -- } -- -- /* If we don't have a carry-in, get it out of EFLAGS. */ -- if (!carry_in) { -+ } else { -+ /* We don't have a carry-in, get it out of EFLAGS. */ - if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) { - gen_compute_eflags(s); - } -@@ -1053,7 +1048,14 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) - tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero); - break; - } -- set_cc_op(s, cc_op); -+ -+ opposite_cc_op = cc_op == CC_OP_ADCX ? 
CC_OP_ADOX : CC_OP_ADCX; -+ if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) { -+ /* Merge with the carry-out from the opposite instruction. */ -+ set_cc_op(s, CC_OP_ADCOX); -+ } else { -+ set_cc_op(s, cc_op); -+ } - } - - static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) -diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target -index 81831cafbc..bafd8c2180 100644 ---- a/tests/tcg/i386/Makefile.target -+++ b/tests/tcg/i386/Makefile.target -@@ -14,7 +14,7 @@ config-cc.mak: Makefile - I386_SRCS=$(notdir $(wildcard $(I386_SRC)/*.c)) - ALL_X86_TESTS=$(I386_SRCS:.c=) - SKIP_I386_TESTS=test-i386-ssse3 test-avx test-3dnow test-mmx --X86_64_TESTS:=$(filter test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS)) -+X86_64_TESTS:=$(filter test-i386-adcox test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS)) - - test-i386-sse-exceptions: CFLAGS += -msse4.1 -mfpmath=sse - run-test-i386-sse-exceptions: QEMU_OPTS += -cpu max -@@ -28,6 +28,10 @@ test-i386-bmi2: CFLAGS=-O2 - run-test-i386-bmi2: QEMU_OPTS += -cpu max - run-plugin-test-i386-bmi2-%: QEMU_OPTS += -cpu max - -+test-i386-adcox: CFLAGS=-O2 -+run-test-i386-adcox: QEMU_OPTS += -cpu max -+run-plugin-test-i386-adcox-%: QEMU_OPTS += -cpu max -+ - # - # hello-i386 is a barebones app - # -diff --git a/tests/tcg/i386/test-i386-adcox.c b/tests/tcg/i386/test-i386-adcox.c -new file mode 100644 -index 0000000000..16169efff8 ---- /dev/null -+++ b/tests/tcg/i386/test-i386-adcox.c -@@ -0,0 +1,75 @@ -+/* See if various BMI2 instructions give expected results */ -+#include -+#include -+#include -+ -+#define CC_C 1 -+#define CC_O (1 << 11) -+ -+#ifdef __x86_64__ -+#define REG uint64_t -+#else -+#define REG uint32_t -+#endif -+ -+void test_adox_adcx(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand) -+{ -+ REG flags; -+ REG out_adcx, out_adox; -+ -+ asm("pushf; pop %0" : "=r"(flags)); -+ flags &= ~(CC_C | CC_O); -+ flags |= (in_c ? CC_C : 0); -+ flags |= (in_o ? 
CC_O : 0); -+ -+ out_adcx = adcx_operand; -+ out_adox = adox_operand; -+ asm("push %0; popf;" -+ "adox %3, %2;" -+ "adcx %3, %1;" -+ "pushf; pop %0" -+ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox) -+ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox)); -+ -+ assert(out_adcx == in_c + adcx_operand - 1); -+ assert(out_adox == in_o + adox_operand - 1); -+ assert(!!(flags & CC_C) == (in_c || adcx_operand)); -+ assert(!!(flags & CC_O) == (in_o || adox_operand)); -+} -+ -+void test_adcx_adox(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand) -+{ -+ REG flags; -+ REG out_adcx, out_adox; -+ -+ asm("pushf; pop %0" : "=r"(flags)); -+ flags &= ~(CC_C | CC_O); -+ flags |= (in_c ? CC_C : 0); -+ flags |= (in_o ? CC_O : 0); -+ -+ out_adcx = adcx_operand; -+ out_adox = adox_operand; -+ asm("push %0; popf;" -+ "adcx %3, %1;" -+ "adox %3, %2;" -+ "pushf; pop %0" -+ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox) -+ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox)); -+ -+ assert(out_adcx == in_c + adcx_operand - 1); -+ assert(out_adox == in_o + adox_operand - 1); -+ assert(!!(flags & CC_C) == (in_c || adcx_operand)); -+ assert(!!(flags & CC_O) == (in_o || adox_operand)); -+} -+ -+int main(int argc, char *argv[]) { -+ /* try all combinations of input CF, input OF, CF from op1+op2, OF from op2+op1 */ -+ int i; -+ for (i = 0; i <= 15; i++) { -+ printf("%d\n", i); -+ test_adcx_adox(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8)); -+ test_adox_adcx(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8)); -+ } -+ return 0; -+} -+ --- -2.39.1 - diff --git a/kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch b/kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch deleted file mode 100644 index 81a0003..0000000 --- a/kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch +++ /dev/null @@ -1,77 +0,0 @@ -From f4ddcdd2395e0944c20f6683c66068ed0ac7d757 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Sat, 7 Jan 2023 18:14:20 +0100 
-Subject: [PATCH 1/8] target/i386: fix operand size of unary SSE operations - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [1/7] 7041f3e30e19add6bd8e5355d8bebf92390a5c2e (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -VRCPSS, VRSQRTSS and VCVTSx2Sx have a 32-bit or 64-bit memory operand, -which is represented in the decoding tables by X86_VEX_REPScalar. Add it -to the tables, and make validate_vex() handle the case of an instruction -that is in exception type 4 without the REP prefix and exception type 5 -with it; this is the cas of VRCP and VRSQRT. - -Reported-by: yongwoo -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1377 -Signed-off-by: Paolo Bonzini -(cherry picked from commit 3d304620ec6c95f31db17acc132f42f243369299) ---- - target/i386/tcg/decode-new.c.inc | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc -index 80c579164f..d5fd8d965c 100644 ---- a/target/i386/tcg/decode-new.c.inc -+++ b/target/i386/tcg/decode-new.c.inc -@@ -105,6 +105,7 @@ - #define vex3 .vex_class = 3, - #define vex4 .vex_class = 4, - #define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned, -+#define vex4_rep5 .vex_class = 4, .vex_special = X86_VEX_REPScalar, - #define vex5 .vex_class = 5, - #define vex6 .vex_class = 6, - #define vex7 .vex_class = 7, -@@ -839,8 +840,8 @@ static const X86OpEntry opcodes_0F[256] = { - - [0x50] = X86_OP_ENTRY3(MOVMSK, G,y, None,None, U,x, vex7 p_00_66), - [0x51] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), -- [0x52] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex5 p_00_f3), -- [0x53] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex5 p_00_f3), -+ [0x52] = 
X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), -+ [0x53] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), - [0x54] = X86_OP_ENTRY3(PAND, V,x, H,x, W,x, vex4 p_00_66), /* vand */ - [0x55] = X86_OP_ENTRY3(PANDN, V,x, H,x, W,x, vex4 p_00_66), /* vandn */ - [0x56] = X86_OP_ENTRY3(POR, V,x, H,x, W,x, vex4 p_00_66), /* vor */ -@@ -878,7 +879,7 @@ static const X86OpEntry opcodes_0F[256] = { - - [0x58] = X86_OP_ENTRY3(VADD, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), - [0x59] = X86_OP_ENTRY3(VMUL, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), -- [0x5a] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex3 p_00_66_f3_f2), -+ [0x5a] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), - [0x5b] = X86_OP_GROUP0(0F5B), - [0x5c] = X86_OP_ENTRY3(VSUB, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), - [0x5d] = X86_OP_ENTRY3(VMIN, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), -@@ -1447,9 +1448,9 @@ static bool validate_vex(DisasContext *s, X86DecodedInsn *decode) - * Instructions which differ between 00/66 and F2/F3 in the - * exception classification and the size of the memory operand. - */ -- assert(e->vex_class == 1 || e->vex_class == 2); -+ assert(e->vex_class == 1 || e->vex_class == 2 || e->vex_class == 4); - if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { -- e->vex_class = 3; -+ e->vex_class = e->vex_class < 4 ? 
3 : 5; - if (s->vex_l) { - goto illegal; - } --- -2.39.1 - diff --git a/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch b/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch deleted file mode 100644 index b9536c3..0000000 --- a/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch +++ /dev/null @@ -1,50 +0,0 @@ -From b330bf0a2ad5af73d3c62997f7f0fa5b61f1796b Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 14 Feb 2023 14:48:37 +0100 -Subject: [PATCH 8/8] target/s390x/arch_dump: Fix memory corruption in - s390x_write_elf64_notes() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 152: Fix memory corruption in s390x_write_elf64_notes() -RH-Bugzilla: 2168172 -RH-Acked-by: Cornelia Huck -RH-Acked-by: David Hildenbrand -RH-Acked-by: Cédric Le Goater -RH-Commit: [1/1] 37a2c997b2c8b7524e0b6299891bf3ea7c9a46d0 (thuth/qemu-kvm-cs9) - -Bugzilla: https://bugzilla.redhat.com/2168172 -Upstream-Status: Posted (and reviewed, but not merged yet) - -"note_size" can be smaller than sizeof(note), so unconditionally calling -memset(notep, 0, sizeof(note)) could cause a memory corruption here in -case notep has been allocated dynamically, thus let's use note_size as -length argument for memset() instead. 
- -Fixes: 113d8f4e95 ("s390x: pv: Add dump support") -Message-Id: <20230214141056.680969-1-thuth@redhat.com> -Reviewed-by: Janosch Frank -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Thomas Huth ---- - target/s390x/arch_dump.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c -index a2329141e8..a7c44ba49d 100644 ---- a/target/s390x/arch_dump.c -+++ b/target/s390x/arch_dump.c -@@ -248,7 +248,7 @@ static int s390x_write_elf64_notes(const char *note_name, - notep = g_malloc(note_size); - } - -- memset(notep, 0, sizeof(note)); -+ memset(notep, 0, note_size); - - /* Setup note header data */ - notep->hdr.n_descsz = cpu_to_be32(content_size); --- -2.31.1 - diff --git a/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch b/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch deleted file mode 100644 index 268c263..0000000 --- a/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch +++ /dev/null @@ -1,153 +0,0 @@ -From 093c4a6834f3ec5a05390a3630ae4edec80885b8 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Fri, 18 Nov 2022 18:40:57 +0100 -Subject: [PATCH 15/31] test-bdrv-drain: Don't yield in - .bdrv_co_drained_begin/end() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot -RH-Bugzilla: 2155112 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Kevin Wolf -RH-Commit: [3/16] 5282d3e13cb85dfb480edb11b7eb2769248465df (sgarzarella/qemu-kvm-c-9-s) - -We want to change .bdrv_co_drained_begin/end() back to be non-coroutine -callbacks, so in preparation, avoid yielding in their implementation. - -This does almost the same as the existing logic in bdrv_drain_invoke(), -by creating and entering coroutines internally. 
However, since the test -case is by far the heaviest user of coroutine code in drain callbacks, -it is preferable to have the complexity in the test case rather than the -drain core, which is already complicated enough without this. - -The behaviour for bdrv_drain_begin() is unchanged because we increase -bs->in_flight and this is still polled. However, bdrv_drain_end() -doesn't wait for the spawned coroutine to complete any more. This is -fine, we don't rely on bdrv_drain_end() restarting all operations -immediately before the next aio_poll(). - -Signed-off-by: Kevin Wolf -Reviewed-by: Vladimir Sementsov-Ogievskiy -Reviewed-by: Emanuele Giuseppe Esposito -Reviewed-by: Hanna Reitz -Message-Id: <20221118174110.55183-3-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 7bce1c299834557bffd92294608ea528648cfe75) -Signed-off-by: Stefano Garzarella ---- - tests/unit/test-bdrv-drain.c | 64 ++++++++++++++++++++++++++---------- - 1 file changed, 46 insertions(+), 18 deletions(-) - -diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c -index 09dc4a4891..24f34e24ad 100644 ---- a/tests/unit/test-bdrv-drain.c -+++ b/tests/unit/test-bdrv-drain.c -@@ -38,12 +38,22 @@ typedef struct BDRVTestState { - bool sleep_in_drain_begin; - } BDRVTestState; - -+static void coroutine_fn sleep_in_drain_begin(void *opaque) -+{ -+ BlockDriverState *bs = opaque; -+ -+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); -+ bdrv_dec_in_flight(bs); -+} -+ - static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) - { - BDRVTestState *s = bs->opaque; - s->drain_count++; - if (s->sleep_in_drain_begin) { -- qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); -+ Coroutine *co = qemu_coroutine_create(sleep_in_drain_begin, bs); -+ bdrv_inc_in_flight(bs); -+ aio_co_enter(bdrv_get_aio_context(bs), co); - } - } - -@@ -1916,6 +1926,21 @@ static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs, - return 0; - } - -+static void coroutine_fn 
bdrv_replace_test_drain_co(void *opaque) -+{ -+ BlockDriverState *bs = opaque; -+ BDRVReplaceTestState *s = bs->opaque; -+ -+ /* Keep waking io_co up until it is done */ -+ while (s->io_co) { -+ aio_co_wake(s->io_co); -+ s->io_co = NULL; -+ qemu_coroutine_yield(); -+ } -+ s->drain_co = NULL; -+ bdrv_dec_in_flight(bs); -+} -+ - /** - * If .drain_count is 0, wake up .io_co if there is one; and set - * .was_drained. -@@ -1926,20 +1951,27 @@ static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) - BDRVReplaceTestState *s = bs->opaque; - - if (!s->drain_count) { -- /* Keep waking io_co up until it is done */ -- s->drain_co = qemu_coroutine_self(); -- while (s->io_co) { -- aio_co_wake(s->io_co); -- s->io_co = NULL; -- qemu_coroutine_yield(); -- } -- s->drain_co = NULL; -- -+ s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs); -+ bdrv_inc_in_flight(bs); -+ aio_co_enter(bdrv_get_aio_context(bs), s->drain_co); - s->was_drained = true; - } - s->drain_count++; - } - -+static void coroutine_fn bdrv_replace_test_read_entry(void *opaque) -+{ -+ BlockDriverState *bs = opaque; -+ char data; -+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); -+ int ret; -+ -+ /* Queue a read request post-drain */ -+ ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); -+ g_assert(ret >= 0); -+ bdrv_dec_in_flight(bs); -+} -+ - /** - * Reduce .drain_count, set .was_undrained once it reaches 0. 
- * If .drain_count reaches 0 and the node has a backing file, issue a -@@ -1951,17 +1983,13 @@ static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) - - g_assert(s->drain_count > 0); - if (!--s->drain_count) { -- int ret; -- - s->was_undrained = true; - - if (bs->backing) { -- char data; -- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); -- -- /* Queue a read request post-drain */ -- ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); -- g_assert(ret >= 0); -+ Coroutine *co = qemu_coroutine_create(bdrv_replace_test_read_entry, -+ bs); -+ bdrv_inc_in_flight(bs); -+ aio_co_enter(bdrv_get_aio_context(bs), co); - } - } - } --- -2.31.1 - diff --git a/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch b/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch deleted file mode 100644 index ebd52cd..0000000 --- a/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch +++ /dev/null @@ -1,505 +0,0 @@ -From 39d5761fe1f546e764dedf2ea32c55d8f5222696 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Wed, 18 Jan 2023 13:04:05 +0100 -Subject: [PATCH 1/8] tests/qtest: netdev: test stream and dgram backends -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 148: net: stream: add a new option to automatically reconnect -RH-Bugzilla: 2169232 -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Cindy Lu -RH-Acked-by: MST -RH-Acked-by: David Gibson (Red Hat) -RH-Commit: [1/2] 75c71b47eea072e14651a96612d402b50d2b8f1e (lvivier/qemu-kvm-centos) - -Signed-off-by: Laurent Vivier -Acked-by: Michael S. 
Tsirkin -Message-Id: <20230118120405.1876329-1-lvivier@redhat.com> -Signed-off-by: Thomas Huth -(cherry picked from commit c95031a19f0d7f418a597243f6f84b031a858997) ---- - tests/qtest/meson.build | 2 + - tests/qtest/netdev-socket.c | 448 ++++++++++++++++++++++++++++++++++++ - 2 files changed, 450 insertions(+) - create mode 100644 tests/qtest/netdev-socket.c - -diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index 9df3f9f8b9..2e7c6fe5e3 100644 ---- a/tests/qtest/meson.build -+++ b/tests/qtest/meson.build -@@ -27,6 +27,7 @@ qtests_generic = [ - 'test-hmp', - 'qos-test', - 'readconfig-test', -+ 'netdev-socket', - ] - if config_host.has_key('CONFIG_MODULES') - qtests_generic += [ 'modules-test' ] -@@ -299,6 +300,7 @@ qtests = { - 'tpm-tis-device-swtpm-test': [io, tpmemu_files, 'tpm-tis-util.c'], - 'tpm-tis-device-test': [io, tpmemu_files, 'tpm-tis-util.c'], - 'vmgenid-test': files('boot-sector.c', 'acpi-utils.c'), -+ 'netdev-socket': files('netdev-socket.c', '../unit/socket-helpers.c'), - } - - gvnc = dependency('gvnc-1.0', required: false) -diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c -new file mode 100644 -index 0000000000..6ba256e173 ---- /dev/null -+++ b/tests/qtest/netdev-socket.c -@@ -0,0 +1,448 @@ -+/* -+ * QTest testcase for netdev stream and dgram -+ * -+ * Copyright (c) 2022 Red Hat, Inc. 
-+ * -+ * SPDX-License-Identifier: GPL-2.0-or-later -+ */ -+ -+#include "qemu/osdep.h" -+#include "qemu/sockets.h" -+#include -+#include "../unit/socket-helpers.h" -+#include "libqtest.h" -+ -+#define CONNECTION_TIMEOUT 5 -+ -+#define EXPECT_STATE(q, e, t) \ -+do { \ -+ char *resp = NULL; \ -+ g_test_timer_start(); \ -+ do { \ -+ g_free(resp); \ -+ resp = qtest_hmp(q, "info network"); \ -+ if (t) { \ -+ strrchr(resp, t)[0] = 0; \ -+ } \ -+ if (g_str_equal(resp, e)) { \ -+ break; \ -+ } \ -+ } while (g_test_timer_elapsed() < CONNECTION_TIMEOUT); \ -+ g_assert_cmpstr(resp, ==, e); \ -+ g_free(resp); \ -+} while (0) -+ -+static gchar *tmpdir; -+ -+static int inet_get_free_port_socket_ipv4(int sock) -+{ -+ struct sockaddr_in addr; -+ socklen_t len; -+ -+ memset(&addr, 0, sizeof(addr)); -+ addr.sin_family = AF_INET; -+ addr.sin_addr.s_addr = INADDR_ANY; -+ addr.sin_port = 0; -+ if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) { -+ return -1; -+ } -+ -+ len = sizeof(addr); -+ if (getsockname(sock, (struct sockaddr *)&addr, &len) < 0) { -+ return -1; -+ } -+ -+ return ntohs(addr.sin_port); -+} -+ -+static int inet_get_free_port_socket_ipv6(int sock) -+{ -+ struct sockaddr_in6 addr; -+ socklen_t len; -+ -+ memset(&addr, 0, sizeof(addr)); -+ addr.sin6_family = AF_INET6; -+ addr.sin6_addr = in6addr_any; -+ addr.sin6_port = 0; -+ if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) { -+ return -1; -+ } -+ -+ len = sizeof(addr); -+ if (getsockname(sock, (struct sockaddr *)&addr, &len) < 0) { -+ return -1; -+ } -+ -+ return ntohs(addr.sin6_port); -+} -+ -+static int inet_get_free_port_multiple(int nb, int *port, bool ipv6) -+{ -+ int sock[nb]; -+ int i; -+ -+ for (i = 0; i < nb; i++) { -+ sock[i] = socket(ipv6 ? AF_INET6 : AF_INET, SOCK_STREAM, 0); -+ if (sock[i] < 0) { -+ break; -+ } -+ port[i] = ipv6 ? 
inet_get_free_port_socket_ipv6(sock[i]) : -+ inet_get_free_port_socket_ipv4(sock[i]); -+ if (port[i] == -1) { -+ break; -+ } -+ } -+ -+ nb = i; -+ for (i = 0; i < nb; i++) { -+ closesocket(sock[i]); -+ } -+ -+ return nb; -+} -+ -+static int inet_get_free_port(bool ipv6) -+{ -+ int nb, port; -+ -+ nb = inet_get_free_port_multiple(1, &port, ipv6); -+ g_assert_cmpint(nb, ==, 1); -+ -+ return port; -+} -+ -+static void test_stream_inet_ipv4(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ int port; -+ -+ port = inet_get_free_port(false); -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true,addr.type=inet," -+ "addr.ipv4=on,addr.ipv6=off," -+ "addr.host=127.0.0.1,addr.port=%d", port); -+ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,server=false,id=st0,addr.type=inet," -+ "addr.ipv4=on,addr.ipv6=off," -+ "addr.host=127.0.0.1,addr.port=%d", port); -+ -+ expect = g_strdup_printf("st0: index=0,type=stream,tcp:127.0.0.1:%d\r\n", -+ port); -+ EXPECT_STATE(qts1, expect, 0); -+ g_free(expect); -+ -+ /* the port is unknown, check only the address */ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,tcp:127.0.0.1", ':'); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ -+static void test_stream_inet_ipv6(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ int port; -+ -+ port = inet_get_free_port(true); -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true,addr.type=inet," -+ "addr.ipv4=off,addr.ipv6=on," -+ "addr.host=::1,addr.port=%d", port); -+ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,server=false,id=st0,addr.type=inet," -+ "addr.ipv4=off,addr.ipv6=on," -+ "addr.host=::1,addr.port=%d", port); -+ -+ expect = g_strdup_printf("st0: index=0,type=stream,tcp:::1:%d\r\n", -+ port); -+ EXPECT_STATE(qts1, expect, 0); -+ g_free(expect); -+ -+ /* the 
port is unknown, check only the address */ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,tcp:::1", ':'); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ -+static void test_stream_unix(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ gchar *path; -+ -+ path = g_strconcat(tmpdir, "/stream_unix", NULL); -+ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true," -+ "addr.type=unix,addr.path=%s,", -+ path); -+ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=false," -+ "addr.type=unix,addr.path=%s", -+ path); -+ -+ expect = g_strdup_printf("st0: index=0,type=stream,unix:%s\r\n", path); -+ EXPECT_STATE(qts1, expect, 0); -+ EXPECT_STATE(qts0, expect, 0); -+ g_free(expect); -+ g_free(path); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ -+#ifdef CONFIG_LINUX -+static void test_stream_unix_abstract(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ gchar *path; -+ -+ path = g_strconcat(tmpdir, "/stream_unix_abstract", NULL); -+ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=true," -+ "addr.type=unix,addr.path=%s," -+ "addr.abstract=on", -+ path); -+ -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,server=false," -+ "addr.type=unix,addr.path=%s,addr.abstract=on", -+ path); -+ -+ expect = g_strdup_printf("st0: index=0,type=stream,unix:%s\r\n", path); -+ EXPECT_STATE(qts1, expect, 0); -+ EXPECT_STATE(qts0, expect, 0); -+ g_free(expect); -+ g_free(path); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+#endif -+ -+#ifndef _WIN32 -+static void test_stream_fd(void) -+{ -+ QTestState *qts0, *qts1; -+ int sock[2]; -+ int ret; -+ -+ ret = socketpair(AF_LOCAL, SOCK_STREAM, 0, sock); -+ g_assert_true(ret == 0); -+ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,addr.type=fd,addr.str=%d", -+ sock[0]); -+ 
-+ EXPECT_STATE(qts0, "st0: index=0,type=stream,unix:\r\n", 0); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev stream,id=st0,addr.type=fd,addr.str=%d", -+ sock[1]); -+ -+ EXPECT_STATE(qts1, "st0: index=0,type=stream,unix:\r\n", 0); -+ EXPECT_STATE(qts0, "st0: index=0,type=stream,unix:\r\n", 0); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+ -+ closesocket(sock[0]); -+ closesocket(sock[1]); -+} -+#endif -+ -+static void test_dgram_inet(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ int port[2]; -+ int nb; -+ -+ nb = inet_get_free_port_multiple(2, port, false); -+ g_assert_cmpint(nb, ==, 2); -+ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0," -+ "local.type=inet,local.host=127.0.0.1,local.port=%d," -+ "remote.type=inet,remote.host=127.0.0.1,remote.port=%d", -+ port[0], port[1]); -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram," -+ "udp=127.0.0.1:%d/127.0.0.1:%d\r\n", -+ port[0], port[1]); -+ EXPECT_STATE(qts0, expect, 0); -+ g_free(expect); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0," -+ "local.type=inet,local.host=127.0.0.1,local.port=%d," -+ "remote.type=inet,remote.host=127.0.0.1,remote.port=%d", -+ port[1], port[0]); -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram," -+ "udp=127.0.0.1:%d/127.0.0.1:%d\r\n", -+ port[1], port[0]); -+ EXPECT_STATE(qts1, expect, 0); -+ g_free(expect); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ -+#ifndef _WIN32 -+static void test_dgram_mcast(void) -+{ -+ QTestState *qts; -+ -+ qts = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0," -+ "remote.type=inet,remote.host=230.0.0.1,remote.port=1234"); -+ -+ EXPECT_STATE(qts, "st0: index=0,type=dgram,mcast=230.0.0.1:1234\r\n", 0); -+ -+ qtest_quit(qts); -+} -+ -+static void test_dgram_unix(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ gchar *path0, *path1; -+ -+ path0 = g_strconcat(tmpdir, "/dgram_unix0", NULL); -+ path1 = g_strconcat(tmpdir, "/dgram_unix1", NULL); -+ -+ 
qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0,local.type=unix,local.path=%s," -+ "remote.type=unix,remote.path=%s", -+ path0, path1); -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram,udp=%s:%s\r\n", -+ path0, path1); -+ EXPECT_STATE(qts0, expect, 0); -+ g_free(expect); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0,local.type=unix,local.path=%s," -+ "remote.type=unix,remote.path=%s", -+ path1, path0); -+ -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram,udp=%s:%s\r\n", -+ path1, path0); -+ EXPECT_STATE(qts1, expect, 0); -+ g_free(expect); -+ -+ unlink(path0); -+ g_free(path0); -+ unlink(path1); -+ g_free(path1); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+} -+ -+static void test_dgram_fd(void) -+{ -+ QTestState *qts0, *qts1; -+ char *expect; -+ int ret; -+ int sv[2]; -+ -+ ret = socketpair(PF_UNIX, SOCK_DGRAM, 0, sv); -+ g_assert_cmpint(ret, !=, -1); -+ -+ qts0 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0,local.type=fd,local.str=%d", -+ sv[0]); -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram,fd=%d unix\r\n", sv[0]); -+ EXPECT_STATE(qts0, expect, 0); -+ g_free(expect); -+ -+ qts1 = qtest_initf("-nodefaults -M none " -+ "-netdev dgram,id=st0,local.type=fd,local.str=%d", -+ sv[1]); -+ -+ -+ expect = g_strdup_printf("st0: index=0,type=dgram,fd=%d unix\r\n", sv[1]); -+ EXPECT_STATE(qts1, expect, 0); -+ g_free(expect); -+ -+ qtest_quit(qts1); -+ qtest_quit(qts0); -+ -+ closesocket(sv[0]); -+ closesocket(sv[1]); -+} -+#endif -+ -+int main(int argc, char **argv) -+{ -+ int ret; -+ bool has_ipv4, has_ipv6, has_afunix; -+ g_autoptr(GError) err = NULL; -+ -+ socket_init(); -+ g_test_init(&argc, &argv, NULL); -+ -+ if (socket_check_protocol_support(&has_ipv4, &has_ipv6) < 0) { -+ g_error("socket_check_protocol_support() failed\n"); -+ } -+ -+ tmpdir = g_dir_make_tmp("netdev-socket.XXXXXX", &err); -+ if (tmpdir == NULL) { -+ g_error("Can't create temporary directory in %s: %s", -+ 
g_get_tmp_dir(), err->message); -+ } -+ -+ if (has_ipv4) { -+ qtest_add_func("/netdev/stream/inet/ipv4", test_stream_inet_ipv4); -+ qtest_add_func("/netdev/dgram/inet", test_dgram_inet); -+#ifndef _WIN32 -+ qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast); -+#endif -+ } -+ if (has_ipv6) { -+ qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6); -+ } -+ -+ socket_check_afunix_support(&has_afunix); -+ if (has_afunix) { -+#ifndef _WIN32 -+ qtest_add_func("/netdev/dgram/unix", test_dgram_unix); -+#endif -+ qtest_add_func("/netdev/stream/unix", test_stream_unix); -+#ifdef CONFIG_LINUX -+ qtest_add_func("/netdev/stream/unix/abstract", -+ test_stream_unix_abstract); -+#endif -+#ifndef _WIN32 -+ qtest_add_func("/netdev/stream/fd", test_stream_fd); -+ qtest_add_func("/netdev/dgram/fd", test_dgram_fd); -+#endif -+ } -+ -+ ret = g_test_run(); -+ -+ g_rmdir(tmpdir); -+ g_free(tmpdir); -+ -+ return ret; -+} --- -2.31.1 - diff --git a/kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch b/kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch deleted file mode 100644 index 14388fe..0000000 --- a/kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch +++ /dev/null @@ -1,299 +0,0 @@ -From 120db3dfeb88c447f0e115c19b7ede704f8f80cb Mon Sep 17 00:00:00 2001 -From: Richard Henderson -Date: Sat, 14 Jan 2023 13:05:41 -1000 -Subject: [PATCH 2/8] tests/tcg/i386: Introduce and use reg_t consistently -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Paolo Bonzini -RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions -RH-Bugzilla: 2173590 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Bandan Das -RH-Commit: [2/7] 843a677555414170392db21c828bef3dc3c29300 (bonzini/rhel-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590 -Upstream-Status: merged - -Define reg_t based on the actual register width. 
-Define the inlines using that type. This will allow -input registers to 32-bit insns to be set to 64-bit -values on x86-64, which allows testing various edge cases. - -Signed-off-by: Richard Henderson -Reviewed-by: Philippe Mathieu-Daudé -Message-Id: <20230114230542.3116013-2-richard.henderson@linaro.org> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 5d62d6649cd367b5b4a3676e7514d2f9ca86cb03) ---- - tests/tcg/i386/test-i386-bmi2.c | 182 ++++++++++++++++---------------- - 1 file changed, 93 insertions(+), 89 deletions(-) - -diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c -index 5fadf47510..3c3ef85513 100644 ---- a/tests/tcg/i386/test-i386-bmi2.c -+++ b/tests/tcg/i386/test-i386-bmi2.c -@@ -3,34 +3,40 @@ - #include - #include - -+#ifdef __x86_64 -+typedef uint64_t reg_t; -+#else -+typedef uint32_t reg_t; -+#endif -+ - #define insn1q(name, arg0) \ --static inline uint64_t name##q(uint64_t arg0) \ -+static inline reg_t name##q(reg_t arg0) \ - { \ -- uint64_t result64; \ -+ reg_t result64; \ - asm volatile (#name "q %1, %0" : "=r"(result64) : "rm"(arg0)); \ - return result64; \ - } - - #define insn1l(name, arg0) \ --static inline uint32_t name##l(uint32_t arg0) \ -+static inline reg_t name##l(reg_t arg0) \ - { \ -- uint32_t result32; \ -+ reg_t result32; \ - asm volatile (#name "l %k1, %k0" : "=r"(result32) : "rm"(arg0)); \ - return result32; \ - } - - #define insn2q(name, arg0, c0, arg1, c1) \ --static inline uint64_t name##q(uint64_t arg0, uint64_t arg1) \ -+static inline reg_t name##q(reg_t arg0, reg_t arg1) \ - { \ -- uint64_t result64; \ -+ reg_t result64; \ - asm volatile (#name "q %2, %1, %0" : "=r"(result64) : c0(arg0), c1(arg1)); \ - return result64; \ - } - - #define insn2l(name, arg0, c0, arg1, c1) \ --static inline uint32_t name##l(uint32_t arg0, uint32_t arg1) \ -+static inline reg_t name##l(reg_t arg0, reg_t arg1) \ - { \ -- uint32_t result32; \ -+ reg_t result32; \ - asm volatile (#name "l %k2, %k1, %k0" : 
"=r"(result32) : c0(arg0), c1(arg1)); \ - return result32; \ - } -@@ -65,130 +71,128 @@ insn1l(blsr, src) - int main(int argc, char *argv[]) { - uint64_t ehlo = 0x202020204f4c4845ull; - uint64_t mask = 0xa080800302020001ull; -- uint32_t result32; -+ reg_t result; - - #ifdef __x86_64 -- uint64_t result64; -- - /* 64 bits */ -- result64 = andnq(mask, ehlo); -- assert(result64 == 0x002020204d4c4844); -+ result = andnq(mask, ehlo); -+ assert(result == 0x002020204d4c4844); - -- result64 = pextq(ehlo, mask); -- assert(result64 == 133); -+ result = pextq(ehlo, mask); -+ assert(result == 133); - -- result64 = pdepq(result64, mask); -- assert(result64 == (ehlo & mask)); -+ result = pdepq(result, mask); -+ assert(result == (ehlo & mask)); - -- result64 = pextq(-1ull, mask); -- assert(result64 == 511); /* mask has 9 bits set */ -+ result = pextq(-1ull, mask); -+ assert(result == 511); /* mask has 9 bits set */ - -- result64 = pdepq(-1ull, mask); -- assert(result64 == mask); -+ result = pdepq(-1ull, mask); -+ assert(result == mask); - -- result64 = bextrq(mask, 0x3f00); -- assert(result64 == (mask & ~INT64_MIN)); -+ result = bextrq(mask, 0x3f00); -+ assert(result == (mask & ~INT64_MIN)); - -- result64 = bextrq(mask, 0x1038); -- assert(result64 == 0xa0); -+ result = bextrq(mask, 0x1038); -+ assert(result == 0xa0); - -- result64 = bextrq(mask, 0x10f8); -- assert(result64 == 0); -+ result = bextrq(mask, 0x10f8); -+ assert(result == 0); - -- result64 = blsiq(0x30); -- assert(result64 == 0x10); -+ result = blsiq(0x30); -+ assert(result == 0x10); - -- result64 = blsiq(0x30ull << 32); -- assert(result64 == 0x10ull << 32); -+ result = blsiq(0x30ull << 32); -+ assert(result == 0x10ull << 32); - -- result64 = blsmskq(0x30); -- assert(result64 == 0x1f); -+ result = blsmskq(0x30); -+ assert(result == 0x1f); - -- result64 = blsrq(0x30); -- assert(result64 == 0x20); -+ result = blsrq(0x30); -+ assert(result == 0x20); - -- result64 = blsrq(0x30ull << 32); -- assert(result64 == 0x20ull << 
32); -+ result = blsrq(0x30ull << 32); -+ assert(result == 0x20ull << 32); - -- result64 = bzhiq(mask, 0x3f); -- assert(result64 == (mask & ~INT64_MIN)); -+ result = bzhiq(mask, 0x3f); -+ assert(result == (mask & ~INT64_MIN)); - -- result64 = bzhiq(mask, 0x1f); -- assert(result64 == (mask & ~(-1 << 30))); -+ result = bzhiq(mask, 0x1f); -+ assert(result == (mask & ~(-1 << 30))); - -- result64 = rorxq(0x2132435465768798, 8); -- assert(result64 == 0x9821324354657687); -+ result = rorxq(0x2132435465768798, 8); -+ assert(result == 0x9821324354657687); - -- result64 = sarxq(0xffeeddccbbaa9988, 8); -- assert(result64 == 0xffffeeddccbbaa99); -+ result = sarxq(0xffeeddccbbaa9988, 8); -+ assert(result == 0xffffeeddccbbaa99); - -- result64 = sarxq(0x77eeddccbbaa9988, 8 | 64); -- assert(result64 == 0x0077eeddccbbaa99); -+ result = sarxq(0x77eeddccbbaa9988, 8 | 64); -+ assert(result == 0x0077eeddccbbaa99); - -- result64 = shrxq(0xffeeddccbbaa9988, 8); -- assert(result64 == 0x00ffeeddccbbaa99); -+ result = shrxq(0xffeeddccbbaa9988, 8); -+ assert(result == 0x00ffeeddccbbaa99); - -- result64 = shrxq(0x77eeddccbbaa9988, 8 | 192); -- assert(result64 == 0x0077eeddccbbaa99); -+ result = shrxq(0x77eeddccbbaa9988, 8 | 192); -+ assert(result == 0x0077eeddccbbaa99); - -- result64 = shlxq(0xffeeddccbbaa9988, 8); -- assert(result64 == 0xeeddccbbaa998800); -+ result = shlxq(0xffeeddccbbaa9988, 8); -+ assert(result == 0xeeddccbbaa998800); - #endif - - /* 32 bits */ -- result32 = andnl(mask, ehlo); -- assert(result32 == 0x04d4c4844); -+ result = andnl(mask, ehlo); -+ assert(result == 0x04d4c4844); - -- result32 = pextl((uint32_t) ehlo, mask); -- assert(result32 == 5); -+ result = pextl((uint32_t) ehlo, mask); -+ assert(result == 5); - -- result32 = pdepl(result32, mask); -- assert(result32 == (uint32_t)(ehlo & mask)); -+ result = pdepl(result, mask); -+ assert(result == (uint32_t)(ehlo & mask)); - -- result32 = pextl(-1u, mask); -- assert(result32 == 7); /* mask has 3 bits set */ -+ result = 
pextl(-1u, mask); -+ assert(result == 7); /* mask has 3 bits set */ - -- result32 = pdepl(-1u, mask); -- assert(result32 == (uint32_t)mask); -+ result = pdepl(-1u, mask); -+ assert(result == (uint32_t)mask); - -- result32 = bextrl(mask, 0x1f00); -- assert(result32 == (mask & ~INT32_MIN)); -+ result = bextrl(mask, 0x1f00); -+ assert(result == (mask & ~INT32_MIN)); - -- result32 = bextrl(ehlo, 0x1018); -- assert(result32 == 0x4f); -+ result = bextrl(ehlo, 0x1018); -+ assert(result == 0x4f); - -- result32 = bextrl(mask, 0x1038); -- assert(result32 == 0); -+ result = bextrl(mask, 0x1038); -+ assert(result == 0); - -- result32 = blsil(0xffff); -- assert(result32 == 1); -+ result = blsil(0xffff); -+ assert(result == 1); - -- result32 = blsmskl(0x300); -- assert(result32 == 0x1ff); -+ result = blsmskl(0x300); -+ assert(result == 0x1ff); - -- result32 = blsrl(0xffc); -- assert(result32 == 0xff8); -+ result = blsrl(0xffc); -+ assert(result == 0xff8); - -- result32 = bzhil(mask, 0xf); -- assert(result32 == 1); -+ result = bzhil(mask, 0xf); -+ assert(result == 1); - -- result32 = rorxl(0x65768798, 8); -- assert(result32 == 0x98657687); -+ result = rorxl(0x65768798, 8); -+ assert(result == 0x98657687); - -- result32 = sarxl(0xffeeddcc, 8); -- assert(result32 == 0xffffeedd); -+ result = sarxl(0xffeeddcc, 8); -+ assert(result == 0xffffeedd); - -- result32 = sarxl(0x77eeddcc, 8 | 32); -- assert(result32 == 0x0077eedd); -+ result = sarxl(0x77eeddcc, 8 | 32); -+ assert(result == 0x0077eedd); - -- result32 = shrxl(0xffeeddcc, 8); -- assert(result32 == 0x00ffeedd); -+ result = shrxl(0xffeeddcc, 8); -+ assert(result == 0x00ffeedd); - -- result32 = shrxl(0x77eeddcc, 8 | 128); -- assert(result32 == 0x0077eedd); -+ result = shrxl(0x77eeddcc, 8 | 128); -+ assert(result == 0x0077eedd); - -- result32 = shlxl(0xffeeddcc, 8); -- assert(result32 == 0xeeddcc00); -+ result = shlxl(0xffeeddcc, 8); -+ assert(result == 0xeeddcc00); - - return 0; - } --- -2.39.1 - diff --git 
a/kvm-util-userfaultfd-Add-uffd_open.patch b/kvm-util-userfaultfd-Add-uffd_open.patch deleted file mode 100644 index 5a5f90c..0000000 --- a/kvm-util-userfaultfd-Add-uffd_open.patch +++ /dev/null @@ -1,169 +0,0 @@ -From 80445fed73a7d1a87e8ce96f6cb7d505e437f845 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 1 Feb 2023 16:10:54 -0500 -Subject: [PATCH 4/8] util/userfaultfd: Add uffd_open() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 149: Support /dev/userfaultfd -RH-Bugzilla: 2158704 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: quintela1 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/3] 4c81696314ab26db47c3415fa2c2501c6a572b5c (peterx/qemu-kvm) - -Add a helper to create the uffd handle. - -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Juan Quintela -Signed-off-by: Peter Xu -Signed-off-by: Juan Quintela -(cherry picked from commit d5890ea0722831eea76a0efd23a496b3e8815fe8) -Signed-off-by: Peter Xu ---- - include/qemu/userfaultfd.h | 12 ++++++++++++ - migration/postcopy-ram.c | 11 +++++------ - tests/qtest/migration-test.c | 4 ++-- - util/userfaultfd.c | 13 +++++++++++-- - 4 files changed, 30 insertions(+), 10 deletions(-) - -diff --git a/include/qemu/userfaultfd.h b/include/qemu/userfaultfd.h -index 6b74f92792..d764496f0b 100644 ---- a/include/qemu/userfaultfd.h -+++ b/include/qemu/userfaultfd.h -@@ -13,10 +13,20 @@ - #ifndef USERFAULTFD_H - #define USERFAULTFD_H - -+#ifdef CONFIG_LINUX -+ - #include "qemu/osdep.h" - #include "exec/hwaddr.h" - #include - -+/** -+ * uffd_open(): Open an userfaultfd handle for current context. -+ * -+ * @flags: The flags we want to pass in when creating the handle. -+ * -+ * Returns: the uffd handle if >=0, or <0 if error happens. 
-+ */ -+int uffd_open(int flags); - int uffd_query_features(uint64_t *features); - int uffd_create_fd(uint64_t features, bool non_blocking); - void uffd_close_fd(int uffd_fd); -@@ -32,4 +42,6 @@ int uffd_wakeup(int uffd_fd, void *addr, uint64_t length); - int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count); - bool uffd_poll_events(int uffd_fd, int tmo); - -+#endif /* CONFIG_LINUX */ -+ - #endif /* USERFAULTFD_H */ -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index b9a37ef255..0c55df0e52 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -37,6 +37,7 @@ - #include "qemu-file.h" - #include "yank_functions.h" - #include "tls.h" -+#include "qemu/userfaultfd.h" - - /* Arbitrary limit on size of each discard command, - * keeps them around ~200 bytes -@@ -226,11 +227,9 @@ static bool receive_ufd_features(uint64_t *features) - int ufd; - bool ret = true; - -- /* if we are here __NR_userfaultfd should exists */ -- ufd = syscall(__NR_userfaultfd, O_CLOEXEC); -+ ufd = uffd_open(O_CLOEXEC); - if (ufd == -1) { -- error_report("%s: syscall __NR_userfaultfd failed: %s", __func__, -- strerror(errno)); -+ error_report("%s: uffd_open() failed: %s", __func__, strerror(errno)); - return false; - } - -@@ -375,7 +374,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - goto out; - } - -- ufd = syscall(__NR_userfaultfd, O_CLOEXEC); -+ ufd = uffd_open(O_CLOEXEC); - if (ufd == -1) { - error_report("%s: userfaultfd not available: %s", __func__, - strerror(errno)); -@@ -1160,7 +1159,7 @@ static int postcopy_temp_pages_setup(MigrationIncomingState *mis) - int postcopy_ram_incoming_setup(MigrationIncomingState *mis) - { - /* Open the fd for the kernel to give us userfaults */ -- mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); -+ mis->userfault_fd = uffd_open(O_CLOEXEC | O_NONBLOCK); - if (mis->userfault_fd == -1) { - error_report("%s: Failed to open userfault fd: %s", __func__, - 
strerror(errno)); -diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c -index dbde726adf..0100e1bdbc 100644 ---- a/tests/qtest/migration-test.c -+++ b/tests/qtest/migration-test.c -@@ -61,14 +61,14 @@ static bool uffd_feature_thread_id; - #if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD) - #include - #include --#include -+#include "qemu/userfaultfd.h" - - static bool ufd_version_check(void) - { - struct uffdio_api api_struct; - uint64_t ioctl_mask; - -- int ufd = syscall(__NR_userfaultfd, O_CLOEXEC); -+ int ufd = uffd_open(O_CLOEXEC); - - if (ufd == -1) { - g_test_message("Skipping test: userfaultfd not available"); -diff --git a/util/userfaultfd.c b/util/userfaultfd.c -index f1cd6af2b1..4953b3137d 100644 ---- a/util/userfaultfd.c -+++ b/util/userfaultfd.c -@@ -19,6 +19,15 @@ - #include - #include - -+int uffd_open(int flags) -+{ -+#if defined(__NR_userfaultfd) -+ return syscall(__NR_userfaultfd, flags); -+#else -+ return -EINVAL; -+#endif -+} -+ - /** - * uffd_query_features: query UFFD features - * -@@ -32,7 +41,7 @@ int uffd_query_features(uint64_t *features) - struct uffdio_api api_struct = { 0 }; - int ret = -1; - -- uffd_fd = syscall(__NR_userfaultfd, O_CLOEXEC); -+ uffd_fd = uffd_open(O_CLOEXEC); - if (uffd_fd < 0) { - trace_uffd_query_features_nosys(errno); - return -1; -@@ -69,7 +78,7 @@ int uffd_create_fd(uint64_t features, bool non_blocking) - uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER); - - flags = O_CLOEXEC | (non_blocking ? 
O_NONBLOCK : 0); -- uffd_fd = syscall(__NR_userfaultfd, flags); -+ uffd_fd = uffd_open(flags); - if (uffd_fd < 0) { - trace_uffd_create_fd_nosys(errno); - return -1; --- -2.31.1 - diff --git a/kvm-util-userfaultfd-Support-dev-userfaultfd.patch b/kvm-util-userfaultfd-Support-dev-userfaultfd.patch deleted file mode 100644 index b0a22eb..0000000 --- a/kvm-util-userfaultfd-Support-dev-userfaultfd.patch +++ /dev/null @@ -1,94 +0,0 @@ -From a91da7741464dadeb306a741b4fb562e49ffea57 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Tue, 7 Feb 2023 15:57:11 -0500 -Subject: [PATCH 5/8] util/userfaultfd: Support /dev/userfaultfd - -RH-Author: Peter Xu -RH-MergeRequest: 149: Support /dev/userfaultfd -RH-Bugzilla: 2158704 -RH-Acked-by: Dr. David Alan Gilbert -RH-Acked-by: quintela1 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/3] 5f427d8c18c210ff8f66724c9e358a7120619e69 (peterx/qemu-kvm) - -Teach QEMU to use /dev/userfaultfd when it existed and fallback to the -system call if either it's not there or doesn't have enough permission. - -Firstly, as long as the app has permission to access /dev/userfaultfd, it -always have the ability to trap kernel faults which QEMU mostly wants. -Meanwhile, in some context (e.g. containers) the userfaultfd syscall can be -forbidden, so it can be the major way to use postcopy in a restricted -environment with strict seccomp setup. 
- -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit c40c0463413b941c13fe5f99a90c02d7d6584828) -Signed-off-by: Peter Xu ---- - util/trace-events | 1 + - util/userfaultfd.c | 32 ++++++++++++++++++++++++++++++++ - 2 files changed, 33 insertions(+) - -diff --git a/util/trace-events b/util/trace-events -index c8f53d7d9f..16f78d8fe5 100644 ---- a/util/trace-events -+++ b/util/trace-events -@@ -93,6 +93,7 @@ qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_siz - qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p" - - #userfaultfd.c -+uffd_detect_open_mode(int mode) "%d" - uffd_query_features_nosys(int err) "errno: %i" - uffd_query_features_api_failed(int err) "errno: %i" - uffd_create_fd_nosys(int err) "errno: %i" -diff --git a/util/userfaultfd.c b/util/userfaultfd.c -index 4953b3137d..fdff4867e8 100644 ---- a/util/userfaultfd.c -+++ b/util/userfaultfd.c -@@ -18,10 +18,42 @@ - #include - #include - #include -+#include -+ -+typedef enum { -+ UFFD_UNINITIALIZED = 0, -+ UFFD_USE_DEV_PATH, -+ UFFD_USE_SYSCALL, -+} uffd_open_mode; - - int uffd_open(int flags) - { - #if defined(__NR_userfaultfd) -+ static uffd_open_mode open_mode; -+ static int uffd_dev; -+ -+ /* Detect how to generate uffd desc when run the 1st time */ -+ if (open_mode == UFFD_UNINITIALIZED) { -+ /* -+ * Make /dev/userfaultfd the default approach because it has better -+ * permission controls, meanwhile allows kernel faults without any -+ * privilege requirement (e.g. SYS_CAP_PTRACE). 
-+ */ -+ uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC); -+ if (uffd_dev >= 0) { -+ open_mode = UFFD_USE_DEV_PATH; -+ } else { -+ /* Fallback to the system call */ -+ open_mode = UFFD_USE_SYSCALL; -+ } -+ trace_uffd_detect_open_mode(open_mode); -+ } -+ -+ if (open_mode == UFFD_USE_DEV_PATH) { -+ assert(uffd_dev >= 0); -+ return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags); -+ } -+ - return syscall(__NR_userfaultfd, flags); - #else - return -EINVAL; --- -2.31.1 - diff --git a/kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch b/kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch deleted file mode 100644 index a56c6eb..0000000 --- a/kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch +++ /dev/null @@ -1,221 +0,0 @@ -From d0e7f24a8d941ab142f2a1973ae18ed1bfdc074f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:41 +0100 -Subject: [PATCH 09/14] vdpa: add asid parameter to vhost_vdpa_dma_map/unmap -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [9/13] 3e7f89e57f73661017ccf0206f2ea77a72ca46bb (eperezmartin/qemu-kvm) - -So the caller can choose which ASID is destined. - -No need to update the batch functions as they will always be called from -memory listener updates at the moment. Memory listener updates will -always update ASID 0, as it's the passthrough ASID. - -All vhost devices's ASID are 0 at this moment. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-10-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit cd831ed5c4add8ed6ee980c3645b241cbef5130f) ---- - hw/virtio/trace-events | 4 ++-- - hw/virtio/vhost-vdpa.c | 36 +++++++++++++++++++++++----------- - include/hw/virtio/vhost-vdpa.h | 14 ++++++++++--- - net/vhost-vdpa.c | 6 +++--- - 4 files changed, 41 insertions(+), 19 deletions(-) - -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index 46f2faf04e..a87c5f39a2 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -30,8 +30,8 @@ vhost_user_write(uint32_t req, uint32_t flags) "req:%d flags:0x%"PRIx32"" - vhost_user_create_notifier(int idx, void *n) "idx:%d n:%p" - - # vhost-vdpa.c --vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8 --vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 -+vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8 -+vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 - vhost_vdpa_listener_begin_batch(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 - vhost_vdpa_listener_commit(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 - vhost_vdpa_listener_region_add(void *vdpa, uint64_t iova, uint64_t llend, void *vaddr, bool readonly) "vdpa: %p iova 
0x%"PRIx64" llend 0x%"PRIx64" vaddr: %p read-only: %d" -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index dd2768634b..0ecf2bbaa0 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -72,22 +72,28 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, - return false; - } - --int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, -- void *vaddr, bool readonly) -+/* -+ * The caller must set asid = 0 if the device does not support asid. -+ * This is not an ABI break since it is set to 0 by the initializer anyway. -+ */ -+int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, -+ hwaddr size, void *vaddr, bool readonly) - { - struct vhost_msg_v2 msg = {}; - int fd = v->device_fd; - int ret = 0; - - msg.type = v->msg_type; -+ msg.asid = asid; - msg.iotlb.iova = iova; - msg.iotlb.size = size; - msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr; - msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW; - msg.iotlb.type = VHOST_IOTLB_UPDATE; - -- trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size, -- msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type); -+ trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.asid, msg.iotlb.iova, -+ msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm, -+ msg.iotlb.type); - - if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { - error_report("failed to write, fd=%d, errno=%d (%s)", -@@ -98,18 +104,24 @@ int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, - return ret; - } - --int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size) -+/* -+ * The caller must set asid = 0 if the device does not support asid. -+ * This is not an ABI break since it is set to 0 by the initializer anyway. 
-+ */ -+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, -+ hwaddr size) - { - struct vhost_msg_v2 msg = {}; - int fd = v->device_fd; - int ret = 0; - - msg.type = v->msg_type; -+ msg.asid = asid; - msg.iotlb.iova = iova; - msg.iotlb.size = size; - msg.iotlb.type = VHOST_IOTLB_INVALIDATE; - -- trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova, -+ trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.asid, msg.iotlb.iova, - msg.iotlb.size, msg.iotlb.type); - - if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { -@@ -229,8 +241,8 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - } - - vhost_vdpa_iotlb_batch_begin_once(v); -- ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize), -- vaddr, section->readonly); -+ ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova, -+ int128_get64(llsize), vaddr, section->readonly); - if (ret) { - error_report("vhost vdpa map fail!"); - goto fail_map; -@@ -303,7 +315,8 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, - vhost_iova_tree_remove(v->iova_tree, *result); - } - vhost_vdpa_iotlb_batch_begin_once(v); -- ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize)); -+ ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova, -+ int128_get64(llsize)); - if (ret) { - error_report("vhost_vdpa dma unmap error!"); - } -@@ -876,7 +889,7 @@ static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr) - } - - size = ROUND_UP(result->size, qemu_real_host_page_size()); -- r = vhost_vdpa_dma_unmap(v, result->iova, size); -+ r = vhost_vdpa_dma_unmap(v, v->address_space_id, result->iova, size); - if (unlikely(r < 0)) { - error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); - return; -@@ -916,7 +929,8 @@ static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, - return false; - } - -- r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1, -+ r = vhost_vdpa_dma_map(v, v->address_space_id, needle->iova, -+ 
needle->size + 1, - (void *)(uintptr_t)needle->translated_addr, - needle->perm == IOMMU_RO); - if (unlikely(r != 0)) { -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index 1111d85643..e57dfa1fd1 100644 ---- a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -19,6 +19,12 @@ - #include "hw/virtio/virtio.h" - #include "standard-headers/linux/vhost_types.h" - -+/* -+ * ASID dedicated to map guest's addresses. If SVQ is disabled it maps GPA to -+ * qemu's IOVA. If SVQ is enabled it maps also the SVQ vring here -+ */ -+#define VHOST_VDPA_GUEST_PA_ASID 0 -+ - typedef struct VhostVDPAHostNotifier { - MemoryRegion mr; - void *addr; -@@ -29,6 +35,7 @@ typedef struct vhost_vdpa { - int index; - uint32_t msg_type; - bool iotlb_batch_begin_sent; -+ uint32_t address_space_id; - MemoryListener listener; - struct vhost_vdpa_iova_range iova_range; - uint64_t acked_features; -@@ -42,8 +49,9 @@ typedef struct vhost_vdpa { - VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; - } VhostVDPA; - --int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, -- void *vaddr, bool readonly); --int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size); -+int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, -+ hwaddr size, void *vaddr, bool readonly); -+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, -+ hwaddr size); - - #endif -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 85aa0da39a..c2f319eb88 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -258,7 +258,7 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) - return; - } - -- r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1); -+ r = vhost_vdpa_dma_unmap(v, v->address_space_id, map->iova, map->size + 1); - if (unlikely(r != 0)) { - error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); - } -@@ -298,8 +298,8 @@ static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, 
void *buf, size_t size, - return r; - } - -- r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, -- !write); -+ r = vhost_vdpa_dma_map(v, v->address_space_id, map.iova, -+ vhost_vdpa_net_cvq_cmd_page_len(), buf, !write); - if (unlikely(r < 0)) { - goto dma_map_err; - } --- -2.31.1 - diff --git a/kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch b/kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch deleted file mode 100644 index 57c38d1..0000000 --- a/kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 6282a83619f274ca45a52d61577c10a05a0714dc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:43 +0100 -Subject: [PATCH 11/14] vdpa: add shadow_data to vhost_vdpa -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [11/13] 9d317add1318b555ba06e19e4c67849069e047b9 (eperezmartin/qemu-kvm) - -The memory listener that thells the device how to convert GPA to qemu's -va is registered against CVQ vhost_vdpa. memory listener translations -are always ASID 0, CVQ ones are ASID 1 if supported. - -Let's tell the listener if it needs to register them on iova tree or -not. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-12-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 6188d78a19894ac8f2bf9484d48a5235a529d3b7) ---- - hw/virtio/vhost-vdpa.c | 6 +++--- - include/hw/virtio/vhost-vdpa.h | 2 ++ - net/vhost-vdpa.c | 1 + - 3 files changed, 6 insertions(+), 3 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 0ecf2bbaa0..dc3498e995 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -224,7 +224,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - vaddr, section->readonly); - - llsize = int128_sub(llend, int128_make64(iova)); -- if (v->shadow_vqs_enabled) { -+ if (v->shadow_data) { - int r; - - mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr, -@@ -251,7 +251,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - return; - - fail_map: -- if (v->shadow_vqs_enabled) { -+ if (v->shadow_data) { - vhost_iova_tree_remove(v->iova_tree, mem_region); - } - -@@ -296,7 +296,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, - - llsize = int128_sub(llend, int128_make64(iova)); - -- if (v->shadow_vqs_enabled) { -+ if (v->shadow_data) { - const DMAMap *result; - const void *vaddr = memory_region_get_ram_ptr(section->mr) + - section->offset_within_region + -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index e57dfa1fd1..45b969a311 100644 ---- a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -40,6 +40,8 @@ typedef struct vhost_vdpa { - struct vhost_vdpa_iova_range iova_range; - uint64_t acked_features; - bool shadow_vqs_enabled; -+ /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */ -+ bool shadow_data; - /* IOVA mapping used by the Shadow Virtqueue */ - VhostIOVATree *iova_tree; - GPtrArray *shadow_vqs; -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 1757f1d028..eea7a0df12 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -581,6 +581,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer, - s->always_svq = svq; - s->vhost_vdpa.shadow_vqs_enabled = svq; - s->vhost_vdpa.iova_range = iova_range; -+ s->vhost_vdpa.shadow_data = svq; - s->vhost_vdpa.iova_tree = iova_tree; - if (!is_datapath) { - s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), --- -2.31.1 - diff --git a/kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch b/kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch deleted file mode 100644 index c54a831..0000000 --- a/kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 0f3a28e1e128754184c4af6a578f27e16c6a61d5 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:37 +0100 -Subject: [PATCH 05/14] vdpa: add vhost_vdpa_net_valid_svq_features -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/13] 0b27e04f178ec73cb800f4fb05c17a92576142e4 (eperezmartin/qemu-kvm) - -It will be reused at vdpa device start so let's extract in its own -function. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-6-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 36e4647247f200b6fa4d2f656133f567036e8a85) ---- - net/vhost-vdpa.c | 26 +++++++++++++++++--------- - 1 file changed, 17 insertions(+), 9 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index b06540ac89..16a5ebe2dd 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -106,6 +106,22 @@ VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) - return s->vhost_net; - } - -+static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp) -+{ -+ uint64_t invalid_dev_features = -+ features & ~vdpa_svq_device_features & -+ /* Transport are all accepted at this point */ -+ ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, -+ VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); -+ -+ if (invalid_dev_features) { -+ error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, -+ invalid_dev_features); -+ } -+ -+ return !invalid_dev_features; -+} -+ - static int vhost_vdpa_net_check_device_id(struct vhost_net *net) - { - uint32_t device_id; -@@ -684,15 +700,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - if (opts->x_svq) { - struct vhost_vdpa_iova_range iova_range; - -- uint64_t invalid_dev_features = -- features & ~vdpa_svq_device_features & -- /* Transport are all accepted at this point */ -- ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, -- VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); -- -- if (invalid_dev_features) { -- error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, -- invalid_dev_features); -+ if (!vhost_vdpa_net_valid_svq_features(features, errp)) { - goto err_svq; - } - --- -2.31.1 - diff --git a/kvm-vdpa-allocate-SVQ-array-unconditionally.patch b/kvm-vdpa-allocate-SVQ-array-unconditionally.patch deleted file mode 100644 index 22c5955..0000000 --- a/kvm-vdpa-allocate-SVQ-array-unconditionally.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 72f296870805750df8dfe5eaad77dd7d435a8f41 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 
15 Dec 2022 12:31:40 +0100 -Subject: [PATCH 08/14] vdpa: allocate SVQ array unconditionally -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/13] 08cd86d0859f82d768794e29241cfeff25df667c (eperezmartin/qemu-kvm) - -SVQ may run or not in a device depending on runtime conditions (for -example, if the device can move CVQ to its own group or not). - -Allocate the SVQ array unconditionally at startup, since its hard to -move this allocation elsewhere. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-9-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 273e0003f0005cc17292dedae01e5edb0064b69c) ---- - hw/virtio/vhost-vdpa.c | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 84218ce078..dd2768634b 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -532,10 +532,6 @@ static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev) - struct vhost_vdpa *v = dev->opaque; - size_t idx; - -- if (!v->shadow_vqs) { -- return; -- } -- - for (idx = 0; idx < v->shadow_vqs->len; ++idx) { - vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx)); - } --- -2.31.1 - diff --git a/kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch b/kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch deleted file mode 100644 index 9b78b5c..0000000 --- a/kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch +++ /dev/null @@ -1,193 +0,0 @@ -From 84c203faa570b85eec006215768c83371c9f0399 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:44 +0100 -Subject: [PATCH 12/14] vdpa: always start CVQ in SVQ mode if possible -MIME-Version: 1.0 
-Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [12/13] 83f94b3e163ca38d08dbf7c111a4cfa7a44e3dc2 (eperezmartin/qemu-kvm) - -Isolate control virtqueue in its own group, allowing to intercept control -commands but letting dataplane run totally passthrough to the guest. - -Signed-off-by: Eugenio Pérez -Message-Id: <20221215113144.322011-13-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Acked-by: Jason Wang -(cherry picked from commit c1a1008685af0327d9d03f03d43bdb77e7af5bea) ---- - hw/virtio/vhost-vdpa.c | 3 +- - net/vhost-vdpa.c | 110 ++++++++++++++++++++++++++++++++++++++++- - 2 files changed, 111 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index dc3498e995..72ff06673c 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -638,7 +638,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) - { - uint64_t features; - uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | -- 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH; -+ 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | -+ 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID; - int r; - - if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index eea7a0df12..07d33dae26 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -101,6 +101,8 @@ static const uint64_t vdpa_svq_device_features = - BIT_ULL(VIRTIO_NET_F_RSC_EXT) | - BIT_ULL(VIRTIO_NET_F_STANDBY); - -+#define VHOST_VDPA_NET_CVQ_ASID 1 -+ - VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -@@ -242,6 +244,40 @@ static NetClientInfo net_vhost_vdpa_info = { - .check_peer_type = vhost_vdpa_check_peer_type, - }; - -+static 
int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) -+{ -+ struct vhost_vring_state state = { -+ .index = vq_index, -+ }; -+ int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state); -+ -+ if (unlikely(r < 0)) { -+ error_report("Cannot get VQ %u group: %s", vq_index, -+ g_strerror(errno)); -+ return r; -+ } -+ -+ return state.num; -+} -+ -+static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v, -+ unsigned vq_group, -+ unsigned asid_num) -+{ -+ struct vhost_vring_state asid = { -+ .index = vq_group, -+ .num = asid_num, -+ }; -+ int r; -+ -+ r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid); -+ if (unlikely(r < 0)) { -+ error_report("Can't set vq group %u asid %u, errno=%d (%s)", -+ asid.index, asid.num, errno, g_strerror(errno)); -+ } -+ return r; -+} -+ - static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) - { - VhostIOVATree *tree = v->iova_tree; -@@ -316,11 +352,75 @@ dma_map_err: - static int vhost_vdpa_net_cvq_start(NetClientState *nc) - { - VhostVDPAState *s; -- int r; -+ struct vhost_vdpa *v; -+ uint64_t backend_features; -+ int64_t cvq_group; -+ int cvq_index, r; - - assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); - - s = DO_UPCAST(VhostVDPAState, nc, nc); -+ v = &s->vhost_vdpa; -+ -+ v->shadow_data = s->always_svq; -+ v->shadow_vqs_enabled = s->always_svq; -+ s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID; -+ -+ if (s->always_svq) { -+ /* SVQ is already configured for all virtqueues */ -+ goto out; -+ } -+ -+ /* -+ * If we early return in these cases SVQ will not be enabled. The migration -+ * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. -+ * -+ * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev -+ * yet. 
-+ */ -+ r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); -+ if (unlikely(r < 0)) { -+ error_report("Cannot get vdpa backend_features: %s(%d)", -+ g_strerror(errno), errno); -+ return -1; -+ } -+ if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) || -+ !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { -+ return 0; -+ } -+ -+ /* -+ * Check if all the virtqueues of the virtio device are in a different vq -+ * than the last vq. VQ group of last group passed in cvq_group. -+ */ -+ cvq_index = v->dev->vq_index_end - 1; -+ cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); -+ if (unlikely(cvq_group < 0)) { -+ return cvq_group; -+ } -+ for (int i = 0; i < cvq_index; ++i) { -+ int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); -+ -+ if (unlikely(group < 0)) { -+ return group; -+ } -+ -+ if (group == cvq_group) { -+ return 0; -+ } -+ } -+ -+ r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); -+ if (unlikely(r < 0)) { -+ return r; -+ } -+ -+ v->iova_tree = vhost_iova_tree_new(v->iova_range.first, -+ v->iova_range.last); -+ v->shadow_vqs_enabled = true; -+ s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID; -+ -+out: - if (!s->vhost_vdpa.shadow_vqs_enabled) { - return 0; - } -@@ -349,6 +449,14 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc) - if (s->vhost_vdpa.shadow_vqs_enabled) { - vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); - vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status); -+ if (!s->always_svq) { -+ /* -+ * If only the CVQ is shadowed we can delete this safely. -+ * If all the VQs are shadows this will be needed by the time the -+ * device is started again to register SVQ vrings and similar. 
-+ */ -+ g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); -+ } - } - } - --- -2.31.1 - diff --git a/kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch b/kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch deleted file mode 100644 index d800258..0000000 --- a/kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch +++ /dev/null @@ -1,44 +0,0 @@ -From fbb177ad84d562a20e51e71c73257d2ef85be2d9 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 21 Dec 2022 12:50:15 +0100 -Subject: [PATCH 4/9] vdpa: do not handle VIRTIO_NET_F_GUEST_ANNOUNCE in - vhost-vdpa -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 137: vDPA net SVQ guest announce support -RH-Bugzilla: 2141088 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Jason Wang -RH-Commit: [4/4] b3960a8b3e4ca569b1b1e6ceccf2051d8c4b1079 (eperezmartin/qemu-kvm) - -So qemu emulates it even in case the device does not support it. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221221115015.1400889-5-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 980003debddd18306ea2e1364b96598383c0e257) ---- - net/vhost-vdpa.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 52ef9cb3a2..b06540ac89 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -72,7 +72,6 @@ const int vdpa_feature_bits[] = { - VIRTIO_F_RING_RESET, - VIRTIO_NET_F_RSS, - VIRTIO_NET_F_HASH_REPORT, -- VIRTIO_NET_F_GUEST_ANNOUNCE, - VIRTIO_NET_F_STATUS, - VHOST_INVALID_FEATURE_BIT - }; --- -2.31.1 - diff --git a/kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch b/kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch deleted file mode 100644 index bb55256..0000000 --- a/kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 46e80a9350a02fdb5689638df96bc7389e953cf8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Tue, 17 Jan 2023 11:53:08 +0100 -Subject: [PATCH 13/14] vdpa: fix VHOST_BACKEND_F_IOTLB_ASID flag check -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [13/13] b7fb4b8e9ea26b6664a9179ed0a88376acf5115f (eperezmartin/qemu-kvm) - -VHOST_BACKEND_F_IOTLB_ASID is the feature bit, not the bitmask. Since -the device under test also provided VHOST_BACKEND_F_IOTLB_MSG_V2 and -VHOST_BACKEND_F_IOTLB_BATCH, this went unnoticed. - -Fixes: c1a1008685 ("vdpa: always start CVQ in SVQ mode if possible") -Signed-off-by: Eugenio Pérez -Reviewed-by: Michael S. 
Tsirkin -Acked-by: Jason Wang -Signed-off-by: Jason Wang - -Upstream status: git@github.com:jasowang/qemu.git -(cherry picked from commit 2bd492bca521ee8594f1d5db8dc9aac126fc4f85) ---- - net/vhost-vdpa.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 07d33dae26..7d9c4ea09d 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -384,7 +384,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) - g_strerror(errno), errno); - return -1; - } -- if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) || -+ if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || - !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { - return 0; - } --- -2.31.1 - diff --git a/kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch b/kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch deleted file mode 100644 index ebb7f38..0000000 --- a/kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch +++ /dev/null @@ -1,59 +0,0 @@ -From b71724e94c94acd6e09fed2b47be2901799c2353 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 21 Dec 2022 12:50:14 +0100 -Subject: [PATCH 3/9] vdpa: handle VIRTIO_NET_CTRL_ANNOUNCE in - vhost_vdpa_net_handle_ctrl_avail -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 137: vDPA net SVQ guest announce support -RH-Bugzilla: 2141088 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Jason Wang -RH-Commit: [3/4] c4ef5b62a5d41911565b8960a88bb48d746ff6c7 (eperezmartin/qemu-kvm) - -Since this capability is emulated by qemu shadowed CVQ cannot forward it -to the device. Process all that command within qemu. - -Signed-off-by: Eugenio Pérez -Message-Id: <20221221115015.1400889-4-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Acked-by: Jason Wang -(cherry picked from commit 3f9a3eeb7ca6acd899e2205a9118928b4cd94e47) ---- - net/vhost-vdpa.c | 15 ++++++++++++--- - 1 file changed, 12 insertions(+), 3 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 2b4b85d8f8..52ef9cb3a2 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -489,9 +489,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, - out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, - s->cvq_cmd_out_buffer, - vhost_vdpa_net_cvq_cmd_len()); -- dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); -- if (unlikely(dev_written < 0)) { -- goto out; -+ if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) { -+ /* -+ * Guest announce capability is emulated by qemu, so don't forward to -+ * the device. -+ */ -+ dev_written = sizeof(status); -+ *s->status = VIRTIO_NET_OK; -+ } else { -+ dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); -+ if (unlikely(dev_written < 0)) { -+ goto out; -+ } - } - - if (unlikely(dev_written < sizeof(status))) { --- -2.31.1 - diff --git a/kvm-vdpa-move-SVQ-vring-features-check-to-net.patch b/kvm-vdpa-move-SVQ-vring-features-check-to-net.patch deleted file mode 100644 index 7cda847..0000000 --- a/kvm-vdpa-move-SVQ-vring-features-check-to-net.patch +++ /dev/null @@ -1,118 +0,0 @@ -From 63a45add7c9f7bb2b7775ae4cb2d7df22f7f2033 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:39 +0100 -Subject: [PATCH 07/14] vdpa: move SVQ vring features check to net/ -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/13] a24189aea4dbde3ed4486f685d0d88aeee1a0ee7 (eperezmartin/qemu-kvm) - -The next patches will start control SVQ if 
possible. However, we don't -know if that will be possible at qemu boot anymore. - -Since the moved checks will be already evaluated at net/ to know if it -is ok to shadow CVQ, move them. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-8-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 258a03941fd23108a322d09abc9c55341e09688d) ---- - hw/virtio/vhost-vdpa.c | 32 ++------------------------------ - net/vhost-vdpa.c | 3 ++- - 2 files changed, 4 insertions(+), 31 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 9e7cbf1776..84218ce078 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -389,29 +389,9 @@ static int vhost_vdpa_get_dev_features(struct vhost_dev *dev, - return ret; - } - --static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, -- Error **errp) -+static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v) - { - g_autoptr(GPtrArray) shadow_vqs = NULL; -- uint64_t dev_features, svq_features; -- int r; -- bool ok; -- -- if (!v->shadow_vqs_enabled) { -- return 0; -- } -- -- r = vhost_vdpa_get_dev_features(hdev, &dev_features); -- if (r != 0) { -- error_setg_errno(errp, -r, "Can't get vdpa device features"); -- return r; -- } -- -- svq_features = dev_features; -- ok = vhost_svq_valid_features(svq_features, errp); -- if (unlikely(!ok)) { -- return -1; -- } - - shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); - for (unsigned n = 0; n < hdev->nvqs; ++n) { -@@ -422,7 +402,6 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, - } - - v->shadow_vqs = g_steal_pointer(&shadow_vqs); -- return 0; - } - - static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) -@@ -447,10 +426,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) - dev->opaque = opaque ; - v->listener = 
vhost_vdpa_memory_listener; - v->msg_type = VHOST_IOTLB_MSG_V2; -- ret = vhost_vdpa_init_svq(dev, v, errp); -- if (ret) { -- goto err; -- } -+ vhost_vdpa_init_svq(dev, v); - - if (!vhost_vdpa_first_dev(dev)) { - return 0; -@@ -460,10 +436,6 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) - VIRTIO_CONFIG_S_DRIVER); - - return 0; -- --err: -- ram_block_discard_disable(false); -- return ret; - } - - static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev, -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 8d3ed095d0..85aa0da39a 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -117,9 +117,10 @@ static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp) - if (invalid_dev_features) { - error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, - invalid_dev_features); -+ return false; - } - -- return !invalid_dev_features; -+ return vhost_svq_valid_features(features, errp); - } - - static int vhost_vdpa_net_check_device_id(struct vhost_net *net) --- -2.31.1 - diff --git a/kvm-vdpa-request-iova_range-only-once.patch b/kvm-vdpa-request-iova_range-only-once.patch deleted file mode 100644 index 041e8f7..0000000 --- a/kvm-vdpa-request-iova_range-only-once.patch +++ /dev/null @@ -1,145 +0,0 @@ -From 760169d538a4e6ba61006f6796cd55af967a7f1e Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:38 +0100 -Subject: [PATCH 06/14] vdpa: request iova_range only once -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/13] 2a8ae2f46ae88f01c5535038f38cb7895098b610 (eperezmartin/qemu-kvm) - -Currently iova range is requested once per queue pair in the case of -net. 
Reduce the number of ioctls asking it once at initialization and -reusing that value for each vhost_vdpa. - -Signed-off-by: Eugenio Pérez -Message-Id: <20221215113144.322011-7-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Acked-by: Jason Wang -(cherry picked from commit a585fad26b2e6ccca156d9e65158ad1c5efd268d) ---- - hw/virtio/vhost-vdpa.c | 15 --------------- - net/vhost-vdpa.c | 27 ++++++++++++++------------- - 2 files changed, 14 insertions(+), 28 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index e65603022f..9e7cbf1776 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -365,19 +365,6 @@ static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status) - return 0; - } - --static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) --{ -- int ret = vhost_vdpa_call(v->dev, VHOST_VDPA_GET_IOVA_RANGE, -- &v->iova_range); -- if (ret != 0) { -- v->iova_range.first = 0; -- v->iova_range.last = UINT64_MAX; -- } -- -- trace_vhost_vdpa_get_iova_range(v->dev, v->iova_range.first, -- v->iova_range.last); --} -- - /* - * The use of this function is for requests that only need to be - * applied once. 
Typically such request occurs at the beginning -@@ -465,8 +452,6 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) - goto err; - } - -- vhost_vdpa_get_iova_range(v); -- - if (!vhost_vdpa_first_dev(dev)) { - return 0; - } -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 16a5ebe2dd..8d3ed095d0 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -549,14 +549,15 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { - }; - - static NetClientState *net_vhost_vdpa_init(NetClientState *peer, -- const char *device, -- const char *name, -- int vdpa_device_fd, -- int queue_pair_index, -- int nvqs, -- bool is_datapath, -- bool svq, -- VhostIOVATree *iova_tree) -+ const char *device, -+ const char *name, -+ int vdpa_device_fd, -+ int queue_pair_index, -+ int nvqs, -+ bool is_datapath, -+ bool svq, -+ struct vhost_vdpa_iova_range iova_range, -+ VhostIOVATree *iova_tree) - { - NetClientState *nc = NULL; - VhostVDPAState *s; -@@ -575,6 +576,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - s->vhost_vdpa.device_fd = vdpa_device_fd; - s->vhost_vdpa.index = queue_pair_index; - s->vhost_vdpa.shadow_vqs_enabled = svq; -+ s->vhost_vdpa.iova_range = iova_range; - s->vhost_vdpa.iova_tree = iova_tree; - if (!is_datapath) { - s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), -@@ -654,6 +656,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - int vdpa_device_fd; - g_autofree NetClientState **ncs = NULL; - g_autoptr(VhostIOVATree) iova_tree = NULL; -+ struct vhost_vdpa_iova_range iova_range; - NetClientState *nc; - int queue_pairs, r, i = 0, has_cvq = 0; - -@@ -697,14 +700,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - return queue_pairs; - } - -+ vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); - if (opts->x_svq) { -- struct vhost_vdpa_iova_range iova_range; -- - if (!vhost_vdpa_net_valid_svq_features(features, errp)) { - goto err_svq; - } - -- 
vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); - iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last); - } - -@@ -713,7 +714,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - for (i = 0; i < queue_pairs; i++) { - ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 2, true, opts->x_svq, -- iova_tree); -+ iova_range, iova_tree); - if (!ncs[i]) - goto err; - } -@@ -721,7 +722,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - if (has_cvq) { - nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 1, false, -- opts->x_svq, iova_tree); -+ opts->x_svq, iova_range, iova_tree); - if (!nc) - goto err; - } --- -2.31.1 - diff --git a/kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch b/kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch deleted file mode 100644 index 68c0c86..0000000 --- a/kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 28163d7d61b6b0b8312b78d57dabc8f44bf39c46 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:42 +0100 -Subject: [PATCH 10/14] vdpa: store x-svq parameter in VhostVDPAState -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [10/13] 53f3b2698b4a5caca434f55e4300103a78778548 (eperezmartin/qemu-kvm) - -CVQ can be shadowed two ways: -- Device has x-svq=on parameter (current way) -- The device can isolate CVQ in its own vq group - -QEMU needs to check for the second condition dynamically, because CVQ -index is not known before the driver ack the features. Since this is -dynamic, the CVQ isolation could vary with different conditions, making -it possible to go from "not isolated group" to "isolated". 
- -Saving the cmdline parameter in an extra field so we never disable CVQ -SVQ in case the device was started with x-svq cmdline. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-11-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 7f211a28fd5482f76583988beecd8ee61588d45e) ---- - net/vhost-vdpa.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index c2f319eb88..1757f1d028 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -38,6 +38,8 @@ typedef struct VhostVDPAState { - void *cvq_cmd_out_buffer; - virtio_net_ctrl_ack *status; - -+ /* The device always have SVQ enabled */ -+ bool always_svq; - bool started; - } VhostVDPAState; - -@@ -576,6 +578,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - - s->vhost_vdpa.device_fd = vdpa_device_fd; - s->vhost_vdpa.index = queue_pair_index; -+ s->always_svq = svq; - s->vhost_vdpa.shadow_vqs_enabled = svq; - s->vhost_vdpa.iova_range = iova_range; - s->vhost_vdpa.iova_tree = iova_tree; --- -2.31.1 - diff --git a/kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch b/kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch deleted file mode 100644 index 3d11438..0000000 --- a/kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch +++ /dev/null @@ -1,58 +0,0 @@ -From cb974f2f9a0c5b9520b6ac80bd1d1e4a6b12bbdc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:33 +0100 -Subject: [PATCH 01/14] vdpa: use v->shadow_vqs_enabled in - vhost_vdpa_svqs_start & stop -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/13] f0db50a95f87dd011418617be7b80aa6813a1146 
(eperezmartin/qemu-kvm) - -This function used to trust in v->shadow_vqs != NULL to know if it must -start svq or not. - -This is not going to be valid anymore, as qemu is going to allocate svq -array unconditionally (but it will only start them conditionally). - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-2-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 712c1a3171cf62d501dac5af58f77d5fea70350d) ---- - hw/virtio/vhost-vdpa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index c5be2645b0..44e6a9b7b3 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -1036,7 +1036,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) - Error *err = NULL; - unsigned i; - -- if (!v->shadow_vqs) { -+ if (!v->shadow_vqs_enabled) { - return true; - } - -@@ -1089,7 +1089,7 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) - { - struct vhost_vdpa *v = dev->opaque; - -- if (!v->shadow_vqs) { -+ if (!v->shadow_vqs_enabled) { - return; - } - --- -2.31.1 - diff --git a/kvm-vhost-add-support-for-configure-interrupt.patch b/kvm-vhost-add-support-for-configure-interrupt.patch deleted file mode 100644 index a7cfb2f..0000000 --- a/kvm-vhost-add-support-for-configure-interrupt.patch +++ /dev/null @@ -1,185 +0,0 @@ -From 42818e2bc6fa537fe52f7f0e6b094774a1eb00e1 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:48 +0800 -Subject: [PATCH 07/31] vhost: add support for configure interrupt -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/10] d58b439eb093f5dd3b7ca081af0ab75780e42917 (lulu6/qemu-kvm3) 
- -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add functions to support configure interrupt. -The configure interrupt process will start in vhost_dev_start -and stop in vhost_dev_stop. - -Also add the functions to support vhost_config_pending and -vhost_config_mask. - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-8-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit f9a09ca3ea69d108d828b7c82f1bd61b2df6fc96) -Signed-off-by: Cindy Lu ---- - hw/virtio/vhost.c | 78 ++++++++++++++++++++++++++++++++++++++- - include/hw/virtio/vhost.h | 4 ++ - 2 files changed, 81 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 7fb008bc9e..84dbb39e07 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -1596,7 +1596,68 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, - file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n); - r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file); - if (r < 0) { -- VHOST_OPS_DEBUG(r, "vhost_set_vring_call failed"); -+ error_report("vhost_set_vring_call failed %d", -r); -+ } -+} -+ -+bool vhost_config_pending(struct vhost_dev *hdev) -+{ -+ assert(hdev->vhost_ops); -+ if ((hdev->started == false) || -+ (hdev->vhost_ops->vhost_set_config_call == NULL)) { -+ return false; -+ } -+ -+ EventNotifier *notifier = -+ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; -+ return event_notifier_test_and_clear(notifier); -+} -+ -+void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask) -+{ -+ int fd; -+ int r; -+ EventNotifier *notifier = -+ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; -+ EventNotifier *config_notifier = &vdev->config_notifier; -+ assert(hdev->vhost_ops); -+ -+ if ((hdev->started == false) || -+ (hdev->vhost_ops->vhost_set_config_call == NULL)) { -+ return; -+ } -+ if (mask) { -+ 
assert(vdev->use_guest_notifier_mask); -+ fd = event_notifier_get_fd(notifier); -+ } else { -+ fd = event_notifier_get_fd(config_notifier); -+ } -+ r = hdev->vhost_ops->vhost_set_config_call(hdev, fd); -+ if (r < 0) { -+ error_report("vhost_set_config_call failed %d", -r); -+ } -+} -+ -+static void vhost_stop_config_intr(struct vhost_dev *dev) -+{ -+ int fd = -1; -+ assert(dev->vhost_ops); -+ if (dev->vhost_ops->vhost_set_config_call) { -+ dev->vhost_ops->vhost_set_config_call(dev, fd); -+ } -+} -+ -+static void vhost_start_config_intr(struct vhost_dev *dev) -+{ -+ int r; -+ -+ assert(dev->vhost_ops); -+ int fd = event_notifier_get_fd(&dev->vdev->config_notifier); -+ if (dev->vhost_ops->vhost_set_config_call) { -+ r = dev->vhost_ops->vhost_set_config_call(dev, fd); -+ if (!r) { -+ event_notifier_set(&dev->vdev->config_notifier); -+ } - } - } - -@@ -1836,6 +1897,16 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) - } - } - -+ r = event_notifier_init( -+ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier, 0); -+ if (r < 0) { -+ return r; -+ } -+ event_notifier_test_and_clear( -+ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier); -+ if (!vdev->use_guest_notifier_mask) { -+ vhost_config_mask(hdev, vdev, true); -+ } - if (hdev->log_enabled) { - uint64_t log_base; - -@@ -1874,6 +1945,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) - vhost_device_iotlb_miss(hdev, vq->used_phys, true); - } - } -+ vhost_start_config_intr(hdev); - return 0; - fail_start: - if (vrings) { -@@ -1903,6 +1975,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) - - /* should only be called after backend is connected */ - assert(hdev->vhost_ops); -+ event_notifier_test_and_clear( -+ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier); -+ event_notifier_test_and_clear(&vdev->config_notifier); - - trace_vhost_dev_stop(hdev, vdev->name, vrings); - -@@ -1925,6 +2000,7 @@ 
void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) - } - memory_listener_unregister(&hdev->iommu_listener); - } -+ vhost_stop_config_intr(hdev); - vhost_log_put(hdev, true); - hdev->started = false; - vdev->vhost_started = false; -diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h -index 67a6807fac..05bedb2416 100644 ---- a/include/hw/virtio/vhost.h -+++ b/include/hw/virtio/vhost.h -@@ -33,6 +33,7 @@ struct vhost_virtqueue { - unsigned used_size; - EventNotifier masked_notifier; - EventNotifier error_notifier; -+ EventNotifier masked_config_notifier; - struct vhost_dev *dev; - }; - -@@ -41,6 +42,7 @@ typedef unsigned long vhost_log_chunk_t; - #define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t)) - #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS) - #define VHOST_INVALID_FEATURE_BIT (0xff) -+#define VHOST_QUEUE_NUM_CONFIG_INR 0 - - struct vhost_log { - unsigned long long size; -@@ -168,6 +170,8 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); - * Disable direct notifications to vhost device. 
- */ - void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); -+bool vhost_config_pending(struct vhost_dev *hdev); -+void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask); - - /** - * vhost_dev_is_started() - report status of vhost device --- -2.31.1 - diff --git a/kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch b/kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch deleted file mode 100644 index 940133b..0000000 --- a/kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch +++ /dev/null @@ -1,171 +0,0 @@ -From bffccbd59a2e2c641810cd7362c7b5ecf5989ed8 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:35 +0100 -Subject: [PATCH 03/14] vhost: allocate SVQ device file descriptors at device - start -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/13] bab2d43f0fc0d13a4917e706244b37e1a431b082 (eperezmartin/qemu-kvm) - -The next patches will start control SVQ if possible. However, we don't -know if that will be possible at qemu boot anymore. - -Delay device file descriptors until we know it at device start. This -will avoid to create them if the device does not support SVQ. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-4-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 3cfb4d069cd2977b707fb519c455d7d416e1f4b0) ---- - hw/virtio/vhost-shadow-virtqueue.c | 31 ++------------------------ - hw/virtio/vhost-vdpa.c | 35 ++++++++++++++++++++++++------ - 2 files changed, 30 insertions(+), 36 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 264ddc166d..3b05bab44d 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -715,43 +715,18 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) - * @iova_tree: Tree to perform descriptors translations - * @ops: SVQ owner callbacks - * @ops_opaque: ops opaque pointer -- * -- * Returns the new virtqueue or NULL. -- * -- * In case of error, reason is reported through error_report. - */ - VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, - const VhostShadowVirtqueueOps *ops, - void *ops_opaque) - { -- g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); -- int r; -- -- r = event_notifier_init(&svq->hdev_kick, 0); -- if (r != 0) { -- error_report("Couldn't create kick event notifier: %s (%d)", -- g_strerror(errno), errno); -- goto err_init_hdev_kick; -- } -- -- r = event_notifier_init(&svq->hdev_call, 0); -- if (r != 0) { -- error_report("Couldn't create call event notifier: %s (%d)", -- g_strerror(errno), errno); -- goto err_init_hdev_call; -- } -+ VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); - - event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); - svq->iova_tree = iova_tree; - svq->ops = ops; - svq->ops_opaque = ops_opaque; -- return g_steal_pointer(&svq); -- --err_init_hdev_call: -- event_notifier_cleanup(&svq->hdev_kick); -- --err_init_hdev_kick: -- return NULL; -+ return svq; - } - - /** -@@ -763,7 +738,5 @@ void vhost_svq_free(gpointer pvq) - { - VhostShadowVirtqueue *vq = pvq; - vhost_svq_stop(vq); -- event_notifier_cleanup(&vq->hdev_kick); -- event_notifier_cleanup(&vq->hdev_call); - g_free(vq); - } -diff --git 
a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 44e6a9b7b3..530d2ca362 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -428,15 +428,11 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, - - shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); - for (unsigned n = 0; n < hdev->nvqs; ++n) { -- g_autoptr(VhostShadowVirtqueue) svq; -+ VhostShadowVirtqueue *svq; - - svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops, - v->shadow_vq_ops_opaque); -- if (unlikely(!svq)) { -- error_setg(errp, "Cannot create svq %u", n); -- return -1; -- } -- g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq)); -+ g_ptr_array_add(shadow_vqs, svq); - } - - v->shadow_vqs = g_steal_pointer(&shadow_vqs); -@@ -871,11 +867,23 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, - const EventNotifier *event_notifier = &svq->hdev_kick; - int r; - -+ r = event_notifier_init(&svq->hdev_kick, 0); -+ if (r != 0) { -+ error_setg_errno(errp, -r, "Couldn't create kick event notifier"); -+ goto err_init_hdev_kick; -+ } -+ -+ r = event_notifier_init(&svq->hdev_call, 0); -+ if (r != 0) { -+ error_setg_errno(errp, -r, "Couldn't create call event notifier"); -+ goto err_init_hdev_call; -+ } -+ - file.fd = event_notifier_get_fd(event_notifier); - r = vhost_vdpa_set_vring_dev_kick(dev, &file); - if (unlikely(r != 0)) { - error_setg_errno(errp, -r, "Can't set device kick fd"); -- return r; -+ goto err_init_set_dev_fd; - } - - event_notifier = &svq->hdev_call; -@@ -883,8 +891,18 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, - r = vhost_vdpa_set_vring_dev_call(dev, &file); - if (unlikely(r != 0)) { - error_setg_errno(errp, -r, "Can't set device call fd"); -+ goto err_init_set_dev_fd; - } - -+ return 0; -+ -+err_init_set_dev_fd: -+ event_notifier_set_handler(&svq->hdev_call, NULL); -+ -+err_init_hdev_call: -+ event_notifier_cleanup(&svq->hdev_kick); -+ -+err_init_hdev_kick: - return r; - } - -@@ -1096,6 +1114,9 @@ static void 
vhost_vdpa_svqs_stop(struct vhost_dev *dev) - for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); - vhost_vdpa_svq_unmap_rings(dev, svq); -+ -+ event_notifier_cleanup(&svq->hdev_kick); -+ event_notifier_cleanup(&svq->hdev_call); - } - } - --- -2.31.1 - diff --git a/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch b/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch deleted file mode 100644 index ca93785..0000000 --- a/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch +++ /dev/null @@ -1,157 +0,0 @@ -From 55aad90e347599e88747888ddbefcba33427f386 Mon Sep 17 00:00:00 2001 -From: Jason Wang -Date: Fri, 16 Dec 2022 11:35:52 +0800 -Subject: [PATCH 12/31] vhost: fix vq dirty bitmap syncing when vIOMMU is - enabled - -RH-Author: Eric Auger -RH-MergeRequest: 134: vhost: fix vq dirty bitmap syncing when vIOMMU is enabled -RH-Bugzilla: 2124856 -RH-Acked-by: Peter Xu -RH-Acked-by: Jason Wang -RH-Acked-by: Laurent Vivier -RH-Commit: [1/1] 57ef499b63dc2cca6e64ee84d1dc127635868ca2 (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2124856 -Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=49989924 -Upstream: yes - -When vIOMMU is enabled, the vq->used_phys is actually the IOVA not -GPA. So we need to translate it to GPA before the syncing otherwise we -may hit the following crash since IOVA could be out of the scope of -the GPA log size. This could be noted when using virtio-IOMMU with -vhost using 1G memory. - -Fixes: c471ad0e9bd46 ("vhost_net: device IOTLB support") -Cc: qemu-stable@nongnu.org -Tested-by: Lei Yang -Reported-by: Yalan Zhang -Signed-off-by: Jason Wang -Message-Id: <20221216033552.77087-1-jasowang@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 345cc1cbcbce2bab00abc2b88338d7d89c702d6b) -Signed-off-by: Eric Auger ---- - hw/virtio/vhost.c | 84 ++++++++++++++++++++++++++++++++++++----------- - 1 file changed, 64 insertions(+), 20 deletions(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index 84dbb39e07..2c566dc539 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -20,6 +20,7 @@ - #include "qemu/range.h" - #include "qemu/error-report.h" - #include "qemu/memfd.h" -+#include "qemu/log.h" - #include "standard-headers/linux/vhost_types.h" - #include "hw/virtio/virtio-bus.h" - #include "hw/virtio/virtio-access.h" -@@ -106,6 +107,24 @@ static void vhost_dev_sync_region(struct vhost_dev *dev, - } - } - -+static bool vhost_dev_has_iommu(struct vhost_dev *dev) -+{ -+ VirtIODevice *vdev = dev->vdev; -+ -+ /* -+ * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support -+ * incremental memory mapping API via IOTLB API. For platform that -+ * does not have IOMMU, there's no need to enable this feature -+ * which may cause unnecessary IOTLB miss/update transactions. 
-+ */ -+ if (vdev) { -+ return virtio_bus_device_iommu_enabled(vdev) && -+ virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); -+ } else { -+ return false; -+ } -+} -+ - static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, - MemoryRegionSection *section, - hwaddr first, -@@ -137,8 +156,51 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, - continue; - } - -- vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys, -- range_get_last(vq->used_phys, vq->used_size)); -+ if (vhost_dev_has_iommu(dev)) { -+ IOMMUTLBEntry iotlb; -+ hwaddr used_phys = vq->used_phys, used_size = vq->used_size; -+ hwaddr phys, s, offset; -+ -+ while (used_size) { -+ rcu_read_lock(); -+ iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as, -+ used_phys, -+ true, -+ MEMTXATTRS_UNSPECIFIED); -+ rcu_read_unlock(); -+ -+ if (!iotlb.target_as) { -+ qemu_log_mask(LOG_GUEST_ERROR, "translation " -+ "failure for used_iova %"PRIx64"\n", -+ used_phys); -+ return -EINVAL; -+ } -+ -+ offset = used_phys & iotlb.addr_mask; -+ phys = iotlb.translated_addr + offset; -+ -+ /* -+ * Distance from start of used ring until last byte of -+ * IOMMU page. -+ */ -+ s = iotlb.addr_mask - offset; -+ /* -+ * Size of used ring, or of the part of it until end -+ * of IOMMU page. To avoid zero result, do the adding -+ * outside of MIN(). 
-+ */ -+ s = MIN(s, used_size - 1) + 1; -+ -+ vhost_dev_sync_region(dev, section, start_addr, end_addr, phys, -+ range_get_last(phys, s)); -+ used_size -= s; -+ used_phys += s; -+ } -+ } else { -+ vhost_dev_sync_region(dev, section, start_addr, -+ end_addr, vq->used_phys, -+ range_get_last(vq->used_phys, vq->used_size)); -+ } - } - return 0; - } -@@ -306,24 +368,6 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) - dev->log_size = size; - } - --static bool vhost_dev_has_iommu(struct vhost_dev *dev) --{ -- VirtIODevice *vdev = dev->vdev; -- -- /* -- * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support -- * incremental memory mapping API via IOTLB API. For platform that -- * does not have IOMMU, there's no need to enable this feature -- * which may cause unnecessary IOTLB miss/update transactions. -- */ -- if (vdev) { -- return virtio_bus_device_iommu_enabled(vdev) && -- virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); -- } else { -- return false; -- } --} -- - static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr, - hwaddr *plen, bool is_write) - { --- -2.31.1 - diff --git a/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch b/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch deleted file mode 100644 index 1b48f5d..0000000 --- a/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch +++ /dev/null @@ -1,56 +0,0 @@ -From d135303da1187d9f214e520a977fe7c47e5ce1f0 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:45 +0800 -Subject: [PATCH 04/31] vhost: introduce new VhostOps vhost_set_config_call -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/10] c2492838d9c1415e42d2507f2956d640a30325f2 
(lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -This patch introduces new VhostOps vhost_set_config_call. -This function allows the qemu to set the config -event fd to kernel driver. - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-5-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 9b30cdf9bbf9524a4f4f8a6eb551eb13cbbd3893) -Signed-off-by: Cindy Lu ---- - include/hw/virtio/vhost-backend.h | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h -index eab46d7f0b..c5ab49051e 100644 ---- a/include/hw/virtio/vhost-backend.h -+++ b/include/hw/virtio/vhost-backend.h -@@ -128,6 +128,8 @@ typedef int (*vhost_get_device_id_op)(struct vhost_dev *dev, uint32_t *dev_id); - - typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev); - -+typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev, -+ int fd); - typedef struct VhostOps { - VhostBackendType backend_type; - vhost_backend_init vhost_backend_init; -@@ -174,6 +176,7 @@ typedef struct VhostOps { - vhost_vq_get_addr_op vhost_vq_get_addr; - vhost_get_device_id_op vhost_get_device_id; - vhost_force_iommu_op vhost_force_iommu; -+ vhost_set_config_call_op vhost_set_config_call; - } VhostOps; - - int vhost_backend_update_device_iotlb(struct vhost_dev *dev, --- -2.31.1 - diff --git a/kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch b/kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch deleted file mode 100644 index de005ba..0000000 --- a/kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch +++ /dev/null @@ -1,122 +0,0 @@ -From 6584478deca49d0ea20add588e4fdb51cdc26f1d Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:36 +0100 -Subject: [PATCH 04/14] vhost: move iova_tree set to vhost_svq_start -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/13] 200d8e9b58e258a6e301430debc73ef7d962b732 (eperezmartin/qemu-kvm) - -Since we don't know if we will use SVQ at qemu initialization, let's -allocate iova_tree only if needed. To do so, accept it at SVQ start, not -at initialization. - -This will avoid to create it if the device does not support SVQ. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-5-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 5fde952bbdd521c10fc018ee04f922a7dca5f663) ---- - hw/virtio/vhost-shadow-virtqueue.c | 9 ++++----- - hw/virtio/vhost-shadow-virtqueue.h | 5 ++--- - hw/virtio/vhost-vdpa.c | 5 ++--- - 3 files changed, 8 insertions(+), 11 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 3b05bab44d..4307296358 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -642,9 +642,10 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) - * @svq: Shadow Virtqueue - * @vdev: VirtIO device - * @vq: Virtqueue to shadow -+ * @iova_tree: Tree to perform descriptors translations - */ - void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, -- VirtQueue *vq) -+ VirtQueue *vq, VhostIOVATree *iova_tree) - { - size_t desc_size, driver_size, device_size; - -@@ -655,6 +656,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - svq->last_used_idx = 0; - svq->vdev = vdev; - svq->vq = vq; -+ svq->iova_tree = iova_tree; - - svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); - driver_size = vhost_svq_driver_area_size(svq); -@@ -712,18 +714,15 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) - * Creates 
vhost shadow virtqueue, and instructs the vhost device to use the - * shadow methods and file descriptors. - * -- * @iova_tree: Tree to perform descriptors translations - * @ops: SVQ owner callbacks - * @ops_opaque: ops opaque pointer - */ --VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, -- const VhostShadowVirtqueueOps *ops, -+VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops, - void *ops_opaque) - { - VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); - - event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); -- svq->iova_tree = iova_tree; - svq->ops = ops; - svq->ops_opaque = ops_opaque; - return svq; -diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h -index d04c34a589..926a4897b1 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.h -+++ b/hw/virtio/vhost-shadow-virtqueue.h -@@ -126,11 +126,10 @@ size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq); - size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq); - - void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, -- VirtQueue *vq); -+ VirtQueue *vq, VhostIOVATree *iova_tree); - void vhost_svq_stop(VhostShadowVirtqueue *svq); - --VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, -- const VhostShadowVirtqueueOps *ops, -+VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops, - void *ops_opaque); - - void vhost_svq_free(gpointer vq); -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 530d2ca362..e65603022f 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -430,8 +430,7 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, - for (unsigned n = 0; n < hdev->nvqs; ++n) { - VhostShadowVirtqueue *svq; - -- svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops, -- v->shadow_vq_ops_opaque); -+ svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque); - g_ptr_array_add(shadow_vqs, svq); - } - -@@ -1070,7 
+1069,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) - goto err; - } - -- vhost_svq_start(svq, dev->vdev, vq); -+ vhost_svq_start(svq, dev->vdev, vq, v->iova_tree); - ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err); - if (unlikely(!ok)) { - goto err_map; --- -2.31.1 - diff --git a/kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch b/kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch deleted file mode 100644 index 099dd73..0000000 --- a/kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 2906f8df3c5e915a3dc05a705b87990211f114b5 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 15 Dec 2022 12:31:34 +0100 -Subject: [PATCH 02/14] vhost: set SVQ device call handler at SVQ start -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 136: vDPA ASID support in Qemu -RH-Bugzilla: 2104412 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/13] ad90a6cc5c71b70d705904433d5a986e8fedb924 (eperezmartin/qemu-kvm) - -By the end of this series CVQ is shadowed as long as the features -support it. - -Since we don't know at the beginning of qemu running if this is -supported, move the event notifier handler setting to the start of the -SVQ, instead of the start of qemu run. This will avoid to create them if -the device does not support SVQ. - -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang -Message-Id: <20221215113144.322011-3-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 20e7412bfd63c68f1798fbdb799aedb7e05fee88) ---- - hw/virtio/vhost-shadow-virtqueue.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c -index 5bd14cad96..264ddc166d 100644 ---- a/hw/virtio/vhost-shadow-virtqueue.c -+++ b/hw/virtio/vhost-shadow-virtqueue.c -@@ -648,6 +648,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, - { - size_t desc_size, driver_size, device_size; - -+ event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); - svq->next_guest_avail_elem = NULL; - svq->shadow_avail_idx = 0; - svq->shadow_used_idx = 0; -@@ -704,6 +705,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) - g_free(svq->desc_state); - qemu_vfree(svq->vring.desc); - qemu_vfree(svq->vring.used); -+ event_notifier_set_handler(&svq->hdev_call, NULL); - } - - /** -@@ -740,7 +742,6 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, - } - - event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); -- event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); - svq->iova_tree = iova_tree; - svq->ops = ops; - svq->ops_opaque = ops_opaque; -@@ -763,7 +764,6 @@ void vhost_svq_free(gpointer pvq) - VhostShadowVirtqueue *vq = pvq; - vhost_svq_stop(vq); - event_notifier_cleanup(&vq->hdev_kick); -- event_notifier_set_handler(&vq->hdev_call, NULL); - event_notifier_cleanup(&vq->hdev_call); - g_free(vq); - } --- -2.31.1 - diff --git a/kvm-vhost-vdpa-add-support-for-config-interrupt.patch b/kvm-vhost-vdpa-add-support-for-config-interrupt.patch deleted file mode 100644 index 88d4df6..0000000 --- a/kvm-vhost-vdpa-add-support-for-config-interrupt.patch +++ /dev/null @@ -1,73 +0,0 @@ -From e01563a8de9a45937ffd8d4c1d74a6890ffb6eb6 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:46 +0800 -Subject: [PATCH 05/31] vhost-vdpa: add support for config interrupt -MIME-Version: 1.0 -Content-Type: 
text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/10] 49bfd214a503f8e199ff93f4bbfcbd4c4f2405b5 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add new call back function in vhost-vdpa, The function -vhost_set_config_call can set the event fd to kernel. -This function will be called in the vhost_dev_start -and vhost_dev_stop - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-6-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 259f3acc1c675dd77ebbdb28a483f5d0220bdbf6) -Signed-off-by: Cindy Lu ---- - hw/virtio/trace-events | 1 + - hw/virtio/vhost-vdpa.c | 8 ++++++++ - 2 files changed, 9 insertions(+) - -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index 14fc5b9bb2..46f2faf04e 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -62,6 +62,7 @@ vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI - vhost_vdpa_set_owner(void *dev) "dev: %p" - vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64 - vhost_vdpa_get_iova_range(void *dev, uint64_t first, uint64_t last) "dev: %p first: 0x%"PRIx64" last: 0x%"PRIx64 -+vhost_vdpa_set_config_call(void *dev, int fd)"dev: %p fd: %d" - - # virtio.c - virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 7468e44b87..c5be2645b0 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -754,6 +754,13 @@ static int 
vhost_vdpa_set_vring_ready(struct vhost_dev *dev) - return 0; - } - -+static int vhost_vdpa_set_config_call(struct vhost_dev *dev, -+ int fd) -+{ -+ trace_vhost_vdpa_set_config_call(dev, fd); -+ return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd); -+} -+ - static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config, - uint32_t config_len) - { -@@ -1310,4 +1317,5 @@ const VhostOps vdpa_ops = { - .vhost_get_device_id = vhost_vdpa_get_device_id, - .vhost_vq_get_addr = vhost_vdpa_vq_get_addr, - .vhost_force_iommu = vhost_vdpa_force_iommu, -+ .vhost_set_config_call = vhost_vdpa_set_config_call, - }; --- -2.31.1 - diff --git a/kvm-virtio-add-support-for-configure-interrupt.patch b/kvm-virtio-add-support-for-configure-interrupt.patch deleted file mode 100644 index 02f4666..0000000 --- a/kvm-virtio-add-support-for-configure-interrupt.patch +++ /dev/null @@ -1,115 +0,0 @@ -From e04c76339580effae41617b690b58a6605e0f40b Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:47 +0800 -Subject: [PATCH 06/31] virtio: add support for configure interrupt -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/10] 7048eb488b732578686d451684babaf17b582b05 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add the functions to support the configure interrupt in virtio -The function virtio_config_guest_notifier_read will notify the -guest if there is an configure interrupt. -The function virtio_config_set_guest_notifier_fd_handler is -to set the fd hander for the notifier - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-7-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 7d847d0c9b93b91160f40d69a65c904d76f1edd8) -Signed-off-by: Cindy Lu ---- - hw/virtio/virtio.c | 29 +++++++++++++++++++++++++++++ - include/hw/virtio/virtio.h | 4 ++++ - 2 files changed, 33 insertions(+) - -diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c -index eb6347ab5d..34e9c5d141 100644 ---- a/hw/virtio/virtio.c -+++ b/hw/virtio/virtio.c -@@ -4012,7 +4012,14 @@ static void virtio_queue_guest_notifier_read(EventNotifier *n) - virtio_irq(vq); - } - } -+static void virtio_config_guest_notifier_read(EventNotifier *n) -+{ -+ VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier); - -+ if (event_notifier_test_and_clear(n)) { -+ virtio_notify_config(vdev); -+ } -+} - void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, - bool with_irqfd) - { -@@ -4029,6 +4036,23 @@ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, - } - } - -+void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev, -+ bool assign, bool with_irqfd) -+{ -+ EventNotifier *n; -+ n = &vdev->config_notifier; -+ if (assign && !with_irqfd) { -+ event_notifier_set_handler(n, virtio_config_guest_notifier_read); -+ } else { -+ event_notifier_set_handler(n, NULL); -+ } -+ if (!assign) { -+ /* Test and clear notifier before closing it,*/ -+ /* in case poll callback didn't have time to run. 
*/ -+ virtio_config_guest_notifier_read(n); -+ } -+} -+ - EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) - { - return &vq->guest_notifier; -@@ -4109,6 +4133,11 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) - return &vq->host_notifier; - } - -+EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev) -+{ -+ return &vdev->config_notifier; -+} -+ - void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled) - { - vq->host_notifier_enabled = enabled; -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index 1f4a41b958..9c3a4642f2 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -138,6 +138,7 @@ struct VirtIODevice - AddressSpace *dma_as; - QLIST_HEAD(, VirtQueue) *vector_queues; - QTAILQ_ENTRY(VirtIODevice) next; -+ EventNotifier config_notifier; - }; - - struct VirtioDeviceClass { -@@ -360,6 +361,9 @@ void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ct - void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx); - VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); - VirtQueue *virtio_vector_next_queue(VirtQueue *vq); -+EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev); -+void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev, -+ bool assign, bool with_irqfd); - - static inline void virtio_add_feature(uint64_t *features, unsigned int fbit) - { --- -2.31.1 - diff --git a/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch b/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch deleted file mode 100644 index ea2589a..0000000 --- a/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch +++ /dev/null @@ -1,262 +0,0 @@ -From 34a267758cf016f34b327318500efdbf0f606033 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:42 +0800 -Subject: [PATCH 01/31] virtio: introduce macro VIRTIO_CONFIG_IRQ_IDX -MIME-Version: 1.0 -Content-Type: text/plain; 
charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/10] f374aaae221bc5a4c2521a267d21350b812e11ba (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -To support configure interrupt for vhost-vdpa -Introduce VIRTIO_CONFIG_IRQ_IDX -1 as configure interrupt's queue index, -Then we can reuse the functions guest_notifier_mask and guest_notifier_pending. -Add the check of queue index in these drivers, if the driver does not support -configure interrupt, the function will just return - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-2-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 544f0278afcab2bebab61b14e4c2c58e65911f5b) -Signed-off-by: Cindy Lu ---- - hw/display/vhost-user-gpu.c | 18 ++++++++++++++++++ - hw/net/virtio-net.c | 22 ++++++++++++++++++++-- - hw/virtio/vhost-user-fs.c | 18 ++++++++++++++++++ - hw/virtio/vhost-user-gpio.c | 10 ++++++++++ - hw/virtio/vhost-vsock-common.c | 18 ++++++++++++++++++ - hw/virtio/virtio-crypto.c | 18 ++++++++++++++++++ - include/hw/virtio/virtio.h | 3 +++ - 7 files changed, 105 insertions(+), 2 deletions(-) - -diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c -index 19c0e20103..4380a5e672 100644 ---- a/hw/display/vhost-user-gpu.c -+++ b/hw/display/vhost-user-gpu.c -@@ -486,6 +486,15 @@ vhost_user_gpu_guest_notifier_pending(VirtIODevice *vdev, int idx) - { - VhostUserGPU *g = VHOST_USER_GPU(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return false; -+ } - return 
vhost_virtqueue_pending(&g->vhost->dev, idx); - } - -@@ -494,6 +503,15 @@ vhost_user_gpu_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) - { - VhostUserGPU *g = VHOST_USER_GPU(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } - vhost_virtqueue_mask(&g->vhost->dev, vdev, idx, mask); - } - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index aba12759d5..bee35d6f9f 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3316,6 +3316,15 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) - } else { - nc = qemu_get_subqueue(n->nic, vq2q(idx)); - } -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return false -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return false; -+ } - return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); - } - -@@ -3339,8 +3348,17 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, - } else { - nc = qemu_get_subqueue(n->nic, vq2q(idx)); - } -- vhost_net_virtqueue_mask(get_vhost_net(nc->peer), -- vdev, idx, mask); -+ /* -+ *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } -+ -+ vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); - } - - static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) -diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c -index d97b179e6f..f5049735ac 100644 ---- a/hw/virtio/vhost-user-fs.c -+++ b/hw/virtio/vhost-user-fs.c -@@ -159,6 +159,15 @@ 
static void vuf_guest_notifier_mask(VirtIODevice *vdev, int idx, - { - VHostUserFS *fs = VHOST_USER_FS(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } - vhost_virtqueue_mask(&fs->vhost_dev, vdev, idx, mask); - } - -@@ -166,6 +175,15 @@ static bool vuf_guest_notifier_pending(VirtIODevice *vdev, int idx) - { - VHostUserFS *fs = VHOST_USER_FS(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return false; -+ } - return vhost_virtqueue_pending(&fs->vhost_dev, idx); - } - -diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c -index b7b82a1099..fe3da32c74 100644 ---- a/hw/virtio/vhost-user-gpio.c -+++ b/hw/virtio/vhost-user-gpio.c -@@ -191,6 +191,16 @@ static void vu_gpio_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) - { - VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } -+ - vhost_virtqueue_mask(&gpio->vhost_dev, vdev, idx, mask); - } - -diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c -index d21c72b401..d2b5519d5a 100644 ---- a/hw/virtio/vhost-vsock-common.c -+++ b/hw/virtio/vhost-vsock-common.c -@@ -127,6 +127,15 @@ static void vhost_vsock_common_guest_notifier_mask(VirtIODevice *vdev, int idx, - { - VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of 
configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } - vhost_virtqueue_mask(&vvc->vhost_dev, vdev, idx, mask); - } - -@@ -135,6 +144,15 @@ static bool vhost_vsock_common_guest_notifier_pending(VirtIODevice *vdev, - { - VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return false; -+ } - return vhost_virtqueue_pending(&vvc->vhost_dev, idx); - } - -diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c -index 97da74e719..516425e26a 100644 ---- a/hw/virtio/virtio-crypto.c -+++ b/hw/virtio/virtio-crypto.c -@@ -1182,6 +1182,15 @@ static void virtio_crypto_guest_notifier_mask(VirtIODevice *vdev, int idx, - - assert(vcrypto->vhost_started); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return; -+ } - cryptodev_vhost_virtqueue_mask(vdev, queue, idx, mask); - } - -@@ -1192,6 +1201,15 @@ static bool virtio_crypto_guest_notifier_pending(VirtIODevice *vdev, int idx) - - assert(vcrypto->vhost_started); - -+ /* -+ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 -+ * as the Marco of configure interrupt's IDX, If this driver does not -+ * support, the function will return -+ */ -+ -+ if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ return false; -+ } - return cryptodev_vhost_virtqueue_pending(vdev, queue, idx); - } - -diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h -index acfd4df125..1f4a41b958 100644 ---- a/include/hw/virtio/virtio.h -+++ b/include/hw/virtio/virtio.h -@@ -79,6 +79,9 @@ typedef struct 
VirtQueueElement - - #define VIRTIO_NO_VECTOR 0xffff - -+/* special index value used internally for config irqs */ -+#define VIRTIO_CONFIG_IRQ_IDX -1 -+ - #define TYPE_VIRTIO_DEVICE "virtio-device" - OBJECT_DECLARE_TYPE(VirtIODevice, VirtioDeviceClass, VIRTIO_DEVICE) - --- -2.31.1 - diff --git a/kvm-virtio-mmio-add-support-for-configure-interrupt.patch b/kvm-virtio-mmio-add-support-for-configure-interrupt.patch deleted file mode 100644 index 275b197..0000000 --- a/kvm-virtio-mmio-add-support-for-configure-interrupt.patch +++ /dev/null @@ -1,80 +0,0 @@ -From 181705090c9963c2da97811838ace5bb058737c6 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:50 +0800 -Subject: [PATCH 09/31] virtio-mmio: add support for configure interrupt -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [9/10] 742cc2b425ffd7bbd393772526e7481446ee131c (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add configure interrupt support in virtio-mmio bus. -add function to set configure guest notifier. - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-10-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit cd336e834620ea78edef049c3567f312974e475b) -Signed-off-by: Cindy Lu ---- - hw/virtio/virtio-mmio.c | 27 +++++++++++++++++++++++++++ - 1 file changed, 27 insertions(+) - -diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c -index d240efef97..103260ec15 100644 ---- a/hw/virtio/virtio-mmio.c -+++ b/hw/virtio/virtio-mmio.c -@@ -670,7 +670,30 @@ static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign, - - return 0; - } -+static int virtio_mmio_set_config_guest_notifier(DeviceState *d, bool assign, -+ bool with_irqfd) -+{ -+ VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); -+ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -+ VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); -+ EventNotifier *notifier = virtio_config_get_guest_notifier(vdev); -+ int r = 0; - -+ if (assign) { -+ r = event_notifier_init(notifier, 0); -+ if (r < 0) { -+ return r; -+ } -+ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); -+ } else { -+ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); -+ event_notifier_cleanup(notifier); -+ } -+ if (vdc->guest_notifier_mask && vdev->use_guest_notifier_mask) { -+ vdc->guest_notifier_mask(vdev, VIRTIO_CONFIG_IRQ_IDX, !assign); -+ } -+ return r; -+} - static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs, - bool assign) - { -@@ -692,6 +715,10 @@ static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs, - goto assign_error; - } - } -+ r = virtio_mmio_set_config_guest_notifier(d, assign, with_irqfd); -+ if (r < 0) { -+ goto assign_error; -+ } - - return 0; - --- -2.31.1 - diff --git a/kvm-virtio-net-add-support-for-configure-interrupt.patch b/kvm-virtio-net-add-support-for-configure-interrupt.patch deleted file mode 100644 index 74b956a..0000000 --- a/kvm-virtio-net-add-support-for-configure-interrupt.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 2b8e3409edb8a17d89c3829cfa3d92bdfdd43c53 Mon Sep 17 00:00:00 2001 -From: Cindy Lu 
-Date: Thu, 22 Dec 2022 15:04:49 +0800 -Subject: [PATCH 08/31] virtio-net: add support for configure interrupt -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/10] 1b125169bea6c81c508b154fa1bae68af153b312 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add functions to support configure interrupt in virtio_net -Add the functions to support vhost_net_config_pending -and vhost_net_config_mask. - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-9-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 8aab0d1dbe90c7b5ac6672a1a09b0578178f5f4c) -Signed-off-by: Cindy Lu ---- - hw/net/vhost_net-stub.c | 9 +++++++++ - hw/net/vhost_net.c | 9 +++++++++ - hw/net/virtio-net.c | 4 ++-- - include/net/vhost_net.h | 2 ++ - 4 files changed, 22 insertions(+), 2 deletions(-) - -diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c -index 9f7daae99c..c36f258201 100644 ---- a/hw/net/vhost_net-stub.c -+++ b/hw/net/vhost_net-stub.c -@@ -82,6 +82,15 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, - { - } - -+bool vhost_net_config_pending(VHostNetState *net) -+{ -+ return false; -+} -+ -+void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) -+{ -+} -+ - int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) - { - return -1; -diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c -index 043058ff43..6a55f5a473 100644 ---- a/hw/net/vhost_net.c -+++ b/hw/net/vhost_net.c -@@ -478,6 +478,15 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, - vhost_virtqueue_mask(&net->dev, dev, idx, mask); - } - -+bool 
vhost_net_config_pending(VHostNetState *net) -+{ -+ return vhost_config_pending(&net->dev); -+} -+ -+void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) -+{ -+ vhost_config_mask(&net->dev, dev, mask); -+} - VHostNetState *get_vhost_net(NetClientState *nc) - { - VHostNetState *vhost_net = 0; -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index bee35d6f9f..ec974f7a76 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3323,7 +3323,7 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) - */ - - if (idx == VIRTIO_CONFIG_IRQ_IDX) { -- return false; -+ return vhost_net_config_pending(get_vhost_net(nc->peer)); - } - return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); - } -@@ -3355,9 +3355,9 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, - */ - - if (idx == VIRTIO_CONFIG_IRQ_IDX) { -+ vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask); - return; - } -- - vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); - } - -diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h -index 40b9a40074..dbbd0dc04e 100644 ---- a/include/net/vhost_net.h -+++ b/include/net/vhost_net.h -@@ -39,6 +39,8 @@ int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, - bool vhost_net_virtqueue_pending(VHostNetState *net, int n); - void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, - int idx, bool mask); -+bool vhost_net_config_pending(VHostNetState *net); -+void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask); - int vhost_net_notify_migration_done(VHostNetState *net, char* mac_addr); - VHostNetState *get_vhost_net(NetClientState *nc); - --- -2.31.1 - diff --git a/kvm-virtio-pci-add-support-for-configure-interrupt.patch b/kvm-virtio-pci-add-support-for-configure-interrupt.patch deleted file mode 100644 index 14070a4..0000000 --- a/kvm-virtio-pci-add-support-for-configure-interrupt.patch +++ /dev/null @@ 
-1,274 +0,0 @@ -From 61ac1476d3820c97e1cc103af422b17bc94c6ca5 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:51 +0800 -Subject: [PATCH 10/31] virtio-pci: add support for configure interrupt -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [10/10] ebd6a11d7699660d8ac5a4e44a790f823daea57c (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -Add process to handle the configure interrupt, The function's -logic is the same with vq interrupt.Add extra process to check -the configure interrupt - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-11-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 1680542862edd963e6380dd4121a5e85df55581f) -Signed-off-by: Cindy Lu ---- - hw/virtio/virtio-pci.c | 118 +++++++++++++++++++++++++++------ - include/hw/virtio/virtio-pci.h | 4 +- - 2 files changed, 102 insertions(+), 20 deletions(-) - -diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index ec816ea367..3f00e91718 100644 ---- a/hw/virtio/virtio-pci.c -+++ b/hw/virtio/virtio-pci.c -@@ -751,7 +751,8 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, - VirtQueue *vq; - - if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { -- return -1; -+ *n = virtio_config_get_guest_notifier(vdev); -+ *vector = vdev->config_vector; - } else { - if (!virtio_queue_get_num(vdev, queue_no)) { - return -1; -@@ -811,7 +812,7 @@ undo: - } - return ret; - } --static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) -+static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) - { - int queue_no; - int ret = 0; -@@ -826,6 +827,10 @@ static int 
kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) - return ret; - } - -+static int kvm_virtio_pci_vector_config_use(VirtIOPCIProxy *proxy) -+{ -+ return kvm_virtio_pci_vector_use_one(proxy, VIRTIO_CONFIG_IRQ_IDX); -+} - - static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, - int queue_no) -@@ -850,7 +855,7 @@ static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, - kvm_virtio_pci_vq_vector_release(proxy, vector); - } - --static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) -+static void kvm_virtio_pci_vector_vq_release(VirtIOPCIProxy *proxy, int nvqs) - { - int queue_no; - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -@@ -863,6 +868,11 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) - } - } - -+static void kvm_virtio_pci_vector_config_release(VirtIOPCIProxy *proxy) -+{ -+ kvm_virtio_pci_vector_release_one(proxy, VIRTIO_CONFIG_IRQ_IDX); -+} -+ - static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, - unsigned int queue_no, - unsigned int vector, -@@ -944,9 +954,19 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, - } - vq = virtio_vector_next_queue(vq); - } -- -+ /* unmask config intr */ -+ if (vector == vdev->config_vector) { -+ n = virtio_config_get_guest_notifier(vdev); -+ ret = virtio_pci_one_vector_unmask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, -+ msg, n); -+ if (ret < 0) { -+ goto undo_config; -+ } -+ } - return 0; -- -+undo_config: -+ n = virtio_config_get_guest_notifier(vdev); -+ virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n); - undo: - vq = virtio_vector_first_queue(vdev, vector); - while (vq && unmasked >= 0) { -@@ -980,6 +1000,11 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector) - } - vq = virtio_vector_next_queue(vq); - } -+ -+ if (vector == vdev->config_vector) { -+ n = virtio_config_get_guest_notifier(vdev); -+ virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n); 
-+ } - } - - static void virtio_pci_vector_poll(PCIDevice *dev, -@@ -1011,6 +1036,34 @@ static void virtio_pci_vector_poll(PCIDevice *dev, - msix_set_pending(dev, vector); - } - } -+ /* poll the config intr */ -+ ret = virtio_pci_get_notifier(proxy, VIRTIO_CONFIG_IRQ_IDX, ¬ifier, -+ &vector); -+ if (ret < 0) { -+ return; -+ } -+ if (vector < vector_start || vector >= vector_end || -+ !msix_is_masked(dev, vector)) { -+ return; -+ } -+ if (k->guest_notifier_pending) { -+ if (k->guest_notifier_pending(vdev, VIRTIO_CONFIG_IRQ_IDX)) { -+ msix_set_pending(dev, vector); -+ } -+ } else if (event_notifier_test_and_clear(notifier)) { -+ msix_set_pending(dev, vector); -+ } -+} -+ -+void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq, -+ int n, bool assign, -+ bool with_irqfd) -+{ -+ if (n == VIRTIO_CONFIG_IRQ_IDX) { -+ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); -+ } else { -+ virtio_queue_set_guest_notifier_fd_handler(vq, assign, with_irqfd); -+ } - } - - static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, -@@ -1019,17 +1072,25 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, - VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); -- VirtQueue *vq = virtio_get_queue(vdev, n); -- EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); -+ VirtQueue *vq = NULL; -+ EventNotifier *notifier = NULL; -+ -+ if (n == VIRTIO_CONFIG_IRQ_IDX) { -+ notifier = virtio_config_get_guest_notifier(vdev); -+ } else { -+ vq = virtio_get_queue(vdev, n); -+ notifier = virtio_queue_get_guest_notifier(vq); -+ } - - if (assign) { - int r = event_notifier_init(notifier, 0); - if (r < 0) { - return r; - } -- virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd); -+ virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, true, with_irqfd); - } else { -- 
virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd); -+ virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, false, -+ with_irqfd); - event_notifier_cleanup(notifier); - } - -@@ -1072,10 +1133,13 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) - proxy->nvqs_with_notifiers = nvqs; - - /* Must unset vector notifier while guest notifier is still assigned */ -- if ((proxy->vector_irqfd || k->guest_notifier_mask) && !assign) { -+ if ((proxy->vector_irqfd || -+ (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) && -+ !assign) { - msix_unset_vector_notifiers(&proxy->pci_dev); - if (proxy->vector_irqfd) { -- kvm_virtio_pci_vector_release(proxy, nvqs); -+ kvm_virtio_pci_vector_vq_release(proxy, nvqs); -+ kvm_virtio_pci_vector_config_release(proxy); - g_free(proxy->vector_irqfd); - proxy->vector_irqfd = NULL; - } -@@ -1091,20 +1155,30 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) - goto assign_error; - } - } -- -+ r = virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, assign, -+ with_irqfd); -+ if (r < 0) { -+ goto config_assign_error; -+ } - /* Must set vector notifier after guest notifier has been assigned */ -- if ((with_irqfd || k->guest_notifier_mask) && assign) { -+ if ((with_irqfd || -+ (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) && -+ assign) { - if (with_irqfd) { - proxy->vector_irqfd = - g_malloc0(sizeof(*proxy->vector_irqfd) * - msix_nr_vectors_allocated(&proxy->pci_dev)); -- r = kvm_virtio_pci_vector_use(proxy, nvqs); -+ r = kvm_virtio_pci_vector_vq_use(proxy, nvqs); -+ if (r < 0) { -+ goto config_assign_error; -+ } -+ r = kvm_virtio_pci_vector_config_use(proxy); - if (r < 0) { -- goto assign_error; -+ goto config_error; - } - } -- r = msix_set_vector_notifiers(&proxy->pci_dev, -- virtio_pci_vector_unmask, -+ -+ r = msix_set_vector_notifiers(&proxy->pci_dev, virtio_pci_vector_unmask, - virtio_pci_vector_mask, - virtio_pci_vector_poll); - if (r < 0) 
{ -@@ -1117,9 +1191,15 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) - notifiers_error: - if (with_irqfd) { - assert(assign); -- kvm_virtio_pci_vector_release(proxy, nvqs); -+ kvm_virtio_pci_vector_vq_release(proxy, nvqs); - } -- -+config_error: -+ if (with_irqfd) { -+ kvm_virtio_pci_vector_config_release(proxy); -+ } -+config_assign_error: -+ virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, !assign, -+ with_irqfd); - assign_error: - /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */ - assert(assign); -diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h -index 938799e8f6..c02e278f46 100644 ---- a/include/hw/virtio/virtio-pci.h -+++ b/include/hw/virtio/virtio-pci.h -@@ -256,5 +256,7 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t); - * @fixed_queues. - */ - unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues); -- -+void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq, -+ int n, bool assign, -+ bool with_irqfd); - #endif --- -2.31.1 - diff --git a/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch b/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch deleted file mode 100644 index a8c32a2..0000000 --- a/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch +++ /dev/null @@ -1,272 +0,0 @@ -From 9a234f849273d3480e4a88042cb1ea06a37a626b Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:43 +0800 -Subject: [PATCH 02/31] virtio-pci: decouple notifier from interrupt process -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/10] a20f4c9ff38b239531d12cbcc7deaa649c86abc3 (lulu6/qemu-kvm3) - 
-https://bugzilla.redhat.com/show_bug.cgi?id=1905805 -To reuse the notifier process. We add the virtio_pci_get_notifier -to get the notifier and vector. The INPUT for this function is IDX, -The OUTPUT is the notifier and the vector - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-3-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 2e07f69d0c828e21515b63dc22884d548540b382) -Signed-off-by: Cindy Lu ---- - hw/virtio/virtio-pci.c | 88 +++++++++++++++++++++++++++--------------- - 1 file changed, 57 insertions(+), 31 deletions(-) - -diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index a1c9dfa7bb..52c7692fff 100644 ---- a/hw/virtio/virtio-pci.c -+++ b/hw/virtio/virtio-pci.c -@@ -728,29 +728,41 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy, - } - - static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy, -- unsigned int queue_no, -+ EventNotifier *n, - unsigned int vector) - { - VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; -- VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -- VirtQueue *vq = virtio_get_queue(vdev, queue_no); -- EventNotifier *n = virtio_queue_get_guest_notifier(vq); - return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq); - } - - static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy, -- unsigned int queue_no, -+ EventNotifier *n , - unsigned int vector) - { -- VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -- VirtQueue *vq = virtio_get_queue(vdev, queue_no); -- EventNotifier *n = virtio_queue_get_guest_notifier(vq); - VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; - int ret; - - ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq); - assert(ret == 0); - } -+static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, -+ EventNotifier **n, unsigned int *vector) -+{ -+ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -+ 
VirtQueue *vq; -+ -+ if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { -+ return -1; -+ } else { -+ if (!virtio_queue_get_num(vdev, queue_no)) { -+ return -1; -+ } -+ *vector = virtio_queue_vector(vdev, queue_no); -+ vq = virtio_get_queue(vdev, queue_no); -+ *n = virtio_queue_get_guest_notifier(vq); -+ } -+ return 0; -+} - - static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) - { -@@ -759,12 +771,15 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); - unsigned int vector; - int ret, queue_no; -- -+ EventNotifier *n; - for (queue_no = 0; queue_no < nvqs; queue_no++) { - if (!virtio_queue_get_num(vdev, queue_no)) { - break; - } -- vector = virtio_queue_vector(vdev, queue_no); -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ break; -+ } - if (vector >= msix_nr_vectors_allocated(dev)) { - continue; - } -@@ -776,7 +791,7 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) - * Otherwise, delay until unmasked in the frontend. 
- */ - if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector); -+ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); - if (ret < 0) { - kvm_virtio_pci_vq_vector_release(proxy, vector); - goto undo; -@@ -792,7 +807,11 @@ undo: - continue; - } - if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ break; -+ } -+ kvm_virtio_pci_irqfd_release(proxy, n, vector); - } - kvm_virtio_pci_vq_vector_release(proxy, vector); - } -@@ -806,12 +825,16 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) - unsigned int vector; - int queue_no; - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); -- -+ EventNotifier *n; -+ int ret ; - for (queue_no = 0; queue_no < nvqs; queue_no++) { - if (!virtio_queue_get_num(vdev, queue_no)) { - break; - } -- vector = virtio_queue_vector(vdev, queue_no); -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ break; -+ } - if (vector >= msix_nr_vectors_allocated(dev)) { - continue; - } -@@ -819,21 +842,20 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) - * Otherwise, it was cleaned when masked in the frontend. 
- */ - if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); -+ kvm_virtio_pci_irqfd_release(proxy, n, vector); - } - kvm_virtio_pci_vq_vector_release(proxy, vector); - } - } - --static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy, -+static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, - unsigned int queue_no, - unsigned int vector, -- MSIMessage msg) -+ MSIMessage msg, -+ EventNotifier *n) - { - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); -- VirtQueue *vq = virtio_get_queue(vdev, queue_no); -- EventNotifier *n = virtio_queue_get_guest_notifier(vq); - VirtIOIRQFD *irqfd; - int ret = 0; - -@@ -860,14 +882,15 @@ static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy, - event_notifier_set(n); - } - } else { -- ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector); -+ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); - } - return ret; - } - --static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy, -+static void virtio_pci_one_vector_mask(VirtIOPCIProxy *proxy, - unsigned int queue_no, -- unsigned int vector) -+ unsigned int vector, -+ EventNotifier *n) - { - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); -@@ -878,7 +901,7 @@ static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy, - if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { - k->guest_notifier_mask(vdev, queue_no, true); - } else { -- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); -+ kvm_virtio_pci_irqfd_release(proxy, n, vector); - } - } - -@@ -888,6 +911,7 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, - VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - VirtQueue *vq = virtio_vector_first_queue(vdev, vector); -+ EventNotifier *n; - int ret, index, 
unmasked = 0; - - while (vq) { -@@ -896,7 +920,8 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, - break; - } - if (index < proxy->nvqs_with_notifiers) { -- ret = virtio_pci_vq_vector_unmask(proxy, index, vector, msg); -+ n = virtio_queue_get_guest_notifier(vq); -+ ret = virtio_pci_one_vector_unmask(proxy, index, vector, msg, n); - if (ret < 0) { - goto undo; - } -@@ -912,7 +937,8 @@ undo: - while (vq && unmasked >= 0) { - index = virtio_get_queue_index(vq); - if (index < proxy->nvqs_with_notifiers) { -- virtio_pci_vq_vector_mask(proxy, index, vector); -+ n = virtio_queue_get_guest_notifier(vq); -+ virtio_pci_one_vector_mask(proxy, index, vector, n); - --unmasked; - } - vq = virtio_vector_next_queue(vq); -@@ -925,15 +951,17 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector) - VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - VirtQueue *vq = virtio_vector_first_queue(vdev, vector); -+ EventNotifier *n; - int index; - - while (vq) { - index = virtio_get_queue_index(vq); -+ n = virtio_queue_get_guest_notifier(vq); - if (!virtio_queue_get_num(vdev, index)) { - break; - } - if (index < proxy->nvqs_with_notifiers) { -- virtio_pci_vq_vector_mask(proxy, index, vector); -+ virtio_pci_one_vector_mask(proxy, index, vector, n); - } - vq = virtio_vector_next_queue(vq); - } -@@ -949,19 +977,17 @@ static void virtio_pci_vector_poll(PCIDevice *dev, - int queue_no; - unsigned int vector; - EventNotifier *notifier; -- VirtQueue *vq; -+ int ret; - - for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) { -- if (!virtio_queue_get_num(vdev, queue_no)) { -+ ret = virtio_pci_get_notifier(proxy, queue_no, ¬ifier, &vector); -+ if (ret < 0) { - break; - } -- vector = virtio_queue_vector(vdev, queue_no); - if (vector < vector_start || vector >= vector_end || - !msix_is_masked(dev, vector)) { - continue; - } -- vq = virtio_get_queue(vdev, queue_no); -- 
notifier = virtio_queue_get_guest_notifier(vq); - if (k->guest_notifier_pending) { - if (k->guest_notifier_pending(vdev, queue_no)) { - msix_set_pending(dev, vector); --- -2.31.1 - diff --git a/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch b/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch deleted file mode 100644 index be9b3c7..0000000 --- a/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch +++ /dev/null @@ -1,212 +0,0 @@ -From 58cd577ff157cfaf7506bba135db58e75c330ff0 Mon Sep 17 00:00:00 2001 -From: Cindy Lu -Date: Thu, 22 Dec 2022 15:04:44 +0800 -Subject: [PATCH 03/31] virtio-pci: decouple the single vector from the - interrupt process -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cindy Lu -RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa -RH-Bugzilla: 1905805 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/10] 2c79cb678f005fb2f53b2db0f237347634ab3422 (lulu6/qemu-kvm3) - -https://bugzilla.redhat.com/show_bug.cgi?id=1905805 - -To reuse the interrupt process in configure interrupt -Need to decouple the single vector from the interrupt process. -We add new function kvm_virtio_pci_vector_use_one and _release_one. -These functions are used for the single vector, the whole process will -finish in the loop with vq number. - -Signed-off-by: Cindy Lu -Message-Id: <20221222070451.936503-4-lulu@redhat.com> -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit ee3b8dc6cc496ba7f4e27aed4493275c706a7942) -Signed-off-by: Cindy Lu ---- - hw/virtio/virtio-pci.c | 131 +++++++++++++++++++++++------------------ - 1 file changed, 73 insertions(+), 58 deletions(-) - -diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c -index 52c7692fff..ec816ea367 100644 ---- a/hw/virtio/virtio-pci.c -+++ b/hw/virtio/virtio-pci.c -@@ -699,7 +699,6 @@ static uint32_t virtio_read_config(PCIDevice *pci_dev, - } - - static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, -- unsigned int queue_no, - unsigned int vector) - { - VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; -@@ -764,87 +763,103 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, - return 0; - } - --static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) -+static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no) - { -+ unsigned int vector; -+ int ret; -+ EventNotifier *n; - PCIDevice *dev = &proxy->pci_dev; - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); -- unsigned int vector; -- int ret, queue_no; -- EventNotifier *n; -- for (queue_no = 0; queue_no < nvqs; queue_no++) { -- if (!virtio_queue_get_num(vdev, queue_no)) { -- break; -- } -- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -- if (ret < 0) { -- break; -- } -- if (vector >= msix_nr_vectors_allocated(dev)) { -- continue; -- } -- ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector); -+ -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ return ret; -+ } -+ if (vector >= msix_nr_vectors_allocated(dev)) { -+ return 0; -+ } -+ ret = kvm_virtio_pci_vq_vector_use(proxy, vector); -+ if (ret < 0) { -+ goto undo; -+ } -+ /* -+ * If guest supports masking, set up irqfd now. -+ * Otherwise, delay until unmasked in the frontend. 
-+ */ -+ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -+ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); - if (ret < 0) { -+ kvm_virtio_pci_vq_vector_release(proxy, vector); - goto undo; - } -- /* If guest supports masking, set up irqfd now. -- * Otherwise, delay until unmasked in the frontend. -- */ -- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); -- if (ret < 0) { -- kvm_virtio_pci_vq_vector_release(proxy, vector); -- goto undo; -- } -- } - } -- return 0; - -+ return 0; - undo: -- while (--queue_no >= 0) { -- vector = virtio_queue_vector(vdev, queue_no); -- if (vector >= msix_nr_vectors_allocated(dev)) { -- continue; -+ -+ vector = virtio_queue_vector(vdev, queue_no); -+ if (vector >= msix_nr_vectors_allocated(dev)) { -+ return ret; -+ } -+ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ return ret; - } -- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -- if (ret < 0) { -- break; -- } -- kvm_virtio_pci_irqfd_release(proxy, n, vector); -+ kvm_virtio_pci_irqfd_release(proxy, n, vector); -+ } -+ return ret; -+} -+static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) -+{ -+ int queue_no; -+ int ret = 0; -+ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -+ -+ for (queue_no = 0; queue_no < nvqs; queue_no++) { -+ if (!virtio_queue_get_num(vdev, queue_no)) { -+ return -1; - } -- kvm_virtio_pci_vq_vector_release(proxy, vector); -+ ret = kvm_virtio_pci_vector_use_one(proxy, queue_no); - } - return ret; - } - --static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) -+ -+static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, -+ int queue_no) - { -- PCIDevice *dev = &proxy->pci_dev; - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - unsigned 
int vector; -- int queue_no; -- VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); - EventNotifier *n; -- int ret ; -+ int ret; -+ VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); -+ PCIDevice *dev = &proxy->pci_dev; -+ -+ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -+ if (ret < 0) { -+ return; -+ } -+ if (vector >= msix_nr_vectors_allocated(dev)) { -+ return; -+ } -+ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -+ kvm_virtio_pci_irqfd_release(proxy, n, vector); -+ } -+ kvm_virtio_pci_vq_vector_release(proxy, vector); -+} -+ -+static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) -+{ -+ int queue_no; -+ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); -+ - for (queue_no = 0; queue_no < nvqs; queue_no++) { - if (!virtio_queue_get_num(vdev, queue_no)) { - break; - } -- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); -- if (ret < 0) { -- break; -- } -- if (vector >= msix_nr_vectors_allocated(dev)) { -- continue; -- } -- /* If guest supports masking, clean up irqfd now. -- * Otherwise, it was cleaned when masked in the frontend. -- */ -- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { -- kvm_virtio_pci_irqfd_release(proxy, n, vector); -- } -- kvm_virtio_pci_vq_vector_release(proxy, vector); -+ kvm_virtio_pci_vector_release_one(proxy, queue_no); - } - } - --- -2.31.1 - diff --git a/kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch b/kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch deleted file mode 100644 index 0555a68..0000000 --- a/kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 35ffe28a91a2ef08dd181d1a22695050ccbb6995 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Mon, 9 Jan 2023 16:04:43 +0000 -Subject: [PATCH 1/2] virtio-rng-pci: fix migration compat for vectors - -RH-Author: Dr. 
David Alan Gilbert -RH-MergeRequest: 131: virtio-rng-pci: fix migration compat for vectors -RH-Bugzilla: 2155749 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Acked-by: Thomas Huth -RH-Commit: [1/1] 1a866491dd191b073d71ae1aa5f4d76ee885de6d (dagrh/c-9-s-qemu-kvm) - -Fixup the migration compatibility for existing machine types -so that they do not enable msi-x. - -Symptom: - -(qemu) qemu: get_pci_config_device: Bad config data: i=0x34 read: 84 device: 98 cmask: ff wmask: 0 w1cmask:0 -qemu: Failed to load PCIDevice:config -qemu: Failed to load virtio-rng:virtio -qemu: error while loading state for instance 0x0 of device '0000:00:03.0/virtio-rng' -qemu: load of migration failed: Invalid argument - -Note: This fix will break migration from 7.2->7.2-fixed with this patch - -bz: https://bugzilla.redhat.com/show_bug.cgi?id=2155749 -Fixes: 9ea02e8f1 ("virtio-rng-pci: Allow setting nvectors, so we can use MSI-X") - -This downstream fix is the equivalent of an upstream fix I've posted to -the 7.2 machine type compatibility. - -Signed-off-by: Dr. 
David Alan Gilbert ---- - hw/core/machine.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 3d851d34da..7adbac6f87 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -56,6 +56,8 @@ GlobalProperty hw_compat_rhel_9_1[] = { - { "nvme-ns", "eui64-default", "on"}, - /* hw_compat_rhel_9_1 from hw_compat_7_1 */ - { "virtio-device", "queue_reset", "false" }, -+ /* hw_compat_rhel_9_1 bz 2155749 */ -+ { "virtio-rng-pci", "vectors", "0" }, - }; - const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); - --- -2.31.1 - diff --git a/kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch b/kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch deleted file mode 100644 index e5288d6..0000000 --- a/kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 5413b8825db6eecc6f245854a6bce58e4dee3294 Mon Sep 17 00:00:00 2001 -From: "Dr. David Alan Gilbert" -Date: Tue, 7 Feb 2023 17:57:39 +0000 -Subject: [PATCH 20/20] virtio-rng-pci: fix transitional migration compat for - vectors - -RH-Author: Dr. David Alan Gilbert -RH-MergeRequest: 147: virtio-rng-pci: fix transitional migration compat for vectors -RH-Bugzilla: 2162569 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Thomas Huth -RH-Acked-by: Gerd Hoffmann -RH-Commit: [1/1] 6e2bd111cd56808fccf2c0464a40f7784fd893a2 (dagrh/c-9-s-qemu-kvm) - -In upstream bad9c5a5166/downstream 46e08bafe9ed I fixed the virito-rng-pci -migration compatibility, but it was discovered that we also need to fix -the other aliases of the device for the transitional cases. - -I've sent upstream: -https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg01926.html -but downstream we need to change the downstream machine type anyway, -so it's not quite identical. - -Fixes: 9ea02e8f1 ('virtio-rng-pci: Allow setting nvectors, so we can use MSI-X') - -Signed-off-by: Dr. 
David Alan Gilbert ---- - hw/core/machine.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 7adbac6f87..3ee638394b 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -58,6 +58,9 @@ GlobalProperty hw_compat_rhel_9_1[] = { - { "virtio-device", "queue_reset", "false" }, - /* hw_compat_rhel_9_1 bz 2155749 */ - { "virtio-rng-pci", "vectors", "0" }, -+ /* hw_compat_rhel_9_1 bz 2162569 */ -+ { "virtio-rng-pci-transitional", "vectors", "0" }, -+ { "virtio-rng-pci-non-transitional", "vectors", "0" }, - }; - const size_t hw_compat_rhel_9_1_len = G_N_ELEMENTS(hw_compat_rhel_9_1); - --- -2.31.1 - diff --git a/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch b/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch deleted file mode 100644 index c951897..0000000 --- a/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch +++ /dev/null @@ -1,325 +0,0 @@ -From c64027b1ff9856031c01009f4b5c3560d92cc998 Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 21 Feb 2023 16:22:18 -0500 -Subject: [PATCH 03/12] virtio-scsi: reset SCSI devices from main loop thread - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 155: virtio-scsi: reset SCSI devices from main loop thread -RH-Bugzilla: 2155748 -RH-Acked-by: Eric Blake -RH-Acked-by: Kevin Wolf -RH-Acked-by: Laszlo Ersek -RH-Commit: [3/3] 2a29cb9600709a799daadb4addb58a747ed2e3a3 (stefanha/centos-stream-qemu-kvm) - -When an IOThread is configured, the ctrl virtqueue is processed in the -IOThread. TMFs that reset SCSI devices are currently called directly -from the IOThread and trigger an assertion failure in blk_drain() from -the following call stack: - -virtio_scsi_handle_ctrl_req -> virtio_scsi_do_tmf -> device_code_reset --> scsi_disk_reset -> scsi_device_purge_requests -> blk_drain - - ../block/block-backend.c:1780: void blk_drain(BlockBackend *): Assertion `qemu_in_main_thread()' failed. 
- -The blk_drain() function is not designed to be called from an IOThread -because it needs the Big QEMU Lock (BQL). - -This patch defers TMFs that reset SCSI devices to a Bottom Half (BH) -that runs in the main loop thread under the BQL. This way it's safe to -call blk_drain() and the assertion failure is avoided. - -Introduce s->tmf_bh_list for tracking TMF requests that have been -deferred to the BH. When the BH runs it will grab the entire list and -process all requests. Care must be taken to clear the list when the -virtio-scsi device is reset or unrealized. Otherwise deferred TMF -requests could execute later and lead to use-after-free or other -undefined behavior. - -The s->resetting counter that's used by TMFs that reset SCSI devices is -accessed from multiple threads. This patch makes that explicit by using -atomic accessor functions. With this patch applied the counter is only -modified by the main loop thread under the BQL but can be read by any -thread. - -Reported-by: Qing Wang -Cc: Paolo Bonzini -Reviewed-by: Eric Blake -Signed-off-by: Stefan Hajnoczi -Message-Id: <20230221212218.1378734-4-stefanha@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit be2c42b97c3a3a395b2f05bad1b6c7de20ecf2a5) -Signed-off-by: Stefan Hajnoczi ---- - hw/scsi/virtio-scsi.c | 169 +++++++++++++++++++++++++------- - include/hw/virtio/virtio-scsi.h | 11 ++- - 2 files changed, 143 insertions(+), 37 deletions(-) - -diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c -index 6f6e2e32ba..7d27e4c2a1 100644 ---- a/hw/scsi/virtio-scsi.c -+++ b/hw/scsi/virtio-scsi.c -@@ -42,13 +42,11 @@ typedef struct VirtIOSCSIReq { - QEMUSGList qsgl; - QEMUIOVector resp_iov; - -- union { -- /* Used for two-stage request submission */ -- QTAILQ_ENTRY(VirtIOSCSIReq) next; -+ /* Used for two-stage request submission and TMFs deferred to BH */ -+ QTAILQ_ENTRY(VirtIOSCSIReq) next; - -- /* Used for cancellation of request during TMFs */ -- int remaining; -- }; -+ /* Used for 
cancellation of request during TMFs */ -+ int remaining; - - SCSIRequest *sreq; - size_t resp_size; -@@ -293,6 +291,122 @@ static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d) - } - } - -+static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req) -+{ -+ VirtIOSCSI *s = req->dev; -+ SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun); -+ BusChild *kid; -+ int target; -+ -+ switch (req->req.tmf.subtype) { -+ case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: -+ if (!d) { -+ req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET; -+ goto out; -+ } -+ if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { -+ req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN; -+ goto out; -+ } -+ qatomic_inc(&s->resetting); -+ device_cold_reset(&d->qdev); -+ qatomic_dec(&s->resetting); -+ break; -+ -+ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: -+ target = req->req.tmf.lun[1]; -+ qatomic_inc(&s->resetting); -+ -+ rcu_read_lock(); -+ QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) { -+ SCSIDevice *d1 = SCSI_DEVICE(kid->child); -+ if (d1->channel == 0 && d1->id == target) { -+ device_cold_reset(&d1->qdev); -+ } -+ } -+ rcu_read_unlock(); -+ -+ qatomic_dec(&s->resetting); -+ break; -+ -+ default: -+ g_assert_not_reached(); -+ break; -+ } -+ -+out: -+ object_unref(OBJECT(d)); -+ -+ virtio_scsi_acquire(s); -+ virtio_scsi_complete_req(req); -+ virtio_scsi_release(s); -+} -+ -+/* Some TMFs must be processed from the main loop thread */ -+static void virtio_scsi_do_tmf_bh(void *opaque) -+{ -+ VirtIOSCSI *s = opaque; -+ QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); -+ VirtIOSCSIReq *req; -+ VirtIOSCSIReq *tmp; -+ -+ GLOBAL_STATE_CODE(); -+ -+ virtio_scsi_acquire(s); -+ -+ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { -+ QTAILQ_REMOVE(&s->tmf_bh_list, req, next); -+ QTAILQ_INSERT_TAIL(&reqs, req, next); -+ } -+ -+ qemu_bh_delete(s->tmf_bh); -+ s->tmf_bh = NULL; -+ -+ virtio_scsi_release(s); -+ -+ QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) { -+ 
QTAILQ_REMOVE(&reqs, req, next); -+ virtio_scsi_do_one_tmf_bh(req); -+ } -+} -+ -+static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s) -+{ -+ VirtIOSCSIReq *req; -+ VirtIOSCSIReq *tmp; -+ -+ GLOBAL_STATE_CODE(); -+ -+ virtio_scsi_acquire(s); -+ -+ if (s->tmf_bh) { -+ qemu_bh_delete(s->tmf_bh); -+ s->tmf_bh = NULL; -+ } -+ -+ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { -+ QTAILQ_REMOVE(&s->tmf_bh_list, req, next); -+ -+ /* SAM-6 6.3.2 Hard reset */ -+ req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE; -+ virtio_scsi_complete_req(req); -+ } -+ -+ virtio_scsi_release(s); -+} -+ -+static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req) -+{ -+ VirtIOSCSI *s = req->dev; -+ -+ QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next); -+ -+ if (!s->tmf_bh) { -+ s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s); -+ qemu_bh_schedule(s->tmf_bh); -+ } -+} -+ - /* Return 0 if the request is ready to be completed and return to guest; - * -EINPROGRESS if the request is submitted and will be completed later, in the - * case of async cancellation. 
*/ -@@ -300,8 +414,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) - { - SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun); - SCSIRequest *r, *next; -- BusChild *kid; -- int target; - int ret = 0; - - virtio_scsi_ctx_check(s, d); -@@ -358,15 +470,9 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) - break; - - case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: -- if (!d) { -- goto fail; -- } -- if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { -- goto incorrect_lun; -- } -- s->resetting++; -- device_cold_reset(&d->qdev); -- s->resetting--; -+ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: -+ virtio_scsi_defer_tmf_to_bh(req); -+ ret = -EINPROGRESS; - break; - - case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET: -@@ -409,22 +515,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) - } - break; - -- case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: -- target = req->req.tmf.lun[1]; -- s->resetting++; -- -- rcu_read_lock(); -- QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) { -- SCSIDevice *d1 = SCSI_DEVICE(kid->child); -- if (d1->channel == 0 && d1->id == target) { -- device_cold_reset(&d1->qdev); -- } -- } -- rcu_read_unlock(); -- -- s->resetting--; -- break; -- - case VIRTIO_SCSI_T_TMF_CLEAR_ACA: - default: - req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_REJECTED; -@@ -654,7 +744,7 @@ static void virtio_scsi_request_cancelled(SCSIRequest *r) - if (!req) { - return; - } -- if (req->dev->resetting) { -+ if (qatomic_read(&req->dev->resetting)) { - req->resp.cmd.response = VIRTIO_SCSI_S_RESET; - } else { - req->resp.cmd.response = VIRTIO_SCSI_S_ABORTED; -@@ -830,9 +920,12 @@ static void virtio_scsi_reset(VirtIODevice *vdev) - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); - - assert(!s->dataplane_started); -- s->resetting++; -+ -+ virtio_scsi_reset_tmf_bh(s); -+ -+ qatomic_inc(&s->resetting); - bus_cold_reset(BUS(&s->bus)); -- s->resetting--; -+ qatomic_dec(&s->resetting); - - vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE; 
- vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE; -@@ -1052,6 +1145,8 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) - VirtIOSCSI *s = VIRTIO_SCSI(dev); - Error *err = NULL; - -+ QTAILQ_INIT(&s->tmf_bh_list); -+ - virtio_scsi_common_realize(dev, - virtio_scsi_handle_ctrl, - virtio_scsi_handle_event, -@@ -1089,6 +1184,8 @@ static void virtio_scsi_device_unrealize(DeviceState *dev) - { - VirtIOSCSI *s = VIRTIO_SCSI(dev); - -+ virtio_scsi_reset_tmf_bh(s); -+ - qbus_set_hotplug_handler(BUS(&s->bus), NULL); - virtio_scsi_common_unrealize(dev); - } -diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h -index a36aad9c86..1c1cd77d6e 100644 ---- a/include/hw/virtio/virtio-scsi.h -+++ b/include/hw/virtio/virtio-scsi.h -@@ -75,13 +75,22 @@ struct VirtIOSCSICommon { - VirtQueue **cmd_vqs; - }; - -+struct VirtIOSCSIReq; -+ - struct VirtIOSCSI { - VirtIOSCSICommon parent_obj; - - SCSIBus bus; -- int resetting; -+ int resetting; /* written from main loop thread, read from any thread */ - bool events_dropped; - -+ /* -+ * TMFs deferred to main loop BH. These fields are protected by -+ * virtio_scsi_acquire(). 
-+ */ -+ QEMUBH *tmf_bh; -+ QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list; -+ - /* Fields for dataplane below */ - AioContext *ctx; /* one iothread per virtio-scsi-pci for now */ - --- -2.39.1 - diff --git a/kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch b/kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch deleted file mode 100644 index d797023..0000000 --- a/kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 3f55d12df35552ae948587a62d6f9015664adc13 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 21 Dec 2022 12:50:12 +0100 -Subject: [PATCH 1/9] virtio_net: Modify virtio_net_get_config to early return -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 137: vDPA net SVQ guest announce support -RH-Bugzilla: 2141088 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Jason Wang -RH-Commit: [1/4] 4f5e79afd54e157f32e6fff56ae33e2b71492525 (eperezmartin/qemu-kvm) - -Next patches introduce more code on vhost-vdpa branch, with already have -too much indentation. - -Signed-off-by: Eugenio Pérez -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Michael S. Tsirkin -Acked-by: Jason Wang -Message-Id: <20221221115015.1400889-2-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit ebc141a62508dc91901373c1a19fe7e2cf560dfb) ---- - hw/net/virtio-net.c | 28 +++++++++++++++------------- - 1 file changed, 15 insertions(+), 13 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index ec974f7a76..5935e55653 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -168,20 +168,22 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) - if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { - ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg, - n->config_size); -- if (ret != -1) { -- /* -- * Some NIC/kernel combinations present 0 as the mac address. As -- * that is not a legal address, try to proceed with the -- * address from the QEMU command line in the hope that the -- * address has been configured correctly elsewhere - just not -- * reported by the device. -- */ -- if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { -- info_report("Zero hardware mac address detected. Ignoring."); -- memcpy(netcfg.mac, n->mac, ETH_ALEN); -- } -- memcpy(config, &netcfg, n->config_size); -+ if (ret == -1) { -+ return; - } -+ -+ /* -+ * Some NIC/kernel combinations present 0 as the mac address. As that -+ * is not a legal address, try to proceed with the address from the -+ * QEMU command line in the hope that the address has been configured -+ * correctly elsewhere - just not reported by the device. -+ */ -+ if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) { -+ info_report("Zero hardware mac address detected. 
Ignoring."); -+ memcpy(netcfg.mac, n->mac, ETH_ALEN); -+ } -+ -+ memcpy(config, &netcfg, n->config_size); - } - } - --- -2.31.1 - diff --git a/kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch b/kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch deleted file mode 100644 index 866957c..0000000 --- a/kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch +++ /dev/null @@ -1,46 +0,0 @@ -From b3d728b53abaae0c9884dfb5e9c216b1088196e3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 21 Dec 2022 12:50:13 +0100 -Subject: [PATCH 2/9] virtio_net: copy VIRTIO_NET_S_ANNOUNCE if device model - has it -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 137: vDPA net SVQ guest announce support -RH-Bugzilla: 2141088 -RH-Acked-by: Laurent Vivier -RH-Acked-by: Cindy Lu -RH-Acked-by: Jason Wang -RH-Commit: [2/4] fb04186829eb93bab3c9ececf90fa5b035ffa2ec (eperezmartin/qemu-kvm) - -Status part of the emulated feature. It will follow device model, so we -must copy it as long as NIC device model has it set. - -Signed-off-by: Eugenio Pérez -Message-Id: <20221221115015.1400889-3-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Acked-by: Jason Wang -(cherry picked from commit 4f93aafc8f9d731c6588f5dc5594c6a1dd1fbe66) ---- - hw/net/virtio-net.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 5935e55653..948bcf33cf 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -183,6 +183,8 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) - memcpy(netcfg.mac, n->mac, ETH_ALEN); - } - -+ netcfg.status |= virtio_tswap16(vdev, -+ n->status & VIRTIO_NET_S_ANNOUNCE); - memcpy(config, &netcfg, n->config_size); - } - } --- -2.31.1 - diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 7b64c6d..a3315a4 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -147,8 +147,8 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 7.2.0 -Release: 14%{?rcrel}%{?dist}%{?cc_suffix} +Version: 8.0.0 +Release: 1%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -184,238 +184,11 @@ Patch0011: 0011-Enable-make-check.patch Patch0012: 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch Patch0013: 0013-Add-support-statement-to-help-output.patch Patch0014: 0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch -Patch0015: 0015-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch -Patch0016: 0016-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch -Patch0018: 0018-Addd-7.2-compat-bits-for-RHEL-9.1-machine-type.patch -Patch0019: 0019-redhat-Update-s390x-machine-type-compatibility-for-Q.patch -Patch0020: 0020-redhat-aarch64-add-rhel9.2.0-virt-machine-type.patch -Patch0021: 0021-redhat-Add-new-rhel-9.2.0-s390x-machine-type.patch -Patch0022: 0022-x86-rhel-9.2.0-machine-type.patch -Patch23: kvm-redhat-fix-virt-rhel9.2.0-compat-props.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch24: kvm-hw-arm-virt-Introduce-virt_set_high_memmap-helper.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch25: kvm-hw-arm-virt-Rename-variable-size-to-region_size-in-v.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch26: kvm-hw-arm-virt-Introduce-variable-region_base-in-virt_s.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch27: kvm-hw-arm-virt-Introduce-virt_get_high_memmap_enabled-h.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch28: kvm-hw-arm-virt-Improve-high-memory-region-address-assig.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch29: kvm-hw-arm-virt-Add-compact-highmem-property.patch -# For bz#2113840 - [RHEL9.2] Memory mapping optimization for virt machine -Patch30: kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch -# For bz#2113840 - [RHEL9.2] 
Memory mapping optimization for virt machine -Patch31: kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch -# For bz#2155749 - [regression][stable guest abi][qemu-kvm7.2]Migration failed due to virtio-rng device between RHEL8.8 and RHEL9.2/MSI-X -Patch32: kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch33: kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch34: kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch35: kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch36: kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch37: kvm-vhost-vdpa-add-support-for-config-interrupt.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch38: kvm-virtio-add-support-for-configure-interrupt.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch39: kvm-vhost-add-support-for-configure-interrupt.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch40: kvm-virtio-net-add-support-for-configure-interrupt.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch41: kvm-virtio-mmio-add-support-for-configure-interrupt.patch -# For bz#1905805 - support config interrupt in vhost-vdpa qemu -Patch42: kvm-virtio-pci-add-support-for-configure-interrupt.patch -# For bz#2159408 - [s390x] VMs with ISM passthrough don't autostart after leapp upgrade from RHEL 8 -Patch43: kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch -# For bz#2124856 - VM with virtio interface and iommu=on will crash when try to migrate -Patch44: kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch -# For bz#2155112 - Qemu 
coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch45: kvm-block-drop-bdrv_remove_filter_or_cow_child.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch46: kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch47: kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch48: kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch49: kvm-block-Remove-drained_end_counter.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch50: kvm-block-Inline-bdrv_drain_invoke.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch51: kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch52: kvm-block-Drain-individual-nodes-during-reopen.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch53: kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch54: kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch55: 
kvm-block-Remove-subtree-drains.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch56: kvm-block-Call-drain-callbacks-only-once.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch57: kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch58: kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch59: kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch -# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch60: kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch -# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on -Patch61: kvm-accel-introduce-accelerator-blocker-API.patch -# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on -Patch62: kvm-KVM-keep-track-of-running-ioctls.patch -# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on -Patch63: kvm-kvm-Atomic-memslot-updates.patch -# For bz#2141088 - vDPA SVQ guest announce support -Patch64: kvm-virtio_net-Modify-virtio_net_get_config-to-early-ret.patch -# For bz#2141088 - vDPA SVQ guest announce support -Patch65: kvm-virtio_net-copy-VIRTIO_NET_S_ANNOUNCE-if-device-mode.patch -# For bz#2141088 - vDPA SVQ guest announce support -Patch66: kvm-vdpa-handle-VIRTIO_NET_CTRL_ANNOUNCE-in-vhost_vdpa_n.patch -# For bz#2141088 - vDPA SVQ guest announce support -Patch67: kvm-vdpa-do-not-handle-VIRTIO_NET_F_GUEST_ANNOUNCE-in-vh.patch -# For bz#2122523 - Secure guest can't boot with maximal 
number of vcpus (248) -Patch68: kvm-s390x-pv-Implement-a-CGS-check-helper.patch -# For bz#2163701 - [s390x] VM fails to start with ISM passed through -Patch69: kvm-s390x-pci-coalesce-unmap-operations.patch -# For bz#2163701 - [s390x] VM fails to start with ISM passed through -Patch70: kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch -# For bz#2163701 - [s390x] VM fails to start with ISM passed through -Patch71: kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch -# For bz#2149191 - [RFE][guest-agent] - USB bus type support -Patch72: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch73: kvm-vdpa-use-v-shadow_vqs_enabled-in-vhost_vdpa_svqs_sta.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch74: kvm-vhost-set-SVQ-device-call-handler-at-SVQ-start.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch75: kvm-vhost-allocate-SVQ-device-file-descriptors-at-device.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch76: kvm-vhost-move-iova_tree-set-to-vhost_svq_start.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch77: kvm-vdpa-add-vhost_vdpa_net_valid_svq_features.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch78: kvm-vdpa-request-iova_range-only-once.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch79: kvm-vdpa-move-SVQ-vring-features-check-to-net.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch80: kvm-vdpa-allocate-SVQ-array-unconditionally.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch81: kvm-vdpa-add-asid-parameter-to-vhost_vdpa_dma_map-unmap.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch82: kvm-vdpa-store-x-svq-parameter-in-VhostVDPAState.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch83: kvm-vdpa-add-shadow_data-to-vhost_vdpa.patch -# For bz#2104412 - vDPA ASID support in Qemu -Patch84: kvm-vdpa-always-start-CVQ-in-SVQ-mode-if-possible.patch -# For bz#2104412 - vDPA ASID support in Qemu 
-Patch85: kvm-vdpa-fix-VHOST_BACKEND_F_IOTLB_ASID-flag-check.patch -# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations -Patch86: kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch -# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations -Patch87: kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch -# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations -Patch88: kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch -# For bz#2150180 - qemu-img finishes successfully while having errors in commit or bitmaps operations -Patch89: kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch -# For bz#2165280 - [kvm-unit-tests] debug-wp-migration fails -Patch90: kvm-accel-tcg-Test-CPUJumpCache-in-tb_jmp_cache_clear_pa.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch91: kvm-block-Improve-empty-format-specific-info-dump.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch92: kvm-block-file-Add-file-specific-image-info.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch93: kvm-block-vmdk-Change-extent-info-type.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch94: kvm-block-Split-BlockNodeInfo-off-of-ImageInfo.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch95: kvm-qemu-img-Use-BlockNodeInfo.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch96: kvm-block-qapi-Let-bdrv_query_image_info-recurse.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch97: kvm-block-qapi-Introduce-BlockGraphInfo.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch98: kvm-block-qapi-Add-indentation-to-bdrv_node_info_dump.patch -# For 
bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch99: kvm-iotests-Filter-child-node-information.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch100: kvm-iotests-106-214-308-Read-only-one-size-line.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch101: kvm-qemu-img-Let-info-print-block-graph.patch -# For bz#1860292 - RFE: add extent_size_hint information to qemu-img info -Patch102: kvm-qemu-img-Change-info-key-names-for-protocol-nodes.patch -# For bz#2155173 - [vhost-user] unable to start vhost net: 71: falling back on userspace -Patch103: kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch -# For bz#2155173 - [vhost-user] unable to start vhost net: 71: falling back on userspace -Patch104: kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch -# For bz#2162569 - [transitional device][virtio-rng-pci-transitional]Stable Guest ABI failed between RHEL 8.6 to RHEL 9.2 -Patch105: kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch -# For bz#2169232 - RFE: reconnect option for stream socket back-end -Patch106: kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch -# For bz#2169232 - RFE: reconnect option for stream socket back-end -Patch107: kvm-net-stream-add-a-new-option-to-automatically-reconne.patch -# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall -Patch108: kvm-linux-headers-Update-to-v6.1.patch -# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall -Patch109: kvm-util-userfaultfd-Add-uffd_open.patch -# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall -Patch110: kvm-util-userfaultfd-Support-dev-userfaultfd.patch -# For bz#2169732 - Multifd migration fails under a weak network/socket ordering race -Patch111: kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch -# For bz#2169732 - Multifd migration fails under a weak network/socket ordering race -Patch112: 
kvm-migration-check-magic-value-for-deciding-the-mapping.patch -# For bz#2168172 - [s390x] qemu-kvm coredumps when SE crashes -Patch113: kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch -# For bz#2168209 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) -Patch114: kvm-block-temporarily-hold-the-new-AioContext-of-bs_top-.patch -# For bz#2169904 - [SVVP] job 'Check SMBIOS Table Specific Requirements' failed on win2022 -Patch115: kvm-hw-smbios-fix-field-corruption-in-type-4-table.patch -# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed -Patch116: kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch -# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed -Patch117: kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch -# For bz#2155748 - qemu crash on void blk_drain(BlockBackend *): Assertion qemu_in_main_thread() failed -Patch118: kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch119: kvm-qatomic-add-smp_mb__before-after_rmw.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch120: kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch121: kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch122: kvm-edu-add-smp_mb__after_rmw.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch123: kvm-aio-wait-switch-to-smp_mb__after_rmw.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch124: kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch125: kvm-physmem-add-missing-memory-barrier.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch126: 
kvm-async-update-documentation-of-the-memory-barriers.patch -# For bz#2175660 - Guest hangs when starting or rebooting -Patch127: kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch128: kvm-target-i386-fix-operand-size-of-unary-SSE-operations.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch129: kvm-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch130: kvm-target-i386-Fix-BEXTR-instruction.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch131: kvm-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch132: kvm-target-i386-fix-ADOX-followed-by-ADCX.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch133: kvm-target-i386-Fix-32-bit-AD-CO-X-insns-in-64-bit-mode.patch -# For bz#2173590 - bugs in emulation of BMI instructions (for libguestfs without KVM) -Patch134: kvm-target-i386-Fix-BZHI-instruction.patch -# For bz#2156876 - [virtual network][rhel7.9_guest] qemu-kvm: vhost vring error in virtqueue 1: Invalid argument (22) -Patch135: kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch +Patch0015: 0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +Patch0016: 0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch +Patch0017: 0017-Add-RHEL-9.2.0-compat-structure.patch +Patch0018: 0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch +Patch0019: 0019-Disable-unwanted-new-devices.patch %if %{have_clang} BuildRequires: clang @@ -734,6 +507,7 @@ ulimit -n 10240 --disable-auth-pam \\\ --disable-avx2 \\\ --disable-avx512f \\\ + --disable-avx512bw \\\ --disable-blkio \\\ --disable-block-drv-whitelist-in-tools \\\ 
--disable-bochs \\\ @@ -777,6 +551,7 @@ ulimit -n 10240 --disable-kvm \\\ --disable-l2tpv3 \\\ --disable-libdaxctl \\\ + --disable-libdw \\\ --disable-libiscsi \\\ --disable-libnfs \\\ --disable-libpmem \\\ @@ -848,7 +623,6 @@ ulimit -n 10240 --disable-vhost-vdpa \\\ --disable-virglrenderer \\\ --disable-virtfs \\\ - --disable-virtiofsd \\\ --disable-vnc \\\ --disable-vnc-jpeg \\\ --disable-png \\\ @@ -883,7 +657,7 @@ run_configure() { --with-suffix="%{name}" \ --firmwarepath=%{firmwaredirs} \ --meson="%{__meson}" \ - --enable-trace-backend=dtrace \ + --enable-trace-backends=dtrace \ --with-coroutine=ucontext \ --with-git=git \ --tls-priority=@QEMU,SYSTEM \ @@ -960,7 +734,6 @@ run_configure \ --enable-usb-redir \ %endif --enable-vdi \ - --enable-virtiofsd \ --enable-vhost-kernel \ --enable-vhost-net \ --enable-vhost-user \ @@ -1096,10 +869,6 @@ popd mkdir -p %{buildroot}%{_datadir}/systemtap/tapset -# Move vhost-user JSON files to the standard "qemu" directory -mkdir -p %{buildroot}%{_datadir}/qemu -mv %{buildroot}%{_datadir}/%{name}/vhost-user %{buildroot}%{_datadir}/qemu/ - install -m 0755 %{qemu_kvm_build}/%{kvm_target}-softmmu/qemu-system-%{kvm_target} %{buildroot}%{_libexecdir}/qemu-kvm install -m 0644 %{qemu_kvm_build}/qemu-kvm.stp %{buildroot}%{_datadir}/systemtap/tapset/ install -m 0644 %{qemu_kvm_build}/qemu-kvm-log.stp %{buildroot}%{_datadir}/systemtap/tapset/ @@ -1255,17 +1024,13 @@ rm -rf %{buildroot}%{qemudocdir}/specs # endif !tools_only %endif -# Remove virtiofsd (we use separate package for virtiofsd) -rm -rf %{buildroot}%{_mandir}/man1/virtiofsd.1* -rm -rf %{buildroot}%{_libexecdir}/virtiofsd -rm -rf %{buildroot}%{_datadir}/qemu/vhost-user/50-qemu-virtiofsd.json - %check %if !%{tools_only} pushd %{qemu_kvm_build} echo "Testing %{name}-build" -%make_build check +#%make_build check +make V=1 check popd # endif !tools_only @@ -1446,6 +1211,11 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Thu Apr 20 2023 Miroslav 
Rezanina - 8.0.0-1 +- Rebase to QEMU 8.0.0 +- Resolves: bz#2180898 + (Rebase to QEMU 8.0.0 for RHEL 9.3.0) + * Mon Mar 20 2023 Miroslav Rezanina - 7.2.0-14 - Rebuild for 9.2 release - Resolves: bz#2173590 diff --git a/sources b/sources index c45f059..b5ace58 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-7.2.0.tar.xz) = f3cfa00da739ba819a218d7e6e95c77fb79a8e0f487b024ddd281602e785249b81144595e3f8c746c32a4f5c4d1a88c6aebae3c162603edfbb50ae3722d7ed13 +SHA512 (qemu-8.0.0.tar.xz) = 1f31d1e653dec2d35f1b7a5468ee3f471553b48eca8c8afafffcf9243c6b2260e78a5b73da3fe567f9b85d4133573eebd397747b3aec501fb24076263eb07b27 From 0543c20dae5dcb062364759a3888afcdd4a6a9b9 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 8 May 2023 04:55:31 -0400 Subject: [PATCH 190/195] * Mon May 08 2023 Miroslav Rezanina - 8.0.0-2 - kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch [bz#2087047] - kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch [bz#1934134] - kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch [bz#1934134] - Resolves: bz#2087047 (Disk detach is unsuccessful while the guest is still booting) - Resolves: bz#1934134 (ACPI table limits warning when booting guest with 512 VCPUs) --- ...-allow-repeating-hot-unplug-requests.patch | 84 +++++++++++++++ ...i-blobs-as-resizable-on-RHEL-pc-mach.patch | 40 +++++++ ...rning-on-acpi-table-size-to-pc-machi.patch | 101 ++++++++++++++++++ qemu-kvm.spec | 17 ++- 4 files changed, 241 insertions(+), 1 deletion(-) create mode 100644 kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch create mode 100644 kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch create mode 100644 kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch diff --git a/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch b/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch new file mode 100644 index 0000000..b937d27 --- /dev/null +++ b/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch @@ -0,0 
+1,84 @@ +From 61256a82ce78f40222455becb8850b5f5ebb5d72 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Tue, 18 Apr 2023 11:04:49 +0200 +Subject: [PATCH 1/3] acpi: pcihp: allow repeating hot-unplug requests + +RH-Author: Igor Mammedov +RH-MergeRequest: 159: acpi: pcihp: allow repeating hot-unplug requests +RH-Bugzilla: 2087047 +RH-Acked-by: Ani Sinha +RH-Acked-by: Julia Suvorova +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: MST +RH-Commit: [1/1] 9c597232466b27d91f127ee6004322d6ba69755f (imammedo/qemu-kvm-c-9-s-imam) + +with Q35 using ACPI PCI hotplug by default, user's request to unplug +device is ignored when it's issued before guest OS has been booted. +And any additional attempt to request device hot-unplug afterwards +results in following error: + + "Device XYZ is already in the process of unplug" + +arguably it can be considered as a regression introduced by [2], +before which it was possible to issue unplug request multiple +times. + +Accept new uplug requests after timeout (1ms). This brings ACPI PCI +hotplug on par with native PCIe unplug behavior [1] and allows user +to repeat unplug requests at propper times. +Set expire timeout to arbitrary 1msec so user won't be able to +flood guest with SCI interrupts by calling device_del in tight loop. + +PS: +ACPI spec doesn't mandate what OSPM can do with GPEx.status +bits set before it's booted => it's impl. depended. +Status bits may be retained (I tested with one Windows version) +or cleared (Linux since 2.6 kernel times) during guest's ACPI +subsystem initialization. +Clearing status bits (though not wrong per se) hides the unplug +event from guest, and it's upto user to repeat device_del later +when guest is able to handle unplug requests. 
+ +1) 18416c62e3 ("pcie: expire pending delete") +2) +Fixes: cce8944cc9ef ("qdev-monitor: Forbid repeated device_del") +Signed-off-by: Igor Mammedov +Acked-by: Gerd Hoffmann +CC: mst@redhat.com +CC: anisinha@redhat.com +CC: jusual@redhat.com +CC: kraxel@redhat.com +Message-Id: <20230418090449.2155757-1-imammedo@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Ani Sinha +(cherry picked from commit 0f689cf5ada4d5df5ab95c7f7aa9fc221afa855d) +Signed-off-by: Igor Mammedov +--- + hw/acpi/pcihp.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c +index dcfb779a7a..cdd6f775a1 100644 +--- a/hw/acpi/pcihp.c ++++ b/hw/acpi/pcihp.c +@@ -357,6 +357,16 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, + * acpi_pcihp_eject_slot() when the operation is completed. + */ + pdev->qdev.pending_deleted_event = true; ++ /* if unplug was requested before OSPM is initialized, ++ * linux kernel will clear GPE0.sts[] bits during boot, which effectively ++ * hides unplug event. And than followup qmp_device_del() calls remain ++ * blocked by above flag permanently. ++ * Unblock qmp_device_del() by setting expire limit, so user can ++ * repeat unplug request later when OSPM has been booted. 
++ */ ++ pdev->qdev.pending_deleted_expires_ms = ++ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); /* 1 msec */ ++ + s->acpi_pcihp_pci_status[bsel].down |= (1U << slot); + acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS); + } +-- +2.39.1 + diff --git a/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch b/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch new file mode 100644 index 0000000..67e702c --- /dev/null +++ b/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch @@ -0,0 +1,40 @@ +From b4645e7682aa1bde6f89df0eff2a9de83720eecc Mon Sep 17 00:00:00 2001 +From: Ani Sinha +Date: Tue, 2 May 2023 15:51:53 +0530 +Subject: [PATCH 3/3] hw/acpi: Mark acpi blobs as resizable on RHEL pc machines + version 7.6 and above + +RH-Author: Ani Sinha +RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3 +RH-Bugzilla: 1934134 +RH-Acked-by: Igor Mammedov +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: MST +RH-Commit: [2/2] 95d443af6e75c569d89d04d028012c3c56c0c3a4 (anisinha/centos-qemu-kvm) + +Please look at QEMU upstream commit +1af507756bae7 ("hw/acpi: limit warning on acpi table size to pc machines older than version 2.3") +This patch adapts the above change so that it applies to RHEL pc machines of +version 7.6 and newer. These are the machine types that are currently supported +in RHEL. Q35 machines are not affected. 
+ +Signed-off-by: Ani Sinha +--- + hw/i386/pc_piix.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 4d5880e249..6c7be628e1 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -914,6 +914,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; + pcmc->default_nic_model = "e1000"; + pcmc->pci_root_uid = 0; ++ pcmc->resizable_acpi_blob = true; + m->default_display = "std"; + m->no_parallel = 1; + m->numa_mem_supported = true; +-- +2.39.1 + diff --git a/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch b/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch new file mode 100644 index 0000000..e06113a --- /dev/null +++ b/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch @@ -0,0 +1,101 @@ +From 3f70da88788c398877b8ded0b27689530385302b Mon Sep 17 00:00:00 2001 +From: Ani Sinha +Date: Wed, 29 Mar 2023 10:27:26 +0530 +Subject: [PATCH 2/3] hw/acpi: limit warning on acpi table size to pc machines + older than version 2.3 + +RH-Author: Ani Sinha +RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3 +RH-Bugzilla: 1934134 +RH-Acked-by: Igor Mammedov +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: MST +RH-Commit: [1/2] 96c3b6d51e16734eb4e8de52635e0ca036964090 (anisinha/centos-qemu-kvm) + +i440fx machine versions 2.3 and newer support dynamic ram +resizing. See commit a1666142db6233 ("acpi-build: make ROMs RAM blocks resizeable"). +All currently supported q35 machine types (versions 2.4 and newer) support +resizable RAM/ROM blocks. Therefore the warning generated when the ACPI table +size exceeds a pre-defined value does not apply to those machine versions. +Add a check limiting the warning message to only those machines that do not +support expandable ram blocks (that is, i440fx machines with version 2.2 +and older). 
+ +Signed-off-by: Ani Sinha +Message-Id: <20230329045726.14028-1-anisinha@redhat.com> +Reviewed-by: Igor Mammedov +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 1af507756bae775028c27d30e602e2b9c72cd074) +--- + hw/i386/acpi-build.c | 6 ++++-- + hw/i386/pc.c | 1 + + hw/i386/pc_piix.c | 1 + + include/hw/i386/pc.h | 3 +++ + 4 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index ec857a117e..9bc4d8a981 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -2695,7 +2695,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) + int legacy_table_size = + ROUND_UP(tables_blob->len - aml_len + legacy_aml_len, + ACPI_BUILD_ALIGN_SIZE); +- if (tables_blob->len > legacy_table_size) { ++ if ((tables_blob->len > legacy_table_size) && ++ !pcmc->resizable_acpi_blob) { + /* Should happen only with PCI bridges and -M pc-i440fx-2.0. */ + warn_report("ACPI table size %u exceeds %d bytes," + " migration may not work", +@@ -2706,7 +2707,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) + g_array_set_size(tables_blob, legacy_table_size); + } else { + /* Make sure we have a buffer in case we need to resize the tables. */ +- if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) { ++ if ((tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) && ++ !pcmc->resizable_acpi_blob) { + /* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots. 
*/ + warn_report("ACPI table size %u exceeds %d bytes," + " migration may not work", +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index f216922cee..7db5a2348f 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -2092,6 +2092,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->acpi_data_size = 0x20000 + 0x8000; + pcmc->pvh_enabled = true; + pcmc->kvmclock_create_always = true; ++ pcmc->resizable_acpi_blob = true; + assert(!mc->get_hotplug_handler); + mc->async_pf_vmexit_disable = false; + mc->get_hotplug_handler = pc_get_hotplug_handler; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index fc704d783f..4d5880e249 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -750,6 +750,7 @@ static void pc_i440fx_2_2_machine_options(MachineClass *m) + compat_props_add(m->compat_props, hw_compat_2_2, hw_compat_2_2_len); + compat_props_add(m->compat_props, pc_compat_2_2, pc_compat_2_2_len); + pcmc->rsdp_in_ram = false; ++ pcmc->resizable_acpi_blob = false; + } + + DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2_fn, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index d218ad1628..2f514d13d8 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -130,6 +130,9 @@ struct PCMachineClass { + + /* create kvmclock device even when KVM PV features are not exposed */ + bool kvmclock_create_always; ++ ++ /* resizable acpi blob compat */ ++ bool resizable_acpi_blob; + }; + + #define TYPE_PC_MACHINE "generic-pc-machine" +-- +2.39.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index a3315a4..89ff0fd 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 8.0.0 -Release: 1%{?rcrel}%{?dist}%{?cc_suffix} +Release: 2%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -189,6 +189,12 @@ Patch0016: 0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch Patch0017: 0017-Add-RHEL-9.2.0-compat-structure.patch Patch0018: 0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch Patch0019: 0019-Disable-unwanted-new-devices.patch +# For bz#2087047 - Disk detach is unsuccessful while the guest is still booting +Patch20: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch +# For bz#1934134 - ACPI table limits warning when booting guest with 512 VCPUs +Patch21: kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch +# For bz#1934134 - ACPI table limits warning when booting guest with 512 VCPUs +Patch22: kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch %if %{have_clang} BuildRequires: clang @@ -1211,6 +1217,15 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon May 08 2023 Miroslav Rezanina - 8.0.0-2 +- kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch [bz#2087047] +- kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch [bz#1934134] +- kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch [bz#1934134] +- Resolves: bz#2087047 + (Disk detach is unsuccessful while the guest is still booting) +- Resolves: bz#1934134 + (ACPI table limits warning when booting guest with 512 VCPUs) + * Thu Apr 20 2023 Miroslav Rezanina - 8.0.0-1 - Rebase to QEMU 8.0.0 - Resolves: bz#2180898 From 4d2081bbd8b575e50a5d2a61acd0b796ec47757c Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 15 May 2023 10:23:54 -0400 Subject: [PATCH 191/195] * Mon May 15 2023 Miroslav Rezanina - 8.0.0-3 - kvm-migration-Handle-block-device-inactivation-failures-.patch [bz#2058982] - kvm-migration-Minor-control-flow-simplification.patch [bz#2058982] - Resolves: bz#2058982 (Qemu core dump if cut off nfs storage during migration) --- 
...-block-device-inactivation-failures-.patch | 116 ++++++++++++++++++ ...on-Minor-control-flow-simplification.patch | 52 ++++++++ qemu-kvm.spec | 12 +- 3 files changed, 179 insertions(+), 1 deletion(-) create mode 100644 kvm-migration-Handle-block-device-inactivation-failures-.patch create mode 100644 kvm-migration-Minor-control-flow-simplification.patch diff --git a/kvm-migration-Handle-block-device-inactivation-failures-.patch b/kvm-migration-Handle-block-device-inactivation-failures-.patch new file mode 100644 index 0000000..26c8437 --- /dev/null +++ b/kvm-migration-Handle-block-device-inactivation-failures-.patch @@ -0,0 +1,116 @@ +From 2aac64623d8d2d06d248c1bcc71aa13572fc843c Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Fri, 14 Apr 2023 10:33:58 -0500 +Subject: [PATCH 1/2] migration: Handle block device inactivation failures + better + +RH-Author: Eric Blake +RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. +RH-Bugzilla: 2058982 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [1/2] 5ae143c9234f6eee9fc5154944172bcd56975b36 (ebblake/centos-qemu-kvm) + +Consider what happens when performing a migration between two host +machines connected to an NFS server serving multiple block devices to +the guest, when the NFS server becomes unavailable. The migration +attempts to inactivate all block devices on the source (a necessary +step before the destination can take over); but if the NFS server is +non-responsive, the attempt to inactivate can itself fail. When that +happens, the destination fails to get the migrated guest (good, +because the source wasn't able to flush everything properly): + + (qemu) qemu-kvm: load of migration failed: Input/output error + +at which point, our only hope for the guest is for the source to take +back control. 
With the current code base, the host outputs a message, but then appears to resume: + + (qemu) qemu-kvm: qemu_savevm_state_complete_precopy_non_iterable: bdrv_inactivate_all() failed (-1) + + (src qemu)info status + VM status: running + +but a second migration attempt now asserts: + + (src qemu) qemu-kvm: ../block.c:6738: int bdrv_inactivate_recurse(BlockDriverState *): Assertion `!(bs->open_flags & BDRV_O_INACTIVE)' failed. + +Whether the guest is recoverable on the source after the first failure +is debatable, but what we do not want is to have qemu itself fail due +to an assertion. It looks like the problem is as follows: + +In migration.c:migration_completion(), the source sets 'inactivate' to +true (since COLO is not enabled), then tries +savevm.c:qemu_savevm_state_complete_precopy() with a request to +inactivate block devices. In turn, this calls +block.c:bdrv_inactivate_all(), which fails when flushing runs up +against the non-responsive NFS server. With savevm failing, we are +now left in a state where some, but not all, of the block devices have +been inactivated; but migration_completion() then jumps to 'fail' +rather than 'fail_invalidate' and skips an attempt to reclaim +those disks by calling bdrv_activate_all(). Even if we do attempt to +reclaim disks, we aren't taking note of failure there, either. + +Thus, we have reached a state where the migration engine has forgotten +all state about whether a block device is inactive, because we did not +set s->block_inactive in enough places; so migration allows the source +to reach vm_start() and resume execution, violating the block layer +invariant that the guest CPUs should not be restarted while a device +is inactive. 
Note that the code in migration.c:migrate_fd_cancel() +will also try to reactivate all block devices if s->block_inactive was +set, but because we failed to set that flag after the first failure, +the source assumes it has reclaimed all devices, even though it still +has remaining inactivated devices and does not try again. Normally, +qmp_cont() will also try to reactivate all disks (or correctly fail if +the disks are not reclaimable because NFS is not yet back up), but the +auto-resumption of the source after a migration failure does not go +through qmp_cont(). And because we have left the block layer in an +inconsistent state with devices still inactivated, the later migration +attempt is hitting the assertion failure. + +Since it is important to not resume the source with inactive disks, +this patch marks s->block_inactive before attempting inactivation, +rather than after succeeding, in order to prevent any vm_start() until +it has successfully reactivated all devices. + +See also https://bugzilla.redhat.com/show_bug.cgi?id=2058982 + +Signed-off-by: Eric Blake +Reviewed-by: Juan Quintela +Acked-by: Lukas Straub +Tested-by: Lukas Straub +Signed-off-by: Juan Quintela +(cherry picked from commit 403d18ae384239876764bbfa111d6cc5dcb673d1) +--- + migration/migration.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index bda4789193..cb0d42c061 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3444,13 +3444,11 @@ static void migration_completion(MigrationState *s) + MIGRATION_STATUS_DEVICE); + } + if (ret >= 0) { ++ s->block_inactive = inactivate; + qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); + ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, + inactivate); + } +- if (inactivate && ret >= 0) { +- s->block_inactive = true; +- } + } + qemu_mutex_unlock_iothread(); + +@@ -3522,6 +3520,7 @@ fail_invalidate: + bdrv_activate_all(&local_err); + if 
(local_err) { + error_report_err(local_err); ++ s->block_inactive = true; + } else { + s->block_inactive = false; + } +-- +2.39.1 + diff --git a/kvm-migration-Minor-control-flow-simplification.patch b/kvm-migration-Minor-control-flow-simplification.patch new file mode 100644 index 0000000..a0dbdd9 --- /dev/null +++ b/kvm-migration-Minor-control-flow-simplification.patch @@ -0,0 +1,52 @@ +From c3bc974ea4b5186a76daa433209c1209d94dd0b7 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Thu, 20 Apr 2023 09:35:51 -0500 +Subject: [PATCH 2/2] migration: Minor control flow simplification + +RH-Author: Eric Blake +RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. +RH-Bugzilla: 2058982 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [2/2] 5afd8c25d6f14bdb2a380ecc77bc6c2f2a26df87 (ebblake/centos-qemu-kvm) + +No need to declare a temporary variable. + +Suggested-by: Juan Quintela +Fixes: 1df36e8c6289 ("migration: Handle block device inactivation failures better") +Signed-off-by: Eric Blake +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 5d39f44d7ac5c63f53d4d0900ceba9521bc27e49) +--- + migration/migration.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index cb0d42c061..08007cef4e 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3436,7 +3436,6 @@ static void migration_completion(MigrationState *s) + ret = global_state_store(); + + if (!ret) { +- bool inactivate = !migrate_colo_enabled(); + ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); + trace_migration_completion_vm_stop(ret); + if (ret >= 0) { +@@ -3444,10 +3443,10 @@ static void migration_completion(MigrationState *s) + MIGRATION_STATUS_DEVICE); + } + if (ret >= 0) { +- s->block_inactive = inactivate; ++ s->block_inactive = !migrate_colo_enabled(); + qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); + ret = 
qemu_savevm_state_complete_precopy(s->to_dst_file, false, +- inactivate); ++ s->block_inactive); + } + } + qemu_mutex_unlock_iothread(); +-- +2.39.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 89ff0fd..4cb63d3 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 8.0.0 -Release: 2%{?rcrel}%{?dist}%{?cc_suffix} +Release: 3%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -195,6 +195,10 @@ Patch20: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch Patch21: kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch # For bz#1934134 - ACPI table limits warning when booting guest with 512 VCPUs Patch22: kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch +# For bz#2058982 - Qemu core dump if cut off nfs storage during migration +Patch23: kvm-migration-Handle-block-device-inactivation-failures-.patch +# For bz#2058982 - Qemu core dump if cut off nfs storage during migration +Patch24: kvm-migration-Minor-control-flow-simplification.patch %if %{have_clang} BuildRequires: clang @@ -1217,6 +1221,12 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon May 15 2023 Miroslav Rezanina - 8.0.0-3 +- kvm-migration-Handle-block-device-inactivation-failures-.patch [bz#2058982] +- kvm-migration-Minor-control-flow-simplification.patch [bz#2058982] +- Resolves: bz#2058982 + (Qemu core dump if cut off nfs storage during migration) + * Mon May 08 2023 Miroslav Rezanina - 8.0.0-2 - kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch [bz#2087047] - kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch [bz#1934134] From 6ee9df2871b65e4d9a024042d587a4a3d12e396d Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 
22 May 2023 01:24:31 -0400 Subject: [PATCH 192/195] * Mon May 22 2023 Miroslav Rezanina - 8.0.0-4 - kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch [bz#2058982] - kvm-util-mmap-alloc-qemu_fd_getfs.patch [bz#2057267] - kvm-vl.c-Create-late-backends-before-migration-object.patch [bz#2057267] - kvm-migration-postcopy-Detect-file-system-on-dest-host.patch [bz#2057267] - kvm-migration-mark-mixed-functions-that-can-suspend.patch [bz#2057267] - kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch [bz#2057267] - kvm-migration-remove-extra-whitespace-character-for-code.patch [bz#2057267] - kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch [bz#2057267] - kvm-migration-Update-atomic-stats-out-of-the-mutex.patch [bz#2057267] - kvm-migration-Make-multifd_bytes-atomic.patch [bz#2057267] - kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch [bz#2057267] - kvm-migration-Make-precopy_bytes-atomic.patch [bz#2057267] - kvm-migration-Make-downtime_bytes-atomic.patch [bz#2057267] - kvm-migration-Make-dirty_sync_count-atomic.patch [bz#2057267] - kvm-migration-Make-postcopy_requests-atomic.patch [bz#2057267] - kvm-migration-Rename-duplicate-to-zero_pages.patch [bz#2057267] - kvm-migration-Rename-normal-to-normal_pages.patch [bz#2057267] - kvm-migration-rename-enabled_capabilities-to-capabilitie.patch [bz#2057267] - kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch [bz#2057267] - kvm-migration-move-migration_global_dump-to-migration-hm.patch [bz#2057267] - kvm-spice-move-client_migrate_info-command-to-ui.patch [bz#2057267] - kvm-migration-Create-migrate_cap_set.patch [bz#2057267] - kvm-migration-Create-options.c.patch [bz#2057267] - kvm-migration-Move-migrate_colo_enabled-to-options.c.patch [bz#2057267] - kvm-migration-Move-migrate_use_compression-to-options.c.patch [bz#2057267] - kvm-migration-Move-migrate_use_events-to-options.c.patch [bz#2057267] - kvm-migration-Move-migrate_use_multifd-to-options.c.patch [bz#2057267] - 
kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch [bz#2057267] - kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch [bz#2057267] - kvm-migration-Move-migrate_use_block-to-options.c.patch [bz#2057267] - kvm-migration-Move-migrate_use_return-to-options.c.patch [bz#2057267] - kvm-migration-Create-migrate_rdma_pin_all-function.patch [bz#2057267] - kvm-migration-Move-migrate_caps_check-to-options.c.patch [bz#2057267] - kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch [bz#2057267] - kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch [bz#2057267] - kvm-migration-Move-migrate_cap_set-to-options.c.patch [bz#2057267] - kvm-migration-Move-parameters-functions-to-option.c.patch [bz#2057267] - kvm-migration-Use-migrate_max_postcopy_bandwidth.patch [bz#2057267] - kvm-migration-Move-migrate_use_block_incremental-to-opti.patch [bz#2057267] - kvm-migration-Create-migrate_throttle_trigger_threshold.patch [bz#2057267] - kvm-migration-Create-migrate_checkpoint_delay.patch [bz#2057267] - kvm-migration-Create-migrate_max_cpu_throttle.patch [bz#2057267] - kvm-migration-Move-migrate_announce_params-to-option.c.patch [bz#2057267] - kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch [bz#2057267] - kvm-migration-Create-migrate_cpu_throttle_increment-func.patch [bz#2057267] - kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch [bz#2057267] - kvm-migration-Move-migrate_postcopy-to-options.c.patch [bz#2057267] - kvm-migration-Create-migrate_max_bandwidth-function.patch [bz#2057267] - kvm-migration-Move-migrate_use_tls-to-options.c.patch [bz#2057267] - kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch [bz#2057267] - kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch [bz#2057267] - kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch [bz#2185688] - kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch [bz#2185688] - kvm-iotests-Use-alternative-CPU-type-that-is-not-depreca.patch 
[bz#2185688] - kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch [bz#2185688] - kvm-Enable-Linux-io_uring.patch [bz#1947230] - Resolves: bz#2058982 (Qemu core dump if cut off nfs storage during migration) - Resolves: bz#2057267 (Migration with postcopy fail when vm set with shared memory) - Resolves: bz#2185688 ([qemu-kvm] no response with QMP command block_resize) - Resolves: bz#1947230 (Enable QEMU support for io_uring in RHEL9) --- ...-no_coroutine_fns-in-qmp_block_resiz.patch | 56 ++ ...o_unref-for-calls-in-coroutine-conte.patch | 386 +++++++ ...sizing-image-attached-to-an-iothread.patch | 132 +++ ...rnative-CPU-type-that-is-not-depreca.patch | 44 + ...postcopy_ram_supported_by_host-to-re.patch | 308 ++++++ ...t-disk-reactivation-in-more-failure-.patch | 111 +++ kvm-migration-Create-migrate_cap_set.patch | 93 ++ ...tion-Create-migrate_checkpoint_delay.patch | 84 ++ ...-migrate_cpu_throttle_increment-func.patch | 75 ++ ...-migrate_cpu_throttle_initial-to-opt.patch | 75 ++ ...-migrate_cpu_throttle_tailslow-funct.patch | 78 ++ ...reate-migrate_max_bandwidth-function.patch | 232 +++++ ...tion-Create-migrate_max_cpu_throttle.patch | 88 ++ ...Create-migrate_rdma_pin_all-function.patch | 95 ++ ...e-migrate_throttle_trigger_threshold.patch | 75 ++ kvm-migration-Create-options.c.patch | 524 ++++++++++ ...gration-Make-dirty_sync_count-atomic.patch | 105 ++ ...e-dirty_sync_missed_zero_copy-atomic.patch | 92 ++ ...migration-Make-downtime_bytes-atomic.patch | 68 ++ kvm-migration-Make-multifd_bytes-atomic.patch | 99 ++ ...ration-Make-postcopy_requests-atomic.patch | 69 ++ kvm-migration-Make-precopy_bytes-atomic.patch | 68 ++ ...ram_counters-and-ram_atomic_counters.patch | 270 +++++ ...-migrate_announce_params-to-option.c.patch | 90 ++ ...on-Move-migrate_cap_set-to-options.c.patch | 110 ++ ...Move-migrate_caps_check-to-options.c.patch | 458 +++++++++ ...ve-migrate_colo_enabled-to-options.c.patch | 136 +++ ...n-Move-migrate_postcopy-to-options.c.patch | 98 ++ 
...-Move-migrate_use_block-to-options.c.patch | 134 +++ ...igrate_use_block_incremental-to-opti.patch | 121 +++ ...migrate_use_compression-to-options.c.patch | 183 ++++ ...Move-migrate_use_events-to-options.c.patch | 120 +++ ...ove-migrate_use_multifd-to-options.c.patch | 247 +++++ ...Move-migrate_use_return-to-options.c.patch | 138 +++ ...on-Move-migrate_use_tls-to-options.c.patch | 134 +++ ...Move-migrate_use_xbzrle-to-options.c.patch | 156 +++ ...igrate_use_zero_copy_send-to-options.patch | 167 ++++ ...ove-parameters-functions-to-option.c.patch | 317 ++++++ ...mp_migrate_set_capabilities-to-optio.patch | 100 ++ ...mp_migrate_set_parameters-to-options.patch | 943 ++++++++++++++++++ ...mp_query_migrate_capabilities-to-opt.patch | 100 ++ ...igrate_caps_check-the-old-and-new-ca.patch | 226 +++++ ...ation-Rename-duplicate-to-zero_pages.patch | 109 ++ ...ration-Rename-normal-to-normal_pages.patch | 109 ++ ...Update-atomic-stats-out-of-the-mutex.patch | 52 + ...n-Use-migrate_max_postcopy_bandwidth.patch | 40 + ...ark-mixed-functions-that-can-suspend.patch | 153 +++ ...igration_global_dump-to-migration-hm.patch | 121 +++ ...copy-Detect-file-system-on-dest-host.patch | 117 +++ ...-extra-whitespace-character-for-code.patch | 44 + ...-enabled_capabilities-to-capabilitie.patch | 329 ++++++ ...tcopy-ram-do-not-use-qatomic_mb_read.patch | 42 + ...ve-client_migrate_info-command-to-ui.patch | 248 +++++ kvm-util-mmap-alloc-qemu_fd_getfs.patch | 95 ++ ...ate-backends-before-migration-object.patch | 58 ++ qemu-kvm.spec | 180 +++- 56 files changed, 8901 insertions(+), 1 deletion(-) create mode 100644 kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch create mode 100644 kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch create mode 100644 kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch create mode 100644 kvm-iotests-Use-alternative-CPU-type-that-is-not-depreca.patch create mode 100644 
kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch create mode 100644 kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch create mode 100644 kvm-migration-Create-migrate_cap_set.patch create mode 100644 kvm-migration-Create-migrate_checkpoint_delay.patch create mode 100644 kvm-migration-Create-migrate_cpu_throttle_increment-func.patch create mode 100644 kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch create mode 100644 kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch create mode 100644 kvm-migration-Create-migrate_max_bandwidth-function.patch create mode 100644 kvm-migration-Create-migrate_max_cpu_throttle.patch create mode 100644 kvm-migration-Create-migrate_rdma_pin_all-function.patch create mode 100644 kvm-migration-Create-migrate_throttle_trigger_threshold.patch create mode 100644 kvm-migration-Create-options.c.patch create mode 100644 kvm-migration-Make-dirty_sync_count-atomic.patch create mode 100644 kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch create mode 100644 kvm-migration-Make-downtime_bytes-atomic.patch create mode 100644 kvm-migration-Make-multifd_bytes-atomic.patch create mode 100644 kvm-migration-Make-postcopy_requests-atomic.patch create mode 100644 kvm-migration-Make-precopy_bytes-atomic.patch create mode 100644 kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch create mode 100644 kvm-migration-Move-migrate_announce_params-to-option.c.patch create mode 100644 kvm-migration-Move-migrate_cap_set-to-options.c.patch create mode 100644 kvm-migration-Move-migrate_caps_check-to-options.c.patch create mode 100644 kvm-migration-Move-migrate_colo_enabled-to-options.c.patch create mode 100644 kvm-migration-Move-migrate_postcopy-to-options.c.patch create mode 100644 kvm-migration-Move-migrate_use_block-to-options.c.patch create mode 100644 kvm-migration-Move-migrate_use_block_incremental-to-opti.patch create mode 100644 kvm-migration-Move-migrate_use_compression-to-options.c.patch 
create mode 100644 kvm-migration-Move-migrate_use_events-to-options.c.patch create mode 100644 kvm-migration-Move-migrate_use_multifd-to-options.c.patch create mode 100644 kvm-migration-Move-migrate_use_return-to-options.c.patch create mode 100644 kvm-migration-Move-migrate_use_tls-to-options.c.patch create mode 100644 kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch create mode 100644 kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch create mode 100644 kvm-migration-Move-parameters-functions-to-option.c.patch create mode 100644 kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch create mode 100644 kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch create mode 100644 kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch create mode 100644 kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch create mode 100644 kvm-migration-Rename-duplicate-to-zero_pages.patch create mode 100644 kvm-migration-Rename-normal-to-normal_pages.patch create mode 100644 kvm-migration-Update-atomic-stats-out-of-the-mutex.patch create mode 100644 kvm-migration-Use-migrate_max_postcopy_bandwidth.patch create mode 100644 kvm-migration-mark-mixed-functions-that-can-suspend.patch create mode 100644 kvm-migration-move-migration_global_dump-to-migration-hm.patch create mode 100644 kvm-migration-postcopy-Detect-file-system-on-dest-host.patch create mode 100644 kvm-migration-remove-extra-whitespace-character-for-code.patch create mode 100644 kvm-migration-rename-enabled_capabilities-to-capabilitie.patch create mode 100644 kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch create mode 100644 kvm-spice-move-client_migrate_info-command-to-ui.patch create mode 100644 kvm-util-mmap-alloc-qemu_fd_getfs.patch create mode 100644 kvm-vl.c-Create-late-backends-before-migration-object.patch diff --git a/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch b/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch new file mode 100644 
index 0000000..fbab82d --- /dev/null +++ b/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch @@ -0,0 +1,56 @@ +From dfa2811e88afaf996345552330e97f0513c1803c Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 4 May 2023 13:57:34 +0200 +Subject: [PATCH 53/56] block: Don't call no_coroutine_fns in + qmp_block_resize() + +RH-Author: Kevin Wolf +RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() +RH-Bugzilla: 2185688 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/4] 7ac7e34821cfc8bd5f0daadd7a1c4a5596bc60a6 (kmwolf/centos-qemu-kvm) + +This QMP handler runs in a coroutine, so it must use the corresponding +no_co_wrappers instead. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2185688 +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Message-Id: <20230504115750.54437-5-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 0c7d204f50c382c6baac8c94bd57af4a022b3888) +Signed-off-by: Kevin Wolf +--- + blockdev.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index d7b5c18f0a..eb509cf964 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -2430,7 +2430,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, + return; + } + +- blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp); ++ blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp); + if (!blk) { + return; + } +@@ -2445,7 +2445,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, + + bdrv_co_lock(bs); + bdrv_drained_end(bs); +- blk_unref(blk); ++ blk_co_unref(blk); + bdrv_co_unlock(bs); + } + +-- +2.39.1 + diff --git a/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch b/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch new file mode 100644 index 0000000..0f0347b --- 
/dev/null +++ b/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch @@ -0,0 +1,386 @@ +From 7baea25be90e184175dd5a919ee5878cbd4970c2 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 4 May 2023 13:57:33 +0200 +Subject: [PATCH 52/56] block: bdrv/blk_co_unref() for calls in coroutine + context + +RH-Author: Kevin Wolf +RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() +RH-Bugzilla: 2185688 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/4] 8ebf8486b082c30ca1b39a6ede35e471eaaccfa3 (kmwolf/centos-qemu-kvm) + +These functions must not be called in coroutine context, because they +need write access to the graph. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Message-Id: <20230504115750.54437-4-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit b2ab5f545fa1eaaf2955dd617bee19a8b3279786) +Signed-off-by: Kevin Wolf +--- + block.c | 2 +- + block/crypto.c | 6 +++--- + block/parallels.c | 6 +++--- + block/qcow.c | 6 +++--- + block/qcow2.c | 14 +++++++------- + block/qed.c | 6 +++--- + block/vdi.c | 6 +++--- + block/vhdx.c | 6 +++--- + block/vmdk.c | 18 +++++++++--------- + block/vpc.c | 6 +++--- + include/block/block-global-state.h | 3 ++- + include/sysemu/block-backend-global-state.h | 5 ++++- + 12 files changed, 44 insertions(+), 40 deletions(-) + +diff --git a/block.c b/block.c +index d79a52ca74..a48112f945 100644 +--- a/block.c ++++ b/block.c +@@ -680,7 +680,7 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, + + ret = 0; + out: +- blk_unref(blk); ++ blk_co_unref(blk); + return ret; + } + +diff --git a/block/crypto.c b/block/crypto.c +index ca67289187..8fd3ad0054 100644 +--- a/block/crypto.c ++++ b/block/crypto.c +@@ -355,7 +355,7 @@ block_crypto_co_create_generic(BlockDriverState *bs, int64_t size, + ret = 0; + cleanup: + qcrypto_block_free(crypto); +- 
blk_unref(blk); ++ blk_co_unref(blk); + return ret; + } + +@@ -661,7 +661,7 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp) + + ret = 0; + fail: +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + return ret; + } + +@@ -730,7 +730,7 @@ fail: + bdrv_co_delete_file_noerr(bs); + } + +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_QCryptoBlockCreateOptions(create_opts); + qobject_unref(cryptoopts); + return ret; +diff --git a/block/parallels.c b/block/parallels.c +index 013684801a..b49c35929e 100644 +--- a/block/parallels.c ++++ b/block/parallels.c +@@ -613,8 +613,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts, + + ret = 0; + out: +- blk_unref(blk); +- bdrv_unref(bs); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs); + return ret; + + exit: +@@ -691,7 +691,7 @@ parallels_co_create_opts(BlockDriver *drv, const char *filename, + + done: + qobject_unref(qdict); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/block/qcow.c b/block/qcow.c +index 490e4f819e..a0c701f578 100644 +--- a/block/qcow.c ++++ b/block/qcow.c +@@ -915,8 +915,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts, + g_free(tmp); + ret = 0; + exit: +- blk_unref(qcow_blk); +- bdrv_unref(bs); ++ blk_co_unref(qcow_blk); ++ bdrv_co_unref(bs); + qcrypto_block_free(crypto); + return ret; + } +@@ -1015,7 +1015,7 @@ qcow_co_create_opts(BlockDriver *drv, const char *filename, + fail: + g_free(backing_fmt); + qobject_unref(qdict); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/block/qcow2.c b/block/qcow2.c +index 22084730f9..0b8beb8b47 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -3711,7 +3711,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) + goto out; + } + +- blk_unref(blk); ++ blk_co_unref(blk); + blk = NULL; + + /* +@@ -3791,7 +3791,7 @@ 
qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) + } + } + +- blk_unref(blk); ++ blk_co_unref(blk); + blk = NULL; + + /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning. +@@ -3816,9 +3816,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) + + ret = 0; + out: +- blk_unref(blk); +- bdrv_unref(bs); +- bdrv_unref(data_bs); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs); ++ bdrv_co_unref(data_bs); + return ret; + } + +@@ -3949,8 +3949,8 @@ finish: + } + + qobject_unref(qdict); +- bdrv_unref(bs); +- bdrv_unref(data_bs); ++ bdrv_co_unref(bs); ++ bdrv_co_unref(data_bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/block/qed.c b/block/qed.c +index 0705a7b4e2..aff2a2076e 100644 +--- a/block/qed.c ++++ b/block/qed.c +@@ -748,8 +748,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, + ret = 0; /* success */ + out: + g_free(l1_table); +- blk_unref(blk); +- bdrv_unref(bs); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs); + return ret; + } + +@@ -819,7 +819,7 @@ bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename, + + fail: + qobject_unref(qdict); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/block/vdi.c b/block/vdi.c +index f2434d6153..08331d2dd7 100644 +--- a/block/vdi.c ++++ b/block/vdi.c +@@ -886,8 +886,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, + + ret = 0; + exit: +- blk_unref(blk); +- bdrv_unref(bs_file); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs_file); + g_free(bmap); + return ret; + } +@@ -975,7 +975,7 @@ vdi_co_create_opts(BlockDriver *drv, const char *filename, + done: + qobject_unref(qdict); + qapi_free_BlockdevCreateOptions(create_options); +- bdrv_unref(bs_file); ++ bdrv_co_unref(bs_file); + return ret; + } + +diff --git a/block/vhdx.c b/block/vhdx.c +index 81420722a1..00777da91a 100644 +--- a/block/vhdx.c ++++ 
b/block/vhdx.c +@@ -2053,8 +2053,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts, + + ret = 0; + delete_and_exit: +- blk_unref(blk); +- bdrv_unref(bs); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs); + g_free(creator); + return ret; + } +@@ -2144,7 +2144,7 @@ vhdx_co_create_opts(BlockDriver *drv, const char *filename, + + fail: + qobject_unref(qdict); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/block/vmdk.c b/block/vmdk.c +index f5f49018fe..01ca13c82b 100644 +--- a/block/vmdk.c ++++ b/block/vmdk.c +@@ -2306,7 +2306,7 @@ exit: + if (pbb) { + *pbb = blk; + } else { +- blk_unref(blk); ++ blk_co_unref(blk); + blk = NULL; + } + } +@@ -2516,12 +2516,12 @@ vmdk_co_do_create(int64_t size, + if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) { + error_setg(errp, "Invalid backing file format: %s. Must be vmdk", + blk_bs(backing)->drv->format_name); +- blk_unref(backing); ++ blk_co_unref(backing); + ret = -EINVAL; + goto exit; + } + ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid); +- blk_unref(backing); ++ blk_co_unref(backing); + if (ret) { + error_setg(errp, "Failed to read parent CID"); + goto exit; +@@ -2542,14 +2542,14 @@ vmdk_co_do_create(int64_t size, + blk_bs(extent_blk)->filename); + created_size += cur_size; + extent_idx++; +- blk_unref(extent_blk); ++ blk_co_unref(extent_blk); + } + + /* Check whether we got excess extents */ + extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain, + opaque, NULL); + if (extent_blk) { +- blk_unref(extent_blk); ++ blk_co_unref(extent_blk); + error_setg(errp, "List of extents contains unused extents"); + ret = -EINVAL; + goto exit; +@@ -2590,7 +2590,7 @@ vmdk_co_do_create(int64_t size, + ret = 0; + exit: + if (blk) { +- blk_unref(blk); ++ blk_co_unref(blk); + } + g_free(desc); + g_free(parent_desc_line); +@@ -2641,7 +2641,7 @@ vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split, + errp)) { 
+ goto exit; + } +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + exit: + g_free(ext_filename); + return blk; +@@ -2797,12 +2797,12 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx, + return NULL; + } + blk_set_allow_write_beyond_eof(blk, true); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + + if (size != -1) { + ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp); + if (ret) { +- blk_unref(blk); ++ blk_co_unref(blk); + blk = NULL; + } + } +diff --git a/block/vpc.c b/block/vpc.c +index b89b0ff8e2..07ddda5b99 100644 +--- a/block/vpc.c ++++ b/block/vpc.c +@@ -1082,8 +1082,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts, + } + + out: +- blk_unref(blk); +- bdrv_unref(bs); ++ blk_co_unref(blk); ++ bdrv_co_unref(bs); + return ret; + } + +@@ -1162,7 +1162,7 @@ vpc_co_create_opts(BlockDriver *drv, const char *filename, + + fail: + qobject_unref(qdict); +- bdrv_unref(bs); ++ bdrv_co_unref(bs); + qapi_free_BlockdevCreateOptions(create_options); + return ret; + } +diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h +index 399200a9a3..cd4ea554bf 100644 +--- a/include/block/block-global-state.h ++++ b/include/block/block-global-state.h +@@ -214,7 +214,8 @@ void bdrv_img_create(const char *filename, const char *fmt, + bool quiet, Error **errp); + + void bdrv_ref(BlockDriverState *bs); +-void bdrv_unref(BlockDriverState *bs); ++void no_coroutine_fn bdrv_unref(BlockDriverState *bs); ++void coroutine_fn no_co_wrapper bdrv_co_unref(BlockDriverState *bs); + void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); + BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, + BlockDriverState *child_bs, +diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h +index 2b6d27db7c..fa83f9389c 100644 +--- a/include/sysemu/block-backend-global-state.h ++++ b/include/sysemu/block-backend-global-state.h +@@ -42,7 +42,10 @@ blk_co_new_open(const 
char *filename, const char *reference, QDict *options, + + int blk_get_refcnt(BlockBackend *blk); + void blk_ref(BlockBackend *blk); +-void blk_unref(BlockBackend *blk); ++ ++void no_coroutine_fn blk_unref(BlockBackend *blk); ++void coroutine_fn no_co_wrapper blk_co_unref(BlockBackend *blk); ++ + void blk_remove_all_bs(void); + BlockBackend *blk_by_name(const char *name); + BlockBackend *blk_next(BlockBackend *blk); +-- +2.39.1 + diff --git a/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch b/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch new file mode 100644 index 0000000..4e91505 --- /dev/null +++ b/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch @@ -0,0 +1,132 @@ +From 2c9e6892369ff99decd4030642b8dcf3875e9ebf Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 9 May 2023 15:41:33 +0200 +Subject: [PATCH 55/56] iotests: Test resizing image attached to an iothread + +RH-Author: Kevin Wolf +RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() +RH-Bugzilla: 2185688 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [4/4] 8d31752d1e6e8c6a422d68d9cb2251fbc34b7aef (kmwolf/centos-qemu-kvm) + +This tests that trying to resize an image with QMP block_resize doesn't +hang or otherwise fail when the image is attached to a device running in +an iothread. + +This is a regression test for the recent fix that changed +qmp_block_resize, which is a coroutine based QMP handler, to avoid +calling no_coroutine_fns directly. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20230509134133.373408-1-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit e113362e4cdfdcfe1d497e569527f70a0021333a) +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/tests/iothreads-resize | 71 +++++++++++++++++++ + tests/qemu-iotests/tests/iothreads-resize.out | 11 +++ + 2 files changed, 82 insertions(+) + create mode 100755 tests/qemu-iotests/tests/iothreads-resize + create mode 100644 tests/qemu-iotests/tests/iothreads-resize.out + +diff --git a/tests/qemu-iotests/tests/iothreads-resize b/tests/qemu-iotests/tests/iothreads-resize +new file mode 100755 +index 0000000000..36e4598c62 +--- /dev/null ++++ b/tests/qemu-iotests/tests/iothreads-resize +@@ -0,0 +1,71 @@ ++#!/usr/bin/env bash ++# group: rw auto quick ++# ++# Test resizing an image that is attached to a separate iothread ++# ++# Copyright (C) 2023 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++# creator ++owner=kwolf@redhat.com ++ ++seq=`basename $0` ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_test_img ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++cd .. ++. ./common.rc ++. 
./common.filter ++ ++# Resizing images is only supported by a few block drivers ++_supported_fmt raw qcow2 qed ++_supported_proto file ++_require_devices virtio-scsi-pci ++ ++size=64M ++_make_test_img $size ++ ++qmp() { ++cat < +Date: Thu, 11 May 2023 13:03:22 +0200 +Subject: [PATCH 54/56] iotests: Use alternative CPU type that is not + deprecated in RHEL + +RH-Author: Kevin Wolf +RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() +RH-Bugzilla: 2185688 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/4] 038d4718c0ee7a17ff5e6f4af8fc04d07e452f8d (kmwolf/centos-qemu-kvm) + +This is a downstream-only patch that is necessary because the default +CPU in RHEL is marked as deprecated. This makes test cases fail due to +the warning in the output: + +qemu-system-x86_64: warning: CPU model qemu64-x86_64-cpu is deprecated -- use at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max' + +Fixes: 318178778db60b6475d1484509bee136317156d3 +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/testenv.py | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py +index 9a37ad9152..963514aab3 100644 +--- a/tests/qemu-iotests/testenv.py ++++ b/tests/qemu-iotests/testenv.py +@@ -244,6 +244,9 @@ def __init__(self, source_dir: str, build_dir: str, + if self.qemu_prog.endswith(f'qemu-system-{suffix}'): + self.qemu_options += f' -machine {machine}' + ++ if self.qemu_prog.endswith('qemu-system-x86_64'): ++ self.qemu_options += ' -cpu Nehalem' ++ + # QEMU_DEFAULT_MACHINE + self.qemu_default_machine = get_default_machine(self.qemu_prog) + +-- +2.39.1 + diff --git a/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch b/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch new file mode 100644 index 0000000..7c9748b --- /dev/null +++ b/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch @@ -0,0 +1,308 @@ +From 
e2c2910edf90186ca0d7d13c9943caa284e95ea9 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 25 Apr 2023 21:15:14 -0400 +Subject: [PATCH 51/56] migration: Allow postcopy_ram_supported_by_host() to + report err +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [50/50] 08c44affc11c27ddf1aa7ce0dfacbaf5effb80cb (peterx/qemu-kvm) + +Instead of print it to STDERR, bring the error upwards so that it can be +reported via QMP responses. + +E.g.: + +{ "execute": "migrate-set-capabilities" , + "arguments": { "capabilities": + [ { "capability": "postcopy-ram", "state": true } ] } } + +{ "error": + { "class": "GenericError", + "desc": "Postcopy is not supported: Host backend files need to be TMPFS + or HUGETLBFS only" } } + +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 74c38cf7fd24c60e4f0a90585d17250478260877) +Signed-off-by: Peter Xu +--- + migration/options.c | 8 ++---- + migration/postcopy-ram.c | 60 +++++++++++++++++++++------------------- + migration/postcopy-ram.h | 3 +- + migration/savevm.c | 3 +- + 4 files changed, 39 insertions(+), 35 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index 4701c75a4d..e51d667e14 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -302,6 +302,7 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + { + MigrationIncomingState *mis = migration_incoming_get_current(); + ++ ERRP_GUARD(); + #ifndef CONFIG_LIVE_BLOCK_MIGRATION + if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { + error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " +@@ -327,11 +328,8 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + */ + if 
(!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && + runstate_check(RUN_STATE_INMIGRATE) && +- !postcopy_ram_supported_by_host(mis)) { +- /* postcopy_ram_supported_by_host will have emitted a more +- * detailed message +- */ +- error_setg(errp, "Postcopy is not supported"); ++ !postcopy_ram_supported_by_host(mis, errp)) { ++ error_prepend(errp, "Postcopy is not supported: "); + return false; + } + +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index 0711500036..75aa276bb1 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -283,11 +283,13 @@ static bool request_ufd_features(int ufd, uint64_t features) + return true; + } + +-static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) ++static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis, ++ Error **errp) + { + uint64_t asked_features = 0; + static uint64_t supported_features; + ++ ERRP_GUARD(); + /* + * it's not possible to + * request UFFD_API twice per one fd +@@ -295,7 +297,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) + */ + if (!supported_features) { + if (!receive_ufd_features(&supported_features)) { +- error_report("%s failed", __func__); ++ error_setg(errp, "Userfault feature detection failed"); + return false; + } + } +@@ -317,8 +319,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) + * userfault file descriptor + */ + if (!request_ufd_features(ufd, asked_features)) { +- error_report("%s failed: features %" PRIu64, __func__, +- asked_features); ++ error_setg(errp, "Failed features %" PRIu64, asked_features); + return false; + } + +@@ -329,7 +330,8 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) + have_hp = supported_features & UFFD_FEATURE_MISSING_HUGETLBFS; + #endif + if (!have_hp) { +- error_report("Userfault on this host does not support huge pages"); ++ error_setg(errp, ++ "Userfault on this host does not support huge pages"); + return false; + } + } +@@ 
-338,7 +340,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) + + /* Callback from postcopy_ram_supported_by_host block iterator. + */ +-static int test_ramblock_postcopiable(RAMBlock *rb) ++static int test_ramblock_postcopiable(RAMBlock *rb, Error **errp) + { + const char *block_name = qemu_ram_get_idstr(rb); + ram_addr_t length = qemu_ram_get_used_length(rb); +@@ -346,16 +348,18 @@ static int test_ramblock_postcopiable(RAMBlock *rb) + QemuFsType fs; + + if (length % pagesize) { +- error_report("Postcopy requires RAM blocks to be a page size multiple," +- " block %s is 0x" RAM_ADDR_FMT " bytes with a " +- "page size of 0x%zx", block_name, length, pagesize); ++ error_setg(errp, ++ "Postcopy requires RAM blocks to be a page size multiple," ++ " block %s is 0x" RAM_ADDR_FMT " bytes with a " ++ "page size of 0x%zx", block_name, length, pagesize); + return 1; + } + + if (rb->fd >= 0) { + fs = qemu_fd_getfs(rb->fd); + if (fs != QEMU_FS_TYPE_TMPFS && fs != QEMU_FS_TYPE_HUGETLBFS) { +- error_report("Host backend files need to be TMPFS or HUGETLBFS only"); ++ error_setg(errp, ++ "Host backend files need to be TMPFS or HUGETLBFS only"); + return 1; + } + } +@@ -368,7 +372,7 @@ static int test_ramblock_postcopiable(RAMBlock *rb) + * normally fine since if the postcopy succeeds it gets turned back on at the + * end. 
+ */ +-bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) ++bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp) + { + long pagesize = qemu_real_host_page_size(); + int ufd = -1; +@@ -377,29 +381,27 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + struct uffdio_register reg_struct; + struct uffdio_range range_struct; + uint64_t feature_mask; +- Error *local_err = NULL; + RAMBlock *block; + ++ ERRP_GUARD(); + if (qemu_target_page_size() > pagesize) { +- error_report("Target page size bigger than host page size"); ++ error_setg(errp, "Target page size bigger than host page size"); + goto out; + } + + ufd = uffd_open(O_CLOEXEC); + if (ufd == -1) { +- error_report("%s: userfaultfd not available: %s", __func__, +- strerror(errno)); ++ error_setg(errp, "Userfaultfd not available: %s", strerror(errno)); + goto out; + } + + /* Give devices a chance to object */ +- if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, &local_err)) { +- error_report_err(local_err); ++ if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, errp)) { + goto out; + } + + /* Version and features check */ +- if (!ufd_check_and_apply(ufd, mis)) { ++ if (!ufd_check_and_apply(ufd, mis, errp)) { + goto out; + } + +@@ -417,7 +419,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + * affect in reality, or we can revisit. + */ + RAMBLOCK_FOREACH(block) { +- if (test_ramblock_postcopiable(block)) { ++ if (test_ramblock_postcopiable(block, errp)) { + goto out; + } + } +@@ -427,7 +429,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + * it was enabled. 
+ */ + if (munlockall()) { +- error_report("%s: munlockall: %s", __func__, strerror(errno)); ++ error_setg(errp, "munlockall() failed: %s", strerror(errno)); + goto out; + } + +@@ -439,8 +441,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (testarea == MAP_FAILED) { +- error_report("%s: Failed to map test area: %s", __func__, +- strerror(errno)); ++ error_setg(errp, "Failed to map test area: %s", strerror(errno)); + goto out; + } + g_assert(QEMU_PTR_IS_ALIGNED(testarea, pagesize)); +@@ -450,14 +451,14 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; + + if (ioctl(ufd, UFFDIO_REGISTER, ®_struct)) { +- error_report("%s userfault register: %s", __func__, strerror(errno)); ++ error_setg(errp, "UFFDIO_REGISTER failed: %s", strerror(errno)); + goto out; + } + + range_struct.start = (uintptr_t)testarea; + range_struct.len = pagesize; + if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) { +- error_report("%s userfault unregister: %s", __func__, strerror(errno)); ++ error_setg(errp, "UFFDIO_UNREGISTER failed: %s", strerror(errno)); + goto out; + } + +@@ -465,8 +466,8 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + (__u64)1 << _UFFDIO_COPY | + (__u64)1 << _UFFDIO_ZEROPAGE; + if ((reg_struct.ioctls & feature_mask) != feature_mask) { +- error_report("Missing userfault map features: %" PRIx64, +- (uint64_t)(~reg_struct.ioctls & feature_mask)); ++ error_setg(errp, "Missing userfault map features: %" PRIx64, ++ (uint64_t)(~reg_struct.ioctls & feature_mask)); + goto out; + } + +@@ -1188,6 +1189,8 @@ static int postcopy_temp_pages_setup(MigrationIncomingState *mis) + + int postcopy_ram_incoming_setup(MigrationIncomingState *mis) + { ++ Error *local_err = NULL; ++ + /* Open the fd for the kernel to give us userfaults */ + mis->userfault_fd = uffd_open(O_CLOEXEC | 
O_NONBLOCK); + if (mis->userfault_fd == -1) { +@@ -1200,7 +1203,8 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis) + * Although the host check already tested the API, we need to + * do the check again as an ABI handshake on the new fd. + */ +- if (!ufd_check_and_apply(mis->userfault_fd, mis)) { ++ if (!ufd_check_and_apply(mis->userfault_fd, mis, &local_err)) { ++ error_report_err(local_err); + return -1; + } + +@@ -1360,7 +1364,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info) + { + } + +-bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) ++bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp) + { + error_report("%s: No OS support", __func__); + return false; +diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h +index b4867a32d5..442ab89752 100644 +--- a/migration/postcopy-ram.h ++++ b/migration/postcopy-ram.h +@@ -14,7 +14,8 @@ + #define QEMU_POSTCOPY_RAM_H + + /* Return true if the host supports everything we need to do postcopy-ram */ +-bool postcopy_ram_supported_by_host(MigrationIncomingState *mis); ++bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, ++ Error **errp); + + /* + * Make all of RAM sensitive to accesses to areas that haven't yet been written +diff --git a/migration/savevm.c b/migration/savevm.c +index 9671211339..211eff3a8b 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1753,7 +1753,8 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, + return -EINVAL; + } + +- if (!postcopy_ram_supported_by_host(mis)) { ++ if (!postcopy_ram_supported_by_host(mis, &local_err)) { ++ error_report_err(local_err); + postcopy_state_set(POSTCOPY_INCOMING_NONE); + return -1; + } +-- +2.39.1 + diff --git a/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch b/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch new file mode 100644 index 0000000..d1620f0 --- /dev/null +++ 
b/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch @@ -0,0 +1,111 @@ +From 3691bb5f956e3c60dbf6de183011b31dbc7a7801 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 2 May 2023 15:52:12 -0500 +Subject: [PATCH 01/56] migration: Attempt disk reactivation in more failure + scenarios + +RH-Author: Eric Blake +RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. +RH-Bugzilla: 2058982 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Acked-by: Kevin Wolf +RH-Commit: [1/1] 5999b747b314641259d3b8809033b057805eed3f (ebblake/centos-qemu-kvm) + +Commit fe904ea824 added a fail_inactivate label, which tries to +reactivate disks on the source after a failure while s->state == +MIGRATION_STATUS_ACTIVE, but didn't actually use the label if +qemu_savevm_state_complete_precopy() failed. This failure to +reactivate is also present in commit 6039dd5b1c (also covering the new +s->state == MIGRATION_STATUS_DEVICE state) and 403d18ae (ensuring +s->block_inactive is set more reliably). + +Consolidate the two labels back into one - no matter HOW migration is +failed, if there is any chance we can reach vm_start() after having +attempted inactivation, it is essential that we have tried to restart +disks before then. This also makes the cleanup more like +migrate_fd_cancel(). 
+ +Suggested-by: Kevin Wolf +Signed-off-by: Eric Blake +Message-Id: <20230502205212.134680-1-eblake@redhat.com> +Acked-by: Peter Xu +Reviewed-by: Juan Quintela +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 6dab4c93ecfae48e2e67b984d1032c1e988d3005) +[eblake: downstream migrate_colo() => migrate_colo_enabled()] +Signed-off-by: Eric Blake +--- + migration/migration.c | 24 ++++++++++++++---------- + 1 file changed, 14 insertions(+), 10 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 08007cef4e..99f86bd6c2 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3443,6 +3443,11 @@ static void migration_completion(MigrationState *s) + MIGRATION_STATUS_DEVICE); + } + if (ret >= 0) { ++ /* ++ * Inactivate disks except in COLO, and track that we ++ * have done so in order to remember to reactivate ++ * them if migration fails or is cancelled. ++ */ + s->block_inactive = !migrate_colo_enabled(); + qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); + ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, +@@ -3487,13 +3492,13 @@ static void migration_completion(MigrationState *s) + rp_error = await_return_path_close_on_source(s); + trace_migration_return_path_end_after(rp_error); + if (rp_error) { +- goto fail_invalidate; ++ goto fail; + } + } + + if (qemu_file_get_error(s->to_dst_file)) { + trace_migration_completion_file_err(); +- goto fail_invalidate; ++ goto fail; + } + + if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) { +@@ -3507,26 +3512,25 @@ static void migration_completion(MigrationState *s) + + return; + +-fail_invalidate: +- /* If not doing postcopy, vm_start() will be called: let's regain +- * control on images. 
+- */ +- if (s->state == MIGRATION_STATUS_ACTIVE || +- s->state == MIGRATION_STATUS_DEVICE) { ++fail: ++ if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE || ++ s->state == MIGRATION_STATUS_DEVICE)) { ++ /* ++ * If not doing postcopy, vm_start() will be called: let's ++ * regain control on images. ++ */ + Error *local_err = NULL; + + qemu_mutex_lock_iothread(); + bdrv_activate_all(&local_err); + if (local_err) { + error_report_err(local_err); +- s->block_inactive = true; + } else { + s->block_inactive = false; + } + qemu_mutex_unlock_iothread(); + } + +-fail: + migrate_set_state(&s->state, current_active_state, + MIGRATION_STATUS_FAILED); + } +-- +2.39.1 + diff --git a/kvm-migration-Create-migrate_cap_set.patch b/kvm-migration-Create-migrate_cap_set.patch new file mode 100644 index 0000000..33268bb --- /dev/null +++ b/kvm-migration-Create-migrate_cap_set.patch @@ -0,0 +1,93 @@ +From d772464e9a51a085e10864b2dc7ffd49991fc23b Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 21:02:42 +0100 +Subject: [PATCH 22/56] migration: Create migrate_cap_set() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [21/50] 5b12f04013cf2d374a869134bb67c938c789e24d (peterx/qemu-kvm) + +And remove the convoluted use of qmp_migrate_set_capabilities() to +enable disable MIGRATION_CAPABILITY_BLOCK. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 9eb1109cfba5415dd0b0cb82e80fc5e42fe861b7) +Signed-off-by: Peter Xu +--- + migration/migration.c | 34 ++++++++++++++++------------------ + 1 file changed, 16 insertions(+), 18 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index b745d829a4..18058fb597 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1912,25 +1912,24 @@ void migrate_set_state(int *state, int old_state, int new_state) + } + } + +-static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index, +- bool state) ++static bool migrate_cap_set(int cap, bool value, Error **errp) + { +- MigrationCapabilityStatus *cap; +- +- cap = g_new0(MigrationCapabilityStatus, 1); +- cap->capability = index; +- cap->state = state; ++ MigrationState *s = migrate_get_current(); ++ bool new_caps[MIGRATION_CAPABILITY__MAX]; + +- return cap; +-} ++ if (migration_is_running(s->state)) { ++ error_setg(errp, QERR_MIGRATION_ACTIVE); ++ return false; ++ } + +-void migrate_set_block_enabled(bool value, Error **errp) +-{ +- MigrationCapabilityStatusList *cap = NULL; ++ memcpy(new_caps, s->capabilities, sizeof(new_caps)); ++ new_caps[cap] = value; + +- QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value)); +- qmp_migrate_set_capabilities(cap, errp); +- qapi_free_MigrationCapabilityStatusList(cap); ++ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { ++ return false; ++ } ++ s->capabilities[cap] = value; ++ return true; + } + + static void migrate_set_block_incremental(MigrationState *s, bool value) +@@ -1942,7 +1941,7 @@ static void block_cleanup_parameters(MigrationState *s) + { + if (s->must_remove_block_options) { + /* setting to false can never fail */ +- migrate_set_block_enabled(false, &error_abort); ++ migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, false, &error_abort); + migrate_set_block_incremental(s, false); + s->must_remove_block_options = false; + 
} +@@ -2429,8 +2428,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + "current migration capabilities"); + return false; + } +- migrate_set_block_enabled(true, &local_err); +- if (local_err) { ++ if (!migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, true, &local_err)) { + error_propagate(errp, local_err); + return false; + } +-- +2.39.1 + diff --git a/kvm-migration-Create-migrate_checkpoint_delay.patch b/kvm-migration-Create-migrate_checkpoint_delay.patch new file mode 100644 index 0000000..408d258 --- /dev/null +++ b/kvm-migration-Create-migrate_checkpoint_delay.patch @@ -0,0 +1,84 @@ +From a17bee3c8ab48daa471ec53bed0e2cb0bb41fc76 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 01:04:55 +0100 +Subject: [PATCH 41/56] migration: Create migrate_checkpoint_delay() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [40/50] b972d3f12e49dc27aa78eb723ca6d0fac4d174d8 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit f94a858fa3e72ba954a338c01ae9fecc15fcce5c) +Signed-off-by: Peter Xu +--- + migration/colo.c | 5 ++--- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + 3 files changed, 12 insertions(+), 3 deletions(-) + +diff --git a/migration/colo.c b/migration/colo.c +index 93b78c9270..07bfa21fea 100644 +--- a/migration/colo.c ++++ b/migration/colo.c +@@ -576,7 +576,7 @@ static void colo_process_checkpoint(MigrationState *s) + trace_colo_vm_state_change("stop", "run"); + + timer_mod(s->colo_delay_timer, qemu_clock_get_ms(QEMU_CLOCK_HOST) + +- s->parameters.x_checkpoint_delay); ++ migrate_checkpoint_delay()); + + while (s->state == MIGRATION_STATUS_COLO) { + if (failover_get_state() != 
FAILOVER_STATUS_NONE) { +@@ -651,8 +651,7 @@ void colo_checkpoint_notify(void *opaque) + + qemu_event_set(&s->colo_checkpoint_event); + s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); +- next_notify_time = s->colo_checkpoint_time + +- s->parameters.x_checkpoint_delay; ++ next_notify_time = s->colo_checkpoint_time + migrate_checkpoint_delay(); + timer_mod(s->colo_delay_timer, next_notify_time); + } + +diff --git a/migration/options.c b/migration/options.c +index b9f3815f7e..0e102e5700 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -472,6 +472,15 @@ bool migrate_block_incremental(void) + return s->parameters.block_incremental; + } + ++uint32_t migrate_checkpoint_delay(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.x_checkpoint_delay; ++} ++ + int migrate_compress_level(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index aa54443353..adc2879bbb 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -46,6 +46,7 @@ bool migrate_cap_set(int cap, bool value, Error **errp); + /* parameters */ + + bool migrate_block_incremental(void); ++uint32_t migrate_checkpoint_delay(void); + int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); +-- +2.39.1 + diff --git a/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch b/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch new file mode 100644 index 0000000..65bad3c --- /dev/null +++ b/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch @@ -0,0 +1,75 @@ +From 7ff430e011780dad00e5ebaad0318c5fa3aec102 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:20:49 +0100 +Subject: [PATCH 45/56] migration: Create migrate_cpu_throttle_increment() + function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: 
Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [44/50] aec990a106a0347b265f5c056a516e0b91e8183c (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 9605c2ac282c565bb00b5f344217161bef29eff8) +Signed-off-by: Peter Xu +--- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 2 +- + 3 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/migration/options.c b/migration/options.c +index f7fb6999f7..31435d2b45 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -509,6 +509,15 @@ int migrate_compress_wait_thread(void) + return s->parameters.compress_wait_thread; + } + ++uint8_t migrate_cpu_throttle_increment(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.cpu_throttle_increment; ++} ++ + uint8_t migrate_cpu_throttle_initial(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index fd8b91d767..49b29bdafd 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -50,6 +50,7 @@ uint32_t migrate_checkpoint_delay(void); + int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); ++uint8_t migrate_cpu_throttle_increment(void); + uint8_t migrate_cpu_throttle_initial(void); + int migrate_decompress_threads(void); + uint8_t migrate_max_cpu_throttle(void); +diff --git a/migration/ram.c b/migration/ram.c +index 5e855d5c22..5645745a42 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -713,7 +713,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, + { + MigrationState *s = migrate_get_current(); + uint64_t pct_initial = migrate_cpu_throttle_initial(); +- uint64_t pct_increment = s->parameters.cpu_throttle_increment; ++ uint64_t pct_increment = migrate_cpu_throttle_increment(); + bool 
pct_tailslow = s->parameters.cpu_throttle_tailslow; + int pct_max = migrate_max_cpu_throttle(); + +-- +2.39.1 + diff --git a/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch b/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch new file mode 100644 index 0000000..aab2013 --- /dev/null +++ b/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch @@ -0,0 +1,75 @@ +From fdc2f14bfb3ef8897310a7db63287a9bab1fb858 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 01:22:44 +0100 +Subject: [PATCH 44/56] migration: Create migrate_cpu_throttle_initial() to + option.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [43/50] e0e0db7218f28aefd4bd022edbaec236e2030cb1 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 2a8ec38082f8098f2693bb3632175453c0c84a51) +Signed-off-by: Peter Xu +--- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 2 +- + 3 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/migration/options.c b/migration/options.c +index 418aafac64..f7fb6999f7 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -509,6 +509,15 @@ int migrate_compress_wait_thread(void) + return s->parameters.compress_wait_thread; + } + ++uint8_t migrate_cpu_throttle_initial(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.cpu_throttle_initial; ++} ++ + int migrate_decompress_threads(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 72b1a320b7..fd8b91d767 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -50,6 +50,7 @@ uint32_t migrate_checkpoint_delay(void); 
+ int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); ++uint8_t migrate_cpu_throttle_initial(void); + int migrate_decompress_threads(void); + uint8_t migrate_max_cpu_throttle(void); + int64_t migrate_max_postcopy_bandwidth(void); +diff --git a/migration/ram.c b/migration/ram.c +index 5c786513ef..5e855d5c22 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -712,7 +712,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, + uint64_t bytes_dirty_threshold) + { + MigrationState *s = migrate_get_current(); +- uint64_t pct_initial = s->parameters.cpu_throttle_initial; ++ uint64_t pct_initial = migrate_cpu_throttle_initial(); + uint64_t pct_increment = s->parameters.cpu_throttle_increment; + bool pct_tailslow = s->parameters.cpu_throttle_tailslow; + int pct_max = migrate_max_cpu_throttle(); +-- +2.39.1 + diff --git a/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch b/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch new file mode 100644 index 0000000..e36f003 --- /dev/null +++ b/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch @@ -0,0 +1,78 @@ +From b88c51c4b02639e28da73143b1da7bd3d6706ce5 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:29:51 +0100 +Subject: [PATCH 46/56] migration: Create migrate_cpu_throttle_tailslow() + function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [45/50] e93e96392405c60f75abbf288e4fddb191bbc996 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 873f674c559e3162a6e6e92994301d400c5cc873) +Signed-off-by: Peter Xu +--- + migration/options.c | 9 +++++++++ + 
migration/options.h | 1 + + migration/ram.c | 3 +-- + 3 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index 31435d2b45..615534c151 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -527,6 +527,15 @@ uint8_t migrate_cpu_throttle_initial(void) + return s->parameters.cpu_throttle_initial; + } + ++bool migrate_cpu_throttle_tailslow(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.cpu_throttle_tailslow; ++} ++ + int migrate_decompress_threads(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 49b29bdafd..99f6bbd7a1 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -52,6 +52,7 @@ int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); + uint8_t migrate_cpu_throttle_increment(void); + uint8_t migrate_cpu_throttle_initial(void); ++bool migrate_cpu_throttle_tailslow(void); + int migrate_decompress_threads(void); + uint8_t migrate_max_cpu_throttle(void); + int64_t migrate_max_postcopy_bandwidth(void); +diff --git a/migration/ram.c b/migration/ram.c +index 5645745a42..01356f60a4 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -711,10 +711,9 @@ static size_t save_page_header(PageSearchStatus *pss, QEMUFile *f, + static void mig_throttle_guest_down(uint64_t bytes_dirty_period, + uint64_t bytes_dirty_threshold) + { +- MigrationState *s = migrate_get_current(); + uint64_t pct_initial = migrate_cpu_throttle_initial(); + uint64_t pct_increment = migrate_cpu_throttle_increment(); +- bool pct_tailslow = s->parameters.cpu_throttle_tailslow; ++ bool pct_tailslow = migrate_cpu_throttle_tailslow(); + int pct_max = migrate_max_cpu_throttle(); + + uint64_t throttle_now = cpu_throttle_get_percentage(); +-- +2.39.1 + diff --git a/kvm-migration-Create-migrate_max_bandwidth-function.patch b/kvm-migration-Create-migrate_max_bandwidth-function.patch new file mode 100644 index 
0000000..ba1d34c --- /dev/null +++ b/kvm-migration-Create-migrate_max_bandwidth-function.patch @@ -0,0 +1,232 @@ +From b6228b3122f5c1f220f92042277ab1bfbb5ba086 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 11:00:12 +0100 +Subject: [PATCH 48/56] migration: Create migrate_max_bandwidth() function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [47/50] 3874656f70cb9c2a30f4d63e146539480d422326 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 9c894df3a37d675652390f7dbbe2f65b7bad7efa) +Signed-off-by: Peter Xu +--- + migration/migration.c | 70 +------------------------------------- + migration/options.c | 79 +++++++++++++++++++++++++++++++++++++++++++ + migration/options.h | 1 + + 3 files changed, 81 insertions(+), 69 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 46a5ea4d42..c2e109329d 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -886,74 +886,6 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) + migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf); + } + +-MigrationParameters *qmp_query_migrate_parameters(Error **errp) +-{ +- MigrationParameters *params; +- MigrationState *s = migrate_get_current(); +- +- /* TODO use QAPI_CLONE() instead of duplicating it inline */ +- params = g_malloc0(sizeof(*params)); +- params->has_compress_level = true; +- params->compress_level = s->parameters.compress_level; +- params->has_compress_threads = true; +- params->compress_threads = s->parameters.compress_threads; +- params->has_compress_wait_thread = true; +- params->compress_wait_thread = s->parameters.compress_wait_thread; +- 
params->has_decompress_threads = true; +- params->decompress_threads = s->parameters.decompress_threads; +- params->has_throttle_trigger_threshold = true; +- params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; +- params->has_cpu_throttle_initial = true; +- params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; +- params->has_cpu_throttle_increment = true; +- params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; +- params->has_cpu_throttle_tailslow = true; +- params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; +- params->tls_creds = g_strdup(s->parameters.tls_creds); +- params->tls_hostname = g_strdup(s->parameters.tls_hostname); +- params->tls_authz = g_strdup(s->parameters.tls_authz ? +- s->parameters.tls_authz : ""); +- params->has_max_bandwidth = true; +- params->max_bandwidth = s->parameters.max_bandwidth; +- params->has_downtime_limit = true; +- params->downtime_limit = s->parameters.downtime_limit; +- params->has_x_checkpoint_delay = true; +- params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; +- params->has_block_incremental = true; +- params->block_incremental = s->parameters.block_incremental; +- params->has_multifd_channels = true; +- params->multifd_channels = s->parameters.multifd_channels; +- params->has_multifd_compression = true; +- params->multifd_compression = s->parameters.multifd_compression; +- params->has_multifd_zlib_level = true; +- params->multifd_zlib_level = s->parameters.multifd_zlib_level; +- params->has_multifd_zstd_level = true; +- params->multifd_zstd_level = s->parameters.multifd_zstd_level; +- params->has_xbzrle_cache_size = true; +- params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; +- params->has_max_postcopy_bandwidth = true; +- params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth; +- params->has_max_cpu_throttle = true; +- params->max_cpu_throttle = s->parameters.max_cpu_throttle; +- params->has_announce_initial = true; 
+- params->announce_initial = s->parameters.announce_initial; +- params->has_announce_max = true; +- params->announce_max = s->parameters.announce_max; +- params->has_announce_rounds = true; +- params->announce_rounds = s->parameters.announce_rounds; +- params->has_announce_step = true; +- params->announce_step = s->parameters.announce_step; +- +- if (s->parameters.has_block_bitmap_mapping) { +- params->has_block_bitmap_mapping = true; +- params->block_bitmap_mapping = +- QAPI_CLONE(BitmapMigrationNodeAliasList, +- s->parameters.block_bitmap_mapping); +- } +- +- return params; +-} +- + /* + * Return true if we're already in the middle of a migration + * (i.e. any of the active or setup states) +@@ -3775,7 +3707,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + XFER_LIMIT_RATIO; + } else { + /* This is a fresh new migration */ +- rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO; ++ rate_limit = migrate_max_bandwidth() / XFER_LIMIT_RATIO; + + /* Notify before starting migration thread */ + notifier_list_notify(&migration_state_notifiers, s); +diff --git a/migration/options.c b/migration/options.c +index 8bd2d949ae..8e8753d9be 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -12,8 +12,10 @@ + */ + + #include "qemu/osdep.h" ++#include "qapi/clone-visitor.h" + #include "qapi/error.h" + #include "qapi/qapi-commands-migration.h" ++#include "qapi/qapi-visit-migration.h" + #include "qapi/qmp/qerror.h" + #include "sysemu/runstate.h" + #include "migration/misc.h" +@@ -562,6 +564,15 @@ uint8_t migrate_max_cpu_throttle(void) + return s->parameters.max_cpu_throttle; + } + ++uint64_t migrate_max_bandwidth(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.max_bandwidth; ++} ++ + int64_t migrate_max_postcopy_bandwidth(void) + { + MigrationState *s; +@@ -641,3 +652,71 @@ AnnounceParameters *migrate_announce_params(void) + + return &ap; + } ++ ++MigrationParameters *qmp_query_migrate_parameters(Error 
**errp) ++{ ++ MigrationParameters *params; ++ MigrationState *s = migrate_get_current(); ++ ++ /* TODO use QAPI_CLONE() instead of duplicating it inline */ ++ params = g_malloc0(sizeof(*params)); ++ params->has_compress_level = true; ++ params->compress_level = s->parameters.compress_level; ++ params->has_compress_threads = true; ++ params->compress_threads = s->parameters.compress_threads; ++ params->has_compress_wait_thread = true; ++ params->compress_wait_thread = s->parameters.compress_wait_thread; ++ params->has_decompress_threads = true; ++ params->decompress_threads = s->parameters.decompress_threads; ++ params->has_throttle_trigger_threshold = true; ++ params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; ++ params->has_cpu_throttle_initial = true; ++ params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; ++ params->has_cpu_throttle_increment = true; ++ params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; ++ params->has_cpu_throttle_tailslow = true; ++ params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; ++ params->tls_creds = g_strdup(s->parameters.tls_creds); ++ params->tls_hostname = g_strdup(s->parameters.tls_hostname); ++ params->tls_authz = g_strdup(s->parameters.tls_authz ? 
++ s->parameters.tls_authz : ""); ++ params->has_max_bandwidth = true; ++ params->max_bandwidth = s->parameters.max_bandwidth; ++ params->has_downtime_limit = true; ++ params->downtime_limit = s->parameters.downtime_limit; ++ params->has_x_checkpoint_delay = true; ++ params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; ++ params->has_block_incremental = true; ++ params->block_incremental = s->parameters.block_incremental; ++ params->has_multifd_channels = true; ++ params->multifd_channels = s->parameters.multifd_channels; ++ params->has_multifd_compression = true; ++ params->multifd_compression = s->parameters.multifd_compression; ++ params->has_multifd_zlib_level = true; ++ params->multifd_zlib_level = s->parameters.multifd_zlib_level; ++ params->has_multifd_zstd_level = true; ++ params->multifd_zstd_level = s->parameters.multifd_zstd_level; ++ params->has_xbzrle_cache_size = true; ++ params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; ++ params->has_max_postcopy_bandwidth = true; ++ params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth; ++ params->has_max_cpu_throttle = true; ++ params->max_cpu_throttle = s->parameters.max_cpu_throttle; ++ params->has_announce_initial = true; ++ params->announce_initial = s->parameters.announce_initial; ++ params->has_announce_max = true; ++ params->announce_max = s->parameters.announce_max; ++ params->has_announce_rounds = true; ++ params->announce_rounds = s->parameters.announce_rounds; ++ params->has_announce_step = true; ++ params->announce_step = s->parameters.announce_step; ++ ++ if (s->parameters.has_block_bitmap_mapping) { ++ params->has_block_bitmap_mapping = true; ++ params->block_bitmap_mapping = ++ QAPI_CLONE(BitmapMigrationNodeAliasList, ++ s->parameters.block_bitmap_mapping); ++ } ++ ++ return params; ++} +diff --git a/migration/options.h b/migration/options.h +index 093bc907a1..1b78fa9f3d 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -64,6 +64,7 @@ uint8_t 
migrate_cpu_throttle_initial(void); + bool migrate_cpu_throttle_tailslow(void); + int migrate_decompress_threads(void); + uint8_t migrate_max_cpu_throttle(void); ++uint64_t migrate_max_bandwidth(void); + int64_t migrate_max_postcopy_bandwidth(void); + int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); +-- +2.39.1 + diff --git a/kvm-migration-Create-migrate_max_cpu_throttle.patch b/kvm-migration-Create-migrate_max_cpu_throttle.patch new file mode 100644 index 0000000..6628b80 --- /dev/null +++ b/kvm-migration-Create-migrate_max_cpu_throttle.patch @@ -0,0 +1,88 @@ +From f0d4e34b00f66d2336b755a34a1ba226571641c4 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 01:13:01 +0100 +Subject: [PATCH 42/56] migration: Create migrate_max_cpu_throttle() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [41/50] fc7537c06d8e1f53d7bb552661f6ddb0133a978d (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 24155bd0520035d5148c0af5b925932c4d8064a8) +Signed-off-by: Peter Xu +--- + migration/migration.h | 2 -- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 2 +- + 4 files changed, 11 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.h b/migration/migration.h +index 86051af132..3ae938b19c 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -451,8 +451,6 @@ bool migrate_postcopy(void); + + int migrate_use_tls(void); + +-int migrate_max_cpu_throttle(void); +- + uint64_t ram_get_total_transferred_pages(void); + + /* Sending on the return path - generic and then for each message type */ +diff --git a/migration/options.c b/migration/options.c 
+index 0e102e5700..2cb04fbbd1 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -517,6 +517,15 @@ int migrate_decompress_threads(void) + return s->parameters.decompress_threads; + } + ++uint8_t migrate_max_cpu_throttle(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.max_cpu_throttle; ++} ++ + int64_t migrate_max_postcopy_bandwidth(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index adc2879bbb..72b1a320b7 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -51,6 +51,7 @@ int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); + int migrate_decompress_threads(void); ++uint8_t migrate_max_cpu_throttle(void); + int64_t migrate_max_postcopy_bandwidth(void); + int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); +diff --git a/migration/ram.c b/migration/ram.c +index e82cee97c3..5c786513ef 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -715,7 +715,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, + uint64_t pct_initial = s->parameters.cpu_throttle_initial; + uint64_t pct_increment = s->parameters.cpu_throttle_increment; + bool pct_tailslow = s->parameters.cpu_throttle_tailslow; +- int pct_max = s->parameters.max_cpu_throttle; ++ int pct_max = migrate_max_cpu_throttle(); + + uint64_t throttle_now = cpu_throttle_get_percentage(); + uint64_t cpu_now, cpu_ideal, throttle_inc; +-- +2.39.1 + diff --git a/kvm-migration-Create-migrate_rdma_pin_all-function.patch b/kvm-migration-Create-migrate_rdma_pin_all-function.patch new file mode 100644 index 0000000..c7799f1 --- /dev/null +++ b/kvm-migration-Create-migrate_rdma_pin_all-function.patch @@ -0,0 +1,95 @@ +From e4ef0f2cee6cdf2cf4bd225ac9e610f41d66dfcb Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:41:55 +0100 +Subject: [PATCH 32/56] migration: Create 
migrate_rdma_pin_all() function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [31/50] 206d96d47d9ee73ddc89dd01186560bf62ea5295 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy + +--- + +Fixed missing space after comma (fabiano) + +(cherry picked from commit 17cba690cdd42108369fafe6b07bff09872fbea6) +Signed-off-by: Peter Xu +--- + migration/options.c | 7 +++++++ + migration/options.h | 1 + + migration/rdma.c | 6 +++--- + 3 files changed, 11 insertions(+), 3 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index 2003e413da..9c9b8e5863 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -138,6 +138,13 @@ bool migrate_postcopy_ram(void) + return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; + } + ++bool migrate_rdma_pin_all(void) ++{ ++ MigrationState *s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL]; ++} ++ + bool migrate_release_ram(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 316efd1063..25c002b37a 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -30,6 +30,7 @@ bool migrate_pause_before_switchover(void); + bool migrate_postcopy_blocktime(void); + bool migrate_postcopy_preempt(void); + bool migrate_postcopy_ram(void); ++bool migrate_rdma_pin_all(void); + bool migrate_release_ram(void); + bool migrate_return_path(void); + bool migrate_validate_uuid(void); +diff --git a/migration/rdma.c b/migration/rdma.c +index bf55e2f163..0af5e944f0 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -35,6 +35,7 @@ + #include <rdma/rdma_cma.h> + #include "trace.h" + #include "qom/object.h" ++#include "options.h" + #include <poll.h> + + /* 
+@@ -4178,8 +4179,7 @@ void rdma_start_outgoing_migration(void *opaque, + goto err; + } + +- ret = qemu_rdma_source_init(rdma, +- s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); ++ ret = qemu_rdma_source_init(rdma, migrate_rdma_pin_all(), errp); + + if (ret) { + goto err; +@@ -4201,7 +4201,7 @@ void rdma_start_outgoing_migration(void *opaque, + } + + ret = qemu_rdma_source_init(rdma_return_path, +- s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); ++ migrate_rdma_pin_all(), errp); + + if (ret) { + goto return_path_err; +-- +2.39.1 + diff --git a/kvm-migration-Create-migrate_throttle_trigger_threshold.patch b/kvm-migration-Create-migrate_throttle_trigger_threshold.patch new file mode 100644 index 0000000..5fc1072 --- /dev/null +++ b/kvm-migration-Create-migrate_throttle_trigger_threshold.patch @@ -0,0 +1,75 @@ +From 27862b9d31da6447b60f185cdad95764018c6bc6 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 00:59:13 +0100 +Subject: [PATCH 40/56] migration: Create migrate_throttle_trigger_threshold() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [39/50] b8af9080c49be3d38bd2784d61289be89c03db3e (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit 6499efdb16e5c1288b4c8390d3bf68b313329b8b) +Signed-off-by: Peter Xu +--- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 3 +-- + 3 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index 2b6d88b4b9..b9f3815f7e 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -554,6 +554,15 @@ int migrate_multifd_zstd_level(void) + return s->parameters.multifd_zstd_level; + } 
+ ++uint8_t migrate_throttle_trigger_threshold(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.throttle_trigger_threshold; ++} ++ + uint64_t migrate_xbzrle_cache_size(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 96d5a8e6e4..aa54443353 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -55,6 +55,7 @@ int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); + int migrate_multifd_zstd_level(void); ++uint8_t migrate_throttle_trigger_threshold(void); + uint64_t migrate_xbzrle_cache_size(void); + + #endif +diff --git a/migration/ram.c b/migration/ram.c +index 4576d0d849..e82cee97c3 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1178,8 +1178,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) + + static void migration_trigger_throttle(RAMState *rs) + { +- MigrationState *s = migrate_get_current(); +- uint64_t threshold = s->parameters.throttle_trigger_threshold; ++ uint64_t threshold = migrate_throttle_trigger_threshold(); + uint64_t bytes_xfer_period = + stat64_get(&ram_counters.transferred) - rs->bytes_xfer_prev; + uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE; +-- +2.39.1 + diff --git a/kvm-migration-Create-options.c.patch b/kvm-migration-Create-options.c.patch new file mode 100644 index 0000000..ea60202 --- /dev/null +++ b/kvm-migration-Create-options.c.patch @@ -0,0 +1,524 @@ +From 282634a835f4711c8b501dd76c344058bc399fbd Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 21:18:45 +0100 +Subject: [PATCH 23/56] migration: Create options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: 
Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [22/50] 10c9be528b9fcfae93f1a12fcd09db1a69e58f64 (peterx/qemu-kvm) + +We move there all capabilities helpers from migration.c. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert + +--- + +Following David advise: +- looked through the history, capabilities are newer than 2012, so we + can remove that bit of the header. +- This part is posterior to Anthony. + Original Author is Orit. Once there, + I put myself. Peter Xu also did quite a bit of work here. + Anyone else wants/needs to be there? I didn't search too hard + because nobody asked before to be added. + +What do you think? + +(cherry picked from commit 1f0776f1c03312aad5d6a5f98871240bc3af01e5) +Signed-off-by: Peter Xu +--- + hw/virtio/virtio-balloon.c | 1 + + migration/block-dirty-bitmap.c | 1 + + migration/block.c | 1 + + migration/colo.c | 1 + + migration/meson.build | 1 + + migration/migration.c | 109 +---------------------------- + migration/migration.h | 12 ---- + migration/options.c | 124 +++++++++++++++++++++++++++++++++ + migration/options.h | 32 +++++++++ + migration/postcopy-ram.c | 1 + + migration/ram.c | 1 + + migration/savevm.c | 1 + + migration/socket.c | 1 + + 13 files changed, 166 insertions(+), 120 deletions(-) + create mode 100644 migration/options.c + create mode 100644 migration/options.h + +diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c +index 746f07c4d2..43092aa634 100644 +--- a/hw/virtio/virtio-balloon.c ++++ b/hw/virtio/virtio-balloon.c +@@ -32,6 +32,7 @@ + #include "qemu/error-report.h" + #include "migration/misc.h" + #include "migration/migration.h" ++#include "migration/options.h" + + #include "hw/virtio/virtio-bus.h" + #include "hw/virtio/virtio-access.h" +diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c +index fe73aa94b1..a6ffae0002 100644 +--- a/migration/block-dirty-bitmap.c ++++ b/migration/block-dirty-bitmap.c +@@ -79,6 +79,7 @@ + #include 
"qapi/qapi-visit-migration.h" + #include "qapi/clone-visitor.h" + #include "trace.h" ++#include "options.h" + + #define CHUNK_SIZE (1 << 10) + +diff --git a/migration/block.c b/migration/block.c +index b2497bbd32..4b167fa5cf 100644 +--- a/migration/block.c ++++ b/migration/block.c +@@ -28,6 +28,7 @@ + #include "migration/vmstate.h" + #include "sysemu/block-backend.h" + #include "trace.h" ++#include "options.h" + + #define BLK_MIG_BLOCK_SIZE (1ULL << 20) + #define BDRV_SECTORS_PER_DIRTY_CHUNK (BLK_MIG_BLOCK_SIZE >> BDRV_SECTOR_BITS) +diff --git a/migration/colo.c b/migration/colo.c +index 0716e64689..93b78c9270 100644 +--- a/migration/colo.c ++++ b/migration/colo.c +@@ -36,6 +36,7 @@ + #include "sysemu/cpus.h" + #include "sysemu/runstate.h" + #include "net/filter.h" ++#include "options.h" + + static bool vmstate_loading; + static Notifier packets_compare_notifier; +diff --git a/migration/meson.build b/migration/meson.build +index 0d1bb9f96e..480ff6854a 100644 +--- a/migration/meson.build ++++ b/migration/meson.build +@@ -22,6 +22,7 @@ softmmu_ss.add(files( + 'migration.c', + 'multifd.c', + 'multifd-zlib.c', ++ 'options.c', + 'postcopy-ram.c', + 'savevm.c', + 'socket.c', +diff --git a/migration/migration.c b/migration/migration.c +index 18058fb597..66ea55be06 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -63,6 +63,7 @@ + #include "sysemu/cpus.h" + #include "yank_functions.h" + #include "sysemu/qtest.h" ++#include "options.h" + + #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ + +@@ -357,15 +358,6 @@ static void migrate_generate_event(int new_state) + } + } + +-static bool migrate_late_block_activate(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; +-} +- + /* + * Send a message on the return channel back to the source + * of the migration. 
+@@ -2525,56 +2517,11 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) + qemu_sem_post(&s->pause_sem); + } + +-bool migrate_release_ram(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; +-} +- +-bool migrate_postcopy_ram(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; +-} +- + bool migrate_postcopy(void) + { + return migrate_postcopy_ram() || migrate_dirty_bitmaps(); + } + +-bool migrate_auto_converge(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; +-} +- +-bool migrate_zero_blocks(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; +-} +- +-bool migrate_postcopy_blocktime(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; +-} +- + bool migrate_use_compression(void) + { + MigrationState *s; +@@ -2620,33 +2567,6 @@ int migrate_decompress_threads(void) + return s->parameters.decompress_threads; + } + +-bool migrate_dirty_bitmaps(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; +-} +- +-bool migrate_ignore_shared(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; +-} +- +-bool migrate_validate_uuid(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; +-} +- + bool migrate_use_events(void) + { + MigrationState *s; +@@ -2665,15 +2585,6 @@ bool migrate_use_multifd(void) + return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; + } + +-bool migrate_pause_before_switchover(void) +-{ +- MigrationState *s; +- +- s = 
migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; +-} +- + int migrate_multifd_channels(void) + { + MigrationState *s; +@@ -2785,24 +2696,6 @@ bool migrate_use_block_incremental(void) + return s->parameters.block_incremental; + } + +-bool migrate_background_snapshot(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; +-} +- +-bool migrate_postcopy_preempt(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; +-} +- + /* migration thread support */ + /* + * Something bad happened to the RP stream, mark an error +diff --git a/migration/migration.h b/migration/migration.h +index 04e0860b4e..a25fed6ef0 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -449,16 +449,7 @@ MigrationState *migrate_get_current(void); + + bool migrate_postcopy(void); + +-bool migrate_release_ram(void); +-bool migrate_postcopy_ram(void); +-bool migrate_zero_blocks(void); +-bool migrate_dirty_bitmaps(void); +-bool migrate_ignore_shared(void); +-bool migrate_validate_uuid(void); +- +-bool migrate_auto_converge(void); + bool migrate_use_multifd(void); +-bool migrate_pause_before_switchover(void); + int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); +@@ -487,9 +478,6 @@ int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); + int migrate_decompress_threads(void); + bool migrate_use_events(void); +-bool migrate_postcopy_blocktime(void); +-bool migrate_background_snapshot(void); +-bool migrate_postcopy_preempt(void); + + /* Sending on the return path - generic and then for each message type */ + void migrate_send_rp_shut(MigrationIncomingState *mis, +diff --git a/migration/options.c b/migration/options.c +new file mode 100644 +index 0000000000..88a9a45913 +--- 
/dev/null ++++ b/migration/options.c +@@ -0,0 +1,124 @@ ++/* ++ * QEMU migration capabilities ++ * ++ * Copyright (c) 2012-2023 Red Hat Inc ++ * ++ * Authors: ++ * Orit Wasserman ++ * Juan Quintela ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#include "qemu/osdep.h" ++#include "migration.h" ++#include "options.h" ++ ++bool migrate_auto_converge(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; ++} ++ ++bool migrate_background_snapshot(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; ++} ++ ++bool migrate_dirty_bitmaps(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; ++} ++ ++bool migrate_ignore_shared(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; ++} ++ ++bool migrate_late_block_activate(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; ++} ++ ++bool migrate_pause_before_switchover(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; ++} ++ ++bool migrate_postcopy_blocktime(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; ++} ++ ++bool migrate_postcopy_preempt(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; ++} ++ ++bool migrate_postcopy_ram(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; ++} ++ ++bool 
migrate_release_ram(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; ++} ++ ++bool migrate_validate_uuid(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; ++} ++ ++bool migrate_zero_blocks(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; ++} +diff --git a/migration/options.h b/migration/options.h +new file mode 100644 +index 0000000000..0dfa0af245 +--- /dev/null ++++ b/migration/options.h +@@ -0,0 +1,32 @@ ++/* ++ * QEMU migration capabilities ++ * ++ * Copyright (c) 2012-2023 Red Hat Inc ++ * ++ * Authors: ++ * Orit Wasserman ++ * Juan Quintela ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#ifndef QEMU_MIGRATION_OPTIONS_H ++#define QEMU_MIGRATION_OPTIONS_H ++ ++/* capabilities */ ++ ++bool migrate_auto_converge(void); ++bool migrate_background_snapshot(void); ++bool migrate_dirty_bitmaps(void); ++bool migrate_ignore_shared(void); ++bool migrate_late_block_activate(void); ++bool migrate_pause_before_switchover(void); ++bool migrate_postcopy_blocktime(void); ++bool migrate_postcopy_preempt(void); ++bool migrate_postcopy_ram(void); ++bool migrate_release_ram(void); ++bool migrate_validate_uuid(void); ++bool migrate_zero_blocks(void); ++ ++#endif +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index d7b48dd920..0711500036 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -37,6 +37,7 @@ + #include "tls.h" + #include "qemu/userfaultfd.h" + #include "qemu/mmap-alloc.h" ++#include "options.h" + + /* Arbitrary limit on size of each discard command, + * keeps them around ~200 bytes +diff --git a/migration/ram.c b/migration/ram.c +index 229714045a..912ccd89fa 100644 +--- a/migration/ram.c ++++ 
b/migration/ram.c +@@ -57,6 +57,7 @@ + #include "qemu/iov.h" + #include "multifd.h" + #include "sysemu/runstate.h" ++#include "options.h" + + #include "hw/boards.h" /* for machine_dump_guest_core() */ + +diff --git a/migration/savevm.c b/migration/savevm.c +index 589ef926ab..ebcf571e37 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -67,6 +67,7 @@ + #include "qemu/yank.h" + #include "yank_functions.h" + #include "sysemu/qtest.h" ++#include "options.h" + + const unsigned int postcopy_ram_discard_version; + +diff --git a/migration/socket.c b/migration/socket.c +index e6fdf3c5e1..ebf9ac41af 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -27,6 +27,7 @@ + #include "io/net-listener.h" + #include "trace.h" + #include "postcopy-ram.h" ++#include "options.h" + + struct SocketOutgoingArgs { + SocketAddress *saddr; +-- +2.39.1 + diff --git a/kvm-migration-Make-dirty_sync_count-atomic.patch b/kvm-migration-Make-dirty_sync_count-atomic.patch new file mode 100644 index 0000000..ad1de7b --- /dev/null +++ b/kvm-migration-Make-dirty_sync_count-atomic.patch @@ -0,0 +1,105 @@ +From 886b511e0a225b1c4428c646534d7bcc65bd9e2a Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 11 Apr 2023 18:02:34 +0200 +Subject: [PATCH 14/56] migration: Make dirty_sync_count atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [13/50] ef3ae8cdd960e944ba9e73a53d54c9a5a55bb1ce (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 536b5a4e56ec67c958f46e7d46cbd5ac34e5a239) +Signed-off-by: Peter Xu +--- + migration/migration.c | 3 ++- + migration/ram.c | 13 +++++++------ + migration/ram.h | 2 +- + 3 files changed, 10 insertions(+), 8 deletions(-) + 
+diff --git a/migration/migration.c b/migration/migration.c +index 8f2847d298..8fca751050 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1148,7 +1148,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->normal = stat64_get(&ram_counters.normal); + info->ram->normal_bytes = info->ram->normal * page_size; + info->ram->mbps = s->mbps; +- info->ram->dirty_sync_count = ram_counters.dirty_sync_count; ++ info->ram->dirty_sync_count = ++ stat64_get(&ram_counters.dirty_sync_count); + info->ram->dirty_sync_missed_zero_copy = + stat64_get(&ram_counters.dirty_sync_missed_zero_copy); + info->ram->postcopy_requests = ram_counters.postcopy_requests; +diff --git a/migration/ram.c b/migration/ram.c +index b1722b6071..3c13136559 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -764,7 +764,7 @@ static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) + /* We don't care if this fails to allocate a new cache page + * as long as it updated an old one */ + cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page, +- ram_counters.dirty_sync_count); ++ stat64_get(&ram_counters.dirty_sync_count)); + } + + #define ENCODING_FLAG_XBZRLE 0x1 +@@ -790,13 +790,13 @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss, + int encoded_len = 0, bytes_xbzrle; + uint8_t *prev_cached_page; + QEMUFile *file = pss->pss_channel; ++ uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); + +- if (!cache_is_cached(XBZRLE.cache, current_addr, +- ram_counters.dirty_sync_count)) { ++ if (!cache_is_cached(XBZRLE.cache, current_addr, generation)) { + xbzrle_counters.cache_miss++; + if (!rs->last_stage) { + if (cache_insert(XBZRLE.cache, current_addr, *current_data, +- ram_counters.dirty_sync_count) == -1) { ++ generation) == -1) { + return -1; + } else { + /* update *current_data when the page has been +@@ -1209,7 +1209,7 @@ static void migration_bitmap_sync(RAMState *rs) + RAMBlock *block; + int64_t 
end_time; + +- ram_counters.dirty_sync_count++; ++ stat64_add(&ram_counters.dirty_sync_count, 1); + + if (!rs->time_last_bitmap_sync) { + rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +@@ -1246,7 +1246,8 @@ static void migration_bitmap_sync(RAMState *rs) + rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); + } + if (migrate_use_events()) { +- qapi_event_send_migration_pass(ram_counters.dirty_sync_count); ++ uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); ++ qapi_event_send_migration_pass(generation); + } + } + +diff --git a/migration/ram.h b/migration/ram.h +index bb52632424..8c0d07c43a 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -42,7 +42,7 @@ + */ + typedef struct { + int64_t dirty_pages_rate; +- int64_t dirty_sync_count; ++ Stat64 dirty_sync_count; + Stat64 dirty_sync_missed_zero_copy; + Stat64 downtime_bytes; + Stat64 duplicate; +-- +2.39.1 + diff --git a/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch b/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch new file mode 100644 index 0000000..b7b0f60 --- /dev/null +++ b/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch @@ -0,0 +1,92 @@ +From e9ff20d7f7e6c2354f3696e8bca265e535eeb801 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 11 Apr 2023 17:33:56 +0200 +Subject: [PATCH 11/56] migration: Make dirty_sync_missed_zero_copy atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [10/50] 041230abb087db0e7ffae02b4f85772490b805a0 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 4291823694fd8507831d26e2558d9cd0030841f7) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + 
migration/multifd.c | 2 +- + migration/ram.c | 5 ----- + migration/ram.h | 4 +--- + 4 files changed, 3 insertions(+), 10 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index ca68808b5c..645fb4b3c5 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1150,7 +1150,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = ram_counters.dirty_sync_count; + info->ram->dirty_sync_missed_zero_copy = +- ram_counters.dirty_sync_missed_zero_copy; ++ stat64_get(&ram_counters.dirty_sync_missed_zero_copy); + info->ram->postcopy_requests = ram_counters.postcopy_requests; + info->ram->page_size = page_size; + info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); +diff --git a/migration/multifd.c b/migration/multifd.c +index 1c992abf53..903df2117b 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -576,7 +576,7 @@ static int multifd_zero_copy_flush(QIOChannel *c) + return -1; + } + if (ret == 1) { +- dirty_sync_missed_zero_copy(); ++ stat64_add(&ram_counters.dirty_sync_missed_zero_copy, 1); + } + + return ret; +diff --git a/migration/ram.c b/migration/ram.c +index 71320ed27a..93e0a48af4 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -472,11 +472,6 @@ void ram_transferred_add(uint64_t bytes) + stat64_add(&ram_counters.transferred, bytes); + } + +-void dirty_sync_missed_zero_copy(void) +-{ +- ram_counters.dirty_sync_missed_zero_copy++; +-} +- + struct MigrationOps { + int (*ram_save_target_page)(RAMState *rs, PageSearchStatus *pss); + }; +diff --git a/migration/ram.h b/migration/ram.h +index ed70391317..2170c55e67 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -43,7 +43,7 @@ + typedef struct { + int64_t dirty_pages_rate; + int64_t dirty_sync_count; +- uint64_t dirty_sync_missed_zero_copy; ++ Stat64 dirty_sync_missed_zero_copy; + uint64_t downtime_bytes; + Stat64 duplicate; + Stat64 multifd_bytes; +@@ -114,6 +114,4 
@@ void ram_write_tracking_prepare(void); + int ram_write_tracking_start(void); + void ram_write_tracking_stop(void); + +-void dirty_sync_missed_zero_copy(void); +- + #endif +-- +2.39.1 + diff --git a/kvm-migration-Make-downtime_bytes-atomic.patch b/kvm-migration-Make-downtime_bytes-atomic.patch new file mode 100644 index 0000000..9b206bc --- /dev/null +++ b/kvm-migration-Make-downtime_bytes-atomic.patch @@ -0,0 +1,68 @@ +From 4c6af064277b5445b31db4a598e1c4402ba56452 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 11 Apr 2023 17:38:11 +0200 +Subject: [PATCH 13/56] migration: Make downtime_bytes atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [12/50] ebfc16aae8bc4a8c1fec431780a062950e6f50c4 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 296a4ac2aa63038b6b702f2ee8f0f93ae26727ae) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + migration/ram.c | 2 +- + migration/ram.h | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 3a68d93d69..8f2847d298 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1156,7 +1156,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); + info->ram->pages_per_second = s->pages_per_second; + info->ram->precopy_bytes = stat64_get(&ram_counters.precopy_bytes); +- info->ram->downtime_bytes = ram_counters.downtime_bytes; ++ info->ram->downtime_bytes = stat64_get(&ram_counters.downtime_bytes); + info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); + + if (migrate_use_xbzrle()) { +diff --git 
a/migration/ram.c b/migration/ram.c +index 0b4693215e..b1722b6071 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -467,7 +467,7 @@ void ram_transferred_add(uint64_t bytes) + } else if (migration_in_postcopy()) { + stat64_add(&ram_counters.postcopy_bytes, bytes); + } else { +- ram_counters.downtime_bytes += bytes; ++ stat64_add(&ram_counters.downtime_bytes, bytes); + } + stat64_add(&ram_counters.transferred, bytes); + } +diff --git a/migration/ram.h b/migration/ram.h +index a766b895fa..bb52632424 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -44,7 +44,7 @@ typedef struct { + int64_t dirty_pages_rate; + int64_t dirty_sync_count; + Stat64 dirty_sync_missed_zero_copy; +- uint64_t downtime_bytes; ++ Stat64 downtime_bytes; + Stat64 duplicate; + Stat64 multifd_bytes; + Stat64 normal; +-- +2.39.1 + diff --git a/kvm-migration-Make-multifd_bytes-atomic.patch b/kvm-migration-Make-multifd_bytes-atomic.patch new file mode 100644 index 0000000..b315fdc --- /dev/null +++ b/kvm-migration-Make-multifd_bytes-atomic.patch @@ -0,0 +1,99 @@ +From bfcc4bc8f60b541d545f1ea27b1ff156d8092d33 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 23 Nov 2022 20:36:56 +0100 +Subject: [PATCH 10/56] migration: Make multifd_bytes atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [9/50] c2bc6b173770a0ea81c3f9d850c583c651647070 (peterx/qemu-kvm) + +In the spirit of: + +commit 394d323bc3451e4d07f13341cb8817fac8dfbadd +Author: Peter Xu +Date: Tue Oct 11 17:55:51 2022 -0400 + + migration: Use atomic ops properly for page accountings + +Reviewed-by: David Edmondson +Reviewed-by: Peter Xu +Signed-off-by: Juan Quintela +(cherry picked from commit cf671116facf4e29d91fce9c9ffb535385ffac81) +Signed-off-by: 
Peter Xu +--- + migration/migration.c | 4 ++-- + migration/multifd.c | 4 ++-- + migration/ram.h | 2 +- + 3 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index a91704d35c..ca68808b5c 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1153,7 +1153,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + ram_counters.dirty_sync_missed_zero_copy; + info->ram->postcopy_requests = ram_counters.postcopy_requests; + info->ram->page_size = page_size; +- info->ram->multifd_bytes = ram_counters.multifd_bytes; ++ info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); + info->ram->pages_per_second = s->pages_per_second; + info->ram->precopy_bytes = ram_counters.precopy_bytes; + info->ram->downtime_bytes = ram_counters.downtime_bytes; +@@ -3780,7 +3780,7 @@ static MigThrError migration_detect_error(MigrationState *s) + static uint64_t migration_total_bytes(MigrationState *s) + { + return qemu_file_total_transferred(s->to_dst_file) + +- ram_counters.multifd_bytes; ++ stat64_get(&ram_counters.multifd_bytes); + } + + static void migration_calculate_complete(MigrationState *s) +diff --git a/migration/multifd.c b/migration/multifd.c +index 6ef3a27938..1c992abf53 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -432,9 +432,9 @@ static int multifd_send_pages(QEMUFile *f) + p->pages = pages; + transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; + qemu_file_acct_rate_limit(f, transferred); +- ram_counters.multifd_bytes += transferred; + qemu_mutex_unlock(&p->mutex); + stat64_add(&ram_counters.transferred, transferred); ++ stat64_add(&ram_counters.multifd_bytes, transferred); + qemu_sem_post(&p->sem); + + return 1; +@@ -627,9 +627,9 @@ int multifd_send_sync_main(QEMUFile *f) + p->flags |= MULTIFD_FLAG_SYNC; + p->pending_job++; + qemu_file_acct_rate_limit(f, p->packet_len); +- ram_counters.multifd_bytes += p->packet_len; + 
qemu_mutex_unlock(&p->mutex); + stat64_add(&ram_counters.transferred, p->packet_len); ++ stat64_add(&ram_counters.multifd_bytes, p->packet_len); + qemu_sem_post(&p->sem); + } + for (i = 0; i < migrate_multifd_channels(); i++) { +diff --git a/migration/ram.h b/migration/ram.h +index 7c026b5242..ed70391317 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -46,7 +46,7 @@ typedef struct { + uint64_t dirty_sync_missed_zero_copy; + uint64_t downtime_bytes; + Stat64 duplicate; +- uint64_t multifd_bytes; ++ Stat64 multifd_bytes; + Stat64 normal; + Stat64 postcopy_bytes; + int64_t postcopy_requests; +-- +2.39.1 + diff --git a/kvm-migration-Make-postcopy_requests-atomic.patch b/kvm-migration-Make-postcopy_requests-atomic.patch new file mode 100644 index 0000000..894419a --- /dev/null +++ b/kvm-migration-Make-postcopy_requests-atomic.patch @@ -0,0 +1,69 @@ +From e6ff4536a5e5f5bbfda370ecb525d0e066c3ab1c Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 11 Apr 2023 18:04:59 +0200 +Subject: [PATCH 15/56] migration: Make postcopy_requests atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [14/50] d15c6052b77e7ded7bf34c66caa11bf86b75f2e8 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 3c764f9b2bc3e5eb5ed93ab45c2de6d599fef00f) +Signed-off-by: Peter Xu +--- + migration/migration.c | 3 ++- + migration/ram.c | 2 +- + migration/ram.h | 2 +- + 3 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 8fca751050..39501a0ed8 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1152,7 +1152,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + 
stat64_get(&ram_counters.dirty_sync_count); + info->ram->dirty_sync_missed_zero_copy = + stat64_get(&ram_counters.dirty_sync_missed_zero_copy); +- info->ram->postcopy_requests = ram_counters.postcopy_requests; ++ info->ram->postcopy_requests = ++ stat64_get(&ram_counters.postcopy_requests); + info->ram->page_size = page_size; + info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); + info->ram->pages_per_second = s->pages_per_second; +diff --git a/migration/ram.c b/migration/ram.c +index 3c13136559..fe69ecaef4 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2169,7 +2169,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) + RAMBlock *ramblock; + RAMState *rs = ram_state; + +- ram_counters.postcopy_requests++; ++ stat64_add(&ram_counters.postcopy_requests, 1); + RCU_READ_LOCK_GUARD(); + + if (!rbname) { +diff --git a/migration/ram.h b/migration/ram.h +index 8c0d07c43a..afa68521d7 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -49,7 +49,7 @@ typedef struct { + Stat64 multifd_bytes; + Stat64 normal; + Stat64 postcopy_bytes; +- int64_t postcopy_requests; ++ Stat64 postcopy_requests; + Stat64 precopy_bytes; + int64_t remaining; + Stat64 transferred; +-- +2.39.1 + diff --git a/kvm-migration-Make-precopy_bytes-atomic.patch b/kvm-migration-Make-precopy_bytes-atomic.patch new file mode 100644 index 0000000..8e6c177 --- /dev/null +++ b/kvm-migration-Make-precopy_bytes-atomic.patch @@ -0,0 +1,68 @@ +From 7e4d4316855f7f6556364eb16828f925b61c80d4 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 11 Apr 2023 17:36:48 +0200 +Subject: [PATCH 12/56] migration: Make precopy_bytes atomic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: 
[11/50] 23bec49b4b8f4d23c2192b401416139e3ca13626 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit b013b5d1f32ef88457e66c7ce576f6475238f97f) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + migration/ram.c | 2 +- + migration/ram.h | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 645fb4b3c5..3a68d93d69 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1155,7 +1155,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->page_size = page_size; + info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); + info->ram->pages_per_second = s->pages_per_second; +- info->ram->precopy_bytes = ram_counters.precopy_bytes; ++ info->ram->precopy_bytes = stat64_get(&ram_counters.precopy_bytes); + info->ram->downtime_bytes = ram_counters.downtime_bytes; + info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); + +diff --git a/migration/ram.c b/migration/ram.c +index 93e0a48af4..0b4693215e 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -463,7 +463,7 @@ RAMStats ram_counters; + void ram_transferred_add(uint64_t bytes) + { + if (runstate_is_running()) { +- ram_counters.precopy_bytes += bytes; ++ stat64_add(&ram_counters.precopy_bytes, bytes); + } else if (migration_in_postcopy()) { + stat64_add(&ram_counters.postcopy_bytes, bytes); + } else { +diff --git a/migration/ram.h b/migration/ram.h +index 2170c55e67..a766b895fa 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -50,7 +50,7 @@ typedef struct { + Stat64 normal; + Stat64 postcopy_bytes; + int64_t postcopy_requests; +- uint64_t precopy_bytes; ++ Stat64 precopy_bytes; + int64_t remaining; + Stat64 transferred; + } RAMStats; +-- +2.39.1 + diff --git a/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch b/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch new file mode 100644 index 
0000000..0679e89 --- /dev/null +++ b/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch @@ -0,0 +1,270 @@ +From 5a87058eea6ee56f37fb454486c35baaf693d691 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 22 Feb 2023 15:56:45 +0100 +Subject: [PATCH 08/56] migration: Merge ram_counters and ram_atomic_counters +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [7/50] 90e395de66aa32b886cf151f7996a680190471f5 (peterx/qemu-kvm) + +Using MigrationStats as the type for ram_counters meant that we didn't have +to re-declare each value in another struct. The need for atomic +counters has made us create MigrationAtomicStats for these atomic +counters. + +Create the RAMStats type, which is a merge of MigrationStats and +MigrationAtomicStats with unused members removed. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu + +--- + +Fix typos found by David Edmondson + +(cherry picked from commit abce5fa16d126ed085ccf8a5b3fe61a1efa20994) +Signed-off-by: Peter Xu +--- + migration/migration.c | 8 ++++---- + migration/multifd.c | 4 ++-- + migration/ram.c | 39 ++++++++++++++++----------------------- + migration/ram.h | 28 +++++++++++++++------------- + 4 files changed, 37 insertions(+), 42 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 99f86bd6c2..a91704d35c 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1140,12 +1140,12 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + size_t page_size = qemu_target_page_size(); + + info->ram = g_malloc0(sizeof(*info->ram)); +- info->ram->transferred = stat64_get(&ram_atomic_counters.transferred); ++ info->ram->transferred = stat64_get(&ram_counters.transferred); + info->ram->total = ram_bytes_total(); +- info->ram->duplicate = stat64_get(&ram_atomic_counters.duplicate); ++ info->ram->duplicate = stat64_get(&ram_counters.duplicate); + /* legacy value. 
It is not used anymore */ + info->ram->skipped = 0; +- info->ram->normal = stat64_get(&ram_atomic_counters.normal); ++ info->ram->normal = stat64_get(&ram_counters.normal); + info->ram->normal_bytes = info->ram->normal * page_size; + info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = ram_counters.dirty_sync_count; +@@ -1157,7 +1157,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->pages_per_second = s->pages_per_second; + info->ram->precopy_bytes = ram_counters.precopy_bytes; + info->ram->downtime_bytes = ram_counters.downtime_bytes; +- info->ram->postcopy_bytes = stat64_get(&ram_atomic_counters.postcopy_bytes); ++ info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); + + if (migrate_use_xbzrle()) { + info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); +diff --git a/migration/multifd.c b/migration/multifd.c +index cbc0dfe39b..01fab01a92 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -433,7 +433,7 @@ static int multifd_send_pages(QEMUFile *f) + transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; + qemu_file_acct_rate_limit(f, transferred); + ram_counters.multifd_bytes += transferred; +- stat64_add(&ram_atomic_counters.transferred, transferred); ++ stat64_add(&ram_counters.transferred, transferred); + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); + +@@ -628,7 +628,7 @@ int multifd_send_sync_main(QEMUFile *f) + p->pending_job++; + qemu_file_acct_rate_limit(f, p->packet_len); + ram_counters.multifd_bytes += p->packet_len; +- stat64_add(&ram_atomic_counters.transferred, p->packet_len); ++ stat64_add(&ram_counters.transferred, p->packet_len); + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); + } +diff --git a/migration/ram.c b/migration/ram.c +index 0e68099bf9..71320ed27a 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -458,25 +458,18 @@ uint64_t ram_bytes_remaining(void) + 0; + } + +-/* +- * NOTE: not all stats in ram_counters are used in 
reality. See comments +- * for struct MigrationAtomicStats. The ultimate result of ram migration +- * counters will be a merged version with both ram_counters and the atomic +- * fields in ram_atomic_counters. +- */ +-MigrationStats ram_counters; +-MigrationAtomicStats ram_atomic_counters; ++RAMStats ram_counters; + + void ram_transferred_add(uint64_t bytes) + { + if (runstate_is_running()) { + ram_counters.precopy_bytes += bytes; + } else if (migration_in_postcopy()) { +- stat64_add(&ram_atomic_counters.postcopy_bytes, bytes); ++ stat64_add(&ram_counters.postcopy_bytes, bytes); + } else { + ram_counters.downtime_bytes += bytes; + } +- stat64_add(&ram_atomic_counters.transferred, bytes); ++ stat64_add(&ram_counters.transferred, bytes); + } + + void dirty_sync_missed_zero_copy(void) +@@ -756,7 +749,7 @@ void mig_throttle_counter_reset(void) + + rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + rs->num_dirty_pages_period = 0; +- rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred); ++ rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); + } + + /** +@@ -1130,8 +1123,8 @@ uint64_t ram_pagesize_summary(void) + + uint64_t ram_get_total_transferred_pages(void) + { +- return stat64_get(&ram_atomic_counters.normal) + +- stat64_get(&ram_atomic_counters.duplicate) + ++ return stat64_get(&ram_counters.normal) + ++ stat64_get(&ram_counters.duplicate) + + compression_counters.pages + xbzrle_counters.pages; + } + +@@ -1192,7 +1185,7 @@ static void migration_trigger_throttle(RAMState *rs) + MigrationState *s = migrate_get_current(); + uint64_t threshold = s->parameters.throttle_trigger_threshold; + uint64_t bytes_xfer_period = +- stat64_get(&ram_atomic_counters.transferred) - rs->bytes_xfer_prev; ++ stat64_get(&ram_counters.transferred) - rs->bytes_xfer_prev; + uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE; + uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100; + +@@ -1255,7 +1248,7 @@ static 
void migration_bitmap_sync(RAMState *rs) + /* reset period counters */ + rs->time_last_bitmap_sync = end_time; + rs->num_dirty_pages_period = 0; +- rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred); ++ rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); + } + if (migrate_use_events()) { + qapi_event_send_migration_pass(ram_counters.dirty_sync_count); +@@ -1331,7 +1324,7 @@ static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block, + int len = save_zero_page_to_file(pss, f, block, offset); + + if (len) { +- stat64_add(&ram_atomic_counters.duplicate, 1); ++ stat64_add(&ram_counters.duplicate, 1); + ram_transferred_add(len); + return 1; + } +@@ -1368,9 +1361,9 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, + } + + if (bytes_xmit > 0) { +- stat64_add(&ram_atomic_counters.normal, 1); ++ stat64_add(&ram_counters.normal, 1); + } else if (bytes_xmit == 0) { +- stat64_add(&ram_atomic_counters.duplicate, 1); ++ stat64_add(&ram_counters.duplicate, 1); + } + + return true; +@@ -1402,7 +1395,7 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, + qemu_put_buffer(file, buf, TARGET_PAGE_SIZE); + } + ram_transferred_add(TARGET_PAGE_SIZE); +- stat64_add(&ram_atomic_counters.normal, 1); ++ stat64_add(&ram_counters.normal, 1); + return 1; + } + +@@ -1458,7 +1451,7 @@ static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block, + if (multifd_queue_page(file, block, offset) < 0) { + return -1; + } +- stat64_add(&ram_atomic_counters.normal, 1); ++ stat64_add(&ram_counters.normal, 1); + + return 1; + } +@@ -1497,7 +1490,7 @@ update_compress_thread_counts(const CompressParam *param, int bytes_xmit) + ram_transferred_add(bytes_xmit); + + if (param->zero_page) { +- stat64_add(&ram_atomic_counters.duplicate, 1); ++ stat64_add(&ram_counters.duplicate, 1); + return; + } + +@@ -2632,9 +2625,9 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) + uint64_t pages = size / TARGET_PAGE_SIZE; 
+ + if (zero) { +- stat64_add(&ram_atomic_counters.duplicate, pages); ++ stat64_add(&ram_counters.duplicate, pages); + } else { +- stat64_add(&ram_atomic_counters.normal, pages); ++ stat64_add(&ram_counters.normal, pages); + ram_transferred_add(size); + qemu_file_credit_transfer(f, size); + } +diff --git a/migration/ram.h b/migration/ram.h +index 81cbb0947c..7c026b5242 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -35,25 +35,27 @@ + #include "qemu/stats64.h" + + /* +- * These are the migration statistic counters that need to be updated using +- * atomic ops (can be accessed by more than one thread). Here since we +- * cannot modify MigrationStats directly to use Stat64 as it was defined in +- * the QAPI scheme, we define an internal structure to hold them, and we +- * propagate the real values when QMP queries happen. +- * +- * IOW, the corresponding fields within ram_counters on these specific +- * fields will be always zero and not being used at all; they're just +- * placeholders to make it QAPI-compatible. ++ * These are the ram migration statistic counters. It is loosely ++ * based on MigrationStats. We change to Stat64 any counter that ++ * needs to be updated using atomic ops (can be accessed by more than ++ * one thread). 
+ */ + typedef struct { +- Stat64 transferred; ++ int64_t dirty_pages_rate; ++ int64_t dirty_sync_count; ++ uint64_t dirty_sync_missed_zero_copy; ++ uint64_t downtime_bytes; + Stat64 duplicate; ++ uint64_t multifd_bytes; + Stat64 normal; + Stat64 postcopy_bytes; +-} MigrationAtomicStats; ++ int64_t postcopy_requests; ++ uint64_t precopy_bytes; ++ int64_t remaining; ++ Stat64 transferred; ++} RAMStats; + +-extern MigrationAtomicStats ram_atomic_counters; +-extern MigrationStats ram_counters; ++extern RAMStats ram_counters; + extern XBZRLECacheStats xbzrle_counters; + extern CompressionStats compression_counters; + +-- +2.39.1 + diff --git a/kvm-migration-Move-migrate_announce_params-to-option.c.patch b/kvm-migration-Move-migrate_announce_params-to-option.c.patch new file mode 100644 index 0000000..24dcb16 --- /dev/null +++ b/kvm-migration-Move-migrate_announce_params-to-option.c.patch @@ -0,0 +1,90 @@ +From 1f5232d611ecaaf61bcac151e7d90b8b452ac161 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 01:17:23 +0100 +Subject: [PATCH 43/56] migration: Move migrate_announce_params() to option.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [42/50] 541be7adc7f81c269058485aef5b14e787b2efe6 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas + +--- + +Fix extra whitespace (fabiano) + +(cherry picked from commit 2682c4eea72c621dfd0fb0151cbd758e81d1bdff) +Signed-off-by: Peter Xu +--- + migration/migration.c | 14 -------------- + migration/options.c | 17 +++++++++++++++++ + 2 files changed, 17 insertions(+), 14 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 724e841eb9..f27ce30be2 100644 +--- a/migration/migration.c ++++ 
b/migration/migration.c +@@ -954,20 +954,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + return params; + } + +-AnnounceParameters *migrate_announce_params(void) +-{ +- static AnnounceParameters ap; +- +- MigrationState *s = migrate_get_current(); +- +- ap.initial = s->parameters.announce_initial; +- ap.max = s->parameters.announce_max; +- ap.rounds = s->parameters.announce_rounds; +- ap.step = s->parameters.announce_step; +- +- return &ap; +-} +- + /* + * Return true if we're already in the middle of a migration + * (i.e. any of the active or setup states) +diff --git a/migration/options.c b/migration/options.c +index 2cb04fbbd1..418aafac64 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -16,6 +16,7 @@ + #include "qapi/qapi-commands-migration.h" + #include "qapi/qmp/qerror.h" + #include "sysemu/runstate.h" ++#include "migration/misc.h" + #include "migration.h" + #include "ram.h" + #include "options.h" +@@ -589,3 +590,19 @@ uint64_t migrate_xbzrle_cache_size(void) + + return s->parameters.xbzrle_cache_size; + } ++ ++/* parameters helpers */ ++ ++AnnounceParameters *migrate_announce_params(void) ++{ ++ static AnnounceParameters ap; ++ ++ MigrationState *s = migrate_get_current(); ++ ++ ap.initial = s->parameters.announce_initial; ++ ap.max = s->parameters.announce_max; ++ ap.rounds = s->parameters.announce_rounds; ++ ap.step = s->parameters.announce_step; ++ ++ return &ap; ++} +-- +2.39.1 + diff --git a/kvm-migration-Move-migrate_cap_set-to-options.c.patch b/kvm-migration-Move-migrate_cap_set-to-options.c.patch new file mode 100644 index 0000000..0e33c4c --- /dev/null +++ b/kvm-migration-Move-migrate_cap_set-to-options.c.patch @@ -0,0 +1,110 @@ +From 9c4f8d869f5bbdd07381f6baad2ed755b07d03f4 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 23:25:44 +0100 +Subject: [PATCH 36/56] migration: Move migrate_cap_set() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding:
8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [35/50] d0cd6b8e9cf0534a56795d94c3da18622fa10ad7 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit f80196b772ddeeb07d3d80d5c8382cb5d1063fa2) +Signed-off-by: Peter Xu +--- + migration/migration.c | 20 -------------------- + migration/options.c | 21 +++++++++++++++++++++ + migration/options.h | 1 + + 3 files changed, 22 insertions(+), 20 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 369cd91796..880a51210e 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1666,26 +1666,6 @@ void migrate_set_state(int *state, int old_state, int new_state) + } + } + +-static bool migrate_cap_set(int cap, bool value, Error **errp) +-{ +- MigrationState *s = migrate_get_current(); +- bool new_caps[MIGRATION_CAPABILITY__MAX]; +- +- if (migration_is_running(s->state)) { +- error_setg(errp, QERR_MIGRATION_ACTIVE); +- return false; +- } +- +- memcpy(new_caps, s->capabilities, sizeof(new_caps)); +- new_caps[cap] = value; +- +- if (!migrate_caps_check(s->capabilities, new_caps, errp)) { +- return false; +- } +- s->capabilities[cap] = value; +- return true; +-} +- + static void migrate_set_block_incremental(MigrationState *s, bool value) + { + s->parameters.block_incremental = value; +diff --git a/migration/options.c b/migration/options.c +index 4cbe77e35a..f3b2d6e482 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + #include "qapi/error.h" + #include "qapi/qapi-commands-migration.h" ++#include "qapi/qmp/qerror.h" + #include "sysemu/runstate.h" + #include "migration.h" + #include "ram.h" +@@ -392,6 +393,26 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + 
return true; + } + ++bool migrate_cap_set(int cap, bool value, Error **errp) ++{ ++ MigrationState *s = migrate_get_current(); ++ bool new_caps[MIGRATION_CAPABILITY__MAX]; ++ ++ if (migration_is_running(s->state)) { ++ error_setg(errp, QERR_MIGRATION_ACTIVE); ++ return false; ++ } ++ ++ memcpy(new_caps, s->capabilities, sizeof(new_caps)); ++ new_caps[cap] = value; ++ ++ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { ++ return false; ++ } ++ s->capabilities[cap] = value; ++ return true; ++} ++ + MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) + { + MigrationCapabilityStatusList *head = NULL, **tail = &head; +diff --git a/migration/options.h b/migration/options.h +index e779f14161..5979e4ff90 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -41,5 +41,6 @@ bool migrate_zero_copy_send(void); + /* capabilities helpers */ + + bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); ++bool migrate_cap_set(int cap, bool value, Error **errp); + + #endif +-- +2.39.1 + diff --git a/kvm-migration-Move-migrate_caps_check-to-options.c.patch b/kvm-migration-Move-migrate_caps_check-to-options.c.patch new file mode 100644 index 0000000..0d6fa08 --- /dev/null +++ b/kvm-migration-Move-migrate_caps_check-to-options.c.patch @@ -0,0 +1,458 @@ +From 3af7c7aaf7407ec14c19e54d52a2229ce4dbb7c5 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 23:05:53 +0100 +Subject: [PATCH 33/56] migration: Move migrate_caps_check() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [32/50] 12999471063d97fffb2b04c6dcb80083b902f963 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked 
from commit 77608706459bd197e25ac1ef54591b9f8a0b46f8) +Signed-off-by: Peter Xu +--- + migration/migration.c | 190 ----------------------------------------- + migration/options.c | 192 ++++++++++++++++++++++++++++++++++++++++++ + migration/options.h | 4 + + 3 files changed, 196 insertions(+), 190 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index f7facecd66..d9e30ca918 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -136,39 +136,6 @@ enum mig_rp_message_type { + MIG_RP_MSG_MAX + }; + +-/* Migration capabilities set */ +-struct MigrateCapsSet { +- int size; /* Capability set size */ +- MigrationCapability caps[]; /* Variadic array of capabilities */ +-}; +-typedef struct MigrateCapsSet MigrateCapsSet; +- +-/* Define and initialize MigrateCapsSet */ +-#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) \ +- MigrateCapsSet _name = { \ +- .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \ +- .caps = { __VA_ARGS__ } \ +- } +- +-/* Background-snapshot compatibility check list */ +-static const +-INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, +- MIGRATION_CAPABILITY_POSTCOPY_RAM, +- MIGRATION_CAPABILITY_DIRTY_BITMAPS, +- MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME, +- MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE, +- MIGRATION_CAPABILITY_RETURN_PATH, +- MIGRATION_CAPABILITY_MULTIFD, +- MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER, +- MIGRATION_CAPABILITY_AUTO_CONVERGE, +- MIGRATION_CAPABILITY_RELEASE_RAM, +- MIGRATION_CAPABILITY_RDMA_PIN_ALL, +- MIGRATION_CAPABILITY_COMPRESS, +- MIGRATION_CAPABILITY_XBZRLE, +- MIGRATION_CAPABILITY_X_COLO, +- MIGRATION_CAPABILITY_VALIDATE_UUID, +- MIGRATION_CAPABILITY_ZERO_COPY_SEND); +- + /* When we add fault tolerance, we could have several + migrations at once. 
For now we don't need to add + dynamic creation of migration */ +@@ -1235,163 +1202,6 @@ static void fill_source_migration_info(MigrationInfo *info) + info->status = state; + } + +-typedef enum WriteTrackingSupport { +- WT_SUPPORT_UNKNOWN = 0, +- WT_SUPPORT_ABSENT, +- WT_SUPPORT_AVAILABLE, +- WT_SUPPORT_COMPATIBLE +-} WriteTrackingSupport; +- +-static +-WriteTrackingSupport migrate_query_write_tracking(void) +-{ +- /* Check if kernel supports required UFFD features */ +- if (!ram_write_tracking_available()) { +- return WT_SUPPORT_ABSENT; +- } +- /* +- * Check if current memory configuration is +- * compatible with required UFFD features. +- */ +- if (!ram_write_tracking_compatible()) { +- return WT_SUPPORT_AVAILABLE; +- } +- +- return WT_SUPPORT_COMPATIBLE; +-} +- +-/** +- * @migration_caps_check - check capability compatibility +- * +- * @old_caps: old capability list +- * @new_caps: new capability list +- * @errp: set *errp if the check failed, with reason +- * +- * Returns true if check passed, otherwise false. +- */ +-static bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) +-{ +- MigrationIncomingState *mis = migration_incoming_get_current(); +- +-#ifndef CONFIG_LIVE_BLOCK_MIGRATION +- if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { +- error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " +- "block migration"); +- error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); +- return false; +- } +-#endif +- +-#ifndef CONFIG_REPLICATION +- if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { +- error_setg(errp, "QEMU compiled without replication module" +- " can't enable COLO"); +- error_append_hint(errp, "Please enable replication before COLO.\n"); +- return false; +- } +-#endif +- +- if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { +- /* This check is reasonably expensive, so only when it's being +- * set the first time, also it's only the destination that needs +- * special support. 
+- */ +- if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && +- runstate_check(RUN_STATE_INMIGRATE) && +- !postcopy_ram_supported_by_host(mis)) { +- /* postcopy_ram_supported_by_host will have emitted a more +- * detailed message +- */ +- error_setg(errp, "Postcopy is not supported"); +- return false; +- } +- +- if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { +- error_setg(errp, "Postcopy is not compatible with ignore-shared"); +- return false; +- } +- } +- +- if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { +- WriteTrackingSupport wt_support; +- int idx; +- /* +- * Check if 'background-snapshot' capability is supported by +- * host kernel and compatible with guest memory configuration. +- */ +- wt_support = migrate_query_write_tracking(); +- if (wt_support < WT_SUPPORT_AVAILABLE) { +- error_setg(errp, "Background-snapshot is not supported by host kernel"); +- return false; +- } +- if (wt_support < WT_SUPPORT_COMPATIBLE) { +- error_setg(errp, "Background-snapshot is not compatible " +- "with guest memory configuration"); +- return false; +- } +- +- /* +- * Check if there are any migration capabilities +- * incompatible with 'background-snapshot'. 
+- */ +- for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { +- int incomp_cap = check_caps_background_snapshot.caps[idx]; +- if (new_caps[incomp_cap]) { +- error_setg(errp, +- "Background-snapshot is not compatible with %s", +- MigrationCapability_str(incomp_cap)); +- return false; +- } +- } +- } +- +-#ifdef CONFIG_LINUX +- if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && +- (!new_caps[MIGRATION_CAPABILITY_MULTIFD] || +- new_caps[MIGRATION_CAPABILITY_COMPRESS] || +- new_caps[MIGRATION_CAPABILITY_XBZRLE] || +- migrate_multifd_compression() || +- migrate_use_tls())) { +- error_setg(errp, +- "Zero copy only available for non-compressed non-TLS multifd migration"); +- return false; +- } +-#else +- if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { +- error_setg(errp, +- "Zero copy currently only available on Linux"); +- return false; +- } +-#endif +- +- if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { +- if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { +- error_setg(errp, "Postcopy preempt requires postcopy-ram"); +- return false; +- } +- +- /* +- * Preempt mode requires urgent pages to be sent in separate +- * channel, OTOH compression logic will disorder all pages into +- * different compression channels, which is not compatible with the +- * preempt assumptions on channel assignments. 
+- */ +- if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { +- error_setg(errp, "Postcopy preempt not compatible with compress"); +- return false; +- } +- } +- +- if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { +- if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { +- error_setg(errp, "Multifd is not compatible with compress"); +- return false; +- } +- } +- +- return true; +-} +- + static void fill_destination_migration_info(MigrationInfo *info) + { + MigrationIncomingState *mis = migration_incoming_get_current(); +diff --git a/migration/options.c b/migration/options.c +index 9c9b8e5863..367c930f46 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -12,7 +12,10 @@ + */ + + #include "qemu/osdep.h" ++#include "qapi/error.h" ++#include "sysemu/runstate.h" + #include "migration.h" ++#include "ram.h" + #include "options.h" + + bool migrate_auto_converge(void) +@@ -198,3 +201,192 @@ bool migrate_zero_copy_send(void) + + return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; + } ++typedef enum WriteTrackingSupport { ++ WT_SUPPORT_UNKNOWN = 0, ++ WT_SUPPORT_ABSENT, ++ WT_SUPPORT_AVAILABLE, ++ WT_SUPPORT_COMPATIBLE ++} WriteTrackingSupport; ++ ++static ++WriteTrackingSupport migrate_query_write_tracking(void) ++{ ++ /* Check if kernel supports required UFFD features */ ++ if (!ram_write_tracking_available()) { ++ return WT_SUPPORT_ABSENT; ++ } ++ /* ++ * Check if current memory configuration is ++ * compatible with required UFFD features. ++ */ ++ if (!ram_write_tracking_compatible()) { ++ return WT_SUPPORT_AVAILABLE; ++ } ++ ++ return WT_SUPPORT_COMPATIBLE; ++} ++ ++/* Migration capabilities set */ ++struct MigrateCapsSet { ++ int size; /* Capability set size */ ++ MigrationCapability caps[]; /* Variadic array of capabilities */ ++}; ++typedef struct MigrateCapsSet MigrateCapsSet; ++ ++/* Define and initialize MigrateCapsSet */ ++#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) 
\ ++ MigrateCapsSet _name = { \ ++ .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \ ++ .caps = { __VA_ARGS__ } \ ++ } ++ ++/* Background-snapshot compatibility check list */ ++static const ++INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, ++ MIGRATION_CAPABILITY_POSTCOPY_RAM, ++ MIGRATION_CAPABILITY_DIRTY_BITMAPS, ++ MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME, ++ MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE, ++ MIGRATION_CAPABILITY_RETURN_PATH, ++ MIGRATION_CAPABILITY_MULTIFD, ++ MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER, ++ MIGRATION_CAPABILITY_AUTO_CONVERGE, ++ MIGRATION_CAPABILITY_RELEASE_RAM, ++ MIGRATION_CAPABILITY_RDMA_PIN_ALL, ++ MIGRATION_CAPABILITY_COMPRESS, ++ MIGRATION_CAPABILITY_XBZRLE, ++ MIGRATION_CAPABILITY_X_COLO, ++ MIGRATION_CAPABILITY_VALIDATE_UUID, ++ MIGRATION_CAPABILITY_ZERO_COPY_SEND); ++ ++/** ++ * @migration_caps_check - check capability compatibility ++ * ++ * @old_caps: old capability list ++ * @new_caps: new capability list ++ * @errp: set *errp if the check failed, with reason ++ * ++ * Returns true if check passed, otherwise false. 
++ */ ++bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) ++{ ++ MigrationIncomingState *mis = migration_incoming_get_current(); ++ ++#ifndef CONFIG_LIVE_BLOCK_MIGRATION ++ if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { ++ error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " ++ "block migration"); ++ error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); ++ return false; ++ } ++#endif ++ ++#ifndef CONFIG_REPLICATION ++ if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { ++ error_setg(errp, "QEMU compiled without replication module" ++ " can't enable COLO"); ++ error_append_hint(errp, "Please enable replication before COLO.\n"); ++ return false; ++ } ++#endif ++ ++ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { ++ /* This check is reasonably expensive, so only when it's being ++ * set the first time, also it's only the destination that needs ++ * special support. ++ */ ++ if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && ++ runstate_check(RUN_STATE_INMIGRATE) && ++ !postcopy_ram_supported_by_host(mis)) { ++ /* postcopy_ram_supported_by_host will have emitted a more ++ * detailed message ++ */ ++ error_setg(errp, "Postcopy is not supported"); ++ return false; ++ } ++ ++ if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { ++ error_setg(errp, "Postcopy is not compatible with ignore-shared"); ++ return false; ++ } ++ } ++ ++ if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { ++ WriteTrackingSupport wt_support; ++ int idx; ++ /* ++ * Check if 'background-snapshot' capability is supported by ++ * host kernel and compatible with guest memory configuration. 
++ */ ++ wt_support = migrate_query_write_tracking(); ++ if (wt_support < WT_SUPPORT_AVAILABLE) { ++ error_setg(errp, "Background-snapshot is not supported by host kernel"); ++ return false; ++ } ++ if (wt_support < WT_SUPPORT_COMPATIBLE) { ++ error_setg(errp, "Background-snapshot is not compatible " ++ "with guest memory configuration"); ++ return false; ++ } ++ ++ /* ++ * Check if there are any migration capabilities ++ * incompatible with 'background-snapshot'. ++ */ ++ for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { ++ int incomp_cap = check_caps_background_snapshot.caps[idx]; ++ if (new_caps[incomp_cap]) { ++ error_setg(errp, ++ "Background-snapshot is not compatible with %s", ++ MigrationCapability_str(incomp_cap)); ++ return false; ++ } ++ } ++ } ++ ++#ifdef CONFIG_LINUX ++ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && ++ (!new_caps[MIGRATION_CAPABILITY_MULTIFD] || ++ new_caps[MIGRATION_CAPABILITY_COMPRESS] || ++ new_caps[MIGRATION_CAPABILITY_XBZRLE] || ++ migrate_multifd_compression() || ++ migrate_use_tls())) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#else ++ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { ++ error_setg(errp, ++ "Zero copy currently only available on Linux"); ++ return false; ++ } ++#endif ++ ++ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { ++ if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { ++ error_setg(errp, "Postcopy preempt requires postcopy-ram"); ++ return false; ++ } ++ ++ /* ++ * Preempt mode requires urgent pages to be sent in separate ++ * channel, OTOH compression logic will disorder all pages into ++ * different compression channels, which is not compatible with the ++ * preempt assumptions on channel assignments. 
++ */ ++ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { ++ error_setg(errp, "Postcopy preempt not compatible with compress"); ++ return false; ++ } ++ } ++ ++ if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { ++ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { ++ error_setg(errp, "Multifd is not compatible with compress"); ++ return false; ++ } ++ } ++ ++ return true; ++} +diff --git a/migration/options.h b/migration/options.h +index 25c002b37a..e779f14161 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -38,4 +38,8 @@ bool migrate_xbzrle(void); + bool migrate_zero_blocks(void); + bool migrate_zero_copy_send(void); + ++/* capabilities helpers */ ++ ++bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); ++ + #endif +-- +2.39.1 + diff --git a/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch b/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch new file mode 100644 index 0000000..47c6f83 --- /dev/null +++ b/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch @@ -0,0 +1,136 @@ +From 13da9060fa2dfc666cd6f4b9bc85b7cee0fef45e Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:00:16 +0100 +Subject: [PATCH 24/56] migration: Move migrate_colo_enabled() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [23/50] 4809b1091edee38bd222af41b6313133705785c7 (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_colo() to be +consistent with all other capabilities. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 5e8046445575dc5879e63c5d07af893d174813d0) +Signed-off-by: Peter Xu +--- + migration/migration.c | 16 +++++----------- + migration/migration.h | 1 - + migration/options.c | 6 ++++++ + migration/options.h | 1 + + 4 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 66ea55be06..59ee0ef82b 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2411,7 +2411,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + } + + if (blk || blk_inc) { +- if (migrate_colo_enabled()) { ++ if (migrate_colo()) { + error_setg(errp, "No disk migration is required in COLO mode"); + return false; + } +@@ -3304,7 +3304,7 @@ static void migration_completion(MigrationState *s) + * have done so in order to remember to reactivate + * them if migration fails or is cancelled. + */ +- s->block_inactive = !migrate_colo_enabled(); ++ s->block_inactive = !migrate_colo(); + qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); + ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, + s->block_inactive); +@@ -3357,7 +3357,7 @@ static void migration_completion(MigrationState *s) + goto fail; + } + +- if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) { ++ if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) { + /* COLO does not support postcopy */ + migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_COLO); +@@ -3435,12 +3435,6 @@ fail: + MIGRATION_STATUS_FAILED); + } + +-bool migrate_colo_enabled(void) +-{ +- MigrationState *s = migrate_get_current(); +- return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; +-} +- + typedef enum MigThrError { + /* No error detected */ + MIG_THR_ERR_NONE = 0, +@@ -3771,7 +3765,7 @@ static void migration_iteration_finish(MigrationState *s) + runstate_set(RUN_STATE_POSTMIGRATE); + break; + case MIGRATION_STATUS_COLO: 
+- if (!migrate_colo_enabled()) { ++ if (!migrate_colo()) { + error_report("%s: critical error: calling COLO code without " + "COLO enabled", __func__); + } +@@ -3967,7 +3961,7 @@ static void *migration_thread(void *opaque) + qemu_savevm_send_postcopy_advise(s->to_dst_file); + } + +- if (migrate_colo_enabled()) { ++ if (migrate_colo()) { + /* Notify migration destination that we enable COLO */ + qemu_savevm_send_colo_enable(s->to_dst_file); + } +diff --git a/migration/migration.h b/migration/migration.h +index a25fed6ef0..42f0c68b6f 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -463,7 +463,6 @@ bool migrate_use_zero_copy_send(void); + int migrate_use_tls(void); + int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); +-bool migrate_colo_enabled(void); + + bool migrate_use_block(void); + bool migrate_use_block_incremental(void); +diff --git a/migration/options.c b/migration/options.c +index 88a9a45913..bd33c5da0a 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -33,6 +33,12 @@ bool migrate_background_snapshot(void) + return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; + } + ++bool migrate_colo(void) ++{ ++ MigrationState *s = migrate_get_current(); ++ return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; ++} ++ + bool migrate_dirty_bitmaps(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 0dfa0af245..2a0ee61ff8 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -18,6 +18,7 @@ + + bool migrate_auto_converge(void); + bool migrate_background_snapshot(void); ++bool migrate_colo(void); + bool migrate_dirty_bitmaps(void); + bool migrate_ignore_shared(void); + bool migrate_late_block_activate(void); +-- +2.39.1 + diff --git a/kvm-migration-Move-migrate_postcopy-to-options.c.patch b/kvm-migration-Move-migrate_postcopy-to-options.c.patch new file mode 100644 index 0000000..892ec9e --- /dev/null +++ 
b/kvm-migration-Move-migrate_postcopy-to-options.c.patch @@ -0,0 +1,98 @@ +From 710fe195a3c13ffe96795a7a2b550c00319997ea Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:44:20 +0100 +Subject: [PATCH 47/56] migration: Move migrate_postcopy() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [46/50] a4f3455b3524a331f44b481bf7a79318aef5abaa (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit f774fde5d4e97cbfc64dab6622c2c53c5fe5c9fe) +Signed-off-by: Peter Xu +--- + migration/migration.c | 5 ----- + migration/migration.h | 2 -- + migration/options.c | 8 ++++++++ + migration/options.h | 9 +++++++++ + 4 files changed, 17 insertions(+), 7 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index f27ce30be2..46a5ea4d42 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2245,11 +2245,6 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) + qemu_sem_post(&s->pause_sem); + } + +-bool migrate_postcopy(void) +-{ +- return migrate_postcopy_ram() || migrate_dirty_bitmaps(); +-} +- + int migrate_use_tls(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 3ae938b19c..dcf906868d 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -447,8 +447,6 @@ bool migration_is_blocked(Error **errp); + bool migration_in_postcopy(void); + MigrationState *migrate_get_current(void); + +-bool migrate_postcopy(void); +- + int migrate_use_tls(void); + + uint64_t ram_get_total_transferred_pages(void); +diff --git a/migration/options.c b/migration/options.c +index 615534c151..8bd2d949ae 100644 +--- a/migration/options.c ++++ 
b/migration/options.c +@@ -204,6 +204,14 @@ bool migrate_zero_copy_send(void) + + return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; + } ++ ++/* pseudo capabilities */ ++ ++bool migrate_postcopy(void) ++{ ++ return migrate_postcopy_ram() || migrate_dirty_bitmaps(); ++} ++ + typedef enum WriteTrackingSupport { + WT_SUPPORT_UNKNOWN = 0, + WT_SUPPORT_ABSENT, +diff --git a/migration/options.h b/migration/options.h +index 99f6bbd7a1..093bc907a1 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -38,6 +38,15 @@ bool migrate_xbzrle(void); + bool migrate_zero_blocks(void); + bool migrate_zero_copy_send(void); + ++/* ++ * pseudo capabilities ++ * ++ * These are functions that are used in a similar way to capabilities ++ * check, but they are not a capability. ++ */ ++ ++bool migrate_postcopy(void); ++ + /* capabilities helpers */ + + bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); +-- +2.39.1 + diff --git a/kvm-migration-Move-migrate_use_block-to-options.c.patch b/kvm-migration-Move-migrate_use_block-to-options.c.patch new file mode 100644 index 0000000..f7cb338 --- /dev/null +++ b/kvm-migration-Move-migrate_use_block-to-options.c.patch @@ -0,0 +1,134 @@ +From 276877a71778a5cef0dc5bc843e2679f0fdabb77 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:23:57 +0100 +Subject: [PATCH 30/56] migration: Move migrate_use_block() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [29/50] fcaeb0e07cf828f3cd0d115515b30d913525a0a2 (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_block() +to be consistent with all other capabilities. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 9d4b1e5f22a838285ebeb8f0eb7cc8df1161998f) +Signed-off-by: Peter Xu +--- + migration/block.c | 2 +- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/savevm.c | 2 +- + 6 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/migration/block.c b/migration/block.c +index 4b167fa5cf..f0977217cf 100644 +--- a/migration/block.c ++++ b/migration/block.c +@@ -1001,7 +1001,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) + + static bool block_is_active(void *opaque) + { +- return migrate_use_block(); ++ return migrate_block(); + } + + static SaveVMHandlers savevm_block_handlers = { +diff --git a/migration/migration.c b/migration/migration.c +index a4ede4294e..96f82bd165 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2415,7 +2415,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + error_setg(errp, "No disk migration is required in COLO mode"); + return false; + } +- if (migrate_use_block() || migrate_use_block_incremental()) { ++ if (migrate_block() || migrate_use_block_incremental()) { + error_setg(errp, "Command options are incompatible with " + "current migration capabilities"); + return false; +@@ -2622,15 +2622,6 @@ static int64_t migrate_max_postcopy_bandwidth(void) + return s->parameters.max_postcopy_bandwidth; + } + +-bool migrate_use_block(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; +-} +- + bool migrate_use_return_path(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index e2bb5b1e2f..d4b68b08a5 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -457,7 +457,6 @@ int migrate_multifd_zstd_level(void); + int migrate_use_tls(void); + uint64_t 
migrate_xbzrle_cache_size(void); + +-bool migrate_use_block(void); + bool migrate_use_block_incremental(void); + int migrate_max_cpu_throttle(void); + bool migrate_use_return_path(void); +diff --git a/migration/options.c b/migration/options.c +index 25264c500e..fe1eadeed6 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -33,6 +33,15 @@ bool migrate_background_snapshot(void) + return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; + } + ++bool migrate_block(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; ++} ++ + bool migrate_colo(void) + { + MigrationState *s = migrate_get_current(); +diff --git a/migration/options.h b/migration/options.h +index 8f76a88329..e985a5233e 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -18,6 +18,7 @@ + + bool migrate_auto_converge(void); + bool migrate_background_snapshot(void); ++bool migrate_block(void); + bool migrate_colo(void); + bool migrate_compress(void); + bool migrate_dirty_bitmaps(void); +diff --git a/migration/savevm.c b/migration/savevm.c +index ebcf571e37..9671211339 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1612,7 +1612,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + return -EINVAL; + } + +- if (migrate_use_block()) { ++ if (migrate_block()) { + error_setg(errp, "Block migration and snapshots are incompatible"); + return -EINVAL; + } +-- +2.39.1 + diff --git a/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch b/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch new file mode 100644 index 0000000..3f20289 --- /dev/null +++ b/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch @@ -0,0 +1,121 @@ +From def66503f4ccb97cf8029f88efe8e955edc8d32f Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 00:49:47 +0100 +Subject: [PATCH 39/56] migration: Move migrate_use_block_incremental() to + option.c +MIME-Version: 1.0 
+Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [38/50] 961fda6464df3384fbcee88c726b56a33c26e14e (peterx/qemu-kvm) + +To be consistent with every other parameter, rename to +migrate_block_incremental(). + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 6f8be7080a1f79bf3832cf798fba1697c409c597) +Signed-off-by: Peter Xu +--- + migration/block.c | 2 +- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + 5 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/migration/block.c b/migration/block.c +index f0977217cf..6d532ac7a2 100644 +--- a/migration/block.c ++++ b/migration/block.c +@@ -417,7 +417,7 @@ static int init_blk_migration(QEMUFile *f) + bmds->bulk_completed = 0; + bmds->total_sectors = sectors; + bmds->completed_sectors = 0; +- bmds->shared_base = migrate_use_block_incremental(); ++ bmds->shared_base = migrate_block_incremental(); + + assert(i < num_bs); + bmds_bs[i].bmds = bmds; +diff --git a/migration/migration.c b/migration/migration.c +index 78bca9a93f..724e841eb9 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2157,7 +2157,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + error_setg(errp, "No disk migration is required in COLO mode"); + return false; + } +- if (migrate_block() || migrate_use_block_incremental()) { ++ if (migrate_block() || migrate_block_incremental()) { + error_setg(errp, "Command options are incompatible with " + "current migration capabilities"); + return false; +@@ -2273,15 +2273,6 @@ int migrate_use_tls(void) + return s->parameters.tls_creds && *s->parameters.tls_creds; + } + +-bool 
migrate_use_block_incremental(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.block_incremental; +-} +- + /* migration thread support */ + /* + * Something bad happened to the RP stream, mark an error +diff --git a/migration/migration.h b/migration/migration.h +index 8451e5f2fe..86051af132 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -451,7 +451,6 @@ bool migrate_postcopy(void); + + int migrate_use_tls(void); + +-bool migrate_use_block_incremental(void); + int migrate_max_cpu_throttle(void); + + uint64_t ram_get_total_transferred_pages(void); +diff --git a/migration/options.c b/migration/options.c +index 8d15be858c..2b6d88b4b9 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -463,6 +463,15 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + + /* parameters */ + ++bool migrate_block_incremental(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.block_incremental; ++} ++ + int migrate_compress_level(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index b24ee92283..96d5a8e6e4 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -45,6 +45,7 @@ bool migrate_cap_set(int cap, bool value, Error **errp); + + /* parameters */ + ++bool migrate_block_incremental(void); + int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); +-- +2.39.1 + diff --git a/kvm-migration-Move-migrate_use_compression-to-options.c.patch b/kvm-migration-Move-migrate_use_compression-to-options.c.patch new file mode 100644 index 0000000..8b74183 --- /dev/null +++ b/kvm-migration-Move-migrate_use_compression-to-options.c.patch @@ -0,0 +1,183 @@ +From ae183bfc9d7b001d3c4929556b095a76203bc08d Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:03:48 +0100 +Subject: [PATCH 25/56] migration: Move migrate_use_compression() to 
options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [24/50] 126b865f51bd4a1ae3a46411fdcd59033bfc5376 (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_compress() +to be consistent with all other capabilities. + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit a7a94d14358dd7b445e20c2f26218ff987747642) +Signed-off-by: Peter Xu +--- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 16 ++++++++-------- + 5 files changed, 19 insertions(+), 19 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 59ee0ef82b..c6e32555a8 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1133,7 +1133,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->xbzrle_cache->overflow = xbzrle_counters.overflow; + } + +- if (migrate_use_compression()) { ++ if (migrate_compress()) { + info->compression = g_malloc0(sizeof(*info->compression)); + info->compression->pages = compression_counters.pages; + info->compression->busy = compression_counters.busy; +@@ -2522,15 +2522,6 @@ bool migrate_postcopy(void) + return migrate_postcopy_ram() || migrate_dirty_bitmaps(); + } + +-bool migrate_use_compression(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; +-} +- + int migrate_compress_level(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 42f0c68b6f..77aa91c840 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -471,7 +471,6 @@ bool 
migrate_use_return_path(void); + + uint64_t ram_get_total_transferred_pages(void); + +-bool migrate_use_compression(void); + int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); +diff --git a/migration/options.c b/migration/options.c +index bd33c5da0a..fa7a13d3dc 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -39,6 +39,15 @@ bool migrate_colo(void) + return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; + } + ++bool migrate_compress(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; ++} ++ + bool migrate_dirty_bitmaps(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index 2a0ee61ff8..da2193fd94 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -19,6 +19,7 @@ + bool migrate_auto_converge(void); + bool migrate_background_snapshot(void); + bool migrate_colo(void); ++bool migrate_compress(void); + bool migrate_dirty_bitmaps(void); + bool migrate_ignore_shared(void); + bool migrate_late_block_activate(void); +diff --git a/migration/ram.c b/migration/ram.c +index 912ccd89fa..d050d0c5fd 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -586,7 +586,7 @@ static void compress_threads_save_cleanup(void) + { + int i, thread_count; + +- if (!migrate_use_compression() || !comp_param) { ++ if (!migrate_compress() || !comp_param) { + return; + } + +@@ -625,7 +625,7 @@ static int compress_threads_save_setup(void) + { + int i, thread_count; + +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + return 0; + } + thread_count = migrate_compress_threads(); +@@ -1155,7 +1155,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) + rs->xbzrle_bytes_prev = xbzrle_counters.bytes; + } + +- if (migrate_use_compression()) { ++ if (migrate_compress()) { + compression_counters.busy_rate = (double)(compression_counters.busy - + 
rs->compress_thread_busy_prev) / page_count; + rs->compress_thread_busy_prev = compression_counters.busy; +@@ -2270,7 +2270,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) + + static bool save_page_use_compression(RAMState *rs) + { +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + return false; + } + +@@ -3734,7 +3734,7 @@ static int wait_for_decompress_done(void) + { + int idx, thread_count; + +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + return 0; + } + +@@ -3753,7 +3753,7 @@ static void compress_threads_load_cleanup(void) + { + int i, thread_count; + +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + return; + } + thread_count = migrate_decompress_threads(); +@@ -3794,7 +3794,7 @@ static int compress_threads_load_setup(QEMUFile *f) + { + int i, thread_count; + +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + return 0; + } + +@@ -4260,7 +4260,7 @@ static int ram_load_precopy(QEMUFile *f) + int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0; + /* ADVISE is earlier, it shows the source has the postcopy capability on */ + bool postcopy_advised = migration_incoming_postcopy_advised(); +- if (!migrate_use_compression()) { ++ if (!migrate_compress()) { + invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE; + } + +-- +2.39.1 + diff --git a/kvm-migration-Move-migrate_use_events-to-options.c.patch b/kvm-migration-Move-migrate_use_events-to-options.c.patch new file mode 100644 index 0000000..41e05c3 --- /dev/null +++ b/kvm-migration-Move-migrate_use_events-to-options.c.patch @@ -0,0 +1,120 @@ +From 940f1eb4347c72edb3e1abc02c8d7e7c95753dcf Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:08:09 +0100 +Subject: [PATCH 26/56] migration: Move migrate_use_events() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures 
for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [25/50] b3acd949af2a0fae18061d360e4f51dc12d32c6c (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_events() +to be consistent with all other capabilities. + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit b890902c9c025b87d02e718eec3090fd3525ab18) +Signed-off-by: Peter Xu +--- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 2 +- + 5 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index c6e32555a8..032cd5c050 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -353,7 +353,7 @@ void migration_incoming_state_destroy(void) + + static void migrate_generate_event(int new_state) + { +- if (migrate_use_events()) { ++ if (migrate_events()) { + qapi_event_send_migration(new_state); + } + } +@@ -2558,15 +2558,6 @@ int migrate_decompress_threads(void) + return s->parameters.decompress_threads; + } + +-bool migrate_use_events(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; +-} +- + bool migrate_use_multifd(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 77aa91c840..bd06520c19 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -475,7 +475,6 @@ int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); + int migrate_decompress_threads(void); +-bool migrate_use_events(void); + + /* Sending on the return path - generic and then for each message type */ + void migrate_send_rp_shut(MigrationIncomingState *mis, +diff --git a/migration/options.c b/migration/options.c 
+index fa7a13d3dc..d2219ee0e4 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -57,6 +57,15 @@ bool migrate_dirty_bitmaps(void) + return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; + } + ++bool migrate_events(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; ++} ++ + bool migrate_ignore_shared(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index da2193fd94..b998024eba 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -21,6 +21,7 @@ bool migrate_background_snapshot(void); + bool migrate_colo(void); + bool migrate_compress(void); + bool migrate_dirty_bitmaps(void); ++bool migrate_events(void); + bool migrate_ignore_shared(void); + bool migrate_late_block_activate(void); + bool migrate_pause_before_switchover(void); +diff --git a/migration/ram.c b/migration/ram.c +index d050d0c5fd..ee454a3849 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1246,7 +1246,7 @@ static void migration_bitmap_sync(RAMState *rs) + rs->num_dirty_pages_period = 0; + rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); + } +- if (migrate_use_events()) { ++ if (migrate_events()) { + uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); + qapi_event_send_migration_pass(generation); + } +-- +2.39.1 + diff --git a/kvm-migration-Move-migrate_use_multifd-to-options.c.patch b/kvm-migration-Move-migrate_use_multifd-to-options.c.patch new file mode 100644 index 0000000..97d6597 --- /dev/null +++ b/kvm-migration-Move-migrate_use_multifd-to-options.c.patch @@ -0,0 +1,247 @@ +From afd8fb766af2be5cff97753b026847b91b09a30e Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:10:29 +0100 +Subject: [PATCH 27/56] migration: Move migrate_use_multifd() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: 
Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [26/50] f2d72eae9cc80b2402ef613e809b40aa296d2e4c (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_multifd() +to be consistent with all other capabilities. + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 51b07548f7c31793adc178c7460c5f4369733c61) +Signed-off-by: Peter Xu +--- + migration/migration.c | 19 +++++-------------- + migration/migration.h | 1 - + migration/multifd.c | 16 ++++++++-------- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 2 +- + migration/socket.c | 2 +- + 7 files changed, 25 insertions(+), 25 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 032cd5c050..e1d7f25786 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -186,7 +186,7 @@ static void migrate_fd_cancel(MigrationState *s); + + static bool migration_needs_multiple_sockets(void) + { +- return migrate_use_multifd() || migrate_postcopy_preempt(); ++ return migrate_multifd() || migrate_postcopy_preempt(); + } + + static bool uri_supports_multi_channels(const char *uri) +@@ -732,7 +732,7 @@ void migration_fd_process_incoming(QEMUFile *f, Error **errp) + static bool migration_should_start_incoming(bool main_channel) + { + /* Multifd doesn't start unless all channels are established */ +- if (migrate_use_multifd()) { ++ if (migrate_multifd()) { + return migration_has_all_channels(); + } + +@@ -759,7 +759,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + uint32_t channel_magic = 0; + int ret = 0; + +- if (migrate_use_multifd() && !migrate_postcopy_ram() && ++ if (migrate_multifd() && !migrate_postcopy_ram() && + qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { + /* + * With multiple channels, it is possible that we 
receive channels +@@ -798,7 +798,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + } else { + /* Multiple connections */ + assert(migration_needs_multiple_sockets()); +- if (migrate_use_multifd()) { ++ if (migrate_multifd()) { + multifd_recv_new_channel(ioc, &local_err); + } else { + assert(migrate_postcopy_preempt()); +@@ -834,7 +834,7 @@ bool migration_has_all_channels(void) + return false; + } + +- if (migrate_use_multifd()) { ++ if (migrate_multifd()) { + return multifd_recv_all_channels_created(); + } + +@@ -2558,15 +2558,6 @@ int migrate_decompress_threads(void) + return s->parameters.decompress_threads; + } + +-bool migrate_use_multifd(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; +-} +- + int migrate_multifd_channels(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index bd06520c19..49c0e13f41 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -449,7 +449,6 @@ MigrationState *migrate_get_current(void); + + bool migrate_postcopy(void); + +-bool migrate_use_multifd(void); + int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); +diff --git a/migration/multifd.c b/migration/multifd.c +index 903df2117b..6807328189 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -516,7 +516,7 @@ void multifd_save_cleanup(void) + { + int i; + +- if (!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return; + } + multifd_send_terminate_threads(NULL); +@@ -587,7 +587,7 @@ int multifd_send_sync_main(QEMUFile *f) + int i; + bool flush_zero_copy; + +- if (!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return 0; + } + if (multifd_send_state->pages->num) { +@@ -911,7 +911,7 @@ int multifd_save_setup(Error **errp) + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); + uint8_t i; + +- if 
(!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return 0; + } + +@@ -1016,7 +1016,7 @@ static void multifd_recv_terminate_threads(Error *err) + + void multifd_load_shutdown(void) + { +- if (migrate_use_multifd()) { ++ if (migrate_multifd()) { + multifd_recv_terminate_threads(NULL); + } + } +@@ -1025,7 +1025,7 @@ void multifd_load_cleanup(void) + { + int i; + +- if (!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return; + } + multifd_recv_terminate_threads(NULL); +@@ -1072,7 +1072,7 @@ void multifd_recv_sync_main(void) + { + int i; + +- if (!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return; + } + for (i = 0; i < migrate_multifd_channels(); i++) { +@@ -1170,7 +1170,7 @@ int multifd_load_setup(Error **errp) + * Return successfully if multiFD recv state is already initialised + * or multiFD is not enabled. + */ +- if (multifd_recv_state || !migrate_use_multifd()) { ++ if (multifd_recv_state || !migrate_multifd()) { + return 0; + } + +@@ -1216,7 +1216,7 @@ bool multifd_recv_all_channels_created(void) + { + int thread_count = migrate_multifd_channels(); + +- if (!migrate_use_multifd()) { ++ if (!migrate_multifd()) { + return true; + } + +diff --git a/migration/options.c b/migration/options.c +index d2219ee0e4..58673fc101 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -84,6 +84,15 @@ bool migrate_late_block_activate(void) + return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; + } + ++bool migrate_multifd(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; ++} ++ + bool migrate_pause_before_switchover(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index b998024eba..d07269ee38 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -24,6 +24,7 @@ bool migrate_dirty_bitmaps(void); + bool migrate_events(void); + bool migrate_ignore_shared(void); + bool migrate_late_block_activate(void); 
++bool migrate_multifd(void); + bool migrate_pause_before_switchover(void); + bool migrate_postcopy_blocktime(void); + bool migrate_postcopy_preempt(void); +diff --git a/migration/ram.c b/migration/ram.c +index ee454a3849..859dd7b63f 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2362,7 +2362,7 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) + * if host page size == guest page size the dest guest during run may + * still see partially copied pages which is data corruption. + */ +- if (migrate_use_multifd() && !migration_in_postcopy()) { ++ if (migrate_multifd() && !migration_in_postcopy()) { + return ram_save_multifd_page(pss->pss_channel, block, offset); + } + +diff --git a/migration/socket.c b/migration/socket.c +index ebf9ac41af..f4835a256a 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -183,7 +183,7 @@ socket_start_incoming_migration_internal(SocketAddress *saddr, + + qio_net_listener_set_name(listener, "migration-socket-listener"); + +- if (migrate_use_multifd()) { ++ if (migrate_multifd()) { + num = migrate_multifd_channels(); + } else if (migrate_postcopy_preempt()) { + num = RAM_CHANNEL_MAX; +-- +2.39.1 + diff --git a/kvm-migration-Move-migrate_use_return-to-options.c.patch b/kvm-migration-Move-migrate_use_return-to-options.c.patch new file mode 100644 index 0000000..b250d40 --- /dev/null +++ b/kvm-migration-Move-migrate_use_return-to-options.c.patch @@ -0,0 +1,138 @@ +From 145b630767dbc7020ddf39b20075f4691f71321a Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:25:47 +0100 +Subject: [PATCH 31/56] migration: Move migrate_use_return() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [30/50] 
5cc150188bcc61b69ea0844253597594ab18fc13 (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_return_path() +to be consistent with all other capabilities. + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 38ad1110e368bf91453c0abbd657224d57b65d47) +Signed-off-by: Peter Xu +--- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/rdma.c | 6 +++--- + 5 files changed, 14 insertions(+), 14 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 96f82bd165..f7facecd66 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2622,15 +2622,6 @@ static int64_t migrate_max_postcopy_bandwidth(void) + return s->parameters.max_postcopy_bandwidth; + } + +-bool migrate_use_return_path(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; +-} +- + bool migrate_use_block_incremental(void) + { + MigrationState *s; +@@ -4175,7 +4166,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + * precopy, only if user specified "return-path" capability would + * QEMU uses the return path. 
+ */ +- if (migrate_postcopy_ram() || migrate_use_return_path()) { ++ if (migrate_postcopy_ram() || migrate_return_path()) { + if (open_return_path_on_source(s, !resume)) { + error_report("Unable to open return-path for postcopy"); + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); +diff --git a/migration/migration.h b/migration/migration.h +index d4b68b08a5..24184622a8 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -459,7 +459,6 @@ uint64_t migrate_xbzrle_cache_size(void); + + bool migrate_use_block_incremental(void); + int migrate_max_cpu_throttle(void); +-bool migrate_use_return_path(void); + + uint64_t ram_get_total_transferred_pages(void); + +diff --git a/migration/options.c b/migration/options.c +index fe1eadeed6..2003e413da 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -147,6 +147,15 @@ bool migrate_release_ram(void) + return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; + } + ++bool migrate_return_path(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; ++} ++ + bool migrate_validate_uuid(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index e985a5233e..316efd1063 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -31,6 +31,7 @@ bool migrate_postcopy_blocktime(void); + bool migrate_postcopy_preempt(void); + bool migrate_postcopy_ram(void); + bool migrate_release_ram(void); ++bool migrate_return_path(void); + bool migrate_validate_uuid(void); + bool migrate_xbzrle(void); + bool migrate_zero_blocks(void); +diff --git a/migration/rdma.c b/migration/rdma.c +index f35f021963..bf55e2f163 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -3373,7 +3373,7 @@ static int qemu_rdma_accept(RDMAContext *rdma) + * initialize the RDMAContext for return path for postcopy after first + * connection request reached. 
+ */ +- if ((migrate_postcopy() || migrate_use_return_path()) ++ if ((migrate_postcopy() || migrate_return_path()) + && !rdma->is_return_path) { + rdma_return_path = qemu_rdma_data_init(rdma->host_port, NULL); + if (rdma_return_path == NULL) { +@@ -3456,7 +3456,7 @@ static int qemu_rdma_accept(RDMAContext *rdma) + } + + /* Accept the second connection request for return path */ +- if ((migrate_postcopy() || migrate_use_return_path()) ++ if ((migrate_postcopy() || migrate_return_path()) + && !rdma->is_return_path) { + qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration, + NULL, +@@ -4193,7 +4193,7 @@ void rdma_start_outgoing_migration(void *opaque, + } + + /* RDMA postcopy need a separate queue pair for return path */ +- if (migrate_postcopy() || migrate_use_return_path()) { ++ if (migrate_postcopy() || migrate_return_path()) { + rdma_return_path = qemu_rdma_data_init(host_port, errp); + + if (rdma_return_path == NULL) { +-- +2.39.1 + diff --git a/kvm-migration-Move-migrate_use_tls-to-options.c.patch b/kvm-migration-Move-migrate_use_tls-to-options.c.patch new file mode 100644 index 0000000..84734af --- /dev/null +++ b/kvm-migration-Move-migrate_use_tls-to-options.c.patch @@ -0,0 +1,134 @@ +From 2e2df63892e191e91216b8253171162f69b93387 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:41:23 +0100 +Subject: [PATCH 49/56] migration: Move migrate_use_tls() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [48/50] 314431b0f5e92d2211e58a8161f32d7b67d69e38 (peterx/qemu-kvm) + +Once there, rename it to migrate_tls() and make it return bool for +consistency. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy + +--- + +Fix typos found by fabiano + +(cherry picked from commit 10d4703be5d884bbbb6ecafe0e8bb270ad6ea937) +Signed-off-by: Peter Xu +--- + migration/migration.c | 9 --------- + migration/migration.h | 2 -- + migration/options.c | 11 ++++++++++- + migration/options.h | 1 + + migration/tls.c | 3 ++- + 5 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index c2e109329d..22ef83c619 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2177,15 +2177,6 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) + qemu_sem_post(&s->pause_sem); + } + +-int migrate_use_tls(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.tls_creds && *s->parameters.tls_creds; +-} +- + /* migration thread support */ + /* + * Something bad happened to the RP stream, mark an error +diff --git a/migration/migration.h b/migration/migration.h +index dcf906868d..2b71df8617 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -447,8 +447,6 @@ bool migration_is_blocked(Error **errp); + bool migration_in_postcopy(void); + MigrationState *migrate_get_current(void); + +-int migrate_use_tls(void); +- + uint64_t ram_get_total_transferred_pages(void); + + /* Sending on the return path - generic and then for each message type */ +diff --git a/migration/options.c b/migration/options.c +index 8e8753d9be..d4c0714683 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -214,6 +214,15 @@ bool migrate_postcopy(void) + return migrate_postcopy_ram() || migrate_dirty_bitmaps(); + } + ++bool migrate_tls(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.tls_creds && *s->parameters.tls_creds; ++} ++ + typedef enum WriteTrackingSupport { + WT_SUPPORT_UNKNOWN = 0, + WT_SUPPORT_ABSENT, +@@ -363,7 +372,7 @@ bool migrate_caps_check(bool *old_caps, bool 
*new_caps, Error **errp) + new_caps[MIGRATION_CAPABILITY_COMPRESS] || + new_caps[MIGRATION_CAPABILITY_XBZRLE] || + migrate_multifd_compression() || +- migrate_use_tls())) { ++ migrate_tls())) { + error_setg(errp, + "Zero copy only available for non-compressed non-TLS multifd migration"); + return false; +diff --git a/migration/options.h b/migration/options.h +index 1b78fa9f3d..13318a16c7 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -46,6 +46,7 @@ bool migrate_zero_copy_send(void); + */ + + bool migrate_postcopy(void); ++bool migrate_tls(void); + + /* capabilities helpers */ + +diff --git a/migration/tls.c b/migration/tls.c +index 4d2166a209..acd38e0b62 100644 +--- a/migration/tls.c ++++ b/migration/tls.c +@@ -22,6 +22,7 @@ + #include "channel.h" + #include "migration.h" + #include "tls.h" ++#include "options.h" + #include "crypto/tlscreds.h" + #include "qemu/error-report.h" + #include "qapi/error.h" +@@ -165,7 +166,7 @@ void migration_tls_channel_connect(MigrationState *s, + + bool migrate_channel_requires_tls_upgrade(QIOChannel *ioc) + { +- if (!migrate_use_tls()) { ++ if (!migrate_tls()) { + return false; + } + +-- +2.39.1 + diff --git a/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch b/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch new file mode 100644 index 0000000..e3a8bab --- /dev/null +++ b/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch @@ -0,0 +1,156 @@ +From 2184f7dae0df5fa52deba2dc884e09c6bdbc7b5f Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:20:13 +0100 +Subject: [PATCH 29/56] migration: Move migrate_use_xbzrle() to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [28/50] 
fc8bee0f691a96e6bd0b41f2511abe507b81fea5 (peterx/qemu-kvm) + +Once that we are there, we rename the function to migrate_xbzrle() +to be consistent with all other capabilities. +We change the type to return bool also for consistency. + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 87dca0c9bb63014ef73ad82f7aedea1cb5a822e7) +Signed-off-by: Peter Xu +--- + migration/migration.c | 11 +---------- + migration/migration.h | 1 - + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/ram.c | 10 +++++----- + 5 files changed, 16 insertions(+), 16 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 1d63718e88..a4ede4294e 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1122,7 +1122,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->downtime_bytes = stat64_get(&ram_counters.downtime_bytes); + info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); + +- if (migrate_use_xbzrle()) { ++ if (migrate_xbzrle()) { + info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); + info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size(); + info->xbzrle_cache->bytes = xbzrle_counters.bytes; +@@ -2604,15 +2604,6 @@ int migrate_use_tls(void) + return s->parameters.tls_creds && *s->parameters.tls_creds; + } + +-int migrate_use_xbzrle(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; +-} +- + uint64_t migrate_xbzrle_cache_size(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index c939f82d53..e2bb5b1e2f 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -455,7 +455,6 @@ int migrate_multifd_zlib_level(void); + int migrate_multifd_zstd_level(void); + + int migrate_use_tls(void); +-int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); + + bool 
migrate_use_block(void); +diff --git a/migration/options.c b/migration/options.c +index f357c99996..25264c500e 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -147,6 +147,15 @@ bool migrate_validate_uuid(void) + return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; + } + ++bool migrate_xbzrle(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; ++} ++ + bool migrate_zero_blocks(void) + { + MigrationState *s; +diff --git a/migration/options.h b/migration/options.h +index ad22f4d24a..8f76a88329 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -31,6 +31,7 @@ bool migrate_postcopy_preempt(void); + bool migrate_postcopy_ram(void); + bool migrate_release_ram(void); + bool migrate_validate_uuid(void); ++bool migrate_xbzrle(void); + bool migrate_zero_blocks(void); + bool migrate_zero_copy_send(void); + +diff --git a/migration/ram.c b/migration/ram.c +index 859dd7b63f..4576d0d849 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -156,14 +156,14 @@ static struct { + + static void XBZRLE_cache_lock(void) + { +- if (migrate_use_xbzrle()) { ++ if (migrate_xbzrle()) { + qemu_mutex_lock(&XBZRLE.lock); + } + } + + static void XBZRLE_cache_unlock(void) + { +- if (migrate_use_xbzrle()) { ++ if (migrate_xbzrle()) { + qemu_mutex_unlock(&XBZRLE.lock); + } + } +@@ -1137,7 +1137,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) + return; + } + +- if (migrate_use_xbzrle()) { ++ if (migrate_xbzrle()) { + double encoded_size, unencoded_size; + + xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss - +@@ -1626,7 +1626,7 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) + /* Flag that we've looped */ + pss->complete_round = true; + /* After the first round, enable XBZRLE. 
*/ +- if (migrate_use_xbzrle()) { ++ if (migrate_xbzrle()) { + rs->xbzrle_enabled = true; + } + } +@@ -2979,7 +2979,7 @@ static int xbzrle_init(void) + { + Error *local_err = NULL; + +- if (!migrate_use_xbzrle()) { ++ if (!migrate_xbzrle()) { + return 0; + } + +-- +2.39.1 + diff --git a/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch b/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch new file mode 100644 index 0000000..90031df --- /dev/null +++ b/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch @@ -0,0 +1,167 @@ +From 6eb252887378d639ad2e90dd426a1812d4b72ca6 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 22:17:14 +0100 +Subject: [PATCH 28/56] migration: Move migrate_use_zero_copy_send() to + options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [27/50] 5a4c2b5e75c62e0f60f9c4121a2756bd140a60d9 (peterx/qemu-kvm) + +Once that we are there, we rename the function to +migrate_zero_copy_send() to be consistent with all other capabilities. + +We can remove the CONFIG_LINUX guard. We already check that we can't +setup this capability in migrate_caps_check(). 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit b4bc342c766640e0cb8a0b72f71e0ee5545fb790) +Signed-off-by: Peter Xu +--- + migration/migration.c | 13 +------------ + migration/migration.h | 5 ----- + migration/multifd.c | 8 ++++---- + migration/options.c | 9 +++++++++ + migration/options.h | 1 + + migration/socket.c | 2 +- + 6 files changed, 16 insertions(+), 22 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index e1d7f25786..1d63718e88 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1609,7 +1609,7 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + } + + #ifdef CONFIG_LINUX +- if (migrate_use_zero_copy_send() && ++ if (migrate_zero_copy_send() && + ((params->has_multifd_compression && params->multifd_compression) || + (params->tls_creds && *params->tls_creds))) { + error_setg(errp, +@@ -2595,17 +2595,6 @@ int migrate_multifd_zstd_level(void) + return s->parameters.multifd_zstd_level; + } + +-#ifdef CONFIG_LINUX +-bool migrate_use_zero_copy_send(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; +-} +-#endif +- + int migrate_use_tls(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 49c0e13f41..c939f82d53 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -454,11 +454,6 @@ MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); + int migrate_multifd_zstd_level(void); + +-#ifdef CONFIG_LINUX +-bool migrate_use_zero_copy_send(void); +-#else +-#define migrate_use_zero_copy_send() (false) +-#endif + int migrate_use_tls(void); + int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); +diff --git a/migration/multifd.c b/migration/multifd.c +index 6807328189..cce3ad6988 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -25,7 
+25,7 @@ + #include "trace.h" + #include "multifd.h" + #include "threadinfo.h" +- ++#include "options.h" + #include "qemu/yank.h" + #include "io/channel-socket.h" + #include "yank_functions.h" +@@ -608,7 +608,7 @@ int multifd_send_sync_main(QEMUFile *f) + * all the dirty bitmaps. + */ + +- flush_zero_copy = migrate_use_zero_copy_send(); ++ flush_zero_copy = migrate_zero_copy_send(); + + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +@@ -653,7 +653,7 @@ static void *multifd_send_thread(void *opaque) + MigrationThread *thread = NULL; + Error *local_err = NULL; + int ret = 0; +- bool use_zero_copy_send = migrate_use_zero_copy_send(); ++ bool use_zero_copy_send = migrate_zero_copy_send(); + + thread = MigrationThreadAdd(p->name, qemu_get_thread_id()); + +@@ -945,7 +945,7 @@ int multifd_save_setup(Error **errp) + p->page_size = qemu_target_page_size(); + p->page_count = page_count; + +- if (migrate_use_zero_copy_send()) { ++ if (migrate_zero_copy_send()) { + p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; + } else { + p->write_flags = 0; +diff --git a/migration/options.c b/migration/options.c +index 58673fc101..f357c99996 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -155,3 +155,12 @@ bool migrate_zero_blocks(void) + + return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; + } ++ ++bool migrate_zero_copy_send(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; ++} +diff --git a/migration/options.h b/migration/options.h +index d07269ee38..ad22f4d24a 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -32,5 +32,6 @@ bool migrate_postcopy_ram(void); + bool migrate_release_ram(void); + bool migrate_validate_uuid(void); + bool migrate_zero_blocks(void); ++bool migrate_zero_copy_send(void); + + #endif +diff --git a/migration/socket.c b/migration/socket.c +index f4835a256a..1b6f5baefb 100644 +--- 
a/migration/socket.c ++++ b/migration/socket.c +@@ -98,7 +98,7 @@ static void socket_outgoing_migration(QIOTask *task, + + trace_migration_socket_outgoing_connected(data->hostname); + +- if (migrate_use_zero_copy_send() && ++ if (migrate_zero_copy_send() && + !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { + error_setg(&err, "Zero copy send feature not detected in host kernel"); + } +-- +2.39.1 + diff --git a/kvm-migration-Move-parameters-functions-to-option.c.patch b/kvm-migration-Move-parameters-functions-to-option.c.patch new file mode 100644 index 0000000..ad4510b --- /dev/null +++ b/kvm-migration-Move-parameters-functions-to-option.c.patch @@ -0,0 +1,317 @@ +From d5ea4c82c44a59ac70313eb1eac77999ca5fde36 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 00:39:03 +0100 +Subject: [PATCH 37/56] migration: Move parameters functions to option.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [36/50] 2540921028025504723e762c0a1d2f295ac5a6d1 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 1dfc4b9e19bcf1ad41a1be9ac82db35b9647c3c1) +Signed-off-by: Peter Xu +--- + migration/migration.c | 91 --------------------------------------- + migration/migration.h | 11 ----- + migration/multifd-zlib.c | 1 + + migration/multifd-zstd.c | 1 + + migration/options.c | 93 ++++++++++++++++++++++++++++++++++++++++ + migration/options.h | 13 ++++++ + 6 files changed, 108 insertions(+), 102 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 880a51210e..7f2e770deb 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2264,79 +2264,6 @@ bool 
migrate_postcopy(void) + return migrate_postcopy_ram() || migrate_dirty_bitmaps(); + } + +-int migrate_compress_level(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.compress_level; +-} +- +-int migrate_compress_threads(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.compress_threads; +-} +- +-int migrate_compress_wait_thread(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.compress_wait_thread; +-} +- +-int migrate_decompress_threads(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.decompress_threads; +-} +- +-int migrate_multifd_channels(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.multifd_channels; +-} +- +-MultiFDCompression migrate_multifd_compression(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); +- return s->parameters.multifd_compression; +-} +- +-int migrate_multifd_zlib_level(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.multifd_zlib_level; +-} +- +-int migrate_multifd_zstd_level(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.multifd_zstd_level; +-} +- + int migrate_use_tls(void) + { + MigrationState *s; +@@ -2346,24 +2273,6 @@ int migrate_use_tls(void) + return s->parameters.tls_creds && *s->parameters.tls_creds; + } + +-uint64_t migrate_xbzrle_cache_size(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.xbzrle_cache_size; +-} +- +-static int64_t migrate_max_postcopy_bandwidth(void) +-{ +- MigrationState *s; +- +- s = migrate_get_current(); +- +- return s->parameters.max_postcopy_bandwidth; +-} +- + bool migrate_use_block_incremental(void) + { + MigrationState *s; +diff --git a/migration/migration.h 
b/migration/migration.h +index 24184622a8..8451e5f2fe 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -449,24 +449,13 @@ MigrationState *migrate_get_current(void); + + bool migrate_postcopy(void); + +-int migrate_multifd_channels(void); +-MultiFDCompression migrate_multifd_compression(void); +-int migrate_multifd_zlib_level(void); +-int migrate_multifd_zstd_level(void); +- + int migrate_use_tls(void); +-uint64_t migrate_xbzrle_cache_size(void); + + bool migrate_use_block_incremental(void); + int migrate_max_cpu_throttle(void); + + uint64_t ram_get_total_transferred_pages(void); + +-int migrate_compress_level(void); +-int migrate_compress_threads(void); +-int migrate_compress_wait_thread(void); +-int migrate_decompress_threads(void); +- + /* Sending on the return path - generic and then for each message type */ + void migrate_send_rp_shut(MigrationIncomingState *mis, + uint32_t value); +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 37770248e1..81701250ad 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -18,6 +18,7 @@ + #include "qapi/error.h" + #include "migration.h" + #include "trace.h" ++#include "options.h" + #include "multifd.h" + + struct zlib_data { +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index f4a8e1ed1f..d1d29e76cc 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -18,6 +18,7 @@ + #include "qapi/error.h" + #include "migration.h" + #include "trace.h" ++#include "options.h" + #include "multifd.h" + + struct zstd_data { +diff --git a/migration/options.c b/migration/options.c +index f3b2d6e482..8d15be858c 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -460,3 +460,96 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + s->capabilities[cap->value->capability] = cap->value->state; + } + } ++ ++/* parameters */ ++ ++int migrate_compress_level(void) ++{ ++ MigrationState *s; ++ ++ s = 
migrate_get_current(); ++ ++ return s->parameters.compress_level; ++} ++ ++int migrate_compress_threads(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.compress_threads; ++} ++ ++int migrate_compress_wait_thread(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.compress_wait_thread; ++} ++ ++int migrate_decompress_threads(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.decompress_threads; ++} ++ ++int64_t migrate_max_postcopy_bandwidth(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.max_postcopy_bandwidth; ++} ++ ++int migrate_multifd_channels(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.multifd_channels; ++} ++ ++MultiFDCompression migrate_multifd_compression(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); ++ return s->parameters.multifd_compression; ++} ++ ++int migrate_multifd_zlib_level(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.multifd_zlib_level; ++} ++ ++int migrate_multifd_zstd_level(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.multifd_zstd_level; ++} ++ ++uint64_t migrate_xbzrle_cache_size(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.xbzrle_cache_size; ++} +diff --git a/migration/options.h b/migration/options.h +index 5979e4ff90..b24ee92283 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -43,4 +43,17 @@ bool migrate_zero_copy_send(void); + bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); + bool migrate_cap_set(int cap, bool value, Error **errp); + ++/* parameters */ ++ ++int migrate_compress_level(void); ++int migrate_compress_threads(void); ++int 
migrate_compress_wait_thread(void); ++int migrate_decompress_threads(void); ++int64_t migrate_max_postcopy_bandwidth(void); ++int migrate_multifd_channels(void); ++MultiFDCompression migrate_multifd_compression(void); ++int migrate_multifd_zlib_level(void); ++int migrate_multifd_zstd_level(void); ++uint64_t migrate_xbzrle_cache_size(void); ++ + #endif +-- +2.39.1 + diff --git a/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch b/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch new file mode 100644 index 0000000..10f185b --- /dev/null +++ b/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch @@ -0,0 +1,100 @@ +From d967ec22cdb20e0a846f050a2bc7bd4caa87940d Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 23:18:02 +0100 +Subject: [PATCH 35/56] migration: Move qmp_migrate_set_capabilities() to + options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [34/50] 16b62ca7e06c58d71389c449dc19c11939dd0882 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 45c1de13f09b1fd4ea26f54e6da12aae52f34cb8) +Signed-off-by: Peter Xu +--- + migration/migration.c | 26 -------------------------- + migration/options.c | 26 ++++++++++++++++++++++++++ + 2 files changed, 26 insertions(+), 26 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 3dc8ee4875..369cd91796 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1222,32 +1222,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) + return info; + } + +-void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, +- Error **errp) +-{ +- MigrationState *s = migrate_get_current(); +- 
MigrationCapabilityStatusList *cap; +- bool new_caps[MIGRATION_CAPABILITY__MAX]; +- +- if (migration_is_running(s->state)) { +- error_setg(errp, QERR_MIGRATION_ACTIVE); +- return; +- } +- +- memcpy(new_caps, s->capabilities, sizeof(new_caps)); +- for (cap = params; cap; cap = cap->next) { +- new_caps[cap->value->capability] = cap->value->state; +- } +- +- if (!migrate_caps_check(s->capabilities, new_caps, errp)) { +- return; +- } +- +- for (cap = params; cap; cap = cap->next) { +- s->capabilities[cap->value->capability] = cap->value->state; +- } +-} +- + /* + * Check whether the parameters are valid. Error will be put into errp + * (if provided). Return true if valid, otherwise false. +diff --git a/migration/options.c b/migration/options.c +index ff621bdeb3..4cbe77e35a 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -413,3 +413,29 @@ MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) + + return head; + } ++ ++void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, ++ Error **errp) ++{ ++ MigrationState *s = migrate_get_current(); ++ MigrationCapabilityStatusList *cap; ++ bool new_caps[MIGRATION_CAPABILITY__MAX]; ++ ++ if (migration_is_running(s->state)) { ++ error_setg(errp, QERR_MIGRATION_ACTIVE); ++ return; ++ } ++ ++ memcpy(new_caps, s->capabilities, sizeof(new_caps)); ++ for (cap = params; cap; cap = cap->next) { ++ new_caps[cap->value->capability] = cap->value->state; ++ } ++ ++ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { ++ return; ++ } ++ ++ for (cap = params; cap; cap = cap->next) { ++ s->capabilities[cap->value->capability] = cap->value->state; ++ } ++} +-- +2.39.1 + diff --git a/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch b/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch new file mode 100644 index 0000000..3685a33 --- /dev/null +++ b/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch @@ -0,0 +1,943 @@ +From 
944bf4759d1279c342ddd29c47d47c9670b64625 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:13:16 +0100 +Subject: [PATCH 50/56] migration: Move qmp_migrate_set_parameters() to + options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [49/50] b55f7afe868e117d4212f1518b9a37514cc99b33 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 09d6c9658474e8573c5ada58dca8b20fe47dd99e) +Signed-off-by: Peter Xu +--- + migration/migration.c | 420 ------------------------------------------ + migration/options.c | 418 +++++++++++++++++++++++++++++++++++++++++ + migration/options.h | 11 ++ + 3 files changed, 429 insertions(+), 420 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 22ef83c619..08f87f2b0e 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -67,19 +67,10 @@ + + #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ + +-/* Amount of time to allocate to each "chunk" of bandwidth-throttled +- * data. 
*/ +-#define BUFFER_DELAY 100 +-#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) +- + /* Time in milliseconds we are allowed to stop the source, + * for sending the last part */ + #define DEFAULT_MIGRATE_SET_DOWNTIME 300 + +-/* Maximum migrate downtime set to 2000 seconds */ +-#define MAX_MIGRATE_DOWNTIME_SECONDS 2000 +-#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) +- + /* Default compression thread count */ + #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 + /* Default decompression thread count, usually decompression is at +@@ -1140,417 +1131,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) + return info; + } + +-/* +- * Check whether the parameters are valid. Error will be put into errp +- * (if provided). Return true if valid, otherwise false. +- */ +-static bool migrate_params_check(MigrationParameters *params, Error **errp) +-{ +- if (params->has_compress_level && +- (params->compress_level > 9)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", +- "a value between 0 and 9"); +- return false; +- } +- +- if (params->has_compress_threads && (params->compress_threads < 1)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "compress_threads", +- "a value between 1 and 255"); +- return false; +- } +- +- if (params->has_decompress_threads && (params->decompress_threads < 1)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "decompress_threads", +- "a value between 1 and 255"); +- return false; +- } +- +- if (params->has_throttle_trigger_threshold && +- (params->throttle_trigger_threshold < 1 || +- params->throttle_trigger_threshold > 100)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "throttle_trigger_threshold", +- "an integer in the range of 1 to 100"); +- return false; +- } +- +- if (params->has_cpu_throttle_initial && +- (params->cpu_throttle_initial < 1 || +- params->cpu_throttle_initial > 99)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "cpu_throttle_initial", +- "an integer in the 
range of 1 to 99"); +- return false; +- } +- +- if (params->has_cpu_throttle_increment && +- (params->cpu_throttle_increment < 1 || +- params->cpu_throttle_increment > 99)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "cpu_throttle_increment", +- "an integer in the range of 1 to 99"); +- return false; +- } +- +- if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "max_bandwidth", +- "an integer in the range of 0 to "stringify(SIZE_MAX) +- " bytes/second"); +- return false; +- } +- +- if (params->has_downtime_limit && +- (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "downtime_limit", +- "an integer in the range of 0 to " +- stringify(MAX_MIGRATE_DOWNTIME)" ms"); +- return false; +- } +- +- /* x_checkpoint_delay is now always positive */ +- +- if (params->has_multifd_channels && (params->multifd_channels < 1)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "multifd_channels", +- "a value between 1 and 255"); +- return false; +- } +- +- if (params->has_multifd_zlib_level && +- (params->multifd_zlib_level > 9)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level", +- "a value between 0 and 9"); +- return false; +- } +- +- if (params->has_multifd_zstd_level && +- (params->multifd_zstd_level > 20)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", +- "a value between 0 and 20"); +- return false; +- } +- +- if (params->has_xbzrle_cache_size && +- (params->xbzrle_cache_size < qemu_target_page_size() || +- !is_power_of_2(params->xbzrle_cache_size))) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "xbzrle_cache_size", +- "a power of two no less than the target page size"); +- return false; +- } +- +- if (params->has_max_cpu_throttle && +- (params->max_cpu_throttle < params->cpu_throttle_initial || +- params->max_cpu_throttle > 99)) { +- error_setg(errp, 
QERR_INVALID_PARAMETER_VALUE, +- "max_cpu_throttle", +- "an integer in the range of cpu_throttle_initial to 99"); +- return false; +- } +- +- if (params->has_announce_initial && +- params->announce_initial > 100000) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "announce_initial", +- "a value between 0 and 100000"); +- return false; +- } +- if (params->has_announce_max && +- params->announce_max > 100000) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "announce_max", +- "a value between 0 and 100000"); +- return false; +- } +- if (params->has_announce_rounds && +- params->announce_rounds > 1000) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "announce_rounds", +- "a value between 0 and 1000"); +- return false; +- } +- if (params->has_announce_step && +- (params->announce_step < 1 || +- params->announce_step > 10000)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +- "announce_step", +- "a value between 0 and 10000"); +- return false; +- } +- +- if (params->has_block_bitmap_mapping && +- !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) { +- error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); +- return false; +- } +- +-#ifdef CONFIG_LINUX +- if (migrate_zero_copy_send() && +- ((params->has_multifd_compression && params->multifd_compression) || +- (params->tls_creds && *params->tls_creds))) { +- error_setg(errp, +- "Zero copy only available for non-compressed non-TLS multifd migration"); +- return false; +- } +-#endif +- +- return true; +-} +- +-static void migrate_params_test_apply(MigrateSetParameters *params, +- MigrationParameters *dest) +-{ +- *dest = migrate_get_current()->parameters; +- +- /* TODO use QAPI_CLONE() instead of duplicating it inline */ +- +- if (params->has_compress_level) { +- dest->compress_level = params->compress_level; +- } +- +- if (params->has_compress_threads) { +- dest->compress_threads = params->compress_threads; +- } +- +- if (params->has_compress_wait_thread) { +- 
dest->compress_wait_thread = params->compress_wait_thread; +- } +- +- if (params->has_decompress_threads) { +- dest->decompress_threads = params->decompress_threads; +- } +- +- if (params->has_throttle_trigger_threshold) { +- dest->throttle_trigger_threshold = params->throttle_trigger_threshold; +- } +- +- if (params->has_cpu_throttle_initial) { +- dest->cpu_throttle_initial = params->cpu_throttle_initial; +- } +- +- if (params->has_cpu_throttle_increment) { +- dest->cpu_throttle_increment = params->cpu_throttle_increment; +- } +- +- if (params->has_cpu_throttle_tailslow) { +- dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; +- } +- +- if (params->tls_creds) { +- assert(params->tls_creds->type == QTYPE_QSTRING); +- dest->tls_creds = params->tls_creds->u.s; +- } +- +- if (params->tls_hostname) { +- assert(params->tls_hostname->type == QTYPE_QSTRING); +- dest->tls_hostname = params->tls_hostname->u.s; +- } +- +- if (params->has_max_bandwidth) { +- dest->max_bandwidth = params->max_bandwidth; +- } +- +- if (params->has_downtime_limit) { +- dest->downtime_limit = params->downtime_limit; +- } +- +- if (params->has_x_checkpoint_delay) { +- dest->x_checkpoint_delay = params->x_checkpoint_delay; +- } +- +- if (params->has_block_incremental) { +- dest->block_incremental = params->block_incremental; +- } +- if (params->has_multifd_channels) { +- dest->multifd_channels = params->multifd_channels; +- } +- if (params->has_multifd_compression) { +- dest->multifd_compression = params->multifd_compression; +- } +- if (params->has_xbzrle_cache_size) { +- dest->xbzrle_cache_size = params->xbzrle_cache_size; +- } +- if (params->has_max_postcopy_bandwidth) { +- dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth; +- } +- if (params->has_max_cpu_throttle) { +- dest->max_cpu_throttle = params->max_cpu_throttle; +- } +- if (params->has_announce_initial) { +- dest->announce_initial = params->announce_initial; +- } +- if (params->has_announce_max) { +- 
dest->announce_max = params->announce_max; +- } +- if (params->has_announce_rounds) { +- dest->announce_rounds = params->announce_rounds; +- } +- if (params->has_announce_step) { +- dest->announce_step = params->announce_step; +- } +- +- if (params->has_block_bitmap_mapping) { +- dest->has_block_bitmap_mapping = true; +- dest->block_bitmap_mapping = params->block_bitmap_mapping; +- } +-} +- +-static void migrate_params_apply(MigrateSetParameters *params, Error **errp) +-{ +- MigrationState *s = migrate_get_current(); +- +- /* TODO use QAPI_CLONE() instead of duplicating it inline */ +- +- if (params->has_compress_level) { +- s->parameters.compress_level = params->compress_level; +- } +- +- if (params->has_compress_threads) { +- s->parameters.compress_threads = params->compress_threads; +- } +- +- if (params->has_compress_wait_thread) { +- s->parameters.compress_wait_thread = params->compress_wait_thread; +- } +- +- if (params->has_decompress_threads) { +- s->parameters.decompress_threads = params->decompress_threads; +- } +- +- if (params->has_throttle_trigger_threshold) { +- s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; +- } +- +- if (params->has_cpu_throttle_initial) { +- s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; +- } +- +- if (params->has_cpu_throttle_increment) { +- s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; +- } +- +- if (params->has_cpu_throttle_tailslow) { +- s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; +- } +- +- if (params->tls_creds) { +- g_free(s->parameters.tls_creds); +- assert(params->tls_creds->type == QTYPE_QSTRING); +- s->parameters.tls_creds = g_strdup(params->tls_creds->u.s); +- } +- +- if (params->tls_hostname) { +- g_free(s->parameters.tls_hostname); +- assert(params->tls_hostname->type == QTYPE_QSTRING); +- s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s); +- } +- +- if (params->tls_authz) { +- 
g_free(s->parameters.tls_authz); +- assert(params->tls_authz->type == QTYPE_QSTRING); +- s->parameters.tls_authz = g_strdup(params->tls_authz->u.s); +- } +- +- if (params->has_max_bandwidth) { +- s->parameters.max_bandwidth = params->max_bandwidth; +- if (s->to_dst_file && !migration_in_postcopy()) { +- qemu_file_set_rate_limit(s->to_dst_file, +- s->parameters.max_bandwidth / XFER_LIMIT_RATIO); +- } +- } +- +- if (params->has_downtime_limit) { +- s->parameters.downtime_limit = params->downtime_limit; +- } +- +- if (params->has_x_checkpoint_delay) { +- s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; +- if (migration_in_colo_state()) { +- colo_checkpoint_notify(s); +- } +- } +- +- if (params->has_block_incremental) { +- s->parameters.block_incremental = params->block_incremental; +- } +- if (params->has_multifd_channels) { +- s->parameters.multifd_channels = params->multifd_channels; +- } +- if (params->has_multifd_compression) { +- s->parameters.multifd_compression = params->multifd_compression; +- } +- if (params->has_xbzrle_cache_size) { +- s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; +- xbzrle_cache_resize(params->xbzrle_cache_size, errp); +- } +- if (params->has_max_postcopy_bandwidth) { +- s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth; +- if (s->to_dst_file && migration_in_postcopy()) { +- qemu_file_set_rate_limit(s->to_dst_file, +- s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO); +- } +- } +- if (params->has_max_cpu_throttle) { +- s->parameters.max_cpu_throttle = params->max_cpu_throttle; +- } +- if (params->has_announce_initial) { +- s->parameters.announce_initial = params->announce_initial; +- } +- if (params->has_announce_max) { +- s->parameters.announce_max = params->announce_max; +- } +- if (params->has_announce_rounds) { +- s->parameters.announce_rounds = params->announce_rounds; +- } +- if (params->has_announce_step) { +- s->parameters.announce_step = params->announce_step; +- } +- +- if 
(params->has_block_bitmap_mapping) { +- qapi_free_BitmapMigrationNodeAliasList( +- s->parameters.block_bitmap_mapping); +- +- s->parameters.has_block_bitmap_mapping = true; +- s->parameters.block_bitmap_mapping = +- QAPI_CLONE(BitmapMigrationNodeAliasList, +- params->block_bitmap_mapping); +- } +-} +- +-void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) +-{ +- MigrationParameters tmp; +- +- /* TODO Rewrite "" to null instead */ +- if (params->tls_creds +- && params->tls_creds->type == QTYPE_QNULL) { +- qobject_unref(params->tls_creds->u.n); +- params->tls_creds->type = QTYPE_QSTRING; +- params->tls_creds->u.s = strdup(""); +- } +- /* TODO Rewrite "" to null instead */ +- if (params->tls_hostname +- && params->tls_hostname->type == QTYPE_QNULL) { +- qobject_unref(params->tls_hostname->u.n); +- params->tls_hostname->type = QTYPE_QSTRING; +- params->tls_hostname->u.s = strdup(""); +- } +- +- migrate_params_test_apply(params, &tmp); +- +- if (!migrate_params_check(&tmp, errp)) { +- /* Invalid parameter */ +- return; +- } +- +- migrate_params_apply(params, errp); +-} +- +- + void qmp_migrate_start_postcopy(Error **errp) + { + MigrationState *s = migrate_get_current(); +diff --git a/migration/options.c b/migration/options.c +index d4c0714683..4701c75a4d 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -12,17 +12,25 @@ + */ + + #include "qemu/osdep.h" ++#include "exec/target_page.h" + #include "qapi/clone-visitor.h" + #include "qapi/error.h" + #include "qapi/qapi-commands-migration.h" + #include "qapi/qapi-visit-migration.h" + #include "qapi/qmp/qerror.h" ++#include "qapi/qmp/qnull.h" + #include "sysemu/runstate.h" ++#include "migration/colo.h" + #include "migration/misc.h" + #include "migration.h" ++#include "qemu-file.h" + #include "ram.h" + #include "options.h" + ++/* Maximum migrate downtime set to 2000 seconds */ ++#define MAX_MIGRATE_DOWNTIME_SECONDS 2000 ++#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) ++ 
+ bool migrate_auto_converge(void) + { + MigrationState *s; +@@ -729,3 +737,413 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + + return params; + } ++ ++/* ++ * Check whether the parameters are valid. Error will be put into errp ++ * (if provided). Return true if valid, otherwise false. ++ */ ++bool migrate_params_check(MigrationParameters *params, Error **errp) ++{ ++ if (params->has_compress_level && ++ (params->compress_level > 9)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", ++ "a value between 0 and 9"); ++ return false; ++ } ++ ++ if (params->has_compress_threads && (params->compress_threads < 1)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "compress_threads", ++ "a value between 1 and 255"); ++ return false; ++ } ++ ++ if (params->has_decompress_threads && (params->decompress_threads < 1)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "decompress_threads", ++ "a value between 1 and 255"); ++ return false; ++ } ++ ++ if (params->has_throttle_trigger_threshold && ++ (params->throttle_trigger_threshold < 1 || ++ params->throttle_trigger_threshold > 100)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "throttle_trigger_threshold", ++ "an integer in the range of 1 to 100"); ++ return false; ++ } ++ ++ if (params->has_cpu_throttle_initial && ++ (params->cpu_throttle_initial < 1 || ++ params->cpu_throttle_initial > 99)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "cpu_throttle_initial", ++ "an integer in the range of 1 to 99"); ++ return false; ++ } ++ ++ if (params->has_cpu_throttle_increment && ++ (params->cpu_throttle_increment < 1 || ++ params->cpu_throttle_increment > 99)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "cpu_throttle_increment", ++ "an integer in the range of 1 to 99"); ++ return false; ++ } ++ ++ if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "max_bandwidth", ++ "an integer 
in the range of 0 to "stringify(SIZE_MAX) ++ " bytes/second"); ++ return false; ++ } ++ ++ if (params->has_downtime_limit && ++ (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "downtime_limit", ++ "an integer in the range of 0 to " ++ stringify(MAX_MIGRATE_DOWNTIME)" ms"); ++ return false; ++ } ++ ++ /* x_checkpoint_delay is now always positive */ ++ ++ if (params->has_multifd_channels && (params->multifd_channels < 1)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "multifd_channels", ++ "a value between 1 and 255"); ++ return false; ++ } ++ ++ if (params->has_multifd_zlib_level && ++ (params->multifd_zlib_level > 9)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level", ++ "a value between 0 and 9"); ++ return false; ++ } ++ ++ if (params->has_multifd_zstd_level && ++ (params->multifd_zstd_level > 20)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", ++ "a value between 0 and 20"); ++ return false; ++ } ++ ++ if (params->has_xbzrle_cache_size && ++ (params->xbzrle_cache_size < qemu_target_page_size() || ++ !is_power_of_2(params->xbzrle_cache_size))) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "xbzrle_cache_size", ++ "a power of two no less than the target page size"); ++ return false; ++ } ++ ++ if (params->has_max_cpu_throttle && ++ (params->max_cpu_throttle < params->cpu_throttle_initial || ++ params->max_cpu_throttle > 99)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "max_cpu_throttle", ++ "an integer in the range of cpu_throttle_initial to 99"); ++ return false; ++ } ++ ++ if (params->has_announce_initial && ++ params->announce_initial > 100000) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "announce_initial", ++ "a value between 0 and 100000"); ++ return false; ++ } ++ if (params->has_announce_max && ++ params->announce_max > 100000) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "announce_max", ++ "a value 
between 0 and 100000"); ++ return false; ++ } ++ if (params->has_announce_rounds && ++ params->announce_rounds > 1000) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "announce_rounds", ++ "a value between 0 and 1000"); ++ return false; ++ } ++ if (params->has_announce_step && ++ (params->announce_step < 1 || ++ params->announce_step > 10000)) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, ++ "announce_step", ++ "a value between 0 and 10000"); ++ return false; ++ } ++ ++ if (params->has_block_bitmap_mapping && ++ !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) { ++ error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); ++ return false; ++ } ++ ++#ifdef CONFIG_LINUX ++ if (migrate_zero_copy_send() && ++ ((params->has_multifd_compression && params->multifd_compression) || ++ (params->tls_creds && *params->tls_creds))) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#endif ++ ++ return true; ++} ++ ++static void migrate_params_test_apply(MigrateSetParameters *params, ++ MigrationParameters *dest) ++{ ++ *dest = migrate_get_current()->parameters; ++ ++ /* TODO use QAPI_CLONE() instead of duplicating it inline */ ++ ++ if (params->has_compress_level) { ++ dest->compress_level = params->compress_level; ++ } ++ ++ if (params->has_compress_threads) { ++ dest->compress_threads = params->compress_threads; ++ } ++ ++ if (params->has_compress_wait_thread) { ++ dest->compress_wait_thread = params->compress_wait_thread; ++ } ++ ++ if (params->has_decompress_threads) { ++ dest->decompress_threads = params->decompress_threads; ++ } ++ ++ if (params->has_throttle_trigger_threshold) { ++ dest->throttle_trigger_threshold = params->throttle_trigger_threshold; ++ } ++ ++ if (params->has_cpu_throttle_initial) { ++ dest->cpu_throttle_initial = params->cpu_throttle_initial; ++ } ++ ++ if (params->has_cpu_throttle_increment) { ++ dest->cpu_throttle_increment = 
params->cpu_throttle_increment; ++ } ++ ++ if (params->has_cpu_throttle_tailslow) { ++ dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; ++ } ++ ++ if (params->tls_creds) { ++ assert(params->tls_creds->type == QTYPE_QSTRING); ++ dest->tls_creds = params->tls_creds->u.s; ++ } ++ ++ if (params->tls_hostname) { ++ assert(params->tls_hostname->type == QTYPE_QSTRING); ++ dest->tls_hostname = params->tls_hostname->u.s; ++ } ++ ++ if (params->has_max_bandwidth) { ++ dest->max_bandwidth = params->max_bandwidth; ++ } ++ ++ if (params->has_downtime_limit) { ++ dest->downtime_limit = params->downtime_limit; ++ } ++ ++ if (params->has_x_checkpoint_delay) { ++ dest->x_checkpoint_delay = params->x_checkpoint_delay; ++ } ++ ++ if (params->has_block_incremental) { ++ dest->block_incremental = params->block_incremental; ++ } ++ if (params->has_multifd_channels) { ++ dest->multifd_channels = params->multifd_channels; ++ } ++ if (params->has_multifd_compression) { ++ dest->multifd_compression = params->multifd_compression; ++ } ++ if (params->has_xbzrle_cache_size) { ++ dest->xbzrle_cache_size = params->xbzrle_cache_size; ++ } ++ if (params->has_max_postcopy_bandwidth) { ++ dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth; ++ } ++ if (params->has_max_cpu_throttle) { ++ dest->max_cpu_throttle = params->max_cpu_throttle; ++ } ++ if (params->has_announce_initial) { ++ dest->announce_initial = params->announce_initial; ++ } ++ if (params->has_announce_max) { ++ dest->announce_max = params->announce_max; ++ } ++ if (params->has_announce_rounds) { ++ dest->announce_rounds = params->announce_rounds; ++ } ++ if (params->has_announce_step) { ++ dest->announce_step = params->announce_step; ++ } ++ ++ if (params->has_block_bitmap_mapping) { ++ dest->has_block_bitmap_mapping = true; ++ dest->block_bitmap_mapping = params->block_bitmap_mapping; ++ } ++} ++ ++static void migrate_params_apply(MigrateSetParameters *params, Error **errp) ++{ ++ MigrationState *s = 
migrate_get_current(); ++ ++ /* TODO use QAPI_CLONE() instead of duplicating it inline */ ++ ++ if (params->has_compress_level) { ++ s->parameters.compress_level = params->compress_level; ++ } ++ ++ if (params->has_compress_threads) { ++ s->parameters.compress_threads = params->compress_threads; ++ } ++ ++ if (params->has_compress_wait_thread) { ++ s->parameters.compress_wait_thread = params->compress_wait_thread; ++ } ++ ++ if (params->has_decompress_threads) { ++ s->parameters.decompress_threads = params->decompress_threads; ++ } ++ ++ if (params->has_throttle_trigger_threshold) { ++ s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; ++ } ++ ++ if (params->has_cpu_throttle_initial) { ++ s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; ++ } ++ ++ if (params->has_cpu_throttle_increment) { ++ s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; ++ } ++ ++ if (params->has_cpu_throttle_tailslow) { ++ s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; ++ } ++ ++ if (params->tls_creds) { ++ g_free(s->parameters.tls_creds); ++ assert(params->tls_creds->type == QTYPE_QSTRING); ++ s->parameters.tls_creds = g_strdup(params->tls_creds->u.s); ++ } ++ ++ if (params->tls_hostname) { ++ g_free(s->parameters.tls_hostname); ++ assert(params->tls_hostname->type == QTYPE_QSTRING); ++ s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s); ++ } ++ ++ if (params->tls_authz) { ++ g_free(s->parameters.tls_authz); ++ assert(params->tls_authz->type == QTYPE_QSTRING); ++ s->parameters.tls_authz = g_strdup(params->tls_authz->u.s); ++ } ++ ++ if (params->has_max_bandwidth) { ++ s->parameters.max_bandwidth = params->max_bandwidth; ++ if (s->to_dst_file && !migration_in_postcopy()) { ++ qemu_file_set_rate_limit(s->to_dst_file, ++ s->parameters.max_bandwidth / XFER_LIMIT_RATIO); ++ } ++ } ++ ++ if (params->has_downtime_limit) { ++ s->parameters.downtime_limit = params->downtime_limit; ++ } ++ ++ if 
(params->has_x_checkpoint_delay) { ++ s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; ++ if (migration_in_colo_state()) { ++ colo_checkpoint_notify(s); ++ } ++ } ++ ++ if (params->has_block_incremental) { ++ s->parameters.block_incremental = params->block_incremental; ++ } ++ if (params->has_multifd_channels) { ++ s->parameters.multifd_channels = params->multifd_channels; ++ } ++ if (params->has_multifd_compression) { ++ s->parameters.multifd_compression = params->multifd_compression; ++ } ++ if (params->has_xbzrle_cache_size) { ++ s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; ++ xbzrle_cache_resize(params->xbzrle_cache_size, errp); ++ } ++ if (params->has_max_postcopy_bandwidth) { ++ s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth; ++ if (s->to_dst_file && migration_in_postcopy()) { ++ qemu_file_set_rate_limit(s->to_dst_file, ++ s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO); ++ } ++ } ++ if (params->has_max_cpu_throttle) { ++ s->parameters.max_cpu_throttle = params->max_cpu_throttle; ++ } ++ if (params->has_announce_initial) { ++ s->parameters.announce_initial = params->announce_initial; ++ } ++ if (params->has_announce_max) { ++ s->parameters.announce_max = params->announce_max; ++ } ++ if (params->has_announce_rounds) { ++ s->parameters.announce_rounds = params->announce_rounds; ++ } ++ if (params->has_announce_step) { ++ s->parameters.announce_step = params->announce_step; ++ } ++ ++ if (params->has_block_bitmap_mapping) { ++ qapi_free_BitmapMigrationNodeAliasList( ++ s->parameters.block_bitmap_mapping); ++ ++ s->parameters.has_block_bitmap_mapping = true; ++ s->parameters.block_bitmap_mapping = ++ QAPI_CLONE(BitmapMigrationNodeAliasList, ++ params->block_bitmap_mapping); ++ } ++} ++ ++void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) ++{ ++ MigrationParameters tmp; ++ ++ /* TODO Rewrite "" to null instead */ ++ if (params->tls_creds ++ && params->tls_creds->type == 
QTYPE_QNULL) { ++ qobject_unref(params->tls_creds->u.n); ++ params->tls_creds->type = QTYPE_QSTRING; ++ params->tls_creds->u.s = strdup(""); ++ } ++ /* TODO Rewrite "" to null instead */ ++ if (params->tls_hostname ++ && params->tls_hostname->type == QTYPE_QNULL) { ++ qobject_unref(params->tls_hostname->u.n); ++ params->tls_hostname->type = QTYPE_QSTRING; ++ params->tls_hostname->u.s = strdup(""); ++ } ++ ++ migrate_params_test_apply(params, &tmp); ++ ++ if (!migrate_params_check(&tmp, errp)) { ++ /* Invalid parameter */ ++ return; ++ } ++ ++ migrate_params_apply(params, errp); ++} +diff --git a/migration/options.h b/migration/options.h +index 13318a16c7..89067e59a0 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -14,6 +14,13 @@ + #ifndef QEMU_MIGRATION_OPTIONS_H + #define QEMU_MIGRATION_OPTIONS_H + ++/* constants */ ++ ++/* Amount of time to allocate to each "chunk" of bandwidth-throttled ++ * data. */ ++#define BUFFER_DELAY 100 ++#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) ++ + /* capabilities */ + + bool migrate_auto_converge(void); +@@ -74,4 +81,8 @@ int migrate_multifd_zstd_level(void); + uint8_t migrate_throttle_trigger_threshold(void); + uint64_t migrate_xbzrle_cache_size(void); + ++/* parameters helpers */ ++ ++bool migrate_params_check(MigrationParameters *params, Error **errp); ++ + #endif +-- +2.39.1 + diff --git a/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch b/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch new file mode 100644 index 0000000..d2564de --- /dev/null +++ b/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch @@ -0,0 +1,100 @@ +From 00cc3c3598828588619a7b3696819060bddaddb8 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 23:15:59 +0100 +Subject: [PATCH 34/56] migration: Move qmp_query_migrate_capabilities() to + options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: 
migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [33/50] dbfa8f1e7aa7e000b4622ce2da12d7d418710f19 (peterx/qemu-kvm) + +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 4d0c6b695bf5252402ebf967f83baebfd2f4b91e) +Signed-off-by: Peter Xu +--- + migration/migration.c | 22 ---------------------- + migration/options.c | 23 +++++++++++++++++++++++ + 2 files changed, 23 insertions(+), 22 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index d9e30ca918..3dc8ee4875 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -886,28 +886,6 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) + migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf); + } + +-MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) +-{ +- MigrationCapabilityStatusList *head = NULL, **tail = &head; +- MigrationCapabilityStatus *caps; +- MigrationState *s = migrate_get_current(); +- int i; +- +- for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { +-#ifndef CONFIG_LIVE_BLOCK_MIGRATION +- if (i == MIGRATION_CAPABILITY_BLOCK) { +- continue; +- } +-#endif +- caps = g_malloc0(sizeof(*caps)); +- caps->capability = i; +- caps->state = s->capabilities[i]; +- QAPI_LIST_APPEND(tail, caps); +- } +- +- return head; +-} +- + MigrationParameters *qmp_query_migrate_parameters(Error **errp) + { + MigrationParameters *params; +diff --git a/migration/options.c b/migration/options.c +index 367c930f46..ff621bdeb3 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -13,6 +13,7 @@ + + #include "qemu/osdep.h" + #include "qapi/error.h" ++#include "qapi/qapi-commands-migration.h" + #include "sysemu/runstate.h" + #include "migration.h" + #include "ram.h" +@@ -390,3 +391,25 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, 
Error **errp) + + return true; + } ++ ++MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) ++{ ++ MigrationCapabilityStatusList *head = NULL, **tail = &head; ++ MigrationCapabilityStatus *caps; ++ MigrationState *s = migrate_get_current(); ++ int i; ++ ++ for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { ++#ifndef CONFIG_LIVE_BLOCK_MIGRATION ++ if (i == MIGRATION_CAPABILITY_BLOCK) { ++ continue; ++ } ++#endif ++ caps = g_malloc0(sizeof(*caps)); ++ caps->capability = i; ++ caps->state = s->capabilities[i]; ++ QAPI_LIST_APPEND(tail, caps); ++ } ++ ++ return head; ++} +-- +2.39.1 + diff --git a/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch b/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch new file mode 100644 index 0000000..7339ce0 --- /dev/null +++ b/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch @@ -0,0 +1,226 @@ +From 4782b59a8b0b5762f87505ac7a83b37ddd2e0b3f Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 20:28:56 +0100 +Subject: [PATCH 19/56] migration: Pass migrate_caps_check() the old and new + caps +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [18/50] df78d680d03f15d7cb7401ad89e68a4fc93fa835 (peterx/qemu-kvm) + +We used to pass the old capabilities array and the new +capabilities as a list. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit b02c7fc9ef447787414e6fa67eff75e7b7b30180) +Signed-off-by: Peter Xu +--- + migration/migration.c | 80 +++++++++++++++++-------------------------- + 1 file changed, 31 insertions(+), 49 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index d8e5fb6226..e8f596bcfa 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1299,30 +1299,20 @@ WriteTrackingSupport migrate_query_write_tracking(void) + } + + /** +- * @migration_caps_check - check capability validity ++ * @migration_caps_check - check capability compatibility + * +- * @cap_list: old capability list, array of bool +- * @params: new capabilities to be applied soon ++ * @old_caps: old capability list ++ * @new_caps: new capability list + * @errp: set *errp if the check failed, with reason + * + * Returns true if check passed, otherwise false. + */ +-static bool migrate_caps_check(bool *cap_list, +- MigrationCapabilityStatusList *params, +- Error **errp) ++static bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + { +- MigrationCapabilityStatusList *cap; +- bool old_postcopy_cap; + MigrationIncomingState *mis = migration_incoming_get_current(); + +- old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]; +- +- for (cap = params; cap; cap = cap->next) { +- cap_list[cap->value->capability] = cap->value->state; +- } +- + #ifndef CONFIG_LIVE_BLOCK_MIGRATION +- if (cap_list[MIGRATION_CAPABILITY_BLOCK]) { ++ if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { + error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " + "block migration"); + error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); +@@ -1331,7 +1321,7 @@ static bool migrate_caps_check(bool *cap_list, + #endif + + #ifndef CONFIG_REPLICATION +- if (cap_list[MIGRATION_CAPABILITY_X_COLO]) { ++ if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { + error_setg(errp, "QEMU compiled without 
replication module" + " can't enable COLO"); + error_append_hint(errp, "Please enable replication before COLO.\n"); +@@ -1339,12 +1329,13 @@ static bool migrate_caps_check(bool *cap_list, + } + #endif + +- if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { ++ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { + /* This check is reasonably expensive, so only when it's being + * set the first time, also it's only the destination that needs + * special support. + */ +- if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) && ++ if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && ++ runstate_check(RUN_STATE_INMIGRATE) && + !postcopy_ram_supported_by_host(mis)) { + /* postcopy_ram_supported_by_host will have emitted a more + * detailed message +@@ -1353,13 +1344,13 @@ static bool migrate_caps_check(bool *cap_list, + return false; + } + +- if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { ++ if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { + error_setg(errp, "Postcopy is not compatible with ignore-shared"); + return false; + } + } + +- if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { ++ if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { + WriteTrackingSupport wt_support; + int idx; + /* +@@ -1383,7 +1374,7 @@ static bool migrate_caps_check(bool *cap_list, + */ + for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { + int incomp_cap = check_caps_background_snapshot.caps[idx]; +- if (cap_list[incomp_cap]) { ++ if (new_caps[incomp_cap]) { + error_setg(errp, + "Background-snapshot is not compatible with %s", + MigrationCapability_str(incomp_cap)); +@@ -1393,10 +1384,10 @@ static bool migrate_caps_check(bool *cap_list, + } + + #ifdef CONFIG_LINUX +- if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && +- (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || +- cap_list[MIGRATION_CAPABILITY_COMPRESS] || +- cap_list[MIGRATION_CAPABILITY_XBZRLE] || ++ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && ++ 
(!new_caps[MIGRATION_CAPABILITY_MULTIFD] || ++ new_caps[MIGRATION_CAPABILITY_COMPRESS] || ++ new_caps[MIGRATION_CAPABILITY_XBZRLE] || + migrate_multifd_compression() || + migrate_use_tls())) { + error_setg(errp, +@@ -1404,15 +1395,15 @@ static bool migrate_caps_check(bool *cap_list, + return false; + } + #else +- if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { ++ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { + error_setg(errp, + "Zero copy currently only available on Linux"); + return false; + } + #endif + +- if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { +- if (!cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { ++ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { ++ if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { + error_setg(errp, "Postcopy preempt requires postcopy-ram"); + return false; + } +@@ -1423,14 +1414,14 @@ static bool migrate_caps_check(bool *cap_list, + * different compression channels, which is not compatible with the + * preempt assumptions on channel assignments. 
+ */ +- if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { ++ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { + error_setg(errp, "Postcopy preempt not compatible with compress"); + return false; + } + } + +- if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) { +- if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { ++ if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { ++ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { + error_setg(errp, "Multifd is not compatible with compress"); + return false; + } +@@ -1486,15 +1477,19 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + { + MigrationState *s = migrate_get_current(); + MigrationCapabilityStatusList *cap; +- bool cap_list[MIGRATION_CAPABILITY__MAX]; ++ bool new_caps[MIGRATION_CAPABILITY__MAX]; + + if (migration_is_running(s->state)) { + error_setg(errp, QERR_MIGRATION_ACTIVE); + return; + } + +- memcpy(cap_list, s->capabilities, sizeof(cap_list)); +- if (!migrate_caps_check(cap_list, params, errp)) { ++ memcpy(new_caps, s->capabilities, sizeof(new_caps)); ++ for (cap = params; cap; cap = cap->next) { ++ new_caps[cap->value->capability] = cap->value->state; ++ } ++ ++ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { + return; + } + +@@ -4634,27 +4629,14 @@ static void migration_instance_init(Object *obj) + */ + static bool migration_object_check(MigrationState *ms, Error **errp) + { +- MigrationCapabilityStatusList *head = NULL; + /* Assuming all off */ +- bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret; +- int i; ++ bool old_caps[MIGRATION_CAPABILITY__MAX] = { 0 }; + + if (!migrate_params_check(&ms->parameters, errp)) { + return false; + } + +- for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { +- if (ms->capabilities[i]) { +- QAPI_LIST_PREPEND(head, migrate_cap_add(i, true)); +- } +- } +- +- ret = migrate_caps_check(cap_list, head, errp); +- +- /* It works with head == NULL */ +- qapi_free_MigrationCapabilityStatusList(head); +- +- return ret; ++ return migrate_caps_check(old_caps, 
ms->capabilities, errp); + } + + static const TypeInfo migration_type = { +-- +2.39.1 + diff --git a/kvm-migration-Rename-duplicate-to-zero_pages.patch b/kvm-migration-Rename-duplicate-to-zero_pages.patch new file mode 100644 index 0000000..22acab5 --- /dev/null +++ b/kvm-migration-Rename-duplicate-to-zero_pages.patch @@ -0,0 +1,109 @@ +From 3cecf66655a0dd599666bcac8add2dee85d5651f Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 19 Apr 2023 18:16:05 +0200 +Subject: [PATCH 16/56] migration: Rename duplicate to zero_pages +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [15/50] 89db3c8b167c0f411ba95ce2730540c0e8f1206b (peterx/qemu-kvm) + +Rest of counters that refer to pages has a _pages suffix. +And historically, this showed the number of pages composed of the same +character, here comes the name "duplicated". But since years ago, it +refers to the number of zero_pages. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 1a386e8de5995fb5478ea99baa6d3e71abcf4b80) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + migration/ram.c | 10 +++++----- + migration/ram.h | 2 +- + 3 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 39501a0ed8..c15e2a61ca 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1142,7 +1142,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram = g_malloc0(sizeof(*info->ram)); + info->ram->transferred = stat64_get(&ram_counters.transferred); + info->ram->total = ram_bytes_total(); +- info->ram->duplicate = stat64_get(&ram_counters.duplicate); ++ info->ram->duplicate = stat64_get(&ram_counters.zero_pages); + /* legacy value. It is not used anymore */ + info->ram->skipped = 0; + info->ram->normal = stat64_get(&ram_counters.normal); +diff --git a/migration/ram.c b/migration/ram.c +index fe69ecaef4..19d345a030 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1119,7 +1119,7 @@ uint64_t ram_pagesize_summary(void) + uint64_t ram_get_total_transferred_pages(void) + { + return stat64_get(&ram_counters.normal) + +- stat64_get(&ram_counters.duplicate) + ++ stat64_get(&ram_counters.zero_pages) + + compression_counters.pages + xbzrle_counters.pages; + } + +@@ -1320,7 +1320,7 @@ static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block, + int len = save_zero_page_to_file(pss, f, block, offset); + + if (len) { +- stat64_add(&ram_counters.duplicate, 1); ++ stat64_add(&ram_counters.zero_pages, 1); + ram_transferred_add(len); + return 1; + } +@@ -1359,7 +1359,7 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, + if (bytes_xmit > 0) { + stat64_add(&ram_counters.normal, 1); + } else if (bytes_xmit == 0) { +- stat64_add(&ram_counters.duplicate, 1); ++ stat64_add(&ram_counters.zero_pages, 1); + } + + return true; +@@ -1486,7 
+1486,7 @@ update_compress_thread_counts(const CompressParam *param, int bytes_xmit) + ram_transferred_add(bytes_xmit); + + if (param->zero_page) { +- stat64_add(&ram_counters.duplicate, 1); ++ stat64_add(&ram_counters.zero_pages, 1); + return; + } + +@@ -2621,7 +2621,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) + uint64_t pages = size / TARGET_PAGE_SIZE; + + if (zero) { +- stat64_add(&ram_counters.duplicate, pages); ++ stat64_add(&ram_counters.zero_pages, pages); + } else { + stat64_add(&ram_counters.normal, pages); + ram_transferred_add(size); +diff --git a/migration/ram.h b/migration/ram.h +index afa68521d7..55258334fe 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -45,7 +45,7 @@ typedef struct { + Stat64 dirty_sync_count; + Stat64 dirty_sync_missed_zero_copy; + Stat64 downtime_bytes; +- Stat64 duplicate; ++ Stat64 zero_pages; + Stat64 multifd_bytes; + Stat64 normal; + Stat64 postcopy_bytes; +-- +2.39.1 + diff --git a/kvm-migration-Rename-normal-to-normal_pages.patch b/kvm-migration-Rename-normal-to-normal_pages.patch new file mode 100644 index 0000000..8ad6447 --- /dev/null +++ b/kvm-migration-Rename-normal-to-normal_pages.patch @@ -0,0 +1,109 @@ +From 7e27e7ea83856e1a7222ff46d91495f48fb6be4d Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 19 Apr 2023 18:19:45 +0200 +Subject: [PATCH 17/56] migration: Rename normal to normal_pages +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [16/50] 7df8b946918def9657bbe357861a6d72b5399ac6 (peterx/qemu-kvm) + +Rest of counters that refer to pages has a _pages suffix. +And historically, this showed the number of full pages transferred. 
+The name "normal" referred to the fact that they were sent without any +optimization (compression, xbzrle, zero_page, ...). + +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +(cherry picked from commit 8c0cda8fa0de0a50148e2c60552afca9cffca643) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + migration/ram.c | 10 +++++----- + migration/ram.h | 2 +- + 3 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index c15e2a61ca..f1b3439e5f 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1145,7 +1145,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->duplicate = stat64_get(&ram_counters.zero_pages); + /* legacy value. It is not used anymore */ + info->ram->skipped = 0; +- info->ram->normal = stat64_get(&ram_counters.normal); ++ info->ram->normal = stat64_get(&ram_counters.normal_pages); + info->ram->normal_bytes = info->ram->normal * page_size; + info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = +diff --git a/migration/ram.c b/migration/ram.c +index 19d345a030..229714045a 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1118,7 +1118,7 @@ uint64_t ram_pagesize_summary(void) + + uint64_t ram_get_total_transferred_pages(void) + { +- return stat64_get(&ram_counters.normal) + ++ return stat64_get(&ram_counters.normal_pages) + + stat64_get(&ram_counters.zero_pages) + + compression_counters.pages + xbzrle_counters.pages; + } +@@ -1357,7 +1357,7 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, + } + + if (bytes_xmit > 0) { +- stat64_add(&ram_counters.normal, 1); ++ stat64_add(&ram_counters.normal_pages, 1); + } else if (bytes_xmit == 0) { + stat64_add(&ram_counters.zero_pages, 1); + } +@@ -1391,7 +1391,7 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, + qemu_put_buffer(file, buf, TARGET_PAGE_SIZE); + } + ram_transferred_add(TARGET_PAGE_SIZE); +- stat64_add(&ram_counters.normal, 1); ++
stat64_add(&ram_counters.normal_pages, 1); + return 1; + } + +@@ -1447,7 +1447,7 @@ static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block, + if (multifd_queue_page(file, block, offset) < 0) { + return -1; + } +- stat64_add(&ram_counters.normal, 1); ++ stat64_add(&ram_counters.normal_pages, 1); + + return 1; + } +@@ -2623,7 +2623,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) + if (zero) { + stat64_add(&ram_counters.zero_pages, pages); + } else { +- stat64_add(&ram_counters.normal, pages); ++ stat64_add(&ram_counters.normal_pages, pages); + ram_transferred_add(size); + qemu_file_credit_transfer(f, size); + } +diff --git a/migration/ram.h b/migration/ram.h +index 55258334fe..a6e0d70226 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -47,7 +47,7 @@ typedef struct { + Stat64 downtime_bytes; + Stat64 zero_pages; + Stat64 multifd_bytes; +- Stat64 normal; ++ Stat64 normal_pages; + Stat64 postcopy_bytes; + Stat64 postcopy_requests; + Stat64 precopy_bytes; +-- +2.39.1 + diff --git a/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch b/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch new file mode 100644 index 0000000..7e78d82 --- /dev/null +++ b/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch @@ -0,0 +1,52 @@ +From c0d377e1bf442a09b82fddbb8588fcddf6439854 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 24 Nov 2022 17:26:19 +0100 +Subject: [PATCH 09/56] migration: Update atomic stats out of the mutex +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [8/50] 88e9dbc9a3e5aef60a7c98c871144904c7062b1f (peterx/qemu-kvm) + +Reviewed-by: David Edmondson +Reviewed-by: Peter Xu +Signed-off-by: Juan Quintela +(cherry picked from commit 
30fb22cda45bea43a3c0e26049ebdd71a9503ffd) +Signed-off-by: Peter Xu +--- + migration/multifd.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 01fab01a92..6ef3a27938 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -433,8 +433,8 @@ static int multifd_send_pages(QEMUFile *f) + transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; + qemu_file_acct_rate_limit(f, transferred); + ram_counters.multifd_bytes += transferred; +- stat64_add(&ram_counters.transferred, transferred); + qemu_mutex_unlock(&p->mutex); ++ stat64_add(&ram_counters.transferred, transferred); + qemu_sem_post(&p->sem); + + return 1; +@@ -628,8 +628,8 @@ int multifd_send_sync_main(QEMUFile *f) + p->pending_job++; + qemu_file_acct_rate_limit(f, p->packet_len); + ram_counters.multifd_bytes += p->packet_len; +- stat64_add(&ram_counters.transferred, p->packet_len); + qemu_mutex_unlock(&p->mutex); ++ stat64_add(&ram_counters.transferred, p->packet_len); + qemu_sem_post(&p->sem); + } + for (i = 0; i < migrate_multifd_channels(); i++) { +-- +2.39.1 + diff --git a/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch b/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch new file mode 100644 index 0000000..f179761 --- /dev/null +++ b/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch @@ -0,0 +1,40 @@ +From 8d203baa6cbd1f371e308c2c9d59a5ca7d29dca8 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Thu, 2 Mar 2023 10:55:30 +0100 +Subject: [PATCH 38/56] migration: Use migrate_max_postcopy_bandwidth() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [37/50] d62948e9ee40a85ed9b460a583c3b0e43cd5d47f (peterx/qemu-kvm) + 
+Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 5390adec03a7d8bc6bcf5887f726b0ddaeb90681) +Signed-off-by: Peter Xu +--- + migration/migration.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 7f2e770deb..78bca9a93f 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3799,7 +3799,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + + if (resume) { + /* This is a resumed migration */ +- rate_limit = s->parameters.max_postcopy_bandwidth / ++ rate_limit = migrate_max_postcopy_bandwidth() / + XFER_LIMIT_RATIO; + } else { + /* This is a fresh new migration */ +-- +2.39.1 + diff --git a/kvm-migration-mark-mixed-functions-that-can-suspend.patch b/kvm-migration-mark-mixed-functions-that-can-suspend.patch new file mode 100644 index 0000000..9451696 --- /dev/null +++ b/kvm-migration-mark-mixed-functions-that-can-suspend.patch @@ -0,0 +1,153 @@ +From cfdf5715a2334ad06b5966ec986d134bbd5ba08b Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 16 Dec 2022 12:48:16 +0100 +Subject: [PATCH 05/56] migration: mark mixed functions that can suspend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [4/50] 9f055b526edd06a3440999d5de91e5d624678c7d (peterx/qemu-kvm) + +There should be no paths from a coroutine_fn to aio_poll, however in +practice coroutine_mixed_fn will call aio_poll in the !qemu_in_coroutine() +path. By marking mixed functions, we can track accurately the call paths +that execute entirely in coroutine context, and find more missing +coroutine_fn markers. This results in more accurate checks that +coroutine code does not end up blocking. 
+ +If the marking were extended transitively to all functions that call +these ones, static analysis could be done much more efficiently. +However, this is a start and makes it possible to use vrc's path-based +searches to find potential bugs where coroutine_fns call blocking functions. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 394b9407e4c515f96df6647d629ee28cbb86f07c) +Signed-off-by: Peter Xu +--- + include/migration/qemu-file-types.h | 4 ++-- + migration/qemu-file.c | 14 +++++++------- + migration/qemu-file.h | 6 +++--- + 3 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/include/migration/qemu-file-types.h b/include/migration/qemu-file-types.h +index 2867e3da84..1436f9ce92 100644 +--- a/include/migration/qemu-file-types.h ++++ b/include/migration/qemu-file-types.h +@@ -35,7 +35,7 @@ void qemu_put_byte(QEMUFile *f, int v); + void qemu_put_be16(QEMUFile *f, unsigned int v); + void qemu_put_be32(QEMUFile *f, unsigned int v); + void qemu_put_be64(QEMUFile *f, uint64_t v); +-size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size); ++size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size); + + int qemu_get_byte(QEMUFile *f); + +@@ -161,7 +161,7 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv) + qemu_get_be64s(f, (uint64_t *)pv); + } + +-size_t qemu_get_counted_string(QEMUFile *f, char buf[256]); ++size_t coroutine_mixed_fn qemu_get_counted_string(QEMUFile *f, char buf[256]); + + void qemu_put_counted_string(QEMUFile *f, const char *name); + +diff --git a/migration/qemu-file.c b/migration/qemu-file.c +index 102ab3b439..ee04240a21 100644 +--- a/migration/qemu-file.c ++++ b/migration/qemu-file.c +@@ -392,7 +392,7 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, + * case if the underlying file descriptor gives a short read, and that can + * happen even on a blocking fd. 
+ */ +-static ssize_t qemu_fill_buffer(QEMUFile *f) ++static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f) + { + int len; + int pending; +@@ -585,7 +585,7 @@ void qemu_file_skip(QEMUFile *f, int size) + * return as many as it managed to read (assuming blocking fd's which + * all current QEMUFile are) + */ +-size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) ++size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) + { + ssize_t pending; + size_t index; +@@ -633,7 +633,7 @@ size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) + * return as many as it managed to read (assuming blocking fd's which + * all current QEMUFile are) + */ +-size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) ++size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) + { + size_t pending = size; + size_t done = 0; +@@ -674,7 +674,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) + * Note: Since **buf may get changed, the caller should take care to + * keep a pointer to the original buffer if it needs to deallocate it. + */ +-size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) ++size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) + { + if (size < IO_BUF_SIZE) { + size_t res; +@@ -696,7 +696,7 @@ size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) + * Peeks a single byte from the buffer; this isn't guaranteed to work if + * offset leaves a gap after the previous read/peeked data. 
+ */ +-int qemu_peek_byte(QEMUFile *f, int offset) ++int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset) + { + int index = f->buf_index + offset; + +@@ -713,7 +713,7 @@ int qemu_peek_byte(QEMUFile *f, int offset) + return f->buf[index]; + } + +-int qemu_get_byte(QEMUFile *f) ++int coroutine_mixed_fn qemu_get_byte(QEMUFile *f) + { + int result; + +@@ -894,7 +894,7 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src) + * else 0 + * (Note a 0 length string will return 0 either way) + */ +-size_t qemu_get_counted_string(QEMUFile *f, char buf[256]) ++size_t coroutine_mixed_fn qemu_get_counted_string(QEMUFile *f, char buf[256]) + { + size_t len = qemu_get_byte(f); + size_t res = qemu_get_buffer(f, (uint8_t *)buf, len); +diff --git a/migration/qemu-file.h b/migration/qemu-file.h +index 9d0155a2a1..d16cd50448 100644 +--- a/migration/qemu-file.h ++++ b/migration/qemu-file.h +@@ -108,8 +108,8 @@ bool qemu_file_is_writable(QEMUFile *f); + + #include "migration/qemu-file-types.h" + +-size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); +-size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); ++size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); ++size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); + ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, + const uint8_t *p, size_t size); + int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); +@@ -119,7 +119,7 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); + * is; you aren't guaranteed to be able to peak to +n bytes unless you've + * previously peeked +n-1.
+ */ +-int qemu_peek_byte(QEMUFile *f, int offset); ++int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset); + void qemu_file_skip(QEMUFile *f, int size); + /* + * qemu_file_credit_transfer: +-- +2.39.1 + diff --git a/kvm-migration-move-migration_global_dump-to-migration-hm.patch b/kvm-migration-move-migration_global_dump-to-migration-hm.patch new file mode 100644 index 0000000..4e73c80 --- /dev/null +++ b/kvm-migration-move-migration_global_dump-to-migration-hm.patch @@ -0,0 +1,121 @@ +From 96e6914cbfb18bb8287c57b9ac9a6b364d3e7a22 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 22 Feb 2023 17:18:05 +0100 +Subject: [PATCH 20/56] migration: move migration_global_dump() to + migration-hmp-cmds.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [19/50] c8d330a2833c706b9bd78f7154be882e3977ad06 (peterx/qemu-kvm) + +It is only used there, so we can make it static. +Once there, remove spice.h that it is not used. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Peter Xu +Reviewed-by: Philippe Mathieu-Daudé + +--- + +fix David Edmonson ui/qemu-spice.h unintended removal + +(cherry picked from commit c938157713e723165a42cb6e8364adb6fcbd0e22) +Signed-off-by: Peter Xu +--- + include/migration/misc.h | 1 - + migration/migration-hmp-cmds.c | 22 +++++++++++++++++++++- + migration/migration.c | 19 ------------------- + 3 files changed, 21 insertions(+), 21 deletions(-) + +diff --git a/include/migration/misc.h b/include/migration/misc.h +index 8b49841016..5ebe13b4b9 100644 +--- a/include/migration/misc.h ++++ b/include/migration/misc.h +@@ -66,7 +66,6 @@ bool migration_has_finished(MigrationState *); + bool migration_has_failed(MigrationState *); + /* ...and after the device transmission */ + bool migration_in_postcopy_after_devices(MigrationState *); +-void migration_global_dump(Monitor *mon); + /* True if incoming migration entered POSTCOPY_INCOMING_DISCARD */ + bool migration_in_incoming_postcopy(void); + /* True if incoming migration entered POSTCOPY_INCOMING_ADVISE */ +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 72519ea99f..71da91967a 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -15,7 +15,6 @@ + + #include "qemu/osdep.h" + #include "block/qapi.h" +-#include "migration/misc.h" + #include "migration/snapshot.h" + #include "monitor/hmp.h" + #include "monitor/monitor.h" +@@ -30,6 +29,27 @@ + #include "qemu/sockets.h" + #include "sysemu/runstate.h" + #include "ui/qemu-spice.h" ++#include "sysemu/sysemu.h" ++#include "migration.h" ++ ++static void migration_global_dump(Monitor *mon) ++{ ++ MigrationState *ms = migrate_get_current(); ++ ++ monitor_printf(mon, "globals:\n"); ++ monitor_printf(mon, "store-global-state: %s\n", ++ ms->store_global_state ? "on" : "off"); ++ monitor_printf(mon, "only-migratable: %s\n", ++ only_migratable ? 
"on" : "off"); ++ monitor_printf(mon, "send-configuration: %s\n", ++ ms->send_configuration ? "on" : "off"); ++ monitor_printf(mon, "send-section-footer: %s\n", ++ ms->send_section_footer ? "on" : "off"); ++ monitor_printf(mon, "decompress-error-check: %s\n", ++ ms->decompress_error_check ? "on" : "off"); ++ monitor_printf(mon, "clear-bitmap-shift: %u\n", ++ ms->clear_bitmap_shift); ++} + + void hmp_info_migrate(Monitor *mon, const QDict *qdict) + { +diff --git a/migration/migration.c b/migration/migration.c +index e8f596bcfa..aa96ffdc5b 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -4420,25 +4420,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + s->migration_thread_running = true; + } + +-void migration_global_dump(Monitor *mon) +-{ +- MigrationState *ms = migrate_get_current(); +- +- monitor_printf(mon, "globals:\n"); +- monitor_printf(mon, "store-global-state: %s\n", +- ms->store_global_state ? "on" : "off"); +- monitor_printf(mon, "only-migratable: %s\n", +- only_migratable ? "on" : "off"); +- monitor_printf(mon, "send-configuration: %s\n", +- ms->send_configuration ? "on" : "off"); +- monitor_printf(mon, "send-section-footer: %s\n", +- ms->send_section_footer ? "on" : "off"); +- monitor_printf(mon, "decompress-error-check: %s\n", +- ms->decompress_error_check ? 
"on" : "off"); +- monitor_printf(mon, "clear-bitmap-shift: %u\n", +- ms->clear_bitmap_shift); +-} +- + #define DEFINE_PROP_MIG_CAP(name, x) \ + DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) + +-- +2.39.1 + diff --git a/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch b/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch new file mode 100644 index 0000000..7700466 --- /dev/null +++ b/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch @@ -0,0 +1,117 @@ +From 4827d5be5357ab89e0c46f606ad828bf97d36471 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 19 Apr 2023 12:17:38 -0400 +Subject: [PATCH 04/56] migration/postcopy: Detect file system on dest host +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [3/50] 121aeeda8a019f79dba6c077c7018bd1c86f3d71 (peterx/qemu-kvm) + +Postcopy requires the memory support userfaultfd to work. Right now we +check it but it's a bit too late (when switching to postcopy migration). + +Do that early right at enabling of postcopy. + +Note that this is still only a best effort because ramblocks can be +dynamically created. We can add check in hostmem creations and fail if +postcopy enabled, but maybe that's too aggressive. + +Still, we have chance to fail the most obvious where we know there's an +existing unsupported ramblock. 
+ +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit ae30b9b2892b85e6c3d5c0b8d1949c4d77a2954a) +Signed-off-by: Peter Xu +--- + migration/postcopy-ram.c | 34 ++++++++++++++++++++++++++++++---- + 1 file changed, 30 insertions(+), 4 deletions(-) + +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index 93f39f8e06..bbb8af61ae 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -36,6 +36,7 @@ + #include "yank_functions.h" + #include "tls.h" + #include "qemu/userfaultfd.h" ++#include "qemu/mmap-alloc.h" + + /* Arbitrary limit on size of each discard command, + * keeps them around ~200 bytes +@@ -336,11 +337,12 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) + + /* Callback from postcopy_ram_supported_by_host block iterator. + */ +-static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque) ++static int test_ramblock_postcopiable(RAMBlock *rb) + { + const char *block_name = qemu_ram_get_idstr(rb); + ram_addr_t length = qemu_ram_get_used_length(rb); + size_t pagesize = qemu_ram_pagesize(rb); ++ QemuFsType fs; + + if (length % pagesize) { + error_report("Postcopy requires RAM blocks to be a page size multiple," +@@ -348,6 +350,15 @@ static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque) + "page size of 0x%zx", block_name, length, pagesize); + return 1; + } ++ ++ if (rb->fd >= 0) { ++ fs = qemu_fd_getfs(rb->fd); ++ if (fs != QEMU_FS_TYPE_TMPFS && fs != QEMU_FS_TYPE_HUGETLBFS) { ++ error_report("Host backend files need to be TMPFS or HUGETLBFS only"); ++ return 1; ++ } ++ } ++ + return 0; + } + +@@ -366,6 +377,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + struct uffdio_range range_struct; + uint64_t feature_mask; + Error *local_err = NULL; ++ RAMBlock *block; + + if (qemu_target_page_size() > pagesize) { + error_report("Target page size bigger than host page size"); +@@ -390,9 +402,23 @@ bool 
postcopy_ram_supported_by_host(MigrationIncomingState *mis) + goto out; + } + +- /* We don't support postcopy with shared RAM yet */ +- if (foreach_not_ignored_block(test_ramblock_postcopiable, NULL)) { +- goto out; ++ /* ++ * We don't support postcopy with some type of ramblocks. ++ * ++ * NOTE: we explicitly ignored ramblock_is_ignored() instead we checked ++ * all possible ramblocks. This is because this function can be called ++ * when creating the migration object, during the phase RAM_MIGRATABLE ++ * is not even properly set for all the ramblocks. ++ * ++ * A side effect of this is we'll also check against RAM_SHARED ++ * ramblocks even if migrate_ignore_shared() is set (in which case ++ * we'll never migrate RAM_SHARED at all), but normally this shouldn't ++ * affect in reality, or we can revisit. ++ */ ++ RAMBLOCK_FOREACH(block) { ++ if (test_ramblock_postcopiable(block)) { ++ goto out; ++ } + } + + /* +-- +2.39.1 + diff --git a/kvm-migration-remove-extra-whitespace-character-for-code.patch b/kvm-migration-remove-extra-whitespace-character-for-code.patch new file mode 100644 index 0000000..88eb791 --- /dev/null +++ b/kvm-migration-remove-extra-whitespace-character-for-code.patch @@ -0,0 +1,44 @@ +From 93c9a1ae812720d3a29980a3c5fcfc1e916993de Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E6=9D=8E=E7=9A=86=E4=BF=8A?= +Date: Fri, 17 Mar 2023 09:57:13 +0000 +Subject: [PATCH 07/56] migration: remove extra whitespace character for code + style +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [6/50] bc1cd812f8dfc18e47e1644b5333c703eae23d2d (peterx/qemu-kvm) + +Fix code style. 
+ +Signed-off-by: 李皆俊 +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 8ebb6ecc3798e66a9ba98355983762bedfa1b72d) +Signed-off-by: Peter Xu +--- + migration/ram.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 79d881f735..0e68099bf9 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -3293,7 +3293,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + + migration_ops = g_malloc0(sizeof(MigrationOps)); + migration_ops->ram_save_target_page = ram_save_target_page_legacy; +- ret = multifd_send_sync_main(f); ++ ret = multifd_send_sync_main(f); + if (ret < 0) { + return ret; + } +-- +2.39.1 + diff --git a/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch b/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch new file mode 100644 index 0000000..52b19b3 --- /dev/null +++ b/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch @@ -0,0 +1,329 @@ +From ee566ec12099992f9134bda1db92dd568427245a Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 18:26:59 +0100 +Subject: [PATCH 18/56] migration: rename enabled_capabilities to capabilities +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [17/50] 841a27addf273d8f559bc8ebd2c854200e8ca673 (peterx/qemu-kvm) + +It is clear from the context what that means, and such a long name +with the extra long names of the capabilities make it very difficult to +stay inside the 80 columns limit.
+ +Signed-off-by: Juan Quintela +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 0cec2056ff67557c18d7b8ab1b70ab47c9e31f2f) +Signed-off-by: Peter Xu +--- + migration/migration.c | 52 +++++++++++++++++++++---------------------- + migration/migration.h | 2 +- + migration/rdma.c | 4 ++-- + migration/savevm.c | 6 ++--- + 4 files changed, 31 insertions(+), 33 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index f1b3439e5f..d8e5fb6226 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -364,8 +364,7 @@ static bool migrate_late_block_activate(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[ +- MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; ++ return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; + } + + /* +@@ -944,7 +943,7 @@ MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) + #endif + caps = g_malloc0(sizeof(*caps)); + caps->capability = i; +- caps->state = s->enabled_capabilities[i]; ++ caps->state = s->capabilities[i]; + QAPI_LIST_APPEND(tail, caps); + } + +@@ -1494,13 +1493,13 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + return; + } + +- memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list)); ++ memcpy(cap_list, s->capabilities, sizeof(cap_list)); + if (!migrate_caps_check(cap_list, params, errp)) { + return; + } + + for (cap = params; cap; cap = cap->next) { +- s->enabled_capabilities[cap->value->capability] = cap->value->state; ++ s->capabilities[cap->value->capability] = cap->value->state; + } + } + +@@ -2569,7 +2568,7 @@ bool migrate_release_ram(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; ++ return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; + } + + bool migrate_postcopy_ram(void) +@@ -2578,7 +2577,7 @@ bool migrate_postcopy_ram(void) + + s = migrate_get_current(); + +- return 
s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; + } + + bool migrate_postcopy(void) +@@ -2592,7 +2591,7 @@ bool migrate_auto_converge(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; ++ return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; + } + + bool migrate_zero_blocks(void) +@@ -2601,7 +2600,7 @@ bool migrate_zero_blocks(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; ++ return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; + } + + bool migrate_postcopy_blocktime(void) +@@ -2610,7 +2609,7 @@ bool migrate_postcopy_blocktime(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; + } + + bool migrate_use_compression(void) +@@ -2619,7 +2618,7 @@ bool migrate_use_compression(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]; ++ return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; + } + + int migrate_compress_level(void) +@@ -2664,7 +2663,7 @@ bool migrate_dirty_bitmaps(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; ++ return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; + } + + bool migrate_ignore_shared(void) +@@ -2673,7 +2672,7 @@ bool migrate_ignore_shared(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; ++ return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; + } + + bool migrate_validate_uuid(void) +@@ -2682,7 +2681,7 @@ bool migrate_validate_uuid(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; ++ return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; + } + + bool 
migrate_use_events(void) +@@ -2691,7 +2690,7 @@ bool migrate_use_events(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; ++ return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; + } + + bool migrate_use_multifd(void) +@@ -2700,7 +2699,7 @@ bool migrate_use_multifd(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD]; ++ return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; + } + + bool migrate_pause_before_switchover(void) +@@ -2709,8 +2708,7 @@ bool migrate_pause_before_switchover(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[ +- MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; ++ return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; + } + + int migrate_multifd_channels(void) +@@ -2757,7 +2755,7 @@ bool migrate_use_zero_copy_send(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; ++ return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; + } + #endif + +@@ -2776,7 +2774,7 @@ int migrate_use_xbzrle(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; ++ return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; + } + + uint64_t migrate_xbzrle_cache_size(void) +@@ -2803,7 +2801,7 @@ bool migrate_use_block(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK]; ++ return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; + } + + bool migrate_use_return_path(void) +@@ -2812,7 +2810,7 @@ bool migrate_use_return_path(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; ++ return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; + } + + bool migrate_use_block_incremental(void) +@@ -2830,7 +2828,7 @@ bool migrate_background_snapshot(void) + + s = migrate_get_current(); + +- return 
s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; ++ return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; + } + + bool migrate_postcopy_preempt(void) +@@ -2839,7 +2837,7 @@ bool migrate_postcopy_preempt(void) + + s = migrate_get_current(); + +- return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; ++ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; + } + + /* migration thread support */ +@@ -3584,7 +3582,7 @@ fail: + bool migrate_colo_enabled(void) + { + MigrationState *s = migrate_get_current(); +- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO]; ++ return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; + } + + typedef enum MigThrError { +@@ -4447,7 +4445,7 @@ void migration_global_dump(Monitor *mon) + } + + #define DEFINE_PROP_MIG_CAP(name, x) \ +- DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false) ++ DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) + + static Property migration_properties[] = { + DEFINE_PROP_BOOL("store-global-state", MigrationState, +@@ -4646,7 +4644,7 @@ static bool migration_object_check(MigrationState *ms, Error **errp) + } + + for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { +- if (ms->enabled_capabilities[i]) { ++ if (ms->capabilities[i]) { + QAPI_LIST_PREPEND(head, migrate_cap_add(i, true)); + } + } +diff --git a/migration/migration.h b/migration/migration.h +index 310ae8901b..04e0860b4e 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -310,7 +310,7 @@ struct MigrationState { + int64_t downtime_start; + int64_t downtime; + int64_t expected_downtime; +- bool enabled_capabilities[MIGRATION_CAPABILITY__MAX]; ++ bool capabilities[MIGRATION_CAPABILITY__MAX]; + int64_t setup_time; + /* + * Whether guest was running when we enter the completion stage. 
+diff --git a/migration/rdma.c b/migration/rdma.c +index df646be35e..f35f021963 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -4179,7 +4179,7 @@ void rdma_start_outgoing_migration(void *opaque, + } + + ret = qemu_rdma_source_init(rdma, +- s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); ++ s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); + + if (ret) { + goto err; +@@ -4201,7 +4201,7 @@ void rdma_start_outgoing_migration(void *opaque, + } + + ret = qemu_rdma_source_init(rdma_return_path, +- s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); ++ s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); + + if (ret) { + goto return_path_err; +diff --git a/migration/savevm.c b/migration/savevm.c +index aa54a67fda..589ef926ab 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -253,7 +253,7 @@ static uint32_t get_validatable_capabilities_count(void) + uint32_t result = 0; + int i; + for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { +- if (should_validate_capability(i) && s->enabled_capabilities[i]) { ++ if (should_validate_capability(i) && s->capabilities[i]) { + result++; + } + } +@@ -275,7 +275,7 @@ static int configuration_pre_save(void *opaque) + state->capabilities = g_renew(MigrationCapability, state->capabilities, + state->caps_count); + for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) { +- if (should_validate_capability(i) && s->enabled_capabilities[i]) { ++ if (should_validate_capability(i) && s->capabilities[i]) { + state->capabilities[j++] = i; + } + } +@@ -325,7 +325,7 @@ static bool configuration_validate_capabilities(SaveState *state) + continue; + } + source_state = test_bit(i, source_caps_bm); +- target_state = s->enabled_capabilities[i]; ++ target_state = s->capabilities[i]; + if (source_state != target_state) { + error_report("Capability %s is %s, but received capability is %s", + MigrationCapability_str(i), +-- +2.39.1 + diff --git 
a/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch b/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch new file mode 100644 index 0000000..0421e33 --- /dev/null +++ b/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch @@ -0,0 +1,42 @@ +From ab9b8620c62540f3267d005c198920671ef9abc3 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 3 Mar 2023 11:15:28 +0100 +Subject: [PATCH 06/56] postcopy-ram: do not use qatomic_mb_read +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [5/50] 534c0e13362dfc994fa90c79bfb5ed6ee8c27dfc (peterx/qemu-kvm) + +It does not even pair with a qatomic_mb_set(), so it is clearer to use +load-acquire in this case; they are synonyms. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4592eaf38755a28300d113cd128f65b5b38495f2) +Signed-off-by: Peter Xu +--- + migration/postcopy-ram.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index bbb8af61ae..d7b48dd920 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -1526,7 +1526,7 @@ static PostcopyState incoming_postcopy_state; + + PostcopyState postcopy_state_get(void) + { +- return qatomic_mb_read(&incoming_postcopy_state); ++ return qatomic_load_acquire(&incoming_postcopy_state); + } + + /* Set the state and return the old state */ +-- +2.39.1 + diff --git a/kvm-spice-move-client_migrate_info-command-to-ui.patch b/kvm-spice-move-client_migrate_info-command-to-ui.patch new file mode 100644 index 0000000..f1de158 --- /dev/null +++ b/kvm-spice-move-client_migrate_info-command-to-ui.patch @@ -0,0 +1,248 @@ +From 00f6e941e75f378c84c773a15efde7dd085d9ce3 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 1 Mar 2023 
19:40:14 +0100 +Subject: [PATCH 21/56] spice: move client_migrate_info command to ui/ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [20/50] a587bb001b51a1f9fdf2fcfb0978bb931ae443b6 (peterx/qemu-kvm) + +It has nothing to do with migration, except for the "migrate" in the +name of the command. Move it with the rest of the ui commands. + +Signed-off-by: Juan Quintela +Reviewed-by: Philippe Mathieu-Daudé +(cherry picked from commit f9e1ef7482f1ee289b04f4b45702a1701bc8929d) +Signed-off-by: Peter Xu +--- + migration/migration-hmp-cmds.c | 17 ----------------- + migration/migration.c | 30 ------------------------------ + qapi/migration.json | 28 ---------------------------- + qapi/ui.json | 28 ++++++++++++++++++++++++++++ + ui/ui-hmp-cmds.c | 17 +++++++++++++++++ + ui/ui-qmp-cmds.c | 29 +++++++++++++++++++++++++++++ + 6 files changed, 74 insertions(+), 75 deletions(-) + +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 71da91967a..4e9f00e7dc 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -636,23 +636,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + hmp_handle_error(mon, err); + } + +-void hmp_client_migrate_info(Monitor *mon, const QDict *qdict) +-{ +- Error *err = NULL; +- const char *protocol = qdict_get_str(qdict, "protocol"); +- const char *hostname = qdict_get_str(qdict, "hostname"); +- bool has_port = qdict_haskey(qdict, "port"); +- int port = qdict_get_try_int(qdict, "port", -1); +- bool has_tls_port = qdict_haskey(qdict, "tls-port"); +- int tls_port = qdict_get_try_int(qdict, "tls-port", -1); +- const char *cert_subject = qdict_get_try_str(qdict, "cert-subject"); +- +- 
qmp_client_migrate_info(protocol, hostname, +- has_port, port, has_tls_port, tls_port, +- cert_subject, &err); +- hmp_handle_error(mon, err); +-} +- + void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict) + { + Error *err = NULL; +diff --git a/migration/migration.c b/migration/migration.c +index aa96ffdc5b..b745d829a4 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -63,7 +63,6 @@ + #include "sysemu/cpus.h" + #include "yank_functions.h" + #include "sysemu/qtest.h" +-#include "ui/qemu-spice.h" + + #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ + +@@ -1018,35 +1017,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + return params; + } + +-void qmp_client_migrate_info(const char *protocol, const char *hostname, +- bool has_port, int64_t port, +- bool has_tls_port, int64_t tls_port, +- const char *cert_subject, +- Error **errp) +-{ +- if (strcmp(protocol, "spice") == 0) { +- if (!qemu_using_spice(errp)) { +- return; +- } +- +- if (!has_port && !has_tls_port) { +- error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port"); +- return; +- } +- +- if (qemu_spice.migrate_info(hostname, +- has_port ? port : -1, +- has_tls_port ? tls_port : -1, +- cert_subject)) { +- error_setg(errp, "Could not set up display for migration"); +- return; +- } +- return; +- } +- +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'"); +-} +- + AnnounceParameters *migrate_announce_params(void) + { + static AnnounceParameters ap; +diff --git a/qapi/migration.json b/qapi/migration.json +index c84fa10e86..2c35b7b9cf 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -1203,34 +1203,6 @@ + { 'command': 'query-migrate-parameters', + 'returns': 'MigrationParameters' } + +-## +-# @client_migrate_info: +-# +-# Set migration information for remote display. This makes the server +-# ask the client to automatically reconnect using the new parameters +-# once migration finished successfully. 
Only implemented for SPICE. +-# +-# @protocol: must be "spice" +-# @hostname: migration target hostname +-# @port: spice tcp port for plaintext channels +-# @tls-port: spice tcp port for tls-secured channels +-# @cert-subject: server certificate subject +-# +-# Since: 0.14 +-# +-# Example: +-# +-# -> { "execute": "client_migrate_info", +-# "arguments": { "protocol": "spice", +-# "hostname": "virt42.lab.kraxel.org", +-# "port": 1234 } } +-# <- { "return": {} } +-# +-## +-{ 'command': 'client_migrate_info', +- 'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int', +- '*tls-port': 'int', '*cert-subject': 'str' } } +- + ## + # @migrate-start-postcopy: + # +diff --git a/qapi/ui.json b/qapi/ui.json +index 98322342f7..7ddd27a932 100644 +--- a/qapi/ui.json ++++ b/qapi/ui.json +@@ -1554,3 +1554,31 @@ + { 'command': 'display-update', + 'data': 'DisplayUpdateOptions', + 'boxed' : true } ++ ++## ++# @client_migrate_info: ++# ++# Set migration information for remote display. This makes the server ++# ask the client to automatically reconnect using the new parameters ++# once migration finished successfully. Only implemented for SPICE. 
++# ++# @protocol: must be "spice" ++# @hostname: migration target hostname ++# @port: spice tcp port for plaintext channels ++# @tls-port: spice tcp port for tls-secured channels ++# @cert-subject: server certificate subject ++# ++# Since: 0.14 ++# ++# Example: ++# ++# -> { "execute": "client_migrate_info", ++# "arguments": { "protocol": "spice", ++# "hostname": "virt42.lab.kraxel.org", ++# "port": 1234 } } ++# <- { "return": {} } ++# ++## ++{ 'command': 'client_migrate_info', ++ 'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int', ++ '*tls-port': 'int', '*cert-subject': 'str' } } +diff --git a/ui/ui-hmp-cmds.c b/ui/ui-hmp-cmds.c +index 5c456ecc02..c671389473 100644 +--- a/ui/ui-hmp-cmds.c ++++ b/ui/ui-hmp-cmds.c +@@ -458,3 +458,20 @@ hmp_screendump(Monitor *mon, const QDict *qdict) + end: + hmp_handle_error(mon, err); + } ++ ++void hmp_client_migrate_info(Monitor *mon, const QDict *qdict) ++{ ++ Error *err = NULL; ++ const char *protocol = qdict_get_str(qdict, "protocol"); ++ const char *hostname = qdict_get_str(qdict, "hostname"); ++ bool has_port = qdict_haskey(qdict, "port"); ++ int port = qdict_get_try_int(qdict, "port", -1); ++ bool has_tls_port = qdict_haskey(qdict, "tls-port"); ++ int tls_port = qdict_get_try_int(qdict, "tls-port", -1); ++ const char *cert_subject = qdict_get_try_str(qdict, "cert-subject"); ++ ++ qmp_client_migrate_info(protocol, hostname, ++ has_port, port, has_tls_port, tls_port, ++ cert_subject, &err); ++ hmp_handle_error(mon, err); ++} +diff --git a/ui/ui-qmp-cmds.c b/ui/ui-qmp-cmds.c +index dbc4afcd73..a37a7024f3 100644 +--- a/ui/ui-qmp-cmds.c ++++ b/ui/ui-qmp-cmds.c +@@ -175,3 +175,32 @@ void qmp_display_update(DisplayUpdateOptions *arg, Error **errp) + abort(); + } + } ++ ++void qmp_client_migrate_info(const char *protocol, const char *hostname, ++ bool has_port, int64_t port, ++ bool has_tls_port, int64_t tls_port, ++ const char *cert_subject, ++ Error **errp) ++{ ++ if (strcmp(protocol, "spice") == 0) { ++ if 
(!qemu_using_spice(errp)) { ++ return; ++ } ++ ++ if (!has_port && !has_tls_port) { ++ error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port"); ++ return; ++ } ++ ++ if (qemu_spice.migrate_info(hostname, ++ has_port ? port : -1, ++ has_tls_port ? tls_port : -1, ++ cert_subject)) { ++ error_setg(errp, "Could not set up display for migration"); ++ return; ++ } ++ return; ++ } ++ ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'"); ++} +-- +2.39.1 + diff --git a/kvm-util-mmap-alloc-qemu_fd_getfs.patch b/kvm-util-mmap-alloc-qemu_fd_getfs.patch new file mode 100644 index 0000000..b0e66f6 --- /dev/null +++ b/kvm-util-mmap-alloc-qemu_fd_getfs.patch @@ -0,0 +1,95 @@ +From 439a8cdd010dfd253fc2277ae4ec605b5ba621d9 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 19 Apr 2023 12:17:36 -0400 +Subject: [PATCH 02/56] util/mmap-alloc: qemu_fd_getfs() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [1/50] 8970b5ae611a933d693e0c90cbf4eda073635494 (peterx/qemu-kvm) + +This new helper fetches file system type for a fd. Only Linux is +implemented so far. Currently only tmpfs and hugetlbfs are defined, +but it can grow as needed. 
+ +Signed-off-by: Peter Xu +Reviewed-by: David Hildenbrand +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit fa45f8dab9613993c042176ea2d25552bfebc955) +Signed-off-by: Peter Xu +--- + include/qemu/mmap-alloc.h | 7 +++++++ + util/mmap-alloc.c | 28 ++++++++++++++++++++++++++++ + 2 files changed, 35 insertions(+) + +diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h +index 2825e231a7..8344daaa03 100644 +--- a/include/qemu/mmap-alloc.h ++++ b/include/qemu/mmap-alloc.h +@@ -1,8 +1,15 @@ + #ifndef QEMU_MMAP_ALLOC_H + #define QEMU_MMAP_ALLOC_H + ++typedef enum { ++ QEMU_FS_TYPE_UNKNOWN = 0, ++ QEMU_FS_TYPE_TMPFS, ++ QEMU_FS_TYPE_HUGETLBFS, ++ QEMU_FS_TYPE_NUM, ++} QemuFsType; + + size_t qemu_fd_getpagesize(int fd); ++QemuFsType qemu_fd_getfs(int fd); + + /** + * qemu_ram_mmap: mmap anonymous memory, the specified file or device. +diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c +index 5ed7d29183..ed14f9c64d 100644 +--- a/util/mmap-alloc.c ++++ b/util/mmap-alloc.c +@@ -27,8 +27,36 @@ + + #ifdef CONFIG_LINUX + #include ++#include + #endif + ++QemuFsType qemu_fd_getfs(int fd) ++{ ++#ifdef CONFIG_LINUX ++ struct statfs fs; ++ int ret; ++ ++ if (fd < 0) { ++ return QEMU_FS_TYPE_UNKNOWN; ++ } ++ ++ do { ++ ret = fstatfs(fd, &fs); ++ } while (ret != 0 && errno == EINTR); ++ ++ switch (fs.f_type) { ++ case TMPFS_MAGIC: ++ return QEMU_FS_TYPE_TMPFS; ++ case HUGETLBFS_MAGIC: ++ return QEMU_FS_TYPE_HUGETLBFS; ++ default: ++ return QEMU_FS_TYPE_UNKNOWN; ++ } ++#else ++ return QEMU_FS_TYPE_UNKNOWN; ++#endif ++} ++ + size_t qemu_fd_getpagesize(int fd) + { + #ifdef CONFIG_LINUX +-- +2.39.1 + diff --git a/kvm-vl.c-Create-late-backends-before-migration-object.patch b/kvm-vl.c-Create-late-backends-before-migration-object.patch new file mode 100644 index 0000000..e1eef6d --- /dev/null +++ b/kvm-vl.c-Create-late-backends-before-migration-object.patch @@ -0,0 +1,58 @@ +From 63e2339a6f38706c6fc5eb251426812520db6a6d Mon Sep 17 00:00:00 2001 
+From: Peter Xu +Date: Wed, 19 Apr 2023 12:17:37 -0400 +Subject: [PATCH 03/56] vl.c: Create late backends before migration object +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types +RH-Bugzilla: 2057267 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: quintela1 +RH-Commit: [2/50] 7209bb94faa48650388be8fef08c77afd26517d8 (peterx/qemu-kvm) + +The migration object may want to check against different types of memory +when initialized. Delay the creation to be after late backends. + +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Reviewed-by: David Hildenbrand +Signed-off-by: Juan Quintela +(cherry picked from commit cb9d8b8ce1aaf38f53295fc59ec1b8b7eb4338d2) +Signed-off-by: Peter Xu +--- + softmmu/vl.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/softmmu/vl.c b/softmmu/vl.c +index ad4173138d..a44b49430b 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -3592,14 +3592,19 @@ void qemu_init(int argc, char **argv) + machine_class->name, machine_class->deprecation_reason); + } + ++ /* ++ * Create backends before creating migration objects, so that it can ++ * check against compatibilities on the backend memories (e.g. postcopy ++ * over memory-backend-file objects). ++ */ ++ qemu_create_late_backends(); ++ + /* + * Note: creates a QOM object, must run only after global and + * compat properties have been set up. 
+ */ + migration_object_init(); + +- qemu_create_late_backends(); +- + /* parse features once if machine provides default cpu_type */ + current_machine->cpu_type = machine_class->default_cpu_type; + if (cpu_option) { +-- +2.39.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 4cb63d3..e7c5444 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 8.0.0 -Release: 3%{?rcrel}%{?dist}%{?cc_suffix} +Release: 4%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -199,6 +199,116 @@ Patch22: kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch Patch23: kvm-migration-Handle-block-device-inactivation-failures-.patch # For bz#2058982 - Qemu core dump if cut off nfs storage during migration Patch24: kvm-migration-Minor-control-flow-simplification.patch +# For bz#2058982 - Qemu core dump if cut off nfs storage during migration +Patch25: kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch26: kvm-util-mmap-alloc-qemu_fd_getfs.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch27: kvm-vl.c-Create-late-backends-before-migration-object.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch28: kvm-migration-postcopy-Detect-file-system-on-dest-host.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch29: kvm-migration-mark-mixed-functions-that-can-suspend.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch30: kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch +# For bz#2057267 - Migration with postcopy fail when vm set with 
shared memory +Patch31: kvm-migration-remove-extra-whitespace-character-for-code.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch32: kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch33: kvm-migration-Update-atomic-stats-out-of-the-mutex.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch34: kvm-migration-Make-multifd_bytes-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch35: kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch36: kvm-migration-Make-precopy_bytes-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch37: kvm-migration-Make-downtime_bytes-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch38: kvm-migration-Make-dirty_sync_count-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch39: kvm-migration-Make-postcopy_requests-atomic.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch40: kvm-migration-Rename-duplicate-to-zero_pages.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch41: kvm-migration-Rename-normal-to-normal_pages.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch42: kvm-migration-rename-enabled_capabilities-to-capabilitie.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch43: kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch44: kvm-migration-move-migration_global_dump-to-migration-hm.patch +# For bz#2057267 - Migration with 
postcopy fail when vm set with shared memory +Patch45: kvm-spice-move-client_migrate_info-command-to-ui.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch46: kvm-migration-Create-migrate_cap_set.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch47: kvm-migration-Create-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch48: kvm-migration-Move-migrate_colo_enabled-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch49: kvm-migration-Move-migrate_use_compression-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch50: kvm-migration-Move-migrate_use_events-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch51: kvm-migration-Move-migrate_use_multifd-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch52: kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch53: kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch54: kvm-migration-Move-migrate_use_block-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch55: kvm-migration-Move-migrate_use_return-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch56: kvm-migration-Create-migrate_rdma_pin_all-function.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch57: kvm-migration-Move-migrate_caps_check-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch58: 
kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch59: kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch60: kvm-migration-Move-migrate_cap_set-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch61: kvm-migration-Move-parameters-functions-to-option.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch62: kvm-migration-Use-migrate_max_postcopy_bandwidth.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch63: kvm-migration-Move-migrate_use_block_incremental-to-opti.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch64: kvm-migration-Create-migrate_throttle_trigger_threshold.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch65: kvm-migration-Create-migrate_checkpoint_delay.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch66: kvm-migration-Create-migrate_max_cpu_throttle.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch67: kvm-migration-Move-migrate_announce_params-to-option.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch68: kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch69: kvm-migration-Create-migrate_cpu_throttle_increment-func.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch70: kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch71: kvm-migration-Move-migrate_postcopy-to-options.c.patch +# 
For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch72: kvm-migration-Create-migrate_max_bandwidth-function.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch73: kvm-migration-Move-migrate_use_tls-to-options.c.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch74: kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch +# For bz#2057267 - Migration with postcopy fail when vm set with shared memory +Patch75: kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch +# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize +Patch76: kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch +# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize +Patch77: kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch +# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize +Patch78: kvm-iotests-Use-alternative-CPU-type-that-is-not-depreca.patch +# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize +Patch79: kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch %if %{have_clang} BuildRequires: clang @@ -215,6 +325,7 @@ BuildRequires: glib2-devel BuildRequires: gnutls-devel BuildRequires: cyrus-sasl-devel BuildRequires: libaio-devel +BuildRequires: liburing-devel BuildRequires: python3-devel BuildRequires: libattr-devel BuildRequires: libusbx-devel >= %{libusbx_version} @@ -713,6 +824,7 @@ run_configure \ --enable-libusb \ --enable-libudev \ --enable-linux-aio \ + --enable-linux-io-uring \ --enable-lzo \ --enable-malloc-trim \ --enable-modules \ @@ -1221,6 +1333,72 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon May 22 2023 Miroslav Rezanina - 8.0.0-4 +- kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch [bz#2058982] +- kvm-util-mmap-alloc-qemu_fd_getfs.patch [bz#2057267] +- 
kvm-vl.c-Create-late-backends-before-migration-object.patch [bz#2057267] +- kvm-migration-postcopy-Detect-file-system-on-dest-host.patch [bz#2057267] +- kvm-migration-mark-mixed-functions-that-can-suspend.patch [bz#2057267] +- kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch [bz#2057267] +- kvm-migration-remove-extra-whitespace-character-for-code.patch [bz#2057267] +- kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch [bz#2057267] +- kvm-migration-Update-atomic-stats-out-of-the-mutex.patch [bz#2057267] +- kvm-migration-Make-multifd_bytes-atomic.patch [bz#2057267] +- kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch [bz#2057267] +- kvm-migration-Make-precopy_bytes-atomic.patch [bz#2057267] +- kvm-migration-Make-downtime_bytes-atomic.patch [bz#2057267] +- kvm-migration-Make-dirty_sync_count-atomic.patch [bz#2057267] +- kvm-migration-Make-postcopy_requests-atomic.patch [bz#2057267] +- kvm-migration-Rename-duplicate-to-zero_pages.patch [bz#2057267] +- kvm-migration-Rename-normal-to-normal_pages.patch [bz#2057267] +- kvm-migration-rename-enabled_capabilities-to-capabilitie.patch [bz#2057267] +- kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch [bz#2057267] +- kvm-migration-move-migration_global_dump-to-migration-hm.patch [bz#2057267] +- kvm-spice-move-client_migrate_info-command-to-ui.patch [bz#2057267] +- kvm-migration-Create-migrate_cap_set.patch [bz#2057267] +- kvm-migration-Create-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_colo_enabled-to-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_use_compression-to-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_use_events-to-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_use_multifd-to-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch [bz#2057267] +- kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch [bz#2057267] +- kvm-migration-Move-migrate_use_block-to-options.c.patch [bz#2057267] +- 
kvm-migration-Move-migrate_use_return-to-options.c.patch [bz#2057267] +- kvm-migration-Create-migrate_rdma_pin_all-function.patch [bz#2057267] +- kvm-migration-Move-migrate_caps_check-to-options.c.patch [bz#2057267] +- kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch [bz#2057267] +- kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch [bz#2057267] +- kvm-migration-Move-migrate_cap_set-to-options.c.patch [bz#2057267] +- kvm-migration-Move-parameters-functions-to-option.c.patch [bz#2057267] +- kvm-migration-Use-migrate_max_postcopy_bandwidth.patch [bz#2057267] +- kvm-migration-Move-migrate_use_block_incremental-to-opti.patch [bz#2057267] +- kvm-migration-Create-migrate_throttle_trigger_threshold.patch [bz#2057267] +- kvm-migration-Create-migrate_checkpoint_delay.patch [bz#2057267] +- kvm-migration-Create-migrate_max_cpu_throttle.patch [bz#2057267] +- kvm-migration-Move-migrate_announce_params-to-option.c.patch [bz#2057267] +- kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch [bz#2057267] +- kvm-migration-Create-migrate_cpu_throttle_increment-func.patch [bz#2057267] +- kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch [bz#2057267] +- kvm-migration-Move-migrate_postcopy-to-options.c.patch [bz#2057267] +- kvm-migration-Create-migrate_max_bandwidth-function.patch [bz#2057267] +- kvm-migration-Move-migrate_use_tls-to-options.c.patch [bz#2057267] +- kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch [bz#2057267] +- kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch [bz#2057267] +- kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch [bz#2185688] +- kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch [bz#2185688] +- kvm-iotests-Use-alternative-CPU-type-that-is-not-depreca.patch [bz#2185688] +- kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch [bz#2185688] +- kvm-Enable-Linux-io_uring.patch [bz#1947230] +- Resolves: bz#2058982 + (Qemu core dump if cut off nfs storage 
during migration) +- Resolves: bz#2057267 + (Migration with postcopy fail when vm set with shared memory) +- Resolves: bz#2185688 + ([qemu-kvm] no response with QMP command block_resize) +- Resolves: bz#1947230 + (Enable QEMU support for io_uring in RHEL9) + * Mon May 15 2023 Miroslav Rezanina - 8.0.0-3 - kvm-migration-Handle-block-device-inactivation-failures-.patch [bz#2058982] - kvm-migration-Minor-control-flow-simplification.patch [bz#2058982] From a799a516c56f2f4aac0d32f4bcc0efcc04857229 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Tue, 13 Jun 2023 05:19:43 -0400 Subject: [PATCH 193/195] * Tue Jun 13 2023 Miroslav Rezanina - 8.0.0-5 - kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch [bz#2186725] - kvm-graph-lock-Disable-locking-for-now.patch [bz#2186725] - kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch [bz#2186725] - kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch [bz#2186725] - kvm-memory-prevent-dma-reentracy-issues.patch [RHEL-516] - kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch [RHEL-516] - kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch [RHEL-516] - kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch [RHEL-516] - kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch [RHEL-516] - kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch [RHEL-516] - kvm-raven-disable-reentrancy-detection-for-iomem.patch [RHEL-516] - kvm-apic-disable-reentrancy-detection-for-apic-msi.patch [RHEL-516] - kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch [RHEL-516] - kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch [RHEL-516] - kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch [RHEL-516] - kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch [RHEL-516] - kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch [RHEL-516] - kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch [bz#2189423] - 
kvm-multifd-Fix-the-number-of-channels-ready.patch [bz#2196289] - kvm-util-async-teardown-wire-up-query-command-line-optio.patch [bz#2168500] - kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch [bz#2168500] - Resolves: bz#2186725 (Qemu hang when commit during fio running(iothread enable)) - Resolves: RHEL-516 (CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9]) - Resolves: bz#2189423 (Failed to migrate VM from rhel 9.3 to rhel 9.2) - Resolves: bz#2196289 (Fix number of ready channels on multifd) - Resolves: bz#2168500 ([IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part) --- ...le-reentrancy-detection-for-apic-msi.patch | 55 ++ ...ional-reentrancy-guard-to-the-BH-API.patch | 231 +++++++++ ...-use-after-free-on-re-entrancy-guard.patch | 70 +++ ...-disable-reentrancy-detection-for-io.patch | 57 +++ ...t-assert_bdrv_graph_readable-by-defa.patch | 121 +++++ ...ch-add-qemu_bh_new-aio_bh_new-checks.patch | 55 ++ kvm-graph-lock-Disable-locking-for-now.patch | 153 ++++++ ...CI_ERR_UNCOR_MASK-register-for-machi.patch | 118 +++++ ...qemu_bh_new-calls-with-qemu_bh_new_g.patch | 470 ++++++++++++++++++ ...5a-Fix-reentrancy-issues-in-the-LSI-.patch | 141 ++++++ ...ommit-with-iothreads-and-ongoing-I-O.patch | 144 ++++++ ...loongarch_ipi_iocsr-re-entrnacy-safe.patch | 53 ++ ...le-reentrancy-detection-for-MMIO-reg.patch | 70 +++ ...le-reentrancy-detection-for-script-R.patch | 58 +++ kvm-memory-prevent-dma-reentracy-issues.patch | 150 ++++++ ...checks-prior-to-unsetting-engaged_in.patch | 67 +++ ...ifd-Fix-the-number-of-channels-ready.patch | 58 +++ ...rained_poll-to-wake-coroutine-in-rig.patch | 159 ++++++ ...sable-reentrancy-detection-for-iomem.patch | 54 ++ ...rious-warning-with-asynchronous-tear.patch | 129 +++++ ...own-wire-up-query-command-line-optio.patch | 180 +++++++ qemu-kvm.spec | 77 ++- 22 files changed, 2669 insertions(+), 1 deletion(-) create mode 100644 
kvm-apic-disable-reentrancy-detection-for-apic-msi.patch create mode 100644 kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch create mode 100644 kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch create mode 100644 kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch create mode 100644 kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch create mode 100644 kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch create mode 100644 kvm-graph-lock-Disable-locking-for-now.patch create mode 100644 kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch create mode 100644 kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch create mode 100644 kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch create mode 100644 kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch create mode 100644 kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch create mode 100644 kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch create mode 100644 kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch create mode 100644 kvm-memory-prevent-dma-reentracy-issues.patch create mode 100644 kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch create mode 100644 kvm-multifd-Fix-the-number-of-channels-ready.patch create mode 100644 kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch create mode 100644 kvm-raven-disable-reentrancy-detection-for-iomem.patch create mode 100644 kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch create mode 100644 kvm-util-async-teardown-wire-up-query-command-line-optio.patch diff --git a/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch b/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch new file mode 100644 index 0000000..69505f8 --- /dev/null +++ b/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch @@ -0,0 +1,55 @@ +From 5beea8b889a38aa59259679d7f1ba050f09eb0f0 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 
2023 10:29:03 -0400 +Subject: [PATCH 12/21] apic: disable reentrancy detection for apic-msi + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [8/13] 329f3b1c02fc42d85c821dd14c70e6b885cf849a (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 50795ee051a342c681a9b45671c552fbd6274db8 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:13 2023 -0400 + + apic: disable reentrancy detection for apic-msi + + As the code is designed for re-entrant calls to apic-msi, mark apic-msi + as reentrancy-safe. + + Signed-off-by: Alexander Bulekov + Reviewed-by: Darren Kenny + Message-Id: <20230427211013.2994127-9-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/intc/apic.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/intc/apic.c b/hw/intc/apic.c +index 20b5a94073..ac3d47d231 100644 +--- a/hw/intc/apic.c ++++ b/hw/intc/apic.c +@@ -885,6 +885,13 @@ static void apic_realize(DeviceState *dev, Error **errp) + memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi", + APIC_SPACE_SIZE); + ++ /* ++ * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can ++ * write back to apic-msi. As such mark the apic-msi region re-entrancy ++ * safe. 
++ */ ++ s->io_memory.disable_reentrancy_guard = true; ++ + s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s); + local_apics[s->id] = s; + +-- +2.39.3 + diff --git a/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch b/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch new file mode 100644 index 0000000..65ba3be --- /dev/null +++ b/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch @@ -0,0 +1,231 @@ +From f6db359f543723e2eb840653d35004af357ea5ac Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 06/21] async: Add an optional reentrancy guard to the BH API + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/13] 009a9a68c1c25b9ad0cd9bc0d73b3e07bee2a19d (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 9c86c97f12c060bf7484dd931f38634e166a81f0 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:07 2023 -0400 + + async: Add an optional reentrancy guard to the BH API + + Devices can pass their MemoryReentrancyGuard (from their DeviceState), + when creating new BHes. Then, the async API will toggle the guard + before/after calling the BH call-back. This prevents bh->mmio reentrancy + issues. 
+ + Signed-off-by: Alexander Bulekov + Reviewed-by: Darren Kenny + Message-Id: <20230427211013.2994127-3-alxndr@bu.edu> + [thuth: Fix "line over 90 characters" checkpatch.pl error] + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + docs/devel/multiple-iothreads.txt | 7 +++++++ + include/block/aio.h | 18 ++++++++++++++++-- + include/qemu/main-loop.h | 7 +++++-- + tests/unit/ptimer-test-stubs.c | 3 ++- + util/async.c | 18 +++++++++++++++++- + util/main-loop.c | 6 ++++-- + util/trace-events | 1 + + 7 files changed, 52 insertions(+), 8 deletions(-) + +diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt +index 343120f2ef..a3e949f6b3 100644 +--- a/docs/devel/multiple-iothreads.txt ++++ b/docs/devel/multiple-iothreads.txt +@@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext: + * LEGACY qemu_aio_set_event_notifier() - monitor an event notifier + * LEGACY timer_new_ms() - create a timer + * LEGACY qemu_bh_new() - create a BH ++ * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard + * LEGACY qemu_aio_wait() - run an event loop iteration + + Since they implicitly work on the main loop they cannot be used in code that +@@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h): + * aio_set_event_notifier() - monitor an event notifier + * aio_timer_new() - create a timer + * aio_bh_new() - create a BH ++ * aio_bh_new_guarded() - create a BH with a device re-entrancy guard + * aio_poll() - run an event loop iteration + ++The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard" ++argument, which is used to check for and prevent re-entrancy problems. For ++BHs associated with devices, the reentrancy-guard is contained in the ++corresponding DeviceState and named "mem_reentrancy_guard". ++ + The AioContext can be obtained from the IOThread using + iothread_get_aio_context() or for the main loop using qemu_get_aio_context(). 
+ Code that takes an AioContext argument works both in IOThreads or the main +diff --git a/include/block/aio.h b/include/block/aio.h +index 543717f294..db6f23c619 100644 +--- a/include/block/aio.h ++++ b/include/block/aio.h +@@ -23,6 +23,8 @@ + #include "qemu/thread.h" + #include "qemu/timer.h" + #include "block/graph-lock.h" ++#include "hw/qdev-core.h" ++ + + typedef struct BlockAIOCB BlockAIOCB; + typedef void BlockCompletionFunc(void *opaque, int ret); +@@ -331,9 +333,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, + * is opaque and must be allocated prior to its use. + * + * @name: A human-readable identifier for debugging purposes. ++ * @reentrancy_guard: A guard set when entering a cb to prevent ++ * device-reentrancy issues + */ + QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, +- const char *name); ++ const char *name, MemReentrancyGuard *reentrancy_guard); + + /** + * aio_bh_new: Allocate a new bottom half structure +@@ -342,7 +346,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, + * string. + */ + #define aio_bh_new(ctx, cb, opaque) \ +- aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb))) ++ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL) ++ ++/** ++ * aio_bh_new_guarded: Allocate a new bottom half structure with a ++ * reentrancy_guard ++ * ++ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name ++ * string. ++ */ ++#define aio_bh_new_guarded(ctx, cb, opaque, guard) \ ++ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard) + + /** + * aio_notify: Force processing of pending events. 
+diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h +index b3e54e00bc..68e70e61aa 100644 +--- a/include/qemu/main-loop.h ++++ b/include/qemu/main-loop.h +@@ -387,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms); + + /* internal interfaces */ + ++#define qemu_bh_new_guarded(cb, opaque, guard) \ ++ qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard) + #define qemu_bh_new(cb, opaque) \ +- qemu_bh_new_full((cb), (opaque), (stringify(cb))) +-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name); ++ qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL) ++QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, ++ MemReentrancyGuard *reentrancy_guard); + void qemu_bh_schedule_idle(QEMUBH *bh); + + enum { +diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c +index f2bfcede93..8c9407c560 100644 +--- a/tests/unit/ptimer-test-stubs.c ++++ b/tests/unit/ptimer-test-stubs.c +@@ -107,7 +107,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask) + return deadline; + } + +-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) ++QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, ++ MemReentrancyGuard *reentrancy_guard) + { + QEMUBH *bh = g_new(QEMUBH, 1); + +diff --git a/util/async.c b/util/async.c +index 21016a1ac7..a9b528c370 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -65,6 +65,7 @@ struct QEMUBH { + void *opaque; + QSLIST_ENTRY(QEMUBH) next; + unsigned flags; ++ MemReentrancyGuard *reentrancy_guard; + }; + + /* Called concurrently from any thread */ +@@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, + } + + QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, +- const char *name) ++ const char *name, MemReentrancyGuard *reentrancy_guard) + { + QEMUBH *bh; + bh = g_new(QEMUBH, 1); +@@ -146,13 +147,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc 
*cb, void *opaque, + .cb = cb, + .opaque = opaque, + .name = name, ++ .reentrancy_guard = reentrancy_guard, + }; + return bh; + } + + void aio_bh_call(QEMUBH *bh) + { ++ bool last_engaged_in_io = false; ++ ++ if (bh->reentrancy_guard) { ++ last_engaged_in_io = bh->reentrancy_guard->engaged_in_io; ++ if (bh->reentrancy_guard->engaged_in_io) { ++ trace_reentrant_aio(bh->ctx, bh->name); ++ } ++ bh->reentrancy_guard->engaged_in_io = true; ++ } ++ + bh->cb(bh->opaque); ++ ++ if (bh->reentrancy_guard) { ++ bh->reentrancy_guard->engaged_in_io = last_engaged_in_io; ++ } + } + + /* Multiple occurrences of aio_bh_poll cannot be called concurrently. */ +diff --git a/util/main-loop.c b/util/main-loop.c +index e180c85145..7022f02ef8 100644 +--- a/util/main-loop.c ++++ b/util/main-loop.c +@@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking) + + /* Functions to operate on the main QEMU AioContext. */ + +-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) ++QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, ++ MemReentrancyGuard *reentrancy_guard) + { +- return aio_bh_new_full(qemu_aio_context, cb, opaque, name); ++ return aio_bh_new_full(qemu_aio_context, cb, opaque, name, ++ reentrancy_guard); + } + + /* +diff --git a/util/trace-events b/util/trace-events +index 16f78d8fe5..3f7e766683 100644 +--- a/util/trace-events ++++ b/util/trace-events +@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d" + # async.c + aio_co_schedule(void *ctx, void *co) "ctx %p co %p" + aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p" ++reentrant_aio(void *ctx, const char *name) "ctx %p name %s" + + # thread-pool.c + thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p" +-- +2.39.3 + diff --git a/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch b/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch new file mode 100644 index 0000000..df71fa2 --- /dev/null +++ 
b/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch @@ -0,0 +1,70 @@ +From 137e84f68da06666ebf7f391766cc6209ce1c39c Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 13/21] async: avoid use-after-free on re-entrancy guard + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [9/13] d4b957108aaacf4a597122aaeeaa8e56985f1fca (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 7915bd06f25e1803778081161bf6fa10c42dc7cd +Author: Alexander Bulekov +Date: Mon May 1 10:19:56 2023 -0400 + + async: avoid use-after-free on re-entrancy guard + + A BH callback can free the BH, causing a use-after-free in aio_bh_call. + Fix that by keeping a local copy of the re-entrancy guard pointer. + + Buglink: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58513 + Fixes: 9c86c97f12 ("async: Add an optional reentrancy guard to the BH API") + Signed-off-by: Alexander Bulekov + Message-Id: <20230501141956.3444868-1-alxndr@bu.edu> + Reviewed-by: Thomas Huth + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + util/async.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +diff --git a/util/async.c b/util/async.c +index a9b528c370..cd1a1815f9 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -156,18 +156,20 @@ void aio_bh_call(QEMUBH *bh) + { + bool last_engaged_in_io = false; + +- if (bh->reentrancy_guard) { +- last_engaged_in_io = bh->reentrancy_guard->engaged_in_io; +- if (bh->reentrancy_guard->engaged_in_io) { ++ /* Make a copy of the guard-pointer as cb may free the bh */ ++ MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard; ++ if (reentrancy_guard) { ++ last_engaged_in_io = reentrancy_guard->engaged_in_io; ++ if (reentrancy_guard->engaged_in_io) { + trace_reentrant_aio(bh->ctx, bh->name); + } +- 
bh->reentrancy_guard->engaged_in_io = true; ++ reentrancy_guard->engaged_in_io = true; + } + + bh->cb(bh->opaque); + +- if (bh->reentrancy_guard) { +- bh->reentrancy_guard->engaged_in_io = last_engaged_in_io; ++ if (reentrancy_guard) { ++ reentrancy_guard->engaged_in_io = last_engaged_in_io; + } + } + +-- +2.39.3 + diff --git a/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch b/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch new file mode 100644 index 0000000..6d9abb8 --- /dev/null +++ b/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch @@ -0,0 +1,57 @@ +From 40866640d15e6a8c9f6af7e437edc1ec1e17ba34 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 10/21] bcm2835_property: disable reentrancy detection for + iomem + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [6/13] 128ebc85e228674af66553af82fba70eb87960e6 (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 985c4a4e547afb9573b6bd6843d20eb2c3d1d1cd +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:11 2023 -0400 + + bcm2835_property: disable reentrancy detection for iomem + + As the code is designed for re-entrant calls from bcm2835_property to + bcm2835_mbox and back into bcm2835_property, mark iomem as + reentrancy-safe. 
+ + Signed-off-by: Alexander Bulekov + Reviewed-by: Thomas Huth + Message-Id: <20230427211013.2994127-7-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/misc/bcm2835_property.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c +index 890ae7bae5..de056ea2df 100644 +--- a/hw/misc/bcm2835_property.c ++++ b/hw/misc/bcm2835_property.c +@@ -382,6 +382,13 @@ static void bcm2835_property_init(Object *obj) + + memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s, + TYPE_BCM2835_PROPERTY, 0x10); ++ ++ /* ++ * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from ++ * iomem. As such, mark iomem as re-entracy safe. ++ */ ++ s->iomem.disable_reentrancy_guard = true; ++ + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); + sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq); + } +-- +2.39.3 + diff --git a/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch b/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch new file mode 100644 index 0000000..b6eebf3 --- /dev/null +++ b/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch @@ -0,0 +1,121 @@ +From d9190117f3c701380701d6e9b2aa3c2446b9708f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 1 May 2023 13:34:43 -0400 +Subject: [PATCH 01/21] block: compile out assert_bdrv_graph_readable() by + default + +RH-Author: Kevin Wolf +RH-MergeRequest: 166: block/graph-lock: Disable locking for now +RH-Bugzilla: 2186725 +RH-Acked-by: Eric Blake +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/4] d8cb4bb832c85e8216d97e57679a34c7bc6a8f71 (kmwolf/centos-qemu-kvm) + +reader_count() is a performance bottleneck because the global +aio_context_list_lock mutex causes thread contention. Put this debugging +assertion behind a new ./configure --enable-debug-graph-lock option and +disable it by default. 
+ +The --enable-debug-graph-lock option is also enabled by the more general +--enable-debug option. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20230501173443.153062-1-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 58a2e3f5c37be02dac3086b81bdda9414b931edf) +Signed-off-by: Kevin Wolf +--- + block/graph-lock.c | 3 +++ + configure | 1 + + meson.build | 2 ++ + meson_options.txt | 2 ++ + scripts/meson-buildoptions.sh | 4 ++++ + 5 files changed, 12 insertions(+) + +diff --git a/block/graph-lock.c b/block/graph-lock.c +index 454c31e691..259a7a0bde 100644 +--- a/block/graph-lock.c ++++ b/block/graph-lock.c +@@ -265,7 +265,10 @@ void bdrv_graph_rdunlock_main_loop(void) + + void assert_bdrv_graph_readable(void) + { ++ /* reader_count() is slow due to aio_context_list_lock lock contention */ ++#ifdef CONFIG_DEBUG_GRAPH_LOCK + assert(qemu_in_main_thread() || reader_count()); ++#endif + } + + void assert_bdrv_graph_writable(void) +diff --git a/configure b/configure +index 800b5850f4..a62a3e6be9 100755 +--- a/configure ++++ b/configure +@@ -806,6 +806,7 @@ for opt do + --enable-debug) + # Enable debugging options that aren't excessively noisy + debug_tcg="yes" ++ meson_option_parse --enable-debug-graph-lock "" + meson_option_parse --enable-debug-mutex "" + meson_option_add -Doptimization=0 + fortify_source="no" +diff --git a/meson.build b/meson.build +index c44d05a13f..d964e741e7 100644 +--- a/meson.build ++++ b/meson.build +@@ -1956,6 +1956,7 @@ if get_option('debug_stack_usage') and have_coroutine_pool + have_coroutine_pool = false + endif + config_host_data.set10('CONFIG_COROUTINE_POOL', have_coroutine_pool) ++config_host_data.set('CONFIG_DEBUG_GRAPH_LOCK', get_option('debug_graph_lock')) + config_host_data.set('CONFIG_DEBUG_MUTEX', get_option('debug_mutex')) + config_host_data.set('CONFIG_DEBUG_STACK_USAGE', get_option('debug_stack_usage')) + config_host_data.set('CONFIG_GPROF', get_option('gprof')) +@@ -3833,6 
+3834,7 @@ summary_info += {'PIE': get_option('b_pie')} + summary_info += {'static build': config_host.has_key('CONFIG_STATIC')} + summary_info += {'malloc trim support': has_malloc_trim} + summary_info += {'membarrier': have_membarrier} ++summary_info += {'debug graph lock': get_option('debug_graph_lock')} + summary_info += {'debug stack usage': get_option('debug_stack_usage')} + summary_info += {'mutex debugging': get_option('debug_mutex')} + summary_info += {'memory allocator': get_option('malloc')} +diff --git a/meson_options.txt b/meson_options.txt +index fc9447d267..bc857fe68b 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -311,6 +311,8 @@ option('rng_none', type: 'boolean', value: false, + description: 'dummy RNG, avoid using /dev/(u)random and getrandom()') + option('coroutine_pool', type: 'boolean', value: true, + description: 'coroutine freelist (better performance)') ++option('debug_graph_lock', type: 'boolean', value: false, ++ description: 'graph lock debugging support') + option('debug_mutex', type: 'boolean', value: false, + description: 'mutex debugging support') + option('debug_stack_usage', type: 'boolean', value: false, +diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh +index 009fab1515..30e1f25259 100644 +--- a/scripts/meson-buildoptions.sh ++++ b/scripts/meson-buildoptions.sh +@@ -21,6 +21,8 @@ meson_options_help() { + printf "%s\n" ' QEMU' + printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)' + printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation' ++ printf "%s\n" ' --enable-debug-graph-lock' ++ printf "%s\n" ' graph lock debugging support' + printf "%s\n" ' --enable-debug-mutex mutex debugging support' + printf "%s\n" ' --enable-debug-stack-usage' + printf "%s\n" ' measure coroutine stack usage' +@@ -249,6 +251,8 @@ _meson_option_parse() { + --datadir=*) quote_sh "-Ddatadir=$2" ;; + --enable-dbus-display) printf "%s" -Ddbus_display=enabled ;; + --disable-dbus-display) printf 
"%s" -Ddbus_display=disabled ;; ++ --enable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=true ;; ++ --disable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=false ;; + --enable-debug-mutex) printf "%s" -Ddebug_mutex=true ;; + --disable-debug-mutex) printf "%s" -Ddebug_mutex=false ;; + --enable-debug-stack-usage) printf "%s" -Ddebug_stack_usage=true ;; +-- +2.39.3 + diff --git a/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch b/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch new file mode 100644 index 0000000..4173648 --- /dev/null +++ b/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch @@ -0,0 +1,55 @@ +From 961bc392ee60743344236ddd247ab646a0eec914 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 07/21] checkpatch: add qemu_bh_new/aio_bh_new checks + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/13] e0473487f0e3186c42559a5c36a8650f27ab26ae (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit ef56ffbdd6b0605dc1e305611287b948c970e236 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:08 2023 -0400 + + checkpatch: add qemu_bh_new/aio_bh_new checks + + Advise authors to use the _guarded versions of the APIs, instead. + + Signed-off-by: Alexander Bulekov + Reviewed-by: Darren Kenny + Message-Id: <20230427211013.2994127-4-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + scripts/checkpatch.pl | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl +index d768171dcf..eeaec436eb 100755 +--- a/scripts/checkpatch.pl ++++ b/scripts/checkpatch.pl +@@ -2865,6 +2865,14 @@ sub process { + if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) { + ERROR("use sigaction to establish signal handlers; signal is not portable\n" . 
$herecurr); + } ++# recommend qemu_bh_new_guarded instead of qemu_bh_new ++ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) { ++ ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr); ++ } ++# recommend aio_bh_new_guarded instead of aio_bh_new ++ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) { ++ ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr); ++ } + # check for module_init(), use category-specific init macros explicitly please + if ($line =~ /^module_init\s*\(/) { + ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . $herecurr); +-- +2.39.3 + diff --git a/kvm-graph-lock-Disable-locking-for-now.patch b/kvm-graph-lock-Disable-locking-for-now.patch new file mode 100644 index 0000000..77086e5 --- /dev/null +++ b/kvm-graph-lock-Disable-locking-for-now.patch @@ -0,0 +1,153 @@ +From 516bf44de08a13d97c08e210137078e642ce8e88 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 17 May 2023 17:28:32 +0200 +Subject: [PATCH 02/21] graph-lock: Disable locking for now + +RH-Author: Kevin Wolf +RH-MergeRequest: 166: block/graph-lock: Disable locking for now +RH-Bugzilla: 2186725 +RH-Acked-by: Eric Blake +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/4] 39d42fb527aad0491a018743289de7b762108317 (kmwolf/centos-qemu-kvm) + +In QEMU 8.0, we've been seeing deadlocks in bdrv_graph_wrlock(). They +come from callers that hold an AioContext lock, which is not allowed +during polling. In theory, we could temporarily release the lock, but +callers are inconsistent about whether they hold a lock, and if they do, +some are also confused about which one they hold. While all of this is +fixable, it's not trivial, and the best course of action for 8.0.1 is +probably just disabling the graph locking code temporarily. + +We don't currently rely on graph locking yet. 
It is supposed to replace +the AioContext lock eventually to enable multiqueue support, but as long +as we still have the AioContext lock, it is sufficient without the graph +lock. Once the AioContext lock goes away, the deadlock doesn't exist any +more either and this commit can be reverted. (Of course, it can also be +reverted while the AioContext lock still exists if the callers have been +fixed.) + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Message-Id: <20230517152834.277483-2-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 80fc5d260002432628710f8b0c7cfc7d9b97bb9d) +Signed-off-by: Kevin Wolf +--- + block/graph-lock.c | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/block/graph-lock.c b/block/graph-lock.c +index 259a7a0bde..2490926c90 100644 +--- a/block/graph-lock.c ++++ b/block/graph-lock.c +@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock; + /* Protects the list of aiocontext and orphaned_reader_count */ + static QemuMutex aio_context_list_lock; + ++#if 0 + /* Written and read with atomic operations. */ + static int has_writer; ++#endif + + /* + * A reader coroutine could move from an AioContext to another. +@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx) + g_free(ctx->bdrv_graph); + } + ++#if 0 + static uint32_t reader_count(void) + { + BdrvGraphRWlock *brdv_graph; +@@ -105,10 +108,17 @@ static uint32_t reader_count(void) + assert((int32_t)rd >= 0); + return rd; + } ++#endif + + void bdrv_graph_wrlock(void) + { + GLOBAL_STATE_CODE(); ++ /* ++ * TODO Some callers hold an AioContext lock when this is called, which ++ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or ++ * AioContext locks are gone). 
++ */ ++#if 0 + assert(!qatomic_read(&has_writer)); + + /* Make sure that constantly arriving new I/O doesn't cause starvation */ +@@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void) + } while (reader_count() >= 1); + + bdrv_drain_all_end(); ++#endif + } + + void bdrv_graph_wrunlock(void) + { + GLOBAL_STATE_CODE(); ++#if 0 + QEMU_LOCK_GUARD(&aio_context_list_lock); + assert(qatomic_read(&has_writer)); + +@@ -155,10 +167,13 @@ void bdrv_graph_wrunlock(void) + + /* Wake up all coroutine that are waiting to read the graph */ + qemu_co_enter_all(&reader_queue, &aio_context_list_lock); ++#endif + } + + void coroutine_fn bdrv_graph_co_rdlock(void) + { ++ /* TODO Reenable when wrlock is reenabled */ ++#if 0 + BdrvGraphRWlock *bdrv_graph; + bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; + +@@ -223,10 +238,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void) + qemu_co_queue_wait(&reader_queue, &aio_context_list_lock); + } + } ++#endif + } + + void coroutine_fn bdrv_graph_co_rdunlock(void) + { ++#if 0 + BdrvGraphRWlock *bdrv_graph; + bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; + +@@ -249,6 +266,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void) + if (qatomic_read(&has_writer)) { + aio_wait_kick(); + } ++#endif + } + + void bdrv_graph_rdlock_main_loop(void) +@@ -266,13 +284,19 @@ void bdrv_graph_rdunlock_main_loop(void) + void assert_bdrv_graph_readable(void) + { + /* reader_count() is slow due to aio_context_list_lock lock contention */ ++ /* TODO Reenable when wrlock is reenabled */ ++#if 0 + #ifdef CONFIG_DEBUG_GRAPH_LOCK + assert(qemu_in_main_thread() || reader_count()); + #endif ++#endif + } + + void assert_bdrv_graph_writable(void) + { + assert(qemu_in_main_thread()); ++ /* TODO Reenable when wrlock is reenabled */ ++#if 0 + assert(qatomic_read(&has_writer)); ++#endif + } +-- +2.39.3 + diff --git a/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch b/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch new file mode 100644 
index 0000000..164bea7 --- /dev/null +++ b/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch @@ -0,0 +1,118 @@ +From 3ac01bb90da12538898f95b2fb4e7f6bc1557eb3 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Tue, 2 May 2023 21:27:02 -0300 +Subject: [PATCH 18/21] hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine + type < 8.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 170: hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0 +RH-Bugzilla: 2189423 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] ad62dd5a8567f386770577513c00a0bf36bd3df1 (LeoBras/centos-qemu-kvm) + +Since it's implementation on v8.0.0-rc0, having the PCI_ERR_UNCOR_MASK +set for machine types < 8.0 will cause migration to fail if the target +QEMU version is < 8.0.0 : + +qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10a read: 40 device: 0 cmask: ff wmask: 0 w1cmask:0 +qemu-system-x86_64: Failed to load PCIDevice:config +qemu-system-x86_64: Failed to load e1000e:parent_obj +qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:02.0/e1000e' +qemu-system-x86_64: load of migration failed: Invalid argument + +The above test migrated a 7.2 machine type from QEMU master to QEMU 7.2.0, +with this cmdline: + +./qemu-system-x86_64 -M pc-q35-7.2 [-incoming XXX] + +In order to fix this, property x-pcie-err-unc-mask was introduced to +control when PCI_ERR_UNCOR_MASK is enabled. This property is enabled by +default, but is disabled if machine type <= 7.2. + +Fixes: 010746ae1d ("hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register") +Suggested-by: Michael S. 
Tsirkin +Signed-off-by: Leonardo Bras +Message-Id: <20230503002701.854329-1-leobras@redhat.com> +Reviewed-by: Jonathan Cameron +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1576 +Tested-by: Fiona Ebner +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 5ed3dabe57dd9f4c007404345e5f5bf0e347317f) +Signed-off-by: Leonardo Bras +--- + hw/core/machine.c | 1 + + hw/pci/pci.c | 2 ++ + hw/pci/pcie_aer.c | 11 +++++++---- + include/hw/pci/pci.h | 2 ++ + 4 files changed, 12 insertions(+), 4 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 0e0120b7f2..c28702b690 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -43,6 +43,7 @@ GlobalProperty hw_compat_7_2[] = { + { "e1000e", "migrate-timadj", "off" }, + { "virtio-mem", "x-early-migration", "false" }, + { "migration", "x-preempt-pre-7-2", "true" }, ++ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, + }; + const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); + +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index def5000e7b..8ad4349e96 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -79,6 +79,8 @@ static Property pci_props[] = { + DEFINE_PROP_STRING("failover_pair_id", PCIDevice, + failover_pair_id), + DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0), ++ DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present, ++ QEMU_PCIE_ERR_UNC_MASK_BITNR, true), + DEFINE_PROP_END_OF_LIST() + }; + +diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c +index 103667c368..374d593ead 100644 +--- a/hw/pci/pcie_aer.c ++++ b/hw/pci/pcie_aer.c +@@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset, + + pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, + PCI_ERR_UNC_SUPPORTED); +- pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, +- PCI_ERR_UNC_MASK_DEFAULT); +- pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, +- PCI_ERR_UNC_SUPPORTED); 
++ ++ if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) { ++ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, ++ PCI_ERR_UNC_MASK_DEFAULT); ++ pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, ++ PCI_ERR_UNC_SUPPORTED); ++ } + + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, + PCI_ERR_UNC_SEVERITY_DEFAULT); +diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h +index d5a40cd058..6dc6742fc4 100644 +--- a/include/hw/pci/pci.h ++++ b/include/hw/pci/pci.h +@@ -207,6 +207,8 @@ enum { + QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR), + #define QEMU_PCIE_CXL_BITNR 10 + QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR), ++#define QEMU_PCIE_ERR_UNC_MASK_BITNR 11 ++ QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR), + }; + + typedef struct PCIINTxRoute { +-- +2.39.3 + diff --git a/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch b/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch new file mode 100644 index 0000000..08ee94f --- /dev/null +++ b/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch @@ -0,0 +1,470 @@ +From d1b7a9b25c0df9016cd8e93d40837314b1a81d70 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 08/21] hw: replace most qemu_bh_new calls with + qemu_bh_new_guarded + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/13] bcbc67dd0023aee2b3a342665237daa83b183c7b (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit f63192b0544af5d3e4d5edfd85ab520fcf671377 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:09 2023 -0400 + + hw: replace most qemu_bh_new calls with qemu_bh_new_guarded + + This protects devices from bh->mmio reentrancy issues. + + Thanks: Thomas Huth for diagnosing OS X test failure. 
+ Signed-off-by: Alexander Bulekov + Reviewed-by: Darren Kenny + Reviewed-by: Stefan Hajnoczi + Reviewed-by: Michael S. Tsirkin + Reviewed-by: Paul Durrant + Reviewed-by: Thomas Huth + Message-Id: <20230427211013.2994127-5-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/9pfs/xen-9p-backend.c | 5 ++++- + hw/block/dataplane/virtio-blk.c | 3 ++- + hw/block/dataplane/xen-block.c | 5 +++-- + hw/char/virtio-serial-bus.c | 3 ++- + hw/display/qxl.c | 9 ++++++--- + hw/display/virtio-gpu.c | 6 ++++-- + hw/ide/ahci.c | 3 ++- + hw/ide/ahci_internal.h | 1 + + hw/ide/core.c | 4 +++- + hw/misc/imx_rngc.c | 6 ++++-- + hw/misc/macio/mac_dbdma.c | 2 +- + hw/net/virtio-net.c | 3 ++- + hw/nvme/ctrl.c | 6 ++++-- + hw/scsi/mptsas.c | 3 ++- + hw/scsi/scsi-bus.c | 3 ++- + hw/scsi/vmw_pvscsi.c | 3 ++- + hw/usb/dev-uas.c | 3 ++- + hw/usb/hcd-dwc2.c | 3 ++- + hw/usb/hcd-ehci.c | 3 ++- + hw/usb/hcd-uhci.c | 2 +- + hw/usb/host-libusb.c | 6 ++++-- + hw/usb/redirect.c | 6 ++++-- + hw/usb/xen-usb.c | 3 ++- + hw/virtio/virtio-balloon.c | 5 +++-- + hw/virtio/virtio-crypto.c | 3 ++- + 25 files changed, 66 insertions(+), 33 deletions(-) + +diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c +index 74f3a05f88..0e266c552b 100644 +--- a/hw/9pfs/xen-9p-backend.c ++++ b/hw/9pfs/xen-9p-backend.c +@@ -61,6 +61,7 @@ typedef struct Xen9pfsDev { + + int num_rings; + Xen9pfsRing *rings; ++ MemReentrancyGuard mem_reentrancy_guard; + } Xen9pfsDev; + + static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev); +@@ -443,7 +444,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev) + xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data + + XEN_FLEX_RING_SIZE(ring_order); + +- xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]); ++ xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh, ++ &xen_9pdev->rings[i], ++ &xen_9pdev->mem_reentrancy_guard); + xen_9pdev->rings[i].out_cons = 0; + xen_9pdev->rings[i].out_size = 0; + 
xen_9pdev->rings[i].inprogress = false; +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index b28d81737e..a6202997ee 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, + } else { + s->ctx = qemu_get_aio_context(); + } +- s->bh = aio_bh_new(s->ctx, notify_guest_bh, s); ++ s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s, ++ &DEVICE(vdev)->mem_reentrancy_guard); + s->batch_notify_vqs = bitmap_new(conf->num_queues); + + *dataplane = s; +diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c +index 734da42ea7..d8bc39d359 100644 +--- a/hw/block/dataplane/xen-block.c ++++ b/hw/block/dataplane/xen-block.c +@@ -633,8 +633,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev, + } else { + dataplane->ctx = qemu_get_aio_context(); + } +- dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh, +- dataplane); ++ dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh, ++ dataplane, ++ &DEVICE(xendev)->mem_reentrancy_guard); + + return dataplane; + } +diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c +index 7d4601cb5d..dd619f0731 100644 +--- a/hw/char/virtio-serial-bus.c ++++ b/hw/char/virtio-serial-bus.c +@@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp) + return; + } + +- port->bh = qemu_bh_new(flush_queued_data_bh, port); ++ port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port, ++ &dev->mem_reentrancy_guard); + port->elem = NULL; + } + +diff --git a/hw/display/qxl.c b/hw/display/qxl.c +index 80ce1e9a93..f1c0eb7dfc 100644 +--- a/hw/display/qxl.c ++++ b/hw/display/qxl.c +@@ -2201,11 +2201,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp) + + qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl); + +- qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, 
qxl); ++ qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl, ++ &DEVICE(qxl)->mem_reentrancy_guard); + qxl_reset_state(qxl); + +- qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl); +- qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd); ++ qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl, ++ &DEVICE(qxl)->mem_reentrancy_guard); ++ qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd, ++ &DEVICE(qxl)->mem_reentrancy_guard); + } + + static void qxl_realize_primary(PCIDevice *dev, Error **errp) +diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c +index 5e15c79b94..66ac9b6cc5 100644 +--- a/hw/display/virtio-gpu.c ++++ b/hw/display/virtio-gpu.c +@@ -1339,8 +1339,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) + + g->ctrl_vq = virtio_get_queue(vdev, 0); + g->cursor_vq = virtio_get_queue(vdev, 1); +- g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g); +- g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g); ++ g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g, ++ &qdev->mem_reentrancy_guard); ++ g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g, ++ &qdev->mem_reentrancy_guard); + QTAILQ_INIT(&g->reslist); + QTAILQ_INIT(&g->cmdq); + QTAILQ_INIT(&g->fenceq); +diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c +index 55902e1df7..4e76d6b191 100644 +--- a/hw/ide/ahci.c ++++ b/hw/ide/ahci.c +@@ -1509,7 +1509,8 @@ static void ahci_cmd_done(const IDEDMA *dma) + ahci_write_fis_d2h(ad); + + if (ad->port_regs.cmd_issue && !ad->check_bh) { +- ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad); ++ ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad, ++ &ad->mem_reentrancy_guard); + qemu_bh_schedule(ad->check_bh); + } + } +diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h +index 303fcd7235..2480455372 100644 +--- a/hw/ide/ahci_internal.h ++++ b/hw/ide/ahci_internal.h +@@ -321,6 +321,7 @@ struct AHCIDevice { + bool init_d2h_sent; + 
AHCICmdHdr *cur_cmd; + NCQTransferState ncq_tfs[AHCI_MAX_CMDS]; ++ MemReentrancyGuard mem_reentrancy_guard; + }; + + struct AHCIPCIState { +diff --git a/hw/ide/core.c b/hw/ide/core.c +index 45d14a25e9..de48ff9f86 100644 +--- a/hw/ide/core.c ++++ b/hw/ide/core.c +@@ -513,6 +513,7 @@ BlockAIOCB *ide_issue_trim( + BlockCompletionFunc *cb, void *cb_opaque, void *opaque) + { + IDEState *s = opaque; ++ IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master; + TrimAIOCB *iocb; + + /* Paired with a decrement in ide_trim_bh_cb() */ +@@ -520,7 +521,8 @@ BlockAIOCB *ide_issue_trim( + + iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque); + iocb->s = s; +- iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb); ++ iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb, ++ &DEVICE(dev)->mem_reentrancy_guard); + iocb->ret = 0; + iocb->qiov = qiov; + iocb->i = -1; +diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c +index 632c03779c..082c6980ad 100644 +--- a/hw/misc/imx_rngc.c ++++ b/hw/misc/imx_rngc.c +@@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp) + sysbus_init_mmio(sbd, &s->iomem); + + sysbus_init_irq(sbd, &s->irq); +- s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s); +- s->seed_bh = qemu_bh_new(imx_rngc_seed, s); ++ s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s, ++ &dev->mem_reentrancy_guard); ++ s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s, ++ &dev->mem_reentrancy_guard); + } + + static void imx_rngc_reset(DeviceState *dev) +diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c +index 43bb1f56ba..80a789f32b 100644 +--- a/hw/misc/macio/mac_dbdma.c ++++ b/hw/misc/macio/mac_dbdma.c +@@ -914,7 +914,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp) + { + DBDMAState *s = MAC_DBDMA(dev); + +- s->bh = qemu_bh_new(DBDMA_run_bh, s); ++ s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard); + } + + static void mac_dbdma_class_init(ObjectClass *oc, void *data) +diff --git 
a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 53e1c32643..447f669921 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -2917,7 +2917,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index) + n->vqs[index].tx_vq = + virtio_add_queue(vdev, n->net_conf.tx_queue_size, + virtio_net_handle_tx_bh); +- n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); ++ n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index], ++ &DEVICE(vdev)->mem_reentrancy_guard); + } + + n->vqs[index].tx_waiting = 0; +diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c +index ac24eeb5ed..e5a468975e 100644 +--- a/hw/nvme/ctrl.c ++++ b/hw/nvme/ctrl.c +@@ -4607,7 +4607,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, + QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); + } + +- sq->bh = qemu_bh_new(nvme_process_sq, sq); ++ sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq, ++ &DEVICE(sq->ctrl)->mem_reentrancy_guard); + + if (n->dbbuf_enabled) { + sq->db_addr = n->dbbuf_dbs + (sqid << 3); +@@ -5253,7 +5254,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, + } + } + n->cq[cqid] = cq; +- cq->bh = qemu_bh_new(nvme_post_cqes, cq); ++ cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq, ++ &DEVICE(cq->ctrl)->mem_reentrancy_guard); + } + + static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) +diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c +index c485da792c..3de288b454 100644 +--- a/hw/scsi/mptsas.c ++++ b/hw/scsi/mptsas.c +@@ -1322,7 +1322,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp) + } + s->max_devices = MPTSAS_NUM_PORTS; + +- s->request_bh = qemu_bh_new(mptsas_fetch_requests, s); ++ s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s, ++ &DEVICE(dev)->mem_reentrancy_guard); + + scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info); + } +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index c97176110c..3c20b47ad0 100644 +--- 
a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -193,7 +193,8 @@ static void scsi_dma_restart_cb(void *opaque, bool running, RunState state) + AioContext *ctx = blk_get_aio_context(s->conf.blk); + /* The reference is dropped in scsi_dma_restart_bh.*/ + object_ref(OBJECT(s)); +- s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s); ++ s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s, ++ &DEVICE(s)->mem_reentrancy_guard); + qemu_bh_schedule(s->bh); + } + } +diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c +index fa76696855..4de34536e9 100644 +--- a/hw/scsi/vmw_pvscsi.c ++++ b/hw/scsi/vmw_pvscsi.c +@@ -1184,7 +1184,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp) + pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET); + } + +- s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s); ++ s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s, ++ &DEVICE(pci_dev)->mem_reentrancy_guard); + + scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info); + /* override default SCSI bus hotplug-handler, with pvscsi's one */ +diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c +index 88f99c05d5..f013ded91e 100644 +--- a/hw/usb/dev-uas.c ++++ b/hw/usb/dev-uas.c +@@ -937,7 +937,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp) + + QTAILQ_INIT(&uas->results); + QTAILQ_INIT(&uas->requests); +- uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas); ++ uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas, ++ &d->mem_reentrancy_guard); + + dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); + scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info); +diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c +index 8755e9cbb0..a0c4e782b2 100644 +--- a/hw/usb/hcd-dwc2.c ++++ b/hw/usb/hcd-dwc2.c +@@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp) + s->fi = USB_FRMINTVL - 1; + s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s); + 
s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s); +- s->async_bh = qemu_bh_new(dwc2_work_bh, s); ++ s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s, ++ &dev->mem_reentrancy_guard); + + sysbus_init_irq(sbd, &s->irq); + } +diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c +index d4da8dcb8d..c930c60921 100644 +--- a/hw/usb/hcd-ehci.c ++++ b/hw/usb/hcd-ehci.c +@@ -2533,7 +2533,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp) + } + + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s); +- s->async_bh = qemu_bh_new(ehci_work_bh, s); ++ s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s, ++ &dev->mem_reentrancy_guard); + s->device = dev; + + s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s); +diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c +index 8ac1175ad2..77baaa7a6b 100644 +--- a/hw/usb/hcd-uhci.c ++++ b/hw/usb/hcd-uhci.c +@@ -1190,7 +1190,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) + USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL); + } + } +- s->bh = qemu_bh_new(uhci_bh, s); ++ s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard); + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s); + s->num_ports_vmstate = NB_PORTS; + QTAILQ_INIT(&s->queues); +diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c +index 176868d345..f500db85ab 100644 +--- a/hw/usb/host-libusb.c ++++ b/hw/usb/host-libusb.c +@@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque) + static void usb_host_nodev(USBHostDevice *s) + { + if (!s->bh_nodev) { +- s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s); ++ s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s, ++ &DEVICE(s)->mem_reentrancy_guard); + } + qemu_bh_schedule(s->bh_nodev); + } +@@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id) + USBHostDevice *dev = opaque; + + if (!dev->bh_postld) { +- dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev); ++ 
dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev, ++ &DEVICE(dev)->mem_reentrancy_guard); + } + qemu_bh_schedule(dev->bh_postld); + dev->bh_postld_pending = true; +diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c +index fd7df599bc..39fbaaab16 100644 +--- a/hw/usb/redirect.c ++++ b/hw/usb/redirect.c +@@ -1441,8 +1441,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp) + } + } + +- dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev); +- dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev); ++ dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev, ++ &DEVICE(dev)->mem_reentrancy_guard); ++ dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev, ++ &DEVICE(dev)->mem_reentrancy_guard); + dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev); + + packet_id_queue_init(&dev->cancelled, dev, "cancelled"); +diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c +index 66cb3f7c24..38ee660a30 100644 +--- a/hw/usb/xen-usb.c ++++ b/hw/usb/xen-usb.c +@@ -1032,7 +1032,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev) + + QTAILQ_INIT(&usbif->req_free_q); + QSIMPLEQ_INIT(&usbif->hotplug_q); +- usbif->bh = qemu_bh_new(usbback_bh, usbif); ++ usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif, ++ &DEVICE(xendev)->mem_reentrancy_guard); + } + + static int usbback_free(struct XenLegacyDevice *xendev) +diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c +index 43092aa634..5186e831dd 100644 +--- a/hw/virtio/virtio-balloon.c ++++ b/hw/virtio/virtio-balloon.c +@@ -909,8 +909,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) + precopy_add_notifier(&s->free_page_hint_notify); + + object_ref(OBJECT(s->iothread)); +- s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), +- virtio_ballloon_get_free_page_hints, s); ++ s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread), ++ 
virtio_ballloon_get_free_page_hints, s, ++ &dev->mem_reentrancy_guard); + } + + if (virtio_has_feature(s->host_features, VIRTIO_BALLOON_F_REPORTING)) { +diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c +index 802e1b9659..2fe804510f 100644 +--- a/hw/virtio/virtio-crypto.c ++++ b/hw/virtio/virtio-crypto.c +@@ -1074,7 +1074,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp) + vcrypto->vqs[i].dataq = + virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh); + vcrypto->vqs[i].dataq_bh = +- qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]); ++ qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i], ++ &dev->mem_reentrancy_guard); + vcrypto->vqs[i].vcrypto = vcrypto; + } + +-- +2.39.3 + diff --git a/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch b/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch new file mode 100644 index 0000000..efa966e --- /dev/null +++ b/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch @@ -0,0 +1,141 @@ +From 8075a9e05699ef0c4e078017eefc20db3186328f Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 29 May 2023 14:21:08 -0400 +Subject: [PATCH 17/21] hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI + controller (CVE-2023-0330) + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [13/13] 0b6fa742075ef2db3a354ee672dccca3747051cc (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit b987718bbb1d0eabf95499b976212dd5f0120d75 +Author: Thomas Huth +Date: Mon May 22 11:10:11 2023 +0200 + + hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller (CVE-2023-0330) + + We cannot use the generic reentrancy guard in the LSI code, so + we have to manually prevent endless reentrancy here. 
The problematic + lsi_execute_script() function has already a way to detect whether + too many instructions have been executed - we just have to slightly + change the logic here that it also takes into account if the function + has been called too often in a reentrant way. + + The code in fuzz-lsi53c895a-test.c has been taken from an earlier + patch by Mauro Matteo Cascella. + + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1563 + Message-Id: <20230522091011.1082574-1-thuth@redhat.com> + Reviewed-by: Stefan Hajnoczi + Reviewed-by: Alexander Bulekov + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/scsi/lsi53c895a.c | 23 +++++++++++++++------ + tests/qtest/fuzz-lsi53c895a-test.c | 33 ++++++++++++++++++++++++++++++ + 2 files changed, 50 insertions(+), 6 deletions(-) + +diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c +index 048436352b..f7d45b0b20 100644 +--- a/hw/scsi/lsi53c895a.c ++++ b/hw/scsi/lsi53c895a.c +@@ -1134,15 +1134,24 @@ static void lsi_execute_script(LSIState *s) + uint32_t addr, addr_high; + int opcode; + int insn_processed = 0; ++ static int reentrancy_level; ++ ++ reentrancy_level++; + + s->istat1 |= LSI_ISTAT1_SRUN; + again: +- if (++insn_processed > LSI_MAX_INSN) { +- /* Some windows drivers make the device spin waiting for a memory +- location to change. If we have been executed a lot of code then +- assume this is the case and force an unexpected device disconnect. +- This is apparently sufficient to beat the drivers into submission. +- */ ++ /* ++ * Some windows drivers make the device spin waiting for a memory location ++ * to change. If we have executed more than LSI_MAX_INSN instructions then ++ * assume this is the case and force an unexpected device disconnect. This ++ * is apparently sufficient to beat the drivers into submission. ++ * ++ * Another issue (CVE-2023-0330) can occur if the script is programmed to ++ * trigger itself again and again. 
Avoid this problem by stopping after ++ * being called multiple times in a reentrant way (8 is an arbitrary value ++ * which should be enough for all valid use cases). ++ */ ++ if (++insn_processed > LSI_MAX_INSN || reentrancy_level > 8) { + if (!(s->sien0 & LSI_SIST0_UDC)) { + qemu_log_mask(LOG_GUEST_ERROR, + "lsi_scsi: inf. loop with UDC masked"); +@@ -1596,6 +1605,8 @@ again: + } + } + trace_lsi_execute_script_stop(); ++ ++ reentrancy_level--; + } + + static uint8_t lsi_reg_readb(LSIState *s, int offset) +diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c +index 2012bd54b7..1b55928b9f 100644 +--- a/tests/qtest/fuzz-lsi53c895a-test.c ++++ b/tests/qtest/fuzz-lsi53c895a-test.c +@@ -8,6 +8,36 @@ + #include "qemu/osdep.h" + #include "libqtest.h" + ++/* ++ * This used to trigger a DMA reentrancy issue ++ * leading to memory corruption bugs like stack ++ * overflow or use-after-free ++ * https://gitlab.com/qemu-project/qemu/-/issues/1563 ++ */ ++static void test_lsi_dma_reentrancy(void) ++{ ++ QTestState *s; ++ ++ s = qtest_init("-M q35 -m 512M -nodefaults " ++ "-blockdev driver=null-co,node-name=null0 " ++ "-device lsi53c810 -device scsi-cd,drive=null0"); ++ ++ qtest_outl(s, 0xcf8, 0x80000804); /* PCI Command Register */ ++ qtest_outw(s, 0xcfc, 0x7); /* Enables accesses */ ++ qtest_outl(s, 0xcf8, 0x80000814); /* Memory Bar 1 */ ++ qtest_outl(s, 0xcfc, 0xff100000); /* Set MMIO Address*/ ++ qtest_outl(s, 0xcf8, 0x80000818); /* Memory Bar 2 */ ++ qtest_outl(s, 0xcfc, 0xff000000); /* Set RAM Address*/ ++ qtest_writel(s, 0xff000000, 0xc0000024); ++ qtest_writel(s, 0xff000114, 0x00000080); ++ qtest_writel(s, 0xff00012c, 0xff000000); ++ qtest_writel(s, 0xff000004, 0xff000114); ++ qtest_writel(s, 0xff000008, 0xff100014); ++ qtest_writel(s, 0xff10002f, 0x000000ff); ++ ++ qtest_quit(s); ++} ++ + /* + * This used to trigger a UAF in lsi_do_msgout() + * https://gitlab.com/qemu-project/qemu/-/issues/972 +@@ -124,5 +154,8 @@ int main(int argc, char 
**argv) + qtest_add_func("fuzz/lsi53c895a/lsi_do_msgout_cancel_req", + test_lsi_do_msgout_cancel_req); + ++ qtest_add_func("fuzz/lsi53c895a/lsi_dma_reentrancy", ++ test_lsi_dma_reentrancy); ++ + return g_test_run(); + } +-- +2.39.3 + diff --git a/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch b/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch new file mode 100644 index 0000000..1fc5697 --- /dev/null +++ b/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch @@ -0,0 +1,144 @@ +From 399bfc04fb8352af6d2f4c984e68c334d2043368 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 17 May 2023 17:28:34 +0200 +Subject: [PATCH 04/21] iotests: Test commit with iothreads and ongoing I/O + +RH-Author: Kevin Wolf +RH-MergeRequest: 166: block/graph-lock: Disable locking for now +RH-Bugzilla: 2186725 +RH-Acked-by: Eric Blake +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [4/4] 1e42fde5951ae12bddc4eea2320f066f7079878f (kmwolf/centos-qemu-kvm) + +This tests exercises graph locking, draining, and graph modifications +with AioContext switches a lot. Amongst others, it serves as a +regression test for bdrv_graph_wrlock() deadlocking because it is called +with a locked AioContext and for AioContext handling in the NBD server. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20230517152834.277483-4-kwolf@redhat.com> +Tested-by: Eric Blake +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 95fdd8db61848d31fde1d9b32da7f3f76babfa25) +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/iotests.py | 4 ++ + .../qemu-iotests/tests/graph-changes-while-io | 56 +++++++++++++++++-- + .../tests/graph-changes-while-io.out | 4 +- + 3 files changed, 58 insertions(+), 6 deletions(-) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 3e82c634cf..7073579a7d 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -462,6 +462,10 @@ def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \ + assert self._qmp is not None + return self._qmp.cmd(cmd, args) + ++ def get_qmp(self) -> QEMUMonitorProtocol: ++ assert self._qmp is not None ++ return self._qmp ++ + def stop(self, kill_signal=15): + self._p.send_signal(kill_signal) + self._p.wait() +diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io +index 7664f33689..750e7d4d38 100755 +--- a/tests/qemu-iotests/tests/graph-changes-while-io ++++ b/tests/qemu-iotests/tests/graph-changes-while-io +@@ -22,19 +22,19 @@ + import os + from threading import Thread + import iotests +-from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \ +- QemuStorageDaemon ++from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \ ++ QMPTestCase, QemuStorageDaemon + + + top = os.path.join(iotests.test_dir, 'top.img') + nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') + + +-def do_qemu_img_bench() -> None: ++def do_qemu_img_bench(count: int = 2000000) -> None: + """ + Do some I/O requests on `nbd_sock`. 
+ """ +- qemu_img('bench', '-f', 'raw', '-c', '2000000', ++ qemu_img('bench', '-f', 'raw', '-c', str(count), + f'nbd+unix:///node0?socket={nbd_sock}') + + +@@ -84,6 +84,54 @@ class TestGraphChangesWhileIO(QMPTestCase): + + bench_thr.join() + ++ def test_commit_while_io(self) -> None: ++ # Run qemu-img bench in the background ++ bench_thr = Thread(target=do_qemu_img_bench, args=(200000, )) ++ bench_thr.start() ++ ++ qemu_io('-c', 'write 0 64k', top) ++ qemu_io('-c', 'write 128k 64k', top) ++ ++ result = self.qsd.qmp('blockdev-add', { ++ 'driver': imgfmt, ++ 'node-name': 'overlay', ++ 'backing': None, ++ 'file': { ++ 'driver': 'file', ++ 'filename': top ++ } ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.qsd.qmp('blockdev-snapshot', { ++ 'node': 'node0', ++ 'overlay': 'overlay', ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ # While qemu-img bench is running, repeatedly commit overlay to node0 ++ while bench_thr.is_alive(): ++ result = self.qsd.qmp('block-commit', { ++ 'job-id': 'job0', ++ 'device': 'overlay', ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.qsd.qmp('block-job-cancel', { ++ 'device': 'job0', ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ cancelled = False ++ while not cancelled: ++ for event in self.qsd.get_qmp().get_events(wait=10.0): ++ if event['event'] != 'JOB_STATUS_CHANGE': ++ continue ++ if event['data']['status'] == 'null': ++ cancelled = True ++ ++ bench_thr.join() ++ + if __name__ == '__main__': + # Format must support raw backing files + iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'], +diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out +index ae1213e6f8..fbc63e62f8 100644 +--- a/tests/qemu-iotests/tests/graph-changes-while-io.out ++++ b/tests/qemu-iotests/tests/graph-changes-while-io.out +@@ -1,5 +1,5 @@ +-. ++.. 
+ ---------------------------------------------------------------------- +-Ran 1 tests ++Ran 2 tests + + OK +-- +2.39.3 + diff --git a/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch b/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch new file mode 100644 index 0000000..c1100a5 --- /dev/null +++ b/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch @@ -0,0 +1,53 @@ +From 6de2f37d9a5db6578554929227377e4fd6d2feb3 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 14/21] loongarch: mark loongarch_ipi_iocsr re-entrnacy safe + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [10/13] 02435b9148b906960137de32eb5a3c4961e44a57 (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 6d0589e0e6c64b888864a2bf980537be20389264 +Author: Alexander Bulekov +Date: Sat May 6 07:21:45 2023 -0400 + + loongarch: mark loongarch_ipi_iocsr re-entrnacy safe + + loongarch_ipi_iocsr MRs rely on re-entrant IO through the ipi_send + function. As such, mark these MRs re-entrancy-safe. 
+ + Fixes: a2e1753b80 ("memory: prevent dma-reentracy issues") + Signed-off-by: Alexander Bulekov + Reviewed-by: Song Gao + Message-Id: <20230506112145.3563708-1-alxndr@bu.edu> + Signed-off-by: Song Gao + +Signed-off-by: Jon Maloy +--- + hw/intc/loongarch_ipi.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c +index aa4bf9eb74..40e98af2ce 100644 +--- a/hw/intc/loongarch_ipi.c ++++ b/hw/intc/loongarch_ipi.c +@@ -215,6 +215,10 @@ static void loongarch_ipi_init(Object *obj) + for (cpu = 0; cpu < MAX_IPI_CORE_NUM; cpu++) { + memory_region_init_io(&s->ipi_iocsr_mem[cpu], obj, &loongarch_ipi_ops, + &lams->ipi_core[cpu], "loongarch_ipi_iocsr", 0x48); ++ ++ /* loongarch_ipi_iocsr performs re-entrant IO through ipi_send */ ++ s->ipi_iocsr_mem[cpu].disable_reentrancy_guard = true; ++ + sysbus_init_mmio(sbd, &s->ipi_iocsr_mem[cpu]); + + memory_region_init_io(&s->ipi64_iocsr_mem[cpu], obj, &loongarch_ipi64_ops, +-- +2.39.3 + diff --git a/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch b/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch new file mode 100644 index 0000000..359d53f --- /dev/null +++ b/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch @@ -0,0 +1,70 @@ +From 0660a7a6994db0db9f6d0b84f6345aa06dc61761 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 29 May 2023 14:21:08 -0400 +Subject: [PATCH 16/21] lsi53c895a: disable reentrancy detection for MMIO + region, too + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [12/13] fb9da8b68cdf0dc0b0bd8fb8540849c944d0bf20 (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit d139fe9ad8a27bcc50b4ead77d2f97d191a0e95e +Author: Thomas Huth +Date: Tue May 16 11:05:56 2023 +0200 + + lsi53c895a: disable reentrancy detection for MMIO region, too + + While trying to use 
a SCSI disk on the LSI controller with an + older version of Fedora (25), I'm getting: + + qemu: warning: Blocked re-entrant IO on MemoryRegion: lsi-mmio at addr: 0x34 + + and the SCSI controller is not usable. Seems like we have to + disable the reentrancy checker for the MMIO region, too, to + get this working again. + + The problem could be reproduced it like this: + + ./qemu-system-x86_64 -accel kvm -m 2G -machine q35 \ + -device lsi53c810,id=lsi1 -device scsi-hd,drive=d0 \ + -drive if=none,id=d0,file=.../somedisk.qcow2 \ + -cdrom Fedora-Everything-netinst-i386-25-1.3.iso + + Where somedisk.qcow2 is an image that contains already some partitions + and file systems. + + In the boot menu of Fedora, go to + "Troubleshooting" -> "Rescue a Fedora system" -> "3) Skip to shell" + + Then check "dmesg | grep -i 53c" for failure messages, and try to mount + a partition from somedisk.qcow2. + + Message-Id: <20230516090556.553813-1-thuth@redhat.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/scsi/lsi53c895a.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c +index db27872963..048436352b 100644 +--- a/hw/scsi/lsi53c895a.c ++++ b/hw/scsi/lsi53c895a.c +@@ -2307,6 +2307,7 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) + * re-entrancy guard. 
+ */ + s->ram_io.disable_reentrancy_guard = true; ++ s->mmio_io.disable_reentrancy_guard = true; + + address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); + qdev_init_gpio_out(d, &s->ext_irq, 1); +-- +2.39.3 + diff --git a/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch b/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch new file mode 100644 index 0000000..e671c92 --- /dev/null +++ b/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch @@ -0,0 +1,58 @@ +From 621808c6c4da3adcc073231493d487d6360386c9 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 09/21] lsi53c895a: disable reentrancy detection for script RAM + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/13] 765d65fc3fb735eb4b52a408ccff91b538ad32b6 (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit bfd6e7ae6a72b84e2eb9574f56e6ec037f05182c +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:10 2023 -0400 + + lsi53c895a: disable reentrancy detection for script RAM + + As the code is designed to use the memory APIs to access the script ram, + disable reentrancy checks for the pseudo-RAM ram_io MemoryRegion. + + In the future, ram_io may be converted from an IO to a proper RAM MemoryRegion. 
+ + Reported-by: Fiona Ebner + Signed-off-by: Alexander Bulekov + Reviewed-by: Thomas Huth + Reviewed-by: Darren Kenny + Message-Id: <20230427211013.2994127-6-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/scsi/lsi53c895a.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c +index af93557a9a..db27872963 100644 +--- a/hw/scsi/lsi53c895a.c ++++ b/hw/scsi/lsi53c895a.c +@@ -2302,6 +2302,12 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) + memory_region_init_io(&s->io_io, OBJECT(s), &lsi_io_ops, s, + "lsi-io", 256); + ++ /* ++ * Since we use the address-space API to interact with ram_io, disable the ++ * re-entrancy guard. ++ */ ++ s->ram_io.disable_reentrancy_guard = true; ++ + address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); + qdev_init_gpio_out(d, &s->ext_irq, 1); + +-- +2.39.3 + diff --git a/kvm-memory-prevent-dma-reentracy-issues.patch b/kvm-memory-prevent-dma-reentracy-issues.patch new file mode 100644 index 0000000..d3697dc --- /dev/null +++ b/kvm-memory-prevent-dma-reentracy-issues.patch @@ -0,0 +1,150 @@ +From 0bc9295be331781491e993b6f1b0dca959194f13 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 05/21] memory: prevent dma-reentracy issues + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/13] d4a762d3b156200a65d09cde58cd6d77b229071e (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 +CVE: CVE-2023-0330 + +commit a2e1753b8054344f32cf94f31c6399a58794a380 +Author: Alexander Bulekov +Date: Thu Apr 27 17:10:06 2023 -0400 + + memory: prevent dma-reentracy issues + + Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA. 
+ This flag is set/checked prior to calling a device's MemoryRegion + handlers, and set when device code initiates DMA. The purpose of this + flag is to prevent two types of DMA-based reentrancy issues: + + 1.) mmio -> dma -> mmio case + 2.) bh -> dma write -> mmio case + + These issues have led to problems such as stack-exhaustion and + use-after-frees. + + Summary of the problem from Peter Maydell: + https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com + + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282 + Resolves: CVE-2023-0330 + + Signed-off-by: Alexander Bulekov + Reviewed-by: Thomas Huth + Message-Id: <20230427211013.2994127-2-alxndr@bu.edu> + [thuth: Replace warn_report() with warn_report_once()] + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + include/exec/memory.h | 5 +++++ + include/hw/qdev-core.h | 7 +++++++ + softmmu/memory.c | 16 ++++++++++++++++ + 3 files changed, 28 insertions(+) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 15ade918ba..e45ce6061f 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -767,6 +767,8 @@ struct MemoryRegion { + bool is_iommu; + RAMBlock *ram_block; + Object *owner; ++ /* owner as TYPE_DEVICE. 
Used for re-entrancy checks in MR access hotpath */ ++ DeviceState *dev; + + const MemoryRegionOps *ops; + void *opaque; +@@ -791,6 +793,9 @@ struct MemoryRegion { + unsigned ioeventfd_nb; + MemoryRegionIoeventfd *ioeventfds; + RamDiscardManager *rdm; /* Only for RAM */ ++ ++ /* For devices designed to perform re-entrant IO into their own IO MRs */ ++ bool disable_reentrancy_guard; + }; + + struct IOMMUMemoryRegion { +diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h +index bd50ad5ee1..7623703943 100644 +--- a/include/hw/qdev-core.h ++++ b/include/hw/qdev-core.h +@@ -162,6 +162,10 @@ struct NamedClockList { + QLIST_ENTRY(NamedClockList) node; + }; + ++typedef struct { ++ bool engaged_in_io; ++} MemReentrancyGuard; ++ + /** + * DeviceState: + * @realized: Indicates whether the device has been fully constructed. +@@ -194,6 +198,9 @@ struct DeviceState { + int alias_required_for_version; + ResettableState reset; + GSList *unplug_blockers; ++ ++ /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */ ++ MemReentrancyGuard mem_reentrancy_guard; + }; + + struct DeviceListener { +diff --git a/softmmu/memory.c b/softmmu/memory.c +index b1a6cae6f5..b7b3386e9d 100644 +--- a/softmmu/memory.c ++++ b/softmmu/memory.c +@@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + access_size_max = 4; + } + ++ /* Do not allow more than one simultaneous access to a device's IO Regions */ ++ if (mr->dev && !mr->disable_reentrancy_guard && ++ !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { ++ if (mr->dev->mem_reentrancy_guard.engaged_in_io) { ++ warn_report_once("Blocked re-entrant IO on MemoryRegion: " ++ "%s at addr: 0x%" HWADDR_PRIX, ++ memory_region_name(mr), addr); ++ return MEMTX_ACCESS_ERROR; ++ } ++ mr->dev->mem_reentrancy_guard.engaged_in_io = true; ++ } ++ + /* FIXME: support unaligned access? 
*/ + access_size = MAX(MIN(size, access_size_max), access_size_min); + access_mask = MAKE_64BIT_MASK(0, access_size * 8); +@@ -556,6 +568,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + access_mask, attrs); + } + } ++ if (mr->dev) { ++ mr->dev->mem_reentrancy_guard.engaged_in_io = false; ++ } + return r; + } + +@@ -1170,6 +1185,7 @@ static void memory_region_do_init(MemoryRegion *mr, + } + mr->name = g_strdup(name); + mr->owner = owner; ++ mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE); + mr->ram_block = NULL; + + if (name) { +-- +2.39.3 + diff --git a/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch b/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch new file mode 100644 index 0000000..f45abea --- /dev/null +++ b/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch @@ -0,0 +1,67 @@ +From 3f2042e33acb6db91594e12ebd63b9abd9e753cc Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 7 Jun 2023 11:45:09 -0400 +Subject: [PATCH 15/21] memory: stricter checks prior to unsetting + engaged_in_io + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [11/13] b8e1a4b49dd7fa3b7948d32f46dfe1d7f7a4c1cf (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 3884bf6468ac6bbb58c2b3feaa74e87f821b52f3 +Author: Alexander Bulekov +Date: Tue May 16 04:40:02 2023 -0400 + + memory: stricter checks prior to unsetting engaged_in_io + + engaged_in_io could be unset by an MR with re-entrancy checks disabled. + Ensure that only MRs that can set the engaged_in_io flag can unset it. 
+ + Signed-off-by: Alexander Bulekov + Message-Id: <20230516084002.3813836-1-alxndr@bu.edu> + Reviewed-by: Darren Kenny + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + softmmu/memory.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/softmmu/memory.c b/softmmu/memory.c +index b7b3386e9d..26424f1d78 100644 +--- a/softmmu/memory.c ++++ b/softmmu/memory.c +@@ -534,6 +534,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + unsigned access_size; + unsigned i; + MemTxResult r = MEMTX_OK; ++ bool reentrancy_guard_applied = false; + + if (!access_size_min) { + access_size_min = 1; +@@ -552,6 +553,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + return MEMTX_ACCESS_ERROR; + } + mr->dev->mem_reentrancy_guard.engaged_in_io = true; ++ reentrancy_guard_applied = true; + } + + /* FIXME: support unaligned access? */ +@@ -568,7 +570,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, + access_mask, attrs); + } + } +- if (mr->dev) { ++ if (mr->dev && reentrancy_guard_applied) { + mr->dev->mem_reentrancy_guard.engaged_in_io = false; + } + return r; +-- +2.39.3 + diff --git a/kvm-multifd-Fix-the-number-of-channels-ready.patch b/kvm-multifd-Fix-the-number-of-channels-ready.patch new file mode 100644 index 0000000..abf21e6 --- /dev/null +++ b/kvm-multifd-Fix-the-number-of-channels-ready.patch @@ -0,0 +1,58 @@ +From af6f2a543c7db6d67d33fd12615a50e57fc3fe66 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 26 Apr 2023 12:20:36 +0200 +Subject: [PATCH 19/21] multifd: Fix the number of channels ready +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 171: multifd: Fix the number of channels ready +RH-Bugzilla: 2196289 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] a5e271ba249d85b27a68d3cff10480ca3a112c5d (LeoBras/centos-qemu-kvm) + +We don't wait in the sem when we are doing a sync_main. 
Make it wait +there. To make things clearer, we mark the channel ready at the +begining of the thread loop. + +Signed-off-by: Juan Quintela +Reviewed-by: Fabiano Rosas +(cherry picked from commit d2026ee117147893f8d80f060cede6d872ecbd7f) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index cce3ad6988..6a59c03dd2 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -635,6 +635,7 @@ int multifd_send_sync_main(QEMUFile *f) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + ++ qemu_sem_wait(&multifd_send_state->channels_ready); + trace_multifd_send_sync_main_wait(p->id); + qemu_sem_wait(&p->sem_sync); + +@@ -668,6 +669,7 @@ static void *multifd_send_thread(void *opaque) + p->num_packets = 1; + + while (true) { ++ qemu_sem_post(&multifd_send_state->channels_ready); + qemu_sem_wait(&p->sem); + + if (qatomic_read(&multifd_send_state->exiting)) { +@@ -736,7 +738,6 @@ static void *multifd_send_thread(void *opaque) + if (flags & MULTIFD_FLAG_SYNC) { + qemu_sem_post(&p->sem_sync); + } +- qemu_sem_post(&multifd_send_state->channels_ready); + } else if (p->quit) { + qemu_mutex_unlock(&p->mutex); + break; +-- +2.39.3 + diff --git a/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch b/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch new file mode 100644 index 0000000..214b6dd --- /dev/null +++ b/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch @@ -0,0 +1,159 @@ +From 639f65d2cd4c6627a1d22c4b418b41400fe40154 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 17 May 2023 17:28:33 +0200 +Subject: [PATCH 03/21] nbd/server: Fix drained_poll to wake coroutine in right + AioContext + +RH-Author: Kevin Wolf +RH-MergeRequest: 166: block/graph-lock: Disable locking for now +RH-Bugzilla: 2186725 +RH-Acked-by: Eric Blake +RH-Acked-by: Emanuele Giuseppe 
Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/4] 177092e61360c2feb04377890b32fdeb2d1cfefc (kmwolf/centos-qemu-kvm) + +nbd_drained_poll() generally runs in the main thread, not whatever +iothread the NBD server coroutine is meant to run in, so it can't +directly reenter the coroutines to wake them up. + +The code seems to have the right intention, it specifies the correct +AioContext when it calls qemu_aio_coroutine_enter(). However, this +functions doesn't schedule the coroutine to run in that AioContext, but +it assumes it is already called in the home thread of the AioContext. + +To fix this, add a new thread-safe qio_channel_wake_read() that can be +called in the main thread to wake up the coroutine in its AioContext, +and use this in nbd_drained_poll(). + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Message-Id: <20230517152834.277483-3-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit 7c1f51bf38de8cea4ed5030467646c37b46edeb7) +Signed-off-by: Kevin Wolf +--- + include/io/channel.h | 10 ++++++++++ + io/channel.c | 33 +++++++++++++++++++++++++++------ + nbd/server.c | 3 +-- + 3 files changed, 38 insertions(+), 8 deletions(-) + +diff --git a/include/io/channel.h b/include/io/channel.h +index 153fbd2904..2b905423a9 100644 +--- a/include/io/channel.h ++++ b/include/io/channel.h +@@ -757,6 +757,16 @@ void qio_channel_detach_aio_context(QIOChannel *ioc); + void coroutine_fn qio_channel_yield(QIOChannel *ioc, + GIOCondition condition); + ++/** ++ * qio_channel_wake_read: ++ * @ioc: the channel object ++ * ++ * If qio_channel_yield() is currently waiting for the channel to become ++ * readable, interrupt it and reenter immediately. This function is safe to call ++ * from any thread. 
++ */ ++void qio_channel_wake_read(QIOChannel *ioc); ++ + /** + * qio_channel_wait: + * @ioc: the channel object +diff --git a/io/channel.c b/io/channel.c +index a8c7f11649..3c9b7beb65 100644 +--- a/io/channel.c ++++ b/io/channel.c +@@ -19,6 +19,7 @@ + */ + + #include "qemu/osdep.h" ++#include "block/aio-wait.h" + #include "io/channel.h" + #include "qapi/error.h" + #include "qemu/main-loop.h" +@@ -514,7 +515,11 @@ int qio_channel_flush(QIOChannel *ioc, + static void qio_channel_restart_read(void *opaque) + { + QIOChannel *ioc = opaque; +- Coroutine *co = ioc->read_coroutine; ++ Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); ++ ++ if (!co) { ++ return; ++ } + + /* Assert that aio_co_wake() reenters the coroutine directly */ + assert(qemu_get_current_aio_context() == +@@ -525,7 +530,11 @@ static void qio_channel_restart_read(void *opaque) + static void qio_channel_restart_write(void *opaque) + { + QIOChannel *ioc = opaque; +- Coroutine *co = ioc->write_coroutine; ++ Coroutine *co = qatomic_xchg(&ioc->write_coroutine, NULL); ++ ++ if (!co) { ++ return; ++ } + + /* Assert that aio_co_wake() reenters the coroutine directly */ + assert(qemu_get_current_aio_context() == +@@ -568,7 +577,11 @@ void qio_channel_detach_aio_context(QIOChannel *ioc) + void coroutine_fn qio_channel_yield(QIOChannel *ioc, + GIOCondition condition) + { ++ AioContext *ioc_ctx = ioc->ctx ?: qemu_get_aio_context(); ++ + assert(qemu_in_coroutine()); ++ assert(in_aio_context_home_thread(ioc_ctx)); ++ + if (condition == G_IO_IN) { + assert(!ioc->read_coroutine); + ioc->read_coroutine = qemu_coroutine_self(); +@@ -580,18 +593,26 @@ void coroutine_fn qio_channel_yield(QIOChannel *ioc, + } + qio_channel_set_aio_fd_handlers(ioc); + qemu_coroutine_yield(); ++ assert(in_aio_context_home_thread(ioc_ctx)); + + /* Allow interrupting the operation by reentering the coroutine other than + * through the aio_fd_handlers. 
*/ +- if (condition == G_IO_IN && ioc->read_coroutine) { +- ioc->read_coroutine = NULL; ++ if (condition == G_IO_IN) { ++ assert(ioc->read_coroutine == NULL); + qio_channel_set_aio_fd_handlers(ioc); +- } else if (condition == G_IO_OUT && ioc->write_coroutine) { +- ioc->write_coroutine = NULL; ++ } else if (condition == G_IO_OUT) { ++ assert(ioc->write_coroutine == NULL); + qio_channel_set_aio_fd_handlers(ioc); + } + } + ++void qio_channel_wake_read(QIOChannel *ioc) ++{ ++ Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); ++ if (co) { ++ aio_co_wake(co); ++ } ++} + + static gboolean qio_channel_wait_complete(QIOChannel *ioc, + GIOCondition condition, +diff --git a/nbd/server.c b/nbd/server.c +index 3d8d0d81df..ea47522e8f 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -1599,8 +1599,7 @@ static bool nbd_drained_poll(void *opaque) + * enter it here so we don't depend on the client to wake it up. + */ + if (client->recv_coroutine != NULL && client->read_yielding) { +- qemu_aio_coroutine_enter(exp->common.ctx, +- client->recv_coroutine); ++ qio_channel_wake_read(client->ioc); + } + + return true; +-- +2.39.3 + diff --git a/kvm-raven-disable-reentrancy-detection-for-iomem.patch b/kvm-raven-disable-reentrancy-detection-for-iomem.patch new file mode 100644 index 0000000..4a4a2cc --- /dev/null +++ b/kvm-raven-disable-reentrancy-detection-for-iomem.patch @@ -0,0 +1,54 @@ +From 936e21428a04524ccffeb36110d1aa61de9f44e5 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 9 May 2023 10:29:03 -0400 +Subject: [PATCH 11/21] raven: disable reentrancy detection for iomem + +RH-Author: Jon Maloy +RH-MergeRequest: 165: memory: prevent dma-reentracy issues +RH-Jira: RHEL-516 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [7/13] 48278583aa1ab08b912f49cd8b3a79d1bb3abf5f (jmaloy/jmaloy-qemu-kvm-2) + +Jira: https://issues.redhat.com/browse/RHEL-516 +Upstream: Merged +CVE: CVE-2023-2680 + +commit 6dad5a6810d9c60ca320d01276f6133bbcfa1fc7 +Author: Alexander Bulekov +Date: Thu Apr 27 
17:10:12 2023 -0400 + + raven: disable reentrancy detection for iomem + + As the code is designed for re-entrant calls from raven_io_ops to + pci-conf, mark raven_io_ops as reentrancy-safe. + + Signed-off-by: Alexander Bulekov + Message-Id: <20230427211013.2994127-8-alxndr@bu.edu> + Signed-off-by: Thomas Huth + +Signed-off-by: Jon Maloy +--- + hw/pci-host/raven.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c +index 072ffe3c5e..9a11ac4b2b 100644 +--- a/hw/pci-host/raven.c ++++ b/hw/pci-host/raven.c +@@ -294,6 +294,13 @@ static void raven_pcihost_initfn(Object *obj) + memory_region_init(&s->pci_memory, obj, "pci-memory", 0x3f000000); + address_space_init(&s->pci_io_as, &s->pci_io, "raven-io"); + ++ /* ++ * Raven's raven_io_ops use the address-space API to access pci-conf-idx ++ * (which is also owned by the raven device). As such, mark the ++ * pci_io_non_contiguous as re-entrancy safe. ++ */ ++ s->pci_io_non_contiguous.disable_reentrancy_guard = true; ++ + /* CPU address space */ + memory_region_add_subregion(address_space_mem, PCI_IO_BASE_ADDR, + &s->pci_io); +-- +2.39.3 + diff --git a/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch b/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch new file mode 100644 index 0000000..ecf1353 --- /dev/null +++ b/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch @@ -0,0 +1,129 @@ +From 3cab2a638a10ece2b76d9f33a3c5dc6f64f1bbaa Mon Sep 17 00:00:00 2001 +From: Claudio Imbrenda +Date: Wed, 10 May 2023 12:55:31 +0200 +Subject: [PATCH 21/21] s390x/pv: Fix spurious warning with asynchronous + teardown +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 173: Improve memory reclaiming for z15 Secure Execution guests +RH-Bugzilla: 2168500 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cédric Le Goater +RH-Commit: [2/2] 
cb690d3155ea22c6df00a4d75b72f501515e5556 (thuth/qemu-kvm-cs9) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168500 + +Kernel commit 292a7d6fca33 ("KVM: s390: pv: fix asynchronous teardown +for small VMs") causes the KVM_PV_ASYNC_CLEANUP_PREPARE ioctl to fail +if the VM is not larger than 2GiB. QEMU would attempt it and fail, +print an error message, and then proceed with a normal teardown. + +Avoid attempting to use asynchronous teardown altogether when the VM is +not larger than 2 GiB. This will avoid triggering the error message and +also avoid pointless overhead; normal teardown is fast enough for small +VMs. + +Reported-by: Marc Hartmayer +Fixes: c3a073c610 ("s390x/pv: Add support for asynchronous teardown for reboot") +Link: https://lore.kernel.org/all/20230421085036.52511-2-imbrenda@linux.ibm.com/ +Signed-off-by: Claudio Imbrenda +Message-Id: <20230510105531.30623-2-imbrenda@linux.ibm.com> +Reviewed-by: Thomas Huth +[thuth: Fix inline function parameter in pv.h] +Signed-off-by: Thomas Huth +(cherry picked from commit 88693ab2a53f2f3d25cb39a7b5034ab391bc5a81) +--- + hw/s390x/pv.c | 10 ++++++++-- + hw/s390x/s390-virtio-ccw.c | 2 +- + include/hw/s390x/pv.h | 6 +++--- + 3 files changed, 12 insertions(+), 6 deletions(-) + +diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c +index 49ea38236c..b63f3784c6 100644 +--- a/hw/s390x/pv.c ++++ b/hw/s390x/pv.c +@@ -13,6 +13,7 @@ + + #include + ++#include "qemu/units.h" + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "sysemu/kvm.h" +@@ -115,7 +116,7 @@ static void *s390_pv_do_unprot_async_fn(void *p) + return NULL; + } + +-bool s390_pv_vm_try_disable_async(void) ++bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) + { + /* + * t is only needed to create the thread; once qemu_thread_create +@@ -123,7 +124,12 @@ bool s390_pv_vm_try_disable_async(void) + */ + QemuThread t; + +- if (!kvm_check_extension(kvm_state, KVM_CAP_S390_PROTECTED_ASYNC_DISABLE)) { ++ /* ++ * If the feature is not present 
or if the VM is not larger than 2 GiB, ++ * KVM_PV_ASYNC_CLEANUP_PREPARE fill fail; no point in attempting it. ++ */ ++ if ((MACHINE(ms)->maxram_size <= 2 * GiB) || ++ !kvm_check_extension(kvm_state, KVM_CAP_S390_PROTECTED_ASYNC_DISABLE)) { + return false; + } + if (s390_pv_cmd(KVM_PV_ASYNC_CLEANUP_PREPARE, NULL) != 0) { +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 6a0b93c63d..d95c595f88 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -330,7 +330,7 @@ static inline void s390_do_cpu_ipl(CPUState *cs, run_on_cpu_data arg) + + static void s390_machine_unprotect(S390CcwMachineState *ms) + { +- if (!s390_pv_vm_try_disable_async()) { ++ if (!s390_pv_vm_try_disable_async(ms)) { + s390_pv_vm_disable(); + } + ms->pv = false; +diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h +index 966306a9db..7b935e2246 100644 +--- a/include/hw/s390x/pv.h ++++ b/include/hw/s390x/pv.h +@@ -14,10 +14,10 @@ + + #include "qapi/error.h" + #include "sysemu/kvm.h" ++#include "hw/s390x/s390-virtio-ccw.h" + + #ifdef CONFIG_KVM + #include "cpu.h" +-#include "hw/s390x/s390-virtio-ccw.h" + + static inline bool s390_is_pv(void) + { +@@ -41,7 +41,7 @@ static inline bool s390_is_pv(void) + int s390_pv_query_info(void); + int s390_pv_vm_enable(void); + void s390_pv_vm_disable(void); +-bool s390_pv_vm_try_disable_async(void); ++bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms); + int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); + int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); + void s390_pv_prep_reset(void); +@@ -61,7 +61,7 @@ static inline bool s390_is_pv(void) { return false; } + static inline int s390_pv_query_info(void) { return 0; } + static inline int s390_pv_vm_enable(void) { return 0; } + static inline void s390_pv_vm_disable(void) {} +-static inline bool s390_pv_vm_try_disable_async(void) { return false; } ++static inline bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) { 
return false; } + static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } + static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } + static inline void s390_pv_prep_reset(void) {} +-- +2.39.3 + diff --git a/kvm-util-async-teardown-wire-up-query-command-line-optio.patch b/kvm-util-async-teardown-wire-up-query-command-line-optio.patch new file mode 100644 index 0000000..8c468d8 --- /dev/null +++ b/kvm-util-async-teardown-wire-up-query-command-line-optio.patch @@ -0,0 +1,180 @@ +From c1502b0cd16378d6d5bd4259b90bf81a5fb5aad3 Mon Sep 17 00:00:00 2001 +From: Claudio Imbrenda +Date: Fri, 5 May 2023 14:00:51 +0200 +Subject: [PATCH 20/21] util/async-teardown: wire up query-command-line-options +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 173: Improve memory reclaiming for z15 Secure Execution guests +RH-Bugzilla: 2168500 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cédric Le Goater +RH-Commit: [1/2] 76e5f25df2c02721f5a29f552ee3061be589abb2 (thuth/qemu-kvm-cs9) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168500 + +Add new -run-with option with an async-teardown=on|off parameter. It is +visible in the output of query-command-line-options QMP command, so it +can be discovered and used by libvirt. + +The option -async-teardown is now redundant, deprecate it. 
+ +Reported-by: Boris Fiuczynski +Fixes: c891c24b1a ("os-posix: asynchronous teardown for shutdown on Linux") +Signed-off-by: Claudio Imbrenda +Message-Id: <20230505120051.36605-2-imbrenda@linux.ibm.com> +[thuth: Add curly braces to fix error with GCC 8.5, fix bug in deprecated.rst] +Signed-off-by: Thomas Huth + +(cherry picked from commit 80bd81cadd127c1e2fc784612a52abe392670ba4) +Conflicts: + docs/about/deprecated.rst (missing context from other patches) +Signed-off-by: Thomas Huth +--- + docs/about/deprecated.rst | 5 +++++ + os-posix.c | 14 ++++++++++++++ + qemu-options.hx | 34 +++++++++++++++++++++++----------- + util/async-teardown.c | 21 +++++++++++++++++++++ + 4 files changed, 63 insertions(+), 11 deletions(-) + +diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst +index 1ca9dc33d6..52893fcf38 100644 +--- a/docs/about/deprecated.rst ++++ b/docs/about/deprecated.rst +@@ -111,6 +111,11 @@ Use ``-machine acpi=off`` instead. + The HAXM project has been retired (see https://github.com/intel/haxm#status). + Use "whpx" (on Windows) or "hvf" (on macOS) instead. + ++``-async-teardown`` (since 8.1) ++''''''''''''''''''''''''''''''' ++ ++Use ``-run-with async-teardown=on`` instead. 
++ + + QEMU Machine Protocol (QMP) commands + ------------------------------------ +diff --git a/os-posix.c b/os-posix.c +index 5adc69f560..90ea71725f 100644 +--- a/os-posix.c ++++ b/os-posix.c +@@ -36,6 +36,8 @@ + #include "qemu/log.h" + #include "sysemu/runstate.h" + #include "qemu/cutils.h" ++#include "qemu/config-file.h" ++#include "qemu/option.h" + + #ifdef CONFIG_LINUX + #include +@@ -152,9 +154,21 @@ int os_parse_cmd_args(int index, const char *optarg) + daemonize = 1; + break; + #if defined(CONFIG_LINUX) ++ /* deprecated */ + case QEMU_OPTION_asyncteardown: + init_async_teardown(); + break; ++ case QEMU_OPTION_run_with: { ++ QemuOpts *opts = qemu_opts_parse_noisily(qemu_find_opts("run-with"), ++ optarg, false); ++ if (!opts) { ++ exit(1); ++ } ++ if (qemu_opt_get_bool(opts, "async-teardown", false)) { ++ init_async_teardown(); ++ } ++ break; ++ } + #endif + default: + return -1; +diff --git a/qemu-options.hx b/qemu-options.hx +index 52b49f1f6a..b18f933703 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -4766,20 +4766,32 @@ DEF("qtest-log", HAS_ARG, QEMU_OPTION_qtest_log, "", QEMU_ARCH_ALL) + DEF("async-teardown", 0, QEMU_OPTION_asyncteardown, + "-async-teardown enable asynchronous teardown\n", + QEMU_ARCH_ALL) +-#endif + SRST + ``-async-teardown`` +- Enable asynchronous teardown. A new process called "cleanup/" +- will be created at startup sharing the address space with the main qemu +- process, using clone. It will wait for the main qemu process to +- terminate completely, and then exit. +- This allows qemu to terminate very quickly even if the guest was +- huge, leaving the teardown of the address space to the cleanup +- process. Since the cleanup process shares the same cgroups as the +- main qemu process, accounting is performed correctly. This only +- works if the cleanup process is not forcefully killed with SIGKILL +- before the main qemu process has terminated completely. ++ This option is deprecated and should no longer be used. 
The new option ++ ``-run-with async-teardown=on`` is a replacement. + ERST ++DEF("run-with", HAS_ARG, QEMU_OPTION_run_with, ++ "-run-with async-teardown[=on|off]\n" ++ " misc QEMU process lifecycle options\n" ++ " async-teardown=on enables asynchronous teardown\n", ++ QEMU_ARCH_ALL) ++SRST ++``-run-with`` ++ Set QEMU process lifecycle options. ++ ++ ``async-teardown=on`` enables asynchronous teardown. A new process called ++ "cleanup/" will be created at startup sharing the address ++ space with the main QEMU process, using clone. It will wait for the ++ main QEMU process to terminate completely, and then exit. This allows ++ QEMU to terminate very quickly even if the guest was huge, leaving the ++ teardown of the address space to the cleanup process. Since the cleanup ++ process shares the same cgroups as the main QEMU process, accounting is ++ performed correctly. This only works if the cleanup process is not ++ forcefully killed with SIGKILL before the main QEMU process has ++ terminated completely. 
++ERST ++#endif + + DEF("msg", HAS_ARG, QEMU_OPTION_msg, + "-msg [timestamp[=on|off]][,guest-name=[on|off]]\n" +diff --git a/util/async-teardown.c b/util/async-teardown.c +index 62cdeb0f20..3ab19c8740 100644 +--- a/util/async-teardown.c ++++ b/util/async-teardown.c +@@ -12,6 +12,9 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/config-file.h" ++#include "qemu/option.h" ++#include "qemu/module.h" + #include + #include + #include +@@ -144,3 +147,21 @@ void init_async_teardown(void) + clone(async_teardown_fn, new_stack_for_clone(), CLONE_VM, NULL); + sigprocmask(SIG_SETMASK, &old_signals, NULL); + } ++ ++static QemuOptsList qemu_run_with_opts = { ++ .name = "run-with", ++ .head = QTAILQ_HEAD_INITIALIZER(qemu_run_with_opts.head), ++ .desc = { ++ { ++ .name = "async-teardown", ++ .type = QEMU_OPT_BOOL, ++ }, ++ { /* end of list */ } ++ }, ++}; ++ ++static void register_teardown(void) ++{ ++ qemu_add_opts(&qemu_run_with_opts); ++} ++opts_init(register_teardown); +-- +2.39.3 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index e7c5444..073081a 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 8.0.0 -Release: 4%{?rcrel}%{?dist}%{?cc_suffix} +Release: 5%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -309,6 +309,48 @@ Patch77: kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch Patch78: kvm-iotests-Use-alternative-CPU-type-that-is-not-depreca.patch # For bz#2185688 - [qemu-kvm] no response with QMP command block_resize Patch79: kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch +# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) +Patch80: kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch +# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) +Patch81: kvm-graph-lock-Disable-locking-for-now.patch +# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) +Patch82: kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch +# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) +Patch83: kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch84: kvm-memory-prevent-dma-reentracy-issues.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch85: kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch86: kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch87: kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch88: kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch +# For RHEL-516 - 
CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch89: kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch90: kvm-raven-disable-reentrancy-detection-for-iomem.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch91: kvm-apic-disable-reentrancy-detection-for-apic-msi.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch92: kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch93: kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch94: kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch95: kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch +# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] +Patch96: kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch +# For bz#2189423 - Failed to migrate VM from rhel 9.3 to rhel 9.2 +Patch97: kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch +# For bz#2196289 - Fix number of ready channels on multifd +Patch98: kvm-multifd-Fix-the-number-of-channels-ready.patch +# For bz#2168500 - [IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part +Patch99: kvm-util-async-teardown-wire-up-query-command-line-optio.patch +# For bz#2168500 - [IBM 
9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part +Patch100: kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch %if %{have_clang} BuildRequires: clang @@ -1333,6 +1375,39 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Tue Jun 13 2023 Miroslav Rezanina - 8.0.0-5 +- kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch [bz#2186725] +- kvm-graph-lock-Disable-locking-for-now.patch [bz#2186725] +- kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch [bz#2186725] +- kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch [bz#2186725] +- kvm-memory-prevent-dma-reentracy-issues.patch [RHEL-516] +- kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch [RHEL-516] +- kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch [RHEL-516] +- kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch [RHEL-516] +- kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch [RHEL-516] +- kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch [RHEL-516] +- kvm-raven-disable-reentrancy-detection-for-iomem.patch [RHEL-516] +- kvm-apic-disable-reentrancy-detection-for-apic-msi.patch [RHEL-516] +- kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch [RHEL-516] +- kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch [RHEL-516] +- kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch [RHEL-516] +- kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch [RHEL-516] +- kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch [RHEL-516] +- kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch [bz#2189423] +- kvm-multifd-Fix-the-number-of-channels-ready.patch [bz#2196289] +- kvm-util-async-teardown-wire-up-query-command-line-optio.patch [bz#2168500] +- kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch [bz#2168500] +- Resolves: bz#2186725 + (Qemu hang when commit during fio running(iothread enable)) +- Resolves: 
RHEL-516 + (CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9]) +- Resolves: bz#2189423 + (Failed to migrate VM from rhel 9.3 to rhel 9.2) +- Resolves: bz#2196289 + (Fix number of ready channels on multifd) +- Resolves: bz#2168500 + ([IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part) + * Mon May 22 2023 Miroslav Rezanina - 8.0.0-4 - kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch [bz#2058982] - kvm-util-mmap-alloc-qemu_fd_getfs.patch [bz#2057267] From b7384883871b64bdcb6d5c9ad5c20c1c3c8ad653 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 26 Jun 2023 03:05:15 -0400 Subject: [PATCH 194/195] * Mon Jun 26 2023 Miroslav Rezanina - 8.0.0-6 - kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch [bz#2216201] - kvm-target-i386-add-support-for-FB_CLEAR-feature.patch [bz#2216201] - kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch [bz#2180076] - kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch [bz#2180076] - kvm-Enable-libblkio-block-drivers.patch [bz#2213317] - Resolves: bz#2216201 ([qemu-kvm]VM reports vulnerabilty to mmio_stale_data on patched host with microcode) - Resolves: bz#2180076 ([qemu-kvm] support fd passing for libblkio QEMU BlockDrivers) - Resolves: bz#2213317 (Enable libblkio-based block drivers in QEMU) --- ...qemu_open-to-support-fd-passing-for-.patch | 108 ++++++++++++++++++ ...eature-for-BlockdevOptionsVirtioBlkV.patch | 79 +++++++++++++ ...386-add-support-for-FB_CLEAR-feature.patch | 71 ++++++++++++ ...86-add-support-for-FLUSH_L1D-feature.patch | 70 ++++++++++++ qemu-kvm.spec | 42 ++++++- 5 files changed, 368 insertions(+), 2 deletions(-) create mode 100644 kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch create mode 100644 kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch create mode 100644 kvm-target-i386-add-support-for-FB_CLEAR-feature.patch create mode 100644 
kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch diff --git a/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch b/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch new file mode 100644 index 0000000..3b32299 --- /dev/null +++ b/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch @@ -0,0 +1,108 @@ +From fd57241cf0f8c2906fa56118f8da1e65a5b1e4d8 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 30 May 2023 09:19:40 +0200 +Subject: [PATCH 3/5] block/blkio: use qemu_open() to support fd passing for + virtio-blk + +RH-Author: Stefano Garzarella +RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver +RH-Bugzilla: 2180076 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/2] 9ff1a1510500db101648341207a36318a0c41c5a (sgarzarella/qemu-kvm-c-9-s) + +Some virtio-blk drivers (e.g. virtio-blk-vhost-vdpa) supports the fd +passing. Let's expose this to the user, so the management layer +can pass the file descriptor of an already opened path. + +If the libblkio virtio-blk driver supports fd passing, let's always +use qemu_open() to open the `path`, so we can handle fd passing +from the management layer through the "/dev/fdset/N" special path. 
+ +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Stefano Garzarella +Message-id: 20230530071941.8954-2-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit cad2ccc395c7113fb30bc9390774b67b34f06c68) +Signed-off-by: Stefano Garzarella +--- + block/blkio.c | 53 ++++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 44 insertions(+), 9 deletions(-) + +diff --git a/block/blkio.c b/block/blkio.c +index 0cdc99a729..6a6f20f923 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -672,25 +672,60 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, + { + const char *path = qdict_get_try_str(options, "path"); + BDRVBlkioState *s = bs->opaque; +- int ret; ++ bool fd_supported = false; ++ int fd, ret; + + if (!path) { + error_setg(errp, "missing 'path' option"); + return -EINVAL; + } + +- ret = blkio_set_str(s->blkio, "path", path); +- qdict_del(options, "path"); +- if (ret < 0) { +- error_setg_errno(errp, -ret, "failed to set path: %s", +- blkio_get_error_msg()); +- return ret; +- } +- + if (!(flags & BDRV_O_NOCACHE)) { + error_setg(errp, "cache.direct=off is not supported"); + return -EINVAL; + } ++ ++ if (blkio_get_int(s->blkio, "fd", &fd) == 0) { ++ fd_supported = true; ++ } ++ ++ /* ++ * If the libblkio driver supports fd passing, let's always use qemu_open() ++ * to open the `path`, so we can handle fd passing from the management ++ * layer through the "/dev/fdset/N" special path. 
++ */ ++ if (fd_supported) { ++ int open_flags; ++ ++ if (flags & BDRV_O_RDWR) { ++ open_flags = O_RDWR; ++ } else { ++ open_flags = O_RDONLY; ++ } ++ ++ fd = qemu_open(path, open_flags, errp); ++ if (fd < 0) { ++ return -EINVAL; ++ } ++ ++ ret = blkio_set_int(s->blkio, "fd", fd); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "failed to set fd: %s", ++ blkio_get_error_msg()); ++ qemu_close(fd); ++ return ret; ++ } ++ } else { ++ ret = blkio_set_str(s->blkio, "path", path); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "failed to set path: %s", ++ blkio_get_error_msg()); ++ return ret; ++ } ++ } ++ ++ qdict_del(options, "path"); ++ + return 0; + } + +-- +2.39.3 + diff --git a/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch b/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch new file mode 100644 index 0000000..abaadf8 --- /dev/null +++ b/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch @@ -0,0 +1,79 @@ +From 99f27e14856c528f442b628e8f4a7881e6e63179 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 30 May 2023 09:19:41 +0200 +Subject: [PATCH 4/5] qapi: add '@fdset' feature for + BlockdevOptionsVirtioBlkVhostVdpa + +RH-Author: Stefano Garzarella +RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver +RH-Bugzilla: 2180076 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/2] abee2a542e41f9eaa17dd204b74778e232d1eb60 (sgarzarella/qemu-kvm-c-9-s) + +The virtio-blk-vhost-vdpa driver in libblkio 1.3.0 supports the fd +passing through the new 'fd' property. + +Since now we are using qemu_open() on '@path' if the virtio-blk driver +supports the fd passing, let's announce it. +In this way, the management layer can pass the file descriptor of an +already opened vhost-vdpa character device. This is useful especially +when the device can only be accessed with certain privileges. 
+ +Add the '@fdset' feature only when the virtio-blk-vhost-vdpa driver +in libblkio supports it. + +Suggested-by: Markus Armbruster +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Stefano Garzarella +Message-id: 20230530071941.8954-3-sgarzare@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 98b126f5e3228a346c774e569e26689943b401dd) +- changed doc indentantion since QAPI parser failed downstream because + we don't have commit 08349786c84306863a3b659c8a9b28bb74c405c6 + downstream. It relaxed the indentation rules. +Signed-off-by: Stefano Garzarella +--- + meson.build | 4 ++++ + qapi/block-core.json | 6 ++++++ + 2 files changed, 10 insertions(+) + +diff --git a/meson.build b/meson.build +index d964e741e7..a18cc64531 100644 +--- a/meson.build ++++ b/meson.build +@@ -1843,6 +1843,10 @@ config_host_data.set('CONFIG_LZO', lzo.found()) + config_host_data.set('CONFIG_MPATH', mpathpersist.found()) + config_host_data.set('CONFIG_MPATH_NEW_API', mpathpersist_new_api) + config_host_data.set('CONFIG_BLKIO', blkio.found()) ++if blkio.found() ++ config_host_data.set('CONFIG_BLKIO_VHOST_VDPA_FD', ++ blkio.version().version_compare('>=1.3.0')) ++endif + config_host_data.set('CONFIG_CURL', curl.found()) + config_host_data.set('CONFIG_CURSES', curses.found()) + config_host_data.set('CONFIG_GBM', gbm.found()) +diff --git a/qapi/block-core.json b/qapi/block-core.json +index c05ad0c07e..81b48a8d3b 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -3841,10 +3841,16 @@ + # + # @path: path to the vhost-vdpa character device. 
+ # ++# Features: ++# @fdset: Member @path supports the special "/dev/fdset/N" path ++# (since 8.1) ++# + # Since: 7.2 + ## + { 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa', + 'data': { 'path': 'str' }, ++ 'features': [ { 'name' :'fdset', ++ 'if': 'CONFIG_BLKIO_VHOST_VDPA_FD' } ], + 'if': 'CONFIG_BLKIO' } + + ## +-- +2.39.3 + diff --git a/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch b/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch new file mode 100644 index 0000000..2b1cbc9 --- /dev/null +++ b/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch @@ -0,0 +1,71 @@ +From 0d056d6da9e4147d5965bf3507f6d6d6a413924d Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Wed, 24 May 2023 06:52:43 -0400 +Subject: [PATCH 2/5] target/i386: add support for FB_CLEAR feature + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 167: target/i386: add support for FB_CLEAR feature +RH-Bugzilla: 2216201 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/2] 5f191964ba25754107a06ef907f4ac614280aaa1 (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2216201 + +commit 22e1094ca82d5518c1b69aff3e87c550776ae1eb +Author: Emanuele Giuseppe Esposito +Date: Wed Feb 1 08:57:59 2023 -0500 + + target/i386: add support for FB_CLEAR feature + + As reported by the Intel's doc: + "FB_CLEAR: The processor will overwrite fill buffer values as part of + MD_CLEAR operations with the VERW instruction. + On these processors, L1D_FLUSH does not overwrite fill buffer values." + + If this cpu feature is present in host, allow QEMU to choose whether to + show it to the guest too. + One disadvantage of not exposing it is that the guest will report + a non existing vulnerability in + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + because the mitigation is present only when the cpu has + (FLUSH_L1D and MD_CLEAR) or FB_CLEAR + features enabled. 
+ + Signed-off-by: Emanuele Giuseppe Esposito + Message-Id: <20230201135759.555607-3-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + target/i386/cpu.c | 2 +- + target/i386/cpu.h | 1 + + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index caf6338cc0..839706b430 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1012,7 +1012,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "ssb-no", "mds-no", "pschange-mc-no", "tsx-ctrl", + "taa-no", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +- NULL, NULL, NULL, NULL, ++ NULL, "fb-clear", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 74fa649b60..c28b9df217 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -989,6 +989,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + #define MSR_ARCH_CAP_PSCHANGE_MC_NO (1U << 6) + #define MSR_ARCH_CAP_TSX_CTRL_MSR (1U << 7) + #define MSR_ARCH_CAP_TAA_NO (1U << 8) ++#define MSR_ARCH_CAP_FB_CLEAR (1U << 17) + + #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) + +-- +2.39.3 + diff --git a/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch b/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch new file mode 100644 index 0000000..39f2542 --- /dev/null +++ b/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch @@ -0,0 +1,70 @@ +From 14eae569030805680570d93412100ad26242c7e6 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Wed, 24 May 2023 06:52:34 -0400 +Subject: [PATCH 1/5] target/i386: add support for FLUSH_L1D feature + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 167: target/i386: add support for FB_CLEAR feature +RH-Bugzilla: 2216201 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/2] e296c75c5cd7e1d16d3c70483d52aeba9f9eb2cd (eesposit/qemu-kvm) + +Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2216201 + +commit 0e7e3bf1a552c178924867fa7c2f30ccc8a179e0 +Author: Emanuele Giuseppe Esposito +Date: Wed Feb 1 08:57:58 2023 -0500 + + target/i386: add support for FLUSH_L1D feature + + As reported by Intel's doc: + "L1D_FLUSH: Writeback and invalidate the L1 data cache" + + If this cpu feature is present in host, allow QEMU to choose whether to + show it to the guest too. + One disadvantage of not exposing it is that the guest will report + a non existing vulnerability in + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + because the mitigation is present only when the cpu has + (FLUSH_L1D and MD_CLEAR) or FB_CLEAR + features enabled. + + Signed-off-by: Emanuele Giuseppe Esposito + Message-Id: <20230201135759.555607-2-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + target/i386/cpu.c | 2 +- + target/i386/cpu.h | 2 ++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 0ef2bf1b93..caf6338cc0 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -860,7 +860,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "tsx-ldtrk", NULL, NULL /* pconfig */, "arch-lbr", + NULL, NULL, "amx-bf16", "avx512-fp16", + "amx-tile", "amx-int8", "spec-ctrl", "stibp", +- NULL, "arch-capabilities", "core-capability", "ssbd", ++ "flush-l1d", "arch-capabilities", "core-capability", "ssbd", + }, + .cpuid = { + .eax = 7, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index d243e290d3..74fa649b60 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -896,6 +896,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) + /* Single Thread Indirect Branch Predictors */ + #define CPUID_7_0_EDX_STIBP (1U << 27) ++/* Flush L1D cache */ ++#define CPUID_7_0_EDX_FLUSH_L1D (1U << 28) + /* Arch Capabilities */ + #define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29) 
+ /* Core Capability */ +-- +2.39.3 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 073081a..e7cd2af 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -100,7 +100,7 @@ %endif %global target_list %{kvm_target}-softmmu -%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress +%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress,virtio-blk-vdpa-blk,virtio-blk-vfio-pci,virtio-blk-vhost-user,io_uring,nvme-io_uring %global block_drivers_ro_list vdi,vmdk,vhdx,vpc,https %define qemudocdir %{_docdir}/%{name} %global firmwaredirs "%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios" @@ -125,6 +125,7 @@ Requires: %{name}-device-usb-host = %{epoch}:%{version}-%{release} \ %if %{have_usbredir} \ Requires: %{name}-device-usb-redirect = %{epoch}:%{version}-%{release} \ %endif \ +Requires: %{name}-block-blkio = %{epoch}:%{version}-%{release} \ Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ Requires: %{name}-audio-pa = %{epoch}:%{version}-%{release} @@ -148,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 8.0.0 -Release: 5%{?rcrel}%{?dist}%{?cc_suffix} +Release: 6%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -351,6 +352,14 @@ Patch98: kvm-multifd-Fix-the-number-of-channels-ready.patch Patch99: kvm-util-async-teardown-wire-up-query-command-line-optio.patch # For bz#2168500 - [IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part Patch100: kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch +# For bz#2216201 - [qemu-kvm]VM reports vulnerabilty to mmio_stale_data on patched host with microcode +Patch101: kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch +# For bz#2216201 - [qemu-kvm]VM reports vulnerabilty to mmio_stale_data on patched host with microcode +Patch102: kvm-target-i386-add-support-for-FB_CLEAR-feature.patch +# For bz#2180076 - [qemu-kvm] support fd passing for libblkio QEMU BlockDrivers +Patch103: kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch +# For bz#2180076 - [qemu-kvm] support fd passing for libblkio QEMU BlockDrivers +Patch104: kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch %if %{have_clang} BuildRequires: clang @@ -367,6 +376,7 @@ BuildRequires: glib2-devel BuildRequires: gnutls-devel BuildRequires: cyrus-sasl-devel BuildRequires: libaio-devel +BuildRequires: libblkio-devel BuildRequires: liburing-devel BuildRequires: python3-devel BuildRequires: libattr-devel @@ -554,6 +564,17 @@ Install this package if you want access to the avocado_qemu tests, or qemu-iotests. +%package block-blkio +Summary: QEMU libblkio block drivers +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description block-blkio +This package provides the additional libblkio block drivers for QEMU. + +Install this package if you want to use virtio-blk-vdpa-blk, +virtio-blk-vfio-pci, virtio-blk-vhost-user, io_uring, and nvme-io_uring block +drivers provided by libblkio. 
+ + %package block-curl Summary: QEMU CURL block driver Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} @@ -847,6 +868,7 @@ run_configure \ --block-drv-ro-whitelist=%{block_drivers_ro_list} \ %endif --enable-attr \ + --enable-blkio \ --enable-cap-ng \ --enable-capstone \ --enable-coroutine-pool \ @@ -1347,6 +1369,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %{testsdir} %{_libdir}/%{name}/accel-qtest-%{kvm_target}.so +%files block-blkio +%{_libdir}/%{name}/block-blkio.so + %files block-curl %{_libdir}/%{name}/block-curl.so %if %{have_block_rbd} @@ -1375,6 +1400,19 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Jun 26 2023 Miroslav Rezanina - 8.0.0-6 +- kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch [bz#2216201] +- kvm-target-i386-add-support-for-FB_CLEAR-feature.patch [bz#2216201] +- kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch [bz#2180076] +- kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch [bz#2180076] +- kvm-Enable-libblkio-block-drivers.patch [bz#2213317] +- Resolves: bz#2216201 + ([qemu-kvm]VM reports vulnerabilty to mmio_stale_data on patched host with microcode) +- Resolves: bz#2180076 + ([qemu-kvm] support fd passing for libblkio QEMU BlockDrivers) +- Resolves: bz#2213317 + (Enable libblkio-based block drivers in QEMU) + * Tue Jun 13 2023 Miroslav Rezanina - 8.0.0-5 - kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch [bz#2186725] - kvm-graph-lock-Disable-locking-for-now.patch [bz#2186725] From b56a1fa35b187a80fb9a1b5fe6bb3fc5e71a3a78 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 10 Jul 2023 01:43:27 -0400 Subject: [PATCH 195/195] * Mon Jul 10 2023 Miroslav Rezanina - 8.0.0-7 - kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch [bz#2171363] - kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch [bz#2171363] - kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch [bz#2171363] - 
kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch [RHEL-330] - kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch [bz#2218644] - kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch [bz#2128929] - Resolves: bz#2171363 ([aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association) - Resolves: RHEL-330 ([virtual network][qemu-kvm-8.0.0-rc1]qemu core dump: qemu-kvm: ../softmmu/memory.c:2592: void memory_region_del_eventfd(MemoryRegion *, hwaddr, unsigned int, _Bool, uint64_t, EventNotifier *): Assertion `i != mr->ioeventfd_nb' failed) - Resolves: bz#2218644 (query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone)) - Resolves: bz#2128929 ([rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest) --- ...idate-cluster-and-NUMA-node-boundary.patch | 60 +++++++ ...date-cluster-and-NUMA-node-boundary-.patch | 41 +++++ ...pu-stats-fd-to-avoid-vcpu-interrupti.patch | 160 ++++++++++++++++++ ...uster-and-NUMA-node-boundary-if-requ.patch | 145 ++++++++++++++++ ...host_dev_enable_notifiers-error-case.patch | 138 +++++++++++++++ ...t-cleanup-the-vdpa-vhost-net-structu.patch | 67 ++++++++ qemu-kvm.spec | 30 +++- 7 files changed, 640 insertions(+), 1 deletion(-) create mode 100644 kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch create mode 100644 kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch create mode 100644 kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch create mode 100644 kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch create mode 100644 kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch create mode 100644 kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch diff --git a/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch b/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch new file mode 100644 index 0000000..e96bb10 --- /dev/null +++ 
b/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch @@ -0,0 +1,60 @@ +From 7b57aec372fc238cbaafe86557f9fb4b560895b1 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Tue, 27 Jun 2023 20:20:09 +1000 +Subject: [PATCH 2/6] hw/arm: Validate cluster and NUMA node boundary + +RH-Author: Gavin Shan +RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines +RH-Bugzilla: 2171363 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Commit: [2/3] fcac7ea85d9f73613989903c642fc1bf6c51946b + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 + +There are two ARM machines where NUMA is aware: 'virt' and 'sbsa-ref'. +Both of them are required to follow cluster-NUMA-node boundary. To +enable the validation to warn about the irregular configuration where +multiple CPUs in one cluster have been associated with different NUMA +nodes. + +Signed-off-by: Gavin Shan +Acked-by: Igor Mammedov +Message-Id: <20230509002739.18388-3-gshan@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit fecff672351ace5e39adf7dbcf7a8ee748b201cb) +Signed-off-by: Gavin Shan +--- + hw/arm/sbsa-ref.c | 2 ++ + hw/arm/virt.c | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c +index 0b93558dde..efb380e7c8 100644 +--- a/hw/arm/sbsa-ref.c ++++ b/hw/arm/sbsa-ref.c +@@ -864,6 +864,8 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data) + mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids; + mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props; + mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id; ++ /* platform instead of architectural choice */ ++ mc->cpu_cluster_has_numa_boundary = true; + } + + static const TypeInfo sbsa_ref_info = { +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 9be53e9355..df6a0231bc 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3083,6 +3083,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + 
mc->smp_props.clusters_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; ++ /* platform instead of architectural choice */ ++ mc->cpu_cluster_has_numa_boundary = true; + mc->default_ram_id = "mach-virt.ram"; + + object_class_property_add(oc, "acpi", "OnOffAuto", +-- +2.39.3 + diff --git a/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch b/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch new file mode 100644 index 0000000..42ec705 --- /dev/null +++ b/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch @@ -0,0 +1,41 @@ +From 022529f6d0ee306da857825c72a98bf7ddf5de22 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Tue, 27 Jun 2023 20:20:09 +1000 +Subject: [PATCH 3/6] hw/arm/virt: Validate cluster and NUMA node boundary for + RHEL machines + +RH-Author: Gavin Shan +RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines +RH-Bugzilla: 2171363 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Commit: [3/3] a396c499259b566861ca007b01f8539bf6113711 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 +Upstream Status: RHEL only + +Set mc->cpu_cluster_has_numa_boundary to true so that the boundary of +CPU cluster and NUMA node will be validated for 'virt-rhel*' machines. +A warning message will be printed if the boundary is broken. 
+ +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index df6a0231bc..faf68488d5 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3530,6 +3530,8 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + mc->smp_props.clusters_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; ++ /* platform instead of architectural choice */ ++ mc->cpu_cluster_has_numa_boundary = true; + mc->default_ram_id = "mach-virt.ram"; + + object_class_property_add(oc, "acpi", "OnOffAuto", +-- +2.39.3 + diff --git a/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch b/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch new file mode 100644 index 0000000..d6a6d73 --- /dev/null +++ b/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch @@ -0,0 +1,160 @@ +From a5857fb12fcad46e27c415fe82ce13c0cb5d09c7 Mon Sep 17 00:00:00 2001 +From: Marcelo Tosatti +Date: Thu, 29 Jun 2023 14:48:32 -0300 +Subject: [PATCH 5/6] kvm: reuse per-vcpu stats fd to avoid vcpu interruption +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marcelo Tosatti +RH-MergeRequest: 177: kvm: reuse per-vcpu stats fd to avoid vcpu interruption +RH-Bugzilla: 2218644 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Leonardo Brás +RH-Commit: [1/1] 4ec72385a9047888121485f49bacb1aff84f7018 (mtosatti/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2218644 +Commit: 3b6f485275ae95a81eec589d2773b86ca9ddec4d + +A regression has been detected in latency testing of KVM guests. +More specifically, it was observed that the cyclictest +numbers inside of an isolated vcpu (running on isolated pcpu) are: + +Where a maximum of 50us is acceptable. 
+ +The implementation of KVM_GET_STATS_FD uses run_on_cpu to query +per vcpu statistics, which interrupts the vcpu (and is unnecessary). + +To fix this, open the per vcpu stats fd on vcpu initialization, +and read from that fd from QEMU's main thread. + +Signed-off-by: Marcelo Tosatti +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 30 +++++++++++++++--------------- + include/hw/core/cpu.h | 1 + + 2 files changed, 16 insertions(+), 15 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index cf3a88d90e..fa7ca46c66 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -450,6 +450,8 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) + "kvm_init_vcpu: kvm_arch_init_vcpu failed (%lu)", + kvm_arch_vcpu_id(cpu)); + } ++ cpu->kvm_vcpu_stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL); ++ + err: + return ret; + } +@@ -3959,7 +3961,7 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd + + /* Read stats header */ + kvm_stats_header = &descriptors->kvm_stats_header; +- ret = read(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header)); ++ ret = pread(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header), 0); + if (ret != sizeof(*kvm_stats_header)) { + error_setg(errp, "KVM stats: failed to read stats header: " + "expected %zu actual %zu", +@@ -3990,7 +3992,8 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd + } + + static void query_stats(StatsResultList **result, StatsTarget target, +- strList *names, int stats_fd, Error **errp) ++ strList *names, int stats_fd, CPUState *cpu, ++ Error **errp) + { + struct kvm_stats_desc *kvm_stats_desc; + struct kvm_stats_header *kvm_stats_header; +@@ -4048,7 +4051,7 @@ static void query_stats(StatsResultList **result, StatsTarget target, + break; + case STATS_TARGET_VCPU: + add_stats_entry(result, STATS_PROVIDER_KVM, +- current_cpu->parent_obj.canonical_path, ++ cpu->parent_obj.canonical_path, + stats_list); + break; + default: +@@ 
-4085,10 +4088,9 @@ static void query_stats_schema(StatsSchemaList **result, StatsTarget target, + add_stats_schema(result, STATS_PROVIDER_KVM, target, stats_list); + } + +-static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data) ++static void query_stats_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args) + { +- StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr; +- int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL); ++ int stats_fd = cpu->kvm_vcpu_stats_fd; + Error *local_err = NULL; + + if (stats_fd == -1) { +@@ -4097,14 +4099,13 @@ static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data) + return; + } + query_stats(kvm_stats_args->result.stats, STATS_TARGET_VCPU, +- kvm_stats_args->names, stats_fd, kvm_stats_args->errp); +- close(stats_fd); ++ kvm_stats_args->names, stats_fd, cpu, ++ kvm_stats_args->errp); + } + +-static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data) ++static void query_stats_schema_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args) + { +- StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr; +- int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL); ++ int stats_fd = cpu->kvm_vcpu_stats_fd; + Error *local_err = NULL; + + if (stats_fd == -1) { +@@ -4114,7 +4115,6 @@ static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data) + } + query_stats_schema(kvm_stats_args->result.schema, STATS_TARGET_VCPU, stats_fd, + kvm_stats_args->errp); +- close(stats_fd); + } + + static void query_stats_cb(StatsResultList **result, StatsTarget target, +@@ -4132,7 +4132,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target, + error_setg_errno(errp, errno, "KVM stats: ioctl failed"); + return; + } +- query_stats(result, target, names, stats_fd, errp); ++ query_stats(result, target, names, stats_fd, NULL, errp); + close(stats_fd); + break; + } +@@ -4146,7 +4146,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target, + if 
(!apply_str_list_filter(cpu->parent_obj.canonical_path, targets)) { + continue; + } +- run_on_cpu(cpu, query_stats_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args)); ++ query_stats_vcpu(cpu, &stats_args); + } + break; + } +@@ -4172,6 +4172,6 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp) + if (first_cpu) { + stats_args.result.schema = result; + stats_args.errp = errp; +- run_on_cpu(first_cpu, query_stats_schema_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args)); ++ query_stats_schema_vcpu(first_cpu, &stats_args); + } + } +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index 397fd3ac68..ae96be07e7 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -399,6 +399,7 @@ struct CPUState { + struct kvm_dirty_gfn *kvm_dirty_gfns; + uint32_t kvm_fetch_index; + uint64_t dirty_pages; ++ int kvm_vcpu_stats_fd; + + /* Use by accel-block: CPU is executing an ioctl() */ + QemuLockCnt in_ioctl_lock; +-- +2.39.3 + diff --git a/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch b/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch new file mode 100644 index 0000000..66d68f1 --- /dev/null +++ b/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch @@ -0,0 +1,145 @@ +From 760a2f284f6d4cd3cd3b1685411bbca21c4ad233 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Tue, 27 Jun 2023 20:20:09 +1000 +Subject: [PATCH 1/6] numa: Validate cluster and NUMA node boundary if required +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Gavin Shan +RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines +RH-Bugzilla: 2171363 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Commit: [1/3] 24580064b9a0076ec4d9a916839d85135ac48cd9 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 + +For some architectures like ARM64, multiple CPUs in one cluster can be +associated with different NUMA nodes, which is irregular configuration 
+because we shouldn't have this in baremetal environment. The irregular +configuration causes Linux guest to misbehave, as the following warning +messages indicate. + + -smp 6,maxcpus=6,sockets=2,clusters=1,cores=3,threads=1 \ + -numa node,nodeid=0,cpus=0-1,memdev=ram0 \ + -numa node,nodeid=1,cpus=2-3,memdev=ram1 \ + -numa node,nodeid=2,cpus=4-5,memdev=ram2 \ + + ------------[ cut here ]------------ + WARNING: CPU: 0 PID: 1 at kernel/sched/topology.c:2271 build_sched_domains+0x284/0x910 + Modules linked in: + CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-268.el9.aarch64 #1 + pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) + pc : build_sched_domains+0x284/0x910 + lr : build_sched_domains+0x184/0x910 + sp : ffff80000804bd50 + x29: ffff80000804bd50 x28: 0000000000000002 x27: 0000000000000000 + x26: ffff800009cf9a80 x25: 0000000000000000 x24: ffff800009cbf840 + x23: ffff000080325000 x22: ffff0000005df800 x21: ffff80000a4ce508 + x20: 0000000000000000 x19: ffff000080324440 x18: 0000000000000014 + x17: 00000000388925c0 x16: 000000005386a066 x15: 000000009c10cc2e + x14: 00000000000001c0 x13: 0000000000000001 x12: ffff00007fffb1a0 + x11: ffff00007fffb180 x10: ffff80000a4ce508 x9 : 0000000000000041 + x8 : ffff80000a4ce500 x7 : ffff80000a4cf920 x6 : 0000000000000001 + x5 : 0000000000000001 x4 : 0000000000000007 x3 : 0000000000000002 + x2 : 0000000000001000 x1 : ffff80000a4cf928 x0 : 0000000000000001 + Call trace: + build_sched_domains+0x284/0x910 + sched_init_domains+0xac/0xe0 + sched_init_smp+0x48/0xc8 + kernel_init_freeable+0x140/0x1ac + kernel_init+0x28/0x140 + ret_from_fork+0x10/0x20 + +Improve the situation to warn when multiple CPUs in one cluster have +been associated with different NUMA nodes. However, one NUMA node is +allowed to be associated with different clusters. 
+ +Signed-off-by: Gavin Shan +Acked-by: Philippe Mathieu-Daudé +Acked-by: Igor Mammedov +Message-Id: <20230509002739.18388-2-gshan@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a494fdb715832000ee9047a549a35aacfea8175e) +Signed-off-by: Gavin Shan +--- + hw/core/machine.c | 42 ++++++++++++++++++++++++++++++++++++++++++ + include/hw/boards.h | 1 + + 2 files changed, 43 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index c28702b690..5abdc8c39b 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -1496,6 +1496,45 @@ static void machine_numa_finish_cpu_init(MachineState *machine) + g_string_free(s, true); + } + ++static void validate_cpu_cluster_to_numa_boundary(MachineState *ms) ++{ ++ MachineClass *mc = MACHINE_GET_CLASS(ms); ++ NumaState *state = ms->numa_state; ++ const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); ++ const CPUArchId *cpus = possible_cpus->cpus; ++ int i, j; ++ ++ if (state->num_nodes <= 1 || possible_cpus->len <= 1) { ++ return; ++ } ++ ++ /* ++ * The Linux scheduling domain can't be parsed when the multiple CPUs ++ * in one cluster have been associated with different NUMA nodes. However, ++ * it's fine to associate one NUMA node with CPUs in different clusters. ++ */ ++ for (i = 0; i < possible_cpus->len; i++) { ++ for (j = i + 1; j < possible_cpus->len; j++) { ++ if (cpus[i].props.has_socket_id && ++ cpus[i].props.has_cluster_id && ++ cpus[i].props.has_node_id && ++ cpus[j].props.has_socket_id && ++ cpus[j].props.has_cluster_id && ++ cpus[j].props.has_node_id && ++ cpus[i].props.socket_id == cpus[j].props.socket_id && ++ cpus[i].props.cluster_id == cpus[j].props.cluster_id && ++ cpus[i].props.node_id != cpus[j].props.node_id) { ++ warn_report("CPU-%d and CPU-%d in socket-%" PRId64 "-cluster-%" PRId64 ++ " have been associated with node-%" PRId64 " and node-%" PRId64 ++ " respectively. 
It can cause OSes like Linux to" ++ " misbehave", i, j, cpus[i].props.socket_id, ++ cpus[i].props.cluster_id, cpus[i].props.node_id, ++ cpus[j].props.node_id); ++ } ++ } ++ } ++} ++ + MemoryRegion *machine_consume_memdev(MachineState *machine, + HostMemoryBackend *backend) + { +@@ -1581,6 +1620,9 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error * + numa_complete_configuration(machine); + if (machine->numa_state->num_nodes) { + machine_numa_finish_cpu_init(machine); ++ if (machine_class->cpu_cluster_has_numa_boundary) { ++ validate_cpu_cluster_to_numa_boundary(machine); ++ } + } + } + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 5f08bd7550..3628671228 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -275,6 +275,7 @@ struct MachineClass { + bool nvdimm_supported; + bool numa_mem_supported; + bool auto_enable_numa; ++ bool cpu_cluster_has_numa_boundary; + SMPCompatProps smp_props; + const char *default_ram_id; + +-- +2.39.3 + diff --git a/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch b/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch new file mode 100644 index 0000000..3282c24 --- /dev/null +++ b/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch @@ -0,0 +1,138 @@ +From ac54f5f746782da89ab674733af5622e524b58eb Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Fri, 2 Jun 2023 18:27:35 +0200 +Subject: [PATCH 4/6] vhost: fix vhost_dev_enable_notifiers() error case +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 176: vhost: fix vhost_dev_enable_notifiers() error case +RH-Jira: RHEL-330 +RH-Acked-by: MST +RH-Acked-by: Cindy Lu +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Jason Wang +RH-Commit: [1/1] fd30d7501be59f7e5b9d6fc5ed84efcc4037d08e (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-330 + +in vhost_dev_enable_notifiers(), if virtio_bus_set_host_notifier(true) 
+fails, we call vhost_dev_disable_notifiers() that executes +virtio_bus_set_host_notifier(false) on all queues, even on queues that +have failed to be initialized. + +This triggers a core dump in memory_region_del_eventfd(): + + virtio_bus_set_host_notifier: unable to init event notifier: Too many open files (-24) + vhost VQ 1 notifier binding failed: 24 + .../softmmu/memory.c:2611: memory_region_del_eventfd: Assertion `i != mr->ioeventfd_nb' failed. + +Fix the problem by providing to vhost_dev_disable_notifiers() the +number of queues to disable. + +Fixes: 8771589b6f81 ("vhost: simplify vhost_dev_enable_notifiers") +Cc: longpeng2@huawei.com +Signed-off-by: Laurent Vivier +Message-Id: <20230602162735.3670785-1-lvivier@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Philippe Mathieu-Daudé +(cherry picked from commit 92099aa4e9a3bb6856c290afaf41c76f9e3dd9fd) +--- + hw/virtio/vhost.c | 65 ++++++++++++++++++++++++++--------------------- + 1 file changed, 36 insertions(+), 29 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index a266396576..ae0a033e60 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -1545,6 +1545,40 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) + memset(hdev, 0, sizeof(struct vhost_dev)); + } + ++static void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev, ++ VirtIODevice *vdev, ++ unsigned int nvqs) ++{ ++ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); ++ int i, r; ++ ++ /* ++ * Batch all the host notifiers in a single transaction to avoid ++ * quadratic time complexity in address_space_update_ioeventfds(). 
++ */ ++ memory_region_transaction_begin(); ++ ++ for (i = 0; i < nvqs; ++i) { ++ r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, ++ false); ++ if (r < 0) { ++ error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); ++ } ++ assert(r >= 0); ++ } ++ ++ /* ++ * The transaction expects the ioeventfds to be open when it ++ * commits. Do it now, before the cleanup loop. ++ */ ++ memory_region_transaction_commit(); ++ ++ for (i = 0; i < nvqs; ++i) { ++ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); ++ } ++ virtio_device_release_ioeventfd(vdev); ++} ++ + /* Stop processing guest IO notifications in qemu. + * Start processing them in vhost in kernel. + */ +@@ -1574,7 +1608,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) + if (r < 0) { + error_report("vhost VQ %d notifier binding failed: %d", i, -r); + memory_region_transaction_commit(); +- vhost_dev_disable_notifiers(hdev, vdev); ++ vhost_dev_disable_notifiers_nvqs(hdev, vdev, i); + return r; + } + } +@@ -1591,34 +1625,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) + */ + void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) + { +- BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); +- int i, r; +- +- /* +- * Batch all the host notifiers in a single transaction to avoid +- * quadratic time complexity in address_space_update_ioeventfds(). +- */ +- memory_region_transaction_begin(); +- +- for (i = 0; i < hdev->nvqs; ++i) { +- r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, +- false); +- if (r < 0) { +- error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); +- } +- assert (r >= 0); +- } +- +- /* +- * The transaction expects the ioeventfds to be open when it +- * commits. Do it now, before the cleanup loop. 
+- */ +- memory_region_transaction_commit(); +- +- for (i = 0; i < hdev->nvqs; ++i) { +- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); +- } +- virtio_device_release_ioeventfd(vdev); ++ vhost_dev_disable_notifiers_nvqs(hdev, vdev, hdev->nvqs); + } + + /* Test and clear event pending status. +-- +2.39.3 + diff --git a/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch b/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch new file mode 100644 index 0000000..fd29eb7 --- /dev/null +++ b/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch @@ -0,0 +1,67 @@ +From 4e30ca551fb3740a428017a0debf0a6aab976639 Mon Sep 17 00:00:00 2001 +From: Ani Sinha +Date: Mon, 19 Jun 2023 12:22:09 +0530 +Subject: [PATCH 6/6] vhost-vdpa: do not cleanup the vdpa/vhost-net structures + if peer nic is present + +RH-Author: Ani Sinha +RH-MergeRequest: 174: vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present +RH-Bugzilla: 2128929 +RH-Acked-by: Igor Mammedov +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] c70d4e5fd93256326d318e0b507db6b9eb93ad86 (anisinha/centos-qemu-kvm) + +When a peer nic is still attached to the vdpa backend, it is too early to free +up the vhost-net and vdpa structures. If these structures are freed here, then +QEMU crashes when the guest is being shut down. The following call chain +would result in an assertion failure since the pointer returned from +vhost_vdpa_get_vhost_net() would be NULL: + +do_vm_stop() -> vm_state_notify() -> virtio_set_status() -> +virtio_net_vhost_status() -> get_vhost_net(). + +Therefore, we defer freeing up the structures until at guest shutdown +time when qemu_cleanup() calls net_cleanup() which then calls +qemu_del_net_client() which would eventually call vhost_vdpa_cleanup() +again to free up the structures. This time, the loop in net_cleanup() +ensures that vhost_vdpa_cleanup() will be called one last time when +all the peer nics are detached and freed. 
+ +All unit tests pass with this change. + +CC: imammedo@redhat.com +CC: jusual@redhat.com +CC: mst@redhat.com +Fixes: CVE-2023-3301 +Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929 +Signed-off-by: Ani Sinha +Message-Id: <20230619065209.442185-1-anisinha@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit a0d7215e339b61c7d7a7b3fcf754954d80d93eb8) +--- + net/vhost-vdpa.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 99904a0da7..8c8900f0f4 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -184,6 +184,14 @@ static void vhost_vdpa_cleanup(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); + ++ /* ++ * If a peer NIC is attached, do not cleanup anything. ++ * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup() ++ * when the guest is shutting down. ++ */ ++ if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) { ++ return; ++ } + qemu_vfree(s->cvq_cmd_out_buffer); + qemu_vfree(s->status); + if (s->vhost_net) { +-- +2.39.3 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index e7cd2af..ad8ea7e 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 8.0.0 -Release: 6%{?rcrel}%{?dist}%{?cc_suffix} +Release: 7%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -360,6 +360,18 @@ Patch102: kvm-target-i386-add-support-for-FB_CLEAR-feature.patch Patch103: kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch # For bz#2180076 - [qemu-kvm] support fd passing for libblkio QEMU BlockDrivers Patch104: kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch +# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association +Patch105: kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch +# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association +Patch106: kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch +# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association +Patch107: kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch +# For RHEL-330 - [virtual network][qemu-kvm-8.0.0-rc1]qemu core dump: qemu-kvm: ../softmmu/memory.c:2592: void memory_region_del_eventfd(MemoryRegion *, hwaddr, unsigned int, _Bool, uint64_t, EventNotifier *): Assertion `i != mr->ioeventfd_nb' failed +Patch108: kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch +# For bz#2218644 - query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone) +Patch109: kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch +# For bz#2128929 - [rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest +Patch110: kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch %if %{have_clang} BuildRequires: clang @@ -1400,6 +1412,22 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Jul 10 2023 Miroslav Rezanina - 8.0.0-7 +- kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch [bz#2171363] +- kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch 
[bz#2171363] +- kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch [bz#2171363] +- kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch [RHEL-330] +- kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch [bz#2218644] +- kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch [bz#2128929] +- Resolves: bz#2171363 + ([aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association) +- Resolves: RHEL-330 + ([virtual network][qemu-kvm-8.0.0-rc1]qemu core dump: qemu-kvm: ../softmmu/memory.c:2592: void memory_region_del_eventfd(MemoryRegion *, hwaddr, unsigned int, _Bool, uint64_t, EventNotifier *): Assertion `i != mr->ioeventfd_nb' failed) +- Resolves: bz#2218644 + (query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone)) +- Resolves: bz#2128929 + ([rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest) + * Mon Jun 26 2023 Miroslav Rezanina - 8.0.0-6 - kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch [bz#2216201] - kvm-target-i386-add-support-for-FB_CLEAR-feature.patch [bz#2216201]